diff --git a/Jenkinsfile b/Jenkinsfile
index 8493c5090f..f1fb07ea52 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,2 +1,2 @@
 @Library('pipeline-library')_
-FullVitisLibPipeline (branch: 'next', libname: 'Vitis_Libraries', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vivado_syn:vitis_sw_emu:vitis_hw_emu:vitis_hw_build:vitis_aie_sim:vitis_aie_x86sim', TOOLVERSION: '2022.1_stable_latest')
+FullVitisLibPipeline (branch: 'master', libname: 'Vitis_Libraries', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vivado_syn:vitis_sw_emu:vitis_hw_emu:vitis_hw_build:vitis_aie_sim:vitis_aie_x86sim', TOOLVERSION: '2022.1_released')
diff --git a/blas/Jenkinsfile b/blas/Jenkinsfile
index b79985f137..0ea47ca8ee 100644
--- a/blas/Jenkinsfile
+++ b/blas/Jenkinsfile
@@ -1,5 +1,4 @@
 @Library('pipeline-library')_
 
-VitisLibPipeline (branch: 'next', libname: 'xf_blas', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
-        upstream_dependencies: 'xf_hpc,next,../hpc',
-                  devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_stable_latest')
+VitisLibPipeline (branch: 'main', libname: 'xf_blas', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
+                  upstream_dependencies: 'xf_hpc,main,../hpc', devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_released', mail_on:'daily:PR')
diff --git a/blas/L2/tests/memKernel/gemm_1CU/Makefile b/blas/L2/tests/memKernel/gemm_1CU/Makefile
index 23a05a3cd0..bd70f04ebb 100644
--- a/blas/L2/tests/memKernel/gemm_1CU/Makefile
+++ b/blas/L2/tests/memKernel/gemm_1CU/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/src/memKernel/sw/main.cpp $(XFLIB_DIR)/L2/src/xcl2/xcl2.cpp 
@@ -134,6 +136,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw/xf_blas/helpers/utils -I $(XFLIB_DIR)
 CXXFLAGS += -O3
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -206,11 +213,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -244,21 +246,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -305,14 +307,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/blas.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	make check
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	make check
 else
@@ -349,12 +353,10 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
 clean: cleanh
\ No newline at end of file
diff --git a/blas/L2/tests/memKernel/gemm_1CU/utils.mk b/blas/L2/tests/memKernel/gemm_1CU/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/blas/L2/tests/memKernel/gemm_1CU/utils.mk
+++ b/blas/L2/tests/memKernel/gemm_1CU/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,37 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+#On non-x86 hosts K_IMAGE and ROOTFS fall back to paths derived from SYSROOT
+#unless they were already defined (e.g. exported in the environment).
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+#On non-x86 hosts each check_* target below raises a make error when the path held by its variable does not exist.
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT ENV variable is not set or does not point to an existing path, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/blas/L2/tests/memKernel/gemm_1CU_gui/Makefile b/blas/L2/tests/memKernel/gemm_1CU_gui/Makefile
index e7c13d65e3..cbfcad3e0e 100644
--- a/blas/L2/tests/memKernel/gemm_1CU_gui/Makefile
+++ b/blas/L2/tests/memKernel/gemm_1CU_gui/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/src/memKernel/sw/api_gemm.cpp $(XFLIB_DIR)/L2/src/xcl2/xcl2.cpp 
@@ -125,6 +127,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw/xf_blas/helpers/utils -I $(XFLIB_DIR)
 CXXFLAGS += -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := api_gemm.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -190,11 +197,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -228,21 +230,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -289,14 +291,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/blas.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -330,12 +334,10 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
 clean: cleanh
\ No newline at end of file
diff --git a/blas/L2/tests/memKernel/gemm_1CU_gui/utils.mk b/blas/L2/tests/memKernel/gemm_1CU_gui/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/blas/L2/tests/memKernel/gemm_1CU_gui/utils.mk
+++ b/blas/L2/tests/memKernel/gemm_1CU_gui/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,37 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+#On non-x86 hosts K_IMAGE and ROOTFS fall back to paths derived from SYSROOT
+#unless they were already defined (e.g. exported in the environment).
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+#On non-x86 hosts each check_* target below raises a make error when the path held by its variable does not exist.
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT ENV variable is not set or does not point to an existing path, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/blas/L2/tests/streamingKernel/gemmKernel/Makefile b/blas/L2/tests/streamingKernel/gemmKernel/Makefile
index 9d19023772..b41fb937c7 100644
--- a/blas/L2/tests/streamingKernel/gemmKernel/Makefile
+++ b/blas/L2/tests/streamingKernel/gemmKernel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,15 +114,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/src/streamingKernel/sw/host/gemm_stream.cpp $(XFLIB_DIR)/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw/xf_blas/helpers/utils -I $(XFLIB_DIR)/L2/include/streamingKernel/ -I $(XFLIB_DIR)/L2/include/streamingKernel/sw/host -I $(XFLIB_DIR)/L2/include/streamingKernel/sw/compiler -I $(XFLIB_DIR)/L2/include/streamingKernel/sw -I $(XFLIB_DIR)/L2/include/xcl2 -I $(XFLIB_DIR)/L2/include/streamingKernel/sw/host/ -I $(XFLIB_DIR)/L1/include/hw
 CXXFLAGS += -O3
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -206,11 +213,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -244,21 +246,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -305,14 +307,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/gemmKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	make check
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	make check
 else
@@ -349,12 +353,10 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
 clean: cleanh
\ No newline at end of file
diff --git a/blas/L2/tests/streamingKernel/gemmKernel/utils.mk b/blas/L2/tests/streamingKernel/gemmKernel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/blas/L2/tests/streamingKernel/gemmKernel/utils.mk
+++ b/blas/L2/tests/streamingKernel/gemmKernel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,37 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+#On non-x86 hosts K_IMAGE and ROOTFS fall back to paths derived from SYSROOT
+#unless they were already defined (e.g. exported in the environment).
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+#On non-x86 hosts each check_* target below raises a make error when the path held by its variable does not exist.
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT ENV variable is not set or does not point to an existing path, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/blas/L2/tests/streamingKernel/gemmLoadStore/Makefile b/blas/L2/tests/streamingKernel/gemmLoadStore/Makefile
index de2b5f59bc..20675e5c63 100644
--- a/blas/L2/tests/streamingKernel/gemmLoadStore/Makefile
+++ b/blas/L2/tests/streamingKernel/gemmLoadStore/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/src/streamingKernel/sw/host/gemmLdSt.cpp $(XFLIB_DIR)/L2/src/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw/xf_blas/helpers/utils -I $(XFLIB_DIR)
 CXXFLAGS += -O3
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -200,11 +207,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -238,21 +240,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -299,14 +301,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/blas.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	make check
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	make check
 else
@@ -343,12 +347,10 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
 clean: cleanh
\ No newline at end of file
diff --git a/blas/L2/tests/streamingKernel/gemmLoadStore/utils.mk b/blas/L2/tests/streamingKernel/gemmLoadStore/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/blas/L2/tests/streamingKernel/gemmLoadStore/utils.mk
+++ b/blas/L2/tests/streamingKernel/gemmLoadStore/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # on non-x86 hosts, abort when the K_IMAGE path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # on non-x86 hosts, abort when the ROOTFS path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/blas/L3/benchmarks/gemm/memKernel/Makefile b/blas/L3/benchmarks/gemm/memKernel/Makefile
index b565dedf0f..64f5bb9dba 100644
--- a/blas/L3/benchmarks/gemm/memKernel/Makefile
+++ b/blas/L3/benchmarks/gemm/memKernel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(XFLIB_DIR)/L3/benchmarks/gemm/gemm_bench.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include/sw -I $(XFLIB_DIR)/L3/benchmarks/gemm -I
 LDFLAGS += -luuid -lxrt_coreutil
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := gemm_bench.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -188,11 +195,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -226,21 +228,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but do not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -287,14 +289,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/blas.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -331,12 +335,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/blas/L3/benchmarks/gemm/memKernel/utils.mk b/blas/L3/benchmarks/gemm/memKernel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/blas/L3/benchmarks/gemm/memKernel/utils.mk
+++ b/blas/L3/benchmarks/gemm/memKernel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # on non-x86 hosts, abort when the K_IMAGE path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # on non-x86 hosts, abort when the ROOTFS path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/blas/L3/examples/memKernel/gemm/Makefile b/blas/L3/examples/memKernel/gemm/Makefile
index 4aaf354608..6e358eeb0e 100644
--- a/blas/L3/examples/memKernel/gemm/Makefile
+++ b/blas/L3/examples/memKernel/gemm/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(XFLIB_DIR)/L3/examples/memKernel/gemm/gemm_example.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include/sw -I $(XFLIB_DIR)/L3/examples/memKernel
 LDFLAGS += -luuid -lxrt_coreutil
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := gemm_example.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -188,11 +195,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -226,21 +228,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but do not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -287,14 +289,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/blas.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -331,12 +335,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/blas/L3/examples/memKernel/gemm/utils.mk b/blas/L3/examples/memKernel/gemm/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/blas/L3/examples/memKernel/gemm/utils.mk
+++ b/blas/L3/examples/memKernel/gemm/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # on non-x86 hosts, abort when the K_IMAGE path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # on non-x86 hosts, abort when the ROOTFS path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/blas/L3/tests/gemm/memKernel/gemm_float/Makefile b/blas/L3/tests/gemm/memKernel/gemm_float/Makefile
index 8c67f1bcb8..4e0fff2f49 100644
--- a/blas/L3/tests/gemm/memKernel/gemm_float/Makefile
+++ b/blas/L3/tests/gemm/memKernel/gemm_float/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/gemm/gemm_test.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include/sw -I $(XFLIB_DIR)/L3/tests/gemm -I $(XF
 LDFLAGS += -luuid -lxrt_coreutil
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := gemm_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -188,11 +195,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -226,21 +228,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but do not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -287,14 +289,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/blas.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -331,12 +335,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/blas/L3/tests/gemm/memKernel/gemm_float/utils.mk b/blas/L3/tests/gemm/memKernel/gemm_float/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/blas/L3/tests/gemm/memKernel/gemm_float/utils.mk
+++ b/blas/L3/tests/gemm/memKernel/gemm_float/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # on non-x86 hosts, abort when the K_IMAGE path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # on non-x86 hosts, abort when the ROOTFS path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/blas/docs/src/conf.py b/blas/docs/src/conf.py
index a8344aa4db..b3aaf34970 100644
--- a/blas/docs/src/conf.py
+++ b/blas/docs/src/conf.py
@@ -39,13 +39,13 @@
 # -- Project information -----------------------------------------------------
 
 project = 'Vitis BLAS Library'
-copyright = '2021, Xilinx Inc.'
+copyright = '2022, Xilinx Inc.'
 author = 'Xilinx Inc.'
 
 # The short X.Y version
-version = '2021.2'
+version = '2022.1'
 # The full version, including alpha/beta/rc tags
-release = 'v2021.2'
+release = 'v2022.1'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/codec/.gitignore b/codec/.gitignore
index fc746d91a5..808b5024de 100644
--- a/codec/.gitignore
+++ b/codec/.gitignore
@@ -1,6 +1,5 @@
 *.log
 *.yuv
-*.jpg
 _x_*
 build_dir*
 emconfig.json
diff --git a/codec/Jenkinsfile b/codec/Jenkinsfile
index 5d5beb6a10..24c8fb076d 100644
--- a/codec/Jenkinsfile
+++ b/codec/Jenkinsfile
@@ -1,5 +1,5 @@
 @Library('pipeline-library')_
-VitisLibPipeline (branch: 'regression', libname: 'xf_codec', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
-                  upstream_dependencies: 'xf_utils_hw,next,../utils; xf_database,next,../database; xf_fintech,next,../quantitative_finance',
-                  devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_stable_latest')
+VitisLibPipeline (branch: 'main', libname: 'xf_codec', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
+                  upstream_dependencies: 'xf_utils_hw,main,../utils; xf_database,main,../database; xf_fintech,main,../quantitative_finance',
+                  devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_released')
 
diff --git a/codec/L1/README.md b/codec/L1/README.md
index bd4ef5f673..3c4da2591b 100644
--- a/codec/L1/README.md
+++ b/codec/L1/README.md
@@ -1,104 +1,26 @@
-JPEG Decoder
-============
-
-Jpeg Decoder example resides in ``L2/demos/jpegDec`` directory. The tutorial provides a step-by-step guide that covers commands for building and running kernel.
-
-Executable Usage
-----------------
-
-* **Work Directory(Step 1)**
-
-The steps for library download and environment setup can be found in :ref:`l2_vitis_codec`. For getting the design,
-
-```
-   cd L2/demos/jpegDec
-```
-
-* **Build kernel(Step 2)**
-
-Run the following make command to build your XCLBIN and host binary targeting a specific device. Please be noticed that this process will take a long time, maybe couple of hours.
-
-```
-   make run TARGET=hw DEVICE=xilinx_u250_xdma_201830_2
-```   
-
-* **Run kernel(Step 3)**
-
-To get the benchmark results, please run the following command.
-
-```
-   ./build_dir.hw.xilinx_u250_xdma_201830_2/host.exe -xclbin build_dir.hw.xilinx_u250_xdma_201830_2/jpegDecoder.xclbin -JPEGFile android.jpg
-```   
-
-JPEG Decoder Input Arguments:
-
-```
-   Usage: host.exe -[-xclbin -dataSetDir -refDir]
-          -xclbin:    the kernel name
-          -JPEGFile:  the path point to input *.jpg
-```          
-
-Note: Default arguments are set in Makefile, you can use other :ref:`pictures` listed in the table.
-
-* **Example output(Step 4)** 
-
-```
-   Found Platform
-   Platform Name: Xilinx
-   INFO: Found Device=xilinx_u250_xdma_201830_2
-   INFO: Importing build_dir.hw.xilinx_u250_xdma_201830_2/jpegDecoder.xclbin
-   Loading: 'build_dir.hw.xilinx_u250_xdma_201830_2/jpegDecoder.xclbin'
-   INFO: Kernel has been created
-   INFO: Finish kernel setup
-   ...
-
-   INFO: Finish kernel execution
-   INFO: Finish E2E execution
-   INFO: Data transfer from host to device: 40 us
-   INFO: Data transfer from device to host: 6 us
-   INFO: Average kernel execution per run: 988 us
-   ...
-
-   INFO: android.yuv will be generated from the jpeg decoder's output   oINFO: android.yuv is generated correctly
-   INFO: android.yuv is generated correctly
-```   
-
-Profiling
----------
-
-The hardware resource utilizations are listed in the following table.
-Different tool versions may result slightly different resource.
-
-##### Table 1 IP resources for jpegDecoder with huffman decoder(L1 IP)
-
-|           IP          |   BRAM   |   URAM   |    DSP   |    FF    |   LUT   | Frequency(MHz)  |
-|-----------------------|----------|----------|----------|----------|---------|-----------------|
-|     huffman_decoder   |     5    |     0    |    12    |    6963  |   7344  |       286       |
-
-##### Table 2 IP resources for jpegDecoder with jfif parser and huffman decoder(L1 IP)
-
-|           IP          |   BRAM   |   URAM   |    DSP   |    FF    |   LUT   | Frequency(MHz)  |
-|-----------------------|----------|----------|----------|----------|---------|-----------------|
-| kernel_parser_decoder |     5    |     0    |    12    |    7615  |   8382  |       257       |
-
-##### Table 3 Hardware resources for jpegDecoder with jfif parser, huffman, iq and idct (L2 kernel)   
-
-|        Kernel         |   BRAM   |   URAM   |    DSP   |    FF    |   LUT   | Frequency(MHz)  |
-|-----------------------|----------|----------|----------|----------|---------|-----------------|
-|      jpegDecoder      |     7    |     0    |    39    |   12298  |  13417  |       257       |
-
-Result
-------
-
-To check the output yuv file, download https://sourceforge.net/projects/raw-yuvplayer/ . 
-Then upload the rebuild_image.yuv, set the right sample radio and custom size on the software, and check the yuv file.
-
-Table 1 : Jpeg Decoder profiling
-
-![Table 1 : Jpeg Decoder profiling](../../../docs/images/jpegDecoderpofile.png)
-
-##### Note      
-```      
-    | 1. MAX_DEC_PIX is for benchmark. If testcase image is larger than 20M, the value of MAX_DEC_PIX should be enlarged following the size of image.   
-    | 2. MAXCMP_BC is for benchmark. If testcase image is larger than 20M, the value of MAXCMP_BC should be enlarged following the size of image.   
-```
+# Level 1: HLS Modules
+
+The Level 1 APIs of the Vitis Codec Library are presented as HLS C++ modules.
+
+This level of API is mainly provided for hardware-savvy developers.
+The API description and design details of these modules can be found
+in the _L1 Module User Guide_ section of the library documentation.
+
+## License
+
+Licensed using the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0).
+
+    Copyright 2022 Xilinx, Inc.
+    
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+    
+        http://www.apache.org/licenses/LICENSE-2.0
+    
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+    Copyright 2022 Xilinx, Inc.
diff --git a/codec/L1/include/xlnx_cfg.h b/codec/L1/include/xlnx_cfg.h
new file mode 100644
index 0000000000..1873897c90
--- /dev/null
+++ b/codec/L1/include/xlnx_cfg.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef XLNX_CFG_H
+#define XLNX_CFG_H
+
+#define XLNX_DEBUG_DCT
+#define XLNX_DEBUG_CMAP
+
+#define XLNX_DISABLE_BLK_DICT
+#define XLNX_DISABLE_RECT_DCT
+#define XLNX_DISABLE_ARC
+#define XLNX_DISABLE_2NDCMP
+
+#endif
diff --git a/codec/L1/meta/api.json b/codec/L1/meta/api.json
index 90676c5346..4b800a960c 100644
--- a/codec/L1/meta/api.json
+++ b/codec/L1/meta/api.json
@@ -29,43 +29,38 @@
                         "type": "const int"
                     },
                     {
-                        "name": "img_info",
+                        "name": null,
                         "direction": "",
-                        "type": "xf::codec::img_info&"
+                        "type": ""
                     },
                     {
-                        "name": "hls_cmpnfo[MAX_NUM_COLOR]",
+                        "name": null,
                         "direction": "",
-                        "type": "xf::codec::hls_compInfo"
+                        "type": "*pout"
                     },
                     {
                         "name": "block_strm",
                         "direction": "",
-                        "type": "hls::stream<ap_uint<24> >&"
+                        "type": "*pout"
                     },
                     {
                         "name": "rtn",
                         "direction": "",
-                        "type": "int&"
+                        "type": "*pout"
                     },
                     {
                         "name": "rtn2",
                         "direction": "",
-                        "type": "bool&"
-                    },
-                    {
-			"name": "pout",
-                        "direction": "",
-                        "type": "xf::codec::decOutput*"
+                        "type": "*pout"
                     }
                 ]
             }
         },
         {
-            "api_name": "xf::codec::top_order_tokenize",
+            "api_name": "top_order_tokenize",
             "spec": {
                 "schema": "vitis_libraries_api_list_schema-1.0",
-                "api_name": "xf::codec::top_order_tokenize",
+                "api_name": "top_order_tokenize",
                 "display_name": "top_order_tokenize",
                 "brief": "JXL order_tokenize case",
                 "target_domain": "",
@@ -73,7 +68,7 @@
                     "topOrderTokenize.hpp"
                 ],
                 "search_paths": [
-                    "/L1/include/"
+                    "/L1/tests/jxlEnc/order_tokenize/kernel"
                 ],
                 "instance": "function",
                 "parameters": [],
@@ -103,4 +98,4 @@
         }
     ],
     "target_domain": ""
-}
+}
\ No newline at end of file
diff --git a/codec/L1/src/XAcc_jfifparser.cpp b/codec/L1/src/XAcc_jfifparser.cpp
index ef1a3eca29..255fd16e7d 100644
--- a/codec/L1/src/XAcc_jfifparser.cpp
+++ b/codec/L1/src/XAcc_jfifparser.cpp
@@ -599,7 +599,6 @@ void decoder_jpg_top(ap_uint<AXI_WIDTH>* ptr,
  * @param block_strm the stream of coefficients in block,23:is_rst, 22:is_endblock,21~16:bpos,15~0:block val
  * @param rtn the flag of the jfif parser succeed
  * @param rtn2 the flag of the decode succeed
- * @param pout the decOutput
  */
 void kernelParserDecoderTop(ap_uint<AXI_WIDTH>* datatoDDR,
                             const int size,
diff --git a/codec/L1/src/XAcc_jpegdecoder.cpp b/codec/L1/src/XAcc_jpegdecoder.cpp
index 2fe79f81ed..8dad7791a3 100644
--- a/codec/L1/src/XAcc_jpegdecoder.cpp
+++ b/codec/L1/src/XAcc_jpegdecoder.cpp
@@ -172,18 +172,17 @@ void Huffman_decoder(
     const int16_t dc_huff_start_addr[2][16],
     //
     const ap_uint<12> cyc_cmp,
-// regs
-#ifndef __SYNTHESIS__
+    // regs
+    //#ifndef __SYNTHESIS__
     const uint8_t hls_cs_cmpc,
     const uint16_t hls_mcuh,
-#endif
+    //#endif
     const uint8_t hls_mbs[MAX_NUM_COLOR],
     const uint32_t hls_mcuc,
 
     // output
     bool& rtn2,
     hls::stream<ap_uint<24> >& block_strm) {
-
 #pragma HLS INLINE off
 
     ap_uint<12> hls_cmp = cyc_cmp;
@@ -1133,8 +1132,8 @@ void top_mcu_decoder(
 
     xf::codec::details::Huffman_decoder(huff_sos_strm, sign_no_huff, dht_tbl1, ac_val, ac_huff_start_code,
                                         ac_huff_start_addr, dc_val, dc_huff_start_code, dc_huff_start_addr, hls_cmp,
-#ifndef __SYNTHESIS__
+                                        //#ifndef __SYNTHESIS__
                                         hls_cs_cmpc, hls_mcuh,
-#endif
+                                        //#endif
                                         hls_mbs, hls_mcuc, rtn2, block_strm);
 }
diff --git a/codec/L1/tests/jpegdec/description.json b/codec/L1/tests/jpegdec/description.json
index e6ea189600..981c08c43b 100644
--- a/codec/L1/tests/jpegdec/description.json
+++ b/codec/L1/tests/jpegdec/description.json
@@ -2,15 +2,15 @@
     "name": "Xilinx jpeg decoder HLS Test",
     "description": "Xilinx jpeg decoder HLS Test",
     "flow": "hls",
-    "platform_whitelist": [
+    "platform_allowlist": [
         "u200"
     ],
-    "platform_blacklist": [],
-    "part_whitelist": [],
-    "part_blacklist": [],
+    "platform_blocklist": [],
+    "part_allowlist": [],
+    "part_blocklist": [],
     "project": "test",
     "solution": "solution1",
-    "clock": "2.5",
+    "clock": "3.33",
     "topfunction": "kernel_parser_decoder",
     "top": {
         "source": [
@@ -41,17 +41,17 @@
                 "env": "",
                 "cmd": "",
                 "max_memory_MB": {
-                    "hls_vivado_syn": 16384,
+                    "vivado_syn": 16384,
                     "hls_csim": 16384,
                     "hls_cosim": 16384,
-                    "hls_vivado_impl": 16384,
+                    "vivado_impl": 16384,
                     "hls_csynth": 16384
                 },
                 "max_time_min": {
-                    "hls_vivado_syn": 480,
+                    "vivado_syn": 480,
                     "hls_csim": 120,
                     "hls_cosim": 480,
-                    "hls_vivado_impl": 480,
+                    "vivado_impl": 480,
                     "hls_csynth": 240
                 }
             }
@@ -60,8 +60,8 @@
             "hls_csim",
             "hls_csynth",
             "hls_cosim",
-            "hls_vivado_syn",
-            "hls_vivado_impl"
+            "vivado_syn",
+            "vivado_impl"
         ],
         "category": "canary"
     }
diff --git a/codec/L1/tests/jpegdec/run_hls.tcl b/codec/L1/tests/jpegdec/run_hls.tcl
index 54799e04eb..3f9a7909b8 100644
--- a/codec/L1/tests/jpegdec/run_hls.tcl
+++ b/codec/L1/tests/jpegdec/run_hls.tcl
@@ -20,14 +20,15 @@ set PROJ "test.prj"
 set SOLN "solution1"
 
 if {![info exists CLKP]} {
-  set CLKP 2.5
+  set CLKP 3.33
 }
 
 open_project -reset $PROJ
 
-add_files "test_decoder.cpp ${XF_PROJ_ROOT}/L1/src/XAcc_jfifparser.cpp ${XF_PROJ_ROOT}/L1/src/XAcc_jpegdecoder.cpp" -cflags "-I${XF_PROJ_ROOT}/L1/include -std=c++0x"
+add_files "test_decoder.cpp ${XF_PROJ_ROOT}/L1/src/XAcc_jfifparser.cpp ${XF_PROJ_ROOT}/L1/src/XAcc_jpegdecoder.cpp" -cflags "-I${XF_PROJ_ROOT}/L1/include -std=c++0x -g -O0"
 add_files -tb "test_decoder.cpp" -cflags "-I${XF_PROJ_ROOT}/L1/include -std=c++0x"
 set_top kernel_parser_decoder
+#set_top Huffman_decoder
 
 open_solution -reset $SOLN
 
@@ -57,4 +58,4 @@ if {$VIVADO_IMPL == 1} {
   export_design -flow impl -rtl verilog
 }
 
-exit
\ No newline at end of file
+exit
diff --git a/codec/L1/tests/jpegdec/test_decoder.cpp b/codec/L1/tests/jpegdec/test_decoder.cpp
index 696ff71901..3b949b4f76 100644
--- a/codec/L1/tests/jpegdec/test_decoder.cpp
+++ b/codec/L1/tests/jpegdec/test_decoder.cpp
@@ -182,7 +182,7 @@ int main(int argc, const char* argv[]) {
                 printf("Warning: [code 3] huffman data is not in expectation!\n");
             }
         }
-        printf("Info: Ready to decode next input file!\n");
+        return 1;
     }
 
     xf::codec::details::hls_next_mcupos2(block_strm, hls_block, hls_sfv, hls_sfh, hls_mbs, hls_bch[0], hls_bc[0],
@@ -214,6 +214,7 @@ int main(int argc, const char* argv[]) {
     free(hls_block);
 
     std::cout << "Ready for next image!\n ";
+    return 0;
 }
 #endif
 
diff --git a/codec/L1/tests/jxlEnc/order_tokenize/description.json b/codec/L1/tests/jxlEnc/order_tokenize/description.json
index 79271d6ad1..57dcaf4922 100644
--- a/codec/L1/tests/jxlEnc/order_tokenize/description.json
+++ b/codec/L1/tests/jxlEnc/order_tokenize/description.json
@@ -2,12 +2,12 @@
     "name": "Xilinx Order Tokenize HLS Test",
     "description": "Xilinx jxl Order Tokenize HLS Test",
     "flow": "hls",
-    "platform_whitelist": [
+    "platform_allowlist": [
         "u200"
     ],
-    "platform_blacklist": [],
-    "part_whitelist": [],
-    "part_blacklist": [],
+    "platform_blocklist": [],
+    "part_allowlist": [],
+    "part_blocklist": [],
     "project": "tokenize",
     "solution": "solution1",
     "clock": "3.33",
@@ -40,17 +40,17 @@
                 "env": "",
                 "cmd": "",
                 "max_memory_MB": {
-                    "hls_vivado_syn": 16384,
+                    "vivado_syn": 16384,
                     "hls_csim": 16384,
                     "hls_cosim": 16384,
-                    "hls_vivado_impl": 16384,
+                    "vivado_impl": 16384,
                     "hls_csynth": 16384
                 },
                 "max_time_min": {
-                    "hls_vivado_syn": 480,
+                    "vivado_syn": 480,
                     "hls_csim": 120,
                     "hls_cosim": 480,
-                    "hls_vivado_impl": 480,
+                    "vivado_impl": 480,
                     "hls_csynth": 240
                 }
             }
@@ -59,8 +59,8 @@
             "hls_csim",
             "hls_csynth",
             "hls_cosim",
-            "hls_vivado_syn",
-            "hls_vivado_impl"
+            "vivado_syn",
+            "vivado_impl"
         ],
         "category": "canary"
     }
diff --git a/codec/L2/README.md b/codec/L2/README.md
index 5fb50aea86..a3b68a0034 100644
--- a/codec/L2/README.md
+++ b/codec/L2/README.md
@@ -1,3 +1,22 @@
 # Level 2: Predefined Codec Kernels
 
 The Level2 of Vitis Codec Library contains host-callable kernels. For more details information, please reference to _L2 User Guide_ in the document for usage and design information.
+
+## License
+
+Licensed using the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0).
+
+    Copyright 2022 Xilinx, Inc.
+    
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+    
+        http://www.apache.org/licenses/LICENSE-2.0
+    
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+    Copyright 2022 Xilinx, Inc.
diff --git a/codec/L2/demos/README.md b/codec/L2/demos/README.md
index ffe4a4d524..b569ac1917 100644
--- a/codec/L2/demos/README.md
+++ b/codec/L2/demos/README.md
@@ -7,7 +7,7 @@ Here are benchmarks of the Vitis Codec Library using the Vitis environment and c
 ### Vitis Codec Library
 - Alveo U200 installed and configured as per [Alveo U200 Data Center Accelerator Card](https://www.xilinx.com/products/boards-and-kits/alveo/u200.html#gettingStarted)
 - Xilinx runtime (XRT) installed
-- Xilinx Vitis 2021.2 installed and configured
+- Xilinx Vitis 2022.1 installed and configured
 
 ## Pictures
 
@@ -44,7 +44,26 @@ These codec benchmarks can be downloaded from [vitis libraries](https://github.c
 Specifying the corresponding Vitis, XRT, and path to the platform repository by running following commands.
 
 ```
-   source <intstall_path>/installs/lin64/Vitis/2021.2/settings64.sh
+   source <install_path>/installs/lin64/Vitis/2022.1/settings64.sh
    source /opt/xilinx/xrt/setup.sh
    export PLATFORM_REPO_PATHS=/opt/xilinx/platforms
 ```
+
+## License
+
+Licensed using the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0).
+
+    Copyright 2022 Xilinx, Inc.
+    
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+    
+        http://www.apache.org/licenses/LICENSE-2.0
+    
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+    Copyright 2022 Xilinx, Inc.
diff --git a/codec/L2/demos/jpegDec/Makefile b/codec/L2/demos/jpegDec/Makefile
index fb700498e8..bfe33c9463 100644
--- a/codec/L2/demos/jpegDec/Makefile
+++ b/codec/L2/demos/jpegDec/Makefile
@@ -142,7 +142,7 @@ LIBRARY_PATH := $(LD_LIBRARY_PATH):$(XILINX_XRT)/lib
 
 ########################## Kernel compiler global settings ##########################
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
-VPP_FLAGS +=  --config $(CUR_DIR)/conn_u200.cfg -g 
+VPP_FLAGS +=   --config $(CUR_DIR)/conn_u200.cfg
 VPP_FLAGS +=  -I $(XFLIB_DIR)/L2/include/hw/jpegDec -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/demos/jpegDec/kernel
 
 else ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
@@ -351,4 +351,4 @@ cleanall: cleanh cleank
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 	-$(RMDIR) 
 
-clean: cleanh
+clean: cleanh
\ No newline at end of file
diff --git a/codec/L2/demos/jpegDec_sc/Makefile b/codec/L2/demos/jpegDec_sc/Makefile
new file mode 100644
index 0000000000..8438349a9d
--- /dev/null
+++ b/codec/L2/demos/jpegDec_sc/Makefile
@@ -0,0 +1,284 @@
+# Copyright 2019-2021 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# sc makefile-generator v1.0.0
+
+############################## Help Section ##############################
+.PHONY: help
+
+help::
+	$(ECHO) "Makefile Usage:"
+	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to generate the design for specified Target and Shell."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to run application in emulation."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make host HOST_ARCH=<x86>"
+	$(ECHO) "      Command to build host application."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make clean "
+	$(ECHO) "      Command to remove the generated non-hardware files."
+	$(ECHO) ""
+	$(ECHO) "  make cleanall"
+	$(ECHO) "      Command to remove all the generated files."
+	$(ECHO) ""
+
+############################## Setting up Project Variables ##############################
+
+MK_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+XF_PROJ_ROOT ?= $(shell bash -c 'export MK_PATH=$(MK_PATH); echo $${MK_PATH%/L2/*}')
+CUR_DIR := $(patsubst %/,%,$(dir $(MK_PATH)))
+XFLIB_DIR = $(XF_PROJ_ROOT)
+
+# setting default value
+TARGET ?= sw_emu
+HOST_ARCH ?= x86
+
+#setting PLATFORM
+ifeq ($(PLATFORM),)
+PLATFORM := $(DEVICE)
+endif
+ifeq ($(PLATFORM),)
+PLATFORM := xilinx_u50_gen3x16_xdma_5_202210_1
+endif
+
+# #################### Checking if PLATFORM in allowlist ############################
+PLATFORM_ALLOWLIST +=  u50 u280
+PLATFORM_BLOCKLIST +=  zc
+
+GCC_INTOOL := 8.3.0
+BINUTILS_INTOOL := 2.37
+include ./utils.mk
+TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
+TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
+BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
+EMCONFIG := $(BUILD_DIR)/emconfig.json
+XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
+export XCL_BINDIR = $(XCLBIN_DIR)
+
+EXE_FILE_DEPS :=
+BINARY_CONTAINERS_DEPS :=
+RUN_DEPS :=
+
+# set debug switch
+ifneq ($(debug),yes)
+CXXFLAGS += -O3
+endif
+
+# get global setting
+ifdef XILINX_SC_PFM_CONFIG
+CXXFLAGS += -DXILINX_SC_PFM_CONFIG=$(XILINX_SC_PFM_CONFIG)
+endif
+ifdef XILINX_SC_PFM_EXT
+CXXFLAGS += -DXILINX_SC_PFM_EXT=$(XILINX_SC_PFM_EXT)
+endif
+ifeq ($(HOST_ARCH), x86)
+CXXFLAGS += -I $(XILINX_VITIS)/system_compiler/include -I $(XILINX_HLS)/include 
+LDFLAGS += -L$(XILINX_XRT)/lib -L$(XILINX_VITIS)/system_compiler/lib/x86 -lvpp_acc -l$(LIB_XRT) -lxrt_coreutil  -Wl,-rpath=$(XILINX_VITIS)/system_compiler/lib/x86:$(XILINX_XRT)/lib:$(GCC_HOME)/lib64  -Wl,--enable-new-dtags -lpthread 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --temp_dir $(TEMP_DIR) --save-temps -g -I $(XILINX_VITIS)/system_compiler/include 
+VPP_LDFLAGS += 
+else ifeq ($(HOST_ARCH), aarch64)
+CXXFLAGS += 
+LDFLAGS += 
+VPP_FLAGS += 
+VPP_LDFLAGS += 
+endif
+CXXFLAGS += $(EXTRA_CXXFLAGS)
+VPP_FLAGS += $(EXTRA_VPP_FLAGS)
+
+ifeq ($(TARGET),sw)
+  $(error Error: The sw target is not supported anymore. Please use sw_emu instead)
+else ifeq ($(TARGET),sw_emu)
+  LIB_XRT  := xrt_swemu
+  HOST_PREAMBLE := XCL_EMULATION_MODE=sw_emu
+else ifeq ($(TARGET),hw_emu)
+  LIB_XRT  := xrt_hwemu
+  HOST_PREAMBLE := XCL_EMULATION_MODE=hw_emu
+  ifneq (,$(findstring -g,$(EXTRA_VPP_FLAGS) $(CXXFLAGS)))
+    # -g found in EXTRA_VPP_FLAGS/CXXFLAGS: export the vars used for sourcing pre/post xsim scripts
+    ifneq ($(XILINX_SC_HW_EMU),0)
+      HOST_PREAMBLE += XILINX_SC_HW_EMU=1 XILINX_SC_BUILD_DIR=$(PWD)/$(BUILD_DIR)
+    endif
+  endif
+else ifeq ($(TARGET),hw)
+  LIB_XRT  := xrt_core
+endif
+
+########################## Setting up Host Variables ##########################
+
+#Include Required Host Source Files
+ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
+HOST_SRCS += $(XFLIB_DIR)/L2/demos/jpegDec_sc/host/test_decoder.cpp $(XFLIB_DIR)/L2/demos/jpegDec_sc/kernel/kernelJpegDecoder.cpp 
+CXXFLAGS +=  -D USE_HBM
+CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include/hw/jpegDec -I $(XFLIB_DIR)/L2/demos/jpegDec_sc/host -I $(XFLIB_DIR)/L2/demos/jpegDec_sc/kernel -I $(XFLIB_DIR)/../utils/L1/include
+CXXFLAGS += -O3 -D KERNEL0 -B/usr/lib/x86_64-linux-gnu
+
+else ifneq (,$(shell echo $(XPLATFORM) | awk '/u280/'))
+HOST_SRCS += $(XFLIB_DIR)/L2/demos/jpegDec_sc/host/test_decoder.cpp $(XFLIB_DIR)/L2/demos/jpegDec_sc/kernel/kernelJpegDecoder.cpp 
+CXXFLAGS +=  -D USE_HBM
+CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include/hw/jpegDec -I $(XFLIB_DIR)/L2/demos/jpegDec_sc/host -I $(XFLIB_DIR)/L2/demos/jpegDec_sc/kernel -I $(XFLIB_DIR)/../utils/L1/include
+CXXFLAGS += -O3 -D KERNEL0 -B/usr/lib/x86_64-linux-gnu
+
+else 
+HOST_SRCS += $(XFLIB_DIR)/L2/demos/jpegDec_sc/host/test_decoder.cpp $(XFLIB_DIR)/L2/demos/jpegDec_sc/kernel/kernelJpegDecoder.cpp 
+CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include/hw/jpegDec -I $(XFLIB_DIR)/L2/demos/jpegDec_sc/host -I $(XFLIB_DIR)/L2/demos/jpegDec_sc/kernel -I $(XFLIB_DIR)/../utils/L1/include
+CXXFLAGS += -O3 -D KERNEL0 -B/usr/lib/x86_64-linux-gnu
+
+endif
+EXE_NAME := host.exe
+EXE_OBJS := $(addprefix $(TEMP_DIR)/, $(addsuffix .o,$(basename $(HOST_SRCS))))
+EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
+EXE_FILE_DEPS := $(EXE_OBJS) 
+MAKEDEPEND = $(CXX) $< -MM -MP -MF $(basename $@).d -MT $@  $(CXXFLAGS)
+
+HOST_ARGS :=  -JPEGFile $(XFLIB_DIR)/L2/demos/jpegDec/images/t0.jpg
+ifneq ($(HOST_ARCH), x86)
+PKG_HOST_ARGS = $(foreach args,$(HOST_ARGS),$(subst $(dir $(patsubst %/,%,$(args))),,$(args)))
+endif
+LIBRARY_PATH := $(LD_LIBRARY_PATH):$(XILINX_XRT)/lib
+
+########################## Kernel compiler global settings ##########################
+VPP_FLAGS +=  -I $(XFLIB_DIR)/L2/include/hw/jpegDec -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/demos/jpegDec_sc/kernel
+
+######################### binary container global settings ##########################
+VPP_FLAGS_kernelJpegDecoder +=  -D KERNEL0
+VPP_FLAGS_kernelJpegDecoder += --hls.clock 300000000:kernelJpegDecoder
+ifneq ($(HOST_ARCH), x86)
+VPP_LDFLAGS_JDK += --clock.defaultFreqHz 300000000
+else
+VPP_LDFLAGS_JDK += --kernel_frequency 300
+endif
+
+ifeq ($(HOST_ARCH), x86)
+BINARY_CONTAINERS_TMP := $(BUILD_DIR)/$(TARGET).o
+BINARY_CONTAINERS := $(BUILD_DIR)/$(TARGET).xclbin
+ifeq ($(TARGET),sw_emu)
+  BINARY_CONTAINERS_TMP :=
+endif
+else
+# placeholder for non_x86
+endif
+
+.SECONDEXPANSION:
+# ################ Setting Rules for Binary Containers (Building Kernels) ################
+ACC_SRCS_kernelJpegDecoder += $(XFLIB_DIR)/L2/demos/jpegDec_sc/kernel/kernelJpegDecoder.cpp
+ACC_OBJS_kernelJpegDecoder  := $(addprefix $(TEMP_DIR)/, $(addsuffix .o,$(basename $(ACC_SRCS_kernelJpegDecoder))))
+$(ACC_OBJS_kernelJpegDecoder): $(TEMP_DIR)/%.o : %.cpp $$(@D)/.f
+	@echo "--> Making $@ from: $?"
+	$(MAKEDEPEND)
+	$(VPP) $(VPP_FLAGS) $(VPP_FLAGS_kernelJpegDecoder) -o $@  -c $<
+BINARY_CONTAINERS_DEPS  += $(ACC_OBJS_kernelJpegDecoder) 
+$(BINARY_CONTAINERS_TMP) : $(BINARY_CONTAINERS_DEPS)
+	@echo "--> Making $@ from: $?"
+	$(VPP) $(VPP_FLAGS) $(VPP_LDFLAGS) $(VPP_LDFLAGS_JDK) -o $(BINARY_CONTAINERS) -l $^
+EXE_FILE_DEPS += $(BINARY_CONTAINERS_TMP)
+EXE_FILE_DEPS += $(BINARY_CONTAINERS_DEPS)
+
+############################## Setting Rules for Host (Building Host Executable) ##############################
+ifeq ($(HOST_ARCH), x86)
+$(TEMP_DIR)/%.o : %.cpp $$(@D)/.f
+	@echo "--> Making $@ from: $?"
+	mkdir -p $(BUILD_DIR)
+	$(MAKEDEPEND)
+	$(CXX) -o $@ $(CXXFLAGS)  -I . -c $<
+$(EXE_FILE): $(EXE_FILE_DEPS)  
+	mkdir -p $(BUILD_DIR)
+	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
+else
+# place holder for arch64
+endif
+
+$(EMCONFIG):
+	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
+
+%/.f:
+	mkdir -p $(dir $@)
+	touch $@
+
+.PRECIOUS: %/.f
+
+RUN_DEPS += $(EXE_FILE) $(EMCONFIG)
+
+run: check_device  $(RUN_DEPS)
+#sw_emu
+ifneq (,$(filter sw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(HOST_PREAMBLE) $(EXE_FILE) $(HOST_ARGS)
+	
+else
+# place holder for arch64
+endif
+endif
+
+#hw_emu
+ifneq (,$(filter hw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(HOST_PREAMBLE) $(EXE_FILE) $(HOST_ARGS)
+	
+else
+# place holder for arch64
+endif
+endif
+
+#hw
+ifeq ($(TARGET), hw)
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(HOST_PREAMBLE) $(EXE_FILE) $(HOST_ARGS)
+	
+else
+# place holder for arch64
+endif
+endif
+
+############################## Setting Targets ##############################
+
+.PHONY: all clean cleanall cleanh cleank emconfig run
+emconfig: $(EMCONFIG)
+ifeq ($(HOST_ARCH), x86)
+all:  check_vpp check_platform check_xrt $(EXE_FILE) $(BINARY_CONTAINERS) emconfig
+else
+all:  check_vpp check_platform check_sysroot $(EXE_FILE) $(BINARY_CONTAINERS) emconfig sd_card
+endif
+
+.PHONY: host xclbin
+ifeq ($(HOST_ARCH), x86)
+host:  check_xrt $(EXE_FILE)
+else
+host:  check_sysroot $(EXE_FILE)
+endif
+xclbin: $(BINARY_CONTAINERS_TMP)
+
+############################## Cleaning Rules ##############################
+cleanh:
+	-$(RMDIR) $(EXE_FILE) vitis_* TempConfig system_estimate.xtxt *.rpt .run/
+	-$(RMDIR) src/*.ll _xocc_* .Xil dltmp* xmltmp* *.log *.jou *.wcfg *.wdb sample_link.ini sample_compile.ini obj*  bin* *.csv *.jpg *.jpeg *.png
+
+cleank:
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
+	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
+	-$(RMDIR) _x_temp.* 
+
+cleanall: cleanh cleank
+	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
+	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+
+clean: cleanh
\ No newline at end of file
diff --git a/codec/L2/demos/jpegDec_sc/README.md b/codec/L2/demos/jpegDec_sc/README.md
new file mode 100644
index 0000000000..fb0ea4adc5
--- /dev/null
+++ b/codec/L2/demos/jpegDec_sc/README.md
@@ -0,0 +1,123 @@
+JPEG Decoder System Compiler Demo
+============
+
+The JPEG decoder example resides in the ``L2/demos/jpegDec_sc`` directory. This tutorial provides a step-by-step guide covering the commands for building and running the JPEG decoder System Compiler demo.
+
+Executable Usage
+----------------
+
+* **Work Directory(Step 1)**
+
+The steps for library download and environment setup can be found [here](https://github.com/Xilinx/Vitis_Libraries/tree/master/codec/L2/demos#building). To get the design,
+
+```
+   cd L2/demos/jpegDec_sc
+```
+
+* **Build kernel(Step 2)**
+
+Run the following make command to build your XCLBIN and host binary targeting a specific device. Please note that this process takes a long time, possibly a couple of hours.
+
+```
+   make run TARGET=hw DEVICE=xilinx_u250_xdma_201830_2
+```   
+
+* **Run kernel(Step 3)**
+
+To get the benchmark results, please run the following command.
+
+```
+   ./build_dir.hw.xilinx_u250_xdma_201830_2/host.exe -xclbin build_dir.hw.xilinx_u250_xdma_201830_2/jpegDecoder.xclbin -JPEGFile android.jpg
+```   
+
+JPEG Decoder Input Arguments:
+
+```
+   Usage: host.exe -[-xclbin -JPEGFile]
+          -xclbin:    the kernel name
+          -JPEGFile:  the path point to input *.jpg
+```          
+
+Note: Default arguments are set in Makefile, you can use other [pictures](https://github.com/Xilinx/Vitis_Libraries/tree/master/codec/L2/demos#pictures) listed in the table.
+
+* **Example output(Step 4)** 
+
+```
+   Found Platform
+   Platform Name: Xilinx
+   INFO: Found Device=xilinx_u250_xdma_201830_2
+   INFO: Importing build_dir.hw.xilinx_u250_xdma_201830_2/jpegDecoder.xclbin
+   Loading: 'build_dir.hw.xilinx_u250_xdma_201830_2/jpegDecoder.xclbin'
+   INFO: Kernel has been created
+   INFO: Finish kernel setup
+   ...
+
+   INFO: Finish kernel execution
+   INFO: Finish E2E execution
+   INFO: Data transfer from host to device: 40 us
+   INFO: Data transfer from device to host: 6 us
+   INFO: Average kernel execution per run: 988 us
+   ...
+
+   INFO: android.yuv will be generated from the jpeg decoder's output
+   INFO: android.yuv is generated correctly
+```   
+
+Profiling
+---------
+
+The hardware resource utilizations are listed in the following table.
+Different tool versions may result in slightly different resource usage.
+
+##### Table 1 IP resources for jpegDecoder with huffman decoder(L1 IP)
+
+|           IP          |   BRAM   |   URAM   |    DSP   |    FF    |   LUT   | Frequency(MHz)  |
+|-----------------------|----------|----------|----------|----------|---------|-----------------|
+|     huffman_decoder   |     5    |     0    |    12    |    6963  |   7344  |       286       |
+
+##### Table 2 IP resources for jpegDecoder with jfif parser and huffman decoder(L1 IP)
+
+|           IP          |   BRAM   |   URAM   |    DSP   |    FF    |   LUT   | Frequency(MHz)  |
+|-----------------------|----------|----------|----------|----------|---------|-----------------|
+| kernel_parser_decoder |     5    |     0    |    12    |    7615  |   8382  |       257       |
+
+##### Table 3 Hardware resources for jpegDecoder with jfif parser, huffman, iq and idct (L2 kernel)   
+
+|        Kernel         |   BRAM   |   URAM   |    DSP   |    FF    |   LUT   | Frequency(MHz)  |
+|-----------------------|----------|----------|----------|----------|---------|-----------------|
+|      jpegDecoder      |     7    |     0    |    39    |   12298  |  13417  |       257       |
+
+Result
+------
+
+To check the output yuv file, download https://sourceforge.net/projects/raw-yuvplayer/ . 
+Then upload the rebuild_image.yuv, set the right sample ratio and custom size on the software, and check the yuv file.
+
+Table 1 : Jpeg Decoder profiling
+
+![Table 1 : Jpeg Decoder profiling](../../../docs/images/jpegDecoderpofile.png)
+
+##### Note      
+```      
+    | 1. MAX_DEC_PIX is for benchmark. If testcase image is larger than 20M, the value of MAX_DEC_PIX should be enlarged following the size of image.   
+    | 2. MAXCMP_BC is for benchmark. If testcase image is larger than 20M, the value of MAXCMP_BC should be enlarged following the size of image.   
+```
+
+## License
+
+Licensed using the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0).
+
+    Copyright 2022 Xilinx, Inc.
+    
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+    
+        http://www.apache.org/licenses/LICENSE-2.0
+    
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+    Copyright 2022 Xilinx, Inc.
diff --git a/codec/L2/demos/jpegDec_sc/description.json b/codec/L2/demos/jpegDec_sc/description.json
new file mode 100644
index 0000000000..364603bc7a
--- /dev/null
+++ b/codec/L2/demos/jpegDec_sc/description.json
@@ -0,0 +1,133 @@
+{
+    "gui": false,
+    "name": "Xilinx JPEG Decoder (SC) Test", 
+    "description": "This is a SystemCompiler example of a JPEG decoder that supports the 'Sequential DCT-based mode' of the ISO/IEC 10918-1 standard. It is a high-performance implementation based on the Xilinx HLS design methodology. It can process 1 Huffman token and create up to 8 DCT coefficients within one cycle. It is also an easy-to-use decoder, as it can directly parse the JPEG file header without the help of software functions", 
+    "flow": "vitis", 
+    "platform_allowlist": [
+        "u50",
+        "u280"
+    ], 
+    "platform_blocklist": [
+        "zc"
+    ],
+    "platform_properties": {
+        "u50": {
+	    "host": {
+	        "compiler": {
+		    "symbols": [
+		        "USE_HBM"
+		    ]
+		}
+	    }
+	},
+        "u280": {
+	    "host": {
+	        "compiler": {
+		    "symbols": [
+		        "USE_HBM"
+		    ]
+		}
+	    }
+	}
+    },
+    "launch": [
+        {
+            "cmd_args": " -JPEGFile LIB_DIR/L2/demos/jpegDec/images/t0.jpg", 
+            "name": "generic launch for all flows",
+            "ld_library_path": [
+                          "$(LD_LIBRARY_PATH)",
+                          "$(XILINX_XRT)/lib"
+                               ] 
+        }
+    ], 
+    "host": {
+        "host_exe": "host.exe", 
+        "compiler": {
+            "sources": [
+                "LIB_DIR/L2/demos/jpegDec_sc/host/test_decoder.cpp", 
+                "LIB_DIR/L2/demos/jpegDec_sc/kernel/kernelJpegDecoder.cpp"
+            ], 
+            "includepaths": [
+                "LIB_DIR/L2/include/hw/jpegDec", 
+                "LIB_DIR/L2/demos/jpegDec_sc/host", 
+                "LIB_DIR/L2/demos/jpegDec_sc/kernel", 
+                "LIB_DIR/../utils/L1/include"
+            ], 
+            "options": "-O3 -D KERNEL0 -B/usr/lib/x86_64-linux-gnu"
+        }
+    }, 
+    "v++": {
+        "compiler": {
+            "includepaths": [
+                "LIB_DIR/L2/include/hw/jpegDec", 
+                "LIB_DIR/../utils/L1/include", 
+                "LIB_DIR/L2/demos/jpegDec_sc/kernel"
+            ]
+        }
+    }, 
+    "containers": [
+        {
+            "accelerators": [
+                {
+                    "location": "LIB_DIR/L2/demos/jpegDec_sc/kernel/kernelJpegDecoder.cpp", 
+                    "frequency": 300.0, 
+                    "clflags": " -D KERNEL0", 
+                    "name": "kernelJpegDecoder",
+		    "num_compute_units": 1,
+                    "compute_units": [
+                        {
+                            "name": "JDK",
+                            "slr": "SLR0",
+                            "arguments": [
+                                {
+                                    "name": "jpeg_pointer",
+                                    "memory": "HBM[0]"
+                                },
+				{
+                                    "name": "yuv_mcu_pointer",
+                                    "memory": "HBM[1]"
+                                },
+				{
+                                    "name": "infos",
+                                    "memory": "HBM[2]"
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ], 
+            "frequency": 300.0, 
+            "name": "JDK"
+        }
+    ], 
+    "testinfo": {
+        "disable": false, 
+        "jobs": [
+            {
+                "index": 0, 
+                "dependency": [], 
+                "env": "", 
+                "cmd": "", 
+                "max_memory_MB": {
+                    "vitis_hw_build": 40960, 
+                    "vitis_hw_emu": 40960, 
+                    "vitis_sw_emu": 10240, 
+                    "vitis_hw_run": 10240
+                }, 
+                "max_time_min": {
+                    "vitis_hw_build": 1600, 
+                    "vitis_hw_emu": 400, 
+                    "vitis_sw_emu": 120, 
+                    "vitis_hw_run": 10
+                }
+            }
+        ], 
+        "targets": [
+            "vitis_sw_emu", 
+            "vitis_hw_emu", 
+            "vitis_hw_build", 
+            "vitis_hw_run"
+        ], 
+        "category": "canary"
+    }
+}
diff --git a/codec/L2/demos/jpegDec_sc/host/test_decoder.cpp b/codec/L2/demos/jpegDec_sc/host/test_decoder.cpp
new file mode 100644
index 0000000000..254f73e666
--- /dev/null
+++ b/codec/L2/demos/jpegDec_sc/host/test_decoder.cpp
@@ -0,0 +1,447 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//#define _HLS_TEST_ 1
+
+#ifndef _HLS_TEST_
+//#include "xcl2.hpp"
+#endif
+
+#include "kernelJpegDecoder.hpp"
+#include "utils_XAcc_jpeg.hpp"
+#include "xf_utils_sw/logger.hpp"
+
+#include "utils.hpp"
+
+#ifndef __SYNTHESIS__
+
+// ------------------------------------------------------------
+// File-scope scratch buffers for the host-side re-ordering of the decoder output.
+// Both are allocated once at program start and released with free() at the end of main().
+// NOTE(review): neither malloc() result is checked for NULL.
+//
+// hls_block: room for MAX_NUM_COLOR * MAXCMP_BC blocks of 64 int16_t values.
+// NOTE(review): in the visible part of this file it is only freed, never read or written.
+int16_t* hls_block = (int16_t*)malloc(sizeof(int16_t) * MAX_NUM_COLOR * MAXCMP_BC * 64);
+// yuv_row_pointer: one byte per sample for MAX_NUM_COLOR * MAXCMP_BC blocks of 64 samples;
+// rebuild_raw_yuv() fills it with the re-ordered picture before writing the .yuv file.
+xf::codec::idct_out_t* yuv_row_pointer = (uint8_t*)malloc(sizeof(uint8_t) * MAX_NUM_COLOR * MAXCMP_BC * 64);
+
+// ------------------------------------------------------------
+// input strm_iDCT_x8[8] is the row of block yuv in mcu order of sample
+// output image_height*image_width*Y ... image_height*image_width*U ... image_height*image_width*V 0a to form a file to
+// show the picture
+// Writes the decoded picture to three files named after the basename of file_name:
+//   <name>.raw   - the decoder output buffer exactly as delivered (MCU order),
+//   <name>.yuv   - the same samples re-ordered into a Y plane, then a U plane, then a V plane,
+//   <name>.yuv.h - a short text note (appended to) with the format and picture size needed to view the .yuv.
+//
+// file_name        path of the source JPEG; only the part after the last '/' and before the last '.' is used.
+// bas_info         image description filled from the decoder (block counts, per-component sizes in blocks, format).
+// hls_bc           not used by this function.
+// yuv_mcu_pointer  decoder output; each 64-bit word carries 8 samples of 8 bits, 8 words per 8x8 block.
+//
+// The re-ordered picture is built in the file-scope buffer yuv_row_pointer.
+// NOTE(review): indices into yuv_row_pointer are not range-checked against its allocated size.
+// If an output file cannot be opened the function reports the error and returns without writing any further.
+void rebuild_raw_yuv(std::string file_name,
+                     xf::codec::bas_info* bas_info,
+                     int hls_bc[MAX_NUM_COLOR],
+                     // hls::stream<xf::codec::idct_out_t >   strm_iDCT_x8[8],
+                     ap_uint<64>* yuv_mcu_pointer) {
+    std::string file = file_name.substr(file_name.find_last_of('/') + 1);
+    std::string fn = file.substr(0, file.find_last_of(".")) + ".raw";
+    FILE* f = fopen(fn.c_str(), "wb");
+    std::cout << "WARNING: " << fn << " will be opened for binary write." << std::endl;
+    if (!f) {
+        std::cerr << "ERROR: " << fn << " cannot be opened for binary write." << std::endl;
+        return; // continuing would hand a null FILE* to fwrite()/fclose()
+    }
+
+    // Keep the start of the unpacked-sample buffer so it can be freed; a separate cursor walks through it later.
+    xf::codec::idct_out_t* const yuv_mcu_pix_base = (uint8_t*)malloc(sizeof(uint8_t) * bas_info->all_blocks * 64);
+    if (!yuv_mcu_pix_base && bas_info->all_blocks != 0) {
+        std::cerr << "ERROR: cannot allocate the buffer for the unpacked samples." << std::endl;
+        fclose(f);
+        return;
+    }
+
+    // Unpack every 64-bit word into 8 consecutive bytes (byte j is bits 8*j+7 .. 8*j).
+    int cnt = 0;
+    int cnt_row = 0;
+    for (int b = 0; b < (int)(bas_info->all_blocks); b++) {
+        for (int i = 0; i < 8; i++) { // write one block of Y or U or V
+            for (int j = 0; j < 8; j++) {
+                yuv_mcu_pix_base[cnt] = yuv_mcu_pointer[cnt_row](8 * (j + 1) - 1, 8 * j); // strm_iDCT_x8[j].read();
+                cnt++;
+            }
+            cnt_row++;
+        }
+    }
+
+write_mcu_raw_data:
+    // The .raw file is a byte dump of the decoder buffer itself, not of the unpacked copy.
+    fwrite(yuv_mcu_pointer, sizeof(char), bas_info->all_blocks * 64, f);
+
+    // fwrite(&end_file, 1, 1, f);//write 0x0a
+    fclose(f);
+
+    file = file_name.substr(file_name.find_last_of('/') + 1);
+    fn = file.substr(0, file.find_last_of(".")) + ".yuv";
+    f = fopen(fn.c_str(), "wb");
+    std::cout << "WARNING: " << fn << " will be opened for binary write." << std::endl;
+    if (!f) {
+        std::cerr << "ERROR: " << fn << " cannot be opened for binary write." << std::endl;
+        free(yuv_mcu_pix_base);
+        return;
+    }
+
+    xf::codec::COLOR_FORMAT fmt = bas_info->format;
+
+    int dpos[MAX_NUM_COLOR]; // the dc position of the pointer
+    for (int cmp = 0; cmp < MAX_NUM_COLOR; cmp++) {
+        dpos[cmp] = 0;
+    }
+
+    uint16_t block_width = bas_info->axi_width[0];
+    int n_mcu = 0;
+
+    printf("INFO: fmt %d, bas_info->mcu_cmp = %d \n", fmt, (int)(bas_info->mcu_cmp));
+    printf("INFO: bas_info->hls_mbs[cmp] %d, %d, %d \n", bas_info->hls_mbs[0], bas_info->hls_mbs[1],
+           bas_info->hls_mbs[2]);
+
+    // Read cursor over the unpacked samples; the base pointer above stays untouched for free().
+    xf::codec::idct_out_t* yuv_mcu_pointer_pix = yuv_mcu_pix_base;
+
+LOOP_write_yuv_buffer:
+    while (n_mcu < (int)(bas_info->hls_mcuc)) {
+        for (int cmp = 0; cmp < MAX_NUM_COLOR; cmp++) {              // 0,1,2
+            for (int mbs = 0; mbs < bas_info->hls_mbs[cmp]; mbs++) { // 0,1,2,3, 0, 0,
+
+                // Each component plane starts at cmp * (size of component 0's plane); within the plane the
+                // block at position dpos[cmp] is written transposed (source index i*8+j lands at row j, column i).
+                for (int i = 0; i < 8; i++) { // write one block of Y or U or V
+                    for (int j = 0; j < 8; j++) {
+                        yuv_row_pointer[(cmp)*bas_info->axi_height[0] * bas_info->axi_width[0] * 64 + (dpos[cmp]) * 8 +
+                                        j * bas_info->axi_width[cmp] * 8 + i] = *yuv_mcu_pointer_pix;
+                        yuv_mcu_pointer_pix++;
+                    }
+                } // end block
+
+                // Advance dpos[cmp] to where the next block of this component goes; the step depends on the
+                // sampling format and on the block's index inside the MCU.
+                if (fmt == xf::codec::C420) { // 420 mbs= 0 1 2 3 0 0
+
+                    if (mbs == 0) {
+                        if (cmp != 0 && (dpos[cmp] % bas_info->axi_width[1] == bas_info->axi_width[1] - 1)) {
+                            dpos[cmp] += 1 + bas_info->axi_width[1] * (8 - 1);
+                        } else {
+                            dpos[cmp] += 1;
+                        }
+                    } else if (mbs == 1) {
+                        dpos[cmp] += block_width * 8 - 1;
+                    } else if (mbs == 2) {
+                        dpos[cmp] += 1;
+                    } else {
+                        if (dpos[cmp] % (block_width * (8) * 2) == (8 + 1) * block_width - 1) {
+                            dpos[cmp] += 1 + block_width * (8 - 1);
+                        } else {
+                            dpos[cmp] -= block_width * 8 - 1;
+                        }
+                    }
+                } else if (fmt == xf::codec::C422) { // 422 mbs 0 1 0 0
+                    if (mbs == 0) {
+                        if (cmp != 0 && (dpos[cmp] % bas_info->axi_width[1] == bas_info->axi_width[1] - 1)) {
+                            dpos[cmp] += 1 + bas_info->axi_width[1] * (8 - 1);
+                        } else {
+                            dpos[cmp] += 1;
+                        }
+                    } else { // cmp=0, mbs=1
+                        if (dpos[cmp] % (block_width) == block_width - 1) {
+                            dpos[cmp] += 1 + block_width * (8 - 1);
+                        } else {
+                            dpos[cmp] += 1;
+                        }
+                    }
+                } else {
+                    if (dpos[cmp] % block_width == block_width - 1) {
+                        dpos[cmp] += 1 + block_width * (8 - 1);
+                    } else {
+                        dpos[cmp] += 1;
+                    }
+                }
+            }
+        } // end one mcu
+        n_mcu++;
+    }
+
+    // The unpacked copy has been consumed completely.
+    free(yuv_mcu_pix_base);
+
+    // Debug dump of the first 128 re-ordered bytes, first in hex and then in decimal.
+    for (int i = 0; i < 16; i++) {
+        for (int j = 0; j < 8; j++) {
+            printf("%02X, ", (uint8_t)(yuv_row_pointer[8 * i + j]));
+        }
+        printf("\n");
+    }
+
+    for (int i = 0; i < 16; i++) {
+        for (int j = 0; j < 8; j++) {
+            printf("%d, ", (uint8_t)(yuv_row_pointer[8 * i + j]));
+        }
+        printf("\n");
+    }
+
+LOOP_write_y:
+    fwrite(yuv_row_pointer, sizeof(char), bas_info->axi_height[0] * bas_info->axi_width[0] * 64, f);
+LOOP_write_u:
+    fwrite(yuv_row_pointer + bas_info->axi_height[0] * bas_info->axi_width[0] * 64, sizeof(char),
+           bas_info->axi_height[1] * bas_info->axi_width[1] * 64, f);
+LOOP_write_v:
+    fwrite(yuv_row_pointer + bas_info->axi_height[0] * bas_info->axi_width[0] * 128, sizeof(char),
+           bas_info->axi_height[2] * bas_info->axi_width[2] * 64, f);
+
+    // fwrite(&end_file, 1, 1, f);//write 0x0a
+    fclose(f);
+
+    printf("Please open the YUV file with fmt %d and (width, height) = (%d, %d) \n", fmt, bas_info->axi_width[0] * 8,
+           bas_info->axi_height[0] * 8);
+
+    // write yuv info to a file
+    fn = file.substr(0, file.find_last_of(".")) + ".yuv.h";
+    f = fopen(fn.c_str(), "a"); // append; the former mode string "aw" is not a standard fopen() mode
+    std::cout << "WARNING: " << fn << " will be opened for binary write." << std::endl;
+    if (!f) {
+        std::cerr << "ERROR: " << fn << " cannot be opened for binary write." << std::endl;
+        return;
+    }
+    fprintf(f, "INFO: fmt=%d, bas_info->mcu_cmp=%d\n", fmt, (int)(bas_info->mcu_cmp));
+    fprintf(f, "INFO: bas_info->hls_mbs[cmp] %d, %d, %d \n", bas_info->hls_mbs[0], bas_info->hls_mbs[1],
+            bas_info->hls_mbs[2]);
+    fprintf(f, "Please open the YUV file with fmt %d and (width, height) = (%d, %d) \n", fmt,
+            bas_info->axi_width[0] * 8, bas_info->axi_height[0] * 8);
+    fclose(f);
+}
+
+// ------------------------------------------------------------
+// Unpacks the flat 32-bit word array written by the decoder kernel into the host-side structures.
+// Word layout as read below:
+//   0..3     img_info counters             4, 5     the two status codes (rtn, rtn2)
+//   10..28   scalar / per-component bas_info fields
+//   29, 221  idct_q_table_x / idct_q_table_y (64 words per component)
+//   413      mcu_cmp
+//   414, 606 min_nois_thld_x / min_nois_thld_y (64 words per component)
+//   798      q_tables (64 words per component)
+//   990      cmp_info, 6 words per component
+// NOTE(review): the fixed offsets leave room for exactly 3 components - confirm MAX_NUM_COLOR is 3.
+void rebuild_infos(xf::codec::img_info& img_info,
+                   xf::codec::cmp_info cmp_info[MAX_NUM_COLOR],
+                   xf::codec::bas_info& bas_info,
+                   int& rtn,
+                   int& rtn2,
+                   ap_uint<32> infos[1024]) {
+    // image-level counters and status codes
+    img_info.hls_cs_cmpc = infos[0];
+    img_info.hls_mcuc = infos[1];
+    img_info.hls_mcuh = infos[2];
+    img_info.hls_mcuv = infos[3];
+    rtn = infos[4];
+    rtn2 = infos[5];
+
+    // scalar fields of bas_info
+    bas_info.all_blocks = infos[10];
+    bas_info.axi_mcuv = infos[18];
+    bas_info.axi_num_cmp = infos[19];
+    bas_info.axi_num_cmp_mcu = infos[20];
+    const int format = infos[24];
+    bas_info.format = (xf::codec::COLOR_FORMAT)format;
+    bas_info.hls_mcuc = infos[28];
+    bas_info.mcu_cmp = infos[413];
+
+    for (int k = 0; k < 4; k++) {
+        bas_info.axi_map_row2cmp[k] = infos[14 + k];
+    }
+
+    // everything that is stored once per colour component
+    for (int comp = 0; comp < MAX_NUM_COLOR; comp++) {
+        bas_info.axi_height[comp] = infos[11 + comp];
+        bas_info.axi_width[comp] = infos[21 + comp];
+        bas_info.hls_mbs[comp] = infos[25 + comp];
+
+        // the three 8x8 tables share the same in-table offset
+        for (int row = 0; row < 8; row++) {
+            for (int col = 0; col < 8; col++) {
+                const int cell = comp * 64 + row * 8 + col;
+                bas_info.idct_q_table_x[comp][row][col] = infos[29 + cell];
+                bas_info.idct_q_table_y[comp][row][col] = infos[221 + cell];
+                bas_info.q_tables[comp][row][col] = infos[798 + cell];
+            }
+        }
+
+        for (int k = 0; k < 64; k++) {
+            bas_info.min_nois_thld_x[comp][k] = infos[414 + comp * 64 + k];
+            bas_info.min_nois_thld_y[comp][k] = infos[606 + comp * 64 + k];
+        }
+
+        const int first = 990 + comp * 6;
+        cmp_info[comp].bc = infos[first];
+        cmp_info[comp].bch = infos[first + 1];
+        cmp_info[comp].bcv = infos[first + 2];
+        cmp_info[comp].mbs = infos[first + 3];
+        cmp_info[comp].sfh = infos[first + 4];
+        cmp_info[comp].sfv = infos[first + 5];
+    }
+
+    printf("test INFO:  bas_info->mcu_cmp = %d \n", (int)(bas_info.mcu_cmp));
+    printf("test INFO: bas_info->hls_mbs[cmp] %d, %d, %d \n", bas_info.hls_mbs[0], bas_info.hls_mbs[1],
+           bas_info.hls_mbs[2]);
+}
+
+// ------------------------------------------------------------
+
+// Host test driver.
+// Loads the JPEG named by the -JPEGFile option, runs the decoder (directly when _HLS_TEST_ is defined,
+// otherwise through the jpegDec_acc accelerator class), checks the two status codes returned in the
+// infos words and, on success, writes the decoded picture with rebuild_raw_yuv().
+// Returns 0 on success, 1 when the decoder reported an error, or the non-zero code from load_dat().
+int main(int argc, const char* argv[]) {
+    std::cout << "\n------------ Test for decode image.jpg  -------------\n";
+    std::string optValue;
+    std::string JPEGFile;
+    std::string xclbin_path;
+
+    // cmd arg parser.
+    ArgParser parser(argc, argv);
+
+    // Read In paths addresses
+    if (parser.getCmdOption("-JPEGFile", optValue)) {
+        JPEGFile = optValue;
+        std::cout << "COMMOND: host.exe -JPEGFile " << optValue << std::endl;
+    } else {
+        std::cout << "INFO: JPEG file not specified for this test. use "
+                     "'-JPEGFile' to specified it. \n";
+    }
+
+    ///// declaration
+
+    // load data to simulate the ddr data
+    // size of jpeg_pointer, output of yuv_mcu_pointer, and output image infos
+    // Both start out defined so the failure path below can print / free them safely.
+    int size = 0;
+    uint8_t* jpeg_pointer = nullptr;
+#ifndef _HLS_TEST_
+    // aligned_alloc<T>(num) takes an element count and multiplies by sizeof(T) itself.
+    ap_uint<64>* yuv_mcu_pointer = aligned_alloc<ap_uint<64> >(MAXCMP_BC * 8);
+    ap_uint<32>* infos = aligned_alloc<ap_uint<32> >(1024);
+#else
+    ap_uint<64>* yuv_mcu_pointer = (ap_uint<64>*)malloc(sizeof(ap_uint<64>) * MAXCMP_BC * 8);
+    ap_uint<32>* infos = (ap_uint<32>*)malloc(sizeof(ap_uint<32>) * 1024);
+#endif
+
+    // Releases every host buffer owned by this program; called exactly once on each exit path.
+    auto release_buffers = [&]() {
+        free(jpeg_pointer);
+        free(hls_block);
+        free(infos);
+        free(yuv_mcu_pointer);
+        free(yuv_row_pointer);
+    };
+
+    int err = load_dat(jpeg_pointer, JPEGFile, size);
+    if (err) {
+        printf("Alloc buf failed!, size:%d Bytes\n", size);
+        release_buffers();
+        return err;
+    } else {
+        printf("Alloc buf successfully!, size:%d Bytes\n", size);
+    }
+
+    // Variables to measure time
+
+    // To test SYNTHESIS top
+    hls::stream<ap_uint<24> > block_strm;
+    xf::codec::cmp_info cmp_info[MAX_NUM_COLOR];
+    xf::codec::img_info img_info;
+    xf::codec::bas_info bas_info;
+    img_info.hls_cs_cmpc = 0; // init
+
+    // 0: decode jfif successful
+    // 1: marker in jfif is not in expectation
+    int rtn = 0;
+
+    // 0: decode huffman successful
+    // 1: huffman data is not in expectation
+    int rtn2 = false;
+
+#ifdef _HLS_TEST_
+    uint32_t hls_mcuc;
+    uint16_t hls_mcuh;
+    uint16_t hls_mcuv;
+    uint8_t hls_cs_cmpc;
+    hls::stream<ap_uint<5> > idx_coef;
+    hls::stream<xf::codec::idct_out_t> strm_iDCT_x8[8];
+
+    // L2 top
+    kernelJpegDecoder((ap_uint<CH_W>*)jpeg_pointer, (int)size,
+                      //&img_info, cmp_info, &bas_info,
+                      yuv_mcu_pointer, infos);
+    // strm_iDCT_x8);//idx_coef,
+
+    rebuild_infos(img_info, cmp_info, bas_info, rtn, rtn2, infos);
+    // one shoot test for the IDCT
+    printf("INFO: bas_info.q_tables are : \n");
+    for (int id = 0; id < 2; id++) {
+        for (int i = 0; i < 8; i++) {
+            for (int j = 0; j < 8; j++) {
+                printf("%d, ", (int)(bas_info.q_tables[id][i][j]));
+            }
+            printf("\n");
+        }
+    }
+#else
+    xf::common::utils_sw::Logger logger(std::cout, std::cerr);
+
+    // send task requests
+    auto jpeg_pointer_pool = jpegDec_acc::create_bufpool(vpp::input);
+    auto yuv_mcu_pointer_pool = jpegDec_acc::create_bufpool(vpp::output);
+    auto infos_pool = jpegDec_acc::create_bufpool(vpp::output);
+
+    // The lambda returns 0 (false) after issuing one compute() request.
+    jpegDec_acc::send_while([&]() -> bool {
+        uint8_t* acc_jpeg_pointer = (uint8_t*)jpegDec_acc::alloc_buf(jpeg_pointer_pool, sizeof(uint8_t) * size);
+        ap_uint<64>* acc_yuv_mcu_pointer =
+            (ap_uint<64>*)jpegDec_acc::alloc_buf(yuv_mcu_pointer_pool, sizeof(ap_uint<64>) * MAXCMP_BC * 8);
+        ap_uint<32>* acc_infos = (ap_uint<32>*)jpegDec_acc::alloc_buf(infos_pool, sizeof(ap_uint<32>) * 1024);
+
+        memcpy(acc_jpeg_pointer, jpeg_pointer, sizeof(uint8_t) * size);
+
+        jpegDec_acc::compute((ap_uint<CH_W>*)acc_jpeg_pointer, size, acc_yuv_mcu_pointer, acc_infos);
+
+        return 0;
+    });
+
+    // send result receiving requests
+    jpegDec_acc::receive_all_in_order([&]() {
+        ap_uint<64>* acc_yuv_mcu_pointer = (ap_uint<64>*)jpegDec_acc::get_buf(yuv_mcu_pointer_pool);
+        ap_uint<32>* acc_infos = (ap_uint<32>*)jpegDec_acc::get_buf(infos_pool);
+
+        // Copy the results out of the accelerator buffers into the host buffers used after join().
+        memcpy(yuv_mcu_pointer, acc_yuv_mcu_pointer, sizeof(ap_uint<64>) * MAXCMP_BC * 8);
+        memcpy(infos, acc_infos, sizeof(ap_uint<32>) * 1024);
+
+        rebuild_infos(img_info, cmp_info, bas_info, rtn, rtn2, acc_infos);
+
+    });
+
+    struct timeval start_time, end_time;
+    gettimeofday(&start_time, 0);
+    jpegDec_acc::join();
+    gettimeofday(&end_time, 0);
+
+    std::cout << "INFO: Finish kernel execution" << std::endl;
+    std::cout << "INFO: Finish E2E execution" << std::endl;
+    std::cout << "-------------------------------------------------------" << std::endl;
+    unsigned long exec_timeE2E = diff(&end_time, &start_time);
+    std::cout << "INFO: Average E2E per run: " << exec_timeE2E << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+
+    rebuild_infos(img_info, cmp_info, bas_info, rtn, rtn2, infos);
+#endif
+    // for image info
+    int hls_bc[MAX_NUM_COLOR];
+    for (int i = 0; i < MAX_NUM_COLOR; i++) {
+        hls_bc[i] = cmp_info[i].bc;
+    }
+
+    // todo merge to syn-code
+
+    if (rtn || rtn2) {
+        printf("Warning: Decoding the bad case input file!\n");
+        if (rtn == 1) {
+            printf("Warning: [code 1] marker in jfif is not in expectation!\n");
+        } else if (rtn == 2) {
+            printf("ERROR: [code 2] huffman table is not in expectation!\n");
+        } else {
+            if (rtn2) {
+                printf("Warning: [code 3] huffman data is not in expectation!\n");
+            }
+        }
+#ifndef _HLS_TEST_
+        // Log the failure before returning; with the return placed first this call could never run.
+        logger.error(xf::common::utils_sw::Logger::Message::TEST_FAIL);
+#endif
+        release_buffers();
+        return 1;
+    }
+#ifndef _HLS_TEST_
+    logger.info(xf::common::utils_sw::Logger::Message::TEST_PASS);
+#endif
+
+    printf("INFO: writing the YUV file!\n");
+    rebuild_raw_yuv(JPEGFile, &bas_info, hls_bc, yuv_mcu_pointer);
+
+    release_buffers();
+
+    std::cout << "Ready for next image!\n ";
+
+    return 0;
+}
+#endif
+
+// ************************************************************
diff --git a/codec/L2/demos/jpegDec_sc/host/utils.hpp b/codec/L2/demos/jpegDec_sc/host/utils.hpp
new file mode 100644
index 0000000000..5c35306056
--- /dev/null
+++ b/codec/L2/demos/jpegDec_sc/host/utils.hpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef UTILS_H
+#define UTILS_H
+#include <sys/time.h>
+#include <new>
+#include <cstdlib>
+#include <algorithm>
+#include <string>
+#include <vector>
+// ------------------------------------------------------------
+
+#if __linux
+// Allocates room for `num` objects of type T on a 4096-byte boundary using posix_memalign().
+// The objects are not constructed. Throws std::bad_alloc when posix_memalign() reports failure.
+// The memory is released by callers in this project with free().
+template <typename T>
+T* aligned_alloc(std::size_t num) {
+    void* raw = nullptr;
+    const int status = posix_memalign(&raw, 4096, num * sizeof(T));
+    if (status != 0) {
+        throw std::bad_alloc();
+    }
+    return reinterpret_cast<T*>(raw);
+}
+#endif
+
+// ------------------------------------------------------------
+// Compute time difference
+// Returns newTime - oldTime in microseconds.
+// Declared inline because this header defines the function: without it, including the header from
+// more than one translation unit produces multiple-definition link errors.
+inline unsigned long diff(const struct timeval* newTime, const struct timeval* oldTime) {
+    return (newTime->tv_sec - oldTime->tv_sec) * 1000000 + (newTime->tv_usec - oldTime->tv_usec);
+}
+
+// ------------------------------------------------------------
+// load the data file (.txt, .bin, .jpg ...)to ptr
+// Reads the whole file `name` into a newly allocated buffer.
+//
+// data  receives the buffer on success (released by callers with free()); left untouched on failure.
+// name  path of the file to read.
+// size  receives the number of bytes actually read; set to 0 on failure.
+//
+// Returns 0 on success, -1 when the file cannot be opened, measured or buffered, and 1 when the file
+// is larger than MAX_DEC_PIX bytes. The file is closed on every path.
+template <typename T>
+int load_dat(T*& data, const std::string& name, int& size) {
+    size = 0; // callers print it even when loading fails
+    std::string fn = name;
+    FILE* f = fopen(fn.c_str(), "rb");
+    std::cout << "WARNING: " << fn << " will be opened for binary read." << std::endl;
+    if (!f) {
+        std::cerr << "ERROR: " << fn << " cannot be opened for binary read." << std::endl;
+        return -1;
+    }
+
+    fseek(f, 0, SEEK_END);
+    // ftell() returns -1 on error; check before converting to an unsigned length.
+    const long file_len = ftell(f);
+    if (file_len < 0) {
+        std::cerr << "ERROR: size of " << fn << " cannot be determined." << std::endl;
+        fclose(f);
+        return -1;
+    }
+    const uint64_t n = (uint64_t)file_len;
+    if (n > MAX_DEC_PIX) {
+        std::cout << " read n bytes > MAX_DEC_PIX, please set a larger MAX_DEC_PIX " << std::endl;
+        fclose(f);
+        return 1;
+    }
+#if __linux
+    data = aligned_alloc<T>(n); // throws std::bad_alloc on failure
+#else
+    T* buf = (T*)malloc(MAX_DEC_PIX);
+    if (!buf) {
+        std::cerr << "ERROR: cannot allocate the buffer for " << fn << "." << std::endl;
+        fclose(f);
+        return -1;
+    }
+    data = buf;
+#endif
+    fseek(f, 0, SEEK_SET);
+    size = fread(data, sizeof(char), n, f);
+    fclose(f);
+    std::cout << n << " entries read from " << fn << std::endl;
+
+    return 0;
+}
+
+// ------------------------------------------------------------
+// get the arg
+// Minimal command-line helper: stores argv[1..argc-1] and answers look-ups by exact string match.
+class ArgParser {
+   public:
+    // Copies every argument except the program name (argv[0]).
+    ArgParser(int& argc, const char* argv[]) {
+        for (int idx = 1; idx < argc; ++idx) {
+            mTokens.push_back(std::string(argv[idx]));
+        }
+    }
+
+    // Looks for `option` and, when a further token follows it, stores that token in `value`.
+    // Returns false (leaving `value` untouched) if the option is absent or is the last token.
+    bool getCmdOption(const std::string option, std::string& value) const {
+        std::vector<std::string>::const_iterator pos = std::find(mTokens.begin(), mTokens.end(), option);
+        if (pos == mTokens.end()) {
+            return false;
+        }
+        ++pos;
+        if (pos == mTokens.end()) {
+            return false;
+        }
+        value = *pos;
+        return true;
+    }
+
+    // Returns true when `option` appears anywhere among the stored tokens.
+    bool getCmdOption(const std::string option) const {
+        return std::find(mTokens.begin(), mTokens.end(), option) != mTokens.end();
+    }
+
+   private:
+    std::vector<std::string> mTokens;
+};
+
+#endif
\ No newline at end of file
diff --git a/codec/L2/demos/jpegDec_sc/kernel/kernelJpegDecoder.cpp b/codec/L2/demos/jpegDec_sc/kernel/kernelJpegDecoder.cpp
new file mode 100644
index 0000000000..6712266894
--- /dev/null
+++ b/codec/L2/demos/jpegDec_sc/kernel/kernelJpegDecoder.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file kernelJpegDecoder.cpp
+ * @brief kernelJpegDecoder template function implementation and kernel_decoder wrapper.
+ *
+ * This file is part of HLS algorithm library.
+ */
+
+#include "kernelJpegDecoder.hpp"
+
+// Entry point of the jpegDec_acc accelerator: forwards its four arguments unchanged to JDK().
+// jpeg_pointer     input JPEG data
+// size             number of bytes to read from jpeg_pointer
+// yuv_mcu_pointer  output samples, 64 bits per word
+// infos            output image-information / status words
+// NOTE(review): presumably the System Compiler flow analyses this body to build the hardware
+// pipeline, so it should stay a plain call sequence - confirm against the VPP_ACC documentation.
+void jpegDec_acc::compute(ap_uint<AXI_WIDTH>* jpeg_pointer,
+                          const int size,
+                          ap_uint<64>* yuv_mcu_pointer,
+                          ap_uint<32>* infos) {
+    JDK(jpeg_pointer, size, yuv_mcu_pointer, infos);
+}
+
+// ------------------------------------------------------------
+// @brief Level 2 : kernel for jfif parser + huffman decoder + iQ_iDCT; kernelJpegDecoder is abbreviated to JDK
+// a.input: a jpg 420/422/444 baseline file
+// b.output: the YUV samples (0~255) in the 8x8 blocks' column scan order, like
+// [Y*allpixels,U*0.5*allpixels, V*0.5*allpixels], and the image infos
+// c.Fault tolerance: If the picture's format is incorrect, error codes will directly end the kernel
+// and wait for the input of the next image. Error codes could help to locate the decoding stage at which the error
+// occurs
+// d.performance: input throughput: 150MB/s~300MB/s(1symbol/clk), output 1~1.6GB/s (max 8B/clk),
+// frequency 250MHz for kernel, for only huffman core 286MHz by vivado 2018.3
+
+// Processing element of the accelerator: declares the AXI master interfaces of the three pointer
+// arguments and hands all four arguments to xf::codec::kernelJpegDecoderTop().
+void jpegDec_acc::JDK(ap_uint<AXI_WIDTH>* jpeg_pointer,
+                      const int size,
+                      ap_uint<64>* yuv_mcu_pointer,
+                      ap_uint<32>* infos) {
+    // clang-format off
+	//const uint64_t max_pix = MAX_NUM_PIX;//for 8K*8K
+	// NOTE(review): the three constants below are not referenced again in this function; the
+	// interface pragmas use literal depths instead.
+	const uint64_t max_pix = MAX_DEC_PIX;//for 800*800
+	const uint64_t max_yuv = MAXCMP_BC * 8;//blocknum * 8 rows
+	const uint64_t burst_lenth = BURST_LENTH;
+	// One m_axi bundle per pointer argument (gmem_in0 / gmem_in1 / gmem_in2), each with
+	// latency 64, 32 outstanding transactions and a maximum burst length of 32.
+#pragma HLS INTERFACE m_axi port = jpeg_pointer     depth = 65000 offset = direct  bundle = gmem_in0 \
+					  latency = 64 num_read_outstanding = 32 max_read_burst_length = 32
+#pragma HLS INTERFACE m_axi port = yuv_mcu_pointer 	depth = 230400 offset = direct  bundle = gmem_in1 \
+					  latency = 64 num_write_outstanding = 32 max_write_burst_length = 32
+#pragma HLS INTERFACE m_axi port = infos 			depth = 1024   offset = direct  bundle = gmem_in2 \
+					  latency = 64 num_write_outstanding = 32 max_write_burst_length = 32
+	// The s_axilite control-interface pragmas are deliberately left disabled here.
+//	#pragma HLS INTERFACE s_axilite port=jpeg_pointer      	bundle=control
+//	#pragma HLS INTERFACE s_axilite port=yuv_mcu_pointer    bundle=control
+//	#pragma HLS INTERFACE s_axilite port=size      			bundle=control
+//	#pragma HLS INTERFACE s_axilite port=infos    		    bundle=control
+//	#pragma HLS INTERFACE s_axilite port=return         	bundle=control
+
+	xf::codec::kernelJpegDecoderTop(jpeg_pointer, size, yuv_mcu_pointer, infos);
+}
diff --git a/codec/L2/demos/jpegDec_sc/kernel/kernelJpegDecoder.hpp b/codec/L2/demos/jpegDec_sc/kernel/kernelJpegDecoder.hpp
new file mode 100644
index 0000000000..06b5634549
--- /dev/null
+++ b/codec/L2/demos/jpegDec_sc/kernel/kernelJpegDecoder.hpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file kernelJpegDecoder.hpp
+ * @brief kernelJpegDecoder template function implementation and kernel_decoder wrapper.
+ *
+ * This file is part of HLS algorithm library.
+ */
+
+#ifndef _XF_CODEC_KERNEL_JPEG_DEC_SC_HPP_
+#define _XF_CODEC_KERNEL_JPEG_DEC_SC_HPP_
+
+#include "XAcc_jpegdecoder.hpp"
+#include "XAcc_jfifparser.hpp"
+#include "XAcc_idct.hpp"
+
+#include "vpp_acc.hpp"
+
+// ------------------------------------------------------------
+/**
+ * @brief Level 2 : kernel for jfif parser + huffman decoder + iQ_iDCT
+ *
+ * @tparam CH_W size of data path in dataflow region, in bit.
+ *         when CH_W is 16, the decoder could decode one symbol per cycle in about 99% cases.
+ *         when CH_W is 8 , the decoder could decode one symbol per cycle in about 80% cases, but use less resource.
+ *
+ * @param jpeg_pointer the input jpeg to be read from DDR.
+ * @param size the total bytes to be read from DDR.
+ * @param yuv_mcu_pointer the output yuv to DDR in mcu order. 1 ap_uint<64> has 8 uint8_t pixels after idct.
+ * @param infos information about the image, which may be used to rebuild the image.
+ */
+// a.input: a jpg 420/422/444 baseline file
+// b.output: the YUV samples (0~255) in the 8x8 blocks' column scan order, like
+// [Y*allpixels,U*0.5*allpixels, V*0.5*allpixels], and the image infos
+// c.Fault tolerance: If the picture's format is incorrect, error codes will directly end the kernel
+// and wait for the input of the next image. Error codes could help to locate the decoding stage at which the error
+// occurs
+// d.performance: input throughput: 150MB/s~300MB/s(1symbol/clk), output 1~1.6GB/s (max 8B/clk),
+// frequency 250MHz for kernel, for only huffman core 286MHz by vivado 2018.3
+
+// System Compiler accelerator class for the JPEG decoder. It derives from VPP_ACC<jpegDec_acc, 1>
+// and exposes two static functions: compute(), called by the host, and JDK(), the decoder kernel.
+// NOTE(review): the meaning of the second template argument (1) and of the ZERO_COPY / SYS_PORT /
+// SYS_PORT_PFM macros comes from vpp_acc.hpp, which is not visible here - confirm there.
+class jpegDec_acc : public VPP_ACC<jpegDec_acc, 1> {
+    // port bindings
+    // The three pointer arguments are marked ZERO_COPY; `size` is not.
+    ZERO_COPY(jpeg_pointer);
+    // ZERO_COPY(size);
+    ZERO_COPY(yuv_mcu_pointer);
+    ZERO_COPY(infos);
+
+    // Default binding: all three pointers on DDR[0].
+    SYS_PORT(jpeg_pointer, DDR[0]);
+    SYS_PORT(yuv_mcu_pointer, DDR[0]);
+    SYS_PORT(infos, DDR[0]);
+
+    // Binding for the u50 platform: one HBM bank per pointer.
+    // NOTE(review): no platform-specific binding is given for any other platform.
+    SYS_PORT_PFM(u50, jpeg_pointer, HBM[0]);
+    SYS_PORT_PFM(u50, yuv_mcu_pointer, HBM[1]);
+    SYS_PORT_PFM(u50, infos, HBM[2]);
+
+   public:
+    // Host-facing entry point; arguments are described in the comment block above this class.
+    static void compute(ap_uint<AXI_WIDTH>* jpeg_pointer,
+                        const int size,
+                        ap_uint<64>* yuv_mcu_pointer,
+                        ap_uint<32>* infos);
+    // Decoder kernel; takes the same arguments as compute().
+    static void JDK(ap_uint<AXI_WIDTH>* jpeg_pointer, const int size, ap_uint<64>* yuv_mcu_pointer, ap_uint<32>* infos);
+};
+
+#endif // _XF_CODEC_KERNEL_JPEG_DEC_SC_HPP_
\ No newline at end of file
diff --git a/codec/L2/demos/jpegDec_sc/utils.mk b/codec/L2/demos/jpegDec_sc/utils.mk
new file mode 100644
index 0000000000..1937b53d2b
--- /dev/null
+++ b/codec/L2/demos/jpegDec_sc/utils.mk
@@ -0,0 +1,239 @@
+#
+# Copyright 2019-2021 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# sc makefile-generator v1.0.0
+#
+#+-------------------------------------------------------------------------------
+# The following parameters are assigned with default values. These parameters can
+# be overridden through the make command line
+#+-------------------------------------------------------------------------------
+
+REPORT := no
+PROFILE := no
+DEBUG := no
+
+# Any REPORT value other than 'no' appends BOTH report options to VPP_LDFLAGS:
+# '--report estimate' (estimate report) and '--report system' (system report)
+ifneq ($(REPORT), no)
+VPP_LDFLAGS += --report estimate
+VPP_LDFLAGS += --report system
+endif
+
+# PROFILE=yes appends '--profile_kernel data:all:all:all' to VPP_LDFLAGS (profile summary report)
+ifeq ($(PROFILE), yes)
+VPP_LDFLAGS += --profile_kernel data:all:all:all
+endif
+
+# DEBUG=yes appends '--dk protocol:all:all:all' to VPP_LDFLAGS (debug summary report)
+ifeq ($(DEBUG), yes)
+VPP_LDFLAGS += --dk protocol:all:all:all
+endif
+
+# Environment setup: when XILINX_VITIS / XILINX_XRT have no non-empty value, fall back to the default install paths and export them
+ifndef XILINX_VITIS
+  XILINX_VITIS = /opt/xilinx/Vitis/$(TOOL_VERSION)
+  export XILINX_VITIS
+endif
+ifndef XILINX_XRT
+  XILINX_XRT = /opt/xilinx/xrt
+  export XILINX_XRT
+endif
+
+# Warn when PLATFORM_NAME matches no allowlist entry; fail when it matches a blocklist entry (POSIX sh only, entries are grep patterns)
+.PHONY: check_device
+check_device:
+	@set -eu; \
+	inallowlist=False; \
+	inblocklist=False; \
+	for dev in $(PLATFORM_ALLOWLIST); do \
+	    if echo "$(PLATFORM_NAME)" | grep -q -- "$$dev"; then inallowlist=True; fi; \
+	done; \
+	for dev in $(PLATFORM_BLOCKLIST); do \
+	    if echo "$(PLATFORM_NAME)" | grep -q -- "$$dev"; then inblocklist=True; fi; \
+	done; \
+	if [ "$$inallowlist" = False ]; then \
+	    echo "[Warning]: The device $(PLATFORM_NAME) not in allowlist."; \
+	fi; \
+	if [ "$$inblocklist" = True ]; then \
+	    echo "[ERROR]: The device $(PLATFORM_NAME) in blocklist."; exit 1; \
+	fi
+
+# Derive HOST_ARCH from the 'CPU Type' line printed by platforminfo for PLATFORM (':=' so platforminfo runs once, not per test)
+ifneq (,$(PLATFORM))
+HOST_ARCH_temp := $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
+ifeq ($(HOST_ARCH_temp), x86)
+HOST_ARCH := x86
+else ifeq ($(HOST_ARCH_temp), cortex-a9)
+HOST_ARCH := aarch32
+else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
+HOST_ARCH := aarch64
+endif
+endif
+
+# Device family by HOST_ARCH (space-indented on purpose: a leading tab could be parsed as a recipe line of the preceding rule)
+ifeq ($(HOST_ARCH), aarch32)
+  DEV_FAM = 7Series
+else ifeq ($(HOST_ARCH), aarch64)
+  DEV_FAM = Ultrascale
+endif
+
+# Abort unless HOST_ARCH is one of aarch64, aarch32 or x86. NOTE(review): an empty HOST_ARCH appears to pass this check - confirm
+ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
+$(error HOST_ARCH variable not set, please set correctly and rerun)
+endif
+
+.PHONY: check_version check_sysroot
+# Print the latest commit of the library checkout when git is on PATH and XFLIB_DIR contains a .git entry
+check_version:
+ifneq (,$(and $(shell which git),$(wildcard $(XFLIB_DIR)/.git)))
+	@cd $(XFLIB_DIR) && git log --graph --pretty=format:'%Cred%h%Creset -%C(yellow)%d%Creset %s %Cgreen(%cr) %C(bold blue)<%an>%Creset' --abbrev-commit -n 1 && cd -
+endif
+
+# Checks for SYSROOT: it must be set for every HOST_ARCH other than x86
+check_sysroot:
+ifneq ($(HOST_ARCH), x86)
+ifndef SYSROOT
+	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+
+# Select CXX. x86: keep the system g++ if its major version (__GNUG__) is >= the major version of GCC_INTOOL, else use the g++ under XILINX_VIVADO (error if XILINX_VIVADO is unset). aarch64/aarch32: cross compilers under XILINX_VITIS
+CXX := g++
+CXX_REQ := $(shell echo $(GCC_INTOOL) | cut -f 1 -d ".")
+ifeq ($(HOST_ARCH), x86)
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_REQ)), 1)
+ifndef XILINX_VIVADO
+$(error [ERROR]: g++ version too old. Please use $(CXX_REQ) or above)
+else
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/bin/g++
+ifeq ($(LD_LIBRARY_PATH),)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64
+else
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64:$(LD_LIBRARY_PATH)
+endif
+$(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
+endif
+endif
+else ifeq ($(HOST_ARCH), aarch64)
+CXX := $(XILINX_VITIS)/gnu/aarch64/lin/aarch64-linux/bin/aarch64-linux-gnu-g++
+else ifeq ($(HOST_ARCH), aarch32)
+CXX := $(XILINX_VITIS)/gnu/aarch32/lin/gcc-arm-linux-gnueabi/bin/arm-linux-gnueabihf-g++
+endif
+
+# Prepend the binutils under XILINX_VIVADO to PATH unless the system ld is at least BINUTILS_INTOOL. NOTE(review): expr compares non-integer operands as strings, so version ordering may be lexical - confirm
+BINUTILS := $(shell ld -v | cut -f 4 -d " " | cut -f 1 -d "-")
+BINUTILS_REQ := $(BINUTILS_INTOOL)
+ifneq ($(shell expr $(BINUTILS) \>= $(BINUTILS_REQ)), 1)
+export PATH := $(XILINX_VIVADO)/tps/lnx64/binutils-$(BINUTILS_INTOOL)/bin:$(PATH)
+endif
+
+# Command name of the Vitis compiler
+VPP := v++
+
+# Command names of the AI Engine compiler and simulators (names only; nothing is checked here)
+AIECXX := aiecompiler
+AIESIMULATOR := aiesimulator
+X86SIMULATOR := x86simulator
+
+# Installation checks: each target prints a hint and fails when the expected file is missing.
+# The wildcard tests are evaluated while the makefile is parsed.
+.PHONY: check_vivado check_vpp check_xrt
+check_vivado:
+ifeq ($(wildcard $(XILINX_VIVADO)/bin/vivado),)
+	@echo "Cannot locate Vivado installation. Please set XILINX_VIVADO variable." && false
+endif
+
+check_vpp:
+ifeq ($(wildcard $(XILINX_VITIS)/bin/v++),)
+	@echo "Cannot locate Vitis installation. Please set XILINX_VITIS variable." && false
+endif
+
+check_xrt:
+ifeq ($(wildcard $(XILINX_XRT)/lib/libxilinxopencl.so),)
+	@echo "Cannot locate XRT installation. Please set XILINX_XRT variable." && false
+endif
+
+# Put the Vitis and XRT bin directories first on the exported PATH
+export PATH := $(XILINX_VITIS)/bin:$(XILINX_XRT)/bin:$(PATH)
+# x86 hosts only: put the XRT lib directory first in LD_LIBRARY_PATH.
+# The ':' separator is added only when LD_LIBRARY_PATH already had a value,
+# so an empty variable does not end up with a trailing ':'.
+ifeq ($(HOST_ARCH), x86)
+LD_LIBRARY_PATH := $(XILINX_XRT)/lib$(if $(LD_LIBRARY_PATH),:$(LD_LIBRARY_PATH))
+endif
+
+ifneq (,$(wildcard $(PLATFORM)))
+# PLATFORM is the path of an existing file: use it directly as the platform file
+XPLATFORM := $(PLATFORM)
+else
+# Otherwise treat PLATFORM as a platform name or pattern and search for a matching .xpfm file
+# 1. search the colon-separated directories listed in PLATFORM_REPO_PATHS
+ifneq (,$(PLATFORM_REPO_PATHS))
+# 1.1 as exact name: <dir>/<PLATFORM>/<PLATFORM>.xpfm
+XPLATFORM := $(strip $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/$(PLATFORM)/$(PLATFORM).xpfm)))
+# 1.2 as a pattern: keep every <dir>/*/*.xpfm whose path matches PLATFORM as an awk regular expression
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/*/*.xpfm))
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 1.2
+endif # 1
+# 2. nothing found yet: search the platforms directory of the Vitis installation (XILINX_VITIS)
+ifeq (,$(XPLATFORM))
+# 2.1 as exact name
+XPLATFORM := $(strip $(wildcard $(XILINX_VITIS)/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 2.2 as a pattern (same awk match as 1.2)
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard $(XILINX_VITIS)/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 2.2
+endif # 2
+# 3. still nothing found: search the default location /opt/xilinx/platforms
+ifeq (,$(XPLATFORM))
+# 3.1 as exact name
+XPLATFORM := $(strip $(wildcard /opt/xilinx/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 3.2 as a pattern (same awk match as 1.2)
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard /opt/xilinx/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 3.2
+endif # 3
+endif
+
+define MSG_PLATFORM
+No platform matched pattern '$(PLATFORM)'.
+Available platforms are: $(XPLATFORMS)
+To add more platform directories, set the PLATFORM_REPO_PATHS variable or point PLATFORM variable to the full path of platform .xpfm file.
+endef
+export MSG_PLATFORM
+
+# MSG_PLATFORM is exported so that the check_platform recipe can print it from the shell environment
+.PHONY: check_platform
+check_platform:
+ifeq (,$(XPLATFORM))
+	@echo "$${MSG_PLATFORM}" && false
+endif
+# End of the installation and platform checks
+
+# PLATFORM_NAME - filesystem-friendly platform name: the file name part of PLATFORM without the .xpfm suffix.
+# Recursively expanded on purpose, so it follows the value PLATFORM has when it is used; notdir avoids forking a shell.
+PLATFORM_NAME = $(strip $(patsubst %.xpfm,%,$(notdir $(PLATFORM))))
+
+
+# Shell command shortcuts (ECHO carries a leading @, which suppresses command echoing at the start of a recipe line)
+RM = rm -f
+RMDIR = rm -rf
+
+MV = mv -f
+CP = cp -rf
+ECHO:= @echo
diff --git a/codec/L2/demos/jxlEnc/README.md b/codec/L2/demos/jxlEnc/README.md
new file mode 100644
index 0000000000..74be710ecd
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/README.md
@@ -0,0 +1,125 @@
+JXL Encoder
+===============
+
+The JXL Encoder example resides in the ``L2/demos/jxlEnc`` directory. This tutorial provides a step-by-step guide that covers the commands for building and running the kernel.
+
+Executable Usage
+----------------
+
+* **Work Directory(Step 1)**
+
+The steps for library download and environment setup can be found [here](https://github.com/Xilinx/Vitis_Libraries/tree/master/codec/L2/demos#building). To get the design,
+
+```
+   cd L2/demos/jxlEnc
+```
+
+* **Build kernel(Step 2)**
+
+Run the following make command to build your XCLBIN and host binary targeting a specific device. Please note that this process takes a long time, possibly a couple of hours.
+
+```
+   make run TARGET=hw DEVICE=xilinx_u50_gen3x16_xdma_201920_3
+```   
+
+* **Run kernel(Step 3)**
+
+To get the benchmark results, please run the following command.
+
+```
+  ./build_dir.hw.xilinx_u50_gen3x16_xdma_201920_3/host.exe --xclbin ./build_dir.hw.xilinx_u50_gen3x16_xdma_201920_3/jxlEnc.xclbin PNGFilePath JXLFilePath 
+```   
+
+JXL Encoder Input Arguments:
+
+```
+   Usage: host.exe -[-xclbin]
+          --xclbin:     the kernel name
+          PNGFilePath:  the path to the input *.PNG
+          JXLFilePath:  the path to the output *.jxl
+```          
+
+Note: Default arguments are set in the Makefile; you can use other [pictures](https://github.com/Xilinx/Vitis_Libraries/tree/master/codec/L2/demos#pictures) listed in the table.
+
+* **Example output(Step 4)** 
+
+```
+   Found Platform
+   Platform Name: Xilinx
+   Info: Context created
+   Info: Command queue created
+   INFO: Found Device=xilinx_u50_gen3x16_xdma_201920_3
+   INFO: Importing build_dir.sw_emu.xilinx_u50_gen3x16_xdma_201920_3/jxlEnc.xclbin
+   Loading: 'build_dir.sw_emu.xilinx_u50_gen3x16_xdma_201920_3/jxlEnc.xclbin'
+   Info: Program created
+   Info: Kernel created
+   INFO: kernel has been created
+   INFO: Kernel Start
+   INFO: Finish kernel execution
+   INFO: Finish E2E execution
+   ...
+
+   INFO: Finish kernel execution
+   INFO: Finish E2E execution
+   INFO: Data transfer from host to device: 100 us
+   INFO: Data transfer from device to host: 20 us
+   INFO: kernel execution time: 600 ms
+```
+
+Profiling
+---------
+
+The hardware resource utilizations are listed in the following table.
+Different tool versions may result in slightly different resource usage.
+
+
+##### Table 1 IP resources for JXL encoder 
+
+|      IP                |   BRAM   |   URAM   |    DSP   |    FF    |   LUT   |
+|------------------------|----------|----------|----------|----------|---------|
+|    lossy_enc_compute   |    364   |    53    |    498   |   145111 |  121741 |
+|    cluster_histogram   |    70    |    28    |    51    |   60744  |  38507  |
+|    tokInit_histogram   |    150   |    41    |    95    |   64710  |  39289  |
+
+
+##### Table 2 JXL Encoder Performance
+  
+###### lossy_enc_compute 
+|       Image       |      Size     |  Time(ms)  |  Throughput(MP/s)  |
+|-------------------|---------------|------------|--------------------|
+|  lena_c_512.png   |    512x512    |    3.63    |        72.21       |     
+|  hq_1024x1024.png |   1024x1024   |    13.06   |        80.29       |    
+|  hq_2Kx2K.png     |   2048x2048   |    50.33   |        83.34       |  
+  
+###### cluster_histogram 
+|       Image       |      Size     |  Time(ms)  |  Throughput(MP/s)  |
+|-------------------|---------------|------------|--------------------|
+|  lena_c_512.png   |    512x512    |    4.6     |        56.98       |     
+|  hq_1024x1024.png |   1024x1024   |    14.6    |        71.82       |    
+|  hq_2Kx2K.png     |   2048x2048   |    41.13   |        101.97      |   
+  
+###### tokInit_histogram 
+|       Image       |      Size     |   Time(ms)  |  Throughput(MP/s)  |
+|-------------------|---------------|-------------|--------------------|
+|  lena_c_512.png   |    512x512    |    6.07     |        43.19       |     
+|  hq_1024x1024.png |   1024x1024   |    18.03    |        58.16       |    
+|  hq_2Kx2K.png     |   2048x2048   |    79.30    |        52.89       |   
+
+## License
+
+Licensed using the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0).
+
+    Copyright 2022 Xilinx, Inc.
+    
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+    
+        http://www.apache.org/licenses/LICENSE-2.0
+    
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+    Copyright 2022 Xilinx, Inc.
\ No newline at end of file
diff --git a/codec/L2/demos/jxlEnc/acc_cluster_histogram/Makefile b/codec/L2/demos/jxlEnc/acc_cluster_histogram/Makefile
new file mode 100644
index 0000000000..3d7f53ad6e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_cluster_histogram/Makefile
@@ -0,0 +1,331 @@
+# Copyright 2019-2022 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# vitis makefile-generator v2.0.6
+
+############################## Help Section ##############################
+.PHONY: help
+
+help::
+	$(ECHO) "Makefile Usage:"
+	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to generate the design for specified Target and Shell."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to run application in emulation."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make xclbin TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to build xclbin application."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make host TARGET=<hw/hw_emu/sw_emu/>"
+	$(ECHO) "      Command to build host application."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
+	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) ""
+	$(ECHO) "  make clean "
+	$(ECHO) "      Command to remove the generated non-hardware files."
+	$(ECHO) ""
+	$(ECHO) "  make cleanall"
+	$(ECHO) "      Command to remove all the generated files."
+	$(ECHO) ""
+
+############################## Setting up Project Variables ##############################
+
+MK_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+XF_PROJ_ROOT ?= $(shell bash -c 'export MK_PATH=$(MK_PATH); echo $${MK_PATH%/L2/*}')
+CUR_DIR := $(patsubst %/,%,$(dir $(MK_PATH)))
+XFLIB_DIR = $(XF_PROJ_ROOT)
+
+# setting default value
+TARGET ?= sw_emu
+HOST_ARCH ?= x86
+
+#setting PLATFORM
+ifeq ($(PLATFORM),)
+PLATFORM := $(DEVICE)
+endif
+ifeq ($(PLATFORM),)
+PLATFORM := xilinx_u50_gen3x16_xdma_5_202210_1
+endif
+
+# #################### Checking if PLATFORM is in allowlist/blocklist ############################
+PLATFORM_ALLOWLIST +=  u50
+PLATFORM_BLOCKLIST +=  zc
+
+include ./utils.mk
+TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
+TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
+BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
+BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
+EMCONFIG := $(BUILD_DIR)/emconfig.json
+XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
+export XCL_BINDIR = $(XCLBIN_DIR)
+
+EXE_FILE_DEPS :=
+BINARY_CONTAINERS_DEPS :=
+RUN_DEPS :=
+
+# get global setting
+ifeq ($(HOST_ARCH), x86)
+CXXFLAGS += -fmessage-length=0 -I$(CUR_DIR)/src/ -I$(XILINX_XRT)/include -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
+LDFLAGS += -pthread -L$(XILINX_XRT)/lib -L$(XILINX_HLS)/lnx64/tools/fpo_v7_0  -Wl,--as-needed -lOpenCL -lxrt_coreutil -lgmp -lmpfr -lIp_floating_point_v7_0_bitacc_cmodel 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
+VPP_LDFLAGS += --optimize 2 -R 2 
+else ifeq ($(HOST_ARCH), aarch64)
+CXXFLAGS += -I$(CUR_DIR)/src/ -fmessage-length=0 --sysroot=$(SYSROOT)  -I$(SYSROOT)/usr/include/xrt -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
+LDFLAGS += -pthread -L$(SYSROOT)/usr/lib -L$(XILINX_VITIS_AIETOOLS)/lib/aarch64.o -Wl,--as-needed -lxilinxopencl -lxrt_coreutil 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
+VPP_LDFLAGS += --optimize 2 -R 2 
+endif
+CXXFLAGS += $(EXTRA_CXXFLAGS)
+VPP_FLAGS += $(EXTRA_VPP_FLAGS)
+
+########################## Setting up Host Variables ##########################
+ifeq ($(TARGET),sw_emu)
+CXXFLAGS += -D SW_EMU_TEST
+endif
+ifeq ($(TARGET),hw_emu)
+CXXFLAGS += -D HW_EMU_TEST
+endif
+
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
+#Include Required Host Source Files
+HOST_SRCS += $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cjxl.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cjxl_main.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cmdline.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/codec_config.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/speed_stats.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cpu/cpu.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cpu/os_specific.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/box/box.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/time.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_png.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_pgx.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_pnm.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_jpg.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_psd.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner_internal.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cluster.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/decode_to_jpeg.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_huffman.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/ans_common.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cluster.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_context_map.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/progressive_split.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_detect_dots.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_params.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/entropy_coder.cc 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/blending.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_comparator.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_table.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_tree.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/linalg.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_file.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/aux_out.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/headers.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/alpha.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/image_bundle.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/image_metadata.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/frame_header.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/color_encoding_internal.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/quant_weights.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_fast_heuristics.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_encode.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/fields.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/luminance.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_color_management.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_bit_writer.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/image.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/loop_filter.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/color_management.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_modular.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_quant_weights.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_photon_noise.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_noise.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_splines.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_patch_dictionary.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/splines.cc 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_xyb.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/gaborish.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ar_control_field.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/gauss_blur.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/memory_manager_internal.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_file.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_image_bundle.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_external_image.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_modular.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_toc.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ans.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_modular.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/passes_state.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/chroma_from_luma.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_context_map.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_coeff_order.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_ans.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_entropy_coder.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec_common.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/compressed_dc.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/epf.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_dot_dictionary.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_xyb.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_frame.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_patch_dictionary.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_comparator.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_reconstruct.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group.cc 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group_border.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/filters.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/convolve.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_cache.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_noise.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_huffman.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dct_scales.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/ac_strategy.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_decode.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_icc_codec.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli/butteraugli.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data_writer.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/jpeg_data.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data_reader.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/base/padded_bytes.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/base/data_parallel.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/base/cache_aligned.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/base/status.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/dec_ma.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/modular_image.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/encoding.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_rct.cc 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_squeeze.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_palette.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/squeeze.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_transform.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/jxl_transform.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_ma.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_encoding.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encode.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/memory.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references_hq.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/brotli_bit_stream.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_splitter.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/metablock.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment_two_pass.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encoder_dict.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/utf8_util.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/decode.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/static_dict.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/literal_cost.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/entropy_encode.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/bit_cost.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/cluster.c 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/dictionary_hash.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/histogram.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/bit_reader.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/huffman.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/state.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/dictionary.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/transform.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmslut.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsnamed.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspack.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscnvrt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio1.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgmt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsopt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsalpha.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmstypes.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsintrp.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgamma.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscam02.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscgats.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmshalf.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsmtrx.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsps2.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssamp.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssm.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsxform.cpp 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio0.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsplugin.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmserr.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspcs.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmswtpnt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsvirt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lodepng/lodepng.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/aligned_allocator.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/targets.cc $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/acc_enc_ac_strategy.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/acc_enc_adaptive_quantization.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/acc_enc_cache.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/acc_enc_frame.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/acc_enc_group.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/acc_enc_chroma_from_luma.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/acc_init_histogram.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_host.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_phase1.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_phase2.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_phase3.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/acc_cluster_histogram/host/host_cluster_histogram.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
+CXXFLAGS +=  -I $(XFLIB_DIR)/../utils/L1/include/ -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/ -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/include -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/build/lib/include -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/include -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/highway -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lodepng -I $(XFLIB_DIR)/L2/demos/jxlEnc/acc_cluster_histogram/kernel -I $(XFLIB_DIR)/L2/demos/jxlEnc/acc_cluster_histogram/host -I $(XFLIB_DIR)/L2/demos/jxlEnc/others/include -I $(XFLIB_DIR)/L2/demos/jxlEnc/others/include/host_acc_cluster_histogram
+CXXFLAGS += -O3 
+
+EXE_NAME := host.exe
+EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
+EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
+
+HOST_ARGS :=  --xclbin $(BUILD_DIR)/jxlEnc.xclbin $(XFLIB_DIR)/L2/demos/jxlEnc/images/t0.png t0.jxl
+ifneq ($(HOST_ARCH), x86)
+PKG_HOST_ARGS = $(foreach args,$(HOST_ARGS),$(subst $(dir $(patsubst %/,%,$(args))),,$(args)))
+endif
+
+########################## Kernel compiler global settings ##########################
+ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
+VPP_FLAGS +=   --config $(CUR_DIR)/conn_u50.cfg
+VPP_FLAGS +=  -I $(XFLIB_DIR)/../utils/L1/include/ -I $(XFLIB_DIR)/L2/include/hw/jxlEnc
+
+else 
+VPP_FLAGS +=  -I $(XFLIB_DIR)/../utils/L1/include/ -I $(XFLIB_DIR)/L2/include/hw/jxlEnc
+
+endif
+
+######################### binary container global settings ##########################
+VPP_FLAGS_JxlEnc_ans_clusterHistogram +=  -D KERNEL_NAME=JxlEnc_ans_clusterHistogram
+VPP_FLAGS_JxlEnc_ans_clusterHistogram += --hls.clock 300000000:JxlEnc_ans_clusterHistogram
+ifneq ($(HOST_ARCH), x86)
+VPP_LDFLAGS_jxlEnc += --clock.defaultFreqHz 300000000
+else
+VPP_LDFLAGS_jxlEnc += --kernel_frequency 300
+endif
+
+ifeq ($(HOST_ARCH), x86)
+BINARY_CONTAINERS += $(BUILD_DIR)/jxlEnc.xclbin
+else
+BINARY_CONTAINERS += $(BUILD_DIR)/jxlEnc_pkg.$(LINK_TARGET_FMT)
+BINARY_CONTAINERS_PKG += $(BUILD_DIR)/jxlEnc.xclbin
+endif
+
+# ################ Setting Rules for Binary Containers (Building Kernels) ################
+$(TEMP_DIR)/JxlEnc_ans_clusterHistogram.xo: $(XFLIB_DIR)/L2/demos/jxlEnc/acc_cluster_histogram/kernel/hls_cluster_histogram.cpp 
+	$(ECHO) "Compiling Kernel: JxlEnc_ans_clusterHistogram"
+	mkdir -p $(TEMP_DIR)
+	$(VPP) -c $(VPP_FLAGS_JxlEnc_ans_clusterHistogram) $(VPP_FLAGS) -k JxlEnc_ans_clusterHistogram -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
+BINARY_CONTAINER_jxlEnc_OBJS += $(TEMP_DIR)/JxlEnc_ans_clusterHistogram.xo
+BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_jxlEnc_OBJS)
+$(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
+	mkdir -p $(BUILD_DIR)
+	$(VPP) -l $(VPP_FLAGS) --temp_dir $(TEMP_DIR) --report_dir $(BUILD_REPORT_DIR)/jxlEnc $(VPP_LDFLAGS)  $(VPP_LDFLAGS_jxlEnc) $(AIE_LDFLAGS)   -o $@ $^
+
+############################## Setting Rules for Host (Building Host Executable) ##############################
+ifeq ($(HOST_ARCH), x86)
+$(EXE_FILE): $(EXE_FILE_DEPS) |  check_xrt
+	mkdir -p $(BUILD_DIR)
+	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
+
+else
+$(EXE_FILE): $(EXE_FILE_DEPS) |  check_sysroot
+	mkdir -p $(BUILD_DIR)
+	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
+
+endif
+
+$(EMCONFIG):
+	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
+############################## Preparing sdcard folder ##############################
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE := $(SYSROOT)/../../uImage
+else
+K_IMAGE := $(SYSROOT)/../../Image
+endif
+RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
+$(RUN_SCRIPT):
+	rm -rf $(RUN_SCRIPT)
+	@echo 'export LD_LIBRARY_PATH=/mnt:/tmp:$(LIBRARY_PATH)' >> $(RUN_SCRIPT)
+ifneq ($(filter sw_emu hw_emu, $(TARGET)),)
+	@echo 'export XCL_EMULATION_MODE=$(TARGET)' >> $(RUN_SCRIPT)
+endif
+	@echo 'export XILINX_VITIS=/mnt' >> $(RUN_SCRIPT)
+	@echo 'export XILINX_XRT=/usr' >> $(RUN_SCRIPT)
+	@echo 'if [ -f platform_desc.txt  ]; then' >> $(RUN_SCRIPT)
+	@echo '        cp platform_desc.txt /etc/xocl.txt' >> $(RUN_SCRIPT)
+	@echo 'fi' >> $(RUN_SCRIPT)
+	@echo './$(EXE_NAME) $(PKG_HOST_ARGS)' >> $(RUN_SCRIPT)
+	@echo 'return_code=$$?' >> $(RUN_SCRIPT)
+	@echo 'if [ $$return_code -ne 0 ]; then' >> $(RUN_SCRIPT)
+	@echo '        echo "ERROR: Embedded host run failed, RC=$$return_code"' >> $(RUN_SCRIPT)
+	@echo 'else' >> $(RUN_SCRIPT)
+	@echo '        echo "INFO: TEST PASSED, RC=0"' >> $(RUN_SCRIPT)
+	@echo 'fi' >> $(RUN_SCRIPT)
+	@echo 'echo "INFO: Embedded host run completed."' >> $(RUN_SCRIPT)
+	@echo 'exit $$return_code' >> $(RUN_SCRIPT)
+DATA_FILE := 
+DATA_DIR := 
+SD_FILES += $(RUN_SCRIPT)
+SD_FILES += $(EXE_FILE)
+SD_FILES += $(EMCONFIG)
+SD_FILES += xrt.ini
+SD_FILES += $(DATA_FILE)# where define DATAFILE in json
+SD_FILES_WITH_PREFIX = $(foreach sd_file,$(SD_FILES), $(if $(filter $(sd_file),$(wildcard $(sd_file))), --package.sd_file $(sd_file)))
+SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
+PACKAGE_FILES := $(BINARY_CONTAINERS)
+PACKAGE_FILES += $(AIE_CONTAINER)
+SD_CARD := $(CUR_DIR)/package_$(TARGET)
+vck190_dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+	@echo "Generating sd_card folder...."
+	mkdir -p $(SD_CARD)
+	chmod a+rx $(BUILD_DIR)/run_script.sh
+ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	@echo "### ***** sd_card generation done! ***** ###"
+vck190_dfx_hw := true
+endif
+endif
+ifeq ($(vck190_dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+	@echo "### ***** sd_card generation done! ***** ###"
+endif
+
+.PHONY: sd_card
+sd_card: $(SD_CARD)
+endif
+############################## Setting Essential Checks and Building Rules ##############################
+RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
+RUN_DEPS += $(SD_CARD)
+
+.PHONY: mkflag all run
+mkflag:
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do echo $$var >> $(BUILD_DIR)/makefile_args.txt; done
+all: check_device  check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
+#hw_emu
+ifneq (,$(filter hw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	XCL_EMULATION_MODE=$(TARGET) $(EXE_FILE) $(HOST_ARGS)
+	./check.sh
+else
+	@echo $(RUN_DEPS)
+	$(SD_CARD)/launch_$(TARGET).sh -no-reboot -run-app $(notdir $(RUN_SCRIPT)) 
+	grep "TEST PASSED, RC=0" $(SD_CARD)/qemu_output.log || exit 1
+	./check.sh
+endif
+endif
+#sw_emu
+ifneq (,$(filter sw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	XCL_EMULATION_MODE=$(TARGET) $(EXE_FILE) $(HOST_ARGS) 
+	./check.sh
+else
+	@echo $(RUN_DEPS)
+	$(SD_CARD)/launch_$(TARGET).sh -no-reboot -run-app $(notdir $(RUN_SCRIPT)) 
+	grep "TEST PASSED, RC=0" $(SD_CARD)/qemu_output.log || exit 1
+	./check.sh
+endif
+endif
+#hw
+ifeq ($(TARGET), hw)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(HOST_ARGS)
+	./check.sh
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(HOST_ARGS)
+	./check.sh
+else
+	$(ECHO) "Please copy the content of sd_card folder and data to an SD Card and run on the board"
+endif
+endif
+
+############################## Setting Targets ##############################
+# Declare every cleaning helper phony so a stray file or directory named "cleanh"/"cleank" cannot make them look up to date.
+.PHONY: clean cleanall cleanh cleank emconfig
+emconfig: $(EMCONFIG)
+
+.PHONY: host
+ifeq ($(HOST_ARCH), x86)
+host:  check_xrt $(EXE_FILE)
+else
+host:  check_sysroot $(EXE_FILE)
+endif
+
+.PHONY: xclbin
+ifeq ($(HOST_ARCH), x86)
+xclbin:  check_vpp check_xrt $(BINARY_CONTAINERS) 
+else
+xclbin:  check_vpp check_sysroot $(BINARY_CONTAINERS) 
+endif
+
+############################## Cleaning Rules ##############################
+cleanh:
+	-$(RMDIR) $(EXE_FILE) vitis_* TempConfig system_estimate.xtxt *.rpt .run/
+	-$(RMDIR) src/*.ll _xocc_* .Xil dltmp* xmltmp* *.log *.jou *.wcfg *.wdb sample_link.ini sample_compile.ini obj*  bin* *.csv *.jpg *.jpeg *.png
+# cleank: remove kernel-side artifacts (xclbin files, emulation leftovers, pl*start_simulation.sh scripts, _x_temp.* dirs); errors are ignored via the '-' prefix.
+cleank:
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*
+
+cleanall: cleanh cleank
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
+	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
+
+clean: cleanh
\ No newline at end of file
diff --git a/codec/L2/demos/jxlEnc/acc_cluster_histogram/check.sh b/codec/L2/demos/jxlEnc/acc_cluster_histogram/check.sh
new file mode 100755
index 0000000000..2328296af1
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_cluster_histogram/check.sh
@@ -0,0 +1 @@
+echo "3ba213afa0ed5f639877f96990ebd51a  t0.jxl" | md5sum -c -
diff --git a/codec/L2/demos/jxlEnc/acc_cluster_histogram/conn_u50.cfg b/codec/L2/demos/jxlEnc/acc_cluster_histogram/conn_u50.cfg
new file mode 100644
index 0000000000..72c25fe004
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_cluster_histogram/conn_u50.cfg
@@ -0,0 +1,14 @@
+[hls]
+#pre_tcl=hls_pre.tcl
+
+[connectivity]
+sp=JxlEnc_ans_clusterHistogram_1.m_axi_histogram_gmem:HBM[0]  
+sp=JxlEnc_ans_clusterHistogram_1.m_axi_histocnt_gmem:HBM[1] 
+sp=JxlEnc_ans_clusterHistogram_1.m_axi_histosize_gmem:HBM[2] 
+sp=JxlEnc_ans_clusterHistogram_1.m_axi_nonempty_gmem:HBM[3] 
+sp=JxlEnc_ans_clusterHistogram_1.m_axi_ctx_gmem:HBM[4]
+sp=JxlEnc_ans_clusterHistogram_1.m_axi_histo_clusd_gmem:HBM[5] 
+sp=JxlEnc_ans_clusterHistogram_1.m_axi_histosize_clusd_gmem:HBM[6] 
+sp=JxlEnc_ans_clusterHistogram_1.m_axi_histo_clusdin_gmem:HBM[7] 
+#slr=hls_ANSclusterHistogram_1:SLR1
+
diff --git a/codec/L2/demos/jxlEnc/acc_cluster_histogram/description.json b/codec/L2/demos/jxlEnc/acc_cluster_histogram/description.json
new file mode 100644
index 0000000000..88ca52a561
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_cluster_histogram/description.json
@@ -0,0 +1,330 @@
+{
+    "gui": false,
+    "name": "JXL ACC_CLUSTER Demo",
+    "description": "This example is based on Google's PIK, which was chosen as the base framework for JPEG XL. The pikEnc is based on the 'fast mode' of PIK which can provide better encoding efficiency than most other still image encoding methods. The pikEnc is based on Xilinx HLS design methodology and optimized for FPGA architecture. It can provide higher throughput and lower latency compared to software-based solutions",
+    "flow": "vitis",
+    "platform_allowlist": [
+        "u50"
+    ],
+    "platform_blocklist": [
+        "zc"
+    ],
+    "platform_properties": {
+        "u50": {
+            "v++": {
+                "compiler": {
+                    "clflags": [
+                        "--config PROJECT/conn_u50.cfg"
+                    ]
+                }
+            }
+        }
+    },
+    "data": [
+        "./data"
+    ],
+    "launch": [
+        {
+            "cmd_args": " --xclbin BUILD/jxlEnc.xclbin LIB_DIR/L2/demos/jxlEnc/images/t0.png t0.jxl",
+            "name": "generic launch for all flows"
+        }
+    ],
+    "post_launch": [
+        {
+            "launch_cmd": [
+                "./check.sh"
+            ]
+        }
+    ],
+    "host": {
+        "host_exe": "host.exe",
+        "compiler": {
+            "sources": [
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cjxl.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cjxl_main.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cmdline.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/codec_config.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/speed_stats.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cpu/cpu.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cpu/os_specific.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/box/box.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/time.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_png.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_pgx.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_pnm.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_jpg.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_psd.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner_internal.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cluster.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/decode_to_jpeg.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_huffman.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/ans_common.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cluster.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_context_map.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/progressive_split.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_detect_dots.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_params.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/entropy_coder.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/blending.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_comparator.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_table.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_tree.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/linalg.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_file.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/aux_out.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/headers.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/alpha.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/image_bundle.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/image_metadata.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/frame_header.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/color_encoding_internal.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/quant_weights.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_fast_heuristics.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_encode.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/fields.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/luminance.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_color_management.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_bit_writer.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/image.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/loop_filter.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/color_management.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_modular.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_quant_weights.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_photon_noise.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_noise.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_splines.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_patch_dictionary.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/splines.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_xyb.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/gaborish.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ar_control_field.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/gauss_blur.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/memory_manager_internal.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_file.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_image_bundle.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_external_image.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_modular.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_toc.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ans.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_modular.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/passes_state.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/chroma_from_luma.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_context_map.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_coeff_order.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_ans.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_entropy_coder.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec_common.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/compressed_dc.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/epf.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_dot_dictionary.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_xyb.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_frame.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_patch_dictionary.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_comparator.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_reconstruct.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group_border.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/filters.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/convolve.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_cache.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_noise.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_huffman.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dct_scales.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/ac_strategy.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_decode.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_icc_codec.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli/butteraugli.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data_writer.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/jpeg_data.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data_reader.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/base/padded_bytes.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/base/data_parallel.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/base/cache_aligned.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/base/status.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/dec_ma.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/modular_image.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/encoding.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_rct.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_squeeze.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_palette.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/squeeze.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_transform.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/jxl_transform.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_ma.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_encoding.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encode.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/memory.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references_hq.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/brotli_bit_stream.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_splitter.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/metablock.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment_two_pass.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encoder_dict.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/utf8_util.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/decode.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/static_dict.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/literal_cost.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/entropy_encode.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/bit_cost.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/cluster.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/dictionary_hash.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/histogram.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/bit_reader.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/huffman.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/state.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/dictionary.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/transform.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmslut.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsnamed.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspack.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscnvrt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio1.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgmt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsopt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsalpha.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmstypes.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsintrp.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgamma.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscam02.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscgats.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmshalf.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsmtrx.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsps2.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssamp.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssm.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsxform.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio0.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsplugin.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmserr.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspcs.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmswtpnt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsvirt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lodepng/lodepng.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/aligned_allocator.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/targets.cc",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/acc_enc_ac_strategy.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/acc_enc_adaptive_quantization.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/acc_enc_cache.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/acc_enc_frame.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/acc_enc_group.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/acc_enc_chroma_from_luma.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/acc_init_histogram.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_host.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_phase1.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_phase2.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_phase3.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/acc_cluster_histogram/host/host_cluster_histogram.cpp",
+                "LIB_DIR/ext/xcl2/xcl2.cpp"
+            ],
+            "includepaths": [
+                "LIB_DIR/../utils/L1/include/",
+                "LIB_DIR/ext/xcl2",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/include",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/build/lib/include",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/include",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/highway",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lodepng",
+                "LIB_DIR/L2/demos/jxlEnc/acc_cluster_histogram/kernel",
+                "LIB_DIR/L2/demos/jxlEnc/acc_cluster_histogram/host",
+                "LIB_DIR/L2/demos/jxlEnc/others/include",
+                "LIB_DIR/L2/demos/jxlEnc/others/include/host_acc_cluster_histogram"
+            ],
+            "options": "-O3 "
+        }
+    },
+    "v++": {
+        "compiler": {
+            "includepaths": [
+                "LIB_DIR/../utils/L1/include/",
+                "LIB_DIR/L2/include/hw/jxlEnc"
+            ]
+        }
+    },
+    "containers": [
+        {
+            "name": "jxlEnc",
+            "accelerators": [
+                {
+                    "location": "LIB_DIR/L2/demos/jxlEnc/acc_cluster_histogram/kernel/hls_cluster_histogram.cpp",
+                    "frequency": 300.0,
+                    "clflags": " -D KERNEL_NAME=JxlEnc_ans_clusterHistogram",
+                    "name": "JxlEnc_ans_clusterHistogram",
+                    "num_compute_units": 1,
+                    "compute_units": [
+                        {
+                            "name": "JxlEnc_ans_clusterHistogram",
+                            "arguments": [
+                                {
+                                    "name": "gmem0_0",
+                                    "memory": "DDR[0]"
+                                },
+                                {
+                                    "name": "gmem0_1",
+                                    "memory": "DDR[0]"
+                                },
+                                {
+                                    "name": "gmem1_0",
+                                    "memory": "DDR[1]"
+                                },
+                                {
+                                    "name": "gmem1_1",
+                                    "memory": "DDR[1]"
+                                },
+                                {
+                                    "name": "gmem1_2",
+                                    "memory": "DDR[1]"
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ],
+            "frequency": 300
+        }
+    ],
+    "testinfo": {
+        "disable": false,
+        "jobs": [
+            {
+                "index": 0,
+                "dependency": [],
+                "env": "",
+                "cmd": "",
+                "max_memory_MB": {
+                    "vitis_hw_build": 81920,
+                    "vitis_hw_emu": 40960,
+                    "vitis_sw_emu": 10240,
+                    "vitis_hw_run": 10240
+                },
+                "max_time_min": {
+                    "vitis_hw_build": 3200,
+                    "vitis_hw_emu": 1600,
+                    "vitis_sw_emu": 120,
+                    "vitis_hw_run": 10
+                }
+            }
+        ],
+        "targets": [
+            "vitis_sw_emu",
+            "vitis_hw_emu",
+            "vitis_hw"
+        ],
+        "category": "canary"
+    }
+}
diff --git a/codec/L2/demos/jxlEnc/acc_cluster_histogram/host/host_cluster_histogram.cpp b/codec/L2/demos/jxlEnc/acc_cluster_histogram/host/host_cluster_histogram.cpp
new file mode 100644
index 0000000000..22f6dc963a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_cluster_histogram/host/host_cluster_histogram.cpp
@@ -0,0 +1,689 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HOST_CLUSTER_HISTOGRAM_CPP
+#define HOST_CLUSTER_HISTOGRAM_CPP
+
+#include <iostream>
+#include <sys/time.h>
+
+#include "xcl2.hpp"
+#include "xf_utils_sw/logger.hpp"
+
+#define XCL_BANK(n) (((unsigned int)(n)) | XCL_MEM_TOPOLOGY)  // bank index n OR-ed with XCL_MEM_TOPOLOGY (defined outside this file); passed as the first member of the cl_mem_ext_ptr_t initializers below
+
+#define XCL_BANK0 XCL_BANK(0)
+#define XCL_BANK1 XCL_BANK(1)
+#define XCL_BANK2 XCL_BANK(2)
+#define XCL_BANK3 XCL_BANK(3)
+#define XCL_BANK4 XCL_BANK(4)
+#define XCL_BANK5 XCL_BANK(5)
+#define XCL_BANK6 XCL_BANK(6)
+#define XCL_BANK7 XCL_BANK(7)
+#define XCL_BANK8 XCL_BANK(8)
+#define XCL_BANK9 XCL_BANK(9)
+#define XCL_BANK10 XCL_BANK(10)
+#define XCL_BANK11 XCL_BANK(11)
+#define XCL_BANK12 XCL_BANK(12)
+#define XCL_BANK13 XCL_BANK(13)
+#define XCL_BANK14 XCL_BANK(14)
+#define XCL_BANK15 XCL_BANK(15)
+#define XCL_BANK16 XCL_BANK(16)
+#define XCL_BANK17 XCL_BANK(17)
+#define XCL_BANK18 XCL_BANK(18)
+#define XCL_BANK19 XCL_BANK(19)
+#define XCL_BANK20 XCL_BANK(20)
+#define XCL_BANK21 XCL_BANK(21)
+#define XCL_BANK22 XCL_BANK(22)
+#define XCL_BANK23 XCL_BANK(23)
+#define XCL_BANK24 XCL_BANK(24)
+#define XCL_BANK25 XCL_BANK(25)
+#define XCL_BANK26 XCL_BANK(26)
+#define XCL_BANK27 XCL_BANK(27)
+#define XCL_BANK28 XCL_BANK(28)
+#define XCL_BANK29 XCL_BANK(29)
+#define XCL_BANK30 XCL_BANK(30)
+#define XCL_BANK31 XCL_BANK(31)
+#define XCL_BANK32 XCL_BANK(32)
+#define XCL_BANK33 XCL_BANK(33)
+
+unsigned long diff(const struct timeval* newTime, const struct timeval* oldTime) {  // elapsed time from *oldTime to *newTime, in microseconds
+    return (newTime->tv_usec - oldTime->tv_usec) + 1000000 * (newTime->tv_sec - oldTime->tv_sec);
+}
+
+template <typename T>  // T: element type of the buffer being allocated
+T* aligned_alloc(std::size_t num) {  // storage for num objects of T, aligned to 4096 bytes; throws std::bad_alloc if posix_memalign fails
+    void* raw = nullptr;
+    if (posix_memalign(&raw, 4096, sizeof(T) * num) != 0) throw std::bad_alloc();
+    return static_cast<T*>(raw);
+}
+
+void hls_ANSclusterHistogram_wrapper(std::string xclbinPath,
+                                     uint32_t* config,
+                                     //====================
+                                     int32_t* histograms0_ptr,
+                                     uint32_t* histo_totalcnt0_ptr,
+                                     uint32_t* histo_size0_ptr,
+                                     uint32_t* nonempty_histo0_ptr,
+                                     uint8_t* ctx_map0_ptr,
+                                     int32_t* histograms_clusd0_ptr,
+                                     uint32_t* histo_size_clusd0_ptr,
+                                     int32_t* histograms_clusdin0_ptr,
+                                     //====================
+                                     int32_t* histograms1_ptr,
+                                     uint32_t* histo_totalcnt1_ptr,
+                                     uint32_t* histo_size1_ptr,
+                                     uint32_t* nonempty_histo1_ptr,
+                                     uint8_t* ctx_map1_ptr,
+                                     int32_t* histograms_clusd1_ptr,
+                                     uint32_t* histo_size_clusd1_ptr,
+                                     int32_t* histograms_clusdin1_ptr,
+                                     //======================
+                                     int32_t* histograms2_ptr,
+                                     uint32_t* histo_totalcnt2_ptr,
+                                     uint32_t* histo_size2_ptr,
+                                     uint32_t* nonempty_histo2_ptr,
+                                     uint8_t* ctx_map2_ptr,
+                                     int32_t* histograms_clusd2_ptr,
+                                     uint32_t* histo_size_clusd2_ptr,
+                                     int32_t* histograms_clusdin2_ptr,
+                                     //======================
+                                     int32_t* histograms3_ptr,
+                                     uint32_t* histo_totalcnt3_ptr,
+                                     uint32_t* histo_size3_ptr,
+                                     uint32_t* nonempty_histo3_ptr,
+                                     uint8_t* ctx_map3_ptr,
+                                     int32_t* histograms_clusd3_ptr,
+                                     uint32_t* histo_size_clusd3_ptr,
+                                     int32_t* histograms_clusdin3_ptr,
+                                     //======================
+                                     int32_t* histograms4_ptr,
+                                     uint32_t* histo_totalcnt4_ptr,
+                                     uint32_t* histo_size4_ptr,
+                                     uint32_t* nonempty_histo4_ptr,
+                                     uint8_t* ctx_map4_ptr,
+                                     int32_t* histograms_clusd4_ptr,
+                                     uint32_t* histo_size_clusd4_ptr,
+                                     int32_t* histograms_clusdin4_ptr) {
+    printf("[HOST] size= %d\n", config[6]);
+
+    xf::common::utils_sw::Logger logger(std::cout, std::cerr);
+    cl_int fail;
+
+    struct timeval start_time; // End to end time clock start
+    gettimeofday(&start_time, 0);
+
+    // platform related operations
+    std::vector<cl::Device> devices = xcl::get_xil_devices();
+    cl::Device device = devices[0];
+
+    // Creating Context and Command Queue for selected Device
+    cl::Context context(device, NULL, NULL, NULL, &fail);
+    logger.logCreateContext(fail);
+    cl::CommandQueue q(context, device, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &fail);
+    logger.logCreateCommandQueue(fail);
+    std::string devName = device.getInfo<CL_DEVICE_NAME>();
+    printf("INFO: Found Device=%s\n", devName.c_str());
+    cl::Program::Binaries xclBins = xcl::import_binary_file(xclbinPath);
+
+    devices.resize(1);
+    cl::Program program(context, devices, xclBins, NULL, &fail);
+    logger.logCreateProgram(fail);
+
+    int repInt = 1;
+    // create kernels
+    std::vector<cl::Kernel> cluster_kernel(repInt);
+    for (int i = 0; i < repInt; i++) {
+        cluster_kernel[i] = cl::Kernel(program, "JxlEnc_ans_clusterHistogram", &fail);
+        logger.logCreateKernel(fail);
+    }
+    std::cout << "INFO: kernel has been created" << std::endl;
+
+    // declare map of host buffers
+    std::cout << "kernel config size:" << 30 << std::endl;
+    std::cout << "histogram size: " << config[0] << "," << config[1] << "," << config[2] << "," << config[3] << ","
+              << config[4] << std::endl;
+    std::cout << "non-empty histogram size: "
+              << "," << config[5] << "," << config[6] << "," << config[7] << "," << config[8] << "," << config[9]
+              << std::endl;
+    std::cout << "largest idx: " << config[10] << "," << config[11] << "," << config[12] << "," << config[13] << ","
+              << config[14] << std::endl;
+    std::cout << "num cluster: " << config[15] << "," << config[16] << "," << config[17] << "," << config[18] << ","
+              << config[19] << std::endl;
+    std::cout << "histo_size_clusdin: " << config[20] << "," << config[21] << "," << config[22] << "," << config[23]
+              << "," << config[24] << std::endl;
+    std::cout << "do_once: " << config[25] << "," << config[26] << "," << config[27] << "," << config[28] << ","
+              << config[29] << std::endl;
+
+#define MAX_NUM_CONFIG 30
+    uint32_t* hb_config = aligned_alloc<uint32_t>(MAX_NUM_CONFIG);
+
+    int32_t* hb_histograms0_ptr = aligned_alloc<int32_t>(163840);
+    int32_t* hb_histograms1_ptr = aligned_alloc<int32_t>(163840);
+    int32_t* hb_histograms2_ptr = aligned_alloc<int32_t>(163840);
+    int32_t* hb_histograms3_ptr = aligned_alloc<int32_t>(163840);
+    int32_t* hb_histograms4_ptr = aligned_alloc<int32_t>(163840);
+
+    uint32_t* hb_histo_totalcnt0_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_histo_totalcnt1_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_histo_totalcnt2_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_histo_totalcnt3_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_histo_totalcnt4_ptr = aligned_alloc<uint32_t>(4096);
+
+    uint32_t* hb_histo_size0_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_histo_size1_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_histo_size2_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_histo_size3_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_histo_size4_ptr = aligned_alloc<uint32_t>(4096);
+
+    uint32_t* hb_nonempty_histo0_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_nonempty_histo1_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_nonempty_histo2_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_nonempty_histo3_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_nonempty_histo4_ptr = aligned_alloc<uint32_t>(4096);
+
+    uint8_t* hb_ctx_map0_ptr = aligned_alloc<uint8_t>(4096);
+    uint8_t* hb_ctx_map1_ptr = aligned_alloc<uint8_t>(4096);
+    uint8_t* hb_ctx_map2_ptr = aligned_alloc<uint8_t>(4096);
+    uint8_t* hb_ctx_map3_ptr = aligned_alloc<uint8_t>(4096);
+    uint8_t* hb_ctx_map4_ptr = aligned_alloc<uint8_t>(4096);
+
+    int32_t* hb_histograms_clusd0_ptr = aligned_alloc<int32_t>(5120);
+    int32_t* hb_histograms_clusd1_ptr = aligned_alloc<int32_t>(5120);
+    int32_t* hb_histograms_clusd2_ptr = aligned_alloc<int32_t>(5120);
+    int32_t* hb_histograms_clusd3_ptr = aligned_alloc<int32_t>(5120);
+    int32_t* hb_histograms_clusd4_ptr = aligned_alloc<int32_t>(5120);
+
+    uint32_t* hb_histo_size_clusd0_ptr = aligned_alloc<uint32_t>(128);
+    uint32_t* hb_histo_size_clusd1_ptr = aligned_alloc<uint32_t>(128);
+    uint32_t* hb_histo_size_clusd2_ptr = aligned_alloc<uint32_t>(128);
+    uint32_t* hb_histo_size_clusd3_ptr = aligned_alloc<uint32_t>(128);
+    uint32_t* hb_histo_size_clusd4_ptr = aligned_alloc<uint32_t>(128);
+
+    int32_t* hb_histograms_clusdin0_ptr = aligned_alloc<int32_t>(4096);
+    int32_t* hb_histograms_clusdin1_ptr = aligned_alloc<int32_t>(4096);
+    int32_t* hb_histograms_clusdin2_ptr = aligned_alloc<int32_t>(4096);
+    int32_t* hb_histograms_clusdin3_ptr = aligned_alloc<int32_t>(4096);
+    int32_t* hb_histograms_clusdin4_ptr = aligned_alloc<int32_t>(4096);
+
+    for (int j = 0; j < MAX_NUM_CONFIG; j++) {
+        hb_config[j] = config[j];
+    }
+
+    for (int j = 0; j < 163840; j++) {
+        hb_histograms0_ptr[j] = histograms0_ptr[j];
+        hb_histograms1_ptr[j] = histograms1_ptr[j];
+        hb_histograms2_ptr[j] = histograms2_ptr[j];
+        hb_histograms3_ptr[j] = histograms3_ptr[j];
+        hb_histograms4_ptr[j] = histograms4_ptr[j];
+    }
+
+    for (int j = 0; j < 4096; j++) {
+        hb_histo_totalcnt0_ptr[j] = histo_totalcnt0_ptr[j];
+        hb_histo_totalcnt1_ptr[j] = histo_totalcnt1_ptr[j];
+        hb_histo_totalcnt2_ptr[j] = histo_totalcnt2_ptr[j];
+        hb_histo_totalcnt3_ptr[j] = histo_totalcnt3_ptr[j];
+        hb_histo_totalcnt4_ptr[j] = histo_totalcnt4_ptr[j];
+    }
+
+    for (int j = 0; j < 4096; j++) {
+        hb_histo_size0_ptr[j] = histo_size0_ptr[j];
+        hb_histo_size1_ptr[j] = histo_size1_ptr[j];
+        hb_histo_size2_ptr[j] = histo_size2_ptr[j];
+        hb_histo_size3_ptr[j] = histo_size3_ptr[j];
+        hb_histo_size4_ptr[j] = histo_size4_ptr[j];
+    }
+
+    for (int j = 0; j < 4096; j++) {
+        hb_nonempty_histo0_ptr[j] = nonempty_histo0_ptr[j];
+        hb_nonempty_histo1_ptr[j] = nonempty_histo1_ptr[j];
+        hb_nonempty_histo2_ptr[j] = nonempty_histo2_ptr[j];
+        hb_nonempty_histo3_ptr[j] = nonempty_histo3_ptr[j];
+        hb_nonempty_histo4_ptr[j] = nonempty_histo4_ptr[j];
+    }
+
+    std::vector<cl_mem_ext_ptr_t> mext_o(41);
+    mext_o[0] = {XCL_BANK(7), hb_config, 0};
+
+    mext_o[1] = {XCL_BANK(0), hb_histograms0_ptr, 0};
+    mext_o[2] = {XCL_BANK(0), hb_histograms1_ptr, 0};
+    mext_o[3] = {XCL_BANK(0), hb_histograms2_ptr, 0};
+    mext_o[4] = {XCL_BANK(0), hb_histograms3_ptr, 0};
+    mext_o[5] = {XCL_BANK(0), hb_histograms4_ptr, 0};
+
+    mext_o[6] = {XCL_BANK(1), hb_histo_totalcnt0_ptr, 0};
+    mext_o[7] = {XCL_BANK(1), hb_histo_totalcnt1_ptr, 0};
+    mext_o[8] = {XCL_BANK(1), hb_histo_totalcnt2_ptr, 0};
+    mext_o[9] = {XCL_BANK(1), hb_histo_totalcnt3_ptr, 0};
+    mext_o[10] = {XCL_BANK(1), hb_histo_totalcnt4_ptr, 0};
+
+    mext_o[11] = {XCL_BANK(2), hb_histo_size0_ptr, 0};
+    mext_o[12] = {XCL_BANK(2), hb_histo_size1_ptr, 0};
+    mext_o[13] = {XCL_BANK(2), hb_histo_size2_ptr, 0};
+    mext_o[14] = {XCL_BANK(2), hb_histo_size3_ptr, 0};
+    mext_o[15] = {XCL_BANK(2), hb_histo_size4_ptr, 0};
+
+    mext_o[16] = {XCL_BANK(3), hb_nonempty_histo0_ptr, 0};
+    mext_o[17] = {XCL_BANK(3), hb_nonempty_histo1_ptr, 0};
+    mext_o[18] = {XCL_BANK(3), hb_nonempty_histo2_ptr, 0};
+    mext_o[19] = {XCL_BANK(3), hb_nonempty_histo3_ptr, 0};
+    mext_o[20] = {XCL_BANK(3), hb_nonempty_histo4_ptr, 0};
+
+    mext_o[21] = {XCL_BANK(4), hb_ctx_map0_ptr, 0};
+    mext_o[22] = {XCL_BANK(4), hb_ctx_map1_ptr, 0};
+    mext_o[23] = {XCL_BANK(4), hb_ctx_map2_ptr, 0};
+    mext_o[24] = {XCL_BANK(4), hb_ctx_map3_ptr, 0};
+    mext_o[25] = {XCL_BANK(4), hb_ctx_map4_ptr, 0};
+
+    mext_o[26] = {XCL_BANK(5), hb_histograms_clusd0_ptr, 0};
+    mext_o[27] = {XCL_BANK(5), hb_histograms_clusd1_ptr, 0};
+    mext_o[28] = {XCL_BANK(5), hb_histograms_clusd2_ptr, 0};
+    mext_o[29] = {XCL_BANK(5), hb_histograms_clusd3_ptr, 0};
+    mext_o[30] = {XCL_BANK(5), hb_histograms_clusd4_ptr, 0};
+
+    mext_o[31] = {XCL_BANK(6), hb_histo_size_clusd0_ptr, 0};
+    mext_o[32] = {XCL_BANK(6), hb_histo_size_clusd1_ptr, 0};
+    mext_o[33] = {XCL_BANK(6), hb_histo_size_clusd2_ptr, 0};
+    mext_o[34] = {XCL_BANK(6), hb_histo_size_clusd3_ptr, 0};
+    mext_o[35] = {XCL_BANK(6), hb_histo_size_clusd4_ptr, 0};
+
+    mext_o[36] = {XCL_BANK(7), hb_histograms_clusdin0_ptr, 0};
+    mext_o[37] = {XCL_BANK(7), hb_histograms_clusdin1_ptr, 0};
+    mext_o[38] = {XCL_BANK(7), hb_histograms_clusdin2_ptr, 0};
+    mext_o[39] = {XCL_BANK(7), hb_histograms_clusdin3_ptr, 0};
+    mext_o[40] = {XCL_BANK(7), hb_histograms_clusdin4_ptr, 0};
+
+    // create device buffer and map dev buf to host buf
+    cl::Buffer db_config;
+    cl::Buffer db_histograms0_ptr;
+    cl::Buffer db_histograms1_ptr;
+    cl::Buffer db_histograms2_ptr;
+    cl::Buffer db_histograms3_ptr;
+    cl::Buffer db_histograms4_ptr;
+    cl::Buffer db_histo_totalcnt0_ptr;
+    cl::Buffer db_histo_totalcnt1_ptr;
+    cl::Buffer db_histo_totalcnt2_ptr;
+    cl::Buffer db_histo_totalcnt3_ptr;
+    cl::Buffer db_histo_totalcnt4_ptr;
+    cl::Buffer db_histo_size0_ptr;
+    cl::Buffer db_histo_size1_ptr;
+    cl::Buffer db_histo_size2_ptr;
+    cl::Buffer db_histo_size3_ptr;
+    cl::Buffer db_histo_size4_ptr;
+    cl::Buffer db_nonempty_histo0_ptr;
+    cl::Buffer db_nonempty_histo1_ptr;
+    cl::Buffer db_nonempty_histo2_ptr;
+    cl::Buffer db_nonempty_histo3_ptr;
+    cl::Buffer db_nonempty_histo4_ptr;
+    cl::Buffer db_ctx_map0_ptr;
+    cl::Buffer db_ctx_map1_ptr;
+    cl::Buffer db_ctx_map2_ptr;
+    cl::Buffer db_ctx_map3_ptr;
+    cl::Buffer db_ctx_map4_ptr;
+    cl::Buffer db_histograms_clusd0_ptr;
+    cl::Buffer db_histograms_clusd1_ptr;
+    cl::Buffer db_histograms_clusd2_ptr;
+    cl::Buffer db_histograms_clusd3_ptr;
+    cl::Buffer db_histograms_clusd4_ptr;
+    cl::Buffer db_histo_size_clusd0_ptr;
+    cl::Buffer db_histo_size_clusd1_ptr;
+    cl::Buffer db_histo_size_clusd2_ptr;
+    cl::Buffer db_histo_size_clusd3_ptr;
+    cl::Buffer db_histo_size_clusd4_ptr;
+    cl::Buffer db_histograms_clusdin0_ptr;
+    cl::Buffer db_histograms_clusdin1_ptr;
+    cl::Buffer db_histograms_clusdin2_ptr;
+    cl::Buffer db_histograms_clusdin3_ptr;
+    cl::Buffer db_histograms_clusdin4_ptr;
+
+    db_config = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                           sizeof(uint32_t) * 30, &mext_o[0]);
+
+    db_histograms0_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                    sizeof(int32_t) * 163840, &mext_o[1]);
+    db_histograms1_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                    sizeof(int32_t) * 163840, &mext_o[2]);
+    db_histograms2_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                    sizeof(int32_t) * 163840, &mext_o[3]);
+    db_histograms3_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                    sizeof(int32_t) * 163840, &mext_o[4]);
+    db_histograms4_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                    sizeof(int32_t) * 163840, &mext_o[5]);
+
+    db_histo_totalcnt0_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                        sizeof(uint32_t) * 4096, &mext_o[6]);
+    db_histo_totalcnt1_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                        sizeof(uint32_t) * 4096, &mext_o[7]);
+    db_histo_totalcnt2_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                        sizeof(uint32_t) * 4096, &mext_o[8]);
+    db_histo_totalcnt3_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                        sizeof(uint32_t) * 4096, &mext_o[9]);
+    db_histo_totalcnt4_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                        sizeof(uint32_t) * 4096, &mext_o[10]);
+
+    db_histo_size0_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                    sizeof(uint32_t) * 4096, &mext_o[11]);
+    db_histo_size1_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                    sizeof(uint32_t) * 4096, &mext_o[12]);
+    db_histo_size2_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                    sizeof(uint32_t) * 4096, &mext_o[13]);
+    db_histo_size3_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                    sizeof(uint32_t) * 4096, &mext_o[14]);
+    db_histo_size4_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                    sizeof(uint32_t) * 4096, &mext_o[15]);
+
+    db_nonempty_histo0_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                        sizeof(uint32_t) * 4096, &mext_o[16]);
+    db_nonempty_histo1_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                        sizeof(uint32_t) * 4096, &mext_o[17]);
+    db_nonempty_histo2_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                        sizeof(uint32_t) * 4096, &mext_o[18]);
+    db_nonempty_histo3_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                        sizeof(uint32_t) * 4096, &mext_o[19]);
+    db_nonempty_histo4_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                        sizeof(uint32_t) * 4096, &mext_o[20]);
+
+    db_ctx_map0_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                 sizeof(uint8_t) * 4096, &mext_o[21]);
+    db_ctx_map1_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                 sizeof(uint8_t) * 4096, &mext_o[22]);
+    db_ctx_map2_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                 sizeof(uint8_t) * 4096, &mext_o[23]);
+    db_ctx_map3_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                 sizeof(uint8_t) * 4096, &mext_o[24]);
+    db_ctx_map4_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                 sizeof(uint8_t) * 4096, &mext_o[25]);
+
+    db_histograms_clusd0_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                          sizeof(int32_t) * 5120, &mext_o[26]);
+    db_histograms_clusd1_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                          sizeof(int32_t) * 5120, &mext_o[27]);
+    db_histograms_clusd2_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                          sizeof(int32_t) * 5120, &mext_o[28]);
+    db_histograms_clusd3_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                          sizeof(int32_t) * 5120, &mext_o[29]);
+    db_histograms_clusd4_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                          sizeof(int32_t) * 5120, &mext_o[30]);
+
+    db_histo_size_clusd0_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                          sizeof(uint32_t) * 128, &mext_o[31]);
+    db_histo_size_clusd1_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                          sizeof(uint32_t) * 128, &mext_o[32]);
+    db_histo_size_clusd2_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                          sizeof(uint32_t) * 128, &mext_o[33]);
+    db_histo_size_clusd3_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                          sizeof(uint32_t) * 128, &mext_o[34]);
+    db_histo_size_clusd4_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                          sizeof(uint32_t) * 128, &mext_o[35]);
+
+    db_histograms_clusdin0_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                            sizeof(int32_t) * 4096, &mext_o[36]);
+    db_histograms_clusdin1_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                            sizeof(int32_t) * 4096, &mext_o[37]);
+    db_histograms_clusdin2_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                            sizeof(int32_t) * 4096, &mext_o[38]);
+    db_histograms_clusdin3_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                            sizeof(int32_t) * 4096, &mext_o[39]);
+    db_histograms_clusdin4_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                            sizeof(int32_t) * 4096, &mext_o[40]);
+
+    // add buffers to migrate
+    std::vector<cl::Memory> ob_in;
+    std::vector<cl::Memory> ob_out;
+
+    ob_in.push_back(db_config);
+    ob_in.push_back(db_histograms0_ptr);
+    ob_in.push_back(db_histograms1_ptr);
+    ob_in.push_back(db_histograms2_ptr);
+    ob_in.push_back(db_histograms3_ptr);
+    ob_in.push_back(db_histograms4_ptr);
+    ob_in.push_back(db_histo_totalcnt0_ptr);
+    ob_in.push_back(db_histo_totalcnt1_ptr);
+    ob_in.push_back(db_histo_totalcnt2_ptr);
+    ob_in.push_back(db_histo_totalcnt3_ptr);
+    ob_in.push_back(db_histo_totalcnt4_ptr);
+    ob_in.push_back(db_histo_size0_ptr);
+    ob_in.push_back(db_histo_size1_ptr);
+    ob_in.push_back(db_histo_size2_ptr);
+    ob_in.push_back(db_histo_size3_ptr);
+    ob_in.push_back(db_histo_size4_ptr);
+    ob_in.push_back(db_nonempty_histo0_ptr);
+    ob_in.push_back(db_nonempty_histo1_ptr);
+    ob_in.push_back(db_nonempty_histo2_ptr);
+    ob_in.push_back(db_nonempty_histo3_ptr);
+    ob_in.push_back(db_nonempty_histo4_ptr);
+
+    ob_out.push_back(db_config);
+    ob_out.push_back(db_ctx_map0_ptr);
+    ob_out.push_back(db_ctx_map1_ptr);
+    ob_out.push_back(db_ctx_map2_ptr);
+    ob_out.push_back(db_ctx_map3_ptr);
+    ob_out.push_back(db_ctx_map4_ptr);
+    ob_out.push_back(db_histograms_clusd0_ptr);
+    ob_out.push_back(db_histograms_clusd1_ptr);
+    ob_out.push_back(db_histograms_clusd2_ptr);
+    ob_out.push_back(db_histograms_clusd3_ptr);
+    ob_out.push_back(db_histograms_clusd4_ptr);
+    ob_out.push_back(db_histo_size_clusd0_ptr);
+    ob_out.push_back(db_histo_size_clusd1_ptr);
+    ob_out.push_back(db_histo_size_clusd2_ptr);
+    ob_out.push_back(db_histo_size_clusd3_ptr);
+    ob_out.push_back(db_histo_size_clusd4_ptr);
+    ob_out.push_back(db_histograms_clusdin0_ptr);
+    ob_out.push_back(db_histograms_clusdin1_ptr);
+    ob_out.push_back(db_histograms_clusdin2_ptr);
+    ob_out.push_back(db_histograms_clusdin3_ptr);
+    ob_out.push_back(db_histograms_clusdin4_ptr);
+
+    // set kernel args
+    for (int i = 0; i < repInt; i++) {
+        cluster_kernel[i].setArg(0, db_config);
+        cluster_kernel[i].setArg(1, db_histograms0_ptr);
+        cluster_kernel[i].setArg(2, db_histo_totalcnt0_ptr);
+        cluster_kernel[i].setArg(3, db_histo_size0_ptr);
+        cluster_kernel[i].setArg(4, db_nonempty_histo0_ptr);
+        cluster_kernel[i].setArg(5, db_ctx_map0_ptr);
+        cluster_kernel[i].setArg(6, db_histograms_clusd0_ptr);
+        cluster_kernel[i].setArg(7, db_histo_size_clusd0_ptr);
+        cluster_kernel[i].setArg(8, db_histograms_clusdin0_ptr);
+        cluster_kernel[i].setArg(9, db_histograms1_ptr);
+        cluster_kernel[i].setArg(10, db_histo_totalcnt1_ptr);
+        cluster_kernel[i].setArg(11, db_histo_size1_ptr);
+        cluster_kernel[i].setArg(12, db_nonempty_histo1_ptr);
+        cluster_kernel[i].setArg(13, db_ctx_map1_ptr);
+        cluster_kernel[i].setArg(14, db_histograms_clusd1_ptr);
+        cluster_kernel[i].setArg(15, db_histo_size_clusd1_ptr);
+        cluster_kernel[i].setArg(16, db_histograms_clusdin1_ptr);
+        cluster_kernel[i].setArg(17, db_histograms2_ptr);
+        cluster_kernel[i].setArg(18, db_histo_totalcnt2_ptr);
+        cluster_kernel[i].setArg(19, db_histo_size2_ptr);
+        cluster_kernel[i].setArg(20, db_nonempty_histo2_ptr);
+        cluster_kernel[i].setArg(21, db_ctx_map2_ptr);
+        cluster_kernel[i].setArg(22, db_histograms_clusd2_ptr);
+        cluster_kernel[i].setArg(23, db_histo_size_clusd2_ptr);
+        cluster_kernel[i].setArg(24, db_histograms_clusdin2_ptr);
+        cluster_kernel[i].setArg(25, db_histograms3_ptr);
+        cluster_kernel[i].setArg(26, db_histo_totalcnt3_ptr);
+        cluster_kernel[i].setArg(27, db_histo_size3_ptr);
+        cluster_kernel[i].setArg(28, db_nonempty_histo3_ptr);
+        cluster_kernel[i].setArg(29, db_ctx_map3_ptr);
+        cluster_kernel[i].setArg(30, db_histograms_clusd3_ptr);
+        cluster_kernel[i].setArg(31, db_histo_size_clusd3_ptr);
+        cluster_kernel[i].setArg(32, db_histograms_clusdin3_ptr);
+        cluster_kernel[i].setArg(33, db_histograms4_ptr);
+        cluster_kernel[i].setArg(34, db_histo_totalcnt4_ptr);
+        cluster_kernel[i].setArg(35, db_histo_size4_ptr);
+        cluster_kernel[i].setArg(36, db_nonempty_histo4_ptr);
+        cluster_kernel[i].setArg(37, db_ctx_map4_ptr);
+        cluster_kernel[i].setArg(38, db_histograms_clusd4_ptr);
+        cluster_kernel[i].setArg(39, db_histo_size_clusd4_ptr);
+        cluster_kernel[i].setArg(40, db_histograms_clusdin4_ptr);
+    }
+
+    // launch kernel and calculate kernel execution time
+    std::cout << "INFO: Kernel Start" << std::endl;
+    // declare events
+    std::vector<cl::Event> events_write(1);
+    std::vector<cl::Event> events_kernel(1);
+    std::vector<cl::Event> events_read(1);
+
+    // migrate
+    q.enqueueMigrateMemObjects(ob_in, 0, nullptr, &events_write[0]);
+    q.enqueueTask(cluster_kernel[0], &events_write, &events_kernel[0]);
+    q.enqueueMigrateMemObjects(ob_out, 1, &events_kernel, &events_read[0]);
+    q.finish();
+
+    struct timeval end_time;
+    gettimeofday(&end_time, 0);
+    std::cout << "INFO: Finish kernel execution" << std::endl;
+    std::cout << "INFO: Finish E2E execution" << std::endl;
+
+    // print related times
+    unsigned long timeStart, timeEnd, exec_time0;
+    std::cout << "-------------------------------------------------------" << std::endl;
+    events_write[0].getProfilingInfo(CL_PROFILING_COMMAND_START, &timeStart);
+    events_write[0].getProfilingInfo(CL_PROFILING_COMMAND_END, &timeEnd);
+    exec_time0 = (timeEnd - timeStart) / 1000.0;
+    std::cout << "INFO: Data transfer from host to device: " << exec_time0 << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+    events_read[0].getProfilingInfo(CL_PROFILING_COMMAND_START, &timeStart);
+    events_read[0].getProfilingInfo(CL_PROFILING_COMMAND_END, &timeEnd);
+    exec_time0 = (timeEnd - timeStart) / 1000.0;
+    std::cout << "INFO: Kernel1 Data transfer from device to host: " << exec_time0 << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+    exec_time0 = 0;
+    for (int i = 0; i < 1; ++i) {
+        events_kernel[0].getProfilingInfo(CL_PROFILING_COMMAND_START, &timeStart);
+        events_kernel[0].getProfilingInfo(CL_PROFILING_COMMAND_END, &timeEnd);
+        exec_time0 += (timeEnd - timeStart) / 1000.0;
+
+        std::cout << "INFO: Kernel" << i + 1 << " execution: " << (timeEnd - timeStart) / 1000.0 << " us\n";
+        std::cout << "-------------------------------------------------------" << std::endl;
+    }
+    std::cout << "INFO: kernel total execution: " << exec_time0 << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+    unsigned long exec_timeE2E = diff(&end_time, &start_time);
+    std::cout << "INFO: FPGA execution time:" << exec_timeE2E << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+
+    for (int j = 0; j < MAX_NUM_CONFIG; j++) {
+        config[j] = hb_config[j];
+    }
+    // std::cout << "out kernel config size:" << 30 << std::endl;
+    // std::cout << "histogram size: " << config[0] << "," << config[1] << "," << config[2]
+    //    << "," << config[3] << "," << config[4] << std::endl;
+    // std::cout << "non-empty histogram size: " << "," << config[5] << "," << config[6]
+    //    << "," << config[7] << "," << config[8] << "," << config[9] << std::endl;
+    // std::cout << "largest idx: " << config[10] << "," << config[11] << "," << config[12]
+    //    << "," << config[13] << "," << config[14] << std::endl;
+    // std::cout << "num cluster: " << config[15] << "," << config[16] << "," << config[17]
+    //    << "," << config[18] << "," << config[19] << std::endl;
+    // std::cout << "histo_size_clusdin: " << config[20] << "," << config[21] << "," << config[22]
+    //    << "," << config[23] << "," << config[24] << std::endl;
+    // std::cout << "do_once: " << config[25] << "," << config[26] << "," << config[27]
+    //    << "," << config[28] << "," << config[29] << std::endl;
+
+    // output
+    std::cout << "ctx_map_ptr:" << std::endl;
+    for (int j = 0; j < 4096; j++) {
+        ctx_map0_ptr[j] = hb_ctx_map0_ptr[j];
+        ctx_map1_ptr[j] = hb_ctx_map1_ptr[j];
+        ctx_map2_ptr[j] = hb_ctx_map2_ptr[j];
+        ctx_map3_ptr[j] = hb_ctx_map3_ptr[j];
+        ctx_map4_ptr[j] = hb_ctx_map4_ptr[j];
+    }
+
+    std::cout << "histograms_clusd_ptr:" << std::endl;
+    for (int j = 0; j < 5120; j++) {
+        histograms_clusd0_ptr[j] = hb_histograms_clusd0_ptr[j];
+        histograms_clusd1_ptr[j] = hb_histograms_clusd1_ptr[j];
+        histograms_clusd2_ptr[j] = hb_histograms_clusd2_ptr[j];
+        histograms_clusd3_ptr[j] = hb_histograms_clusd3_ptr[j];
+        histograms_clusd4_ptr[j] = hb_histograms_clusd4_ptr[j];
+    }
+
+    std::cout << "histo_size_clusd_ptr:" << std::endl;
+    for (int j = 0; j < 128; j++) {
+        histo_size_clusd0_ptr[j] = hb_histo_size_clusd0_ptr[j];
+        histo_size_clusd1_ptr[j] = hb_histo_size_clusd1_ptr[j];
+        histo_size_clusd2_ptr[j] = hb_histo_size_clusd2_ptr[j];
+        histo_size_clusd3_ptr[j] = hb_histo_size_clusd3_ptr[j];
+        histo_size_clusd4_ptr[j] = hb_histo_size_clusd4_ptr[j];
+    }
+
+    std::cout << "histograms_clusdin_ptr:" << std::endl;
+    for (int j = 0; j < 4096; j++) {
+        histograms_clusdin0_ptr[j] = hb_histograms_clusdin0_ptr[j];
+        histograms_clusdin1_ptr[j] = hb_histograms_clusdin1_ptr[j];
+        histograms_clusdin2_ptr[j] = hb_histograms_clusdin2_ptr[j];
+        histograms_clusdin3_ptr[j] = hb_histograms_clusdin3_ptr[j];
+        histograms_clusdin4_ptr[j] = hb_histograms_clusdin4_ptr[j];
+    }
+
+    // for(int i=0; i<config[17]; i++) {
+    //    for(int j=0; j<histo_size_clusd2_ptr[i]; j++) {
+    //        printf("[HOST] cluster 2 %d %d %d\n", i, j, histograms_clusd2_ptr[i*40+j]);
+    //    }
+    //}
+    // for(int j=0; j<config[22]; j++) {
+    //    printf("[HOST] cluster in 2 %d %d\n", j, histograms_clusdin2_ptr[j]);
+    //}
+
+    free(hb_config);
+    free(hb_histograms0_ptr);
+    free(hb_histograms1_ptr);
+    free(hb_histograms2_ptr);
+    free(hb_histograms3_ptr);
+    free(hb_histograms4_ptr);
+    free(hb_histo_totalcnt0_ptr);
+    free(hb_histo_totalcnt1_ptr);
+    free(hb_histo_totalcnt2_ptr);
+    free(hb_histo_totalcnt3_ptr);
+    free(hb_histo_totalcnt4_ptr);
+    free(hb_histo_size0_ptr);
+    free(hb_histo_size1_ptr);
+    free(hb_histo_size2_ptr);
+    free(hb_histo_size3_ptr);
+    free(hb_histo_size4_ptr);
+    free(hb_nonempty_histo0_ptr);
+    free(hb_nonempty_histo1_ptr);
+    free(hb_nonempty_histo2_ptr);
+    free(hb_nonempty_histo3_ptr);
+    free(hb_nonempty_histo4_ptr);
+    free(hb_ctx_map0_ptr);
+    free(hb_ctx_map1_ptr);
+    free(hb_ctx_map2_ptr);
+    free(hb_ctx_map3_ptr);
+    free(hb_ctx_map4_ptr);
+    free(hb_histograms_clusd0_ptr);
+    free(hb_histograms_clusd1_ptr);
+    free(hb_histograms_clusd2_ptr);
+    free(hb_histograms_clusd3_ptr);
+    free(hb_histograms_clusd4_ptr);
+    free(hb_histo_size_clusd0_ptr);
+    free(hb_histo_size_clusd1_ptr);
+    free(hb_histo_size_clusd2_ptr);
+    free(hb_histo_size_clusd3_ptr);
+    free(hb_histo_size_clusd4_ptr);
+    free(hb_histograms_clusdin0_ptr);
+    free(hb_histograms_clusdin1_ptr);
+    free(hb_histograms_clusdin2_ptr);
+    free(hb_histograms_clusdin3_ptr);
+    free(hb_histograms_clusdin4_ptr);
+    std::cout << "finished opencl host" << std::endl;
+}
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/acc_cluster_histogram/host/host_cluster_histogram.hpp b/codec/L2/demos/jxlEnc/acc_cluster_histogram/host/host_cluster_histogram.hpp
new file mode 100644
index 0000000000..2f83f68519
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_cluster_histogram/host/host_cluster_histogram.hpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HOST_CLUSTER_HISTOGRAM_HPP
+#define HOST_CLUSTER_HISTOGRAM_HPP
+
+#include <iostream>
+#include <sys/time.h>
+#include "xcl2.hpp"
+#include "xf_utils_sw/logger.hpp"
+
+void hls_ANSclusterHistogram_wrapper(std::string xclbinPath,  // presumably the xclbin to load -- TODO confirm
+                                     uint32_t* config,  // NOTE(review): looks like in/out (MAX_NUM_CONFIG words)
+                                     //==== set 0; NOTE(review): in/out + counts below read off the host .cpp, confirm
+                                     int32_t* histograms0_ptr,          // in, 163840 elements copied
+                                     uint32_t* histo_totalcnt0_ptr,     // in, 4096 elements copied
+                                     uint32_t* histo_size0_ptr,         // in, 4096 elements copied
+                                     uint32_t* nonempty_histo0_ptr,     // in, 4096 elements copied
+                                     uint8_t* ctx_map0_ptr,             // out, 4096 elements written
+                                     int32_t* histograms_clusd0_ptr,    // out, 5120 elements written
+                                     uint32_t* histo_size_clusd0_ptr,   // out, 128 elements written
+                                     int32_t* histograms_clusdin0_ptr,  // out, 4096 elements written
+                                     //==== set 1: same eight-array layout as set 0
+                                     int32_t* histograms1_ptr,
+                                     uint32_t* histo_totalcnt1_ptr,
+                                     uint32_t* histo_size1_ptr,
+                                     uint32_t* nonempty_histo1_ptr,
+                                     uint8_t* ctx_map1_ptr,
+                                     int32_t* histograms_clusd1_ptr,
+                                     uint32_t* histo_size_clusd1_ptr,
+                                     int32_t* histograms_clusdin1_ptr,
+                                     //==== set 2: same eight-array layout as set 0
+                                     int32_t* histograms2_ptr,
+                                     uint32_t* histo_totalcnt2_ptr,
+                                     uint32_t* histo_size2_ptr,
+                                     uint32_t* nonempty_histo2_ptr,
+                                     uint8_t* ctx_map2_ptr,
+                                     int32_t* histograms_clusd2_ptr,
+                                     uint32_t* histo_size_clusd2_ptr,
+                                     int32_t* histograms_clusdin2_ptr,
+                                     //==== set 3: same eight-array layout as set 0
+                                     int32_t* histograms3_ptr,
+                                     uint32_t* histo_totalcnt3_ptr,
+                                     uint32_t* histo_size3_ptr,
+                                     uint32_t* nonempty_histo3_ptr,
+                                     uint8_t* ctx_map3_ptr,
+                                     int32_t* histograms_clusd3_ptr,
+                                     uint32_t* histo_size_clusd3_ptr,
+                                     int32_t* histograms_clusdin3_ptr,
+                                     //==== set 4: same eight-array layout as set 0
+                                     int32_t* histograms4_ptr,
+                                     uint32_t* histo_totalcnt4_ptr,
+                                     uint32_t* histo_size4_ptr,
+                                     uint32_t* nonempty_histo4_ptr,
+                                     uint8_t* ctx_map4_ptr,
+                                     int32_t* histograms_clusd4_ptr,
+                                     uint32_t* histo_size_clusd4_ptr,
+                                     int32_t* histograms_clusdin4_ptr);
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/acc_cluster_histogram/kernel/hls_cluster_histogram.cpp b/codec/L2/demos/jxlEnc/acc_cluster_histogram/kernel/hls_cluster_histogram.cpp
new file mode 100644
index 0000000000..b298fd9e73
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_cluster_histogram/kernel/hls_cluster_histogram.cpp
@@ -0,0 +1,1321 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HLS_CLUSTER_HISTOGRAM_CPP
+#define HLS_CLUSTER_HISTOGRAM_CPP
+
+#include "stdio.h"
+#include "hls_cluster_histogram.hpp"
+
+#define FLOAT_MAX 3.402823466e+38F
+
+/**
+ * @brief Read one 32-bit histogram entry out of the 64-bit packed histogram storage.
+ *
+ * Two consecutive 32-bit entries share one 64-bit word: the even entry sits in
+ * bits [31:0] and the odd entry in bits [63:32].
+ *
+ * @param idx0        first index (row) into the histogram storage.
+ * @param idx1        index of the 32-bit entry inside the row.
+ * @param histograms  packed histogram storage.
+ * @return the 32-bit entry selected by idx0 / idx1.
+ */
+unsigned get_uram(unsigned idx0,
+                  unsigned idx1,
+#ifndef __SYNTHESIS__
+                  std::vector<std::vector<ap_uint<64> > >& histograms
+#else
+                  ap_uint<64> histograms[4096][20]
+#endif
+                  ) {
+    ap_uint<64> packed_word = histograms[idx0][idx1 >> 1];
+    if ((idx1 & 1u) != 0) {
+        // odd entry: upper half of the word
+        return packed_word.range(63, 32);
+    }
+    // even entry: lower half of the word
+    return packed_word.range(31, 0);
+}
+
+/**
+ * @brief Sum eight floats with a balanced adder tree.
+ *
+ * The additions are grouped as ((in0+in1)+(in2+in3)) + ((in4+in5)+(in6+in7)).
+ *
+ * @param in  the eight values to add.
+ * @return the sum of the eight values.
+ */
+inline float compute_8(float in[8]) {
+    const float lower_quad = (in[0] + in[1]) + (in[2] + in[3]);
+    const float upper_quad = (in[4] + in[5]) + (in[6] + in[7]);
+    return lower_quad + upper_quad;
+}
+
+/**
+ * @brief Stream out, for each listed histogram, its index and its size.
+ *
+ * @param numNonempty     number of entries of nonempty_histo to walk.
+ * @param nonempty_histo  list of histogram indices.
+ * @param histo_size      size of each histogram, addressed by histogram index.
+ * @param stream_idx      output: the histogram index of every listed entry.
+ * @param stream_a_size0  output: histo_size of every listed entry.
+ */
+void GetIdx(unsigned int numNonempty,
+            unsigned int nonempty_histo[4096],
+            unsigned int histo_size[4096],
+            hls::stream<unsigned int>& stream_idx,
+            hls::stream<unsigned int>& stream_a_size0) {
+GETIDX:
+    for (unsigned int n = 0; n < numNonempty; ++n) {
+#pragma HLS LOOP_TRIPCOUNT min = 10000 max = 10000
+#pragma HLS pipeline
+        unsigned int ctx = nonempty_histo[n];
+        stream_idx.write(ctx);
+        stream_a_size0.write(histo_size[ctx]);
+    }
+}
+
+/**
+ * @brief Streaming stage that emits the entries of every listed histogram ("A") and its per-context metadata.
+ *
+ * At the start of each context it reads the histogram index and size, forwards
+ * the size, publishes the loop length used by the downstream stages and the
+ * histogram's total count. It then writes a_size entries of histograms[idx]
+ * to stream_a, one per loop iteration.
+ *
+ * @param isEntropy       when false, the reference size is read from stream_b_size0, forwarded on
+ *                        stream_b_size1, and the published length is max(a_size, b_size);
+ *                        when true the published length is a_size.
+ * @param numNonempty     number of contexts to process.
+ * @param histo_size      not referenced in this function body.
+ * @param histograms      packed histogram storage, read through get_uram().
+ * @param histo_totalcnt  total count of each histogram, addressed by histogram index.
+ * @param nonempty_histo  not referenced in this function body.
+ * @param stream_idx      input: histogram index per context.
+ * @param stream_b_size0  input: reference size per context (read only when !isEntropy).
+ * @param stream_b_size1  output: forwarded reference size (written only when !isEntropy).
+ * @param stream_size0    output: loop length per context.
+ * @param stream_a        output: histogram entries.
+ * @param stream_a_size0  input: histogram size per context.
+ * @param stream_a_size1  output: forwarded histogram size.
+ * @param stream_a_count  output: histo_totalcnt[idx] per context.
+ */
+void GetA(bool isEntropy,
+          unsigned int numNonempty,
+          unsigned int histo_size[4096],
+#ifndef __SYNTHESIS__
+          std::vector<std::vector<ap_uint<64> > >& histograms,
+#else
+          ap_uint<64> histograms[4096][20],
+#endif
+          unsigned int histo_totalcnt[4096],
+          unsigned int nonempty_histo[4096],
+          hls::stream<unsigned int>& stream_idx,
+          hls::stream<unsigned int>& stream_b_size0,
+          hls::stream<unsigned int>& stream_b_size1,
+          hls::stream<unsigned int>& stream_size0,
+          hls::stream<unsigned int>& stream_a,
+          hls::stream<unsigned int>& stream_a_size0,
+          hls::stream<unsigned int>& stream_a_size1,
+          hls::stream<unsigned int>& stream_a_count) {
+    unsigned char count_a = 0;      // entry position inside the current histogram
+    unsigned int count_context = 0; // number of contexts completed so far
+    unsigned int idx;
+    unsigned char a_size; // 8-bit: the 32-bit value read from the stream is narrowed
+    unsigned int a_total_count;
+    unsigned char size;
+GETA_OUT:
+    while (count_context < numNonempty) {
+#pragma HLS LOOP_TRIPCOUNT min = 10000 max = 10000
+#pragma HLS pipeline
+        // First entry of a context: fetch and publish the per-context metadata.
+        if (count_a == 0) {
+            idx = stream_idx.read();
+            a_size = stream_a_size0.read();
+            stream_a_size1.write(a_size);
+            if (!isEntropy) {
+                unsigned char b_size = stream_b_size0.read();
+                stream_b_size1.write(b_size);
+                size = hls::max(a_size, b_size);
+            } else {
+                size = a_size;
+            }
+            // printf("GetA isEntropy=%d, idx=%d, count_context=%d, count_a=%d,
+            // size=%d, a_size=%d\n",
+            //  isEntropy, idx, count_context, count_a, size, a_size);
+            stream_size0.write(size);
+            a_total_count = histo_totalcnt[idx];
+            stream_a_count.write(a_total_count);
+        }
+        unsigned int tmp = get_uram(idx, count_a, histograms); // histograms[idx][count_a];
+        stream_a.write(tmp);
+        count_a++;
+        // Only a_size entries are emitted per context, even when the published length is larger.
+        if (count_a == a_size) {
+            count_a = 0;
+            count_context++;
+        }
+    }
+}
+
+/**
+ * @brief Streaming stage that replays the reference histogram ("B") once per context.
+ *
+ * Nothing is written to any stream when isEntropy is true.
+ *
+ * @param isEntropy       when true the stage does nothing.
+ * @param numNonempty     number of contexts, i.e. how many times the reference is replayed.
+ * @param refSize         number of entries of ref_histo to emit per context.
+ * @param ref_histo       reference histogram entries.
+ * @param ref_totalcount  total count of the reference histogram.
+ * @param stream_b        output: reference entries, refSize values per context.
+ * @param stream_b_size0  output: refSize, once per context.
+ * @param stream_b_count  output: ref_totalcount, once per context.
+ */
+void GetB(bool isEntropy,
+          unsigned int numNonempty,
+          unsigned int refSize,
+          unsigned int ref_histo[40],
+          unsigned int ref_totalcount,
+          hls::stream<unsigned int>& stream_b,
+          hls::stream<unsigned int>& stream_b_size0,
+          hls::stream<unsigned int>& stream_b_count) {
+    unsigned int count_context = 0; // number of contexts completed so far
+    unsigned char count_b = 0;      // entry position inside the reference histogram
+    unsigned char b_size;           // 8-bit copy of refSize
+    unsigned int b_total_count;
+    if (!isEntropy) {
+    GETB_OUT:
+        while (count_context < numNonempty) {
+#pragma HLS LOOP_TRIPCOUNT min = 10000 max = 10000
+            // First entry of a context: publish size and total count.
+            if (count_b == 0) {
+                b_size = refSize;
+                stream_b_size0.write(b_size);
+                b_total_count = ref_totalcount;
+                stream_b_count.write(b_total_count);
+            }
+            stream_b.write(ref_histo[count_b]);
+            count_b++;
+            if (count_b == b_size) {
+                count_b = 0;
+                count_context++;
+            }
+        }
+    }
+}
+
+/**
+ * @brief Streaming stage that turns histogram entries into the per-entry and per-context terms of the distance.
+ *
+ * For every entry it writes counts * log2(counts) to stream_dist (0 when counts
+ * is 0 or equals the context total). At the end of each context it writes
+ * sum_count * log2(total) to stream_dist_total, where sum_count is the sum of
+ * the counts that were not equal to the total.
+ *
+ * @param isEntropy          when true only histogram A is consumed; when false the entries of A and B
+ *                           are added, and entries beyond a_size / b_size count as 0.
+ * @param numNonempty        number of contexts to process.
+ * @param stream_size0       input: loop length per context.
+ * @param stream_size1       output: copy of the loop length.
+ * @param stream_size2       output: second copy of the loop length.
+ * @param stream_a           input: entries of histogram A.
+ * @param stream_a_size1     input: size of histogram A per context.
+ * @param stream_a_count     input: total count of histogram A per context.
+ * @param stream_b           input: entries of histogram B (read only when !isEntropy).
+ * @param stream_b_size1     input: size of histogram B per context (read only when !isEntropy).
+ * @param stream_b_count     input: total count of histogram B per context (read only when !isEntropy).
+ * @param stream_dist_total  output: one value per context, sum_count * log2(total).
+ * @param stream_dist        output: one value per entry.
+ */
+void DoHistogramDistanceEntropy(bool isEntropy,
+                                unsigned int numNonempty,
+                                hls::stream<unsigned int>& stream_size0,
+                                hls::stream<unsigned int>& stream_size1,
+                                hls::stream<unsigned int>& stream_size2,
+                                hls::stream<unsigned int>& stream_a,
+                                hls::stream<unsigned int>& stream_a_size1,
+                                hls::stream<unsigned int>& stream_a_count,
+                                hls::stream<unsigned int>& stream_b,
+                                hls::stream<unsigned int>& stream_b_size1,
+                                hls::stream<unsigned int>& stream_b_count,
+                                hls::stream<float>& stream_dist_total,
+                                hls::stream<float>& stream_dist) {
+    int count_debug = 0; // only incremented; consumed by the commented-out debug print below
+    unsigned int count_context = 0;
+    unsigned char count_s = 0; // entry position inside the current context
+    unsigned char a_size;
+    unsigned int a_total_count;
+    unsigned char b_size;
+    unsigned int b_total_count;
+    unsigned int sum_count = 0;
+    unsigned char size = 0;
+    float total;
+    float totallog2;
+DISTANCE_OUT:
+    while (count_context < numNonempty) {
+#pragma HLS LOOP_TRIPCOUNT min = 10000 max = 10000
+        // First entry of a context: read the metadata and fan the loop length out.
+        if (count_s == 0) {
+            sum_count = 0;
+            a_size = stream_a_size1.read();
+            a_total_count = stream_a_count.read();
+            if (!isEntropy) {
+                b_size = stream_b_size1.read();
+                b_total_count = stream_b_count.read();
+                total = a_total_count + b_total_count;
+            } else {
+                total = a_total_count;
+            }
+            // log2 is skipped for an empty total
+            totallog2 = total == 0 ? 0 : hls::log2(total);
+            size = stream_size0.read();
+            stream_size1.write(size);
+            stream_size2.write(size);
+            // printf("DoHist count_context=%d, count_s=%d, size=%d\n", count_context,
+            // count_s, size);
+        }
+        unsigned int counts;
+        if (!isEntropy) {
+            // A stream is only read while the position is inside that histogram's size.
+            unsigned int a_counts = a_size > count_s ? stream_a.read() : 0;
+            unsigned int b_counts = b_size > count_s ? stream_b.read() : 0;
+            counts = a_counts + b_counts;
+        } else {
+            unsigned int tmp = stream_a.read();
+            counts = tmp;
+        }
+        float countlog2 = counts == 0 ? 0 : hls::log2((float)counts);
+        // An entry that holds the whole total contributes nothing to either term.
+        bool flag = counts == total;
+        sum_count += flag ? 0 : counts;
+        float tmp = flag ? 0 : counts * countlog2;
+        stream_dist.write(tmp);
+        count_s++;
+        if (count_s == size) {
+            // printf("DoHist Write stream_dist_total %d %d %d\n", count_context,
+            // count_s, size);
+            count_debug++;
+            stream_dist_total.write(sum_count * totallog2);
+            count_s = 0;
+            count_context++;
+        }
+    }
+    // printf("stream_dist_total in=%d\n", count_debug);
+}
+
+/**
+ * @brief Streaming stage that reduces the per-entry terms in groups of eight.
+ *
+ * Each value read from stream_dist is stored at position (count_s % 8) of a
+ * local buffer; after every eighth value the buffer is summed with
+ * compute_8() and the partial sum is written to stream_sum.
+ *
+ * NOTE(review): a partial sum is only written on a full group of eight, so
+ * this presumably relies on every size being a multiple of 8 - confirm
+ * against the producers of the size values.
+ *
+ * @param numNonempty   number of contexts to process.
+ * @param stream_size1  input: number of entries per context.
+ * @param stream_dist   input: one term per entry.
+ * @param stream_sum    output: one partial sum per group of eight entries.
+ */
+void GroupSum(unsigned int numNonempty,
+              hls::stream<unsigned int>& stream_size1,
+              hls::stream<float>& stream_dist,
+              hls::stream<float>& stream_sum) {
+    int count_debug = 0; // only incremented; consumed by the commented-out debug print below
+    unsigned int count_context = 0;
+    unsigned char count_s = 0; // entry position inside the current context
+    float sum_array[8];
+    unsigned char size;
+GROUPSUM_OUT:
+    while (count_context < numNonempty) {
+#pragma HLS LOOP_TRIPCOUNT min = 10000 max = 10000
+        if (count_s == 0) {
+            size = stream_size1.read();
+        }
+        unsigned char idx = count_s % 8;
+        sum_array[idx] = stream_dist.read();
+        // Last slot of the group filled: emit the sum of the eight buffered values.
+        if (idx == 7) {
+            float sum_part = compute_8(sum_array);
+            stream_sum.write(sum_part);
+            // printf("GroupSum Write stream_sum %d %d %d\n", count_context, count_s,
+            // size);
+            count_debug++;
+        }
+        count_s++;
+        if (count_s == size) {
+            count_s = 0;
+            count_context++;
+        }
+    }
+    // printf("stream_sum in=%d\n", count_debug);
+}
+
+/**
+ * @brief Final streaming stage: combine the partial sums into one value per context and update dists / best.
+ *
+ * Per context it accumulates size/8 partial sums from stream_sum, reads the
+ * context total from stream_dist_total and forms
+ * dist_std = total_term - sum_of_partial_sums.
+ *  - isEntropy == true : dists[context] is overwritten with dist_std.
+ *  - isEntropy == false: the candidate dist_std - histo_entropy[context] - ref_entropy
+ *    replaces dists[context] only when it is smaller, and best[context] is then set to j.
+ * largest_idx is reset to 0 on entry and moved to the current context whenever
+ * its resulting value is greater than dists[largest_idx].
+ *
+ * All arrays are addressed by the context counter (0 .. numNonempty-1).
+ *
+ * @param isEntropy          selects the update rule described above.
+ * @param numNonempty        number of contexts to process.
+ * @param j                  value stored in best[] when a context improves (non-entropy mode).
+ * @param histo_entropy      per-context value subtracted in non-entropy mode.
+ * @param ref_entropy        reference value subtracted in non-entropy mode.
+ * @param dists              in/out: per-context result.
+ * @param best               out: per-context j of the last improvement (non-entropy mode).
+ * @param largest_idx        out: context index tracked as holding the largest value.
+ * @param stream_size2       input: number of entries per context (divided by 8 here).
+ * @param stream_dist_total  input: one total term per context.
+ * @param stream_sum         input: partial sums, size/8 per context.
+ */
+void GetDist(bool isEntropy,
+             unsigned int numNonempty,
+             unsigned int j,
+             float histo_entropy[4096],
+             float ref_entropy,
+             float dists[4096],
+             unsigned int best[4096],
+             unsigned int& largest_idx,
+             hls::stream<unsigned int>& stream_size2,
+             hls::stream<float>& stream_dist_total,
+             hls::stream<float>& stream_sum) {
+    int count_debug = 0; // only incremented; consumed by the commented-out debug print below
+    float dist_std;
+    unsigned int count_context = 0;
+    unsigned char count_s = 0; // partial-sum position inside the current context
+    largest_idx = 0;
+    unsigned char size;
+    float sum_dist = 0;
+    // reg0..reg2 / addr0..addr2 hold the three most recently written dists values and
+    // their addresses; 0xffff marks a slot as not yet used.
+    float reg_curr;
+    float reg0;
+    float reg1;
+    float reg2;
+    unsigned short addr_curr = 0;
+    unsigned short addr0 = 0xffff;
+    unsigned short addr1 = 0xffff;
+    unsigned short addr2 = 0xffff;
+GET_DIST_OUT:
+    while (count_context < numNonempty) {
+#pragma HLS LOOP_TRIPCOUNT min = 1250 max = 1250
+        if (count_s == 0) {
+            // one partial sum arrives per eight entries
+            size = stream_size2.read() / 8;
+            sum_dist = 0;
+            // printf("GetDist count_context=%d, count_s=%d, size=%d\n",
+            // count_context, count_s, size);
+        }
+        // printf("GetDist count_context=%d, count_s=%d, size=%d\n", count_context,
+        // count_s, size);
+        sum_dist += stream_sum.read();
+        // printf("GetDist Read stream_sum %d %d %d\n", count_context, count_s,
+        // size);
+        // Last partial sum of the context: finish the value and update the outputs.
+        if (count_s == size - 1) {
+            count_debug++;
+            // printf("GetDist Read stream_dist_total %d %d %d\n", count_context,
+            // count_s, size);
+            float tmp = stream_dist_total.read();
+            dist_std = tmp - sum_dist;
+
+            // update dist, may update same address
+            addr_curr = count_context;
+            // Take the current value from a forwarding register when the address was written recently.
+            if (addr_curr == addr0) {
+                reg_curr = reg0;
+            } else if (addr_curr == addr1) {
+                reg_curr = reg1;
+            } else if (addr_curr == addr2) {
+                reg_curr = reg2;
+            } else {
+                reg_curr = dists[addr_curr];
+            }
+
+            float tmp_largest = dists[largest_idx];
+            if (!isEntropy) {
+                if (dist_std - histo_entropy[addr_curr] - ref_entropy < reg_curr) {
+                    best[addr_curr] = j;
+                    reg_curr = dist_std - histo_entropy[addr_curr] - ref_entropy;
+                }
+            } else {
+                reg_curr = dist_std;
+            }
+            if (reg_curr > tmp_largest) {
+                largest_idx = addr_curr;
+            }
+
+            dists[addr_curr] = reg_curr;
+            // shift the forwarding registers: newest write goes into slot 0
+            reg2 = reg1;
+            reg1 = reg0;
+            reg0 = reg_curr;
+            addr2 = addr1;
+            addr1 = addr0;
+            addr0 = addr_curr;
+        }
+        count_s++;
+        if (count_s == size) {
+            count_s = 0;
+            count_context++;
+        }
+    }
+    // printf("stream_sum out=%d\n", count_debug);
+}
+
+/**
+ * @brief Dataflow pipeline that evaluates every listed histogram against one reference (or alone, in entropy mode).
+ *
+ * The six stages are connected by hls::stream FIFOs of depth 64:
+ * GetIdx -> GetA (+ GetB) -> DoHistogramDistanceEntropy -> GroupSum -> GetDist.
+ *
+ * @param isEntropy       true: only the listed histograms are used and dists[] is overwritten;
+ *                        false: the reference histogram is combined with each listed histogram
+ *                        and dists[] / best[] are updated on improvement (see GetDist).
+ * @param numNonempty     number of listed histograms.
+ * @param j               value recorded in best[] on improvement.
+ * @param histo_size      size of each histogram, addressed by histogram index.
+ * @param histograms      packed histogram storage.
+ * @param histo_totalcnt  total count of each histogram, addressed by histogram index.
+ * @param histo_entropy   per-context value forwarded to GetDist.
+ * @param nonempty_histo  list of histogram indices to process.
+ * @param refSize         number of entries of the reference histogram.
+ * @param ref_histo       reference histogram entries.
+ * @param ref_totalcount  total count of the reference histogram.
+ * @param ref_entropy     reference value forwarded to GetDist.
+ * @param dists           in/out: per-context result.
+ * @param best            out: per-context j of the last improvement.
+ * @param largest_idx     out: context index selected by GetDist.
+ */
+void hls_HistogramDistance(bool isEntropy,
+                           unsigned int numNonempty,
+                           unsigned int j,
+                           unsigned int histo_size[4096],
+#ifndef __SYNTHESIS__
+                           std::vector<std::vector<ap_uint<64> > >& histograms,
+#else
+                           ap_uint<64> histograms[4096][20],
+#endif
+                           unsigned int histo_totalcnt[4096],
+                           float histo_entropy[4096],
+                           unsigned int nonempty_histo[4096],
+                           unsigned int refSize,
+                           unsigned int ref_histo[40],
+                           unsigned int ref_totalcount,
+                           float ref_entropy,
+                           float dists[4096],
+                           unsigned int best[4096],
+                           unsigned int& largest_idx) {
+    // Loop length per context: GetA -> DoHistogramDistanceEntropy -> GroupSum / GetDist.
+    hls::stream<unsigned int> stream_size0("stream_size0");
+#pragma HLS STREAM variable = stream_size0 depth = 64
+    hls::stream<unsigned int> stream_size1("stream_size1");
+#pragma HLS STREAM variable = stream_size1 depth = 64
+    hls::stream<unsigned int> stream_size2("stream_size2");
+#pragma HLS STREAM variable = stream_size2 depth = 64
+
+    // Histogram A: entries, size (two hops) and total count.
+    hls::stream<unsigned int> stream_a("stream_a");
+#pragma HLS STREAM variable = stream_a depth = 64
+    hls::stream<unsigned int> stream_a_size0("stream_a_size0");
+#pragma HLS STREAM variable = stream_a_size0 depth = 64
+    hls::stream<unsigned int> stream_a_size1("stream_a_size1");
+#pragma HLS STREAM variable = stream_a_size1 depth = 64
+    hls::stream<unsigned int> stream_a_count("stream_a_count");
+#pragma HLS STREAM variable = stream_a_count depth = 64
+
+    // Reference histogram B: entries, size (two hops) and total count.
+    hls::stream<unsigned int> stream_b("stream_b");
+#pragma HLS STREAM variable = stream_b depth = 64
+    hls::stream<unsigned int> stream_b_size0("stream_b_size0");
+#pragma HLS STREAM variable = stream_b_size0 depth = 64
+    hls::stream<unsigned int> stream_b_size1("stream_b_size1");
+#pragma HLS STREAM variable = stream_b_size1 depth = 64
+    hls::stream<unsigned int> stream_b_count("stream_b_count");
+#pragma HLS STREAM variable = stream_b_count depth = 64
+
+    // Distance terms and the histogram index stream.
+    hls::stream<float> stream_dist_total("stream_dist_total");
+#pragma HLS STREAM variable = stream_dist_total depth = 64
+    hls::stream<float> stream_dist("stream_dist");
+#pragma HLS STREAM variable = stream_dist depth = 64
+    hls::stream<float> stream_sum("stream_sum");
+#pragma HLS STREAM variable = stream_sum depth = 64
+    hls::stream<unsigned int> stream_idx("stream_idx");
+#pragma HLS STREAM variable = stream_idx depth = 64
+
+// clang-format on
+#pragma HLS dataflow
+
+    GetIdx(numNonempty, nonempty_histo, histo_size, stream_idx, stream_a_size0);
+
+    GetB(isEntropy, numNonempty, refSize, ref_histo, ref_totalcount, stream_b, stream_b_size0, stream_b_count);
+
+    GetA(isEntropy, numNonempty, histo_size, histograms, histo_totalcnt, nonempty_histo, stream_idx, stream_b_size0,
+         stream_b_size1, stream_size0, stream_a, stream_a_size0, stream_a_size1, stream_a_count);
+
+    DoHistogramDistanceEntropy(isEntropy, numNonempty, stream_size0, stream_size1, stream_size2, stream_a,
+                               stream_a_size1, stream_a_count, stream_b, stream_b_size1, stream_b_count,
+                               stream_dist_total, stream_dist);
+
+    GroupSum(numNonempty, stream_size1, stream_dist, stream_sum);
+
+    GetDist(isEntropy, numNonempty, j, histo_entropy, ref_entropy, dists, best, largest_idx, stream_size2,
+            stream_dist_total, stream_sum);
+}
+
+/**
+ * @brief Cluster the listed histograms into at most 128 output histograms.
+ *
+ * Steps:
+ *  1. An entropy pass (hls_HistogramDistance with isEntropy = true) fills entropy[]
+ *     with one value per listed histogram.
+ *  2. FIRST_SCAN repeatedly takes the current histogram `idx` as a new cluster seed,
+ *     copies it into histograms_clusd, and runs a distance pass against it, which
+ *     updates dists[] and selects the next seed. It stops after
+ *     min(128, numNonempty) seeds or once dists[largest_idx] drops below 64.0.
+ *  3. SECOND_SCAN runs one distance pass per seed so that best[i] ends up holding
+ *     the seed number with the smallest distance for listed histogram i.
+ *  4. The output loop adds every listed histogram into its best cluster, grows the
+ *     cluster size where needed and records the assignment in histogram_symbols.
+ *
+ * @param largest_idx        position inside nonempty_histo of the first seed.
+ * @param numNonempty        number of listed histograms.
+ * @param nonempty_histo     list of histogram indices to cluster.
+ * @param histo_totalcnt     total count of each histogram, addressed by histogram index.
+ * @param histo_size         size of each histogram, addressed by histogram index.
+ * @param histograms         packed input histogram storage.
+ * @param histo_size_clusd   out: size of each cluster histogram.
+ * @param histograms_clusd   in/out: cluster histograms; counts are accumulated into them.
+ * @param histogram_symbols  out: cluster number assigned to each histogram index.
+ * @return number of clusters that were created.
+ */
+int hls_ClusterHisgtogram(unsigned int largest_idx,
+                          unsigned int numNonempty,
+                          unsigned int nonempty_histo[4096],
+                          unsigned int histo_totalcnt[4096],
+                          unsigned int histo_size[4096],
+#ifndef __SYNTHESIS__
+                          std::vector<std::vector<ap_uint<64> > >& histograms,
+#else
+                          ap_uint<64> histograms[4096][20],
+#endif
+                          unsigned int histo_size_clusd[128],
+#ifndef __SYNTHESIS__
+                          std::vector<std::vector<unsigned int> >& histograms_clusd,
+#else
+                          unsigned int histograms_clusd[128][40],
+#endif
+                          unsigned char histogram_symbols[4096]) {
+    unsigned char max_histograms = 128;
+    float min_distance = 64.0;
+    unsigned int best[4096];
+    float dists[4096];
+    float entropy[4096];           // per listed histogram, addressed by list position
+    unsigned int total_count[4096]; // per cluster
+    float out_entropy[4096];        // per cluster
+    float histo_entropy[4096];      // per histogram, addressed by histogram index
+
+    // Reference ("B") operand handed to the distance passes.
+    unsigned int size_b = 0;
+    unsigned int total_count_b = 0;
+    float hls_entropy_b = 0;
+    unsigned int data_b[40];
+    unsigned int tmp_largest_idx;
+
+    // Entropy pass: the result lands in entropy[]; the reference operand is not used in this mode.
+    hls_HistogramDistance(true, numNonempty, 0, histo_size, histograms, histo_totalcnt, histo_entropy, nonempty_histo,
+                          size_b, data_b, total_count_b, hls_entropy_b, entropy, best, tmp_largest_idx);
+
+INIT_1:
+    for (unsigned int i = 0; i < numNonempty; i++) {
+#pragma HLS LOOP_TRIPCOUNT min = 4096 max = 4096
+#pragma HLS pipeline
+        unsigned int idx = nonempty_histo[i];
+        histo_entropy[idx] = entropy[i];
+        best[i] = 0;
+        dists[i] = FLOAT_MAX;
+    }
+
+    unsigned int numHisto_clusd = 0;
+    unsigned int max_count = hls::min((int)max_histograms, (int)numNonempty);
+    largest_idx = nonempty_histo[largest_idx];
+    dists[largest_idx] = FLOAT_MAX; // guarantees the first loop test passes
+    unsigned int idx = largest_idx;
+FIRST_SCAN:
+    while (numHisto_clusd < max_count && dists[largest_idx] >= min_distance) {
+#pragma HLS LOOP_TRIPCOUNT min = 128 max = 128
+        histogram_symbols[idx] = numHisto_clusd;
+        unsigned char data_size = histo_size[idx];
+    GEN_REF1:
+        for (unsigned char k = 0; k < data_size; k++) {
+#pragma HLS LOOP_TRIPCOUNT min = 40 max = 40
+            histograms_clusd[numHisto_clusd][k] = get_uram(idx, k, histograms); // histograms[idx][k];
+            data_b[k] = get_uram(idx, k, histograms);                           // histograms[idx][k];
+        }
+        histo_size_clusd[numHisto_clusd] = data_size;
+        size_b = data_size;
+        total_count[numHisto_clusd] = histo_totalcnt[idx];
+        total_count_b = histo_totalcnt[idx];
+        out_entropy[numHisto_clusd] = histo_entropy[idx];
+        hls_entropy_b = histo_entropy[idx];
+// printf("push idx=%d, size_b=%d, total_count_b=%d, hls_entropy_b=%f\n",
+//  idx, size_b, total_count_b, hls_entropy_b);
+#pragma HLS ALLOCATION function instances = hls_HistogramDistance limit = 1
+        hls_HistogramDistance(false, numNonempty, 0, histo_size, histograms, histo_totalcnt, entropy, nonempty_histo,
+                              size_b, data_b, total_count_b, hls_entropy_b, dists, best, largest_idx);
+        // largest_idx is now a list position; translate it to a histogram index for the next seed.
+        idx = nonempty_histo[largest_idx];
+        numHisto_clusd++;
+    }
+
+INIT_2:
+    for (unsigned int j = 0; j < numNonempty; j++) {
+#pragma HLS LOOP_TRIPCOUNT min = 4096 max = 4096
+#pragma HLS pipeline
+        best[j] = 0;
+        dists[j] = FLOAT_MAX;
+    }
+
+SECOND_SCAN:
+    for (unsigned int j = 0; j < numHisto_clusd; j++) {
+#pragma HLS LOOP_TRIPCOUNT min = 128 max = 128
+        size_b = histo_size_clusd[j];
+        total_count_b = total_count[j];
+        hls_entropy_b = out_entropy[j];
+    GEN_REF2:
+        for (unsigned char k = 0; k < size_b; k++) {
+#pragma HLS LOOP_TRIPCOUNT min = 40 max = 40
+#pragma HLS pipeline
+            data_b[k] = histograms_clusd[j][k];
+        }
+        hls_HistogramDistance(false, numNonempty, j, histo_size, histograms, histo_totalcnt, entropy, nonempty_histo,
+                              size_b, data_b, total_count_b, hls_entropy_b, dists, best, largest_idx);
+    }
+
+OUTPUT1:
+    for (unsigned int i = 0; i < numNonempty; i++) {
+#pragma HLS LOOP_TRIPCOUNT min = 4096 max = 4096
+        unsigned int idx_in = nonempty_histo[i];
+        unsigned int idx_out = best[i];
+        unsigned int other_data_size = histo_size[idx_in];
+        if (other_data_size > histo_size_clusd[idx_out]) {
+            histo_size_clusd[idx_out] = other_data_size;
+        }
+    OUTPUT2:
+        for (unsigned char k = 0; k < other_data_size; ++k) {
+#pragma HLS LOOP_TRIPCOUNT min = 40 max = 40
+            unsigned int data_tmp = get_uram(idx_in, k, histograms); // histograms[idx_in][k];
+            // Merge the raw counts into the cluster; they must not be scaled by the cluster count.
+            // NOTE(review): each cluster still holds its seed copy from FIRST_SCAN when this
+            // accumulation starts - confirm that is intended.
+            histograms_clusd[idx_out][k] += data_tmp;
+        }
+        histogram_symbols[idx_in] = idx_out;
+    }
+    return numHisto_clusd;
+}
+
+// clang-format off
+/**
+ * @brief Entry point of the clustering step; handles the trivial cases before running the clustering.
+ *
+ *  - numHisto <= 1: histogram 0 is copied through as the only cluster.
+ *  - numHisto > 1 and numNonempty == 0: one cluster is reported and nothing else is written.
+ *  - otherwise: hls_ClusterHisgtogram() is run and its return value is the cluster count.
+ *
+ * @param largest_idx        forwarded to hls_ClusterHisgtogram().
+ * @param numNonempty        number of entries in nonempty_histo.
+ * @param nonempty_histo     list of histogram indices to cluster.
+ * @param numHisto           number of input histograms.
+ * @param histo_totalcnt     total count of each histogram.
+ * @param histo_size         size of each histogram.
+ * @param histograms         packed input histogram storage.
+ * @param numHisto_clusd     out: number of clusters.
+ * @param histo_size_clusd   out: size of each cluster histogram.
+ * @param histograms_clusd   out: cluster histograms.
+ * @param histogram_symbols  out: cluster number assigned to each histogram index.
+ */
+void hls_fastclusterHistogram_wrapper(
+    unsigned int largest_idx,
+    unsigned int numNonempty,
+    unsigned int nonempty_histo[4096],
+    unsigned int numHisto,
+    unsigned int histo_totalcnt[4096],
+    unsigned int histo_size[4096],
+#ifndef __SYNTHESIS__
+    std::vector<std::vector<ap_uint<64> > >& histograms,
+#else
+    ap_uint<64> histograms[4096][20],
+#endif
+    unsigned int& numHisto_clusd,
+    unsigned int histo_size_clusd[128],
+#ifndef __SYNTHESIS__
+    std::vector<std::vector<unsigned int> >& histograms_clusd,
+#else
+    unsigned int histograms_clusd[128][40],
+#endif
+    unsigned char histogram_symbols[4096]
+) {
+    // clang-format on
+    if (numHisto <= 1) {
+        // Single histogram: pass it through unchanged as cluster 0.
+        numHisto_clusd = 1;
+        histo_size_clusd[0] = histo_size[0];
+    HISTO:
+        for (unsigned char pos = 0; pos < histo_size[0]; ++pos) {
+#pragma HLS LOOP_TRIPCOUNT min = 40 max = 40
+#pragma HLS pipeline
+            histograms_clusd[0][pos] = get_uram(0, pos, histograms);
+        }
+        return;
+    }
+
+    if (numNonempty == 0) {
+        // Nothing to cluster.
+        numHisto_clusd = 1;
+        return;
+    }
+
+    numHisto_clusd = hls_ClusterHisgtogram(largest_idx, numNonempty, nonempty_histo, histo_totalcnt, histo_size,
+                                           histograms, histo_size_clusd, histograms_clusd, histogram_symbols);
+}
+
+/**
+ * @brief Build the token histogram of the context map.
+ *
+ * Nothing is done unless there is more than one input histogram and the number
+ * of bits needed to index a cluster (ceil(log2(numHisto_clusd_ptr))) is at
+ * least 4. Otherwise every context-map value is turned into a token (values
+ * below 16 map to themselves, larger values to a token derived from the
+ * position of their top bit and the two bits below it), the token's bin is
+ * incremented, and the histogram size is set to (largest token + 1) rounded
+ * up to a multiple of 8.
+ *
+ * @param numHisto_ptr            number of context-map entries to scan.
+ * @param ctx_map_ptr             context map values.
+ * @param numHisto_clusd_ptr      number of clusters (only read here).
+ * @param histograms_clusdin_ptr  in/out: token histogram, bins are incremented.
+ * @param histo_size_clusdin_ptr  out: size of the token histogram; written only when the scan runs.
+ */
+void buildCTXHistogram(uint32_t numHisto_ptr,
+                       uint8_t* ctx_map_ptr,
+
+                       uint32_t& numHisto_clusd_ptr,
+                       int32_t* histograms_clusdin_ptr,
+                       uint32_t& histo_size_clusdin_ptr) {
+    ap_uint<32> cluster_cnt = numHisto_clusd_ptr;
+    uint32_t top_bit = 32 - cluster_cnt.countLeadingZeros() - 1;
+    // A power of two needs exactly top_bit bits, anything else one more.
+    uint32_t entry_bits = top_bit + 1;
+    if ((cluster_cnt & (cluster_cnt - 1)) == 0) {
+        entry_bits = top_bit;
+    }
+
+    if (!(numHisto_ptr > 1 && entry_bits >= 4)) {
+        return;
+    }
+
+    uint32_t largest_tok = 0;
+    for (uint32_t c = 0; c < numHisto_ptr; ++c) {
+#pragma HLS PIPELINE II = 1
+        ap_uint<32> value = ctx_map_ptr[c];
+        uint32_t tok;
+        if (value < 16) {
+            tok = value;
+        } else {
+            uint32_t n = 32 - value.countLeadingZeros() - 1;
+            uint32_t m = value - (1 << n);
+            tok = 16 + ((n - 4) << 2) + (m >> (n - 2));
+        }
+        if (tok > largest_tok) {
+            largest_tok = tok;
+        }
+        ++histograms_clusdin_ptr[tok];
+    }
+    histo_size_clusdin_ptr = (largest_tok + 8) / 8 * 8;
+}
+
+/**
+ * @brief Clear the packed histogram buffer, then load the listed histograms into it.
+ *
+ * Each listed histogram occupies 40 consecutive 32-bit values in
+ * histograms_ptr, starting at index * 40. Two neighbouring values are packed
+ * into one 64-bit word: the even one in bits [31:0], the odd one in bits [63:32].
+ *
+ * @param numNonempty_ptr     number of entries of nonempty_histo_ptr to load.
+ * @param nonempty_histo_ptr  list of histogram indices.
+ * @param histograms_ptr      flat input histograms, 40 values per histogram.
+ * @param nonempty_histo_tmp  out: copy of the first numNonempty_ptr list entries.
+ * @param histograms_tmp      out: packed histogram buffer, 4096 x 20 words.
+ */
+void load_histo(uint32_t numNonempty_ptr,
+                uint32_t* nonempty_histo_ptr,
+                int32_t* histograms_ptr,
+
+                uint32_t nonempty_histo_tmp[4096],
+#ifndef __SYNTHESIS__
+                std::vector<std::vector<ap_uint<64> > >& histograms_tmp
+#else
+                ap_uint<64> histograms_tmp[4096][20]
+#endif
+                ) {
+    // Pass 1: zero the whole buffer.
+    for (int row = 0; row < 4096; ++row) {
+#pragma HLS PIPELINE II = 1
+        for (int col = 0; col < 20; ++col) {
+#pragma HLS UNROLL
+            histograms_tmp[row][col] = 0;
+        }
+    }
+
+    // Pass 2: copy and pack the listed histograms.
+    for (int n = 0; n < numNonempty_ptr; ++n) {
+        uint32_t ctx = nonempty_histo_ptr[n];
+        nonempty_histo_tmp[n] = ctx;
+        for (ap_uint<8> w = 0; w < 20; w++) {
+#pragma HLS PIPELINE II = 1
+            ap_uint<64> word;
+            word.range(31, 0) = histograms_ptr[ctx * 40 + w * 2];
+            word.range(63, 32) = histograms_ptr[ctx * 40 + w * 2 + 1];
+            histograms_tmp[ctx][w] = word;
+        }
+    }
+}
+
+/**
+ * @brief Copy 4096 list entries from nonempty_histo_ptr into nonempty_histo_tmp.
+ *
+ * @param nonempty_histo_ptr  source, at least 4096 values.
+ * @param nonempty_histo_tmp  destination buffer.
+ */
+void load_nonempty(uint32_t* nonempty_histo_ptr, uint32_t nonempty_histo_tmp[4096]) {
+    for (int n = 0; n < 4096; ++n) {
+#pragma HLS PIPELINE II = 1
+        const uint32_t entry = nonempty_histo_ptr[n];
+        nonempty_histo_tmp[n] = entry;
+    }
+}
+
+/**
+ * @brief Copy 4096 total counts from histo_totalcnt_ptr into histo_totalcnt_tmp.
+ *
+ * @param histo_totalcnt_ptr  source, at least 4096 values.
+ * @param histo_totalcnt_tmp  destination buffer.
+ */
+void load_total_cnt(uint32_t* histo_totalcnt_ptr, uint32_t histo_totalcnt_tmp[4096]) {
+    for (int n = 0; n < 4096; ++n) {
+#pragma HLS PIPELINE II = 1
+        const uint32_t cnt = histo_totalcnt_ptr[n];
+        histo_totalcnt_tmp[n] = cnt;
+    }
+}
+
+/**
+ * @brief Copy 4096 histogram sizes from histo_size_ptr into histo_size_tmp.
+ *
+ * @param histo_size_ptr  source, at least 4096 values.
+ * @param histo_size_tmp  destination buffer.
+ */
+void load_size(uint32_t* histo_size_ptr, uint32_t histo_size_tmp[4096]) {
+    for (int n = 0; n < 4096; ++n) {
+#pragma HLS PIPELINE II = 1
+        const uint32_t len = histo_size_ptr[n];
+        histo_size_tmp[n] = len;
+    }
+}
+
+/**
+ * @brief Zero the 40-entry token histogram buffer.
+ *
+ * @param histograms_clusdin_tmp  buffer to clear.
+ */
+void memset_histo_clusdin(int32_t histograms_clusdin_tmp[40]) {
+    for (int bin = 0; bin < 40; ++bin) {
+#pragma HLS UNROLL
+        histograms_clusdin_tmp[bin] = 0;
+    }
+}
+
+/**
+ * @brief Zero the 128 x 40 cluster histogram buffer.
+ *
+ * @param histograms_clusd_tmp  buffer to clear.
+ */
+void memset_histo_clusd(
+#ifndef __SYNTHESIS__
+    std::vector<std::vector<unsigned> >& histograms_clusd_tmp
+#else
+    unsigned histograms_clusd_tmp[128][40]
+#endif
+    ) {
+    for (int cluster = 0; cluster < 128; ++cluster) {
+        for (int bin = 0; bin < 40; ++bin) {
+#pragma HLS PIPELINE II = 1
+            histograms_clusd_tmp[cluster][bin] = 0;
+        }
+    }
+}
+
+/**
+ * @brief Zero the 4096-entry context map buffer.
+ *
+ * @param ctx_map_tmp  buffer to clear.
+ */
+void memset_ctx_map(uint8_t ctx_map_tmp[4096]) {
+    for (int n = 0; n < 4096; ++n) {
+#pragma HLS PIPELINE II = 1
+        ctx_map_tmp[n] = 0;
+    }
+}
+
+/**
+ * @brief Dataflow region that fills the on-chip input buffers and clears the output buffers.
+ *
+ * It loads the listed histograms, the total counts and the sizes, and zeroes
+ * the token histogram, the cluster histograms and the context map. The list
+ * buffer nonempty_histo_tmp is filled by load_histo().
+ *
+ * @param numNonempty_ptr         number of listed histograms.
+ * @param histograms_ptr          flat input histograms.
+ * @param nonempty_histo_ptr      list of histogram indices.
+ * @param histo_totalcnt_ptr      total count of each histogram.
+ * @param histo_size_ptr          size of each histogram.
+ * @param histograms_tmp          out: packed histogram buffer.
+ * @param nonempty_histo_tmp      out: copy of the list.
+ * @param histo_totalcnt_tmp      out: copy of the total counts.
+ * @param histo_size_tmp          out: copy of the sizes.
+ * @param histograms_clusdin_tmp  out: cleared token histogram.
+ * @param histograms_clusd_tmp    out: cleared cluster histograms.
+ * @param ctx_map_tmp             out: cleared context map.
+ */
+void load_data(uint32_t numNonempty_ptr,
+               int32_t* histograms_ptr,
+               uint32_t* nonempty_histo_ptr,
+               uint32_t* histo_totalcnt_ptr,
+               uint32_t* histo_size_ptr,
+#ifndef __SYNTHESIS__
+               std::vector<std::vector<ap_uint<64> > >& histograms_tmp,
+#else
+               ap_uint<64> histograms_tmp[4096][20],
+#endif
+               uint32_t nonempty_histo_tmp[4096],
+               uint32_t histo_totalcnt_tmp[4096],
+               uint32_t histo_size_tmp[4096],
+               int32_t histograms_clusdin_tmp[40],
+#ifndef __SYNTHESIS__
+               std::vector<std::vector<unsigned> >& histograms_clusd_tmp,
+#else
+               unsigned histograms_clusd_tmp[128][40],
+#endif
+               uint8_t ctx_map_tmp[4096]) {
+#pragma HLS DATAFLOW
+    // Input side: histograms (and the list), total counts, sizes.
+    load_histo(numNonempty_ptr, nonempty_histo_ptr, histograms_ptr, nonempty_histo_tmp, histograms_tmp);
+
+    load_total_cnt(histo_totalcnt_ptr, histo_totalcnt_tmp);
+
+    load_size(histo_size_ptr, histo_size_tmp);
+
+    // Output side: start from all-zero buffers.
+    memset_histo_clusdin(histograms_clusdin_tmp);
+
+    memset_histo_clusd(histograms_clusd_tmp);
+
+    memset_ctx_map(ctx_map_tmp);
+}
+
+/**
+ * @brief Write the 128 x 40 cluster histograms to the flat output buffer, 40 values per cluster.
+ *
+ * @param histograms_clusd_tmp  cluster histograms.
+ * @param histograms_clusd_ptr  destination, at least 128 * 40 values.
+ */
+void write_histo_clusd(
+#ifndef __SYNTHESIS__
+    std::vector<std::vector<unsigned> >& histograms_clusd_tmp,
+#else
+    unsigned histograms_clusd_tmp[128][40],
+#endif
+    int32_t* histograms_clusd_ptr) {
+    for (int cluster = 0; cluster < 128; ++cluster) {
+        for (int bin = 0; bin < 40; ++bin) {
+#pragma HLS PIPELINE II = 1
+            histograms_clusd_ptr[cluster * 40 + bin] = histograms_clusd_tmp[cluster][bin];
+        }
+    }
+}
+
+/**
+ * @brief Write the 128 cluster sizes to the output buffer.
+ *
+ * @param histo_size_clusd_tmp  cluster sizes.
+ * @param histo_size_clusd_ptr  destination, at least 128 values.
+ */
+void write_size_clusd(uint32_t histo_size_clusd_tmp[128], uint32_t* histo_size_clusd_ptr) {
+    for (int cluster = 0; cluster < 128; ++cluster) {
+#pragma HLS PIPELINE II = 1
+        const uint32_t len = histo_size_clusd_tmp[cluster];
+        histo_size_clusd_ptr[cluster] = len;
+    }
+}
+
+/**
+ * @brief Write the 4096-entry context map to the output buffer.
+ *
+ * @param ctx_map_tmp  context map.
+ * @param ctx_map_ptr  destination, at least 4096 values.
+ */
+void write_ctx_map(uint8_t ctx_map_tmp[4096], uint8_t* ctx_map_ptr) {
+    for (int n = 0; n < 4096; ++n) {
+#pragma HLS PIPELINE II = 1
+        const uint8_t sym = ctx_map_tmp[n];
+        ctx_map_ptr[n] = sym;
+    }
+}
+
+/**
+ * @brief Write the 40-entry token histogram to the output buffer.
+ *
+ * @param histograms_clusdin_tmp  token histogram.
+ * @param histograms_clusdin_ptr  destination, at least 40 values.
+ */
+void write_histo_clusdin(int32_t histograms_clusdin_tmp[40], int32_t* histograms_clusdin_ptr) {
+    for (int bin = 0; bin < 40; ++bin) {
+#pragma HLS PIPELINE II = 1
+        const int32_t cnt = histograms_clusdin_tmp[bin];
+        histograms_clusdin_ptr[bin] = cnt;
+    }
+}
+
+/**
+ * @brief Dataflow region that copies the four result buffers to their output pointers.
+ *
+ * @param histograms_clusd_tmp    cluster histograms (128 x 40).
+ * @param histo_size_clusd_tmp    cluster sizes (128).
+ * @param ctx_map_tmp             context map (4096).
+ * @param histograms_clusdin_tmp  token histogram (40).
+ * @param histograms_clusd_ptr    out: flat cluster histograms.
+ * @param histo_size_clusd_ptr    out: cluster sizes.
+ * @param ctx_map_ptr             out: context map.
+ * @param histograms_clusdin_ptr  out: token histogram.
+ */
+void write_data(
+#ifndef __SYNTHESIS__
+    std::vector<std::vector<unsigned> >& histograms_clusd_tmp,
+#else
+    unsigned histograms_clusd_tmp[128][40],
+#endif
+    uint32_t histo_size_clusd_tmp[128],
+    uint8_t ctx_map_tmp[4096],
+    int32_t histograms_clusdin_tmp[40],
+    int32_t* histograms_clusd_ptr,
+    uint32_t* histo_size_clusd_ptr,
+    uint8_t* ctx_map_ptr,
+    int32_t* histograms_clusdin_ptr) {
+#pragma HLS DATAFLOW
+    // Each call copies one buffer to its own output pointer.
+    write_histo_clusd(histograms_clusd_tmp, histograms_clusd_ptr);
+
+    write_size_clusd(histo_size_clusd_tmp, histo_size_clusd_ptr);
+
+    write_ctx_map(ctx_map_tmp, ctx_map_ptr);
+
+    write_histo_clusdin(histograms_clusdin_tmp, histograms_clusdin_ptr);
+}
+
+/**
+ * @brief JXL ANS cluster Histogram kernel
+ *
+ * @param config                    configuration for the kernel.
+ * @param histograms0_ptr           histograms for Block Context Map.
+ * @param histo_totalcnt0_ptr       Count of context for histograms for Block Context Map.
+ * @param histo_size0_ptr           size for each context
+ * @param nonempty_histo0_ptr       indicate which context is empty
+ * @param ctx_map0_ptr              the input context map
+ * @param histograms_clusd0_ptr     the clustered histogram
+ * @param histograms_clusdin0_ptr   the context for the clustered histogram
+ * @param histograms1_ptr           histograms for Modular frame tree.
+ * @param histo_totalcnt1_ptr       Count of context for histograms for Modular frame tree.
+ * @param histo_size1_ptr           size for each context
+ * @param nonempty_histo1_ptr       indicate which context is empty
+ * @param ctx_map1_ptr              the input context map
+ * @param histograms_clusd1_ptr     the clustered histogram
+ * @param histograms_clusdin1_ptr   the context for the clustered histogram
+ * @param histograms2_ptr           histograms for code from Modular frame.
+ * @param histo_totalcnt2_ptr       Count of context for histograms for Modular frame.
+ * @param histo_size2_ptr           size for each context
+ * @param nonempty_histo2_ptr       indicate which context is empty
+ * @param ctx_map2_ptr              the input context map
+ * @param histograms_clusd2_ptr     the clustered histogram
+ * @param histograms_clusdin2_ptr   the context for the clustered histogram
+ * @param histograms3_ptr           histograms for coef orders.
+ * @param histo_totalcnt3_ptr       Count of context for histograms for coef orders.
+ * @param histo_size3_ptr           size for each context
+ * @param nonempty_histo3_ptr       indicate which context is empty
+ * @param ctx_map3_ptr              the input context map
+ * @param histograms_clusd3_ptr     the clustered histogram
+ * @param histograms_clusdin3_ptr   the context for the clustered histogram
+ * @param histograms4_ptr           histograms for ac coefficients.
+ * @param histo_totalcnt4_ptr       Count of context for histograms for ac coefficients.
+ * @param histo_size4_ptr           size for each context
+ * @param nonempty_histo4_ptr       indicate which context is empty
+ * @param ctx_map4_ptr              the input context map
+ * @param histograms_clusd4_ptr     the clustered histogram
+ * @param histograms_clusdin4_ptr   the context for the clustered histogram
+ */
+
+// clang-format off
+// Runs load_data, hls_fastclusterHistogram_wrapper, buildCTXHistogram and write_data in sequence.
+void hls_ANSclusterHistogram_core(
+    uint32_t numNonempty_ptr,
+    uint32_t* nonempty_histo_ptr,
+    uint32_t lidx_ptr,
+    uint32_t numHisto_ptr,
+    uint32_t* histo_totalcnt_ptr,
+    uint32_t* histo_size_ptr,
+    int32_t* histograms_ptr,
+    uint8_t* ctx_map_ptr,
+    uint32_t* histo_size_clusd_ptr,
+    int32_t* histograms_clusd_ptr,
+    int32_t* histograms_clusdin_ptr,
+    uint32_t& numHisto_clusd_ptr,
+    uint32_t& histo_size_clusdin_ptr) {
+// clang-format on
+// No dataflow, sequentially run
+#ifndef __SYNTHESIS__
+    std::vector<std::vector<ap_uint<64> > > histograms_tmp(4096, std::vector<ap_uint<64> >(20));
+    std::vector<std::vector<unsigned> > histograms_clusd_tmp(128, std::vector<unsigned>(40));
+    std::vector<uint32_t> nonempty_histo_buf(4096), histo_totalcnt_buf(4096), histo_size_buf(4096);
+    std::vector<uint32_t> histo_size_clusd_buf(128);
+    std::vector<uint8_t> ctx_map_buf(4096);
+    std::vector<int32_t> histograms_clusdin_buf(40);
+    uint32_t* nonempty_histo_tmp = nonempty_histo_buf.data();
+    uint32_t* histo_totalcnt_tmp = histo_totalcnt_buf.data();
+    uint32_t* histo_size_tmp = histo_size_buf.data();
+    uint32_t* histo_size_clusd_tmp = histo_size_clusd_buf.data();
+    uint8_t* ctx_map_tmp = ctx_map_buf.data();
+    int32_t* histograms_clusdin_tmp = histograms_clusdin_buf.data();
+#else
+    ap_uint<64> histograms_tmp[4096][20];
+#pragma HLS BIND_STORAGE impl = URAM variable = histograms_tmp
+#pragma HLS ARRAY_PARTITION variable = histograms_tmp complete dim = 2
+    uint32_t nonempty_histo_tmp[4096];
+#pragma HLS BIND_STORAGE impl = URAM variable = nonempty_histo_tmp
+    uint32_t histo_totalcnt_tmp[4096];
+#pragma HLS BIND_STORAGE impl = URAM variable = histo_totalcnt_tmp
+    uint32_t histo_size_tmp[4096];
+#pragma HLS BIND_STORAGE impl = URAM variable = histo_size_tmp
+    unsigned histograms_clusd_tmp[128][40];
+#pragma HLS BIND_STORAGE impl = LUTRAM variable = histograms_clusd_tmp
+    uint32_t histo_size_clusd_tmp[128];
+#pragma HLS BIND_STORAGE impl = LUTRAM variable = histo_size_clusd_tmp
+    uint8_t ctx_map_tmp[4096];
+#pragma HLS BIND_STORAGE impl = URAM variable = ctx_map_tmp
+    int32_t histograms_clusdin_tmp[40];
+#pragma HLS ARRAY_PARTITION variable = histograms_clusdin_tmp complete dim = 0
+#endif
+    // The C-simulation scratch buffers above are std::vector-backed, so they are released on return.
+    load_data(numNonempty_ptr, histograms_ptr, nonempty_histo_ptr, histo_totalcnt_ptr, histo_size_ptr, histograms_tmp,
+              nonempty_histo_tmp, histo_totalcnt_tmp, histo_size_tmp, histograms_clusdin_tmp, histograms_clusd_tmp,
+              ctx_map_tmp);
+
+    hls_fastclusterHistogram_wrapper(lidx_ptr, numNonempty_ptr, nonempty_histo_tmp, numHisto_ptr, histo_totalcnt_tmp,
+                                     histo_size_tmp, histograms_tmp, numHisto_clusd_ptr, histo_size_clusd_tmp,
+                                     histograms_clusd_tmp, ctx_map_tmp);
+
+    buildCTXHistogram(numHisto_ptr, ctx_map_tmp, numHisto_clusd_ptr, histograms_clusdin_tmp, histo_size_clusdin_ptr);
+
+    write_data(histograms_clusd_tmp, histo_size_clusd_tmp, ctx_map_tmp, histograms_clusdin_tmp, histograms_clusd_ptr,
+               histo_size_clusd_ptr, ctx_map_ptr, histograms_clusdin_ptr);
+}
+
+namespace xf {
+namespace codec {
+
+/**
+* @brief JXL ANS cluster Histogram kernel
+*
+* Calls hls_ANSclusterHistogram_core once, in order, for every histogram set N (0..4) whose
+* enable word config[25 + N] is non-zero.
+* @param config                    configuration for the kernel, as 32-bit words:
+*                                  [0..4] numHisto, [5..9] numNonempty, [10..14] lidx (read);
+*                                  [15..19] numHisto_clusd, [20..24] histo_size_clusdin (written,
+*                                  0 for a set that was not run); [25..29] per-set enable (read).
+* @param histograms0_ptr           histograms for Block Context Map.
+* @param histo_totalcnt0_ptr       Count of context for histograms for Block Context Map.
+* @param histo_size0_ptr           size for each context
+* @param nonempty_histo0_ptr       indicate which context is empty
+* @param ctx_map0_ptr              context map buffer (the core passes it only to write_data)
+* @param histograms_clusd0_ptr     the clustered histogram
+* @param histo_size_clusd0_ptr     size for each clustered histogram
+* @param histograms_clusdin0_ptr   the context for the clustered histogram
+* @param histograms1_ptr           histograms for Modular frame tree.
+* @param histo_totalcnt1_ptr       Count of context for histograms for Modular frame tree.
+* @param histo_size1_ptr           size for each context
+* @param nonempty_histo1_ptr       indicate which context is empty
+* @param ctx_map1_ptr              context map buffer (the core passes it only to write_data)
+* @param histograms_clusd1_ptr     the clustered histogram
+* @param histo_size_clusd1_ptr     size for each clustered histogram
+* @param histograms_clusdin1_ptr   the context for the clustered histogram
+* @param histograms2_ptr           histograms for code from Modular frame.
+* @param histo_totalcnt2_ptr       Count of context for histograms for Modular frame.
+* @param histo_size2_ptr           size for each context
+* @param nonempty_histo2_ptr       indicate which context is empty
+* @param ctx_map2_ptr              context map buffer (the core passes it only to write_data)
+* @param histograms_clusd2_ptr     the clustered histogram
+* @param histo_size_clusd2_ptr     size for each clustered histogram
+* @param histograms_clusdin2_ptr   the context for the clustered histogram
+* @param histograms3_ptr           histograms for coef orders.
+* @param histo_totalcnt3_ptr       Count of context for histograms for coef orders.
+* @param histo_size3_ptr           size for each context
+* @param nonempty_histo3_ptr       indicate which context is empty
+* @param ctx_map3_ptr              context map buffer (the core passes it only to write_data)
+* @param histograms_clusd3_ptr     the clustered histogram
+* @param histo_size_clusd3_ptr     size for each clustered histogram
+* @param histograms_clusdin3_ptr   the context for the clustered histogram
+* @param histograms4_ptr           histograms for ac coefficients.
+* @param histo_totalcnt4_ptr       Count of context for histograms for ac coefficients.
+* @param histo_size4_ptr           size for each context
+* @param nonempty_histo4_ptr       indicate which context is empty
+* @param ctx_map4_ptr              context map buffer (the core passes it only to write_data)
+* @param histograms_clusd4_ptr     the clustered histogram
+* @param histo_size_clusd4_ptr     size for each clustered histogram
+* @param histograms_clusdin4_ptr   the context for the clustered histogram
+*/
+
+// clang-format off
+ extern "C" void JxlEnc_ans_clusterHistogram(
+    uint32_t* config,
+
+    int32_t* histograms0_ptr,
+    uint32_t* histo_totalcnt0_ptr,
+    uint32_t* histo_size0_ptr,
+    uint32_t* nonempty_histo0_ptr,
+
+    uint8_t* ctx_map0_ptr,
+    int32_t* histograms_clusd0_ptr,
+    uint32_t* histo_size_clusd0_ptr,
+
+    int32_t* histograms_clusdin0_ptr,
+    //====================
+    int32_t* histograms1_ptr,
+    uint32_t* histo_totalcnt1_ptr,
+    uint32_t* histo_size1_ptr,
+    uint32_t* nonempty_histo1_ptr,
+
+    uint8_t* ctx_map1_ptr,
+    int32_t* histograms_clusd1_ptr,
+    uint32_t* histo_size_clusd1_ptr,
+
+    int32_t* histograms_clusdin1_ptr,
+    //======================
+    int32_t* histograms2_ptr,
+    uint32_t* histo_totalcnt2_ptr,
+    uint32_t* histo_size2_ptr,
+    uint32_t* nonempty_histo2_ptr,
+
+    uint8_t* ctx_map2_ptr,
+    int32_t* histograms_clusd2_ptr,
+    uint32_t* histo_size_clusd2_ptr,
+
+    int32_t* histograms_clusdin2_ptr,
+    //======================
+    int32_t* histograms3_ptr,
+    uint32_t* histo_totalcnt3_ptr,
+    uint32_t* histo_size3_ptr,
+    uint32_t* nonempty_histo3_ptr,
+
+    uint8_t* ctx_map3_ptr,
+    int32_t* histograms_clusd3_ptr,
+    uint32_t* histo_size_clusd3_ptr,
+
+    int32_t* histograms_clusdin3_ptr,
+    //======================
+    int32_t* histograms4_ptr,
+    uint32_t* histo_totalcnt4_ptr,
+    uint32_t* histo_size4_ptr,
+    uint32_t* nonempty_histo4_ptr,
+
+    uint8_t* ctx_map4_ptr,
+    int32_t* histograms_clusd4_ptr,
+    uint32_t* histo_size_clusd4_ptr,
+
+    int32_t* histograms_clusdin4_ptr
+) {
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = histogram_gmem port = histograms0_ptr depth = 163840
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = histogram_gmem port = histograms1_ptr depth = 163840
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = histogram_gmem port = histograms2_ptr depth = 163840
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = histogram_gmem port = histograms3_ptr depth = 163840
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = histogram_gmem port = histograms4_ptr depth = 163840
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = histocnt_gmem port = histo_totalcnt0_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = histocnt_gmem port = histo_totalcnt1_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = histocnt_gmem port = histo_totalcnt2_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = histocnt_gmem port = histo_totalcnt3_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = histocnt_gmem port = histo_totalcnt4_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = histosize_gmem port = histo_size0_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = histosize_gmem port = histo_size1_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = histosize_gmem port = histo_size2_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = histosize_gmem port = histo_size3_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = histosize_gmem port = histo_size4_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = nonempty_gmem port = nonempty_histo0_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = nonempty_gmem port = nonempty_histo1_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = nonempty_gmem port = nonempty_histo2_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = nonempty_gmem port = nonempty_histo3_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 1 num_read_outstanding = \
+    8 max_write_burst_length = 2 max_read_burst_length = 64 bundle = nonempty_gmem port = nonempty_histo4_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = ctx_gmem port = ctx_map0_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = ctx_gmem port = ctx_map1_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = ctx_gmem port = ctx_map2_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = ctx_gmem port = ctx_map3_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = ctx_gmem port = ctx_map4_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = histo_clusd_gmem port = histograms_clusd0_ptr depth = 5120
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = histo_clusd_gmem port = histograms_clusd1_ptr depth = 5120
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = histo_clusd_gmem port = histograms_clusd2_ptr depth = 5120
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = histo_clusd_gmem port = histograms_clusd3_ptr depth = 5120
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = histo_clusd_gmem port = histograms_clusd4_ptr depth = 5120
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = histosize_clusd_gmem port = histo_size_clusd0_ptr depth = 128
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = histosize_clusd_gmem port = histo_size_clusd1_ptr depth = 128
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = histosize_clusd_gmem port = histo_size_clusd2_ptr depth = 128
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = histosize_clusd_gmem port = histo_size_clusd3_ptr depth = 128
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = histosize_clusd_gmem port = histo_size_clusd4_ptr depth = 128
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = histo_clusdin_gmem port = histograms_clusdin0_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = histo_clusdin_gmem port = histograms_clusdin1_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = histo_clusdin_gmem port = histograms_clusdin2_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = histo_clusdin_gmem port = histograms_clusdin3_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = histo_clusdin_gmem port = histograms_clusdin4_ptr depth = 4096
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 32 num_write_outstanding = 8 num_read_outstanding = \
+    1 max_write_burst_length = 64 max_read_burst_length = 2 bundle = histo_clusdin_gmem port = config depth = 35
+    // clang-format on
+
+    // No dataflow, run sequentially
+
+    uint32_t do_once[5];
+    do_once[0] = config[25];
+    do_once[1] = config[26];
+    do_once[2] = config[27];
+    do_once[3] = config[28];
+    do_once[4] = config[29];
+    // Outputs are zero-initialised so a set that is skipped reports 0 instead of an indeterminate value.
+    uint32_t numHisto0_ptr = config[0];
+    uint32_t numNonempty0_ptr = config[5];
+    uint32_t lidx0_ptr = config[10];
+    uint32_t numHisto_clusd0_ptr = 0;
+    uint32_t histo_size_clusdin0_ptr = 0;
+
+    uint32_t numHisto1_ptr = config[1];
+    uint32_t numNonempty1_ptr = config[6];
+    uint32_t lidx1_ptr = config[11];
+    uint32_t numHisto_clusd1_ptr = 0;
+    uint32_t histo_size_clusdin1_ptr = 0;
+
+    uint32_t numHisto2_ptr = config[2];
+    uint32_t numNonempty2_ptr = config[7];
+    uint32_t lidx2_ptr = config[12];
+
+    uint32_t numHisto_clusd2_ptr = 0;
+    uint32_t histo_size_clusdin2_ptr = 0;
+
+    uint32_t numHisto3_ptr = config[3];
+    uint32_t numNonempty3_ptr = config[8];
+    uint32_t lidx3_ptr = config[13];
+
+    uint32_t numHisto_clusd3_ptr = 0;
+    uint32_t histo_size_clusdin3_ptr = 0;
+
+    uint32_t numHisto4_ptr = config[4];
+    uint32_t numNonempty4_ptr = config[9];
+    uint32_t lidx4_ptr = config[14];
+
+    uint32_t numHisto_clusd4_ptr = 0;
+    uint32_t histo_size_clusdin4_ptr = 0;
+
+    if (do_once[0] != 0) {
+        // clang-format off
+    hls_ANSclusterHistogram_core(
+        numNonempty0_ptr,
+        nonempty_histo0_ptr,
+
+        lidx0_ptr,
+        numHisto0_ptr,
+        histo_totalcnt0_ptr,
+        histo_size0_ptr,
+        histograms0_ptr,
+
+        ctx_map0_ptr,
+        histo_size_clusd0_ptr,
+        histograms_clusd0_ptr,
+
+        histograms_clusdin0_ptr,
+        numHisto_clusd0_ptr,
+        histo_size_clusdin0_ptr);
+        // clang-format on
+    }
+
+    if (do_once[1] != 0) {
+        // clang-format off
+    hls_ANSclusterHistogram_core(
+        numNonempty1_ptr,
+        nonempty_histo1_ptr,
+
+        lidx1_ptr,
+        numHisto1_ptr,
+        histo_totalcnt1_ptr,
+        histo_size1_ptr,
+        histograms1_ptr,
+
+        ctx_map1_ptr,
+        histo_size_clusd1_ptr,
+        histograms_clusd1_ptr,
+
+        histograms_clusdin1_ptr,
+        numHisto_clusd1_ptr,
+        histo_size_clusdin1_ptr);
+        // clang-format on
+    }
+
+    if (do_once[2] != 0) {
+        // clang-format off
+    hls_ANSclusterHistogram_core(
+        numNonempty2_ptr,
+        nonempty_histo2_ptr,
+
+        lidx2_ptr,
+        numHisto2_ptr,
+        histo_totalcnt2_ptr,
+        histo_size2_ptr,
+        histograms2_ptr,
+
+        ctx_map2_ptr,
+        histo_size_clusd2_ptr,
+        histograms_clusd2_ptr,
+
+        histograms_clusdin2_ptr,
+        numHisto_clusd2_ptr,
+        histo_size_clusdin2_ptr);
+        // clang-format on
+    }
+
+    if (do_once[3] != 0) {
+        // clang-format off
+    hls_ANSclusterHistogram_core(
+        numNonempty3_ptr,
+        nonempty_histo3_ptr,
+
+        lidx3_ptr,
+        numHisto3_ptr,
+        histo_totalcnt3_ptr,
+        histo_size3_ptr,
+        histograms3_ptr,
+
+        ctx_map3_ptr,
+        histo_size_clusd3_ptr,
+        histograms_clusd3_ptr,
+
+        histograms_clusdin3_ptr,
+        numHisto_clusd3_ptr,
+        histo_size_clusdin3_ptr);
+        // clang-format on
+    }
+
+    if (do_once[4] != 0) {
+// clang-format off
+    #pragma HLS ALLOCATION function instances = hls_ANSclusterHistogram_core limit = 1
+    hls_ANSclusterHistogram_core(
+        numNonempty4_ptr,
+        nonempty_histo4_ptr,
+
+        lidx4_ptr,
+        numHisto4_ptr,
+        histo_totalcnt4_ptr,
+        histo_size4_ptr,
+        histograms4_ptr,
+
+        ctx_map4_ptr,
+        histo_size_clusd4_ptr,
+        histograms_clusd4_ptr,
+
+        histograms_clusdin4_ptr,
+        numHisto_clusd4_ptr,
+        histo_size_clusdin4_ptr);
+        // clang-format on
+    }
+
+    config[19] = numHisto_clusd4_ptr;
+    config[24] = histo_size_clusdin4_ptr;
+    config[18] = numHisto_clusd3_ptr;
+    config[23] = histo_size_clusdin3_ptr;
+    config[17] = numHisto_clusd2_ptr;
+    config[22] = histo_size_clusdin2_ptr;
+    config[16] = numHisto_clusd1_ptr;
+    config[21] = histo_size_clusdin1_ptr;
+    config[15] = numHisto_clusd0_ptr;
+    config[20] = histo_size_clusdin0_ptr;
+    // printf("[KERNEL] cluster size = (%d, %d, %d, %d, %d)\n", numHisto_clusd0_ptr, numHisto_clusd1_ptr,
+    //        numHisto_clusd2_ptr, numHisto_clusd3_ptr, numHisto_clusd4_ptr);
+    // printf("[KERNEL] cluster in size = (%d, %d, %d, %d, %d)\n", histo_size_clusdin0_ptr, histo_size_clusdin1_ptr,
+    //        histo_size_clusdin2_ptr, histo_size_clusdin3_ptr, histo_size_clusdin4_ptr);
+    // for(int i=0; i<numHisto_clusd2_ptr; i++) {
+    //  for(int j=0; j<histo_size_clusd2_ptr[i]; j++) {
+    //      printf("[KERNEL] cluster 2 %d %d %d\n", i, j, histograms_clusd2_ptr[i*40+j]);
+    //  }
+    //}
+    // for(int j=0; j<numHisto_clusd2_ptr; j++) {
+    //    printf("[KERNEL] cluster in 2 %d %d\n", j, histograms_clusdin2_ptr[j]);
+    //}
+}
+
+} // namespace codec
+} // namespace xf
+#endif
diff --git a/codec/L2/demos/jxlEnc/acc_cluster_histogram/utils.mk b/codec/L2/demos/jxlEnc/acc_cluster_histogram/utils.mk
new file mode 100644
index 0000000000..0ee80e90da
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_cluster_histogram/utils.mk
@@ -0,0 +1,270 @@
+#
+# Copyright 2019-2022 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# vitis makefile-generator v2.0.6
+#
+#+-------------------------------------------------------------------------------
+# The following parameters are assigned with default values. These parameters can
+# be overridden through the make command line
+#+-------------------------------------------------------------------------------
+
+REPORT := no
+PROFILE := no
+DEBUG := no
+
+# Any REPORT value other than 'no' adds both '--report estimate' and
+# '--report system' to VPP_LDFLAGS.
+ifneq ($(REPORT), no)
+VPP_LDFLAGS += --report estimate
+VPP_LDFLAGS += --report system
+endif
+
+# PROFILE=yes adds '--profile_kernel data:all:all:all' to VPP_LDFLAGS
+ifeq ($(PROFILE), yes)
+VPP_LDFLAGS += --profile_kernel data:all:all:all
+endif
+
+# DEBUG=yes adds '--dk protocol:all:all:all' to VPP_LDFLAGS
+ifeq ($(DEBUG), yes)
+VPP_LDFLAGS += --dk protocol:all:all:all
+endif
+
+# Default and export XILINX_VITIS / XILINX_XRT when they are unset; TOOL_VERSION is not assigned in this file
+ifndef XILINX_VITIS
+  XILINX_VITIS = /opt/xilinx/Vitis/$(TOOL_VERSION)
+  export XILINX_VITIS
+endif
+ifndef XILINX_XRT
+  XILINX_XRT = /opt/xilinx/xrt
+  export XILINX_XRT
+endif
+
+# Warns when PLATFORM_NAME matches no PLATFORM_ALLOWLIST entry; fails when it matches a PLATFORM_BLOCKLIST entry.
+# Only POSIX sh constructs ('[' and 'grep -q') are used: make runs recipes with /bin/sh, where '[[' may not exist.
+.PHONY: check_device
+check_device:
+	@set -eu; \
+	inallowlist=False; inblocklist=False; \
+	for dev in $(PLATFORM_ALLOWLIST); do \
+	    if echo "$(PLATFORM_NAME)" | grep -q -- "$$dev"; then inallowlist=True; fi; \
+	done; \
+	for dev in $(PLATFORM_BLOCKLIST); do \
+	    if echo "$(PLATFORM_NAME)" | grep -q -- "$$dev"; then inblocklist=True; fi; \
+	done; \
+	if [ "$$inallowlist" = False ]; then \
+	    echo "[Warning]: The device $(PLATFORM_NAME) not in allowlist."; \
+	fi; \
+	if [ "$$inblocklist" = True ]; then \
+	    echo "[ERROR]: The device $(PLATFORM_NAME) in blocklist."; exit 1; \
+	fi
+
+# Derive HOST_ARCH from the 'CPU Type' line of platforminfo when PLATFORM is given (queried once via ':=')
+ifneq (,$(PLATFORM))
+HOST_ARCH_temp := $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
+ifeq ($(HOST_ARCH_temp), x86)
+HOST_ARCH := x86
+else ifeq ($(HOST_ARCH_temp), cortex-a9)
+HOST_ARCH := aarch32
+else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
+HOST_ARCH := aarch64
+endif
+endif
+
+
+# Link output suffix: 'xsa' for a versal platform when the v++ version is 2022.1 or later, 'xclbin' otherwise.
+# v++ and platforminfo are each run once here (':=') rather than on every reference.
+# expr sees non-integer operands (e.g. 2022.1) and therefore compares them as strings.
+VITIS_VER := $(shell v++ --version | grep 'v++' | sed 's/^[[:space:]]*//' | sed -e 's/^[*]* v++ v//g' | cut -d " " -f1)
+DEVICE_TYPE := $(shell platforminfo -p $(PLATFORM) | grep 'FPGA Family' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
+
+LINK_TARGET_FMT := xclbin
+ifeq ($(DEVICE_TYPE), versal)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+LINK_TARGET_FMT := xsa
+endif
+endif
+
+#Checks for Device Family (DEV_FAM is left unset for any other HOST_ARCH)
+ifeq ($(HOST_ARCH), aarch32)
+  DEV_FAM = 7Series
+else ifeq ($(HOST_ARCH), aarch64)
+  DEV_FAM = Ultrascale
+endif
+
+# Reject a HOST_ARCH other than aarch64/aarch32/x86; an empty HOST_ARCH is not rejected here.
+ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
+$(error HOST_ARCH variable not set, please set correctly and rerun)
+endif
+
+# Show the latest commit of XFLIB_DIR when git is on PATH and XFLIB_DIR contains a .git entry.
+.PHONY: check_version
+check_version:
+ifneq (,$(and $(shell which git),$(wildcard $(XFLIB_DIR)/.git)))
+	@cd $(XFLIB_DIR) && git log --graph --pretty=format:'%Cred%h%Creset -%C(yellow)%d%Creset %s %Cgreen(%cr) %C(bold blue)<%an>%Creset' --abbrev-commit -n 1 && cd -
+endif
+
+.PHONY: check_sysroot # Checks for SYSROOT: it must be set whenever HOST_ARCH is not x86
+check_sysroot:
+ifneq ($(HOST_ARCH), x86)
+ifndef SYSROOT
+	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+
+#Checks for g++: on x86 the system g++ major version (__GNUG__) must be >= CXX_V, else the g++ under XILINX_VIVADO is used
+CXX := g++
+ifeq ($(HOST_ARCH), x86)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif # CXX_VER: 8.3.0 when VITIS_VER compares >= 2022.1, otherwise 6.2.0
+CXX_V := $(shell echo $(CXX_VER) | awk -F. '{print tolower($$1)}')
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
+ifndef XILINX_VIVADO
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
+else
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
+ifeq ($(LD_LIBRARY_PATH),)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
+else
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
+endif
+$(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
+endif # XILINX_VIVADO defined or not
+endif # system g++ major version is below CXX_V
+else ifeq ($(HOST_ARCH), aarch64)
+CXX := $(XILINX_VITIS)/gnu/aarch64/lin/aarch64-linux/bin/aarch64-linux-gnu-g++
+else ifeq ($(HOST_ARCH), aarch32)
+CXX := $(XILINX_VITIS)/gnu/aarch32/lin/gcc-arm-linux-gnueabi/bin/arm-linux-gnueabihf-g++
+endif # HOST_ARCH
+
+#Check OS and setting env for xrt c++ api
+# OSDIST / OSREL: lower-cased distributor id and release printed by lsb_release; ':=' runs it once
+# instead of on every reference.
+OSDIST := $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
+OSREL := $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
+
+# for centos and redhat: release 7 on an x86 host adds -D_GLIBCXX_USE_CXX11_ABI=0 to XRT_CXXFLAGS
+os_is_centos_or_redhat := $(findstring centos,$(OSDIST))$(findstring redhat,$(OSDIST))
+# major release = first dot-separated field of OSREL
+os_release_major := $(firstword $(subst ., ,$(OSREL)))
+
+ifneq ($(os_is_centos_or_redhat),)
+ifeq (7,$(os_release_major))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+endif
+
+# v++ command name (bare name, so it is looked up on PATH)
+VPP := v++
+
+# Command names for aiecompiler and the two simulators
+AIECXX := aiecompiler
+AIESIMULATOR := aiesimulator
+X86SIMULATOR := x86simulator
+
+.PHONY: check_vivado # fails when $(XILINX_VIVADO)/bin/vivado was not found while the makefile was parsed
+check_vivado:
+ifeq (,$(wildcard $(XILINX_VIVADO)/bin/vivado))
+	@echo "Cannot locate Vivado installation. Please set XILINX_VIVADO variable." && false
+endif
+
+.PHONY: check_vpp # fails when $(XILINX_VITIS)/bin/v++ was not found while the makefile was parsed
+check_vpp:
+ifeq (,$(wildcard $(XILINX_VITIS)/bin/v++))
+	@echo "Cannot locate Vitis installation. Please set XILINX_VITIS variable." && false
+endif
+
+.PHONY: check_xrt # fails when $(XILINX_XRT)/lib/libxilinxopencl.so was not found while the makefile was parsed
+check_xrt:
+ifeq (,$(wildcard $(XILINX_XRT)/lib/libxilinxopencl.so))
+	@echo "Cannot locate XRT installation. Please set XILINX_XRT variable." && false
+endif
+
+export PATH := $(XILINX_VITIS)/bin:$(XILINX_XRT)/bin:$(PATH)
+ifeq ($(HOST_ARCH), x86)
+# Put the XRT libraries first on the loader path for x86 hosts. The variable is exported explicitly:
+# make only passes variables to recipes automatically when they came from the environment, so without
+# 'export' the value would be lost whenever LD_LIBRARY_PATH was not already set by the caller.
+# $(if ...) appends the previous value only when there is one, avoiding a trailing ':'.
+export LD_LIBRARY_PATH := $(XILINX_XRT)/lib$(if $(LD_LIBRARY_PATH),:$(LD_LIBRARY_PATH))
+endif
+
+ifneq (,$(wildcard $(PLATFORM)))
+# PLATFORM names an existing path: use it unchanged as the platform
+XPLATFORM := $(PLATFORM)
+else
+# Otherwise use PLATFORM as a name / pattern and look for a matching .xpfm file
+# 1. search the ':'-separated directories listed in PLATFORM_REPO_PATHS
+ifneq (,$(PLATFORM_REPO_PATHS))
+# 1.1 as exact name: <dir>/$(PLATFORM)/$(PLATFORM).xpfm
+XPLATFORM := $(strip $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/$(PLATFORM)/$(PLATFORM).xpfm)))
+# 1.2 as a pattern: keep each <dir>/*/*.xpfm path that matches PLATFORM as an awk regular expression
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/*/*.xpfm))
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 1.2
+endif # 1
+# 2. search $(XILINX_VITIS)/platforms, only if nothing was found so far
+ifeq (,$(XPLATFORM))
+# 2.1 as exact name
+XPLATFORM := $(strip $(wildcard $(XILINX_VITIS)/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 2.2 as a pattern (same awk match as 1.2)
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard $(XILINX_VITIS)/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 2.2
+endif # 2
+# 3. search /opt/xilinx/platforms, only if nothing was found so far
+ifeq (,$(XPLATFORM))
+# 3.1 as exact name
+XPLATFORM := $(strip $(wildcard /opt/xilinx/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 3.2 as a pattern (same awk match as 1.2)
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard /opt/xilinx/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 3.2
+endif # 3
+endif # PLATFORM is not an existing path
+
+define MSG_PLATFORM
+No platform matched pattern '$(PLATFORM)'.
+Available platforms are: $(XPLATFORMS)
+To add more platform directories, set the PLATFORM_REPO_PATHS variable or point PLATFORM variable to the full path of platform .xpfm file.
+endef
+export MSG_PLATFORM
+
+# check_platform: when XPLATFORM is empty, print the exported MSG_PLATFORM text from the shell and fail
+.PHONY: check_platform
+check_platform:
+ifeq (,$(XPLATFORM))
+	@echo "$${MSG_PLATFORM}" && false
+endif
+#Check ends
+
+#   PLATFORM_NAME - file name of PLATFORM with any directory part and a trailing .xpfm removed
+#   $(notdir) replaces a 'basename' sub-shell, so nothing is spawned and an empty PLATFORM is harmless
+PLATFORM_NAME = $(strip $(patsubst %.xpfm,%,$(notdir $(PLATFORM))))
+
+
+# Shorthands for forced remove commands
+RM = rm -f
+RMDIR = rm -rf
+# Forced move / recursive copy; ECHO includes make's '@' prefix, so the echo command line itself is not printed
+MV = mv -f
+CP = cp -rf
+ECHO:= @echo
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/Makefile b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/Makefile
new file mode 100644
index 0000000000..2fb67f323b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/Makefile
@@ -0,0 +1,333 @@
+# Copyright 2019-2022 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# vitis makefile-generator v2.0.6
+
+############################## Help Section ##############################
+.PHONY: help
+
+help::
+	$(ECHO) "Makefile Usage:"
+	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to generate the design for specified Target and Shell."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to run application in emulation."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make xclbin TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to build xclbin application."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make host TARGET=<hw/hw_emu/sw_emu/>"
+	$(ECHO) "      Command to build host application."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
+	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) ""
+	$(ECHO) "  make clean "
+	$(ECHO) "      Command to remove the generated non-hardware files."
+	$(ECHO) ""
+	$(ECHO) "  make cleanall"
+	$(ECHO) "      Command to remove all the generated files."
+	$(ECHO) ""
+
+############################## Setting up Project Variables ##############################
+# XFLIB_DIR is expanded once (:=) so the bash call behind XF_PROJ_ROOT is not re-run for every reference.
+MK_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+XF_PROJ_ROOT ?= $(shell bash -c 'export MK_PATH=$(MK_PATH); echo $${MK_PATH%/L2/*}')
+CUR_DIR := $(patsubst %/,%,$(dir $(MK_PATH)))
+XFLIB_DIR := $(XF_PROJ_ROOT)
+
+# setting default value
+TARGET ?= sw_emu
+HOST_ARCH ?= x86
+
+#setting PLATFORM
+ifeq ($(PLATFORM),)
+PLATFORM := $(DEVICE)
+endif
+ifeq ($(PLATFORM),)
+PLATFORM := xilinx_u50_gen3x16_xdma_5_202210_1
+endif
+
+# #################### Checking if PLATFORM in whitelist ############################
+PLATFORM_ALLOWLIST +=  u50
+PLATFORM_BLOCKLIST +=  zc
+
+include ./utils.mk
+TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
+TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
+BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
+BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
+EMCONFIG := $(BUILD_DIR)/emconfig.json
+XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
+export XCL_BINDIR = $(XCLBIN_DIR)
+
+EXE_FILE_DEPS :=
+BINARY_CONTAINERS_DEPS :=
+RUN_DEPS :=
+
+# get global setting
+ifeq ($(HOST_ARCH), x86)
+CXXFLAGS += -fmessage-length=0 -I$(CUR_DIR)/src/ -I$(XILINX_XRT)/include -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
+LDFLAGS += -pthread -L$(XILINX_XRT)/lib -L$(XILINX_HLS)/lnx64/tools/fpo_v7_0  -Wl,--as-needed -lOpenCL -lxrt_coreutil -lgmp -lmpfr -lIp_floating_point_v7_0_bitacc_cmodel 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
+VPP_LDFLAGS += --optimize 2 -R 2 
+else ifeq ($(HOST_ARCH), aarch64)
+CXXFLAGS += -I$(CUR_DIR)/src/ -fmessage-length=0 --sysroot=$(SYSROOT)  -I$(SYSROOT)/usr/include/xrt -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
+LDFLAGS += -pthread -L$(SYSROOT)/usr/lib -L$(XILINX_VITIS_AIETOOLS)/lib/aarch64.o -Wl,--as-needed -lxilinxopencl -lxrt_coreutil 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
+VPP_LDFLAGS += --optimize 2 -R 2 
+endif
+CXXFLAGS += $(EXTRA_CXXFLAGS)
+VPP_FLAGS += $(EXTRA_VPP_FLAGS)
+
+########################## Setting up Host Variables ##########################
+ifeq ($(TARGET),sw_emu)
+CXXFLAGS += -D SW_EMU_TEST
+endif
+ifeq ($(TARGET),hw_emu)
+CXXFLAGS += -D HW_EMU_TEST
+endif
+
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
+#Include Required Host Source Files
+HOST_SRCS += $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cjxl.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cjxl_main.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cmdline.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/codec_config.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/speed_stats.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cpu/cpu.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cpu/os_specific.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/box/box.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/time.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_png.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_pgx.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_pnm.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_jpg.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_psd.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner_internal.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/decode_to_jpeg.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_huffman.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/ans_common.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_context_map.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/progressive_split.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_detect_dots.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_params.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/entropy_coder.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/blending.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_comparator.cc 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_table.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_tree.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/linalg.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_file.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/aux_out.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/headers.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/alpha.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/image_bundle.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/image_metadata.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/frame_header.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/color_encoding_internal.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/quant_weights.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_fast_heuristics.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_encode.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/fields.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/luminance.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_color_management.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_bit_writer.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/image.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/loop_filter.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/color_management.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_modular.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_quant_weights.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_chroma_from_luma.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_adaptive_quantization.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_modular.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cache.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_group.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ac_strategy.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_photon_noise.cc 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_noise.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_splines.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_patch_dictionary.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/splines.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_xyb.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/gaborish.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ar_control_field.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/gauss_blur.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/memory_manager_internal.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_file.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_image_bundle.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_external_image.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_toc.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ans.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/passes_state.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/chroma_from_luma.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_context_map.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_coeff_order.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_ans.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_entropy_coder.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec_common.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/compressed_dc.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/epf.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_dot_dictionary.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_xyb.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_frame.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_patch_dictionary.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_comparator.cc 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_reconstruct.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group_border.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/filters.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/convolve.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_cache.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_noise.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_huffman.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dct_scales.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/ac_strategy.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_decode.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_icc_codec.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli/butteraugli.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cluster.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data_writer.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/jpeg_data.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data_reader.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/base/padded_bytes.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/base/data_parallel.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/base/cache_aligned.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/base/status.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/dec_ma.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/modular_image.cc 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/encoding.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_rct.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_squeeze.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_palette.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/squeeze.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_transform.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/jxl_transform.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_ma.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_encoding.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encode.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/memory.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references_hq.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/brotli_bit_stream.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_splitter.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/metablock.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment_two_pass.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encoder_dict.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/utf8_util.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/decode.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/static_dict.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/literal_cost.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/entropy_encode.c 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/bit_cost.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/cluster.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/dictionary_hash.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/histogram.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/bit_reader.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/huffman.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/state.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/dictionary.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/transform.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmslut.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsnamed.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspack.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscnvrt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio1.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgmt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsopt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsalpha.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmstypes.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsintrp.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgamma.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscam02.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscgats.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmshalf.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsmtrx.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsps2.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssamp.cpp 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssm.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsxform.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio0.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsplugin.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmserr.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspcs.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmswtpnt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsvirt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lodepng/lodepng.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/aligned_allocator.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/targets.cc $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/acc_enc_frame.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_host.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase1.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase2.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase3.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/acc_lossy_enc_compute/host/host_lossy_enc_compute.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
+CXXFLAGS +=  -I $(XFLIB_DIR)/../utils/L1/include/ -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/ -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/include -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/build/lib/include -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/include -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/highway -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lodepng -I $(XFLIB_DIR)/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute -I $(XFLIB_DIR)/L2/demos/jxlEnc/others/include/ -I $(XFLIB_DIR)/L2/demos/jxlEnc/acc_lossy_enc_compute/kernel -I $(XFLIB_DIR)/L2/demos/jxlEnc/acc_lossy_enc_compute/host
+CXXFLAGS += -O3 
+
+EXE_NAME := host.exe
+EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
+EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
+
+HOST_ARGS :=  --xclbin $(BUILD_DIR)/jxlEnc.xclbin $(XFLIB_DIR)/L2/demos/jxlEnc/images/small32x32.png small32x32.jxl
+ifneq ($(HOST_ARCH), x86)
+PKG_HOST_ARGS = $(foreach args,$(HOST_ARGS),$(subst $(dir $(patsubst %/,%,$(args))),,$(args)))
+endif
+
+########################## Kernel compiler global settings ##########################
+# Only u50 platforms get the HBM connectivity config; findstring avoids forking a shell and awk at parse time.
+ifneq (,$(findstring u50,$(XPLATFORM)))
+VPP_FLAGS +=   --config $(CUR_DIR)/conn_u50.cfg
+endif
+# Kernel include paths are identical for every platform.
+VPP_FLAGS +=  -I $(XFLIB_DIR)/../utils/L1/include/ -I $(XFLIB_DIR)/L2/include/hw/jxlEnc
+
+
+
+######################### binary container global settings ##########################
+VPP_FLAGS_JxlEnc_lossy_enc_compute +=  -D KERNEL_NAME=JxlEnc_lossy_enc_compute
+VPP_FLAGS_JxlEnc_lossy_enc_compute += --hls.clock 300000000:JxlEnc_lossy_enc_compute
+ifneq ($(HOST_ARCH), x86)
+VPP_LDFLAGS_jxlEnc += --clock.defaultFreqHz 300000000
+else
+VPP_LDFLAGS_jxlEnc += --kernel_frequency 300
+endif
+VPP_LDFLAGS_jxlEnc_temp := -g --advanced.param compiler.userPostSysLinkOverlayTcl=postSysLink.tcl
+VPP_LDFLAGS_jxlEnc += $(VPP_LDFLAGS_jxlEnc_temp)
+
+ifeq ($(HOST_ARCH), x86)
+BINARY_CONTAINERS += $(BUILD_DIR)/jxlEnc.xclbin
+else
+BINARY_CONTAINERS += $(BUILD_DIR)/jxlEnc_pkg.$(LINK_TARGET_FMT)
+BINARY_CONTAINERS_PKG += $(BUILD_DIR)/jxlEnc.xclbin
+endif
+
+# ################ Setting Rules for Binary Containers (Building Kernels) ################
+$(TEMP_DIR)/JxlEnc_lossy_enc_compute.xo: $(XFLIB_DIR)/L2/demos/jxlEnc/acc_lossy_enc_compute/kernel/hls_lossy_enc_compute.cpp 
+	$(ECHO) "Compiling Kernel: JxlEnc_lossy_enc_compute"
+	mkdir -p $(TEMP_DIR)
+	$(VPP) -c $(VPP_FLAGS_JxlEnc_lossy_enc_compute) $(VPP_FLAGS) -k JxlEnc_lossy_enc_compute -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
+BINARY_CONTAINER_jxlEnc_OBJS += $(TEMP_DIR)/JxlEnc_lossy_enc_compute.xo
+BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_jxlEnc_OBJS)
+$(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
+	mkdir -p $(BUILD_DIR)
+	$(VPP) -l $(VPP_FLAGS) --temp_dir $(TEMP_DIR) --report_dir $(BUILD_REPORT_DIR)/jxlEnc $(VPP_LDFLAGS)  $(VPP_LDFLAGS_jxlEnc) $(AIE_LDFLAGS)   -o $@ $^
+
+############################## Setting Rules for Host (Building Host Executable) ##############################
+ifeq ($(HOST_ARCH), x86)
+$(EXE_FILE): $(EXE_FILE_DEPS) |  check_xrt
+	mkdir -p $(BUILD_DIR)
+	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
+
+else
+$(EXE_FILE): $(EXE_FILE_DEPS) |  check_sysroot
+	mkdir -p $(BUILD_DIR)
+	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
+
+endif
+
+$(EMCONFIG):
+	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
+############################## Preparing sdcard folder ##############################
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE := $(SYSROOT)/../../uImage
+else
+K_IMAGE := $(SYSROOT)/../../Image
+endif
+RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
+$(RUN_SCRIPT):
+	rm -rf $(RUN_SCRIPT)
+	@echo 'export LD_LIBRARY_PATH=/mnt:/tmp:$(LIBRARY_PATH)' >> $(RUN_SCRIPT)
+ifneq ($(filter sw_emu hw_emu, $(TARGET)),)
+	@echo 'export XCL_EMULATION_MODE=$(TARGET)' >> $(RUN_SCRIPT)
+endif
+	@echo 'export XILINX_VITIS=/mnt' >> $(RUN_SCRIPT)
+	@echo 'export XILINX_XRT=/usr' >> $(RUN_SCRIPT)
+	@echo 'if [ -f platform_desc.txt  ]; then' >> $(RUN_SCRIPT)
+	@echo '        cp platform_desc.txt /etc/xocl.txt' >> $(RUN_SCRIPT)
+	@echo 'fi' >> $(RUN_SCRIPT)
+	@echo './$(EXE_NAME) $(PKG_HOST_ARGS)' >> $(RUN_SCRIPT)
+	@echo 'return_code=$$?' >> $(RUN_SCRIPT)
+	@echo 'if [ $$return_code -ne 0 ]; then' >> $(RUN_SCRIPT)
+	@echo '        echo "ERROR: Embedded host run failed, RC=$$return_code"' >> $(RUN_SCRIPT)
+	@echo 'else' >> $(RUN_SCRIPT)
+	@echo '        echo "INFO: TEST PASSED, RC=0"' >> $(RUN_SCRIPT)
+	@echo 'fi' >> $(RUN_SCRIPT)
+	@echo 'echo "INFO: Embedded host run completed."' >> $(RUN_SCRIPT)
+	@echo 'exit $$return_code' >> $(RUN_SCRIPT)
+DATA_FILE := 
+DATA_DIR := 
+SD_FILES += $(RUN_SCRIPT)
+SD_FILES += $(EXE_FILE)
+SD_FILES += $(EMCONFIG)
+SD_FILES += xrt.ini
+SD_FILES += $(DATA_FILE)# where define DATAFILE in json
+SD_FILES_WITH_PREFIX = $(foreach sd_file,$(SD_FILES), $(if $(filter $(sd_file),$(wildcard $(sd_file))), --package.sd_file $(sd_file)))
+SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
+PACKAGE_FILES := $(BINARY_CONTAINERS)
+PACKAGE_FILES += $(AIE_CONTAINER)
+SD_CARD := $(CUR_DIR)/package_$(TARGET)
+vck190_dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+	@echo "Generating sd_card folder...."
+	mkdir -p $(SD_CARD)
+	chmod a+rx $(BUILD_DIR)/run_script.sh
+ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	@echo "### ***** sd_card generation done! ***** ###"
+vck190_dfx_hw := true
+endif
+endif
+ifeq ($(vck190_dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+	@echo "### ***** sd_card generation done! ***** ###"
+endif
+
+.PHONY: sd_card
+sd_card: $(SD_CARD)
+endif
+############################## Setting Essential Checks and Building Rules ##############################
+RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
+RUN_DEPS += $(SD_CARD)
+
+.PHONY: mkflag all run
+mkflag:
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do echo $$var >> $(BUILD_DIR)/makefile_args.txt; done
+all: check_device  check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
+#hw_emu
+ifneq (,$(filter hw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	XCL_EMULATION_MODE=$(TARGET) $(EXE_FILE) $(HOST_ARGS)
+	./check.sh
+else
+	@echo $(RUN_DEPS)
+	$(SD_CARD)/launch_$(TARGET).sh -no-reboot -run-app $(notdir $(RUN_SCRIPT)) 
+	grep "TEST PASSED, RC=0" $(SD_CARD)/qemu_output.log || exit 1
+	./check.sh
+endif
+endif
+#sw_emu
+ifneq (,$(filter sw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	XCL_EMULATION_MODE=$(TARGET) $(EXE_FILE) $(HOST_ARGS) 
+	./check.sh
+else
+	@echo $(RUN_DEPS)
+	$(SD_CARD)/launch_$(TARGET).sh -no-reboot -run-app $(notdir $(RUN_SCRIPT)) 
+	grep "TEST PASSED, RC=0" $(SD_CARD)/qemu_output.log || exit 1
+	./check.sh
+endif
+endif
+#hw
+ifeq ($(TARGET), hw)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(HOST_ARGS)
+	./check.sh
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(HOST_ARGS)
+	./check.sh
+else
+	$(ECHO) "Please copy the content of sd_card folder and data to an SD Card and run on the board"
+endif
+endif
+
+############################## Setting Targets ##############################
+# cleanh/cleank never create a file of their own name, so they are declared phony with the other command targets.
+.PHONY: clean cleanall cleanh cleank emconfig
+emconfig: $(EMCONFIG)
+
+.PHONY: host
+ifeq ($(HOST_ARCH), x86)
+host:  check_xrt $(EXE_FILE)
+else
+host:  check_sysroot $(EXE_FILE)
+endif
+
+.PHONY: xclbin
+ifeq ($(HOST_ARCH), x86)
+xclbin:  check_vpp check_xrt $(BINARY_CONTAINERS) 
+else
+xclbin:  check_vpp check_sysroot $(BINARY_CONTAINERS) 
+endif
+
+############################## Cleaning Rules ##############################
+cleanh:
+	-$(RMDIR) $(EXE_FILE) vitis_* TempConfig system_estimate.xtxt *.rpt .run/
+	-$(RMDIR) src/*.ll _xocc_* .Xil dltmp* xmltmp* *.log *.jou *.wcfg *.wdb sample_link.ini sample_compile.ini obj*  bin* *.csv *.jpg *.jpeg *.png
+# cleank: remove kernel/emulation outputs. NOTE(review): 'pl* start_simulation.sh' is the assumed intent of the former 'pl*start_simulation. sh', which deleted a file named 'sh' - confirm.
+cleank:
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl* start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*
+
+cleanall: cleanh cleank
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
+	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
+
+clean: cleanh
\ No newline at end of file
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/check.sh b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/check.sh
new file mode 100755
index 0000000000..d9450ab8d2
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/check.sh
@@ -0,0 +1 @@
+echo "bcf0915760ea2ffbfd33a1bb2abe028a  small32x32.jxl" | md5sum -c -
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/conn_u50.cfg b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/conn_u50.cfg
new file mode 100644
index 0000000000..70beb22bc7
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/conn_u50.cfg
@@ -0,0 +1,21 @@
+[hls]
+#pre_tcl=hls_pre.tcl
+
+[connectivity]
+sp=JxlEnc_lossy_enc_compute_1.config:HBM[14] 
+sp=JxlEnc_lossy_enc_compute_1.config_fl:HBM[15] 
+sp=JxlEnc_lossy_enc_compute_1.hls_opsin_1:HBM[0] 
+sp=JxlEnc_lossy_enc_compute_1.hls_opsin_2:HBM[1]
+sp=JxlEnc_lossy_enc_compute_1.hls_opsin_3:HBM[2] 
+sp=JxlEnc_lossy_enc_compute_1.quant_field_row:HBM[3]
+sp=JxlEnc_lossy_enc_compute_1.masking_field_row:HBM[4] 
+sp=JxlEnc_lossy_enc_compute_1.aq_map_f:HBM[5] 
+sp=JxlEnc_lossy_enc_compute_1.cmap_axi:HBM[6]  
+sp=JxlEnc_lossy_enc_compute_1.ac_coef_axiout:HBM[7] 
+sp=JxlEnc_lossy_enc_compute_1.strategy_all:HBM[8] 
+sp=JxlEnc_lossy_enc_compute_1.raw_quant_field_i:HBM[9] 
+sp=JxlEnc_lossy_enc_compute_1.hls_order:HBM[10] 
+sp=JxlEnc_lossy_enc_compute_1.hls_dc8x8:HBM[11] 
+sp=JxlEnc_lossy_enc_compute_1.hls_dc16x16:HBM[12] 
+sp=JxlEnc_lossy_enc_compute_1.hls_dc32x32:HBM[13] 
+
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/description.json b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/description.json
new file mode 100644
index 0000000000..3e66da981d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/description.json
@@ -0,0 +1,328 @@
+{
+    "gui": false,
+    "name": "JXL ACC_LOSSY_ENC Demo",
+    "description": "This example is based on Google's PIK, which was chosen as the base framework for JPEG XL. The pikEnc is based on the 'fast mode' of PIK, which can provide better encoding efficiency than most other still image encoding methods. The pikEnc is based on Xilinx HLS design methodology and optimized for FPGA architecture. It can provide higher throughput and lower latency compared to software-based solutions",
+    "flow": "vitis",
+    "platform_allowlist": [
+        "u50"
+    ],
+    "platform_blocklist": [
+        "zc"
+    ],
+    "platform_properties": {
+        "u50": {
+            "v++": {
+                "compiler": {
+                    "clflags": [
+                        "--config PROJECT/conn_u50.cfg"
+                    ]
+                }
+            }
+        }
+    },
+    "data": [
+        "./data"
+    ],
+    "launch": [
+        {
+            "cmd_args": " --xclbin BUILD/jxlEnc.xclbin LIB_DIR/L2/demos/jxlEnc/images/small32x32.png small32x32.jxl",
+            "name": "generic launch for all flows"
+        }
+    ],
+    "post_launch": [
+        {
+            "launch_cmd": [
+                "./check.sh"
+            ]
+        }
+    ],
+    "host": {
+        "host_exe": "host.exe",
+        "compiler": {
+            "sources": [
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cjxl.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cjxl_main.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cmdline.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/codec_config.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/speed_stats.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cpu/cpu.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cpu/os_specific.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/box/box.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/time.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_png.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_pgx.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_pnm.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_jpg.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_psd.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner_internal.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/decode_to_jpeg.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_huffman.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/ans_common.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_context_map.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/progressive_split.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_detect_dots.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_params.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/entropy_coder.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/blending.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_comparator.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_table.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_tree.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/linalg.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_file.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/aux_out.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/headers.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/alpha.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/image_bundle.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/image_metadata.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/frame_header.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/color_encoding_internal.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/quant_weights.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_fast_heuristics.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_encode.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/fields.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/luminance.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_color_management.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_bit_writer.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/image.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/loop_filter.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/color_management.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_modular.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_quant_weights.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_chroma_from_luma.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_adaptive_quantization.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_modular.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cache.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_group.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ac_strategy.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_photon_noise.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_noise.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_splines.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_patch_dictionary.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/splines.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_xyb.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/gaborish.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ar_control_field.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/gauss_blur.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/memory_manager_internal.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_file.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_image_bundle.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_external_image.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_toc.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ans.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/passes_state.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/chroma_from_luma.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_context_map.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_coeff_order.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_ans.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_entropy_coder.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec_common.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/compressed_dc.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/epf.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_dot_dictionary.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_xyb.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_frame.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_patch_dictionary.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_comparator.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_reconstruct.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group_border.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/filters.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/convolve.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_cache.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_noise.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_huffman.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dct_scales.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/ac_strategy.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_decode.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_icc_codec.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli/butteraugli.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cluster.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data_writer.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/jpeg_data.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data_reader.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/base/padded_bytes.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/base/data_parallel.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/base/cache_aligned.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/base/status.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/dec_ma.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/modular_image.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/encoding.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_rct.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_squeeze.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_palette.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/squeeze.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_transform.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/jxl_transform.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_ma.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_encoding.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encode.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/memory.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references_hq.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/brotli_bit_stream.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_splitter.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/metablock.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment_two_pass.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encoder_dict.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/utf8_util.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/decode.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/static_dict.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/literal_cost.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/entropy_encode.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/bit_cost.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/cluster.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/dictionary_hash.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/histogram.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/bit_reader.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/huffman.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/state.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/dictionary.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/transform.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmslut.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsnamed.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspack.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscnvrt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio1.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgmt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsopt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsalpha.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmstypes.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsintrp.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgamma.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscam02.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscgats.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmshalf.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsmtrx.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsps2.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssamp.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssm.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsxform.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio0.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsplugin.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmserr.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspcs.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmswtpnt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsvirt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lodepng/lodepng.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/aligned_allocator.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/targets.cc",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/acc_enc_frame.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_host.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase1.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase2.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase3.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/acc_lossy_enc_compute/host/host_lossy_enc_compute.cpp",
+                "LIB_DIR/ext/xcl2/xcl2.cpp"
+            ],
+            "includepaths": [
+                "LIB_DIR/../utils/L1/include/",
+                "LIB_DIR/ext/xcl2",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/include",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/build/lib/include",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/include",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/highway",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lodepng",
+                "LIB_DIR/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute",
+                "LIB_DIR/L2/demos/jxlEnc/others/include/",
+                "LIB_DIR/L2/demos/jxlEnc/acc_lossy_enc_compute/kernel",
+                "LIB_DIR/L2/demos/jxlEnc/acc_lossy_enc_compute/host"
+            ],
+            "options": "-O3 "
+        }
+    },
+    "v++": {
+        "compiler": {
+            "includepaths": [
+                "LIB_DIR/../utils/L1/include/",
+                "LIB_DIR/L2/include/hw/jxlEnc"
+            ]
+        }
+    },
+    "containers": [
+        {
+            "name": "jxlEnc",
+            "accelerators": [
+                {
+                    "location": "LIB_DIR/L2/demos/jxlEnc/acc_lossy_enc_compute/kernel/hls_lossy_enc_compute.cpp",
+                    "frequency": 300.0,
+                    "clflags": " -D KERNEL_NAME=JxlEnc_lossy_enc_compute",
+                    "name": "JxlEnc_lossy_enc_compute",
+                    "num_compute_units": 1,
+                    "compute_units": [
+                        {
+                            "name": "JxlEnc_lossy_enc_compute",
+                            "arguments": [
+                                {
+                                    "name": "gmem0_0",
+                                    "memory": "DDR[0]"
+                                },
+                                {
+                                    "name": "gmem0_1",
+                                    "memory": "DDR[0]"
+                                },
+                                {
+                                    "name": "gmem1_0",
+                                    "memory": "DDR[1]"
+                                },
+                                {
+                                    "name": "gmem1_1",
+                                    "memory": "DDR[1]"
+                                },
+                                {
+                                    "name": "gmem1_2",
+                                    "memory": "DDR[1]"
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ],
+            "ldclflags": "-g --advanced.param compiler.userPostSysLinkOverlayTcl=postSysLink.tcl",
+            "frequency": 300
+        }
+    ],
+    "testinfo": {
+        "disable": false,
+        "jobs": [
+            {
+                "index": 0,
+                "dependency": [],
+                "env": "",
+                "cmd": "",
+                "max_memory_MB": {
+                    "vitis_hw_build": 81920,
+                    "vitis_hw_emu": 40960,
+                    "vitis_sw_emu": 10240,
+                    "vitis_hw_run": 10240
+                },
+                "max_time_min": {
+                    "vitis_hw_build": 3200,
+                    "vitis_hw_emu": 1600,
+                    "vitis_sw_emu": 120,
+                    "vitis_hw_run": 10
+                }
+            }
+        ],
+        "targets": [
+            "vitis_sw_emu",
+            "vitis_hw_emu",
+            "vitis_hw"
+        ],
+        "category": "canary"
+    }
+}
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/host/host_lossy_enc_compute.cpp b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/host/host_lossy_enc_compute.cpp
new file mode 100644
index 0000000000..4e2fac99cb
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/host/host_lossy_enc_compute.cpp
@@ -0,0 +1,366 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HOST_LOSSY_ENC_COMPUTE_CPP
+#define HOST_LOSSY_ENC_COMPUTE_CPP
+
+#include <iostream>
+#include <sys/time.h>
+
+#include "host_lossy_enc_compute.hpp"
+
+#ifndef HLS_TEST
+#include "xf_utils_sw/logger.hpp"
+#include "xcl2.hpp"
+#endif
+
+unsigned long diff(const struct timeval* newTime, const struct timeval* oldTime) { // elapsed time newTime - oldTime, in microseconds
+    return (newTime->tv_sec - oldTime->tv_sec) * 1000000 + (newTime->tv_usec - oldTime->tv_usec); // whole seconds scaled to microseconds, plus the microsecond remainder
+}
+
+template <typename T> // T: element type of the returned buffer
+T* aligned_alloc(std::size_t num) { // allocates room for num elements of T on a 4096-byte boundary
+    void* ptr = NULL;
+    if (posix_memalign(&ptr, 4096, num * sizeof(T))) throw std::bad_alloc(); // a non-zero return from posix_memalign means the allocation failed
+    return reinterpret_cast<T*>(ptr); // contents are not initialized here
+}
+
+void hls_lossy_enc_compute_wrapper(std::string xclbinPath,          // xclbin
+                                   int config[MAX_NUM_CONFIG],      // mm15, input
+                                   float config_fl[MAX_NUM_CONFIG], // mm16, input
+                                   float* hls_opsin_1,              // mm1, input
+                                   float* hls_opsin_2,              // mm2, input
+                                   float* hls_opsin_3,              // mm3, input
+                                   float* hls_quant_field,          // mm4, input
+                                   float* hls_masking_field,        // mm5, input
+                                   float* aq_map_f,                 // mm6, input
+                                   int8_t* cmap_axi,                // mm7, output
+                                   int* ac_coef_axiout,             // mm8, output
+                                   uint8_t* strategy_all,           // mm9, output
+                                   int* raw_quant_field_i,          // mm10, output
+                                   uint32_t* hls_order,             // mm11, output
+                                   float* hls_dc8x8,                // mm12, output
+                                   float* hls_dc16x16,              // mm13, output
+                                   float* hls_dc32x32               // mm14, output
+                                   ) {
+#ifndef HLS_TEST
+
+    xf::common::utils_sw::Logger logger(std::cout, std::cerr);
+    cl_int fail;
+
+    struct timeval start_time; // End to end time clock start
+    gettimeofday(&start_time, 0);
+
+    // platform related operations
+    std::vector<cl::Device> devices = xcl::get_xil_devices();
+    cl::Device device = devices[0];
+
+    // Creating Context and Command Queue for selected Device
+    cl::Context context(device, NULL, NULL, NULL, &fail);
+    logger.logCreateContext(fail);
+    cl::CommandQueue q(context, device, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &fail);
+    logger.logCreateCommandQueue(fail);
+    std::string devName = device.getInfo<CL_DEVICE_NAME>();
+    printf("INFO: Found Device=%s\n", devName.c_str());
+    cl::Program::Binaries xclBins = xcl::import_binary_file(xclbinPath);
+
+    devices.resize(1);
+    cl::Program program(context, devices, xclBins, NULL, &fail);
+    logger.logCreateProgram(fail);
+
+    int repInt = 1;
+    // create kernels
+    std::vector<cl::Kernel> hls_lossy_enc_compute(repInt);
+    for (int i = 0; i < repInt; i++) {
+        hls_lossy_enc_compute[i] = cl::Kernel(program, "JxlEnc_lossy_enc_compute", &fail);
+        logger.logCreateKernel(fail);
+    }
+    std::cout << "INFO: kernel has been created" << std::endl;
+
+    // 1. create all I/O Buffer
+    int32_t* hb_config = aligned_alloc<int32_t>(MAX_NUM_CONFIG);
+    float* hb_config_fl = aligned_alloc<float>(MAX_NUM_CONFIG);
+    float* hb_hls_opsin_1 = aligned_alloc<float>(ALL_PIXEL);
+    float* hb_hls_opsin_2 = aligned_alloc<float>(ALL_PIXEL);
+    float* hb_hls_opsin_3 = aligned_alloc<float>(ALL_PIXEL);
+    float* hb_hls_quant_field = aligned_alloc<float>(BLOCK8_H * BLOCK8_W);
+    float* hb_hls_masking_field = aligned_alloc<float>(BLOCK8_H * BLOCK8_W);
+    float* hb_aq_map_f = aligned_alloc<float>(BLOCK8_H * BLOCK8_W);
+    int8_t* hb_cmap_axi = aligned_alloc<int8_t>(TILE_W * TILE_H * 2);
+    int32_t* hb_ac_coef_axiout = aligned_alloc<int32_t>(ALL_PIXEL);
+    uint8_t* hb_strategy_all = aligned_alloc<uint8_t>(BLOCK8_W * BLOCK8_H);
+    int32_t* hb_raw_quant_field_i = aligned_alloc<int32_t>(BLOCK8_H * BLOCK8_W);
+    uint32_t* hb_hls_order = aligned_alloc<uint32_t>(MAX_ORDER);
+    float* hb_hls_dc8x8 = aligned_alloc<float>(ALL_PIXEL);
+    float* hb_hls_dc16x16 = aligned_alloc<float>(ALL_PIXEL);
+    float* hb_hls_dc32x32 = aligned_alloc<float>(ALL_PIXEL);
+
+    //==================================================
+    // 2. init all the host Buffers
+    //==================================================
+
+    // input port
+    for (int j = 0; j < MAX_NUM_CONFIG; j++) {
+        hb_config[j] = config[j];
+    }
+
+    for (int j = 0; j < MAX_NUM_CONFIG; j++) {
+        hb_config_fl[j] = config_fl[j];
+    }
+
+    for (int j = 0; j < ALL_PIXEL; j++) {
+        hb_hls_opsin_1[j] = hls_opsin_1[j];
+    }
+
+    for (int j = 0; j < ALL_PIXEL; j++) {
+        hb_hls_opsin_2[j] = hls_opsin_2[j];
+    }
+
+    for (int j = 0; j < ALL_PIXEL; j++) {
+        hb_hls_opsin_3[j] = hls_opsin_3[j];
+    }
+
+    for (int j = 0; j < BLOCK8_H * BLOCK8_W; j++) {
+        hb_hls_quant_field[j] = hls_quant_field[j];
+    }
+
+    for (int j = 0; j < BLOCK8_H * BLOCK8_W; j++) {
+        hb_hls_masking_field[j] = hls_masking_field[j];
+    }
+
+    for (int j = 0; j < BLOCK8_H * BLOCK8_W; j++) {
+        hb_aq_map_f[j] = aq_map_f[j];
+    }
+
+    // mapping to HBM banks
+    std::vector<cl_mem_ext_ptr_t> mext_o(33);
+    mext_o[0] = {(((unsigned int)(14)) | XCL_MEM_TOPOLOGY), hb_config, 0};
+    mext_o[1] = {(((unsigned int)(15)) | XCL_MEM_TOPOLOGY), hb_config_fl, 0};
+    mext_o[2] = {(((unsigned int)(0)) | XCL_MEM_TOPOLOGY), hb_hls_opsin_1, 0};
+    mext_o[3] = {(((unsigned int)(1)) | XCL_MEM_TOPOLOGY), hb_hls_opsin_2, 0};
+    mext_o[4] = {(((unsigned int)(2)) | XCL_MEM_TOPOLOGY), hb_hls_opsin_3, 0};
+    mext_o[5] = {(((unsigned int)(3)) | XCL_MEM_TOPOLOGY), hb_hls_quant_field, 0};
+    mext_o[6] = {(((unsigned int)(4)) | XCL_MEM_TOPOLOGY), hb_hls_masking_field, 0};
+    mext_o[7] = {(((unsigned int)(5)) | XCL_MEM_TOPOLOGY), hb_aq_map_f, 0};
+    mext_o[8] = {(((unsigned int)(6)) | XCL_MEM_TOPOLOGY), hb_cmap_axi, 0};
+    mext_o[9] = {(((unsigned int)(7)) | XCL_MEM_TOPOLOGY), hb_ac_coef_axiout, 0};
+    mext_o[10] = {(((unsigned int)(8)) | XCL_MEM_TOPOLOGY), hb_strategy_all, 0};
+    mext_o[11] = {(((unsigned int)(9)) | XCL_MEM_TOPOLOGY), hb_raw_quant_field_i, 0};
+    mext_o[12] = {(((unsigned int)(10)) | XCL_MEM_TOPOLOGY), hb_hls_order, 0};
+    mext_o[13] = {(((unsigned int)(11)) | XCL_MEM_TOPOLOGY), hb_hls_dc8x8, 0};
+    mext_o[14] = {(((unsigned int)(12)) | XCL_MEM_TOPOLOGY), hb_hls_dc16x16, 0};
+    mext_o[15] = {(((unsigned int)(13)) | XCL_MEM_TOPOLOGY), hb_hls_dc32x32, 0};
+
+    //===================================================
+    // 3. create device Buffer and map dev buf to host buf,
+    //===================================================
+    cl::Buffer db_config;            // mm15, input
+    cl::Buffer db_config_fl;         // mm16, input
+    cl::Buffer db_hls_opsin_1;       // mm1, input
+    cl::Buffer db_hls_opsin_2;       // mm2, input
+    cl::Buffer db_hls_opsin_3;       // mm3, input
+    cl::Buffer db_hls_quant_field;   // mm4, input
+    cl::Buffer db_hls_masking_field; // mm5, input
+    cl::Buffer db_aq_map_f;          // mm6, input
+    cl::Buffer db_cmap_axi;          // mm7, output
+    cl::Buffer db_ac_coef_axiout;    // mm8, output
+    cl::Buffer db_strategy_all;      // mm9, output
+    cl::Buffer db_raw_quant_field_i; // mm10, output
+    cl::Buffer db_hls_order;         // mm11, output
+    cl::Buffer db_hls_dc8x8;         // mm12, output
+    cl::Buffer db_hls_dc16x16;       // mm13, output
+    cl::Buffer db_hls_dc32x32;       // mm14, output
+
+    // init cl Buffer
+    db_config = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                           sizeof(int) * MAX_NUM_CONFIG, &mext_o[0]);
+    db_config_fl = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                              sizeof(float) * MAX_NUM_CONFIG, &mext_o[1]);
+    db_hls_opsin_1 = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                sizeof(float) * ALL_PIXEL, &mext_o[2]);
+    db_hls_opsin_2 = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                sizeof(float) * ALL_PIXEL, &mext_o[3]);
+    db_hls_opsin_3 = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                sizeof(float) * ALL_PIXEL, &mext_o[4]);
+    db_hls_quant_field = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                    sizeof(float) * (BLOCK8_H * BLOCK8_W), &mext_o[5]);
+    db_hls_masking_field = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                      sizeof(float) * (BLOCK8_H * BLOCK8_W), &mext_o[6]);
+    db_aq_map_f = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                             sizeof(float) * (BLOCK8_H * BLOCK8_W), &mext_o[7]);
+    db_cmap_axi = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                             sizeof(int8_t) * (TILE_W * TILE_H * 2), &mext_o[8]);
+    db_ac_coef_axiout = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                   sizeof(int) * ALL_PIXEL, &mext_o[9]);
+    db_strategy_all = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                 sizeof(uint8_t) * (BLOCK8_H * BLOCK8_W), &mext_o[10]);
+    db_raw_quant_field_i = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                      sizeof(int) * (BLOCK8_H * BLOCK8_W), &mext_o[11]);
+    db_hls_order = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                              sizeof(uint32_t) * MAX_ORDER, &mext_o[12]);
+    db_hls_dc8x8 = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                              sizeof(float) * ALL_PIXEL, &mext_o[13]);
+    db_hls_dc16x16 = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                sizeof(float) * ALL_PIXEL, &mext_o[14]);
+    db_hls_dc32x32 = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                sizeof(float) * ALL_PIXEL, &mext_o[15]);
+    //==================================
+    // add Buffers to migrate
+    std::vector<cl::Memory> ob_in;
+    std::vector<cl::Memory> ob_out;
+
+    ob_in.push_back(db_config);
+    ob_in.push_back(db_config_fl);
+    ob_in.push_back(db_hls_opsin_1);
+    ob_in.push_back(db_hls_opsin_2);
+    ob_in.push_back(db_hls_opsin_3);
+    ob_in.push_back(db_hls_quant_field);
+    ob_in.push_back(db_hls_masking_field);
+    ob_in.push_back(db_aq_map_f);
+
+    ob_out.push_back(db_cmap_axi);
+    ob_out.push_back(db_ac_coef_axiout);
+    ob_out.push_back(db_strategy_all);
+    ob_out.push_back(db_raw_quant_field_i);
+    ob_out.push_back(db_hls_order);
+    ob_out.push_back(db_hls_dc8x8);
+    ob_out.push_back(db_hls_dc16x16);
+    ob_out.push_back(db_hls_dc32x32);
+
+    // set kernel args
+    for (int i = 0; i < repInt; i++) {
+        hls_lossy_enc_compute[i].setArg(0, db_config);
+        hls_lossy_enc_compute[i].setArg(1, db_config_fl);
+        hls_lossy_enc_compute[i].setArg(2, db_hls_opsin_1);
+        hls_lossy_enc_compute[i].setArg(3, db_hls_opsin_2);
+        hls_lossy_enc_compute[i].setArg(4, db_hls_opsin_3);
+        hls_lossy_enc_compute[i].setArg(5, db_hls_quant_field);
+        hls_lossy_enc_compute[i].setArg(6, db_hls_masking_field);
+        hls_lossy_enc_compute[i].setArg(7, db_aq_map_f);
+        hls_lossy_enc_compute[i].setArg(8, db_cmap_axi);
+        hls_lossy_enc_compute[i].setArg(9, db_ac_coef_axiout);
+        hls_lossy_enc_compute[i].setArg(10, db_strategy_all);
+        hls_lossy_enc_compute[i].setArg(11, db_raw_quant_field_i);
+        hls_lossy_enc_compute[i].setArg(12, db_hls_order);
+        hls_lossy_enc_compute[i].setArg(13, db_hls_dc8x8);
+        hls_lossy_enc_compute[i].setArg(14, db_hls_dc16x16);
+        hls_lossy_enc_compute[i].setArg(15, db_hls_dc32x32);
+    }
+
+    // launch kernel and calculate kernel execution time
+    std::cout << "INFO: Kernel Start" << std::endl;
+    // declare events
+    std::vector<cl::Event> events_write(1);
+    std::vector<cl::Event> events_kernel(1);
+    std::vector<cl::Event> events_read(1);
+
+    // migrate,
+    q.enqueueMigrateMemObjects(ob_in, 0, nullptr, &events_write[0]);
+    q.enqueueTask(hls_lossy_enc_compute[0], &events_write, &events_kernel[0]);
+    q.enqueueMigrateMemObjects(ob_out, 1, &events_kernel, &events_read[0]);
+    q.finish();
+
+    struct timeval end_time;
+    gettimeofday(&end_time, 0);
+    std::cout << "INFO: Finish kernel execution" << std::endl;
+    std::cout << "INFO: Finish E2E execution" << std::endl;
+
+    // print related times
+    unsigned long timeStart, timeEnd, exec_time0;
+    std::cout << "-------------------------------------------------------" << std::endl;
+    events_write[0].getProfilingInfo(CL_PROFILING_COMMAND_START, &timeStart);
+    events_write[0].getProfilingInfo(CL_PROFILING_COMMAND_END, &timeEnd);
+    exec_time0 = (timeEnd - timeStart) / 1000.0;
+    std::cout << "INFO: Data transfer from host to device: " << exec_time0 << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+    events_read[0].getProfilingInfo(CL_PROFILING_COMMAND_START, &timeStart);
+    events_read[0].getProfilingInfo(CL_PROFILING_COMMAND_END, &timeEnd);
+    exec_time0 = (timeEnd - timeStart) / 1000.0;
+    std::cout << "INFO: Kernel1 Data transfer from device to host: " << exec_time0 << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+    exec_time0 = 0;
+    for (int i = 0; i < 1; ++i) {
+        events_kernel[0].getProfilingInfo(CL_PROFILING_COMMAND_START, &timeStart);
+        events_kernel[0].getProfilingInfo(CL_PROFILING_COMMAND_END, &timeEnd);
+        exec_time0 += (timeEnd - timeStart) / 1000.0;
+
+        std::cout << "INFO: Kernel" << i + 1 << " execution: " << (timeEnd - timeStart) / 1000.0 << " us\n";
+        std::cout << "-------------------------------------------------------" << std::endl;
+    }
+    std::cout << "INFO: kernel total execution: " << exec_time0 << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+    unsigned long exec_timeE2E = diff(&end_time, &start_time);
+    std::cout << "INFO: FPGA execution time:" << exec_timeE2E << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+
+    // output
+    for (int j = 0; j < TILE_W * TILE_H * 2; j++) {
+        cmap_axi[j] = hb_cmap_axi[j];
+    }
+
+    for (int j = 0; j < ALL_PIXEL; j++) {
+        ac_coef_axiout[j] = hb_ac_coef_axiout[j];
+    }
+
+    for (int j = 0; j < BLOCK8_W * BLOCK8_H; j++) {
+        strategy_all[j] = hb_strategy_all[j];
+    }
+
+    for (int j = 0; j < BLOCK8_H * BLOCK8_W; j++) {
+        raw_quant_field_i[j] = hb_raw_quant_field_i[j];
+    }
+
+    for (int j = 0; j < MAX_ORDER; j++) {
+        hls_order[j] = hb_hls_order[j];
+    }
+
+    for (int j = 0; j < ALL_PIXEL; j++) {
+        hls_dc8x8[j] = hb_hls_dc8x8[j];
+    }
+
+    for (int j = 0; j < ALL_PIXEL; j++) {
+        hls_dc16x16[j] = hb_hls_dc16x16[j];
+    }
+
+    for (int j = 0; j < ALL_PIXEL; j++) {
+        hls_dc32x32[j] = hb_hls_dc32x32[j];
+    }
+
+    // free mem
+    free(hb_hls_opsin_1);
+    free(hb_hls_opsin_2);
+    free(hb_hls_opsin_3);
+    free(hb_hls_quant_field);
+    free(hb_hls_masking_field);
+    free(hb_aq_map_f);
+    free(hb_cmap_axi);
+    free(hb_ac_coef_axiout);
+    free(hb_strategy_all);
+    free(hb_raw_quant_field_i);
+    free(hb_hls_order);
+    free(hb_hls_dc8x8);
+    free(hb_hls_dc16x16);
+    free(hb_hls_dc32x32);
+    free(hb_config);
+    free(hb_config_fl);
+#else
+    hls_lossy_enc_compute(config, config_fl, hls_opsin_1, hls_opsin_2, hls_opsin_3, hls_quant_field, hls_masking_field,
+                          aq_map_f, cmap_axi, ac_coef_axiout, strategy_all, raw_quant_field_i, hls_order, hls_dc8x8,
+                          hls_dc16x16, hls_dc32x32);
+#endif
+}
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/host/host_lossy_enc_compute.hpp b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/host/host_lossy_enc_compute.hpp
new file mode 100644
index 0000000000..9dc93ad803
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/host/host_lossy_enc_compute.hpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HOST_LOSSY_ENC_COMPUTE_HPP
+#define HOST_LOSSY_ENC_COMPUTE_HPP
+
+#include <iostream>
+#include <sys/time.h>
+
+#ifndef HLS_TEST
+#include "xcl2.hpp"
+#include "xf_utils_sw/logger.hpp"
+
+const int PIXEL_W = 2048;                                // width in pixels used to size the buffers
+const int PIXEL_H = 2048;                                // height in pixels used to size the buffers
+const int FRAME_DIM = 3;                                 // planes per pixel (presumably colour channels)
+const int ALL_PIXEL = PIXEL_W * PIXEL_H * FRAME_DIM;     // element count of the per-pixel buffers
+const int BLOCK8_W = PIXEL_W / 8;                        // width in 8-pixel blocks
+const int BLOCK8_H = PIXEL_H / 8;                        // height in 8-pixel blocks
+const int BLOCK8_NUM = BLOCK8_W * BLOCK8_H * FRAME_DIM;  // 8-pixel block grid cells times FRAME_DIM
+const int TILE_W = PIXEL_W / 64;                         // width in 64-pixel tiles
+const int TILE_H = PIXEL_H / 64;                         // height in 64-pixel tiles
+const int MAX_ORDER = 320 * 3 + 1;                       // element count of the hls_order buffer
+const int MAX_NUM_CONFIG = 32;                           // element count of config / config_fl
+
+#else
+#include "hls_lossy_enc_compute.hpp"
+#endif // HLS_TEST
+// Host entry point for the lossy_enc_compute kernel; in/out and sizes below follow the OpenCL path of the .cpp.
+void hls_lossy_enc_compute_wrapper(std::string xclbinPath,           // presumably the xclbin to load - confirm
+                                   int config[MAX_NUM_CONFIG],       // in:  MAX_NUM_CONFIG ints
+                                   float config_fl[MAX_NUM_CONFIG],  // in:  MAX_NUM_CONFIG floats
+                                   float* hls_opsin_1,               // in:  ALL_PIXEL floats
+                                   float* hls_opsin_2,               // in:  ALL_PIXEL floats
+                                   float* hls_opsin_3,               // in:  ALL_PIXEL floats
+                                   float* quant_field_row,           // in:  BLOCK8_H * BLOCK8_W floats
+                                   float* masking_field_row,         // in:  BLOCK8_H * BLOCK8_W floats
+                                   float* aq_map_f,                  // in:  BLOCK8_H * BLOCK8_W floats
+                                   int8_t* cmap_axi,                 // out: TILE_W * TILE_H * 2 int8_t
+                                   int* ac_coef_axiout,              // out: ALL_PIXEL ints
+                                   uint8_t* strategy_all,            // out: BLOCK8_H * BLOCK8_W uint8_t
+                                   int* raw_quant_field_i,           // out: BLOCK8_H * BLOCK8_W ints
+                                   uint32_t* hls_order,              // out: MAX_ORDER uint32_t
+                                   float* hls_dc8x8,                 // out: ALL_PIXEL floats
+                                   float* hls_dc16x16,               // out: ALL_PIXEL floats
+                                   float* hls_dc32x32);              // out: ALL_PIXEL floats
+#endif // HOST_LOSSY_ENC_COMPUTE_HPP
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/kernel/hls_lossy_enc_compute.cpp b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/kernel/hls_lossy_enc_compute.cpp
new file mode 100644
index 0000000000..c0ce310ce9
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/kernel/hls_lossy_enc_compute.cpp
@@ -0,0 +1,9420 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HLS_LOSSY_ENC_COMPUTE_CPP
+#define HLS_LOSSY_ENC_COMPUTE_CPP
+
+#include "hls_lossy_enc_compute.hpp"
+
+#define FIX
+
+// Six-entry table with the same values as the retired covered_blocks_x_set / covered_blocks_y_set arrays.
+// NOTE(review): presumably 8x8 blocks covered per side, indexed by AC strategy - confirm against its users.
+uint8_t strategy_block[6] = {1, 1, 1, 1, 2, 4};
+
+const float inv_matrix_8[3][64] = {{0,  // [0]: 64 floats; symmetric when read as row-major 8x8, element 0 is 0
+                                    560,
+                                    558.510437012,
+                                    489.194152832,
+                                    428.480621338,
+                                    375.302246094,
+                                    328.723815918,
+                                    287.926147461,
+                                    560,
+                                    560,
+                                    541.309387207,
+                                    478.786773682,
+                                    421.547454834,
+                                    370.409942627,
+                                    325.138336182,
+                                    285.227325439,
+                                    558.510437012,
+                                    541.309387207,
+                                    500.443756104,
+                                    451.472991943,
+                                    402.49432373,
+                                    356.627593994,
+                                    314.88571167,
+                                    277.434692383,
+                                    489.194152832,
+                                    478.786773682,
+                                    451.472991943,
+                                    414.922729492,
+                                    375.302246094,
+                                    336.170715332,
+                                    299.277435303,
+                                    265.364807129,
+                                    428.480621338,
+                                    421.547454834,
+                                    402.49432373,
+                                    375.302246094,
+                                    344.016448975,
+                                    311.624298096,
+                                    279.983337402,
+                                    250.119842529,
+                                    375.302246094,
+                                    370.409942627,
+                                    356.627593994,
+                                    336.170715332,
+                                    311.624298096,
+                                    285.227325439,
+                                    258.613525391,
+                                    232.845169067,
+                                    328.723815918,
+                                    325.138336182,
+                                    314.88571167,
+                                    299.277435303,
+                                    279.983337402,
+                                    258.613525391,
+                                    236.484725952,
+                                    214.558776855,
+                                    287.926147461,
+                                    285.227325439,
+                                    277.434692383,
+                                    265.364807129,
+                                    250.119842529,
+                                    232.845169067,
+                                    214.558776855,
+                                    196.071777344},
+                                   {0,  // [1]: same layout. NOTE(review): presumably one table per channel - confirm
+                                    3150,
+                                    3139.25854492,
+                                    2648.63037109,
+                                    2234.68115234,
+                                    1885.42749023,
+                                    1590.75805664,
+                                    1342.14172363,
+                                    3150,
+                                    3150,
+                                    3015.80957031,
+                                    2576.58398438,
+                                    2188.41503906,
+                                    1853.96557617,
+                                    1568.54064941,
+                                    1326.02929688,
+                                    3139.25854492,
+                                    3015.80957031,
+                                    2726.99536133,
+                                    2389.61645508,
+                                    2062.38256836,
+                                    1765.96655273,
+                                    1505.39343262,
+                                    1279.74853516,
+                                    2648.63037109,
+                                    2576.58398438,
+                                    2389.61645508,
+                                    2144.4074707,
+                                    1885.42749023,
+                                    1637.12109375,
+                                    1410.37487793,
+                                    1208.78967285,
+                                    2234.68115234,
+                                    2188.41503906,
+                                    2062.38256836,
+                                    1885.42749023,
+                                    1686.28210449,
+                                    1485.42663574,
+                                    1294.84509277,
+                                    1060.59338379,
+                                    1885.42749023,
+                                    1853.96557617,
+                                    1765.96655273,
+                                    1637.12109375,
+                                    1485.42663574,
+                                    1326.02929688,
+                                    1169.49206543,
+                                    785.963012695,
+                                    1590.75805664,
+                                    1568.54064941,
+                                    1505.39343262,
+                                    1410.37487793,
+                                    1294.84509277,
+                                    1169.49206543,
+                                    838.701721191,
+                                    558.03729248,
+                                    1342.14172363,
+                                    1326.02929688,
+                                    1279.74853516,
+                                    1208.78967285,
+                                    1060.59338379,
+                                    785.963012695,
+                                    558.03729248,
+                                    382.654693604},
+                                   {0,  // [2]: same layout as [0] and [1]
+                                    293.959503174,
+                                    169.469955444,
+                                    119.412483215,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    83.5508270264,
+                                    58.8718566895,
+                                    293.959503174,
+                                    233.598114014,
+                                    156.027160645,
+                                    112.817504883,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    81.1647109985,
+                                    57.4251747131,
+                                    169.469955444,
+                                    156.027160645,
+                                    126.80493927,
+                                    96.6006240845,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    74.5768890381,
+                                    53.3726730347,
+                                    119.412483215,
+                                    112.817504883,
+                                    96.6006240845,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    65.2038497925,
+                                    47.4551811218,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    72.5535202026,
+                                    54.6778106689,
+                                    39.419506073,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    72.5535202026,
+                                    57.4251747131,
+                                    44.3317565918,
+                                    29.2122058868,
+                                    83.5508270264,
+                                    81.1647109985,
+                                    74.5768890381,
+                                    65.2038497925,
+                                    54.6778106689,
+                                    44.3317565918,
+                                    31.1723690033,
+                                    20.7407989502,
+                                    58.8718566895,
+                                    57.4251747131,
+                                    53.3726730347,
+                                    47.4551811218,
+                                    39.419506073,
+                                    29.2122058868,
+                                    20.7407989502,
+                                    14.2222824097}};
+const float inv_matrix_16[3][256] = {{0,
+                                      0,
+                                      2384.4128418,
+                                      2060.98974609,
+                                      1763.60900879,
+                                      1491.73779297,
+                                      1261.77709961,
+                                      1067.26635742,
+                                      956.67767334,
+                                      861.364074707,
+                                      775.546569824,
+                                      703.312927246,
+                                      644.910888672,
+                                      591.358520508,
+                                      542.252990723,
+                                      501.345214844,
+                                      0,
+                                      0,
+                                      2303.75878906,
+                                      2012.80981445,
+                                      1727.63220215,
+                                      1467.21154785,
+                                      1244.41430664,
+                                      1054.64306641,
+                                      950.44720459,
+                                      856.371826172,
+                                      771.497619629,
+                                      700.552734375,
+                                      642.589599609,
+                                      589.392944336,
+                                      540.578857422,
+                                      500.060272217,
+                                      2384.4128418,
+                                      2303.75878906,
+                                      2113.18408203,
+                                      1884.00744629,
+                                      1629.57141113,
+                                      1398.57958984,
+                                      1195.04504395,
+                                      1031.75708008,
+                                      932.273986816,
+                                      841.744262695,
+                                      759.593811035,
+                                      692.403076172,
+                                      635.722961426,
+                                      583.569458008,
+                                      535.612548828,
+                                      496.2421875,
+                                      2060.98974609,
+                                      2012.80981445,
+                                      1884.00744629,
+                                      1693.40161133,
+                                      1491.73779297,
+                                      1297.99816895,
+                                      1120.69970703,
+                                      996.043395996,
+                                      903.588256836,
+                                      818.460021973,
+                                      740.524108887,
+                                      679.239624023,
+                                      624.590454102,
+                                      574.100036621,
+                                      528.409057617,
+                                      489.997619629,
+                                      1763.60900879,
+                                      1727.63220215,
+                                      1629.57141113,
+                                      1491.73779297,
+                                      1336.38830566,
+                                      1179.42834473,
+                                      1039.25634766,
+                                      950.44720459,
+                                      866.416687012,
+                                      787.946533203,
+                                      717.456176758,
+                                      661.633422852,
+                                      609.623046875,
+                                      561.31427002,
+                                      518.629089355,
+                                      481.495361328,
+                                      1491.73779297,
+                                      1467.21154785,
+                                      1398.57958984,
+                                      1297.99816895,
+                                      1179.42834473,
+                                      1054.64294434,
+                                      975.919921875,
+                                      898.074401855,
+                                      823.012390137,
+                                      751.853820801,
+                                      692.403076172,
+                                      640.284667969,
+                                      591.358520508,
+                                      545.629760742,
+                                      506.54699707,
+                                      470.954223633,
+                                      1261.77709961,
+                                      1244.41430664,
+                                      1195.04504395,
+                                      1120.69970703,
+                                      1039.25634766,
+                                      975.919921875,
+                                      909.174133301,
+                                      841.744262695,
+                                      775.546569824,
+                                      714.580871582,
+                                      664.092590332,
+                                      615.952392578,
+                                      570.392150879,
+                                      528.409057617,
+                                      492.477874756,
+                                      458.628570557,
+                                      1067.26635742,
+                                      1054.64306641,
+                                      1031.75708008,
+                                      996.043395996,
+                                      950.44720459,
+                                      898.074401855,
+                                      841.744262695,
+                                      783.770263672,
+                                      726.22833252,
+                                      679.239624023,
+                                      633.465698242,
+                                      589.392944336,
+                                      547.332580566,
+                                      510.515045166,
+                                      476.757659912,
+                                      444.792907715,
+                                      956.67767334,
+                                      950.44720459,
+                                      932.273986816,
+                                      903.588256836,
+                                      866.416687012,
+                                      823.012390137,
+                                      775.546569824,
+                                      726.22833252,
+                                      684.443725586,
+                                      642.589599609,
+                                      601.375,
+                                      561.31439209,
+                                      524.175048828,
+                                      491.234863281,
+                                      459.72479248,
+                                      429.72869873,
+                                      861.364074707,
+                                      856.371826172,
+                                      841.744262695,
+                                      818.460021973,
+                                      787.946533203,
+                                      751.853820801,
+                                      714.580871582,
+                                      679.239624023,
+                                      642.589599609,
+                                      605.472290039,
+                                      568.554870605,
+                                      532.708679199,
+                                      501.345214844,
+                                      470.954223633,
+                                      441.705718994,
+                                      413.71182251,
+                                      775.546569824,
+                                      771.497619629,
+                                      759.593811035,
+                                      740.524108887,
+                                      717.456176758,
+                                      692.403076172,
+                                      664.092590332,
+                                      633.465698242,
+                                      601.375,
+                                      568.554870605,
+                                      535.612426758,
+                                      506.546936035,
+                                      477.933990479,
+                                      450.024688721,
+                                      423.003997803,
+                                      395.167694092,
+                                      703.312927246,
+                                      700.552734375,
+                                      692.403076172,
+                                      679.239624023,
+                                      661.633422852,
+                                      640.284667969,
+                                      615.952392578,
+                                      589.392944336,
+                                      561.31439209,
+                                      532.708679199,
+                                      506.546936035,
+                                      480.302856445,
+                                      454.290039062,
+                                      428.756591797,
+                                      403.216186523,
+                                      375.228302002,
+                                      644.910888672,
+                                      642.589599609,
+                                      635.722961426,
+                                      624.590454102,
+                                      609.623046875,
+                                      591.358520508,
+                                      570.392150879,
+                                      547.332580566,
+                                      524.175048828,
+                                      501.345214844,
+                                      477.933990479,
+                                      454.290039062,
+                                      430.704803467,
+                                      407.340545654,
+                                      380.75769043,
+                                      355.171173096,
+                                      591.358520508,
+                                      589.392944336,
+                                      583.569458008,
+                                      574.100036621,
+                                      561.31427002,
+                                      545.629760742,
+                                      528.409057617,
+                                      510.515045166,
+                                      491.234863281,
+                                      470.954223633,
+                                      450.024688721,
+                                      428.756591797,
+                                      407.340545654,
+                                      382.62991333,
+                                      358.535705566,
+                                      335.223266602,
+                                      542.252990723,
+                                      540.578857422,
+                                      535.612548828,
+                                      528.409057617,
+                                      518.629089355,
+                                      506.54699707,
+                                      492.477874756,
+                                      476.757659912,
+                                      459.72479248,
+                                      441.705718994,
+                                      423.003997803,
+                                      403.216186523,
+                                      380.75769043,
+                                      358.535705566,
+                                      336.753845215,
+                                      315.57409668,
+                                      501.345214844,
+                                      500.060272217,
+                                      496.2421875,
+                                      489.997619629,
+                                      481.495361328,
+                                      470.954223633,
+                                      458.628570557,
+                                      444.792907715,
+                                      429.72869873,
+                                      413.71182251,
+                                      395.167694092,
+                                      375.228302002,
+                                      355.171173096,
+                                      335.223266602,
+                                      315.57409668,
+                                      296.378265381},
+                                     {0,
+                                      0,
+                                      5616.41552734,
+                                      4437.54785156,
+                                      3710.52368164,
+                                      3312.08374023,
+                                      2956.42822266,
+                                      2638.96386719,
+                                      2378.97973633,
+                                      2146.23095703,
+                                      1936.2532959,
+                                      1722.18615723,
+                                      1498.60571289,
+                                      1304.05163574,
+                                      1134.75488281,
+                                      951.882019043,
+                                      0,
+                                      0,
+                                      5312.58251953,
+                                      4271.09716797,
+                                      3658.99584961,
+                                      3275.03710938,
+                                      2928.76391602,
+                                      2617.74536133,
+                                      2363.77954102,
+                                      2134.02709961,
+                                      1926.33569336,
+                                      1711.35717773,
+                                      1489.96264648,
+                                      1297.10559082,
+                                      1129.14038086,
+                                      946.136962891,
+                                      5616.41552734,
+                                      5312.58251953,
+                                      4620.59277344,
+                                      3880.56469727,
+                                      3516.76147461,
+                                      3170.29418945,
+                                      2849.4152832,
+                                      2562.00634766,
+                                      2319.43164062,
+                                      2098.26171875,
+                                      1897.17285156,
+                                      1679.53442383,
+                                      1464.50524902,
+                                      1276.60888672,
+                                      1112.54638672,
+                                      929.184143066,
+                                      4437.54785156,
+                                      4271.09716797,
+                                      3880.56469727,
+                                      3609.64770508,
+                                      3312.08374023,
+                                      3013.74951172,
+                                      2727.90283203,
+                                      2474.97729492,
+                                      2249.39648438,
+                                      2041.30578613,
+                                      1850.4362793,
+                                      1628.60998535,
+                                      1423.58496094,
+                                      1243.54284668,
+                                      1077.57275391,
+                                      901.836975098,
+                                      3710.52368164,
+                                      3658.99584961,
+                                      3516.76147461,
+                                      3312.08374023,
+                                      3073.94458008,
+                                      2824.09741211,
+                                      2580.27368164,
+                                      2363.77954102,
+                                      2158.58081055,
+                                      1966.61950684,
+                                      1778.07653809,
+                                      1561.42590332,
+                                      1369.25976562,
+                                      1199.41723633,
+                                      1031.11547852,
+                                      865.35723877,
+                                      3312.08374023,
+                                      3275.03710938,
+                                      3170.29418945,
+                                      3013.74951172,
+                                      2824.09741211,
+                                      2617.74511719,
+                                      2425.91333008,
+                                      2235.92993164,
+                                      2052.44384766,
+                                      1878.20617676,
+                                      1679.53442383,
+                                      1481.39880371,
+                                      1304.05163574,
+                                      1146.11157227,
+                                      975.344787598,
+                                      821.329833984,
+                                      2956.42822266,
+                                      2928.76391602,
+                                      2849.4152832,
+                                      2727.90283203,
+                                      2580.27368164,
+                                      2425.91333008,
+                                      2263.03759766,
+                                      2098.26171875,
+                                      1936.2532959,
+                                      1766.65966797,
+                                      1570.74584961,
+                                      1392.13525391,
+                                      1230.68457031,
+                                      1077.57275391,
+                                      912.64251709,
+                                      771.521240234,
+                                      2638.96386719,
+                                      2617.74536133,
+                                      2562.00634766,
+                                      2474.97729492,
+                                      2363.77954102,
+                                      2235.92993164,
+                                      2098.26171875,
+                                      1956.39318848,
+                                      1813.07836914,
+                                      1628.60998535,
+                                      1456.17285156,
+                                      1297.10559082,
+                                      1151.85449219,
+                                      993.464355469,
+                                      845.405334473,
+                                      717.737731934,
+                                      2378.97973633,
+                                      2363.77954102,
+                                      2319.43164062,
+                                      2249.39648438,
+                                      2158.58081055,
+                                      2052.44384766,
+                                      1936.2532959,
+                                      1813.07836914,
+                                      1648.67211914,
+                                      1489.96264648,
+                                      1339.6640625,
+                                      1199.41748047,
+                                      1057.31555176,
+                                      907.217956543,
+                                      775.878479004,
+                                      661.709289551,
+                                      2146.23095703,
+                                      2134.02709961,
+                                      2098.26171875,
+                                      2041.30578613,
+                                      1966.61950684,
+                                      1878.20617676,
+                                      1766.65966797,
+                                      1628.60998535,
+                                      1489.96264648,
+                                      1354.33557129,
+                                      1224.33178711,
+                                      1098.37109375,
+                                      951.882019043,
+                                      821.329833984,
+                                      706.041503906,
+                                      604.99597168,
+                                      1936.2532959,
+                                      1926.33569336,
+                                      1897.17285156,
+                                      1850.4362793,
+                                      1778.07653809,
+                                      1679.53442383,
+                                      1570.74584961,
+                                      1456.17285156,
+                                      1339.6640625,
+                                      1224.33178711,
+                                      1112.54614258,
+                                      975.344482422,
+                                      850.33416748,
+                                      737.812194824,
+                                      637.541503906,
+                                      531.866638184,
+                                      1722.18615723,
+                                      1711.35717773,
+                                      1679.53442383,
+                                      1628.60998535,
+                                      1561.42590332,
+                                      1481.39880371,
+                                      1392.13525391,
+                                      1297.10559082,
+                                      1199.41748047,
+                                      1098.37109375,
+                                      975.344482422,
+                                      860.309997559,
+                                      754.414855957,
+                                      658.18359375,
+                                      565.168762207,
+                                      455.065155029,
+                                      1498.60571289,
+                                      1489.96264648,
+                                      1464.50524902,
+                                      1423.58496094,
+                                      1369.25976562,
+                                      1304.05163574,
+                                      1230.68457031,
+                                      1151.85449219,
+                                      1057.31555176,
+                                      951.882019043,
+                                      850.33416748,
+                                      754.414855957,
+                                      665.260375977,
+                                      582.761047363,
+                                      475.564758301,
+                                      385.666412354,
+                                      1304.05163574,
+                                      1297.10559082,
+                                      1276.60888672,
+                                      1243.54284668,
+                                      1199.41723633,
+                                      1146.11157227,
+                                      1077.57275391,
+                                      993.464355469,
+                                      907.217956543,
+                                      821.329833984,
+                                      737.812194824,
+                                      658.18359375,
+                                      582.761047363,
+                                      482.643035889,
+                                      396.775939941,
+                                      324.039428711,
+                                      1134.75488281,
+                                      1129.14038086,
+                                      1112.54638672,
+                                      1077.57275391,
+                                      1031.11547852,
+                                      975.344787598,
+                                      912.64251709,
+                                      845.405334473,
+                                      775.878479004,
+                                      706.041503906,
+                                      637.541503906,
+                                      565.168762207,
+                                      475.564758301,
+                                      396.775939941,
+                                      328.516326904,
+                                      270.136077881,
+                                      951.882019043,
+                                      946.136962891,
+                                      929.184143066,
+                                      901.836975098,
+                                      865.35723877,
+                                      821.329833984,
+                                      771.521240234,
+                                      717.737731934,
+                                      661.709289551,
+                                      604.99597168,
+                                      531.866638184,
+                                      455.065155029,
+                                      385.666412354,
+                                      324.039428711,
+                                      270.136077881,
+                                      223.60848999},
+                                     {0,
+                                      0,
+                                      615.613830566,
+                                      448.953399658,
+                                      337.930267334,
+                                      263.807556152,
+                                      205.943115234,
+                                      160.770889282,
+                                      141.832733154,
+                                      126.301643372,
+                                      112.471244812,
+                                      100.763389587,
+                                      91.1208114624,
+                                      82.4009933472,
+                                      74.5156097412,
+                                      58.8962364197,
+                                      0,
+                                      0,
+                                      571.402038574,
+                                      426.532226562,
+                                      327.784393311,
+                                      257.417816162,
+                                      201.765563965,
+                                      157.966430664,
+                                      140.812332153,
+                                      125.492965698,
+                                      111.822540283,
+                                      100.304679871,
+                                      90.7403564453,
+                                      82.0832748413,
+                                      74.2487335205,
+                                      58.3933258057,
+                                      615.613830566,
+                                      571.402038574,
+                                      473.941894531,
+                                      372.602783203,
+                                      300.644775391,
+                                      239.80960083,
+                                      190.039825439,
+                                      154.182662964,
+                                      137.840042114,
+                                      123.126365662,
+                                      109.91746521,
+                                      98.952003479,
+                                      89.6162185669,
+                                      81.1429672241,
+                                      73.4578170776,
+                                      56.9167442322,
+                                      448.953399658,
+                                      426.532226562,
+                                      372.602783203,
+                                      318.224456787,
+                                      263.807556152,
+                                      214.746795654,
+                                      172.817260742,
+                                      148.295852661,
+                                      133.160797119,
+                                      119.368148804,
+                                      106.872108459,
+                                      96.7725219727,
+                                      87.7978591919,
+                                      79.6171722412,
+                                      70.2083129883,
+                                      54.5584373474,
+                                      337.930267334,
+                                      327.784393311,
+                                      300.644775391,
+                                      263.807556152,
+                                      224.206954956,
+                                      186.378311157,
+                                      155.421569824,
+                                      140.812332153,
+                                      127.120582581,
+                                      114.460098267,
+                                      103.118339539,
+                                      93.8680496216,
+                                      85.3613052368,
+                                      77.5634307861,
+                                      65.9593734741,
+                                      51.4587516785,
+                                      263.807556152,
+                                      257.417816162,
+                                      239.80960083,
+                                      214.746795654,
+                                      186.378311157,
+                                      157.966400146,
+                                      144.988540649,
+                                      132.263153076,
+                                      120.102050781,
+                                      108.680435181,
+                                      98.952003479,
+                                      90.3628005981,
+                                      82.4009933472,
+                                      75.0543060303,
+                                      60.9631996155,
+                                      47.7897415161,
+                                      205.943115234,
+                                      201.765563965,
+                                      190.039825439,
+                                      172.817260742,
+                                      155.421569824,
+                                      144.988540649,
+                                      134.070770264,
+                                      123.126365662,
+                                      112.471244812,
+                                      102.638969421,
+                                      94.2730102539,
+                                      86.3905029297,
+                                      79.0208206177,
+                                      70.2083129883,
+                                      55.4867515564,
+                                      43.7368011475,
+                                      160.770889282,
+                                      157.966430664,
+                                      154.182662964,
+                                      148.295852661,
+                                      140.812332153,
+                                      132.263153076,
+                                      123.126365662,
+                                      113.789886475,
+                                      104.582710266,
+                                      96.7725219727,
+                                      89.2471008301,
+                                      82.0832748413,
+                                      75.3261566162,
+                                      62.5737113953,
+                                      49.7861824036,
+                                      39.4813766479,
+                                      141.832733154,
+                                      140.812332153,
+                                      137.840042114,
+                                      133.160797119,
+                                      127.120582581,
+                                      120.102050781,
+                                      112.471244812,
+                                      104.582710266,
+                                      97.6333694458,
+                                      90.7403564453,
+                                      84.0226669312,
+                                      77.5634460449,
+                                      68.3460235596,
+                                      55.020149231,
+                                      44.0871162415,
+                                      35.1875991821,
+                                      126.301643372,
+                                      125.492965698,
+                                      123.126365662,
+                                      119.368148804,
+                                      114.460098267,
+                                      108.680435181,
+                                      102.638969421,
+                                      96.7725219727,
+                                      90.7403564453,
+                                      84.6872787476,
+                                      78.7255554199,
+                                      72.1355895996,
+                                      58.8962364197,
+                                      47.7897415161,
+                                      38.5730819702,
+                                      30.993062973,
+                                      112.471244812,
+                                      111.822540283,
+                                      109.91746521,
+                                      106.872108459,
+                                      103.118339539,
+                                      98.952003479,
+                                      94.2730102539,
+                                      89.2471008301,
+                                      84.0226669312,
+                                      78.7255554199,
+                                      73.4578094482,
+                                      60.9631729126,
+                                      50.1978492737,
+                                      41.0546913147,
+                                      33.3810348511,
+                                      24.7806758881,
+                                      100.763389587,
+                                      100.304679871,
+                                      98.952003479,
+                                      96.7725219727,
+                                      93.8680496216,
+                                      90.3628005981,
+                                      86.3905029297,
+                                      82.0832748413,
+                                      77.5634460449,
+                                      72.1355895996,
+                                      60.9631729126,
+                                      51.0341072083,
+                                      42.3694725037,
+                                      34.9223136902,
+                                      27.7260704041,
+                                      18.5722160339,
+                                      91.1208114624,
+                                      90.7403564453,
+                                      89.6162185669,
+                                      87.7978591919,
+                                      85.3613052368,
+                                      82.4009933472,
+                                      79.0208206177,
+                                      75.3261566162,
+                                      68.3460235596,
+                                      58.8962364197,
+                                      50.1978492737,
+                                      42.3694725037,
+                                      35.4553947449,
+                                      29.343132019,
+                                      20.1489048004,
+                                      13.676407814,
+                                      82.4009933472,
+                                      82.0832748413,
+                                      81.1429672241,
+                                      79.6171722412,
+                                      77.5634307861,
+                                      75.0543060303,
+                                      70.2083129883,
+                                      62.5737113953,
+                                      55.020149231,
+                                      47.7897415161,
+                                      41.0546913147,
+                                      34.9223136902,
+                                      29.343132019,
+                                      20.7069969177,
+                                      14.4138498306,
+                                      9.9115486145,
+                                      74.5156097412,
+                                      74.2487335205,
+                                      73.4578170776,
+                                      70.2083129883,
+                                      65.9593734741,
+                                      60.9631996155,
+                                      55.4867515564,
+                                      49.7861824036,
+                                      44.0871162415,
+                                      38.5730819702,
+                                      33.3810348511,
+                                      27.7260704041,
+                                      20.1489048004,
+                                      14.4138498306,
+                                      10.166267395,
+                                      7.07980155945,
+                                      58.8962364197,
+                                      58.3933258057,
+                                      56.9167442322,
+                                      54.5584373474,
+                                      51.4587516785,
+                                      47.7897415161,
+                                      43.7368011475,
+                                      39.4813766479,
+                                      35.1875991821,
+                                      30.993062973,
+                                      24.7806758881,
+                                      18.5722160339,
+                                      13.676407814,
+                                      9.9115486145,
+                                      7.07980155945,
+                                      4.99121952057}};
+const float inv_matrix_32[3][1024] = {{0,
+                                       0,
+                                       0,
+                                       0,
+                                       5011.67871094,
+                                       4561.02685547,
+                                       4150.89794922,
+                                       3787.85327148,
+                                       3459.89013672,
+                                       3160.32299805,
+                                       2886.69311523,
+                                       2636.75488281,
+                                       2408.45727539,
+                                       2220.78833008,
+                                       2069.29418945,
+                                       1928.13452148,
+                                       1796.60424805,
+                                       1674.04626465,
+                                       1559.84912109,
+                                       1455.32824707,
+                                       1364.40710449,
+                                       1279.16601562,
+                                       1199.25048828,
+                                       1124.32775879,
+                                       1054.08581543,
+                                       988.231933594,
+                                       932.328857422,
+                                       879.889831543,
+                                       830.400390625,
+                                       783.694335938,
+                                       739.61541748,
+                                       698.015563965,
+                                       0,
+                                       0,
+                                       0,
+                                       0,
+                                       4953.88232422,
+                                       4518.67041016,
+                                       4118.65429688,
+                                       3763.55249023,
+                                       3440.43725586,
+                                       3144.51098633,
+                                       2873.68359375,
+                                       2625.9453125,
+                                       2399.40185547,
+                                       2214.77026367,
+                                       2064.08569336,
+                                       1923.60375977,
+                                       1792.64550781,
+                                       1670.57409668,
+                                       1556.79296875,
+                                       1452.8614502,
+                                       1362.2097168,
+                                       1277.20385742,
+                                       1197.49438477,
+                                       1122.75280762,
+                                       1052.67053223,
+                                       986.958068848,
+                                       931.291748047,
+                                       878.947387695,
+                                       829.542602539,
+                                       782.912841797,
+                                       738.902404785,
+                                       697.364379883,
+                                       0,
+                                       0,
+                                       0,
+                                       0,
+                                       4793.61474609,
+                                       4398.46826172,
+                                       4026.78955078,
+                                       3692.97387695,
+                                       3383.59692383,
+                                       3098.10839844,
+                                       2835.38208008,
+                                       2594.04101562,
+                                       2372.62280273,
+                                       2196.91870117,
+                                       2048.6171875,
+                                       1910.1348877,
+                                       1780.86755371,
+                                       1660.23608398,
+                                       1547.6887207,
+                                       1445.50598145,
+                                       1355.65515137,
+                                       1271.34863281,
+                                       1192.25231934,
+                                       1118.05004883,
+                                       1048.44384766,
+                                       983.335632324,
+                                       928.192504883,
+                                       876.130004883,
+                                       826.978088379,
+                                       780.575439453,
+                                       736.76965332,
+                                       695.41619873,
+                                       0,
+                                       0,
+                                       0,
+                                       0,
+                                       4561.02685547,
+                                       4217.54345703,
+                                       3889.28466797,
+                                       3582.40161133,
+                                       3293.56469727,
+                                       3024.01489258,
+                                       2773.8503418,
+                                       2542.54394531,
+                                       2329.23535156,
+                                       2167.82006836,
+                                       2023.34472656,
+                                       1888.08752441,
+                                       1761.55737305,
+                                       1643.26379395,
+                                       1532.72387695,
+                                       1433.39599609,
+                                       1344.85412598,
+                                       1261.69274902,
+                                       1183.6015625,
+                                       1110.28479004,
+                                       1041.46118164,
+                                       977.688903809,
+                                       923.064758301,
+                                       871.467163086,
+                                       822.732299805,
+                                       776.704589844,
+                                       733.236633301,
+                                       692.188110352,
+                                       5011.67871094,
+                                       4953.88232422,
+                                       4793.61474609,
+                                       4561.02685547,
+                                       4287.29882812,
+                                       3998.23925781,
+                                       3716.125,
+                                       3440.43725586,
+                                       3176.31298828,
+                                       2926.47753906,
+                                       2692.17285156,
+                                       2473.73583984,
+                                       2276.53393555,
+                                       2128.3894043,
+                                       1988.98718262,
+                                       1858.03149414,
+                                       1735.17175293,
+                                       1620.02612305,
+                                       1512.1998291,
+                                       1416.7467041,
+                                       1329.98596191,
+                                       1248.38586426,
+                                       1171.66845703,
+                                       1099.56396484,
+                                       1031.81274414,
+                                       969.874816895,
+                                       915.964599609,
+                                       865.006896973,
+                                       816.846923828,
+                                       771.336730957,
+                                       728.335144043,
+                                       687.707824707,
+                                       4561.02685547,
+                                       4518.67041016,
+                                       4398.46826172,
+                                       4217.54345703,
+                                       3998.23925781,
+                                       3763.55249023,
+                                       3519.85961914,
+                                       3276.21459961,
+                                       3038.5234375,
+                                       2810.43310547,
+                                       2594.04101562,
+                                       2390.41162109,
+                                       2220.78833008,
+                                       2079.79077148,
+                                       1946.46655273,
+                                       1820.70666504,
+                                       1702.30786133,
+                                       1591.01000977,
+                                       1486.51586914,
+                                       1395.84521484,
+                                       1311.29003906,
+                                       1231.62927246,
+                                       1156.62243652,
+                                       1086.03051758,
+                                       1019.62091064,
+                                       959.981201172,
+                                       906.967834473,
+                                       856.815124512,
+                                       809.379150391,
+                                       764.521179199,
+                                       722.108276367,
+                                       682.245117188,
+                                       4150.89794922,
+                                       4118.65429688,
+                                       4026.78955078,
+                                       3889.28466797,
+                                       3716.125,
+                                       3519.85961914,
+                                       3311.12646484,
+                                       3098.10839844,
+                                       2886.69311523,
+                                       2680.90209961,
+                                       2483.34204102,
+                                       2295.77954102,
+                                       2156.40185547,
+                                       2023.34472656,
+                                       1896.84716797,
+                                       1776.97375488,
+                                       1663.66943359,
+                                       1556.79296875,
+                                       1457.80285645,
+                                       1371.0369873,
+                                       1289.05615234,
+                                       1211.6673584,
+                                       1138.67041016,
+                                       1069.86108398,
+                                       1005.03588867,
+                                       948.117004395,
+                                       896.168579102,
+                                       846.973266602,
+                                       800.399719238,
+                                       756.319885254,
+                                       714.610473633,
+                                       675.848571777,
+                                       3787.85327148,
+                                       3763.55249023,
+                                       3692.97387695,
+                                       3582.40161133,
+                                       3440.43725586,
+                                       3276.21459961,
+                                       3098.10839844,
+                                       2913.08789062,
+                                       2726.56884766,
+                                       2542.54394531,
+                                       2363.82275391,
+                                       2214.77026367,
+                                       2085.0793457,
+                                       1960.44067383,
+                                       1841.2644043,
+                                       1727.76757812,
+                                       1620.02624512,
+                                       1518.01391602,
+                                       1426.21765137,
+                                       1342.71228027,
+                                       1263.61376953,
+                                       1188.7791748,
+                                       1118.05004883,
+                                       1051.25854492,
+                                       988.232055664,
+                                       934.408508301,
+                                       883.676330566,
+                                       835.576843262,
+                                       789.991882324,
+                                       746.805908203,
+                                       705.905090332,
+                                       668.410766602,
+                                       3459.89013672,
+                                       3440.43725586,
+                                       3383.59692383,
+                                       3293.56469727,
+                                       3176.31298828,
+                                       3038.5234375,
+                                       2886.69311523,
+                                       2726.56884766,
+                                       2562.8984375,
+                                       2399.40185547,
+                                       2251.3984375,
+                                       2128.38916016,
+                                       2008.47729492,
+                                       1892.4576416,
+                                       1780.86755371,
+                                       1674.04638672,
+                                       1572.18115234,
+                                       1475.3458252,
+                                       1391.27539062,
+                                       1311.29003906,
+                                       1235.31958008,
+                                       1163.2689209,
+                                       1095.02172852,
+                                       1030.44628906,
+                                       970.984558105,
+                                       918.996520996,
+                                       869.613464355,
+                                       822.732299805,
+                                       778.249511719,
+                                       736.060913086,
+                                       696.064697266,
+                                       659.988525391,
+                                       3160.32299805,
+                                       3144.51098633,
+                                       3098.10839844,
+                                       3024.01489258,
+                                       2926.47753906,
+                                       2810.43310547,
+                                       2680.90209961,
+                                       2542.54394531,
+                                       2399.40185547,
+                                       2263.89282227,
+                                       2150.73901367,
+                                       2038.4329834,
+                                       1928.13452148,
+                                       1820.70666504,
+                                       1716.77062988,
+                                       1616.75537109,
+                                       1520.93566895,
+                                       1433.39599609,
+                                       1353.48266602,
+                                       1277.20385742,
+                                       1204.54553223,
+                                       1135.45678711,
+                                       1069.86108398,
+                                       1007.66223145,
+                                       952.402770996,
+                                       902.032531738,
+                                       854.112670898,
+                                       808.556152344,
+                                       765.27355957,
+                                       724.174499512,
+                                       685.184509277,
+                                       650.644348145,
+                                       2886.69311523,
+                                       2873.68359375,
+                                       2835.38208008,
+                                       2773.8503418,
+                                       2692.17285156,
+                                       2594.04101562,
+                                       2483.34204102,
+                                       2363.82275391,
+                                       2251.3984375,
+                                       2150.73901367,
+                                       2048.6171875,
+                                       1946.46655273,
+                                       1845.42907715,
+                                       1746.38916016,
+                                       1650.01464844,
+                                       1556.79296875,
+                                       1467.77758789,
+                                       1389.00024414,
+                                       1313.34448242,
+                                       1240.89025879,
+                                       1171.66845703,
+                                       1105.66882324,
+                                       1042.85168457,
+                                       983.335632324,
+                                       932.328857422,
+                                       883.676208496,
+                                       837.313964844,
+                                       793.171142578,
+                                       751.173278809,
+                                       711.242553711,
+                                       674.120605469,
+                                       640.444885254,
+                                       2636.75488281,
+                                       2625.9453125,
+                                       2594.04101562,
+                                       2542.54394531,
+                                       2473.73583984,
+                                       2390.41162109,
+                                       2295.77954102,
+                                       2214.77026367,
+                                       2128.38916016,
+                                       2038.4329834,
+                                       1946.46655273,
+                                       1853.81237793,
+                                       1761.55737305,
+                                       1670.57421875,
+                                       1581.54528809,
+                                       1494.99133301,
+                                       1416.74682617,
+                                       1342.71228027,
+                                       1271.34875488,
+                                       1202.77600098,
+                                       1137.06164551,
+                                       1074.23132324,
+                                       1014.27752686,
+                                       959.981201172,
+                                       910.948364258,
+                                       864.090393066,
+                                       819.361022949,
+                                       776.704589844,
+                                       736.060913086,
+                                       697.364379883,
+                                       662.218017578,
+                                       629.4609375,
+                                       2408.45727539,
+                                       2399.40185547,
+                                       2372.62280273,
+                                       2329.23535156,
+                                       2276.53393555,
+                                       2220.78833008,
+                                       2156.40185547,
+                                       2085.0793457,
+                                       2008.47729492,
+                                       1928.13452148,
+                                       1845.42907715,
+                                       1761.55737305,
+                                       1677.53186035,
+                                       1594.18786621,
+                                       1512.1998291,
+                                       1435.8034668,
+                                       1364.40710449,
+                                       1295.05419922,
+                                       1227.95825195,
+                                       1163.2689209,
+                                       1101.08483887,
+                                       1041.46118164,
+                                       984.471923828,
+                                       935.451171875,
+                                       888.446838379,
+                                       843.440002441,
+                                       800.399719238,
+                                       759.286071777,
+                                       720.051269531,
+                                       682.831237793,
+                                       649.558654785,
+                                       617.765075684,
+                                       2220.78833008,
+                                       2214.77026367,
+                                       2196.91870117,
+                                       2167.82006836,
+                                       2128.3894043,
+                                       2079.79077148,
+                                       2023.34472656,
+                                       1960.44067383,
+                                       1892.4576416,
+                                       1820.70666504,
+                                       1746.38916016,
+                                       1670.57421875,
+                                       1594.18786621,
+                                       1518.01391602,
+                                       1445.50622559,
+                                       1377.7244873,
+                                       1311.29003906,
+                                       1246.50488281,
+                                       1183.6015625,
+                                       1122.75280762,
+                                       1064.07983398,
+                                       1007.66223145,
+                                       956.720947266,
+                                       909.950622559,
+                                       865.006896973,
+                                       821.887329102,
+                                       780.575439453,
+                                       741.04473877,
+                                       703.25994873,
+                                       668.410766602,
+                                       636.225524902,
+                                       605.431762695,
+                                       2069.29418945,
+                                       2064.08569336,
+                                       2048.6171875,
+                                       2023.34472656,
+                                       1988.98718262,
+                                       1946.46655273,
+                                       1896.84716797,
+                                       1841.2644043,
+                                       1780.86755371,
+                                       1716.77062988,
+                                       1650.01464844,
+                                       1581.54528809,
+                                       1512.1998291,
+                                       1445.50622559,
+                                       1382.21533203,
+                                       1319.54187012,
+                                       1257.8659668,
+                                       1197.49438477,
+                                       1138.67041016,
+                                       1081.58068848,
+                                       1026.36474609,
+                                       974.326965332,
+                                       928.192504883,
+                                       883.676208496,
+                                       840.805603027,
+                                       799.591430664,
+                                       760.030456543,
+                                       722.108276367,
+                                       685.801452637,
+                                       653.370849609,
+                                       622.301086426,
+                                       592.535888672,
+                                       1928.13452148,
+                                       1923.60375977,
+                                       1910.1348877,
+                                       1888.08752441,
+                                       1858.03149414,
+                                       1820.70666504,
+                                       1776.97375488,
+                                       1727.76757812,
+                                       1674.04638672,
+                                       1616.75537109,
+                                       1556.79296875,
+                                       1494.99133301,
+                                       1435.8034668,
+                                       1377.7244873,
+                                       1319.54187012,
+                                       1261.69274902,
+                                       1204.54553223,
+                                       1148.40356445,
+                                       1093.51477051,
+                                       1040.0736084,
+                                       988.232055664,
+                                       942.805053711,
+                                       899.092651367,
+                                       856.815124512,
+                                       816.011779785,
+                                       776.704589844,
+                                       738.902404785,
+                                       702.601013184,
+                                       668.97833252,
+                                       637.802612305,
+                                       607.867736816,
+                                       579.151306152,
+                                       1796.60424805,
+                                       1792.64550781,
+                                       1780.86755371,
+                                       1761.55737305,
+                                       1735.17175293,
+                                       1702.30786133,
+                                       1663.66943359,
+                                       1620.02624512,
+                                       1572.18115234,
+                                       1520.93566895,
+                                       1467.77758789,
+                                       1416.74682617,
+                                       1364.40710449,
+                                       1311.29003906,
+                                       1257.8659668,
+                                       1204.54553223,
+                                       1151.67944336,
+                                       1099.56396484,
+                                       1048.44384766,
+                                       998.518310547,
+                                       953.479187012,
+                                       910.948364258,
+                                       869.613342285,
+                                       829.542602539,
+                                       790.784973145,
+                                       753.372253418,
+                                       717.322814941,
+                                       682.831237793,
+                                       651.732788086,
+                                       621.794555664,
+                                       593.00592041,
+                                       565.35144043,
+                                       1674.04626465,
+                                       1670.57409668,
+                                       1660.23608398,
+                                       1643.26379395,
+                                       1620.02612305,
+                                       1591.01000977,
+                                       1556.79296875,
+                                       1518.01391602,
+                                       1475.3458252,
+                                       1433.39599609,
+                                       1389.00024414,
+                                       1342.71228027,
+                                       1295.05419922,
+                                       1246.50488281,
+                                       1197.49438477,
+                                       1148.40356445,
+                                       1099.56396484,
+                                       1051.25854492,
+                                       1003.72686768,
+                                       959.981201172,
+                                       918.996520996,
+                                       878.947387695,
+                                       839.930419922,
+                                       802.020446777,
+                                       765.27355957,
+                                       729.730224609,
+                                       695.41619873,
+                                       663.897949219,
+                                       634.132019043,
+                                       605.431762695,
+                                       577.793395996,
+                                       551.20690918,
+                                       1559.84912109,
+                                       1556.79296875,
+                                       1547.6887207,
+                                       1532.72387695,
+                                       1512.1998291,
+                                       1486.51586914,
+                                       1457.80285645,
+                                       1426.21765137,
+                                       1391.27539062,
+                                       1353.48266602,
+                                       1313.34448242,
+                                       1271.34875488,
+                                       1227.95825195,
+                                       1183.6015625,
+                                       1138.67041016,
+                                       1093.51477051,
+                                       1048.44384766,
+                                       1003.72686768,
+                                       962.165222168,
+                                       923.064758301,
+                                       884.62689209,
+                                       846.973266602,
+                                       810.203491211,
+                                       774.396850586,
+                                       739.61541748,
+                                       705.905090332,
+                                       674.120605469,
+                                       644.708618164,
+                                       616.264648438,
+                                       588.796264648,
+                                       562.305175781,
+                                       536.786437988,
+                                       1455.32824707,
+                                       1452.8614502,
+                                       1445.50598145,
+                                       1433.39599609,
+                                       1416.7467041,
+                                       1395.84521484,
+                                       1371.0369873,
+                                       1342.71228027,
+                                       1311.29003906,
+                                       1277.20385742,
+                                       1240.89025879,
+                                       1202.77600098,
+                                       1163.2689209,
+                                       1122.75280762,
+                                       1081.58068848,
+                                       1040.0736084,
+                                       998.518310547,
+                                       959.981201172,
+                                       923.064758301,
+                                       886.533447266,
+                                       850.530761719,
+                                       815.17779541,
+                                       780.575439453,
+                                       746.805908203,
+                                       713.934814453,
+                                       682.245117188,
+                                       653.370849609,
+                                       625.353942871,
+                                       598.214355469,
+                                       571.965026855,
+                                       546.611877441,
+                                       522.155395508,
+                                       1364.40710449,
+                                       1362.2097168,
+                                       1355.65515137,
+                                       1344.85412598,
+                                       1329.98596191,
+                                       1311.29003906,
+                                       1289.05615234,
+                                       1263.61376953,
+                                       1235.31958008,
+                                       1204.54553223,
+                                       1171.66845703,
+                                       1137.06164551,
+                                       1101.08483887,
+                                       1064.07983398,
+                                       1026.36474609,
+                                       988.232055664,
+                                       953.479187012,
+                                       918.996520996,
+                                       884.62689209,
+                                       850.530761719,
+                                       816.846923828,
+                                       783.694335938,
+                                       751.173278809,
+                                       719.367553711,
+                                       688.345153809,
+                                       659.988647461,
+                                       632.568969727,
+                                       605.91784668,
+                                       580.059387207,
+                                       555.010437012,
+                                       530.781066895,
+                                       507.375701904,
+                                       1279.16601562,
+                                       1277.20385742,
+                                       1271.34863281,
+                                       1261.69274902,
+                                       1248.38586426,
+                                       1231.62927246,
+                                       1211.6673584,
+                                       1188.7791748,
+                                       1163.2689209,
+                                       1135.45678711,
+                                       1105.66882324,
+                                       1074.23132324,
+                                       1041.46118164,
+                                       1007.66223145,
+                                       974.326965332,
+                                       942.805053711,
+                                       910.948364258,
+                                       878.947387695,
+                                       846.973266602,
+                                       815.17779541,
+                                       783.694335938,
+                                       752.638000488,
+                                       722.108276367,
+                                       692.188110352,
+                                       664.459472656,
+                                       637.802490234,
+                                       611.796875,
+                                       586.477539062,
+                                       561.871887207,
+                                       537.999816895,
+                                       514.875244141,
+                                       492.505737305,
+                                       1199.25048828,
+                                       1197.49438477,
+                                       1192.25231934,
+                                       1183.6015625,
+                                       1171.66845703,
+                                       1156.62243652,
+                                       1138.67041016,
+                                       1118.05004883,
+                                       1095.02172852,
+                                       1069.86108398,
+                                       1042.85168457,
+                                       1014.27752686,
+                                       984.471923828,
+                                       956.720947266,
+                                       928.192504883,
+                                       899.092651367,
+                                       869.613342285,
+                                       839.930419922,
+                                       810.203491211,
+                                       780.575439453,
+                                       751.173278809,
+                                       722.108276367,
+                                       693.4765625,
+                                       666.712768555,
+                                       640.975524902,
+                                       615.765563965,
+                                       591.129272461,
+                                       567.10345459,
+                                       543.718078613,
+                                       520.995727539,
+                                       498.952667236,
+                                       480.805541992,
+                                       1124.32775879,
+                                       1122.75280762,
+                                       1118.05004883,
+                                       1110.28479004,
+                                       1099.56396484,
+                                       1086.03051758,
+                                       1069.86108398,
+                                       1051.25854492,
+                                       1030.44628906,
+                                       1007.66223145,
+                                       983.335632324,
+                                       959.981201172,
+                                       935.451171875,
+                                       909.950622559,
+                                       883.676208496,
+                                       856.815124512,
+                                       829.542602539,
+                                       802.020446777,
+                                       774.396850586,
+                                       746.805908203,
+                                       719.367553711,
+                                       692.188110352,
+                                       666.712768555,
+                                       642.038635254,
+                                       617.765075684,
+                                       593.947570801,
+                                       570.63269043,
+                                       547.859313965,
+                                       525.65826416,
+                                       504.054443359,
+                                       484.735015869,
+                                       470.0362854,
+                                       1054.08581543,
+                                       1052.67053223,
+                                       1048.44384766,
+                                       1041.46118164,
+                                       1031.81274414,
+                                       1019.62091064,
+                                       1005.03588867,
+                                       988.232055664,
+                                       970.984558105,
+                                       952.402770996,
+                                       932.328857422,
+                                       910.948364258,
+                                       888.446838379,
+                                       865.006896973,
+                                       840.805603027,
+                                       816.011779785,
+                                       790.784973145,
+                                       765.27355957,
+                                       739.61541748,
+                                       713.934814453,
+                                       688.345153809,
+                                       664.459472656,
+                                       640.975524902,
+                                       617.765075684,
+                                       594.891723633,
+                                       572.41003418,
+                                       550.3671875,
+                                       528.801818848,
+                                       507.746917725,
+                                       487.717651367,
+                                       473.343078613,
+                                       459.212097168,
+                                       988.231933594,
+                                       986.958068848,
+                                       983.335632324,
+                                       977.688903809,
+                                       969.874816895,
+                                       959.981201172,
+                                       948.117004395,
+                                       934.408508301,
+                                       918.996520996,
+                                       902.032531738,
+                                       883.676208496,
+                                       864.090393066,
+                                       843.440002441,
+                                       821.887329102,
+                                       799.591430664,
+                                       776.704589844,
+                                       753.372253418,
+                                       729.730224609,
+                                       705.905090332,
+                                       682.245117188,
+                                       659.988647461,
+                                       637.802490234,
+                                       615.765563965,
+                                       593.947570801,
+                                       572.41003418,
+                                       551.20690918,
+                                       530.384277344,
+                                       509.981781006,
+                                       490.03225708,
+                                       475.728912354,
+                                       461.936004639,
+                                       448.361358643,
+                                       932.328857422,
+                                       931.291748047,
+                                       928.192504883,
+                                       923.064758301,
+                                       915.964599609,
+                                       906.967834473,
+                                       896.168579102,
+                                       883.676330566,
+                                       869.613464355,
+                                       854.112670898,
+                                       837.313964844,
+                                       819.361022949,
+                                       800.399719238,
+                                       780.575439453,
+                                       760.030456543,
+                                       738.902404785,
+                                       717.322814941,
+                                       695.41619873,
+                                       674.120605469,
+                                       653.370849609,
+                                       632.568969727,
+                                       611.796875,
+                                       591.129272461,
+                                       570.63269043,
+                                       550.3671875,
+                                       530.384277344,
+                                       510.730102539,
+                                       491.443481445,
+                                       477.170257568,
+                                       463.766784668,
+                                       450.541046143,
+                                       437.510101318,
+                                       879.889831543,
+                                       878.947387695,
+                                       876.130004883,
+                                       871.467163086,
+                                       865.006896973,
+                                       856.815124512,
+                                       846.973266602,
+                                       835.576843262,
+                                       822.732299805,
+                                       808.556152344,
+                                       793.171142578,
+                                       776.704589844,
+                                       759.286071777,
+                                       741.04473877,
+                                       722.108276367,
+                                       702.601013184,
+                                       682.831237793,
+                                       663.897949219,
+                                       644.708618164,
+                                       625.353942871,
+                                       605.91784668,
+                                       586.477539062,
+                                       567.10345459,
+                                       547.859313965,
+                                       528.801818848,
+                                       509.981781006,
+                                       491.443481445,
+                                       477.65222168,
+                                       464.686828613,
+                                       451.85736084,
+                                       439.183532715,
+                                       426.682556152,
+                                       830.400390625,
+                                       829.542602539,
+                                       826.978088379,
+                                       822.732299805,
+                                       816.846923828,
+                                       809.379150391,
+                                       800.399719238,
+                                       789.991882324,
+                                       778.249511719,
+                                       765.27355957,
+                                       751.173278809,
+                                       736.060913086,
+                                       720.051269531,
+                                       703.25994873,
+                                       685.801452637,
+                                       668.97833252,
+                                       651.732788086,
+                                       634.132019043,
+                                       616.264648438,
+                                       598.214355469,
+                                       580.059387207,
+                                       561.871887207,
+                                       543.718078613,
+                                       525.65826416,
+                                       507.746917725,
+                                       490.03225708,
+                                       477.170257568,
+                                       464.686828613,
+                                       452.297576904,
+                                       440.024200439,
+                                       427.886230469,
+                                       415.901092529,
+                                       783.694335938,
+                                       782.912841797,
+                                       780.575439453,
+                                       776.704589844,
+                                       771.336730957,
+                                       764.521179199,
+                                       756.319885254,
+                                       746.805908203,
+                                       736.060913086,
+                                       724.174499512,
+                                       711.242553711,
+                                       697.364379883,
+                                       682.831237793,
+                                       668.410766602,
+                                       653.370849609,
+                                       637.802612305,
+                                       621.794555664,
+                                       605.431762695,
+                                       588.796264648,
+                                       571.965026855,
+                                       555.010437012,
+                                       537.999816895,
+                                       520.995727539,
+                                       504.054443359,
+                                       487.717651367,
+                                       475.728912354,
+                                       463.766784668,
+                                       451.85736084,
+                                       440.024200439,
+                                       428.288726807,
+                                       416.670166016,
+                                       405.185882568,
+                                       739.61541748,
+                                       738.902404785,
+                                       736.76965332,
+                                       733.236633301,
+                                       728.335144043,
+                                       722.108276367,
+                                       714.610473633,
+                                       705.905090332,
+                                       696.064697266,
+                                       685.184509277,
+                                       674.120605469,
+                                       662.218017578,
+                                       649.558654785,
+                                       636.225524902,
+                                       622.301086426,
+                                       607.867736816,
+                                       593.00592041,
+                                       577.793395996,
+                                       562.305175781,
+                                       546.611877441,
+                                       530.781066895,
+                                       514.875244141,
+                                       498.952667236,
+                                       484.735015869,
+                                       473.343078613,
+                                       461.936004639,
+                                       450.541046143,
+                                       439.183532715,
+                                       427.886230469,
+                                       416.670166016,
+                                       405.554260254,
+                                       394.555480957,
+                                       698.015563965,
+                                       697.364379883,
+                                       695.41619873,
+                                       692.188110352,
+                                       687.707824707,
+                                       682.245117188,
+                                       675.848571777,
+                                       668.410766602,
+                                       659.988525391,
+                                       650.644348145,
+                                       640.444885254,
+                                       629.4609375,
+                                       617.765075684,
+                                       605.431762695,
+                                       592.535888672,
+                                       579.151306152,
+                                       565.35144043,
+                                       551.20690918,
+                                       536.786437988,
+                                       522.155395508,
+                                       507.375701904,
+                                       492.505737305,
+                                       480.805541992,
+                                       470.0362854,
+                                       459.212097168,
+                                       448.361358643,
+                                       437.510101318,
+                                       426.682556152,
+                                       415.901092529,
+                                       405.185882568,
+                                       394.555480957,
+                                       384.026672363},
+                                      {0,
+                                       0,
+                                       0,
+                                       0,
+                                       10016.1787109,
+                                       8949.01855469,
+                                       7995.55859375,
+                                       7162.60107422,
+                                       6422.47558594,
+                                       5758.82910156,
+                                       5163.75830078,
+                                       4630.17675781,
+                                       4151.73242188,
+                                       3734.18823242,
+                                       3370.10986328,
+                                       3041.52880859,
+                                       2744.98388672,
+                                       2477.35107422,
+                                       2235.81323242,
+                                       2038.74963379,
+                                       1932.10974121,
+                                       1831.04748535,
+                                       1735.27160645,
+                                       1644.50561523,
+                                       1558.48730469,
+                                       1476.96801758,
+                                       1386.82666016,
+                                       1301.52868652,
+                                       1221.47717285,
+                                       1146.34912109,
+                                       1075.84216309,
+                                       1009.67150879,
+                                       0,
+                                       0,
+                                       0,
+                                       0,
+                                       9878.22460938,
+                                       8849.74414062,
+                                       7921.35595703,
+                                       7107.29541016,
+                                       6379.01171875,
+                                       5724.14550781,
+                                       5135.74365234,
+                                       4607.32568359,
+                                       4132.93945312,
+                                       3719.50512695,
+                                       3357.80053711,
+                                       3031.15722656,
+                                       2736.20654297,
+                                       2469.89428711,
+                                       2229.45581055,
+                                       2035.87133789,
+                                       1929.51806641,
+                                       1828.70812988,
+                                       1733.15539551,
+                                       1642.58703613,
+                                       1556.74450684,
+                                       1475.38232422,
+                                       1385.13513184,
+                                       1300,
+                                       1220.09375,
+                                       1145.09570312,
+                                       1074.70495605,
+                                       1008.63867188,
+                                       0,
+                                       0,
+                                       0,
+                                       0,
+                                       9497.34082031,
+                                       8569.00976562,
+                                       7710.1953125,
+                                       6947.08251953,
+                                       6252.30078125,
+                                       5622.56835938,
+                                       5053.41699219,
+                                       4539.99316406,
+                                       4077.45068359,
+                                       3676.05541992,
+                                       3321.32641602,
+                                       3000.390625,
+                                       2710.14355469,
+                                       2447.73339844,
+                                       2210.55053711,
+                                       2027.28417969,
+                                       1921.78308105,
+                                       1821.72375488,
+                                       1726.8347168,
+                                       1636.85534668,
+                                       1551.53735352,
+                                       1470.2409668,
+                                       1380.08117676,
+                                       1295.43139648,
+                                       1215.95825195,
+                                       1141.34753418,
+                                       1071.30383301,
+                                       1005.54919434,
+                                       0,
+                                       0,
+                                       0,
+                                       0,
+                                       8949.01855469,
+                                       8149.28955078,
+                                       7394.22412109,
+                                       6697.34423828,
+                                       6052.48828125,
+                                       5461.01953125,
+                                       4921.63427734,
+                                       4431.66796875,
+                                       3987.81860352,
+                                       3605.57324219,
+                                       3262.00317383,
+                                       2950.23999023,
+                                       2667.58154297,
+                                       2411.48632812,
+                                       2179.58496094,
+                                       2013.13024902,
+                                       1909.02331543,
+                                       1810.1940918,
+                                       1716.39379883,
+                                       1627.38232422,
+                                       1542.92712402,
+                                       1460.9855957,
+                                       1371.72302246,
+                                       1287.87316895,
+                                       1209.11425781,
+                                       1135.14245605,
+                                       1065.67199707,
+                                       1000.43200684,
+                                       10016.1787109,
+                                       9878.22460938,
+                                       9497.34082031,
+                                       8949.01855469,
+                                       8310.703125,
+                                       7644.40527344,
+                                       6999.56738281,
+                                       6379.01171875,
+                                       5793.93896484,
+                                       5249.58837891,
+                                       4747.62841797,
+                                       4287.62841797,
+                                       3871.05200195,
+                                       3510.74609375,
+                                       3181.88964844,
+                                       2882.29760742,
+                                       2609.7644043,
+                                       2362.1328125,
+                                       2137.33789062,
+                                       1993.63818359,
+                                       1891.43066406,
+                                       1794.28063965,
+                                       1701.97070312,
+                                       1614.28540039,
+                                       1531.01391602,
+                                       1448.18615723,
+                                       1360.15734863,
+                                       1277.40795898,
+                                       1199.63305664,
+                                       1126.54284668,
+                                       1057.86291504,
+                                       993.333496094,
+                                       8949.01855469,
+                                       8849.74414062,
+                                       8569.00976562,
+                                       8149.28955078,
+                                       7644.40527344,
+                                       7107.29541016,
+                                       6556.77978516,
+                                       6014.109375,
+                                       5492.59033203,
+                                       4999.91259766,
+                                       4539.99316406,
+                                       4114.296875,
+                                       3734.18823242,
+                                       3394.95922852,
+                                       3083.60522461,
+                                       2798.61328125,
+                                       2538.30639648,
+                                       2300.9543457,
+                                       2084.83349609,
+                                       1969.11364746,
+                                       1869.26220703,
+                                       1774.20117188,
+                                       1683.74963379,
+                                       1597.7220459,
+                                       1515.93359375,
+                                       1431.99487305,
+                                       1345.51452637,
+                                       1264.14855957,
+                                       1187.61206055,
+                                       1115.63220215,
+                                       1047.94946289,
+                                       984.940734863,
+                                       7995.55859375,
+                                       7921.35595703,
+                                       7710.1953125,
+                                       7394.22412109,
+                                       6999.56738281,
+                                       6556.77978516,
+                                       6091.37744141,
+                                       5622.56835938,
+                                       5163.75830078,
+                                       4723.70117188,
+                                       4307.68896484,
+                                       3918.65771484,
+                                       3578.03271484,
+                                       3262.00317383,
+                                       2970.13378906,
+                                       2701.54418945,
+                                       2455.08642578,
+                                       2229.45581055,
+                                       2041.63623047,
+                                       1939.92504883,
+                                       1842.82971191,
+                                       1750.22119141,
+                                       1661.9576416,
+                                       1577.88708496,
+                                       1497.85339355,
+                                       1412.59997559,
+                                       1327.95666504,
+                                       1248.23461914,
+                                       1173.171875,
+                                       1102.515625,
+                                       1036.02380371,
+                                       975.335021973,
+                                       7162.60107422,
+                                       7107.29541016,
+                                       6947.08251953,
+                                       6697.34423828,
+                                       6379.01171875,
+                                       6014.109375,
+                                       5622.56835938,
+                                       5220.67480469,
+                                       4820.77636719,
+                                       4431.66796875,
+                                       4059.2434082,
+                                       3719.50512695,
+                                       3407.50024414,
+                                       3115.79980469,
+                                       2844.60986328,
+                                       2593.61132812,
+                                       2362.13305664,
+                                       2149.27954102,
+                                       2004.73095703,
+                                       1906.4909668,
+                                       1812.48913574,
+                                       1722.64440918,
+                                       1636.85534668,
+                                       1555.00537109,
+                                       1476.96813965,
+                                       1390.21911621,
+                                       1307.67175293,
+                                       1229.82922363,
+                                       1156.4543457,
+                                       1087.31677246,
+                                       1022.19244385,
+                                       964.170288086,
+                                       6422.47558594,
+                                       6379.01171875,
+                                       6252.30078125,
+                                       6052.48828125,
+                                       5793.93896484,
+                                       5492.59033203,
+                                       5163.75830078,
+                                       4820.77636719,
+                                       4474.43066406,
+                                       4132.93945312,
+                                       3809.15112305,
+                                       3510.74560547,
+                                       3227.25976562,
+                                       2960.15966797,
+                                       2710.14355469,
+                                       2477.3515625,
+                                       2261.5234375,
+                                       2062.12646484,
+                                       1963.74353027,
+                                       1869.26220703,
+                                       1778.62731934,
+                                       1691.80358887,
+                                       1608.73010254,
+                                       1529.32519531,
+                                       1450.00341797,
+                                       1365.09509277,
+                                       1284.86962891,
+                                       1209.11425781,
+                                       1137.61865234,
+                                       1070.17382812,
+                                       1006.57757568,
+                                       951.533935547,
+                                       5758.82910156,
+                                       5724.14550781,
+                                       5622.56835938,
+                                       5461.01953125,
+                                       5249.58837891,
+                                       4999.91259766,
+                                       4723.70117188,
+                                       4431.66796875,
+                                       4132.93945312,
+                                       3839.88183594,
+                                       3564.3984375,
+                                       3297.38085938,
+                                       3041.52880859,
+                                       2798.61328125,
+                                       2569.67749023,
+                                       2355.21166992,
+                                       2155.28833008,
+                                       2013.13024902,
+                                       1919.21801758,
+                                       1828.70812988,
+                                       1741.64990234,
+                                       1658.05053711,
+                                       1577.88708496,
+                                       1501.11230469,
+                                       1419.60339355,
+                                       1337.48803711,
+                                       1259.77709961,
+                                       1186.2878418,
+                                       1116.83618164,
+                                       1051.23815918,
+                                       989.356018066,
+                                       937.521850586,
+                                       5163.75830078,
+                                       5135.74365234,
+                                       5053.41699219,
+                                       4921.63427734,
+                                       4747.62841797,
+                                       4539.99316406,
+                                       4307.68896484,
+                                       4059.2434082,
+                                       3809.15112305,
+                                       3564.3984375,
+                                       3321.32641602,
+                                       3083.60522461,
+                                       2853.95654297,
+                                       2634.29614258,
+                                       2425.88378906,
+                                       2229.45581055,
+                                       2053.26318359,
+                                       1961.06884766,
+                                       1871.70092773,
+                                       1785.30419922,
+                                       1701.97070312,
+                                       1621.74597168,
+                                       1544.64245605,
+                                       1470.2409668,
+                                       1386.82666016,
+                                       1307.67150879,
+                                       1232.63317871,
+                                       1161.55883789,
+                                       1094.29150391,
+                                       1030.67077637,
+                                       972.740783691,
+                                       922.236572266,
+                                       4630.17675781,
+                                       4607.32568359,
+                                       4539.99316406,
+                                       4431.66796875,
+                                       4287.62841797,
+                                       4114.296875,
+                                       3918.65771484,
+                                       3719.50512695,
+                                       3510.74560547,
+                                       3297.38085938,
+                                       3083.60522461,
+                                       2872.79980469,
+                                       2667.58154297,
+                                       2469.89453125,
+                                       2281.10717773,
+                                       2102.11450195,
+                                       1993.63842773,
+                                       1906.4909668,
+                                       1821.72387695,
+                                       1739.5189209,
+                                       1660.00183105,
+                                       1583.25292969,
+                                       1509.31469727,
+                                       1431.99487305,
+                                       1351.99133301,
+                                       1275.92382812,
+                                       1203.68237305,
+                                       1135.14245605,
+                                       1070.17382812,
+                                       1008.63867188,
+                                       954.878356934,
+                                       905.786682129,
+                                       4151.73242188,
+                                       4132.93945312,
+                                       4077.45068359,
+                                       3987.81860352,
+                                       3871.05200195,
+                                       3734.18823242,
+                                       3578.03271484,
+                                       3407.50024414,
+                                       3227.25976562,
+                                       3041.52880859,
+                                       2853.95654297,
+                                       2667.58154297,
+                                       2484.84399414,
+                                       2307.63012695,
+                                       2137.33789062,
+                                       2015.94567871,
+                                       1932.10974121,
+                                       1849.96801758,
+                                       1769.79614258,
+                                       1691.80358887,
+                                       1616.14465332,
+                                       1542.92712402,
+                                       1472.10400391,
+                                       1391.92028809,
+                                       1315.41491699,
+                                       1242.52575684,
+                                       1173.171875,
+                                       1107.25793457,
+                                       1044.67651367,
+                                       985.821166992,
+                                       935.89440918,
+                                       888.28326416,
+                                       3734.18823242,
+                                       3719.50512695,
+                                       3676.05541992,
+                                       3605.57324219,
+                                       3510.74609375,
+                                       3394.95922852,
+                                       3262.00317383,
+                                       3115.79980469,
+                                       2960.15966797,
+                                       2798.61328125,
+                                       2634.29614258,
+                                       2469.89453125,
+                                       2307.63012695,
+                                       2149.27954102,
+                                       2027.2845459,
+                                       1947.80200195,
+                                       1869.26220703,
+                                       1792.02880859,
+                                       1716.39379883,
+                                       1642.58703613,
+                                       1570.78308105,
+                                       1501.11230469,
+                                       1426.6628418,
+                                       1350.36767578,
+                                       1277.40795898,
+                                       1207.75256348,
+                                       1141.34753418,
+                                       1078.12231445,
+                                       1017.99298096,
+                                       964.170288086,
+                                       915.916137695,
+                                       869.840393066,
+                                       3370.10986328,
+                                       3357.80053711,
+                                       3321.32641602,
+                                       3262.00317383,
+                                       3181.88964844,
+                                       3083.60522461,
+                                       2970.13378906,
+                                       2844.60986328,
+                                       2710.14355469,
+                                       2569.67749023,
+                                       2425.88378906,
+                                       2281.10717773,
+                                       2137.33789062,
+                                       2027.2845459,
+                                       1953.08789062,
+                                       1879.05322266,
+                                       1805.62060547,
+                                       1733.15539551,
+                                       1661.9576416,
+                                       1592.26843262,
+                                       1524.27880859,
+                                       1455.47753906,
+                                       1380.08117676,
+                                       1307.67150879,
+                                       1238.27075195,
+                                       1171.8729248,
+                                       1108.44836426,
+                                       1047.94946289,
+                                       990.31439209,
+                                       941.609558105,
+                                       895.070007324,
+                                       850.572570801,
+                                       3041.52880859,
+                                       3031.15722656,
+                                       3000.390625,
+                                       2950.23999023,
+                                       2882.29760742,
+                                       2798.61328125,
+                                       2701.54418945,
+                                       2593.61132812,
+                                       2477.3515625,
+                                       2355.21166992,
+                                       2229.45581055,
+                                       2102.11450195,
+                                       2015.94567871,
+                                       1947.80200195,
+                                       1879.05322266,
+                                       1810.1940918,
+                                       1741.64990234,
+                                       1673.77990723,
+                                       1606.8861084,
+                                       1541.21508789,
+                                       1476.96813965,
+                                       1403.92370605,
+                                       1332.70874023,
+                                       1264.14855957,
+                                       1198.28808594,
+                                       1135.14245605,
+                                       1074.70495605,
+                                       1016.94714355,
+                                       965.022094727,
+                                       918.278381348,
+                                       873.481933594,
+                                       830.592407227,
+                                       2744.98388672,
+                                       2736.20654297,
+                                       2710.14355469,
+                                       2667.58154297,
+                                       2609.7644043,
+                                       2538.30639648,
+                                       2455.08642578,
+                                       2362.13305664,
+                                       2261.5234375,
+                                       2155.28833008,
+                                       2053.26318359,
+                                       1993.63842773,
+                                       1932.10974121,
+                                       1869.26220703,
+                                       1805.62060547,
+                                       1741.64990234,
+                                       1677.75500488,
+                                       1614.28540039,
+                                       1551.53735352,
+                                       1489.75976562,
+                                       1421.36291504,
+                                       1351.99133301,
+                                       1284.86938477,
+                                       1220.09375,
+                                       1157.72741699,
+                                       1097.8046875,
+                                       1040.33654785,
+                                       985.821166992,
+                                       939.153625488,
+                                       894.312011719,
+                                       851.274475098,
+                                       810.01184082,
+                                       2477.35107422,
+                                       2469.89428711,
+                                       2447.73339844,
+                                       2411.48632812,
+                                       2362.1328125,
+                                       2300.9543457,
+                                       2229.45581055,
+                                       2149.27954102,
+                                       2062.12646484,
+                                       2013.13024902,
+                                       1961.06884766,
+                                       1906.4909668,
+                                       1849.96801758,
+                                       1792.02880859,
+                                       1733.15539551,
+                                       1673.77990723,
+                                       1614.28540039,
+                                       1555.00537109,
+                                       1496.22851562,
+                                       1431.99487305,
+                                       1365.09509277,
+                                       1300,
+                                       1236.85754395,
+                                       1175.77709961,
+                                       1116.83618164,
+                                       1060.08496094,
+                                       1005.54919434,
+                                       957.398681641,
+                                       912.780822754,
+                                       869.840393066,
+                                       828.566345215,
+                                       788.938354492,
+                                       2235.81323242,
+                                       2229.45581055,
+                                       2210.55053711,
+                                       2179.58496094,
+                                       2137.33789062,
+                                       2084.83349609,
+                                       2041.63623047,
+                                       2004.73095703,
+                                       1963.74353027,
+                                       1919.21801758,
+                                       1871.70092773,
+                                       1821.72387695,
+                                       1769.79614258,
+                                       1716.39379883,
+                                       1661.9576416,
+                                       1606.8861084,
+                                       1551.53735352,
+                                       1496.22851562,
+                                       1435.56774902,
+                                       1371.72302246,
+                                       1309.21435547,
+                                       1248.23461914,
+                                       1188.93847656,
+                                       1131.44470215,
+                                       1075.84216309,
+                                       1022.19244385,
+                                       972.740783691,
+                                       928.625244141,
+                                       886.038757324,
+                                       844.988342285,
+                                       805.471496582,
+                                       767.476257324,
+                                       2038.74963379,
+                                       2035.87133789,
+                                       2027.28417969,
+                                       2013.13024902,
+                                       1993.63818359,
+                                       1969.11364746,
+                                       1939.92504883,
+                                       1906.4909668,
+                                       1869.26220703,
+                                       1828.70812988,
+                                       1785.30419922,
+                                       1739.5189209,
+                                       1691.80358887,
+                                       1642.58703613,
+                                       1592.26843262,
+                                       1541.21508789,
+                                       1489.75976562,
+                                       1431.99487305,
+                                       1371.72302246,
+                                       1312.30871582,
+                                       1253.98498535,
+                                       1196.94519043,
+                                       1141.34753418,
+                                       1087.31677246,
+                                       1034.94958496,
+                                       984.940734863,
+                                       941.609558105,
+                                       899.638793945,
+                                       859.054748535,
+                                       819.872619629,
+                                       782.097106934,
+                                       745.724487305,
+                                       1932.10974121,
+                                       1929.51806641,
+                                       1921.78308105,
+                                       1909.02331543,
+                                       1891.43066406,
+                                       1869.26220703,
+                                       1842.82971191,
+                                       1812.48913574,
+                                       1778.62731934,
+                                       1741.64990234,
+                                       1701.97070312,
+                                       1660.00183105,
+                                       1616.14465332,
+                                       1570.78308105,
+                                       1524.27880859,
+                                       1476.96813965,
+                                       1421.36291504,
+                                       1365.09509277,
+                                       1309.21435547,
+                                       1253.98498535,
+                                       1199.63305664,
+                                       1146.34912109,
+                                       1094.29150391,
+                                       1043.58874512,
+                                       994.342956543,
+                                       951.534057617,
+                                       910.440185547,
+                                       870.567016602,
+                                       831.947387695,
+                                       794.602966309,
+                                       758.545227051,
+                                       723.776733398,
+                                       1831.04748535,
+                                       1828.70812988,
+                                       1821.72375488,
+                                       1810.1940918,
+                                       1794.28063965,
+                                       1774.20117188,
+                                       1750.22119141,
+                                       1722.64440918,
+                                       1691.80358887,
+                                       1658.05053711,
+                                       1621.74597168,
+                                       1583.25292969,
+                                       1542.92712402,
+                                       1501.11230469,
+                                       1455.47753906,
+                                       1403.92370605,
+                                       1351.99133301,
+                                       1300,
+                                       1248.23461914,
+                                       1196.94519043,
+                                       1146.34912109,
+                                       1096.63146973,
+                                       1047.94946289,
+                                       1000.43200684,
+                                       958.241088867,
+                                       918.278259277,
+                                       879.356750488,
+                                       841.526489258,
+                                       804.825805664,
+                                       769.28125,
+                                       734.910339355,
+                                       701.721008301,
+                                       1735.27160645,
+                                       1733.15539551,
+                                       1726.8347168,
+                                       1716.39379883,
+                                       1701.97070312,
+                                       1683.74963379,
+                                       1661.9576416,
+                                       1636.85534668,
+                                       1608.73010254,
+                                       1577.88708496,
+                                       1544.64245605,
+                                       1509.31469727,
+                                       1472.10400391,
+                                       1426.6628418,
+                                       1380.08117676,
+                                       1332.70874023,
+                                       1284.86938477,
+                                       1236.85754395,
+                                       1188.93847656,
+                                       1141.34753418,
+                                       1094.29150391,
+                                       1047.94946289,
+                                       1002.47418213,
+                                       961.622131348,
+                                       923.031555176,
+                                       885.292236328,
+                                       848.471923828,
+                                       812.623657227,
+                                       777.789794922,
+                                       744.001464844,
+                                       711.280090332,
+                                       684.97052002,
+                                       1644.50561523,
+                                       1642.58703613,
+                                       1636.85534668,
+                                       1627.38232422,
+                                       1614.28540039,
+                                       1597.7220459,
+                                       1577.88708496,
+                                       1555.00537109,
+                                       1529.32519531,
+                                       1501.11230469,
+                                       1470.2409668,
+                                       1431.99487305,
+                                       1391.92028809,
+                                       1350.36767578,
+                                       1307.67150879,
+                                       1264.14855957,
+                                       1220.09375,
+                                       1175.77709961,
+                                       1131.44470215,
+                                       1087.31677246,
+                                       1043.58874512,
+                                       1000.43200684,
+                                       961.622131348,
+                                       924.624450684,
+                                       888.28326416,
+                                       852.680969238,
+                                       817.885742188,
+                                       783.954040527,
+                                       750.929992676,
+                                       718.848205566,
+                                       690.50970459,
+                                       669.78717041,
+                                       1558.48730469,
+                                       1556.74450684,
+                                       1551.53735352,
+                                       1542.92712402,
+                                       1531.01391602,
+                                       1515.93359375,
+                                       1497.85339355,
+                                       1476.96813965,
+                                       1450.00341797,
+                                       1419.60339355,
+                                       1386.82666016,
+                                       1351.99133301,
+                                       1315.41491699,
+                                       1277.40795898,
+                                       1238.27075195,
+                                       1198.28808594,
+                                       1157.72741699,
+                                       1116.83618164,
+                                       1075.84216309,
+                                       1034.94958496,
+                                       994.342956543,
+                                       958.241088867,
+                                       923.031555176,
+                                       888.28326416,
+                                       854.091186523,
+                                       820.536315918,
+                                       787.687927246,
+                                       755.602600098,
+                                       724.327697754,
+                                       694.713867188,
+                                       674.449768066,
+                                       654.522705078,
+                                       1476.96801758,
+                                       1475.38232422,
+                                       1470.2409668,
+                                       1460.9855957,
+                                       1448.18615723,
+                                       1431.99487305,
+                                       1412.59997559,
+                                       1390.21911621,
+                                       1365.09509277,
+                                       1337.48803711,
+                                       1307.67150879,
+                                       1275.92382812,
+                                       1242.52575684,
+                                       1207.75256348,
+                                       1171.8729248,
+                                       1135.14245605,
+                                       1097.8046875,
+                                       1060.08496094,
+                                       1022.19244385,
+                                       984.940734863,
+                                       951.534057617,
+                                       918.278259277,
+                                       885.292236328,
+                                       852.680969238,
+                                       820.536315918,
+                                       788.938354492,
+                                       757.955322266,
+                                       727.644897461,
+                                       698.05480957,
+                                       677.813598633,
+                                       658.364379883,
+                                       639.217041016,
+                                       1386.82666016,
+                                       1385.13513184,
+                                       1380.08117676,
+                                       1371.72302246,
+                                       1360.15734863,
+                                       1345.51452637,
+                                       1327.95666504,
+                                       1307.67175293,
+                                       1284.86962891,
+                                       1259.77709961,
+                                       1232.63317871,
+                                       1203.68237305,
+                                       1173.171875,
+                                       1141.34753418,
+                                       1108.44836426,
+                                       1074.70495605,
+                                       1040.33654785,
+                                       1005.54919434,
+                                       972.740783691,
+                                       941.609558105,
+                                       910.440185547,
+                                       879.356750488,
+                                       848.471923828,
+                                       817.885742188,
+                                       787.687927246,
+                                       757.955322266,
+                                       728.75579834,
+                                       700.14642334,
+                                       679.84564209,
+                                       660.946289062,
+                                       642.291992188,
+                                       623.906738281,
+                                       1301.52868652,
+                                       1300,
+                                       1295.43139648,
+                                       1287.87316895,
+                                       1277.40795898,
+                                       1264.14855957,
+                                       1248.23461914,
+                                       1229.82922363,
+                                       1209.11425781,
+                                       1186.2878418,
+                                       1161.55883789,
+                                       1135.14245605,
+                                       1107.25793457,
+                                       1078.12231445,
+                                       1047.94946289,
+                                       1016.94714355,
+                                       985.821166992,
+                                       957.398681641,
+                                       928.625244141,
+                                       899.638793945,
+                                       870.567016602,
+                                       841.526489258,
+                                       812.623657227,
+                                       783.954040527,
+                                       755.602600098,
+                                       727.644897461,
+                                       700.14642334,
+                                       680.525085449,
+                                       662.243774414,
+                                       644.148803711,
+                                       626.268066406,
+                                       608.625976562,
+                                       1221.47717285,
+                                       1220.09375,
+                                       1215.95825195,
+                                       1209.11425781,
+                                       1199.63305664,
+                                       1187.61206055,
+                                       1173.171875,
+                                       1156.4543457,
+                                       1137.61865234,
+                                       1116.83618164,
+                                       1094.29150391,
+                                       1070.17382812,
+                                       1044.67651367,
+                                       1017.99298096,
+                                       990.31439209,
+                                       965.022094727,
+                                       939.153625488,
+                                       912.780822754,
+                                       886.038757324,
+                                       859.054748535,
+                                       831.947387695,
+                                       804.825805664,
+                                       777.789794922,
+                                       750.929992676,
+                                       724.327697754,
+                                       698.05480957,
+                                       679.84564209,
+                                       662.243774414,
+                                       644.769775391,
+                                       627.454284668,
+                                       610.324890137,
+                                       593.40612793,
+                                       1146.34912109,
+                                       1145.09570312,
+                                       1141.34753418,
+                                       1135.14245605,
+                                       1126.54284668,
+                                       1115.63220215,
+                                       1102.515625,
+                                       1087.31677246,
+                                       1070.17382812,
+                                       1051.23815918,
+                                       1030.67077637,
+                                       1008.63867188,
+                                       985.821166992,
+                                       964.170288086,
+                                       941.609558105,
+                                       918.278381348,
+                                       894.312011719,
+                                       869.840393066,
+                                       844.988342285,
+                                       819.872619629,
+                                       794.602966309,
+                                       769.28125,
+                                       744.001464844,
+                                       718.848205566,
+                                       694.713867188,
+                                       677.813598633,
+                                       660.946289062,
+                                       644.148803711,
+                                       627.454284668,
+                                       610.893005371,
+                                       594.491943359,
+                                       578.275817871,
+                                       1075.84216309,
+                                       1074.70495605,
+                                       1071.30383301,
+                                       1065.67199707,
+                                       1057.86291504,
+                                       1047.94946289,
+                                       1036.02380371,
+                                       1022.19244385,
+                                       1006.57757568,
+                                       989.356018066,
+                                       972.740783691,
+                                       954.878356934,
+                                       935.89440918,
+                                       915.916137695,
+                                       895.070007324,
+                                       873.481933594,
+                                       851.274475098,
+                                       828.566345215,
+                                       805.471496582,
+                                       782.097106934,
+                                       758.545227051,
+                                       734.910339355,
+                                       711.280090332,
+                                       690.50970459,
+                                       674.449768066,
+                                       658.364379883,
+                                       642.291992188,
+                                       626.268066406,
+                                       610.324890137,
+                                       594.491943359,
+                                       578.796020508,
+                                       563.261047363,
+                                       1009.67150879,
+                                       1008.63867188,
+                                       1005.54919434,
+                                       1000.43200684,
+                                       993.333496094,
+                                       984.940734863,
+                                       975.335021973,
+                                       964.170288086,
+                                       951.533935547,
+                                       937.521850586,
+                                       922.236572266,
+                                       905.786682129,
+                                       888.28326416,
+                                       869.840393066,
+                                       850.572570801,
+                                       830.592407227,
+                                       810.01184082,
+                                       788.938354492,
+                                       767.476257324,
+                                       745.724487305,
+                                       723.776733398,
+                                       701.721008301,
+                                       684.97052002,
+                                       669.78717041,
+                                       654.522705078,
+                                       639.217041016,
+                                       623.906738281,
+                                       608.625976562,
+                                       593.40612793,
+                                       578.275817871,
+                                       563.261047363,
+                                       548.385559082},
+                                      {0,
+                                       0,
+                                       0,
+                                       0,
+                                       1554.1237793,
+                                       1242.53955078,
+                                       993.424560547,
+                                       821.738647461,
+                                       688.023742676,
+                                       576.067199707,
+                                       482.328430176,
+                                       403.842987061,
+                                       338.128967285,
+                                       283.233520508,
+                                       237.367095947,
+                                       198.928222656,
+                                       166.714080811,
+                                       139.71661377,
+                                       117.091148376,
+                                       100.366226196,
+                                       93.5875701904,
+                                       87.2667160034,
+                                       81.3727798462,
+                                       75.8769226074,
+                                       70.7522583008,
+                                       65.9736862183,
+                                       62.4703788757,
+                                       59.2027587891,
+                                       56.1060714722,
+                                       53.1713485718,
+                                       50.3901405334,
+                                       47.7544021606,
+                                       0,
+                                       0,
+                                       0,
+                                       0,
+                                       1511.89892578,
+                                       1215.3125,
+                                       975.19708252,
+                                       811.432128906,
+                                       680.458190918,
+                                       570.428588867,
+                                       478.074890137,
+                                       400.60269165,
+                                       335.640289307,
+                                       281.318328857,
+                                       235.876022339,
+                                       197.761489868,
+                                       165.797119141,
+                                       138.993164062,
+                                       116.518371582,
+                                       100.18183136,
+                                       93.4242019653,
+                                       87.1216278076,
+                                       81.2436294556,
+                                       75.76171875,
+                                       70.6492919922,
+                                       65.8815155029,
+                                       62.4058837891,
+                                       59.1439094543,
+                                       56.0522842407,
+                                       53.1221389771,
+                                       50.3450584412,
+                                       47.7130508423,
+                                       0,
+                                       0,
+                                       0,
+                                       0,
+                                       1398.31689453,
+                                       1139.93933105,
+                                       926.469055176,
+                                       781.859680176,
+                                       658.586914062,
+                                       554.038146973,
+                                       465.659057617,
+                                       391.113586426,
+                                       328.333618164,
+                                       275.682922363,
+                                       231.480926514,
+                                       194.317352295,
+                                       163.08682251,
+                                       136.852478027,
+                                       114.822052002,
+                                       99.6321716309,
+                                       92.9370193481,
+                                       86.6887664795,
+                                       80.8581924438,
+                                       75.4178009033,
+                                       70.3418579102,
+                                       65.6360702515,
+                                       62.2131195068,
+                                       58.9679450989,
+                                       55.8914489746,
+                                       52.974937439,
+                                       50.2101821899,
+                                       47.5893363953,
+                                       0,
+                                       0,
+                                       0,
+                                       0,
+                                       1242.53955078,
+                                       1031.72070312,
+                                       865.446105957,
+                                       736.612304688,
+                                       624.660888672,
+                                       528.352294922,
+                                       446.048339844,
+                                       376.032348633,
+                                       316.662597656,
+                                       266.643341064,
+                                       224.406723022,
+                                       188.757797241,
+                                       158.70111084,
+                                       133.381164551,
+                                       112.06615448,
+                                       98.727722168,
+                                       92.1346588135,
+                                       85.9753189087,
+                                       80.2224273682,
+                                       74.8501815796,
+                                       69.8341827393,
+                                       65.2862091064,
+                                       61.8940887451,
+                                       58.6766357422,
+                                       55.625087738,
+                                       52.7310905457,
+                                       49.9866943359,
+                                       47.3842887878,
+                                       1554.1237793,
+                                       1511.89892578,
+                                       1398.31689453,
+                                       1242.53955078,
+                                       1072.70471191,
+                                       913.631103516,
+                                       791.500732422,
+                                       680.458190918,
+                                       581.79699707,
+                                       495.450836182,
+                                       420.656097412,
+                                       356.335296631,
+                                       301.3465271,
+                                       254.681488037,
+                                       214.999893188,
+                                       181.334152222,
+                                       152.824005127,
+                                       128.714950562,
+                                       108.351615906,
+                                       97.4852905273,
+                                       91.031036377,
+                                       84.9928512573,
+                                       79.3460922241,
+                                       74.0670623779,
+                                       69.1331710815,
+                                       64.8018112183,
+                                       61.4521331787,
+                                       58.2728424072,
+                                       55.2557067871,
+                                       52.3927955627,
+                                       49.6765098572,
+                                       47.0995864868,
+                                       1242.53955078,
+                                       1215.3125,
+                                       1139.93933105,
+                                       1031.72070312,
+                                       913.631103516,
+                                       811.432128906,
+                                       711.604919434,
+                                       618.224060059,
+                                       533.334899902,
+                                       457.657806396,
+                                       391.113586426,
+                                       333.178527832,
+                                       283.233520508,
+                                       240.389190674,
+                                       203.691101074,
+                                       172.362747192,
+                                       145.689254761,
+                                       123.027793884,
+                                       103.808616638,
+                                       95.9272842407,
+                                       89.6447677612,
+                                       83.7569274902,
+                                       78.2422027588,
+                                       73.0794143677,
+                                       68.2481460571,
+                                       64.1881027222,
+                                       60.8917617798,
+                                       57.7605171204,
+                                       54.7867393494,
+                                       51.9630203247,
+                                       49.2822341919,
+                                       46.7375259399,
+                                       993.424560547,
+                                       975.19708252,
+                                       926.469055176,
+                                       865.446105957,
+                                       791.500732422,
+                                       711.604919434,
+                                       631.209533691,
+                                       554.038146973,
+                                       482.328430176,
+                                       417.209503174,
+                                       359.053955078,
+                                       307.756866455,
+                                       263.145568848,
+                                       224.406723022,
+                                       190.955062866,
+                                       162.196685791,
+                                       137.561203003,
+                                       116.518371582,
+                                       100.551231384,
+                                       94.0806045532,
+                                       87.9983291626,
+                                       82.2864456177,
+                                       76.9266967773,
+                                       71.9007492065,
+                                       67.1905670166,
+                                       63.4515571594,
+                                       60.2185935974,
+                                       57.1445236206,
+                                       54.2224197388,
+                                       51.4454956055,
+                                       48.8071632385,
+                                       46.300994873,
+                                       821.738647461,
+                                       811.432128906,
+                                       781.859680176,
+                                       736.612304688,
+                                       680.458190918,
+                                       618.224060059,
+                                       554.038146973,
+                                       491.015136719,
+                                       431.260406494,
+                                       376.032348633,
+                                       325.949676514,
+                                       281.318328857,
+                                       241.920471191,
+                                       207.367233276,
+                                       177.270202637,
+                                       151.198699951,
+                                       128.714981079,
+                                       109.396255493,
+                                       98.1919021606,
+                                       91.9756011963,
+                                       86.1172180176,
+                                       80.602897644,
+                                       75.4178009033,
+                                       70.5465774536,
+                                       65.9736938477,
+                                       62.5996856689,
+                                       59.4391670227,
+                                       56.4305839539,
+                                       53.5677680969,
+                                       50.8446311951,
+                                       48.2551269531,
+                                       45.7933807373,
+                                       688.023742676,
+                                       680.458190918,
+                                       658.586914062,
+                                       624.660888672,
+                                       581.79699707,
+                                       533.334899902,
+                                       482.328430176,
+                                       431.260406494,
+                                       381.958526611,
+                                       335.640289307,
+                                       293.096069336,
+                                       254.681427002,
+                                       220.30645752,
+                                       189.852081299,
+                                       163.08682251,
+                                       139.716659546,
+                                       119.419540405,
+                                       101.869186401,
+                                       95.5869064331,
+                                       89.6447677612,
+                                       84.0289993286,
+                                       78.7296905518,
+                                       73.7354660034,
+                                       69.0339279175,
+                                       64.8706207275,
+                                       61.6408843994,
+                                       58.5607948303,
+                                       55.625087738,
+                                       52.8284225464,
+                                       50.1653556824,
+                                       47.6305236816,
+                                       45.2185592651,
+                                       576.067199707,
+                                       570.428588867,
+                                       554.038146973,
+                                       528.352294922,
+                                       495.450836182,
+                                       457.657806396,
+                                       417.209503174,
+                                       376.032348633,
+                                       335.640289307,
+                                       297.179992676,
+                                       261.421112061,
+                                       228.614364624,
+                                       198.928222656,
+                                       172.362747192,
+                                       148.803924561,
+                                       128.066162109,
+                                       109.923492432,
+                                       98.727722168,
+                                       92.7755966187,
+                                       87.1216278076,
+                                       81.7623062134,
+                                       76.6913833618,
+                                       71.9007492065,
+                                       67.3809127808,
+                                       63.7176971436,
+                                       60.5841941833,
+                                       57.5914230347,
+                                       54.7350311279,
+                                       52.0104789734,
+                                       49.4130935669,
+                                       46.938117981,
+                                       44.5807800293,
+                                       482.328430176,
+                                       478.074890137,
+                                       465.659057617,
+                                       446.048339844,
+                                       420.656097412,
+                                       391.113586426,
+                                       359.053955078,
+                                       325.949676514,
+                                       293.096069336,
+                                       261.421112061,
+                                       231.480926514,
+                                       203.691101074,
+                                       178.274459839,
+                                       155.306304932,
+                                       134.755508423,
+                                       116.518371582,
+                                       101.297218323,
+                                       95.4174804688,
+                                       89.7970199585,
+                                       84.4398193359,
+                                       79.3460922241,
+                                       74.5129318237,
+                                       69.935256958,
+                                       65.6360702515,
+                                       62.4703788757,
+                                       59.4391593933,
+                                       56.5394515991,
+                                       53.7678108215,
+                                       51.1205253601,
+                                       48.5936508179,
+                                       46.1830673218,
+                                       43.8845863342,
+                                       403.842987061,
+                                       400.60269165,
+                                       391.113586426,
+                                       376.032348633,
+                                       356.335296631,
+                                       333.178527832,
+                                       307.756866455,
+                                       281.318328857,
+                                       254.681427002,
+                                       228.614364624,
+                                       203.691101074,
+                                       180.30632019,
+                                       158.70111084,
+                                       138.99319458,
+                                       121.205970764,
+                                       105.29486084,
+                                       97.4852981567,
+                                       91.9756011963,
+                                       86.6887817383,
+                                       81.6321105957,
+                                       76.8088912964,
+                                       72.2191696167,
+                                       67.8605422974,
+                                       64.1881027222,
+                                       61.1397399902,
+                                       58.2155418396,
+                                       55.4135246277,
+                                       52.7310905457,
+                                       50.1653556824,
+                                       47.7130508423,
+                                       45.3707275391,
+                                       43.1347961426,
+                                       338.128967285,
+                                       335.640289307,
+                                       328.333618164,
+                                       316.662597656,
+                                       301.3465271,
+                                       283.233520508,
+                                       263.145568848,
+                                       241.920471191,
+                                       220.30645752,
+                                       198.928222656,
+                                       178.274459839,
+                                       158.70111084,
+                                       140.445129395,
+                                       123.643127441,
+                                       108.351615906,
+                                       98.9074707031,
+                                       93.5875701904,
+                                       88.4422607422,
+                                       83.4863510132,
+                                       78.7296905518,
+                                       74.1781234741,
+                                       69.8341827393,
+                                       65.7064590454,
+                                       62.6645126343,
+                                       59.736907959,
+                                       56.9232521057,
+                                       54.2224197388,
+                                       51.6327171326,
+                                       49.1519355774,
+                                       46.7775268555,
+                                       44.5066757202,
+                                       42.336353302,
+                                       283.233520508,
+                                       281.318328857,
+                                       275.682922363,
+                                       266.643341064,
+                                       254.681488037,
+                                       240.389190674,
+                                       224.406723022,
+                                       207.367233276,
+                                       189.852081299,
+                                       172.362747192,
+                                       155.306304932,
+                                       138.99319458,
+                                       123.643127441,
+                                       109.396255493,
+                                       99.632194519,
+                                       94.5781402588,
+                                       89.6447677612,
+                                       84.8540420532,
+                                       80.2224273682,
+                                       75.76171875,
+                                       71.4796905518,
+                                       67.3809127808,
+                                       63.9857673645,
+                                       61.0775909424,
+                                       58.2728424072,
+                                       55.5720672607,
+                                       52.974937439,
+                                       50.4805107117,
+                                       48.0872917175,
+                                       45.7933807373,
+                                       43.5965652466,
+                                       41.4943313599,
+                                       237.367095947,
+                                       235.876022339,
+                                       231.480926514,
+                                       224.406723022,
+                                       214.999893188,
+                                       203.691101074,
+                                       190.955062866,
+                                       177.270202637,
+                                       163.08682251,
+                                       148.803924561,
+                                       134.755508423,
+                                       121.205970764,
+                                       108.351615906,
+                                       99.632194519,
+                                       94.9123535156,
+                                       90.2564239502,
+                                       85.6926956177,
+                                       81.2436294556,
+                                       76.9266967773,
+                                       72.7549057007,
+                                       68.7375793457,
+                                       65.077835083,
+                                       62.2131195068,
+                                       59.4391593933,
+                                       56.7582321167,
+                                       54.1716041565,
+                                       51.6796913147,
+                                       49.2822341919,
+                                       46.9784011841,
+                                       44.7668800354,
+                                       42.6460189819,
+                                       40.6138343811,
+                                       198.928222656,
+                                       197.761489868,
+                                       194.317352295,
+                                       188.757797241,
+                                       181.334152222,
+                                       172.362747192,
+                                       162.196685791,
+                                       151.198699951,
+                                       139.716659546,
+                                       128.066162109,
+                                       116.518371582,
+                                       105.29486084,
+                                       98.9074707031,
+                                       94.5781402588,
+                                       90.2564239502,
+                                       85.9753189087,
+                                       81.7623062134,
+                                       77.6397247314,
+                                       73.6254806519,
+                                       69.7333374023,
+                                       65.9736938477,
+                                       63.1215667725,
+                                       60.4009246826,
+                                       57.7605171204,
+                                       55.2032775879,
+                                       52.7310905457,
+                                       50.3450584412,
+                                       48.0454750061,
+                                       45.832118988,
+                                       43.7042236328,
+                                       41.6606483459,
+                                       39.699886322,
+                                       166.714080811,
+                                       165.797119141,
+                                       163.08682251,
+                                       158.70111084,
+                                       152.824005127,
+                                       145.689254761,
+                                       137.561203003,
+                                       128.714981079,
+                                       119.419540405,
+                                       109.923492432,
+                                       101.297218323,
+                                       97.4852981567,
+                                       93.5875701904,
+                                       89.6447677612,
+                                       85.6926956177,
+                                       81.7623062134,
+                                       77.8798141479,
+                                       74.0670623779,
+                                       70.3418579102,
+                                       66.7183837891,
+                                       63.7845306396,
+                                       61.1397399902,
+                                       58.5607872009,
+                                       56.0522842407,
+                                       53.6176719666,
+                                       51.2593917847,
+                                       48.9790611267,
+                                       46.7775268555,
+                                       44.6550750732,
+                                       42.6114387512,
+                                       40.6459236145,
+                                       38.7575035095,
+                                       139.71661377,
+                                       138.993164062,
+                                       136.852478027,
+                                       133.381164551,
+                                       128.714950562,
+                                       123.027793884,
+                                       116.518371582,
+                                       109.396255493,
+                                       101.869186401,
+                                       98.727722168,
+                                       95.4174804688,
+                                       91.9756011963,
+                                       88.4422607422,
+                                       84.8540420532,
+                                       81.2436294556,
+                                       77.6397247314,
+                                       74.0670623779,
+                                       70.5465774536,
+                                       67.0957107544,
+                                       64.1881027222,
+                                       61.6408843994,
+                                       59.1439094543,
+                                       56.7033996582,
+                                       54.3243103027,
+                                       52.0104789734,
+                                       49.7648124695,
+                                       47.5893363953,
+                                       45.4853858948,
+                                       43.4536628723,
+                                       41.4943313599,
+                                       39.6071586609,
+                                       37.7914886475,
+                                       117.091148376,
+                                       116.518371582,
+                                       114.822052002,
+                                       112.06615448,
+                                       108.351615906,
+                                       103.808616638,
+                                       100.551231384,
+                                       98.1919021606,
+                                       95.5869064331,
+                                       92.7755966187,
+                                       89.7970199585,
+                                       86.6887817383,
+                                       83.4863510132,
+                                       80.2224273682,
+                                       76.9266967773,
+                                       73.6254806519,
+                                       70.3418579102,
+                                       67.0957107544,
+                                       64.3236160278,
+                                       61.8940887451,
+                                       59.4985046387,
+                                       57.1445236206,
+                                       54.83852005,
+                                       52.5856742859,
+                                       50.3901405334,
+                                       48.2551269531,
+                                       46.1830673218,
+                                       44.1756248474,
+                                       42.2339172363,
+                                       40.3584899902,
+                                       38.5494613647,
+                                       36.8065338135,
+                                       100.366226196,
+                                       100.18183136,
+                                       99.6321716309,
+                                       98.727722168,
+                                       97.4852905273,
+                                       95.9272842407,
+                                       94.0806045532,
+                                       91.9756011963,
+                                       89.6447677612,
+                                       87.1216278076,
+                                       84.4398193359,
+                                       81.6321105957,
+                                       78.7296905518,
+                                       75.76171875,
+                                       72.7549057007,
+                                       69.7333374023,
+                                       66.7183837891,
+                                       64.1881027222,
+                                       61.8940887451,
+                                       59.6175003052,
+                                       57.3672447205,
+                                       55.1509132385,
+                                       52.974937439,
+                                       50.8446311951,
+                                       48.7643356323,
+                                       46.7375259399,
+                                       44.7668800354,
+                                       42.8544273376,
+                                       41.0015525818,
+                                       39.2091522217,
+                                       37.4776496887,
+                                       35.8070869446,
+                                       93.5875701904,
+                                       93.4242019653,
+                                       92.9370193481,
+                                       92.1346588135,
+                                       91.031036377,
+                                       89.6447677612,
+                                       87.9983291626,
+                                       86.1172180176,
+                                       84.0289993286,
+                                       81.7623062134,
+                                       79.3460922241,
+                                       76.8088912964,
+                                       74.1781234741,
+                                       71.4796905518,
+                                       68.7375793457,
+                                       65.9736938477,
+                                       63.7845306396,
+                                       61.6408843994,
+                                       59.4985046387,
+                                       57.3672447205,
+                                       55.2557067871,
+                                       53.1713485718,
+                                       51.1205253601,
+                                       49.1086196899,
+                                       47.1400909424,
+                                       45.2185668945,
+                                       43.3469619751,
+                                       41.5275192261,
+                                       39.7618980408,
+                                       38.0512619019,
+                                       36.396320343,
+                                       34.7973823547,
+                                       87.2667160034,
+                                       87.1216278076,
+                                       86.6887664795,
+                                       85.9753189087,
+                                       84.9928512573,
+                                       83.7569274902,
+                                       82.2864456177,
+                                       80.602897644,
+                                       78.7296905518,
+                                       76.6913833618,
+                                       74.5129318237,
+                                       72.2191696167,
+                                       69.8341827393,
+                                       67.3809127808,
+                                       65.077835083,
+                                       63.1215667725,
+                                       61.1397399902,
+                                       59.1439094543,
+                                       57.1445236206,
+                                       55.1509132385,
+                                       53.1713485718,
+                                       51.2130279541,
+                                       49.2822341919,
+                                       47.3842887878,
+                                       45.5237045288,
+                                       43.7042160034,
+                                       41.9289016724,
+                                       40.2001571655,
+                                       38.5198707581,
+                                       36.8894119263,
+                                       35.3097419739,
+                                       33.7813949585,
+                                       81.3727798462,
+                                       81.2436294556,
+                                       80.8581924438,
+                                       80.2224273682,
+                                       79.3460922241,
+                                       78.2422027588,
+                                       76.9266967773,
+                                       75.4178009033,
+                                       73.7354660034,
+                                       71.9007492065,
+                                       69.935256958,
+                                       67.8605422974,
+                                       65.7064590454,
+                                       63.9857673645,
+                                       62.2131195068,
+                                       60.4009246826,
+                                       58.5607872009,
+                                       56.7033996582,
+                                       54.83852005,
+                                       52.974937439,
+                                       51.1205253601,
+                                       49.2822341919,
+                                       47.4661407471,
+                                       45.6774940491,
+                                       43.9208030701,
+                                       42.199848175,
+                                       40.5177879333,
+                                       38.877155304,
+                                       37.2799949646,
+                                       35.7278671265,
+                                       34.2218933105,
+                                       32.9320831299,
+                                       75.8769226074,
+                                       75.76171875,
+                                       75.4178009033,
+                                       74.8501815796,
+                                       74.0670623779,
+                                       73.0794143677,
+                                       71.9007492065,
+                                       70.5465774536,
+                                       69.0339279175,
+                                       67.3809127808,
+                                       65.6360702515,
+                                       64.1881027222,
+                                       62.6645126343,
+                                       61.0775909424,
+                                       59.4391593933,
+                                       57.7605171204,
+                                       56.0522842407,
+                                       54.3243103027,
+                                       52.5856742859,
+                                       50.8446311951,
+                                       49.1086196899,
+                                       47.3842887878,
+                                       45.6774940491,
+                                       43.9933776855,
+                                       42.336353302,
+                                       40.7102241516,
+                                       39.1181678772,
+                                       37.5628471375,
+                                       36.0463790894,
+                                       34.5704689026,
+                                       33.2245254517,
+                                       32.1316642761,
+                                       70.7522583008,
+                                       70.6492919922,
+                                       70.3418579102,
+                                       69.8341827393,
+                                       69.1331710815,
+                                       68.2481460571,
+                                       67.1905670166,
+                                       65.9736938477,
+                                       64.8706207275,
+                                       63.7176971436,
+                                       62.4703788757,
+                                       61.1397399902,
+                                       59.736907959,
+                                       58.2728424072,
+                                       56.7582321167,
+                                       55.2032775879,
+                                       53.6176719666,
+                                       52.0104789734,
+                                       50.3901405334,
+                                       48.7643356323,
+                                       47.1400909424,
+                                       45.5237045288,
+                                       43.9208030701,
+                                       42.336353302,
+                                       40.7746887207,
+                                       39.2395439148,
+                                       37.7341346741,
+                                       36.2611160278,
+                                       34.8227424622,
+                                       33.4466362,
+                                       32.3772735596,
+                                       31.328754425,
+                                       65.9736862183,
+                                       65.8815155029,
+                                       65.6360702515,
+                                       65.2862091064,
+                                       64.8018112183,
+                                       64.1881027222,
+                                       63.4515571594,
+                                       62.5996856689,
+                                       61.6408843994,
+                                       60.5841941833,
+                                       59.4391593933,
+                                       58.2155418396,
+                                       56.9232521057,
+                                       55.5720672607,
+                                       54.1716041565,
+                                       52.7310905457,
+                                       51.2593917847,
+                                       49.7648124695,
+                                       48.2551269531,
+                                       46.7375259399,
+                                       45.2185668945,
+                                       43.7042160034,
+                                       42.199848175,
+                                       40.7102241516,
+                                       39.2395439148,
+                                       37.7914886475,
+                                       36.3692169189,
+                                       34.9754295349,
+                                       33.6123847961,
+                                       32.5545730591,
+                                       31.5306549072,
+                                       30.5255126953,
+                                       62.4703788757,
+                                       62.4058837891,
+                                       62.2131195068,
+                                       61.8940887451,
+                                       61.4521331787,
+                                       60.8917617798,
+                                       60.2185935974,
+                                       59.4391670227,
+                                       58.5607948303,
+                                       57.5914230347,
+                                       56.5394515991,
+                                       55.4135246277,
+                                       54.2224197388,
+                                       52.974937439,
+                                       51.6796913147,
+                                       50.3450584412,
+                                       48.9790611267,
+                                       47.5893363953,
+                                       46.1830673218,
+                                       44.7668800354,
+                                       43.3469619751,
+                                       41.9289016724,
+                                       40.5177879333,
+                                       39.1181678772,
+                                       37.7341346741,
+                                       36.3692169189,
+                                       35.0265541077,
+                                       33.7088127136,
+                                       32.6617202759,
+                                       31.6664142609,
+                                       30.6867351532,
+                                       29.723903656,
+                                       59.2027587891,
+                                       59.1439094543,
+                                       58.9679450989,
+                                       58.6766357422,
+                                       58.2728424072,
+                                       57.7605171204,
+                                       57.1445236206,
+                                       56.4305839539,
+                                       55.625087738,
+                                       54.7350311279,
+                                       53.7678108215,
+                                       52.7310905457,
+                                       51.6327171326,
+                                       50.4805107117,
+                                       49.2822341919,
+                                       48.0454750061,
+                                       46.7775268555,
+                                       45.4853858948,
+                                       44.1756248474,
+                                       42.8544273376,
+                                       41.5275192261,
+                                       40.2001571655,
+                                       38.877155304,
+                                       37.5628471375,
+                                       36.2611160278,
+                                       34.9754295349,
+                                       33.7088127136,
+                                       32.6975517273,
+                                       31.7346553802,
+                                       30.7841281891,
+                                       29.8474140167,
+                                       28.9257545471,
+                                       56.1060714722,
+                                       56.0522842407,
+                                       55.8914489746,
+                                       55.625087738,
+                                       55.2557067871,
+                                       54.7867393494,
+                                       54.2224197388,
+                                       53.5677680969,
+                                       52.8284225464,
+                                       52.0104789734,
+                                       51.1205253601,
+                                       50.1653556824,
+                                       49.1519355774,
+                                       48.0872917175,
+                                       46.9784011841,
+                                       45.832118988,
+                                       44.6550750732,
+                                       43.4536628723,
+                                       42.2339172363,
+                                       41.0015525818,
+                                       39.7618980408,
+                                       38.5198707581,
+                                       37.2799949646,
+                                       36.0463790894,
+                                       34.8227424622,
+                                       33.6123847961,
+                                       32.6617202759,
+                                       31.7346553802,
+                                       30.8167037964,
+                                       29.9094753265,
+                                       29.0143985748,
+                                       28.1327323914,
+                                       53.1713485718,
+                                       53.1221389771,
+                                       52.974937439,
+                                       52.7310905457,
+                                       52.3927955627,
+                                       51.9630203247,
+                                       51.4454956055,
+                                       50.8446311951,
+                                       50.1653556824,
+                                       49.4130935669,
+                                       48.5936508179,
+                                       47.7130508423,
+                                       46.7775268555,
+                                       45.7933807373,
+                                       44.7668800354,
+                                       43.7042236328,
+                                       42.6114387512,
+                                       41.4943313599,
+                                       40.3584899902,
+                                       39.2091522217,
+                                       38.0512619019,
+                                       36.8894119263,
+                                       35.7278671265,
+                                       34.5704689026,
+                                       33.4466362,
+                                       32.5545730591,
+                                       31.6664142609,
+                                       30.7841281891,
+                                       29.9094753265,
+                                       29.0440425873,
+                                       28.1892433167,
+                                       27.3463401794,
+                                       50.3901405334,
+                                       50.3450584412,
+                                       50.2101821899,
+                                       49.9866943359,
+                                       49.6765098572,
+                                       49.2822341919,
+                                       48.8071632385,
+                                       48.2551269531,
+                                       47.6305236816,
+                                       46.938117981,
+                                       46.1830673218,
+                                       45.3707275391,
+                                       44.5066757202,
+                                       43.5965652466,
+                                       42.6460189819,
+                                       41.6606483459,
+                                       40.6459236145,
+                                       39.6071586609,
+                                       38.5494613647,
+                                       37.4776496887,
+                                       36.396320343,
+                                       35.3097419739,
+                                       34.2218933105,
+                                       33.2245254517,
+                                       32.3772735596,
+                                       31.5306549072,
+                                       30.6867351532,
+                                       29.8474140167,
+                                       29.0143985748,
+                                       28.1892433167,
+                                       27.3733463287,
+                                       26.5679397583,
+                                       47.7544021606,
+                                       47.7130508423,
+                                       47.5893363953,
+                                       47.3842887878,
+                                       47.0995864868,
+                                       46.7375259399,
+                                       46.300994873,
+                                       45.7933807373,
+                                       45.2185592651,
+                                       44.5807800293,
+                                       43.8845863342,
+                                       43.1347961426,
+                                       42.336353302,
+                                       41.4943313599,
+                                       40.6138343811,
+                                       39.699886322,
+                                       38.7575035095,
+                                       37.7914886475,
+                                       36.8065338135,
+                                       35.8070869446,
+                                       34.7973823547,
+                                       33.7813949585,
+                                       32.9320831299,
+                                       32.1316642761,
+                                       31.328754425,
+                                       30.5255126953,
+                                       29.723903656,
+                                       28.9257545471,
+                                       28.1327323914,
+                                       27.3463401794,
+                                       26.5679397583,
+                                       25.7987575531}};
+
+ap_uint<24> inv_matrix_8_fix[3][64] = {
+    {0,      573440, 571914, 500934, 438764, 384309, 336613, 294836, 573440, 573440, 554300, 490277, 431664,
+     379299, 332941, 292072, 571914, 554300, 512454, 462308, 412154, 365186, 322442, 284093, 500934, 490277,
+     462308, 424880, 384309, 344238, 306460, 271733, 438764, 431664, 412154, 384309, 352272, 319103, 286702,
+     256122, 384309, 379299, 365186, 344238, 319103, 292072, 264820, 238433, 336613, 332941, 322442, 306460,
+     286702, 264820, 242160, 219708, 294836, 292072, 284093, 271733, 256122, 238433, 219708, 200777},
+    {0,       3225600, 3214600, 2712197, 2288313, 1930677, 1628936, 1374353, 3225600, 3225600, 3088189,
+     2638422, 2240937, 1898460, 1606185, 1357854, 3214600, 3088189, 2792443, 2446967, 2111879, 1808349,
+     1541522, 1310462, 2712197, 2638422, 2446967, 2195873, 1930677, 1676412, 1444223, 1237800, 2288313,
+     2240937, 2111879, 1930677, 1726752, 1521076, 1325921, 1086047, 1930677, 1898460, 1808349, 1676412,
+     1521076, 1357854, 1197559, 804826,  1628936, 1606185, 1541522, 1444223, 1325921, 1197559, 858830,
+     571430,  1374353, 1357854, 1310462, 1237800, 1086047, 804826,  571430,  391838},
+    {0,     301014, 173537, 122278, 87381,  87381,  85556, 60284, 301014, 239204, 159771, 115525, 87381,
+     87381, 83112,  58803,  173537, 159771, 129848, 98919, 87381, 87381,  76366,  54653,  122278, 115525,
+     98919, 87381,  87381,  87381,  66768,  48594,  87381, 87381, 87381,  87381,  87381,  74294,  55990,
+     40365, 87381,  87381,  87381,  87381,  74294,  58803, 45395, 29913,  85556,  83112,  76366,  66768,
+     55990, 45395,  31920,  21238,  60284,  58803,  54653, 48594, 40365,  29913,  21238,  14563}};
+
+ap_uint<24> inv_matrix_16_fix[3][256] = {
+    {0,       0,       2441638, 2110453, 1805935, 1527539, 1292059, 1092880, 979637,  882036,  794159,  720192,
+     660388,  605551,  555267,  513377,  0,       0,       2359049, 2061117, 1769095, 1502424, 1274280, 1079954,
+     973257,  876924,  790013,  717366,  658011,  603538,  553552,  512061,  2441638, 2359049, 2163900, 1929223,
+     1668681, 1432145, 1223726, 1056519, 954648,  861946,  777824,  709020,  650980,  597575,  548467,  508152,
+     2110453, 2061117, 1929223, 1734043, 1527539, 1329150, 1147596, 1019948, 925274,  838103,  758296,  695541,
+     639580,  587878,  541090,  501757,  1805935, 1769095, 1668681, 1527539, 1368461, 1207734, 1064198, 973257,
+     887210,  806857,  734675,  677512,  624254,  574785,  531076,  493051,  1527539, 1502424, 1432145, 1329150,
+     1207734, 1079954, 999342,  919628,  842764,  769898,  709020,  655651,  605551,  558724,  518704,  482257,
+     1292059, 1274280, 1223726, 1147596, 1064198, 999342,  930994,  861946,  794159,  731730,  680030,  630735,
+     584081,  541090,  504297,  469635,  1092880, 1079954, 1056519, 1019948, 973257,  919628,  861946,  802580,
+     743657,  695541,  648668,  603538,  560468,  522767,  488199,  455467,  979637,  973257,  954648,  925274,
+     887210,  842764,  794159,  743657,  700870,  658011,  615808,  574785,  536755,  503024,  470758,  440042,
+     882036,  876924,  861946,  838103,  806857,  769898,  731730,  695541,  658011,  620003,  582200,  545493,
+     513377,  482257,  452306,  423640,  794159,  790013,  777824,  758296,  734675,  709020,  680030,  648668,
+     615808,  582200,  548467,  518704,  489404,  460825,  433156,  404651,  720192,  717366,  709020,  695541,
+     677512,  655651,  630735,  603538,  574785,  545493,  518704,  491830,  465193,  439046,  412893,  384233,
+     660388,  658011,  650980,  639580,  624254,  605551,  584081,  560468,  536755,  513377,  489404,  465193,
+     441041,  417116,  389895,  363695,  605551,  603538,  597575,  587878,  574785,  558724,  541090,  522767,
+     503024,  482257,  460825,  439046,  417116,  391813,  367140,  343268,  555267,  553552,  548467,  541090,
+     531076,  518704,  504297,  488199,  470758,  452306,  433156,  412893,  389895,  367140,  344835,  323147,
+     513377,  512061,  508152,  501757,  493051,  482257,  469635,  455467,  440042,  423640,  404651,  384233,
+     363695,  343268,  323147,  303491},
+    {0,       0,       5751209, 4544049, 3799576, 3391573, 3027382, 2702299, 2436075, 2197740, 1982723, 1763518,
+     1534572, 1335348, 1161989, 974727,  0,       0,       5440084, 4373603, 3746811, 3353638, 2999054, 2680571,
+     2420510, 2185243, 1972567, 1752429, 1525721, 1328236, 1156239, 968844,  5751209, 5440084, 4731487, 3973698,
+     3601163, 3246381, 2917801, 2623494, 2375098, 2148620, 1942705, 1719843, 1499653, 1307247, 1139247, 951484,
+     4544049, 4373603, 3973698, 3696279, 3391573, 3086079, 2793372, 2534376, 2303382, 2090297, 1894846, 1667696,
+     1457751, 1273387, 1103434, 923481,  3799576, 3746811, 3601163, 3391573, 3147719, 2891875, 2642200, 2420510,
+     2210386, 2013818, 1820750, 1598900, 1402122, 1228203, 1055862, 886125,  3391573, 3353638, 3246381, 3086079,
+     2891875, 2680571, 2484135, 2289592, 2101702, 1923283, 1719843, 1516952, 1335348, 1173618, 998753,  841041,
+     3027382, 2999054, 2917801, 2793372, 2642200, 2484135, 2317350, 2148620, 1982723, 1809059, 1608443, 1425546,
+     1260221, 1103434, 934545,  790037,  2702299, 2680571, 2623494, 2534376, 2420510, 2289592, 2148620, 2003346,
+     1856592, 1667696, 1491121, 1328236, 1179499, 1017307, 865695,  734963,  2436075, 2420510, 2375098, 2303382,
+     2210386, 2101702, 1982723, 1856592, 1688240, 1525721, 1371816, 1228203, 1082691, 928991,  794499,  677590,
+     2197740, 2185243, 2148620, 2090297, 2013818, 1923283, 1809059, 1667696, 1525721, 1386839, 1253715, 1124732,
+     974727,  841041,  722986,  619515,  1982723, 1972567, 1942705, 1894846, 1820750, 1719843, 1608443, 1491121,
+     1371816, 1253715, 1139247, 998752,  870742,  755519,  652842,  544631,  1763518, 1752429, 1719843, 1667696,
+     1598900, 1516952, 1425546, 1328236, 1228203, 1124732, 998752,  880957,  772520,  673980,  578732,  465986,
+     1534572, 1525721, 1499653, 1457751, 1402122, 1335348, 1260221, 1179499, 1082691, 974727,  870742,  772520,
+     681226,  596747,  486978,  394922,  1335348, 1328236, 1307247, 1273387, 1228203, 1173618, 1103434, 1017307,
+     928991,  841041,  755519,  673980,  596747,  494226,  406298,  331816,  1161989, 1156239, 1139247, 1103434,
+     1055862, 998753,  934545,  865695,  794499,  722986,  652842,  578732,  486978,  406298,  336400,  276619,
+     974727,  968844,  951484,  923481,  886125,  841041,  790037,  734963,  677590,  619515,  544631,  465986,
+     394922,  331816,  276619,  228975},
+    {0,      0,      630388, 459728, 346040, 270138, 210885, 164629, 145236, 129332, 115170, 103181, 93307,  84378,
+     76303,  60309,  0,      0,      585115, 436769, 335651, 263595, 206607, 161757, 144191, 128504, 114506, 102711,
+     92918,  84053,  76030,  59794,  630388, 585115, 485316, 381545, 307860, 245565, 194600, 157883, 141148, 126081,
+     112555, 101326, 91767,  83090,  75220,  58282,  459728, 436769, 381545, 325861, 270138, 219900, 176964, 151854,
+     136356, 122232, 109437, 99095,  89905,  81527,  71893,  55867,  346040, 335651, 307860, 270138, 229587, 190851,
+     159151, 144191, 130171, 117207, 105593, 96120,  87409,  79424,  67542,  52693,  270138, 263595, 245565, 219900,
+     190851, 161757, 148468, 135437, 122984, 111288, 101326, 92531,  84378,  76855,  62426,  48936,  210885, 206607,
+     194600, 176964, 159151, 148468, 137288, 126081, 115170, 105102, 96535,  88463,  80917,  71893,  56818,  44786,
+     164629, 161757, 157883, 151854, 144191, 135437, 126081, 116520, 107092, 99095,  91389,  84053,  77133,  64075,
+     50981,  40428,  145236, 144191, 141148, 136356, 130171, 122984, 115170, 107092, 99976,  92918,  86039,  79424,
+     69986,  56340,  45145,  36032,  129332, 128504, 126081, 122232, 117207, 111288, 105102, 99095,  92918,  86719,
+     80614,  73866,  60309,  48936,  39498,  31736,  115170, 114506, 112555, 109437, 105593, 101326, 96535,  91389,
+     86039,  80614,  75220,  62426,  51402,  42040,  34182,  25375,  103181, 102711, 101326, 99095,  96120,  92531,
+     88463,  84053,  79424,  73866,  62426,  52258,  43386,  35760,  28391,  19017,  93307,  92918,  91767,  89905,
+     87409,  84378,  80917,  77133,  69986,  60309,  51402,  43386,  36306,  30047,  20632,  14004,  84378,  84053,
+     83090,  81527,  79424,  76855,  71893,  64075,  56340,  48936,  42040,  35760,  30047,  21203,  14759,  10149,
+     76303,  76030,  75220,  71893,  67542,  62426,  56818,  50981,  45145,  39498,  34182,  28391,  20632,  14759,
+     10410,  7249,   60309,  59794,  58282,  55867,  52693,  48936,  44786,  40428,  36032,  31736,  25375,  19017,
+     14004,  10149,  7249,   5111}};
+
+ap_uint<24> inv_matrix_32_fix[3][1024] = {
+    {0,       0,       0,       0,       5131959, 4670491, 4250519, 3878761, 3542927, 3236170, 2955973, 2700037,
+     2466260, 2274087, 2118957, 1974409, 1839722, 1714223, 1597285, 1490256, 1397152, 1309866, 1228032, 1151311,
+     1079383, 1011949, 954704,  901007,  850330,  802503,  757366,  714767,  0,       0,       0,       0,
+     5072775, 4627118, 4217502, 3853877, 3523007, 3219979, 2942652, 2688968, 2456987, 2267924, 2113623, 1969770,
+     1835669, 1710667, 1594156, 1487730, 1394902, 1307856, 1226234, 1149698, 1077934, 1010645, 953642,  900042,
+     849451,  801702,  756636,  714101,  0,       0,       0,       0,       4908661, 4504031, 4123432, 3781605,
+     3464803, 3172463, 2903431, 2656298, 2429565, 2249644, 2097784, 1955978, 1823608, 1700081, 1584833, 1480198,
+     1388190, 1301861, 1220866, 1144883, 1073606, 1006935, 950469,  897157,  846825,  799309,  754452,  712106,
+     0,       0,       0,       0,       4670491, 4318764, 3982627, 3668379, 3372610, 3096591, 2840422, 2603565,
+     2385137, 2219847, 2071905, 1933401, 1803834, 1682702, 1569509, 1467797, 1377130, 1291973, 1212008, 1136931,
+     1066456, 1001153, 945218,  892382,  842477,  795345,  750834,  708800,  5131959, 5072775, 4908661, 4670491,
+     4390194, 4094197, 3805312, 3523007, 3252544, 2996713, 2756785, 2533105, 2331170, 2179470, 2036722, 1902624,
+     1776815, 1658906, 1548492, 1450748, 1361905, 1278347, 1199788, 1125953, 1056576, 993151,  937947,  885767,
+     836451,  789848,  745815,  704212,  4670491, 4627118, 4504031, 4318764, 4094197, 3853877, 3604336, 3354843,
+     3111448, 2877883, 2656298, 2447781, 2274087, 2129705, 1993181, 1864403, 1743163, 1629194, 1522192, 1429345,
+     1342761, 1261188, 1184381, 1112095, 1044091, 983020,  928735,  877378,  828804,  782869,  739438,  698619,
+     4250519, 4217502, 4123432, 3982627, 3805312, 3604336, 3390593, 3172463, 2955973, 2745243, 2542942, 2350878,
+     2208155, 2071905, 1942371, 1819621, 1703597, 1594156, 1492790, 1403941, 1319993, 1240747, 1165998, 1095537,
+     1029156, 970871,  917676,  867300,  819609,  774471,  731761,  692068,  3878761, 3853877, 3781605, 3668379,
+     3523007, 3354843, 3172463, 2983002, 2792006, 2603565, 2420554, 2267924, 2135121, 2007491, 1885454, 1769234,
+     1658906, 1554446, 1460446, 1374937, 1293940, 1217309, 1144883, 1076488, 1011949, 956834,  904884,  855630,
+     808951,  764729,  722846,  684452,  3542927, 3523007, 3464803, 3372610, 3252544, 3111448, 2955973, 2792006,
+     2624408, 2456987, 2305432, 2179470, 2056680, 1937876, 1823608, 1714223, 1609913, 1510754, 1424666, 1342761,
+     1264967, 1191187, 1121302, 1055177, 994288,  941052,  890484,  842477,  796927,  753726,  712770,  675828,
+     3236170, 3219979, 3172463, 3096591, 2996713, 2877883, 2745243, 2603565, 2456987, 2318226, 2202356, 2087355,
+     1974409, 1864403, 1757973, 1655557, 1557438, 1467797, 1385966, 1307856, 1233454, 1162707, 1095537, 1031846,
+     975260,  923681,  874611,  827961,  783640,  741554,  701628,  666259,  2955973, 2942652, 2903431, 2840422,
+     2756785, 2656298, 2542942, 2420554, 2305432, 2202356, 2097784, 1993181, 1889719, 1788302, 1689615, 1594156,
+     1503004, 1422336, 1344864, 1270671, 1199788, 1132204, 1067880, 1006935, 954704,  904884,  857409,  812207,
+     769201,  728312,  690299,  655815,  2700037, 2688968, 2656298, 2603565, 2533105, 2447781, 2350878, 2267924,
+     2179470, 2087355, 1993181, 1898303, 1803834, 1710668, 1619502, 1530871, 1450748, 1374937, 1301861, 1231642,
+     1164351, 1100012, 1038620, 983020,  932811,  884828,  839025,  795345,  753726,  714101,  678111,  644568,
+     2466260, 2456987, 2429565, 2385137, 2331170, 2274087, 2208155, 2135121, 2056680, 1974409, 1889719, 1803834,
+     1717792, 1632448, 1548492, 1470262, 1397152, 1326135, 1257429, 1191187, 1127510, 1066456, 1008099, 957902,
+     909769,  863682,  819609,  777508,  737332,  699219,  665148,  632591,  2274087, 2267924, 2249644, 2219847,
+     2179470, 2129705, 2071905, 2007491, 1937876, 1864403, 1788302, 1710668, 1632448, 1554446, 1480198, 1410789,
+     1342761, 1276421, 1212008, 1149698, 1089617, 1031846, 979682,  931789,  885767,  841612,  799309,  758829,
+     720138,  684452,  651494,  619962,  2118957, 2113623, 2097784, 2071905, 2036722, 1993181, 1942371, 1885454,
+     1823608, 1757973, 1689615, 1619502, 1548492, 1480198, 1415388, 1351210, 1288054, 1226234, 1165998, 1107538,
+     1050997, 997710,  950469,  904884,  860984,  818781,  778271,  739438,  702260,  669051,  637236,  606756,
+     1974409, 1969770, 1955978, 1933401, 1902624, 1864403, 1819621, 1769234, 1714223, 1655557, 1594156, 1530871,
+     1470262, 1410789, 1351210, 1291973, 1233454, 1175965, 1119759, 1065035, 1011949, 965432,  920670,  877378,
+     835596,  795345,  756636,  719463,  685033,  653109,  622456,  593050,  1839722, 1835669, 1823608, 1803834,
+     1776815, 1743163, 1703597, 1658906, 1609913, 1557438, 1503004, 1450748, 1397152, 1342761, 1288054, 1233454,
+     1179319, 1125953, 1073606, 1022482, 976362,  932811,  890484,  849451,  809763,  771453,  734538,  699219,
+     667374,  636717,  607238,  578919,  1714223, 1710667, 1700081, 1682702, 1658906, 1629194, 1594156, 1554446,
+     1510754, 1467797, 1422336, 1374937, 1326135, 1276421, 1226234, 1175965, 1125953, 1076488, 1027816, 983020,
+     941052,  900042,  860088,  821268,  783640,  747243,  712106,  679831,  649351,  619962,  591660,  564435,
+     1597285, 1594156, 1584833, 1569509, 1548492, 1522192, 1492790, 1460446, 1424666, 1385966, 1344864, 1301861,
+     1257429, 1212008, 1165998, 1119759, 1073606, 1027816, 985257,  945218,  905857,  867300,  829648,  792982,
+     757366,  722846,  690299,  660181,  631055,  602927,  575800,  549669,  1490256, 1487730, 1480198, 1467797,
+     1450748, 1429345, 1403941, 1374937, 1342761, 1307856, 1270671, 1231642, 1191187, 1149698, 1107538, 1065035,
+     1022482, 983020,  945218,  907810,  870943,  834742,  799309,  764729,  731069,  698619,  669051,  640362,
+     612571,  585692,  559730,  534687,  1397152, 1394902, 1388190, 1377130, 1361905, 1342761, 1319993, 1293940,
+     1264967, 1233454, 1199788, 1164351, 1127510, 1089617, 1050997, 1011949, 976362,  941052,  905857,  870943,
+     836451,  802503,  769201,  736632,  704865,  675828,  647750,  620459,  593980,  568330,  543519,  519552,
+     1309866, 1307856, 1301861, 1291973, 1278347, 1261188, 1240747, 1217309, 1191187, 1162707, 1132204, 1100012,
+     1066456, 1031846, 997710,  965432,  932811,  900042,  867300,  834742,  802503,  770701,  739438,  708800,
+     680406,  653109,  626480,  600553,  575356,  550911,  527232,  504325,  1228032, 1226234, 1220866, 1212008,
+     1199788, 1184381, 1165998, 1144883, 1121302, 1095537, 1067880, 1038620, 1008099, 979682,  950469,  920670,
+     890484,  860088,  829648,  799309,  769201,  739438,  710120,  682713,  656358,  630543,  605316,  580713,
+     556767,  533499,  510927,  492344,  1151311, 1149698, 1144883, 1136931, 1125953, 1112095, 1095537, 1076488,
+     1055177, 1031846, 1006935, 983020,  957902,  931789,  904884,  877378,  849451,  821268,  792982,  764729,
+     736632,  708800,  682713,  657447,  632591,  608202,  584327,  561007,  538274,  516151,  496368,  481317,
+     1079383, 1077934, 1073606, 1066456, 1056576, 1044091, 1029156, 1011949, 994288,  975260,  954704,  932811,
+     909769,  885767,  860984,  835596,  809763,  783640,  757366,  731069,  704865,  680406,  656358,  632591,
+     609169,  586147,  563576,  541493,  519932,  499422,  484703,  470233,  1011949, 1010645, 1006935, 1001153,
+     993151,  983020,  970871,  956834,  941052,  923681,  904884,  884828,  863682,  841612,  818781,  795345,
+     771453,  747243,  722846,  698619,  675828,  653109,  630543,  608202,  586147,  564435,  543113,  522221,
+     501793,  487146,  473022,  459122,  954704,  953642,  950469,  945218,  937947,  928735,  917676,  904884,
+     890484,  874611,  857409,  839025,  819609,  799309,  778271,  756636,  734538,  712106,  690299,  669051,
+     647750,  626480,  605316,  584327,  563576,  543113,  522987,  503238,  488622,  474897,  461354,  448010,
+     901007,  900042,  897157,  892382,  885767,  877378,  867300,  855630,  842477,  827961,  812207,  795345,
+     777508,  758829,  739438,  719463,  699219,  679831,  660181,  640362,  620459,  600553,  580713,  561007,
+     541493,  522221,  503238,  489115,  475839,  462701,  449723,  436922,  850330,  849451,  846825,  842477,
+     836451,  828804,  819609,  808951,  796927,  783640,  769201,  753726,  737332,  720138,  702260,  685033,
+     667374,  649351,  631055,  612571,  593980,  575356,  556767,  538274,  519932,  501793,  488622,  475839,
+     463152,  450584,  438155,  425882,  802503,  801702,  799309,  795345,  789848,  782869,  774471,  764729,
+     753726,  741554,  728312,  714101,  699219,  684452,  669051,  653109,  636717,  619962,  602927,  585692,
+     568330,  550911,  533499,  516151,  499422,  487146,  474897,  462701,  450584,  438567,  426670,  414910,
+     757366,  756636,  754452,  750834,  745815,  739438,  731761,  722846,  712770,  701628,  690299,  678111,
+     665148,  651494,  637236,  622456,  607238,  591660,  575800,  559730,  543519,  527232,  510927,  496368,
+     484703,  473022,  461354,  449723,  438155,  426670,  415287,  404024,  714767,  714101,  712106,  708800,
+     704212,  698619,  692068,  684452,  675828,  666259,  655815,  644568,  632591,  619962,  606756,  593050,
+     578919,  564435,  549669,  534687,  519552,  504325,  492344,  481317,  470233,  459122,  448010,  436922,
+     425882,  414910,  404024,  393243},
+    {0,        0,       0,       0,       10256567, 9163795, 8187452, 7334503, 6576615,  5897041,  5287688, 4741301,
+     4251374,  3823808, 3450992, 3114525, 2810863,  2536807, 2289472, 2087679, 1978480,  1874992,  1776918, 1683973,
+     1595891,  1512415, 1420110, 1332765, 1250792,  1173861, 1101662, 1033903, 0,        0,        0,       0,
+     10115302, 9062138, 8111468, 7277870, 6532108,  5861525, 5259001, 4717901, 4232130,  3808773,  3438387, 3103905,
+     2801875,  2529171, 2282962, 2084732, 1975826,  1872597, 1774751, 1682009, 1594106,  1510791,  1418378, 1331200,
+     1249376,  1172578, 1100497, 1032846, 0,        0,       0,       0,       9725277,  8774666,  7895240, 7113812,
+     6402356,  5757510, 5174699, 4648953, 4175309,  3764280, 3401038, 3072400, 2775187,  2506479,  2263603, 2075939,
+     1967905,  1865445, 1768278, 1676139, 1588774,  1505526, 1413203, 1326521, 1245141,  1168739,  1097015, 1029682,
+     0,        0,       0,       0,       9163795,  8344872, 7571685, 6858080, 6197748,  5592084,  5039753, 4538028,
+     4083526,  3692107, 3340291, 3021045, 2731603,  2469362, 2231895, 2061445, 1954839,  1853638,  1757587, 1666439,
+     1579957,  1496049, 1404644, 1318782, 1238133,  1162385, 1091248, 1024442, 10256567, 10115302, 9725277, 9163795,
+     8510160,  7827871, 7167557, 6532108, 5932993,  5375578, 4861571, 4390531, 3963957,  3595004,  3258255, 2951472,
+     2672398,  2418824, 2188634, 2041485, 1936825,  1837343, 1742818, 1653028, 1567758,  1482942,  1392801, 1308065,
+     1228424,  1153579, 1083251, 1017173, 9163795,  9062138, 8774666, 8344872, 7827871,  7277870,  6714142, 6158448,
+     5624412,  5119910, 4648953, 4213040, 3823808,  3476438, 3157611, 2865780, 2599225,  2356177,  2134869, 2016372,
+     1914124,  1816782, 1724159, 1636067, 1552316,  1466362, 1377806, 1294488, 1216114,  1142407,  1073100, 1008579,
+     8187452,  8111468, 7895240, 7571685, 7167557,  6714142, 6237570, 5757510, 5287688,  4837070,  4411073, 4012705,
+     3663905,  3340291, 3041417, 2766381, 2514008,  2282962, 2090635, 1986483, 1887057,  1792226,  1701844, 1615756,
+     1533801,  1446502, 1359827, 1278192, 1201328,  1128976, 1060888, 998743,  7334503,  7277870,  7113812, 6858080,
+     6532108,  6158448, 5757510, 5345971, 4936475,  4538028, 4156665, 3808773, 3489280,  3190579,  2912880, 2655858,
+     2418824,  2200862, 2052844, 1952246, 1855988,  1763987, 1676139, 1592325, 1512415,  1423584,  1339055, 1259345,
+     1184209,  1113412, 1046725, 987310,  6576615,  6532108, 6402356, 6197748, 5932993,  5624412,  5287688, 4936475,
+     4581817,  4232130, 3900570, 3595003, 3304714,  3031203, 2775187, 2536808, 2315800,  2111617,  2010873, 1914124,
+     1821314,  1732406, 1647339, 1566029, 1484803,  1397857, 1315706, 1238133, 1164921,  1095858,  1030735, 974370,
+     5897041,  5861525, 5757510, 5592084, 5375578,  5119910, 4837070, 4538028, 4232130,  3932039,  3649944, 3376518,
+     3114525,  2865780, 2631349, 2411736, 2207015,  2061445, 1965279, 1872597, 1783449,  1697843,  1615756, 1537139,
+     1453673,  1369587, 1290011, 1214758, 1143640,  1076467, 1013100, 960022,  5287688,  5259001,  5174699, 5039753,
+     4861571,  4648953, 4411073, 4156665, 3900570,  3649944, 3401038, 3157611, 2922451,  2697519,  2484105, 2282962,
+     2102541,  2008134, 1916621, 1828151, 1742818,  1660667, 1581713, 1505526, 1420110,  1339055,  1262216, 1189436,
+     1120554,  1055406, 996086,  944370,  4741301,  4717901, 4648953, 4538028, 4390531,  4213040,  4012705, 3808773,
+     3595003,  3376518, 3157611, 2941747, 2731603,  2529172, 2335853, 2152565, 2041485,  1952246,  1865445, 1781267,
+     1699841,  1621251, 1545538, 1466362, 1384439,  1306546, 1232570, 1162385, 1095858,  1032846,  977795,  927525,
+     4251374,  4232130, 4175309, 4083526, 3963957,  3823808, 3663905, 3489280, 3304714,  3114525,  2922451, 2731603,
+     2544480,  2363013, 2188634, 2064328, 1978480,  1894367, 1812271, 1732406, 1654932,  1579957,  1507434, 1425326,
+     1346984,  1272346, 1201328, 1133832, 1069748,  1009480, 958355,  909602,  3823808,  3808773,  3764280, 3692107,
+     3595004,  3476438, 3340291, 3190579, 3031203,  2865780, 2697519, 2529172, 2363013,  2200862,  2075939, 1994549,
+     1914124,  1835037, 1757587, 1682009, 1608481,  1537139, 1460902, 1382776, 1308065,  1236738,  1168739, 1103997,
+     1042424,  987310,  937898,  890716,  3450992,  3438387, 3401038, 3340291, 3258255,  3157611,  3041417, 2912880,
+     2775187,  2631349, 2484105, 2335853, 2188634,  2075939, 1999962, 1924150, 1848955,  1774751,  1701844, 1630482,
+     1560861,  1490409, 1413203, 1339055, 1267989,  1199997, 1135051, 1073100, 1014081,  964208,   916551,  870986,
+     3114525,  3103905, 3072400, 3021045, 2951472,  2865780, 2766381, 2655858, 2536808,  2411736,  2282962, 2152565,
+     2064328,  1994549, 1924150, 1853638, 1783449,  1713950, 1645451, 1578204, 1512415,  1437617,  1364693, 1294488,
+     1227047,  1162385, 1100497, 1041353, 988182,   940317,  894445,  850526,  2810863,  2801875,  2775187, 2731603,
+     2672398,  2599225, 2514008, 2418824, 2315800,  2207015, 2102541, 2041485, 1978480,  1914124,  1848955, 1783449,
+     1718021,  1653028, 1588774, 1525514, 1455475,  1384439, 1315706, 1249376, 1185512,  1124152,  1065304, 1009480,
+     961693,   915775,  871705,  829452,  2536807,  2529171, 2506479, 2469362, 2418824,  2356177,  2282962, 2200862,
+     2111617,  2061445, 2008134, 1952246, 1894367,  1835037, 1774751, 1713950, 1653028,  1592325,  1532138, 1466362,
+     1397857,  1331200, 1266542, 1203995, 1143640,  1085527, 1029682, 980376,  934687,   890716,   848451,  807872,
+     2289472,  2282962, 2263603, 2231895, 2188634,  2134869, 2090635, 2052844, 2010873,  1965279,  1916621, 1865445,
+     1812271,  1757587, 1701844, 1645451, 1588774,  1532138, 1470021, 1404644, 1340635,  1278192,  1217473, 1158599,
+     1101662,  1046725, 996086,  950912,  907303,   865268,  824802,  785895,  2087679,  2084732,  2075939, 2061445,
+     2041485,  2016372, 1986483, 1952246, 1914124,  1872597, 1828151, 1781267, 1732406,  1682009,  1630482, 1578204,
+     1525514,  1466362, 1404644, 1343804, 1284080,  1225671, 1168739, 1113412, 1059788,  1008579,  964208,  921230,
+     879672,   839549,  800867,  763621,  1978480,  1975826, 1967905, 1954839, 1936825,  1914124,  1887057, 1855988,
+     1821314,  1783449, 1742818, 1699841, 1654932,  1608481, 1560861, 1512415, 1455475,  1397857,  1340635, 1284080,
+     1228424,  1173861, 1120554, 1068634, 1018207,  974370,  932290,  891460,  851914,   813673,   776750,  741147,
+     1874992,  1872597, 1865445, 1853638, 1837343,  1816782, 1792226, 1763987, 1732406,  1697843,  1660667, 1621251,
+     1579957,  1537139, 1490409, 1437617, 1384439,  1331200, 1278192, 1225671, 1173861,  1122950,  1073100, 1024442,
+     981238,   940316,  900461,  861723,  824141,   787744,  752548,  718562,  1776918,  1774751,  1768278, 1757587,
+     1742818,  1724159, 1701844, 1676139, 1647339,  1615756, 1581713, 1545538, 1507434,  1460902,  1413203, 1364693,
+     1315706,  1266542, 1217473, 1168739, 1120554,  1073100, 1026533, 984701,  945184,   906539,   868835,  832126,
+     796456,   761857,  728350,  701409,  1683973,  1682009, 1676139, 1666439, 1653028,  1636067,  1615756, 1592325,
+     1566029,  1537139, 1505526, 1466362, 1425326,  1382776, 1339055, 1294488, 1249376,  1203995,  1158599, 1113412,
+     1068634,  1024442, 984701,  946815,  909602,   873145,  837515,  802768,  768952,   736100,   707081,  685862,
+     1595891,  1594106, 1588774, 1579957, 1567758,  1552316, 1533801, 1512415, 1484803,  1453673,  1420110, 1384439,
+     1346984,  1308065, 1267989, 1227047, 1185512,  1143640, 1101662, 1059788, 1018207,  981238,   945184,  909602,
+     874589,   840229,  806592,  773737,  741711,   711387,  690636,  670231,  1512415,  1510791,  1505526, 1496049,
+     1482942,  1466362, 1446502, 1423584, 1397857,  1369587, 1339055, 1306546, 1272346,  1236738,  1199997, 1162385,
+     1124152,  1085527, 1046725, 1008579, 974370,   940316,  906539,  873145,  840229,   807872,   776146,  745108,
+     714808,   694081,  674165,  654558,  1420110,  1418378, 1413203, 1404644, 1392801,  1377806,  1359827, 1339055,
+     1315706,  1290011, 1262216, 1232570, 1201328,  1168739, 1135051, 1100497, 1065304,  1029682,  996086,  964208,
+     932290,   900461,  868835,  837515,  806592,   776146,  746245,  716949,  696161,   676809,   657707,  638880,
+     1332765,  1331200, 1326521, 1318782, 1308065,  1294488, 1278192, 1259345, 1238133,  1214758,  1189436, 1162385,
+     1133832,  1103997, 1073100, 1041353, 1009480,  980376,  950912,  921230,  891460,   861723,   832126,  802768,
+     773737,   745108,  716949,  696857,  678137,   659608,  641298,  623233,  1250792,  1249376,  1245141, 1238133,
+     1228424,  1216114, 1201328, 1184209, 1164921,  1143640, 1120554, 1095858, 1069748,  1042424,  1014081, 988182,
+     961693,   934687,  907303,  879672,  851914,   824141,  796456,  768952,  741711,   714808,   696161,  678137,
+     660244,   642513,  624972,  607647,  1173861,  1172578, 1168739, 1162385, 1153579,  1142407,  1128976, 1113412,
+     1095858,  1076467, 1055406, 1032846, 1009480,  987310,  964208,  940317,  915775,   890716,   865268,  839549,
+     813673,   787744,  761857,  736100,  711387,   694081,  676809,  659608,  642513,   625554,   608759,  592154,
+     1101662,  1100497, 1097015, 1091248, 1083251,  1073100, 1060888, 1046725, 1030735,  1013100,  996086,  977795,
+     958355,   937898,  916551,  894445,  871705,   848451,  824802,  800867,  776750,   752548,   728350,  707081,
+     690636,   674165,  657707,  641298,  624972,   608759,  592687,  576779,  1033903,  1032846,  1029682, 1024442,
+     1017173,  1008579, 998743,  987310,  974370,   960022,  944370,  927525,  909602,   890716,   870986,  850526,
+     829452,   807872,  785895,  763621,  741147,   718562,  701409,  685862,  670231,   654558,   638880,  623233,
+     607647,   592154,  576779,  561546},
+    {0,       0,       0,       0,       1591422, 1272360, 1017266, 841460,  704536, 589892,  493904,  413535,  346244,
+     290031,  243063,  203702,  170715,  143069,  119901,  102775,  95833,   89361,  83325,   77697,   72450,   67557,
+     63969,   60623,   57452,   54447,   51599,   48900,   0,       0,       0,      0,       1548184, 1244480, 998601,
+     830906,  696789,  584118,  489548,  410217,  343695,  288069,  241537,  202507, 169776,  142329,  119314,  102586,
+     95666,   89212,   83193,   77580,   72344,   67462,   63903,   60563,   57397,  54397,   51553,   48858,   0,
+     0,       0,       0,       1431876, 1167297, 948704,  800624,  674393,  567335, 476834,  400500,  336213,  282299,
+     237036,  198980,  167000,  140136,  117577,  102023,  95167,   88769,   82798,  77227,   72030,   67211,   63706,
+     60383,   57232,   54246,   51415,   48731,   0,       0,       0,       0,      1272360, 1056482, 886216,  754291,
+     639652,  541032,  456753,  385057,  324262,  273042,  229792,  193287,  162509, 136582,  114755,  101097,  94345,
+     88038,   82147,   76646,   71510,   66853,   63379,   60084,   56960,   53996,  51186,   48521,   1591422, 1548184,
+     1431876, 1272360, 1098449, 935558,  810496,  696789,  595760,  507341,  430751, 364887,  308578,  260793,  220159,
+     185686,  156491,  131804,  110952,  99824,   93215,   87032,   81250,   75844,  70792,   66357,   62926,   59671,
+     56581,   53650,   50868,   48229,   1272360, 1244480, 1167297, 1056482, 935558, 830906,  728683,  633061,  546134,
+     468641,  400500,  341174,  290031,  246158,  208579,  176499,  149185,  125980, 106300,  98229,   91796,   85767,
+     80120,   74833,   69886,   65728,   62353,   59146,   56101,   53210,   50465,  47859,   1017266, 998601,  948704,
+     886216,  810496,  728683,  646358,  567335,  493904,  427222,  367671,  315143, 269461,  229792,  195537,  166089,
+     140862,  119314,  102964,  96338,   90110,   84261,   78772,   73626,   68803,  64974,   61663,   58515,   55523,
+     52680,   49978,   47412,   841460,  830906,  800624,  754291,  696789,  633061, 567335,  502799,  441610,  385057,
+     333772,  288069,  247726,  212344,  181524,  154827,  131804,  112021,  100548, 94183,   88184,   82537,   77227,
+     72239,   67557,   64102,   60865,   57784,   54853,   52064,   49413,   46892,  704536,  696789,  674393,  639652,
+     595760,  546134,  493904,  441610,  391125,  343695,  300130,  260793,  225593, 194408,  167000,  143069,  122285,
+     104314,  97880,   91796,   86045,   80619,   75505,   70690,   66427,   63120,  59966,   56960,   54096,   51369,
+     48773,   46303,   589892,  584118,  567335,  541032,  507341,  468641,  427222, 385057,  343695,  304312,  267695,
+     234101,  203702,  176499,  152375,  131139,  112561,  101097,  95002,   89212,  83724,   78531,   73626,   68998,
+     65246,   62038,   58973,   56048,   53258,   50599,   48064,   45650,   493904, 489548,  476834,  456753,  430751,
+     400500,  367671,  333772,  300130,  267695,  237036,  208579,  182553,  159033, 137989,  119314,  103728,  97707,
+     91952,   86466,   81250,   76301,   71613,   67211,   63969,   60865,   57896,  55058,   52347,   49759,   47291,
+     44937,   413535,  410217,  400500,  385057,  364887,  341174,  315143,  288069, 260793,  234101,  208579,  184633,
+     162509,  142329,  124114,  107821,  99824,   94183,   88769,   83591,   78652,  73952,   69489,   65728,   62607,
+     59612,   56743,   53996,   51369,   48858,   46459,   44170,   346244,  343695, 336213,  324262,  308578,  290031,
+     269461,  247726,  225593,  203702,  182553,  162509,  143815,  126610,  110952, 101281,  95833,   90564,   85490,
+     80619,   75958,   71510,   67283,   64168,   61170,   58289,   55523,   52871,  50331,   47900,   45574,   43352,
+     290031,  288069,  282299,  273042,  260793,  246158,  229792,  212344,  194408, 176499,  159033,  142329,  126610,
+     112021,  102023,  96848,   91796,   86890,   82147,   77580,   73195,   68998,  65521,   62543,   59671,   56905,
+     54246,   51692,   49241,   46892,   44642,   42490,   243063,  241537,  237036, 229792,  220159,  208579,  195537,
+     181524,  167000,  152375,  137989,  124114,  110952,  102023,  97190,   92422,  87749,   83193,   78772,   74501,
+     70387,   66639,   63706,   60865,   58120,   55471,   52920,   50465,   48105,  45841,   43669,   41588,   203702,
+     202507,  198980,  193287,  185686,  176499,  166089,  154827,  143069,  131139, 119314,  107821,  101281,  96848,
+     92422,   88038,   83724,   79503,   75392,   71406,   67557,   64636,   61850,  59146,   56528,   53996,   51553,
+     49198,   46932,   44753,   42660,   40652,   170715,  169776,  167000,  162509, 156491,  149185,  140862,  131804,
+     122285,  112561,  103728,  99824,   95833,   91796,   87749,   83724,   79748,  75844,   72030,   68319,   65315,
+     62607,   59966,   57397,   54904,   52489,   50154,   47900,   45726,   43634,  41621,   39687,   143069,  142329,
+     140136,  136582,  131804,  125980,  119314,  112021,  104314,  101097,  97707,  94183,   90564,   86890,   83193,
+     79503,   75844,   72239,   68706,   65728,   63120,   60563,   58064,   55628,  53258,   50959,   48731,   46577,
+     44496,   42490,   40557,   38698,   119901,  119314,  117577,  114755,  110952, 106300,  102964,  100548,  97880,
+     95002,   91952,   88769,   85490,   82147,   78772,   75392,   72030,   68706,  65867,   63379,   60926,   58515,
+     56154,   53847,   51599,   49413,   47291,   45235,   43247,   41327,   39474,  37689,   102775,  102586,  102023,
+     101097,  99824,   98229,   96338,   94183,   91796,   89212,   86466,   83591,  80619,   77580,   74501,   71406,
+     68319,   65728,   63379,   61048,   58744,   56474,   54246,   52064,   49934,  47859,   45841,   43882,   41985,
+     40150,   38377,   36666,   95833,   95666,   95167,   94345,   93215,   91796,  90110,   88184,   86045,   83724,
+     81250,   78652,   75958,   73195,   70387,   67557,   65315,   63120,   60926,  58744,   56581,   54447,   52347,
+     50287,   48271,   46303,   44387,   42524,   40716,   38964,   37269,   35632,  89361,   89212,   88769,   88038,
+     87032,   85767,   84261,   82537,   80619,   78531,   76301,   73952,   71510,  68998,   66639,   64636,   62607,
+     60563,   58515,   56474,   54447,   52442,   50465,   48521,   46616,   44753,  42935,   41164,   39444,   37774,
+     36157,   34592,   83325,   83193,   82798,   82147,   81250,   80120,   78772,  77227,   75505,   73626,   71613,
+     69489,   67283,   65521,   63706,   61850,   59966,   58064,   56154,   54246,  52347,   50465,   48605,   46773,
+     44974,   43212,   41490,   39810,   38174,   36585,   35043,   33722,   77697,  77580,   77227,   76646,   75844,
+     74833,   73626,   72239,   70690,   68998,   67211,   65728,   64168,   62543,  60865,   59146,   57397,   55628,
+     53847,   52064,   50287,   48521,   46773,   45049,   43352,   41687,   40057,  38464,   36911,   35400,   34021,
+     32902,   72450,   72344,   72030,   71510,   70792,   69886,   68803,   67557,  66427,   65246,   63969,   62607,
+     61170,   59671,   58120,   56528,   54904,   53258,   51599,   49934,   48271,  46616,   44974,   43352,   41753,
+     40181,   38639,   37131,   35658,   34249,   33154,   32080,   67557,   67462,  67211,   66853,   66357,   65728,
+     64974,   64102,   63120,   62038,   60865,   59612,   58289,   56905,   55471,  53996,   52489,   50959,   49413,
+     47859,   46303,   44753,   43212,   41687,   40181,   38698,   37242,   35814,  34419,   33335,   32287,   31258,
+     63969,   63903,   63706,   63379,   62926,   62353,   61663,   60865,   59966,  58973,   57896,   56743,   55523,
+     54246,   52920,   51553,   50154,   48731,   47291,   45841,   44387,   42935,  41490,   40057,   38639,   37242,
+     35867,   34517,   33445,   32426,   31423,   30437,   60623,   60563,   60383,  60084,   59671,   59146,   58515,
+     57784,   56960,   56048,   55058,   53996,   52871,   51692,   50465,   49198,  47900,   46577,   45235,   43882,
+     42524,   41164,   39810,   38464,   37131,   35814,   34517,   33482,   32496,  31522,   30563,   29619,   57452,
+     57397,   57232,   56960,   56581,   56101,   55523,   54853,   54096,   53258,  52347,   51369,   50331,   49241,
+     48105,   46932,   45726,   44496,   43247,   41985,   40716,   39444,   38174,  36911,   35658,   34419,   33445,
+     32496,   31556,   30627,   29710,   28807,   54447,   54397,   54246,   53996,  53650,   53210,   52680,   52064,
+     51369,   50599,   49759,   48858,   47900,   46892,   45841,   44753,   43634,  42490,   41327,   40150,   38964,
+     37774,   36585,   35400,   34249,   33335,   32426,   31522,   30627,   29741,  28865,   28002,   51599,   51553,
+     51415,   51186,   50868,   50465,   49978,   49413,   48773,   48064,   47291,  46459,   45574,   44642,   43669,
+     42660,   41621,   40557,   39474,   38377,   37269,   36157,   35043,   34021,  33154,   32287,   31423,   30563,
+     29710,   28865,   28030,   27205,   48900,   48858,   48731,   48521,   48229,  47859,   47412,   46892,   46303,
+     45650,   44937,   44170,   43352,   42490,   41588,   40652,   39687,   38698,  37689,   36666,   35632,   34592,
+     33722,   32902,   32080,   31258,   30437,   29619,   28807,   28002,   27205,  26417}};
+
+// Lookup table for ceil(log2(i)), indexed by i in [0, 1023].
+// Entry 0 holds the sentinel value 127 (log2(0) is undefined); entry 1 is 0 and
+// entries 2^(k-1)+1 .. 2^k hold k, so the largest stored value is 10
+// (indices 513..1023).
+const uint8_t LUTCeilLog2Nonzero[1024] = {
+    127, 0,  1,  2,  2,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+    5,   5,  5,  5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+    6,   6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
+    7,   7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
+    7,   7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+    8,   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+    8,   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+    8,   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+    8,   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,
+    9,   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+    9,   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+    9,   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+    9,   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+    9,   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+    9,   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+    9,   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+    9,   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+    9,   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10};
+
+//==========================================================//
+// load data
+//==========================================================//
+// Copies the opsin buffer into the stream feeding the 8x8 DCT path.
+//   ysize, xsize    : sizes; each is rounded up to a multiple of 64 to get the tile grid.
+//   axi_opsin       : source buffer, read linearly, 4096 * 3 floats per tile
+//                     (4096 = 64 * 64; presumably three colour channels - confirm).
+//   opsin8x8_stream : receives every float in the order it is read.
+void load_dct8_pixel(unsigned ysize, unsigned xsize, float* axi_opsin, hls::stream<float>& opsin8x8_stream) {
+#pragma HLS INLINE off
+
+    // Extent padded up to whole 64-wide / 64-high tiles.
+    int padded_w = (xsize + 63) / 64 * 64;
+    int padded_h = (ysize + 63) / 64 * 64;
+    uint32_t tiles_y = padded_h / 64;
+    uint32_t tiles_x = padded_w / 64;
+
+loop_load_dct8_pixel:
+    for (int tile = 0; tile < tiles_y * tiles_x; tile++) {
+        // Offset of the first float belonging to this tile.
+        uint32_t tile_base = tile * 4096 * 3;
+        for (int k = 0; k < 4096 * 3; k++) {
+#pragma HLS PIPELINE II = 1
+            opsin8x8_stream.write(axi_opsin[tile_base + k]);
+        }
+    }
+}
+
+// Copies the opsin buffer into the stream feeding the 16x16 DCT path.
+//   ysize, xsize      : sizes; each is rounded up to a multiple of 64 to get the tile grid.
+//   axi_opsin         : source buffer, read linearly, 4096 * 3 floats per tile
+//                       (4096 = 64 * 64; presumably three colour channels - confirm).
+//   opsin16x16_stream : receives every float in the order it is read.
+void load_dct16_pixel(unsigned ysize, unsigned xsize, float* axi_opsin, hls::stream<float>& opsin16x16_stream) {
+#pragma HLS INLINE off
+    // Extent padded up to whole 64-wide / 64-high tiles.
+    int padded_w = (xsize + 63) / 64 * 64;
+    int padded_h = (ysize + 63) / 64 * 64;
+    uint32_t tiles_y = padded_h / 64;
+    uint32_t tiles_x = padded_w / 64;
+
+loop_load_dct16_pixel:
+    for (int tile = 0; tile < tiles_y * tiles_x; tile++) {
+        // Offset of the first float belonging to this tile.
+        uint32_t tile_base = tile * 4096 * 3;
+        for (int k = 0; k < 4096 * 3; k++) {
+#pragma HLS PIPELINE II = 1
+            opsin16x16_stream.write(axi_opsin[tile_base + k]);
+        }
+    }
+}
+
+// Copies the opsin buffer into the stream feeding the 32x32 DCT path.
+//   ysize, xsize      : sizes; each is rounded up to a multiple of 64 to get the tile grid.
+//   axi_opsin         : source buffer, read linearly, 4096 * 3 floats per tile
+//                       (4096 = 64 * 64; presumably three colour channels - confirm).
+//   opsin32x32_stream : receives every float in the order it is read.
+void load_dct32_pixel(unsigned ysize, unsigned xsize, float* axi_opsin, hls::stream<float>& opsin32x32_stream) {
+#pragma HLS INLINE off
+    // Extent padded up to whole 64-wide / 64-high tiles.
+    int padded_w = (xsize + 63) / 64 * 64;
+    int padded_h = (ysize + 63) / 64 * 64;
+    uint32_t tiles_y = padded_h / 64;
+    uint32_t tiles_x = padded_w / 64;
+
+loop_load_dct32_pixel:
+    for (int tile = 0; tile < tiles_y * tiles_x; tile++) {
+        // Offset of the first float belonging to this tile.
+        uint32_t tile_base = tile * 4096 * 3;
+        for (int k = 0; k < 4096 * 3; k++) {
+#pragma HLS PIPELINE II = 1
+            opsin32x32_stream.write(axi_opsin[tile_base + k]);
+        }
+    }
+}
+
+// Runs the three pixel loaders, one per DCT block size.
+// Each loader reads its own buffer (axi_opsin_1 / _2 / _3) and fills its own
+// stream (8x8 / 16x16 / 32x32); all three receive the same ysize and xsize.
+void loadPixel(unsigned ysize,
+               unsigned xsize,
+               float* axi_opsin_1,
+               float* axi_opsin_2,
+               float* axi_opsin_3,
+               hls::stream<float>& opsin8x8_stream,
+               hls::stream<float>& opsin16x16_stream,
+               hls::stream<float>& opsin32x32_stream) {
+#pragma HLS INLINE
+    // 8x8 path
+    load_dct8_pixel(ysize, xsize, axi_opsin_1, opsin8x8_stream);
+    // 16x16 path
+    load_dct16_pixel(ysize, xsize, axi_opsin_2, opsin16x16_stream);
+    // 32x32 path
+    load_dct32_pixel(ysize, xsize, axi_opsin_3, opsin32x32_stream);
+}
+
+// Streams the per-block quantisation inputs, tile by tile.
+//
+// The image is handled as a grid of (xsize / 8) x (ysize / 8) blocks, grouped into
+// tiles of at most 8x8 blocks. Tiles are visited in row-major order and the blocks
+// of a tile row by row. For each block (block_x, block_y) three values are pushed:
+//   stream_q    <- quant_field_row[block_y * stride + block_x]
+//   stream_mask <- masking_field_row[block_y * stride + block_x]
+//   stream_rqf  <- aq_map_f[block_y * xsize_blocks + block_x]
+// Tiles on the right/bottom edge are clipped to the block grid.
+//
+//   xsize, ysize : sizes; divided by 8 (rounding down) to get the block grid.
+//   stride       : row pitch, in elements, of quant_field_row and masking_field_row.
+void load_rqf_mask(int xsize,
+                   int ysize,
+                   float* aq_map_f,
+                   float* masking_field_row,
+                   float* quant_field_row,
+                   int stride,
+                   hls::stream<float>& stream_q,
+                   hls::stream<float>& stream_mask,
+                   hls::stream<float>& stream_rqf) {
+#pragma HLS INLINE off
+    const int xsize_blocks = xsize / 8;
+    const int ysize_blocks = ysize / 8;
+    // Tiles per row, kept signed for the tid -> (tx, ty) split below.
+    const int tiles_per_row = (xsize_blocks + 7) / 8;
+    const uint32_t tile_rows = (ysize_blocks + 7) / 8;
+    const uint32_t num_tiles = (uint32_t)tiles_per_row * tile_rows;
+LOOP_0:
+    for (int tid = 0; tid < num_tiles; tid++) {
+        const int tx1 = tid % tiles_per_row;
+        const int ty1 = tid / tiles_per_row;
+        // First block of the tile, and one-past-last block clamped to the grid.
+        // Plain integer comparison: no round trip through floating-point fmin.
+        const int by = ty1 * 8;
+        const int by1 = ((ty1 + 1) * 8 < ysize_blocks) ? (ty1 + 1) * 8 : ysize_blocks;
+        const int bx = tx1 * 8;
+        const int bx1 = ((tx1 + 1) * 8 < xsize_blocks) ? (tx1 + 1) * 8 : xsize_blocks;
+        const int rect_ysize = by1 - by;
+        const int rect_xsize = bx1 - bx;
+    LOOP_1:
+        for (int iy = 0; iy < rect_ysize; iy++) {
+        LOOP_2:
+            for (int ix = 0; ix < rect_xsize; ix++) {
+#pragma HLS PIPELINE II = 1
+                const int block_x = bx + ix;
+                const int block_y = by + iy;
+                // The two field arrays share one index (pitch = stride).
+                const int field_idx = block_y * stride + block_x;
+                stream_q.write(quant_field_row[field_idx]);
+                stream_mask.write(masking_field_row[field_idx]);
+                // aq_map_f is packed with pitch xsize_blocks.
+                stream_rqf.write(aq_map_f[block_y * xsize_blocks + block_x]);
+            }
+        }
+    }
+}
+
+//==========================================================================//
+// data write out
+//==========================================================================//
+// Drains the AC coefficient stream into ac_coef_axiout.
+// Reads ceil(xsize / 8) * ceil(ysize / 8) * 3 * 64 values and stores them
+// consecutively starting at index 0.
+void ac_coeff_writeout(int xsize, int ysize, hls::stream<int>& ac_coef_strm, int* ac_coef_axiout) {
+    unsigned blocks_x = (xsize + 7) / 8;
+    unsigned blocks_y = (ysize + 7) / 8;
+    const unsigned total = blocks_x * blocks_y * 3 * 64;
+    for (int idx = 0; idx < total; idx++) {
+        int coef = ac_coef_strm.read();
+        ac_coef_axiout[idx] = coef;
+    }
+}
+
+// Drains the 8x8 DC coefficient stream into hls_dc8x8.
+//
+// The padded image (sizes rounded up to multiples of 64) is walked tile by tile in
+// row-major order. For every tile one height and one width are read from
+// stream_recty_dc0 / stream_rectx_dc0; positions (x8, y8) of the 8x8 grid inside
+// the tile that fall outside that rect are skipped: nothing is read from
+// dc_coef8x8_stream and the destination entry is not written.
+//
+// Destination layout: three planes, each tile_ysize * tile_xsize floats apart,
+// indexed row-major as (y64 * 8 + y8) * xsize8 + (x64 * 8 + x8).
+// Values arrive per position in the order c = 0, 1, 2 and are stored to
+// planes 1, 0, 2 respectively.
+void dc_8x8_writeout(unsigned ysize,
+                     unsigned xsize,
+                     float* hls_dc8x8,
+                     hls::stream<uint8_t>& stream_rectx_dc0,
+                     hls::stream<uint8_t>& stream_recty_dc0,
+                     hls::stream<float>& dc_coef8x8_stream) {
+#pragma HLS INLINE off
+    int tile_xsize = (xsize + 63) / 64 * 64;
+    int tile_ysize = (ysize + 63) / 64 * 64;
+    uint32_t ysize64 = tile_ysize / 64;
+    uint32_t xsize64 = tile_xsize / 64;
+    // Row pitch of one plane, in entries.
+    uint32_t xsize8 = tile_xsize / 8;
+
+loop_dc8_writeout:
+    for (uint32_t y64 = 0; y64 < ysize64; y64++) {
+        for (uint32_t x64 = 0; x64 < xsize64; x64++) {
+            // Valid extent of this tile.
+            int rect_ysize = stream_recty_dc0.read();
+            int rect_xsize = stream_rectx_dc0.read();
+            for (uint32_t y8 = 0; y8 < 8; y8++) {
+                for (uint32_t x8 = 0; x8 < 8; x8++) {
+                    for (int c = 0; c < 3; c++) {
+#pragma HLS PIPELINE II = 1
+                        // Stream order 0,1,2 maps to planes 1,0,2.
+                        const int c_tmp = (c == 0) ? 1 : ((c == 1) ? 0 : 2);
+
+                        size_t addr = y64 * xsize8 * 8 + x64 * 8 + y8 * xsize8 + x8;
+
+                        if (x8 < rect_xsize && y8 < rect_ysize) {
+                            float reg = dc_coef8x8_stream.read();
+                            hls_dc8x8[c_tmp * tile_ysize * tile_xsize + addr] = reg;
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Drains the 16x16 DC coefficient stream into hls_dc16x16.
+//
+// The padded image (sizes rounded up to multiples of 64) is walked tile by tile in
+// row-major order; each tile holds 4x4 blocks of 16x16, each carrying 2x2 values.
+// For every tile one height and one width are read from stream_recty_dc1 /
+// stream_rectx_dc1. A block is transferred only if 2 * x16 + 1 < rect_xsize and
+// 2 * y16 + 1 < rect_ysize; otherwise nothing is read from dc_coef16x16_stream and
+// the destination entries are not written.
+//
+// Destination layout: three planes, each tile_ysize * tile_xsize floats apart.
+// Inside a plane the 4 values of a block are contiguous:
+//   ((y64 * 4 + y16) * xsize16 + (x64 * 4 + x16)) * 4 + m * 2 + n
+// Values arrive per block in the order c = 0, 1, 2 and are stored to
+// planes 1, 0, 2 respectively.
+void dc_16x16_writeout(unsigned ysize,
+                       unsigned xsize,
+                       float* hls_dc16x16,
+                       hls::stream<uint8_t>& stream_rectx_dc1,
+                       hls::stream<uint8_t>& stream_recty_dc1,
+                       hls::stream<float>& dc_coef16x16_stream) {
+#pragma HLS INLINE off
+    int tile_xsize = (xsize + 63) / 64 * 64;
+    int tile_ysize = (ysize + 63) / 64 * 64;
+    size_t ysize64 = tile_ysize / 64;
+    size_t xsize64 = tile_xsize / 64;
+    // Number of 16x16 blocks per row of the padded image.
+    size_t xsize16 = tile_xsize / 16;
+    for (size_t y64 = 0; y64 < ysize64; y64++) {
+        for (size_t x64 = 0; x64 < xsize64; x64++) {
+            // Valid extent of this tile.
+            int rect_ysize = stream_recty_dc1.read();
+            int rect_xsize = stream_rectx_dc1.read();
+            for (size_t y16 = 0; y16 < 4; y16++) {
+                for (size_t x16 = 0; x16 < 4; x16++) {
+                    for (int c = 0; c < 3; c++) {
+                        for (size_t m = 0; m < 2; m++) {
+                            for (size_t n = 0; n < 2; n++) {
+#pragma HLS PIPELINE II = 1
+                                // Stream order 0,1,2 maps to planes 1,0,2.
+                                const int c_tmp = (c == 0) ? 1 : ((c == 1) ? 0 : 2);
+
+                                size_t addr =
+                                    y64 * xsize16 * 4 * 4 + x64 * 4 * 4 + y16 * xsize16 * 4 + x16 * 4 + m * 2 + n;
+
+                                // Only blocks lying entirely inside the rect carry data.
+                                if ((2 * x16 + 1) < rect_xsize && (2 * y16 + 1) < rect_ysize) {
+                                    float reg = dc_coef16x16_stream.read();
+                                    hls_dc16x16[c_tmp * tile_ysize * tile_xsize + addr] = reg;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Drains the 32x32 DC coefficient stream into hls_dc32x32.
+//
+// The padded image (sizes rounded up to multiples of 64) is walked tile by tile in
+// row-major order; each tile holds 2x2 blocks of 32x32, each carrying 4x4 values.
+// For every tile one height and one width are read from stream_recty_dc2 /
+// stream_rectx_dc2. A block is transferred only if 4 * x32 + 3 < rect_xsize and
+// 4 * y32 + 3 < rect_ysize; otherwise nothing is read from dc_coef32x32_stream and
+// the destination entries are not written.
+//
+// Destination layout: three planes, each tile_ysize * tile_xsize floats apart.
+// Inside a plane the 16 values of a block are contiguous:
+//   ((y64 * 2 + y32) * xsize32 + (x64 * 2 + x32)) * 16 + m * 4 + n
+// Values arrive per block in the order c = 0, 1, 2 and are stored to
+// planes 1, 0, 2 respectively.
+void dc_32x32_writeout(unsigned ysize,
+                       unsigned xsize,
+                       float* hls_dc32x32,
+                       hls::stream<uint8_t>& stream_rectx_dc2,
+                       hls::stream<uint8_t>& stream_recty_dc2,
+                       hls::stream<float>& dc_coef32x32_stream) {
+#pragma HLS INLINE off
+    int tile_xsize = (xsize + 63) / 64 * 64;
+    int tile_ysize = (ysize + 63) / 64 * 64;
+    size_t ysize64 = tile_ysize / 64;
+    size_t xsize64 = tile_xsize / 64;
+    // Number of 32x32 blocks per row of the padded image.
+    size_t xsize32 = tile_xsize / 32;
+    for (size_t y64 = 0; y64 < ysize64; y64++) {
+        for (size_t x64 = 0; x64 < xsize64; x64++) {
+            // Valid extent of this tile.
+            int rect_ysize = stream_recty_dc2.read();
+            int rect_xsize = stream_rectx_dc2.read();
+            for (size_t y32 = 0; y32 < 2; y32++) {
+                for (size_t x32 = 0; x32 < 2; x32++) {
+                    for (int c = 0; c < 3; c++) {
+                        for (size_t m = 0; m < 4; m++) {
+                            for (size_t n = 0; n < 4; n++) {
+#pragma HLS PIPELINE II = 1
+                                // Stream order 0,1,2 maps to planes 1,0,2.
+                                const int c_tmp = (c == 0) ? 1 : ((c == 1) ? 0 : 2);
+
+                                size_t addr = y64 * xsize32 * 2 * 16 + x64 * 2 * 16 + y32 * xsize32 * 16 +
+                                              x32 * 16 + m * 4 + n;
+
+                                // Only blocks lying entirely inside the rect carry data.
+                                if ((4 * x32 + 3) < rect_xsize && (4 * y32 + 3) < rect_ysize) {
+                                    float reg = dc_coef32x32_stream.read();
+                                    hls_dc32x32[c_tmp * tile_ysize * tile_xsize + addr] = reg;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Replicates the per-tile rect sizes onto three pairs of output streams.
+//
+// For each tile, one height (stream_recty_dc) and one width (stream_rectx_dc) are
+// read and then written unchanged to stream_rect{x,y}0, stream_rect{x,y}1 and
+// stream_rect{x,y}2. The tile grid is ceil((xsize / 8) / 8) wide and
+// ceil((ysize / 8) / 8) high, with xsize / 8 and ysize / 8 rounded down.
+//
+// NOTE(review): the dc_*_writeout consumers in this file size their tile grid as
+// (size + 63) / 64. That equals the count used here when the size is a multiple
+// of 8, but not in general (e.g. size 65 gives 1 here and 2 there) - confirm that
+// callers only pass sizes that are multiples of 8.
+void GetDCSize(short xsize,
+               short ysize,
+               hls::stream<uint8_t>& stream_rectx_dc,
+               hls::stream<uint8_t>& stream_recty_dc,
+               hls::stream<uint8_t>& stream_rectx0,
+               hls::stream<uint8_t>& stream_recty0,
+               hls::stream<uint8_t>& stream_rectx1,
+               hls::stream<uint8_t>& stream_recty1,
+               hls::stream<uint8_t>& stream_rectx2,
+               hls::stream<uint8_t>& stream_recty2) {
+    uint16_t blocks_x = xsize / 8;
+    uint16_t blocks_y = ysize / 8;
+    // Tile grid: up to 8 blocks per tile in each direction.
+    const int tile_rows = (blocks_y + 8 - 1) / 8;
+    const int tile_cols = (blocks_x + 8 - 1) / 8;
+LOOP_0:
+    for (uint16_t ty = 0; ty < tile_rows; ty++) {
+    LOOP_1:
+        for (uint16_t tx = 0; tx < tile_cols; tx++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+            const uint8_t rect_h = stream_recty_dc.read();
+            const uint8_t rect_w = stream_rectx_dc.read();
+            // Copy for consumer 0.
+            stream_rectx0.write(rect_w);
+            stream_recty0.write(rect_h);
+            // Copy for consumer 1.
+            stream_rectx1.write(rect_w);
+            stream_recty1.write(rect_h);
+            // Copy for consumer 2.
+            stream_rectx2.write(rect_w);
+            stream_recty2.write(rect_h);
+        }
+    }
+}
+// Writes the DC coefficients of all three block sizes out to memory.
+//
+// GetDCSize copies the incoming per-tile rect sizes onto three local stream pairs
+// (declared with template argument 1024), one pair per writer; the 8x8, 16x16 and
+// 32x32 writers then each drain their own coefficient stream into their own buffer.
+// Note that GetDCSize takes xsize / ysize as short, so the unsigned values are
+// narrowed at that call.
+void dc_writeout(unsigned ysize,
+                 unsigned xsize,
+                 float* hls_dc8x8,
+                 float* hls_dc16x16,
+                 float* hls_dc32x32,
+                 hls::stream<uint8_t>& stream_rectx_dc,
+                 hls::stream<uint8_t>& stream_recty_dc,
+                 hls::stream<float>& dc_coef8x8_stream,
+                 hls::stream<float>& dc_coef16x16_stream,
+                 hls::stream<float>& dc_coef32x32_stream) {
+#pragma HLS INLINE
+    // Rect-size copies for the 8x8 writer.
+    hls::stream<uint8_t, 1024> rect_w_dc8;
+    hls::stream<uint8_t, 1024> rect_h_dc8;
+    // Rect-size copies for the 16x16 writer.
+    hls::stream<uint8_t, 1024> rect_w_dc16;
+    hls::stream<uint8_t, 1024> rect_h_dc16;
+    // Rect-size copies for the 32x32 writer.
+    hls::stream<uint8_t, 1024> rect_w_dc32;
+    hls::stream<uint8_t, 1024> rect_h_dc32;
+
+    GetDCSize(xsize, ysize, stream_rectx_dc, stream_recty_dc,
+              rect_w_dc8, rect_h_dc8,
+              rect_w_dc16, rect_h_dc16,
+              rect_w_dc32, rect_h_dc32);
+
+    dc_8x8_writeout(ysize, xsize, hls_dc8x8, rect_w_dc8, rect_h_dc8, dc_coef8x8_stream);
+    dc_16x16_writeout(ysize, xsize, hls_dc16x16, rect_w_dc16, rect_h_dc16, dc_coef16x16_stream);
+    dc_32x32_writeout(ysize, xsize, hls_dc32x32, rect_w_dc32, rect_h_dc32, dc_coef32x32_stream);
+}
+
+// Writes the two per-tile colour-map streams to cmap_axi.
+// With num_tile = ceil(xsize / 64) * ceil(ysize / 64), the values read from
+// cmapx_strm fill cmap_axi[0 .. num_tile - 1] and the values read from cmapb_strm
+// fill cmap_axi[num_tile .. 2 * num_tile - 1]; one value of each is consumed per
+// iteration, cmapx first.
+void cfl_writeout(unsigned xsize,
+                  unsigned ysize,
+                  hls::stream<int8_t>& cmapx_strm,
+                  hls::stream<int8_t>& cmapb_strm,
+                  int8_t* cmap_axi) {
+#pragma HLS INLINE off
+
+    const int tiles_x = (xsize + 63) / 64;
+    const int tiles_y = (ysize + 63) / 64;
+    const int num_tile = tiles_x * tiles_y;
+
+    for (int t = 0; t < num_tile; t++) {
+#pragma HLS PIPELINE II = 2
+        const int8_t cx = cmapx_strm.read();
+        cmap_axi[t] = cx;
+        const int8_t cb = cmapb_strm.read();
+        cmap_axi[num_tile + t] = cb;
+    }
+}
+
+// Scatters the streamed AC strategy and raw quant-field values into two
+// row-major maps with one entry per 8x8 cell.
+//
+// The image is walked in tiles of 8x8 cells (the last tile in each direction is
+// clipped to the image). Inside a tile the cells are visited in raster order; for
+// every cell that has not yet been covered, one strategy and one quant value are
+// read from the streams and replicated over a b x b square of cells, where
+// b = strategy_block[strategy]. A 64-bit mask tracks which cells of the current
+// tile are already covered.
+//
+// xsize, ysize        image size in pixels (divided by 8 to obtain the cell grid)
+// strategy_all        output map of strategies, xsize_blocks entries per row
+// raw_quant_field_i   output map of quant values, same layout
+// stream_strategy     one strategy per transform block
+// stream_rqf          one quant value per transform block
+//
+// NOTE(review): the tile counters are uint8_t while the bounds are uint32_t, so more
+// than 255 tiles in either direction would never terminate - confirm the maximum
+// supported image size keeps the tile counts below 256.
+// NOTE(review): assumes a b x b block never extends past its tile; otherwise the mask
+// index would exceed 63 - TODO confirm against the producer.
+void acs_rqf_writeout(int xsize,
+                      int ysize,
+                      unsigned char* strategy_all,
+                      int* raw_quant_field_i,
+                      hls::stream<uint8_t>& stream_strategy,
+                      hls::stream<int>& stream_rqf) {
+#pragma HLS INLINE off
+    int xsize_blocks = xsize / 8;
+    int ysize_blocks = ysize / 8;
+    uint32_t xnum_tile = (xsize_blocks + 7) / 8;
+    uint32_t ynum_tile = (ysize_blocks + 7) / 8;
+    ap_uint<64> visited;
+LOOP_1:
+    for (uint8_t ty1 = 0; ty1 < ynum_tile; ty1++) {
+    LOOP_2:
+        for (uint8_t tx1 = 0; tx1 < xnum_tile; tx1++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+            // Cell rectangle covered by this tile, clipped to the image. Plain integer
+            // comparisons replace the former floating-point fmin() round trip.
+            int by0 = ty1 * 8;
+            int by_end = (ty1 + 1) * 8;
+            int by1 = (by_end < ysize_blocks) ? by_end : ysize_blocks;
+            int bx0 = tx1 * 8;
+            int bx_end = (tx1 + 1) * 8;
+            int bx1 = (bx_end < xsize_blocks) ? bx_end : xsize_blocks;
+            int rect_ysize = by1 - by0;
+            int rect_xsize = bx1 - bx0;
+            visited = 0;
+        LOOP_3:
+            for (uint8_t y = 0; y < rect_ysize; ++y) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+            LOOP_4:
+                for (uint8_t x = 0; x < rect_xsize; ++x) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+                    uint8_t cell_idx = y * 8 + x;
+                    if (visited.range(cell_idx, cell_idx) == 0) {
+                        // First uncovered cell of a new transform block: fetch its parameters.
+                        // Read as unsigned so the table index can never be negative.
+                        uint8_t strategy = stream_strategy.read();
+                        int rqf = stream_rqf.read();
+                        int b = strategy_block[strategy];
+                    LOOP_5:
+                        for (uint8_t iy = 0; iy < b; iy++) {
+                        LOOP_6:
+                            for (uint8_t ix = 0; ix < b; ix++) {
+#pragma HLS pipeline
+                                // Mark the cell as covered within the tile ...
+                                uint16_t cover_idx = (iy + y) * 8 + (ix + x);
+                                visited.range(cover_idx, cover_idx) = 1;
+                                // ... and replicate the block parameters into the image-wide maps.
+                                int out_idx = (y + by0 + iy) * xsize_blocks + (x + bx0 + ix);
+                                strategy_all[out_idx] = strategy;
+                                raw_quant_field_i[out_idx] = rqf;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+//=========================================================//
+// module
+//=========================================================//
+// cfl -----------------------------------------------------
+// Computes one pair of colour-correlation map values per 64x64 tile.
+//
+// Tiles are walked in raster order; edge tiles are clipped to the image size rounded
+// up to a multiple of 8. For each tile the function consumes transform blocks until
+// the tile's pixel count is reached. Per block it
+//   - reads the strategy and the quant value and forwards both unchanged,
+//   - reads the coefficients, interleaved per position as (y, x, b), and forwards
+//     them unchanged,
+//   - accumulates sum(a*a) and sum(a*b) for the x and the b channel, where a and b
+//     are derived from the quant-matrix-weighted coefficients.
+// After the tile is consumed, -sum(a*b) / (sum(a*a) + total_pix * kDistanceMultiplierAC / 2)
+// is rounded, clamped to [-128, 127] and written to both the internal map stream and
+// the stream feeding the AXI write-out, once for x and once for b.
+//
+// Only strategies 0 (64 coefficients), 4 (256) and 5 (1024) have dedicated handling.
+// NOTE(review): any other strategy value previously left the block size and the
+// quant-matrix entries uninitialized; it is now processed like strategy 0 so the loop
+// stays bounded and deterministic - confirm that no other strategy can reach this stage.
+void hls_CFLComputeTile(unsigned xsize,
+                        unsigned ysize,
+                        hls::stream<float>& ac_coef_strm,
+                        hls::stream<int>& rqf_in_stream,
+                        hls::stream<uint8_t>& acs_strm,
+                        hls::stream<int8_t>& cmapx_strm,
+                        hls::stream<int8_t>& cmapb_strm,
+                        hls::stream<int8_t>& cmapx_axi_strm,
+                        hls::stream<int8_t>& cmapb_axi_strm,
+                        hls::stream<float>& ac_coef_cflout_strm,
+                        hls::stream<int>& rqf_out_stream,
+                        hls::stream<uint8_t>& acs_cflout_strm) {
+#pragma HLS INLINE off
+    const uint8_t kDefaultColorFactor = 84U;
+    const float kInvColorFactor = 1.0f / kDefaultColorFactor;
+    const float kYToBRatio = 1.0f;
+    const float kDistanceMultiplierAC = 1e-3f;
+    // Fixed scale applied to every quant-matrix entry.
+    const float q = 27.996826171875;
+
+    unsigned xsize_alg = (xsize + 7) / 8 * 8;
+    unsigned ysize_alg = (ysize + 7) / 8 * 8;
+    int xnum_tile = (xsize + 63) / 64;
+    int ynum_tile = (ysize + 63) / 64;
+    int num_tile = xnum_tile * ynum_tile;
+    // Top-left pixel of the current tile.
+    unsigned tx0 = 0;
+    unsigned ty0 = 0;
+
+    for (int tid = 0; tid < num_tile; tid++) {
+        ca_x_t ca_x = 0;
+        cb_x_t cb_x = 0;
+        ca_b_t ca_b = 0;
+        cb_b_t cb_b = 0;
+
+        // Tile extent in pixels, clipped at the right and bottom image edges. These used
+        // to be named xsize/ysize and shadowed the function parameters.
+        unsigned tile_ysize = (ty0 + 64 > ysize_alg) ? (ysize_alg - ty0) : 64;
+        unsigned tile_xsize = (tx0 + 64 > xsize_alg) ? (xsize_alg - tx0) : 64;
+
+        unsigned total_pix = tile_xsize * tile_ysize;
+        unsigned cur_pix = 0;
+
+        while (cur_pix < total_pix) {
+            // Pass the block header through to the next stage.
+            uint8_t acsRaw = acs_strm.read();
+            acs_cflout_strm.write(acsRaw);
+            rqf_out_stream.write(rqf_in_stream.read());
+
+            // Number of coefficient positions in this block.
+            unsigned csize;
+            if (acsRaw == 4) {
+                csize = 256;
+            } else if (acsRaw == 5) {
+                csize = 1024;
+            } else {
+                csize = 64; // strategy 0, and the fallback for unexpected values
+            }
+
+            for (unsigned i = 0; i < csize; i++) {
+#pragma HLS PIPELINE II = 3
+                float b_y = ac_coef_strm.read();
+                float b_x = ac_coef_strm.read();
+                float b_b = ac_coef_strm.read();
+
+                ac_coef_cflout_strm.write(b_y);
+                ac_coef_cflout_strm.write(b_x);
+                ac_coef_cflout_strm.write(b_b);
+
+                // Quant-matrix entries for this position; the table matches the block size.
+                float qm_x;
+                float qm_b;
+
+                if (acsRaw == 4) {
+                    qm_x = qmx16x16[i];
+                    qm_b = qmb16x16[i];
+                } else if (acsRaw == 5) {
+                    qm_x = qmx32x32[i];
+                    qm_b = qmb32x32[i];
+                } else {
+                    qm_x = qmx8x8[i];
+                    qm_b = qmb8x8[i];
+                }
+
+                float qqm_x = q * qm_x;
+                float qqm_b = q * qm_b;
+
+                // x channel: accumulate a*a and a*b.
+                float coeffs_yx = b_y * qqm_x;
+                float coeffs_x = b_x * qqm_x;
+                float a = kInvColorFactor * coeffs_yx;
+                float b = 0.0f * coeffs_yx - coeffs_x;
+                ca_x = (ca_x_t)(a * a) + ca_x;
+                cb_x = (cb_x_t)(a * b) + cb_x;
+
+                // b channel: same accumulation with the base ratio kYToBRatio.
+                float coeffs_yb = b_y * qqm_b;
+                float coeffs_b = b_b * qqm_b;
+
+                a = kInvColorFactor * coeffs_yb;
+                b = kYToBRatio * coeffs_yb - coeffs_b;
+                ca_b = (ca_b_t)(a * a) + ca_b;
+                cb_b = (cb_b_t)(a * b) + cb_b;
+
+                cur_pix++;
+            }
+        }
+
+        // Solve for the map value of each channel, round and clamp to the int8_t range.
+        float x;
+        x = -(float)cb_x / ((float)ca_x + total_pix * kDistanceMultiplierAC * 0.5f);
+        int8_t cmap_x_reg = hls::max(-128.0f, hls::min(127.0f, hls::roundf(x)));
+        cmapx_strm.write(cmap_x_reg);
+        cmapx_axi_strm.write(cmap_x_reg);
+
+        x = -(float)cb_b / ((float)ca_b + total_pix * kDistanceMultiplierAC * 0.5f);
+        int8_t cmap_b_reg = hls::max(-128.0f, hls::min(127.0f, hls::roundf(x)));
+        cmapb_strm.write(cmap_b_reg);
+        cmapb_axi_strm.write(cmap_b_reg);
+
+        // Advance to the next tile in raster order.
+        if (tx0 + 64 >= xsize_alg) {
+            tx0 = 0;
+            ty0 = ty0 + 64;
+        } else {
+            tx0 = tx0 + 64;
+        }
+    }
+}
+//--------------------------hls_compute_coefficients--------------------------//
+// Returns the bias-adjusted value of a quantized coefficient.
+//
+// c        index of the bias used when |quant_i| is 1
+// quant_i  quantized coefficient
+// biases   bias table; entry 3 is used for every |quant_i| >= 2
+//
+// Result:
+//   quant_i == 0      -> 0
+//   |quant_i| == 1    -> biases[c] with its sign bit XOR-ed with the sign bit of quant_i
+//   otherwise         -> quant_i - biases[3] / quant_i
+float adjustQuantBias(size_t c, int32_t quant_i, const float* biases) {
+    // Sign bit of quant_i, kept as a raw bit pattern so it can be XOR-ed into a float.
+    cast<float, int32_t> sign_bits;
+    sign_bits.i = quant_i & INT32_MIN;
+
+    float abs_quant = hls::abs(quant_i);
+    bool is_01 = abs_quant < 1.125f;
+    bool not_0 = abs_quant > 0;
+
+    // biases[c] with the sign bit of quant_i XOR-ed in.
+    cast<float, int32_t> bi, si;
+    bi.f = biases[c];
+    si.i = bi.i ^ sign_bits.i;
+    float one_bias = not_0 ? (si.f) : 0;
+
+    // The reciprocal is guarded so quant_i == 0 never divides by zero.
+    float tmp = quant_i ? (1.0 / quant_i) : 0.0f;
+    float bias = quant_i - biases[3] * tmp;
+    return is_01 ? one_bias : bias;
+}
+
+// Quantizes the streamed DCT coefficients of all three channels.
+//
+// The image is processed tile by tile (tiles of kEncTileDimInBlocks blocks per side,
+// edge tiles clipped). One pair of map values is read per tile and turned into the
+// factors x_factor / b_factor. Inside a tile, coefficients are consumed block by block:
+// at the first coefficient of each block the strategy and the quant value are read and
+// forwarded, then for every coefficient position
+//   1. three values are read from dctStrm and stored as coef_dct[1], [0], [2],
+//   2. channel 1 is quantized (values whose magnitude is below the threshold become 0),
+//   3. the quantized value is turned back into a coefficient via adjustQuantBias,
+//      dequant_table and inv_qac, and x_factor / b_factor times that value is
+//      subtracted from channels 0 and 2,
+//   4. channels 0 and 2 are quantized the same way,
+//   5. the three integers are written, in the order channel 1, 0, 2, to both
+//      coeffOutStrm and coeff_axi_stream.
+//
+// Strategies Type::DCT and Type::DCT16X16 are sized as 1x1 and 2x2 blocks; everything
+// else is sized as 4x4 blocks and uses the 32x32 tables.
+//
+// NOTE(review): inv_qac divides by the streamed quant value; a value of 0 is not
+// guarded here - confirm the producer never emits it.
+void hls_ComputeCoefficients(uint32_t xsize,
+                             uint32_t ysize,
+                             hls::stream<uint8_t>& acsStrm,
+                             hls::stream<float>& dctStrm,
+                             hls::stream<int>& quantFieldStrm,
+                             hls::stream<int8_t>& ytoxMapStrm,
+                             hls::stream<int8_t>& ytobMapStrm,
+                             hls::stream<uint8_t>& acs_coeff_stream1,
+                             hls::stream<int>& coeffOutStrm,
+                             hls::stream<int>& coeff_axi_stream,
+                             hls::stream<uint8_t>& acs_axi_strm,
+                             hls::stream<int>& qf_axi_strm) {
+#pragma HLS INLINE off
+    uint8_t acs;
+    uint8_t xblocks, yblocks;
+    int8_t ytox_map, ytob_map;
+    float x_factor, b_factor;
+    float qm_multiplier = 1.0f;
+    float coef_dct[3];
+#pragma HLS ARRAY_PARTITION variable = coef_dct complete dim = 1
+    float thr_x, thr_y, thr_b, out_x, out_b;
+
+    // Zeroing thresholds, indexed by quadrant: bit 1 = lower half, bit 0 = right half.
+    float thresy[4] = {0.5f, 0.6f, 0.6f, 0.65f};
+    float thresxb[4] = {0.5f, 0.75f, 0.75f, 0.75f};
+#pragma HLS ARRAY_PARTITION variable = thresy complete dim = 1
+#pragma HLS ARRAY_PARTITION variable = thresxb complete dim = 1
+
+    uint32_t xsize_blocks = (xsize + 7) / 8;
+    uint32_t ysize_blocks = (ysize + 7) / 8;
+    uint16_t xsize_tails = DivCeil(xsize_blocks, kEncTileDimInBlocks);
+    uint16_t ysize_tails = DivCeil(ysize_blocks, kEncTileDimInBlocks);
+    // Number of 8x8 blocks in the last (possibly partial) tile column / row.
+    uint16_t xsize_left = 8 - (xsize_tails * 8 - xsize_blocks); // not aligned for blocks
+    uint16_t ysize_left = 8 - (ysize_tails * 8 - ysize_blocks); // not aligned for blocks
+    uint16_t num_blocks;
+
+    for (uint16_t ty = 0; ty < ysize_tails; ++ty) {
+        for (uint16_t tx = 0; tx < xsize_tails; ++tx) {
+            ytoxMapStrm.read(ytox_map);
+            ytobMapStrm.read(ytob_map);
+            x_factor = base_correlation_x + ytox_map * color_scale;
+            b_factor = base_correlation_b + ytob_map * color_scale;
+
+            // Count of 8x8 blocks in this tile, taking clipped edge tiles into account.
+            if (tx == (xsize_tails - 1) && ty != (ysize_tails - 1)) {
+                num_blocks = xsize_left * 8;
+            } else if (tx != (xsize_tails - 1) && ty == (ysize_tails - 1)) {
+                num_blocks = ysize_left * 8;
+            } else if (tx == (xsize_tails - 1) && ty == (ysize_tails - 1)) {
+                num_blocks = xsize_left * ysize_left;
+            } else {
+                num_blocks = 64;
+            }
+
+            uint32_t total = num_blocks * 64;
+            uint32_t cur = 0;
+            // size: coefficient count of the current block; count: position inside it.
+            ap_uint<16> size = 0, count = 0;
+            ap_uint<16> y, x;
+            // Row offset into the tables and vertical half selector. Both are refreshed only
+            // at the first coefficient of a row (x == 0) and reused for the rest of that row,
+            // so they are declared outside the loop body to keep their value between
+            // iterations instead of being read uninitialized.
+            ap_uint<32> off = 0;
+            ap_uint<32> yfix = 0;
+            int quant;
+            float qac, fquant, inv_qac;
+
+            while (cur < total) {
+#pragma HLS PIPELINE II = 3
+                if (count == 0) {
+                    // Start of a new block: fetch and forward its strategy and quant value.
+                    acsStrm.read(acs);
+                    acs_axi_strm.write(acs);
+                    acs_coeff_stream1.write(acs);
+                    if (acs == Type::DCT) {
+                        xblocks = 1;
+                        yblocks = 1;
+                    } else if (acs == Type::DCT16X16) {
+                        xblocks = 2;
+                        yblocks = 2;
+                    } else {
+                        xblocks = 4;
+                        yblocks = 4;
+                    }
+
+                    size = kDCTBlockSize * xblocks * yblocks;
+                    quant = quantFieldStrm.read();
+                    qf_axi_strm.write(quant);
+                    qac = global_scale_float * quant;
+                    fquant = qac * qm_multiplier;
+                    inv_qac = inv_global_scale / quant;
+                }
+
+                // Row / column of the current coefficient inside the block.
+                y = count / (yblocks * kBlockDim);
+                x = count % (xblocks * kBlockDim);
+                if (x == 0) {
+                    off = y * kBlockDim * xblocks;
+                    ap_uint<32> yhalf = yblocks * 4; // half of the block height
+                    if (y >= yhalf)
+                        yfix = 2;
+                    else
+                        yfix = 0;
+                }
+
+                // Pick the thresholds for the quadrant the coefficient lies in.
+                thr_x = 0;
+                thr_y = 0;
+                thr_b = 0;
+                if (xblocks == 1) {
+                    if (x >= 4) {
+                        thr_x = thresxb[yfix + 1];
+                        thr_y = thresy[yfix + 1];
+                        thr_b = thresxb[yfix + 1];
+                    } else {
+                        thr_x = thresxb[yfix];
+                        thr_y = thresy[yfix];
+                        thr_b = thresxb[yfix];
+                    }
+                } else {
+                    ap_uint<32> xhalf = xblocks * 4; // half of the block width
+                    ap_uint<32> xfix;
+                    if (x < xhalf)
+                        xfix = 0;
+                    else
+                        xfix = 1;
+                    thr_x = thresxb[yfix + xfix];
+                    thr_y = thresy[yfix + xfix];
+                    thr_b = thresxb[yfix + xfix];
+                }
+
+                // Per-channel quantization multipliers. The final branch mirrors the sizing
+                // above, where every strategy other than DCT / DCT16X16 is handled as 32x32.
+                float q_x;
+                float q_y;
+                float q_b;
+                if (acs == Type::DCT) {
+                    q_x = inv_dequant_stable[0 + off + x] * fquant;
+                    q_y = inv_dequant_stable[64 + off + x] * fquant;
+                    q_b = inv_dequant_stable[128 + off + x] * fquant;
+                } else if (acs == Type::DCT16X16) {
+                    q_x = inv_dequant_stable[768 + off + x] * fquant;
+                    q_y = inv_dequant_stable[1024 + off + x] * fquant;
+                    q_b = inv_dequant_stable[1280 + off + x] * fquant;
+                } else {
+                    q_x = inv_dequant_stable[1536 + off + x] * fquant;
+                    q_y = inv_dequant_stable[2560 + off + x] * fquant;
+                    q_b = inv_dequant_stable[3584 + off + x] * fquant;
+                }
+
+                // The stream delivers channel 1 first, then channel 0, then channel 2.
+                coef_dct[1] = dctStrm.read();
+                coef_dct[0] = dctStrm.read();
+                coef_dct[2] = dctStrm.read();
+
+                // Quantize channel 1; magnitudes below the threshold are zeroed.
+                float val_y;
+                val_y = q_y * coef_dct[1];
+
+                bool nzero_mask_y = hls::abs(val_y) >= thr_y;
+
+                int32_t v_y;
+                if (nzero_mask_y) {
+                    v_y = hls::roundf(val_y);
+                } else {
+                    v_y = 0;
+                }
+
+                // Turn the quantized value back into a coefficient so the other two channels
+                // are corrected with the value derived from the quantized result.
+                float adj_quant = adjustQuantBias(1, v_y, kDefaultQuantBias);
+                float dequantm;
+                if (acs == Type::DCT) {
+                    dequantm = dequant_table[64 + off + x];
+                } else if (acs == Type::DCT16X16) {
+                    dequantm = dequant_table[1024 + off + x];
+                } else {
+                    dequantm = dequant_table[2560 + off + x];
+                }
+                coef_dct[1] = adj_quant * dequantm * inv_qac;
+
+                out_x = coef_dct[0] - x_factor * coef_dct[1];
+                coef_dct[0] = out_x;
+
+                out_b = coef_dct[2] - b_factor * coef_dct[1];
+                coef_dct[2] = out_b;
+
+                // Quantize the corrected channels 0 and 2.
+                float val_x;
+                float val_b;
+                val_x = q_x * coef_dct[0];
+                val_b = q_b * coef_dct[2];
+
+                bool nzero_mask_x = hls::abs(val_x) >= thr_x;
+
+                bool nzero_mask_b = hls::abs(val_b) >= thr_b;
+
+                int32_t v_x;
+
+                int32_t v_b;
+                if (nzero_mask_x) {
+                    v_x = hls::roundf(val_x);
+                } else {
+                    v_x = 0;
+                }
+
+                if (nzero_mask_b) {
+                    v_b = hls::roundf(val_b);
+                } else {
+                    v_b = 0;
+                }
+
+                coeffOutStrm.write(v_y);
+                coeffOutStrm.write(v_x);
+                coeffOutStrm.write(v_b);
+
+                coeff_axi_stream.write(v_y);
+                coeff_axi_stream.write(v_x);
+                coeff_axi_stream.write(v_b);
+                cur++;
+                count++;
+                if (count == size) count = 0;
+            } // while
+        }     // tx
+    }         // ty
+}
+
+//--------------------- Compute ALL orders---------------------//
+// Streaming reorder of `size` (count, position) pairs through a RANGE-deep window.
+//
+// Every step one pair is read (or, once the input is exhausted, an all-ones filler
+// pair is injected), compared against all window slots in parallel and inserted into
+// the window; from step RANGE onwards the position held in slot 0 is emitted before
+// the insertion. Exactly `size` positions are therefore written to pos_outstrm.
+//
+// NOTE(review): with RANGE smaller than `size` this presumably yields only an
+// approximately count-ordered sequence - see the opt1/opt2 remark below.
+template <int RANGE> // opt1:256(slow), opt2:8(fast)
+void hls_sort(int size,
+              hls::stream<unsigned>& count_instrm,
+              hls::stream<unsigned>& pos_instrm,
+              hls::stream<unsigned>& pos_outstrm) {
+    unsigned count_shift[RANGE];
+    unsigned pos_shift[RANGE];
+    // Bit k is set when the incoming count is >= the count stored in slot k.
+    ap_uint<RANGE> ge_mask = 0;
+
+    for (int lane = 0; lane < RANGE; lane++) {
+#pragma HLS UNROLL
+        count_shift[lane] = 0;
+    }
+
+    for (int step = 0; step < size + RANGE; step++) {
+#pragma HLS PIPELINE II = 1
+        unsigned in_count;
+        unsigned in_pos;
+        if (step < size) {
+            in_pos = pos_instrm.read();
+            in_count = count_instrm.read();
+        } else {
+            // Input exhausted: push maximal fillers to flush the window.
+            in_count = -1;
+            in_pos = -1;
+        }
+
+        for (int lane = 0; lane < RANGE; lane++) {
+#pragma HLS UNROLL
+            ge_mask[lane] = in_count >= count_shift[lane];
+        }
+
+        // Emit the head of the window before it is overwritten by the shift below.
+        if (step >= RANGE) {
+            pos_outstrm.write(pos_shift[0]);
+        }
+
+        // Slots whose count is <= the incoming one move one place towards the head.
+        for (int lane = 1; lane < RANGE; lane++) {
+#pragma HLS UNROLL
+            if (ge_mask[lane] == 1) {
+                count_shift[lane - 1] = count_shift[lane];
+                pos_shift[lane - 1] = pos_shift[lane];
+            }
+        }
+
+        // Locate the slot for the incoming pair from the comparison mask.
+        unsigned slot;
+        ap_uint<RANGE> lt_mask_rev = ~ge_mask;
+        lt_mask_rev.reverse();
+        if (lt_mask_rev == 0) {
+            // Incoming count is >= every stored count: goes to the tail.
+            slot = RANGE - 1;
+        } else if (ge_mask == 0) {
+            // Incoming count is below every stored count: goes to the head.
+            slot = 0;
+        } else {
+            slot = lt_mask_rev.countLeadingZeros() - 1;
+        }
+
+        count_shift[slot] = in_count;
+        pos_shift[slot] = in_pos;
+    }
+}
+
+// Runs the streaming sorter once per (order, channel) combination.
+//
+// Two orders are processed - 64 entries first, then 256 entries - and each order is
+// sorted for three channels in turn, i.e. six hls_sort<8> invocations in total, all
+// reading from and writing to the same three streams.
+//
+// The former `#ifndef __SYNTHESIS__` / `#else` split contained the same call in both
+// branches and has been collapsed.
+void hls_sort_top(hls::stream<unsigned>& count_instrm,
+                  hls::stream<unsigned>& pos_instrm,
+                  hls::stream<unsigned>& pos_outstrm) {
+    for (uint8_t o = 0; o < 2; ++o) {
+        // Number of entries per channel for this order.
+        const unsigned sz = (o == 0) ? 64 : 256;
+
+        for (uint8_t c = 0; c < 3; c++) {
+            hls_sort<8>(sz, count_instrm, pos_instrm, pos_outstrm);
+        }
+    }
+}
+
+// Clears the whole zero-count table: all 320 positions of each of the 3 channels.
+// The position loop is the outer one, so the three channels of a position are cleared
+// back to back.
+void init_numzeros(int32_t num_zeros[3][320]) {
+    for (int pos = 0; pos < 320; ++pos) {
+        for (int ch = 0; ch < 3; ++ch) {
+#pragma HLS PIPELINE II = 1
+            num_zeros[ch][pos] = 0;
+        }
+    }
+}
+
+// Builds the per-position histogram of zero-valued quantized coefficients.
+//
+// Blocks are consumed until the pixel count of the image (rounded up to a multiple of
+// 8 in both directions) is reached. For each block the strategy is read, then 3 values
+// per coefficient position are read from ac_coef_quant_strm (the innermost loop runs
+// over the channel index). Zeros are counted only for strategy 0 (table offset 0) and
+// strategy 4 (table offset 64); other strategies are consumed but not counted.
+//
+// used_orders_strm receives a single 3-bit mask: bit 0 = strategy 0 seen,
+// bit 1 = strategy 1..3 seen, bit 2 = strategy 4 seen.
+//
+// After counting, the entries at position 0 of the 8x8 table and at positions
+// 0, 1, 16 and 17 of the 16x16 table are overwritten with -1 for every channel.
+//
+// NOTE(review): strategies above 5 previously left `size` uninitialized; they are now
+// consumed as 64-coefficient blocks so the loop stays bounded - confirm no such value
+// can reach this stage.
+void count_numzeros(unsigned xsize,
+                    unsigned ysize,
+                    hls::stream<uint8_t>& ac_strategy_strm,
+                    hls::stream<int>& ac_coef_quant_strm,
+                    hls::stream<ap_uint<3> >& used_orders_strm,
+                    int32_t num_zeros[3][320]) {
+#pragma HLS INLINE off
+    unsigned xsize_alg = (xsize + 7) / 8 * 8;
+    unsigned ysize_alg = (ysize + 7) / 8 * 8;
+    unsigned total_pix = xsize_alg * ysize_alg;
+    unsigned cur_pix = 0;
+
+    const int32_t offset8x8 = 0;
+    const int32_t offset16x16 = 64;
+
+    ap_uint<3> used_orders_ap = 0;
+
+    while (cur_pix < total_pix) {
+        uint8_t acsRaw = ac_strategy_strm.read();
+        unsigned size;
+        if (acsRaw == 0) {
+            size = 64;
+            used_orders_ap[0] = 1;
+        } else if (acsRaw < 4) {
+            used_orders_ap[1] = 1;
+            size = 64;
+        } else if (acsRaw == 4) {
+            used_orders_ap[2] = 1;
+            size = 256;
+        } else if (acsRaw == 5) {
+            size = 1024;
+        } else {
+            size = 64; // unexpected strategy: keep the loop bounded
+        }
+        cur_pix = cur_pix + size;
+
+        for (unsigned k = 0; k < size; k++) {
+            for (int c = 0; c < 3; c++) {
+#pragma HLS PIPELINE II = 1
+#pragma HLS DEPENDENCE variable = num_zeros type = inter false
+                bool is_zerox = ac_coef_quant_strm.read() == 0;
+                if (is_zerox) {
+                    if (acsRaw == 0) {
+                        num_zeros[c][offset8x8 + k]++;
+                    } else if (acsRaw == 4) {
+                        num_zeros[c][offset16x16 + k]++;
+                    }
+                }
+            }
+        }
+    }
+
+    used_orders_strm.write(used_orders_ap);
+
+    // Overwrite the fixed sentinel positions with -1 for every channel.
+    for (int c = 0; c < 3; c++) {
+        num_zeros[c][offset8x8] = -1;
+        num_zeros[c][offset16x16 + 0] = -1;
+        num_zeros[c][offset16x16 + 1] = -1;
+        num_zeros[c][offset16x16 + 16] = -1;
+        num_zeros[c][offset16x16 + 17] = -1;
+    }
+}
+
+// Streams the zero-count table out in zig-zag order as (position, scaled count) pairs.
+//
+// Two orders are emitted, 8x8 (64 entries, table offset 0) followed by 16x16
+// (256 entries, table offset 64), each for the three channels in turn. For every
+// entry the zig-zag position is written to pos_strm and the count at that position,
+// multiplied by 1/8 (8x8) or 1/16 (16x16) and offset by 0.1, is written to count_strm
+// (converted to unsigned by the stream write).
+void load_nz2strm(int32_t num_zeros[3][320], hls::stream<unsigned>& count_strm, hls::stream<unsigned>& pos_strm) {
+    const int32_t offset8x8 = 0;
+    const int32_t offset16x16 = 64;
+
+    const uint32_t coef8x8_zigzag[64] = {0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,
+                                         12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6,  7,  14, 21, 28,
+                                         35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+                                         58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63};
+    const uint32_t coef16x16_zigzag[256] = {
+        0,   1,   16,  17,  32,  2,   3,   18,  33,  48,  64,  49,  34,  19,  4,   5,   20,  35,  50,  65,  80,  96,
+        81,  66,  51,  36,  21,  6,   7,   22,  37,  52,  67,  82,  97,  112, 128, 113, 98,  83,  68,  53,  38,  23,
+        8,   9,   24,  39,  54,  69,  84,  99,  114, 129, 144, 160, 145, 130, 115, 100, 85,  70,  55,  40,  25,  10,
+        11,  26,  41,  56,  71,  86,  101, 116, 131, 146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87,  72,  57,
+        42,  27,  12,  13,  28,  43,  58,  73,  88,  103, 118, 133, 148, 163, 178, 193, 208, 224, 209, 194, 179, 164,
+        149, 134, 119, 104, 89,  74,  59,  44,  29,  14,  15,  30,  45,  60,  75,  90,  105, 120, 135, 150, 165, 180,
+        195, 210, 225, 240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91,  76,  61,  46,  31,  47,  62,  77,
+        92,  107, 122, 137, 152, 167, 182, 197, 212, 227, 242, 243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93,
+        78,  63,  79,  94,  109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185, 170, 155, 140,
+        125, 110, 95,  111, 126, 141, 156, 171, 186, 201, 216, 231, 246, 247, 232, 217, 202, 187, 172, 157, 142, 127,
+        143, 158, 173, 188, 203, 218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235, 250, 251,
+        236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254, 255};
+
+    for (uint8_t o = 0; o < 2; ++o) {
+        // Per-order parameters: entry count, scale factor and offset into num_zeros.
+        const bool is_8x8 = (o == 0);
+        const unsigned sz = is_8x8 ? 64 : 256;
+        const float inv_sqrt_sz = is_8x8 ? (1.0f / 8.0f) : (1.0f / 16.0f);
+        const int32_t table_base = is_8x8 ? offset8x8 : offset16x16;
+
+        for (uint8_t c = 0; c < 3; c++) {
+            for (unsigned i = 0; i < sz; ++i) {
+#pragma HLS PIPELINE II = 1
+                const unsigned pos = is_8x8 ? coef8x8_zigzag[i] : coef16x16_zigzag[i];
+
+                // We don't care for the exact number -> quantize number of zeros,
+                // to get less permuted order.
+                pos_strm.write(pos);
+                count_strm.write(num_zeros[c][table_base + pos] * inv_sqrt_sz + 0.1f);
+            }
+        }
+    }
+}
+
+// Stores the reordered coefficient positions and the final used-orders mask.
+//
+// One 3-bit mask is read from used_orders_strm first. Then, for the 8x8 order
+// (64 entries, offset 0) and the 16x16 order (256 entries, offset 64), the positions
+// of the three channels are read from pos_strm and written to
+// hls_order[c * 320 + offset + i]. If every position of an order, across all three
+// channels, equals the plain zig-zag position, the corresponding mask bit (bit 0 for
+// 8x8, bit 2 for 16x16) is cleared. The mask is finally stored in hls_order[320 * 3].
+void order_writeout(hls::stream<ap_uint<3> >& used_orders_strm,
+                    hls::stream<unsigned>& pos_strm,
+                    uint32_t hls_order[320 * 3 + 1] // AXI port
+                    ) {
+    const int32_t offset8x8 = 0;
+    const int32_t offset16x16 = 64;
+
+    const uint32_t coef8x8_zigzag[64] = {0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,
+                                         12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6,  7,  14, 21, 28,
+                                         35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+                                         58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63};
+    const uint32_t coef16x16_zigzag[256] = {
+        0,   1,   16,  17,  32,  2,   3,   18,  33,  48,  64,  49,  34,  19,  4,   5,   20,  35,  50,  65,  80,  96,
+        81,  66,  51,  36,  21,  6,   7,   22,  37,  52,  67,  82,  97,  112, 128, 113, 98,  83,  68,  53,  38,  23,
+        8,   9,   24,  39,  54,  69,  84,  99,  114, 129, 144, 160, 145, 130, 115, 100, 85,  70,  55,  40,  25,  10,
+        11,  26,  41,  56,  71,  86,  101, 116, 131, 146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87,  72,  57,
+        42,  27,  12,  13,  28,  43,  58,  73,  88,  103, 118, 133, 148, 163, 178, 193, 208, 224, 209, 194, 179, 164,
+        149, 134, 119, 104, 89,  74,  59,  44,  29,  14,  15,  30,  45,  60,  75,  90,  105, 120, 135, 150, 165, 180,
+        195, 210, 225, 240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91,  76,  61,  46,  31,  47,  62,  77,
+        92,  107, 122, 137, 152, 167, 182, 197, 212, 227, 242, 243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93,
+        78,  63,  79,  94,  109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185, 170, 155, 140,
+        125, 110, 95,  111, 126, 141, 156, 171, 186, 201, 216, 231, 246, 247, 232, 217, 202, 187, 172, 157, 142, 127,
+        143, 158, 173, 188, 203, 218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235, 250, 251,
+        236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254, 255};
+
+    ap_uint<3> used_orders_ap = used_orders_strm.read();
+    for (uint8_t o = 0; o < 2; ++o) {
+        // Per-order parameters: entry count and offset inside each channel's 320-entry slice.
+        const bool is_8x8 = (o == 0);
+        const unsigned sz = is_8x8 ? 64 : 256;
+        const int32_t order_base = is_8x8 ? offset8x8 : offset16x16;
+
+        // Becomes true as soon as one received position deviates from plain zig-zag.
+        bool deviates = false;
+        for (uint8_t c = 0; c < 3; c++) {
+            for (unsigned i = 0; i < sz; ++i) {
+#pragma HLS PIPELINE II = 1
+                const unsigned sorted_pos = pos_strm.read();
+                hls_order[c * 320 + order_base + i] = sorted_pos;
+
+                const uint32_t zigzag_pos = is_8x8 ? coef8x8_zigzag[i] : coef16x16_zigzag[i];
+                if (zigzag_pos != sorted_pos) {
+                    deviates = true;
+                }
+            }
+        }
+
+        // An order identical to zig-zag is reported as unused.
+        if (!deviates) {
+            if (is_8x8) {
+                used_orders_ap[0] = 0;
+            } else {
+                used_orders_ap[2] = 0;
+            }
+        }
+    }
+    hls_order[320 * 3] = used_orders_ap;
+}
+
+// Dataflow region that turns the zero-count table into the coefficient order table.
+//
+// Three stages are chained through local streams:
+//   load_nz2strm   - streams (count, position) pairs out of num_zeros,
+//   hls_sort_top   - reorders the positions,
+//   order_writeout - stores the positions, plus the mask read from used_orders_strm,
+//                    into hls_order.
+//
+// used_orders_strm  single 3-bit mask consumed by order_writeout
+// num_zeros         zero-count table, read by load_nz2strm
+// hls_order         output buffer of 320 * 3 + 1 entries
+void order_finalize_dataflow(hls::stream<ap_uint<3> >& used_orders_strm,
+                             int32_t num_zeros[3][320],
+                             uint32_t hls_order[320 * 3 + 1]) {
+// Only hls_order carries an interface pragma here. The commented-out m_axi pragmas that
+// preceded it referred to ports that are not parameters of this function and were dropped.
+// NOTE(review): INTERFACE pragmas are normally placed on the top-level kernel function;
+// confirm this one takes effect at this level.
+#pragma HLS INTERFACE mode = m_axi bundle = mm11 latency = 32 offset = slave num_write_outstanding =            \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = MAX_ORDER port = \
+        hls_order
+#pragma HLS DATAFLOW
+    // FIFOs between the stages; the depth is the second template argument.
+    hls::stream<unsigned, 32> count_instrm("count_instrm");
+    hls::stream<unsigned, 32> pos_instrm("pos_instrm");
+    hls::stream<unsigned, 8> pos_outstrm("pos_outstrm");
+
+    // Producer: zero counts and zig-zag positions.
+    load_nz2strm(num_zeros, count_instrm, pos_instrm);
+
+    // Reorders positions by their counts.
+    hls_sort_top(count_instrm, pos_instrm, pos_outstrm);
+
+    // Consumer: writes the order table and the used-orders mask.
+    order_writeout(used_orders_strm, pos_outstrm, hls_order);
+}
+
+//-------------------------- dct --------------------------//
+// dct8x8
+// Row-wise 8-point butterfly used by the 8x8 DCT path. Each group of eight
+// consecutive samples of `in` is transformed independently and the eight
+// results are written to the same positions of `out`. No normalisation
+// factor is applied inside this function.
+void hls_DCT1DImpl_8x8(float in[64], float out[64]) {
+#pragma HLS INLINE off
+    // Multipliers; the literals are narrowed to float exactly as written.
+    const float n8_w0 = 0.5097955791041592;
+    const float n8_w1 = 0.6013448869350453;
+    const float n8_w2 = 0.8999762231364156;
+    const float n8_w3 = 2.5629154477415055;
+    const float n4_w0 = 0.541196100146197;
+    const float n4_w1 = 1.3065629648763764;
+    const float root2 = 1.4142135623730951f;
+
+loop_dct8x8:
+    for (int row = 0; row < 8; ++row) {
+#pragma HLS DEPENDENCE variable = in inter false
+#pragma HLS DEPENDENCE variable = out inter false
+#pragma HLS LOOP_FLATTEN off
+#pragma HLS pipeline II = 2
+        const int base = row * 8;
+
+        // Fetch the eight samples of this row.
+        const float s0 = in[base + 0];
+        const float s1 = in[base + 1];
+        const float s2 = in[base + 2];
+        const float s3 = in[base + 3];
+        const float s4 = in[base + 4];
+        const float s5 = in[base + 5];
+        const float s6 = in[base + 6];
+        const float s7 = in[base + 7];
+
+        // Stage 1: mirrored sums feed the even outputs, mirrored
+        // differences feed the odd outputs.
+        const float e0 = s0 + s7;
+        const float e1 = s1 + s6;
+        const float e2 = s2 + s5;
+        const float e3 = s3 + s4;
+        const float o0 = s0 - s7;
+        const float o1 = s1 - s6;
+        const float o2 = s2 - s5;
+        const float o3 = s3 - s4;
+
+        // Even half: 4-point butterfly on the sums.
+        const float ee0 = e0 + e3;
+        const float ee1 = e1 + e2;
+        const float eo0 = (e0 - e3) * n4_w0;
+        const float eo1 = (e1 - e2) * n4_w1;
+        const float even_sum = ee0 + ee1;
+        const float even_mid = ee0 - ee1;
+        const float even_diff = eo0 - eo1;
+        const float even_mix = (eo0 + eo1) * root2 + even_diff;
+
+        // Odd half: scale the differences, then the same 4-point butterfly.
+        const float p0 = o0 * n8_w0;
+        const float p1 = o1 * n8_w1;
+        const float p2 = o2 * n8_w2;
+        const float p3 = o3 * n8_w3;
+        const float q0 = p0 + p3;
+        const float q1 = p1 + p2;
+        const float q2 = (p0 - p3) * n4_w0;
+        const float q3 = (p1 - p2) * n4_w1;
+        const float odd_sum = q0 + q1;
+        const float odd_mid = q0 - q1;
+        const float odd_diff = q2 - q3;
+        const float odd_mix = (q2 + q3) * root2 + odd_diff;
+
+        // Interleave even-half and odd-half results.
+        out[base + 0] = even_sum;
+        out[base + 1] = odd_sum * root2 + odd_mix;
+        out[base + 2] = even_mix;
+        out[base + 3] = odd_mix + odd_mid;
+        out[base + 4] = even_mid;
+        out[base + 5] = odd_mid + odd_diff;
+        out[base + 6] = even_diff;
+        out[base + 7] = odd_diff;
+    }
+}
+
+// Transposes a row-major 8x8 block and multiplies every sample by 1/8:
+// out[col * 8 + row] = in[row * 8 + col] / 8.
+void hls_TransposeBlock8(float in[64], float out[64]) {
+#pragma HLS INLINE off
+    const float kScale = 1.0f / 8.0f;
+loop_transposeBlock8:
+    for (int row = 0; row < 8; ++row) {
+        for (int col = 0; col < 8; ++col) {
+#pragma HLS pipeline II = 1
+            const int src = row * 8 + col;
+            const int dst = col * 8 + row;
+            out[dst] = kScale * in[src];
+        }
+    }
+}
+
+// Applies the 8-point row butterfly (the same arithmetic as
+// hls_DCT1DImpl_8x8) to all 128 rows of a 1024-sample buffer.
+//
+// The buffer is addressed as 4 x 4 blocks of 64 contiguous floats:
+// addr = by * 256 + bx * 64 + x * 8 selects row `x` of block (by, bx), and
+// the eight samples addr+0 .. addr+7 are transformed and written back to the
+// same offsets of `out`. No normalisation factor is applied here.
+void dct8_block(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+    // Multipliers for the 8-point and 4-point stages; the double literals are
+    // narrowed to float on initialisation.
+    float kMultipliers_N8_c1 = 0.5097955791041592;
+    float kMultipliers_N8_c2 = 0.6013448869350453;
+    float kMultipliers_N8_c3 = 0.8999762231364156;
+    float kMultipliers_N8_c4 = 2.5629154477415055;
+    float kMultipliers_N4_c1 = 0.541196100146197;
+    float kMultipliers_N4_c2 = 1.3065629648763764;
+    float sqrt2 = 1.4142135623730951f;
+
+loop_dct_block:
+    for (ap_uint<8> by = 0; by < 4; by++) {
+        for (ap_uint<8> bx = 0; bx < 4; bx++) {
+            for (ap_uint<8> x = 0; x < 8; x++) {
+#pragma HLS DEPENDENCE variable = in inter false
+#pragma HLS DEPENDENCE variable = out inter false
+#pragma HLS LOOP_FLATTEN off
+#pragma HLS pipeline
+                // First sample of row x inside block (by, bx).
+                int addr = 8 * x + bx * 64 + by * 256;
+
+                // Read the whole row before any output is written.
+                float mem_0 = in[addr + 0];
+                float mem_1 = in[addr + 1];
+                float mem_2 = in[addr + 2];
+                float mem_3 = in[addr + 3];
+                float mem_4 = in[addr + 4];
+                float mem_5 = in[addr + 5];
+                float mem_6 = in[addr + 6];
+                float mem_7 = in[addr + 7];
+
+                // Stage 1: mirrored sums (tmp8_0..3) and differences (tmp8_4..7).
+                float tmp8_0 = mem_0 + mem_7;
+                float tmp8_1 = mem_1 + mem_6;
+                float tmp8_2 = mem_2 + mem_5;
+                float tmp8_3 = mem_3 + mem_4;
+                float tmp8_4 = mem_0 - mem_7;
+                float tmp8_5 = mem_1 - mem_6;
+                float tmp8_6 = mem_2 - mem_5;
+                float tmp8_7 = mem_3 - mem_4;
+
+                // Second-level sums/differences of the sum half.
+                float t00 = tmp8_0 + tmp8_3;
+                float t01 = tmp8_1 + tmp8_2;
+                float t02 = tmp8_0 - tmp8_3;
+                float t03 = tmp8_1 - tmp8_2;
+
+                // Difference half is scaled by the N8 multipliers first.
+                float t16 = tmp8_4 * kMultipliers_N8_c1;
+                float t17 = tmp8_5 * kMultipliers_N8_c2;
+                float t18 = tmp8_6 * kMultipliers_N8_c3;
+                float t19 = tmp8_7 * kMultipliers_N8_c4;
+
+                // tmp 0~3
+                float t04 = t00 + t01;
+                float t05 = t00 - t01;
+                float t06 = t02 * kMultipliers_N4_c1;
+                float t07 = t03 * kMultipliers_N4_c2;
+
+                // t08/t09 and t12/t13/t15 are plain copies of earlier values.
+                float t08 = t04;
+                float t09 = t05;
+                float t10 = t06 + t07;
+                float t11 = t06 - t07;
+
+                float t12 = t08;
+                float t13 = t09;
+                float t14 = t10 * sqrt2 + t11;
+                float t15 = t11;
+                // tmp 4~7
+                float t00_a = t16 + t19;
+                float t01_a = t17 + t18;
+                float t02_a = t16 - t19;
+                float t03_a = t17 - t18;
+
+                float t04_a = t00_a + t01_a;
+                float t05_a = t00_a - t01_a;
+                float t06_a = t02_a * kMultipliers_N4_c1;
+                float t07_a = t03_a * kMultipliers_N4_c2;
+
+                float t08_a = t04_a;
+                float t09_a = t05_a;
+                float t10_a = t06_a + t07_a;
+                float t11_a = t06_a - t07_a;
+
+                float t12_a = t08_a;
+                float t13_a = t09_a;
+                float t14_a = t10_a * sqrt2 + t11_a;
+                float t15_a = t11_a;
+
+                // tmp8_out0..3 come from the sum half, tmp8_out4..7 from the
+                // difference half.
+                float tmp8_out0 = t12;
+                float tmp8_out1 = t14;
+                float tmp8_out2 = t13;
+                float tmp8_out3 = t15;
+                float tmp8_out4 = t12_a * sqrt2 + t14_a;
+                float tmp8_out5 = t14_a + t13_a;
+                float tmp8_out6 = t13_a + t15_a;
+                float tmp8_out7 = t15_a;
+
+                // Interleave: even offsets take the sum-half results, odd
+                // offsets take the difference-half results.
+                out[addr + 0] = tmp8_out0;
+                out[addr + 1] = tmp8_out4;
+                out[addr + 2] = tmp8_out1;
+                out[addr + 3] = tmp8_out5;
+                out[addr + 4] = tmp8_out2;
+                out[addr + 5] = tmp8_out6;
+                out[addr + 6] = tmp8_out3;
+                out[addr + 7] = tmp8_out7;
+            }
+        }
+    }
+}
+
+// Transposes every 8x8 sub-block of a 1024-sample buffer and scales each
+// sample by 1/8.
+//
+// Both indices are 10 bits wide, laid out as
+//   [9:8] = by, [7:5] = line within the block, [4:3] = bx, [2:0] = position
+// The source index uses (line, position) = (x, y) and the destination index
+// uses (y, x), which swaps the two 3-bit coordinates.
+void hls_TransposeBlock_dct8(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+    const float kScale = 1.0f / 8.0f;
+    for (ap_uint<8> by = 0; by < 4; by++) {
+        for (ap_uint<8> bx = 0; bx < 4; bx++) {
+            for (ap_uint<8> y = 0; y < 8; y++) {
+                for (ap_uint<8> x = 0; x < 8; x++) {
+#pragma HLS pipeline II = 1
+                    // Destination: y in the upper 3-bit field, x in the lower.
+                    ap_uint<10> dst;
+                    dst(9, 8) = by(1, 0);
+                    dst(7, 5) = y(2, 0);
+                    dst(4, 3) = bx(1, 0);
+                    dst(2, 0) = x(2, 0);
+
+                    // Source: same block bits, x and y exchanged.
+                    ap_uint<10> src;
+                    src(9, 8) = by(1, 0);
+                    src(7, 5) = x(2, 0);
+                    src(4, 3) = bx(1, 0);
+                    src(2, 0) = y(2, 0);
+
+                    out[dst] = kScale * in[src];
+                }
+            }
+        }
+    }
+}
+
+// Copies all 64 samples of `in` into `to_ac` and additionally stores in[0]
+// in to_dc[0].
+void split_ac_dc_dct8(float in[64], float to_ac[64], float to_dc[1]) {
+#pragma HLS INLINE off
+    for (int row = 0; row < 8; ++row) {
+        for (int col = 0; col < 8; ++col) {
+#pragma HLS PIPELINE II = 1
+            const int idx = 8 * row + col;
+            const float sample = in[idx];
+            to_ac[idx] = sample;
+            // Only the very first sample is duplicated into the DC buffer.
+            if (idx == 0) {
+                to_dc[0] = sample;
+            }
+        }
+    }
+}
+
+// Forwards one 8x8 block to the AC coefficient stream.
+//
+// On the first iteration one value is read from stream_rectx and then one
+// from stream_recty. The 64 samples of `in` are written to
+// ac_coef8x8_stream in row-major order only when x8 < rect_xsize and
+// y8 < rect_ysize; otherwise nothing is written for this block.
+void feed_ac_dct8(uint32_t x8,
+                  uint32_t y8,
+                  hls::stream<uint8_t>& stream_recty,
+                  hls::stream<uint8_t>& stream_rectx,
+                  float in[64],
+                  hls::stream<float>& ac_coef8x8_stream) {
+#pragma HLS INLINE off
+    uint8_t limit_x;
+    uint8_t limit_y;
+hls_feed_b64:
+    for (int row = 0; row < 8; ++row) {
+        for (int col = 0; col < 8; ++col) {
+#pragma HLS PIPELINE II = 1
+            const int idx = row * 8 + col;
+            // The limits are fetched exactly once per call.
+            if (idx == 0) {
+                limit_x = stream_rectx.read();
+                limit_y = stream_recty.read();
+            }
+            const bool inside = (x8 < limit_x) && (y8 < limit_y);
+            if (inside) {
+                ac_coef8x8_stream.write(in[idx]);
+            }
+        }
+    }
+}
+
+// Forwards the single DC value of one 8x8 block to the DC stream.
+//
+// On the first iteration one value is read from stream_rectx and then one
+// from stream_recty; in[0] is written to dc_coef8x8_stream when
+// x8 < rect_xsize and y8 < rect_ysize. The remaining 63 iterations perform
+// no stream access.
+void feed_dc_dct8(uint32_t x8,
+                  uint32_t y8,
+                  hls::stream<uint8_t>& stream_recty,
+                  hls::stream<uint8_t>& stream_rectx,
+                  float in[1],
+                  hls::stream<float>& dc_coef8x8_stream) {
+#pragma HLS INLINE off
+    uint8_t limit_x;
+    uint8_t limit_y;
+hls_feed_b64:
+    for (int row = 0; row < 8; ++row) {
+        for (int col = 0; col < 8; ++col) {
+#pragma HLS PIPELINE II = 1
+            const bool first = (row == 0) && (col == 0);
+            if (first) {
+                limit_x = stream_rectx.read();
+                limit_y = stream_recty.read();
+            }
+            // Same gate as the AC path, but only one value is emitted.
+            if (x8 < limit_x && y8 < limit_y && first) {
+                dc_coef8x8_stream.write(in[0]);
+            }
+        }
+    }
+}
+
+// Reads 64 floats from opsin8x8_stream and stores them transposed: the k-th
+// value read (k = 8 * outer + inner) lands at temp0[inner * 8 + outer].
+void load_b64(float temp0[64], hls::stream<float>& opsin8x8_stream) {
+#pragma HLS INLINE off
+    for (int outer = 0; outer < 8; ++outer) {
+        for (int inner = 0; inner < 8; ++inner) {
+#pragma HLS PIPELINE II = 1
+            const float sample = opsin8x8_stream.read();
+            temp0[inner * 8 + outer] = sample;
+        }
+    }
+}
+
+// Streaming 8x8 transform stage.
+//
+// For every 64x64 tile (tile counts are xsize/ysize rounded up to a multiple
+// of 64), for every 8x8 block position (y8, x8) inside the tile and for three
+// passes `c` per block, one 64-sample block is read from opsin8x8_stream, run
+// through row butterfly -> transpose(1/8) -> row butterfly -> transpose(1/8),
+// and handed to feed_ac_dct8 / feed_dc_dct8. Those two functions each read
+// one (rect_x, rect_y) pair per block from their streams and decide whether
+// the block is emitted.
+//
+//   ysize, xsize                       image dimensions used to derive the tile counts
+//   stream_recty8 / stream_rectx8      limits consumed by the AC feeder
+//   stream_recty8_1 / stream_rectx8_1  limits consumed by the DC feeder
+//   opsin8x8_stream                    input samples, 64 per block
+//   ac_coef8x8_stream                  output: 64 values per accepted block
+//   dc_coef8x8_stream                  output: 1 value per accepted block
+void hls_dct8x8_module(unsigned ysize,
+                       unsigned xsize,
+                       hls::stream<uint8_t>& stream_recty8,
+                       hls::stream<uint8_t>& stream_rectx8,
+                       hls::stream<uint8_t>& stream_recty8_1,
+                       hls::stream<uint8_t>& stream_rectx8_1,
+                       hls::stream<float>& opsin8x8_stream,
+                       hls::stream<float>& ac_coef8x8_stream,
+                       hls::stream<float>& dc_coef8x8_stream) {
+#pragma HLS INLINE off
+    // Round each dimension up to the next multiple of 64.
+    int tile_xsize = (xsize + 63) / 64 * 64;
+    int tile_ysize = (ysize + 63) / 64 * 64;
+
+    // Number of 64-wide tiles in each direction.
+    uint32_t ysize64 = tile_ysize / 64;
+    uint32_t xsize64 = tile_xsize / 64;
+    // NOTE(review): xsize_blocks / ysize_blocks are not referenced below.
+    int xsize_blocks = xsize / 8;
+    int ysize_blocks = ysize / 8;
+
+    // Intermediate buffers, one per hand-off between DATAFLOW stages.
+    float temp0[64];
+#pragma HLS bind_storage variable = temp0 type = ram_2p impl = bram
+    float temp1[64];
+#pragma HLS bind_storage variable = temp1 type = ram_2p impl = bram
+    float temp2[64];
+#pragma HLS bind_storage variable = temp2 type = ram_2p impl = bram
+    float temp3[64];
+#pragma HLS bind_storage variable = temp3 type = ram_2p impl = bram
+    float temp4[64];
+#pragma HLS bind_storage variable = temp4 type = ram_2p impl = bram
+    float to_ac[64];
+#pragma HLS bind_storage variable = to_ac type = ram_2p impl = bram
+    float to_dc[1];
+#pragma HLS bind_storage variable = to_dc type = ram_2p impl = bram
+
+loop_dct8_all:
+    for (uint32_t y64 = 0; y64 < ysize64; y64++) {
+        for (uint32_t x64 = 0; x64 < xsize64; x64++) {
+            for (uint32_t y8 = 0; y8 < 8; y8++) {
+                for (uint32_t x8 = 0; x8 < 8; x8++) {
+                    // presumably one pass per colour channel -- TODO confirm
+                    for (int c = 0; c < 3; c++) {
+#pragma HLS DATAFLOW
+                        // load_b64 stores the incoming samples transposed.
+                        load_b64(temp0, opsin8x8_stream);
+                        // First 1D pass, then transpose with 1/8 scaling.
+                        hls_DCT1DImpl_8x8(temp0, temp1);
+                        hls_TransposeBlock8(temp1, temp2);
+                        // Second 1D pass, then transpose with 1/8 scaling.
+                        hls_DCT1DImpl_8x8(temp2, temp3);
+                        hls_TransposeBlock8(temp3, temp4);
+                        // Duplicate sample 0 into to_dc, copy everything into to_ac.
+                        split_ac_dc_dct8(temp4, to_ac, to_dc);
+                        // Each feeder gates its output on x8/y8 versus the streamed limits.
+                        feed_ac_dct8(x8, y8, stream_recty8, stream_rectx8, to_ac, ac_coef8x8_stream);
+                        feed_dc_dct8(x8, y8, stream_recty8_1, stream_rectx8_1, to_dc, dc_coef8x8_stream);
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Straight-line 16-point butterfly on a single row.
+//
+// Reads in[0..15] and writes out[0..15]; there is no row loop, so although
+// both parameters are declared with 256 elements only the first 16 are
+// touched. hls_dct16_block contains the same arithmetic inside a 16-row loop.
+// NOTE(review): no caller of this function is visible in this part of the
+// file -- confirm whether it is still used.
+void hls_DCT1DImpl_16(float in[256], float out[256]) {
+#pragma HLS INLINE off
+    // Multipliers for the 8-point, 4-point and 16-point stages (double
+    // literals narrowed to float on initialisation).
+    float kMultipliers_N8_c1 = 0.5097955791041592;
+
+    float kMultipliers_N8_c2 = 0.6013448869350453;
+    float kMultipliers_N8_c3 = 0.8999762231364156;
+    float kMultipliers_N8_c4 = 2.5629154477415055;
+    float kMultipliers_N4_c1 = 0.541196100146197;
+    float kMultipliers_N4_c2 = 1.3065629648763764;
+
+    float kMultipliers_N16_0 = 0.5024192861881557;
+    float kMultipliers_N16_1 = 0.5224986149396889;
+    float kMultipliers_N16_2 = 0.5669440348163577;
+    float kMultipliers_N16_3 = 0.6468217833599901;
+    float kMultipliers_N16_4 = 0.7881546234512502;
+    float kMultipliers_N16_5 = 1.060677685990347;
+    float kMultipliers_N16_6 = 1.7224470982383342;
+    float kMultipliers_N16_7 = 5.101148618689155;
+    float sqrt2 = 1.4142135623730951f;
+
+    // Stage 1: mirrored sums (tmp16_0..7) and differences (tmp16_8..15).
+    float tmp16_0 = in[0] + in[15];
+    float tmp16_1 = in[1] + in[14];
+    float tmp16_2 = in[2] + in[13];
+    float tmp16_3 = in[3] + in[12];
+    float tmp16_4 = in[4] + in[11];
+    float tmp16_5 = in[5] + in[10];
+    float tmp16_6 = in[6] + in[9];
+    float tmp16_7 = in[7] + in[8];
+    float tmp16_8 = in[0] - in[15];
+    float tmp16_9 = in[1] - in[14];
+    float tmp16_10 = in[2] - in[13];
+    float tmp16_11 = in[3] - in[12];
+    float tmp16_12 = in[4] - in[11];
+    float tmp16_13 = in[5] - in[10];
+    float tmp16_14 = in[6] - in[9];
+    float tmp16_15 = in[7] - in[8];
+
+    // 8-point network on the sum half (t* values); feeds the even outputs.
+    float tmp8_0 = tmp16_0 + tmp16_7;
+    float tmp8_1 = tmp16_1 + tmp16_6;
+    float tmp8_2 = tmp16_2 + tmp16_5;
+    float tmp8_3 = tmp16_3 + tmp16_4;
+    float tmp8_4 = tmp16_0 - tmp16_7;
+    float tmp8_5 = tmp16_1 - tmp16_6;
+    float tmp8_6 = tmp16_2 - tmp16_5;
+    float tmp8_7 = tmp16_3 - tmp16_4;
+
+    float t00 = tmp8_0 + tmp8_3;
+    float t01 = tmp8_1 + tmp8_2;
+    float t02 = tmp8_0 - tmp8_3;
+    float t03 = tmp8_1 - tmp8_2;
+    float t04 = tmp8_4 * kMultipliers_N8_c1;
+    float t05 = tmp8_5 * kMultipliers_N8_c2;
+    float t06 = tmp8_6 * kMultipliers_N8_c3;
+    float t07 = tmp8_7 * kMultipliers_N8_c4;
+
+    float t08 = t02 * kMultipliers_N4_c1;
+    float t09 = t03 * kMultipliers_N4_c2;
+    float t10 = t04 + t07;
+    float t11 = t05 + t06;
+    float t12 = t04 - t07;
+    float t13 = t05 - t06;
+
+    float t14 = t08 + t09;
+    float t15 = t10 + t11;
+    float t16 = t08 - t09;
+    float t17 = t10 - t11;
+
+    float t18 = t12 * kMultipliers_N4_c1;
+    float t19 = t13 * kMultipliers_N4_c2;
+    float t20 = t14 * sqrt2;
+    float t21 = t15 * sqrt2;
+
+    float t22 = t18 + t19;
+    float t23 = t18 - t19;
+
+    float t24 = t22 * sqrt2;
+
+    float t25 = t24 + t23;
+
+    // Difference half: scale by the N16 multipliers, then run the same
+    // 8-point network (d* values); feeds the odd outputs.
+    float t26 = kMultipliers_N16_0 * tmp16_8;
+    float t27 = kMultipliers_N16_1 * tmp16_9;
+    float t28 = kMultipliers_N16_2 * tmp16_10;
+    float t29 = kMultipliers_N16_3 * tmp16_11;
+    float t30 = kMultipliers_N16_4 * tmp16_12;
+    float t31 = kMultipliers_N16_5 * tmp16_13;
+    float t32 = kMultipliers_N16_6 * tmp16_14;
+    float t33 = kMultipliers_N16_7 * tmp16_15;
+
+    float dmp8_0 = t26 + t33;
+    float dmp8_1 = t27 + t32;
+    float dmp8_2 = t28 + t31;
+    float dmp8_3 = t29 + t30;
+    float dmp8_4 = t26 - t33;
+    float dmp8_5 = t27 - t32;
+    float dmp8_6 = t28 - t31;
+    float dmp8_7 = t29 - t30;
+
+    float d00 = dmp8_0 + dmp8_3;
+    float d01 = dmp8_1 + dmp8_2;
+    float d02 = dmp8_0 - dmp8_3;
+    float d03 = dmp8_1 - dmp8_2;
+    float d04 = dmp8_4 * kMultipliers_N8_c1;
+    float d05 = dmp8_5 * kMultipliers_N8_c2;
+    float d06 = dmp8_6 * kMultipliers_N8_c3;
+    float d07 = dmp8_7 * kMultipliers_N8_c4;
+
+    float d08 = d02 * kMultipliers_N4_c1;
+    float d09 = d03 * kMultipliers_N4_c2;
+    float d10 = d04 + d07;
+    float d11 = d05 + d06;
+    float d12 = d04 - d07;
+    float d13 = d05 - d06;
+
+    float d14 = d08 + d09;
+    float d15 = d10 + d11;
+    float d16 = d08 - d09;
+    float d17 = d10 - d11;
+
+    float d18 = d12 * kMultipliers_N4_c1;
+    float d19 = d13 * kMultipliers_N4_c2;
+    float d20 = d14 * sqrt2;
+    float d21 = d15 * sqrt2;
+
+    float d22 = d18 + d19;
+    float d23 = d18 - d19;
+
+    float d24 = d22 * sqrt2;
+
+    float d25 = d24 + d23;
+
+    // Adjacent-pair sums of the d* network results used by the odd outputs.
+    float d26 = d00 + d01;
+    float d27 = d21 + d25;
+    float d28 = d20 + d16;
+    float d29 = d25 + d17;
+    float d30 = d00 - d01;
+    float d31 = d17 + d23;
+    float d32 = d26 * sqrt2;
+
+    // Even indices come from the t* network, odd indices from the d* network.
+    out[0] = t00 + t01;
+    out[1] = d32 + d27;
+    out[2] = t21 + t25;
+    out[3] = d27 + d28;
+    out[4] = t20 + t16;
+    out[5] = d28 + d29;
+    out[6] = t25 + t17;
+    out[7] = d29 + d30;
+    out[8] = t00 - t01;
+    out[9] = d30 + d31;
+    out[10] = t17 + t23;
+    out[11] = d31 + d16;
+    out[12] = t16;
+    out[13] = d16 + d23;
+    out[14] = t23;
+    out[15] = d23;
+}
+
+// Row-wise 16-point butterfly over a 16x16 row-major block.
+//
+// For each of the 16 rows, the samples in[16*i + 0..15] are transformed and
+// written to out[16*i + 0..15]. No normalisation factor is applied inside
+// this function.
+void hls_dct16_block(float in[256], float out[256]) {
+#pragma HLS INLINE off
+    // Multipliers for the 8-point, 4-point and 16-point stages (double
+    // literals narrowed to float on initialisation).
+    float kMultipliers_N8_c1 = 0.5097955791041592;
+
+    float kMultipliers_N8_c2 = 0.6013448869350453;
+    float kMultipliers_N8_c3 = 0.8999762231364156;
+    float kMultipliers_N8_c4 = 2.5629154477415055;
+    float kMultipliers_N4_c1 = 0.541196100146197;
+    float kMultipliers_N4_c2 = 1.3065629648763764;
+
+    float kMultipliers_N16_0 = 0.5024192861881557;
+    float kMultipliers_N16_1 = 0.5224986149396889;
+    float kMultipliers_N16_2 = 0.5669440348163577;
+    float kMultipliers_N16_3 = 0.6468217833599901;
+    float kMultipliers_N16_4 = 0.7881546234512502;
+    float kMultipliers_N16_5 = 1.060677685990347;
+    float kMultipliers_N16_6 = 1.7224470982383342;
+    float kMultipliers_N16_7 = 5.101148618689155;
+    float sqrt2 = 1.4142135623730951f;
+
+    // One iteration per row; the pipeline is requested with II = 11.
+    for (int i = 0; i < 16; i++) {
+#pragma HLS DEPENDENCE variable = in inter false
+#pragma HLS DEPENDENCE variable = out inter false
+#pragma HLS LOOP_FLATTEN off
+#pragma HLS pipeline II = 11
+        // Stage 1: mirrored sums (tmp16_0..7) and differences (tmp16_8..15).
+        float tmp16_0 = in[16 * i + 0] + in[16 * i + 15];
+        float tmp16_1 = in[16 * i + 1] + in[16 * i + 14];
+        float tmp16_2 = in[16 * i + 2] + in[16 * i + 13];
+        float tmp16_3 = in[16 * i + 3] + in[16 * i + 12];
+        float tmp16_4 = in[16 * i + 4] + in[16 * i + 11];
+        float tmp16_5 = in[16 * i + 5] + in[16 * i + 10];
+        float tmp16_6 = in[16 * i + 6] + in[16 * i + 9];
+        float tmp16_7 = in[16 * i + 7] + in[16 * i + 8];
+        float tmp16_8 = in[16 * i + 0] - in[16 * i + 15];
+        float tmp16_9 = in[16 * i + 1] - in[16 * i + 14];
+        float tmp16_10 = in[16 * i + 2] - in[16 * i + 13];
+        float tmp16_11 = in[16 * i + 3] - in[16 * i + 12];
+        float tmp16_12 = in[16 * i + 4] - in[16 * i + 11];
+        float tmp16_13 = in[16 * i + 5] - in[16 * i + 10];
+        float tmp16_14 = in[16 * i + 6] - in[16 * i + 9];
+        float tmp16_15 = in[16 * i + 7] - in[16 * i + 8];
+
+        // 8-point network on the sum half (t* values); feeds the even outputs.
+        float tmp8_0 = tmp16_0 + tmp16_7;
+        float tmp8_1 = tmp16_1 + tmp16_6;
+        float tmp8_2 = tmp16_2 + tmp16_5;
+        float tmp8_3 = tmp16_3 + tmp16_4;
+        float tmp8_4 = tmp16_0 - tmp16_7;
+        float tmp8_5 = tmp16_1 - tmp16_6;
+        float tmp8_6 = tmp16_2 - tmp16_5;
+        float tmp8_7 = tmp16_3 - tmp16_4;
+
+        float t00 = tmp8_0 + tmp8_3;
+        float t01 = tmp8_1 + tmp8_2;
+        float t02 = tmp8_0 - tmp8_3;
+        float t03 = tmp8_1 - tmp8_2;
+        float t04 = tmp8_4 * kMultipliers_N8_c1;
+        float t05 = tmp8_5 * kMultipliers_N8_c2;
+        float t06 = tmp8_6 * kMultipliers_N8_c3;
+        float t07 = tmp8_7 * kMultipliers_N8_c4;
+
+        float t08 = t02 * kMultipliers_N4_c1;
+        float t09 = t03 * kMultipliers_N4_c2;
+        float t10 = t04 + t07;
+        float t11 = t05 + t06;
+        float t12 = t04 - t07;
+        float t13 = t05 - t06;
+
+        float t14 = t08 + t09;
+        float t15 = t10 + t11;
+        float t16 = t08 - t09;
+        float t17 = t10 - t11;
+
+        float t18 = t12 * kMultipliers_N4_c1;
+        float t19 = t13 * kMultipliers_N4_c2;
+        float t20 = t14 * sqrt2;
+        float t21 = t15 * sqrt2;
+
+        float t22 = t18 + t19;
+        float t23 = t18 - t19;
+
+        float t24 = t22 * sqrt2;
+
+        float t25 = t24 + t23;
+
+        // Difference half: scale by the N16 multipliers, then run the same
+        // 8-point network (d* values); feeds the odd outputs.
+        float t26 = kMultipliers_N16_0 * tmp16_8;
+        float t27 = kMultipliers_N16_1 * tmp16_9;
+        float t28 = kMultipliers_N16_2 * tmp16_10;
+        float t29 = kMultipliers_N16_3 * tmp16_11;
+        float t30 = kMultipliers_N16_4 * tmp16_12;
+        float t31 = kMultipliers_N16_5 * tmp16_13;
+        float t32 = kMultipliers_N16_6 * tmp16_14;
+        float t33 = kMultipliers_N16_7 * tmp16_15;
+
+        float dmp8_0 = t26 + t33;
+        float dmp8_1 = t27 + t32;
+        float dmp8_2 = t28 + t31;
+        float dmp8_3 = t29 + t30;
+        float dmp8_4 = t26 - t33;
+        float dmp8_5 = t27 - t32;
+        float dmp8_6 = t28 - t31;
+        float dmp8_7 = t29 - t30;
+
+        float d00 = dmp8_0 + dmp8_3;
+        float d01 = dmp8_1 + dmp8_2;
+        float d02 = dmp8_0 - dmp8_3;
+        float d03 = dmp8_1 - dmp8_2;
+        float d04 = dmp8_4 * kMultipliers_N8_c1;
+        float d05 = dmp8_5 * kMultipliers_N8_c2;
+        float d06 = dmp8_6 * kMultipliers_N8_c3;
+        float d07 = dmp8_7 * kMultipliers_N8_c4;
+
+        float d08 = d02 * kMultipliers_N4_c1;
+        float d09 = d03 * kMultipliers_N4_c2;
+        float d10 = d04 + d07;
+        float d11 = d05 + d06;
+        float d12 = d04 - d07;
+        float d13 = d05 - d06;
+
+        float d14 = d08 + d09;
+        float d15 = d10 + d11;
+        float d16 = d08 - d09;
+        float d17 = d10 - d11;
+
+        float d18 = d12 * kMultipliers_N4_c1;
+        float d19 = d13 * kMultipliers_N4_c2;
+        float d20 = d14 * sqrt2;
+        float d21 = d15 * sqrt2;
+
+        float d22 = d18 + d19;
+        float d23 = d18 - d19;
+
+        float d24 = d22 * sqrt2;
+
+        float d25 = d24 + d23;
+
+        // Adjacent-pair sums of the d* network results used by the odd outputs.
+        float d26 = d00 + d01;
+        float d27 = d21 + d25;
+        float d28 = d20 + d16;
+        float d29 = d25 + d17;
+        float d30 = d00 - d01;
+        float d31 = d17 + d23;
+        float d32 = d26 * sqrt2;
+
+        // Even offsets come from the t* network, odd offsets from the d* network.
+        out[16 * i + 0] = t00 + t01;
+        out[16 * i + 1] = d32 + d27;
+        out[16 * i + 2] = t21 + t25;
+        out[16 * i + 3] = d27 + d28;
+        out[16 * i + 4] = t20 + t16;
+        out[16 * i + 5] = d28 + d29;
+        out[16 * i + 6] = t25 + t17;
+        out[16 * i + 7] = d29 + d30;
+        out[16 * i + 8] = t00 - t01;
+        out[16 * i + 9] = d30 + d31;
+        out[16 * i + 10] = t17 + t23;
+        out[16 * i + 11] = d31 + d16;
+        out[16 * i + 12] = t16;
+        out[16 * i + 13] = d16 + d23;
+        out[16 * i + 14] = t23;
+        out[16 * i + 15] = d23;
+    }
+}
+
+// Combines four input values into four outputs with a 2x2 butterfly.
+// input[1] and input[2] are scaled by `kResample`, input[3] by kResample
+// twice; the outputs are the four sum/difference combinations listed below.
+void hls_ReinterpretingIDCT16(float input[4], float output[4]) {
+#pragma HLS INLINE off
+    const float kResample = 0.901764214038848876953125;
+
+    // Scaled inputs.
+    const float a0 = input[0];
+    const float a1 = input[1] * kResample;
+    const float a2 = input[2] * kResample;
+    const float a3 = input[3] * kResample * kResample;
+
+    // First butterfly level.
+    const float sum02 = a0 + a2;
+    const float sum13 = a1 + a3;
+    const float diff02 = a0 - a2;
+    const float diff13 = a1 - a3;
+
+    // Second butterfly level.
+    output[0] = sum02 + sum13;
+    output[1] = diff02 + diff13;
+    output[2] = sum02 - sum13;
+    output[3] = diff02 - diff13;
+}
+
+// dct 16x16
+// Fills a 16x16 row-major buffer from opsin16x16_stream. The stream delivers
+// four 8x8 sub-blocks one after another (y8 outer, x8 inner), each in
+// row-major order; every sample is placed at its position inside the 16x16
+// buffer.
+void load_dct16(float in[256], hls::stream<float>& opsin16x16_stream) {
+#pragma HLS INLINE off
+    for (int blk_y = 0; blk_y < 2; ++blk_y) {
+        for (int blk_x = 0; blk_x < 2; ++blk_x) {
+            for (int row = 0; row < 8; ++row) {
+                for (int col = 0; col < 8; ++col) {
+#pragma HLS PIPELINE II = 1
+                    // Top-left corner of the sub-block, then the in-block offset.
+                    const int origin = blk_y * 16 * 8 + blk_x * 8;
+                    const int dst = origin + row * 16 + col;
+                    in[dst] = opsin16x16_stream.read();
+                }
+            }
+        }
+    }
+}
+
+// Plain transpose of a 16x16 row-major block:
+// out[col * 16 + row] = in[row * 16 + col].
+void transposeDct16(float in[256], float out[256]) {
+#pragma HLS INLINE off
+    for (int row = 0; row < 16; ++row) {
+        for (int col = 0; col < 16; ++col) {
+#pragma HLS PIPELINE II = 1
+            const int src = row * 16 + col;
+            const int dst = col * 16 + row;
+            out[dst] = in[src];
+        }
+    }
+}
+
+// Transposes a 16x16 row-major block and multiplies every sample by 1/16:
+// out[col * 16 + row] = in[row * 16 + col] / 16.
+void transposeDct16_scale(float in[256], float out[256]) {
+#pragma HLS INLINE off
+    const float kScale = 1.0f / 16.0f;
+    for (int row = 0; row < 16; ++row) {
+        for (int col = 0; col < 16; ++col) {
+#pragma HLS PIPELINE II = 1
+            const int src = row * 16 + col;
+            const int dst = col * 16 + row;
+            out[dst] = kScale * in[src];
+        }
+    }
+}
+
+// Forwards one 16x16 block to the AC coefficient stream.
+//
+// On the first iteration one value is read from stream_rectx and then one
+// from stream_recty. All 256 samples of `to_ac` are written in index order
+// only when (2 * x16 + 1) < rect_xsize and (2 * y16 + 1) < rect_ysize;
+// otherwise nothing is written for this block.
+void dct16_ac_writeout(float to_ac[256],
+                       hls::stream<uint8_t>& stream_recty,
+                       hls::stream<uint8_t>& stream_rectx,
+                       hls::stream<float>& ac_coef16x16_stream,
+                       uint32_t x16,
+                       uint32_t y16) {
+#pragma HLS INLINE off
+    uint8_t limit_x;
+    uint8_t limit_y;
+    for (int idx = 0; idx < 256; ++idx) {
+#pragma HLS PIPELINE II = 1
+        // The limits are fetched exactly once per call.
+        if (idx == 0) {
+            limit_x = stream_rectx.read();
+            limit_y = stream_recty.read();
+        }
+        const bool inside = ((2 * x16 + 1) < limit_x) && ((2 * y16 + 1) < limit_y);
+        if (inside) {
+            ac_coef16x16_stream.write(to_ac[idx]);
+        }
+    }
+}
+
+// Forwards the four DC values of one 16x16 block to the DC stream.
+//
+// On the first iteration one value is read from stream_rectx and then one
+// from stream_recty. The four entries of `to_dc` are written in index order
+// only when (2 * x16 + 1) < rect_xsize and (2 * y16 + 1) < rect_ysize.
+void dct16_dc_writeout(float to_dc[4],
+                       hls::stream<uint8_t>& stream_recty,
+                       hls::stream<uint8_t>& stream_rectx,
+                       hls::stream<float>& dc_coef16x16_stream,
+                       uint32_t x16,
+                       uint32_t y16) {
+#pragma HLS INLINE off
+    uint8_t limit_x;
+    uint8_t limit_y;
+    for (int idx = 0; idx < 4; ++idx) {
+#pragma HLS PIPELINE II = 1
+        // The limits are fetched exactly once per call.
+        if (idx == 0) {
+            limit_x = stream_rectx.read();
+            limit_y = stream_recty.read();
+        }
+        const bool inside = ((2 * x16 + 1) < limit_x) && ((2 * y16 + 1) < limit_y);
+        if (inside) {
+            dc_coef16x16_stream.write(to_dc[idx]);
+        }
+    }
+}
+
+// Copies all 256 samples of `in` into `ac_out1` and additionally stores the
+// samples at indices 0, 1, 16 and 17 (the top-left 2x2 corner of the 16x16
+// row-major block) in dc_out[0..3].
+void dct16_ac_dc_split(float in[256], float ac_out1[256], float dc_out[4]) {
+#pragma HLS INLINE off
+    for (int idx = 0; idx < 256; ++idx) {
+#pragma HLS PIPELINE II = 1
+        ac_out1[idx] = in[idx];
+        switch (idx) {
+            case 0:
+                dc_out[0] = in[idx];
+                break;
+            case 1:
+                dc_out[1] = in[idx];
+                break;
+            case 16:
+                dc_out[2] = in[idx];
+                break;
+            case 17:
+                dc_out[3] = in[idx];
+                break;
+            default:
+                break;
+        }
+    }
+}
+
+// Reads 256 floats from opsin16x16_stream and stores them in arrival order
+// as a 16x16 row-major block.
+void dct16_test_load(float from[256], hls::stream<float>& opsin16x16_stream) {
+#pragma HLS INLINE off
+    for (int row = 0; row < 16; ++row) {
+        for (int col = 0; col < 16; ++col) {
+#pragma HLS PIPELINE II = 1
+            const float sample = opsin16x16_stream.read();
+            from[16 * row + col] = sample;
+        }
+    }
+}
+
+// Streaming 16x16 transform stage.
+//
+// For every 64x64 tile (tile counts are xsize/ysize rounded up to a multiple
+// of 64), for every 16x16 block position (y16, x16) inside the tile and for
+// three passes `c` per block, one 256-sample block is read from
+// opsin16x16_stream and run through
+//   transpose -> row butterfly -> transpose(1/16) -> row butterfly -> transpose(1/16).
+// The result is copied to the AC path, and the four samples at indices
+// 0, 1, 16, 17 go through hls_ReinterpretingIDCT16 to the DC path. Each
+// writeout function reads one (rect_x, rect_y) pair per block from its
+// streams and decides whether the block is emitted.
+//
+//   ysize, xsize                         image dimensions used to derive the tile counts
+//   stream_recty16 / stream_rectx16      limits consumed by the AC writeout
+//   stream_recty16_1 / stream_rectx16_1  limits consumed by the DC writeout
+//   opsin16x16_stream                    input samples, 256 per block
+//   ac_coef16x16_stream                  output: 256 values per accepted block
+//   dc_coef16x16_stream                  output: 4 values per accepted block
+void hls_dct16x16_module(unsigned ysize,
+                         unsigned xsize,
+                         hls::stream<uint8_t>& stream_recty16,
+                         hls::stream<uint8_t>& stream_rectx16,
+                         hls::stream<uint8_t>& stream_recty16_1,
+                         hls::stream<uint8_t>& stream_rectx16_1,
+                         hls::stream<float>& opsin16x16_stream,
+                         hls::stream<float>& ac_coef16x16_stream,
+                         hls::stream<float>& dc_coef16x16_stream) {
+#pragma HLS INLINE off
+
+    // Round each dimension up to the next multiple of 64.
+    int tile_xsize = (xsize + 63) / 64 * 64;
+    int tile_ysize = (ysize + 63) / 64 * 64;
+
+    // Number of 64-wide tiles in each direction.
+    uint32_t ysize64 = tile_ysize / 64;
+    uint32_t xsize64 = tile_xsize / 64;
+    // NOTE(review): ysize16, xsize16, xsize_blocks and ysize_blocks are not
+    // referenced below.
+    uint32_t ysize16 = tile_ysize / 16;
+    uint32_t xsize16 = tile_xsize / 16;
+    int xsize_blocks = xsize / 8;
+    int ysize_blocks = ysize / 8;
+
+    // Intermediate buffers, one per hand-off between DATAFLOW stages.
+    float from[256];
+#pragma HLS bind_storage variable = from type = ram_2p impl = bram
+    float temp0[256];
+#pragma HLS bind_storage variable = temp0 type = ram_2p impl = bram
+    float temp1[256];
+#pragma HLS bind_storage variable = temp1 type = ram_2p impl = bram
+    float temp2[256];
+#pragma HLS bind_storage variable = temp2 type = ram_2p impl = bram
+    float temp3[256];
+#pragma HLS bind_storage variable = temp3 type = ram_2p impl = bram
+    float temp4[256];
+#pragma HLS bind_storage variable = temp4 type = ram_2p impl = bram
+    float to_ac[256];
+#pragma HLS bind_storage variable = to_ac type = ram_2p impl = bram
+    float to_dc[4];
+#pragma HLS bind_storage variable = to_dc type = ram_2p impl = bram
+    float dc_mem[4];
+#pragma HLS bind_storage variable = dc_mem type = ram_2p impl = bram
+
+    for (uint32_t y64 = 0; y64 < ysize64; y64++) {
+        for (uint32_t x64 = 0; x64 < xsize64; x64++) {
+            for (uint32_t y16 = 0; y16 < 4; y16++) {
+                for (uint32_t x16 = 0; x16 < 4; x16++) {
+                    // presumably one pass per colour channel -- TODO confirm
+                    for (int c = 0; c < 3; c++) {
+// #pragma HLS PIPELINE rewind
+#pragma HLS DATAFLOW
+                        // Row-major load followed by an unscaled transpose.
+                        dct16_test_load(from, opsin16x16_stream);
+                        transposeDct16(from, temp0);
+                        // First 1D pass, then transpose with 1/16 scaling.
+                        hls_dct16_block(temp0, temp1);
+                        transposeDct16_scale(temp1, temp2);
+                        // Second 1D pass, then transpose with 1/16 scaling.
+                        hls_dct16_block(temp2, temp3);
+                        transposeDct16_scale(temp3, temp4);
+                        // Copy everything to to_ac; indices 0, 1, 16, 17 also go to to_dc.
+                        dct16_ac_dc_split(temp4, to_ac, to_dc);
+                        // output ac_coeff_stream
+                        dct16_ac_writeout(to_ac, stream_recty16, stream_rectx16, ac_coef16x16_stream, x16, y16);
+                        // output dc_coeff_stream
+                        hls_ReinterpretingIDCT16(to_dc, dc_mem);
+                        dct16_dc_writeout(dc_mem, stream_recty16_1, stream_rectx16_1, dc_coef16x16_stream, x16, y16);
+                    }
+                }
+            }
+        }
+    }
+}
+
+// template <bool transpose_scale>
+// 32-point 1-D forward DCT in recursive even/odd (butterfly) form, applied row by row.
+//
+// in  : 1024 floats read as 32 rows of 32 contiguous samples (row i is in[32*i .. 32*i+31]).
+// out : 1024 floats; row i receives the 32 results computed from row i of `in`.
+//
+// For every row the mirrored sums in[k] + in[31-k] go through a 16-point stage and end up
+// on the even output indices; the mirrored differences in[k] - in[31-k] are scaled by
+// kMultipliers_N32_*, go through the same 16-point stage, are combined with their
+// neighbours and end up on the odd output indices. No 1/N scaling is applied in here.
+void hls_DCT1DImpl_32(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+
+    // Scale factors for the 16 mirrored differences of the 32-point stage.
+    // Note: the un-suffixed literals below are doubles narrowed to float on initialisation.
+    float kMultipliers_N32_0 = 0.5006029982351963;
+    float kMultipliers_N32_1 = 0.5054709598975436;
+    float kMultipliers_N32_2 = 0.5154473099226246;
+    float kMultipliers_N32_3 = 0.531042591089784;
+    float kMultipliers_N32_4 = 0.553103896034444;
+    float kMultipliers_N32_5 = 0.5829349682061339;
+    float kMultipliers_N32_6 = 0.622504123035664;
+    float kMultipliers_N32_7 = 0.674808341455005;
+    float kMultipliers_N32_8 = 0.7445362710022986;
+    float kMultipliers_N32_9 = 0.839349645415526;
+    float kMultipliers_N32_10 = 0.9725682378619608;
+    float kMultipliers_N32_11 = 1.169439933432884;
+    float kMultipliers_N32_12 = 1.4841646163141662;
+    float kMultipliers_N32_13 = 2.057781009953411;
+    float kMultipliers_N32_14 = 3.407608418468719;
+    float kMultipliers_N32_15 = 10.19000812354803;
+
+    float sqrt2 = 1.4142135623730951f;
+
+    // Scale factors for the differences of the 8-point stage.
+    float kMultipliers_N8_c1 = 0.5097955791041592;
+
+    float kMultipliers_N8_c2 = 0.6013448869350453;
+    float kMultipliers_N8_c3 = 0.8999762231364156;
+    float kMultipliers_N8_c4 = 2.5629154477415055;
+    // Scale factors for the differences of the 4-point stage.
+    float kMultipliers_N4_c1 = 0.541196100146197;
+    float kMultipliers_N4_c2 = 1.3065629648763764;
+
+    // Scale factors for the differences of the 16-point stage.
+    float kMultipliers_N16_0 = 0.5024192861881557;
+    float kMultipliers_N16_1 = 0.5224986149396889;
+    float kMultipliers_N16_2 = 0.5669440348163577;
+    float kMultipliers_N16_3 = 0.6468217833599901;
+    float kMultipliers_N16_4 = 0.7881546234512502;
+    float kMultipliers_N16_5 = 1.060677685990347;
+    float kMultipliers_N16_6 = 1.7224470982383342;
+    float kMultipliers_N16_7 = 5.101148618689155;
+
+    // One row of 32 samples per iteration; the pragma requests an initiation interval of 30.
+    for (int i = 0; i < 32; i++) {
+#pragma HLS PIPELINE II = 30
+        // ---- Even half: mirrored sums of the row, in[k] + in[31 - k] ----
+        float tmp32_b16_0 = in[0 + 32 * i] + in[31 + 32 * i];
+        float tmp32_b16_1 = in[1 + 32 * i] + in[30 + 32 * i];
+        float tmp32_b16_2 = in[2 + 32 * i] + in[29 + 32 * i];
+        float tmp32_b16_3 = in[3 + 32 * i] + in[28 + 32 * i];
+        float tmp32_b16_4 = in[4 + 32 * i] + in[27 + 32 * i];
+        float tmp32_b16_5 = in[5 + 32 * i] + in[26 + 32 * i];
+        float tmp32_b16_6 = in[6 + 32 * i] + in[25 + 32 * i];
+        float tmp32_b16_7 = in[7 + 32 * i] + in[24 + 32 * i];
+        float tmp32_b16_8 = in[8 + 32 * i] + in[23 + 32 * i];
+        float tmp32_b16_9 = in[9 + 32 * i] + in[22 + 32 * i];
+        float tmp32_b16_10 = in[10 + 32 * i] + in[21 + 32 * i];
+        float tmp32_b16_11 = in[11 + 32 * i] + in[20 + 32 * i];
+        float tmp32_b16_12 = in[12 + 32 * i] + in[19 + 32 * i];
+        float tmp32_b16_13 = in[13 + 32 * i] + in[18 + 32 * i];
+        float tmp32_b16_14 = in[14 + 32 * i] + in[17 + 32 * i];
+        float tmp32_b16_15 = in[15 + 32 * i] + in[16 + 32 * i];
+
+        // 16-point split: entries 0..7 are mirrored sums, 8..15 mirrored differences.
+        float tmp16_0_b16 = tmp32_b16_0 + tmp32_b16_15;
+        float tmp16_1_b16 = tmp32_b16_1 + tmp32_b16_14;
+        float tmp16_2_b16 = tmp32_b16_2 + tmp32_b16_13;
+        float tmp16_3_b16 = tmp32_b16_3 + tmp32_b16_12;
+        float tmp16_4_b16 = tmp32_b16_4 + tmp32_b16_11;
+        float tmp16_5_b16 = tmp32_b16_5 + tmp32_b16_10;
+        float tmp16_6_b16 = tmp32_b16_6 + tmp32_b16_9;
+        float tmp16_7_b16 = tmp32_b16_7 + tmp32_b16_8;
+        float tmp16_8_b16 = tmp32_b16_0 - tmp32_b16_15;
+        float tmp16_9_b16 = tmp32_b16_1 - tmp32_b16_14;
+        float tmp16_10_b16 = tmp32_b16_2 - tmp32_b16_13;
+        float tmp16_11_b16 = tmp32_b16_3 - tmp32_b16_12;
+        float tmp16_12_b16 = tmp32_b16_4 - tmp32_b16_11;
+        float tmp16_13_b16 = tmp32_b16_5 - tmp32_b16_10;
+        float tmp16_14_b16 = tmp32_b16_6 - tmp32_b16_9;
+        float tmp16_15_b16 = tmp32_b16_7 - tmp32_b16_8;
+
+        // 8-point split of the 16-point sums (0..3 sums, 4..7 differences).
+        float tmp8_0_b16 = tmp16_0_b16 + tmp16_7_b16;
+        float tmp8_1_b16 = tmp16_1_b16 + tmp16_6_b16;
+        float tmp8_2_b16 = tmp16_2_b16 + tmp16_5_b16;
+        float tmp8_3_b16 = tmp16_3_b16 + tmp16_4_b16;
+        float tmp8_4_b16 = tmp16_0_b16 - tmp16_7_b16;
+        float tmp8_5_b16 = tmp16_1_b16 - tmp16_6_b16;
+        float tmp8_6_b16 = tmp16_2_b16 - tmp16_5_b16;
+        float tmp8_7_b16 = tmp16_3_b16 - tmp16_4_b16;
+
+        // 4-point split of the 8-point sums; the 8-point differences are scaled by N8.
+        float t00_b16 = tmp8_0_b16 + tmp8_3_b16;
+        float t01_b16 = tmp8_1_b16 + tmp8_2_b16;
+        float t02_b16 = tmp8_0_b16 - tmp8_3_b16;
+        float t03_b16 = tmp8_1_b16 - tmp8_2_b16;
+        float t04_b16 = tmp8_4_b16 * kMultipliers_N8_c1;
+        float t05_b16 = tmp8_5_b16 * kMultipliers_N8_c2;
+        float t06_b16 = tmp8_6_b16 * kMultipliers_N8_c3;
+        float t07_b16 = tmp8_7_b16 * kMultipliers_N8_c4;
+
+        float t08_b16 = t02_b16 * kMultipliers_N4_c1;
+        float t09_b16 = t03_b16 * kMultipliers_N4_c2;
+        float t10_b16 = t04_b16 + t07_b16;
+        float t11_b16 = t05_b16 + t06_b16;
+        float t12_b16 = t04_b16 - t07_b16;
+        float t13_b16 = t05_b16 - t06_b16;
+
+        float t14_b16 = t08_b16 + t09_b16;
+        float t15_b16 = t10_b16 + t11_b16;
+        float t16_b16 = t08_b16 - t09_b16;
+        float t17_b16 = t10_b16 - t11_b16;
+
+        float t18_b16 = t12_b16 * kMultipliers_N4_c1;
+        float t19_b16 = t13_b16 * kMultipliers_N4_c2;
+        float t20_b16 = t14_b16 * sqrt2;
+        float t21_b16 = t15_b16 * sqrt2;
+
+        float t22_b16 = t18_b16 + t19_b16;
+        float t23_b16 = t18_b16 - t19_b16;
+
+        float t24_b16 = t22_b16 * sqrt2;
+
+        float t25_b16 = t24_b16 + t23_b16;
+
+        // 16-point differences scaled by N16, then pushed through the same 8-point network.
+        float t26_b16 = kMultipliers_N16_0 * tmp16_8_b16;
+        float t27_b16 = kMultipliers_N16_1 * tmp16_9_b16;
+        float t28_b16 = kMultipliers_N16_2 * tmp16_10_b16;
+        float t29_b16 = kMultipliers_N16_3 * tmp16_11_b16;
+        float t30_b16 = kMultipliers_N16_4 * tmp16_12_b16;
+        float t31_b16 = kMultipliers_N16_5 * tmp16_13_b16;
+        float t32_b16 = kMultipliers_N16_6 * tmp16_14_b16;
+        float t33_b16 = kMultipliers_N16_7 * tmp16_15_b16;
+
+        float dmp8_0_b16 = t26_b16 + t33_b16;
+        float dmp8_1_b16 = t27_b16 + t32_b16;
+        float dmp8_2_b16 = t28_b16 + t31_b16;
+        float dmp8_3_b16 = t29_b16 + t30_b16;
+        float dmp8_4_b16 = t26_b16 - t33_b16;
+        float dmp8_5_b16 = t27_b16 - t32_b16;
+        float dmp8_6_b16 = t28_b16 - t31_b16;
+        float dmp8_7_b16 = t29_b16 - t30_b16;
+
+        float d00_b16 = dmp8_0_b16 + dmp8_3_b16;
+        float d01_b16 = dmp8_1_b16 + dmp8_2_b16;
+        float d02_b16 = dmp8_0_b16 - dmp8_3_b16;
+        float d03_b16 = dmp8_1_b16 - dmp8_2_b16;
+        float d04_b16 = dmp8_4_b16 * kMultipliers_N8_c1;
+        float d05_b16 = dmp8_5_b16 * kMultipliers_N8_c2;
+        float d06_b16 = dmp8_6_b16 * kMultipliers_N8_c3;
+        float d07_b16 = dmp8_7_b16 * kMultipliers_N8_c4;
+
+        float d08_b16 = d02_b16 * kMultipliers_N4_c1;
+        float d09_b16 = d03_b16 * kMultipliers_N4_c2;
+        float d10_b16 = d04_b16 + d07_b16;
+        float d11_b16 = d05_b16 + d06_b16;
+        float d12_b16 = d04_b16 - d07_b16;
+        float d13_b16 = d05_b16 - d06_b16;
+
+        float d14_b16 = d08_b16 + d09_b16;
+        float d15_b16 = d10_b16 + d11_b16;
+        float d16_b16 = d08_b16 - d09_b16;
+        float d17_b16 = d10_b16 - d11_b16;
+
+        float d18_b16 = d12_b16 * kMultipliers_N4_c1;
+        float d19_b16 = d13_b16 * kMultipliers_N4_c2;
+        float d20_b16 = d14_b16 * sqrt2;
+        float d21_b16 = d15_b16 * sqrt2;
+
+        float d22_b16 = d18_b16 + d19_b16;
+        float d23_b16 = d18_b16 - d19_b16;
+
+        float d24_b16 = d22_b16 * sqrt2;
+
+        float d25_b16 = d24_b16 + d23_b16;
+
+        float d26_b16 = d00_b16 + d01_b16;
+        float d27_b16 = d21_b16 + d25_b16;
+        float d28_b16 = d20_b16 + d16_b16;
+        float d29_b16 = d25_b16 + d17_b16;
+        float d30_b16 = d00_b16 - d01_b16;
+        float d31_b16 = d17_b16 + d23_b16;
+        float d32_b16 = d26_b16 * sqrt2;
+
+        // Interleave the two 8-point results: these 16 values become out[0], out[2], ..., out[30].
+        float tmp32_b16_out1_0 = t00_b16 + t01_b16;
+        float tmp32_b16_out1_1 = d32_b16 + d27_b16;
+        float tmp32_b16_out1_2 = t21_b16 + t25_b16;
+        float tmp32_b16_out1_3 = d27_b16 + d28_b16;
+        float tmp32_b16_out1_4 = t20_b16 + t16_b16;
+        float tmp32_b16_out1_5 = d28_b16 + d29_b16;
+        float tmp32_b16_out1_6 = t25_b16 + t17_b16;
+        float tmp32_b16_out1_7 = d29_b16 + d30_b16;
+        float tmp32_b16_out1_8 = t00_b16 - t01_b16;
+        float tmp32_b16_out1_9 = d30_b16 + d31_b16;
+        float tmp32_b16_out1_10 = t17_b16 + t23_b16;
+        float tmp32_b16_out1_11 = d31_b16 + d16_b16;
+        float tmp32_b16_out1_12 = t16_b16;
+        float tmp32_b16_out1_13 = d16_b16 + d23_b16;
+        float tmp32_b16_out1_14 = t23_b16;
+        float tmp32_b16_out1_15 = d23_b16;
+
+        // ---- Odd half: mirrored differences of the row, in[k] - in[31 - k] ----
+        float tmp32_b32_add_sub_16 = in[0 + 32 * i] - in[31 + 32 * i];
+        float tmp32_b32_add_sub_17 = in[1 + 32 * i] - in[30 + 32 * i];
+        float tmp32_b32_add_sub_18 = in[2 + 32 * i] - in[29 + 32 * i];
+        float tmp32_b32_add_sub_19 = in[3 + 32 * i] - in[28 + 32 * i];
+        float tmp32_b32_add_sub_20 = in[4 + 32 * i] - in[27 + 32 * i];
+        float tmp32_b32_add_sub_21 = in[5 + 32 * i] - in[26 + 32 * i];
+        float tmp32_b32_add_sub_22 = in[6 + 32 * i] - in[25 + 32 * i];
+        float tmp32_b32_add_sub_23 = in[7 + 32 * i] - in[24 + 32 * i];
+        float tmp32_b32_add_sub_24 = in[8 + 32 * i] - in[23 + 32 * i];
+        float tmp32_b32_add_sub_25 = in[9 + 32 * i] - in[22 + 32 * i];
+        float tmp32_b32_add_sub_26 = in[10 + 32 * i] - in[21 + 32 * i];
+        float tmp32_b32_add_sub_27 = in[11 + 32 * i] - in[20 + 32 * i];
+        float tmp32_b32_add_sub_28 = in[12 + 32 * i] - in[19 + 32 * i];
+        float tmp32_b32_add_sub_29 = in[13 + 32 * i] - in[18 + 32 * i];
+        float tmp32_b32_add_sub_30 = in[14 + 32 * i] - in[17 + 32 * i];
+        float tmp32_b32_add_sub_31 = in[15 + 32 * i] - in[16 + 32 * i];
+
+        // Scale each difference by its 32-point multiplier.
+        float tmp32_b32_mul_16 = tmp32_b32_add_sub_16 * kMultipliers_N32_0;
+        float tmp32_b32_mul_17 = tmp32_b32_add_sub_17 * kMultipliers_N32_1;
+        float tmp32_b32_mul_18 = tmp32_b32_add_sub_18 * kMultipliers_N32_2;
+        float tmp32_b32_mul_19 = tmp32_b32_add_sub_19 * kMultipliers_N32_3;
+        float tmp32_b32_mul_20 = tmp32_b32_add_sub_20 * kMultipliers_N32_4;
+        float tmp32_b32_mul_21 = tmp32_b32_add_sub_21 * kMultipliers_N32_5;
+        float tmp32_b32_mul_22 = tmp32_b32_add_sub_22 * kMultipliers_N32_6;
+        float tmp32_b32_mul_23 = tmp32_b32_add_sub_23 * kMultipliers_N32_7;
+        float tmp32_b32_mul_24 = tmp32_b32_add_sub_24 * kMultipliers_N32_8;
+        float tmp32_b32_mul_25 = tmp32_b32_add_sub_25 * kMultipliers_N32_9;
+        float tmp32_b32_mul_26 = tmp32_b32_add_sub_26 * kMultipliers_N32_10;
+        float tmp32_b32_mul_27 = tmp32_b32_add_sub_27 * kMultipliers_N32_11;
+        float tmp32_b32_mul_28 = tmp32_b32_add_sub_28 * kMultipliers_N32_12;
+        float tmp32_b32_mul_29 = tmp32_b32_add_sub_29 * kMultipliers_N32_13;
+        float tmp32_b32_mul_30 = tmp32_b32_add_sub_30 * kMultipliers_N32_14;
+        float tmp32_b32_mul_31 = tmp32_b32_add_sub_31 * kMultipliers_N32_15;
+
+        // From here on the odd half repeats the 16-point network used for the even half.
+        float tmp16_0_b32 = tmp32_b32_mul_16 + tmp32_b32_mul_31;
+        float tmp16_1_b32 = tmp32_b32_mul_17 + tmp32_b32_mul_30;
+        float tmp16_2_b32 = tmp32_b32_mul_18 + tmp32_b32_mul_29;
+        float tmp16_3_b32 = tmp32_b32_mul_19 + tmp32_b32_mul_28;
+        float tmp16_4_b32 = tmp32_b32_mul_20 + tmp32_b32_mul_27;
+        float tmp16_5_b32 = tmp32_b32_mul_21 + tmp32_b32_mul_26;
+        float tmp16_6_b32 = tmp32_b32_mul_22 + tmp32_b32_mul_25;
+        float tmp16_7_b32 = tmp32_b32_mul_23 + tmp32_b32_mul_24;
+        float tmp16_8_b32 = tmp32_b32_mul_16 - tmp32_b32_mul_31;
+        float tmp16_9_b32 = tmp32_b32_mul_17 - tmp32_b32_mul_30;
+        float tmp16_10_b32 = tmp32_b32_mul_18 - tmp32_b32_mul_29;
+        float tmp16_11_b32 = tmp32_b32_mul_19 - tmp32_b32_mul_28;
+        float tmp16_12_b32 = tmp32_b32_mul_20 - tmp32_b32_mul_27;
+        float tmp16_13_b32 = tmp32_b32_mul_21 - tmp32_b32_mul_26;
+        float tmp16_14_b32 = tmp32_b32_mul_22 - tmp32_b32_mul_25;
+        float tmp16_15_b32 = tmp32_b32_mul_23 - tmp32_b32_mul_24;
+
+        float tmp8_0_b32 = tmp16_0_b32 + tmp16_7_b32;
+        float tmp8_1_b32 = tmp16_1_b32 + tmp16_6_b32;
+        float tmp8_2_b32 = tmp16_2_b32 + tmp16_5_b32;
+        float tmp8_3_b32 = tmp16_3_b32 + tmp16_4_b32;
+        float tmp8_4_b32 = tmp16_0_b32 - tmp16_7_b32;
+        float tmp8_5_b32 = tmp16_1_b32 - tmp16_6_b32;
+        float tmp8_6_b32 = tmp16_2_b32 - tmp16_5_b32;
+        float tmp8_7_b32 = tmp16_3_b32 - tmp16_4_b32;
+
+        float t00_b32 = tmp8_0_b32 + tmp8_3_b32;
+        float t01_b32 = tmp8_1_b32 + tmp8_2_b32;
+        float t02_b32 = tmp8_0_b32 - tmp8_3_b32;
+        float t03_b32 = tmp8_1_b32 - tmp8_2_b32;
+        float t04_b32 = tmp8_4_b32 * kMultipliers_N8_c1;
+        float t05_b32 = tmp8_5_b32 * kMultipliers_N8_c2;
+        float t06_b32 = tmp8_6_b32 * kMultipliers_N8_c3;
+        float t07_b32 = tmp8_7_b32 * kMultipliers_N8_c4;
+
+        float t08_b32 = t02_b32 * kMultipliers_N4_c1;
+        float t09_b32 = t03_b32 * kMultipliers_N4_c2;
+        float t10_b32 = t04_b32 + t07_b32;
+        float t11_b32 = t05_b32 + t06_b32;
+        float t12_b32 = t04_b32 - t07_b32;
+        float t13_b32 = t05_b32 - t06_b32;
+
+        float t14_b32 = t08_b32 + t09_b32;
+        float t15_b32 = t10_b32 + t11_b32;
+        float t16_b32 = t08_b32 - t09_b32;
+        float t17_b32 = t10_b32 - t11_b32;
+
+        float t18_b32 = t12_b32 * kMultipliers_N4_c1;
+        float t19_b32 = t13_b32 * kMultipliers_N4_c2;
+        float t20_b32 = t14_b32 * sqrt2;
+        float t21_b32 = t15_b32 * sqrt2;
+
+        float t22_b32 = t18_b32 + t19_b32;
+        float t23_b32 = t18_b32 - t19_b32;
+
+        float t24_b32 = t22_b32 * sqrt2;
+
+        float t25_b32 = t24_b32 + t23_b32;
+
+        float t26_b32 = kMultipliers_N16_0 * tmp16_8_b32;
+        float t27_b32 = kMultipliers_N16_1 * tmp16_9_b32;
+        float t28_b32 = kMultipliers_N16_2 * tmp16_10_b32;
+        float t29_b32 = kMultipliers_N16_3 * tmp16_11_b32;
+        float t30_b32 = kMultipliers_N16_4 * tmp16_12_b32;
+        float t31_b32 = kMultipliers_N16_5 * tmp16_13_b32;
+        float t32_b32 = kMultipliers_N16_6 * tmp16_14_b32;
+        float t33_b32 = kMultipliers_N16_7 * tmp16_15_b32;
+
+        float dmp8_0_b32 = t26_b32 + t33_b32;
+        float dmp8_1_b32 = t27_b32 + t32_b32;
+        float dmp8_2_b32 = t28_b32 + t31_b32;
+        float dmp8_3_b32 = t29_b32 + t30_b32;
+        float dmp8_4_b32 = t26_b32 - t33_b32;
+        float dmp8_5_b32 = t27_b32 - t32_b32;
+        float dmp8_6_b32 = t28_b32 - t31_b32;
+        float dmp8_7_b32 = t29_b32 - t30_b32;
+
+        float d00_b32 = dmp8_0_b32 + dmp8_3_b32;
+        float d01_b32 = dmp8_1_b32 + dmp8_2_b32;
+        float d02_b32 = dmp8_0_b32 - dmp8_3_b32;
+        float d03_b32 = dmp8_1_b32 - dmp8_2_b32;
+        float d04_b32 = dmp8_4_b32 * kMultipliers_N8_c1;
+        float d05_b32 = dmp8_5_b32 * kMultipliers_N8_c2;
+        float d06_b32 = dmp8_6_b32 * kMultipliers_N8_c3;
+        float d07_b32 = dmp8_7_b32 * kMultipliers_N8_c4;
+
+        float d08_b32 = d02_b32 * kMultipliers_N4_c1;
+        float d09_b32 = d03_b32 * kMultipliers_N4_c2;
+        float d10_b32 = d04_b32 + d07_b32;
+        float d11_b32 = d05_b32 + d06_b32;
+        float d12_b32 = d04_b32 - d07_b32;
+        float d13_b32 = d05_b32 - d06_b32;
+
+        float d14_b32 = d08_b32 + d09_b32;
+        float d15_b32 = d10_b32 + d11_b32;
+        float d16_b32 = d08_b32 - d09_b32;
+        float d17_b32 = d10_b32 - d11_b32;
+
+        float d18_b32 = d12_b32 * kMultipliers_N4_c1;
+        float d19_b32 = d13_b32 * kMultipliers_N4_c2;
+        float d20_b32 = d14_b32 * sqrt2;
+        float d21_b32 = d15_b32 * sqrt2;
+
+        float d22_b32 = d18_b32 + d19_b32;
+        float d23_b32 = d18_b32 - d19_b32;
+
+        float d24_b32 = d22_b32 * sqrt2;
+
+        float d25_b32 = d24_b32 + d23_b32;
+
+        float d26_b32 = d00_b32 + d01_b32;
+        float d27_b32 = d21_b32 + d25_b32;
+        float d28_b32 = d20_b32 + d16_b32;
+        float d29_b32 = d25_b32 + d17_b32;
+        float d30_b32 = d00_b32 - d01_b32;
+        float d31_b32 = d17_b32 + d23_b32;
+        float d32_b32 = d26_b32 * sqrt2;
+
+        // 16-point result of the odd half, before the neighbour-combination step below.
+        float tmp32_b32_add_out2_16 = t00_b32 + t01_b32;
+        float tmp32_b32_add_out2_17 = d32_b32 + d27_b32;
+        float tmp32_b32_add_out2_18 = t21_b32 + t25_b32;
+        float tmp32_b32_add_out2_19 = d27_b32 + d28_b32;
+        float tmp32_b32_add_out2_20 = t20_b32 + t16_b32;
+        float tmp32_b32_add_out2_21 = d28_b32 + d29_b32;
+        float tmp32_b32_add_out2_22 = t25_b32 + t17_b32;
+        float tmp32_b32_add_out2_23 = d29_b32 + d30_b32;
+        float tmp32_b32_add_out2_24 = t00_b32 - t01_b32;
+        float tmp32_b32_add_out2_25 = d30_b32 + d31_b32;
+        float tmp32_b32_add_out2_26 = t17_b32 + t23_b32;
+        float tmp32_b32_add_out2_27 = d31_b32 + d16_b32;
+        float tmp32_b32_add_out2_28 = t16_b32;
+        float tmp32_b32_add_out2_29 = d16_b32 + d23_b32;
+        float tmp32_b32_add_out2_30 = t23_b32;
+        float tmp32_b32_add_out2_31 = d23_b32;
+
+        // Combine neighbours: the first term is scaled by sqrt2, every other term is the sum
+        // of itself and its successor, and the last one passes through unchanged.
+        float tmp32_b32_out2_16 = tmp32_b32_add_out2_16 * sqrt2 + tmp32_b32_add_out2_17;
+        float tmp32_b32_out2_17 = tmp32_b32_add_out2_17 + tmp32_b32_add_out2_18;
+        float tmp32_b32_out2_18 = tmp32_b32_add_out2_18 + tmp32_b32_add_out2_19;
+        float tmp32_b32_out2_19 = tmp32_b32_add_out2_19 + tmp32_b32_add_out2_20;
+        float tmp32_b32_out2_20 = tmp32_b32_add_out2_20 + tmp32_b32_add_out2_21;
+        float tmp32_b32_out2_21 = tmp32_b32_add_out2_21 + tmp32_b32_add_out2_22;
+        float tmp32_b32_out2_22 = tmp32_b32_add_out2_22 + tmp32_b32_add_out2_23;
+        float tmp32_b32_out2_23 = tmp32_b32_add_out2_23 + tmp32_b32_add_out2_24;
+        float tmp32_b32_out2_24 = tmp32_b32_add_out2_24 + tmp32_b32_add_out2_25;
+        float tmp32_b32_out2_25 = tmp32_b32_add_out2_25 + tmp32_b32_add_out2_26;
+        float tmp32_b32_out2_26 = tmp32_b32_add_out2_26 + tmp32_b32_add_out2_27;
+        float tmp32_b32_out2_27 = tmp32_b32_add_out2_27 + tmp32_b32_add_out2_28;
+        float tmp32_b32_out2_28 = tmp32_b32_add_out2_28 + tmp32_b32_add_out2_29;
+        float tmp32_b32_out2_29 = tmp32_b32_add_out2_29 + tmp32_b32_add_out2_30;
+        float tmp32_b32_out2_30 = tmp32_b32_add_out2_30 + tmp32_b32_add_out2_31;
+        float tmp32_b32_out2_31 = tmp32_b32_add_out2_31;
+
+        // Even output indices of the row come from the sums half.
+        out[0 + 32 * i] = tmp32_b16_out1_0;
+        out[2 + 32 * i] = tmp32_b16_out1_1;
+        out[4 + 32 * i] = tmp32_b16_out1_2;
+        out[6 + 32 * i] = tmp32_b16_out1_3;
+        out[8 + 32 * i] = tmp32_b16_out1_4;
+        out[10 + 32 * i] = tmp32_b16_out1_5;
+        out[12 + 32 * i] = tmp32_b16_out1_6;
+        out[14 + 32 * i] = tmp32_b16_out1_7;
+        out[16 + 32 * i] = tmp32_b16_out1_8;
+        out[18 + 32 * i] = tmp32_b16_out1_9;
+        out[20 + 32 * i] = tmp32_b16_out1_10;
+        out[22 + 32 * i] = tmp32_b16_out1_11;
+        out[24 + 32 * i] = tmp32_b16_out1_12;
+        out[26 + 32 * i] = tmp32_b16_out1_13;
+        out[28 + 32 * i] = tmp32_b16_out1_14;
+        out[30 + 32 * i] = tmp32_b16_out1_15;
+
+        // Odd output indices of the row come from the differences half.
+        out[1 + 32 * i] = tmp32_b32_out2_16;
+        out[3 + 32 * i] = tmp32_b32_out2_17;
+        out[5 + 32 * i] = tmp32_b32_out2_18;
+        out[7 + 32 * i] = tmp32_b32_out2_19;
+        out[9 + 32 * i] = tmp32_b32_out2_20;
+        out[11 + 32 * i] = tmp32_b32_out2_21;
+        out[13 + 32 * i] = tmp32_b32_out2_22;
+        out[15 + 32 * i] = tmp32_b32_out2_23;
+        out[17 + 32 * i] = tmp32_b32_out2_24;
+        out[19 + 32 * i] = tmp32_b32_out2_25;
+        out[21 + 32 * i] = tmp32_b32_out2_26;
+        out[23 + 32 * i] = tmp32_b32_out2_27;
+        out[25 + 32 * i] = tmp32_b32_out2_28;
+        out[27 + 32 * i] = tmp32_b32_out2_29;
+        out[29 + 32 * i] = tmp32_b32_out2_30;
+        out[31 + 32 * i] = tmp32_b32_out2_31;
+    }
+}
+
+// 4-point 1-D inverse DCT applied down each column of a row-major 4x4 block.
+//
+// from : 16 input coefficients; column i is from[i], from[i + 4], from[i + 8], from[i + 12].
+// to   : 16 outputs, written to the same column positions of `to`.
+//
+// The two pointer locals of the previous version were never read and have been removed.
+void hls_IDCT1D_32(float from[16], float to[16]) {
+#pragma HLS INLINE off
+    // Un-suffixed literals are doubles narrowed to float on initialisation.
+    float IDCT_kMUltipliers_N4_0 = 0.541196100146197;
+    float IDCT_kMUltipliers_N4_1 = 1.3065629648763764;
+    float sqrt2 = 1.4142135623730951f;
+
+    for (int i = 0; i < 4; i++) {
+#pragma HLS PIPELINE II = 128
+        // Even/odd reordering: in_0/in_1 are rows 0 and 2, in_2/in_3 are rows 1 and 3.
+        float tmp_IDCT_in_0 = from[i + 0];
+        float tmp_IDCT_in_1 = from[i + 8];
+        float tmp_IDCT_in_2 = from[i + 4];
+        float tmp_IDCT_in_3 = from[i + 12];
+
+        // 2-point stage on the even-indexed coefficients.
+        float tmp_IDCT_add_0 = tmp_IDCT_in_0 + tmp_IDCT_in_1;
+        float tmp_IDCT_add_1 = tmp_IDCT_in_0 - tmp_IDCT_in_1;
+
+        // Pre-combine the odd-indexed coefficients, then run the same 2-point stage.
+        float in1_dct = tmp_IDCT_in_2 * sqrt2;
+        float in2_dct = tmp_IDCT_in_3 + tmp_IDCT_in_2;
+        float tmp_IDCT_add_2 = in1_dct + in2_dct;
+        float tmp_IDCT_add_3 = in1_dct - in2_dct;
+
+        // Final butterfly: rows 0/3 and rows 1/2 are mirror images of each other.
+        to[i + 0] = IDCT_kMUltipliers_N4_0 * tmp_IDCT_add_2 + tmp_IDCT_add_0;
+        to[i + 4] = IDCT_kMUltipliers_N4_1 * tmp_IDCT_add_3 + tmp_IDCT_add_1;
+        to[i + 8] = tmp_IDCT_add_1 - IDCT_kMUltipliers_N4_1 * tmp_IDCT_add_3;
+        to[i + 12] = tmp_IDCT_add_0 - IDCT_kMUltipliers_N4_0 * tmp_IDCT_add_2;
+    }
+}
+
+// Weights every element of a row-major 4x4 block by a row factor and a column factor.
+//
+// in  : 16 input values.
+// out : out[y * 4 + x] = in[y * 4 + x] * weight[y] * weight[x], multiplied in that order.
+void hls_idct32_scale_2d(float in[16], float out[16]) {
+#pragma HLS INLINE off
+    // The same four weights are used for both the row index and the column index.
+    const float kResample[4] = {1.0f, 0.974886834621429443359375, 0.901764214038848876953125,
+                                0.78705489635467529296875};
+
+Loop_idct32_1:
+    for (int row = 0; row < 4; row++) {
+    Loop_idct32_2:
+        for (int col = 0; col < 4; col++) {
+#pragma HLS PIPELINE
+            const int idx = row * 4 + col;
+            const float weight_row = kResample[row];
+            const float weight_col = kResample[col];
+            out[idx] = in[idx] * weight_row * weight_col;
+        }
+    }
+}
+
+// Writes the transpose of the row-major 4x4 matrix `in` into `out`.
+void hls_idct_transpose4x4(float in[16], float out[16]) {
+#pragma HLS INLINE off
+Loop_idct_transpose:
+    for (int dst_row = 0; dst_row < 4; ++dst_row) {
+        for (int dst_col = 0; dst_col < 4; ++dst_col) {
+#pragma HLS PIPELINE
+            const int src = dst_col * 4 + dst_row;
+            const int dst = dst_row * 4 + dst_col;
+            out[dst] = in[src];
+        }
+    }
+}
+
+// Reads 1024 samples from the stream and stores them transposed: the k-th sample read
+// lands at from[(k % 32) * 32 + (k / 32)].
+void load_dct32(hls::stream<float>& opsin32x32_stream, float from[1024]) {
+#pragma HLS INLINE off
+load_dct32:
+    for (int col = 0; col < 32; ++col) {
+        for (int row = 0; row < 32; ++row) {
+#pragma HLS PIPELINE II = 1
+            const float sample = opsin32x32_stream.read();
+            from[row * 32 + col] = sample;
+        }
+    }
+}
+
+// Writes the transpose of the row-major 32x32 matrix `in` into `out`.
+void transpose_dct32(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+    for (int src_row = 0; src_row < 32; ++src_row) {
+        for (int src_col = 0; src_col < 32; ++src_col) {
+#pragma HLS PIPELINE II = 1
+            const float value = in[src_row * 32 + src_col];
+            out[src_col * 32 + src_row] = value;
+        }
+    }
+}
+
+// Transposes the row-major 32x32 matrix `in` into `out`, multiplying every element by 1/32.
+void transpose_scaled_dct32(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+    const float kScale = 1.0f / 32.0f;
+    for (int src_row = 0; src_row < 32; ++src_row) {
+        for (int src_col = 0; src_col < 32; ++src_col) {
+#pragma HLS PIPELINE II = 1
+            out[src_col * 32 + src_row] = kScale * in[src_row * 32 + src_col];
+        }
+    }
+}
+
+// Copies the whole 32x32 block to `to_ac` and its top-left 4x4 corner to `to_dc`.
+//
+// in    : 1024 values, row-major 32x32.
+// to_ac : receives all 1024 values unchanged.
+// to_dc : receives in[32 * i + j] for i, j in 0..3, stored row-major 4x4.
+void split_ac_dc_dct32(float in[1024], float to_ac[1024], float to_dc[16]) {
+#pragma HLS INLINE off
+    for (int row = 0; row < 32; ++row) {
+        for (int col = 0; col < 32; ++col) {
+#pragma HLS PIPELINE II = 1
+            const int idx = 32 * row + col;
+            const float coef = in[idx];
+            to_ac[idx] = coef;
+
+            const bool in_corner = (row < 4) && (col < 4);
+            if (in_corner) {
+                to_dc[row * 4 + col] = coef;
+            }
+        }
+    }
+}
+
+// Multiplies every element of the 32x32 block by 1/32; the layout is left unchanged.
+void scaled_dct32(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+    const float kScale = 1.0f / 32.0f;
+    for (int row = 0; row < 32; ++row) {
+        for (int col = 0; col < 32; ++col) {
+#pragma HLS PIPELINE II = 1
+            const int idx = 32 * row + col;
+            out[idx] = kScale * in[idx];
+        }
+    }
+}
+
+// Forwards the 1024 AC values of one 32x32 block to the output stream, or drops the block.
+//
+// x32, y32            : position of the 32x32 block inside its tile.
+// stream_rectx32/y32  : one value is read from each per call (on the first iteration).
+//                       Presumably the rectangle size in 8x8-block units - TODO confirm.
+// in                  : the 1024 values to forward.
+// ac_coef32x32_stream : receives in[0..1023] only when 4 * x32 + 3 < rect_xsize and
+//                       4 * y32 + 3 < rect_ysize; otherwise nothing is written.
+void feed_dct32_ac(uint32_t x32,
+                   uint32_t y32,
+                   hls::stream<uint8_t>& stream_rectx32,
+                   hls::stream<uint8_t>& stream_recty32,
+                   float in[1024],
+                   hls::stream<float>& ac_coef32x32_stream) {
+#pragma HLS INLINE off
+    uint32_t rect_xsize;
+    uint32_t rect_ysize;
+loop_feed_dct32_ac:
+    for (int idx = 0; idx < 1024; ++idx) {
+#pragma HLS PIPELINE II = 1
+        // The rectangle size is fetched inside the pipelined loop, once per call.
+        if (idx == 0) {
+            rect_xsize = stream_rectx32.read();
+            rect_ysize = stream_recty32.read();
+        }
+        const bool fits_x = (4 * x32 + 3) < rect_xsize;
+        const bool fits_y = (4 * y32 + 3) < rect_ysize;
+        if (fits_x && fits_y) {
+            ac_coef32x32_stream.write(in[idx]);
+        }
+    }
+}
+
+// Forwards the 16 DC values of one 32x32 block to the output stream, or drops the block.
+//
+// x32, y32            : position of the 32x32 block inside its tile.
+// stream_rectx32/y32  : one value is read from each per call (on the first iteration).
+//                       Presumably the rectangle size in 8x8-block units - TODO confirm.
+// dc_mem              : the 16 values to forward.
+// dc_coef32x32_stream : receives dc_mem[0..15] only when 4 * x32 + 3 < rect_xsize and
+//                       4 * y32 + 3 < rect_ysize; otherwise nothing is written.
+void feed_dct32_dc(uint32_t x32,
+                   uint32_t y32,
+                   hls::stream<uint8_t>& stream_rectx32,
+                   hls::stream<uint8_t>& stream_recty32,
+                   float dc_mem[16],
+                   hls::stream<float>& dc_coef32x32_stream) {
+#pragma HLS INLINE off
+    uint32_t rect_xsize;
+    uint32_t rect_ysize;
+loop_feed_dct32_dc:
+    for (int idx = 0; idx < 16; ++idx) {
+#pragma HLS PIPELINE II = 1
+        // The rectangle size is fetched inside the pipelined loop, once per call.
+        if (idx == 0) {
+            rect_xsize = stream_rectx32.read();
+            rect_ysize = stream_recty32.read();
+        }
+        const bool fits_x = (4 * x32 + 3) < rect_xsize;
+        const bool fits_y = (4 * y32 + 3) < rect_ysize;
+        if (fits_x && fits_y) {
+            dc_coef32x32_stream.write(dc_mem[idx]);
+        }
+    }
+}
+
+// Runs the 32x32 DCT pipeline over every 32x32 block of every 64x64 tile.
+//
+// ysize, xsize        : image dimensions; each is rounded up to a whole number of 64-wide tiles.
+// stream_rect*32      : rectangle sizes consumed by the AC writer, one pair per inner iteration.
+// stream_rect*32_1    : rectangle sizes consumed by the DC writer, one pair per inner iteration.
+// opsin32x32_stream   : input samples, 1024 per inner iteration.
+// ac_coef32x32_stream : AC output, 1024 values for each block that passes the rectangle test.
+// dc_coef32x32_stream : DC output, 16 values for each block that passes the rectangle test.
+//
+// Each tile is visited as 2x2 blocks and each block is processed three times (loop variable c).
+//
+// Fixes over the previous version: the bind_storage pragmas for temp5, dc_mem and
+// dc_temp0/1/2 named other arrays (copy/paste), the unused temp0 buffer is gone, and the
+// tile counts are unsigned so they match the uint32_t loop counters.
+void hls_dct32x32_module(unsigned ysize,
+                         unsigned xsize,
+                         hls::stream<uint8_t>& stream_recty32,
+                         hls::stream<uint8_t>& stream_rectx32,
+                         hls::stream<uint8_t>& stream_recty32_1,
+                         hls::stream<uint8_t>& stream_rectx32_1,
+                         hls::stream<float>& opsin32x32_stream,
+                         hls::stream<float>& ac_coef32x32_stream,
+                         hls::stream<float>& dc_coef32x32_stream) {
+#pragma HLS INLINE off
+    // Number of 64x64 tiles needed to cover the image in each direction (rounded up).
+    const uint32_t ysize64 = (ysize + 63) / 64;
+    const uint32_t xsize64 = (xsize + 63) / 64;
+
+    // Buffers between the dataflow stages; every array is bound to dual-port BRAM.
+    float from[1024];
+#pragma HLS bind_storage variable = from type = ram_2p impl = bram
+    float temp1[1024];
+#pragma HLS bind_storage variable = temp1 type = ram_2p impl = bram
+    float temp2[1024];
+#pragma HLS bind_storage variable = temp2 type = ram_2p impl = bram
+    float temp3[1024];
+#pragma HLS bind_storage variable = temp3 type = ram_2p impl = bram
+    float temp4[1024];
+#pragma HLS bind_storage variable = temp4 type = ram_2p impl = bram
+    float temp5[1024];
+#pragma HLS bind_storage variable = temp5 type = ram_2p impl = bram
+    float to_ac[1024];
+#pragma HLS bind_storage variable = to_ac type = ram_2p impl = bram
+    float to_dc[16];
+#pragma HLS bind_storage variable = to_dc type = ram_2p impl = bram
+    float dc_mem[16];
+#pragma HLS bind_storage variable = dc_mem type = ram_2p impl = bram
+    float dc_temp0[16];
+#pragma HLS bind_storage variable = dc_temp0 type = ram_2p impl = bram
+    float dc_temp1[16];
+#pragma HLS bind_storage variable = dc_temp1 type = ram_2p impl = bram
+    float dc_temp2[16];
+#pragma HLS bind_storage variable = dc_temp2 type = ram_2p impl = bram
+
+loop_dct32_tile_y:
+    for (uint32_t y64 = 0; y64 < ysize64; y64++) {
+        for (uint32_t x64 = 0; x64 < xsize64; x64++) {
+            for (uint32_t y32 = 0; y32 < 2; y32++) {
+                for (uint32_t x32 = 0; x32 < 2; x32++) {
+                    for (int c = 0; c < 3; c++) {
+#pragma HLS DATAFLOW
+                        // 2-D DCT: 1-D pass, scale by 1/32, transpose, 1-D pass,
+                        // transpose and scale by 1/32 again.
+                        load_dct32(opsin32x32_stream, from);
+                        hls_DCT1DImpl_32(from, temp1);
+                        scaled_dct32(temp1, temp2);
+                        transpose_dct32(temp2, temp3);
+                        hls_DCT1DImpl_32(temp3, temp4);
+                        transpose_scaled_dct32(temp4, temp5);
+                        split_ac_dc_dct32(temp5, to_ac, to_dc);
+                        // output ac_coeff dct32
+                        feed_dct32_ac(x32, y32, stream_rectx32, stream_recty32, to_ac, ac_coef32x32_stream);
+                        // output dc_coeff dct32: weight the 4x4 corner, then a 4-point
+                        // inverse transform along both axes.
+                        hls_idct32_scale_2d(to_dc, dc_temp0);
+                        hls_IDCT1D_32(dc_temp0, dc_temp1);
+                        hls_idct_transpose4x4(dc_temp1, dc_temp2);
+                        hls_IDCT1D_32(dc_temp2, dc_mem);
+                        feed_dct32_dc(x32, y32, stream_rectx32_1, stream_recty32_1, dc_mem, dc_coef32x32_stream);
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Returns (a + b - 1) / b, i.e. a / b rounded up when a >= 0 and b > 0.
+int Div_Ceil2(int a, int b) {
+#pragma HLS inline
+    const int biased = a + b - 1;
+    return biased / b;
+}
+
+// Fans the per-group rectangle sizes out to the 8x8, 16x16 and 32x32 DCT pipelines.
+//
+// xsize, ysize         : image dimensions; the number of groups per direction is
+//                        ceil((size / 8) / 8).
+// stream_rect*_dct     : source of one (rect_ysize, rect_xsize) pair per group, read in
+//                        that order.
+// stream_rect*32[_1]   : each receives 12 copies of the pair per group.
+// stream_rect*16[_1]   : each receives 48 copies of the pair per group.
+// stream_rect*8[_1]    : each receives 192 copies of the pair per group.
+//
+// NOTE(review): 12 and 48 match the 2x2x3 and 4x4x3 inner iterations of the 32x32 and
+// 16x16 modules; 192 is presumably 8x8x3 for the 8x8 module - confirm.
+void GetRectSizeDCT(short xsize,
+                    short ysize,
+                    hls::stream<uint8_t>& stream_rectx_dct,
+                    hls::stream<uint8_t>& stream_recty_dct,
+                    hls::stream<uint8_t>& stream_rectx32,
+                    hls::stream<uint8_t>& stream_recty32,
+                    hls::stream<uint8_t>& stream_rectx32_1,
+                    hls::stream<uint8_t>& stream_recty32_1,
+                    hls::stream<uint8_t>& stream_rectx16,
+                    hls::stream<uint8_t>& stream_recty16,
+                    hls::stream<uint8_t>& stream_rectx16_1,
+                    hls::stream<uint8_t>& stream_recty16_1,
+                    hls::stream<uint8_t>& stream_rectx8,
+                    hls::stream<uint8_t>& stream_recty8,
+                    hls::stream<uint8_t>& stream_rectx8_1,
+                    hls::stream<uint8_t>& stream_recty8_1) {
+    const uint16_t blocks_x = xsize / 8;
+    const uint16_t blocks_y = ysize / 8;
+    // Group counts do not change inside the loops, so they are computed once.
+    const int groups_y = Div_Ceil2(blocks_y, 8);
+    const int groups_x = Div_Ceil2(blocks_x, 8);
+LOOP_0:
+    for (uint16_t gy = 0; gy < groups_y; gy++) {
+    LOOP_1:
+        for (uint16_t gx = 0; gx < groups_x; gx++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+            // The rectangle size comes from upstream rather than being derived here.
+            const uint8_t rect_ysize = stream_recty_dct.read();
+            const uint8_t rect_xsize = stream_rectx_dct.read();
+            for (int rep = 0; rep < 192; rep++) {
+                const bool feed32 = rep < 12;
+                const bool feed16 = rep < 48;
+                if (feed32) {
+                    stream_rectx32.write(rect_xsize);
+                    stream_recty32.write(rect_ysize);
+                    stream_rectx32_1.write(rect_xsize);
+                    stream_recty32_1.write(rect_ysize);
+                }
+                if (feed16) {
+                    stream_rectx16.write(rect_xsize);
+                    stream_recty16.write(rect_ysize);
+                    stream_rectx16_1.write(rect_xsize);
+                    stream_recty16_1.write(rect_ysize);
+                }
+                stream_rectx8.write(rect_xsize);
+                stream_recty8.write(rect_ysize);
+                stream_rectx8_1.write(rect_xsize);
+                stream_recty8_1.write(rect_ysize);
+            }
+        }
+    }
+}
+
+// Top level of the DCT stage: distributes the rectangle sizes and runs the 8x8, 16x16 and
+// 32x32 DCT modules, each on its own input stream and its own AC/DC output streams.
+//
+// ysize, xsize         : image dimensions, forwarded to every callee.
+// stream_rect*_dct     : rectangle sizes, consumed by GetRectSizeDCT.
+// opsin*_stream        : input samples for the three block sizes.
+// ac_coef*/dc_coef*    : AC and DC outputs for the three block sizes.
+void hls_dct_top(unsigned ysize,
+                 unsigned xsize,
+                 hls::stream<uint8_t>& stream_rectx_dct,
+                 hls::stream<uint8_t>& stream_recty_dct,
+                 hls::stream<float>& opsin8x8_stream,
+                 hls::stream<float>& opsin16x16_stream,
+                 hls::stream<float>& opsin32x32_stream,
+                 hls::stream<float>& ac_coef8x8_stream,
+                 hls::stream<float>& ac_coef16x16_stream,
+                 hls::stream<float>& ac_coef32x32_stream,
+                 hls::stream<float>& dc_coef8x8_stream,
+                 hls::stream<float>& dc_coef16x16_stream,
+                 hls::stream<float>& dc_coef32x32_stream) {
+    // #pragma HLS INLINE
+    // Internal FIFOs (declared with depth 1024) carrying rectangle sizes to the modules.
+    // Each module gets two x/y pairs; in the 16x16 and 32x32 modules the plain pair feeds
+    // the AC writer and the `_1` pair the DC writer (presumably the same for 8x8 - confirm).
+    hls::stream<uint8_t, 1024> stream_rectx32;
+    hls::stream<uint8_t, 1024> stream_recty32;
+    hls::stream<uint8_t, 1024> stream_rectx32_1;
+    hls::stream<uint8_t, 1024> stream_recty32_1;
+    hls::stream<uint8_t, 1024> stream_rectx16;
+    hls::stream<uint8_t, 1024> stream_recty16;
+    hls::stream<uint8_t, 1024> stream_rectx16_1;
+    hls::stream<uint8_t, 1024> stream_recty16_1;
+    hls::stream<uint8_t, 1024> stream_rectx8;
+    hls::stream<uint8_t, 1024> stream_recty8;
+    hls::stream<uint8_t, 1024> stream_rectx8_1;
+    hls::stream<uint8_t, 1024> stream_recty8_1;
+// #pragma HLS DATAFLOW
+// This function is inlined into its caller; DATAFLOW is deliberately not applied here.
+#pragma HLS INLINE
+    // Note the argument order: GetRectSizeDCT takes (xsize, ysize) as `short`, so the
+    // unsigned values are narrowed here; the three modules take (ysize, xsize).
+    GetRectSizeDCT(xsize, ysize, stream_rectx_dct, stream_recty_dct, stream_rectx32, stream_recty32, stream_rectx32_1,
+                   stream_recty32_1, stream_rectx16, stream_recty16, stream_rectx16_1, stream_recty16_1, stream_rectx8,
+                   stream_recty8, stream_rectx8_1, stream_recty8_1);
+    hls_dct8x8_module(ysize, xsize, stream_recty8, stream_rectx8, stream_recty8_1, stream_rectx8_1, opsin8x8_stream,
+                      ac_coef8x8_stream, dc_coef8x8_stream);
+    hls_dct16x16_module(ysize, xsize, stream_recty16, stream_rectx16, stream_recty16_1, stream_rectx16_1,
+                        opsin16x16_stream, ac_coef16x16_stream, dc_coef16x16_stream);
+    hls_dct32x32_module(ysize, xsize, stream_recty32, stream_rectx32, stream_recty32_1, stream_rectx32_1,
+                        opsin32x32_stream, ac_coef32x32_stream, dc_coef32x32_stream);
+}
+
+//-----------------------acs_heuristic---------------------//
+
+// Integer ceiling division: for a >= 0 and b > 0, returns the smallest q such that q * b >= a.
+int Div_Ceil(int a, int b) {
+#pragma HLS inline
+    // Bias the dividend by (b - 1) so that the truncating division rounds up.
+    const int rounded_up = a + b - 1;
+    return rounded_up / b;
+}
+
+// Evaluates the rational function P(x) / Q(x), where P and Q are quadratics whose
+// coefficients are stored lowest power first (p[0] / q[0] are the constant terms).
+// Both polynomials are evaluated with Horner's scheme.
+float EvalRationalPolynomial3_2(float x, float p[3], float q[3]) {
+    const float numerator = (p[2] * x + p[1]) * x + p[0];
+    const float denominator = (q[2] * x + q[1]) * x + q[0];
+    return numerator / denominator;
+}
+
+// Fast approximation of log2(x) for positive, finite, normal floats.
+// The IEEE-754 bit pattern of x is split into an integer exponent e and a reduced value m = x / 2^e
+// (m is close to 1); the result is e + R(m - 1), where R is the rational function given by the
+// coefficient tables p (numerator) and q (denominator).
+float FastLog2f_HLS2(float x) {
+    // Union type-pun to reach the raw bit pattern of the float.
+    union {
+        float x_f;
+        int x_i;
+    } u = {x};
+    float p[3] = {-1.8503833400518310E-06f, 1.4287160470083755E+00f, 7.4245873327820566E-01f};
+    float q[3] = {9.9032814277590719E-01f, 1.0096718572241148E+00f, 1.7409343003366853E-01f};
+    int x_bits = u.x_i;
+    // Re-bias against 0x3f2aaaab (the float 2/3) so the exponent split happens at 2/3 instead of 1.
+    int exp_bits = x_bits - 0x3f2aaaab; // = 2/3
+    // Signed integer exponent e.
+    int exp_shifted = exp_bits >> 23;
+    // e << 23, obtained by clearing the 23 mantissa bits of exp_bits. This yields the same bit pattern
+    // as (exp_shifted << 23) but avoids left-shifting a negative value (the case for x < 2/3), which
+    // is undefined behaviour before C++20.
+    int exp_field = exp_bits & ~0x007fffff;
+    // Remove the exponent from x, leaving the reduced value m.
+    u.x_i = x_bits - exp_field;
+    float mantissa = u.x_f;
+    float exp_val = static_cast<float>(exp_shifted);
+    return EvalRationalPolynomial3_2(mantissa - 1.0f, p, q) + exp_val;
+}
+
+// Fast approximation of 2^x.
+// x is split into floor(x) and a fractional part; 2^floor(x) is assembled directly as an
+// IEEE-754 bit pattern and the fractional power is approximated by a cubic / cubic rational.
+// The polynomial constants are double literals, so each addition is carried out in double and
+// rounded back to float on assignment.
+float FastPow2f_HLS(float x) {
+    const int int_part = floor(x);
+    const float frac = x - int_part;
+
+    // Place the biased exponent (int_part + 127) in the exponent field to obtain 2^int_part.
+    union {
+        float x_f;
+        int x_i;
+    } pun;
+    pun.x_i = ((int_part + 127) << 23);
+    const float scale = pun.x_f;
+
+    // Numerator, Horner form, scaled by 2^int_part at the end.
+    float numer = frac + 1.01749063e+01;
+    numer = numer * frac + 4.88687798e+01;
+    numer = numer * frac + 9.85506591e+01;
+    numer = numer * scale;
+
+    // Denominator, Horner form.
+    float denom = frac * 2.10242958e-01 - 2.22328856e-02;
+    denom = denom * frac - 1.94414990e+01;
+    denom = denom * frac + 9.85506633e+01;
+
+    return numer / denom;
+}
+
+// Approximates base^exponent as 2^(exponent * log2(base)) using the fast log2 / pow2 helpers.
+float FastPowf_HLS(float base, float exponent) {
+    const float log2_base = FastLog2f_HLS2(base);
+    return FastPow2f_HLS(log2_base * exponent);
+}
+
+// Returns ceil(log2(x)) for a positive 32-bit value x.
+int CeilLog2NonzeroHLS(ap_int<32> x) {
+    const int lz = x.countLeadingZeros();
+    // For lz in [0, 31], 63 ^ (lz + 32) equals 31 - lz, i.e. floor(log2(x)).
+    const int log2_floor = 63 ^ (lz + 32);
+    // x & (x - 1) clears the lowest set bit; the result is zero only for a power of two.
+    const bool is_power_of_two = (x & (x - 1)) == 0;
+    return is_power_of_two ? log2_floor : log2_floor + 1;
+}
+
+// Fans the per-tile rectangle sizes out to five consumer stream pairs.
+// xsize / 8 and ysize / 8 give the block counts; tiles are 8 blocks on a side, so the tile grid is
+// Div_Ceil(blocks, 8) in each direction. For every tile one (y, x) size pair is read from the
+// *_acs streams and the same pair is written to stream_rect{x,y}0, 1, 2, 3 and 10.
+void GetACSSize(short xsize,
+                short ysize,
+                hls::stream<uint8_t>& stream_rectx_acs,
+                hls::stream<uint8_t>& stream_recty_acs,
+                hls::stream<uint8_t>& stream_rectx0,
+                hls::stream<uint8_t>& stream_recty0,
+                hls::stream<uint8_t>& stream_rectx1,
+                hls::stream<uint8_t>& stream_recty1,
+                hls::stream<uint8_t>& stream_rectx2,
+                hls::stream<uint8_t>& stream_recty2,
+                hls::stream<uint8_t>& stream_rectx3,
+                hls::stream<uint8_t>& stream_recty3,
+                hls::stream<uint8_t>& stream_rectx10,
+                hls::stream<uint8_t>& stream_recty10) {
+    const uint16_t blocks_x = xsize / 8;
+    const uint16_t blocks_y = ysize / 8;
+    const int tiles_x = Div_Ceil(blocks_x, 8);
+    const int tiles_y = Div_Ceil(blocks_y, 8);
+LOOP_0:
+    for (uint16_t ty = 0; ty < tiles_y; ++ty) {
+    LOOP_1:
+        for (uint16_t tx = 0; tx < tiles_x; ++tx) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+            // The sizes are taken from the input streams rather than recomputed from the
+            // loop indices; the y size is read before the x size.
+            const uint8_t tile_h = stream_recty_acs.read();
+            const uint8_t tile_w = stream_rectx_acs.read();
+
+            stream_rectx0.write(tile_w);
+            stream_recty0.write(tile_h);
+
+            stream_rectx1.write(tile_w);
+            stream_recty1.write(tile_h);
+
+            stream_rectx2.write(tile_w);
+            stream_recty2.write(tile_h);
+
+            stream_rectx3.write(tile_w);
+            stream_recty3.write(tile_h);
+
+            stream_rectx10.write(tile_w);
+            stream_recty10.write(tile_h);
+        }
+    }
+}
+
+// Duplicates the per-cell q and mask streams for the 8, 16 and 32 consumers.
+// For each of num_tile tiles a (y, x) rect size is read, then rect_y * rect_x cells are processed:
+//   - q is forwarded unchanged to stream_q_org_8, and q^8 to stream_q_org_16 and stream_q_org_32;
+//   - mask is forwarded unchanged to all three mask outputs.
+void DupQuantAndMask(uint16_t num_tile,
+                     hls::stream<uint8_t>& stream_rectx,
+                     hls::stream<uint8_t>& stream_recty,
+                     hls::stream<float>& stream_q_org,
+                     hls::stream<float>& stream_mask_org,
+                     hls::stream<float>& stream_q_org_8,
+                     hls::stream<float>& stream_mask_org_8,
+                     hls::stream<float>& stream_q_org_16,
+                     hls::stream<float>& stream_mask_org_16,
+                     hls::stream<float>& stream_q_org_32,
+                     hls::stream<float>& stream_mask_org_32) {
+DUP_0:
+    for (uint16_t tile = 0; tile < num_tile; ++tile) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        const uint8_t rows = stream_recty.read();
+        const uint8_t cols = stream_rectx.read();
+    DUP_1:
+        for (uint8_t row = 0; row < rows; ++row) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+        DUP_2:
+            for (uint8_t col = 0; col < cols; ++col) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+                const float q = stream_q_org.read();
+                stream_q_org_8.write(q);
+                // q^8 by three successive squarings, computed once and shared by the 16 and 32 paths.
+                const float q_pow2 = q * q;
+                const float q_pow4 = q_pow2 * q_pow2;
+                const float q_pow8 = q_pow4 * q_pow4;
+                stream_q_org_16.write(q_pow8);
+                stream_q_org_32.write(q_pow8);
+
+                const float mask = stream_mask_org.read();
+                stream_mask_org_8.write(mask);
+                stream_mask_org_16.write(mask);
+                stream_mask_org_32.write(mask);
+            }
+        }
+    }
+}
+
+// Per-cell q / mask stage for the 8x8 path.
+// For each tile the rect size is read and passed through to the *_out streams; then, for every
+// cell of the rect, q is forwarded unchanged and the mask value is doubled before being forwarded.
+void GetQAndMask_8(uint16_t num_tile,
+                   hls::stream<uint8_t>& stream_rectx,
+                   hls::stream<uint8_t>& stream_recty,
+                   hls::stream<uint8_t>& stream_rectx_out,
+                   hls::stream<uint8_t>& stream_recty_out,
+                   hls::stream<float>& stream_q_org,
+                   hls::stream<float>& stream_mask_org,
+                   hls::stream<float>& stream_q,
+                   hls::stream<float>& stream_mask) {
+LOOP_0:
+    for (uint16_t tile = 0; tile < num_tile; ++tile) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        const uint8_t rows = stream_recty.read();
+        const uint8_t cols = stream_rectx.read();
+        stream_recty_out.write(rows);
+        stream_rectx_out.write(cols);
+    LOOP_1:
+        for (uint8_t row = 0; row < rows; ++row) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+        LOOP_2:
+            for (uint8_t col = 0; col < cols; ++col) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+#pragma HLS pipeline II = 64
+                const float q_in = stream_q_org.read();
+                stream_q.write(q_in);
+                const float mask_in = stream_mask_org.read();
+                stream_mask.write(2.0f * mask_in);
+            }
+        }
+    }
+}
+
+template <int N> // N = edge length, in cells, of the square groups merged into one output (N * N cells each)
+void GetQAndMask_16_32(uint16_t num_tile, // number of tiles to process
+                       hls::stream<uint8_t>& stream_rectx, // in: per-tile rect x size (cells per row)
+                       hls::stream<uint8_t>& stream_recty, // in: per-tile rect y size (rows)
+                       hls::stream<uint8_t>& stream_rectx_out, // out: rect x size, passed through unchanged
+                       hls::stream<uint8_t>& stream_recty_out, // out: rect y size, passed through unchanged
+                       hls::stream<float>& stream_q_org, // in: one q value per cell, row by row
+                       hls::stream<float>& stream_mask_org, // in: one mask value per cell, same order as q
+                       hls::stream<float>& stream_q, // out: one merged q value per complete NxN group
+                       hls::stream<float>& stream_mask) { // out: one merged mask value per complete NxN group
+    uint8_t block_n = N * N; // cells per group; divisor for the means computed below
+LOOP_0: // one iteration per tile
+    for (uint16_t tid = 0; tid < num_tile; tid++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        uint8_t rect_ysize = stream_recty.read();
+        uint8_t rect_xsize = stream_rectx.read();
+        stream_recty_out.write(rect_ysize);
+        stream_rectx_out.write(rect_xsize);
+        float q_array[64]; // per-tile q buffer, indexed as row * 8 + column (assumes rects of at most 8x8 - confirm)
+#pragma HLS BIND_STORAGE variable = q_array type = RAM_1P impl = bram
+        float mask_array[64]; // per-tile mask buffer, same layout as q_array
+#pragma HLS BIND_STORAGE variable = mask_array type = RAM_1P impl = bram
+    LOOP_1: // phase 1: buffer every cell of the rect so groups can be read back in a different order
+        for (uint8_t iy = 0; iy < rect_ysize; iy++) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+#pragma HLS loop_flatten off
+        LOOP_2:
+            for (uint8_t ix = 0; ix < rect_xsize; ix++) {
+#pragma HLS pipeline II = 1
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+#pragma HLS loop_flatten off
+                int index = iy * 8 + ix;
+                q_array[index] = stream_q_org.read();
+                mask_array[index] = stream_mask_org.read();
+            }
+        }
+    LOOP_3: // phase 2: visit only groups that fit entirely inside the rect (iy + N - 1 < rect_ysize)
+        for (uint8_t iy = 0; iy + N - 1 < rect_ysize; iy += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+#pragma HLS loop_flatten off
+        LOOP_4:
+            for (uint8_t ix = 0; ix + N - 1 < rect_xsize; ix += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+#pragma HLS loop_flatten off
+                float quant_norm8 = 0; // running sum of q over the group
+                float masking = 0;
+                float masking_norm2 = 0; // running sum of mask^2 over the group
+                float masking_max = 0; // running maximum of mask over the group (starts at 0)
+            LOOP_5: // accumulate over the N x N cells of the group
+                for (uint8_t dy = 0; dy < N; dy++) {
+#pragma HLS LOOP_TRIPCOUNT min = 4 max = 4
+#pragma HLS loop_flatten off
+                LOOP_6:
+                    for (uint8_t dx = 0; dx < N; dx++) {
+#pragma HLS LOOP_TRIPCOUNT min = 4 max = 4
+#pragma HLS loop_flatten off
+#pragma HLS pipeline
+                        uint8_t idx = (iy + dy) * 8 + ix + dx;
+                        float qval = q_array[idx];
+                        quant_norm8 += qval;
+                        float maskval = mask_array[idx];
+                        masking_max = fmax(masking_max, maskval);
+                        masking_norm2 += maskval * maskval;
+                    }
+                }
+                quant_norm8 /= block_n; // mean of the group's q values
+            // Three chained sqrtf calls below compute the 8th root of that mean (x^(1/8)); they replace
+            // an earlier FastPowf_HLS(quant_norm8, 1/8) call. The unrolled equivalent would be
+            //     quant_norm8 = sqrtf(quant_norm8);   (three times in a row)
+            // The calls are kept in a loop rather than unrolled, presumably so that a single sqrtf
+            // instance can be reused for all three steps - TODO confirm against the synthesis report.
+            LOOP_7:
+                for (int dx = 0; dx < 3; dx++) {
+#pragma HLS pipeline
+                    quant_norm8 = sqrtf(quant_norm8);
+                }
+                // Previous implementation: quant_norm8 = FastPowf_HLS(quant_norm8, 1.0f / 8.0f);
+                masking_norm2 = sqrtf(masking_norm2 / block_n); // root-mean-square of the group's mask values
+                masking = masking_norm2 + masking_max; // merged mask = RMS + maximum
+                stream_q.write(quant_norm8);
+                stream_mask.write(masking);
+            }
+        }
+    }
+}
+
+template <int N> // N selects the inverse-matrix table: 1 -> *_8, 2 -> *_16, 4 -> *_32; a block holds 64 * N * N coefficients
+void ComputeEntropy1(uint16_t num_tile, // number of tiles to process
+                     hls::stream<uint8_t>& stream_rectx, // in: per-tile rect x size
+                     hls::stream<uint8_t>& stream_recty, // in: per-tile rect y size
+                     hls::stream<uint8_t>& stream_rectx_out, // out: rect x size, passed through unchanged
+                     hls::stream<uint8_t>& stream_recty_out, // out: rect y size, passed through unchanged
+                     hls::stream<float>& stream_q, // in: one q value per block (read at c == 0, i == 0)
+                     hls::stream<float>& stream_dctin, // in: one value per coefficient, 3 channels per block
+#ifdef FIX
+                     hls::stream<ap_int<28> >& stream_loss, // out: fixed-point sum of rounding errors, one per block
+                     hls::stream<ap_int<44> >& stream_loss2, // out: fixed-point sum of squared rounding errors, one per block
+                     hls::stream<ap_int<42> >& stream_entropy, // out: fixed-point entropy sum, one per channel
+                     hls::stream<ap_int<11> >& stream_nzeros // out: count of non-zero quantized values, one per channel
+#else
+                     hls::stream<float>& stream_loss, // out: partial sum of rounding errors, one per 8 coefficients
+                     hls::stream<float>& stream_loss2, // out: partial sum of squared rounding errors, one per 8 coefficients
+                     hls::stream<float>& stream_entropy, // out: partial entropy sum, one per 8 coefficients
+                     hls::stream<float>& stream_nzeros // out: partial non-zero count, one per 8 coefficients
+#endif
+                     ) {
+    uint8_t block_n = N * N;
+    int count_array; // float path: fill level of the 8-entry partial-sum arrays
+    float info_loss = 0.0; // not referenced below
+    float info_loss2 = 0.0; // not referenced below
+    float entropy = 0.0; // not referenced below
+    float zeros_mul = 7.565053364251793f; // not referenced below
+    float cost2 = 4.4628149885273363f; // constant entropy term added when the quantized magnitude exceeds 1
+    float cost_delta = 5.3359184934516337f; // weight of sqrt(quantized magnitude) in the entropy term
+    float cmap_factor;
+    float q;
+    float entropy_v[3] = {0.0, 0.0, 0.0}; // not referenced below
+    float nzeros_v[3] = {0.0, 0.0, 0.0}; // not referenced below
+    float entropy_array[8];
+    float info_loss_array[8];
+    float info_loss2_array[8];
+    float nzeros_array[8];
+    float y_ram[1024]; // float path: channel-0 coefficients of the current block, re-read for c == 1 and c == 2
+    float cmap_factors_init[3] = {0.0f, 0.0f, 1.0f}; // per-channel weight of the stored channel-0 value; only c == 2 subtracts it
+#ifdef FIX
+    ap_int<23> info_loss_fix[8]; // not referenced below
+    ap_int<45> info_loss2_fix[8]; // not referenced below
+    ap_int<11> nzeros_fix[8]; // not referenced below
+    ap_int<32> y_fix_ram[1024]; // fixed path: channel-0 coefficients of the current block
+    ap_int<32> cost2_fix = (int)(cost2 * 1024); // not referenced below
+    ap_int<32> cost_delta_fix = (int)(cost_delta * 1024); // not referenced below
+    ap_int<28> info_loss_sum;
+    ap_int<44> info_loss2_sum;
+    ap_int<11> nzeros_sum;
+    ap_int<42> entropy_sum;
+#endif
+LOOP_0: // one iteration per tile
+    for (uint16_t tid = 0; tid < num_tile; tid++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        uint16_t rect_ysize = stream_recty.read();
+        uint16_t rect_xsize = stream_rectx.read();
+        stream_recty_out.write(rect_ysize);
+        stream_rectx_out.write(rect_xsize);
+        float q_tmp[64]; // not referenced below
+    LOOP_1: // only blocks that fit entirely inside the rect are processed
+        for (uint8_t iy = 0; iy + N - 1 < rect_ysize; iy += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+        LOOP_2:
+            for (uint8_t ix = 0; ix + N - 1 < rect_xsize; ix += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+            LOOP_3: // three channels per block
+                for (uint8_t c = 0; c < 3; c++) {
+#pragma HLS LOOP_TRIPCOUNT min = 3 max = 3
+                LOOP_4: // one iteration per coefficient of the block
+                    for (uint16_t i = 0; i < block_n * 64; i += 1) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+#pragma HLS pipeline II = 1
+                        float in = stream_dctin.read();
+#ifdef FIX
+                        if (i == 0) { // start of a channel: reset the per-channel sums
+                            nzeros_sum = 0;
+                            entropy_sum = 0;
+                            if (c == 0) { // start of a block: fetch q and reset the per-block sums
+                                q = stream_q.read();
+                                info_loss_sum = 0;
+                                info_loss2_sum = 0;
+                            }
+                        }
+                        ap_int<30> in_fix = in * 0x1fffffff; // exp=29
+                        float in_fix_y_tmp; // NOTE(review): a float has a 24-bit significand, so a 30-bit in_fix may lose low bits here
+                        if (c == 0) {
+                            y_fix_ram[i] = in_fix;
+                            in_fix_y_tmp = in_fix;
+                        } else {
+                            in_fix_y_tmp = y_fix_ram[i];
+                        }
+                        ap_int<30> in_fix_y = (c == 2) ? in_fix_y_tmp : 0; // only channel 2 subtracts the stored channel-0 value
+                        ap_int<31> in_fix_m = in_fix - in_fix_y;
+
+                        ap_uint<24> im_fix;
+                        if (N == 1) {
+                            im_fix = inv_matrix_8_fix[c][i]; // exp=10
+                        }
+                        if (N == 2) {
+                            im_fix = inv_matrix_16_fix[c][i];
+                        }
+                        if (N == 4) {
+                            im_fix = inv_matrix_32_fix[c][i];
+                        }
+
+                        ap_uint<15> rqf_fix = q * 32768;          // exp=15
+                        ap_int<55> val_tmp0 = in_fix_m * im_fix;  // exp=29+10=39
+                        ap_int<28> val_tmp1 = val_tmp0 >> 27;     // exp=39-27=12
+                        ap_int<43> val_tmp2 = val_tmp1 * rqf_fix; // exp=12+15=27
+                        ap_int<35> val_fix = val_tmp2 >> 11;      // exp=27-11=16
+
+                        // actual value is not that large, so just reduce bitwidth
+                        ap_int<11> val_shift0 = val_fix >> 15;
+                        ap_int<10> val_shift1 = val_fix >> 16;
+                        if (val_shift0.range(0, 0) == 1) { // bit 15 of val_fix is the half-unit bit: round up when it is set
+                            val_shift1 += 1;
+                        }
+                        ap_int<10> rval_fix = val_shift1;                          // exp=0
+                        ap_int<32> val_shift_back = val_shift1 * 65536;            // exp=16
+                        ap_uint<16> diff_fix = hls::abs(val_shift_back - val_fix); // exp=-16 hls_abs?
+                        ap_uint<32> diff_fix_square = diff_fix * diff_fix;         // exp=-32
+                        ap_uint<10> q_fix = hls::abs(rval_fix);                    // hls_abs?
+                        bool q_fix_is_zero = q_fix == 0;
+                        float entropy_tmp = (q_fix > 1 ? cost2 : 0.0f) + sqrtf(q_fix) * cost_delta;
+                        ap_uint<32> entropy_fix = (uint32_t)(entropy_tmp * 65536); // entropy term scaled by 2^16
+
+                        info_loss_sum += diff_fix;
+                        info_loss2_sum += diff_fix_square;
+                        nzeros_sum += q_fix_is_zero ? 0 : 1;
+                        entropy_sum += entropy_fix;
+
+                        if (i == block_n * 64 - 1) { // end of a channel: emit the per-channel sums
+                            stream_entropy.write(entropy_sum);
+                            stream_nzeros.write(nzeros_sum);
+                        }
+                        if (i == block_n * 64 - 1 && c == 2) { // end of a block: emit the per-block loss sums
+                            stream_loss.write(info_loss_sum);
+                            stream_loss2.write(info_loss2_sum);
+                        }
+#else
+                        if (c == 0 && i == 0) { // start of a block: fetch q and restart the 8-entry partial-sum window
+                            q = stream_q.read();
+                            count_array = 0;
+                        }
+                        cmap_factor = cmap_factors_init[c];
+                        float in_y_tmp;
+                        if (c == 0) {
+                            y_ram[i] = in;
+                            in_y_tmp = in;
+                        } else {
+                            in_y_tmp = y_ram[i];
+                        }
+                        float in_y = in_y_tmp * cmap_factor; // zero for c == 0 and c == 1; the channel-0 value for c == 2
+                        float im;
+                        if (N == 1) {
+                            im = inv_matrix_8[c][i];
+                        }
+                        if (N == 2) {
+                            im = inv_matrix_16[c][i];
+                        }
+                        if (N == 4) {
+                            im = inv_matrix_32[c][i];
+                        }
+                        const float val = (in - in_y) * im * q; // uses the per-block float q read from stream_q
+                        const int rval = roundf(val);
+                        const float diff = fabs(val - rval); // rounding error of this coefficient
+
+                        info_loss_array[count_array] = diff;
+                        info_loss2_array[count_array] = diff * diff;
+
+                        const int q = abs(rval); // shadows the outer float q for the rest of this iteration
+                        const bool q_is_zero = q == 0;
+                        float tmp = (q >= 1.5f ? cost2 : 0.0f) + sqrtf(q) * cost_delta;
+                        entropy_array[count_array] = tmp;
+                        nzeros_array[count_array] = q_is_zero ? 0.0f : 1.0f;
+                        count_array++;
+                        if (count_array == 8) { // every 8 coefficients: emit one partial sum on each output stream
+                            float sum0 = entropy_array[0] + entropy_array[1] + entropy_array[2] + entropy_array[3] +
+                                         entropy_array[4] + entropy_array[5] + entropy_array[6] + entropy_array[7];
+                            stream_entropy.write(sum0);
+                            float sum1 = nzeros_array[0] + nzeros_array[1] + nzeros_array[2] + nzeros_array[3] +
+                                         nzeros_array[4] + nzeros_array[5] + nzeros_array[6] + nzeros_array[7];
+                            stream_nzeros.write(sum1);
+                            float sum2 = info_loss_array[0] + info_loss_array[1] + info_loss_array[2] +
+                                         info_loss_array[3] + info_loss_array[4] + info_loss_array[5] +
+                                         info_loss_array[6] + info_loss_array[7];
+                            stream_loss.write(sum2);
+                            float sum3 = info_loss2_array[0] + info_loss2_array[1] + info_loss2_array[2] +
+                                         info_loss2_array[3] + info_loss2_array[4] + info_loss2_array[5] +
+                                         info_loss2_array[6] + info_loss2_array[7];
+                            stream_loss2.write(sum3);
+                            count_array = 0;
+                        }
+#endif
+                    } // loop i
+                }
+            }
+        }
+    }
+}
+
+// Reduces streams of 8-coefficient partial sums to per-channel and per-block totals.
+// For every complete NxN block of every tile, and for each of the 3 channels, 64 * N * N / 8
+// partial sums are read from each input stream. The entropy and non-zero totals are emitted once
+// per channel; the loss and squared-loss totals accumulate across all 3 channels and are emitted
+// once per block, after the last channel. The tile rect sizes are passed through unchanged.
+template <int N>
+void ComputeEntropy2(uint16_t num_tile,
+                     hls::stream<uint8_t>& stream_rectx,
+                     hls::stream<uint8_t>& stream_recty,
+                     hls::stream<uint8_t>& stream_rectx_out,
+                     hls::stream<uint8_t>& stream_recty_out,
+                     hls::stream<float>& stream_loss,
+                     hls::stream<float>& stream_loss2,
+                     hls::stream<float>& stream_entropy,
+                     hls::stream<float>& stream_nzeros,
+                     hls::stream<float>& stream_loss_sum,
+                     hls::stream<float>& stream_loss2_sum,
+                     hls::stream<float>& stream_entropy_sum,
+                     hls::stream<float>& stream_nzeros_sum) {
+    // Number of partial sums per channel of one block.
+    const int partials_per_channel = 64 * N * N / 8;
+    float entropy_acc; // per-channel running totals
+    float nzeros_acc;
+    float loss_acc; // per-block running totals
+    float loss2_acc;
+LOOP_0:
+    for (uint16_t tile = 0; tile < num_tile; ++tile) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        const uint8_t rect_h = stream_recty.read();
+        const uint8_t rect_w = stream_rectx.read();
+        stream_recty_out.write(rect_h);
+        stream_rectx_out.write(rect_w);
+    LOOP_1:
+        for (uint8_t by = 0; by + N - 1 < rect_h; by += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+        LOOP_2:
+            for (uint8_t bx = 0; bx + N - 1 < rect_w; bx += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+            LOOP_3:
+                for (uint8_t ch = 0; ch < 3; ++ch) {
+#pragma HLS LOOP_TRIPCOUNT min = 3 max = 3
+                LOOP_4:
+                    for (uint8_t k = 0; k < partials_per_channel; ++k) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+#pragma HLS pipeline II = 8
+                        const bool first_partial = (k == 0);
+                        const bool last_partial = (k == partials_per_channel - 1);
+
+                        if (first_partial) {
+                            if (ch == 0) {
+                                // New block: restart the totals that span all channels.
+                                loss_acc = 0.0;
+                                loss2_acc = 0.0;
+                            }
+                            // New channel: restart the per-channel totals.
+                            entropy_acc = 0;
+                            nzeros_acc = 0;
+                        }
+
+                        entropy_acc += stream_entropy.read();
+                        nzeros_acc += stream_nzeros.read();
+                        loss_acc += stream_loss.read();
+                        loss2_acc += stream_loss2.read();
+
+                        if (last_partial) {
+                            stream_entropy_sum.write(entropy_acc);
+                            stream_nzeros_sum.write(nzeros_acc);
+                            if (ch == 2) {
+                                stream_loss_sum.write(loss_acc);
+                                stream_loss2_sum.write(loss2_acc);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+template <int N> // N = block edge in cells; one final entropy value is produced per complete NxN block
+void ComputeEntropy3(uint16_t num_tile, // number of tiles to process
+                     float cost1, // weight of the non-zero count in each channel's entropy
+                     float mul, // final multiplier applied to the result
+                     hls::stream<uint8_t>& stream_rectx, // in: per-tile rect x size
+                     hls::stream<uint8_t>& stream_recty, // in: per-tile rect y size
+#ifdef FIX
+                     hls::stream<ap_int<28> >& stream_loss, // in: per-block loss sum, divided by 2^16 below
+                     hls::stream<ap_int<44> >& stream_loss2, // in: per-block squared-loss sum, divided by 2^32 below
+                     hls::stream<ap_int<42> >& stream_entropy, // in: per-channel entropy sum, divided by 2^16 below
+                     hls::stream<ap_int<11> >& stream_nzeros, // in: per-channel non-zero count
+#else
+                     hls::stream<float>& stream_loss_sum, // in: per-block loss sum
+                     hls::stream<float>& stream_loss2_sum, // in: per-block squared-loss sum
+                     hls::stream<float>& stream_entropy_sum, // in: per-channel entropy sum
+                     hls::stream<float>& stream_nzeros_sum, // in: per-channel non-zero count
+#endif
+                     hls::stream<float>& stream_mask, // in: one masking value per block, multiplies the loss term
+                     hls::stream<float>& stream_entropy_final) { // out: one final entropy estimate per block
+LOOP_0: // one iteration per tile
+    for (uint16_t tid = 0; tid < num_tile; tid++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        uint8_t rect_ysize = stream_recty.read();
+        uint8_t rect_xsize = stream_rectx.read();
+    LOOP_1: // only blocks that fit entirely inside the rect are processed
+        for (uint8_t iy = 0; iy + N - 1 < rect_ysize; iy += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+        LOOP_2:
+            for (uint8_t ix = 0; ix + N - 1 < rect_xsize; ix += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+#ifdef FIX
+                float entropy = 0.0;
+                float zeros_mul = 7.565053364251793f; // weight of the bit-count term derived from the non-zero count
+                float entropy_v[3];
+                ap_int<11> nzeros_v[3];
+                float entropy_bits[3] = {0.0, 0.0, 0.0};
+                for (uint8_t c = 0; c < 3; c++) {
+#pragma HLS LOOP_TRIPCOUNT min = 3 max = 3
+#pragma HLS pipeline
+                    ap_int<42> entropy_tmp = stream_entropy.read();
+                    entropy_v[c] = entropy_tmp / 65536.0; // convert from 16 fractional bits to float
+                    nzeros_v[c] = stream_nzeros.read();
+                    entropy_v[c] += nzeros_v[c] * cost1;
+                    uint8_t nbits = LUTCeilLog2Nonzero[(nzeros_v[c] + 1)] + 1; // table lookup indexed by (count + 1)
+                    entropy_bits[c] = zeros_mul * (LUTCeilLog2Nonzero[nbits + 17] + nbits);
+                }
+                entropy = entropy_v[0] + entropy_v[1] + entropy_v[2];
+                entropy += entropy_bits[0] + entropy_bits[1] + entropy_bits[2];
+                ap_int<28> tmp_loss = stream_loss.read();
+                float loss_f = tmp_loss / 65536.0; // convert from 16 fractional bits to float
+                ap_int<44> tmp_loss2 = stream_loss2.read();
+                float loss2_f = tmp_loss2 / 65536.0 / 65536.0; // convert from 32 fractional bits to float
+                float info_loss_multiplier = 138.0f;
+                float info_loss_multiplier2 = 50.46839691767866;
+                float loss = ((info_loss_multiplier * loss_f) + (info_loss_multiplier2 * N * sqrtf(loss2_f))); // N * sqrt(x) here vs sqrt(N * N * x) in the float path
+                float loss_mask = stream_mask.read() * loss;
+                float ret = entropy + loss_mask;
+                if (N == 1) { // extra affine adjustment applied only when N == 1
+                    ret = 3.0f + 0.745f * ret;
+                }
+                ret = ret * mul;
+                stream_entropy_final.write(ret);
+#else
+                float entropy = 0.0;
+                float zeros_mul = 7.565053364251793f; // weight of the bit-count term derived from the non-zero count
+                float entropy_v[3] = {0.0, 0.0, 0.0};
+                float nzeros_v[3] = {0.0, 0.0, 0.0};
+                float entropy_bits[3] = {0.0, 0.0, 0.0};
+                for (int c = 0; c < 3; c++) {
+#pragma HLS LOOP_TRIPCOUNT min = 3 max = 3
+#pragma HLS pipeline
+
+                    entropy_v[c] = stream_entropy_sum.read();
+                    nzeros_v[c] = stream_nzeros_sum.read();
+                    entropy_v[c] += nzeros_v[c] * cost1;
+                    // The two lookups below use the LUTCeilLog2Nonzero table in place of the earlier calls:
+                    // int nbits = CeilLog2NonzeroHLS(nzeros_v[c] + 1) + 1;
+                    // entropy_bits[c] = zeros_mul * (CeilLog2NonzeroHLS(nbits + 17) + nbits);
+                    int nbits = LUTCeilLog2Nonzero[(short)(nzeros_v[c] + 1)] + 1; // float count truncated to short for indexing
+                    entropy_bits[c] = zeros_mul * (LUTCeilLog2Nonzero[nbits + 17] + nbits);
+                }
+                entropy = entropy_v[0] + entropy_v[1] + entropy_v[2];
+                entropy += entropy_bits[0] + entropy_bits[1] + entropy_bits[2];
+                float tmp_loss = stream_loss_sum.read();
+                float tmp_loss2 = stream_loss2_sum.read();
+                float info_loss_multiplier = 138.0f;
+                float info_loss_multiplier2 = 50.46839691767866;
+                float ret = entropy +
+                            stream_mask.read() * ((info_loss_multiplier * tmp_loss) +
+                                                  (info_loss_multiplier2 * sqrtf((float)(N * N * tmp_loss2))));
+                if (N == 1) { // extra affine adjustment applied only when N == 1
+                    ret = 3.0f + 0.745f * ret;
+                }
+                stream_entropy_final.write(ret * mul);
+#endif
+            }
+        }
+    }
+}
+
+template <int N> // N = block edge in 8-sample units (handled values: 1, 2, 4); each block is (8 * N) x (8 * N) samples
+void BufferN(uint16_t num_tile, // number of tiles to buffer
+             float* ping, // destination buffer used while flag is true
+             float* pang, // destination buffer used while flag is false
+             hls::stream<uint8_t>& stream_rectx, // in: per-tile rect x size
+             hls::stream<uint8_t>& stream_recty, // in: per-tile rect y size
+             hls::stream<float>& stream_dctin, // in: samples, block by block, 3 channels per block
+             hls::stream<bool>& stream_con, // out: one token written before each tile is filled
+             hls::stream<bool>& stream_ok) { // out: one token written after each tile has been filled
+    uint8_t block = N; // not referenced below
+    uint8_t block_n = N * N; // not referenced below
+    bool flag = true; // selects ping (true) or pang (false); toggled after every tile
+    uint16_t size = 4096; // stride between channels (64 * 64 samples)
+    uint8_t w = 64; // row stride within a channel
+    uint16_t total_size = 4096 * 3; // samples per tile (3 channels); used only in the non-synthesis build
+LOOP_0: // one iteration per tile
+    for (uint16_t tid = 0; tid < num_tile; tid++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        uint8_t rect_ysize = stream_recty.read();
+        uint8_t rect_xsize = stream_rectx.read();
+        stream_con.write(1);
+    LOOP_1: // block row within the tile (8 / N blocks per side)
+        for (uint8_t jy = 0; jy < 8 / N; jy += 1) {
+        LOOP_2: // block column within the tile
+            for (uint8_t jx = 0; jx < 8 / N; jx += 1) {
+            LOOP_3: // channel
+                for (uint8_t c = 0; c < 3; c++) {
+                LOOP_4: // sample row within the block
+                    for (uint8_t iy = 0; iy < 8 * N; iy += 1) {
+                    LOOP_5: // sample column within the block
+                        for (uint8_t ix = 0; ix < 8 * N; ix += 1) {
+                            uint8_t y = jy * 8 * N + iy; // sample row within the 64x64 tile
+                            uint8_t x = jx * 8 * N + ix; // sample column within the 64x64 tile
+                            bool read = false; // true only when the whole block lies inside the rect
+                            if (N == 1 && (jy < rect_ysize) && (jx < rect_xsize)) {
+                                read = true;
+                            }
+                            if (N == 2 && (jy * 2 + 1) < rect_ysize && (jx * 2 + 1) < rect_xsize) {
+                                read = true;
+                            }
+                            if (N == 4 && (jy * 4 + 3) < rect_ysize && (jx * 4 + 3) < rect_xsize) {
+                                read = true;
+                            }
+                            if (read) { // positions of blocks outside the rect are left unwritten and consume no input
+                                float tmp = stream_dctin.read();
+#ifdef __SYNTHESIS__
+                                if (flag) { // synthesis build: every tile is written at offset 0 of the selected buffer
+                                    ping[c * size + y * w + x] = tmp;
+                                } else {
+                                    pang[c * size + y * w + x] = tmp;
+                                }
+#else
+                                if (flag) { // non-synthesis build: each tile is written at its own total_size * tid offset
+                                    ping[total_size * tid + c * size + y * w + x] = tmp;
+                                } else {
+                                    pang[total_size * tid + c * size + y * w + x] = tmp;
+                                }
+#endif
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        flag = !flag;
+        stream_ok.write(1);
+    }
+}
+
+/**
+ * @brief Chain the entropy-estimation stages for the 8x8 path (template argument 1).
+ *
+ * Call order: GetQAndMask_8 -> ComputeEntropy1<1> -> ComputeEntropy2<1> (only when FIX is not
+ * defined) -> ComputeEntropy3<1>. The stages are linked by the local FIFOs declared below; each
+ * of them is given depth 1024 and bound to FIFO storage.
+ *
+ * When FIX is defined, the loss/loss2/entropy/nzeros streams carry ap_int values and go from
+ * ComputeEntropy1 straight to ComputeEntropy3. Otherwise they carry float, pass through
+ * ComputeEntropy2, and ComputeEntropy3 consumes the *_sum streams instead.
+ *
+ * @param num_tile         forwarded unchanged to every stage
+ * @param cost1            forwarded to ComputeEntropy3
+ * @param mul8x8           forwarded to ComputeEntropy3
+ * @param stream_rectx     handed to GetQAndMask_8 (presumably the per-tile rectangle width source)
+ * @param stream_recty     handed to GetQAndMask_8 (presumably the per-tile rectangle height source)
+ * @param stream_q_org     handed to GetQAndMask_8
+ * @param stream_mask_org  handed to GetQAndMask_8
+ * @param stream_dctin     handed to ComputeEntropy1
+ * @param stream_entropy_8 last argument of ComputeEntropy3 (presumably the result stream)
+ */
+void EstimateEntropy8(uint16_t num_tile,
+                      float cost1,
+                      float mul8x8,
+                      hls::stream<uint8_t>& stream_rectx,
+                      hls::stream<uint8_t>& stream_recty,
+                      hls::stream<float>& stream_q_org,
+                      hls::stream<float>& stream_mask_org,
+                      hls::stream<float>& stream_dctin,
+                      hls::stream<float>& stream_entropy_8) {
+#pragma HLS inline
+
+    // Rectangle-size FIFOs passed from stage to stage: *80 links GetQAndMask_8 to ComputeEntropy1,
+    // *81 links ComputeEntropy1 to the following stage, *82 links ComputeEntropy2 to ComputeEntropy3
+    // (the *82 pair is only referenced when FIX is not defined).
+    hls::stream<uint8_t> stream_rectx80("rectx80");
+#pragma HLS stream variable = stream_rectx80 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx80 type = fifo
+    hls::stream<uint8_t> stream_recty80("recty80");
+#pragma HLS stream variable = stream_recty80 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty80 type = fifo
+    hls::stream<uint8_t> stream_rectx81("rectx81");
+#pragma HLS stream variable = stream_rectx81 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx81 type = fifo
+    hls::stream<uint8_t> stream_recty81("recty81");
+#pragma HLS stream variable = stream_recty81 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty81 type = fifo
+    hls::stream<uint8_t> stream_rectx82("rectx82");
+#pragma HLS stream variable = stream_rectx82 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx82 type = fifo
+    hls::stream<uint8_t> stream_recty82("recty82");
+#pragma HLS stream variable = stream_recty82 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty82 type = fifo
+
+    // Streams filled by ComputeEntropy1; their element type depends on the FIX build flavour.
+#ifdef FIX
+    hls::stream<ap_int<28> > stream_loss("loss_8");
+#pragma HLS stream variable = stream_loss depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss type = fifo
+    hls::stream<ap_int<44> > stream_loss2("loss2_8");
+#pragma HLS stream variable = stream_loss2 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss2 type = fifo
+    hls::stream<ap_int<42> > stream_entropy("entropy_8");
+#pragma HLS stream variable = stream_entropy depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy type = fifo
+    hls::stream<ap_int<11> > stream_nzeros("nzeros_8");
+#pragma HLS stream variable = stream_nzeros depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_nzeros type = fifo
+#else
+    hls::stream<float> stream_loss("loss_8");
+#pragma HLS stream variable = stream_loss depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss type = fifo
+    hls::stream<float> stream_loss2("loss2_8");
+#pragma HLS stream variable = stream_loss2 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss2 type = fifo
+    hls::stream<float> stream_entropy("entropy_8");
+#pragma HLS stream variable = stream_entropy depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy type = fifo
+    hls::stream<float> stream_nzeros("nzeros_8");
+#pragma HLS stream variable = stream_nzeros depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_nzeros type = fifo
+#endif
+    // Outputs of ComputeEntropy2; only referenced when FIX is not defined.
+    hls::stream<float> stream_loss_sum("loss_8_sum");
+#pragma HLS stream variable = stream_loss_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss_sum type = fifo
+    hls::stream<float> stream_loss2_sum("loss2_8_sum");
+#pragma HLS stream variable = stream_loss2_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss2_sum type = fifo
+    hls::stream<float> stream_entropy_sum("entropy_8_sum");
+#pragma HLS stream variable = stream_entropy_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy_sum type = fifo
+    hls::stream<float> stream_nzeros_sum("nzeros_8_sum");
+#pragma HLS stream variable = stream_nzeros_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_nzeros_sum type = fifo
+    // Filled by GetQAndMask_8: stream_q goes to ComputeEntropy1, stream_mask to ComputeEntropy3.
+    hls::stream<float> stream_q("q_8");
+#pragma HLS stream variable = stream_q depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_q type = fifo
+    hls::stream<float> stream_mask("mask_8");
+#pragma HLS stream variable = stream_mask depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_mask type = fifo
+
+    // #pragma HLS dataflow
+    GetQAndMask_8(num_tile, stream_rectx, stream_recty, stream_rectx80, stream_recty80, stream_q_org, stream_mask_org,
+                  stream_q, stream_mask);
+
+    // The first stage is invoked with the same arguments in both build flavours.
+    ComputeEntropy1<1>(num_tile, stream_rectx80, stream_recty80, stream_rectx81, stream_recty81, stream_q, stream_dctin,
+                       stream_loss, stream_loss2, stream_entropy, stream_nzeros);
+
+#ifdef FIX
+    // FIX build: ComputeEntropy3 consumes the ComputeEntropy1 outputs directly.
+    ComputeEntropy3<1>(num_tile, cost1, mul8x8, stream_rectx81, stream_recty81, stream_loss, stream_loss2,
+                       stream_entropy, stream_nzeros, stream_mask, stream_entropy_8);
+#else
+    // Non-FIX build: ComputeEntropy2 sits between the first and last stage and feeds the *_sum streams.
+    ComputeEntropy2<1>(num_tile, stream_rectx81, stream_recty81, stream_rectx82, stream_recty82, stream_loss,
+                       stream_loss2, stream_entropy, stream_nzeros, stream_loss_sum, stream_loss2_sum,
+                       stream_entropy_sum, stream_nzeros_sum);
+
+    ComputeEntropy3<1>(num_tile, cost1, mul8x8, stream_rectx82, stream_recty82, stream_loss_sum, stream_loss2_sum,
+                       stream_entropy_sum, stream_nzeros_sum, stream_mask, stream_entropy_8);
+#endif
+}
+
+/**
+ * @brief Chain the entropy-estimation stages for the 16x16 path (template argument 2).
+ *
+ * Call order: GetQAndMask_16_32<2> -> ComputeEntropy1<2> -> ComputeEntropy2<2> (only when FIX is
+ * not defined) -> ComputeEntropy3<2>. The stages are linked by the local FIFOs declared below;
+ * each of them is given depth 1024 and bound to FIFO storage.
+ *
+ * When FIX is defined, the loss/loss2/entropy/nzeros streams carry ap_int values and go from
+ * ComputeEntropy1 straight to ComputeEntropy3. Otherwise they carry float, pass through
+ * ComputeEntropy2, and ComputeEntropy3 consumes the *_sum streams instead.
+ *
+ * @param num_tile          forwarded unchanged to every stage
+ * @param cost1             forwarded to ComputeEntropy3
+ * @param mul16x16          forwarded to ComputeEntropy3
+ * @param stream_rectx      handed to GetQAndMask_16_32 (presumably the per-tile rectangle width source)
+ * @param stream_recty      handed to GetQAndMask_16_32 (presumably the per-tile rectangle height source)
+ * @param stream_q_org      handed to GetQAndMask_16_32
+ * @param stream_mask_org   handed to GetQAndMask_16_32
+ * @param stream_dctin      handed to ComputeEntropy1
+ * @param stream_entropy_16 last argument of ComputeEntropy3 (presumably the result stream)
+ */
+void EstimateEntropy16(uint16_t num_tile,
+                       float cost1,
+                       float mul16x16,
+                       hls::stream<uint8_t>& stream_rectx,
+                       hls::stream<uint8_t>& stream_recty,
+                       hls::stream<float>& stream_q_org,
+                       hls::stream<float>& stream_mask_org,
+                       hls::stream<float>& stream_dctin,
+                       hls::stream<float>& stream_entropy_16) {
+#pragma HLS inline
+
+    // Rectangle-size FIFOs passed from stage to stage: *160 links GetQAndMask_16_32 to ComputeEntropy1,
+    // *161 links ComputeEntropy1 to the following stage, *162 links ComputeEntropy2 to ComputeEntropy3
+    // (the *162 pair is only referenced when FIX is not defined).
+    hls::stream<uint8_t> stream_rectx160("rectx160");
+#pragma HLS stream variable = stream_rectx160 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx160 type = fifo
+    hls::stream<uint8_t> stream_recty160("recty160");
+#pragma HLS stream variable = stream_recty160 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty160 type = fifo
+    hls::stream<uint8_t> stream_rectx161("rectx161");
+#pragma HLS stream variable = stream_rectx161 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx161 type = fifo
+    hls::stream<uint8_t> stream_recty161("recty161");
+#pragma HLS stream variable = stream_recty161 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty161 type = fifo
+    hls::stream<uint8_t> stream_rectx162("rectx162");
+#pragma HLS stream variable = stream_rectx162 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx162 type = fifo
+    hls::stream<uint8_t> stream_recty162("recty162");
+#pragma HLS stream variable = stream_recty162 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty162 type = fifo
+
+    // Streams filled by ComputeEntropy1; their element type depends on the FIX build flavour.
+#ifdef FIX
+    hls::stream<ap_int<28> > stream_loss("loss_16");
+#pragma HLS stream variable = stream_loss depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss type = fifo
+    hls::stream<ap_int<44> > stream_loss2("loss2_16");
+#pragma HLS stream variable = stream_loss2 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss2 type = fifo
+    hls::stream<ap_int<42> > stream_entropy("entropy_16");
+#pragma HLS stream variable = stream_entropy depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy type = fifo
+    hls::stream<ap_int<11> > stream_nzeros("nzeros_16");
+#pragma HLS stream variable = stream_nzeros depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_nzeros type = fifo
+#else
+    hls::stream<float> stream_loss("loss_16");
+#pragma HLS stream variable = stream_loss depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss type = fifo
+    hls::stream<float> stream_loss2("loss2_16");
+#pragma HLS stream variable = stream_loss2 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss2 type = fifo
+    hls::stream<float> stream_entropy("entropy_16");
+#pragma HLS stream variable = stream_entropy depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy type = fifo
+    hls::stream<float> stream_nzeros("nzeros_16");
+#pragma HLS stream variable = stream_nzeros depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_nzeros type = fifo
+#endif
+    // Outputs of ComputeEntropy2; only referenced when FIX is not defined.
+    hls::stream<float> stream_loss_sum("loss_16_sum");
+#pragma HLS stream variable = stream_loss_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss_sum type = fifo
+    hls::stream<float> stream_loss2_sum("loss2_16_sum");
+#pragma HLS stream variable = stream_loss2_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss2_sum type = fifo
+    hls::stream<float> stream_entropy_sum("entropy_16_sum");
+#pragma HLS stream variable = stream_entropy_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy_sum type = fifo
+    hls::stream<float> stream_nzeros_sum("nzeros_16_sum");
+#pragma HLS stream variable = stream_nzeros_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_nzeros_sum type = fifo
+    // Filled by GetQAndMask_16_32: stream_q goes to ComputeEntropy1, stream_mask to ComputeEntropy3.
+    hls::stream<float> stream_q("q_16");
+#pragma HLS stream variable = stream_q depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_q type = fifo
+    hls::stream<float> stream_mask("mask_16");
+#pragma HLS stream variable = stream_mask depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_mask type = fifo
+
+    // #pragma HLS dataflow
+    GetQAndMask_16_32<2>(num_tile, stream_rectx, stream_recty, stream_rectx160, stream_recty160, stream_q_org,
+                         stream_mask_org, stream_q, stream_mask);
+
+    // The first stage is invoked with the same arguments in both build flavours.
+    ComputeEntropy1<2>(num_tile, stream_rectx160, stream_recty160, stream_rectx161, stream_recty161, stream_q,
+                       stream_dctin, stream_loss, stream_loss2, stream_entropy, stream_nzeros);
+
+#ifdef FIX
+    // FIX build: ComputeEntropy3 consumes the ComputeEntropy1 outputs directly.
+    ComputeEntropy3<2>(num_tile, cost1, mul16x16, stream_rectx161, stream_recty161, stream_loss, stream_loss2,
+                       stream_entropy, stream_nzeros, stream_mask, stream_entropy_16);
+#else
+    // Non-FIX build: ComputeEntropy2 sits between the first and last stage and feeds the *_sum streams.
+    ComputeEntropy2<2>(num_tile, stream_rectx161, stream_recty161, stream_rectx162, stream_recty162, stream_loss,
+                       stream_loss2, stream_entropy, stream_nzeros, stream_loss_sum, stream_loss2_sum,
+                       stream_entropy_sum, stream_nzeros_sum);
+
+    ComputeEntropy3<2>(num_tile, cost1, mul16x16, stream_rectx162, stream_recty162, stream_loss_sum, stream_loss2_sum,
+                       stream_entropy_sum, stream_nzeros_sum, stream_mask, stream_entropy_16);
+#endif
+}
+
+/**
+ * @brief Chain the entropy-estimation stages for the 32x32 path (template argument 4).
+ *
+ * Call order: GetQAndMask_16_32<4> -> ComputeEntropy1<4> -> ComputeEntropy2<4> (only when FIX is
+ * not defined) -> ComputeEntropy3<4>. The stages are linked by the local FIFOs declared below;
+ * each of them is given depth 1024 and bound to FIFO storage.
+ *
+ * When FIX is defined, the loss/loss2/entropy/nzeros streams carry ap_int values and go from
+ * ComputeEntropy1 straight to ComputeEntropy3. Otherwise they carry float, pass through
+ * ComputeEntropy2, and ComputeEntropy3 consumes the *_sum streams instead.
+ *
+ * @param num_tile          forwarded unchanged to every stage
+ * @param cost1             forwarded to ComputeEntropy3
+ * @param mul32x32          forwarded to ComputeEntropy3
+ * @param stream_rectx      handed to GetQAndMask_16_32 (presumably the per-tile rectangle width source)
+ * @param stream_recty      handed to GetQAndMask_16_32 (presumably the per-tile rectangle height source)
+ * @param stream_q_org      handed to GetQAndMask_16_32
+ * @param stream_mask_org   handed to GetQAndMask_16_32
+ * @param stream_dctin      handed to ComputeEntropy1
+ * @param stream_entropy_32 last argument of ComputeEntropy3 (presumably the result stream)
+ */
+void EstimateEntropy32(uint16_t num_tile,
+                       float cost1,
+                       float mul32x32,
+                       hls::stream<uint8_t>& stream_rectx,
+                       hls::stream<uint8_t>& stream_recty,
+                       hls::stream<float>& stream_q_org,
+                       hls::stream<float>& stream_mask_org,
+                       hls::stream<float>& stream_dctin,
+                       hls::stream<float>& stream_entropy_32) {
+#pragma HLS inline
+
+    // Rectangle-size FIFOs passed from stage to stage: *320 links GetQAndMask_16_32 to ComputeEntropy1,
+    // *321 links ComputeEntropy1 to the following stage, *322 links ComputeEntropy2 to ComputeEntropy3
+    // (the *322 pair is only referenced when FIX is not defined).
+    hls::stream<uint8_t> stream_rectx320("rectx320");
+#pragma HLS stream variable = stream_rectx320 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx320 type = fifo
+    hls::stream<uint8_t> stream_recty320("recty320");
+#pragma HLS stream variable = stream_recty320 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty320 type = fifo
+    hls::stream<uint8_t> stream_rectx321("rectx321");
+#pragma HLS stream variable = stream_rectx321 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx321 type = fifo
+    hls::stream<uint8_t> stream_recty321("recty321");
+#pragma HLS stream variable = stream_recty321 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty321 type = fifo
+    hls::stream<uint8_t> stream_rectx322("rectx322");
+#pragma HLS stream variable = stream_rectx322 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx322 type = fifo
+    hls::stream<uint8_t> stream_recty322("recty322");
+#pragma HLS stream variable = stream_recty322 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty322 type = fifo
+
+    // Streams filled by ComputeEntropy1; their element type depends on the FIX build flavour.
+#ifdef FIX
+    hls::stream<ap_int<28> > stream_loss("loss_32");
+#pragma HLS stream variable = stream_loss depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss type = fifo
+    hls::stream<ap_int<44> > stream_loss2("loss2_32");
+#pragma HLS stream variable = stream_loss2 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss2 type = fifo
+    hls::stream<ap_int<42> > stream_entropy("entropy_32");
+#pragma HLS stream variable = stream_entropy depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy type = fifo
+    hls::stream<ap_int<11> > stream_nzeros("nzeros_32");
+#pragma HLS stream variable = stream_nzeros depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_nzeros type = fifo
+#else
+    hls::stream<float> stream_loss("loss_32");
+#pragma HLS stream variable = stream_loss depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss type = fifo
+    hls::stream<float> stream_loss2("loss2_32");
+#pragma HLS stream variable = stream_loss2 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss2 type = fifo
+    hls::stream<float> stream_entropy("entropy_32");
+#pragma HLS stream variable = stream_entropy depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy type = fifo
+    hls::stream<float> stream_nzeros("nzeros_32");
+#pragma HLS stream variable = stream_nzeros depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_nzeros type = fifo
+#endif
+    // Outputs of ComputeEntropy2; only referenced when FIX is not defined.
+    hls::stream<float> stream_loss_sum("loss_32_sum");
+#pragma HLS stream variable = stream_loss_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss_sum type = fifo
+    hls::stream<float> stream_loss2_sum("loss2_32_sum");
+#pragma HLS stream variable = stream_loss2_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss2_sum type = fifo
+    hls::stream<float> stream_entropy_sum("entropy_32_sum");
+#pragma HLS stream variable = stream_entropy_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy_sum type = fifo
+    hls::stream<float> stream_nzeros_sum("nzeros_32_sum");
+#pragma HLS stream variable = stream_nzeros_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_nzeros_sum type = fifo
+    // Filled by GetQAndMask_16_32: stream_q goes to ComputeEntropy1, stream_mask to ComputeEntropy3.
+    hls::stream<float> stream_q("q_32");
+#pragma HLS stream variable = stream_q depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_q type = fifo
+    hls::stream<float> stream_mask("mask_32");
+#pragma HLS stream variable = stream_mask depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_mask type = fifo
+
+    // #pragma HLS dataflow
+    GetQAndMask_16_32<4>(num_tile, stream_rectx, stream_recty, stream_rectx320, stream_recty320, stream_q_org,
+                         stream_mask_org, stream_q, stream_mask);
+
+    // The first stage is invoked with the same arguments in both build flavours.
+    ComputeEntropy1<4>(num_tile, stream_rectx320, stream_recty320, stream_rectx321, stream_recty321, stream_q,
+                       stream_dctin, stream_loss, stream_loss2, stream_entropy, stream_nzeros);
+
+#ifdef FIX
+    // FIX build: ComputeEntropy3 consumes the ComputeEntropy1 outputs directly.
+    ComputeEntropy3<4>(num_tile, cost1, mul32x32, stream_rectx321, stream_recty321, stream_loss, stream_loss2,
+                       stream_entropy, stream_nzeros, stream_mask, stream_entropy_32);
+#else
+    // Non-FIX build: ComputeEntropy2 sits between the first and last stage and feeds the *_sum streams.
+    ComputeEntropy2<4>(num_tile, stream_rectx321, stream_recty321, stream_rectx322, stream_recty322, stream_loss,
+                       stream_loss2, stream_entropy, stream_nzeros, stream_loss_sum, stream_loss2_sum,
+                       stream_entropy_sum, stream_nzeros_sum);
+
+    ComputeEntropy3<4>(num_tile, cost1, mul32x32, stream_rectx322, stream_recty322, stream_loss_sum, stream_loss2_sum,
+                       stream_entropy_sum, stream_nzeros_sum, stream_mask, stream_entropy_32);
+#endif
+}
+
+/**
+ * @brief Choose a strategy code for every 8x8 block of each tile by comparing entropy estimates.
+ *
+ * Per tile: the rectangle height and width are read (height first) and forwarded to the *_out
+ * streams, one token is written to stream_con before the block scan and one to stream_ok after it.
+ *
+ * Per block (iy, ix), in row-major order:
+ *  - one value is read from stream_entropy_8 and added to the running sum of the block's 2x2 group;
+ *    the block's strategy is set to 0;
+ *  - on the bottom-right block of a 2x2 group (iy and ix odd) one value is read from
+ *    stream_entropy_16; if it is smaller than the group's accumulated sum, all four blocks get
+ *    strategy 4 and the group's sum is replaced by that value;
+ *  - on the bottom-right block of a 4x4 group (iy % 4 == 3 and ix % 4 == 3) one value is read from
+ *    stream_entropy_32; if it is smaller than the sum of the four 2x2 group values, all sixteen
+ *    blocks get strategy 5.
+ *
+ * Strategy codes are stored at index iy * 8 + ix. Under __SYNTHESIS__ that index is used as is;
+ * otherwise it is offset by 64 * tid. The target buffer alternates between strategy_ping and
+ * strategy_pang from tile to tile, starting with strategy_ping.
+ *
+ * @param num_tile          number of tiles to process
+ * @param stream_rectx      per-tile rectangle width in blocks (read once per tile)
+ * @param stream_recty      per-tile rectangle height in blocks (read once per tile)
+ * @param stream_rectx_out  copy of the width, written once per tile
+ * @param stream_recty_out  copy of the height, written once per tile
+ * @param stream_entropy_8  one value per block
+ * @param stream_entropy_16 one value per complete 2x2 group
+ * @param stream_entropy_32 one value per complete 4x4 group
+ * @param strategy_ping     strategy buffer written on even-numbered tiles (tid = 0, 2, ...)
+ * @param strategy_pang     strategy buffer written on odd-numbered tiles
+ * @param stream_con        receives one token per tile before the scan starts
+ * @param stream_ok         receives one token per tile after the scan ends
+ */
+void CompareEntropy(uint16_t num_tile,
+                    hls::stream<uint8_t>& stream_rectx,
+                    hls::stream<uint8_t>& stream_recty,
+                    hls::stream<uint8_t>& stream_rectx_out,
+                    hls::stream<uint8_t>& stream_recty_out,
+                    hls::stream<float>& stream_entropy_8,
+                    hls::stream<float>& stream_entropy_16,
+                    hls::stream<float>& stream_entropy_32,
+                    uint8_t* strategy_ping,
+                    uint8_t* strategy_pang,
+                    hls::stream<bool>& stream_con,
+                    hls::stream<bool>& stream_ok) {
+    // true selects strategy_ping, false selects strategy_pang; toggled after every tile.
+    bool flag = true;
+LOOP_0:
+    for (uint16_t tid = 0; tid < num_tile; tid++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        uint8_t rect_ysize = stream_recty.read();
+        uint8_t rect_xsize = stream_rectx.read();
+        stream_recty_out.write(rect_ysize);
+        stream_rectx_out.write(rect_xsize);
+        float entropy_32;
+        float entropy_16;
+        float entropy_8;
+        // One running value per 2x2 group of blocks, laid out as a 4x4 grid.
+        float entropy_sum[16] = {0};
+        stream_con.write(1);
+    LOOP_1:
+        for (uint8_t iy = 0; iy < rect_ysize; iy++) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+        LOOP_2:
+            for (uint8_t ix = 0; ix < rect_xsize; ix++) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+#pragma HLS pipeline
+                uint8_t idx = iy * 8 + ix;
+                uint8_t idx_8_sum = (iy / 2) * 4 + ix / 2;
+                entropy_sum[idx_8_sum] += stream_entropy_8.read();
+                // Outside synthesis the strategy index is offset by 64 entries per tile.
+#ifdef __SYNTHESIS__
+                uint8_t dst = idx;
+#else
+                int dst = 64 * tid + idx;
+#endif
+                if (flag) {
+                    strategy_ping[dst] = 0;
+                } else {
+                    strategy_pang[dst] = 0;
+                }
+                if (iy % 2 == 1 && ix % 2 == 1) {
+                    // Bottom-right block of a 2x2 group: all four 8x8 entropies have been accumulated.
+                    entropy_16 = stream_entropy_16.read();
+                    entropy_8 = entropy_sum[idx_8_sum];
+                    if (entropy_16 < entropy_8) {
+                    LOOP_3:
+                        for (uint8_t y = iy - 1; y < iy + 1; y++) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+#pragma HLS unroll
+                        LOOP_4:
+                            for (uint8_t x = ix - 1; x < ix + 1; x++) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+#pragma HLS unroll
+                                uint8_t blk = y * 8 + x;
+#ifdef __SYNTHESIS__
+                                uint8_t blk_dst = blk;
+#else
+                                int blk_dst = 64 * tid + blk;
+#endif
+                                if (flag) {
+                                    strategy_ping[blk_dst] = 4;
+                                } else {
+                                    strategy_pang[blk_dst] = 4;
+                                }
+                            }
+                        }
+                        // iy and ix are odd here, so every (y, x) of the 2x2 group maps to idx_8_sum;
+                        // a single store replaces the four identical stores of the unrolled loops.
+                        entropy_sum[idx_8_sum] = entropy_16;
+                    }
+                    if (iy % 4 == 3 && ix % 4 == 3) {
+                        // Bottom-right block of a 4x4 group: compare the 32x32 estimate with the sum of
+                        // the four 2x2 group values (each already the smaller of its 16x16 / 8x8 cost).
+                        entropy_32 = stream_entropy_32.read();
+                        entropy_16 = 0;
+                    LOOP_5:
+                        for (uint8_t y = iy - 3; y < iy + 1; y += 2) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+#pragma HLS unroll
+                        LOOP_6:
+                            for (uint8_t x = ix - 3; x < ix + 1; x += 2) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+#pragma HLS unroll
+                                entropy_16 += entropy_sum[(y / 2) * 4 + x / 2];
+                            }
+                        }
+                        if (entropy_32 < entropy_16) {
+                        LOOP_7:
+                            for (uint8_t y = iy - 3; y < iy + 1; y++) {
+#pragma HLS LOOP_TRIPCOUNT min = 4 max = 4
+#pragma HLS unroll
+                            LOOP_8:
+                                for (uint8_t x = ix - 3; x < ix + 1; x++) {
+#pragma HLS LOOP_TRIPCOUNT min = 4 max = 4
+#pragma HLS unroll
+                                    uint8_t blk = y * 8 + x;
+#ifdef __SYNTHESIS__
+                                    uint8_t blk_dst = blk;
+#else
+                                    int blk_dst = 64 * tid + blk;
+#endif
+                                    if (flag) {
+                                        strategy_ping[blk_dst] = 5;
+                                    } else {
+                                        strategy_pang[blk_dst] = 5;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        flag = !flag;
+        stream_ok.write(1);
+    }
+}
+
+/**
+ * @brief Emit, per tile, the chosen strategy of every block followed by that block's coefficients.
+ *
+ * Per tile: the rectangle height and width are read (height first) and forwarded to the *_out
+ * streams; one token is read from each of stream_ok, stream_ok8, stream_ok16 and stream_ok32
+ * before any buffer is accessed, and one token is read from each of the four stream_con* streams
+ * after the tile has been emitted. The source buffers alternate between the ping and pang set
+ * from tile to tile, starting with ping.
+ *
+ * Blocks are scanned in row-major order; a block already marked in `visited` is skipped. For each
+ * remaining block the strategy code is read and written to both stream_strategy and
+ * stream_strategy1, then:
+ *  - strategy 4: the 2x2 block group is marked visited and 16 x 16 x 3 floats are streamed from
+ *    ping16 / pang16;
+ *  - strategy 5: the 4x4 block group is marked visited and 32 x 32 x 3 floats are streamed from
+ *    ping32 / pang32;
+ *  - any other value: the block itself is marked visited and 8 x 8 x 3 floats are streamed from
+ *    ping8 / pang8.
+ * Values are written to stream_select with y outermost, then x, then c (0..2) innermost.
+ *
+ * Source index: c * 4096 + sy * 64 + sx with sy = iy * 8 + y and sx = ix * 8 + x. Outside
+ * __SYNTHESIS__ the index is additionally offset by 12288 * tid (and the strategy index by
+ * 64 * tid).
+ *
+ * @param num_tile         number of tiles to process
+ * @param ping8,pang8      coefficient buffers read for blocks with a strategy other than 4 or 5
+ * @param ping16,pang16    coefficient buffers read for strategy 4
+ * @param ping32,pang32    coefficient buffers read for strategy 5
+ * @param strategy_ping    strategy buffer read on even-numbered tiles (tid = 0, 2, ...)
+ * @param strategy_pang    strategy buffer read on odd-numbered tiles
+ * @param stream_rectx     per-tile rectangle width in blocks (read once per tile)
+ * @param stream_recty     per-tile rectangle height in blocks (read once per tile)
+ * @param stream_rectx_out copy of the width, written once per tile
+ * @param stream_recty_out copy of the height, written once per tile
+ * @param stream_con,stream_con8,stream_con16,stream_con32 one token read from each after every tile
+ * @param stream_ok,stream_ok8,stream_ok16,stream_ok32     one token read from each before every tile
+ * @param stream_strategy  receives the strategy code of every emitted block
+ * @param stream_strategy1 receives the same strategy codes as stream_strategy
+ * @param stream_select    receives the coefficients of every emitted block
+ */
+void Reorder(uint16_t num_tile,
+             float* ping8,
+             float* pang8,
+             float* ping16,
+             float* pang16,
+             float* ping32,
+             float* pang32,
+             uint8_t* strategy_ping,
+             uint8_t* strategy_pang,
+             hls::stream<uint8_t>& stream_rectx,
+             hls::stream<uint8_t>& stream_recty,
+             hls::stream<uint8_t>& stream_rectx_out,
+             hls::stream<uint8_t>& stream_recty_out,
+             hls::stream<bool>& stream_con,
+             hls::stream<bool>& stream_ok,
+             hls::stream<bool>& stream_con8,
+             hls::stream<bool>& stream_ok8,
+             hls::stream<bool>& stream_con16,
+             hls::stream<bool>& stream_ok16,
+             hls::stream<bool>& stream_con32,
+             hls::stream<bool>& stream_ok32,
+             hls::stream<uint8_t>& stream_strategy,
+             hls::stream<uint8_t>& stream_strategy1,
+             hls::stream<float>& stream_select) {
+    // true selects the ping buffers, false the pang buffers; toggled after every tile.
+    bool flag = true;
+    // Elements per colour plane (64 * 64), row stride, and elements per tile (3 planes).
+    uint16_t size = 4096;
+    uint8_t w = 64;
+    uint16_t total_size = 4096 * 3;
+    // One bit per block of the tile (bit index iy * 8 + ix); set once the block has been emitted.
+    ap_uint<64> visited;
+LOOP_0:
+    for (uint16_t tid = 0; tid < num_tile; tid++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        uint8_t rect_ysize = stream_recty.read();
+        uint8_t rect_xsize = stream_rectx.read();
+        stream_recty_out.write(rect_ysize);
+        stream_rectx_out.write(rect_xsize);
+        // Consume one token from each "ok" stream before reading the buffers of this tile.
+        stream_ok.read();
+        stream_ok8.read();
+        stream_ok16.read();
+        stream_ok32.read();
+        visited = 0;
+    LOOP_1:
+        for (uint8_t iy = 0; iy < rect_ysize; iy++) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+        LOOP_2:
+            for (uint8_t ix = 0; ix < rect_xsize; ix++) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+                uint8_t strategy;
+                uint8_t idx = iy * 8 + ix;
+                if (visited.range(idx, idx) == 0) {
+                    // Outside synthesis the strategy index is offset by 64 entries per tile.
+#ifdef __SYNTHESIS__
+                    uint8_t sidx = idx;
+#else
+                    int sidx = tid * 64 + idx;
+#endif
+                    if (flag) {
+                        strategy = strategy_ping[sidx];
+                    } else {
+                        strategy = strategy_pang[sidx];
+                    }
+                    stream_strategy.write(strategy);
+                    stream_strategy1.write(strategy);
+                    if (strategy == 4) {
+                        // Mark the 2x2 block group so its other three blocks are skipped later.
+                    LOOP_3:
+                        for (uint8_t y = 0; y < 2; y++) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+#pragma HLS unroll
+                        LOOP_4:
+                            for (uint8_t x = 0; x < 2; x++) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+#pragma HLS unroll
+                                uint8_t bit = (iy + y) * 8 + (ix + x);
+                                visited.range(bit, bit) = 1;
+                            }
+                        }
+                    LOOP_5:
+                        for (uint8_t y = 0; y < 16; y++) {
+                        LOOP_6:
+                            for (uint8_t x = 0; x < 16; x++) {
+                            LOOP_7:
+                                for (uint8_t c = 0; c < 3; c++) {
+#pragma HLS pipeline
+                                    uint16_t sy = iy * 8 + y;
+                                    uint16_t sx = ix * 8 + x;
+#ifdef __SYNTHESIS__
+                                    uint16_t src = c * size + sy * w + sx;
+#else
+                                    int src = total_size * tid + c * size + sy * w + sx;
+#endif
+                                    float tmp;
+                                    if (flag) {
+                                        tmp = ping16[src];
+                                    } else {
+                                        tmp = pang16[src];
+                                    }
+                                    stream_select.write(tmp);
+                                }
+                            }
+                        }
+                    } else if (strategy == 5) {
+                        // Mark the 4x4 block group so its other fifteen blocks are skipped later.
+                    LOOP_8:
+                        for (uint8_t y = 0; y < 4; y++) {
+#pragma HLS LOOP_TRIPCOUNT min = 4 max = 4
+#pragma HLS unroll
+                        LOOP_9:
+                            for (uint8_t x = 0; x < 4; x++) {
+#pragma HLS LOOP_TRIPCOUNT min = 4 max = 4
+#pragma HLS unroll
+                                uint8_t bit = (iy + y) * 8 + (ix + x);
+                                visited.range(bit, bit) = 1;
+                            }
+                        }
+                    LOOP_10:
+                        for (uint8_t y = 0; y < 32; y++) {
+                        LOOP_11:
+                            for (uint8_t x = 0; x < 32; x++) {
+                            LOOP_12:
+                                for (uint8_t c = 0; c < 3; c++) {
+#pragma HLS pipeline
+                                    uint16_t sy = iy * 8 + y;
+                                    uint16_t sx = ix * 8 + x;
+#ifdef __SYNTHESIS__
+                                    uint16_t src = c * size + sy * w + sx;
+#else
+                                    int src = total_size * tid + c * size + sy * w + sx;
+#endif
+                                    float tmp;
+                                    if (flag) {
+                                        tmp = ping32[src];
+                                    } else {
+                                        tmp = pang32[src];
+                                    }
+                                    stream_select.write(tmp);
+                                }
+                            }
+                        }
+                    } else {
+                        visited.range(idx, idx) = 1;
+                    LOOP_13:
+                        for (uint8_t y = 0; y < 8; y++) {
+                        LOOP_14:
+                            for (uint8_t x = 0; x < 8; x++) {
+                            LOOP_15:
+                                for (uint8_t c = 0; c < 3; c++) {
+#pragma HLS pipeline
+                                    uint16_t sy = iy * 8 + y;
+                                    uint16_t sx = ix * 8 + x;
+#ifdef __SYNTHESIS__
+                                    uint16_t src = c * size + sy * w + sx;
+#else
+                                    int src = total_size * tid + c * size + sy * w + sx;
+#endif
+                                    float tmp;
+                                    if (flag) {
+                                        tmp = ping8[src];
+                                    } else {
+                                        tmp = pang8[src];
+                                    }
+                                    stream_select.write(tmp);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        flag = !flag;
+        // Consume one token from each "con" stream now that this tile has been emitted.
+        stream_con.read();
+        stream_con8.read();
+        stream_con16.read();
+        stream_con32.read();
+    }
+}
+
+// Drains the per-block strategy stream together with the selected DCT
+// coefficient stream and scatters the coefficients into planar buffers, one
+// set of three buffers per transform size.
+//
+// The image is walked tile by tile, each tile being a grid of up to 8x8 cells
+// (one cell = one 8x8-pixel block). For every cell that has not already been
+// covered by an earlier, larger transform, one strategy code is read:
+//   0 -> 1x1 cell  ( 8x8  samples) written to the *_8x8   buffers
+//   4 -> 2x2 cells (16x16 samples) written to the *_16x16 buffers
+//   5 -> 4x4 cells (32x32 samples) written to the *_32x32 buffers
+// Any other code consumes no coefficients and leaves the cell unmarked.
+//
+// For each sample three floats are read from stream_select, in this order:
+// first -> dcty_*, second -> dctx_*, third -> dctb_*.
+//
+// xsize, ysize      image dimensions in pixels; xsize is also the row stride
+//                   used to index every destination buffer.
+// stream_strategy   one code per transform block, in raster order of the
+//                   block's top-left cell within each tile.
+// stream_select     coefficients, 3 floats per sample, row-major per block.
+// dct{x,y,b}_NxN    destination buffers, indexed as row * xsize + col.
+//
+// NOTE(review): a 2x2 or 4x4 strategy whose top-left cell lies too close to
+// the tile's right/bottom edge produces a visited-bit index above 63; the
+// code relies on the producer never emitting such a strategy - confirm.
+void ConsumeStrategyDCT(int xsize,
+                        int ysize,
+                        hls::stream<char>& stream_strategy,
+                        hls::stream<float>& stream_select,
+                        float* dctx_8x8,
+                        float* dcty_8x8,
+                        float* dctb_8x8,
+                        float* dctx_16x16,
+                        float* dcty_16x16,
+                        float* dctb_16x16,
+                        float* dctx_32x32,
+                        float* dcty_32x32,
+                        float* dctb_32x32) {
+    const int xsize_blocks = xsize / 8;
+    const int ysize_blocks = ysize / 8;
+    const int n_enc_tiles = Div_Ceil(xsize_blocks, 8);
+    // The tile count is loop-invariant, so compute it once instead of on
+    // every evaluation of the loop condition.
+    const int n_tiles = n_enc_tiles * Div_Ceil(ysize_blocks, 8);
+    ap_uint<64> visited;
+LOOP_0:
+    for (int tid = 0; tid < n_tiles; tid++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        const int tx1 = tid % n_enc_tiles;
+        const int ty1 = tid / n_enc_tiles;
+        // Clip the tile to the image; plain integer min, no float round trip.
+        const int by = ty1 * 8;
+        const int by1 = ((ty1 + 1) * 8) < ysize_blocks ? ((ty1 + 1) * 8) : ysize_blocks;
+        const int bx = tx1 * 8;
+        const int bx1 = ((tx1 + 1) * 8) < xsize_blocks ? ((tx1 + 1) * 8) : xsize_blocks;
+        const int rect_ysize = by1 - by;
+        const int rect_xsize = bx1 - bx;
+        // One bit per cell of the 8x8 cell grid; set once a transform covers it.
+        visited = 0;
+        for (int iy = 0; iy < rect_ysize; iy++) {
+            for (int ix = 0; ix < rect_xsize; ix++) {
+                const int idx = iy * 8 + ix;
+                if (visited.range(idx, idx) == 0) {
+                    const char strategy = stream_strategy.read();
+
+                    // Map the strategy code to a block span (in cells) and to
+                    // the buffer set for that transform size. An unknown code
+                    // keeps n_cells == 0, so nothing below executes.
+                    int n_cells = 0;
+                    float* dst_y = nullptr;
+                    float* dst_x = nullptr;
+                    float* dst_b = nullptr;
+                    if (strategy == 0) {
+                        n_cells = 1;
+                        dst_y = dcty_8x8;
+                        dst_x = dctx_8x8;
+                        dst_b = dctb_8x8;
+                    } else if (strategy == 4) {
+                        n_cells = 2;
+                        dst_y = dcty_16x16;
+                        dst_x = dctx_16x16;
+                        dst_b = dctb_16x16;
+                    } else if (strategy == 5) {
+                        n_cells = 4;
+                        dst_y = dcty_32x32;
+                        dst_x = dctx_32x32;
+                        dst_b = dctb_32x32;
+                    }
+
+                    // Mark every cell covered by this transform block.
+                    for (int cy = 0; cy < n_cells; cy++) {
+                        for (int cx = 0; cx < n_cells; cx++) {
+                            const int cell = (iy + cy) * 8 + (ix + cx);
+                            visited.range(cell, cell) = 1;
+                        }
+                    }
+
+                    // Scatter the block's samples; stream order per sample is
+                    // y-channel, x-channel, b-channel.
+                    const int n_samples = n_cells * 8;
+                    for (int y = 0; y < n_samples; y++) {
+                        for (int x = 0; x < n_samples; x++) {
+                            const int pos = ((ty1 * 64 + iy * 8 + y) * xsize) + (tx1 * 64 + ix * 8 + x);
+                            dst_y[pos] = stream_select.read();
+                            dst_x[pos] = stream_select.read();
+                            dst_b[pos] = stream_select.read();
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Produces one integer quantisation value per transform block.
+//
+// For each tile the rect dimensions are read from stream_recty/stream_rectx,
+// then rect_ysize * rect_xsize raw quant-field floats are buffered from
+// stream_rqf_org into an 8-wide cell grid. The grid is then walked in raster
+// order; for every cell not yet covered by a previous block one strategy code
+// is read from stream_strategy1, the block span b is looked up in
+// strategy_block (defined elsewhere in this file), and the maximum raw value
+// over the b x b cells is scaled by inv_global_scale, rounded by adding 0.5
+// and truncating, clamped to [1, 256], and written to stream_rqf.
+//
+// num_tile          number of tiles to process.
+// inv_global_scale  multiplier applied to the block maximum before rounding.
+//
+// NOTE(review): cells outside the rect are never written to rqf_array, and a
+// block reaching past row/column 7 would index beyond the 64-entry grid; the
+// code relies on the strategy producer keeping blocks inside the rect -
+// confirm.
+void SetQuantField(uint16_t num_tile,
+                   float inv_global_scale,
+                   hls::stream<uint8_t>& stream_rectx,
+                   hls::stream<uint8_t>& stream_recty,
+                   hls::stream<float>& stream_rqf_org,
+                   hls::stream<uint8_t>& stream_strategy1,
+                   hls::stream<int>& stream_rqf) {
+    // One bit per cell of the 8x8 cell grid; set once a block covers it.
+    ap_uint<64> visited;
+LOOP_0:
+    for (uint16_t tid = 0; tid < num_tile; tid++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        uint8_t rect_ysize = stream_recty.read();
+        uint8_t rect_xsize = stream_rectx.read();
+        visited = 0;
+        float rqf_array[64];
+    LOOP_1:
+        for (uint8_t iy = 0; iy < rect_ysize; iy++) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+        LOOP_2:
+            for (uint8_t ix = 0; ix < rect_xsize; ix++) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+#pragma HLS pipeline
+                uint16_t index = iy * 8 + ix;
+                rqf_array[index] = stream_rqf_org.read();
+            }
+        }
+    LOOP_3:
+        for (uint8_t y = 0; y < rect_ysize; ++y) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+        LOOP_4:
+            for (uint8_t x = 0; x < rect_xsize; ++x) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+#pragma HLS pipeline
+                // Seed for the running maximum: a very large negative float.
+                float block_max = -3.40282e+038;
+                uint8_t idx = y * 8 + x;
+                if (visited.range(idx, idx) == 0) {
+                    uint8_t strategy = stream_strategy1.read();
+                    uint8_t b = strategy_block[strategy];
+                LOOP_5:
+                    for (uint8_t iy = 0; iy < b; iy++) {
+                    LOOP_6:
+                        for (uint8_t ix = 0; ix < b; ix++) {
+#pragma HLS pipeline
+                            uint16_t cell = (iy + y) * 8 + (ix + x);
+                            visited.range(cell, cell) = 1;
+                            float cell_val = rqf_array[cell];
+                            block_max = fmax(cell_val, block_max);
+                        }
+                    }
+                    float scaled = block_max * inv_global_scale + 0.5f;
+                    // Clamp in the float domain BEFORE converting: casting a
+                    // float outside the int16_t range (or NaN) is undefined
+                    // behaviour. For every in-range input this yields exactly
+                    // the value the cast-then-clamp sequence produced.
+                    int16_t val;
+                    if (scaled >= 256.0f) {
+                        val = 256;
+                    } else if (scaled >= 1.0f) {
+                        val = (int16_t)scaled;
+                    } else {
+                        val = 1;
+                    }
+                    stream_rqf.write(val);
+                }
+            }
+        }
+    }
+}
+
+// Fan-out stage: forwards each tile's rect dimensions to two downstream
+// consumers and duplicates the DCT coefficient stream for the same two.
+//
+// N is the transform block span in 8x8 cells. For every complete N x N group
+// of cells inside the rect, and for each of 3 channels, 64 * N * N floats are
+// copied from stream_dctin to both stream_dctout0 and stream_dctout1.
+// Partial groups at the right/bottom edge of the rect are skipped.
+template <int N>
+void DupDCT(uint16_t num_tile,
+            hls::stream<uint8_t>& stream_rectx,
+            hls::stream<uint8_t>& stream_recty,
+            hls::stream<uint8_t>& stream_rectx_out0,
+            hls::stream<uint8_t>& stream_recty_out0,
+            hls::stream<uint8_t>& stream_rectx_out1,
+            hls::stream<uint8_t>& stream_recty_out1,
+            hls::stream<float>& stream_dctin,
+            hls::stream<float>& stream_dctout0,
+            hls::stream<float>& stream_dctout1) {
+    const uint8_t cells_per_group = N * N;
+    // 64 samples per 8x8 cell.
+    const uint16_t samples_per_channel = 64 * cells_per_group;
+LOOP_0:
+    for (uint16_t tile = 0; tile < num_tile; ++tile) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        // Height is read first, then width; both are echoed to each consumer.
+        const uint8_t rows = stream_recty.read();
+        const uint8_t cols = stream_rectx.read();
+        stream_recty_out0.write(rows);
+        stream_rectx_out0.write(cols);
+        stream_recty_out1.write(rows);
+        stream_rectx_out1.write(cols);
+    LOOP_1:
+        for (uint8_t gy = 0; gy + N <= rows; gy += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+        LOOP_2:
+            for (uint8_t gx = 0; gx + N <= cols; gx += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+            LOOP_3:
+                for (uint8_t ch = 0; ch < 3; ++ch) {
+#pragma HLS LOOP_TRIPCOUNT min = 3 max = 3
+                LOOP_4:
+                    for (uint16_t s = 0; s < samples_per_channel; ++s) {
+#pragma HLS LOOP_TRIPCOUNT min = 1024 max = 1024
+#pragma HLS pipeline II = 1
+                        const float sample = stream_dctin.read();
+                        stream_dctout0.write(sample);
+                        stream_dctout1.write(sample);
+                    }
+                }
+            }
+        }
+    }
+}
+
+void ComputeTileACSHLS(uint16_t num_tile,
+                       short ysize,
+                       short xsize,
+                       float cost1,
+                       float butteraugli_target,
+                       float inv_global_scale,
+                       float mul8x8,
+                       float mul16x16,
+                       float mul32x32,
+                       hls::stream<uint8_t>& stream_rectx_acs,
+                       hls::stream<uint8_t>& stream_recty_acs,
+                       hls::stream<float>& stream_rqf_org,
+                       hls::stream<float>& stream_q_org,
+                       hls::stream<float>& stream_mask_org,
+                       hls::stream<float>& stream_dctin8,
+                       hls::stream<float>& stream_dctin16,
+                       hls::stream<float>& stream_dctin32,
+                       hls::stream<uint8_t>& stream_strategy,
+                       hls::stream<float>& stream_select,
+                       hls::stream<int>& stream_rqf) {
+#pragma HLS INLINE
+    hls::stream<uint8_t> stream_rectx0("rectx0");
+#pragma HLS stream variable = stream_rectx0 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx0 type = fifo
+    hls::stream<uint8_t> stream_recty0("recty0");
+#pragma HLS stream variable = stream_recty0 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty0 type = fifo
+
+    hls::stream<uint8_t> stream_rectx1("rectx1");
+#pragma HLS stream variable = stream_rectx1 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx1 type = fifo
+    hls::stream<uint8_t> stream_recty1("recty1");
+#pragma HLS stream variable = stream_recty1 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty1 type = fifo
+
+    hls::stream<uint8_t> stream_rectx2("rectx2");
+#pragma HLS stream variable = stream_rectx2 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx2 type = fifo
+    hls::stream<uint8_t> stream_recty2("recty2");
+#pragma HLS stream variable = stream_recty2 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty2 type = fifo
+
+    hls::stream<uint8_t> stream_rectx3("rectx3");
+#pragma HLS stream variable = stream_rectx3 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx3 type = fifo
+    hls::stream<uint8_t> stream_recty3("recty3");
+#pragma HLS stream variable = stream_recty3 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty3 type = fifo
+
+    hls::stream<uint8_t> stream_rectx4("rectx4");
+#pragma HLS stream variable = stream_rectx4 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx4 type = fifo
+    hls::stream<uint8_t> stream_recty4("recty4");
+#pragma HLS stream variable = stream_recty4 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty4 type = fifo
+
+    hls::stream<uint8_t> stream_rectx5("rectx5");
+#pragma HLS stream variable = stream_rectx5 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx5 type = fifo
+    hls::stream<uint8_t> stream_recty5("recty5");
+#pragma HLS stream variable = stream_recty5 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty5 type = fifo
+
+    hls::stream<uint8_t> stream_rectx6("rectx6");
+#pragma HLS stream variable = stream_rectx6 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx6 type = fifo
+    hls::stream<uint8_t> stream_recty6("recty6");
+#pragma HLS stream variable = stream_recty6 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty6 type = fifo
+
+    hls::stream<uint8_t> stream_rectx7("rectx7");
+#pragma HLS stream variable = stream_rectx7 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx7 type = fifo
+    hls::stream<uint8_t> stream_recty7("recty7");
+#pragma HLS stream variable = stream_recty7 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty7 type = fifo
+
+    hls::stream<uint8_t> stream_rectx8("rectx8");
+#pragma HLS stream variable = stream_rectx8 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx8 type = fifo
+    hls::stream<uint8_t> stream_recty8("recty8");
+#pragma HLS stream variable = stream_recty8 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty8 type = fifo
+
+    hls::stream<uint8_t> stream_rectx9("rectx9");
+#pragma HLS stream variable = stream_rectx9 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx9 type = fifo
+    hls::stream<uint8_t> stream_recty9("recty9");
+#pragma HLS stream variable = stream_recty9 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty9 type = fifo
+
+    hls::stream<uint8_t> stream_rectx10("rectx10");
+#pragma HLS stream variable = stream_rectx10 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx10 type = fifo
+    hls::stream<uint8_t> stream_recty10("recty10");
+#pragma HLS stream variable = stream_recty10 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty10 type = fifo
+
+    hls::stream<uint8_t> stream_rectx11("rectx11");
+#pragma HLS stream variable = stream_rectx11 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx11 type = fifo
+    hls::stream<uint8_t> stream_recty11("recty11");
+#pragma HLS stream variable = stream_recty11 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty11 type = fifo
+
+    hls::stream<uint8_t> stream_rectx12("rectx12");
+#pragma HLS stream variable = stream_rectx12 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx12 type = fifo
+    hls::stream<uint8_t> stream_recty12("recty12");
+#pragma HLS stream variable = stream_recty12 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty12 type = fifo
+
+    hls::stream<float> stream_dctin8_0("dctin8_0");
+#pragma HLS stream variable = stream_dctin8_0 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_dctin8_0 type = fifo
+    hls::stream<float> stream_dctin16_0("dctin16_0");
+#pragma HLS stream variable = stream_dctin16_0 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_dctin16_0 type = fifo
+    hls::stream<float> stream_dctin32_0("dctin32_0");
+#pragma HLS stream variable = stream_dctin32_0 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_dctin32_0 type = fifo
+    hls::stream<float> stream_dctin8_1("dctin8_1");
+#pragma HLS stream variable = stream_dctin8_1 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_dctin8_1 type = fifo
+    hls::stream<float> stream_dctin16_1("dctin16_1");
+#pragma HLS stream variable = stream_dctin16_1 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_dctin16_1 type = fifo
+    hls::stream<float> stream_dctin32_1("dctin32_1");
+#pragma HLS stream variable = stream_dctin32_1 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_dctin32_1 type = fifo
+
+    hls::stream<float> stream_entropy_8("entropy_8");
+#pragma HLS stream variable = stream_entropy_8 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy_8 type = fifo
+    hls::stream<float> stream_entropy_16("entropy_16");
+#pragma HLS stream variable = stream_entropy_16 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy_16 type = fifo
+    hls::stream<float> stream_entropy_32("entropy_32");
+#pragma HLS stream variable = stream_entropy_32 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy_32 type = fifo
+
+    hls::stream<bool> stream_con("con");
+#pragma HLS stream variable = stream_con depth = 2
+#pragma HLS BIND_STORAGE variable = stream_con type = fifo
+    hls::stream<bool> stream_ok("ok");
+#pragma HLS stream variable = stream_ok depth = 2
+#pragma HLS BIND_STORAGE variable = stream_ok type = fifo
+    hls::stream<bool> stream_con8("con8");
+#pragma HLS stream variable = stream_con8 depth = 2
+#pragma HLS BIND_STORAGE variable = stream_con8 type = fifo
+    hls::stream<bool> stream_ok8("ok");
+#pragma HLS stream variable = stream_ok8 depth = 2
+#pragma HLS BIND_STORAGE variable = stream_ok8 type = fifo
+    hls::stream<bool> stream_con16("con16");
+#pragma HLS stream variable = stream_con16 depth = 2
+#pragma HLS BIND_STORAGE variable = stream_con16 type = fifo
+    hls::stream<bool> stream_ok16("ok16");
+#pragma HLS stream variable = stream_ok16 depth = 2
+#pragma HLS BIND_STORAGE variable = stream_ok16 type = fifo
+    hls::stream<bool> stream_con32("con32");
+#pragma HLS stream variable = stream_con32 depth = 2
+#pragma HLS BIND_STORAGE variable = stream_con32 type = fifo
+    hls::stream<bool> stream_ok32("ok32");
+#pragma HLS stream variable = stream_ok32 depth = 2
+#pragma HLS BIND_STORAGE variable = stream_ok32 type = fifo
+
+    hls::stream<uint8_t> stream_strategy1("strategy1");
+#pragma HLS stream variable = stream_strategy1 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_strategy1 type = fifo
+
+    hls::stream<float> stream_q_org_8("q_org_8");
+#pragma HLS stream variable = stream_q_org_8 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_q_org_8 type = fifo
+    hls::stream<float> stream_mask_org_8("mask_org_8");
+#pragma HLS stream variable = stream_mask_org_8 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_mask_org_8 type = fifo
+    hls::stream<float> stream_q_org_16("q_org_8");
+#pragma HLS stream variable = stream_q_org_16 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_q_org_16 type = fifo
+    hls::stream<float> stream_mask_org_16("mask_org_8");
+#pragma HLS stream variable = stream_mask_org_16 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_mask_org_16 type = fifo
+    hls::stream<float> stream_q_org_32("q_org_8");
+#pragma HLS stream variable = stream_q_org_32 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_q_org_32 type = fifo
+    hls::stream<float> stream_mask_org_32("mask_org_8");
+#pragma HLS stream variable = stream_mask_org_32 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_mask_org_32 type = fifo
+
+// #pragma HLS dataflow
+
+#ifdef __SYNTHESIS__
+    uint8_t strategy_ping[64];
+    uint8_t strategy_pang[64];
+#pragma HLS bind_storage variable = strategy_ping type = RAM_T2P impl = bram
+#pragma HLS shared variable = strategy_ping
+#pragma HLS stable variable = strategy_ping
+#pragma HLS bind_storage variable = strategy_pang type = RAM_T2P impl = bram
+#pragma HLS shared variable = strategy_pang
+#pragma HLS stable variable = strategy_pang
+#else
+    uint8_t* strategy_ping = (uint8_t*)malloc(sizeof(uint8_t) * 64 * 32 * 32);
+    uint8_t* strategy_pang = (uint8_t*)malloc(sizeof(uint8_t) * 64 * 32 * 32);
+#endif
+
+#ifdef __SYNTHESIS__
+    float ping8[3 * 64 * 64];
+    float pang8[3 * 64 * 64];
+    float ping16[3 * 64 * 64];
+    float pang16[3 * 64 * 64];
+    float ping32[3 * 64 * 64];
+    float pang32[3 * 64 * 64];
+#pragma HLS bind_storage variable = ping8 type = RAM_T2P impl = uram
+#pragma HLS shared variable = ping8
+#pragma HLS stable variable = ping8
+#pragma HLS bind_storage variable = pang8 type = RAM_T2P impl = uram
+#pragma HLS shared variable = pang8
+#pragma HLS stable variable = pang8
+#pragma HLS bind_storage variable = ping16 type = RAM_T2P impl = uram
+#pragma HLS shared variable = ping16
+#pragma HLS stable variable = ping16
+#pragma HLS bind_storage variable = pang16 type = RAM_T2P impl = uram
+#pragma HLS shared variable = pang16
+#pragma HLS stable variable = pang16
+#pragma HLS bind_storage variable = ping32 type = RAM_T2P impl = uram
+#pragma HLS shared variable = ping32
+#pragma HLS stable variable = ping32
+#pragma HLS bind_storage variable = pang32 type = RAM_T2P impl = uram
+#pragma HLS shared variable = pang32
+#pragma HLS stable variable = pang32
+#else
+    float* ping8 = (float*)malloc(sizeof(float) * 2048 * 2048 * 3);
+    float* pang8 = (float*)malloc(sizeof(float) * 2048 * 2048 * 3);
+    float* ping16 = (float*)malloc(sizeof(float) * 2048 * 2048 * 3);
+    float* pang16 = (float*)malloc(sizeof(float) * 2048 * 2048 * 3);
+    float* ping32 = (float*)malloc(sizeof(float) * 2048 * 2048 * 3);
+    float* pang32 = (float*)malloc(sizeof(float) * 2048 * 2048 * 3);
+#endif
+
+    // #pragma HLS dataflow
+    GetACSSize(xsize, ysize, stream_rectx_acs, stream_recty_acs, stream_rectx0, stream_recty0, stream_rectx1,
+               stream_recty1, stream_rectx2, stream_recty2, stream_rectx3, stream_recty3, stream_rectx10,
+               stream_recty10);
+
+    DupQuantAndMask(num_tile, stream_rectx0, stream_recty0, stream_q_org, stream_mask_org, stream_q_org_8,
+                    stream_mask_org_8, stream_q_org_16, stream_mask_org_16, stream_q_org_32, stream_mask_org_32);
+
+    DupDCT<1>(num_tile, stream_rectx1, stream_recty1, stream_rectx4, stream_recty4, stream_rectx7, stream_recty7,
+              stream_dctin8, stream_dctin8_0, stream_dctin8_1);
+
+    DupDCT<2>(num_tile, stream_rectx2, stream_recty2, stream_rectx5, stream_recty5, stream_rectx8, stream_recty8,
+              stream_dctin16, stream_dctin16_0, stream_dctin16_1);
+
+    DupDCT<4>(num_tile, stream_rectx3, stream_recty3, stream_rectx6, stream_recty6, stream_rectx9, stream_recty9,
+              stream_dctin32, stream_dctin32_0, stream_dctin32_1);
+
+    EstimateEntropy8(num_tile, cost1, mul8x8, stream_rectx4, stream_recty4, stream_q_org_8, stream_mask_org_8,
+                     stream_dctin8_0, stream_entropy_8);
+
+    EstimateEntropy16(num_tile, cost1, mul16x16, stream_rectx5, stream_recty5, stream_q_org_16, stream_mask_org_16,
+                      stream_dctin16_0, stream_entropy_16);
+
+    EstimateEntropy32(num_tile, cost1, mul32x32, stream_rectx6, stream_recty6, stream_q_org_32, stream_mask_org_32,
+                      stream_dctin32_0, stream_entropy_32);
+
+    CompareEntropy(num_tile, stream_rectx10, stream_recty10, stream_rectx11, stream_recty11, stream_entropy_8,
+                   stream_entropy_16, stream_entropy_32, strategy_ping, strategy_pang, stream_con, stream_ok);
+
+    BufferN<1>(num_tile, ping8, pang8, stream_rectx7, stream_recty7, stream_dctin8_1, stream_con8, stream_ok8);
+
+    BufferN<2>(num_tile, ping16, pang16, stream_rectx8, stream_recty8, stream_dctin16_1, stream_con16, stream_ok16);
+
+    BufferN<4>(num_tile, ping32, pang32, stream_rectx9, stream_recty9, stream_dctin32_1, stream_con32, stream_ok32);
+
+    Reorder(num_tile, ping8, pang8, ping16, pang16, ping32, pang32, strategy_ping, strategy_pang, stream_rectx11,
+            stream_recty11, stream_rectx12, stream_recty12, stream_con, stream_ok, stream_con8, stream_ok8,
+            stream_con16, stream_ok16, stream_con32, stream_ok32, stream_strategy, stream_strategy1, stream_select);
+
+    SetQuantField(num_tile, inv_global_scale, stream_rectx12, stream_recty12, stream_rqf_org, stream_strategy1,
+                  stream_rqf);
+}
+
+// Emits the dimensions of every tile, in raster order, to three consumers
+// (DCT, AC-strategy and DC stages).
+//
+// The image is measured in 8x8-pixel blocks (xsize / 8 by ysize / 8) and
+// covered by tiles of up to 8x8 blocks. For each tile the width and height
+// in blocks, clipped at the right/bottom image edge, are written once to
+// each of the three rectx/recty stream pairs.
+void GetSourceSize(short xsize,
+                   short ysize,
+                   hls::stream<uint8_t>& stream_rectx_dct,
+                   hls::stream<uint8_t>& stream_recty_dct,
+                   hls::stream<uint8_t>& stream_rectx_acs,
+                   hls::stream<uint8_t>& stream_recty_acs,
+                   hls::stream<uint8_t>& stream_rectx_dc,
+                   hls::stream<uint8_t>& stream_recty_dc) {
+    const uint16_t blocks_w = xsize / 8;
+    const uint16_t blocks_h = ysize / 8;
+LOOP_0:
+    for (uint16_t ty = 0; ty < Div_Ceil(blocks_h, 8); ++ty) {
+    LOOP_1:
+        for (uint16_t tx = 0; tx < Div_Ceil(blocks_w, 8); ++tx) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+            // Vertical extent of this tile, clipped to the image height.
+            const uint16_t row_begin = ty * 8;
+            const int row_next = (ty + 1) * 8;
+            const uint16_t row_end = row_next < blocks_h ? row_next : blocks_h;
+            // Horizontal extent of this tile, clipped to the image width.
+            const uint16_t col_begin = tx * 8;
+            const int col_next = (tx + 1) * 8;
+            const uint16_t col_end = col_next < blocks_w ? col_next : blocks_w;
+
+            const uint8_t tile_rows = row_end - row_begin;
+            const uint8_t tile_cols = col_end - col_begin;
+
+            stream_rectx_dct.write(tile_cols);
+            stream_recty_dct.write(tile_rows);
+            stream_rectx_acs.write(tile_cols);
+            stream_recty_acs.write(tile_rows);
+            stream_rectx_dc.write(tile_cols);
+            stream_recty_dc.write(tile_rows);
+        }
+    }
+}
+
+//=========================================================//
+// data flow region
+//=========================================================//
+void hls_lossy_enc_compute_dataflow(
+    // config
+    uint32_t ysize,
+    uint32_t xsize,
+    int masking_field_stride,
+    int quant_field_stride,
+    float butteraugli_target,
+    float cost1,
+    float inv_global_scale,
+    float* hls_opsin_1,       // mm1, input
+    float* hls_opsin_2,       // mm2, input
+    float* hls_opsin_3,       // mm3, input
+    float* quant_field_row,   // mm4, input
+    float* masking_field_row, // mm5, input
+    float* aq_map_f,          // mm6, input
+    int8_t* cmap_axi,         // mm7, output
+    int* ac_coef_axiout,      // mm8, output
+    // unsigned char* strategy_all, // mm9, output
+    uint8_t* strategy_all,  // mm9, output
+    int* raw_quant_field_i, // mm10, output
+    float* hls_dc8x8,       // mm11, output
+    float* hls_dc16x16,     // mm12, output
+    float* hls_dc32x32,     // mm13, output
+    int32_t num_zeros[3][320],
+    hls::stream<ap_uint<3>, 2>& used_orders_strm) {
+#pragma HLS INTERFACE mode = m_axi bundle = mm1 latency = 32 offset = slave num_write_outstanding =             \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        hls_opsin_1
+#pragma HLS INTERFACE mode = m_axi bundle = mm2 latency = 32 offset = slave num_write_outstanding =             \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        hls_opsin_2
+#pragma HLS INTERFACE mode = m_axi bundle = mm3 latency = 32 offset = slave num_write_outstanding =             \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        hls_opsin_3
+#pragma HLS INTERFACE mode = m_axi bundle = mm4 latency = 32 offset = slave num_write_outstanding = \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth =      \
+        BLOCK8_H* BLOCK8_W port = quant_field_row
+#pragma HLS INTERFACE mode = m_axi bundle = mm5 latency = 32 offset = slave num_write_outstanding = \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth =      \
+        BLOCK8_H* BLOCK8_W port = masking_field_row
+#pragma HLS INTERFACE mode = m_axi bundle = mm6 latency = 32 offset = slave num_write_outstanding = \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth =      \
+        BLOCK8_H* BLOCK8_W port = aq_map_f
+#pragma HLS INTERFACE mode = m_axi bundle = mm7 latency = 32 offset = slave num_write_outstanding = \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth =      \
+        TILE_W* TILE_H* 2 port = cmap_axi
+#pragma HLS INTERFACE mode = m_axi bundle = mm8 latency = 32 offset = slave num_write_outstanding =             \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        ac_coef_axiout
+#pragma HLS INTERFACE mode = m_axi bundle = mm9 latency = 32 offset = slave num_write_outstanding = \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth =      \
+        BLOCK8_W* BLOCK8_H port = strategy_all
+#pragma HLS INTERFACE mode = m_axi bundle = mm10 latency = 32 offset = slave num_write_outstanding = \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth =       \
+        BLOCK8_H* BLOCK8_W port = raw_quant_field_i
+// #pragma HLS INTERFACE mode = m_axi bundle = mm11 latency = 32 offset = slave num_write_outstanding =            \
+//     1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = MAX_ORDER port = \
+//         hls_order
+#pragma HLS INTERFACE mode = m_axi bundle = mm12 latency = 32 offset = slave num_write_outstanding =            \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        hls_dc8x8
+#pragma HLS INTERFACE mode = m_axi bundle = mm13 latency = 32 offset = slave num_write_outstanding =            \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        hls_dc16x16
+#pragma HLS INTERFACE mode = m_axi bundle = mm14 latency = 32 offset = slave num_write_outstanding =            \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        hls_dc32x32
+// #pragma HLS INTERFACE mode = m_axi bundle = mm15 latency = 32 offset = slave num_write_outstanding =                 \
+//     1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = MAX_NUM_CONFIG port = \
+//         config
+// #pragma HLS INTERFACE mode = m_axi bundle = mm16 latency = 32 offset = slave num_write_outstanding =                 \
+//     1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = MAX_NUM_CONFIG port = \
+//         config_fl
+#pragma HLS DATAFLOW
+
+    int tile_xsize = (xsize + 63) / 64 * 64;
+    int tile_ysize = (ysize + 63) / 64 * 64;
+    int xnum_tile = (xsize + 63) / 64;
+    int ynum_tile = (ysize + 63) / 64;
+    int num_tile = xnum_tile * ynum_tile;
+    hls::stream<uint8_t, 1024> stream_rectx_dct;
+    hls::stream<uint8_t, 1024> stream_recty_dct;
+    hls::stream<uint8_t, 1024> stream_rectx_acs;
+    hls::stream<uint8_t, 1024> stream_recty_acs;
+    hls::stream<uint8_t, 1024> stream_rectx_dc;
+    hls::stream<uint8_t, 1024> stream_recty_dc;
+    GetSourceSize(xsize, ysize, stream_rectx_dct, stream_recty_dct, stream_rectx_acs, stream_recty_acs, stream_rectx_dc,
+                  stream_recty_dc);
+
+    // load data
+    hls::stream<float, 4096> stream_q_org("q_org");
+    hls::stream<float, 4096> stream_mask_org("mask_org");
+    hls::stream<float, 4096> stream_rqf_org("rqf_org");
+    load_rqf_mask(xsize, ysize, aq_map_f, masking_field_row, quant_field_row, quant_field_stride, stream_q_org,
+                  stream_mask_org, stream_rqf_org);
+
+    // load pixel
+    hls::stream<float, 4096> opsin8x8_stream;
+    hls::stream<float, 4096> opsin16x16_stream;
+    hls::stream<float, 4096> opsin32x32_stream;
+    loadPixel(ysize, xsize, hls_opsin_1, hls_opsin_2, hls_opsin_3, opsin8x8_stream, opsin16x16_stream,
+              opsin32x32_stream);
+
+    // 1. dct8x8, dct16x16, dct32x32
+    hls::stream<float, 4096> ac_coef8x8_stream("ac_coef8");
+    hls::stream<float, 4096> ac_coef16x16_stream("ac_coef16");
+    hls::stream<float, 4096> ac_coef32x32_stream("ac_coef32");
+
+    hls::stream<float, 4096> dc_coef8x8_stream("dc_coef8");
+    hls::stream<float, 4096> dc_coef16x16_stream("dc_coef16");
+    hls::stream<float, 4096> dc_coef32x32_stream("dc_coef32");
+    hls_dct_top(ysize, xsize, stream_rectx_dct, stream_recty_dct, opsin8x8_stream, opsin16x16_stream, opsin32x32_stream,
+                ac_coef8x8_stream, ac_coef16x16_stream, ac_coef32x32_stream, dc_coef8x8_stream, dc_coef16x16_stream,
+                dc_coef32x32_stream);
+
+    // 2. ac strategy
+    float k8x8mul1 = -0.55;
+    float k8x8mul2 = 1.0735757687292623f;
+    float k8x8base = 1.4;
+    float mul8x8 = k8x8mul2 + k8x8mul1 / (butteraugli_target + k8x8base);
+    float k16X16mul1 = -0.35;
+    float k16X16mul2 = 0.82098067020252011;
+    float k16X16base = 2.0;
+    float entropy_mul16X16 = k16X16mul2 + k16X16mul1 / (butteraugli_target + k16X16base);
+    float entropy_mul32X32 = 0.9188333021616017f;
+    hls::stream<uint8_t, 4096> acs_stream;
+    hls::stream<float, 4096> dct_select_stream;
+    hls::stream<uint8_t, 4096> acs_out_stream("acs_out_stream");
+    hls::stream<int, 4096> rqf_out_stream("rqf_out_stream");
+    hls::stream<int, 4096> rqf_out_stream2("rqf_out_stream2");
+    ComputeTileACSHLS((uint16_t)num_tile, (short)ysize, (short)xsize, cost1, butteraugli_target, inv_global_scale,
+                      mul8x8, entropy_mul16X16, entropy_mul32X32, stream_rectx_acs, stream_recty_acs, stream_rqf_org,
+                      stream_q_org, stream_mask_org, ac_coef8x8_stream, ac_coef16x16_stream, ac_coef32x32_stream,
+                      acs_stream, dct_select_stream, rqf_out_stream);
+
+    // 3. cfl heuristic
+    hls::stream<int8_t, 4096> cmapx_stream("cmapx_stream");
+    hls::stream<int8_t, 4096> cmapb_stream("cmapb_stream");
+    hls::stream<int8_t, 4096> cmapx_axi_stream("cmpax_axi_stream");
+    hls::stream<int8_t, 4096> cmapb_axi_stream("cmapb_axi_stream");
+
+    hls::stream<int> rqf_cfl_stream("rqf_cfl_stream");
+#pragma HLS stream variable = rqf_cfl_stream depth = 4096 * 6
+#pragma HLS BIND_STORAGE variable = rqf_cfl_stream type = fifo impl = uram
+    hls::stream<uint8_t> acs_cfl_stream("acs_cfl_stream");
+#pragma HLS stream variable = acs_cfl_stream depth = 4096 * 6
+#pragma HLS BIND_STORAGE variable = acs_cfl_stream type = fifo impl = uram
+    hls::stream<float> ac_coef_cfl_stream("ac_coef_cfl_stream");
+#pragma HLS stream variable = ac_coef_cfl_stream depth = 4096 * 6
+#pragma HLS BIND_STORAGE variable = ac_coef_cfl_stream type = fifo impl = uram
+
+    hls_CFLComputeTile(xsize, ysize, dct_select_stream, rqf_out_stream, acs_stream, cmapx_stream, cmapb_stream,
+                       cmapx_axi_stream, cmapb_axi_stream, ac_coef_cfl_stream, rqf_cfl_stream, acs_cfl_stream);
+
+    // 4. ComputeCoefficients
+    hls::stream<uint8_t, 4096> acs_coeff_stream1("acs_coeff_stream1");
+    hls::stream<int, 4096> ac_quant_coeff_stream("ac_quant_coeff_stream");
+    hls::stream<int, 4096> coeff_axi_stream("coeff_axi_stream");
+    hls::stream<uint8_t, 4096> acs_axi_stream("acs_axi_stream");
+    hls::stream<int, 4096> qf_axi_stream("qf_axi_stream");
+    hls_ComputeCoefficients(xsize, ysize, acs_cfl_stream, ac_coef_cfl_stream, rqf_cfl_stream, cmapx_stream,
+                            cmapb_stream, acs_coeff_stream1, ac_quant_coeff_stream, coeff_axi_stream, acs_axi_stream,
+                            qf_axi_stream);
+
+    // 5. ComputeAllCoeffOrders
+    count_numzeros(xsize, ysize, acs_coeff_stream1, ac_quant_coeff_stream, used_orders_strm, num_zeros);
+
+    // 6. axi writeout
+    dc_writeout(ysize, xsize, hls_dc8x8, hls_dc16x16, hls_dc32x32, stream_rectx_dc, stream_recty_dc, dc_coef8x8_stream,
+                dc_coef16x16_stream, dc_coef32x32_stream);
+    cfl_writeout(xsize, ysize, cmapx_axi_stream, cmapb_axi_stream, cmap_axi);
+    ac_coeff_writeout(xsize, ysize, coeff_axi_stream, ac_coef_axiout);
+    acs_rqf_writeout(xsize, ysize, strategy_all, raw_quant_field_i, acs_axi_stream, qf_axi_stream);
+}
+
+namespace xf {
+namespace codec {
+
+// ------------------------------------------------------------
+/**
+ * @brief Level 2 : kernel implementation of the JXL lossy frame encode computation
+ *
+ * @param config integer configuration: [0] ysize, [1] xsize, [2] masking_field_stride, [3] quant_field_stride.
+ * @param config_fl floating-point configuration: [0] butteraugli_target, [1] cost1, [2] inv_global_scale.
+ * @param hls_opsin_1 the input RGB image data for channel-1.
+ * @param hls_opsin_2 the input RGB image data for channel-2.
+ * @param hls_opsin_3 the input RGB image data for channel-3.
+ * @param quant_field_row the initial quant_field data.
+ * @param masking_field_row the initial masking_field data.
+ * @param aq_map_f the initial adjust quant map data.
+ * @param cmap_axi the output of color correlation map.
+ * @param ac_coef_axiout the output of quantized AC coefficients.
+ * @param strategy_all the output of strategy for each block in image
+ * @param raw_quant_field_i the output of computed raw_quant_field
+ * @param hls_order the output of orders for each block in image
+ * @param hls_dc8x8 the DC coefficients output for 8x8 blocks
+ * @param hls_dc16x16 the DC coefficients output for 16x16 blocks
+ * @param hls_dc32x32 the DC coefficients output for 32x32 blocks
+ */
+// ------------------------------------------------------------
+
+extern "C" void JxlEnc_lossy_enc_compute(int config[MAX_NUM_CONFIG],      // mm15, input
+                                         float config_fl[MAX_NUM_CONFIG], // mm16, input
+                                         float* hls_opsin_1,              // mm1, input
+                                         float* hls_opsin_2,              // mm2, input
+                                         float* hls_opsin_3,              // mm3, input
+                                         float* quant_field_row,          // mm4, input
+                                         float* masking_field_row,        // mm5, input
+                                         float* aq_map_f,                 // mm6, input
+                                         int8_t* cmap_axi,                // mm7, output
+                                         int* ac_coef_axiout,             // mm8, output
+                                         //    unsigned char* strategy_all,     // mm9, output
+                                         uint8_t* strategy_all,  // mm9, output
+                                         int* raw_quant_field_i, // mm10, output
+                                         uint32_t* hls_order,    // mm11, output
+                                         float* hls_dc8x8,       // mm12, output
+                                         float* hls_dc16x16,     // mm13, output
+                                         float* hls_dc32x32      // mm14, output
+                                         ) {
+#pragma HLS INTERFACE mode = m_axi bundle = mm1 latency = 32 offset = slave num_write_outstanding =             \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        hls_opsin_1
+#pragma HLS INTERFACE mode = m_axi bundle = mm2 latency = 32 offset = slave num_write_outstanding =             \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        hls_opsin_2
+#pragma HLS INTERFACE mode = m_axi bundle = mm3 latency = 32 offset = slave num_write_outstanding =             \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        hls_opsin_3
+#pragma HLS INTERFACE mode = m_axi bundle = mm4 latency = 32 offset = slave num_write_outstanding = \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth =      \
+        BLOCK8_H* BLOCK8_W port = quant_field_row
+#pragma HLS INTERFACE mode = m_axi bundle = mm5 latency = 32 offset = slave num_write_outstanding = \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth =      \
+        BLOCK8_H* BLOCK8_W port = masking_field_row
+#pragma HLS INTERFACE mode = m_axi bundle = mm6 latency = 32 offset = slave num_write_outstanding = \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth =      \
+        BLOCK8_H* BLOCK8_W port = aq_map_f
+#pragma HLS INTERFACE mode = m_axi bundle = mm7 latency = 32 offset = slave num_write_outstanding = \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth =      \
+        TILE_W* TILE_H* 2 port = cmap_axi
+#pragma HLS INTERFACE mode = m_axi bundle = mm8 latency = 32 offset = slave num_write_outstanding =             \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        ac_coef_axiout
+#pragma HLS INTERFACE mode = m_axi bundle = mm9 latency = 32 offset = slave num_write_outstanding = \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth =      \
+        BLOCK8_W* BLOCK8_H port = strategy_all
+#pragma HLS INTERFACE mode = m_axi bundle = mm10 latency = 32 offset = slave num_write_outstanding = \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth =       \
+        BLOCK8_H* BLOCK8_W port = raw_quant_field_i
+#pragma HLS INTERFACE mode = m_axi bundle = mm11 latency = 32 offset = slave num_write_outstanding =            \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = MAX_ORDER port = \
+        hls_order
+#pragma HLS INTERFACE mode = m_axi bundle = mm12 latency = 32 offset = slave num_write_outstanding =            \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        hls_dc8x8
+#pragma HLS INTERFACE mode = m_axi bundle = mm13 latency = 32 offset = slave num_write_outstanding =            \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        hls_dc16x16
+#pragma HLS INTERFACE mode = m_axi bundle = mm14 latency = 32 offset = slave num_write_outstanding =            \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        hls_dc32x32
+#pragma HLS INTERFACE mode = m_axi bundle = mm15 latency = 32 offset = slave num_write_outstanding =                 \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = MAX_NUM_CONFIG port = \
+        config
+#pragma HLS INTERFACE mode = m_axi bundle = mm16 latency = 32 offset = slave num_write_outstanding =                 \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = MAX_NUM_CONFIG port = \
+        config_fl
+
+    // global config: scalar settings unpacked from the config / config_fl arrays
+    uint32_t ysize = config[0];
+    uint32_t xsize = config[1];
+    int masking_field_stride = config[2];
+    int quant_field_stride = config[3];
+    float butteraugli_target = config_fl[0];
+    float cost1 = config_fl[1];
+    float inv_global_scale = config_fl[2];
+    int32_t num_zeros[3][320];
+#pragma HLS BIND_STORAGE type = ram_2p variable = num_zeros impl = BRAM
+
+    // Non-Dataflow region: initialize num_zeros before it is handed to the dataflow region
+    init_numzeros(num_zeros);
+
+    // Dataflow region: enc_compute (also receives num_zeros and used_orders_strm)
+    hls::stream<ap_uint<3>, 2> used_orders_strm;
+    hls_lossy_enc_compute_dataflow(ysize, xsize, masking_field_stride, quant_field_stride, butteraugli_target, cost1,
+                                   inv_global_scale, hls_opsin_1, hls_opsin_2, hls_opsin_3, quant_field_row,
+                                   masking_field_row, aq_map_f, cmap_axi, ac_coef_axiout, strategy_all,
+                                   raw_quant_field_i, hls_dc8x8, hls_dc16x16, hls_dc32x32, num_zeros, used_orders_strm);
+
+    // Non-Dataflow region: compute orders (takes used_orders_strm and num_zeros; hls_order is the output port)
+    order_finalize_dataflow(used_orders_strm, num_zeros, hls_order); // 8us
+}
+
+} // namespace codec
+} // namespace xf
+#endif
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/postSysLink.tcl b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/postSysLink.tcl
new file mode 100644
index 0000000000..2dc2f67034
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/postSysLink.tcl
@@ -0,0 +1 @@
+set_property -dict [list CONFIG.ECC_EN {false} CONFIG.ECC_SCRUB_EN {false}] [get_bd_cells hmss_0] ;# set ECC_EN and ECC_SCRUB_EN to false on block-design cell hmss_0
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/utils.mk b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/utils.mk
new file mode 100644
index 0000000000..0ee80e90da
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute/utils.mk
@@ -0,0 +1,270 @@
+#
+# Copyright 2019-2022 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# vitis makefile-generator v2.0.6
+#
+#+-------------------------------------------------------------------------------
+# The following parameters are assigned with default values. These parameters can
+# be overridden through the make command line
+#+-------------------------------------------------------------------------------
+
+REPORT := no
+PROFILE := no
+DEBUG := no
+
+# REPORT: any value other than 'no' appends both report requests to
+# VPP_LDFLAGS -- 'estimate' (estimate report) first, then 'system'
+# (system report).
+ifneq (no,$(REPORT))
+VPP_LDFLAGS += --report estimate --report system
+endif
+
+# PROFILE=yes: generates profile summary report
+ifeq (yes,$(PROFILE))
+VPP_LDFLAGS += --profile_kernel data:all:all:all
+endif
+
+# DEBUG=yes: generates debug summary report
+ifeq (yes,$(DEBUG))
+VPP_LDFLAGS += --dk protocol:all:all:all
+endif
+
+# Environment defaults: when a tool root is unset or empty, fall back to the
+# /opt/xilinx location and export the fallback to child processes.
+ifndef XILINX_VITIS
+export XILINX_VITIS = /opt/xilinx/Vitis/$(TOOL_VERSION)
+endif
+
+ifndef XILINX_XRT
+export XILINX_XRT = /opt/xilinx/xrt
+endif
+
+# Warn if PLATFORM_NAME matches no PLATFORM_ALLOWLIST pattern; fail if it matches a PLATFORM_BLOCKLIST pattern.
+# The recipe uses only POSIX sh constructs ([ ], grep -q) so it also works when /bin/sh is not bash.
+.PHONY: check_device
+check_device:
+	@set -eu; \
+	inallowlist=False; inblocklist=False; \
+	for dev in $(PLATFORM_ALLOWLIST); do \
+	    if echo "$(PLATFORM_NAME)" | grep -q -- "$$dev"; then inallowlist=True; fi; \
+	done; \
+	for dev in $(PLATFORM_BLOCKLIST); do \
+	    if echo "$(PLATFORM_NAME)" | grep -q -- "$$dev"; then inblocklist=True; fi; \
+	done; \
+	if [ "$$inallowlist" = False ]; then \
+	    echo "[Warning]: The device $(PLATFORM_NAME) not in allowlist."; \
+	fi; \
+	if [ "$$inblocklist" = True ]; then \
+	    echo "[ERROR]: The device $(PLATFORM_NAME) in blocklist."; exit 1; \
+	fi
+
+#get HOST_ARCH by PLATFORM: the 'CPU Type' reported by platforminfo is read once (':=') and mapped to x86/aarch32/aarch64
+ifneq (,$(PLATFORM))
+HOST_ARCH_temp := $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
+ifeq ($(HOST_ARCH_temp), x86)
+HOST_ARCH := x86
+else ifeq ($(HOST_ARCH_temp), cortex-a9)
+HOST_ARCH := aarch32
+else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
+HOST_ARCH := aarch64
+endif
+endif
+
+
+#get suffix of kernel by PLATFORM
+# VITIS_VER and DEVICE_TYPE use ':=' so that v++ / platforminfo run once, not on every reference.
+VITIS_VER := $(shell v++ --version | grep 'v++' | sed 's/^[[:space:]]*//' | sed -e 's/^[*]* v++ v//g' | cut -d " " -f1)
+DEVICE_TYPE := $(shell platforminfo -p $(PLATFORM) | grep 'FPGA Family' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
+
+# The link target format is xclbin unless the device family is versal and VITIS_VER compares >= 2022.1 (then xsa).
+LINK_TARGET_FMT := xclbin
+ifeq ($(DEVICE_TYPE), versal)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+LINK_TARGET_FMT := xsa
+endif
+endif
+
+# Device family follows the host architecture: aarch32 -> 7Series, aarch64 -> Ultrascale
+ifeq ($(HOST_ARCH),aarch32)
+DEV_FAM = 7Series
+endif
+ifeq ($(HOST_ARCH),aarch64)
+DEV_FAM = Ultrascale
+endif
+# Stop parsing when HOST_ARCH holds a value other than aarch64, aarch32 or x86
+ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
+$(error HOST_ARCH variable not set, please set correctly and rerun)
+endif
+
+# Print the latest commit of the $(XFLIB_DIR) checkout; the recipe is empty when git or $(XFLIB_DIR)/.git is not found.
+.PHONY: check_version
+check_version:
+ifneq (,$(and $(shell which git),$(wildcard $(XFLIB_DIR)/.git)))
+	@cd $(XFLIB_DIR) && git log --graph --pretty=format:'%Cred%h%Creset -%C(yellow)%d%Creset %s %Cgreen(%cr) %C(bold blue)<%an>%Creset' --abbrev-commit -n 1 && cd -
+endif
+
+#Checks for SYSROOT: non-x86 hosts must define SYSROOT; the error fires only when this target's recipe is expanded
+.PHONY: check_sysroot
+check_sysroot:
+ifneq ($(HOST_ARCH), x86)
+ifndef SYSROOT
+	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+#Checks for g++: select the host compiler. x86 compares the major version of g++ with that of CXX_VER and falls back to the gcc under XILINX_VIVADO; aarch64/aarch32 use the cross compilers under XILINX_VITIS
+CXX := g++
+ifeq ($(HOST_ARCH), x86)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif
+CXX_V := $(shell echo $(CXX_VER) | awk -F. '{print tolower($$1)}')
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
+ifndef XILINX_VIVADO
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
+else
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
+ifeq ($(LD_LIBRARY_PATH),)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
+else
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
+endif
+$(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
+endif # XILINX_VIVADO defined or not
+endif # g++ major version (__GNUG__) below CXX_V
+else ifeq ($(HOST_ARCH), aarch64)
+CXX := $(XILINX_VITIS)/gnu/aarch64/lin/aarch64-linux/bin/aarch64-linux-gnu-g++
+else ifeq ($(HOST_ARCH), aarch32)
+CXX := $(XILINX_VITIS)/gnu/aarch32/lin/gcc-arm-linux-gnueabi/bin/arm-linux-gnueabihf-g++
+endif # HOST_ARCH
+
+#Check OS and setting env for xrt c++ api (':=' so lsb_release runs once per variable, not on every reference)
+OSDIST := $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
+OSREL := $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
+
+# for centos and redhat: release 7 on an x86 host adds -D_GLIBCXX_USE_CXX11_ABI=0 to XRT_CXXFLAGS
+ifneq ($(findstring centos,$(OSDIST)),)
+ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+else ifneq ($(findstring redhat,$(OSDIST)),)
+ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+endif
+
+#Setting VPP
+VPP := v++
+
+#Command names for the AIE compiler and simulators (names only; nothing is checked here)
+AIECXX := aiecompiler
+AIESIMULATOR := aiesimulator
+X86SIMULATOR := x86simulator
+
+# Installation guards: each target fails with a hint when the expected file is missing under the tool root.
+.PHONY: check_vivado check_vpp check_xrt
+
+check_vivado:
+ifeq ($(wildcard $(XILINX_VIVADO)/bin/vivado),)
+	@echo "Cannot locate Vivado installation. Please set XILINX_VIVADO variable." && false
+endif
+
+check_vpp:
+ifeq ($(wildcard $(XILINX_VITIS)/bin/v++),)
+	@echo "Cannot locate Vitis installation. Please set XILINX_VITIS variable." && false
+endif
+
+check_xrt:
+ifeq ($(wildcard $(XILINX_XRT)/lib/libxilinxopencl.so),)
+	@echo "Cannot locate XRT installation. Please set XILINX_XRT variable." && false
+endif
+
+# Put the Vitis and XRT bin directories first on PATH for the recipes.
+export PATH := $(XILINX_VITIS)/bin:$(XILINX_XRT)/bin:$(PATH)
+# On x86 hosts, prepend the XRT lib directory to LD_LIBRARY_PATH. The variable is
+# exported explicitly: a make variable that did not come from the environment
+# is otherwise not passed on to recipe commands.
+ifeq ($(HOST_ARCH), x86)
+export LD_LIBRARY_PATH := $(XILINX_XRT)/lib$(if $(LD_LIBRARY_PATH),:$(LD_LIBRARY_PATH))
+endif
+
+ifneq (,$(wildcard $(PLATFORM)))
+# PLATFORM names an existing path: use it directly as the platform file
+XPLATFORM := $(PLATFORM)
+else
+# Otherwise treat PLATFORM as a platform name or pattern and search for a matching .xpfm file
+# 1. search the colon-separated directories listed in PLATFORM_REPO_PATHS
+ifneq (,$(PLATFORM_REPO_PATHS))
+# 1.1 as exact name: <dir>/$(PLATFORM)/$(PLATFORM).xpfm
+XPLATFORM := $(strip $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/$(PLATFORM)/$(PLATFORM).xpfm)))
+# 1.2 as a pattern: keep each <dir>/*/*.xpfm path that awk matches against $(PLATFORM) used as a regex
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/*/*.xpfm))
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 1.2
+endif # 1
+# 2. search the platforms directory of the Vitis installation (only while XPLATFORM is still empty)
+ifeq (,$(XPLATFORM))
+# 2.1 as exact name
+XPLATFORM := $(strip $(wildcard $(XILINX_VITIS)/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 2.2 as a pattern (same awk regex match as 1.2)
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard $(XILINX_VITIS)/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 2.2
+endif # 2
+# 3. search the default location /opt/xilinx/platforms (only while XPLATFORM is still empty)
+ifeq (,$(XPLATFORM))
+# 3.1 as exact name
+XPLATFORM := $(strip $(wildcard /opt/xilinx/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 3.2 as a pattern (same awk regex match as 1.2)
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard /opt/xilinx/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 3.2
+endif # 3
+endif # PLATFORM is not an existing path
+
+define MSG_PLATFORM
+No platform matched pattern '$(PLATFORM)'.
+Available platforms are: $(XPLATFORMS)
+To add more platform directories, set the PLATFORM_REPO_PATHS variable or point PLATFORM variable to the full path of platform .xpfm file.
+endef
+export MSG_PLATFORM
+
+# check_platform: when XPLATFORM is empty, print the exported MSG_PLATFORM (read by the shell from its environment) and fail
+.PHONY: check_platform
+check_platform:
+ifeq (,$(XPLATFORM))
+	@echo "$${MSG_PLATFORM}" && false
+endif
+#End of the check_* targets
+
+#   PLATFORM_NAME - filesystem friendly platform name: the last path component
+#   of $(PLATFORM) with a trailing .xpfm removed (make built-ins only, no shell call per expansion)
+PLATFORM_NAME = $(strip $(patsubst %.xpfm,%,$(notdir $(PLATFORM))))
+
+
+# Shell command shorthands: RM / RMDIR for cleaning
+RM = rm -f
+RMDIR = rm -rf
+# Move / copy helpers; ECHO carries a leading '@' so make does not print the echo command itself
+MV = mv -f
+CP = cp -rf
+ECHO:= @echo
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/Makefile b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/Makefile
new file mode 100644
index 0000000000..1788ef2638
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/Makefile
@@ -0,0 +1,281 @@
+# Copyright 2019-2021 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# sc makefile-generator v1.0.0
+
+############################## Help Section ##############################
+.PHONY: help
+# Print usage; this is the default goal because it is the first target in this file
+help::
+	$(ECHO) "Makefile Usage:"
+	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to generate the design for specified Target and Shell."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to run application in emulation."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make host HOST_ARCH=<x86>"
+	$(ECHO) "      Command to build host application."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make clean "
+	$(ECHO) "      Command to remove the generated non-hardware files."
+	$(ECHO) ""
+	$(ECHO) "  make cleanall"
+	$(ECHO) "      Command to remove all the generated files."
+	$(ECHO) ""
+
+############################## Setting up Project Variables ##############################
+
+MK_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+XF_PROJ_ROOT ?= $(shell bash -c 'export MK_PATH=$(MK_PATH); echo $${MK_PATH%/L2/*}')
+CUR_DIR := $(patsubst %/,%,$(dir $(MK_PATH)))
+XFLIB_DIR := $(XF_PROJ_ROOT)
+# XFLIB_DIR is expanded once (:=) so the shell call above is not re-run on every use
+# setting default value
+TARGET ?= sw_emu
+HOST_ARCH ?= x86
+
+#setting PLATFORM
+ifeq ($(PLATFORM),)
+PLATFORM := $(DEVICE)
+endif
+ifeq ($(PLATFORM),)
+PLATFORM := xilinx_u50_gen3x16_xdma_5_202210_1
+endif
+
+# #################### Checking if PLATFORM in allowlist ############################
+PLATFORM_ALLOWLIST +=  u50
+PLATFORM_BLOCKLIST +=  zc
+
+GCC_INTOOL := 8.3.0
+BINUTILS_INTOOL := 2.37
+include ./utils.mk
+TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
+TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
+BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
+EMCONFIG := $(BUILD_DIR)/emconfig.json
+XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
+export XCL_BINDIR = $(XCLBIN_DIR)
+
+EXE_FILE_DEPS :=
+BINARY_CONTAINERS_DEPS :=
+RUN_DEPS :=
+
+# set debug switch
+ifneq ($(debug),yes)
+CXXFLAGS += -O3
+endif
+
+# get global setting
+ifdef XILINX_SC_PFM_CONFIG
+CXXFLAGS += -DXILINX_SC_PFM_CONFIG=$(XILINX_SC_PFM_CONFIG)
+endif
+ifdef XILINX_SC_PFM_EXT
+CXXFLAGS += -DXILINX_SC_PFM_EXT=$(XILINX_SC_PFM_EXT)
+endif
+ifeq ($(HOST_ARCH), x86)
+CXXFLAGS += -I $(XILINX_VITIS)/system_compiler/include -I $(XILINX_HLS)/include 
+LDFLAGS += -L$(XILINX_XRT)/lib -L$(XILINX_VITIS)/system_compiler/lib/x86 -lvpp_acc -l$(LIB_XRT) -lxrt_coreutil  -Wl,-rpath=$(XILINX_VITIS)/system_compiler/lib/x86:$(XILINX_XRT)/lib:$(GCC_HOME)/lib64  -Wl,--enable-new-dtags -lpthread 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --temp_dir $(TEMP_DIR) --save-temps -g -I $(XILINX_VITIS)/system_compiler/include 
+VPP_LDFLAGS += 
+else ifeq ($(HOST_ARCH), aarch64)
+CXXFLAGS += 
+LDFLAGS += 
+VPP_FLAGS += 
+VPP_LDFLAGS += 
+endif
+CXXFLAGS += $(EXTRA_CXXFLAGS)
+VPP_FLAGS += $(EXTRA_VPP_FLAGS)
+
+ifeq ($(TARGET),sw)
+  $(error Error: The sw target is not supported anymore. Please use sw_emu instead)
+else ifeq ($(TARGET),sw_emu)
+  LIB_XRT  := xrt_swemu
+  HOST_PREAMBLE := XCL_EMULATION_MODE=sw_emu
+else ifeq ($(TARGET),hw_emu)
+  LIB_XRT  := xrt_hwemu
+  HOST_PREAMBLE := XCL_EMULATION_MODE=hw_emu
+  ifneq (,$(findstring -g,$(EXTRA_VPP_FLAGS) $(EXTRA_VPPFLAGS) $(CXXFLAGS)))
+    # -g requested: enable sourcing pre/post xsim scripts unless XILINX_SC_HW_EMU=0
+    ifneq ($(XILINX_SC_HW_EMU),0)
+      HOST_PREAMBLE += XILINX_SC_HW_EMU=1 XILINX_SC_BUILD_DIR=$(PWD)/$(BUILD_DIR)
+    endif
+  endif
+else ifeq ($(TARGET),hw)
+  LIB_XRT  := xrt_core
+endif
+
+########################## Setting up Host Variables ##########################
+
+#Include Required Host Source Files
+HOST_SRCS += $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cjxl.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cjxl_main.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cmdline.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/codec_config.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/speed_stats.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cpu/cpu.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cpu/os_specific.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/box/box.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/time.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_png.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_pgx.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_pnm.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_jpg.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_psd.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner_internal.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/decode_to_jpeg.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_huffman.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/ans_common.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_context_map.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/progressive_split.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_detect_dots.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_params.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/entropy_coder.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/blending.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_comparator.cc 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_table.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_tree.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/linalg.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_file.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/aux_out.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/headers.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/alpha.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/image_bundle.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/image_metadata.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/frame_header.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/color_encoding_internal.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/quant_weights.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_fast_heuristics.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_encode.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/fields.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/luminance.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_color_management.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_bit_writer.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/image.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/loop_filter.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/color_management.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_modular.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_quant_weights.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_chroma_from_luma.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_adaptive_quantization.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_modular.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cache.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_group.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ac_strategy.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_photon_noise.cc 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_noise.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_splines.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_patch_dictionary.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/splines.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_xyb.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/gaborish.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ar_control_field.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/gauss_blur.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/memory_manager_internal.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_file.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_image_bundle.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_external_image.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_toc.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ans.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/passes_state.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/chroma_from_luma.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_context_map.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_coeff_order.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_ans.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_entropy_coder.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec_common.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/compressed_dc.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/epf.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_dot_dictionary.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_xyb.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_frame.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_patch_dictionary.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_comparator.cc 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_reconstruct.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group_border.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/filters.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/convolve.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_cache.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_noise.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_huffman.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dct_scales.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/ac_strategy.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_decode.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_icc_codec.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli/butteraugli.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cluster.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data_writer.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/jpeg_data.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data_reader.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/base/padded_bytes.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/base/data_parallel.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/base/cache_aligned.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/base/status.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/dec_ma.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/modular_image.cc 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/encoding.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_rct.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_squeeze.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_palette.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/squeeze.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_transform.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/jxl_transform.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_ma.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_encoding.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encode.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/memory.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references_hq.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/brotli_bit_stream.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_splitter.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/metablock.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment_two_pass.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encoder_dict.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/utf8_util.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/decode.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/static_dict.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/literal_cost.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/entropy_encode.c 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/bit_cost.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/cluster.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/dictionary_hash.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/histogram.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/bit_reader.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/huffman.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/state.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/dictionary.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/transform.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmslut.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsnamed.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspack.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscnvrt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio1.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgmt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsopt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsalpha.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmstypes.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsintrp.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgamma.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscam02.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscgats.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmshalf.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsmtrx.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsps2.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssamp.cpp 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssm.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsxform.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio0.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsplugin.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmserr.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspcs.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmswtpnt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsvirt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lodepng/lodepng.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/aligned_allocator.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/targets.cc $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/acc_enc_frame.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_host.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase1.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase2.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase3.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/acc_enc_frame.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/host/host_lossy_enc_compute.cpp 
+CXXFLAGS +=  -I $(XFLIB_DIR)/../utils/L1/include/ -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/ -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/include -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/build/lib/include -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/include -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/highway -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lodepng -I $(XFLIB_DIR)/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute -I $(XFLIB_DIR)/L2/demos/jxlEnc/others/include -I $(XFLIB_DIR)/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/kernel -I $(XFLIB_DIR)/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/host
+CXXFLAGS += -O3 
+
+EXE_NAME := host.exe
+EXE_OBJS := $(addprefix $(TEMP_DIR)/, $(addsuffix .o,$(basename $(HOST_SRCS))))
+EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
+EXE_FILE_DEPS := $(EXE_OBJS) 
+MAKEDEPEND = $(CXX) $< -MM -MP -MF $(basename $@).d -MT $@  $(CXXFLAGS)
+-include $(sort $(EXE_OBJS:.o=.d))
+HOST_ARGS :=  --xclbin $(BUILD_DIR)/jxlEnc.xclbin $(XFLIB_DIR)/L2/demos/jxlEnc/images/small32x32.png small32x32.jxl
+ifneq ($(HOST_ARCH), x86)
+PKG_HOST_ARGS = $(foreach args,$(HOST_ARGS),$(subst $(dir $(patsubst %/,%,$(args))),,$(args)))
+endif
+
+########################## Kernel compiler global settings ##########################
+VPP_FLAGS +=  -I $(XFLIB_DIR)/../utils/L1/include/ -I $(XFLIB_DIR)/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/kernel
+
+######################### binary container global settings ##########################
+VPP_FLAGS_hls_lossy_enc_compute +=  -D KERNEL_NAME=hls_lossy_enc_compute
+VPP_FLAGS_hls_lossy_enc_compute += --hls.clock 300000000:hls_lossy_enc_compute
+ifneq ($(HOST_ARCH), x86)
+VPP_LDFLAGS_hls_lossy_enc_compute += --clock.defaultFreqHz 300000000
+else
+VPP_LDFLAGS_hls_lossy_enc_compute += --kernel_frequency 300
+endif
+VPP_LDFLAGS_hls_lossy_enc_compute_temp := --advanced.param compiler.userPostSysLinkOverlayTcl=postSysLink.tcl
+VPP_LDFLAGS_hls_lossy_enc_compute += $(VPP_LDFLAGS_hls_lossy_enc_compute_temp)
+
+ifeq ($(HOST_ARCH), x86)
+BINARY_CONTAINERS_TMP := $(BUILD_DIR)/$(TARGET).o
+BINARY_CONTAINERS := $(BUILD_DIR)/$(TARGET).xclbin
+ifeq ($(TARGET),sw_emu)
+  BINARY_CONTAINERS_TMP :=
+endif
+else
+# placeholder for non_x86
+endif
+
+.SECONDEXPANSION:
+# ################ Setting Rules for Binary Containers (Building Kernels) ################
+ACC_SRCS_hls_lossy_enc_compute += $(XFLIB_DIR)/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/kernel/hls_lossy_enc_compute.cpp
+ACC_OBJS_hls_lossy_enc_compute  := $(addprefix $(TEMP_DIR)/, $(addsuffix .o,$(basename $(ACC_SRCS_hls_lossy_enc_compute))))
+$(ACC_OBJS_hls_lossy_enc_compute): $(TEMP_DIR)/%.o : %.cpp $$(@D)/.f
+	@echo "--> Making $@ from: $?"
+	$(MAKEDEPEND)
+	$(VPP) $(VPP_FLAGS) $(VPP_FLAGS_hls_lossy_enc_compute) -o $@  -c $<
+BINARY_CONTAINERS_DEPS  += $(ACC_OBJS_hls_lossy_enc_compute) 
+$(BINARY_CONTAINERS_TMP) : $(BINARY_CONTAINERS_DEPS)
+	@echo "--> Making $@ from: $?"
+	$(VPP) $(VPP_FLAGS) $(VPP_LDFLAGS) $(VPP_LDFLAGS_hls_lossy_enc_compute) -o $(BINARY_CONTAINERS) -l $^
+EXE_FILE_DEPS += $(BINARY_CONTAINERS_TMP)
+EXE_FILE_DEPS += $(BINARY_CONTAINERS_DEPS)
+
+############################## Setting Rules for Host (Building Host Executable) ##############################
+ifeq ($(HOST_ARCH), x86)
+$(TEMP_DIR)/%.o : %.cpp $$(@D)/.f
+	@echo "--> Making $@ from: $?"
+	mkdir -p $(BUILD_DIR)
+	$(MAKEDEPEND)
+	$(CXX) -o $@ $(CXXFLAGS)  -I . -c $<
+$(TEMP_DIR)/%.o : %.cc $$(@D)/.f
+	@echo "--> Making $@ from: $?"
+	mkdir -p $(BUILD_DIR)
+	$(MAKEDEPEND)
+	$(CXX) -o $@ $(CXXFLAGS)  -I . -c $<
+$(TEMP_DIR)/%.o : %.c $$(@D)/.f
+	@echo "--> Making $@ from: $?"
+	mkdir -p $(BUILD_DIR)
+	$(MAKEDEPEND)
+	$(CXX) -o $@ $(CXXFLAGS)  -I . -c $<
+$(EXE_FILE): $(EXE_FILE_DEPS)  
+	mkdir -p $(BUILD_DIR)
+	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
+else
+# place holder for aarch64
+endif
+
+$(EMCONFIG):
+	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
+
+%/.f:
+	mkdir -p $(dir $@)
+	touch $@
+
+.PRECIOUS: %/.f
+
+RUN_DEPS += $(EXE_FILE) $(EMCONFIG)
+
+run: check_device  $(RUN_DEPS)
+#sw_emu
+ifneq (,$(filter sw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(HOST_PREAMBLE) $(EXE_FILE) $(HOST_ARGS)
+	./check.sh
+else
+# place holder for aarch64
+endif
+endif
+
+#hw_emu
+ifneq (,$(filter hw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(HOST_PREAMBLE) $(EXE_FILE) $(HOST_ARGS)
+	./check.sh
+else
+# place holder for aarch64
+endif
+endif
+
+#hw
+ifeq ($(TARGET), hw)
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(HOST_PREAMBLE) $(EXE_FILE) $(HOST_ARGS)
+	./check.sh
+else
+# place holder for aarch64
+endif
+endif
+
+############################## Setting Targets ##############################
+
+.PHONY: all clean cleanall cleanh cleank emconfig run
+emconfig: $(EMCONFIG)
+ifeq ($(HOST_ARCH), x86)
+all:  check_vpp check_platform check_xrt $(EXE_FILE) $(BINARY_CONTAINERS) emconfig
+else
+all:  check_vpp check_platform check_sysroot $(EXE_FILE) $(BINARY_CONTAINERS) emconfig sd_card
+endif
+
+.PHONY: host xclbin
+ifeq ($(HOST_ARCH), x86)
+host:  check_xrt $(EXE_FILE)
+else
+host:  check_sysroot $(EXE_FILE)
+endif
+xclbin: $(BINARY_CONTAINERS_TMP)
+
+############################## Cleaning Rules ##############################
+cleanh:
+	-$(RMDIR) $(EXE_FILE) vitis_* TempConfig system_estimate.xtxt *.rpt .run/
+	-$(RMDIR) src/*.ll _xocc_* .Xil dltmp* xmltmp* *.log *.jou *.wcfg *.wdb sample_link.ini sample_compile.ini obj*  bin* *.csv *.jpg *.jpeg *.png
+
+cleank:
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
+	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
+	-$(RMDIR) _x_temp.* 
+
+cleanall: cleanh cleank
+	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
+	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+
+clean: cleanh
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/check.sh b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/check.sh
new file mode 100755
index 0000000000..d9450ab8d2
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/check.sh
@@ -0,0 +1 @@
+echo "bcf0915760ea2ffbfd33a1bb2abe028a small32x32.jxl" | md5sum -c -
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/conn_u50.cfg b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/conn_u50.cfg
new file mode 100644
index 0000000000..9324a2c545
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/conn_u50.cfg
@@ -0,0 +1,21 @@
+[hls]
+#pre_tcl=hls_pre.tcl
+
+[connectivity]
+sp=hls_lossy_enc_compute_1.config:HBM[14] 
+sp=hls_lossy_enc_compute_1.config_fl:HBM[15] 
+sp=hls_lossy_enc_compute_1.hls_opsin_1:HBM[0] 
+sp=hls_lossy_enc_compute_1.hls_opsin_2:HBM[1]
+sp=hls_lossy_enc_compute_1.hls_opsin_3:HBM[2] 
+sp=hls_lossy_enc_compute_1.quant_field_row:HBM[3]
+sp=hls_lossy_enc_compute_1.masking_field_row:HBM[4] 
+sp=hls_lossy_enc_compute_1.aq_map_f:HBM[5] 
+sp=hls_lossy_enc_compute_1.cmap_axi:HBM[6]  
+sp=hls_lossy_enc_compute_1.ac_coef_axiout:HBM[7] 
+sp=hls_lossy_enc_compute_1.strategy_all:HBM[8] 
+sp=hls_lossy_enc_compute_1.raw_quant_field_i:HBM[9] 
+sp=hls_lossy_enc_compute_1.hls_order:HBM[10] 
+sp=hls_lossy_enc_compute_1.hls_dc8x8:HBM[11] 
+sp=hls_lossy_enc_compute_1.hls_dc16x16:HBM[12] 
+sp=hls_lossy_enc_compute_1.hls_dc32x32:HBM[13] 
+
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/description.json b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/description.json
new file mode 100644
index 0000000000..7ca4133772
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/description.json
@@ -0,0 +1,285 @@
+{
+    "gui": false,
+    "name": "Xilinx JXL ACC LOSSY ENC Demo (SC)",
+    "description": "",
+    "flow": "vitis",
+    "platform_allowlist": [
+        "u50"
+    ],
+    "platform_blocklist": [
+        "zc"
+    ],
+    "launch": [
+        {
+            "cmd_args": " --xclbin BUILD/jxlEnc.xclbin LIB_DIR/L2/demos/jxlEnc/images/small32x32.png small32x32.jxl",
+            "name": "generic launch for all flows"
+        }
+    ],
+    "post_launch": [
+        {
+            "launch_cmd": [
+                "./check.sh"
+            ]
+        }
+    ],
+    "host": {
+        "host_exe": "host.exe",
+        "compiler": {
+            "sources": [
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cjxl.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cjxl_main.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cmdline.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/codec_config.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/speed_stats.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cpu/cpu.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cpu/os_specific.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/box/box.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/time.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_png.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_pgx.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_pnm.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_jpg.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_psd.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner_internal.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/decode_to_jpeg.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_huffman.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/ans_common.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_context_map.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/progressive_split.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_detect_dots.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_params.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/entropy_coder.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/blending.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_comparator.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_table.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_tree.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/linalg.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_file.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/aux_out.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/headers.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/alpha.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/image_bundle.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/image_metadata.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/frame_header.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/color_encoding_internal.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/quant_weights.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_fast_heuristics.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_encode.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/fields.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/luminance.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_color_management.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_bit_writer.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/image.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/loop_filter.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/color_management.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_modular.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_quant_weights.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_chroma_from_luma.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_adaptive_quantization.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_modular.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cache.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_group.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ac_strategy.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_photon_noise.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_noise.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_splines.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_patch_dictionary.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/splines.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_xyb.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/gaborish.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ar_control_field.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/gauss_blur.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/memory_manager_internal.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_file.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_image_bundle.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_external_image.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_toc.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ans.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/passes_state.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/chroma_from_luma.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_context_map.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_coeff_order.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_ans.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_entropy_coder.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec_common.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/compressed_dc.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/epf.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_dot_dictionary.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_xyb.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_frame.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_patch_dictionary.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_comparator.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_reconstruct.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group_border.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/filters.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/convolve.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_cache.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_noise.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_huffman.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dct_scales.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/ac_strategy.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_decode.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_icc_codec.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli/butteraugli.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cluster.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data_writer.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/jpeg_data.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data_reader.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/base/padded_bytes.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/base/data_parallel.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/base/cache_aligned.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/base/status.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/dec_ma.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/modular_image.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/encoding.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_rct.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_squeeze.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_palette.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/squeeze.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_transform.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/jxl_transform.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_ma.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_encoding.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encode.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/memory.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references_hq.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/brotli_bit_stream.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_splitter.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/metablock.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment_two_pass.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encoder_dict.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/utf8_util.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/decode.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/static_dict.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/literal_cost.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/entropy_encode.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/bit_cost.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/cluster.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/dictionary_hash.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/histogram.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/bit_reader.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/huffman.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/state.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/dictionary.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/transform.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmslut.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsnamed.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspack.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscnvrt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio1.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgmt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsopt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsalpha.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmstypes.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsintrp.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgamma.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscam02.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscgats.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmshalf.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsmtrx.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsps2.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssamp.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssm.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsxform.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio0.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsplugin.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmserr.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspcs.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmswtpnt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsvirt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lodepng/lodepng.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/aligned_allocator.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/targets.cc",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/acc_enc_frame.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_host.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase1.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase2.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase3.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/acc_enc_frame.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/host/host_lossy_enc_compute.cpp"
+            ],
+            "includepaths": [
+                "LIB_DIR/../utils/L1/include/",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/include",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/build/lib/include",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/include",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/highway",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lodepng",
+                "LIB_DIR/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute",
+                "LIB_DIR/L2/demos/jxlEnc/others/include",
+                "LIB_DIR/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/kernel",
+                "LIB_DIR/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/host"
+            ],
+            "options": "-O3 "
+        }
+    },
+    "v++": {
+        "compiler": {
+            "includepaths": [
+                "LIB_DIR/../utils/L1/include/",
+                "LIB_DIR/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/kernel"
+            ]
+        }
+    },
+    "containers": [
+        {
+            "accelerators": [
+                {
+                    "location": "LIB_DIR/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/kernel/hls_lossy_enc_compute.cpp",
+                    "frequency": 300.0,
+                    "clflags": " -D KERNEL_NAME=hls_lossy_enc_compute",
+                    "name": "hls_lossy_enc_compute"
+                }
+            ],
+            "frequency": 300.0,
+            "name": "hls_lossy_enc_compute",
+            "ldclflags": "--advanced.param compiler.userPostSysLinkOverlayTcl=postSysLink.tcl"
+        }
+    ],
+    "testinfo": {
+        "disable": false,
+        "jobs": [
+            {
+                "index": 0,
+                "dependency": [],
+                "env": "",
+                "cmd": "",
+                "max_memory_MB": {
+                    "vitis_hw_build": 81920,
+                    "vitis_hw_emu": 40960,
+                    "vitis_sw_emu": 10240,
+                    "vitis_hw_run": 10240
+                },
+                "max_time_min": {
+                    "vitis_hw_build": 3200,
+                    "vitis_hw_emu": 1600,
+                    "vitis_sw_emu": 120,
+                    "vitis_hw_run": 10
+                }
+            }
+        ],
+        "targets": [
+            "vitis_sw_emu",
+            "vitis_hw_emu",
+            "vitis_hw"
+        ],
+        "category": "canary"
+    }
+}
\ No newline at end of file
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/host/host_lossy_enc_compute.cpp b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/host/host_lossy_enc_compute.cpp
new file mode 100644
index 0000000000..fa27f3e2f8
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/host/host_lossy_enc_compute.cpp
@@ -0,0 +1,483 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HOST_LOSSY_ENC_COMPUTE_SC_CPP
+#define HOST_LOSSY_ENC_COMPUTE_SC_CPP
+
+#include <iostream>
+#include <sys/time.h>
+#include "ap_int.h"
+
+#include "host_lossy_enc_compute.hpp"
+
+#ifndef HLS_TEST
+#include "xf_utils_sw/logger.hpp"
+#endif
+
+unsigned long diff(const struct timeval* newTime, const struct timeval* oldTime) {  // microseconds, new - old
+    return (newTime->tv_usec - oldTime->tv_usec) + 1000000 * (newTime->tv_sec - oldTime->tv_sec);
+}
+
+template <typename T>
+T* aligned_alloc(std::size_t num) {  // 4096-byte-aligned raw storage for num objects of T; throws std::bad_alloc
+    void* raw = nullptr;
+    if (posix_memalign(&raw, 4096, sizeof(T) * num) != 0) throw std::bad_alloc();
+    return static_cast<T*>(raw);
+}
+
+void hls_lossy_enc_compute_wrapper(std::string xclbinPath,      // xclbin
+                                   int config[32],              // mm15, input
+                                   float config_fl[32],         // mm16, input
+                                   float* hls_opsin_1,          // mm1, input
+                                   float* hls_opsin_2,          // mm2, input
+                                   float* hls_opsin_3,          // mm3, input
+                                   float* hls_quant_field,      // mm4, input
+                                   float* hls_masking_field,    // mm5, input
+                                   float* aq_map_f,             // mm6, input
+                                   int8_t* cmap_axi,            // mm7, output
+                                   int* ac_coef_axiout,         // mm8, output
+                                   unsigned char* strategy_all, // mm9, output
+                                   int* raw_quant_field_i,      // mm10, output
+                                   uint32_t* hls_order,         // mm11, output
+                                   float* hls_dc8x8,            // mm12, output
+                                   float* hls_dc16x16,          // mm13, output
+                                   float* hls_dc32x32           // mm14, output
+                                   ) {
+#ifndef HLS_TEST
+
+    auto config_pool = lossy_acc::create_bufpool(vpp::input);
+    auto config_fl_pool = lossy_acc::create_bufpool(vpp::input);
+    auto hls_opsin_1_pool = lossy_acc::create_bufpool(vpp::input);
+    auto hls_opsin_2_pool = lossy_acc::create_bufpool(vpp::input);
+    auto hls_opsin_3_pool = lossy_acc::create_bufpool(vpp::input);
+    auto quant_field_row_pool = lossy_acc::create_bufpool(vpp::input);
+    auto masking_field_row_pool = lossy_acc::create_bufpool(vpp::input);
+    auto aq_map_f_pool = lossy_acc::create_bufpool(vpp::input);
+    auto cmap_axi_pool = lossy_acc::create_bufpool(vpp::output);
+    auto ac_coef_axiout_pool = lossy_acc::create_bufpool(vpp::output);
+    auto strategy_all_pool = lossy_acc::create_bufpool(vpp::output);
+    auto raw_quant_field_i_pool = lossy_acc::create_bufpool(vpp::output);
+    auto hls_order_pool = lossy_acc::create_bufpool(vpp::output);
+    auto hls_dc8x8_pool = lossy_acc::create_bufpool(vpp::output);
+    auto hls_dc16x16_pool = lossy_acc::create_bufpool(vpp::output);
+    auto hls_dc32x32_pool = lossy_acc::create_bufpool(vpp::output);
+
+    lossy_acc::send_while([&]() -> bool {
+        int* acc_config = (int*)lossy_acc::alloc_buf(config_pool, sizeof(int) * MAX_NUM_CONFIG);
+        float* acc_config_fl = (float*)lossy_acc::alloc_buf(config_fl_pool, sizeof(float) * MAX_NUM_CONFIG);
+        float* acc_hls_opsin_1 = (float*)lossy_acc::alloc_buf(hls_opsin_1_pool, sizeof(float) * ALL_PIXEL);
+        float* acc_hls_opsin_2 = (float*)lossy_acc::alloc_buf(hls_opsin_2_pool, sizeof(float) * ALL_PIXEL);
+        float* acc_hls_opsin_3 = (float*)lossy_acc::alloc_buf(hls_opsin_3_pool, sizeof(float) * ALL_PIXEL);
+        float* acc_quant_field_row =
+            (float*)lossy_acc::alloc_buf(quant_field_row_pool, sizeof(float) * BLOCK8_H * BLOCK8_W);
+        float* acc_masking_field_row =
+            (float*)lossy_acc::alloc_buf(masking_field_row_pool, sizeof(float) * BLOCK8_H * BLOCK8_W);
+        float* acc_aq_map_f = (float*)lossy_acc::alloc_buf(aq_map_f_pool, sizeof(float) * BLOCK8_H * BLOCK8_W);
+        int8_t* acc_cmap_axi = (int8_t*)lossy_acc::alloc_buf(cmap_axi_pool, sizeof(int8_t) * TILE_W * TILE_H * 2);
+        int* acc_ac_coef_axiout = (int*)lossy_acc::alloc_buf(ac_coef_axiout_pool, sizeof(int) * ALL_PIXEL);
+        unsigned char* acc_strategy_all =
+            (unsigned char*)lossy_acc::alloc_buf(strategy_all_pool, sizeof(float) * BLOCK8_H * BLOCK8_W);
+        int* acc_raw_quant_field_i =
+            (int*)lossy_acc::alloc_buf(raw_quant_field_i_pool, sizeof(int) * BLOCK8_H * BLOCK8_W);
+        uint32_t* acc_hls_order = (uint32_t*)lossy_acc::alloc_buf(hls_order_pool, sizeof(uint32_t) * MAX_ORDER);
+        float* acc_hls_dc8x8 = (float*)lossy_acc::alloc_buf(hls_dc8x8_pool, sizeof(float) * ALL_PIXEL);
+        float* acc_hls_dc16x16 = (float*)lossy_acc::alloc_buf(hls_dc16x16_pool, sizeof(float) * ALL_PIXEL);
+        float* acc_hls_dc32x32 = (float*)lossy_acc::alloc_buf(hls_dc32x32_pool, sizeof(float) * ALL_PIXEL);
+
+        memcpy(acc_config, config, sizeof(int) * MAX_NUM_CONFIG);
+        memcpy(acc_config_fl, config_fl, sizeof(float) * MAX_NUM_CONFIG);
+        memcpy(acc_hls_opsin_1, hls_opsin_1, sizeof(float) * ALL_PIXEL);
+        memcpy(acc_hls_opsin_2, hls_opsin_2, sizeof(float) * ALL_PIXEL);
+        memcpy(acc_hls_opsin_3, hls_opsin_3, sizeof(float) * ALL_PIXEL);
+        memcpy(acc_quant_field_row, hls_quant_field, sizeof(float) * BLOCK8_H * BLOCK8_W);
+        memcpy(acc_masking_field_row, hls_masking_field, sizeof(float) * BLOCK8_H * BLOCK8_W);
+        memcpy(acc_aq_map_f, aq_map_f, sizeof(float) * BLOCK8_H * BLOCK8_W);
+        // memcpy(acc_cmap_axi, cmap_axi, sizeof(int8_t) * TILE_W * TILE_H * 2);
+        // memcpy(acc_ac_coef_axiout, ac_coef_axiout, sizeof(int) * ALL_PIXEL);
+        // memcpy(acc_strategy_all, strategy_all, sizeof(unsigned char) * BLOCK8_H * BLOCK8_W);
+        // memcpy(acc_raw_quant_field_i, raw_quant_field_i, sizeof(int) * BLOCK8_H * BLOCK8_W);
+        // memcpy(acc_hls_order, hls_order, sizeof(uint32_t) * MAX_ORDER);
+        // memcpy(acc_hls_dc8x8, hls_dc8x8, sizeof(float) * ALL_PIXEL);
+        // memcpy(acc_hls_dc16x16, hls_dc16x16, sizeof(float) * ALL_PIXEL);
+        // memcpy(acc_hls_dc32x32, hls_dc32x32, sizeof(float) * ALL_PIXEL);
+
+        lossy_acc::compute(acc_config, acc_config_fl, acc_hls_opsin_1, acc_hls_opsin_2, acc_hls_opsin_3,
+                           acc_quant_field_row, acc_masking_field_row, acc_aq_map_f, acc_cmap_axi, acc_ac_coef_axiout,
+                           acc_strategy_all, acc_raw_quant_field_i, acc_hls_order, acc_hls_dc8x8, acc_hls_dc16x16,
+                           acc_hls_dc32x32);
+        return 0;
+    });
+
+    lossy_acc::receive_all_in_order([&]() {
+        int* acc_config = (int*)lossy_acc::get_buf(config_pool);
+        float* acc_config_fl = (float*)lossy_acc::get_buf(config_fl_pool);
+        float* acc_hls_opsin_1 = (float*)lossy_acc::get_buf(hls_opsin_1_pool);
+        float* acc_hls_opsin_2 = (float*)lossy_acc::get_buf(hls_opsin_2_pool);
+        float* acc_hls_opsin_3 = (float*)lossy_acc::get_buf(hls_opsin_3_pool);
+        float* acc_quant_field_row = (float*)lossy_acc::get_buf(quant_field_row_pool);
+        float* acc_masking_field_row = (float*)lossy_acc::get_buf(masking_field_row_pool);
+        float* acc_aq_map_f = (float*)lossy_acc::get_buf(aq_map_f_pool);
+        int8_t* acc_cmap_axi = (int8_t*)lossy_acc::get_buf(cmap_axi_pool);
+        int* acc_ac_coef_axiout = (int*)lossy_acc::get_buf(ac_coef_axiout_pool);
+        unsigned char* acc_strategy_all = (unsigned char*)lossy_acc::get_buf(strategy_all_pool);
+        int* acc_raw_quant_field_i = (int*)lossy_acc::get_buf(raw_quant_field_i_pool);
+        uint32_t* acc_hls_order = (uint32_t*)lossy_acc::get_buf(hls_order_pool);
+        float* acc_hls_dc8x8 = (float*)lossy_acc::get_buf(hls_dc8x8_pool);
+        float* acc_hls_dc16x16 = (float*)lossy_acc::get_buf(hls_dc16x16_pool);
+        float* acc_hls_dc32x32 = (float*)lossy_acc::get_buf(hls_dc32x32_pool);
+
+        // memcpy(config, acc_config, sizeof(int) * MAX_NUM_CONFIG);
+        // memcpy(config_fl, acc_config_fl, sizeof(float) * MAX_NUM_CONFIG);
+        // memcpy(hls_opsin_1, acc_hls_opsin_1, sizeof(float) * ALL_PIXEL);
+        // memcpy(hls_opsin_2, acc_hls_opsin_2, sizeof(float) * ALL_PIXEL);
+        // memcpy(hls_opsin_3, acc_hls_opsin_3, sizeof(float) * ALL_PIXEL);
+        // memcpy(hls_quant_field, acc_quant_field_row, sizeof(float) * BLOCK8_H * BLOCK8_W);
+        // memcpy(hls_masking_field, acc_masking_field_row, sizeof(float) * BLOCK8_H * BLOCK8_W);
+        // memcpy(aq_map_f, acc_aq_map_f, sizeof(float) * BLOCK8_H * BLOCK8_W);
+        memcpy(cmap_axi, acc_cmap_axi, sizeof(int8_t) * TILE_W * TILE_H * 2);
+        memcpy(ac_coef_axiout, acc_ac_coef_axiout, sizeof(int) * ALL_PIXEL);
+        memcpy(strategy_all, acc_strategy_all, sizeof(unsigned char) * BLOCK8_H * BLOCK8_W);
+        memcpy(raw_quant_field_i, acc_raw_quant_field_i, sizeof(int) * BLOCK8_H * BLOCK8_W);
+        memcpy(hls_order, acc_hls_order, sizeof(uint32_t) * MAX_ORDER);
+        memcpy(hls_dc8x8, acc_hls_dc8x8, sizeof(float) * ALL_PIXEL);
+        memcpy(hls_dc16x16, acc_hls_dc16x16, sizeof(float) * ALL_PIXEL);
+        memcpy(hls_dc32x32, acc_hls_dc32x32, sizeof(float) * ALL_PIXEL);
+    });
+
+    lossy_acc::join();
+
+// xf::common::utils_sw::Logger logger(std::cout, std::cerr);
+// cl_int fail;
+
+// struct timeval start_time; // End to end time clock start
+// gettimeofday(&start_time, 0);
+
+// // platform related operations
+// std::vector<cl::Device> devices = xcl::get_xil_devices();
+// cl::Device device = devices[0];
+
+// // Creating Context and Command Queue for selected Device
+// cl::Context context(device, NULL, NULL, NULL, &fail);
+// logger.logCreateContext(fail);
+// cl::CommandQueue q(context, device, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &fail);
+// logger.logCreateCommandQueue(fail);
+// std::string devName = device.getInfo<CL_DEVICE_NAME>();
+// printf("INFO: Found Device=%s\n", devName.c_str());
+// cl::Program::Binaries xclBins = xcl::import_binary_file(xclbinPath);
+
+// devices.resize(1);
+// cl::Program program(context, devices, xclBins, NULL, &fail);
+// logger.logCreateProgram(fail);
+
+// int repInt = 1;
+// // create kernels
+// std::vector<cl::Kernel> hls_lossy_enc_compute(repInt);
+// for (int i = 0; i < repInt; i++) {
+//     hls_lossy_enc_compute[i] = cl::Kernel(program, "hls_lossy_enc_compute", &fail);
+//     logger.logCreateKernel(fail);
+// }
+// std::cout << "INFO: kernel has been created" << std::endl;
+
+// // 1. create all I/O Buffer
+// float* hb_hls_opsin_1 = aligned_alloc<float>(ALL_PIXEL);
+// float* hb_hls_opsin_2 = aligned_alloc<float>(ALL_PIXEL);
+// float* hb_hls_opsin_3 = aligned_alloc<float>(ALL_PIXEL);
+// float* hb_hls_quant_field = aligned_alloc<float>(BLOCK8_H * BLOCK8_W);
+// float* hb_hls_masking_field = aligned_alloc<float>(BLOCK8_H * BLOCK8_W);
+// float* hb_aq_map_f = aligned_alloc<float>(BLOCK8_H * BLOCK8_W);
+// int8_t* hb_cmap_axi = aligned_alloc<int8_t>(TILE_W * TILE_H * 2);
+// int32_t* hb_ac_coef_axiout = aligned_alloc<int32_t>(ALL_PIXEL);
+// unsigned char* hb_strategy_all = aligned_alloc<u_char>(BLOCK8_W * BLOCK8_H);
+// int32_t* hb_raw_quant_field_i = aligned_alloc<int32_t>(BLOCK8_H * BLOCK8_W);
+// uint32_t* hb_hls_order = aligned_alloc<uint32_t>(MAX_ORDER);
+// float* hb_hls_dc8x8 = aligned_alloc<float>(ALL_PIXEL);
+// float* hb_hls_dc16x16 = aligned_alloc<float>(ALL_PIXEL);
+// float* hb_hls_dc32x32 = aligned_alloc<float>(ALL_PIXEL);
+// int32_t* hb_config = aligned_alloc<int32_t>(MAX_NUM_CONFIG);
+// float* hb_config_fl = aligned_alloc<float>(MAX_NUM_CONFIG);
+
+// //==================================================
+// // 2. init all the host Buffers
+// //==================================================
+
+// // input port
+// for (int j = 0; j < MAX_NUM_CONFIG; j++) {
+//     hb_config[j] = config[j];
+// }
+
+// for (int j = 0; j < MAX_NUM_CONFIG; j++) {
+//     hb_config_fl[j] = config_fl[j];
+// }
+
+// for (int j = 0; j < ALL_PIXEL; j++) {
+//     hb_hls_opsin_1[j] = hls_opsin_1[j];
+// }
+
+// for (int j = 0; j < ALL_PIXEL; j++) {
+//     hb_hls_opsin_2[j] = hls_opsin_2[j];
+// }
+
+// for (int j = 0; j < ALL_PIXEL; j++) {
+//     hb_hls_opsin_3[j] = hls_opsin_3[j];
+// }
+
+// for (int j = 0; j < BLOCK8_H * BLOCK8_W; j++) {
+//     hb_hls_quant_field[j] = hls_quant_field[j];
+// }
+
+// for (int j = 0; j < BLOCK8_H * BLOCK8_W; j++) {
+//     hb_hls_masking_field[j] = hls_masking_field[j];
+// }
+
+// for (int j = 0; j < BLOCK8_H * BLOCK8_W; j++) {
+//     hb_aq_map_f[j] = aq_map_f[j];
+// }
+
+// // mapping to HBM banks
+// std::vector<cl_mem_ext_ptr_t> mext_o(33);
+// mext_o[0] = {(((unsigned int)(0)) | XCL_MEM_TOPOLOGY), hb_hls_opsin_1, 0};
+// mext_o[1] = {(((unsigned int)(1)) | XCL_MEM_TOPOLOGY), hb_hls_opsin_2, 0};
+// mext_o[2] = {(((unsigned int)(2)) | XCL_MEM_TOPOLOGY), hb_hls_opsin_3, 0};
+// mext_o[3] = {(((unsigned int)(3)) | XCL_MEM_TOPOLOGY), hb_hls_quant_field, 0};
+// mext_o[4] = {(((unsigned int)(4)) | XCL_MEM_TOPOLOGY), hb_hls_masking_field, 0};
+// mext_o[5] = {(((unsigned int)(5)) | XCL_MEM_TOPOLOGY), hb_aq_map_f, 0};
+// mext_o[6] = {(((unsigned int)(6)) | XCL_MEM_TOPOLOGY), hb_cmap_axi, 0};
+// mext_o[7] = {(((unsigned int)(7)) | XCL_MEM_TOPOLOGY), hb_ac_coef_axiout, 0};
+// mext_o[8] = {(((unsigned int)(8)) | XCL_MEM_TOPOLOGY), hb_strategy_all, 0};
+// mext_o[9] = {(((unsigned int)(9)) | XCL_MEM_TOPOLOGY), hb_raw_quant_field_i, 0};
+// mext_o[10] = {(((unsigned int)(10)) | XCL_MEM_TOPOLOGY), hb_hls_order, 0};
+// mext_o[11] = {(((unsigned int)(11)) | XCL_MEM_TOPOLOGY), hb_hls_dc8x8, 0};
+// mext_o[12] = {(((unsigned int)(12)) | XCL_MEM_TOPOLOGY), hb_hls_dc16x16, 0};
+// mext_o[13] = {(((unsigned int)(13)) | XCL_MEM_TOPOLOGY), hb_hls_dc32x32, 0};
+// mext_o[14] = {(((unsigned int)(14)) | XCL_MEM_TOPOLOGY), hb_config, 0};
+// mext_o[15] = {(((unsigned int)(15)) | XCL_MEM_TOPOLOGY), hb_config_fl, 0};
+
+// //===================================================
+// // 3. create device Buffer and map dev buf to host buf,
+// //===================================================
+// cl::Buffer db_hls_opsin_1;       // mm1, input
+// cl::Buffer db_hls_opsin_2;       // mm2, input
+// cl::Buffer db_hls_opsin_3;       // mm3, input
+// cl::Buffer db_hls_quant_field;   // mm4, input
+// cl::Buffer db_hls_masking_field; // mm5, input
+// cl::Buffer db_aq_map_f;          // mm6, input
+// cl::Buffer db_cmap_axi;          // mm7, output
+// cl::Buffer db_ac_coef_axiout;    // mm8, output
+// cl::Buffer db_strategy_all;      // mm9, output
+// cl::Buffer db_raw_quant_field_i; // mm10, output
+// cl::Buffer db_hls_order;         // mm11, output
+// cl::Buffer db_hls_dc8x8;         // mm12, output
+// cl::Buffer db_hls_dc16x16;       // mm13, output
+// cl::Buffer db_hls_dc32x32;       // mm14, output
+// cl::Buffer db_config;            // mm15, input
+// cl::Buffer db_config_fl;         // mm16, input
+
+// // init cl Buffer
+// db_hls_opsin_1 = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+//                             sizeof(float) * ALL_PIXEL, &mext_o[0]);
+
+// db_hls_opsin_2 = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+//                             sizeof(float) * ALL_PIXEL, &mext_o[1]);
+
+// db_hls_opsin_3 = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+//                             sizeof(float) * ALL_PIXEL, &mext_o[2]);
+
+// db_hls_quant_field = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+//                                 sizeof(float) * (BLOCK8_H * BLOCK8_W), &mext_o[3]);
+
+// db_hls_masking_field = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+//                                   sizeof(float) * (BLOCK8_H * BLOCK8_W), &mext_o[4]);
+
+// db_aq_map_f = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+//                          sizeof(float) * (BLOCK8_H * BLOCK8_W), &mext_o[5]);
+
+// db_cmap_axi = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+//                          sizeof(int8_t) * (TILE_W * TILE_H * 2), &mext_o[6]);
+
+// db_ac_coef_axiout = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+//                                sizeof(int32_t) * ALL_PIXEL, &mext_o[7]);
+
+// db_strategy_all = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+//                              sizeof(u_char) * (BLOCK8_H * BLOCK8_W), &mext_o[8]);
+
+// db_raw_quant_field_i = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+//                                   sizeof(float) * (BLOCK8_H * BLOCK8_W), &mext_o[9]);
+
+// db_hls_order = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+//                           sizeof(float) * MAX_ORDER, &mext_o[10]);
+
+// db_hls_dc8x8 = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+//                           sizeof(float) * ALL_PIXEL, &mext_o[11]);
+
+// db_hls_dc16x16 = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+//                             sizeof(float) * ALL_PIXEL, &mext_o[12]);
+
+// db_hls_dc32x32 = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+//                             sizeof(float) * ALL_PIXEL, &mext_o[13]);
+
+// db_config = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+//                        sizeof(float) * MAX_NUM_CONFIG, &mext_o[14]);
+
+// db_config_fl = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+//                           sizeof(float) * MAX_NUM_CONFIG, &mext_o[15]);
+
+// //==================================
+// // add Buffers to migrate
+// std::vector<cl::Memory> ob_in;
+// std::vector<cl::Memory> ob_out;
+
+// ob_in.push_back(db_config);
+// ob_in.push_back(db_config_fl);
+// ob_in.push_back(db_hls_opsin_1);
+// ob_in.push_back(db_hls_opsin_2);
+// ob_in.push_back(db_hls_opsin_3);
+// ob_in.push_back(db_hls_quant_field);
+// ob_in.push_back(db_hls_masking_field);
+// ob_in.push_back(db_aq_map_f);
+
+// ob_out.push_back(db_cmap_axi);
+// ob_out.push_back(db_ac_coef_axiout);
+// ob_out.push_back(db_strategy_all);
+// ob_out.push_back(db_raw_quant_field_i);
+// ob_out.push_back(db_hls_order);
+// ob_out.push_back(db_hls_dc8x8);
+// ob_out.push_back(db_hls_dc16x16);
+// ob_out.push_back(db_hls_dc32x32);
+
+// // set kernel args
+// for (int i = 0; i < repInt; i++) {
+//     hls_lossy_enc_compute[i].setArg(0, db_config);
+//     hls_lossy_enc_compute[i].setArg(1, db_config_fl);
+//     hls_lossy_enc_compute[i].setArg(2, db_hls_opsin_1);
+//     hls_lossy_enc_compute[i].setArg(3, db_hls_opsin_2);
+//     hls_lossy_enc_compute[i].setArg(4, db_hls_opsin_3);
+//     hls_lossy_enc_compute[i].setArg(5, db_hls_quant_field);
+//     hls_lossy_enc_compute[i].setArg(6, db_hls_masking_field);
+//     hls_lossy_enc_compute[i].setArg(7, db_aq_map_f);
+//     hls_lossy_enc_compute[i].setArg(8, db_cmap_axi);
+//     hls_lossy_enc_compute[i].setArg(9, db_ac_coef_axiout);
+//     hls_lossy_enc_compute[i].setArg(10, db_strategy_all);
+//     hls_lossy_enc_compute[i].setArg(11, db_raw_quant_field_i);
+//     hls_lossy_enc_compute[i].setArg(12, db_hls_order);
+//     hls_lossy_enc_compute[i].setArg(13, db_hls_dc8x8);
+//     hls_lossy_enc_compute[i].setArg(14, db_hls_dc16x16);
+//     hls_lossy_enc_compute[i].setArg(15, db_hls_dc32x32);
+// }
+
+// // launch kernel and calculate kernel execution time
+// std::cout << "INFO: Kernel Start" << std::endl;
+// // declare events
+// std::vector<cl::Event> events_write(1);
+// std::vector<cl::Event> events_kernel(1);
+// std::vector<cl::Event> events_read(1);
+
+// // migrate,
+// q.enqueueMigrateMemObjects(ob_in, 0, nullptr, &events_write[0]);
+// q.enqueueTask(hls_lossy_enc_compute[0], &events_write, &events_kernel[0]);
+// q.enqueueMigrateMemObjects(ob_out, 1, &events_kernel, &events_read[0]);
+// q.finish();
+
+// struct timeval end_time;
+// gettimeofday(&end_time, 0);
+// std::cout << "INFO: Finish kernel execution" << std::endl;
+// std::cout << "INFO: Finish E2E execution" << std::endl;
+
+// // print related times
+// unsigned long timeStart, timeEnd, exec_time0;
+// std::cout << "-------------------------------------------------------" << std::endl;
+// events_write[0].getProfilingInfo(CL_PROFILING_COMMAND_START, &timeStart);
+// events_write[0].getProfilingInfo(CL_PROFILING_COMMAND_END, &timeEnd);
+// exec_time0 = (timeEnd - timeStart) / 1000.0;
+// std::cout << "INFO: Data transfer from host to device: " << exec_time0 << " us\n";
+// std::cout << "-------------------------------------------------------" << std::endl;
+// events_read[0].getProfilingInfo(CL_PROFILING_COMMAND_START, &timeStart);
+// events_read[0].getProfilingInfo(CL_PROFILING_COMMAND_END, &timeEnd);
+// exec_time0 = (timeEnd - timeStart) / 1000.0;
+// std::cout << "INFO: Kernel1 Data transfer from device to host: " << exec_time0 << " us\n";
+// std::cout << "-------------------------------------------------------" << std::endl;
+// exec_time0 = 0;
+// for (int i = 0; i < 1; ++i) {
+//     events_kernel[0].getProfilingInfo(CL_PROFILING_COMMAND_START, &timeStart);
+//     events_kernel[0].getProfilingInfo(CL_PROFILING_COMMAND_END, &timeEnd);
+//     exec_time0 += (timeEnd - timeStart) / 1000.0;
+
+//     std::cout << "INFO: Kernel" << i + 1 << " execution: " << (timeEnd - timeStart) / 1000.0 << " us\n";
+//     std::cout << "-------------------------------------------------------" << std::endl;
+// }
+// std::cout << "INFO: kernel total execution: " << exec_time0 << " us\n";
+// std::cout << "-------------------------------------------------------" << std::endl;
+// unsigned long exec_timeE2E = diff(&end_time, &start_time);
+// std::cout << "INFO: FPGA execution time:" << exec_timeE2E << " us\n";
+// std::cout << "-------------------------------------------------------" << std::endl;
+
+// // output
+// for (int j = 0; j < TILE_W * TILE_H * 2; j++) {
+//     cmap_axi[j] = hb_cmap_axi[j];
+// }
+
+// for (int j = 0; j < ALL_PIXEL; j++) {
+//     ac_coef_axiout[j] = hb_ac_coef_axiout[j];
+// }
+
+// for (int j = 0; j < BLOCK8_W * BLOCK8_H; j++) {
+//     strategy_all[j] = hb_strategy_all[j];
+// }
+
+// for (int j = 0; j < BLOCK8_H * BLOCK8_W; j++) {
+//     raw_quant_field_i[j] = hb_raw_quant_field_i[j];
+// }
+
+// for (int j = 0; j < MAX_ORDER; j++) {
+//     hls_order[j] = hb_hls_order[j];
+// }
+
+// for (int j = 0; j < ALL_PIXEL; j++) {
+//     hls_dc8x8[j] = hb_hls_dc8x8[j];
+// }
+
+// for (int j = 0; j < ALL_PIXEL; j++) {
+//     hls_dc16x16[j] = hb_hls_dc16x16[j];
+// }
+
+// for (int j = 0; j < ALL_PIXEL; j++) {
+//     hls_dc32x32[j] = hb_hls_dc32x32[j];
+// }
+
+// // free mem
+// free(hb_hls_opsin_1);
+// free(hb_hls_opsin_2);
+// free(hb_hls_opsin_3);
+// free(hb_hls_quant_field);
+// free(hb_hls_masking_field);
+// free(hb_aq_map_f);
+// free(hb_cmap_axi);
+// free(hb_ac_coef_axiout);
+// free(hb_strategy_all);
+// free(hb_raw_quant_field_i);
+// free(hb_hls_order);
+// free(hb_hls_dc8x8);
+// free(hb_hls_dc16x16);
+// free(hb_hls_dc32x32);
+// free(hb_config);
+// free(hb_config_fl);
+#else
+    hls_lossy_enc_compute(config, config_fl, hls_opsin_1, hls_opsin_2, hls_opsin_3, hls_quant_field, hls_masking_field,
+                          aq_map_f, cmap_axi, ac_coef_axiout, strategy_all, raw_quant_field_i, hls_order, hls_dc8x8,
+                          hls_dc16x16, hls_dc32x32);
+#endif
+}
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/host/host_lossy_enc_compute.hpp b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/host/host_lossy_enc_compute.hpp
new file mode 100644
index 0000000000..cb79414c90
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/host/host_lossy_enc_compute.hpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HOST_LOSSY_ENC_COMPUTE_SC_HPP
+#define HOST_LOSSY_ENC_COMPUTE_SC_HPP
+
+#include <iostream>
+#include <sys/time.h>
+
+#ifndef HLS_TEST
+#include "xf_utils_sw/logger.hpp"
+#include "hls_lossy_enc_compute.hpp"
+
+// const int PIXEL_W = 2048;
+// const int PIXEL_H = 2048;
+// const int FRAME_DIM = 3;
+// const int ALL_PIXEL = PIXEL_W * PIXEL_H * FRAME_DIM;
+// const int BLOCK8_W = PIXEL_W / 8;
+// const int BLOCK8_H = PIXEL_H / 8;
+// const int BLOCK8_NUM = BLOCK8_W * BLOCK8_H * FRAME_DIM;
+// const int TILE_W = PIXEL_W / 64;
+// const int TILE_H = PIXEL_H / 64;
+// const int MAX_ORDER = 320 * 3 + 1;
+// const int MAX_NUM_CONFIG = 32;
+
+#else
+#include "hls_lossy_enc_compute.hpp"
+#endif
+// Host-side wrapper declaration; this header provides no body. NOTE(review): argument roles are inferred from names - confirm against the definition.
+void hls_lossy_enc_compute_wrapper(std::string xclbinPath,  // taken by value, so the string is copied on each call
+                                   int config[32],  // adjusted to int*; the 32 bound is not enforced by the compiler
+                                   float config_fl[32],  // adjusted to float*; the 32 bound is not enforced by the compiler
+                                   float* hls_opsin_1,
+                                   float* hls_opsin_2,
+                                   float* hls_opsin_3,
+                                   float* quant_field_row,  // NOTE(review): the host .cpp kernel call names this position hls_quant_field - confirm same buffer
+                                   float* masking_field_row,  // NOTE(review): the host .cpp kernel call names this position hls_masking_field - confirm same buffer
+                                   float* aq_map_f,
+                                   int8_t* cmap_axi,  // NOTE(review): presumably written by the kernel (output) - confirm
+                                   int* ac_coef_axiout,  // NOTE(review): presumably output - confirm
+                                   unsigned char* strategy_all,  // NOTE(review): presumably output - confirm
+                                   int* raw_quant_field_i,  // NOTE(review): presumably output - confirm
+                                   uint32_t* hls_order,  // NOTE(review): presumably output - confirm
+                                   float* hls_dc8x8,  // NOTE(review): presumably output - confirm
+                                   float* hls_dc16x16,  // NOTE(review): presumably output - confirm
+                                   float* hls_dc32x32);  // NOTE(review): presumably output - confirm
+#endif
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/kernel/hls_lossy_enc_compute.cpp b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/kernel/hls_lossy_enc_compute.cpp
new file mode 100644
index 0000000000..fdf8acb5a4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/kernel/hls_lossy_enc_compute.cpp
@@ -0,0 +1,9401 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HLS_LOSSY_ENC_COMPUTE_CPP
+#define HLS_LOSSY_ENC_COMPUTE_CPP
+
+#include "hls_lossy_enc_compute.hpp"
+
+#define FIX
+
+// uint8_t covered_blocks_x_set[6] = {1, 1, 1, 1, 2, 4};
+// uint8_t covered_blocks_y_set[6] = {1, 1, 1, 1, 2, 4};
+uint8_t strategy_block[6] = {1, 1, 1, 1, 2, 4};  // mutable global; same six values as the two disabled tables above. NOTE(review): presumably blocks covered per axis by strategy index - confirm
+
+const float inv_matrix_8[3][64] = {{0,  // [0][0..63] starts here. Read-only table of 3 x 64 floats; NOTE(review): presumably one flattened 8x8 matrix per colour channel - confirm
+                                    560,
+                                    558.510437012,
+                                    489.194152832,
+                                    428.480621338,
+                                    375.302246094,
+                                    328.723815918,
+                                    287.926147461,
+                                    560,
+                                    560,
+                                    541.309387207,
+                                    478.786773682,
+                                    421.547454834,
+                                    370.409942627,
+                                    325.138336182,
+                                    285.227325439,
+                                    558.510437012,
+                                    541.309387207,
+                                    500.443756104,
+                                    451.472991943,
+                                    402.49432373,
+                                    356.627593994,
+                                    314.88571167,
+                                    277.434692383,
+                                    489.194152832,
+                                    478.786773682,
+                                    451.472991943,
+                                    414.922729492,
+                                    375.302246094,
+                                    336.170715332,
+                                    299.277435303,
+                                    265.364807129,
+                                    428.480621338,
+                                    421.547454834,
+                                    402.49432373,
+                                    375.302246094,
+                                    344.016448975,
+                                    311.624298096,
+                                    279.983337402,
+                                    250.119842529,
+                                    375.302246094,
+                                    370.409942627,
+                                    356.627593994,
+                                    336.170715332,
+                                    311.624298096,
+                                    285.227325439,
+                                    258.613525391,
+                                    232.845169067,
+                                    328.723815918,
+                                    325.138336182,
+                                    314.88571167,
+                                    299.277435303,
+                                    279.983337402,
+                                    258.613525391,
+                                    236.484725952,
+                                    214.558776855,
+                                    287.926147461,
+                                    285.227325439,
+                                    277.434692383,
+                                    265.364807129,
+                                    250.119842529,
+                                    232.845169067,
+                                    214.558776855,
+                                    196.071777344},
+                                   {0,  // [1][0..63] starts here
+                                    3150,
+                                    3139.25854492,
+                                    2648.63037109,
+                                    2234.68115234,
+                                    1885.42749023,
+                                    1590.75805664,
+                                    1342.14172363,
+                                    3150,
+                                    3150,
+                                    3015.80957031,
+                                    2576.58398438,
+                                    2188.41503906,
+                                    1853.96557617,
+                                    1568.54064941,
+                                    1326.02929688,
+                                    3139.25854492,
+                                    3015.80957031,
+                                    2726.99536133,
+                                    2389.61645508,
+                                    2062.38256836,
+                                    1765.96655273,
+                                    1505.39343262,
+                                    1279.74853516,
+                                    2648.63037109,
+                                    2576.58398438,
+                                    2389.61645508,
+                                    2144.4074707,
+                                    1885.42749023,
+                                    1637.12109375,
+                                    1410.37487793,
+                                    1208.78967285,
+                                    2234.68115234,
+                                    2188.41503906,
+                                    2062.38256836,
+                                    1885.42749023,
+                                    1686.28210449,
+                                    1485.42663574,
+                                    1294.84509277,
+                                    1060.59338379,
+                                    1885.42749023,
+                                    1853.96557617,
+                                    1765.96655273,
+                                    1637.12109375,
+                                    1485.42663574,
+                                    1326.02929688,
+                                    1169.49206543,
+                                    785.963012695,
+                                    1590.75805664,
+                                    1568.54064941,
+                                    1505.39343262,
+                                    1410.37487793,
+                                    1294.84509277,
+                                    1169.49206543,
+                                    838.701721191,
+                                    558.03729248,
+                                    1342.14172363,
+                                    1326.02929688,
+                                    1279.74853516,
+                                    1208.78967285,
+                                    1060.59338379,
+                                    785.963012695,
+                                    558.03729248,
+                                    382.654693604},
+                                   {0,  // [2][0..63] starts here
+                                    293.959503174,
+                                    169.469955444,
+                                    119.412483215,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    83.5508270264,
+                                    58.8718566895,
+                                    293.959503174,
+                                    233.598114014,
+                                    156.027160645,
+                                    112.817504883,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    81.1647109985,
+                                    57.4251747131,
+                                    169.469955444,
+                                    156.027160645,
+                                    126.80493927,
+                                    96.6006240845,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    74.5768890381,
+                                    53.3726730347,
+                                    119.412483215,
+                                    112.817504883,
+                                    96.6006240845,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    65.2038497925,
+                                    47.4551811218,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    72.5535202026,
+                                    54.6778106689,
+                                    39.419506073,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    85.3333358765,
+                                    72.5535202026,
+                                    57.4251747131,
+                                    44.3317565918,
+                                    29.2122058868,
+                                    83.5508270264,
+                                    81.1647109985,
+                                    74.5768890381,
+                                    65.2038497925,
+                                    54.6778106689,
+                                    44.3317565918,
+                                    31.1723690033,
+                                    20.7407989502,
+                                    58.8718566895,
+                                    57.4251747131,
+                                    53.3726730347,
+                                    47.4551811218,
+                                    39.419506073,
+                                    29.2122058868,
+                                    20.7407989502,
+                                    14.2222824097}};
+const float inv_matrix_16[3][256] = {{0,
+                                      0,
+                                      2384.4128418,
+                                      2060.98974609,
+                                      1763.60900879,
+                                      1491.73779297,
+                                      1261.77709961,
+                                      1067.26635742,
+                                      956.67767334,
+                                      861.364074707,
+                                      775.546569824,
+                                      703.312927246,
+                                      644.910888672,
+                                      591.358520508,
+                                      542.252990723,
+                                      501.345214844,
+                                      0,
+                                      0,
+                                      2303.75878906,
+                                      2012.80981445,
+                                      1727.63220215,
+                                      1467.21154785,
+                                      1244.41430664,
+                                      1054.64306641,
+                                      950.44720459,
+                                      856.371826172,
+                                      771.497619629,
+                                      700.552734375,
+                                      642.589599609,
+                                      589.392944336,
+                                      540.578857422,
+                                      500.060272217,
+                                      2384.4128418,
+                                      2303.75878906,
+                                      2113.18408203,
+                                      1884.00744629,
+                                      1629.57141113,
+                                      1398.57958984,
+                                      1195.04504395,
+                                      1031.75708008,
+                                      932.273986816,
+                                      841.744262695,
+                                      759.593811035,
+                                      692.403076172,
+                                      635.722961426,
+                                      583.569458008,
+                                      535.612548828,
+                                      496.2421875,
+                                      2060.98974609,
+                                      2012.80981445,
+                                      1884.00744629,
+                                      1693.40161133,
+                                      1491.73779297,
+                                      1297.99816895,
+                                      1120.69970703,
+                                      996.043395996,
+                                      903.588256836,
+                                      818.460021973,
+                                      740.524108887,
+                                      679.239624023,
+                                      624.590454102,
+                                      574.100036621,
+                                      528.409057617,
+                                      489.997619629,
+                                      1763.60900879,
+                                      1727.63220215,
+                                      1629.57141113,
+                                      1491.73779297,
+                                      1336.38830566,
+                                      1179.42834473,
+                                      1039.25634766,
+                                      950.44720459,
+                                      866.416687012,
+                                      787.946533203,
+                                      717.456176758,
+                                      661.633422852,
+                                      609.623046875,
+                                      561.31427002,
+                                      518.629089355,
+                                      481.495361328,
+                                      1491.73779297,
+                                      1467.21154785,
+                                      1398.57958984,
+                                      1297.99816895,
+                                      1179.42834473,
+                                      1054.64294434,
+                                      975.919921875,
+                                      898.074401855,
+                                      823.012390137,
+                                      751.853820801,
+                                      692.403076172,
+                                      640.284667969,
+                                      591.358520508,
+                                      545.629760742,
+                                      506.54699707,
+                                      470.954223633,
+                                      1261.77709961,
+                                      1244.41430664,
+                                      1195.04504395,
+                                      1120.69970703,
+                                      1039.25634766,
+                                      975.919921875,
+                                      909.174133301,
+                                      841.744262695,
+                                      775.546569824,
+                                      714.580871582,
+                                      664.092590332,
+                                      615.952392578,
+                                      570.392150879,
+                                      528.409057617,
+                                      492.477874756,
+                                      458.628570557,
+                                      1067.26635742,
+                                      1054.64306641,
+                                      1031.75708008,
+                                      996.043395996,
+                                      950.44720459,
+                                      898.074401855,
+                                      841.744262695,
+                                      783.770263672,
+                                      726.22833252,
+                                      679.239624023,
+                                      633.465698242,
+                                      589.392944336,
+                                      547.332580566,
+                                      510.515045166,
+                                      476.757659912,
+                                      444.792907715,
+                                      956.67767334,
+                                      950.44720459,
+                                      932.273986816,
+                                      903.588256836,
+                                      866.416687012,
+                                      823.012390137,
+                                      775.546569824,
+                                      726.22833252,
+                                      684.443725586,
+                                      642.589599609,
+                                      601.375,
+                                      561.31439209,
+                                      524.175048828,
+                                      491.234863281,
+                                      459.72479248,
+                                      429.72869873,
+                                      861.364074707,
+                                      856.371826172,
+                                      841.744262695,
+                                      818.460021973,
+                                      787.946533203,
+                                      751.853820801,
+                                      714.580871582,
+                                      679.239624023,
+                                      642.589599609,
+                                      605.472290039,
+                                      568.554870605,
+                                      532.708679199,
+                                      501.345214844,
+                                      470.954223633,
+                                      441.705718994,
+                                      413.71182251,
+                                      775.546569824,
+                                      771.497619629,
+                                      759.593811035,
+                                      740.524108887,
+                                      717.456176758,
+                                      692.403076172,
+                                      664.092590332,
+                                      633.465698242,
+                                      601.375,
+                                      568.554870605,
+                                      535.612426758,
+                                      506.546936035,
+                                      477.933990479,
+                                      450.024688721,
+                                      423.003997803,
+                                      395.167694092,
+                                      703.312927246,
+                                      700.552734375,
+                                      692.403076172,
+                                      679.239624023,
+                                      661.633422852,
+                                      640.284667969,
+                                      615.952392578,
+                                      589.392944336,
+                                      561.31439209,
+                                      532.708679199,
+                                      506.546936035,
+                                      480.302856445,
+                                      454.290039062,
+                                      428.756591797,
+                                      403.216186523,
+                                      375.228302002,
+                                      644.910888672,
+                                      642.589599609,
+                                      635.722961426,
+                                      624.590454102,
+                                      609.623046875,
+                                      591.358520508,
+                                      570.392150879,
+                                      547.332580566,
+                                      524.175048828,
+                                      501.345214844,
+                                      477.933990479,
+                                      454.290039062,
+                                      430.704803467,
+                                      407.340545654,
+                                      380.75769043,
+                                      355.171173096,
+                                      591.358520508,
+                                      589.392944336,
+                                      583.569458008,
+                                      574.100036621,
+                                      561.31427002,
+                                      545.629760742,
+                                      528.409057617,
+                                      510.515045166,
+                                      491.234863281,
+                                      470.954223633,
+                                      450.024688721,
+                                      428.756591797,
+                                      407.340545654,
+                                      382.62991333,
+                                      358.535705566,
+                                      335.223266602,
+                                      542.252990723,
+                                      540.578857422,
+                                      535.612548828,
+                                      528.409057617,
+                                      518.629089355,
+                                      506.54699707,
+                                      492.477874756,
+                                      476.757659912,
+                                      459.72479248,
+                                      441.705718994,
+                                      423.003997803,
+                                      403.216186523,
+                                      380.75769043,
+                                      358.535705566,
+                                      336.753845215,
+                                      315.57409668,
+                                      501.345214844,
+                                      500.060272217,
+                                      496.2421875,
+                                      489.997619629,
+                                      481.495361328,
+                                      470.954223633,
+                                      458.628570557,
+                                      444.792907715,
+                                      429.72869873,
+                                      413.71182251,
+                                      395.167694092,
+                                      375.228302002,
+                                      355.171173096,
+                                      335.223266602,
+                                      315.57409668,
+                                      296.378265381},
+                                     {0,
+                                      0,
+                                      5616.41552734,
+                                      4437.54785156,
+                                      3710.52368164,
+                                      3312.08374023,
+                                      2956.42822266,
+                                      2638.96386719,
+                                      2378.97973633,
+                                      2146.23095703,
+                                      1936.2532959,
+                                      1722.18615723,
+                                      1498.60571289,
+                                      1304.05163574,
+                                      1134.75488281,
+                                      951.882019043,
+                                      0,
+                                      0,
+                                      5312.58251953,
+                                      4271.09716797,
+                                      3658.99584961,
+                                      3275.03710938,
+                                      2928.76391602,
+                                      2617.74536133,
+                                      2363.77954102,
+                                      2134.02709961,
+                                      1926.33569336,
+                                      1711.35717773,
+                                      1489.96264648,
+                                      1297.10559082,
+                                      1129.14038086,
+                                      946.136962891,
+                                      5616.41552734,
+                                      5312.58251953,
+                                      4620.59277344,
+                                      3880.56469727,
+                                      3516.76147461,
+                                      3170.29418945,
+                                      2849.4152832,
+                                      2562.00634766,
+                                      2319.43164062,
+                                      2098.26171875,
+                                      1897.17285156,
+                                      1679.53442383,
+                                      1464.50524902,
+                                      1276.60888672,
+                                      1112.54638672,
+                                      929.184143066,
+                                      4437.54785156,
+                                      4271.09716797,
+                                      3880.56469727,
+                                      3609.64770508,
+                                      3312.08374023,
+                                      3013.74951172,
+                                      2727.90283203,
+                                      2474.97729492,
+                                      2249.39648438,
+                                      2041.30578613,
+                                      1850.4362793,
+                                      1628.60998535,
+                                      1423.58496094,
+                                      1243.54284668,
+                                      1077.57275391,
+                                      901.836975098,
+                                      3710.52368164,
+                                      3658.99584961,
+                                      3516.76147461,
+                                      3312.08374023,
+                                      3073.94458008,
+                                      2824.09741211,
+                                      2580.27368164,
+                                      2363.77954102,
+                                      2158.58081055,
+                                      1966.61950684,
+                                      1778.07653809,
+                                      1561.42590332,
+                                      1369.25976562,
+                                      1199.41723633,
+                                      1031.11547852,
+                                      865.35723877,
+                                      3312.08374023,
+                                      3275.03710938,
+                                      3170.29418945,
+                                      3013.74951172,
+                                      2824.09741211,
+                                      2617.74511719,
+                                      2425.91333008,
+                                      2235.92993164,
+                                      2052.44384766,
+                                      1878.20617676,
+                                      1679.53442383,
+                                      1481.39880371,
+                                      1304.05163574,
+                                      1146.11157227,
+                                      975.344787598,
+                                      821.329833984,
+                                      2956.42822266,
+                                      2928.76391602,
+                                      2849.4152832,
+                                      2727.90283203,
+                                      2580.27368164,
+                                      2425.91333008,
+                                      2263.03759766,
+                                      2098.26171875,
+                                      1936.2532959,
+                                      1766.65966797,
+                                      1570.74584961,
+                                      1392.13525391,
+                                      1230.68457031,
+                                      1077.57275391,
+                                      912.64251709,
+                                      771.521240234,
+                                      2638.96386719,
+                                      2617.74536133,
+                                      2562.00634766,
+                                      2474.97729492,
+                                      2363.77954102,
+                                      2235.92993164,
+                                      2098.26171875,
+                                      1956.39318848,
+                                      1813.07836914,
+                                      1628.60998535,
+                                      1456.17285156,
+                                      1297.10559082,
+                                      1151.85449219,
+                                      993.464355469,
+                                      845.405334473,
+                                      717.737731934,
+                                      2378.97973633,
+                                      2363.77954102,
+                                      2319.43164062,
+                                      2249.39648438,
+                                      2158.58081055,
+                                      2052.44384766,
+                                      1936.2532959,
+                                      1813.07836914,
+                                      1648.67211914,
+                                      1489.96264648,
+                                      1339.6640625,
+                                      1199.41748047,
+                                      1057.31555176,
+                                      907.217956543,
+                                      775.878479004,
+                                      661.709289551,
+                                      2146.23095703,
+                                      2134.02709961,
+                                      2098.26171875,
+                                      2041.30578613,
+                                      1966.61950684,
+                                      1878.20617676,
+                                      1766.65966797,
+                                      1628.60998535,
+                                      1489.96264648,
+                                      1354.33557129,
+                                      1224.33178711,
+                                      1098.37109375,
+                                      951.882019043,
+                                      821.329833984,
+                                      706.041503906,
+                                      604.99597168,
+                                      1936.2532959,
+                                      1926.33569336,
+                                      1897.17285156,
+                                      1850.4362793,
+                                      1778.07653809,
+                                      1679.53442383,
+                                      1570.74584961,
+                                      1456.17285156,
+                                      1339.6640625,
+                                      1224.33178711,
+                                      1112.54614258,
+                                      975.344482422,
+                                      850.33416748,
+                                      737.812194824,
+                                      637.541503906,
+                                      531.866638184,
+                                      1722.18615723,
+                                      1711.35717773,
+                                      1679.53442383,
+                                      1628.60998535,
+                                      1561.42590332,
+                                      1481.39880371,
+                                      1392.13525391,
+                                      1297.10559082,
+                                      1199.41748047,
+                                      1098.37109375,
+                                      975.344482422,
+                                      860.309997559,
+                                      754.414855957,
+                                      658.18359375,
+                                      565.168762207,
+                                      455.065155029,
+                                      1498.60571289,
+                                      1489.96264648,
+                                      1464.50524902,
+                                      1423.58496094,
+                                      1369.25976562,
+                                      1304.05163574,
+                                      1230.68457031,
+                                      1151.85449219,
+                                      1057.31555176,
+                                      951.882019043,
+                                      850.33416748,
+                                      754.414855957,
+                                      665.260375977,
+                                      582.761047363,
+                                      475.564758301,
+                                      385.666412354,
+                                      1304.05163574,
+                                      1297.10559082,
+                                      1276.60888672,
+                                      1243.54284668,
+                                      1199.41723633,
+                                      1146.11157227,
+                                      1077.57275391,
+                                      993.464355469,
+                                      907.217956543,
+                                      821.329833984,
+                                      737.812194824,
+                                      658.18359375,
+                                      582.761047363,
+                                      482.643035889,
+                                      396.775939941,
+                                      324.039428711,
+                                      1134.75488281,
+                                      1129.14038086,
+                                      1112.54638672,
+                                      1077.57275391,
+                                      1031.11547852,
+                                      975.344787598,
+                                      912.64251709,
+                                      845.405334473,
+                                      775.878479004,
+                                      706.041503906,
+                                      637.541503906,
+                                      565.168762207,
+                                      475.564758301,
+                                      396.775939941,
+                                      328.516326904,
+                                      270.136077881,
+                                      951.882019043,
+                                      946.136962891,
+                                      929.184143066,
+                                      901.836975098,
+                                      865.35723877,
+                                      821.329833984,
+                                      771.521240234,
+                                      717.737731934,
+                                      661.709289551,
+                                      604.99597168,
+                                      531.866638184,
+                                      455.065155029,
+                                      385.666412354,
+                                      324.039428711,
+                                      270.136077881,
+                                      223.60848999},
+                                     {0,
+                                      0,
+                                      615.613830566,
+                                      448.953399658,
+                                      337.930267334,
+                                      263.807556152,
+                                      205.943115234,
+                                      160.770889282,
+                                      141.832733154,
+                                      126.301643372,
+                                      112.471244812,
+                                      100.763389587,
+                                      91.1208114624,
+                                      82.4009933472,
+                                      74.5156097412,
+                                      58.8962364197,
+                                      0,
+                                      0,
+                                      571.402038574,
+                                      426.532226562,
+                                      327.784393311,
+                                      257.417816162,
+                                      201.765563965,
+                                      157.966430664,
+                                      140.812332153,
+                                      125.492965698,
+                                      111.822540283,
+                                      100.304679871,
+                                      90.7403564453,
+                                      82.0832748413,
+                                      74.2487335205,
+                                      58.3933258057,
+                                      615.613830566,
+                                      571.402038574,
+                                      473.941894531,
+                                      372.602783203,
+                                      300.644775391,
+                                      239.80960083,
+                                      190.039825439,
+                                      154.182662964,
+                                      137.840042114,
+                                      123.126365662,
+                                      109.91746521,
+                                      98.952003479,
+                                      89.6162185669,
+                                      81.1429672241,
+                                      73.4578170776,
+                                      56.9167442322,
+                                      448.953399658,
+                                      426.532226562,
+                                      372.602783203,
+                                      318.224456787,
+                                      263.807556152,
+                                      214.746795654,
+                                      172.817260742,
+                                      148.295852661,
+                                      133.160797119,
+                                      119.368148804,
+                                      106.872108459,
+                                      96.7725219727,
+                                      87.7978591919,
+                                      79.6171722412,
+                                      70.2083129883,
+                                      54.5584373474,
+                                      337.930267334,
+                                      327.784393311,
+                                      300.644775391,
+                                      263.807556152,
+                                      224.206954956,
+                                      186.378311157,
+                                      155.421569824,
+                                      140.812332153,
+                                      127.120582581,
+                                      114.460098267,
+                                      103.118339539,
+                                      93.8680496216,
+                                      85.3613052368,
+                                      77.5634307861,
+                                      65.9593734741,
+                                      51.4587516785,
+                                      263.807556152,
+                                      257.417816162,
+                                      239.80960083,
+                                      214.746795654,
+                                      186.378311157,
+                                      157.966400146,
+                                      144.988540649,
+                                      132.263153076,
+                                      120.102050781,
+                                      108.680435181,
+                                      98.952003479,
+                                      90.3628005981,
+                                      82.4009933472,
+                                      75.0543060303,
+                                      60.9631996155,
+                                      47.7897415161,
+                                      205.943115234,
+                                      201.765563965,
+                                      190.039825439,
+                                      172.817260742,
+                                      155.421569824,
+                                      144.988540649,
+                                      134.070770264,
+                                      123.126365662,
+                                      112.471244812,
+                                      102.638969421,
+                                      94.2730102539,
+                                      86.3905029297,
+                                      79.0208206177,
+                                      70.2083129883,
+                                      55.4867515564,
+                                      43.7368011475,
+                                      160.770889282,
+                                      157.966430664,
+                                      154.182662964,
+                                      148.295852661,
+                                      140.812332153,
+                                      132.263153076,
+                                      123.126365662,
+                                      113.789886475,
+                                      104.582710266,
+                                      96.7725219727,
+                                      89.2471008301,
+                                      82.0832748413,
+                                      75.3261566162,
+                                      62.5737113953,
+                                      49.7861824036,
+                                      39.4813766479,
+                                      141.832733154,
+                                      140.812332153,
+                                      137.840042114,
+                                      133.160797119,
+                                      127.120582581,
+                                      120.102050781,
+                                      112.471244812,
+                                      104.582710266,
+                                      97.6333694458,
+                                      90.7403564453,
+                                      84.0226669312,
+                                      77.5634460449,
+                                      68.3460235596,
+                                      55.020149231,
+                                      44.0871162415,
+                                      35.1875991821,
+                                      126.301643372,
+                                      125.492965698,
+                                      123.126365662,
+                                      119.368148804,
+                                      114.460098267,
+                                      108.680435181,
+                                      102.638969421,
+                                      96.7725219727,
+                                      90.7403564453,
+                                      84.6872787476,
+                                      78.7255554199,
+                                      72.1355895996,
+                                      58.8962364197,
+                                      47.7897415161,
+                                      38.5730819702,
+                                      30.993062973,
+                                      112.471244812,
+                                      111.822540283,
+                                      109.91746521,
+                                      106.872108459,
+                                      103.118339539,
+                                      98.952003479,
+                                      94.2730102539,
+                                      89.2471008301,
+                                      84.0226669312,
+                                      78.7255554199,
+                                      73.4578094482,
+                                      60.9631729126,
+                                      50.1978492737,
+                                      41.0546913147,
+                                      33.3810348511,
+                                      24.7806758881,
+                                      100.763389587,
+                                      100.304679871,
+                                      98.952003479,
+                                      96.7725219727,
+                                      93.8680496216,
+                                      90.3628005981,
+                                      86.3905029297,
+                                      82.0832748413,
+                                      77.5634460449,
+                                      72.1355895996,
+                                      60.9631729126,
+                                      51.0341072083,
+                                      42.3694725037,
+                                      34.9223136902,
+                                      27.7260704041,
+                                      18.5722160339,
+                                      91.1208114624,
+                                      90.7403564453,
+                                      89.6162185669,
+                                      87.7978591919,
+                                      85.3613052368,
+                                      82.4009933472,
+                                      79.0208206177,
+                                      75.3261566162,
+                                      68.3460235596,
+                                      58.8962364197,
+                                      50.1978492737,
+                                      42.3694725037,
+                                      35.4553947449,
+                                      29.343132019,
+                                      20.1489048004,
+                                      13.676407814,
+                                      82.4009933472,
+                                      82.0832748413,
+                                      81.1429672241,
+                                      79.6171722412,
+                                      77.5634307861,
+                                      75.0543060303,
+                                      70.2083129883,
+                                      62.5737113953,
+                                      55.020149231,
+                                      47.7897415161,
+                                      41.0546913147,
+                                      34.9223136902,
+                                      29.343132019,
+                                      20.7069969177,
+                                      14.4138498306,
+                                      9.9115486145,
+                                      74.5156097412,
+                                      74.2487335205,
+                                      73.4578170776,
+                                      70.2083129883,
+                                      65.9593734741,
+                                      60.9631996155,
+                                      55.4867515564,
+                                      49.7861824036,
+                                      44.0871162415,
+                                      38.5730819702,
+                                      33.3810348511,
+                                      27.7260704041,
+                                      20.1489048004,
+                                      14.4138498306,
+                                      10.166267395,
+                                      7.07980155945,
+                                      58.8962364197,
+                                      58.3933258057,
+                                      56.9167442322,
+                                      54.5584373474,
+                                      51.4587516785,
+                                      47.7897415161,
+                                      43.7368011475,
+                                      39.4813766479,
+                                      35.1875991821,
+                                      30.993062973,
+                                      24.7806758881,
+                                      18.5722160339,
+                                      13.676407814,
+                                      9.9115486145,
+                                      7.07980155945,
+                                      4.99121952057}};
+const float inv_matrix_32[3][1024] = {{0,
+                                       0,
+                                       0,
+                                       0,
+                                       5011.67871094,
+                                       4561.02685547,
+                                       4150.89794922,
+                                       3787.85327148,
+                                       3459.89013672,
+                                       3160.32299805,
+                                       2886.69311523,
+                                       2636.75488281,
+                                       2408.45727539,
+                                       2220.78833008,
+                                       2069.29418945,
+                                       1928.13452148,
+                                       1796.60424805,
+                                       1674.04626465,
+                                       1559.84912109,
+                                       1455.32824707,
+                                       1364.40710449,
+                                       1279.16601562,
+                                       1199.25048828,
+                                       1124.32775879,
+                                       1054.08581543,
+                                       988.231933594,
+                                       932.328857422,
+                                       879.889831543,
+                                       830.400390625,
+                                       783.694335938,
+                                       739.61541748,
+                                       698.015563965,
+                                       0,
+                                       0,
+                                       0,
+                                       0,
+                                       4953.88232422,
+                                       4518.67041016,
+                                       4118.65429688,
+                                       3763.55249023,
+                                       3440.43725586,
+                                       3144.51098633,
+                                       2873.68359375,
+                                       2625.9453125,
+                                       2399.40185547,
+                                       2214.77026367,
+                                       2064.08569336,
+                                       1923.60375977,
+                                       1792.64550781,
+                                       1670.57409668,
+                                       1556.79296875,
+                                       1452.8614502,
+                                       1362.2097168,
+                                       1277.20385742,
+                                       1197.49438477,
+                                       1122.75280762,
+                                       1052.67053223,
+                                       986.958068848,
+                                       931.291748047,
+                                       878.947387695,
+                                       829.542602539,
+                                       782.912841797,
+                                       738.902404785,
+                                       697.364379883,
+                                       0,
+                                       0,
+                                       0,
+                                       0,
+                                       4793.61474609,
+                                       4398.46826172,
+                                       4026.78955078,
+                                       3692.97387695,
+                                       3383.59692383,
+                                       3098.10839844,
+                                       2835.38208008,
+                                       2594.04101562,
+                                       2372.62280273,
+                                       2196.91870117,
+                                       2048.6171875,
+                                       1910.1348877,
+                                       1780.86755371,
+                                       1660.23608398,
+                                       1547.6887207,
+                                       1445.50598145,
+                                       1355.65515137,
+                                       1271.34863281,
+                                       1192.25231934,
+                                       1118.05004883,
+                                       1048.44384766,
+                                       983.335632324,
+                                       928.192504883,
+                                       876.130004883,
+                                       826.978088379,
+                                       780.575439453,
+                                       736.76965332,
+                                       695.41619873,
+                                       0,
+                                       0,
+                                       0,
+                                       0,
+                                       4561.02685547,
+                                       4217.54345703,
+                                       3889.28466797,
+                                       3582.40161133,
+                                       3293.56469727,
+                                       3024.01489258,
+                                       2773.8503418,
+                                       2542.54394531,
+                                       2329.23535156,
+                                       2167.82006836,
+                                       2023.34472656,
+                                       1888.08752441,
+                                       1761.55737305,
+                                       1643.26379395,
+                                       1532.72387695,
+                                       1433.39599609,
+                                       1344.85412598,
+                                       1261.69274902,
+                                       1183.6015625,
+                                       1110.28479004,
+                                       1041.46118164,
+                                       977.688903809,
+                                       923.064758301,
+                                       871.467163086,
+                                       822.732299805,
+                                       776.704589844,
+                                       733.236633301,
+                                       692.188110352,
+                                       5011.67871094,
+                                       4953.88232422,
+                                       4793.61474609,
+                                       4561.02685547,
+                                       4287.29882812,
+                                       3998.23925781,
+                                       3716.125,
+                                       3440.43725586,
+                                       3176.31298828,
+                                       2926.47753906,
+                                       2692.17285156,
+                                       2473.73583984,
+                                       2276.53393555,
+                                       2128.3894043,
+                                       1988.98718262,
+                                       1858.03149414,
+                                       1735.17175293,
+                                       1620.02612305,
+                                       1512.1998291,
+                                       1416.7467041,
+                                       1329.98596191,
+                                       1248.38586426,
+                                       1171.66845703,
+                                       1099.56396484,
+                                       1031.81274414,
+                                       969.874816895,
+                                       915.964599609,
+                                       865.006896973,
+                                       816.846923828,
+                                       771.336730957,
+                                       728.335144043,
+                                       687.707824707,
+                                       4561.02685547,
+                                       4518.67041016,
+                                       4398.46826172,
+                                       4217.54345703,
+                                       3998.23925781,
+                                       3763.55249023,
+                                       3519.85961914,
+                                       3276.21459961,
+                                       3038.5234375,
+                                       2810.43310547,
+                                       2594.04101562,
+                                       2390.41162109,
+                                       2220.78833008,
+                                       2079.79077148,
+                                       1946.46655273,
+                                       1820.70666504,
+                                       1702.30786133,
+                                       1591.01000977,
+                                       1486.51586914,
+                                       1395.84521484,
+                                       1311.29003906,
+                                       1231.62927246,
+                                       1156.62243652,
+                                       1086.03051758,
+                                       1019.62091064,
+                                       959.981201172,
+                                       906.967834473,
+                                       856.815124512,
+                                       809.379150391,
+                                       764.521179199,
+                                       722.108276367,
+                                       682.245117188,
+                                       4150.89794922,
+                                       4118.65429688,
+                                       4026.78955078,
+                                       3889.28466797,
+                                       3716.125,
+                                       3519.85961914,
+                                       3311.12646484,
+                                       3098.10839844,
+                                       2886.69311523,
+                                       2680.90209961,
+                                       2483.34204102,
+                                       2295.77954102,
+                                       2156.40185547,
+                                       2023.34472656,
+                                       1896.84716797,
+                                       1776.97375488,
+                                       1663.66943359,
+                                       1556.79296875,
+                                       1457.80285645,
+                                       1371.0369873,
+                                       1289.05615234,
+                                       1211.6673584,
+                                       1138.67041016,
+                                       1069.86108398,
+                                       1005.03588867,
+                                       948.117004395,
+                                       896.168579102,
+                                       846.973266602,
+                                       800.399719238,
+                                       756.319885254,
+                                       714.610473633,
+                                       675.848571777,
+                                       3787.85327148,
+                                       3763.55249023,
+                                       3692.97387695,
+                                       3582.40161133,
+                                       3440.43725586,
+                                       3276.21459961,
+                                       3098.10839844,
+                                       2913.08789062,
+                                       2726.56884766,
+                                       2542.54394531,
+                                       2363.82275391,
+                                       2214.77026367,
+                                       2085.0793457,
+                                       1960.44067383,
+                                       1841.2644043,
+                                       1727.76757812,
+                                       1620.02624512,
+                                       1518.01391602,
+                                       1426.21765137,
+                                       1342.71228027,
+                                       1263.61376953,
+                                       1188.7791748,
+                                       1118.05004883,
+                                       1051.25854492,
+                                       988.232055664,
+                                       934.408508301,
+                                       883.676330566,
+                                       835.576843262,
+                                       789.991882324,
+                                       746.805908203,
+                                       705.905090332,
+                                       668.410766602,
+                                       3459.89013672,
+                                       3440.43725586,
+                                       3383.59692383,
+                                       3293.56469727,
+                                       3176.31298828,
+                                       3038.5234375,
+                                       2886.69311523,
+                                       2726.56884766,
+                                       2562.8984375,
+                                       2399.40185547,
+                                       2251.3984375,
+                                       2128.38916016,
+                                       2008.47729492,
+                                       1892.4576416,
+                                       1780.86755371,
+                                       1674.04638672,
+                                       1572.18115234,
+                                       1475.3458252,
+                                       1391.27539062,
+                                       1311.29003906,
+                                       1235.31958008,
+                                       1163.2689209,
+                                       1095.02172852,
+                                       1030.44628906,
+                                       970.984558105,
+                                       918.996520996,
+                                       869.613464355,
+                                       822.732299805,
+                                       778.249511719,
+                                       736.060913086,
+                                       696.064697266,
+                                       659.988525391,
+                                       3160.32299805,
+                                       3144.51098633,
+                                       3098.10839844,
+                                       3024.01489258,
+                                       2926.47753906,
+                                       2810.43310547,
+                                       2680.90209961,
+                                       2542.54394531,
+                                       2399.40185547,
+                                       2263.89282227,
+                                       2150.73901367,
+                                       2038.4329834,
+                                       1928.13452148,
+                                       1820.70666504,
+                                       1716.77062988,
+                                       1616.75537109,
+                                       1520.93566895,
+                                       1433.39599609,
+                                       1353.48266602,
+                                       1277.20385742,
+                                       1204.54553223,
+                                       1135.45678711,
+                                       1069.86108398,
+                                       1007.66223145,
+                                       952.402770996,
+                                       902.032531738,
+                                       854.112670898,
+                                       808.556152344,
+                                       765.27355957,
+                                       724.174499512,
+                                       685.184509277,
+                                       650.644348145,
+                                       2886.69311523,
+                                       2873.68359375,
+                                       2835.38208008,
+                                       2773.8503418,
+                                       2692.17285156,
+                                       2594.04101562,
+                                       2483.34204102,
+                                       2363.82275391,
+                                       2251.3984375,
+                                       2150.73901367,
+                                       2048.6171875,
+                                       1946.46655273,
+                                       1845.42907715,
+                                       1746.38916016,
+                                       1650.01464844,
+                                       1556.79296875,
+                                       1467.77758789,
+                                       1389.00024414,
+                                       1313.34448242,
+                                       1240.89025879,
+                                       1171.66845703,
+                                       1105.66882324,
+                                       1042.85168457,
+                                       983.335632324,
+                                       932.328857422,
+                                       883.676208496,
+                                       837.313964844,
+                                       793.171142578,
+                                       751.173278809,
+                                       711.242553711,
+                                       674.120605469,
+                                       640.444885254,
+                                       2636.75488281,
+                                       2625.9453125,
+                                       2594.04101562,
+                                       2542.54394531,
+                                       2473.73583984,
+                                       2390.41162109,
+                                       2295.77954102,
+                                       2214.77026367,
+                                       2128.38916016,
+                                       2038.4329834,
+                                       1946.46655273,
+                                       1853.81237793,
+                                       1761.55737305,
+                                       1670.57421875,
+                                       1581.54528809,
+                                       1494.99133301,
+                                       1416.74682617,
+                                       1342.71228027,
+                                       1271.34875488,
+                                       1202.77600098,
+                                       1137.06164551,
+                                       1074.23132324,
+                                       1014.27752686,
+                                       959.981201172,
+                                       910.948364258,
+                                       864.090393066,
+                                       819.361022949,
+                                       776.704589844,
+                                       736.060913086,
+                                       697.364379883,
+                                       662.218017578,
+                                       629.4609375,
+                                       2408.45727539,
+                                       2399.40185547,
+                                       2372.62280273,
+                                       2329.23535156,
+                                       2276.53393555,
+                                       2220.78833008,
+                                       2156.40185547,
+                                       2085.0793457,
+                                       2008.47729492,
+                                       1928.13452148,
+                                       1845.42907715,
+                                       1761.55737305,
+                                       1677.53186035,
+                                       1594.18786621,
+                                       1512.1998291,
+                                       1435.8034668,
+                                       1364.40710449,
+                                       1295.05419922,
+                                       1227.95825195,
+                                       1163.2689209,
+                                       1101.08483887,
+                                       1041.46118164,
+                                       984.471923828,
+                                       935.451171875,
+                                       888.446838379,
+                                       843.440002441,
+                                       800.399719238,
+                                       759.286071777,
+                                       720.051269531,
+                                       682.831237793,
+                                       649.558654785,
+                                       617.765075684,
+                                       2220.78833008,
+                                       2214.77026367,
+                                       2196.91870117,
+                                       2167.82006836,
+                                       2128.3894043,
+                                       2079.79077148,
+                                       2023.34472656,
+                                       1960.44067383,
+                                       1892.4576416,
+                                       1820.70666504,
+                                       1746.38916016,
+                                       1670.57421875,
+                                       1594.18786621,
+                                       1518.01391602,
+                                       1445.50622559,
+                                       1377.7244873,
+                                       1311.29003906,
+                                       1246.50488281,
+                                       1183.6015625,
+                                       1122.75280762,
+                                       1064.07983398,
+                                       1007.66223145,
+                                       956.720947266,
+                                       909.950622559,
+                                       865.006896973,
+                                       821.887329102,
+                                       780.575439453,
+                                       741.04473877,
+                                       703.25994873,
+                                       668.410766602,
+                                       636.225524902,
+                                       605.431762695,
+                                       2069.29418945,
+                                       2064.08569336,
+                                       2048.6171875,
+                                       2023.34472656,
+                                       1988.98718262,
+                                       1946.46655273,
+                                       1896.84716797,
+                                       1841.2644043,
+                                       1780.86755371,
+                                       1716.77062988,
+                                       1650.01464844,
+                                       1581.54528809,
+                                       1512.1998291,
+                                       1445.50622559,
+                                       1382.21533203,
+                                       1319.54187012,
+                                       1257.8659668,
+                                       1197.49438477,
+                                       1138.67041016,
+                                       1081.58068848,
+                                       1026.36474609,
+                                       974.326965332,
+                                       928.192504883,
+                                       883.676208496,
+                                       840.805603027,
+                                       799.591430664,
+                                       760.030456543,
+                                       722.108276367,
+                                       685.801452637,
+                                       653.370849609,
+                                       622.301086426,
+                                       592.535888672,
+                                       1928.13452148,
+                                       1923.60375977,
+                                       1910.1348877,
+                                       1888.08752441,
+                                       1858.03149414,
+                                       1820.70666504,
+                                       1776.97375488,
+                                       1727.76757812,
+                                       1674.04638672,
+                                       1616.75537109,
+                                       1556.79296875,
+                                       1494.99133301,
+                                       1435.8034668,
+                                       1377.7244873,
+                                       1319.54187012,
+                                       1261.69274902,
+                                       1204.54553223,
+                                       1148.40356445,
+                                       1093.51477051,
+                                       1040.0736084,
+                                       988.232055664,
+                                       942.805053711,
+                                       899.092651367,
+                                       856.815124512,
+                                       816.011779785,
+                                       776.704589844,
+                                       738.902404785,
+                                       702.601013184,
+                                       668.97833252,
+                                       637.802612305,
+                                       607.867736816,
+                                       579.151306152,
+                                       1796.60424805,
+                                       1792.64550781,
+                                       1780.86755371,
+                                       1761.55737305,
+                                       1735.17175293,
+                                       1702.30786133,
+                                       1663.66943359,
+                                       1620.02624512,
+                                       1572.18115234,
+                                       1520.93566895,
+                                       1467.77758789,
+                                       1416.74682617,
+                                       1364.40710449,
+                                       1311.29003906,
+                                       1257.8659668,
+                                       1204.54553223,
+                                       1151.67944336,
+                                       1099.56396484,
+                                       1048.44384766,
+                                       998.518310547,
+                                       953.479187012,
+                                       910.948364258,
+                                       869.613342285,
+                                       829.542602539,
+                                       790.784973145,
+                                       753.372253418,
+                                       717.322814941,
+                                       682.831237793,
+                                       651.732788086,
+                                       621.794555664,
+                                       593.00592041,
+                                       565.35144043,
+                                       1674.04626465,
+                                       1670.57409668,
+                                       1660.23608398,
+                                       1643.26379395,
+                                       1620.02612305,
+                                       1591.01000977,
+                                       1556.79296875,
+                                       1518.01391602,
+                                       1475.3458252,
+                                       1433.39599609,
+                                       1389.00024414,
+                                       1342.71228027,
+                                       1295.05419922,
+                                       1246.50488281,
+                                       1197.49438477,
+                                       1148.40356445,
+                                       1099.56396484,
+                                       1051.25854492,
+                                       1003.72686768,
+                                       959.981201172,
+                                       918.996520996,
+                                       878.947387695,
+                                       839.930419922,
+                                       802.020446777,
+                                       765.27355957,
+                                       729.730224609,
+                                       695.41619873,
+                                       663.897949219,
+                                       634.132019043,
+                                       605.431762695,
+                                       577.793395996,
+                                       551.20690918,
+                                       1559.84912109,
+                                       1556.79296875,
+                                       1547.6887207,
+                                       1532.72387695,
+                                       1512.1998291,
+                                       1486.51586914,
+                                       1457.80285645,
+                                       1426.21765137,
+                                       1391.27539062,
+                                       1353.48266602,
+                                       1313.34448242,
+                                       1271.34875488,
+                                       1227.95825195,
+                                       1183.6015625,
+                                       1138.67041016,
+                                       1093.51477051,
+                                       1048.44384766,
+                                       1003.72686768,
+                                       962.165222168,
+                                       923.064758301,
+                                       884.62689209,
+                                       846.973266602,
+                                       810.203491211,
+                                       774.396850586,
+                                       739.61541748,
+                                       705.905090332,
+                                       674.120605469,
+                                       644.708618164,
+                                       616.264648438,
+                                       588.796264648,
+                                       562.305175781,
+                                       536.786437988,
+                                       1455.32824707,
+                                       1452.8614502,
+                                       1445.50598145,
+                                       1433.39599609,
+                                       1416.7467041,
+                                       1395.84521484,
+                                       1371.0369873,
+                                       1342.71228027,
+                                       1311.29003906,
+                                       1277.20385742,
+                                       1240.89025879,
+                                       1202.77600098,
+                                       1163.2689209,
+                                       1122.75280762,
+                                       1081.58068848,
+                                       1040.0736084,
+                                       998.518310547,
+                                       959.981201172,
+                                       923.064758301,
+                                       886.533447266,
+                                       850.530761719,
+                                       815.17779541,
+                                       780.575439453,
+                                       746.805908203,
+                                       713.934814453,
+                                       682.245117188,
+                                       653.370849609,
+                                       625.353942871,
+                                       598.214355469,
+                                       571.965026855,
+                                       546.611877441,
+                                       522.155395508,
+                                       1364.40710449,
+                                       1362.2097168,
+                                       1355.65515137,
+                                       1344.85412598,
+                                       1329.98596191,
+                                       1311.29003906,
+                                       1289.05615234,
+                                       1263.61376953,
+                                       1235.31958008,
+                                       1204.54553223,
+                                       1171.66845703,
+                                       1137.06164551,
+                                       1101.08483887,
+                                       1064.07983398,
+                                       1026.36474609,
+                                       988.232055664,
+                                       953.479187012,
+                                       918.996520996,
+                                       884.62689209,
+                                       850.530761719,
+                                       816.846923828,
+                                       783.694335938,
+                                       751.173278809,
+                                       719.367553711,
+                                       688.345153809,
+                                       659.988647461,
+                                       632.568969727,
+                                       605.91784668,
+                                       580.059387207,
+                                       555.010437012,
+                                       530.781066895,
+                                       507.375701904,
+                                       1279.16601562,
+                                       1277.20385742,
+                                       1271.34863281,
+                                       1261.69274902,
+                                       1248.38586426,
+                                       1231.62927246,
+                                       1211.6673584,
+                                       1188.7791748,
+                                       1163.2689209,
+                                       1135.45678711,
+                                       1105.66882324,
+                                       1074.23132324,
+                                       1041.46118164,
+                                       1007.66223145,
+                                       974.326965332,
+                                       942.805053711,
+                                       910.948364258,
+                                       878.947387695,
+                                       846.973266602,
+                                       815.17779541,
+                                       783.694335938,
+                                       752.638000488,
+                                       722.108276367,
+                                       692.188110352,
+                                       664.459472656,
+                                       637.802490234,
+                                       611.796875,
+                                       586.477539062,
+                                       561.871887207,
+                                       537.999816895,
+                                       514.875244141,
+                                       492.505737305,
+                                       1199.25048828,
+                                       1197.49438477,
+                                       1192.25231934,
+                                       1183.6015625,
+                                       1171.66845703,
+                                       1156.62243652,
+                                       1138.67041016,
+                                       1118.05004883,
+                                       1095.02172852,
+                                       1069.86108398,
+                                       1042.85168457,
+                                       1014.27752686,
+                                       984.471923828,
+                                       956.720947266,
+                                       928.192504883,
+                                       899.092651367,
+                                       869.613342285,
+                                       839.930419922,
+                                       810.203491211,
+                                       780.575439453,
+                                       751.173278809,
+                                       722.108276367,
+                                       693.4765625,
+                                       666.712768555,
+                                       640.975524902,
+                                       615.765563965,
+                                       591.129272461,
+                                       567.10345459,
+                                       543.718078613,
+                                       520.995727539,
+                                       498.952667236,
+                                       480.805541992,
+                                       1124.32775879,
+                                       1122.75280762,
+                                       1118.05004883,
+                                       1110.28479004,
+                                       1099.56396484,
+                                       1086.03051758,
+                                       1069.86108398,
+                                       1051.25854492,
+                                       1030.44628906,
+                                       1007.66223145,
+                                       983.335632324,
+                                       959.981201172,
+                                       935.451171875,
+                                       909.950622559,
+                                       883.676208496,
+                                       856.815124512,
+                                       829.542602539,
+                                       802.020446777,
+                                       774.396850586,
+                                       746.805908203,
+                                       719.367553711,
+                                       692.188110352,
+                                       666.712768555,
+                                       642.038635254,
+                                       617.765075684,
+                                       593.947570801,
+                                       570.63269043,
+                                       547.859313965,
+                                       525.65826416,
+                                       504.054443359,
+                                       484.735015869,
+                                       470.0362854,
+                                       1054.08581543,
+                                       1052.67053223,
+                                       1048.44384766,
+                                       1041.46118164,
+                                       1031.81274414,
+                                       1019.62091064,
+                                       1005.03588867,
+                                       988.232055664,
+                                       970.984558105,
+                                       952.402770996,
+                                       932.328857422,
+                                       910.948364258,
+                                       888.446838379,
+                                       865.006896973,
+                                       840.805603027,
+                                       816.011779785,
+                                       790.784973145,
+                                       765.27355957,
+                                       739.61541748,
+                                       713.934814453,
+                                       688.345153809,
+                                       664.459472656,
+                                       640.975524902,
+                                       617.765075684,
+                                       594.891723633,
+                                       572.41003418,
+                                       550.3671875,
+                                       528.801818848,
+                                       507.746917725,
+                                       487.717651367,
+                                       473.343078613,
+                                       459.212097168,
+                                       988.231933594,
+                                       986.958068848,
+                                       983.335632324,
+                                       977.688903809,
+                                       969.874816895,
+                                       959.981201172,
+                                       948.117004395,
+                                       934.408508301,
+                                       918.996520996,
+                                       902.032531738,
+                                       883.676208496,
+                                       864.090393066,
+                                       843.440002441,
+                                       821.887329102,
+                                       799.591430664,
+                                       776.704589844,
+                                       753.372253418,
+                                       729.730224609,
+                                       705.905090332,
+                                       682.245117188,
+                                       659.988647461,
+                                       637.802490234,
+                                       615.765563965,
+                                       593.947570801,
+                                       572.41003418,
+                                       551.20690918,
+                                       530.384277344,
+                                       509.981781006,
+                                       490.03225708,
+                                       475.728912354,
+                                       461.936004639,
+                                       448.361358643,
+                                       932.328857422,
+                                       931.291748047,
+                                       928.192504883,
+                                       923.064758301,
+                                       915.964599609,
+                                       906.967834473,
+                                       896.168579102,
+                                       883.676330566,
+                                       869.613464355,
+                                       854.112670898,
+                                       837.313964844,
+                                       819.361022949,
+                                       800.399719238,
+                                       780.575439453,
+                                       760.030456543,
+                                       738.902404785,
+                                       717.322814941,
+                                       695.41619873,
+                                       674.120605469,
+                                       653.370849609,
+                                       632.568969727,
+                                       611.796875,
+                                       591.129272461,
+                                       570.63269043,
+                                       550.3671875,
+                                       530.384277344,
+                                       510.730102539,
+                                       491.443481445,
+                                       477.170257568,
+                                       463.766784668,
+                                       450.541046143,
+                                       437.510101318,
+                                       879.889831543,
+                                       878.947387695,
+                                       876.130004883,
+                                       871.467163086,
+                                       865.006896973,
+                                       856.815124512,
+                                       846.973266602,
+                                       835.576843262,
+                                       822.732299805,
+                                       808.556152344,
+                                       793.171142578,
+                                       776.704589844,
+                                       759.286071777,
+                                       741.04473877,
+                                       722.108276367,
+                                       702.601013184,
+                                       682.831237793,
+                                       663.897949219,
+                                       644.708618164,
+                                       625.353942871,
+                                       605.91784668,
+                                       586.477539062,
+                                       567.10345459,
+                                       547.859313965,
+                                       528.801818848,
+                                       509.981781006,
+                                       491.443481445,
+                                       477.65222168,
+                                       464.686828613,
+                                       451.85736084,
+                                       439.183532715,
+                                       426.682556152,
+                                       830.400390625,
+                                       829.542602539,
+                                       826.978088379,
+                                       822.732299805,
+                                       816.846923828,
+                                       809.379150391,
+                                       800.399719238,
+                                       789.991882324,
+                                       778.249511719,
+                                       765.27355957,
+                                       751.173278809,
+                                       736.060913086,
+                                       720.051269531,
+                                       703.25994873,
+                                       685.801452637,
+                                       668.97833252,
+                                       651.732788086,
+                                       634.132019043,
+                                       616.264648438,
+                                       598.214355469,
+                                       580.059387207,
+                                       561.871887207,
+                                       543.718078613,
+                                       525.65826416,
+                                       507.746917725,
+                                       490.03225708,
+                                       477.170257568,
+                                       464.686828613,
+                                       452.297576904,
+                                       440.024200439,
+                                       427.886230469,
+                                       415.901092529,
+                                       783.694335938,
+                                       782.912841797,
+                                       780.575439453,
+                                       776.704589844,
+                                       771.336730957,
+                                       764.521179199,
+                                       756.319885254,
+                                       746.805908203,
+                                       736.060913086,
+                                       724.174499512,
+                                       711.242553711,
+                                       697.364379883,
+                                       682.831237793,
+                                       668.410766602,
+                                       653.370849609,
+                                       637.802612305,
+                                       621.794555664,
+                                       605.431762695,
+                                       588.796264648,
+                                       571.965026855,
+                                       555.010437012,
+                                       537.999816895,
+                                       520.995727539,
+                                       504.054443359,
+                                       487.717651367,
+                                       475.728912354,
+                                       463.766784668,
+                                       451.85736084,
+                                       440.024200439,
+                                       428.288726807,
+                                       416.670166016,
+                                       405.185882568,
+                                       739.61541748,
+                                       738.902404785,
+                                       736.76965332,
+                                       733.236633301,
+                                       728.335144043,
+                                       722.108276367,
+                                       714.610473633,
+                                       705.905090332,
+                                       696.064697266,
+                                       685.184509277,
+                                       674.120605469,
+                                       662.218017578,
+                                       649.558654785,
+                                       636.225524902,
+                                       622.301086426,
+                                       607.867736816,
+                                       593.00592041,
+                                       577.793395996,
+                                       562.305175781,
+                                       546.611877441,
+                                       530.781066895,
+                                       514.875244141,
+                                       498.952667236,
+                                       484.735015869,
+                                       473.343078613,
+                                       461.936004639,
+                                       450.541046143,
+                                       439.183532715,
+                                       427.886230469,
+                                       416.670166016,
+                                       405.554260254,
+                                       394.555480957,
+                                       698.015563965,
+                                       697.364379883,
+                                       695.41619873,
+                                       692.188110352,
+                                       687.707824707,
+                                       682.245117188,
+                                       675.848571777,
+                                       668.410766602,
+                                       659.988525391,
+                                       650.644348145,
+                                       640.444885254,
+                                       629.4609375,
+                                       617.765075684,
+                                       605.431762695,
+                                       592.535888672,
+                                       579.151306152,
+                                       565.35144043,
+                                       551.20690918,
+                                       536.786437988,
+                                       522.155395508,
+                                       507.375701904,
+                                       492.505737305,
+                                       480.805541992,
+                                       470.0362854,
+                                       459.212097168,
+                                       448.361358643,
+                                       437.510101318,
+                                       426.682556152,
+                                       415.901092529,
+                                       405.185882568,
+                                       394.555480957,
+                                       384.026672363},
+                                      {0,
+                                       0,
+                                       0,
+                                       0,
+                                       10016.1787109,
+                                       8949.01855469,
+                                       7995.55859375,
+                                       7162.60107422,
+                                       6422.47558594,
+                                       5758.82910156,
+                                       5163.75830078,
+                                       4630.17675781,
+                                       4151.73242188,
+                                       3734.18823242,
+                                       3370.10986328,
+                                       3041.52880859,
+                                       2744.98388672,
+                                       2477.35107422,
+                                       2235.81323242,
+                                       2038.74963379,
+                                       1932.10974121,
+                                       1831.04748535,
+                                       1735.27160645,
+                                       1644.50561523,
+                                       1558.48730469,
+                                       1476.96801758,
+                                       1386.82666016,
+                                       1301.52868652,
+                                       1221.47717285,
+                                       1146.34912109,
+                                       1075.84216309,
+                                       1009.67150879,
+                                       0,
+                                       0,
+                                       0,
+                                       0,
+                                       9878.22460938,
+                                       8849.74414062,
+                                       7921.35595703,
+                                       7107.29541016,
+                                       6379.01171875,
+                                       5724.14550781,
+                                       5135.74365234,
+                                       4607.32568359,
+                                       4132.93945312,
+                                       3719.50512695,
+                                       3357.80053711,
+                                       3031.15722656,
+                                       2736.20654297,
+                                       2469.89428711,
+                                       2229.45581055,
+                                       2035.87133789,
+                                       1929.51806641,
+                                       1828.70812988,
+                                       1733.15539551,
+                                       1642.58703613,
+                                       1556.74450684,
+                                       1475.38232422,
+                                       1385.13513184,
+                                       1300,
+                                       1220.09375,
+                                       1145.09570312,
+                                       1074.70495605,
+                                       1008.63867188,
+                                       0,
+                                       0,
+                                       0,
+                                       0,
+                                       9497.34082031,
+                                       8569.00976562,
+                                       7710.1953125,
+                                       6947.08251953,
+                                       6252.30078125,
+                                       5622.56835938,
+                                       5053.41699219,
+                                       4539.99316406,
+                                       4077.45068359,
+                                       3676.05541992,
+                                       3321.32641602,
+                                       3000.390625,
+                                       2710.14355469,
+                                       2447.73339844,
+                                       2210.55053711,
+                                       2027.28417969,
+                                       1921.78308105,
+                                       1821.72375488,
+                                       1726.8347168,
+                                       1636.85534668,
+                                       1551.53735352,
+                                       1470.2409668,
+                                       1380.08117676,
+                                       1295.43139648,
+                                       1215.95825195,
+                                       1141.34753418,
+                                       1071.30383301,
+                                       1005.54919434,
+                                       0,
+                                       0,
+                                       0,
+                                       0,
+                                       8949.01855469,
+                                       8149.28955078,
+                                       7394.22412109,
+                                       6697.34423828,
+                                       6052.48828125,
+                                       5461.01953125,
+                                       4921.63427734,
+                                       4431.66796875,
+                                       3987.81860352,
+                                       3605.57324219,
+                                       3262.00317383,
+                                       2950.23999023,
+                                       2667.58154297,
+                                       2411.48632812,
+                                       2179.58496094,
+                                       2013.13024902,
+                                       1909.02331543,
+                                       1810.1940918,
+                                       1716.39379883,
+                                       1627.38232422,
+                                       1542.92712402,
+                                       1460.9855957,
+                                       1371.72302246,
+                                       1287.87316895,
+                                       1209.11425781,
+                                       1135.14245605,
+                                       1065.67199707,
+                                       1000.43200684,
+                                       10016.1787109,
+                                       9878.22460938,
+                                       9497.34082031,
+                                       8949.01855469,
+                                       8310.703125,
+                                       7644.40527344,
+                                       6999.56738281,
+                                       6379.01171875,
+                                       5793.93896484,
+                                       5249.58837891,
+                                       4747.62841797,
+                                       4287.62841797,
+                                       3871.05200195,
+                                       3510.74609375,
+                                       3181.88964844,
+                                       2882.29760742,
+                                       2609.7644043,
+                                       2362.1328125,
+                                       2137.33789062,
+                                       1993.63818359,
+                                       1891.43066406,
+                                       1794.28063965,
+                                       1701.97070312,
+                                       1614.28540039,
+                                       1531.01391602,
+                                       1448.18615723,
+                                       1360.15734863,
+                                       1277.40795898,
+                                       1199.63305664,
+                                       1126.54284668,
+                                       1057.86291504,
+                                       993.333496094,
+                                       8949.01855469,
+                                       8849.74414062,
+                                       8569.00976562,
+                                       8149.28955078,
+                                       7644.40527344,
+                                       7107.29541016,
+                                       6556.77978516,
+                                       6014.109375,
+                                       5492.59033203,
+                                       4999.91259766,
+                                       4539.99316406,
+                                       4114.296875,
+                                       3734.18823242,
+                                       3394.95922852,
+                                       3083.60522461,
+                                       2798.61328125,
+                                       2538.30639648,
+                                       2300.9543457,
+                                       2084.83349609,
+                                       1969.11364746,
+                                       1869.26220703,
+                                       1774.20117188,
+                                       1683.74963379,
+                                       1597.7220459,
+                                       1515.93359375,
+                                       1431.99487305,
+                                       1345.51452637,
+                                       1264.14855957,
+                                       1187.61206055,
+                                       1115.63220215,
+                                       1047.94946289,
+                                       984.940734863,
+                                       7995.55859375,
+                                       7921.35595703,
+                                       7710.1953125,
+                                       7394.22412109,
+                                       6999.56738281,
+                                       6556.77978516,
+                                       6091.37744141,
+                                       5622.56835938,
+                                       5163.75830078,
+                                       4723.70117188,
+                                       4307.68896484,
+                                       3918.65771484,
+                                       3578.03271484,
+                                       3262.00317383,
+                                       2970.13378906,
+                                       2701.54418945,
+                                       2455.08642578,
+                                       2229.45581055,
+                                       2041.63623047,
+                                       1939.92504883,
+                                       1842.82971191,
+                                       1750.22119141,
+                                       1661.9576416,
+                                       1577.88708496,
+                                       1497.85339355,
+                                       1412.59997559,
+                                       1327.95666504,
+                                       1248.23461914,
+                                       1173.171875,
+                                       1102.515625,
+                                       1036.02380371,
+                                       975.335021973,
+                                       7162.60107422,
+                                       7107.29541016,
+                                       6947.08251953,
+                                       6697.34423828,
+                                       6379.01171875,
+                                       6014.109375,
+                                       5622.56835938,
+                                       5220.67480469,
+                                       4820.77636719,
+                                       4431.66796875,
+                                       4059.2434082,
+                                       3719.50512695,
+                                       3407.50024414,
+                                       3115.79980469,
+                                       2844.60986328,
+                                       2593.61132812,
+                                       2362.13305664,
+                                       2149.27954102,
+                                       2004.73095703,
+                                       1906.4909668,
+                                       1812.48913574,
+                                       1722.64440918,
+                                       1636.85534668,
+                                       1555.00537109,
+                                       1476.96813965,
+                                       1390.21911621,
+                                       1307.67175293,
+                                       1229.82922363,
+                                       1156.4543457,
+                                       1087.31677246,
+                                       1022.19244385,
+                                       964.170288086,
+                                       6422.47558594,
+                                       6379.01171875,
+                                       6252.30078125,
+                                       6052.48828125,
+                                       5793.93896484,
+                                       5492.59033203,
+                                       5163.75830078,
+                                       4820.77636719,
+                                       4474.43066406,
+                                       4132.93945312,
+                                       3809.15112305,
+                                       3510.74560547,
+                                       3227.25976562,
+                                       2960.15966797,
+                                       2710.14355469,
+                                       2477.3515625,
+                                       2261.5234375,
+                                       2062.12646484,
+                                       1963.74353027,
+                                       1869.26220703,
+                                       1778.62731934,
+                                       1691.80358887,
+                                       1608.73010254,
+                                       1529.32519531,
+                                       1450.00341797,
+                                       1365.09509277,
+                                       1284.86962891,
+                                       1209.11425781,
+                                       1137.61865234,
+                                       1070.17382812,
+                                       1006.57757568,
+                                       951.533935547,
+                                       5758.82910156,
+                                       5724.14550781,
+                                       5622.56835938,
+                                       5461.01953125,
+                                       5249.58837891,
+                                       4999.91259766,
+                                       4723.70117188,
+                                       4431.66796875,
+                                       4132.93945312,
+                                       3839.88183594,
+                                       3564.3984375,
+                                       3297.38085938,
+                                       3041.52880859,
+                                       2798.61328125,
+                                       2569.67749023,
+                                       2355.21166992,
+                                       2155.28833008,
+                                       2013.13024902,
+                                       1919.21801758,
+                                       1828.70812988,
+                                       1741.64990234,
+                                       1658.05053711,
+                                       1577.88708496,
+                                       1501.11230469,
+                                       1419.60339355,
+                                       1337.48803711,
+                                       1259.77709961,
+                                       1186.2878418,
+                                       1116.83618164,
+                                       1051.23815918,
+                                       989.356018066,
+                                       937.521850586,
+                                       5163.75830078,
+                                       5135.74365234,
+                                       5053.41699219,
+                                       4921.63427734,
+                                       4747.62841797,
+                                       4539.99316406,
+                                       4307.68896484,
+                                       4059.2434082,
+                                       3809.15112305,
+                                       3564.3984375,
+                                       3321.32641602,
+                                       3083.60522461,
+                                       2853.95654297,
+                                       2634.29614258,
+                                       2425.88378906,
+                                       2229.45581055,
+                                       2053.26318359,
+                                       1961.06884766,
+                                       1871.70092773,
+                                       1785.30419922,
+                                       1701.97070312,
+                                       1621.74597168,
+                                       1544.64245605,
+                                       1470.2409668,
+                                       1386.82666016,
+                                       1307.67150879,
+                                       1232.63317871,
+                                       1161.55883789,
+                                       1094.29150391,
+                                       1030.67077637,
+                                       972.740783691,
+                                       922.236572266,
+                                       4630.17675781,
+                                       4607.32568359,
+                                       4539.99316406,
+                                       4431.66796875,
+                                       4287.62841797,
+                                       4114.296875,
+                                       3918.65771484,
+                                       3719.50512695,
+                                       3510.74560547,
+                                       3297.38085938,
+                                       3083.60522461,
+                                       2872.79980469,
+                                       2667.58154297,
+                                       2469.89453125,
+                                       2281.10717773,
+                                       2102.11450195,
+                                       1993.63842773,
+                                       1906.4909668,
+                                       1821.72387695,
+                                       1739.5189209,
+                                       1660.00183105,
+                                       1583.25292969,
+                                       1509.31469727,
+                                       1431.99487305,
+                                       1351.99133301,
+                                       1275.92382812,
+                                       1203.68237305,
+                                       1135.14245605,
+                                       1070.17382812,
+                                       1008.63867188,
+                                       954.878356934,
+                                       905.786682129,
+                                       4151.73242188,
+                                       4132.93945312,
+                                       4077.45068359,
+                                       3987.81860352,
+                                       3871.05200195,
+                                       3734.18823242,
+                                       3578.03271484,
+                                       3407.50024414,
+                                       3227.25976562,
+                                       3041.52880859,
+                                       2853.95654297,
+                                       2667.58154297,
+                                       2484.84399414,
+                                       2307.63012695,
+                                       2137.33789062,
+                                       2015.94567871,
+                                       1932.10974121,
+                                       1849.96801758,
+                                       1769.79614258,
+                                       1691.80358887,
+                                       1616.14465332,
+                                       1542.92712402,
+                                       1472.10400391,
+                                       1391.92028809,
+                                       1315.41491699,
+                                       1242.52575684,
+                                       1173.171875,
+                                       1107.25793457,
+                                       1044.67651367,
+                                       985.821166992,
+                                       935.89440918,
+                                       888.28326416,
+                                       3734.18823242,
+                                       3719.50512695,
+                                       3676.05541992,
+                                       3605.57324219,
+                                       3510.74609375,
+                                       3394.95922852,
+                                       3262.00317383,
+                                       3115.79980469,
+                                       2960.15966797,
+                                       2798.61328125,
+                                       2634.29614258,
+                                       2469.89453125,
+                                       2307.63012695,
+                                       2149.27954102,
+                                       2027.2845459,
+                                       1947.80200195,
+                                       1869.26220703,
+                                       1792.02880859,
+                                       1716.39379883,
+                                       1642.58703613,
+                                       1570.78308105,
+                                       1501.11230469,
+                                       1426.6628418,
+                                       1350.36767578,
+                                       1277.40795898,
+                                       1207.75256348,
+                                       1141.34753418,
+                                       1078.12231445,
+                                       1017.99298096,
+                                       964.170288086,
+                                       915.916137695,
+                                       869.840393066,
+                                       3370.10986328,
+                                       3357.80053711,
+                                       3321.32641602,
+                                       3262.00317383,
+                                       3181.88964844,
+                                       3083.60522461,
+                                       2970.13378906,
+                                       2844.60986328,
+                                       2710.14355469,
+                                       2569.67749023,
+                                       2425.88378906,
+                                       2281.10717773,
+                                       2137.33789062,
+                                       2027.2845459,
+                                       1953.08789062,
+                                       1879.05322266,
+                                       1805.62060547,
+                                       1733.15539551,
+                                       1661.9576416,
+                                       1592.26843262,
+                                       1524.27880859,
+                                       1455.47753906,
+                                       1380.08117676,
+                                       1307.67150879,
+                                       1238.27075195,
+                                       1171.8729248,
+                                       1108.44836426,
+                                       1047.94946289,
+                                       990.31439209,
+                                       941.609558105,
+                                       895.070007324,
+                                       850.572570801,
+                                       3041.52880859,
+                                       3031.15722656,
+                                       3000.390625,
+                                       2950.23999023,
+                                       2882.29760742,
+                                       2798.61328125,
+                                       2701.54418945,
+                                       2593.61132812,
+                                       2477.3515625,
+                                       2355.21166992,
+                                       2229.45581055,
+                                       2102.11450195,
+                                       2015.94567871,
+                                       1947.80200195,
+                                       1879.05322266,
+                                       1810.1940918,
+                                       1741.64990234,
+                                       1673.77990723,
+                                       1606.8861084,
+                                       1541.21508789,
+                                       1476.96813965,
+                                       1403.92370605,
+                                       1332.70874023,
+                                       1264.14855957,
+                                       1198.28808594,
+                                       1135.14245605,
+                                       1074.70495605,
+                                       1016.94714355,
+                                       965.022094727,
+                                       918.278381348,
+                                       873.481933594,
+                                       830.592407227,
+                                       2744.98388672,
+                                       2736.20654297,
+                                       2710.14355469,
+                                       2667.58154297,
+                                       2609.7644043,
+                                       2538.30639648,
+                                       2455.08642578,
+                                       2362.13305664,
+                                       2261.5234375,
+                                       2155.28833008,
+                                       2053.26318359,
+                                       1993.63842773,
+                                       1932.10974121,
+                                       1869.26220703,
+                                       1805.62060547,
+                                       1741.64990234,
+                                       1677.75500488,
+                                       1614.28540039,
+                                       1551.53735352,
+                                       1489.75976562,
+                                       1421.36291504,
+                                       1351.99133301,
+                                       1284.86938477,
+                                       1220.09375,
+                                       1157.72741699,
+                                       1097.8046875,
+                                       1040.33654785,
+                                       985.821166992,
+                                       939.153625488,
+                                       894.312011719,
+                                       851.274475098,
+                                       810.01184082,
+                                       2477.35107422,
+                                       2469.89428711,
+                                       2447.73339844,
+                                       2411.48632812,
+                                       2362.1328125,
+                                       2300.9543457,
+                                       2229.45581055,
+                                       2149.27954102,
+                                       2062.12646484,
+                                       2013.13024902,
+                                       1961.06884766,
+                                       1906.4909668,
+                                       1849.96801758,
+                                       1792.02880859,
+                                       1733.15539551,
+                                       1673.77990723,
+                                       1614.28540039,
+                                       1555.00537109,
+                                       1496.22851562,
+                                       1431.99487305,
+                                       1365.09509277,
+                                       1300,
+                                       1236.85754395,
+                                       1175.77709961,
+                                       1116.83618164,
+                                       1060.08496094,
+                                       1005.54919434,
+                                       957.398681641,
+                                       912.780822754,
+                                       869.840393066,
+                                       828.566345215,
+                                       788.938354492,
+                                       2235.81323242,
+                                       2229.45581055,
+                                       2210.55053711,
+                                       2179.58496094,
+                                       2137.33789062,
+                                       2084.83349609,
+                                       2041.63623047,
+                                       2004.73095703,
+                                       1963.74353027,
+                                       1919.21801758,
+                                       1871.70092773,
+                                       1821.72387695,
+                                       1769.79614258,
+                                       1716.39379883,
+                                       1661.9576416,
+                                       1606.8861084,
+                                       1551.53735352,
+                                       1496.22851562,
+                                       1435.56774902,
+                                       1371.72302246,
+                                       1309.21435547,
+                                       1248.23461914,
+                                       1188.93847656,
+                                       1131.44470215,
+                                       1075.84216309,
+                                       1022.19244385,
+                                       972.740783691,
+                                       928.625244141,
+                                       886.038757324,
+                                       844.988342285,
+                                       805.471496582,
+                                       767.476257324,
+                                       2038.74963379,
+                                       2035.87133789,
+                                       2027.28417969,
+                                       2013.13024902,
+                                       1993.63818359,
+                                       1969.11364746,
+                                       1939.92504883,
+                                       1906.4909668,
+                                       1869.26220703,
+                                       1828.70812988,
+                                       1785.30419922,
+                                       1739.5189209,
+                                       1691.80358887,
+                                       1642.58703613,
+                                       1592.26843262,
+                                       1541.21508789,
+                                       1489.75976562,
+                                       1431.99487305,
+                                       1371.72302246,
+                                       1312.30871582,
+                                       1253.98498535,
+                                       1196.94519043,
+                                       1141.34753418,
+                                       1087.31677246,
+                                       1034.94958496,
+                                       984.940734863,
+                                       941.609558105,
+                                       899.638793945,
+                                       859.054748535,
+                                       819.872619629,
+                                       782.097106934,
+                                       745.724487305,
+                                       1932.10974121,
+                                       1929.51806641,
+                                       1921.78308105,
+                                       1909.02331543,
+                                       1891.43066406,
+                                       1869.26220703,
+                                       1842.82971191,
+                                       1812.48913574,
+                                       1778.62731934,
+                                       1741.64990234,
+                                       1701.97070312,
+                                       1660.00183105,
+                                       1616.14465332,
+                                       1570.78308105,
+                                       1524.27880859,
+                                       1476.96813965,
+                                       1421.36291504,
+                                       1365.09509277,
+                                       1309.21435547,
+                                       1253.98498535,
+                                       1199.63305664,
+                                       1146.34912109,
+                                       1094.29150391,
+                                       1043.58874512,
+                                       994.342956543,
+                                       951.534057617,
+                                       910.440185547,
+                                       870.567016602,
+                                       831.947387695,
+                                       794.602966309,
+                                       758.545227051,
+                                       723.776733398,
+                                       1831.04748535,
+                                       1828.70812988,
+                                       1821.72375488,
+                                       1810.1940918,
+                                       1794.28063965,
+                                       1774.20117188,
+                                       1750.22119141,
+                                       1722.64440918,
+                                       1691.80358887,
+                                       1658.05053711,
+                                       1621.74597168,
+                                       1583.25292969,
+                                       1542.92712402,
+                                       1501.11230469,
+                                       1455.47753906,
+                                       1403.92370605,
+                                       1351.99133301,
+                                       1300,
+                                       1248.23461914,
+                                       1196.94519043,
+                                       1146.34912109,
+                                       1096.63146973,
+                                       1047.94946289,
+                                       1000.43200684,
+                                       958.241088867,
+                                       918.278259277,
+                                       879.356750488,
+                                       841.526489258,
+                                       804.825805664,
+                                       769.28125,
+                                       734.910339355,
+                                       701.721008301,
+                                       1735.27160645,
+                                       1733.15539551,
+                                       1726.8347168,
+                                       1716.39379883,
+                                       1701.97070312,
+                                       1683.74963379,
+                                       1661.9576416,
+                                       1636.85534668,
+                                       1608.73010254,
+                                       1577.88708496,
+                                       1544.64245605,
+                                       1509.31469727,
+                                       1472.10400391,
+                                       1426.6628418,
+                                       1380.08117676,
+                                       1332.70874023,
+                                       1284.86938477,
+                                       1236.85754395,
+                                       1188.93847656,
+                                       1141.34753418,
+                                       1094.29150391,
+                                       1047.94946289,
+                                       1002.47418213,
+                                       961.622131348,
+                                       923.031555176,
+                                       885.292236328,
+                                       848.471923828,
+                                       812.623657227,
+                                       777.789794922,
+                                       744.001464844,
+                                       711.280090332,
+                                       684.97052002,
+                                       1644.50561523,
+                                       1642.58703613,
+                                       1636.85534668,
+                                       1627.38232422,
+                                       1614.28540039,
+                                       1597.7220459,
+                                       1577.88708496,
+                                       1555.00537109,
+                                       1529.32519531,
+                                       1501.11230469,
+                                       1470.2409668,
+                                       1431.99487305,
+                                       1391.92028809,
+                                       1350.36767578,
+                                       1307.67150879,
+                                       1264.14855957,
+                                       1220.09375,
+                                       1175.77709961,
+                                       1131.44470215,
+                                       1087.31677246,
+                                       1043.58874512,
+                                       1000.43200684,
+                                       961.622131348,
+                                       924.624450684,
+                                       888.28326416,
+                                       852.680969238,
+                                       817.885742188,
+                                       783.954040527,
+                                       750.929992676,
+                                       718.848205566,
+                                       690.50970459,
+                                       669.78717041,
+                                       1558.48730469,
+                                       1556.74450684,
+                                       1551.53735352,
+                                       1542.92712402,
+                                       1531.01391602,
+                                       1515.93359375,
+                                       1497.85339355,
+                                       1476.96813965,
+                                       1450.00341797,
+                                       1419.60339355,
+                                       1386.82666016,
+                                       1351.99133301,
+                                       1315.41491699,
+                                       1277.40795898,
+                                       1238.27075195,
+                                       1198.28808594,
+                                       1157.72741699,
+                                       1116.83618164,
+                                       1075.84216309,
+                                       1034.94958496,
+                                       994.342956543,
+                                       958.241088867,
+                                       923.031555176,
+                                       888.28326416,
+                                       854.091186523,
+                                       820.536315918,
+                                       787.687927246,
+                                       755.602600098,
+                                       724.327697754,
+                                       694.713867188,
+                                       674.449768066,
+                                       654.522705078,
+                                       1476.96801758,
+                                       1475.38232422,
+                                       1470.2409668,
+                                       1460.9855957,
+                                       1448.18615723,
+                                       1431.99487305,
+                                       1412.59997559,
+                                       1390.21911621,
+                                       1365.09509277,
+                                       1337.48803711,
+                                       1307.67150879,
+                                       1275.92382812,
+                                       1242.52575684,
+                                       1207.75256348,
+                                       1171.8729248,
+                                       1135.14245605,
+                                       1097.8046875,
+                                       1060.08496094,
+                                       1022.19244385,
+                                       984.940734863,
+                                       951.534057617,
+                                       918.278259277,
+                                       885.292236328,
+                                       852.680969238,
+                                       820.536315918,
+                                       788.938354492,
+                                       757.955322266,
+                                       727.644897461,
+                                       698.05480957,
+                                       677.813598633,
+                                       658.364379883,
+                                       639.217041016,
+                                       1386.82666016,
+                                       1385.13513184,
+                                       1380.08117676,
+                                       1371.72302246,
+                                       1360.15734863,
+                                       1345.51452637,
+                                       1327.95666504,
+                                       1307.67175293,
+                                       1284.86962891,
+                                       1259.77709961,
+                                       1232.63317871,
+                                       1203.68237305,
+                                       1173.171875,
+                                       1141.34753418,
+                                       1108.44836426,
+                                       1074.70495605,
+                                       1040.33654785,
+                                       1005.54919434,
+                                       972.740783691,
+                                       941.609558105,
+                                       910.440185547,
+                                       879.356750488,
+                                       848.471923828,
+                                       817.885742188,
+                                       787.687927246,
+                                       757.955322266,
+                                       728.75579834,
+                                       700.14642334,
+                                       679.84564209,
+                                       660.946289062,
+                                       642.291992188,
+                                       623.906738281,
+                                       1301.52868652,
+                                       1300,
+                                       1295.43139648,
+                                       1287.87316895,
+                                       1277.40795898,
+                                       1264.14855957,
+                                       1248.23461914,
+                                       1229.82922363,
+                                       1209.11425781,
+                                       1186.2878418,
+                                       1161.55883789,
+                                       1135.14245605,
+                                       1107.25793457,
+                                       1078.12231445,
+                                       1047.94946289,
+                                       1016.94714355,
+                                       985.821166992,
+                                       957.398681641,
+                                       928.625244141,
+                                       899.638793945,
+                                       870.567016602,
+                                       841.526489258,
+                                       812.623657227,
+                                       783.954040527,
+                                       755.602600098,
+                                       727.644897461,
+                                       700.14642334,
+                                       680.525085449,
+                                       662.243774414,
+                                       644.148803711,
+                                       626.268066406,
+                                       608.625976562,
+                                       1221.47717285,
+                                       1220.09375,
+                                       1215.95825195,
+                                       1209.11425781,
+                                       1199.63305664,
+                                       1187.61206055,
+                                       1173.171875,
+                                       1156.4543457,
+                                       1137.61865234,
+                                       1116.83618164,
+                                       1094.29150391,
+                                       1070.17382812,
+                                       1044.67651367,
+                                       1017.99298096,
+                                       990.31439209,
+                                       965.022094727,
+                                       939.153625488,
+                                       912.780822754,
+                                       886.038757324,
+                                       859.054748535,
+                                       831.947387695,
+                                       804.825805664,
+                                       777.789794922,
+                                       750.929992676,
+                                       724.327697754,
+                                       698.05480957,
+                                       679.84564209,
+                                       662.243774414,
+                                       644.769775391,
+                                       627.454284668,
+                                       610.324890137,
+                                       593.40612793,
+                                       1146.34912109,
+                                       1145.09570312,
+                                       1141.34753418,
+                                       1135.14245605,
+                                       1126.54284668,
+                                       1115.63220215,
+                                       1102.515625,
+                                       1087.31677246,
+                                       1070.17382812,
+                                       1051.23815918,
+                                       1030.67077637,
+                                       1008.63867188,
+                                       985.821166992,
+                                       964.170288086,
+                                       941.609558105,
+                                       918.278381348,
+                                       894.312011719,
+                                       869.840393066,
+                                       844.988342285,
+                                       819.872619629,
+                                       794.602966309,
+                                       769.28125,
+                                       744.001464844,
+                                       718.848205566,
+                                       694.713867188,
+                                       677.813598633,
+                                       660.946289062,
+                                       644.148803711,
+                                       627.454284668,
+                                       610.893005371,
+                                       594.491943359,
+                                       578.275817871,
+                                       1075.84216309,
+                                       1074.70495605,
+                                       1071.30383301,
+                                       1065.67199707,
+                                       1057.86291504,
+                                       1047.94946289,
+                                       1036.02380371,
+                                       1022.19244385,
+                                       1006.57757568,
+                                       989.356018066,
+                                       972.740783691,
+                                       954.878356934,
+                                       935.89440918,
+                                       915.916137695,
+                                       895.070007324,
+                                       873.481933594,
+                                       851.274475098,
+                                       828.566345215,
+                                       805.471496582,
+                                       782.097106934,
+                                       758.545227051,
+                                       734.910339355,
+                                       711.280090332,
+                                       690.50970459,
+                                       674.449768066,
+                                       658.364379883,
+                                       642.291992188,
+                                       626.268066406,
+                                       610.324890137,
+                                       594.491943359,
+                                       578.796020508,
+                                       563.261047363,
+                                       1009.67150879,
+                                       1008.63867188,
+                                       1005.54919434,
+                                       1000.43200684,
+                                       993.333496094,
+                                       984.940734863,
+                                       975.335021973,
+                                       964.170288086,
+                                       951.533935547,
+                                       937.521850586,
+                                       922.236572266,
+                                       905.786682129,
+                                       888.28326416,
+                                       869.840393066,
+                                       850.572570801,
+                                       830.592407227,
+                                       810.01184082,
+                                       788.938354492,
+                                       767.476257324,
+                                       745.724487305,
+                                       723.776733398,
+                                       701.721008301,
+                                       684.97052002,
+                                       669.78717041,
+                                       654.522705078,
+                                       639.217041016,
+                                       623.906738281,
+                                       608.625976562,
+                                       593.40612793,
+                                       578.275817871,
+                                       563.261047363,
+                                       548.385559082},
+                                      {0,
+                                       0,
+                                       0,
+                                       0,
+                                       1554.1237793,
+                                       1242.53955078,
+                                       993.424560547,
+                                       821.738647461,
+                                       688.023742676,
+                                       576.067199707,
+                                       482.328430176,
+                                       403.842987061,
+                                       338.128967285,
+                                       283.233520508,
+                                       237.367095947,
+                                       198.928222656,
+                                       166.714080811,
+                                       139.71661377,
+                                       117.091148376,
+                                       100.366226196,
+                                       93.5875701904,
+                                       87.2667160034,
+                                       81.3727798462,
+                                       75.8769226074,
+                                       70.7522583008,
+                                       65.9736862183,
+                                       62.4703788757,
+                                       59.2027587891,
+                                       56.1060714722,
+                                       53.1713485718,
+                                       50.3901405334,
+                                       47.7544021606,
+                                       0,
+                                       0,
+                                       0,
+                                       0,
+                                       1511.89892578,
+                                       1215.3125,
+                                       975.19708252,
+                                       811.432128906,
+                                       680.458190918,
+                                       570.428588867,
+                                       478.074890137,
+                                       400.60269165,
+                                       335.640289307,
+                                       281.318328857,
+                                       235.876022339,
+                                       197.761489868,
+                                       165.797119141,
+                                       138.993164062,
+                                       116.518371582,
+                                       100.18183136,
+                                       93.4242019653,
+                                       87.1216278076,
+                                       81.2436294556,
+                                       75.76171875,
+                                       70.6492919922,
+                                       65.8815155029,
+                                       62.4058837891,
+                                       59.1439094543,
+                                       56.0522842407,
+                                       53.1221389771,
+                                       50.3450584412,
+                                       47.7130508423,
+                                       0,
+                                       0,
+                                       0,
+                                       0,
+                                       1398.31689453,
+                                       1139.93933105,
+                                       926.469055176,
+                                       781.859680176,
+                                       658.586914062,
+                                       554.038146973,
+                                       465.659057617,
+                                       391.113586426,
+                                       328.333618164,
+                                       275.682922363,
+                                       231.480926514,
+                                       194.317352295,
+                                       163.08682251,
+                                       136.852478027,
+                                       114.822052002,
+                                       99.6321716309,
+                                       92.9370193481,
+                                       86.6887664795,
+                                       80.8581924438,
+                                       75.4178009033,
+                                       70.3418579102,
+                                       65.6360702515,
+                                       62.2131195068,
+                                       58.9679450989,
+                                       55.8914489746,
+                                       52.974937439,
+                                       50.2101821899,
+                                       47.5893363953,
+                                       0,
+                                       0,
+                                       0,
+                                       0,
+                                       1242.53955078,
+                                       1031.72070312,
+                                       865.446105957,
+                                       736.612304688,
+                                       624.660888672,
+                                       528.352294922,
+                                       446.048339844,
+                                       376.032348633,
+                                       316.662597656,
+                                       266.643341064,
+                                       224.406723022,
+                                       188.757797241,
+                                       158.70111084,
+                                       133.381164551,
+                                       112.06615448,
+                                       98.727722168,
+                                       92.1346588135,
+                                       85.9753189087,
+                                       80.2224273682,
+                                       74.8501815796,
+                                       69.8341827393,
+                                       65.2862091064,
+                                       61.8940887451,
+                                       58.6766357422,
+                                       55.625087738,
+                                       52.7310905457,
+                                       49.9866943359,
+                                       47.3842887878,
+                                       1554.1237793,
+                                       1511.89892578,
+                                       1398.31689453,
+                                       1242.53955078,
+                                       1072.70471191,
+                                       913.631103516,
+                                       791.500732422,
+                                       680.458190918,
+                                       581.79699707,
+                                       495.450836182,
+                                       420.656097412,
+                                       356.335296631,
+                                       301.3465271,
+                                       254.681488037,
+                                       214.999893188,
+                                       181.334152222,
+                                       152.824005127,
+                                       128.714950562,
+                                       108.351615906,
+                                       97.4852905273,
+                                       91.031036377,
+                                       84.9928512573,
+                                       79.3460922241,
+                                       74.0670623779,
+                                       69.1331710815,
+                                       64.8018112183,
+                                       61.4521331787,
+                                       58.2728424072,
+                                       55.2557067871,
+                                       52.3927955627,
+                                       49.6765098572,
+                                       47.0995864868,
+                                       1242.53955078,
+                                       1215.3125,
+                                       1139.93933105,
+                                       1031.72070312,
+                                       913.631103516,
+                                       811.432128906,
+                                       711.604919434,
+                                       618.224060059,
+                                       533.334899902,
+                                       457.657806396,
+                                       391.113586426,
+                                       333.178527832,
+                                       283.233520508,
+                                       240.389190674,
+                                       203.691101074,
+                                       172.362747192,
+                                       145.689254761,
+                                       123.027793884,
+                                       103.808616638,
+                                       95.9272842407,
+                                       89.6447677612,
+                                       83.7569274902,
+                                       78.2422027588,
+                                       73.0794143677,
+                                       68.2481460571,
+                                       64.1881027222,
+                                       60.8917617798,
+                                       57.7605171204,
+                                       54.7867393494,
+                                       51.9630203247,
+                                       49.2822341919,
+                                       46.7375259399,
+                                       993.424560547,
+                                       975.19708252,
+                                       926.469055176,
+                                       865.446105957,
+                                       791.500732422,
+                                       711.604919434,
+                                       631.209533691,
+                                       554.038146973,
+                                       482.328430176,
+                                       417.209503174,
+                                       359.053955078,
+                                       307.756866455,
+                                       263.145568848,
+                                       224.406723022,
+                                       190.955062866,
+                                       162.196685791,
+                                       137.561203003,
+                                       116.518371582,
+                                       100.551231384,
+                                       94.0806045532,
+                                       87.9983291626,
+                                       82.2864456177,
+                                       76.9266967773,
+                                       71.9007492065,
+                                       67.1905670166,
+                                       63.4515571594,
+                                       60.2185935974,
+                                       57.1445236206,
+                                       54.2224197388,
+                                       51.4454956055,
+                                       48.8071632385,
+                                       46.300994873,
+                                       821.738647461,
+                                       811.432128906,
+                                       781.859680176,
+                                       736.612304688,
+                                       680.458190918,
+                                       618.224060059,
+                                       554.038146973,
+                                       491.015136719,
+                                       431.260406494,
+                                       376.032348633,
+                                       325.949676514,
+                                       281.318328857,
+                                       241.920471191,
+                                       207.367233276,
+                                       177.270202637,
+                                       151.198699951,
+                                       128.714981079,
+                                       109.396255493,
+                                       98.1919021606,
+                                       91.9756011963,
+                                       86.1172180176,
+                                       80.602897644,
+                                       75.4178009033,
+                                       70.5465774536,
+                                       65.9736938477,
+                                       62.5996856689,
+                                       59.4391670227,
+                                       56.4305839539,
+                                       53.5677680969,
+                                       50.8446311951,
+                                       48.2551269531,
+                                       45.7933807373,
+                                       688.023742676,
+                                       680.458190918,
+                                       658.586914062,
+                                       624.660888672,
+                                       581.79699707,
+                                       533.334899902,
+                                       482.328430176,
+                                       431.260406494,
+                                       381.958526611,
+                                       335.640289307,
+                                       293.096069336,
+                                       254.681427002,
+                                       220.30645752,
+                                       189.852081299,
+                                       163.08682251,
+                                       139.716659546,
+                                       119.419540405,
+                                       101.869186401,
+                                       95.5869064331,
+                                       89.6447677612,
+                                       84.0289993286,
+                                       78.7296905518,
+                                       73.7354660034,
+                                       69.0339279175,
+                                       64.8706207275,
+                                       61.6408843994,
+                                       58.5607948303,
+                                       55.625087738,
+                                       52.8284225464,
+                                       50.1653556824,
+                                       47.6305236816,
+                                       45.2185592651,
+                                       576.067199707,
+                                       570.428588867,
+                                       554.038146973,
+                                       528.352294922,
+                                       495.450836182,
+                                       457.657806396,
+                                       417.209503174,
+                                       376.032348633,
+                                       335.640289307,
+                                       297.179992676,
+                                       261.421112061,
+                                       228.614364624,
+                                       198.928222656,
+                                       172.362747192,
+                                       148.803924561,
+                                       128.066162109,
+                                       109.923492432,
+                                       98.727722168,
+                                       92.7755966187,
+                                       87.1216278076,
+                                       81.7623062134,
+                                       76.6913833618,
+                                       71.9007492065,
+                                       67.3809127808,
+                                       63.7176971436,
+                                       60.5841941833,
+                                       57.5914230347,
+                                       54.7350311279,
+                                       52.0104789734,
+                                       49.4130935669,
+                                       46.938117981,
+                                       44.5807800293,
+                                       482.328430176,
+                                       478.074890137,
+                                       465.659057617,
+                                       446.048339844,
+                                       420.656097412,
+                                       391.113586426,
+                                       359.053955078,
+                                       325.949676514,
+                                       293.096069336,
+                                       261.421112061,
+                                       231.480926514,
+                                       203.691101074,
+                                       178.274459839,
+                                       155.306304932,
+                                       134.755508423,
+                                       116.518371582,
+                                       101.297218323,
+                                       95.4174804688,
+                                       89.7970199585,
+                                       84.4398193359,
+                                       79.3460922241,
+                                       74.5129318237,
+                                       69.935256958,
+                                       65.6360702515,
+                                       62.4703788757,
+                                       59.4391593933,
+                                       56.5394515991,
+                                       53.7678108215,
+                                       51.1205253601,
+                                       48.5936508179,
+                                       46.1830673218,
+                                       43.8845863342,
+                                       403.842987061,
+                                       400.60269165,
+                                       391.113586426,
+                                       376.032348633,
+                                       356.335296631,
+                                       333.178527832,
+                                       307.756866455,
+                                       281.318328857,
+                                       254.681427002,
+                                       228.614364624,
+                                       203.691101074,
+                                       180.30632019,
+                                       158.70111084,
+                                       138.99319458,
+                                       121.205970764,
+                                       105.29486084,
+                                       97.4852981567,
+                                       91.9756011963,
+                                       86.6887817383,
+                                       81.6321105957,
+                                       76.8088912964,
+                                       72.2191696167,
+                                       67.8605422974,
+                                       64.1881027222,
+                                       61.1397399902,
+                                       58.2155418396,
+                                       55.4135246277,
+                                       52.7310905457,
+                                       50.1653556824,
+                                       47.7130508423,
+                                       45.3707275391,
+                                       43.1347961426,
+                                       338.128967285,
+                                       335.640289307,
+                                       328.333618164,
+                                       316.662597656,
+                                       301.3465271,
+                                       283.233520508,
+                                       263.145568848,
+                                       241.920471191,
+                                       220.30645752,
+                                       198.928222656,
+                                       178.274459839,
+                                       158.70111084,
+                                       140.445129395,
+                                       123.643127441,
+                                       108.351615906,
+                                       98.9074707031,
+                                       93.5875701904,
+                                       88.4422607422,
+                                       83.4863510132,
+                                       78.7296905518,
+                                       74.1781234741,
+                                       69.8341827393,
+                                       65.7064590454,
+                                       62.6645126343,
+                                       59.736907959,
+                                       56.9232521057,
+                                       54.2224197388,
+                                       51.6327171326,
+                                       49.1519355774,
+                                       46.7775268555,
+                                       44.5066757202,
+                                       42.336353302,
+                                       283.233520508,
+                                       281.318328857,
+                                       275.682922363,
+                                       266.643341064,
+                                       254.681488037,
+                                       240.389190674,
+                                       224.406723022,
+                                       207.367233276,
+                                       189.852081299,
+                                       172.362747192,
+                                       155.306304932,
+                                       138.99319458,
+                                       123.643127441,
+                                       109.396255493,
+                                       99.632194519,
+                                       94.5781402588,
+                                       89.6447677612,
+                                       84.8540420532,
+                                       80.2224273682,
+                                       75.76171875,
+                                       71.4796905518,
+                                       67.3809127808,
+                                       63.9857673645,
+                                       61.0775909424,
+                                       58.2728424072,
+                                       55.5720672607,
+                                       52.974937439,
+                                       50.4805107117,
+                                       48.0872917175,
+                                       45.7933807373,
+                                       43.5965652466,
+                                       41.4943313599,
+                                       237.367095947,
+                                       235.876022339,
+                                       231.480926514,
+                                       224.406723022,
+                                       214.999893188,
+                                       203.691101074,
+                                       190.955062866,
+                                       177.270202637,
+                                       163.08682251,
+                                       148.803924561,
+                                       134.755508423,
+                                       121.205970764,
+                                       108.351615906,
+                                       99.632194519,
+                                       94.9123535156,
+                                       90.2564239502,
+                                       85.6926956177,
+                                       81.2436294556,
+                                       76.9266967773,
+                                       72.7549057007,
+                                       68.7375793457,
+                                       65.077835083,
+                                       62.2131195068,
+                                       59.4391593933,
+                                       56.7582321167,
+                                       54.1716041565,
+                                       51.6796913147,
+                                       49.2822341919,
+                                       46.9784011841,
+                                       44.7668800354,
+                                       42.6460189819,
+                                       40.6138343811,
+                                       198.928222656,
+                                       197.761489868,
+                                       194.317352295,
+                                       188.757797241,
+                                       181.334152222,
+                                       172.362747192,
+                                       162.196685791,
+                                       151.198699951,
+                                       139.716659546,
+                                       128.066162109,
+                                       116.518371582,
+                                       105.29486084,
+                                       98.9074707031,
+                                       94.5781402588,
+                                       90.2564239502,
+                                       85.9753189087,
+                                       81.7623062134,
+                                       77.6397247314,
+                                       73.6254806519,
+                                       69.7333374023,
+                                       65.9736938477,
+                                       63.1215667725,
+                                       60.4009246826,
+                                       57.7605171204,
+                                       55.2032775879,
+                                       52.7310905457,
+                                       50.3450584412,
+                                       48.0454750061,
+                                       45.832118988,
+                                       43.7042236328,
+                                       41.6606483459,
+                                       39.699886322,
+                                       166.714080811,
+                                       165.797119141,
+                                       163.08682251,
+                                       158.70111084,
+                                       152.824005127,
+                                       145.689254761,
+                                       137.561203003,
+                                       128.714981079,
+                                       119.419540405,
+                                       109.923492432,
+                                       101.297218323,
+                                       97.4852981567,
+                                       93.5875701904,
+                                       89.6447677612,
+                                       85.6926956177,
+                                       81.7623062134,
+                                       77.8798141479,
+                                       74.0670623779,
+                                       70.3418579102,
+                                       66.7183837891,
+                                       63.7845306396,
+                                       61.1397399902,
+                                       58.5607872009,
+                                       56.0522842407,
+                                       53.6176719666,
+                                       51.2593917847,
+                                       48.9790611267,
+                                       46.7775268555,
+                                       44.6550750732,
+                                       42.6114387512,
+                                       40.6459236145,
+                                       38.7575035095,
+                                       139.71661377,
+                                       138.993164062,
+                                       136.852478027,
+                                       133.381164551,
+                                       128.714950562,
+                                       123.027793884,
+                                       116.518371582,
+                                       109.396255493,
+                                       101.869186401,
+                                       98.727722168,
+                                       95.4174804688,
+                                       91.9756011963,
+                                       88.4422607422,
+                                       84.8540420532,
+                                       81.2436294556,
+                                       77.6397247314,
+                                       74.0670623779,
+                                       70.5465774536,
+                                       67.0957107544,
+                                       64.1881027222,
+                                       61.6408843994,
+                                       59.1439094543,
+                                       56.7033996582,
+                                       54.3243103027,
+                                       52.0104789734,
+                                       49.7648124695,
+                                       47.5893363953,
+                                       45.4853858948,
+                                       43.4536628723,
+                                       41.4943313599,
+                                       39.6071586609,
+                                       37.7914886475,
+                                       117.091148376,
+                                       116.518371582,
+                                       114.822052002,
+                                       112.06615448,
+                                       108.351615906,
+                                       103.808616638,
+                                       100.551231384,
+                                       98.1919021606,
+                                       95.5869064331,
+                                       92.7755966187,
+                                       89.7970199585,
+                                       86.6887817383,
+                                       83.4863510132,
+                                       80.2224273682,
+                                       76.9266967773,
+                                       73.6254806519,
+                                       70.3418579102,
+                                       67.0957107544,
+                                       64.3236160278,
+                                       61.8940887451,
+                                       59.4985046387,
+                                       57.1445236206,
+                                       54.83852005,
+                                       52.5856742859,
+                                       50.3901405334,
+                                       48.2551269531,
+                                       46.1830673218,
+                                       44.1756248474,
+                                       42.2339172363,
+                                       40.3584899902,
+                                       38.5494613647,
+                                       36.8065338135,
+                                       100.366226196,
+                                       100.18183136,
+                                       99.6321716309,
+                                       98.727722168,
+                                       97.4852905273,
+                                       95.9272842407,
+                                       94.0806045532,
+                                       91.9756011963,
+                                       89.6447677612,
+                                       87.1216278076,
+                                       84.4398193359,
+                                       81.6321105957,
+                                       78.7296905518,
+                                       75.76171875,
+                                       72.7549057007,
+                                       69.7333374023,
+                                       66.7183837891,
+                                       64.1881027222,
+                                       61.8940887451,
+                                       59.6175003052,
+                                       57.3672447205,
+                                       55.1509132385,
+                                       52.974937439,
+                                       50.8446311951,
+                                       48.7643356323,
+                                       46.7375259399,
+                                       44.7668800354,
+                                       42.8544273376,
+                                       41.0015525818,
+                                       39.2091522217,
+                                       37.4776496887,
+                                       35.8070869446,
+                                       93.5875701904,
+                                       93.4242019653,
+                                       92.9370193481,
+                                       92.1346588135,
+                                       91.031036377,
+                                       89.6447677612,
+                                       87.9983291626,
+                                       86.1172180176,
+                                       84.0289993286,
+                                       81.7623062134,
+                                       79.3460922241,
+                                       76.8088912964,
+                                       74.1781234741,
+                                       71.4796905518,
+                                       68.7375793457,
+                                       65.9736938477,
+                                       63.7845306396,
+                                       61.6408843994,
+                                       59.4985046387,
+                                       57.3672447205,
+                                       55.2557067871,
+                                       53.1713485718,
+                                       51.1205253601,
+                                       49.1086196899,
+                                       47.1400909424,
+                                       45.2185668945,
+                                       43.3469619751,
+                                       41.5275192261,
+                                       39.7618980408,
+                                       38.0512619019,
+                                       36.396320343,
+                                       34.7973823547,
+                                       87.2667160034,
+                                       87.1216278076,
+                                       86.6887664795,
+                                       85.9753189087,
+                                       84.9928512573,
+                                       83.7569274902,
+                                       82.2864456177,
+                                       80.602897644,
+                                       78.7296905518,
+                                       76.6913833618,
+                                       74.5129318237,
+                                       72.2191696167,
+                                       69.8341827393,
+                                       67.3809127808,
+                                       65.077835083,
+                                       63.1215667725,
+                                       61.1397399902,
+                                       59.1439094543,
+                                       57.1445236206,
+                                       55.1509132385,
+                                       53.1713485718,
+                                       51.2130279541,
+                                       49.2822341919,
+                                       47.3842887878,
+                                       45.5237045288,
+                                       43.7042160034,
+                                       41.9289016724,
+                                       40.2001571655,
+                                       38.5198707581,
+                                       36.8894119263,
+                                       35.3097419739,
+                                       33.7813949585,
+                                       81.3727798462,
+                                       81.2436294556,
+                                       80.8581924438,
+                                       80.2224273682,
+                                       79.3460922241,
+                                       78.2422027588,
+                                       76.9266967773,
+                                       75.4178009033,
+                                       73.7354660034,
+                                       71.9007492065,
+                                       69.935256958,
+                                       67.8605422974,
+                                       65.7064590454,
+                                       63.9857673645,
+                                       62.2131195068,
+                                       60.4009246826,
+                                       58.5607872009,
+                                       56.7033996582,
+                                       54.83852005,
+                                       52.974937439,
+                                       51.1205253601,
+                                       49.2822341919,
+                                       47.4661407471,
+                                       45.6774940491,
+                                       43.9208030701,
+                                       42.199848175,
+                                       40.5177879333,
+                                       38.877155304,
+                                       37.2799949646,
+                                       35.7278671265,
+                                       34.2218933105,
+                                       32.9320831299,
+                                       75.8769226074,
+                                       75.76171875,
+                                       75.4178009033,
+                                       74.8501815796,
+                                       74.0670623779,
+                                       73.0794143677,
+                                       71.9007492065,
+                                       70.5465774536,
+                                       69.0339279175,
+                                       67.3809127808,
+                                       65.6360702515,
+                                       64.1881027222,
+                                       62.6645126343,
+                                       61.0775909424,
+                                       59.4391593933,
+                                       57.7605171204,
+                                       56.0522842407,
+                                       54.3243103027,
+                                       52.5856742859,
+                                       50.8446311951,
+                                       49.1086196899,
+                                       47.3842887878,
+                                       45.6774940491,
+                                       43.9933776855,
+                                       42.336353302,
+                                       40.7102241516,
+                                       39.1181678772,
+                                       37.5628471375,
+                                       36.0463790894,
+                                       34.5704689026,
+                                       33.2245254517,
+                                       32.1316642761,
+                                       70.7522583008,
+                                       70.6492919922,
+                                       70.3418579102,
+                                       69.8341827393,
+                                       69.1331710815,
+                                       68.2481460571,
+                                       67.1905670166,
+                                       65.9736938477,
+                                       64.8706207275,
+                                       63.7176971436,
+                                       62.4703788757,
+                                       61.1397399902,
+                                       59.736907959,
+                                       58.2728424072,
+                                       56.7582321167,
+                                       55.2032775879,
+                                       53.6176719666,
+                                       52.0104789734,
+                                       50.3901405334,
+                                       48.7643356323,
+                                       47.1400909424,
+                                       45.5237045288,
+                                       43.9208030701,
+                                       42.336353302,
+                                       40.7746887207,
+                                       39.2395439148,
+                                       37.7341346741,
+                                       36.2611160278,
+                                       34.8227424622,
+                                       33.4466362,
+                                       32.3772735596,
+                                       31.328754425,
+                                       65.9736862183,
+                                       65.8815155029,
+                                       65.6360702515,
+                                       65.2862091064,
+                                       64.8018112183,
+                                       64.1881027222,
+                                       63.4515571594,
+                                       62.5996856689,
+                                       61.6408843994,
+                                       60.5841941833,
+                                       59.4391593933,
+                                       58.2155418396,
+                                       56.9232521057,
+                                       55.5720672607,
+                                       54.1716041565,
+                                       52.7310905457,
+                                       51.2593917847,
+                                       49.7648124695,
+                                       48.2551269531,
+                                       46.7375259399,
+                                       45.2185668945,
+                                       43.7042160034,
+                                       42.199848175,
+                                       40.7102241516,
+                                       39.2395439148,
+                                       37.7914886475,
+                                       36.3692169189,
+                                       34.9754295349,
+                                       33.6123847961,
+                                       32.5545730591,
+                                       31.5306549072,
+                                       30.5255126953,
+                                       62.4703788757,
+                                       62.4058837891,
+                                       62.2131195068,
+                                       61.8940887451,
+                                       61.4521331787,
+                                       60.8917617798,
+                                       60.2185935974,
+                                       59.4391670227,
+                                       58.5607948303,
+                                       57.5914230347,
+                                       56.5394515991,
+                                       55.4135246277,
+                                       54.2224197388,
+                                       52.974937439,
+                                       51.6796913147,
+                                       50.3450584412,
+                                       48.9790611267,
+                                       47.5893363953,
+                                       46.1830673218,
+                                       44.7668800354,
+                                       43.3469619751,
+                                       41.9289016724,
+                                       40.5177879333,
+                                       39.1181678772,
+                                       37.7341346741,
+                                       36.3692169189,
+                                       35.0265541077,
+                                       33.7088127136,
+                                       32.6617202759,
+                                       31.6664142609,
+                                       30.6867351532,
+                                       29.723903656,
+                                       59.2027587891,
+                                       59.1439094543,
+                                       58.9679450989,
+                                       58.6766357422,
+                                       58.2728424072,
+                                       57.7605171204,
+                                       57.1445236206,
+                                       56.4305839539,
+                                       55.625087738,
+                                       54.7350311279,
+                                       53.7678108215,
+                                       52.7310905457,
+                                       51.6327171326,
+                                       50.4805107117,
+                                       49.2822341919,
+                                       48.0454750061,
+                                       46.7775268555,
+                                       45.4853858948,
+                                       44.1756248474,
+                                       42.8544273376,
+                                       41.5275192261,
+                                       40.2001571655,
+                                       38.877155304,
+                                       37.5628471375,
+                                       36.2611160278,
+                                       34.9754295349,
+                                       33.7088127136,
+                                       32.6975517273,
+                                       31.7346553802,
+                                       30.7841281891,
+                                       29.8474140167,
+                                       28.9257545471,
+                                       56.1060714722,
+                                       56.0522842407,
+                                       55.8914489746,
+                                       55.625087738,
+                                       55.2557067871,
+                                       54.7867393494,
+                                       54.2224197388,
+                                       53.5677680969,
+                                       52.8284225464,
+                                       52.0104789734,
+                                       51.1205253601,
+                                       50.1653556824,
+                                       49.1519355774,
+                                       48.0872917175,
+                                       46.9784011841,
+                                       45.832118988,
+                                       44.6550750732,
+                                       43.4536628723,
+                                       42.2339172363,
+                                       41.0015525818,
+                                       39.7618980408,
+                                       38.5198707581,
+                                       37.2799949646,
+                                       36.0463790894,
+                                       34.8227424622,
+                                       33.6123847961,
+                                       32.6617202759,
+                                       31.7346553802,
+                                       30.8167037964,
+                                       29.9094753265,
+                                       29.0143985748,
+                                       28.1327323914,
+                                       53.1713485718,
+                                       53.1221389771,
+                                       52.974937439,
+                                       52.7310905457,
+                                       52.3927955627,
+                                       51.9630203247,
+                                       51.4454956055,
+                                       50.8446311951,
+                                       50.1653556824,
+                                       49.4130935669,
+                                       48.5936508179,
+                                       47.7130508423,
+                                       46.7775268555,
+                                       45.7933807373,
+                                       44.7668800354,
+                                       43.7042236328,
+                                       42.6114387512,
+                                       41.4943313599,
+                                       40.3584899902,
+                                       39.2091522217,
+                                       38.0512619019,
+                                       36.8894119263,
+                                       35.7278671265,
+                                       34.5704689026,
+                                       33.4466362,
+                                       32.5545730591,
+                                       31.6664142609,
+                                       30.7841281891,
+                                       29.9094753265,
+                                       29.0440425873,
+                                       28.1892433167,
+                                       27.3463401794,
+                                       50.3901405334,
+                                       50.3450584412,
+                                       50.2101821899,
+                                       49.9866943359,
+                                       49.6765098572,
+                                       49.2822341919,
+                                       48.8071632385,
+                                       48.2551269531,
+                                       47.6305236816,
+                                       46.938117981,
+                                       46.1830673218,
+                                       45.3707275391,
+                                       44.5066757202,
+                                       43.5965652466,
+                                       42.6460189819,
+                                       41.6606483459,
+                                       40.6459236145,
+                                       39.6071586609,
+                                       38.5494613647,
+                                       37.4776496887,
+                                       36.396320343,
+                                       35.3097419739,
+                                       34.2218933105,
+                                       33.2245254517,
+                                       32.3772735596,
+                                       31.5306549072,
+                                       30.6867351532,
+                                       29.8474140167,
+                                       29.0143985748,
+                                       28.1892433167,
+                                       27.3733463287,
+                                       26.5679397583,
+                                       47.7544021606,
+                                       47.7130508423,
+                                       47.5893363953,
+                                       47.3842887878,
+                                       47.0995864868,
+                                       46.7375259399,
+                                       46.300994873,
+                                       45.7933807373,
+                                       45.2185592651,
+                                       44.5807800293,
+                                       43.8845863342,
+                                       43.1347961426,
+                                       42.336353302,
+                                       41.4943313599,
+                                       40.6138343811,
+                                       39.699886322,
+                                       38.7575035095,
+                                       37.7914886475,
+                                       36.8065338135,
+                                       35.8070869446,
+                                       34.7973823547,
+                                       33.7813949585,
+                                       32.9320831299,
+                                       32.1316642761,
+                                       31.328754425,
+                                       30.5255126953,
+                                       29.723903656,
+                                       28.9257545471,
+                                       28.1327323914,
+                                       27.3463401794,
+                                       26.5679397583,
+                                       25.7987575531}};
+
+ap_uint<24> inv_matrix_8_fix[3][64] = {
+    {0,      573440, 571914, 500934, 438764, 384309, 336613, 294836, 573440, 573440, 554300, 490277, 431664,
+     379299, 332941, 292072, 571914, 554300, 512454, 462308, 412154, 365186, 322442, 284093, 500934, 490277,
+     462308, 424880, 384309, 344238, 306460, 271733, 438764, 431664, 412154, 384309, 352272, 319103, 286702,
+     256122, 384309, 379299, 365186, 344238, 319103, 292072, 264820, 238433, 336613, 332941, 322442, 306460,
+     286702, 264820, 242160, 219708, 294836, 292072, 284093, 271733, 256122, 238433, 219708, 200777},
+    {0,       3225600, 3214600, 2712197, 2288313, 1930677, 1628936, 1374353, 3225600, 3225600, 3088189,
+     2638422, 2240937, 1898460, 1606185, 1357854, 3214600, 3088189, 2792443, 2446967, 2111879, 1808349,
+     1541522, 1310462, 2712197, 2638422, 2446967, 2195873, 1930677, 1676412, 1444223, 1237800, 2288313,
+     2240937, 2111879, 1930677, 1726752, 1521076, 1325921, 1086047, 1930677, 1898460, 1808349, 1676412,
+     1521076, 1357854, 1197559, 804826,  1628936, 1606185, 1541522, 1444223, 1325921, 1197559, 858830,
+     571430,  1374353, 1357854, 1310462, 1237800, 1086047, 804826,  571430,  391838},
+    {0,     301014, 173537, 122278, 87381,  87381,  85556, 60284, 301014, 239204, 159771, 115525, 87381,
+     87381, 83112,  58803,  173537, 159771, 129848, 98919, 87381, 87381,  76366,  54653,  122278, 115525,
+     98919, 87381,  87381,  87381,  66768,  48594,  87381, 87381, 87381,  87381,  87381,  74294,  55990,
+     40365, 87381,  87381,  87381,  87381,  74294,  58803, 45395, 29913,  85556,  83112,  76366,  66768,
+     55990, 45395,  31920,  21238,  60284,  58803,  54653, 48594, 40365,  29913,  21238,  14563}};
+
+ap_uint<24> inv_matrix_16_fix[3][256] = {
+    {0,       0,       2441638, 2110453, 1805935, 1527539, 1292059, 1092880, 979637,  882036,  794159,  720192,
+     660388,  605551,  555267,  513377,  0,       0,       2359049, 2061117, 1769095, 1502424, 1274280, 1079954,
+     973257,  876924,  790013,  717366,  658011,  603538,  553552,  512061,  2441638, 2359049, 2163900, 1929223,
+     1668681, 1432145, 1223726, 1056519, 954648,  861946,  777824,  709020,  650980,  597575,  548467,  508152,
+     2110453, 2061117, 1929223, 1734043, 1527539, 1329150, 1147596, 1019948, 925274,  838103,  758296,  695541,
+     639580,  587878,  541090,  501757,  1805935, 1769095, 1668681, 1527539, 1368461, 1207734, 1064198, 973257,
+     887210,  806857,  734675,  677512,  624254,  574785,  531076,  493051,  1527539, 1502424, 1432145, 1329150,
+     1207734, 1079954, 999342,  919628,  842764,  769898,  709020,  655651,  605551,  558724,  518704,  482257,
+     1292059, 1274280, 1223726, 1147596, 1064198, 999342,  930994,  861946,  794159,  731730,  680030,  630735,
+     584081,  541090,  504297,  469635,  1092880, 1079954, 1056519, 1019948, 973257,  919628,  861946,  802580,
+     743657,  695541,  648668,  603538,  560468,  522767,  488199,  455467,  979637,  973257,  954648,  925274,
+     887210,  842764,  794159,  743657,  700870,  658011,  615808,  574785,  536755,  503024,  470758,  440042,
+     882036,  876924,  861946,  838103,  806857,  769898,  731730,  695541,  658011,  620003,  582200,  545493,
+     513377,  482257,  452306,  423640,  794159,  790013,  777824,  758296,  734675,  709020,  680030,  648668,
+     615808,  582200,  548467,  518704,  489404,  460825,  433156,  404651,  720192,  717366,  709020,  695541,
+     677512,  655651,  630735,  603538,  574785,  545493,  518704,  491830,  465193,  439046,  412893,  384233,
+     660388,  658011,  650980,  639580,  624254,  605551,  584081,  560468,  536755,  513377,  489404,  465193,
+     441041,  417116,  389895,  363695,  605551,  603538,  597575,  587878,  574785,  558724,  541090,  522767,
+     503024,  482257,  460825,  439046,  417116,  391813,  367140,  343268,  555267,  553552,  548467,  541090,
+     531076,  518704,  504297,  488199,  470758,  452306,  433156,  412893,  389895,  367140,  344835,  323147,
+     513377,  512061,  508152,  501757,  493051,  482257,  469635,  455467,  440042,  423640,  404651,  384233,
+     363695,  343268,  323147,  303491},
+    {0,       0,       5751209, 4544049, 3799576, 3391573, 3027382, 2702299, 2436075, 2197740, 1982723, 1763518,
+     1534572, 1335348, 1161989, 974727,  0,       0,       5440084, 4373603, 3746811, 3353638, 2999054, 2680571,
+     2420510, 2185243, 1972567, 1752429, 1525721, 1328236, 1156239, 968844,  5751209, 5440084, 4731487, 3973698,
+     3601163, 3246381, 2917801, 2623494, 2375098, 2148620, 1942705, 1719843, 1499653, 1307247, 1139247, 951484,
+     4544049, 4373603, 3973698, 3696279, 3391573, 3086079, 2793372, 2534376, 2303382, 2090297, 1894846, 1667696,
+     1457751, 1273387, 1103434, 923481,  3799576, 3746811, 3601163, 3391573, 3147719, 2891875, 2642200, 2420510,
+     2210386, 2013818, 1820750, 1598900, 1402122, 1228203, 1055862, 886125,  3391573, 3353638, 3246381, 3086079,
+     2891875, 2680571, 2484135, 2289592, 2101702, 1923283, 1719843, 1516952, 1335348, 1173618, 998753,  841041,
+     3027382, 2999054, 2917801, 2793372, 2642200, 2484135, 2317350, 2148620, 1982723, 1809059, 1608443, 1425546,
+     1260221, 1103434, 934545,  790037,  2702299, 2680571, 2623494, 2534376, 2420510, 2289592, 2148620, 2003346,
+     1856592, 1667696, 1491121, 1328236, 1179499, 1017307, 865695,  734963,  2436075, 2420510, 2375098, 2303382,
+     2210386, 2101702, 1982723, 1856592, 1688240, 1525721, 1371816, 1228203, 1082691, 928991,  794499,  677590,
+     2197740, 2185243, 2148620, 2090297, 2013818, 1923283, 1809059, 1667696, 1525721, 1386839, 1253715, 1124732,
+     974727,  841041,  722986,  619515,  1982723, 1972567, 1942705, 1894846, 1820750, 1719843, 1608443, 1491121,
+     1371816, 1253715, 1139247, 998752,  870742,  755519,  652842,  544631,  1763518, 1752429, 1719843, 1667696,
+     1598900, 1516952, 1425546, 1328236, 1228203, 1124732, 998752,  880957,  772520,  673980,  578732,  465986,
+     1534572, 1525721, 1499653, 1457751, 1402122, 1335348, 1260221, 1179499, 1082691, 974727,  870742,  772520,
+     681226,  596747,  486978,  394922,  1335348, 1328236, 1307247, 1273387, 1228203, 1173618, 1103434, 1017307,
+     928991,  841041,  755519,  673980,  596747,  494226,  406298,  331816,  1161989, 1156239, 1139247, 1103434,
+     1055862, 998753,  934545,  865695,  794499,  722986,  652842,  578732,  486978,  406298,  336400,  276619,
+     974727,  968844,  951484,  923481,  886125,  841041,  790037,  734963,  677590,  619515,  544631,  465986,
+     394922,  331816,  276619,  228975},
+    {0,      0,      630388, 459728, 346040, 270138, 210885, 164629, 145236, 129332, 115170, 103181, 93307,  84378,
+     76303,  60309,  0,      0,      585115, 436769, 335651, 263595, 206607, 161757, 144191, 128504, 114506, 102711,
+     92918,  84053,  76030,  59794,  630388, 585115, 485316, 381545, 307860, 245565, 194600, 157883, 141148, 126081,
+     112555, 101326, 91767,  83090,  75220,  58282,  459728, 436769, 381545, 325861, 270138, 219900, 176964, 151854,
+     136356, 122232, 109437, 99095,  89905,  81527,  71893,  55867,  346040, 335651, 307860, 270138, 229587, 190851,
+     159151, 144191, 130171, 117207, 105593, 96120,  87409,  79424,  67542,  52693,  270138, 263595, 245565, 219900,
+     190851, 161757, 148468, 135437, 122984, 111288, 101326, 92531,  84378,  76855,  62426,  48936,  210885, 206607,
+     194600, 176964, 159151, 148468, 137288, 126081, 115170, 105102, 96535,  88463,  80917,  71893,  56818,  44786,
+     164629, 161757, 157883, 151854, 144191, 135437, 126081, 116520, 107092, 99095,  91389,  84053,  77133,  64075,
+     50981,  40428,  145236, 144191, 141148, 136356, 130171, 122984, 115170, 107092, 99976,  92918,  86039,  79424,
+     69986,  56340,  45145,  36032,  129332, 128504, 126081, 122232, 117207, 111288, 105102, 99095,  92918,  86719,
+     80614,  73866,  60309,  48936,  39498,  31736,  115170, 114506, 112555, 109437, 105593, 101326, 96535,  91389,
+     86039,  80614,  75220,  62426,  51402,  42040,  34182,  25375,  103181, 102711, 101326, 99095,  96120,  92531,
+     88463,  84053,  79424,  73866,  62426,  52258,  43386,  35760,  28391,  19017,  93307,  92918,  91767,  89905,
+     87409,  84378,  80917,  77133,  69986,  60309,  51402,  43386,  36306,  30047,  20632,  14004,  84378,  84053,
+     83090,  81527,  79424,  76855,  71893,  64075,  56340,  48936,  42040,  35760,  30047,  21203,  14759,  10149,
+     76303,  76030,  75220,  71893,  67542,  62426,  56818,  50981,  45145,  39498,  34182,  28391,  20632,  14759,
+     10410,  7249,   60309,  59794,  58282,  55867,  52693,  48936,  44786,  40428,  36032,  31736,  25375,  19017,
+     14004,  10149,  7249,   5111}};
+
+ap_uint<24> inv_matrix_32_fix[3][1024] = {
+    {0,       0,       0,       0,       5131959, 4670491, 4250519, 3878761, 3542927, 3236170, 2955973, 2700037,
+     2466260, 2274087, 2118957, 1974409, 1839722, 1714223, 1597285, 1490256, 1397152, 1309866, 1228032, 1151311,
+     1079383, 1011949, 954704,  901007,  850330,  802503,  757366,  714767,  0,       0,       0,       0,
+     5072775, 4627118, 4217502, 3853877, 3523007, 3219979, 2942652, 2688968, 2456987, 2267924, 2113623, 1969770,
+     1835669, 1710667, 1594156, 1487730, 1394902, 1307856, 1226234, 1149698, 1077934, 1010645, 953642,  900042,
+     849451,  801702,  756636,  714101,  0,       0,       0,       0,       4908661, 4504031, 4123432, 3781605,
+     3464803, 3172463, 2903431, 2656298, 2429565, 2249644, 2097784, 1955978, 1823608, 1700081, 1584833, 1480198,
+     1388190, 1301861, 1220866, 1144883, 1073606, 1006935, 950469,  897157,  846825,  799309,  754452,  712106,
+     0,       0,       0,       0,       4670491, 4318764, 3982627, 3668379, 3372610, 3096591, 2840422, 2603565,
+     2385137, 2219847, 2071905, 1933401, 1803834, 1682702, 1569509, 1467797, 1377130, 1291973, 1212008, 1136931,
+     1066456, 1001153, 945218,  892382,  842477,  795345,  750834,  708800,  5131959, 5072775, 4908661, 4670491,
+     4390194, 4094197, 3805312, 3523007, 3252544, 2996713, 2756785, 2533105, 2331170, 2179470, 2036722, 1902624,
+     1776815, 1658906, 1548492, 1450748, 1361905, 1278347, 1199788, 1125953, 1056576, 993151,  937947,  885767,
+     836451,  789848,  745815,  704212,  4670491, 4627118, 4504031, 4318764, 4094197, 3853877, 3604336, 3354843,
+     3111448, 2877883, 2656298, 2447781, 2274087, 2129705, 1993181, 1864403, 1743163, 1629194, 1522192, 1429345,
+     1342761, 1261188, 1184381, 1112095, 1044091, 983020,  928735,  877378,  828804,  782869,  739438,  698619,
+     4250519, 4217502, 4123432, 3982627, 3805312, 3604336, 3390593, 3172463, 2955973, 2745243, 2542942, 2350878,
+     2208155, 2071905, 1942371, 1819621, 1703597, 1594156, 1492790, 1403941, 1319993, 1240747, 1165998, 1095537,
+     1029156, 970871,  917676,  867300,  819609,  774471,  731761,  692068,  3878761, 3853877, 3781605, 3668379,
+     3523007, 3354843, 3172463, 2983002, 2792006, 2603565, 2420554, 2267924, 2135121, 2007491, 1885454, 1769234,
+     1658906, 1554446, 1460446, 1374937, 1293940, 1217309, 1144883, 1076488, 1011949, 956834,  904884,  855630,
+     808951,  764729,  722846,  684452,  3542927, 3523007, 3464803, 3372610, 3252544, 3111448, 2955973, 2792006,
+     2624408, 2456987, 2305432, 2179470, 2056680, 1937876, 1823608, 1714223, 1609913, 1510754, 1424666, 1342761,
+     1264967, 1191187, 1121302, 1055177, 994288,  941052,  890484,  842477,  796927,  753726,  712770,  675828,
+     3236170, 3219979, 3172463, 3096591, 2996713, 2877883, 2745243, 2603565, 2456987, 2318226, 2202356, 2087355,
+     1974409, 1864403, 1757973, 1655557, 1557438, 1467797, 1385966, 1307856, 1233454, 1162707, 1095537, 1031846,
+     975260,  923681,  874611,  827961,  783640,  741554,  701628,  666259,  2955973, 2942652, 2903431, 2840422,
+     2756785, 2656298, 2542942, 2420554, 2305432, 2202356, 2097784, 1993181, 1889719, 1788302, 1689615, 1594156,
+     1503004, 1422336, 1344864, 1270671, 1199788, 1132204, 1067880, 1006935, 954704,  904884,  857409,  812207,
+     769201,  728312,  690299,  655815,  2700037, 2688968, 2656298, 2603565, 2533105, 2447781, 2350878, 2267924,
+     2179470, 2087355, 1993181, 1898303, 1803834, 1710668, 1619502, 1530871, 1450748, 1374937, 1301861, 1231642,
+     1164351, 1100012, 1038620, 983020,  932811,  884828,  839025,  795345,  753726,  714101,  678111,  644568,
+     2466260, 2456987, 2429565, 2385137, 2331170, 2274087, 2208155, 2135121, 2056680, 1974409, 1889719, 1803834,
+     1717792, 1632448, 1548492, 1470262, 1397152, 1326135, 1257429, 1191187, 1127510, 1066456, 1008099, 957902,
+     909769,  863682,  819609,  777508,  737332,  699219,  665148,  632591,  2274087, 2267924, 2249644, 2219847,
+     2179470, 2129705, 2071905, 2007491, 1937876, 1864403, 1788302, 1710668, 1632448, 1554446, 1480198, 1410789,
+     1342761, 1276421, 1212008, 1149698, 1089617, 1031846, 979682,  931789,  885767,  841612,  799309,  758829,
+     720138,  684452,  651494,  619962,  2118957, 2113623, 2097784, 2071905, 2036722, 1993181, 1942371, 1885454,
+     1823608, 1757973, 1689615, 1619502, 1548492, 1480198, 1415388, 1351210, 1288054, 1226234, 1165998, 1107538,
+     1050997, 997710,  950469,  904884,  860984,  818781,  778271,  739438,  702260,  669051,  637236,  606756,
+     1974409, 1969770, 1955978, 1933401, 1902624, 1864403, 1819621, 1769234, 1714223, 1655557, 1594156, 1530871,
+     1470262, 1410789, 1351210, 1291973, 1233454, 1175965, 1119759, 1065035, 1011949, 965432,  920670,  877378,
+     835596,  795345,  756636,  719463,  685033,  653109,  622456,  593050,  1839722, 1835669, 1823608, 1803834,
+     1776815, 1743163, 1703597, 1658906, 1609913, 1557438, 1503004, 1450748, 1397152, 1342761, 1288054, 1233454,
+     1179319, 1125953, 1073606, 1022482, 976362,  932811,  890484,  849451,  809763,  771453,  734538,  699219,
+     667374,  636717,  607238,  578919,  1714223, 1710667, 1700081, 1682702, 1658906, 1629194, 1594156, 1554446,
+     1510754, 1467797, 1422336, 1374937, 1326135, 1276421, 1226234, 1175965, 1125953, 1076488, 1027816, 983020,
+     941052,  900042,  860088,  821268,  783640,  747243,  712106,  679831,  649351,  619962,  591660,  564435,
+     1597285, 1594156, 1584833, 1569509, 1548492, 1522192, 1492790, 1460446, 1424666, 1385966, 1344864, 1301861,
+     1257429, 1212008, 1165998, 1119759, 1073606, 1027816, 985257,  945218,  905857,  867300,  829648,  792982,
+     757366,  722846,  690299,  660181,  631055,  602927,  575800,  549669,  1490256, 1487730, 1480198, 1467797,
+     1450748, 1429345, 1403941, 1374937, 1342761, 1307856, 1270671, 1231642, 1191187, 1149698, 1107538, 1065035,
+     1022482, 983020,  945218,  907810,  870943,  834742,  799309,  764729,  731069,  698619,  669051,  640362,
+     612571,  585692,  559730,  534687,  1397152, 1394902, 1388190, 1377130, 1361905, 1342761, 1319993, 1293940,
+     1264967, 1233454, 1199788, 1164351, 1127510, 1089617, 1050997, 1011949, 976362,  941052,  905857,  870943,
+     836451,  802503,  769201,  736632,  704865,  675828,  647750,  620459,  593980,  568330,  543519,  519552,
+     1309866, 1307856, 1301861, 1291973, 1278347, 1261188, 1240747, 1217309, 1191187, 1162707, 1132204, 1100012,
+     1066456, 1031846, 997710,  965432,  932811,  900042,  867300,  834742,  802503,  770701,  739438,  708800,
+     680406,  653109,  626480,  600553,  575356,  550911,  527232,  504325,  1228032, 1226234, 1220866, 1212008,
+     1199788, 1184381, 1165998, 1144883, 1121302, 1095537, 1067880, 1038620, 1008099, 979682,  950469,  920670,
+     890484,  860088,  829648,  799309,  769201,  739438,  710120,  682713,  656358,  630543,  605316,  580713,
+     556767,  533499,  510927,  492344,  1151311, 1149698, 1144883, 1136931, 1125953, 1112095, 1095537, 1076488,
+     1055177, 1031846, 1006935, 983020,  957902,  931789,  904884,  877378,  849451,  821268,  792982,  764729,
+     736632,  708800,  682713,  657447,  632591,  608202,  584327,  561007,  538274,  516151,  496368,  481317,
+     1079383, 1077934, 1073606, 1066456, 1056576, 1044091, 1029156, 1011949, 994288,  975260,  954704,  932811,
+     909769,  885767,  860984,  835596,  809763,  783640,  757366,  731069,  704865,  680406,  656358,  632591,
+     609169,  586147,  563576,  541493,  519932,  499422,  484703,  470233,  1011949, 1010645, 1006935, 1001153,
+     993151,  983020,  970871,  956834,  941052,  923681,  904884,  884828,  863682,  841612,  818781,  795345,
+     771453,  747243,  722846,  698619,  675828,  653109,  630543,  608202,  586147,  564435,  543113,  522221,
+     501793,  487146,  473022,  459122,  954704,  953642,  950469,  945218,  937947,  928735,  917676,  904884,
+     890484,  874611,  857409,  839025,  819609,  799309,  778271,  756636,  734538,  712106,  690299,  669051,
+     647750,  626480,  605316,  584327,  563576,  543113,  522987,  503238,  488622,  474897,  461354,  448010,
+     901007,  900042,  897157,  892382,  885767,  877378,  867300,  855630,  842477,  827961,  812207,  795345,
+     777508,  758829,  739438,  719463,  699219,  679831,  660181,  640362,  620459,  600553,  580713,  561007,
+     541493,  522221,  503238,  489115,  475839,  462701,  449723,  436922,  850330,  849451,  846825,  842477,
+     836451,  828804,  819609,  808951,  796927,  783640,  769201,  753726,  737332,  720138,  702260,  685033,
+     667374,  649351,  631055,  612571,  593980,  575356,  556767,  538274,  519932,  501793,  488622,  475839,
+     463152,  450584,  438155,  425882,  802503,  801702,  799309,  795345,  789848,  782869,  774471,  764729,
+     753726,  741554,  728312,  714101,  699219,  684452,  669051,  653109,  636717,  619962,  602927,  585692,
+     568330,  550911,  533499,  516151,  499422,  487146,  474897,  462701,  450584,  438567,  426670,  414910,
+     757366,  756636,  754452,  750834,  745815,  739438,  731761,  722846,  712770,  701628,  690299,  678111,
+     665148,  651494,  637236,  622456,  607238,  591660,  575800,  559730,  543519,  527232,  510927,  496368,
+     484703,  473022,  461354,  449723,  438155,  426670,  415287,  404024,  714767,  714101,  712106,  708800,
+     704212,  698619,  692068,  684452,  675828,  666259,  655815,  644568,  632591,  619962,  606756,  593050,
+     578919,  564435,  549669,  534687,  519552,  504325,  492344,  481317,  470233,  459122,  448010,  436922,
+     425882,  414910,  404024,  393243},
+    {0,        0,       0,       0,       10256567, 9163795, 8187452, 7334503, 6576615,  5897041,  5287688, 4741301,
+     4251374,  3823808, 3450992, 3114525, 2810863,  2536807, 2289472, 2087679, 1978480,  1874992,  1776918, 1683973,
+     1595891,  1512415, 1420110, 1332765, 1250792,  1173861, 1101662, 1033903, 0,        0,        0,       0,
+     10115302, 9062138, 8111468, 7277870, 6532108,  5861525, 5259001, 4717901, 4232130,  3808773,  3438387, 3103905,
+     2801875,  2529171, 2282962, 2084732, 1975826,  1872597, 1774751, 1682009, 1594106,  1510791,  1418378, 1331200,
+     1249376,  1172578, 1100497, 1032846, 0,        0,       0,       0,       9725277,  8774666,  7895240, 7113812,
+     6402356,  5757510, 5174699, 4648953, 4175309,  3764280, 3401038, 3072400, 2775187,  2506479,  2263603, 2075939,
+     1967905,  1865445, 1768278, 1676139, 1588774,  1505526, 1413203, 1326521, 1245141,  1168739,  1097015, 1029682,
+     0,        0,       0,       0,       9163795,  8344872, 7571685, 6858080, 6197748,  5592084,  5039753, 4538028,
+     4083526,  3692107, 3340291, 3021045, 2731603,  2469362, 2231895, 2061445, 1954839,  1853638,  1757587, 1666439,
+     1579957,  1496049, 1404644, 1318782, 1238133,  1162385, 1091248, 1024442, 10256567, 10115302, 9725277, 9163795,
+     8510160,  7827871, 7167557, 6532108, 5932993,  5375578, 4861571, 4390531, 3963957,  3595004,  3258255, 2951472,
+     2672398,  2418824, 2188634, 2041485, 1936825,  1837343, 1742818, 1653028, 1567758,  1482942,  1392801, 1308065,
+     1228424,  1153579, 1083251, 1017173, 9163795,  9062138, 8774666, 8344872, 7827871,  7277870,  6714142, 6158448,
+     5624412,  5119910, 4648953, 4213040, 3823808,  3476438, 3157611, 2865780, 2599225,  2356177,  2134869, 2016372,
+     1914124,  1816782, 1724159, 1636067, 1552316,  1466362, 1377806, 1294488, 1216114,  1142407,  1073100, 1008579,
+     8187452,  8111468, 7895240, 7571685, 7167557,  6714142, 6237570, 5757510, 5287688,  4837070,  4411073, 4012705,
+     3663905,  3340291, 3041417, 2766381, 2514008,  2282962, 2090635, 1986483, 1887057,  1792226,  1701844, 1615756,
+     1533801,  1446502, 1359827, 1278192, 1201328,  1128976, 1060888, 998743,  7334503,  7277870,  7113812, 6858080,
+     6532108,  6158448, 5757510, 5345971, 4936475,  4538028, 4156665, 3808773, 3489280,  3190579,  2912880, 2655858,
+     2418824,  2200862, 2052844, 1952246, 1855988,  1763987, 1676139, 1592325, 1512415,  1423584,  1339055, 1259345,
+     1184209,  1113412, 1046725, 987310,  6576615,  6532108, 6402356, 6197748, 5932993,  5624412,  5287688, 4936475,
+     4581817,  4232130, 3900570, 3595003, 3304714,  3031203, 2775187, 2536808, 2315800,  2111617,  2010873, 1914124,
+     1821314,  1732406, 1647339, 1566029, 1484803,  1397857, 1315706, 1238133, 1164921,  1095858,  1030735, 974370,
+     5897041,  5861525, 5757510, 5592084, 5375578,  5119910, 4837070, 4538028, 4232130,  3932039,  3649944, 3376518,
+     3114525,  2865780, 2631349, 2411736, 2207015,  2061445, 1965279, 1872597, 1783449,  1697843,  1615756, 1537139,
+     1453673,  1369587, 1290011, 1214758, 1143640,  1076467, 1013100, 960022,  5287688,  5259001,  5174699, 5039753,
+     4861571,  4648953, 4411073, 4156665, 3900570,  3649944, 3401038, 3157611, 2922451,  2697519,  2484105, 2282962,
+     2102541,  2008134, 1916621, 1828151, 1742818,  1660667, 1581713, 1505526, 1420110,  1339055,  1262216, 1189436,
+     1120554,  1055406, 996086,  944370,  4741301,  4717901, 4648953, 4538028, 4390531,  4213040,  4012705, 3808773,
+     3595003,  3376518, 3157611, 2941747, 2731603,  2529172, 2335853, 2152565, 2041485,  1952246,  1865445, 1781267,
+     1699841,  1621251, 1545538, 1466362, 1384439,  1306546, 1232570, 1162385, 1095858,  1032846,  977795,  927525,
+     4251374,  4232130, 4175309, 4083526, 3963957,  3823808, 3663905, 3489280, 3304714,  3114525,  2922451, 2731603,
+     2544480,  2363013, 2188634, 2064328, 1978480,  1894367, 1812271, 1732406, 1654932,  1579957,  1507434, 1425326,
+     1346984,  1272346, 1201328, 1133832, 1069748,  1009480, 958355,  909602,  3823808,  3808773,  3764280, 3692107,
+     3595004,  3476438, 3340291, 3190579, 3031203,  2865780, 2697519, 2529172, 2363013,  2200862,  2075939, 1994549,
+     1914124,  1835037, 1757587, 1682009, 1608481,  1537139, 1460902, 1382776, 1308065,  1236738,  1168739, 1103997,
+     1042424,  987310,  937898,  890716,  3450992,  3438387, 3401038, 3340291, 3258255,  3157611,  3041417, 2912880,
+     2775187,  2631349, 2484105, 2335853, 2188634,  2075939, 1999962, 1924150, 1848955,  1774751,  1701844, 1630482,
+     1560861,  1490409, 1413203, 1339055, 1267989,  1199997, 1135051, 1073100, 1014081,  964208,   916551,  870986,
+     3114525,  3103905, 3072400, 3021045, 2951472,  2865780, 2766381, 2655858, 2536808,  2411736,  2282962, 2152565,
+     2064328,  1994549, 1924150, 1853638, 1783449,  1713950, 1645451, 1578204, 1512415,  1437617,  1364693, 1294488,
+     1227047,  1162385, 1100497, 1041353, 988182,   940317,  894445,  850526,  2810863,  2801875,  2775187, 2731603,
+     2672398,  2599225, 2514008, 2418824, 2315800,  2207015, 2102541, 2041485, 1978480,  1914124,  1848955, 1783449,
+     1718021,  1653028, 1588774, 1525514, 1455475,  1384439, 1315706, 1249376, 1185512,  1124152,  1065304, 1009480,
+     961693,   915775,  871705,  829452,  2536807,  2529171, 2506479, 2469362, 2418824,  2356177,  2282962, 2200862,
+     2111617,  2061445, 2008134, 1952246, 1894367,  1835037, 1774751, 1713950, 1653028,  1592325,  1532138, 1466362,
+     1397857,  1331200, 1266542, 1203995, 1143640,  1085527, 1029682, 980376,  934687,   890716,   848451,  807872,
+     2289472,  2282962, 2263603, 2231895, 2188634,  2134869, 2090635, 2052844, 2010873,  1965279,  1916621, 1865445,
+     1812271,  1757587, 1701844, 1645451, 1588774,  1532138, 1470021, 1404644, 1340635,  1278192,  1217473, 1158599,
+     1101662,  1046725, 996086,  950912,  907303,   865268,  824802,  785895,  2087679,  2084732,  2075939, 2061445,
+     2041485,  2016372, 1986483, 1952246, 1914124,  1872597, 1828151, 1781267, 1732406,  1682009,  1630482, 1578204,
+     1525514,  1466362, 1404644, 1343804, 1284080,  1225671, 1168739, 1113412, 1059788,  1008579,  964208,  921230,
+     879672,   839549,  800867,  763621,  1978480,  1975826, 1967905, 1954839, 1936825,  1914124,  1887057, 1855988,
+     1821314,  1783449, 1742818, 1699841, 1654932,  1608481, 1560861, 1512415, 1455475,  1397857,  1340635, 1284080,
+     1228424,  1173861, 1120554, 1068634, 1018207,  974370,  932290,  891460,  851914,   813673,   776750,  741147,
+     1874992,  1872597, 1865445, 1853638, 1837343,  1816782, 1792226, 1763987, 1732406,  1697843,  1660667, 1621251,
+     1579957,  1537139, 1490409, 1437617, 1384439,  1331200, 1278192, 1225671, 1173861,  1122950,  1073100, 1024442,
+     981238,   940316,  900461,  861723,  824141,   787744,  752548,  718562,  1776918,  1774751,  1768278, 1757587,
+     1742818,  1724159, 1701844, 1676139, 1647339,  1615756, 1581713, 1545538, 1507434,  1460902,  1413203, 1364693,
+     1315706,  1266542, 1217473, 1168739, 1120554,  1073100, 1026533, 984701,  945184,   906539,   868835,  832126,
+     796456,   761857,  728350,  701409,  1683973,  1682009, 1676139, 1666439, 1653028,  1636067,  1615756, 1592325,
+     1566029,  1537139, 1505526, 1466362, 1425326,  1382776, 1339055, 1294488, 1249376,  1203995,  1158599, 1113412,
+     1068634,  1024442, 984701,  946815,  909602,   873145,  837515,  802768,  768952,   736100,   707081,  685862,
+     1595891,  1594106, 1588774, 1579957, 1567758,  1552316, 1533801, 1512415, 1484803,  1453673,  1420110, 1384439,
+     1346984,  1308065, 1267989, 1227047, 1185512,  1143640, 1101662, 1059788, 1018207,  981238,   945184,  909602,
+     874589,   840229,  806592,  773737,  741711,   711387,  690636,  670231,  1512415,  1510791,  1505526, 1496049,
+     1482942,  1466362, 1446502, 1423584, 1397857,  1369587, 1339055, 1306546, 1272346,  1236738,  1199997, 1162385,
+     1124152,  1085527, 1046725, 1008579, 974370,   940316,  906539,  873145,  840229,   807872,   776146,  745108,
+     714808,   694081,  674165,  654558,  1420110,  1418378, 1413203, 1404644, 1392801,  1377806,  1359827, 1339055,
+     1315706,  1290011, 1262216, 1232570, 1201328,  1168739, 1135051, 1100497, 1065304,  1029682,  996086,  964208,
+     932290,   900461,  868835,  837515,  806592,   776146,  746245,  716949,  696161,   676809,   657707,  638880,
+     1332765,  1331200, 1326521, 1318782, 1308065,  1294488, 1278192, 1259345, 1238133,  1214758,  1189436, 1162385,
+     1133832,  1103997, 1073100, 1041353, 1009480,  980376,  950912,  921230,  891460,   861723,   832126,  802768,
+     773737,   745108,  716949,  696857,  678137,   659608,  641298,  623233,  1250792,  1249376,  1245141, 1238133,
+     1228424,  1216114, 1201328, 1184209, 1164921,  1143640, 1120554, 1095858, 1069748,  1042424,  1014081, 988182,
+     961693,   934687,  907303,  879672,  851914,   824141,  796456,  768952,  741711,   714808,   696161,  678137,
+     660244,   642513,  624972,  607647,  1173861,  1172578, 1168739, 1162385, 1153579,  1142407,  1128976, 1113412,
+     1095858,  1076467, 1055406, 1032846, 1009480,  987310,  964208,  940317,  915775,   890716,   865268,  839549,
+     813673,   787744,  761857,  736100,  711387,   694081,  676809,  659608,  642513,   625554,   608759,  592154,
+     1101662,  1100497, 1097015, 1091248, 1083251,  1073100, 1060888, 1046725, 1030735,  1013100,  996086,  977795,
+     958355,   937898,  916551,  894445,  871705,   848451,  824802,  800867,  776750,   752548,   728350,  707081,
+     690636,   674165,  657707,  641298,  624972,   608759,  592687,  576779,  1033903,  1032846,  1029682, 1024442,
+     1017173,  1008579, 998743,  987310,  974370,   960022,  944370,  927525,  909602,   890716,   870986,  850526,
+     829452,   807872,  785895,  763621,  741147,   718562,  701409,  685862,  670231,   654558,   638880,  623233,
+     607647,   592154,  576779,  561546},
+    {0,       0,       0,       0,       1591422, 1272360, 1017266, 841460,  704536, 589892,  493904,  413535,  346244,
+     290031,  243063,  203702,  170715,  143069,  119901,  102775,  95833,   89361,  83325,   77697,   72450,   67557,
+     63969,   60623,   57452,   54447,   51599,   48900,   0,       0,       0,      0,       1548184, 1244480, 998601,
+     830906,  696789,  584118,  489548,  410217,  343695,  288069,  241537,  202507, 169776,  142329,  119314,  102586,
+     95666,   89212,   83193,   77580,   72344,   67462,   63903,   60563,   57397,  54397,   51553,   48858,   0,
+     0,       0,       0,       1431876, 1167297, 948704,  800624,  674393,  567335, 476834,  400500,  336213,  282299,
+     237036,  198980,  167000,  140136,  117577,  102023,  95167,   88769,   82798,  77227,   72030,   67211,   63706,
+     60383,   57232,   54246,   51415,   48731,   0,       0,       0,       0,      1272360, 1056482, 886216,  754291,
+     639652,  541032,  456753,  385057,  324262,  273042,  229792,  193287,  162509, 136582,  114755,  101097,  94345,
+     88038,   82147,   76646,   71510,   66853,   63379,   60084,   56960,   53996,  51186,   48521,   1591422, 1548184,
+     1431876, 1272360, 1098449, 935558,  810496,  696789,  595760,  507341,  430751, 364887,  308578,  260793,  220159,
+     185686,  156491,  131804,  110952,  99824,   93215,   87032,   81250,   75844,  70792,   66357,   62926,   59671,
+     56581,   53650,   50868,   48229,   1272360, 1244480, 1167297, 1056482, 935558, 830906,  728683,  633061,  546134,
+     468641,  400500,  341174,  290031,  246158,  208579,  176499,  149185,  125980, 106300,  98229,   91796,   85767,
+     80120,   74833,   69886,   65728,   62353,   59146,   56101,   53210,   50465,  47859,   1017266, 998601,  948704,
+     886216,  810496,  728683,  646358,  567335,  493904,  427222,  367671,  315143, 269461,  229792,  195537,  166089,
+     140862,  119314,  102964,  96338,   90110,   84261,   78772,   73626,   68803,  64974,   61663,   58515,   55523,
+     52680,   49978,   47412,   841460,  830906,  800624,  754291,  696789,  633061, 567335,  502799,  441610,  385057,
+     333772,  288069,  247726,  212344,  181524,  154827,  131804,  112021,  100548, 94183,   88184,   82537,   77227,
+     72239,   67557,   64102,   60865,   57784,   54853,   52064,   49413,   46892,  704536,  696789,  674393,  639652,
+     595760,  546134,  493904,  441610,  391125,  343695,  300130,  260793,  225593, 194408,  167000,  143069,  122285,
+     104314,  97880,   91796,   86045,   80619,   75505,   70690,   66427,   63120,  59966,   56960,   54096,   51369,
+     48773,   46303,   589892,  584118,  567335,  541032,  507341,  468641,  427222, 385057,  343695,  304312,  267695,
+     234101,  203702,  176499,  152375,  131139,  112561,  101097,  95002,   89212,  83724,   78531,   73626,   68998,
+     65246,   62038,   58973,   56048,   53258,   50599,   48064,   45650,   493904, 489548,  476834,  456753,  430751,
+     400500,  367671,  333772,  300130,  267695,  237036,  208579,  182553,  159033, 137989,  119314,  103728,  97707,
+     91952,   86466,   81250,   76301,   71613,   67211,   63969,   60865,   57896,  55058,   52347,   49759,   47291,
+     44937,   413535,  410217,  400500,  385057,  364887,  341174,  315143,  288069, 260793,  234101,  208579,  184633,
+     162509,  142329,  124114,  107821,  99824,   94183,   88769,   83591,   78652,  73952,   69489,   65728,   62607,
+     59612,   56743,   53996,   51369,   48858,   46459,   44170,   346244,  343695, 336213,  324262,  308578,  290031,
+     269461,  247726,  225593,  203702,  182553,  162509,  143815,  126610,  110952, 101281,  95833,   90564,   85490,
+     80619,   75958,   71510,   67283,   64168,   61170,   58289,   55523,   52871,  50331,   47900,   45574,   43352,
+     290031,  288069,  282299,  273042,  260793,  246158,  229792,  212344,  194408, 176499,  159033,  142329,  126610,
+     112021,  102023,  96848,   91796,   86890,   82147,   77580,   73195,   68998,  65521,   62543,   59671,   56905,
+     54246,   51692,   49241,   46892,   44642,   42490,   243063,  241537,  237036, 229792,  220159,  208579,  195537,
+     181524,  167000,  152375,  137989,  124114,  110952,  102023,  97190,   92422,  87749,   83193,   78772,   74501,
+     70387,   66639,   63706,   60865,   58120,   55471,   52920,   50465,   48105,  45841,   43669,   41588,   203702,
+     202507,  198980,  193287,  185686,  176499,  166089,  154827,  143069,  131139, 119314,  107821,  101281,  96848,
+     92422,   88038,   83724,   79503,   75392,   71406,   67557,   64636,   61850,  59146,   56528,   53996,   51553,
+     49198,   46932,   44753,   42660,   40652,   170715,  169776,  167000,  162509, 156491,  149185,  140862,  131804,
+     122285,  112561,  103728,  99824,   95833,   91796,   87749,   83724,   79748,  75844,   72030,   68319,   65315,
+     62607,   59966,   57397,   54904,   52489,   50154,   47900,   45726,   43634,  41621,   39687,   143069,  142329,
+     140136,  136582,  131804,  125980,  119314,  112021,  104314,  101097,  97707,  94183,   90564,   86890,   83193,
+     79503,   75844,   72239,   68706,   65728,   63120,   60563,   58064,   55628,  53258,   50959,   48731,   46577,
+     44496,   42490,   40557,   38698,   119901,  119314,  117577,  114755,  110952, 106300,  102964,  100548,  97880,
+     95002,   91952,   88769,   85490,   82147,   78772,   75392,   72030,   68706,  65867,   63379,   60926,   58515,
+     56154,   53847,   51599,   49413,   47291,   45235,   43247,   41327,   39474,  37689,   102775,  102586,  102023,
+     101097,  99824,   98229,   96338,   94183,   91796,   89212,   86466,   83591,  80619,   77580,   74501,   71406,
+     68319,   65728,   63379,   61048,   58744,   56474,   54246,   52064,   49934,  47859,   45841,   43882,   41985,
+     40150,   38377,   36666,   95833,   95666,   95167,   94345,   93215,   91796,  90110,   88184,   86045,   83724,
+     81250,   78652,   75958,   73195,   70387,   67557,   65315,   63120,   60926,  58744,   56581,   54447,   52347,
+     50287,   48271,   46303,   44387,   42524,   40716,   38964,   37269,   35632,  89361,   89212,   88769,   88038,
+     87032,   85767,   84261,   82537,   80619,   78531,   76301,   73952,   71510,  68998,   66639,   64636,   62607,
+     60563,   58515,   56474,   54447,   52442,   50465,   48521,   46616,   44753,  42935,   41164,   39444,   37774,
+     36157,   34592,   83325,   83193,   82798,   82147,   81250,   80120,   78772,  77227,   75505,   73626,   71613,
+     69489,   67283,   65521,   63706,   61850,   59966,   58064,   56154,   54246,  52347,   50465,   48605,   46773,
+     44974,   43212,   41490,   39810,   38174,   36585,   35043,   33722,   77697,  77580,   77227,   76646,   75844,
+     74833,   73626,   72239,   70690,   68998,   67211,   65728,   64168,   62543,  60865,   59146,   57397,   55628,
+     53847,   52064,   50287,   48521,   46773,   45049,   43352,   41687,   40057,  38464,   36911,   35400,   34021,
+     32902,   72450,   72344,   72030,   71510,   70792,   69886,   68803,   67557,  66427,   65246,   63969,   62607,
+     61170,   59671,   58120,   56528,   54904,   53258,   51599,   49934,   48271,  46616,   44974,   43352,   41753,
+     40181,   38639,   37131,   35658,   34249,   33154,   32080,   67557,   67462,  67211,   66853,   66357,   65728,
+     64974,   64102,   63120,   62038,   60865,   59612,   58289,   56905,   55471,  53996,   52489,   50959,   49413,
+     47859,   46303,   44753,   43212,   41687,   40181,   38698,   37242,   35814,  34419,   33335,   32287,   31258,
+     63969,   63903,   63706,   63379,   62926,   62353,   61663,   60865,   59966,  58973,   57896,   56743,   55523,
+     54246,   52920,   51553,   50154,   48731,   47291,   45841,   44387,   42935,  41490,   40057,   38639,   37242,
+     35867,   34517,   33445,   32426,   31423,   30437,   60623,   60563,   60383,  60084,   59671,   59146,   58515,
+     57784,   56960,   56048,   55058,   53996,   52871,   51692,   50465,   49198,  47900,   46577,   45235,   43882,
+     42524,   41164,   39810,   38464,   37131,   35814,   34517,   33482,   32496,  31522,   30563,   29619,   57452,
+     57397,   57232,   56960,   56581,   56101,   55523,   54853,   54096,   53258,  52347,   51369,   50331,   49241,
+     48105,   46932,   45726,   44496,   43247,   41985,   40716,   39444,   38174,  36911,   35658,   34419,   33445,
+     32496,   31556,   30627,   29710,   28807,   54447,   54397,   54246,   53996,  53650,   53210,   52680,   52064,
+     51369,   50599,   49759,   48858,   47900,   46892,   45841,   44753,   43634,  42490,   41327,   40150,   38964,
+     37774,   36585,   35400,   34249,   33335,   32426,   31522,   30627,   29741,  28865,   28002,   51599,   51553,
+     51415,   51186,   50868,   50465,   49978,   49413,   48773,   48064,   47291,  46459,   45574,   44642,   43669,
+     42660,   41621,   40557,   39474,   38377,   37269,   36157,   35043,   34021,  33154,   32287,   31423,   30563,
+     29710,   28865,   28030,   27205,   48900,   48858,   48731,   48521,   48229,  47859,   47412,   46892,   46303,
+     45650,   44937,   44170,   43352,   42490,   41588,   40652,   39687,   38698,  37689,   36666,   35632,   34592,
+     33722,   32902,   32080,   31258,   30437,   29619,   28807,   28002,   27205,  26417}};
+
+const uint8_t LUTCeilLog2Nonzero[1024] = { // Entry i holds ceil(log2(i)) for i = 1..1023; entry 0 is 127 (log2(0) is undefined - presumably a sentinel, confirm with users of the table).
+    127, 0,  1,  2,  2,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+    5,   5,  5,  5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+    6,   6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
+    7,   7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
+    7,   7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+    8,   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+    8,   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+    8,   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+    8,   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,
+    9,   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+    9,   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+    9,   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+    9,   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+    9,   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+    9,   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+    9,   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+    9,   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+    9,   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10,  10, 10, 10, 10, 10, 10, 10, 10}; // 1024 entries in total; indices 513..1023 all map to 10.
+
+//==========================================================//
+// load data
+//==========================================================//
+// Copies `axi_opsin` into `opsin8x8_stream` in address order, one tile at a time.
+// The number of tiles is ceil(ysize / 64) * ceil(xsize / 64) and every tile contributes
+// 4096 * 3 consecutive floats (4096 = 64 * 64; the factor 3 is presumably the channel
+// count - confirm against the producer of the buffer).
+void load_dct8_pixel(unsigned ysize, unsigned xsize, float* axi_opsin, hls::stream<float>& opsin8x8_stream) {
+#pragma HLS INLINE off
+
+    // Extent of the image once rounded up to a whole number of 64-wide tiles.
+    const int padded_w = (xsize + 63) / 64 * 64;
+    const int padded_h = (ysize + 63) / 64 * 64;
+    const uint32_t tiles_down = padded_h / 64;
+    const uint32_t tiles_across = padded_w / 64;
+    const uint32_t tile_total = tiles_down * tiles_across;
+
+loop_load_dct8_pixel:
+    for (int tile = 0; tile < tile_total; tile++) {
+        // First float of this tile inside the flat input buffer.
+        const uint32_t base = tile * 4096 * 3;
+        for (int k = 0; k < 4096 * 3; k++) {
+#pragma HLS PIPELINE II = 1
+            opsin8x8_stream.write(axi_opsin[base + k]);
+        }
+    }
+}
+
+// Copies `axi_opsin` into `opsin16x16_stream` in address order, one tile at a time.
+// ceil(ysize / 64) * ceil(xsize / 64) tiles are transferred, each made of 4096 * 3
+// consecutive floats (4096 = 64 * 64; the factor 3 is presumably the channel count -
+// confirm against the producer of the buffer).
+void load_dct16_pixel(unsigned ysize, unsigned xsize, float* axi_opsin, hls::stream<float>& opsin16x16_stream) {
+#pragma HLS INLINE off
+    // Extent of the image once rounded up to a whole number of 64-wide tiles.
+    const int padded_w = (xsize + 63) / 64 * 64;
+    const int padded_h = (ysize + 63) / 64 * 64;
+    const uint32_t tiles_down = padded_h / 64;
+    const uint32_t tiles_across = padded_w / 64;
+    const uint32_t tile_total = tiles_down * tiles_across;
+
+loop_load_dct16_pixel:
+    for (int tile = 0; tile < tile_total; tile++) {
+        // First float of this tile inside the flat input buffer.
+        const uint32_t base = tile * 4096 * 3;
+        for (int k = 0; k < 4096 * 3; k++) {
+#pragma HLS PIPELINE II = 1
+            opsin16x16_stream.write(axi_opsin[base + k]);
+        }
+    }
+}
+
+// Copies `axi_opsin` into `opsin32x32_stream` in address order, one tile at a time.
+// ceil(ysize / 64) * ceil(xsize / 64) tiles are transferred, each made of 4096 * 3
+// consecutive floats (4096 = 64 * 64; the factor 3 is presumably the channel count -
+// confirm against the producer of the buffer).
+void load_dct32_pixel(unsigned ysize, unsigned xsize, float* axi_opsin, hls::stream<float>& opsin32x32_stream) {
+#pragma HLS INLINE off
+    // Extent of the image once rounded up to a whole number of 64-wide tiles.
+    const int padded_w = (xsize + 63) / 64 * 64;
+    const int padded_h = (ysize + 63) / 64 * 64;
+    const uint32_t tiles_down = padded_h / 64;
+    const uint32_t tiles_across = padded_w / 64;
+    const uint32_t tile_total = tiles_down * tiles_across;
+
+loop_load_dct32_pixel:
+    for (int tile = 0; tile < tile_total; tile++) {
+        // First float of this tile inside the flat input buffer.
+        const uint32_t base = tile * 4096 * 3;
+        for (int k = 0; k < 4096 * 3; k++) {
+#pragma HLS PIPELINE II = 1
+            opsin32x32_stream.write(axi_opsin[base + k]);
+        }
+    }
+}
+
+void loadPixel(unsigned ysize, // Runs the 8x8, 16x16 and 32x32 pixel loaders one after another; ysize is forwarded unchanged to each.
+               unsigned xsize, // forwarded unchanged to each loader
+               float* axi_opsin_1, // input buffer read by load_dct8_pixel
+               float* axi_opsin_2, // input buffer read by load_dct16_pixel
+               float* axi_opsin_3, // input buffer read by load_dct32_pixel
+               hls::stream<float>& opsin8x8_stream, // filled from axi_opsin_1
+               hls::stream<float>& opsin16x16_stream, // filled from axi_opsin_2
+               hls::stream<float>& opsin32x32_stream) { // filled from axi_opsin_3
+#pragma HLS INLINE
+    load_dct8_pixel(ysize, xsize, axi_opsin_1, opsin8x8_stream); // each loader has its own source pointer and its own output stream
+    load_dct16_pixel(ysize, xsize, axi_opsin_2, opsin16x16_stream);
+    load_dct32_pixel(ysize, xsize, axi_opsin_3, opsin32x32_stream);
+}
+
+// Streams three per-block float fields to the datapath, visiting the blocks tile by tile.
+//
+// The image is treated as xsize / 8 by ysize / 8 blocks (integer division), grouped into
+// tiles of up to 8 x 8 blocks. Tiles are visited in row-major order and, inside a tile,
+// blocks are visited row by row. For every block one value is written to each stream:
+//   stream_q    <- quant_field_row[block_y * stride + block_x]
+//   stream_mask <- masking_field_row[block_y * stride + block_x]
+//   stream_rqf  <- aq_map_f[block_y * xsize_blocks + block_x]
+//
+// xsize, ysize : image dimensions (presumably in pixels - confirm with the caller).
+// stride       : row pitch, in elements, of masking_field_row and quant_field_row.
+void load_rqf_mask(int xsize,
+                   int ysize,
+                   float* aq_map_f,
+                   float* masking_field_row,
+                   float* quant_field_row,
+                   int stride,
+                   hls::stream<float>& stream_q,
+                   hls::stream<float>& stream_mask,
+                   hls::stream<float>& stream_rqf) {
+#pragma HLS INLINE off
+    int xsize_blocks = xsize / 8;
+    int ysize_blocks = ysize / 8;
+    // Tiles per row; also used to split the flat tile id into (tx, ty).
+    int n_enc_tiles = (xsize_blocks + 7) / 8;
+    uint32_t xnum_tile = n_enc_tiles;
+    uint32_t ynum_tile = (ysize_blocks + 7) / 8;
+    const uint32_t num_tiles = xnum_tile * ynum_tile;
+LOOP_0:
+    for (int tid = 0; tid < num_tiles; tid++) {
+        int tx1 = tid % n_enc_tiles;
+        int ty1 = tid / n_enc_tiles;
+        // First block of the tile and one-past-last block, clamped to the image.
+        // Pure integer clamp: avoids the int -> double -> int round trip of fmin().
+        int by = ty1 * 8;
+        int by1 = (by + 8 < ysize_blocks) ? (by + 8) : ysize_blocks;
+        int bx = tx1 * 8;
+        int bx1 = (bx + 8 < xsize_blocks) ? (bx + 8) : xsize_blocks;
+        int rect_ysize = by1 - by;
+        int rect_xsize = bx1 - bx;
+    LOOP_1:
+        for (int iy = 0; iy < rect_ysize; iy++) {
+        LOOP_2:
+            for (int ix = 0; ix < rect_xsize; ix++) {
+#pragma HLS PIPELINE II = 1
+                // Absolute block coordinates of the current block.
+                int block_y = by + iy;
+                int block_x = bx + ix;
+
+                // The quant and masking fields share one layout with row pitch `stride`.
+                int index0 = block_y * stride + block_x;
+                float quant_norm8 = quant_field_row[index0];
+                stream_q.write(quant_norm8);
+                float masking = masking_field_row[index0];
+                stream_mask.write(masking);
+
+                // aq_map_f is densely packed with xsize_blocks entries per row.
+                int index = block_y * xsize_blocks + block_x;
+                float rqf_tmp = aq_map_f[index];
+                stream_rqf.write(rqf_tmp);
+            }
+        }
+    }
+}
+
+//==========================================================================//
+// data write out
+//==========================================================================//
+// Drains `ac_coef_strm` into `ac_coef_axiout` in arrival order.
+// Exactly ceil(xsize / 8) * ceil(ysize / 8) * 3 * 64 values are read from the stream.
+void ac_coeff_writeout(int xsize, int ysize, hls::stream<int>& ac_coef_strm, int* ac_coef_axiout) {
+    const unsigned blocks_across = (xsize + 7) / 8;
+    const unsigned blocks_down = (ysize + 7) / 8;
+    // Total number of values to move: 3 * 64 per block.
+    const unsigned coef_total = blocks_across * blocks_down * 3 * 64;
+    for (int out_idx = 0; out_idx < coef_total; out_idx++) {
+        const int coef = ac_coef_strm.read();
+        ac_coef_axiout[out_idx] = coef;
+    }
+}
+
+// Scatters the DC coefficients arriving on `dc_coef8x8_stream` into `hls_dc8x8`.
+//
+// The image is walked as 64x64 tiles in row-major order. For every tile one value is read
+// from `stream_recty_dc0` and one from `stream_rectx_dc0`; they bound how many of the
+// tile's 8 x 8 block positions are valid (presumably counts of 8x8 blocks - confirm with
+// the producer). For each valid position three coefficients are read, one per channel,
+// and stored in three planes of `hls_dc8x8` that are tile_ysize * tile_xsize elements
+// apart. Invalid positions are skipped: nothing is read and nothing is written.
+//
+// ysize, xsize : image dimensions; rounded up to multiples of 64 internally.
+void dc_8x8_writeout(unsigned ysize,
+                     unsigned xsize,
+                     float* hls_dc8x8,
+                     hls::stream<uint8_t>& stream_rectx_dc0,
+                     hls::stream<uint8_t>& stream_recty_dc0,
+                     hls::stream<float>& dc_coef8x8_stream) {
+#pragma HLS INLINE off
+    // Image extent rounded up to whole 64-wide tiles.
+    int tile_xsize = (xsize + 63) / 64 * 64;
+    int tile_ysize = (ysize + 63) / 64 * 64;
+    uint32_t ysize64 = tile_ysize / 64;
+    uint32_t xsize64 = tile_xsize / 64;
+    // Number of 8-wide columns in a padded row; used as the row pitch of one plane.
+    uint32_t xsize8 = tile_xsize / 8;
+
+loop_dc8_writeout:
+    for (uint32_t y64 = 0; y64 < ysize64; y64++) {
+        for (uint32_t x64 = 0; x64 < xsize64; x64++) {
+            // Per-tile valid extent, supplied by the upstream stage.
+            int rect_ysize = stream_recty_dc0.read();
+            int rect_xsize = stream_rectx_dc0.read();
+            for (uint32_t y8 = 0; y8 < 8; y8++) {
+                for (uint32_t x8 = 0; x8 < 8; x8++) {
+                    for (int c = 0; c < 3; c++) {
+#pragma HLS PIPELINE II = 1
+                        // Stream channel order -> output plane: 0 -> 1, 1 -> 0, 2 -> 2.
+                        int c_tmp = (c == 0) ? 1 : ((c == 1) ? 0 : 2);
+
+                        // Offset of this block position inside one plane.
+                        size_t addr = y64 * xsize8 * 8 + x64 * 8 + y8 * xsize8 + x8;
+
+                        if (x8 < rect_xsize && y8 < rect_ysize) {
+                            float reg = dc_coef8x8_stream.read();
+                            hls_dc8x8[c_tmp * tile_ysize * tile_xsize + addr] = reg;
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Scatters the DC coefficients arriving on `dc_coef16x16_stream` into `hls_dc16x16`.
+//
+// The image is walked as 64x64 tiles in row-major order. For every tile one value is read
+// from `stream_recty_dc1` and one from `stream_rectx_dc1` (presumably the valid extent of
+// the tile in 8x8 blocks - confirm with the producer). Each tile holds 4 x 4 positions of
+// 16x16 blocks; a position is processed only when 2 * x16 + 1 < rect_xsize and
+// 2 * y16 + 1 < rect_ysize. For a processed position, 2 x 2 coefficients are read for each
+// of the three channels and stored in three planes of `hls_dc16x16` that are
+// tile_ysize * tile_xsize elements apart. Other positions are skipped: nothing is read
+// and nothing is written.
+//
+// ysize, xsize : image dimensions; rounded up to multiples of 64 internally.
+void dc_16x16_writeout(unsigned ysize,
+                       unsigned xsize,
+                       float* hls_dc16x16,
+                       hls::stream<uint8_t>& stream_rectx_dc1,
+                       hls::stream<uint8_t>& stream_recty_dc1,
+                       hls::stream<float>& dc_coef16x16_stream) {
+#pragma HLS INLINE off
+    // Image extent rounded up to whole 64-wide tiles.
+    int tile_xsize = (xsize + 63) / 64 * 64;
+    int tile_ysize = (ysize + 63) / 64 * 64;
+    size_t ysize64 = tile_ysize / 64;
+    size_t xsize64 = tile_xsize / 64;
+    // Number of 16-wide columns in a padded row; each column position stores 4 values.
+    size_t xsize16 = tile_xsize / 16;
+
+    for (size_t y64 = 0; y64 < ysize64; y64++) {
+        for (size_t x64 = 0; x64 < xsize64; x64++) {
+            // Per-tile valid extent, supplied by the upstream stage.
+            int rect_ysize = stream_recty_dc1.read();
+            int rect_xsize = stream_rectx_dc1.read();
+            for (size_t y16 = 0; y16 < 4; y16++) {
+                for (size_t x16 = 0; x16 < 4; x16++) {
+                    for (int c = 0; c < 3; c++) {
+                        for (size_t m = 0; m < 2; m++) {
+                            for (size_t n = 0; n < 2; n++) {
+#pragma HLS PIPELINE II = 1
+                                // Stream channel order -> output plane: 0 -> 1, 1 -> 0, 2 -> 2.
+                                int c_tmp = (c == 0) ? 1 : ((c == 1) ? 0 : 2);
+
+                                // Offset inside one plane: tile row, tile column, block row,
+                                // block column, then the (m, n) entry of the 2 x 2 group.
+                                size_t addr =
+                                    y64 * xsize16 * 4 * 4 + x64 * 4 * 4 + y16 * xsize16 * 4 + x16 * 4 + m * 2 + n;
+
+                                if ((2 * x16 + 1) < rect_xsize && (2 * y16 + 1) < rect_ysize) {
+                                    float reg = dc_coef16x16_stream.read();
+                                    hls_dc16x16[c_tmp * tile_ysize * tile_xsize + addr] = reg;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Drains the 32x32 DC coefficient stream into hls_dc32x32.
+//
+// The image is padded up to whole 64x64 tiles. For every tile one (recty, rectx) pair is read,
+// height first, then the tile's 2x2 grid of 32x32 blocks is visited in the order
+// block row -> block column -> channel -> m -> n. A value is popped from dc_coef32x32_stream (and
+// stored) only when the block passes the rect test below, so the producer is expected to emit
+// values under exactly the same condition.
+//
+// ysize, xsize          image height / width; rounded up to a multiple of 64 internally
+// hls_dc32x32           output; channel planes are tile_ysize * tile_xsize floats apart
+// stream_rectx_dc2      per-tile rect width (presumably in 8x8 blocks -- confirm with producer)
+// stream_recty_dc2      per-tile rect height, same unit as the width
+// dc_coef32x32_stream   values to store
+void dc_32x32_writeout(unsigned ysize,
+                       unsigned xsize,
+                       float* hls_dc32x32,
+                       hls::stream<uint8_t>& stream_rectx_dc2,
+                       hls::stream<uint8_t>& stream_recty_dc2,
+                       hls::stream<float>& dc_coef32x32_stream) {
+#pragma HLS INLINE off
+    // Image dimensions padded to whole 64x64 tiles.
+    const int tile_xsize = (xsize + 63) / 64 * 64;
+    const int tile_ysize = (ysize + 63) / 64 * 64;
+    const size_t ysize64 = tile_ysize / 64; // tile rows
+    const size_t xsize64 = tile_xsize / 64; // tile columns
+    const size_t xsize32 = tile_xsize / 32; // 32x32 blocks per padded image row
+
+    for (size_t y64 = 0; y64 < ysize64; y64++) {
+        for (size_t x64 = 0; x64 < xsize64; x64++) {
+            // One size pair per tile; the height stream is read before the width stream.
+            int rect_ysize = stream_recty_dc2.read();
+            int rect_xsize = stream_rectx_dc2.read();
+            for (size_t y32 = 0; y32 < 2; y32++) {
+                for (size_t x32 = 0; x32 < 2; x32++) {
+                    for (int c = 0; c < 3; c++) {
+                        for (size_t m = 0; m < 4; m++) {
+                            for (size_t n = 0; n < 4; n++) {
+#pragma HLS PIPELINE II = 1
+                                // Stream channel order 0, 1, 2 lands in output planes 1, 0, 2.
+                                const int c_tmp = (c == 0) ? 1 : ((c == 1) ? 0 : 2);
+
+                                // Every 32x32 block owns 16 consecutive floats, indexed m * 4 + n;
+                                // blocks are laid out row-major across the padded image.
+                                const size_t row32 = y64 * 2 + y32;
+                                const size_t col32 = x64 * 2 + x32;
+                                const size_t addr = (row32 * xsize32 + col32) * 16 + m * 4 + n;
+
+                                // Only blocks whose last index (4 * k + 3) is inside the rect carry
+                                // data; for all others nothing is read and nothing is written.
+                                if ((4 * x32 + 3) < rect_xsize && (4 * y32 + 3) < rect_ysize) {
+                                    float reg = dc_coef32x32_stream.read();
+                                    hls_dc32x32[c_tmp * tile_ysize * tile_xsize + addr] = reg;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Duplicates the per-tile rect sizes onto three output stream pairs.
+//
+// One (recty, rectx) pair is consumed from the *_dc streams per tile, height first, and the same
+// pair is written to output pairs 0, 1 and 2. The tile grid is ceil(blocks / 8) in each
+// dimension, where blocks = size / 8 using integer division.
+void GetDCSize(short xsize,
+               short ysize,
+               hls::stream<uint8_t>& stream_rectx_dc,
+               hls::stream<uint8_t>& stream_recty_dc,
+               hls::stream<uint8_t>& stream_rectx0,
+               hls::stream<uint8_t>& stream_recty0,
+               hls::stream<uint8_t>& stream_rectx1,
+               hls::stream<uint8_t>& stream_recty1,
+               hls::stream<uint8_t>& stream_rectx2,
+               hls::stream<uint8_t>& stream_recty2) {
+    const uint16_t blocks_x = xsize / 8;
+    const uint16_t blocks_y = ysize / 8;
+    // Number of tiles per dimension, 8 blocks each, rounded up.
+    const int tiles_x = (blocks_x + 8 - 1) / 8;
+    const int tiles_y = (blocks_y + 8 - 1) / 8;
+LOOP_0:
+    for (uint16_t ty = 0; ty < tiles_y; ty++) {
+    LOOP_1:
+        for (uint16_t tx = 0; tx < tiles_x; tx++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+            const uint8_t rect_h = stream_recty_dc.read();
+            const uint8_t rect_w = stream_rectx_dc.read();
+            stream_rectx0.write(rect_w);
+            stream_recty0.write(rect_h);
+            stream_rectx1.write(rect_w);
+            stream_recty1.write(rect_h);
+            stream_rectx2.write(rect_w);
+            stream_recty2.write(rect_h);
+        }
+    }
+}
+// Writes the 8x8, 16x16 and 32x32 DC coefficient streams to their output buffers.
+//
+// The single incoming rect-size stream pair is fanned out by GetDCSize() into three private
+// stream pairs, one for each of dc_8x8_writeout(), dc_16x16_writeout() and dc_32x32_writeout().
+//
+// ysize, xsize               image height / width
+// hls_dc8x8/16x16/32x32      output buffers handed to the matching writer
+// stream_rectx_dc/recty_dc   per-tile rect sizes, consumed by GetDCSize()
+// dc_coef*_stream            coefficient streams handed to the matching writer
+void dc_writeout(unsigned ysize,
+                 unsigned xsize,
+                 float* hls_dc8x8,
+                 float* hls_dc16x16,
+                 float* hls_dc32x32,
+
+                 hls::stream<uint8_t>& stream_rectx_dc,
+                 hls::stream<uint8_t>& stream_recty_dc,
+                 hls::stream<float>& dc_coef8x8_stream,
+                 hls::stream<float>& dc_coef16x16_stream,
+                 hls::stream<float>& dc_coef32x32_stream) {
+#pragma HLS INLINE
+    // Private copies of the rect-size streams, one pair per writer. The second template argument
+    // (1024) is the stream depth in Vitis HLS.
+    // NOTE(review): if these stages ever run one after another instead of concurrently, GetDCSize()
+    // would need room for every tile's sizes in each FIFO -- confirm the caller's dataflow setup.
+    hls::stream<uint8_t, 1024> stream_rectx_dc0;
+    hls::stream<uint8_t, 1024> stream_recty_dc0;
+    hls::stream<uint8_t, 1024> stream_rectx_dc1;
+    hls::stream<uint8_t, 1024> stream_recty_dc1;
+    hls::stream<uint8_t, 1024> stream_rectx_dc2;
+    hls::stream<uint8_t, 1024> stream_recty_dc2;
+    // Note the argument order: GetDCSize() takes (xsize, ysize) as short, so the unsigned values
+    // are narrowed here, while the writers below take (ysize, xsize).
+    GetDCSize(xsize, ysize, stream_rectx_dc, stream_recty_dc, stream_rectx_dc0, stream_recty_dc0, stream_rectx_dc1,
+              stream_recty_dc1, stream_rectx_dc2, stream_recty_dc2);
+    dc_8x8_writeout(ysize, xsize, hls_dc8x8, stream_rectx_dc0, stream_recty_dc0, dc_coef8x8_stream);
+    dc_16x16_writeout(ysize, xsize, hls_dc16x16, stream_rectx_dc1, stream_recty_dc1, dc_coef16x16_stream);
+    dc_32x32_writeout(ysize, xsize, hls_dc32x32, stream_rectx_dc2, stream_recty_dc2, dc_coef32x32_stream);
+}
+
+// Stores the per-tile map values in cmap_axi.
+//
+// With num_tile = number of 64x64 tiles covering the image, entries [0, num_tile) receive the
+// values read from cmapx_strm and entries [num_tile, 2 * num_tile) those read from cmapb_strm.
+// Exactly one value is consumed from each stream per tile, cmapx first.
+void cfl_writeout(unsigned xsize,
+                  unsigned ysize,
+                  hls::stream<int8_t>& cmapx_strm,
+                  hls::stream<int8_t>& cmapb_strm,
+                  int8_t* cmap_axi) {
+#pragma HLS INLINE off
+
+    const int tiles_across = (xsize + 63) / 64;
+    const int tiles_down = (ysize + 63) / 64;
+    const int tile_total = tiles_across * tiles_down;
+
+    for (int t = 0; t < tile_total; ++t) {
+#pragma HLS PIPELINE II = 2
+        const int8_t from_x = cmapx_strm.read();
+        cmap_axi[t] = from_x;
+        const int8_t from_b = cmapb_strm.read();
+        cmap_axi[tile_total + t] = from_b;
+    }
+}
+
+// Expands the per-transform strategy and quant-field streams into dense per-8x8-block maps.
+//
+// The image is walked tile by tile (8x8 blocks per tile, edge tiles clipped to the image) and,
+// inside a tile, block by block in raster order. A block that is not yet covered pops one
+// strategy and one quant value; both are replicated over the b x b blocks of that transform,
+// where b = strategy_block[strategy]. `visited` holds one "covered" bit per block of the tile.
+//
+// xsize, ysize        image size in pixels; block counts use integer division by 8
+// strategy_all        output, one entry per block, row-major with stride xsize_blocks
+// raw_quant_field_i   output, same layout as strategy_all
+// stream_strategy     one strategy per transform
+// stream_rqf          one quant value per transform
+//
+// NOTE(review): a transform is assumed to lie entirely inside its tile and inside the image;
+// nothing here checks it.
+void acs_rqf_writeout(int xsize,
+                      int ysize,
+                      unsigned char* strategy_all,
+                      int* raw_quant_field_i,
+                      hls::stream<uint8_t>& stream_strategy,
+                      hls::stream<int>& stream_rqf) {
+#pragma HLS INLINE off
+    const int xsize_blocks = xsize / 8;
+    const int ysize_blocks = ysize / 8;
+    const uint32_t xnum_tile = (xsize_blocks + 7) / 8;
+    const uint32_t ynum_tile = (ysize_blocks + 7) / 8;
+    ap_uint<64> visited;
+    // The tile counters have the same width as their bounds: an 8-bit counter would wrap at 256
+    // and never reach a larger tile count.
+LOOP_1:
+    for (uint32_t ty1 = 0; ty1 < ynum_tile; ty1++) {
+    LOOP_2:
+        for (uint32_t tx1 = 0; tx1 < xnum_tile; tx1++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+            // Block range [b?0, b?1) covered by this tile, clipped to the image.
+            const int by0 = ty1 * 8;
+            const int by1 = std::min((int)((ty1 + 1) * 8), ysize_blocks);
+            const int bx0 = tx1 * 8;
+            const int bx1 = std::min((int)((tx1 + 1) * 8), xsize_blocks);
+            const int rect_ysize = by1 - by0;
+            const int rect_xsize = bx1 - bx0;
+            visited = 0;
+        LOOP_3:
+            for (uint8_t y = 0; y < rect_ysize; ++y) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+            LOOP_4:
+                for (uint8_t x = 0; x < rect_xsize; ++x) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+                    const uint8_t idx = y * 8 + x;
+                    if (visited.range(idx, idx) == 0) {
+                        // First uncovered block of a transform: fetch its parameters once.
+                        const uint8_t strategy = stream_strategy.read();
+                        const int rqf = stream_rqf.read();
+                        const int b = strategy_block[strategy];
+                    LOOP_5:
+                        for (uint8_t iy = 0; iy < b; iy++) {
+                        LOOP_6:
+                            for (uint8_t ix = 0; ix < b; ix++) {
+#pragma HLS pipeline
+                                // Mark the block as covered inside the tile ...
+                                const uint16_t covered = (iy + y) * 8 + (ix + x);
+                                visited.range(covered, covered) = 1;
+                                // ... and fill both image-wide maps at its global position.
+                                const int out_idx = (y + by0 + iy) * xsize_blocks + (x + bx0 + ix);
+                                strategy_all[out_idx] = strategy;
+                                raw_quant_field_i[out_idx] = rqf;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+//=========================================================//
+// module
+//=========================================================//
+// cfl -----------------------------------------------------
+// Per-tile CFL fit that forwards all of its inputs unchanged.
+//
+// For each 64x64 tile (edge tiles clipped to the image padded to a multiple of 8) the transforms
+// of the tile are consumed one at a time: strategy, quant value, then csize coefficient triples
+// in the order (y, x, b). Everything read is forwarded to acs_cflout_strm, rqf_out_stream and
+// ac_coef_cflout_strm. While forwarding, sum(a * a) and sum(a * b) are accumulated for the X and
+// the B channel. After the tile, -sum(a * b) / (sum(a * a) + total_pix * kDistanceMultiplierAC / 2)
+// is rounded, clamped to [-128, 127] and written to both cmap?_strm and cmap?_axi_strm.
+//
+// xsize, ysize            image size in pixels
+// ac_coef_strm            input coefficients, three floats per position
+// rqf_in_stream           one quant value per transform (forwarded only)
+// acs_strm                one strategy per transform; 0, 4 and 5 are handled
+// cmapx_strm/cmapb_strm   per-tile result for X / B
+// cmap?_axi_strm          copy of the per-tile result
+// *_cflout_strm, rqf_out_stream   pass-through copies of the inputs
+void hls_CFLComputeTile(unsigned xsize,
+                        unsigned ysize,
+                        hls::stream<float>& ac_coef_strm,
+                        hls::stream<int>& rqf_in_stream,
+                        hls::stream<uint8_t>& acs_strm,
+                        hls::stream<int8_t>& cmapx_strm,
+                        hls::stream<int8_t>& cmapb_strm,
+                        hls::stream<int8_t>& cmapx_axi_strm,
+                        hls::stream<int8_t>& cmapb_axi_strm,
+                        hls::stream<float>& ac_coef_cflout_strm,
+                        hls::stream<int>& rqf_out_stream,
+                        hls::stream<uint8_t>& acs_cflout_strm) {
+#pragma HLS INLINE off
+    const uint8_t kDefaultColorFactor = 84U;
+    const float kInvColorFactor = 1.0f / kDefaultColorFactor;
+    const float kYToBRatio = 1.0f;
+    const float kDistanceMultiplierAC = 1e-3f;
+    // Fixed scale applied to every quant-matrix entry below.
+    const float q = 27.996826171875;
+
+    const unsigned xsize_alg = (xsize + 7) / 8 * 8;
+    const unsigned ysize_alg = (ysize + 7) / 8 * 8;
+    const int xnum_tile = (xsize + 63) / 64;
+    const int ynum_tile = (ysize + 63) / 64;
+    const int num_tile = xnum_tile * ynum_tile;
+    // Top-left pixel of the current tile; tiles are visited in raster order.
+    unsigned tx0 = 0;
+    unsigned ty0 = 0;
+
+    for (int tid = 0; tid < num_tile; tid++) {
+        ca_x_t ca_x = 0;
+        cb_x_t cb_x = 0;
+        ca_b_t ca_b = 0;
+        cb_b_t cb_b = 0;
+
+        // Extent of this tile in pixels (named so that the image-size parameters are not shadowed).
+        const unsigned tile_h = (ty0 + 64 > ysize_alg) ? (ysize_alg - ty0) : 64;
+        const unsigned tile_w = (tx0 + 64 > xsize_alg) ? (xsize_alg - tx0) : 64;
+
+        unsigned total_pix = tile_w * tile_h;
+        unsigned cur_pix = 0;
+
+        while (cur_pix < total_pix) {
+            uint8_t acsRaw = acs_strm.read();
+            acs_cflout_strm.write(acsRaw);
+            rqf_out_stream.write(rqf_in_stream.read());
+
+            // Number of coefficient positions of this transform.
+            // NOTE(review): csize (and qm_x / qm_b below) stay uninitialised for any strategy other
+            // than 0, 4 or 5; the producer must never emit one.
+            unsigned csize;
+            if (acsRaw == 0) {
+                csize = 64;
+            } else if (acsRaw == 4) {
+                csize = 256;
+            } else if (acsRaw == 5) {
+                csize = 1024;
+            }
+
+            for (unsigned i = 0; i < csize; i++) {
+#pragma HLS PIPELINE II = 3
+                float b_y = ac_coef_strm.read();
+                float b_x = ac_coef_strm.read();
+                float b_b = ac_coef_strm.read();
+
+                ac_coef_cflout_strm.write(b_y);
+                ac_coef_cflout_strm.write(b_x);
+                ac_coef_cflout_strm.write(b_b);
+
+                float qm_x;
+                float qm_b;
+
+                if (acsRaw == 0) {
+                    qm_x = qmx8x8[i];
+                    qm_b = qmb8x8[i];
+                } else if (acsRaw == 4) {
+                    qm_x = qmx16x16[i];
+                    qm_b = qmb16x16[i];
+                } else if (acsRaw == 5) {
+                    qm_x = qmx32x32[i];
+                    qm_b = qmb32x32[i];
+                }
+
+                float qqm_x = q * qm_x;
+                float qqm_b = q * qm_b;
+
+                // X channel: a is the scaled Y term, b the residual with a Y-to-X ratio of 0.
+                float coeffs_yx = b_y * qqm_x;
+                float coeffs_x = b_x * qqm_x;
+                float a = kInvColorFactor * coeffs_yx;
+                float b = 0.0f * coeffs_yx - coeffs_x;
+                ca_x = (ca_x_t)(a * a) + ca_x;
+                cb_x = (cb_x_t)(a * b) + cb_x;
+
+                // B channel: same accumulation with a Y-to-B ratio of kYToBRatio.
+                float coeffs_yb = b_y * qqm_b;
+                float coeffs_b = b_b * qqm_b;
+
+                a = kInvColorFactor * coeffs_yb;
+                b = kYToBRatio * coeffs_yb - coeffs_b;
+                ca_b = (ca_b_t)(a * a) + ca_b;
+                cb_b = (cb_b_t)(a * b) + cb_b;
+
+                cur_pix++;
+            }
+        }
+
+        float x;
+        x = -(float)cb_x / ((float)ca_x + total_pix * kDistanceMultiplierAC * 0.5f);
+        int8_t cmap_x_reg = std::max(-128.0f, std::min(127.0f, std::roundf(x)));
+        cmapx_strm.write(cmap_x_reg);
+        cmapx_axi_strm.write(cmap_x_reg);
+
+        x = -(float)cb_b / ((float)ca_b + total_pix * kDistanceMultiplierAC * 0.5f);
+        int8_t cmap_b_reg = std::max(-128.0f, std::min(127.0f, std::roundf(x)));
+        cmapb_strm.write(cmap_b_reg);
+        cmapb_axi_strm.write(cmap_b_reg);
+
+        // Advance to the next tile, wrapping to the start of the next tile row when needed.
+        if (tx0 + 64 >= xsize_alg) {
+            tx0 = 0;
+            ty0 = ty0 + 64;
+        } else {
+            tx0 = tx0 + 64;
+        }
+    }
+}
+//--------------------------hls_compute_coefficients--------------------------//
+// Returns the quantized coefficient with a bias applied:
+//
+//   quant_i == 0     -> 0
+//   |quant_i| == 1   -> biases[c] with its sign bit XOR-ed with the sign bit of quant_i
+//   otherwise        -> quant_i - biases[3] * (1 / quant_i)
+//
+// c        index of the bias used for the +/-1 case
+// quant_i  quantized coefficient
+// biases   entries c and 3 are read on every call
+float adjustQuantBias(size_t c, int32_t quant_i, const float* biases) {
+    // Sign bit of quant_i, isolated so that it can be XOR-ed into a float bit pattern.
+    const int32_t sign_bit = quant_i & INT32_MIN;
+
+    const float abs_quant = std::abs(quant_i);
+    const bool is_01 = abs_quant < 1.125f; // true for |quant_i| of 0 or 1
+    const bool not_0 = abs_quant > 0;
+
+    // biases[c] with the sign bit flipped when quant_i is negative.
+    cast<float, int32_t> signed_bias;
+    signed_bias.f = biases[c];
+    signed_bias.i = signed_bias.i ^ sign_bit;
+    const float one_bias = not_0 ? (signed_bias.f) : 0;
+
+    // The reciprocal is deliberately evaluated in double and then narrowed, as before, so that
+    // the result stays bit-identical.
+    const float tmp = quant_i ? (1.0 / quant_i) : 0.0f;
+    const float bias = quant_i - biases[3] * tmp;
+    return is_01 ? one_bias : bias;
+}
+
+// Quantizes the DCT coefficients of every transform and forwards the results.
+//
+// Work is organised in tiles of 8x8 blocks (edge tiles hold fewer blocks). Per tile one ytox and
+// one ytob map value is read and turned into x_factor / b_factor. Per transform one strategy and
+// one quant value are read and echoed (strategy to acs_axi_strm and acs_coeff_stream1, quant to
+// qf_axi_strm). Per coefficient position three floats are read from dctStrm in the order Y, X, B.
+//
+// For each position Y is quantized first and reconstructed again through adjustQuantBias();
+// x_factor / b_factor times that reconstructed Y is subtracted from X / B before those are
+// quantized. Values whose magnitude is below the position-dependent threshold become 0. The three
+// integers are written in the order Y, X, B to both coeffOutStrm and coeff_axi_stream.
+//
+// NOTE(review): q_x / q_y / q_b and dequantm are only assigned for DCT, DCT16X16 and DCT32X32,
+// while the block-size selection treats every other strategy as 4x4 blocks; the producer must
+// not emit other strategies.
+void hls_ComputeCoefficients(uint32_t xsize,
+                             uint32_t ysize,
+                             hls::stream<uint8_t>& acsStrm,
+                             hls::stream<float>& dctStrm,
+                             hls::stream<int>& quantFieldStrm,
+                             hls::stream<int8_t>& ytoxMapStrm,
+                             hls::stream<int8_t>& ytobMapStrm,
+                             hls::stream<uint8_t>& acs_coeff_stream1,
+                             hls::stream<int>& coeffOutStrm,
+                             hls::stream<int>& coeff_axi_stream,
+                             hls::stream<uint8_t>& acs_axi_strm,
+                             hls::stream<int>& qf_axi_strm) {
+#pragma HLS INLINE off
+    uint8_t acs;
+    uint8_t xblocks, yblocks;
+    int8_t ytox_map, ytob_map;
+    float x_factor, b_factor;
+    float qm_multiplier = 1.0f;
+    float coef_dct[3];
+#pragma HLS ARRAY_PARTITION variable = coef_dct complete dim = 1
+    float thr_x, thr_y, thr_b, out_x, out_b;
+
+    // Zeroing thresholds, indexed by quadrant of the transform: bit 1 = lower half, bit 0 = right half.
+    float thresy[4] = {0.5f, 0.6f, 0.6f, 0.65f};
+    float thresxb[4] = {0.5f, 0.75f, 0.75f, 0.75f};
+#pragma HLS ARRAY_PARTITION variable = thresy complete dim = 1
+#pragma HLS ARRAY_PARTITION variable = thresxb complete dim = 1
+
+    uint32_t xsize_blocks = (xsize + 7) / 8;
+    uint32_t ysize_blocks = (ysize + 7) / 8;
+    uint16_t xsize_tails = DivCeil(xsize_blocks, kEncTileDimInBlocks);
+    uint16_t ysize_tails = DivCeil(ysize_blocks, kEncTileDimInBlocks);
+    uint16_t xsize_left = 8 - (xsize_tails * 8 - xsize_blocks); // blocks in the last tile column
+    uint16_t ysize_left = 8 - (ysize_tails * 8 - ysize_blocks); // blocks in the last tile row
+    uint16_t num_blocks;
+
+    for (uint16_t ty = 0; ty < ysize_tails; ++ty) {
+        for (uint16_t tx = 0; tx < xsize_tails; ++tx) {
+            ytoxMapStrm.read(ytox_map);
+            ytobMapStrm.read(ytob_map);
+            x_factor = base_correlation_x + ytox_map * color_scale;
+            b_factor = base_correlation_b + ytob_map * color_scale;
+
+            // Blocks in this tile: 8 per dimension, fewer in the last tile column / row.
+            const bool last_col = tx == (xsize_tails - 1);
+            const bool last_row = ty == (ysize_tails - 1);
+            num_blocks = (last_col ? xsize_left : 8) * (last_row ? ysize_left : 8);
+
+            uint32_t total = num_blocks * 64;
+            uint32_t cur = 0;
+            ap_uint<16> size = 0, count = 0;
+            ap_uint<16> y, x;
+            // Row offset and lower-half flag of the current coefficient row. They are only updated
+            // at the start of a row (x == 0) and must keep their value for the rest of it, so they
+            // live outside the pipelined loop instead of being re-declared on every iteration.
+            ap_uint<32> off = 0;
+            ap_uint<32> yfix = 0;
+            int quant;
+            float qac, fquant, inv_qac;
+
+            while (cur < total) {
+#pragma HLS PIPELINE II = 3
+                if (count == 0) {
+                    // First coefficient of a transform: fetch and echo its parameters.
+                    acsStrm.read(acs);
+                    acs_axi_strm.write(acs);
+                    acs_coeff_stream1.write(acs);
+                    if (acs == Type::DCT) {
+                        xblocks = 1;
+                        yblocks = 1;
+                    } else if (acs == Type::DCT16X16) {
+                        xblocks = 2;
+                        yblocks = 2;
+                    } else {
+                        xblocks = 4;
+                        yblocks = 4;
+                    }
+
+                    size = kDCTBlockSize * xblocks * yblocks;
+                    quant = quantFieldStrm.read();
+                    qf_axi_strm.write(quant);
+                    qac = global_scale_float * quant;
+                    fquant = qac * qm_multiplier;       // fquant_table[quant - 1];
+                    inv_qac = inv_global_scale / quant; // inv_qac_table[quant - 1];
+                }
+                // Row / column of the current coefficient inside the transform.
+                y = count / (yblocks * kBlockDim);
+                x = count % (xblocks * kBlockDim);
+                if (x == 0) {
+                    off = y * kBlockDim * xblocks;
+                    ap_uint<32> yhalf = yblocks * 4; // ysize * kBlockDim / 2
+                    if (y >= yhalf)
+                        yfix = 2;
+                    else
+                        yfix = 0;
+                }
+
+                // Threshold by quadrant. For a single-block transform xhalf is 4, which is exactly
+                // the former special case, so one code path serves all transform sizes.
+                ap_uint<32> xhalf = xblocks * 4; // xsize * kBlockDim / 2
+                ap_uint<32> xfix;
+                if (x < xhalf)
+                    xfix = 0;
+                else
+                    xfix = 1;
+                thr_x = thresxb[yfix + xfix];
+                thr_y = thresy[yfix + xfix];
+                thr_b = thresxb[yfix + xfix];
+
+                // Quantization multipliers for this position; the table offsets select the channel
+                // and transform size.
+                float q_x;
+                float q_y;
+                float q_b;
+                if (acs == Type::DCT) {
+                    q_x = inv_dequant_stable[0 + off + x] * fquant;
+                    q_y = inv_dequant_stable[64 + off + x] * fquant;
+                    q_b = inv_dequant_stable[128 + off + x] * fquant;
+                } else if (acs == Type::DCT16X16) {
+                    q_x = inv_dequant_stable[768 + off + x] * fquant;
+                    q_y = inv_dequant_stable[1024 + off + x] * fquant;
+                    q_b = inv_dequant_stable[1280 + off + x] * fquant;
+                } else if (acs == Type::DCT32X32) {
+                    q_x = inv_dequant_stable[1536 + off + x] * fquant;
+                    q_y = inv_dequant_stable[2560 + off + x] * fquant;
+                    q_b = inv_dequant_stable[3584 + off + x] * fquant;
+                }
+
+                // Stream order is Y, X, B; coef_dct is indexed X = 0, Y = 1, B = 2.
+                coef_dct[1] = dctStrm.read();
+                coef_dct[0] = dctStrm.read();
+                coef_dct[2] = dctStrm.read();
+
+                float val_y;
+                val_y = q_y * coef_dct[1];
+
+                bool nzero_mask_y = std::abs(val_y) >= thr_y;
+
+                int32_t v_y;
+                if (nzero_mask_y) {
+                    v_y = std::roundf(val_y);
+                } else {
+                    v_y = 0;
+                }
+
+                // Reconstruct Y from its quantized value; X and B are predicted from it.
+                float adj_quant = adjustQuantBias(1, v_y, kDefaultQuantBias);
+                float dequantm;
+                if (acs == Type::DCT) {
+                    dequantm = dequant_table[64 + off + x];
+                } else if (acs == Type::DCT16X16) {
+                    dequantm = dequant_table[1024 + off + x];
+                } else if (acs == Type::DCT32X32) {
+                    dequantm = dequant_table[2560 + off + x];
+                }
+                coef_dct[1] = adj_quant * dequantm * inv_qac;
+
+                out_x = coef_dct[0] - x_factor * coef_dct[1];
+                coef_dct[0] = out_x;
+
+                out_b = coef_dct[2] - b_factor * coef_dct[1];
+                coef_dct[2] = out_b;
+
+                float val_x;
+                float val_b;
+                val_x = q_x * coef_dct[0];
+                val_b = q_b * coef_dct[2];
+
+                bool nzero_mask_x = std::abs(val_x) >= thr_x;
+
+                bool nzero_mask_b = std::abs(val_b) >= thr_b;
+
+                int32_t v_x;
+
+                int32_t v_b;
+                if (nzero_mask_x) {
+                    v_x = std::roundf(val_x);
+                } else {
+                    v_x = 0;
+                }
+
+                if (nzero_mask_b) {
+                    v_b = std::roundf(val_b);
+                } else {
+                    v_b = 0;
+                }
+
+                coeffOutStrm.write(v_y);
+                coeffOutStrm.write(v_x);
+                coeffOutStrm.write(v_b);
+
+                coeff_axi_stream.write(v_y);
+                coeff_axi_stream.write(v_x);
+                coeff_axi_stream.write(v_b);
+                cur++;
+                count++;
+                if (count == size) count = 0;
+            } // while
+        }     // tx
+    }         // ty
+}
+
+//--------------------- Compute ALL orders---------------------//
+// Streaming sorter over a window of RANGE entries.
+//
+// Reads `size` (pos, count) pairs, pos first, and writes `size` positions to pos_outstrm. A window
+// of RANGE (count, pos) entries is kept; every iteration the new pair is compared against all
+// entries, the entry in slot 0 is emitted (once the first RANGE inputs have been taken), entries
+// not greater than the new one move down by one slot, and the new pair is inserted behind them.
+// After the input is exhausted, RANGE pairs with count = pos = (unsigned)-1 are fed in to flush
+// the window.
+// NOTE(review): the net effect appears to be an ascending sort by count that is exact only when
+// no entry has to move more than RANGE places -- confirm this is acceptable for callers.
+//
+// RANGE          window size
+// size           number of input pairs
+// count_instrm   sort keys
+// pos_instrm     payload emitted in sorted order
+// pos_outstrm    reordered payload
+template <int RANGE> // opt1:256(slow), opt2:8(fast)
+void hls_sort(int size,
+              hls::stream<unsigned>& count_instrm,
+              hls::stream<unsigned>& pos_instrm,
+              hls::stream<unsigned>& pos_outstrm) {
+    unsigned count_shift[RANGE];
+    // pos_shift is not initialised; nothing is emitted during the first RANGE iterations.
+    unsigned pos_shift[RANGE];
+    ap_uint<RANGE> cmp = 0;
+
+    for (int i = 0; i < RANGE; i++) {
+#pragma HLS UNROLL
+        count_shift[i] = 0;
+    }
+
+    // size iterations take input, RANGE more flush the window.
+    for (int i = 0; i < size + RANGE; i++) {
+#pragma HLS PIPELINE II = 1
+        unsigned count_reg;
+        unsigned pos_reg;
+        if (i < size) {
+            pos_reg = pos_instrm.read();
+            count_reg = count_instrm.read();
+        } else {
+            // Flush entry: -1 converts to the largest unsigned value.
+            count_reg = -1;
+            pos_reg = -1;
+        }
+
+        // cmp[k] is set when the new count is >= the count held in slot k.
+        // (The inner loops below reuse the name i, shadowing the outer counter.)
+        for (int i = 0; i < RANGE; i++) {
+#pragma HLS UNROLL
+            cmp[i] = count_reg >= count_shift[i];
+        }
+
+        // Here i is the outer counter again: start emitting after RANGE inputs.
+        if (i >= RANGE) {
+            pos_outstrm.write(pos_shift[0]);
+        }
+
+        // Entries not greater than the new one move one slot towards slot 0.
+        for (int i = 1; i < RANGE; i++) {
+#pragma HLS UNROLL
+            if (cmp[i] == 1) {
+                count_shift[i - 1] = count_shift[i];
+                pos_shift[i - 1] = pos_shift[i];
+            }
+        }
+
+        // cmp_br is ~cmp with its bits reversed, so its leading-zero count is the number of
+        // consecutive slots, starting at slot 0, whose count is <= the new one.
+        unsigned insert_pos;
+        ap_uint<RANGE> cmp_br = ~cmp;
+        cmp_br.reverse();
+        if (cmp_br == 0) {
+            // New count is >= every slot: append at the end.
+            insert_pos = RANGE - 1;
+        } else if (cmp == 0) {
+            // New count is below every slot: it takes slot 0.
+            insert_pos = 0;
+        } else {
+            insert_pos = cmp_br.countLeadingZeros() - 1;
+        }
+
+        count_shift[insert_pos] = count_reg;
+        pos_shift[insert_pos] = pos_reg;
+    }
+}
+
+// Runs the windowed sorter once per (order, channel) pair: three passes over 64 entries followed
+// by three passes over 256 entries, all sharing the same input and output streams.
+//
+// count_instrm   sort keys for all six passes, back to back
+// pos_instrm     positions for all six passes, back to back
+// pos_outstrm    reordered positions, in the same pass order
+void hls_sort_top(hls::stream<unsigned>& count_instrm,
+                  hls::stream<unsigned>& pos_instrm,
+                  hls::stream<unsigned>& pos_outstrm) {
+    for (uint8_t o = 0; o < 2; ++o) {
+        // Order 0 has 64 positions, order 1 has 256.
+        const unsigned sz = (o == 0) ? 64 : 256;
+
+        for (uint8_t c = 0; c < 3; c++) {
+            // Simulation and synthesis use the same window size (8); the former
+            // __SYNTHESIS__ switch selected identical code on both sides.
+            hls_sort<8>(sz, count_instrm, pos_instrm, pos_outstrm);
+        }
+    }
+}
+
+// Clears every counter of the 3 x 320 num_zeros table.
+void init_numzeros(int32_t num_zeros[3][320]) {
+    for (int pos = 0; pos < 320; ++pos) {
+        for (int ch = 0; ch < 3; ++ch) {
+#pragma HLS PIPELINE II = 1
+            num_zeros[ch][pos] = 0;
+        }
+    }
+}
+
+// Counts, per channel and coefficient position, how many quantized coefficients are zero.
+//
+// Transforms are consumed until the padded image area (both sides rounded up to a multiple of 8)
+// is covered. For every transform one strategy is read, followed by three coefficients per
+// position. Zeros are counted for strategy 0 (entries [0, 64) of num_zeros[c]) and strategy 4
+// (entries [64, 320)); coefficients of other strategies are read and discarded. One bit of
+// used_orders is set per strategy group seen (0 -> bit 0, 1..3 -> bit 1, 4 -> bit 2) and the mask
+// is written once at the end. Finally position 0 of the 8x8 section and positions 0, 1, 16, 17 of
+// the 16x16 section are overwritten with -1 for every channel.
+// NOTE(review): the -1 entries presumably force those positions to sort first -- confirm.
+//
+// num_zeros is accumulated into, not cleared; the caller must initialise it.
+void count_numzeros(unsigned xsize,
+                    unsigned ysize,
+                    hls::stream<uint8_t>& ac_strategy_strm,
+                    hls::stream<int>& ac_coef_quant_strm,
+                    hls::stream<ap_uint<3> >& used_orders_strm,
+                    int32_t num_zeros[3][320]) {
+#pragma HLS INLINE off
+    const unsigned xsize_alg = (xsize + 7) / 8 * 8;
+    const unsigned ysize_alg = (ysize + 7) / 8 * 8;
+    const unsigned total_pix = xsize_alg * ysize_alg;
+
+    const int32_t offset8x8 = 0;
+    const int32_t offset16x16 = 64;
+
+    ap_uint<3> used_orders_ap = 0;
+
+    unsigned cur_pix = 0;
+    while (cur_pix < total_pix) {
+        const uint8_t acsRaw = ac_strategy_strm.read();
+        // Positions covered by this transform; left unset for strategies not listed below.
+        unsigned size;
+        switch (acsRaw) {
+            case 0:
+                size = 64;
+                used_orders_ap[0] = 1;
+                break;
+            case 1:
+            case 2:
+            case 3:
+                used_orders_ap[1] = 1;
+                size = 64;
+                break;
+            case 4:
+                used_orders_ap[2] = 1;
+                size = 256;
+                break;
+            case 5:
+                size = 1024;
+                break;
+        }
+        cur_pix = cur_pix + size;
+
+        for (unsigned k = 0; k < size; k++) {
+            for (int c = 0; c < 3; c++) {
+#pragma HLS PIPELINE II = 1
+#pragma HLS DEPENDENCE variable = num_zeros type = inter false
+                const int coef = ac_coef_quant_strm.read();
+                if (coef == 0) {
+                    if (acsRaw == 0) {
+                        num_zeros[c][offset8x8 + k]++;
+                    } else if (acsRaw == 4) {
+                        num_zeros[c][offset16x16 + k]++;
+                    }
+                }
+            }
+        }
+    }
+
+    used_orders_strm.write(used_orders_ap);
+
+    // Mark the fixed positions of both sections with -1 in every channel.
+    const int32_t marked16x16[4] = {0, 1, 16, 17};
+    for (int c = 0; c < 3; c++) {
+        num_zeros[c][offset8x8] = -1;
+        for (int j = 0; j < 4; j++) {
+            num_zeros[c][offset16x16 + marked16x16[j]] = -1;
+        }
+    }
+}
+
+// Feeds the zero counts to the sorter in zigzag order.
+//
+// Two orders are emitted, 8x8 (64 positions) then 16x16 (256 positions), each for channels
+// 0, 1, 2. For every zigzag position the position itself goes to pos_strm and the scaled count
+// num_zeros * (1 / sqrt(size)) + 0.1, converted to unsigned, goes to count_strm.
+//
+// num_zeros    counts; entries [0, 64) hold the 8x8 section, entries from 64 the 16x16 section
+// count_strm   scaled counts
+// pos_strm     coefficient positions in zigzag order
+void load_nz2strm(int32_t num_zeros[3][320], hls::stream<unsigned>& count_strm, hls::stream<unsigned>& pos_strm) {
+    const int32_t offset8x8 = 0;
+    const int32_t offset16x16 = 64;
+
+    const uint32_t coef8x8_zigzag[64] = {0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,
+                                         12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6,  7,  14, 21, 28,
+                                         35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+                                         58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63};
+    const uint32_t coef16x16_zigzag[256] = {
+        0,   1,   16,  17,  32,  2,   3,   18,  33,  48,  64,  49,  34,  19,  4,   5,   20,  35,  50,  65,  80,  96,
+        81,  66,  51,  36,  21,  6,   7,   22,  37,  52,  67,  82,  97,  112, 128, 113, 98,  83,  68,  53,  38,  23,
+        8,   9,   24,  39,  54,  69,  84,  99,  114, 129, 144, 160, 145, 130, 115, 100, 85,  70,  55,  40,  25,  10,
+        11,  26,  41,  56,  71,  86,  101, 116, 131, 146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87,  72,  57,
+        42,  27,  12,  13,  28,  43,  58,  73,  88,  103, 118, 133, 148, 163, 178, 193, 208, 224, 209, 194, 179, 164,
+        149, 134, 119, 104, 89,  74,  59,  44,  29,  14,  15,  30,  45,  60,  75,  90,  105, 120, 135, 150, 165, 180,
+        195, 210, 225, 240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91,  76,  61,  46,  31,  47,  62,  77,
+        92,  107, 122, 137, 152, 167, 182, 197, 212, 227, 242, 243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93,
+        78,  63,  79,  94,  109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185, 170, 155, 140,
+        125, 110, 95,  111, 126, 141, 156, 171, 186, 201, 216, 231, 246, 247, 232, 217, 202, 187, 172, 157, 142, 127,
+        143, 158, 173, 188, 203, 218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235, 250, 251,
+        236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254, 255};
+
+    for (uint8_t o = 0; o < 2; ++o) {
+        // Per-order settings: number of positions, 1 / sqrt(positions), section start.
+        const bool is8x8 = (o == 0);
+        const unsigned sz = is8x8 ? 64 : 256;
+        const float inv_sqrt_sz = is8x8 ? (1.0f / 8.0f) : (1.0f / 16.0f);
+        const int32_t base = is8x8 ? offset8x8 : offset16x16;
+
+        for (uint8_t c = 0; c < 3; c++) {
+            for (unsigned i = 0; i < sz; ++i) {
+#pragma HLS PIPELINE II = 1
+                const unsigned pos = is8x8 ? coef8x8_zigzag[i] : coef16x16_zigzag[i];
+
+                // We don't care for the exact number -> quantize number of zeros,
+                // to get less permuted order.
+                pos_strm.write(pos);
+                count_strm.write(num_zeros[c][base + pos] * inv_sqrt_sz + 0.1f);
+            }
+        }
+    }
+}
+
+// Drains the permuted coefficient positions from pos_strm into hls_order and
+// stores the (possibly reduced) used-orders mask in the last word.
+//
+// used_orders_strm : one 3-bit mask, read once on entry.
+// pos_strm         : 3 * 64 positions for the 8x8 order, followed by
+//                    3 * 256 positions for the 16x16 order.
+// hls_order        : for channel c, words [c * 320, c * 320 + 64) receive the
+//                    8x8 order and words [c * 320 + 64, c * 320 + 320) the
+//                    16x16 order; word 320 * 3 receives the mask.
+void order_writeout(hls::stream<ap_uint<3> >& used_orders_strm,
+                    hls::stream<unsigned>& pos_strm,
+                    uint32_t hls_order[320 * 3 + 1] // AXI port
+                    ) {
+    // Word offsets of the two orders inside one channel's 320-word slot.
+    const int32_t offset8x8 = 0;
+    const int32_t offset16x16 = 64;
+
+    // Reference scan orders that an incoming order is compared against.
+    const uint32_t coef8x8_zigzag[64] = {0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,
+                                         12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6,  7,  14, 21, 28,
+                                         35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+                                         58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63};
+    const uint32_t coef16x16_zigzag[256] = {
+        0,   1,   16,  17,  32,  2,   3,   18,  33,  48,  64,  49,  34,  19,  4,   5,   20,  35,  50,  65,  80,  96,
+        81,  66,  51,  36,  21,  6,   7,   22,  37,  52,  67,  82,  97,  112, 128, 113, 98,  83,  68,  53,  38,  23,
+        8,   9,   24,  39,  54,  69,  84,  99,  114, 129, 144, 160, 145, 130, 115, 100, 85,  70,  55,  40,  25,  10,
+        11,  26,  41,  56,  71,  86,  101, 116, 131, 146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87,  72,  57,
+        42,  27,  12,  13,  28,  43,  58,  73,  88,  103, 118, 133, 148, 163, 178, 193, 208, 224, 209, 194, 179, 164,
+        149, 134, 119, 104, 89,  74,  59,  44,  29,  14,  15,  30,  45,  60,  75,  90,  105, 120, 135, 150, 165, 180,
+        195, 210, 225, 240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91,  76,  61,  46,  31,  47,  62,  77,
+        92,  107, 122, 137, 152, 167, 182, 197, 212, 227, 242, 243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93,
+        78,  63,  79,  94,  109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185, 170, 155, 140,
+        125, 110, 95,  111, 126, 141, 156, 171, 186, 201, 216, 231, 246, 247, 232, 217, 202, 187, 172, 157, 142, 127,
+        143, 158, 173, 188, 203, 218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235, 250, 251,
+        236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254, 255};
+
+    ap_uint<3> orders_mask = used_orders_strm.read();
+    for (uint8_t o = 0; o < 2; ++o) {
+        const bool is8x8 = (o == 0);
+        const unsigned count = is8x8 ? 64 : 256;
+
+        // Becomes true as soon as any channel deviates from the reference order.
+        bool differs = false;
+        for (uint8_t c = 0; c < 3; c++) {
+            for (unsigned i = 0; i < count; ++i) {
+#pragma HLS PIPELINE II = 1
+                const unsigned permuted = pos_strm.read();
+                if (is8x8) {
+                    hls_order[c * 320 + offset8x8 + i] = permuted;
+                    differs |= coef8x8_zigzag[i] != permuted;
+                } else {
+                    hls_order[c * 320 + offset16x16 + i] = permuted;
+                    differs |= coef16x16_zigzag[i] != permuted;
+                }
+            }
+        }
+
+        // All three channels matched the reference: clear the mask bit
+        // (bit 0 for the 8x8 pass, bit 2 for the 16x16 pass).
+        if (!differs) {
+            if (is8x8) {
+                orders_mask[0] = 0;
+            } else {
+                orders_mask[2] = 0;
+            }
+        }
+    }
+    hls_order[320 * 3] = orders_mask;
+}
+
+// Three-stage DATAFLOW region connecting load_nz2strm, hls_sort_top and
+// order_writeout through local streams.
+//
+// used_orders_strm : forwarded unchanged to order_writeout.
+// num_zeros        : per-channel table handed to load_nz2strm.
+// hls_order        : output buffer handed to order_writeout.
+void order_finalize_dataflow(hls::stream<ap_uint<3> >& used_orders_strm,
+                             int32_t num_zeros[3][320],
+                             uint32_t hls_order[320 * 3 + 1]) {
+#pragma HLS DATAFLOW
+    // Inter-stage channels; the second template argument is the stream depth.
+    hls::stream<unsigned, 32> count_instrm("count_instrm");
+    hls::stream<unsigned, 32> pos_instrm("pos_instrm");
+    hls::stream<unsigned, 8> pos_outstrm("pos_outstrm");
+
+    // Stage 1: emit one (count, position) pair per coefficient.
+    load_nz2strm(num_zeros, count_instrm, pos_instrm);
+
+    // Stage 2: defined elsewhere; consumes the pairs and produces positions only.
+    // NOTE(review): presumably reorders positions by count -- confirm in hls_sort_top.
+    hls_sort_top(count_instrm, pos_instrm, pos_outstrm);
+
+    // Stage 3: store the resulting positions and the used-orders mask.
+    order_writeout(used_orders_strm, pos_outstrm, hls_order);
+}
+
+//-------------------------- dct --------------------------//
+// dct8x8
+// Applies an 8-point butterfly network to each of the eight rows of a block.
+//
+// in  : 64 floats; row i occupies in[8 * i] .. in[8 * i + 7].
+// out : 64 floats, same layout; each row receives its eight results.
+// No 1/8 scaling is applied in this function.
+void hls_DCT1DImpl_8x8(float in[64], float out[64]) {
+#pragma HLS INLINE off
+    // The literals are narrowed to float here, so all arithmetic below is float.
+    const float kN8_0 = 0.5097955791041592;
+    const float kN8_1 = 0.6013448869350453;
+    const float kN8_2 = 0.8999762231364156;
+    const float kN8_3 = 2.5629154477415055;
+    const float kN4_0 = 0.541196100146197;
+    const float kN4_1 = 1.3065629648763764;
+    const float kSqrt2 = 1.4142135623730951f;
+
+loop_dct8x8:
+    for (int i = 0; i < 8; i += 1) {
+#pragma HLS DEPENDENCE variable = in inter false
+#pragma HLS DEPENDENCE variable = out inter false
+#pragma HLS LOOP_FLATTEN off
+#pragma HLS pipeline II = 2
+        const int base = i * 8;
+
+        const float x0 = in[base + 0];
+        const float x1 = in[base + 1];
+        const float x2 = in[base + 2];
+        const float x3 = in[base + 3];
+        const float x4 = in[base + 4];
+        const float x5 = in[base + 5];
+        const float x6 = in[base + 6];
+        const float x7 = in[base + 7];
+
+        // Mirror sums feed the even outputs, mirror differences the odd ones.
+        const float s0 = x0 + x7;
+        const float s1 = x1 + x6;
+        const float s2 = x2 + x5;
+        const float s3 = x3 + x4;
+        const float d0 = x0 - x7;
+        const float d1 = x1 - x6;
+        const float d2 = x2 - x5;
+        const float d3 = x3 - x4;
+
+        // Even half: 4-point network on the sums.
+        const float ev_a = s0 + s3;
+        const float ev_b = s1 + s2;
+        const float ev_c = s0 - s3;
+        const float ev_d = s1 - s2;
+        const float ev_sum = ev_a + ev_b;
+        const float ev_diff = ev_a - ev_b;
+        const float ev_r0 = ev_c * kN4_0;
+        const float ev_r1 = ev_d * kN4_1;
+        const float ev_rs = ev_r0 + ev_r1;
+        const float ev_rd = ev_r0 - ev_r1;
+        const float ev_mix = ev_rs * kSqrt2 + ev_rd;
+
+        // Odd half: scale the differences, then run the same 4-point network.
+        const float od0 = d0 * kN8_0;
+        const float od1 = d1 * kN8_1;
+        const float od2 = d2 * kN8_2;
+        const float od3 = d3 * kN8_3;
+        const float od_a = od0 + od3;
+        const float od_b = od1 + od2;
+        const float od_c = od0 - od3;
+        const float od_d = od1 - od2;
+        const float od_sum = od_a + od_b;
+        const float od_diff = od_a - od_b;
+        const float od_r0 = od_c * kN4_0;
+        const float od_r1 = od_d * kN4_1;
+        const float od_rs = od_r0 + od_r1;
+        const float od_rd = od_r0 - od_r1;
+        const float od_mix = od_rs * kSqrt2 + od_rd;
+
+        // Interleave: even half -> even indices, odd half -> odd indices.
+        out[base + 0] = ev_sum;
+        out[base + 1] = od_sum * kSqrt2 + od_mix;
+        out[base + 2] = ev_mix;
+        out[base + 3] = od_mix + od_diff;
+        out[base + 4] = ev_diff;
+        out[base + 5] = od_diff + od_rd;
+        out[base + 6] = ev_rd;
+        out[base + 7] = od_rd;
+    }
+}
+
+// Writes the transpose of an 8x8 row-major block, with every element
+// multiplied by 1/8.
+//
+// in  : 64 source floats.
+// out : 64 destination floats; out[c * 8 + r] = in[r * 8 + c] / 8.
+void hls_TransposeBlock8(float in[64], float out[64]) {
+#pragma HLS INLINE off
+    const float scale = 1.0f / 8.0f;
+loop_transposeBlock8:
+    for (int row = 0; row < 8; row++) {
+        for (int col = 0; col < 8; col++) {
+#pragma HLS pipeline II = 1
+            const int src = row * 8 + col;
+            const int dst = col * 8 + row;
+            out[dst] = scale * in[src];
+        }
+    }
+}
+
+// Applies an 8-point butterfly network to every 8-float row of a 1024-float
+// buffer organised as 4 x 4 groups (by, bx) of 8 rows (x) each.
+//
+// in  : 1024 source floats; the row for (by, bx, x) starts at
+//       8 * x + 64 * bx + 256 * by.
+// out : 1024 destination floats, written at the same addresses.
+// No scaling is applied in this function.
+void dct8_block(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+    // The literals are narrowed to float, so all arithmetic below is float.
+    float kMultipliers_N8_c1 = 0.5097955791041592;
+    float kMultipliers_N8_c2 = 0.6013448869350453;
+    float kMultipliers_N8_c3 = 0.8999762231364156;
+    float kMultipliers_N8_c4 = 2.5629154477415055;
+    float kMultipliers_N4_c1 = 0.541196100146197;
+    float kMultipliers_N4_c2 = 1.3065629648763764;
+    float sqrt2 = 1.4142135623730951f;
+
+loop_dct_block:
+    for (ap_uint<8> by = 0; by < 4; by++) {
+        for (ap_uint<8> bx = 0; bx < 4; bx++) {
+            for (ap_uint<8> x = 0; x < 8; x++) {
+#pragma HLS DEPENDENCE variable = in inter false
+#pragma HLS DEPENDENCE variable = out inter false
+#pragma HLS LOOP_FLATTEN off
+#pragma HLS pipeline
+                // Start of the current 8-float row.
+                int addr = 8 * x + bx * 64 + by * 256;
+
+                // Load the whole row once.
+                float mem_0 = in[addr + 0];
+                float mem_1 = in[addr + 1];
+                float mem_2 = in[addr + 2];
+                float mem_3 = in[addr + 3];
+                float mem_4 = in[addr + 4];
+                float mem_5 = in[addr + 5];
+                float mem_6 = in[addr + 6];
+                float mem_7 = in[addr + 7];
+
+                // Mirror sums (tmp8_0..3) and mirror differences (tmp8_4..7).
+                float tmp8_0 = mem_0 + mem_7;
+                float tmp8_1 = mem_1 + mem_6;
+                float tmp8_2 = mem_2 + mem_5;
+                float tmp8_3 = mem_3 + mem_4;
+                float tmp8_4 = mem_0 - mem_7;
+                float tmp8_5 = mem_1 - mem_6;
+                float tmp8_6 = mem_2 - mem_5;
+                float tmp8_7 = mem_3 - mem_4;
+
+                float t00 = tmp8_0 + tmp8_3;
+                float t01 = tmp8_1 + tmp8_2;
+                float t02 = tmp8_0 - tmp8_3;
+                float t03 = tmp8_1 - tmp8_2;
+
+                // Scale the differences before they enter the second network.
+                float t16 = tmp8_4 * kMultipliers_N8_c1;
+                float t17 = tmp8_5 * kMultipliers_N8_c2;
+                float t18 = tmp8_6 * kMultipliers_N8_c3;
+                float t19 = tmp8_7 * kMultipliers_N8_c4;
+
+                // tmp 0~3
+                float t04 = t00 + t01;
+                float t05 = t00 - t01;
+                float t06 = t02 * kMultipliers_N4_c1;
+                float t07 = t03 * kMultipliers_N4_c2;
+
+                // t08/t09 and t12/t13/t15 are plain copies of earlier values.
+                float t08 = t04;
+                float t09 = t05;
+                float t10 = t06 + t07;
+                float t11 = t06 - t07;
+
+                float t12 = t08;
+                float t13 = t09;
+                float t14 = t10 * sqrt2 + t11;
+                float t15 = t11;
+                // tmp 4~7
+                float t00_a = t16 + t19;
+                float t01_a = t17 + t18;
+                float t02_a = t16 - t19;
+                float t03_a = t17 - t18;
+
+                float t04_a = t00_a + t01_a;
+                float t05_a = t00_a - t01_a;
+                float t06_a = t02_a * kMultipliers_N4_c1;
+                float t07_a = t03_a * kMultipliers_N4_c2;
+
+                float t08_a = t04_a;
+                float t09_a = t05_a;
+                float t10_a = t06_a + t07_a;
+                float t11_a = t06_a - t07_a;
+
+                float t12_a = t08_a;
+                float t13_a = t09_a;
+                float t14_a = t10_a * sqrt2 + t11_a;
+                float t15_a = t11_a;
+
+                // tmp8_out0..3 come from the sum half, tmp8_out4..7 from the
+                // difference half.
+                float tmp8_out0 = t12;
+                float tmp8_out1 = t14;
+                float tmp8_out2 = t13;
+                float tmp8_out3 = t15;
+                float tmp8_out4 = t12_a * sqrt2 + t14_a;
+                float tmp8_out5 = t14_a + t13_a;
+                float tmp8_out6 = t13_a + t15_a;
+                float tmp8_out7 = t15_a;
+
+                // Interleave: sum half -> even indices, difference half -> odd.
+                out[addr + 0] = tmp8_out0;
+                out[addr + 1] = tmp8_out4;
+                out[addr + 2] = tmp8_out1;
+                out[addr + 3] = tmp8_out5;
+                out[addr + 4] = tmp8_out2;
+                out[addr + 5] = tmp8_out6;
+                out[addr + 6] = tmp8_out3;
+                out[addr + 7] = tmp8_out7;
+            }
+        }
+    }
+}
+
+// Copies 1024 floats from in to out with a 1/8 scale, swapping two 3-bit
+// fields of the 10-bit address.
+//
+// Address bits [9:8] carry by and bits [4:3] carry bx on both sides. The
+// source has q in bits [7:5] and p in bits [2:0]; the destination has p in
+// bits [7:5] and q in bits [2:0].
+void hls_TransposeBlock_dct8(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+    const float scale = 1.0f / 8.0f;
+    for (ap_uint<8> by = 0; by < 4; by++) {
+        for (ap_uint<8> bx = 0; bx < 4; bx++) {
+            for (ap_uint<8> p = 0; p < 8; p++) {
+                for (ap_uint<8> q = 0; q < 8; q++) {
+#pragma HLS pipeline II = 1
+                    ap_uint<10> src, dst;
+
+                    // Fields shared by both addresses.
+                    src(9, 8) = by(1, 0);
+                    dst(9, 8) = by(1, 0);
+                    src(4, 3) = bx(1, 0);
+                    dst(4, 3) = bx(1, 0);
+
+                    // Fields that trade places between source and destination.
+                    src(7, 5) = q(2, 0);
+                    src(2, 0) = p(2, 0);
+                    dst(7, 5) = p(2, 0);
+                    dst(2, 0) = q(2, 0);
+
+                    out[dst] = scale * in[src];
+                }
+            }
+        }
+    }
+}
+
+// Duplicates a 64-coefficient block into to_ac and additionally copies
+// coefficient 0 into to_dc[0].
+//
+// in    : 64 source floats.
+// to_ac : receives all 64 values unchanged (including index 0).
+// to_dc : receives in[0].
+void split_ac_dc_dct8(float in[64], float to_ac[64], float to_dc[1]) {
+#pragma HLS INLINE off
+    for (int row = 0; row < 8; row++) {
+        for (int col = 0; col < 8; col++) {
+#pragma HLS PIPELINE II = 1
+            const int idx = 8 * row + col;
+            const float coef = in[idx];
+            to_ac[idx] = coef;
+            if (idx == 0) {
+                to_dc[0] = coef;
+            }
+        }
+    }
+}
+
+// Forwards one 64-coefficient block to ac_coef8x8_stream when the block lies
+// inside the rectangle announced on the two size streams.
+//
+// x8, y8        : block coordinates compared against the rectangle size.
+// stream_recty  : supplies the rectangle height; one value is read per call.
+// stream_rectx  : supplies the rectangle width; one value is read per call.
+// in            : 64 coefficients, written out in index order 0..63.
+// The size streams are always consumed, even when nothing is written.
+void feed_ac_dct8(uint32_t x8,
+                  uint32_t y8,
+                  hls::stream<uint8_t>& stream_recty,
+                  hls::stream<uint8_t>& stream_rectx,
+                  float in[64],
+                  hls::stream<float>& ac_coef8x8_stream) {
+#pragma HLS INLINE off
+    uint8_t limit_x;
+    uint8_t limit_y;
+hls_feed_b64:
+    for (int m = 0; m < 8; m++) {
+        for (int n = 0; n < 8; n++) {
+#pragma HLS PIPELINE II = 1
+            // The limits are fetched on the very first iteration only.
+            const bool first = (m == 0) && (n == 0);
+            if (first) {
+                limit_x = stream_rectx.read();
+                limit_y = stream_recty.read();
+            }
+            const bool inside = (x8 < limit_x) && (y8 < limit_y);
+            if (inside) {
+                ac_coef8x8_stream.write(in[m * 8 + n]);
+            }
+        }
+    }
+}
+
+// Forwards in[0] to dc_coef8x8_stream when the block lies inside the
+// rectangle announced on the two size streams.
+//
+// x8, y8        : block coordinates compared against the rectangle size.
+// stream_recty  : supplies the rectangle height; one value is read per call.
+// stream_rectx  : supplies the rectangle width; one value is read per call.
+// in            : single-element buffer holding the value to forward.
+// The loop still runs 64 iterations; only the first one reads or writes.
+void feed_dc_dct8(uint32_t x8,
+                  uint32_t y8,
+                  hls::stream<uint8_t>& stream_recty,
+                  hls::stream<uint8_t>& stream_rectx,
+                  float in[1],
+                  hls::stream<float>& dc_coef8x8_stream) {
+#pragma HLS INLINE off
+    uint8_t limit_x;
+    uint8_t limit_y;
+hls_feed_b64:
+    for (int m = 0; m < 8; m++) {
+        for (int n = 0; n < 8; n++) {
+#pragma HLS PIPELINE II = 1
+            const bool first = (m == 0) && (n == 0);
+            if (first) {
+                limit_x = stream_rectx.read();
+                limit_y = stream_recty.read();
+            }
+            // `first` is tested before the limits, so they are never read
+            // before having been loaded.
+            if (first && x8 < limit_x && y8 < limit_y) {
+                dc_coef8x8_stream.write(in[0]);
+            }
+        }
+    }
+}
+
+// Streams 8x8 blocks through a two-pass transform pipeline and emits the
+// results on the AC and DC coefficient streams.
+//
+// ysize, xsize          : image dimensions; both are rounded up to a multiple
+//                         of 64 to obtain the number of 64x64 tiles.
+// stream_recty8/rectx8  : rectangle sizes consumed by feed_ac_dct8 (one value
+//                         from each per block and channel).
+// stream_recty8_1/rectx8_1 : same, consumed by feed_dc_dct8.
+// opsin8x8_stream       : input samples; 64 floats are read for every
+//                         (tile, y8, x8, channel) iteration, unconditionally.
+// ac_coef8x8_stream     : output written by feed_ac_dct8.
+// dc_coef8x8_stream     : output written by feed_dc_dct8.
+void hls_dct8x8_module(unsigned ysize,
+                       unsigned xsize,
+                       hls::stream<uint8_t>& stream_recty8,
+                       hls::stream<uint8_t>& stream_rectx8,
+                       hls::stream<uint8_t>& stream_recty8_1,
+                       hls::stream<uint8_t>& stream_rectx8_1,
+                       hls::stream<float>& opsin8x8_stream,
+                       hls::stream<float>& ac_coef8x8_stream,
+                       hls::stream<float>& dc_coef8x8_stream) {
+#pragma HLS INLINE off
+    // Image size rounded up to whole 64x64 tiles.
+    int tile_xsize = (xsize + 63) / 64 * 64;
+    int tile_ysize = (ysize + 63) / 64 * 64;
+
+    uint32_t ysize64 = tile_ysize / 64;
+    uint32_t xsize64 = tile_xsize / 64;
+    // Number of complete 8x8 blocks (integer division truncates).
+    int xsize_blocks = xsize / 8;
+    int ysize_blocks = ysize / 8;
+
+loop_dct8_all:
+    for (uint32_t y64 = 0; y64 < ysize64; y64++) {
+        for (uint32_t x64 = 0; x64 < xsize64; x64++) {
+            // Block extent of this tile clipped to the image. These values are
+            // not used further in this function; the feed stages take their
+            // rectangle sizes from the streams instead.
+            // NOTE(review): fmin is applied to int operands here, so the
+            // result goes through double before being stored back to int.
+            int tx1 = x64;
+            int ty1 = y64;
+            int by = ty1 * 8;
+            int by1 = fmin((int)((ty1 + 1) * 8), ysize_blocks);
+            int bx = tx1 * 8;
+            int bx1 = fmin((int)((tx1 + 1) * 8), xsize_blocks);
+            int rect_ysize = by1 - by;
+            int rect_xsize = bx1 - bx;
+        loop_dct8_tile:
+            for (uint32_t y8 = 0; y8 < 8; y8++) {
+                for (uint32_t x8 = 0; x8 < 8; x8++) {
+                    for (int c = 0; c < 3; c++) {
+#pragma HLS DATAFLOW
+                        // One buffer per pipeline stage, each bound to
+                        // two-port BRAM.
+                        float temp0[64];
+#pragma HLS bind_storage variable = temp0 type = ram_2p impl = bram
+                        float temp1[64];
+#pragma HLS bind_storage variable = temp1 type = ram_2p impl = bram
+                        float temp2[64];
+#pragma HLS bind_storage variable = temp2 type = ram_2p impl = bram
+                        float temp3[64];
+#pragma HLS bind_storage variable = temp3 type = ram_2p impl = bram
+                        float temp4[64];
+#pragma HLS bind_storage variable = temp4 type = ram_2p impl = bram
+                        float to_ac[64];
+#pragma HLS bind_storage variable = to_ac type = ram_2p impl = bram
+                        float to_dc[1];
+#pragma HLS bind_storage variable = to_dc type = ram_2p impl = bram
+
+                    load_b64:
+                        for (int m = 0; m < 8; m++) {
+                            for (int n = 0; n < 8; n++) {
+#pragma HLS PIPELINE II = 1
+                                // Stored transposed: the k-th value read lands
+                                // at index n * 8 + m.
+                                int addr = n * 8 + m;
+                                temp0[addr] = opsin8x8_stream.read();
+                            }
+                        }
+
+                        // Row pass, transpose with 1/8 scale, second row pass,
+                        // transpose with 1/8 scale.
+                        hls_DCT1DImpl_8x8(temp0, temp1);
+                        hls_TransposeBlock8(temp1, temp2);
+                        hls_DCT1DImpl_8x8(temp2, temp3);
+                        hls_TransposeBlock8(temp3, temp4);
+                        // Duplicate the block for the two independent writers.
+                        split_ac_dc_dct8(temp4, to_ac, to_dc);
+                        feed_ac_dct8(x8, y8, stream_recty8, stream_rectx8, to_ac, ac_coef8x8_stream);
+                        feed_dc_dct8(x8, y8, stream_recty8_1, stream_rectx8_1, to_dc, dc_coef8x8_stream);
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Applies a 16-point butterfly network to the first 16 elements of `in` and
+// writes 16 results to the first 16 elements of `out`.
+//
+// in  : declared as 256 floats; only in[0..15] are read.
+// out : declared as 256 floats; only out[0..15] are written.
+// NOTE(review): there is no loop over rows here, unlike hls_dct16_block, which
+// runs the same arithmetic for 16 rows -- confirm this single-row form is intended.
+void hls_DCT1DImpl_16(float in[256], float out[256]) {
+#pragma HLS INLINE off
+    // The literals are narrowed to float, so all arithmetic below is float.
+    float kMultipliers_N8_c1 = 0.5097955791041592;
+
+    float kMultipliers_N8_c2 = 0.6013448869350453;
+    float kMultipliers_N8_c3 = 0.8999762231364156;
+    float kMultipliers_N8_c4 = 2.5629154477415055;
+    float kMultipliers_N4_c1 = 0.541196100146197;
+    float kMultipliers_N4_c2 = 1.3065629648763764;
+
+    float kMultipliers_N16_0 = 0.5024192861881557;
+    float kMultipliers_N16_1 = 0.5224986149396889;
+    float kMultipliers_N16_2 = 0.5669440348163577;
+    float kMultipliers_N16_3 = 0.6468217833599901;
+    float kMultipliers_N16_4 = 0.7881546234512502;
+    float kMultipliers_N16_5 = 1.060677685990347;
+    float kMultipliers_N16_6 = 1.7224470982383342;
+    float kMultipliers_N16_7 = 5.101148618689155;
+    float sqrt2 = 1.4142135623730951f;
+
+    // Mirror sums (tmp16_0..7) and mirror differences (tmp16_8..15).
+    float tmp16_0 = in[0] + in[15];
+    float tmp16_1 = in[1] + in[14];
+    float tmp16_2 = in[2] + in[13];
+    float tmp16_3 = in[3] + in[12];
+    float tmp16_4 = in[4] + in[11];
+    float tmp16_5 = in[5] + in[10];
+    float tmp16_6 = in[6] + in[9];
+    float tmp16_7 = in[7] + in[8];
+    float tmp16_8 = in[0] - in[15];
+    float tmp16_9 = in[1] - in[14];
+    float tmp16_10 = in[2] - in[13];
+    float tmp16_11 = in[3] - in[12];
+    float tmp16_12 = in[4] - in[11];
+    float tmp16_13 = in[5] - in[10];
+    float tmp16_14 = in[6] - in[9];
+    float tmp16_15 = in[7] - in[8];
+
+    // t-chain: 8-point network on the sums; produces the even-indexed outputs.
+    float tmp8_0 = tmp16_0 + tmp16_7;
+    float tmp8_1 = tmp16_1 + tmp16_6;
+    float tmp8_2 = tmp16_2 + tmp16_5;
+    float tmp8_3 = tmp16_3 + tmp16_4;
+    float tmp8_4 = tmp16_0 - tmp16_7;
+    float tmp8_5 = tmp16_1 - tmp16_6;
+    float tmp8_6 = tmp16_2 - tmp16_5;
+    float tmp8_7 = tmp16_3 - tmp16_4;
+
+    float t00 = tmp8_0 + tmp8_3;
+    float t01 = tmp8_1 + tmp8_2;
+    float t02 = tmp8_0 - tmp8_3;
+    float t03 = tmp8_1 - tmp8_2;
+    float t04 = tmp8_4 * kMultipliers_N8_c1;
+    float t05 = tmp8_5 * kMultipliers_N8_c2;
+    float t06 = tmp8_6 * kMultipliers_N8_c3;
+    float t07 = tmp8_7 * kMultipliers_N8_c4;
+
+    float t08 = t02 * kMultipliers_N4_c1;
+    float t09 = t03 * kMultipliers_N4_c2;
+    float t10 = t04 + t07;
+    float t11 = t05 + t06;
+    float t12 = t04 - t07;
+    float t13 = t05 - t06;
+
+    float t14 = t08 + t09;
+    float t15 = t10 + t11;
+    float t16 = t08 - t09;
+    float t17 = t10 - t11;
+
+    float t18 = t12 * kMultipliers_N4_c1;
+    float t19 = t13 * kMultipliers_N4_c2;
+    float t20 = t14 * sqrt2;
+    float t21 = t15 * sqrt2;
+
+    float t22 = t18 + t19;
+    float t23 = t18 - t19;
+
+    float t24 = t22 * sqrt2;
+
+    float t25 = t24 + t23;
+
+    // d-chain: scale the differences by the N16 multipliers, then run the
+    // same 8-point network; produces the odd-indexed outputs.
+    float t26 = kMultipliers_N16_0 * tmp16_8;
+    float t27 = kMultipliers_N16_1 * tmp16_9;
+    float t28 = kMultipliers_N16_2 * tmp16_10;
+    float t29 = kMultipliers_N16_3 * tmp16_11;
+    float t30 = kMultipliers_N16_4 * tmp16_12;
+    float t31 = kMultipliers_N16_5 * tmp16_13;
+    float t32 = kMultipliers_N16_6 * tmp16_14;
+    float t33 = kMultipliers_N16_7 * tmp16_15;
+
+    float dmp8_0 = t26 + t33;
+    float dmp8_1 = t27 + t32;
+    float dmp8_2 = t28 + t31;
+    float dmp8_3 = t29 + t30;
+    float dmp8_4 = t26 - t33;
+    float dmp8_5 = t27 - t32;
+    float dmp8_6 = t28 - t31;
+    float dmp8_7 = t29 - t30;
+
+    float d00 = dmp8_0 + dmp8_3;
+    float d01 = dmp8_1 + dmp8_2;
+    float d02 = dmp8_0 - dmp8_3;
+    float d03 = dmp8_1 - dmp8_2;
+    float d04 = dmp8_4 * kMultipliers_N8_c1;
+    float d05 = dmp8_5 * kMultipliers_N8_c2;
+    float d06 = dmp8_6 * kMultipliers_N8_c3;
+    float d07 = dmp8_7 * kMultipliers_N8_c4;
+
+    float d08 = d02 * kMultipliers_N4_c1;
+    float d09 = d03 * kMultipliers_N4_c2;
+    float d10 = d04 + d07;
+    float d11 = d05 + d06;
+    float d12 = d04 - d07;
+    float d13 = d05 - d06;
+
+    float d14 = d08 + d09;
+    float d15 = d10 + d11;
+    float d16 = d08 - d09;
+    float d17 = d10 - d11;
+
+    float d18 = d12 * kMultipliers_N4_c1;
+    float d19 = d13 * kMultipliers_N4_c2;
+    float d20 = d14 * sqrt2;
+    float d21 = d15 * sqrt2;
+
+    float d22 = d18 + d19;
+    float d23 = d18 - d19;
+
+    float d24 = d22 * sqrt2;
+
+    float d25 = d24 + d23;
+
+    // Intermediate terms of the d-chain; the odd outputs below are sums of
+    // neighbouring terms from this list.
+    float d26 = d00 + d01;
+    float d27 = d21 + d25;
+    float d28 = d20 + d16;
+    float d29 = d25 + d17;
+    float d30 = d00 - d01;
+    float d31 = d17 + d23;
+    float d32 = d26 * sqrt2;
+
+    // Even indices come from the t-chain, odd indices from the d-chain.
+    out[0] = t00 + t01;
+    out[1] = d32 + d27;
+    out[2] = t21 + t25;
+    out[3] = d27 + d28;
+    out[4] = t20 + t16;
+    out[5] = d28 + d29;
+    out[6] = t25 + t17;
+    out[7] = d29 + d30;
+    out[8] = t00 - t01;
+    out[9] = d30 + d31;
+    out[10] = t17 + t23;
+    out[11] = d31 + d16;
+    out[12] = t16;
+    out[13] = d16 + d23;
+    out[14] = t23;
+    out[15] = d23;
+}
+
+// Applies a 16-point butterfly network to each of the 16 rows of a 16x16
+// block.
+//
+// in  : 256 floats; row i occupies in[16 * i] .. in[16 * i + 15].
+// out : 256 floats, same layout; each row receives its 16 results.
+// No scaling is applied in this function.
+void hls_dct16_block(float in[256], float out[256]) {
+#pragma HLS INLINE off
+    // The literals are narrowed to float, so all arithmetic below is float.
+    float kMultipliers_N8_c1 = 0.5097955791041592;
+
+    float kMultipliers_N8_c2 = 0.6013448869350453;
+    float kMultipliers_N8_c3 = 0.8999762231364156;
+    float kMultipliers_N8_c4 = 2.5629154477415055;
+    float kMultipliers_N4_c1 = 0.541196100146197;
+    float kMultipliers_N4_c2 = 1.3065629648763764;
+
+    float kMultipliers_N16_0 = 0.5024192861881557;
+    float kMultipliers_N16_1 = 0.5224986149396889;
+    float kMultipliers_N16_2 = 0.5669440348163577;
+    float kMultipliers_N16_3 = 0.6468217833599901;
+    float kMultipliers_N16_4 = 0.7881546234512502;
+    float kMultipliers_N16_5 = 1.060677685990347;
+    float kMultipliers_N16_6 = 1.7224470982383342;
+    float kMultipliers_N16_7 = 5.101148618689155;
+    float sqrt2 = 1.4142135623730951f;
+
+    // One row per iteration.
+    for (int i = 0; i < 16; i++) {
+#pragma HLS DEPENDENCE variable = in inter false
+#pragma HLS DEPENDENCE variable = out inter false
+#pragma HLS LOOP_FLATTEN off
+#pragma HLS pipeline II = 11
+        // Mirror sums (tmp16_0..7) and mirror differences (tmp16_8..15).
+        float tmp16_0 = in[16 * i + 0] + in[16 * i + 15];
+        float tmp16_1 = in[16 * i + 1] + in[16 * i + 14];
+        float tmp16_2 = in[16 * i + 2] + in[16 * i + 13];
+        float tmp16_3 = in[16 * i + 3] + in[16 * i + 12];
+        float tmp16_4 = in[16 * i + 4] + in[16 * i + 11];
+        float tmp16_5 = in[16 * i + 5] + in[16 * i + 10];
+        float tmp16_6 = in[16 * i + 6] + in[16 * i + 9];
+        float tmp16_7 = in[16 * i + 7] + in[16 * i + 8];
+        float tmp16_8 = in[16 * i + 0] - in[16 * i + 15];
+        float tmp16_9 = in[16 * i + 1] - in[16 * i + 14];
+        float tmp16_10 = in[16 * i + 2] - in[16 * i + 13];
+        float tmp16_11 = in[16 * i + 3] - in[16 * i + 12];
+        float tmp16_12 = in[16 * i + 4] - in[16 * i + 11];
+        float tmp16_13 = in[16 * i + 5] - in[16 * i + 10];
+        float tmp16_14 = in[16 * i + 6] - in[16 * i + 9];
+        float tmp16_15 = in[16 * i + 7] - in[16 * i + 8];
+
+        // t-chain: 8-point network on the sums; produces the even-indexed
+        // outputs.
+        float tmp8_0 = tmp16_0 + tmp16_7;
+        float tmp8_1 = tmp16_1 + tmp16_6;
+        float tmp8_2 = tmp16_2 + tmp16_5;
+        float tmp8_3 = tmp16_3 + tmp16_4;
+        float tmp8_4 = tmp16_0 - tmp16_7;
+        float tmp8_5 = tmp16_1 - tmp16_6;
+        float tmp8_6 = tmp16_2 - tmp16_5;
+        float tmp8_7 = tmp16_3 - tmp16_4;
+
+        float t00 = tmp8_0 + tmp8_3;
+        float t01 = tmp8_1 + tmp8_2;
+        float t02 = tmp8_0 - tmp8_3;
+        float t03 = tmp8_1 - tmp8_2;
+        float t04 = tmp8_4 * kMultipliers_N8_c1;
+        float t05 = tmp8_5 * kMultipliers_N8_c2;
+        float t06 = tmp8_6 * kMultipliers_N8_c3;
+        float t07 = tmp8_7 * kMultipliers_N8_c4;
+
+        float t08 = t02 * kMultipliers_N4_c1;
+        float t09 = t03 * kMultipliers_N4_c2;
+        float t10 = t04 + t07;
+        float t11 = t05 + t06;
+        float t12 = t04 - t07;
+        float t13 = t05 - t06;
+
+        float t14 = t08 + t09;
+        float t15 = t10 + t11;
+        float t16 = t08 - t09;
+        float t17 = t10 - t11;
+
+        float t18 = t12 * kMultipliers_N4_c1;
+        float t19 = t13 * kMultipliers_N4_c2;
+        float t20 = t14 * sqrt2;
+        float t21 = t15 * sqrt2;
+
+        float t22 = t18 + t19;
+        float t23 = t18 - t19;
+
+        float t24 = t22 * sqrt2;
+
+        float t25 = t24 + t23;
+
+        // d-chain: scale the differences by the N16 multipliers, then run the
+        // same 8-point network; produces the odd-indexed outputs.
+        float t26 = kMultipliers_N16_0 * tmp16_8;
+        float t27 = kMultipliers_N16_1 * tmp16_9;
+        float t28 = kMultipliers_N16_2 * tmp16_10;
+        float t29 = kMultipliers_N16_3 * tmp16_11;
+        float t30 = kMultipliers_N16_4 * tmp16_12;
+        float t31 = kMultipliers_N16_5 * tmp16_13;
+        float t32 = kMultipliers_N16_6 * tmp16_14;
+        float t33 = kMultipliers_N16_7 * tmp16_15;
+
+        float dmp8_0 = t26 + t33;
+        float dmp8_1 = t27 + t32;
+        float dmp8_2 = t28 + t31;
+        float dmp8_3 = t29 + t30;
+        float dmp8_4 = t26 - t33;
+        float dmp8_5 = t27 - t32;
+        float dmp8_6 = t28 - t31;
+        float dmp8_7 = t29 - t30;
+
+        float d00 = dmp8_0 + dmp8_3;
+        float d01 = dmp8_1 + dmp8_2;
+        float d02 = dmp8_0 - dmp8_3;
+        float d03 = dmp8_1 - dmp8_2;
+        float d04 = dmp8_4 * kMultipliers_N8_c1;
+        float d05 = dmp8_5 * kMultipliers_N8_c2;
+        float d06 = dmp8_6 * kMultipliers_N8_c3;
+        float d07 = dmp8_7 * kMultipliers_N8_c4;
+
+        float d08 = d02 * kMultipliers_N4_c1;
+        float d09 = d03 * kMultipliers_N4_c2;
+        float d10 = d04 + d07;
+        float d11 = d05 + d06;
+        float d12 = d04 - d07;
+        float d13 = d05 - d06;
+
+        float d14 = d08 + d09;
+        float d15 = d10 + d11;
+        float d16 = d08 - d09;
+        float d17 = d10 - d11;
+
+        float d18 = d12 * kMultipliers_N4_c1;
+        float d19 = d13 * kMultipliers_N4_c2;
+        float d20 = d14 * sqrt2;
+        float d21 = d15 * sqrt2;
+
+        float d22 = d18 + d19;
+        float d23 = d18 - d19;
+
+        float d24 = d22 * sqrt2;
+
+        float d25 = d24 + d23;
+
+        // Intermediate terms of the d-chain; the odd outputs below are sums
+        // of neighbouring terms from this list.
+        float d26 = d00 + d01;
+        float d27 = d21 + d25;
+        float d28 = d20 + d16;
+        float d29 = d25 + d17;
+        float d30 = d00 - d01;
+        float d31 = d17 + d23;
+        float d32 = d26 * sqrt2;
+
+        // Even indices come from the t-chain, odd indices from the d-chain.
+        out[16 * i + 0] = t00 + t01;
+        out[16 * i + 1] = d32 + d27;
+        out[16 * i + 2] = t21 + t25;
+        out[16 * i + 3] = d27 + d28;
+        out[16 * i + 4] = t20 + t16;
+        out[16 * i + 5] = d28 + d29;
+        out[16 * i + 6] = t25 + t17;
+        out[16 * i + 7] = d29 + d30;
+        out[16 * i + 8] = t00 - t01;
+        out[16 * i + 9] = d30 + d31;
+        out[16 * i + 10] = t17 + t23;
+        out[16 * i + 11] = d31 + d16;
+        out[16 * i + 12] = t16;
+        out[16 * i + 13] = d16 + d23;
+        out[16 * i + 14] = t23;
+        out[16 * i + 15] = d23;
+    }
+}
+
+// Combines four input values into four outputs with a fixed resampling
+// factor followed by a 2x2 sum/difference stage.
+//
+// input  : four floats; input[1] and input[2] are scaled once by the factor,
+//          input[3] twice, input[0] not at all.
+// output : four floats receiving the sum/difference combinations.
+void hls_ReinterpretingIDCT16(float input[4], float output[4]) {
+#pragma HLS INLINE off
+    const float kResample = 0.901764214038848876953125;
+
+    // Scaled inputs (all reads happen before any write to output).
+    const float a0 = input[0];
+    const float a1 = input[1] * kResample;
+    const float a2 = input[2] * kResample;
+    const float a3 = input[3] * kResample * kResample;
+
+    // Pair index 0 with 2 and index 1 with 3.
+    const float sum02 = a0 + a2;
+    const float diff02 = a0 - a2;
+    const float sum13 = a1 + a3;
+    const float diff13 = a1 - a3;
+
+    output[0] = sum02 + sum13;
+    output[1] = diff02 + diff13;
+    output[2] = sum02 - sum13;
+    output[3] = diff02 - diff13;
+}
+
+// dct 16x16
+// Fills a 16x16 row-major buffer from a stream that delivers the data as
+// four consecutive 8x8 sub-blocks.
+//
+// in                 : 256-float destination buffer (written, not read).
+// opsin16x16_stream  : source; 256 values are read. Sub-blocks arrive in the
+//                      order (y8, x8) = (0,0), (0,1), (1,0), (1,1), each one
+//                      row by row.
+void load_dct16(float in[256], hls::stream<float>& opsin16x16_stream) {
+#pragma HLS INLINE off
+    for (int y8 = 0; y8 < 2; y8++) {
+        for (int x8 = 0; x8 < 2; x8++) {
+            for (int m = 0; m < 8; m++) {
+                for (int n = 0; n < 8; n++) {
+#pragma HLS PIPELINE II = 1
+                    // Position of this sample inside the full 16x16 block.
+                    const int row = y8 * 8 + m;
+                    const int col = x8 * 8 + n;
+                    in[row * 16 + col] = opsin16x16_stream.read();
+                }
+            }
+        }
+    }
+}
+
+// Writes the transpose of a 16x16 row-major block; values are copied
+// unchanged.
+//
+// in  : 256 source floats.
+// out : 256 destination floats; out[c * 16 + r] = in[r * 16 + c].
+void transposeDct16(float in[256], float out[256]) {
+#pragma HLS INLINE off
+    for (int row = 0; row < 16; row++) {
+        for (int col = 0; col < 16; col++) {
+#pragma HLS PIPELINE II = 1
+            const int src = row * 16 + col;
+            const int dst = col * 16 + row;
+            out[dst] = in[src];
+        }
+    }
+}
+
+// Writes the transpose of a 16x16 row-major block, with every element
+// multiplied by 1/16.
+//
+// in  : 256 source floats.
+// out : 256 destination floats; out[c * 16 + r] = in[r * 16 + c] / 16.
+void transposeDct16_scale(float in[256], float out[256]) {
+#pragma HLS INLINE off
+    const float scale = 1.0f / 16.0f;
+    for (int row = 0; row < 16; row++) {
+        for (int col = 0; col < 16; col++) {
+#pragma HLS PIPELINE II = 1
+            const int src = row * 16 + col;
+            const int dst = col * 16 + row;
+            out[dst] = scale * in[src];
+        }
+    }
+}
+
+// Forwards one 256-coefficient block to ac_coef16x16_stream when the block
+// lies inside the rectangle announced on the two size streams.
+//
+// to_ac         : 256 coefficients, written out in index order 0..255.
+// stream_recty  : supplies the rectangle height; one value is read per call.
+// stream_rectx  : supplies the rectangle width; one value is read per call.
+// x16, y16      : block coordinates; 2 * x16 + 1 and 2 * y16 + 1 must be
+//                 below the width and height for the block to be emitted.
+// The size streams are always consumed, even when nothing is written.
+void dct16_ac_writeout(float to_ac[256],
+                       hls::stream<uint8_t>& stream_recty,
+                       hls::stream<uint8_t>& stream_rectx,
+                       hls::stream<float>& ac_coef16x16_stream,
+                       uint32_t x16,
+                       uint32_t y16) {
+#pragma HLS INLINE off
+    uint8_t limit_x;
+    uint8_t limit_y;
+    for (int k = 0; k < 256; k++) {
+#pragma HLS PIPELINE II = 1
+        // The limits are fetched on the first iteration only.
+        if (k == 0) {
+            limit_x = stream_rectx.read();
+            limit_y = stream_recty.read();
+        }
+        const bool col_inside = (2 * x16 + 1) < limit_x;
+        const bool row_inside = (2 * y16 + 1) < limit_y;
+        if (col_inside && row_inside) {
+            ac_coef16x16_stream.write(to_ac[k]);
+        }
+    }
+}
+
+// Forwards four values to dc_coef16x16_stream when the block lies inside the
+// rectangle announced on the two size streams.
+//
+// to_dc         : four values, written out in index order 0..3.
+// stream_recty  : supplies the rectangle height; one value is read per call.
+// stream_rectx  : supplies the rectangle width; one value is read per call.
+// x16, y16      : block coordinates; 2 * x16 + 1 and 2 * y16 + 1 must be
+//                 below the width and height for the values to be emitted.
+// The size streams are always consumed, even when nothing is written.
+void dct16_dc_writeout(float to_dc[4],
+                       hls::stream<uint8_t>& stream_recty,
+                       hls::stream<uint8_t>& stream_rectx,
+                       hls::stream<float>& dc_coef16x16_stream,
+                       uint32_t x16,
+                       uint32_t y16) {
+#pragma HLS INLINE off
+    uint8_t limit_x;
+    uint8_t limit_y;
+    for (int k = 0; k < 4; k++) {
+#pragma HLS PIPELINE II = 1
+        // The limits are fetched on the first iteration only.
+        if (k == 0) {
+            limit_x = stream_rectx.read();
+            limit_y = stream_recty.read();
+        }
+        const bool col_inside = (2 * x16 + 1) < limit_x;
+        const bool row_inside = (2 * y16 + 1) < limit_y;
+        if (col_inside && row_inside) {
+            dc_coef16x16_stream.write(to_dc[k]);
+        }
+    }
+}
+
+// Duplicates a 256-coefficient block into ac_out1 and additionally copies
+// the coefficients at indices 0, 1, 16 and 17 into dc_out[0..3].
+//
+// in      : 256 source floats.
+// ac_out1 : receives all 256 values unchanged.
+// dc_out  : receives in[0], in[1], in[16], in[17], in that order.
+void dct16_ac_dc_split(float in[256], float ac_out1[256], float dc_out[4]) {
+#pragma HLS INLINE off
+    for (int i = 0; i < 256; i++) {
+#pragma HLS PIPELINE II = 1
+        const float coef = in[i];
+        ac_out1[i] = coef;
+        switch (i) {
+            case 0:
+                dc_out[0] = coef;
+                break;
+            case 1:
+                dc_out[1] = coef;
+                break;
+            case 16:
+                dc_out[2] = coef;
+                break;
+            case 17:
+                dc_out[3] = coef;
+                break;
+            default:
+                break;
+        }
+    }
+}
+
+// Drives the 16x16 DCT pipeline over every 64x64 tile of the image.
+//
+// For each tile the function walks a 4x4 grid of 16x16 blocks and, for each
+// block, runs three iterations of a DATAFLOW region (loop variable `c`,
+// presumably one per colour channel - confirm against the producer). Each
+// iteration:
+//   1. reads 256 floats from opsin16x16_stream into a row-major buffer,
+//   2. passes them through transposeDct16 / hls_dct16_block /
+//      transposeDct16_scale twice over (helpers defined elsewhere),
+//   3. splits the result with dct16_ac_dc_split,
+//   4. hands the AC part to dct16_ac_writeout and the DC part, after
+//      hls_ReinterpretingIDCT16, to dct16_dc_writeout.
+//
+// Parameters:
+//   ysize, xsize          image size; rounded up to a multiple of 64 here.
+//   stream_rect{y,x}16    forwarded to dct16_ac_writeout.
+//   stream_rect{y,x}16_1  forwarded to dct16_dc_writeout.
+//   opsin16x16_stream     input samples, 256 per block iteration.
+//   ac_coef16x16_stream   AC output, written by dct16_ac_writeout.
+//   dc_coef16x16_stream   DC output, written by dct16_dc_writeout.
+void hls_dct16x16_module(unsigned ysize,
+                         unsigned xsize,
+                         hls::stream<uint8_t>& stream_recty16,
+                         hls::stream<uint8_t>& stream_rectx16,
+                         hls::stream<uint8_t>& stream_recty16_1,
+                         hls::stream<uint8_t>& stream_rectx16_1,
+                         hls::stream<float>& opsin16x16_stream,
+                         hls::stream<float>& ac_coef16x16_stream,
+                         hls::stream<float>& dc_coef16x16_stream) {
+#pragma HLS INLINE off
+
+    // Image dimensions rounded up to whole 64-pixel tiles.
+    int tile_xsize = (xsize + 63) / 64 * 64;
+    int tile_ysize = (ysize + 63) / 64 * 64;
+
+    uint32_t ysize64 = tile_ysize / 64;
+    uint32_t xsize64 = tile_xsize / 64;
+
+    // The per-tile rectangle sizes are not computed here: the writeout helpers
+    // receive the rect streams directly.
+    for (uint32_t y64 = 0; y64 < ysize64; y64++) {
+        for (uint32_t x64 = 0; x64 < xsize64; x64++) {
+            for (uint32_t y16 = 0; y16 < 4; y16++) {
+                for (uint32_t x16 = 0; x16 < 4; x16++) {
+                    for (int c = 0; c < 3; c++) {
+#pragma HLS LOOP_TRIPCOUNT min = 3 max = 3
+#pragma HLS DATAFLOW
+                        float from[256];
+#pragma HLS bind_storage variable = from type = ram_2p impl = bram
+                        float temp0[256];
+#pragma HLS bind_storage variable = temp0 type = ram_2p impl = bram
+                        float temp1[256];
+#pragma HLS bind_storage variable = temp1 type = ram_2p impl = bram
+                        float temp2[256];
+#pragma HLS bind_storage variable = temp2 type = ram_2p impl = bram
+                        float temp3[256];
+#pragma HLS bind_storage variable = temp3 type = ram_2p impl = bram
+                        float temp4[256];
+#pragma HLS bind_storage variable = temp4 type = ram_2p impl = bram
+                        float to_ac[256];
+#pragma HLS bind_storage variable = to_ac type = ram_2p impl = bram
+                        float to_dc[4];
+#pragma HLS bind_storage variable = to_dc type = ram_2p impl = bram
+                        float dc_mem[4];
+#pragma HLS bind_storage variable = dc_mem type = ram_2p impl = bram
+
+                    // Read one 16x16 block from the stream in row-major order.
+                    dct16_test_load:
+                        for (int m = 0; m < 16; m++) {
+                            for (int n = 0; n < 16; n++) {
+#pragma HLS PIPELINE II = 1
+                                int addr = 16 * m + n;
+                                from[addr] = opsin16x16_stream.read();
+                            }
+                        }
+
+                        transposeDct16(from, temp0);
+                        hls_dct16_block(temp0, temp1);
+                        transposeDct16_scale(temp1, temp2);
+                        hls_dct16_block(temp2, temp3);
+                        transposeDct16_scale(temp3, temp4);
+                        dct16_ac_dc_split(temp4, to_ac, to_dc);
+                        // output ac_coeff_stream
+                        dct16_ac_writeout(to_ac, stream_recty16, stream_rectx16, ac_coef16x16_stream, x16, y16);
+                        // output dc_coeff_stream
+                        hls_ReinterpretingIDCT16(to_dc, dc_mem);
+                        dct16_dc_writeout(dc_mem, stream_recty16_1, stream_rectx16_1, dc_coef16x16_stream, x16, y16);
+                    }
+                }
+            }
+        }
+    }
+}
+
+// 32-point 1-D butterfly network (a DCT, per the function name) applied
+// independently to each of the 32 groups of 32 consecutive values in `in`.
+//
+// For group i the inputs are in[32*i .. 32*i+31] and the results are written
+// to out[32*i .. 32*i+31]. The network is a fully unrolled even/odd
+// decomposition:
+//   - "b16" path: sums of mirrored input pairs go through a 16-point
+//     sub-network and land on the even output positions (0, 2, ..., 30);
+//   - "b32" path: differences of mirrored input pairs are scaled by the
+//     kMultipliers_N32_* constants, go through the same 16-point sub-network,
+//     are combined with their neighbours, and land on the odd output
+//     positions (1, 3, ..., 31).
+// No 1/32 normalisation or transpose is applied here (the block that once
+// did so is commented out at the end of the loop body).
+//
+//   in  : 1024 input values, 32 groups of 32.
+//   out : 1024 output values, same grouping.
+// template <bool transpose_scale>
+void hls_DCT1DImpl_32(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+
+    // Multipliers for the 32-point level (applied to the mirrored differences).
+    // Written as double literals and narrowed to float on initialisation.
+    float kMultipliers_N32_0 = 0.5006029982351963;
+    float kMultipliers_N32_1 = 0.5054709598975436;
+    float kMultipliers_N32_2 = 0.5154473099226246;
+    float kMultipliers_N32_3 = 0.531042591089784;
+    float kMultipliers_N32_4 = 0.553103896034444;
+    float kMultipliers_N32_5 = 0.5829349682061339;
+    float kMultipliers_N32_6 = 0.622504123035664;
+    float kMultipliers_N32_7 = 0.674808341455005;
+    float kMultipliers_N32_8 = 0.7445362710022986;
+    float kMultipliers_N32_9 = 0.839349645415526;
+    float kMultipliers_N32_10 = 0.9725682378619608;
+    float kMultipliers_N32_11 = 1.169439933432884;
+    float kMultipliers_N32_12 = 1.4841646163141662;
+    float kMultipliers_N32_13 = 2.057781009953411;
+    float kMultipliers_N32_14 = 3.407608418468719;
+    float kMultipliers_N32_15 = 10.19000812354803;
+
+    float sqrt2 = 1.4142135623730951f;
+
+    // Multipliers for the 8-point and 4-point levels.
+    float kMultipliers_N8_c1 = 0.5097955791041592;
+
+    float kMultipliers_N8_c2 = 0.6013448869350453;
+    float kMultipliers_N8_c3 = 0.8999762231364156;
+    float kMultipliers_N8_c4 = 2.5629154477415055;
+    float kMultipliers_N4_c1 = 0.541196100146197;
+    float kMultipliers_N4_c2 = 1.3065629648763764;
+
+    // Multipliers for the 16-point level.
+    float kMultipliers_N16_0 = 0.5024192861881557;
+    float kMultipliers_N16_1 = 0.5224986149396889;
+    float kMultipliers_N16_2 = 0.5669440348163577;
+    float kMultipliers_N16_3 = 0.6468217833599901;
+    float kMultipliers_N16_4 = 0.7881546234512502;
+    float kMultipliers_N16_5 = 1.060677685990347;
+    float kMultipliers_N16_6 = 1.7224470982383342;
+    float kMultipliers_N16_7 = 5.101148618689155;
+
+    // One iteration per group of 32 consecutive inputs.
+    for (int i = 0; i < 32; i++) {
+#pragma HLS PIPELINE II = 30
+        // ---- b16 path: sums of mirrored pairs (k with 31 - k) ----
+        float tmp32_b16_0 = in[0 + 32 * i] + in[31 + 32 * i];
+        float tmp32_b16_1 = in[1 + 32 * i] + in[30 + 32 * i];
+        float tmp32_b16_2 = in[2 + 32 * i] + in[29 + 32 * i];
+        float tmp32_b16_3 = in[3 + 32 * i] + in[28 + 32 * i];
+        float tmp32_b16_4 = in[4 + 32 * i] + in[27 + 32 * i];
+        float tmp32_b16_5 = in[5 + 32 * i] + in[26 + 32 * i];
+        float tmp32_b16_6 = in[6 + 32 * i] + in[25 + 32 * i];
+        float tmp32_b16_7 = in[7 + 32 * i] + in[24 + 32 * i];
+        float tmp32_b16_8 = in[8 + 32 * i] + in[23 + 32 * i];
+        float tmp32_b16_9 = in[9 + 32 * i] + in[22 + 32 * i];
+        float tmp32_b16_10 = in[10 + 32 * i] + in[21 + 32 * i];
+        float tmp32_b16_11 = in[11 + 32 * i] + in[20 + 32 * i];
+        float tmp32_b16_12 = in[12 + 32 * i] + in[19 + 32 * i];
+        float tmp32_b16_13 = in[13 + 32 * i] + in[18 + 32 * i];
+        float tmp32_b16_14 = in[14 + 32 * i] + in[17 + 32 * i];
+        float tmp32_b16_15 = in[15 + 32 * i] + in[16 + 32 * i];
+
+        // 16-point level: mirrored sums (0..7) and mirrored differences (8..15).
+        float tmp16_0_b16 = tmp32_b16_0 + tmp32_b16_15;
+        float tmp16_1_b16 = tmp32_b16_1 + tmp32_b16_14;
+        float tmp16_2_b16 = tmp32_b16_2 + tmp32_b16_13;
+        float tmp16_3_b16 = tmp32_b16_3 + tmp32_b16_12;
+        float tmp16_4_b16 = tmp32_b16_4 + tmp32_b16_11;
+        float tmp16_5_b16 = tmp32_b16_5 + tmp32_b16_10;
+        float tmp16_6_b16 = tmp32_b16_6 + tmp32_b16_9;
+        float tmp16_7_b16 = tmp32_b16_7 + tmp32_b16_8;
+        float tmp16_8_b16 = tmp32_b16_0 - tmp32_b16_15;
+        float tmp16_9_b16 = tmp32_b16_1 - tmp32_b16_14;
+        float tmp16_10_b16 = tmp32_b16_2 - tmp32_b16_13;
+        float tmp16_11_b16 = tmp32_b16_3 - tmp32_b16_12;
+        float tmp16_12_b16 = tmp32_b16_4 - tmp32_b16_11;
+        float tmp16_13_b16 = tmp32_b16_5 - tmp32_b16_10;
+        float tmp16_14_b16 = tmp32_b16_6 - tmp32_b16_9;
+        float tmp16_15_b16 = tmp32_b16_7 - tmp32_b16_8;
+
+        // 8-point level on the sum half of the 16-point level.
+        float tmp8_0_b16 = tmp16_0_b16 + tmp16_7_b16;
+        float tmp8_1_b16 = tmp16_1_b16 + tmp16_6_b16;
+        float tmp8_2_b16 = tmp16_2_b16 + tmp16_5_b16;
+        float tmp8_3_b16 = tmp16_3_b16 + tmp16_4_b16;
+        float tmp8_4_b16 = tmp16_0_b16 - tmp16_7_b16;
+        float tmp8_5_b16 = tmp16_1_b16 - tmp16_6_b16;
+        float tmp8_6_b16 = tmp16_2_b16 - tmp16_5_b16;
+        float tmp8_7_b16 = tmp16_3_b16 - tmp16_4_b16;
+
+        // 4-point level: sums/differences, plus N8 scaling of the difference half.
+        float t00_b16 = tmp8_0_b16 + tmp8_3_b16;
+        float t01_b16 = tmp8_1_b16 + tmp8_2_b16;
+        float t02_b16 = tmp8_0_b16 - tmp8_3_b16;
+        float t03_b16 = tmp8_1_b16 - tmp8_2_b16;
+        float t04_b16 = tmp8_4_b16 * kMultipliers_N8_c1;
+        float t05_b16 = tmp8_5_b16 * kMultipliers_N8_c2;
+        float t06_b16 = tmp8_6_b16 * kMultipliers_N8_c3;
+        float t07_b16 = tmp8_7_b16 * kMultipliers_N8_c4;
+
+        float t08_b16 = t02_b16 * kMultipliers_N4_c1;
+        float t09_b16 = t03_b16 * kMultipliers_N4_c2;
+        float t10_b16 = t04_b16 + t07_b16;
+        float t11_b16 = t05_b16 + t06_b16;
+        float t12_b16 = t04_b16 - t07_b16;
+        float t13_b16 = t05_b16 - t06_b16;
+
+        float t14_b16 = t08_b16 + t09_b16;
+        float t15_b16 = t10_b16 + t11_b16;
+        float t16_b16 = t08_b16 - t09_b16;
+        float t17_b16 = t10_b16 - t11_b16;
+
+        float t18_b16 = t12_b16 * kMultipliers_N4_c1;
+        float t19_b16 = t13_b16 * kMultipliers_N4_c2;
+        float t20_b16 = t14_b16 * sqrt2;
+        float t21_b16 = t15_b16 * sqrt2;
+
+        float t22_b16 = t18_b16 + t19_b16;
+        float t23_b16 = t18_b16 - t19_b16;
+
+        float t24_b16 = t22_b16 * sqrt2;
+
+        float t25_b16 = t24_b16 + t23_b16;
+
+        // Difference half of the 16-point level: scale by the N16 multipliers,
+        // then run the same 8-point structure (d* mirrors t* above).
+        float t26_b16 = kMultipliers_N16_0 * tmp16_8_b16;
+        float t27_b16 = kMultipliers_N16_1 * tmp16_9_b16;
+        float t28_b16 = kMultipliers_N16_2 * tmp16_10_b16;
+        float t29_b16 = kMultipliers_N16_3 * tmp16_11_b16;
+        float t30_b16 = kMultipliers_N16_4 * tmp16_12_b16;
+        float t31_b16 = kMultipliers_N16_5 * tmp16_13_b16;
+        float t32_b16 = kMultipliers_N16_6 * tmp16_14_b16;
+        float t33_b16 = kMultipliers_N16_7 * tmp16_15_b16;
+
+        float dmp8_0_b16 = t26_b16 + t33_b16;
+        float dmp8_1_b16 = t27_b16 + t32_b16;
+        float dmp8_2_b16 = t28_b16 + t31_b16;
+        float dmp8_3_b16 = t29_b16 + t30_b16;
+        float dmp8_4_b16 = t26_b16 - t33_b16;
+        float dmp8_5_b16 = t27_b16 - t32_b16;
+        float dmp8_6_b16 = t28_b16 - t31_b16;
+        float dmp8_7_b16 = t29_b16 - t30_b16;
+
+        float d00_b16 = dmp8_0_b16 + dmp8_3_b16;
+        float d01_b16 = dmp8_1_b16 + dmp8_2_b16;
+        float d02_b16 = dmp8_0_b16 - dmp8_3_b16;
+        float d03_b16 = dmp8_1_b16 - dmp8_2_b16;
+        float d04_b16 = dmp8_4_b16 * kMultipliers_N8_c1;
+        float d05_b16 = dmp8_5_b16 * kMultipliers_N8_c2;
+        float d06_b16 = dmp8_6_b16 * kMultipliers_N8_c3;
+        float d07_b16 = dmp8_7_b16 * kMultipliers_N8_c4;
+
+        float d08_b16 = d02_b16 * kMultipliers_N4_c1;
+        float d09_b16 = d03_b16 * kMultipliers_N4_c2;
+        float d10_b16 = d04_b16 + d07_b16;
+        float d11_b16 = d05_b16 + d06_b16;
+        float d12_b16 = d04_b16 - d07_b16;
+        float d13_b16 = d05_b16 - d06_b16;
+
+        float d14_b16 = d08_b16 + d09_b16;
+        float d15_b16 = d10_b16 + d11_b16;
+        float d16_b16 = d08_b16 - d09_b16;
+        float d17_b16 = d10_b16 - d11_b16;
+
+        float d18_b16 = d12_b16 * kMultipliers_N4_c1;
+        float d19_b16 = d13_b16 * kMultipliers_N4_c2;
+        float d20_b16 = d14_b16 * sqrt2;
+        float d21_b16 = d15_b16 * sqrt2;
+
+        float d22_b16 = d18_b16 + d19_b16;
+        float d23_b16 = d18_b16 - d19_b16;
+
+        float d24_b16 = d22_b16 * sqrt2;
+
+        float d25_b16 = d24_b16 + d23_b16;
+
+        float d26_b16 = d00_b16 + d01_b16;
+        float d27_b16 = d21_b16 + d25_b16;
+        float d28_b16 = d20_b16 + d16_b16;
+        float d29_b16 = d25_b16 + d17_b16;
+        float d30_b16 = d00_b16 - d01_b16;
+        float d31_b16 = d17_b16 + d23_b16;
+        float d32_b16 = d26_b16 * sqrt2;
+
+        // Results of the b16 path: even indices come from the t* chain, odd
+        // indices from neighbouring sums of the d* chain.
+        float tmp32_b16_out1_0 = t00_b16 + t01_b16;
+        float tmp32_b16_out1_1 = d32_b16 + d27_b16;
+        float tmp32_b16_out1_2 = t21_b16 + t25_b16;
+        float tmp32_b16_out1_3 = d27_b16 + d28_b16;
+        float tmp32_b16_out1_4 = t20_b16 + t16_b16;
+        float tmp32_b16_out1_5 = d28_b16 + d29_b16;
+        float tmp32_b16_out1_6 = t25_b16 + t17_b16;
+        float tmp32_b16_out1_7 = d29_b16 + d30_b16;
+        float tmp32_b16_out1_8 = t00_b16 - t01_b16;
+        float tmp32_b16_out1_9 = d30_b16 + d31_b16;
+        float tmp32_b16_out1_10 = t17_b16 + t23_b16;
+        float tmp32_b16_out1_11 = d31_b16 + d16_b16;
+        float tmp32_b16_out1_12 = t16_b16;
+        float tmp32_b16_out1_13 = d16_b16 + d23_b16;
+        float tmp32_b16_out1_14 = t23_b16;
+        float tmp32_b16_out1_15 = d23_b16;
+
+        // ---- b32 path: differences of mirrored pairs (k with 31 - k) ----
+        float tmp32_b32_add_sub_16 = in[0 + 32 * i] - in[31 + 32 * i];
+        float tmp32_b32_add_sub_17 = in[1 + 32 * i] - in[30 + 32 * i];
+        float tmp32_b32_add_sub_18 = in[2 + 32 * i] - in[29 + 32 * i];
+        float tmp32_b32_add_sub_19 = in[3 + 32 * i] - in[28 + 32 * i];
+        float tmp32_b32_add_sub_20 = in[4 + 32 * i] - in[27 + 32 * i];
+        float tmp32_b32_add_sub_21 = in[5 + 32 * i] - in[26 + 32 * i];
+        float tmp32_b32_add_sub_22 = in[6 + 32 * i] - in[25 + 32 * i];
+        float tmp32_b32_add_sub_23 = in[7 + 32 * i] - in[24 + 32 * i];
+        float tmp32_b32_add_sub_24 = in[8 + 32 * i] - in[23 + 32 * i];
+        float tmp32_b32_add_sub_25 = in[9 + 32 * i] - in[22 + 32 * i];
+        float tmp32_b32_add_sub_26 = in[10 + 32 * i] - in[21 + 32 * i];
+        float tmp32_b32_add_sub_27 = in[11 + 32 * i] - in[20 + 32 * i];
+        float tmp32_b32_add_sub_28 = in[12 + 32 * i] - in[19 + 32 * i];
+        float tmp32_b32_add_sub_29 = in[13 + 32 * i] - in[18 + 32 * i];
+        float tmp32_b32_add_sub_30 = in[14 + 32 * i] - in[17 + 32 * i];
+        float tmp32_b32_add_sub_31 = in[15 + 32 * i] - in[16 + 32 * i];
+
+        // Scale each difference by its 32-point multiplier.
+        float tmp32_b32_mul_16 = tmp32_b32_add_sub_16 * kMultipliers_N32_0;
+        float tmp32_b32_mul_17 = tmp32_b32_add_sub_17 * kMultipliers_N32_1;
+        float tmp32_b32_mul_18 = tmp32_b32_add_sub_18 * kMultipliers_N32_2;
+        float tmp32_b32_mul_19 = tmp32_b32_add_sub_19 * kMultipliers_N32_3;
+        float tmp32_b32_mul_20 = tmp32_b32_add_sub_20 * kMultipliers_N32_4;
+        float tmp32_b32_mul_21 = tmp32_b32_add_sub_21 * kMultipliers_N32_5;
+        float tmp32_b32_mul_22 = tmp32_b32_add_sub_22 * kMultipliers_N32_6;
+        float tmp32_b32_mul_23 = tmp32_b32_add_sub_23 * kMultipliers_N32_7;
+        float tmp32_b32_mul_24 = tmp32_b32_add_sub_24 * kMultipliers_N32_8;
+        float tmp32_b32_mul_25 = tmp32_b32_add_sub_25 * kMultipliers_N32_9;
+        float tmp32_b32_mul_26 = tmp32_b32_add_sub_26 * kMultipliers_N32_10;
+        float tmp32_b32_mul_27 = tmp32_b32_add_sub_27 * kMultipliers_N32_11;
+        float tmp32_b32_mul_28 = tmp32_b32_add_sub_28 * kMultipliers_N32_12;
+        float tmp32_b32_mul_29 = tmp32_b32_add_sub_29 * kMultipliers_N32_13;
+        float tmp32_b32_mul_30 = tmp32_b32_add_sub_30 * kMultipliers_N32_14;
+        float tmp32_b32_mul_31 = tmp32_b32_add_sub_31 * kMultipliers_N32_15;
+
+        // From here the b32 path repeats the 16-point structure of the b16 path.
+        float tmp16_0_b32 = tmp32_b32_mul_16 + tmp32_b32_mul_31;
+        float tmp16_1_b32 = tmp32_b32_mul_17 + tmp32_b32_mul_30;
+        float tmp16_2_b32 = tmp32_b32_mul_18 + tmp32_b32_mul_29;
+        float tmp16_3_b32 = tmp32_b32_mul_19 + tmp32_b32_mul_28;
+        float tmp16_4_b32 = tmp32_b32_mul_20 + tmp32_b32_mul_27;
+        float tmp16_5_b32 = tmp32_b32_mul_21 + tmp32_b32_mul_26;
+        float tmp16_6_b32 = tmp32_b32_mul_22 + tmp32_b32_mul_25;
+        float tmp16_7_b32 = tmp32_b32_mul_23 + tmp32_b32_mul_24;
+        float tmp16_8_b32 = tmp32_b32_mul_16 - tmp32_b32_mul_31;
+        float tmp16_9_b32 = tmp32_b32_mul_17 - tmp32_b32_mul_30;
+        float tmp16_10_b32 = tmp32_b32_mul_18 - tmp32_b32_mul_29;
+        float tmp16_11_b32 = tmp32_b32_mul_19 - tmp32_b32_mul_28;
+        float tmp16_12_b32 = tmp32_b32_mul_20 - tmp32_b32_mul_27;
+        float tmp16_13_b32 = tmp32_b32_mul_21 - tmp32_b32_mul_26;
+        float tmp16_14_b32 = tmp32_b32_mul_22 - tmp32_b32_mul_25;
+        float tmp16_15_b32 = tmp32_b32_mul_23 - tmp32_b32_mul_24;
+
+        float tmp8_0_b32 = tmp16_0_b32 + tmp16_7_b32;
+        float tmp8_1_b32 = tmp16_1_b32 + tmp16_6_b32;
+        float tmp8_2_b32 = tmp16_2_b32 + tmp16_5_b32;
+        float tmp8_3_b32 = tmp16_3_b32 + tmp16_4_b32;
+        float tmp8_4_b32 = tmp16_0_b32 - tmp16_7_b32;
+        float tmp8_5_b32 = tmp16_1_b32 - tmp16_6_b32;
+        float tmp8_6_b32 = tmp16_2_b32 - tmp16_5_b32;
+        float tmp8_7_b32 = tmp16_3_b32 - tmp16_4_b32;
+
+        float t00_b32 = tmp8_0_b32 + tmp8_3_b32;
+        float t01_b32 = tmp8_1_b32 + tmp8_2_b32;
+        float t02_b32 = tmp8_0_b32 - tmp8_3_b32;
+        float t03_b32 = tmp8_1_b32 - tmp8_2_b32;
+        float t04_b32 = tmp8_4_b32 * kMultipliers_N8_c1;
+        float t05_b32 = tmp8_5_b32 * kMultipliers_N8_c2;
+        float t06_b32 = tmp8_6_b32 * kMultipliers_N8_c3;
+        float t07_b32 = tmp8_7_b32 * kMultipliers_N8_c4;
+
+        float t08_b32 = t02_b32 * kMultipliers_N4_c1;
+        float t09_b32 = t03_b32 * kMultipliers_N4_c2;
+        float t10_b32 = t04_b32 + t07_b32;
+        float t11_b32 = t05_b32 + t06_b32;
+        float t12_b32 = t04_b32 - t07_b32;
+        float t13_b32 = t05_b32 - t06_b32;
+
+        float t14_b32 = t08_b32 + t09_b32;
+        float t15_b32 = t10_b32 + t11_b32;
+        float t16_b32 = t08_b32 - t09_b32;
+        float t17_b32 = t10_b32 - t11_b32;
+
+        float t18_b32 = t12_b32 * kMultipliers_N4_c1;
+        float t19_b32 = t13_b32 * kMultipliers_N4_c2;
+        float t20_b32 = t14_b32 * sqrt2;
+        float t21_b32 = t15_b32 * sqrt2;
+
+        float t22_b32 = t18_b32 + t19_b32;
+        float t23_b32 = t18_b32 - t19_b32;
+
+        float t24_b32 = t22_b32 * sqrt2;
+
+        float t25_b32 = t24_b32 + t23_b32;
+
+        float t26_b32 = kMultipliers_N16_0 * tmp16_8_b32;
+        float t27_b32 = kMultipliers_N16_1 * tmp16_9_b32;
+        float t28_b32 = kMultipliers_N16_2 * tmp16_10_b32;
+        float t29_b32 = kMultipliers_N16_3 * tmp16_11_b32;
+        float t30_b32 = kMultipliers_N16_4 * tmp16_12_b32;
+        float t31_b32 = kMultipliers_N16_5 * tmp16_13_b32;
+        float t32_b32 = kMultipliers_N16_6 * tmp16_14_b32;
+        float t33_b32 = kMultipliers_N16_7 * tmp16_15_b32;
+
+        float dmp8_0_b32 = t26_b32 + t33_b32;
+        float dmp8_1_b32 = t27_b32 + t32_b32;
+        float dmp8_2_b32 = t28_b32 + t31_b32;
+        float dmp8_3_b32 = t29_b32 + t30_b32;
+        float dmp8_4_b32 = t26_b32 - t33_b32;
+        float dmp8_5_b32 = t27_b32 - t32_b32;
+        float dmp8_6_b32 = t28_b32 - t31_b32;
+        float dmp8_7_b32 = t29_b32 - t30_b32;
+
+        float d00_b32 = dmp8_0_b32 + dmp8_3_b32;
+        float d01_b32 = dmp8_1_b32 + dmp8_2_b32;
+        float d02_b32 = dmp8_0_b32 - dmp8_3_b32;
+        float d03_b32 = dmp8_1_b32 - dmp8_2_b32;
+        float d04_b32 = dmp8_4_b32 * kMultipliers_N8_c1;
+        float d05_b32 = dmp8_5_b32 * kMultipliers_N8_c2;
+        float d06_b32 = dmp8_6_b32 * kMultipliers_N8_c3;
+        float d07_b32 = dmp8_7_b32 * kMultipliers_N8_c4;
+
+        float d08_b32 = d02_b32 * kMultipliers_N4_c1;
+        float d09_b32 = d03_b32 * kMultipliers_N4_c2;
+        float d10_b32 = d04_b32 + d07_b32;
+        float d11_b32 = d05_b32 + d06_b32;
+        float d12_b32 = d04_b32 - d07_b32;
+        float d13_b32 = d05_b32 - d06_b32;
+
+        float d14_b32 = d08_b32 + d09_b32;
+        float d15_b32 = d10_b32 + d11_b32;
+        float d16_b32 = d08_b32 - d09_b32;
+        float d17_b32 = d10_b32 - d11_b32;
+
+        float d18_b32 = d12_b32 * kMultipliers_N4_c1;
+        float d19_b32 = d13_b32 * kMultipliers_N4_c2;
+        float d20_b32 = d14_b32 * sqrt2;
+        float d21_b32 = d15_b32 * sqrt2;
+
+        float d22_b32 = d18_b32 + d19_b32;
+        float d23_b32 = d18_b32 - d19_b32;
+
+        float d24_b32 = d22_b32 * sqrt2;
+
+        float d25_b32 = d24_b32 + d23_b32;
+
+        float d26_b32 = d00_b32 + d01_b32;
+        float d27_b32 = d21_b32 + d25_b32;
+        float d28_b32 = d20_b32 + d16_b32;
+        float d29_b32 = d25_b32 + d17_b32;
+        float d30_b32 = d00_b32 - d01_b32;
+        float d31_b32 = d17_b32 + d23_b32;
+        float d32_b32 = d26_b32 * sqrt2;
+
+        // 16-point results of the b32 path, before the final neighbour sums.
+        float tmp32_b32_add_out2_16 = t00_b32 + t01_b32;
+        float tmp32_b32_add_out2_17 = d32_b32 + d27_b32;
+        float tmp32_b32_add_out2_18 = t21_b32 + t25_b32;
+        float tmp32_b32_add_out2_19 = d27_b32 + d28_b32;
+        float tmp32_b32_add_out2_20 = t20_b32 + t16_b32;
+        float tmp32_b32_add_out2_21 = d28_b32 + d29_b32;
+        float tmp32_b32_add_out2_22 = t25_b32 + t17_b32;
+        float tmp32_b32_add_out2_23 = d29_b32 + d30_b32;
+        float tmp32_b32_add_out2_24 = t00_b32 - t01_b32;
+        float tmp32_b32_add_out2_25 = d30_b32 + d31_b32;
+        float tmp32_b32_add_out2_26 = t17_b32 + t23_b32;
+        float tmp32_b32_add_out2_27 = d31_b32 + d16_b32;
+        float tmp32_b32_add_out2_28 = t16_b32;
+        float tmp32_b32_add_out2_29 = d16_b32 + d23_b32;
+        float tmp32_b32_add_out2_30 = t23_b32;
+        float tmp32_b32_add_out2_31 = d23_b32;
+
+        // Final combination: the first term is scaled by sqrt2, then every
+        // term is added to its successor; the last term passes through.
+        float tmp32_b32_out2_16 = tmp32_b32_add_out2_16 * sqrt2 + tmp32_b32_add_out2_17;
+        float tmp32_b32_out2_17 = tmp32_b32_add_out2_17 + tmp32_b32_add_out2_18;
+        float tmp32_b32_out2_18 = tmp32_b32_add_out2_18 + tmp32_b32_add_out2_19;
+        float tmp32_b32_out2_19 = tmp32_b32_add_out2_19 + tmp32_b32_add_out2_20;
+        float tmp32_b32_out2_20 = tmp32_b32_add_out2_20 + tmp32_b32_add_out2_21;
+        float tmp32_b32_out2_21 = tmp32_b32_add_out2_21 + tmp32_b32_add_out2_22;
+        float tmp32_b32_out2_22 = tmp32_b32_add_out2_22 + tmp32_b32_add_out2_23;
+        float tmp32_b32_out2_23 = tmp32_b32_add_out2_23 + tmp32_b32_add_out2_24;
+        float tmp32_b32_out2_24 = tmp32_b32_add_out2_24 + tmp32_b32_add_out2_25;
+        float tmp32_b32_out2_25 = tmp32_b32_add_out2_25 + tmp32_b32_add_out2_26;
+        float tmp32_b32_out2_26 = tmp32_b32_add_out2_26 + tmp32_b32_add_out2_27;
+        float tmp32_b32_out2_27 = tmp32_b32_add_out2_27 + tmp32_b32_add_out2_28;
+        float tmp32_b32_out2_28 = tmp32_b32_add_out2_28 + tmp32_b32_add_out2_29;
+        float tmp32_b32_out2_29 = tmp32_b32_add_out2_29 + tmp32_b32_add_out2_30;
+        float tmp32_b32_out2_30 = tmp32_b32_add_out2_30 + tmp32_b32_add_out2_31;
+        float tmp32_b32_out2_31 = tmp32_b32_add_out2_31;
+
+        // Interleave: b16 results go to the even output positions...
+        out[0 + 32 * i] = tmp32_b16_out1_0;
+        out[2 + 32 * i] = tmp32_b16_out1_1;
+        out[4 + 32 * i] = tmp32_b16_out1_2;
+        out[6 + 32 * i] = tmp32_b16_out1_3;
+        out[8 + 32 * i] = tmp32_b16_out1_4;
+        out[10 + 32 * i] = tmp32_b16_out1_5;
+        out[12 + 32 * i] = tmp32_b16_out1_6;
+        out[14 + 32 * i] = tmp32_b16_out1_7;
+        out[16 + 32 * i] = tmp32_b16_out1_8;
+        out[18 + 32 * i] = tmp32_b16_out1_9;
+        out[20 + 32 * i] = tmp32_b16_out1_10;
+        out[22 + 32 * i] = tmp32_b16_out1_11;
+        out[24 + 32 * i] = tmp32_b16_out1_12;
+        out[26 + 32 * i] = tmp32_b16_out1_13;
+        out[28 + 32 * i] = tmp32_b16_out1_14;
+        out[30 + 32 * i] = tmp32_b16_out1_15;
+
+        // ...and b32 results to the odd output positions.
+        out[1 + 32 * i] = tmp32_b32_out2_16;
+        out[3 + 32 * i] = tmp32_b32_out2_17;
+        out[5 + 32 * i] = tmp32_b32_out2_18;
+        out[7 + 32 * i] = tmp32_b32_out2_19;
+        out[9 + 32 * i] = tmp32_b32_out2_20;
+        out[11 + 32 * i] = tmp32_b32_out2_21;
+        out[13 + 32 * i] = tmp32_b32_out2_22;
+        out[15 + 32 * i] = tmp32_b32_out2_23;
+        out[17 + 32 * i] = tmp32_b32_out2_24;
+        out[19 + 32 * i] = tmp32_b32_out2_25;
+        out[21 + 32 * i] = tmp32_b32_out2_26;
+        out[23 + 32 * i] = tmp32_b32_out2_27;
+        out[25 + 32 * i] = tmp32_b32_out2_28;
+        out[27 + 32 * i] = tmp32_b32_out2_29;
+        out[29 + 32 * i] = tmp32_b32_out2_30;
+        out[31 + 32 * i] = tmp32_b32_out2_31;
+
+        // Disabled: in-function transpose variant tied to the commented-out
+        // template parameter above.
+        //         if (transpose_scale) {
+        //             for (int i = 0; i < 32; i++) {
+        //                 for (int j = 0; j < 32; j++) {
+        // #pragma HLS PIPELINE II = 1
+        //                     float mul = 1.0f / 32.0f;
+        //                     out[32 * j + i] = out[i * 32 + j];
+        //                 }
+        //             }
+        //         }
+    }
+}
+
+// 4-point 1-D inverse butterfly (an IDCT, per the function name) applied to
+// four interleaved sequences of a 16-element buffer.
+//
+// For each i in 0..3 the function reads from[i], from[i + 4], from[i + 8] and
+// from[i + 12] (stride 4, i.e. one column of a row-major 4x4 block) and writes
+// the four results to the same positions of `to`.
+//
+//   from : 16 input coefficients.
+//   to   : 16 output values; every element is written.
+void hls_IDCT1D_32(float from[16], float to[16]) {
+#pragma HLS INLINE off
+    float IDCT_kMUltipliers_N4_0 = 0.541196100146197;
+    float IDCT_kMUltipliers_N4_1 = 1.3065629648763764;
+    float sqrt2 = 1.4142135623730951f;
+
+    for (int i = 0; i < 4; i++) {
+#pragma HLS PIPELINE II = 128
+        // Even-indexed coefficients (0 and 2) first, then odd-indexed (1 and 3).
+        float tmp_IDCT_in_0 = from[i + 0];
+        float tmp_IDCT_in_1 = from[i + 8];
+        float tmp_IDCT_in_2 = from[i + 4];
+        float tmp_IDCT_in_3 = from[i + 12];
+
+        // Even half: plain sum and difference.
+        float tmp_IDCT_add_0 = tmp_IDCT_in_0 + tmp_IDCT_in_1;
+        float tmp_IDCT_add_1 = tmp_IDCT_in_0 - tmp_IDCT_in_1;
+
+        // Odd half: first term scaled by sqrt2, second term accumulates the first.
+        float in1_dct = tmp_IDCT_in_2 * sqrt2;
+        float in2_dct = tmp_IDCT_in_3 + tmp_IDCT_in_2;
+
+        float tmp_IDCT_add_2 = in1_dct + in2_dct;
+        float tmp_IDCT_add_3 = in1_dct - in2_dct;
+
+        // Combine: outputs 0/3 and 1/2 are mirrored around the even half.
+        to[i + 0] = IDCT_kMUltipliers_N4_0 * tmp_IDCT_add_2 + tmp_IDCT_add_0;
+        to[i + 4] = IDCT_kMUltipliers_N4_1 * tmp_IDCT_add_3 + tmp_IDCT_add_1;
+        to[i + 8] = tmp_IDCT_add_1 - IDCT_kMUltipliers_N4_1 * tmp_IDCT_add_3;
+        to[i + 12] = tmp_IDCT_add_0 - IDCT_kMUltipliers_N4_0 * tmp_IDCT_add_2;
+    }
+}
+
+// Scales every entry of a row-major 4x4 block by a row factor and a column
+// factor: out[y * 4 + x] = in[y * 4 + x] * factor[y] * factor[x].
+//
+//   in  : 16 input values.
+//   out : 16 scaled values; every element is written.
+void hls_idct32_scale_2d(float in[16], float out[16]) {
+#pragma HLS INLINE off
+    // Scale factor per index; rows and columns share the same four values.
+    const float resample[4] = {1.0f, 0.974886834621429443359375, 0.901764214038848876953125,
+                               0.78705489635467529296875};
+
+Loop_idct32_1:
+    for (int row = 0; row < 4; row++) {
+    Loop_idct32_2:
+        for (int col = 0; col < 4; col++) {
+#pragma HLS PIPELINE
+            const int pos = row * 4 + col;
+            const float row_gain = resample[row];
+            const float col_gain = resample[col];
+            // Row factor first, then column factor, matching the rounding order.
+            out[pos] = in[pos] * row_gain * col_gain;
+        }
+    }
+}
+
+// Transposes a row-major 4x4 block: out[r * 4 + c] = in[c * 4 + r].
+//
+//   in  : 16 input values.
+//   out : 16 output values; must not alias `in`, since elements are
+//         overwritten while others are still being read.
+void hls_idct_transpose4x4(float in[16], float out[16]) {
+#pragma HLS INLINE off
+Loop_idct_transpose:
+    for (int dst_row = 0; dst_row < 4; dst_row++) {
+        for (int dst_col = 0; dst_col < 4; dst_col++) {
+#pragma HLS PIPELINE
+            const int src = dst_col * 4 + dst_row;
+            const int dst = dst_row * 4 + dst_col;
+            out[dst] = in[src];
+        }
+    }
+}
+
+// void hls_ReinterpretingIDCT32(float input[16], float output[16]) {
+// #pragma HLS INLINE
+//     float temp0[16];
+// #pragma HLS bind_storage variable = temp0 type = ram_2p impl = bram
+//     float temp1[16];
+// #pragma HLS bind_storage variable = temp1 type = ram_2p impl = bram
+//     float temp2[16];
+// #pragma HLS bind_storage variable = temp2 type = ram_2p impl = bram
+//     hls_idct32_scale_2d(input, temp0);
+//     hls_IDCT1D_32(temp0, temp1);
+//     hls_idct_transpose4x4(temp1, temp2);
+//     hls_IDCT1D_32(temp2, output);
+// }
+
+// Reads 1024 floats from the stream into `from`, storing them transposed:
+// the k-th value read lands at from[(k % 32) * 32 + (k / 32)].
+//
+//   opsin32x32_stream : input stream; exactly 1024 values are consumed.
+//   from              : destination buffer of 1024 floats.
+void load_dct32(hls::stream<float>& opsin32x32_stream, float from[1024]) {
+#pragma HLS INLINE off
+load_dct32:
+    for (int col = 0; col < 32; col++) {
+        for (int row = 0; row < 32; row++) {
+#pragma HLS PIPELINE II = 1
+            // Consecutive stream values walk down a column of the buffer.
+            const float sample = opsin32x32_stream.read();
+            from[row * 32 + col] = sample;
+        }
+    }
+}
+
+// Transposes a row-major 32x32 block: out[c * 32 + r] = in[r * 32 + c].
+//
+//   in  : 1024 input values.
+//   out : 1024 output values; every element is written.
+void transpose_dct32(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+    for (int src_row = 0; src_row < 32; src_row++) {
+        for (int src_col = 0; src_col < 32; src_col++) {
+#pragma HLS PIPELINE II = 1
+            const int src = src_row * 32 + src_col;
+            const int dst = src_col * 32 + src_row;
+            out[dst] = in[src];
+        }
+    }
+}
+
+// Transposes a row-major 32x32 block and multiplies every value by 1/32:
+// out[c * 32 + r] = (1/32) * in[r * 32 + c].
+//
+//   in  : 1024 input values.
+//   out : 1024 output values; every element is written.
+void transpose_scaled_dct32(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+    const float inv32 = 1.0f / 32.0f;
+    for (int src_row = 0; src_row < 32; src_row++) {
+        for (int src_col = 0; src_col < 32; src_col++) {
+#pragma HLS PIPELINE II = 1
+            const int src = src_row * 32 + src_col;
+            const int dst = src_col * 32 + src_row;
+            out[dst] = inv32 * in[src];
+        }
+    }
+}
+
+// Copies a row-major 32x32 block to the AC buffer and its top-left 4x4 corner
+// to the DC buffer.
+//
+//   in    : 1024 input coefficients.
+//   to_ac : receives an unmodified copy of all 1024 inputs.
+//   to_dc : receives in[32 * r + c] for r, c in 0..3, stored at to_dc[r * 4 + c].
+void split_ac_dc_dct32(float in[1024], float to_ac[1024], float to_dc[16]) {
+#pragma HLS INLINE off
+    for (int row = 0; row < 32; row++) {
+        for (int col = 0; col < 32; col++) {
+#pragma HLS PIPELINE II = 1
+            const int pos = 32 * row + col;
+            const float coef = in[pos];
+            to_ac[pos] = coef;
+
+            const bool in_corner = (row < 4) && (col < 4);
+            if (in_corner) {
+                to_dc[row * 4 + col] = coef;
+            }
+        }
+    }
+}
+
+// Multiplies every value of a 32x32 block by 1/32 without changing its layout:
+// out[k] = (1/32) * in[k].
+//
+//   in  : 1024 input values.
+//   out : 1024 output values; every element is written.
+void scaled_dct32(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+    const float inv32 = 1.0f / 32.0f;
+    for (int row = 0; row < 32; row++) {
+        for (int col = 0; col < 32; col++) {
+#pragma HLS PIPELINE II = 1
+            const int pos = row * 32 + col;
+            out[pos] = inv32 * in[pos];
+        }
+    }
+}
+
+// Forwards the 1024 AC coefficients of one 32x32 block to the output stream,
+// but only if the block lies inside the rectangle announced on the rect
+// streams.
+//
+// One value is always consumed from stream_rectx32 and then one from
+// stream_recty32. The block is emitted when (4 * x32 + 3) < rect_xsize and
+// (4 * y32 + 3) < rect_ysize; otherwise nothing is written.
+//
+//   x32, y32            block position within the tile.
+//   stream_rectx32      source of the rectangle width (units not visible here).
+//   stream_recty32      source of the rectangle height.
+//   in                  1024 coefficients, written in index order.
+//   ac_coef32x32_stream destination stream.
+void feed_dct32_ac(uint32_t x32,
+                   uint32_t y32,
+                   hls::stream<uint8_t>& stream_rectx32,
+                   hls::stream<uint8_t>& stream_recty32,
+                   float in[1024],
+                   hls::stream<float>& ac_coef32x32_stream) {
+#pragma HLS INLINE off
+    // The rectangle size is read once, before any coefficient is emitted.
+    const uint32_t rect_xsize = stream_rectx32.read();
+    const uint32_t rect_ysize = stream_recty32.read();
+    const bool block_inside = (4 * x32 + 3) < rect_xsize && (4 * y32 + 3) < rect_ysize;
+loop_feed_dct32_ac:
+    for (int m = 0; m < 1024; m++) {
+#pragma HLS PIPELINE II = 1
+        if (block_inside) {
+            ac_coef32x32_stream.write(in[m]);
+        }
+    }
+}
+
+// Forwards the 16 DC values of one 32x32 block to the output stream, but only
+// if the block lies inside the rectangle announced on the rect streams.
+//
+// One value is always consumed from stream_rectx32 and then one from
+// stream_recty32. The block is emitted when (4 * x32 + 3) < rect_xsize and
+// (4 * y32 + 3) < rect_ysize; otherwise nothing is written.
+//
+//   x32, y32            block position within the tile.
+//   stream_rectx32      source of the rectangle width (units not visible here).
+//   stream_recty32      source of the rectangle height.
+//   dc_mem              16 values, written in index order.
+//   dc_coef32x32_stream destination stream.
+void feed_dct32_dc(uint32_t x32,
+                   uint32_t y32,
+                   hls::stream<uint8_t>& stream_rectx32,
+                   hls::stream<uint8_t>& stream_recty32,
+                   float dc_mem[16],
+                   hls::stream<float>& dc_coef32x32_stream) {
+#pragma HLS INLINE off
+    // The rectangle size is read once, before any value is emitted.
+    const uint32_t rect_xsize = stream_rectx32.read();
+    const uint32_t rect_ysize = stream_recty32.read();
+    const bool block_inside = (4 * x32 + 3) < rect_xsize && (4 * y32 + 3) < rect_ysize;
+loop_feed_dct32_dc:
+    for (int m = 0; m < 16; m++) {
+#pragma HLS PIPELINE II = 1
+        if (block_inside) {
+            dc_coef32x32_stream.write(dc_mem[m]);
+        }
+    }
+}
+
+// Drives the 32x32 DCT pipeline over every 64x64 tile of the image.
+//
+// For each tile the function walks a 2x2 grid of 32x32 blocks and, for each
+// block, runs three iterations of a DATAFLOW region (loop variable `c`,
+// presumably one per colour channel - confirm against the producer). Each
+// iteration:
+//   1. load_dct32 reads 1024 samples from opsin32x32_stream (stored transposed),
+//   2. hls_DCT1DImpl_32 + scaled_dct32 perform the first 1-D pass and 1/32 scale,
+//   3. transpose_dct32, hls_DCT1DImpl_32 and transpose_scaled_dct32 perform the
+//      second pass,
+//   4. split_ac_dc_dct32 separates the full block (AC) from its 4x4 corner (DC),
+//   5. feed_dct32_ac emits the AC block, gated by stream_rect{x,y}32,
+//   6. the DC corner goes through hls_idct32_scale_2d, hls_IDCT1D_32,
+//      hls_idct_transpose4x4 and hls_IDCT1D_32 again, and feed_dct32_dc emits
+//      it, gated by stream_rect{x,y}32_1.
+//
+// Parameters:
+//   ysize, xsize          image size; rounded up to a multiple of 64 here.
+//   stream_rect{y,x}32    rectangle sizes consumed by feed_dct32_ac.
+//   stream_rect{y,x}32_1  rectangle sizes consumed by feed_dct32_dc.
+//   opsin32x32_stream     input samples, 1024 per block iteration.
+//   ac_coef32x32_stream   AC output stream.
+//   dc_coef32x32_stream   DC output stream.
+void hls_dct32x32_module(unsigned ysize,
+                         unsigned xsize,
+                         hls::stream<uint8_t>& stream_recty32,
+                         hls::stream<uint8_t>& stream_rectx32,
+                         hls::stream<uint8_t>& stream_recty32_1,
+                         hls::stream<uint8_t>& stream_rectx32_1,
+                         hls::stream<float>& opsin32x32_stream,
+                         hls::stream<float>& ac_coef32x32_stream,
+                         hls::stream<float>& dc_coef32x32_stream) {
+#pragma HLS INLINE off
+    // Image dimensions rounded up to whole 64-pixel tiles.
+    int tile_xsize = (xsize + 63) / 64 * 64;
+    int tile_ysize = (ysize + 63) / 64 * 64;
+
+    // Unsigned, so the comparisons with the uint32_t loop counters are not mixed-sign.
+    uint32_t ysize64 = tile_ysize / 64;
+    uint32_t xsize64 = tile_xsize / 64;
+
+    // Rectangle sizes are not computed here; feed_dct32_ac / feed_dct32_dc
+    // read them from the rect streams.
+loop_dct32_tile_y:
+    for (uint32_t y64 = 0; y64 < ysize64; y64++) {
+#pragma HLS LOOP_TRIPCOUNT min = 1 max = 1
+    loop_dct32_tile_x:
+        for (uint32_t x64 = 0; x64 < xsize64; x64++) {
+#pragma HLS LOOP_TRIPCOUNT min = 1 max = 1
+        loop_dct32_y:
+            for (uint32_t y32 = 0; y32 < 2; y32++) {
+            loop_dct32_x:
+                for (uint32_t x32 = 0; x32 < 2; x32++) {
+                loop_dct32_c:
+                    for (int c = 0; c < 3; c++) {
+#pragma HLS DATAFLOW
+                        // Each bind_storage pragma names the buffer declared
+                        // immediately above it.
+                        float from[1024];
+#pragma HLS bind_storage variable = from type = ram_2p impl = bram
+                        float temp1[1024];
+#pragma HLS bind_storage variable = temp1 type = ram_2p impl = bram
+                        float temp2[1024];
+#pragma HLS bind_storage variable = temp2 type = ram_2p impl = bram
+                        float temp3[1024];
+#pragma HLS bind_storage variable = temp3 type = ram_2p impl = bram
+                        float temp4[1024];
+#pragma HLS bind_storage variable = temp4 type = ram_2p impl = bram
+                        float temp5[1024];
+#pragma HLS bind_storage variable = temp5 type = ram_2p impl = bram
+                        float to_ac[1024];
+#pragma HLS bind_storage variable = to_ac type = ram_2p impl = bram
+                        float to_dc[16];
+#pragma HLS bind_storage variable = to_dc type = ram_2p impl = bram
+                        float dc_mem[16];
+#pragma HLS bind_storage variable = dc_mem type = ram_2p impl = bram
+                        float dc_temp0[16];
+#pragma HLS bind_storage variable = dc_temp0 type = ram_2p impl = bram
+                        float dc_temp1[16];
+#pragma HLS bind_storage variable = dc_temp1 type = ram_2p impl = bram
+                        float dc_temp2[16];
+#pragma HLS bind_storage variable = dc_temp2 type = ram_2p impl = bram
+
+                        load_dct32(opsin32x32_stream, from);
+                        hls_DCT1DImpl_32(from, temp1);
+                        scaled_dct32(temp1, temp2);
+                        transpose_dct32(temp2, temp3);
+                        hls_DCT1DImpl_32(temp3, temp4);
+                        transpose_scaled_dct32(temp4, temp5);
+                        split_ac_dc_dct32(temp5, to_ac, to_dc);
+                        // output ac_coeff dct32
+                        feed_dct32_ac(x32, y32, stream_rectx32, stream_recty32, to_ac, ac_coef32x32_stream);
+                        // output dc_coeff dct32
+                        hls_idct32_scale_2d(to_dc, dc_temp0);
+                        hls_IDCT1D_32(dc_temp0, dc_temp1);
+                        hls_idct_transpose4x4(dc_temp1, dc_temp2);
+                        hls_IDCT1D_32(dc_temp2, dc_mem);
+                        feed_dct32_dc(x32, y32, stream_rectx32_1, stream_recty32_1, dc_mem, dc_coef32x32_stream);
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Integer division of a by b rounded up, evaluated as (a + b - 1) / b.
+// The rounding-up property holds for non-negative a and positive b.
+int Div_Ceil2(int a, int b) {
+#pragma HLS inline
+    const int biased_numerator = a + b - 1;
+    return biased_numerator / b;
+}
+
+// Fans the per-tile rectangle size out to the DCT8 / DCT16 / DCT32 pipelines.
+//
+// xsize and ysize are divided by 8 to obtain block counts; the tile grid is then
+// Div_Ceil2(ysize_blocks, 8) rows by Div_Ceil2(xsize_blocks, 8) columns. For every tile one
+// rect_ysize and one rect_xsize are read from stream_rect{y,x}_dct and replicated:
+//   - 12 times to stream_rect{x,y}32 and stream_rect{x,y}32_1
+//   - 48 times to stream_rect{x,y}16 and stream_rect{x,y}16_1
+//   - 192 times to stream_rect{x,y}8 and stream_rect{x,y}8_1
+// NOTE(review): the replication counts presumably correspond to 3 channels times the number of
+// 32x32 / 16x16 / 8x8 transform blocks per tile read by the downstream modules -- confirm
+// against the consumers of these streams.
+void GetRectSizeDCT(short xsize,
+                    short ysize,
+                    hls::stream<uint8_t>& stream_rectx_dct,
+                    hls::stream<uint8_t>& stream_recty_dct,
+                    hls::stream<uint8_t>& stream_rectx32,
+                    hls::stream<uint8_t>& stream_recty32,
+                    hls::stream<uint8_t>& stream_rectx32_1,
+                    hls::stream<uint8_t>& stream_recty32_1,
+                    hls::stream<uint8_t>& stream_rectx16,
+                    hls::stream<uint8_t>& stream_recty16,
+                    hls::stream<uint8_t>& stream_rectx16_1,
+                    hls::stream<uint8_t>& stream_recty16_1,
+                    hls::stream<uint8_t>& stream_rectx8,
+                    hls::stream<uint8_t>& stream_recty8,
+                    hls::stream<uint8_t>& stream_rectx8_1,
+                    hls::stream<uint8_t>& stream_recty8_1) {
+    uint16_t xsize_blocks = xsize / 8;
+    uint16_t ysize_blocks = ysize / 8;
+LOOP_0:
+    for (uint16_t y = 0; y < Div_Ceil2(ysize_blocks, 8); y++) {
+    LOOP_1:
+        for (uint16_t x = 0; x < Div_Ceil2(xsize_blocks, 8); x++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+            // The rectangle size used to be computed locally (kept below for reference); it is
+            // now taken from the input streams instead.
+            // uint16_t by = y * 8;
+            // uint16_t by1 = ((y + 1) * 8) < ysize_blocks ? ((y + 1) * 8) : ysize_blocks;
+            // uint16_t bx = x * 8;
+            // uint16_t bx1 = ((x + 1) * 8) < xsize_blocks ? ((x + 1) * 8) : xsize_blocks;
+            // uint8_t rect_ysize = by1 - by;
+            // uint8_t rect_xsize = bx1 - bx;
+            uint8_t rect_ysize = stream_recty_dct.read();
+            uint8_t rect_xsize = stream_rectx_dct.read();
+            // printf("rect_xsize=%d, rect_ysize=%d\n", rect_xsize, rect_ysize);
+            // Single replication loop: the first 12 iterations also feed the 32x32 streams, the
+            // first 48 also feed the 16x16 streams, and all 192 feed the 8x8 streams.
+            for (int i = 0; i < 192; i++) {
+                if (i < 12) {
+                    stream_rectx32.write(rect_xsize);
+                    stream_recty32.write(rect_ysize);
+                    stream_rectx32_1.write(rect_xsize);
+                    stream_recty32_1.write(rect_ysize);
+                }
+                if (i < 48) {
+                    stream_rectx16.write(rect_xsize);
+                    stream_recty16.write(rect_ysize);
+                    stream_rectx16_1.write(rect_xsize);
+                    stream_recty16_1.write(rect_ysize);
+                }
+                stream_rectx8.write(rect_xsize);
+                stream_recty8.write(rect_ysize);
+                stream_rectx8_1.write(rect_xsize);
+                stream_recty8_1.write(rect_ysize);
+            }
+        }
+    }
+}
+
+// Top level of the DCT stage: distributes the per-tile rectangle sizes through GetRectSizeDCT
+// and then invokes the 8x8, 16x16 and 32x32 DCT modules.
+//
+// ysize, xsize            sizes forwarded to GetRectSizeDCT and to the three modules
+// stream_rect{x,y}_dct    per-tile rectangle sizes, consumed by GetRectSizeDCT
+// opsin{8x8,16x16,32x32}  one stream per transform size, passed to the matching module
+//                         (presumably the input samples -- confirm against the modules)
+// ac_coef*/dc_coef*       one AC and one DC stream per transform size, passed to the matching
+//                         module (presumably its outputs -- confirm against the modules)
+//
+// NOTE(review): ysize/xsize are `unsigned` here but GetRectSizeDCT takes `short`, so values
+// above 32767 would be narrowed at that call.
+void hls_dct_top(unsigned ysize,
+                 unsigned xsize,
+                 hls::stream<uint8_t>& stream_rectx_dct,
+                 hls::stream<uint8_t>& stream_recty_dct,
+                 hls::stream<float>& opsin8x8_stream,
+                 hls::stream<float>& opsin16x16_stream,
+                 hls::stream<float>& opsin32x32_stream,
+                 hls::stream<float>& ac_coef8x8_stream,
+                 hls::stream<float>& ac_coef16x16_stream,
+                 hls::stream<float>& ac_coef32x32_stream,
+                 hls::stream<float>& dc_coef8x8_stream,
+                 hls::stream<float>& dc_coef16x16_stream,
+                 hls::stream<float>& dc_coef32x32_stream) {
+    // #pragma HLS INLINE
+    // Internal rectangle-size streams, two x/y pairs per transform size (the "_1" pair is a
+    // second copy); each is declared with a depth of 1024.
+    hls::stream<uint8_t, 1024> stream_rectx32;
+    hls::stream<uint8_t, 1024> stream_recty32;
+    hls::stream<uint8_t, 1024> stream_rectx32_1;
+    hls::stream<uint8_t, 1024> stream_recty32_1;
+    hls::stream<uint8_t, 1024> stream_rectx16;
+    hls::stream<uint8_t, 1024> stream_recty16;
+    hls::stream<uint8_t, 1024> stream_rectx16_1;
+    hls::stream<uint8_t, 1024> stream_recty16_1;
+    hls::stream<uint8_t, 1024> stream_rectx8;
+    hls::stream<uint8_t, 1024> stream_recty8;
+    hls::stream<uint8_t, 1024> stream_rectx8_1;
+    hls::stream<uint8_t, 1024> stream_recty8_1;
+// The function is inlined into its caller; the DATAFLOW pragma is intentionally left disabled here.
+// #pragma HLS DATAFLOW
+#pragma HLS INLINE
+    // GetRectSizeDCT takes (xsize, ysize) and x-before-y streams ...
+    GetRectSizeDCT(xsize, ysize, stream_rectx_dct, stream_recty_dct, stream_rectx32, stream_recty32, stream_rectx32_1,
+                   stream_recty32_1, stream_rectx16, stream_recty16, stream_rectx16_1, stream_recty16_1, stream_rectx8,
+                   stream_recty8, stream_rectx8_1, stream_recty8_1);
+    // ... whereas the modules are called with (ysize, xsize) and y-before-x streams.
+    hls_dct8x8_module(ysize, xsize, stream_recty8, stream_rectx8, stream_recty8_1, stream_rectx8_1, opsin8x8_stream,
+                      ac_coef8x8_stream, dc_coef8x8_stream);
+    hls_dct16x16_module(ysize, xsize, stream_recty16, stream_rectx16, stream_recty16_1, stream_rectx16_1,
+                        opsin16x16_stream, ac_coef16x16_stream, dc_coef16x16_stream);
+    hls_dct32x32_module(ysize, xsize, stream_recty32, stream_rectx32, stream_recty32_1, stream_rectx32_1,
+                        opsin32x32_stream, ac_coef32x32_stream, dc_coef32x32_stream);
+}
+
+//-----------------------acs_heuristic---------------------//
+
+// Ceiling division: returns (a + b - 1) / b, i.e. a / b rounded up when a is
+// non-negative and b is positive.
+int Div_Ceil(int a, int b) {
+#pragma HLS inline
+    const int padded = a + b - 1;
+    return padded / b;
+}
+
+// Evaluates the rational function P(x) / Q(x), where
+//   P(x) = p[0] + p[1] * x + p[2] * x^2
+//   Q(x) = q[0] + q[1] * x + q[2] * x^2
+// Both polynomials are evaluated with Horner's scheme, highest coefficient first.
+float EvalRationalPolynomial3_2(float x, float p[3], float q[3]) {
+    const float numerator = ((p[2] * x) + p[1]) * x + p[0];
+    const float denominator = ((q[2] * x) + q[1]) * x + q[0];
+    return numerator / denominator;
+}
+
+// Fast approximation of log2(x).
+//
+// The float is reinterpreted as its 32-bit pattern. Subtracting the bit pattern of 2/3
+// (0x3f2aaaab) and keeping only the bits above the 23-bit mantissa field yields an integer
+// exponent; removing that exponent from the input leaves a mantissa whose bit pattern lies in
+// [2/3, 4/3). log2 of that mantissa is approximated by a rational polynomial in
+// (mantissa - 1) and added to the integer exponent.
+//
+// Zero, negative and non-finite inputs get no special handling.
+float FastLog2f_HLS2(float x) {
+    union {
+        float x_f;
+        int x_i;
+    } u = {x};
+    float p[3] = {-1.8503833400518310E-06f, 1.4287160470083755E+00f, 7.4245873327820566E-01f};
+    float q[3] = {9.9032814277590719E-01f, 1.0096718572241148E+00f, 1.7409343003366853E-01f};
+    int x_bits = u.x_i;
+    int exp_bits = x_bits - 0x3f2aaaab; // = 2/3
+    int exp_shifted = exp_bits >> 23;
+    // Scale back by 2^23 with a multiply rather than `exp_shifted << 23`: exp_shifted is negative
+    // for x < 2/3 and left-shifting a negative int is undefined before C++20. exp_shifted lies in
+    // [-256, 255], so the product always fits in an int and equals the shifted value.
+    int result0 = exp_shifted * (1 << 23);
+    int result = x_bits - result0;
+    u.x_i = result;
+    float mantissa = u.x_f;
+    float exp_val = static_cast<float>(exp_shifted);
+    float output = EvalRationalPolynomial3_2(mantissa - 1.0f, p, q) + exp_val;
+    return output;
+}
+
+// Fast approximation of 2^x.
+//
+// x is split into floor(x) and a fractional remainder. 2^floor(x) is built directly by placing
+// (floor(x) + 127) into the exponent field of a float; 2^frac is approximated by a ratio of two
+// cubic polynomials in frac. The polynomial constants are double literals, so each step is
+// evaluated in double and rounded to float when stored.
+float FastPow2f_HLS(float x) {
+    const int int_part = floor(x);
+    union {
+        float as_float;
+        int as_bits;
+    } pun;
+    pun.as_bits = ((int_part + 127) << 23);
+    const float pow2_int = pun.as_float;
+    const float frac = x - int_part;
+
+    // Numerator, scaled by 2^floor(x).
+    const float n0 = frac + 1.01749063e+01;
+    const float n1 = n0 * frac + 4.88687798e+01;
+    const float n2 = n1 * frac + 9.85506591e+01;
+    const float numerator = n2 * pow2_int;
+
+    // Denominator.
+    const float d0 = frac * 2.10242958e-01 - 2.22328856e-02;
+    const float d1 = d0 * frac - 1.94414990e+01;
+    const float denominator = d1 * frac + 9.85506633e+01;
+
+    return numerator / denominator;
+}
+
+// Approximates base^exponent as 2^(log2(base) * exponent) using the fast log2 / pow2 helpers.
+float FastPowf_HLS(float base, float exponent) {
+    const float log2_of_base = FastLog2f_HLS2(base);
+    return FastPow2f_HLS(log2_of_base * exponent);
+}
+
+// Returns ceil(log2(x)) for a non-zero 32-bit value.
+//
+// 63 ^ (lz + 32) equals 31 - lz for lz in [0, 31], i.e. the index of the highest set bit
+// (floor(log2(x))). One is added unless x is an exact power of two, detected by x & (x - 1).
+int CeilLog2NonzeroHLS(ap_int<32> x) {
+    const int lz = x.countLeadingZeros();
+    const int highest_bit = 63 ^ (lz + 32);
+    const bool is_power_of_two = (x & (x - 1)) == 0;
+    return is_power_of_two ? highest_bit : highest_bit + 1;
+}
+
+// Broadcasts each tile's rectangle size to five pairs of output streams.
+//
+// The tile grid is Div_Ceil(ysize / 8, 8) rows by Div_Ceil(xsize / 8, 8) columns. For every
+// tile one height is read from stream_recty_acs and one width from stream_rectx_acs; each is
+// then written once to the rect{x,y}0, rect{x,y}1, rect{x,y}2, rect{x,y}3 and rect{x,y}10
+// streams.
+void GetACSSize(short xsize,
+                short ysize,
+                hls::stream<uint8_t>& stream_rectx_acs,
+                hls::stream<uint8_t>& stream_recty_acs,
+                hls::stream<uint8_t>& stream_rectx0,
+                hls::stream<uint8_t>& stream_recty0,
+                hls::stream<uint8_t>& stream_rectx1,
+                hls::stream<uint8_t>& stream_recty1,
+                hls::stream<uint8_t>& stream_rectx2,
+                hls::stream<uint8_t>& stream_recty2,
+                hls::stream<uint8_t>& stream_rectx3,
+                hls::stream<uint8_t>& stream_recty3,
+                hls::stream<uint8_t>& stream_rectx10,
+                hls::stream<uint8_t>& stream_recty10) {
+    const uint16_t blocks_x = xsize / 8;
+    const uint16_t blocks_y = ysize / 8;
+    const int tile_rows = Div_Ceil(blocks_y, 8);
+    const int tile_cols = Div_Ceil(blocks_x, 8);
+LOOP_0:
+    for (uint16_t ty = 0; ty < tile_rows; ty++) {
+    LOOP_1:
+        for (uint16_t tx = 0; tx < tile_cols; tx++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+            // Height is read first, then width.
+            const uint8_t tile_h = stream_recty_acs.read();
+            const uint8_t tile_w = stream_rectx_acs.read();
+
+            stream_rectx0.write(tile_w);
+            stream_recty0.write(tile_h);
+
+            stream_rectx1.write(tile_w);
+            stream_recty1.write(tile_h);
+
+            stream_rectx2.write(tile_w);
+            stream_recty2.write(tile_h);
+
+            stream_rectx3.write(tile_w);
+            stream_recty3.write(tile_h);
+
+            stream_rectx10.write(tile_w);
+            stream_recty10.write(tile_h);
+        }
+    }
+}
+
+// Duplicates the per-position quant and mask streams for the 8x8, 16x16 and 32x32 paths.
+//
+// For each of num_tile tiles, the rectangle size is read and rect_ysize * rect_xsize values
+// are consumed from stream_q_org and stream_mask_org. The 8x8 path receives the quant value
+// unchanged; the 16x16 and 32x32 paths both receive its 8th power, computed once by three
+// successive squarings. The mask value is copied unchanged to all three paths.
+void DupQuantAndMask(uint16_t num_tile,
+                     hls::stream<uint8_t>& stream_rectx,
+                     hls::stream<uint8_t>& stream_recty,
+                     hls::stream<float>& stream_q_org,
+                     hls::stream<float>& stream_mask_org,
+                     hls::stream<float>& stream_q_org_8,
+                     hls::stream<float>& stream_mask_org_8,
+                     hls::stream<float>& stream_q_org_16,
+                     hls::stream<float>& stream_mask_org_16,
+                     hls::stream<float>& stream_q_org_32,
+                     hls::stream<float>& stream_mask_org_32) {
+DUP_0:
+    for (uint16_t tile = 0; tile < num_tile; tile++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        const uint8_t tile_h = stream_recty.read();
+        const uint8_t tile_w = stream_rectx.read();
+    DUP_1:
+        for (uint8_t row = 0; row < tile_h; row++) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+        DUP_2:
+            for (uint8_t col = 0; col < tile_w; col++) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+                // Quant: raw value for 8x8, q^8 shared by 16x16 and 32x32.
+                const float quant = stream_q_org.read();
+                stream_q_org_8.write(quant);
+                const float quant_pow2 = quant * quant;
+                const float quant_pow4 = quant_pow2 * quant_pow2;
+                const float quant_pow8 = quant_pow4 * quant_pow4;
+                stream_q_org_16.write(quant_pow8);
+                stream_q_org_32.write(quant_pow8);
+
+                // Mask: identical copy on every path.
+                const float mask = stream_mask_org.read();
+                stream_mask_org_8.write(mask);
+                stream_mask_org_16.write(mask);
+                stream_mask_org_32.write(mask);
+            }
+        }
+    }
+}
+
+// Quant / mask stage of the 8x8 path.
+//
+// For each of num_tile tiles the rectangle size is read and forwarded to the *_out streams.
+// Then, for every one of the rect_ysize * rect_xsize positions, the quant value is copied
+// unchanged to stream_q and twice the mask value is written to stream_mask.
+void GetQAndMask_8(uint16_t num_tile,
+                   hls::stream<uint8_t>& stream_rectx,
+                   hls::stream<uint8_t>& stream_recty,
+                   hls::stream<uint8_t>& stream_rectx_out,
+                   hls::stream<uint8_t>& stream_recty_out,
+                   hls::stream<float>& stream_q_org,
+                   hls::stream<float>& stream_mask_org,
+                   hls::stream<float>& stream_q,
+                   hls::stream<float>& stream_mask) {
+LOOP_0:
+    for (uint16_t tile = 0; tile < num_tile; tile++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        const uint8_t tile_h = stream_recty.read();
+        const uint8_t tile_w = stream_rectx.read();
+        stream_recty_out.write(tile_h);
+        stream_rectx_out.write(tile_w);
+    LOOP_1:
+        for (uint8_t row = 0; row < tile_h; row++) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+        LOOP_2:
+            for (uint8_t col = 0; col < tile_w; col++) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+#pragma HLS pipeline II = 64
+                const float quant = stream_q_org.read();
+                stream_q.write(quant);
+                const float doubled_mask = 2.0f * stream_mask_org.read();
+                stream_mask.write(doubled_mask);
+            }
+        }
+    }
+}
+
+// Quant / mask stage shared by the larger-transform paths; N is the block edge length in units
+// of 8x8 blocks (block_n = N * N positions per block).
+//
+// For each of num_tile tiles:
+//   1. the rectangle size is read and forwarded to the *_out streams;
+//   2. rect_ysize * rect_xsize quant and mask values are buffered into q_array / mask_array
+//      (row stride 8);
+//   3. for every complete N x N block inside the rectangle, one value is written to each output:
+//        stream_q    : mean of the buffered quant values, followed by three successive square
+//                      roots (an 8th root)
+//        stream_mask : sqrt(mean(mask^2)) + max(mask), where the max starts from 0
+// Blocks that do not fit completely inside the rectangle produce no output.
+template <int N>
+void GetQAndMask_16_32(uint16_t num_tile,
+                       hls::stream<uint8_t>& stream_rectx,
+                       hls::stream<uint8_t>& stream_recty,
+                       hls::stream<uint8_t>& stream_rectx_out,
+                       hls::stream<uint8_t>& stream_recty_out,
+                       hls::stream<float>& stream_q_org,
+                       hls::stream<float>& stream_mask_org,
+                       hls::stream<float>& stream_q,
+                       hls::stream<float>& stream_mask) {
+    uint8_t block_n = N * N;
+LOOP_0:
+    for (uint16_t tid = 0; tid < num_tile; tid++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        uint8_t rect_ysize = stream_recty.read();
+        uint8_t rect_xsize = stream_rectx.read();
+        stream_recty_out.write(rect_ysize);
+        stream_rectx_out.write(rect_xsize);
+        // Per-tile buffers, indexed as row * 8 + column.
+        float q_array[64];
+#pragma HLS BIND_STORAGE variable = q_array type = RAM_1P impl = bram
+        float mask_array[64];
+#pragma HLS BIND_STORAGE variable = mask_array type = RAM_1P impl = bram
+    // Phase 1: buffer the tile's quant and mask values.
+    LOOP_1:
+        for (uint8_t iy = 0; iy < rect_ysize; iy++) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+#pragma HLS loop_flatten off
+        LOOP_2:
+            for (uint8_t ix = 0; ix < rect_xsize; ix++) {
+#pragma HLS pipeline II = 1
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+#pragma HLS loop_flatten off
+                int index = iy * 8 + ix;
+                q_array[index] = stream_q_org.read();
+                mask_array[index] = stream_mask_org.read();
+            }
+        }
+    // Phase 2: reduce every complete N x N block to one quant and one mask value.
+    LOOP_3:
+        for (uint8_t iy = 0; iy + N - 1 < rect_ysize; iy += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+#pragma HLS loop_flatten off
+        LOOP_4:
+            for (uint8_t ix = 0; ix + N - 1 < rect_xsize; ix += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+#pragma HLS loop_flatten off
+                float quant_norm8 = 0;
+                float masking = 0;
+                float masking_norm2 = 0;
+                float masking_max = 0;
+            LOOP_5:
+                for (uint8_t dy = 0; dy < N; dy++) {
+#pragma HLS LOOP_TRIPCOUNT min = 4 max = 4
+#pragma HLS loop_flatten off
+                LOOP_6:
+                    for (uint8_t dx = 0; dx < N; dx++) {
+#pragma HLS LOOP_TRIPCOUNT min = 4 max = 4
+#pragma HLS loop_flatten off
+#pragma HLS pipeline
+                        // Accumulate the quant sum, the mask maximum and the sum of squared masks.
+                        uint8_t idx = (iy + dy) * 8 + ix + dx;
+                        float qval = q_array[idx];
+                        quant_norm8 += qval;
+                        float maskval = mask_array[idx];
+                        masking_max = fmax(masking_max, maskval);
+                        masking_norm2 += maskval * maskval;
+                    }
+                }
+                quant_norm8 /= block_n;
+            // Change: three sqrtf calls replace FastPowf_HLS(x, 1/8); they are issued from a loop so
+            // that a single sqrtf instance can be reused for all three.
+            // float tmp = quant_norm8;
+            // quant_norm8 = sqrtf(quant_norm8);
+            // quant_norm8 = sqrtf(quant_norm8);
+            // quant_norm8 = sqrtf(quant_norm8);
+            LOOP_7:
+                for (int dx = 0; dx < 3; dx++) {
+#pragma HLS pipeline
+                    quant_norm8 = sqrtf(quant_norm8);
+                }
+                // quant_norm8 = FastPowf_HLS(quant_norm8, 1.0f / 8.0f);
+                masking_norm2 = sqrtf(masking_norm2 / block_n);
+                masking = masking_norm2 + masking_max;
+                stream_q.write(quant_norm8);
+                stream_mask.write(masking);
+            }
+        }
+    }
+}
+
+// First stage of the entropy estimate: per-coefficient quantization statistics.
+//
+// N is the block edge length in units of 8x8 blocks; each block has block_n * 64 coefficients
+// per channel (block_n = N * N). For each of num_tile tiles the rectangle size is read and
+// forwarded, then for every complete N x N block and each of the 3 channels the coefficients
+// are read from stream_dctin. One quant value q is read from stream_q per block (at the first
+// coefficient of channel 0).
+//
+// Per coefficient:
+//   - channel 0 values are stored (y_ram / y_fix_ram); for channel 2 the stored channel-0
+//     value at the same index is subtracted from the input, for channels 0 and 1 nothing is
+//     subtracted;
+//   - the result is multiplied by the inv_matrix_{8,16,32}[c][i] entry selected by N and by q,
+//     then rounded to an integer;
+//   - the rounding error, its square, a "non-zero" flag and an entropy term
+//     ((|rounded| >= 2 ? cost2 : 0) + sqrt(|rounded|) * cost_delta) are produced.
+//
+// Output granularity differs between the two build variants:
+//   - float build (FIX undefined): every 8 consecutive coefficients are summed and one value is
+//     written to each of the four output streams;
+//   - FIX build: fixed-point sums are kept per channel; entropy and nzeros are written once per
+//     channel, loss and loss2 once per block (after channel 2).
+//
+// NOTE(review): info_loss, info_loss2, entropy, zeros_mul, entropy_v, nzeros_v and q_tmp are not
+// referenced in either variant; info_loss_fix, info_loss2_fix and nzeros_fix are declared in the
+// FIX variant but not referenced either.
+template <int N>
+void ComputeEntropy1(uint16_t num_tile,
+                     hls::stream<uint8_t>& stream_rectx,
+                     hls::stream<uint8_t>& stream_recty,
+                     hls::stream<uint8_t>& stream_rectx_out,
+                     hls::stream<uint8_t>& stream_recty_out,
+                     hls::stream<float>& stream_q,
+                     hls::stream<float>& stream_dctin,
+#ifdef FIX
+                     hls::stream<ap_int<28> >& stream_loss,
+                     hls::stream<ap_int<44> >& stream_loss2,
+                     hls::stream<ap_int<42> >& stream_entropy,
+                     hls::stream<ap_int<11> >& stream_nzeros
+#else
+                     hls::stream<float>& stream_loss,
+                     hls::stream<float>& stream_loss2,
+                     hls::stream<float>& stream_entropy,
+                     hls::stream<float>& stream_nzeros
+#endif
+                     ) {
+    uint8_t block_n = N * N;
+    int count_array;
+    float info_loss = 0.0;
+    float info_loss2 = 0.0;
+    float entropy = 0.0;
+    float zeros_mul = 7.565053364251793f;
+    float cost2 = 4.4628149885273363f;
+    float cost_delta = 5.3359184934516337f;
+    float cmap_factor;
+    float q;
+    float entropy_v[3] = {0.0, 0.0, 0.0};
+    float nzeros_v[3] = {0.0, 0.0, 0.0};
+    float entropy_array[8];
+    float info_loss_array[8];
+    float info_loss2_array[8];
+    float nzeros_array[8];
+    float y_ram[1024];
+    // Per-channel weight of the stored channel-0 value: only channel 2 subtracts it.
+    float cmap_factors_init[3] = {0.0f, 0.0f, 1.0f};
+#ifdef FIX
+    ap_int<23> info_loss_fix[8];
+    ap_int<45> info_loss2_fix[8];
+    ap_int<11> nzeros_fix[8];
+    ap_int<32> y_fix_ram[1024];
+    ap_int<32> cost2_fix = (int)(cost2 * 1024);
+    ap_int<32> cost_delta_fix = (int)(cost_delta * 1024);
+    ap_int<28> info_loss_sum;
+    ap_int<44> info_loss2_sum;
+    ap_int<11> nzeros_sum;
+    ap_int<42> entropy_sum;
+#endif
+LOOP_0:
+    for (uint16_t tid = 0; tid < num_tile; tid++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        uint16_t rect_ysize = stream_recty.read();
+        uint16_t rect_xsize = stream_rectx.read();
+        stream_recty_out.write(rect_ysize);
+        stream_rectx_out.write(rect_xsize);
+        float q_tmp[64];
+    LOOP_1:
+        for (uint8_t iy = 0; iy + N - 1 < rect_ysize; iy += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+        LOOP_2:
+            for (uint8_t ix = 0; ix + N - 1 < rect_xsize; ix += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+            LOOP_3:
+                for (uint8_t c = 0; c < 3; c++) {
+#pragma HLS LOOP_TRIPCOUNT min = 3 max = 3
+                LOOP_4:
+                    for (uint16_t i = 0; i < block_n * 64; i += 1) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+#pragma HLS pipeline II = 1
+                        float in = stream_dctin.read();
+#ifdef FIX
+                        // Reset the per-channel sums at the first coefficient of each channel, and
+                        // fetch q / reset the per-block loss sums at the first coefficient of channel 0.
+                        if (i == 0) {
+                            nzeros_sum = 0;
+                            entropy_sum = 0;
+                            if (c == 0) {
+                                q = stream_q.read();
+                                info_loss_sum = 0;
+                                info_loss2_sum = 0;
+                            }
+                        }
+                        ap_int<30> in_fix = in * 0x1fffffff; // exp=29
+                        // NOTE(review): in_fix_y_tmp is a float holding a value that can be up to 30
+                        // bits wide; a float significand has 24 bits -- confirm the precision loss is
+                        // acceptable or intended.
+                        float in_fix_y_tmp;
+                        if (c == 0) {
+                            y_fix_ram[i] = in_fix;
+                            in_fix_y_tmp = in_fix;
+                        } else {
+                            in_fix_y_tmp = y_fix_ram[i];
+                        }
+                        ap_int<30> in_fix_y = (c == 2) ? in_fix_y_tmp : 0;
+                        ap_int<31> in_fix_m = in_fix - in_fix_y;
+
+                        // Inverse-matrix entry for this coefficient, table chosen by N.
+                        ap_uint<24> im_fix;
+                        if (N == 1) {
+                            im_fix = inv_matrix_8_fix[c][i]; // exp=10
+                        }
+                        if (N == 2) {
+                            im_fix = inv_matrix_16_fix[c][i];
+                        }
+                        if (N == 4) {
+                            im_fix = inv_matrix_32_fix[c][i];
+                        }
+
+                        ap_uint<15> rqf_fix = q * 32768;          // exp=15
+                        ap_int<55> val_tmp0 = in_fix_m * im_fix;  // exp=29+10=39
+                        ap_int<28> val_tmp1 = val_tmp0 >> 27;     // exp=39-27=12
+                        ap_int<43> val_tmp2 = val_tmp1 * rqf_fix; // exp=12+15=27
+                        ap_int<35> val_fix = val_tmp2 >> 11;      // exp=27-11=16
+
+                        // actual value is not that large, so just reduce bitwidth
+                        // Round to nearest: take the integer part and add one when the first
+                        // fractional bit (bit 15 of val_fix) is set.
+                        ap_int<11> val_shift0 = val_fix >> 15;
+                        ap_int<10> val_shift1 = val_fix >> 16;
+                        if (val_shift0.range(0, 0) == 1) {
+                            val_shift1 += 1;
+                        }
+                        ap_int<10> rval_fix = val_shift1;                          // exp=0
+                        ap_int<32> val_shift_back = val_shift1 * 65536;            // exp=16
+                        ap_uint<16> diff_fix = std::abs(val_shift_back - val_fix); // exp=-16 hls_abs?
+                        ap_uint<32> diff_fix_square = diff_fix * diff_fix;         // exp=-32
+                        ap_uint<10> q_fix = std::abs(rval_fix);                    // hls_abs?
+                        bool q_fix_is_zero = q_fix == 0;
+                        float entropy_tmp = (q_fix > 1 ? cost2 : 0.0f) + sqrtf(q_fix) * cost_delta;
+                        ap_uint<32> entropy_fix = (uint32_t)(entropy_tmp * 65536);
+
+                        info_loss_sum += diff_fix;
+                        info_loss2_sum += diff_fix_square;
+                        nzeros_sum += q_fix_is_zero ? 0 : 1;
+                        entropy_sum += entropy_fix;
+
+                        // Emit per-channel sums after the last coefficient of every channel, and the
+                        // per-block loss sums after the last coefficient of channel 2.
+                        if (i == block_n * 64 - 1) {
+                            stream_entropy.write(entropy_sum);
+                            stream_nzeros.write(nzeros_sum);
+                        }
+                        if (i == block_n * 64 - 1 && c == 2) {
+                            stream_loss.write(info_loss_sum);
+                            stream_loss2.write(info_loss2_sum);
+                        }
+#else
+                        // Fetch q and restart the 8-entry partial-sum window at the start of each block.
+                        if (c == 0 && i == 0) {
+                            q = stream_q.read();
+                            count_array = 0;
+                        }
+                        cmap_factor = cmap_factors_init[c];
+                        float in_y_tmp;
+                        if (c == 0) {
+                            y_ram[i] = in;
+                            in_y_tmp = in;
+                        } else {
+                            in_y_tmp = y_ram[i];
+                        }
+                        float in_y = in_y_tmp * cmap_factor;
+                        // Inverse-matrix entry for this coefficient, table chosen by N.
+                        float im;
+                        if (N == 1) {
+                            im = inv_matrix_8[c][i];
+                        }
+                        if (N == 2) {
+                            im = inv_matrix_16[c][i];
+                        }
+                        if (N == 4) {
+                            im = inv_matrix_32[c][i];
+                        }
+                        const float val = (in - in_y) * im * q;
+                        const int rval = roundf(val);
+                        const float diff = fabs(val - rval);
+
+                        info_loss_array[count_array] = diff;
+                        info_loss2_array[count_array] = diff * diff;
+
+                        // NOTE: this integer q shadows the function-level float q from here to the
+                        // end of the loop body; `val` above was computed with the float q.
+                        const int q = abs(rval);
+                        const bool q_is_zero = q == 0;
+                        float tmp = (q >= 1.5f ? cost2 : 0.0f) + sqrtf(q) * cost_delta;
+                        entropy_array[count_array] = tmp;
+                        nzeros_array[count_array] = q_is_zero ? 0.0f : 1.0f;
+                        count_array++;
+                        // Every 8 coefficients: write the four partial sums and restart the window.
+                        if (count_array == 8) {
+                            float sum0 = entropy_array[0] + entropy_array[1] + entropy_array[2] + entropy_array[3] +
+                                         entropy_array[4] + entropy_array[5] + entropy_array[6] + entropy_array[7];
+                            stream_entropy.write(sum0);
+                            float sum1 = nzeros_array[0] + nzeros_array[1] + nzeros_array[2] + nzeros_array[3] +
+                                         nzeros_array[4] + nzeros_array[5] + nzeros_array[6] + nzeros_array[7];
+                            stream_nzeros.write(sum1);
+                            float sum2 = info_loss_array[0] + info_loss_array[1] + info_loss_array[2] +
+                                         info_loss_array[3] + info_loss_array[4] + info_loss_array[5] +
+                                         info_loss_array[6] + info_loss_array[7];
+                            stream_loss.write(sum2);
+                            float sum3 = info_loss2_array[0] + info_loss2_array[1] + info_loss2_array[2] +
+                                         info_loss2_array[3] + info_loss2_array[4] + info_loss2_array[5] +
+                                         info_loss2_array[6] + info_loss2_array[7];
+                            stream_loss2.write(sum3);
+                            count_array = 0;
+                        }
+#endif
+                    } // loop i
+                }
+            }
+        }
+    }
+}
+
+// Second stage of the entropy estimate: reduces the partial sums to per-channel and per-block
+// totals.
+//
+// For each of num_tile tiles the rectangle size is read and forwarded. Then, for every complete
+// N x N block and each of the 3 channels, 64 * N * N / 8 values are read from each of the four
+// input streams and accumulated:
+//   - entropy and nzeros are summed per channel and written to stream_entropy_sum /
+//     stream_nzeros_sum after the last value of each channel (3 writes per block);
+//   - loss and loss2 are summed across all 3 channels and written to stream_loss_sum /
+//     stream_loss2_sum after the last value of channel 2 (1 write per block).
+template <int N>
+void ComputeEntropy2(uint16_t num_tile,
+                     hls::stream<uint8_t>& stream_rectx,
+                     hls::stream<uint8_t>& stream_recty,
+                     hls::stream<uint8_t>& stream_rectx_out,
+                     hls::stream<uint8_t>& stream_recty_out,
+                     hls::stream<float>& stream_loss,
+                     hls::stream<float>& stream_loss2,
+                     hls::stream<float>& stream_entropy,
+                     hls::stream<float>& stream_nzeros,
+                     hls::stream<float>& stream_loss_sum,
+                     hls::stream<float>& stream_loss2_sum,
+                     hls::stream<float>& stream_entropy_sum,
+                     hls::stream<float>& stream_nzeros_sum) {
+    float entropy_v[3];
+    float nzeros_v[3];
+    float info_loss;
+    float info_loss2;
+LOOP_0:
+    for (uint16_t tid = 0; tid < num_tile; tid++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        uint8_t rect_ysize = stream_recty.read();
+        uint8_t rect_xsize = stream_rectx.read();
+        stream_recty_out.write(rect_ysize);
+        stream_rectx_out.write(rect_xsize);
+    LOOP_1:
+        for (uint8_t iy = 0; iy + N - 1 < rect_ysize; iy += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+        LOOP_2:
+            for (uint8_t ix = 0; ix + N - 1 < rect_xsize; ix += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+            LOOP_3:
+                for (uint8_t c = 0; c < 3; c++) {
+#pragma HLS LOOP_TRIPCOUNT min = 3 max = 3
+                LOOP_4:
+                    for (uint8_t i = 0; i < 64 * N * N / 8; i += 1) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+#pragma HLS pipeline II = 8
+                        // Loss accumulators span all three channels of a block, so they are reset
+                        // only at the first value of channel 0.
+                        if (c == 0 && i == 0) {
+                            info_loss = 0.0;
+                            info_loss2 = 0.0;
+                        }
+                        // Entropy / nzeros accumulators are per channel.
+                        if (i == 0) {
+                            entropy_v[c] = 0;
+                            nzeros_v[c] = 0;
+                        }
+                        entropy_v[c] += stream_entropy.read();
+                        nzeros_v[c] += stream_nzeros.read();
+                        info_loss += stream_loss.read();
+                        info_loss2 += stream_loss2.read();
+                        // Last value of the channel: emit the per-channel totals.
+                        if (i == 64 * N * N / 8 - 1) {
+                            stream_entropy_sum.write(entropy_v[c]);
+                            stream_nzeros_sum.write(nzeros_v[c]);
+                        }
+                        // Last value of the block: emit the per-block loss totals.
+                        if (c == 2 && i == 64 * N * N / 8 - 1) {
+                            stream_loss_sum.write(info_loss);
+                            stream_loss2_sum.write(info_loss2);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+template <int N>
+void ComputeEntropy3(uint16_t num_tile,
+                     float cost1,
+                     float mul,
+                     hls::stream<uint8_t>& stream_rectx,
+                     hls::stream<uint8_t>& stream_recty,
+#ifdef FIX
+                     hls::stream<ap_int<28> >& stream_loss,
+                     hls::stream<ap_int<44> >& stream_loss2,
+                     hls::stream<ap_int<42> >& stream_entropy,
+                     hls::stream<ap_int<11> >& stream_nzeros,
+#else
+                     hls::stream<float>& stream_loss_sum,
+                     hls::stream<float>& stream_loss2_sum,
+                     hls::stream<float>& stream_entropy_sum,
+                     hls::stream<float>& stream_nzeros_sum,
+#endif
+                     hls::stream<float>& stream_mask,
+                     hls::stream<float>& stream_entropy_final) {
+LOOP_0:
+    for (uint16_t tid = 0; tid < num_tile; tid++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        uint8_t rect_ysize = stream_recty.read();
+        uint8_t rect_xsize = stream_rectx.read();
+    LOOP_1:
+        for (uint8_t iy = 0; iy + N - 1 < rect_ysize; iy += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+        LOOP_2:
+            for (uint8_t ix = 0; ix + N - 1 < rect_xsize; ix += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+#ifdef FIX
+                float entropy = 0.0;
+                float zeros_mul = 7.565053364251793f;
+                float entropy_v[3];
+                ap_int<11> nzeros_v[3];
+                float entropy_bits[3] = {0.0, 0.0, 0.0};
+                for (uint8_t c = 0; c < 3; c++) {
+#pragma HLS LOOP_TRIPCOUNT min = 3 max = 3
+#pragma HLS pipeline
+                    ap_int<42> entropy_tmp = stream_entropy.read();
+                    entropy_v[c] = entropy_tmp / 65536.0;
+                    nzeros_v[c] = stream_nzeros.read();
+                    entropy_v[c] += nzeros_v[c] * cost1;
+                    uint8_t nbits = LUTCeilLog2Nonzero[(nzeros_v[c] + 1)] + 1;
+                    entropy_bits[c] = zeros_mul * (LUTCeilLog2Nonzero[nbits + 17] + nbits);
+                }
+                entropy = entropy_v[0] + entropy_v[1] + entropy_v[2];
+                entropy += entropy_bits[0] + entropy_bits[1] + entropy_bits[2];
+                ap_int<28> tmp_loss = stream_loss.read();
+                float loss_f = tmp_loss / 65536.0;
+                ap_int<44> tmp_loss2 = stream_loss2.read();
+                float loss2_f = tmp_loss2 / 65536.0 / 65536.0;
+                float info_loss_multiplier = 138.0f;
+                float info_loss_multiplier2 = 50.46839691767866;
+                float loss = ((info_loss_multiplier * loss_f) + (info_loss_multiplier2 * N * sqrtf(loss2_f)));
+                float loss_mask = stream_mask.read() * loss;
+                float ret = entropy + loss_mask;
+                if (N == 1) {
+                    ret = 3.0f + 0.745f * ret;
+                }
+                ret = ret * mul;
+                stream_entropy_final.write(ret);
+#else
+                float entropy = 0.0;
+                float zeros_mul = 7.565053364251793f;
+                float entropy_v[3] = {0.0, 0.0, 0.0};
+                float nzeros_v[3] = {0.0, 0.0, 0.0};
+                float entropy_bits[3] = {0.0, 0.0, 0.0};
+                for (int c = 0; c < 3; c++) {
+#pragma HLS LOOP_TRIPCOUNT min = 3 max = 3
+#pragma HLS pipeline
+
+                    entropy_v[c] = stream_entropy_sum.read();
+                    nzeros_v[c] = stream_nzeros_sum.read();
+                    entropy_v[c] += nzeros_v[c] * cost1;
+                    // TODO: Integer to integer, can we use look up table to implement this?
+                    // int nbits = CeilLog2NonzeroHLS(nzeros_v[c] + 1) + 1;
+                    // entropy_bits[c] = zeros_mul * (CeilLog2NonzeroHLS(nbits + 17) + nbits);
+                    int nbits = LUTCeilLog2Nonzero[(short)(nzeros_v[c] + 1)] + 1;
+                    entropy_bits[c] = zeros_mul * (LUTCeilLog2Nonzero[nbits + 17] + nbits);
+                }
+                entropy = entropy_v[0] + entropy_v[1] + entropy_v[2];
+                entropy += entropy_bits[0] + entropy_bits[1] + entropy_bits[2];
+                float tmp_loss = stream_loss_sum.read();
+                float tmp_loss2 = stream_loss2_sum.read();
+                float info_loss_multiplier = 138.0f;
+                float info_loss_multiplier2 = 50.46839691767866;
+                float ret = entropy +
+                            stream_mask.read() * ((info_loss_multiplier * tmp_loss) +
+                                                  (info_loss_multiplier2 * sqrtf((float)(N * N * tmp_loss2))));
+                if (N == 1) {
+                    ret = 3.0f + 0.745f * ret;
+                }
+                stream_entropy_final.write(ret * mul);
+#endif
+            }
+        }
+    }
+}
+
+/**
+ * @brief Copy the DCT coefficients of each tile from a stream into a ping/pang buffer.
+ *
+ * For every tile the function reads the tile extent (stream_recty first, then
+ * stream_rectx), writes a token to stream_con, and walks the (8 / N) x (8 / N) grid
+ * of blocks, each 8N x 8N elements. For every block that lies completely inside the
+ * extent it pulls 3 * 8N * 8N floats from stream_dctin (channel, then row, then
+ * column) and stores them in the active buffer. After the tile the active buffer is
+ * toggled and a token is written to stream_ok.
+ *
+ * Element (c, y, x) is stored at c * 4096 + y * 64 + x. Under __SYNTHESIS__ every
+ * tile starts at offset 0; in C simulation tile tid is additionally offset by
+ * tid * 3 * 4096, so ping and pang must be large enough for num_tile tiles there.
+ *
+ * @tparam N            block edge in multiples of 8 elements; data is consumed only for N == 1, 2 or 4
+ * @param num_tile      number of tiles to process
+ * @param ping          buffer written for tiles 0, 2, 4, ...
+ * @param pang          buffer written for tiles 1, 3, 5, ...
+ * @param stream_rectx  per-tile x extent (one value per tile)
+ * @param stream_recty  per-tile y extent (one value per tile)
+ * @param stream_dctin  coefficient input
+ * @param stream_con    receives one token per tile before the tile is filled
+ * @param stream_ok     receives one token per tile after the tile is filled
+ */
+template <int N>
+void BufferN(uint16_t num_tile,
+             float* ping,
+             float* pang,
+             hls::stream<uint8_t>& stream_rectx,
+             hls::stream<uint8_t>& stream_recty,
+             hls::stream<float>& stream_dctin,
+             hls::stream<bool>& stream_con,
+             hls::stream<bool>& stream_ok) {
+    bool flag = true;               // true: fill ping, false: fill pang
+    uint16_t size = 4096;           // elements per channel plane (64 * 64)
+    uint8_t w = 64;                 // row stride inside a channel plane
+    uint16_t total_size = 4096 * 3; // elements per tile; only used by the C-simulation indexing
+LOOP_0:
+    for (uint16_t tid = 0; tid < num_tile; tid++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        uint8_t rect_ysize = stream_recty.read();
+        uint8_t rect_xsize = stream_rectx.read();
+        stream_con.write(1);
+    LOOP_1:
+        for (uint8_t jy = 0; jy < 8 / N; jy += 1) {
+        LOOP_2:
+            for (uint8_t jx = 0; jx < 8 / N; jx += 1) {
+            LOOP_3:
+                for (uint8_t c = 0; c < 3; c++) {
+                LOOP_4:
+                    for (uint8_t iy = 0; iy < 8 * N; iy += 1) {
+                    LOOP_5:
+                        for (uint8_t ix = 0; ix < 8 * N; ix += 1) {
+                            uint8_t y = jy * 8 * N + iy;
+                            uint8_t x = jx * 8 * N + ix;
+                            // Consume the block only when its last row/column, counted in the units of
+                            // rect_ysize / rect_xsize, is still inside the extent. Values of N other
+                            // than 1, 2 and 4 never read. The test stays in the innermost loop so that
+                            // LOOP_1..LOOP_5 remain a perfect nest.
+                            const bool read = (N == 1 || N == 2 || N == 4) && (jy * N + N - 1) < rect_ysize &&
+                                              (jx * N + N - 1) < rect_xsize;
+                            if (read) {
+                                float tmp = stream_dctin.read();
+#ifdef __SYNTHESIS__
+                                if (flag) {
+                                    ping[c * size + y * w + x] = tmp;
+                                } else {
+                                    pang[c * size + y * w + x] = tmp;
+                                }
+#else
+                                if (flag) {
+                                    ping[total_size * tid + c * size + y * w + x] = tmp;
+                                } else {
+                                    pang[total_size * tid + c * size + y * w + x] = tmp;
+                                }
+#endif
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        flag = !flag;
+        stream_ok.write(1);
+    }
+}
+
+/**
+ * @brief Chain the entropy-estimation stages instantiated with template argument 1.
+ *
+ * Runs GetQAndMask_8, ComputeEntropy1<1>, ComputeEntropy2<1> (only when FIX is not
+ * defined) and ComputeEntropy3<1> in that order, connected by local depth-1024 FIFOs.
+ * When FIX is defined the loss / loss2 / entropy / nzeros values travel as ap_int
+ * streams straight from ComputeEntropy1 to ComputeEntropy3. Otherwise they are float
+ * streams that pass through ComputeEntropy2 first, which is why the third pair of
+ * rect FIFOs and the *_sum FIFOs are declared only in that build.
+ *
+ * @param num_tile          tile count, forwarded to every stage
+ * @param cost1             forwarded to ComputeEntropy3
+ * @param mul8x8            forwarded to ComputeEntropy3
+ * @param stream_rectx      passed to GetQAndMask_8
+ * @param stream_recty      passed to GetQAndMask_8
+ * @param stream_q_org      passed to GetQAndMask_8
+ * @param stream_mask_org   passed to GetQAndMask_8
+ * @param stream_dctin      passed to ComputeEntropy1
+ * @param stream_entropy_8  last argument of ComputeEntropy3 (presumably the result stream)
+ */
+void EstimateEntropy8(uint16_t num_tile,
+                      float cost1,
+                      float mul8x8,
+                      hls::stream<uint8_t>& stream_rectx,
+                      hls::stream<uint8_t>& stream_recty,
+                      hls::stream<float>& stream_q_org,
+                      hls::stream<float>& stream_mask_org,
+                      hls::stream<float>& stream_dctin,
+                      hls::stream<float>& stream_entropy_8) {
+#pragma HLS inline
+
+    // Rect-size FIFOs between GetQAndMask_8 -> ComputeEntropy1 -> next stage.
+    hls::stream<uint8_t> stream_rectx80("rectx80");
+#pragma HLS stream variable = stream_rectx80 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx80 type = fifo
+    hls::stream<uint8_t> stream_recty80("recty80");
+#pragma HLS stream variable = stream_recty80 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty80 type = fifo
+    hls::stream<uint8_t> stream_rectx81("rectx81");
+#pragma HLS stream variable = stream_rectx81 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx81 type = fifo
+    hls::stream<uint8_t> stream_recty81("recty81");
+#pragma HLS stream variable = stream_recty81 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty81 type = fifo
+
+#ifdef FIX
+    // Fixed-point build: ComputeEntropy1 feeds ComputeEntropy3 directly.
+    hls::stream<ap_int<28> > stream_loss("loss_8");
+#pragma HLS stream variable = stream_loss depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss type = fifo
+    hls::stream<ap_int<44> > stream_loss2("loss2_8");
+#pragma HLS stream variable = stream_loss2 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss2 type = fifo
+    hls::stream<ap_int<42> > stream_entropy("entropy_8");
+#pragma HLS stream variable = stream_entropy depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy type = fifo
+    hls::stream<ap_int<11> > stream_nzeros("nzeros_8");
+#pragma HLS stream variable = stream_nzeros depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_nzeros type = fifo
+#else
+    // Float build: ComputeEntropy1 -> ComputeEntropy2 -> ComputeEntropy3.
+    hls::stream<float> stream_loss("loss_8");
+#pragma HLS stream variable = stream_loss depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss type = fifo
+    hls::stream<float> stream_loss2("loss2_8");
+#pragma HLS stream variable = stream_loss2 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss2 type = fifo
+    hls::stream<float> stream_entropy("entropy_8");
+#pragma HLS stream variable = stream_entropy depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy type = fifo
+    hls::stream<float> stream_nzeros("nzeros_8");
+#pragma HLS stream variable = stream_nzeros depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_nzeros type = fifo
+
+    // FIFOs that exist only because of the extra ComputeEntropy2 stage.
+    hls::stream<uint8_t> stream_rectx82("rectx82");
+#pragma HLS stream variable = stream_rectx82 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx82 type = fifo
+    hls::stream<uint8_t> stream_recty82("recty82");
+#pragma HLS stream variable = stream_recty82 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty82 type = fifo
+    hls::stream<float> stream_loss_sum("loss_8_sum");
+#pragma HLS stream variable = stream_loss_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss_sum type = fifo
+    hls::stream<float> stream_loss2_sum("loss2_8_sum");
+#pragma HLS stream variable = stream_loss2_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss2_sum type = fifo
+    hls::stream<float> stream_entropy_sum("entropy_8_sum");
+#pragma HLS stream variable = stream_entropy_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy_sum type = fifo
+    hls::stream<float> stream_nzeros_sum("nzeros_8_sum");
+#pragma HLS stream variable = stream_nzeros_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_nzeros_sum type = fifo
+#endif
+
+    hls::stream<float> stream_q("q_8");
+#pragma HLS stream variable = stream_q depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_q type = fifo
+    hls::stream<float> stream_mask("mask_8");
+#pragma HLS stream variable = stream_mask depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_mask type = fifo
+
+    // #pragma HLS dataflow
+    // NOTE(review): dataflow is left disabled here; presumably the enclosing function
+    // applies it, since this function is inlined - confirm.
+    GetQAndMask_8(num_tile, stream_rectx, stream_recty, stream_rectx80, stream_recty80, stream_q_org, stream_mask_org,
+                  stream_q, stream_mask);
+
+    ComputeEntropy1<1>(num_tile, stream_rectx80, stream_recty80, stream_rectx81, stream_recty81, stream_q, stream_dctin,
+                       stream_loss, stream_loss2, stream_entropy, stream_nzeros);
+
+#ifdef FIX
+    ComputeEntropy3<1>(num_tile, cost1, mul8x8, stream_rectx81, stream_recty81, stream_loss, stream_loss2,
+                       stream_entropy, stream_nzeros, stream_mask, stream_entropy_8);
+#else
+    ComputeEntropy2<1>(num_tile, stream_rectx81, stream_recty81, stream_rectx82, stream_recty82, stream_loss,
+                       stream_loss2, stream_entropy, stream_nzeros, stream_loss_sum, stream_loss2_sum,
+                       stream_entropy_sum, stream_nzeros_sum);
+
+    ComputeEntropy3<1>(num_tile, cost1, mul8x8, stream_rectx82, stream_recty82, stream_loss_sum, stream_loss2_sum,
+                       stream_entropy_sum, stream_nzeros_sum, stream_mask, stream_entropy_8);
+#endif
+}
+
+/**
+ * @brief Chain the entropy-estimation stages instantiated with template argument 2.
+ *
+ * Runs GetQAndMask_16_32<2>, ComputeEntropy1<2>, ComputeEntropy2<2> (only when FIX is
+ * not defined) and ComputeEntropy3<2> in that order, connected by local depth-1024
+ * FIFOs. When FIX is defined the loss / loss2 / entropy / nzeros values travel as
+ * ap_int streams straight from ComputeEntropy1 to ComputeEntropy3. Otherwise they are
+ * float streams that pass through ComputeEntropy2 first, which is why the third pair
+ * of rect FIFOs and the *_sum FIFOs are declared only in that build.
+ *
+ * @param num_tile           tile count, forwarded to every stage
+ * @param cost1              forwarded to ComputeEntropy3
+ * @param mul16x16           forwarded to ComputeEntropy3
+ * @param stream_rectx       passed to GetQAndMask_16_32
+ * @param stream_recty       passed to GetQAndMask_16_32
+ * @param stream_q_org       passed to GetQAndMask_16_32
+ * @param stream_mask_org    passed to GetQAndMask_16_32
+ * @param stream_dctin       passed to ComputeEntropy1
+ * @param stream_entropy_16  last argument of ComputeEntropy3 (presumably the result stream)
+ */
+void EstimateEntropy16(uint16_t num_tile,
+                       float cost1,
+                       float mul16x16,
+                       hls::stream<uint8_t>& stream_rectx,
+                       hls::stream<uint8_t>& stream_recty,
+                       hls::stream<float>& stream_q_org,
+                       hls::stream<float>& stream_mask_org,
+                       hls::stream<float>& stream_dctin,
+                       hls::stream<float>& stream_entropy_16) {
+#pragma HLS inline
+
+    // Rect-size FIFOs between GetQAndMask_16_32 -> ComputeEntropy1 -> next stage.
+    hls::stream<uint8_t> stream_rectx160("rectx160");
+#pragma HLS stream variable = stream_rectx160 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx160 type = fifo
+    hls::stream<uint8_t> stream_recty160("recty160");
+#pragma HLS stream variable = stream_recty160 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty160 type = fifo
+    hls::stream<uint8_t> stream_rectx161("rectx161");
+#pragma HLS stream variable = stream_rectx161 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx161 type = fifo
+    hls::stream<uint8_t> stream_recty161("recty161");
+#pragma HLS stream variable = stream_recty161 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty161 type = fifo
+
+#ifdef FIX
+    // Fixed-point build: ComputeEntropy1 feeds ComputeEntropy3 directly.
+    hls::stream<ap_int<28> > stream_loss("loss_16");
+#pragma HLS stream variable = stream_loss depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss type = fifo
+    hls::stream<ap_int<44> > stream_loss2("loss2_16");
+#pragma HLS stream variable = stream_loss2 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss2 type = fifo
+    hls::stream<ap_int<42> > stream_entropy("entropy_16");
+#pragma HLS stream variable = stream_entropy depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy type = fifo
+    hls::stream<ap_int<11> > stream_nzeros("nzeros_16");
+#pragma HLS stream variable = stream_nzeros depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_nzeros type = fifo
+#else
+    // Float build: ComputeEntropy1 -> ComputeEntropy2 -> ComputeEntropy3.
+    hls::stream<float> stream_loss("loss_16");
+#pragma HLS stream variable = stream_loss depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss type = fifo
+    hls::stream<float> stream_loss2("loss2_16");
+#pragma HLS stream variable = stream_loss2 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss2 type = fifo
+    hls::stream<float> stream_entropy("entropy_16");
+#pragma HLS stream variable = stream_entropy depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy type = fifo
+    hls::stream<float> stream_nzeros("nzeros_16");
+#pragma HLS stream variable = stream_nzeros depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_nzeros type = fifo
+
+    // FIFOs that exist only because of the extra ComputeEntropy2 stage.
+    hls::stream<uint8_t> stream_rectx162("rectx162");
+#pragma HLS stream variable = stream_rectx162 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx162 type = fifo
+    hls::stream<uint8_t> stream_recty162("recty162");
+#pragma HLS stream variable = stream_recty162 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty162 type = fifo
+    hls::stream<float> stream_loss_sum("loss_16_sum");
+#pragma HLS stream variable = stream_loss_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss_sum type = fifo
+    hls::stream<float> stream_loss2_sum("loss2_16_sum");
+#pragma HLS stream variable = stream_loss2_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss2_sum type = fifo
+    hls::stream<float> stream_entropy_sum("entropy_16_sum");
+#pragma HLS stream variable = stream_entropy_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy_sum type = fifo
+    hls::stream<float> stream_nzeros_sum("nzeros_16_sum");
+#pragma HLS stream variable = stream_nzeros_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_nzeros_sum type = fifo
+#endif
+
+    hls::stream<float> stream_q("q_16");
+#pragma HLS stream variable = stream_q depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_q type = fifo
+    hls::stream<float> stream_mask("mask_16");
+#pragma HLS stream variable = stream_mask depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_mask type = fifo
+
+    // #pragma HLS dataflow
+    // NOTE(review): dataflow is left disabled here; presumably the enclosing function
+    // applies it, since this function is inlined - confirm.
+    GetQAndMask_16_32<2>(num_tile, stream_rectx, stream_recty, stream_rectx160, stream_recty160, stream_q_org,
+                         stream_mask_org, stream_q, stream_mask);
+
+    ComputeEntropy1<2>(num_tile, stream_rectx160, stream_recty160, stream_rectx161, stream_recty161, stream_q,
+                       stream_dctin, stream_loss, stream_loss2, stream_entropy, stream_nzeros);
+
+#ifdef FIX
+    ComputeEntropy3<2>(num_tile, cost1, mul16x16, stream_rectx161, stream_recty161, stream_loss, stream_loss2,
+                       stream_entropy, stream_nzeros, stream_mask, stream_entropy_16);
+#else
+    ComputeEntropy2<2>(num_tile, stream_rectx161, stream_recty161, stream_rectx162, stream_recty162, stream_loss,
+                       stream_loss2, stream_entropy, stream_nzeros, stream_loss_sum, stream_loss2_sum,
+                       stream_entropy_sum, stream_nzeros_sum);
+
+    ComputeEntropy3<2>(num_tile, cost1, mul16x16, stream_rectx162, stream_recty162, stream_loss_sum, stream_loss2_sum,
+                       stream_entropy_sum, stream_nzeros_sum, stream_mask, stream_entropy_16);
+#endif
+}
+
+/**
+ * @brief Chain the entropy-estimation stages instantiated with template argument 4.
+ *
+ * Runs GetQAndMask_16_32<4>, ComputeEntropy1<4>, ComputeEntropy2<4> (only when FIX is
+ * not defined) and ComputeEntropy3<4> in that order, connected by local depth-1024
+ * FIFOs. When FIX is defined the loss / loss2 / entropy / nzeros values travel as
+ * ap_int streams straight from ComputeEntropy1 to ComputeEntropy3. Otherwise they are
+ * float streams that pass through ComputeEntropy2 first, which is why the third pair
+ * of rect FIFOs and the *_sum FIFOs are declared only in that build.
+ *
+ * @param num_tile           tile count, forwarded to every stage
+ * @param cost1              forwarded to ComputeEntropy3
+ * @param mul32x32           forwarded to ComputeEntropy3
+ * @param stream_rectx       passed to GetQAndMask_16_32
+ * @param stream_recty       passed to GetQAndMask_16_32
+ * @param stream_q_org       passed to GetQAndMask_16_32
+ * @param stream_mask_org    passed to GetQAndMask_16_32
+ * @param stream_dctin       passed to ComputeEntropy1
+ * @param stream_entropy_32  last argument of ComputeEntropy3 (presumably the result stream)
+ */
+void EstimateEntropy32(uint16_t num_tile,
+                       float cost1,
+                       float mul32x32,
+                       hls::stream<uint8_t>& stream_rectx,
+                       hls::stream<uint8_t>& stream_recty,
+                       hls::stream<float>& stream_q_org,
+                       hls::stream<float>& stream_mask_org,
+                       hls::stream<float>& stream_dctin,
+                       hls::stream<float>& stream_entropy_32) {
+#pragma HLS inline
+
+    // Rect-size FIFOs between GetQAndMask_16_32 -> ComputeEntropy1 -> next stage.
+    hls::stream<uint8_t> stream_rectx320("rectx320");
+#pragma HLS stream variable = stream_rectx320 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx320 type = fifo
+    hls::stream<uint8_t> stream_recty320("recty320");
+#pragma HLS stream variable = stream_recty320 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty320 type = fifo
+    hls::stream<uint8_t> stream_rectx321("rectx321");
+#pragma HLS stream variable = stream_rectx321 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx321 type = fifo
+    hls::stream<uint8_t> stream_recty321("recty321");
+#pragma HLS stream variable = stream_recty321 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty321 type = fifo
+
+#ifdef FIX
+    // Fixed-point build: ComputeEntropy1 feeds ComputeEntropy3 directly.
+    hls::stream<ap_int<28> > stream_loss("loss_32");
+#pragma HLS stream variable = stream_loss depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss type = fifo
+    hls::stream<ap_int<44> > stream_loss2("loss2_32");
+#pragma HLS stream variable = stream_loss2 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss2 type = fifo
+    hls::stream<ap_int<42> > stream_entropy("entropy_32");
+#pragma HLS stream variable = stream_entropy depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy type = fifo
+    hls::stream<ap_int<11> > stream_nzeros("nzeros_32");
+#pragma HLS stream variable = stream_nzeros depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_nzeros type = fifo
+#else
+    // Float build: ComputeEntropy1 -> ComputeEntropy2 -> ComputeEntropy3.
+    hls::stream<float> stream_loss("loss_32");
+#pragma HLS stream variable = stream_loss depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss type = fifo
+    hls::stream<float> stream_loss2("loss2_32");
+#pragma HLS stream variable = stream_loss2 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss2 type = fifo
+    hls::stream<float> stream_entropy("entropy_32");
+#pragma HLS stream variable = stream_entropy depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy type = fifo
+    hls::stream<float> stream_nzeros("nzeros_32");
+#pragma HLS stream variable = stream_nzeros depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_nzeros type = fifo
+
+    // FIFOs that exist only because of the extra ComputeEntropy2 stage.
+    hls::stream<uint8_t> stream_rectx322("rectx322");
+#pragma HLS stream variable = stream_rectx322 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx322 type = fifo
+    hls::stream<uint8_t> stream_recty322("recty322");
+#pragma HLS stream variable = stream_recty322 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty322 type = fifo
+    hls::stream<float> stream_loss_sum("loss_32_sum");
+#pragma HLS stream variable = stream_loss_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss_sum type = fifo
+    hls::stream<float> stream_loss2_sum("loss2_32_sum");
+#pragma HLS stream variable = stream_loss2_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_loss2_sum type = fifo
+    hls::stream<float> stream_entropy_sum("entropy_32_sum");
+#pragma HLS stream variable = stream_entropy_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy_sum type = fifo
+    hls::stream<float> stream_nzeros_sum("nzeros_32_sum");
+#pragma HLS stream variable = stream_nzeros_sum depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_nzeros_sum type = fifo
+#endif
+
+    hls::stream<float> stream_q("q_32");
+#pragma HLS stream variable = stream_q depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_q type = fifo
+    hls::stream<float> stream_mask("mask_32");
+#pragma HLS stream variable = stream_mask depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_mask type = fifo
+
+    // #pragma HLS dataflow
+    // NOTE(review): dataflow is left disabled here; presumably the enclosing function
+    // applies it, since this function is inlined - confirm.
+    GetQAndMask_16_32<4>(num_tile, stream_rectx, stream_recty, stream_rectx320, stream_recty320, stream_q_org,
+                         stream_mask_org, stream_q, stream_mask);
+
+    ComputeEntropy1<4>(num_tile, stream_rectx320, stream_recty320, stream_rectx321, stream_recty321, stream_q,
+                       stream_dctin, stream_loss, stream_loss2, stream_entropy, stream_nzeros);
+
+#ifdef FIX
+    ComputeEntropy3<4>(num_tile, cost1, mul32x32, stream_rectx321, stream_recty321, stream_loss, stream_loss2,
+                       stream_entropy, stream_nzeros, stream_mask, stream_entropy_32);
+#else
+    ComputeEntropy2<4>(num_tile, stream_rectx321, stream_recty321, stream_rectx322, stream_recty322, stream_loss,
+                       stream_loss2, stream_entropy, stream_nzeros, stream_loss_sum, stream_loss2_sum,
+                       stream_entropy_sum, stream_nzeros_sum);
+
+    ComputeEntropy3<4>(num_tile, cost1, mul32x32, stream_rectx322, stream_recty322, stream_loss_sum, stream_loss2_sum,
+                       stream_entropy_sum, stream_nzeros_sum, stream_mask, stream_entropy_32);
+#endif
+}
+
+/**
+ * @brief Pick a strategy code for every cell of a tile by comparing entropy estimates.
+ *
+ * Per tile: the extent is read from stream_recty / stream_rectx and echoed to the
+ * *_out streams, a token is written to stream_con, the cells are visited row by row,
+ * and finally the active strategy buffer is toggled and a token is written to
+ * stream_ok.
+ *
+ * Per cell (iy, ix), stored at index iy * 8 + ix:
+ *  - one value is read from stream_entropy_8 and added to the accumulator of the
+ *    enclosing 2x2 group; the cell's strategy is set to 0;
+ *  - at the bottom-right cell of a 2x2 group one value is read from
+ *    stream_entropy_16; if it is lower than the group's accumulated value, all four
+ *    cells get strategy 4 and the accumulator is replaced by that value;
+ *  - at the bottom-right cell of a 4x4 group one value is read from
+ *    stream_entropy_32; if it is lower than the sum of the four 2x2 accumulators,
+ *    all sixteen cells get strategy 5.
+ * NOTE(review): 0 / 4 / 5 presumably denote the 8x8 / 16x16 / 32x32 transforms -
+ * confirm against the consumer of the strategy buffers.
+ *
+ * Under __SYNTHESIS__ every tile is written at offset 0 of the strategy buffer; in C
+ * simulation tile tid is written at offset 64 * tid.
+ *
+ * @param num_tile           number of tiles to process
+ * @param stream_rectx       per-tile x extent in cells
+ * @param stream_recty       per-tile y extent in cells
+ * @param stream_rectx_out   copy of the x extent
+ * @param stream_recty_out   copy of the y extent
+ * @param stream_entropy_8   one value per cell
+ * @param stream_entropy_16  one value per complete 2x2 group
+ * @param stream_entropy_32  one value per complete 4x4 group
+ * @param strategy_ping      strategy buffer written for tiles 0, 2, 4, ...
+ * @param strategy_pang      strategy buffer written for tiles 1, 3, 5, ...
+ * @param stream_con         receives one token per tile before the tile is processed
+ * @param stream_ok          receives one token per tile after the tile is processed
+ */
+void CompareEntropy(uint16_t num_tile,
+                    hls::stream<uint8_t>& stream_rectx,
+                    hls::stream<uint8_t>& stream_recty,
+                    hls::stream<uint8_t>& stream_rectx_out,
+                    hls::stream<uint8_t>& stream_recty_out,
+                    hls::stream<float>& stream_entropy_8,
+                    hls::stream<float>& stream_entropy_16,
+                    hls::stream<float>& stream_entropy_32,
+                    uint8_t* strategy_ping,
+                    uint8_t* strategy_pang,
+                    hls::stream<bool>& stream_con,
+                    hls::stream<bool>& stream_ok) {
+    bool flag = true; // true: write strategy_ping, false: write strategy_pang
+LOOP_0:
+    for (uint16_t tid = 0; tid < num_tile; tid++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        uint8_t rect_ysize = stream_recty.read();
+        uint8_t rect_xsize = stream_rectx.read();
+        stream_recty_out.write(rect_ysize);
+        stream_rectx_out.write(rect_xsize);
+        // Offset of this tile inside the strategy buffers: the hardware buffer holds a
+        // single tile, the C-simulation buffer holds all of them back to back.
+#ifdef __SYNTHESIS__
+        const int tile_base = 0;
+#else
+        const int tile_base = 64 * tid;
+#endif
+        // One accumulator per 2x2 group of cells, laid out as a 4x4 grid.
+        float entropy_sum[16] = {0};
+        stream_con.write(1);
+    LOOP_1:
+        for (uint8_t iy = 0; iy < rect_ysize; iy++) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+        LOOP_2:
+            for (uint8_t ix = 0; ix < rect_xsize; ix++) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+#pragma HLS pipeline
+                uint8_t idx = iy * 8 + ix;
+                uint8_t idx_8_sum = (iy / 2) * 4 + ix / 2;
+                entropy_sum[idx_8_sum] += stream_entropy_8.read();
+                if (flag) {
+                    strategy_ping[tile_base + idx] = 0;
+                } else {
+                    strategy_pang[tile_base + idx] = 0;
+                }
+                // Bottom-right cell of a 2x2 group: all four per-cell values are accumulated.
+                if (iy % 2 == 1 && ix % 2 == 1) {
+                    float entropy_16 = stream_entropy_16.read();
+                    float entropy_8 = entropy_sum[idx_8_sum];
+                    if (entropy_16 < entropy_8) {
+                    LOOP_3:
+                        for (uint8_t y = iy - 1; y < iy + 1; y++) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+#pragma HLS unroll
+                        LOOP_4:
+                            for (uint8_t x = ix - 1; x < ix + 1; x++) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+#pragma HLS unroll
+                                uint8_t idx_cell = y * 8 + x;
+                                if (flag) {
+                                    strategy_ping[tile_base + idx_cell] = 4;
+                                } else {
+                                    strategy_pang[tile_base + idx_cell] = 4;
+                                }
+                            }
+                        }
+                        // All four cells share this accumulator, so one store is enough.
+                        entropy_sum[idx_8_sum] = entropy_16;
+                    }
+                    // Bottom-right cell of a 4x4 group: all four 2x2 accumulators are final.
+                    if (iy % 4 == 3 && ix % 4 == 3) {
+                        float entropy_32 = stream_entropy_32.read();
+                        float entropy_16_sum = 0;
+                    LOOP_5:
+                        for (uint8_t y = iy - 3; y < iy + 1; y += 2) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+#pragma HLS unroll
+                        LOOP_6:
+                            for (uint8_t x = ix - 3; x < ix + 1; x += 2) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+#pragma HLS unroll
+                                entropy_16_sum += entropy_sum[(y / 2) * 4 + x / 2];
+                            }
+                        }
+                        if (entropy_32 < entropy_16_sum) {
+                        LOOP_7:
+                            for (uint8_t y = iy - 3; y < iy + 1; y++) {
+#pragma HLS LOOP_TRIPCOUNT min = 4 max = 4
+#pragma HLS unroll
+                            LOOP_8:
+                                for (uint8_t x = ix - 3; x < ix + 1; x++) {
+#pragma HLS LOOP_TRIPCOUNT min = 4 max = 4
+#pragma HLS unroll
+                                    uint8_t idx_cell = y * 8 + x;
+                                    if (flag) {
+                                        strategy_ping[tile_base + idx_cell] = 5;
+                                    } else {
+                                        strategy_pang[tile_base + idx_cell] = 5;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        flag = !flag;
+        stream_ok.write(1);
+    }
+}
+
+void Reorder(uint16_t num_tile,
+             float* ping8,
+             float* pang8,
+             float* ping16,
+             float* pang16,
+             float* ping32,
+             float* pang32,
+             uint8_t* strategy_ping,
+             uint8_t* strategy_pang,
+             hls::stream<uint8_t>& stream_rectx,
+             hls::stream<uint8_t>& stream_recty,
+             hls::stream<uint8_t>& stream_rectx_out,
+             hls::stream<uint8_t>& stream_recty_out,
+             hls::stream<bool>& stream_con,
+             hls::stream<bool>& stream_ok,
+             hls::stream<bool>& stream_con8,
+             hls::stream<bool>& stream_ok8,
+             hls::stream<bool>& stream_con16,
+             hls::stream<bool>& stream_ok16,
+             hls::stream<bool>& stream_con32,
+             hls::stream<bool>& stream_ok32,
+             hls::stream<uint8_t>& stream_strategy,
+             hls::stream<uint8_t>& stream_strategy1,
+             hls::stream<float>& stream_select) {
+    bool flag = true;
+    uint16_t size = 4096;
+    uint8_t w = 64;
+    uint16_t total_size = 4096 * 3;
+    ap_uint<64> visited;
+LOOP_0:
+    for (uint16_t tid = 0; tid < num_tile; tid++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        float entropy_32;
+        float entropy_16;
+        float entropy_8;
+        float entropy_sum[16] = {0};
+        uint8_t rect_ysize = stream_recty.read();
+        uint8_t rect_xsize = stream_rectx.read();
+        stream_recty_out.write(rect_ysize);
+        stream_rectx_out.write(rect_xsize);
+        stream_ok.read();
+        stream_ok8.read();
+        stream_ok16.read();
+        stream_ok32.read();
+        visited = 0;
+    LOOP_1:
+        for (uint8_t iy = 0; iy < rect_ysize; iy++) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+        LOOP_2:
+            for (uint8_t ix = 0; ix < rect_xsize; ix++) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+                uint8_t strategy;
+                uint8_t idx = iy * 8 + ix;
+                if (visited.range(idx, idx) == 0) {
+#ifdef __SYNTHESIS__
+                    if (flag) {
+                        strategy = strategy_ping[idx];
+                    } else {
+                        strategy = strategy_pang[idx];
+                    }
+#else
+                    if (flag) {
+                        strategy = strategy_ping[tid * 64 + idx];
+                    } else {
+                        strategy = strategy_pang[tid * 64 + idx];
+                    }
+#endif
+                    stream_strategy.write(strategy);
+                    stream_strategy1.write(strategy);
+                    if (strategy == 4) {
+                    LOOP_3:
+                        for (uint8_t y = 0; y < 2; y++) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+#pragma HLS unroll
+                        LOOP_4:
+                            for (uint8_t x = 0; x < 2; x++) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+#pragma HLS unroll
+                                uint8_t idx = (iy + y) * 8 + (ix + x);
+                                visited.range(idx, idx) = 1;
+                            }
+                        }
+                    LOOP_5:
+                        for (uint8_t y = 0; y < 16; y++) {
+                        LOOP_6:
+                            for (uint8_t x = 0; x < 16; x++) {
+                            LOOP_7:
+                                for (uint8_t c = 0; c < 3; c++) {
+#pragma HLS pipeline
+                                    uint16_t sy = iy * 8 + y;
+                                    uint16_t sx = ix * 8 + x;
+#ifdef __SYNTHESIS__
+                                    uint16_t idx = c * size + sy * w + sx;
+                                    if (flag) {
+                                        float tmp = ping16[idx];
+                                        stream_select.write(tmp);
+                                    } else {
+                                        float tmp = pang16[idx];
+                                        stream_select.write(tmp);
+                                    }
+#else
+                                    int idx = total_size * tid + c * size + sy * w + sx;
+                                    if (flag) {
+                                        float tmp = ping16[idx];
+                                        stream_select.write(tmp);
+                                    } else {
+                                        float tmp = pang16[idx];
+                                        stream_select.write(tmp);
+                                    }
+#endif
+                                }
+                            }
+                        }
+                    } else if (strategy == 5) {
+                    LOOP_8:
+                        for (uint8_t y = 0; y < 4; y++) {
+#pragma HLS LOOP_TRIPCOUNT min = 4 max = 4
+#pragma HLS unroll
+                        LOOP_9:
+                            for (uint8_t x = 0; x < 4; x++) {
+#pragma HLS LOOP_TRIPCOUNT min = 4 max = 4
+#pragma HLS unroll
+                                uint8_t idx = (iy + y) * 8 + (ix + x);
+                                visited.range(idx, idx) = 1;
+                            }
+                        }
+                    LOOP_10:
+                        for (uint8_t y = 0; y < 32; y++) {
+                        LOOP_11:
+                            for (uint8_t x = 0; x < 32; x++) {
+                            LOOP_12:
+                                for (uint8_t c = 0; c < 3; c++) {
+#pragma HLS pipeline
+                                    uint16_t sy = iy * 8 + y;
+                                    uint16_t sx = ix * 8 + x;
+#ifdef __SYNTHESIS__
+                                    uint16_t idx = c * size + sy * w + sx;
+                                    if (flag) {
+                                        float tmp = ping32[idx];
+                                        stream_select.write(tmp);
+                                    } else {
+                                        float tmp = pang32[idx];
+                                        stream_select.write(tmp);
+                                    }
+#else
+                                    int idx = total_size * tid + c * size + sy * w + sx;
+                                    if (flag) {
+                                        float tmp = ping32[idx];
+                                        stream_select.write(tmp);
+                                    } else {
+                                        float tmp = pang32[idx];
+                                        stream_select.write(tmp);
+                                    }
+#endif
+                                }
+                            }
+                        }
+                    } else {
+                        visited.range(idx, idx) = 1;
+                    LOOP_13:
+                        for (uint8_t y = 0; y < 8; y++) {
+                        LOOP_14:
+                            for (uint8_t x = 0; x < 8; x++) {
+                            LOOP_15:
+                                for (uint8_t c = 0; c < 3; c++) {
+#pragma HLS pipeline
+                                    uint16_t sy = iy * 8 + y;
+                                    uint16_t sx = ix * 8 + x;
+#ifdef __SYNTHESIS__
+                                    uint16_t idx = c * size + sy * w + sx;
+                                    if (flag) {
+                                        float tmp = ping8[idx];
+                                        stream_select.write(tmp);
+                                    } else {
+                                        float tmp = pang8[idx];
+                                        stream_select.write(tmp);
+                                    }
+#else
+                                    int idx = total_size * tid + c * size + sy * w + sx;
+                                    float tmp;
+                                    if (flag) {
+                                        tmp = ping8[idx];
+                                        stream_select.write(tmp);
+                                    } else {
+                                        tmp = pang8[idx];
+                                        stream_select.write(tmp);
+                                    }
+#endif
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        flag = !flag;
+        stream_con.read();
+        stream_con8.read();
+        stream_con16.read();
+        stream_con32.read();
+    }
+}
+
+// Drains one square block of selected DCT coefficients from stream_select.
+// For every sample (row-major, y outer / x inner, both in [0, edge)) three
+// consecutive floats are read and stored at index
+// (row0 + y) * stride + (col0 + x) of plane0, plane1 and plane2, in that order.
+static void ConsumeSelectedDCTBlock(int edge,
+                                    int row0,
+                                    int col0,
+                                    int stride,
+                                    hls::stream<float>& stream_select,
+                                    float* plane0,
+                                    float* plane1,
+                                    float* plane2) {
+    for (int y = 0; y < edge; y++) {
+        for (int x = 0; x < edge; x++) {
+            const int pos = (row0 + y) * stride + (col0 + x);
+            plane0[pos] = stream_select.read();
+            plane1[pos] = stream_select.read();
+            plane2[pos] = stream_select.read();
+        }
+    }
+}
+
+// Consumes the per-block strategy stream together with the selected-coefficient
+// stream and scatters the coefficients into the output planes that match the
+// strategy.
+//
+// The image is walked in tiles of up to 8x8 blocks (a block being 8x8 samples).
+// Inside a tile, every block position not yet marked in `visited` pulls one
+// strategy value:
+//   0 -> one block,   8x8 samples,   written to the *_8x8 planes
+//   4 -> 2x2 blocks, 16x16 samples, written to the *_16x16 planes
+//   5 -> 4x4 blocks, 32x32 samples, written to the *_32x32 planes
+// Any other value consumes no coefficients and marks nothing.
+// Per sample the three floats go to the dcty_*, dctx_* and dctb_* planes, in
+// that order. Destination planes are indexed with a row stride of `xsize`.
+//
+// xsize, ysize     image dimensions in samples
+// stream_strategy  one strategy code per unvisited block position
+// stream_select    coefficients, three floats per sample
+// dct?_NxN         destination planes
+void ConsumeStrategyDCT(int xsize,
+                        int ysize,
+                        hls::stream<char>& stream_strategy,
+                        hls::stream<float>& stream_select,
+                        float* dctx_8x8,
+                        float* dcty_8x8,
+                        float* dctb_8x8,
+                        float* dctx_16x16,
+                        float* dcty_16x16,
+                        float* dctb_16x16,
+                        float* dctx_32x32,
+                        float* dcty_32x32,
+                        float* dctb_32x32) {
+    int xsize_blocks = xsize / 8;
+    int ysize_blocks = ysize / 8;
+    int n_enc_tiles = Div_Ceil(xsize_blocks, 8);
+    // The tile count does not change while iterating, so evaluate it once.
+    const int num_tiles = n_enc_tiles * Div_Ceil(ysize_blocks, 8);
+    ap_uint<64> visited;
+LOOP_0:
+    for (int tid = 0; tid < num_tiles; tid++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        int tx1 = tid % n_enc_tiles;
+        int ty1 = tid / n_enc_tiles;
+        // Clamp the tile to the image so edge tiles only cover existing blocks.
+        // Pure integer min; no round trip through floating point is needed.
+        int by = ty1 * 8;
+        int by1 = ((ty1 + 1) * 8 < ysize_blocks) ? (ty1 + 1) * 8 : ysize_blocks;
+        int bx = tx1 * 8;
+        int bx1 = ((tx1 + 1) * 8 < xsize_blocks) ? (tx1 + 1) * 8 : xsize_blocks;
+        int rect_ysize = by1 - by;
+        int rect_xsize = bx1 - bx;
+        visited = 0;
+        for (int iy = 0; iy < rect_ysize; iy++) {
+            for (int ix = 0; ix < rect_xsize; ix++) {
+                int idx = iy * 8 + ix;
+                if (visited.range(idx, idx) != 0) {
+                    // Already covered by a larger transform that started earlier.
+                    continue;
+                }
+                char strategy = stream_strategy.read();
+                // Top-left sample of this block in image coordinates.
+                const int row0 = ty1 * 64 + iy * 8;
+                const int col0 = tx1 * 64 + ix * 8;
+                // Edge length of the transform in 8x8 blocks; 0 = unknown strategy.
+                int span = 0;
+                switch (strategy) {
+                    case 0:
+                        span = 1;
+                        ConsumeSelectedDCTBlock(8, row0, col0, xsize, stream_select, dcty_8x8, dctx_8x8, dctb_8x8);
+                        break;
+                    case 4:
+                        span = 2;
+                        ConsumeSelectedDCTBlock(16, row0, col0, xsize, stream_select, dcty_16x16, dctx_16x16,
+                                                dctb_16x16);
+                        break;
+                    case 5:
+                        span = 4;
+                        ConsumeSelectedDCTBlock(32, row0, col0, xsize, stream_select, dcty_32x32, dctx_32x32,
+                                                dctb_32x32);
+                        break;
+                    default:
+                        break;
+                }
+                // Mark every block position covered by this transform so that
+                // no further strategy value is read for it.
+                for (int y = 0; y < span; y++) {
+                    for (int x = 0; x < span; x++) {
+                        int covered = (iy + y) * 8 + (ix + x);
+                        visited.range(covered, covered) = 1;
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Produces one integer quant-field value per selected transform.
+//
+// For each of `num_tile` tiles:
+//   1. read the tile extent in blocks (rect_ysize from stream_recty, then
+//      rect_xsize from stream_rectx);
+//   2. buffer rect_ysize * rect_xsize floats from stream_rqf_org into an 8x8
+//      array (row stride 8);
+//   3. walk the block positions row-major; every position not yet marked in
+//      `visited` reads one strategy code from stream_strategy1, looks up the
+//      transform edge length `b` (in blocks) in the external table
+//      `strategy_block`, takes the maximum buffered value over the b x b
+//      positions it covers, marks them visited, and writes
+//      clamp((int)(max * inv_global_scale + 0.5f), 1, 256) to stream_rqf.
+//
+// NOTE(review): a b x b transform is assumed to lie inside the 8x8 tile and
+// inside the rect extent; positions outside the extent were never written to
+// rqf_array. Confirm that the strategy producer guarantees this.
+void SetQuantField(uint16_t num_tile,
+                   float inv_global_scale,
+                   hls::stream<uint8_t>& stream_rectx,
+                   hls::stream<uint8_t>& stream_recty,
+                   hls::stream<float>& stream_rqf_org,
+                   hls::stream<uint8_t>& stream_strategy1,
+                   hls::stream<int>& stream_rqf) {
+    ap_uint<64> visited;
+LOOP_0:
+    for (uint16_t tid = 0; tid < num_tile; tid++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        uint8_t rect_ysize = stream_recty.read();
+        uint8_t rect_xsize = stream_rectx.read();
+        visited = 0;
+        float rqf_array[64];
+    LOOP_1:
+        for (uint8_t iy = 0; iy < rect_ysize; iy++) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+        LOOP_2:
+            for (uint8_t ix = 0; ix < rect_xsize; ix++) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+#pragma HLS pipeline
+                uint16_t index = iy * 8 + ix;
+                rqf_array[index] = stream_rqf_org.read();
+            }
+        }
+    LOOP_3:
+        for (uint8_t y = 0; y < rect_ysize; ++y) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+        LOOP_4:
+            for (uint8_t x = 0; x < rect_xsize; ++x) {
+#pragma HLS LOOP_TRIPCOUNT min = 8 max = 8
+#pragma HLS pipeline
+                // Sentinel just above -FLT_MAX so that any buffered value wins.
+                float block_max = -3.40282e+038;
+                uint8_t idx = y * 8 + x;
+                if (visited.range(idx, idx) == 0) {
+                    uint8_t strategy = stream_strategy1.read();
+                    uint8_t b = strategy_block[strategy];
+                LOOP_5:
+                    for (uint8_t iy = 0; iy < b; iy++) {
+                    LOOP_6:
+                        for (uint8_t ix = 0; ix < b; ix++) {
+#pragma HLS pipeline
+                            uint16_t cell = (iy + y) * 8 + (ix + x);
+                            visited.range(cell, cell) = 1;
+                            block_max = fmax(rqf_array[cell], block_max);
+                        }
+                    }
+                    // Round to nearest by adding 0.5 and truncating.
+                    const float scaled = block_max * inv_global_scale + 0.5f;
+                    // Clamp in the float domain BEFORE narrowing: converting a
+                    // float outside the int16_t range (or NaN) to int16_t is
+                    // undefined behaviour. For in-range inputs the result is
+                    // identical to truncating first and clamping afterwards.
+                    int16_t val;
+                    if (scaled >= 256.0f) {
+                        val = 256;
+                    } else if (scaled >= 1.0f) {
+                        val = (int16_t)scaled;
+                    } else {
+                        // Covers everything below 1 as well as NaN.
+                        val = 1;
+                    }
+                    stream_rqf.write(val);
+                }
+            }
+        }
+    }
+}
+
+// Fans one DCT coefficient stream, and its per-tile rectangle sizes, out to
+// two consumers.
+//
+// Per tile the rectangle height is read from stream_recty and the width from
+// stream_rectx; both are forwarded to the *_out0 and *_out1 rectangle streams.
+// Then, for every complete N x N group of blocks that fits into the rectangle
+// and for each of the 3 channels, 64 * N * N floats are copied from
+// stream_dctin to both stream_dctout0 and stream_dctout1.
+//
+// N: transform edge length in 8x8 blocks (instantiated with 1, 2 and 4 in
+//    this file).
+template <int N>
+void DupDCT(uint16_t num_tile,
+            hls::stream<uint8_t>& stream_rectx,
+            hls::stream<uint8_t>& stream_recty,
+            hls::stream<uint8_t>& stream_rectx_out0,
+            hls::stream<uint8_t>& stream_recty_out0,
+            hls::stream<uint8_t>& stream_rectx_out1,
+            hls::stream<uint8_t>& stream_recty_out1,
+            hls::stream<float>& stream_dctin,
+            hls::stream<float>& stream_dctout0,
+            hls::stream<float>& stream_dctout1) {
+    // Coefficients per channel of one transform: 64 per 8x8 block, N*N blocks.
+    const uint16_t coeffs_per_channel = 64 * static_cast<uint8_t>(N * N);
+LOOP_0:
+    for (uint16_t tile = 0; tile < num_tile; tile++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+        const uint8_t rows = stream_recty.read();
+        const uint8_t cols = stream_rectx.read();
+        // Both consumers need the tile extent to run the same loop nest.
+        stream_recty_out0.write(rows);
+        stream_rectx_out0.write(cols);
+        stream_recty_out1.write(rows);
+        stream_rectx_out1.write(cols);
+    LOOP_1:
+        for (uint8_t by = 0; by + N - 1 < rows; by += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+        LOOP_2:
+            for (uint8_t bx = 0; bx + N - 1 < cols; bx += N) {
+#pragma HLS LOOP_TRIPCOUNT min = 2 max = 2
+            LOOP_3:
+                for (uint8_t ch = 0; ch < 3; ch++) {
+#pragma HLS LOOP_TRIPCOUNT min = 3 max = 3
+                LOOP_4:
+                    for (uint16_t k = 0; k < coeffs_per_channel; k++) {
+#pragma HLS LOOP_TRIPCOUNT min = 1024 max = 1024
+#pragma HLS pipeline II = 1
+                        const float coeff = stream_dctin.read();
+                        stream_dctout0.write(coeff);
+                        stream_dctout1.write(coeff);
+                    }
+                }
+            }
+        }
+    }
+}
+
+// AC-strategy selection stage: wires the sub-stages below together through
+// local FIFOs and ping/pong buffers.
+//
+// Visible data path (the semantics of the callees are defined elsewhere):
+//   GetACSSize         fans the tile rectangle sizes out to rect streams
+//                      0, 1, 2, 3 and 10
+//   DupQuantAndMask    splits stream_q_org / stream_mask_org into 8/16/32 copies
+//   DupDCT<1|2|4>      duplicates each DCT input stream into *_0 and *_1
+//   EstimateEntropy*   consume the *_0 copies plus quant/mask, emit entropies
+//   CompareEntropy     consumes the three entropies, fills strategy_ping/pang
+//   BufferN<1|2|4>     store the *_1 copies into ping/pang{8,16,32}
+//   Reorder            reads all buffers, emits stream_strategy,
+//                      stream_strategy1 and stream_select
+//   SetQuantField      consumes stream_rqf_org + stream_strategy1, emits
+//                      stream_rqf
+// The stream_con* / stream_ok* FIFOs (depth 2) are passed to both the buffer
+// writers and Reorder; presumably they hand ping/pong ownership back and
+// forth. Confirm against those functions.
+//
+// Parameters:
+//   num_tile            number of tiles to process
+//   ysize, xsize        image dimensions, forwarded to GetACSSize
+//   cost1               forwarded to the EstimateEntropy* stages
+//   butteraugli_target  not referenced in this function body
+//   inv_global_scale    forwarded to SetQuantField
+//   mul8x8/16x16/32x32  per-size multipliers for the EstimateEntropy* stages
+//   stream_*            inputs and outputs as wired above
+//
+// This function is marked INLINE and declares no dataflow region of its own;
+// it is called from hls_lossy_enc_compute_dataflow, which carries the
+// DATAFLOW pragma.
+void ComputeTileACSHLS(uint16_t num_tile,
+                       short ysize,
+                       short xsize,
+                       float cost1,
+                       float butteraugli_target,
+                       float inv_global_scale,
+                       float mul8x8,
+                       float mul16x16,
+                       float mul32x32,
+                       hls::stream<uint8_t>& stream_rectx_acs,
+                       hls::stream<uint8_t>& stream_recty_acs,
+                       hls::stream<float>& stream_rqf_org,
+                       hls::stream<float>& stream_q_org,
+                       hls::stream<float>& stream_mask_org,
+                       hls::stream<float>& stream_dctin8,
+                       hls::stream<float>& stream_dctin16,
+                       hls::stream<float>& stream_dctin32,
+                       hls::stream<uint8_t>& stream_strategy,
+                       hls::stream<float>& stream_select,
+                       hls::stream<int>& stream_rqf) {
+#pragma HLS INLINE
+    // ---- tile rectangle size FIFOs, one x/y pair per consumer ----
+    hls::stream<uint8_t> stream_rectx0("rectx0");
+#pragma HLS stream variable = stream_rectx0 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx0 type = fifo
+    hls::stream<uint8_t> stream_recty0("recty0");
+#pragma HLS stream variable = stream_recty0 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty0 type = fifo
+
+    hls::stream<uint8_t> stream_rectx1("rectx1");
+#pragma HLS stream variable = stream_rectx1 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx1 type = fifo
+    hls::stream<uint8_t> stream_recty1("recty1");
+#pragma HLS stream variable = stream_recty1 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty1 type = fifo
+
+    hls::stream<uint8_t> stream_rectx2("rectx2");
+#pragma HLS stream variable = stream_rectx2 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx2 type = fifo
+    hls::stream<uint8_t> stream_recty2("recty2");
+#pragma HLS stream variable = stream_recty2 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty2 type = fifo
+
+    hls::stream<uint8_t> stream_rectx3("rectx3");
+#pragma HLS stream variable = stream_rectx3 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx3 type = fifo
+    hls::stream<uint8_t> stream_recty3("recty3");
+#pragma HLS stream variable = stream_recty3 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty3 type = fifo
+
+    hls::stream<uint8_t> stream_rectx4("rectx4");
+#pragma HLS stream variable = stream_rectx4 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx4 type = fifo
+    hls::stream<uint8_t> stream_recty4("recty4");
+#pragma HLS stream variable = stream_recty4 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty4 type = fifo
+
+    hls::stream<uint8_t> stream_rectx5("rectx5");
+#pragma HLS stream variable = stream_rectx5 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx5 type = fifo
+    hls::stream<uint8_t> stream_recty5("recty5");
+#pragma HLS stream variable = stream_recty5 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty5 type = fifo
+
+    hls::stream<uint8_t> stream_rectx6("rectx6");
+#pragma HLS stream variable = stream_rectx6 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx6 type = fifo
+    hls::stream<uint8_t> stream_recty6("recty6");
+#pragma HLS stream variable = stream_recty6 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty6 type = fifo
+
+    hls::stream<uint8_t> stream_rectx7("rectx7");
+#pragma HLS stream variable = stream_rectx7 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx7 type = fifo
+    hls::stream<uint8_t> stream_recty7("recty7");
+#pragma HLS stream variable = stream_recty7 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty7 type = fifo
+
+    hls::stream<uint8_t> stream_rectx8("rectx8");
+#pragma HLS stream variable = stream_rectx8 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx8 type = fifo
+    hls::stream<uint8_t> stream_recty8("recty8");
+#pragma HLS stream variable = stream_recty8 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty8 type = fifo
+
+    hls::stream<uint8_t> stream_rectx9("rectx9");
+#pragma HLS stream variable = stream_rectx9 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx9 type = fifo
+    hls::stream<uint8_t> stream_recty9("recty9");
+#pragma HLS stream variable = stream_recty9 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty9 type = fifo
+
+    hls::stream<uint8_t> stream_rectx10("rectx10");
+#pragma HLS stream variable = stream_rectx10 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx10 type = fifo
+    hls::stream<uint8_t> stream_recty10("recty10");
+#pragma HLS stream variable = stream_recty10 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty10 type = fifo
+
+    hls::stream<uint8_t> stream_rectx11("rectx11");
+#pragma HLS stream variable = stream_rectx11 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx11 type = fifo
+    hls::stream<uint8_t> stream_recty11("recty11");
+#pragma HLS stream variable = stream_recty11 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty11 type = fifo
+
+    hls::stream<uint8_t> stream_rectx12("rectx12");
+#pragma HLS stream variable = stream_rectx12 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_rectx12 type = fifo
+    hls::stream<uint8_t> stream_recty12("recty12");
+#pragma HLS stream variable = stream_recty12 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_recty12 type = fifo
+
+    // ---- duplicated DCT inputs: *_0 feeds entropy estimation, *_1 feeds BufferN ----
+    hls::stream<float> stream_dctin8_0("dctin8_0");
+#pragma HLS stream variable = stream_dctin8_0 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_dctin8_0 type = fifo
+    hls::stream<float> stream_dctin16_0("dctin16_0");
+#pragma HLS stream variable = stream_dctin16_0 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_dctin16_0 type = fifo
+    hls::stream<float> stream_dctin32_0("dctin32_0");
+#pragma HLS stream variable = stream_dctin32_0 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_dctin32_0 type = fifo
+    hls::stream<float> stream_dctin8_1("dctin8_1");
+#pragma HLS stream variable = stream_dctin8_1 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_dctin8_1 type = fifo
+    hls::stream<float> stream_dctin16_1("dctin16_1");
+#pragma HLS stream variable = stream_dctin16_1 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_dctin16_1 type = fifo
+    hls::stream<float> stream_dctin32_1("dctin32_1");
+#pragma HLS stream variable = stream_dctin32_1 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_dctin32_1 type = fifo
+
+    // ---- entropy estimates, one stream per transform size ----
+    hls::stream<float> stream_entropy_8("entropy_8");
+#pragma HLS stream variable = stream_entropy_8 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy_8 type = fifo
+    hls::stream<float> stream_entropy_16("entropy_16");
+#pragma HLS stream variable = stream_entropy_16 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy_16 type = fifo
+    hls::stream<float> stream_entropy_32("entropy_32");
+#pragma HLS stream variable = stream_entropy_32 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_entropy_32 type = fifo
+
+    // ---- shallow handshake FIFOs between the buffer writers and Reorder ----
+    hls::stream<bool> stream_con("con");
+#pragma HLS stream variable = stream_con depth = 2
+#pragma HLS BIND_STORAGE variable = stream_con type = fifo
+    hls::stream<bool> stream_ok("ok");
+#pragma HLS stream variable = stream_ok depth = 2
+#pragma HLS BIND_STORAGE variable = stream_ok type = fifo
+    hls::stream<bool> stream_con8("con8");
+#pragma HLS stream variable = stream_con8 depth = 2
+#pragma HLS BIND_STORAGE variable = stream_con8 type = fifo
+    // Named "ok8" (was "ok", colliding with stream_ok in simulation diagnostics).
+    hls::stream<bool> stream_ok8("ok8");
+#pragma HLS stream variable = stream_ok8 depth = 2
+#pragma HLS BIND_STORAGE variable = stream_ok8 type = fifo
+    hls::stream<bool> stream_con16("con16");
+#pragma HLS stream variable = stream_con16 depth = 2
+#pragma HLS BIND_STORAGE variable = stream_con16 type = fifo
+    hls::stream<bool> stream_ok16("ok16");
+#pragma HLS stream variable = stream_ok16 depth = 2
+#pragma HLS BIND_STORAGE variable = stream_ok16 type = fifo
+    hls::stream<bool> stream_con32("con32");
+#pragma HLS stream variable = stream_con32 depth = 2
+#pragma HLS BIND_STORAGE variable = stream_con32 type = fifo
+    hls::stream<bool> stream_ok32("ok32");
+#pragma HLS stream variable = stream_ok32 depth = 2
+#pragma HLS BIND_STORAGE variable = stream_ok32 type = fifo
+
+    // ---- second copy of the chosen strategy, consumed by SetQuantField ----
+    hls::stream<uint8_t> stream_strategy1("strategy1");
+#pragma HLS stream variable = stream_strategy1 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_strategy1 type = fifo
+
+    // ---- quant / mask copies per transform size ----
+    // Each stream carries its own size suffix in the debug name; the 16 and 32
+    // variants previously reused the "_8" names.
+    hls::stream<float> stream_q_org_8("q_org_8");
+#pragma HLS stream variable = stream_q_org_8 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_q_org_8 type = fifo
+    hls::stream<float> stream_mask_org_8("mask_org_8");
+#pragma HLS stream variable = stream_mask_org_8 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_mask_org_8 type = fifo
+    hls::stream<float> stream_q_org_16("q_org_16");
+#pragma HLS stream variable = stream_q_org_16 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_q_org_16 type = fifo
+    hls::stream<float> stream_mask_org_16("mask_org_16");
+#pragma HLS stream variable = stream_mask_org_16 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_mask_org_16 type = fifo
+    hls::stream<float> stream_q_org_32("q_org_32");
+#pragma HLS stream variable = stream_q_org_32 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_q_org_32 type = fifo
+    hls::stream<float> stream_mask_org_32("mask_org_32");
+#pragma HLS stream variable = stream_mask_org_32 depth = 1024
+#pragma HLS BIND_STORAGE variable = stream_mask_org_32 type = fifo
+
+// #pragma HLS dataflow
+
+    // Strategy ping/pong buffers: on-chip arrays for synthesis, heap memory
+    // for C simulation.
+#ifdef __SYNTHESIS__
+    uint8_t strategy_ping[64];
+    uint8_t strategy_pang[64];
+#pragma HLS bind_storage variable = strategy_ping type = RAM_T2P impl = bram
+#pragma HLS shared variable = strategy_ping
+#pragma HLS stable variable = strategy_ping
+#pragma HLS bind_storage variable = strategy_pang type = RAM_T2P impl = bram
+#pragma HLS shared variable = strategy_pang
+#pragma HLS stable variable = strategy_pang
+#else
+    uint8_t* strategy_ping = (uint8_t*)malloc(sizeof(uint8_t) * 64 * 32 * 32);
+    uint8_t* strategy_pang = (uint8_t*)malloc(sizeof(uint8_t) * 64 * 32 * 32);
+#endif
+
+    // Coefficient ping/pong buffers per transform size: 3 * 64 * 64 floats
+    // each for synthesis, 2048 * 2048 * 3 floats each for C simulation.
+#ifdef __SYNTHESIS__
+    float ping8[3 * 64 * 64];
+    float pang8[3 * 64 * 64];
+    float ping16[3 * 64 * 64];
+    float pang16[3 * 64 * 64];
+    float ping32[3 * 64 * 64];
+    float pang32[3 * 64 * 64];
+#pragma HLS bind_storage variable = ping8 type = RAM_T2P impl = uram
+#pragma HLS shared variable = ping8
+#pragma HLS stable variable = ping8
+#pragma HLS bind_storage variable = pang8 type = RAM_T2P impl = uram
+#pragma HLS shared variable = pang8
+#pragma HLS stable variable = pang8
+#pragma HLS bind_storage variable = ping16 type = RAM_T2P impl = uram
+#pragma HLS shared variable = ping16
+#pragma HLS stable variable = ping16
+#pragma HLS bind_storage variable = pang16 type = RAM_T2P impl = uram
+#pragma HLS shared variable = pang16
+#pragma HLS stable variable = pang16
+#pragma HLS bind_storage variable = ping32 type = RAM_T2P impl = uram
+#pragma HLS shared variable = ping32
+#pragma HLS stable variable = ping32
+#pragma HLS bind_storage variable = pang32 type = RAM_T2P impl = uram
+#pragma HLS shared variable = pang32
+#pragma HLS stable variable = pang32
+#else
+    float* ping8 = (float*)malloc(sizeof(float) * 2048 * 2048 * 3);
+    float* pang8 = (float*)malloc(sizeof(float) * 2048 * 2048 * 3);
+    float* ping16 = (float*)malloc(sizeof(float) * 2048 * 2048 * 3);
+    float* pang16 = (float*)malloc(sizeof(float) * 2048 * 2048 * 3);
+    float* ping32 = (float*)malloc(sizeof(float) * 2048 * 2048 * 3);
+    float* pang32 = (float*)malloc(sizeof(float) * 2048 * 2048 * 3);
+#endif
+
+    // #pragma HLS dataflow
+    GetACSSize(xsize, ysize, stream_rectx_acs, stream_recty_acs, stream_rectx0, stream_recty0, stream_rectx1,
+               stream_recty1, stream_rectx2, stream_recty2, stream_rectx3, stream_recty3, stream_rectx10,
+               stream_recty10);
+
+    DupQuantAndMask(num_tile, stream_rectx0, stream_recty0, stream_q_org, stream_mask_org, stream_q_org_8,
+                    stream_mask_org_8, stream_q_org_16, stream_mask_org_16, stream_q_org_32, stream_mask_org_32);
+
+    DupDCT<1>(num_tile, stream_rectx1, stream_recty1, stream_rectx4, stream_recty4, stream_rectx7, stream_recty7,
+              stream_dctin8, stream_dctin8_0, stream_dctin8_1);
+
+    DupDCT<2>(num_tile, stream_rectx2, stream_recty2, stream_rectx5, stream_recty5, stream_rectx8, stream_recty8,
+              stream_dctin16, stream_dctin16_0, stream_dctin16_1);
+
+    DupDCT<4>(num_tile, stream_rectx3, stream_recty3, stream_rectx6, stream_recty6, stream_rectx9, stream_recty9,
+              stream_dctin32, stream_dctin32_0, stream_dctin32_1);
+
+    EstimateEntropy8(num_tile, cost1, mul8x8, stream_rectx4, stream_recty4, stream_q_org_8, stream_mask_org_8,
+                     stream_dctin8_0, stream_entropy_8);
+
+    EstimateEntropy16(num_tile, cost1, mul16x16, stream_rectx5, stream_recty5, stream_q_org_16, stream_mask_org_16,
+                      stream_dctin16_0, stream_entropy_16);
+
+    EstimateEntropy32(num_tile, cost1, mul32x32, stream_rectx6, stream_recty6, stream_q_org_32, stream_mask_org_32,
+                      stream_dctin32_0, stream_entropy_32);
+
+    CompareEntropy(num_tile, stream_rectx10, stream_recty10, stream_rectx11, stream_recty11, stream_entropy_8,
+                   stream_entropy_16, stream_entropy_32, strategy_ping, strategy_pang, stream_con, stream_ok);
+
+    BufferN<1>(num_tile, ping8, pang8, stream_rectx7, stream_recty7, stream_dctin8_1, stream_con8, stream_ok8);
+
+    BufferN<2>(num_tile, ping16, pang16, stream_rectx8, stream_recty8, stream_dctin16_1, stream_con16, stream_ok16);
+
+    BufferN<4>(num_tile, ping32, pang32, stream_rectx9, stream_recty9, stream_dctin32_1, stream_con32, stream_ok32);
+
+    Reorder(num_tile, ping8, pang8, ping16, pang16, ping32, pang32, strategy_ping, strategy_pang, stream_rectx11,
+            stream_recty11, stream_rectx12, stream_recty12, stream_con, stream_ok, stream_con8, stream_ok8,
+            stream_con16, stream_ok16, stream_con32, stream_ok32, stream_strategy, stream_strategy1, stream_select);
+
+    SetQuantField(num_tile, inv_global_scale, stream_rectx12, stream_recty12, stream_rqf_org, stream_strategy1,
+                  stream_rqf);
+
+#ifndef __SYNTHESIS__
+    // C simulation only: every stage above has returned, so nothing refers to
+    // the heap-backed ping/pong buffers any more. Release them; otherwise
+    // each call leaks them (the six coefficient buffers are 48 MiB each).
+    free(strategy_ping);
+    free(strategy_pang);
+    free(ping8);
+    free(pang8);
+    free(ping16);
+    free(pang16);
+    free(ping32);
+    free(pang32);
+#endif
+}
+
+// Emits, for every tile in row-major order, the tile extent in 8x8 blocks.
+//
+// The image holds xsize / 8 by ysize / 8 blocks and is cut into tiles of up
+// to 8x8 blocks; tiles on the right and bottom edges are clamped to the
+// blocks that actually exist. Each tile's width and height are written to all
+// three consumer stream pairs (dct, acs, dc).
+void GetSourceSize(short xsize,
+                   short ysize,
+                   hls::stream<uint8_t>& stream_rectx_dct,
+                   hls::stream<uint8_t>& stream_recty_dct,
+                   hls::stream<uint8_t>& stream_rectx_acs,
+                   hls::stream<uint8_t>& stream_recty_acs,
+                   hls::stream<uint8_t>& stream_rectx_dc,
+                   hls::stream<uint8_t>& stream_recty_dc) {
+    uint16_t blocks_x = xsize / 8;
+    uint16_t blocks_y = ysize / 8;
+LOOP_0:
+    for (uint16_t ty = 0; ty < Div_Ceil(blocks_y, 8); ty++) {
+    LOOP_1:
+        for (uint16_t tx = 0; tx < Div_Ceil(blocks_x, 8); tx++) {
+#pragma HLS LOOP_TRIPCOUNT min = 64 max = 64
+            // First block of this tile and one-past-the-last block, clamped
+            // to the image.
+            uint16_t first_row = ty * 8;
+            uint16_t first_col = tx * 8;
+            uint16_t end_row = blocks_y;
+            if ((ty + 1) * 8 < blocks_y) {
+                end_row = (ty + 1) * 8;
+            }
+            uint16_t end_col = blocks_x;
+            if ((tx + 1) * 8 < blocks_x) {
+                end_col = (tx + 1) * 8;
+            }
+            uint8_t tile_h = end_row - first_row;
+            uint8_t tile_w = end_col - first_col;
+            // Same extent for every consumer, width before height.
+            stream_rectx_dct.write(tile_w);
+            stream_recty_dct.write(tile_h);
+            stream_rectx_acs.write(tile_w);
+            stream_recty_acs.write(tile_h);
+            stream_rectx_dc.write(tile_w);
+            stream_recty_dc.write(tile_h);
+        }
+    }
+}
+
+//=========================================================//
+// data flow region
+//=========================================================//
+void hls_lossy_enc_compute_dataflow(
+    // config
+    uint32_t ysize,
+    uint32_t xsize,
+    int masking_field_stride,
+    int quant_field_stride,
+    float butteraugli_target,
+    float cost1,
+    float inv_global_scale,
+    float* hls_opsin_1,          // mm1, input
+    float* hls_opsin_2,          // mm2, input
+    float* hls_opsin_3,          // mm3, input
+    float* quant_field_row,      // mm4, input
+    float* masking_field_row,    // mm5, input
+    float* aq_map_f,             // mm6, input
+    int8_t* cmap_axi,            // mm7, output
+    int* ac_coef_axiout,         // mm8, output
+    unsigned char* strategy_all, // mm9, output
+    int* raw_quant_field_i,      // mm10, output
+    float* hls_dc8x8,            // mm11, output
+    float* hls_dc16x16,          // mm12, output
+    float* hls_dc32x32,          // mm13, output
+    int32_t num_zeros[3][320],
+    hls::stream<ap_uint<3>, 2>& used_orders_strm) {
+#pragma HLS DATAFLOW
+
+    int tile_xsize = (xsize + 63) / 64 * 64;
+    int tile_ysize = (ysize + 63) / 64 * 64;
+    int xnum_tile = (xsize + 63) / 64;
+    int ynum_tile = (ysize + 63) / 64;
+    int num_tile = xnum_tile * ynum_tile;
+    hls::stream<uint8_t, 1024> stream_rectx_dct;
+    hls::stream<uint8_t, 1024> stream_recty_dct;
+    hls::stream<uint8_t, 1024> stream_rectx_acs;
+    hls::stream<uint8_t, 1024> stream_recty_acs;
+    hls::stream<uint8_t, 1024> stream_rectx_dc;
+    hls::stream<uint8_t, 1024> stream_recty_dc;
+    GetSourceSize(xsize, ysize, stream_rectx_dct, stream_recty_dct, stream_rectx_acs, stream_recty_acs, stream_rectx_dc,
+                  stream_recty_dc);
+
+    // load data
+    hls::stream<float, 4096> stream_q_org("q_org");
+    hls::stream<float, 4096> stream_mask_org("mask_org");
+    hls::stream<float, 4096> stream_rqf_org("rqf_org");
+    load_rqf_mask(xsize, ysize, aq_map_f, masking_field_row, quant_field_row, quant_field_stride, stream_q_org,
+                  stream_mask_org, stream_rqf_org);
+
+    // load pixel
+    hls::stream<float, 4096> opsin8x8_stream;
+    hls::stream<float, 4096> opsin16x16_stream;
+    hls::stream<float, 4096> opsin32x32_stream;
+    loadPixel(ysize, xsize, hls_opsin_1, hls_opsin_2, hls_opsin_3, opsin8x8_stream, opsin16x16_stream,
+              opsin32x32_stream);
+
+    // 1. dct8x8, dct16x16, dct32x32
+    hls::stream<float, 4096> ac_coef8x8_stream("ac_coef8");
+    hls::stream<float, 4096> ac_coef16x16_stream("ac_coef16");
+    hls::stream<float, 4096> ac_coef32x32_stream("ac_coef32");
+
+    hls::stream<float, 4096> dc_coef8x8_stream("dc_coef8");
+    hls::stream<float, 4096> dc_coef16x16_stream("dc_coef16");
+    hls::stream<float, 4096> dc_coef32x32_stream("dc_coef32");
+    hls_dct_top(ysize, xsize, stream_rectx_dct, stream_recty_dct, opsin8x8_stream, opsin16x16_stream, opsin32x32_stream,
+                ac_coef8x8_stream, ac_coef16x16_stream, ac_coef32x32_stream, dc_coef8x8_stream, dc_coef16x16_stream,
+                dc_coef32x32_stream);
+
+    // 2. ac strategy
+    float k8x8mul1 = -0.55;
+    float k8x8mul2 = 1.0735757687292623f;
+    float k8x8base = 1.4;
+    float mul8x8 = k8x8mul2 + k8x8mul1 / (butteraugli_target + k8x8base);
+    float k16X16mul1 = -0.35;
+    float k16X16mul2 = 0.82098067020252011;
+    float k16X16base = 2.0;
+    float entropy_mul16X16 = k16X16mul2 + k16X16mul1 / (butteraugli_target + k16X16base);
+    float entropy_mul32X32 = 0.9188333021616017f;
+    hls::stream<uint8_t, 4096> acs_stream;
+    hls::stream<float, 4096> dct_select_stream;
+    hls::stream<uint8_t, 4096> acs_out_stream("acs_out_stream");
+    hls::stream<int, 4096> rqf_out_stream("rqf_out_stream");
+    hls::stream<int, 4096> rqf_out_stream2("rqf_out_stream2");
+    ComputeTileACSHLS((uint16_t)num_tile, (short)ysize, (short)xsize, cost1, butteraugli_target, inv_global_scale,
+                      mul8x8, entropy_mul16X16, entropy_mul32X32, stream_rectx_acs, stream_recty_acs, stream_rqf_org,
+                      stream_q_org, stream_mask_org, ac_coef8x8_stream, ac_coef16x16_stream, ac_coef32x32_stream,
+                      acs_stream, dct_select_stream, rqf_out_stream);
+
+    // 3. cfl heuristic
+    hls::stream<int8_t, 4096> cmapx_stream("cmapx_stream");
+    hls::stream<int8_t, 4096> cmapb_stream("cmapb_stream");
+    hls::stream<int8_t, 4096> cmapx_axi_stream("cmpax_axi_stream");
+    hls::stream<int8_t, 4096> cmapb_axi_stream("cmapb_axi_stream");
+
+    hls::stream<int> rqf_cfl_stream("rqf_cfl_stream");
+#pragma HLS stream variable = rqf_cfl_stream depth = 4096 * 6
+#pragma HLS BIND_STORAGE variable = rqf_cfl_stream type = fifo impl = uram
+    hls::stream<uint8_t> acs_cfl_stream("acs_cfl_stream");
+#pragma HLS stream variable = acs_cfl_stream depth = 4096 * 6
+#pragma HLS BIND_STORAGE variable = acs_cfl_stream type = fifo impl = uram
+    hls::stream<float> ac_coef_cfl_stream("ac_coef_cfl_stream");
+#pragma HLS stream variable = ac_coef_cfl_stream depth = 4096 * 6
+#pragma HLS BIND_STORAGE variable = ac_coef_cfl_stream type = fifo impl = uram
+
+    hls_CFLComputeTile(xsize, ysize, dct_select_stream, rqf_out_stream, acs_stream, cmapx_stream, cmapb_stream,
+                       cmapx_axi_stream, cmapb_axi_stream, ac_coef_cfl_stream, rqf_cfl_stream, acs_cfl_stream);
+
+    // 4. ComputeCoefficients
+    hls::stream<uint8_t, 4096> acs_coeff_stream1("acs_coeff_stream1");
+    hls::stream<int, 4096> ac_quant_coeff_stream("ac_quant_coeff_stream");
+    hls::stream<int, 4096> coeff_axi_stream("coeff_axi_stream");
+    hls::stream<uint8_t, 4096> acs_axi_stream("acs_axi_stream");
+    hls::stream<int, 4096> qf_axi_stream("qf_axi_stream");
+    hls_ComputeCoefficients(xsize, ysize, acs_cfl_stream, ac_coef_cfl_stream, rqf_cfl_stream, cmapx_stream,
+                            cmapb_stream, acs_coeff_stream1, ac_quant_coeff_stream, coeff_axi_stream, acs_axi_stream,
+                            qf_axi_stream);
+
+    // 5. ComputeAllCoeffOrders
+    count_numzeros(xsize, ysize, acs_coeff_stream1, ac_quant_coeff_stream, used_orders_strm, num_zeros);
+
+    // 6. axi writeout
+    dc_writeout(ysize, xsize, hls_dc8x8, hls_dc16x16, hls_dc32x32, stream_rectx_dc, stream_recty_dc, dc_coef8x8_stream,
+                dc_coef16x16_stream, dc_coef32x32_stream);
+    cfl_writeout(xsize, ysize, cmapx_axi_stream, cmapb_axi_stream, cmap_axi);
+    ac_coeff_writeout(xsize, ysize, coeff_axi_stream, ac_coef_axiout);
+    acs_rqf_writeout(xsize, ysize, strategy_all, raw_quant_field_i, acs_axi_stream, qf_axi_stream);
+}
+
+void lossy_acc::lossyEncComp(int config[MAX_NUM_CONFIG],      // mm15, input
+                             float config_fl[MAX_NUM_CONFIG], // mm16, input
+                             float* hls_opsin_1,              // mm1, input
+                             float* hls_opsin_2,              // mm2, input
+                             float* hls_opsin_3,              // mm3, input
+                             float* quant_field_row,          // mm4, input
+                             float* masking_field_row,        // mm5, input
+                             float* aq_map_f,                 // mm6, input
+                             int8_t* cmap_axi,                // mm7, output
+                             int* ac_coef_axiout,             // mm8, output
+                             unsigned char* strategy_all,     // mm9, output
+                             int* raw_quant_field_i,          // mm10, output
+                             uint32_t* hls_order,             // mm11, output
+                             float* hls_dc8x8,                // mm12, output
+                             float* hls_dc16x16,              // mm13, output
+                             float* hls_dc32x32               // mm14, output
+                             ) {
+#pragma HLS INTERFACE mode = m_axi bundle = mm1 latency = 32 offset = direct num_write_outstanding =            \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        hls_opsin_1
+#pragma HLS INTERFACE mode = m_axi bundle = mm2 latency = 32 offset = direct num_write_outstanding =            \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        hls_opsin_2
+#pragma HLS INTERFACE mode = m_axi bundle = mm3 latency = 32 offset = direct num_write_outstanding =            \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        hls_opsin_3
+#pragma HLS INTERFACE mode = m_axi bundle = mm4 latency = 32 offset = direct num_write_outstanding = \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth =       \
+        BLOCK8_H* BLOCK8_W port = quant_field_row
+#pragma HLS INTERFACE mode = m_axi bundle = mm5 latency = 32 offset = direct num_write_outstanding = \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth =       \
+        BLOCK8_H* BLOCK8_W port = masking_field_row
+#pragma HLS INTERFACE mode = m_axi bundle = mm6 latency = 32 offset = direct num_write_outstanding = \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth =       \
+        BLOCK8_H* BLOCK8_W port = aq_map_f
+#pragma HLS INTERFACE mode = m_axi bundle = mm7 latency = 32 offset = direct num_write_outstanding = \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth =       \
+        TILE_W* TILE_H* 2 port = cmap_axi
+#pragma HLS INTERFACE mode = m_axi bundle = mm8 latency = 32 offset = direct num_write_outstanding =            \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        ac_coef_axiout
+#pragma HLS INTERFACE mode = m_axi bundle = mm9 latency = 32 offset = direct num_write_outstanding = \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth =       \
+        BLOCK8_W* BLOCK8_H port = strategy_all
+#pragma HLS INTERFACE mode = m_axi bundle = mm10 latency = 32 offset = direct num_write_outstanding = \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth =        \
+        BLOCK8_H* BLOCK8_W port = raw_quant_field_i
+#pragma HLS INTERFACE mode = m_axi bundle = mm11 latency = 32 offset = direct num_write_outstanding =           \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = MAX_ORDER port = \
+        hls_order
+#pragma HLS INTERFACE mode = m_axi bundle = mm12 latency = 32 offset = direct num_write_outstanding =           \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        hls_dc8x8
+#pragma HLS INTERFACE mode = m_axi bundle = mm13 latency = 32 offset = direct num_write_outstanding =           \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        hls_dc16x16
+#pragma HLS INTERFACE mode = m_axi bundle = mm14 latency = 32 offset = direct num_write_outstanding =           \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        hls_dc32x32
+#pragma HLS INTERFACE mode = m_axi bundle = mm15 latency = 32 offset = direct num_write_outstanding =                \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = MAX_NUM_CONFIG port = \
+        config
+#pragma HLS INTERFACE mode = m_axi bundle = mm16 latency = 32 offset = direct num_write_outstanding =                \
+    1 num_read_outstanding = 64 max_write_burst_length = 64 max_read_burst_length = 64 depth = MAX_NUM_CONFIG port = \
+        config_fl
+    // Flow: unpack the scalar settings, initialize num_zeros, run the dataflow call, then finalize the orders.
+    // global config: config[0..3] carry the int settings, config_fl[0..2] the float settings
+    uint32_t ysize = config[0];
+    uint32_t xsize = config[1];
+    int masking_field_stride = config[2];
+    int quant_field_stride = config[3];
+    float butteraugli_target = config_fl[0];
+    float cost1 = config_fl[1];
+    float inv_global_scale = config_fl[2]; // NOTE(review): hides any namespace-scope inv_global_scale - confirm
+    int32_t num_zeros[3][320]; // 3 x 320 scratch array handed to all three calls below
+#pragma HLS BIND_STORAGE type = ram_2p variable = num_zeros impl = BRAM
+
+    // Non-Dataflow region: initialize num_zeros before the dataflow call starts
+    init_numzeros(num_zeros);
+
+    // Dataflow region: enc_compute; besides the AXI buffers it receives num_zeros and used_orders_strm
+    hls::stream<ap_uint<3>, 2> used_orders_strm; // NOTE(review): depth 2 but the two users run in sequence - confirm
+    hls_lossy_enc_compute_dataflow(ysize, xsize, masking_field_stride, quant_field_stride, butteraugli_target, cost1,
+                                   inv_global_scale, hls_opsin_1, hls_opsin_2, hls_opsin_3, quant_field_row,
+                                   masking_field_row, aq_map_f, cmap_axi, ac_coef_axiout, strategy_all,
+                                   raw_quant_field_i, hls_dc8x8, hls_dc16x16, hls_dc32x32, num_zeros, used_orders_strm);
+
+    // Non-Dataflow region: compute orders from used_orders_strm and num_zeros (presumably into hls_order - confirm)
+    order_finalize_dataflow(used_orders_strm, num_zeros, hls_order);
+}
+
+// ------------------------------------------------------------
+/**
+ * @brief Level 2: kernel implementation of the JXL lossy frame encode computation
+ *
+ * @param config the int config signals, such as image size and field strides.
+ * @param config_fl the floating-point config signals, such as cost and inv_global_scale.
+ * @param hls_opsin_1 the input RGB image data for channel-1.
+ * @param hls_opsin_2 the input RGB image data for channel-2.
+ * @param hls_opsin_3 the input RGB image data for channel-3.
+ * @param quant_field_row the initial quant_field data.
+ * @param masking_field_row the initial masking_field data.
+ * @param aq_map_f the initial adjust quant map data.
+ * @param cmap_axi the output of color correlation map.
+ * @param ac_coef_axiout the output of quantized AC coefficients.
+ * @param strategy_all the output of strategy for each block in image
+ * @param raw_quant_field_i the output of computed raw_quant_field
+ * @param hls_order the output of orders for each block in image
+ * @param hls_dc8x8 the DC coefficients output for 8x8 blocks
+ * @param hls_dc16x16 the DC coefficients output for 16x16 blocks
+ * @param hls_dc32x32 the DC coefficients output for 32x32 blocks
+ */
+// ------------------------------------------------------------
+
+void lossy_acc::compute(int* config, // input; this entry point only forwards its 16 arguments to lossyEncComp
+                        float* config_fl, // input
+                        float* hls_opsin_1, // input
+                        float* hls_opsin_2, // input
+                        float* hls_opsin_3, // input
+                        float* quant_field_row, // input
+                        float* masking_field_row, // input
+                        float* aq_map_f, // input
+                        int8_t* cmap_axi, // output
+                        int* ac_coef_axiout, // output
+                        unsigned char* strategy_all, // output
+                        int* raw_quant_field_i, // output
+                        uint32_t* hls_order, // output
+                        float* hls_dc8x8, // output
+                        float* hls_dc16x16, // output
+                        float* hls_dc32x32) { // output
+    lossyEncComp(config, config_fl, hls_opsin_1, hls_opsin_2, hls_opsin_3, quant_field_row, masking_field_row, aq_map_f,
+                 cmap_axi, ac_coef_axiout, strategy_all, raw_quant_field_i, hls_order, hls_dc8x8, hls_dc16x16,
+                 hls_dc32x32); // same argument order as the parameter list above; nothing is computed here
+}
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/kernel/hls_lossy_enc_compute.hpp b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/kernel/hls_lossy_enc_compute.hpp
new file mode 100644
index 0000000000..3263f1841d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/kernel/hls_lossy_enc_compute.hpp
@@ -0,0 +1,4011 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HLS_LOSSY_ENC_COMPUTE_HPP
+#define HLS_LOSSY_ENC_COMPUTE_HPP
+
+#include "vpp_acc.hpp"
+
+#include <stdint.h>
+#include <ap_int.h>
+#include <hls_stream.h>
+#include "stddef.h"
+#include <algorithm>
+#include <cmath>
+
+using ca_x_t = ap_fixed<38, 24>; // fixed-point aliases: first argument is the total width, second the integer bits
+using cb_x_t = ap_fixed<38, 24>; // same format as ca_x_t
+using ca_b_t = ap_fixed<38, 18>; // 38 bits wide, 18 of them integer bits
+using cb_b_t = ap_fixed<38, 21>; // 38 bits wide, 21 of them integer bits
+
+enum Type { // transform choices for a block; every numeric value (0..5) is fixed explicitly
+    // Regular block size DCT (NOTE(review): presumably the 8-by-8 DCT - confirm)
+    DCT = 0,
+    // Encode pixels without transforming
+    IDENTITY = 1,
+    // Use 2-by-2 DCT
+    DCT2X2 = 2,
+    // Use 4-by-4 DCT
+    DCT4X4 = 3,
+    // Use 16-by-16 DCT
+    DCT16X16 = 4,
+    // Use 32-by-32 DCT (last enumerator, value 5)
+    DCT32X32 = 5
+};
+
+template <class T>
+T DivCeil(T a, size_t b) { // ceil(a / b) by the add-then-divide trick; b must be non-zero and a non-negative
+    return static_cast<T>((a + b - 1) / b);
+}
+
+template <typename I, typename F>
+inline F bitsToF(I in) { // returns `in` reinterpreted bit-for-bit as an F; I and F are expected to have equal size
+    union { // local names avoid double underscores, which are reserved for the implementation
+        I bits;
+        F value;
+    } overlay;
+    overlay.bits = in; // store through the integer view ...
+    return overlay.value; // ... and read the same storage back through the F view
+}
+
+template <typename F, typename I>
+inline I fToBits(F in) { // returns the raw bit pattern of `in` as an I; F and I are expected to have equal size
+    union { // local names avoid double underscores, which are reserved for the implementation
+        I bits;
+        F value;
+    } overlay;
+    overlay.value = in; // store through the F view ...
+    return overlay.bits; // ... and read the same storage back through the integer view
+}
+
+template <typename MType, typename DType>
+union cast; // declared only: usable solely through the partial specializations that follow in this group
+
+template <typename DT>
+union cast<DT, int8_t> { // overlays a DT and an int8_t on the same storage
+    DT f; // view of the storage as DT
+    int8_t i; // view of the storage as int8_t
+};
+
+template <typename DT>
+union cast<DT, int32_t> { // overlays a DT and an int32_t on the same storage
+    DT f; // view of the storage as DT
+    int32_t i; // view of the storage as int32_t
+};
+
+template <typename DT>
+union cast<DT, uint32_t> { // overlays a DT and a uint32_t on the same storage
+    DT f; // view of the storage as DT
+    uint32_t i; // view of the storage as uint32_t
+};
+
+template <typename DT>
+union cast<DT, int64_t> { // overlays a DT and an int64_t on the same storage
+    DT f; // view of the storage as DT
+    int64_t i; // view of the storage as int64_t
+};
+
+const int PIXEL_W = 2048; // NOTE(review): presumably the largest supported frame width in pixels - confirm
+const int PIXEL_H = 2048; // NOTE(review): presumably the largest supported frame height in pixels - confirm
+const int FRAME_DIM = 3; // multiplier used in ALL_PIXEL and BLOCK8_NUM (presumably the channel count - confirm)
+const int ALL_PIXEL = PIXEL_W * PIXEL_H * FRAME_DIM; // 2048 * 2048 * 3 = 12,582,912
+const int BLOCK8_W = PIXEL_W / 8; // 256
+const int BLOCK8_H = PIXEL_H / 8; // 256
+const int BLOCK8_NUM = BLOCK8_W * BLOCK8_H * FRAME_DIM; // 256 * 256 * 3 = 196,608
+const int TILE_W = PIXEL_W / 64; // 32
+const int TILE_H = PIXEL_H / 64; // 32
+const int MAX_ORDER = 320 * 3 + 1; // 961
+const int MAX_NUM_CONFIG = 32; // array bound of the config / config_fl parameters of lossy_acc::lossyEncComp
+
+const size_t kBlockDim = 8; // edge length behind the "8" in BLOCK8_* and in kDCTBlockSize (8 * 8)
+const size_t kColorTileDim = 64; // 8 * kBlockDim
+const size_t kDCTBlockSize = 64; // kBlockDim * kBlockDim
+const size_t kEncTileDimInBlocks = 8;
+const int kGlobalScaleDenom = 1 << 16; // 65536
+const size_t kColorTileDimInBlocks = 8; // kColorTileDim / kBlockDim
+const int global_scale = 4587;          // numerator of global_scale_float; denominator is kGlobalScaleDenom
+const float global_scale_float = global_scale * (1.0 / kGlobalScaleDenom); // 4587 / 65536, approx. 0.069992
+const float inv_global_scale = 1.0 * kGlobalScaleDenom / global_scale; // 65536 / 4587, approx. 14.2873
+
+static const uint8_t kDefaultColorFactor = 84; // divisor used to derive color_scale on the next line
+static float color_scale = 1.0f / (uint32_t)kDefaultColorFactor; // 1 / 84, approx. 0.0119048
+static const float kYToBRatio = 1.0f; // initial value of base_correlation_b
+static float base_correlation_x = 0.0f; // NOTE(review): non-const static in a header, one mutable copy per TU - confirm
+static float base_correlation_b = kYToBRatio; // starts at 1.0f; non-const like base_correlation_x
+
+static const float kDefaultQuantBias[4] = { // approx. {0.94535, 0.92995, 0.95006, 0.145}
+    1.0f - 0.05465007330715401f, 1.0f - 0.07005449891748593f, 1.0f - 0.049935103337343655f, 0.145f,
+}; // NOTE(review): presumably three per-channel biases plus one shared term - confirm against the code that reads it
+
+const float qmx8x8[64] = {0, // 64 entries; as an 8x8 grid the values are symmetric and entry 0 is 0
+                          3150,
+                          3139.258544921875,
+                          2648.63037109375,
+                          2234.68115234375,
+                          1885.427490234375,
+                          1590.758056640625,
+                          1342.1417236328125,
+                          3150,
+                          3150,
+                          3015.8095703125,
+                          2576.583984375,
+                          2188.4150390625,
+                          1853.965576171875,
+                          1568.5406494140625,
+                          1326.029296875,
+                          3139.258544921875,
+                          3015.8095703125,
+                          2726.995361328125,
+                          2389.616455078125,
+                          2062.382568359375,
+                          1765.966552734375,
+                          1505.3934326171875,
+                          1279.74853515625,
+                          2648.63037109375,
+                          2576.583984375,
+                          2389.616455078125,
+                          2144.407470703125,
+                          1885.427490234375,
+                          1637.12109375,
+                          1410.3748779296875,
+                          1208.7896728515625,
+                          2234.68115234375,
+                          2188.4150390625,
+                          2062.382568359375,
+                          1885.427490234375,
+                          1686.2821044921875,
+                          1485.4266357421875,
+                          1294.8450927734375,
+                          1060.5933837890625,
+                          1885.427490234375,
+                          1853.965576171875,
+                          1765.966552734375,
+                          1637.12109375,
+                          1485.4266357421875,
+                          1326.029296875,
+                          1169.4920654296875,
+                          785.9630126953125,
+                          1590.758056640625,
+                          1568.5406494140625,
+                          1505.3934326171875,
+                          1410.3748779296875,
+                          1294.8450927734375,
+                          1169.4920654296875,
+                          838.70172119140625,
+                          558.03729248046875,
+                          1342.1417236328125,
+                          1326.029296875,
+                          1279.74853515625,
+                          1208.7896728515625,
+                          1060.5933837890625,
+                          785.9630126953125,
+                          558.03729248046875,
+                          382.654693603515625}; // NOTE(review): presumably 8x8 quantization weights, X channel - confirm
+const float qmb8x8[64] = {0, // 64 entries; as an 8x8 grid the values are symmetric and entry 0 is 0
+                          293.959503173828125,
+                          169.4699554443359375,
+                          119.41248321533203125,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          83.5508270263671875,
+                          58.871856689453125,
+                          293.959503173828125,
+                          233.598114013671875,
+                          156.02716064453125,
+                          112.8175048828125,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          81.16471099853515625,
+                          57.425174713134765625,
+                          169.4699554443359375,
+                          156.02716064453125,
+                          126.80493927001953125,
+                          96.60062408447265625,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          74.5768890380859375,
+                          53.37267303466796875,
+                          119.41248321533203125,
+                          112.8175048828125,
+                          96.60062408447265625,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          65.20384979248046875,
+                          47.455181121826171875,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          72.55352020263671875,
+                          54.6778106689453125,
+                          39.419506072998046875,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          72.55352020263671875,
+                          57.425174713134765625,
+                          44.331756591796875,
+                          29.2122058868408203125,
+                          83.5508270263671875,
+                          81.16471099853515625,
+                          74.5768890380859375,
+                          65.20384979248046875,
+                          54.6778106689453125,
+                          44.331756591796875,
+                          31.1723690032958984375,
+                          20.7407989501953125,
+                          58.871856689453125,
+                          57.425174713134765625,
+                          53.37267303466796875,
+                          47.455181121826171875,
+                          39.419506072998046875,
+                          29.2122058868408203125,
+                          20.7407989501953125,
+                          14.22228240966796875}; // NOTE(review): presumably 8x8 quantization weights, B channel - confirm
+const float qmx16x16[256] = {0, // 256 entries; viewed as a 16x16 grid, entries 0, 1, 16 and 17 are 0
+                             0,
+                             5616.41552734375,
+                             4437.5478515625,
+                             3710.523681640625,
+                             3312.083740234375,
+                             2956.42822265625,
+                             2638.9638671875,
+                             2378.979736328125,
+                             2146.23095703125,
+                             1936.2532958984375,
+                             1722.1861572265625,
+                             1498.605712890625,
+                             1304.0516357421875,
+                             1134.7548828125,
+                             951.88201904296875,
+                             0,
+                             0,
+                             5312.58251953125,
+                             4271.09716796875,
+                             3658.995849609375,
+                             3275.037109375,
+                             2928.763916015625,
+                             2617.745361328125,
+                             2363.779541015625,
+                             2134.027099609375,
+                             1926.335693359375,
+                             1711.357177734375,
+                             1489.962646484375,
+                             1297.1055908203125,
+                             1129.140380859375,
+                             946.136962890625,
+                             5616.41552734375,
+                             5312.58251953125,
+                             4620.5927734375,
+                             3880.564697265625,
+                             3516.761474609375,
+                             3170.294189453125,
+                             2849.415283203125,
+                             2562.00634765625,
+                             2319.431640625,
+                             2098.26171875,
+                             1897.1728515625,
+                             1679.534423828125,
+                             1464.5052490234375,
+                             1276.60888671875,
+                             1112.54638671875,
+                             929.18414306640625,
+                             4437.5478515625,
+                             4271.09716796875,
+                             3880.564697265625,
+                             3609.647705078125,
+                             3312.083740234375,
+                             3013.74951171875,
+                             2727.90283203125,
+                             2474.977294921875,
+                             2249.396484375,
+                             2041.3057861328125,
+                             1850.436279296875,
+                             1628.6099853515625,
+                             1423.5849609375,
+                             1243.5428466796875,
+                             1077.57275390625,
+                             901.83697509765625,
+                             3710.523681640625,
+                             3658.995849609375,
+                             3516.761474609375,
+                             3312.083740234375,
+                             3073.944580078125,
+                             2824.097412109375,
+                             2580.273681640625,
+                             2363.779541015625,
+                             2158.580810546875,
+                             1966.6195068359375,
+                             1778.0765380859375,
+                             1561.4259033203125,
+                             1369.259765625,
+                             1199.417236328125,
+                             1031.115478515625,
+                             865.35723876953125,
+                             3312.083740234375,
+                             3275.037109375,
+                             3170.294189453125,
+                             3013.74951171875,
+                             2824.097412109375,
+                             2617.7451171875,
+                             2425.913330078125,
+                             2235.929931640625,
+                             2052.44384765625,
+                             1878.2061767578125,
+                             1679.534423828125,
+                             1481.3988037109375,
+                             1304.0516357421875,
+                             1146.111572265625,
+                             975.34478759765625,
+                             821.329833984375,
+                             2956.42822265625,
+                             2928.763916015625,
+                             2849.415283203125,
+                             2727.90283203125,
+                             2580.273681640625,
+                             2425.913330078125,
+                             2263.03759765625,
+                             2098.26171875,
+                             1936.2532958984375,
+                             1766.65966796875,
+                             1570.745849609375,
+                             1392.13525390625,
+                             1230.6845703125,
+                             1077.57275390625,
+                             912.64251708984375,
+                             771.521240234375,
+                             2638.9638671875,
+                             2617.745361328125,
+                             2562.00634765625,
+                             2474.977294921875,
+                             2363.779541015625,
+                             2235.929931640625,
+                             2098.26171875,
+                             1956.3931884765625,
+                             1813.078369140625,
+                             1628.6099853515625,
+                             1456.1728515625,
+                             1297.1055908203125,
+                             1151.8544921875,
+                             993.46435546875,
+                             845.40533447265625,
+                             717.73773193359375,
+                             2378.979736328125,
+                             2363.779541015625,
+                             2319.431640625,
+                             2249.396484375,
+                             2158.580810546875,
+                             2052.44384765625,
+                             1936.2532958984375,
+                             1813.078369140625,
+                             1648.672119140625,
+                             1489.962646484375,
+                             1339.6640625,
+                             1199.41748046875,
+                             1057.3155517578125,
+                             907.21795654296875,
+                             775.87847900390625,
+                             661.70928955078125,
+                             2146.23095703125,
+                             2134.027099609375,
+                             2098.26171875,
+                             2041.3057861328125,
+                             1966.6195068359375,
+                             1878.2061767578125,
+                             1766.65966796875,
+                             1628.6099853515625,
+                             1489.962646484375,
+                             1354.3355712890625,
+                             1224.331787109375,
+                             1098.37109375,
+                             951.88201904296875,
+                             821.329833984375,
+                             706.04150390625,
+                             604.9959716796875,
+                             1936.2532958984375,
+                             1926.335693359375,
+                             1897.1728515625,
+                             1850.436279296875,
+                             1778.0765380859375,
+                             1679.534423828125,
+                             1570.745849609375,
+                             1456.1728515625,
+                             1339.6640625,
+                             1224.331787109375,
+                             1112.546142578125,
+                             975.344482421875,
+                             850.33416748046875,
+                             737.81219482421875,
+                             637.54150390625,
+                             531.86663818359375,
+                             1722.1861572265625,
+                             1711.357177734375,
+                             1679.534423828125,
+                             1628.6099853515625,
+                             1561.4259033203125,
+                             1481.3988037109375,
+                             1392.13525390625,
+                             1297.1055908203125,
+                             1199.41748046875,
+                             1098.37109375,
+                             975.344482421875,
+                             860.30999755859375,
+                             754.41485595703125,
+                             658.18359375,
+                             565.16876220703125,
+                             455.065155029296875,
+                             1498.605712890625,
+                             1489.962646484375,
+                             1464.5052490234375,
+                             1423.5849609375,
+                             1369.259765625,
+                             1304.0516357421875,
+                             1230.6845703125,
+                             1151.8544921875,
+                             1057.3155517578125,
+                             951.88201904296875,
+                             850.33416748046875,
+                             754.41485595703125,
+                             665.2603759765625,
+                             582.76104736328125,
+                             475.56475830078125,
+                             385.666412353515625,
+                             1304.0516357421875,
+                             1297.1055908203125,
+                             1276.60888671875,
+                             1243.5428466796875,
+                             1199.417236328125,
+                             1146.111572265625,
+                             1077.57275390625,
+                             993.46435546875,
+                             907.21795654296875,
+                             821.329833984375,
+                             737.81219482421875,
+                             658.18359375,
+                             582.76104736328125,
+                             482.643035888671875,
+                             396.77593994140625,
+                             324.0394287109375,
+                             1134.7548828125,
+                             1129.140380859375,
+                             1112.54638671875,
+                             1077.57275390625,
+                             1031.115478515625,
+                             975.34478759765625,
+                             912.64251708984375,
+                             845.40533447265625,
+                             775.87847900390625,
+                             706.04150390625,
+                             637.54150390625,
+                             565.16876220703125,
+                             475.56475830078125,
+                             396.77593994140625,
+                             328.516326904296875,
+                             270.136077880859375,
+                             951.88201904296875,
+                             946.136962890625,
+                             929.18414306640625,
+                             901.83697509765625,
+                             865.35723876953125,
+                             821.329833984375,
+                             771.521240234375,
+                             717.73773193359375,
+                             661.70928955078125,
+                             604.9959716796875,
+                             531.86663818359375,
+                             455.065155029296875,
+                             385.666412353515625,
+                             324.0394287109375,
+                             270.136077880859375,
+                             223.608489990234375}; // NOTE(review): presumably 16x16 quantization weights, X channel - confirm
+const float qmb16x16[256] = {0,
+                             0,
+                             615.61383056640625,
+                             448.953399658203125,
+                             337.930267333984375,
+                             263.80755615234375,
+                             205.943115234375,
+                             160.7708892822265625,
+                             141.832733154296875,
+                             126.30164337158203125,
+                             112.47124481201171875,
+                             100.76338958740234375,
+                             91.12081146240234375,
+                             82.40099334716796875,
+                             74.5156097412109375,
+                             58.896236419677734375,
+                             0,
+                             0,
+                             571.40203857421875,
+                             426.5322265625,
+                             327.784393310546875,
+                             257.417816162109375,
+                             201.76556396484375,
+                             157.9664306640625,
+                             140.8123321533203125,
+                             125.4929656982421875,
+                             111.822540283203125,
+                             100.30467987060546875,
+                             90.7403564453125,
+                             82.08327484130859375,
+                             74.2487335205078125,
+                             58.3933258056640625,
+                             615.61383056640625,
+                             571.40203857421875,
+                             473.94189453125,
+                             372.602783203125,
+                             300.644775390625,
+                             239.809600830078125,
+                             190.039825439453125,
+                             154.1826629638671875,
+                             137.8400421142578125,
+                             123.12636566162109375,
+                             109.9174652099609375,
+                             98.95200347900390625,
+                             89.61621856689453125,
+                             81.14296722412109375,
+                             73.45781707763671875,
+                             56.916744232177734375,
+                             448.953399658203125,
+                             426.5322265625,
+                             372.602783203125,
+                             318.224456787109375,
+                             263.80755615234375,
+                             214.746795654296875,
+                             172.8172607421875,
+                             148.2958526611328125,
+                             133.160797119140625,
+                             119.3681488037109375,
+                             106.87210845947265625,
+                             96.77252197265625,
+                             87.79785919189453125,
+                             79.6171722412109375,
+                             70.20831298828125,
+                             54.558437347412109375,
+                             337.930267333984375,
+                             327.784393310546875,
+                             300.644775390625,
+                             263.80755615234375,
+                             224.2069549560546875,
+                             186.3783111572265625,
+                             155.42156982421875,
+                             140.8123321533203125,
+                             127.12058258056640625,
+                             114.4600982666015625,
+                             103.11833953857421875,
+                             93.86804962158203125,
+                             85.36130523681640625,
+                             77.5634307861328125,
+                             65.95937347412109375,
+                             51.458751678466796875,
+                             263.80755615234375,
+                             257.417816162109375,
+                             239.809600830078125,
+                             214.746795654296875,
+                             186.3783111572265625,
+                             157.966400146484375,
+                             144.9885406494140625,
+                             132.263153076171875,
+                             120.10205078125,
+                             108.6804351806640625,
+                             98.95200347900390625,
+                             90.36280059814453125,
+                             82.40099334716796875,
+                             75.0543060302734375,
+                             60.963199615478515625,
+                             47.78974151611328125,
+                             205.943115234375,
+                             201.76556396484375,
+                             190.039825439453125,
+                             172.8172607421875,
+                             155.42156982421875,
+                             144.9885406494140625,
+                             134.070770263671875,
+                             123.12636566162109375,
+                             112.47124481201171875,
+                             102.63896942138671875,
+                             94.27301025390625,
+                             86.3905029296875,
+                             79.02082061767578125,
+                             70.20831298828125,
+                             55.486751556396484375,
+                             43.7368011474609375,
+                             160.7708892822265625,
+                             157.9664306640625,
+                             154.1826629638671875,
+                             148.2958526611328125,
+                             140.8123321533203125,
+                             132.263153076171875,
+                             123.12636566162109375,
+                             113.789886474609375,
+                             104.58271026611328125,
+                             96.77252197265625,
+                             89.247100830078125,
+                             82.08327484130859375,
+                             75.3261566162109375,
+                             62.573711395263671875,
+                             49.786182403564453125,
+                             39.48137664794921875,
+                             141.832733154296875,
+                             140.8123321533203125,
+                             137.8400421142578125,
+                             133.160797119140625,
+                             127.12058258056640625,
+                             120.10205078125,
+                             112.47124481201171875,
+                             104.58271026611328125,
+                             97.63336944580078125,
+                             90.7403564453125,
+                             84.02266693115234375,
+                             77.563446044921875,
+                             68.3460235595703125,
+                             55.02014923095703125,
+                             44.087116241455078125,
+                             35.18759918212890625,
+                             126.30164337158203125,
+                             125.4929656982421875,
+                             123.12636566162109375,
+                             119.3681488037109375,
+                             114.4600982666015625,
+                             108.6804351806640625,
+                             102.63896942138671875,
+                             96.77252197265625,
+                             90.7403564453125,
+                             84.68727874755859375,
+                             78.725555419921875,
+                             72.135589599609375,
+                             58.896236419677734375,
+                             47.78974151611328125,
+                             38.57308197021484375,
+                             30.9930629730224609375,
+                             112.47124481201171875,
+                             111.822540283203125,
+                             109.9174652099609375,
+                             106.87210845947265625,
+                             103.11833953857421875,
+                             98.95200347900390625,
+                             94.27301025390625,
+                             89.247100830078125,
+                             84.02266693115234375,
+                             78.725555419921875,
+                             73.4578094482421875,
+                             60.96317291259765625,
+                             50.197849273681640625,
+                             41.054691314697265625,
+                             33.38103485107421875,
+                             24.7806758880615234375,
+                             100.76338958740234375,
+                             100.30467987060546875,
+                             98.95200347900390625,
+                             96.77252197265625,
+                             93.86804962158203125,
+                             90.36280059814453125,
+                             86.3905029296875,
+                             82.08327484130859375,
+                             77.563446044921875,
+                             72.135589599609375,
+                             60.96317291259765625,
+                             51.034107208251953125,
+                             42.369472503662109375,
+                             34.922313690185546875,
+                             27.726070404052734375,
+                             18.572216033935546875,
+                             91.12081146240234375,
+                             90.7403564453125,
+                             89.61621856689453125,
+                             87.79785919189453125,
+                             85.36130523681640625,
+                             82.40099334716796875,
+                             79.02082061767578125,
+                             75.3261566162109375,
+                             68.3460235595703125,
+                             58.896236419677734375,
+                             50.197849273681640625,
+                             42.369472503662109375,
+                             35.455394744873046875,
+                             29.34313201904296875,
+                             20.1489048004150390625,
+                             13.67640781402587890625,
+                             82.40099334716796875,
+                             82.08327484130859375,
+                             81.14296722412109375,
+                             79.6171722412109375,
+                             77.5634307861328125,
+                             75.0543060302734375,
+                             70.20831298828125,
+                             62.573711395263671875,
+                             55.02014923095703125,
+                             47.78974151611328125,
+                             41.054691314697265625,
+                             34.922313690185546875,
+                             29.34313201904296875,
+                             20.706996917724609375,
+                             14.41384983062744140625,
+                             9.911548614501953125,
+                             74.5156097412109375,
+                             74.2487335205078125,
+                             73.45781707763671875,
+                             70.20831298828125,
+                             65.95937347412109375,
+                             60.963199615478515625,
+                             55.486751556396484375,
+                             49.786182403564453125,
+                             44.087116241455078125,
+                             38.57308197021484375,
+                             33.38103485107421875,
+                             27.726070404052734375,
+                             20.1489048004150390625,
+                             14.41384983062744140625,
+                             10.16626739501953125,
+                             7.0798015594482421875,
+                             58.896236419677734375,
+                             58.3933258056640625,
+                             56.916744232177734375,
+                             54.558437347412109375,
+                             51.458751678466796875,
+                             47.78974151611328125,
+                             43.7368011474609375,
+                             39.48137664794921875,
+                             35.18759918212890625,
+                             30.9930629730224609375,
+                             24.7806758880615234375,
+                             18.572216033935546875,
+                             13.67640781402587890625,
+                             9.911548614501953125,
+                             7.0798015594482421875,
+                             4.99121952056884765625};
+const float qmx32x32[1024] = {0,
+                              0,
+                              0,
+                              0,
+                              10016.1787109375,
+                              8949.0185546875,
+                              7995.55859375,
+                              7162.60107421875,
+                              6422.4755859375,
+                              5758.8291015625,
+                              5163.75830078125,
+                              4630.1767578125,
+                              4151.732421875,
+                              3734.188232421875,
+                              3370.10986328125,
+                              3041.52880859375,
+                              2744.98388671875,
+                              2477.35107421875,
+                              2235.813232421875,
+                              2038.7496337890625,
+                              1932.1097412109375,
+                              1831.0474853515625,
+                              1735.2716064453125,
+                              1644.505615234375,
+                              1558.4873046875,
+                              1476.968017578125,
+                              1386.82666015625,
+                              1301.5286865234375,
+                              1221.4771728515625,
+                              1146.34912109375,
+                              1075.8421630859375,
+                              1009.6715087890625,
+                              0,
+                              0,
+                              0,
+                              0,
+                              9878.224609375,
+                              8849.744140625,
+                              7921.35595703125,
+                              7107.29541015625,
+                              6379.01171875,
+                              5724.1455078125,
+                              5135.74365234375,
+                              4607.32568359375,
+                              4132.939453125,
+                              3719.505126953125,
+                              3357.800537109375,
+                              3031.1572265625,
+                              2736.20654296875,
+                              2469.894287109375,
+                              2229.455810546875,
+                              2035.871337890625,
+                              1929.51806640625,
+                              1828.7081298828125,
+                              1733.1553955078125,
+                              1642.5870361328125,
+                              1556.7445068359375,
+                              1475.38232421875,
+                              1385.1351318359375,
+                              1300,
+                              1220.09375,
+                              1145.095703125,
+                              1074.7049560546875,
+                              1008.638671875,
+                              0,
+                              0,
+                              0,
+                              0,
+                              9497.3408203125,
+                              8569.009765625,
+                              7710.1953125,
+                              6947.08251953125,
+                              6252.30078125,
+                              5622.568359375,
+                              5053.4169921875,
+                              4539.9931640625,
+                              4077.45068359375,
+                              3676.055419921875,
+                              3321.326416015625,
+                              3000.390625,
+                              2710.1435546875,
+                              2447.7333984375,
+                              2210.550537109375,
+                              2027.2841796875,
+                              1921.7830810546875,
+                              1821.7237548828125,
+                              1726.834716796875,
+                              1636.8553466796875,
+                              1551.537353515625,
+                              1470.240966796875,
+                              1380.0811767578125,
+                              1295.431396484375,
+                              1215.958251953125,
+                              1141.3475341796875,
+                              1071.3038330078125,
+                              1005.5491943359375,
+                              0,
+                              0,
+                              0,
+                              0,
+                              8949.0185546875,
+                              8149.28955078125,
+                              7394.22412109375,
+                              6697.34423828125,
+                              6052.48828125,
+                              5461.01953125,
+                              4921.63427734375,
+                              4431.66796875,
+                              3987.818603515625,
+                              3605.5732421875,
+                              3262.003173828125,
+                              2950.239990234375,
+                              2667.58154296875,
+                              2411.486328125,
+                              2179.5849609375,
+                              2013.1302490234375,
+                              1909.0233154296875,
+                              1810.194091796875,
+                              1716.393798828125,
+                              1627.38232421875,
+                              1542.9271240234375,
+                              1460.985595703125,
+                              1371.7230224609375,
+                              1287.8731689453125,
+                              1209.1142578125,
+                              1135.1424560546875,
+                              1065.6719970703125,
+                              1000.4320068359375,
+                              10016.1787109375,
+                              9878.224609375,
+                              9497.3408203125,
+                              8949.0185546875,
+                              8310.703125,
+                              7644.4052734375,
+                              6999.5673828125,
+                              6379.01171875,
+                              5793.93896484375,
+                              5249.58837890625,
+                              4747.62841796875,
+                              4287.62841796875,
+                              3871.052001953125,
+                              3510.74609375,
+                              3181.8896484375,
+                              2882.297607421875,
+                              2609.764404296875,
+                              2362.1328125,
+                              2137.337890625,
+                              1993.63818359375,
+                              1891.4306640625,
+                              1794.2806396484375,
+                              1701.970703125,
+                              1614.285400390625,
+                              1531.013916015625,
+                              1448.1861572265625,
+                              1360.1573486328125,
+                              1277.407958984375,
+                              1199.633056640625,
+                              1126.5428466796875,
+                              1057.8629150390625,
+                              993.33349609375,
+                              8949.0185546875,
+                              8849.744140625,
+                              8569.009765625,
+                              8149.28955078125,
+                              7644.4052734375,
+                              7107.29541015625,
+                              6556.77978515625,
+                              6014.109375,
+                              5492.59033203125,
+                              4999.91259765625,
+                              4539.9931640625,
+                              4114.296875,
+                              3734.188232421875,
+                              3394.959228515625,
+                              3083.605224609375,
+                              2798.61328125,
+                              2538.306396484375,
+                              2300.954345703125,
+                              2084.83349609375,
+                              1969.1136474609375,
+                              1869.26220703125,
+                              1774.201171875,
+                              1683.7496337890625,
+                              1597.7220458984375,
+                              1515.93359375,
+                              1431.994873046875,
+                              1345.5145263671875,
+                              1264.1485595703125,
+                              1187.612060546875,
+                              1115.6322021484375,
+                              1047.949462890625,
+                              984.94073486328125,
+                              7995.55859375,
+                              7921.35595703125,
+                              7710.1953125,
+                              7394.22412109375,
+                              6999.5673828125,
+                              6556.77978515625,
+                              6091.37744140625,
+                              5622.568359375,
+                              5163.75830078125,
+                              4723.701171875,
+                              4307.68896484375,
+                              3918.65771484375,
+                              3578.03271484375,
+                              3262.003173828125,
+                              2970.1337890625,
+                              2701.544189453125,
+                              2455.08642578125,
+                              2229.455810546875,
+                              2041.63623046875,
+                              1939.925048828125,
+                              1842.8297119140625,
+                              1750.22119140625,
+                              1661.9576416015625,
+                              1577.8870849609375,
+                              1497.8533935546875,
+                              1412.5999755859375,
+                              1327.9566650390625,
+                              1248.234619140625,
+                              1173.171875,
+                              1102.515625,
+                              1036.0238037109375,
+                              975.33502197265625,
+                              7162.60107421875,
+                              7107.29541015625,
+                              6947.08251953125,
+                              6697.34423828125,
+                              6379.01171875,
+                              6014.109375,
+                              5622.568359375,
+                              5220.6748046875,
+                              4820.7763671875,
+                              4431.66796875,
+                              4059.243408203125,
+                              3719.505126953125,
+                              3407.500244140625,
+                              3115.7998046875,
+                              2844.60986328125,
+                              2593.611328125,
+                              2362.133056640625,
+                              2149.279541015625,
+                              2004.73095703125,
+                              1906.490966796875,
+                              1812.4891357421875,
+                              1722.6444091796875,
+                              1636.8553466796875,
+                              1555.00537109375,
+                              1476.9681396484375,
+                              1390.2191162109375,
+                              1307.6717529296875,
+                              1229.8292236328125,
+                              1156.454345703125,
+                              1087.3167724609375,
+                              1022.19244384765625,
+                              964.1702880859375,
+                              6422.4755859375,
+                              6379.01171875,
+                              6252.30078125,
+                              6052.48828125,
+                              5793.93896484375,
+                              5492.59033203125,
+                              5163.75830078125,
+                              4820.7763671875,
+                              4474.4306640625,
+                              4132.939453125,
+                              3809.151123046875,
+                              3510.74560546875,
+                              3227.259765625,
+                              2960.15966796875,
+                              2710.1435546875,
+                              2477.3515625,
+                              2261.5234375,
+                              2062.12646484375,
+                              1963.7435302734375,
+                              1869.26220703125,
+                              1778.6273193359375,
+                              1691.8035888671875,
+                              1608.7301025390625,
+                              1529.3251953125,
+                              1450.00341796875,
+                              1365.0950927734375,
+                              1284.86962890625,
+                              1209.1142578125,
+                              1137.61865234375,
+                              1070.173828125,
+                              1006.57757568359375,
+                              951.533935546875,
+                              5758.8291015625,
+                              5724.1455078125,
+                              5622.568359375,
+                              5461.01953125,
+                              5249.58837890625,
+                              4999.91259765625,
+                              4723.701171875,
+                              4431.66796875,
+                              4132.939453125,
+                              3839.8818359375,
+                              3564.3984375,
+                              3297.380859375,
+                              3041.52880859375,
+                              2798.61328125,
+                              2569.677490234375,
+                              2355.211669921875,
+                              2155.288330078125,
+                              2013.1302490234375,
+                              1919.218017578125,
+                              1828.7081298828125,
+                              1741.64990234375,
+                              1658.050537109375,
+                              1577.8870849609375,
+                              1501.1123046875,
+                              1419.6033935546875,
+                              1337.488037109375,
+                              1259.777099609375,
+                              1186.287841796875,
+                              1116.836181640625,
+                              1051.2381591796875,
+                              989.35601806640625,
+                              937.5218505859375,
+                              5163.75830078125,
+                              5135.74365234375,
+                              5053.4169921875,
+                              4921.63427734375,
+                              4747.62841796875,
+                              4539.9931640625,
+                              4307.68896484375,
+                              4059.243408203125,
+                              3809.151123046875,
+                              3564.3984375,
+                              3321.326416015625,
+                              3083.605224609375,
+                              2853.95654296875,
+                              2634.296142578125,
+                              2425.8837890625,
+                              2229.455810546875,
+                              2053.26318359375,
+                              1961.06884765625,
+                              1871.700927734375,
+                              1785.30419921875,
+                              1701.970703125,
+                              1621.7459716796875,
+                              1544.6424560546875,
+                              1470.240966796875,
+                              1386.82666015625,
+                              1307.6715087890625,
+                              1232.6331787109375,
+                              1161.558837890625,
+                              1094.29150390625,
+                              1030.6707763671875,
+                              972.74078369140625,
+                              922.236572265625,
+                              4630.1767578125,
+                              4607.32568359375,
+                              4539.9931640625,
+                              4431.66796875,
+                              4287.62841796875,
+                              4114.296875,
+                              3918.65771484375,
+                              3719.505126953125,
+                              3510.74560546875,
+                              3297.380859375,
+                              3083.605224609375,
+                              2872.7998046875,
+                              2667.58154296875,
+                              2469.89453125,
+                              2281.107177734375,
+                              2102.114501953125,
+                              1993.638427734375,
+                              1906.490966796875,
+                              1821.723876953125,
+                              1739.5189208984375,
+                              1660.0018310546875,
+                              1583.2529296875,
+                              1509.314697265625,
+                              1431.994873046875,
+                              1351.9913330078125,
+                              1275.923828125,
+                              1203.682373046875,
+                              1135.1424560546875,
+                              1070.173828125,
+                              1008.638671875,
+                              954.87835693359375,
+                              905.78668212890625,
+                              4151.732421875,
+                              4132.939453125,
+                              4077.45068359375,
+                              3987.818603515625,
+                              3871.052001953125,
+                              3734.188232421875,
+                              3578.03271484375,
+                              3407.500244140625,
+                              3227.259765625,
+                              3041.52880859375,
+                              2853.95654296875,
+                              2667.58154296875,
+                              2484.843994140625,
+                              2307.630126953125,
+                              2137.337890625,
+                              2015.9456787109375,
+                              1932.1097412109375,
+                              1849.968017578125,
+                              1769.796142578125,
+                              1691.8035888671875,
+                              1616.1446533203125,
+                              1542.9271240234375,
+                              1472.10400390625,
+                              1391.9202880859375,
+                              1315.4149169921875,
+                              1242.5257568359375,
+                              1173.171875,
+                              1107.2579345703125,
+                              1044.676513671875,
+                              985.8211669921875,
+                              935.8944091796875,
+                              888.28326416015625,
+                              3734.188232421875,
+                              3719.505126953125,
+                              3676.055419921875,
+                              3605.5732421875,
+                              3510.74609375,
+                              3394.959228515625,
+                              3262.003173828125,
+                              3115.7998046875,
+                              2960.15966796875,
+                              2798.61328125,
+                              2634.296142578125,
+                              2469.89453125,
+                              2307.630126953125,
+                              2149.279541015625,
+                              2027.2845458984375,
+                              1947.802001953125,
+                              1869.26220703125,
+                              1792.02880859375,
+                              1716.393798828125,
+                              1642.5870361328125,
+                              1570.7830810546875,
+                              1501.1123046875,
+                              1426.662841796875,
+                              1350.36767578125,
+                              1277.407958984375,
+                              1207.7525634765625,
+                              1141.3475341796875,
+                              1078.122314453125,
+                              1017.99298095703125,
+                              964.1702880859375,
+                              915.9161376953125,
+                              869.84039306640625,
+                              3370.10986328125,
+                              3357.800537109375,
+                              3321.326416015625,
+                              3262.003173828125,
+                              3181.8896484375,
+                              3083.605224609375,
+                              2970.1337890625,
+                              2844.60986328125,
+                              2710.1435546875,
+                              2569.677490234375,
+                              2425.8837890625,
+                              2281.107177734375,
+                              2137.337890625,
+                              2027.2845458984375,
+                              1953.087890625,
+                              1879.05322265625,
+                              1805.62060546875,
+                              1733.1553955078125,
+                              1661.9576416015625,
+                              1592.2684326171875,
+                              1524.27880859375,
+                              1455.4775390625,
+                              1380.0811767578125,
+                              1307.6715087890625,
+                              1238.270751953125,
+                              1171.8729248046875,
+                              1108.4483642578125,
+                              1047.949462890625,
+                              990.31439208984375,
+                              941.60955810546875,
+                              895.07000732421875,
+                              850.57257080078125,
+                              3041.52880859375,
+                              3031.1572265625,
+                              3000.390625,
+                              2950.239990234375,
+                              2882.297607421875,
+                              2798.61328125,
+                              2701.544189453125,
+                              2593.611328125,
+                              2477.3515625,
+                              2355.211669921875,
+                              2229.455810546875,
+                              2102.114501953125,
+                              2015.9456787109375,
+                              1947.802001953125,
+                              1879.05322265625,
+                              1810.194091796875,
+                              1741.64990234375,
+                              1673.7799072265625,
+                              1606.8861083984375,
+                              1541.215087890625,
+                              1476.9681396484375,
+                              1403.9237060546875,
+                              1332.708740234375,
+                              1264.1485595703125,
+                              1198.2880859375,
+                              1135.1424560546875,
+                              1074.7049560546875,
+                              1016.9471435546875,
+                              965.0220947265625,
+                              918.27838134765625,
+                              873.48193359375,
+                              830.5924072265625,
+                              2744.98388671875,
+                              2736.20654296875,
+                              2710.1435546875,
+                              2667.58154296875,
+                              2609.764404296875,
+                              2538.306396484375,
+                              2455.08642578125,
+                              2362.133056640625,
+                              2261.5234375,
+                              2155.288330078125,
+                              2053.26318359375,
+                              1993.638427734375,
+                              1932.1097412109375,
+                              1869.26220703125,
+                              1805.62060546875,
+                              1741.64990234375,
+                              1677.7550048828125,
+                              1614.285400390625,
+                              1551.537353515625,
+                              1489.759765625,
+                              1421.3629150390625,
+                              1351.9913330078125,
+                              1284.869384765625,
+                              1220.09375,
+                              1157.7274169921875,
+                              1097.8046875,
+                              1040.3365478515625,
+                              985.8211669921875,
+                              939.15362548828125,
+                              894.31201171875,
+                              851.27447509765625,
+                              810.0118408203125,
+                              2477.35107421875,
+                              2469.894287109375,
+                              2447.7333984375,
+                              2411.486328125,
+                              2362.1328125,
+                              2300.954345703125,
+                              2229.455810546875,
+                              2149.279541015625,
+                              2062.12646484375,
+                              2013.1302490234375,
+                              1961.06884765625,
+                              1906.490966796875,
+                              1849.968017578125,
+                              1792.02880859375,
+                              1733.1553955078125,
+                              1673.7799072265625,
+                              1614.285400390625,
+                              1555.00537109375,
+                              1496.228515625,
+                              1431.994873046875,
+                              1365.0950927734375,
+                              1300,
+                              1236.8575439453125,
+                              1175.777099609375,
+                              1116.836181640625,
+                              1060.0849609375,
+                              1005.5491943359375,
+                              957.398681640625,
+                              912.78082275390625,
+                              869.84039306640625,
+                              828.56634521484375,
+                              788.9383544921875,
+                              2235.813232421875,
+                              2229.455810546875,
+                              2210.550537109375,
+                              2179.5849609375,
+                              2137.337890625,
+                              2084.83349609375,
+                              2041.63623046875,
+                              2004.73095703125,
+                              1963.7435302734375,
+                              1919.218017578125,
+                              1871.700927734375,
+                              1821.723876953125,
+                              1769.796142578125,
+                              1716.393798828125,
+                              1661.9576416015625,
+                              1606.8861083984375,
+                              1551.537353515625,
+                              1496.228515625,
+                              1435.5677490234375,
+                              1371.7230224609375,
+                              1309.21435546875,
+                              1248.234619140625,
+                              1188.9384765625,
+                              1131.4447021484375,
+                              1075.8421630859375,
+                              1022.19244384765625,
+                              972.74078369140625,
+                              928.625244140625,
+                              886.03875732421875,
+                              844.98834228515625,
+                              805.47149658203125,
+                              767.47625732421875,
+                              2038.7496337890625,
+                              2035.871337890625,
+                              2027.2841796875,
+                              2013.1302490234375,
+                              1993.63818359375,
+                              1969.1136474609375,
+                              1939.925048828125,
+                              1906.490966796875,
+                              1869.26220703125,
+                              1828.7081298828125,
+                              1785.30419921875,
+                              1739.5189208984375,
+                              1691.8035888671875,
+                              1642.5870361328125,
+                              1592.2684326171875,
+                              1541.215087890625,
+                              1489.759765625,
+                              1431.994873046875,
+                              1371.7230224609375,
+                              1312.3087158203125,
+                              1253.9849853515625,
+                              1196.9451904296875,
+                              1141.3475341796875,
+                              1087.3167724609375,
+                              1034.9495849609375,
+                              984.94073486328125,
+                              941.60955810546875,
+                              899.6387939453125,
+                              859.05474853515625,
+                              819.87261962890625,
+                              782.09710693359375,
+                              745.7244873046875,
+                              1932.1097412109375,
+                              1929.51806640625,
+                              1921.7830810546875,
+                              1909.0233154296875,
+                              1891.4306640625,
+                              1869.26220703125,
+                              1842.8297119140625,
+                              1812.4891357421875,
+                              1778.6273193359375,
+                              1741.64990234375,
+                              1701.970703125,
+                              1660.0018310546875,
+                              1616.1446533203125,
+                              1570.7830810546875,
+                              1524.27880859375,
+                              1476.9681396484375,
+                              1421.3629150390625,
+                              1365.0950927734375,
+                              1309.21435546875,
+                              1253.9849853515625,
+                              1199.633056640625,
+                              1146.34912109375,
+                              1094.29150390625,
+                              1043.5887451171875,
+                              994.34295654296875,
+                              951.5340576171875,
+                              910.440185546875,
+                              870.5670166015625,
+                              831.9473876953125,
+                              794.60296630859375,
+                              758.54522705078125,
+                              723.7767333984375,
+                              1831.0474853515625,
+                              1828.7081298828125,
+                              1821.7237548828125,
+                              1810.194091796875,
+                              1794.2806396484375,
+                              1774.201171875,
+                              1750.22119140625,
+                              1722.6444091796875,
+                              1691.8035888671875,
+                              1658.050537109375,
+                              1621.7459716796875,
+                              1583.2529296875,
+                              1542.9271240234375,
+                              1501.1123046875,
+                              1455.4775390625,
+                              1403.9237060546875,
+                              1351.9913330078125,
+                              1300,
+                              1248.234619140625,
+                              1196.9451904296875,
+                              1146.34912109375,
+                              1096.6314697265625,
+                              1047.949462890625,
+                              1000.4320068359375,
+                              958.2410888671875,
+                              918.27825927734375,
+                              879.35675048828125,
+                              841.5264892578125,
+                              804.8258056640625,
+                              769.28125,
+                              734.91033935546875,
+                              701.72100830078125,
+                              1735.2716064453125,
+                              1733.1553955078125,
+                              1726.834716796875,
+                              1716.393798828125,
+                              1701.970703125,
+                              1683.7496337890625,
+                              1661.9576416015625,
+                              1636.8553466796875,
+                              1608.7301025390625,
+                              1577.8870849609375,
+                              1544.6424560546875,
+                              1509.314697265625,
+                              1472.10400390625,
+                              1426.662841796875,
+                              1380.0811767578125,
+                              1332.708740234375,
+                              1284.869384765625,
+                              1236.8575439453125,
+                              1188.9384765625,
+                              1141.3475341796875,
+                              1094.29150390625,
+                              1047.949462890625,
+                              1002.47418212890625,
+                              961.62213134765625,
+                              923.03155517578125,
+                              885.292236328125,
+                              848.471923828125,
+                              812.6236572265625,
+                              777.789794921875,
+                              744.00146484375,
+                              711.28009033203125,
+                              684.97052001953125,
+                              1644.505615234375,
+                              1642.5870361328125,
+                              1636.8553466796875,
+                              1627.38232421875,
+                              1614.285400390625,
+                              1597.7220458984375,
+                              1577.8870849609375,
+                              1555.00537109375,
+                              1529.3251953125,
+                              1501.1123046875,
+                              1470.240966796875,
+                              1431.994873046875,
+                              1391.9202880859375,
+                              1350.36767578125,
+                              1307.6715087890625,
+                              1264.1485595703125,
+                              1220.09375,
+                              1175.777099609375,
+                              1131.4447021484375,
+                              1087.3167724609375,
+                              1043.5887451171875,
+                              1000.4320068359375,
+                              961.62213134765625,
+                              924.62445068359375,
+                              888.28326416015625,
+                              852.68096923828125,
+                              817.8857421875,
+                              783.95404052734375,
+                              750.92999267578125,
+                              718.84820556640625,
+                              690.50970458984375,
+                              669.78717041015625,
+                              1558.4873046875,
+                              1556.7445068359375,
+                              1551.537353515625,
+                              1542.9271240234375,
+                              1531.013916015625,
+                              1515.93359375,
+                              1497.8533935546875,
+                              1476.9681396484375,
+                              1450.00341796875,
+                              1419.6033935546875,
+                              1386.82666015625,
+                              1351.9913330078125,
+                              1315.4149169921875,
+                              1277.407958984375,
+                              1238.270751953125,
+                              1198.2880859375,
+                              1157.7274169921875,
+                              1116.836181640625,
+                              1075.8421630859375,
+                              1034.9495849609375,
+                              994.34295654296875,
+                              958.2410888671875,
+                              923.03155517578125,
+                              888.28326416015625,
+                              854.0911865234375,
+                              820.53631591796875,
+                              787.68792724609375,
+                              755.60260009765625,
+                              724.32769775390625,
+                              694.7138671875,
+                              674.44976806640625,
+                              654.522705078125,
+                              1476.968017578125,
+                              1475.38232421875,
+                              1470.240966796875,
+                              1460.985595703125,
+                              1448.1861572265625,
+                              1431.994873046875,
+                              1412.5999755859375,
+                              1390.2191162109375,
+                              1365.0950927734375,
+                              1337.488037109375,
+                              1307.6715087890625,
+                              1275.923828125,
+                              1242.5257568359375,
+                              1207.7525634765625,
+                              1171.8729248046875,
+                              1135.1424560546875,
+                              1097.8046875,
+                              1060.0849609375,
+                              1022.19244384765625,
+                              984.94073486328125,
+                              951.5340576171875,
+                              918.27825927734375,
+                              885.292236328125,
+                              852.68096923828125,
+                              820.53631591796875,
+                              788.9383544921875,
+                              757.955322265625,
+                              727.6448974609375,
+                              698.0548095703125,
+                              677.8135986328125,
+                              658.3643798828125,
+                              639.217041015625,
+                              1386.82666015625,
+                              1385.1351318359375,
+                              1380.0811767578125,
+                              1371.7230224609375,
+                              1360.1573486328125,
+                              1345.5145263671875,
+                              1327.9566650390625,
+                              1307.6717529296875,
+                              1284.86962890625,
+                              1259.777099609375,
+                              1232.6331787109375,
+                              1203.682373046875,
+                              1173.171875,
+                              1141.3475341796875,
+                              1108.4483642578125,
+                              1074.7049560546875,
+                              1040.3365478515625,
+                              1005.5491943359375,
+                              972.74078369140625,
+                              941.60955810546875,
+                              910.440185546875,
+                              879.35675048828125,
+                              848.471923828125,
+                              817.8857421875,
+                              787.68792724609375,
+                              757.955322265625,
+                              728.75579833984375,
+                              700.14642333984375,
+                              679.84564208984375,
+                              660.9462890625,
+                              642.2919921875,
+                              623.90673828125,
+                              1301.5286865234375,
+                              1300,
+                              1295.431396484375,
+                              1287.8731689453125,
+                              1277.407958984375,
+                              1264.1485595703125,
+                              1248.234619140625,
+                              1229.8292236328125,
+                              1209.1142578125,
+                              1186.287841796875,
+                              1161.558837890625,
+                              1135.1424560546875,
+                              1107.2579345703125,
+                              1078.122314453125,
+                              1047.949462890625,
+                              1016.9471435546875,
+                              985.8211669921875,
+                              957.398681640625,
+                              928.625244140625,
+                              899.6387939453125,
+                              870.5670166015625,
+                              841.5264892578125,
+                              812.6236572265625,
+                              783.95404052734375,
+                              755.60260009765625,
+                              727.6448974609375,
+                              700.14642333984375,
+                              680.52508544921875,
+                              662.2437744140625,
+                              644.1488037109375,
+                              626.26806640625,
+                              608.6259765625,
+                              1221.4771728515625,
+                              1220.09375,
+                              1215.958251953125,
+                              1209.1142578125,
+                              1199.633056640625,
+                              1187.612060546875,
+                              1173.171875,
+                              1156.454345703125,
+                              1137.61865234375,
+                              1116.836181640625,
+                              1094.29150390625,
+                              1070.173828125,
+                              1044.676513671875,
+                              1017.99298095703125,
+                              990.31439208984375,
+                              965.0220947265625,
+                              939.15362548828125,
+                              912.78082275390625,
+                              886.03875732421875,
+                              859.05474853515625,
+                              831.9473876953125,
+                              804.8258056640625,
+                              777.789794921875,
+                              750.92999267578125,
+                              724.32769775390625,
+                              698.0548095703125,
+                              679.84564208984375,
+                              662.2437744140625,
+                              644.769775390625,
+                              627.45428466796875,
+                              610.32489013671875,
+                              593.4061279296875,
+                              1146.34912109375,
+                              1145.095703125,
+                              1141.3475341796875,
+                              1135.1424560546875,
+                              1126.5428466796875,
+                              1115.6322021484375,
+                              1102.515625,
+                              1087.3167724609375,
+                              1070.173828125,
+                              1051.2381591796875,
+                              1030.6707763671875,
+                              1008.638671875,
+                              985.8211669921875,
+                              964.1702880859375,
+                              941.60955810546875,
+                              918.27838134765625,
+                              894.31201171875,
+                              869.84039306640625,
+                              844.98834228515625,
+                              819.87261962890625,
+                              794.60296630859375,
+                              769.28125,
+                              744.00146484375,
+                              718.84820556640625,
+                              694.7138671875,
+                              677.8135986328125,
+                              660.9462890625,
+                              644.1488037109375,
+                              627.45428466796875,
+                              610.89300537109375,
+                              594.491943359375,
+                              578.27581787109375,
+                              1075.8421630859375,
+                              1074.7049560546875,
+                              1071.3038330078125,
+                              1065.6719970703125,
+                              1057.8629150390625,
+                              1047.949462890625,
+                              1036.0238037109375,
+                              1022.19244384765625,
+                              1006.57757568359375,
+                              989.35601806640625,
+                              972.74078369140625,
+                              954.87835693359375,
+                              935.8944091796875,
+                              915.9161376953125,
+                              895.07000732421875,
+                              873.48193359375,
+                              851.27447509765625,
+                              828.56634521484375,
+                              805.47149658203125,
+                              782.09710693359375,
+                              758.54522705078125,
+                              734.91033935546875,
+                              711.28009033203125,
+                              690.50970458984375,
+                              674.44976806640625,
+                              658.3643798828125,
+                              642.2919921875,
+                              626.26806640625,
+                              610.32489013671875,
+                              594.491943359375,
+                              578.7960205078125,
+                              563.26104736328125,
+                              1009.6715087890625,
+                              1008.638671875,
+                              1005.5491943359375,
+                              1000.4320068359375,
+                              993.33349609375,
+                              984.94073486328125,
+                              975.33502197265625,
+                              964.1702880859375,
+                              951.533935546875,
+                              937.5218505859375,
+                              922.236572265625,
+                              905.78668212890625,
+                              888.28326416015625,
+                              869.84039306640625,
+                              850.57257080078125,
+                              830.5924072265625,
+                              810.0118408203125,
+                              788.9383544921875,
+                              767.47625732421875,
+                              745.7244873046875,
+                              723.7767333984375,
+                              701.72100830078125,
+                              684.97052001953125,
+                              669.78717041015625,
+                              654.522705078125,
+                              639.217041015625,
+                              623.90673828125,
+                              608.6259765625,
+                              593.4061279296875,
+                              578.27581787109375,
+                              563.26104736328125,
+                              548.38555908203125};
+
+const float qmb32x32[1024] = {0,
+                              0,
+                              0,
+                              0,
+                              1554.123779296875,
+                              1242.53955078125,
+                              993.424560546875,
+                              821.7386474609375,
+                              688.02374267578125,
+                              576.06719970703125,
+                              482.32843017578125,
+                              403.842987060546875,
+                              338.12896728515625,
+                              283.2335205078125,
+                              237.367095947265625,
+                              198.92822265625,
+                              166.714080810546875,
+                              139.71661376953125,
+                              117.09114837646484375,
+                              100.3662261962890625,
+                              93.5875701904296875,
+                              87.26671600341796875,
+                              81.37277984619140625,
+                              75.876922607421875,
+                              70.75225830078125,
+                              65.97368621826171875,
+                              62.470378875732421875,
+                              59.2027587890625,
+                              56.10607147216796875,
+                              53.17134857177734375,
+                              50.390140533447265625,
+                              47.75440216064453125,
+                              0,
+                              0,
+                              0,
+                              0,
+                              1511.89892578125,
+                              1215.3125,
+                              975.19708251953125,
+                              811.43212890625,
+                              680.45819091796875,
+                              570.4285888671875,
+                              478.07489013671875,
+                              400.602691650390625,
+                              335.640289306640625,
+                              281.318328857421875,
+                              235.8760223388671875,
+                              197.7614898681640625,
+                              165.797119140625,
+                              138.9931640625,
+                              116.51837158203125,
+                              100.18183135986328125,
+                              93.42420196533203125,
+                              87.1216278076171875,
+                              81.24362945556640625,
+                              75.76171875,
+                              70.6492919921875,
+                              65.8815155029296875,
+                              62.4058837890625,
+                              59.143909454345703125,
+                              56.05228424072265625,
+                              53.12213897705078125,
+                              50.345058441162109375,
+                              47.71305084228515625,
+                              0,
+                              0,
+                              0,
+                              0,
+                              1398.31689453125,
+                              1139.9393310546875,
+                              926.46905517578125,
+                              781.85968017578125,
+                              658.5869140625,
+                              554.03814697265625,
+                              465.6590576171875,
+                              391.11358642578125,
+                              328.3336181640625,
+                              275.68292236328125,
+                              231.480926513671875,
+                              194.317352294921875,
+                              163.086822509765625,
+                              136.85247802734375,
+                              114.822052001953125,
+                              99.632171630859375,
+                              92.93701934814453125,
+                              86.6887664794921875,
+                              80.85819244384765625,
+                              75.4178009033203125,
+                              70.34185791015625,
+                              65.63607025146484375,
+                              62.2131195068359375,
+                              58.967945098876953125,
+                              55.891448974609375,
+                              52.97493743896484375,
+                              50.21018218994140625,
+                              47.589336395263671875,
+                              0,
+                              0,
+                              0,
+                              0,
+                              1242.53955078125,
+                              1031.720703125,
+                              865.44610595703125,
+                              736.6123046875,
+                              624.660888671875,
+                              528.352294921875,
+                              446.04833984375,
+                              376.0323486328125,
+                              316.66259765625,
+                              266.643341064453125,
+                              224.4067230224609375,
+                              188.7577972412109375,
+                              158.70111083984375,
+                              133.38116455078125,
+                              112.06615447998046875,
+                              98.72772216796875,
+                              92.1346588134765625,
+                              85.97531890869140625,
+                              80.2224273681640625,
+                              74.85018157958984375,
+                              69.8341827392578125,
+                              65.2862091064453125,
+                              61.8940887451171875,
+                              58.6766357421875,
+                              55.625087738037109375,
+                              52.731090545654296875,
+                              49.9866943359375,
+                              47.384288787841796875,
+                              1554.123779296875,
+                              1511.89892578125,
+                              1398.31689453125,
+                              1242.53955078125,
+                              1072.7047119140625,
+                              913.631103515625,
+                              791.500732421875,
+                              680.45819091796875,
+                              581.7969970703125,
+                              495.450836181640625,
+                              420.656097412109375,
+                              356.335296630859375,
+                              301.346527099609375,
+                              254.681488037109375,
+                              214.9998931884765625,
+                              181.3341522216796875,
+                              152.824005126953125,
+                              128.7149505615234375,
+                              108.35161590576171875,
+                              97.48529052734375,
+                              91.031036376953125,
+                              84.99285125732421875,
+                              79.34609222412109375,
+                              74.0670623779296875,
+                              69.13317108154296875,
+                              64.80181121826171875,
+                              61.4521331787109375,
+                              58.2728424072265625,
+                              55.255706787109375,
+                              52.392795562744140625,
+                              49.676509857177734375,
+                              47.09958648681640625,
+                              1242.53955078125,
+                              1215.3125,
+                              1139.9393310546875,
+                              1031.720703125,
+                              913.631103515625,
+                              811.43212890625,
+                              711.60491943359375,
+                              618.22406005859375,
+                              533.33489990234375,
+                              457.657806396484375,
+                              391.11358642578125,
+                              333.17852783203125,
+                              283.2335205078125,
+                              240.389190673828125,
+                              203.69110107421875,
+                              172.3627471923828125,
+                              145.6892547607421875,
+                              123.02779388427734375,
+                              103.80861663818359375,
+                              95.92728424072265625,
+                              89.64476776123046875,
+                              83.756927490234375,
+                              78.2422027587890625,
+                              73.07941436767578125,
+                              68.24814605712890625,
+                              64.18810272216796875,
+                              60.89176177978515625,
+                              57.760517120361328125,
+                              54.786739349365234375,
+                              51.96302032470703125,
+                              49.28223419189453125,
+                              46.73752593994140625,
+                              993.424560546875,
+                              975.19708251953125,
+                              926.46905517578125,
+                              865.44610595703125,
+                              791.500732421875,
+                              711.60491943359375,
+                              631.20953369140625,
+                              554.03814697265625,
+                              482.32843017578125,
+                              417.209503173828125,
+                              359.053955078125,
+                              307.756866455078125,
+                              263.14556884765625,
+                              224.4067230224609375,
+                              190.9550628662109375,
+                              162.196685791015625,
+                              137.5612030029296875,
+                              116.51837158203125,
+                              100.55123138427734375,
+                              94.08060455322265625,
+                              87.99832916259765625,
+                              82.28644561767578125,
+                              76.92669677734375,
+                              71.90074920654296875,
+                              67.1905670166015625,
+                              63.451557159423828125,
+                              60.218593597412109375,
+                              57.14452362060546875,
+                              54.22241973876953125,
+                              51.44549560546875,
+                              48.807163238525390625,
+                              46.300994873046875,
+                              821.7386474609375,
+                              811.43212890625,
+                              781.85968017578125,
+                              736.6123046875,
+                              680.45819091796875,
+                              618.22406005859375,
+                              554.03814697265625,
+                              491.01513671875,
+                              431.260406494140625,
+                              376.0323486328125,
+                              325.949676513671875,
+                              281.318328857421875,
+                              241.92047119140625,
+                              207.3672332763671875,
+                              177.27020263671875,
+                              151.198699951171875,
+                              128.7149810791015625,
+                              109.3962554931640625,
+                              98.19190216064453125,
+                              91.9756011962890625,
+                              86.117218017578125,
+                              80.60289764404296875,
+                              75.4178009033203125,
+                              70.54657745361328125,
+                              65.97369384765625,
+                              62.5996856689453125,
+                              59.439167022705078125,
+                              56.430583953857421875,
+                              53.567768096923828125,
+                              50.844631195068359375,
+                              48.255126953125,
+                              45.7933807373046875,
+                              688.02374267578125,
+                              680.45819091796875,
+                              658.5869140625,
+                              624.660888671875,
+                              581.7969970703125,
+                              533.33489990234375,
+                              482.32843017578125,
+                              431.260406494140625,
+                              381.958526611328125,
+                              335.640289306640625,
+                              293.0960693359375,
+                              254.681427001953125,
+                              220.30645751953125,
+                              189.852081298828125,
+                              163.086822509765625,
+                              139.7166595458984375,
+                              119.4195404052734375,
+                              101.8691864013671875,
+                              95.58690643310546875,
+                              89.64476776123046875,
+                              84.02899932861328125,
+                              78.7296905517578125,
+                              73.73546600341796875,
+                              69.03392791748046875,
+                              64.8706207275390625,
+                              61.6408843994140625,
+                              58.560794830322265625,
+                              55.625087738037109375,
+                              52.82842254638671875,
+                              50.165355682373046875,
+                              47.630523681640625,
+                              45.21855926513671875,
+                              576.06719970703125,
+                              570.4285888671875,
+                              554.03814697265625,
+                              528.352294921875,
+                              495.450836181640625,
+                              457.657806396484375,
+                              417.209503173828125,
+                              376.0323486328125,
+                              335.640289306640625,
+                              297.17999267578125,
+                              261.421112060546875,
+                              228.6143646240234375,
+                              198.92822265625,
+                              172.3627471923828125,
+                              148.803924560546875,
+                              128.066162109375,
+                              109.923492431640625,
+                              98.72772216796875,
+                              92.77559661865234375,
+                              87.1216278076171875,
+                              81.76230621337890625,
+                              76.69138336181640625,
+                              71.90074920654296875,
+                              67.38091278076171875,
+                              63.7176971435546875,
+                              60.584194183349609375,
+                              57.59142303466796875,
+                              54.7350311279296875,
+                              52.010478973388671875,
+                              49.41309356689453125,
+                              46.93811798095703125,
+                              44.580780029296875,
+                              482.32843017578125,
+                              478.07489013671875,
+                              465.6590576171875,
+                              446.04833984375,
+                              420.656097412109375,
+                              391.11358642578125,
+                              359.053955078125,
+                              325.949676513671875,
+                              293.0960693359375,
+                              261.421112060546875,
+                              231.480926513671875,
+                              203.69110107421875,
+                              178.2744598388671875,
+                              155.306304931640625,
+                              134.7555084228515625,
+                              116.51837158203125,
+                              101.29721832275390625,
+                              95.41748046875,
+                              89.79701995849609375,
+                              84.4398193359375,
+                              79.34609222412109375,
+                              74.51293182373046875,
+                              69.9352569580078125,
+                              65.63607025146484375,
+                              62.470378875732421875,
+                              59.439159393310546875,
+                              56.53945159912109375,
+                              53.767810821533203125,
+                              51.120525360107421875,
+                              48.59365081787109375,
+                              46.18306732177734375,
+                              43.884586334228515625,
+                              403.842987060546875,
+                              400.602691650390625,
+                              391.11358642578125,
+                              376.0323486328125,
+                              356.335296630859375,
+                              333.17852783203125,
+                              307.756866455078125,
+                              281.318328857421875,
+                              254.681427001953125,
+                              228.6143646240234375,
+                              203.69110107421875,
+                              180.3063201904296875,
+                              158.70111083984375,
+                              138.993194580078125,
+                              121.20597076416015625,
+                              105.29486083984375,
+                              97.48529815673828125,
+                              91.9756011962890625,
+                              86.68878173828125,
+                              81.632110595703125,
+                              76.80889129638671875,
+                              72.21916961669921875,
+                              67.86054229736328125,
+                              64.18810272216796875,
+                              61.139739990234375,
+                              58.215541839599609375,
+                              55.413524627685546875,
+                              52.731090545654296875,
+                              50.165355682373046875,
+                              47.71305084228515625,
+                              45.3707275390625,
+                              43.134796142578125,
+                              338.12896728515625,
+                              335.640289306640625,
+                              328.3336181640625,
+                              316.66259765625,
+                              301.346527099609375,
+                              283.2335205078125,
+                              263.14556884765625,
+                              241.92047119140625,
+                              220.30645751953125,
+                              198.92822265625,
+                              178.2744598388671875,
+                              158.70111083984375,
+                              140.44512939453125,
+                              123.64312744140625,
+                              108.35161590576171875,
+                              98.907470703125,
+                              93.5875701904296875,
+                              88.4422607421875,
+                              83.48635101318359375,
+                              78.7296905517578125,
+                              74.17812347412109375,
+                              69.8341827392578125,
+                              65.70645904541015625,
+                              62.66451263427734375,
+                              59.736907958984375,
+                              56.923252105712890625,
+                              54.22241973876953125,
+                              51.632717132568359375,
+                              49.151935577392578125,
+                              46.77752685546875,
+                              44.50667572021484375,
+                              42.336353302001953125,
+                              283.2335205078125,
+                              281.318328857421875,
+                              275.68292236328125,
+                              266.643341064453125,
+                              254.681488037109375,
+                              240.389190673828125,
+                              224.4067230224609375,
+                              207.3672332763671875,
+                              189.852081298828125,
+                              172.3627471923828125,
+                              155.306304931640625,
+                              138.993194580078125,
+                              123.64312744140625,
+                              109.3962554931640625,
+                              99.63219451904296875,
+                              94.5781402587890625,
+                              89.64476776123046875,
+                              84.85404205322265625,
+                              80.2224273681640625,
+                              75.76171875,
+                              71.4796905517578125,
+                              67.38091278076171875,
+                              63.985767364501953125,
+                              61.0775909423828125,
+                              58.2728424072265625,
+                              55.5720672607421875,
+                              52.97493743896484375,
+                              50.480510711669921875,
+                              48.087291717529296875,
+                              45.7933807373046875,
+                              43.59656524658203125,
+                              41.49433135986328125,
+                              237.367095947265625,
+                              235.8760223388671875,
+                              231.480926513671875,
+                              224.4067230224609375,
+                              214.9998931884765625,
+                              203.69110107421875,
+                              190.9550628662109375,
+                              177.27020263671875,
+                              163.086822509765625,
+                              148.803924560546875,
+                              134.7555084228515625,
+                              121.20597076416015625,
+                              108.35161590576171875,
+                              99.63219451904296875,
+                              94.912353515625,
+                              90.2564239501953125,
+                              85.69269561767578125,
+                              81.24362945556640625,
+                              76.92669677734375,
+                              72.75490570068359375,
+                              68.737579345703125,
+                              65.0778350830078125,
+                              62.2131195068359375,
+                              59.439159393310546875,
+                              56.75823211669921875,
+                              54.171604156494140625,
+                              51.679691314697265625,
+                              49.28223419189453125,
+                              46.97840118408203125,
+                              44.766880035400390625,
+                              42.64601898193359375,
+                              40.613834381103515625,
+                              198.92822265625,
+                              197.7614898681640625,
+                              194.317352294921875,
+                              188.7577972412109375,
+                              181.3341522216796875,
+                              172.3627471923828125,
+                              162.196685791015625,
+                              151.198699951171875,
+                              139.7166595458984375,
+                              128.066162109375,
+                              116.51837158203125,
+                              105.29486083984375,
+                              98.907470703125,
+                              94.5781402587890625,
+                              90.2564239501953125,
+                              85.97531890869140625,
+                              81.76230621337890625,
+                              77.6397247314453125,
+                              73.62548065185546875,
+                              69.73333740234375,
+                              65.97369384765625,
+                              63.1215667724609375,
+                              60.4009246826171875,
+                              57.760517120361328125,
+                              55.203277587890625,
+                              52.731090545654296875,
+                              50.345058441162109375,
+                              48.045475006103515625,
+                              45.832118988037109375,
+                              43.7042236328125,
+                              41.660648345947265625,
+                              39.699886322021484375,
+                              166.714080810546875,
+                              165.797119140625,
+                              163.086822509765625,
+                              158.70111083984375,
+                              152.824005126953125,
+                              145.6892547607421875,
+                              137.5612030029296875,
+                              128.7149810791015625,
+                              119.4195404052734375,
+                              109.923492431640625,
+                              101.29721832275390625,
+                              97.48529815673828125,
+                              93.5875701904296875,
+                              89.64476776123046875,
+                              85.69269561767578125,
+                              81.76230621337890625,
+                              77.87981414794921875,
+                              74.0670623779296875,
+                              70.34185791015625,
+                              66.7183837890625,
+                              63.7845306396484375,
+                              61.139739990234375,
+                              58.560787200927734375,
+                              56.05228424072265625,
+                              53.617671966552734375,
+                              51.25939178466796875,
+                              48.979061126708984375,
+                              46.77752685546875,
+                              44.6550750732421875,
+                              42.611438751220703125,
+                              40.645923614501953125,
+                              38.757503509521484375,
+                              139.71661376953125,
+                              138.9931640625,
+                              136.85247802734375,
+                              133.38116455078125,
+                              128.7149505615234375,
+                              123.02779388427734375,
+                              116.51837158203125,
+                              109.3962554931640625,
+                              101.8691864013671875,
+                              98.72772216796875,
+                              95.41748046875,
+                              91.9756011962890625,
+                              88.4422607421875,
+                              84.85404205322265625,
+                              81.24362945556640625,
+                              77.6397247314453125,
+                              74.0670623779296875,
+                              70.54657745361328125,
+                              67.09571075439453125,
+                              64.18810272216796875,
+                              61.6408843994140625,
+                              59.143909454345703125,
+                              56.703399658203125,
+                              54.324310302734375,
+                              52.010478973388671875,
+                              49.764812469482421875,
+                              47.589336395263671875,
+                              45.485385894775390625,
+                              43.453662872314453125,
+                              41.49433135986328125,
+                              39.607158660888671875,
+                              37.7914886474609375,
+                              117.09114837646484375,
+                              116.51837158203125,
+                              114.822052001953125,
+                              112.06615447998046875,
+                              108.35161590576171875,
+                              103.80861663818359375,
+                              100.55123138427734375,
+                              98.19190216064453125,
+                              95.58690643310546875,
+                              92.77559661865234375,
+                              89.79701995849609375,
+                              86.68878173828125,
+                              83.48635101318359375,
+                              80.2224273681640625,
+                              76.92669677734375,
+                              73.62548065185546875,
+                              70.34185791015625,
+                              67.09571075439453125,
+                              64.32361602783203125,
+                              61.8940887451171875,
+                              59.498504638671875,
+                              57.14452362060546875,
+                              54.838520050048828125,
+                              52.585674285888671875,
+                              50.390140533447265625,
+                              48.255126953125,
+                              46.18306732177734375,
+                              44.175624847412109375,
+                              42.233917236328125,
+                              40.358489990234375,
+                              38.54946136474609375,
+                              36.8065338134765625,
+                              100.3662261962890625,
+                              100.18183135986328125,
+                              99.632171630859375,
+                              98.72772216796875,
+                              97.48529052734375,
+                              95.92728424072265625,
+                              94.08060455322265625,
+                              91.9756011962890625,
+                              89.64476776123046875,
+                              87.1216278076171875,
+                              84.4398193359375,
+                              81.632110595703125,
+                              78.7296905517578125,
+                              75.76171875,
+                              72.75490570068359375,
+                              69.73333740234375,
+                              66.7183837890625,
+                              64.18810272216796875,
+                              61.8940887451171875,
+                              59.61750030517578125,
+                              57.367244720458984375,
+                              55.150913238525390625,
+                              52.97493743896484375,
+                              50.844631195068359375,
+                              48.76433563232421875,
+                              46.73752593994140625,
+                              44.766880035400390625,
+                              42.854427337646484375,
+                              41.001552581787109375,
+                              39.2091522216796875,
+                              37.477649688720703125,
+                              35.807086944580078125,
+                              93.5875701904296875,
+                              93.42420196533203125,
+                              92.93701934814453125,
+                              92.1346588134765625,
+                              91.031036376953125,
+                              89.64476776123046875,
+                              87.99832916259765625,
+                              86.117218017578125,
+                              84.02899932861328125,
+                              81.76230621337890625,
+                              79.34609222412109375,
+                              76.80889129638671875,
+                              74.17812347412109375,
+                              71.4796905517578125,
+                              68.737579345703125,
+                              65.97369384765625,
+                              63.7845306396484375,
+                              61.6408843994140625,
+                              59.498504638671875,
+                              57.367244720458984375,
+                              55.255706787109375,
+                              53.17134857177734375,
+                              51.120525360107421875,
+                              49.10861968994140625,
+                              47.1400909423828125,
+                              45.21856689453125,
+                              43.34696197509765625,
+                              41.52751922607421875,
+                              39.761898040771484375,
+                              38.05126190185546875,
+                              36.396320343017578125,
+                              34.797382354736328125,
+                              87.26671600341796875,
+                              87.1216278076171875,
+                              86.6887664794921875,
+                              85.97531890869140625,
+                              84.99285125732421875,
+                              83.756927490234375,
+                              82.28644561767578125,
+                              80.60289764404296875,
+                              78.7296905517578125,
+                              76.69138336181640625,
+                              74.51293182373046875,
+                              72.21916961669921875,
+                              69.8341827392578125,
+                              67.38091278076171875,
+                              65.0778350830078125,
+                              63.1215667724609375,
+                              61.139739990234375,
+                              59.143909454345703125,
+                              57.14452362060546875,
+                              55.150913238525390625,
+                              53.17134857177734375,
+                              51.2130279541015625,
+                              49.28223419189453125,
+                              47.384288787841796875,
+                              45.52370452880859375,
+                              43.70421600341796875,
+                              41.92890167236328125,
+                              40.20015716552734375,
+                              38.519870758056640625,
+                              36.88941192626953125,
+                              35.309741973876953125,
+                              33.78139495849609375,
+                              81.37277984619140625,
+                              81.24362945556640625,
+                              80.85819244384765625,
+                              80.2224273681640625,
+                              79.34609222412109375,
+                              78.2422027587890625,
+                              76.92669677734375,
+                              75.4178009033203125,
+                              73.73546600341796875,
+                              71.90074920654296875,
+                              69.9352569580078125,
+                              67.86054229736328125,
+                              65.70645904541015625,
+                              63.985767364501953125,
+                              62.2131195068359375,
+                              60.4009246826171875,
+                              58.560787200927734375,
+                              56.703399658203125,
+                              54.838520050048828125,
+                              52.97493743896484375,
+                              51.120525360107421875,
+                              49.28223419189453125,
+                              47.4661407470703125,
+                              45.677494049072265625,
+                              43.920803070068359375,
+                              42.199848175048828125,
+                              40.517787933349609375,
+                              38.877155303955078125,
+                              37.279994964599609375,
+                              35.72786712646484375,
+                              34.221893310546875,
+                              32.9320831298828125,
+                              75.876922607421875,
+                              75.76171875,
+                              75.4178009033203125,
+                              74.85018157958984375,
+                              74.0670623779296875,
+                              73.07941436767578125,
+                              71.90074920654296875,
+                              70.54657745361328125,
+                              69.03392791748046875,
+                              67.38091278076171875,
+                              65.63607025146484375,
+                              64.18810272216796875,
+                              62.66451263427734375,
+                              61.0775909423828125,
+                              59.439159393310546875,
+                              57.760517120361328125,
+                              56.05228424072265625,
+                              54.324310302734375,
+                              52.585674285888671875,
+                              50.844631195068359375,
+                              49.10861968994140625,
+                              47.384288787841796875,
+                              45.677494049072265625,
+                              43.993377685546875,
+                              42.336353302001953125,
+                              40.710224151611328125,
+                              39.118167877197265625,
+                              37.562847137451171875,
+                              36.04637908935546875,
+                              34.570468902587890625,
+                              33.22452545166015625,
+                              32.131664276123046875,
+                              70.75225830078125,
+                              70.6492919921875,
+                              70.34185791015625,
+                              69.8341827392578125,
+                              69.13317108154296875,
+                              68.24814605712890625,
+                              67.1905670166015625,
+                              65.97369384765625,
+                              64.8706207275390625,
+                              63.7176971435546875,
+                              62.470378875732421875,
+                              61.139739990234375,
+                              59.736907958984375,
+                              58.2728424072265625,
+                              56.75823211669921875,
+                              55.203277587890625,
+                              53.617671966552734375,
+                              52.010478973388671875,
+                              50.390140533447265625,
+                              48.76433563232421875,
+                              47.1400909423828125,
+                              45.52370452880859375,
+                              43.920803070068359375,
+                              42.336353302001953125,
+                              40.774688720703125,
+                              39.239543914794921875,
+                              37.734134674072265625,
+                              36.26111602783203125,
+                              34.822742462158203125,
+                              33.446636199951171875,
+                              32.3772735595703125,
+                              31.328754425048828125,
+                              65.97368621826171875,
+                              65.8815155029296875,
+                              65.63607025146484375,
+                              65.2862091064453125,
+                              64.80181121826171875,
+                              64.18810272216796875,
+                              63.451557159423828125,
+                              62.5996856689453125,
+                              61.6408843994140625,
+                              60.584194183349609375,
+                              59.439159393310546875,
+                              58.215541839599609375,
+                              56.923252105712890625,
+                              55.5720672607421875,
+                              54.171604156494140625,
+                              52.731090545654296875,
+                              51.25939178466796875,
+                              49.764812469482421875,
+                              48.255126953125,
+                              46.73752593994140625,
+                              45.21856689453125,
+                              43.70421600341796875,
+                              42.199848175048828125,
+                              40.710224151611328125,
+                              39.239543914794921875,
+                              37.7914886474609375,
+                              36.3692169189453125,
+                              34.975429534912109375,
+                              33.612384796142578125,
+                              32.55457305908203125,
+                              31.5306549072265625,
+                              30.5255126953125,
+                              62.470378875732421875,
+                              62.4058837890625,
+                              62.2131195068359375,
+                              61.8940887451171875,
+                              61.4521331787109375,
+                              60.89176177978515625,
+                              60.218593597412109375,
+                              59.439167022705078125,
+                              58.560794830322265625,
+                              57.59142303466796875,
+                              56.53945159912109375,
+                              55.413524627685546875,
+                              54.22241973876953125,
+                              52.97493743896484375,
+                              51.679691314697265625,
+                              50.345058441162109375,
+                              48.979061126708984375,
+                              47.589336395263671875,
+                              46.18306732177734375,
+                              44.766880035400390625,
+                              43.34696197509765625,
+                              41.92890167236328125,
+                              40.517787933349609375,
+                              39.118167877197265625,
+                              37.734134674072265625,
+                              36.3692169189453125,
+                              35.026554107666015625,
+                              33.708812713623046875,
+                              32.66172027587890625,
+                              31.6664142608642578125,
+                              30.6867351531982421875,
+                              29.723903656005859375,
+                              59.2027587890625,
+                              59.143909454345703125,
+                              58.967945098876953125,
+                              58.6766357421875,
+                              58.2728424072265625,
+                              57.760517120361328125,
+                              57.14452362060546875,
+                              56.430583953857421875,
+                              55.625087738037109375,
+                              54.7350311279296875,
+                              53.767810821533203125,
+                              52.731090545654296875,
+                              51.632717132568359375,
+                              50.480510711669921875,
+                              49.28223419189453125,
+                              48.045475006103515625,
+                              46.77752685546875,
+                              45.485385894775390625,
+                              44.175624847412109375,
+                              42.854427337646484375,
+                              41.52751922607421875,
+                              40.20015716552734375,
+                              38.877155303955078125,
+                              37.562847137451171875,
+                              36.26111602783203125,
+                              34.975429534912109375,
+                              33.708812713623046875,
+                              32.697551727294921875,
+                              31.7346553802490234375,
+                              30.7841281890869140625,
+                              29.8474140167236328125,
+                              28.925754547119140625,
+                              56.10607147216796875,
+                              56.05228424072265625,
+                              55.891448974609375,
+                              55.625087738037109375,
+                              55.255706787109375,
+                              54.786739349365234375,
+                              54.22241973876953125,
+                              53.567768096923828125,
+                              52.82842254638671875,
+                              52.010478973388671875,
+                              51.120525360107421875,
+                              50.165355682373046875,
+                              49.151935577392578125,
+                              48.087291717529296875,
+                              46.97840118408203125,
+                              45.832118988037109375,
+                              44.6550750732421875,
+                              43.453662872314453125,
+                              42.233917236328125,
+                              41.001552581787109375,
+                              39.761898040771484375,
+                              38.519870758056640625,
+                              37.279994964599609375,
+                              36.04637908935546875,
+                              34.822742462158203125,
+                              33.612384796142578125,
+                              32.66172027587890625,
+                              31.7346553802490234375,
+                              30.81670379638671875,
+                              29.9094753265380859375,
+                              29.0143985748291015625,
+                              28.132732391357421875,
+                              53.17134857177734375,
+                              53.12213897705078125,
+                              52.97493743896484375,
+                              52.731090545654296875,
+                              52.392795562744140625,
+                              51.96302032470703125,
+                              51.44549560546875,
+                              50.844631195068359375,
+                              50.165355682373046875,
+                              49.41309356689453125,
+                              48.59365081787109375,
+                              47.71305084228515625,
+                              46.77752685546875,
+                              45.7933807373046875,
+                              44.766880035400390625,
+                              43.7042236328125,
+                              42.611438751220703125,
+                              41.49433135986328125,
+                              40.358489990234375,
+                              39.2091522216796875,
+                              38.05126190185546875,
+                              36.88941192626953125,
+                              35.72786712646484375,
+                              34.570468902587890625,
+                              33.446636199951171875,
+                              32.55457305908203125,
+                              31.6664142608642578125,
+                              30.7841281890869140625,
+                              29.9094753265380859375,
+                              29.0440425872802734375,
+                              28.189243316650390625,
+                              27.346340179443359375,
+                              50.390140533447265625,
+                              50.345058441162109375,
+                              50.21018218994140625,
+                              49.9866943359375,
+                              49.676509857177734375,
+                              49.28223419189453125,
+                              48.807163238525390625,
+                              48.255126953125,
+                              47.630523681640625,
+                              46.93811798095703125,
+                              46.18306732177734375,
+                              45.3707275390625,
+                              44.50667572021484375,
+                              43.59656524658203125,
+                              42.64601898193359375,
+                              41.660648345947265625,
+                              40.645923614501953125,
+                              39.607158660888671875,
+                              38.54946136474609375,
+                              37.477649688720703125,
+                              36.396320343017578125,
+                              35.309741973876953125,
+                              34.221893310546875,
+                              33.22452545166015625,
+                              32.3772735595703125,
+                              31.5306549072265625,
+                              30.6867351531982421875,
+                              29.8474140167236328125,
+                              29.0143985748291015625,
+                              28.189243316650390625,
+                              27.3733463287353515625,
+                              26.56793975830078125,
+                              47.75440216064453125,
+                              47.71305084228515625,
+                              47.589336395263671875,
+                              47.384288787841796875,
+                              47.09958648681640625,
+                              46.73752593994140625,
+                              46.300994873046875,
+                              45.7933807373046875,
+                              45.21855926513671875,
+                              44.580780029296875,
+                              43.884586334228515625,
+                              43.134796142578125,
+                              42.336353302001953125,
+                              41.49433135986328125,
+                              40.613834381103515625,
+                              39.699886322021484375,
+                              38.757503509521484375,
+                              37.7914886474609375,
+                              36.8065338134765625,
+                              35.807086944580078125,
+                              34.797382354736328125,
+                              33.78139495849609375,
+                              32.9320831298828125,
+                              32.131664276123046875,
+                              31.328754425048828125,
+                              30.5255126953125,
+                              29.723903656005859375,
+                              28.925754547119140625,
+                              28.132732391357421875,
+                              27.346340179443359375,
+                              26.56793975830078125,
+                              25.7987575531005859375};
+
+static float dequant_table[] = {
+    0.00031746,  0.00031746,  0.000318547, 0.000377554, 0.000447491, 0.000530384, 0.000628631, 0.000745078, 0.00031746,
+    0.00031746,  0.000331586, 0.000388111, 0.000456952, 0.000539384, 0.000637535, 0.000754131, 0.000318547, 0.000331586,
+    0.000366704, 0.000418477, 0.000484876, 0.000566262, 0.000664278, 0.000781404, 0.000377554, 0.000388111, 0.000418477,
+    0.000466329, 0.000530384, 0.000610828, 0.000709031, 0.000827274, 0.000447491, 0.000456952, 0.000484876, 0.000530384,
+    0.000593021, 0.000673207, 0.000772293, 0.000942868, 0.000530384, 0.000539384, 0.000566262, 0.000610828, 0.000673207,
+    0.000754131, 0.000855072, 0.00127232,  0.000628631, 0.000637535, 0.000664278, 0.000709031, 0.000772293, 0.000855072,
+    0.00119232,  0.00179199,  0.000745078, 0.000754131, 0.000781404, 0.000827274, 0.000942868, 0.00127232,  0.00179199,
+    0.00261332,  0.00178571,  0.00178571,  0.00179048,  0.00204418,  0.00233383,  0.00266452,  0.00304207,  0.00347311,
+    0.00178571,  0.00178571,  0.00184737,  0.00208861,  0.00237221,  0.00269971,  0.00307561,  0.00350598,  0.00179048,
+    0.00184737,  0.00199823,  0.00221497,  0.00248451,  0.00280405,  0.00317576,  0.00360445,  0.00204418,  0.00208861,
+    0.00221497,  0.00241009,  0.00266452,  0.00297468,  0.00334138,  0.0037684,   0.00233383,  0.00237221,  0.00248451,
+    0.00266452,  0.00290684,  0.00320899,  0.00357164,  0.00399808,  0.00266452,  0.00269971,  0.00280405,  0.00297468,
+    0.00320899,  0.00350598,  0.00386677,  0.0042947,   0.00304207,  0.00307561,  0.00317576,  0.00334138,  0.00357164,
+    0.00386677,  0.0042286,   0.00466073,  0.00347311,  0.00350598,  0.00360445,  0.0037684,   0.00399808,  0.0042947,
+    0.00466073,  0.00510017,  0.00195312,  0.00340183,  0.00590075,  0.00837433,  0.0117188,   0.0117188,   0.0119688,
+    0.016986,    0.00340183,  0.00428086,  0.00640914,  0.00886387,  0.0117188,   0.0117188,   0.0123206,   0.017414,
+    0.00590075,  0.00640914,  0.00788613,  0.0103519,   0.0117188,   0.0117188,   0.013409,    0.0187362,   0.00837433,
+    0.00886387,  0.0103519,   0.0117188,   0.0117188,   0.0117188,   0.0153365,   0.0210725,   0.0117188,   0.0117188,
+    0.0117188,   0.0117188,   0.0117188,   0.0137829,   0.018289,    0.0253682,   0.0117188,   0.0117188,   0.0117188,
+    0.0117188,   0.0137829,   0.017414,    0.0225572,   0.0342323,   0.0119688,   0.0123206,   0.013409,    0.0153365,
+    0.018289,    0.0225572,   0.0320797,   0.0482141,   0.016986,    0.017414,    0.0187362,   0.0210725,   0.0253682,
+    0.0342323,   0.0482141,   0.0703122,   0.00357143,  0.000316456, 0.00357143,  0.00357143,  0.00357143,  0.00357143,
+    0.00357143,  0.00357143,  0.000316456, 0.000316456, 0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,
+    0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,
+    0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,
+    0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,
+    0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,
+    0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,
+    0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.0166667,   0.00115741,  0.0166667,   0.0166667,   0.0166667,
+    0.0166667,   0.0166667,   0.0166667,   0.00115741,  0.00115741,  0.0166667,   0.0166667,   0.0166667,   0.0166667,
+    0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,
+    0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,
+    0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,
+    0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,
+    0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,
+    0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0555556,   0.005,       0.0555556,   0.0555556,
+    0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.005,       0.005,       0.0555556,   0.0555556,   0.0555556,
+    0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,
+    0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,
+    0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,
+    0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,
+    0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,
+    0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.00033456,  0.000260417, 0.00078125,
+    0.00078125,  0.00208333,  0.00208333,  0.00208333,  0.00208333,  0.000260417, 0.000390625, 0.00078125,  0.00078125,
+    0.00208333,  0.00208333,  0.00208333,  0.00208333,  0.00078125,  0.00078125,  0.0015625,   0.0015625,   0.00208333,
+    0.00208333,  0.00208333,  0.00208333,  0.00078125,  0.00078125,  0.0015625,   0.0015625,   0.00208333,  0.00208333,
+    0.00208333,  0.00208333,  0.00208333,  0.00208333,  0.00208333,  0.00208333,  0.00333333,  0.00333333,  0.00333333,
+    0.00333333,  0.00208333,  0.00208333,  0.00208333,  0.00208333,  0.00333333,  0.00333333,  0.00333333,  0.00333333,
+    0.00208333,  0.00208333,  0.00208333,  0.00208333,  0.00333333,  0.00333333,  0.00333333,  0.00333333,  0.00208333,
+    0.00208333,  0.00208333,  0.00208333,  0.00333333,  0.00333333,  0.00333333,  0.00333333,  0.00033456,  0.00104167,
+    0.003125,    0.003125,    0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.00104167,  0.0015625,   0.003125,
+    0.003125,    0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.003125,    0.003125,    0.00555556,  0.00555556,
+    0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.003125,    0.003125,    0.00555556,  0.00555556,  0.00714286,
+    0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.00833333,  0.00833333,
+    0.00833333,  0.00833333,  0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.00833333,  0.00833333,  0.00833333,
+    0.00833333,  0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.00833333,  0.00833333,  0.00833333,  0.00833333,
+    0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.00833333,  0.00833333,  0.00833333,  0.00833333,  0.00033456,
+    0.0015625,   0.0078125,   0.0078125,   0.03125,     0.03125,     0.03125,     0.03125,     0.0015625,   0.003125,
+    0.0078125,   0.0078125,   0.03125,     0.03125,     0.03125,     0.03125,     0.0078125,   0.0078125,   0.015625,
+    0.015625,    0.03125,     0.03125,     0.03125,     0.03125,     0.0078125,   0.0078125,   0.015625,    0.015625,
+    0.03125,     0.03125,     0.03125,     0.03125,     0.03125,     0.03125,     0.03125,     0.03125,     0.0625,
+    0.0625,      0.0625,      0.0625,      0.03125,     0.03125,     0.03125,     0.03125,     0.0625,      0.0625,
+    0.0625,      0.0625,      0.03125,     0.03125,     0.03125,     0.03125,     0.0625,      0.0625,      0.0625,
+    0.0625,      0.03125,     0.03125,     0.03125,     0.03125,     0.0625,      0.0625,      0.0625,      0.0625,
+    0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545,
+    0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545,
+    0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545,
+    0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545,
+    0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545,
+    0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545,
+    0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545,
+    0.000454545, 0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,
+    0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,
+    0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,
+    0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,
+    0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,
+    0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,
+    0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,
+    0.00255102,  0.00255102,  0.00892857,  0.00892857,  0.0104546,   0.0104546,   0.0122415,   0.0122415,   0.0146543,
+    0.0146543,   0.00892857,  0.00892857,  0.0104546,   0.0104546,   0.0122415,   0.0122415,   0.0146543,   0.0146543,
+    0.0104546,   0.0104546,   0.0111607,   0.0111607,   0.012706,    0.012706,    0.0153522,   0.0153522,   0.0104546,
+    0.0104546,   0.0111607,   0.0111607,   0.012706,    0.012706,    0.0153522,   0.0153522,   0.0122415,   0.0122415,
+    0.012706,    0.012706,    0.0139509,   0.0139509,   0.0174327,   0.0174327,   0.0122415,   0.0122415,   0.012706,
+    0.012706,    0.0139509,   0.0139509,   0.0174327,   0.0174327,   0.0146543,   0.0146543,   0.0153522,   0.0153522,
+    0.0174327,   0.0174327,   0.0209263,   0.0209263,   0.0146543,   0.0146543,   0.0153522,   0.0153522,   0.0174327,
+    0.0174327,   0.0209263,   0.0209263,   0.00011115,  0.000140677, 0.000178049, 0.00022535,  0.000269504, 0.000301925,
+    0.000338246, 0.000378937, 0.000420348, 0.000465933, 0.000516461, 0.000580657, 0.000667287, 0.000766841, 0.000881248,
+    0.00105055,  0.000140677, 0.000155098, 0.000188232, 0.000234132, 0.000273299, 0.00030534,  0.000341441, 0.000382008,
+    0.000423051, 0.000468598, 0.00051912,  0.000584332, 0.000671158, 0.000770947, 0.000885629, 0.00105693,  0.000178049,
+    0.000188232, 0.000216422, 0.000257694, 0.000284353, 0.000315428, 0.000350949, 0.000390319, 0.00043114,  0.000476585,
+    0.0005271,   0.000595403, 0.000682824, 0.000783325, 0.000898839, 0.00107621,  0.00022535,  0.000234132, 0.000257694,
+    0.000277035, 0.000301925, 0.000331813, 0.000366582, 0.000404044, 0.000444564, 0.000489882, 0.000540413, 0.000614021,
+    0.000702452, 0.000804154, 0.000928012, 0.00110885,  0.000269504, 0.000273299, 0.000284353, 0.000301925, 0.000325315,
+    0.000354095, 0.000387556, 0.000423051, 0.000463267, 0.000508487, 0.000562406, 0.00064044,  0.000730322, 0.000833738,
+    0.000969823, 0.00115559,  0.000301925, 0.00030534,  0.000315428, 0.000331813, 0.000354095, 0.000382008, 0.000412216,
+    0.000447241, 0.000487224, 0.000532423, 0.000595403, 0.000675038, 0.000766841, 0.000872515, 0.00102528,  0.00121754,
+    0.000338246, 0.000341441, 0.000350949, 0.000366582, 0.000387556, 0.000412216, 0.000441884, 0.000476585, 0.000516461,
+    0.00056604,  0.00063664,  0.000718321, 0.000812556, 0.000928012, 0.00109572,  0.00129614,  0.000378937, 0.000382008,
+    0.000390319, 0.000404044, 0.000423051, 0.000447241, 0.000476585, 0.000511145, 0.000551548, 0.000614021, 0.000686732,
+    0.000770947, 0.000868165, 0.00100658,  0.00118286,  0.00139327,  0.000420348, 0.000423051, 0.00043114,  0.000444564,
+    0.000463267, 0.000487224, 0.000516461, 0.000551548, 0.000606549, 0.000671158, 0.000746456, 0.000833738, 0.000945791,
+    0.00110227,  0.00128886,  0.00151124,  0.000465933, 0.000468598, 0.000476585, 0.000489882, 0.000508487, 0.000532423,
+    0.00056604,  0.000614021, 0.000671158, 0.000738369, 0.000816772, 0.000910439, 0.00105055,  0.00121754,  0.00141635,
+    0.0016529,   0.000516461, 0.00051912,  0.0005271,   0.000540413, 0.000562406, 0.000595403, 0.00063664,  0.000686732,
+    0.000746456, 0.000816772, 0.000898839, 0.00102528,  0.00117601,  0.00135536,  0.00156853,  0.00188017,  0.000580657,
+    0.000584332, 0.000595403, 0.000614021, 0.00064044,  0.000675038, 0.000718321, 0.000770947, 0.000833738, 0.000910439,
+    0.00102528,  0.00116237,  0.00132553,  0.00151933,  0.00176938,  0.00219749,  0.000667287, 0.000671158, 0.000682824,
+    0.000702452, 0.000730322, 0.000766841, 0.000812556, 0.000868165, 0.000945791, 0.00105055,  0.00117601,  0.00132553,
+    0.00150317,  0.00171597,  0.00210276,  0.00259291,  0.000766841, 0.000770947, 0.000783325, 0.000804154, 0.000833738,
+    0.000872515, 0.000928012, 0.00100658,  0.00110227,  0.00121754,  0.00135536,  0.00151933,  0.00171597,  0.00207192,
+    0.00252031,  0.00308604,  0.000881248, 0.000885629, 0.000898839, 0.000928012, 0.000969823, 0.00102528,  0.00109572,
+    0.00118286,  0.00128886,  0.00141635,  0.00156853,  0.00176938,  0.00210276,  0.00252031,  0.00304399,  0.00370184,
+    0.00105055,  0.00105693,  0.00107621,  0.00110885,  0.00115559,  0.00121754,  0.00129614,  0.00139327,  0.00151124,
+    0.0016529,   0.00188017,  0.00219749,  0.00259291,  0.00308604,  0.00370184,  0.0044721,   0.000313334, 0.000362504,
+    0.000419391, 0.000485204, 0.000567019, 0.000670359, 0.000792533, 0.000936973, 0.00104528,  0.00116095,  0.00128941,
+    0.00142184,  0.0015506,   0.00169102,  0.00184416,  0.00199463,  0.000362504, 0.000385066, 0.000434073, 0.000496818,
+    0.000578827, 0.000681565, 0.000803591, 0.000948188, 0.00105214,  0.00116772,  0.00129618,  0.00142744,  0.0015562,
+    0.00169666,  0.00184987,  0.00199976,  0.000419391, 0.000434073, 0.00047322,  0.000530783, 0.000613658, 0.000715011,
+    0.000836789, 0.00096922,  0.00107265,  0.00118801,  0.00131649,  0.00144425,  0.00157301,  0.00171359,  0.00186702,
+    0.00201515,  0.000485204, 0.000496818, 0.000530783, 0.000590527, 0.000670359, 0.000770417, 0.0008923,   0.00100397,
+    0.0011067,   0.00122181,  0.00135039,  0.00147223,  0.00160105,  0.00174186,  0.00189247,  0.00204083,  0.000567019,
+    0.000578827, 0.000613658, 0.000670359, 0.000748285, 0.000847868, 0.000962226, 0.00105214,  0.00115418,  0.00126912,
+    0.00139381,  0.00151141,  0.00164036,  0.00178153,  0.00192816,  0.00207686,  0.000670359, 0.000681565, 0.000715011,
+    0.000770417, 0.000847868, 0.000948188, 0.00102467,  0.00111349,  0.00121505,  0.00133005,  0.00144425,  0.00156181,
+    0.00169102,  0.00183274,  0.00197415,  0.00212335,  0.000792533, 0.000803591, 0.000836789, 0.0008923,   0.000962226,
+    0.00102467,  0.0010999,   0.00118801,  0.00128941,  0.00139942,  0.00150581,  0.0016235,   0.00175318,  0.00189247,
+    0.00203055,  0.00218041,  0.000936973, 0.000948188, 0.00096922,  0.00100397,  0.00105214,  0.00111349,  0.00118801,
+    0.00127588,  0.00137698,  0.00147223,  0.00157862,  0.00169666,  0.00182704,  0.00195881,  0.0020975,   0.00224824,
+    0.00104528,  0.00105214,  0.00107265,  0.0011067,   0.00115418,  0.00121505,  0.00128941,  0.00137698,  0.00146104,
+    0.0015562,   0.00166286,  0.00178153,  0.00190776,  0.00203569,  0.00217521,  0.00232705,  0.00116095,  0.00116772,
+    0.00118801,  0.00122181,  0.00126912,  0.00133005,  0.00139942,  0.00147223,  0.0015562,   0.0016516,   0.00175885,
+    0.0018772,   0.00199463,  0.00212335,  0.00226395,  0.00241714,  0.00128941,  0.00129618,  0.00131649,  0.00135039,
+    0.00139381,  0.00144425,  0.00150581,  0.00157862,  0.00166286,  0.00175885,  0.00186702,  0.00197415,  0.00209234,
+    0.0022221,   0.00236404,  0.00253057,  0.00142184,  0.00142744,  0.00144425,  0.00147223,  0.00151141,  0.00156181,
+    0.0016235,   0.00169666,  0.00178153,  0.0018772,   0.00197415,  0.00208202,  0.00220124,  0.00233233,  0.00248006,
+    0.00266504,  0.0015506,   0.0015562,   0.00157301,  0.00160105,  0.00164036,  0.00169102,  0.00175318,  0.00182704,
+    0.00190776,  0.00199463,  0.00209234,  0.00220124,  0.00232178,  0.00245495,  0.00262634,  0.00281554,  0.00169102,
+    0.00169666,  0.00171359,  0.00174186,  0.00178153,  0.00183274,  0.00189247,  0.00195881,  0.00203569,  0.00212335,
+    0.0022221,   0.00233233,  0.00245495,  0.00261349,  0.00278912,  0.00298309,  0.00184416,  0.00184987,  0.00186702,
+    0.00189247,  0.00192816,  0.00197415,  0.00203055,  0.0020975,   0.00217521,  0.00226395,  0.00236404,  0.00248006,
+    0.00262634,  0.00278912,  0.00296953,  0.00316883,  0.00199463,  0.00199976,  0.00201515,  0.00204083,  0.00207686,
+    0.00212335,  0.00218041,  0.00224824,  0.00232705,  0.00241714,  0.00253057,  0.00266504,  0.00281554,  0.00298309,
+    0.00316883,  0.00337407,  0.000863928, 0.00118463,  0.00162439,  0.0022274,   0.00295919,  0.00379064,  0.00485571,
+    0.00622003,  0.00705056,  0.00791755,  0.00889116,  0.00992424,  0.0109744,   0.0121358,   0.01342,     0.016979,
+    0.00118463,  0.00135013,  0.00175008,  0.00234449,  0.00305079,  0.00388474,  0.00495625,  0.00633046,  0.00710165,
+    0.00796857,  0.00894274,  0.00996962,  0.0110205,   0.0121827,   0.0134682,   0.0171252,   0.00162439,  0.00175008,
+    0.00210996,  0.00268382,  0.00332618,  0.00416997,  0.00526206,  0.00648581,  0.00725479,  0.00812174,  0.00909774,
+    0.0101059,   0.0111587,   0.0123239,   0.0136133,   0.0175695,   0.0022274,   0.00234449,  0.00268382,  0.00314244,
+    0.00379064,  0.00465665,  0.00578646,  0.00674328,  0.00750972,  0.00837744,  0.00935698,  0.0103335,   0.0113898,
+    0.0125601,   0.0142433,   0.018329,    0.00295919,  0.00305079,  0.00332618,  0.00379064,  0.00446017,  0.00536543,
+    0.00643411,  0.00710165,  0.00786655,  0.00873667,  0.0096976,   0.0106533,   0.0117149,   0.0128927,   0.0151608,
+    0.019433,    0.00379064,  0.00388474,  0.00416997,  0.00465665,  0.00536543,  0.00633046,  0.0068971,   0.00756068,
+    0.00832625,  0.00920129,  0.0101059,   0.0110665,   0.0121358,   0.0133237,   0.0164033,   0.020925,    0.00485571,
+    0.00495625,  0.00526206,  0.00578646,  0.00643411,  0.0068971,   0.00745875,  0.00812174,  0.00889116,  0.00974289,
+    0.0106075,   0.0115753,   0.0126549,   0.0142433,   0.0180223,   0.022864,    0.00622003,  0.00633046,  0.00648581,
+    0.00674328,  0.00710165,  0.00756068,  0.00812174,  0.00878813,  0.00956181,  0.0103335,   0.0112048,   0.0121827,
+    0.0132756,   0.0159812,   0.0200859,   0.0253284,   0.00705056,  0.00710165,  0.00725479,  0.00750972,  0.00786655,
+    0.00832625,  0.00889116,  0.00956181,  0.0102424,   0.0110205,   0.0119015,   0.0128927,   0.0146314,   0.0181752,
+    0.0226824,   0.0284191,   0.00791755,  0.00796857,  0.00812174,  0.00837744,  0.00873667,  0.00920129,  0.00974289,
+    0.0103335,   0.0110205,   0.0118081,   0.0127024,   0.0138628,   0.016979,    0.020925,    0.0259248,   0.0322653,
+    0.00889116,  0.00894274,  0.00909774,  0.00935698,  0.0096976,   0.0101059,   0.0106075,   0.0112048,   0.0119015,
+    0.0127024,   0.0136133,   0.0164033,   0.0199212,   0.0243578,   0.0299571,   0.040354,    0.00992424,  0.00996962,
+    0.0101059,   0.0103335,   0.0106533,   0.0110665,   0.0115753,   0.0121827,   0.0128927,   0.0138628,   0.0164033,
+    0.0195947,   0.0236019,   0.028635,    0.0360671,   0.0538439,   0.0109744,   0.0110205,   0.0111587,   0.0113898,
+    0.0117149,   0.0121358,   0.0126549,   0.0132756,   0.0146314,   0.016979,    0.0199212,   0.0236019,   0.0282045,
+    0.0340795,   0.0496305,   0.0731186,   0.0121358,   0.0121827,   0.0123239,   0.0125601,   0.0128927,   0.0133237,
+    0.0142433,   0.0159812,   0.0181752,   0.020925,    0.0243578,   0.028635,    0.0340795,   0.0482929,   0.0693777,
+    0.100892,    0.01342,     0.0134682,   0.0136133,   0.0142433,   0.0151608,   0.0164033,   0.0180223,   0.0200859,
+    0.0226824,   0.0259248,   0.0299571,   0.0360671,   0.0496305,   0.0693777,   0.0983645,   0.141247,    0.016979,
+    0.0171252,   0.0175695,   0.018329,    0.019433,    0.020925,    0.022864,    0.0253284,   0.0284191,   0.0322653,
+    0.040354,    0.0538439,   0.0731186,   0.100892,    0.141247,    0.200352,    6.36197e-05, 7.12062e-05, 7.96975e-05,
+    8.92013e-05, 9.98385e-05, 0.000111744, 0.000125069, 0.000139614, 0.000155703, 0.000173646, 0.000193657, 0.000215974,
+    0.000240863, 0.000267796, 0.000296726, 0.000328782, 0.000364301, 0.000403657, 0.000447265, 0.000490497, 0.000517569,
+    0.000546135, 0.000576279, 0.000608085, 0.000641648, 0.000677063, 0.000721071, 0.000768327, 0.000818681, 0.000872335,
+    0.000929504, 0.000990421, 7.12062e-05, 7.46078e-05, 8.18455e-05, 9.08471e-05, 0.000101233, 0.000112998, 0.000126241,
+    0.000140701, 0.000156764, 0.000174699, 0.000194714, 0.000217046, 0.000241959, 0.000268853, 0.000297814, 0.000329907,
+    0.000365469, 0.000404876, 0.00044854,  0.00049119,  0.000518264, 0.000546834, 0.000576982, 0.000608796, 0.000642366,
+    0.00067779,  0.000721951, 0.000769231, 0.000819609, 0.000873289, 0.000930488, 0.000991435, 7.96975e-05, 8.18455e-05,
+    8.74937e-05, 9.5499e-05,  0.000105293, 0.0001167,   0.000129698, 0.000143945, 0.000159941, 0.000177855, 0.000197886,
+    0.000220265, 0.000245251, 0.000272031, 0.000301085, 0.00033329,  0.000368984, 0.000408541, 0.000452376, 0.000493271,
+    0.00052035,  0.000548931, 0.000579094, 0.000610927, 0.000644522, 0.000680161, 0.000724595, 0.000771944, 0.000822397,
+    0.000876157, 0.000933442, 0.000994481, 8.92013e-05, 9.08471e-05, 9.5499e-05,  0.000102605, 0.000111744, 0.00012271,
+    0.000135241, 0.000149313, 0.000165221, 0.000183116, 0.000203185, 0.000225649, 0.000250764, 0.000277348, 0.00030656,
+    0.000338955, 0.000374871, 0.000414682, 0.000458803, 0.000496739, 0.000523828, 0.000552427, 0.000582617, 0.000614484,
+    0.000648119, 0.000684469, 0.00072901,  0.000776474, 0.000827052, 0.000880947, 0.000938375, 0.000999568, 9.98385e-05,
+    0.000101233, 0.000105293, 0.000111744, 0.000120327, 0.000130815, 0.000142866, 0.000156764, 0.000172594, 0.000190491,
+    0.000210631, 0.000233229, 0.000258328, 0.00028484,  0.000314279, 0.000346945, 0.000383176, 0.000423346, 0.000467872,
+    0.000501596, 0.0005287,   0.000557326, 0.000587554, 0.000619469, 0.000653162, 0.000690519, 0.000735209, 0.000782835,
+    0.000833588, 0.000887672, 0.000945302, 0.00100671,  0.000111744, 0.000112998, 0.0001167,   0.00012271,  0.000130815,
+    0.000140701, 0.000152514, 0.000166276, 0.000182063, 0.000200004, 0.000220265, 0.000243055, 0.000267796, 0.000294554,
+    0.000324296, 0.00035732,  0.000393964, 0.000434602, 0.000479655, 0.000507843, 0.00053497,  0.000563634, 0.000593912,
+    0.000625891, 0.00065966,  0.000698327, 0.00074321,  0.000791046, 0.000842026, 0.000896353, 0.000954244, 0.00101529,
+    0.000125069, 0.000126241, 0.000129698, 0.000135241, 0.000142866, 0.000152514, 0.000164166, 0.000177855, 0.000193657,
+    0.000211698, 0.000232143, 0.000255189, 0.000279483, 0.00030656,  0.000336685, 0.000370159, 0.000407318, 0.00044854,
+    0.000489803, 0.000515484, 0.000542644, 0.000571356, 0.0006017,   0.000633759, 0.000667622, 0.000707914, 0.000753037,
+    0.000801131, 0.00085239,  0.000907017, 0.000965229, 0.00102529,  0.000139614, 0.000140701, 0.000143945, 0.000149313,
+    0.000156764, 0.000166276, 0.000177855, 0.000191546, 0.000207435, 0.000225649, 0.000246351, 0.000268853, 0.00029347,
+    0.000320945, 0.000351542, 0.000385563, 0.000423346, 0.000465272, 0.00049882,  0.000524524, 0.000551727, 0.000580503,
+    0.000610927, 0.000643085, 0.000677063, 0.000719311, 0.000764718, 0.000813121, 0.000864712, 0.000919695, 0.000978289,
+    0.00103716,  0.000155703, 0.000156764, 0.000159941, 0.000165221, 0.000172594, 0.000182063, 0.000193657, 0.000207435,
+    0.000223492, 0.000241959, 0.000262526, 0.00028484,  0.00030986,  0.00033782,  0.000368984, 0.000403657, 0.00044218,
+    0.000484936, 0.000509231, 0.00053497,  0.000562231, 0.000591085, 0.000621608, 0.000653883, 0.000689654, 0.00073255,
+    0.000778289, 0.000827052, 0.000879029, 0.000934428, 0.000993465, 0.00105093,  0.000173646, 0.000174699, 0.000177855,
+    0.000183116, 0.000190491, 0.000200004, 0.000211698, 0.000225649, 0.000241959, 0.000260425, 0.000280552, 0.000303271,
+    0.000328782, 0.00035732,  0.000389154, 0.00042459,  0.000463975, 0.000496739, 0.000521046, 0.000546834, 0.000574168,
+    0.000603118, 0.000633759, 0.000666173, 0.000704422, 0.00074767,  0.000793791, 0.000842966, 0.000895386, 0.000951259,
+    0.00101076,  0.00106664,  0.000193657, 0.000194714, 0.000197886, 0.000203185, 0.000210631, 0.000220265, 0.000232143,
+    0.000246351, 0.000262526, 0.000280552, 0.000301085, 0.000324296, 0.000350391, 0.000379608, 0.000412221, 0.00044854,
+    0.00048703,  0.000509926, 0.000534273, 0.000560129, 0.000587554, 0.000616619, 0.000647399, 0.000680161, 0.000721071,
+    0.000764718, 0.000811271, 0.000860912, 0.000913833, 0.000970242, 0.00102802,  0.00108432,  0.000215974, 0.000217046,
+    0.000220265, 0.000225649, 0.000233229, 0.000243055, 0.000255189, 0.000268853, 0.00028484,  0.000303271, 0.000324296,
+    0.000348092, 0.000374871, 0.000404876, 0.000438384, 0.000475711, 0.000501595, 0.000524524, 0.000548931, 0.000574872,
+    0.000602409, 0.000631611, 0.000662552, 0.000698327, 0.00073965,  0.000783746, 0.000830784, 0.000880947, 0.000934428,
+    0.000991435, 0.00104725,  0.00110401,  0.000240863, 0.000241959, 0.000245251, 0.000250764, 0.000258328, 0.000267796,
+    0.000279483, 0.00029347,  0.00030986,  0.000328782, 0.000350391, 0.000374871, 0.00040244,  0.000433345, 0.000467872,
+    0.000496045, 0.000517569, 0.00054055,  0.000565037, 0.000591085, 0.000618757, 0.000648119, 0.0006793,   0.000718432,
+    0.000760216, 0.000804812, 0.00085239,  0.000903132, 0.000957234, 0.00101438,  0.0010685,   0.00112577,  0.000267796,
+    0.000268853, 0.000272031, 0.000277348, 0.00028484,  0.000294554, 0.00030656,  0.000320945, 0.00033782,  0.00035732,
+    0.000379608, 0.000404876, 0.000433345, 0.000465272, 0.000493271, 0.000513399, 0.00053497,  0.000558027, 0.000582617,
+    0.000608796, 0.000636625, 0.000666173, 0.000700936, 0.000740539, 0.000782835, 0.000827984, 0.000876157, 0.000927539,
+    0.000982325, 0.00103716,  0.0010918,   0.00114964,  0.000296726, 0.000297814, 0.000301085, 0.00030656,  0.000314279,
+    0.000324296, 0.000336685, 0.000351542, 0.000368984, 0.000389154, 0.000412221, 0.000438384, 0.000467872, 0.000493271,
+    0.00051201,  0.000532183, 0.000553826, 0.000576982, 0.0006017,   0.000628035, 0.000656048, 0.00068706,  0.000724595,
+    0.000764718, 0.000807578, 0.000853335, 0.000902162, 0.000954244, 0.00100978,  0.00106201,  0.00111723,  0.00117568,
+    0.000328782, 0.000329907, 0.00033329,  0.000338955, 0.000346945, 0.00035732,  0.000370159, 0.000385563, 0.000403657,
+    0.00042459,  0.00044854,  0.000475711, 0.000496045, 0.000513399, 0.000532183, 0.000552427, 0.000574168, 0.00059745,
+    0.000622322, 0.000648839, 0.000677063, 0.000712289, 0.000750352, 0.000791046, 0.000834524, 0.000880947, 0.000930488,
+    0.000983335, 0.00103625,  0.00108899,  0.00114484,  0.00120396,  0.000364301, 0.000365469, 0.000368984, 0.000374871,
+    0.000383176, 0.000393964, 0.000407318, 0.000423346, 0.00044218,  0.000463975, 0.00048703,  0.000501595, 0.000517569,
+    0.00053497,  0.000553826, 0.000574168, 0.000596035, 0.000619469, 0.000644522, 0.000671249, 0.00070355,  0.00073965,
+    0.000778289, 0.000819609, 0.000863761, 0.000910909, 0.000961228, 0.00101438,  0.00106479,  0.00111818,  0.00117471,
+    0.00123455,  0.000403657, 0.000404876, 0.000408541, 0.000414682, 0.000423346, 0.000434602, 0.00044854,  0.000465272,
+    0.000484936, 0.000496739, 0.000509926, 0.000524524, 0.00054055,  0.000558027, 0.000576982, 0.00059745,  0.000619469,
+    0.000643085, 0.000668347, 0.000698327, 0.00073255,  0.000769231, 0.000808501, 0.000850501, 0.000895386, 0.000943321,
+    0.000994481, 0.0010445,   0.00109555,  0.00114964,  0.0012069,   0.00126753,  0.000447265, 0.00044854,  0.000452376,
+    0.000458803, 0.000467872, 0.000479655, 0.000489803, 0.00049882,  0.000509231, 0.000521046, 0.000534273, 0.000548931,
+    0.000565037, 0.000582617, 0.0006017,   0.000622322, 0.000644522, 0.000668347, 0.000696588, 0.00072901,  0.000763817,
+    0.000801131, 0.000841086, 0.000883826, 0.000929504, 0.000978289, 0.00102802,  0.00107686,  0.00112862,  0.00118345,
+    0.00124151,  0.00130297,  0.000490497, 0.00049119,  0.000493271, 0.000496739, 0.000501596, 0.000507843, 0.000515484,
+    0.000524524, 0.00053497,  0.000546834, 0.000560129, 0.000574872, 0.000591085, 0.000608796, 0.000628035, 0.000648839,
+    0.000671249, 0.000698327, 0.00072901,  0.000762016, 0.000797458, 0.00083546,  0.000876157, 0.000919695, 0.000966231,
+    0.00101529,  0.00106201,  0.00111156,  0.00116407,  0.0012197,   0.00127861,  0.00134098,  0.000517569, 0.000518264,
+    0.00052035,  0.000523828, 0.0005287,   0.00053497,  0.000542644, 0.000551727, 0.000562231, 0.000574168, 0.000587554,
+    0.000602409, 0.000618757, 0.000636625, 0.000656048, 0.000677063, 0.00070355,  0.00073255,  0.000763817, 0.000797458,
+    0.000833588, 0.000872335, 0.000913833, 0.000958232, 0.00100569,  0.00105093,  0.00109837,  0.00114868,  0.001202,
+    0.00125849,  0.00131831,  0.00138164,  0.000546135, 0.000546834, 0.000548931, 0.000552427, 0.000557326, 0.000563634,
+    0.000571356, 0.000580503, 0.000591085, 0.000603118, 0.000616619, 0.000631611, 0.000648119, 0.000666173, 0.00068706,
+    0.000712289, 0.00073965,  0.000769231, 0.000801131, 0.00083546,  0.000872335, 0.000911883, 0.000954244, 0.000999568,
+    0.00104358,  0.00108899,  0.00113719,  0.00118832,  0.0012425,   0.00129991,  0.00136071,  0.00142507,  0.000576279,
+    0.000576982, 0.000579094, 0.000582617, 0.000587554, 0.000593912, 0.0006017,   0.000610927, 0.000621608, 0.000633759,
+    0.000647399, 0.000662552, 0.0006793,   0.000700936, 0.000724595, 0.000750352, 0.000778289, 0.000808501, 0.000841086,
+    0.000876157, 0.000913833, 0.000954244, 0.000997532, 0.00103991,  0.00108339,  0.00112957,  0.00117859,  0.00123058,
+    0.00128569,  0.00134408,  0.00140592,  0.00145992,  0.000608085, 0.000608796, 0.000610927, 0.000614484, 0.000619469,
+    0.000625891, 0.000633759, 0.000643085, 0.000653883, 0.000666173, 0.000680161, 0.000698327, 0.000718432, 0.000740539,
+    0.000764718, 0.000791046, 0.000819609, 0.000850501, 0.000883826, 0.000919695, 0.000958232, 0.000999568, 0.00103991,
+    0.00108152,  0.00112577,  0.00117277,  0.00122266,  0.00127559,  0.00133168,  0.00139111,  0.00144821,  0.00149301,
+    0.000641648, 0.000642366, 0.000644522, 0.000648119, 0.000653162, 0.00065966,  0.000667622, 0.000677063, 0.000689654,
+    0.000704422, 0.000721071, 0.00073965,  0.000760216, 0.000782835, 0.000807578, 0.000834524, 0.000863761, 0.000895386,
+    0.000929504, 0.000966231, 0.00100569,  0.00104358,  0.00108339,  0.00112577,  0.00117084,  0.00121872,  0.00126954,
+    0.00132345,  0.00138059,  0.00143944,  0.00148269,  0.00152783,  0.000677063, 0.00067779,  0.000680161, 0.000684469,
+    0.000690519, 0.000698327, 0.000707914, 0.000719311, 0.00073255,  0.00074767,  0.000764718, 0.000783746, 0.000804812,
+    0.000827984, 0.000853335, 0.000880947, 0.000910909, 0.000943321, 0.000978289, 0.00101529,  0.00105093,  0.00108899,
+    0.00112957,  0.00117277,  0.00121872,  0.00126753,  0.00131934,  0.0013743,   0.00143255,  0.00147533,  0.00151892,
+    0.00156441,  0.000721071, 0.000721951, 0.000724595, 0.00072901,  0.000735209, 0.00074321,  0.000753037, 0.000764718,
+    0.000778289, 0.000793791, 0.000811271, 0.000830784, 0.00085239,  0.000876157, 0.000902162, 0.000930488, 0.000961228,
+    0.000994481, 0.00102802,  0.00106201,  0.00109837,  0.00113719,  0.00117859,  0.00122266,  0.00126954,  0.00131934,
+    0.0013722,   0.00142827,  0.00147092,  0.00151298,  0.00155692,  0.0016028,   0.000768327, 0.000769231, 0.000771944,
+    0.000776474, 0.000782835, 0.000791046, 0.000801131, 0.000813121, 0.000827052, 0.000842966, 0.000860912, 0.000880947,
+    0.000903132, 0.000927539, 0.000954244, 0.000983335, 0.00101438,  0.0010445,   0.00107686,  0.00111156,  0.00114868,
+    0.00118832,  0.00123058,  0.00127559,  0.00132345,  0.0013743,   0.00142827,  0.00146945,  0.00151002,  0.00155244,
+    0.00159676,  0.00164305,  0.000818681, 0.000819609, 0.000822397, 0.000827052, 0.000833588, 0.000842026, 0.00085239,
+    0.000864712, 0.000879029, 0.000895386, 0.000913833, 0.000934428, 0.000957234, 0.000982325, 0.00100978,  0.00103625,
+    0.00106479,  0.00109555,  0.00112862,  0.00116407,  0.001202,    0.0012425,   0.00128569,  0.00133168,  0.00138059,
+    0.00143255,  0.00147092,  0.00151002,  0.00155094,  0.00159374,  0.00163847,  0.00168519,  0.000872335, 0.000873289,
+    0.000876157, 0.000880947, 0.000887672, 0.000896353, 0.000907017, 0.000919695, 0.000934428, 0.000951259, 0.000970242,
+    0.000991435, 0.00101438,  0.00103716,  0.00106201,  0.00108899,  0.00111818,  0.00114964,  0.00118345,  0.0012197,
+    0.00125849,  0.00129991,  0.00134408,  0.00139111,  0.00143944,  0.00147533,  0.00151298,  0.00155244,  0.00159374,
+    0.00163695,  0.00168211,  0.00172928,  0.000929504, 0.000930488, 0.000933442, 0.000938375, 0.000945302, 0.000954244,
+    0.000965229, 0.000978289, 0.000993465, 0.00101076,  0.00102802,  0.00104725,  0.0010685,   0.0010918,   0.00111723,
+    0.00114484,  0.00117471,  0.0012069,   0.00124151,  0.00127861,  0.00131831,  0.00136071,  0.00140592,  0.00144821,
+    0.00148269,  0.00151892,  0.00155692,  0.00159676,  0.00163847,  0.00168211,  0.00172772,  0.00177538,  0.000990421,
+    0.000991435, 0.000994481, 0.000999568, 0.00100671,  0.00101529,  0.00102529,  0.00103716,  0.00105093,  0.00106664,
+    0.00108432,  0.00110401,  0.00112577,  0.00114964,  0.00117568,  0.00120396,  0.00123455,  0.00126753,  0.00130297,
+    0.00134098,  0.00138164,  0.00142507,  0.00145992,  0.00149301,  0.00152783,  0.00156441,  0.0016028,   0.00164305,
+    0.00168519,  0.00172928,  0.00177538,  0.00182353,  0.000136878, 0.000150402, 0.000165263, 0.000181592, 0.000199534,
+    0.000219249, 0.000240912, 0.000264002, 0.000289027, 0.000316423, 0.000346417, 0.000379254, 0.000415204, 0.000450291,
+    0.000483257, 0.000518636, 0.000556606, 0.000597355, 0.000641088, 0.00068713,  0.000732919, 0.000781759, 0.000833854,
+    0.00088942,  0.000948689, 0.00101191,  0.00107258,  0.00113651,  0.00120424,  0.00127601,  0.00135205,  0.00143263,
+    0.000150402, 0.000156389, 0.00016898,  0.00018439,  0.000201862, 0.000221304, 0.000242798, 0.000265706, 0.000290661,
+    0.000318014, 0.000347985, 0.000380815, 0.000416771, 0.000451514, 0.000484476, 0.000519858, 0.000557835, 0.000598597,
+    0.000642346, 0.000688297, 0.000734101, 0.00078296,  0.000835077, 0.000890668, 0.000949965, 0.00101321,  0.00107378,
+    0.00113772,  0.00120548,  0.00127728,  0.00135336,  0.00143397,  0.000165263, 0.00016898,  0.00017868,  0.000192254,
+    0.000208611, 0.000227352, 0.000248337, 0.000270784, 0.000295543, 0.000322778, 0.000352686, 0.000385499, 0.000421474,
+    0.000455183, 0.000488134, 0.000523523, 0.000561524, 0.000602324, 0.000646125, 0.000691799, 0.000737651, 0.000786566,
+    0.000838749, 0.000894414, 0.000953794, 0.00101695,  0.00107736,  0.00114138,  0.00120922,  0.00128111,  0.00135728,
+    0.00143799,  0.000181592, 0.00018439,  0.000192254, 0.000204148, 0.000219249, 0.000237105, 0.000257117, 0.000279142,
+    0.000303622, 0.000330686, 0.00036051,  0.000393307, 0.000429325, 0.000461293, 0.000494231, 0.000529636, 0.000567679,
+    0.000608545, 0.000652433, 0.000697644, 0.000743575, 0.000792586, 0.000844879, 0.00090067,  0.000960189, 0.00102282,
+    0.00108335,  0.00114749,  0.00121546,  0.00128749,  0.00136382,  0.00144469,  0.000199534, 0.000201862, 0.000208611,
+    0.000219249, 0.000233247, 0.00025011,  0.000269098, 0.000290661, 0.00031483,  0.000341708, 0.000371447, 0.000404247,
+    0.000439264, 0.000469839, 0.000502768, 0.000538204, 0.000576312, 0.000617274, 0.000661288, 0.000705842, 0.000751888,
+    0.000801034, 0.000853484, 0.000909452, 0.000969168, 0.00103106,  0.00109175,  0.00115606,  0.00122422,  0.00129645,
+    0.00137299,  0.00145411,  0.000219249, 0.000221304, 0.000227352, 0.000237105, 0.00025011,  0.000265706, 0.000284102,
+    0.00030523,  0.000329107, 0.000355817, 0.000385499, 0.000418338, 0.000450291, 0.000480818, 0.000513751, 0.000549237,
+    0.000587438, 0.000628532, 0.000672714, 0.000716412, 0.000762608, 0.000811933, 0.000864586, 0.000920784, 0.000980757,
+    0.00104169,  0.00110257,  0.00116711,  0.00123551,  0.00130801,  0.00138483,  0.00146575,  0.000240912, 0.000242798,
+    0.000248337, 0.000257117, 0.000269098, 0.000284102, 0.000302012, 0.000322778, 0.000346417, 0.000373009, 0.000402683,
+    0.000435582, 0.000463735, 0.000494231, 0.000527191, 0.000562755, 0.000601081, 0.000642346, 0.000685964, 0.000729375,
+    0.000775761, 0.000825309, 0.000878217, 0.000934701, 0.000994989, 0.00105472,  0.00111586,  0.00118067,  0.00124938,
+    0.00132219,  0.00139936,  0.00147962,  0.000264002, 0.000265706, 0.000270784, 0.000279142, 0.000290661, 0.00030523,
+    0.000322778, 0.000343278, 0.000366761, 0.000393307, 0.000423044, 0.000451514, 0.000479598, 0.000510089, 0.000543105,
+    0.000578782, 0.000617274, 0.000658755, 0.000701155, 0.000744761, 0.000791381, 0.000841199, 0.000894414, 0.000951241,
+    0.00101191,  0.0010702,   0.00113164,  0.00119678,  0.00126584,  0.00133904,  0.00141662,  0.00149609,  0.000289027,
+    0.000290661, 0.000295543, 0.000303622, 0.00031483,  0.000329107, 0.000346417, 0.000366761, 0.000390183, 0.000416771,
+    0.000444168, 0.000469839, 0.00049789,  0.000528413, 0.000561524, 0.000597355, 0.000636059, 0.000677807, 0.000718765,
+    0.000762608, 0.000809507, 0.000859646, 0.000913224, 0.000970453, 0.00102988,  0.00108814,  0.00114994,  0.00121546,
+    0.00128493,  0.00135858,  0.00143665,  0.00151518,  0.000316423, 0.000318014, 0.000322778, 0.000330686, 0.000341708,
+    0.000355817, 0.000373009, 0.000393307, 0.000416771, 0.000441717, 0.000464956, 0.000490573, 0.000518636, 0.000549237,
+    0.000582489, 0.000618523, 0.00065749,  0.000697644, 0.000738835, 0.00078296,  0.000830189, 0.000880703, 0.000934701,
+    0.000992396, 0.00104998,  0.00110861,  0.00117081,  0.00123677,  0.00130672,  0.00138088,  0.00145946,  0.00153694,
+    0.000346417, 0.000347985, 0.000352686, 0.00036051,  0.000371447, 0.000385499, 0.000402683, 0.000423044, 0.000444168,
+    0.000464956, 0.000488134, 0.000513751, 0.000541879, 0.00057261,  0.000606055, 0.000642346, 0.000681302, 0.000719942,
+    0.000761415, 0.000805873, 0.000853484, 0.00090443,  0.000958909, 0.00101695,  0.00107258,  0.00113164,  0.0011943,
+    0.00126076,  0.00133125,  0.00140599,  0.00148341,  0.00156141,  0.000379254, 0.000380815, 0.000385499, 0.000393307,
+    0.000404247, 0.000418338, 0.000435582, 0.000451514, 0.000469839, 0.000490573, 0.000513751, 0.000539429, 0.000567679,
+    0.000598597, 0.000632293, 0.0006689,   0.000705842, 0.000744761, 0.000786566, 0.00083141,  0.00087946,  0.000930898,
+    0.000985923, 0.00104169,  0.00109776,  0.00115729,  0.00122046,  0.00128749,  0.00135858,  0.00143397,  0.00151008,
+    0.00158866,  0.000415204, 0.000416771, 0.000421474, 0.000429325, 0.000439264, 0.000450291, 0.000463735, 0.000479598,
+    0.00049789,  0.000518636, 0.000541879, 0.000567679, 0.000596114, 0.000627279, 0.000661288, 0.000696474, 0.000732919,
+    0.000772168, 0.00081436,  0.000859646, 0.000908195, 0.000960189, 0.00101577,  0.001069,    0.00112556,  0.00118562,
+    0.00124938,  0.00131703,  0.00138879,  0.00146449,  0.00153951,  0.00161874,  0.000450291, 0.000451514, 0.000455183,
+    0.000461293, 0.000469839, 0.000480818, 0.000494231, 0.000510089, 0.000528413, 0.000549237, 0.00057261,  0.000598597,
+    0.000627279, 0.000658755, 0.000691799, 0.000725835, 0.000762608, 0.000802243, 0.000844879, 0.000890668, 0.000939779,
+    0.000992396, 0.00104524,  0.00109896,  0.00115606,  0.00121671,  0.00128111,  0.00134945,  0.00142195,  0.00149609,
+    0.00157177,  0.00165171,  0.000483257, 0.000484476, 0.000488134, 0.000494231, 0.000502768, 0.000513751, 0.000527191,
+    0.000543105, 0.000561524, 0.000582489, 0.000606055, 0.000632293, 0.000661288, 0.000691799, 0.000723476, 0.000757839,
+    0.000794997, 0.000835077, 0.000878217, 0.000924573, 0.000974312, 0.00102635,  0.00107736,  0.00113164,  0.00118934,
+    0.00125064,  0.00131574,  0.00138483,  0.00145815,  0.00153052,  0.00160694,  0.00168766,  0.000518636, 0.000519858,
+    0.000523523, 0.000529636, 0.000538204, 0.000549237, 0.000562755, 0.000578782, 0.000597355, 0.000618523, 0.000642346,
+    0.0006689,   0.000696474, 0.000725835, 0.000757839, 0.000792586, 0.000830189, 0.000870774, 0.000914482, 0.00096147,
+    0.00101191,  0.00106066,  0.00111223,  0.00116711,  0.00122547,  0.00128749,  0.00135336,  0.00142328,  0.00149482,
+    0.00156788,  0.00164509,  0.00172666,  0.000556606, 0.000557835, 0.000561524, 0.000567679, 0.000576312, 0.000587438,
+    0.000601081, 0.000617274, 0.000636059, 0.00065749,  0.000681302, 0.000705842, 0.000732919, 0.000762608, 0.000794997,
+    0.000830189, 0.000868297, 0.000909452, 0.000953794, 0.00100148,  0.00104879,  0.00109776,  0.00114994,  0.00120548,
+    0.00126457,  0.00132737,  0.00139407,  0.00146449,  0.00153437,  0.00160825,  0.00168632,  0.00176881,  0.000597355,
+    0.000598597, 0.000602324, 0.000608545, 0.000617274, 0.000628532, 0.000642346, 0.000658755, 0.000677807, 0.000697644,
+    0.000719942, 0.000744761, 0.000772168, 0.000802243, 0.000835077, 0.000870774, 0.000909452, 0.000951241, 0.000996287,
+    0.00104169,  0.00108814,  0.00113772,  0.00119057,  0.00124685,  0.00130672,  0.00137037,  0.00143799,  0.00150626,
+    0.00157696,  0.00165171,  0.00173072,  0.0018142,   0.000641088, 0.000642346, 0.000646125, 0.000652433, 0.000661288,
+    0.000672714, 0.000685964, 0.000701155, 0.000718765, 0.000738835, 0.000761415, 0.000786566, 0.00081436,  0.000844879,
+    0.000878217, 0.000914482, 0.000953794, 0.000996287, 0.00103932,  0.00108335,  0.00113042,  0.00118067,  0.00123426,
+    0.00129133,  0.00135205,  0.00141662,  0.00148341,  0.00155109,  0.00162268,  0.00169838,  0.00177839,  0.00186294,
+    0.00068713,  0.000688297, 0.000691799, 0.000697644, 0.000705842, 0.000716412, 0.000729375, 0.000744761, 0.000762608,
+    0.00078296,  0.000805873, 0.00083141,  0.000859646, 0.000890668, 0.000924573, 0.00096147,  0.00100148,  0.00104169,
+    0.00108335,  0.00112799,  0.00117574,  0.00122673,  0.00128111,  0.00133904,  0.00140069,  0.00146575,  0.00153052,
+    0.00159909,  0.00167164,  0.00174836,  0.00182945,  0.00191514,  0.000732919, 0.000734101, 0.000737651, 0.000743575,
+    0.000751888, 0.000762608, 0.000775761, 0.000791381, 0.000809507, 0.000830189, 0.000853484, 0.00087946,  0.000908195,
+    0.000939779, 0.000974312, 0.00101191,  0.00104879,  0.00108814,  0.00113042,  0.00117574,  0.00122422,  0.00127601,
+    0.00133125,  0.00139011,  0.00145276,  0.00151518,  0.00158086,  0.00165039,  0.00172396,  0.00180177,  0.00188402,
+    0.00197093,  0.000781759, 0.00078296,  0.000786566, 0.000792586, 0.000801034, 0.000811933, 0.000825309, 0.000841199,
+    0.000859646, 0.000880703, 0.00090443,  0.000930898, 0.000960189, 0.000992396, 0.00102635,  0.00106066,  0.00109776,
+    0.00113772,  0.00118067,  0.00122673,  0.00127601,  0.00132866,  0.00138483,  0.00144469,  0.00150498,  0.00156788,
+    0.00163453,  0.0017051,   0.00177977,  0.00185874,  0.00194222,  0.00203043,  0.000833854, 0.000835077, 0.000838749,
+    0.000844879, 0.000853484, 0.000864586, 0.000878217, 0.000894414, 0.000913224, 0.000934701, 0.000958909, 0.000985923,
+    0.00101577,  0.00104524,  0.00107736,  0.00111223,  0.00114994,  0.00119057,  0.00123426,  0.00128111,  0.00133125,
+    0.00138483,  0.00144201,  0.0014999,   0.00156012,  0.00162399,  0.00169168,  0.00176335,  0.00183919,  0.0019194,
+    0.0020042,   0.00207984,  0.00088942,  0.000890668, 0.000894414, 0.00090067,  0.000909452, 0.000920784, 0.000934701,
+    0.000951241, 0.000970453, 0.000992396, 0.00101695,  0.00104169,  0.001069,    0.00109896,  0.00113164,  0.00116711,
+    0.00120548,  0.00124685,  0.00129133,  0.00133904,  0.00139011,  0.00144469,  0.0014999,   0.00155754,  0.00161874,
+    0.00168365,  0.00175244,  0.00182529,  0.00190238,  0.00198391,  0.00206298,  0.0021275,   0.000948689, 0.000949965,
+    0.000953794, 0.000960189, 0.000969168, 0.000980757, 0.000994989, 0.00101191,  0.00102988,  0.00104998,  0.00107258,
+    0.00109776,  0.00112556,  0.00115606,  0.00118934,  0.00122547,  0.00126457,  0.00130672,  0.00135205,  0.00140069,
+    0.00145276,  0.00150498,  0.00156012,  0.00161874,  0.00168098,  0.001747,    0.00181697,  0.00189107,  0.00196949,
+    0.00205037,  0.00211263,  0.00217764,  0.00101191,  0.00101321,  0.00101695,  0.00102282,  0.00103106,  0.00104169,
+    0.00105472,  0.0010702,   0.00108814,  0.00110861,  0.00113164,  0.00115729,  0.00118562,  0.00121671,  0.00125064,
+    0.00128749,  0.00132737,  0.00137037,  0.00141662,  0.00146575,  0.00151518,  0.00156788,  0.00162399,  0.00168365,
+    0.001747,    0.0018142,   0.00188543,  0.00196085,  0.00204068,  0.00210204,  0.0021648,   0.00223034,  0.00107258,
+    0.00107378,  0.00107736,  0.00108335,  0.00109175,  0.00110257,  0.00111586,  0.00113164,  0.00114994,  0.00117081,
+    0.0011943,   0.00122046,  0.00124938,  0.00128111,  0.00131574,  0.00135336,  0.00139407,  0.00143799,  0.00148341,
+    0.00153052,  0.00158086,  0.00163453,  0.00169168,  0.00175244,  0.00181697,  0.00188543,  0.00195798,  0.00203482,
+    0.00209569,  0.00215626,  0.00221955,  0.00228566,  0.00113651,  0.00113772,  0.00114138,  0.00114749,  0.00115606,
+    0.00116711,  0.00118067,  0.00119678,  0.00121546,  0.00123677,  0.00126076,  0.00128749,  0.00131703,  0.00134945,
+    0.00138483,  0.00142328,  0.00146449,  0.00150626,  0.00155109,  0.00159909,  0.00165039,  0.0017051,   0.00176335,
+    0.00182529,  0.00189107,  0.00196085,  0.00203482,  0.00209357,  0.00215199,  0.00221309,  0.00227695,  0.00234366,
+    0.00120424,  0.00120548,  0.00120922,  0.00121546,  0.00122422,  0.00123551,  0.00124938,  0.00126584,  0.00128493,
+    0.00130672,  0.00133125,  0.00135858,  0.00138879,  0.00142195,  0.00145815,  0.00149482,  0.00153437,  0.00157696,
+    0.00162268,  0.00167164,  0.00172396,  0.00177977,  0.00183919,  0.00190238,  0.00196949,  0.00204068,  0.00209569,
+    0.00215199,  0.00221093,  0.0022726,   0.00233707,  0.00240442,  0.00127601,  0.00127728,  0.00128111,  0.00128749,
+    0.00129645,  0.00130801,  0.00132219,  0.00133904,  0.00135858,  0.00138088,  0.00140599,  0.00143397,  0.00146449,
+    0.00149609,  0.00153052,  0.00156788,  0.00160825,  0.00165171,  0.00169838,  0.00174836,  0.00180177,  0.00185874,
+    0.0019194,   0.00198391,  0.00205037,  0.00210204,  0.00215626,  0.00221309,  0.0022726,   0.00233487,  0.00239998,
+    0.002468,    0.00135205,  0.00135336,  0.00135728,  0.00136382,  0.00137299,  0.00138483,  0.00139936,  0.00141662,
+    0.00143665,  0.00145946,  0.00148341,  0.00151008,  0.00153951,  0.00157177,  0.00160694,  0.00164509,  0.00168632,
+    0.00173072,  0.00177839,  0.00182945,  0.00188402,  0.00194222,  0.0020042,   0.00206298,  0.00211263,  0.0021648,
+    0.00221955,  0.00227695,  0.00233707,  0.00239998,  0.00246576,  0.0025345,   0.00143263,  0.00143397,  0.00143799,
+    0.00144469,  0.00145411,  0.00146575,  0.00147962,  0.00149609,  0.00151518,  0.00153694,  0.00156141,  0.00158866,
+    0.00161874,  0.00165171,  0.00168766,  0.00172666,  0.00176881,  0.0018142,   0.00186294,  0.00191514,  0.00197093,
+    0.00203043,  0.00207984,  0.0021275,   0.00217764,  0.00223034,  0.00228566,  0.00234366,  0.00240442,  0.002468,
+    0.0025345,   0.00260399,  0.000262914, 0.000328843, 0.000411305, 0.000514445, 0.000643449, 0.000804803, 0.00100662,
+    0.00121693,  0.00145344,  0.00173591,  0.00207328,  0.00247621,  0.00295745,  0.00353066,  0.00421288,  0.00502694,
+    0.00599829,  0.00715734,  0.00854036,  0.00996351,  0.0106852,   0.0114591,   0.0122891,   0.0131792,   0.0141338,
+    0.0151576,   0.0160076,   0.0168911,   0.0178234,   0.0188071,   0.0198452,   0.0209405,   0.000328843, 0.000360778,
+    0.000433614, 0.000533468, 0.00066142,  0.000822834, 0.00102543,  0.00123239,  0.0014696,   0.00175307,  0.00209172,
+    0.00249624,  0.00297938,  0.00355469,  0.00423952,  0.0050566,   0.00603147,  0.0071946,   0.00858234,  0.00998185,
+    0.0107039,   0.0114782,   0.0123087,   0.0131993,   0.0141544,   0.0151788,   0.0160241,   0.0169079,   0.0178405,
+    0.0188245,   0.0198629,   0.0209586,   0.000411305, 0.000433614, 0.00049507,  0.000589093, 0.000715145, 0.00087724,
+    0.00107937,  0.001279,    0.0015184,   0.00180493,  0.00214749,  0.0025568,   0.00304568,  0.00362736,  0.00432001,
+    0.00514622,  0.0061317,   0.00730714,  0.00870913,  0.0100369,   0.01076,     0.0115355,   0.0123673,   0.0132595,
+    0.0142163,   0.0152355,   0.0160738,   0.0169584,   0.0178918,   0.0188769,   0.0199163,   0.0210131,   0.000514445,
+    0.000533468, 0.000589093, 0.000679349, 0.000804803, 0.000969255, 0.00115547,  0.00135757,  0.00160087,  0.00189268,
+    0.00224191,  0.00265935,  0.00315794,  0.00375033,  0.00445619,  0.00529779,  0.00630115,  0.00749731,  0.0089233,
+    0.0101289,   0.0108537,   0.0116312,   0.0124653,   0.01336,     0.0143196,   0.0153172,   0.0161566,   0.0170426,
+    0.0179775,   0.0189641,   0.0200053,   0.021104,    0.000643449, 0.00066142,  0.000715145, 0.000804803, 0.000932223,
+    0.00109453,  0.00126342,  0.0014696,   0.00171881,  0.00201836,  0.00237724,  0.00280635,  0.00331844,  0.00392647,
+    0.00465117,  0.00551468,  0.00654347,  0.00776911,  0.00922921,  0.010258,    0.0109853,   0.0117657,   0.012603,
+    0.0135013,   0.0144648,   0.0154317,   0.0162728,   0.0171607,   0.0180977,   0.0190866,   0.0201302,   0.0212316,
+    0.000804803, 0.000822834, 0.00087724,  0.000969255, 0.00109453,  0.00123239,  0.00140527,  0.00161754,  0.00187499,
+    0.00218504,  0.0025568,   0.00300139,  0.00353066,  0.00415992,  0.00490939,  0.00580172,  0.00686392,  0.00812824,
+    0.00963311,  0.0104246,   0.0111551,   0.0119393,   0.0127808,   0.0136837,   0.0146524,   0.0155792,   0.0164226,
+    0.0173129,   0.0182526,   0.0192445,   0.0202913,   0.0213961,   0.00100662,  0.00102543,  0.00107937,  0.00115547,
+    0.00126342,  0.00140527,  0.00158426,  0.00180493,  0.00207328,  0.00239688,  0.0027851,   0.00324932,  0.00380018,
+    0.00445619,  0.00523683,  0.00616535,  0.00726949,  0.00858234,  0.00994518,  0.0106292,   0.0113639,   0.0121527,
+    0.0129994,   0.0139081,   0.014883,    0.0157601,   0.0166062,   0.0174995,   0.0184426,   0.019438,    0.0204888,
+    0.0215978,   0.00121693,  0.00123239,  0.001279,    0.00135757,  0.0014696,   0.00161754,  0.00180493,  0.0020366,
+    0.00231878,  0.00265935,  0.00306796,  0.00355469,  0.00413359,  0.00482236,  0.00564111,  0.00661381,  0.0077691,
+    0.00914108,  0.0101841,   0.0108724,   0.0116121,   0.0124065,   0.0132595,   0.014175,    0.0151576,   0.0159745,
+    0.0168239,   0.0177209,   0.0186679,   0.0196678,   0.0207232,   0.0218372,   0.00145344,  0.0014696,   0.0015184,
+    0.00160087,  0.00171881,  0.00187499,  0.00207328,  0.00231878,  0.00261809,  0.00297938,  0.00341185,  0.00392647,
+    0.00453913,  0.00526726,  0.0061317,   0.00715734,  0.00837384,  0.00981651,  0.0104617,   0.0111551,   0.0119007,
+    0.0127017,   0.013562,    0.0144856,   0.0154153,   0.016223,    0.0170763,   0.0179775,   0.0189292,   0.0199341,
+    0.0209949,   0.0221148,   0.00173591,  0.00175307,  0.00180493,  0.00189268,  0.00201836,  0.00218504,  0.00239688,
+    0.00265935,  0.00297938,  0.00336496,  0.00382525,  0.00437418,  0.00502694,  0.00580172,  0.00672025,  0.00780846,
+    0.00909724,  0.0101289,   0.0107787,   0.0114782,   0.0122306,   0.0130393,   0.0139081,   0.014841,    0.0156942,
+    0.016506,    0.0173637,   0.0182698,   0.0192269,   0.0202376,   0.0213046,   0.0224312,   0.00207328,  0.00209172,
+    0.00214749,  0.00224191,  0.00237724,  0.0025568,   0.0027851,   0.00306796,  0.00341185,  0.00382525,  0.00432001,
+    0.00490939,  0.00560933,  0.00643889,  0.00742085,  0.00858234,  0.00987194,  0.0104803,   0.0111362,   0.0118428,
+    0.012603,    0.0134205,   0.0142989,   0.0152355,   0.0160076,   0.0168239,   0.0176868,   0.0185985,   0.0195616,
+    0.0205788,   0.021653,    0.022787,    0.00247621,  0.00249624,  0.0025568,   0.00265935,  0.00280635,  0.00300139,
+    0.00324932,  0.00355469,  0.00392647,  0.00437418,  0.00490939,  0.00554612,  0.00630115,  0.0071946,   0.00825042,
+    0.00949714,  0.010258,    0.0108724,   0.0115355,   0.0122501,   0.0130193,   0.0138467,   0.0147361,   0.0155792,
+    0.016356,    0.0171775,   0.0180461,   0.0189641,   0.0199341,   0.0209586,   0.0220406,   0.0231831,   0.00295745,
+    0.00297938,  0.00304568,  0.00315794,  0.00331844,  0.00353066,  0.00380018,  0.00413359,  0.00453913,  0.00502694,
+    0.00560933,  0.00630115,  0.00712022,  0.00808779,  0.00922921,  0.0101105,   0.0106852,   0.0113068,   0.011978,
+    0.0127017,   0.0134811,   0.0143196,   0.0152192,   0.015958,    0.0167401,   0.0175675,   0.0184426,   0.0193676,
+    0.0203451,   0.0213778,   0.0224685,   0.0236204,   0.00353066,  0.00355469,  0.00362736,  0.00375033,  0.00392647,
+    0.00415992,  0.00445619,  0.00482236,  0.00526726,  0.00580172,  0.00643889,  0.0071946,   0.00808779,  0.00914108,
+    0.0100369,   0.0105733,   0.0111551,   0.0117849,   0.0124653,   0.0131993,   0.01399,     0.014841,    0.0156285,
+    0.0163726,   0.0171607,   0.0179947,   0.0188769,   0.0198096,   0.0207955,   0.0218372,   0.0229376,   0.0240997,
+    0.00421288,  0.00423952,  0.00432001,  0.00445619,  0.00465117,  0.00490939,  0.00523683,  0.00564111,  0.0061317,
+    0.00672025,  0.00742085,  0.00825042,  0.00922921,  0.0100369,   0.010536,    0.0110795,   0.0116696,   0.0123087,
+    0.0129994,   0.0137448,   0.0145481,   0.0153662,   0.0160738,   0.0168239,   0.0176186,   0.0184599,   0.01935,
+    0.0202913,   0.0212864,   0.0223379,   0.0234488,   0.0246222,   0.00502694,  0.0050566,   0.00514622,  0.00529779,
+    0.00551468,  0.00580172,  0.00616535,  0.00661381,  0.00715734,  0.00780846,  0.00858234,  0.00949714,  0.0101105,
+    0.0105733,   0.0110795,   0.0116312,   0.0122306,   0.01288,     0.0135823,   0.0143403,   0.0151576,   0.0158424,
+    0.016556,    0.0173129,   0.0181149,   0.0189641,   0.0198629,   0.0208136,   0.0218188,   0.0228811,   0.0240035,
+    0.025189,    0.00599829,  0.00603147,  0.0061317,   0.00630115,  0.00654347,  0.00686392,  0.00726949,  0.0077691,
+    0.00837384,  0.00909724,  0.00987194,  0.010258,    0.0106852,   0.0111551,   0.0116696,   0.0122306,   0.0128403,
+    0.0135013,   0.0142163,   0.0149884,   0.0156778,   0.016356,    0.0170763,   0.0178405,   0.0186506,   0.0195086,
+    0.0204169,   0.0213778,   0.0223939,   0.0234679,   0.0246027,   0.0258015,   0.00715734,  0.0071946,   0.00730714,
+    0.00749731,  0.00776911,  0.00812824,  0.00858234,  0.00914108,  0.00981651,  0.0101289,   0.0104803,   0.0108724,
+    0.0113068,   0.0117849,   0.0123087,   0.01288,     0.0135013,   0.014175,    0.0149041,   0.0155792,   0.016223,
+    0.0169079,   0.0176356,   0.018408,    0.0192269,   0.0200945,   0.0210131,   0.0219851,   0.023013,    0.0240997,
+    0.025248,    0.026461,    0.00854036,  0.00858234,  0.00870913,  0.0089233,   0.00922921,  0.00963311,  0.00994518,
+    0.0101841,   0.0104617,   0.0107787,   0.0111362,   0.0115355,   0.011978,    0.0124653,   0.0129994,   0.0135823,
+    0.0142163,   0.0149041,   0.0155464,   0.0161566,   0.0168071,   0.0174995,   0.0182354,   0.0190166,   0.0198452,
+    0.0207232,   0.021653,    0.0226369,   0.0236777,   0.0247779,   0.0259407,   0.0271691,   0.00996351,  0.00998185,
+    0.0100369,   0.0101289,   0.010258,    0.0104246,   0.0106292,   0.0108724,   0.0111551,   0.0114782,   0.0118428,
+    0.0122501,   0.0127017,   0.0131993,   0.0137448,   0.0143403,   0.0149884,   0.0155792,   0.0161566,   0.0167736,
+    0.0174315,   0.0181321,   0.0188769,   0.0196678,   0.0205068,   0.0213961,   0.0223379,   0.0233348,   0.0243893,
+    0.0255042,   0.0266826,   0.0279274,   0.0106852,   0.0107039,   0.01076,     0.0108537,   0.0109853,   0.0111551,
+    0.0113639,   0.0116121,   0.0119007,   0.0122306,   0.012603,    0.0130193,   0.0134811,   0.01399,     0.0145481,
+    0.0151576,   0.0156778,   0.016223,    0.0168071,   0.0174315,   0.0180977,   0.0188071,   0.0195616,   0.020363,
+    0.0212134,   0.0221148,   0.0230697,   0.0240804,   0.0251497,   0.0262803,   0.0274753,   0.0287378,   0.0114591,
+    0.0114782,   0.0115355,   0.0116312,   0.0117657,   0.0119393,   0.0121527,   0.0124065,   0.0127017,   0.0130393,
+    0.0134205,   0.0138467,   0.0143196,   0.014841,    0.0153662,   0.0158424,   0.016356,    0.0169079,   0.0174995,
+    0.0181321,   0.0188071,   0.0195263,   0.0202913,   0.021104,    0.0219666,   0.0228811,   0.0238499,   0.0248755,
+    0.0259606,   0.027108,    0.0283208,   0.0296021,   0.0122891,   0.0123087,   0.0123673,   0.0124653,   0.012603,
+    0.0127808,   0.0129994,   0.0132595,   0.013562,    0.0139081,   0.0142989,   0.0147361,   0.0152192,   0.0156285,
+    0.0160738,   0.016556,    0.0170763,   0.0176356,   0.0182354,   0.0188769,   0.0195616,   0.0202913,   0.0210677,
+    0.0218926,   0.0227683,   0.0236968,   0.0246805,   0.025722,    0.026824,    0.0279894,   0.0292211,   0.0303655,
+    0.0131792,   0.0131993,   0.0132595,   0.01336,     0.0135013,   0.0136837,   0.0139081,   0.014175,    0.0144856,
+    0.014841,    0.0152355,   0.0155792,   0.015958,    0.0163726,   0.0168239,   0.0173129,   0.0178405,   0.018408,
+    0.0190166,   0.0196678,   0.020363,    0.021104,    0.0218926,   0.0227307,   0.0236204,   0.0245639,   0.0255636,
+    0.026622,    0.027742,    0.0289264,   0.0300982,   0.0311219,   0.0141338,   0.0141544,   0.0142163,   0.0143196,
+    0.0144648,   0.0146524,   0.014883,    0.0151576,   0.0154153,   0.0156942,   0.0160076,   0.016356,    0.0167401,
+    0.0171607,   0.0176186,   0.0181149,   0.0186506,   0.0192269,   0.0198452,   0.0205068,   0.0212134,   0.0219666,
+    0.0227683,   0.0236204,   0.024525,    0.0254845,   0.0265012,   0.0275778,   0.0287169,   0.0298984,   0.0308859,
+    0.0319196,   0.0151576,   0.0151788,   0.0152355,   0.0153172,   0.0154317,   0.0155792,   0.0157601,   0.0159745,
+    0.016223,    0.016506,    0.0168239,   0.0171775,   0.0175675,   0.0179947,   0.0184599,   0.0189641,   0.0195086,
+    0.0200945,   0.0207232,   0.0213961,   0.0221148,   0.0228811,   0.0236968,   0.0245639,   0.0254845,   0.026461,
+    0.0274958,   0.0285915,   0.0297509,   0.0307177,   0.0317152,   0.0327595,   0.0160076,   0.0160241,   0.0160738,
+    0.0161566,   0.0162728,   0.0164226,   0.0166062,   0.0168239,   0.0170763,   0.0173637,   0.0176868,   0.0180461,
+    0.0184426,   0.0188769,   0.01935,     0.0198629,   0.0204169,   0.0210131,   0.021653,    0.0223379,   0.0230697,
+    0.0238499,   0.0246805,   0.0255636,   0.0265012,   0.0274958,   0.0285498,   0.0296658,   0.0306169,   0.0315792,
+    0.0325874,   0.033643,    0.0168911,   0.0169079,   0.0169584,   0.0170426,   0.0171607,   0.0173129,   0.0174995,
+    0.0177209,   0.0179775,   0.0182698,   0.0185985,   0.0189641,   0.0193676,   0.0198096,   0.0202913,   0.0208136,
+    0.0213778,   0.0219851,   0.0226369,   0.0233348,   0.0240804,   0.0248755,   0.025722,    0.026622,    0.0275778,
+    0.0285915,   0.0296658,   0.0305833,   0.0315113,   0.0324843,   0.0335037,   0.0345713,   0.0178234,   0.0178405,
+    0.0178918,   0.0179775,   0.0180977,   0.0182526,   0.0184426,   0.0186679,   0.0189292,   0.0192269,   0.0195616,
+    0.0199341,   0.0203451,   0.0207955,   0.0212864,   0.0218188,   0.0223939,   0.023013,    0.0236777,   0.0243893,
+    0.0251497,   0.0259606,   0.026824,    0.027742,    0.0287169,   0.0297509,   0.0306169,   0.0315113,   0.0324499,
+    0.0334342,   0.0344656,   0.0355458,   0.0188071,   0.0188245,   0.0188769,   0.0189641,   0.0190866,   0.0192445,
+    0.019438,    0.0196678,   0.0199341,   0.0202376,   0.0205788,   0.0209586,   0.0213778,   0.0218372,   0.0223379,
+    0.0228811,   0.0234679,   0.0240997,   0.0247779,   0.0255042,   0.0262803,   0.027108,    0.0279894,   0.0289264,
+    0.0298984,   0.0307177,   0.0315792,   0.0324843,   0.0334342,   0.0344305,   0.0354745,   0.036568,    0.0198452,
+    0.0198629,   0.0199163,   0.0200053,   0.0201302,   0.0202913,   0.0204888,   0.0207232,   0.0209949,   0.0213046,
+    0.021653,    0.0220406,   0.0224685,   0.0229376,   0.0234488,   0.0240035,   0.0246027,   0.025248,    0.0259407,
+    0.0266826,   0.0274753,   0.0283208,   0.0292211,   0.0300982,   0.0308859,   0.0317152,   0.0325874,   0.0335037,
+    0.0344656,   0.0354745,   0.0365319,   0.0376393,   0.0209405,   0.0209586,   0.0210131,   0.021104,    0.0212316,
+    0.0213961,   0.0215978,   0.0218372,   0.0221148,   0.0224312,   0.022787,    0.0231831,   0.0236204,   0.0240997,
+    0.0246222,   0.025189,    0.0258015,   0.026461,    0.0271691,   0.0279274,   0.0287378,   0.0296021,   0.0303655,
+    0.0311219,   0.0319196,   0.0327595,   0.033643,    0.0345713,   0.0355458,   0.036568,    0.0376393,   0.0387616,
+    0.000138107, 0.000160471, 0.000186456, 0.000216649, 0.000251732, 0.000292495, 0.00033986};
+
+static float inv_dequant_stable[] = {
+    0.000000,     3150.000000, 3139.258545, 2648.630371, 2234.681152,  1885.427490, 1590.758057, 1342.141724,
+    3150.000000,  3150.000000, 3015.809570, 2576.583984, 2188.415039,  1853.965576, 1568.540649, 1326.029297,
+    3139.258545,  3015.809570, 2726.995361, 2389.616455, 2062.382568,  1765.966431, 1505.393555, 1279.748535,
+    2648.630371,  2576.583984, 2389.616455, 2144.407471, 1885.427490,  1637.121094, 1410.374878, 1208.789673,
+    2234.681152,  2188.415039, 2062.382568, 1885.427490, 1686.281982,  1485.426636, 1294.845093, 1060.593384,
+    1885.427490,  1853.965576, 1765.966431, 1637.121094, 1485.426636,  1326.029297, 1169.492065, 785.962952,
+    1590.758057,  1568.540649, 1505.393555, 1410.374878, 1294.845093,  1169.492065, 838.701721,  558.037292,
+    1342.141724,  1326.029297, 1279.748535, 1208.789673, 1060.593384,  785.962952,  558.037292,  382.654694,
+    0.000000,     560.000000,  558.510437,  489.194183,  428.480621,   375.302246,  328.723816,  287.926147,
+    560.000000,   560.000000,  541.309387,  478.786804,  421.547455,   370.409943,  325.138336,  285.227325,
+    558.510437,   541.309387,  500.443756,  451.472992,  402.494324,   356.627594,  314.885712,  277.434692,
+    489.194183,   478.786804,  451.472992,  414.922729,  375.302246,   336.170715,  299.277435,  265.364777,
+    428.480621,   421.547455,  402.494324,  375.302246,  344.016449,   311.624298,  279.983337,  250.119843,
+    375.302246,   370.409943,  356.627594,  336.170715,  311.624298,   285.227325,  258.613495,  232.845169,
+    328.723816,   325.138336,  314.885712,  299.277435,  279.983337,   258.613495,  236.484726,  214.558777,
+    287.926147,   285.227325,  277.434692,  265.364777,  250.119843,   232.845169,  214.558777,  196.071777,
+    0.000000,     293.959503,  169.469955,  119.412476,  85.333336,    85.333336,   83.550827,   58.871857,
+    293.959503,   233.598114,  156.027161,  112.817505,  85.333336,    85.333336,   81.164711,   57.425171,
+    169.469955,   156.027161,  126.804932,  96.600616,   85.333336,    85.333336,   74.576889,   53.372673,
+    119.412476,   112.817505,  96.600616,   85.333336,   85.333336,    85.333336,   65.203850,   47.455181,
+    85.333336,    85.333336,   85.333336,   85.333336,   85.333336,    72.553520,   54.677811,   39.419506,
+    85.333336,    85.333336,   85.333336,   85.333336,   72.553520,    57.425171,   44.331757,   29.212204,
+    83.550827,    81.164711,   74.576889,   65.203850,   54.677811,    44.331757,   31.172369,   20.740799,
+    58.871857,    57.425171,   53.372673,   47.455181,   39.419506,    29.212204,   20.740799,   14.222282,
+    0.000000,     3160.000000, 280.000000,  280.000000,  280.000000,   280.000000,  280.000000,  280.000000,
+    3160.000000,  3160.000000, 280.000000,  280.000000,  280.000000,   280.000000,  280.000000,  280.000000,
+    280.000000,   280.000000,  280.000000,  280.000000,  280.000000,   280.000000,  280.000000,  280.000000,
+    280.000000,   280.000000,  280.000000,  280.000000,  280.000000,   280.000000,  280.000000,  280.000000,
+    280.000000,   280.000000,  280.000000,  280.000000,  280.000000,   280.000000,  280.000000,  280.000000,
+    280.000000,   280.000000,  280.000000,  280.000000,  280.000000,   280.000000,  280.000000,  280.000000,
+    280.000000,   280.000000,  280.000000,  280.000000,  280.000000,   280.000000,  280.000000,  280.000000,
+    280.000000,   280.000000,  280.000000,  280.000000,  280.000000,   280.000000,  280.000000,  280.000000,
+    0.000000,     864.000000,  60.000000,   60.000000,   60.000000,    60.000000,   60.000000,   60.000000,
+    864.000000,   864.000000,  60.000000,   60.000000,   60.000000,    60.000000,   60.000000,   60.000000,
+    60.000000,    60.000000,   60.000000,   60.000000,   60.000000,    60.000000,   60.000000,   60.000000,
+    60.000000,    60.000000,   60.000000,   60.000000,   60.000000,    60.000000,   60.000000,   60.000000,
+    60.000000,    60.000000,   60.000000,   60.000000,   60.000000,    60.000000,   60.000000,   60.000000,
+    60.000000,    60.000000,   60.000000,   60.000000,   60.000000,    60.000000,   60.000000,   60.000000,
+    60.000000,    60.000000,   60.000000,   60.000000,   60.000000,    60.000000,   60.000000,   60.000000,
+    60.000000,    60.000000,   60.000000,   60.000000,   60.000000,    60.000000,   60.000000,   60.000000,
+    0.000000,     200.000000,  18.000000,   18.000000,   18.000000,    18.000000,   18.000000,   18.000000,
+    200.000000,   200.000000,  18.000000,   18.000000,   18.000000,    18.000000,   18.000000,   18.000000,
+    18.000000,    18.000000,   18.000000,   18.000000,   18.000000,    18.000000,   18.000000,   18.000000,
+    18.000000,    18.000000,   18.000000,   18.000000,   18.000000,    18.000000,   18.000000,   18.000000,
+    18.000000,    18.000000,   18.000000,   18.000000,   18.000000,    18.000000,   18.000000,   18.000000,
+    18.000000,    18.000000,   18.000000,   18.000000,   18.000000,    18.000000,   18.000000,   18.000000,
+    18.000000,    18.000000,   18.000000,   18.000000,   18.000000,    18.000000,   18.000000,   18.000000,
+    18.000000,    18.000000,   18.000000,   18.000000,   18.000000,    18.000000,   18.000000,   18.000000,
+    0.000000,     3840.000000, 1280.000000, 1280.000000, 480.000000,   480.000000,  480.000000,  480.000000,
+    3840.000000,  2560.000000, 1280.000000, 1280.000000, 480.000000,   480.000000,  480.000000,  480.000000,
+    1280.000000,  1280.000000, 640.000000,  640.000000,  480.000000,   480.000000,  480.000000,  480.000000,
+    1280.000000,  1280.000000, 640.000000,  640.000000,  480.000000,   480.000000,  480.000000,  480.000000,
+    480.000000,   480.000000,  480.000000,  480.000000,  300.000000,   300.000000,  300.000000,  300.000000,
+    480.000000,   480.000000,  480.000000,  480.000000,  300.000000,   300.000000,  300.000000,  300.000000,
+    480.000000,   480.000000,  480.000000,  480.000000,  300.000000,   300.000000,  300.000000,  300.000000,
+    480.000000,   480.000000,  480.000000,  480.000000,  300.000000,   300.000000,  300.000000,  300.000000,
+    0.000000,     960.000000,  320.000000,  320.000000,  140.000000,   140.000000,  140.000000,  140.000000,
+    960.000000,   640.000000,  320.000000,  320.000000,  140.000000,   140.000000,  140.000000,  140.000000,
+    320.000000,   320.000000,  180.000000,  180.000000,  140.000000,   140.000000,  140.000000,  140.000000,
+    320.000000,   320.000000,  180.000000,  180.000000,  140.000000,   140.000000,  140.000000,  140.000000,
+    140.000000,   140.000000,  140.000000,  140.000000,  120.000000,   120.000000,  120.000000,  120.000000,
+    140.000000,   140.000000,  140.000000,  140.000000,  120.000000,   120.000000,  120.000000,  120.000000,
+    140.000000,   140.000000,  140.000000,  140.000000,  120.000000,   120.000000,  120.000000,  120.000000,
+    140.000000,   140.000000,  140.000000,  140.000000,  120.000000,   120.000000,  120.000000,  120.000000,
+    0.000000,     640.000000,  128.000000,  128.000000,  32.000000,    32.000000,   32.000000,   32.000000,
+    640.000000,   320.000000,  128.000000,  128.000000,  32.000000,    32.000000,   32.000000,   32.000000,
+    128.000000,   128.000000,  64.000000,   64.000000,   32.000000,    32.000000,   32.000000,   32.000000,
+    128.000000,   128.000000,  64.000000,   64.000000,   32.000000,    32.000000,   32.000000,   32.000000,
+    32.000000,    32.000000,   32.000000,   32.000000,   16.000000,    16.000000,   16.000000,   16.000000,
+    32.000000,    32.000000,   32.000000,   32.000000,   16.000000,    16.000000,   16.000000,   16.000000,
+    32.000000,    32.000000,   32.000000,   32.000000,   16.000000,    16.000000,   16.000000,   16.000000,
+    32.000000,    32.000000,   32.000000,   32.000000,   16.000000,    16.000000,   16.000000,   16.000000,
+    0.000000,     2200.000000, 2200.000000, 2200.000000, 2200.000000,  2200.000000, 2200.000000, 2200.000000,
+    2200.000000,  2200.000000, 2200.000000, 2200.000000, 2200.000000,  2200.000000, 2200.000000, 2200.000000,
+    2200.000000,  2200.000000, 2200.000000, 2200.000000, 2200.000000,  2200.000000, 2200.000000, 2200.000000,
+    2200.000000,  2200.000000, 2200.000000, 2200.000000, 2200.000000,  2200.000000, 2200.000000, 2200.000000,
+    2200.000000,  2200.000000, 2200.000000, 2200.000000, 2200.000000,  2200.000000, 2200.000000, 2200.000000,
+    2200.000000,  2200.000000, 2200.000000, 2200.000000, 2200.000000,  2200.000000, 2200.000000, 2200.000000,
+    2200.000000,  2200.000000, 2200.000000, 2200.000000, 2200.000000,  2200.000000, 2200.000000, 2200.000000,
+    2200.000000,  2200.000000, 2200.000000, 2200.000000, 2200.000000,  2200.000000, 2200.000000, 2200.000000,
+    0.000000,     392.000000,  392.000000,  392.000000,  392.000000,   392.000000,  392.000000,  392.000000,
+    392.000000,   392.000000,  392.000000,  392.000000,  392.000000,   392.000000,  392.000000,  392.000000,
+    392.000000,   392.000000,  392.000000,  392.000000,  392.000000,   392.000000,  392.000000,  392.000000,
+    392.000000,   392.000000,  392.000000,  392.000000,  392.000000,   392.000000,  392.000000,  392.000000,
+    392.000000,   392.000000,  392.000000,  392.000000,  392.000000,   392.000000,  392.000000,  392.000000,
+    392.000000,   392.000000,  392.000000,  392.000000,  392.000000,   392.000000,  392.000000,  392.000000,
+    392.000000,   392.000000,  392.000000,  392.000000,  392.000000,   392.000000,  392.000000,  392.000000,
+    392.000000,   392.000000,  392.000000,  392.000000,  392.000000,   392.000000,  392.000000,  392.000000,
+    0.000000,     112.000000,  95.651627,   95.651627,   81.689583,    81.689583,   68.239342,   68.239342,
+    112.000000,   112.000000,  95.651627,   95.651627,   81.689583,    81.689583,   68.239342,   68.239342,
+    95.651627,    95.651627,   89.600014,   89.600014,   78.702759,    78.702759,   65.137154,   65.137154,
+    95.651627,    95.651627,   89.600014,   89.600014,   78.702759,    78.702759,   65.137154,   65.137154,
+    81.689583,    81.689583,   78.702759,   78.702759,   71.680023,    71.680023,   57.363346,   57.363346,
+    81.689583,    81.689583,   78.702759,   78.702759,   71.680023,    71.680023,   57.363346,   57.363346,
+    68.239342,    68.239342,   65.137154,   65.137154,   57.363346,    57.363346,   47.786716,   47.786716,
+    68.239342,    68.239342,   65.137154,   65.137154,   57.363346,    57.363346,   47.786716,   47.786716,
+    0.000000,     0.000000,    5616.416016, 4437.547852, 3710.523682,  3312.083740, 2956.428467, 2638.963867,
+    2378.979736,  2146.230957, 1936.253296, 1722.186157, 1498.605713,  1304.051636, 1134.754883, 951.882080,
+    0.000000,     0.000000,    5312.582520, 4271.097168, 3658.995850,  3275.037109, 2928.764160, 2617.745361,
+    2363.779541,  2134.027100, 1926.335571, 1711.357300, 1489.962646,  1297.105591, 1129.140381, 946.136963,
+    5616.416016,  5312.582520, 4620.592773, 3880.564697, 3516.761230,  3170.294189, 2849.415527, 2562.006348,
+    2319.431641,  2098.261719, 1897.172852, 1679.534424, 1464.505249,  1276.608887, 1112.546509, 929.184143,
+    4437.547852,  4271.097168, 3880.564697, 3609.647705, 3312.083740,  3013.749512, 2727.902588, 2474.977295,
+    2249.396484,  2041.305786, 1850.436279, 1628.609985, 1423.584961,  1243.542969, 1077.572754, 901.837036,
+    3710.523682,  3658.995850, 3516.761230, 3312.083740, 3073.944824,  2824.097412, 2580.273682, 2363.779541,
+    2158.580811,  1966.619507, 1778.076416, 1561.425903, 1369.259766,  1199.417236, 1031.115479, 865.357239,
+    3312.083740,  3275.037109, 3170.294189, 3013.749512, 2824.097412,  2617.745117, 2425.913330, 2235.929932,
+    2052.443848,  1878.206177, 1679.534424, 1481.398804, 1304.051636,  1146.111572, 975.344788,  821.329834,
+    2956.428467,  2928.764160, 2849.415527, 2727.902588, 2580.273682,  2425.913330, 2263.037598, 2098.261719,
+    1936.253296,  1766.659668, 1570.745850, 1392.135254, 1230.684570,  1077.572754, 912.642517,  771.521240,
+    2638.963867,  2617.745361, 2562.006348, 2474.977295, 2363.779541,  2235.929932, 2098.261719, 1956.393188,
+    1813.078247,  1628.609985, 1456.172852, 1297.105591, 1151.854614,  993.464294,  845.405334,  717.737732,
+    2378.979736,  2363.779541, 2319.431641, 2249.396484, 2158.580811,  2052.443848, 1936.253296, 1813.078247,
+    1648.672119,  1489.962646, 1339.664185, 1199.417480, 1057.315552,  907.217957,  775.878479,  661.709290,
+    2146.230957,  2134.027100, 2098.261719, 2041.305786, 1966.619507,  1878.206177, 1766.659668, 1628.609985,
+    1489.962646,  1354.335571, 1224.331787, 1098.371216, 951.882080,   821.329834,  706.041565,  604.996033,
+    1936.253296,  1926.335571, 1897.172852, 1850.436279, 1778.076416,  1679.534424, 1570.745850, 1456.172852,
+    1339.664185,  1224.331787, 1112.546143, 975.344482,  850.334167,   737.812134,  637.541504,  531.866638,
+    1722.186157,  1711.357300, 1679.534424, 1628.609985, 1561.425903,  1481.398804, 1392.135254, 1297.105591,
+    1199.417480,  1098.371216, 975.344482,  860.309998,  754.414917,   658.183533,  565.168762,  455.065186,
+    1498.605713,  1489.962646, 1464.505249, 1423.584961, 1369.259766,  1304.051636, 1230.684570, 1151.854614,
+    1057.315552,  951.882080,  850.334167,  754.414917,  665.260315,   582.761047,  475.564758,  385.666412,
+    1304.051636,  1297.105591, 1276.608887, 1243.542969, 1199.417236,  1146.111572, 1077.572754, 993.464294,
+    907.217957,   821.329834,  737.812134,  658.183533,  582.761047,   482.643036,  396.775940,  324.039429,
+    1134.754883,  1129.140381, 1112.546509, 1077.572754, 1031.115479,  975.344788,  912.642517,  845.405334,
+    775.878479,   706.041565,  637.541504,  565.168762,  475.564758,   396.775940,  328.516357,  270.136078,
+    951.882080,   946.136963,  929.184143,  901.837036,  865.357239,   821.329834,  771.521240,  717.737732,
+    661.709290,   604.996033,  531.866638,  455.065186,  385.666412,   324.039429,  270.136078,  223.608490,
+    0.000000,     0.000000,    2384.412598, 2060.989746, 1763.609009,  1491.737793, 1261.776978, 1067.266357,
+    956.677612,   861.364075,  775.546631,  703.312927,  644.910889,   591.358521,  542.252991,  501.345215,
+    0.000000,     0.000000,    2303.758789, 2012.809937, 1727.632080,  1467.211548, 1244.414307, 1054.643066,
+    950.447205,   856.371826,  771.497620,  700.552734,  642.589600,   589.392944,  540.578857,  500.060272,
+    2384.412598,  2303.758789, 2113.184082, 1884.007446, 1629.571411,  1398.579590, 1195.044922, 1031.757080,
+    932.273987,   841.744202,  759.593811,  692.403137,  635.722961,   583.569458,  535.612549,  496.242188,
+    2060.989746,  2012.809937, 1884.007446, 1693.401611, 1491.737793,  1297.998291, 1120.699707, 996.043396,
+    903.588257,   818.460022,  740.524109,  679.239563,  624.590454,   574.100037,  528.409058,  489.997620,
+    1763.609009,  1727.632080, 1629.571411, 1491.737793, 1336.388306,  1179.428345, 1039.256348, 950.447205,
+    866.416626,   787.946472,  717.456177,  661.633423,  609.623047,   561.314270,  518.629089,  481.495361,
+    1491.737793,  1467.211548, 1398.579590, 1297.998291, 1179.428345,  1054.642944, 975.919922,  898.074402,
+    823.012451,   751.853821,  692.403137,  640.284668,  591.358521,   545.629761,  506.546997,  470.954254,
+    1261.776978,  1244.414307, 1195.044922, 1120.699707, 1039.256348,  975.919922,  909.174133,  841.744202,
+    775.546631,   714.580872,  664.092590,  615.952393,  570.392151,   528.409058,  492.477875,  458.628601,
+    1067.266357,  1054.643066, 1031.757080, 996.043396,  950.447205,   898.074402,  841.744202,  783.770203,
+    726.228333,   679.239563,  633.465698,  589.392944,  547.332581,   510.515045,  476.757660,  444.792908,
+    956.677612,   950.447205,  932.273987,  903.588257,  866.416626,   823.012451,  775.546631,  726.228333,
+    684.443726,   642.589600,  601.375000,  561.314392,  524.175049,   491.234863,  459.724792,  429.728699,
+    861.364075,   856.371826,  841.744202,  818.460022,  787.946472,   751.853821,  714.580872,  679.239563,
+    642.589600,   605.472290,  568.554810,  532.708740,  501.345215,   470.954254,  441.705719,  413.711823,
+    775.546631,   771.497620,  759.593811,  740.524109,  717.456177,   692.403137,  664.092590,  633.465698,
+    601.375000,   568.554810,  535.612427,  506.546936,  477.933990,   450.024689,  423.003998,  395.167694,
+    703.312927,   700.552734,  692.403137,  679.239563,  661.633423,   640.284668,  615.952393,  589.392944,
+    561.314392,   532.708740,  506.546936,  480.302856,  454.290039,   428.756592,  403.216187,  375.228302,
+    644.910889,   642.589600,  635.722961,  624.590454,  609.623047,   591.358521,  570.392151,  547.332581,
+    524.175049,   501.345215,  477.933990,  454.290039,  430.704803,   407.340515,  380.757690,  355.171173,
+    591.358521,   589.392944,  583.569458,  574.100037,  561.314270,   545.629761,  528.409058,  510.515045,
+    491.234863,   470.954254,  450.024689,  428.756592,  407.340515,   382.629913,  358.535706,  335.223267,
+    542.252991,   540.578857,  535.612549,  528.409058,  518.629089,   506.546997,  492.477875,  476.757660,
+    459.724792,   441.705719,  423.003998,  403.216187,  380.757690,   358.535706,  336.753815,  315.574097,
+    501.345215,   500.060272,  496.242188,  489.997620,  481.495361,   470.954254,  458.628601,  444.792908,
+    429.728699,   413.711823,  395.167694,  375.228302,  355.171173,   335.223267,  315.574097,  296.378265,
+    0.000000,     0.000000,    615.613831,  448.953400,  337.930267,   263.807556,  205.943130,  160.770889,
+    141.832733,   126.301643,  112.471252,  100.763390,  91.120811,    82.400993,   74.515610,   58.896236,
+    0.000000,     0.000000,    571.402039,  426.532227,  327.784393,   257.417816,  201.765564,  157.966431,
+    140.812332,   125.492966,  111.822540,  100.304680,  90.740356,    82.083275,   74.248734,   58.393326,
+    615.613831,   571.402039,  473.941895,  372.602753,  300.644775,   239.809601,  190.039810,  154.182663,
+    137.840027,   123.126366,  109.917458,  98.951996,   89.616219,    81.142967,   73.457825,   56.916744,
+    448.953400,   426.532227,  372.602753,  318.224457,  263.807556,   214.746811,  172.817261,  148.295853,
+    133.160797,   119.368141,  106.872108,  96.772522,   87.797852,    79.617172,   70.208313,   54.558437,
+    337.930267,   327.784393,  300.644775,  263.807556,  224.206940,   186.378311,  155.421555,  140.812332,
+    127.120590,   114.460091,  103.118340,  93.868050,   85.361305,    77.563431,   65.959373,   51.458752,
+    263.807556,   257.417816,  239.809601,  214.746811,  186.378311,   157.966400,  144.988541,  132.263153,
+    120.102051,   108.680435,  98.951996,   90.362801,   82.400993,    75.054314,   60.963200,   47.789742,
+    205.943130,   201.765564,  190.039810,  172.817261,  155.421555,   144.988541,  134.070770,  123.126366,
+    112.471252,   102.638969,  94.273003,   86.390495,   79.020828,    70.208313,   55.486752,   43.736801,
+    160.770889,   157.966431,  154.182663,  148.295853,  140.812332,   132.263153,  123.126366,  113.789886,
+    104.582710,   96.772522,   89.247108,   82.083275,   75.326157,    62.573708,   49.786186,   39.481380,
+    141.832733,   140.812332,  137.840027,  133.160797,  127.120590,   120.102051,  112.471252,  104.582710,
+    97.633369,    90.740356,   84.022667,   77.563446,   68.346024,    55.020145,   44.087116,   35.187599,
+    126.301643,   125.492966,  123.126366,  119.368141,  114.460091,   108.680435,  102.638969,  96.772522,
+    90.740356,    84.687279,   78.725555,   72.135590,   58.896236,    47.789742,   38.573082,   30.993063,
+    112.471252,   111.822540,  109.917458,  106.872108,  103.118340,   98.951996,   94.273003,   89.247108,
+    84.022667,    78.725555,   73.457809,   60.963173,   50.197853,    41.054691,   33.381031,   24.780676,
+    100.763390,   100.304680,  98.951996,   96.772522,   93.868050,    90.362801,   86.390495,   82.083275,
+    77.563446,    72.135590,   60.963173,   51.034103,   42.369473,    34.922314,   27.726070,   18.572216,
+    91.120811,    90.740356,   89.616219,   87.797852,   85.361305,    82.400993,   79.020828,   75.326157,
+    68.346024,    58.896236,   50.197853,   42.369473,   35.455399,    29.343132,   20.148905,   13.676408,
+    82.400993,    82.083275,   81.142967,   79.617172,   77.563431,    75.054314,   70.208313,   62.573708,
+    55.020145,    47.789742,   41.054691,   34.922314,   29.343132,    20.706997,   14.413850,   9.911549,
+    74.515610,    74.248734,   73.457825,   70.208313,   65.959373,    60.963200,   55.486752,   49.786186,
+    44.087116,    38.573082,   33.381031,   27.726070,   20.148905,    14.413850,   10.166267,   7.079802,
+    58.896236,    58.393326,   56.916744,   54.558437,   51.458752,    47.789742,   43.736801,   39.481380,
+    35.187599,    30.993063,   24.780676,   18.572216,   13.676408,    9.911549,    7.079802,    4.991220,
+    0.000000,     0.000000,    0.000000,    0.000000,    10016.177734, 8949.019531, 7995.559082, 7162.601074,
+    6422.475586,  5758.828613, 5163.758301, 4630.176758, 4151.732422,  3734.188232, 3370.109863, 3041.528564,
+    2744.983643,  2477.351074, 2235.813232, 2038.749634, 1932.109741,  1831.047485, 1735.271729, 1644.505737,
+    1558.487183,  1476.968018, 1386.826660, 1301.528687, 1221.477173,  1146.349243, 1075.842163, 1009.671509,
+    0.000000,     0.000000,    0.000000,    0.000000,    9878.224609,  8849.744141, 7921.355469, 7107.295410,
+    6379.011230,  5724.145508, 5135.744141, 4607.326172, 4132.939453,  3719.505127, 3357.800781, 3031.157227,
+    2736.206543,  2469.894287, 2229.455811, 2035.871338, 1929.518066,  1828.708130, 1733.155273, 1642.587036,
+    1556.744507,  1475.382324, 1385.135010, 1300.000122, 1220.093750,  1145.095825, 1074.704956, 1008.638672,
+    0.000000,     0.000000,    0.000000,    0.000000,    9497.340820,  8569.009766, 7710.195312, 6947.082520,
+    6252.300781,  5622.568359, 5053.416504, 4539.993652, 4077.450684,  3676.055664, 3321.326660, 3000.390625,
+    2710.143555,  2447.733643, 2210.550537, 2027.284180, 1921.783081,  1821.723755, 1726.834595, 1636.855469,
+    1551.537354,  1470.240967, 1380.081177, 1295.431274, 1215.958252,  1141.347534, 1071.303833, 1005.549255,
+    0.000000,     0.000000,    0.000000,    0.000000,    8949.019531,  8149.289551, 7394.224121, 6697.344727,
+    6052.488281,  5461.019531, 4921.634277, 4431.667969, 3987.818604,  3605.573242, 3262.003174, 2950.239990,
+    2667.581299,  2411.486328, 2179.584961, 2013.130249, 1909.023438,  1810.194092, 1716.393799, 1627.382446,
+    1542.927124,  1460.985596, 1371.723022, 1287.873291, 1209.114380,  1135.142578, 1065.671997, 1000.432007,
+    10016.177734, 9878.224609, 9497.340820, 8949.019531, 8310.704102,  7644.405273, 6999.567383, 6379.011230,
+    5793.938965,  5249.588867, 4747.628418, 4287.628418, 3871.052002,  3510.746094, 3181.889648, 2882.297852,
+    2609.764404,  2362.132812, 2137.337891, 1993.638184, 1891.430786,  1794.280640, 1701.970703, 1614.285400,
+    1531.013916,  1448.186157, 1360.157349, 1277.407837, 1199.633057,  1126.542847, 1057.862915, 993.333557,
+    8949.019531,  8849.744141, 8569.009766, 8149.289551, 7644.405273,  7107.295410, 6556.779785, 6014.109863,
+    5492.590332,  4999.912598, 4539.993652, 4114.296875, 3734.188232,  3394.959229, 3083.605469, 2798.613281,
+    2538.306152,  2300.954346, 2084.833496, 1969.113525, 1869.262207,  1774.201294, 1683.749756, 1597.722046,
+    1515.933594,  1431.994873, 1345.514648, 1264.148682, 1187.612061,  1115.632202, 1047.949463, 984.940796,
+    7995.559082,  7921.355469, 7710.195312, 7394.224121, 6999.567383,  6556.779785, 6091.377441, 5622.568359,
+    5163.758301,  4723.701172, 4307.688965, 3918.657715, 3578.032715,  3262.003174, 2970.133789, 2701.544189,
+    2455.086426,  2229.455811, 2041.636230, 1939.925049, 1842.829834,  1750.221191, 1661.957764, 1577.887085,
+    1497.853394,  1412.599976, 1327.956665, 1248.234619, 1173.171875,  1102.515625, 1036.023804, 975.335022,
+    7162.601074,  7107.295410, 6947.082520, 6697.344727, 6379.011230,  6014.109863, 5622.568359, 5220.674805,
+    4820.776367,  4431.667969, 4059.243408, 3719.505127, 3407.500000,  3115.799805, 2844.609619, 2593.611328,
+    2362.133057,  2149.279541, 2004.730957, 1906.490967, 1812.489014,  1722.644409, 1636.855469, 1555.005493,
+    1476.968018,  1390.219116, 1307.671753, 1229.829224, 1156.454346,  1087.316772, 1022.192383, 964.170288,
+    6422.475586,  6379.011230, 6252.300781, 6052.488281, 5793.938965,  5492.590332, 5163.758301, 4820.776367,
+    4474.431152,  4132.939453, 3809.151123, 3510.745605, 3227.259766,  2960.159424, 2710.143555, 2477.351562,
+    2261.523438,  2062.126221, 1963.743530, 1869.262207, 1778.627319,  1691.803589, 1608.729980, 1529.325195,
+    1450.003418,  1365.094971, 1284.869629, 1209.114380, 1137.618530,  1070.173828, 1006.577637, 951.533936,
+    5758.828613,  5724.145508, 5622.568359, 5461.019531, 5249.588867,  4999.912598, 4723.701172, 4431.667969,
+    4132.939453,  3839.881836, 3564.398193, 3297.380859, 3041.528564,  2798.613281, 2569.677490, 2355.211426,
+    2155.288330,  2013.130249, 1919.218018, 1828.708130, 1741.649902,  1658.050537, 1577.887085, 1501.112305,
+    1419.603271,  1337.488037, 1259.777100, 1186.287842, 1116.836182,  1051.238037, 989.356018,  937.521851,
+    5163.758301,  5135.744141, 5053.416504, 4921.634277, 4747.628418,  4539.993652, 4307.688965, 4059.243408,
+    3809.151123,  3564.398193, 3321.326660, 3083.605469, 2853.956543,  2634.296387, 2425.883789, 2229.455811,
+    2053.263184,  1961.068726, 1871.700928, 1785.304199, 1701.970703,  1621.745972, 1544.642456, 1470.240967,
+    1386.826660,  1307.671509, 1232.633179, 1161.558716, 1094.291504,  1030.670776, 972.740723,  922.236572,
+    4630.176758,  4607.326172, 4539.993652, 4431.667969, 4287.628418,  4114.296875, 3918.657715, 3719.505127,
+    3510.745605,  3297.380859, 3083.605469, 2872.799805, 2667.581299,  2469.894531, 2281.107178, 2102.114502,
+    1993.638428,  1906.490967, 1821.723877, 1739.518799, 1660.001831,  1583.252930, 1509.314819, 1431.994873,
+    1351.991333,  1275.923828, 1203.682495, 1135.142578, 1070.173828,  1008.638672, 954.878296,  905.786682,
+    4151.732422,  4132.939453, 4077.450684, 3987.818604, 3871.052002,  3734.188232, 3578.032715, 3407.500000,
+    3227.259766,  3041.528564, 2853.956543, 2667.581299, 2484.843994,  2307.630371, 2137.337891, 2015.945557,
+    1932.109741,  1849.967896, 1769.796143, 1691.803589, 1616.144531,  1542.927124, 1472.104004, 1391.920288,
+    1315.414917,  1242.525757, 1173.171875, 1107.257935, 1044.676514,  985.821167,  935.894348,  888.283264,
+    3734.188232,  3719.505127, 3676.055664, 3605.573242, 3510.746094,  3394.959229, 3262.003174, 3115.799805,
+    2960.159424,  2798.613281, 2634.296387, 2469.894531, 2307.630371,  2149.279541, 2027.284424, 1947.802002,
+    1869.262207,  1792.028931, 1716.393799, 1642.587036, 1570.783203,  1501.112305, 1426.662964, 1350.367676,
+    1277.407837,  1207.752563, 1141.347534, 1078.122192, 1017.992920,  964.170288,  915.916138,  869.840393,
+    3370.109863,  3357.800781, 3321.326660, 3262.003174, 3181.889648,  3083.605469, 2970.133789, 2844.609619,
+    2710.143555,  2569.677490, 2425.883789, 2281.107178, 2137.337891,  2027.284424, 1953.087891, 1879.053223,
+    1805.620605,  1733.155273, 1661.957764, 1592.268433, 1524.278809,  1455.477539, 1380.081177, 1307.671509,
+    1238.270752,  1171.872925, 1108.448364, 1047.949463, 990.314453,   941.609558,  895.070007,  850.572571,
+    3041.528564,  3031.157227, 3000.390625, 2950.239990, 2882.297852,  2798.613281, 2701.544189, 2593.611328,
+    2477.351562,  2355.211426, 2229.455811, 2102.114502, 2015.945557,  1947.802002, 1879.053223, 1810.194092,
+    1741.649902,  1673.779907, 1606.886108, 1541.215088, 1476.968018,  1403.923828, 1332.708740, 1264.148682,
+    1198.288086,  1135.142578, 1074.704956, 1016.947144, 965.022034,   918.278442,  873.481934,  830.592407,
+    2744.983643,  2736.206543, 2710.143555, 2667.581299, 2609.764404,  2538.306152, 2455.086426, 2362.133057,
+    2261.523438,  2155.288330, 2053.263184, 1993.638428, 1932.109741,  1869.262207, 1805.620605, 1741.649902,
+    1677.755005,  1614.285400, 1551.537354, 1489.759766, 1421.362915,  1351.991333, 1284.869263, 1220.093750,
+    1157.727417,  1097.804565, 1040.336426, 985.821167,  939.153625,   894.312012,  851.274536,  810.011841,
+    2477.351074,  2469.894287, 2447.733643, 2411.486328, 2362.132812,  2300.954346, 2229.455811, 2149.279541,
+    2062.126221,  2013.130249, 1961.068726, 1906.490967, 1849.967896,  1792.028931, 1733.155273, 1673.779907,
+    1614.285400,  1555.005493, 1496.228516, 1431.994873, 1365.094971,  1300.000122, 1236.857544, 1175.777100,
+    1116.836182,  1060.084961, 1005.549255, 957.398621,  912.780823,   869.840393,  828.566345,  788.938354,
+    2235.813232,  2229.455811, 2210.550537, 2179.584961, 2137.337891,  2084.833496, 2041.636230, 2004.730957,
+    1963.743530,  1919.218018, 1871.700928, 1821.723877, 1769.796143,  1716.393799, 1661.957764, 1606.886108,
+    1551.537354,  1496.228516, 1435.567749, 1371.723022, 1309.214355,  1248.234619, 1188.938477, 1131.444702,
+    1075.842163,  1022.192383, 972.740723,  928.625183,  886.038757,   844.988281,  805.471558,  767.476257,
+    2038.749634,  2035.871338, 2027.284180, 2013.130249, 1993.638184,  1969.113525, 1939.925049, 1906.490967,
+    1869.262207,  1828.708130, 1785.304199, 1739.518799, 1691.803589,  1642.587036, 1592.268433, 1541.215088,
+    1489.759766,  1431.994873, 1371.723022, 1312.308716, 1253.984985,  1196.945190, 1141.347534, 1087.316772,
+    1034.949585,  984.940796,  941.609558,  899.638794,  859.054749,   819.872620,  782.097107,  745.724487,
+    1932.109741,  1929.518066, 1921.783081, 1909.023438, 1891.430786,  1869.262207, 1842.829834, 1812.489014,
+    1778.627319,  1741.649902, 1701.970703, 1660.001831, 1616.144531,  1570.783203, 1524.278809, 1476.968018,
+    1421.362915,  1365.094971, 1309.214355, 1253.984985, 1199.633057,  1146.349243, 1094.291504, 1043.588867,
+    994.342957,   951.534058,  910.440247,  870.566956,  831.947388,   794.602966,  758.545227,  723.776733,
+    1831.047485,  1828.708130, 1821.723755, 1810.194092, 1794.280640,  1774.201294, 1750.221191, 1722.644409,
+    1691.803589,  1658.050537, 1621.745972, 1583.252930, 1542.927124,  1501.112305, 1455.477539, 1403.923828,
+    1351.991333,  1300.000122, 1248.234619, 1196.945190, 1146.349243,  1096.631470, 1047.949463, 1000.432007,
+    958.241089,   918.278259,  879.356750,  841.526489,  804.825806,   769.281250,  734.910400,  701.720947,
+    1735.271729,  1733.155273, 1726.834595, 1716.393799, 1701.970703,  1683.749756, 1661.957764, 1636.855469,
+    1608.729980,  1577.887085, 1544.642456, 1509.314819, 1472.104004,  1426.662964, 1380.081177, 1332.708740,
+    1284.869263,  1236.857544, 1188.938477, 1141.347534, 1094.291504,  1047.949463, 1002.474182, 961.622131,
+    923.031555,   885.292297,  848.471924,  812.623657,  777.789856,   744.001404,  711.280090,  684.970581,
+    1644.505737,  1642.587036, 1636.855469, 1627.382446, 1614.285400,  1597.722046, 1577.887085, 1555.005493,
+    1529.325195,  1501.112305, 1470.240967, 1431.994873, 1391.920288,  1350.367676, 1307.671509, 1264.148682,
+    1220.093750,  1175.777100, 1131.444702, 1087.316772, 1043.588867,  1000.432007, 961.622131,  924.624451,
+    888.283264,   852.680969,  817.885742,  783.954041,  750.929993,   718.848206,  690.509705,  669.787170,
+    1558.487183,  1556.744507, 1551.537354, 1542.927124, 1531.013916,  1515.933594, 1497.853394, 1476.968018,
+    1450.003418,  1419.603271, 1386.826660, 1351.991333, 1315.414917,  1277.407837, 1238.270752, 1198.288086,
+    1157.727417,  1116.836182, 1075.842163, 1034.949585, 994.342957,   958.241089,  923.031555,  888.283264,
+    854.091187,   820.536316,  787.687988,  755.602661,  724.327698,   694.713867,  674.449768,  654.522705,
+    1476.968018,  1475.382324, 1470.240967, 1460.985596, 1448.186157,  1431.994873, 1412.599976, 1390.219116,
+    1365.094971,  1337.488037, 1307.671509, 1275.923828, 1242.525757,  1207.752563, 1171.872925, 1135.142578,
+    1097.804565,  1060.084961, 1022.192383, 984.940796,  951.534058,   918.278259,  885.292297,  852.680969,
+    820.536316,   788.938354,  757.955322,  727.644897,  698.054810,   677.813538,  658.364380,  639.217041,
+    1386.826660,  1385.135010, 1380.081177, 1371.723022, 1360.157349,  1345.514648, 1327.956665, 1307.671753,
+    1284.869629,  1259.777100, 1232.633179, 1203.682495, 1173.171875,  1141.347534, 1108.448364, 1074.704956,
+    1040.336426,  1005.549255, 972.740723,  941.609558,  910.440247,   879.356750,  848.471924,  817.885742,
+    787.687988,   757.955322,  728.755737,  700.146423,  679.845642,   660.946289,  642.291992,  623.906799,
+    1301.528687,  1300.000122, 1295.431274, 1287.873291, 1277.407837,  1264.148682, 1248.234619, 1229.829224,
+    1209.114380,  1186.287842, 1161.558716, 1135.142578, 1107.257935,  1078.122192, 1047.949463, 1016.947144,
+    985.821167,   957.398621,  928.625183,  899.638794,  870.566956,   841.526489,  812.623657,  783.954041,
+    755.602661,   727.644897,  700.146423,  680.525146,  662.243774,   644.148804,  626.268066,  608.625916,
+    1221.477173,  1220.093750, 1215.958252, 1209.114380, 1199.633057,  1187.612061, 1173.171875, 1156.454346,
+    1137.618530,  1116.836182, 1094.291504, 1070.173828, 1044.676514,  1017.992920, 990.314453,  965.022034,
+    939.153625,   912.780823,  886.038757,  859.054749,  831.947388,   804.825806,  777.789856,  750.929993,
+    724.327698,   698.054810,  679.845642,  662.243774,  644.769775,   627.454285,  610.324890,  593.406128,
+    1146.349243,  1145.095825, 1141.347534, 1135.142578, 1126.542847,  1115.632202, 1102.515625, 1087.316772,
+    1070.173828,  1051.238037, 1030.670776, 1008.638672, 985.821167,   964.170288,  941.609558,  918.278442,
+    894.312012,   869.840393,  844.988281,  819.872620,  794.602966,   769.281250,  744.001404,  718.848206,
+    694.713867,   677.813538,  660.946289,  644.148804,  627.454285,   610.892944,  594.491943,  578.275757,
+    1075.842163,  1074.704956, 1071.303833, 1065.671997, 1057.862915,  1047.949463, 1036.023804, 1022.192383,
+    1006.577637,  989.356018,  972.740723,  954.878296,  935.894348,   915.916138,  895.070007,  873.481934,
+    851.274536,   828.566345,  805.471558,  782.097107,  758.545227,   734.910400,  711.280090,  690.509705,
+    674.449768,   658.364380,  642.291992,  626.268066,  610.324890,   594.491943,  578.796021,  563.260986,
+    1009.671509,  1008.638672, 1005.549255, 1000.432007, 993.333557,   984.940796,  975.335022,  964.170288,
+    951.533936,   937.521851,  922.236572,  905.786682,  888.283264,   869.840393,  850.572571,  830.592407,
+    810.011841,   788.938354,  767.476257,  745.724487,  723.776733,   701.720947,  684.970581,  669.787170,
+    654.522705,   639.217041,  623.906799,  608.625916,  593.406128,   578.275757,  563.260986,  548.385559,
+    0.000000,     0.000000,    0.000000,    0.000000,    5011.678711,  4561.026367, 4150.897949, 3787.853271,
+    3459.890381,  3160.322998, 2886.693115, 2636.754883, 2408.457275,  2220.788330, 2069.294189, 1928.134521,
+    1796.604248,  1674.046265, 1559.848999, 1455.328247, 1364.407104,  1279.166016, 1199.250488, 1124.327759,
+    1054.085815,  988.231934,  932.328857,  879.889832,  830.400330,   783.694336,  739.615356,  698.015564,
+    0.000000,     0.000000,    0.000000,    0.000000,    4953.881836,  4518.670410, 4118.654297, 3763.552734,
+    3440.437256,  3144.510986, 2873.683594, 2625.945068, 2399.401855,  2214.770264, 2064.085693, 1923.603760,
+    1792.645508,  1670.574097, 1556.793091, 1452.861328, 1362.209839,  1277.203857, 1197.494507, 1122.752808,
+    1052.670532,  986.958130,  931.291748,  878.947388,  829.542664,   782.912842,  738.902405,  697.364319,
+    0.000000,     0.000000,    0.000000,    0.000000,    4793.614746,  4398.468262, 4026.789795, 3692.973877,
+    3383.596924,  3098.108398, 2835.382080, 2594.041016, 2372.622803,  2196.918701, 2048.617188, 1910.134766,
+    1780.867554,  1660.236206, 1547.688721, 1445.505981, 1355.655151,  1271.348633, 1192.252319, 1118.050049,
+    1048.443970,  983.335632,  928.192505,  876.130005,  826.978088,   780.575439,  736.769653,  695.416138,
+    0.000000,     0.000000,    0.000000,    0.000000,    4561.026367,  4217.543457, 3889.284912, 3582.401611,
+    3293.564941,  3024.014893, 2773.850098, 2542.543945, 2329.235352,  2167.820068, 2023.344727, 1888.087524,
+    1761.557373,  1643.263794, 1532.723755, 1433.395996, 1344.854126,  1261.692749, 1183.601440, 1110.284790,
+    1041.461182,  977.688965,  923.064758,  871.467163,  822.732300,   776.704590,  733.236694,  692.188110,
+    5011.678711,  4953.881836, 4793.614746, 4561.026367, 4287.298828,  3998.239258, 3716.125000, 3440.437256,
+    3176.312988,  2926.477539, 2692.172852, 2473.735840, 2276.533936,  2128.389404, 1988.987183, 1858.031494,
+    1735.171753,  1620.026123, 1512.199829, 1416.746704, 1329.985962,  1248.385864, 1171.668457, 1099.563843,
+    1031.812866,  969.874817,  915.964600,  865.006897,  816.846924,   771.336731,  728.335144,  687.707825,
+    4561.026367,  4518.670410, 4398.468262, 4217.543457, 3998.239258,  3763.552734, 3519.859619, 3276.214600,
+    3038.523682,  2810.433105, 2594.041016, 2390.411621, 2220.788330,  2079.791016, 1946.466553, 1820.706665,
+    1702.307983,  1591.010010, 1486.515869, 1395.845215, 1311.290039,  1231.629150, 1156.622437, 1086.030518,
+    1019.620911,  959.981201,  906.967834,  856.815063,  809.379150,   764.521179,  722.108276,  682.245117,
+    4150.897949,  4118.654297, 4026.789795, 3889.284912, 3716.125000,  3519.859619, 3311.126465, 3098.108398,
+    2886.693115,  2680.902100, 2483.341797, 2295.779541, 2156.401855,  2023.344727, 1896.847168, 1776.973755,
+    1663.669312,  1556.793091, 1457.802856, 1371.036987, 1289.056152,  1211.667358, 1138.670532, 1069.861084,
+    1005.035828,  948.117004,  896.168518,  846.973267,  800.399719,   756.319885,  714.610474,  675.848572,
+    3787.853271,  3763.552734, 3692.973877, 3582.401611, 3440.437256,  3276.214600, 3098.108398, 2913.088135,
+    2726.568848,  2542.543945, 2363.822754, 2214.770264, 2085.079346,  1960.440552, 1841.264404, 1727.767578,
+    1620.026245,  1518.013916, 1426.217773, 1342.712280, 1263.613770,  1188.779175, 1118.050049, 1051.258545,
+    988.231995,   934.408508,  883.676331,  835.576843,  789.991882,   746.805908,  705.905090,  668.410767,
+    3459.890381,  3440.437256, 3383.596924, 3293.564941, 3176.312988,  3038.523682, 2886.693115, 2726.568848,
+    2562.898193,  2399.401855, 2251.398438, 2128.389404, 2008.477295,  1892.457642, 1780.867554, 1674.046387,
+    1572.181152,  1475.345703, 1391.275391, 1311.290039, 1235.319580,  1163.268921, 1095.021729, 1030.446289,
+    970.984558,   918.996521,  869.613525,  822.732300,  778.249512,   736.060913,  696.064697,  659.988525,
+    3160.322998,  3144.510986, 3098.108398, 3024.014893, 2926.477539,  2810.433105, 2680.902100, 2542.543945,
+    2399.401855,  2263.892822, 2150.739014, 2038.433105, 1928.134521,  1820.706665, 1716.770630, 1616.755371,
+    1520.935669,  1433.395996, 1353.482666, 1277.203857, 1204.545532,  1135.456787, 1069.861084, 1007.662292,
+    952.402710,   902.032532,  854.112671,  808.556152,  765.273621,   724.174500,  685.184509,  650.644348,
+    2886.693115,  2873.683594, 2835.382080, 2773.850098, 2692.172852,  2594.041016, 2483.341797, 2363.822754,
+    2251.398438,  2150.739014, 2048.617188, 1946.466553, 1845.429077,  1746.389160, 1650.014648, 1556.793091,
+    1467.777588,  1389.000244, 1313.344360, 1240.890259, 1171.668457,  1105.668823, 1042.851685, 983.335632,
+    932.328857,   883.676147,  837.313965,  793.171143,  751.173340,   711.242554,  674.120605,  640.444885,
+    2636.754883,  2625.945068, 2594.041016, 2542.543945, 2473.735840,  2390.411621, 2295.779541, 2214.770264,
+    2128.389404,  2038.433105, 1946.466553, 1853.812378, 1761.557373,  1670.574341, 1581.545288, 1494.991333,
+    1416.746948,  1342.712280, 1271.348755, 1202.776001, 1137.061523,  1074.231323, 1014.277527, 959.981201,
+    910.948364,   864.090393,  819.361023,  776.704590,  736.060913,   697.364319,  662.218079,  629.460938,
+    2408.457275,  2399.401855, 2372.622803, 2329.235352, 2276.533936,  2220.788330, 2156.401855, 2085.079346,
+    2008.477295,  1928.134521, 1845.429077, 1761.557373, 1677.531860,  1594.187866, 1512.199829, 1435.803467,
+    1364.407104,  1295.054199, 1227.958252, 1163.268921, 1101.084717,  1041.461182, 984.471924,  935.451172,
+    888.446838,   843.439941,  800.399719,  759.286133,  720.051270,   682.831299,  649.558655,  617.765076,
+    2220.788330,  2214.770264, 2196.918701, 2167.820068, 2128.389404,  2079.791016, 2023.344727, 1960.440552,
+    1892.457642,  1820.706665, 1746.389160, 1670.574341, 1594.187866,  1518.013916, 1445.506348, 1377.724487,
+    1311.290039,  1246.504883, 1183.601440, 1122.752808, 1064.079834,  1007.662292, 956.720947,  909.950623,
+    865.006897,   821.887329,  780.575439,  741.044800,  703.259888,   668.410767,  636.225525,  605.431763,
+    2069.294189,  2064.085693, 2048.617188, 2023.344727, 1988.987183,  1946.466553, 1896.847168, 1841.264404,
+    1780.867554,  1716.770630, 1650.014648, 1581.545288, 1512.199829,  1445.506348, 1382.215332, 1319.541870,
+    1257.865967,  1197.494507, 1138.670532, 1081.580688, 1026.364746,  974.326904,  928.192505,  883.676147,
+    840.805603,   799.591431,  760.030457,  722.108276,  685.801392,   653.370850,  622.301086,  592.535889,
+    1928.134521,  1923.603760, 1910.134766, 1888.087524, 1858.031494,  1820.706665, 1776.973755, 1727.767578,
+    1674.046387,  1616.755371, 1556.793091, 1494.991333, 1435.803467,  1377.724487, 1319.541870, 1261.692749,
+    1204.545532,  1148.403564, 1093.514893, 1040.073730, 988.231995,   942.804993,  899.092651,  856.815063,
+    816.011780,   776.704590,  738.902405,  702.601074,  668.978333,   637.802612,  607.867737,  579.151306,
+    1796.604248,  1792.645508, 1780.867554, 1761.557373, 1735.171753,  1702.307983, 1663.669312, 1620.026245,
+    1572.181152,  1520.935669, 1467.777588, 1416.746948, 1364.407104,  1311.290039, 1257.865967, 1204.545532,
+    1151.679443,  1099.563843, 1048.443970, 998.518372,  953.479187,   910.948364,  869.613342,  829.542664,
+    790.784973,   753.372253,  717.322815,  682.831299,  651.732788,   621.794556,  593.005920,  565.351440,
+    1674.046265,  1670.574097, 1660.236206, 1643.263794, 1620.026123,  1591.010010, 1556.793091, 1518.013916,
+    1475.345703,  1433.395996, 1389.000244, 1342.712280, 1295.054199,  1246.504883, 1197.494507, 1148.403564,
+    1099.563843,  1051.258545, 1003.726868, 959.981201,  918.996521,   878.947388,  839.930481,  802.020447,
+    765.273621,   729.730225,  695.416138,  663.897949,  634.132019,   605.431763,  577.793396,  551.206848,
+    1559.848999,  1556.793091, 1547.688721, 1532.723755, 1512.199829,  1486.515869, 1457.802856, 1426.217773,
+    1391.275391,  1353.482666, 1313.344360, 1271.348755, 1227.958252,  1183.601440, 1138.670532, 1093.514893,
+    1048.443970,  1003.726868, 962.165222,  923.064758,  884.626892,   846.973267,  810.203491,  774.396851,
+    739.615356,   705.905090,  674.120605,  644.708618,  616.264648,   588.796265,  562.305176,  536.786438,
+    1455.328247,  1452.861328, 1445.505981, 1433.395996, 1416.746704,  1395.845215, 1371.036987, 1342.712280,
+    1311.290039,  1277.203857, 1240.890259, 1202.776001, 1163.268921,  1122.752808, 1081.580688, 1040.073730,
+    998.518372,   959.981201,  923.064758,  886.533447,  850.530762,   815.177795,  780.575439,  746.805908,
+    713.934814,   682.245117,  653.370850,  625.353882,  598.214355,   571.965027,  546.611938,  522.155396,
+    1364.407104,  1362.209839, 1355.655151, 1344.854126, 1329.985962,  1311.290039, 1289.056152, 1263.613770,
+    1235.319580,  1204.545532, 1171.668457, 1137.061523, 1101.084717,  1064.079834, 1026.364746, 988.231995,
+    953.479187,   918.996521,  884.626892,  850.530762,  816.846924,   783.694336,  751.173340,  719.367554,
+    688.345093,   659.988647,  632.568970,  605.917847,  580.059387,   555.010437,  530.781067,  507.375702,
+    1279.166016,  1277.203857, 1271.348633, 1261.692749, 1248.385864,  1231.629150, 1211.667358, 1188.779175,
+    1163.268921,  1135.456787, 1105.668823, 1074.231323, 1041.461182,  1007.662292, 974.326904,  942.804993,
+    910.948364,   878.947388,  846.973267,  815.177795,  783.694336,   752.638062,  722.108276,  692.188110,
+    664.459473,   637.802490,  611.796875,  586.477539,  561.871887,   537.999817,  514.875244,  492.505737,
+    1199.250488,  1197.494507, 1192.252319, 1183.601440, 1171.668457,  1156.622437, 1138.670532, 1118.050049,
+    1095.021729,  1069.861084, 1042.851685, 1014.277527, 984.471924,   956.720947,  928.192505,  899.092651,
+    869.613342,   839.930481,  810.203491,  780.575439,  751.173340,   722.108276,  693.476562,  666.712769,
+    640.975464,   615.765564,  591.129272,  567.103455,  543.718079,   520.995728,  498.952637,  480.805573,
+    1124.327759,  1122.752808, 1118.050049, 1110.284790, 1099.563843,  1086.030518, 1069.861084, 1051.258545,
+    1030.446289,  1007.662292, 983.335632,  959.981201,  935.451172,   909.950623,  883.676147,  856.815063,
+    829.542664,   802.020447,  774.396851,  746.805908,  719.367554,   692.188110,  666.712769,  642.038696,
+    617.765076,   593.947571,  570.632690,  547.859314,  525.658264,   504.054474,  484.734985,  470.036285,
+    1054.085815,  1052.670532, 1048.443970, 1041.461182, 1031.812866,  1019.620911, 1005.035828, 988.231995,
+    970.984558,   952.402710,  932.328857,  910.948364,  888.446838,   865.006897,  840.805603,  816.011780,
+    790.784973,   765.273621,  739.615356,  713.934814,  688.345093,   664.459473,  640.975464,  617.765076,
+    594.891724,   572.410034,  550.367126,  528.801819,  507.746918,   487.717651,  473.343079,  459.212067,
+    988.231934,   986.958130,  983.335632,  977.688965,  969.874817,   959.981201,  948.117004,  934.408508,
+    918.996521,   902.032532,  883.676147,  864.090393,  843.439941,   821.887329,  799.591431,  776.704590,
+    753.372253,   729.730225,  705.905090,  682.245117,  659.988647,   637.802490,  615.765564,  593.947571,
+    572.410034,   551.206848,  530.384338,  509.981781,  490.032288,   475.728912,  461.936005,  448.361359,
+    932.328857,   931.291748,  928.192505,  923.064758,  915.964600,   906.967834,  896.168518,  883.676331,
+    869.613525,   854.112671,  837.313965,  819.361023,  800.399719,   780.575439,  760.030457,  738.902405,
+    717.322815,   695.416138,  674.120605,  653.370850,  632.568970,   611.796875,  591.129272,  570.632690,
+    550.367126,   530.384338,  510.730133,  491.443481,  477.170258,   463.766785,  450.541077,  437.510101,
+    879.889832,   878.947388,  876.130005,  871.467163,  865.006897,   856.815063,  846.973267,  835.576843,
+    822.732300,   808.556152,  793.171143,  776.704590,  759.286133,   741.044800,  722.108276,  702.601074,
+    682.831299,   663.897949,  644.708618,  625.353882,  605.917847,   586.477539,  567.103455,  547.859314,
+    528.801819,   509.981781,  491.443481,  477.652222,  464.686829,   451.857361,  439.183533,  426.682556,
+    830.400330,   829.542664,  826.978088,  822.732300,  816.846924,   809.379150,  800.399719,  789.991882,
+    778.249512,   765.273621,  751.173340,  736.060913,  720.051270,   703.259888,  685.801392,  668.978333,
+    651.732788,   634.132019,  616.264648,  598.214355,  580.059387,   561.871887,  543.718079,  525.658264,
+    507.746918,   490.032288,  477.170258,  464.686829,  452.297577,   440.024200,  427.886261,  415.901093,
+    783.694336,   782.912842,  780.575439,  776.704590,  771.336731,   764.521179,  756.319885,  746.805908,
+    736.060913,   724.174500,  711.242554,  697.364319,  682.831299,   668.410767,  653.370850,  637.802612,
+    621.794556,   605.431763,  588.796265,  571.965027,  555.010437,   537.999817,  520.995728,  504.054474,
+    487.717651,   475.728912,  463.766785,  451.857361,  440.024200,   428.288727,  416.670166,  405.185883,
+    739.615356,   738.902405,  736.769653,  733.236694,  728.335144,   722.108276,  714.610474,  705.905090,
+    696.064697,   685.184509,  674.120605,  662.218079,  649.558655,   636.225525,  622.301086,  607.867737,
+    593.005920,   577.793396,  562.305176,  546.611938,  530.781067,   514.875244,  498.952637,  484.734985,
+    473.343079,   461.936005,  450.541077,  439.183533,  427.886261,   416.670166,  405.554260,  394.555481,
+    698.015564,   697.364319,  695.416138,  692.188110,  687.707825,   682.245117,  675.848572,  668.410767,
+    659.988525,   650.644348,  640.444885,  629.460938,  617.765076,   605.431763,  592.535889,  579.151306,
+    565.351440,   551.206848,  536.786438,  522.155396,  507.375702,   492.505737,  480.805573,  470.036285,
+    459.212067,   448.361359,  437.510101,  426.682556,  415.901093,   405.185883,  394.555481,  384.026642,
+    0.000000,     0.000000,    0.000000,    0.000000,    1554.123779,  1242.539551, 993.424500,  821.738708,
+    688.023743,   576.067200,  482.328461,  403.842957,  338.128937,   283.233490,  237.367096,  198.928207,
+    166.714081,   139.716614,  117.091141,  100.366226,  93.587563,    87.266724,   81.372780,   75.876930,
+    70.752258,    65.973686,   62.470375,   59.202759,   56.106068,    53.171352,   50.390141,   47.754402,
+    0.000000,     0.000000,    0.000000,    0.000000,    1511.898926,  1215.312500, 975.197021,  811.432129,
+    680.458130,   570.428589,  478.074890,  400.602692,  335.640289,   281.318329,  235.876022,  197.761490,
+    165.797119,   138.993164,  116.518372,  100.181839,  93.424210,    87.121628,   81.243637,   75.761719,
+    70.649292,    65.881516,   62.405884,   59.143909,   56.052280,    53.122139,   50.345058,   47.713055,
+    0.000000,     0.000000,    0.000000,    0.000000,    1398.316895,  1139.939331, 926.469055,  781.859680,
+    658.586914,   554.038086,  465.659058,  391.113556,  328.333618,   275.682922,  231.480927,  194.317368,
+    163.086807,   136.852478,  114.822044,  99.632172,   92.937019,    86.688774,   80.858185,   75.417809,
+    70.341858,    65.636070,   62.213120,   58.967945,   55.891445,    52.974934,   50.210182,   47.589340,
+    0.000000,     0.000000,    0.000000,    0.000000,    1242.539551,  1031.720703, 865.446106,  736.612244,
+    624.660889,   528.352295,  446.048340,  376.032318,  316.662598,   266.643341,  224.406723,  188.757797,
+    158.701111,   133.381165,  112.066162,  98.727715,   92.134651,    85.975319,   80.222427,   74.850182,
+    69.834183,    65.286209,   61.894089,   58.676640,   55.625088,    52.731091,   49.986694,   47.384289,
+    1554.123779,  1511.898926, 1398.316895, 1242.539551, 1072.704712,  913.631165,  791.500732,  680.458130,
+    581.796936,   495.450836,  420.656097,  356.335327,  301.346527,   254.681503,  214.999893,  181.334152,
+    152.824005,   128.714935,  108.351624,  97.485291,   91.031036,    84.992851,   79.346092,   74.067062,
+    69.133171,    64.801811,   61.452133,   58.272842,   55.255707,    52.392796,   49.676514,   47.099586,
+    1242.539551,  1215.312500, 1139.939331, 1031.720703, 913.631165,   811.432129,  711.604919,  618.224060,
+    533.334961,   457.657806,  391.113556,  333.178528,  283.233490,   240.389191,  203.691101,  172.362762,
+    145.689255,   123.027794,  103.808617,  95.927284,   89.644768,    83.756927,   78.242203,   73.079414,
+    68.248146,    64.188103,   60.891762,   57.760517,   54.786736,    51.963017,   49.282234,   46.737526,
+    993.424500,   975.197021,  926.469055,  865.446106,  791.500732,   711.604919,  631.209534,  554.038086,
+    482.328461,   417.209503,  359.053955,  307.756866,  263.145569,   224.406723,  190.955078,  162.196686,
+    137.561203,   116.518372,  100.551231,  94.080605,   87.998329,    82.286446,   76.926704,   71.900749,
+    67.190567,    63.451557,   60.218594,   57.144524,   54.222420,    51.445496,   48.807163,   46.300991,
+    821.738708,   811.432129,  781.859680,  736.612244,  680.458130,   618.224060,  554.038086,  491.015137,
+    431.260406,   376.032318,  325.949646,  281.318329,  241.920471,   207.367233,  177.270203,  151.198700,
+    128.714981,   109.396255,  98.191902,   91.975601,   86.117218,    80.602898,   75.417809,   70.546577,
+    65.973694,    62.599686,   59.439167,   56.430584,   53.567768,    50.844631,   48.255127,   45.793381,
+    688.023743,   680.458130,  658.586914,  624.660889,  581.796936,   533.334961,  482.328461,  431.260406,
+    381.958527,   335.640289,  293.096069,  254.681427,  220.306442,   189.852081,  163.086807,  139.716660,
+    119.419540,   101.869186,  95.586914,   89.644768,   84.028999,    78.729691,   73.735466,   69.033928,
+    64.870621,    61.640888,   58.560795,   55.625088,   52.828423,    50.165352,   47.630527,   45.218559,
+    576.067200,   570.428589,  554.038086,  528.352295,  495.450836,   457.657806,  417.209503,  376.032318,
+    335.640289,   297.179993,  261.421112,  228.614380,  198.928207,   172.362762,  148.803925,  128.066162,
+    109.923485,   98.727715,   92.775597,   87.121628,   81.762306,    76.691391,   71.900749,   67.380913,
+    63.717697,    60.584194,   57.591423,   54.735031,   52.010479,    49.413094,   46.938114,   44.580780,
+    482.328461,   478.074890,  465.659058,  446.048340,  420.656097,   391.113556,  359.053955,  325.949646,
+    293.096069,   261.421112,  231.480927,  203.691101,  178.274475,   155.306305,  134.755493,  116.518372,
+    101.297218,   95.417488,   89.797020,   84.439827,   79.346092,    74.512932,   69.935249,   65.636070,
+    62.470375,    59.439159,   56.539452,   53.767811,   51.120529,    48.593651,   46.183067,   43.884586,
+    403.842957,   400.602692,  391.113556,  376.032318,  356.335327,   333.178528,  307.756866,  281.318329,
+    254.681427,   228.614380,  203.691101,  180.306320,  158.701111,   138.993195,  121.205971,  105.294853,
+    97.485306,    91.975601,   86.688774,   81.632111,   76.808891,    72.219170,   67.860542,   64.188103,
+    61.139740,    58.215542,   55.413525,   52.731091,   50.165352,    47.713055,   45.370728,   43.134796,
+    338.128937,   335.640289,  328.333618,  316.662598,  301.346527,   283.233490,  263.145569,  241.920471,
+    220.306442,   198.928207,  178.274475,  158.701111,  140.445145,   123.643127,  108.351624,  98.907478,
+    93.587563,    88.442261,   83.486351,   78.729691,   74.178123,    69.834183,   65.706459,   62.664513,
+    59.736908,    56.923252,   54.222420,   51.632717,   49.151936,    46.777527,   44.506676,   42.336353,
+    283.233490,   281.318329,  275.682922,  266.643341,  254.681503,   240.389191,  224.406723,  207.367233,
+    189.852081,   172.362762,  155.306305,  138.993195,  123.643127,   109.396255,  99.632195,   94.578140,
+    89.644768,    84.854034,   80.222427,   75.761719,   71.479691,    67.380913,   63.985767,   61.077591,
+    58.272842,    55.572067,   52.974934,   50.480511,   48.087288,    45.793381,   43.596565,   41.494331,
+    237.367096,   235.876022,  231.480927,  224.406723,  214.999893,   203.691101,  190.955078,  177.270203,
+    163.086807,   148.803925,  134.755493,  121.205971,  108.351624,   99.632195,   94.912354,   90.256424,
+    85.692696,    81.243637,   76.926704,   72.754906,   68.737579,    65.077835,   62.213120,   59.439159,
+    56.758232,    54.171604,   51.679691,   49.282234,   46.978401,    44.766880,   42.646019,   40.613834,
+    198.928207,   197.761490,  194.317368,  188.757797,  181.334152,   172.362762,  162.196686,  151.198700,
+    139.716660,   128.066162,  116.518372,  105.294853,  98.907478,    94.578140,   90.256424,   85.975319,
+    81.762306,    77.639725,   73.625481,   69.733337,   65.973694,    63.121567,   60.400925,   57.760517,
+    55.203278,    52.731091,   50.345058,   48.045479,   45.832119,    43.704224,   41.660648,   39.699886,
+    166.714081,   165.797119,  163.086807,  158.701111,  152.824005,   145.689255,  137.561203,  128.714981,
+    119.419540,   109.923485,  101.297218,  97.485306,   93.587563,    89.644768,   85.692696,   81.762306,
+    77.879822,    74.067062,   70.341858,   66.718384,   63.784531,    61.139740,   58.560787,   56.052280,
+    53.617672,    51.259396,   48.979061,   46.777527,   44.655071,    42.611439,   40.645927,   38.757504,
+    139.716614,   138.993164,  136.852478,  133.381165,  128.714935,   123.027794,  116.518372,  109.396255,
+    101.869186,   98.727715,   95.417488,   91.975601,   88.442261,    84.854034,   81.243637,   77.639725,
+    74.067062,    70.546577,   67.095711,   64.188103,   61.640888,    59.143909,   56.703403,   54.324310,
+    52.010479,    49.764812,   47.589340,   45.485382,   43.453663,    41.494331,   39.607159,   37.791485,
+    117.091141,   116.518372,  114.822044,  112.066162,  108.351624,   103.808617,  100.551231,  98.191902,
+    95.586914,    92.775597,   89.797020,   86.688774,   83.486351,    80.222427,   76.926704,   73.625481,
+    70.341858,    67.095711,   64.323616,   61.894089,   59.498505,    57.144524,   54.838520,   52.585674,
+    50.390141,    48.255127,   46.183067,   44.175625,   42.233917,    40.358486,   38.549461,   36.806534,
+    100.366226,   100.181839,  99.632172,   98.727715,   97.485291,    95.927284,   94.080605,   91.975601,
+    89.644768,    87.121628,   84.439827,   81.632111,   78.729691,    75.761719,   72.754906,   69.733337,
+    66.718384,    64.188103,   61.894089,   59.617504,   57.367245,    55.150913,   52.974934,   50.844631,
+    48.764336,    46.737526,   44.766880,   42.854427,   41.001553,    39.209156,   37.477650,   35.807087,
+    93.587563,    93.424210,   92.937019,   92.134651,   91.031036,    89.644768,   87.998329,   86.117218,
+    84.028999,    81.762306,   79.346092,   76.808891,   74.178123,    71.479691,   68.737579,   65.973694,
+    63.784531,    61.640888,   59.498505,   57.367245,   55.255707,    53.171352,   51.120529,   49.108620,
+    47.140091,    45.218567,   43.346962,   41.527519,   39.761902,    38.051262,   36.396320,   34.797382,
+    87.266724,    87.121628,   86.688774,   85.975319,   84.992851,    83.756927,   82.286446,   80.602898,
+    78.729691,    76.691391,   74.512932,   72.219170,   69.834183,    67.380913,   65.077835,   63.121567,
+    61.139740,    59.143909,   57.144524,   55.150913,   53.171352,    51.213028,   49.282234,   47.384289,
+    45.523705,    43.704216,   41.928902,   40.200157,   38.519875,    36.889412,   35.309746,   33.781395,
+    81.372780,    81.243637,   80.858185,   80.222427,   79.346092,    78.242203,   76.926704,   75.417809,
+    73.735466,    71.900749,   69.935249,   67.860542,   65.706459,    63.985767,   62.213120,   60.400925,
+    58.560787,    56.703403,   54.838520,   52.974934,   51.120529,    49.282234,   47.466137,   45.677494,
+    43.920807,    42.199848,   40.517788,   38.877151,   37.279995,    35.727867,   34.221893,   32.932083,
+    75.876930,    75.761719,   75.417809,   74.850182,   74.067062,    73.079414,   71.900749,   70.546577,
+    69.033928,    67.380913,   65.636070,   64.188103,   62.664513,    61.077591,   59.439159,   57.760517,
+    56.052280,    54.324310,   52.585674,   50.844631,   49.108620,    47.384289,   45.677494,   43.993374,
+    42.336353,    40.710224,   39.118168,   37.562847,   36.046383,    34.570469,   33.224525,   32.131664,
+    70.752258,    70.649292,   70.341858,   69.834183,   69.133171,    68.248146,   67.190567,   65.973694,
+    64.870621,    63.717697,   62.470375,   61.139740,   59.736908,    58.272842,   56.758232,   55.203278,
+    53.617672,    52.010479,   50.390141,   48.764336,   47.140091,    45.523705,   43.920807,   42.336353,
+    40.774689,    39.239544,   37.734135,   36.261116,   34.822742,    33.446636,   32.377274,   31.328754,
+    65.973686,    65.881516,   65.636070,   65.286209,   64.801811,    64.188103,   63.451557,   62.599686,
+    61.640888,    60.584194,   59.439159,   58.215542,   56.923252,    55.572067,   54.171604,   52.731091,
+    51.259396,    49.764812,   48.255127,   46.737526,   45.218567,    43.704216,   42.199848,   40.710224,
+    39.239544,    37.791485,   36.369217,   34.975430,   33.612385,    32.554573,   31.530655,   30.525511,
+    62.470375,    62.405884,   62.213120,   61.894089,   61.452133,    60.891762,   60.218594,   59.439167,
+    58.560795,    57.591423,   56.539452,   55.413525,   54.222420,    52.974934,   51.679691,   50.345058,
+    48.979061,    47.589340,   46.183067,   44.766880,   43.346962,    41.928902,   40.517788,   39.118168,
+    37.734135,    36.369217,   35.026558,   33.708813,   32.661720,    31.666414,   30.686733,   29.723904,
+    59.202759,    59.143909,   58.967945,   58.676640,   58.272842,    57.760517,   57.144524,   56.430584,
+    55.625088,    54.735031,   53.767811,   52.731091,   51.632717,    50.480511,   49.282234,   48.045479,
+    46.777527,    45.485382,   44.175625,   42.854427,   41.527519,    40.200157,   38.877151,   37.562847,
+    36.261116,    34.975430,   33.708813,   32.697552,   31.734655,    30.784130,   29.847412,   28.925755,
+    56.106068,    56.052280,   55.891445,   55.625088,   55.255707,    54.786736,   54.222420,   53.567768,
+    52.828423,    52.010479,   51.120529,   50.165352,   49.151936,    48.087288,   46.978401,   45.832119,
+    44.655071,    43.453663,   42.233917,   41.001553,   39.761902,    38.519875,   37.279995,   36.046383,
+    34.822742,    33.612385,   32.661720,   31.734655,   30.816704,    29.909475,   29.014397,   28.132734,
+    53.171352,    53.122139,   52.974934,   52.731091,   52.392796,    51.963017,   51.445496,   50.844631,
+    50.165352,    49.413094,   48.593651,   47.713055,   46.777527,    45.793381,   44.766880,   43.704224,
+    42.611439,    41.494331,   40.358486,   39.209156,   38.051262,    36.889412,   35.727867,   34.570469,
+    33.446636,    32.554573,   31.666414,   30.784130,   29.909475,    29.044043,   28.189245,   27.346340,
+    50.390141,    50.345058,   50.210182,   49.986694,   49.676514,    49.282234,   48.807163,   48.255127,
+    47.630527,    46.938114,   46.183067,   45.370728,   44.506676,    43.596565,   42.646019,   41.660648,
+    40.645927,    39.607159,   38.549461,   37.477650,   36.396320,    35.309746,   34.221893,   33.224525,
+    32.377274,    31.530655,   30.686733,   29.847412,   29.014397,    28.189245,   27.373348,   26.567940,
+    47.754402,    47.713055,   47.589340,   47.384289,   47.099586,    46.737526,   46.300991,   45.793381,
+    45.218559,    44.580780,   43.884586,   43.134796,   42.336353,    41.494331,   40.613834,   39.699886,
+    38.757504,    37.791485,   36.806534,   35.807087,   34.797382,    33.781395,   32.932083,   32.131664,
+    31.328754,    30.525511,   29.723904,   28.925755,   28.132734,    27.346340,   26.567940,   25.798756};
+
+class lossy_acc : public VPP_ACC<lossy_acc, 1> {
+    // port bindings: every kernel argument is declared ZERO_COPY, then tied to one HBM[] index via SYS_PORT
+    ZERO_COPY(config);            // mm14, input
+    ZERO_COPY(config_fl);         // mm15, input
+    ZERO_COPY(hls_opsin_1);       // mm0, input
+    ZERO_COPY(hls_opsin_2);       // mm1, input
+    ZERO_COPY(hls_opsin_3);       // mm2, input
+    ZERO_COPY(quant_field_row);   // mm3, input
+    ZERO_COPY(masking_field_row); // mm4, input
+    ZERO_COPY(aq_map_f);          // mm5, input
+    ZERO_COPY(cmap_axi);          // mm6, output
+    ZERO_COPY(ac_coef_axiout);    // mm7, output
+    ZERO_COPY(strategy_all);      // mm8, output
+    ZERO_COPY(raw_quant_field_i); // mm9, output
+    ZERO_COPY(hls_order);         // mm10, output
+    ZERO_COPY(hls_dc8x8);         // mm11, output
+    ZERO_COPY(hls_dc16x16);       // mm12, output
+    ZERO_COPY(hls_dc32x32);       // mm13, output
+
+    SYS_PORT(config, HBM[14]);           // HBM-14
+    SYS_PORT(config_fl, HBM[15]);        // HBM-15
+    SYS_PORT(hls_opsin_1, HBM[0]);       // HBM-0
+    SYS_PORT(hls_opsin_2, HBM[1]);       // HBM-1
+    SYS_PORT(hls_opsin_3, HBM[2]);       // HBM-2
+    SYS_PORT(quant_field_row, HBM[3]);   // HBM-3
+    SYS_PORT(masking_field_row, HBM[4]); // HBM-4
+    SYS_PORT(aq_map_f, HBM[5]);          // HBM-5
+    SYS_PORT(cmap_axi, HBM[6]);          // HBM-6
+    SYS_PORT(ac_coef_axiout, HBM[7]);    // HBM-7
+    SYS_PORT(strategy_all, HBM[8]);      // HBM-8
+    SYS_PORT(raw_quant_field_i, HBM[9]); // HBM-9
+    SYS_PORT(hls_order, HBM[10]);        // HBM-10
+    SYS_PORT(hls_dc8x8, HBM[11]);        // HBM-11
+    SYS_PORT(hls_dc16x16, HBM[12]);      // HBM-12
+    SYS_PORT(hls_dc32x32, HBM[13]);      // HBM-13
+
+   public:
+    static void compute(int* config,
+                        float* config_fl,
+                        float* hls_opsin_1,
+                        float* hls_opsin_2,
+                        float* hls_opsin_3,
+                        float* quant_field_row,
+                        float* masking_field_row,
+                        float* aq_map_f,
+                        int8_t* cmap_axi,
+                        int* ac_coef_axiout,
+                        unsigned char* strategy_all,
+                        int* raw_quant_field_i,
+                        uint32_t* hls_order,
+                        float* hls_dc8x8,
+                        float* hls_dc16x16,
+                        float* hls_dc32x32);
+
+    // ------------------------------------------------------------
+    /**
+     * @brief Level 2 : kernel implementation for JXL lossy frame encode computing
+     *
+     * @param config the int config signal, such as image size, field stride, etc.
+     * @param config_fl the floating config signal, such as cost, inv_global_scale, etc.
+     * @param hls_opsin_1 the input RGB image data for channel-1.
+     * @param hls_opsin_2 the input RGB image data for channel-2.
+     * @param hls_opsin_3 the input RGB image data for channel-3.
+     * @param quant_field_row the initial quant_field data.
+     * @param masking_field_row the initial masking_field data.
+     * @param aq_map_f the initial adjust quant map data.
+     * @param cmap_axi the output of color correlation map.
+     * @param ac_coef_axiout the output of quantized AC coefficients.
+     * @param strategy_all the output of strategy for each block in image
+     * @param raw_quant_field_i the output of computed raw_quant_field
+     * @param hls_order the output of orders for each block in image
+     * @param hls_dc8x8 the DC coefficients output for 8x8 blocks
+     * @param hls_dc16x16 the DC coefficients output for 16x16 blocks
+     * @param hls_dc32x32 the DC coefficients output for 32x32 blocks
+     */
+    // ------------------------------------------------------------
+
+    static void lossyEncComp(int config[MAX_NUM_CONFIG],
+                             float config_fl[MAX_NUM_CONFIG],
+                             float* hls_opsin_1,
+                             float* hls_opsin_2,
+                             float* hls_opsin_3,
+                             float* quant_field_row,
+                             float* masking_field_row,
+                             float* aq_map_f,
+                             int8_t* cmap_axi,
+                             int* ac_coef_axiout,
+                             unsigned char* strategy_all,
+                             int* raw_quant_field_i,
+                             uint32_t* hls_order,
+                             float* hls_dc8x8,
+                             float* hls_dc16x16,
+                             float* hls_dc32x32);
+};
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/postSysLink.tcl b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/postSysLink.tcl
new file mode 100644
index 0000000000..2dc2f67034
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/postSysLink.tcl
@@ -0,0 +1 @@
+set_property -dict {CONFIG.ECC_EN false CONFIG.ECC_SCRUB_EN false} [get_bd_cells hmss_0] ;# turn off ECC_EN and ECC_SCRUB_EN on block-design cell hmss_0
diff --git a/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/utils.mk b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/utils.mk
new file mode 100644
index 0000000000..1937b53d2b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_lossy_enc_compute_sc/utils.mk
@@ -0,0 +1,239 @@
+#
+# Copyright 2019-2021 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# sc makefile-generator v1.0.0
+#
+#+-------------------------------------------------------------------------------
+# The following parameters are assigned with default values. These parameters can
+# be overridden through the make command line
+#+-------------------------------------------------------------------------------
+
+# Optional v++ switches appended to VPP_LDFLAGS; all are off by default.
+REPORT := no
+PROFILE := no
+DEBUG := no
+
+# Any REPORT value other than 'no' requests both reports:
+# 'estimate' (estimate report) and 'system' (system report).
+ifneq (no,$(REPORT))
+VPP_LDFLAGS += --report estimate --report system
+endif
+
+# PROFILE=yes adds the kernel data-profiling option (profile summary report)
+ifeq (yes,$(PROFILE))
+VPP_LDFLAGS += --profile_kernel data:all:all:all
+endif
+
+# DEBUG=yes adds the protocol debug-kernel option (debug summary report)
+ifeq (yes,$(DEBUG))
+VPP_LDFLAGS += --dk protocol:all:all:all
+endif
+
+# Default tool locations, applied only when the variable has no value; both are exported to recipes.
+ifndef XILINX_VITIS
+  # versioned install directory selected by TOOL_VERSION
+  export XILINX_VITIS = /opt/xilinx/Vitis/$(TOOL_VERSION)
+endif
+ifndef XILINX_XRT
+  # fixed XRT install directory
+  export XILINX_XRT = /opt/xilinx/xrt
+endif
+
+# check_device: warn when PLATFORM_NAME matches no PLATFORM_ALLOWLIST entry, fail when it matches a PLATFORM_BLOCKLIST entry.
+# Only POSIX test syntax is used, so the recipe also works when make runs it with a /bin/sh that has no [[ ]].
+.PHONY: check_device
+check_device:
+	@set -eu; \
+	inallowlist=False; inblocklist=False; \
+	for dev in $(PLATFORM_ALLOWLIST); do \
+	    if [ -n "$$(echo $(PLATFORM_NAME) | grep "$$dev")" ]; then inallowlist=True; fi; \
+	done; \
+	for dev in $(PLATFORM_BLOCKLIST); do \
+	    if [ -n "$$(echo $(PLATFORM_NAME) | grep "$$dev")" ]; then inblocklist=True; fi; \
+	done; \
+	if [ "$$inallowlist" = False ]; then \
+	    echo "[Warning]: The device $(PLATFORM_NAME) not in allowlist."; \
+	fi; \
+	if [ "$$inblocklist" = True ]; then \
+	    echo "[ERROR]: The device $(PLATFORM_NAME) in blocklist."; exit 1; \
+	fi
+
+#get HOST_ARCH by PLATFORM: map the 'CPU Type' reported by platforminfo to x86 / aarch32 / aarch64
+ifneq (,$(PLATFORM))
+HOST_ARCH_temp := $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed -e 's/.*://' -e '/ai_engine/d' -e 's/^[[:space:]]*//')
+ifeq ($(HOST_ARCH_temp), x86)
+HOST_ARCH := x86
+else ifeq ($(HOST_ARCH_temp), cortex-a9)
+HOST_ARCH := aarch32
+else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
+HOST_ARCH := aarch64
+endif
+endif
+# ':=' above runs platforminfo once; with '=' every $(HOST_ARCH_temp) reference would fork it again.
+#Checks for Device Family (no leading tab on the assignments: a tab could attach them to a preceding rule as recipe text)
+ifeq ($(HOST_ARCH), aarch32)
+  DEV_FAM = 7Series
+else ifeq ($(HOST_ARCH), aarch64)
+  DEV_FAM = Ultrascale
+endif
+# An empty HOST_ARCH passes the check below, because filtering an empty value yields an empty value.
+#Checks for Correct architecture
+ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
+$(error HOST_ARCH variable not set, please set correctly and rerun)
+endif
+
+# Print a one-line git log summary of the checkout at XFLIB_DIR.
+# The recipe is left empty unless `which git` finds git and $(XFLIB_DIR)/.git exists.
+check_version:
+ifneq (,$(and $(shell which git),$(wildcard $(XFLIB_DIR)/.git)))
+	@cd $(XFLIB_DIR) && git log --graph --pretty=format:'%Cred%h%Creset -%C(yellow)%d%Creset %s %Cgreen(%cr) %C(bold blue)<%an>%Creset' --abbrev-commit -n 1 && cd -
+endif
+
+#Checks for SYSROOT: running this target aborts make with an error when HOST_ARCH is not x86 and SYSROOT has no value
+check_sysroot:
+ifneq ($(HOST_ARCH), x86)
+ifndef SYSROOT
+	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+
+#Selects CXX: on x86 the system g++ is kept when its major version (__GNUG__) is >= the major of GCC_INTOOL, otherwise the gcc under XILINX_VIVADO is used (error if XILINX_VIVADO has no value); aarch64/aarch32 use the cross g++ under XILINX_VITIS
+CXX := g++
+CXX_REQ := $(shell echo $(GCC_INTOOL) | cut -f 1 -d ".")
+ifeq ($(HOST_ARCH), x86)
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_REQ)), 1)
+ifndef XILINX_VIVADO
+$(error [ERROR]: g++ version too old. Please use $(CXX_REQ) or above)
+else
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/bin/g++
+ifeq ($(LD_LIBRARY_PATH),)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64
+else
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64:$(LD_LIBRARY_PATH)
+endif
+$(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
+endif
+endif
+else ifeq ($(HOST_ARCH), aarch64)
+CXX := $(XILINX_VITIS)/gnu/aarch64/lin/aarch64-linux/bin/aarch64-linux-gnu-g++
+else ifeq ($(HOST_ARCH), aarch32)
+CXX := $(XILINX_VITIS)/gnu/aarch32/lin/gcc-arm-linux-gnueabi/bin/arm-linux-gnueabihf-g++
+endif
+
+#check binutils: prepend the binutils under XILINX_VIVADO to PATH unless expr reports the system ld version >= BINUTILS_INTOOL. NOTE(review): the version is read from the 4th space-separated field of `ld -v` - confirm this matches the ld banner on every supported host
+BINUTILS := $(shell ld -v | cut -f 4 -d " " | cut -f 1 -d "-")
+BINUTILS_REQ := $(BINUTILS_INTOOL)
+ifneq ($(shell expr $(BINUTILS) \>= $(BINUTILS_REQ)), 1)
+export PATH := $(XILINX_VIVADO)/tps/lnx64/binutils-$(BINUTILS_INTOOL)/bin:$(PATH)
+endif
+
+#Command name of the v++ tool (a bare name, so it is resolved through PATH)
+VPP := v++
+
+#Command names for aiecompiler and the aie/x86 simulators; nothing is checked here, the names are only assigned
+AIECXX := aiecompiler
+AIESIMULATOR := aiesimulator
+X86SIMULATOR := x86simulator
+
+# Installation checks: each target fails with a hint when the expected file was not found at parse time.
+.PHONY: check_vivado check_vpp check_xrt
+# Expects $(XILINX_VIVADO)/bin/vivado
+check_vivado:
+ifeq ($(wildcard $(XILINX_VIVADO)/bin/vivado),)
+	@echo "Cannot locate Vivado installation. Please set XILINX_VIVADO variable." && false
+endif
+
+# Expects $(XILINX_VITIS)/bin/v++
+check_vpp:
+ifeq ($(wildcard $(XILINX_VITIS)/bin/v++),)
+	@echo "Cannot locate Vitis installation. Please set XILINX_VITIS variable." && false
+endif
+# Expects $(XILINX_XRT)/lib/libxilinxopencl.so
+check_xrt:
+ifeq ($(wildcard $(XILINX_XRT)/lib/libxilinxopencl.so),)
+	@echo "Cannot locate XRT installation. Please set XILINX_XRT variable." && false
+endif
+
+# Put the Vitis and XRT bin directories first on the PATH exported to recipes.
+export PATH := $(XILINX_VITIS)/bin:$(XILINX_XRT)/bin:$(PATH)
+# On x86 hosts, put the XRT lib directory first on LD_LIBRARY_PATH; the ':' separator
+# is emitted only when LD_LIBRARY_PATH already has a value. No export directive is
+# applied to this particular assignment.
+ifeq (x86,$(HOST_ARCH))
+LD_LIBRARY_PATH := $(XILINX_XRT)/lib$(if $(LD_LIBRARY_PATH),:$(LD_LIBRARY_PATH))
+endif
+
+ifneq (,$(wildcard $(PLATFORM)))
+# PLATFORM names an existing path: use it verbatim
+XPLATFORM := $(PLATFORM)
+else
+# Otherwise treat PLATFORM as a name or pattern and look for a matching .xpfm; later steps run only while XPLATFORM is still empty
+# 1. search each colon-separated directory of PLATFORM_REPO_PATHS
+ifneq (,$(PLATFORM_REPO_PATHS))
+# 1.1 exact name: <dir>/$(PLATFORM)/$(PLATFORM).xpfm
+XPLATFORM := $(strip $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/$(PLATFORM)/$(PLATFORM).xpfm)))
+# 1.2 pattern: keep every <dir>/*/*.xpfm path that awk matches against PLATFORM used as a regex
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/*/*.xpfm))
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 1.2
+endif # 1
+# 2. search $(XILINX_VITIS)/platforms
+ifeq (,$(XPLATFORM))
+# 2.1 exact name, same directory layout as 1.1
+XPLATFORM := $(strip $(wildcard $(XILINX_VITIS)/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 2.2 pattern, same awk regex match as 1.2
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard $(XILINX_VITIS)/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 2.2
+endif # 2
+# 3. search the fixed location /opt/xilinx/platforms
+ifeq (,$(XPLATFORM))
+# 3.1 exact name, same directory layout as 1.1
+XPLATFORM := $(strip $(wildcard /opt/xilinx/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 3.2 pattern, same awk regex match as 1.2
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard /opt/xilinx/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 3.2
+endif # 3
+endif
+
+define MSG_PLATFORM
+No platform matched pattern '$(PLATFORM)'.
+Available platforms are: $(XPLATFORMS)
+To add more platform directories, set the PLATFORM_REPO_PATHS variable or point PLATFORM variable to the full path of platform .xpfm file.
+endef
+export MSG_PLATFORM
+# check_platform: when XPLATFORM is empty, print MSG_PLATFORM and fail.
+# The recipe reads the message from the environment (MSG_PLATFORM is exported above) instead of expanding it in make.
+.PHONY: check_platform
+check_platform:
+ifeq (,$(XPLATFORM))
+	@echo "$${MSG_PLATFORM}" && false
+endif
+#Check ends
+
+#   PLATFORM_NAME - file name of PLATFORM with any directory part and a trailing
+#   .xpfm suffix removed; uses make built-ins only, so no shell is forked per expansion
+PLATFORM_NAME = $(strip $(patsubst %.xpfm,%,$(notdir $(patsubst %/,%,$(PLATFORM)))))
+
+
+# Command shortcuts: RM/RMDIR for cleaning (both forced, RMDIR also recursive)
+RM = rm -f
+RMDIR = rm -rf
+# MV/CP are forced (CP also recursive); ECHO carries a leading @ so make does not print the echo command itself
+MV = mv -f
+CP = cp -rf
+ECHO:= @echo
diff --git a/codec/L2/demos/jxlEnc/acc_tokInit_histogram/Makefile b/codec/L2/demos/jxlEnc/acc_tokInit_histogram/Makefile
new file mode 100644
index 0000000000..3dfa176276
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_tokInit_histogram/Makefile
@@ -0,0 +1,331 @@
+# Copyright 2019-2022 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# vitis makefile-generator v2.0.6
+
+############################## Help Section ##############################
+.PHONY: help
+
+help::
+	$(ECHO) "Makefile Usage:"
+	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to generate the design for specified Target and Shell."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to run application in emulation."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make xclbin TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to build xclbin application."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make host TARGET=<hw/hw_emu/sw_emu/>"
+	$(ECHO) "      Command to build host application."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
+	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) ""
+	$(ECHO) "  make clean "
+	$(ECHO) "      Command to remove the generated non-hardware files."
+	$(ECHO) ""
+	$(ECHO) "  make cleanall"
+	$(ECHO) "      Command to remove all the generated files."
+	$(ECHO) ""
+
+############################## Setting up Project Variables ##############################
+# XFLIB_DIR is simply expanded (:=) so the bash helper behind XF_PROJ_ROOT runs once, not once per $(XFLIB_DIR) reference
+MK_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+XF_PROJ_ROOT ?= $(shell bash -c 'export MK_PATH=$(MK_PATH); echo $${MK_PATH%/L2/*}')
+CUR_DIR := $(patsubst %/,%,$(dir $(MK_PATH)))
+XFLIB_DIR := $(XF_PROJ_ROOT)
+
+# setting default values
+TARGET ?= sw_emu
+HOST_ARCH ?= x86
+
+#setting PLATFORM
+ifeq ($(PLATFORM),)
+PLATFORM := $(DEVICE)
+endif
+ifeq ($(PLATFORM),)
+PLATFORM := xilinx_u50_gen3x16_xdma_5_202210_1
+endif
+
+# #################### Checking if PLATFORM in allowlist/blocklist ############################
+PLATFORM_ALLOWLIST +=  u50
+PLATFORM_BLOCKLIST +=  zc
+
+include ./utils.mk
+TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
+TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
+BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
+BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
+EMCONFIG := $(BUILD_DIR)/emconfig.json
+XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
+export XCL_BINDIR = $(XCLBIN_DIR)
+
+EXE_FILE_DEPS :=
+BINARY_CONTAINERS_DEPS :=
+RUN_DEPS :=
+
+# get global setting
+ifeq ($(HOST_ARCH), x86)
+CXXFLAGS += -fmessage-length=0 -I$(CUR_DIR)/src/ -I$(XILINX_XRT)/include -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
+LDFLAGS += -pthread -L$(XILINX_XRT)/lib -L$(XILINX_HLS)/lnx64/tools/fpo_v7_0  -Wl,--as-needed -lOpenCL -lxrt_coreutil -lgmp -lmpfr -lIp_floating_point_v7_0_bitacc_cmodel 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
+VPP_LDFLAGS += --optimize 2 -R 2 
+else ifeq ($(HOST_ARCH), aarch64)
+CXXFLAGS += -I$(CUR_DIR)/src/ -fmessage-length=0 --sysroot=$(SYSROOT)  -I$(SYSROOT)/usr/include/xrt -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
+LDFLAGS += -pthread -L$(SYSROOT)/usr/lib -L$(XILINX_VITIS_AIETOOLS)/lib/aarch64.o -Wl,--as-needed -lxilinxopencl -lxrt_coreutil 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
+VPP_LDFLAGS += --optimize 2 -R 2 
+endif
+CXXFLAGS += $(EXTRA_CXXFLAGS)
+VPP_FLAGS += $(EXTRA_VPP_FLAGS)
+
+########################## Setting up Host Variables ##########################
+ifeq ($(TARGET),sw_emu)
+CXXFLAGS += -D SW_EMU_TEST
+endif
+ifeq ($(TARGET),hw_emu)
+CXXFLAGS += -D HW_EMU_TEST
+endif
+
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
+#Include Required Host Source Files
+HOST_SRCS += $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cjxl.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cjxl_main.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cmdline.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/codec_config.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/speed_stats.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cpu/cpu.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/cpu/os_specific.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/tools/box/box.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/time.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_png.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_pgx.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_pnm.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_jpg.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/extras/codec_psd.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner_internal.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/decode_to_jpeg.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_huffman.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/ans_common.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_context_map.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/progressive_split.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_detect_dots.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_params.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/entropy_coder.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/blending.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_comparator.cc 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_table.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_tree.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/linalg.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_file.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/aux_out.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/headers.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/alpha.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/image_bundle.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/image_metadata.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/frame_header.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/color_encoding_internal.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/quant_weights.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_fast_heuristics.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_encode.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/fields.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/luminance.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_color_management.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_bit_writer.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/image.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/loop_filter.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/color_management.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_modular.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_quant_weights.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_photon_noise.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_noise.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_splines.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_patch_dictionary.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/splines.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_xyb.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/gaborish.cc 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ar_control_field.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/gauss_blur.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/memory_manager_internal.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_file.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_image_bundle.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_external_image.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_modular.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_toc.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ans.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_modular.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/passes_state.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/chroma_from_luma.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_context_map.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_coeff_order.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_ans.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_entropy_coder.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec_common.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/compressed_dc.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/epf.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_dot_dictionary.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_xyb.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_frame.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_patch_dictionary.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_comparator.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_reconstruct.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group_border.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/filters.cc 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/convolve.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_cache.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_noise.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dec_huffman.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/dct_scales.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/ac_strategy.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_decode.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/enc_icc_codec.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli/butteraugli.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data_writer.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/jpeg_data.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data_reader.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/base/padded_bytes.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/base/data_parallel.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/base/cache_aligned.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/base/status.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/dec_ma.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/modular_image.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/encoding.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_rct.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_squeeze.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_palette.cc 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/squeeze.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_transform.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/jxl_transform.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_ma.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_encoding.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encode.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/memory.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references_hq.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/brotli_bit_stream.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_splitter.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/metablock.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment_two_pass.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encoder_dict.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/utf8_util.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/decode.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/static_dict.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/literal_cost.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/entropy_encode.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/bit_cost.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/cluster.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/dictionary_hash.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/histogram.c 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/bit_reader.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/huffman.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/state.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/dictionary.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/transform.c $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmslut.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsnamed.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspack.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscnvrt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio1.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgmt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsopt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsalpha.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmstypes.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsintrp.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgamma.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscam02.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscgats.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmshalf.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsmtrx.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsps2.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssamp.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssm.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsxform.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio0.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsplugin.cpp 
$(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmserr.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspcs.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmswtpnt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsvirt.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lodepng/lodepng.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/aligned_allocator.cc $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/targets.cc $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/acc_cluster_histogram.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/acc_enc_ac_strategy.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/acc_enc_adaptive_quantization.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/acc_enc_cache.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/acc_enc_chroma_from_luma.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/acc_enc_cluster.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/acc_enc_frame.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/acc_enc_group.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_host.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_phase1.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_phase2.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_phase3.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L2/demos/jxlEnc/acc_tokInit_histogram/host/host_tokinit_histogram.cpp 
+CXXFLAGS +=  -I $(XFLIB_DIR)/../utils/L1/include/ -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/ -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/lib/include -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/build/lib/include -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lcms/include -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/highway -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include -I $(XFLIB_DIR)/L2/demos/jxlEnc/third_partys/third_party/lodepng -I $(XFLIB_DIR)/L2/demos/jxlEnc/acc_tokInit_histogram/kernel -I $(XFLIB_DIR)/L2/demos/jxlEnc/acc_tokInit_histogram/host -I $(XFLIB_DIR)/L2/demos/jxlEnc/others/include/host_acc_tokInit_histogram -I $(XFLIB_DIR)/L2/demos/jxlEnc/others/include
+CXXFLAGS += -O3 
+
+EXE_NAME := host.exe
+EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
+EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
+
+HOST_ARGS :=  --xclbin $(BUILD_DIR)/jxlEnc.xclbin $(XFLIB_DIR)/L2/demos/jxlEnc/images/t0.png t0.jxl
+ifneq ($(HOST_ARCH), x86)
+PKG_HOST_ARGS = $(foreach args,$(HOST_ARGS),$(subst $(dir $(patsubst %/,%,$(args))),,$(args)))
+endif
+
+########################## Kernel compiler global settings ##########################
+ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
+VPP_FLAGS +=   --config $(CUR_DIR)/conn_u50.cfg
+VPP_FLAGS +=  -I $(XFLIB_DIR)/../utils/L1/include/ -I $(XFLIB_DIR)/L2/include/hw/jxlEnc
+
+else 
+VPP_FLAGS +=  -I $(XFLIB_DIR)/../utils/L1/include/ -I $(XFLIB_DIR)/L2/include/hw/jxlEnc
+
+endif
+
+######################### binary container global settings ##########################
+VPP_FLAGS_JxlEnc_ans_initHistogram +=  -D KERNEL_NAME=JxlEnc_ans_initHistogram
+VPP_FLAGS_JxlEnc_ans_initHistogram += --hls.clock 300000000:JxlEnc_ans_initHistogram
+ifneq ($(HOST_ARCH), x86)
+VPP_LDFLAGS_jxlEnc += --clock.defaultFreqHz 300000000
+else
+VPP_LDFLAGS_jxlEnc += --kernel_frequency 300
+endif
+
+ifeq ($(HOST_ARCH), x86)
+BINARY_CONTAINERS += $(BUILD_DIR)/jxlEnc.xclbin
+else
+BINARY_CONTAINERS += $(BUILD_DIR)/jxlEnc_pkg.$(LINK_TARGET_FMT)
+BINARY_CONTAINERS_PKG += $(BUILD_DIR)/jxlEnc.xclbin
+endif
+
+# ################ Setting Rules for Binary Containers (Building Kernels) ################
+$(TEMP_DIR)/JxlEnc_ans_initHistogram.xo: $(XFLIB_DIR)/L2/demos/jxlEnc/acc_tokInit_histogram/kernel/hls_init_histogram.cpp 
+	$(ECHO) "Compiling Kernel: JxlEnc_ans_initHistogram"
+	mkdir -p $(TEMP_DIR)
+	$(VPP) -c $(VPP_FLAGS_JxlEnc_ans_initHistogram) $(VPP_FLAGS) -k JxlEnc_ans_initHistogram -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
+BINARY_CONTAINER_jxlEnc_OBJS += $(TEMP_DIR)/JxlEnc_ans_initHistogram.xo
+BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_jxlEnc_OBJS)
+$(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
+	mkdir -p $(BUILD_DIR)
+	$(VPP) -l $(VPP_FLAGS) --temp_dir $(TEMP_DIR) --report_dir $(BUILD_REPORT_DIR)/jxlEnc $(VPP_LDFLAGS)  $(VPP_LDFLAGS_jxlEnc) $(AIE_LDFLAGS)   -o $@ $^
+
+############################## Setting Rules for Host (Building Host Executable) ##############################
+ifeq ($(HOST_ARCH), x86)
+$(EXE_FILE): $(EXE_FILE_DEPS) |  check_xrt
+	mkdir -p $(BUILD_DIR)
+	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
+
+else
+$(EXE_FILE): $(EXE_FILE_DEPS) |  check_sysroot
+	mkdir -p $(BUILD_DIR)
+	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
+
+endif
+
+$(EMCONFIG):
+	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
+############################## Preparing sdcard folder ##############################
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE := $(SYSROOT)/../../uImage
+else
+K_IMAGE := $(SYSROOT)/../../Image
+endif
+RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
+$(RUN_SCRIPT):
+	rm -rf $(RUN_SCRIPT)
+	@echo 'export LD_LIBRARY_PATH=/mnt:/tmp:$(LIBRARY_PATH)' >> $(RUN_SCRIPT)
+ifneq ($(filter sw_emu hw_emu, $(TARGET)),)
+	@echo 'export XCL_EMULATION_MODE=$(TARGET)' >> $(RUN_SCRIPT)
+endif
+	@echo 'export XILINX_VITIS=/mnt' >> $(RUN_SCRIPT)
+	@echo 'export XILINX_XRT=/usr' >> $(RUN_SCRIPT)
+	@echo 'if [ -f platform_desc.txt  ]; then' >> $(RUN_SCRIPT)
+	@echo '        cp platform_desc.txt /etc/xocl.txt' >> $(RUN_SCRIPT)
+	@echo 'fi' >> $(RUN_SCRIPT)
+	@echo './$(EXE_NAME) $(PKG_HOST_ARGS)' >> $(RUN_SCRIPT)
+	@echo 'return_code=$$?' >> $(RUN_SCRIPT)
+	@echo 'if [ $$return_code -ne 0 ]; then' >> $(RUN_SCRIPT)
+	@echo '        echo "ERROR: Embedded host run failed, RC=$$return_code"' >> $(RUN_SCRIPT)
+	@echo 'else' >> $(RUN_SCRIPT)
+	@echo '        echo "INFO: TEST PASSED, RC=0"' >> $(RUN_SCRIPT)
+	@echo 'fi' >> $(RUN_SCRIPT)
+	@echo 'echo "INFO: Embedded host run completed."' >> $(RUN_SCRIPT)
+	@echo 'exit $$return_code' >> $(RUN_SCRIPT)
+DATA_FILE := 
+DATA_DIR := 
+SD_FILES += $(RUN_SCRIPT)
+SD_FILES += $(EXE_FILE)
+SD_FILES += $(EMCONFIG)
+SD_FILES += xrt.ini
+SD_FILES += $(DATA_FILE)# where define DATAFILE in json
+SD_FILES_WITH_PREFIX = $(foreach sd_file,$(SD_FILES), $(if $(filter $(sd_file),$(wildcard $(sd_file))), --package.sd_file $(sd_file)))
+SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
+PACKAGE_FILES := $(BINARY_CONTAINERS)
+PACKAGE_FILES += $(AIE_CONTAINER)
+SD_CARD := $(CUR_DIR)/package_$(TARGET)
+vck190_dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+	@echo "Generating sd_card folder...."
+	mkdir -p $(SD_CARD)
+	chmod a+rx $(BUILD_DIR)/run_script.sh
+ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	@echo "### ***** sd_card generation done! ***** ###"
+vck190_dfx_hw := true
+endif
+endif
+ifeq ($(vck190_dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+	@echo "### ***** sd_card generation done! ***** ###"
+endif
+
+.PHONY: sd_card
+sd_card: $(SD_CARD)
+endif
+############################## Setting Essential Checks and Building Rules ##############################
+RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
+RUN_DEPS += $(SD_CARD)
+
+.PHONY: mkflag all run
+mkflag:
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do echo $$var >> $(BUILD_DIR)/makefile_args.txt; done
+all: check_device  check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
+#hw_emu
+ifneq (,$(filter hw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	XCL_EMULATION_MODE=$(TARGET) $(EXE_FILE) $(HOST_ARGS)
+	./check.sh
+else
+	@echo $(RUN_DEPS)
+	$(SD_CARD)/launch_$(TARGET).sh -no-reboot -run-app $(notdir $(RUN_SCRIPT)) 
+	grep "TEST PASSED, RC=0" $(SD_CARD)/qemu_output.log || exit 1
+	./check.sh
+endif
+endif
+#sw_emu
+ifneq (,$(filter sw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	XCL_EMULATION_MODE=$(TARGET) $(EXE_FILE) $(HOST_ARGS) 
+	./check.sh
+else
+	@echo $(RUN_DEPS)
+	$(SD_CARD)/launch_$(TARGET).sh -no-reboot -run-app $(notdir $(RUN_SCRIPT)) 
+	grep "TEST PASSED, RC=0" $(SD_CARD)/qemu_output.log || exit 1
+	./check.sh
+endif
+endif
+#hw
+ifeq ($(TARGET), hw)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(HOST_ARGS)
+	./check.sh
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(HOST_ARGS)
+	./check.sh
+else
+	$(ECHO) "Please copy the content of sd_card folder and data to an SD Card and run on the board"
+endif
+endif
+
+############################## Setting Targets ##############################
+# cleanh and cleank are command-only targets as well, so they are declared phony together with clean/cleanall
+.PHONY: clean cleanall cleanh cleank emconfig
+emconfig: $(EMCONFIG)
+
+.PHONY: host
+ifeq ($(HOST_ARCH), x86)
+host:  check_xrt $(EXE_FILE)
+else
+host:  check_sysroot $(EXE_FILE)
+endif
+
+.PHONY: xclbin
+ifeq ($(HOST_ARCH), x86)
+xclbin:  check_vpp check_xrt $(BINARY_CONTAINERS) 
+else
+xclbin:  check_vpp check_sysroot $(BINARY_CONTAINERS) 
+endif
+
+############################## Cleaning Rules ##############################
+cleanh:
+	-$(RMDIR) $(EXE_FILE) vitis_* TempConfig system_estimate.xtxt *.rpt .run/
+	-$(RMDIR) src/*.ll _xocc_* .Xil dltmp* xmltmp* *.log *.jou *.wcfg *.wdb sample_link.ini sample_compile.ini obj*  bin* *.csv *.jpg *.jpeg *.png
+# cleank: remove generated xclbin files, emulation output and the per-target kernel temp directories
+cleank:
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*
+
+cleanall: cleanh cleank
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
+	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
+
+clean: cleanh
\ No newline at end of file
diff --git a/codec/L2/demos/jxlEnc/acc_tokInit_histogram/check.sh b/codec/L2/demos/jxlEnc/acc_tokInit_histogram/check.sh
new file mode 100755
index 0000000000..780685e603
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_tokInit_histogram/check.sh
@@ -0,0 +1 @@
+echo "7d5224e22f239d3b9322d507b8ca3fb9 t0.jxl" | md5sum -c -
diff --git a/codec/L2/demos/jxlEnc/acc_tokInit_histogram/conn_u50.cfg b/codec/L2/demos/jxlEnc/acc_tokInit_histogram/conn_u50.cfg
new file mode 100644
index 0000000000..c1d1936089
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_tokInit_histogram/conn_u50.cfg
@@ -0,0 +1,41 @@
+[hls]
+#pre_tcl=JxlEnc_ans_pre.tcl
+
+[connectivity]
+sp=JxlEnc_ans_initHistogram_1.ac_coeff_ordered_ddr:HBM[2] 
+sp=JxlEnc_ans_initHistogram_1.strategy_ddr:HBM[3] 
+sp=JxlEnc_ans_initHistogram_1.qf_ddr:HBM[4] 
+sp=JxlEnc_ans_initHistogram_1.qdc_ddr:HBM[5]
+sp=JxlEnc_ans_initHistogram_1.ctx_map:HBM[6] 
+sp=JxlEnc_ans_initHistogram_1.qf_thresholds:HBM[6]
+sp=JxlEnc_ans_initHistogram_1.config:HBM[7] 
+sp=JxlEnc_ans_initHistogram_1.ac_tokens_ddr:HBM[8] 
+sp=JxlEnc_ans_initHistogram_1.tokens0_ptr:HBM[9]  
+sp=JxlEnc_ans_initHistogram_1.tokens1_ptr:HBM[10] 
+sp=JxlEnc_ans_initHistogram_1.tokens2_ptr:HBM[11] 
+sp=JxlEnc_ans_initHistogram_1.tokens3_ptr:HBM[12] 
+# nonempty
+sp=JxlEnc_ans_initHistogram_1.nonempty0_ptr:HBM[9]
+sp=JxlEnc_ans_initHistogram_1.nonempty1_ptr:HBM[9]
+sp=JxlEnc_ans_initHistogram_1.nonempty2_ptr:HBM[9]
+sp=JxlEnc_ans_initHistogram_1.nonempty3_ptr:HBM[9]
+sp=JxlEnc_ans_initHistogram_1.nonempty4_ptr:HBM[9]
+# histograms_ptr
+sp=JxlEnc_ans_initHistogram_1.histograms0_ptr:HBM[10]
+sp=JxlEnc_ans_initHistogram_1.histograms1_ptr:HBM[10]
+sp=JxlEnc_ans_initHistogram_1.histograms2_ptr:HBM[10]
+sp=JxlEnc_ans_initHistogram_1.histograms3_ptr:HBM[10]
+sp=JxlEnc_ans_initHistogram_1.histograms4_ptr:HBM[10]
+# histograms_size
+sp=JxlEnc_ans_initHistogram_1.histograms_size0_ptr:HBM[11]
+sp=JxlEnc_ans_initHistogram_1.histograms_size1_ptr:HBM[11]
+sp=JxlEnc_ans_initHistogram_1.histograms_size2_ptr:HBM[11]
+sp=JxlEnc_ans_initHistogram_1.histograms_size3_ptr:HBM[11]
+sp=JxlEnc_ans_initHistogram_1.histograms_size4_ptr:HBM[11]
+# total_count
+sp=JxlEnc_ans_initHistogram_1.total_count0_ptr:HBM[12]
+sp=JxlEnc_ans_initHistogram_1.total_count1_ptr:HBM[12]
+sp=JxlEnc_ans_initHistogram_1.total_count2_ptr:HBM[12]
+sp=JxlEnc_ans_initHistogram_1.total_count3_ptr:HBM[12]
+sp=JxlEnc_ans_initHistogram_1.total_count4_ptr:HBM[12]
+
diff --git a/codec/L2/demos/jxlEnc/acc_tokInit_histogram/description.json b/codec/L2/demos/jxlEnc/acc_tokInit_histogram/description.json
new file mode 100644
index 0000000000..34d11bd3a8
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_tokInit_histogram/description.json
@@ -0,0 +1,329 @@
+{
+    "gui": false,
+    "name": "JXL ACC_TOKINIT Demo",
+    "description": "This example is based on Google's PIK, which was chosen as the base framework for JPEG XL. The pikEnc is based on the 'fast mode' of PIK, which can provide better encoding efficiency than most other still image encoding methods. The pikEnc is based on the Xilinx HLS design methodology and optimized for FPGA architecture. It can provide higher throughput and lower latency compared to software-based solutions.",
+    "flow": "vitis",
+    "platform_allowlist": [
+        "u50"
+    ],
+    "platform_blocklist": [
+        "zc"
+    ],
+    "platform_properties": {
+        "u50": {
+            "v++": {
+                "compiler": {
+                    "clflags": [
+                        "--config PROJECT/conn_u50.cfg"
+                    ]
+                }
+            }
+        }
+    },
+    "data": [
+        "./data"
+    ],
+    "launch": [
+        {
+            "cmd_args": " --xclbin BUILD/jxlEnc.xclbin LIB_DIR/L2/demos/jxlEnc/images/t0.png t0.jxl",
+            "name": "generic launch for all flows"
+        }
+    ],
+    "post_launch": [
+        {
+            "launch_cmd": [
+                "./check.sh"
+            ]
+        }
+    ],
+    "host": {
+        "host_exe": "host.exe",
+        "compiler": {
+            "sources": [
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cjxl.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cjxl_main.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cmdline.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/codec_config.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/speed_stats.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cpu/cpu.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/cpu/os_specific.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/tools/box/box.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/time.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_png.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_pgx.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_pnm.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_jpg.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/extras/codec_psd.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner_internal.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/decode_to_jpeg.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_huffman.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/ans_common.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_context_map.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/progressive_split.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_detect_dots.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_params.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/entropy_coder.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/blending.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_comparator.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_table.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_tree.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/linalg.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_file.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/aux_out.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/headers.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/alpha.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/image_bundle.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/image_metadata.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/frame_header.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/color_encoding_internal.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/quant_weights.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_fast_heuristics.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_encode.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/fields.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/luminance.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_color_management.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_bit_writer.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/image.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/loop_filter.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/color_management.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_modular.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_quant_weights.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_photon_noise.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_noise.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_splines.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_patch_dictionary.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/splines.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_xyb.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/gaborish.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ar_control_field.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/gauss_blur.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/memory_manager_internal.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_file.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_image_bundle.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_external_image.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_modular.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_toc.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ans.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_modular.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/passes_state.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/chroma_from_luma.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_context_map.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_coeff_order.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_ans.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_entropy_coder.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec_common.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/compressed_dc.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/epf.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_dot_dictionary.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_xyb.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_frame.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_patch_dictionary.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_comparator.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_reconstruct.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group_border.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/filters.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/convolve.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_cache.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_noise.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dec_huffman.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/dct_scales.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/ac_strategy.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_decode.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/enc_icc_codec.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli/butteraugli.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data_writer.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/jpeg_data.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data_reader.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/base/padded_bytes.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/base/data_parallel.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/base/cache_aligned.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/base/status.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/dec_ma.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/modular_image.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/encoding.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_rct.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_squeeze.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_palette.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/squeeze.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_transform.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/jxl_transform.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_ma.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_encoding.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encode.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/memory.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references_hq.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/brotli_bit_stream.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_splitter.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/metablock.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment_two_pass.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encoder_dict.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/utf8_util.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/decode.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/static_dict.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/literal_cost.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/entropy_encode.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/bit_cost.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/cluster.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/dictionary_hash.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/histogram.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/bit_reader.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/huffman.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/state.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/dictionary.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/transform.c",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmslut.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsnamed.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspack.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscnvrt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio1.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgmt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsopt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsalpha.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmstypes.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsintrp.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgamma.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscam02.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscgats.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmshalf.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsmtrx.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsps2.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssamp.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssm.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsxform.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio0.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsplugin.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmserr.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspcs.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmswtpnt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsvirt.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lodepng/lodepng.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/aligned_allocator.cc",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/targets.cc",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/acc_cluster_histogram.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/acc_enc_ac_strategy.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/acc_enc_adaptive_quantization.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/acc_enc_cache.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/acc_enc_chroma_from_luma.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/acc_enc_cluster.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/acc_enc_frame.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/acc_enc_group.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_host.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_phase1.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_phase2.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_phase3.cpp",
+                "LIB_DIR/ext/xcl2/xcl2.cpp",
+                "LIB_DIR/L2/demos/jxlEnc/acc_tokInit_histogram/host/host_tokinit_histogram.cpp"
+            ],
+            "includepaths": [
+                "LIB_DIR/../utils/L1/include/",
+                "LIB_DIR/ext/xcl2",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/lib/include",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/build/lib/include",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lcms/include",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/highway",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include",
+                "LIB_DIR/L2/demos/jxlEnc/third_partys/third_party/lodepng",
+                "LIB_DIR/L2/demos/jxlEnc/acc_tokInit_histogram/kernel",
+                "LIB_DIR/L2/demos/jxlEnc/acc_tokInit_histogram/host",
+                "LIB_DIR/L2/demos/jxlEnc/others/include/host_acc_tokInit_histogram",
+                "LIB_DIR/L2/demos/jxlEnc/others/include"
+            ],
+            "options": "-O3 "
+        }
+    },
+    "v++": {
+        "compiler": {
+            "includepaths": [
+                "LIB_DIR/../utils/L1/include/",
+                "LIB_DIR/L2/include/hw/jxlEnc"
+            ]
+        }
+    },
+    "containers": [
+        {
+            "name": "jxlEnc",
+            "accelerators": [
+                {
+                    "location": "LIB_DIR/L2/demos/jxlEnc/acc_tokInit_histogram/kernel/hls_init_histogram.cpp",
+                    "frequency": 300.0,
+                    "clflags": " -D KERNEL_NAME=JxlEnc_ans_initHistogram",
+                    "name": "JxlEnc_ans_initHistogram",
+                    "num_compute_units": 1,
+                    "compute_units": [
+                        {
+                            "name": "JxlEnc_ans_initHistogram",
+                            "arguments": [
+                                {
+                                    "name": "gmem0_0",
+                                    "memory": "DDR[0]"
+                                },
+                                {
+                                    "name": "gmem0_1",
+                                    "memory": "DDR[0]"
+                                },
+                                {
+                                    "name": "gmem1_0",
+                                    "memory": "DDR[1]"
+                                },
+                                {
+                                    "name": "gmem1_1",
+                                    "memory": "DDR[1]"
+                                },
+                                {
+                                    "name": "gmem1_2",
+                                    "memory": "DDR[1]"
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ],
+            "frequency": 300
+        }
+    ],
+    "testinfo": {
+        "disable": false,
+        "jobs": [
+            {
+                "index": 0,
+                "dependency": [],
+                "env": "",
+                "cmd": "",
+                "max_memory_MB": {
+                    "vitis_hw_build": 81920,
+                    "vitis_hw_emu": 40960,
+                    "vitis_sw_emu": 10240,
+                    "vitis_hw_run": 10240
+                },
+                "max_time_min": {
+                    "vitis_hw_build": 3200,
+                    "vitis_hw_emu": 1600,
+                    "vitis_sw_emu": 120,
+                    "vitis_hw_run": 10
+                }
+            }
+        ],
+        "targets": [
+            "vitis_sw_emu",
+            "vitis_hw_emu",
+            "vitis_hw"
+        ],
+        "category": "canary"
+    }
+}
diff --git a/codec/L2/demos/jxlEnc/acc_tokInit_histogram/host/host_tokinit_histogram.cpp b/codec/L2/demos/jxlEnc/acc_tokInit_histogram/host/host_tokinit_histogram.cpp
new file mode 100644
index 0000000000..f9072acc8f
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_tokInit_histogram/host/host_tokinit_histogram.cpp
@@ -0,0 +1,638 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HOST_TOKINIT_HISTOGRAM_CPP
+#define HOST_TOKINIT_HISTOGRAM_CPP
+
+#include <iostream>
+#include <sys/time.h>
+#include "ap_int.h"
+
+#ifndef HLS_TEST
+#include "xf_utils_sw/logger.hpp"
+#include "xcl2.hpp"
+const int PIXEL_W = 2048; // pixel width the host buffers below are sized for
+const int PIXEL_H = 2048; // pixel height the host buffers below are sized for
+const int FRAME_DIM = 3; // NOTE(review): presumably the number of colour planes -- confirm
+const int ALL_PIXEL = PIXEL_W * PIXEL_H * FRAME_DIM; // element count of the ordered AC coefficient buffer
+const int MAX_NUM_BLK88_W = PIXEL_W / 8; // PIXEL_W expressed in units of 8 pixels
+const int MAX_NUM_BLK88_H = PIXEL_H / 8; // PIXEL_H expressed in units of 8 pixels
+const int MAX_NUM_BLK88 = MAX_NUM_BLK88_W * MAX_NUM_BLK88_H; // element count of the strategy/qf/qdc buffers
+const int MAX_ORDERS_SIZE = (3 * 64 + 3 * 64 + 3 * 256 + 3 * 1024);
+const int MAX_QF_THRESH_SIZE = 256; // element count of the qf_thresholds buffer
+const int MAX_CTX_MAP_SIZE = 256; // element count of the ctx_map buffer
+const int MAX_AC_TOKEN_SIZE = ALL_PIXEL; // element count of each AC token buffer
+#else
+#include "hls_init_histogram.hpp"
+#endif
+
+#define MAX_NUM_CONFIG 32
+
+unsigned long diff(const struct timeval* newTime, const struct timeval* oldTime) { // elapsed microseconds, new - old
+    return (newTime->tv_sec - oldTime->tv_sec) * 1000000 + (newTime->tv_usec - oldTime->tv_usec);
+} // the return type is unsigned, so the result is only meaningful when *newTime is not earlier than *oldTime
+
+template <typename T> // T: element type of the returned buffer
+T* aligned_alloc(std::size_t num) { // returns storage for num objects of T, aligned to 4096 bytes
+    void* ptr = NULL;
+    if (posix_memalign(&ptr, 4096, num * sizeof(T))) throw std::bad_alloc(); // non-zero return means failure
+    return reinterpret_cast<T*>(ptr); // storage is returned as-is: nothing is zeroed and no constructor is run
+}
+
+void hls_ANSinitHistogram_wrapper(std::string xclbinPath,
+                                  int config[32],
+                                  //====================
+                                  int32_t* ac_coeff_ordered_ddr,
+                                  int32_t* strategy_ddr,
+                                  int32_t* qf_ddr,
+                                  uint8_t* qdc_ddr,
+                                  uint8_t* ctx_map,
+                                  uint32_t* qf_thresholds,
+                                  uint64_t* ac_tokens_ddr,
+                                  //====================
+                                  uint64_t* tokens0_ptr,
+                                  uint64_t* tokens1_ptr,
+                                  uint64_t* tokens2_ptr,
+                                  uint64_t* tokens3_ptr,
+                                  //====================
+                                  int32_t* histograms0_ptr,
+                                  uint32_t* histograms_size0_ptr,
+                                  uint32_t* total_count0_ptr,
+                                  uint32_t* nonempty0_ptr,
+                                  //======================
+                                  int32_t* histograms1_ptr,
+                                  uint32_t* histograms_size1_ptr,
+                                  uint32_t* total_count1_ptr,
+                                  uint32_t* nonempty1_ptr,
+                                  //======================
+                                  int32_t* histograms2_ptr,
+                                  uint32_t* histograms_size2_ptr,
+                                  uint32_t* total_count2_ptr,
+                                  uint32_t* nonempty2_ptr,
+                                  //======================
+                                  int32_t* histograms3_ptr,
+                                  uint32_t* histograms_size3_ptr,
+                                  uint32_t* total_count3_ptr,
+                                  uint32_t* nonempty3_ptr,
+                                  //======================
+                                  int32_t* histograms4_ptr,
+                                  uint32_t* histograms_size4_ptr,
+                                  uint32_t* total_count4_ptr,
+                                  uint32_t* nonempty4_ptr) {
+#ifndef HLS_TEST
+
+    xf::common::utils_sw::Logger logger(std::cout, std::cerr);
+    cl_int fail;
+
+    struct timeval start_time; // End to end time clock start
+    gettimeofday(&start_time, 0);
+
+    // platform related operations
+    std::vector<cl::Device> devices = xcl::get_xil_devices();
+    cl::Device device = devices[0];
+
+    // Creating Context and Command Queue for selected Device
+    cl::Context context(device, NULL, NULL, NULL, &fail);
+    logger.logCreateContext(fail);
+    cl::CommandQueue q(context, device, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &fail);
+    logger.logCreateCommandQueue(fail);
+    std::string devName = device.getInfo<CL_DEVICE_NAME>();
+    printf("INFO: Found Device=%s\n", devName.c_str());
+    cl::Program::Binaries xclBins = xcl::import_binary_file(xclbinPath);
+
+    devices.resize(1);
+    cl::Program program(context, devices, xclBins, NULL, &fail);
+    logger.logCreateProgram(fail);
+
+    int repInt = 1;
+    // create kernels
+    // std::vector<cl::Kernel> cluster_kernel(repInt);
+    std::vector<cl::Kernel> initHist_kernel(repInt);
+    for (int i = 0; i < repInt; i++) {
+        initHist_kernel[i] = cl::Kernel(program, "JxlEnc_ans_initHistogram", &fail);
+        logger.logCreateKernel(fail);
+    }
+    std::cout << "INFO: kernel has been created" << std::endl;
+
+    // declare map of host Buffers
+    std::cout << "kernel config size:" << 26 << std::endl;
+    std::cout << "group_dim: " << config[4] << std::endl;
+    std::cout << "do_once: " << config[12] << "," << config[13] << "," << config[14] << "," << config[15] << ","
+              << config[16] << std::endl;
+
+    // add code for hls_ANSinitTop
+    // 1. create all I/O Buffer
+    uint32_t* hb_config = aligned_alloc<uint32_t>(MAX_NUM_CONFIG);
+
+    int32_t* hb_ac_coeff_ordered_ddr = aligned_alloc<int32_t>(ALL_PIXEL);
+    int32_t* hb_strategy_ddr = aligned_alloc<int32_t>(MAX_NUM_BLK88);
+    int32_t* hb_qf_ddr = aligned_alloc<int32_t>(MAX_NUM_BLK88);
+    uint8_t* hb_qdc_ddr = aligned_alloc<uint8_t>(MAX_NUM_BLK88);
+    uint8_t* hb_ctx_map = aligned_alloc<uint8_t>(MAX_CTX_MAP_SIZE);
+    uint32_t* hb_qf_thresholds = aligned_alloc<uint32_t>(MAX_QF_THRESH_SIZE);
+    uint64_t* hb_ac_tokens_ddr = aligned_alloc<uint64_t>(MAX_AC_TOKEN_SIZE);
+
+    ap_uint<64>* hb_token0_ptr = aligned_alloc<ap_uint<64> >(MAX_AC_TOKEN_SIZE);
+    ap_uint<64>* hb_token1_ptr = aligned_alloc<ap_uint<64> >(MAX_AC_TOKEN_SIZE);
+    ap_uint<64>* hb_token2_ptr = aligned_alloc<ap_uint<64> >(MAX_AC_TOKEN_SIZE);
+    ap_uint<64>* hb_token3_ptr = aligned_alloc<ap_uint<64> >(MAX_AC_TOKEN_SIZE);
+
+    int32_t* hb_histograms0_ptr = aligned_alloc<int32_t>(163840);
+    int32_t* hb_histograms1_ptr = aligned_alloc<int32_t>(163840);
+    int32_t* hb_histograms2_ptr = aligned_alloc<int32_t>(163840);
+    int32_t* hb_histograms3_ptr = aligned_alloc<int32_t>(163840);
+    int32_t* hb_histograms4_ptr = aligned_alloc<int32_t>(163840);
+
+    uint32_t* hb_histograms_size0_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_histograms_size1_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_histograms_size2_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_histograms_size3_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_histograms_size4_ptr = aligned_alloc<uint32_t>(4096);
+
+    uint32_t* hb_total_count0_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_total_count1_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_total_count2_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_total_count3_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_total_count4_ptr = aligned_alloc<uint32_t>(4096);
+
+    uint32_t* hb_nonempty0_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_nonempty1_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_nonempty2_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_nonempty3_ptr = aligned_alloc<uint32_t>(4096);
+    uint32_t* hb_nonempty4_ptr = aligned_alloc<uint32_t>(4096);
+
+    //==================================================
+    // 2. init all the host Buffers
+    //==================================================
+    for (int j = 0; j < MAX_NUM_CONFIG; j++) {
+        hb_config[j] = config[j];
+    }
+
+    for (int j = 0; j < ALL_PIXEL; j++) {
+        hb_ac_coeff_ordered_ddr[j] = ac_coeff_ordered_ddr[j];
+    }
+
+    for (int j = 0; j < MAX_NUM_BLK88; j++) {
+        hb_strategy_ddr[j] = strategy_ddr[j];
+    }
+
+    for (int j = 0; j < MAX_NUM_BLK88; j++) {
+        hb_qdc_ddr[j] = qdc_ddr[j];
+    }
+
+    for (int j = 0; j < MAX_NUM_BLK88; j++) {
+        hb_qf_ddr[j] = qf_ddr[j];
+    }
+
+    for (int j = 0; j < MAX_CTX_MAP_SIZE; j++) {
+        hb_ctx_map[j] = ctx_map[j];
+    }
+
+    for (int j = 0; j < MAX_QF_THRESH_SIZE; j++) {
+        hb_qf_thresholds[j] = qf_thresholds[j];
+    }
+
+    for (int j = 0; j < MAX_AC_TOKEN_SIZE; j++) {
+        hb_ac_tokens_ddr[j] = ac_tokens_ddr[j];
+    }
+
+    for (int j = 0; j < MAX_AC_TOKEN_SIZE; j++) {
+        hb_token0_ptr[j] = (ap_uint<64>)tokens0_ptr[j];
+        hb_token1_ptr[j] = (ap_uint<64>)tokens1_ptr[j];
+        hb_token2_ptr[j] = (ap_uint<64>)tokens2_ptr[j];
+        hb_token3_ptr[j] = (ap_uint<64>)tokens3_ptr[j];
+    }
+
+    for (int j = 0; j < 163840; j++) {
+        hb_histograms0_ptr[j] = 0;
+        hb_histograms1_ptr[j] = 0;
+        hb_histograms2_ptr[j] = 0;
+        hb_histograms3_ptr[j] = 0;
+        hb_histograms4_ptr[j] = 0;
+    }
+
+    for (int j = 0; j < 4096; j++) {
+        hb_histograms_size0_ptr[j] = 0;
+        hb_histograms_size1_ptr[j] = 0;
+        hb_histograms_size2_ptr[j] = 0;
+        hb_histograms_size3_ptr[j] = 0;
+        hb_histograms_size4_ptr[j] = 0;
+    }
+
+    for (int j = 0; j < 4096; j++) {
+        hb_total_count0_ptr[j] = 0;
+        hb_total_count1_ptr[j] = 0;
+        hb_total_count2_ptr[j] = 0;
+        hb_total_count3_ptr[j] = 0;
+        hb_total_count4_ptr[j] = 0;
+    }
+
+    for (int j = 0; j < 4096; j++) {
+        hb_nonempty0_ptr[j] = 0;
+        hb_nonempty1_ptr[j] = 0;
+        hb_nonempty2_ptr[j] = 0;
+        hb_nonempty3_ptr[j] = 0;
+        hb_nonempty4_ptr[j] = 0;
+    }
+
+    // mapping to HBM banks
+    std::vector<cl_mem_ext_ptr_t> mext_o(33);
+    mext_o[0] = {(((unsigned int)(7)) | XCL_MEM_TOPOLOGY), hb_config, 0};
+
+    mext_o[1] = {(((unsigned int)(2)) | XCL_MEM_TOPOLOGY), hb_ac_coeff_ordered_ddr, 0};
+    mext_o[2] = {(((unsigned int)(3)) | XCL_MEM_TOPOLOGY), hb_strategy_ddr, 0};
+    mext_o[3] = {(((unsigned int)(4)) | XCL_MEM_TOPOLOGY), hb_qf_ddr, 0};
+    mext_o[4] = {(((unsigned int)(5)) | XCL_MEM_TOPOLOGY), hb_qdc_ddr, 0};
+    mext_o[5] = {(((unsigned int)(6)) | XCL_MEM_TOPOLOGY), hb_ctx_map, 0};
+    mext_o[6] = {(((unsigned int)(6)) | XCL_MEM_TOPOLOGY), hb_qf_thresholds, 0};
+    mext_o[7] = {(((unsigned int)(8)) | XCL_MEM_TOPOLOGY), hb_ac_tokens_ddr, 0};
+
+    mext_o[8] = {(((unsigned int)(9)) | XCL_MEM_TOPOLOGY), hb_token0_ptr, 0};
+    mext_o[9] = {(((unsigned int)(10)) | XCL_MEM_TOPOLOGY), hb_token1_ptr, 0};
+    mext_o[10] = {(((unsigned int)(11)) | XCL_MEM_TOPOLOGY), hb_token2_ptr, 0};
+    mext_o[11] = {(((unsigned int)(12)) | XCL_MEM_TOPOLOGY), hb_token3_ptr, 0};
+
+    mext_o[12] = {(((unsigned int)(9)) | XCL_MEM_TOPOLOGY), hb_nonempty0_ptr, 0};
+    mext_o[13] = {(((unsigned int)(9)) | XCL_MEM_TOPOLOGY), hb_nonempty1_ptr, 0};
+    mext_o[14] = {(((unsigned int)(9)) | XCL_MEM_TOPOLOGY), hb_nonempty2_ptr, 0};
+    mext_o[15] = {(((unsigned int)(9)) | XCL_MEM_TOPOLOGY), hb_nonempty3_ptr, 0};
+    mext_o[16] = {(((unsigned int)(9)) | XCL_MEM_TOPOLOGY), hb_nonempty4_ptr, 0};
+
+    mext_o[17] = {(((unsigned int)(10)) | XCL_MEM_TOPOLOGY), hb_histograms0_ptr, 0};
+    mext_o[18] = {(((unsigned int)(10)) | XCL_MEM_TOPOLOGY), hb_histograms1_ptr, 0};
+    mext_o[19] = {(((unsigned int)(10)) | XCL_MEM_TOPOLOGY), hb_histograms2_ptr, 0};
+    mext_o[20] = {(((unsigned int)(10)) | XCL_MEM_TOPOLOGY), hb_histograms3_ptr, 0};
+    mext_o[21] = {(((unsigned int)(10)) | XCL_MEM_TOPOLOGY), hb_histograms4_ptr, 0};
+
+    mext_o[22] = {(((unsigned int)(11)) | XCL_MEM_TOPOLOGY), hb_histograms_size0_ptr, 0};
+    mext_o[23] = {(((unsigned int)(11)) | XCL_MEM_TOPOLOGY), hb_histograms_size1_ptr, 0};
+    mext_o[24] = {(((unsigned int)(11)) | XCL_MEM_TOPOLOGY), hb_histograms_size2_ptr, 0};
+    mext_o[25] = {(((unsigned int)(11)) | XCL_MEM_TOPOLOGY), hb_histograms_size3_ptr, 0};
+    mext_o[26] = {(((unsigned int)(11)) | XCL_MEM_TOPOLOGY), hb_histograms_size4_ptr, 0};
+
+    mext_o[27] = {(((unsigned int)(12)) | XCL_MEM_TOPOLOGY), hb_total_count0_ptr, 0};
+    mext_o[28] = {(((unsigned int)(12)) | XCL_MEM_TOPOLOGY), hb_total_count1_ptr, 0};
+    mext_o[29] = {(((unsigned int)(12)) | XCL_MEM_TOPOLOGY), hb_total_count2_ptr, 0};
+    mext_o[30] = {(((unsigned int)(12)) | XCL_MEM_TOPOLOGY), hb_total_count3_ptr, 0};
+    mext_o[31] = {(((unsigned int)(12)) | XCL_MEM_TOPOLOGY), hb_total_count4_ptr, 0};
+
+    //===================================================
+    // 3. create device Buffer and map dev buf to host buf,
+    //===================================================
+    cl::Buffer db_config;
+
+    cl::Buffer db_ac_coef_ordered_ddr;
+    cl::Buffer db_strategy_ddr;
+    cl::Buffer db_qf_ddr;
+    cl::Buffer db_qdc_ddr;
+    cl::Buffer db_ctx_map;
+    cl::Buffer db_qf_thresholds;
+    cl::Buffer db_ac_tokens_ddr;
+
+    cl::Buffer db_token0_ptr;
+    cl::Buffer db_token1_ptr;
+    cl::Buffer db_token2_ptr;
+    cl::Buffer db_token3_ptr;
+
+    cl::Buffer db_histograms0_ptr;
+    cl::Buffer db_histograms1_ptr;
+    cl::Buffer db_histograms2_ptr;
+    cl::Buffer db_histograms3_ptr;
+    cl::Buffer db_histograms4_ptr;
+
+    cl::Buffer db_histograms_size0_ptr;
+    cl::Buffer db_histograms_size1_ptr;
+    cl::Buffer db_histograms_size2_ptr;
+    cl::Buffer db_histograms_size3_ptr;
+    cl::Buffer db_histograms_size4_ptr;
+
+    cl::Buffer db_total_count0_ptr;
+    cl::Buffer db_total_count1_ptr;
+    cl::Buffer db_total_count2_ptr;
+    cl::Buffer db_total_count3_ptr;
+    cl::Buffer db_total_count4_ptr;
+
+    cl::Buffer db_nonempty0_ptr;
+    cl::Buffer db_nonempty1_ptr;
+    cl::Buffer db_nonempty2_ptr;
+    cl::Buffer db_nonempty3_ptr;
+    cl::Buffer db_nonempty4_ptr;
+
+    // init cl Buffer
+    db_config = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                           sizeof(int32_t) * MAX_NUM_CONFIG, &mext_o[0]);
+
+    db_ac_coef_ordered_ddr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                        sizeof(int32_t) * ALL_PIXEL, &mext_o[1]);
+
+    db_strategy_ddr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                 sizeof(int32_t) * MAX_NUM_BLK88, &mext_o[2]);
+
+    db_qf_ddr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                           sizeof(int32_t) * MAX_NUM_BLK88, &mext_o[3]);
+
+    db_qdc_ddr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                            sizeof(int32_t) * MAX_NUM_BLK88, &mext_o[4]);
+
+    db_ctx_map = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                            sizeof(int32_t) * MAX_CTX_MAP_SIZE, &mext_o[5]);
+
+    db_qf_thresholds = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                  sizeof(int32_t) * MAX_QF_THRESH_SIZE, &mext_o[6]);
+    db_ac_tokens_ddr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                  sizeof(int32_t) * MAX_AC_TOKEN_SIZE, &mext_o[7]);
+    //=================================
+    db_token0_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                               sizeof(ap_uint<64>) * MAX_AC_TOKEN_SIZE, &mext_o[8]);
+
+    db_token1_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                               sizeof(ap_uint<64>) * MAX_AC_TOKEN_SIZE, &mext_o[9]);
+
+    db_token2_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                               sizeof(ap_uint<64>) * MAX_AC_TOKEN_SIZE, &mext_o[10]);
+
+    db_token3_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                               sizeof(ap_uint<64>) * MAX_AC_TOKEN_SIZE, &mext_o[11]);
+    //===================================
+    db_nonempty0_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                  sizeof(uint32_t) * 4096, &mext_o[12]);
+    db_nonempty1_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                  sizeof(uint32_t) * 4096, &mext_o[13]);
+    db_nonempty2_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                  sizeof(uint32_t) * 4096, &mext_o[14]);
+    db_nonempty3_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                  sizeof(uint32_t) * 4096, &mext_o[15]);
+    db_nonempty4_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                  sizeof(uint32_t) * 4096, &mext_o[16]);
+    //=================================
+    db_histograms0_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                    sizeof(int32_t) * 163840, &mext_o[17]);
+    db_histograms1_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                    sizeof(int32_t) * 163840, &mext_o[18]);
+    db_histograms2_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                    sizeof(int32_t) * 163840, &mext_o[19]);
+    db_histograms3_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                    sizeof(int32_t) * 163840, &mext_o[20]);
+    db_histograms4_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                    sizeof(int32_t) * 163840, &mext_o[21]);
+    //=================================
+    db_histograms_size0_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                         sizeof(uint32_t) * 4096, &mext_o[22]);
+
+    db_histograms_size1_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                         sizeof(uint32_t) * 4096, &mext_o[23]);
+
+    db_histograms_size2_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                         sizeof(uint32_t) * 4096, &mext_o[24]);
+
+    db_histograms_size3_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                         sizeof(uint32_t) * 4096, &mext_o[25]);
+
+    db_histograms_size4_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                         sizeof(uint32_t) * 4096, &mext_o[26]);
+
+    //==================================
+    db_total_count0_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                     sizeof(uint32_t) * 4096, &mext_o[27]);
+
+    db_total_count1_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                     sizeof(uint32_t) * 4096, &mext_o[28]);
+
+    db_total_count2_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                     sizeof(uint32_t) * 4096, &mext_o[29]);
+
+    db_total_count3_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                     sizeof(uint32_t) * 4096, &mext_o[30]);
+
+    db_total_count4_ptr = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                                     sizeof(uint32_t) * 4096, &mext_o[31]);
+
+    //==================================
+    // add Buffers to migrate
+    std::vector<cl::Memory> ob_in;
+    std::vector<cl::Memory> ob_out;
+
+    ob_in.push_back(db_config);
+    ob_in.push_back(db_ac_coef_ordered_ddr);
+    ob_in.push_back(db_strategy_ddr);
+    ob_in.push_back(db_qf_ddr);
+    ob_in.push_back(db_qdc_ddr);
+    ob_in.push_back(db_ctx_map);
+    ob_in.push_back(db_qf_thresholds);
+    ob_in.push_back(db_token0_ptr);
+    ob_in.push_back(db_token1_ptr);
+    ob_in.push_back(db_token2_ptr);
+    ob_in.push_back(db_token3_ptr);
+
+    ob_out.push_back(db_config);
+    ob_out.push_back(db_ac_tokens_ddr);
+    ob_out.push_back(db_histograms0_ptr);
+    ob_out.push_back(db_histograms1_ptr);
+    ob_out.push_back(db_histograms2_ptr);
+    ob_out.push_back(db_histograms3_ptr);
+    ob_out.push_back(db_histograms4_ptr);
+    ob_out.push_back(db_histograms_size0_ptr);
+    ob_out.push_back(db_histograms_size1_ptr);
+    ob_out.push_back(db_histograms_size2_ptr);
+    ob_out.push_back(db_histograms_size3_ptr);
+    ob_out.push_back(db_histograms_size4_ptr);
+    ob_out.push_back(db_total_count0_ptr);
+    ob_out.push_back(db_total_count1_ptr);
+    ob_out.push_back(db_total_count2_ptr);
+    ob_out.push_back(db_total_count3_ptr);
+    ob_out.push_back(db_total_count4_ptr);
+    ob_out.push_back(db_nonempty0_ptr);
+    ob_out.push_back(db_nonempty1_ptr);
+    ob_out.push_back(db_nonempty2_ptr);
+    ob_out.push_back(db_nonempty3_ptr);
+    ob_out.push_back(db_nonempty4_ptr);
+
+    // set kernel args
+    for (int i = 0; i < repInt; i++) {
+        initHist_kernel[i].setArg(0, db_config);
+        initHist_kernel[i].setArg(1, db_ac_coef_ordered_ddr);
+        initHist_kernel[i].setArg(2, db_strategy_ddr);
+        initHist_kernel[i].setArg(3, db_qf_ddr);
+        initHist_kernel[i].setArg(4, db_qdc_ddr);
+        initHist_kernel[i].setArg(5, db_ctx_map);
+        initHist_kernel[i].setArg(6, db_qf_thresholds);
+        initHist_kernel[i].setArg(7, db_ac_tokens_ddr);
+        initHist_kernel[i].setArg(8, db_token0_ptr);
+        initHist_kernel[i].setArg(9, db_token1_ptr);
+        initHist_kernel[i].setArg(10, db_token2_ptr);
+        initHist_kernel[i].setArg(11, db_token3_ptr);
+        initHist_kernel[i].setArg(12, db_histograms0_ptr);
+        initHist_kernel[i].setArg(13, db_histograms_size0_ptr);
+        initHist_kernel[i].setArg(14, db_total_count0_ptr);
+        initHist_kernel[i].setArg(15, db_nonempty0_ptr);
+        initHist_kernel[i].setArg(16, db_histograms1_ptr);
+        initHist_kernel[i].setArg(17, db_histograms_size1_ptr);
+        initHist_kernel[i].setArg(18, db_total_count1_ptr);
+        initHist_kernel[i].setArg(19, db_nonempty1_ptr);
+        initHist_kernel[i].setArg(20, db_histograms2_ptr);
+        initHist_kernel[i].setArg(21, db_histograms_size2_ptr);
+        initHist_kernel[i].setArg(22, db_total_count2_ptr);
+        initHist_kernel[i].setArg(23, db_nonempty2_ptr);
+        initHist_kernel[i].setArg(24, db_histograms3_ptr);
+        initHist_kernel[i].setArg(25, db_histograms_size3_ptr);
+        initHist_kernel[i].setArg(26, db_total_count3_ptr);
+        initHist_kernel[i].setArg(27, db_nonempty3_ptr);
+        initHist_kernel[i].setArg(28, db_histograms4_ptr);
+        initHist_kernel[i].setArg(29, db_histograms_size4_ptr);
+        initHist_kernel[i].setArg(30, db_total_count4_ptr);
+        initHist_kernel[i].setArg(31, db_nonempty4_ptr);
+    }
+
+    // launch kernel and calculate kernel execution time
+    std::cout << "INFO: Kernel Start" << std::endl;
+    // declare events
+    std::vector<cl::Event> events_write(1);
+    std::vector<cl::Event> events_kernel(1);
+    std::vector<cl::Event> events_read(1);
+
+    // migrate,
+    q.enqueueMigrateMemObjects(ob_in, 0, nullptr, &events_write[0]);
+    q.enqueueTask(initHist_kernel[0], &events_write, &events_kernel[0]);
+    q.enqueueMigrateMemObjects(ob_out, 1, &events_kernel, &events_read[0]);
+    q.finish();
+
+    struct timeval end_time;
+    gettimeofday(&end_time, 0);
+    std::cout << "INFO: Finish kernel execution" << std::endl;
+    std::cout << "INFO: Finish E2E execution" << std::endl;
+
+    // print related times
+    unsigned long timeStart, timeEnd, exec_time0;
+    std::cout << "-------------------------------------------------------" << std::endl;
+    events_write[0].getProfilingInfo(CL_PROFILING_COMMAND_START, &timeStart);
+    events_write[0].getProfilingInfo(CL_PROFILING_COMMAND_END, &timeEnd);
+    exec_time0 = (timeEnd - timeStart) / 1000.0;
+    std::cout << "INFO: Data transfer from host to device: " << exec_time0 << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+    events_read[0].getProfilingInfo(CL_PROFILING_COMMAND_START, &timeStart);
+    events_read[0].getProfilingInfo(CL_PROFILING_COMMAND_END, &timeEnd);
+    exec_time0 = (timeEnd - timeStart) / 1000.0;
+    std::cout << "INFO: Kernel1 Data transfer from device to host: " << exec_time0 << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+    exec_time0 = 0;
+    for (int i = 0; i < 1; ++i) {
+        events_kernel[0].getProfilingInfo(CL_PROFILING_COMMAND_START, &timeStart);
+        events_kernel[0].getProfilingInfo(CL_PROFILING_COMMAND_END, &timeEnd);
+        exec_time0 += (timeEnd - timeStart) / 1000.0;
+
+        std::cout << "INFO: Kernel" << i + 1 << " execution: " << (timeEnd - timeStart) / 1000.0 << " us\n";
+        std::cout << "-------------------------------------------------------" << std::endl;
+    }
+    std::cout << "INFO: kernel total execution: " << exec_time0 << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+    unsigned long exec_timeE2E = diff(&end_time, &start_time);
+    std::cout << "INFO: FPGA execution time:" << exec_timeE2E << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+
+    for (int j = 0; j < MAX_NUM_CONFIG; j++) {
+        config[j] = hb_config[j];
+    }
+
+    for (int j = 0; j < ALL_PIXEL; j++) {
+        ac_tokens_ddr[j] = hb_ac_tokens_ddr[j];
+    }
+
+    // output
+    std::cout << "histograms_ptr:" << std::endl;
+    for (int j = 0; j < 163840; j++) {
+        histograms0_ptr[j] = hb_histograms0_ptr[j];
+        histograms1_ptr[j] = hb_histograms1_ptr[j];
+        histograms2_ptr[j] = hb_histograms2_ptr[j];
+        histograms3_ptr[j] = hb_histograms3_ptr[j];
+        histograms4_ptr[j] = hb_histograms4_ptr[j];
+    }
+
+    std::cout << "histograms_size:" << std::endl;
+    for (int j = 0; j < 4096; j++) {
+        histograms_size0_ptr[j] = hb_histograms_size0_ptr[j];
+        histograms_size1_ptr[j] = hb_histograms_size1_ptr[j];
+        histograms_size2_ptr[j] = hb_histograms_size2_ptr[j];
+        histograms_size3_ptr[j] = hb_histograms_size3_ptr[j];
+        histograms_size4_ptr[j] = hb_histograms_size4_ptr[j];
+    }
+
+    std::cout << "total_count_ptr:" << std::endl;
+    for (int j = 0; j < 4096; j++) {
+        total_count0_ptr[j] = hb_total_count0_ptr[j];
+        total_count1_ptr[j] = hb_total_count1_ptr[j];
+        total_count2_ptr[j] = hb_total_count2_ptr[j];
+        total_count3_ptr[j] = hb_total_count3_ptr[j];
+        total_count4_ptr[j] = hb_total_count4_ptr[j];
+    }
+
+    for (int j = 0; j < 4096; j++) {
+        nonempty0_ptr[j] = hb_nonempty0_ptr[j];
+        nonempty1_ptr[j] = hb_nonempty1_ptr[j];
+        nonempty2_ptr[j] = hb_nonempty2_ptr[j];
+        nonempty3_ptr[j] = hb_nonempty3_ptr[j];
+        nonempty4_ptr[j] = hb_nonempty4_ptr[j];
+    }
+
+    free(hb_config);
+    free(hb_ac_coeff_ordered_ddr);
+    free(hb_strategy_ddr);
+    free(hb_qf_ddr);
+    free(hb_qdc_ddr);
+    free(hb_ctx_map);
+    free(hb_qf_thresholds);
+    free(hb_ac_tokens_ddr);
+    free(hb_token0_ptr);
+    free(hb_token1_ptr);
+    free(hb_token2_ptr);
+    free(hb_token3_ptr);
+    free(hb_histograms0_ptr);
+    free(hb_histograms1_ptr);
+    free(hb_histograms2_ptr);
+    free(hb_histograms3_ptr);
+    free(hb_histograms4_ptr);
+    free(hb_histograms_size0_ptr);
+    free(hb_histograms_size1_ptr);
+    free(hb_histograms_size2_ptr);
+    free(hb_histograms_size3_ptr);
+    free(hb_histograms_size4_ptr);
+    free(hb_total_count0_ptr);
+    free(hb_total_count1_ptr);
+    free(hb_total_count2_ptr);
+    free(hb_total_count3_ptr);
+    free(hb_total_count4_ptr);
+    free(hb_nonempty0_ptr);
+    free(hb_nonempty1_ptr);
+    free(hb_nonempty2_ptr);
+    free(hb_nonempty3_ptr);
+    free(hb_nonempty4_ptr);
+
+    std::cout << "finished opencl host" << std::endl;
+#else
+    ap_uint<64>* hls_tokens0_ptr = reinterpret_cast<ap_uint<64>*>(tokens0_ptr);
+    ap_uint<64>* hls_tokens1_ptr = reinterpret_cast<ap_uint<64>*>(tokens1_ptr);
+    ap_uint<64>* hls_tokens2_ptr = reinterpret_cast<ap_uint<64>*>(tokens2_ptr);
+    ap_uint<64>* hls_tokens3_ptr = reinterpret_cast<ap_uint<64>*>(tokens3_ptr);
+
+    hls_ANSinitHistogram(config, ac_coeff_ordered_ddr, strategy_ddr, qf_ddr, qdc_ddr, ctx_map, qf_thresholds,
+                         ac_tokens_ddr, hls_tokens0_ptr, hls_tokens1_ptr, hls_tokens2_ptr, hls_tokens3_ptr,
+                         histograms0_ptr, histograms_size0_ptr, total_count0_ptr, nonempty0_ptr, histograms1_ptr,
+                         histograms_size1_ptr, total_count1_ptr, nonempty1_ptr, histograms2_ptr, histograms_size2_ptr,
+                         total_count2_ptr, nonempty2_ptr, histograms3_ptr, histograms_size3_ptr, total_count3_ptr,
+                         nonempty3_ptr, histograms4_ptr, histograms_size4_ptr, total_count4_ptr, nonempty4_ptr);
+#endif
+}
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/acc_tokInit_histogram/host/host_tokinit_histogram.hpp b/codec/L2/demos/jxlEnc/acc_tokInit_histogram/host/host_tokinit_histogram.hpp
new file mode 100644
index 0000000000..75b3cf0f8a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_tokInit_histogram/host/host_tokinit_histogram.hpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HOST_TOKINIT_HISTOGRAM_HPP
+#define HOST_TOKINIT_HISTOGRAM_HPP
+
+#include <iostream>
+#include <sys/time.h>
+#ifndef HLS_TEST
+#include "xcl2.hpp"
+#include "xf_utils_sw/logger.hpp"
+// Capacity constants for the OpenCL host build (this branch is compiled only when
+// HLS_TEST is not defined; otherwise hls_init_histogram.hpp is included instead --
+// presumably it supplies equivalent definitions, TODO confirm).
+//
+// Frame geometry the buffers are sized for: 2048 x 2048 with 3 planes.
+const int PIXEL_W = 2048;
+const int PIXEL_H = 2048;
+const int FRAME_DIM = 3;
+// Total element count over all planes: 2048 * 2048 * 3 = 12,582,912.
+const int ALL_PIXEL = PIXEL_W * PIXEL_H * FRAME_DIM;
+// Number of 8-wide / 8-high blocks per row / column (256 each) and in total (65,536).
+const int MAX_NUM_BLK88_W = PIXEL_W / 8;
+const int MAX_NUM_BLK88_H = PIXEL_H / 8;
+const int MAX_NUM_BLK88 = MAX_NUM_BLK88_W * MAX_NUM_BLK88_H;
+// Evaluates to 4224 (192 + 192 + 768 + 3072).
+const int MAX_ORDERS_SIZE = (3 * 64 + 3 * 64 + 3 * 256 + 3 * 1024);
+// Element counts used for the qf_thresholds and ctx_map buffers respectively.
+const int MAX_QF_THRESH_SIZE = 256;
+const int MAX_CTX_MAP_SIZE = 256;
+// Token buffers are sized for one entry per element of the frame.
+const int MAX_AC_TOKEN_SIZE = ALL_PIXEL;
+#else
+#include "hls_init_histogram.hpp"
+#endif
+
+/**
+ * @brief Host wrapper around the hls_ANSinitHistogram kernel.
+ *
+ * In the OpenCL branch of the implementation the wrapper copies the inputs into host
+ * buffers, migrates them to the device, enqueues the kernel once, migrates the results
+ * back and copies them into the caller's arrays. In the alternative (`#else`) branch it
+ * calls hls_ANSinitHistogram() directly on the caller's arrays, reinterpreting the
+ * tokensN_ptr arrays as ap_uint<64>*.
+ *
+ * The OpenCL branch reads/writes a fixed number of elements per array regardless of the
+ * actual image size, so each array must be at least that large:
+ *   - ac_coeff_ordered_ddr: ALL_PIXEL (read)
+ *   - strategy_ddr, qf_ddr, qdc_ddr: MAX_NUM_BLK88 (read)
+ *   - ctx_map: MAX_CTX_MAP_SIZE (read); qf_thresholds: MAX_QF_THRESH_SIZE (read)
+ *   - ac_tokens_ddr: MAX_AC_TOKEN_SIZE (read before the run, overwritten after it)
+ *   - tokens0_ptr..tokens3_ptr: MAX_AC_TOKEN_SIZE (read)
+ *   - histogramsN_ptr: 163840 (written)
+ *   - histograms_sizeN_ptr, total_countN_ptr, nonemptyN_ptr: 4096 (written)
+ *
+ * @param xclbinPath presumably the path of the xclbin to load in the OpenCL branch --
+ *                   its use is outside the visible part of the implementation, TODO confirm.
+ * @param config     kernel configuration words; passed to the kernel and copied back
+ *                   from the device after the run (in/out).
+ * @param ac_coeff_ordered_ddr,strategy_ddr,qf_ddr,qdc_ddr,ctx_map,qf_thresholds
+ *                   kernel inputs (kernel arguments 1-6).
+ * @param ac_tokens_ddr kernel argument 7; copied back from the device after the run.
+ * @param tokens0_ptr..tokens3_ptr kernel inputs (kernel arguments 8-11).
+ * @param histogramsN_ptr,histograms_sizeN_ptr,total_countN_ptr,nonemptyN_ptr
+ *                   five result sets (N = 0..4), kernel arguments 12-31 in groups of four.
+ */
+void hls_ANSinitHistogram_wrapper(std::string xclbinPath,
+                                  int config[32],
+                                  //====================
+                                  int32_t* ac_coeff_ordered_ddr,
+                                  int32_t* strategy_ddr,
+                                  int32_t* qf_ddr,
+                                  uint8_t* qdc_ddr,
+                                  uint8_t* ctx_map,
+                                  uint32_t* qf_thresholds,
+                                  uint64_t* ac_tokens_ddr,
+                                  //====================
+                                  uint64_t* tokens0_ptr,
+                                  uint64_t* tokens1_ptr,
+                                  uint64_t* tokens2_ptr,
+                                  uint64_t* tokens3_ptr,
+                                  //====================
+                                  int32_t* histograms0_ptr,
+                                  uint32_t* histograms_size0_ptr,
+                                  uint32_t* total_count0_ptr,
+                                  uint32_t* nonempty0_ptr,
+                                  //======================
+                                  int32_t* histograms1_ptr,
+                                  uint32_t* histograms_size1_ptr,
+                                  uint32_t* total_count1_ptr,
+                                  uint32_t* nonempty1_ptr,
+                                  //======================
+                                  int32_t* histograms2_ptr,
+                                  uint32_t* histograms_size2_ptr,
+                                  uint32_t* total_count2_ptr,
+                                  uint32_t* nonempty2_ptr,
+                                  //======================
+                                  int32_t* histograms3_ptr,
+                                  uint32_t* histograms_size3_ptr,
+                                  uint32_t* total_count3_ptr,
+                                  uint32_t* nonempty3_ptr,
+                                  //======================
+                                  int32_t* histograms4_ptr,
+                                  uint32_t* histograms_size4_ptr,
+                                  uint32_t* total_count4_ptr,
+                                  uint32_t* nonempty4_ptr);
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/acc_tokInit_histogram/kernel/hls_init_histogram.cpp b/codec/L2/demos/jxlEnc/acc_tokInit_histogram/kernel/hls_init_histogram.cpp
new file mode 100644
index 0000000000..bc647c9759
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_tokInit_histogram/kernel/hls_init_histogram.cpp
@@ -0,0 +1,1725 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HLS_INIT_HISTOGRAM_CPP
+#define HLS_INIT_HISTOGRAM_CPP
+
+#include "hls_init_histogram.hpp"
+#include "hls_math.h"
+
+// Side length of a block.
+#define kBlockDim 8
+// Number of entries in a kBlockDim x kBlockDim block (64). The replacement list is
+// parenthesized so the product stays a single operand when the macro appears inside a
+// larger expression such as `x / kDCTBlockSize` or `x % kDCTBlockSize`; without the
+// parentheses those would expand to `x / 8 * 8` and `x % 8 * 8`.
+#define kDCTBlockSize (kBlockDim * kBlockDim)
+// NOTE(review): the two constants below are not referenced in the visible part of this
+// file; confirm their meaning against their users before changing them.
+#define kNonZeroBuckets 37
+#define kZeroDensityContextCount 458
+//====================================================================================//
+// hls_initHistogram_qc.cpp
+//====================================================================================//
+/**
+ * @brief Accumulate per-context token histograms from a stream of 64-bit tokens.
+ *
+ * Each stream word carries a 32-bit value in bits [31:0] and a context id in bits
+ * [62:32]. A word with bit 63 set terminates the stream and is not counted. For every
+ * token the value is mapped to a bucket index `tok`, and then
+ *   - histograms_uram[context][tok] is incremented,
+ *   - total_count[context] is incremented,
+ *   - histograms_size[context] becomes (tok + 1) rounded up to a multiple of 8 whenever
+ *     the current size is <= tok,
+ *   - the context is appended to nonempty_ the first time a token for it is seen
+ *     (detected by total_count[context] being 0 before the increment).
+ *
+ * The function only increments; the tables are expected to be cleared by the caller.
+ *
+ * @param token_stream    input tokens; must end with a word whose bit 63 is set, because
+ *                        the first read happens unconditionally.
+ * @param histograms_uram per-context bucket counters, indexed [context][tok].
+ * @param histograms_size per-context histogram length (multiple of 8), in/out.
+ * @param total_count     per-context number of tokens, in/out.
+ * @param nempty_cnt      out: number of entries written to nonempty_ (reset to 0 here).
+ * @param nonempty_       out: context ids in order of first appearance.
+ * @param large_idx       out: a context holding the largest total_count (the one whose
+ *                        running total reached the final maximum first). Left untouched
+ *                        when the stream contains no tokens.
+ *
+ * NOTE(review): neither index is range checked. `context` is 31 bits wide while the
+ * tables have 4096 rows, and `tok` reaches 40 for value >= 1024 while the caller in this
+ * file allocates 40 columns. The producer must guarantee context < 4096 and
+ * value < 1024 -- confirm upstream.
+ */
+void hls_InitHistogram(hls::stream<ap_uint<64> >& token_stream,
+#ifndef __SYNTHESIS__
+                       std::vector<std::vector<int32_t> >& histograms_uram,
+#else
+                       int32_t histograms_uram[4096][40],
+#endif
+                       uint32_t histograms_size[4096],
+                       uint32_t total_count[4096],
+                       uint32_t& nempty_cnt,
+                       uint32_t nonempty_[4096],
+                       uint32_t& large_idx) {
+#pragma HLS INLINE off
+
+    // Forwarding registers: the last four histogram writes (value, context, bucket).
+    // Index 0 is the most recent. The -1 sentinels can never match a real token.
+    int32_t histo_reg[4] = {0, 0, 0, 0};
+#pragma HLS array_partition variable = histo_reg complete dim = 1
+    int32_t histo_ctx[4] = {-1, -1, -1, -1};
+#pragma HLS array_partition variable = histo_ctx complete dim = 1
+    int32_t histo_tok[4] = {-1, -1, -1, -1};
+#pragma HLS array_partition variable = histo_tok complete dim = 1
+
+    // Last four total_count values written, matched on histo_ctx only.
+    uint32_t totalcnt_reg[4] = {0, 0, 0, 0};
+#pragma HLS array_partition variable = totalcnt_reg complete dim = 1
+
+    // Last four histograms_size values written, matched on histo_ctx only.
+    uint32_t histo_size_reg[4] = {0, 0, 0, 0};
+#pragma HLS array_partition variable = histo_size_reg complete dim = 1
+
+    // Prime the loop with the first word (may already be the terminator).
+    ap_uint<64> token_reg = 0;
+    token_reg = token_stream.read();
+
+    nempty_cnt = 0;
+    uint32_t max_totalcnt = 0; // largest running total_count seen so far
+
+    int tmp_test = 0; // csim-only iteration counter; never read in this function
+
+INIT_HISTOGRAM_LOOP:
+    while (token_reg[63] != 1) {
+#pragma HLS PIPELINE II = 1
+// The tool is told to ignore inter-iteration dependences on the three tables; the
+// forwarding registers above supply recently written values instead of the memories.
+// NOTE(review): the depth of 4 presumably covers the read-to-write latency -- confirm.
+#pragma HLS DEPENDENCE variable = total_count inter false
+#pragma HLS DEPENDENCE variable = histograms_uram inter false
+#pragma HLS DEPENDENCE variable = histograms_size inter false
+
+        // csim-only
+        tmp_test++;
+
+        ap_uint<32> value = token_reg.range(31, 0);
+        ap_uint<31> context = token_reg.range(62, 32);
+        uint32_t tok;
+
+        // Bucket index: values below 16 map to themselves; larger values use four
+        // buckets per power of two, selected by the two bits below the leading one.
+        if (value < 16) {
+            tok = value;
+        } else {
+            uint32_t n = 32 - value.countLeadingZeros() - 1; // position of the leading one
+            uint32_t m = value - (1 << n);                   // value without the leading one
+            tok = 16 + ((n - 4) << 2) + (m >> (n - 2));
+        }
+        // Current counter for (context, tok): newest matching register, else memory.
+        int32_t histo_read;
+        int32_t histo_write;
+        if (context == histo_ctx[0] && tok == histo_tok[0]) {
+            histo_read = histo_reg[0];
+        } else if (context == histo_ctx[1] && tok == histo_tok[1]) {
+            histo_read = histo_reg[1];
+        } else if (context == histo_ctx[2] && tok == histo_tok[2]) {
+            histo_read = histo_reg[2];
+        } else if (context == histo_ctx[3] && tok == histo_tok[3]) {
+            histo_read = histo_reg[3];
+        } else {
+            histo_read = histograms_uram[context][tok];
+        }
+        histo_write = histo_read + 1;
+
+        // Current total count and size for the context, with the same forwarding scheme.
+        uint32_t tot_cnt_read;
+        uint32_t tot_cnt_write;
+        uint32_t siz_read;
+        uint32_t siz_write;
+        if (context == histo_ctx[0]) {
+            tot_cnt_read = totalcnt_reg[0];
+            siz_read = histo_size_reg[0];
+        } else if (context == histo_ctx[1]) {
+            tot_cnt_read = totalcnt_reg[1];
+            siz_read = histo_size_reg[1];
+        } else if (context == histo_ctx[2]) {
+            tot_cnt_read = totalcnt_reg[2];
+            siz_read = histo_size_reg[2];
+        } else if (context == histo_ctx[3]) {
+            tot_cnt_read = totalcnt_reg[3];
+            siz_read = histo_size_reg[3];
+        } else {
+            tot_cnt_read = total_count[context];
+            siz_read = histograms_size[context];
+        }
+
+        tot_cnt_write = tot_cnt_read + 1;
+
+        // First token for this context: record it in the non-empty list.
+        if (tot_cnt_read == 0) {
+            nonempty_[nempty_cnt] = context;
+            nempty_cnt++;
+        }
+        // Track the context with the largest running total (strictly greater only, so
+        // ties keep the earlier context).
+        if (tot_cnt_write > max_totalcnt) {
+            large_idx = context;
+            max_totalcnt = tot_cnt_write;
+        }
+
+        // Grow the histogram length to cover tok: (tok + 1) rounded up to a multiple of 8.
+        if (siz_read <= tok) {
+            siz_write = (tok + 8) / 8 * 8;
+        } else {
+            siz_write = siz_read;
+        }
+
+        // Fetch the next word, write the updated values back and shift them into the
+        // forwarding registers (index 0 = newest).
+        token_reg = token_stream.read();
+        histograms_uram[context][tok] = histo_write; // II=1
+        histo_reg[3] = histo_reg[2];
+        histo_reg[2] = histo_reg[1];
+        histo_reg[1] = histo_reg[0];
+        histo_reg[0] = histo_write;
+        histo_ctx[3] = histo_ctx[2];
+        histo_ctx[2] = histo_ctx[1];
+        histo_ctx[1] = histo_ctx[0];
+        histo_ctx[0] = context;
+        histo_tok[3] = histo_tok[2];
+        histo_tok[2] = histo_tok[1];
+        histo_tok[1] = histo_tok[0];
+        histo_tok[0] = tok;
+        total_count[context] = tot_cnt_write; // should be II=1
+        totalcnt_reg[3] = totalcnt_reg[2];
+        totalcnt_reg[2] = totalcnt_reg[1];
+        totalcnt_reg[1] = totalcnt_reg[0];
+        totalcnt_reg[0] = tot_cnt_write;
+        histograms_size[context] = siz_write;
+        histo_size_reg[3] = histo_size_reg[2];
+        histo_size_reg[2] = histo_size_reg[1];
+        histo_size_reg[1] = histo_size_reg[0];
+        histo_size_reg[0] = siz_write;
+    }
+}
+
+/**
+ * @brief Build the histogram tables for one token stream and write them to memory.
+ *
+ * Steps: clear the on-chip tables, run hls_InitHistogram() over the stream, publish the
+ * number of non-empty contexts and the index of the largest one on the two scalar
+ * streams, then copy the tables out.
+ *
+ * @param token_stream        input tokens, terminated by a word with bit 63 set.
+ * @param histograms_ptr      out: 40 counters per context at offset context * 40. Only
+ *                            rows of contexts listed in the non-empty table are written;
+ *                            all other rows are left as the caller provided them.
+ * @param histograms_size_ptr out: 4096 per-context histogram lengths.
+ * @param total_count_ptr     out: 4096 per-context token counts.
+ * @param nonempty_ptr        out: 4096 entries; the first nempty_cnt hold context ids in
+ *                            order of first appearance, the remainder are 0.
+ * @param strm_nempty_cnt     out: one word, the number of non-empty contexts.
+ * @param strm_largest_idx    out: one word, the context with the largest token count
+ *                            (0 when the stream holds no tokens).
+ */
+void init_histogram_core(hls::stream<ap_uint<64> >& token_stream,
+                         int32_t* histograms_ptr,
+                         uint32_t* histograms_size_ptr,
+                         uint32_t* total_count_ptr,
+                         uint32_t* nonempty_ptr,
+                         hls::stream<uint32_t>& strm_nempty_cnt,
+                         hls::stream<uint32_t>& strm_largest_idx) {
+#pragma HLS INLINE off
+
+#ifndef __SYNTHESIS__
+    // C simulation: keep the 4096 x 40 table on the heap.
+    std::vector<std::vector<int32_t> > histograms_uram(4096, std::vector<int32_t>(40));
+#else
+    // Synthesis: URAM-backed table, fully partitioned along the bucket dimension so the
+    // 40 buckets of one context can be accessed in the same cycle.
+    int32_t histograms_uram[4096][40];
+#pragma HLS BIND_STORAGE impl = URAM variable = histograms_uram type = ram_s2p
+#pragma HLS ARRAY_PARTITION variable = histograms_uram complete dim = 2
+#endif
+
+    uint32_t histograms_size[4096];
+    uint32_t total_count[4096];
+    uint32_t nonempty_[4096];
+    uint32_t nempty_cnt;
+
+HISTOGRAM_URAM_INIT_LOOP:
+    for (int j = 0; j < 4096; j++) {
+#pragma HLS PIPELINE II = 1
+        histograms_size[j] = 0;
+        total_count[j] = 0;
+        // hls_InitHistogram() fills only the first nempty_cnt entries, but all 4096 are
+        // copied out below; clear the table so the tail is 0 instead of indeterminate.
+        nonempty_[j] = 0;
+        for (int k = 0; k < 40; k++) {
+#pragma HLS UNROLL
+            histograms_uram[j][k] = 0;
+        }
+    }
+
+    uint32_t largest_idx_tmp = 0;
+
+    hls_InitHistogram(token_stream, histograms_uram, histograms_size, total_count, nempty_cnt, nonempty_,
+                      largest_idx_tmp);
+
+    // Publish the scalar results before the (longer) table write-out.
+    strm_nempty_cnt.write(nempty_cnt);
+    strm_largest_idx.write(largest_idx_tmp);
+
+    uint32_t nempty_context;
+HISTOGRAM_WRITEOUT_LOOP:
+    for (int i = 0; i < nempty_cnt; i++) {
+        for (int j = 0; j < 40; j++) {
+#pragma HLS PIPELINE II = 1
+            // Latch the context id once per row, then stream out its 40 buckets.
+            if (j == 0) nempty_context = nonempty_[i];
+            histograms_ptr[nempty_context * 40 + j] = histograms_uram[nempty_context][j];
+        }
+    }
+
+HISTOGRAM_SIZE_WRITEOUT_LOOP:
+    for (int j = 0; j < 4096; j++) {
+#pragma HLS PIPELINE II = 1
+        histograms_size_ptr[j] = histograms_size[j];
+    }
+
+HISTOGRAM_CNT_WRITEOUT_LOOP:
+    for (int j = 0; j < 4096; j++) {
+#pragma HLS PIPELINE II = 1
+        total_count_ptr[j] = total_count[j];
+    }
+
+HISTOGRAM_NEMPTY_WRITEOUT_LOOP:
+    for (int j = 0; j < 4096; j++) {
+#pragma HLS PIPELINE II = 1
+        nonempty_ptr[j] = nonempty_[j];
+    }
+}
+
+// Runs the histogram core for up to five token streams, one slot after another.
+//
+// Five enable words are read from `strm_do_once` before any slot is processed. For slot k (0..4), in order:
+//   - enable word non-zero: init_histogram_core() consumes token_stream<k> and fills the slot's four buffers;
+//   - enable word zero    : a 0 is written to strm_nempty_cnt and to strm_largest_idx instead.
+// Either way every slot contributes exactly one word to each of the two output streams.
+void init_histogram_top(
+    hls::stream<int>& strm_do_once,
+    hls::stream<ap_uint<64> >& token_stream0,
+    hls::stream<ap_uint<64> >& token_stream1,
+    hls::stream<ap_uint<64> >& token_stream2,
+    hls::stream<ap_uint<64> >& token_stream3,
+    hls::stream<ap_uint<64> >& token_stream4,
+    hls::stream<uint32_t>& strm_nempty_cnt,
+    hls::stream<uint32_t>& strm_largest_idx,
+
+    int32_t* histograms0_ptr,
+    uint32_t* histograms_size0_ptr,
+    uint32_t* total_count0_ptr,
+    uint32_t* nonempty0_ptr,
+
+    int32_t* histograms1_ptr,
+    uint32_t* histograms_size1_ptr,
+    uint32_t* total_count1_ptr,
+    uint32_t* nonempty1_ptr,
+
+    int32_t* histograms2_ptr,
+    uint32_t* histograms_size2_ptr,
+    uint32_t* total_count2_ptr,
+    uint32_t* nonempty2_ptr,
+
+    int32_t* histograms3_ptr,
+    uint32_t* histograms_size3_ptr,
+    uint32_t* total_count3_ptr,
+    uint32_t* nonempty3_ptr,
+
+    int32_t* histograms4_ptr,
+    uint32_t* histograms_size4_ptr,
+    uint32_t* total_count4_ptr,
+    uint32_t* nonempty4_ptr) {
+#pragma HLS INLINE off
+    // Enable words, fetched up front in slot order.
+    const int run_slot0 = strm_do_once.read();
+    const int run_slot1 = strm_do_once.read();
+    const int run_slot2 = strm_do_once.read();
+    const int run_slot3 = strm_do_once.read();
+    const int run_slot4 = strm_do_once.read();
+
+    // Slot 0
+    if (!run_slot0) {
+        strm_nempty_cnt.write(0);
+        strm_largest_idx.write(0);
+    } else {
+        init_histogram_core(token_stream0, histograms0_ptr, histograms_size0_ptr, total_count0_ptr, nonempty0_ptr,
+                            strm_nempty_cnt, strm_largest_idx);
+    }
+
+    // Slot 1
+    if (!run_slot1) {
+        strm_nempty_cnt.write(0);
+        strm_largest_idx.write(0);
+    } else {
+        init_histogram_core(token_stream1, histograms1_ptr, histograms_size1_ptr, total_count1_ptr, nonempty1_ptr,
+                            strm_nempty_cnt, strm_largest_idx);
+    }
+
+    // Slot 2
+    if (!run_slot2) {
+        strm_nempty_cnt.write(0);
+        strm_largest_idx.write(0);
+    } else {
+        init_histogram_core(token_stream2, histograms2_ptr, histograms_size2_ptr, total_count2_ptr, nonempty2_ptr,
+                            strm_nempty_cnt, strm_largest_idx);
+    }
+
+    // Slot 3
+    if (!run_slot3) {
+        strm_nempty_cnt.write(0);
+        strm_largest_idx.write(0);
+    } else {
+        init_histogram_core(token_stream3, histograms3_ptr, histograms_size3_ptr, total_count3_ptr, nonempty3_ptr,
+                            strm_nempty_cnt, strm_largest_idx);
+    }
+
+    // Slot 4
+    if (!run_slot4) {
+        strm_nempty_cnt.write(0);
+        strm_largest_idx.write(0);
+    } else {
+        init_histogram_core(token_stream4, histograms4_ptr, histograms_size4_ptr, total_count4_ptr, nonempty4_ptr,
+                            strm_nempty_cnt, strm_largest_idx);
+    }
+}
+
+// Streams a token buffer from memory into `token_stream`.
+//
+// One enable word is read from `strm_do_once`; when it is zero nothing else happens. Otherwise
+// tokens_ptr[0] is taken as the token count N, entries 1..N are forwarded with bit 63 cleared, and a
+// final word with bit 63 set is appended. Memory is read in chunks of 256 words, so entries past N
+// are read as well but never forwarded.
+void load_token(hls::stream<int>& strm_do_once, ap_uint<64>* tokens_ptr, hls::stream<ap_uint<64> >& token_stream) {
+#pragma HLS INLINE off
+
+    const int enable = strm_do_once.read();
+    if (enable) {
+        ap_uint<64> word;
+        const uint32_t token_size = tokens_ptr[0];
+    LOAD_TOKEN_LOOP:
+        for (int chunk = 0; chunk < (token_size + 1 + 256) / 256; chunk++) {
+            for (int lane = 0; lane < 256; lane++) {
+#pragma HLS PIPELINE II = 1
+                const int pos = chunk * 256 + lane;
+                word.range(62, 0) = tokens_ptr[pos];
+                word[63] = 0;
+                // Slot 0 holds the count rather than a token; slots beyond the count are skipped.
+                const bool is_token = (pos != 0) && (pos < token_size + 1);
+                if (is_token) token_stream.write(word);
+            }
+        }
+        // Closing word: bit 63 set, low bits left as they were after the last read.
+        word[63] = 1;
+        token_stream.write(word);
+    }
+}
+//=====================================================================================================//
+// hls_enc_entropy_coder.cpp
+//=====================================================================================================//
+// Evaluates to 32. hls_count_nz sizes its per-channel count buffers as this value squared.
+#define MAX_NUM_BLOCK88_JXL (256 / 8)
+
+// Number of coefficient orders; passed to hls_Context as its kNumOrders argument by hls_tokenize_nz.
+const uint8_t kNumOrders = 13;
+
+// Horizontal extent, in blocks, of the transform selected by a strategy id (27 entries, indexed by strategy).
+uint8_t covered_blocks_x[] = {1, 1, 1, 1, 2, 4, 1, 2, 1, 4, 2, 4, 1, 1, 1, 1, 1, 1, 8, 4, 8, 16, 8, 16, 32, 16, 32};
+
+// Vertical extent, in blocks, of the transform selected by a strategy id (27 entries, indexed by strategy).
+uint8_t covered_blocks_y[] = {1, 1, 1, 1, 2, 4, 2, 1, 4, 1, 4, 2, 1, 1, 1, 1, 1, 1, 8, 8, 4, 16, 16, 8, 32, 32, 16};
+// NOTE(review): not referenced in the code visible around these tables - confirm whether it is still needed.
+uint64_t coverd_blocks_lut[] = {0, 0, 1, 0, 2, 0, 1, 0, 3};
+
+// Frequency part of the zero-density context. hls_ZeroDensityContext indexes it with
+// (k >> log2_covered_blocks). Entry 0 is the placeholder value 0xBAD.
+uint16_t hls_kCoeffFreqContext[64] = {
+    0xBAD, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 15, 16, 16, 17, 17,
+    18,    18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 25, 25, 25, 25,
+    26,    26, 26, 26, 27, 27, 27, 27, 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30,
+};
+
+// Non-zero-count part of the zero-density context. hls_ZeroDensityContext indexes it with the
+// remaining non-zero count scaled down by the covered-block count. Entry 0 is the placeholder 0xBAD.
+uint16_t hls_kCoeffNumNonzeroContext[64] = {
+    0xBAD, 0,   31,  62,  62,  93,  93,  93,  93,  123, 123, 123, 123, 152, 152, 152, 152, 152, 152, 152, 152, 180,
+    180,   180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206,
+    206,   206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206,
+};
+
+// NOTE(review): not referenced in the code visible around these tables; presumably offsets into a
+// coefficient-order table - confirm against the users of this array.
+uint64_t hls_kCoeffOrderOffset[] = {
+    0,   1,   2,   3,   4,   5,   6,   10,  14,  18,  34,   50,   66,   68,   70,   72,   76,   80,   84,   92,
+    100, 108, 172, 236, 300, 332, 364, 396, 652, 908, 1164, 1292, 1420, 1548, 2572, 3596, 4620, 5132, 5644, 6156,
+};
+
+// Maps a strategy id to the order index `ord` that hls_tokenize_nz feeds into hls_Context (27 entries).
+uint8_t hls_kStrategyOrder[] = {
+    0, 1, 1, 1, 2, 3, 4, 4, 5, 5, 6, 6, 1, 1, 1, 1, 1, 1, 7, 8, 8, 9, 10, 10, 11, 12, 12,
+};
+
+// 39 entries laid out as three rows of 13.
+uint8_t hls_kDefaultCtxMap[39] = {
+    // Default ctx map clusters all the large transforms together.
+    0, 1, 2, 2, 3,  3,  4,  5,  6,  6,  6,  6,  6,  //
+    7, 8, 9, 9, 10, 11, 12, 13, 14, 14, 14, 14, 14, //
+    7, 8, 9, 9, 10, 11, 12, 13, 14, 14, 14, 14, 14, //
+};
+
+// Returns log2(covered_blocks) when covered_blocks is a power of two in [1, 1024], otherwise 0.
+//
+// The covered_blocks_x / covered_blocks_y tables produce products of 1, 2, 4, ... 1024. The result is
+// used as a right-shift amount when scaling non-zero counts and coefficient positions, so a value
+// that is too small leaves those quantities unscaled. Results for 1, 4 and 16 (and for any
+// non-power-of-two input) are the same as before; the other powers of two now get their true log2
+// instead of 0.
+uint32_t hls_covered_block_lut(int covered_blocks) {
+#pragma HLS INLINE
+    uint32_t log2_covered_blk;
+    switch (covered_blocks) {
+        case 2:
+            log2_covered_blk = 1;
+            break;
+        case 4:
+            log2_covered_blk = 2;
+            break;
+        case 8:
+            log2_covered_blk = 3;
+            break;
+        case 16:
+            log2_covered_blk = 4;
+            break;
+        case 32:
+            log2_covered_blk = 5;
+            break;
+        case 64:
+            log2_covered_blk = 6;
+            break;
+        case 128:
+            log2_covered_blk = 7;
+            break;
+        case 256:
+            log2_covered_blk = 8;
+            break;
+        case 512:
+            log2_covered_blk = 9;
+            break;
+        case 1024:
+            log2_covered_blk = 10;
+            break;
+        default:
+            // 1, and anything that is not a power of two.
+            log2_covered_blk = 0;
+            break;
+    }
+
+    return log2_covered_blk;
+}
+
+// Offset of the zero-density contexts belonging to `block_ctx`: the kNonZeroBuckets * num_ctxs
+// entries that precede them, plus kZeroDensityContextCount entries per earlier block context.
+uint32_t hls_ZeroDensityContextsOffset(uint64_t num_ctxs, uint32_t block_ctx) {
+#pragma HLS INLINE
+    const uint32_t nonzero_part = num_ctxs * kNonZeroBuckets;
+    const uint32_t density_part = kZeroDensityContextCount * block_ctx;
+    return nonzero_part + density_part;
+}
+
+// Counts the non-zero coefficients of one block, ignoring the very first one.
+//
+// Reads kBlockDim * kBlockDim words from `strm_ac_coeff_raster`. The first word is skipped and booked
+// as a zero, so the result is kDCTBlockSize minus (1 + number of zero words after the first).
+// The count is both stored through `nzeros_pos` and returned.
+int32_t HLS_NumNonZero8x8ExceptDC(hls::stream<int32_t>& strm_ac_coeff_raster, int32_t* nzeros_pos) {
+    int zero_cnt = 1; // the skipped first coefficient
+HLS_COUNT_NZ8X8_INNER_LOOP:
+    for (int k = 0; k < kBlockDim * kBlockDim; k++) {
+#pragma HLS PIPELINE II = 1
+        const int32_t coeff = strm_ac_coeff_raster.read();
+        if (k != 0 && coeff == 0) {
+            zero_cnt++;
+        }
+    }
+
+    const int32_t nonzero_cnt = (kDCTBlockSize - zero_cnt);
+    *nzeros_pos = nonzero_cnt;
+    return nonzero_cnt;
+}
+
+// Returns 1 when block (bx, by) is the top-left block of its transform, 0 otherwise.
+//
+// Strategy 4 spans 2x2 blocks and strategy 5 spans 4x4 blocks, so only positions aligned to that
+// span qualify. Every other strategy is treated as a single block and always qualifies.
+int hls_Is_FirstBlock(int by, int bx, int8_t strategy) {
+#pragma HLS INLINE
+    int span = 1;
+    if (strategy == 4) {
+        span = 2;
+    } else if (strategy == 5) {
+        span = 4;
+    }
+
+    const bool aligned = (bx % span == 0) && (by % span == 0);
+    return aligned ? 1 : 0;
+}
+
+// Predicts a block's value from the entry above it (row_top) and the entry to its left (row).
+//
+//   x == 0 && y == 0 : default_val
+//   x == 0           : row_top[0]
+//   y == 0           : row[x - 1]
+//   otherwise        : (row_top[x] + row[x - 1] + 1) / 2
+//
+// covered_blocks, log2_covered_blocks and c are accepted but not used.
+int32_t hls_PredictFromTopAndLeft(const int32_t* row_top,
+                                  const int32_t* row,
+                                  int covered_blocks,
+                                  int log2_covered_blocks,
+                                  int c,
+                                  int32_t x,
+                                  int32_t y,
+                                  int32_t default_val) {
+#pragma HLS INLINE
+    const bool first_column = (x == 0);
+    const bool first_row = (y == 0);
+
+    if (first_column) {
+        // No left neighbour: fall back to the default at the origin, else use the entry above.
+        return first_row ? default_val : row_top[0];
+    }
+    if (first_row) {
+        // No upper neighbour: use the entry to the left.
+        return row[x - 1];
+    }
+    // Both neighbours exist: rounded average.
+    return (row_top[x] + row[x - 1] + 1) / 2;
+}
+
+// TODO(user): investigate, why disabling pre-clustering makes entropy code
+// less dense. Perhaps we would need to add HQ clustering algorithm that would
+// be able to squeeze better by spending more CPU cycles.
+// Context for one coefficient at position k, given how many non-zeros are still to come.
+//
+// Both the remaining non-zero count (rounded up) and k are scaled down by the covered-block count,
+// looked up in hls_kCoeffNumNonzeroContext / hls_kCoeffFreqContext, summed, doubled and offset by
+// `prev`. The scaled values index 64-entry tables; the caller must keep them below 64.
+uint32_t hls_ZeroDensityContext(
+    uint32_t nonzeros_left, uint32_t k, uint32_t covered_blocks, uint32_t log2_covered_blocks, uint32_t prev) {
+#pragma HLS INLINE
+    const uint32_t scaled_nonzeros = (nonzeros_left + covered_blocks - 1) >> log2_covered_blocks;
+    const uint32_t scaled_pos = k >> log2_covered_blocks;
+    const uint32_t table_sum = hls_kCoeffNumNonzeroContext[scaled_nonzeros] + hls_kCoeffFreqContext[scaled_pos];
+
+    return table_sum * 2 + prev;
+}
+
+// Context used for coding a block's non-zero count.
+//
+// The count is capped at 64 and bucketed: values below 8 keep their own bucket, larger values share
+// bucket 4 + count / 2. The result is bucket * num_ctxs + block_ctx, so contexts with the same
+// bucket are grouped together.
+uint32_t hls_NonZeroContext(uint64_t num_ctxs, uint32_t non_zeros, uint32_t block_ctx) {
+#pragma HLS INLINE
+    const uint32_t capped = (non_zeros < 64) ? non_zeros : 64;
+    const uint32_t bucket = (capped < 8) ? capped : (4 + capped / 2);
+    return bucket * num_ctxs + block_ctx;
+}
+
+// Maps a signed value onto an unsigned one: X >= 0 becomes 2 * X, and -X (X > 0) becomes 2 * X - 1.
+uint32_t hls_PackSigned(int32_t value) {
+#pragma HLS INLINE
+    // Doubling is done on the unsigned bit pattern, so it wraps instead of overflowing.
+    const uint32_t doubled = static_cast<uint32_t>(value) << 1;
+    // All ones for negative inputs, zero otherwise; the XOR then turns 2v into -2v - 1.
+    const uint32_t flip_mask = (value < 0) ? 0xFFFFFFFFu : 0u;
+    return doubled ^ flip_mask;
+}
+
+// Maps a loop step to the channel processed at that step: step 0 -> 1, step 1 -> 0, anything else -> 2.
+int hls_dim_sanf_order(int i) {
+#pragma HLS INLINE
+    return (i == 0) ? 1 : ((i == 1) ? 0 : 2);
+}
+
+// Looks up the block context in `ctx_map`.
+//
+// The map index is built from four components, most significant first:
+//   channel slot (c = 0 -> 1, c = 1 -> 0, otherwise 2), ord, qf bucket, dc_idx
+// with radices kNumOrders, qf_thresholds_size + 1 and num_dc_ctxs. The qf bucket is the number of
+// thresholds that `qf` exceeds; the comparison is done on unsigned values.
+uint64_t hls_Context(uint8_t* ctx_map,
+                     uint32_t* qf_thresholds,
+                     int qf_thresholds_size,
+                     int kNumOrders,
+                     int num_dc_ctxs,
+                     int dc_idx,
+                     int32_t qf,
+                     uint64_t ord,
+                     uint32_t c) {
+#pragma HLS INLINE
+    const uint32_t threshold_cnt = qf_thresholds_size;
+    const uint32_t qf_unsigned = qf;
+
+    uint32_t qf_bucket = 0;
+    for (uint32_t t = 0; t < threshold_cnt; t++) {
+#pragma HLS UNROLL
+        if (qf_unsigned > qf_thresholds[t]) {
+            qf_bucket++;
+        }
+    }
+
+    uint32_t map_idx = 2;
+    if (c < 2) {
+        map_idx = c ^ 1;
+    }
+    map_idx = map_idx * kNumOrders + ord;
+    map_idx = map_idx * (qf_thresholds_size + 1) + qf_bucket;
+    map_idx = map_idx * num_dc_ctxs + dc_idx;
+    return ctx_map[map_idx];
+}
+
+//==================== dataflow stage-1 ===========================//
+// Dataflow stage 1: forwards strategies and coefficients unchanged while counting the non-zero
+// coefficients of every transform block and predicting that count from already-processed neighbours.
+//
+// For each block position (by, bx) one strategy word is read and forwarded. The three channels are
+// then visited in the order given by hls_dim_sanf_order (1, 0, 2). When the position is the first
+// block of its transform (hls_Is_FirstBlock), the block's (cy * kBlockDim) x (cx * kBlockDim)
+// coefficients are read and forwarded, and two words are produced: the non-zero count on
+// strm_nzeros and the neighbour-based prediction on strm_predict_nzeros.
+//
+// ysize_blocks, xsize_blocks : extent of the processed area in blocks
+// nzeros_stride              : row pitch, in entries, of the per-channel count maps; each map holds
+//                              MAX_NUM_BLOCK88_JXL * MAX_NUM_BLOCK88_JXL entries
+// strm_ac_coeff_in / _out    : coefficient stream and its pass-through copy
+// strm_strategy_in / _out    : strategy stream (one word per block position) and its copy
+void hls_count_nz(int ysize_blocks,
+                  int xsize_blocks,
+                  int nzeros_stride,
+                  hls::stream<int32_t>& strm_ac_coeff_in,
+                  hls::stream<int32_t>& strm_strategy_in,
+
+                  hls::stream<int32_t>& strm_ac_coeff_out,
+                  hls::stream<int32_t>& strm_strategy_out,
+                  hls::stream<int32_t>& strm_nzeros,
+                  hls::stream<int32_t>& strm_predict_nzeros) {
+#pragma HLS INLINE off
+    // Per-channel maps of scaled non-zero counts, one entry per block position.
+    int32_t nzero_row_0[MAX_NUM_BLOCK88_JXL * MAX_NUM_BLOCK88_JXL];
+    int32_t nzero_row_1[MAX_NUM_BLOCK88_JXL * MAX_NUM_BLOCK88_JXL];
+    int32_t nzero_row_2[MAX_NUM_BLOCK88_JXL * MAX_NUM_BLOCK88_JXL];
+
+    // Strategy of the current block position; refreshed at channel step 0.
+    int32_t hls_strategy;
+
+HLS_COUNT_NZ_OUTTER_LOOP:
+    for (uint32_t by = 0; by < ysize_blocks; ++by) {
+        // Row used as "the row above". The first row has none; point at row 0 instead so the address
+        // stays inside the buffer (computing by - 1 on an unsigned 0 would wrap). The predictor never
+        // reads the top row when y == 0, so the result is unaffected.
+        const uint32_t by_top = (by == 0) ? 0 : by - 1;
+
+        for (uint32_t bx = 0; bx < xsize_blocks; ++bx) {
+            for (int i = 0; i < 3; i++) {
+                int c = hls_dim_sanf_order(i);
+                int32_t* row_nzeros;
+                int32_t* row_nzeros_top;
+
+                if (c == 0) {
+                    row_nzeros = &nzero_row_0[by * nzeros_stride + 0];
+                    row_nzeros_top = &nzero_row_0[by_top * nzeros_stride + 0];
+                } else if (c == 1) {
+                    row_nzeros = &nzero_row_1[by * nzeros_stride + 0];
+                    row_nzeros_top = &nzero_row_1[by_top * nzeros_stride + 0];
+                } else {
+                    row_nzeros = &nzero_row_2[by * nzeros_stride + 0];
+                    row_nzeros_top = &nzero_row_2[by_top * nzeros_stride + 0];
+                }
+
+                // One strategy word per block position, shared by all three channels.
+                if (i == 0) {
+                    hls_strategy = strm_strategy_in.read();
+                    strm_strategy_out.write(hls_strategy);
+                }
+
+                bool hls_isFirstBlock = hls_Is_FirstBlock(by, bx, hls_strategy);
+                uint32_t cx = covered_blocks_x[hls_strategy]; // lut
+                uint32_t cy = covered_blocks_y[hls_strategy];
+                const uint32_t covered_blocks = cx * cy; // = #LLF coefficients
+                uint32_t log2_covered_blocks = hls_covered_block_lut(covered_blocks);
+
+                if (hls_isFirstBlock) {
+                    int32_t* nzeros_pos = row_nzeros + bx;
+                    int num_zeros = 0;
+                    // Pass the whole transform block through while counting its zero coefficients.
+                    for (uint32_t y = 0; y < cy * kBlockDim; y++) {
+                        for (uint32_t x = 0; x < cx * kBlockDim; x++) {
+#pragma HLS PIPELINE II = 1
+                            int32_t ac_coeff = strm_ac_coeff_in.read();
+                            strm_ac_coeff_out.write(ac_coeff);
+                            if (!ac_coeff) {
+                                num_zeros++;
+                            }
+                        }
+                    }
+
+                    //=============Move to an independent process, linked with hls::stream<int> num_zeros=======
+                    int nzeros = int(cx * cy * kDCTBlockSize) - num_zeros;
+                    // Count scaled down (rounding up) by the number of covered blocks.
+                    const int32_t shifted_nzeros =
+                        static_cast<int32_t>((nzeros + covered_blocks - 1) >> log2_covered_blocks);
+                // Record the scaled count at every block position the transform covers.
+                NZ_EXCEPT_LLF_INNER_LOOP3:
+                    for (uint32_t y = 0; y < cy; y++) {
+                        for (uint32_t x = 0; x < cx; x++) {
+#pragma HLS PIPELINE II = 1
+                            nzeros_pos[x + y * nzeros_stride] = shifted_nzeros;
+                        }
+                    }
+
+                    int32_t predicted_nzeros = hls_PredictFromTopAndLeft(row_nzeros_top, row_nzeros, covered_blocks,
+                                                                         log2_covered_blocks, c, bx, by, 32);
+
+                    strm_nzeros.write(nzeros);
+                    strm_predict_nzeros.write(predicted_nzeros);
+                    //===============================
+                }
+            }
+        }
+    }
+}
+
+// void hls_block_context(
+//     // config
+//     int rect_x0,
+//     int rect_y0,
+//     int ysize_blocks,
+//     int xsize_blocks,
+//     int num_ctxs,
+//     int num_dc_ctxs,
+//     int qf_thresholds_size,
+//     // bram
+//     uint8_t ctx_map[MAX_CTX_MAP_SIZE],
+//     uint32_t qf_thresholds[MAX_QF_THRESH_SIZE],
+//     // strm
+//     hls::stream<int32_t>& strm_qf,
+//     hls::stream<uint8_t>& strm_qdc,
+//     hls::stream<int32_t>& strm_strategy_in,
+//     hls::stream<int32_t>& strm_strategy_out,
+//     hls::stream<uint32_t>& strm_block_ctx) {
+// #pragma HLS INLINE off
+
+//     // global config
+//     int hls_strategy;
+//     uint8_t dc_idx;
+//     int32_t hls_qf;
+
+// Block_CTX_LOOP:
+//     for (uint32_t by = 0; by < ysize_blocks; ++by) {
+//         for (uint32_t bx = 0; bx < xsize_blocks; ++bx) {
+//             for (int i = 0; i < 3; i++) {
+// #pragma HLS PIPELINE II = 1
+
+//                 if (i == 0) {
+//                     hls_strategy = strm_strategy_in.read();
+//                     strm_strategy_out.write(hls_strategy);
+
+//                     dc_idx = strm_qdc.read();
+//                     hls_qf = strm_qf.read();
+//                 }
+//                 int ord = hls_kStrategyOrder[hls_strategy];
+//                 int c = hls_dim_sanf_order(i);
+//                 uint32_t block_ctx = hls_Context(ctx_map, qf_thresholds, qf_thresholds_size, kNumOrders, num_dc_ctxs,
+//                                                  dc_idx, hls_qf, ord, c);
+//                 strm_block_ctx.write(block_ctx);
+//             }
+//         }
+//     }
+// }
+
+//===================================================================================//
+// Merges the non-zero-count tokens and the coefficient tokens into one token sequence.
+//
+// For every block position one strategy word is read. For each of the three channel steps, if the
+// position is the first block of its transform, one word is taken from strm_token_nz and then words
+// are taken from strm_token_ac until one arrives with bit 64 set. The low 64 bits of every word
+// except that terminator are written to both strm_token_out and strm_token_internal.
+//
+// xsize_blocks, ysize_blocks : extent of the processed area in blocks
+// strm_strategy_in           : one strategy word per block position
+// strm_token_nz              : 65-bit tokens, one per first block and channel
+// strm_token_ac              : 65-bit tokens; bit 64 marks the end of a block
+// strm_token_out, strm_token_internal : two identical 64-bit output copies
+void hls_collect_syn(int xsize_blocks,
+                     int ysize_blocks,
+                     hls::stream<int32_t>& strm_strategy_in,
+                     hls::stream<ap_uint<65> >& strm_token_nz,
+                     hls::stream<ap_uint<65> >& strm_token_ac,
+                     hls::stream<ap_uint<64> >& strm_token_out,
+                     hls::stream<ap_uint<64> >& strm_token_internal) {
+#pragma HLS INLINE off
+    int hls_strategy;
+
+COLLECT_SYN_OUTTER_LOOP:
+    for (uint32_t by = 0; by < ysize_blocks; ++by) {
+        for (uint32_t bx = 0; bx < xsize_blocks; ++bx) {
+            for (int i = 0; i < 3; i++) {
+                // NOTE(review): c is computed but not used in this function.
+                int c = hls_dim_sanf_order(i);
+                // The strategy is shared by the three channel steps of a block position.
+                if (i == 0) {
+                    hls_strategy = strm_strategy_in.read();
+                }
+                bool hls_isFirstBlock = hls_Is_FirstBlock(by, bx, hls_strategy);
+                // covered block size
+                uint32_t cx = covered_blocks_x[hls_strategy]; // lut
+                uint32_t cy = covered_blocks_y[hls_strategy];
+                uint32_t covered_blocks = cx * cy; // = #LLF coefficients
+                uint32_t size = covered_blocks * kDCTBlockSize;
+                // loop in block
+                if (hls_isFirstBlock) {
+                // Upper bound: one non-zero-count token plus at most `size` reads from strm_token_ac.
+                COLLECT_SYN_INNER_LOOP:
+                    for (int k = 0; k < size + 1; k++) {
+#pragma HLS PIPELINE II = 1
+                        if (k == 0) {
+                            // First word of the block: the non-zero-count token.
+                            ap_uint<65> token_nz_reg = strm_token_nz.read();
+                            ap_uint<64> token_out_reg = token_nz_reg.range(63, 0);
+                            strm_token_out.write(token_out_reg);
+                            strm_token_internal.write(token_out_reg);
+                        } else {
+                            ap_uint<65> token_ac_reg = strm_token_ac.read();
+                            bool blk_end = token_ac_reg[64];
+
+                            if (blk_end) {
+                                // Terminator word: consumed here, not forwarded.
+                                break;
+                            } else {
+                                ap_uint<64> token_out_reg = token_ac_reg.range(63, 0);
+                                strm_token_out.write(token_out_reg);
+                                strm_token_internal.write(token_out_reg);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Produces one non-zero-count token per transform block and channel.
+//
+// Per block position, at channel step 0, one word each is read from strm_strategy_in, strm_qdc and
+// strm_qf; the strategy is forwarded on both strategy outputs. For every channel step whose position
+// is the first block of its transform, the actual and predicted non-zero counts are read, the block
+// context is looked up with hls_Context, and three things are emitted:
+//   - the non-zero count, forwarded on strm_nzero_out,
+//   - the zero-density context offset on strm_histo_offset,
+//   - a 65-bit token on strm_token_nz: bits 31..0 = non-zero count, bits 63..32 = context from
+//     hls_NonZeroContext (computed from the predicted count), bit 64 = 0.
+//
+// rect_x0 and rect_y0 are accepted but not used in this function.
+void hls_tokenize_nz(
+    // config
+    int rect_x0,
+    int rect_y0,
+    int ysize_blocks,
+    int xsize_blocks,
+    int num_ctxs,
+    int num_dc_ctxs,
+    int qf_thresholds_size,
+    // bram
+    uint8_t ctx_map[MAX_CTX_MAP_SIZE],
+    uint32_t qf_thresholds[MAX_QF_THRESH_SIZE],
+    // stream
+    hls::stream<int32_t>& strm_qf,
+    hls::stream<uint8_t>& strm_qdc,
+    hls::stream<int32_t>& strm_strategy_in,
+    hls::stream<int32_t>& strm_nzeros,
+    hls::stream<int32_t>& strm_predict_nzeros,
+    hls::stream<int32_t>& strm_strategy_out,
+    hls::stream<int32_t>& strm_strategy_out2,
+    hls::stream<uint32_t>& strm_histo_offset,
+    hls::stream<int32_t>& strm_nzero_out,
+    hls::stream<ap_uint<65> >& strm_token_nz) {
+#pragma HLS INLINE off
+
+    // global config
+    int hls_strategy;
+    uint8_t dc_idx;
+    int32_t hls_qf;
+
+TOKENIZE_NZ_LOOP:
+    for (uint32_t by = 0; by < ysize_blocks; ++by) {
+        for (uint32_t bx = 0; bx < xsize_blocks; ++bx) {
+            for (int i = 0; i < 3; i++) {
+#pragma HLS PIPELINE II = 1
+
+                // only read 1 strategy per block
+                if (i == 0) {
+                    // strategy
+                    hls_strategy = strm_strategy_in.read();
+                    strm_strategy_out.write(hls_strategy);
+                    strm_strategy_out2.write(hls_strategy);
+
+                    // qdc & qf
+                    dc_idx = strm_qdc.read();
+                    hls_qf = strm_qf.read();
+                }
+
+                bool hls_isFirstBlock = hls_Is_FirstBlock(by, bx, hls_strategy);
+
+                if (hls_isFirstBlock) {
+                    int32_t nzeros = strm_nzeros.read();
+
+                    // Pass the count on unchanged.
+                    strm_nzero_out.write(nzeros);
+
+                    int32_t predicted_nzeros = strm_predict_nzeros.read();
+
+                    //=================Move this block_ctx calculation into an independent process======
+                    int ord = hls_kStrategyOrder[hls_strategy];
+                    int c = hls_dim_sanf_order(i);
+                    uint32_t block_ctx = hls_Context(ctx_map, qf_thresholds, qf_thresholds_size, kNumOrders,
+                                                     num_dc_ctxs, dc_idx, hls_qf, ord, c);
+                    //==================================================================================
+
+                    // The context is derived from the predicted count; the token value is the actual count.
+                    int32_t nzero_ctx = hls_NonZeroContext(num_ctxs, predicted_nzeros, block_ctx);
+
+                    uint32_t histo_offset = hls_ZeroDensityContextsOffset(num_ctxs, block_ctx);
+                    strm_histo_offset.write(histo_offset);
+
+                    // Token layout: [31:0] value, [63:32] context, [64] end-of-block flag (always 0 here).
+                    ap_uint<65> token_nz_reg;
+                    token_nz_reg.range(31, 0) = (uint32_t)nzeros;
+                    token_nz_reg.range(63, 32) = (uint32_t)nzero_ctx;
+                    token_nz_reg[64] = 0;
+                    strm_token_nz.write(token_nz_reg);
+                }
+            }
+        }
+    }
+}
+
+// Produces the coefficient tokens of every transform block, followed by an end-of-block word.
+//
+// Per block position one strategy word is read. For every channel step whose position is the first
+// block of its transform, the context offset and non-zero count are read, then all
+// covered_blocks * kDCTBlockSize coefficients are consumed. The first `covered_blocks` coefficients
+// are skipped; after that a token is emitted for each coefficient for as long as non-zeros remain.
+// Each block is closed with a word whose bit 64 is set.
+//
+// Token layout on strm_token_ac: bits 31..0 = hls_PackSigned(coeff), bits 63..32 = context,
+// bit 64 = end-of-block flag.
+//
+// xsize_blocks, ysize_blocks : extent of the processed area in blocks
+// strm_coeff_ordered         : coefficients, one full transform block at a time
+// strm_strategy_in           : one strategy word per block position
+// strm_histo_offset          : context offset, one word per first block and channel
+// strm_nzeros_tokenAc        : non-zero count, one word per first block and channel
+void hls_tokenize_ac(int xsize_blocks,
+                     int ysize_blocks,
+                     hls::stream<int32_t>& strm_coeff_ordered,
+                     hls::stream<int32_t>& strm_strategy_in,
+                     hls::stream<uint32_t>& strm_histo_offset,
+                     hls::stream<int32_t>& strm_nzeros_tokenAc,
+                     hls::stream<ap_uint<65> >& strm_token_ac) {
+#pragma HLS INLINE off
+    // Strategy of the current block position; refreshed at channel step 0.
+    int hls_strategy;
+
+TOKENIZE_AC_OUTTER_LOOP:
+    for (uint32_t by = 0; by < ysize_blocks; ++by) {
+        for (uint32_t bx = 0; bx < xsize_blocks; ++bx) {
+            for (int i = 0; i < 3; i++) {
+                if (i == 0) {
+                    hls_strategy = strm_strategy_in.read();
+                }
+
+                bool hls_isFirstBlock = hls_Is_FirstBlock(by, bx, hls_strategy);
+
+                if (hls_isFirstBlock) {
+                    uint32_t cx = covered_blocks_x[hls_strategy]; // lut
+                    uint32_t cy = covered_blocks_y[hls_strategy];
+                    const uint32_t covered_blocks = cx * cy; // = #LLF coefficients
+                    uint32_t log2_covered_blocks = hls_covered_block_lut(covered_blocks);
+                    uint32_t size = covered_blocks * kDCTBlockSize;
+                    uint32_t histo_offset = strm_histo_offset.read();
+                    int32_t nzeros = strm_nzeros_tokenAc.read();
+
+                    // Initial "previous coefficient was non-zero" flag: set unless the block is dense.
+                    int32_t prev = (nzeros > (int32_t)(size / 16) ? 0 : 1);
+                TOKENIZE_AC_INNER_LOOP:
+                    for (uint32_t k = 0; k < size; ++k) {
+#pragma HLS PIPELINE II = 1
+                        // Every coefficient is consumed, even when no token is emitted for it.
+                        int32_t coeff = strm_coeff_ordered.read();
+                        // Skip LLF.
+                        if (k >= covered_blocks) {
+                            uint32_t ctx = histo_offset +
+                                           hls_ZeroDensityContext(nzeros, k, covered_blocks, log2_covered_blocks, prev);
+
+                            uint32_t u_coeff = hls_PackSigned(coeff);
+
+                            // Tokens are emitted only while non-zero coefficients remain.
+                            if (nzeros > 0) {
+                                ap_uint<65> token_ac_reg;
+                                token_ac_reg.range(31, 0) = (uint32_t)u_coeff;
+                                token_ac_reg.range(63, 32) = (uint32_t)ctx;
+                                token_ac_reg[64] = 0; // block_end
+                                strm_token_ac.write(token_ac_reg);
+
+                                prev = coeff != 0;
+                                nzeros -= prev;
+                            }
+                        }
+                    }
+
+                    // end of a block
+                    ap_uint<65> token_ac_reg;
+                    token_ac_reg.range(63, 0) = 0;
+                    token_ac_reg[64] = 1; // block_end
+                    strm_token_ac.write(token_ac_reg);
+                }
+            }
+        }
+    }
+}
+
+// Dataflow region that turns one group's coefficients into tokens.
+//
+// Four stages are chained through local streams:
+//   hls_count_nz    : counts and predicts non-zeros, passes coefficients and strategies through
+//   hls_tokenize_nz : emits one non-zero-count token and one context offset per block and channel
+//   hls_tokenize_ac : emits the coefficient tokens plus an end-of-block word
+//   hls_collect_syn : interleaves both token kinds onto strm_token_out and strm_token_internal
+//
+// The array bounds of ctx_map / qf_thresholds match the ones declared by hls_tokenize_nz.
+//
+// rect_x0, rect_y0           : forwarded to hls_tokenize_nz
+// xsize_blocks, ysize_blocks : extent of the processed area in blocks
+// num_ctxs, num_dc_ctxs, qf_thresholds_size, ctx_map, qf_thresholds : forwarded to hls_tokenize_nz
+// nzeros_stride              : forwarded to hls_count_nz
+// strm_ac_coeff, strm_strategy, strm_qf, strm_qdc : input streams
+// strm_token_internal, strm_token_out            : two identical copies of the token sequence
+void hls_ac_tokenize_core(int rect_x0,
+                          int rect_y0,
+                          int xsize_blocks,
+                          int ysize_blocks,
+                          int num_ctxs,
+                          int num_dc_ctxs,
+                          int qf_thresholds_size,
+                          int nzeros_stride,
+                          uint8_t ctx_map[MAX_CTX_MAP_SIZE],
+                          uint32_t qf_thresholds[MAX_QF_THRESH_SIZE],
+                          hls::stream<int32_t>& strm_ac_coeff,
+                          hls::stream<int32_t>& strm_strategy,
+                          hls::stream<int32_t>& strm_qf,
+                          hls::stream<uint8_t>& strm_qdc,
+                          hls::stream<ap_uint<64> >& strm_token_internal,
+                          hls::stream<ap_uint<64> >& strm_token_out) {
+#pragma HLS DATAFLOW
+    // hls_count_nz forwards a whole transform block of coefficients before it publishes the count
+    // that hls_tokenize_ac waits for, so this FIFO is much deeper than the others.
+    hls::stream<int32_t, 1024> strm_ac_coeff_nz;
+    hls::stream<int32_t, 16> strm_nzeros;
+    hls::stream<int32_t, 16> strm_predict_nzeros;
+    hls::stream<int32_t, 16> strm_strategy_0_1;
+    hls_count_nz(
+        // config
+        ysize_blocks, xsize_blocks, nzeros_stride,
+        // stream_in
+        strm_ac_coeff, strm_strategy,
+        // stream_out
+        strm_ac_coeff_nz, strm_strategy_0_1, strm_nzeros, strm_predict_nzeros);
+
+    hls::stream<int32_t, 16> strm_strategy_1;
+    hls::stream<int32_t, 16> strm_strategy_1_2;
+    hls::stream<ap_uint<65>, 16> strm_token_nz;
+    hls::stream<uint32_t, 16> strm_histo_offset;
+    hls::stream<int32_t, 16> strm_nzeros2;
+    hls_tokenize_nz(
+        // config
+        rect_x0, rect_y0, ysize_blocks, xsize_blocks, num_ctxs, num_dc_ctxs, qf_thresholds_size, ctx_map, qf_thresholds,
+        // stream_in
+        strm_qf, strm_qdc, strm_strategy_0_1, strm_nzeros, strm_predict_nzeros,
+        // stream_out
+        strm_strategy_1, strm_strategy_1_2, strm_histo_offset, strm_nzeros2, strm_token_nz);
+
+    hls::stream<ap_uint<65>, 16> strm_token_ac;
+    hls_tokenize_ac(
+        // config
+        xsize_blocks, ysize_blocks,
+        // stream_in
+        strm_ac_coeff_nz, strm_strategy_1_2, strm_histo_offset, strm_nzeros2,
+        // stream_out
+        strm_token_ac);
+
+    hls_collect_syn(
+        // config
+        xsize_blocks, ysize_blocks,
+        // stream_in
+        strm_strategy_1, strm_token_nz, strm_token_ac,
+        // stream_out
+        strm_token_out, strm_token_internal);
+}
+
+// Copies raster-order AC coefficients from memory to a stream, one group at a time.
+//
+// strm_config supplies three words in this order: group_dim, pixel_xsize, pixel_ysize. The image is
+// tiled into group_dim x group_dim pixel groups. For every group, in row-major order, the number of
+// blocks it covers is clipped against the image extent and
+// xsize_blocks * ysize_blocks * kDCTBlockSize * 3 consecutive words are copied from ac_coeff_ddr to
+// strm_ac_coeff_raster. Groups are laid out back to back in memory.
+void load_ac_raster_by_group(hls::stream<int>& strm_config,
+                             int32_t* ac_coeff_ddr,
+                             hls::stream<int32_t>& strm_ac_coeff_raster) {
+#pragma HLS INLINE off
+
+    const int group_dim = strm_config.read();
+    const int pixel_xsize = strm_config.read();
+    const int pixel_ysize = strm_config.read();
+
+    const int groups_per_row = (pixel_xsize + group_dim - 1) / group_dim;
+    const int groups_per_col = (pixel_ysize + group_dim - 1) / group_dim;
+    const int total_groups = groups_per_row * groups_per_col;
+
+    // Group edge length and image extent, both measured in blocks.
+    const int group_side_blocks = group_dim >> 3;
+    const int image_xblocks = (pixel_xsize + kBlockDim - 1) / kBlockDim;
+    const int image_yblocks = (pixel_ysize + kBlockDim - 1) / kBlockDim;
+
+    // Index of the current group's first word in ac_coeff_ddr.
+    uint64_t ddr_base = 0;
+
+LOAD_AC_RASTER_OUTTER_LOOP:
+    for (int g = 0; g < total_groups; g++) {
+        // Block coordinates of the group's top-left corner.
+        const int first_xblock = (g % groups_per_row) * group_side_blocks;
+        const int first_yblock = (g / groups_per_row) * group_side_blocks;
+
+        // Clip the group against the right and bottom image borders.
+        int xsize_blocks = group_side_blocks;
+        if (first_xblock + group_side_blocks > image_xblocks) {
+            xsize_blocks = (image_xblocks > first_xblock) ? image_xblocks - first_xblock : 0;
+        }
+        int ysize_blocks = group_side_blocks;
+        if (first_yblock + group_side_blocks > image_yblocks) {
+            ysize_blocks = (image_yblocks > first_yblock) ? image_yblocks - first_yblock : 0;
+        }
+
+    // loading ac_coeff by group
+    LOAD_AC_RASTER_INNER_LOOP:
+        for (int k = 0; k < xsize_blocks * ysize_blocks * kDCTBlockSize * 3; k++) {
+#pragma HLS PIPELINE II = 1
+            strm_ac_coeff_raster.write(ac_coeff_ddr[k + ddr_base]);
+        }
+
+        // The next group starts right after this one.
+        ddr_base += ysize_blocks * xsize_blocks * kDCTBlockSize * 3;
+    }
+}
+
+// Copies ordered AC coefficients from memory to a stream, one group at a time.
+//
+// strm_config supplies three words in this order: group_dim, pixel_xsize, pixel_ysize. The image is
+// tiled into group_dim x group_dim pixel groups. For every group, in row-major order, the number of
+// blocks it covers is clipped against the image extent and
+// xsize_blocks * ysize_blocks * kDCTBlockSize * 3 consecutive words are copied from
+// ac_coeff_ordered_ddr to strm_ac_coeff0. Groups are laid out back to back in memory.
+void load_ac_ordered_by_group(hls::stream<int>& strm_config,
+                              int32_t* ac_coeff_ordered_ddr,
+                              hls::stream<int32_t>& strm_ac_coeff0) {
+#pragma HLS INLINE off
+
+    const int group_dim = strm_config.read();
+    const int pixel_xsize = strm_config.read();
+    const int pixel_ysize = strm_config.read();
+
+    const int groups_per_row = (pixel_xsize + group_dim - 1) / group_dim;
+    const int groups_per_col = (pixel_ysize + group_dim - 1) / group_dim;
+    const int total_groups = groups_per_row * groups_per_col;
+
+    // Group edge length and image extent, both measured in blocks.
+    const int group_side_blocks = group_dim >> 3;
+    const int image_xblocks = (pixel_xsize + kBlockDim - 1) / kBlockDim;
+    const int image_yblocks = (pixel_ysize + kBlockDim - 1) / kBlockDim;
+
+    // Index of the current group's first word in ac_coeff_ordered_ddr.
+    uint64_t ddr_base = 0;
+
+LOAD_AC_ORDERED_OUTTER_LOOP:
+    for (int g = 0; g < total_groups; g++) {
+        // Block coordinates of the group's top-left corner.
+        const int first_xblock = (g % groups_per_row) * group_side_blocks;
+        const int first_yblock = (g / groups_per_row) * group_side_blocks;
+
+        // Clip the group against the right and bottom image borders.
+        int xsize_blocks = group_side_blocks;
+        if (first_xblock + group_side_blocks > image_xblocks) {
+            xsize_blocks = (image_xblocks > first_xblock) ? image_xblocks - first_xblock : 0;
+        }
+        int ysize_blocks = group_side_blocks;
+        if (first_yblock + group_side_blocks > image_yblocks) {
+            ysize_blocks = (image_yblocks > first_yblock) ? image_yblocks - first_yblock : 0;
+        }
+
+    // loading ac_coeff by group
+    LOAD_AC_ORDERED_INNER_LOOP:
+        for (int k = 0; k < xsize_blocks * ysize_blocks * kDCTBlockSize * 3; k++) {
+#pragma HLS PIPELINE II = 1
+            strm_ac_coeff0.write(ac_coeff_ordered_ddr[k + ddr_base]);
+        }
+
+        // The next group starts right after this one.
+        ddr_base += ysize_blocks * xsize_blocks * kDCTBlockSize * 3;
+    }
+}
+
+// Streams the AC strategy words from DDR into strm_strategy, group by group.
+//
+// strm_config    delivers group_dim, pixel_xsize and pixel_ysize, in that order.
+// strategy_ddr   source buffer; the groups are stored back to back, one word per block
+//                (xsize_blocks * ysize_blocks words per group).
+// strm_strategy  receives every word in DDR order.
+void load_ac_strategy_by_group(hls::stream<int>& strm_config,
+                               int32_t* strategy_ddr,
+                               hls::stream<int32_t>& strm_strategy) {
+#pragma HLS INLINE off
+
+    const int group_dim = strm_config.read();
+    const int pixel_xsize = strm_config.read();
+    const int pixel_ysize = strm_config.read();
+
+    // groups per row / per column (rounded up)
+    const int xsize_groups = (pixel_xsize + group_dim - 1) / group_dim;
+    const int ysize_groups = (pixel_ysize + group_dim - 1) / group_dim;
+    const int num_groups = xsize_groups * ysize_groups;
+
+    // geometry that is the same for every group, counted in blocks
+    const int group_side_blocks = group_dim >> 3;
+    const int frame_xsize_blocks = (pixel_xsize + kBlockDim - 1) / kBlockDim;
+    const int frame_ysize_blocks = (pixel_ysize + kBlockDim - 1) / kBlockDim;
+
+    uint64_t group_offset = 0;
+
+LOAD_AC_STRATEGY_OUTTER_LOOP:
+    for (int group_index = 0; group_index < num_groups; group_index++) {
+        // first block column / row covered by this group
+        const int x0 = (group_index % xsize_groups) * group_side_blocks;
+        const int y0 = (group_index / xsize_groups) * group_side_blocks;
+
+        // a group reaching past the frame edge is clipped (never below zero)
+        int xsize_blocks = group_side_blocks;
+        if (x0 + group_side_blocks > frame_xsize_blocks) {
+            xsize_blocks = (frame_xsize_blocks > x0) ? frame_xsize_blocks - x0 : 0;
+        }
+        int ysize_blocks = group_side_blocks;
+        if (y0 + group_side_blocks > frame_ysize_blocks) {
+            ysize_blocks = (frame_ysize_blocks > y0) ? frame_ysize_blocks - y0 : 0;
+        }
+
+        // one strategy word per block of the group
+        const int group_blocks = xsize_blocks * ysize_blocks;
+
+    // loading strategy by group
+    LOAD_AC_STRATEGY_INNER_LOOP:
+        for (int k = 0; k < group_blocks; k++) {
+#pragma HLS PIPELINE II = 1
+            strm_strategy.write(strategy_ddr[k + group_offset]);
+        }
+
+        // move to next group set
+        group_offset = group_offset + group_blocks;
+    }
+}
+
+// Streams the qdc bytes from DDR into strm_qdc, group by group.
+//
+// strm_config  delivers group_dim, pixel_xsize and pixel_ysize, in that order.
+// qdc_ddr      source buffer; the groups are stored back to back, each holding
+//              xsize_blocks * ysize_blocks bytes laid out row by row.
+// strm_qdc     receives every byte in DDR order.
+void load_qdc_by_group(hls::stream<int>& strm_config, uint8_t* qdc_ddr, hls::stream<uint8_t>& strm_qdc) {
+#pragma HLS INLINE off
+
+    const int group_dim = strm_config.read();
+    const int pixel_xsize = strm_config.read();
+    const int pixel_ysize = strm_config.read();
+
+    // groups per row / per column (rounded up)
+    const int xsize_groups = (pixel_xsize + group_dim - 1) / group_dim;
+    const int ysize_groups = (pixel_ysize + group_dim - 1) / group_dim;
+    const int num_groups = xsize_groups * ysize_groups;
+
+    // geometry that is the same for every group, counted in blocks
+    const int group_side_blocks = group_dim >> 3;
+    const int frame_xsize_blocks = (pixel_xsize + kBlockDim - 1) / kBlockDim;
+    const int frame_ysize_blocks = (pixel_ysize + kBlockDim - 1) / kBlockDim;
+
+    uint64_t group_offset = 0;
+LOAD_QDC_OUTTER_LOOP:
+    for (int group_index = 0; group_index < num_groups; group_index++) {
+        // first block column / row covered by this group
+        const int x0 = (group_index % xsize_groups) * group_side_blocks;
+        const int y0 = (group_index / xsize_groups) * group_side_blocks;
+
+        // a group reaching past the frame edge is clipped (never below zero)
+        int xsize_blocks = group_side_blocks;
+        if (x0 + group_side_blocks > frame_xsize_blocks) {
+            xsize_blocks = (frame_xsize_blocks > x0) ? frame_xsize_blocks - x0 : 0;
+        }
+        int ysize_blocks = group_side_blocks;
+        if (y0 + group_side_blocks > frame_ysize_blocks) {
+            ysize_blocks = (frame_ysize_blocks > y0) ? frame_ysize_blocks - y0 : 0;
+        }
+
+    LOAD_QDC_INNER_LOOP:
+        for (int row = 0; row < ysize_blocks; row++) {
+            for (int col = 0; col < xsize_blocks; col++) {
+#pragma HLS PIPELINE II = 1
+                // consecutive rows of a group are xsize_blocks entries apart
+                strm_qdc.write(qdc_ddr[col + row * xsize_blocks + group_offset]);
+            }
+        }
+
+        group_offset += ysize_blocks * xsize_blocks;
+    }
+}
+
+// Streams the quant-field words from DDR into strm_qf, group by group.
+//
+// strm_config  delivers group_dim, pixel_xsize and pixel_ysize, in that order.
+// qf_ddr       source buffer of 32-bit words; the groups are stored back to back, each
+//              holding xsize_blocks * ysize_blocks words laid out row by row.
+// strm_qf      receives every word in DDR order, at full 32-bit width.
+void load_qf_by_group(hls::stream<int>& strm_config, int32_t* qf_ddr, hls::stream<int32_t>& strm_qf) {
+#pragma HLS INLINE off
+
+    int group_dim = strm_config.read();
+    int pixel_xsize = strm_config.read();
+    int pixel_ysize = strm_config.read();
+
+    // groups per row / per column (rounded up)
+    int xsize_groups = (pixel_xsize + group_dim - 1) / group_dim;
+    int ysize_groups = (pixel_ysize + group_dim - 1) / group_dim;
+    int num_groups = xsize_groups * ysize_groups;
+
+    // geometry that is the same for every group, counted in blocks
+    const int size_max = group_dim >> 3;
+    const int hls_xsize_blocks = (pixel_xsize + kBlockDim - 1) / kBlockDim;
+    const int hls_ysize_blocks = (pixel_ysize + kBlockDim - 1) / kBlockDim;
+
+    uint64_t group_offset = 0;
+LOAD_QF_OUTTER_LOOP:
+    for (int group_index = 0; group_index < num_groups; group_index++) {
+        // first block column / row covered by this group
+        int gx = group_index % xsize_groups;
+        int gy = group_index / xsize_groups;
+        int hls_x0 = gx * size_max;
+        int hls_y0 = gy * size_max;
+        // group rectangle, clipped against the frame border (never below zero)
+        int xsize_blocks = (hls_x0 + size_max <= hls_xsize_blocks)
+                               ? size_max
+                               : (hls_xsize_blocks > hls_x0 ? hls_xsize_blocks - hls_x0 : 0);
+        int ysize_blocks = (hls_y0 + size_max <= hls_ysize_blocks)
+                               ? size_max
+                               : (hls_ysize_blocks > hls_y0 ? hls_ysize_blocks - hls_y0 : 0);
+
+    LOAD_QF_INNER_LOOP:
+        for (int by = 0; by < ysize_blocks; by++) {
+            for (int bx = 0; bx < xsize_blocks; bx++) {
+#pragma HLS PIPELINE II = 1
+                // Keep the full 32-bit word: source and stream are both int32_t, and an
+                // 8-bit temporary here would truncate every value outside 0..255.
+                int32_t hls_qf = qf_ddr[bx + by * xsize_blocks + group_offset];
+                strm_qf.write(hls_qf);
+            }
+        }
+
+        group_offset += ysize_blocks * xsize_blocks;
+    }
+}
+
+// Drains strm_token_out into ac_tokens_ddr, storing tokens at consecutive indices
+// starting from 0. The token whose bit 63 is set is stored as well and ends the loop.
+void ac_token_writeout(uint64_t* ac_tokens_ddr, hls::stream<ap_uint<64> >& strm_token_out) {
+#pragma HLS INLINE off
+
+    uint64_t write_pos = 0;
+    bool last_token_seen = false;
+
+AC_TOKEN_WRITEOUT_LOOP:
+    while (!last_token_seen) {
+#pragma HLS PIPELINE II = 1
+        ap_uint<64> token = strm_token_out.read();
+        ac_tokens_ddr[write_pos] = token;
+        write_pos++;
+
+        // bit 63 flags the final token of the stream
+        last_token_seen = token[63];
+    }
+}
+
+// Runs hls_ac_tokenize_core() once per group and frames the resulting token streams.
+//
+// strm_global_config is read once, in this order: group_dim, pixel_xsize, pixel_ysize,
+// qf_thresholds_size, num_ctxs, num_dc_ctxs, nzeros_stride.
+//
+// After every group a marker token with only bit 62 set is appended to strm_token_out.
+// After the last group a token with only bit 63 set is written to strm_token_out and
+// another one to strm_token_internal. The remaining bits of every marker are zero, also
+// when there are no groups at all.
+void hls_TokenizeCoefficients(
+    // bram
+    uint8_t hls_ctx_map[MAX_CTX_MAP_SIZE],
+    uint32_t hls_qf_thresholds[MAX_CTX_MAP_SIZE],
+    // strm input
+    hls::stream<int32_t>& strm_global_config,
+    // size of pixel
+    hls::stream<int32_t>& strm_ac_coeff_ordered,
+    // size of blk_num
+    hls::stream<int32_t>& strm_strategy,
+    hls::stream<int32_t>& strm_qf,
+    hls::stream<uint8_t>& strm_qdc,
+    // size of bram
+    hls::stream<ap_uint<64> >& strm_token_internal,
+    // output
+    hls::stream<ap_uint<64> >& strm_token_out
+
+    ) {
+#pragma HLS INLINE off
+
+    // global config
+    int group_dim = strm_global_config.read();
+    int pixel_xsize = strm_global_config.read();
+    int pixel_ysize = strm_global_config.read();
+    int qf_thresholds_size = strm_global_config.read();
+    int num_ctxs = strm_global_config.read();
+    int num_dc_ctxs = strm_global_config.read();
+    int nzeros_stride = strm_global_config.read();
+
+    // groups per row / per column (rounded up)
+    int xsize_groups = (pixel_xsize + group_dim - 1) / group_dim;
+    int ysize_groups = (pixel_ysize + group_dim - 1) / group_dim;
+    int num_groups = xsize_groups * ysize_groups;
+
+    // geometry that is the same for every group, counted in blocks
+    const int size_max = group_dim >> 3;
+    const int hls_xsize_blocks = (pixel_xsize + kBlockDim - 1) / kBlockDim;
+    const int hls_ysize_blocks = (pixel_ysize + kBlockDim - 1) / kBlockDim;
+
+    // Marker token for strm_token_out. Zero-initialised so that the end-of-stream marker
+    // below has defined bits 61..0 even when the group loop does not run.
+    ap_uint<64> token_reg_out = 0;
+
+TOKENIZE_COEFF_LOOP:
+    for (int group_index = 0; group_index < num_groups; group_index++) {
+        // first block column / row covered by this group
+        int gx = group_index % xsize_groups;
+        int gy = group_index / xsize_groups;
+        int hls_x0 = gx * size_max;
+        int hls_y0 = gy * size_max;
+        // group rectangle, clipped against the frame border (never below zero)
+        int xsize_blocks = (hls_x0 + size_max <= hls_xsize_blocks)
+                               ? size_max
+                               : (hls_xsize_blocks > hls_x0 ? hls_xsize_blocks - hls_x0 : 0);
+        int ysize_blocks = (hls_y0 + size_max <= hls_ysize_blocks)
+                               ? size_max
+                               : (hls_ysize_blocks > hls_y0 ? hls_ysize_blocks - hls_y0 : 0);
+
+        hls_ac_tokenize_core(hls_x0, hls_y0, xsize_blocks, ysize_blocks, num_ctxs, num_dc_ctxs, qf_thresholds_size,
+                             nzeros_stride, hls_ctx_map, hls_qf_thresholds, strm_ac_coeff_ordered, strm_strategy,
+                             strm_qf, strm_qdc, strm_token_internal, strm_token_out);
+
+        // post-process: end-of-group marker (bit 62 set, bit 63 clear, payload zero)
+        token_reg_out(61, 0) = 0;
+        token_reg_out[62] = 1;
+        token_reg_out[63] = 0;
+        strm_token_out.write(token_reg_out);
+    }
+
+    // end-of-stream marker for the output stream (bit 63 set, bit 62 clear)
+    token_reg_out[62] = 0;
+    token_reg_out[63] = 1;
+    strm_token_out.write(token_reg_out);
+
+    // end-of-stream marker for the internal token stream
+    ap_uint<64> token_reg;
+    token_reg(62, 0) = 0;
+    token_reg[63] = 1;
+    strm_token_internal.write(token_reg);
+}
+
+//=====================================================================================================//
+// hls_init_histogram.cpp
+//=====================================================================================================//
+// Stores the values delivered by the two result streams back into the config array:
+// five values from strm_largest_idx go to config[17..21], then five values from
+// strm_nempty_cnt go to config[22..26].
+void hls_largest_And_empty_write_out(int config[32],
+                                     hls::stream<uint32_t>& strm_nempty_cnt,
+                                     hls::stream<uint32_t>& strm_largest_idx
+
+                                     ) {
+WRITE_LARGEST_IDX_LOOP:
+    for (int slot = 17; slot < 22; slot++) {
+#pragma HLS PIPELINE II = 1
+        config[slot] = strm_largest_idx.read();
+    }
+
+WRITE_NEMPTY_CNT_LOOP:
+    for (int slot = 22; slot < 27; slot++) {
+#pragma HLS PIPELINE II = 1
+        config[slot] = strm_nempty_cnt.read();
+    }
+}
+
+// Fans the host configuration words out to the streams read by the processing stages.
+//
+//   strm_config_2..5    each get config[4], config[5], config[6]
+//   strm_global_config  gets config[4], [5], [6], [9], [7], [8], [10] (in that order)
+//   strm_do_once_0..3   get config[12], [13], [14], [15] respectively
+//   strm_do_once        gets config[12] .. config[16]
+void load_config(int config[32],
+                 hls::stream<int>& strm_config_2,
+                 hls::stream<int>& strm_config_3,
+                 hls::stream<int>& strm_config_4,
+                 hls::stream<int>& strm_config_5,
+                 hls::stream<int32_t>& strm_global_config,
+                 hls::stream<int>& strm_do_once,
+                 hls::stream<int>& strm_do_once_0,
+                 hls::stream<int>& strm_do_once_1,
+                 hls::stream<int>& strm_do_once_2,
+                 hls::stream<int>& strm_do_once_3) {
+    // frame geometry shared by all consumers
+    const int cfg_group_dim = config[4];
+    const int cfg_pixel_xsize = config[5];
+    const int cfg_pixel_ysize = config[6];
+    // remaining words forwarded on the global config stream
+    const int cfg_word7 = config[7];
+    const int cfg_word8 = config[8];
+    const int cfg_word9 = config[9];
+    const int cfg_word10 = config[10];
+    // words forwarded on the do_once streams
+    const int once_0 = config[12];
+    const int once_1 = config[13];
+    const int once_2 = config[14];
+    const int once_3 = config[15];
+    const int once_4 = config[16];
+
+    strm_config_2.write(cfg_group_dim);
+    strm_config_2.write(cfg_pixel_xsize);
+    strm_config_2.write(cfg_pixel_ysize);
+
+    strm_config_3.write(cfg_group_dim);
+    strm_config_3.write(cfg_pixel_xsize);
+    strm_config_3.write(cfg_pixel_ysize);
+
+    strm_config_4.write(cfg_group_dim);
+    strm_config_4.write(cfg_pixel_xsize);
+    strm_config_4.write(cfg_pixel_ysize);
+
+    strm_config_5.write(cfg_group_dim);
+    strm_config_5.write(cfg_pixel_xsize);
+    strm_config_5.write(cfg_pixel_ysize);
+
+    // note the order: config[9] is sent before config[7] and config[8]
+    strm_global_config.write(cfg_group_dim);
+    strm_global_config.write(cfg_pixel_xsize);
+    strm_global_config.write(cfg_pixel_ysize);
+    strm_global_config.write(cfg_word9);
+    strm_global_config.write(cfg_word7);
+    strm_global_config.write(cfg_word8);
+    strm_global_config.write(cfg_word10);
+
+    strm_do_once_0.write(once_0);
+    strm_do_once_1.write(once_1);
+    strm_do_once_2.write(once_2);
+    strm_do_once_3.write(once_3);
+
+    strm_do_once.write(once_0);
+    strm_do_once.write(once_1);
+    strm_do_once.write(once_2);
+    strm_do_once.write(once_3);
+    strm_do_once.write(once_4);
+}
+
+// Copies the context map and the quant-field thresholds into their on-chip buffers.
+//
+// config             config[11] is the number of ctx_map entries to copy,
+//                    config[9] the number of qf_thresholds entries to copy.
+// ctx_map            source of the context map entries.
+// qf_thresholds      source of the threshold entries.
+// hls_ctx_map        destination, MAX_CTX_MAP_SIZE entries.
+// hls_qf_thresholds  destination, MAX_QF_THRESH_SIZE entries.
+//
+// The requested counts are limited to the capacity of the destination buffers, so an
+// oversized value in config[] cannot write past the end of either buffer.
+void load_bram(
+    // host config
+    int config[32],
+    uint8_t ctx_map[MAX_CTX_MAP_SIZE],
+    uint32_t qf_thresholds[MAX_QF_THRESH_SIZE],
+    uint8_t hls_ctx_map[MAX_CTX_MAP_SIZE],
+    uint32_t hls_qf_thresholds[MAX_QF_THRESH_SIZE]) {
+    // load size config
+    int ctx_map_size = config[11];
+    int qf_threshold_size = config[9];
+
+    // Limit to the destination capacity. The casts keep the comparison signed, so a
+    // negative count still results in zero loop iterations.
+    if (ctx_map_size > static_cast<int>(MAX_CTX_MAP_SIZE)) {
+        ctx_map_size = static_cast<int>(MAX_CTX_MAP_SIZE);
+    }
+    if (qf_threshold_size > static_cast<int>(MAX_QF_THRESH_SIZE)) {
+        qf_threshold_size = static_cast<int>(MAX_QF_THRESH_SIZE);
+    }
+
+// loading into bram
+LOAD_CTX_MAP_LOOP:
+    for (int i = 0; i < ctx_map_size; i++) {
+#pragma HLS PIPELINE II = 1
+        hls_ctx_map[i] = ctx_map[i];
+    }
+
+LOAD_QF_THRESHOLDS_LOOP:
+    for (int i = 0; i < qf_threshold_size; i++) {
+#pragma HLS PIPELINE II = 1
+        hls_qf_thresholds[i] = qf_thresholds[i];
+    }
+}
+
+// Dataflow region of the kernel: connects the DDR loaders, the coefficient tokenizer,
+// the histogram initialisation and the token write-out through local streams.
+//
+// Inputs are the configuration streams, the two on-chip tables (hls_ctx_map,
+// hls_qf_thresholds) and the DDR buffers. Results leave through ac_tokens_ddr, the
+// histogram/size/total-count/nonempty pointers and the two result streams
+// strm_nempty_cnt / strm_largest_idx.
+//
+// NOTE(review): the ctx_map and qf_thresholds parameters are not referenced in this
+// body; only the on-chip copies are passed on.
+void hls_ANSinitHistogram_core(hls::stream<int32_t>& strm_global_config,
+                               hls::stream<int32_t>& strm_config_2,
+                               hls::stream<int32_t>& strm_config_3,
+                               hls::stream<int32_t>& strm_config_4,
+                               hls::stream<int32_t>& strm_config_5,
+                               hls::stream<int32_t>& strm_do_once_0,
+                               hls::stream<int32_t>& strm_do_once_1,
+                               hls::stream<int32_t>& strm_do_once_2,
+                               hls::stream<int32_t>& strm_do_once_3,
+                               hls::stream<int32_t>& strm_do_once,
+
+                               uint8_t hls_ctx_map[MAX_CTX_MAP_SIZE],
+                               uint32_t hls_qf_thresholds[MAX_QF_THRESH_SIZE],
+                               // ac_coef_ordered_ddr
+                               int32_t ac_coeff_ordered_ddr[ALL_PIXEL],
+                               // ac_strategy ddr
+                               int32_t strategy_ddr[MAX_NUM_BLK88],
+                               // qf ddr
+                               int32_t qf_ddr[MAX_NUM_BLK88],
+                               // qdc ddr
+                               uint8_t qdc_ddr[MAX_NUM_BLK88],
+                               // ctx_map ddr
+                               uint8_t ctx_map[MAX_QF_THRESH_SIZE], //
+                               // quant field threshold
+                               uint32_t qf_thresholds[MAX_CTX_MAP_SIZE], //
+                               // ac_token_output
+                               uint64_t ac_tokens_ddr[MAX_AC_TOKEN_SIZE],
+
+                               ap_uint<64>* tokens0_ptr,
+                               ap_uint<64>* tokens1_ptr,
+                               ap_uint<64>* tokens2_ptr,
+                               ap_uint<64>* tokens3_ptr,
+                               hls::stream<uint32_t>& strm_nempty_cnt,
+                               hls::stream<uint32_t>& strm_largest_idx,
+
+                               int32_t* histograms0_ptr,
+                               uint32_t* histograms_size0_ptr,
+                               uint32_t* total_count0_ptr,
+                               uint32_t* nonempty0_ptr,
+
+                               int32_t* histograms1_ptr,
+                               uint32_t* histograms_size1_ptr,
+                               uint32_t* total_count1_ptr,
+                               uint32_t* nonempty1_ptr,
+
+                               int32_t* histograms2_ptr,
+                               uint32_t* histograms_size2_ptr,
+                               uint32_t* total_count2_ptr,
+                               uint32_t* nonempty2_ptr,
+
+                               int32_t* histograms3_ptr,
+                               uint32_t* histograms_size3_ptr,
+                               uint32_t* total_count3_ptr,
+                               uint32_t* nonempty3_ptr,
+
+                               int32_t* histograms4_ptr,
+                               uint32_t* histograms_size4_ptr,
+                               uint32_t* total_count4_ptr,
+                               uint32_t* nonempty4_ptr) {
+#pragma HLS DATAFLOW
+
+    //================================== core ==========================================//
+    // Local streams between the stages. strm_ac_coeff0 is declared far deeper (40960)
+    // than the other streams (16).
+    // NOTE(review): strm_ac_coeff1 is declared but not used anywhere in this function.
+    hls::stream<int32_t, 40960> strm_ac_coeff0;
+    hls::stream<int32_t, 16> strm_ac_coeff1;
+    hls::stream<int32_t, 16> strm_strategy;
+    hls::stream<int32_t, 16> strm_qf;
+    hls::stream<uint8_t, 16> strm_qdc;
+    hls::stream<ap_uint<64>, 16> token_stream0;
+    hls::stream<ap_uint<64>, 16> token_stream1;
+    hls::stream<ap_uint<64>, 16> token_stream2;
+    hls::stream<ap_uint<64>, 16> token_stream3;
+    // one token loader per tokens*_ptr buffer, each fed by its own do_once stream
+    load_token(strm_do_once_0, tokens0_ptr, token_stream0);
+    load_token(strm_do_once_1, tokens1_ptr, token_stream1);
+    load_token(strm_do_once_2, tokens2_ptr, token_stream2);
+    load_token(strm_do_once_3, tokens3_ptr, token_stream3);
+    // per-group loaders; each one reads its own copy of the geometry (strm_config_2..5)
+    load_ac_ordered_by_group(strm_config_2, ac_coeff_ordered_ddr, strm_ac_coeff0);
+    load_ac_strategy_by_group(strm_config_3, strategy_ddr, strm_strategy);
+    load_qf_by_group(strm_config_4, qf_ddr, strm_qf);
+    load_qdc_by_group(strm_config_5, qdc_ddr, strm_qdc);
+
+    // the tokenizer produces two streams: token_stream_internal goes to
+    // init_histogram_top, strm_token_out goes to ac_token_writeout
+    hls::stream<ap_uint<64>, 16> token_stream_internal;
+    hls::stream<ap_uint<64>, 16> strm_token_out;
+    hls_TokenizeCoefficients(hls_ctx_map, hls_qf_thresholds, strm_global_config, strm_ac_coeff0, strm_strategy, strm_qf,
+                             strm_qdc, token_stream_internal, strm_token_out);
+
+    // consumes the four loaded token streams plus the internal tokenizer stream
+    init_histogram_top(strm_do_once, token_stream0, token_stream1, token_stream2, token_stream3, token_stream_internal,
+                       strm_nempty_cnt, strm_largest_idx, histograms0_ptr, histograms_size0_ptr, total_count0_ptr,
+                       nonempty0_ptr,
+
+                       histograms1_ptr, histograms_size1_ptr, total_count1_ptr, nonempty1_ptr,
+
+                       histograms2_ptr, histograms_size2_ptr, total_count2_ptr, nonempty2_ptr,
+
+                       histograms3_ptr, histograms_size3_ptr, total_count3_ptr, nonempty3_ptr,
+
+                       histograms4_ptr, histograms_size4_ptr, total_count4_ptr, nonempty4_ptr);
+
+    // stores the tokenizer output stream into ac_tokens_ddr
+    ac_token_writeout(ac_tokens_ddr, strm_token_out);
+}
+
+namespace xf {
+namespace codec {
+
+/**
+* @brief JXL ANS init Histogram kernel
+*
+* @param config                    configuration for the kernel.
+* @param ac_coeff_ordered_ddr      ac coefficients
+* @param strategy_ddr              ac strategy
+* @param qf_ddr                    quant field
+* @param qdc_ddr                   qdc
+* @param ctx_map                   ctx_map ddr
+* @param qf_thresholds             quantfield_thresholds
+* @param ac_tokens_ddr             the output of ac tokens
+* @param tokens0_ptr               tokens for Block Context Map
+* @param tokens1_ptr               tokens for Modular frame tree
+* @param tokens2_ptr               tokens for coef orders
+* @param tokens3_ptr               tokens for Modular frames
+* @param histograms0_ptr           histograms for Block Context Map.
+* @param total_count0_ptr          Count of context for histograms for Block Context Map.
+* @param histograms_size0_ptr      size for each context
+* @param nonempty0_ptr             indicate which context is empty
+* @param histograms1_ptr           histograms for Modular frame tree.
+* @param total_count1_ptr          Count of context for histograms for Modular frame tree.
+* @param histograms_size1_ptr      size for each context
+* @param nonempty1_ptr             indicate which context is empty
+* @param histograms2_ptr           histograms for code from Modular frame.
+* @param total_count2_ptr          Count of context for histograms for Modular frame.
+* @param histograms_size2_ptr      size for each context
+* @param nonempty2_ptr             indicate which context is empty
+* @param histograms3_ptr           histograms for coef orders.
+* @param total_count3_ptr          Count of context for histograms for coef orders.
+* @param histograms_size3_ptr      size for each context
+* @param nonempty3_ptr             indicate which context is empty
+* @param histograms4_ptr           histograms for ac coefficients.
+* @param total_count4_ptr          Count of context for histograms for ac coefficients.
+* @param histograms_size4_ptr      size for each context
+* @param nonempty4_ptr             indicate which context is empty
+*/
+
+extern "C" void JxlEnc_ans_initHistogram(
+    // host config
+    int config[32],
+    // ac_coeff_ordered_ddr
+    int32_t ac_coeff_ordered_ddr[ALL_PIXEL],
+    // ac_strategy ddr
+    int32_t strategy_ddr[MAX_NUM_BLK88],
+    // qf ddr
+    int32_t qf_ddr[MAX_NUM_BLK88],
+    // qdc ddr
+    uint8_t qdc_ddr[MAX_NUM_BLK88],
+    // ctx_map ddr
+    uint8_t ctx_map[MAX_QF_THRESH_SIZE],
+    // quant field threshold
+    uint32_t qf_thresholds[MAX_CTX_MAP_SIZE],
+    // ac_token_output
+    uint64_t ac_tokens_ddr[MAX_AC_TOKEN_SIZE],
+
+    ap_uint<64>* tokens0_ptr,
+    ap_uint<64>* tokens1_ptr,
+    ap_uint<64>* tokens2_ptr,
+    ap_uint<64>* tokens3_ptr,
+
+    int32_t* histograms0_ptr,
+    uint32_t* histograms_size0_ptr,
+    uint32_t* total_count0_ptr,
+    uint32_t* nonempty0_ptr,
+
+    int32_t* histograms1_ptr,
+    uint32_t* histograms_size1_ptr,
+    uint32_t* total_count1_ptr,
+    uint32_t* nonempty1_ptr,
+
+    int32_t* histograms2_ptr,
+    uint32_t* histograms_size2_ptr,
+    uint32_t* total_count2_ptr,
+    uint32_t* nonempty2_ptr,
+
+    int32_t* histograms3_ptr,
+    uint32_t* histograms_size3_ptr,
+    uint32_t* total_count3_ptr,
+    uint32_t* nonempty3_ptr,
+
+    int32_t* histograms4_ptr,
+    uint32_t* histograms_size4_ptr,
+    uint32_t* total_count4_ptr,
+    uint32_t* nonempty4_ptr) {
+// AXI master port for the ordered ac coefficients (depth ALL_PIXEL)
+#pragma HLS INTERFACE mode = m_axi bundle = mm2 latency = 32 offset = slave num_write_outstanding =           \
+    1 num_read_outstanding = 8 max_write_burst_length = 2 max_read_burst_length = 64 depth = ALL_PIXEL port = \
+        ac_coeff_ordered_ddr
+// AXI master ports for the per-block inputs (depth MAX_NUM_BLK88)
+#pragma HLS INTERFACE mode = m_axi bundle = mm3 latency = 32 offset = slave num_write_outstanding =               \
+    1 num_read_outstanding = 8 max_write_burst_length = 2 max_read_burst_length = 64 depth = MAX_NUM_BLK88 port = \
+        strategy_ddr
+#pragma HLS INTERFACE mode = m_axi bundle = mm4 latency = 32 offset = slave num_write_outstanding =               \
+    1 num_read_outstanding = 8 max_write_burst_length = 2 max_read_burst_length = 64 depth = MAX_NUM_BLK88 port = \
+        qf_ddr
+#pragma HLS INTERFACE mode = m_axi bundle = mm5 latency = 32 offset = slave num_write_outstanding =               \
+    1 num_read_outstanding = 8 max_write_burst_length = 2 max_read_burst_length = 64 depth = MAX_NUM_BLK88 port = \
+        qdc_ddr
+// AXI master ports for the tables copied into bram (both on bundle mm6)
+#pragma HLS INTERFACE mode = m_axi bundle = mm6 latency = 32 offset = slave num_write_outstanding =                  \
+    1 num_read_outstanding = 8 max_write_burst_length = 2 max_read_burst_length = 16 depth = MAX_CTX_MAP_SIZE port = \
+        ctx_map
+#pragma HLS INTERFACE mode = m_axi bundle = mm6 latency = 32 offset = slave num_write_outstanding =                    \
+    1 num_read_outstanding = 8 max_write_burst_length = 2 max_read_burst_length = 16 depth = MAX_QF_THRESH_SIZE port = \
+        qf_thresholds
+// AXI master port for the 32-word config array
+#pragma HLS INTERFACE mode = m_axi bundle = mm7 latency = 32 offset = slave num_write_outstanding = \
+    1 num_read_outstanding = 8 max_write_burst_length = 2 max_read_burst_length = 16 depth = 32 port = config
+// AXI master port for the ac token output
+#pragma HLS INTERFACE mode = m_axi bundle = mm8 latency = 32 offset = slave num_write_outstanding =                   \
+    8 num_read_outstanding = 1 max_write_burst_length = 64 max_read_burst_length = 2 depth = MAX_AC_TOKEN_SIZE port = \
+        ac_tokens_ddr
+
+#pragma HLS INTERFACE mode = m_axi bundle = mm9 latency = 32 offset = slave num_write_outstanding =                    \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = MAX_AC_TOKEN_SIZE port = \
+        tokens0_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm10 latency = 32 offset = slave num_write_outstanding =                   \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = MAX_AC_TOKEN_SIZE port = \
+        tokens1_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm11 latency = 32 offset = slave num_write_outstanding =                   \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = MAX_AC_TOKEN_SIZE port = \
+        tokens2_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm12 latency = 32 offset = slave num_write_outstanding =                   \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = MAX_AC_TOKEN_SIZE port = \
+        tokens3_ptr
+
+#pragma HLS INTERFACE mode = m_axi bundle = mm9 latency = 32 offset = slave num_write_outstanding =       \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 4096 port = \
+        nonempty0_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm9 latency = 32 offset = slave num_write_outstanding =       \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 4096 port = \
+        nonempty1_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm9 latency = 32 offset = slave num_write_outstanding =       \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 4096 port = \
+        nonempty2_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm9 latency = 32 offset = slave num_write_outstanding =       \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 4096 port = \
+        nonempty3_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm9 latency = 32 offset = slave num_write_outstanding =       \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 4096 port = \
+        nonempty4_ptr
+
+#pragma HLS INTERFACE mode = m_axi bundle = mm10 latency = 32 offset = slave num_write_outstanding =        \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 163840 port = \
+        histograms0_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm10 latency = 32 offset = slave num_write_outstanding =        \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 163840 port = \
+        histograms1_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm10 latency = 32 offset = slave num_write_outstanding =        \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 163840 port = \
+        histograms2_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm10 latency = 32 offset = slave num_write_outstanding =        \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 163840 port = \
+        histograms3_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm10 latency = 32 offset = slave num_write_outstanding =        \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 163840 port = \
+        histograms4_ptr
+
+#pragma HLS INTERFACE mode = m_axi bundle = mm11 latency = 32 offset = slave num_write_outstanding =      \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 4096 port = \
+        histograms_size0_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm11 latency = 32 offset = slave num_write_outstanding =      \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 4096 port = \
+        histograms_size1_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm11 latency = 32 offset = slave num_write_outstanding =      \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 4096 port = \
+        histograms_size2_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm11 latency = 32 offset = slave num_write_outstanding =      \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 4096 port = \
+        histograms_size3_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm11 latency = 32 offset = slave num_write_outstanding =      \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 4096 port = \
+        histograms_size4_ptr
+
+#pragma HLS INTERFACE mode = m_axi bundle = mm12 latency = 32 offset = slave num_write_outstanding =      \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 4096 port = \
+        total_count0_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm12 latency = 32 offset = slave num_write_outstanding =      \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 4096 port = \
+        total_count1_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm12 latency = 32 offset = slave num_write_outstanding =      \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 4096 port = \
+        total_count2_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm12 latency = 32 offset = slave num_write_outstanding =      \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 4096 port = \
+        total_count3_ptr
+#pragma HLS INTERFACE mode = m_axi bundle = mm12 latency = 32 offset = slave num_write_outstanding =      \
+    8 num_read_outstanding = 8 max_write_burst_length = 64 max_read_burst_length = 64 depth = 4096 port = \
+        total_count4_ptr
+
+    //========================== top: distribute config to the stages ==================//
+    hls::stream<int32_t, 8> strm_global_config;
+    hls::stream<int32_t, 8> strm_config_2;
+    hls::stream<int32_t, 8> strm_config_3;
+    hls::stream<int32_t, 8> strm_config_4;
+    hls::stream<int32_t, 8> strm_config_5;
+    hls::stream<int32_t, 2> strm_do_once_0("strm_do_once_0");
+    hls::stream<int32_t, 2> strm_do_once_1("strm_do_once_1");
+    hls::stream<int32_t, 2> strm_do_once_2("strm_do_once_2");
+    hls::stream<int32_t, 2> strm_do_once_3("strm_do_once_3");
+    hls::stream<int32_t, 8> strm_do_once("strm_do_once");
+    load_config(config, strm_config_2, strm_config_3, strm_config_4, strm_config_5, strm_global_config, strm_do_once,
+                strm_do_once_0, strm_do_once_1, strm_do_once_2, strm_do_once_3);
+
+    uint8_t hls_ctx_map[MAX_CTX_MAP_SIZE];
+#pragma HLS BIND_STORAGE impl = BRAM variable = hls_ctx_map type = ram_s2p
+    uint32_t hls_qf_thresholds[MAX_QF_THRESH_SIZE];
+#pragma HLS BIND_STORAGE impl = BRAM variable = hls_qf_thresholds type = ram_s2p
+    load_bram(config, ctx_map, qf_thresholds, hls_ctx_map, hls_qf_thresholds);
+
+    //=============================== core =====================================//
+    hls::stream<uint32_t, 8> strm_nempty_cnt;
+    hls::stream<uint32_t, 8> strm_largest_idx;
+    hls_ANSinitHistogram_core(
+        strm_global_config, strm_config_2, strm_config_3, strm_config_4, strm_config_5, strm_do_once_0, strm_do_once_1,
+        strm_do_once_2, strm_do_once_3, strm_do_once, hls_ctx_map, hls_qf_thresholds,
+
+        ac_coeff_ordered_ddr, strategy_ddr, qf_ddr, qdc_ddr, ctx_map, qf_thresholds, ac_tokens_ddr,
+
+        tokens0_ptr, tokens1_ptr, tokens2_ptr, tokens3_ptr, strm_nempty_cnt, strm_largest_idx,
+
+        histograms0_ptr, histograms_size0_ptr, total_count0_ptr, nonempty0_ptr,
+
+        histograms1_ptr, histograms_size1_ptr, total_count1_ptr, nonempty1_ptr,
+
+        histograms2_ptr, histograms_size2_ptr, total_count2_ptr, nonempty2_ptr,
+
+        histograms3_ptr, histograms_size3_ptr, total_count3_ptr, nonempty3_ptr,
+
+        histograms4_ptr, histograms_size4_ptr, total_count4_ptr, nonempty4_ptr);
+    //======================= largest_And_empty write out (config[17..26]) ======//
+    hls_largest_And_empty_write_out(config, strm_nempty_cnt, strm_largest_idx);
+    //======================= End of All =========================================//
+}
+
+} // namespace codec
+} // namespace xf
+#endif
diff --git a/codec/L2/demos/jxlEnc/acc_tokInit_histogram/postSysLink.tcl b/codec/L2/demos/jxlEnc/acc_tokInit_histogram/postSysLink.tcl
new file mode 100644
index 0000000000..2dc2f67034
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_tokInit_histogram/postSysLink.tcl
@@ -0,0 +1 @@
+set_property -dict {CONFIG.ECC_EN false CONFIG.ECC_SCRUB_EN false} [get_bd_cells hmss_0] ;# set both ECC options of cell hmss_0 to false
diff --git a/codec/L2/demos/jxlEnc/acc_tokInit_histogram/utils.mk b/codec/L2/demos/jxlEnc/acc_tokInit_histogram/utils.mk
new file mode 100644
index 0000000000..0ee80e90da
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/acc_tokInit_histogram/utils.mk
@@ -0,0 +1,270 @@
+#
+# Copyright 2019-2022 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# vitis makefile-generator v2.0.6
+#
+#+-------------------------------------------------------------------------------
+# The following parameters are assigned with default values. These parameters can
+# be overridden through the make command line
+#+-------------------------------------------------------------------------------
+
+REPORT := no
+PROFILE := no
+DEBUG := no
+
+# Any REPORT value other than 'no' requests both v++ reports:
+# 'estimate' (estimate report) and 'system' (system report).
+ifneq ($(REPORT), no)
+VPP_LDFLAGS += --report estimate --report system
+endif
+
+# PROFILE=yes appends the kernel data-profiling option to VPP_LDFLAGS
+# (profile summary report).
+ifeq ($(PROFILE), yes)
+VPP_LDFLAGS += --profile_kernel data:all:all:all
+endif
+
+# DEBUG=yes appends the --dk protocol option to VPP_LDFLAGS (debug summary report).
+ifeq ($(DEBUG), yes)
+VPP_LDFLAGS += --dk protocol:all:all:all
+endif
+
+# Fall back to the conventional install locations when XILINX_VITIS or
+# XILINX_XRT has no (non-empty) value yet, and export the chosen defaults.
+ifndef XILINX_VITIS
+  export XILINX_VITIS = /opt/xilinx/Vitis/$(TOOL_VERSION)
+endif
+
+ifndef XILINX_XRT
+  export XILINX_XRT = /opt/xilinx/xrt
+endif
+
+# check_device: warn when PLATFORM_NAME matches no PLATFORM_ALLOWLIST entry and
+# fail when it matches a PLATFORM_BLOCKLIST entry. POSIX sh only (no bash [[ ]]).
+.PHONY: check_device
+check_device:
+	@set -eu; \
+	inallowlist=False; inblocklist=False; \
+	for dev in $(PLATFORM_ALLOWLIST); \
+	    do if [ -n "$$(echo $(PLATFORM_NAME) | grep "$$dev")" ]; \
+		then inallowlist=True; fi; \
+	done ;\
+	for dev in $(PLATFORM_BLOCKLIST); \
+	    do if [ -n "$$(echo $(PLATFORM_NAME) | grep "$$dev")" ]; \
+		then inblocklist=True; fi; \
+	done ;\
+	if [ "$$inallowlist" = False ]; \
+	    then echo "[Warning]: The device $(PLATFORM_NAME) not in allowlist."; fi; \
+	if [ "$$inblocklist" = True ]; \
+	    then echo "[ERROR]: The device $(PLATFORM_NAME) in blocklist."; exit 1; fi
+
+# Derive HOST_ARCH from the platform's 'CPU Type'; ':=' runs platforminfo once instead of once per comparison below
+ifneq (,$(PLATFORM))
+HOST_ARCH_temp := $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
+ifeq ($(HOST_ARCH_temp), x86)
+HOST_ARCH := x86
+else ifeq ($(HOST_ARCH_temp), cortex-a9)
+HOST_ARCH := aarch32
+else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
+HOST_ARCH := aarch64
+endif
+endif
+
+
+# Choose LINK_TARGET_FMT from the platform and the tool version:
+#   - 'FPGA Family' reported as versal and v++ version >= 2022.1 -> xsa
+#   - every other combination                                    -> xclbin
+# VITIS_VER is parsed from 'v++ --version', DEVICE_TYPE from platforminfo.
+VITIS_VER = $(shell v++ --version | grep 'v++' | sed 's/^[[:space:]]*//' | sed -e 's/^[*]* v++ v//g' | cut -d " " -f1)
+DEVICE_TYPE = $(shell platforminfo -p $(PLATFORM) | grep 'FPGA Family' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
+LINK_TARGET_FMT := xclbin
+ifeq ($(DEVICE_TYPE), versal)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+LINK_TARGET_FMT := xsa
+endif
+endif
+
+# Map an ARM HOST_ARCH to a device family (DEV_FAM is not assigned here for x86)
+ifeq ($(HOST_ARCH), aarch32)
+  DEV_FAM = 7Series
+else ifeq ($(HOST_ARCH), aarch64)
+  DEV_FAM = Ultrascale
+endif
+
+# Reject a HOST_ARCH outside {aarch64, aarch32, x86}. NOTE(review): an empty HOST_ARCH appears to pass this test - confirm
+ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
+$(error HOST_ARCH variable not set, please set correctly and rerun)
+endif
+# check_version: when git is on PATH and XFLIB_DIR contains a .git entry, print
+# the most recent commit of XFLIB_DIR; otherwise the target has no recipe.
+.PHONY: check_version
+check_version:
+ifneq (,$(and $(shell which git),$(wildcard $(XFLIB_DIR)/.git)))
+	@cd $(XFLIB_DIR) && git log --graph --pretty=format:'%Cred%h%Creset -%C(yellow)%d%Creset %s %Cgreen(%cr) %C(bold blue)<%an>%Creset' --abbrev-commit -n 1 && cd -
+endif
+
+# check_sysroot: for a non-x86 HOST_ARCH with SYSROOT unset or empty, the recipe consists of an error call that aborts make
+check_sysroot:
+ifneq ($(HOST_ARCH), x86)
+ifndef SYSROOT
+	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+
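+#Select the host compiler CXX: on x86 require g++ >= CXX_VER, else use the gcc under XILINX_VIVADO; on ARM use the Vitis cross g++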
+CXX := g++
+ifeq ($(HOST_ARCH), x86)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif
+CXX_V := $(shell echo $(CXX_VER) | awk -F. '{print tolower($$1)}')
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
+ifndef XILINX_VIVADO
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
+else
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
+ifeq ($(LD_LIBRARY_PATH),)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
+else
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
+endif
+$(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
+endif
+endif
+else ifeq ($(HOST_ARCH), aarch64)
+CXX := $(XILINX_VITIS)/gnu/aarch64/lin/aarch64-linux/bin/aarch64-linux-gnu-g++
+else ifeq ($(HOST_ARCH), aarch32)
+CXX := $(XILINX_VITIS)/gnu/aarch32/lin/gcc-arm-linux-gnueabi/bin/arm-linux-gnueabihf-g++
+endif
+
+# Detect the Linux distribution (OSDIST) and its release (OSREL) with
+# lsb_release; both values are lower-cased and stripped of blanks/tabs.
+OSDIST = $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
+OSREL = $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
+
+# CentOS or Red Hat, major release 7, x86 host:
+# add -D_GLIBCXX_USE_CXX11_ABI=0 to XRT_CXXFLAGS.
+# The three conditions are nested so the release is only probed on those
+# two distributions.
+
+ifneq (,$(findstring centos,$(OSDIST))$(findstring redhat,$(OSDIST)))
+ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+endif
+
+
+#Setting VPP
+VPP := v++
+
+#Setting aiecompiler and simulator executables
+AIECXX := aiecompiler
+AIESIMULATOR := aiesimulator
+X86SIMULATOR := x86simulator
+# check_vivado: fails with a hint when XILINX_VIVADO/bin/vivado does not exist (tested while the makefile is parsed).
+.PHONY: check_vivado
+check_vivado:
+ifeq (,$(wildcard $(XILINX_VIVADO)/bin/vivado))
+	@echo "Cannot locate Vivado installation. Please set XILINX_VIVADO variable." && false
+endif
+# check_vpp: fails with a hint when XILINX_VITIS/bin/v++ does not exist.
+.PHONY: check_vpp
+check_vpp:
+ifeq (,$(wildcard $(XILINX_VITIS)/bin/v++))
+	@echo "Cannot locate Vitis installation. Please set XILINX_VITIS variable." && false
+endif
+# check_xrt: fails with a hint when XILINX_XRT/lib/libxilinxopencl.so does not exist.
+.PHONY: check_xrt
+check_xrt:
+ifeq (,$(wildcard $(XILINX_XRT)/lib/libxilinxopencl.so))
+	@echo "Cannot locate XRT installation. Please set XILINX_XRT variable." && false
+endif
+
+# Put the Vitis and XRT tool directories first on PATH for every recipe.
+export PATH := $(XILINX_VITIS)/bin:$(XILINX_XRT)/bin:$(PATH)
+# On x86 hosts also prepend the XRT libraries to LD_LIBRARY_PATH. The variable
+# is exported explicitly: when it is absent from the environment, a plain
+# assignment would stay private to make and never reach the recipes.
+ifeq ($(HOST_ARCH), x86)
+export LD_LIBRARY_PATH := $(XILINX_XRT)/lib$(if $(LD_LIBRARY_PATH),:$(LD_LIBRARY_PATH))
+endif
+
+ifneq (,$(wildcard $(PLATFORM)))
+# PLATFORM is an existing path: use it directly as the platform file (XPLATFORM)
+XPLATFORM := $(PLATFORM)
+else
+# Otherwise treat PLATFORM as a name or pattern; later stages run only while XPLATFORM is still empty
+# 1. search the colon-separated directories listed in PLATFORM_REPO_PATHS
+ifneq (,$(PLATFORM_REPO_PATHS))
+# 1.1 as exact name: <dir>/<PLATFORM>/<PLATFORM>.xpfm
+XPLATFORM := $(strip $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/$(PLATFORM)/$(PLATFORM).xpfm)))
+# 1.2 as a pattern: PLATFORM is used as an awk regex against every <dir>/*/*.xpfm path
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/*/*.xpfm))
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 1.2
+endif # 1
+# 2. search the Vitis installation (XILINX_VITIS/platforms)
+ifeq (,$(XPLATFORM))
+# 2.1 as exact name
+XPLATFORM := $(strip $(wildcard $(XILINX_VITIS)/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 2.2 as a pattern
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard $(XILINX_VITIS)/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 2.2
+endif # 2
+# 3. search the default location /opt/xilinx/platforms
+ifeq (,$(XPLATFORM))
+# 3.1 as exact name
+XPLATFORM := $(strip $(wildcard /opt/xilinx/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 3.2 as a pattern
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard /opt/xilinx/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 3.2
+endif # 3
+endif
+# MSG_PLATFORM: multi-line hint, exported so the check_platform recipe can read it from the shell environment.
+define MSG_PLATFORM
+No platform matched pattern '$(PLATFORM)'.
+Available platforms are: $(XPLATFORMS)
+To add more platform directories, set the PLATFORM_REPO_PATHS variable or point PLATFORM variable to the full path of platform .xpfm file.
+endef
+export MSG_PLATFORM
+
+# check_platform: when XPLATFORM is empty, print MSG_PLATFORM and fail; otherwise the target has no recipe.
+.PHONY: check_platform
+check_platform:
+ifeq (,$(XPLATFORM))
+	@echo "$${MSG_PLATFORM}" && false
+endif
+#Check ends
+
+#   PLATFORM_NAME - filesystem friendly platform name: the basename of PLATFORM
+#   with a trailing .xpfm extension removed and surrounding whitespace stripped
+PLATFORM_NAME = $(strip $(patsubst %.xpfm, % , $(shell basename $(PLATFORM))))
+
+
+# Cleaning stuff
+RM = rm -f
+RMDIR = rm -rf
+
+MV = mv -f
+CP = cp -rf
+ECHO:= @echo
diff --git a/codec/L2/demos/jxlEnc/images/small32x32.png b/codec/L2/demos/jxlEnc/images/small32x32.png
new file mode 100644
index 0000000000..e50f46a988
Binary files /dev/null and b/codec/L2/demos/jxlEnc/images/small32x32.png differ
diff --git a/codec/L2/demos/jxlEnc/images/t0.png b/codec/L2/demos/jxlEnc/images/t0.png
new file mode 100644
index 0000000000..294bbaae40
Binary files /dev/null and b/codec/L2/demos/jxlEnc/images/t0.png differ
diff --git a/codec/L2/demos/jxlEnc/images/t1.png b/codec/L2/demos/jxlEnc/images/t1.png
new file mode 100644
index 0000000000..3b0012f91f
Binary files /dev/null and b/codec/L2/demos/jxlEnc/images/t1.png differ
diff --git a/codec/L2/demos/jxlEnc/images/t2.png b/codec/L2/demos/jxlEnc/images/t2.png
new file mode 100644
index 0000000000..da8ecb130a
Binary files /dev/null and b/codec/L2/demos/jxlEnc/images/t2.png differ
diff --git a/codec/L2/demos/jxlEnc/others/include/acc_cluster_histogram.hpp b/codec/L2/demos/jxlEnc/others/include/acc_cluster_histogram.hpp
new file mode 100644
index 0000000000..41d50d123e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/acc_cluster_histogram.hpp
@@ -0,0 +1,38 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef ACC_CLUSTER_HISTOGRAM_HPP
+#define ACC_CLUSTER_HISTOGRAM_HPP
+
+#include "acc_phase3.hpp"
+// Declares jxl::acc_ANSclusterHistogram; its definition is not part of this header.
+namespace jxl {
+void acc_ANSclusterHistogram(bool is_small_image,
+                             bool do_once[5],
+                             char* do_inner,
+                             char* do_prefix_in,
+                             // NOTE(review): 'bool do_once[5]' is adjusted to 'bool*'; the extent 5 is documentation only.
+                             std::vector<HistogramParams>& params,
+
+                             std::vector<std::vector<Histogram> >& histograms_,
+                             std::vector<size_t>& num_contexts,
+                             std::vector<std::vector<uint8_t>*> context_map,
+                             std::vector<std::vector<uint32_t> >& nonempty_histograms,
+                             std::vector<uint32_t>& largest_idx,
+                             // NOTE(review): context_map (above), codes, writer and layer (below) are std::vector passed by value, i.e. copied per call.
+                             std::vector<EntropyEncodingData*> codes,
+                             std::vector<std::vector<Histogram> >& clustered_histograms,
+                             std::vector<std::vector<uint32_t> >& histogram_symbols,
+
+                             std::vector<BitWriter*> writer,
+                             std::vector<size_t> layer,
+                             std::vector<std::vector<Histogram> >& clustered_histogramsin,
+                             std::vector<std::vector<std::vector<Token> > >& tokensin,
+                             std::vector<EntropyEncodingData>& codesin,
+                             std::vector<std::vector<uint8_t> >& context_map_in);
+
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/include/acc_common.hpp b/codec/L2/demos/jxlEnc/others/include/acc_common.hpp
new file mode 100644
index 0000000000..6fc619cc31
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/acc_common.hpp
@@ -0,0 +1,839 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef ACC_COMMON_HPP
+#define ACC_COMMON_HPP
+
+#include "xlnx_cfg.h"
+
+#include <ap_int.h>
+#include "lib/jxl/enc_frame.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <limits>
+#include <numeric>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_ar_control_field.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/enc_context_map.h"
+#include "lib/jxl/enc_entropy_coder.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_noise.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_photon_noise.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/enc_toc.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/gaborish.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+#include "lib/jxl/toc.h"
+#include "acc_enc_ac_strategy.hpp"
+#include "acc_enc_chroma_from_luma.hpp"
+#include "acc_enc_group.hpp"
+
+namespace jxl {
+namespace {
+void ClusterGroups(PassesEncoderState* enc_state) { // Assigns each pass-0 AC token group a histogram index and sets shared.num_histograms.
+    if (enc_state->shared.frame_header.passes.num_passes > 1) {
+        // TODO(veluca): implement this for progressive modes.
+        return;
+    }
+    // This only considers pass 0 for now.
+    std::vector<uint8_t> context_map;
+    EntropyEncodingData codes;
+    auto& ac = enc_state->passes[0].ac_tokens;
+    size_t limit = std::ceil(std::sqrt(ac.size())); // upper bound on the number of cluster centres
+    if (limit == 1) return;
+    size_t num_contexts = enc_state->shared.block_ctx_map.NumACContexts();
+    std::vector<float> costs(ac.size());
+    HistogramParams params;
+    params.uint_method = HistogramParams::HybridUintMethod::kNone;
+    params.lz77_method = HistogramParams::LZ77Method::kNone;
+    params.ans_histogram_strategy = HistogramParams::ANSHistogramStrategy::kApproximate;
+    size_t max = 0;
+    // token_cost: cost of coding `tokens` with freshly built histograms; overwrites the shared `codes` / `context_map`.
+    auto token_cost = [&](std::vector<std::vector<Token> >& tokens, size_t num_ctx, bool estimate = true) {
+        // TODO(veluca): not estimating is very expensive.
+        BitWriter writer;
+        size_t c = BuildAndEncodeHistograms(params, num_ctx, tokens, &codes, &context_map, estimate ? nullptr : &writer,
+                                            0, /*aux_out=*/0);
+        if (estimate) return c;
+        for (size_t i = 0; i < tokens.size(); i++) {
+            WriteTokens(tokens[i], codes, context_map, &writer, 0, nullptr);
+        }
+        return writer.BitsWritten();
+    };
+    for (size_t i = 0; i < ac.size(); i++) {
+        std::vector<std::vector<Token> > tokens{ac[i]};
+        costs[i] = token_cost(tokens, enc_state->shared.block_ctx_map.NumACContexts());
+        if (costs[i] > costs[max]) {
+            max = i;
+        }
+    }
+    // dist(i, j): extra estimated cost of coding groups i and j together instead of separately.
+    auto dist = [&](int i, int j) {
+        std::vector<std::vector<Token> > tokens{ac[i], ac[j]};
+        return token_cost(tokens, num_contexts) - costs[i] - costs[j];
+    };
+    std::vector<size_t> out{max}; // chosen centres; the most expensive group is the first one
+    std::vector<size_t> old_map(ac.size()); // NOTE(review): written in the loop below but never read
+    std::vector<float> dists(ac.size());
+    size_t farthest = 0;
+    for (size_t i = 0; i < ac.size(); i++) {
+        if (i == max) continue;
+        dists[i] = dist(max, i);
+        if (dists[i] > dists[farthest]) {
+            farthest = i;
+        }
+    }
+    // Farthest-first selection: add the farthest group as a new centre until no distance is positive or `limit` is reached.
+    while (dists[farthest] > 0 && out.size() < limit) {
+        out.push_back(farthest);
+        dists[farthest] = 0;
+        enc_state->histogram_idx[farthest] = out.size() - 1;
+        for (size_t i = 0; i < ac.size(); i++) {
+            float d = dist(out.back(), i);
+            if (d < dists[i]) {
+                dists[i] = d;
+                old_map[i] = enc_state->histogram_idx[i];
+                enc_state->histogram_idx[i] = out.size() - 1;
+            }
+            if (dists[i] > dists[farthest]) {
+                farthest = i;
+            }
+        }
+    }
+    // remap starts as the identity over the centres; remap_cost() returns the non-estimated bit count for a candidate mapping.
+    std::vector<size_t> remap(out.size());
+    std::iota(remap.begin(), remap.end(), 0);
+    for (size_t i = 0; i < enc_state->histogram_idx.size(); i++) {
+        enc_state->histogram_idx[i] = remap[enc_state->histogram_idx[i]];
+    }
+    auto remap_cost = [&](std::vector<size_t> remap) {
+        std::vector<size_t> re_remap(remap.size(), remap.size());
+        size_t r = 0;
+        for (size_t i = 0; i < remap.size(); i++) {
+            if (re_remap[remap[i]] == remap.size()) {
+                re_remap[remap[i]] = r++;
+            }
+            remap[i] = re_remap[remap[i]];
+        }
+        auto tokens = ac;
+        size_t max_hist = 0;
+        for (size_t i = 0; i < tokens.size(); i++) {
+            for (size_t j = 0; j < tokens[i].size(); j++) {
+                size_t hist = remap[enc_state->histogram_idx[i]];
+                tokens[i][j].context += hist * num_contexts;
+                max_hist = std::max(hist + 1, max_hist);
+            }
+        }
+        return token_cost(tokens, max_hist * num_contexts, /*estimate=*/false);
+    };
+    // For each centre, try merging it with every later centre and keep the merge that lowers the cost the most (if any).
+    for (size_t src = 0; src < out.size(); src++) {
+        float cost = remap_cost(remap);
+        size_t best = src;
+        for (size_t j = src + 1; j < out.size(); j++) {
+            if (remap[src] == remap[j]) continue;
+            auto remap_c = remap;
+            std::replace(remap_c.begin(), remap_c.end(), remap[src], remap[j]);
+            float c = remap_cost(remap_c);
+            if (c < cost) {
+                best = j;
+                cost = c;
+            }
+        }
+        if (src != best) {
+            std::replace(remap.begin(), remap.end(), remap[src], remap[best]);
+        }
+    }
+    std::vector<size_t> re_remap(remap.size(), remap.size());
+    size_t r = 0;
+    for (size_t i = 0; i < remap.size(); i++) {
+        if (re_remap[remap[i]] == remap.size()) {
+            re_remap[remap[i]] = r++;
+        }
+        remap[i] = re_remap[remap[i]];
+    }
+    // Publish the result: histogram count, final per-group index, and token contexts offset by histogram_idx * num_contexts.
+    enc_state->shared.num_histograms = *std::max_element(remap.begin(), remap.end()) + 1;
+    for (size_t i = 0; i < enc_state->histogram_idx.size(); i++) {
+        enc_state->histogram_idx[i] = remap[enc_state->histogram_idx[i]];
+    }
+    for (size_t i = 0; i < ac.size(); i++) {
+        for (size_t j = 0; j < ac[i].size(); j++) {
+            ac[i][j].context += enc_state->histogram_idx[i] * num_contexts;
+        }
+    }
+} // ClusterGroups
+// Chooses the block context map (ctx_map, qf_thresholds, num_ctxs) stored in enc_state.shared.block_ctx_map.
+void FindBestBlockEntropyModel(PassesEncoderState& enc_state) {
+    if (enc_state.cparams.decoding_speed_tier >= 1) {
+        static constexpr uint8_t kSimpleCtxMap[] = {
+            // Cluster all blocks together
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //
+        };
+        static_assert(3 * kNumOrders == sizeof(kSimpleCtxMap) / sizeof *kSimpleCtxMap, "Update simple context map");
+        // Bind by reference: the simple map has to land in the shared state, not in a local copy that is discarded.
+        auto& bcm = enc_state.shared.block_ctx_map;
+        bcm.ctx_map.assign(std::begin(kSimpleCtxMap), std::end(kSimpleCtxMap));
+        bcm.num_ctxs = 2;
+        bcm.num_dc_ctxs = 1;
+        return;
+    }
+    if (enc_state.cparams.speed_tier >= SpeedTier::kFalcon) {
+        return;
+    }
+    const ImageI& rqf = enc_state.shared.raw_quant_field;
+    // No need to change context modeling for small images.
+    size_t tot = rqf.xsize() * rqf.ysize();
+    size_t size_for_ctx_model = (1 << 10) * enc_state.cparams.butteraugli_distance;
+    //  if (tot < size_for_ctx_model) return;
+    // NOTE(review): OccCounters indexes its 256-entry tables with qf_row[x] - 1, i.e. assumes 1 <= raw quant <= 256 - confirm.
+    struct OccCounters {
+        // count the occurrences of each qf value and each strategy type.
+        OccCounters(const ImageI& rqf, const AcStrategyImage& ac_strategy) {
+            for (size_t y = 0; y < rqf.ysize(); y++) {
+                const int32_t* qf_row = rqf.Row(y);
+                AcStrategyRow acs_row = ac_strategy.ConstRow(y);
+                for (size_t x = 0; x < rqf.xsize(); x++) {
+                    int ord = kStrategyOrder[acs_row[x].RawStrategy()];
+                    int qf = qf_row[x] - 1;
+                    qf_counts[qf]++;
+                    qf_ord_counts[ord][qf]++;
+                    ord_counts[ord]++;
+                }
+            }
+        }
+
+        size_t qf_counts[256] = {};
+        size_t qf_ord_counts[kNumOrders][256] = {};
+        size_t ord_counts[kNumOrders] = {};
+    };
+    // The OccCounters struct is too big to allocate on the stack.
+    std::unique_ptr<OccCounters> counters(new OccCounters(rqf, enc_state.shared.ac_strategy));
+
+    // Splitting the context model according to the quantization field seems to
+    // mostly benefit only large images.
+    size_t size_for_qf_split = (1 << 13) * enc_state.cparams.butteraugli_distance;
+    size_t num_qf_segments = tot < size_for_qf_split ? 1 : 2;
+    std::vector<uint32_t>& qft = enc_state.shared.block_ctx_map.qf_thresholds;
+    qft.clear();
+    // Divide the quant field in up to num_qf_segments segments.
+    size_t cumsum = 0;
+    size_t next = 1;
+    size_t last_cut = 256;
+    size_t cut = tot * next / num_qf_segments;
+    for (uint32_t j = 0; j < 256; j++) {
+        cumsum += counters->qf_counts[j];
+        if (cumsum > cut) {
+            if (j != 0) {
+                qft.push_back(j);
+            }
+            last_cut = j;
+            while (cumsum > cut) {
+                next++;
+                cut = tot * next / num_qf_segments;
+            }
+        } else if (next > qft.size() + 1) {
+            if (j - 1 == last_cut && j != 0) {
+                qft.push_back(j);
+            }
+        }
+    }
+
+    // Count the occurrences of each segment.
+    std::vector<size_t> counts(kNumOrders * (qft.size() + 1));
+    size_t qft_pos = 0;
+    for (size_t j = 0; j < 256; j++) {
+        if (qft_pos < qft.size() && j == qft[qft_pos]) {
+            qft_pos++;
+        }
+        for (size_t i = 0; i < kNumOrders; i++) {
+            counts[qft_pos + i * (qft.size() + 1)] += counters->qf_ord_counts[i][j];
+        }
+    }
+
+    // Repeatedly merge the lowest-count pair.
+    std::vector<uint8_t> remap((qft.size() + 1) * kNumOrders);
+    std::iota(remap.begin(), remap.end(), 0);
+    std::vector<uint8_t> clusters(remap);
+    size_t nb_clusters = 4; // Clamp1((int)(tot / size_for_ctx_model / 2), 4, 8);
+    // This is O(n^2 log n), but n <= 14.
+    while (clusters.size() > nb_clusters) {
+        std::sort(clusters.begin(), clusters.end(), [&](int a, int b) { return counts[a] > counts[b]; });
+        counts[clusters[clusters.size() - 2]] += counts[clusters.back()];
+        counts[clusters.back()] = 0;
+        remap[clusters.back()] = clusters[clusters.size() - 2];
+        clusters.pop_back();
+    }
+    for (size_t i = 0; i < remap.size(); i++) {
+        while (remap[remap[i]] != remap[i]) {
+            remap[i] = remap[remap[i]];
+        }
+    }
+    // Relabel starting from 0.
+    std::vector<uint8_t> remap_remap(remap.size(), remap.size());
+    size_t num = 0;
+    for (size_t i = 0; i < remap.size(); i++) {
+        if (remap_remap[remap[i]] == remap.size()) {
+            remap_remap[remap[i]] = num++;
+        }
+        remap[i] = remap_remap[remap[i]];
+    }
+    // Write the block context map.
+    auto& ctx_map = enc_state.shared.block_ctx_map.ctx_map;
+    ctx_map = remap;
+    ctx_map.resize(remap.size() * 3);
+    for (size_t i = remap.size(); i < remap.size() * 3; i++) {
+        ctx_map[i] = remap[i % remap.size()] + num;
+    }
+    enc_state.shared.block_ctx_map.num_ctxs = *std::max_element(ctx_map.begin(), ctx_map.end()) + 1;
+}
+
+// Target size requested for the frame: an explicit cparams.target_size wins;
+// otherwise it is derived from cparams.target_bitrate; 0 when neither is set.
+size_t TargetSize(const CompressParams& cparams, const FrameDimensions& frame_dim) {
+    if (cparams.target_size > 0) return cparams.target_size;
+    if (!(cparams.target_bitrate > 0.0)) return 0;
+    // target_bitrate * xsize * ysize, divided by kBitsPerByte below; adding
+    // 0.5 before the implicit conversion to size_t rounds to nearest instead
+    // of truncating.
+    const auto scaled_bits = cparams.target_bitrate * frame_dim.xsize * frame_dim.ysize;
+    return 0.5 + scaled_bits / kBitsPerByte;
+}
+} // namespace
+
+class LossyFrameEncoder {
+   public:
+    LossyFrameEncoder(const CompressParams& cparams,
+                      const FrameHeader& frame_header,
+                      PassesEncoderState* JXL_RESTRICT enc_state,
+                      ThreadPool* pool,
+                      AuxOut* aux_out)
+        : enc_state_(enc_state), pool_(pool), aux_out_(aux_out) {
+        JXL_CHECK(InitializePassesSharedState(frame_header, &enc_state_->shared,
+                                              /*encoder=*/true));
+        enc_state_->cparams = cparams;
+        enc_state_->passes.clear();
+    }
+
+    Status ComputeEncodingData(const ImageBundle* linear,
+                               Image3F* JXL_RESTRICT opsin,
+                               ThreadPool* pool,
+                               ModularFrameEncoder* modular_frame_encoder,
+                               BitWriter* JXL_RESTRICT writer,
+                               FrameHeader* frame_header) {
+        PROFILER_ZONE("ComputeEncodingData uninstrumented");
+        JXL_ASSERT((opsin->xsize() % kBlockDim) == 0 && (opsin->ysize() % kBlockDim) == 0);
+        PassesSharedState& shared = enc_state_->shared;
+
+        if (!enc_state_->cparams.max_error_mode) {
+            float x_qm_scale_steps[3] = {0.65f, 1.25f, 9.0f};
+            shared.frame_header.x_qm_scale = 1;
+            for (float x_qm_scale_step : x_qm_scale_steps) {
+                if (enc_state_->cparams.butteraugli_distance > x_qm_scale_step) {
+                    shared.frame_header.x_qm_scale++;
+                }
+            }
+        }
+
+        JXL_RETURN_IF_ERROR(enc_state_->heuristics->LossyFrameHeuristics(enc_state_, modular_frame_encoder, linear,
+                                                                         opsin, pool_, aux_out_));
+
+        /*    InitializePassesEncoder(*opsin, pool_, enc_state_,
+           modular_frame_encoder,
+                                    aux_out_);*/
+
+        enc_state_->passes.resize(enc_state_->progressive_splitter.GetNumPasses());
+        for (PassesEncoderState::PassData& pass : enc_state_->passes) {
+            pass.ac_tokens.resize(shared.frame_dim.num_groups);
+        }
+
+        ComputeAllCoeffOrders(shared.frame_dim);
+        shared.num_histograms = 1;
+
+        const auto tokenize_group_init = [&](const size_t num_threads) {
+            group_caches_.resize(num_threads);
+            return true;
+        };
+        const auto tokenize_group = [&](const int group_index, const int thread) {
+            // Tokenize coefficients.
+            const Rect rect = shared.BlockGroupRect(group_index);
+            for (size_t idx_pass = 0; idx_pass < enc_state_->passes.size(); idx_pass++) {
+                JXL_ASSERT(enc_state_->coeffs[idx_pass]->Type() == ACType::k32);
+                const int32_t* JXL_RESTRICT ac_rows[3] = {
+                    enc_state_->coeffs[idx_pass]->PlaneRow(0, group_index, 0).ptr32,
+                    enc_state_->coeffs[idx_pass]->PlaneRow(1, group_index, 0).ptr32,
+                    enc_state_->coeffs[idx_pass]->PlaneRow(2, group_index, 0).ptr32,
+                };
+                // Ensure group cache is initialized.
+                group_caches_[thread].InitOnce();
+                TokenizeCoefficients(&shared.coeff_orders[idx_pass * shared.coeff_order_size], rect, ac_rows,
+                                     shared.ac_strategy, frame_header->chroma_subsampling,
+                                     &group_caches_[thread].num_nzeroes,
+                                     &enc_state_->passes[idx_pass].ac_tokens[group_index], enc_state_->shared.quant_dc,
+                                     enc_state_->shared.raw_quant_field, enc_state_->shared.block_ctx_map);
+            }
+        };
+        RunOnPool(pool_, 0, shared.frame_dim.num_groups, tokenize_group_init, tokenize_group, "TokenizeGroup");
+
+        *frame_header = shared.frame_header;
+        return true;
+    }
+
+    Status ComputeJPEGTranscodingData(const jpeg::JPEGData& jpeg_data,
+                                      ModularFrameEncoder* modular_frame_encoder,
+                                      FrameHeader* frame_header) {
+        PROFILER_ZONE("ComputeJPEGTranscodingData uninstrumented");
+        PassesSharedState& shared = enc_state_->shared;
+
+        frame_header->x_qm_scale = 2;
+        frame_header->b_qm_scale = 2;
+
+        FrameDimensions frame_dim = frame_header->ToFrameDimensions();
+
+        const size_t xsize = frame_dim.xsize_padded;
+        const size_t ysize = frame_dim.ysize_padded;
+        const size_t xsize_blocks = frame_dim.xsize_blocks;
+        const size_t ysize_blocks = frame_dim.ysize_blocks;
+
+        // no-op chroma from luma
+        shared.cmap = ColorCorrelationMap(xsize, ysize, false);
+        shared.ac_strategy.FillDCT8();
+        FillImage(uint8_t(0), &shared.epf_sharpness);
+
+        enc_state_->coeffs.clear();
+        enc_state_->coeffs.emplace_back(make_unique<ACImageT<int32_t> >(kGroupDim * kGroupDim, frame_dim.num_groups));
+
+        // convert JPEG quantization table to a Quantizer object
+        float dcquantization[3];
+        std::vector<QuantEncoding> qe(DequantMatrices::kNum, QuantEncoding::Library(0));
+
+        auto jpeg_c_map = JpegOrder(frame_header->color_transform, jpeg_data.components.size() == 1);
+
+        std::vector<int> qt(192);
+        for (size_t c = 0; c < 3; c++) {
+            size_t jpeg_c = jpeg_c_map[c];
+            const int* quant = jpeg_data.quant[jpeg_data.components[jpeg_c].quant_idx].values.data();
+
+            dcquantization[c] = 255 * 8.0f / quant[0];
+            for (size_t y = 0; y < 8; y++) {
+                for (size_t x = 0; x < 8; x++) {
+                    // JPEG XL transposes the DCT, JPEG doesn't.
+                    qt[c * 64 + 8 * x + y] = quant[8 * y + x];
+                }
+            }
+        }
+        DequantMatricesSetCustomDC(&shared.matrices, dcquantization);
+        float dcquantization_r[3] = {1.0f / dcquantization[0], 1.0f / dcquantization[1], 1.0f / dcquantization[2]};
+
+        qe[AcStrategy::Type::DCT] = QuantEncoding::RAW(qt);
+        DequantMatricesSetCustom(&shared.matrices, qe, modular_frame_encoder);
+
+        // Ensure that InvGlobalScale() is 1.
+        shared.quantizer = Quantizer(&shared.matrices, 1, kGlobalScaleDenom);
+        // Recompute MulDC() and InvMulDC().
+        shared.quantizer.RecomputeFromGlobalScale();
+
+        // Per-block dequant scaling should be 1.
+        FillImage(static_cast<int>(shared.quantizer.InvGlobalScale()), &shared.raw_quant_field);
+
+        std::vector<int32_t> scaled_qtable(192);
+        for (size_t c = 0; c < 3; c++) {
+            for (size_t i = 0; i < 64; i++) {
+                scaled_qtable[64 * c + i] = (1 << kCFLFixedPointPrecision) * qt[64 + i] / qt[64 * c + i];
+            }
+        }
+
+        auto jpeg_row = [&](size_t c, size_t y) {
+            return jpeg_data.components[jpeg_c_map[c]].coeffs.data() +
+                   jpeg_data.components[jpeg_c_map[c]].width_in_blocks * kDCTBlockSize * y;
+        };
+
+        Image3F dc = Image3F(xsize_blocks, ysize_blocks);
+        bool DCzero = (shared.frame_header.color_transform == ColorTransform::kYCbCr);
+        // Compute chroma-from-luma for AC (doesn't seem to be useful for DC)
+        if (frame_header->chroma_subsampling.Is444() && enc_state_->cparams.force_cfl_jpeg_recompression &&
+            jpeg_data.components.size() == 3) {
+            for (size_t c : {0, 2}) {
+                ImageSB* map = (c == 0 ? &shared.cmap.ytox_map : &shared.cmap.ytob_map);
+                const float kScale = kDefaultColorFactor;
+                const int kOffset = 127;
+                const float kBase = c == 0 ? shared.cmap.YtoXRatio(0) : shared.cmap.YtoBRatio(0);
+                const float kZeroThresh =
+                    kScale * kZeroBiasDefault[c] * 0.9999f; // just epsilon less for better rounding
+
+                auto process_row = [&](int task, int thread) {
+                    size_t ty = task;
+                    int8_t* JXL_RESTRICT row_out = map->Row(ty);
+                    for (size_t tx = 0; tx < map->xsize(); ++tx) {
+                        const size_t y0 = ty * kColorTileDimInBlocks;
+                        const size_t x0 = tx * kColorTileDimInBlocks;
+                        const size_t y1 = std::min(frame_dim.ysize_blocks, (ty + 1) * kColorTileDimInBlocks);
+                        const size_t x1 = std::min(frame_dim.xsize_blocks, (tx + 1) * kColorTileDimInBlocks);
+                        int32_t d_num_zeros[257] = {0};
+                        // TODO(veluca): this needs SIMD + fixed point adaptation, and/or
+                        // conversion to the new CfL algorithm.
+                        for (size_t y = y0; y < y1; ++y) {
+                            const int16_t* JXL_RESTRICT row_m = jpeg_row(1, y);
+                            const int16_t* JXL_RESTRICT row_s = jpeg_row(c, y);
+                            for (size_t x = x0; x < x1; ++x) {
+                                for (size_t coeffpos = 1; coeffpos < kDCTBlockSize; coeffpos++) {
+                                    const float scaled_m = row_m[x * kDCTBlockSize + coeffpos] *
+                                                           scaled_qtable[64 * c + coeffpos] *
+                                                           (1.0f / (1 << kCFLFixedPointPrecision));
+                                    const float scaled_s = kScale * row_s[x * kDCTBlockSize + coeffpos] +
+                                                           (kOffset - kBase * kScale) * scaled_m;
+                                    if (std::abs(scaled_m) > 1e-8f) {
+                                        float from, to;
+                                        if (scaled_m > 0) {
+                                            from = (scaled_s - kZeroThresh) / scaled_m;
+                                            to = (scaled_s + kZeroThresh) / scaled_m;
+                                        } else {
+                                            from = (scaled_s + kZeroThresh) / scaled_m;
+                                            to = (scaled_s - kZeroThresh) / scaled_m;
+                                        }
+                                        if (from < 0.0f) {
+                                            from = 0.0f;
+                                        }
+                                        if (to > 255.0f) {
+                                            to = 255.0f;
+                                        }
+                                        // Instead of clamping the both values
+                                        // we just check that range is sane.
+                                        if (from <= to) {
+                                            d_num_zeros[static_cast<int>(std::ceil(from))]++;
+                                            d_num_zeros[static_cast<int>(std::floor(to + 1))]--;
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                        int best = 0;
+                        int32_t best_sum = 0;
+                        FindIndexOfSumMaximum(d_num_zeros, 256, &best, &best_sum);
+                        int32_t offset_sum = 0;
+                        for (int i = 0; i < 256; ++i) {
+                            if (i <= kOffset) {
+                                offset_sum += d_num_zeros[i];
+                            }
+                        }
+                        row_out[tx] = 0;
+                        if (best_sum > offset_sum + 1) {
+                            row_out[tx] = best - kOffset;
+                        }
+                    }
+                };
+
+                RunOnPool(pool_, 0, map->ysize(), ThreadPool::SkipInit(), process_row, "FindCorrelation");
+            }
+        }
+        if (!frame_header->chroma_subsampling.Is444()) {
+            ZeroFillImage(&dc);
+            enc_state_->coeffs[0]->ZeroFill();
+        }
+        // JPEG DC is from -1024 to 1023.
+        std::vector<size_t> dc_counts[3] = {};
+        dc_counts[0].resize(2048);
+        dc_counts[1].resize(2048);
+        dc_counts[2].resize(2048);
+        size_t total_dc[3] = {};
+        for (size_t c : {1, 0, 2}) {
+            if (jpeg_data.components.size() == 1 && c != 1) {
+                enc_state_->coeffs[0]->ZeroFillPlane(c);
+                ZeroFillImage(&dc.Plane(c));
+                // Ensure no division by 0.
+                dc_counts[c][1024] = 1;
+                total_dc[c] = 1;
+                continue;
+            }
+            size_t hshift = frame_header->chroma_subsampling.HShift(c);
+            size_t vshift = frame_header->chroma_subsampling.VShift(c);
+            ImageSB& map = (c == 0 ? shared.cmap.ytox_map : shared.cmap.ytob_map);
+            for (size_t group_index = 0; group_index < frame_dim.num_groups; group_index++) {
+                const size_t gx = group_index % frame_dim.xsize_groups;
+                const size_t gy = group_index / frame_dim.xsize_groups;
+                size_t offset = 0;
+                int32_t* JXL_RESTRICT ac = enc_state_->coeffs[0]->PlaneRow(c, group_index, 0).ptr32;
+                for (size_t by = gy * kGroupDimInBlocks; by < ysize_blocks && by < (gy + 1) * kGroupDimInBlocks; ++by) {
+                    if ((by >> vshift) << vshift != by) continue;
+                    const int16_t* JXL_RESTRICT inputjpeg = jpeg_row(c, by >> vshift);
+                    const int16_t* JXL_RESTRICT inputjpegY = jpeg_row(1, by);
+                    float* JXL_RESTRICT fdc = dc.PlaneRow(c, by >> vshift);
+                    const int8_t* JXL_RESTRICT cm = map.ConstRow(by / kColorTileDimInBlocks);
+                    for (size_t bx = gx * kGroupDimInBlocks; bx < xsize_blocks && bx < (gx + 1) * kGroupDimInBlocks;
+                         ++bx) {
+                        if ((bx >> hshift) << hshift != bx) continue;
+                        size_t base = (bx >> hshift) * kDCTBlockSize;
+                        int idc;
+                        if (DCzero) {
+                            idc = inputjpeg[base];
+                        } else {
+                            idc = inputjpeg[base] + 1024 / qt[c * 64];
+                        }
+                        dc_counts[c][std::min(static_cast<uint32_t>(idc + 1024), uint32_t(2047))]++;
+                        total_dc[c]++;
+                        fdc[bx >> hshift] = idc * dcquantization_r[c];
+                        if (c == 1 || !enc_state_->cparams.force_cfl_jpeg_recompression ||
+                            !frame_header->chroma_subsampling.Is444()) {
+                            for (size_t y = 0; y < 8; y++) {
+                                for (size_t x = 0; x < 8; x++) {
+                                    ac[offset + y * 8 + x] = inputjpeg[base + x * 8 + y];
+                                }
+                            }
+                        } else {
+                            const int32_t scale = shared.cmap.RatioJPEG(cm[bx / kColorTileDimInBlocks]);
+
+                            for (size_t y = 0; y < 8; y++) {
+                                for (size_t x = 0; x < 8; x++) {
+                                    int Y = inputjpegY[kDCTBlockSize * bx + x * 8 + y];
+                                    int QChroma = inputjpeg[kDCTBlockSize * bx + x * 8 + y];
+                                    // Fixed-point multiply of CfL scale with quant table ratio
+                                    // first, and Y value second.
+                                    int coeff_scale = (scale * scaled_qtable[64 * c + y * 8 + x] +
+                                                       (1 << (kCFLFixedPointPrecision - 1))) >>
+                                                      kCFLFixedPointPrecision;
+                                    int cfl_factor = (Y * coeff_scale + (1 << (kCFLFixedPointPrecision - 1))) >>
+                                                     kCFLFixedPointPrecision;
+                                    int QCR = QChroma - cfl_factor;
+                                    ac[offset + y * 8 + x] = QCR;
+                                }
+                            }
+                        }
+                        offset += 64;
+                    }
+                }
+            }
+        }
+
+        auto& dct = enc_state_->shared.block_ctx_map.dc_thresholds;
+        auto& num_dc_ctxs = enc_state_->shared.block_ctx_map.num_dc_ctxs;
+        enc_state_->shared.block_ctx_map.num_dc_ctxs = 1;
+        for (size_t i = 0; i < 3; i++) {
+            dct[i].clear();
+            int num_thresholds = (CeilLog2Nonzero(total_dc[i]) - 10) / 2;
+            // up to 3 buckets per channel:
+            // dark/medium/bright, yellow/unsat/blue, green/unsat/red
+            num_thresholds = std::min(std::max(num_thresholds, 0), 2);
+            size_t cumsum = 0;
+            size_t cut = total_dc[i] / (num_thresholds + 1);
+            for (int j = 0; j < 2048; j++) {
+                cumsum += dc_counts[i][j];
+                if (cumsum > cut) {
+                    dct[i].push_back(j - 1025);
+                    cut = total_dc[i] * (dct[i].size() + 1) / (num_thresholds + 1);
+                }
+            }
+            num_dc_ctxs *= dct[i].size() + 1;
+        }
+
+        auto& ctx_map = enc_state_->shared.block_ctx_map.ctx_map;
+        ctx_map.clear();
+        ctx_map.resize(3 * kNumOrders * num_dc_ctxs, 0);
+
+        int lbuckets = (dct[1].size() + 1);
+        for (size_t i = 0; i < num_dc_ctxs; i++) {
+            // up to 9 contexts for luma
+            ctx_map[i] = i / lbuckets;
+            // up to 3 contexts for chroma
+            ctx_map[kNumOrders * num_dc_ctxs + i] = num_dc_ctxs / lbuckets + (i % lbuckets);
+            ctx_map[2 * kNumOrders * num_dc_ctxs + i] = num_dc_ctxs / lbuckets + (i % lbuckets);
+        }
+        enc_state_->shared.block_ctx_map.num_ctxs = *std::max_element(ctx_map.begin(), ctx_map.end()) + 1;
+
+        enc_state_->histogram_idx.resize(shared.frame_dim.num_groups);
+
+        // disable DC frame for now
+        shared.frame_header.UpdateFlag(false, FrameHeader::kUseDcFrame);
+        auto compute_dc_coeffs = [&](int group_index, int /* thread */) {
+            modular_frame_encoder->AddVarDCTDC(dc, group_index, /*nl_dc=*/false, enc_state_);
+            modular_frame_encoder->AddACMetadata(group_index, /*jpeg_transcode=*/true, enc_state_);
+        };
+        RunOnPool(pool_, 0, shared.frame_dim.num_dc_groups, ThreadPool::SkipInit(), compute_dc_coeffs,
+                  "Compute DC coeffs");
+
+        // Must happen before WriteFrameHeader!
+        shared.frame_header.UpdateFlag(true, FrameHeader::kSkipAdaptiveDCSmoothing);
+
+        enc_state_->passes.resize(enc_state_->progressive_splitter.GetNumPasses());
+        for (PassesEncoderState::PassData& pass : enc_state_->passes) {
+            pass.ac_tokens.resize(shared.frame_dim.num_groups);
+        }
+
+        JXL_CHECK(enc_state_->passes.size() == 1); // skipping coeff splitting so need to have only one pass
+
+        ComputeAllCoeffOrders(frame_dim);
+        shared.num_histograms = 1;
+
+        const auto tokenize_group_init = [&](const size_t num_threads) {
+            group_caches_.resize(num_threads);
+            return true;
+        };
+        const auto tokenize_group = [&](const int group_index, const int thread) {
+            // Tokenize coefficients.
+            const Rect rect = shared.BlockGroupRect(group_index);
+            for (size_t idx_pass = 0; idx_pass < enc_state_->passes.size(); idx_pass++) {
+                JXL_ASSERT(enc_state_->coeffs[idx_pass]->Type() == ACType::k32);
+                const int32_t* JXL_RESTRICT ac_rows[3] = {
+                    enc_state_->coeffs[idx_pass]->PlaneRow(0, group_index, 0).ptr32,
+                    enc_state_->coeffs[idx_pass]->PlaneRow(1, group_index, 0).ptr32,
+                    enc_state_->coeffs[idx_pass]->PlaneRow(2, group_index, 0).ptr32,
+                };
+                // Ensure group cache is initialized.
+                group_caches_[thread].InitOnce();
+                TokenizeCoefficients(&shared.coeff_orders[idx_pass * shared.coeff_order_size], rect, ac_rows,
+                                     shared.ac_strategy, frame_header->chroma_subsampling,
+                                     &group_caches_[thread].num_nzeroes,
+                                     &enc_state_->passes[idx_pass].ac_tokens[group_index], enc_state_->shared.quant_dc,
+                                     enc_state_->shared.raw_quant_field, enc_state_->shared.block_ctx_map);
+            }
+        };
+        RunOnPool(pool_, 0, shared.frame_dim.num_groups, tokenize_group_init, tokenize_group, "TokenizeGroup");
+        *frame_header = shared.frame_header;
+        return true;
+    }
+
+    Status EncodeGlobalDCInfo(const FrameHeader& frame_header, BitWriter* writer) const {
+        auto& shared_state = enc_state_->shared; // source of everything written; `frame_header` is never read
+        JXL_RETURN_IF_ERROR(shared_state.quantizer.Encode(writer, kLayerQuant, aux_out_)); // 1) quantizer
+        EncodeBlockCtxMap(shared_state.block_ctx_map, writer, aux_out_);                   // 2) block context map
+        ColorCorrelationMapEncodeDC(&shared_state.cmap, writer, kLayerDC, aux_out_);       // 3) DC color correlation
+        return true;
+    }
+
+    Status EncodeGlobalACInfo(BitWriter* writer, ModularFrameEncoder* modular_frame_encoder) {
+        // Write order: dequant matrices, the histogram count (only when it occupies at least one bit), then for
+        // each pass the coefficient orders followed by the AC histograms.
+        auto& shared_state = enc_state_->shared;
+        auto& params = enc_state_->cparams;
+        JXL_RETURN_IF_ERROR(DequantMatricesEncode(&shared_state.matrices, writer, kLayerDequantTables, aux_out_,
+                                                  modular_frame_encoder));
+        if (params.speed_tier <= SpeedTier::kTortoise) ClusterGroups(enc_state_);
+        const size_t num_histo_bits = CeilLog2Nonzero(shared_state.frame_dim.num_groups);
+        // The count is stored as num_histograms - 1, using num_histo_bits bits.
+        if (num_histo_bits != 0) {
+            BitWriter::Allotment allotment(writer, num_histo_bits);
+            writer->Write(num_histo_bits, shared_state.num_histograms - 1);
+            ReclaimAndCharge(writer, &allotment, kLayerAC, aux_out_);
+        }
+
+        for (size_t pass = 0; pass < enc_state_->progressive_splitter.GetNumPasses(); pass++) {
+            // Coefficient orders: the used-orders value is written first, then the orders themselves.
+            size_t order_bits = 0;
+            JXL_RETURN_IF_ERROR(U32Coder::CanEncode(kOrderEnc, enc_state_->used_orders[pass], &order_bits));
+            BitWriter::Allotment allotment(writer, order_bits);
+            JXL_CHECK(U32Coder::Write(kOrderEnc, enc_state_->used_orders[pass], writer));
+            ReclaimAndCharge(writer, &allotment, kLayerOrder, aux_out_);
+            EncodeCoeffOrders(enc_state_->used_orders[pass],
+                              &shared_state.coeff_orders[pass * shared_state.coeff_order_size], writer, kLayerOrder,
+                              aux_out_);
+
+            // Histograms: LZ77 is switched off when speed_tier > kTortoise; a decoding_speed_tier >= 1 caps the
+            // number of histograms at 6.
+            HistogramParams hist_params(params.speed_tier, shared_state.block_ctx_map.NumACContexts());
+            if (params.speed_tier > SpeedTier::kTortoise) hist_params.lz77_method = HistogramParams::LZ77Method::kNone;
+            if (params.decoding_speed_tier >= 1) hist_params.max_histograms = 6;
+            auto& pass_data = enc_state_->passes[pass];
+            BuildAndEncodeHistograms(hist_params,
+                                     shared_state.num_histograms * shared_state.block_ctx_map.NumACContexts(),
+                                     pass_data.ac_tokens, &pass_data.codes, &pass_data.context_map, writer, kLayerAC,
+                                     aux_out_);
+        }
+
+        return true;
+    }
+
+    Status EncodeACGroup(size_t pass, size_t group_index, BitWriter* group_code, AuxOut* local_aux_out) {
+        const auto& histo_idx = enc_state_->histogram_idx[group_index]; // per-group entry; callee takes (group, pass)
+        return EncodeGroupTokenizedCoefficients(group_index, pass, histo_idx, *enc_state_, group_code, local_aux_out);
+    }
+
+    PassesEncoderState* State() { return enc_state_; } // hands out the raw enc_state_ pointer held by this object
+
+    void ComputeAllCoeffOrders(const FrameDimensions& frame_dim) {
+        PROFILER_FUNC;
+        auto& shared_state = enc_state_->shared;
+        const auto tier = enc_state_->cparams.speed_tier;
+        enc_state_->used_orders.resize(enc_state_->progressive_splitter.GetNumPasses()); // one entry per pass
+        for (size_t pass = 0; pass < enc_state_->progressive_splitter.GetNumPasses(); pass++) {
+            // No coefficient reordering in Falcon or faster: the entry is then handed on exactly as it is.
+            auto& used = enc_state_->used_orders[pass];
+            if (tier < SpeedTier::kFalcon) {
+                used = ComputeUsedOrders(tier, shared_state.ac_strategy, Rect(shared_state.raw_quant_field));
+            }
+            ComputeCoeffOrder(tier, *enc_state_->coeffs[pass], shared_state.ac_strategy, frame_dim, used,
+                              &shared_state.coeff_orders[pass * shared_state.coeff_order_size]);
+        }
+    }
+
+    std::vector<EncCache>& get_group_cashes() { return group_caches_; } // mutable ref to group_caches_ ("cashes" sic)
+
+   private:
+    template <typename V, typename R>
+    static inline void FindIndexOfSumMaximum(const V* array, const size_t len, R* idx, V* sum) {
+        // Walks the running (prefix) sums of array[0..len) and reports the first index at which the largest
+        // strictly positive running sum is reached. If no running sum exceeds 0, *idx and *sum are both set to 0.
+        JXL_ASSERT(len > 0);
+        V running = 0, best_sum = 0;
+        R best_idx = 0;
+        for (size_t pos = 0; pos < len; ++pos) {
+            running += array[pos];
+            if (!(running > best_sum)) continue; // ties keep the earlier index
+            best_sum = running;
+            best_idx = static_cast<R>(pos);
+        }
+        *idx = best_idx;
+        *sum = best_sum;
+    }
+
+    PassesEncoderState* JXL_RESTRICT enc_state_;
+    ThreadPool* pool_;
+    AuxOut* aux_out_;
+    std::vector<EncCache> group_caches_;
+};
+} // namespace jxl
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/include/acc_dct-inl.h b/codec/L2/demos/jxlEnc/others/include/acc_dct-inl.h
new file mode 100644
index 0000000000..ee6c0568c5
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/acc_dct-inl.h
@@ -0,0 +1,347 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Fast SIMD floating-point (I)DCT, any power of two.
+#ifndef ACC_DCT_INL_HPP
+#define ACC_DCT_INL_HPP
+
+#if defined(LIB_JXL_DCT_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_DCT_INL_H_
+#undef LIB_JXL_DCT_INL_H_
+#else
+#define LIB_JXL_DCT_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/dct_block-inl.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/transpose-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+template <size_t SZ>
+struct FVImpl { // general case: float vector tag capped at SZ lanes
+    using type = HWY_CAPPED(float, SZ);
+};
+// SZ == 0 selects the full-width float vector tag instead of a capped one.
+template <>
+struct FVImpl<0> {
+    using type = HWY_FULL(float);
+};
+// Shorthand for the float vector tag type chosen by SZ (0 = full width).
+template <size_t SZ>
+using FV = typename FVImpl<SZ>::type;
+
+// Implementation of Lowest Complexity Self Recursive Radix-2 DCT II/III
+// Algorithms, by Siriani M. Perera and Jianhua Liu.
+
+template <size_t N, size_t SZ>
+struct CoeffBundle { // static helpers on arrays of FV<SZ> vectors; contiguous arrays keep vector i at offset i * SZ
+    static void AddReverse(const float* JXL_RESTRICT ain1, const float* JXL_RESTRICT ain2, float* JXL_RESTRICT aout) {
+        for (size_t i = 0; i < N; i++) { // aout[i] = ain1[i] + ain2[N - 1 - i]
+            auto in1 = Load(FV<SZ>(), ain1 + i * SZ);
+            auto in2 = Load(FV<SZ>(), ain2 + (N - i - 1) * SZ);
+            Store(in1 + in2, FV<SZ>(), aout + i * SZ);
+        }
+    }
+    static void SubReverse(const float* JXL_RESTRICT ain1, const float* JXL_RESTRICT ain2, float* JXL_RESTRICT aout) {
+        for (size_t i = 0; i < N; i++) { // aout[i] = ain1[i] - ain2[N - 1 - i]
+            auto in1 = Load(FV<SZ>(), ain1 + i * SZ);
+            auto in2 = Load(FV<SZ>(), ain2 + (N - i - 1) * SZ);
+            Store(in1 - in2, FV<SZ>(), aout + i * SZ);
+        }
+    }
+    static void B(float* JXL_RESTRICT coeff) {
+        auto sqrt2 = Set(FV<SZ>(), square_root<2>::value);
+        auto in1 = Load(FV<SZ>(), coeff);
+        auto in2 = Load(FV<SZ>(), coeff + SZ); // reads vector 1 unconditionally, so this needs N >= 2
+        Store(MulAdd(in1, sqrt2, in2), FV<SZ>(), coeff); // coeff[0] = coeff[0] * sqrt2 + coeff[1]
+        for (size_t i = 1; i + 1 < N; i++) { // coeff[i] += coeff[i + 1]; ascending, so coeff[i + 1] is still original
+            auto in1 = Load(FV<SZ>(), coeff + i * SZ);
+            auto in2 = Load(FV<SZ>(), coeff + (i + 1) * SZ);
+            Store(in1 + in2, FV<SZ>(), coeff + i * SZ);
+        }
+    }
+    static void BTranspose(float* JXL_RESTRICT coeff) {
+        for (size_t i = N - 1; i > 0; i--) { // coeff[i] += coeff[i - 1]; descending, so coeff[i - 1] is still original
+            auto in1 = Load(FV<SZ>(), coeff + i * SZ);
+            auto in2 = Load(FV<SZ>(), coeff + (i - 1) * SZ);
+            Store(in1 + in2, FV<SZ>(), coeff + i * SZ);
+        }
+        auto sqrt2 = Set(FV<SZ>(), square_root<2>::value);
+        auto in1 = Load(FV<SZ>(), coeff);
+        Store(in1 * sqrt2, FV<SZ>(), coeff); // coeff[0] *= sqrt2
+    }
+    // Interleave: first half of ain -> even slots of aout, second half -> odd slots. Ideally optimized away by compiler.
+    static void InverseEvenOdd(const float* JXL_RESTRICT ain, float* JXL_RESTRICT aout) {
+        for (size_t i = 0; i < N / 2; i++) { // aout[2 * i] = ain[i]
+            auto in1 = Load(FV<SZ>(), ain + i * SZ);
+            Store(in1, FV<SZ>(), aout + 2 * i * SZ);
+        }
+        for (size_t i = N / 2; i < N; i++) { // aout[2 * (i - N / 2) + 1] = ain[i]
+            auto in1 = Load(FV<SZ>(), ain + i * SZ);
+            Store(in1, FV<SZ>(), aout + (2 * (i - N / 2) + 1) * SZ);
+        }
+    }
+    // De-interleave (ain read at ain_stride, unaligned): even inputs -> first half of aout, odd -> second half.
+    static void ForwardEvenOdd(const float* JXL_RESTRICT ain, size_t ain_stride, float* JXL_RESTRICT aout) {
+        for (size_t i = 0; i < N / 2; i++) { // aout[i] = ain[2 * i]
+            auto in1 = LoadU(FV<SZ>(), ain + 2 * i * ain_stride);
+            Store(in1, FV<SZ>(), aout + i * SZ);
+        }
+        for (size_t i = N / 2; i < N; i++) { // aout[i] = ain[2 * (i - N / 2) + 1]
+            auto in1 = LoadU(FV<SZ>(), ain + (2 * (i - N / 2) + 1) * ain_stride);
+            Store(in1, FV<SZ>(), aout + i * SZ);
+        }
+    }
+    // Invoked on full vector.
+    static void Multiply(float* JXL_RESTRICT coeff) {
+        for (size_t i = 0; i < N / 2; i++) { // only the upper half is scaled: coeff[N / 2 + i] *= kMultipliers[i]
+            auto in1 = Load(FV<SZ>(), coeff + (N / 2 + i) * SZ);
+            auto mul = Set(FV<SZ>(), WcMultipliers<N>::kMultipliers[i]);
+            Store(in1 * mul, FV<SZ>(), coeff + (N / 2 + i) * SZ);
+        }
+    }
+    static void MultiplyAndAdd(const float* JXL_RESTRICT coeff, float* JXL_RESTRICT out, size_t out_stride) {
+        for (size_t i = 0; i < N / 2; i++) { // out[i] = c[i] + m * c[N / 2 + i]; out[N - 1 - i] = c[i] - m * c[N / 2 + i]
+            auto mul = Set(FV<SZ>(), WcMultipliers<N>::kMultipliers[i]);
+            auto in1 = Load(FV<SZ>(), coeff + i * SZ);
+            auto in2 = Load(FV<SZ>(), coeff + (N / 2 + i) * SZ);
+            auto out1 = MulAdd(mul, in2, in1);
+            auto out2 = NegMulAdd(mul, in2, in1);
+            StoreU(out1, FV<SZ>(), out + i * out_stride); // out is addressed with out_stride and unaligned stores
+            StoreU(out2, FV<SZ>(), out + (N - i - 1) * out_stride);
+        }
+    }
+    template <typename Block>
+    static void LoadFromBlock(const Block& in, size_t off, float* JXL_RESTRICT coeff) {
+        for (size_t i = 0; i < N; i++) { // coeff[i] = in.LoadPart(tag, i, off)
+            Store(in.LoadPart(FV<SZ>(), i, off), FV<SZ>(), coeff + i * SZ);
+        }
+    }
+    template <typename Block>
+    static void StoreToBlockAndScale(const float* JXL_RESTRICT coeff, const Block& out, size_t off) {
+        auto mul = Set(FV<SZ>(), 1.0f / N); // every vector is multiplied by 1/N on the way out
+        for (size_t i = 0; i < N; i++) {
+            out.StorePart(FV<SZ>(), mul * Load(FV<SZ>(), coeff + i * SZ), i, off);
+        }
+    }
+};
+
+template <size_t N, size_t SZ>
+struct DCT1DImpl;
+// N == 1: nothing to do, `mem` is left untouched.
+template <size_t SZ>
+struct DCT1DImpl<1, SZ> {
+    JXL_INLINE void operator()(float* JXL_RESTRICT mem) {}
+};
+// N == 2: in-place butterfly on two vectors, (a, b) -> (a + b, a - b).
+template <size_t SZ>
+struct DCT1DImpl<2, SZ> {
+    JXL_INLINE void operator()(float* JXL_RESTRICT mem) {
+        auto in1 = Load(FV<SZ>(), mem);
+        auto in2 = Load(FV<SZ>(), mem + SZ);
+        Store(in1 + in2, FV<SZ>(), mem);
+        Store(in1 - in2, FV<SZ>(), mem + SZ);
+    }
+};
+// General N: in-place transform of the N vectors in `mem`, built from two recursive N/2-point transforms.
+template <size_t N, size_t SZ>
+struct DCT1DImpl {
+    void operator()(float* JXL_RESTRICT mem) {
+        // This is relatively small (4kB with 64-DCT and AVX-512)
+        HWY_ALIGN float tmp[N * SZ];
+        CoeffBundle<N / 2, SZ>::AddReverse(mem, mem + N / 2 * SZ, tmp); // tmp[i] = mem[i] + mem[N - 1 - i]
+        DCT1DImpl<N / 2, SZ>()(tmp);                                    // transform the lower half in place
+        CoeffBundle<N / 2, SZ>::SubReverse(mem, mem + N / 2 * SZ, tmp + N / 2 * SZ); // upper: mem[i] - mem[N - 1 - i]
+        CoeffBundle<N, SZ>::Multiply(tmp);              // scales the upper half only
+        DCT1DImpl<N / 2, SZ>()(tmp + N / 2 * SZ);       // transform the upper half in place
+        CoeffBundle<N / 2, SZ>::B(tmp + N / 2 * SZ);    // neighbour-sum step on the upper half
+        CoeffBundle<N, SZ>::InverseEvenOdd(tmp, mem);   // lower half -> even slots of mem, upper half -> odd slots
+    }
+};
+
+template <size_t N, size_t SZ>
+struct IDCT1DImpl;
+// N == 1: copies a single vector from `from` to `to`; both stride arguments are ignored.
+template <size_t SZ>
+struct IDCT1DImpl<1, SZ> {
+    JXL_INLINE void operator()(const float* from, size_t from_stride, float* to, size_t to_stride) {
+        StoreU(LoadU(FV<SZ>(), from), FV<SZ>(), to);
+    }
+};
+// N == 2: butterfly (a, b) -> (a + b, a - b); the second vector is addressed through the given strides.
+template <size_t SZ>
+struct IDCT1DImpl<2, SZ> {
+    JXL_INLINE void operator()(const float* from, size_t from_stride, float* to, size_t to_stride) {
+        JXL_DASSERT(from_stride >= SZ);
+        JXL_DASSERT(to_stride >= SZ);
+        auto in1 = LoadU(FV<SZ>(), from);
+        auto in2 = LoadU(FV<SZ>(), from + from_stride);
+        StoreU(in1 + in2, FV<SZ>(), to);
+        StoreU(in1 - in2, FV<SZ>(), to + to_stride);
+    }
+};
+// General N: gathers the strided input into a contiguous scratch array, recurses on both halves, writes to `to`.
+template <size_t N, size_t SZ>
+struct IDCT1DImpl {
+    void operator()(const float* from, size_t from_stride, float* to, size_t to_stride) {
+        JXL_DASSERT(from_stride >= SZ);
+        JXL_DASSERT(to_stride >= SZ);
+        // This is relatively small (4kB with 64-DCT and AVX-512)
+        HWY_ALIGN float tmp[N * SZ];
+        CoeffBundle<N, SZ>::ForwardEvenOdd(from, from_stride, tmp); // even inputs -> lower half, odd -> upper half
+        IDCT1DImpl<N / 2, SZ>()(tmp, SZ, tmp, SZ);                  // lower half, in place (from == to)
+        CoeffBundle<N / 2, SZ>::BTranspose(tmp + N / 2 * SZ);       // neighbour-sum step on the upper half
+        IDCT1DImpl<N / 2, SZ>()(tmp + N / 2 * SZ, SZ, tmp + N / 2 * SZ, SZ); // upper half, in place
+        CoeffBundle<N, SZ>::MultiplyAndAdd(tmp, to, to_stride);     // combines both halves into the strided output
+    }
+};
+
+template <size_t N, size_t M_or_0, typename FromBlock, typename ToBlock>
+void DCT1DWrapper(const FromBlock& from, const ToBlock& to, size_t Mp) {
+    // Per step of Lanes(tag) lanes: load N vectors into scratch, transform in place, store back scaled by 1/N.
+    constexpr size_t SZ = MaxLanes(FV<M_or_0>());
+    const size_t width = (M_or_0 == 0) ? Mp : M_or_0; // M_or_0 == 0: the width comes from Mp at run time
+    HWY_ALIGN float scratch[N * SZ];
+    // TODO(veluca): consider removing the temporary memory here (as is done in IDCT), if it turns out that some
+    // compilers don't optimize away the loads and this is performance-critical.
+    for (size_t offset = 0; offset < width; offset += Lanes(FV<M_or_0>())) {
+        CoeffBundle<N, SZ>::LoadFromBlock(from, offset, scratch);
+        DCT1DImpl<N, SZ>()(scratch);
+        CoeffBundle<N, SZ>::StoreToBlockAndScale(scratch, to, offset);
+    }
+}
+
+template <size_t N, size_t M_or_0, typename FromBlock, typename ToBlock>
+void IDCT1DWrapper(const FromBlock& from, const ToBlock& to, size_t Mp) {
+    // Runs IDCT1DImpl<N> straight on the block addresses, one step of Lanes(tag) lanes at a time (no local scratch).
+    constexpr size_t SZ = MaxLanes(FV<M_or_0>());
+    const size_t width = (M_or_0 == 0) ? Mp : M_or_0; // M_or_0 == 0: the width comes from Mp at run time
+    for (size_t offset = 0; offset < width; offset += Lanes(FV<M_or_0>()))
+        IDCT1DImpl<N, SZ>()(from.Address(0, offset), from.Stride(), to.Address(0, offset), to.Stride());
+}
+
+template <size_t N, size_t M, typename = void>
+struct DCT1D { // default: chosen when M <= MaxLanes(FV<0>()); M is passed on as a compile-time lane count
+    template <typename FromBlock, typename ToBlock>
+    void operator()(const FromBlock& from, const ToBlock& to) {
+        DCT1DWrapper<N, M>(from, to, M);
+    }
+};
+// Chosen when M > MaxLanes(FV<0>()): calls the M_or_0 == 0 wrapper through NoInlineWrapper, with M as run-time width.
+template <size_t N, size_t M>
+struct DCT1D<N, M, typename std::enable_if<(M > MaxLanes(FV<0>()))>::type> {
+    template <typename FromBlock, typename ToBlock>
+    void operator()(const FromBlock& from, const ToBlock& to) {
+        NoInlineWrapper(DCT1DWrapper<N, 0, FromBlock, ToBlock>, from, to, M);
+    }
+};
+
+template <size_t N, size_t M, typename = void>
+struct IDCT1D { // default: chosen when M <= MaxLanes(FV<0>()); M is passed on as a compile-time lane count
+    template <typename FromBlock, typename ToBlock>
+    void operator()(const FromBlock& from, const ToBlock& to) {
+        IDCT1DWrapper<N, M>(from, to, M);
+    }
+};
+// Chosen when M > MaxLanes(FV<0>()): calls the M_or_0 == 0 wrapper through NoInlineWrapper, with M as run-time width.
+template <size_t N, size_t M>
+struct IDCT1D<N, M, typename std::enable_if<(M > MaxLanes(FV<0>()))>::type> {
+    template <typename FromBlock, typename ToBlock>
+    void operator()(const FromBlock& from, const ToBlock& to) {
+        NoInlineWrapper(IDCT1DWrapper<N, 0, FromBlock, ToBlock>, from, to, M);
+    }
+};
+
+// Computes the NxN transposed-scaled-DCT (tsDCT) of `from` and stores it in `to`.
+// Requires that block is HWY_ALIGN'ed.
+// Pipeline: 1-D DCT pass from -> to, transpose to -> scratch_space, second 1-D DCT pass scratch_space -> to.
+// See also DCTSlow, ComputeDCT
+template <size_t N>
+struct ComputeTransposedScaledDCT {
+    // scratch_space must be aligned, and should have space for N*N floats.
+    template <class From>
+    HWY_MAYBE_UNUSED void operator()(const From& from, float* JXL_RESTRICT to, float* JXL_RESTRICT scratch_space) {
+        DCT1D<N, N> pass;
+        pass(from, DCTTo(to, N));
+        Transpose<N, N>::Run(DCTFrom(to, N), DCTTo(scratch_space, N));
+        pass(DCTFrom(scratch_space, N), DCTTo(to, N));
+    }
+};
+
+// Computes the NxN transposed-scaled-iDCT (tsIDCT) of `from` and delivers the result through `to`.
+// Requires that block is HWY_ALIGN'ed. Note: `from` is overwritten, it receives the transposed intermediate.
+// Pipeline: 1-D IDCT pass from -> scratch_space, transpose scratch_space -> from, second IDCT pass from -> to.
+// See also IDCTSlow, ComputeIDCT.
+
+template <size_t N>
+struct ComputeTransposedScaledIDCT {
+    // scratch_space must be aligned, and should have space for N*N floats.
+    template <class To>
+    HWY_MAYBE_UNUSED void operator()(float* JXL_RESTRICT from, const To& to, float* JXL_RESTRICT scratch_space) {
+        IDCT1D<N, N> pass;
+        pass(DCTFrom(from, N), DCTTo(scratch_space, N));
+        Transpose<N, N>::Run(DCTFrom(scratch_space, N), DCTTo(from, N));
+        pass(DCTFrom(from, N), to);
+    }
+};
+// Computes the non-transposed, scaled DCT of a block, that needs to be
+// HWY_ALIGN'ed. Used for rectangular blocks. Both code paths leave the final result in `to`.
+template <size_t ROWS, size_t COLS>
+struct ComputeScaledDCT {
+    // scratch_space must be aligned, and should have space for ROWS*COLS floats.
+    template <class From>
+    HWY_MAYBE_UNUSED void operator()(const From& from, float* to, float* JXL_RESTRICT scratch_space) {
+        if (ROWS >= COLS) {
+            // Two 1-D passes with one transpose in between; the second pass already ends in `to`.
+            DCT1D<ROWS, COLS>()(from, DCTTo(to, COLS));
+            Transpose<ROWS, COLS>::Run(DCTFrom(to, COLS), DCTTo(scratch_space, ROWS));
+            DCT1D<COLS, ROWS>()(DCTFrom(scratch_space, ROWS), DCTTo(to, ROWS));
+            return;
+        }
+        // ROWS < COLS: start in scratch_space and finish with an extra transpose so the result again ends in `to`.
+        DCT1D<ROWS, COLS>()(from, DCTTo(scratch_space, COLS));
+        Transpose<ROWS, COLS>::Run(DCTFrom(scratch_space, COLS), DCTTo(to, ROWS));
+        DCT1D<COLS, ROWS>()(DCTFrom(to, ROWS), DCTTo(scratch_space, ROWS));
+        Transpose<COLS, ROWS>::Run(DCTFrom(scratch_space, ROWS), DCTTo(to, COLS));
+    }
+};
+// Computes the non-transposed, scaled inverse DCT (IDCT) of a block, that needs to be
+// HWY_ALIGN'ed. Used for rectangular blocks. Note: the contents of `from` are overwritten along the way.
+template <size_t ROWS, size_t COLS>
+struct ComputeScaledIDCT {
+    // scratch_space must be aligned, and should have space for ROWS*COLS
+    // floats.
+    template <class To>
+    HWY_MAYBE_UNUSED void operator()(float* JXL_RESTRICT from, const To& to, float* JXL_RESTRICT scratch_space) {
+        // Reverse the steps done in ComputeScaledDCT.
+        if (ROWS >= COLS) {
+            IDCT1D<COLS, ROWS>()(DCTFrom(from, ROWS), DCTTo(scratch_space, ROWS));
+            Transpose<COLS, ROWS>::Run(DCTFrom(scratch_space, ROWS), DCTTo(from, COLS));
+            IDCT1D<ROWS, COLS>()(DCTFrom(from, COLS), to);
+            return;
+        }
+        // ROWS < COLS: the forward transform ended with an extra transpose, so undo that one first.
+        Transpose<ROWS, COLS>::Run(DCTFrom(from, COLS), DCTTo(scratch_space, ROWS));
+        IDCT1D<COLS, ROWS>()(DCTFrom(scratch_space, ROWS), DCTTo(from, ROWS));
+        Transpose<COLS, ROWS>::Run(DCTFrom(from, ROWS), DCTTo(scratch_space, COLS));
+        IDCT1D<ROWS, COLS>()(DCTFrom(scratch_space, COLS), to);
+    }
+};
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+#endif // LIB_JXL_DCT_INL_H_
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/include/acc_enc_ac_strategy.hpp b/codec/L2/demos/jxlEnc/others/include/acc_enc_ac_strategy.hpp
new file mode 100644
index 0000000000..bf1eea7920
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/acc_enc_ac_strategy.hpp
@@ -0,0 +1,90 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_AC_STRATEGY_H_
+#define LIB_JXL_ENC_AC_STRATEGY_H_
+
+#include <stdint.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/quant_weights.h"
+
+// `FindBestAcStrategy` uses heuristics to choose which AC strategy should be
+// used in each block, as well as the initial quantization field.
+
+namespace jxl {
+
+// AC strategy selection: utility struct.
+
+struct ACSConfig {
+    const DequantMatrices* JXL_RESTRICT dequant;
+    float info_loss_multiplier;
+    float info_loss_multiplier2;
+    float* JXL_RESTRICT quant_field_row;
+    size_t quant_field_stride;
+    float* JXL_RESTRICT masking_field_row;
+    size_t masking_field_stride;
+    const float* JXL_RESTRICT src_rows[3];
+    size_t src_stride;
+    // Cost for 1 (-1), 2 (-2) explicitly, cost for others computed with cost1 +
+    // cost2 + sqrt(q) * cost_delta.
+    float cost1;
+    float cost2;
+    float cost_delta;
+    float base_entropy;
+    float zeros_mul;
+
+    // Reference to element (x, y) of src_rows[c], addressed with src_stride.
+    const float& Pixel(size_t c, size_t x, size_t y) const {
+        const size_t offset = y * src_stride + x;
+        return src_rows[c][offset];
+    }
+
+    // Entry (bx, by) of the masking field; JXL_DASSERT checks that it is positive.
+    float Masking(size_t bx, size_t by) const {
+        const size_t idx = by * masking_field_stride + bx;
+        JXL_DASSERT(masking_field_row[idx] > 0);
+        return masking_field_row[idx];
+    }
+
+    // Entry (bx, by) of the quant field; JXL_DASSERT checks that it is positive.
+    float Quant(size_t bx, size_t by) const {
+        const size_t idx = by * quant_field_stride + bx;
+        JXL_DASSERT(quant_field_row[idx] > 0);
+        return quant_field_row[idx];
+    }
+
+    // Stores `value` at (bx, by) of the quant field. The method is const because
+    // only the pointed-to buffer changes, not the struct itself.
+    void SetQuant(size_t bx, size_t by, float value) const {
+        JXL_DASSERT(value > 0);
+        const size_t idx = by * quant_field_stride + bx;
+        quant_field_row[idx] = value;
+    }
+};
+
+// Groups an ACSConfig and a PassesEncoderState pointer with the three entry
+// points declared below. Only declarations are visible in this header; the
+// member functions are defined elsewhere.
+struct AcStrategyHeuristics {
+    void Init(const Image3F& src, PassesEncoderState* enc_state);
+    // NOTE(review): the dct*/dc* vectors are taken by non-const reference, so the
+    // definition may read and/or modify them; which of the two applies is not
+    // visible from this header -- confirm against the implementation.
+    void ProcessRect(const Rect& rect,
+                     size_t xsize,
+                     size_t ysize,
+                     std::vector<std::vector<float> >& dctIDT,
+                     std::vector<std::vector<float> >& dct2x2,
+                     std::vector<std::vector<float> >& dct4x4,
+                     std::vector<std::vector<float> >& dct8x8,
+                     std::vector<std::vector<float> >& dct16x16,
+                     std::vector<std::vector<float> >& dct32x32,
+                     std::vector<std::vector<float> >& dcIDT,
+                     std::vector<std::vector<float> >& dc2x2,
+                     std::vector<std::vector<float> >& dc4x4,
+                     std::vector<std::vector<float> >& dc8x8,
+                     std::vector<std::vector<float> >& dc16x16,
+                     std::vector<std::vector<float> >& dc32x32);
+    void Finalize(AuxOut* aux_out);
+    ACSConfig config;
+    // Raw pointer; this struct declares no destructor, so nothing here frees it.
+    PassesEncoderState* enc_state;
+};
+
+// Debug.
+void DumpAcStrategy(const AcStrategyImage& ac_strategy, size_t xsize, size_t ysize, const char* tag, AuxOut* aux_out);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_AC_STRATEGY_H_
diff --git a/codec/L2/demos/jxlEnc/others/include/acc_enc_chroma_from_luma.hpp b/codec/L2/demos/jxlEnc/others/include/acc_enc_chroma_from_luma.hpp
new file mode 100644
index 0000000000..47fe2ae994
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/acc_enc_chroma_from_luma.hpp
@@ -0,0 +1,80 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_CHROMA_FROM_LUMA_H_
+#define LIB_JXL_ENC_CHROMA_FROM_LUMA_H_
+
+// Chroma-from-luma, computed using heuristics to determine the best linear
+// model for the X and B channels from the Y channel.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quant_weights.h"
+
+namespace jxl {
+
+void ColorCorrelationMapEncodeDC(ColorCorrelationMap* map, BitWriter* writer, size_t layer, AuxOut* aux_out);
+
+struct CfLHeuristics {
+    void Init(const Image3F& opsin);
+
+    // (Re)allocates `mem` with kItemsPerThread floats per thread.
+    void PrepareForThreads(size_t num_threads) {
+        const size_t total_items = num_threads * kItemsPerThread;
+        mem = hwy::AllocateAligned<float>(total_items);
+    }
+
+    void ComputeTile(const Rect& r,
+                     const Image3F& opsin,
+                     const DequantMatrices& dequant,
+                     const AcStrategyImage* ac_strategy,
+                     const Quantizer* quantizer,
+                     bool fast,
+                     size_t thread,
+                     ColorCorrelationMap* cmap,
+                     size_t xsize,
+                     size_t ysize,
+                     std::vector<std::vector<float> >& dctIDT,
+                     std::vector<std::vector<float> >& dct2x2,
+                     std::vector<std::vector<float> >& dct4x4,
+                     std::vector<std::vector<float> >& dct8x8,
+                     std::vector<std::vector<float> >& dct16x16,
+                     std::vector<std::vector<float> >& dct32x32,
+                     std::vector<std::vector<float> >& dcIDT,
+                     std::vector<std::vector<float> >& dc2x2,
+                     std::vector<std::vector<float> >& dc4x4,
+                     std::vector<std::vector<float> >& dc8x8,
+                     std::vector<std::vector<float> >& dc16x16,
+                     std::vector<std::vector<float> >& dc32x32);
+
+    void ComputeDC(bool fast, ColorCorrelationMap* cmap);
+
+    ImageF dc_values;
+    hwy::AlignedFreeUniquePtr<float[]> mem;
+
+    // The per-thread working set is too large for the stack, hence the heap
+    // buffer `mem`; this is the number of floats reserved per thread.
+    static constexpr size_t kItemsPerThread = AcStrategy::kMaxCoeffArea * 3       // Blocks
+                                              + kColorTileDim * kColorTileDim * 4 // AC coeff storage
+                                              + AcStrategy::kMaxCoeffArea * 2;    // Scratch space
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_CHROMA_FROM_LUMA_H_
diff --git a/codec/L2/demos/jxlEnc/others/include/acc_enc_cluster.hpp b/codec/L2/demos/jxlEnc/others/include/acc_enc_cluster.hpp
new file mode 100644
index 0000000000..ffa6373b66
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/acc_enc_cluster.hpp
@@ -0,0 +1,79 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Functions for clustering similar histograms together.
+
+#ifndef LIB_JXL_ENC_CLUSTER_H_
+#define LIB_JXL_ENC_CLUSTER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/enc_ans.h"
+
+namespace jxl {
+
+// Symbol-frequency histogram: per-symbol bins in `data_` plus a running total.
+struct Histogram {
+    // Both scalar members are initialized so that copying a freshly constructed
+    // histogram (e.g. inside a std::vector) never reads an indeterminate
+    // `entropy_`.
+    Histogram() : total_count_(0), entropy_(0.0f) {}
+    // Drops all bins and resets the total. `entropy_` is left as it was.
+    void Clear() {
+        data_.clear();
+        total_count_ = 0;
+    }
+    // Counts one occurrence of `symbol`. When `symbol` is past the current end
+    // of `data_`, the bin array grows to the next multiple of kRounding that
+    // contains it.
+    void Add(size_t symbol) {
+        if (data_.size() <= symbol) {
+            data_.resize(DivCeil(symbol + 1, kRounding) * kRounding);
+        }
+        ++data_[symbol];
+        ++total_count_;
+    }
+    // Adds `other` bin by bin, growing `data_` first if `other` has more bins.
+    void AddHistogram(const Histogram& other) {
+        if (other.data_.size() > data_.size()) {
+            data_.resize(other.data_.size());
+        }
+        for (size_t i = 0; i < other.data_.size(); ++i) {
+            data_[i] += other.data_[i];
+        }
+        total_count_ += other.total_count_;
+    }
+    // Forwards the bins to ANSPopulationCost.
+    float PopulationCost() const { return ANSPopulationCost(data_.data(), data_.size()); }
+    float ShannonEntropy() const;
+
+    std::vector<ANSHistBin> data_;
+    size_t total_count_;
+    mutable float entropy_; // WARNING: not kept up-to-date.
+    static constexpr size_t kRounding = 8;
+};
+
+void acc_FastClusterHistograms(const std::vector<Histogram>& in,
+                               std::vector<uint32_t> nonempty_histograms,
+                               uint32_t largest_idx_in,
+                               const size_t num_contexts,
+                               size_t max_histograms,
+                               float min_distance,
+                               std::vector<Histogram>* out,
+                               std::vector<uint32_t>* histogram_symbols);
+
+void HistogramReindex(std::vector<Histogram>* out, std::vector<uint32_t>* symbols);
+
+void ClusterHistograms(HistogramParams params,
+                       const std::vector<Histogram>& in,
+                       size_t num_contexts,
+                       size_t max_histograms,
+                       std::vector<Histogram>* out,
+                       std::vector<uint32_t>* histogram_symbols);
+
+void ClusterHistogramsNew(HistogramParams params,
+                          const std::vector<Histogram>& in,
+                          size_t num_contexts,
+                          size_t max_histograms,
+                          std::vector<Histogram>* out,
+                          std::vector<uint32_t>* histogram_symbols);
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_CLUSTER_H_
diff --git a/codec/L2/demos/jxlEnc/others/include/acc_enc_group.hpp b/codec/L2/demos/jxlEnc/others/include/acc_enc_group.hpp
new file mode 100644
index 0000000000..529a307d16
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/acc_enc_group.hpp
@@ -0,0 +1,48 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_GROUP_H_
+#define LIB_JXL_ENC_GROUP_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+
+namespace jxl {
+
+// Fills DC
+void ComputeCoefficients(size_t group_idx,
+                         PassesEncoderState* enc_state,
+                         const Image3F& opsin,
+                         Image3F* dc,
+                         size_t xsize,
+                         size_t ysize,
+                         std::vector<std::vector<float> >& dctIDT,
+                         std::vector<std::vector<float> >& dct2x2,
+                         std::vector<std::vector<float> >& dct4x4,
+                         std::vector<std::vector<float> >& dct8x8,
+                         std::vector<std::vector<float> >& dct16x16,
+                         std::vector<std::vector<float> >& dct32x32,
+                         std::vector<std::vector<float> >& dcIDT,
+                         std::vector<std::vector<float> >& dc2x2,
+                         std::vector<std::vector<float> >& dc4x4,
+                         std::vector<std::vector<float> >& dc8x8,
+                         std::vector<std::vector<float> >& dc16x16,
+                         std::vector<std::vector<float> >& dc32x32);
+
+Status EncodeGroupTokenizedCoefficients(size_t group_idx,
+                                        size_t pass_idx,
+                                        size_t histogram_idx,
+                                        const PassesEncoderState& enc_state,
+                                        BitWriter* writer,
+                                        AuxOut* aux_out);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_GROUP_H_
diff --git a/codec/L2/demos/jxlEnc/others/include/acc_init_histogram.hpp b/codec/L2/demos/jxlEnc/others/include/acc_init_histogram.hpp
new file mode 100644
index 0000000000..18691cbf62
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/acc_init_histogram.hpp
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Include guard: the macro tested and the macro defined must be the same name,
+// otherwise a second inclusion of this header is not suppressed.
+#ifndef ACC_INIT_HISTOGRAM_HPP
+#define ACC_INIT_HISTOGRAM_HPP
+
+#include "acc_phase3.hpp"
+
+namespace jxl {
+bool acc_InitHistogram(std::vector<Histogram>& histograms, std::vector<std::vector<Token> >& tokens);
+
+void acc_ANSinitHistogram(LossyFrameEncoder& lossy_frame_encoder,
+                          std::unique_ptr<FrameHeader>& frame_header,
+
+                          std::vector<HistogramParams>& params,
+                          bool do_once[5],
+
+                          std::vector<std::vector<Token> >& tokens0,
+                          std::vector<std::vector<Token> >& tokens1,
+                          std::vector<std::vector<Token> >& tokens2,
+                          std::vector<std::vector<Token> >& tokens3,
+
+                          char* do_prefix_out,
+                          std::vector<uint32_t>& largest_idx,
+                          std::vector<std::vector<uint32_t> >& nonempty_histograms,
+                          std::vector<std::vector<Histogram> >& histograms_);
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/include/acc_store_encode_data.hpp b/codec/L2/demos/jxlEnc/others/include/acc_store_encode_data.hpp
new file mode 100644
index 0000000000..3a10b60a96
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/acc_store_encode_data.hpp
@@ -0,0 +1,579 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef ACC_STORE_ENCODE_DATA_HPP
+#define ACC_STORE_ENCODE_DATA_HPP
+
+#include "acc_phase3.hpp"
+
+namespace jxl {
+
+// NOTE(review): this is a non-static, non-inline variable definition at
+// namespace scope in a header. Including this header from more than one
+// translation unit would define the symbol more than once -- confirm it is
+// included from a single .cpp only. The same concern applies to the
+// non-template, non-static free functions defined further down in this header.
+bool ans_fuzzer_friendly_ = false;
+// Capacity of the `symbols` arrays filled by NormalizeCountsNew: only the first
+// kMaxNumSymbolsForSmallCode non-zero symbol indices are recorded there.
+static const int kMaxNumSymbolsForSmallCode = 4;
+
+// Writer stand-in that emits nothing: it only adds up the first argument of
+// every Write call in `size`.
+struct SizeWriterNew {
+    size_t size = 0;
+    void Write(size_t num, size_t bits) {
+        (void)bits; // the second argument is ignored, only the first is tallied
+        size = size + num;
+    }
+};
+
+// Writes `n` (asserted to be at most 255) in a variable-length form: a lone
+// Write(1, 0) for zero; otherwise Write(1, 1), then FloorLog2Nonzero(n) in a
+// 3-wide field, then n with 2^FloorLog2Nonzero(n) subtracted.
+template <typename Writer>
+void StoreVarLenUint8New(size_t n, Writer* writer) {
+    JXL_DASSERT(n <= 255);
+    if (n == 0) {
+        writer->Write(1, 0);
+        return;
+    }
+    writer->Write(1, 1);
+    const size_t log2_n = FloorLog2Nonzero(n);
+    writer->Write(3, log2_n);
+    writer->Write(log2_n, n - (1ULL << log2_n));
+}
+
+// Same scheme as the 8-bit variant, for `n` asserted to be at most 65535: the
+// FloorLog2Nonzero(n) field is 4 wide instead of 3.
+template <typename Writer>
+void StoreVarLenUint16New(size_t n, Writer* writer) {
+    JXL_DASSERT(n <= 65535);
+    if (n == 0) {
+        writer->Write(1, 0);
+        return;
+    }
+    writer->Write(1, 1);
+    const size_t log2_n = FloorLog2Nonzero(n);
+    writer->Write(4, log2_n);
+    writer->Write(log2_n, n - (1ULL << log2_n));
+}
+
+// Writes one HybridUintConfig: split_exponent first, then msb_in_token and
+// lsb_in_token unless split_exponent equals log_alpha_size.
+template <typename Writer>
+void EncodeUintConfig(const HybridUintConfig uint_config, Writer* writer, size_t log_alpha_size) {
+    writer->Write(CeilLog2Nonzero(log_alpha_size + 1), uint_config.split_exponent);
+    if (uint_config.split_exponent != log_alpha_size) {
+        // Field widths depend on the values already written.
+        const size_t msb_width = CeilLog2Nonzero(uint_config.split_exponent + 1);
+        writer->Write(msb_width, uint_config.msb_in_token);
+        const size_t lsb_width = CeilLog2Nonzero(uint_config.split_exponent - uint_config.msb_in_token + 1);
+        writer->Write(lsb_width, uint_config.lsb_in_token);
+    }
+    // Otherwise msb/lsb don't matter and nothing more is written.
+}
+// Writes every config of `uint_config`, in order, through EncodeUintConfig.
+template <typename Writer>
+void EncodeUintConfigsNew(const std::vector<HybridUintConfig>& uint_config, Writer* writer, size_t log_alpha_size) {
+    // TODO(veluca): RLE?
+    for (const HybridUintConfig& cfg : uint_config) {
+        EncodeUintConfig(cfg, writer, log_alpha_size);
+    }
+}
+
+// Fills `info` from the normalized `counts` and the alias `table`: per-symbol
+// frequency (plus reciprocal when USE_MULT_BY_RECIPROCAL is defined) and the
+// reverse map from (symbol, offset) to table position.
+void ANSBuildInfoTableNew(const ANSHistBin* counts,
+                          const AliasTable::Entry* table,
+                          size_t alphabet_size,
+                          size_t log_alpha_size,
+                          ANSEncSymbolInfo* info) {
+    const size_t log_entry_size = ANS_LOG_TAB_SIZE - log_alpha_size;
+    const size_t entry_size_minus_1 = (1 << log_entry_size) - 1;
+    // Always visit at least one slot so that an empty stream (alphabet_size == 0)
+    // still gets a valid entry; that slot receives the full table size.
+    const size_t num_slots = std::max<size_t>(1, alphabet_size);
+    for (size_t sym = 0; sym < num_slots; ++sym) {
+        ANSEncSymbolInfo& entry = info[sym];
+        const ANSHistBin freq = (sym == alphabet_size) ? ANS_TAB_SIZE : counts[sym];
+        entry.freq_ = static_cast<uint16_t>(freq);
+#ifdef USE_MULT_BY_RECIPROCAL
+        if (freq == 0) {
+            entry.ifreq_ = 1; // shouldn't matter (symbol shouldn't occur), but...
+        } else {
+            entry.ifreq_ = ((1ull << RECIPROCAL_PRECISION) + entry.freq_ - 1) / entry.freq_;
+        }
+#endif
+        entry.reverse_map_.resize(freq);
+    }
+    // Walk every table position and record it under the symbol/offset it maps to.
+    for (int pos = 0; pos < ANS_TAB_SIZE; pos++) {
+        const AliasTable::Symbol hit = AliasTable::Lookup(table, pos, log_entry_size, entry_size_minus_1);
+        info[hit.value].reverse_map_[hit.offset] = pos;
+    }
+}
+
+// Estimates the data cost of coding `histogram` with the normalized `counts`:
+// each occurring symbol contributes its occurrence count times
+// max(0, ANS_LOG_TAB_SIZE - FastLog2f(count)).
+float EstimateDataBitsNew(const ANSHistBin* histogram, const ANSHistBin* counts, size_t len) {
+    int histogram_total = 0;
+    int counts_total = 0;
+    float bits = 0.0f;
+    for (size_t idx = 0; idx < len; ++idx) {
+        histogram_total += histogram[idx];
+        counts_total += counts[idx];
+        if (histogram[idx] <= 0) continue;
+        // A symbol that occurs must have a non-zero normalized count.
+        JXL_ASSERT(counts[idx] > 0);
+        // += histogram[i] * -log(counts[i]/total_counts)
+        bits += histogram[idx] * std::max(0.0f, ANS_LOG_TAB_SIZE - FastLog2f(counts[idx]));
+    }
+    if (histogram_total > 0) {
+        JXL_ASSERT(counts_total == ANS_TAB_SIZE);
+    }
+    return bits;
+}
+
+// Data cost under a flat code: every occurrence costs
+// max(FastLog2f(len), 0) and the result is that cost times the total count.
+float EstimateDataBitsFlatNew(const ANSHistBin* histogram, size_t len) {
+    const float bits_per_symbol = std::max(FastLog2f(len), 0.0f);
+    int occurrences = 0;
+    const ANSHistBin* const end = histogram + len;
+    for (const ANSHistBin* bin = histogram; bin != end; ++bin) {
+        occurrences += *bin;
+    }
+    return occurrences * bits_per_symbol;
+}
+
+// Static Huffman code for encoding logcounts. The last symbol is used as RLE
+// sequence.
+static const uint8_t kLogCountBitLengths[ANS_LOG_TAB_SIZE + 2] = {
+    5, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 6, 7, 7,
+};
+static const uint8_t kLogCountSymbols[ANS_LOG_TAB_SIZE + 2] = {
+    17, 11, 15, 3, 9, 7, 4, 2, 5, 6, 0, 33, 1, 65,
+};
+
+// Returns the difference between largest count that can be represented and is
+// smaller than "count" and smallest representable count larger than "count".
+// The result is 1 << drop_bits, or 1 when drop_bits is negative.
+static int SmallestIncrement(uint32_t count, uint32_t shift) {
+    int bits = -1; // value used for count == 0
+    if (count != 0) {
+        bits = FloorLog2Nonzero(count);
+    }
+    const int drop_bits = bits - GetPopulationCountPrecision(bits, shift);
+    if (drop_bits < 0) {
+        return 1;
+    }
+    return 1 << drop_bits;
+}
+
+// Converts the real-valued `targets[0..max_symbol)` into integer `counts` such
+// that the counts written here add up to `table_size`.
+//  - Entries with 0 < target < 1 get a count of 1.
+//  - Entries with target >= 1 are scaled by discount_ratio, truncated, kept
+//    within [1, table_size - 1], rounded down to a multiple of
+//    SmallestIncrement(count, shift) and possibly bumped up by one increment.
+//  - Entries with target <= 0 are not written at all.
+// The first entry with the largest FloorLog2Nonzero(count) among those handled
+// by the second loop absorbs the difference between the running sum and
+// `table_size`; its index is reported through `omit_pos` (0 when the second
+// loop handled nothing).
+// Returns true iff that absorbing entry is still positive after the correction.
+// With minimize_error_of_sum == true the round-up decision is based on the
+// accumulated error (sum_nonrounded - sum) instead of the entry's own target.
+template <bool minimize_error_of_sum>
+bool RebalanceHistogramNew(
+    const float* targets, int max_symbol, int table_size, uint32_t shift, int* omit_pos, ANSHistBin* counts) {
+    int sum = 0;
+    float sum_nonrounded = 0.0;
+    int remainder_pos = 0; // if all of them are handled in first loop
+    int remainder_log = -1;
+    // Pass 1: tiny-but-present symbols are pinned to a count of 1.
+    for (int n = 0; n < max_symbol; ++n) {
+        if (targets[n] > 0 && targets[n] < 1.0f) {
+            counts[n] = 1;
+            sum_nonrounded += targets[n];
+            sum += counts[n];
+        }
+    }
+    // Pinning those entries to 1 used more than their share; the remaining
+    // entries are scaled down by this ratio to compensate.
+    const float discount_ratio = (table_size - sum) / (table_size - sum_nonrounded);
+    JXL_ASSERT(discount_ratio > 0);
+    JXL_ASSERT(discount_ratio <= 1.0f);
+    // Invariant for minimize_error_of_sum == true:
+    // abs(sum - sum_nonrounded)
+    //   <= SmallestIncrement(max(targets[])) + max_symbol
+    for (int n = 0; n < max_symbol; ++n) {
+        if (targets[n] >= 1.0f) {
+            sum_nonrounded += targets[n];
+            counts[n] = static_cast<ANSHistBin>(targets[n] * discount_ratio); // truncate
+            if (counts[n] == 0) counts[n] = 1;
+            if (counts[n] == table_size) counts[n] = table_size - 1;
+            // Round the count to the closest nonzero multiple of SmallestIncrement
+            // (when minimize_error_of_sum is false) or one of two closest so as to
+            // keep the sum as close as possible to sum_nonrounded.
+            int inc = SmallestIncrement(counts[n], shift);
+            // inc is a power of two (1 or 1 << k), so this clears the low bits.
+            counts[n] -= counts[n] & (inc - 1);
+            // TODO(robryk): Should we rescale targets[n]?
+            const float target = minimize_error_of_sum ? (sum_nonrounded - sum) : targets[n];
+            // Note: inc / 2 is an integer division.
+            if (counts[n] == 0 || (target > counts[n] + inc / 2 && counts[n] + inc < table_size)) {
+                counts[n] += inc;
+            }
+            sum += counts[n];
+            const int count_log = FloorLog2Nonzero(static_cast<uint32_t>(counts[n]));
+            // Strict '>' keeps the first entry among those with equal log.
+            if (count_log > remainder_log) {
+                remainder_pos = n;
+                remainder_log = count_log;
+            }
+        }
+    }
+    // NOTE(review): remainder_pos starts at 0 and is only ever assigned a loop
+    // index, so as written this assertion cannot fail.
+    JXL_ASSERT(remainder_pos != -1);
+    // NOTE: This is the only place where counts could go negative. We could
+    // detect that, return false and make ANSHistBin uint32_t.
+    counts[remainder_pos] -= sum - table_size;
+    *omit_pos = remainder_pos;
+    return counts[remainder_pos] > 0;
+}
+
+// Rescales `counts[0..length)` in place so that they sum to 1 << precision_bits.
+// `*num_symbols` receives the number of positive entries and `symbols` the
+// indices of the first kMaxNumSymbolsForSmallCode of them. With zero positive
+// entries nothing is changed; with exactly one, that entry gets the whole
+// table. Otherwise the rebalancing helpers are used and `*omit_pos` is set by
+// them. Fails when there are more symbols than table slots or when neither
+// rebalancing variant succeeds.
+Status NormalizeCountsNew(ANSHistBin* counts,
+                          int* omit_pos,
+                          const int length,
+                          const int precision_bits,
+                          uint32_t shift,
+                          int* num_symbols,
+                          int* symbols) {
+    const int32_t table_size = 1 << precision_bits; // target sum / table size
+    uint64_t total = 0;
+    int symbol_count = 0;
+    int max_symbol = 0; // one past the last positive entry
+    for (int n = 0; n < length; ++n) {
+        total += counts[n];
+        if (counts[n] <= 0) continue;
+        if (symbol_count < kMaxNumSymbolsForSmallCode) {
+            symbols[symbol_count] = n;
+        }
+        ++symbol_count;
+        max_symbol = n + 1;
+    }
+    *num_symbols = symbol_count;
+    if (symbol_count == 0) {
+        return true;
+    }
+    if (symbol_count == 1) {
+        counts[symbols[0]] = table_size;
+        return true;
+    }
+    if (symbol_count > table_size) return JXL_FAILURE("Too many entries in an ANS histogram");
+
+    // Ideal (real-valued) share of the table for every symbol.
+    const float norm = 1.f * table_size / total;
+    std::vector<float> targets(max_symbol);
+    for (int n = 0; n < max_symbol; ++n) {
+        targets[n] = norm * counts[n];
+    }
+    // Try the per-entry rounding first; fall back to the alternative mechanism
+    // only if that failed to keep the histogram positive wherever the original
+    // one was.
+    const bool rebalanced =
+        RebalanceHistogramNew<false>(targets.data(), max_symbol, table_size, shift, omit_pos, counts) ||
+        RebalanceHistogramNew<true>(targets.data(), max_symbol, table_size, shift, omit_pos, counts);
+    if (!rebalanced) {
+        return JXL_FAILURE("Logic error: couldn't rebalance a histogram");
+    }
+    return true;
+}
+
+// Serializes the normalized histogram `counts[0..alphabet_size)` through
+// `writer->Write(width, value)`.
+//  - num_symbols <= 2: "small tree" form -- the symbol indices from `symbols`,
+//    plus the count of the first symbol when there are two.
+//  - otherwise: the shift, the used length, the per-symbol log-counts (static
+//    Huffman code with an RLE escape) and finally the extra precision bits of
+//    every count except the one at `omit_pos`.
+// In the second form `omit_pos` must be a valid index into the alphabet, since
+// logcounts[omit_pos] is written.
+// Returns false only when the used length does not fit the length field
+// (length - 3 > 255); the output is still produced in that case.
+template <typename Writer>
+bool EncodeCountsNew(const ANSHistBin* counts,
+                     const int alphabet_size,
+                     const int omit_pos,
+                     const int num_symbols,
+                     uint32_t shift,
+                     const int* symbols,
+                     Writer* writer) {
+    bool ok = true;
+    if (num_symbols <= 2) {
+        // Small tree marker to encode 1-2 symbols.
+        writer->Write(1, 1);
+        if (num_symbols == 0) {
+            writer->Write(1, 0);
+            StoreVarLenUint8New(0, writer);
+        } else {
+            writer->Write(1, num_symbols - 1);
+            for (int i = 0; i < num_symbols; ++i) {
+                StoreVarLenUint8New(symbols[i], writer);
+            }
+        }
+        if (num_symbols == 2) {
+            // Only the first count is written for the two-symbol case.
+            writer->Write(ANS_LOG_TAB_SIZE, counts[symbols[0]]);
+        }
+    } else {
+        // Mark non-small tree.
+        writer->Write(1, 0);
+        // Mark non-flat histogram.
+        writer->Write(1, 0);
+
+        // Precompute sequences for RLE encoding. Contains the number of identical
+        // values starting at a given index. Only contains the value at the first
+        // element of the series.
+        std::vector<uint32_t> same(alphabet_size, 0);
+        int last = 0;
+        for (int i = 1; i < alphabet_size; i++) {
+            // Store the sequence length once different symbol reached, or we're at
+            // the end, or the length is longer than we can encode, or we are at
+            // the omit_pos. We don't support including the omit_pos in an RLE
+            // sequence because this value may use a different amount of log2 bits
+            // than standard, it is too complex to handle in the decoder.
+            if (counts[i] != counts[last] || i + 1 == alphabet_size || (i - last) >= 255 || i == omit_pos ||
+                i == omit_pos + 1) {
+                same[last] = (i - last);
+                last = i + 1;
+            }
+        }
+
+        // `length` ends up as one past the last index that is either omit_pos or
+        // has a positive count; logcounts[i] is FloorLog2Nonzero(counts[i]) + 1
+        // for positive counts and stays 0 otherwise.
+        int length = 0;
+        std::vector<int> logcounts(alphabet_size);
+        int omit_log = 0;
+        for (int i = 0; i < alphabet_size; ++i) {
+            JXL_ASSERT(counts[i] <= ANS_TAB_SIZE);
+            JXL_ASSERT(counts[i] >= 0);
+            if (i == omit_pos) {
+                length = i + 1;
+            } else if (counts[i] > 0) {
+                logcounts[i] = FloorLog2Nonzero(static_cast<uint32_t>(counts[i])) + 1;
+                length = i + 1;
+                // omit_log becomes at least every other log-count, and strictly
+                // larger than those located before omit_pos.
+                if (i < omit_pos) {
+                    omit_log = std::max(omit_log, logcounts[i] + 1);
+                } else {
+                    omit_log = std::max(omit_log, logcounts[i]);
+                }
+            }
+        }
+        logcounts[omit_pos] = omit_log;
+
+        // Elias gamma-like code for shift. Only difference is that if the number
+        // of bits to be encoded is equal to FloorLog2(ANS_LOG_TAB_SIZE+1), we skip
+        // the terminating 0 in unary coding.
+        int upper_bound_log = FloorLog2Nonzero(ANS_LOG_TAB_SIZE + 1);
+        int log = FloorLog2Nonzero(shift + 1);
+        writer->Write(log, (1 << log) - 1);
+        if (log != upper_bound_log) writer->Write(1, 0);
+        writer->Write(log, ((1 << log) - 1) & (shift + 1));
+
+        // Since num_symbols >= 3, we know that length >= 3, therefore we encode
+        // length - 3.
+        if (length - 3 > 255) {
+            // Pretend that everything is OK, but complain about correctness later.
+            StoreVarLenUint8New(255, writer);
+            ok = false;
+        } else {
+            StoreVarLenUint8New(length - 3, writer);
+        }
+
+        // The logcount values are encoded with a static Huffman code.
+        static const size_t kMinReps = 4;
+        // Index of the RLE escape entry in kLogCountBitLengths / kLogCountSymbols.
+        size_t rep = ANS_LOG_TAB_SIZE + 1;
+        // printf("%s: %s: %d, length=%d\n", __FILE__, __FUNCTION__, __LINE__, length);
+        for (int i = 0; i < length; ++i) {
+            if (i > 0 && same[i - 1] > kMinReps) {
+                // Encode the RLE symbol and skip the repeated ones.
+                writer->Write(kLogCountBitLengths[rep], kLogCountSymbols[rep]);
+                StoreVarLenUint8New(same[i - 1] - kMinReps - 1, writer);
+                // The loop's own ++i supplies the remaining step of the skip.
+                i += same[i - 1] - 2;
+                continue;
+            }
+            writer->Write(kLogCountBitLengths[logcounts[i]], kLogCountSymbols[logcounts[i]]);
+        }
+        // Second pass over the same positions: the precision bits of each count.
+        for (int i = 0; i < length; ++i) {
+            if (i > 0 && same[i - 1] > kMinReps) {
+                // Skip symbols encoded by RLE.
+                i += same[i - 1] - 2;
+                continue;
+            }
+            if (logcounts[i] > 1 && i != omit_pos) {
+                int bitcount = GetPopulationCountPrecision(logcounts[i] - 1, shift);
+                int drop_bits = logcounts[i] - 1 - bitcount;
+                // The dropped low bits must already be zero.
+                JXL_CHECK((counts[i] & ((1 << drop_bits) - 1)) == 0);
+                writer->Write(bitcount, (counts[i] >> drop_bits) - (1 << bitcount));
+            }
+        }
+    }
+    return ok;
+}
+
+// Writes the header of a uniform histogram: the "non-small tree" and "uniform"
+// marker bits followed by alphabet_size - 1.
+void EncodeFlatHistogramNew(const int alphabet_size, BitWriter* writer) {
+    writer->Write(1, 0); // not a small tree
+    writer->Write(1, 1); // uniform histogram
+    JXL_ASSERT(alphabet_size > 0);
+    // The alphabet size is stored minus one.
+    const int size_minus_one = alphabet_size - 1;
+    StoreVarLenUint8New(size_minus_one, writer);
+}
+
+// Estimated total cost (histogram header + data) of coding `histogram` with
+// the given `method`: 0 selects the flat code, any other value the non-flat
+// code with shift = method - 1.
+float ComputeHistoAndDataCostNew(const ANSHistBin* histogram, size_t alphabet_size, uint32_t method) {
+    const bool is_flat = (method == 0);
+    if (is_flat) {
+        return ANS_LOG_TAB_SIZE + 2 + EstimateDataBitsFlatNew(histogram, alphabet_size);
+    }
+    const uint32_t shift = method - 1;
+    int symbols[kMaxNumSymbolsForSmallCode] = {};
+    int num_symbols;
+    int omit_pos = 0;
+    // Work on a copy: normalization rewrites the counts in place.
+    std::vector<ANSHistBin> normalized(histogram, histogram + alphabet_size);
+    JXL_CHECK(NormalizeCountsNew(
+        normalized.data(), &omit_pos, alphabet_size, ANS_LOG_TAB_SIZE, shift, &num_symbols, symbols));
+    // Dry run into a size counter. The ok/not-ok result is deliberately ignored
+    // because no real encoding happens at this stage.
+    SizeWriterNew size_counter;
+    (void)EncodeCountsNew(normalized.data(), alphabet_size, omit_pos, num_symbols, shift, symbols, &size_counter);
+    return size_counter.size + EstimateDataBitsNew(histogram, normalized.data(), alphabet_size);
+}
+
+// Picks the cheapest method for `histogram` according to
+// ComputeHistoAndDataCostNew, starting from the flat code (method 0) and then
+// trying shifts 0..ANS_LOG_TAB_SIZE (method = shift + 1). Every shift is tried
+// for kPrecise, every second one otherwise; kFast stops at the first shift that
+// does not improve the cost. The winning cost is stored in `*cost`.
+uint32_t ComputeBestMethodNew(const ANSHistBin* histogram,
+                              size_t alphabet_size,
+                              float* cost,
+                              HistogramParams::ANSHistogramStrategy ans_histogram_strategy) {
+    const bool precise = ans_histogram_strategy == HistogramParams::ANSHistogramStrategy::kPrecise;
+    const bool fast = ans_histogram_strategy == HistogramParams::ANSHistogramStrategy::kFast;
+    const uint32_t step = precise ? 1 : 2;
+
+    size_t best_method = 0;
+    float best_cost = ComputeHistoAndDataCostNew(histogram, alphabet_size, 0);
+    for (uint32_t shift = 0; shift <= ANS_LOG_TAB_SIZE; shift += step) {
+        const uint32_t candidate = shift + 1;
+        const float candidate_cost = ComputeHistoAndDataCostNew(histogram, alphabet_size, candidate);
+        if (candidate_cost < best_cost) {
+            best_method = candidate;
+            best_cost = candidate_cost;
+            continue;
+        }
+        if (fast) {
+            // do not be as precise if estimating cost.
+            break;
+        }
+    }
+    *cost = best_cost;
+    return best_method;
+}
+
+size_t BuildAndStoreANSEncodingDataNew(HistogramParams::ANSHistogramStrategy ans_histogram_strategy,
+                                       const ANSHistBin* histogram,
+                                       size_t alphabet_size,
+                                       size_t log_alpha_size,
+                                       bool use_prefix_code,
+                                       ANSEncSymbolInfo* info,
+                                       BitWriter* writer) {
+    // Fills `info` with per-symbol coding data for `histogram` and returns its estimated cost.
+    // Prefix-code path: builds a Huffman code; cost = header bits written + sum(count * depth).
+    // ANS path: ComputeBestMethodNew picks a flat histogram (method 0) or a normalized one.
+    // A null `writer` means "estimate only"; the prefix path then writes to a scratch BitWriter.
+    if (use_prefix_code) {
+        if (alphabet_size <= 1) return 0; // nothing to code; info is left untouched
+        std::vector<uint32_t> histo(alphabet_size);
+        for (size_t i = 0; i < alphabet_size; i++) {
+            histo[i] = histogram[i];
+            JXL_CHECK(histogram[i] >= 0);
+        }
+        size_t cost = 0;
+        {
+            std::vector<uint8_t> depths(alphabet_size);
+            std::vector<uint16_t> bits(alphabet_size);
+            BitWriter tmp_writer;
+            BitWriter* w = writer ? writer : &tmp_writer; // scratch writer when only estimating
+            size_t start = w->BitsWritten();
+            BitWriter::Allotment allotment(w, 8 * alphabet_size + 8); // safe upper bound
+            BuildAndStoreHuffmanTree(histo.data(), alphabet_size, depths.data(), bits.data(), w);
+            ReclaimAndCharge(w, &allotment, 0, /*aux_out=*/nullptr);
+
+            for (size_t i = 0; i < alphabet_size; i++) {
+                info[i].bits = depths[i] == 0 ? 0 : bits[i];
+                info[i].depth = depths[i];
+            }
+            cost = w->BitsWritten() - start; // bits written since `start`
+        }
+        // Estimate data cost.
+        for (size_t i = 0; i < alphabet_size; i++) {
+            cost += histogram[i] * info[i].depth;
+        }
+        return cost;
+    }
+    JXL_ASSERT(alphabet_size <= ANS_TAB_SIZE);
+    // Ensure we ignore trailing zeros in the histogram.
+    if (alphabet_size != 0) {
+        size_t largest_symbol = 0;
+        for (size_t i = 0; i < alphabet_size; i++) {
+            if (histogram[i] != 0) largest_symbol = i;
+        }
+        alphabet_size = largest_symbol + 1;
+    }
+    // alphabet_size is now (index of the last non-zero bin) + 1; all-zero input gives 1, and 0 stays 0.
+    float cost;
+    uint32_t method = ComputeBestMethodNew(histogram, alphabet_size, &cost, ans_histogram_strategy);
+    JXL_ASSERT(cost >= 0);
+    int num_symbols;
+    int symbols[kMaxNumSymbolsForSmallCode] = {};
+    std::vector<ANSHistBin> counts(histogram, histogram + alphabet_size);
+    if (!counts.empty()) {
+        size_t sum = 0;
+        for (size_t i = 0; i < counts.size(); i++) {
+            sum += counts[i];
+        }
+        if (sum == 0) {
+            counts[0] = ANS_TAB_SIZE; // all-zero histogram: assign the whole table to symbol 0
+        }
+    }
+    if (method == 0) {
+        counts = CreateFlatHistogram(alphabet_size, ANS_TAB_SIZE);
+        AliasTable::Entry a[ANS_MAX_ALPHABET_SIZE];
+        InitAliasTable(counts, ANS_TAB_SIZE, log_alpha_size, a);
+        ANSBuildInfoTableNew(counts.data(), a, alphabet_size, log_alpha_size, info);
+        if (writer != nullptr) {
+            EncodeFlatHistogramNew(alphabet_size, writer);
+        }
+        return cost; // float cost is implicitly converted to size_t
+    }
+    int omit_pos = 0;
+    uint32_t shift = method - 1; // ComputeBestMethodNew returns shift + 1 for non-flat methods
+    JXL_CHECK(
+        NormalizeCountsNew(counts.data(), &omit_pos, alphabet_size, ANS_LOG_TAB_SIZE, shift, &num_symbols, symbols));
+    AliasTable::Entry a[ANS_MAX_ALPHABET_SIZE];
+    InitAliasTable(counts, ANS_TAB_SIZE, log_alpha_size, a);
+    ANSBuildInfoTableNew(counts.data(), a, alphabet_size, log_alpha_size, info);
+    if (writer != nullptr) {
+        bool ok = EncodeCountsNew(counts.data(), alphabet_size, omit_pos, num_symbols, shift, symbols, writer);
+        (void)ok; // presumably silences unused-variable warnings when JXL_DASSERT compiles out
+        JXL_DASSERT(ok);
+    }
+    return cost;
+}
+
+float ANSPopulationCostNew(const ANSHistBin* data, size_t alphabet_size) {
+    float c; // written by ComputeBestMethodNew through its cost out-parameter
+    ComputeBestMethodNew(data, alphabet_size, &c, HistogramParams::ANSHistogramStrategy::kFast);
+    return c; // only the cost is reported; the chosen method is discarded
+}
+
+size_t StoreEntropyCodesNew(const HistogramParams& params,
+                            const std::vector<std::vector<Token> >& tokens, // not read by this function
+                            EntropyEncodingData* codes,
+                            bool use_prefix_code,
+                            BitWriter* writer,
+                            size_t layer,
+                            AuxOut* aux_out,
+                            std::vector<Histogram> clustered_histograms) { // taken by value (copied per call)
+    size_t cost = 0; // running total returned to the caller; each += below mirrors bits written
+    codes->use_prefix_code = use_prefix_code;
+    size_t log_alpha_size = codes->lz77.enabled ? 8 : 7; // Sane default.
+    if (ans_fuzzer_friendly_) {
+        codes->uint_config.clear();
+        codes->uint_config.resize(1, HybridUintConfig(7, 0, 0));
+    } else {
+        codes->uint_config.resize(clustered_histograms.size());
+        if (params.uint_method == HistogramParams::HybridUintMethod::kContextMap) {
+            codes->uint_config.clear();
+            codes->uint_config.resize(clustered_histograms.size(), HybridUintConfig(2, 0, 1));
+        }
+    }
+    if (log_alpha_size < 5) log_alpha_size = 5; // no-op for the 7/8 set above; keeps "log_alpha_size - 5" non-negative
+    SizeWriterNew size_writer; // Used if writer == nullptr to estimate costs.
+    cost += 1;
+    if (writer) writer->Write(1, use_prefix_code);
+    if (use_prefix_code) {
+        log_alpha_size = PREFIX_MAX_BITS;
+    } else {
+        cost += 2; // the 2-bit (log_alpha_size - 5) field written below
+    }
+    if (writer == nullptr) {
+        EncodeUintConfigsNew(codes->uint_config, &size_writer, log_alpha_size);
+    } else {
+        if (!use_prefix_code) writer->Write(2, log_alpha_size - 5);
+        EncodeUintConfigsNew(codes->uint_config, writer, log_alpha_size);
+    }
+    if (use_prefix_code) {
+        for (size_t c = 0; c < clustered_histograms.size(); ++c) {
+            size_t num_symbol = 1;
+            for (size_t i = 0; i < clustered_histograms[c].data_.size(); i++) {
+                if (clustered_histograms[c].data_[i]) num_symbol = i + 1;
+            }
+            if (writer) {
+                StoreVarLenUint16New(num_symbol - 1, writer);
+            } else {
+                StoreVarLenUint16New(num_symbol - 1, &size_writer);
+            }
+        }
+    }
+    cost += size_writer.size; // NOTE(review): size_writer is only fed when writer == nullptr - confirm this is intended
+    // One encoding_info entry is appended per clustered histogram; each histogram's
+    // code is built (and written when `writer` is non-null) under its own allotment.
+    for (size_t c = 0; c < clustered_histograms.size(); ++c) {
+        size_t num_symbol = 1;
+        // num_symbol = index of the last non-zero bin + 1; it stays 1 when the
+        // histogram is empty or all zero.
+        for (size_t i = 0; i < clustered_histograms[c].data_.size(); i++) {
+            if (clustered_histograms[c].data_[i]) num_symbol = i + 1;
+        }
+        codes->encoding_info.emplace_back();
+        codes->encoding_info.back().resize(std::max<size_t>(1, num_symbol));
+        // The allotment below reserves 256 + 24 * num_symbol (presumably bits) for this
+        // histogram. NOTE(review): the derivation of that bound is not visible here.
+        BitWriter::Allotment allotment(writer, 256 + num_symbol * 24);
+        cost += BuildAndStoreANSEncodingDataNew(params.ans_histogram_strategy, clustered_histograms[c].data_.data(),
+                                                num_symbol, log_alpha_size, use_prefix_code,
+                                                codes->encoding_info.back().data(), writer);
+        allotment.FinishedHistogram(writer);
+        ReclaimAndCharge(writer, &allotment, layer, aux_out);
+    }
+    return cost;
+}
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/include/host_acc_cluster_histogram/acc_host.hpp b/codec/L2/demos/jxlEnc/others/include/host_acc_cluster_histogram/acc_host.hpp
new file mode 100644
index 0000000000..01fb212447
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/host_acc_cluster_histogram/acc_host.hpp
@@ -0,0 +1,35 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef ACC_HOST_HPP
+#define ACC_HOST_HPP
+
+#include "acc_common.hpp"
+#include "acc_phase1.hpp"
+#include "acc_phase2.hpp"
+#include "acc_phase3.hpp"
+
+namespace jxl {
+// Declaration only (defined elsewhere). NOTE(review): presumably the entry point driving acc_phase1/2/3 - confirm.
+Status acc_host(std::string xclbinPath,
+                Image3F& opsin,
+                LossyFrameEncoder& lossy_frame_encoder,
+                const ImageBundle* JXL_RESTRICT ib_or_linear,
+                ThreadPool* pool,
+                std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                BitWriter* writer,
+                AuxOut* aux_out,
+                std::unique_ptr<FrameHeader>& frame_header,
+                const FrameInfo& frame_info,
+                CompressParams cparams,
+                const std::vector<ImageF>* extra_channels,
+                PassesEncoderState* passes_enc_state,
+                FrameDimensions frame_dim,
+                const size_t num_groups,
+                const ImageBundle& ib,
+                std::vector<AuxOut>& aux_outs,
+                const std::function<Status(size_t)>& resize_aux_outs);
+} // namespace jxl
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/include/host_acc_cluster_histogram/acc_phase1.hpp b/codec/L2/demos/jxlEnc/others/include/host_acc_cluster_histogram/acc_phase1.hpp
new file mode 100644
index 0000000000..202b712afd
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/host_acc_cluster_histogram/acc_phase1.hpp
@@ -0,0 +1,92 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef ACC_PHASE1_HPP
+#define ACC_PHASE1_HPP
+
+#include <ap_int.h>
+
+#include "acc_common.hpp"
+#include "xlnx_cfg.h"
+
+#include "lib/jxl/enc_frame.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <limits>
+#include <numeric>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dct_util.h"
+#include "acc_enc_ac_strategy.hpp"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_ar_control_field.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "acc_enc_chroma_from_luma.hpp"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/enc_context_map.h"
+#include "lib/jxl/enc_entropy_coder.h"
+#include "acc_enc_group.hpp"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_noise.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_photon_noise.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/enc_toc.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/gaborish.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+// Declaration only (defined elsewhere). Takes no xclbinPath argument - presumably a host-only phase; confirm.
+Status acc_phase1(Image3F& opsin,
+                  LossyFrameEncoder& lossy_frame_encoder,
+                  CompressParams cparams,
+                  std::unique_ptr<FrameHeader>& frame_header,
+                  const FrameInfo& frame_info,
+                  const ImageBundle* JXL_RESTRICT ib_or_linear,
+                  const ImageBundle& ib,
+                  AuxOut* aux_out,
+                  ThreadPool* pool);
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/include/host_acc_cluster_histogram/acc_phase2.hpp b/codec/L2/demos/jxlEnc/others/include/host_acc_cluster_histogram/acc_phase2.hpp
new file mode 100644
index 0000000000..4adc5b8932
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/host_acc_cluster_histogram/acc_phase2.hpp
@@ -0,0 +1,95 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef HLS_PHASE2_HPP
+#define HLS_PHASE2_HPP
+
+#include <ap_int.h>
+
+#include "acc_common.hpp"
+#include "xlnx_cfg.h"
+
+#include "lib/jxl/enc_frame.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <limits>
+#include <numeric>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dct_util.h"
+#include "acc_enc_ac_strategy.hpp"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_ar_control_field.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "acc_enc_chroma_from_luma.hpp"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/enc_context_map.h"
+#include "lib/jxl/enc_entropy_coder.h"
+#include "acc_enc_group.hpp"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_noise.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_photon_noise.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/enc_toc.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/gaborish.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+#include "lib/jxl/toc.h"
+#include "lib/jxl/enc_transforms-inl.h"
+
+namespace jxl {
+// Declaration only (defined elsewhere). xclbinPath is presumably the FPGA binary to load - confirm at the definition.
+Status acc_phase2(std::string xclbinPath,
+                  Image3F& opsin,
+                  LossyFrameEncoder& lossy_frame_encoder,
+                  std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                  CompressParams cparams,
+                  std::unique_ptr<FrameHeader>& frame_header,
+                  const std::vector<ImageF>* extra_channels,
+                  const ImageBundle* JXL_RESTRICT ib_or_linear,
+                  const ImageBundle& ib,
+                  ThreadPool* pool,
+                  AuxOut* aux_out);
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/include/host_acc_cluster_histogram/acc_phase3.hpp b/codec/L2/demos/jxlEnc/others/include/host_acc_cluster_histogram/acc_phase3.hpp
new file mode 100644
index 0000000000..f36ca63ed8
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/host_acc_cluster_histogram/acc_phase3.hpp
@@ -0,0 +1,102 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef HLS_PHASE3_HPP
+#define HLS_PHASE3_HPP
+
+#include <ap_int.h>
+
+#include "acc_common.hpp"
+#include "xlnx_cfg.h"
+
+#include "lib/jxl/enc_frame.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <limits>
+#include <numeric>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dct_util.h"
+#include "acc_enc_ac_strategy.hpp"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_ar_control_field.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_cluster.h"
+#include "acc_enc_chroma_from_luma.hpp"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/enc_context_map.h"
+#include "lib/jxl/enc_huffman.h"
+#include "lib/jxl/enc_entropy_coder.h"
+#include "acc_enc_group.hpp"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_noise.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_photon_noise.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/enc_toc.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/gaborish.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+#include "lib/jxl/toc.h"
+#include "lib/jxl/modular/encoding/ma_common.h"
+
+namespace jxl {
+// Declaration only (defined elsewhere). xclbinPath is presumably the FPGA binary to load - confirm at the definition.
+Status acc_phase3(std::string xclbinPath,
+                  Image3F& opsin,
+                  LossyFrameEncoder& lossy_frame_encoder,
+                  std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                  CompressParams cparams,
+                  std::unique_ptr<FrameHeader>& frame_header,
+                  PassesEncoderState* passes_enc_state,
+                  FrameDimensions frame_dim,
+                  BitWriter* writer,
+                  const size_t num_groups,
+                  AuxOut* aux_out,
+                  ThreadPool* pool,
+                  std::vector<AuxOut>& aux_outs,
+                  const ImageBundle& ib,
+                  const std::function<Status(size_t)>& resize_aux_outs);
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/include/host_acc_cluster_histogram/xlnx_cfg.h b/codec/L2/demos/jxlEnc/others/include/host_acc_cluster_histogram/xlnx_cfg.h
new file mode 100644
index 0000000000..bc8ce19ab6
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/host_acc_cluster_histogram/xlnx_cfg.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef XLNX_CFG_H
+#define XLNX_CFG_H
+
+//#define XLNX_DEBUG_DCT
+//#define XLNX_DEBUG_CMAP
+
+//#define XLNX_QC_DEBUG
+//#define XLNX_QC_DEBUG_AC_ESTIMATE_ENTROPY
+//#define XLNX_QC_DEBUG_DCT
+//#define XLNX_QC_DEBUG_DC
+//#define XLNX_QC_DEBUG_ENC_GROUP
+//#define XLNX_QC_DEBUG_ENC_GROUP_DC
+
+#define XLNX_DISABLE_BLK_DICT
+#define XLNX_DISABLE_RECT_DCT
+#define XLNX_DISABLE_ARC
+#define XLNX_DISABLE_2NDCMP
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute/acc_host.hpp b/codec/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute/acc_host.hpp
new file mode 100644
index 0000000000..01fb212447
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute/acc_host.hpp
@@ -0,0 +1,35 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef ACC_HOST_HPP
+#define ACC_HOST_HPP
+
+#include "acc_common.hpp"
+#include "acc_phase1.hpp"
+#include "acc_phase2.hpp"
+#include "acc_phase3.hpp"
+
+namespace jxl {
+// Declaration only (defined elsewhere). NOTE(review): presumably the entry point driving acc_phase1/2/3 - confirm.
+Status acc_host(std::string xclbinPath,
+                Image3F& opsin,
+                LossyFrameEncoder& lossy_frame_encoder,
+                const ImageBundle* JXL_RESTRICT ib_or_linear,
+                ThreadPool* pool,
+                std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                BitWriter* writer,
+                AuxOut* aux_out,
+                std::unique_ptr<FrameHeader>& frame_header,
+                const FrameInfo& frame_info,
+                CompressParams cparams,
+                const std::vector<ImageF>* extra_channels,
+                PassesEncoderState* passes_enc_state,
+                FrameDimensions frame_dim,
+                const size_t num_groups,
+                const ImageBundle& ib,
+                std::vector<AuxOut>& aux_outs,
+                const std::function<Status(size_t)>& resize_aux_outs);
+} // namespace jxl
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute/acc_phase1.hpp b/codec/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute/acc_phase1.hpp
new file mode 100644
index 0000000000..202b712afd
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute/acc_phase1.hpp
@@ -0,0 +1,92 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef ACC_PHASE1_HPP
+#define ACC_PHASE1_HPP
+
+#include <ap_int.h>
+
+#include "acc_common.hpp"
+#include "xlnx_cfg.h"
+
+#include "lib/jxl/enc_frame.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <limits>
+#include <numeric>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dct_util.h"
+#include "acc_enc_ac_strategy.hpp"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_ar_control_field.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "acc_enc_chroma_from_luma.hpp"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/enc_context_map.h"
+#include "lib/jxl/enc_entropy_coder.h"
+#include "acc_enc_group.hpp"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_noise.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_photon_noise.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/enc_toc.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/gaborish.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+// Declaration only (defined elsewhere). Takes no xclbinPath argument - presumably a host-only phase; confirm.
+Status acc_phase1(Image3F& opsin,
+                  LossyFrameEncoder& lossy_frame_encoder,
+                  CompressParams cparams,
+                  std::unique_ptr<FrameHeader>& frame_header,
+                  const FrameInfo& frame_info,
+                  const ImageBundle* JXL_RESTRICT ib_or_linear,
+                  const ImageBundle& ib,
+                  AuxOut* aux_out,
+                  ThreadPool* pool);
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute/acc_phase2.hpp b/codec/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute/acc_phase2.hpp
new file mode 100644
index 0000000000..0d737bf54c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute/acc_phase2.hpp
@@ -0,0 +1,96 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef HLS_PHASE2_HPP
+#define HLS_PHASE2_HPP
+
+#include "ap_int.h"
+#include "ap_fixed.h"
+#include "hls_math.h"
+
+#include "acc_common.hpp"
+#include "xlnx_cfg.h"
+
+#include "lib/jxl/enc_frame.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <limits>
+#include <numeric>
+#include <vector>
+#include <string>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/enc_ac_strategy.h"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_ar_control_field.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_chroma_from_luma.h"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/enc_context_map.h"
+#include "lib/jxl/enc_entropy_coder.h"
+#include "lib/jxl/enc_group.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_noise.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_photon_noise.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/enc_toc.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/gaborish.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+// Declaration only (defined elsewhere). xclbinPath is presumably the FPGA binary to load - confirm at the definition.
+Status acc_phase2(std::string xclbinPath,
+                  Image3F& opsin,
+                  LossyFrameEncoder& lossy_frame_encoder,
+                  std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                  CompressParams cparams,
+                  std::unique_ptr<FrameHeader>& frame_header,
+                  const std::vector<ImageF>* extra_channels,
+                  const ImageBundle* JXL_RESTRICT ib_or_linear,
+                  const ImageBundle& ib,
+                  ThreadPool* pool,
+                  AuxOut* aux_out);
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute/acc_phase3.hpp b/codec/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute/acc_phase3.hpp
new file mode 100644
index 0000000000..acea1f77e9
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute/acc_phase3.hpp
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef HLS_PHASE3_HPP
+#define HLS_PHASE3_HPP
+
+#include <ap_int.h>
+
+#include "acc_common.hpp"
+// #include "xlnx_cfg.h"
+
+#include "lib/jxl/enc_frame.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <limits>
+#include <numeric>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/enc_ac_strategy.h"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_ar_control_field.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_chroma_from_luma.h"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/enc_context_map.h"
+#include "lib/jxl/enc_entropy_coder.h"
+#include "lib/jxl/enc_group.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_noise.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_photon_noise.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/enc_toc.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/gaborish.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+// Declaration only (defined elsewhere). xclbinPath is presumably the FPGA binary to load - confirm at the definition.
+Status acc_phase3(std::string xclbinPath,
+                  Image3F& opsin,
+                  LossyFrameEncoder& lossy_frame_encoder,
+                  std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                  CompressParams cparams,
+                  std::unique_ptr<FrameHeader>& frame_header,
+                  PassesEncoderState* passes_enc_state,
+                  FrameDimensions frame_dim,
+                  BitWriter* writer,
+                  const size_t num_groups,
+                  AuxOut* aux_out,
+                  ThreadPool* pool,
+                  std::vector<AuxOut>& aux_outs,
+                  const ImageBundle& ib,
+                  const std::function<Status(size_t)>& resize_aux_outs);
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute/xlnx_cfg.h b/codec/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute/xlnx_cfg.h
new file mode 100644
index 0000000000..2d28564478
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/host_acc_lossy_enc_compute/xlnx_cfg.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef XLNX_CFG_H
+#define XLNX_CFG_H
+// Compile-time switches of this host build. The debug switches below are commented out, i.e. inactive.
+//#define XLNX_DEBUG_DCT
+//#define XLNX_DEBUG_CMAP
+
+//#define XLNX_QC_DEBUG
+//#define XLNX_QC_DEBUG_AC_ESTIMATE_ENTROPY
+//#define XLNX_QC_DEBUG_DCT
+//#define XLNX_QC_DEBUG_DC
+//#define XLNX_QC_DEBUG_ENC_GROUP
+//#define XLNX_QC_DEBUG_ENC_GROUP_DC
+// Active switches. NOTE(review): the names suggest they turn encoder features off; their consumers are not visible here.
+#define XLNX_DISABLE_BLK_DICT
+#define XLNX_DISABLE_RECT_DCT
+#define XLNX_DISABLE_ARC
+#define XLNX_DISABLE_2NDCMP
+#define DISABLE_ACC_BIT_WRITER // NOTE(review): no XLNX_ prefix; not defined in host_acc_tokInit_histogram/xlnx_cfg.h
+
+#endif // XLNX_CFG_H
diff --git a/codec/L2/demos/jxlEnc/others/include/host_acc_tokInit_histogram/acc_host.hpp b/codec/L2/demos/jxlEnc/others/include/host_acc_tokInit_histogram/acc_host.hpp
new file mode 100644
index 0000000000..01fb212447
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/host_acc_tokInit_histogram/acc_host.hpp
@@ -0,0 +1,35 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef ACC_HOST_HPP
+#define ACC_HOST_HPP
+
+#include "acc_common.hpp"
+#include "acc_phase1.hpp"
+#include "acc_phase2.hpp"
+#include "acc_phase3.hpp"
+
+namespace jxl {
+// Declaration only (defined elsewhere). This header also includes acc_phase1/2/3.hpp; NOTE(review): acc_host presumably drives them.
+Status acc_host(std::string xclbinPath, // taken by value; NOTE(review): presumably the FPGA xclbin path - confirm
+                Image3F& opsin,
+                LossyFrameEncoder& lossy_frame_encoder,
+                const ImageBundle* JXL_RESTRICT ib_or_linear,
+                ThreadPool* pool,
+                std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                BitWriter* writer,
+                AuxOut* aux_out,
+                std::unique_ptr<FrameHeader>& frame_header,
+                const FrameInfo& frame_info,
+                CompressParams cparams, // taken by value (copied on every call)
+                const std::vector<ImageF>* extra_channels,
+                PassesEncoderState* passes_enc_state,
+                FrameDimensions frame_dim, // taken by value
+                const size_t num_groups,
+                const ImageBundle& ib,
+                std::vector<AuxOut>& aux_outs,
+                const std::function<Status(size_t)>& resize_aux_outs); // callback taking a size_t and returning Status
+} // namespace jxl
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/include/host_acc_tokInit_histogram/acc_phase1.hpp b/codec/L2/demos/jxlEnc/others/include/host_acc_tokInit_histogram/acc_phase1.hpp
new file mode 100644
index 0000000000..202b712afd
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/host_acc_tokInit_histogram/acc_phase1.hpp
@@ -0,0 +1,92 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef ACC_PHASE1_HPP
+#define ACC_PHASE1_HPP
+
+#include <ap_int.h>
+
+#include "acc_common.hpp"
+#include "xlnx_cfg.h"
+
+#include "lib/jxl/enc_frame.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <limits>
+#include <numeric>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dct_util.h"
+#include "acc_enc_ac_strategy.hpp"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_ar_control_field.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "acc_enc_chroma_from_luma.hpp"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/enc_context_map.h"
+#include "lib/jxl/enc_entropy_coder.h"
+#include "acc_enc_group.hpp"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_noise.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_photon_noise.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/enc_toc.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/gaborish.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+// Declaration only (defined elsewhere). Unlike acc_phase2/acc_phase3 in this directory it takes no xclbinPath argument.
+Status acc_phase1(Image3F& opsin,
+                  LossyFrameEncoder& lossy_frame_encoder,
+                  CompressParams cparams, // taken by value (copied on every call)
+                  std::unique_ptr<FrameHeader>& frame_header,
+                  const FrameInfo& frame_info,
+                  const ImageBundle* JXL_RESTRICT ib_or_linear,
+                  const ImageBundle& ib,
+                  AuxOut* aux_out,
+                  ThreadPool* pool);
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/include/host_acc_tokInit_histogram/acc_phase2.hpp b/codec/L2/demos/jxlEnc/others/include/host_acc_tokInit_histogram/acc_phase2.hpp
new file mode 100644
index 0000000000..4adc5b8932
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/host_acc_tokInit_histogram/acc_phase2.hpp
@@ -0,0 +1,95 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef HLS_PHASE2_HPP
+#define HLS_PHASE2_HPP
+
+#include <ap_int.h>
+
+#include "acc_common.hpp"
+#include "xlnx_cfg.h"
+
+#include "lib/jxl/enc_frame.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <limits>
+#include <numeric>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dct_util.h"
+#include "acc_enc_ac_strategy.hpp"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_ar_control_field.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "acc_enc_chroma_from_luma.hpp"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/enc_context_map.h"
+#include "lib/jxl/enc_entropy_coder.h"
+#include "acc_enc_group.hpp"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_noise.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_photon_noise.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/enc_toc.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/gaborish.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+#include "lib/jxl/toc.h"
+#include "lib/jxl/enc_transforms-inl.h"
+
+namespace jxl {
+// NOTE(review): this header's include guard is HLS_PHASE2_HPP, while acc_phase1.hpp / acc_phase3.hpp use ACC_PHASE1_HPP / ACC_PHASE3_HPP.
+Status acc_phase2(std::string xclbinPath, // declaration only; NOTE(review): presumably the FPGA xclbin path - confirm
+                  Image3F& opsin,
+                  LossyFrameEncoder& lossy_frame_encoder,
+                  std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                  CompressParams cparams, // taken by value (copied on every call)
+                  std::unique_ptr<FrameHeader>& frame_header,
+                  const std::vector<ImageF>* extra_channels,
+                  const ImageBundle* JXL_RESTRICT ib_or_linear,
+                  const ImageBundle& ib,
+                  ThreadPool* pool,
+                  AuxOut* aux_out);
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/include/host_acc_tokInit_histogram/acc_phase3.hpp b/codec/L2/demos/jxlEnc/others/include/host_acc_tokInit_histogram/acc_phase3.hpp
new file mode 100644
index 0000000000..2c87cd9993
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/host_acc_tokInit_histogram/acc_phase3.hpp
@@ -0,0 +1,97 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef ACC_PHASE3_HPP
+#define ACC_PHASE3_HPP
+
+#include <ap_int.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <limits>
+#include <numeric>
+
+#include "acc_common.hpp"
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_ar_control_field.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "acc_enc_cluster.hpp"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/enc_context_map.h"
+#include "lib/jxl/enc_entropy_coder.h"
+#include "lib/jxl/enc_huffman.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_noise.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_photon_noise.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/enc_toc.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/gaborish.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/modular/encoding/ma_common.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+#include "lib/jxl/toc.h"
+#include "acc_enc_ac_strategy.hpp"
+#include "acc_enc_chroma_from_luma.hpp"
+#include "acc_enc_group.hpp"
+#include "xlnx_cfg.h"
+
+namespace jxl {
+// Declaration only (defined elsewhere). Same signature as acc_phase3 in host_acc_lossy_enc_compute/acc_phase3.hpp.
+Status acc_phase3(std::string xclbinPath, // taken by value; NOTE(review): presumably the FPGA xclbin path - confirm
+                  Image3F& opsin,
+                  LossyFrameEncoder& lossy_frame_encoder,
+                  std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                  CompressParams cparams, // taken by value (copied on every call)
+                  std::unique_ptr<FrameHeader>& frame_header,
+                  PassesEncoderState* passes_enc_state,
+                  FrameDimensions frame_dim, // taken by value
+                  BitWriter* writer,
+                  const size_t num_groups,
+                  AuxOut* aux_out,
+                  ThreadPool* pool,
+                  std::vector<AuxOut>& aux_outs,
+                  const ImageBundle& ib,
+                  const std::function<Status(size_t)>& resize_aux_outs); // callback taking a size_t and returning Status
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/include/host_acc_tokInit_histogram/xlnx_cfg.h b/codec/L2/demos/jxlEnc/others/include/host_acc_tokInit_histogram/xlnx_cfg.h
new file mode 100644
index 0000000000..bc8ce19ab6
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/include/host_acc_tokInit_histogram/xlnx_cfg.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef XLNX_CFG_H
+#define XLNX_CFG_H
+// Compile-time switches of this host build. The debug switches below are commented out, i.e. inactive.
+//#define XLNX_DEBUG_DCT
+//#define XLNX_DEBUG_CMAP
+
+//#define XLNX_QC_DEBUG
+//#define XLNX_QC_DEBUG_AC_ESTIMATE_ENTROPY
+//#define XLNX_QC_DEBUG_DCT
+//#define XLNX_QC_DEBUG_DC
+//#define XLNX_QC_DEBUG_ENC_GROUP
+//#define XLNX_QC_DEBUG_ENC_GROUP_DC
+// Active switches. NOTE(review): the names suggest they turn encoder features off; their consumers are not visible here.
+#define XLNX_DISABLE_BLK_DICT
+#define XLNX_DISABLE_RECT_DCT
+#define XLNX_DISABLE_ARC
+#define XLNX_DISABLE_2NDCMP
+// host_acc_lossy_enc_compute/xlnx_cfg.h shares this include guard and additionally defines DISABLE_ACC_BIT_WRITER.
+#endif // XLNX_CFG_H
diff --git a/codec/L2/demos/jxlEnc/others/src/acc_cluster_histogram.cpp b/codec/L2/demos/jxlEnc/others/src/acc_cluster_histogram.cpp
new file mode 100644
index 0000000000..ead487d886
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/acc_cluster_histogram.cpp
@@ -0,0 +1,201 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef ACC_CLUSTER_HISTOGRAM_CPP
+#define ACC_CLUSTER_HISTOGRAM_CPP
+
+#include "acc_cluster_histogram.hpp"
+
+namespace jxl {
+// Clusters the histograms of up to five entropy-coded streams and writes, for each stream, the
+// LZ77 header and the context-map header to that stream's BitWriter.
+//
+// Stream i (0..4) is processed only when do_once[i] is true. The work runs in four passes:
+//   1. reset codes[i]; with more than one histogram, cluster them via acc_FastClusterHistograms();
+//   2. with more than one histogram, reindex the clusters and copy their ids into *context_map[i];
+//   3. if writer[i] is non-null and more than one cluster remains, append the context map to
+//      tokensin[i][0] and, when an entry needs 4 or more bits, set do_inner[i]; for every stream
+//      with do_inner[i] set, build one histogram over all tokens in tokensin[i];
+//   4. write the headers through writer[i].
+//
+// Parameters (each per-stream container is indexed by stream and is assumed to hold five entries):
+//   is_small_image          selects the update_part() argument (it only differs for streams 3 and 4).
+//   do_once                 per-stream enable flags.
+//   do_inner                in/out; set to 1 when the context map of a stream takes the token path.
+//                           NOTE(review): never cleared here - presumably zeroed by the caller.
+//   do_prefix_in            out; set to 0 for every stream whose do_inner[i] is non-zero.
+//   params                  supplies max_histograms, which is capped by kClustersLimit.
+//   histograms_             input histograms, one vector per stream.
+//   num_contexts            stored in codes[i]->lz77 and used to size the bit allotment.
+//   context_map             out; resized to histograms_[i].size(), cluster ids filled in by pass 2.
+//   nonempty_histograms, largest_idx          forwarded to acc_FastClusterHistograms().
+//   codes                   out; LZ77 fields reset and encoding_info cleared.
+//   clustered_histograms, histogram_symbols   out; clustering result.
+//   writer                  destination bit writers. A null entry suppresses the writes of passes 3/4
+//                           (the token-path header in pass 4 still assumes a non-null writer).
+//   layer                   layer id handed to Bundle::Write() and ReclaimAndCharge().
+//   clustered_histogramsin  out; histogram of the context-map tokens (token path only).
+//   tokensin                in/out; context-map tokens are appended to tokensin[i][0].
+//   codesin                 out; LZ77 fields reset and encoding_info cleared (token path only).
+//   context_map_in          out; resized to clustered_histogramsin[i].size() (token path only).
+void acc_ANSclusterHistogram(bool is_small_image,
+                             bool do_once[5],
+                             char* do_inner,
+                             char* do_prefix_in,
+
+                             std::vector<HistogramParams>& params,
+
+                             std::vector<std::vector<Histogram> >& histograms_,
+                             std::vector<size_t>& num_contexts,
+                             std::vector<std::vector<uint8_t>*> context_map,
+                             std::vector<std::vector<uint32_t> >& nonempty_histograms,
+                             std::vector<uint32_t>& largest_idx,
+
+                             std::vector<EntropyEncodingData*> codes,
+                             std::vector<std::vector<Histogram> >& clustered_histograms,
+                             std::vector<std::vector<uint32_t> >& histogram_symbols,
+
+                             std::vector<BitWriter*> writer,
+                             std::vector<size_t> layer,
+                             std::vector<std::vector<Histogram> >& clustered_histogramsin,
+                             std::vector<std::vector<std::vector<Token> > >& tokensin,
+                             std::vector<EntropyEncodingData>& codesin,
+                             std::vector<std::vector<uint8_t> >& context_map_in) {
+    constexpr float kMinDistanceForDistinctFast = 64.0f;
+    constexpr int kNumStreams = 5; // extent of do_once[5]
+    // Argument handed to update_part() per stream, for regular and for small images.
+    // NOTE(review): the meaning of these values is not visible here; streams 0-2 use one value for both.
+    const int part_regular[kNumStreams] = {1, 31, 51, 1, 21};
+    const int part_small[kNumStreams] = {1, 31, 51, 81, 101};
+
+    // Pass 1: reset the code state and cluster the histograms.
+    for (int i = 0; i < kNumStreams; i++) {
+        if (!do_once[i]) continue;
+
+        codes[i]->lz77.nonserialized_distance_context = num_contexts[i];
+        codes[i]->lz77.enabled = false;
+        codes[i]->lz77.min_symbol = 224;
+        codes[i]->encoding_info.clear();
+        context_map[i]->resize(histograms_[i].size());
+        clustered_histograms[i] = histograms_[i];
+
+        // A single histogram needs no clustering.
+        if (histograms_[i].size() > 1) {
+            size_t max_histograms = std::min(kClustersLimit, params[i].max_histograms);
+            acc_FastClusterHistograms(histograms_[i], nonempty_histograms[i], largest_idx[i],
+                                      nonempty_histograms[i].size(), max_histograms, kMinDistanceForDistinctFast,
+                                      &clustered_histograms[i], &histogram_symbols[i]);
+        }
+    }
+
+    // Pass 2: canonicalize the cluster ids and fill the context map.
+    for (int i = 0; i < kNumStreams; i++) {
+        if (!do_once[i] || histograms_[i].size() <= 1) continue;
+        // Convert the context map to a canonical form.
+        HistogramReindex(&clustered_histograms[i], &histogram_symbols[i]);
+        for (size_t c = 0; c < histograms_[i].size(); ++c) {
+            (*context_map[i])[c] = static_cast<uint8_t>(histogram_symbols[i][c]);
+        }
+    }
+
+    // Pass 3: tokenize the context map and prepare the nested code where it is needed.
+    for (int i = 0; i < kNumStreams; i++) {
+        if (!do_once[i]) continue;
+
+        if (histograms_[i].size() > 1 && writer[i] != nullptr) {
+            const size_t num_histograms = clustered_histograms[i].size();
+            if (num_histograms != 1) {
+                for (size_t j = 0; j < context_map[i]->size(); j++) {
+                    tokensin[i][0].emplace_back(0, (*context_map[i])[j]);
+                }
+                // Maps needing 4 or more bits per entry are not written inline in pass 4.
+                if (CeilLog2Nonzero(num_histograms) >= 4) {
+                    do_inner[i] = 1;
+                }
+            }
+        }
+
+        if (!do_inner[i]) continue;
+
+        // Token path: reset the nested code state and histogram the context-map tokens.
+        codesin[i].lz77.nonserialized_distance_context = 1;
+        codesin[i].lz77.enabled = false;
+        codesin[i].lz77.min_symbol = 224;
+        do_prefix_in[i] = 0; // use_prefix_code is hard-wired to false
+
+        std::vector<Histogram> ctxHistograms_(1);
+        HybridUintConfig uint_config; //  Default config for clustering.
+        bool saw_token = false;
+        for (size_t j = 0; j < tokensin[i].size(); ++j) {
+            for (size_t k = 0; k < tokensin[i][j].size(); ++k) {
+                uint32_t tok, nbits, bits;
+                uint_config.Encode(tokensin[i][j][k].value, &tok, &nbits, &bits);
+                ctxHistograms_[0].Add(tok);
+                saw_token = true;
+            }
+        }
+        // Publish the result once, after the loop (copying the histogram per token would be
+        // wasteful). A stream without any token leaves the three outputs untouched.
+        if (saw_token) {
+            clustered_histogramsin[i] = ctxHistograms_;
+            codesin[i].encoding_info.clear();
+            context_map_in[i].resize(clustered_histogramsin[i].size());
+        }
+    }
+
+    // Pass 4: emit the headers.
+    for (int i = 0; i < kNumStreams; i++) {
+        if (!do_once[i]) continue;
+
+        // writer[i] may be null (the rest of this pass already allows for that), so only call
+        // update_part() on a real writer.
+        if (writer[i] != nullptr) {
+            writer[i]->update_part(is_small_image ? part_small[i] : part_regular[i]);
+        }
+
+        const size_t histograms_size = histograms_[i].size();
+        const size_t max_contexts = std::min(num_contexts[i], kClustersLimit);
+        BitWriter::Allotment allotment(writer[i], 128 + num_contexts[i] * 40 + max_contexts * 96);
+        if (writer[i]) {
+            JXL_CHECK(Bundle::Write(codes[i]->lz77, writer[i], layer[i], nullptr));
+        }
+
+        // Context-map header: single cluster, inline map (< 4 bits per entry) or token path.
+        if (histograms_size > 1 && writer[i] != nullptr) {
+            const size_t num_histograms = clustered_histograms[i].size();
+            if (num_histograms == 1) {
+                writer[i]->Write(1, 1);
+                writer[i]->Write(2, 0);
+            } else {
+                size_t entry_bits = CeilLog2Nonzero(num_histograms);
+                if (entry_bits < 4) {
+                    writer[i]->Write(1, 1);
+                    writer[i]->Write(2, entry_bits);
+                    for (size_t j = 0; j < (*context_map[i]).size(); j++) {
+                        writer[i]->Write(entry_bits, (*context_map[i])[j]);
+                    }
+                } else {
+                    writer[i]->Write(1, 0);
+                    writer[i]->Write(1, 0);
+                }
+            }
+        }
+        // StoreEntropyCodesNew
+        allotment.FinishedHistogram(writer[i]);
+        ReclaimAndCharge(writer[i], &allotment, layer[i], nullptr);
+
+        if (do_inner[i]) {
+            // Header of the nested code that carries the tokenized context map.
+            BitWriter::Allotment inner_allotment(writer[i], 128 + 1 * 40 + 96);
+            JXL_CHECK(Bundle::Write(codesin[i].lz77, writer[i], 0, nullptr));
+            inner_allotment.FinishedHistogram(writer[i]);
+            ReclaimAndCharge(writer[i], &inner_allotment, 0, nullptr);
+        }
+    }
+}
+
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/src/acc_enc_ac_strategy.cpp b/codec/L2/demos/jxlEnc/others/src/acc_enc_ac_strategy.cpp
new file mode 100644
index 0000000000..6fdd0da827
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/acc_enc_ac_strategy.cpp
@@ -0,0 +1,1197 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+#include "acc_enc_ac_strategy.hpp"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstdio>
+#include <iomanip>
+#include <iostream>
+
+#include "xlnx_cfg.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "xilinx/src/acc_enc_ac_strategy.cpp"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_transforms-inl.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/fast_math-inl.h"
+
+// Some of the floating point constants in this file and in other
+// files in the libjxl project have been obtained using the
+// tools/optimizer/simplex_fork.py tool. It is a variation of
+// Nelder-Mead optimization, and we generally try to minimize
+// BPP * pnorm aggregate as reported by the benchmark_xl tool,
+// but occasionally the values are optimized by using additional
+// constraints such as maintaining a certain density, or ratio of
+// popularity of integral transforms. Jyrki visually reviews all
+// such changes and often makes manual changes to maintain good
+// visual quality to changes where butteraugli was not sufficiently
+// sensitive to some kind of degradation. Unfortunately image quality
+// is still more of an art than science.
+
+// This must come before the begin/end_target, but HWY_ONCE is only true
+// after that, so use an "include guard".
+#ifndef LIB_JXL_ENC_AC_STRATEGY_
+#define LIB_JXL_ENC_AC_STRATEGY_
+// Parameters of the heuristic are marked with a OPTIMIZE comment.
+namespace jxl {
+
+// Debugging utilities.
+
+// Debug palette: returns the 3-byte linear sRGB color (R, G, B) assigned to a raw AC strategy.
+const uint8_t* TypeColor(const uint8_t& raw_strategy) {
+    static constexpr uint8_t kPalette[][3] = {
+        {0xFF, 0xFF, 0x00}, //  0: DCT8
+        {0xFF, 0x80, 0x80}, //  1: HORNUSS
+        {0xFF, 0x80, 0x80}, //  2: DCT2x2
+        {0xFF, 0x80, 0x80}, //  3: DCT4x4
+        {0x80, 0xFF, 0x00}, //  4: DCT16x16
+        {0x00, 0xC0, 0x00}, //  5: DCT32x32
+        {0xC0, 0xFF, 0x00}, //  6: DCT16x8
+        {0xC0, 0xFF, 0x00}, //  7: DCT8x16
+        {0x00, 0xFF, 0x00}, //  8: DCT32x8
+        {0x00, 0xFF, 0x00}, //  9: DCT8x32
+        {0x00, 0xFF, 0x00}, // 10: DCT32x16
+        {0x00, 0xFF, 0x00}, // 11: DCT16x32
+        {0xFF, 0x80, 0x00}, // 12: DCT4x8
+        {0xFF, 0x80, 0x00}, // 13: DCT8x4
+        {0xFF, 0xFF, 0x80}, // 14: AFV0
+        {0xFF, 0xFF, 0x80}, // 15: AFV1
+        {0xFF, 0xFF, 0x80}, // 16: AFV2
+        {0xFF, 0xFF, 0x80}, // 17: AFV3
+        {0x00, 0xC0, 0xFF}, // 18: DCT64x64
+        {0x00, 0xFF, 0xFF}, // 19: DCT64x32
+        {0x00, 0xFF, 0xFF}, // 20: DCT32x64
+        {0x00, 0x40, 0xFF}, // 21: DCT128x128
+        {0x00, 0x80, 0xFF}, // 22: DCT128x64
+        {0x00, 0x80, 0xFF}, // 23: DCT64x128
+        {0x00, 0x00, 0xC0}, // 24: DCT256x256
+        {0x00, 0x00, 0xFF}, // 25: DCT256x128
+        {0x00, 0x00, 0xFF}, // 26: DCT128x256
+    };
+    static_assert(AcStrategy::kNumValidStrategies == 27, "Change colors");
+    JXL_ASSERT(AcStrategy::IsRawStrategyValid(raw_strategy));
+    return &kPalette[raw_strategy][0];
+}
+
+// Returns the 8x8 overlay mask (64 bytes, row-major; non-zero = draw the pixel darker) that
+// DumpAcStrategy() applies to strategies covering a single block.
+// DumpAcStrategy() also calls this for multi-block strategies (without reading the result), so the
+// table is sized for every valid raw strategy: rows without an explicit mask are zero-initialized
+// and the returned pointer is always inside the table.
+const uint8_t* TypeMask(const uint8_t& raw_strategy) {
+    JXL_ASSERT(AcStrategy::IsRawStrategyValid(raw_strategy));
+    static_assert(AcStrategy::kNumValidStrategies == 27, "Add masks");
+    // implicitly, first row and column is made dark
+    static constexpr uint8_t kMask[AcStrategy::kNumValidStrategies][64] = {
+        {
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+        },                          // DCT8
+        {
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 1, 0, 0, 1, 0, 0, //
+            0, 0, 1, 0, 0, 1, 0, 0, //
+            0, 0, 1, 1, 1, 1, 0, 0, //
+            0, 0, 1, 0, 0, 1, 0, 0, //
+            0, 0, 1, 0, 0, 1, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+        },                          // HORNUSS
+        {
+            1, 1, 1, 1, 1, 1, 1, 1, //
+            1, 0, 1, 0, 1, 0, 1, 0, //
+            1, 1, 1, 1, 1, 1, 1, 1, //
+            1, 0, 1, 0, 1, 0, 1, 0, //
+            1, 1, 1, 1, 1, 1, 1, 1, //
+            1, 0, 1, 0, 1, 0, 1, 0, //
+            1, 1, 1, 1, 1, 1, 1, 1, //
+            1, 0, 1, 0, 1, 0, 1, 0, //
+        },                          // 2x2
+        {
+            0, 0, 0, 0, 1, 0, 0, 0, //
+            0, 0, 0, 0, 1, 0, 0, 0, //
+            0, 0, 0, 0, 1, 0, 0, 0, //
+            0, 0, 0, 0, 1, 0, 0, 0, //
+            1, 1, 1, 1, 1, 1, 1, 1, //
+            0, 0, 0, 0, 1, 0, 0, 0, //
+            0, 0, 0, 0, 1, 0, 0, 0, //
+            0, 0, 0, 0, 1, 0, 0, 0, //
+        },                          // 4x4
+        {}, {}, {}, {},             // DCT16x16, DCT32x32, DCT16x8, DCT8x16 (unused)
+        {}, {}, {}, {},             // DCT32x8, DCT8x32, DCT32x16, DCT16x32 (unused)
+        {
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            1, 1, 1, 1, 1, 1, 1, 1, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+        },                          // DCT4x8
+        {
+            0, 0, 0, 0, 1, 0, 0, 0, //
+            0, 0, 0, 0, 1, 0, 0, 0, //
+            0, 0, 0, 0, 1, 0, 0, 0, //
+            0, 0, 0, 0, 1, 0, 0, 0, //
+            0, 0, 0, 0, 1, 0, 0, 0, //
+            0, 0, 0, 0, 1, 0, 0, 0, //
+            0, 0, 0, 0, 1, 0, 0, 0, //
+            0, 0, 0, 0, 1, 0, 0, 0, //
+        },                          // DCT8x4
+        {
+            1, 1, 1, 1, 1, 0, 0, 0, //
+            1, 1, 1, 1, 0, 0, 0, 0, //
+            1, 1, 1, 0, 0, 0, 0, 0, //
+            1, 1, 0, 0, 0, 0, 0, 0, //
+            1, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+        },                          // AFV0
+        {
+            0, 0, 0, 0, 1, 1, 1, 1, //
+            0, 0, 0, 0, 0, 1, 1, 1, //
+            0, 0, 0, 0, 0, 0, 1, 1, //
+            0, 0, 0, 0, 0, 0, 0, 1, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+        },                          // AFV1
+        {
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            1, 0, 0, 0, 0, 0, 0, 0, //
+            1, 1, 0, 0, 0, 0, 0, 0, //
+            1, 1, 1, 0, 0, 0, 0, 0, //
+            1, 1, 1, 1, 0, 0, 0, 0, //
+        },                          // AFV2
+        {
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 0, //
+            0, 0, 0, 0, 0, 0, 0, 1, //
+            0, 0, 0, 0, 0, 0, 1, 1, //
+            0, 0, 0, 0, 0, 1, 1, 1, //
+        },                          // AFV3
+        // Raw strategies 18..26 have no explicit row; they are zero-initialized (unused).
+    };
+    return kMask[raw_strategy];
+}
+
+// Renders the AC strategy map into an RGB image and passes it to aux_out->DumpImage(tag, ...):
+// flat TypeColor() fill, TypeMask() overlay on single-block transforms, darker top/left edges.
+void DumpAcStrategy(const AcStrategyImage& ac_strategy, size_t xsize, size_t ysize, const char* tag, AuxOut* aux_out) {
+    Image3F canvas(xsize, ysize);
+    for (size_t y = 0; y < ysize; y++) {
+        float* JXL_RESTRICT r = canvas.PlaneRow(0, y);
+        float* JXL_RESTRICT g = canvas.PlaneRow(1, y);
+        float* JXL_RESTRICT b = canvas.PlaneRow(2, y);
+        const AcStrategyRow strategies = ac_strategy.ConstRow(y / kBlockDim);
+        for (size_t x = 0; x < xsize; x++) {
+            const uint8_t* JXL_RESTRICT rgb = TypeColor(strategies[x / kBlockDim].RawStrategy());
+            r[x] = rgb[0] / 255.f;
+            g[x] = rgb[1] / 255.f;
+            b[x] = rgb[2] / 255.f;
+        }
+    }
+    const size_t stride = canvas.PixelsPerRow();
+    for (size_t c = 0; c < 3; c++) {
+        for (size_t by = 0; by < DivCeil(ysize, kBlockDim); by++) {
+            const size_t y0 = by * kBlockDim;
+            float* JXL_RESTRICT row = canvas.PlaneRow(c, y0);
+            const AcStrategyRow strategies = ac_strategy.ConstRow(by);
+            for (size_t bx = 0; bx < DivCeil(xsize, kBlockDim); bx++) {
+                AcStrategy acs = strategies[bx];
+                if (!acs.IsFirstBlock()) continue;
+                const size_t x0 = bx * kBlockDim;
+                const uint8_t shade = TypeColor(acs.RawStrategy())[c];
+                const uint8_t* JXL_RESTRICT mask = TypeMask(acs.RawStrategy());
+                const bool single_block = acs.covered_blocks_x() == 1 && acs.covered_blocks_y() == 1;
+                for (size_t iy = 0; single_block && iy < kBlockDim && y0 + iy < ysize; iy++) {
+                    for (size_t ix = 0; ix < kBlockDim && x0 + ix < xsize; ix++) {
+                        if (mask[iy * kBlockDim + ix]) row[iy * stride + x0 + ix] = shade / 800.f;
+                    }
+                }
+                // draw block edges
+                for (size_t ix = 0; ix < kBlockDim * acs.covered_blocks_x() && x0 + ix < xsize; ix++) {
+                    row[x0 + ix] = shade / 350.f;
+                }
+                for (size_t iy = 0; iy < kBlockDim * acs.covered_blocks_y() && y0 + iy < ysize; iy++) {
+                    row[iy * stride + x0] = shade / 350.f;
+                }
+            }
+        }
+    }
+    aux_out->DumpImage(tag, canvas);
+}
+
+} // namespace jxl
+#endif // LIB_JXL_ENC_AC_STRATEGY_
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// Scans block row `y` of `ac_strategy` over the block columns
+// [start_x, min(end_x, xsize)) and returns true as soon as it lands on a block
+// that is not the first block of its transform. First blocks are hopped over
+// by their covered_blocks_x(), so a non-first block reached this way is
+// presumably covered by a transform that starts in an earlier row, i.e. one
+// that straddles the boundary above row `y`.
+//
+// Returns false when the start position lies outside the strategy image or
+// when `y` is a multiple of 8.
+bool MultiBlockTransformCrossesHorizontalBoundary(const AcStrategyImage& ac_strategy,
+                                                  size_t start_x,
+                                                  size_t y,
+                                                  size_t end_x) {
+    const bool outside = start_x >= ac_strategy.xsize() || y >= ac_strategy.ysize();
+    // Nothing crosses 64x64 boundaries, and the memory on the other side
+    // of the 64x64 block may still be uninitialized, so rows that start a
+    // group of 8 blocks are never inspected.
+    if (outside || y % 8 == 0) {
+        return false;
+    }
+    const size_t stop_x = std::min(end_x, ac_strategy.xsize());
+    AcStrategyRow row = ac_strategy.ConstRow(y);
+
+    // start_x may point into the middle of a multi-block transform. Walk left
+    // until a first block is found, but never past the start of the current
+    // group of 8 blocks.
+    const size_t group_start = start_x - start_x % 8;
+    size_t x = start_x;
+    while (x != group_start && !row[x].IsFirstBlock()) {
+        --x;
+    }
+
+    while (x < stop_x) {
+        if (!row[x].IsFirstBlock()) {
+            return true;
+        }
+        x += row[x].covered_blocks_x();
+    }
+    return false;
+}
+
+// Scans block column `x` of `ac_strategy` over the block rows
+// [start_y, min(end_y, ysize)) and returns true as soon as it lands on a block
+// that is not the first block of its transform. First blocks are hopped over
+// by their covered_blocks_y(), so a non-first block reached this way is
+// presumably covered by a transform that starts in a column further left,
+// i.e. one that straddles the boundary left of column `x`.
+//
+// Returns false when the start position lies outside the strategy image or
+// when `x` is a multiple of 8.
+bool MultiBlockTransformCrossesVerticalBoundary(const AcStrategyImage& ac_strategy,
+                                                size_t x,
+                                                size_t start_y,
+                                                size_t end_y) {
+    const bool outside = x >= ac_strategy.xsize() || start_y >= ac_strategy.ysize();
+    // Nothing crosses 64x64 boundaries, and the memory on the other side
+    // of the 64x64 block may still be uninitialized, so columns that start a
+    // group of 8 blocks are never inspected.
+    if (outside || x % 8 == 0) {
+        return false;
+    }
+    const size_t stop_y = std::min(end_y, ac_strategy.ysize());
+
+    // start_y may point into the middle of a multi-block transform. Walk up
+    // until a first block is found, but never past the start of the current
+    // group of 8 blocks.
+    const size_t group_start = start_y - start_y % 8;
+    size_t y = start_y;
+    while (y != group_start && !ac_strategy.ConstRow(y)[x].IsFirstBlock()) {
+        --y;
+    }
+
+    while (y < stop_y) {
+        const AcStrategy here = ac_strategy.ConstRow(y)[x];
+        if (!here.IsFirstBlock()) {
+            return true;
+        }
+        y += here.covered_blocks_y();
+    }
+    return false;
+}
+
+// Estimates the coding cost of applying transform `acs` at pixel position
+// (x, y) and returns it as a single score (lower is better).
+//
+// The transform coefficients are not computed here. They are copied from the
+// precomputed per-channel buffers (dct8x8, dctIDT, dct2x2, dct4x4, dct16x16,
+// dct32x32), which are indexed by transform position inside an image whose
+// width is xsize rounded up to a multiple of 64.
+//
+// Parameters:
+//   acs           transform to evaluate; only raw strategies 0..5 have a
+//                 coefficient source. For any other strategy
+//                 "unsupported DCT" is printed and `block` is left untouched.
+//   x, y          pixel position of the transform's top-left corner.
+//   config        supplies Quant()/Masking() fields, the dequant matrices and
+//                 the cost-model constants.
+//   cmap_factors  three per-channel factors; each channel has the middle
+//                 channel scaled by its factor subtracted before quantization.
+//   block         scratch for 3 * size coefficients (overwritten). It is read
+//                 with Load(), which presumably requires vector alignment.
+//   xsize, ysize  image dimensions; only xsize is used.
+//   scratch_space, quantized, dcIDT .. dc32x32
+//                 not referenced by this function.
+//
+// Returns entropy + masking * (weighted information-loss terms).
+float EstimateEntropy(const AcStrategy& acs,
+                      size_t x,
+                      size_t y,
+                      const ACSConfig& config,
+                      const float* JXL_RESTRICT cmap_factors,
+                      float* block,
+                      float* scratch_space,
+                      uint32_t* quantized,
+                      size_t xsize,
+                      size_t ysize,
+                      std::vector<std::vector<float> >& dctIDT,
+                      std::vector<std::vector<float> >& dct2x2,
+                      std::vector<std::vector<float> >& dct4x4,
+                      std::vector<std::vector<float> >& dct8x8,
+                      std::vector<std::vector<float> >& dct16x16,
+                      std::vector<std::vector<float> >& dct32x32,
+                      std::vector<std::vector<float> >& dcIDT,
+                      std::vector<std::vector<float> >& dc2x2,
+                      std::vector<std::vector<float> >& dc4x4,
+                      std::vector<std::vector<float> >& dc8x8,
+                      std::vector<std::vector<float> >& dc16x16,
+                      std::vector<std::vector<float> >& dc32x32) {
+    const size_t size = (1 << acs.log2_covered_blocks()) * kDCTBlockSize;
+    // Row pitch of the coefficient buffers: image width rounded up to 64.
+    // Declared ahead of the debug block below, which also reads it.
+    const size_t tile_xsize = (xsize + 63) / 64 * 64;
+
+    // Fetch the precomputed coefficients (this replaces the call to
+    // TransformFromPixels that an on-the-fly implementation would make).
+    for (size_t c = 0; c < 3; c++) {
+        float* JXL_RESTRICT block_c = block + size * c;
+
+#ifdef XLNX_QC_DEBUG_AC_ESTIMATE_ENTROPY
+        if (acs.RawStrategy() == 4) {
+            std::cout << "========================debug===================== convered blocks: "
+                      << acs.covered_blocks_x() << " tile_xsize: " << tile_xsize << " x: " << x << " y: " << y
+                      << std::endl;
+            for (int i = 0; i < 64; i++) {
+                std::cout << std::setw(15) << block_c[i] << " ";
+            }
+            std::cout << std::endl;
+            for (int i = 0; i < 64; i++) {
+                std::cout << std::setw(15) << dct2x2[c][64 * (y / 8 * (tile_xsize / 8) + x / 8) + i] << " ";
+            }
+            std::cout << std::endl;
+            for (int i = 0; i < 64; i++) {
+                if (block_c[i] != dct2x2[c][64 * (y / 8 * (tile_xsize / 8) + x / 8) + i]) std::cout << "!!!";
+            }
+            std::cout << std::endl;
+        }
+#endif
+        // Select the coefficient buffer and the transform's edge length in
+        // pixels once, instead of re-testing the strategy per coefficient.
+        const std::vector<float>* coeffs = nullptr;
+        size_t dim = 0;
+        if (acs.RawStrategy() == 0) {
+            coeffs = &dct8x8[c];
+            dim = 8;
+        } else if (acs.RawStrategy() == 1) {
+            coeffs = &dctIDT[c];
+            dim = 8;
+        } else if (acs.RawStrategy() == 2) {
+            coeffs = &dct2x2[c];
+            dim = 8;
+        } else if (acs.RawStrategy() == 3) {
+            coeffs = &dct4x4[c];
+            dim = 8;
+        } else if (acs.RawStrategy() == 4) {
+            coeffs = &dct16x16[c];
+            dim = 16;
+        } else if (acs.RawStrategy() == 5) {
+            coeffs = &dct32x32[c];
+            dim = 32;
+        } else {
+            std::cout << "unsupported DCT" << std::endl;
+        }
+        if (coeffs != nullptr) {
+            // Transforms are stored back to back, dim*dim coefficients each,
+            // in raster order of their position on the padded grid.
+            const size_t count = dim * dim;
+            const size_t offset = count * (y / dim * (tile_xsize / dim) + x / dim);
+            for (size_t i = 0; i < count; i++) {
+                block_c[i] = (*coeffs)[offset + i];
+            }
+        }
+    }
+
+    HWY_FULL(float) df;
+
+    const size_t num_blocks = acs.covered_blocks_x() * acs.covered_blocks_y();
+    float quant_norm8 = 0;
+    float masking = 0;
+    if (num_blocks == 1) {
+        // When it is only one 8x8, we don't need aggregation of values.
+        quant_norm8 = config.Quant(x / 8, y / 8);
+        masking = 2.0f * config.Masking(x / 8, y / 8);
+    } else if (num_blocks == 2) {
+        // Taking max instead of 8th norm seems to work
+        // better for smallest blocks up to 16x8. Jyrki couldn't get
+        // improvements in trying the same for 16x16 blocks.
+        if (acs.covered_blocks_y() == 2) {
+            quant_norm8 = std::max(config.Quant(x / 8, y / 8), config.Quant(x / 8, y / 8 + 1));
+            masking = 2.0f * std::max(config.Masking(x / 8, y / 8), config.Masking(x / 8, y / 8 + 1));
+        } else {
+            quant_norm8 = std::max(config.Quant(x / 8, y / 8), config.Quant(x / 8 + 1, y / 8));
+            masking = 2.0f * std::max(config.Masking(x / 8, y / 8), config.Masking(x / 8 + 1, y / 8));
+        }
+    } else {
+        float masking_norm2 = 0;
+        float masking_max = 0;
+        // Load QF value, calculate empirical heuristic on masking field
+        // for weighting the information loss. Information loss manifests
+        // itself as ringing, and masking could hide it.
+        for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+            for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+                // qval^8, built by three successive squarings.
+                float qval = config.Quant(x / 8 + ix, y / 8 + iy);
+                qval *= qval;
+                qval *= qval;
+                quant_norm8 += qval * qval;
+                float maskval = config.Masking(x / 8 + ix, y / 8 + iy);
+                masking_max = std::max<float>(masking_max, maskval);
+                masking_norm2 += maskval * maskval;
+            }
+        }
+        quant_norm8 /= num_blocks;
+        quant_norm8 = FastPowf(quant_norm8, 1.0f / 8.0f);
+        masking_norm2 = sqrt(masking_norm2 / num_blocks);
+        // This is a highly empirical formula.
+        masking = (masking_norm2 + masking_max);
+    }
+    const auto q = Set(df, quant_norm8);
+
+    // Compute entropy.
+    float entropy = config.base_entropy;
+    auto info_loss = Zero(df);
+    auto info_loss2 = Zero(df);
+
+    for (size_t c = 0; c < 3; c++) {
+        const float* inv_matrix = config.dequant->InvMatrix(acs.RawStrategy(), c);
+        const auto cmap_factor = Set(df, cmap_factors[c]);
+
+        auto entropy_v = Zero(df);
+        auto nzeros_v = Zero(df);
+        auto cost1 = Set(df, config.cost1);
+        auto cost2 = Set(df, config.cost2);
+        auto cost_delta = Set(df, config.cost_delta);
+        for (size_t i = 0; i < num_blocks * kDCTBlockSize; i += Lanes(df)) {
+            const auto in = Load(df, block + c * size + i);
+            // Middle channel (index 1), scaled by this channel's factor.
+            const auto in_y = Load(df, block + size + i) * cmap_factor;
+            const auto im = Load(df, inv_matrix + i);
+            const auto val = (in - in_y) * im * q;
+            const auto rval = Round(val);
+            const auto diff = AbsDiff(val, rval);
+            info_loss += diff;
+            info_loss2 += diff * diff;
+            // Magnitude of the quantized coefficient. Named differently from
+            // the quantizer vector `q` above so that it does not shadow it.
+            const auto q_abs = Abs(rval);
+            const auto q_is_zero = q_abs == Zero(df);
+            entropy_v += IfThenElseZero(q_abs >= Set(df, 1.5f), cost2);
+            // We used to have q * C here, but that cost model seems to
+            // be punishing large values more than necessary. Sqrt tries
+            // to avoid large values less aggressively. Having high accuracy
+            // around zero is most important at low qualities, and there
+            // we have directly specified costs for 0, 1, and 2.
+            entropy_v += Sqrt(q_abs) * cost_delta;
+            nzeros_v += IfThenZeroElse(q_is_zero, Set(df, 1.0f));
+        }
+        entropy_v += nzeros_v * cost1;
+
+        entropy += GetLane(SumOfLanes(entropy_v));
+        size_t num_nzeros = GetLane(SumOfLanes(nzeros_v));
+        // Add #bit of num_nonzeros, as an estimate of the cost for encoding the
+        // number of non-zeros of the block.
+        size_t nbits = CeilLog2Nonzero(num_nzeros + 1) + 1;
+        // Also add #bit of #bit of num_nonzeros, to estimate the ANS cost, with a
+        // bias.
+        entropy += config.zeros_mul * (CeilLog2Nonzero(nbits + 17) + nbits);
+    }
+    float ret = entropy +
+                masking * ((config.info_loss_multiplier * GetLane(SumOfLanes(info_loss))) +
+                           (config.info_loss_multiplier2 * sqrt(num_blocks * GetLane(SumOfLanes(info_loss2)))));
+    return ret;
+}
+
+// Evaluates every single-block (8x8) transform candidate allowed at the given
+// speed tier for the block at pixel position (x, y) and returns the raw
+// strategy value of the cheapest one.
+//
+// Each candidate's EstimateEntropy() result is adjusted as
+// entropy_add + entropy_mul * estimate before comparison. The winning adjusted
+// value is written to *entropy_out. If no candidate passes the speed-tier
+// filter, the first table entry is returned and *entropy_out receives the
+// 1e30 sentinel.
+//
+// `ac_strategy` is not referenced by this function; all remaining arguments
+// are forwarded to EstimateEntropy() unchanged.
+uint8_t FindBest8x8Transform(size_t x,
+                             size_t y,
+                             int encoding_speed_tier,
+                             const ACSConfig& config,
+                             const float* JXL_RESTRICT cmap_factors,
+                             AcStrategyImage* JXL_RESTRICT ac_strategy,
+                             float* block,
+                             float* scratch_space,
+                             uint32_t* quantized,
+                             float* entropy_out,
+                             size_t xsize,
+                             size_t ysize,
+                             std::vector<std::vector<float> >& dctIDT,
+                             std::vector<std::vector<float> >& dct2x2,
+                             std::vector<std::vector<float> >& dct4x4,
+                             std::vector<std::vector<float> >& dct8x8,
+                             std::vector<std::vector<float> >& dct16x16,
+                             std::vector<std::vector<float> >& dct32x32,
+                             std::vector<std::vector<float> >& dcIDT,
+                             std::vector<std::vector<float> >& dc2x2,
+                             std::vector<std::vector<float> >& dc4x4,
+                             std::vector<std::vector<float> >& dc8x8,
+                             std::vector<std::vector<float> >& dc16x16,
+                             std::vector<std::vector<float> >& dc32x32) {
+    // One candidate: transform type, the highest speed-tier value at which it
+    // is still tried, and the affine correction applied to its estimate.
+    struct TransformTry8x8 {
+        AcStrategy::Type type;
+        int encoding_speed_tier_max_limit;
+        float entropy_add;
+        float entropy_mul;
+    };
+    static const TransformTry8x8 kTransforms8x8[] = {
+        {AcStrategy::Type::DCT, 9, 3.0f, 0.745f},
+        {AcStrategy::Type::DCT4X4, 5, 4.0f, 1.0179946967008329f},
+        {AcStrategy::Type::DCT2X2, 4, 4.0f, 0.76721119707580943f},
+#ifndef XLNX_DISABLE_RECT_DCT
+        {AcStrategy::Type::DCT4X8, 5, 0.0f, 0.700754622182473063f},
+        {AcStrategy::Type::DCT8X4, 5, 0.0f, 0.700754622182473063f},
+#endif
+        {AcStrategy::Type::IDENTITY, 5, 8.0f, 0.81217614513585534f},
+#ifndef XLNX_DISABLE_RECT_DCT
+        {AcStrategy::Type::AFV0, 4, 3.0f, 0.70086131125719425f},
+        {AcStrategy::Type::AFV1, 4, 3.0f, 0.70086131125719425f},
+        {AcStrategy::Type::AFV2, 4, 3.0f, 0.70086131125719425f},
+        {AcStrategy::Type::AFV3, 4, 3.0f, 0.70086131125719425f},
+#endif
+    };
+
+    uint8_t best_tx = kTransforms8x8[0].type;
+    double best = 1e30;
+    for (const TransformTry8x8& candidate : kTransforms8x8) {
+        // Skip candidates that are disabled at this speed tier.
+        if (encoding_speed_tier > candidate.encoding_speed_tier_max_limit) {
+            continue;
+        }
+        const AcStrategy acs = AcStrategy::FromRawStrategy(candidate.type);
+        const float estimate =
+            EstimateEntropy(acs, x, y, config, cmap_factors, block, scratch_space, quantized, xsize, ysize, dctIDT,
+                            dct2x2, dct4x4, dct8x8, dct16x16, dct32x32, dcIDT, dc2x2, dc4x4, dc8x8, dc16x16, dc32x32);
+        const float adjusted = candidate.entropy_add + candidate.entropy_mul * estimate;
+        if (adjusted < best) {
+            best = adjusted;
+            best_tx = candidate.type;
+        }
+    }
+    *entropy_out = best;
+    return best_tx;
+}
+
+// bx, by addresses the 64x64 block at 8x8 subresolution
+// cx, cy addresses the left, upper 8x8 block position of the candidate
+// transform.
+/*void TryMergeAcs(AcStrategy::Type acs_raw, size_t bx, size_t by, size_t cx,
+                 size_t cy, const ACSConfig& config,
+                 const float* JXL_RESTRICT cmap_factors,
+                 AcStrategyImage* JXL_RESTRICT ac_strategy,
+                 const float entropy_mul, const uint8_t candidate_priority,
+                 uint8_t* priority, float* JXL_RESTRICT entropy_estimate,
+                 float* block, float* scratch_space, uint32_t* quantized) {
+  AcStrategy acs = AcStrategy::FromRawStrategy(acs_raw);
+  float entropy_current = 0;
+  for (size_t iy = 0; iy < acs.covered_blocks_y(); ++iy) {
+    for (size_t ix = 0; ix < acs.covered_blocks_x(); ++ix) {
+      if (priority[(cy + iy) * 8 + (cx + ix)] >= candidate_priority) {
+        // Transform would reuse already allocated blocks and
+        // lead to invalid overlaps, for example DCT64X32 vs.
+        // DCT32X64.
+        return;
+      }
+      entropy_current += entropy_estimate[(cy + iy) * 8 + (cx + ix)];
+    }
+  }
+  float entropy_candidate =
+      entropy_mul * EstimateEntropy(acs, (bx + cx) * 8, (by + cy) * 8, config,
+                                    cmap_factors, block, scratch_space,
+                                    quantized);
+  if (entropy_candidate >= entropy_current) return;
+  // Accept the candidate.
+  for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+    for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+      entropy_estimate[(cy + iy) * 8 + cx + ix] = 0;
+      priority[(cy + iy) * 8 + cx + ix] = candidate_priority;
+    }
+  }
+  ac_strategy->Set(bx + cx, by + cy, acs_raw);
+  //  if (acs_raw > 5) printf("try_merge acs: %d\n", acs_raw);
+  entropy_estimate[cy * 8 + cx] = entropy_candidate;
+}*/
+
+// Records `entropy` for a transform of type `acs_raw` whose top-left block is
+// at (cx, cy) in the 8x8 `entropy_estimate` grid (row pitch 8): every block
+// covered by the transform is zeroed, then the top-left cell receives the
+// whole value.
+static void SetEntropyForTransform(size_t cx,
+                                   size_t cy,
+                                   const AcStrategy::Type acs_raw,
+                                   float entropy,
+                                   float* JXL_RESTRICT entropy_estimate) {
+    const AcStrategy acs = AcStrategy::FromRawStrategy(acs_raw);
+    const size_t origin = cy * 8 + cx;
+    for (size_t dy = 0; dy < acs.covered_blocks_y(); ++dy) {
+        const size_t row_start = origin + dy * 8;
+        for (size_t dx = 0; dx < acs.covered_blocks_x(); ++dx) {
+            entropy_estimate[row_start + dx] = 0.0f;
+        }
+    }
+    entropy_estimate[origin] = entropy;
+}
+
+// Maps a square edge length in 8x8 blocks to the square DCT covering it:
+// 2 -> DCT16X16, 4 -> DCT32X32, any other value -> DCT64X64.
+AcStrategy::Type AcsSquare(size_t blocks) {
+    switch (blocks) {
+        case 2:
+            return AcStrategy::Type::DCT16X16;
+        case 4:
+            return AcStrategy::Type::DCT32X32;
+        default:
+            return AcStrategy::Type::DCT64X64;
+    }
+}
+
+// Maps a square edge length in 8x8 blocks to the transform type used for each
+// half of a vertical split of that square:
+// 2 -> DCT16X8, 4 -> DCT32X16, any other value -> DCT64X32.
+AcStrategy::Type AcsVerticalSplit(size_t blocks) {
+    switch (blocks) {
+        case 2:
+            return AcStrategy::Type::DCT16X8;
+        case 4:
+            return AcStrategy::Type::DCT32X16;
+        default:
+            return AcStrategy::Type::DCT64X32;
+    }
+}
+
+// Maps a square edge length in 8x8 blocks to the transform type used for each
+// half of a horizontal split of that square:
+// 2 -> DCT8X16, 4 -> DCT16X32, any other value -> DCT32X64.
+AcStrategy::Type AcsHorizontalSplit(size_t blocks) {
+    switch (blocks) {
+        case 2:
+            return AcStrategy::Type::DCT8X16;
+        case 4:
+            return AcStrategy::Type::DCT16X32;
+        default:
+            return AcStrategy::Type::DCT32X64;
+    }
+}
+
+// The following function tries to merge smaller transforms into
+// squares and the rectangles originating from a single middle division
+// (horizontal or vertical) fairly.
+//
+// This is now generalized to concern about squares
+// of blocks X blocks size, where a block is 8x8 pixels.
+//
+// Parameters:
+//   blocks                  edge length of the candidate square, in 8x8
+//                           blocks. It is halved and used as a divisor, so it
+//                           must be at least 2.
+//   allow_square_transform  when false, the JxJ square is never evaluated.
+//   bx, by                  block coordinates added to (cx, cy) when
+//                           addressing `ac_strategy`.
+//   cx, cy                  top-left block of the candidate square inside the
+//                           8x8 `entropy_estimate` grid (row pitch 8).
+//   entropy_mul_JXK/JXJ     multipliers applied to the rectangular / square
+//                           entropy estimates before comparison.
+//   entropy_estimate        per-block current costs; updated when a merge is
+//                           accepted.
+//   remaining arguments     forwarded to EstimateEntropy() unchanged.
+//
+// On acceptance the chosen transform is written to `ac_strategy` and its cost
+// to `entropy_estimate`; otherwise both are left untouched.
+//
+// The square transform is never evaluated for DCT64X64. The rectangular
+// candidates are only compiled when XLNX_DISABLE_RECT_DCT is not defined.
+// NOTE(review): EstimateEntropy() as visible in this file only has
+// coefficient sources for raw strategies 0..5, so the rectangular path
+// presumably needs matching coefficient buffers before it is enabled.
+void FindBestFirstLevelDivisionForSquare(size_t blocks,
+                                         bool allow_square_transform,
+                                         size_t bx,
+                                         size_t by,
+                                         size_t cx,
+                                         size_t cy,
+                                         const ACSConfig& config,
+                                         const float* JXL_RESTRICT cmap_factors,
+                                         AcStrategyImage* JXL_RESTRICT ac_strategy,
+                                         const float entropy_mul_JXK,
+                                         const float entropy_mul_JXJ,
+                                         float* JXL_RESTRICT entropy_estimate,
+                                         float* block,
+                                         float* scratch_space,
+                                         uint32_t* quantized,
+                                         size_t xsize,
+                                         size_t ysize,
+                                         std::vector<std::vector<float> >& dctIDT,
+                                         std::vector<std::vector<float> >& dct2x2,
+                                         std::vector<std::vector<float> >& dct4x4,
+                                         std::vector<std::vector<float> >& dct8x8,
+                                         std::vector<std::vector<float> >& dct16x16,
+                                         std::vector<std::vector<float> >& dct32x32,
+                                         std::vector<std::vector<float> >& dcIDT,
+                                         std::vector<std::vector<float> >& dc2x2,
+                                         std::vector<std::vector<float> >& dc4x4,
+                                         std::vector<std::vector<float> >& dc8x8,
+                                         std::vector<std::vector<float> >& dc16x16,
+                                         std::vector<std::vector<float> >& dc32x32) {
+    // We denote J for the larger dimension here, and K for the smaller.
+    // For example, for 32x32 block splitting, J would be 32, K 16.
+    const size_t blocks_half = blocks / 2;
+    const AcStrategy::Type acs_rawJXK = AcsVerticalSplit(blocks);
+    const AcStrategy::Type acs_rawKXJ = AcsHorizontalSplit(blocks);
+    const AcStrategy::Type acs_rawJXJ = AcsSquare(blocks);
+    const AcStrategy acsJXK = AcStrategy::FromRawStrategy(acs_rawJXK);
+    const AcStrategy acsKXJ = AcStrategy::FromRawStrategy(acs_rawKXJ);
+    const AcStrategy acsJXJ = AcStrategy::FromRawStrategy(acs_rawJXJ);
+    AcStrategyRow row0 = ac_strategy->ConstRow(by + cy + 0);
+    AcStrategyRow row1 = ac_strategy->ConstRow(by + cy + blocks_half);
+    // Let's check if we can consider a JXJ block here at all.
+    // This is not necessary in the basic use of hierarchically merging
+    // blocks in the simplest possible way, but is needed when we try other
+    // 'floating' options of merging, possibly after a simple hierarchical
+    // merge has been explored.
+    if (MultiBlockTransformCrossesHorizontalBoundary(*ac_strategy, bx + cx, by + cy, bx + cx + blocks) ||
+        MultiBlockTransformCrossesHorizontalBoundary(*ac_strategy, bx + cx, by + cy + blocks, bx + cx + blocks) ||
+        MultiBlockTransformCrossesVerticalBoundary(*ac_strategy, bx + cx, by + cy, by + cy + blocks) ||
+        MultiBlockTransformCrossesVerticalBoundary(*ac_strategy, bx + cx + blocks, by + cy, by + cy + blocks)) {
+        return; // not suitable for JxJ analysis, some transforms leak out.
+    }
+    // For floating transforms there may be
+    // already blocks selected that make either or both JXK and
+    // KXJ not feasible for this location.
+    const bool allow_JXK =
+        !MultiBlockTransformCrossesVerticalBoundary(*ac_strategy, bx + cx + blocks_half, by + cy, by + cy + blocks);
+    const bool allow_KXJ =
+        !MultiBlockTransformCrossesHorizontalBoundary(*ac_strategy, bx + cx, by + cy + blocks_half, bx + cx + blocks);
+    // Current entropies aggregated on NxN resolution: one sum per quadrant,
+    // indexed [vertical half][horizontal half].
+    float entropy[2][2] = {};
+    for (size_t dy = 0; dy < blocks; ++dy) {
+        for (size_t dx = 0; dx < blocks; ++dx) {
+            entropy[dy / blocks_half][dx / blocks_half] += entropy_estimate[(cy + dy) * 8 + (cx + dx)];
+        }
+    }
+    // Candidates that are not evaluated keep the maximum float value and can
+    // therefore never win a comparison.
+    float entropy_JXK_left = std::numeric_limits<float>::max();
+    float entropy_JXK_right = std::numeric_limits<float>::max();
+    float entropy_KXJ_top = std::numeric_limits<float>::max();
+    float entropy_KXJ_bottom = std::numeric_limits<float>::max();
+    float entropy_JXJ = std::numeric_limits<float>::max();
+#ifndef XLNX_DISABLE_RECT_DCT
+    // The rectangular candidates pass the same full argument list to
+    // EstimateEntropy() as the square candidate below.
+    if (allow_JXK) {
+        if (row0[bx + cx + 0].RawStrategy() != acs_rawJXK) {
+            entropy_JXK_left =
+                entropy_mul_JXK * EstimateEntropy(acsJXK, (bx + cx + 0) * 8, (by + cy + 0) * 8, config, cmap_factors,
+                                                  block, scratch_space, quantized, xsize, ysize, dctIDT, dct2x2,
+                                                  dct4x4, dct8x8, dct16x16, dct32x32, dcIDT, dc2x2, dc4x4, dc8x8,
+                                                  dc16x16, dc32x32);
+        }
+        if (row0[bx + cx + blocks_half].RawStrategy() != acs_rawJXK) {
+            entropy_JXK_right =
+                entropy_mul_JXK * EstimateEntropy(acsJXK, (bx + cx + blocks_half) * 8, (by + cy + 0) * 8, config,
+                                                  cmap_factors, block, scratch_space, quantized, xsize, ysize, dctIDT,
+                                                  dct2x2, dct4x4, dct8x8, dct16x16, dct32x32, dcIDT, dc2x2, dc4x4,
+                                                  dc8x8, dc16x16, dc32x32);
+        }
+    }
+    if (allow_KXJ) {
+        if (row0[bx + cx].RawStrategy() != acs_rawKXJ) {
+            entropy_KXJ_top =
+                entropy_mul_JXK * EstimateEntropy(acsKXJ, (bx + cx + 0) * 8, (by + cy + 0) * 8, config, cmap_factors,
+                                                  block, scratch_space, quantized, xsize, ysize, dctIDT, dct2x2,
+                                                  dct4x4, dct8x8, dct16x16, dct32x32, dcIDT, dc2x2, dc4x4, dc8x8,
+                                                  dc16x16, dc32x32);
+        }
+        if (row1[bx + cx].RawStrategy() != acs_rawKXJ) {
+            entropy_KXJ_bottom =
+                entropy_mul_JXK * EstimateEntropy(acsKXJ, (bx + cx + 0) * 8, (by + cy + blocks_half) * 8, config,
+                                                  cmap_factors, block, scratch_space, quantized, xsize, ysize, dctIDT,
+                                                  dct2x2, dct4x4, dct8x8, dct16x16, dct32x32, dcIDT, dc2x2, dc4x4,
+                                                  dc8x8, dc16x16, dc32x32);
+        }
+    }
+#endif
+    if (allow_square_transform && acs_rawJXJ != AcStrategy::Type::DCT64X64) {
+        // We control the exploration of the square transform separately so that
+        // we can turn it off at high decoding speeds for 32x32, but still allow
+        // exploring 16x32 and 32x16.
+        entropy_JXJ =
+            entropy_mul_JXJ * EstimateEntropy(acsJXJ, (bx + cx + 0) * 8, (by + cy + 0) * 8, config, cmap_factors, block,
+                                              scratch_space, quantized, xsize, ysize, dctIDT, dct2x2, dct4x4, dct8x8,
+                                              dct16x16, dct32x32, dcIDT, dc2x2, dc4x4, dc8x8, dc16x16, dc32x32);
+    }
+
+    // Decide whether the square beats every alternative. With rectangles
+    // enabled it must beat both split layouts; otherwise it only has to beat
+    // the sum of the four quadrants' current costs.
+#ifndef XLNX_DISABLE_RECT_DCT
+    // Test if this block should have JXK or KXJ transforms,
+    // because it can have only one or the other.
+    const float costJxN = std::min(entropy_JXK_left, entropy[0][0] + entropy[1][0]) +
+                          std::min(entropy_JXK_right, entropy[0][1] + entropy[1][1]);
+    const float costNxJ = std::min(entropy_KXJ_top, entropy[0][0] + entropy[0][1]) +
+                          std::min(entropy_KXJ_bottom, entropy[1][0] + entropy[1][1]);
+    const bool choose_square = entropy_JXJ < costJxN && entropy_JXJ < costNxJ;
+#else
+    const bool choose_square = entropy_JXJ < entropy[0][0] + entropy[1][0] + entropy[0][1] + entropy[1][1];
+#endif
+    if (choose_square) {
+        ac_strategy->Set(bx + cx, by + cy, acs_rawJXJ);
+        SetEntropyForTransform(cx, cy, acs_rawJXJ, entropy_JXJ, entropy_estimate);
+        return;
+    }
+#ifndef XLNX_DISABLE_RECT_DCT
+    if (costJxN < costNxJ) {
+        // Vertical split: accept each half only if it beats its two quadrants.
+        if (entropy_JXK_left < entropy[0][0] + entropy[1][0]) {
+            ac_strategy->Set(bx + cx, by + cy, acs_rawJXK);
+            SetEntropyForTransform(cx, cy, acs_rawJXK, entropy_JXK_left, entropy_estimate);
+        }
+        if (entropy_JXK_right < entropy[0][1] + entropy[1][1]) {
+            ac_strategy->Set(bx + cx + blocks_half, by + cy, acs_rawJXK);
+            SetEntropyForTransform(cx + blocks_half, cy, acs_rawJXK, entropy_JXK_right, entropy_estimate);
+        }
+    } else {
+        // Horizontal split: accept each half only if it beats its two quadrants.
+        if (entropy_KXJ_top < entropy[0][0] + entropy[0][1]) {
+            ac_strategy->Set(bx + cx, by + cy, acs_rawKXJ);
+            SetEntropyForTransform(cx, cy, acs_rawKXJ, entropy_KXJ_top, entropy_estimate);
+        }
+        if (entropy_KXJ_bottom < entropy[1][0] + entropy[1][1]) {
+            ac_strategy->Set(bx + cx, by + cy + blocks_half, acs_rawKXJ);
+            SetEntropyForTransform(cx, cy + blocks_half, acs_rawKXJ, entropy_KXJ_bottom, entropy_estimate);
+        }
+    }
+#endif
+}
+
+void ProcessRectACS(PassesEncoderState* JXL_RESTRICT enc_state,
+                    const ACSConfig& config,
+                    const Rect& rect,
+                    size_t xsize,
+                    size_t ysize,
+                    std::vector<std::vector<float> >& dctIDT,
+                    std::vector<std::vector<float> >& dct2x2,
+                    std::vector<std::vector<float> >& dct4x4,
+                    std::vector<std::vector<float> >& dct8x8,
+                    std::vector<std::vector<float> >& dct16x16,
+                    std::vector<std::vector<float> >& dct32x32,
+
+                    std::vector<std::vector<float> >& dcIDT,
+                    std::vector<std::vector<float> >& dc2x2,
+                    std::vector<std::vector<float> >& dc4x4,
+                    std::vector<std::vector<float> >& dc8x8,
+                    std::vector<std::vector<float> >& dc16x16,
+                    std::vector<std::vector<float> >& dc32x32
+                    //================================
+                    ) {
+    // Main philosophy here:
+    // 1. First find best 8x8 transform for each area.
+    // 2. Merging them into larger transforms where possible, but
+    // starting from the smallest transforms (16x8 and 8x16).
+    // Additional complication: 16x8 and 8x16 are considered
+    // simultaneously and fairly against each other.
+    // We are looking at 64x64 squares since the YtoX and YtoB
+    // maps happen to be at that resolution, and having
+    // integral transforms cross these boundaries leads to
+    // additional complications.
+    const CompressParams& cparams = enc_state->cparams;
+    const float butteraugli_target = cparams.butteraugli_distance;
+    AcStrategyImage* ac_strategy = &enc_state->shared.ac_strategy;
+    // TODO(veluca): reuse allocations
+    auto mem = hwy::AllocateAligned<float>(5 * AcStrategy::kMaxCoeffArea);
+    auto qmem = hwy::AllocateAligned<uint32_t>(AcStrategy::kMaxCoeffArea);
+    uint32_t* JXL_RESTRICT quantized = qmem.get();
+    float* JXL_RESTRICT block = mem.get();
+    float* JXL_RESTRICT scratch_space = mem.get() + 3 * AcStrategy::kMaxCoeffArea;
+    size_t bx = rect.x0();
+    size_t by = rect.y0();
+    JXL_ASSERT(rect.xsize() <= 8);
+    JXL_ASSERT(rect.ysize() <= 8);
+    size_t tx = bx / kColorTileDimInBlocks;
+    size_t ty = by / kColorTileDimInBlocks;
+    const float cmap_factors[3] = {
+        enc_state->shared.cmap.YtoXRatio(enc_state->shared.cmap.ytox_map.ConstRow(ty)[tx]), 0.0f,
+        enc_state->shared.cmap.YtoBRatio(enc_state->shared.cmap.ytob_map.ConstRow(ty)[tx]),
+    };
+    if (cparams.speed_tier > SpeedTier::kHare) return;
+    // First compute the best 8x8 transform for each square. Later, we do not
+    // experiment with different combinations, but only use the best of the 8x8s
+    // when DCT8X8 is specified in the tree search.
+    // 8x8 transforms have 10 variants, but every larger transform is just a DCT.
+    float entropy_estimate[64] = {};
+    // Favor all 8x8 transforms (against 16x8 and larger transforms) at
+    // low butteraugli_target distances.
+    static const float k8x8mul1 = -0.55;
+    static const float k8x8mul2 = 1.0735757687292623f;
+    static const float k8x8base = 1.4;
+    const float mul8x8 = k8x8mul2 + k8x8mul1 / (butteraugli_target + k8x8base);
+    for (size_t iy = 0; iy < rect.ysize(); iy++) {
+        for (size_t ix = 0; ix < rect.xsize(); ix++) {
+            float entropy = 0.0;
+            const uint8_t best_of_8x8s = FindBest8x8Transform(
+                8 * (bx + ix), 8 * (by + iy), static_cast<int>(cparams.speed_tier), config, cmap_factors, ac_strategy,
+                block, scratch_space, quantized, &entropy, xsize, ysize, dctIDT, dct2x2, dct4x4, dct8x8, dct16x16,
+                dct32x32, dcIDT, dc2x2, dc4x4, dc8x8, dc16x16, dc32x32);
+            ac_strategy->Set(bx + ix, by + iy, static_cast<AcStrategy::Type>(best_of_8x8s));
+            //      if (static_cast<AcStrategy::Type>(best_of_8x8s) > 5) {
+            //        printf("after find best8x8 acs: %d\n",
+            //               static_cast<AcStrategy::Type>(best_of_8x8s));
+            //      }
+            entropy_estimate[iy * 8 + ix] = entropy * mul8x8;
+        }
+    }
+    // Merge when a larger transform is better than the previously
+    // searched best combination of 8x8 transforms.
+    struct MergeTry {
+        AcStrategy::Type type;
+        uint8_t priority;
+        uint8_t decoding_speed_tier_max_limit;
+        uint8_t encoding_speed_tier_max_limit;
+        float entropy_mul;
+    };
+    static const float k8X16mul1 = -0.55;
+    static const float k8X16mul2 = 0.9019587899705066;
+    static const float k8X16base = 1.6;
+    const float entropy_mul16X8 = k8X16mul2 + k8X16mul1 / (butteraugli_target + k8X16base);
+    //  const float entropy_mul16X8 = mul8X16 * 0.91195782912371126f;
+
+    static const float k16X16mul1 = -0.35;
+    static const float k16X16mul2 = 0.82098067020252011;
+    static const float k16X16base = 2.0;
+    const float entropy_mul16X16 = k16X16mul2 + k16X16mul1 / (butteraugli_target + k16X16base);
+    //  const float entropy_mul16X16 = mul16X16 * 0.83183417727960129f;
+
+    static const float k32X16mul1 = -0.1;
+    static const float k32X16mul2 = 0.86098067020252011;
+    static const float k32X16base = 2.5;
+    const float entropy_mul16X32 = k32X16mul2 + k32X16mul1 / (butteraugli_target + k32X16base);
+
+    const float entropy_mul32X32 = 0.9188333021616017f;
+    const float entropy_mul64X64 = 1.50f;
+    // TODO(jyrki): Consider this feedback in further changes:
+    // Also effectively when the multipliers for smaller blocks are
+    // below 1, this raises the bar for the bigger blocks even higher
+    // in that sense these constants are not independent (e.g. changing
+    // the constant for DCT16x32 by -5% (making it more likely) also
+    // means that DCT32x32 becomes harder to do when starting from
+    // two DCT16x32s). It might be better to make them more independent,
+    // e.g. by not applying the multiplier when storing the new entropy
+    // estimates in TryMergeToACSCandidate().
+    const MergeTry kTransformsForMerge[9] = {
+        {AcStrategy::Type::DCT16X8, 2, 4, 5, entropy_mul16X8},
+        {AcStrategy::Type::DCT8X16, 2, 4, 5, entropy_mul16X8},
+        // FindBestFirstLevelDivisionForSquare looks for DCT16X16 and its
+        // subdivisions. {AcStrategy::Type::DCT16X16, 3, entropy_mul16X16},
+        {AcStrategy::Type::DCT16X32, 4, 4, 4, entropy_mul16X32},
+        {AcStrategy::Type::DCT32X16, 4, 4, 4, entropy_mul16X32},
+        // FindBestFirstLevelDivisionForSquare looks for DCT32X32 and its
+        // subdivisions. {AcStrategy::Type::DCT32X32, 5, 1, 5,
+        // 0.9822994906548809f},
+        // TODO(jyrki): re-enable 64x32 and 64x64 if/when possible.
+        {AcStrategy::Type::DCT64X32, 6, 1, 3, 1.27f},
+        {AcStrategy::Type::DCT32X64, 6, 1, 3, 1.27f},
+        // {AcStrategy::Type::DCT64X64, 8, 1, 3, 2.0846542128012948f},
+    };
+    /*
+    These sizes not yet included in merge heuristic:
+    set(AcStrategy::Type::DCT32X8, 0.0f, 2.261390410971102f);
+    set(AcStrategy::Type::DCT8X32, 0.0f, 2.261390410971102f);
+    set(AcStrategy::Type::DCT128X128, 0.0f, 1.0f);
+    set(AcStrategy::Type::DCT128X64, 0.0f, 0.73f);
+    set(AcStrategy::Type::DCT64X128, 0.0f, 0.73f);
+    set(AcStrategy::Type::DCT256X256, 0.0f, 1.0f);
+    set(AcStrategy::Type::DCT256X128, 0.0f, 0.73f);
+    set(AcStrategy::Type::DCT128X256, 0.0f, 0.73f);
+    */
+
+    // Priority is a tricky kludge to avoid collisions so that transforms
+    // don't overlap.
+    uint8_t priority[64] = {};
+    for (auto tx : kTransformsForMerge) {
+        if (tx.decoding_speed_tier_max_limit < cparams.decoding_speed_tier) {
+            continue;
+        }
+        AcStrategy acs = AcStrategy::FromRawStrategy(tx.type);
+        for (size_t cy = 0; cy + acs.covered_blocks_y() - 1 < rect.ysize(); cy += acs.covered_blocks_y()) {
+            for (size_t cx = 0; cx + acs.covered_blocks_x() - 1 < rect.xsize(); cx += acs.covered_blocks_x()) {
+                if (cy + 7 < rect.ysize() && cx + 7 < rect.xsize()) {
+                    if (cparams.decoding_speed_tier < 4 && tx.type == AcStrategy::Type::DCT32X64) {
+                        // We handle both DCT8X16 and DCT16X8 at the same time.
+                        if ((cy | cx) % 8 == 0) {
+                            FindBestFirstLevelDivisionForSquare(8, true, bx, by, cx, cy, config, cmap_factors,
+                                                                ac_strategy, tx.entropy_mul, entropy_mul64X64,
+                                                                entropy_estimate, block, scratch_space, quantized,
+                                                                xsize, ysize, dctIDT, dct2x2, dct4x4, dct8x8, dct16x16,
+                                                                dct32x32, dcIDT, dc2x2, dc4x4, dc8x8, dc16x16, dc32x32);
+                        }
+                        continue;
+                    } else if (tx.type == AcStrategy::Type::DCT32X16) {
+                        // We handled both DCT8X16 and DCT16X8 at the same time,
+                        // and that is above. The last column and last row,
+                        // when the last column or last row is odd numbered,
+                        // are still handled by TryMergeAcs.
+                        continue;
+                    }
+                }
+                if ((tx.type == AcStrategy::Type::DCT16X32 && cy % 4 != 0) ||
+                    (tx.type == AcStrategy::Type::DCT32X16 && cx % 4 != 0)) {
+                    // already covered by FindBest32X32
+                    continue;
+                }
+
+                if (cy + 3 < rect.ysize() && cx + 3 < rect.xsize()) {
+                    if (tx.type == AcStrategy::Type::DCT16X32) {
+                        // We handle both DCT8X16 and DCT16X8 at the same time.
+                        bool enable_32x32 = cparams.decoding_speed_tier < 4;
+                        if ((cy | cx) % 4 == 0) {
+                            FindBestFirstLevelDivisionForSquare(4, enable_32x32, bx, by, cx, cy, config, cmap_factors,
+                                                                ac_strategy, tx.entropy_mul, entropy_mul32X32,
+                                                                entropy_estimate, block, scratch_space, quantized,
+                                                                xsize, ysize, dctIDT, dct2x2, dct4x4, dct8x8, dct16x16,
+                                                                dct32x32, dcIDT, dc2x2, dc4x4, dc8x8, dc16x16, dc32x32);
+                        }
+                        continue;
+                    } else if (tx.type == AcStrategy::Type::DCT32X16) {
+                        // We handled both DCT8X16 and DCT16X8 at the same time,
+                        // and that is above. The last column and last row,
+                        // when the last column or last row is odd numbered,
+                        // are still handled by TryMergeAcs.
+                        continue;
+                    }
+                }
+                if ((tx.type == AcStrategy::Type::DCT16X32 && cy % 4 != 0) ||
+                    (tx.type == AcStrategy::Type::DCT32X16 && cx % 4 != 0)) {
+                    // already covered by FindBest32X32
+                    continue;
+                }
+                if (cy + 1 < rect.ysize() && cx + 1 < rect.xsize()) {
+                    if (tx.type == AcStrategy::Type::DCT8X16) {
+                        // We handle both DCT8X16 and DCT16X8 at the same time.
+                        if ((cy | cx) % 2 == 0) {
+                            FindBestFirstLevelDivisionForSquare(2, true, bx, by, cx, cy, config, cmap_factors,
+                                                                ac_strategy, tx.entropy_mul, entropy_mul16X16,
+                                                                entropy_estimate, block, scratch_space, quantized,
+                                                                xsize, ysize, dctIDT, dct2x2, dct4x4, dct8x8, dct16x16,
+                                                                dct32x32, dcIDT, dc2x2, dc4x4, dc8x8, dc16x16, dc32x32);
+                        }
+                        continue;
+                    } else if (tx.type == AcStrategy::Type::DCT16X8) {
+                        // We handled both DCT8X16 and DCT16X8 at the same time,
+                        // and that is above. The last column and last row,
+                        // when the last column or last row is odd numbered,
+                        // are still handled by TryMergeAcs.
+                        continue;
+                    }
+                }
+                if ((tx.type == AcStrategy::Type::DCT8X16 && cy % 2 == 1) ||
+                    (tx.type == AcStrategy::Type::DCT16X8 && cx % 2 == 1)) {
+                    // already covered by FindBestFirstLevelDivisionForSquare
+                    continue;
+                }
+// All other merge sizes are handled here.
+// Some of the DCT16X8s and DCT8X16s will still leak through here
+// when there is an odd number of 8x8 blocks, then the last row
+// and column will get their DCT16X8s and DCT8X16s through the
+// normal integral transform merging process.
+
+#ifndef XLNX_DISABLE_RECT_DCT
+                TryMergeAcs(tx.type, bx, by, cx, cy, config, cmap_factors, ac_strategy, tx.entropy_mul, tx.priority,
+                            &priority[0], entropy_estimate, block, scratch_space, quantized);
+#endif
+            }
+        }
+    }
+    // Here we still try to do some non-aligned matching, find a few more
+    // 16X8, 8X16 and 16X16s between the non-2-aligned blocks.
+    if (cparams.speed_tier >= SpeedTier::kHare) {
+        return;
+    }
+    /*  std::cout<<std::endl<<std::endl<<"=============final
+      find================"<<std::endl<<std::endl; for (int ii = 0; ii < 3; ++ii)
+      { for (size_t cy = 1 - (ii == 1); cy + 1 < rect.ysize(); cy += 2) { for
+      (size_t cx = 1 - (ii == 2); cx + 1 < rect.xsize(); cx += 2) {
+            FindBestFirstLevelDivisionForSquare(
+                2, true, bx, by, cx, cy, config, cmap_factors, ac_strategy,
+                entropy_mul16X8, entropy_mul16X16, entropy_estimate, block,
+                scratch_space, quantized, dct8x8, dct16x16, dct32x32, dc8x8_y,
+                dc8x8_x, dc8x8_b, dc16x16_y, dc16x16_x, dc16x16_b, dc32x32_y,
+                dc32x32_x, dc32x32_b);
+          }
+        }
+      }*/
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(ProcessRectACS);
+
+void AcStrategyHeuristics::Init(const Image3F& src, PassesEncoderState* enc_state) {
+    // Keep the encoder state: ProcessRect() and Finalize() read it later.
+    this->enc_state = enc_state;
+    const float butteraugli_target = enc_state->cparams.butteraugli_distance;
+    config.dequant = &enc_state->shared.matrices;
+
+    // Row-0 pointer and PixelsPerRow() stride of the initial quantization field.
+    auto& quant_field = enc_state->initial_quant_field;
+    config.quant_field_row = quant_field.Row(0);
+    config.quant_field_stride = quant_field.PixelsPerRow();
+    // The masking field is only hooked up when it is non-empty.
+    auto& masking = enc_state->initial_quant_masking;
+    const bool has_masking = masking.xsize() > 0 && masking.ysize() > 0;
+    if (has_masking) {
+        config.masking_field_row = masking.Row(0);
+        config.masking_field_stride = masking.PixelsPerRow();
+    }
+    // Row-0 pointer of each of the three source planes, plus their stride.
+    for (size_t c = 0; c < 3; ++c) config.src_rows[c] = src.ConstPlaneRow(c, 0);
+    config.src_stride = src.PixelsPerRow();
+    // The entropy estimate combines two terms: the estimated number of bits
+    // the block will use, and the information lost to quantization. These
+    // multipliers control the relative weight of the two terms.
+    config.info_loss_multiplier = 138.0f;
+    config.info_loss_multiplier2 = 50.46839691767866;
+    config.zeros_mul = 7.565053364251793f;
+    // TODO(jyrki): explore base_entropy setting more.
+    // A small value (0?) works better at high distance, while a larger value
+    // may be more effective at low distance/high bpp.
+    config.base_entropy = 0.0;
+    // High quality yields lots of +1 and -1 coefficients, so favoring them is
+    // beneficial. At low quality zeros matter more and +1 / -1 coefficients are
+    // already quite harmful, hence cost1 grows with the target distance until
+    // the ramp saturates at a distance of 3.
+    const float ramp = std::min<float>(1.0f, butteraugli_target * (1.0f / 3));
+    config.cost1 = 1 + ramp * 8.8703248061477744f;
+    config.cost2 = 4.4628149885273363f;
+    config.cost_delta = 5.3359184934516337f;
+    JXL_ASSERT(enc_state->shared.ac_strategy.xsize() == enc_state->shared.frame_dim.xsize_blocks);
+    JXL_ASSERT(enc_state->shared.ac_strategy.ysize() == enc_state->shared.frame_dim.ysize_blocks);
+}
+
+// Picks the AC strategy for the blocks in `rect`: when cparams.speed_tier is
+// at least SpeedTier::kCheetah the rect is filled with DCT8, otherwise the work
+// (with all caller-provided buffers) goes to the dispatched ProcessRectACS.
+void AcStrategyHeuristics::ProcessRect(const Rect& rect,
+                                       size_t xsize,
+                                       size_t ysize,
+                                       std::vector<std::vector<float> >& dctIDT,
+                                       std::vector<std::vector<float> >& dct2x2,
+                                       std::vector<std::vector<float> >& dct4x4,
+                                       std::vector<std::vector<float> >& dct8x8,
+                                       std::vector<std::vector<float> >& dct16x16,
+                                       std::vector<std::vector<float> >& dct32x32,
+                                       std::vector<std::vector<float> >& dcIDT,
+                                       std::vector<std::vector<float> >& dc2x2,
+                                       std::vector<std::vector<float> >& dc4x4,
+                                       std::vector<std::vector<float> >& dc8x8,
+                                       std::vector<std::vector<float> >& dc16x16,
+                                       std::vector<std::vector<float> >& dc32x32) {
+    PROFILER_FUNC;
+    const bool dct8_only = enc_state->cparams.speed_tier >= SpeedTier::kCheetah;
+    if (!dct8_only) {
+        HWY_DYNAMIC_DISPATCH(ProcessRectACS)
+        (enc_state, config, rect, xsize, ysize, dctIDT, dct2x2, dct4x4, dct8x8, dct16x16, dct32x32, dcIDT, dc2x2,
+         dc4x4, dc8x8, dc16x16, dc32x32);
+        return;
+    }
+    enc_state->shared.ac_strategy.FillDCT8(rect);
+}
+
+// Stores per-transform block counts of the final AC strategy image in `aux_out`
+// (when non-null) and dumps the image when WantDebugOutput(aux_out) is true.
+void AcStrategyHeuristics::Finalize(AuxOut* aux_out) {
+    const auto& ac_strategy = enc_state->shared.ac_strategy;
+    // Accounting and debug output.
+    if (aux_out != nullptr) {
+        using Type = AcStrategy::Type;
+        const auto count = [&ac_strategy](Type type) { return ac_strategy.CountBlocks(type); };
+        // NOTE(review): num_dct2/num_dct4 are fed from the 64x32/32x64 and 64x64
+        // strategies (scaled by 32 and 64) -- confirm this counter reuse is intended.
+        aux_out->num_dct2_blocks = 32 * (count(Type::DCT32X64) + count(Type::DCT64X32));
+        aux_out->num_dct4_blocks = 64 * count(Type::DCT64X64);
+        aux_out->num_dct4x8_blocks = count(Type::DCT4X8) + count(Type::DCT8X4);
+        aux_out->num_afv_blocks = count(Type::AFV0) + count(Type::AFV1) + count(Type::AFV2) + count(Type::AFV3);
+        aux_out->num_dct8_blocks = count(Type::DCT);
+        aux_out->num_dct8x16_blocks = count(Type::DCT8X16) + count(Type::DCT16X8);
+        aux_out->num_dct8x32_blocks = count(Type::DCT8X32) + count(Type::DCT32X8);
+        aux_out->num_dct16_blocks = count(Type::DCT16X16);
+        aux_out->num_dct16x32_blocks = count(Type::DCT16X32) + count(Type::DCT32X16);
+        aux_out->num_dct32_blocks = count(Type::DCT32X32);
+#ifdef XLNX_DEBUG_DCT
+        // Cast to size_t and print with %zu so format and argument types always match.
+        printf("=====================================\n");
+        printf("DCT info: \n");
+        printf("afv_blocks: %zu\n", static_cast<size_t>(aux_out->num_afv_blocks));
+        printf("dct2: %zu\n", static_cast<size_t>(aux_out->num_dct2_blocks));
+        printf("dct4: %zu\n", static_cast<size_t>(aux_out->num_dct4_blocks));
+        printf("dct8: %zu\n", static_cast<size_t>(aux_out->num_dct8_blocks));
+        printf("dct16: %zu\n", static_cast<size_t>(aux_out->num_dct16_blocks));
+        printf("dct32: %zu\n", static_cast<size_t>(aux_out->num_dct32_blocks));
+        printf("dct64: %zu\n\n", static_cast<size_t>(count(Type::DCT64X64)));
+        printf("dct4x8: %zu\n", static_cast<size_t>(aux_out->num_dct4x8_blocks));
+        printf("dct8x16: %zu\n", static_cast<size_t>(aux_out->num_dct8x16_blocks));
+        printf("dct8x32: %zu\n", static_cast<size_t>(aux_out->num_dct8x32_blocks));
+        printf("dct16x32: %zu\n\n", static_cast<size_t>(aux_out->num_dct16x32_blocks));
+#endif
+    }
+
+    if (WantDebugOutput(aux_out)) {
+        DumpAcStrategy(ac_strategy, enc_state->shared.frame_dim.xsize, enc_state->shared.frame_dim.ysize, "ac_strategy",
+                       aux_out);
+    }
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/others/src/acc_enc_adaptive_quantization.cpp b/codec/L2/demos/jxlEnc/others/src/acc_enc_adaptive_quantization.cpp
new file mode 100644
index 0000000000..f1d2dd060c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/acc_enc_adaptive_quantization.cpp
@@ -0,0 +1,1009 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_adaptive_quantization.h"
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <string>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "xilinx/src/acc_enc_adaptive_quantization.cpp"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/butteraugli/butteraugli.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_group.h"
+#include "lib/jxl/dec_reconstruct.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "acc_enc_group.hpp"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_transforms-inl.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/gauss_blur.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quant_weights.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Rebind;
+
+// The following functions modulate an exponent (out_val) and return the updated
+// value. Their descriptor is limited to 8 lanes for 8x8 blocks.
+
+// Returns 1 / (out_val + 0.4). Stand-in for mask estimation; eventually this
+// should be replaced with butteraugli's masking.
+float ComputeMaskForAcStrategyUse(const float out_val) {
+    constexpr float kBias = 0.4f;
+    const float denominator = out_val + kBias;
+    return 1.0f / denominator;
+}
+
+// Evaluates the masking curve on `out_val`: a constant base plus three
+// weighted reciprocal terms of the scaled, clamped input.
+template <class D, class V>
+V ComputeMask(const D d, const V out_val) {
+    const auto one = Set(d, 1.0f);
+    const auto base = Set(d, -0.74174993f);
+    const auto in_scale = Set(d, 0.74760422233706747f);
+    const auto w_lin = Set(d, 12.906028311180409f);
+    const auto off_lin = Set(d, 305.04035728311436f);
+    const auto w_sq = Set(d, 5.0220313103171232f);
+    const auto off_sq = Set(d, 2.1925739705298404f);
+    const auto w_sq4 = Set(d, 3.2353257320940401f);
+    const auto off_sq4 = Set(d, 0.25f) * off_sq;
+    // Avoid division by zero.
+    const auto s = Max(out_val * in_scale, Set(d, 1e-3f));
+    const auto r_lin = one / (s + off_lin);
+    const auto r_sq = one / MulAdd(s, s, off_sq);
+    const auto r_sq4 = one / MulAdd(s, s, off_sq4);
+    // TODO(jyrki): a log or two here could make sense. Butteraugli effectively
+    // uses log(log(x + C)) for this: one log in saturating visual masking, and
+    // another to counter the exponential modulation values used here.
+    const auto lin_plus_sq = MulAdd(w_lin, r_lin, w_sq * r_sq);
+    return base + MulAdd(w_sq4, r_sq4, lin_plus_sq);
+}
+
+// Narrows a full-width vector to descriptor `d` via an aligned buffer. Assumes `vfull` lanes are identical.
+template <class D, class VFull>
+Vec<D> CapTo(const D d, VFull vfull) {
+    using Elem = typename D::T;
+    const HWY_FULL(Elem) d_full;
+    HWY_ALIGN Elem buffer[MaxLanes(d_full)];
+    Store(vfull, d_full, buffer);
+    return Load(d, buffer);
+}
+
+// kSGmul and kSGmul2 represent a scaling difference between jxl and butteraugli.
+static const float kSGmul = 226.0480446705883f;
+static const float kSGmul2 = 1.0f / 73.377132366608819f;
+static const float kLog2 = 0.693147181f; // ln(2)
+// Includes correction factor for std::log -> log2.
+static const float kSGRetMul = kSGmul2 * 18.6580932135f * kLog2;
+static const float kSGVOffset = 7.14672470003f;
+
+template <bool invert, typename D, typename V>
+V RatioOfDerivativesOfCubicRootToSimpleGamma(const D d, V v) {
+    // jxl's opsin space is the cubic root of photons (v * v * v relates to the
+    // photon count), while butteraugli's psychovisual space is
+    // SimpleGamma(v * v * v). The ratio computed here lets quantization move
+    // from the former to the latter. With `invert` the result is num / den,
+    // otherwise den / num. Negative inputs are treated as zero.
+    const auto num_scale = Set(d, kSGRetMul * 3 * kSGmul);
+    const auto den_scale = Set(d, kLog2 * kSGmul);
+    const auto den_offset = Set(d, kSGVOffset * kLog2);
+
+    v = ZeroIfNegative(v);
+    const auto v_sq = v * v;
+    const auto num = num_scale * v_sq;
+    const auto den = MulAdd(den_scale * v, v_sq, den_offset);
+
+    if (invert) return num / den;
+    return den / num;
+}
+
+// Scalar convenience wrapper: evaluates the vector overload on a single lane.
+template <bool invert = false>
+static float RatioOfDerivativesOfCubicRootToSimpleGamma(float v) {
+    const HWY_CAPPED(float, 1) d1;
+    return GetLane(RatioOfDerivativesOfCubicRootToSimpleGamma<invert>(d1, Load(d1, &v)));
+}
+
+// TODO(veluca): this function computes an approximation of the derivative of
+// SimpleGamma with (f(x+eps)-f(x))/eps. Consider two-sided approximation or
+// exact derivatives. For reference, SimpleGamma was:
+/*
+template <typename D, typename V>
+V SimpleGamma(const D d, V v) {
+  // A simple HDR compatible gamma function.
+  const auto mul = Set(d, kSGmul);
+  const auto kRetMul = Set(d, kSGRetMul);
+  const auto kRetAdd = Set(d, kSGmul2 * -20.2789020414f);
+  const auto kVOffset = Set(d, kSGVOffset);
+
+  v *= mul;
+
+  // This should happen rarely, but may lead to a NaN, which is rather
+  // undesirable. Since negative photons don't exist we solve the NaNs by
+  // clamping here.
+  // TODO(veluca): with FastLog2f, this no longer leads to NaNs.
+  v = ZeroIfNegative(v);
+  return kRetMul * FastLog2f(d, v + kVOffset) + kRetAdd;
+}
+*/
+
+// Adjusts the exponent `out_val` of the 8x8 block at (x, y) by the scaled log2
+// of the block-average inverted cubic-root / SimpleGamma derivative ratio.
+template <class D, class V>
+V GammaModulation(
+    const D d, const size_t x, const size_t y, const ImageF& xyb_x, const ImageF& xyb_y, const V out_val) {
+    const float kBias = 0.16f;
+    JXL_DASSERT(kBias > kOpsinAbsorbanceBias[0]);
+    JXL_DASSERT(kBias > kOpsinAbsorbanceBias[1]);
+    JXL_DASSERT(kBias > kOpsinAbsorbanceBias[2]);
+    const auto bias_v = Set(d, kBias);
+    const auto one_half = Set(d, 0.5f);
+    auto ratio_sum = Zero(d);
+    for (size_t row = 0; row < 8; ++row) {
+        const float* const JXL_RESTRICT xs = xyb_x.Row(y + row) + x;
+        const float* const JXL_RESTRICT ys = xyb_y.Row(y + row) + x;
+        for (size_t col = 0; col < 8; col += Lanes(d)) {
+            const auto biased_y = Load(d, ys + col) + bias_v;
+            const auto in_x = Load(d, xs + col);
+            // The two ratios are taken at (biased Y - X) and (biased Y + X).
+            const auto ratio_r = RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/true>(d, biased_y - in_x);
+            const auto ratio_g = RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/true>(d, biased_y + in_x);
+            ratio_sum += one_half * (ratio_r + ratio_g);
+        }
+    }
+    // Mean over the 64 pixels of the block.
+    const auto mean_ratio = SumOfLanes(ratio_sum) * Set(d, 1.0f / 64);
+
+    // ideally -1.0, but likely optimal correction adds some entropy, so slightly
+    // less than that.
+    // ln(2) constant folded in because we want std::log but have FastLog2f.
+    const auto kGam = Set(d, -0.15526878023684174f * 0.693147180559945f);
+    return MulAdd(kGam, FastLog2f(d, mean_ratio), out_val);
+}
+
+// Lowers the exponent `out_val` of the 8x8 block at (x, y) by its scaled sum of absolute neighbor differences.
+template <class D, class V>
+V HfModulation(const D d, const size_t x, const size_t y, const ImageF& xyb, const V out_val) {
+    // Zero out the invalid differences for the rightmost value per row.
+    const Rebind<uint32_t, D> du;
+    HWY_ALIGN constexpr uint32_t kMaskRight[kBlockDim] = {~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, 0};
+
+    auto sum = Zero(d); // sum of absolute differences with right and below
+
+    for (size_t dy = 0; dy < 8; ++dy) {
+        const float* JXL_RESTRICT row_in = xyb.Row(y + dy) + x;
+        const float* JXL_RESTRICT row_in_next = dy == 7 ? row_in : xyb.Row(y + dy + 1) + x;
+        // For dy == 7 the "row below" is the row itself, so its vertical differences are zero.
+// In SCALAR, there is no guarantee of having extra row padding.
+// Hence, we need to ensure we don't access pixels outside the row itself.
+// In SIMD modes, however, rows are padded, so it's safe to access one
+// garbage value after the row. The vector then gets masked with kMaskRight
+// to remove the influence of that value.
+#if HWY_TARGET != HWY_SCALAR
+        for (size_t dx = 0; dx < 8; dx += Lanes(d)) {
+#else
+        for (size_t dx = 0; dx < 7; dx += Lanes(d)) {
+#endif
+            const auto p = Load(d, row_in + dx);
+            const auto pr = LoadU(d, row_in + dx + 1);
+            const auto mask = BitCast(d, Load(du, kMaskRight + dx));
+            sum += And(mask, AbsDiff(p, pr));
+            // NOTE(review): with the HWY_SCALAR bound (dx < 7) column 7 is not visited, so it adds no vertical term; intended?
+            const auto pd = Load(d, row_in_next + dx);
+            sum += AbsDiff(p, pd);
+        }
+    }
+    // 112 presumably counts the 7 * 8 horizontal plus 8 * 7 vertical neighbor pairs of a block -- TODO confirm.
+    sum = SumOfLanes(sum);
+    return MulAdd(sum, Set(d, -2.0052193233688884f / 112), out_val);
+}
+
+// Converts the per-block values of `out` inside `rect` into a multiplicative
+// quantization field: each value is modulated as an exponent, exponentiated,
+// and blended towards 0.5 * scale as butteraugli_target rises from 7 to 14.
+void PerBlockModulations(const float butteraugli_target,
+                         const ImageF& xyb_x,
+                         const ImageF& xyb_y,
+                         const float scale,
+                         const Rect& rect,
+                         ImageF* out) {
+    JXL_ASSERT(SameSize(xyb_x, xyb_y));
+    JXL_ASSERT(DivCeil(xyb_x.xsize(), kBlockDim) == out->xsize());
+    JXL_ASSERT(DivCeil(xyb_x.ysize(), kBlockDim) == out->ysize());
+
+    const float kDampenRampStart = 7.0f;
+    const float kDampenRampEnd = 14.0f;
+    float dampen = 1.0f;
+    if (butteraugli_target >= kDampenRampStart) {
+        const float progress = (butteraugli_target - kDampenRampStart) / (kDampenRampEnd - kDampenRampStart);
+        dampen = 1.0f - progress;
+        if (dampen < 0) dampen = 0;
+    }
+    const float mul = scale * dampen;
+    const float add = (1.0f - dampen) * (0.5f * scale);
+
+    const HWY_CAPPED(float, kBlockDim) df;
+    for (size_t by = 0; by < rect.ysize(); ++by) {
+        const size_t iy = rect.y0() + by;
+        float* const JXL_RESTRICT row_out = out->Row(iy);
+        for (size_t bx = 0; bx < rect.xsize(); ++bx) {
+            const size_t ix = rect.x0() + bx;
+            auto exponent = ComputeMask(df, Set(df, row_out[ix]));
+            exponent = HfModulation(df, ix * 8, iy * 8, xyb_y, exponent);
+            exponent = GammaModulation(df, ix * 8, iy * 8, xyb_x, xyb_y, exponent);
+            // Everything above modulated an exponent; 1.442695041 = log2(e) turns FastPow2f into exp.
+            row_out[ix] = FastPow2f(GetLane(exponent) * 1.442695041f) * mul + add;
+        }
+    }
+}
+
+// Returns 0.25 * sqrt(v * sqrt(kMul * 1e8) + kLogOffset), lane-wise.
+template <typename D, typename V>
+V MaskingSqrt(const D d, V v) {
+    static const float kLogOffset = 26.481471032459346f;
+    static const float kMul = 211.50759899638012f;
+    const auto gain = Sqrt(Set(d, kMul * 1e8));
+    return Set(d, 0.25f) * Sqrt(MulAdd(v, gain, Set(d, kLogOffset)));
+}
+
+// Scalar convenience wrapper around the vector MaskingSqrt (single lane).
+float MaskingSqrt(const float v) {
+    const HWY_CAPPED(float, 1) d1;
+    return GetLane(MaskingSqrt(d1, Load(d1, &v)));
+}
+
+// Inserts `v` into the four running minima when it is smaller than min3.
+// The minima are expected in ascending order (min0 smallest); the previous
+// min3 is dropped and values at or after the insertion point move up by one.
+void StoreMin4(const float v, float& min0, float& min1, float& min2, float& min3) {
+    if (!(v < min3)) {
+        return;
+    }
+    float* const slots[4] = {&min0, &min1, &min2, &min3};
+    // Find the first slot holding a value greater than `v`; the guard above
+    // guarantees that slot 3 qualifies.
+    int pos = 0;
+    while (pos < 3 && !(v < *slots[pos])) {
+        ++pos;
+    }
+
+    // Shift the tail up by one, then drop `v` into the gap.
+    for (int i = 3; i > pos; --i) *slots[i] = *slots[i - 1];
+    *slots[pos] = v;
+}
+
+// Looks for smooth areas near the area of degradation: when the surroundings
+// are generally smooth, masking is suppressed. For every pixel of `from_rect`
+// a weighted sum of the pixel and the four smallest values of its clamped 3x3
+// neighbourhood is formed; each 2x2 group of sums is added into one pixel of
+// `to_rect`, so the output is downsampled 2x.
+void FuzzyErosion(const Rect& from_rect, const ImageF& from, const Rect& to_rect, ImageF* to) {
+    constexpr int kStep = 1;
+    static_assert(kStep == 1, "Step must be 1");
+    JXL_ASSERT(to_rect.xsize() * 2 == from_rect.xsize());
+    JXL_ASSERT(to_rect.ysize() * 2 == from_rect.ysize());
+    static const float kMulC = 0.05f;
+    static const float kMul0 = 0.05f;
+    static const float kMul1 = 0.05f;
+    static const float kMul2 = 0.05f;
+    static const float kMul3 = 0.05f;
+    const size_t width = from.xsize();
+    const size_t height = from.ysize();
+    // Swaps its arguments when needed so that `lo` <= `hi` afterwards.
+    const auto order = [](float& lo, float& hi) { if (lo > hi) std::swap(lo, hi); };
+    for (size_t fy = 0; fy < from_rect.ysize(); ++fy) {
+        const size_t y = from_rect.y0() + fy;
+        const float* above = from.Row(y >= kStep ? y - kStep : y);
+        const float* center = from.Row(y);
+        const float* below = from.Row(y + kStep < height ? y + kStep : y);
+        float* dst_row = to_rect.Row(to, fy / 2);
+        for (size_t fx = 0; fx < from_rect.xsize(); ++fx) {
+            const size_t x = from_rect.x0() + fx;
+            const size_t left = x >= kStep ? x - kStep : x;
+            const size_t right = x + kStep < width ? x + kStep : x;
+            // Seed the four minima with four neighbourhood values and sort them.
+            float min0 = center[x];
+            float min1 = center[left];
+            float min2 = center[right];
+            float min3 = above[left];
+            order(min0, min1);
+            order(min0, min2);
+            order(min0, min3);
+            order(min1, min2);
+            order(min1, min3);
+            order(min2, min3);
+            // Merge in the remaining five values of the 3x3 neighbourhood.
+            StoreMin4(above[x], min0, min1, min2, min3);
+            StoreMin4(above[right], min0, min1, min2, min3);
+            StoreMin4(below[left], min0, min1, min2, min3);
+            StoreMin4(below[x], min0, min1, min2, min3);
+            StoreMin4(below[right], min0, min1, min2, min3);
+            const float v = kMulC * center[x] + kMul0 * min0 + kMul1 * min1 + kMul2 * min2 + kMul3 * min3;
+            // The pixel with even fx and fy starts the output sum; the others add to it.
+            float& dst = dst_row[fx / 2];
+            dst = (fx % 2 == 0 && fy % 2 == 0) ? v : dst + v;
+        }
+    }
+}
+
+struct AdaptiveQuantizationImpl {
+    // Allocates aq_map with one entry per kBlockDim x kBlockDim block of `xyb`.
+    void Init(const Image3F& xyb) {
+        JXL_DASSERT(xyb.xsize() % kBlockDim == 0);
+        JXL_DASSERT(xyb.ysize() % kBlockDim == 0);
+        const size_t xblocks = xyb.xsize() / kBlockDim, yblocks = xyb.ysize() / kBlockDim;
+        aq_map = ImageF(xblocks, yblocks);
+    }
+    // Reallocates diff_buffer with one row per thread and grows pre_erosion to at least num_threads images.
+    void PrepareBuffers(size_t num_threads) {
+        diff_buffer = ImageF(kEncTileDim + 8, num_threads);
+        const auto side = kEncTileDimInBlocks * 2 + 2;
+        while (pre_erosion.size() < num_threads) pre_erosion.emplace_back(side, side);
+    }
+
+    void ComputeTile(
+        float butteraugli_target, float scale, const Image3F& xyb, const Rect& rect, const int thread, ImageF* mask) {
+        PROFILER_ZONE("aq DiffPrecompute");
+        const size_t xsize = xyb.xsize();
+        const size_t ysize = xyb.ysize();
+
+        // The XYB gamma is 3.0 to be able to decode faster with two muls.
+        // Butteraugli's gamma is matching the gamma of human eye, around 2.6.
+        // We approximate the gamma difference by adding one cubic root into
+        // the adaptive quantization. This gives us a total gamma of 2.6666
+        // for quantization uses.
+        const float match_gamma_offset = 0.019;
+
+        const HWY_FULL(float) df;
+        const float kXMul = 23.426802998210313f;
+        const auto kXMulv = Set(df, kXMul);
+
+        size_t y_start = rect.y0() * 8;
+        size_t y_end = y_start + rect.ysize() * 8;
+
+        size_t x0 = rect.x0() * 8;
+        size_t x1 = x0 + rect.xsize() * 8;
+        if (x0 != 0) x0 -= 4;
+        if (x1 != xyb.xsize()) x1 += 4;
+        if (y_start != 0) y_start -= 4;
+        if (y_end != xyb.ysize()) y_end += 4;
+        pre_erosion[thread].ShrinkTo((x1 - x0) / 4, (y_end - y_start) / 4);
+
+        // Computes image (padded to multiple of 8x8) of local pixel differences.
+        // Subsample both directions by 4.
+        for (size_t y = y_start; y < y_end; ++y) {
+            size_t y2 = y + 1 < ysize ? y + 1 : y;
+            size_t y1 = y > 0 ? y - 1 : y;
+
+            const float* row_in = xyb.PlaneRow(1, y);
+            const float* row_in1 = xyb.PlaneRow(1, y1);
+            const float* row_in2 = xyb.PlaneRow(1, y2);
+            const float* row_x_in = xyb.PlaneRow(0, y);
+            const float* row_x_in1 = xyb.PlaneRow(0, y1);
+            const float* row_x_in2 = xyb.PlaneRow(0, y2);
+            float* JXL_RESTRICT row_out = diff_buffer.Row(thread);
+
+            auto scalar_pixel = [&](size_t x) {
+                const size_t x2 = x + 1 < xsize ? x + 1 : x;
+                const size_t x1 = x > 0 ? x - 1 : x;
+                const float base = 0.25f * (row_in2[x] + row_in1[x] + row_in[x1] + row_in[x2]);
+                const float gammac = RatioOfDerivativesOfCubicRootToSimpleGamma(row_in[x] + match_gamma_offset);
+                float diff = gammac * (row_in[x] - base);
+                diff *= diff;
+                const float base_x = 0.25f * (row_x_in2[x] + row_x_in1[x] + row_x_in[x1] + row_x_in[x2]);
+                float diff_x = gammac * (row_x_in[x] - base_x);
+                diff_x *= diff_x;
+                diff += kXMul * diff_x;
+                diff = MaskingSqrt(diff);
+                if ((y % 4) != 0) {
+                    row_out[x - x0] += diff;
+                } else {
+                    row_out[x - x0] = diff;
+                }
+            };
+
+            size_t x = x0;
+            // First pixel of the row.
+            if (x0 == 0) {
+                scalar_pixel(x0);
+                ++x;
+            }
+            // SIMD
+            const auto match_gamma_offset_v = Set(df, match_gamma_offset);
+            const auto quarter = Set(df, 0.25f);
+            for (; x + 1 + Lanes(df) < x1; x += Lanes(df)) {
+                const auto in = LoadU(df, row_in + x);
+                const auto in_r = LoadU(df, row_in + x + 1);
+                const auto in_l = LoadU(df, row_in + x - 1);
+                const auto in_t = LoadU(df, row_in2 + x);
+                const auto in_b = LoadU(df, row_in1 + x);
+                auto base = quarter * (in_r + in_l + in_t + in_b);
+                auto gammacv =
+                    RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/false>(df, in + match_gamma_offset_v);
+                auto diff = gammacv * (in - base);
+                diff *= diff;
+
+                const auto in_x = LoadU(df, row_x_in + x);
+                const auto in_x_r = LoadU(df, row_x_in + x + 1);
+                const auto in_x_l = LoadU(df, row_x_in + x - 1);
+                const auto in_x_t = LoadU(df, row_x_in2 + x);
+                const auto in_x_b = LoadU(df, row_x_in1 + x);
+                auto base_x = quarter * (in_x_r + in_x_l + in_x_t + in_x_b);
+                auto diff_x = gammacv * (in_x - base_x);
+                diff_x *= diff_x;
+                diff += kXMulv * diff_x;
+                diff = MaskingSqrt(df, diff);
+                if ((y & 3) != 0) {
+                    diff += LoadU(df, row_out + x - x0);
+                }
+                StoreU(diff, df, row_out + x - x0);
+            }
+            // Scalar
+            for (; x < x1; ++x) {
+                scalar_pixel(x);
+            }
+            if (y % 4 == 3) {
+                float* row_dout = pre_erosion[thread].Row((y - y_start) / 4);
+                for (size_t x = 0; x < (x1 - x0) / 4; x++) {
+                    row_dout[x] =
+                        (row_out[x * 4] + row_out[x * 4 + 1] + row_out[x * 4 + 2] + row_out[x * 4 + 3]) * 0.25f;
+                }
+            }
+        }
+        Rect from_rect(x0 % 8 == 0 ? 0 : 1, y_start % 8 == 0 ? 0 : 1, rect.xsize() * 2, rect.ysize() * 2);
+        FuzzyErosion(from_rect, pre_erosion[thread], rect, &aq_map);
+        for (size_t y = 0; y < rect.ysize(); ++y) {
+            const float* aq_map_row = rect.ConstRow(aq_map, y);
+            float* mask_row = rect.Row(mask, y);
+            for (size_t x = 0; x < rect.xsize(); ++x) {
+                mask_row[x] = ComputeMaskForAcStrategyUse(aq_map_row[x]);
+            }
+        }
+        PerBlockModulations(butteraugli_target, xyb.Plane(0), xyb.Plane(1), scale, rect, &aq_map);
+    }
+    std::vector<ImageF> pre_erosion;
+    ImageF aq_map;
+    ImageF diff_buffer;
+};
+
+ImageF AdaptiveQuantizationMap(const float butteraugli_target,
+                               const Image3F& xyb,
+                               const FrameDimensions& frame_dim,
+                               float scale,
+                               ThreadPool* pool,
+                               ImageF* mask) {
+    // Splits the frame into encoder tiles and computes the AQ map and the mask tile by tile on the pool.
+    PROFILER_ZONE("aq AdaptiveQuantMap");
+    AdaptiveQuantizationImpl impl;
+    impl.Init(xyb);
+    *mask = ImageF(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+    const size_t tiles_per_row = DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks);
+    const size_t tiles_per_col = DivCeil(frame_dim.ysize_blocks, kEncTileDimInBlocks);
+    const auto init_threads = [&](size_t num_threads) {
+        impl.PrepareBuffers(num_threads);
+        return true;
+    };
+    const auto run_tile = [&](const int tid, int thread) {
+        // Tiles are numbered row-major; the last tile of each row/column is clipped to the frame size.
+        const size_t tile_col = tid % tiles_per_row;
+        const size_t tile_row = tid / tiles_per_row;
+        const size_t block_x0 = tile_col * kEncTileDimInBlocks;
+        const size_t block_y0 = tile_row * kEncTileDimInBlocks;
+        const size_t block_x1 = std::min((tile_col + 1) * kEncTileDimInBlocks, frame_dim.xsize_blocks);
+        const size_t block_y1 = std::min((tile_row + 1) * kEncTileDimInBlocks, frame_dim.ysize_blocks);
+        const Rect tile_rect(block_x0, block_y0, block_x1 - block_x0, block_y1 - block_y0);
+        impl.ComputeTile(butteraugli_target, scale, xyb, tile_rect, thread, mask);
+    };
+    RunOnPool(pool, 0, tiles_per_row * tiles_per_col, init_threads, run_tile, "AQ DiffPrecompute");
+
+    return std::move(impl).aq_map;
+}
+
+} // namespace
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(AdaptiveQuantizationMap);
+
+namespace {
+bool FLAGS_log_search_state = false; // If true, FindBestQuantization prints the score and quant range of each iteration.
+// If true, prints the quantization maps at each iteration.
+bool FLAGS_dump_quant_state = false;
+
+void DumpHeatmap(
+    const AuxOut* aux_out, const std::string& label, const ImageF& image, float good_threshold, float bad_threshold) {
+    // Renders `image` as a heat map and dumps it under "<label><zero-padded butteraugli iteration count>".
+    const Image3F colored = CreateHeatMapImage(image, good_threshold, bad_threshold);
+    char dump_name[200];
+    snprintf(dump_name, sizeof(dump_name), "%s%05d", label.c_str(), aux_out->num_butteraugli_iters);
+    aux_out->DumpImage(dump_name, colored); }
+
+void DumpHeatmaps(const AuxOut* aux_out,
+                  float ba_target,
+                  const ImageF& quant_field,
+                  const ImageF& tile_heatmap,
+                  const ImageF& bt_diffmap) {
+    if (!WantDebugOutput(aux_out)) return; // debug-only helper
+    const size_t cols = quant_field.xsize();
+    const size_t rows = quant_field.ysize();
+    ImageF reciprocal(cols, rows);
+    for (size_t row = 0; row < rows; ++row) {
+        const float* JXL_RESTRICT src = quant_field.ConstRow(row);
+        float* JXL_RESTRICT dst = reciprocal.Row(row);
+        for (size_t col = 0; col < cols; ++col) dst[col] = 1.0f / src[col]; // never zero
+    }
+    DumpHeatmap(aux_out, "quant_heatmap", reciprocal, 4.0f * ba_target, 6.0f * ba_target);
+    DumpHeatmap(aux_out, "tile_heatmap", tile_heatmap, ba_target, 1.5f * ba_target);
+    // matches heat maps produced by the command line tool.
+    DumpHeatmap(aux_out, "bt_diffmap", bt_diffmap, ButteraugliFuzzyInverse(1.5), ButteraugliFuzzyInverse(0.5));
+}
+
+ImageF TileDistMap(const ImageF& distmap, int tile_size, int margin, const AcStrategyImage& ac_strategy) {
+    // Reduces the per-pixel distance map to one value per block: a weighted 16th-power mean over each
+    // varblock's pixels (window widened by `margin`), written to every block the varblock covers.
+    PROFILER_FUNC;
+    const int tiles_across = (distmap.xsize() + tile_size - 1) / tile_size;
+    const int tiles_down = (distmap.ysize() + tile_size - 1) / tile_size;
+    ImageF result(tiles_across, tiles_down);
+    const size_t result_stride = result.PixelsPerRow();
+    constexpr float kBorderMul = 0.98f;
+    constexpr float kCornerMul = 0.7f;
+    // 16th norm is less than the max norm, we reduce the difference
+    // with this normalization factor.
+    constexpr float kTileNorm = 1.2f;
+    for (int ty = 0; ty < tiles_down; ++ty) {
+        AcStrategyRow strategies = ac_strategy.ConstRow(ty);
+        float* JXL_RESTRICT out_row = result.Row(ty);
+        for (int tx = 0; tx < tiles_across; ++tx) {
+            AcStrategy acs = strategies[tx];
+            if (!acs.IsFirstBlock()) continue;
+            const int span_x = acs.covered_blocks_x() * tile_size;
+            const int span_y = acs.covered_blocks_y() * tile_size;
+            const int top = std::max<int>(0, tile_size * ty - margin);
+            const int bottom = std::min<int>(distmap.ysize(), tile_size * ty + span_y + margin);
+            const int left = std::max<int>(0, tile_size * tx - margin);
+            const int right = std::min<int>(distmap.xsize(), tile_size * tx + span_x + margin);
+            float weighted_sum = 0.0;
+            double weight_total = 0;
+            // Accumulate weighted 16th powers over the pixel window of this varblock.
+            for (int py = top; py < bottom; ++py) {
+                const bool edge_row = margin != 0 && (py == top || py == bottom - 1);
+                const float row_weight = edge_row ? kBorderMul : 1.0f;
+                const float* const JXL_RESTRICT src = distmap.Row(py);
+                for (int px = left; px < right; ++px) {
+                    float weight = row_weight;
+                    if (margin != 0 && (px == left || px == right - 1)) {
+                        // Edge column: border weight, or corner weight when the row is an edge row too.
+                        weight = (weight == 1.0) ? kBorderMul : kCornerMul;
+                    }
+                    // Raise the distance to the 16th power by squaring four times.
+                    float p = src[px];
+                    p *= p;
+                    p *= p;
+                    p *= p;
+                    p *= p;
+                    weighted_sum += weight * p;
+                    weight_total += weight;
+                }
+            }
+            if (weight_total == 0) weight_total = 1;
+            const float tile_dist = kTileNorm * std::pow(weighted_sum / weight_total, 1.0f / 16.0f);
+            out_row[tx] = tile_dist;
+            // Copy the value into every block of the varblock; rows below are reached through the stride.
+            for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+                for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+                    out_row[tx + result_stride * iy + ix] = tile_dist;
+                }
+            }
+        }
+    }
+    return result;
+}
+
+constexpr float kDcQuantPow = 0.57f; // exponent applied to the scaled butteraugli target in InitialQuantDC
+static const float kDcQuant = 1.12f; // InitialQuantDC divides this by the DC butteraugli target
+static const float kAcQuant = 0.787f; // InitialQuantField divides this by the butteraugli target
+
+void FindBestQuantization(
+    const ImageBundle& linear, const Image3F& opsin, PassesEncoderState* enc_state, ThreadPool* pool, AuxOut* aux_out) { // Refines enc_state->initial_quant_field by repeated roundtrip + butteraugli comparison against `linear`.
+    const CompressParams& cparams = enc_state->cparams;
+    Quantizer& quantizer = enc_state->shared.quantizer;
+    ImageI& raw_quant_field = enc_state->shared.raw_quant_field;
+    ImageF& quant_field = enc_state->initial_quant_field;
+
+    const float butteraugli_target = cparams.butteraugli_distance;
+    ButteraugliParams params = cparams.ba_params;
+    params.intensity_target = linear.metadata()->IntensityTarget();
+    // Hack the default intensity target value to be 80.0, the intensity
+    // target of sRGB images and a more reasonable viewing default than
+    // JPEG XL file format's default.
+    if (fabs(params.intensity_target - 255.0f) < 1e-3) {
+        params.intensity_target = 80.0f;
+    }
+    JxlButteraugliComparator comparator(params);
+    JXL_CHECK(comparator.SetReferenceImage(linear));
+    bool lower_is_better = (comparator.GoodQualityScore() < comparator.BadQualityScore());
+    const float initial_quant_dc = InitialQuantDC(butteraugli_target);
+    AdjustQuantField(enc_state->shared.ac_strategy, Rect(quant_field), &quant_field);
+    ImageF tile_distmap;
+    ImageF initial_quant_field = CopyImage(quant_field); // snapshot used by the clamp in round kOriginalComparisonRound
+
+    float initial_qf_min, initial_qf_max;
+    ImageMinMax(initial_quant_field, &initial_qf_min, &initial_qf_max);
+    float initial_qf_ratio = initial_qf_max / initial_qf_min; // NOTE(review): assumes initial_qf_min is non-zero — confirm
+    float qf_max_deviation_low = std::sqrt(250 / initial_qf_ratio);
+    float asymmetry = 2;
+    if (qf_max_deviation_low < asymmetry) asymmetry = qf_max_deviation_low;
+    float qf_lower = initial_qf_min / (asymmetry * qf_max_deviation_low);
+    float qf_higher = initial_qf_max * (qf_max_deviation_low / asymmetry);
+
+    JXL_ASSERT(qf_higher / qf_lower < 253); // algebraically the ratio is initial_qf_ratio * qf_max_deviation_low^2 = 250
+
+    constexpr int kOriginalComparisonRound = 1;
+    int iters = cparams.max_butteraugli_iters;
+    if (iters > 7) { // cap at 7 iterations
+        iters = 7;
+    }
+    if (cparams.speed_tier != SpeedTier::kTortoise) { // NOTE(review): this replaces the value even when max_butteraugli_iters < 2 — confirm intended
+        iters = 2;
+    }
+    for (int i = 0; i < iters + 1; ++i) { // iters adjustment rounds plus one final measurement-only round
+        if (FLAGS_dump_quant_state) {
+            printf("\nQuantization field:\n");
+            for (size_t y = 0; y < quant_field.ysize(); ++y) {
+                for (size_t x = 0; x < quant_field.xsize(); ++x) {
+                    printf(" %.5f", quant_field.Row(y)[x]);
+                }
+                printf("\n");
+            }
+        }
+        quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field);
+        ImageBundle linear = RoundtripImage(opsin, enc_state, pool); // NOTE: shadows the `linear` parameter; this is the roundtripped image
+        PROFILER_ZONE("enc Butteraugli");
+        float score;
+        ImageF diffmap;
+        JXL_CHECK(comparator.CompareWith(linear, &diffmap, &score));
+        if (!lower_is_better) { // negate so that smaller values are better in the code below
+            score = -score;
+            diffmap = ScaleImage(-1.0f, diffmap);
+        }
+        tile_distmap = TileDistMap(diffmap, 8, 0, enc_state->shared.ac_strategy);
+        if (WantDebugOutput(aux_out)) {
+            aux_out->DumpImage(("dec" + ToString(i)).c_str(), *linear.color());
+            DumpHeatmaps(aux_out, butteraugli_target, quant_field, tile_distmap, diffmap);
+        }
+        if (aux_out != nullptr) ++aux_out->num_butteraugli_iters;
+        if (FLAGS_log_search_state) {
+            float minval, maxval;
+            ImageMinMax(quant_field, &minval, &maxval);
+            printf("\nButteraugli iter: %d/%d\n", i, cparams.max_butteraugli_iters); // NOTE(review): prints max_butteraugli_iters, not the effective `iters`
+            printf("Butteraugli distance: %f\n", score);
+            printf("quant range: %f ... %f  DC quant: %f\n", minval, maxval, initial_quant_dc);
+            if (FLAGS_dump_quant_state) {
+                quantizer.DumpQuantizationMap(raw_quant_field);
+            }
+        }
+
+        if (i == iters) break; // last round only measures; the field is not adjusted again
+
+        double kPow[8] = { // per-round exponent applied to blocks that are at or below the target
+            0.2, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+        };
+        double kPowMod[8] = { // per-round slope of that exponent in (butteraugli_target - 1); all zero here
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+        };
+        if (i == kOriginalComparisonRound) {
+            // Don't allow optimization to make the quant field a lot worse than
+            // what the initial guess was. This allows the AC field to have enough
+            // precision to reduce the oscillations due to the dc reconstruction.
+            double kInitMul = 0.6;
+            const double kOneMinusInitMul = 1.0 - kInitMul;
+            for (size_t y = 0; y < quant_field.ysize(); ++y) {
+                float* const JXL_RESTRICT row_q = quant_field.Row(y);
+                const float* const JXL_RESTRICT row_init = initial_quant_field.Row(y);
+                for (size_t x = 0; x < quant_field.xsize(); ++x) {
+                    double clamp = kOneMinusInitMul * row_q[x] + kInitMul * row_init[x]; // blend of current and initial value
+                    if (row_q[x] < clamp) { // only ever raises the value towards the blend
+                        row_q[x] = clamp;
+                        if (row_q[x] > qf_higher) row_q[x] = qf_higher;
+                        if (row_q[x] < qf_lower) row_q[x] = qf_lower;
+                    }
+                }
+            }
+        }
+
+        double cur_pow = 0.0;
+        if (i < 7) {
+            cur_pow = kPow[i] + (butteraugli_target - 1.0) * kPowMod[i];
+            if (cur_pow < 0) {
+                cur_pow = 0;
+            }
+        }
+        if (cur_pow == 0.0) { // exponent 0: only blocks above the target are changed
+            for (size_t y = 0; y < quant_field.ysize(); ++y) {
+                const float* const JXL_RESTRICT row_dist = tile_distmap.Row(y);
+                float* const JXL_RESTRICT row_q = quant_field.Row(y);
+                for (size_t x = 0; x < quant_field.xsize(); ++x) {
+                    const float diff = row_dist[x] / butteraugli_target; // > 1 means the block is worse than the target
+                    if (diff > 1.0f) {
+                        float old = row_q[x];
+                        row_q[x] *= diff;
+                        int qf_old = old * quantizer.InvGlobalScale() + 0.5;
+                        int qf_new = row_q[x] * quantizer.InvGlobalScale() + 0.5;
+                        if (qf_old == qf_new) { // the scaled value rounds to the same integer: step by quantizer.Scale() instead
+                            row_q[x] = old + quantizer.Scale();
+                        }
+                    }
+                    if (row_q[x] > qf_higher) row_q[x] = qf_higher;
+                    if (row_q[x] < qf_lower) row_q[x] = qf_lower;
+                }
+            }
+        } else { // non-zero exponent: blocks at or below the target are also scaled, by diff^cur_pow
+            for (size_t y = 0; y < quant_field.ysize(); ++y) {
+                const float* const JXL_RESTRICT row_dist = tile_distmap.Row(y);
+                float* const JXL_RESTRICT row_q = quant_field.Row(y);
+                for (size_t x = 0; x < quant_field.xsize(); ++x) {
+                    const float diff = row_dist[x] / butteraugli_target;
+                    if (diff <= 1.0f) {
+                        row_q[x] *= std::pow(diff, cur_pow);
+                    } else { // same handling as the diff > 1 case of the branch above
+                        float old = row_q[x];
+                        row_q[x] *= diff;
+                        int qf_old = old * quantizer.InvGlobalScale() + 0.5;
+                        int qf_new = row_q[x] * quantizer.InvGlobalScale() + 0.5;
+                        if (qf_old == qf_new) {
+                            row_q[x] = old + quantizer.Scale();
+                        }
+                    }
+                    if (row_q[x] > qf_higher) row_q[x] = qf_higher;
+                    if (row_q[x] < qf_lower) row_q[x] = qf_lower;
+                }
+            }
+        }
+    }
+    quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field); // commit the final field to the quantizer
+}
+
+void FindBestQuantizationMaxError(const Image3F& opsin,
+                                  PassesEncoderState* enc_state,
+                                  ThreadPool* pool,
+                                  AuxOut* aux_out) {
+    // TODO(veluca): this only works if opsin is in XYB. The current encoder does
+    // not have code paths that produce non-XYB opsin here.
+    JXL_CHECK(enc_state->shared.frame_header.color_transform == ColorTransform::kXYB);
+    const CompressParams& cparams = enc_state->cparams;
+    Quantizer& quantizer = enc_state->shared.quantizer;
+    ImageI& raw_quant_field = enc_state->shared.raw_quant_field;
+    ImageF& quant_field = enc_state->initial_quant_field;
+
+    // TODO(veluca): better choice of this value.
+    const float initial_quant_dc = 16 * std::sqrt(0.1f / cparams.butteraugli_distance);
+    AdjustQuantField(enc_state->shared.ac_strategy, Rect(quant_field), &quant_field);
+
+    const float inv_max_err[3] = {1.0f / cparams.max_error[0], 1.0f / cparams.max_error[1], 1.0f / cparams.max_error[2]};
+
+    for (int iter = 0; iter < cparams.max_butteraugli_iters + 1; ++iter) {
+        quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field);
+        if (aux_out) aux_out->DumpXybImage(("ops" + ToString(iter)).c_str(), opsin);
+        ImageBundle decoded = RoundtripImage(opsin, enc_state, pool);
+        if (aux_out) aux_out->DumpXybImage(("dec" + ToString(iter)).c_str(), *decoded.color());
+
+        for (size_t by = 0; by < enc_state->shared.frame_dim.ysize_blocks; by++) {
+            AcStrategyRow strategies = enc_state->shared.ac_strategy.ConstRow(by);
+            for (size_t bx = 0; bx < enc_state->shared.frame_dim.xsize_blocks; bx++) {
+                AcStrategy acs = strategies[bx];
+                if (!acs.IsFirstBlock()) continue;
+                const size_t y_limit = std::min<size_t>((by + acs.covered_blocks_y()) * kBlockDim, decoded.ysize()); // clip to decoded image
+                const size_t x_limit = std::min<size_t>((bx + acs.covered_blocks_x()) * kBlockDim, decoded.xsize());
+                float worst = 0;
+                for (size_t c = 0; c < 3; c++) {
+                    for (size_t y = by * kBlockDim; y < y_limit; y++) {
+                        const float* JXL_RESTRICT in_row = opsin.ConstPlaneRow(c, y);
+                        const float* JXL_RESTRICT dec_row = decoded.color()->ConstPlaneRow(c, y);
+                        for (size_t x = bx * kBlockDim; x < x_limit; x++) {
+                            worst = std::max(std::abs(in_row[x] - dec_row[x]) * inv_max_err[c], worst);
+                        }
+                    }
+                }
+                // Target an error between max_error/2 and max_error.
+                // If the error in the varblock is above the target, increase the qf to
+                // compensate. If the error is below the target, decrease the qf.
+                // However, to avoid an excessive increase of the qf, only do so if the
+                // error is less than half the maximum allowed error.
+                float qf_mul = 1.0f;
+                if (worst < 0.5f) {
+                    qf_mul = worst * 2.0f;
+                } else if (worst > 1.0f) {
+                    qf_mul = worst;
+                }
+                for (size_t qy = by; qy < by + acs.covered_blocks_y(); qy++) {
+                    float* JXL_RESTRICT qf_row = quant_field.Row(qy);
+                    for (size_t qx = bx; qx < bx + acs.covered_blocks_x(); qx++) {
+                        qf_row[qx] *= qf_mul;
+                    }
+                }
+            }
+        }
+    }
+    quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field);
+}
+
+} // namespace
+
+void AdjustQuantField(const AcStrategyImage& ac_strategy, const Rect& rect, ImageF* quant_field) {
+    // Replace the whole quant_field in non-8x8 blocks with the maximum of each
+    // 8x8 block.
+    const size_t row_stride = quant_field->PixelsPerRow();
+    for (size_t y = 0; y < rect.ysize(); ++y) {
+        AcStrategyRow strategies = ac_strategy.ConstRow(rect, y);
+        float* JXL_RESTRICT base_row = rect.Row(quant_field, y);
+        for (size_t x = 0; x < rect.xsize(); ++x) {
+            AcStrategy acs = strategies[x];
+            if (!acs.IsFirstBlock()) continue;
+            JXL_ASSERT(x + acs.covered_blocks_x() <= quant_field->xsize());
+            JXL_ASSERT(y + acs.covered_blocks_y() <= quant_field->ysize());
+            const size_t blocks_x = acs.covered_blocks_x();
+            const size_t blocks_y = acs.covered_blocks_y();
+            // Pass 1: largest value inside the varblock (rows below are reached through the stride).
+            float largest = base_row[x];
+            for (size_t iy = 0; iy < blocks_y; iy++) {
+                for (size_t ix = 0; ix < blocks_x; ix++) largest = std::max(base_row[x + ix + iy * row_stride], largest);
+            }
+            // Pass 2: write that value to every block of the varblock.
+            for (size_t iy = 0; iy < blocks_y; iy++) {
+                for (size_t ix = 0; ix < blocks_x; ix++) base_row[x + ix + iy * row_stride] = largest;
+            }
+        }
+    }
+}
+
+float InitialQuantDC(float butteraugli_target) {
+    const float kKnee = 2.9; // Butteraugli target where non-linearity kicks in.
+    const float compressed = kKnee * std::pow((1.0f / kKnee) * butteraugli_target, kDcQuantPow);
+    const float capped = std::min<float>(butteraugli_target, compressed);
+    const float target_dc = std::max<float>(0.5f * butteraugli_target, capped);
+    // We want the maximum DC value to be at most 2**15 * kInvDCQuant / quant_dc.
+    // The maximum DC value might not be in the kXybRange because of inverse
+    // gaborish, so we add some slack to the maximum theoretical quant obtained
+    // this way (64).
+    return std::min(kDcQuant / target_dc, 50.f);
+}
+
+ImageF InitialQuantField(const float butteraugli_target,
+                         const Image3F& opsin,
+                         const FrameDimensions& frame_dim,
+                         ThreadPool* pool,
+                         float rescale,
+                         ImageF* mask) {
+    // Dispatches to AdaptiveQuantizationMap with scale = (kAcQuant / butteraugli_target) * rescale.
+    PROFILER_FUNC;
+    const float ac_scale = (kAcQuant / butteraugli_target) * rescale;
+    return HWY_DYNAMIC_DISPATCH(AdaptiveQuantizationMap)(butteraugli_target, opsin, frame_dim, ac_scale, pool, mask);
+}
+
+void FindBestQuantizer(const ImageBundle* linear,
+                       const Image3F& opsin,
+                       PassesEncoderState* enc_state,
+                       ThreadPool* pool,
+                       AuxOut* aux_out,
+                       double rescale) {
+    // Picks the quant-field search from the compress params; `rescale` is not read in this function.
+    if (enc_state->cparams.max_error_mode) {
+        PROFILER_ZONE("enc find best maxerr");
+        FindBestQuantizationMaxError(opsin, enc_state, pool, aux_out);
+        return;
+    }
+    if (enc_state->cparams.speed_tier > SpeedTier::kKitten) return; // tiers above kKitten run no search
+    PROFILER_ZONE("enc find best2"); // Normal encoding to a butteraugli score.
+    FindBestQuantization(*linear, opsin, enc_state, pool, aux_out);
+}
+
+ImageBundle RoundtripImage(const Image3F& opsin, PassesEncoderState* enc_state, ThreadPool* pool) { // Decodes enc_state's coefficients back into a linear-sRGB ImageBundle the size of opsin.
+    PROFILER_ZONE("enc roundtrip");
+    std::unique_ptr<PassesDecoderState> dec_state = jxl::make_unique<PassesDecoderState>();
+    JXL_CHECK(dec_state->output_encoding_info.Set(
+        *enc_state->shared.metadata, ColorEncoding::LinearSRGB(enc_state->shared.metadata->m.color_encoding.IsGray())));
+    dec_state->shared = &enc_state->shared; // the decoder state points at the encoder's shared state (no copy)
+    JXL_ASSERT(opsin.ysize() % kBlockDim == 0);
+
+    const size_t xsize_groups = DivCeil(opsin.xsize(), kGroupDim);
+    const size_t ysize_groups = DivCeil(opsin.ysize(), kGroupDim);
+    const size_t num_groups = xsize_groups * ysize_groups;
+
+    size_t num_special_frames = enc_state->special_frames.size(); // remembered so the list can be restored before returning
+
+    std::unique_ptr<ModularFrameEncoder> modular_frame_encoder = // NOTE(review): only referenced by the commented-out call below
+        jxl::make_unique<ModularFrameEncoder>(enc_state->shared.frame_header, enc_state->cparams);
+    /*  InitializePassesEncoder(opsin, pool, enc_state, modular_frame_encoder.get(),
+                              nullptr);*/
+    JXL_CHECK(dec_state->Init());
+    dec_state->InitForAC(pool);
+
+    ImageBundle decoded(&enc_state->shared.metadata->m);
+    decoded.origin = enc_state->shared.frame_header.frame_origin;
+    decoded.SetFromImage(Image3F(opsin.xsize(), opsin.ysize()), dec_state->output_encoding_info.color_encoding);
+
+    // Same as dec_state->shared->frame_header.nonserialized_metadata->m
+    const ImageMetadata& metadata = *decoded.metadata();
+    if (!metadata.extra_channel_info.empty()) {
+        // Add dummy extra channels to the dec_state: FinalizeFrameDecoding moves
+        // these extra channels to the ImageBundle, and is required that the amount
+        // of extra channels matches its metadata()->extra_channel_info.size().
+        // Normally we'd place these extra channels in the ImageBundle, but in this
+        // case FinalizeFrameDecoding is the one that does this.
+        std::vector<ImageF> extra_channels;
+        extra_channels.reserve(metadata.extra_channel_info.size());
+        for (size_t i = 0; i < metadata.extra_channel_info.size(); i++) {
+            extra_channels.emplace_back(decoded.xsize(), decoded.ysize());
+            // Must initialize the image with data to not affect blending with
+            // uninitialized memory.
+            ZeroFillImage(&extra_channels.back());
+        }
+        dec_state->extra_channels = std::move(extra_channels);
+    }
+
+    hwy::AlignedUniquePtr<GroupDecCache[]> group_dec_caches;
+    const auto allocate_storage = [&](size_t num_threads) { // pool init callback: one GroupDecCache per thread
+        dec_state->EnsureStorage(num_threads);
+        group_dec_caches = hwy::MakeUniqueAlignedArray<GroupDecCache>(num_threads);
+        return true;
+    };
+    const auto process_group = [&](const int group_index, const int thread) { // pool task: decodes one group into `decoded`
+        if (dec_state->shared->frame_header.loop_filter.epf_iters > 0) {
+            ComputeSigma(dec_state->shared->BlockGroupRect(group_index), dec_state.get());
+        }
+        JXL_CHECK(DecodeGroupForRoundtrip(enc_state->coeffs, group_index, dec_state.get(), &group_dec_caches[thread],
+                                          thread, &decoded, nullptr));
+    };
+    RunOnPool(pool, 0, num_groups, allocate_storage, process_group, "AQ loop");
+
+    // Fine to abort via JXL_CHECK instead of error handling, since this only happens
+    // on the encoder side where we can't be fed with invalid data.
+    JXL_CHECK(FinalizeFrameDecoding(&decoded, dec_state.get(), pool,
+                                    /*force_fir=*/false, /*skip_blending=*/true));
+    // Ensure we don't create any new special frames.
+    enc_state->special_frames.resize(num_special_frames);
+
+    return decoded;
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/others/src/acc_enc_cache.cpp b/codec/L2/demos/jxlEnc/others/src/acc_enc_cache.cpp
new file mode 100644
index 0000000000..75a87723b1
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/acc_enc_cache.cpp
@@ -0,0 +1,204 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_cache.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <type_traits>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_frame.h"
+#include "lib/jxl/enc_frame.h"
+#include "acc_enc_group.hpp"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/passes_state.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+
+// Fills in enc_state for the frame described by enc_state->shared: sizes the per-pass coefficient storage, runs
+// ComputeCoefficients on every group, produces the DC image (by encoding and re-decoding a separate DC frame when
+// kUseDcFrame is set, otherwise through modular_frame_encoder) and finally adds the AC metadata per DC group.
+// xsize, ysize and the dct*/dc* vectors are not read here; they are forwarded unchanged to ComputeCoefficients.
+// When aux_out is non-null, shared.dc_storage is passed to it for inspection at the end.
+// Nothing is returned: failures in the DC-frame path are checked with JXL_CHECK / JXL_ASSERT.
+// NOTE(review): the dct*/dc* vectors look like precomputed transform outputs - confirm with ComputeCoefficients.
+void InitializePassesEncoder(const Image3F& opsin,
+                             ThreadPool* pool,
+                             PassesEncoderState* enc_state,
+                             ModularFrameEncoder* modular_frame_encoder,
+                             AuxOut* aux_out,
+                             size_t xsize,
+                             size_t ysize,
+                             std::vector<std::vector<float> >& dctIDT,
+                             std::vector<std::vector<float> >& dct2x2,
+                             std::vector<std::vector<float> >& dct4x4,
+                             std::vector<std::vector<float> >& dct8x8,
+                             std::vector<std::vector<float> >& dct16x16,
+                             std::vector<std::vector<float> >& dct32x32,
+                             std::vector<std::vector<float> >& dcIDT,
+                             std::vector<std::vector<float> >& dc2x2,
+                             std::vector<std::vector<float> >& dc4x4,
+                             std::vector<std::vector<float> >& dc8x8,
+                             std::vector<std::vector<float> >& dc16x16,
+                             std::vector<std::vector<float> >& dc32x32) {
+    PROFILER_FUNC;
+
+    PassesSharedState& JXL_RESTRICT shared = enc_state->shared;
+
+    enc_state->histogram_idx.resize(shared.frame_dim.num_groups);
+
+    enc_state->x_qm_multiplier = std::pow(1.25f, shared.frame_header.x_qm_scale - 2.0f);
+    enc_state->b_qm_multiplier = std::pow(1.25f, shared.frame_header.b_qm_scale - 2.0f);
+
+    if (enc_state->coeffs.size() < shared.frame_header.passes.num_passes) {
+        enc_state->coeffs.reserve(shared.frame_header.passes.num_passes);
+        for (size_t i = enc_state->coeffs.size(); i < shared.frame_header.passes.num_passes; i++) {
+            // Allocate enough coefficients for each group on every row.
+            enc_state->coeffs.emplace_back(
+                make_unique<ACImageT<int32_t> >(kGroupDim * kGroupDim, shared.frame_dim.num_groups));
+        }
+    }
+    while (enc_state->coeffs.size() > shared.frame_header.passes.num_passes) {
+        enc_state->coeffs.pop_back();
+    }
+
+    Image3F dc(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
+    // Coefficients are computed group by group on the calling thread. The RunOnPool dispatch over
+    // shared.frame_dim.num_groups that originally wrapped ComputeCoefficients is disabled in this version.
+    // NOTE(review): the reason is not visible here - confirm ComputeCoefficients is thread-safe before restoring it.
+
+    for (size_t i = 0; i < shared.frame_dim.num_groups; i++) {
+        ComputeCoefficients(i, enc_state, opsin, &dc, xsize, ysize, dctIDT, dct2x2, dct4x4, dct8x8, dct16x16, dct32x32,
+                            dcIDT, dc2x2, dc4x4, dc8x8, dc16x16, dc32x32);
+    }
+
+    if (shared.frame_header.flags & FrameHeader::kUseDcFrame) {
+        CompressParams cparams = enc_state->cparams;
+        // Guess a distance that produces good initial results.
+        cparams.butteraugli_distance =
+            std::max(kMinButteraugliDistance, enc_state->cparams.butteraugli_distance * 0.1f);
+        cparams.dots = Override::kOff;
+        cparams.noise = Override::kOff;
+        cparams.patches = Override::kOff;
+        cparams.max_error_mode = true;
+        cparams.resampling = 1;
+        cparams.ec_resampling = 1;
+        for (size_t c = 0; c < 3; c++) {
+            cparams.max_error[c] = shared.quantizer.MulDC()[c];
+        }
+        JXL_ASSERT(cparams.progressive_dc > 0);
+        cparams.progressive_dc--;
+        // The DC frame will have alpha=0. Don't erase its contents.
+        cparams.keep_invisible = Override::kOn;
+        // No EPF or Gaborish in DC frames.
+        cparams.epf = 0;
+        cparams.gaborish = Override::kOff;
+        // Use kVarDCT in max_error_mode for intermediate progressive DC,
+        // and kModular for the smallest DC (first in the bitstream)
+        if (cparams.progressive_dc == 0) {
+            cparams.modular_mode = true;
+            cparams.quality_pair.first = cparams.quality_pair.second =
+                99.f - enc_state->cparams.butteraugli_distance * 0.2f;
+        }
+        ImageBundle ib(&shared.metadata->m);
+        // This is a lie - dc is in XYB
+        // (but EncodeFrame will skip RGB->XYB conversion anyway)
+        ib.SetFromImage(std::move(dc), ColorEncoding::LinearSRGB(shared.metadata->m.color_encoding.IsGray()));
+        if (!ib.metadata()->extra_channel_info.empty()) {
+            // Add dummy extra channels to the patch image: dc_level frames do not yet
+            // support extra channels, but the codec expects that the amount of extra
+            // channels in frames matches that in the metadata of the codestream.
+            std::vector<ImageF> extra_channels;
+            extra_channels.reserve(ib.metadata()->extra_channel_info.size());
+            for (size_t i = 0; i < ib.metadata()->extra_channel_info.size(); i++) {
+                extra_channels.emplace_back(ib.xsize(), ib.ysize());
+                // Must initialize the image with data to not affect blending with
+                // uninitialized memory.
+                // TODO(lode): dc_level must copy and use the real extra channels
+                // instead.
+                ZeroFillImage(&extra_channels.back());
+            }
+            ib.SetExtraChannels(std::move(extra_channels));
+        }
+        std::unique_ptr<PassesEncoderState> state = jxl::make_unique<PassesEncoderState>();
+
+        auto special_frame = std::unique_ptr<BitWriter>(new BitWriter());
+        FrameInfo dc_frame_info;
+        dc_frame_info.frame_type = FrameType::kDCFrame;
+        dc_frame_info.dc_level = shared.frame_header.dc_level + 1;
+        dc_frame_info.ib_needs_color_transform = false;
+        dc_frame_info.save_before_color_transform = true; // Implicitly true
+        // TODO(lode): the EncodeFrame / DecodeFrame pair here is likely broken in
+        // case of dc_level >= 3, since EncodeFrame may output multiple frames
+        // to the bitwriter, while DecodeFrame reads only one.
+        JXL_CHECK(
+            EncodeFrame(cparams, dc_frame_info, shared.metadata, ib, state.get(), pool, special_frame.get(), nullptr));
+        const Span<const uint8_t> encoded = special_frame->GetSpan();
+        enc_state->special_frames.emplace_back(std::move(special_frame));
+
+        BitReader br(encoded);
+        ImageBundle decoded(&shared.metadata->m);
+        std::unique_ptr<PassesDecoderState> dec_state = jxl::make_unique<PassesDecoderState>();
+        JXL_CHECK(dec_state->output_encoding_info.Set(
+            *shared.metadata, ColorEncoding::LinearSRGB(shared.metadata->m.color_encoding.IsGray())));
+        JXL_CHECK(DecodeFrame({}, dec_state.get(), pool, &br, &decoded, *shared.metadata, /*constraints=*/nullptr));
+        // TODO(lode): shared.frame_header.dc_level should be equal to
+        // dec_state.shared->frame_header.dc_level - 1 here, since above we set
+        // dc_frame_info.dc_level = shared.frame_header.dc_level + 1, and
+        // dc_frame_info.dc_level is used by EncodeFrame. However, if EncodeFrame
+        // outputs multiple frames, this assumption could be wrong.
+        shared.dc_storage = CopyImage(dec_state->shared->dc_frames[shared.frame_header.dc_level]);
+        ZeroFillImage(&shared.quant_dc);
+        shared.dc = &shared.dc_storage;
+        JXL_CHECK(br.Close());
+    } else {
+        auto compute_dc_coeffs = [&](int group_index, int /* thread */) {
+            modular_frame_encoder->AddVarDCTDC(dc, group_index, enc_state->cparams.butteraugli_distance >= 2.0f &&
+                                                                    enc_state->cparams.speed_tier < SpeedTier::kFalcon,
+                                               enc_state);
+        };
+        RunOnPool(pool, 0, shared.frame_dim.num_dc_groups, ThreadPool::SkipInit(), compute_dc_coeffs,
+                  "Compute DC coeffs");
+        // TODO(veluca): this is only useful in tests and if inspection is enabled.
+        if (!(shared.frame_header.flags & FrameHeader::kSkipAdaptiveDCSmoothing)) {
+            AdaptiveDCSmoothing(shared.quantizer.MulDC(), &shared.dc_storage, pool);
+        }
+    }
+    auto compute_ac_meta = [&](int group_index, int /* thread */) {
+        modular_frame_encoder->AddACMetadata(group_index, /*jpeg_transcode=*/false, enc_state);
+    };
+    RunOnPool(pool, 0, shared.frame_dim.num_dc_groups, ThreadPool::SkipInit(), compute_ac_meta, "Compute AC Metadata");
+
+    if (aux_out != nullptr) {
+        aux_out->InspectImage3F("compressed_image:InitializeFrameEncCache:dc_dec", shared.dc_storage);
+    }
+}
+
+// Allocates num_nzeroes (kGroupDimInBlocks x kGroupDimInBlocks) the first time it is found empty.
+void EncCache::InitOnce() {
+    PROFILER_FUNC;
+    // A non-zero width means the image has already been allocated; leave it as is.
+    if (num_nzeroes.xsize() != 0) return;
+    num_nzeroes = Image3I(kGroupDimInBlocks, kGroupDimInBlocks);
+}
+
+} // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/others/src/acc_enc_chroma_from_luma.cpp b/codec/L2/demos/jxlEnc/others/src/acc_enc_chroma_from_luma.cpp
new file mode 100644
index 0000000000..7fca240d0b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/acc_enc_chroma_from_luma.cpp
@@ -0,0 +1,536 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "acc_enc_chroma_from_luma.hpp"
+
+#include <float.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <iomanip>
+#include <iostream>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "xilinx/src/acc_enc_chroma_from_luma.cpp"
+#include <hwy/aligned_allocator.h>
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_transforms-inl.h"
+#include "lib/jxl/enc_transforms-inl.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/quantizer.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+// Highway descriptor (tag) for full-width float vectors; passed to the Set/Load/Store/Lanes calls below.
+static HWY_FULL(float) df;
+
+struct CFLFunction {
+    static constexpr float kCoeff = 1.f / 3;
+    static constexpr float kThres = 100.0f;
+    static constexpr float kInvColorFactor = 1.0f / kDefaultColorFactor;
+    CFLFunction(const float* values_m, const float* values_s, size_t num, float base, float distance_mul)
+        : values_m(values_m), values_s(values_s), num(num), base(base), distance_mul(distance_mul) {}
+
+    // Returns f'(x), where f is 1/3 * sum ((|color residual| + 1)^2-1) + distance_mul * x^2 * num, and
+    // additionally stores f'(x + eps) in *fpeps and f'(x - eps) in *fmeps.
+    // num must be a multiple of Lanes(df); values_m and values_s are read with whole-vector loads.
+    float Compute(float x, float eps, float* fpeps, float* fmeps) const {
+        JXL_ASSERT(num % Lanes(df) == 0);
+        const float x_hi = x + eps;
+        const float x_lo = x - eps;
+        const float reg_slope = 2 * distance_mul * num;
+        const auto k_inv_cf = Set(df, kInvColorFactor);
+        const auto k_thres = Set(df, kThres);
+        const auto k_2coeff = Set(df, kCoeff * 2.0f);
+        const auto k_one = Set(df, 1.0f);
+        const auto k_zero = Set(df, 0.0f);
+        const auto base_vec = Set(df, base);
+        const auto x_mid_v = Set(df, x);
+        const auto x_hi_v = Set(df, x_hi);
+        const auto x_lo_v = Set(df, x_lo);
+        auto sum_mid = Zero(df);
+        auto sum_hi = Zero(df);
+        auto sum_lo = Zero(df);
+        for (size_t i = 0; i < num; i += Lanes(df)) {
+            const auto m = Load(df, values_m + i);
+            // color residual = slope * x + offset
+            const auto slope = k_inv_cf * m;
+            const auto offset = base_vec * m - Load(df, values_s + i);
+            const auto res_mid = slope * x_mid_v + offset;
+            const auto res_hi = slope * x_hi_v + offset;
+            const auto res_lo = slope * x_lo_v + offset;
+            // Per-element derivative 2 * kCoeff * (|residual| + 1) * slope, negated where the residual is negative.
+            auto term_mid = k_2coeff * (Abs(res_mid) + k_one) * slope;
+            auto term_hi = k_2coeff * (Abs(res_hi) + k_one) * slope;
+            auto term_lo = k_2coeff * (Abs(res_lo) + k_one) * slope;
+            term_mid = IfThenElse(res_mid < k_zero, k_zero - term_mid, term_mid);
+            term_hi = IfThenElse(res_hi < k_zero, k_zero - term_hi, term_hi);
+            term_lo = IfThenElse(res_lo < k_zero, k_zero - term_lo, term_lo);
+            // Elements whose residual at x itself reaches kThres are left out of all three sums.
+            const auto excluded = Abs(res_mid) >= k_thres;
+            sum_mid += IfThenElse(excluded, k_zero, term_mid);
+            sum_hi += IfThenElse(excluded, k_zero, term_hi);
+            sum_lo += IfThenElse(excluded, k_zero, term_lo);
+        }
+
+        *fpeps = reg_slope * x_hi + GetLane(SumOfLanes(sum_hi));
+        *fmeps = reg_slope * x_lo + GetLane(SumOfLanes(sum_lo));
+        return reg_slope * x + GetLane(SumOfLanes(sum_mid));
+    }
+
+    const float* JXL_RESTRICT values_m;
+    const float* JXL_RESTRICT values_s;
+    size_t num;
+    float base;
+    float distance_mul;
+};
+
+// Returns the multiplier x, rounded and clamped to [-128, 127], chosen for the color residual
+// (values_m[i] / kDefaultColorFactor) * x + base * values_m[i] - values_s[i]; returns 0 when num is 0.
+// fast selects a closed-form estimate; otherwise up to 20 Newton steps are run on CFLFunction.
+int32_t FindBestMultiplier(
+    const float* values_m, const float* values_s, size_t num, float base, float distance_mul, bool fast) {
+    if (num == 0) return 0;
+    float best = 0;
+    if (!fast) {
+        constexpr float kEps = 1;
+        constexpr float kMaxStep = 20.0f;
+        CFLFunction objective(values_m, values_s, num, base, distance_mul);
+        // Up to 20 Newton iterations, with approximate derivatives.
+        // Derivatives are approximate due to the high amount of noise in the exact derivatives.
+        for (size_t iter = 0; iter < 20; iter++) {
+            float slope_hi, slope_lo;
+            const float slope = objective.Compute(best, kEps, &slope_hi, &slope_lo);
+            const float curvature = (slope_hi - slope_lo) / (2 * kEps);
+            const float step = slope / curvature;
+            best -= std::min(kMaxStep, std::max(-kMaxStep, step));
+            if (std::abs(step) < 3e-3) break;
+        }
+    } else {
+        static constexpr float kInvColorFactor = 1.0f / kDefaultColorFactor;
+        const auto inv_cf = Set(df, kInvColorFactor);
+        const auto base_vec = Set(df, base);
+        auto sum_aa = Zero(df);
+        auto sum_ab = Zero(df);
+        for (size_t i = 0; i < num; i += Lanes(df)) {
+            const auto m = Load(df, values_m + i);
+            // color residual = ax + b
+            const auto a = inv_cf * m;
+            const auto b = base_vec * m - Load(df, values_s + i);
+            sum_aa = MulAdd(a, a, sum_aa);
+            sum_ab = MulAdd(a, b, sum_ab);
+        }
+        // + distance_mul * x^2 * num
+        best = -GetLane(SumOfLanes(sum_ab)) / (GetLane(SumOfLanes(sum_aa)) + num * distance_mul * 0.5f);
+    }
+    return std::max(-128.0f, std::min(127.0f, roundf(best)));
+}
+
+// Allocates *dc_values with four rows whose width is num_blocks rounded up to a multiple of Lanes(df).
+// Row 0: Y channel, row 1: X channel, row 2: Y channel, row 3: B channel.
+void InitDCStorage(size_t num_blocks, ImageF* dc_values) {
+    const size_t lanes = Lanes(df);
+    *dc_values = ImageF(RoundUpTo(num_blocks, lanes), 4);
+
+    const size_t width = dc_values->xsize();
+    JXL_ASSERT(width != 0);
+    // Zero-fill the last Lanes(df) entries of every row.
+    for (size_t row = 0; row < 4; row++) {
+        for (size_t col = width - lanes; col < width; col++) {
+            dc_values->Row(row)[col] = 0;
+        }
+    }
+}
+
+// Derives the two DC multipliers from dc_values: rows 0 and 1 produce *dc_x (base 0), rows 2 and 3 produce
+// *dc_b (base kYToBRatio). All dc_values.xsize() entries of each row are passed to FindBestMultiplier.
+void ComputeDC(const ImageF& dc_values, bool fast, int* dc_x, int* dc_b) {
+    constexpr float kDistanceMultiplierDC = 1e-5f;
+    const size_t count = dc_values.xsize();
+    // Argument order: values_m, values_s, num, base, distance_mul, fast.
+    *dc_x = FindBestMultiplier(dc_values.Row(0), dc_values.Row(1), count, 0.0f, kDistanceMultiplierDC, fast);
+    *dc_b = FindBestMultiplier(dc_values.Row(2), dc_values.Row(3), count, kYToBRatio, kDistanceMultiplierDC, fast);
+}
+
+// Computes the X and B multipliers of one color tile and records the DC values of its blocks.
+//
+// For every transform block that starts inside r, the X, Y and B coefficients are copied from the precomputed
+// dct*/dc* buffers (no forward transform is run here). The DC values are written to the four rows of
+// *dc_values, and the AC coefficients, weighted by the inverse quantization matrices, are appended to scratch
+// arrays inside mem. FindBestMultiplier then reduces those arrays to one X and one B multiplier, stored in
+// *map_x / *map_b at column x0 / kColorTileDimInBlocks of row y0 / kColorTileDimInBlocks.
+//
+// Parameters:
+//   opsin        only its width is used, to derive the row pitch of *dc_values.
+//   dequant      source of the inverse matrices for channels 0 and 2.
+//   ac_strategy  per-block transform choice; not read when use_dct8 is true.
+//   quantizer    supplies the AC scale and the DC steps; not read when use_dct8 is true.
+//   r            the tile, in block units.
+//   fast         forwarded to FindBestMultiplier.
+//   use_dct8     treat every block as AcStrategy::Type::DCT and use unit scales.
+//   dc_values    rows 0 / 2 receive Y scaled by the X / B DC factor, rows 1 / 3 receive X and B.
+//   mem          scratch of CfLHeuristics::kItemsPerThread floats (see the JXL_DASSERT below).
+//   xsize        rounded up to a multiple of 64, it is the row pitch used to index the dct*/dc* buffers.
+//   ysize        not used.
+//   dct*, dc*    precomputed AC / DC values, indexed [plane][offset] with plane 0 = X, 1 = Y, 2 = B.
+//
+// NOTE(review): raw strategies other than 0..5 are only reported on stdout; the block buffers then keep whatever
+// the previous block left in them. Confirm whether this should be a hard error.
+void ComputeTile(const Image3F& opsin,
+                 const DequantMatrices& dequant,
+                 const AcStrategyImage* ac_strategy,
+                 const Quantizer* quantizer,
+                 const Rect& r,
+                 bool fast,
+                 bool use_dct8,
+                 ImageSB* map_x,
+                 ImageSB* map_b,
+                 ImageF* dc_values,
+                 float* mem,
+
+                 //==========acc interface========
+                 size_t xsize,
+                 size_t ysize,
+                 std::vector<std::vector<float> >& dctIDT,
+                 std::vector<std::vector<float> >& dct2x2,
+                 std::vector<std::vector<float> >& dct4x4,
+                 std::vector<std::vector<float> >& dct8x8,
+                 std::vector<std::vector<float> >& dct16x16,
+                 std::vector<std::vector<float> >& dct32x32,
+
+                 std::vector<std::vector<float> >& dcIDT,
+                 std::vector<std::vector<float> >& dc2x2,
+                 std::vector<std::vector<float> >& dc4x4,
+                 std::vector<std::vector<float> >& dc8x8,
+                 std::vector<std::vector<float> >& dc16x16,
+                 std::vector<std::vector<float> >& dc32x32
+                 //================================
+                 ) {
+    static_assert(kEncTileDimInBlocks == kColorTileDimInBlocks, "Invalid color tile dim");
+    size_t xsize_blocks = opsin.xsize() / kBlockDim;
+    constexpr float kDistanceMultiplierAC = 1e-3f;
+
+    const size_t y0 = r.y0();
+    const size_t x0 = r.x0();
+    const size_t x1 = r.x0() + r.xsize();
+    const size_t y1 = r.y0() + r.ysize();
+
+    int ty = y0 / kColorTileDimInBlocks;
+    int tx = x0 / kColorTileDimInBlocks;
+
+    int8_t* JXL_RESTRICT row_out_x = map_x->Row(ty);
+    int8_t* JXL_RESTRICT row_out_b = map_b->Row(ty);
+
+    float* JXL_RESTRICT dc_values_yx = dc_values->Row(0);
+    float* JXL_RESTRICT dc_values_x = dc_values->Row(1);
+    float* JXL_RESTRICT dc_values_yb = dc_values->Row(2);
+    float* JXL_RESTRICT dc_values_b = dc_values->Row(3);
+
+    // All are aligned.
+    float* HWY_RESTRICT block_y = mem;
+    float* HWY_RESTRICT block_x = block_y + AcStrategy::kMaxCoeffArea;
+    float* HWY_RESTRICT block_b = block_x + AcStrategy::kMaxCoeffArea;
+    float* HWY_RESTRICT coeffs_yx = block_b + AcStrategy::kMaxCoeffArea;
+    float* HWY_RESTRICT coeffs_x = coeffs_yx + kColorTileDim * kColorTileDim;
+    float* HWY_RESTRICT coeffs_yb = coeffs_x + kColorTileDim * kColorTileDim;
+    float* HWY_RESTRICT coeffs_b = coeffs_yb + kColorTileDim * kColorTileDim;
+    float* HWY_RESTRICT scratch_space = coeffs_b + kColorTileDim * kColorTileDim;
+    JXL_DASSERT(scratch_space + 2 * AcStrategy::kMaxCoeffArea == block_y + CfLHeuristics::kItemsPerThread);
+
+    // Small (~256 bytes each)
+    HWY_ALIGN_MAX float dc_y[AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks] = {};
+    HWY_ALIGN_MAX float dc_x[AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks] = {};
+    HWY_ALIGN_MAX float dc_b[AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks] = {};
+    size_t num_ac = 0;
+
+    // Precomputed transform outputs, indexed by raw strategy value (0..5). kSideInBlocks gives, for the same
+    // index, the side length of one transform in 8x8 blocks.
+    using PlaneBuffers = std::vector<std::vector<float> >;
+    constexpr size_t kNumAccStrategies = 6;
+    constexpr size_t kSideInBlocks[kNumAccStrategies] = {1, 1, 1, 1, 2, 4};
+    const PlaneBuffers* const ac_sources[kNumAccStrategies] = {&dct8x8, &dctIDT, &dct2x2, &dct4x4, &dct16x16, &dct32x32};
+    const PlaneBuffers* const dc_sources[kNumAccStrategies] = {&dc8x8, &dcIDT, &dc2x2, &dc4x4, &dc16x16, &dc32x32};
+    // Destinations in the plane order used by the buffers: 0 = X, 1 = Y, 2 = B.
+    float* const block_dst[3] = {block_x, block_y, block_b};
+    float* const dc_dst[3] = {dc_x, dc_y, dc_b};
+    // Row pitch used to index the buffers: xsize rounded up to a multiple of 64.
+    const size_t tile_xsize = (xsize + 63) / 64 * 64;
+    (void)ysize; // Only the width takes part in the buffer indexing.
+
+    for (size_t y = y0; y < y1; ++y) {
+        for (size_t x = x0; x < x1; x++) {
+            AcStrategy acs =
+                use_dct8 ? AcStrategy::FromRawStrategy(AcStrategy::Type::DCT) : ac_strategy->ConstRow(y)[x];
+            if (!acs.IsFirstBlock()) continue;
+            size_t xs = acs.covered_blocks_x();
+
+            // Fetch the AC coefficients and DC values of all three planes for this transform block.
+            const size_t raw = static_cast<size_t>(acs.RawStrategy());
+            if (raw < kNumAccStrategies) {
+                const size_t side = kSideInBlocks[raw];
+                // Row-major index of this transform among transforms of the same size.
+                const size_t block_index = (y / side) * (tile_xsize / (8 * side)) + x / side;
+                const size_t ac_count = 64 * side * side;
+                const size_t dc_count = side * side;
+                for (size_t c = 0; c < 3; c++) {
+                    const std::vector<float>& ac_plane = (*ac_sources[raw])[c];
+                    const std::vector<float>& dc_plane = (*dc_sources[raw])[c];
+                    std::copy_n(ac_plane.data() + ac_count * block_index, ac_count, block_dst[c]);
+                    // DC values are stored side x side per transform and laid out with a pitch of xs here.
+                    for (size_t iy = 0; iy < side; iy++) {
+                        for (size_t ix = 0; ix < side; ix++) {
+                            dc_dst[c][iy * xs + ix] = dc_plane[dc_count * block_index + iy * side + ix];
+                        }
+                    }
+                }
+            } else {
+                // Reported once per block; the buffers are left untouched in this case.
+                std::cout << "unsupported DCT" << std::endl;
+                std::cout << "unsupported DCFromLowFREQ" << std::endl;
+            }
+
+            const float* const JXL_RESTRICT qm_x = dequant.InvMatrix(acs.Strategy(), 0);
+            const float* const JXL_RESTRICT qm_b = dequant.InvMatrix(acs.Strategy(), 2);
+            // Why does a constant seem to work better than
+            // raw_quant_field->Row(y)[x] ?
+            float q = use_dct8 ? 1 : quantizer->Scale() * 400.0f;
+            float q_dc_x = use_dct8 ? 1 : 1.0f / quantizer->GetInvDcStep(0);
+            float q_dc_b = use_dct8 ? 1 : 1.0f / quantizer->GetInvDcStep(2);
+
+            // Copy DCs in dc_values.
+            for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+                for (size_t ix = 0; ix < xs; ix++) {
+                    dc_values_yx[(iy + y) * xsize_blocks + ix + x] = dc_y[iy * xs + ix] * q_dc_x;
+                    dc_values_x[(iy + y) * xsize_blocks + ix + x] = dc_x[iy * xs + ix] * q_dc_x;
+                    dc_values_yb[(iy + y) * xsize_blocks + ix + x] = dc_y[iy * xs + ix] * q_dc_b;
+                    dc_values_b[(iy + y) * xsize_blocks + ix + x] = dc_b[iy * xs + ix] * q_dc_b;
+                }
+            }
+
+            // Do not use this block for computing AC CfL.
+            if (acs.covered_blocks_x() + x0 > x1 || acs.covered_blocks_y() + y0 > y1) {
+                continue;
+            }
+
+            // Copy AC coefficients in the local block. The order in which
+            // coefficients get stored does not matter.
+            size_t cx = acs.covered_blocks_x();
+            size_t cy = acs.covered_blocks_y();
+            CoefficientLayout(&cy, &cx);
+            // Zero out LFs. This introduces terms in the optimization loop that
+            // don't affect the result, as they are all 0, but allow for simpler
+            // SIMDfication.
+            for (size_t iy = 0; iy < cy; iy++) {
+                for (size_t ix = 0; ix < cx; ix++) {
+                    block_y[cx * kBlockDim * iy + ix] = 0;
+                    block_x[cx * kBlockDim * iy + ix] = 0;
+                    block_b[cx * kBlockDim * iy + ix] = 0;
+                }
+            }
+            const auto qv = Set(df, q);
+            for (size_t i = 0; i < cx * cy * 64; i += Lanes(df)) {
+                const auto b_y = Load(df, block_y + i);
+                const auto b_x = Load(df, block_x + i);
+                const auto b_b = Load(df, block_b + i);
+                const auto qqm_x = qv * Load(df, qm_x + i);
+                const auto qqm_b = qv * Load(df, qm_b + i);
+                Store(b_y * qqm_x, df, coeffs_yx + num_ac);
+                Store(b_x * qqm_x, df, coeffs_x + num_ac);
+                Store(b_y * qqm_b, df, coeffs_yb + num_ac);
+                Store(b_b * qqm_b, df, coeffs_b + num_ac);
+                num_ac += Lanes(df);
+            }
+        }
+    }
+    JXL_CHECK(num_ac % Lanes(df) == 0);
+    row_out_x[tx] = FindBestMultiplier(coeffs_yx, coeffs_x, num_ac, 0.0f, kDistanceMultiplierAC, fast);
+    row_out_b[tx] = FindBestMultiplier(coeffs_yb, coeffs_b, num_ac, kYToBRatio, kDistanceMultiplierAC, fast);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+// Export the per-target functions defined above so HWY_DYNAMIC_DISPATCH can select one at run time.
+HWY_EXPORT(InitDCStorage);
+HWY_EXPORT(ComputeDC);
+HWY_EXPORT(ComputeTile);
+
+void CfLHeuristics::Init(const Image3F& opsin) {
+    size_t xsize_blocks = opsin.xsize() / kBlockDim;
+    size_t ysize_blocks = opsin.ysize() / kBlockDim;
+    HWY_DYNAMIC_DISPATCH(InitDCStorage)
+    (xsize_blocks * ysize_blocks, &dc_values);
+}
+
+void CfLHeuristics::ComputeTile(const Rect& r,
+                                const Image3F& opsin,
+                                const DequantMatrices& dequant,
+                                const AcStrategyImage* ac_strategy,
+                                const Quantizer* quantizer,
+                                bool fast,
+                                size_t thread,
+                                ColorCorrelationMap* cmap,
+
+                                //==========acc interface========
+                                size_t xsize,
+                                size_t ysize,
+                                std::vector<std::vector<float> >& dctIDT,
+                                std::vector<std::vector<float> >& dct2x2,
+                                std::vector<std::vector<float> >& dct4x4,
+                                std::vector<std::vector<float> >& dct8x8,
+                                std::vector<std::vector<float> >& dct16x16,
+                                std::vector<std::vector<float> >& dct32x32,
+
+                                std::vector<std::vector<float> >& dcIDT,
+                                std::vector<std::vector<float> >& dc2x2,
+                                std::vector<std::vector<float> >& dc4x4,
+                                std::vector<std::vector<float> >& dc8x8,
+                                std::vector<std::vector<float> >& dc16x16,
+                                std::vector<std::vector<float> >& dc32x32
+                                //================================
+                                ) {
+    bool use_dct8 = ac_strategy == nullptr;
+    HWY_DYNAMIC_DISPATCH(ComputeTile)
+    (opsin, dequant, ac_strategy, quantizer, r, fast, use_dct8, &cmap->ytox_map, &cmap->ytob_map, &dc_values,
+     mem.get() + thread * kItemsPerThread, xsize, ysize, dctIDT, dct2x2, dct4x4, dct8x8, dct16x16, dct32x32, dcIDT,
+     dc2x2, dc4x4, dc8x8, dc16x16, dc32x32);
+}
+
+void CfLHeuristics::ComputeDC(bool fast, ColorCorrelationMap* cmap) {
+    int32_t ytob_dc = 0;
+    int32_t ytox_dc = 0;
+    HWY_DYNAMIC_DISPATCH(ComputeDC)(dc_values, fast, &ytox_dc, &ytob_dc);
+    cmap->SetYToBDC(ytob_dc);
+    cmap->SetYToXDC(ytox_dc);
+}
+
+void ColorCorrelationMapEncodeDC(ColorCorrelationMap* map, BitWriter* writer, size_t layer, AuxOut* aux_out) {
+    float color_factor = map->GetColorFactor();
+    float base_correlation_x = map->GetBaseCorrelationX();
+    float base_correlation_b = map->GetBaseCorrelationB();
+    int32_t ytox_dc = map->GetYToXDC();
+    int32_t ytob_dc = map->GetYToBDC();
+
+    BitWriter::Allotment allotment(writer, 1 + 2 * kBitsPerByte + 12 + 32);
+    if (ytox_dc == 0 && ytob_dc == 0 && color_factor == kDefaultColorFactor && base_correlation_x == 0.0f &&
+        base_correlation_b == kYToBRatio) {
+        writer->Write(1, 1);
+        ReclaimAndCharge(writer, &allotment, layer, aux_out);
+        return;
+    }
+    writer->Write(1, 0);
+    JXL_CHECK(U32Coder::Write(kColorFactorDist, color_factor, writer));
+    JXL_CHECK(F16Coder::Write(base_correlation_x, writer));
+    JXL_CHECK(F16Coder::Write(base_correlation_b, writer));
+    writer->Write(kBitsPerByte, ytox_dc - std::numeric_limits<int8_t>::min());
+    writer->Write(kBitsPerByte, ytob_dc - std::numeric_limits<int8_t>::min());
+    ReclaimAndCharge(writer, &allotment, layer, aux_out);
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/others/src/acc_enc_cluster.cpp b/codec/L2/demos/jxlEnc/others/src/acc_enc_cluster.cpp
new file mode 100644
index 0000000000..6f31a84d63
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/acc_enc_cluster.cpp
@@ -0,0 +1,758 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "acc_enc_cluster.hpp"
+
+#include <ap_int.h>
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <map>
+#include <memory>
+#include <numeric>
+#include <queue>
+#include <tuple>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "enc_cluster.cpp"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/fast_math-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+template <class V>
+V Entropy(V count, V inv_total, V total) {
+    const HWY_CAPPED(float, Histogram::kRounding) d;
+    const auto zero = Set(d, 0.0f);
+    return IfThenZeroElse(count == total,
+                          zero - count * FastLog2f(d, count) +
+                              count * FastLog2f(d, total)); // zero-count*FastLog2f(d, inv_total * count));
+}
+
+void HistogramEntropy(const Histogram& a) {
+    a.entropy_ = 0.0f;
+    if (a.total_count_ == 0) return;
+
+    const HWY_CAPPED(float, Histogram::kRounding) df;
+    const HWY_CAPPED(int32_t, Histogram::kRounding) di;
+
+    const auto inv_tot = Set(df, 1.0f / a.total_count_);
+    auto entropy_lanes = Zero(df);
+    auto total = Set(df, a.total_count_);
+    // printf("%s: %s: %d, a.data_.size=%d\n", __FILE__, __FUNCTION__, __LINE__,
+    // a.data_.size());
+    for (size_t i = 0; i < a.data_.size(); i += Lanes(di)) {
+        const auto counts = LoadU(di, &a.data_[i]);
+        entropy_lanes += Entropy(ConvertTo(df, counts), inv_tot, total);
+    }
+    a.entropy_ += GetLane(SumOfLanes(entropy_lanes));
+}
+
+float HistogramDistance(const Histogram& a, const Histogram& b) {
+    if (a.total_count_ == 0 || b.total_count_ == 0) return 0;
+
+    const HWY_CAPPED(float, Histogram::kRounding) df;
+    const HWY_CAPPED(int32_t, Histogram::kRounding) di;
+
+    const auto inv_tot = Set(df, 1.0f / (a.total_count_ + b.total_count_));
+    auto distance_lanes = Zero(df);
+    auto total = Set(df, a.total_count_ + b.total_count_);
+
+    for (size_t i = 0; i < std::max(a.data_.size(), b.data_.size()); i += Lanes(di)) {
+        const auto a_counts = a.data_.size() > i ? LoadU(di, &a.data_[i]) : Zero(di);
+        const auto b_counts = b.data_.size() > i ? LoadU(di, &b.data_[i]) : Zero(di);
+        const auto counts = ConvertTo(df, a_counts + b_counts);
+        distance_lanes += Entropy(counts, inv_tot, total);
+    }
+    const float total_distance = GetLane(SumOfLanes(distance_lanes));
+    return total_distance - a.entropy_ - b.entropy_;
+}
+
+// First step of a k-means clustering with a fancy distance metric.
+/*void FastClusterHistograms(const std::vector<Histogram>& in,
+                           const size_t num_contexts_in, size_t max_histograms,
+                           float min_distance, std::vector<Histogram>* out,
+                           std::vector<uint32_t>* histogram_symbols) {
+  PROFILER_FUNC;
+  size_t largest_idx = 0;
+  std::vector<uint32_t> nonempty_histograms;
+  nonempty_histograms.reserve(in.size());
+  int largest_count = 0;
+  printf("%s: %s: %d, num_contexts_in=%d\n", __FILE__, __FUNCTION__, __LINE__,
+num_contexts_in); for (size_t i = 0; i < num_contexts_in; i++) {  // get
+position for largest total_count_ id in in if (in[i].total_count_ == 0)
+continue; HistogramEntropy(in[i]); if (in[i].total_count_ >
+in[largest_idx].total_count_) { largest_idx = i; largest_count =
+in[i].total_count_;
+    }
+    nonempty_histograms.push_back(i);
+  }
+  // No symbols.
+  if (nonempty_histograms.empty()) {
+    out->resize(1);
+    histogram_symbols->clear();
+    histogram_symbols->resize(in.size(), 0);
+    return;
+  }
+  largest_idx = std::find(nonempty_histograms.begin(),
+                          nonempty_histograms.end(), largest_idx) -
+                nonempty_histograms.begin(); // get position for largest
+total_count_ id in nonempty_histograms size_t num_contexts =
+nonempty_histograms.size(); printf("%s: %s: %d, num_contexts of non-empty=%d,
+largest_idx=%d, largest_count=%d\n", __FILE__, __FUNCTION__, __LINE__,
+    num_contexts, largest_idx, largest_count);
+  out->clear();
+  out->reserve(max_histograms);
+  std::vector<float> dists(num_contexts, std::numeric_limits<float>::max());
+  histogram_symbols->resize(in.size(), max_histograms);
+
+  int while_count = 0;
+  while (out->size() < max_histograms && out->size() < num_contexts) {
+    (*histogram_symbols)[nonempty_histograms[largest_idx]] = out->size();
+    out->push_back(in[nonempty_histograms[largest_idx]]);
+    largest_idx = 0;
+    while_count++;
+    for (size_t i = 0; i < num_contexts; i++) {
+      dists[i] = std::min(
+          HistogramDistance(in[nonempty_histograms[i]], out->back()), dists[i]);
+      // Avoid repeating histograms
+      if ((*histogram_symbols)[nonempty_histograms[i]] != max_histograms) {
+        continue;
+      }
+      if (dists[i] > dists[largest_idx]) largest_idx = i;
+    }
+    if (dists[largest_idx] < min_distance) break;
+  }
+
+  for (size_t i = 0; i < num_contexts_in; i++) {
+    if ((*histogram_symbols)[i] != max_histograms) continue;
+    if (in[i].total_count_ == 0) {
+      (*histogram_symbols)[i] = 0;
+      continue;
+    }
+    size_t best = 0;
+    float best_dist = HistogramDistance(in[i], (*out)[best]);
+    for (size_t j = 1; j < out->size(); j++) {
+      float dist = HistogramDistance(in[i], (*out)[j]);
+      if (dist < best_dist) {
+        best = j;
+        best_dist = dist;
+      }
+    }
+    (*out)[best].AddHistogram(in[i]);
+    HistogramEntropy((*out)[best]);
+    (*histogram_symbols)[i] = best;
+  }
+
+  printf("%s: %s: %d, out size=%zu, FastClusterHistograms size=%zu,
+while_count=%d\n", __FILE__, __FUNCTION__, __LINE__, out->size(),
+histogram_symbols->size(), while_count);
+}*/
+
+// Histogram-object form of the scalar entropy primitive. In entropy mode it resets
+// a.entropy_ to 0 and returns sum(count * (log2(total) - log2(count))) over a's bins
+// (b is ignored); otherwise it returns that sum for the bin-wise merge of a and b
+// minus the cached a.entropy_ and b.entropy_. Returns 0 when a needed total is 0.
+float accHistogramDistanceEntropy(const Histogram& a, const Histogram& b, bool isEntropy) {
+    if (isEntropy) {
+        a.entropy_ = 0.0f;
+        if (a.total_count_ == 0) return 0;
+    } else if (a.total_count_ == 0 || b.total_count_ == 0) {
+        return 0;
+    }
+
+    // Entropy mode looks at `a` alone; distance mode looks at the merge of both.
+    float total;
+    size_t num_bins;
+    if (isEntropy) {
+        total = a.total_count_;
+        num_bins = a.data_.size();
+    } else {
+        total = a.total_count_ + b.total_count_;
+        num_bins = std::max(a.data_.size(), b.data_.size());
+    }
+    const float log2_total = total == 0 ? 0 : std::log2(total);
+
+    size_t kept_count = 0; // running sum of the bin counts
+    float kept_clogc = 0;  // running sum of count * log2(count)
+    for (size_t bin = 0; bin < num_bins; bin++) {
+        float merged;
+        if (isEntropy) {
+            merged = a.data_[bin];
+        } else {
+            // A histogram shorter than the other one contributes zero past its end.
+            size_t from_a = a.data_.size() > bin ? a.data_[bin] : 0;
+            size_t from_b = b.data_.size() > bin ? b.data_[bin] : 0;
+            merged = from_a + from_b;
+        }
+
+        // Empty bins contribute nothing; this also avoids evaluating log2(0).
+        const float log2_merged = merged == 0 ? 0 : std::log2(merged);
+
+        // A bin equal to the whole total adds zero to both sums.
+        kept_count += merged == total ? 0 : merged;
+        kept_clogc += merged == total ? 0 : merged * log2_merged;
+    }
+
+    // sum(count) * log2(total) - sum(count * log2(count))
+    const float distance_lanes = kept_count * log2_total - kept_clogc;
+    if (isEntropy) return distance_lanes;
+    return distance_lanes - a.entropy_ - b.entropy_;
+}
+
+// clang-format off
+// Scalar entropy primitive on raw bin arrays. With isEntropy set it returns
+// sum(a[i] * (log2(a_total_count) - log2(a[i]))) over the first a_size bins and
+// ignores all b_* arguments; otherwise it returns the same sum for the bin-wise
+// merge a + b with total a_total_count + b_total_count. Returns 0 as soon as a
+// participating total is 0. Non-positive sizes are treated as "no bins".
+float accHistogramDistanceEntropy(
+#ifndef __SYNTHESIS__
+                                  bool isEntropy,
+                                  int32_t a_size,
+                                  int32_t a_total_count,
+                                  std::vector<int32_t> a_histo,
+                                  int32_t b_size,
+                                  int32_t b_total_count,
+                                  std::vector<int32_t> b_histo
+#else
+                                  bool isEntropy,
+                                  int32_t a_size,
+                                  int32_t a_total_count,
+                                  int32_t a_histo[40],
+                                  int32_t b_size,
+                                  int32_t b_total_count,
+                                  int32_t b_histo[40]
+#endif
+) {
+    // clang-format on
+    if (a_total_count == 0) return 0;
+    if (!isEntropy && b_total_count == 0) return 0;
+
+    // The integer sum is formed first and only then converted to float.
+    const float total = isEntropy ? a_total_count : a_total_count + b_total_count;
+    float totallog2 = total == 0 ? 0 : std::log2(total);
+    size_t sum_count = 0;
+    float sum_dist = 0;
+
+    // Clamp the signed sizes once so that the size_t comparisons below cannot wrap
+    // around for a negative size.
+    const size_t a_len = a_size > 0 ? static_cast<size_t>(a_size) : 0;
+    const size_t b_len = b_size > 0 ? static_cast<size_t>(b_size) : 0;
+    const size_t size = isEntropy ? a_len : std::max(a_len, b_len);
+
+    for (size_t i = 0; i < size; i++) {
+        float counts;
+        if (!isEntropy) {
+            // A histogram shorter than the other one contributes zero past its end.
+            size_t a_counts = a_len > i ? a_histo[i] : 0;
+            size_t b_counts = b_len > i ? b_histo[i] : 0;
+            counts = a_counts + b_counts;
+        } else {
+            counts = a_histo[i];
+        }
+
+        // Empty bins contribute nothing; this also avoids evaluating log2(0).
+        float countlog2 = counts == 0 ? 0 : std::log2(counts);
+
+        // A bin equal to the whole total adds zero to both sums.
+        sum_count += counts == total ? 0 : counts;
+        sum_dist += counts == total ? 0 : counts * countlog2;
+    }
+    // sum(count) * log2(total) - sum(count * log2(count))
+    const float distance_lanes = sum_count * totallog2 - sum_dist;
+    return distance_lanes;
+}
+
+void acc_HistogramDistance(bool isEntropy,
+                           size_t num_contexts,
+                           size_t j,
+                           const std::vector<Histogram> in,
+                           std::vector<uint32_t> nonempty_histograms,
+                           Histogram& ref,
+                           std::vector<float>& dists,
+                           std::vector<size_t>& best,
+                           size_t& largest_idx) {
+    // Entropy mode stores each listed histogram's own entropy in dists[i]; distance mode
+    // keeps the smallest distance to `ref` in dists[i] and records j in best[i] when it
+    // improves. largest_idx ends as the position holding the largest dists[] value.
+    largest_idx = 0;
+    for (size_t i = 0; i < num_contexts; i++) {
+        const Histogram& a = in[nonempty_histograms[i]]; // bind instead of deep-copying
+        float dist_std = accHistogramDistanceEntropy(isEntropy, a.data_.size(), a.total_count_, a.data_,
+                                                     ref.data_.size(), ref.total_count_, ref.data_);
+        const float candidate = isEntropy ? dist_std : dist_std - a.entropy_ - ref.entropy_;
+        if (isEntropy || candidate < dists[i]) {
+            if (!isEntropy) best[i] = j;
+            dists[i] = candidate;
+        }
+        if (dists[i] > dists[largest_idx]) largest_idx = i;
+    }
+}
+
+// clang-format off
+// Scores `ref_histo` against the num_contexts histograms listed in
+// nonempty_histograms[]. Histogram ids below 4096 are read from acc_uramHisto, the
+// others from acc_hbmHisto. acc_histoSize / acc_totalcount are indexed by histogram
+// id; acc_entropy, dists and best by the list position i. Entropy mode writes each
+// histogram's own entropy to dists[i] and leaves best[] and the ref_* inputs unused;
+// distance mode keeps the smallest distance in dists[i] and stores j in best[i] when
+// it improves. largest_idx returns the position of the largest dists[] value.
+void acc_HistogramDistance(
+#ifndef __SYNTHESIS__
+                           bool isEntropy, uint32_t num_contexts, uint32_t j,
+                           const std::vector<uint32_t> acc_histoSize,
+                           const std::vector<std::vector<int32_t> > acc_uramHisto,
+                           const std::vector<std::vector<int32_t> > acc_hbmHisto,
+                           const std::vector<uint32_t> acc_totalcount,
+                           const std::vector<float> acc_entropy,
+                           std::vector<uint32_t> nonempty_histograms,
+                           uint32_t refSize,
+                           std::vector<int32_t> ref_histo,
+                           uint32_t ref_totalcount,
+                           float ref_entropy,
+                           std::vector<float>& dists,
+                           std::vector<uint32_t>& best,
+                           uint32_t& largest_idx
+#else
+                           bool isEntropy, uint32_t num_contexts, uint32_t j,
+                           uint32_t acc_histoSize[8192],
+                           int32_t acc_uramHisto[4096][40],
+                           int32_t acc_hbmHisto[4096][40],
+                           uint32_t acc_totalcount[8192],
+                           float acc_entropy[8192],
+                           uint32_t nonempty_histograms[8192],
+                           uint32_t refSize,
+                           int32_t ref_histo[40],
+                           uint32_t ref_totalcount,
+                           float ref_entropy,
+                           float dists[1024],
+                           uint32_t best[1024],
+                           uint32_t& largest_idx
+#endif
+) {
+    // clang-format on
+    largest_idx = 0;
+    for (size_t i = 0; i < num_contexts; i++) {
+        int idx = nonempty_histograms[i];
+        // Bind the selected row by reference: no temporary std::vector is built, and
+        // the statement also compiles against the raw-array (synthesis) signature.
+        auto& tmp_histo = idx < 4096 ? acc_uramHisto[idx] : acc_hbmHisto[idx - 4096];
+        float dist_std = accHistogramDistanceEntropy(isEntropy, acc_histoSize[idx], acc_totalcount[idx], tmp_histo,
+                                                     refSize, ref_totalcount, ref_histo);
+        const float candidate = isEntropy ? dist_std : dist_std - acc_entropy[i] - ref_entropy;
+        if (isEntropy || candidate < dists[i]) {
+            if (!isEntropy) best[i] = j;
+            dists[i] = candidate;
+        }
+        if (dists[i] > dists[largest_idx]) largest_idx = i;
+    }
+}
+
+// First clustering pass. Seeds up to max_histograms clusters, starting with the
+// non-empty input of largest total count and then repeatedly taking the input
+// farthest from the seeds chosen so far, and finally maps every collected input to
+// its nearest cluster. *histogram_symbols gets one entry per element of `in`.
+void FastClusterHistograms(const std::vector<Histogram>& in,
+                           const size_t num_contexts_in,
+                           size_t max_histograms,
+                           float min_distance,
+                           std::vector<Histogram>* out,
+                           std::vector<uint32_t>* histogram_symbols) {
+    PROFILER_FUNC;
+    uint32_t largest_idx = 0;
+    std::vector<uint32_t> nonempty_histograms;
+    nonempty_histograms.reserve(in.size());
+    for (size_t i = 0; i < num_contexts_in; i++) {
+        if (in[i].total_count_ == 0) continue;
+        if (in[i].total_count_ > in[largest_idx].total_count_) largest_idx = i;
+        nonempty_histograms.push_back(i);
+    }
+
+    // No symbols: bail out before staging or clustering anything.
+    if (nonempty_histograms.empty()) {
+        out->resize(1);
+        histogram_symbols->clear();
+        histogram_symbols->resize(in.size(), 0);
+        return;
+    }
+
+    // Turn the histogram id into its position inside nonempty_histograms.
+    largest_idx =
+        std::find(nonempty_histograms.begin(), nonempty_histograms.end(), largest_idx) - nonempty_histograms.begin();
+    size_t num_contexts = nonempty_histograms.size();
+    std::vector<float> entropy(num_contexts);
+
+    // Stage the inputs in the flat layout read by acc_HistogramDistance(): histogram ids
+    // 0..4095 go to acc_uramHisto, the rest to acc_hbmHisto. The nominal capacity is 8192
+    // histograms of 40 bins; the buffers grow past it so the copy stays in bounds.
+    size_t num_bins = 40;
+    for (size_t i = 0; i < in.size(); i++) num_bins = std::max<size_t>(num_bins, in[i].data_.size());
+    const size_t num_slots = std::max<size_t>(8192, in.size());
+    if (num_slots > 8192 || num_bins > 40) std::cout << "Error Histogram too big!" << std::endl;
+    std::vector<std::vector<int32_t> > acc_uramHisto(4096, std::vector<int32_t>(num_bins, 0));
+    std::vector<std::vector<int32_t> > acc_hbmHisto(num_slots - 4096, std::vector<int32_t>(num_bins, 0));
+    std::vector<uint32_t> acc_total_count(num_slots, 0);
+    std::vector<float> acc_entropy(num_slots, 0);
+    std::vector<uint32_t> acc_histoSize(num_slots, 0);
+    for (size_t i = 0; i < in.size(); i++) {
+        acc_total_count[i] = in[i].total_count_;
+        acc_entropy[i] = in[i].entropy_;
+        acc_histoSize[i] = in[i].data_.size();
+        std::vector<int32_t>& row = i < 4096 ? acc_uramHisto[i] : acc_hbmHisto[i - 4096];
+        std::copy(in[i].data_.begin(), in[i].data_.end(), row.begin());
+    }
+
+    // Entropy pass: fills entropy[]; the reference, best and largest-index arguments are placeholders.
+    Histogram tmp0;
+    std::vector<uint32_t> tmp1;
+    uint32_t tmp2;
+    acc_HistogramDistance(true, num_contexts, 0, acc_histoSize, acc_uramHisto, acc_hbmHisto, acc_total_count,
+                          acc_entropy, nonempty_histograms, tmp0.data_.size(), tmp0.data_, tmp0.total_count_,
+                          tmp0.entropy_, entropy, tmp1, tmp2);
+    for (size_t i = 0; i < num_contexts; i++) {
+        in[nonempty_histograms[i]].entropy_ = entropy[i];
+        acc_entropy[nonempty_histograms[i]] = entropy[i];
+    }
+
+    out->clear();
+    out->reserve(max_histograms);
+    std::vector<float> dists(num_contexts, std::numeric_limits<float>::max());
+    std::vector<uint32_t> best_tmp(num_contexts, 0); // no use
+    histogram_symbols->clear();
+    histogram_symbols->resize(in.size(), 0);
+
+    // Seed loop: ends at max_histograms clusters, when every collected input is a
+    // seed, or once the farthest remaining input is closer than min_distance.
+    while (out->size() < max_histograms && out->size() < num_contexts) {
+        (*histogram_symbols)[nonempty_histograms[largest_idx]] = out->size();
+        out->push_back(in[nonempty_histograms[largest_idx]]);
+        const Histogram& backhisto = out->back();
+        acc_HistogramDistance(false, num_contexts, 0, acc_histoSize, acc_uramHisto, acc_hbmHisto, acc_total_count,
+                              entropy, nonempty_histograms, backhisto.data_.size(), backhisto.data_,
+                              backhisto.total_count_, backhisto.entropy_, dists, best_tmp, largest_idx);
+        if (dists[largest_idx] < min_distance) break;
+    }
+
+    // Nearest-cluster search: best[i] ends up as the closest cluster for position i.
+    std::vector<float> best_dist(num_contexts, std::numeric_limits<float>::max());
+    std::vector<uint32_t> best(num_contexts, 0);
+    for (size_t j = 0; j < out->size(); j++) {
+        const Histogram& outHisto = (*out)[j];
+        acc_HistogramDistance(false, num_contexts, j, acc_histoSize, acc_uramHisto, acc_hbmHisto, acc_total_count,
+                              entropy, nonempty_histograms, outHisto.data_.size(), outHisto.data_,
+                              outHisto.total_count_, outHisto.entropy_, best_dist, best, largest_idx);
+    }
+    // NOTE(review): the inner loop never uses j, so each input is merged into its cluster
+    // out->size() times. Kept because changing it alters the clustered histograms; confirm intent.
+    for (size_t i = 0; i < num_contexts; i++) {
+        for (size_t j = 0; j < out->size(); j++) {
+            (*out)[best[i]].AddHistogram(in[nonempty_histograms[i]]);
+            (*histogram_symbols)[nonempty_histograms[i]] = best[i];
+        }
+    }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(FastClusterHistograms); // Local function
+HWY_EXPORT(HistogramEntropy);      // Local function
+
+float Histogram::ShannonEntropy() const {
+    HWY_DYNAMIC_DISPATCH(HistogramEntropy)(*this);
+    return entropy_;
+}
+
+// Reorder histograms in *out so that the new symbols in *symbols come in
+// increasing order.
+void HistogramReindex(std::vector<Histogram>* out, std::vector<uint32_t>* symbols) {
+    std::vector<Histogram> tmp(*out);
+    std::map<int, int> new_index;
+    int next_index = 0;
+    for (uint32_t symbol : *symbols) {
+        if (new_index.find(symbol) == new_index.end()) {
+            new_index[symbol] = next_index;
+            (*out)[next_index] = tmp[symbol];
+            ++next_index;
+        }
+    }
+    out->resize(next_index);
+    for (uint32_t& symbol : *symbols) {
+        symbol = new_index[symbol];
+    }
+}
+
+// Clusters similar histograms in 'in' together, the selected histograms are
+// placed in 'out', and for each index in 'in', *histogram_symbols will
+// indicate which of the 'out' histograms is the best approximation.
+void ClusterHistograms(const HistogramParams params,
+                       const std::vector<Histogram>& in,
+                       const size_t num_contexts,
+                       size_t max_histograms,
+                       std::vector<Histogram>* out,
+                       std::vector<uint32_t>* histogram_symbols) {
+    constexpr float kMinDistanceForDistinctFast = 64.0f;
+    constexpr float kMinDistanceForDistinctBest = 16.0f;
+    max_histograms = std::min(max_histograms, params.max_histograms);
+    // printf("%s: %s: %d, max_histograms=%d\n", __FILE__, __FUNCTION__, __LINE__,
+    //       max_histograms);
+    if (params.clustering == HistogramParams::ClusteringType::kFastest) {
+        HWY_DYNAMIC_DISPATCH(FastClusterHistograms)
+        (in, num_contexts, 4, kMinDistanceForDistinctFast, out, histogram_symbols);
+    } else if (params.clustering == HistogramParams::ClusteringType::kFast) {
+        HWY_DYNAMIC_DISPATCH(FastClusterHistograms)
+        (in, num_contexts, max_histograms, kMinDistanceForDistinctFast, out, histogram_symbols);
+    } else {
+        PROFILER_FUNC;
+        HWY_DYNAMIC_DISPATCH(FastClusterHistograms)
+        (in, num_contexts, max_histograms, kMinDistanceForDistinctBest, out, histogram_symbols);
+
+        // printf("%s: %s: %d, FastClusterHistograms out->size=%d\n", __FILE__,
+        //       __FUNCTION__, __LINE__, out->size());
+        for (size_t i = 0; i < out->size(); i++) {
+            (*out)[i].entropy_ = ANSPopulationCost((*out)[i].data_.data(), (*out)[i].data_.size());
+        }
+        uint32_t next_version = 2;
+        std::vector<uint32_t> version(out->size(), 1);
+        std::vector<uint32_t> renumbering(out->size());
+        std::iota(renumbering.begin(), renumbering.end(), 0);
+
+        // Try to pair up clusters if doing so reduces the total cost.
+
+        struct HistogramPair {
+            // validity of a pair: p.version == max(version[i], version[j])
+            float cost;
+            uint32_t first;
+            uint32_t second;
+            uint32_t version;
+            // We use > because priority queues sort in *decreasing* order, but we
+            // want lower cost elements to appear first.
+            bool operator<(const HistogramPair& other) const {
+                return std::make_tuple(cost, first, second, version) >
+                       std::make_tuple(other.cost, other.first, other.second, other.version);
+            }
+        };
+
+        // Create list of all pairs by increasing merging cost.
+        std::priority_queue<HistogramPair> pairs_to_merge;
+        for (uint32_t i = 0; i < out->size(); i++) {
+            for (uint32_t j = i + 1; j < out->size(); j++) {
+                Histogram histo;
+                histo.AddHistogram((*out)[i]);
+                histo.AddHistogram((*out)[j]);
+                float cost =
+                    ANSPopulationCost(histo.data_.data(), histo.data_.size()) - (*out)[i].entropy_ - (*out)[j].entropy_;
+                // Avoid enqueueing pairs that are not advantageous to merge.
+                if (cost >= 0) continue;
+                pairs_to_merge.push(HistogramPair{cost, i, j, std::max(version[i], version[j])});
+            }
+        }
+
+        int merge_count = 0;
+        // Merge the best pair to merge, add new pairs that get formed as a
+        // consequence.
+        while (!pairs_to_merge.empty()) {
+            merge_count++;
+            uint32_t first = pairs_to_merge.top().first;
+            uint32_t second = pairs_to_merge.top().second;
+            uint32_t ver = pairs_to_merge.top().version;
+            pairs_to_merge.pop();
+            if (ver != std::max(version[first], version[second]) || version[first] == 0 || version[second] == 0) {
+                continue;
+            }
+            (*out)[first].AddHistogram((*out)[second]);
+            (*out)[first].entropy_ = ANSPopulationCost((*out)[first].data_.data(), (*out)[first].data_.size());
+            for (size_t i = 0; i < renumbering.size(); i++) {
+                if (renumbering[i] == second) {
+                    renumbering[i] = first;
+                }
+            }
+            version[second] = 0;
+            version[first] = next_version++;
+            for (uint32_t j = 0; j < out->size(); j++) {
+                if (j == first) continue;
+                if (version[j] == 0) continue;
+                Histogram histo;
+                histo.AddHistogram((*out)[first]);
+                histo.AddHistogram((*out)[j]);
+                float cost = ANSPopulationCost(histo.data_.data(), histo.data_.size()) - (*out)[first].entropy_ -
+                             (*out)[j].entropy_;
+                // Avoid enqueueing pairs that are not advantageous to merge.
+                if (cost >= 0) continue;
+                pairs_to_merge.push(
+                    HistogramPair{cost, std::min(first, j), std::max(first, j), std::max(version[first], version[j])});
+            }
+        }
+        std::vector<uint32_t> reverse_renumbering(out->size(), -1);
+        size_t num_alive = 0;
+        for (size_t i = 0; i < out->size(); i++) {
+            if (version[i] == 0) continue;
+            (*out)[num_alive++] = (*out)[i];
+            reverse_renumbering[i] = num_alive - 1;
+        }
+        out->resize(num_alive);
+        // printf(
+        //    "%s: %s: %d, culster num_alive=%zu, histogram_symbols size=%zu, "
+        //    "merge_count=%d\n",
+        //    __FILE__, __FUNCTION__, __LINE__, num_alive,
+        //    histogram_symbols->size(), merge_count);
+        for (size_t i = 0; i < histogram_symbols->size(); i++) {
+            (*histogram_symbols)[i] = reverse_renumbering[renumbering[(*histogram_symbols)[i]]];
+        }
+    }
+
+    // Convert the context map to a canonical form.
+    HistogramReindex(out, histogram_symbols);
+    // printf("%s: %s: %d, culster final out size=%zu, histogram_symbols
+    // size=%zu\n",
+    //       __FILE__, __FUNCTION__, __LINE__, out->size(),
+    //       histogram_symbols->size());
+}
+
+// First clustering pass on a pre-filtered input: nonempty_histograms lists the ids of
+// the histograms to cluster and largest_idx_in is the list position of the first seed.
+// Further seeds are the inputs farthest from the seeds chosen so far; every listed input
+// is then mapped to its nearest cluster. *histogram_symbols gets one entry per element of `in`.
+void acc_FastClusterHistograms(const std::vector<Histogram>& in,
+                               std::vector<uint32_t> nonempty_histograms,
+                               uint32_t largest_idx_in,
+                               const size_t num_contexts,
+                               size_t max_histograms,
+                               float min_distance,
+                               std::vector<Histogram>* out,
+                               std::vector<uint32_t>* histogram_symbols) {
+    PROFILER_FUNC;
+    // No symbols: bail out before staging or clustering anything.
+    if (nonempty_histograms.empty()) {
+        out->resize(1);
+        histogram_symbols->clear();
+        histogram_symbols->resize(in.size(), 0);
+        return;
+    }
+
+    uint32_t largest_idx = largest_idx_in;
+    std::vector<float> entropy(num_contexts);
+    // Stage the inputs in the flat layout read by acc_HistogramDistance(): histogram ids
+    // 0..4095 go to acc_uramHisto, the rest to acc_hbmHisto. The nominal capacity is 8192
+    // histograms of 40 bins; the buffers grow past it so the copy stays in bounds.
+    size_t num_bins = 40;
+    for (size_t i = 0; i < in.size(); i++) num_bins = std::max<size_t>(num_bins, in[i].data_.size());
+    const size_t num_slots = std::max<size_t>(8192, in.size());
+    if (num_slots > 8192 || num_bins > 40) std::cout << "Error Histogram too big!" << std::endl;
+    std::vector<std::vector<int32_t> > acc_uramHisto(4096, std::vector<int32_t>(num_bins, 0));
+    std::vector<std::vector<int32_t> > acc_hbmHisto(num_slots - 4096, std::vector<int32_t>(num_bins, 0));
+    std::vector<uint32_t> acc_total_count(num_slots, 0);
+    std::vector<float> acc_entropy(num_slots, 0);
+    std::vector<uint32_t> acc_histoSize(num_slots, 0);
+    for (size_t i = 0; i < in.size(); i++) {
+        acc_total_count[i] = in[i].total_count_;
+        acc_entropy[i] = in[i].entropy_;
+        acc_histoSize[i] = in[i].data_.size();
+        std::vector<int32_t>& row = i < 4096 ? acc_uramHisto[i] : acc_hbmHisto[i - 4096];
+        std::copy(in[i].data_.begin(), in[i].data_.end(), row.begin());
+    }
+
+    // Entropy pass: fills entropy[]; the reference, best and largest-index arguments are placeholders.
+    Histogram tmp0;
+    std::vector<uint32_t> tmp1;
+    uint32_t tmp2;
+    jxl::N_SCALAR::acc_HistogramDistance(true, num_contexts, 0, acc_histoSize, acc_uramHisto, acc_hbmHisto,
+                                         acc_total_count, acc_entropy, nonempty_histograms, tmp0.data_.size(),
+                                         tmp0.data_, tmp0.total_count_, tmp0.entropy_, entropy, tmp1, tmp2);
+    for (size_t i = 0; i < num_contexts; i++) {
+        in[nonempty_histograms[i]].entropy_ = entropy[i];
+        acc_entropy[nonempty_histograms[i]] = entropy[i];
+    }
+    out->clear();
+    out->reserve(max_histograms);
+    std::vector<float> dists(num_contexts, std::numeric_limits<float>::max());
+    std::vector<uint32_t> best_tmp(num_contexts, 0); // no use
+    histogram_symbols->clear();
+    histogram_symbols->resize(in.size(), 0);
+
+    // Seed loop: ends at max_histograms clusters, when every listed input is a
+    // seed, or once the farthest remaining input is closer than min_distance.
+    while (out->size() < max_histograms && out->size() < num_contexts) {
+        (*histogram_symbols)[nonempty_histograms[largest_idx]] = out->size();
+        out->push_back(in[nonempty_histograms[largest_idx]]);
+        const Histogram& backhisto = out->back();
+        jxl::N_SCALAR::acc_HistogramDistance(false, num_contexts, 0, acc_histoSize, acc_uramHisto, acc_hbmHisto,
+                                             acc_total_count, entropy, nonempty_histograms, backhisto.data_.size(),
+                                             backhisto.data_, backhisto.total_count_, backhisto.entropy_, dists,
+                                             best_tmp, largest_idx);
+        if (dists[largest_idx] < min_distance) break;
+    }
+
+    // Nearest-cluster search: best[i] ends up as the closest cluster for position i.
+    std::vector<float> best_dist(num_contexts, std::numeric_limits<float>::max());
+    std::vector<uint32_t> best(num_contexts, 0);
+    for (size_t j = 0; j < out->size(); j++) {
+        const Histogram& outHisto = (*out)[j];
+        jxl::N_SCALAR::acc_HistogramDistance(false, num_contexts, j, acc_histoSize, acc_uramHisto, acc_hbmHisto,
+                                             acc_total_count, entropy, nonempty_histograms, outHisto.data_.size(),
+                                             outHisto.data_, outHisto.total_count_, outHisto.entropy_, best_dist, best,
+                                             largest_idx);
+    }
+    // NOTE(review): the inner loop never uses j, so each input is merged into its cluster
+    // out->size() times. Kept because changing it alters the clustered histograms; confirm intent.
+    for (size_t i = 0; i < num_contexts; i++) {
+        for (size_t j = 0; j < out->size(); j++) {
+            (*out)[best[i]].AddHistogram(in[nonempty_histograms[i]]);
+            (*histogram_symbols)[nonempty_histograms[i]] = best[i];
+        }
+    }
+}
+
+// Variant of ClusterHistograms() built on acc_FastClusterHistograms(): collects the
+// non-empty histograms among the first num_contexts entries of `in`, clusters them
+// and brings the result into canonical order with HistogramReindex().
+// NOTE(review): params.clustering is not consulted and the "fast" distance threshold
+// is always used; kMinDistanceForDistinctBest is unused here - confirm this is intended.
+void ClusterHistogramsNew(const HistogramParams params,
+                          const std::vector<Histogram>& in,
+                          const size_t num_contexts,
+                          size_t max_histograms,
+                          std::vector<Histogram>* out,
+                          std::vector<uint32_t>* histogram_symbols) {
+    constexpr float kMinDistanceForDistinctFast = 64.0f;
+    constexpr float kMinDistanceForDistinctBest = 16.0f;
+    max_histograms = std::min(max_histograms, params.max_histograms);
+
+    uint32_t largest_idx = 0;
+    std::vector<uint32_t> nonempty_histograms;
+    nonempty_histograms.reserve(in.size());
+    for (size_t i = 0; i < num_contexts; i++) {
+        if (in[i].total_count_ == 0) continue;
+
+        if (in[i].total_count_ > in[largest_idx].total_count_) {
+            largest_idx = i;
+        }
+        nonempty_histograms.push_back(i);
+    }
+
+    // Turn the histogram id of the largest input into its position in nonempty_histograms.
+    largest_idx =
+        std::find(nonempty_histograms.begin(), nonempty_histograms.end(), largest_idx) - nonempty_histograms.begin();
+
+    acc_FastClusterHistograms(in, nonempty_histograms, largest_idx, nonempty_histograms.size(), max_histograms,
+                              kMinDistanceForDistinctFast, out, histogram_symbols);
+
+    // Convert the context map to a canonical form.
+    HistogramReindex(out, histogram_symbols);
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/others/src/acc_enc_frame.cpp b/codec/L2/demos/jxlEnc/others/src/acc_enc_frame.cpp
new file mode 100644
index 0000000000..f96e3592e6
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/acc_enc_frame.cpp
@@ -0,0 +1,584 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "acc_host.hpp"
+#include "lib/jxl/enc_frame.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <limits>
+#include <numeric>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "acc_enc_chroma_from_luma.hpp"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/enc_context_map.h"
+#include "lib/jxl/enc_entropy_coder.h"
+#include "acc_enc_group.hpp"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_noise.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/enc_toc.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/gaborish.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+namespace {
+
+// Derives the FrameHeader flag bits implied by the compression parameters;
+// only kNoise and kUseDcFrame are decided here.
+uint64_t FrameFlagsFromParams(const CompressParams& cparams) {
+    const float distance = cparams.butteraugli_distance;
+    // By default noise is not added at low butteraugli distances: there the
+    // original noise is stored within the compressed image and adding noise
+    // makes things worse. A positive photon_noise_iso enables it regardless.
+    const bool noise_by_default = distance >= kMinButteraugliForNoise;
+    const bool use_noise = ApplyOverride(cparams.noise, noise_by_default) || cparams.photon_noise_iso > 0;
+
+    // The DC-frame flag needs progressive DC and a non-modular encode.
+    const bool use_dc_frame = cparams.progressive_dc > 0 && !cparams.modular_mode;
+
+    uint64_t frame_flags = 0;
+    if (use_noise) frame_flags |= FrameHeader::kNoise;
+    if (use_dc_frame) frame_flags |= FrameHeader::kUseDcFrame;
+    return frame_flags;
+}
+
+// Fills in frame_header->loop_filter (gaborish, EPF iteration count, modular
+// EPF sigma) from the compression parameters. Always returns true.
+Status LoopFilterFromParams(const CompressParams& cparams, FrameHeader* JXL_RESTRICT frame_header) {
+    LoopFilter& lf = frame_header->loop_filter;
+    const bool is_modular = frame_header->encoding == FrameEncoding::kModular;
+    const bool is_var_dct = frame_header->encoding == FrameEncoding::kVarDCT;
+    // Gaborish defaults to enabled in Hare or slower (VarDCT only, and only
+    // for decoding speed tiers below 4); cparams.gaborish is combined with that
+    // default through ApplyOverride.
+    const bool gab_default = is_var_dct && cparams.speed_tier <= SpeedTier::kHare && cparams.decoding_speed_tier < 4;
+    lf.gab = ApplyOverride(cparams.gaborish, gab_default);
+
+    if (cparams.epf != -1) {
+        lf.epf_iters = cparams.epf; // a value other than -1 is used as given
+    } else {
+        // Automatic: zero for modular frames or decoding speed tier >= 3, else
+        // one iteration per threshold reached (tier 2 skips the lowest one).
+        lf.epf_iters = 0;
+        if (!is_modular && cparams.decoding_speed_tier < 3) {
+            constexpr float kThresholds[3] = {0.7, 1.5, 4.0};
+            const size_t first = cparams.decoding_speed_tier == 2 ? 1 : 0;
+            for (size_t t = first; t < 3; t++) {
+                if (cparams.butteraugli_distance >= kThresholds[t]) lf.epf_iters++;
+            }
+        }
+    }
+    if (is_modular) {
+        // Strength of EPF in modular mode; the lossy-palette value is applied last.
+        // TODO(veluca): this formula is nonsense.
+        if (cparams.quality_pair.first < 100) {
+            lf.epf_sigma_for_modular = 20.0f * (1.0f - cparams.quality_pair.first / 100);
+        }
+        if (cparams.lossy_palette) lf.epf_sigma_for_modular = 1.0f;
+    }
+    return true;
+}
+// Populates *frame_header from cparams, frame_info and ib; returns a failure Status for unsupported settings.
+Status MakeFrameHeader(const CompressParams& cparams,
+                       const ProgressiveSplitter& progressive_splitter, // used only to initialize the passes
+                       const FrameInfo& frame_info,
+                       const ImageBundle& ib,
+                       FrameHeader* JXL_RESTRICT frame_header) { // output, filled in place
+    frame_header->nonserialized_is_preview = frame_info.is_preview;
+    frame_header->is_last = frame_info.is_last;
+    frame_header->save_before_color_transform = frame_info.save_before_color_transform;
+    frame_header->frame_type = frame_info.frame_type;
+    frame_header->name = ib.name;
+
+    progressive_splitter.InitPasses(&frame_header->passes);
+
+    if (cparams.modular_mode) { // otherwise the encoding already stored in the header is kept
+        frame_header->encoding = FrameEncoding::kModular;
+        frame_header->group_size_shift = cparams.modular_group_size_shift;
+    }
+
+    frame_header->chroma_subsampling = ib.chroma_subsampling;
+    if (ib.IsJPEG()) {
+        // we are transcoding a JPEG, so we don't get to choose
+        frame_header->encoding = FrameEncoding::kVarDCT;
+        frame_header->color_transform = ib.color_transform;
+    } else {
+        frame_header->color_transform = cparams.color_transform;
+        if (!cparams.modular_mode &&
+            (frame_header->chroma_subsampling.MaxHShift() != 0 || frame_header->chroma_subsampling.MaxVShift() != 0)) {
+            return JXL_FAILURE(
+                "Chroma subsampling is not supported in VarDCT mode when not "
+                "recompressing JPEGs");
+        }
+    }
+
+    frame_header->flags = FrameFlagsFromParams(cparams);
+    // Noise is not supported in the Modular encoder for now.
+    if (frame_header->encoding != FrameEncoding::kVarDCT) {
+        frame_header->UpdateFlag(false, FrameHeader::Flags::kNoise);
+    }
+
+    JXL_RETURN_IF_ERROR(LoopFilterFromParams(cparams, frame_header));
+
+    frame_header->dc_level = frame_info.dc_level;
+    if (frame_header->dc_level > 2) {
+        // With 3 or more progressive_dc frames, the implementation does not yet
+        // work, see enc_cache.cc.
+        return JXL_FAILURE("progressive_dc > 2 is not yet supported");
+    }
+    if (cparams.progressive_dc > 0 && (cparams.ec_resampling != 1 || cparams.resampling != 1)) {
+        return JXL_FAILURE("Resampling not supported with DC frames");
+    }
+    if (cparams.resampling != 1 && cparams.resampling != 2 && cparams.resampling != 4 && cparams.resampling != 8) {
+        return JXL_FAILURE("Invalid resampling factor");
+    }
+    if (cparams.ec_resampling != 1 && cparams.ec_resampling != 2 && cparams.ec_resampling != 4 &&
+        cparams.ec_resampling != 8) {
+        return JXL_FAILURE("Invalid ec_resampling factor");
+    }
+    // Resized frames: record origin and size, and flag them when they differ from the header defaults.
+    if (frame_info.frame_type != FrameType::kDCFrame) {
+        frame_header->frame_origin = ib.origin;
+        size_t ups = 1;
+        if (cparams.already_downsampled) ups = cparams.resampling; // stored size is the bundle size times this factor
+        frame_header->frame_size.xsize = ib.xsize() * ups;
+        frame_header->frame_size.ysize = ib.ysize() * ups;
+        if (ib.origin.x0 != 0 || ib.origin.y0 != 0 || frame_header->frame_size.xsize != frame_header->default_xsize() ||
+            frame_header->frame_size.ysize != frame_header->default_ysize()) {
+            frame_header->custom_size_or_origin = true;
+        }
+    }
+    // Upsampling: one factor for the color channels, ec_resampling for every extra channel.
+    frame_header->upsampling = cparams.resampling;
+    const std::vector<ExtraChannelInfo>& extra_channels = frame_header->nonserialized_metadata->m.extra_channel_info;
+    frame_header->extra_channel_upsampling.clear();
+    frame_header->extra_channel_upsampling.resize(extra_channels.size(), cparams.ec_resampling);
+    frame_header->save_as_reference = frame_info.save_as_reference;
+
+    // Set blending-related information.
+    if (ib.blend || frame_header->custom_size_or_origin) {
+        // Set blend_channel to the first alpha channel. These values are only
+        // encoded in case a blend mode involving alpha is used and there are more
+        // than one extra channels.
+        size_t index = 0; // stays 0 when there is at most one extra channel or no alpha channel is found
+        if (extra_channels.size() > 1) {
+            for (size_t i = 0; i < extra_channels.size(); i++) {
+                if (extra_channels[i].type == ExtraChannel::kAlpha) {
+                    index = i;
+                    break;
+                }
+            }
+        }
+        frame_header->blending_info.alpha_channel = index;
+        frame_header->blending_info.mode = ib.blend ? ib.blendmode : BlendMode::kReplace;
+        // previous frames are saved with ID 1.
+        frame_header->blending_info.source = 1;
+        for (size_t i = 0; i < extra_channels.size(); i++) {
+            frame_header->extra_channel_blending_info[i].alpha_channel = index;
+            BlendMode default_blend = ib.blendmode;
+            if (extra_channels[i].type != ExtraChannel::kBlack && i != index) {
+                // K needs to be blended, spot colors and other stuff gets added
+                default_blend = BlendMode::kAdd;
+            }
+            frame_header->extra_channel_blending_info[i].mode = ib.blend ? default_blend : BlendMode::kReplace;
+            frame_header->extra_channel_blending_info[i].source = 1;
+        }
+    }
+
+    frame_header->animation_frame.duration = ib.duration;
+
+    // TODO(veluca): timecode.
+
+    return true;
+}
+
+} // namespace
+// Encodes the frame held in `ib` and appends it to `writer`; returns a failure Status on invalid parameters or errors.
+Status EncodeFrame(const CompressParams& cparams_orig, // copied into a local that is adjusted below
+                   const FrameInfo& frame_info,
+                   const CodecMetadata* metadata,
+                   const ImageBundle& ib, // image to encode; an empty image is rejected
+                   PassesEncoderState* passes_enc_state,
+                   ThreadPool* pool,
+                   BitWriter* writer, // on the non-acc_host path: frame header, group offsets, group data
+                   AuxOut* aux_out, // may be null
+                   std::string xclbinPath) { // forwarded to acc_host
+    ib.VerifyMetadata();
+    passes_enc_state->special_frames.clear();
+
+    CompressParams cparams = cparams_orig;
+
+    if (cparams.progressive_dc < 0) { // -1 selects the automatic choice; other negatives are invalid
+        if (cparams.progressive_dc != -1) {
+            return JXL_FAILURE("Invalid progressive DC setting value (%d)", cparams.progressive_dc);
+        }
+        cparams.progressive_dc = 0;
+        // Enable progressive_dc for lower qualities.
+        if (cparams.butteraugli_distance >= kMinButteraugliDistanceForProgressiveDc) {
+            cparams.progressive_dc = 1;
+        }
+    }
+    if (cparams.ec_resampling < cparams.resampling) { // extra channels are resampled at least as much as color
+        cparams.ec_resampling = cparams.resampling;
+    }
+    if (cparams.resampling > 1) cparams.progressive_dc = 0; // MakeFrameHeader rejects resampling with DC frames
+
+    if (frame_info.dc_level + cparams.progressive_dc > 4) {
+        return JXL_FAILURE("Too many levels of progressive DC");
+    }
+
+    if (cparams.butteraugli_distance != 0 && cparams.butteraugli_distance < kMinButteraugliDistance) {
+        return JXL_FAILURE("Butteraugli distance is too low (%f)", cparams.butteraugli_distance);
+    }
+    if (cparams.butteraugli_distance > 0.9f && cparams.modular_mode == false && cparams.quality_pair.first == 100) {
+        // in case the color image is lossy, make the alpha slightly lossy too
+        cparams.quality_pair.first = std::max(90.f, 99.f - 0.3f * cparams.butteraugli_distance);
+    }
+
+    if (ib.IsJPEG()) { // JPEG input: gaborish off, no EPF, never modular
+        cparams.gaborish = Override::kOff;
+        cparams.epf = 0;
+        cparams.modular_mode = false;
+    }
+
+    if (ib.xsize() == 0 || ib.ysize() == 0) return JXL_FAILURE("Empty image");
+
+    // Assert that this metadata is correctly set up for the compression params,
+    // this should have been done by enc_file.cc
+    JXL_ASSERT(metadata->m.xyb_encoded == (cparams.color_transform == ColorTransform::kXYB));
+    std::unique_ptr<FrameHeader> frame_header = jxl::make_unique<FrameHeader>(metadata);
+    JXL_RETURN_IF_ERROR(
+        MakeFrameHeader(cparams, passes_enc_state->progressive_splitter, frame_info, ib, frame_header.get()));
+    // Check that if the codestream header says xyb_encoded, the color_transform
+    // matches the requirement. This is checked from the cparams here, even though
+    // optimally we'd be able to check this against what has actually been written
+    // in the main codestream header, but since ib is a const object and the data
+    // written to the main codestream header is (in modified form) in ib, the
+    // encoder cannot indicate this fact in the ib's metadata.
+    if (cparams_orig.color_transform == ColorTransform::kXYB) {
+        if (frame_header->color_transform != ColorTransform::kXYB) {
+            return JXL_FAILURE(
+                "The color transform of frames must be xyb if the codestream is xyb "
+                "encoded");
+        }
+    } else {
+        if (frame_header->color_transform == ColorTransform::kXYB) {
+            return JXL_FAILURE(
+                "The color transform of frames cannot be xyb if the codestream is "
+                "not xyb encoded");
+        }
+    }
+
+    FrameDimensions frame_dim = frame_header->ToFrameDimensions();
+
+    const size_t num_groups = frame_dim.num_groups;
+
+    Image3F opsin;
+    const ColorEncoding& c_linear = ColorEncoding::LinearSRGB(ib.IsGray());
+    std::unique_ptr<ImageMetadata> metadata_linear = jxl::make_unique<ImageMetadata>();
+    metadata_linear->xyb_encoded = (cparams.color_transform == ColorTransform::kXYB);
+    metadata_linear->color_encoding = c_linear;
+    ImageBundle linear_storage(metadata_linear.get()); // not referenced again in this function
+
+    std::vector<AuxOut> aux_outs; // one entry per worker thread, sized by resize_aux_outs
+    // LossyFrameEncoder stores a reference to a std::function<Status(size_t)>
+    // so we need to keep the std::function<Status(size_t)> being referenced
+    // alive while lossy_frame_encoder is used. We could make resize_aux_outs a
+    // lambda type by making LossyFrameEncoder a template instead, but this is
+    // simpler.
+    const std::function<Status(size_t)> resize_aux_outs = [&aux_outs, aux_out](size_t num_threads) -> Status {
+        if (aux_out != nullptr) {
+            size_t old_size = aux_outs.size();
+            for (size_t i = num_threads; i < old_size; i++) { // entries about to be dropped are merged first
+                aux_out->Assimilate(aux_outs[i]);
+            }
+            aux_outs.resize(num_threads);
+            // Each thread needs these INPUTS. Don't copy the entire AuxOut
+            // because it may contain stats which would be Assimilated multiple
+            // times below.
+            for (size_t i = old_size; i < aux_outs.size(); i++) {
+                aux_outs[i].dump_image = aux_out->dump_image;
+                aux_outs[i].debug_prefix = aux_out->debug_prefix;
+            }
+        }
+        return true;
+    };
+
+    LossyFrameEncoder lossy_frame_encoder(cparams, *frame_header, passes_enc_state, pool, aux_out);
+    std::unique_ptr<ModularFrameEncoder> modular_frame_encoder =
+        jxl::make_unique<ModularFrameEncoder>(*frame_header, cparams);
+
+    const std::vector<ImageF>* extra_channels = &ib.extra_channels();
+    std::vector<ImageF> extra_channels_storage; // holds downsampled copies when ec_resampling applies
+    const ImageBundle* JXL_RESTRICT ib_or_linear; // NOTE(review): uninitialized when passed to acc_host - confirm
+
+    if (ib.IsJPEG()) {
+        JXL_RETURN_IF_ERROR(lossy_frame_encoder.ComputeJPEGTranscodingData(*ib.jpeg_data, modular_frame_encoder.get(),
+                                                                           frame_header.get()));
+    } else if (!lossy_frame_encoder.State()->heuristics->HandlesColorConversion(cparams, ib) ||
+               frame_header->encoding != FrameEncoding::kVarDCT) { // no heuristic color conversion, or not VarDCT
+        acc_host(xclbinPath, opsin, lossy_frame_encoder, ib_or_linear, pool, modular_frame_encoder, writer, aux_out,
+                 frame_header, frame_info, cparams, &ib.extra_channels(), passes_enc_state, frame_dim, num_groups, ib,
+                 aux_outs, resize_aux_outs);
+        if (frame_header->encoding == FrameEncoding::kVarDCT) { // nothing more to do here for VarDCT
+        } else if (frame_header->upsampling != 1 && !cparams.already_downsampled) {
+            // In VarDCT mode, LossyFrameHeuristics takes care of running downsampling
+            // after noise, if necessary.
+            DownsampleImage(&opsin, frame_header->upsampling);
+        }
+
+    } else {
+        JXL_RETURN_IF_ERROR(lossy_frame_encoder.ComputeEncodingData(&ib, &opsin, pool, modular_frame_encoder.get(),
+                                                                    writer, frame_header.get()));
+    }
+
+    if (!ib.IsJPEG() && (!lossy_frame_encoder.State()->heuristics->HandlesColorConversion(cparams, ib) ||
+                         frame_header->encoding != FrameEncoding::kVarDCT) &&
+        frame_header->encoding == FrameEncoding::kVarDCT) { // acc_host branch ran for VarDCT: nothing is written here
+    } else {
+        if (cparams.ec_resampling != 1 && !cparams.already_downsampled) {
+            extra_channels = &extra_channels_storage;
+            for (size_t i = 0; i < ib.extra_channels().size(); i++) {
+                extra_channels_storage.emplace_back(CopyImage(ib.extra_channels()[i]));
+                DownsampleImage(&extra_channels_storage.back(), cparams.ec_resampling);
+            }
+        }
+        // needs to happen *AFTER* VarDCT-ComputeEncodingData.
+        JXL_RETURN_IF_ERROR(modular_frame_encoder->ComputeEncodingData(
+            *frame_header, *ib.metadata(), &opsin, *extra_channels, lossy_frame_encoder.State(), pool, aux_out,
+            /* do_color=*/frame_header->encoding == FrameEncoding::kModular));
+
+        writer->AppendByteAligned(lossy_frame_encoder.State()->special_frames);
+        frame_header->UpdateFlag(lossy_frame_encoder.State()->shared.image_features.patches.HasAny(),
+                                 FrameHeader::kPatches);
+        frame_header->UpdateFlag(lossy_frame_encoder.State()->shared.image_features.splines.HasAny(),
+                                 FrameHeader::kSplines);
+        JXL_RETURN_IF_ERROR(WriteFrameHeader(*frame_header, writer, aux_out));
+
+        const size_t num_passes = passes_enc_state->progressive_splitter.GetNumPasses();
+
+        // DC global info + DC groups + AC global info + AC groups *
+        // num_passes.
+        const bool has_ac_global = true;
+        std::vector<BitWriter> group_codes(
+            NumTocEntries(frame_dim.num_groups, frame_dim.num_dc_groups, num_passes, has_ac_global));
+        const size_t global_ac_index = frame_dim.num_dc_groups + 1;
+        const bool is_small_image = frame_dim.num_groups == 1 && num_passes == 1; // then everything goes to writer 0
+        const auto get_output = [&](const size_t index) { return &group_codes[is_small_image ? 0 : index]; };
+        auto ac_group_code = [&](size_t pass, size_t group) {
+            return get_output(AcGroupIndex(pass, group, frame_dim.num_groups, frame_dim.num_dc_groups, has_ac_global));
+        };
+
+        if (frame_header->flags & FrameHeader::kPatches) {
+            PatchDictionaryEncoder::Encode(lossy_frame_encoder.State()->shared.image_features.patches, get_output(0),
+                                           kLayerDictionary, aux_out);
+        }
+
+        if (frame_header->flags & FrameHeader::kSplines) {
+            EncodeSplines(lossy_frame_encoder.State()->shared.image_features.splines, get_output(0), kLayerSplines,
+                          HistogramParams(), aux_out);
+        }
+
+        if (frame_header->flags & FrameHeader::kNoise) {
+            EncodeNoise(lossy_frame_encoder.State()->shared.image_features.noise_params, get_output(0), kLayerNoise,
+                        aux_out);
+        }
+
+        JXL_RETURN_IF_ERROR(DequantMatricesEncodeDC(&lossy_frame_encoder.State()->shared.matrices, get_output(0),
+                                                    kLayerDequantTables, aux_out));
+        if (frame_header->encoding == FrameEncoding::kVarDCT) {
+            JXL_RETURN_IF_ERROR(lossy_frame_encoder.EncodeGlobalDCInfo(*frame_header, get_output(0)));
+        }
+        JXL_RETURN_IF_ERROR(modular_frame_encoder->EncodeGlobalInfo(get_output(0), aux_out));
+        JXL_RETURN_IF_ERROR(modular_frame_encoder->EncodeStream(get_output(0), aux_out, kLayerModularGlobal,
+                                                                ModularStreamId::Global()));
+
+        const auto process_dc_group = [&](const int group_index, const int thread) { // writes to output index + 1
+            AuxOut* my_aux_out = aux_out ? &aux_outs[thread] : nullptr;
+            BitWriter* output = get_output(group_index + 1);
+            if (frame_header->encoding == FrameEncoding::kVarDCT && !(frame_header->flags & FrameHeader::kUseDcFrame)) {
+                BitWriter::Allotment allotment(output, 2);
+                output->Write(2, modular_frame_encoder->extra_dc_precision[group_index]);
+                ReclaimAndCharge(output, &allotment, kLayerDC, my_aux_out);
+                JXL_CHECK(modular_frame_encoder->EncodeStream(output, my_aux_out, kLayerDC,
+                                                              ModularStreamId::VarDCTDC(group_index)));
+            }
+            JXL_CHECK(modular_frame_encoder->EncodeStream(output, my_aux_out, kLayerModularDcGroup,
+                                                          ModularStreamId::ModularDC(group_index)));
+            if (frame_header->encoding == FrameEncoding::kVarDCT) {
+                const Rect& rect = lossy_frame_encoder.State()->shared.DCGroupRect(group_index);
+                size_t nb_bits = CeilLog2Nonzero(rect.xsize() * rect.ysize());
+                if (nb_bits != 0) { // the metadata size is written as (size - 1) in nb_bits bits
+                    BitWriter::Allotment allotment(output, nb_bits);
+                    output->Write(nb_bits, modular_frame_encoder->ac_metadata_size[group_index] - 1);
+                    ReclaimAndCharge(output, &allotment, kLayerControlFields, my_aux_out);
+                }
+                JXL_CHECK(modular_frame_encoder->EncodeStream(output, my_aux_out, kLayerControlFields,
+                                                              ModularStreamId::ACMetadata(group_index)));
+            }
+        };
+        RunOnPool(pool, 0, frame_dim.num_dc_groups, resize_aux_outs, process_dc_group, "EncodeDCGroup");
+
+        if (frame_header->encoding == FrameEncoding::kVarDCT) {
+            JXL_RETURN_IF_ERROR(
+                lossy_frame_encoder.EncodeGlobalACInfo(get_output(global_ac_index), modular_frame_encoder.get()));
+        }
+
+        std::atomic<int> num_errors{0}; // counts failed groups; checked after the pool run below
+        const auto process_group = [&](const int group_index, const int thread) {
+            AuxOut* my_aux_out = aux_out ? &aux_outs[thread] : nullptr;
+
+            for (size_t i = 0; i < num_passes; i++) {
+                if (frame_header->encoding == FrameEncoding::kVarDCT) {
+                    if (!lossy_frame_encoder.EncodeACGroup(i, group_index, ac_group_code(i, group_index), my_aux_out)) {
+                        num_errors.fetch_add(1, std::memory_order_relaxed);
+                        return;
+                    }
+                }
+                // Write all modular encoded data (color?, alpha, depth, extra channels)
+                if (!modular_frame_encoder->EncodeStream(ac_group_code(i, group_index), my_aux_out,
+                                                         kLayerModularAcGroup,
+                                                         ModularStreamId::ModularAC(group_index, i))) {
+                    num_errors.fetch_add(1, std::memory_order_relaxed);
+                    return;
+                }
+            }
+        };
+        RunOnPool(pool, 0, num_groups, resize_aux_outs, process_group, "EncodeGroupCoefficients");
+
+        // Resizing aux_outs to 0 also Assimilates the array.
+        static_cast<void>(resize_aux_outs(0));
+        JXL_RETURN_IF_ERROR(num_errors.load(std::memory_order_relaxed) == 0);
+
+        for (BitWriter& bw : group_codes) {
+            bw.ZeroPadToByte(); // end of group.
+        }
+
+        std::vector<coeff_order_t>* permutation_ptr = nullptr; // stays null unless center-first ordering is applied
+        std::vector<coeff_order_t> permutation;
+        if (cparams.centerfirst && !(num_passes == 1 && num_groups == 1)) {
+            permutation_ptr = &permutation;
+            // Don't permute global DC/AC or DC.
+            permutation.resize(global_ac_index + 1);
+            std::iota(permutation.begin(), permutation.end(), 0);
+            std::vector<coeff_order_t> ac_group_order(num_groups);
+            std::iota(ac_group_order.begin(), ac_group_order.end(), 0);
+            size_t group_dim = frame_dim.group_dim;
+
+            // The center of the image is either given by parameters or chosen
+            // to be the middle of the image by default if center_x, center_y resp.
+            // are not provided.
+
+            int64_t imag_cx;
+            if (cparams.center_x != static_cast<size_t>(-1)) {
+                JXL_RETURN_IF_ERROR(cparams.center_x < ib.xsize());
+                imag_cx = cparams.center_x;
+            } else {
+                imag_cx = ib.xsize() / 2;
+            }
+
+            int64_t imag_cy;
+            if (cparams.center_y != static_cast<size_t>(-1)) {
+                JXL_RETURN_IF_ERROR(cparams.center_y < ib.ysize());
+                imag_cy = cparams.center_y;
+            } else {
+                imag_cy = ib.ysize() / 2;
+            }
+
+            // The center of the group containing the center of the image.
+            int64_t cx = (imag_cx / group_dim) * group_dim + group_dim / 2;
+            int64_t cy = (imag_cy / group_dim) * group_dim + group_dim / 2;
+            // Angle of the image center as seen from the center of its group; it
+            // tells in which area of the central group the image center lies
+            // (negated atan2 of the offset between the two points).
+            double direction = -std::atan2(imag_cy - cy, imag_cx - cx);
+            // This identifies the side of the central group the center of the image
+            // lies closest to. This can take values 0, 1, 2, 3 corresponding to left,
+            // bottom, right, top.
+            int64_t side = std::fmod((direction + 5 * kPi / 4), 2 * kPi) * 2 / kPi;
+            auto get_distance_from_center = [&](size_t gid) { // sort key: (square-ring distance, angle)
+                Rect r = passes_enc_state->shared.GroupRect(gid);
+                int64_t gcx = r.x0() + group_dim / 2;
+                int64_t gcy = r.y0() + group_dim / 2;
+                int64_t dx = gcx - cx;
+                int64_t dy = gcy - cy;
+                // The angle is determined by taking atan2 and adding an appropriate
+                // starting point depending on the side we want to start on.
+                double angle = std::remainder(std::atan2(dy, dx) + kPi / 4 + side * (kPi / 2), 2 * kPi);
+                // Concentric squares in clockwise order.
+                return std::make_pair(std::max(std::abs(dx), std::abs(dy)), angle);
+            };
+            std::sort(ac_group_order.begin(), ac_group_order.end(), [&](coeff_order_t a, coeff_order_t b) {
+                return get_distance_from_center(a) < get_distance_from_center(b);
+            });
+            std::vector<coeff_order_t> inv_ac_group_order(ac_group_order.size(), 0); // group id -> rank in sorted order
+            for (size_t i = 0; i < ac_group_order.size(); i++) {
+                inv_ac_group_order[ac_group_order[i]] = i;
+            }
+            for (size_t i = 0; i < num_passes; i++) { // the same group order is appended once per pass
+                size_t pass_start = permutation.size();
+                for (coeff_order_t v : inv_ac_group_order) {
+                    permutation.push_back(pass_start + v);
+                }
+            }
+            std::vector<BitWriter> new_group_codes(group_codes.size());
+            for (size_t i = 0; i < permutation.size(); i++) { // section i moves to slot permutation[i]
+                new_group_codes[permutation[i]] = std::move(group_codes[i]);
+            }
+            group_codes = std::move(new_group_codes);
+        }
+
+        JXL_RETURN_IF_ERROR(WriteGroupOffsets(group_codes, permutation_ptr, writer, aux_out));
+        writer->AppendByteAligned(group_codes);
+        writer->ZeroPadToByte(); // end of frame.
+    }
+    return true;
+}
+
+} // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/others/src/acc_enc_group.cpp b/codec/L2/demos/jxlEnc/others/src/acc_enc_group.cpp
new file mode 100644
index 0000000000..090e12c212
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/acc_enc_group.cpp
@@ -0,0 +1,525 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "acc_enc_group.hpp"
+
+#include <iomanip>
+#include <iostream>
+#include <utility>
+
+#include "hwy/aligned_allocator.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "xilinx/src/acc_enc_group.cpp"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_transforms-inl.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_transforms-inl.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/quantizer-inl.h"
+#include "lib/jxl/quantizer.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// Quantizes the coefficients of one varblock of channel `c` into `block_out`.
+//
+// Every coefficient is multiplied by
+//   qm[pos] * quantizer.Scale() * quant * qm_multiplier
+// and rounded to the nearest integer; values whose magnitude is below a
+// per-quadrant threshold are forced to zero instead.
+//
+// Parameters:
+//   quantizer        supplies the inverse dequantization matrix and the global scale.
+//   error_diffusion  false: vectorized path with fixed thresholds;
+//                    true: scalar path that may lower the thresholds and retry.
+//   c                channel index; channel 1 uses thresholds {0.5, 0.6, 0.6, 0.65},
+//                    every other channel uses 0.75 for quadrants 1..3.
+//   quant            per-block quantization multiplier.
+//   qm_multiplier    extra factor applied on top of the matrix.
+//   quant_kind       selects the matrix and is tested against kPartialBlockKinds.
+//   xsize, ysize     block extent in units of kBlockDim (the loops cover
+//                    xsize * kBlockDim by ysize * kBlockDim coefficients).
+//   block_in         input coefficients.
+//   block_out        receives the quantized integers.
+//
+// NOTE: caller takes care of extracting quant from rect of RawQuantField.
+void QuantizeBlockAC(const Quantizer& quantizer,
+                     const bool error_diffusion,
+                     size_t c,
+                     int32_t quant,
+                     float qm_multiplier,
+                     size_t quant_kind,
+                     size_t xsize,
+                     size_t ysize,
+                     const float* JXL_RESTRICT block_in,
+                     int32_t* JXL_RESTRICT block_out) {
+    PROFILER_FUNC;
+    const float* JXL_RESTRICT qm = quantizer.InvDequantMatrix(quant_kind, c);
+    const float qac = quantizer.Scale() * quant;
+    // Not SIMD-fied for now.
+    // Zeroing thresholds, one per quadrant of the block. The quadrant index is
+    // 2 * (row in lower half) + (column in right half).
+    float thres[4] = {0.5f, 0.6f, 0.6f, 0.65f};
+    if (c != 1) {
+        for (int i = 1; i < 4; ++i) {
+            thres[i] = 0.75f;
+        }
+    }
+
+    if (!error_diffusion) {
+        // Vectorized path: a single pass with the fixed thresholds above.
+        HWY_CAPPED(float, kBlockDim) df;
+        HWY_CAPPED(int32_t, kBlockDim) di;
+        HWY_CAPPED(uint32_t, kBlockDim) du;
+        // Shadows the `quant` parameter: from here on it is the broadcast scale.
+        const auto quant = Set(df, qac * qm_multiplier);
+
+        for (size_t y = 0; y < ysize * kBlockDim; y++) {
+            // 0 for rows in the upper half of the block, 2 for the lower half.
+            size_t yfix = static_cast<size_t>(y >= ysize * kBlockDim / 2) * 2;
+            const size_t off = y * kBlockDim * xsize;
+            for (size_t x = 0; x < xsize * kBlockDim; x += Lanes(df)) {
+                auto thr = Zero(df);
+                if (xsize == 1) {
+                    // A row is only kBlockDim wide, so one vector can straddle the
+                    // left/right halves: pick the threshold per lane with a mask.
+                    HWY_ALIGN uint32_t kMask[kBlockDim] = {0, 0, 0, 0, ~0u, ~0u, ~0u, ~0u};
+                    const auto mask = MaskFromVec(BitCast(df, Load(du, kMask + x)));
+                    thr = IfThenElse(mask, Set(df, thres[yfix + 1]), Set(df, thres[yfix]));
+                } else {
+                    // Same for all lanes in the vector.
+                    thr = Set(df, thres[yfix + static_cast<size_t>(x >= xsize * kBlockDim / 2)]);
+                }
+
+                const auto q = Load(df, qm + off + x) * quant;
+                const auto in = Load(df, block_in + off + x);
+                const auto val = q * in;
+                // Keep the rounded value only where |val| reaches the threshold.
+                const auto nzero_mask = Abs(val) >= thr;
+                const auto v = ConvertTo(di, IfThenElseZero(nzero_mask, Round(val)));
+                Store(v, di, block_out + off + x);
+            }
+        }
+        return;
+    }
+
+    // Scalar path. Control jumps back here after thresholds have been lowered;
+    // the statistics below are re-initialized on every attempt.
+retry:
+    int hfNonZeros[4] = {};      // per quadrant: sum of |quantized value|
+    float hfError[4] = {};       // per quadrant: accumulated |val| - |rounded val|
+    float hfMaxError[4] = {};    // per quadrant: largest single error seen
+    size_t hfMaxErrorIx[4] = {}; // per quadrant: position of that largest error
+    for (size_t y = 0; y < ysize * kBlockDim; y++) {
+        for (size_t x = 0; x < xsize * kBlockDim; x++) {
+            const size_t pos = y * kBlockDim * xsize + x;
+            if (x < xsize && y < ysize) {
+                // The top-left xsize x ysize corner is not quantized here
+                // (presumably the lowest-frequency/DC part -- confirm); it is
+                // only zeroed.
+                // Ensure block is initialized
+                block_out[pos] = 0;
+                continue;
+            }
+            const size_t hfix =
+                (static_cast<size_t>(y >= ysize * kBlockDim / 2) * 2 + static_cast<size_t>(x >= xsize * kBlockDim / 2));
+            const float val = block_in[pos] * (qm[pos] * qac * qm_multiplier);
+            float v = (std::abs(val) < thres[hfix]) ? 0 : rintf(val);
+            const float error = std::abs(val) - std::abs(v);
+            hfError[hfix] += error;
+            if (hfMaxError[hfix] < error) {
+                hfMaxError[hfix] = error;
+                hfMaxErrorIx[hfix] = pos;
+            }
+            if (v != 0.0f) {
+                hfNonZeros[hfix] += std::abs(v);
+            }
+            block_out[pos] = static_cast<int32_t>(rintf(v));
+        }
+    }
+    // Threshold relaxation is only applied to channel 1.
+    if (c != 1) return;
+    // TODO(veluca): include AFV?
+    // Strategies listed here never get their thresholds relaxed.
+    const size_t kPartialBlockKinds = (1 << AcStrategy::Type::IDENTITY) | (1 << AcStrategy::Type::DCT2X2) |
+                                      (1 << AcStrategy::Type::DCT4X4) | (1 << AcStrategy::Type::DCT4X8) |
+                                      (1 << AcStrategy::Type::DCT8X4);
+    if ((1 << quant_kind) & kPartialBlockKinds) return;
+    float hfErrorLimit = 0.1f * (xsize * ysize) * kDCTBlockSize * 0.25f;
+    bool goretry = false;
+    // For quadrants 1..3: if a lot of error accumulated while almost nothing
+    // survived quantization, lower that quadrant's threshold by 0.01 (as long
+    // as it is still >= 0.4) and quantize the whole block again.
+    for (int i = 1; i < 4; ++i) {
+        if (hfError[i] >= hfErrorLimit && hfNonZeros[i] <= (xsize + ysize) * 0.25f) {
+            if (thres[i] >= 0.4f) {
+                thres[i] -= 0.01f;
+                goretry = true;
+            }
+        }
+    }
+    if (goretry) goto retry;
+    // Last resort: a quadrant that still has no non-zero value but too much
+    // error gets its worst coefficient set to +/-1, following the input's sign.
+    for (int i = 1; i < 4; ++i) {
+        if (hfError[i] >= hfErrorLimit && hfNonZeros[i] == 0) {
+            const size_t pos = hfMaxErrorIx[i];
+            if (hfMaxError[i] >= 0.4f) {
+                block_out[pos] = block_in[pos] > 0.0f ? 1.0f : -1.0f;
+            }
+        }
+    }
+}
+
+// Quantizes a channel-1 block into `quantized` and then overwrites `inout`
+// with the dequantized result (bias-adjusted quantized value times the
+// dequantization matrix times the inverse AC quant), so that `inout` holds
+// what a decoder would reconstruct.
+//
+// NOTE: caller takes care of extracting quant from rect of RawQuantField.
+void QuantizeRoundtripYBlockAC(const Quantizer& quantizer,
+                               const bool error_diffusion,
+                               int32_t quant,
+                               size_t quant_kind,
+                               size_t xsize,
+                               size_t ysize,
+                               const float* JXL_RESTRICT biases,
+                               float* JXL_RESTRICT inout,
+                               int32_t* JXL_RESTRICT quantized) {
+    // Forward step: channel 1, matrix multiplier fixed to 1.
+    QuantizeBlockAC(quantizer, error_diffusion, 1, quant, 1.0f, quant_kind, xsize, ysize, inout, quantized);
+
+    PROFILER_ZONE("enc quant adjust bias");
+    const float* JXL_RESTRICT dequant = quantizer.DequantMatrix(quant_kind, 1);
+
+    HWY_CAPPED(float, kDCTBlockSize) df;
+    HWY_CAPPED(int32_t, kDCTBlockSize) di;
+    const auto inv_scale = Set(df, quantizer.inv_quant_ac(quant));
+    const size_t num_coeffs = kDCTBlockSize * xsize * ysize;
+
+    // Backward step, one vector of coefficients at a time.
+    for (size_t pos = 0; pos < num_coeffs; pos += Lanes(df)) {
+        const auto q_int = Load(di, quantized + pos);
+        const auto q_biased = AdjustQuantBias(di, 1, q_int, biases);
+        const auto matrix = Load(df, dequant + pos);
+        const auto restored = q_biased * matrix * inv_scale;
+        Store(restored, df, inout + pos);
+    }
+}
+
+// Builds the quantized AC coefficients and the DC image of group `group_idx`
+// from transform results that were computed ahead of time and are passed in
+// through the "acc interface" vectors; no DCT is run on `opsin` here (the
+// software TransformFromPixels / DCFromLowestFrequencies calls are replaced by
+// copies out of those vectors).
+//
+// For every varblock that starts inside the group the function
+//   1. copies the channel-1 coefficients and DC values of the block's strategy,
+//      then roundtrip-quantizes channel 1 in place,
+//   2. copies the channel-0 and channel-2 coefficients, subtracts the
+//      color-correlated share of the roundtripped channel 1, quantizes them and
+//      copies their DC values,
+//   3. passes the quantized block to the progressive splitter.
+//
+// acs.RawStrategy() selects the source vectors:
+//   0 -> dct8x8/dc8x8      1 -> dctIDT/dcIDT      2 -> dct2x2/dc2x2
+//   3 -> dct4x4/dc4x4      4 -> dct16x16/dc16x16  5 -> dct32x32/dc32x32
+// Any other value only prints a diagnostic and leaves the buffers untouched.
+//
+// Parameters:
+//   group_idx   group to process.
+//   enc_state   encoder state: quant field, color maps, AC strategies,
+//               quantizer, progressive splitter and per-pass coefficient planes.
+//   opsin       not read by this variant; kept so the signature stays unchanged.
+//   dc          DC image; one value per covered 8x8 block is written.
+//   xsize       rounded up to a multiple of 64 to obtain the row pitch of the
+//               precomputed vectors (presumably the image width in pixels --
+//               confirm against the caller).
+//   ysize       not read by this variant.
+//   dct*        AC coefficients, indexed [channel][block_index * area + i].
+//   dc*         DC values, indexed [channel][block_index * n + i].
+void ComputeCoefficients(size_t group_idx,
+                         PassesEncoderState* enc_state,
+                         const Image3F& opsin,
+                         Image3F* dc,
+
+                         //==========acc interface========
+                         size_t xsize,
+                         size_t ysize,
+                         std::vector<std::vector<float> >& dctIDT,
+                         std::vector<std::vector<float> >& dct2x2,
+                         std::vector<std::vector<float> >& dct4x4,
+                         std::vector<std::vector<float> >& dct8x8,
+                         std::vector<std::vector<float> >& dct16x16,
+                         std::vector<std::vector<float> >& dct32x32,
+
+                         std::vector<std::vector<float> >& dcIDT,
+                         std::vector<std::vector<float> >& dc2x2,
+                         std::vector<std::vector<float> >& dc4x4,
+                         std::vector<std::vector<float> >& dc8x8,
+                         std::vector<std::vector<float> >& dc16x16,
+                         std::vector<std::vector<float> >& dc32x32
+                         //================================
+                         ) {
+    PROFILER_FUNC;
+    // Only needed by the software transform path that this variant replaces.
+    (void)opsin;
+    (void)ysize;
+
+    const Rect block_group_rect = enc_state->shared.BlockGroupRect(group_idx);
+    const Rect cmap_rect(block_group_rect.x0() / kColorTileDimInBlocks, block_group_rect.y0() / kColorTileDimInBlocks,
+                         DivCeil(block_group_rect.xsize(), kColorTileDimInBlocks),
+                         DivCeil(block_group_rect.ysize(), kColorTileDimInBlocks));
+
+    const size_t xsize_blocks = block_group_rect.xsize();
+    const size_t ysize_blocks = block_group_rect.ysize();
+
+    const size_t dc_stride = static_cast<size_t>(dc->PixelsPerRow());
+
+    const ImageI& full_quant_field = enc_state->shared.raw_quant_field;
+    const CompressParams& cparams = enc_state->cparams;
+
+    // Row pitch (in pixels) of the precomputed coefficient vectors.
+    const size_t tile_xsize = (xsize + 63) / 64 * 64;
+
+    // TODO(veluca): consider strategies to reduce this memory.
+    // Three areas each: one per channel, laid out as [c * size, (c + 1) * size).
+    auto mem = hwy::AllocateAligned<int32_t>(3 * AcStrategy::kMaxCoeffArea);
+    auto fmem = hwy::AllocateAligned<float>(3 * AcStrategy::kMaxCoeffArea);
+
+    // Only use error diffusion in Squirrel mode or slower.
+    const bool error_diffusion = cparams.speed_tier <= SpeedTier::kSquirrel;
+    constexpr HWY_CAPPED(float, kDCTBlockSize) d;
+
+    int32_t* JXL_RESTRICT coeffs[kMaxNumPasses][3] = {};
+    size_t num_passes = enc_state->progressive_splitter.GetNumPasses();
+    JXL_DASSERT(num_passes > 0);
+    for (size_t i = 0; i < num_passes; i++) {
+        // TODO(veluca): 16-bit quantized coeffs are not implemented yet.
+        JXL_ASSERT(enc_state->coeffs[i]->Type() == ACType::k32);
+        for (size_t c = 0; c < 3; c++) {
+            coeffs[i][c] = enc_state->coeffs[i]->PlaneRow(c, group_idx, 0).ptr32;
+        }
+    }
+
+    HWY_ALIGN float* coeffs_in = fmem.get();
+    HWY_ALIGN int32_t* quantized = mem.get();
+
+    // Running position handed to the splitter; advances by `size` per varblock.
+    size_t offset = 0;
+
+    for (size_t by = 0; by < ysize_blocks; ++by) {
+        const int32_t* JXL_RESTRICT row_quant_ac = block_group_rect.ConstRow(full_quant_field, by);
+        size_t ty = by / kColorTileDimInBlocks;
+        const int8_t* JXL_RESTRICT row_cmap[3] = {
+            cmap_rect.ConstRow(enc_state->shared.cmap.ytox_map, ty), nullptr,
+            cmap_rect.ConstRow(enc_state->shared.cmap.ytob_map, ty),
+        };
+        float* JXL_RESTRICT dc_rows[3] = {
+            block_group_rect.PlaneRow(dc, 0, by), block_group_rect.PlaneRow(dc, 1, by),
+            block_group_rect.PlaneRow(dc, 2, by),
+        };
+        AcStrategyRow ac_strategy_row = enc_state->shared.ac_strategy.ConstRow(block_group_rect, by);
+        for (size_t tx = 0; tx < DivCeil(xsize_blocks, kColorTileDimInBlocks); tx++) {
+            const auto x_factor = Set(d, enc_state->shared.cmap.YtoXRatio(row_cmap[0][tx]));
+            const auto b_factor = Set(d, enc_state->shared.cmap.YtoBRatio(row_cmap[2][tx]));
+            for (size_t bx = tx * kColorTileDimInBlocks; bx < xsize_blocks && bx < (tx + 1) * kColorTileDimInBlocks;
+                 ++bx) {
+                const AcStrategy acs = ac_strategy_row[bx];
+                if (!acs.IsFirstBlock()) continue;
+
+                // Number of 8x8 blocks covered horizontally/vertically by this
+                // strategy; CoefficientLayout receives both by pointer and may
+                // update them.
+                size_t xblocks = acs.covered_blocks_x();
+                size_t yblocks = acs.covered_blocks_y();
+                CoefficientLayout(&yblocks, &xblocks);
+
+                const size_t size = kDCTBlockSize * xblocks * yblocks;
+                const int32_t quant_ac = row_quant_ac[bx];
+                const size_t strategy = acs.RawStrategy();
+
+                // Index of this block inside the precomputed vectors, at 8x8,
+                // 16x16 and 32x32 granularity.
+                const size_t block_y = block_group_rect.y0() + by;
+                const size_t block_x = block_group_rect.x0() + bx;
+                const size_t block_cnt8x8 = block_y * (tile_xsize / 8) + block_x;
+                const size_t block_cnt16x16 = block_y / 2 * (tile_xsize / 16) + block_x / 2;
+                const size_t block_cnt32x32 = block_y / 4 * (tile_xsize / 32) + block_x / 4;
+
+                // Copies the precomputed coefficients of channel `c` into `dst`.
+                // The strategy is resolved once per block instead of once per
+                // coefficient, and exactly as many values as the transform
+                // produces are copied.
+                const auto load_ac = [&](size_t c, float* dst) {
+                    const float* src = nullptr;
+                    size_t count = 0;
+                    switch (strategy) {
+                        case 0:
+                            src = dct8x8[c].data() + 64 * block_cnt8x8;
+                            count = 64;
+                            break;
+                        case 1:
+                            src = dctIDT[c].data() + 64 * block_cnt8x8;
+                            count = 64;
+                            break;
+                        case 2:
+                            src = dct2x2[c].data() + 64 * block_cnt8x8;
+                            count = 64;
+                            break;
+                        case 3:
+                            src = dct4x4[c].data() + 64 * block_cnt8x8;
+                            count = 64;
+                            break;
+                        case 4:
+                            src = dct16x16[c].data() + 16 * 16 * block_cnt16x16;
+                            count = 16 * 16;
+                            break;
+                        case 5:
+                            src = dct32x32[c].data() + 32 * 32 * block_cnt32x32;
+                            count = 32 * 32;
+                            break;
+                        default:
+                            std::cout << "unsupported DCT" << std::endl;
+                            return;
+                    }
+                    for (size_t i = 0; i < count; ++i) {
+                        dst[i] = src[i];
+                    }
+                };
+
+                // Copies the precomputed DC values of channel `c` into the DC
+                // image: one value for the 8x8-sized strategies, a 2x2 patch
+                // for strategy 4 and a 4x4 patch for strategy 5.
+                const auto store_dc = [&](size_t c) {
+                    float* dst = dc_rows[c] + bx;
+                    const float* src = nullptr;
+                    size_t side = 1;
+                    switch (strategy) {
+                        case 0:
+                            src = dc8x8[c].data() + block_cnt8x8;
+                            break;
+                        case 1:
+                            src = dcIDT[c].data() + block_cnt8x8;
+                            break;
+                        case 2:
+                            src = dc2x2[c].data() + block_cnt8x8;
+                            break;
+                        case 3:
+                            src = dc4x4[c].data() + block_cnt8x8;
+                            break;
+                        case 4:
+                            src = dc16x16[c].data() + 4 * block_cnt16x16;
+                            side = 2;
+                            break;
+                        case 5:
+                            src = dc32x32[c].data() + 16 * block_cnt32x32;
+                            side = 4;
+                            break;
+                        default:
+                            std::cout << "unsupported DCFromLowFREQ" << std::endl;
+                            return;
+                    }
+                    for (size_t i = 0; i < side; ++i) {
+                        for (size_t j = 0; j < side; ++j) {
+                            dst[i * dc_stride + j] = src[i * side + j];
+                        }
+                    }
+                };
+
+                // Channel 1 lives in the middle third of the scratch buffer.
+                float* coef_dct = coeffs_in + size;
+
+#ifdef XLNX_QC_DEBUG_ENC_GROUP
+                if (acs.RawStrategy() == 0) {
+                    std::cout << "========================debug===================== "
+                                 "convered blocks: "
+                              << acs.covered_blocks_x() << " tile_xsize: " << tile_xsize
+                              << " bx: " << block_group_rect.x0() << " " << bx << " by: " << block_group_rect.y0()
+                              << " " << by << std::endl;
+                    for (int i = 0; i < 64; i++) {
+                        std::cout << std::setw(15) << coef_dct[i] << " ";
+                    }
+                    std::cout << std::endl;
+                    for (int i = 0; i < 64; i++) {
+                        std::cout << std::setw(15) << dct8x8[1][64 * block_cnt8x8 + i] << " ";
+                    }
+                    std::cout << std::endl;
+                    for (int i = 0; i < 64; i++) {
+                        if (coef_dct[i] != dct8x8[1][64 * block_cnt8x8 + i]) std::cout << "!!!";
+                    }
+                    std::cout << std::endl;
+                }
+#endif
+
+                // Channel 1: fetch coefficients, set DC, roundtrip-quantize.
+                load_ac(1, coef_dct);
+
+#ifdef XLNX_QC_DEBUG_ENC_GROUP_DC
+                if (acs.RawStrategy() == 5) {
+                    const float* coef_dc = dc_rows[1] + bx;
+                    std::cout << "========================debug===================== "
+                                 "convered blocks: "
+                              << acs.covered_blocks_x() << " tile_xsize: " << tile_xsize
+                              << " bx: " << block_group_rect.x0() << " " << bx << " by: " << block_group_rect.y0()
+                              << " " << by << " dc_stride: " << dc_stride << std::endl;
+                    for (int i = 0; i < 4; i++) {
+                        for (int j = 0; j < 4; j++) {
+                            std::cout << std::setw(15) << coef_dc[i * dc_stride + j] << " ";
+                        }
+                    }
+                    std::cout << std::endl;
+                    for (int i = 0; i < 16; i++) {
+                        std::cout << std::setw(15) << dc32x32[1][16 * block_cnt32x32 + i] << " ";
+                    }
+                    std::cout << std::endl;
+                    for (int i = 0; i < 4; i++) {
+                        for (int j = 0; j < 4; j++) {
+                            if (coef_dc[i * dc_stride + j] != dc32x32[1][16 * block_cnt32x32 + i * 4 + j])
+                                std::cout << "!!!";
+                        }
+                    }
+                    std::cout << std::endl;
+                }
+#endif
+
+                store_dc(1);
+
+                QuantizeRoundtripYBlockAC(enc_state->shared.quantizer, error_diffusion, quant_ac, acs.RawStrategy(),
+                                          xblocks, yblocks, kDefaultQuantBias, coeffs_in + size, quantized + size);
+
+                // Channels 0 and 2: fetch coefficients.
+                for (size_t c : {0, 2}) {
+                    load_ac(c, coeffs_in + c * size);
+                }
+
+                // Unapply color correlation
+                for (size_t k = 0; k < size; k += Lanes(d)) {
+                    const auto in_x = Load(d, coeffs_in + k);
+                    const auto in_y = Load(d, coeffs_in + size + k);
+                    const auto in_b = Load(d, coeffs_in + 2 * size + k);
+                    const auto out_x = in_x - x_factor * in_y;
+                    const auto out_b = in_b - b_factor * in_y;
+                    Store(out_x, d, coeffs_in + k);
+                    Store(out_b, d, coeffs_in + 2 * size + k);
+                }
+
+                // Quantize X and B channels and set DC.
+                for (size_t c : {0, 2}) {
+                    QuantizeBlockAC(enc_state->shared.quantizer, error_diffusion, c, quant_ac,
+                                    c == 0 ? enc_state->x_qm_multiplier : enc_state->b_qm_multiplier,
+                                    acs.RawStrategy(), xblocks, yblocks, coeffs_in + c * size,
+                                    quantized + c * size);
+                    store_dc(c);
+                }
+                enc_state->progressive_splitter.SplitACCoefficients(quantized, size, acs, bx, by, offset, coeffs);
+                offset += size;
+            }
+        }
+    }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(ComputeCoefficients);
+// Public entry point: forwards every argument unchanged to the
+// ComputeCoefficients implementation selected by HWY_DYNAMIC_DISPATCH
+// (exported just above via HWY_EXPORT).
+void ComputeCoefficients(size_t group_idx,
+                         PassesEncoderState* enc_state,
+                         const Image3F& opsin,
+                         Image3F* dc,
+                         //==========acc interface========
+                         size_t xsize,
+                         size_t ysize,
+                         // Precomputed AC coefficients, one vector per transform kind.
+                         std::vector<std::vector<float> >& dctIDT,
+                         std::vector<std::vector<float> >& dct2x2,
+                         std::vector<std::vector<float> >& dct4x4,
+                         std::vector<std::vector<float> >& dct8x8,
+                         std::vector<std::vector<float> >& dct16x16,
+                         std::vector<std::vector<float> >& dct32x32,
+
+                         // Precomputed DC values, one vector per transform kind.
+                         std::vector<std::vector<float> >& dcIDT,
+                         std::vector<std::vector<float> >& dc2x2,
+                         std::vector<std::vector<float> >& dc4x4,
+                         std::vector<std::vector<float> >& dc8x8,
+                         std::vector<std::vector<float> >& dc16x16,
+                         std::vector<std::vector<float> >& dc32x32
+                         //================================
+                         ) {
+    return HWY_DYNAMIC_DISPATCH(ComputeCoefficients)(group_idx, enc_state, opsin, dc, xsize, ysize, dctIDT, dct2x2,
+                                                     dct4x4, dct8x8, dct16x16, dct32x32, dcIDT, dc2x2, dc4x4, dc8x8,
+                                                     dc16x16, dc32x32);
+}
+
+// Writes the AC tokens of one (pass, group) pair to `writer`.
+//
+// When the histogram selector needs at least one bit, `histogram_idx` is
+// written first using that many bits; the tokens of
+// enc_state.passes[pass_idx].ac_tokens[group_idx] follow, coded with that
+// pass's codes and context map. Always returns true.
+Status EncodeGroupTokenizedCoefficients(size_t group_idx,
+                                        size_t pass_idx,
+                                        size_t histogram_idx,
+                                        const PassesEncoderState& enc_state,
+                                        BitWriter* writer,
+                                        AuxOut* aux_out) {
+    const size_t histogram_count = enc_state.shared.num_histograms;
+    // A count of 0 only occurs for lossless.
+    JXL_ASSERT(histogram_count == 0 || histogram_idx < histogram_count);
+
+    // Pick the histogram to use among those of the current pass.
+    const size_t selector_bits = CeilLog2Nonzero(histogram_count);
+    if (selector_bits != 0) {
+        BitWriter::Allotment allotment(writer, selector_bits);
+        writer->Write(selector_bits, histogram_idx);
+        ReclaimAndCharge(writer, &allotment, kLayerAC, aux_out);
+    }
+
+    const auto& pass = enc_state.passes[pass_idx];
+    WriteTokens(pass.ac_tokens[group_idx], pass.codes, pass.context_map, writer, kLayerACTokens, aux_out);
+
+    return true;
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/others/src/acc_init_histogram.cpp b/codec/L2/demos/jxlEnc/others/src/acc_init_histogram.cpp
new file mode 100644
index 0000000000..cf426cded0
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/acc_init_histogram.cpp
@@ -0,0 +1,115 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef ACC_INIT_HISTOGRAM_CPP
+#define ACC_INIT_HISTOGRAM_CPP
+
+#include "acc_init_histogram.hpp"
+
+namespace jxl {
+// Accumulates every token of `tokens` into the histogram of its context.
+//
+// Each token value is split with the default HybridUintConfig and only the
+// resulting symbol is added to histograms[token.context]; the extra-bit
+// outputs are not used here.
+//
+// Parameters:
+//   histograms  updated in place; must already hold an entry for every context
+//               that occurs in `tokens` (no bounds check is done).
+//   tokens      token lists to count; not modified.
+//
+// Returns false unconditionally. Callers store the result as their
+// "use prefix code" flag.
+// NOTE(review): earlier code computed `total_tokens < 100` for that flag and
+// then discarded it; confirm that always reporting false is intended.
+bool acc_InitHistogram(std::vector<Histogram>& histograms, std::vector<std::vector<Token> >& tokens) {
+    HybridUintConfig uint_config; //  Default config for clustering.
+
+    for (const std::vector<Token>& token_list : tokens) {
+        for (const Token& token : token_list) {
+            uint32_t tok, nbits, bits;
+            uint_config.Encode(token.value, &tok, &nbits, &bits);
+            histograms[token.context].Add(tok);
+        }
+    }
+    return false;
+}
+
+// Tokenizes the AC coefficients of all groups and then fills up to five sets
+// of histograms from token lists.
+//
+// Step 1: for every group and every pass, TokenizeCoefficients is called with
+//         &enc_state->passes[pass].ac_tokens[group] and a single shared group
+//         cache (entry 0).
+// Step 2: for every i in [0, 5) with do_once[i] set, histograms_[i] is filled
+//         from tokens0..tokens3 (i = 0..3) or from pass 0's AC tokens (i = 4).
+//         When the set holds more than one histogram, the indices of its
+//         non-empty histograms are appended to nonempty_histograms[i] and
+//         largest_idx[i] is set to the position, within that list, of the
+//         histogram with the highest total count.
+//
+// Parameters:
+//   lossy_frame_encoder  provides the encoder state and the group caches.
+//   frame_header         supplies the chroma subsampling for tokenization.
+//   params               not read by this function.
+//   do_once              selects which of the five sets are processed.
+//   tokens0..tokens3     token lists for sets 0..3; none of them is modified.
+//   do_prefix_out        receives, per processed set, the flag returned by
+//                        acc_InitHistogram.
+//   largest_idx, nonempty_histograms, histograms_
+//                        per-set outputs as described above; must already have
+//                        at least five entries each.
+void acc_ANSinitHistogram(LossyFrameEncoder& lossy_frame_encoder,
+                          std::unique_ptr<FrameHeader>& frame_header,
+
+                          std::vector<HistogramParams>& params,
+                          bool do_once[5],
+
+                          std::vector<std::vector<Token> >& tokens0,
+                          std::vector<std::vector<Token> >& tokens1,
+                          std::vector<std::vector<Token> >& tokens2,
+                          std::vector<std::vector<Token> >& tokens3,
+
+                          char* do_prefix_out,
+                          std::vector<uint32_t>& largest_idx,
+                          std::vector<std::vector<uint32_t> >& nonempty_histograms,
+                          std::vector<std::vector<Histogram> >& histograms_) {
+    PassesEncoderState* JXL_RESTRICT enc_state_ = lossy_frame_encoder.State();
+    PassesSharedState& shared = enc_state_->shared;
+    std::vector<EncCache>& group_caches_ = lossy_frame_encoder.get_group_cashes();
+
+    // One cache entry is shared by all groups; only index 0 is used below.
+    group_caches_.resize(1);
+    for (size_t group_index = 0; group_index < shared.frame_dim.num_groups; group_index++) {
+        // Tokenize coefficients.
+        const Rect rect = shared.BlockGroupRect(group_index);
+        for (size_t idx_pass = 0; idx_pass < enc_state_->passes.size(); idx_pass++) {
+            JXL_ASSERT(enc_state_->coeffs[idx_pass]->Type() == ACType::k32);
+            const int32_t* JXL_RESTRICT ac_rows[3] = {
+                enc_state_->coeffs[idx_pass]->PlaneRow(0, group_index, 0).ptr32,
+                enc_state_->coeffs[idx_pass]->PlaneRow(1, group_index, 0).ptr32,
+                enc_state_->coeffs[idx_pass]->PlaneRow(2, group_index, 0).ptr32,
+            };
+            // Ensure group cache is initialized.
+            group_caches_[0].InitOnce();
+            TokenizeCoefficients(&shared.coeff_orders[idx_pass * shared.coeff_order_size], rect, ac_rows,
+                                 shared.ac_strategy, frame_header->chroma_subsampling, &group_caches_[0].num_nzeroes,
+                                 &enc_state_->passes[idx_pass].ac_tokens[group_index], enc_state_->shared.quant_dc,
+                                 enc_state_->shared.raw_quant_field, enc_state_->shared.block_ctx_map);
+        }
+    }
+
+    for (int i = 0; i < 5; i++) {
+        if (!do_once[i]) continue;
+
+        // Select the token source through a pointer. A C++ reference cannot be
+        // re-bound: assigning to a reference that was bound to tokens0 would
+        // deep-copy the chosen list into the caller's tokens0 and overwrite it.
+        std::vector<std::vector<Token> >* selected = nullptr;
+        switch (i) {
+            case 0:
+                selected = &tokens0;
+                break;
+            case 1:
+                selected = &tokens1;
+                break;
+            case 2:
+                selected = &tokens2;
+                break;
+            case 3:
+                selected = &tokens3;
+                break;
+            default: // i == 4
+                selected = &enc_state_->passes[0].ac_tokens;
+                break;
+        }
+
+        bool use_prefix_code = acc_InitHistogram(histograms_[i], *selected);
+
+        do_prefix_out[i] = (char)use_prefix_code;
+
+        if (histograms_[i].size() > 1) {
+            largest_idx[i] = 0;
+            nonempty_histograms[i].reserve(histograms_[i].size());
+            for (size_t j = 0; j < histograms_[i].size(); j++) {
+                if (histograms_[i][j].total_count_ == 0) continue;
+
+                if (histograms_[i][j].total_count_ > histograms_[i][largest_idx[i]].total_count_) {
+                    largest_idx[i] = j;
+                }
+                nonempty_histograms[i].push_back(j);
+            }
+
+            // Convert the histogram index into its position within the list of
+            // non-empty histograms.
+            largest_idx[i] = std::find(nonempty_histograms[i].begin(), nonempty_histograms[i].end(), largest_idx[i]) -
+                             nonempty_histograms[i].begin();
+        }
+    }
+}
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_host.cpp b/codec/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_host.cpp
new file mode 100644
index 0000000000..c4c5a60e2c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_host.cpp
@@ -0,0 +1,308 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "acc_host.hpp"
+
+namespace jxl {
+void FindBestDequantMatrices(const CompressParams& cparams,
+                             const Image3F& opsin,
+                             ModularFrameEncoder* modular_frame_encoder,
+                             DequantMatrices* dequant_matrices) {
+    // Always reset to default-constructed matrices; `opsin` is not read by this implementation.
+    // TODO(veluca): quant matrices for no-gaborish; heuristics for in-bitstream quant tables.
+    *dequant_matrices = DequantMatrices();
+    if (!cparams.max_error_mode) return;
+
+    // Max-error mode: constant weights of 1 / max_error[c] for every AC table and for DC.
+    float dc_weights[3];
+    for (size_t c = 0; c < 3; ++c) dc_weights[c] = 1.0f / cparams.max_error[c];
+    float ac_weights[3][1] = {{dc_weights[0]}, {dc_weights[1]}, {dc_weights[2]}};
+    DctQuantWeightParams dct_params(ac_weights);
+    std::vector<QuantEncoding> encodings(DequantMatrices::kNum, QuantEncoding::DCT(dct_params));
+    DequantMatricesSetCustom(dequant_matrices, encodings, modular_frame_encoder);
+    DequantMatricesSetCustomDC(dequant_matrices, dc_weights);
+}
+
+bool DefaultEncoderHeuristics::HandlesColorConversion(const CompressParams& cparams, const ImageBundle& ib) {
+    return !(cparams.noise == Override::kOn || cparams.patches == Override::kOn || cparams.modular_mode ||
+             cparams.speed_tier < SpeedTier::kWombat || cparams.resampling != 1 || ib.HasAlpha() ||
+             cparams.color_transform != ColorTransform::kXYB);  // false as soon as one exclusion applies
+}
+
+Status acc_host(std::string xclbinPath,
+                Image3F& opsin,
+                LossyFrameEncoder& lossy_frame_encoder,
+                const ImageBundle* JXL_RESTRICT ib_or_linear,
+                ThreadPool* pool,
+                std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                BitWriter* writer,
+                AuxOut* aux_out,
+                std::unique_ptr<FrameHeader>& frame_header,
+                const FrameInfo& frame_info,
+                CompressParams cparams,
+                const std::vector<ImageF>* extra_channels,
+                PassesEncoderState* passes_enc_state,
+                FrameDimensions frame_dim,
+                const size_t num_groups,
+                const ImageBundle& ib,
+                std::vector<AuxOut>& aux_outs,
+                const std::function<Status(size_t)>& resize_aux_outs) {
+    // Runs the three phases in order and returns the first error reported by any of them.
+    JXL_RETURN_IF_ERROR(
+        acc_phase1(opsin, lossy_frame_encoder, cparams, frame_header, frame_info, ib_or_linear, ib, aux_out, pool));
+    JXL_RETURN_IF_ERROR(acc_phase2(xclbinPath, opsin, lossy_frame_encoder, modular_frame_encoder, cparams,
+                                   frame_header, extra_channels, ib_or_linear, ib, pool, aux_out));
+    JXL_RETURN_IF_ERROR(acc_phase3(xclbinPath, opsin, lossy_frame_encoder, modular_frame_encoder, cparams,
+                                   frame_header, passes_enc_state, frame_dim, writer, num_groups, aux_out, pool,
+                                   aux_outs, ib, resize_aux_outs));
+    return true;
+}
+
+// Runs the lossy (VarDCT) frame heuristics on `opsin`, updating `enc_state` along the way.
+//
+// Steps, in order: optional noise-parameter estimation, optional downsampling, target-size / distance check,
+// spline and patch subtraction, global quantizer scale, initial quant field, inverse Gaborish, then per tile:
+// EPF sharpness, quant-field adjustment and CfL map; finally CfL DC, dequant matrices, quantizer refinement
+// and (for speed_tier < kFalcon) the block entropy model.
+// Compile-time switches: XLNX_DISABLE_BLK_DICT skips spline/patch subtraction, XLNX_DISABLE_ARC replaces
+// ar_heuristics.RunRect() with a constant EPF sharpness of 4, XLNX_DISABLE_2NDCMP skips the per-tile CfL map,
+// and XLNX_DEBUG_CMAP prints the color-correlation maps to stdout.
+//
+// enc_state: encoder state; its `cparams` and `shared` members are read and updated in place.
+// modular_frame_encoder: forwarded to FindBestDequantMatrices().
+// original_pixels: source image; converted with ToXYB() when `opsin` is empty, forwarded to FindBestQuantizer().
+// opsin: modified in place (downsampling, padding, spline/patch subtraction, inverse Gaborish).
+// pool / aux_out: forwarded to the helpers that accept them.
+// Returns true on success; fails on a negative butteraugli distance or when spline subtraction fails.
+Status DefaultEncoderHeuristics::LossyFrameHeuristics(PassesEncoderState* enc_state,
+                                                      ModularFrameEncoder* modular_frame_encoder,
+                                                      const ImageBundle* original_pixels,
+                                                      Image3F* opsin,
+                                                      ThreadPool* pool,
+                                                      AuxOut* aux_out) {
+    PROFILER_ZONE("JxlLossyFrameHeuristics uninstrumented");
+
+    CompressParams& cparams = enc_state->cparams;
+    PassesSharedState& shared = enc_state->shared;
+
+    // Compute parameters for noise synthesis.
+    if (shared.frame_header.flags & FrameHeader::kNoise) {
+        PROFILER_ZONE("enc GetNoiseParam");
+        if (cparams.photon_noise_iso > 0) {
+            shared.image_features.noise_params =
+                SimulatePhotonNoise(opsin->xsize(), opsin->ysize(), cparams.photon_noise_iso);
+        } else {
+            // Don't start at zero amplitude since adding noise is expensive -- it
+            // significantly slows down decoding, and this is unlikely to
+            // completely go away even with advanced optimizations. After the
+            // kNoiseModelingRampUpDistanceRange we have reached the full level,
+            // i.e. noise is no longer represented by the compressed image, so we
+            // can add full noise by the noise modeling itself.
+            static const float kNoiseModelingRampUpDistanceRange = 0.6;
+            static const float kNoiseLevelAtStartOfRampUp = 0.25;
+            static const float kNoiseRampupStart = 1.0;
+            // TODO(user) test and properly select quality_coef with smooth
+            // filter
+            float quality_coef = 1.0f;
+            const float rampup = (cparams.butteraugli_distance - kNoiseRampupStart) / kNoiseModelingRampUpDistanceRange;
+            if (rampup < 1.0f) {
+                quality_coef = kNoiseLevelAtStartOfRampUp + (1.0f - kNoiseLevelAtStartOfRampUp) * rampup;
+            }
+            if (rampup < 0.0f) {
+                quality_coef = kNoiseRampupStart;
+            }
+            if (!GetNoiseParameter(*opsin, &shared.image_features.noise_params, quality_coef)) {
+                shared.frame_header.flags &= ~FrameHeader::kNoise;
+            }
+        }
+    }
+    if (enc_state->shared.frame_header.upsampling != 1 && !cparams.already_downsampled) {
+        // In VarDCT mode, LossyFrameHeuristics takes care of running downsampling
+        // after noise, if necessary.
+        DownsampleImage(opsin, cparams.resampling);
+        PadImageToBlockMultipleInPlace(opsin);
+    }
+
+    const FrameDimensions& frame_dim = enc_state->shared.frame_dim;
+    size_t target_size = TargetSize(cparams, frame_dim);
+    size_t opsin_target_size = target_size;
+    if (cparams.target_size > 0 || cparams.target_bitrate > 0.0) {
+        cparams.target_size = opsin_target_size;
+    } else if (cparams.butteraugli_distance < 0) {
+        return JXL_FAILURE("Expected non-negative distance");
+    }
+
+#ifndef XLNX_DISABLE_BLK_DICT
+    // Find and subtract splines.
+    if (cparams.speed_tier <= SpeedTier::kSquirrel) {
+        shared.image_features.splines = FindSplines(*opsin);
+        JXL_RETURN_IF_ERROR(shared.image_features.splines.SubtractFrom(opsin, shared.cmap));
+    }
+
+    // Find and subtract patches/dots.
+    if (ApplyOverride(cparams.patches, cparams.speed_tier <= SpeedTier::kSquirrel)) {
+        FindBestPatchDictionary(*opsin, enc_state, pool, aux_out);
+        PatchDictionaryEncoder::SubtractFrom(shared.image_features.patches, opsin);
+    }
+#endif
+
+    static const float kAcQuant = 0.79f;
+    const float quant_dc = InitialQuantDC(cparams.butteraugli_distance);
+    Quantizer& quantizer = enc_state->shared.quantizer;
+    // We don't know the quant field yet, but for computing the global scale
+    // assuming that it will be the same as for Falcon mode is good enough.
+    quantizer.ComputeGlobalScaleAndQuant(quant_dc, kAcQuant / cparams.butteraugli_distance, 0);
+
+    // TODO(veluca): we can now run all the code from here to FindBestQuantizer
+    // (excluded) one rect at a time. Do that.
+
+    // Dependency graph:
+    //
+    // input: either XYB or input image
+    //
+    // input image -> XYB [optional]
+    // XYB -> initial quant field
+    // XYB -> Gaborished XYB
+    // Gaborished XYB -> CfL1
+    // initial quant field, Gaborished XYB, CfL1 -> ACS
+    // initial quant field, ACS, Gaborished XYB -> EPF control field
+    // initial quant field -> adjusted initial quant field
+    // adjusted initial quant field, ACS -> raw quant field
+    // raw quant field, ACS, Gaborished XYB -> CfL2
+    //
+    // output: Gaborished XYB, CfL, ACS, raw quant field, EPF control field.
+
+    ArControlFieldHeuristics ar_heuristics;
+    AcStrategyHeuristics acs_heuristics;
+    CfLHeuristics cfl_heuristics;
+
+    if (!opsin->xsize()) {
+        JXL_ASSERT(HandlesColorConversion(cparams, *original_pixels));
+        *opsin = Image3F(RoundUpToBlockDim(original_pixels->xsize()), RoundUpToBlockDim(original_pixels->ysize()));
+        opsin->ShrinkTo(original_pixels->xsize(), original_pixels->ysize());
+        ToXYB(*original_pixels, pool, opsin, /*linear=*/nullptr);
+        PadImageToBlockMultipleInPlace(opsin);
+    }
+
+    // Compute an initial estimate of the quantization field.
+    // Call InitialQuantField only in Hare mode or slower. Otherwise, rely
+    // on simple heuristics in FindBestAcStrategy, or set a constant for Falcon
+    // mode.
+    if (cparams.speed_tier > SpeedTier::kHare || cparams.uniform_quant > 0) {
+        enc_state->initial_quant_field = ImageF(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
+        float q = cparams.uniform_quant > 0 ? cparams.uniform_quant : kAcQuant / cparams.butteraugli_distance;
+        FillImage(q, &enc_state->initial_quant_field);
+    } else {
+        // Call this here, as it relies on pre-gaborish values.
+        float butteraugli_distance_for_iqf = cparams.butteraugli_distance;
+        if (!shared.frame_header.loop_filter.gab) {
+            butteraugli_distance_for_iqf *= 0.73f;
+        }
+        enc_state->initial_quant_field = InitialQuantField(butteraugli_distance_for_iqf, *opsin, shared.frame_dim, pool,
+                                                           1.0f, &enc_state->initial_quant_masking);
+    }
+
+    // TODO(veluca): do something about animations.
+
+    // Apply inverse-gaborish.
+    if (shared.frame_header.loop_filter.gab) {
+        GaborishInverse(opsin, 0.9908511000000001f, pool);
+    }
+
+    cfl_heuristics.Init(*opsin);
+    acs_heuristics.Init(*opsin, enc_state);
+    ar_heuristics.PrepareForThreads(/*num_threads*/ 1);
+    cfl_heuristics.PrepareForThreads(/*num_threads*/ 1);
+
+    // Tiles are visited sequentially in this function, always with thread index 0; the PrepareForThreads(1)
+    // calls above match that. The tile grid is ceil(xsize_blocks / kEncTileDimInBlocks) columns wide and
+    // ceil(ysize_blocks / kEncTileDimInBlocks) rows high, and `tid` walks it in row-major order.
+    const size_t n_enc_tiles = DivCeil(enc_state->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks);
+    const size_t n_enc_tile_rows = DivCeil(enc_state->shared.frame_dim.ysize_blocks, kEncTileDimInBlocks);
+    for (size_t tid = 0; tid < n_enc_tiles * n_enc_tile_rows; tid++) {
+        size_t thread = 0;
+        size_t tx = tid % n_enc_tiles;
+        size_t ty = tid / n_enc_tiles;
+        size_t by0 = ty * kEncTileDimInBlocks;
+        size_t by1 = std::min((ty + 1) * kEncTileDimInBlocks, enc_state->shared.frame_dim.ysize_blocks);
+        size_t bx0 = tx * kEncTileDimInBlocks;
+        size_t bx1 = std::min((tx + 1) * kEncTileDimInBlocks, enc_state->shared.frame_dim.xsize_blocks);
+        Rect r(bx0, by0, bx1 - bx0, by1 - by0);
+
+        // NOTE: two steps are disabled in this function: the early CfL pass (cfl_heuristics.ComputeTile() without
+        // AC strategy / quantizer, for speed_tier <= kSquirrel) and block-size choice (acs_heuristics.ProcessRect).
+
+        // Choose amount of post-processing smoothing.
+        // TODO(veluca): should this go *after* AdjustQuantField?
+#ifndef XLNX_DISABLE_ARC
+        ar_heuristics.RunRect(r, *opsin, enc_state, thread);
+#else
+        ImageB* JXL_RESTRICT epf_sharpness = &enc_state->shared.epf_sharpness;
+        FillPlane(static_cast<uint8_t>(4), epf_sharpness, r);
+#endif
+        // Always set the initial quant field, so we can compute the CfL map with
+        // more accuracy. The initial quant field might change in slower modes, but
+        // adjusting the quant field with butteraugli when all the other encoding
+        // parameters are fixed is likely a more reliable choice anyway.
+        AdjustQuantField(enc_state->shared.ac_strategy, r, &enc_state->initial_quant_field);
+        quantizer.SetQuantFieldRect(enc_state->initial_quant_field, r, &enc_state->shared.raw_quant_field);
+
+        // Compute a non-default CfL map if we are at Hare speed, or slower.
+#ifndef XLNX_DISABLE_2NDCMP
+        if (cparams.speed_tier <= SpeedTier::kHare) {
+            cfl_heuristics.ComputeTile(
+                r, *opsin, enc_state->shared.matrices, &enc_state->shared.ac_strategy, &enc_state->shared.quantizer,
+                /*fast=*/cparams.speed_tier >= SpeedTier::kWombat, thread, &enc_state->shared.cmap);
+        }
+#endif
+    }
+
+    acs_heuristics.Finalize(aux_out);
+    if (cparams.speed_tier <= SpeedTier::kHare) {
+        cfl_heuristics.ComputeDC(/*fast=*/cparams.speed_tier >= SpeedTier::kWombat, &enc_state->shared.cmap);
+    }
+
+    FindBestDequantMatrices(cparams, *opsin, modular_frame_encoder, &enc_state->shared.matrices);
+
+    // Refine quantization levels.
+    FindBestQuantizer(original_pixels, *opsin, enc_state, pool, aux_out);
+
+    // Choose a context model that depends on the amount of quantization for AC.
+    if (cparams.speed_tier < SpeedTier::kFalcon) {
+        FindBestBlockEntropyModel(*enc_state);
+    }
+
+#ifdef XLNX_DEBUG_CMAP
+    // Debug dump of both color-correlation maps and their DC values to stdout.
+    std::cout << "=========================================" << std::endl;
+    std::cout << "ColorMap info: " << std::endl;
+    ImageSB* JXL_RESTRICT tmp_map = &enc_state->shared.cmap.ytox_map;
+    int32_t dc = enc_state->shared.cmap.GetYToXDC();
+    std::cout << "Y to X dc: " << dc << std::endl;
+    for (int i = 0; i < tmp_map->ysize(); i++) {
+        int8_t* JXL_RESTRICT row_out = tmp_map->Row(i);
+        for (int j = 0; j < tmp_map->xsize(); j++) {
+            std::cout << (int)row_out[j] << " ";
+        }
+        std::cout << std::endl;
+    }
+
+    // Second dump: the Y-to-B map (ytob_map), paired with GetYToBDC().
+    tmp_map = &enc_state->shared.cmap.ytob_map;
+    dc = enc_state->shared.cmap.GetYToBDC();
+    std::cout << "Y to B dc: " << dc << std::endl;
+    for (int i = 0; i < tmp_map->ysize(); i++) {
+        int8_t* JXL_RESTRICT row_out = tmp_map->Row(i);
+        for (int j = 0; j < tmp_map->xsize(); j++) {
+            std::cout << (int)row_out[j] << " ";
+        }
+        std::cout << std::endl;
+    }
+    std::cout << std::endl;
+#endif
+
+    return true;
+}
+} // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_phase1.cpp b/codec/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_phase1.cpp
new file mode 100644
index 0000000000..a37f251c20
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_phase1.cpp
@@ -0,0 +1,276 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef HLS_KERNEL1_CPP
+#define HLS_KERNEL1_CPP
+
+#include "acc_phase1.hpp"
+
+namespace jxl {
+namespace {
+// Invisible (alpha = 0) pixels tend to be a mess in optimized PNGs.
+// Since they have no visual impact whatsoever, we can replace them with
+// something that compresses better and reduces artifacts near the edges. This
+// does some kind of smooth stuff that seems to work.
+// Replace invisible pixels with a weighted average of the pixel to the left,
+// the pixel to the topright, and non-invisible neighbours.
+// Produces downward-blurry smears; in the upward direction there is only a
+// 1px edge duplication. It would probably be better to smear in all
+// directions. That requires an alpha-weighted convolution with a large enough
+// kernel though, which might be overkill...
+void SimplifyInvisible(Image3F* image, const ImageF& alpha, bool lossless) {
+    // Rewrites, in place, every pixel whose alpha is exactly 0: zero when `lossless`, otherwise a weighted
+    // mean of neighbouring samples. Planes and rows are visited in increasing order, so the left and upper
+    // neighbours read below may already have been rewritten.
+    // image:    all three planes are modified in place.
+    // alpha:    indexed with the x/y ranges of `image`; only samples equal to 0 mark a pixel as invisible.
+    // lossless: when true, invisible pixels are simply set to 0.
+    const size_t width = image->xsize();
+    const size_t height = image->ysize();
+    for (size_t plane = 0; plane < 3; ++plane) {
+        for (size_t y = 0; y < height; ++y) {
+            const bool has_above = y > 0;
+            const bool has_below = y + 1 < height;
+            float* JXL_RESTRICT cur = image->PlaneRow(plane, y);
+            const float* JXL_RESTRICT above = has_above ? image->PlaneRow(plane, y - 1) : nullptr;
+            const float* JXL_RESTRICT below = has_below ? image->PlaneRow(plane, y + 1) : nullptr;
+            const float* JXL_RESTRICT a_cur = alpha.Row(y);
+            const float* JXL_RESTRICT a_above = has_above ? alpha.Row(y - 1) : nullptr;
+            const float* JXL_RESTRICT a_below = has_below ? alpha.Row(y + 1) : nullptr;
+
+            for (size_t x = 0; x < width; ++x) {
+                if (a_cur[x] != 0) continue;  // visible pixel: left untouched
+                if (lossless) {
+                    cur[x] = 0;
+                    continue;
+                }
+
+                // Running weighted sum and total weight; samples are added in a fixed order.
+                float sum = 0.f;
+                float weight = 0.f;
+                const auto add = [&sum, &weight](float sample, float w) {
+                    sum += w * sample;
+                    weight += w;
+                };
+
+                const bool has_left = x > 0;
+                const bool has_right = x + 1 < width;
+                // Left neighbour: counted once unconditionally and once more if it is visible.
+                if (has_left) add(cur[x - 1], 1.f);
+                if (has_left && a_cur[x - 1] > 0.f) add(cur[x - 1], 1.f);
+                // Top-right neighbour: counted once whenever it exists, regardless of its alpha.
+                if (has_right && has_above) add(above[x + 1], 1.f);
+                // Visible right, top-right and bottom-right neighbours, weight 2 each.
+                if (has_right && a_cur[x + 1] > 0.f) add(cur[x + 1], 2.f);
+                if (has_right && has_above && a_above[x + 1] > 0.f) add(above[x + 1], 2.f);
+                if (has_right && has_below && a_below[x + 1] > 0.f) add(below[x + 1], 2.f);
+                // Visible pixels directly above and below, weight 2 each.
+                if (has_above && a_above[x] > 0.f) add(above[x], 2.f);
+                if (has_below && a_below[x] > 0.f) add(below[x], 2.f);
+
+                // Normalise only when the total weight exceeds 1; otherwise the raw sum is stored.
+                cur[x] = weight > 1.f ? sum / weight : sum;
+            }
+        }
+    }
+}
+} // namespace
+
+// First of the three acc_phase* steps invoked by acc_host(): builds `opsin` from `ib` (ToXYB() or a plain copy),
+// optionally simplifies invisible pixels, and for VarDCT frames prepares the encoder state up to inverse Gaborish.
+// NOTE(review): `cparams` and `ib_or_linear` are taken by value, so the assignments made to them below
+// (cparams.target_size, `ib_or_linear = &ib` / the ToXYB() result) are not visible to the caller -- confirm intent.
+Status acc_phase1(Image3F& opsin,
+                  LossyFrameEncoder& lossy_frame_encoder,
+                  CompressParams cparams,
+                  std::unique_ptr<FrameHeader>& frame_header,
+                  const FrameInfo& frame_info,
+                  const ImageBundle* JXL_RESTRICT ib_or_linear,
+                  const ImageBundle& ib,
+                  AuxOut* aux_out,
+                  ThreadPool* pool) {
+    const ColorEncoding& c_linear = ColorEncoding::LinearSRGB(ib.IsGray());
+    std::unique_ptr<ImageMetadata> metadata_linear = jxl::make_unique<ImageMetadata>();
+    metadata_linear->xyb_encoded = (cparams.color_transform == ColorTransform::kXYB);
+    metadata_linear->color_encoding = c_linear;
+    ImageBundle linear_storage(metadata_linear.get());
+
+    // Allocating a large enough image avoids a copy when padding.
+    opsin = Image3F(RoundUpToBlockDim(ib.xsize()), RoundUpToBlockDim(ib.ysize()));
+    opsin.ShrinkTo(ib.xsize(), ib.ysize());
+
+    const bool want_linear =
+        frame_header->encoding == FrameEncoding::kVarDCT && cparams.speed_tier <= SpeedTier::kKitten;
+    ib_or_linear = &ib;
+
+    if (frame_header->color_transform == ColorTransform::kXYB && frame_info.ib_needs_color_transform) {
+        // linear_storage would only be used by the Butteraugli loop (passing
+        // linear sRGB avoids a color conversion there). Otherwise, don't
+        // fill it to reduce memory usage.
+        ib_or_linear = ToXYB(ib, pool, &opsin, want_linear ? &linear_storage : nullptr);
+    } else { // RGB or YCbCr: don't do anything (forward YCbCr is not
+             // implemented, this is only used when the input is already in
+             // YCbCr)
+             // If encoding a special DC or reference frame, don't do anything:
+             // input is already in XYB.
+        CopyImageTo(ib.color(), &opsin);
+    }
+    bool lossless = (frame_header->encoding == FrameEncoding::kModular && cparams.quality_pair.first == 100);
+    if (ib.HasAlpha() && !ib.AlphaIsPremultiplied() && !ApplyOverride(cparams.keep_invisible, lossless) &&
+        cparams.ec_resampling == cparams.resampling) {
+        // simplify invisible pixels
+        SimplifyInvisible(&opsin, ib.alpha(), lossless);
+        if (want_linear) {
+            SimplifyInvisible(const_cast<Image3F*>(&ib_or_linear->color()), ib.alpha(), lossless);
+        }
+    }
+    if (aux_out != nullptr) {
+        JXL_RETURN_IF_ERROR(aux_out->InspectImage3F("enc_frame:OpsinDynamicsImage", opsin));
+    }
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        PadImageToBlockMultipleInPlace(&opsin);
+        PassesEncoderState* JXL_RESTRICT enc_state_ = lossy_frame_encoder.State();
+
+        JXL_ASSERT((opsin.xsize() % kBlockDim) == 0 && (opsin.ysize() % kBlockDim) == 0);
+        PassesSharedState& shared = enc_state_->shared;
+
+        if (!enc_state_->cparams.max_error_mode) {
+            float x_qm_scale_steps[3] = {0.65f, 1.25f, 9.0f};
+            shared.frame_header.x_qm_scale = 1;
+            for (float x_qm_scale_step : x_qm_scale_steps) {
+                if (enc_state_->cparams.butteraugli_distance > x_qm_scale_step) {
+                    shared.frame_header.x_qm_scale++;
+                }
+            }
+        }
+
+        Image3F* opsin_ = &opsin;
+
+        // Compute parameters for noise synthesis.
+        if (shared.frame_header.flags & FrameHeader::kNoise) {
+            PROFILER_ZONE("enc GetNoiseParam");
+            if (cparams.photon_noise_iso > 0) {
+                shared.image_features.noise_params =
+                    SimulatePhotonNoise(opsin_->xsize(), opsin_->ysize(), cparams.photon_noise_iso);
+            } else {
+                // Don't start at zero amplitude since adding noise is expensive -- it
+                // significantly slows down decoding, and this is unlikely to
+                // completely go away even with advanced optimizations. After the
+                // kNoiseModelingRampUpDistanceRange we have reached the full level,
+                // i.e. noise is no longer represented by the compressed image, so we
+                // can add full noise by the noise modeling itself.
+                static const float kNoiseModelingRampUpDistanceRange = 0.6;
+                static const float kNoiseLevelAtStartOfRampUp = 0.25;
+                static const float kNoiseRampupStart = 1.0;
+                // TODO(user) test and properly select quality_coef with smooth
+                // filter
+                float quality_coef = 1.0f;
+                const float rampup =
+                    (cparams.butteraugli_distance - kNoiseRampupStart) / kNoiseModelingRampUpDistanceRange;
+                if (rampup < 1.0f) {
+                    quality_coef = kNoiseLevelAtStartOfRampUp + (1.0f - kNoiseLevelAtStartOfRampUp) * rampup;
+                }
+                if (rampup < 0.0f) {
+                    quality_coef = kNoiseRampupStart;
+                }
+                if (!GetNoiseParameter(*opsin_, &shared.image_features.noise_params, quality_coef)) {
+                    shared.frame_header.flags &= ~FrameHeader::kNoise;
+                }
+            }
+        }
+        if (enc_state_->shared.frame_header.upsampling != 1 && !cparams.already_downsampled) {
+            // In VarDCT mode, LossyFrameHeuristics takes care of running downsampling
+            // after noise, if necessary.
+            DownsampleImage(opsin_, cparams.resampling);
+            PadImageToBlockMultipleInPlace(opsin_);
+        }
+
+        const FrameDimensions& frame_dim_ = enc_state_->shared.frame_dim;
+        size_t target_size = TargetSize(cparams, frame_dim_);
+        size_t opsin_target_size = target_size;
+        if (cparams.target_size > 0 || cparams.target_bitrate > 0.0) {
+            cparams.target_size = opsin_target_size;
+        } else if (cparams.butteraugli_distance < 0) {
+            return JXL_FAILURE("Expected non-negative distance");
+        }
+
+#ifndef XLNX_DISABLE_BLK_DICT
+        // Find and subtract splines.
+        if (cparams.speed_tier <= SpeedTier::kSquirrel) {
+            shared.image_features.splines = FindSplines(*opsin_);
+            JXL_RETURN_IF_ERROR(shared.image_features.splines.SubtractFrom(opsin_, shared.cmap));
+        }
+
+        // Find and subtract patches/dots.
+        if (ApplyOverride(cparams.patches, cparams.speed_tier <= SpeedTier::kSquirrel)) {
+            FindBestPatchDictionary(*opsin_, enc_state_, pool, aux_out);
+            PatchDictionaryEncoder::SubtractFrom(shared.image_features.patches, opsin_);
+        }
+#endif
+
+        static const float kAcQuant = 0.79f;
+        const float quant_dc = InitialQuantDC(cparams.butteraugli_distance);
+        Quantizer& quantizer = enc_state_->shared.quantizer;
+        // We don't know the quant field yet, but for computing the global scale
+        // assuming that it will be the same as for Falcon mode is good enough.
+        quantizer.ComputeGlobalScaleAndQuant(quant_dc, kAcQuant / cparams.butteraugli_distance, 0);
+
+        // TODO(veluca): we can now run all the code from here to FindBestQuantizer
+        // (excluded) one rect at a time. Do that.
+
+        // Dependency graph:
+        //
+        // input: either XYB or input image
+        //
+        // input image -> XYB [optional]
+        // XYB -> initial quant field
+        // XYB -> Gaborished XYB
+        // Gaborished XYB -> CfL1
+        // initial quant field, Gaborished XYB, CfL1 -> ACS
+        // initial quant field, ACS, Gaborished XYB -> EPF control field
+        // initial quant field -> adjusted initial quant field
+        // adjusted initial quant field, ACS -> raw quant field
+        // raw quant field, ACS, Gaborished XYB -> CfL2
+        //
+        // output: Gaborished XYB, CfL, ACS, raw quant field, EPF control field.
+
+        if (!opsin_->xsize()) {
+            JXL_ASSERT(enc_state_->heuristics->HandlesColorConversion(cparams, *ib_or_linear));
+            *opsin_ = Image3F(RoundUpToBlockDim(ib_or_linear->xsize()), RoundUpToBlockDim(ib_or_linear->ysize()));
+            opsin_->ShrinkTo(ib_or_linear->xsize(), ib_or_linear->ysize());
+            ToXYB(*ib_or_linear, pool, opsin_, /*linear=*/nullptr);
+            PadImageToBlockMultipleInPlace(opsin_);
+        }
+
+        // Compute an initial estimate of the quantization field.
+        // Call InitialQuantField only in Hare mode or slower. Otherwise, rely
+        // on simple heuristics in FindBestAcStrategy, or set a constant for Falcon
+        // mode.
+        if (cparams.speed_tier > SpeedTier::kHare || cparams.uniform_quant > 0) {
+            enc_state_->initial_quant_field = ImageF(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
+            float q = cparams.uniform_quant > 0 ? cparams.uniform_quant : kAcQuant / cparams.butteraugli_distance;
+            FillImage(q, &enc_state_->initial_quant_field);
+        } else {
+            // Call this here, as it relies on pre-gaborish values.
+            float butteraugli_distance_for_iqf = cparams.butteraugli_distance;
+            if (!shared.frame_header.loop_filter.gab) {
+                butteraugli_distance_for_iqf *= 0.73f;
+            }
+            enc_state_->initial_quant_field = InitialQuantField(butteraugli_distance_for_iqf, *opsin_, shared.frame_dim,
+                                                                pool, 1.0f, &enc_state_->initial_quant_masking);
+        }
+
+        // TODO(veluca): do something about animations.
+
+        // Apply inverse-gaborish.
+        if (shared.frame_header.loop_filter.gab) {
+            GaborishInverse(opsin_, 0.9908511000000001f, pool);
+        }
+    }
+    return true;
+}
+} // namespace jxl
+#endif
\ No newline at end of file
diff --git a/codec/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_phase2.cpp b/codec/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_phase2.cpp
new file mode 100644
index 0000000000..f47dd76fde
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_phase2.cpp
@@ -0,0 +1,415 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef HLS_KERNEL2_CPP
+#define HLS_KERNEL2_CPP
+
+#include "acc_phase2.hpp"
+
+namespace jxl {
+
+Status acc_phase2(std::string xclbinPath,                                       // not referenced in this function
+                  Image3F& opsin,                                               // planes feed every table below
+                  LossyFrameEncoder& lossy_frame_encoder,                       // owns the encoder state updated here
+                  std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,  // its ComputeEncodingData runs last
+                  CompressParams cparams,                                       // speed_tier gates several steps
+                  std::unique_ptr<FrameHeader>& frame_header,  // overwritten with shared.frame_header near the end
+                  const std::vector<ImageF>* extra_channels,   // dereferenced on the VarDCT path; must not be null
+                  const ImageBundle* JXL_RESTRICT ib_or_linear,  // forwarded to FindBestQuantizer
+                  const ImageBundle& ib,                         // only ib.metadata() is read
+                  ThreadPool* pool,                              // forwarded to helpers; the tile loop is serial
+                  AuxOut* aux_out) {                             // forwarded to helpers
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {  // any other encoding: no work, returns true
+        std::vector<EncCache>& group_caches_ = lossy_frame_encoder.get_group_cashes();  // NOTE(review): unused below
+        PassesEncoderState* JXL_RESTRICT enc_state_ = lossy_frame_encoder.State();
+        PassesSharedState& shared = enc_state_->shared;
+        Image3F* opsin_ = &opsin;
+        Quantizer& quantizer = enc_state_->shared.quantizer;
+        // Overview: fill per-channel DCT/DC tables, run the per-tile heuristics, then initialize the passes encoder.
+        size_t tile_xsize = (opsin.xsize() + 63) / 64 * 64;
+        size_t tile_ysize = (opsin.ysize() + 63) / 64 * 64;  // NOTE(review): confirm opsin is padded to these sizes
+#ifdef XLNX_QC_DEBUG_DCT
+/*std::cout << std::endl
+          << "======================================== full origin pixel "
+             "=============================================="
+          << std::endl;
+for (int c = 0; c < 3; c++) {
+  if (c == 0) {
+    std::cout << std::setw(15) << 0 << " ";
+    for (int m = 0; m < tile_xsize; m++) {
+      std::cout << std::setw(15) << m << " ";
+    }
+    std::cout << std::endl << std::endl;
+
+    for (int y = 0; y < tile_ysize; y++) {
+      std::cout << std::setw(15) << y << " ";
+      const float* JXL_RESTRICT row_y = opsin.ConstPlaneRow(c, y);
+      for (int x = 0; x < tile_xsize; x++) {
+        std::cout << std::setw(15) << row_y[x] << " ";
+      }
+      std::cout << std::endl;
+    }
+    std::cout << std::endl;
+  }
+}*/
+#endif
+        // AC coefficient tables: one vector per channel, tile_xsize * tile_ysize floats each.
+        std::vector<std::vector<float> > dctIDT(3, std::vector<float>(tile_xsize * tile_ysize));
+        std::vector<std::vector<float> > dct2x2(3, std::vector<float>(tile_xsize * tile_ysize));
+        std::vector<std::vector<float> > dct4x4(3, std::vector<float>(tile_xsize * tile_ysize));
+        std::vector<std::vector<float> > dct8x8(3, std::vector<float>(tile_xsize * tile_ysize));
+        std::vector<std::vector<float> > dct16x16(3, std::vector<float>(tile_xsize * tile_ysize));
+        std::vector<std::vector<float> > dct32x32(3, std::vector<float>(tile_xsize * tile_ysize));
+        // DC tables: one vector per channel, the same element count rounded up to a multiple of 64.
+        std::vector<std::vector<float> > dcIDT(3, std::vector<float>((tile_xsize * tile_ysize + 63) / 64 * 64));
+        std::vector<std::vector<float> > dc2x2(3, std::vector<float>((tile_xsize * tile_ysize + 63) / 64 * 64));
+        std::vector<std::vector<float> > dc4x4(3, std::vector<float>((tile_xsize * tile_ysize + 63) / 64 * 64));
+        std::vector<std::vector<float> > dc8x8(3, std::vector<float>((tile_xsize * tile_ysize + 63) / 64 * 64));
+        std::vector<std::vector<float> > dc16x16(3, std::vector<float>((tile_xsize * tile_ysize + 63) / 64 * 64));
+        std::vector<std::vector<float> > dc32x32(3, std::vector<float>((tile_xsize * tile_ysize + 63) / 64 * 64));
+        // IDENTITY strategy on every 8x8 block -> dctIDT (64 values per block) and dcIDT (1 value per block).
+        for (int c = 0; c < 3; c++) {
+            for (size_t y = 0; y < tile_ysize; y = y + 8) {
+                const float* JXL_RESTRICT row = opsin.ConstPlaneRow(c, y);
+                size_t stride = opsin.PixelsPerRow();
+
+                for (size_t x = 0; x < tile_xsize; x = x + 8) {
+                    float* mem = (float*)calloc(8UL * 8UL, sizeof(float));
+                    float* dc_mem =
+                        (float*)calloc(AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks, sizeof(float));
+                    float* scratch_space = (float*)calloc(2048UL, sizeof(float));
+                    AcStrategy acs = AcStrategy::FromRawStrategy(AcStrategy::Type::IDENTITY);
+                    size_t xs = acs.covered_blocks_x();
+                    N_SCALAR::TransformFromPixels(acs.Strategy(), row + x, stride, mem, scratch_space);
+                    N_SCALAR::DCFromLowestFrequencies(acs.Strategy(), mem, dc_mem, xs);
+                    for (int m = 0; m < 64; m++) {
+                        dctIDT[c][64 * (y / 8 * (tile_xsize / 8) + x / 8) + m] = mem[m];
+                    }
+                    dcIDT[c][y / 8 * (tile_xsize / 8) + x / 8] = dc_mem[0];
+                    free(mem);
+                    free(dc_mem);
+                    free(scratch_space);
+                }
+            }
+        }
+        // DCT2X2 strategy on every 8x8 block -> dct2x2 / dc2x2, same layout as the IDENTITY tables.
+        for (int c = 0; c < 3; c++) {
+            for (size_t y = 0; y < tile_ysize; y = y + 8) {
+                const float* JXL_RESTRICT row = opsin.ConstPlaneRow(c, y);
+                size_t stride = opsin.PixelsPerRow();
+
+                for (size_t x = 0; x < tile_xsize; x = x + 8) {
+                    float* mem = (float*)calloc(8UL * 8UL, sizeof(float));
+                    float* dc_mem =
+                        (float*)calloc(AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks, sizeof(float));
+                    float* scratch_space = (float*)calloc(2048UL, sizeof(float));
+                    AcStrategy acs = AcStrategy::FromRawStrategy(AcStrategy::Type::DCT2X2);
+                    size_t xs = acs.covered_blocks_x();
+                    N_SCALAR::TransformFromPixels(acs.Strategy(), row + x, stride, mem, scratch_space);
+                    N_SCALAR::DCFromLowestFrequencies(acs.Strategy(), mem, dc_mem, xs);
+                    for (int m = 0; m < 64; m++) {
+                        dct2x2[c][64 * (y / 8 * (tile_xsize / 8) + x / 8) + m] = mem[m];
+                    }
+                    dc2x2[c][y / 8 * (tile_xsize / 8) + x / 8] = dc_mem[0];
+                    free(mem);
+                    free(dc_mem);
+                    free(scratch_space);
+                }
+            }
+        }
+        // DCT4X4 strategy on every 8x8 block -> dct4x4 / dc4x4, same layout as the IDENTITY tables.
+        for (int c = 0; c < 3; c++) {
+            for (size_t y = 0; y < tile_ysize; y = y + 8) {
+                const float* JXL_RESTRICT row = opsin.ConstPlaneRow(c, y);
+                size_t stride = opsin.PixelsPerRow();
+
+                for (size_t x = 0; x < tile_xsize; x = x + 8) {
+                    float* mem = (float*)calloc(8UL * 8UL, sizeof(float));
+                    float* dc_mem =
+                        (float*)calloc(AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks, sizeof(float));
+                    float* scratch_space = (float*)calloc(2048UL, sizeof(float));
+                    AcStrategy acs = AcStrategy::FromRawStrategy(AcStrategy::Type::DCT4X4);
+                    size_t xs = acs.covered_blocks_x();
+                    N_SCALAR::TransformFromPixels(acs.Strategy(), row + x, stride, mem, scratch_space);
+                    N_SCALAR::DCFromLowestFrequencies(acs.Strategy(), mem, dc_mem, xs);
+                    for (int m = 0; m < 64; m++) {
+                        dct4x4[c][64 * (y / 8 * (tile_xsize / 8) + x / 8) + m] = mem[m];
+                    }
+                    dc4x4[c][y / 8 * (tile_xsize / 8) + x / 8] = dc_mem[0];
+                    free(mem);
+                    free(dc_mem);
+                    free(scratch_space);
+                }
+            }
+        }
+        // AcStrategy::Type::DCT on every 8x8 block -> dct8x8 / dc8x8, same layout as the IDENTITY tables.
+        for (int c = 0; c < 3; c++) {
+            for (size_t y = 0; y < tile_ysize; y = y + 8) {
+                const float* JXL_RESTRICT row = opsin.ConstPlaneRow(c, y);
+                size_t stride = opsin.PixelsPerRow();
+
+                for (size_t x = 0; x < tile_xsize; x = x + 8) {
+                    float* mem = (float*)calloc(8UL * 8UL, sizeof(float));
+                    float* dc_mem =
+                        (float*)calloc(AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks, sizeof(float));
+                    float* scratch_space = (float*)calloc(2048UL, sizeof(float));
+                    AcStrategy acs = AcStrategy::FromRawStrategy(AcStrategy::Type::DCT);
+                    size_t xs = acs.covered_blocks_x();
+                    N_SCALAR::TransformFromPixels(acs.Strategy(), row + x, stride, mem, scratch_space);
+                    N_SCALAR::DCFromLowestFrequencies(acs.Strategy(), mem, dc_mem, xs);
+                    for (int m = 0; m < 64; m++) {
+                        dct8x8[c][64 * (y / 8 * (tile_xsize / 8) + x / 8) + m] = mem[m];
+                    }
+                    dc8x8[c][y / 8 * (tile_xsize / 8) + x / 8] = dc_mem[0];
+                    free(mem);
+                    free(dc_mem);
+                    free(scratch_space);
+                }
+            }
+        }
+        // DCT16X16 strategy on every 16x16 block -> dct16x16 (256 values per block) and dc16x16 (4 per block).
+        for (int c = 0; c < 3; c++) {
+            for (size_t y = 0; y < tile_ysize; y = y + 16) {
+                const float* JXL_RESTRICT row = opsin.ConstPlaneRow(c, y);
+                size_t stride = opsin.PixelsPerRow();
+
+                for (size_t x = 0; x < tile_xsize; x = x + 16) {
+                    float* mem = (float*)calloc(16UL * 16UL, sizeof(float));
+                    float* dc_mem =
+                        (float*)calloc(AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks, sizeof(float));
+                    float* scratch_space = (float*)calloc(2048UL, sizeof(float));
+                    AcStrategy acs = AcStrategy::FromRawStrategy(AcStrategy::Type::DCT16X16);
+                    size_t xs = acs.covered_blocks_x();
+                    N_SCALAR::TransformFromPixels(acs.Strategy(), row + x, stride, mem, scratch_space);
+                    N_SCALAR::DCFromLowestFrequencies(acs.Strategy(), mem, dc_mem, xs);
+                    for (int m = 0; m < 16 * 16; m++) {
+                        dct16x16[c][16 * 16 * (y / 16 * (tile_xsize / 16) + x / 16) + m] = mem[m];
+                    }
+                    for (int m = 0; m < 4; m++) {
+                        dc16x16[c][4 * (y / 16 * (tile_xsize / 16) + x / 16) + m] = dc_mem[m];
+                    }
+                    free(mem);
+                    free(dc_mem);
+                    free(scratch_space);
+                }
+            }
+        }
+        // DCT32X32 strategy on every 32x32 block -> dct32x32 (1024 values per block) and dc32x32 (16 per block).
+        for (int c = 0; c < 3; c++) {
+            for (size_t y = 0; y < tile_ysize; y = y + 32) {
+                const float* JXL_RESTRICT row = opsin.ConstPlaneRow(c, y);
+                size_t stride = opsin.PixelsPerRow();
+
+                for (size_t x = 0; x < tile_xsize; x = x + 32) {
+                    float* mem = (float*)calloc(32UL * 32UL, sizeof(float));
+                    float* dc_mem =
+                        (float*)calloc(AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks, sizeof(float));
+                    float* scratch_space = (float*)calloc(2048UL, sizeof(float));
+                    AcStrategy acs = AcStrategy::FromRawStrategy(AcStrategy::Type::DCT32X32);
+                    size_t xs = acs.covered_blocks_x();
+                    N_SCALAR::TransformFromPixels(acs.Strategy(), row + x, stride, mem, scratch_space);
+                    N_SCALAR::DCFromLowestFrequencies(acs.Strategy(), mem, dc_mem, xs);
+                    for (int m = 0; m < 32 * 32; m++) {
+                        dct32x32[c][32 * 32 * (y / 32 * (tile_xsize / 32) + x / 32) + m] = mem[m];
+                    }
+                    for (int m = 0; m < 16; m++) {
+                        dc32x32[c][16 * (y / 32 * (tile_xsize / 32) + x / 32) + m] = dc_mem[m];
+                    }
+                    free(mem);
+                    free(dc_mem);
+                    free(scratch_space);
+                }
+            }
+        }
+
+#ifdef XLNX_QC_DEBUG_DCT
+        std::cout << std::endl
+                  << "======================================== full coef "
+                     "=============================================="
+                  << std::endl;
+        for (int c = 0; c < 3; c++) {
+            if (c == 1) {
+                std::cout << std::setw(15) << 0 << " ";
+                for (int m = 0; m < tile_xsize; m++) {
+                    std::cout << std::setw(15) << m << " ";
+                }
+                std::cout << std::endl << std::endl;
+                for (int y = 0; y < tile_ysize; y++) {
+                    std::cout << std::setw(15) << y << " ";
+                    for (int x = 0; x < tile_xsize; x++) {
+                        std::cout << std::setw(15) << dct8x8[c][y * tile_xsize + x] << " ";
+                    }
+                    std::cout << std::endl;
+                }
+            }
+        }
+#endif
+
+#ifdef XLNX_QC_DEBUG_DC
+        std::cout << std::endl
+                  << "======================================== full DC "
+                     "=============================================="
+                  << std::endl;
+        for (int c = 0; c < 3; c++) {
+            if (c == 1) {
+                std::cout << std::setw(15) << 0 << " ";
+                for (int m = 0; m < tile_xsize / 8; m++) {
+                    std::cout << std::setw(15) << m << " ";
+                }
+                std::cout << std::endl << std::endl;
+                for (int y = 0; y < tile_ysize / 8; y++) {
+                    std::cout << std::setw(15) << y << " ";
+                    for (int x = 0; x < tile_xsize / 8; x++) {
+                        std::cout << std::setw(15) << dc32x32[c][y * tile_xsize / 8 + x] << " ";
+                    }
+                    std::cout << std::endl;
+                }
+            }
+        }
+#endif
+
+        ArControlFieldHeuristics ar_heuristics;
+        AcStrategyHeuristics acs_heuristics;
+        CfLHeuristics cfl_heuristics;
+
+        cfl_heuristics.Init(*opsin_);
+        acs_heuristics.Init(*opsin_, enc_state_);
+        ar_heuristics.PrepareForThreads(/*num_threads*/ 1);
+        cfl_heuristics.PrepareForThreads(/*num_threads*/ 1);
+
+        // Tiles run serially with thread index 0; the RunOnPool variant is left commented out after the loop.
+        for (size_t tid = 0; tid < DivCeil(enc_state_->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks) *
+                                       DivCeil(enc_state_->shared.frame_dim.ysize_blocks, kEncTileDimInBlocks);
+             tid++) {
+            size_t thread = 0;
+            size_t n_enc_tiles = DivCeil(enc_state_->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks);
+            size_t tx = tid % n_enc_tiles;
+            size_t ty = tid / n_enc_tiles;
+            size_t by0 = ty * kEncTileDimInBlocks;
+            size_t by1 = std::min((ty + 1) * kEncTileDimInBlocks, enc_state_->shared.frame_dim.ysize_blocks);
+            size_t bx0 = tx * kEncTileDimInBlocks;
+            size_t bx1 = std::min((tx + 1) * kEncTileDimInBlocks, enc_state_->shared.frame_dim.xsize_blocks);
+            Rect r(bx0, by0, bx1 - bx0, by1 - by0);
+
+            // For speeds up to Wombat, we only compute the color correlation map
+            // once we know the transform type and the quantization map.
+            if (cparams.speed_tier <= SpeedTier::kSquirrel) {
+                cfl_heuristics.ComputeTile(r, *opsin_, enc_state_->shared.matrices,
+                                           /*ac_strategy=*/nullptr,
+                                           /*quantizer=*/nullptr, /*fast=*/false, thread, &enc_state_->shared.cmap,
+                                           opsin.xsize(), opsin.ysize(), dctIDT, dct2x2, dct4x4, dct8x8, dct16x16,
+                                           dct32x32, dcIDT, dc2x2, dc4x4, dc8x8, dc16x16, dc32x32);
+            }
+
+            // Choose block sizes.
+            acs_heuristics.ProcessRect(r, opsin.xsize(), opsin.ysize(), dctIDT, dct2x2, dct4x4, dct8x8, dct16x16,
+                                       dct32x32, dcIDT, dc2x2, dc4x4, dc8x8, dc16x16, dc32x32);
+
+// Choose amount of post-processing smoothing.
+// TODO(veluca): should this go *after* AdjustQuantField?
+#ifndef XLNX_DISABLE_ARC
+            ar_heuristics.RunRect(r, *opsin_, enc_state_, thread);
+#else
+            ImageB* JXL_RESTRICT epf_sharpness = &enc_state_->shared.epf_sharpness;
+            FillPlane(static_cast<uint8_t>(4), epf_sharpness, r);
+#endif
+            // Always set the initial quant field, so we can compute the CfL map
+            // with more accuracy. The initial quant field might change in slower
+            // modes, but adjusting the quant field with butteraugli when all the
+            // other encoding parameters are fixed is likely a more reliable choice
+            // anyway.
+            AdjustQuantField(enc_state_->shared.ac_strategy, r, &enc_state_->initial_quant_field);
+            quantizer.SetQuantFieldRect(enc_state_->initial_quant_field, r, &enc_state_->shared.raw_quant_field);
+
+// Compute a non-default CfL map if we are at Hare speed, or slower.
+#ifndef XLNX_DISABLE_2NDCMP
+            if (cparams.speed_tier <= SpeedTier::kHare) {
+                cfl_heuristics.ComputeTile(r, *opsin_, enc_state_->shared.matrices, &enc_state_->shared.ac_strategy,
+                                           &enc_state_->shared.quantizer,
+                                           /*fast=*/cparams.speed_tier >= SpeedTier::kWombat, thread,
+                                           &enc_state_->shared.cmap, dctIDT, dct2x2, dct4x4, dct8x8, dct16x16, dct32x32,
+                                           dcIDT, dc2x2, dc4x4, dc8x8, dc16x16, dc32x32);
+            }
+#endif
+        }  // per-tile loop
+        /*  RunOnPool(pool, 0, DivCeil(enc_state_->shared.frame_dim.xsize_blocks,
+                                     kEncTileDimInBlocks) *
+                                 DivCeil(enc_state_->shared.frame_dim.ysize_blocks,
+                                         kEncTileDimInBlocks),
+                    [&](const size_t num_threads) {
+                      ar_heuristics.PrepareForThreads(num_threads);
+                      cfl_heuristics.PrepareForThreads(num_threads);
+                      return true;
+                    },
+                    process_tile, "Enc Heuristics");*/
+
+        acs_heuristics.Finalize(aux_out);
+        if (cparams.speed_tier <= SpeedTier::kHare) {
+            cfl_heuristics.ComputeDC(
+                /*fast=*/cparams.speed_tier >= SpeedTier::kWombat, &enc_state_->shared.cmap);
+        }
+
+        FindBestDequantMatrices(cparams, *opsin_, modular_frame_encoder.get(), &enc_state_->shared.matrices);
+
+        // Refine quantization levels.
+        FindBestQuantizer(ib_or_linear, *opsin_, enc_state_, pool, aux_out);
+
+        // Choose a context model that depends on the amount of quantization for
+        // AC.
+        if (cparams.speed_tier < SpeedTier::kFalcon) {
+            FindBestBlockEntropyModel(*enc_state_);
+        }
+
+#ifdef XLNX_DEBUG_CMAP
+        std::cout << "=========================================" << std::endl;
+        std::cout << "ColorMap info: " << std::endl;
+        ImageSB* JXL_RESTRICT tmp_map = &enc_state_->shared.cmap.ytox_map;
+        int32_t dc = enc_state_->shared.cmap.GetYToXDC();
+        std::cout << "Y to X dc: " << dc << std::endl;
+        for (int i = 0; i < tmp_map->ysize(); i++) {
+            int8_t* JXL_RESTRICT row_out = tmp_map->Row(i);
+            for (int j = 0; j < tmp_map->xsize(); j++) {
+                std::cout << (int)row_out[j] << " ";
+            }
+            std::cout << std::endl;
+        }
+
+        tmp_map = &enc_state_->shared.cmap.ytob_map;  // dump the B map under the "Y to B" heading
+        dc = enc_state_->shared.cmap.GetYToBDC();
+        std::cout << "Y to B dc: " << dc << std::endl;
+        for (int i = 0; i < tmp_map->ysize(); i++) {
+            int8_t* JXL_RESTRICT row_out = tmp_map->Row(i);
+            for (int j = 0; j < tmp_map->xsize(); j++) {
+                std::cout << (int)row_out[j] << " ";
+            }
+            std::cout << std::endl;
+        }
+        std::cout << std::endl;
+#endif
+
+        InitializePassesEncoder(opsin, pool, enc_state_, modular_frame_encoder.get(), aux_out, opsin.xsize(),
+                                opsin.ysize(), dctIDT, dct2x2, dct4x4, dct8x8, dct16x16, dct32x32, dcIDT, dc2x2, dc4x4,
+                                dc8x8, dc16x16, dc32x32);
+
+        enc_state_->passes.resize(enc_state_->progressive_splitter.GetNumPasses());
+        for (PassesEncoderState::PassData& pass : enc_state_->passes) {
+            pass.ac_tokens.resize(shared.frame_dim.num_groups);
+        }
+
+        lossy_frame_encoder.ComputeAllCoeffOrders(shared.frame_dim);
+        shared.num_histograms = 1;
+
+        *frame_header = shared.frame_header;
+
+        // needs to happen *AFTER* VarDCT-ComputeEncodingData.
+        JXL_RETURN_IF_ERROR(modular_frame_encoder->ComputeEncodingData(
+            *frame_header, *ib.metadata(), &opsin, *extra_channels, lossy_frame_encoder.State(), pool, aux_out,
+            /* do_color=*/frame_header->encoding == FrameEncoding::kModular));
+    }
+    return true;
+}
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_phase3.cpp b/codec/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_phase3.cpp
new file mode 100644
index 0000000000..225caa2097
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/host_acc_cluster_histogram/acc_phase3.cpp
@@ -0,0 +1,1586 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef HLS_KERNEL3_CPP
+#define HLS_KERNEL3_CPP
+
+#include "acc_phase3.hpp"
+
+#include <sys/time.h>
+
+#include "acc_init_histogram.hpp"
+#include "acc_store_encode_data.hpp"
+#include "lib/jxl/lehmer_code.h"
+
+#ifndef HLS_TEST
+#include "host_cluster_histogram.hpp"
+#else
+#include "hls_cluster_histogram.hpp"
+#endif
+// void test(int* in, int* out);
+
+// inline int tvdiff(struct timeval* tv0, struct timeval* tv1) {
+//  return (tv1->tv_sec - tv0->tv_sec) * 1000000 + (tv1->tv_usec - tv0->tv_usec);
+//}
+
+namespace jxl {
+namespace {
+// Returns the index of the first element of v equal to value, or v.size() if there is none.
+size_t IndexOf(const std::vector<uint8_t>& v, uint8_t value) {
+    size_t pos = 0;
+    // Linear scan that stops at the first match or at the end of the vector.
+    while (pos < v.size() && v[pos] != value) ++pos;
+    return pos;
+}
+
+// Moves the element at position index to the front of *v; the elements that
+// preceded it each shift one slot towards the back. Later elements are untouched.
+void MoveToFront(std::vector<uint8_t>* v, size_t index) {
+    std::vector<uint8_t>& seq = *v;
+    // Rotating the range [0, index] left by index positions brings seq[index] to slot 0.
+    std::rotate(seq.begin(), seq.begin() + index, seq.begin() + index + 1);
+}
+
+// Move-to-front transform: emits each symbol's rank in a recency list seeded with 0..max(v).
+std::vector<uint8_t> MoveToFrontTransform(const std::vector<uint8_t>& v) {
+    if (v.empty()) return v;
+    std::vector<uint8_t> recency(static_cast<size_t>(*std::max_element(v.begin(), v.end())) + 1);
+    for (size_t s = 0; s < recency.size(); ++s) recency[s] = static_cast<uint8_t>(s);
+    std::vector<uint8_t> ranks(v.size());
+    for (size_t pos = 0; pos < v.size(); ++pos) {
+        const size_t rank = IndexOf(recency, v[pos]);
+        JXL_ASSERT(rank < recency.size());
+        ranks[pos] = static_cast<uint8_t>(rank);
+        MoveToFront(&recency, rank);
+    }
+    return ranks;
+}
+} // namespace
+
+namespace {
+
+// Appends to *tokens the Lehmer code computed for `order` (length `size`): first a token holding
+// the number of emitted entries, then one token per code entry in [skip, stop).
+void acc_TokenizePermutation(const coeff_order_t* JXL_RESTRICT order,
+                             size_t skip,
+                             size_t size,
+                             std::vector<Token>* tokens) {
+    std::vector<LehmerT> code(size);
+    std::vector<uint32_t> scratch(size + 1);
+    ComputeLehmerCode(order, scratch.data(), size, code.data());
+    // Trailing zero entries are not emitted: move `stop` back over them, but never below `skip`.
+    size_t stop = size;
+    while (stop > skip && code[stop - 1] == 0) --stop;
+    tokens->emplace_back(CoeffOrderContext(size), stop - skip);
+    uint32_t prev = 0;  // The context of each entry is derived from the previous entry's value.
+    for (size_t k = skip; k < stop; prev = code[k], ++k) {
+        tokens->emplace_back(CoeffOrderContext(prev), code[k]);
+    }
+}
+
+} // namespace
+
+namespace {
+// Maps `order` through the strategy's natural-order LUT into order_zigzag, then tokenizes it.
+void acc_EncodeCoeffOrder(const coeff_order_t* JXL_RESTRICT order,
+                          AcStrategy acs,
+                          std::vector<Token>* tokens,
+                          coeff_order_t* order_zigzag) {
+    const size_t num_blocks = acs.covered_blocks_x() * acs.covered_blocks_y();
+    const size_t num_coeffs = kDCTBlockSize * num_blocks;
+    const coeff_order_t* const lut = acs.NaturalCoeffOrderLut();
+    // order_zigzag must hold num_coeffs entries; num_blocks is forwarded as the tokenizer's `skip`.
+    std::transform(order, order + num_coeffs, order_zigzag, [lut](coeff_order_t src) { return lut[src]; });
+    acc_TokenizePermutation(order_zigzag, num_blocks, num_coeffs, tokens);
+}
+} // namespace
+
+// Tokenizes the AC coefficients of every group for each pass via RunOnPool, then (VarDCT frames
+// only) appends the coefficient-order tokens to coefOrders_tokens[0]. Always returns true.
+Status acc_predictAndtoken(LossyFrameEncoder& lossy_frame_encoder,
+                           std::unique_ptr<FrameHeader>& frame_header,
+                           std::vector<std::vector<Token> >& coefOrders_tokens,
+                           ThreadPool* pool) {
+    std::vector<EncCache>& caches = lossy_frame_encoder.get_group_cashes();
+    PassesEncoderState* JXL_RESTRICT state = lossy_frame_encoder.State();
+    PassesSharedState& shared = state->shared;
+
+    const auto init_caches = [&](const size_t num_threads) {
+        caches.resize(num_threads);  // One cache slot per worker thread.
+        return true;
+    };
+    const auto tokenize_one_group = [&](const int group_index, const int thread) {
+        const Rect group_rect = shared.BlockGroupRect(group_index);
+        for (size_t pass = 0; pass < state->passes.size(); ++pass) {
+            const auto& pass_coeffs = state->coeffs[pass];
+            JXL_ASSERT(pass_coeffs->Type() == ACType::k32);
+            const int32_t* JXL_RESTRICT ac_rows[3] = {
+                pass_coeffs->PlaneRow(0, group_index, 0).ptr32,
+                pass_coeffs->PlaneRow(1, group_index, 0).ptr32,
+                pass_coeffs->PlaneRow(2, group_index, 0).ptr32,
+            };
+            caches[thread].InitOnce();  // The per-thread cache must be initialized before it is used.
+            TokenizeCoefficients(&shared.coeff_orders[pass * shared.coeff_order_size], group_rect, ac_rows,
+                                 shared.ac_strategy, frame_header->chroma_subsampling,
+                                 &caches[thread].num_nzeroes, &state->passes[pass].ac_tokens[group_index],
+                                 shared.quant_dc, shared.raw_quant_field, shared.block_ctx_map);
+        }
+    };
+    RunOnPool(pool, 0, shared.frame_dim.num_groups, init_caches, tokenize_one_group, "TokenizeGroup");
+
+    const coeff_order_t* JXL_RESTRICT orders = &shared.coeff_orders[0 * shared.coeff_order_size];
+    auto scratch = hwy::AllocateAligned<coeff_order_t>(AcStrategy::kMaxCoeffArea);
+    const uint16_t used_orders = state->used_orders[0];
+    uint16_t seen = 0;  // Bit `ord` is set once that order value has been visited.
+
+    if (frame_header->encoding != FrameEncoding::kVarDCT) return true;
+    for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+        const uint8_t ord = kStrategyOrder[o];
+        const int ord_bit = 1 << ord;
+        const bool already_seen = (seen & ord_bit) != 0;
+        seen |= ord_bit;
+        if (already_seen || (used_orders & ord_bit) == 0) continue;  // repeated, or bit clear in used_orders
+        AcStrategy acs = AcStrategy::FromRawStrategy(o);
+        for (size_t c = 0; c < 3; ++c) {
+            acc_EncodeCoeffOrder(&orders[CoeffOrderOffset(ord, c)], acs, &coefOrders_tokens[0], scratch.get());
+        }
+    }
+    return true;
+}
+
+BitWriter* get_output(const size_t index, std::vector<BitWriter>& group_codes, bool is_small_image) {
+    return is_small_image ? &group_codes[0] : &group_codes[index];  // small images always use slot 0
+}
+
+Status acc_histogram(std::string xclbinPath,
+                     LossyFrameEncoder& lossy_frame_encoder,
+                     std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                     PassesEncoderState* passes_enc_state,
+                     FrameDimensions frame_dim,
+                     std::unique_ptr<FrameHeader>& frame_header,
+                     CompressParams cparams,
+                     std::vector<std::vector<Token> >& coefOrders_tokens,
+                     BitWriter* group_codes_writer,
+                     BitWriter* acInfo_writer,
+                     size_t& ans_cost,
+                     size_t& mtf_cost,
+                     std::vector<std::vector<Token> >& bcm_tokens,
+                     std::vector<std::vector<Token> >& bcm_mtf_tokens,
+                     EntropyEncodingData& bcm_codes,
+                     std::vector<uint8_t>& bcm_dummy_context_map,
+
+                     EntropyEncodingData& modularFramTree_code,
+                     std::vector<uint8_t>& modularFramTree_ctxmap,
+
+                     EntropyEncodingData& coefOrders_codes,
+                     std::vector<uint8_t>& coefOrders_context_map,
+
+                     std::vector<AuxOut>& aux_outs,
+                     AuxOut* aux_out) {
+    std::vector<EncCache>& group_caches_ = lossy_frame_encoder.get_group_cashes();
+    PassesEncoderState* JXL_RESTRICT enc_state_ = lossy_frame_encoder.State();
+    PassesSharedState& shared = enc_state_->shared;
+
+    const coeff_order_t* JXL_RESTRICT order = &enc_state_->shared.coeff_orders[0 * enc_state_->shared.coeff_order_size];
+    auto mem = hwy::AllocateAligned<coeff_order_t>(AcStrategy::kMaxCoeffArea);
+    uint16_t computed = 0;
+    uint16_t used_orders = enc_state_->used_orders[0];
+
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+            uint8_t ord = kStrategyOrder[o];
+            if (computed & (1 << ord)) continue;
+            computed |= 1 << ord;
+            if ((used_orders & (1 << ord)) == 0) continue;
+            AcStrategy acs = AcStrategy::FromRawStrategy(o);
+            for (size_t c = 0; c < 3; c++) {
+                acc_EncodeCoeffOrder(&order[CoeffOrderOffset(ord, c)], acs, &coefOrders_tokens[0], mem.get());
+            }
+        }
+    }
+
+    HistogramParams params0;
+    params0.clustering = HistogramParams::ClusteringType::kFast;
+    params0.uint_method = HistogramParams::HybridUintMethod::kNone;
+    params0.lz77_method = HistogramParams::LZ77Method::kNone;
+    HistogramParams params1;
+    params1.clustering = HistogramParams::ClusteringType::kFast;
+    params1.uint_method = HistogramParams::HybridUintMethod::kNone;
+    params1.lz77_method = HistogramParams::LZ77Method::kNone;
+    HistogramParams params2;
+    params2.clustering = HistogramParams::ClusteringType::kFast;
+    params2.uint_method = HistogramParams::HybridUintMethod::kNone;
+    params2.lz77_method = HistogramParams::LZ77Method::kNone;
+    HistogramParams params3;
+    params3.clustering = HistogramParams::ClusteringType::kFast;
+    params3.uint_method = HistogramParams::HybridUintMethod::kNone;
+    params3.lz77_method = HistogramParams::LZ77Method::kNone;
+    HistogramParams params4(enc_state_->cparams.speed_tier, enc_state_->shared.block_ctx_map.NumACContexts());
+    if (enc_state_->cparams.decoding_speed_tier >= 1) {
+        params4.max_histograms = 6;
+    }
+
+    params4.clustering = HistogramParams::ClusteringType::kFast;
+    params4.uint_method = HistogramParams::HybridUintMethod::kNone;
+    params4.lz77_method = HistogramParams::LZ77Method::kNone;
+    std::vector<uint8_t> context_map0;
+    std::vector<uint8_t> context_map1;
+    std::vector<uint8_t> context_map2;
+    std::vector<uint8_t> context_map3;
+    std::vector<uint8_t> context_map4;
+    std::vector<uint8_t> context_map_c0;
+    std::vector<uint8_t> context_map_c1;
+    std::vector<uint8_t> context_map_c2;
+    std::vector<uint8_t> context_map_c3;
+    std::vector<uint8_t> context_map_c4;
+    std::vector<std::vector<Token> > tokens0(1);
+    std::vector<std::vector<Token> > tokens1(1);
+    std::vector<std::vector<Token> > tokens2(1);
+    std::vector<std::vector<Token> > tokens3(1);
+    std::vector<std::vector<Token> > tokens4(1);
+    std::vector<std::vector<Token> > tokens_c0(1);
+    std::vector<std::vector<Token> > tokens_c1(1);
+    std::vector<std::vector<Token> > tokens_c2(1);
+    std::vector<std::vector<Token> > tokens_c3(1);
+    std::vector<std::vector<Token> > tokens_c4(1);
+    EntropyEncodingData codes0;
+    EntropyEncodingData codes1;
+    EntropyEncodingData codes2;
+    EntropyEncodingData codes3;
+    EntropyEncodingData codes4;
+    EntropyEncodingData codes_c0;
+    EntropyEncodingData codes_c1;
+    EntropyEncodingData codes_c2;
+    EntropyEncodingData codes_c3;
+    EntropyEncodingData codes_c4;
+    std::vector<Histogram> clustered_histograms0;
+    std::vector<Histogram> clustered_histograms1;
+    std::vector<Histogram> clustered_histograms2;
+    std::vector<Histogram> clustered_histograms3;
+    std::vector<Histogram> clustered_histograms4;
+    std::vector<Histogram> clustered_histograms_c0;
+    std::vector<Histogram> clustered_histograms_c1;
+    std::vector<Histogram> clustered_histograms_c2;
+    std::vector<Histogram> clustered_histograms_c3;
+    std::vector<Histogram> clustered_histograms_c4;
+    BitWriter* writer0 = nullptr;
+    BitWriter* writer1 = nullptr;
+    BitWriter* writer2 = nullptr;
+    BitWriter* writer3 = nullptr;
+    BitWriter* writer4 = nullptr;
+    size_t layer0 = 0;
+    size_t layer1 = 0;
+    size_t layer2 = 0;
+    size_t layer3 = 0;
+    size_t layer4 = 0;
+    size_t num_contexts0 = 1;
+    size_t num_contexts1 = 1;
+    size_t num_contexts2 = 1;
+    size_t num_contexts3 = 1;
+    size_t num_contexts4 = 1;
+    bool do_once[5] = {0, 0, 0, 0, 0};
+    char* do_inner = (char*)malloc(sizeof(char) * 8);
+    for (int i = 0; i < 5; i++) do_inner[i] = 0;
+    char* do_prefix_in = (char*)malloc(sizeof(char) * 8);
+    for (int i = 0; i < 5; i++) do_prefix_in[i] = 0;
+    char* do_prefix_out = (char*)malloc(sizeof(char) * 8);
+    for (int i = 0; i < 5; i++) do_prefix_out[i] = 0;
+
+    const size_t num_passes = passes_enc_state->progressive_splitter.GetNumPasses();
+    const bool is_small_image = frame_dim.num_groups == 1 && num_passes == 1;
+
+    if (!is_small_image) {
+        group_codes_writer->init(200);
+        group_codes_writer->update_part(0);
+    } else {
+        group_codes_writer->init(200);
+        group_codes_writer->update_part(0);
+    }
+
+    bool all_default = true;
+    const float* dc_quant = (lossy_frame_encoder.State()->shared.matrices).DCQuants();
+    for (size_t c = 0; c < 3; c++) {
+        if (dc_quant[c] != kDCQuant[c]) {
+            all_default = false;
+        }
+    }
+    BitWriter::Allotment allotment(group_codes_writer, 1 + sizeof(float) * kBitsPerByte * 3);
+    group_codes_writer->Write(1, all_default);
+    if (!all_default) {
+        for (size_t c = 0; c < 3; c++) {
+            JXL_RETURN_IF_ERROR(F16Coder::Write(dc_quant[c] * 128.0f, group_codes_writer));
+        }
+    }
+    ReclaimAndCharge(group_codes_writer, &allotment, kLayerDequantTables, aux_out);
+
+    auto& dct = enc_state_->shared.block_ctx_map.dc_thresholds;
+    auto& qft = enc_state_->shared.block_ctx_map.qf_thresholds;
+    auto& ctx_map = enc_state_->shared.block_ctx_map.ctx_map;
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        JXL_RETURN_IF_ERROR(enc_state_->shared.quantizer.Encode(group_codes_writer, kLayerQuant, aux_out));
+        //============Encode GlobalDCInfo: Block Context Map=========
+        if (dct[0].empty() && dct[1].empty() && dct[2].empty() && qft.empty() && ctx_map.size() == 21 &&
+            std::equal(ctx_map.begin(), ctx_map.end(), jxl::kDefaultCtxMap)) {
+            group_codes_writer->Write(1, 1); // default
+        } else {
+            group_codes_writer->Write(1, 0);
+            for (int j : {0, 1, 2}) {
+                group_codes_writer->Write(4, dct[j].size());
+                for (int i : dct[j]) {
+                    JXL_CHECK(U32Coder::Write(kDCThresholdDist, PackSigned(i), group_codes_writer));
+                }
+            }
+            group_codes_writer->Write(4, qft.size());
+            for (uint32_t i : qft) {
+                JXL_CHECK(U32Coder::Write(kQFThresholdDist, i - 1, group_codes_writer));
+            }
+            for (size_t i = 0; i < ctx_map.size(); i++) {
+                bcm_tokens[0].emplace_back(0, ctx_map[i]);
+            }
+
+            {
+                std::vector<uint8_t> context_map = ctx_map;
+                BitWriter* writer = group_codes_writer;
+                writer0 = group_codes_writer;
+                size_t num_histograms = enc_state_->shared.block_ctx_map.num_ctxs;
+                if (num_histograms == 1) {
+                    // Simple code
+                    writer->Write(1, 1);
+                    // 0 bits per entry.
+                    writer->Write(2, 0);
+                } else {
+                    std::vector<std::vector<Token> > tokens(1);
+                    for (size_t i = 0; i < context_map.size(); i++) {
+                        tokens[0].emplace_back(0, context_map[i]);
+                    }
+
+                    size_t entry_bits = CeilLog2Nonzero(num_histograms);
+                    size_t simple_cost = entry_bits * context_map.size();
+                    if (entry_bits < 4) {
+                        writer->Write(1, 1);
+                        writer->Write(2, entry_bits);
+                        for (size_t i = 0; i < context_map.size(); i++) {
+                            writer->Write(entry_bits, context_map[i]);
+                        }
+                    } else {
+                        writer->Write(1, 0);
+                        writer->Write(1, 0);
+                        EntropyEncodingData context_codes0;
+                        std::vector<std::vector<Token> > context_tokens0(1);
+                        do_once[0] = true;
+                        num_contexts0 = 1;
+                        tokens0 = tokens;
+                        codes0 = bcm_codes;
+                        context_map0 = bcm_dummy_context_map;
+                        // codes_c0 = context_codes0;
+                        // writer0 = writer;
+                        layer0 = 0;
+
+                        // BuildAndEncodeHistogramsNew0
+                        // =========================================================
+                    }
+                }
+            }
+        }
+        //=============================
+        //============Encode GlobalDCInfo: Color Correlation Map=========
+        if (!is_small_image) {
+            group_codes_writer->update_part(20);
+        } else {
+            group_codes_writer->update_part(20);
+        }
+        ColorCorrelationMapEncodeDC(&enc_state_->shared.cmap, group_codes_writer, kLayerDC, aux_out);
+        //=============================
+    }
+
+    if (!is_small_image) {
+        group_codes_writer->update_part(30);
+    } else {
+        group_codes_writer->update_part(30);
+    }
+
+    writer1 = group_codes_writer;
+    writer2 = group_codes_writer;
+    BitWriter::Allotment allotmentGlobalInfo(group_codes_writer, 1);
+    // If we are using brotli, or not using modular mode.
+    if (modular_frame_encoder->tree_tokens.empty() || modular_frame_encoder->tree_tokens[0].empty()) {
+        group_codes_writer->Write(1, 0);
+        ReclaimAndCharge(group_codes_writer, &allotmentGlobalInfo, kLayerModularTree, aux_out);
+    } else {
+        group_codes_writer->Write(1, 1);
+        ReclaimAndCharge(group_codes_writer, &allotmentGlobalInfo, kLayerModularTree, aux_out);
+        // Write tree
+        if (cparams.speed_tier > SpeedTier::kKitten) {
+            params1.ans_histogram_strategy = HistogramParams::ANSHistogramStrategy::kApproximate;
+            params2.ans_histogram_strategy = HistogramParams::ANSHistogramStrategy::kApproximate;
+        }
+
+        if (cparams.decoding_speed_tier >= 1) {
+            params1.max_histograms = 12;
+            params2.max_histograms = 12;
+        }
+
+        EntropyEncodingData context_codes1;
+        std::vector<std::vector<Token> > context_tokens1(1);
+        std::vector<uint8_t> dummy_context_map1;
+
+        do_once[1] = true;
+        num_contexts1 = kNumTreeContexts;
+        tokens1 = modular_frame_encoder->tree_tokens;
+        codes1 = modularFramTree_code;
+        context_map1 = modularFramTree_ctxmap;
+        ////codes_c0 = context_codes0;
+        ////writer0 = writer;
+        layer1 = kLayerModularTree;
+
+        // BuildAndEncodeHistogramsNew1
+
+        if (!is_small_image) {
+            group_codes_writer->update_part(50);
+        } else {
+            group_codes_writer->update_part(50);
+        }
+        params2.image_widths = modular_frame_encoder->image_widths;
+        // Write histograms.
+        EntropyEncodingData context_codes2;
+        std::vector<std::vector<Token> > context_tokens2(1);
+        std::vector<uint8_t> dummy_context_map2;
+
+        do_once[2] = true;
+        num_contexts2 = (modular_frame_encoder->tree.size() + 1) / 2;
+        tokens2 = modular_frame_encoder->tokens;
+        codes2 = modular_frame_encoder->code;
+        context_map2 = modular_frame_encoder->context_map;
+        ////codes_c0 = context_codes0;
+        ////writer0 = writer;
+        layer2 = kLayerModularGlobal;
+
+        // BuildAndEncodeHistogramsNew2
+    }
+
+    //============================= Encode Global ACInfo =============
+    if (!is_small_image) {
+        acInfo_writer->init(200);
+        acInfo_writer->update_part(0);
+    } else {
+        acInfo_writer->update_part(80);
+    }
+    writer3 = acInfo_writer;
+    writer4 = acInfo_writer;
+
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        bool all_default = true;
+        const std::vector<QuantEncoding>& encodings = (enc_state_->shared.matrices).encodings();
+
+        for (size_t i = 0; i < encodings.size(); i++) {
+            if (encodings[i].mode != QuantEncoding::kQuantModeLibrary || encodings[i].predefined != 0) {
+                all_default = false;
+            }
+        }
+        // TODO(janwas): better bound
+        BitWriter::Allotment allotment(acInfo_writer, 512 * 1024);
+        acInfo_writer->Write(1, all_default);
+        ReclaimAndCharge(acInfo_writer, &allotment, kLayerDequantTables, aux_out);
+
+        size_t num_histo_bits = CeilLog2Nonzero(enc_state_->shared.frame_dim.num_groups);
+        if (num_histo_bits != 0) {
+            BitWriter::Allotment allotment(acInfo_writer, num_histo_bits);
+            acInfo_writer->Write(num_histo_bits, enc_state_->shared.num_histograms - 1);
+            ReclaimAndCharge(acInfo_writer, &allotment, kLayerAC, aux_out);
+        }
+
+        //============= encode coef orders========
+        // Encode coefficient orders.
+        uint16_t used_orders = enc_state_->used_orders[0];
+        size_t order_bits = 0;
+        JXL_RETURN_IF_ERROR(U32Coder::CanEncode(kOrderEnc, enc_state_->used_orders[0], &order_bits));
+        BitWriter::Allotment allotmentCoef(acInfo_writer, order_bits);
+        JXL_CHECK(U32Coder::Write(kOrderEnc, enc_state_->used_orders[0], acInfo_writer));
+        ReclaimAndCharge(acInfo_writer, &allotmentCoef, kLayerOrder, aux_out);
+
+        // Do not write anything if no order is used.
+        EntropyEncodingData context_codes3;
+        std::vector<std::vector<Token> > context_tokens3(1);
+        std::vector<uint8_t> dummy_context_map3;
+        do_once[3] = true;
+        num_contexts3 = kPermutationContexts;
+        tokens3 = coefOrders_tokens;
+        codes3 = coefOrders_codes;
+        context_map3 = coefOrders_context_map;
+        ////codes_c0 = context_codes0;
+        ////writer0 = writer;
+        layer3 = kLayerOrder;
+        // BuildAndEncodeHistogramsNew3
+
+        if (!is_small_image) {
+            acInfo_writer->update_part(20);
+        } else {
+            acInfo_writer->update_part(100);
+        }
+    }
+
+    std::vector<std::vector<Histogram> > histograms_(5);
+    histograms_[0].resize(num_contexts0);
+    histograms_[1].resize(num_contexts1);
+    histograms_[2].resize(num_contexts2);
+    histograms_[3].resize(num_contexts3);
+    histograms_[4].resize(enc_state_->shared.num_histograms * enc_state_->shared.block_ctx_map.NumACContexts());
+
+    std::vector<HistogramParams> params(5);
+    std::vector<size_t> num_contexts(5);
+    std::vector<size_t> layer(5);
+    std::vector<EntropyEncodingData*> codes(5);
+    std::vector<std::vector<uint8_t>*> context_map(5);
+    std::vector<EntropyEncodingData*> codes_c(5);
+    std::vector<BitWriter*> writer(5);
+    writer[0] = writer0;
+    writer[1] = writer1;
+    writer[2] = writer2;
+    writer[3] = writer3;
+    writer[4] = writer4;
+
+    std::vector<std::vector<uint32_t> > nonempty_histograms(5);
+    std::vector<uint32_t> largest_idx(5);
+
+    std::vector<std::vector<Histogram> > clustered_histograms(5);
+
+    std::vector<std::vector<Histogram> > clustered_histogramsin(5);
+    std::vector<std::vector<std::vector<Token> > > tokensin(5, std::vector<std::vector<Token> >(1));
+    std::vector<EntropyEncodingData> codesin(5);
+    std::vector<std::vector<uint8_t> > context_map_in(5);
+
+    constexpr float kMinDistanceForDistinctFast = 64.0f;
+    constexpr float kMinDistanceForDistinctBest = 16.0f;
+
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        do_once[4] = true;
+    }
+
+    // Build histograms.
+    for (int i = 0; i < 5; i++) {
+        if (!do_once[i]) continue;
+        if (i == 0) {
+            params[0] = params0;
+            num_contexts[0] = num_contexts0;
+            layer[0] = layer0;
+            codes[0] = &codes0;
+            context_map[0] = &context_map0;
+            codes_c[0] = &codes_c0;
+        } else if (i == 1) {
+            params[1] = params1;
+            num_contexts[1] = num_contexts1;
+            layer[1] = layer1;
+            codes[1] = &codes1;
+            context_map[1] = &context_map1;
+            codes_c[1] = &codes_c1;
+        } else if (i == 2) {
+            params[2] = params2;
+            num_contexts[2] = num_contexts2;
+            layer[2] = layer2;
+            codes[2] = &codes2;
+            context_map[2] = &context_map2;
+            codes_c[2] = &codes_c2;
+        } else if (i == 3) {
+            params[3] = params3;
+            num_contexts[3] = num_contexts3;
+            layer[3] = layer3;
+            codes[3] = &codes3;
+            context_map[3] = &context_map3;
+            codes_c[3] = &codes_c3;
+        } else if (i == 4) {
+            params[4] = params4;
+            num_contexts[4] = num_contexts4;
+            layer[4] = kLayerAC;
+            codes[4] = &enc_state_->passes[0].codes;
+            context_map[4] = &enc_state_->passes[0].context_map;
+            codes_c[4] = &codes_c4;
+        }
+    }
+
+    acc_ANSinitHistogram(lossy_frame_encoder, frame_header, params, do_once, tokens0, tokens1, tokens2, tokens3,
+                         do_prefix_out, largest_idx, nonempty_histograms, histograms_);
+
+    uint32_t numHisto[5];
+    uint32_t numCtx[5];
+
+    std::vector<int32_t*> histograms_ptr(5);
+    std::vector<uint32_t*> histo_totalcnt_ptr(5);
+    std::vector<uint32_t*> histo_size_ptr(5);
+    std::vector<uint32_t*> nonempty_histo_ptr(5);
+
+    for (int i = 0; i < 5; i++) {
+        numHisto[i] = histograms_[i].size();
+        numCtx[i] = num_contexts[i];
+        histograms_ptr[i] = (int32_t*)malloc(4096 * 40 * sizeof(int32_t));
+        memset(histograms_ptr[i], 0, 4096 * 40 * sizeof(int32_t));
+        histo_totalcnt_ptr[i] = (uint32_t*)malloc(4096 * sizeof(uint32_t));
+        memset(histo_totalcnt_ptr[i], 0, 4096 * sizeof(int32_t));
+        histo_size_ptr[i] = (uint32_t*)malloc(4096 * sizeof(int32_t));
+        memset(histo_size_ptr[i], 0, 4096 * sizeof(uint32_t));
+        nonempty_histo_ptr[i] = (uint32_t*)malloc(4096 * sizeof(uint32_t));
+        memset(nonempty_histo_ptr[i], 0, 4096 * sizeof(uint32_t));
+        for (int j = 0; j < histograms_[i].size(); j++) {
+            histo_totalcnt_ptr[i][j] = histograms_[i][j].total_count_;
+            histo_size_ptr[i][j] = histograms_[i][j].data_.size();
+            for (int k = 0; k < histograms_[i][j].data_.size(); k++) {
+                histograms_ptr[i][j * 40 + k] = histograms_[i][j].data_[k];
+            }
+        }
+
+        for (int j = 0; j < nonempty_histograms[i].size(); j++) {
+            nonempty_histo_ptr[i][j] = nonempty_histograms[i][j];
+        }
+    }
+
+    uint32_t numHisto_clusd[5];
+    uint32_t histo_size_clusdin[5] = {0, 0, 0, 0, 0};
+
+    std::vector<uint8_t*> ctx_map_ptr(5);
+    std::vector<int32_t*> histograms_clusd_ptr(5);
+    std::vector<uint32_t*> histo_size_clusd_ptr(5);
+    std::vector<int32_t*> histograms_clusdin_ptr(5);
+    for (int i = 0; i < 5; i++) {
+        ctx_map_ptr[i] = (uint8_t*)malloc(4096 * sizeof(uint8_t));
+        memset(ctx_map_ptr[i], 0, 4096 * sizeof(uint8_t));
+        histograms_clusd_ptr[i] = (int32_t*)malloc(128 * 40 * sizeof(int32_t));
+        memset(histograms_clusd_ptr[i], 0, 128 * 40 * sizeof(int32_t));
+        histo_size_clusd_ptr[i] = (uint32_t*)malloc(128 * sizeof(uint32_t));
+        memset(histo_size_clusd_ptr[i], 0, 128 * sizeof(uint32_t));
+        histograms_clusdin_ptr[i] = (int32_t*)malloc(4096 * sizeof(int32_t));
+        memset(histograms_clusdin_ptr[i], 0, 4096 * sizeof(int32_t));
+    }
+
+    uint32_t* config = (uint32_t*)malloc(35 * sizeof(uint32_t));
+    memset(config, 0, 35 * sizeof(uint32_t));
+
+    config[0] = histograms_[0].size();
+    config[1] = histograms_[1].size();
+    config[2] = histograms_[2].size();
+    config[3] = histograms_[3].size();
+    config[4] = histograms_[4].size();
+    config[5] = nonempty_histograms[0].size();
+    config[6] = nonempty_histograms[1].size();
+    config[7] = nonempty_histograms[2].size();
+    config[8] = nonempty_histograms[3].size();
+    config[9] = nonempty_histograms[4].size();
+    config[10] = largest_idx[0];
+    config[11] = largest_idx[1];
+    config[12] = largest_idx[2];
+    config[13] = largest_idx[3];
+    config[14] = largest_idx[4];
+
+    config[25] = do_once[0];
+    config[26] = do_once[1];
+    config[27] = do_once[2];
+    config[28] = do_once[3];
+    config[29] = do_once[4];
+
+// clang-format off
+#ifndef HLS_TEST
+  hls_ANSclusterHistogram_wrapper(
+    xclbinPath,
+    config,
+    //======= 
+    histograms_ptr[0],
+    histo_totalcnt_ptr[0],
+    histo_size_ptr[0],
+    nonempty_histo_ptr[0],
+    ctx_map_ptr[0],
+    histograms_clusd_ptr[0],
+    histo_size_clusd_ptr[0],
+    histograms_clusdin_ptr[0],
+    //========
+    histograms_ptr[1],
+    histo_totalcnt_ptr[1],
+    histo_size_ptr[1],
+    nonempty_histo_ptr[1],
+    ctx_map_ptr[1],
+    histograms_clusd_ptr[1],
+    histo_size_clusd_ptr[1],
+    histograms_clusdin_ptr[1],
+    //=======
+    histograms_ptr[2],
+    histo_totalcnt_ptr[2],
+    histo_size_ptr[2],
+    nonempty_histo_ptr[2],
+    ctx_map_ptr[2],
+    histograms_clusd_ptr[2],
+    histo_size_clusd_ptr[2],
+    histograms_clusdin_ptr[2],
+    //=======
+    histograms_ptr[3],
+    histo_totalcnt_ptr[3],
+    histo_size_ptr[3],
+    nonempty_histo_ptr[3],
+    ctx_map_ptr[3],
+    histograms_clusd_ptr[3],
+    histo_size_clusd_ptr[3],
+    histograms_clusdin_ptr[3],
+    //======
+    histograms_ptr[4],
+    histo_totalcnt_ptr[4],
+    histo_size_ptr[4],
+    nonempty_histo_ptr[4],
+    ctx_map_ptr[4],
+    histograms_clusd_ptr[4],
+    histo_size_clusd_ptr[4],
+    histograms_clusdin_ptr[4]
+);
+#else
+  acc_ANSclusterHistogram(config, 
+    histograms_ptr[0],
+    histo_totalcnt_ptr[0],
+    histo_size_ptr[0],
+
+    nonempty_histo_ptr[0],
+
+    ctx_map_ptr[0],
+
+    histograms_clusd_ptr[0],
+    histo_size_clusd_ptr[0],
+
+    histograms_clusdin_ptr[0],
+    //========
+    histograms_ptr[1],
+    histo_totalcnt_ptr[1],
+    histo_size_ptr[1],
+
+    nonempty_histo_ptr[1],
+
+    ctx_map_ptr[1],
+
+    histograms_clusd_ptr[1],
+    histo_size_clusd_ptr[1],
+
+    histograms_clusdin_ptr[1],
+    //=======
+    histograms_ptr[2],
+    histo_totalcnt_ptr[2],
+    histo_size_ptr[2],
+
+    nonempty_histo_ptr[2],
+
+    ctx_map_ptr[2],
+
+    histograms_clusd_ptr[2],
+    histo_size_clusd_ptr[2],
+
+    histograms_clusdin_ptr[2],
+    //=======
+    histograms_ptr[3],
+    histo_totalcnt_ptr[3],
+    histo_size_ptr[3],
+
+    nonempty_histo_ptr[3],
+
+    ctx_map_ptr[3],
+
+    histograms_clusd_ptr[3],
+    histo_size_clusd_ptr[3],
+
+    histograms_clusdin_ptr[3],
+    //======
+    histograms_ptr[4],
+    histo_totalcnt_ptr[4],
+    histo_size_ptr[4],
+
+    nonempty_histo_ptr[4],
+
+    ctx_map_ptr[4],
+
+    histograms_clusd_ptr[4],
+    histo_size_clusd_ptr[4],
+
+    histograms_clusdin_ptr[4]
+);
+#endif
+    // clang-format on
+
+    numHisto_clusd[0] = config[15];
+    numHisto_clusd[1] = config[16];
+    numHisto_clusd[2] = config[17];
+    numHisto_clusd[3] = config[18];
+    numHisto_clusd[4] = config[19];
+    histo_size_clusdin[0] = config[20];
+    histo_size_clusdin[1] = config[21];
+    histo_size_clusdin[2] = config[22];
+    histo_size_clusdin[3] = config[23];
+    histo_size_clusdin[4] = config[24];
+
+    for (int i = 0; i < 5; i++) {
+        do_inner[i] = 0;
+        if (histograms_[i].size() > 1) {
+            if (numHisto_clusd[i] == 1) {
+            } else {
+                size_t entry_bits = CeilLog2Nonzero(numHisto_clusd[i]);
+                if (entry_bits < 4) {
+                } else {
+                    do_inner[i] = 1;
+                }
+            }
+        }
+    }
+
+    for (int i = 0; i < 5; i++) {
+        if (!do_once[i]) continue;
+
+        if (do_inner[i]) {
+            clustered_histogramsin[i].resize(1);
+            clustered_histogramsin[i][0].data_.resize(histo_size_clusdin[i]);
+            for (int j = 0; j < histo_size_clusdin[i]; j++) {
+                clustered_histogramsin[i][0].data_[j] = histograms_clusdin_ptr[i][j];
+            }
+            context_map_in[i].resize(histo_size_clusdin[i]);
+        }
+    }
+
+    for (int i = 0; i < 5; i++) {
+        if (!do_once[i]) continue;
+        size_t histograms_size = numHisto[i];
+        if (histograms_size > 1) {
+            if (writer[i] != nullptr) {
+                size_t num_histograms = numHisto_clusd[i];
+                if (num_histograms == 1) {
+                } else {
+                    for (size_t j = 0; j < numHisto[i]; j++) {
+                        tokensin[i][0].emplace_back(0, ctx_map_ptr[i][j]);
+                    }
+                }
+            }
+        }
+    }
+
+    for (int i = 0; i < 5; i++) {
+        if (!do_once[i]) continue;
+        if (numHisto[i] > 1) {
+            clustered_histograms[i].resize(numHisto_clusd[i]);
+            for (int j = 0; j < numHisto_clusd[i]; j++) {
+                clustered_histograms[i][j].data_.resize(histo_size_clusd_ptr[i][j]);
+                for (int k = 0; k < histo_size_clusd_ptr[i][j]; k++) {
+                    clustered_histograms[i][j].data_[k] = histograms_clusd_ptr[i][j * 40 + k];
+                }
+            }
+        }
+    }
+
+    for (int i = 0; i < 5; i++) {
+        if (!do_once[i]) continue;
+
+        codes[i]->lz77.nonserialized_distance_context = num_contexts[i];
+        codes[i]->lz77.enabled = false;
+        codes[i]->lz77.min_symbol = 224;
+        codes[i]->encoding_info.clear();
+        if (do_inner[i]) {
+            codesin[i].lz77.nonserialized_distance_context = 1;
+            codesin[i].lz77.enabled = false;
+            codesin[i].lz77.min_symbol = 224;
+            codesin[i].encoding_info.clear();
+        }
+    }
+
+    for (int i = 0; i < 5; i++) {
+        if (!do_once[i]) continue;
+        context_map[i]->resize(numHisto[i]);
+        if (numHisto[i] > 1) {
+            for (size_t c = 0; c < numHisto[i]; ++c) {
+                (*context_map[i])[c] = ctx_map_ptr[i][c];
+            }
+        }
+    }
+
+    for (int i = 0; i < 5; i++) {
+        if (!do_once[i]) continue;
+        if (i == 0) {
+            tokens_c0 = tokensin[i];
+            codes_c0 = codesin[i];
+            context_map_c0 = context_map_in[i];
+            clustered_histograms0 = clustered_histograms[i];
+            clustered_histograms_c0 = clustered_histogramsin[i];
+        } else if (i == 1) {
+            tokens_c1 = tokensin[i];
+            codes_c1 = codesin[i];
+            context_map_c1 = context_map_in[i];
+            clustered_histograms1 = clustered_histograms[i];
+            clustered_histograms_c1 = clustered_histogramsin[i];
+        } else if (i == 2) {
+            tokens_c2 = tokensin[i];
+            codes_c2 = codesin[i];
+            context_map_c2 = context_map_in[i];
+            clustered_histograms2 = clustered_histograms[i];
+            clustered_histograms_c2 = clustered_histogramsin[i];
+        } else if (i == 3) {
+            tokens_c3 = tokensin[i];
+            codes_c3 = codesin[i];
+            context_map_c3 = context_map_in[i];
+            clustered_histograms3 = clustered_histograms[i];
+            clustered_histograms_c3 = clustered_histogramsin[i];
+        } else if (i == 4) {
+            tokens_c4 = tokensin[i];
+            codes_c4 = codesin[i];
+            context_map_c4 = context_map_in[i];
+            clustered_histograms4 = clustered_histograms[i];
+            clustered_histograms_c4 = clustered_histogramsin[i];
+        }
+        do_prefix_in[i] = 0;
+    }
+
+    for (int i = 0; i < 5; i++) {
+        if (!do_once[i]) continue;
+
+        if (i == 0) {
+            if (!is_small_image) {
+                writer[0]->update_part(1);
+            } else {
+                writer[0]->update_part(1);
+            }
+
+        } else if (i == 1) {
+            if (!is_small_image) {
+                writer[1]->update_part(31);
+            } else {
+                writer[1]->update_part(31);
+            }
+        } else if (i == 2) {
+            if (!is_small_image) {
+                writer[2]->update_part(51);
+            } else {
+                writer[2]->update_part(51);
+            }
+        } else if (i == 3) {
+            if (!is_small_image) {
+                writer[3]->update_part(1);
+            } else {
+                writer[3]->update_part(81);
+            }
+        } else if (i == 4) {
+            if (!is_small_image) {
+                writer[4]->update_part(21);
+            } else {
+                writer[4]->update_part(101);
+            }
+        }
+
+        size_t histograms_size = numHisto[i];
+
+        const size_t max_contexts = std::min((size_t)numCtx[i], kClustersLimit);
+        BitWriter::Allotment allotment(writer[i], 128 + numCtx[i] * 40 + max_contexts * 96);
+        if (writer[i]) {
+            LZ77Params lz77;
+            lz77.nonserialized_distance_context = numCtx[i];
+            lz77.enabled = false;
+            lz77.min_symbol = 224;
+            JXL_CHECK(Bundle::Write(lz77 /*codes[i]->lz77*/, writer[i], layer[i], nullptr));
+        }
+
+        if (histograms_size > 1) {
+            size_t num_histograms = numHisto_clusd[i];
+            if (writer[i] != nullptr) {
+                if (num_histograms == 1) {
+                    writer[i]->Write(1, 1);
+                    writer[i]->Write(2, 0);
+                } else {
+                    size_t entry_bits = CeilLog2Nonzero(num_histograms);
+                    if (entry_bits < 4) {
+                        writer[i]->Write(1, 1);
+                        writer[i]->Write(2, entry_bits);
+                        for (size_t j = 0; j < numHisto[i]; j++) {
+                            writer[i]->Write(entry_bits, ctx_map_ptr[i][j]);
+                        }
+                    } else {
+                        writer[i]->Write(1, 0);
+                        writer[i]->Write(1, 0);
+                    }
+                }
+            }
+        }
+        // StoreEntropyCodesNew
+        allotment.FinishedHistogram(writer[i]);
+        ReclaimAndCharge(writer[i], &allotment, layer[i], nullptr);
+
+        if (do_inner[i]) {
+            // do inner context map = true
+            BitWriter::Allotment allotment(writer[i], 128 + 1 * 40 + 96);
+            LZ77Params lz77;
+            lz77.nonserialized_distance_context = 1;
+            lz77.enabled = false;
+            lz77.min_symbol = 224;
+            JXL_CHECK(Bundle::Write(lz77 /*codesin[i].lz77*/, writer[i], 0, nullptr));
+
+            // StoreEntropyCodesNew
+            // WriteToken
+            allotment.FinishedHistogram(writer[i]);
+            ReclaimAndCharge(writer[i], &allotment, 0, nullptr);
+        }
+    }
+
+    // ==============================================
+    // Do StoreEntropyCodes for outer histogram
+    // ==============================================
+    // printf("do_prefix_out = %d, %d, %d, %d, %d\n", do_prefix_out[0],
+    // do_prefix_out[1], do_prefix_out[2], do_prefix_out[3], do_prefix_out[4]);
+
+    if (do_once[0]) {
+        if (!is_small_image) {
+            writer0->update_part(4);
+        } else {
+            writer0->update_part(4);
+        }
+        StoreEntropyCodesNew(params0, tokens0, &codes0, do_prefix_out[0], writer0, layer0, nullptr,
+                             clustered_histograms0);
+        bcm_codes = codes0;
+        bcm_dummy_context_map = context_map0;
+    }
+    if (do_once[1]) {
+        if (!is_small_image) {
+            writer1->update_part(34);
+        } else {
+            writer1->update_part(34);
+        }
+        StoreEntropyCodesNew(params1, tokens1, &codes1, do_prefix_out[1], writer1, layer1, nullptr,
+                             clustered_histograms1);
+        modularFramTree_code = codes1;
+        modularFramTree_ctxmap = context_map1;
+    }
+    if (do_once[2]) {
+        if (!is_small_image) {
+            writer2->update_part(54);
+        } else {
+            writer2->update_part(54);
+        }
+        StoreEntropyCodesNew(params2, tokens2, &codes2, do_prefix_out[2], writer2, layer2, nullptr,
+                             clustered_histograms2);
+        modular_frame_encoder->code = codes2;
+        modular_frame_encoder->context_map = context_map2;
+    }
+    if (do_once[3]) {
+        if (!is_small_image) {
+            writer3->update_part(4);
+        } else {
+            writer3->update_part(84);
+        }
+        StoreEntropyCodesNew(params3, tokens3, &codes3, do_prefix_out[3], writer3, layer3, nullptr,
+                             clustered_histograms3);
+        coefOrders_codes = codes3;
+        coefOrders_context_map = context_map3;
+    }
+    if (do_once[4]) {
+        if (!is_small_image) {
+            writer4->update_part(24);
+        } else {
+            writer4->update_part(104);
+        }
+        StoreEntropyCodesNew(params4, tokens4, &codes4, do_prefix_out[4], writer4, layer4, nullptr,
+                             clustered_histograms4);
+        enc_state_->passes[0].codes = codes4;
+        enc_state_->passes[0].context_map = context_map4;
+    }
+
+    // ==============================================
+    // Do StoreEntropyCodes for inner histogram
+    // ==============================================
+    // printf("do_prefix_in = %d, %d, %d, %d, %d\n", do_prefix_in[0],
+    // do_prefix_in[1], do_prefix_in[2], do_prefix_in[3], do_prefix_in[4]);
+
+    if (do_inner[0]) {
+        if (!is_small_image) {
+            writer0->update_part(2);
+        } else {
+            writer0->update_part(2);
+        }
+        StoreEntropyCodesNew(params0, tokens_c0, &codes_c0, do_prefix_in[0], writer0, 0, nullptr,
+                             clustered_histograms_c0);
+    }
+    if (do_inner[1]) {
+        if (!is_small_image) {
+            writer1->update_part(32);
+        } else {
+            writer1->update_part(32);
+        }
+        StoreEntropyCodesNew(params1, tokens_c1, &codes_c1, do_prefix_in[1], writer1, 0, nullptr,
+                             clustered_histograms_c1);
+    }
+    if (do_inner[2]) {
+        if (!is_small_image) {
+            writer2->update_part(52);
+        } else {
+            writer2->update_part(52);
+        }
+        StoreEntropyCodesNew(params2, tokens_c2, &codes_c2, do_prefix_in[2], writer2, 0, nullptr,
+                             clustered_histograms_c2);
+    }
+    if (do_inner[3]) {
+        if (!is_small_image) {
+            writer3->update_part(2);
+        } else {
+            writer3->update_part(82);
+        }
+        StoreEntropyCodesNew(params3, tokens_c3, &codes_c3, do_prefix_in[3], writer3, 0, nullptr,
+                             clustered_histograms_c3);
+    }
+    if (do_inner[4]) {
+        if (!is_small_image) {
+            writer4->update_part(22);
+        } else {
+            writer4->update_part(102);
+        }
+        StoreEntropyCodesNew(params4, tokens_c4, &codes_c4, do_prefix_in[4], writer4, 0, nullptr,
+                             clustered_histograms_c4);
+    }
+
+    // ==============================================
+    // Do WriteTokens for inner histogram
+    // ==============================================
+    // printf("do_inner = %d, %d, %d, %d, %d\n", do_inner[0], do_inner[1],
+    // do_inner[2], do_inner[3], do_inner[4]);
+    if (do_inner[0]) {
+        if (!is_small_image) {
+            writer0->update_part(3);
+        } else {
+            writer0->update_part(3);
+        }
+        // printf("%s: %s: %d, WriteTokens token size out=%zu,
+        // codes.encoding_info.size=%zu\n",
+        //  __FILE__, __FUNCTION__, __LINE__, tokens_c0[0].size(),
+        //  codes_c0.encoding_info.size(), context_map_c0.size());
+        WriteTokens(tokens_c0[0], codes_c0, context_map_c0, writer0);
+    }
+    if (do_inner[1]) {
+        if (!is_small_image) {
+            writer1->update_part(33);
+        } else {
+            writer1->update_part(33);
+        }
+        // printf("%s: %s: %d, WriteTokens token size out=%zu,
+        // codes.encoding_info.size=%zu, context_map.size=%d\n",
+        //  __FILE__, __FUNCTION__, __LINE__, tokens_c0[0].size(),
+        //  codes_c0.encoding_info.size(), context_map_c0.size());
+        WriteTokens(tokens_c1[0], codes_c1, context_map_c1, writer1);
+    }
+    if (do_inner[2]) {
+        if (!is_small_image) {
+            writer2->update_part(53);
+        } else {
+            writer2->update_part(53);
+        }
+        // printf("%s: %s: %d, WriteTokens token size out=%zu,
+        // codes.encoding_info.size=%zu, context_map.size=%d\n",
+        //  __FILE__, __FUNCTION__, __LINE__, tokens_c0[0].size(),
+        //  codes_c0.encoding_info.size(), context_map_c0.size());
+        WriteTokens(tokens_c2[0], codes_c2, context_map_c2, writer2);
+    }
+    if (do_inner[3]) {
+        if (!is_small_image) {
+            writer3->update_part(3);
+        } else {
+            writer3->update_part(83);
+        }
+        // printf("%s: %s: %d, WriteTokens token size out=%zu,
+        // codes.encoding_info.size=%zu, context_map.size=%d\n",
+        //  __FILE__, __FUNCTION__, __LINE__, tokens_c0[0].size(),
+        //  codes_c0.encoding_info.size(), context_map_c0.size());
+        WriteTokens(tokens_c3[0], codes_c3, context_map_c3, writer3);
+    }
+    if (do_inner[4]) {
+        if (!is_small_image) {
+            writer4->update_part(23);
+        } else {
+            writer4->update_part(103);
+        }
+        // printf("%s: %s: %d, WriteTokens token size out=%zu,
+        // codes.encoding_info.size=%zu, context_map.size=%d\n",
+        //  __FILE__, __FUNCTION__, __LINE__, tokens_c0[0].size(),
+        //  codes_c0.encoding_info.size(), context_map_c0.size());
+        WriteTokens(tokens_c4[0], codes_c4, context_map_c4, writer4);
+    }
+    return true;
+}
+
+// Writes the already-built token streams of one frame into their section
+// writers, in this order: block context map, modular tree, modular global
+// stream, DC groups, coefficient orders, AC groups.
+//
+// Before most writes the target writer is switched to a numbered part with
+// update_part(); the ids used here are the ones listed in the sequences that
+// acc_writeout passes to BitWriter::Finalize.
+// NOTE(review): the exact semantics of update_part()/init() live in the
+// project's BitWriter and are not visible here - confirm before renumbering.
+//
+// Parameters read: lossy_frame_encoder (encoder state), modular_frame_encoder
+// (tree/stream tokens, headers, codes), num_groups, passes_enc_state (number
+// of passes only), frame_dim, frame_header, coefOrders_tokens, bcm_tokens,
+// bcm_codes, bcm_dummy_context_map, modularFramTree_code/ctxmap,
+// coefOrders_codes/context_map, aux_outs, aux_out.
+// Parameters written: group_codes (through get_output), group_codes_writer,
+// acInfo_writer; dc_group_writers and acGroupWriters get one pointer appended
+// per DC group / per (group, pass) and are expected to be empty on entry
+// because they are indexed from 0 afterwards.
+// Parameters not used by this function: ans_cost, mtf_cost, bcm_mtf_tokens.
+//
+// Returns true on success, or the failing Status of a Bundle::Write call.
+Status acc_ANS_tokens(LossyFrameEncoder& lossy_frame_encoder,
+                      std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                      const size_t num_groups,
+                      PassesEncoderState* passes_enc_state,
+                      FrameDimensions frame_dim,
+                      std::unique_ptr<FrameHeader>& frame_header,
+                      std::vector<std::vector<Token> >& coefOrders_tokens,
+                      std::vector<BitWriter>& group_codes,
+                      BitWriter* group_codes_writer,
+                      BitWriter* acInfo_writer,
+                      std::vector<BitWriter*>& dc_group_writers,
+                      std::vector<BitWriter*>& acGroupWriters,
+                      size_t& ans_cost,
+                      size_t& mtf_cost,
+                      std::vector<std::vector<Token> >& bcm_tokens,
+                      std::vector<std::vector<Token> >& bcm_mtf_tokens,
+                      EntropyEncodingData& bcm_codes,
+                      std::vector<uint8_t>& bcm_dummy_context_map,
+
+                      EntropyEncodingData& modularFramTree_code,
+                      std::vector<uint8_t>& modularFramTree_ctxmap,
+
+                      EntropyEncodingData& coefOrders_codes,
+                      std::vector<uint8_t>& coefOrders_context_map,
+                      std::vector<AuxOut>& aux_outs,
+                      AuxOut* aux_out) {
+    PassesEncoderState* JXL_RESTRICT enc_state_ = lossy_frame_encoder.State();
+
+    const size_t num_passes = passes_enc_state->progressive_splitter.GetNumPasses();
+    const bool is_small_image = frame_dim.num_groups == 1 && num_passes == 1;
+
+    const bool has_ac_global = true;
+
+    auto& dct = enc_state_->shared.block_ctx_map.dc_thresholds;
+    auto& qft = enc_state_->shared.block_ctx_map.qf_thresholds;
+    auto& ctx_map = enc_state_->shared.block_ctx_map.ctx_map;
+
+    //============ANSWriteTokens Encode GlobalDCInfo: Block Context Map=========
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        // Nothing is written when the block context map equals the default one.
+        const bool is_default_bcm = dct[0].empty() && dct[1].empty() && dct[2].empty() && qft.empty() &&
+                                    ctx_map.size() == 21 &&
+                                    std::equal(ctx_map.begin(), ctx_map.end(), jxl::kDefaultCtxMap);
+        if (!is_default_bcm) {
+            if (enc_state_->shared.block_ctx_map.num_ctxs != 1) {
+                const size_t entry_bits = CeilLog2Nonzero(enc_state_->shared.block_ctx_map.num_ctxs);
+                // Tokens are only written for entries of 4 bits or more. The
+                // additional comparison of the simple cost against
+                // ans_cost/mtf_cost was already commented out in this port.
+                if (entry_bits >= 4) {
+                    group_codes_writer->update_part(10);
+                    WriteTokens(bcm_tokens[0], bcm_codes, bcm_dummy_context_map, group_codes_writer);
+                }
+            }
+            BitWriter::Allotment allotmentGlobalDCInfoBCM(
+                group_codes_writer, (dct[0].size() + dct[1].size() + dct[2].size() + qft.size()) * 34 + 1 + 4 + 4 +
+                                        ctx_map.size() * 10 + 1024);
+            ReclaimAndCharge(group_codes_writer, &allotmentGlobalDCInfoBCM, kLayerAC, aux_out);
+        }
+    }
+
+    //============ANSWriteTokens Encode GlobalDCInfo: modular frame tree=========
+    const bool has_tree_tokens =
+        !modular_frame_encoder->tree_tokens.empty() && !modular_frame_encoder->tree_tokens[0].empty();
+    if (has_tree_tokens) {
+        group_codes_writer->update_part(40);
+        WriteTokens(modular_frame_encoder->tree_tokens[0], modularFramTree_code, modularFramTree_ctxmap,
+                    group_codes_writer, kLayerModularTree, aux_out);
+    }
+
+    //============ANSWriteTokens Encode GlobalDCInfo: modular frame token=========
+    group_codes_writer->update_part(60);
+    const size_t global_stream_id = ModularStreamId::Global().ID(frame_dim);
+    // An image with no channels never gets its header decoded, so skip it.
+    if (!modular_frame_encoder->stream_images[global_stream_id].channel.empty()) {
+        JXL_RETURN_IF_ERROR(Bundle::Write(modular_frame_encoder->stream_headers[global_stream_id],
+                                          group_codes_writer, kLayerModularGlobal, aux_out));
+        WriteTokens(modular_frame_encoder->tokens[global_stream_id], modular_frame_encoder->code,
+                    modular_frame_encoder->context_map, group_codes_writer, kLayerModularGlobal, aux_out);
+    }
+
+    //============================= ANSWriteTokens DC group=============
+    // First collect (and prepare) one writer per DC group ...
+    for (size_t group_index = 0; group_index < frame_dim.num_dc_groups; group_index++) {
+        BitWriter* tmp = get_output(group_index + 1, group_codes, is_small_image);
+        dc_group_writers.emplace_back(tmp);
+        if (!is_small_image) {
+            tmp->init(200);
+            tmp->update_part(0);
+        } else {
+            tmp->update_part(70);
+        }
+    }
+
+    // ... then emit the DC, modular-DC and AC-metadata streams of each group.
+    for (size_t group_index = 0; group_index < frame_dim.num_dc_groups; group_index++) {
+        AuxOut* my_aux_out = aux_out ? &aux_outs[0] : nullptr;
+        BitWriter* output = dc_group_writers[group_index];
+        if (frame_header->encoding == FrameEncoding::kVarDCT && !(frame_header->flags & FrameHeader::kUseDcFrame)) {
+            BitWriter::Allotment allotment(output, 2);
+            output->Write(2, modular_frame_encoder->extra_dc_precision[group_index]);
+            ReclaimAndCharge(output, &allotment, kLayerDC, my_aux_out);
+            const size_t stream_id = ModularStreamId::VarDCTDC(group_index).ID(frame_dim);
+            if (!modular_frame_encoder->stream_images[stream_id].channel.empty()) {
+                // Propagate header write failures instead of dropping them.
+                JXL_RETURN_IF_ERROR(
+                    Bundle::Write(modular_frame_encoder->stream_headers[stream_id], output, kLayerDC, aux_out));
+                WriteTokens(modular_frame_encoder->tokens[stream_id], modular_frame_encoder->code,
+                            modular_frame_encoder->context_map, output, kLayerDC, my_aux_out);
+            }
+        }
+
+        const size_t modular_dc_id = ModularStreamId::ModularDC(group_index).ID(frame_dim);
+        if (!modular_frame_encoder->stream_images[modular_dc_id].channel.empty()) {
+            JXL_RETURN_IF_ERROR(Bundle::Write(modular_frame_encoder->stream_headers[modular_dc_id], output,
+                                              kLayerModularDcGroup, aux_out));
+            WriteTokens(modular_frame_encoder->tokens[modular_dc_id], modular_frame_encoder->code,
+                        modular_frame_encoder->context_map, output, kLayerModularDcGroup, my_aux_out);
+        }
+
+        if (frame_header->encoding == FrameEncoding::kVarDCT) {
+            const Rect& rect = lossy_frame_encoder.State()->shared.DCGroupRect(group_index);
+            size_t nb_bits = CeilLog2Nonzero(rect.xsize() * rect.ysize());
+            if (nb_bits != 0) {
+                BitWriter::Allotment allotment(output, nb_bits);
+                output->Write(nb_bits, modular_frame_encoder->ac_metadata_size[group_index] - 1);
+                ReclaimAndCharge(output, &allotment, kLayerControlFields, my_aux_out);
+            }
+            const size_t stream_id = ModularStreamId::ACMetadata(group_index).ID(frame_dim);
+            if (!modular_frame_encoder->stream_images[stream_id].channel.empty()) {
+                JXL_RETURN_IF_ERROR(Bundle::Write(modular_frame_encoder->stream_headers[stream_id], output,
+                                                  kLayerControlFields, aux_out));
+                WriteTokens(modular_frame_encoder->tokens[stream_id], modular_frame_encoder->code,
+                            modular_frame_encoder->context_map, output, kLayerControlFields, my_aux_out);
+            }
+        }
+    }
+
+    //============================= ANSWriteTokens AC Info=============
+    for (size_t i = 0; i < enc_state_->progressive_splitter.GetNumPasses(); i++) {
+        uint16_t used_orders = enc_state_->used_orders[i];
+        if (used_orders != 0) {
+            acInfo_writer->update_part(is_small_image ? 90 : 19);
+            WriteTokens(coefOrders_tokens[0], coefOrders_codes, coefOrders_context_map, acInfo_writer, kLayerOrder,
+                        aux_out);
+        }
+    }
+
+    // Switch the AC-info writer to its last part; nothing is written to that
+    // part inside this function.
+    acInfo_writer->update_part(is_small_image ? 109 : 29);
+
+    //========================Encode AC Group=============
+    // Writers are stored group-major: index = group_index * num_passes + pass.
+    for (size_t group_index = 0; group_index < num_groups; group_index++) {
+        for (size_t i = 0; i < num_passes; i++) {
+            BitWriter* tmp =
+                get_output(AcGroupIndex(i, group_index, frame_dim.num_groups, frame_dim.num_dc_groups, has_ac_global),
+                           group_codes, is_small_image);
+            acGroupWriters.emplace_back(tmp);
+        }
+    }
+
+    for (size_t group_index = 0; group_index < num_groups; group_index++) {
+        for (size_t i = 0; i < num_passes; i++) {
+            BitWriter* acGroupWriter = acGroupWriters[group_index * num_passes + i];
+            if (frame_header->encoding == FrameEncoding::kVarDCT) {
+                // Select which histogram to use among those of the current pass.
+                const size_t num_histograms = enc_state_->shared.num_histograms;
+                // num_histograms is 0 only for lossless.
+                JXL_ASSERT(num_histograms == 0 || enc_state_->histogram_idx[group_index] < num_histograms);
+                size_t histo_selector_bits = CeilLog2Nonzero(num_histograms);
+
+                if (histo_selector_bits != 0) {
+                    BitWriter::Allotment allotment(acGroupWriter, histo_selector_bits);
+                    acGroupWriter->Write(histo_selector_bits, enc_state_->histogram_idx[group_index]);
+                    ReclaimAndCharge(acGroupWriter, &allotment, kLayerAC, aux_out);
+                }
+                WriteTokens(enc_state_->passes[i].ac_tokens[group_index], enc_state_->passes[i].codes,
+                            enc_state_->passes[i].context_map, acGroupWriter, kLayerACTokens, aux_out);
+            }
+
+            const size_t stream_id = ModularStreamId::ModularAC(group_index, i).ID(frame_dim);
+            if (!modular_frame_encoder->stream_images[stream_id].channel.empty()) {
+                JXL_RETURN_IF_ERROR(Bundle::Write(modular_frame_encoder->stream_headers[stream_id], acGroupWriter,
+                                                  kLayerModularAcGroup, aux_out));
+                WriteTokens(modular_frame_encoder->tokens[stream_id], modular_frame_encoder->code,
+                            modular_frame_encoder->context_map, acGroupWriter, kLayerModularAcGroup, aux_out);
+            }
+        }
+    }
+
+    return true;
+}
+
+// Assembles the final frame bytes in `writer`.
+//
+// Steps, in this order:
+//   1. append the special frames and the (flag-updated) frame header;
+//   2. call resize_aux_outs(0); its result is deliberately ignored;
+//   3. zero-pad every section writer in group_codes to a byte boundary and
+//      Finalize the section writers with an explicit list of part ids;
+//   4. write the "no permutation" bit and one TOC entry (section size in
+//      bytes) per element of group_codes;
+//   5. append all section bytes and pad to the end of the frame.
+//
+// For a small image (one group, one pass) only group_codes_writer is
+// finalized, with a sequence that also contains the DC/AC part ids.
+// NOTE(review): presumably get_output() maps every section to that single
+// writer in this mode - confirm against get_output().
+//
+// dc_group_writers must hold frame_dim.num_dc_groups entries and
+// acGroupWriters num_groups * num_passes entries (group-major) when the image
+// is not small. Returns true on success or the first failing Status.
+Status acc_writeout(LossyFrameEncoder& lossy_frame_encoder,
+                    const size_t num_groups,
+                    PassesEncoderState* passes_enc_state,
+                    std::unique_ptr<FrameHeader>& frame_header,
+                    FrameDimensions frame_dim,
+                    std::vector<BitWriter>& group_codes,
+                    BitWriter* writer,
+                    BitWriter* group_codes_writer,
+                    BitWriter* acInfo_writer,
+                    std::vector<BitWriter*>& dc_group_writers,
+                    std::vector<BitWriter*>& acGroupWriters,
+                    AuxOut* aux_out,
+                    const std::function<Status(size_t)>& resize_aux_outs) {
+    const size_t num_passes = passes_enc_state->progressive_splitter.GetNumPasses();
+    const bool is_small_image = frame_dim.num_groups == 1 && num_passes == 1;
+
+    writer->AppendByteAligned(lossy_frame_encoder.State()->special_frames);
+    frame_header->UpdateFlag(lossy_frame_encoder.State()->shared.image_features.patches.HasAny(),
+                             FrameHeader::kPatches);
+    frame_header->UpdateFlag(lossy_frame_encoder.State()->shared.image_features.splines.HasAny(),
+                             FrameHeader::kSplines);
+    JXL_RETURN_IF_ERROR(WriteFrameHeader(*frame_header, writer, aux_out));
+
+    // Resizing aux_outs to 0 also Assimilates the array.
+    static_cast<void>(resize_aux_outs(0));
+
+    for (BitWriter& bw : group_codes) {
+        bw.ZeroPadToByte(); // end of group.
+    }
+
+    if (is_small_image) {
+        std::vector<int> group_codes_seq{0,  1,  2,  3,  4,  10, 19, 20, 29, 30, 31, 32,  33,  34,  40,  50,  51,
+                                         52, 53, 54, 60, 70, 80, 81, 82, 83, 84, 90, 100, 101, 102, 103, 104, 109};
+        group_codes_writer->Finalize(group_codes_seq);
+    } else {
+        std::vector<int> group_codes_seq{0, 1, 2, 3, 4, 10, 19, 20, 29, 30, 31, 32, 33, 34, 40, 50, 51, 52, 53, 54, 60};
+        group_codes_writer->Finalize(group_codes_seq);
+
+        std::vector<int> dc_group_seq{0};
+        for (size_t group_index = 0; group_index < frame_dim.num_dc_groups; group_index++) {
+            dc_group_writers[group_index]->Finalize(dc_group_seq);
+        }
+
+        std::vector<int> acInfo_seq{0, 1, 2, 3, 4, 10, 19, 20, 21, 22, 23, 24, 29};
+        acInfo_writer->Finalize(acInfo_seq);
+
+        std::vector<int> acGroup_seq{0};
+        for (size_t group_index = 0; group_index < num_groups; group_index++) {
+            for (size_t i = 0; i < num_passes; i++) {
+                acGroupWriters[group_index * num_passes + i]->Finalize(acGroup_seq);
+            }
+        }
+    }
+
+    // The allotment stays open across the TOC and is reclaimed after the
+    // padding that precedes the first group.
+    BitWriter::Allotment allotmentGrpOffset(writer, MaxBits(group_codes.size()));
+    writer->Write(1, 0); // no permutation
+    writer->Finalize();
+    writer->ZeroPadToByte(); // before TOC entries
+
+    for (size_t i = 0; i < group_codes.size(); i++) {
+        JXL_ASSERT(group_codes[i].BitsWritten() % kBitsPerByte == 0);
+        const size_t group_size = group_codes[i].BitsWritten() / kBitsPerByte;
+        JXL_RETURN_IF_ERROR(U32Coder::Write(kTocDist, group_size, writer));
+    }
+    writer->Finalize();
+    writer->ZeroPadToByte(); // before first group
+    ReclaimAndCharge(writer, &allotmentGrpOffset, kLayerTOC, aux_out);
+
+    writer->AppendByteAligned(group_codes);
+    writer->ZeroPadToByte(); // end of frame.
+
+    return true;
+}
+
+// Third encoder phase: builds the histograms, writes the tokens and assembles
+// the frame into `writer`.
+//
+// Allocates one BitWriter per TOC entry (DC global info + DC groups + AC
+// global info + AC groups * num_passes), then runs acc_histogram,
+// acc_ANS_tokens and acc_writeout in that order, sharing the intermediate
+// codes, context maps and writer pointers declared below.
+//
+// opsin, pool and ib are accepted for interface compatibility but are not
+// used by this function.
+//
+// Returns true on success, or the first failing status of the three steps
+// (previously their results were discarded and true was always returned).
+Status acc_phase3(std::string xclbinPath,
+                  Image3F& opsin,
+                  LossyFrameEncoder& lossy_frame_encoder,
+                  std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                  CompressParams cparams,
+                  std::unique_ptr<FrameHeader>& frame_header,
+                  PassesEncoderState* passes_enc_state,
+                  FrameDimensions frame_dim,
+                  BitWriter* writer,
+                  const size_t num_groups,
+                  AuxOut* aux_out,
+                  ThreadPool* pool,
+                  std::vector<AuxOut>& aux_outs,
+                  const ImageBundle& ib,
+                  const std::function<Status(size_t)>& resize_aux_outs) {
+    std::vector<std::vector<Token> > coefOrders_tokens(1);
+
+    const size_t num_passes = passes_enc_state->progressive_splitter.GetNumPasses();
+
+    // DC global info + DC groups + AC global info + AC groups *
+    // num_passes.
+    const bool has_ac_global = true;
+    std::vector<BitWriter> group_codes(
+        NumTocEntries(frame_dim.num_groups, frame_dim.num_dc_groups, num_passes, has_ac_global));
+    const size_t global_ac_index = frame_dim.num_dc_groups + 1;
+    const bool is_small_image = frame_dim.num_groups == 1 && num_passes == 1;
+
+    BitWriter* group_codes_writer = get_output(0, group_codes, is_small_image);
+    BitWriter* acInfo_writer = get_output(global_ac_index, group_codes, is_small_image);
+
+    // Block-context-map tokens and codes shared by the histogram and token steps.
+    std::vector<std::vector<Token> > bcm_tokens(1), bcm_mtf_tokens(1);
+    EntropyEncodingData bcm_codes;
+    std::vector<uint8_t> bcm_dummy_context_map;
+    // Zero-initialized so the by-reference consumers never read garbage.
+    size_t ans_cost = 0, mtf_cost = 0;
+
+    EntropyEncodingData modularFramTree_code;
+    std::vector<uint8_t> modularFramTree_ctxmap;
+
+    EntropyEncodingData coefOrders_codes;
+    std::vector<uint8_t> coefOrders_context_map;
+
+    // Filled by acc_ANS_tokens, consumed by acc_writeout.
+    std::vector<BitWriter*> dc_group_writers;
+    std::vector<BitWriter*> acGroupWriters;
+
+    JXL_RETURN_IF_ERROR(acc_histogram(xclbinPath, lossy_frame_encoder, modular_frame_encoder, passes_enc_state,
+                                      frame_dim, frame_header, cparams, coefOrders_tokens, group_codes_writer,
+                                      acInfo_writer, ans_cost, mtf_cost, bcm_tokens, bcm_mtf_tokens, bcm_codes,
+                                      bcm_dummy_context_map,
+
+                                      modularFramTree_code, modularFramTree_ctxmap,
+
+                                      coefOrders_codes, coefOrders_context_map,
+
+                                      aux_outs, aux_out));
+
+    JXL_RETURN_IF_ERROR(acc_ANS_tokens(lossy_frame_encoder, modular_frame_encoder, num_groups, passes_enc_state,
+                                       frame_dim, frame_header, coefOrders_tokens, group_codes, group_codes_writer,
+                                       acInfo_writer, dc_group_writers, acGroupWriters, ans_cost, mtf_cost, bcm_tokens,
+                                       bcm_mtf_tokens, bcm_codes, bcm_dummy_context_map,
+
+                                       modularFramTree_code, modularFramTree_ctxmap,
+
+                                       coefOrders_codes, coefOrders_context_map, aux_outs, aux_out));
+
+    return acc_writeout(lossy_frame_encoder, num_groups, passes_enc_state, frame_header, frame_dim, group_codes,
+                        writer, group_codes_writer, acInfo_writer, dc_group_writers, acGroupWriters, aux_out,
+                        resize_aux_outs);
+}
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_host.cpp b/codec/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_host.cpp
new file mode 100644
index 0000000000..c4c5a60e2c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_host.cpp
@@ -0,0 +1,308 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "acc_host.hpp"
+
+namespace jxl {
+// Resets *dequant_matrices to a default-constructed DequantMatrices and, only
+// in max-error mode, replaces every matrix and the DC weights with constants
+// equal to 1 / cparams.max_error[c] for each of the three channels.
+// `opsin` is not used.
+void FindBestDequantMatrices(const CompressParams& cparams,
+                             const Image3F& opsin,
+                             ModularFrameEncoder* modular_frame_encoder,
+                             DequantMatrices* dequant_matrices) {
+    // TODO(veluca): quant matrices for no-gaborish.
+    // TODO(veluca): heuristics for in-bitstream quant tables.
+    *dequant_matrices = DequantMatrices();
+    if (!cparams.max_error_mode) {
+        return;
+    }
+
+    // Per-channel reciprocal of the allowed error; used for both AC and DC.
+    float dc_weights[3];
+    for (size_t c = 0; c < 3; ++c) {
+        dc_weights[c] = 1.0f / cparams.max_error[c];
+    }
+
+    // Set numerators of all quantization matrices to constant values.
+    float weights[3][1] = {{dc_weights[0]}, {dc_weights[1]}, {dc_weights[2]}};
+    DctQuantWeightParams dct_params(weights);
+    std::vector<QuantEncoding> encodings(DequantMatrices::kNum, QuantEncoding::DCT(dct_params));
+    DequantMatricesSetCustom(dequant_matrices, encodings, modular_frame_encoder);
+    DequantMatricesSetCustomDC(dequant_matrices, dc_weights);
+}
+
+// Returns true only when every condition below holds; the checks run in the
+// same order as a single short-circuit conjunction would evaluate them.
+bool DefaultEncoderHeuristics::HandlesColorConversion(const CompressParams& cparams, const ImageBundle& ib) {
+    // Noise or patches explicitly forced on.
+    if (cparams.noise == Override::kOn || cparams.patches == Override::kOn) {
+        return false;
+    }
+    // Speed tier below kWombat, or resampling requested.
+    if (cparams.speed_tier < SpeedTier::kWombat || cparams.resampling != 1) {
+        return false;
+    }
+    // Not XYB, or modular mode.
+    if (cparams.color_transform != ColorTransform::kXYB || cparams.modular_mode) {
+        return false;
+    }
+    return !ib.HasAlpha();
+}
+
+// Host-side entry point: runs the three encoder phases in order on one frame.
+//
+//   acc_phase1(opsin, encoder state, params, header, frame info, input image)
+//   acc_phase2(xclbinPath, ..., extra_channels, input image)
+//   acc_phase3(xclbinPath, ..., frame_dim, writer, num_groups, aux outputs)
+//
+// Returns the Status of acc_phase3, so a failure while writing the frame is no
+// longer reported as success.
+// NOTE(review): the return types of acc_phase1/acc_phase2 are not visible
+// here; if they also return Status their results should be checked as well.
+Status acc_host(std::string xclbinPath,
+                Image3F& opsin,
+                LossyFrameEncoder& lossy_frame_encoder,
+                const ImageBundle* JXL_RESTRICT ib_or_linear,
+                ThreadPool* pool,
+                std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                BitWriter* writer,
+                AuxOut* aux_out,
+                std::unique_ptr<FrameHeader>& frame_header,
+                const FrameInfo& frame_info,
+                CompressParams cparams,
+                const std::vector<ImageF>* extra_channels,
+                PassesEncoderState* passes_enc_state,
+                FrameDimensions frame_dim,
+                const size_t num_groups,
+                const ImageBundle& ib,
+                std::vector<AuxOut>& aux_outs,
+                const std::function<Status(size_t)>& resize_aux_outs) {
+    acc_phase1(opsin, lossy_frame_encoder, cparams, frame_header, frame_info, ib_or_linear, ib, aux_out, pool);
+
+    acc_phase2(xclbinPath, opsin, lossy_frame_encoder, modular_frame_encoder, cparams, frame_header, extra_channels,
+               ib_or_linear, ib, pool, aux_out);
+
+    // Propagate the result of the final phase instead of discarding it.
+    return acc_phase3(xclbinPath, opsin, lossy_frame_encoder, modular_frame_encoder, cparams, frame_header,
+                      passes_enc_state, frame_dim, writer, num_groups, aux_out, pool, aux_outs, ib, resize_aux_outs);
+}
+
+Status DefaultEncoderHeuristics::LossyFrameHeuristics(PassesEncoderState* enc_state,
+                                                      ModularFrameEncoder* modular_frame_encoder,
+                                                      const ImageBundle* original_pixels,
+                                                      Image3F* opsin,
+                                                      ThreadPool* pool,
+                                                      AuxOut* aux_out) {
+    PROFILER_ZONE("JxlLossyFrameHeuristics uninstrumented");
+
+    CompressParams& cparams = enc_state->cparams;
+    PassesSharedState& shared = enc_state->shared;
+
+    // Compute parameters for noise synthesis.
+    if (shared.frame_header.flags & FrameHeader::kNoise) {
+        PROFILER_ZONE("enc GetNoiseParam");
+        if (cparams.photon_noise_iso > 0) {
+            shared.image_features.noise_params =
+                SimulatePhotonNoise(opsin->xsize(), opsin->ysize(), cparams.photon_noise_iso);
+        } else {
+            // Don't start at zero amplitude since adding noise is expensive -- it
+            // significantly slows down decoding, and this is unlikely to
+            // completely go away even with advanced optimizations. After the
+            // kNoiseModelingRampUpDistanceRange we have reached the full level,
+            // i.e. noise is no longer represented by the compressed image, so we
+            // can add full noise by the noise modeling itself.
+            static const float kNoiseModelingRampUpDistanceRange = 0.6;
+            static const float kNoiseLevelAtStartOfRampUp = 0.25;
+            static const float kNoiseRampupStart = 1.0;
+            // TODO(user) test and properly select quality_coef with smooth
+            // filter
+            float quality_coef = 1.0f;
+            const float rampup = (cparams.butteraugli_distance - kNoiseRampupStart) / kNoiseModelingRampUpDistanceRange;
+            if (rampup < 1.0f) {
+                quality_coef = kNoiseLevelAtStartOfRampUp + (1.0f - kNoiseLevelAtStartOfRampUp) * rampup;
+            }
+            if (rampup < 0.0f) {
+                quality_coef = kNoiseRampupStart;
+            }
+            if (!GetNoiseParameter(*opsin, &shared.image_features.noise_params, quality_coef)) {
+                shared.frame_header.flags &= ~FrameHeader::kNoise;
+            }
+        }
+    }
+    if (enc_state->shared.frame_header.upsampling != 1 && !cparams.already_downsampled) {
+        // In VarDCT mode, LossyFrameHeuristics takes care of running downsampling
+        // after noise, if necessary.
+        DownsampleImage(opsin, cparams.resampling);
+        PadImageToBlockMultipleInPlace(opsin);
+    }
+
+    const FrameDimensions& frame_dim = enc_state->shared.frame_dim;
+    size_t target_size = TargetSize(cparams, frame_dim);
+    size_t opsin_target_size = target_size;
+    if (cparams.target_size > 0 || cparams.target_bitrate > 0.0) {
+        cparams.target_size = opsin_target_size;
+    } else if (cparams.butteraugli_distance < 0) {
+        return JXL_FAILURE("Expected non-negative distance");
+    }
+
+#ifndef XLNX_DISABLE_BLK_DICT
+    // Find and subtract splines.
+    if (cparams.speed_tier <= SpeedTier::kSquirrel) {
+        shared.image_features.splines = FindSplines(*opsin);
+        JXL_RETURN_IF_ERROR(shared.image_features.splines.SubtractFrom(opsin, shared.cmap));
+    }
+
+    // Find and subtract patches/dots.
+    if (ApplyOverride(cparams.patches, cparams.speed_tier <= SpeedTier::kSquirrel)) {
+        FindBestPatchDictionary(*opsin, enc_state, pool, aux_out);
+        PatchDictionaryEncoder::SubtractFrom(shared.image_features.patches, opsin);
+    }
+#endif
+
+    static const float kAcQuant = 0.79f;
+    const float quant_dc = InitialQuantDC(cparams.butteraugli_distance);
+    Quantizer& quantizer = enc_state->shared.quantizer;
+    // We don't know the quant field yet, but for computing the global scale
+    // assuming that it will be the same as for Falcon mode is good enough.
+    quantizer.ComputeGlobalScaleAndQuant(quant_dc, kAcQuant / cparams.butteraugli_distance, 0);
+
+    // TODO(veluca): we can now run all the code from here to FindBestQuantizer
+    // (excluded) one rect at a time. Do that.
+
+    // Dependency graph:
+    //
+    // input: either XYB or input image
+    //
+    // input image -> XYB [optional]
+    // XYB -> initial quant field
+    // XYB -> Gaborished XYB
+    // Gaborished XYB -> CfL1
+    // initial quant field, Gaborished XYB, CfL1 -> ACS
+    // initial quant field, ACS, Gaborished XYB -> EPF control field
+    // initial quant field -> adjusted initial quant field
+    // adjusted initial quant field, ACS -> raw quant field
+    // raw quant field, ACS, Gaborished XYB -> CfL2
+    //
+    // output: Gaborished XYB, CfL, ACS, raw quant field, EPF control field.
+
+    ArControlFieldHeuristics ar_heuristics;
+    AcStrategyHeuristics acs_heuristics;
+    CfLHeuristics cfl_heuristics;
+
+    if (!opsin->xsize()) {
+        JXL_ASSERT(HandlesColorConversion(cparams, *original_pixels));
+        *opsin = Image3F(RoundUpToBlockDim(original_pixels->xsize()), RoundUpToBlockDim(original_pixels->ysize()));
+        opsin->ShrinkTo(original_pixels->xsize(), original_pixels->ysize());
+        ToXYB(*original_pixels, pool, opsin, /*linear=*/nullptr);
+        PadImageToBlockMultipleInPlace(opsin);
+    }
+
+    // Compute an initial estimate of the quantization field.
+    // Call InitialQuantField only in Hare mode or slower. Otherwise, rely
+    // on simple heuristics in FindBestAcStrategy, or set a constant for Falcon
+    // mode.
+    if (cparams.speed_tier > SpeedTier::kHare || cparams.uniform_quant > 0) {
+        enc_state->initial_quant_field = ImageF(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
+        float q = cparams.uniform_quant > 0 ? cparams.uniform_quant : kAcQuant / cparams.butteraugli_distance;
+        FillImage(q, &enc_state->initial_quant_field);
+    } else {
+        // Call this here, as it relies on pre-gaborish values.
+        float butteraugli_distance_for_iqf = cparams.butteraugli_distance;
+        if (!shared.frame_header.loop_filter.gab) {
+            butteraugli_distance_for_iqf *= 0.73f;
+        }
+        enc_state->initial_quant_field = InitialQuantField(butteraugli_distance_for_iqf, *opsin, shared.frame_dim, pool,
+                                                           1.0f, &enc_state->initial_quant_masking);
+    }
+
+    // TODO(veluca): do something about animations.
+
+    // Apply inverse-gaborish.
+    if (shared.frame_header.loop_filter.gab) {
+        GaborishInverse(opsin, 0.9908511000000001f, pool);
+    }
+
+    cfl_heuristics.Init(*opsin);
+    acs_heuristics.Init(*opsin, enc_state);
+    ar_heuristics.PrepareForThreads(/*num_threads*/ 1);
+    cfl_heuristics.PrepareForThreads(/*num_threads*/ 1);
+
+    //  auto process_tile = [&](size_t tid, size_t thread) {
+    for (int tid = 0; tid < DivCeil(enc_state->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks) *
+                                DivCeil(enc_state->shared.frame_dim.ysize_blocks, kEncTileDimInBlocks);
+         tid++) {
+        size_t thread = 0;
+        size_t n_enc_tiles = DivCeil(enc_state->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks);
+        size_t tx = tid % n_enc_tiles;
+        size_t ty = tid / n_enc_tiles;
+        size_t by0 = ty * kEncTileDimInBlocks;
+        size_t by1 = std::min((ty + 1) * kEncTileDimInBlocks, enc_state->shared.frame_dim.ysize_blocks);
+        size_t bx0 = tx * kEncTileDimInBlocks;
+        size_t bx1 = std::min((tx + 1) * kEncTileDimInBlocks, enc_state->shared.frame_dim.xsize_blocks);
+        Rect r(bx0, by0, bx1 - bx0, by1 - by0);
+
+        // For speeds up to Wombat, we only compute the color correlation map
+        // once we know the transform type and the quantization map.
+        if (cparams.speed_tier <= SpeedTier::kSquirrel) {
+            //      cfl_heuristics.ComputeTile(r, *opsin, enc_state->shared.matrices,
+            //                                 /*ac_strategy=*/nullptr,
+            //                                 /*quantizer=*/nullptr, /*fast=*/false, thread,
+            //                                 &enc_state->shared.cmap);
+        }
+
+// Choose block sizes.
+//    acs_heuristics.ProcessRect(r);
+
+// Choose amount of post-processing smoothing.
+// TODO(veluca): should this go *after* AdjustQuantField?
+#ifndef XLNX_DISABLE_ARC
+        ar_heuristics.RunRect(r, *opsin, enc_state, thread);
+#else
+        ImageB* JXL_RESTRICT epf_sharpness = &enc_state->shared.epf_sharpness;
+        FillPlane(static_cast<uint8_t>(4), epf_sharpness, r);
+#endif
+        // Always set the initial quant field, so we can compute the CfL map with
+        // more accuracy. The initial quant field might change in slower modes, but
+        // adjusting the quant field with butteraugli when all the other encoding
+        // parameters are fixed is likely a more reliable choice anyway.
+        AdjustQuantField(enc_state->shared.ac_strategy, r, &enc_state->initial_quant_field);
+        quantizer.SetQuantFieldRect(enc_state->initial_quant_field, r, &enc_state->shared.raw_quant_field);
+
+// Compute a non-default CfL map if we are at Hare speed, or slower.
+#ifndef XLNX_DISABLE_2NDCMP
+        if (cparams.speed_tier <= SpeedTier::kHare) {
+            cfl_heuristics.ComputeTile(
+                r, *opsin, enc_state->shared.matrices, &enc_state->shared.ac_strategy, &enc_state->shared.quantizer,
+                /*fast=*/cparams.speed_tier >= SpeedTier::kWombat, thread, &enc_state->shared.cmap);
+        }
+#endif
+    };
+    /*  RunOnPool(pool, 0, DivCeil(enc_state->shared.frame_dim.xsize_blocks,
+                                 kEncTileDimInBlocks) *
+                             DivCeil(enc_state->shared.frame_dim.ysize_blocks,
+                                     kEncTileDimInBlocks),
+                [&](const size_t num_threads) {
+                  ar_heuristics.PrepareForThreads(num_threads);
+                  cfl_heuristics.PrepareForThreads(num_threads);
+                  return true;
+                },
+                process_tile, "Enc Heuristics");*/
+
+    acs_heuristics.Finalize(aux_out);
+    if (cparams.speed_tier <= SpeedTier::kHare) {
+        cfl_heuristics.ComputeDC(/*fast=*/cparams.speed_tier >= SpeedTier::kWombat, &enc_state->shared.cmap);
+    }
+
+    FindBestDequantMatrices(cparams, *opsin, modular_frame_encoder, &enc_state->shared.matrices);
+
+    // Refine quantization levels.
+    FindBestQuantizer(original_pixels, *opsin, enc_state, pool, aux_out);
+
+    // Choose a context model that depends on the amount of quantization for AC.
+    if (cparams.speed_tier < SpeedTier::kFalcon) {
+        FindBestBlockEntropyModel(*enc_state);
+    }
+
+#ifdef XLNX_DEBUG_CMAP
+    std::cout << "=========================================" << std::endl;
+    std::cout << "ColorMap info: " << std::endl;
+    ImageSB* JXL_RESTRICT tmp_map = &enc_state->shared.cmap.ytox_map;
+    int32_t dc = enc_state->shared.cmap.GetYToXDC();
+    std::cout << "Y to X dc: " << dc << std::endl;
+    for (int i = 0; i < tmp_map->ysize(); i++) {
+        int8_t* JXL_RESTRICT row_out = tmp_map->Row(i);
+        for (int j = 0; j < tmp_map->xsize(); j++) {
+            std::cout << (int)row_out[j] << " ";
+        }
+        std::cout << std::endl;
+    }
+
+    tmp_map = &enc_state->shared.cmap.ytox_map;
+    dc = enc_state->shared.cmap.GetYToBDC();
+    std::cout << "Y to B dc: " << dc << std::endl;
+    for (int i = 0; i < tmp_map->ysize(); i++) {
+        int8_t* JXL_RESTRICT row_out = tmp_map->Row(i);
+        for (int j = 0; j < tmp_map->xsize(); j++) {
+            std::cout << (int)row_out[j] << " ";
+        }
+        std::cout << std::endl;
+    }
+    std::cout << std::endl;
+#endif
+
+    return true;
+}
+} // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase1.cpp b/codec/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase1.cpp
new file mode 100644
index 0000000000..a37f251c20
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase1.cpp
@@ -0,0 +1,276 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef HLS_KERNEL1_CPP
+#define HLS_KERNEL1_CPP
+
+#include "acc_phase1.hpp"
+
+namespace jxl {
+namespace {
+// Rewrites fully transparent samples (alpha == 0) of `image`, which are invisible but tend to hold noisy data
+// in optimized PNGs, so that they compress better and cause fewer artifacts near edges.
+// In lossless mode they are set to 0. Otherwise each becomes a weighted average of: the left sample (weight 1,
+// or 2 if visible), the top-right sample (weight 1, or 3 if visible), and the right, bottom-right, top and
+// bottom samples (weight 2 each, only if visible); the sum is divided only when the total weight exceeds 1.
+// This smears colours downwards but duplicates just a 1px edge upwards. Smearing in all directions would
+// need an alpha-weighted convolution with a large enough kernel, which might be overkill.
+void SimplifyInvisible(Image3F* image, const ImageF& alpha, bool lossless) {
+    const size_t width = image->xsize();
+    const size_t height = image->ysize();
+    for (size_t c = 0; c < 3; ++c) {
+        for (size_t y = 0; y < height; ++y) {
+            const bool has_above = y > 0;
+            const bool has_below = y + 1 < height;
+            float* JXL_RESTRICT cur = image->PlaneRow(c, y);
+            const float* JXL_RESTRICT above = has_above ? image->PlaneRow(c, y - 1) : nullptr;
+            const float* JXL_RESTRICT below = has_below ? image->PlaneRow(c, y + 1) : nullptr;
+            const float* JXL_RESTRICT alpha_cur = alpha.Row(y);
+            const float* JXL_RESTRICT alpha_above = has_above ? alpha.Row(y - 1) : nullptr;
+            const float* JXL_RESTRICT alpha_below = has_below ? alpha.Row(y + 1) : nullptr;
+            for (size_t x = 0; x < width; ++x) {
+                if (alpha_cur[x] != 0) continue; // visible sample: leave it untouched
+                if (lossless) {
+                    cur[x] = 0;
+                    continue;
+                }
+                float sum = 0.f;    // weighted sum of the contributing samples
+                float weight = 0.f; // total weight accumulated in `sum`
+                if (x > 0) { // left sample; already rewritten if it was invisible itself
+                    sum += cur[x - 1];
+                    weight++;
+                    if (alpha_cur[x - 1] > 0.f) {
+                        sum += cur[x - 1];
+                        weight++;
+                    }
+                }
+                if (x + 1 < width) { // right-hand column: top-right, right, bottom-right
+                    if (has_above) {
+                        sum += above[x + 1];
+                        weight++;
+                    }
+                    if (alpha_cur[x + 1] > 0.f) {
+                        sum += 2.f * cur[x + 1];
+                        weight += 2.f;
+                    }
+                    if (has_above && alpha_above[x + 1] > 0.f) {
+                        sum += 2.f * above[x + 1];
+                        weight += 2.f;
+                    }
+                    if (has_below && alpha_below[x + 1] > 0.f) {
+                        sum += 2.f * below[x + 1];
+                        weight += 2.f;
+                    }
+                }
+                if (has_above && alpha_above[x] > 0.f) {
+                    sum += 2.f * above[x];
+                    weight += 2.f;
+                }
+                if (has_below && alpha_below[x] > 0.f) {
+                    sum += 2.f * below[x];
+                    weight += 2.f;
+                }
+                cur[x] = (weight > 1.f) ? sum / weight : sum;
+            }
+        }
+    }
+}
+} // namespace
+
+// First phase (acc_phase1) of the host-side lossy encoding flow: fills `opsin` and the initial quantization state.
+//
+// Steps: allocate `opsin` and fill it from `ib` (ToXYB when the frame uses XYB and `ib` needs the transform,
+// a plain copy otherwise); optionally simplify invisible pixels; then, for VarDCT frames only: pad to block
+// multiples, derive x_qm_scale, estimate noise, downsample, subtract splines/patches (unless
+// XLNX_DISABLE_BLK_DICT), set the quantizer global scale, fill initial_quant_field and apply inverse Gaborish.
+// Returns true on success, otherwise the failing Status. `cparams` and `ib_or_linear` are local by-value copies.
+Status acc_phase1(Image3F& opsin,
+                  LossyFrameEncoder& lossy_frame_encoder,
+                  CompressParams cparams,
+                  std::unique_ptr<FrameHeader>& frame_header,
+                  const FrameInfo& frame_info,
+                  const ImageBundle* JXL_RESTRICT ib_or_linear,
+                  const ImageBundle& ib,
+                  AuxOut* aux_out,
+                  ThreadPool* pool) {
+    const ColorEncoding& c_linear = ColorEncoding::LinearSRGB(ib.IsGray());
+    std::unique_ptr<ImageMetadata> metadata_linear = jxl::make_unique<ImageMetadata>();
+    metadata_linear->xyb_encoded = (cparams.color_transform == ColorTransform::kXYB);
+    metadata_linear->color_encoding = c_linear;
+    ImageBundle linear_storage(metadata_linear.get());
+
+    // Allocating a large enough image avoids a copy when padding.
+    opsin = Image3F(RoundUpToBlockDim(ib.xsize()), RoundUpToBlockDim(ib.ysize()));
+    opsin.ShrinkTo(ib.xsize(), ib.ysize());
+
+    const bool want_linear =
+        frame_header->encoding == FrameEncoding::kVarDCT && cparams.speed_tier <= SpeedTier::kKitten;
+    ib_or_linear = &ib; // rebinds the local pointer only; the caller's argument is not modified
+
+    if (frame_header->color_transform == ColorTransform::kXYB && frame_info.ib_needs_color_transform) {
+        // linear_storage would only be used by the Butteraugli loop (passing
+        // linear sRGB avoids a color conversion there). Otherwise, don't
+        // fill it to reduce memory usage.
+        ib_or_linear = ToXYB(ib, pool, &opsin, want_linear ? &linear_storage : nullptr);
+    } else { // RGB or YCbCr: don't do anything (forward YCbCr is not
+             // implemented, this is only used when the input is already in
+             // YCbCr)
+             // If encoding a special DC or reference frame, don't do anything:
+             // input is already in XYB.
+        CopyImageTo(ib.color(), &opsin);
+    }
+    bool lossless = (frame_header->encoding == FrameEncoding::kModular && cparams.quality_pair.first == 100);
+    if (ib.HasAlpha() && !ib.AlphaIsPremultiplied() && !ApplyOverride(cparams.keep_invisible, lossless) &&
+        cparams.ec_resampling == cparams.resampling) {
+        // simplify invisible pixels
+        SimplifyInvisible(&opsin, ib.alpha(), lossless);
+        if (want_linear) {
+            SimplifyInvisible(const_cast<Image3F*>(&ib_or_linear->color()), ib.alpha(), lossless);
+        }
+    }
+    if (aux_out != nullptr) {
+        JXL_RETURN_IF_ERROR(aux_out->InspectImage3F("enc_frame:OpsinDynamicsImage", opsin));
+    }
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        PadImageToBlockMultipleInPlace(&opsin);
+        PassesEncoderState* JXL_RESTRICT enc_state_ = lossy_frame_encoder.State();
+
+        JXL_ASSERT((opsin.xsize() % kBlockDim) == 0 && (opsin.ysize() % kBlockDim) == 0);
+        PassesSharedState& shared = enc_state_->shared;
+
+        if (!enc_state_->cparams.max_error_mode) {
+            float x_qm_scale_steps[3] = {0.65f, 1.25f, 9.0f};
+            shared.frame_header.x_qm_scale = 1;
+            for (float x_qm_scale_step : x_qm_scale_steps) {
+                if (enc_state_->cparams.butteraugli_distance > x_qm_scale_step) {
+                    shared.frame_header.x_qm_scale++;
+                }
+            }
+        }
+
+        Image3F* opsin_ = &opsin;
+
+        // Compute parameters for noise synthesis.
+        if (shared.frame_header.flags & FrameHeader::kNoise) {
+            PROFILER_ZONE("enc GetNoiseParam");
+            if (cparams.photon_noise_iso > 0) {
+                shared.image_features.noise_params =
+                    SimulatePhotonNoise(opsin_->xsize(), opsin_->ysize(), cparams.photon_noise_iso);
+            } else {
+                // Don't start at zero amplitude since adding noise is expensive -- it
+                // significantly slows down decoding, and this is unlikely to
+                // completely go away even with advanced optimizations. After the
+                // kNoiseModelingRampUpDistanceRange we have reached the full level,
+                // i.e. noise is no longer represented by the compressed image, so we
+                // can add full noise by the noise modeling itself.
+                static const float kNoiseModelingRampUpDistanceRange = 0.6;
+                static const float kNoiseLevelAtStartOfRampUp = 0.25;
+                static const float kNoiseRampupStart = 1.0;
+                // TODO(user) test and properly select quality_coef with smooth filter
+                float quality_coef = 1.0f;
+                const float rampup =
+                    (cparams.butteraugli_distance - kNoiseRampupStart) / kNoiseModelingRampUpDistanceRange;
+                if (rampup < 1.0f) {
+                    quality_coef = kNoiseLevelAtStartOfRampUp + (1.0f - kNoiseLevelAtStartOfRampUp) * rampup;
+                }
+                if (rampup < 0.0f) {
+                    quality_coef = kNoiseRampupStart;
+                }
+                if (!GetNoiseParameter(*opsin_, &shared.image_features.noise_params, quality_coef)) {
+                    shared.frame_header.flags &= ~FrameHeader::kNoise;
+                }
+            }
+        }
+        if (enc_state_->shared.frame_header.upsampling != 1 && !cparams.already_downsampled) {
+            // Downsample here, after the (optional) noise estimation above, then restore the block padding.
+            // NOTE(review): the comment here used to name LossyFrameHeuristics -- confirm it does not repeat this.
+            DownsampleImage(opsin_, cparams.resampling);
+            PadImageToBlockMultipleInPlace(opsin_);
+        }
+
+        const FrameDimensions& frame_dim_ = enc_state_->shared.frame_dim;
+        size_t target_size = TargetSize(cparams, frame_dim_);
+        size_t opsin_target_size = target_size;
+        if (cparams.target_size > 0 || cparams.target_bitrate > 0.0) {
+            cparams.target_size = opsin_target_size; // NOTE(review): writes the by-value copy only; confirm intent
+        } else if (cparams.butteraugli_distance < 0) {
+            return JXL_FAILURE("Expected non-negative distance");
+        }
+
+#ifndef XLNX_DISABLE_BLK_DICT
+        // Find and subtract splines.
+        if (cparams.speed_tier <= SpeedTier::kSquirrel) {
+            shared.image_features.splines = FindSplines(*opsin_);
+            JXL_RETURN_IF_ERROR(shared.image_features.splines.SubtractFrom(opsin_, shared.cmap));
+        }
+
+        // Find and subtract patches/dots.
+        if (ApplyOverride(cparams.patches, cparams.speed_tier <= SpeedTier::kSquirrel)) {
+            FindBestPatchDictionary(*opsin_, enc_state_, pool, aux_out);
+            PatchDictionaryEncoder::SubtractFrom(shared.image_features.patches, opsin_);
+        }
+#endif
+
+        static const float kAcQuant = 0.79f;
+        const float quant_dc = InitialQuantDC(cparams.butteraugli_distance);
+        Quantizer& quantizer = enc_state_->shared.quantizer;
+        // We don't know the quant field yet, but for computing the global scale
+        // assuming that it will be the same as for Falcon mode is good enough.
+        quantizer.ComputeGlobalScaleAndQuant(quant_dc, kAcQuant / cparams.butteraugli_distance, 0);
+
+        // TODO(veluca): we can now run all the code from here to FindBestQuantizer
+        // (excluded) one rect at a time. Do that.
+
+        // Dependency graph:
+        //
+        // input: either XYB or input image
+        //
+        // input image -> XYB [optional]
+        // XYB -> initial quant field
+        // XYB -> Gaborished XYB
+        // Gaborished XYB -> CfL1
+        // initial quant field, Gaborished XYB, CfL1 -> ACS
+        // initial quant field, ACS, Gaborished XYB -> EPF control field
+        // initial quant field -> adjusted initial quant field
+        // adjusted initial quant field, ACS -> raw quant field
+        // raw quant field, ACS, Gaborished XYB -> CfL2
+        //
+        // output: Gaborished XYB, CfL, ACS, raw quant field, EPF control field.
+
+        if (!opsin_->xsize()) { // NOTE(review): opsin is allocated from ib above; confirm this can still trigger
+            JXL_ASSERT(enc_state_->heuristics->HandlesColorConversion(cparams, *ib_or_linear));
+            *opsin_ = Image3F(RoundUpToBlockDim(ib_or_linear->xsize()), RoundUpToBlockDim(ib_or_linear->ysize()));
+            opsin_->ShrinkTo(ib_or_linear->xsize(), ib_or_linear->ysize());
+            ToXYB(*ib_or_linear, pool, opsin_, /*linear=*/nullptr);
+            PadImageToBlockMultipleInPlace(opsin_);
+        }
+
+        // Compute an initial estimate of the quantization field. Call InitialQuantField only in Hare mode or slower.
+        // Otherwise, rely on simple heuristics in FindBestAcStrategy, or set a constant for Falcon mode.
+        if (cparams.speed_tier > SpeedTier::kHare || cparams.uniform_quant > 0) {
+            enc_state_->initial_quant_field = ImageF(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
+            float q = cparams.uniform_quant > 0 ? cparams.uniform_quant : kAcQuant / cparams.butteraugli_distance;
+            FillImage(q, &enc_state_->initial_quant_field);
+        } else {
+            // Call this here, as it relies on pre-gaborish values.
+            float butteraugli_distance_for_iqf = cparams.butteraugli_distance;
+            if (!shared.frame_header.loop_filter.gab) {
+                butteraugli_distance_for_iqf *= 0.73f;
+            }
+            enc_state_->initial_quant_field = InitialQuantField(butteraugli_distance_for_iqf, *opsin_, shared.frame_dim,
+                                                                pool, 1.0f, &enc_state_->initial_quant_masking);
+        }
+
+        // TODO(veluca): do something about animations.
+
+        // Apply inverse-gaborish.
+        if (shared.frame_header.loop_filter.gab) {
+            GaborishInverse(opsin_, 0.9908511000000001f, pool);
+        }
+    }
+    return true;
+}
+} // namespace jxl
+#endif
\ No newline at end of file
diff --git a/codec/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase2.cpp b/codec/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase2.cpp
new file mode 100644
index 0000000000..322d6e5003
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase2.cpp
@@ -0,0 +1,587 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef HLS_KERNEL2_CPP
+#define HLS_KERNEL2_CPP
+
+#include "acc_phase2.hpp"
+#include "host_lossy_enc_compute.hpp"
+
+#include <iostream>
+#include <queue>
+#include <fstream>
+
+namespace jxl {
+
+// Copies DC coefficients computed by the HLS kernel from flat buffers into the DC image `dc`.
+//
+// For each group, every block that starts an AC-strategy region gets its DC values, for all three channels,
+// from the buffer selected by the raw strategy: 0 -> hls_dc8x8 (1 value), 4 -> hls_dc16x16 (2x2 values),
+// 5 -> hls_dc32x32 (4x4 values). Any other strategy only prints a message and leaves `dc` unchanged.
+// NOTE(review): 0/4/5 are presumably DCT8x8/DCT16x16/DCT32x32 -- confirm against AcStrategy::Type.
+//
+// enc_state     provides frame_dim.num_groups, the group rectangles and the AC strategy map.
+// dc            destination image; written in place.
+// xsize, ysize  rounded up to multiples of 64 to derive the buffer layout (presumably pixel sizes; verify).
+// hls_dc*       source buffers; channel c starts at offset c * tile_xsize * tile_ysize in each of them.
+void collect_dc(PassesEncoderState* enc_state,
+                Image3F* dc,
+                size_t xsize,
+                size_t ysize,
+                float* hls_dc8x8,
+                float* hls_dc16x16,
+                float* hls_dc32x32) {
+    const size_t dc_stride = static_cast<size_t>(dc->PixelsPerRow());
+    // The buffer geometry does not depend on the block, so compute it once.
+    const size_t tile_xsize = (xsize + 63) / 64 * 64;
+    const size_t tile_ysize = (ysize + 63) / 64 * 64;
+    const size_t plane_size = tile_xsize * tile_ysize;
+
+    for (size_t group = 0; group < enc_state->shared.frame_dim.num_groups; group++) {
+        const Rect block_group_rect = enc_state->shared.BlockGroupRect(group);
+        const size_t xsize_blocks = block_group_rect.xsize();
+        const size_t ysize_blocks = block_group_rect.ysize();
+
+        for (size_t by = 0; by < ysize_blocks; ++by) {
+            float* JXL_RESTRICT dc_rows[3] = {
+                block_group_rect.PlaneRow(dc, 0, by), block_group_rect.PlaneRow(dc, 1, by),
+                block_group_rect.PlaneRow(dc, 2, by),
+            };
+            AcStrategyRow ac_strategy_row = enc_state->shared.ac_strategy.ConstRow(block_group_rect, by);
+            const size_t y = block_group_rect.y0() + by; // block row within the frame
+            // Blocks of a row are visited left to right; only the first block of each strategy region is used.
+            for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+                const AcStrategy acs = ac_strategy_row[bx];
+                if (!acs.IsFirstBlock()) continue;
+
+                const size_t x = block_group_rect.x0() + bx; // block column within the frame
+                const size_t block_cnt8x8 = y * (tile_xsize / 8) + x;
+                const size_t block_cnt16x16 = y / 2 * (tile_xsize / 16) + x / 2;
+                const size_t block_cnt32x32 = y / 4 * (tile_xsize / 32) + x / 4;
+                const auto raw_strategy = acs.RawStrategy();
+
+                for (size_t c = 0; c < 3; ++c) {
+                    float* coef_dc = dc_rows[c] + bx;
+                    const size_t plane = c * plane_size; // start of channel c in the HLS buffers
+                    if (raw_strategy == 0) {
+                        coef_dc[0] = hls_dc8x8[plane + block_cnt8x8];
+                    } else if (raw_strategy == 4) {
+                        for (size_t dy = 0; dy < 2; dy++) {
+                            for (size_t dx = 0; dx < 2; dx++) {
+                                coef_dc[dy * dc_stride + dx] =
+                                    hls_dc16x16[plane + 4 * block_cnt16x16 + dy * 2 + dx];
+                            }
+                        }
+                    } else if (raw_strategy == 5) {
+                        for (size_t dy = 0; dy < 4; dy++) {
+                            for (size_t dx = 0; dx < 4; dx++) {
+                                coef_dc[dy * dc_stride + dx] =
+                                    hls_dc32x32[plane + 16 * block_cnt32x32 + dy * 4 + dx];
+                            }
+                        }
+                    } else {
+                        std::cout << "unsupported DCFromLowFREQ" << std::endl;
+                    }
+                }
+            }
+        }
+    }
+}
+
+Status acc_phase2(std::string xclbinPath,
+                  Image3F& opsin,
+                  LossyFrameEncoder& lossy_frame_encoder,
+                  std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                  CompressParams cparams,
+                  std::unique_ptr<FrameHeader>& frame_header,
+                  const std::vector<ImageF>* extra_channels,
+                  const ImageBundle* JXL_RESTRICT ib_or_linear,
+                  const ImageBundle& ib,
+                  ThreadPool* pool,
+                  AuxOut* aux_out) {
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        //===================================================================================================//
+        // kernel-2 CPU part, pre-processing
+        //===================================================================================================//
+
+        // pointer define
+        PassesEncoderState* JXL_RESTRICT enc_state_ = lossy_frame_encoder.State();
+        PassesSharedState& shared = enc_state_->shared;
+
+        // define sizes
+        uint32_t tile_xsize = (opsin.xsize() + 63) / 64 * 64;
+        uint32_t tile_ysize = (opsin.ysize() + 63) / 64 * 64;
+        uint32_t ysize64 = tile_ysize / 64;
+        uint32_t xsize64 = tile_xsize / 64;
+        int xsize_blocks = enc_state_->shared.frame_dim.xsize_blocks;
+        int ysize_blocks = enc_state_->shared.frame_dim.ysize_blocks;
+        int xnum_tile = (opsin.xsize() + 63) / 64;
+        int ynum_tile = (opsin.ysize() + 63) / 64;
+        unsigned xsize_8alg = (opsin.xsize() + 7) / 8 * 8;
+        unsigned ysize_8alg = (opsin.ysize() + 7) / 8 * 8;
+        int num_tile = xnum_tile * ynum_tile;
+
+        Image3F* opsin_ = &opsin;
+        Quantizer& quantizer = enc_state_->shared.quantizer;
+        enc_state_->shared.matrices = DequantMatrices();
+        enc_state_->histogram_idx.resize(shared.frame_dim.num_groups);
+        enc_state_->x_qm_multiplier = std::pow(1.25f, shared.frame_header.x_qm_scale - 2.0f);
+        enc_state_->b_qm_multiplier = std::pow(1.25f, shared.frame_header.b_qm_scale - 2.0f);
+
+        if (enc_state_->coeffs.size() < shared.frame_header.passes.num_passes) {
+            enc_state_->coeffs.reserve(shared.frame_header.passes.num_passes);
+            for (size_t i = enc_state_->coeffs.size(); i < shared.frame_header.passes.num_passes; i++) {
+                // Allocate enough coefficients for each group on every row.
+                enc_state_->coeffs.emplace_back(
+                    make_unique<ACImageT<int32_t> >(kGroupDim * kGroupDim, shared.frame_dim.num_groups));
+            }
+        }
+
+        while (enc_state_->coeffs.size() > shared.frame_header.passes.num_passes) {
+            enc_state_->coeffs.pop_back();
+        }
+
+        Image3F dc(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
+
+        AcStrategyHeuristics acs_heuristics;
+        CfLHeuristics cfl_heuristics;
+        cfl_heuristics.Init(*opsin_);
+        cfl_heuristics.PrepareForThreads(1);
+        acs_heuristics.Init(*opsin_, enc_state_);
+
+        //========================================================================//
+        // host interface
+        //========================================================================//
+        int config[MAX_NUM_CONFIG];
+        float config_fl[MAX_NUM_CONFIG];
+        float* hls_opsin_1 = (float*)malloc(ALL_PIXEL * sizeof(float));
+        float* hls_opsin_2 = (float*)malloc(ALL_PIXEL * sizeof(float));
+        float* hls_opsin_3 = (float*)malloc(ALL_PIXEL * sizeof(float));
+        float* hls_quant_field = (float*)malloc(BLOCK8_H * BLOCK8_W * sizeof(float));
+        float* hls_masking_field = (float*)malloc(BLOCK8_H * BLOCK8_W * sizeof(float));
+        float* aq_map_f = (float*)malloc(BLOCK8_H * BLOCK8_W * sizeof(float));
+        int8_t* cmap_axi = (int8_t*)malloc(TILE_W * TILE_H * 2 * sizeof(int8_t));
+        int* ac_coef_axiout = (int*)malloc(ALL_PIXEL * sizeof(int));
+        uint8_t* strategy_all = (uint8_t*)malloc(sizeof(uint8_t*) * BLOCK8_H * BLOCK8_W);
+        int* raw_quant_field_i = (int*)malloc(BLOCK8_H * BLOCK8_W * sizeof(int));
+        uint32_t hls_order[MAX_ORDER];
+        float* hls_dc8x8 = (float*)malloc(ALL_PIXEL * sizeof(float));
+        float* hls_dc16x16 = (float*)malloc(ALL_PIXEL * sizeof(float));
+        float* hls_dc32x32 = (float*)malloc(ALL_PIXEL * sizeof(float));
+
+        float* Image_reorder_dct8 = (float*)malloc(ALL_PIXEL * sizeof(float));
+        float* Image_reorder_dct16 = (float*)malloc(ALL_PIXEL * sizeof(float));
+        float* Image_reorder_dct32 = (float*)malloc(ALL_PIXEL * sizeof(float));
+
+        config[0] = opsin.ysize();
+        config[1] = opsin.xsize();
+        config[2] = acs_heuristics.config.masking_field_stride;
+        config[3] = acs_heuristics.config.quant_field_stride;
+        config_fl[0] = acs_heuristics.enc_state->cparams.butteraugli_distance;
+        config_fl[1] = acs_heuristics.config.cost1;
+        config_fl[2] = quantizer.InvGlobalScale();
+
+        for (int c = 0; c < 3; c++) {
+            for (int y = 0; y < tile_ysize; y++) {
+                const float* JXL_RESTRICT row = opsin.ConstPlaneRow(c, y);
+                memcpy(&hls_opsin_1[c * tile_xsize * tile_ysize + y * tile_xsize], row, tile_xsize * sizeof(float));
+            }
+        }
+
+        for (int c = 0; c < 3; c++) {
+            for (int y = 0; y < tile_ysize; y++) {
+                const float* JXL_RESTRICT row = opsin.ConstPlaneRow(c, y);
+                memcpy(&hls_opsin_2[c * tile_xsize * tile_ysize + y * tile_xsize], row, tile_xsize * sizeof(float));
+            }
+        }
+
+        for (int c = 0; c < 3; c++) {
+            for (int y = 0; y < tile_ysize; y++) {
+                const float* JXL_RESTRICT row = opsin.ConstPlaneRow(c, y);
+                memcpy(&hls_opsin_3[c * tile_xsize * tile_ysize + y * tile_xsize], row, tile_xsize * sizeof(float));
+            }
+        }
+
+        for (uint32_t y64 = 0; y64 < ysize64; y64++) {
+            for (uint32_t x64 = 0; x64 < xsize64; x64++) {
+                for (uint32_t y8 = 0; y8 < 8; y8++) {
+                    for (uint32_t x8 = 0; x8 < 8; x8++) {
+                        for (int c = 0; c < 3; c++) {
+                            for (int m = 0; m < 8; m++) {
+                                for (int n = 0; n < 8; n++) {
+                                    uint32_t c_tmp = 0;
+                                    if (c == 0) {
+                                        c_tmp = 1;
+                                    } else if (c == 1) {
+                                        c_tmp = 0;
+                                    } else {
+                                        c_tmp = 2;
+                                    }
+                                    uint32_t addr = c_tmp * tile_xsize * tile_ysize + y64 * tile_xsize * 64 + x64 * 64 +
+                                                    y8 * tile_xsize * 8 + x8 * 8 + m * tile_xsize + n;
+
+                                    float reg = hls_opsin_1[addr];
+                                    Image_reorder_dct8[n + 8 * m + 64 * c + 64 * 3 * x8 + 512 * 3 * y8 +
+                                                       4096 * 3 * x64 + 4096 * 3 * xsize64 * y64] = reg;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        for (uint32_t y64 = 0; y64 < ysize64; y64++) {
+            for (uint32_t x64 = 0; x64 < xsize64; x64++) {
+                for (uint32_t y16 = 0; y16 < 4; y16++) {
+                    for (uint32_t x16 = 0; x16 < 4; x16++) {
+                        for (uint32_t c = 0; c < 3; c++) {
+                            for (uint32_t m = 0; m < 16; m++) {
+                                for (uint32_t n = 0; n < 16; n++) {
+                                    uint32_t c_tmp = 0;
+                                    if (c == 0) {
+                                        c_tmp = 1;
+                                    } else if (c == 1) {
+                                        c_tmp = 0;
+                                    } else {
+                                        c_tmp = 2;
+                                    }
+
+                                    uint32_t addr = c_tmp * tile_xsize * tile_ysize + y64 * tile_xsize * 64 + x64 * 64 +
+                                                    y16 * tile_xsize * 16 + x16 * 16 + m * tile_xsize + n;
+                                    float reg = hls_opsin_2[addr];
+                                    Image_reorder_dct16[4096 * 3 * xsize64 * y64 + 4096 * 3 * x64 + 1024 * 3 * y16 +
+                                                        256 * 3 * x16 + 256 * c + 16 * m + n] = reg;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        for (uint32_t y64 = 0; y64 < ysize64; y64++) {
+            for (uint32_t x64 = 0; x64 < xsize64; x64++) {
+                for (uint32_t y32 = 0; y32 < 2; y32++) {
+                    for (uint32_t x32 = 0; x32 < 2; x32++) {
+                        for (uint32_t c = 0; c < 3; c++) {
+                            for (uint32_t m = 0; m < 32; m++) {
+                                for (uint32_t n = 0; n < 32; n++) {
+                                    uint32_t c_tmp = 0;
+                                    if (c == 0) {
+                                        c_tmp = 1;
+                                    } else if (c == 1) {
+                                        c_tmp = 0;
+                                    } else {
+                                        c_tmp = 2;
+                                    }
+
+                                    uint32_t addr = c_tmp * tile_xsize * tile_ysize + y64 * tile_xsize * 64 + x64 * 64 +
+                                                    y32 * tile_xsize * 32 + x32 * 32 + m * tile_xsize + n;
+                                    float reg = hls_opsin_3[addr];
+                                    Image_reorder_dct32[4096 * 3 * xsize64 * y64 + 4096 * 3 * x64 + 2048 * 3 * y32 +
+                                                        1024 * 3 * x32 + 1024 * c + 32 * m + n] = reg;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        // input: rqf
+        for (int y = 0; y < ysize_blocks; y++) {
+            float* aq_row = enc_state_->initial_quant_field.Row(y);
+            for (int x = 0; x < xsize_blocks; x++) {
+                aq_map_f[y * xsize_blocks + x] = aq_row[x];
+            }
+        }
+
+        // input: masking field
+        for (int i = 0; i < BLOCK8_H * BLOCK8_W; i++) {
+            hls_masking_field[i] = acs_heuristics.config.masking_field_row[i];
+        }
+
+        // input: quant_field
+        for (int i = 0; i < BLOCK8_H * BLOCK8_W; i++) {
+            hls_quant_field[i] = acs_heuristics.config.quant_field_row[i];
+        }
+
+        //================================================================//
+        // kernel-2 FPGA kernel part, pass HLS test
+        // hls_kernel2_top.cpp
+        //===============================================================//
+        hls_lossy_enc_compute_wrapper(xclbinPath,
+                                      // input
+                                      config, config_fl, Image_reorder_dct8, Image_reorder_dct16, Image_reorder_dct32,
+                                      hls_quant_field, hls_masking_field, aq_map_f,
+                                      // output
+                                      cmap_axi, ac_coef_axiout, strategy_all, raw_quant_field_i, hls_order, hls_dc8x8,
+                                      hls_dc16x16, hls_dc32x32);
+
+        //==============================================================//
+        // kernel-2 CPU part, post-processing
+        //==============================================================//
+        // ac_coef host post-process
+        int* ac_coef = (int*)malloc(ALL_PIXEL * sizeof(int));
+        {
+            bool visit[8][8];
+            int i = 0, addr = 0;
+            for (int ty = 0; ty < ynum_tile; ty++) {
+                for (int tx = 0; tx < xnum_tile; tx++) {
+                    for (int by = 0; by < 8; by++) {
+                        for (int bx = 0; bx < 8; bx++) {
+                            visit[by][bx] = false;
+                        }
+                    }
+                    for (int by = 0; by < 8; by++) {
+                        for (int bx = 0; bx < 8; bx++) {
+                            if (!visit[by][bx] && (ty * 8 + by) < ysize_8alg / 8 && (tx * 8 + bx) < xsize_8alg / 8) {
+                                int idx_acs = (ty * 8 + by) * xsize_8alg / 8 + tx * 8 + bx;
+                                char strategy = strategy_all[idx_acs];
+                                int b = 0;
+                                if (strategy == 4) {
+                                    b = 2;
+                                } else if (strategy == 5) {
+                                    b = 4;
+                                } else {
+                                    b = 1;
+                                }
+                                for (int iy = 0; iy < b; iy++) {
+                                    for (int ix = 0; ix < b; ix++) {
+                                        visit[by + iy][bx + ix] = true;
+                                        for (int j = 0; j < 64; j++) {
+                                            for (unsigned c = 0; c < 3; ++c) {
+                                                if (c == 0 && j == 0) {
+                                                    addr = ((ty * 8 + by + iy) * 64 * 3 * xsize_8alg / 8 +
+                                                            (tx * 8 + bx + ix) * 64 * 3);
+                                                }
+                                                ac_coef[addr + j * 3 + c] = ac_coef_axiout[i];
+                                                i++;
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        // acs host post-processing
+        AcStrategyImage* acs_strategy = &acs_heuristics.enc_state->shared.ac_strategy;
+        for (size_t y = 0; y < ysize_blocks; ++y) {
+            for (size_t x = 0; x < xsize_blocks; ++x) {
+                int index = y * xsize_blocks + x;
+                int value = strategy_all[index];
+                if (value == 4 && y % 2 == 0 && x % 2 == 0) {
+                    acs_strategy->Set(x, y, static_cast<AcStrategy::Type>(value));
+                } else if (value == 5 && y % 4 == 0 && x % 4 == 0) {
+                    acs_strategy->Set(x, y, static_cast<AcStrategy::Type>(value));
+                } else if (value < 4) {
+                    acs_strategy->Set(x, y, static_cast<AcStrategy::Type>(value));
+                }
+            }
+        }
+
+        // rqf host post-processing
+        ImageI* raw_quant_field = &enc_state_->shared.raw_quant_field;
+        for (int y = 0; y < ysize_blocks; y++) {
+            float* aq_row = enc_state_->initial_quant_field.Row(y); // quant_field.Row(y);
+            int* row_qi = raw_quant_field->Row(y);
+            for (int x = 0; x < xsize_blocks; x++) {
+                row_qi[x] = raw_quant_field_i[y * xsize_blocks + x];
+                aq_row[x] = aq_map_f[y * xsize_blocks + x];
+            }
+        }
+
+        // epf init
+        ImageB* epf_sharpness = &enc_state_->shared.epf_sharpness;
+        for (int y = 0; y < enc_state_->shared.frame_dim.ysize_blocks; y++) {
+            uint8_t* row = epf_sharpness->Row(y);
+            for (int x = 0; x < enc_state_->shared.frame_dim.xsize_blocks; x++) {
+                row[x] = 4;
+            }
+        }
+
+        // dc coeff post-processing
+        collect_dc(enc_state_, &dc, opsin.xsize(), opsin.ysize(), hls_dc8x8, hls_dc16x16, hls_dc32x32);
+
+        // cmap host post-processing
+        const FrameDimensions frame_dim = enc_state_->shared.frame_dim;
+        ImageSB* map_x = &(enc_state_->shared.cmap).ytox_map;
+        ImageSB* map_b = &(enc_state_->shared.cmap).ytob_map;
+
+        for (int tid = 0; tid < DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks) *
+                                    DivCeil(frame_dim.ysize_blocks, kEncTileDimInBlocks);
+             tid++) {
+            size_t n_enc_tiles = DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks);
+            size_t tx = tid % n_enc_tiles;
+            size_t ty = tid / n_enc_tiles;
+            size_t by0 = ty * kEncTileDimInBlocks;
+            size_t by1 = std::min((ty + 1) * kEncTileDimInBlocks, frame_dim.ysize_blocks);
+            size_t bx0 = tx * kEncTileDimInBlocks;
+            size_t bx1 = std::min((tx + 1) * kEncTileDimInBlocks, frame_dim.xsize_blocks);
+            Rect r(bx0, by0, bx1 - bx0, by1 - by0);
+            static_assert(kEncTileDimInBlocks == kColorTileDimInBlocks, "Invalid color tile dim");
+
+            size_t num_ac = 0;
+
+            int8_t* JXL_RESTRICT row_out_x = map_x->Row(ty);
+            int8_t* JXL_RESTRICT row_out_b = map_b->Row(ty);
+
+            row_out_x[tx] = cmap_axi[tid];
+            row_out_b[tx] = cmap_axi[num_tile + tid];
+        }
+
+        // ac_coeff host post-processing
+        for (size_t group_index = 0; group_index < frame_dim.num_groups; group_index++) {
+            const size_t gx = group_index % frame_dim.xsize_groups;
+            const size_t gy = group_index / frame_dim.xsize_groups;
+            const Rect rect(gx * kGroupDimInBlocks, gy * kGroupDimInBlocks, kGroupDimInBlocks, kGroupDimInBlocks,
+                            frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+            ACPtr rows[3];
+            // ACType type = (*enc_state_->coeffs[0]).Type();
+            for (size_t c = 0; c < 3; c++) {
+                rows[c] = (*enc_state_->coeffs[0]).PlaneRow(c, group_index, 0);
+            }
+            size_t ac_offset = 0;
+            for (size_t by = 0; by < rect.ysize(); ++by) {
+                AcStrategyRow acs_row = enc_state_->shared.ac_strategy.ConstRow(rect, by);
+                for (size_t bx = 0; bx < rect.xsize(); ++bx) {
+                    AcStrategy acs = acs_row[bx];
+                    if (!acs.IsFirstBlock()) continue;
+                    size_t size = kDCTBlockSize << acs.log2_covered_blocks();
+                    size_t cxsize = acs.covered_blocks_x();
+                    size_t cysize = acs.covered_blocks_y();
+
+                    int addr = 0;
+                    for (int cy = 0; cy < cysize; cy++) {
+                        for (int cx = 0; cx < cxsize; cx++) {
+                            for (int i = 0; i < 64; i++) {
+                                for (size_t c = 0; c < 3; ++c) {
+                                    int reorder[3] = {1, 0, 2};
+                                    rows[c].ptr32[ac_offset + addr] =
+                                        ac_coef[(gy * 32 + by + cy) * 64 * 3 * xsize_8alg / 8 +
+                                                (gx * 32 + bx + cx) * 64 * 3 + i * 3 + reorder[c]];
+                                }
+                                addr++;
+                            }
+                        }
+                    }
+                    ac_offset += size;
+                }
+            }
+        }
+
+        // hls_order host-post processing
+        enc_state_->used_orders.resize(enc_state_->progressive_splitter.GetNumPasses());
+        coeff_order_t* JXL_RESTRICT order = &enc_state_->shared.coeff_orders[0 * enc_state_->shared.coeff_order_size];
+
+        const int32_t offset8x8 = 0;
+        const int32_t offset16x16 = 64;
+
+        uint32_t hls_order_reg = hls_order[320 * 3];
+        uint32_t mask_0 = 0x00000001;
+        uint32_t mask_2 = 0x00000004;
+        uint32_t all_used_orders_set[32];
+        for (int i = 0; i < 32; i++) {
+            if (i == 0) {
+                all_used_orders_set[i] = hls_order_reg & mask_0;
+            } else if (i == 2) {
+                all_used_orders_set[i] = hls_order_reg & mask_2;
+            } else if (i == 1 || i == 3) {
+                all_used_orders_set[i] = 0;
+            } else {
+                all_used_orders_set[i] = 0;
+            }
+        }
+
+        uint32_t computed = 0;
+        for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+            uint8_t ord = kStrategyOrder[o];
+            if (computed & (1 << ord)) continue;
+            computed |= 1 << ord;
+            AcStrategy acs = AcStrategy::FromRawStrategy(o);
+            size_t sz = kDCTBlockSize * acs.covered_blocks_x() * acs.covered_blocks_y();
+            if (all_used_orders_set[ord] == 0) {
+                for (size_t c = 0; c < 3; c++) {
+                    size_t offset = CoeffOrderOffset(ord, c);
+                    JXL_DASSERT(CoeffOrderOffset(ord, c + 1) - offset == sz);
+                    SetDefaultOrder(AcStrategy::FromRawStrategy(o), &order[offset]);
+                }
+            } else {
+                for (size_t c = 0; c < 3; c++) {
+                    int reorder[3] = {1, 0, 2};
+                    for (int i = 0; i < sz; i++) {
+                        size_t offset = CoeffOrderOffset(ord, c);
+                        coeff_order_t* JXL_RESTRICT cur_order = &order[offset];
+                        if (o == 0) {
+                            cur_order[i] = hls_order[reorder[c] * 320 + offset8x8 + i];
+                        } else if (o == 4) {
+                            cur_order[i] = hls_order[reorder[c] * 320 + offset16x16 + i];
+                        }
+                    }
+                }
+            }
+        }
+        enc_state_->used_orders[0] = hls_order_reg;
+
+        // Choose a context model that depends on the amount of quantization for AC.
+        if (cparams.speed_tier < SpeedTier::kFalcon) {
+            FindBestBlockEntropyModel(*enc_state_);
+        }
+
+        // resize ac_tokens vector
+        enc_state_->passes.resize(enc_state_->progressive_splitter.GetNumPasses());
+        for (PassesEncoderState::PassData& pass : enc_state_->passes) {
+            pass.ac_tokens.resize(shared.frame_dim.num_groups);
+        }
+
+        shared.num_histograms = 1;
+        *frame_header = shared.frame_header;
+
+        // Modular VarDCTDC
+        for (int group_index = 0; group_index < shared.frame_dim.num_dc_groups; group_index++) {
+            modular_frame_encoder->AddVarDCTDC(dc, group_index, enc_state_->cparams.butteraugli_distance >= 2.0f &&
+                                                                    enc_state_->cparams.speed_tier < SpeedTier::kFalcon,
+                                               enc_state_);
+        };
+
+        // Modular ACMetadata
+        for (int group_index = 0; group_index < shared.frame_dim.num_dc_groups; group_index++) {
+            modular_frame_encoder->AddACMetadata(group_index, /*jpeg_transcode=*/false, enc_state_);
+        };
+
+        // Modular encode
+        JXL_RETURN_IF_ERROR(modular_frame_encoder->ComputeEncodingData(
+            *frame_header, *ib.metadata(), &opsin, *extra_channels, lossy_frame_encoder.State(), pool, aux_out,
+            /* do_color=*/frame_header->encoding == FrameEncoding::kModular));
+
+        // free host mem
+        free(hls_opsin_1);
+        free(hls_opsin_2);
+        free(hls_opsin_3);
+        free(Image_reorder_dct8);
+        free(Image_reorder_dct16);
+        free(Image_reorder_dct32);
+        free(aq_map_f);
+        free(hls_masking_field);
+        free(hls_quant_field);
+        free(cmap_axi);
+        free(ac_coef_axiout);
+        free(strategy_all);
+        free(raw_quant_field_i);
+        free(hls_dc8x8);
+        free(hls_dc16x16);
+        free(hls_dc32x32);
+        free(ac_coef);
+    }
+    return true;
+}
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase3.cpp b/codec/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase3.cpp
new file mode 100644
index 0000000000..a938eccdd9
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/host_acc_lossy_enc_compute/acc_phase3.cpp
@@ -0,0 +1,243 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef HLS_KERNEL3_CPP
+#define HLS_KERNEL3_CPP
+
+#include "acc_phase3.hpp"
+
+namespace jxl {
+
+// Tokenizes the AC coefficients of every group, then serializes the frame into `writer`: special frames, frame
+// header, group offsets, and the group codes (global section, DC groups, global AC info, AC groups per pass).
+// With cparams.centerfirst the AC groups are permuted so groups nearest the chosen image center come first.
+// Returns a failing Status as soon as any stage, including a RunOnPool call, fails. `xclbinPath` and `opsin` are
+// not read. NOTE(review): `num_groups` is presumably frame_dim.num_groups (both index group_codes) - confirm.
+Status acc_phase3(std::string xclbinPath,
+                  Image3F& opsin,
+                  LossyFrameEncoder& lossy_frame_encoder,
+                  std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                  CompressParams cparams,
+                  std::unique_ptr<FrameHeader>& frame_header,
+                  PassesEncoderState* passes_enc_state,
+                  FrameDimensions frame_dim,
+                  BitWriter* writer,
+                  const size_t num_groups,
+                  AuxOut* aux_out,
+                  ThreadPool* pool,
+                  std::vector<AuxOut>& aux_outs,
+                  const ImageBundle& ib,
+                  const std::function<Status(size_t)>& resize_aux_outs) {
+    std::vector<EncCache>& group_caches_ = lossy_frame_encoder.get_group_cashes();
+    PassesEncoderState* JXL_RESTRICT enc_state_ = lossy_frame_encoder.State();
+    PassesSharedState& shared = enc_state_->shared;
+
+    const auto tokenize_group_init = [&](const size_t num_threads) {
+        group_caches_.resize(num_threads);
+        return true;
+    };
+    const auto tokenize_group = [&](const int group_index, const int thread) {
+        // Tokenize coefficients.
+        const Rect rect = shared.BlockGroupRect(group_index);
+        for (size_t idx_pass = 0; idx_pass < enc_state_->passes.size(); idx_pass++) {
+            JXL_ASSERT(enc_state_->coeffs[idx_pass]->Type() == ACType::k32);
+            const int32_t* JXL_RESTRICT ac_rows[3] = {
+                enc_state_->coeffs[idx_pass]->PlaneRow(0, group_index, 0).ptr32,
+                enc_state_->coeffs[idx_pass]->PlaneRow(1, group_index, 0).ptr32,
+                enc_state_->coeffs[idx_pass]->PlaneRow(2, group_index, 0).ptr32,
+            };
+            // Ensure group cache is initialized.
+            group_caches_[thread].InitOnce();
+            TokenizeCoefficients(&shared.coeff_orders[idx_pass * shared.coeff_order_size], rect, ac_rows,
+                                 shared.ac_strategy, frame_header->chroma_subsampling,
+                                 &group_caches_[thread].num_nzeroes,
+                                 &enc_state_->passes[idx_pass].ac_tokens[group_index], enc_state_->shared.quant_dc,
+                                 enc_state_->shared.raw_quant_field, enc_state_->shared.block_ctx_map);
+        }
+    };
+    // Propagate pool failures: continuing would serialize groups that were never tokenized.
+    JXL_RETURN_IF_ERROR(
+        RunOnPool(pool, 0, shared.frame_dim.num_groups, tokenize_group_init, tokenize_group, "TokenizeGroup"));
+
+    writer->AppendByteAligned(lossy_frame_encoder.State()->special_frames);
+    frame_header->UpdateFlag(lossy_frame_encoder.State()->shared.image_features.patches.HasAny(),
+                             FrameHeader::kPatches);
+    frame_header->UpdateFlag(lossy_frame_encoder.State()->shared.image_features.splines.HasAny(),
+                             FrameHeader::kSplines);
+    JXL_RETURN_IF_ERROR(WriteFrameHeader(*frame_header, writer, aux_out));
+
+    const size_t num_passes = passes_enc_state->progressive_splitter.GetNumPasses();
+    // DC global info + DC groups + AC global info + AC groups * num_passes.
+    const bool has_ac_global = true;
+    std::vector<BitWriter> group_codes(
+        NumTocEntries(frame_dim.num_groups, frame_dim.num_dc_groups, num_passes, has_ac_global));
+    const size_t global_ac_index = frame_dim.num_dc_groups + 1;
+    const bool is_small_image = frame_dim.num_groups == 1 && num_passes == 1;
+    const auto get_output = [&](const size_t index) { return &group_codes[is_small_image ? 0 : index]; };
+    auto ac_group_code = [&](size_t pass, size_t group) {
+        return get_output(AcGroupIndex(pass, group, frame_dim.num_groups, frame_dim.num_dc_groups, has_ac_global));
+    };
+
+    if (frame_header->flags & FrameHeader::kPatches) {
+        PatchDictionaryEncoder::Encode(lossy_frame_encoder.State()->shared.image_features.patches, get_output(0),
+                                       kLayerDictionary, aux_out);
+    }
+
+    if (frame_header->flags & FrameHeader::kSplines) {
+        EncodeSplines(lossy_frame_encoder.State()->shared.image_features.splines, get_output(0), kLayerSplines,
+                      HistogramParams(), aux_out);
+    }
+
+    if (frame_header->flags & FrameHeader::kNoise) {
+        EncodeNoise(lossy_frame_encoder.State()->shared.image_features.noise_params, get_output(0), kLayerNoise,
+                    aux_out);
+    }
+
+    JXL_RETURN_IF_ERROR(DequantMatricesEncodeDC(&lossy_frame_encoder.State()->shared.matrices, get_output(0),
+                                                kLayerDequantTables, aux_out));
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        JXL_RETURN_IF_ERROR(lossy_frame_encoder.EncodeGlobalDCInfo(*frame_header, get_output(0)));
+    }
+    JXL_RETURN_IF_ERROR(modular_frame_encoder->EncodeGlobalInfo(get_output(0), aux_out));
+    JXL_RETURN_IF_ERROR(
+        modular_frame_encoder->EncodeStream(get_output(0), aux_out, kLayerModularGlobal, ModularStreamId::Global()));
+
+    const auto process_dc_group = [&](const int group_index, const int thread) {
+        AuxOut* my_aux_out = aux_out ? &aux_outs[thread] : nullptr;
+        BitWriter* output = get_output(group_index + 1);
+        if (frame_header->encoding == FrameEncoding::kVarDCT && !(frame_header->flags & FrameHeader::kUseDcFrame)) {
+            BitWriter::Allotment allotment(output, 2);
+            output->Write(2, modular_frame_encoder->extra_dc_precision[group_index]);
+            ReclaimAndCharge(output, &allotment, kLayerDC, my_aux_out);
+            JXL_CHECK(modular_frame_encoder->EncodeStream(output, my_aux_out, kLayerDC,
+                                                          ModularStreamId::VarDCTDC(group_index)));
+        }
+        JXL_CHECK(modular_frame_encoder->EncodeStream(output, my_aux_out, kLayerModularDcGroup,
+                                                      ModularStreamId::ModularDC(group_index)));
+        if (frame_header->encoding == FrameEncoding::kVarDCT) {
+            const Rect& rect = lossy_frame_encoder.State()->shared.DCGroupRect(group_index);
+            size_t nb_bits = CeilLog2Nonzero(rect.xsize() * rect.ysize());
+            if (nb_bits != 0) {
+                BitWriter::Allotment allotment(output, nb_bits);
+                output->Write(nb_bits, modular_frame_encoder->ac_metadata_size[group_index] - 1);
+                ReclaimAndCharge(output, &allotment, kLayerControlFields, my_aux_out);
+            }
+            JXL_CHECK(modular_frame_encoder->EncodeStream(output, my_aux_out, kLayerControlFields,
+                                                          ModularStreamId::ACMetadata(group_index)));
+        }
+    };
+    JXL_RETURN_IF_ERROR(
+        RunOnPool(pool, 0, frame_dim.num_dc_groups, resize_aux_outs, process_dc_group, "EncodeDCGroup"));
+
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        JXL_RETURN_IF_ERROR(
+            lossy_frame_encoder.EncodeGlobalACInfo(get_output(global_ac_index), modular_frame_encoder.get()));
+    }
+
+    std::atomic<int> num_errors{0};
+    const auto process_group = [&](const int group_index, const int thread) {
+        AuxOut* my_aux_out = aux_out ? &aux_outs[thread] : nullptr;
+        for (size_t i = 0; i < num_passes; i++) {
+            if (frame_header->encoding == FrameEncoding::kVarDCT) {
+                if (!lossy_frame_encoder.EncodeACGroup(i, group_index, ac_group_code(i, group_index), my_aux_out)) {
+                    num_errors.fetch_add(1, std::memory_order_relaxed);
+                    return;
+                }
+            }
+            // Write all modular encoded data (color?, alpha, depth, extra channels)
+            if (!modular_frame_encoder->EncodeStream(ac_group_code(i, group_index), my_aux_out, kLayerModularAcGroup,
+                                                     ModularStreamId::ModularAC(group_index, i))) {
+                num_errors.fetch_add(1, std::memory_order_relaxed);
+                return;
+            }
+        }
+    };
+    JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, num_groups, resize_aux_outs, process_group, "EncodeGroupCoefficients"));
+
+    // Resizing aux_outs to 0 also Assimilates the array.
+    static_cast<void>(resize_aux_outs(0));
+    JXL_RETURN_IF_ERROR(num_errors.load(std::memory_order_relaxed) == 0);
+
+    for (BitWriter& bw : group_codes) {
+        bw.ZeroPadToByte(); // end of group.
+    }
+
+    std::vector<coeff_order_t>* permutation_ptr = nullptr;
+    std::vector<coeff_order_t> permutation;
+    if (cparams.centerfirst && !(num_passes == 1 && num_groups == 1)) {
+        permutation_ptr = &permutation;
+        // Don't permute global DC/AC or DC.
+        permutation.resize(global_ac_index + 1);
+        std::iota(permutation.begin(), permutation.end(), 0);
+        std::vector<coeff_order_t> ac_group_order(num_groups);
+        std::iota(ac_group_order.begin(), ac_group_order.end(), 0);
+        size_t group_dim = frame_dim.group_dim;
+
+        // The center of the image is either given by parameters or chosen to be the middle of the image by default
+        // if center_x, center_y resp. are not provided.
+        int64_t imag_cx;
+        if (cparams.center_x != static_cast<size_t>(-1)) {
+            JXL_RETURN_IF_ERROR(cparams.center_x < ib.xsize());
+            imag_cx = cparams.center_x;
+        } else {
+            imag_cx = ib.xsize() / 2;
+        }
+
+        int64_t imag_cy;
+        if (cparams.center_y != static_cast<size_t>(-1)) {
+            JXL_RETURN_IF_ERROR(cparams.center_y < ib.ysize());
+            imag_cy = cparams.center_y;
+        } else {
+            imag_cy = ib.ysize() / 2;
+        }
+
+        // The center of the group containing the center of the image.
+        int64_t cx = (imag_cx / group_dim) * group_dim + group_dim / 2;
+        int64_t cy = (imag_cy / group_dim) * group_dim + group_dim / 2;
+        // This identifies in what area of the central group the center of the image lies in.
+        double direction = -std::atan2(imag_cy - cy, imag_cx - cx);
+        // This identifies the side of the central group the center of the image lies closest to. This can take
+        // values 0, 1, 2, 3 corresponding to left, bottom, right, top.
+        int64_t side = std::fmod((direction + 5 * kPi / 4), 2 * kPi) * 2 / kPi;
+        auto get_distance_from_center = [&](size_t gid) {
+            Rect r = passes_enc_state->shared.GroupRect(gid);
+            int64_t gcx = r.x0() + group_dim / 2;
+            int64_t gcy = r.y0() + group_dim / 2;
+            int64_t dx = gcx - cx;
+            int64_t dy = gcy - cy;
+            // The angle is determined by taking atan2 and adding an appropriate
+            // starting point depending on the side we want to start on.
+            double angle = std::remainder(std::atan2(dy, dx) + kPi / 4 + side * (kPi / 2), 2 * kPi);
+            // Concentric squares in clockwise order.
+            return std::make_pair(std::max(std::abs(dx), std::abs(dy)), angle);
+        };
+        std::sort(ac_group_order.begin(), ac_group_order.end(), [&](coeff_order_t a, coeff_order_t b) {
+            return get_distance_from_center(a) < get_distance_from_center(b);
+        });
+        std::vector<coeff_order_t> inv_ac_group_order(ac_group_order.size(), 0);
+        for (size_t i = 0; i < ac_group_order.size(); i++) {
+            inv_ac_group_order[ac_group_order[i]] = i;
+        }
+        for (size_t i = 0; i < num_passes; i++) {
+            size_t pass_start = permutation.size();
+            for (coeff_order_t v : inv_ac_group_order) {
+                permutation.push_back(pass_start + v);
+            }
+        }
+        std::vector<BitWriter> new_group_codes(group_codes.size());
+        for (size_t i = 0; i < permutation.size(); i++) {
+            new_group_codes[permutation[i]] = std::move(group_codes[i]);
+        }
+        group_codes = std::move(new_group_codes);
+    }
+
+    JXL_RETURN_IF_ERROR(WriteGroupOffsets(group_codes, permutation_ptr, writer, aux_out));
+    writer->AppendByteAligned(group_codes);
+    writer->ZeroPadToByte(); // end of frame.
+    return true;
+}
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_host.cpp b/codec/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_host.cpp
new file mode 100644
index 0000000000..c4c5a60e2c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_host.cpp
@@ -0,0 +1,308 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "acc_host.hpp"
+
+namespace jxl {
+void FindBestDequantMatrices(const CompressParams& cparams,
+                             const Image3F& opsin,
+                             ModularFrameEncoder* modular_frame_encoder,
+                             DequantMatrices* dequant_matrices) {
+    // Resets *dequant_matrices to defaults; only max-error mode installs custom tables (`opsin` is not consulted).
+    // TODO(veluca): quant matrices for no-gaborish.
+    // TODO(veluca): heuristics for in-bitstream quant tables.
+    *dequant_matrices = DequantMatrices();
+    if (!cparams.max_error_mode) return;
+    // Per-channel weight is the reciprocal of cparams.max_error[c]; the AC and DC tables share the same three values.
+    float ac_weights[3][1] = {
+        {1.0f / cparams.max_error[0]}, {1.0f / cparams.max_error[1]}, {1.0f / cparams.max_error[2]}};
+    float dc_weights[3] = {ac_weights[0][0], ac_weights[1][0], ac_weights[2][0]};
+    DctQuantWeightParams dct_params(ac_weights);
+    std::vector<QuantEncoding> encodings(DequantMatrices::kNum, QuantEncoding::DCT(dct_params));
+    DequantMatricesSetCustom(dequant_matrices, encodings, modular_frame_encoder);
+    DequantMatricesSetCustomDC(dequant_matrices, dc_weights);
+}
+// True iff noise/patches are not forced on, ib has no alpha, and the mode is unresampled non-modular XYB at >= kWombat.
+bool DefaultEncoderHeuristics::HandlesColorConversion(const CompressParams& cparams, const ImageBundle& ib) {
+    if (cparams.noise == Override::kOn || cparams.patches == Override::kOn || ib.HasAlpha()) return false;
+    return cparams.speed_tier >= SpeedTier::kWombat && cparams.resampling == 1 && !cparams.modular_mode &&
+           cparams.color_transform == ColorTransform::kXYB;
+}
+
+Status acc_host(std::string xclbinPath, // forwarded unchanged to phases 2 and 3
+                Image3F& opsin,
+                LossyFrameEncoder& lossy_frame_encoder,
+                const ImageBundle* JXL_RESTRICT ib_or_linear, // handed by value to phases 1 and 2
+                ThreadPool* pool,
+                std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                BitWriter* writer,
+                AuxOut* aux_out,
+                std::unique_ptr<FrameHeader>& frame_header,
+                const FrameInfo& frame_info,
+                CompressParams cparams, // by value; phases 1 and 2 take copies too, so their edits are not seen here
+                const std::vector<ImageF>* extra_channels,
+                PassesEncoderState* passes_enc_state,
+                FrameDimensions frame_dim,
+                const size_t num_groups,
+                const ImageBundle& ib,
+                std::vector<AuxOut>& aux_outs,
+                const std::function<Status(size_t)>& resize_aux_outs) {
+    // Runs encoder phases 1-3 in order; the first failing phase stops the frame and its error status is returned.
+    JXL_RETURN_IF_ERROR(
+        acc_phase1(opsin, lossy_frame_encoder, cparams, frame_header, frame_info, ib_or_linear, ib, aux_out, pool));
+    JXL_RETURN_IF_ERROR(acc_phase2(xclbinPath, opsin, lossy_frame_encoder, modular_frame_encoder, cparams, frame_header,
+                                   extra_channels, ib_or_linear, ib, pool, aux_out));
+    JXL_RETURN_IF_ERROR(acc_phase3(xclbinPath, opsin, lossy_frame_encoder, modular_frame_encoder, cparams, frame_header,
+                                   passes_enc_state, frame_dim, writer, num_groups, aux_out, pool, aux_outs, ib,
+                                   resize_aux_outs));
+    return true;
+}
+
+Status DefaultEncoderHeuristics::LossyFrameHeuristics(PassesEncoderState* enc_state,
+                                                      ModularFrameEncoder* modular_frame_encoder,
+                                                      const ImageBundle* original_pixels,
+                                                      Image3F* opsin,
+                                                      ThreadPool* pool,
+                                                      AuxOut* aux_out) {
+    PROFILER_ZONE("JxlLossyFrameHeuristics uninstrumented");
+    // Fills enc_state (noise, splines/patches, quant fields, EPF sharpness, CfL map, dequant matrices) from *opsin,
+    // which is modified in place. Returns an error for a negative distance or when spline subtraction fails.
+    CompressParams& cparams = enc_state->cparams;
+    PassesSharedState& shared = enc_state->shared;
+
+    // Compute parameters for noise synthesis.
+    if (shared.frame_header.flags & FrameHeader::kNoise) {
+        PROFILER_ZONE("enc GetNoiseParam");
+        if (cparams.photon_noise_iso > 0) {
+            shared.image_features.noise_params =
+                SimulatePhotonNoise(opsin->xsize(), opsin->ysize(), cparams.photon_noise_iso);
+        } else {
+            // Don't start at zero amplitude since adding noise is expensive -- it
+            // significantly slows down decoding, and this is unlikely to
+            // completely go away even with advanced optimizations. After the
+            // kNoiseModelingRampUpDistanceRange we have reached the full level,
+            // i.e. noise is no longer represented by the compressed image, so we
+            // can add full noise by the noise modeling itself.
+            static const float kNoiseModelingRampUpDistanceRange = 0.6;
+            static const float kNoiseLevelAtStartOfRampUp = 0.25;
+            static const float kNoiseRampupStart = 1.0;
+            // TODO(user) test and properly select quality_coef with smooth filter
+            float quality_coef = 1.0f;
+            const float rampup = (cparams.butteraugli_distance - kNoiseRampupStart) / kNoiseModelingRampUpDistanceRange;
+            if (rampup < 1.0f) {
+                quality_coef = kNoiseLevelAtStartOfRampUp + (1.0f - kNoiseLevelAtStartOfRampUp) * rampup;
+            }
+            if (rampup < 0.0f) {
+                quality_coef = kNoiseRampupStart;
+            }
+            if (!GetNoiseParameter(*opsin, &shared.image_features.noise_params, quality_coef)) {
+                shared.frame_header.flags &= ~FrameHeader::kNoise;
+            }
+        }
+    }
+    if (enc_state->shared.frame_header.upsampling != 1 && !cparams.already_downsampled) {
+        // In VarDCT mode, LossyFrameHeuristics takes care of running downsampling
+        // after noise, if necessary.
+        DownsampleImage(opsin, cparams.resampling);
+        PadImageToBlockMultipleInPlace(opsin);
+    }
+
+    const FrameDimensions& frame_dim = enc_state->shared.frame_dim;
+    size_t target_size = TargetSize(cparams, frame_dim);
+    size_t opsin_target_size = target_size;
+    if (cparams.target_size > 0 || cparams.target_bitrate > 0.0) {
+        cparams.target_size = opsin_target_size;
+    } else if (cparams.butteraugli_distance < 0) {
+        return JXL_FAILURE("Expected non-negative distance");
+    }
+
+#ifndef XLNX_DISABLE_BLK_DICT
+    // Find and subtract splines.
+    if (cparams.speed_tier <= SpeedTier::kSquirrel) {
+        shared.image_features.splines = FindSplines(*opsin);
+        JXL_RETURN_IF_ERROR(shared.image_features.splines.SubtractFrom(opsin, shared.cmap));
+    }
+
+    // Find and subtract patches/dots.
+    if (ApplyOverride(cparams.patches, cparams.speed_tier <= SpeedTier::kSquirrel)) {
+        FindBestPatchDictionary(*opsin, enc_state, pool, aux_out);
+        PatchDictionaryEncoder::SubtractFrom(shared.image_features.patches, opsin);
+    }
+#endif
+
+    static const float kAcQuant = 0.79f;
+    const float quant_dc = InitialQuantDC(cparams.butteraugli_distance);
+    Quantizer& quantizer = enc_state->shared.quantizer;
+    // We don't know the quant field yet, but for computing the global scale
+    // assuming that it will be the same as for Falcon mode is good enough.
+    quantizer.ComputeGlobalScaleAndQuant(quant_dc, kAcQuant / cparams.butteraugli_distance, 0);
+
+    // TODO(veluca): we can now run all the code from here to FindBestQuantizer
+    // (excluded) one rect at a time. Do that.
+
+    // Dependency graph:
+    //
+    // input: either XYB or input image
+    //
+    // input image -> XYB [optional]
+    // XYB -> initial quant field
+    // XYB -> Gaborished XYB
+    // Gaborished XYB -> CfL1
+    // initial quant field, Gaborished XYB, CfL1 -> ACS
+    // initial quant field, ACS, Gaborished XYB -> EPF control field
+    // initial quant field -> adjusted initial quant field
+    // adjusted initial quant field, ACS -> raw quant field
+    // raw quant field, ACS, Gaborished XYB -> CfL2
+    //
+    // output: Gaborished XYB, CfL, ACS, raw quant field, EPF control field.
+
+    ArControlFieldHeuristics ar_heuristics;
+    AcStrategyHeuristics acs_heuristics;
+    CfLHeuristics cfl_heuristics;
+
+    if (!opsin->xsize()) {
+        JXL_ASSERT(HandlesColorConversion(cparams, *original_pixels));
+        *opsin = Image3F(RoundUpToBlockDim(original_pixels->xsize()), RoundUpToBlockDim(original_pixels->ysize()));
+        opsin->ShrinkTo(original_pixels->xsize(), original_pixels->ysize());
+        ToXYB(*original_pixels, pool, opsin, /*linear=*/nullptr);
+        PadImageToBlockMultipleInPlace(opsin);
+    }
+
+    // Compute an initial estimate of the quantization field.
+    // Call InitialQuantField only in Hare mode or slower. Otherwise, rely
+    // on simple heuristics in FindBestAcStrategy, or set a constant for Falcon
+    // mode.
+    if (cparams.speed_tier > SpeedTier::kHare || cparams.uniform_quant > 0) {
+        enc_state->initial_quant_field = ImageF(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
+        float q = cparams.uniform_quant > 0 ? cparams.uniform_quant : kAcQuant / cparams.butteraugli_distance;
+        FillImage(q, &enc_state->initial_quant_field);
+    } else {
+        // Call this here, as it relies on pre-gaborish values.
+        float butteraugli_distance_for_iqf = cparams.butteraugli_distance;
+        if (!shared.frame_header.loop_filter.gab) {
+            butteraugli_distance_for_iqf *= 0.73f;
+        }
+        enc_state->initial_quant_field = InitialQuantField(butteraugli_distance_for_iqf, *opsin, shared.frame_dim, pool,
+                                                           1.0f, &enc_state->initial_quant_masking);
+    }
+
+    // TODO(veluca): do something about animations.
+
+    // Apply inverse-gaborish.
+    if (shared.frame_header.loop_filter.gab) {
+        GaborishInverse(opsin, 0.9908511000000001f, pool);
+    }
+
+    cfl_heuristics.Init(*opsin);
+    acs_heuristics.Init(*opsin, enc_state);
+    ar_heuristics.PrepareForThreads(/*num_threads*/ 1);
+    cfl_heuristics.PrepareForThreads(/*num_threads*/ 1);
+
+    // Tiles run sequentially on the calling thread; the tile grid size is loop-invariant, so it is computed once.
+    const size_t n_enc_tiles = DivCeil(enc_state->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks);
+    const size_t n_enc_tile_rows = DivCeil(enc_state->shared.frame_dim.ysize_blocks, kEncTileDimInBlocks);
+    //  auto process_tile = [&](size_t tid, size_t thread) {
+    for (size_t tid = 0; tid < n_enc_tiles * n_enc_tile_rows; tid++) {
+        size_t thread = 0;
+        size_t tx = tid % n_enc_tiles;
+        size_t ty = tid / n_enc_tiles;
+        size_t by0 = ty * kEncTileDimInBlocks;
+        size_t by1 = std::min((ty + 1) * kEncTileDimInBlocks, enc_state->shared.frame_dim.ysize_blocks);
+        size_t bx0 = tx * kEncTileDimInBlocks;
+        size_t bx1 = std::min((tx + 1) * kEncTileDimInBlocks, enc_state->shared.frame_dim.xsize_blocks);
+        Rect r(bx0, by0, bx1 - bx0, by1 - by0);
+
+        // For speeds up to Wombat, we only compute the color correlation map
+        // once we know the transform type and the quantization map.
+        if (cparams.speed_tier <= SpeedTier::kSquirrel) {
+            //      cfl_heuristics.ComputeTile(r, *opsin, enc_state->shared.matrices,
+            //                                 /*ac_strategy=*/nullptr,
+            //                                 /*quantizer=*/nullptr, /*fast=*/false, thread,
+            //                                 &enc_state->shared.cmap);
+        }
+
+// Choose block sizes.
+//    acs_heuristics.ProcessRect(r);
+
+// Choose amount of post-processing smoothing.
+// TODO(veluca): should this go *after* AdjustQuantField?
+#ifndef XLNX_DISABLE_ARC
+        ar_heuristics.RunRect(r, *opsin, enc_state, thread);
+#else
+        ImageB* JXL_RESTRICT epf_sharpness = &enc_state->shared.epf_sharpness;
+        FillPlane(static_cast<uint8_t>(4), epf_sharpness, r);
+#endif
+        // Always set the initial quant field, so we can compute the CfL map with
+        // more accuracy. The initial quant field might change in slower modes, but
+        // adjusting the quant field with butteraugli when all the other encoding
+        // parameters are fixed is likely a more reliable choice anyway.
+        AdjustQuantField(enc_state->shared.ac_strategy, r, &enc_state->initial_quant_field);
+        quantizer.SetQuantFieldRect(enc_state->initial_quant_field, r, &enc_state->shared.raw_quant_field);
+
+// Compute a non-default CfL map if we are at Hare speed, or slower.
+#ifndef XLNX_DISABLE_2NDCMP
+        if (cparams.speed_tier <= SpeedTier::kHare) {
+            cfl_heuristics.ComputeTile(
+                r, *opsin, enc_state->shared.matrices, &enc_state->shared.ac_strategy, &enc_state->shared.quantizer,
+                /*fast=*/cparams.speed_tier >= SpeedTier::kWombat, thread, &enc_state->shared.cmap);
+        }
+#endif
+    }
+    /*  RunOnPool(pool, 0, DivCeil(enc_state->shared.frame_dim.xsize_blocks,
+                                 kEncTileDimInBlocks) *
+                             DivCeil(enc_state->shared.frame_dim.ysize_blocks,
+                                     kEncTileDimInBlocks),
+                [&](const size_t num_threads) {
+                  ar_heuristics.PrepareForThreads(num_threads);
+                  cfl_heuristics.PrepareForThreads(num_threads);
+                  return true;
+                },
+                process_tile, "Enc Heuristics");*/
+
+    acs_heuristics.Finalize(aux_out);
+    if (cparams.speed_tier <= SpeedTier::kHare) {
+        cfl_heuristics.ComputeDC(/*fast=*/cparams.speed_tier >= SpeedTier::kWombat, &enc_state->shared.cmap);
+    }
+
+    FindBestDequantMatrices(cparams, *opsin, modular_frame_encoder, &enc_state->shared.matrices);
+
+    // Refine quantization levels.
+    FindBestQuantizer(original_pixels, *opsin, enc_state, pool, aux_out);
+
+    // Choose a context model that depends on the amount of quantization for AC.
+    if (cparams.speed_tier < SpeedTier::kFalcon) {
+        FindBestBlockEntropyModel(*enc_state);
+    }
+
+#ifdef XLNX_DEBUG_CMAP
+    std::cout << "=========================================" << std::endl;
+    std::cout << "ColorMap info: " << std::endl;
+    ImageSB* JXL_RESTRICT tmp_map = &enc_state->shared.cmap.ytox_map;
+    int32_t dc = enc_state->shared.cmap.GetYToXDC();
+    std::cout << "Y to X dc: " << dc << std::endl;
+    for (size_t i = 0; i < tmp_map->ysize(); i++) {
+        int8_t* JXL_RESTRICT row_out = tmp_map->Row(i);
+        for (size_t j = 0; j < tmp_map->xsize(); j++) {
+            std::cout << (int)row_out[j] << " ";
+        }
+        std::cout << std::endl;
+    }
+    // Same dump for the Y-to-B correlation map (its own map, matching the Y-to-B DC printed below).
+    tmp_map = &enc_state->shared.cmap.ytob_map;
+    dc = enc_state->shared.cmap.GetYToBDC();
+    std::cout << "Y to B dc: " << dc << std::endl;
+    for (size_t i = 0; i < tmp_map->ysize(); i++) {
+        int8_t* JXL_RESTRICT row_out = tmp_map->Row(i);
+        for (size_t j = 0; j < tmp_map->xsize(); j++) {
+            std::cout << (int)row_out[j] << " ";
+        }
+        std::cout << std::endl;
+    }
+    std::cout << std::endl;
+#endif
+
+    return true;
+}
+} // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_phase1.cpp b/codec/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_phase1.cpp
new file mode 100644
index 0000000000..a37f251c20
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_phase1.cpp
@@ -0,0 +1,276 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef HLS_KERNEL1_CPP
+#define HLS_KERNEL1_CPP
+
+#include "acc_phase1.hpp"
+
+namespace jxl {
+namespace {
+// Invisible (alpha = 0) pixels tend to be a mess in optimized PNGs.
+// Since they have no visual impact whatsoever, we can replace them with
+// something that compresses better and reduces artifacts near the edges. This
+// does some kind of smooth stuff that seems to work.
+// Replace invisible pixels with a weighted average of the pixel to the left,
+// the pixel to the topright, and non-invisible neighbours.
+// Produces downward-blurry smears, with in the upwards direction only a 1px
+// edge duplication but not more. It would probably be better to smear in all
+// directions. That requires an alpha-weighted convolution with a large enough
+// kernel though, which might be overkill...
+void SimplifyInvisible(Image3F* image, const ImageF& alpha, bool lossless) {
+    // Overwrites, in place, every sample of `image` whose alpha is exactly 0. In lossless mode the sample simply
+    // becomes 0; otherwise it becomes a weighted mean of nearby samples. Pixels are visited left-to-right,
+    // top-to-bottom, so the left and upper neighbours read below may themselves already have been rewritten.
+    // `alpha` is indexed with the image's coordinates (presumably it is at least as large as the image - confirm).
+    const size_t width = image->xsize();
+    const size_t height = image->ysize();
+    for (size_t c = 0; c < 3; ++c) {
+        for (size_t y = 0; y < height; ++y) {
+            const bool has_above = y > 0;
+            const bool has_below = y + 1 < height;
+            // Neighbour-row pointers are null on the first/last row and are only read behind has_above/has_below.
+            float* JXL_RESTRICT cur = image->PlaneRow(c, y);
+            const float* JXL_RESTRICT above = has_above ? image->PlaneRow(c, y - 1) : nullptr;
+            const float* JXL_RESTRICT below = has_below ? image->PlaneRow(c, y + 1) : nullptr;
+            const float* JXL_RESTRICT a_cur = alpha.Row(y);
+            const float* JXL_RESTRICT a_above = has_above ? alpha.Row(y - 1) : nullptr;
+            const float* JXL_RESTRICT a_below = has_below ? alpha.Row(y + 1) : nullptr;
+            for (size_t x = 0; x < width; ++x) {
+                // Pixels with non-zero alpha are left untouched.
+                if (a_cur[x] != 0) continue;
+                float& px = cur[x];
+                px = 0;
+                if (lossless) continue;
+
+                // `d` is the total weight gathered so far; blend() adds one weighted neighbour to the running sum.
+                float d = 0.f;
+                const auto blend = [&px, &d](float value, float weight) {
+                    px += weight * value;
+                    d += weight;
+                };
+                if (x > 0) {
+                    // Left neighbour: weight 1 always, plus 1 more when it is visible.
+                    blend(cur[x - 1], 1.f);
+                    if (a_cur[x - 1] > 0.f) {
+                        blend(cur[x - 1], 1.f);
+                    }
+                }
+                if (x + 1 < width) {
+                    // Top-right neighbour: weight 1 whenever a row above exists.
+                    if (has_above) {
+                        blend(above[x + 1], 1.f);
+                    }
+                    // Visible neighbours in the next column (same row, above, below): weight 2 each.
+                    if (a_cur[x + 1] > 0.f) blend(cur[x + 1], 2.f);
+                    if (has_above && a_above[x + 1] > 0.f) blend(above[x + 1], 2.f);
+                    if (has_below && a_below[x + 1] > 0.f) blend(below[x + 1], 2.f);
+                }
+                // Visible neighbours directly above and below: weight 2 each.
+                if (has_above && a_above[x] > 0.f) blend(above[x], 2.f);
+                if (has_below && a_below[x] > 0.f) blend(below[x], 2.f);
+                // Normalise only when the total weight exceeds 1 (a lone weight-1 term needs no division).
+                if (d > 1.f) px /= d;
+            }
+        }
+    }
+}
+} // namespace
+// Encoder phase 1: fills `opsin` from `ib` and, for VarDCT frames, runs the heuristics up to inverse-gaborish.
+Status acc_phase1(Image3F& opsin,
+                  LossyFrameEncoder& lossy_frame_encoder,
+                  CompressParams cparams, // by value: writes to cparams below never reach the caller
+                  std::unique_ptr<FrameHeader>& frame_header,
+                  const FrameInfo& frame_info,
+                  const ImageBundle* JXL_RESTRICT ib_or_linear, // by value: reassigned below, caller's copy unchanged
+                  const ImageBundle& ib,
+                  AuxOut* aux_out,
+                  ThreadPool* pool) {
+    const ColorEncoding& c_linear = ColorEncoding::LinearSRGB(ib.IsGray());
+    std::unique_ptr<ImageMetadata> metadata_linear = jxl::make_unique<ImageMetadata>();
+    metadata_linear->xyb_encoded = (cparams.color_transform == ColorTransform::kXYB);
+    metadata_linear->color_encoding = c_linear;
+    ImageBundle linear_storage(metadata_linear.get());
+
+    // Allocating a large enough image avoids a copy when padding.
+    opsin = Image3F(RoundUpToBlockDim(ib.xsize()), RoundUpToBlockDim(ib.ysize()));
+    opsin.ShrinkTo(ib.xsize(), ib.ysize());
+
+    const bool want_linear =
+        frame_header->encoding == FrameEncoding::kVarDCT && cparams.speed_tier <= SpeedTier::kKitten;
+    ib_or_linear = &ib;
+
+    if (frame_header->color_transform == ColorTransform::kXYB && frame_info.ib_needs_color_transform) {
+        // linear_storage would only be used by the Butteraugli loop (passing
+        // linear sRGB avoids a color conversion there). Otherwise, don't
+        // fill it to reduce memory usage.
+        ib_or_linear = ToXYB(ib, pool, &opsin, want_linear ? &linear_storage : nullptr);
+    } else { // RGB or YCbCr: don't do anything (forward YCbCr is not
+             // implemented, this is only used when the input is already in
+             // YCbCr)
+             // If encoding a special DC or reference frame, don't do anything:
+             // input is already in XYB.
+        CopyImageTo(ib.color(), &opsin);
+    }
+    bool lossless = (frame_header->encoding == FrameEncoding::kModular && cparams.quality_pair.first == 100);
+    if (ib.HasAlpha() && !ib.AlphaIsPremultiplied() && !ApplyOverride(cparams.keep_invisible, lossless) &&
+        cparams.ec_resampling == cparams.resampling) {
+        // Simplify invisible pixels; with want_linear the bundle behind ib_or_linear is edited too (const cast away).
+        SimplifyInvisible(&opsin, ib.alpha(), lossless);
+        if (want_linear) {
+            SimplifyInvisible(const_cast<Image3F*>(&ib_or_linear->color()), ib.alpha(), lossless);
+        }
+    }
+    if (aux_out != nullptr) {
+        JXL_RETURN_IF_ERROR(aux_out->InspectImage3F("enc_frame:OpsinDynamicsImage", opsin));
+    }
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        PadImageToBlockMultipleInPlace(&opsin);
+        PassesEncoderState* JXL_RESTRICT enc_state_ = lossy_frame_encoder.State();
+        //  std::vector<EncCache>& group_caches_ =
+        //  lossy_frame_encoder.get_group_cashes();
+
+        JXL_ASSERT((opsin.xsize() % kBlockDim) == 0 && (opsin.ysize() % kBlockDim) == 0);
+        PassesSharedState& shared = enc_state_->shared;
+
+        if (!enc_state_->cparams.max_error_mode) {
+            float x_qm_scale_steps[3] = {0.65f, 1.25f, 9.0f};
+            shared.frame_header.x_qm_scale = 1;
+            for (float x_qm_scale_step : x_qm_scale_steps) {
+                if (enc_state_->cparams.butteraugli_distance > x_qm_scale_step) {
+                    shared.frame_header.x_qm_scale++;
+                }
+            }
+        }
+
+        Image3F* opsin_ = &opsin;
+        // NOTE: from here on `cparams` is the by-value parameter, whereas the block above read enc_state_->cparams.
+        // `shared` was already bound above to enc_state_->shared.
+
+        // Compute parameters for noise synthesis.
+        if (shared.frame_header.flags & FrameHeader::kNoise) {
+            PROFILER_ZONE("enc GetNoiseParam");
+            if (cparams.photon_noise_iso > 0) {
+                shared.image_features.noise_params =
+                    SimulatePhotonNoise(opsin_->xsize(), opsin_->ysize(), cparams.photon_noise_iso);
+            } else {
+                // Don't start at zero amplitude since adding noise is expensive -- it
+                // significantly slows down decoding, and this is unlikely to
+                // completely go away even with advanced optimizations. After the
+                // kNoiseModelingRampUpDistanceRange we have reached the full level,
+                // i.e. noise is no longer represented by the compressed image, so we
+                // can add full noise by the noise modeling itself.
+                static const float kNoiseModelingRampUpDistanceRange = 0.6;
+                static const float kNoiseLevelAtStartOfRampUp = 0.25;
+                static const float kNoiseRampupStart = 1.0;
+                // TODO(user) test and properly select quality_coef with smooth
+                // filter
+                float quality_coef = 1.0f;
+                const float rampup =
+                    (cparams.butteraugli_distance - kNoiseRampupStart) / kNoiseModelingRampUpDistanceRange;
+                if (rampup < 1.0f) {
+                    quality_coef = kNoiseLevelAtStartOfRampUp + (1.0f - kNoiseLevelAtStartOfRampUp) * rampup;
+                }
+                if (rampup < 0.0f) {
+                    quality_coef = kNoiseRampupStart;
+                }
+                if (!GetNoiseParameter(*opsin_, &shared.image_features.noise_params, quality_coef)) {
+                    shared.frame_header.flags &= ~FrameHeader::kNoise;
+                }
+            }
+        }
+        if (enc_state_->shared.frame_header.upsampling != 1 && !cparams.already_downsampled) {
+            // In VarDCT mode, LossyFrameHeuristics takes care of running downsampling
+            // after noise, if necessary.
+            DownsampleImage(opsin_, cparams.resampling);
+            PadImageToBlockMultipleInPlace(opsin_);
+        }
+
+        const FrameDimensions& frame_dim_ = enc_state_->shared.frame_dim;
+        size_t target_size = TargetSize(cparams, frame_dim_);
+        size_t opsin_target_size = target_size;
+        if (cparams.target_size > 0 || cparams.target_bitrate > 0.0) {
+            cparams.target_size = opsin_target_size; // affects the local copy only
+        } else if (cparams.butteraugli_distance < 0) {
+            return JXL_FAILURE("Expected non-negative distance");
+        }
+
+#ifndef XLNX_DISABLE_BLK_DICT
+        // Find and subtract splines.
+        if (cparams.speed_tier <= SpeedTier::kSquirrel) {
+            shared.image_features.splines = FindSplines(*opsin_);
+            JXL_RETURN_IF_ERROR(shared.image_features.splines.SubtractFrom(opsin_, shared.cmap));
+        }
+
+        // Find and subtract patches/dots.
+        if (ApplyOverride(cparams.patches, cparams.speed_tier <= SpeedTier::kSquirrel)) {
+            FindBestPatchDictionary(*opsin_, enc_state_, pool, aux_out);
+            PatchDictionaryEncoder::SubtractFrom(shared.image_features.patches, opsin_);
+        }
+#endif
+
+        static const float kAcQuant = 0.79f;
+        const float quant_dc = InitialQuantDC(cparams.butteraugli_distance);
+        Quantizer& quantizer = enc_state_->shared.quantizer;
+        // We don't know the quant field yet, but for computing the global scale
+        // assuming that it will be the same as for Falcon mode is good enough.
+        quantizer.ComputeGlobalScaleAndQuant(quant_dc, kAcQuant / cparams.butteraugli_distance, 0);
+
+        // TODO(veluca): we can now run all the code from here to FindBestQuantizer
+        // (excluded) one rect at a time. Do that.
+
+        // Dependency graph:
+        //
+        // input: either XYB or input image
+        //
+        // input image -> XYB [optional]
+        // XYB -> initial quant field
+        // XYB -> Gaborished XYB
+        // Gaborished XYB -> CfL1
+        // initial quant field, Gaborished XYB, CfL1 -> ACS
+        // initial quant field, ACS, Gaborished XYB -> EPF control field
+        // initial quant field -> adjusted initial quant field
+        // adjusted initial quant field, ACS -> raw quant field
+        // raw quant field, ACS, Gaborished XYB -> CfL2
+        //
+        // output: Gaborished XYB, CfL, ACS, raw quant field, EPF control field.
+        // Only taken when opsin has zero width at this point; it is then rebuilt from ib_or_linear via ToXYB.
+        if (!opsin_->xsize()) {
+            JXL_ASSERT(enc_state_->heuristics->HandlesColorConversion(cparams, *ib_or_linear));
+            *opsin_ = Image3F(RoundUpToBlockDim(ib_or_linear->xsize()), RoundUpToBlockDim(ib_or_linear->ysize()));
+            opsin_->ShrinkTo(ib_or_linear->xsize(), ib_or_linear->ysize());
+            ToXYB(*ib_or_linear, pool, opsin_, /*linear=*/nullptr);
+            PadImageToBlockMultipleInPlace(opsin_);
+        }
+
+        // Compute an initial estimate of the quantization field.
+        // Call InitialQuantField only in Hare mode or slower. Otherwise, rely
+        // on simple heuristics in FindBestAcStrategy, or set a constant for Falcon
+        // mode.
+        if (cparams.speed_tier > SpeedTier::kHare || cparams.uniform_quant > 0) {
+            enc_state_->initial_quant_field = ImageF(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
+            float q = cparams.uniform_quant > 0 ? cparams.uniform_quant : kAcQuant / cparams.butteraugli_distance;
+            FillImage(q, &enc_state_->initial_quant_field);
+        } else {
+            // Call this here, as it relies on pre-gaborish values.
+            float butteraugli_distance_for_iqf = cparams.butteraugli_distance;
+            if (!shared.frame_header.loop_filter.gab) {
+                butteraugli_distance_for_iqf *= 0.73f;
+            }
+            enc_state_->initial_quant_field = InitialQuantField(butteraugli_distance_for_iqf, *opsin_, shared.frame_dim,
+                                                                pool, 1.0f, &enc_state_->initial_quant_masking);
+        }
+
+        // TODO(veluca): do something about animations.
+
+        // Apply inverse-gaborish.
+        if (shared.frame_header.loop_filter.gab) {
+            GaborishInverse(opsin_, 0.9908511000000001f, pool);
+        }
+    }
+    return true;
+}
+} // namespace jxl
+#endif
\ No newline at end of file
diff --git a/codec/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_phase2.cpp b/codec/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_phase2.cpp
new file mode 100644
index 0000000000..545403e15e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_phase2.cpp
@@ -0,0 +1,415 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef ACC_PHASE2_CPP
+#define ACC_PHASE2_CPP
+
+#include "acc_phase2.hpp"
+
+namespace jxl {
+
+// Host-side "phase 2" of the VarDCT frame encode. Nothing is done unless
+// frame_header->encoding is FrameEncoding::kVarDCT; for such frames it:
+//   1. Applies each candidate transform (IDENTITY, DCT2X2, DCT4X4, DCT,
+//      DCT16X16, DCT32X32) to all three channels over the image area rounded
+//      up to a multiple of 64 pixels, keeping the coefficients and the DC
+//      values in flat per-channel arrays.
+//   2. Visits the encoder tiles one by one; per tile it runs the
+//      chroma-from-luma and AC-strategy heuristics on those arrays, chooses the
+//      post-processing smoothing (or fills epf_sharpness with 4 when
+//      XLNX_DISABLE_ARC is defined) and writes the adjusted initial quant field
+//      into the quantizer.
+//   3. Finalizes the AC-strategy heuristics and calls FindBestDequantMatrices,
+//      FindBestQuantizer and, for speed_tier < kFalcon,
+//      FindBestBlockEntropyModel.
+//   4. Calls InitializePassesEncoder, sizes the per-pass AC token vectors,
+//      computes the coefficient orders, copies the shared frame header into
+//      *frame_header and has the modular encoder compute its encoding data.
+//
+// xclbinPath is not referenced. cparams.speed_tier decides which
+// chroma-from-luma passes run. ib_or_linear is only handed to
+// FindBestQuantizer; extra_channels and ib only to
+// ModularFrameEncoder::ComputeEncodingData. pool and aux_out are passed on to
+// the callees that accept them.
+// Returns true (also for non-VarDCT frames); JXL_RETURN_IF_ERROR returns early
+// if ComputeEncodingData reports an error.
+Status acc_phase2(std::string xclbinPath,
+                  Image3F& opsin,
+                  LossyFrameEncoder& lossy_frame_encoder,
+                  std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                  CompressParams cparams,
+                  std::unique_ptr<FrameHeader>& frame_header,
+                  const std::vector<ImageF>* extra_channels,
+                  const ImageBundle* JXL_RESTRICT ib_or_linear,
+                  const ImageBundle& ib,
+                  ThreadPool* pool,
+                  AuxOut* aux_out) {
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        std::vector<EncCache>& group_caches_ = lossy_frame_encoder.get_group_cashes(); // NOTE(review): not used below
+        PassesEncoderState* JXL_RESTRICT enc_state_ = lossy_frame_encoder.State();
+        PassesSharedState& shared = enc_state_->shared;
+        Image3F* opsin_ = &opsin;
+        Quantizer& quantizer = enc_state_->shared.quantizer;
+
+        // Transform area: the image size rounded up to a multiple of 64 pixels.
+        // NOTE(review): rows/columns up to these bounds are read from opsin below - confirm opsin is padded that far.
+        size_t tile_xsize = (opsin.xsize() + 63) / 64 * 64;
+        size_t tile_ysize = (opsin.ysize() + 63) / 64 * 64;
+
+        std::vector<std::vector<float> > dctIDT(3, std::vector<float>(tile_xsize * tile_ysize));
+        std::vector<std::vector<float> > dct2x2(3, std::vector<float>(tile_xsize * tile_ysize));
+        std::vector<std::vector<float> > dct4x4(3, std::vector<float>(tile_xsize * tile_ysize));
+        std::vector<std::vector<float> > dct8x8(3, std::vector<float>(tile_xsize * tile_ysize));
+        std::vector<std::vector<float> > dct16x16(3, std::vector<float>(tile_xsize * tile_ysize));
+        std::vector<std::vector<float> > dct32x32(3, std::vector<float>(tile_xsize * tile_ysize));
+
+        std::vector<std::vector<float> > dcIDT(3, std::vector<float>((tile_xsize * tile_ysize + 63) / 64 * 64));
+        std::vector<std::vector<float> > dc2x2(3, std::vector<float>((tile_xsize * tile_ysize + 63) / 64 * 64));
+        std::vector<std::vector<float> > dc4x4(3, std::vector<float>((tile_xsize * tile_ysize + 63) / 64 * 64));
+        std::vector<std::vector<float> > dc8x8(3, std::vector<float>((tile_xsize * tile_ysize + 63) / 64 * 64));
+        std::vector<std::vector<float> > dc16x16(3, std::vector<float>((tile_xsize * tile_ysize + 63) / 64 * 64));
+        std::vector<std::vector<float> > dc32x32(3, std::vector<float>((tile_xsize * tile_ysize + 63) / 64 * 64));
+
+        // IDENTITY strategy on every 8x8 block: 64 coefficients and one DC value per block.
+        for (int c = 0; c < 3; c++) {
+            for (size_t y = 0; y < tile_ysize; y = y + 8) {
+                const float* JXL_RESTRICT row = opsin.ConstPlaneRow(c, y);
+                size_t stride = opsin.PixelsPerRow();
+
+                for (size_t x = 0; x < tile_xsize; x = x + 8) {
+                    float* mem = (float*)calloc(8UL * 8UL, sizeof(float));
+                    float* dc_mem =
+                        (float*)calloc(AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks, sizeof(float));
+                    float* scratch_space = (float*)calloc(2048UL, sizeof(float));
+                    AcStrategy acs = AcStrategy::FromRawStrategy(AcStrategy::Type::IDENTITY);
+                    size_t xs = acs.covered_blocks_x();
+                    N_SCALAR::TransformFromPixels(acs.Strategy(), row + x, stride, mem, scratch_space);
+                    N_SCALAR::DCFromLowestFrequencies(acs.Strategy(), mem, dc_mem, xs);
+                    for (int m = 0; m < 64; m++) {
+                        dctIDT[c][64 * (y / 8 * (tile_xsize / 8) + x / 8) + m] = mem[m];
+                    }
+                    dcIDT[c][y / 8 * (tile_xsize / 8) + x / 8] = dc_mem[0];
+                    free(mem);
+                    free(dc_mem);
+                    free(scratch_space);
+                }
+            }
+        }
+
+        // DCT2X2 strategy, same 8x8 block layout as the IDENTITY pass.
+        for (int c = 0; c < 3; c++) {
+            for (size_t y = 0; y < tile_ysize; y = y + 8) {
+                const float* JXL_RESTRICT row = opsin.ConstPlaneRow(c, y);
+                size_t stride = opsin.PixelsPerRow();
+
+                for (size_t x = 0; x < tile_xsize; x = x + 8) {
+                    float* mem = (float*)calloc(8UL * 8UL, sizeof(float));
+                    float* dc_mem =
+                        (float*)calloc(AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks, sizeof(float));
+                    float* scratch_space = (float*)calloc(2048UL, sizeof(float));
+                    AcStrategy acs = AcStrategy::FromRawStrategy(AcStrategy::Type::DCT2X2);
+                    size_t xs = acs.covered_blocks_x();
+                    N_SCALAR::TransformFromPixels(acs.Strategy(), row + x, stride, mem, scratch_space);
+                    N_SCALAR::DCFromLowestFrequencies(acs.Strategy(), mem, dc_mem, xs);
+                    for (int m = 0; m < 64; m++) {
+                        dct2x2[c][64 * (y / 8 * (tile_xsize / 8) + x / 8) + m] = mem[m];
+                    }
+                    dc2x2[c][y / 8 * (tile_xsize / 8) + x / 8] = dc_mem[0];
+                    free(mem);
+                    free(dc_mem);
+                    free(scratch_space);
+                }
+            }
+        }
+
+        // DCT4X4 strategy, same 8x8 block layout as the IDENTITY pass.
+        for (int c = 0; c < 3; c++) {
+            for (size_t y = 0; y < tile_ysize; y = y + 8) {
+                const float* JXL_RESTRICT row = opsin.ConstPlaneRow(c, y);
+                size_t stride = opsin.PixelsPerRow();
+
+                for (size_t x = 0; x < tile_xsize; x = x + 8) {
+                    float* mem = (float*)calloc(8UL * 8UL, sizeof(float));
+                    float* dc_mem =
+                        (float*)calloc(AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks, sizeof(float));
+                    float* scratch_space = (float*)calloc(2048UL, sizeof(float));
+                    AcStrategy acs = AcStrategy::FromRawStrategy(AcStrategy::Type::DCT4X4);
+                    size_t xs = acs.covered_blocks_x();
+                    N_SCALAR::TransformFromPixels(acs.Strategy(), row + x, stride, mem, scratch_space);
+                    N_SCALAR::DCFromLowestFrequencies(acs.Strategy(), mem, dc_mem, xs);
+                    for (int m = 0; m < 64; m++) {
+                        dct4x4[c][64 * (y / 8 * (tile_xsize / 8) + x / 8) + m] = mem[m];
+                    }
+                    dc4x4[c][y / 8 * (tile_xsize / 8) + x / 8] = dc_mem[0];
+                    free(mem);
+                    free(dc_mem);
+                    free(scratch_space);
+                }
+            }
+        }
+
+        // Plain 8x8 DCT, same 8x8 block layout as the IDENTITY pass.
+        for (int c = 0; c < 3; c++) {
+            for (size_t y = 0; y < tile_ysize; y = y + 8) {
+                const float* JXL_RESTRICT row = opsin.ConstPlaneRow(c, y);
+                size_t stride = opsin.PixelsPerRow();
+
+                for (size_t x = 0; x < tile_xsize; x = x + 8) {
+                    float* mem = (float*)calloc(8UL * 8UL, sizeof(float));
+                    float* dc_mem =
+                        (float*)calloc(AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks, sizeof(float));
+                    float* scratch_space = (float*)calloc(2048UL, sizeof(float));
+                    AcStrategy acs = AcStrategy::FromRawStrategy(AcStrategy::Type::DCT);
+                    size_t xs = acs.covered_blocks_x();
+                    N_SCALAR::TransformFromPixels(acs.Strategy(), row + x, stride, mem, scratch_space);
+                    N_SCALAR::DCFromLowestFrequencies(acs.Strategy(), mem, dc_mem, xs);
+                    for (int m = 0; m < 64; m++) {
+                        dct8x8[c][64 * (y / 8 * (tile_xsize / 8) + x / 8) + m] = mem[m];
+                    }
+                    dc8x8[c][y / 8 * (tile_xsize / 8) + x / 8] = dc_mem[0];
+                    free(mem);
+                    free(dc_mem);
+                    free(scratch_space);
+                }
+            }
+        }
+
+        // DCT16X16 on every 16x16 block: 256 coefficients and 4 DC values per block.
+        for (int c = 0; c < 3; c++) {
+            for (size_t y = 0; y < tile_ysize; y = y + 16) {
+                const float* JXL_RESTRICT row = opsin.ConstPlaneRow(c, y);
+                size_t stride = opsin.PixelsPerRow();
+
+                for (size_t x = 0; x < tile_xsize; x = x + 16) {
+                    float* mem = (float*)calloc(16UL * 16UL, sizeof(float));
+                    float* dc_mem =
+                        (float*)calloc(AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks, sizeof(float));
+                    float* scratch_space = (float*)calloc(2048UL, sizeof(float));
+                    AcStrategy acs = AcStrategy::FromRawStrategy(AcStrategy::Type::DCT16X16);
+                    size_t xs = acs.covered_blocks_x();
+                    N_SCALAR::TransformFromPixels(acs.Strategy(), row + x, stride, mem, scratch_space);
+                    N_SCALAR::DCFromLowestFrequencies(acs.Strategy(), mem, dc_mem, xs);
+                    for (int m = 0; m < 16 * 16; m++) {
+                        dct16x16[c][16 * 16 * (y / 16 * (tile_xsize / 16) + x / 16) + m] = mem[m];
+                    }
+                    for (int m = 0; m < 4; m++) {
+                        dc16x16[c][4 * (y / 16 * (tile_xsize / 16) + x / 16) + m] = dc_mem[m];
+                    }
+                    free(mem);
+                    free(dc_mem);
+                    free(scratch_space);
+                }
+            }
+        }
+
+        // DCT32X32 on every 32x32 block: 1024 coefficients and 16 DC values per block.
+        for (int c = 0; c < 3; c++) {
+            for (size_t y = 0; y < tile_ysize; y = y + 32) {
+                const float* JXL_RESTRICT row = opsin.ConstPlaneRow(c, y);
+                size_t stride = opsin.PixelsPerRow();
+
+                for (size_t x = 0; x < tile_xsize; x = x + 32) {
+                    float* mem = (float*)calloc(32UL * 32UL, sizeof(float));
+                    float* dc_mem =
+                        (float*)calloc(AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks, sizeof(float));
+                    float* scratch_space = (float*)calloc(2048UL, sizeof(float));
+                    AcStrategy acs = AcStrategy::FromRawStrategy(AcStrategy::Type::DCT32X32);
+                    size_t xs = acs.covered_blocks_x();
+                    N_SCALAR::TransformFromPixels(acs.Strategy(), row + x, stride, mem, scratch_space);
+                    N_SCALAR::DCFromLowestFrequencies(acs.Strategy(), mem, dc_mem, xs);
+                    for (int m = 0; m < 32 * 32; m++) {
+                        dct32x32[c][32 * 32 * (y / 32 * (tile_xsize / 32) + x / 32) + m] = mem[m];
+                    }
+                    for (int m = 0; m < 16; m++) {
+                        dc32x32[c][16 * (y / 32 * (tile_xsize / 32) + x / 32) + m] = dc_mem[m];
+                    }
+                    free(mem);
+                    free(dc_mem);
+                    free(scratch_space);
+                }
+            }
+        }
+
+#ifdef XLNX_QC_DEBUG_DCT
+        std::cout << std::endl
+                  << "======================================== full coef "
+                     "=============================================="
+                  << std::endl;
+        for (int c = 0; c < 3; c++) {
+            if (c == 1) {
+                std::cout << std::setw(15) << 0 << " ";
+                for (int m = 0; m < tile_xsize; m++) {
+                    std::cout << std::setw(15) << m << " ";
+                }
+                std::cout << std::endl << std::endl;
+                for (int y = 0; y < tile_ysize; y++) {
+                    std::cout << std::setw(15) << y << " ";
+                    for (int x = 0; x < tile_xsize; x++) {
+                        std::cout << std::setw(15) << dct8x8[c][y * tile_xsize + x] << " ";
+                    }
+                    std::cout << std::endl;
+                }
+            }
+        }
+#endif
+
+#ifdef XLNX_QC_DEBUG_DC
+        std::cout << std::endl
+                  << "======================================== full DC "
+                     "=============================================="
+                  << std::endl;
+        for (int c = 0; c < 3; c++) {
+            if (c == 1) {
+                std::cout << std::setw(15) << 0 << " ";
+                for (int m = 0; m < tile_xsize / 8; m++) {
+                    std::cout << std::setw(15) << m << " ";
+                }
+                std::cout << std::endl << std::endl;
+                for (int y = 0; y < tile_ysize / 8; y++) {
+                    std::cout << std::setw(15) << y << " ";
+                    for (int x = 0; x < tile_xsize / 8; x++) {
+                        std::cout << std::setw(15) << dc32x32[c][y * tile_xsize / 8 + x] << " ";
+                    }
+                    std::cout << std::endl;
+                }
+            }
+        }
+#endif
+
+        ArControlFieldHeuristics ar_heuristics;
+        AcStrategyHeuristics acs_heuristics;
+        CfLHeuristics cfl_heuristics;
+
+        cfl_heuristics.Init(*opsin_);
+        acs_heuristics.Init(*opsin_, enc_state_);
+        ar_heuristics.PrepareForThreads(/*num_threads*/ 1);
+        cfl_heuristics.PrepareForThreads(/*num_threads*/ 1);
+
+        // Tiles are processed one after another on the calling thread (thread
+        // index 0, matching the PrepareForThreads(1) calls above), walking the
+        // kEncTileDimInBlocks-sized tile grid row by row; `pool` is not used here.
+        for (int tid = 0; tid < DivCeil(enc_state_->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks) *
+                                    DivCeil(enc_state_->shared.frame_dim.ysize_blocks, kEncTileDimInBlocks);
+             tid++) {
+            size_t thread = 0;
+            size_t n_enc_tiles = DivCeil(enc_state_->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks);
+            size_t tx = tid % n_enc_tiles;
+            size_t ty = tid / n_enc_tiles;
+            size_t by0 = ty * kEncTileDimInBlocks;
+            size_t by1 = std::min((ty + 1) * kEncTileDimInBlocks, enc_state_->shared.frame_dim.ysize_blocks);
+            size_t bx0 = tx * kEncTileDimInBlocks;
+            size_t bx1 = std::min((tx + 1) * kEncTileDimInBlocks, enc_state_->shared.frame_dim.xsize_blocks);
+            Rect r(bx0, by0, bx1 - bx0, by1 - by0);
+
+            // For speeds up to Wombat, we only compute the color correlation map
+            // once we know the transform type and the quantization map.
+            if (cparams.speed_tier <= SpeedTier::kSquirrel) {
+                cfl_heuristics.ComputeTile(r, *opsin_, enc_state_->shared.matrices,
+                                           /*ac_strategy=*/nullptr,
+                                           /*quantizer=*/nullptr, /*fast=*/false, thread, &enc_state_->shared.cmap,
+                                           opsin.xsize(), opsin.ysize(), dctIDT, dct2x2, dct4x4, dct8x8, dct16x16,
+                                           dct32x32, dcIDT, dc2x2, dc4x4, dc8x8, dc16x16, dc32x32);
+            }
+
+            // Choose block sizes.
+            acs_heuristics.ProcessRect(r, opsin.xsize(), opsin.ysize(), dctIDT, dct2x2, dct4x4, dct8x8, dct16x16,
+                                       dct32x32, dcIDT, dc2x2, dc4x4, dc8x8, dc16x16, dc32x32);
+
+// Choose amount of post-processing smoothing.
+// TODO(veluca): should this go *after* AdjustQuantField?
+#ifndef XLNX_DISABLE_ARC
+            ar_heuristics.RunRect(r, *opsin_, enc_state_, thread);
+#else
+            ImageB* JXL_RESTRICT epf_sharpness = &enc_state_->shared.epf_sharpness;
+            FillPlane(static_cast<uint8_t>(4), epf_sharpness, r);
+#endif
+            // Always set the initial quant field, so we can compute the CfL map
+            // with more accuracy. The initial quant field might change in slower
+            // modes, but adjusting the quant field with butteraugli when all the
+            // other encoding parameters are fixed is likely a more reliable choice
+            // anyway.
+            AdjustQuantField(enc_state_->shared.ac_strategy, r, &enc_state_->initial_quant_field);
+            quantizer.SetQuantFieldRect(enc_state_->initial_quant_field, r, &enc_state_->shared.raw_quant_field);
+
+// Compute a non-default CfL map if we are at Hare speed, or slower.
+#ifndef XLNX_DISABLE_2NDCMP
+            if (cparams.speed_tier <= SpeedTier::kHare) {
+                cfl_heuristics.ComputeTile(r, *opsin_, enc_state_->shared.matrices, &enc_state_->shared.ac_strategy,
+                                           &enc_state_->shared.quantizer,
+                                           /*fast=*/cparams.speed_tier >= SpeedTier::kWombat, thread,
+                                           &enc_state_->shared.cmap, dctIDT, dct2x2, dct4x4, dct8x8, dct16x16, dct32x32,
+                                           dcIDT, dc2x2, dc4x4, dc8x8, dc16x16, dc32x32);
+            }
+#endif
+        }
+
+        acs_heuristics.Finalize(aux_out);
+        if (cparams.speed_tier <= SpeedTier::kHare) {
+            cfl_heuristics.ComputeDC(
+                /*fast=*/cparams.speed_tier >= SpeedTier::kWombat, &enc_state_->shared.cmap);
+        }
+
+        FindBestDequantMatrices(cparams, *opsin_, modular_frame_encoder.get(), &enc_state_->shared.matrices);
+
+        // Refine quantization levels.
+        FindBestQuantizer(ib_or_linear, *opsin_, enc_state_, pool, aux_out);
+
+        // Choose a context model that depends on the amount of quantization for
+        // AC.
+        if (cparams.speed_tier < SpeedTier::kFalcon) {
+            FindBestBlockEntropyModel(*enc_state_);
+        }
+
+#ifdef XLNX_DEBUG_CMAP
+        std::cout << "=========================================" << std::endl;
+        std::cout << "ColorMap info: " << std::endl;
+        ImageSB* JXL_RESTRICT tmp_map = &enc_state_->shared.cmap.ytox_map;
+        int32_t dc = enc_state_->shared.cmap.GetYToXDC();
+        std::cout << "Y to X dc: " << dc << std::endl;
+        for (int i = 0; i < tmp_map->ysize(); i++) {
+            int8_t* JXL_RESTRICT row_out = tmp_map->Row(i);
+            for (int j = 0; j < tmp_map->xsize(); j++) {
+                std::cout << (int)row_out[j] << " ";
+            }
+            std::cout << std::endl;
+        }
+
+        tmp_map = &enc_state_->shared.cmap.ytob_map; // second dump covers the Y-to-B map, not Y-to-X again
+        dc = enc_state_->shared.cmap.GetYToBDC();
+        std::cout << "Y to B dc: " << dc << std::endl;
+        for (int i = 0; i < tmp_map->ysize(); i++) {
+            int8_t* JXL_RESTRICT row_out = tmp_map->Row(i);
+            for (int j = 0; j < tmp_map->xsize(); j++) {
+                std::cout << (int)row_out[j] << " ";
+            }
+            std::cout << std::endl;
+        }
+        std::cout << std::endl;
+#endif
+
+        InitializePassesEncoder(opsin, pool, enc_state_, modular_frame_encoder.get(), aux_out, opsin.xsize(),
+                                opsin.ysize(), dctIDT, dct2x2, dct4x4, dct8x8, dct16x16, dct32x32, dcIDT, dc2x2, dc4x4,
+                                dc8x8, dc16x16, dc32x32);
+
+        enc_state_->passes.resize(enc_state_->progressive_splitter.GetNumPasses());
+        for (PassesEncoderState::PassData& pass : enc_state_->passes) {
+            pass.ac_tokens.resize(shared.frame_dim.num_groups);
+        }
+
+        lossy_frame_encoder.ComputeAllCoeffOrders(shared.frame_dim);
+        shared.num_histograms = 1;
+
+        *frame_header = shared.frame_header;
+
+        // needs to happen *AFTER* VarDCT-ComputeEncodingData.
+        JXL_RETURN_IF_ERROR(modular_frame_encoder->ComputeEncodingData(
+            *frame_header, *ib.metadata(), &opsin, *extra_channels, lossy_frame_encoder.State(), pool, aux_out,
+            /* do_color=*/frame_header->encoding == FrameEncoding::kModular));
+    }
+    return true;
+}
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_phase3.cpp b/codec/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_phase3.cpp
new file mode 100644
index 0000000000..ee15e2154a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/others/src/host_acc_tokInit_histogram/acc_phase3.cpp
@@ -0,0 +1,1572 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef ACC_PHASE3_CPP
+#define ACC_PHASE3_CPP
+
+#include "acc_phase3.hpp"
+
+#include <sys/time.h>
+
+#include "acc_cluster_histogram.hpp"
+#include "acc_store_encode_data.hpp"
+#include "lib/jxl/lehmer_code.h"
+#include "host_tokinit_histogram.hpp"
+
+void test(int* in, int* out);
+
+inline int tvdiff(struct timeval* tv0, struct timeval* tv1) { // microseconds elapsed from *tv0 to *tv1
+    return (tv1->tv_usec - tv0->tv_usec) + 1000000 * (tv1->tv_sec - tv0->tv_sec);
+}
+
+namespace jxl {
+namespace {
+// Returns the position of the first element of `v` equal to `value`, or
+// v.size() when no element matches.
+size_t IndexOf(const std::vector<uint8_t>& v, uint8_t value) {
+    size_t pos = 0;
+    while (pos < v.size() && v[pos] != value) ++pos;
+    return pos;
+}
+
+// Moves the element at `index` to position 0 and shifts the elements that
+// preceded it one slot towards the back; elements after `index` stay put.
+void MoveToFront(std::vector<uint8_t>* v, size_t index) {
+    // Rotating the range [0, index] right by one is that shift-and-insert.
+    auto first = v->begin();
+    std::rotate(first, first + index, first + index + 1);
+}
+
+// Move-to-front coding of `v` against a recency list seeded with 0..max(v).
+std::vector<uint8_t> MoveToFrontTransform(const std::vector<uint8_t>& v) {
+    if (v.empty()) return v; // nothing to encode
+    std::vector<uint8_t> recency(*std::max_element(v.begin(), v.end()) + 1);
+    for (size_t k = 0; k < recency.size(); ++k) recency[k] = k;
+    std::vector<uint8_t> ranks;
+    for (const uint8_t symbol : v) {
+        const size_t rank = IndexOf(recency, symbol);
+        JXL_ASSERT(rank < recency.size());
+        ranks.push_back(static_cast<uint8_t>(rank)); // emit the rank, then promote the symbol to rank 0
+        MoveToFront(&recency, rank);
+    }
+    return ranks;
+}
+} // namespace
+
+namespace {
+
+// Appends the Lehmer-code tokens for the permutation order[0..size) to
+// `tokens`. The first token (context CoeffOrderContext(size)) carries the
+// number of coded entries; it is followed by entries [skip, stop), each coded
+// in the context of the previous entry's value (0 for the first one).
+// Trailing zero entries past `skip` are left out of the coded range.
+void acc_TokenizePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip, size_t size,
+                             std::vector<Token>* tokens) {
+    std::vector<LehmerT> code(size);
+    std::vector<uint32_t> scratch(size + 1);
+    ComputeLehmerCode(order, scratch.data(), size, code.data());
+    size_t stop = size;
+    while (stop > skip && code[stop - 1] == 0) --stop;
+    tokens->emplace_back(CoeffOrderContext(size), stop - skip);
+    uint32_t prev = 0;
+    for (size_t pos = skip; pos < stop; prev = code[pos], ++pos) {
+        tokens->emplace_back(CoeffOrderContext(prev), code[pos]);
+    }
+}
+
+} // namespace
+
+namespace {
+// Re-expresses `order` relative to the strategy's natural coefficient order
+// (writing the result to `order_zigzag`) and tokenizes that permutation,
+// skipping the first covered_blocks_x * covered_blocks_y entries.
+void acc_EncodeCoeffOrder(const coeff_order_t* JXL_RESTRICT order, AcStrategy acs,
+                          std::vector<Token>* tokens, coeff_order_t* order_zigzag) {
+    const size_t num_blocks = acs.covered_blocks_x() * acs.covered_blocks_y();
+    const size_t num_coeffs = kDCTBlockSize * num_blocks;
+    const coeff_order_t* const lut = acs.NaturalCoeffOrderLut();
+    // order_zigzag must have room for num_coeffs entries.
+    for (size_t k = 0; k != num_coeffs; ++k) order_zigzag[k] = lut[order[k]];
+    acc_TokenizePermutation(order_zigzag, num_blocks, num_coeffs, tokens);
+}
+} // namespace
+
+// Tokenizes the quantized AC coefficients of every group and pass into
+// enc_state->passes[pass].ac_tokens[group], then (VarDCT frames only) appends
+// the tokens describing the pass-0 coefficient orders to coefOrders_tokens[0].
+// Runs single-threaded: `pool` is not used and one EncCache (index 0) is shared.
+// coefOrders_tokens must already hold at least one vector. Always returns true.
+Status acc_predictAndtoken(LossyFrameEncoder& lossy_frame_encoder,
+                           std::unique_ptr<FrameHeader>& frame_header,
+                           std::vector<std::vector<Token> >& coefOrders_tokens,
+                           ThreadPool* pool) {
+    std::vector<EncCache>& group_caches_ = lossy_frame_encoder.get_group_cashes();
+    PassesEncoderState* JXL_RESTRICT enc_state_ = lossy_frame_encoder.State();
+    PassesSharedState& shared = enc_state_->shared;
+
+    // Tokenize coefficients.
+    group_caches_.resize(1); // multi-thread settings, would be further removed
+    const size_t thread = 0;
+    for (size_t group_index = 0; group_index < shared.frame_dim.num_groups; group_index++) {
+        const Rect rect = shared.BlockGroupRect(group_index);
+        for (size_t idx_pass = 0; idx_pass < enc_state_->passes.size(); idx_pass++) {
+            const int32_t* JXL_RESTRICT ac_rows[3] = {
+                enc_state_->coeffs[idx_pass]->PlaneRow(0, group_index, 0).ptr32,
+                enc_state_->coeffs[idx_pass]->PlaneRow(1, group_index, 0).ptr32,
+                enc_state_->coeffs[idx_pass]->PlaneRow(2, group_index, 0).ptr32,
+            };
+            group_caches_[thread].InitOnce(); // Ensure group cache is initialized.
+            // Coefficient orders are stored back to back, one run of coeff_order_size per pass.
+            const coeff_order_t* acc_coeff_orders = &shared.coeff_orders[idx_pass * shared.coeff_order_size];
+            TokenizeCoefficients(acc_coeff_orders, rect, ac_rows, shared.ac_strategy,
+                                 frame_header->chroma_subsampling, &group_caches_[thread].num_nzeroes,
+                                 &enc_state_->passes[idx_pass].ac_tokens[group_index], enc_state_->shared.quant_dc,
+                                 enc_state_->shared.raw_quant_field, enc_state_->shared.block_ctx_map);
+        }
+    }
+
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        const coeff_order_t* JXL_RESTRICT order = &enc_state_->shared.coeff_orders[0];
+        auto mem = hwy::AllocateAligned<coeff_order_t>(AcStrategy::kMaxCoeffArea);
+        const uint16_t used_orders = enc_state_->used_orders[0];
+        uint16_t computed = 0; // bit `ord` is set once that order has been visited
+        for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+            const uint8_t ord = kStrategyOrder[o];
+            if (computed & (1 << ord)) continue; // order already handled via an earlier strategy
+            computed |= 1 << ord;
+            if ((used_orders & (1 << ord)) == 0) continue;
+            AcStrategy acs = AcStrategy::FromRawStrategy(o);
+            for (size_t c = 0; c < 3; c++) {
+                acc_EncodeCoeffOrder(&order[CoeffOrderOffset(ord, c)], acs, &coefOrders_tokens[0], mem.get());
+            }
+        }
+    }
+    return true;
+}
+
+BitWriter* get_output(const size_t index, std::vector<BitWriter>& group_codes, bool is_small_image) {
+    return is_small_image ? &group_codes[0] : &group_codes[index]; // slot 0 serves every index when is_small_image
+}
+
+Status acc_histogram(LossyFrameEncoder& lossy_frame_encoder,
+                     std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                     PassesEncoderState* passes_enc_state,
+                     FrameDimensions frame_dim,
+                     std::unique_ptr<FrameHeader>& frame_header,
+                     CompressParams cparams,
+                     std::vector<std::vector<Token> >& coefOrders_tokens,
+                     BitWriter* group_codes_writer,
+                     BitWriter* acInfo_writer,
+                     size_t& ans_cost,
+                     size_t& mtf_cost,
+                     std::vector<std::vector<Token> >& bcm_tokens,
+                     std::vector<std::vector<Token> >& bcm_mtf_tokens,
+                     EntropyEncodingData& bcm_codes,
+                     std::vector<uint8_t>& bcm_dummy_context_map,
+
+                     EntropyEncodingData& modularFramTree_code,
+                     std::vector<uint8_t>& modularFramTree_ctxmap,
+
+                     EntropyEncodingData& coefOrders_codes,
+                     std::vector<uint8_t>& coefOrders_context_map,
+
+                     std::vector<AuxOut>& aux_outs,
+                     AuxOut* aux_out,
+                     std::string xclbinPath) {
+    std::vector<EncCache>& group_caches_ = lossy_frame_encoder.get_group_cashes();
+    PassesEncoderState* JXL_RESTRICT enc_state_ = lossy_frame_encoder.State();
+    PassesSharedState& shared = enc_state_->shared;
+
+    const coeff_order_t* JXL_RESTRICT order = &enc_state_->shared.coeff_orders[0 * enc_state_->shared.coeff_order_size];
+    auto mem = hwy::AllocateAligned<coeff_order_t>(AcStrategy::kMaxCoeffArea);
+    uint16_t computed = 0;
+    uint16_t used_orders = enc_state_->used_orders[0];
+
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+            uint8_t ord = kStrategyOrder[o];
+            if (computed & (1 << ord)) continue;
+            computed |= 1 << ord;
+            if ((used_orders & (1 << ord)) == 0) continue;
+            AcStrategy acs = AcStrategy::FromRawStrategy(o);
+            for (size_t c = 0; c < 3; c++) {
+                acc_EncodeCoeffOrder(&order[CoeffOrderOffset(ord, c)], acs, &coefOrders_tokens[0], mem.get());
+            }
+        }
+    }
+
+    HistogramParams params0;
+    params0.clustering = HistogramParams::ClusteringType::kFast;
+    params0.uint_method = HistogramParams::HybridUintMethod::kNone;
+    params0.lz77_method = HistogramParams::LZ77Method::kNone;
+    HistogramParams params1;
+    params1.clustering = HistogramParams::ClusteringType::kFast;
+    params1.uint_method = HistogramParams::HybridUintMethod::kNone;
+    params1.lz77_method = HistogramParams::LZ77Method::kNone;
+    HistogramParams params2;
+    params2.clustering = HistogramParams::ClusteringType::kFast;
+    params2.uint_method = HistogramParams::HybridUintMethod::kNone;
+    params2.lz77_method = HistogramParams::LZ77Method::kNone;
+    HistogramParams params3;
+    params3.clustering = HistogramParams::ClusteringType::kFast;
+    params3.uint_method = HistogramParams::HybridUintMethod::kNone;
+    params3.lz77_method = HistogramParams::LZ77Method::kNone;
+    HistogramParams params4(enc_state_->cparams.speed_tier, enc_state_->shared.block_ctx_map.NumACContexts());
+    if (enc_state_->cparams.decoding_speed_tier >= 1) {
+        params4.max_histograms = 6;
+    }
+
+    params4.clustering = HistogramParams::ClusteringType::kFast;
+    params4.uint_method = HistogramParams::HybridUintMethod::kNone;
+    params4.lz77_method = HistogramParams::LZ77Method::kNone;
+    std::vector<uint8_t> context_map0;
+    std::vector<uint8_t> context_map1;
+    std::vector<uint8_t> context_map2;
+    std::vector<uint8_t> context_map3;
+    std::vector<uint8_t> context_map4;
+    std::vector<uint8_t> context_map_c0;
+    std::vector<uint8_t> context_map_c1;
+    std::vector<uint8_t> context_map_c2;
+    std::vector<uint8_t> context_map_c3;
+    std::vector<uint8_t> context_map_c4;
+    std::vector<std::vector<Token> > tokens0(1);
+    std::vector<std::vector<Token> > tokens1(1);
+    std::vector<std::vector<Token> > tokens2(1);
+    std::vector<std::vector<Token> > tokens3(1);
+    std::vector<std::vector<Token> > tokens4(1);
+    std::vector<std::vector<Token> > tokens_c0(1);
+    std::vector<std::vector<Token> > tokens_c1(1);
+    std::vector<std::vector<Token> > tokens_c2(1);
+    std::vector<std::vector<Token> > tokens_c3(1);
+    std::vector<std::vector<Token> > tokens_c4(1);
+    EntropyEncodingData codes0;
+    EntropyEncodingData codes1;
+    EntropyEncodingData codes2;
+    EntropyEncodingData codes3;
+    EntropyEncodingData codes4;
+    EntropyEncodingData codes_c0;
+    EntropyEncodingData codes_c1;
+    EntropyEncodingData codes_c2;
+    EntropyEncodingData codes_c3;
+    EntropyEncodingData codes_c4;
+    std::vector<Histogram> clustered_histograms0;
+    std::vector<Histogram> clustered_histograms1;
+    std::vector<Histogram> clustered_histograms2;
+    std::vector<Histogram> clustered_histograms3;
+    std::vector<Histogram> clustered_histograms4;
+    std::vector<Histogram> clustered_histograms_c0;
+    std::vector<Histogram> clustered_histograms_c1;
+    std::vector<Histogram> clustered_histograms_c2;
+    std::vector<Histogram> clustered_histograms_c3;
+    std::vector<Histogram> clustered_histograms_c4;
+    BitWriter* writer0 = nullptr;
+    BitWriter* writer1 = nullptr;
+    BitWriter* writer2 = nullptr;
+    BitWriter* writer3 = nullptr;
+    BitWriter* writer4 = nullptr;
+    size_t layer0 = 0;
+    size_t layer1 = 0;
+    size_t layer2 = 0;
+    size_t layer3 = 0;
+    size_t layer4 = 0;
+    size_t num_contexts0 = 1;
+    size_t num_contexts1 = 1;
+    size_t num_contexts2 = 1;
+    size_t num_contexts3 = 1;
+    size_t num_contexts4 = 1;
+    bool do_once[5] = {0, 0, 0, 0, 0};
+    char* do_inner = (char*)malloc(sizeof(char) * 8);
+    for (int i = 0; i < 5; i++) do_inner[i] = 0;
+    char* do_prefix_in = (char*)malloc(sizeof(char) * 8);
+    for (int i = 0; i < 5; i++) do_prefix_in[i] = 0;
+    char* do_prefix_out = (char*)malloc(sizeof(char) * 8);
+    for (int i = 0; i < 5; i++) do_prefix_out[i] = 0;
+
+    const size_t num_passes = passes_enc_state->progressive_splitter.GetNumPasses();
+    const bool is_small_image = frame_dim.num_groups == 1 && num_passes == 1;
+
+    if (!is_small_image) {
+        group_codes_writer->init(200);
+        group_codes_writer->update_part(0);
+    } else {
+        group_codes_writer->init(200);
+        group_codes_writer->update_part(0);
+    }
+
+    bool all_default = true;
+    const float* dc_quant = (lossy_frame_encoder.State()->shared.matrices).DCQuants();
+    for (size_t c = 0; c < 3; c++) {
+        if (dc_quant[c] != kDCQuant[c]) {
+            all_default = false;
+        }
+    }
+    BitWriter::Allotment allotment(group_codes_writer, 1 + sizeof(float) * kBitsPerByte * 3);
+    group_codes_writer->Write(1, all_default);
+    if (!all_default) {
+        for (size_t c = 0; c < 3; c++) {
+            JXL_RETURN_IF_ERROR(F16Coder::Write(dc_quant[c] * 128.0f, group_codes_writer));
+        }
+    }
+    ReclaimAndCharge(group_codes_writer, &allotment, kLayerDequantTables, aux_out);
+
+    auto& dct = enc_state_->shared.block_ctx_map.dc_thresholds;
+    auto& qft = enc_state_->shared.block_ctx_map.qf_thresholds;
+    auto& ctx_map = enc_state_->shared.block_ctx_map.ctx_map;
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        JXL_RETURN_IF_ERROR(enc_state_->shared.quantizer.Encode(group_codes_writer, kLayerQuant, aux_out));
+        //============Encode GlobalDCInfo: Block Context Map=========
+        if (dct[0].empty() && dct[1].empty() && dct[2].empty() && qft.empty() && ctx_map.size() == 21 &&
+            std::equal(ctx_map.begin(), ctx_map.end(), jxl::kDefaultCtxMap)) {
+            group_codes_writer->Write(1, 1); // default
+        } else {
+            group_codes_writer->Write(1, 0);
+            for (int j : {0, 1, 2}) {
+                group_codes_writer->Write(4, dct[j].size());
+                for (int i : dct[j]) {
+                    JXL_CHECK(U32Coder::Write(kDCThresholdDist, PackSigned(i), group_codes_writer));
+                }
+            }
+            group_codes_writer->Write(4, qft.size());
+            for (uint32_t i : qft) {
+                JXL_CHECK(U32Coder::Write(kQFThresholdDist, i - 1, group_codes_writer));
+            }
+            for (size_t i = 0; i < ctx_map.size(); i++) {
+                bcm_tokens[0].emplace_back(0, ctx_map[i]);
+            }
+
+            {
+                std::vector<uint8_t> context_map = ctx_map;
+                BitWriter* writer = group_codes_writer;
+                writer0 = group_codes_writer;
+                size_t num_histograms = enc_state_->shared.block_ctx_map.num_ctxs;
+                if (num_histograms == 1) {
+                    // Simple code
+                    writer->Write(1, 1);
+                    // 0 bits per entry.
+                    writer->Write(2, 0);
+                } else {
+                    std::vector<std::vector<Token> > tokens(1);
+                    for (size_t i = 0; i < context_map.size(); i++) {
+                        tokens[0].emplace_back(0, context_map[i]);
+                    }
+
+                    size_t entry_bits = CeilLog2Nonzero(num_histograms);
+                    size_t simple_cost = entry_bits * context_map.size();
+                    if (entry_bits < 4) {
+                        writer->Write(1, 1);
+                        writer->Write(2, entry_bits);
+                        for (size_t i = 0; i < context_map.size(); i++) {
+                            writer->Write(entry_bits, context_map[i]);
+                        }
+                    } else {
+                        writer->Write(1, 0);
+                        writer->Write(1, 0);
+                        EntropyEncodingData context_codes0;
+                        std::vector<std::vector<Token> > context_tokens0(1);
+                        do_once[0] = true;
+                        num_contexts0 = 1;
+                        tokens0 = tokens;
+                        codes0 = bcm_codes;
+                        context_map0 = bcm_dummy_context_map;
+                        // codes_c0 = context_codes0;
+                        // writer0 = writer;
+                        layer0 = 0;
+
+                        // BuildAndEncodeHistogramsNew0
+                        // =========================================================
+                    }
+                }
+            }
+        }
+        //=============================
+        //============Encode GlobalDCInfo: Color Correlation Map=========
+        if (!is_small_image) {
+            group_codes_writer->update_part(20);
+        } else {
+            group_codes_writer->update_part(20);
+        }
+        ColorCorrelationMapEncodeDC(&enc_state_->shared.cmap, group_codes_writer, kLayerDC, aux_out);
+        //=============================
+    }
+
+    if (!is_small_image) {
+        group_codes_writer->update_part(30);
+    } else {
+        group_codes_writer->update_part(30);
+    }
+
+    writer1 = group_codes_writer;
+    writer2 = group_codes_writer;
+    BitWriter::Allotment allotmentGlobalInfo(group_codes_writer, 1);
+    // If we are using brotli, or not using modular mode.
+    if (modular_frame_encoder->tree_tokens.empty() || modular_frame_encoder->tree_tokens[0].empty()) {
+        group_codes_writer->Write(1, 0);
+        ReclaimAndCharge(group_codes_writer, &allotmentGlobalInfo, kLayerModularTree, aux_out);
+    } else {
+        group_codes_writer->Write(1, 1);
+        ReclaimAndCharge(group_codes_writer, &allotmentGlobalInfo, kLayerModularTree, aux_out);
+        // Write tree
+        if (cparams.speed_tier > SpeedTier::kKitten) {
+            params1.ans_histogram_strategy = HistogramParams::ANSHistogramStrategy::kApproximate;
+            params2.ans_histogram_strategy = HistogramParams::ANSHistogramStrategy::kApproximate;
+        }
+
+        if (cparams.decoding_speed_tier >= 1) {
+            params1.max_histograms = 12;
+            params2.max_histograms = 12;
+        }
+
+        EntropyEncodingData context_codes1;
+        std::vector<std::vector<Token> > context_tokens1(1);
+        std::vector<uint8_t> dummy_context_map1;
+
+        do_once[1] = true;
+        num_contexts1 = kNumTreeContexts;
+        tokens1 = modular_frame_encoder->tree_tokens;
+        codes1 = modularFramTree_code;
+        context_map1 = modularFramTree_ctxmap;
+        ////codes_c0 = context_codes0;
+        ////writer0 = writer;
+        layer1 = kLayerModularTree;
+
+        // BuildAndEncodeHistogramsNew1
+
+        if (!is_small_image) {
+            group_codes_writer->update_part(50);
+        } else {
+            group_codes_writer->update_part(50);
+        }
+        params2.image_widths = modular_frame_encoder->image_widths;
+        // Write histograms.
+        EntropyEncodingData context_codes2;
+        std::vector<std::vector<Token> > context_tokens2(1);
+        std::vector<uint8_t> dummy_context_map2;
+
+        do_once[2] = true;
+        num_contexts2 = (modular_frame_encoder->tree.size() + 1) / 2;
+        tokens2 = modular_frame_encoder->tokens;
+        codes2 = modular_frame_encoder->code;
+        context_map2 = modular_frame_encoder->context_map;
+        ////codes_c0 = context_codes0;
+        ////writer0 = writer;
+        layer2 = kLayerModularGlobal;
+
+        // BuildAndEncodeHistogramsNew2
+    }
+
+    //============================= Encode Global ACInfo =============
+    if (!is_small_image) {
+        acInfo_writer->init(200);
+        acInfo_writer->update_part(0);
+    } else {
+        acInfo_writer->update_part(80);
+    }
+    writer3 = acInfo_writer;
+    writer4 = acInfo_writer;
+
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        bool all_default = true;
+        const std::vector<QuantEncoding>& encodings = (enc_state_->shared.matrices).encodings();
+
+        for (size_t i = 0; i < encodings.size(); i++) {
+            if (encodings[i].mode != QuantEncoding::kQuantModeLibrary || encodings[i].predefined != 0) {
+                all_default = false;
+            }
+        }
+        // TODO(janwas): better bound
+        BitWriter::Allotment allotment(acInfo_writer, 512 * 1024);
+        acInfo_writer->Write(1, all_default);
+        ReclaimAndCharge(acInfo_writer, &allotment, kLayerDequantTables, aux_out);
+
+        size_t num_histo_bits = CeilLog2Nonzero(enc_state_->shared.frame_dim.num_groups);
+        if (num_histo_bits != 0) {
+            BitWriter::Allotment allotment(acInfo_writer, num_histo_bits);
+            acInfo_writer->Write(num_histo_bits, enc_state_->shared.num_histograms - 1);
+            ReclaimAndCharge(acInfo_writer, &allotment, kLayerAC, aux_out);
+        }
+
+        //============= encode coef orders========
+        // Encode coefficient orders.
+        uint16_t used_orders = enc_state_->used_orders[0];
+        size_t order_bits = 0;
+        JXL_RETURN_IF_ERROR(U32Coder::CanEncode(kOrderEnc, enc_state_->used_orders[0], &order_bits));
+        BitWriter::Allotment allotmentCoef(acInfo_writer, order_bits);
+        JXL_CHECK(U32Coder::Write(kOrderEnc, enc_state_->used_orders[0], acInfo_writer));
+        ReclaimAndCharge(acInfo_writer, &allotmentCoef, kLayerOrder, aux_out);
+
+        // Do not write anything if no order is used.
+        EntropyEncodingData context_codes3;
+        std::vector<std::vector<Token> > context_tokens3(1);
+        std::vector<uint8_t> dummy_context_map3;
+        do_once[3] = true;
+        num_contexts3 = kPermutationContexts;
+        tokens3 = coefOrders_tokens;
+        codes3 = coefOrders_codes;
+        context_map3 = coefOrders_context_map;
+        ////codes_c0 = context_codes0;
+        ////writer0 = writer;
+        layer3 = kLayerOrder;
+        // BuildAndEncodeHistogramsNew3
+
+        if (!is_small_image) {
+            acInfo_writer->update_part(20);
+        } else {
+            acInfo_writer->update_part(100);
+        }
+    }
+
+    std::vector<std::vector<Histogram> > histograms_(5);
+    histograms_[0].resize(num_contexts0);
+    histograms_[1].resize(num_contexts1);
+    histograms_[2].resize(num_contexts2);
+    histograms_[3].resize(num_contexts3);
+    histograms_[4].resize(enc_state_->shared.num_histograms * enc_state_->shared.block_ctx_map.NumACContexts());
+
+    std::vector<HistogramParams> params(5);
+    std::vector<size_t> num_contexts(5);
+    std::vector<size_t> layer(5);
+    std::vector<EntropyEncodingData*> codes(5);
+    std::vector<std::vector<uint8_t>*> context_map(5);
+    std::vector<EntropyEncodingData*> codes_c(5);
+    std::vector<BitWriter*> writer(5);
+    writer[0] = writer0;
+    writer[1] = writer1;
+    writer[2] = writer2;
+    writer[3] = writer3;
+    writer[4] = writer4;
+
+    std::vector<std::vector<uint32_t> > nonempty_histograms(5);
+    std::vector<uint32_t> largest_idx(5);
+
+    std::vector<std::vector<Histogram> > clustered_histograms(5);
+    std::vector<std::vector<uint32_t> > histogram_symbols(5);
+
+    std::vector<std::vector<Histogram> > clustered_histogramsin(5);
+    std::vector<std::vector<std::vector<Token> > > tokensin(5, std::vector<std::vector<Token> >(1));
+    std::vector<EntropyEncodingData> codesin(5);
+    std::vector<std::vector<uint8_t> > context_map_in(5);
+
+    constexpr float kMinDistanceForDistinctFast = 64.0f;
+    constexpr float kMinDistanceForDistinctBest = 16.0f;
+
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        do_once[4] = true;
+    }
+
+    // Build histograms.
+    for (int i = 0; i < 5; i++) {
+        if (!do_once[i]) continue;
+        if (i == 0) {
+            params[0] = params0;
+            num_contexts[0] = num_contexts0;
+            layer[0] = layer0;
+            codes[0] = &codes0;
+            context_map[0] = &context_map0;
+            codes_c[0] = &codes_c0;
+        } else if (i == 1) {
+            params[1] = params1;
+            num_contexts[1] = num_contexts1;
+            layer[1] = layer1;
+            codes[1] = &codes1;
+            context_map[1] = &context_map1;
+            codes_c[1] = &codes_c1;
+        } else if (i == 2) {
+            params[2] = params2;
+            num_contexts[2] = num_contexts2;
+            layer[2] = layer2;
+            codes[2] = &codes2;
+            context_map[2] = &context_map2;
+            codes_c[2] = &codes_c2;
+        } else if (i == 3) {
+            params[3] = params3;
+            num_contexts[3] = num_contexts3;
+            layer[3] = layer3;
+            codes[3] = &codes3;
+            context_map[3] = &context_map3;
+            codes_c[3] = &codes_c3;
+        } else if (i == 4) {
+            params[4] = params4;
+            num_contexts[4] = num_contexts4;
+            layer[4] = kLayerAC;
+            codes[4] = &enc_state_->passes[0].codes;
+            context_map[4] = &enc_state_->passes[0].context_map;
+            codes_c[4] = &codes_c4;
+        }
+    }
+
+    // lossy_frame  ---- > mem
+
+    int config[32];
+
+    // config init
+    int pixel_xsize = shared.frame_dim.xsize;
+    int pixel_ysize = shared.frame_dim.ysize;
+    int group_dim = shared.frame_dim.group_dim;
+    int xsize_blocks = shared.frame_dim.xsize_blocks;
+    int ysize_blocks = shared.frame_dim.ysize_blocks;
+
+    config[0] = enc_state_->shared.quant_dc.bytes_per_row();
+    config[1] = enc_state_->shared.raw_quant_field.bytes_per_row();
+    config[4] = group_dim;
+    config[5] = pixel_xsize;
+    config[6] = pixel_ysize;
+    config[7] = enc_state_->shared.block_ctx_map.num_ctxs;
+    config[8] = enc_state_->shared.block_ctx_map.num_dc_ctxs;
+    config[9] = enc_state_->shared.block_ctx_map.qf_thresholds.size();
+    config[10] = 32; // nzero_stride
+    config[11] = enc_state_->shared.block_ctx_map.ctx_map.size();
+    config[12] = do_once[0];
+    config[13] = do_once[1];
+    config[14] = do_once[2];
+    config[15] = do_once[3];
+    config[16] = do_once[4];
+    /*
+     uint32_t nempty_cnt_ptr[5];
+     uint32_t largest_idx_ptr[5];
+     */
+
+    // orders
+    uint32_t* coeff_orders_ddr = new uint32_t[MAX_ORDERS_SIZE];
+    const coeff_order_t* JXL_RESTRICT orders = &shared.coeff_orders[0];
+    for (int i = 0; i < MAX_ORDERS_SIZE; i++) {
+        coeff_orders_ddr[i] = orders[i];
+    }
+
+    // ac_coef
+    int32_t* ac_coeff_ordered_ddr = new int32_t[ALL_PIXEL];
+    uint64_t group_offset_1 = 0;
+    for (int group_index = 0; group_index < shared.frame_dim.num_groups; group_index++) {
+        const Rect rect = shared.BlockGroupRect(group_index);
+        const int32_t* ac_rows[3] = {
+            enc_state_->coeffs[0]->PlaneRow(0, group_index, 0).ptr32,
+            enc_state_->coeffs[0]->PlaneRow(1, group_index, 0).ptr32,
+            enc_state_->coeffs[0]->PlaneRow(2, group_index, 0).ptr32,
+        };
+        int rect_xsize_block = rect.xsize();
+        int rect_ysize_block = rect.ysize();
+
+        uint32_t offset[3] = {0, 0, 0};
+        int acc_block_offset = 0;
+        for (int by = 0; by < rect_ysize_block; by++) {
+            int by_offset = by * rect_xsize_block * kDCTBlockSize * 3;
+            for (int bx = 0; bx < rect_xsize_block; bx++) {
+                int bx_offset = bx * kDCTBlockSize * 3;
+                for (int c : {1, 0, 2}) {
+                    // strategy
+                    AcStrategyRow acs_row = shared.ac_strategy.ConstRow(rect, by);
+                    AcStrategy acs = acs_row[bx];
+
+                    // cx & cy
+                    uint32_t cx = acs.covered_blocks_x();
+                    uint32_t cy = acs.covered_blocks_y();
+
+                    // covered blocks
+                    const uint32_t covered_blocks = cx * cy; // = #LLF coefficients
+
+                    // size
+                    const uint32_t size = covered_blocks * kDCTBlockSize;
+
+                    // ordered_coef
+                    int ord = kStrategyOrder[acs.RawStrategy()];
+                    const coeff_order_t* JXL_RESTRICT order = &orders[CoeffOrderOffset(ord, c)];
+
+                    // block address
+                    if (acs.IsFirstBlock()) {
+                        const int32_t* block = ac_rows[c] + offset[c];
+                        for (int k = 0; k < covered_blocks * kDCTBlockSize; k++) {
+                            ac_coeff_ordered_ddr[k + acc_block_offset + group_offset_1] = block[order[k]];
+                        }
+                        acc_block_offset += size;
+                        offset[c] += size;
+                    }
+                }
+            }
+        }
+        group_offset_1 += rect_ysize_block * rect_xsize_block * kDCTBlockSize * 3;
+    }
+
+    // ac_strategy
+    int32_t* strategy_ddr = new int32_t[MAX_NUM_BLK88];
+    int group_offset_0 = 0;
+    for (int group_index = 0; group_index < shared.frame_dim.num_groups; group_index++) {
+        const Rect rect = shared.BlockGroupRect(group_index);
+        int rect_xsize_block = rect.xsize();
+        int rect_ysize_block = rect.ysize();
+        for (int by = 0; by < rect_ysize_block; by++) {
+            for (int bx = 0; bx < rect_xsize_block; bx++) {
+                AcStrategyRow acs_row = shared.ac_strategy.ConstRow(rect, by);
+                AcStrategy acs = acs_row[bx];
+                strategy_ddr[group_offset_0 + by * rect_xsize_block + bx] = acs.Strategy();
+            }
+        }
+
+        group_offset_0 += rect_ysize_block * rect_xsize_block;
+    }
+
+    // quant field & quant dc
+    const int32_t* qf_rows = enc_state_->shared.raw_quant_field.ConstRow(0);
+    const uint8_t* qdc_rows = enc_state_->shared.quant_dc.ConstRow(0);
+
+    int32_t* qf_ddr = new int32_t[MAX_NUM_BLK88];
+    uint8_t* qdc_ddr = new uint8_t[MAX_NUM_BLK88];
+
+    int qdc_bytes_per_row = enc_state_->shared.quant_dc.bytes_per_row();
+    int qf_bytes_per_row = enc_state_->shared.raw_quant_field.bytes_per_row();
+
+    int group_offset_qdc_qf = 0;
+    for (int group_index = 0; group_index < shared.frame_dim.num_groups; group_index++) {
+        const Rect rect = shared.BlockGroupRect(group_index);
+        int rect_xsize_block = rect.xsize();
+        int rect_ysize_block = rect.ysize();
+        for (int by = 0; by < rect_ysize_block; by++) {
+            for (int bx = 0; bx < rect_xsize_block; bx++) {
+                // qdc & qf
+                int qdc_y_offset = (rect.y0() + by) * qdc_bytes_per_row + rect.x0();
+                uint8_t dc_idx = qdc_rows[bx + qdc_y_offset];
+                qdc_ddr[bx + by * rect_xsize_block + group_offset_qdc_qf] = dc_idx;
+
+                int qf_y_offset = (rect.y0() + by) * (qf_bytes_per_row >> 2) + rect.x0();
+                int32_t acc_qf = qf_rows[bx + qf_y_offset];
+                qf_ddr[bx + by * rect_xsize_block + group_offset_qdc_qf] = acc_qf;
+            }
+        }
+
+        group_offset_qdc_qf += rect_ysize_block * rect_xsize_block;
+    }
+
+    // block_ctx_map & qf_thresholds
+    uint8_t* acc_ctx_map = new uint8_t[MAX_CTX_MAP_SIZE];
+    uint32_t* qf_thresholds = new uint32_t[MAX_QF_THRESH_SIZE];
+
+    int ctx_map_size = enc_state_->shared.block_ctx_map.ctx_map.size();
+    int qf_thresholds_size = enc_state_->shared.block_ctx_map.qf_thresholds.size();
+
+    for (int i = 0; i < ctx_map_size; i++) {
+        acc_ctx_map[i] = enc_state_->shared.block_ctx_map.ctx_map[i];
+    }
+
+    for (int i = 0; i < qf_thresholds_size; i++) {
+        qf_thresholds[i] = enc_state_->shared.block_ctx_map.qf_thresholds[i];
+    }
+
+    // ac_token
+    uint64_t* ac_tokens_ddr = new uint64_t[MAX_AC_TOKEN_SIZE];
+
+    // tokenize
+    uint32_t token0_size = 0;
+    for (int i = 0; i < tokens0.size(); i++) {
+        token0_size += tokens0[i].size();
+    }
+
+    // ap_uint<64>* tokens0_ptr = (ap_uint<64>*)malloc(MAX_AC_TOKEN_SIZE * sizeof(ap_uint<64>));
+    uint64_t* tokens0_ptr = (uint64_t*)malloc(MAX_AC_TOKEN_SIZE * sizeof(uint64_t));
+    tokens0_ptr[0] = token0_size;
+    uint32_t cnt = 1;
+    for (int i = 0; i < tokens0.size(); i++) {
+        for (int j = 0; j < tokens0[i].size(); j++) {
+            ap_uint<64> reg;
+            reg.range(31, 0) = tokens0[i][j].value;
+            reg.range(62, 32) = tokens0[i][j].context;
+            reg[63] = tokens0[i][j].is_lz77_length;
+            tokens0_ptr[cnt] = (uint64_t)reg;
+            cnt++;
+        }
+    }
+
+    uint32_t token1_size = 0;
+    for (int i = 0; i < tokens1.size(); i++) {
+        token1_size += tokens1[i].size();
+    }
+
+    // for acc cosim
+    // token1_size = MAX_AC_TOKEN_SIZE;
+
+    // ap_uint<64>* tokens1_ptr = (ap_uint<64>*)malloc(MAX_AC_TOKEN_SIZE * sizeof(ap_uint<64>));
+    uint64_t* tokens1_ptr = (uint64_t*)malloc(MAX_AC_TOKEN_SIZE * sizeof(uint64_t));
+
+    tokens1_ptr[0] = token1_size;
+    cnt = 1;
+    for (int i = 0; i < tokens1.size(); i++) {
+        for (int j = 0; j < tokens1[i].size(); j++) {
+            ap_uint<64> reg;
+            reg.range(31, 0) = tokens1[i][j].value;
+            reg.range(62, 32) = tokens1[i][j].context;
+            reg[63] = tokens1[i][j].is_lz77_length;
+            tokens1_ptr[cnt] = (uint64_t)reg;
+            cnt++;
+        }
+    }
+
+    uint32_t token2_size = 0;
+    for (int i = 0; i < tokens2.size(); i++) {
+        token2_size += tokens2[i].size();
+    }
+    // ap_uint<64>* tokens2_ptr = (ap_uint<64>*)malloc(MAX_AC_TOKEN_SIZE * sizeof(ap_uint<64>));
+    uint64_t* tokens2_ptr = (uint64_t*)malloc(MAX_AC_TOKEN_SIZE * sizeof(uint64_t));
+    tokens2_ptr[0] = token2_size;
+    cnt = 1;
+    for (int i = 0; i < tokens2.size(); i++) {
+        for (int j = 0; j < tokens2[i].size(); j++) {
+            ap_uint<64> reg;
+            reg.range(31, 0) = tokens2[i][j].value;
+            reg.range(62, 32) = tokens2[i][j].context;
+            reg[63] = tokens2[i][j].is_lz77_length;
+            tokens2_ptr[cnt] = (uint64_t)reg;
+            cnt++;
+        }
+    }
+
+    uint32_t token3_size = 0;
+    for (int i = 0; i < tokens3.size(); i++) {
+        token3_size += tokens3[i].size();
+    }
+    // ap_uint<64>* tokens3_ptr = (ap_uint<64>*)malloc(MAX_AC_TOKEN_SIZE * sizeof(ap_uint<64>));
+    uint64_t* tokens3_ptr = (uint64_t*)malloc(MAX_AC_TOKEN_SIZE * sizeof(uint64_t));
+    tokens3_ptr[0] = token3_size;
+    cnt = 1;
+    for (int i = 0; i < tokens3.size(); i++) {
+        for (int j = 0; j < tokens3[i].size(); j++) {
+            ap_uint<64> reg;
+            reg.range(31, 0) = tokens3[i][j].value;
+            reg.range(62, 32) = tokens3[i][j].context;
+            reg[63] = tokens3[i][j].is_lz77_length;
+            tokens3_ptr[cnt] = (uint64_t)reg;
+            cnt++;
+        }
+    }
+
+    printf("==== token_size:%d, %d, %d, %d =====\n ", token0_size, token1_size, token2_size, token3_size);
+
+    std::vector<int32_t*> histograms_ptr(5);
+    std::vector<uint32_t*> histograms_size_ptr(5);
+    std::vector<uint32_t*> total_count_ptr(5);
+    std::vector<uint32_t*> nonempty_ptr(5);
+    uint32_t nempty_cnt_ptr[5];
+    uint32_t largest_idx_ptr[5];
+
+    for (int i = 0; i < 5; i++) {
+        histograms_ptr[i] = (int32_t*)malloc(4096 * 40 * sizeof(int32_t));
+        histograms_size_ptr[i] = (uint32_t*)malloc(4096 * sizeof(uint32_t));
+        total_count_ptr[i] = (uint32_t*)malloc(4096 * sizeof(uint32_t));
+        nonempty_ptr[i] = (uint32_t*)malloc(4096 * sizeof(uint32_t));
+    }
+
+    //=====================================
+    // kernel_code
+    //=====================================
+    // std::string xclbinPath =
+    //     "/wrk/xsjhdnobkup3/tianminr/jxl_debug/xf_codec/L2/demos/jxlEnc/tokInit_histogram/"
+    //     "build_dir.sw_emu.xilinx_u50_gen3x16_xdma_201920_3/jxlEnc.xclbin";
+    // std::string xclbinPath =
+    //     "/wrk/xsjhdnobkup3/tianminr/jxl_debug/xf_codec/L2/demos/jxlEnc/tokInit_histogram/"
+    //     "build_dir.hw_emu.xilinx_u50_gen3x16_xdma_201920_3/jxlEnc.xclbin";
+    // std::string xclbinPath =
+    // "/wrk/xsjhdnobkup3/tianminr/jxl_hw/xf_codec/L2/demos/jxlEnc/tokInit_histogram/build_dir.hw.xilinx_u50_gen3x16_xdma_201920_3/jxlEnc.xclbin";
+
+    hls_ANSinitHistogram_wrapper(xclbinPath, config,
+                                 //====================
+                                 ac_coeff_ordered_ddr, strategy_ddr, qf_ddr, qdc_ddr, acc_ctx_map, qf_thresholds,
+                                 ac_tokens_ddr,
+                                 //====================
+                                 tokens0_ptr, tokens1_ptr, tokens2_ptr, tokens3_ptr,
+                                 //====================
+                                 histograms_ptr[0], histograms_size_ptr[0], total_count_ptr[0], nonempty_ptr[0],
+                                 //====================
+                                 histograms_ptr[1], histograms_size_ptr[1], total_count_ptr[1], nonempty_ptr[1],
+                                 //====================
+                                 histograms_ptr[2], histograms_size_ptr[2], total_count_ptr[2], nonempty_ptr[2],
+                                 //====================
+                                 histograms_ptr[3], histograms_size_ptr[3], total_count_ptr[3], nonempty_ptr[3],
+                                 //=====================
+                                 histograms_ptr[4], histograms_size_ptr[4], total_count_ptr[4], nonempty_ptr[4]);
+
+    // post-process: read largest index & non-empty count back from config
+    largest_idx_ptr[0] = config[17];
+    largest_idx_ptr[1] = config[18];
+    largest_idx_ptr[2] = config[19];
+    largest_idx_ptr[3] = config[20];
+    largest_idx_ptr[4] = config[21];
+
+    nempty_cnt_ptr[0] = config[22];
+    nempty_cnt_ptr[1] = config[23];
+    nempty_cnt_ptr[2] = config[24];
+    nempty_cnt_ptr[3] = config[25];
+    nempty_cnt_ptr[4] = config[26];
+
+    int all_end = 0;
+    int group_end = 0;
+    int idx = 0, vec_idx = 0;
+    int group_index = 0;
+    std::vector<std::vector<Token> >& acc_ac_tokens = enc_state_->passes[0].ac_tokens;
+    while (!all_end) {
+        uint64_t token_tmp = ac_tokens_ddr[idx];
+        all_end = token_tmp >> 63;
+        group_end = token_tmp >> 62;
+        if (group_end == 1) {
+            group_index++;
+            vec_idx = 0;
+        } else if (all_end) {
+            continue;
+        } else {
+            uint32_t value = token_tmp % 0xffffffff00000000;
+            uint32_t context = (token_tmp >> 32) % 0xfffffffff000000;
+            acc_ac_tokens[group_index].emplace_back(context, value);
+        }
+        idx++;
+    }
+
+    largest_idx[0] = largest_idx_ptr[0];
+    largest_idx[1] = largest_idx_ptr[1];
+    largest_idx[2] = largest_idx_ptr[2];
+    largest_idx[3] = largest_idx_ptr[3];
+    largest_idx[4] = largest_idx_ptr[4];
+
+    delete[] strategy_ddr;
+    delete[] ac_coeff_ordered_ddr;
+    delete[] coeff_orders_ddr;
+    delete[] ac_tokens_ddr;
+    delete[] acc_ctx_map;
+    delete[] qf_thresholds;
+
+    for (int i = 0; i < 5; i++) {
+        if (do_once[i]) {
+            for (int j = 0; j < histograms_[i].size(); j++) {
+                histograms_[i][j].data_.resize(histograms_size_ptr[i][j]);
+                histograms_[i][j].total_count_ = total_count_ptr[i][j];
+                for (int k = 0; k < histograms_size_ptr[i][j]; k++) {
+                    histograms_[i][j].data_[k] = histograms_ptr[i][j * 40 + k];
+                }
+            }
+
+            nonempty_histograms[i].reserve(histograms_[i].size());
+            for (int j = 0; j < nempty_cnt_ptr[i]; j++) {
+                nonempty_histograms[i].push_back(nonempty_ptr[i][j]);
+            }
+
+            if (histograms_[i].size() > 1) {
+                largest_idx[i] =
+                    std::find(nonempty_histograms[i].begin(), nonempty_histograms[i].end(), largest_idx[i]) -
+                    nonempty_histograms[i].begin();
+            }
+        }
+    }
+    do_prefix_out[0] = 0;
+    do_prefix_out[1] = 0;
+    do_prefix_out[2] = 0;
+    do_prefix_out[3] = 0;
+    do_prefix_out[4] = 0;
+
+    // kernel: acc_ANSclusterHistogram
+    acc_ANSclusterHistogram(is_small_image, do_once, do_inner, do_prefix_in, params, histograms_, num_contexts,
+                            context_map, nonempty_histograms, largest_idx, codes, clustered_histograms,
+                            histogram_symbols, writer, layer, clustered_histogramsin, tokensin, codesin,
+                            context_map_in);
+
+    for (int i = 0; i < 5; i++) {
+        if (!do_once[i]) continue;
+        if (i == 0) {
+            tokens_c0 = tokensin[i];
+            codes_c0 = codesin[i];
+            context_map_c0 = context_map_in[i];
+            clustered_histograms0 = clustered_histograms[i];
+            clustered_histograms_c0 = clustered_histogramsin[i];
+        } else if (i == 1) {
+            tokens_c1 = tokensin[i];
+            codes_c1 = codesin[i];
+            context_map_c1 = context_map_in[i];
+            clustered_histograms1 = clustered_histograms[i];
+            clustered_histograms_c1 = clustered_histogramsin[i];
+        } else if (i == 2) {
+            tokens_c2 = tokensin[i];
+            codes_c2 = codesin[i];
+            context_map_c2 = context_map_in[i];
+            clustered_histograms2 = clustered_histograms[i];
+            clustered_histograms_c2 = clustered_histogramsin[i];
+        } else if (i == 3) {
+            tokens_c3 = tokensin[i];
+            codes_c3 = codesin[i];
+            context_map_c3 = context_map_in[i];
+            clustered_histograms3 = clustered_histograms[i];
+            clustered_histograms_c3 = clustered_histogramsin[i];
+        } else if (i == 4) {
+            tokens_c4 = tokensin[i];
+            codes_c4 = codesin[i];
+            context_map_c4 = context_map_in[i];
+            clustered_histograms4 = clustered_histograms[i];
+            clustered_histograms_c4 = clustered_histogramsin[i];
+        }
+    }
+
+    // ==============================================
+    // Do StoreEntropyCodes for outer histogram
+    // ==============================================
+    // printf("do_prefix_out = %d, %d, %d, %d, %d\n", do_prefix_out[0],
+    // do_prefix_out[1], do_prefix_out[2], do_prefix_out[3], do_prefix_out[4]);
+
+    if (do_once[0]) {
+        if (!is_small_image) {
+            writer0->update_part(4);
+        } else {
+            writer0->update_part(4);
+        }
+        StoreEntropyCodesNew(params0, tokens0, &codes0, do_prefix_out[0], writer0, layer0, nullptr,
+                             clustered_histograms0);
+        bcm_codes = codes0;
+        bcm_dummy_context_map = context_map0;
+    }
+    if (do_once[1]) {
+        if (!is_small_image) {
+            writer1->update_part(34);
+        } else {
+            writer1->update_part(34);
+        }
+        StoreEntropyCodesNew(params1, tokens1, &codes1, do_prefix_out[1], writer1, layer1, nullptr,
+                             clustered_histograms1);
+        modularFramTree_code = codes1;
+        modularFramTree_ctxmap = context_map1;
+    }
+    if (do_once[2]) {
+        if (!is_small_image) {
+            writer2->update_part(54);
+        } else {
+            writer2->update_part(54);
+        }
+        StoreEntropyCodesNew(params2, tokens2, &codes2, do_prefix_out[2], writer2, layer2, nullptr,
+                             clustered_histograms2);
+        modular_frame_encoder->code = codes2;
+        modular_frame_encoder->context_map = context_map2;
+    }
+    if (do_once[3]) {
+        if (!is_small_image) {
+            writer3->update_part(4);
+        } else {
+            writer3->update_part(84);
+        }
+        StoreEntropyCodesNew(params3, tokens3, &codes3, do_prefix_out[3], writer3, layer3, nullptr,
+                             clustered_histograms3);
+        coefOrders_codes = codes3;
+        coefOrders_context_map = context_map3;
+    }
+    if (do_once[4]) {
+        if (!is_small_image) {
+            writer4->update_part(24);
+        } else {
+            writer4->update_part(104);
+        }
+        StoreEntropyCodesNew(params4, tokens4, &codes4, do_prefix_out[4], writer4, layer4, nullptr,
+                             clustered_histograms4);
+        enc_state_->passes[0].codes = codes4;
+        enc_state_->passes[0].context_map = context_map4;
+    }
+
+    // ==============================================
+    // Do StoreEntropyCodes for inner histogram
+    // ==============================================
+    // printf("do_prefix_in = %d, %d, %d, %d, %d\n", do_prefix_in[0],
+    // do_prefix_in[1], do_prefix_in[2], do_prefix_in[3], do_prefix_in[4]);
+
+    if (do_inner[0]) {
+        if (!is_small_image) {
+            writer0->update_part(2);
+        } else {
+            writer0->update_part(2);
+        }
+        StoreEntropyCodesNew(params0, tokens_c0, &codes_c0, do_prefix_in[0], writer0, 0, nullptr,
+                             clustered_histograms_c0);
+    }
+    if (do_inner[1]) {
+        if (!is_small_image) {
+            writer1->update_part(32);
+        } else {
+            writer1->update_part(32);
+        }
+        StoreEntropyCodesNew(params1, tokens_c1, &codes_c1, do_prefix_in[1], writer1, 0, nullptr,
+                             clustered_histograms_c1);
+    }
+    if (do_inner[2]) {
+        if (!is_small_image) {
+            writer2->update_part(52);
+        } else {
+            writer2->update_part(52);
+        }
+        StoreEntropyCodesNew(params2, tokens_c2, &codes_c2, do_prefix_in[2], writer2, 0, nullptr,
+                             clustered_histograms_c2);
+    }
+    if (do_inner[3]) {
+        if (!is_small_image) {
+            writer3->update_part(2);
+        } else {
+            writer3->update_part(82);
+        }
+        StoreEntropyCodesNew(params3, tokens_c3, &codes_c3, do_prefix_in[3], writer3, 0, nullptr,
+                             clustered_histograms_c3);
+    }
+    if (do_inner[4]) {
+        if (!is_small_image) {
+            writer4->update_part(22);
+        } else {
+            writer4->update_part(102);
+        }
+        StoreEntropyCodesNew(params4, tokens_c4, &codes_c4, do_prefix_in[4], writer4, 0, nullptr,
+                             clustered_histograms_c4);
+    }
+
+    // ==============================================
+    // Do WriteTokens for inner histogram
+    // ==============================================
+    // printf("do_inner = %d, %d, %d, %d, %d\n", do_inner[0], do_inner[1],
+    // do_inner[2], do_inner[3], do_inner[4]);
+    if (do_inner[0]) {
+        if (!is_small_image) {
+            writer0->update_part(3);
+        } else {
+            writer0->update_part(3);
+        }
+        // printf("%s: %s: %d, WriteTokens token size out=%zu,
+        // codes.encoding_info.size=%zu\n",
+        //  __FILE__, __FUNCTION__, __LINE__, tokens_c0[0].size(),
+        //  codes_c0.encoding_info.size(), context_map_c0.size());
+        WriteTokens(tokens_c0[0], codes_c0, context_map_c0, writer0);
+    }
+    if (do_inner[1]) {
+        if (!is_small_image) {
+            writer1->update_part(33);
+        } else {
+            writer1->update_part(33);
+        }
+        // printf("%s: %s: %d, WriteTokens token size out=%zu,
+        // codes.encoding_info.size=%zu, context_map.size=%d\n",
+        //  __FILE__, __FUNCTION__, __LINE__, tokens_c0[0].size(),
+        //  codes_c0.encoding_info.size(), context_map_c0.size());
+        WriteTokens(tokens_c1[0], codes_c1, context_map_c1, writer1);
+    }
+    if (do_inner[2]) {
+        if (!is_small_image) {
+            writer2->update_part(53);
+        } else {
+            writer2->update_part(53);
+        }
+        // printf("%s: %s: %d, WriteTokens token size out=%zu,
+        // codes.encoding_info.size=%zu, context_map.size=%d\n",
+        //  __FILE__, __FUNCTION__, __LINE__, tokens_c0[0].size(),
+        //  codes_c0.encoding_info.size(), context_map_c0.size());
+        WriteTokens(tokens_c2[0], codes_c2, context_map_c2, writer2);
+    }
+    if (do_inner[3]) {
+        if (!is_small_image) {
+            writer3->update_part(3);
+        } else {
+            writer3->update_part(83);
+        }
+        // printf("%s: %s: %d, WriteTokens token size out=%zu,
+        // codes.encoding_info.size=%zu, context_map.size=%d\n",
+        //  __FILE__, __FUNCTION__, __LINE__, tokens_c0[0].size(),
+        //  codes_c0.encoding_info.size(), context_map_c0.size());
+        WriteTokens(tokens_c3[0], codes_c3, context_map_c3, writer3);
+    }
+    if (do_inner[4]) {
+        if (!is_small_image) {
+            writer4->update_part(23);
+        } else {
+            writer4->update_part(103);
+        }
+        // printf("%s: %s: %d, WriteTokens token size out=%zu,
+        // codes.encoding_info.size=%zu, context_map.size=%d\n",
+        //  __FILE__, __FUNCTION__, __LINE__, tokens_c0[0].size(),
+        //  codes_c0.encoding_info.size(), context_map_c0.size());
+        WriteTokens(tokens_c4[0], codes_c4, context_map_c4, writer4);
+    }
+    return true;
+}
+
+// Emits the already tokenized frame sections into their BitWriters, using the
+// entropy codes and context maps supplied by the caller.
+//
+// Sections are written in this order: block context map, modular tree and
+// modular global stream (all into group_codes_writer), one writer per DC
+// group, the coefficient orders (acInfo_writer), and finally one writer per
+// (AC group, pass).
+//
+// dc_group_writers and acGroupWriters are appended to by this function;
+// acGroupWriters is laid out as [group_index * num_passes + pass].
+// ans_cost, mtf_cost and bcm_mtf_tokens are not read here.
+// The update_part() ids used below also appear in the part sequences that
+// acc_writeout hands to BitWriter::Finalize().
+//
+// Returns true on success, or the status of the first Bundle::Write that fails.
+Status acc_ANS_tokens(LossyFrameEncoder& lossy_frame_encoder,
+                      std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                      const size_t num_groups,
+                      PassesEncoderState* passes_enc_state,
+                      FrameDimensions frame_dim,
+                      std::unique_ptr<FrameHeader>& frame_header,
+                      std::vector<std::vector<Token> >& coefOrders_tokens,
+                      std::vector<BitWriter>& group_codes,
+                      BitWriter* group_codes_writer,
+                      BitWriter* acInfo_writer,
+                      std::vector<BitWriter*>& dc_group_writers,
+                      std::vector<BitWriter*>& acGroupWriters,
+                      size_t& ans_cost,
+                      size_t& mtf_cost,
+                      std::vector<std::vector<Token> >& bcm_tokens,
+                      std::vector<std::vector<Token> >& bcm_mtf_tokens,
+                      EntropyEncodingData& bcm_codes,
+                      std::vector<uint8_t>& bcm_dummy_context_map,
+
+                      EntropyEncodingData& modularFramTree_code,
+                      std::vector<uint8_t>& modularFramTree_ctxmap,
+
+                      EntropyEncodingData& coefOrders_codes,
+                      std::vector<uint8_t>& coefOrders_context_map,
+                      std::vector<AuxOut>& aux_outs,
+                      AuxOut* aux_out) {
+    PassesEncoderState* JXL_RESTRICT enc_state_ = lossy_frame_encoder.State();
+
+    const size_t num_passes = passes_enc_state->progressive_splitter.GetNumPasses();
+    const bool is_small_image = frame_dim.num_groups == 1 && num_passes == 1;
+
+    const bool has_ac_global = true;
+
+    auto& dct = enc_state_->shared.block_ctx_map.dc_thresholds;
+    auto& qft = enc_state_->shared.block_ctx_map.qf_thresholds;
+    auto& ctx_map = enc_state_->shared.block_ctx_map.ctx_map;
+
+    //============ANSWriteTokens Encode GlobalDCInfo: Block Context Map=========
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        // Nothing is written when the block context map is the default one.
+        const bool is_default_ctx_map = dct[0].empty() && dct[1].empty() && dct[2].empty() && qft.empty() &&
+                                        ctx_map.size() == 21 &&
+                                        std::equal(ctx_map.begin(), ctx_map.end(), jxl::kDefaultCtxMap);
+        if (!is_default_ctx_map) {
+            if (enc_state_->shared.block_ctx_map.num_ctxs != 1) {
+                const size_t entry_bits = CeilLog2Nonzero(enc_state_->shared.block_ctx_map.num_ctxs);
+                // Tokens are only emitted when an entry needs at least 4 bits. A further
+                // comparison against ans_cost / mtf_cost was commented out in this check,
+                // so entry_bits alone decides.
+                if (entry_bits >= 4) {
+                    group_codes_writer->update_part(10);
+                    WriteTokens(bcm_tokens[0], bcm_codes, bcm_dummy_context_map, group_codes_writer);
+                }
+            }
+            BitWriter::Allotment allotmentGlobalDCInfoBCM(
+                group_codes_writer, (dct[0].size() + dct[1].size() + dct[2].size() + qft.size()) * 34 + 1 + 4 + 4 +
+                                        ctx_map.size() * 10 + 1024);
+            ReclaimAndCharge(group_codes_writer, &allotmentGlobalDCInfoBCM, kLayerAC, aux_out);
+        }
+    }
+
+    //============ANSWriteTokens Encode GlobalDCInfo: modular frame
+    // tree=========
+    if (!modular_frame_encoder->tree_tokens.empty() && !modular_frame_encoder->tree_tokens[0].empty()) {
+        group_codes_writer->update_part(40);
+        WriteTokens(modular_frame_encoder->tree_tokens[0], modularFramTree_code, modularFramTree_ctxmap,
+                    group_codes_writer, kLayerModularTree, aux_out);
+    }
+
+    //============ANSWriteTokens Encode GlobalDCInfo: modular frame
+    // token=========
+    group_codes_writer->update_part(60);
+    const size_t global_stream_id = ModularStreamId::Global().ID(frame_dim);
+    // An image with no channels has no header to decode, so nothing is written.
+    if (!modular_frame_encoder->stream_images[global_stream_id].channel.empty()) {
+        JXL_RETURN_IF_ERROR(Bundle::Write(modular_frame_encoder->stream_headers[global_stream_id],
+                                          group_codes_writer, kLayerModularGlobal, aux_out));
+        WriteTokens(modular_frame_encoder->tokens[global_stream_id], modular_frame_encoder->code,
+                    modular_frame_encoder->context_map, group_codes_writer, kLayerModularGlobal, aux_out);
+    }
+
+    //============================= ANSWriteTokens DC group=============
+    // First pass: pick the writer of every DC group and select its part.
+    for (size_t group_index = 0; group_index < frame_dim.num_dc_groups; group_index++) {
+        BitWriter* tmp = get_output(group_index + 1, group_codes, is_small_image);
+        dc_group_writers.emplace_back(tmp);
+        if (!is_small_image) {
+            tmp->init(200);
+            tmp->update_part(0);
+        } else {
+            tmp->update_part(70);
+        }
+    }
+
+    // Second pass: write the DC, modular-DC and AC-metadata streams of each group.
+    for (size_t group_index = 0; group_index < frame_dim.num_dc_groups; group_index++) {
+        AuxOut* my_aux_out = aux_out ? &aux_outs[0] : nullptr;
+        BitWriter* output = dc_group_writers[group_index];
+        if (frame_header->encoding == FrameEncoding::kVarDCT && !(frame_header->flags & FrameHeader::kUseDcFrame)) {
+            BitWriter::Allotment allotment(output, 2);
+            output->Write(2, modular_frame_encoder->extra_dc_precision[group_index]);
+            ReclaimAndCharge(output, &allotment, kLayerDC, my_aux_out);
+            const size_t dc_stream_id = ModularStreamId::VarDCTDC(group_index).ID(frame_dim);
+            if (!modular_frame_encoder->stream_images[dc_stream_id].channel.empty()) {
+                JXL_RETURN_IF_ERROR(
+                    Bundle::Write(modular_frame_encoder->stream_headers[dc_stream_id], output, kLayerDC, aux_out));
+                WriteTokens(modular_frame_encoder->tokens[dc_stream_id], modular_frame_encoder->code,
+                            modular_frame_encoder->context_map, output, kLayerDC, my_aux_out);
+            }
+        }
+
+        const size_t modular_dc_stream_id = ModularStreamId::ModularDC(group_index).ID(frame_dim);
+        if (!modular_frame_encoder->stream_images[modular_dc_stream_id].channel.empty()) {
+            JXL_RETURN_IF_ERROR(Bundle::Write(modular_frame_encoder->stream_headers[modular_dc_stream_id], output,
+                                              kLayerModularDcGroup, aux_out));
+            WriteTokens(modular_frame_encoder->tokens[modular_dc_stream_id], modular_frame_encoder->code,
+                        modular_frame_encoder->context_map, output, kLayerModularDcGroup, my_aux_out);
+        }
+
+        if (frame_header->encoding == FrameEncoding::kVarDCT) {
+            const Rect& rect = lossy_frame_encoder.State()->shared.DCGroupRect(group_index);
+            const size_t nb_bits = CeilLog2Nonzero(rect.xsize() * rect.ysize());
+            if (nb_bits != 0) {
+                BitWriter::Allotment allotment(output, nb_bits);
+                output->Write(nb_bits, modular_frame_encoder->ac_metadata_size[group_index] - 1);
+                ReclaimAndCharge(output, &allotment, kLayerControlFields, my_aux_out);
+            }
+            const size_t ac_meta_stream_id = ModularStreamId::ACMetadata(group_index).ID(frame_dim);
+            if (!modular_frame_encoder->stream_images[ac_meta_stream_id].channel.empty()) {
+                JXL_RETURN_IF_ERROR(Bundle::Write(modular_frame_encoder->stream_headers[ac_meta_stream_id], output,
+                                                  kLayerControlFields, aux_out));
+                WriteTokens(modular_frame_encoder->tokens[ac_meta_stream_id], modular_frame_encoder->code,
+                            modular_frame_encoder->context_map, output, kLayerControlFields, my_aux_out);
+            }
+        }
+    }
+
+    //============================= ANSWriteTokens AC Info=============
+    // NOTE(review): every pass with used orders writes coefOrders_tokens[0];
+    // confirm this is intended when there is more than one pass.
+    for (size_t i = 0; i < enc_state_->progressive_splitter.GetNumPasses(); i++) {
+        uint16_t used_orders = enc_state_->used_orders[i];
+        if (used_orders != 0) {
+            if (!is_small_image) {
+                acInfo_writer->update_part(19);
+            } else {
+                acInfo_writer->update_part(90);
+            }
+            WriteTokens(coefOrders_tokens[0], coefOrders_codes, coefOrders_context_map, acInfo_writer, kLayerOrder,
+                        aux_out);
+        }
+    }
+
+    //==========================================
+    if (!is_small_image) {
+        acInfo_writer->update_part(29);
+    } else {
+        acInfo_writer->update_part(109);
+    }
+
+    //========================Encode AC Group=============
+    // Collect the writer of every (group, pass) pair, group-major.
+    for (size_t group_index = 0; group_index < num_groups; group_index++) {
+        for (size_t i = 0; i < num_passes; i++) {
+            BitWriter* tmp =
+                get_output(AcGroupIndex(i, group_index, frame_dim.num_groups, frame_dim.num_dc_groups, has_ac_global),
+                           group_codes, is_small_image);
+            acGroupWriters.emplace_back(tmp);
+        }
+    }
+
+    for (size_t group_index = 0; group_index < num_groups; group_index++) {
+        for (size_t i = 0; i < num_passes; i++) {
+            BitWriter* acGroupWriter = acGroupWriters[group_index * num_passes + i];
+            if (frame_header->encoding == FrameEncoding::kVarDCT) {
+                // Select which histogram to use among those of the current pass.
+                const size_t num_histograms = enc_state_->shared.num_histograms;
+                // num_histograms is 0 only for lossless.
+                JXL_ASSERT(num_histograms == 0 || enc_state_->histogram_idx[group_index] < num_histograms);
+                const size_t histo_selector_bits = CeilLog2Nonzero(num_histograms);
+
+                if (histo_selector_bits != 0) {
+                    BitWriter::Allotment allotment(acGroupWriter, histo_selector_bits);
+                    acGroupWriter->Write(histo_selector_bits, enc_state_->histogram_idx[group_index]);
+                    ReclaimAndCharge(acGroupWriter, &allotment, kLayerAC, aux_out);
+                }
+                WriteTokens(enc_state_->passes[i].ac_tokens[group_index], enc_state_->passes[i].codes,
+                            enc_state_->passes[i].context_map, acGroupWriter, kLayerACTokens, aux_out);
+            }
+
+            const size_t modular_ac_stream_id = ModularStreamId::ModularAC(group_index, i).ID(frame_dim);
+            if (!modular_frame_encoder->stream_images[modular_ac_stream_id].channel.empty()) {
+                JXL_RETURN_IF_ERROR(Bundle::Write(modular_frame_encoder->stream_headers[modular_ac_stream_id],
+                                                  acGroupWriter, kLayerModularAcGroup, aux_out));
+                WriteTokens(modular_frame_encoder->tokens[modular_ac_stream_id], modular_frame_encoder->code,
+                            modular_frame_encoder->context_map, acGroupWriter, kLayerModularAcGroup, aux_out);
+            }
+        }
+    }
+
+    return true;
+}
+
+// Assembles the final frame bitstream in `writer`: the special frames, the
+// frame header, the TOC (one size entry per element of group_codes) and then
+// the byte-aligned payload of every group.
+//
+// Before the TOC is emitted each section writer is finalized with the list of
+// part ids it may contain. For a small image (one group, one pass) only
+// group_codes_writer is finalized, with the combined id list.
+// NOTE(review): this presumably relies on get_output() returning the same
+// writer for every section of a small image - confirm.
+//
+// Returns true on success, or the failing status of WriteFrameHeader /
+// U32Coder::Write.
+Status acc_writeout(LossyFrameEncoder& lossy_frame_encoder,
+                    const size_t num_groups,
+                    PassesEncoderState* passes_enc_state,
+                    std::unique_ptr<FrameHeader>& frame_header,
+                    FrameDimensions frame_dim,
+                    std::vector<BitWriter>& group_codes,
+                    BitWriter* writer,
+                    BitWriter* group_codes_writer,
+                    BitWriter* acInfo_writer,
+                    std::vector<BitWriter*>& dc_group_writers,
+                    std::vector<BitWriter*>& acGroupWriters,
+                    AuxOut* aux_out,
+                    const std::function<Status(size_t)>& resize_aux_outs) {
+    const size_t num_passes = passes_enc_state->progressive_splitter.GetNumPasses();
+    const bool is_small_image = frame_dim.num_groups == 1 && num_passes == 1;
+
+    writer->AppendByteAligned(lossy_frame_encoder.State()->special_frames);
+    frame_header->UpdateFlag(lossy_frame_encoder.State()->shared.image_features.patches.HasAny(),
+                             FrameHeader::kPatches);
+    frame_header->UpdateFlag(lossy_frame_encoder.State()->shared.image_features.splines.HasAny(),
+                             FrameHeader::kSplines);
+    JXL_RETURN_IF_ERROR(WriteFrameHeader(*frame_header, writer, aux_out));
+
+    // Resizing aux_outs to 0 also Assimilates the array. The returned status is
+    // deliberately discarded, as before.
+    static_cast<void>(resize_aux_outs(0));
+
+    for (BitWriter& bw : group_codes) {
+        bw.ZeroPadToByte(); // end of group.
+    }
+
+    if (is_small_image) {
+        std::vector<int> group_codes_seq{0,  1,  2,  3,  4,  10, 19, 20, 29, 30, 31, 32,  33,  34,  40,  50,  51,
+                                         52, 53, 54, 60, 70, 80, 81, 82, 83, 84, 90, 100, 101, 102, 103, 104, 109};
+        group_codes_writer->Finalize(group_codes_seq);
+    } else {
+        std::cout << "===============Group Codes writer Final==================" << std::endl;
+        std::vector<int> group_codes_seq{0, 1, 2, 3, 4, 10, 19, 20, 29, 30, 31, 32, 33, 34, 40, 50, 51, 52, 53, 54, 60};
+        group_codes_writer->Finalize(group_codes_seq);
+
+        std::cout << "===============DC Group writer Final==================" << std::endl;
+        std::vector<int> dc_group_seq{0};
+        for (size_t group_index = 0; group_index < frame_dim.num_dc_groups; group_index++) {
+            dc_group_writers[group_index]->Finalize(dc_group_seq);
+        }
+
+        std::cout << "===============AC Info writer Final==================" << std::endl;
+        std::vector<int> acInfo_seq{0, 1, 2, 3, 4, 10, 19, 20, 21, 22, 23, 24, 29};
+        acInfo_writer->Finalize(acInfo_seq);
+
+        std::cout << "===============AC Group writer Final==================" << std::endl;
+        std::vector<int> acGroup_seq{0};
+        // acGroupWriters is indexed group-major: [group_index * num_passes + pass].
+        for (size_t group_index = 0; group_index < num_groups; group_index++) {
+            for (size_t i = 0; i < num_passes; i++) {
+                acGroupWriters[group_index * num_passes + i]->Finalize(acGroup_seq);
+            }
+        }
+    }
+    std::cout << "===============Others writer Final==================" << std::endl;
+    BitWriter::Allotment allotmentGrpOffset(writer, MaxBits(group_codes.size()));
+    writer->Write(1, 0); // no permutation
+    writer->Finalize();
+    writer->ZeroPadToByte(); // before TOC entries
+
+    // TOC: the byte size of every group, in group_codes order.
+    for (size_t i = 0; i < group_codes.size(); i++) {
+        JXL_ASSERT(group_codes[i].BitsWritten() % kBitsPerByte == 0);
+        const size_t group_size = group_codes[i].BitsWritten() / kBitsPerByte;
+        JXL_RETURN_IF_ERROR(U32Coder::Write(kTocDist, group_size, writer));
+    }
+    writer->Finalize();
+    writer->ZeroPadToByte(); // before first group
+    ReclaimAndCharge(writer, &allotmentGrpOffset, kLayerTOC, aux_out);
+
+    writer->AppendByteAligned(group_codes);
+    writer->ZeroPadToByte(); // end of frame.
+
+    return true;
+}
+
+// Runs the third encoder phase: creates one BitWriter per TOC entry, then
+// calls acc_histogram, acc_ANS_tokens and acc_writeout in that order so the
+// finished frame ends up in `writer`.
+//
+// xclbinPath is forwarded to acc_histogram. opsin, pool and ib are accepted but
+// not used in this function.
+//
+// Returns true on success; a failure reported by acc_ANS_tokens or
+// acc_writeout is returned to the caller instead of being dropped.
+Status acc_phase3(std::string xclbinPath,
+                  Image3F& opsin,
+                  LossyFrameEncoder& lossy_frame_encoder,
+                  std::unique_ptr<ModularFrameEncoder>& modular_frame_encoder,
+                  CompressParams cparams,
+                  std::unique_ptr<FrameHeader>& frame_header,
+                  PassesEncoderState* passes_enc_state,
+                  FrameDimensions frame_dim,
+                  BitWriter* writer,
+                  const size_t num_groups,
+                  AuxOut* aux_out,
+                  ThreadPool* pool,
+                  std::vector<AuxOut>& aux_outs,
+                  const ImageBundle& ib,
+                  const std::function<Status(size_t)>& resize_aux_outs) {
+    std::cout << "===========acc_kernel3 start================" << std::endl;
+    std::vector<std::vector<Token> > coefOrders_tokens(1);
+
+    const size_t num_passes = passes_enc_state->progressive_splitter.GetNumPasses();
+
+    // DC global info + DC groups + AC global info + AC groups *
+    // num_passes.
+    const bool has_ac_global = true;
+    std::vector<BitWriter> group_codes(
+        NumTocEntries(frame_dim.num_groups, frame_dim.num_dc_groups, num_passes, has_ac_global));
+    const size_t global_ac_index = frame_dim.num_dc_groups + 1;
+    const bool is_small_image = frame_dim.num_groups == 1 && num_passes == 1;
+
+    BitWriter* group_codes_writer = get_output(0, group_codes, is_small_image);
+    BitWriter* acInfo_writer = get_output(global_ac_index, group_codes, is_small_image);
+
+    std::vector<std::vector<Token> > bcm_tokens(1), bcm_mtf_tokens(1);
+    EntropyEncodingData bcm_codes;
+    std::vector<uint8_t> bcm_dummy_context_map;
+    // Passed by reference below; zero-initialized so no callee can read an
+    // indeterminate value.
+    size_t ans_cost = 0, mtf_cost = 0;
+
+    EntropyEncodingData modularFramTree_code;
+    std::vector<uint8_t> modularFramTree_ctxmap;
+
+    EntropyEncodingData coefOrders_codes;
+    std::vector<uint8_t> coefOrders_context_map;
+
+    std::vector<BitWriter*> dc_group_writers;
+    std::vector<BitWriter*> acGroupWriters;
+
+    // NOTE(review): acc_histogram's declaration is not visible from here, so its
+    // result (if any) is still not checked - confirm and propagate it if it
+    // returns a Status.
+    acc_histogram(lossy_frame_encoder, modular_frame_encoder, passes_enc_state, frame_dim, frame_header, cparams,
+                  coefOrders_tokens, group_codes_writer, acInfo_writer, ans_cost, mtf_cost, bcm_tokens, bcm_mtf_tokens,
+                  bcm_codes, bcm_dummy_context_map,
+
+                  modularFramTree_code, modularFramTree_ctxmap,
+
+                  coefOrders_codes, coefOrders_context_map,
+
+                  aux_outs, aux_out, xclbinPath);
+
+    JXL_RETURN_IF_ERROR(acc_ANS_tokens(
+        lossy_frame_encoder, modular_frame_encoder, num_groups, passes_enc_state, frame_dim, frame_header,
+        coefOrders_tokens, group_codes, group_codes_writer, acInfo_writer, dc_group_writers, acGroupWriters, ans_cost,
+        mtf_cost, bcm_tokens, bcm_mtf_tokens, bcm_codes, bcm_dummy_context_map,
+
+        modularFramTree_code, modularFramTree_ctxmap,
+
+        coefOrders_codes, coefOrders_context_map, aux_outs, aux_out));
+
+    JXL_RETURN_IF_ERROR(acc_writeout(lossy_frame_encoder, num_groups, passes_enc_state, frame_header, frame_dim,
+                                     group_codes, writer, group_codes_writer, acInfo_writer, dc_group_writers,
+                                     acGroupWriters, aux_out, resize_aux_outs));
+
+    return true;
+}
+} // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/.clang-format b/codec/L2/demos/jxlEnc/third_partys/.clang-format
new file mode 100644
index 0000000000..ff5c354782
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/.clang-format
@@ -0,0 +1,89 @@
+---
+# clang-format configuration for this directory tree.
+# NOTE(review): DisableFormat is set to true further down, which according to
+# the clang-format documentation turns formatting off completely - so the
+# remaining options are presumably kept only for reference. Confirm before
+# relying on any of them.
+Language:        Cpp
+AccessModifierOffset: -1
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlinesLeft: true
+AlignOperands:   true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Inline
+AllowShortIfStatementsOnASingleLine: true
+AllowShortLoopsOnASingleLine: true
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: true
+BinPackParameters: false
+BraceWrapping:
+  AfterClass:      false
+  AfterControlStatement: false
+  AfterEnum:       false
+  AfterFunction:   false
+  AfterNamespace:  false
+  AfterObjCDeclaration: false
+  AfterStruct:     false
+  AfterUnion:      false
+  BeforeCatch:     false
+  BeforeElse:      false
+  IndentBraces:    false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+ColumnLimit:     120
+CommentPragmas:  '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+# Formatting is switched off for files governed by this configuration.
+DisableFormat: true
+ExperimentalAutoDetectBinPacking: false
+ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]
+# Include groups, matched in order: <...h> headers, other <...> headers, the rest.
+IncludeCategories:
+  - Regex:           '^<.*\.h>'
+    Priority:        1
+  - Regex:           '^<.*'
+    Priority:        2
+  - Regex:           '.*'
+    Priority:        3
+IndentCaseLabels: true
+IndentWidth:     4
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 4
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: false
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 200
+PointerAlignment: Left
+ReflowComments:  true
+SortIncludes:    false
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles:  false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard:        Cpp03
+TabWidth:        8
+UseTab:          Never
+...
+
diff --git a/codec/L2/demos/jxlEnc/third_partys/build/lib/include/jxl/jxl_export.h b/codec/L2/demos/jxlEnc/third_partys/build/lib/include/jxl/jxl_export.h
new file mode 100644
index 0000000000..1c73f277d3
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/build/lib/include/jxl/jxl_export.h
@@ -0,0 +1,42 @@
+
+/*
+ * Symbol-visibility and deprecation macros for the jxl library:
+ * JXL_EXPORT, JXL_NO_EXPORT, JXL_DEPRECATED and their combinations.
+ * NOTE(review): the layout matches what CMake's GenerateExportHeader emits and
+ * the file lives under build/, so it is presumably generated - confirm before
+ * editing it by hand.
+ */
+#ifndef JXL_EXPORT_H
+#define JXL_EXPORT_H
+
+/* With JXL_STATIC_DEFINE set, both export macros expand to nothing. */
+#ifdef JXL_STATIC_DEFINE
+#  define JXL_EXPORT
+#  define JXL_NO_EXPORT
+#else
+#  ifndef JXL_EXPORT
+     /* Both branches below expand to the same default-visibility attribute. */
+#    ifdef JXL_INTERNAL_LIBRARY_BUILD
+        /* We are building this library */
+#      define JXL_EXPORT __attribute__((visibility("default")))
+#    else
+        /* We are using this library */
+#      define JXL_EXPORT __attribute__((visibility("default")))
+#    endif
+#  endif
+
+#  ifndef JXL_NO_EXPORT
+#    define JXL_NO_EXPORT __attribute__((visibility("hidden")))
+#  endif
+#endif
+
+#ifndef JXL_DEPRECATED
+#  define JXL_DEPRECATED __attribute__ ((__deprecated__))
+#endif
+
+#ifndef JXL_DEPRECATED_EXPORT
+#  define JXL_DEPRECATED_EXPORT JXL_EXPORT JXL_DEPRECATED
+#endif
+
+#ifndef JXL_DEPRECATED_NO_EXPORT
+#  define JXL_DEPRECATED_NO_EXPORT JXL_NO_EXPORT JXL_DEPRECATED
+#endif
+
+/* Disabled block: this header never defines JXL_NO_DEPRECATED itself. */
+#if 0 /* DEFINE_NO_DEPRECATED */
+#  ifndef JXL_NO_DEPRECATED
+#    define JXL_NO_DEPRECATED
+#  endif
+#endif
+
+#endif /* JXL_EXPORT_H */
diff --git a/codec/L2/demos/jxlEnc/third_partys/build/lib/include/jxl/jxl_threads_export.h b/codec/L2/demos/jxlEnc/third_partys/build/lib/include/jxl/jxl_threads_export.h
new file mode 100644
index 0000000000..d385f7b624
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/build/lib/include/jxl/jxl_threads_export.h
@@ -0,0 +1,42 @@
+
+/*
+ * Symbol-visibility and deprecation macros for the jxl_threads library:
+ * JXL_THREADS_EXPORT, JXL_THREADS_NO_EXPORT, JXL_THREADS_DEPRECATED and their
+ * combinations.
+ * NOTE(review): the layout matches what CMake's GenerateExportHeader emits and
+ * the file lives under build/, so it is presumably generated - confirm before
+ * editing it by hand.
+ */
+#ifndef JXL_THREADS_EXPORT_H
+#define JXL_THREADS_EXPORT_H
+
+/* With JXL_THREADS_STATIC_DEFINE set, both export macros expand to nothing. */
+#ifdef JXL_THREADS_STATIC_DEFINE
+#  define JXL_THREADS_EXPORT
+#  define JXL_THREADS_NO_EXPORT
+#else
+#  ifndef JXL_THREADS_EXPORT
+     /* Both branches below expand to the same default-visibility attribute. */
+#    ifdef JXL_THREADS_INTERNAL_LIBRARY_BUILD
+        /* We are building this library */
+#      define JXL_THREADS_EXPORT __attribute__((visibility("default")))
+#    else
+        /* We are using this library */
+#      define JXL_THREADS_EXPORT __attribute__((visibility("default")))
+#    endif
+#  endif
+
+#  ifndef JXL_THREADS_NO_EXPORT
+#    define JXL_THREADS_NO_EXPORT __attribute__((visibility("hidden")))
+#  endif
+#endif
+
+#ifndef JXL_THREADS_DEPRECATED
+#  define JXL_THREADS_DEPRECATED __attribute__ ((__deprecated__))
+#endif
+
+#ifndef JXL_THREADS_DEPRECATED_EXPORT
+#  define JXL_THREADS_DEPRECATED_EXPORT JXL_THREADS_EXPORT JXL_THREADS_DEPRECATED
+#endif
+
+#ifndef JXL_THREADS_DEPRECATED_NO_EXPORT
+#  define JXL_THREADS_DEPRECATED_NO_EXPORT JXL_THREADS_NO_EXPORT JXL_THREADS_DEPRECATED
+#endif
+
+/* Disabled block: this header never defines JXL_THREADS_NO_DEPRECATED itself. */
+#if 0 /* DEFINE_NO_DEPRECATED */
+#  ifndef JXL_THREADS_NO_DEPRECATED
+#    define JXL_THREADS_NO_DEPRECATED
+#  endif
+#endif
+
+#endif /* JXL_THREADS_EXPORT_H */
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec.cc b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec.cc
new file mode 100644
index 0000000000..43749fdab2
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec.cc
@@ -0,0 +1,226 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/codec.h"
+
+#include "lib/jxl/base/file_io.h"
+#if JPEGXL_ENABLE_APNG
+#include "lib/extras/codec_apng.h"
+#endif
+#if JPEGXL_ENABLE_EXR
+#include "lib/extras/codec_exr.h"
+#endif
+#if JPEGXL_ENABLE_GIF
+#include "lib/extras/codec_gif.h"
+#endif
+#include "lib/extras/codec_jpg.h"
+#include "lib/extras/codec_pgx.h"
+#include "lib/extras/codec_png.h"
+#include "lib/extras/codec_pnm.h"
+#include "lib/extras/codec_psd.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+namespace {
+
+// Any valid encoding is larger (ensures codecs can read the first few bytes)
+constexpr size_t kMinBytes = 9;
+
+}  // namespace
+
+// Returns the lower-case file extension (including the dot) for `codec`.
+// For kPNM the choice depends on the image: ".pgm" for grayscale, otherwise
+// ".pfm" when bits_per_sample is 32 and ".ppm" for anything else.
+// kUnknown yields an empty string.
+std::string ExtensionFromCodec(Codec codec, const bool is_gray,
+                               const size_t bits_per_sample) {
+  // Stays null only if `codec` is not one of the enumerators.
+  const char* extension = nullptr;
+  switch (codec) {
+    case Codec::kJPG:
+      extension = ".jpg";
+      break;
+    case Codec::kPGX:
+      extension = ".pgx";
+      break;
+    case Codec::kPNG:
+      extension = ".png";
+      break;
+    case Codec::kPNM:
+      if (is_gray) {
+        extension = ".pgm";
+      } else if (bits_per_sample == 32) {
+        extension = ".pfm";
+      } else {
+        extension = ".ppm";
+      }
+      break;
+    case Codec::kGIF:
+      extension = ".gif";
+      break;
+    case Codec::kEXR:
+      extension = ".exr";
+      break;
+    case Codec::kPSD:
+      extension = ".psd";
+      break;
+    case Codec::kUnknown:
+      extension = "";
+      break;
+  }
+  if (extension != nullptr) return std::string(extension);
+  JXL_UNREACHABLE;
+  return std::string();
+}
+
+// Maps a lower-case extension (with dot) to the codec that handles it.
+// ".pbm" and ".pfm" additionally overwrite *bits_per_sample (with 1 and 32
+// respectively); every other extension leaves it untouched. Unrecognized
+// extensions map to Codec::kUnknown.
+Codec CodecFromExtension(const std::string& extension,
+                         size_t* JXL_RESTRICT bits_per_sample) {
+  // The two PNM flavours that pin the sample depth.
+  if (extension == ".pbm" || extension == ".pfm") {
+    *bits_per_sample = (extension == ".pbm") ? 1 : 32;
+    return Codec::kPNM;
+  }
+
+  struct Entry {
+    const char* extension;
+    Codec codec;
+  };
+  static const Entry kKnownExtensions[] = {
+      {".png", Codec::kPNG},  {".jpg", Codec::kJPG}, {".jpeg", Codec::kJPG},
+      {".pgx", Codec::kPGX},  {".pgm", Codec::kPNM}, {".ppm", Codec::kPNM},
+      {".gif", Codec::kGIF},  {".exr", Codec::kEXR}, {".psd", Codec::kPSD},
+  };
+  for (const Entry& entry : kKnownExtensions) {
+    if (extension == entry.extension) return entry.codec;
+  }
+  return Codec::kUnknown;
+}
+
+// Tries each available decoder in turn on `bytes` and stops at the first one
+// that succeeds. The order is PNG, APNG (if enabled), PGX, PNM, GIF (if
+// enabled), JPG, PSD, EXR (if enabled).
+//   bytes:      encoded input; fewer than kMinBytes bytes is rejected.
+//   io:         receives the decoded image and metadata.
+//   pool:       forwarded to the decoders.
+//   orig_codec: if non-null, receives the codec whose decoder succeeded.
+// Returns a failure if the input is too short or no decoder accepts it.
+Status SetFromBytes(const Span<const uint8_t> bytes, CodecInOut* io,
+                    ThreadPool* pool, Codec* orig_codec) {
+  if (bytes.size() < kMinBytes) return JXL_FAILURE("Too few bytes");
+
+  io->metadata.m.bit_depth.bits_per_sample = 0;  // (For is-set check below)
+
+  // `codec` is assigned on every branch that does not return.
+  Codec codec;
+  if (DecodeImagePNG(bytes, pool, io)) {
+    codec = Codec::kPNG;
+  }
+#if JPEGXL_ENABLE_APNG
+  // APNG input is also reported as kPNG.
+  else if (DecodeImageAPNG(bytes, pool, io)) {
+    codec = Codec::kPNG;
+  }
+#endif
+  else if (DecodeImagePGX(bytes, pool, io)) {
+    codec = Codec::kPGX;
+  } else if (DecodeImagePNM(bytes, pool, io)) {
+    codec = Codec::kPNM;
+  }
+#if JPEGXL_ENABLE_GIF
+  else if (DecodeImageGIF(bytes, pool, io)) {
+    codec = Codec::kGIF;
+  }
+#endif
+  else if (DecodeImageJPG(bytes, pool, io)) {
+    codec = Codec::kJPG;
+  }
+  else if (DecodeImagePSD(bytes, pool, io)) {
+    codec = Codec::kPSD;
+  }
+#if JPEGXL_ENABLE_EXR
+  else if (DecodeImageEXR(bytes, pool, io)) {
+    codec = Codec::kEXR;
+  }
+#endif
+  else {
+    return JXL_FAILURE("Codecs failed to decode");
+  }
+  if (orig_codec) *orig_codec = codec;
+
+  io->CheckMetadata();
+  return true;
+}
+
+// Loads the whole file at `pathname` into memory and hands the bytes to
+// SetFromBytes. Fails if the file cannot be read or cannot be decoded.
+Status SetFromFile(const std::string& pathname, CodecInOut* io,
+                   ThreadPool* pool, Codec* orig_codec) {
+  PaddedBytes file_contents;
+  JXL_RETURN_IF_ERROR(ReadFile(pathname, &file_contents));
+
+  const Span<const uint8_t> file_span(file_contents);
+  JXL_RETURN_IF_ERROR(SetFromBytes(file_span, io, pool, orig_codec));
+  return true;
+}
+
+// Encodes the image in `io` with the requested `codec` and stores the result
+// in `bytes`.
+//   c_desired:       color encoding passed to the individual encoders (not
+//                    used for JPG).
+//   bits_per_sample: passed to the PNG, PNM, PGX and PSD encoders.
+//   pool:            forwarded to the encoders.
+// Returns a failure for kGIF and kUnknown, for codecs whose support was
+// compiled out, and when `io` holds a JPEG reconstruction but `codec` is not
+// kJPG.
+Status Encode(const CodecInOut& io, const Codec codec,
+              const ColorEncoding& c_desired, size_t bits_per_sample,
+              PaddedBytes* bytes, ThreadPool* pool) {
+  // Both the current and the desired color encodings must carry an ICC
+  // profile.
+  JXL_CHECK(!io.Main().c_current().ICC().empty());
+  JXL_CHECK(!c_desired.ICC().empty());
+  io.CheckMetadata();
+  if (io.Main().IsJPEG() && codec != Codec::kJPG) {
+    return JXL_FAILURE(
+        "Output format has to be JPEG for losslessly recompressed JPEG "
+        "reconstruction");
+  }
+
+  switch (codec) {
+    case Codec::kPNG:
+      return EncodeImagePNG(&io, c_desired, bits_per_sample, pool, bytes);
+    case Codec::kJPG:
+#if JPEGXL_ENABLE_JPEG
+      // The encoder backend and quality come from `io`; a JPEG reconstruction
+      // is written from its quantized coefficients rather than from pixels.
+      return EncodeImageJPG(
+          &io, io.use_sjpeg ? JpegEncoder::kSJpeg : JpegEncoder::kLibJpeg,
+          io.jpeg_quality, YCbCrChromaSubsampling(), pool, bytes,
+          io.Main().IsJPEG() ? DecodeTarget::kQuantizedCoeffs
+                             : DecodeTarget::kPixels);
+#else
+      return JXL_FAILURE("JPEG XL was built without JPEG support");
+#endif
+    case Codec::kPNM:
+      return EncodeImagePNM(&io, c_desired, bits_per_sample, pool, bytes);
+    case Codec::kPGX:
+      return EncodeImagePGX(&io, c_desired, bits_per_sample, pool, bytes);
+    case Codec::kGIF:
+      return JXL_FAILURE("Encoding to GIF is not implemented");
+    case Codec::kPSD:
+      return EncodeImagePSD(&io, c_desired, bits_per_sample, pool, bytes);
+    case Codec::kEXR:
+#if JPEGXL_ENABLE_EXR
+      return EncodeImageEXR(&io, c_desired, pool, bytes);
+#else
+      return JXL_FAILURE("JPEG XL was built without OpenEXR support");
+#endif
+    case Codec::kUnknown:
+      return JXL_FAILURE("Cannot encode using Codec::kUnknown");
+  }
+
+  // Reached only if `codec` holds a value outside the enumerators.
+  return JXL_FAILURE("Invalid codec");
+}
+
+// Picks the codec from the extension of `pathname`, encodes `io` with it and
+// writes the result to that path. Emits warnings when the extension does not
+// match the image (color vs. grayscale) and clamps bits_per_sample to 16 for
+// integer PNM and for PNG output.
+Status EncodeToFile(const CodecInOut& io, const ColorEncoding& c_desired,
+                    size_t bits_per_sample, const std::string& pathname,
+                    ThreadPool* pool) {
+  const std::string extension = Extension(pathname);
+  // May overwrite bits_per_sample (for ".pbm" and ".pfm").
+  const Codec codec = CodecFromExtension(extension, &bits_per_sample);
+  constexpr size_t kMaxIntegerBits = 16;
+
+  // CodecFromExtension lumps PBM/PGM/PPM/PFM together; only PPM carries
+  // color, and PFM is exempt from the 16-bit limit.
+  const bool integer_pnm = (codec == Codec::kPNM) && (extension != ".pfm");
+  if (integer_pnm) {
+    const bool gray = io.Main().IsGray();
+    const bool named_ppm = (extension == ".ppm");
+    if (!gray && !named_ppm) {
+      JXL_WARNING("For color images, the filename should end with .ppm.\n");
+    } else if (gray && named_ppm) {
+      JXL_WARNING(
+          "For grayscale images, the filename should not end with .ppm.\n");
+    }
+    if (bits_per_sample > kMaxIntegerBits) {
+      JXL_WARNING("PPM only supports up to 16 bits per sample");
+      bits_per_sample = kMaxIntegerBits;
+    }
+  } else if (codec == Codec::kPGX && !io.Main().IsGray()) {
+    JXL_WARNING("Storing color image to PGX - use .ppm extension instead.\n");
+  }
+
+  if (codec == Codec::kPNG && bits_per_sample > kMaxIntegerBits) {
+    JXL_WARNING("PNG only supports up to 16 bits per sample");
+    bits_per_sample = kMaxIntegerBits;
+  }
+
+  // The file is written only if encoding succeeded.
+  PaddedBytes encoded;
+  return Encode(io, codec, c_desired, bits_per_sample, &encoded, pool) &&
+         WriteFile(encoded, pathname);
+}
+
+// Convenience overload: encodes with the color encoding and bit depth stored
+// in io.metadata.m.
+Status EncodeToFile(const CodecInOut& io, const std::string& pathname,
+                    ThreadPool* pool) {
+  // TODO(lode): need to take the floating_point_sample field into account
+  const ColorEncoding& stored_encoding = io.metadata.m.color_encoding;
+  const size_t stored_bits = io.metadata.m.bit_depth.bits_per_sample;
+  return EncodeToFile(io, stored_encoding, stored_bits, pathname, pool);
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec.h b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec.h
new file mode 100644
index 0000000000..17209fdfe6
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec.h
@@ -0,0 +1,85 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_CODEC_H_
+#define LIB_EXTRAS_CODEC_H_
+
+// Facade for image encoders/decoders (PNG, PNM, ...).
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/field_encodings.h"  // MakeBit
+
+namespace jxl {
+
+// Codecs supported by CodecInOut::Encode.
+// The enumerators take consecutive values starting at 0 for kUnknown.
+enum class Codec : uint32_t {
+  kUnknown,  // for CodecFromExtension
+  kPNG,
+  kPNM,  // covers the .pbm/.pgm/.ppm/.pfm extensions
+  kPGX,
+  kJPG,
+  kGIF,
+  kEXR,
+  kPSD
+};
+
+// Returns a bit mask, built with MakeBit, of the Codec values listed below.
+// kJPG and kEXR are included only when the corresponding JPEGXL_ENABLE_*
+// macro is non-zero; kUnknown, kPGX and kGIF are never included.
+static inline constexpr uint64_t EnumBits(Codec /*unused*/) {
+  // Return only fully-supported codecs (kGIF is decode-only).
+  return MakeBit(Codec::kPNM) | MakeBit(Codec::kPNG)
+#if JPEGXL_ENABLE_JPEG
+         | MakeBit(Codec::kJPG)
+#endif
+#if JPEGXL_ENABLE_EXR
+         | MakeBit(Codec::kEXR)
+#endif
+         | MakeBit(Codec::kPSD);
+}
+
+// Lower case ASCII including dot, e.g. ".png".
+std::string ExtensionFromCodec(Codec codec, bool is_gray,
+                               size_t bits_per_sample);
+
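+// If extension is ".pfm", *bits_per_sample is updated to 32 so that Encode()
+// would encode to PFM instead of PPM; if it is ".pbm", *bits_per_sample is
+// set to 1. For every other extension *bits_per_sample is left unchanged.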
+Codec CodecFromExtension(const std::string& extension,
+                         size_t* JXL_RESTRICT bits_per_sample);
+
+// Decodes "bytes" and sets io->metadata.m.
+// dec_hints may specify the "color_space" (otherwise, defaults to sRGB).
+Status SetFromBytes(const Span<const uint8_t> bytes, CodecInOut* io,
+                    ThreadPool* pool = nullptr, Codec* orig_codec = nullptr);
+
+// Reads from file and calls SetFromBytes.
+Status SetFromFile(const std::string& pathname, CodecInOut* io,
+                   ThreadPool* pool = nullptr, Codec* orig_codec = nullptr);
+
+// Replaces "bytes" with an encoding of pixels transformed from c_current
+// color space to c_desired.
+Status Encode(const CodecInOut& io, Codec codec, const ColorEncoding& c_desired,
+              size_t bits_per_sample, PaddedBytes* bytes,
+              ThreadPool* pool = nullptr);
+
+// Deduces codec, calls Encode and writes to file.
+Status EncodeToFile(const CodecInOut& io, const ColorEncoding& c_desired,
+                    size_t bits_per_sample, const std::string& pathname,
+                    ThreadPool* pool = nullptr);
+// Same, but uses the color_encoding and bits_per_sample stored in
+// io.metadata.m.
+Status EncodeToFile(const CodecInOut& io, const std::string& pathname,
+                    ThreadPool* pool = nullptr);
+
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_CODEC_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_apng.cc b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_apng.cc
new file mode 100644
index 0000000000..bef59f6369
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_apng.cc
@@ -0,0 +1,410 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/codec_apng.h"
+
+// Parts of this code are taken from apngdis, which has the following license:
+/* APNG Disassembler 2.8
+ *
+ * Deconstructs APNG files into individual frames.
+ *
+ * http://apngdis.sourceforge.net
+ *
+ * Copyright (c) 2010-2015 Max Stepin
+ * maxst at users.sourceforge.net
+ *
+ * zlib license
+ * ------------
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <algorithm>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/luminance.h"
+#include "png.h" /* original (unpatched) libpng is ok */
+
+namespace jxl {
+
+namespace {
+
+// True iff `c` is an ASCII letter ('A'-'Z' or 'a'-'z').
+constexpr bool isAbc(char c) {
+  return (c >= 'a') ? (c <= 'z') : (c >= 'A' && c <= 'Z');
+}
+#define notabc(c) ((c) < 65 || (c) > 122 || ((c) > 90 && (c) < 97))
+
+// Four-character chunk type codes packed into a 32-bit word with the first
+// character in the least significant byte (e.g. 0x52444849 is 'I','H','D','R').
+// They are compared against the value read_chunk returns, which is the type
+// field loaded as a native unsigned int, so the match assumes a little-endian
+// host.
+constexpr uint32_t kId_IHDR = 0x52444849;
+constexpr uint32_t kId_acTL = 0x4C546361;
+constexpr uint32_t kId_fcTL = 0x4C546366;
+constexpr uint32_t kId_IDAT = 0x54414449;
+constexpr uint32_t kId_fdAT = 0x54416466;
+constexpr uint32_t kId_IEND = 0x444E4549;
+
+// One raw chunk as filled in by read_chunk.
+struct CHUNK {
+  // Buffer allocated with new[]: 4-byte length, 4-byte type, data and the
+  // trailing 4 bytes, exactly as read from the input. The owner must delete[]
+  // it.
+  unsigned char* p;
+  // Total number of bytes in `p` (data length + 12).
+  unsigned int size;
+};
+
+// Destination buffer for one decoded frame; a pointer to it is handed to
+// libpng as the progressive-read user pointer and read back in row_fn.
+struct APNGFrame {
+  // p: pixel storage; rows: per-row pointers into `p` (written by row_fn).
+  unsigned char *p, **rows;
+  // Not referenced by the decoder code in this file section.
+  unsigned int w, h, delay_num, delay_den;
+};
+
+// Sequential cursor over the byte range [next, last).
+struct Reader {
+  const uint8_t* next;  // next unread byte
+  const uint8_t* last;  // one past the final byte
+
+  // Copies up to `len` bytes into `data` and advances the cursor by the
+  // number of bytes copied. Returns true only if all `len` bytes were
+  // available; on a short read the available bytes are still copied.
+  bool Read(void* data, size_t len) {
+    const size_t available = last - next;
+    const size_t count = (len < available) ? len : available;
+    memcpy(data, next, count);
+    next += count;
+    return count == len;
+  }
+
+  // True once every byte has been consumed.
+  bool Eof() { return last == next; }
+};
+
+const unsigned long cMaxPNGSize = 1000000UL;
+const size_t kMaxPNGChunkSize = 100000000;  // 100 MB
+
+// libpng progressive-read "info" callback (registered in processing_start).
+// Configures the output transformations so that every row is delivered as
+// 8-bit RGBA, which is the 4-bytes-per-pixel layout DecodeImageAPNG reads.
+void info_fn(png_structp png_ptr, png_infop info_ptr) {
+  png_set_expand(png_ptr);
+  png_set_strip_16(png_ptr);       // 16-bit samples -> 8-bit
+  png_set_gray_to_rgb(png_ptr);
+  png_set_palette_to_rgb(png_ptr);
+  // Append an opaque (0xff) alpha byte after the color bytes when missing.
+  png_set_add_alpha(png_ptr, 0xff, PNG_FILLER_AFTER);
+  (void)png_set_interlace_handling(png_ptr);
+  // Must come last so the transformations above are taken into account.
+  png_read_update_info(png_ptr, info_ptr);
+}
+
+// libpng progressive-read "row" callback: merges the freshly decoded row into
+// row `row_num` of the APNGFrame registered as the progressive user pointer.
+void row_fn(png_structp png_ptr, png_bytep new_row, png_uint_32 row_num,
+            int pass) {
+  void* const user_ptr = png_get_progressive_ptr(png_ptr);
+  APNGFrame* const frame = static_cast<APNGFrame*>(user_ptr);
+  png_bytep const dest_row = frame->rows[row_num];
+  png_progressive_combine_row(png_ptr, dest_row, new_row);
+}
+
+// Reads the next chunk from `r` into a freshly allocated buffer.
+//
+// On success returns the chunk type (the 4 type bytes loaded as a native
+// unsigned int, non-zero) and sets pChunk->p / pChunk->size to the complete
+// chunk: 4-byte length, 4-byte type, data and the trailing 4 bytes. The caller
+// owns pChunk->p and must delete[] it.
+//
+// Returns 0 if the input is truncated, the declared size exceeds
+// kMaxPNGChunkSize, or the type word is zero. In every such case pChunk->p is
+// null and pChunk->size is 0, so callers that bail out on a 0 return do not
+// leak the buffer.
+inline unsigned int read_chunk(Reader* r, CHUNK* pChunk) {
+  unsigned char len[4];
+  pChunk->size = 0;
+  pChunk->p = 0;
+  if (!r->Read(&len, 4)) return 0;
+
+  const auto size = png_get_uint_32(len);
+  // Check first, to avoid overflow.
+  if (size > kMaxPNGChunkSize) {
+    JXL_WARNING("APNG chunk size is too big");
+    return 0;
+  }
+  // 12 = length field + type field + trailing 4 bytes.
+  pChunk->size = size + 12;
+  pChunk->p = new unsigned char[pChunk->size];
+  memcpy(pChunk->p, len, 4);
+
+  unsigned int id = 0;
+  if (r->Read(pChunk->p + 4, pChunk->size - 4)) {
+    // memcpy instead of a pointer cast: same bytes, no aliasing concerns.
+    memcpy(&id, pChunk->p + 4, sizeof(id));
+  }
+  if (id == 0) {
+    // Callers treat 0 as "no chunk"; release the buffer here so it cannot be
+    // leaked.
+    delete[] pChunk->p;
+    pChunk->p = 0;
+    pChunk->size = 0;
+  }
+  return id;
+}
+
+// Creates a fresh libpng progressive reader for one frame and feeds it the
+// PNG signature, the IHDR chunk and (if hasInfo) the stored info chunks.
+//   png_ptr, info_ptr: receive the new libpng structs; on failure any struct
+//                      that was created is destroyed again.
+//   frame_ptr:         user pointer handed back to row_fn (an APNGFrame*).
+//   chunkIHDR:         complete IHDR chunk to replay.
+//   chunksInfo:        chunks to replay after IHDR when hasInfo is true.
+// Returns 0 on success, 1 on failure.
+int processing_start(png_structp& png_ptr, png_infop& info_ptr, void* frame_ptr,
+                     bool hasInfo, CHUNK& chunkIHDR,
+                     std::vector<CHUNK>& chunksInfo) {
+  unsigned char header[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+
+  info_ptr = NULL;
+  png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+  if (!png_ptr) return 1;
+  info_ptr = png_create_info_struct(png_ptr);
+  if (!info_ptr) {
+    // Do not leak the read struct when only the info struct failed.
+    png_destroy_read_struct(&png_ptr, NULL, NULL);
+    return 1;
+  }
+
+  // libpng reports errors by longjmp-ing back to this point.
+  if (setjmp(png_jmpbuf(png_ptr))) {
+    png_destroy_read_struct(&png_ptr, &info_ptr, 0);
+    return 1;
+  }
+
+  // CRC errors are ignored quietly: fdAT chunks are later rewritten as IDAT
+  // without recomputing their CRC.
+  png_set_crc_action(png_ptr, PNG_CRC_QUIET_USE, PNG_CRC_QUIET_USE);
+  png_set_progressive_read_fn(png_ptr, frame_ptr, info_fn, row_fn, NULL);
+
+  png_process_data(png_ptr, info_ptr, header, 8);
+  png_process_data(png_ptr, info_ptr, chunkIHDR.p, chunkIHDR.size);
+
+  if (hasInfo) {
+    for (unsigned int i = 0; i < chunksInfo.size(); i++) {
+      png_process_data(png_ptr, info_ptr, chunksInfo[i].p, chunksInfo[i].size);
+    }
+  }
+  return 0;
+}
+
+// Feeds `size` bytes starting at `p` to the libpng progressive reader.
+// Returns 0 on success, 1 if either struct is null or libpng reported an
+// error. On a libpng error the structs are destroyed; because png_ptr and
+// info_ptr are passed by value, the caller's copies are not reset.
+int processing_data(png_structp png_ptr, png_infop info_ptr, unsigned char* p,
+                    unsigned int size) {
+  if (!png_ptr || !info_ptr) return 1;
+
+  // libpng reports errors by longjmp-ing back to this point.
+  if (setjmp(png_jmpbuf(png_ptr))) {
+    png_destroy_read_struct(&png_ptr, &info_ptr, 0);
+    return 1;
+  }
+
+  png_process_data(png_ptr, info_ptr, p, size);
+  return 0;
+}
+
+// Terminates the current frame's stream by feeding a synthetic IEND chunk and
+// then destroys the libpng structs. Returns 0 on success, 1 if either struct
+// is null or libpng reported an error (the structs are destroyed in that case
+// too). png_ptr and info_ptr are passed by value, so the caller's copies are
+// not reset.
+int processing_finish(png_structp png_ptr, png_infop info_ptr) {
+  // Zero length, "IEND", and the CRC of an empty IEND chunk.
+  unsigned char footer[12] = {0, 0, 0, 0, 73, 69, 78, 68, 174, 66, 96, 130};
+
+  if (!png_ptr || !info_ptr) return 1;
+
+  // libpng reports errors by longjmp-ing back to this point.
+  if (setjmp(png_jmpbuf(png_ptr))) {
+    png_destroy_read_struct(&png_ptr, &info_ptr, 0);
+    return 1;
+  }
+
+  png_process_data(png_ptr, info_ptr, footer, 12);
+  png_destroy_read_struct(&png_ptr, &info_ptr, 0);
+
+  return 0;
+}
+
+}  // namespace
+
+// Decodes a (possibly animated) PNG from `bytes` into io->frames.
+//
+// The input is split into chunks; each frame's data is replayed through a
+// libpng progressive reader as if it were a standalone PNG (signature + IHDR
+// patched with the frame size + info chunks + IDAT data + IEND). Every frame
+// is converted to float RGB plus alpha and appended to io->frames together
+// with its duration, origin and blending information.
+//
+//   bytes: encoded input.
+//   pool:  unused.
+//   io:    receives frames and metadata; it is modified (frames cleared,
+//          metadata set to 8-bit sRGB with alpha) even when decoding fails
+//          after the signature check.
+//
+// Returns false if the input does not start with the PNG signature or cannot
+// be decoded, and a failure for APNG features that are not supported.
+Status DecodeImageAPNG(Span<const uint8_t> bytes, ThreadPool* pool,
+                       CodecInOut* io) {
+  Reader r;
+  unsigned int id, i, j, w, h, w0, h0, x0, y0;
+  unsigned int delay_num, delay_den, dop, bop, rowbytes, imagesize;
+  unsigned char sig[8];
+  png_structp png_ptr;
+  png_infop info_ptr;
+  CHUNK chunk;
+  CHUNK chunkIHDR;
+  std::vector<CHUNK> chunksInfo;
+  bool isAnimated = false;
+  bool skipFirst = false;
+  bool hasInfo = false;
+  bool all_dispose_bg = true;
+  APNGFrame frameRaw = {};
+
+  r = {bytes.data(), bytes.data() + bytes.size()};
+  // Not an aPNG => not an error
+  unsigned char png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+  // Reject when the signature cannot be read OR does not match. (Read()
+  // returns true on success, so it must be negated here.)
+  if (!r.Read(sig, 8) || memcmp(sig, png_signature, 8) != 0) {
+    return false;
+  }
+  id = read_chunk(&r, &chunkIHDR);
+
+  io->frames.clear();
+  io->dec_pixels = 0;
+  io->metadata.m.SetUintSamples(8);
+  io->metadata.m.SetAlphaBits(8);
+  io->metadata.m.color_encoding =
+      ColorEncoding::SRGB();  // todo: get data from png metadata
+  (void)io->dec_hints.Foreach(
+      [](const std::string& key, const std::string& /*value*/) {
+        JXL_WARNING("APNG decoder ignoring %s hint", key.c_str());
+        return true;
+      });
+
+  bool errorstate = true;
+  // A complete IHDR chunk is 13 data bytes + 12 bytes of framing.
+  if (id == kId_IHDR && chunkIHDR.size == 25) {
+    w0 = w = png_get_uint_32(chunkIHDR.p + 8);
+    h0 = h = png_get_uint_32(chunkIHDR.p + 12);
+
+    if (w > cMaxPNGSize || h > cMaxPNGSize) {
+      delete[] chunkIHDR.p;
+      return false;
+    }
+
+    x0 = 0;
+    y0 = 0;
+    delay_num = 1;
+    delay_den = 10;
+    dop = 0;
+    bop = 0;
+    rowbytes = w * 4;
+    // h * rowbytes can exceed 32 bits even with both dimensions within
+    // cMaxPNGSize; refuse such images instead of allocating a wrapped-around
+    // (too small) buffer.
+    const uint64_t image_bytes = static_cast<uint64_t>(h) * rowbytes;
+    imagesize = static_cast<unsigned int>(image_bytes);
+    if (imagesize != image_bytes) {
+      delete[] chunkIHDR.p;
+      return false;
+    }
+
+    frameRaw.p = new unsigned char[imagesize];
+    frameRaw.rows = new png_bytep[h];
+    for (j = 0; j < h; j++) frameRaw.rows[j] = frameRaw.p + j * rowbytes;
+
+    if (!processing_start(png_ptr, info_ptr, (void*)&frameRaw, hasInfo,
+                          chunkIHDR, chunksInfo)) {
+      bool last_base_was_none = true;
+      while (!r.Eof()) {
+        id = read_chunk(&r, &chunk);
+        if (!id) {
+          // No-op when read_chunk already released the buffer.
+          delete[] chunk.p;
+          break;
+        }
+        JXL_ASSERT(chunk.p != nullptr);
+
+        if (id == kId_acTL && !hasInfo && !isAnimated) {
+          isAnimated = true;
+          skipFirst = true;
+          io->metadata.m.have_animation = true;
+          io->metadata.m.animation.tps_numerator = 1000;
+        } else if (id == kId_IEND ||
+                   (id == kId_fcTL && (!hasInfo || isAnimated))) {
+          // End of the current frame: flush it (if any data was seen).
+          if (hasInfo) {
+            if (!processing_finish(png_ptr, info_ptr)) {
+              ImageBundle bundle(&io->metadata.m);
+              bundle.duration = delay_num * 1000 / delay_den;
+              bundle.origin.x0 = x0;
+              bundle.origin.y0 = y0;
+              // TODO(veluca): this could in principle be implemented.
+              if (last_base_was_none && !all_dispose_bg &&
+                  (x0 != 0 || y0 != 0 || w0 != w || h0 != h || bop != 0)) {
+                // Release everything still owned before bailing out.
+                delete[] chunk.p;
+                delete[] frameRaw.rows;
+                delete[] frameRaw.p;
+                for (i = 0; i < chunksInfo.size(); i++) {
+                  delete[] chunksInfo[i].p;
+                }
+                delete[] chunkIHDR.p;
+                return JXL_FAILURE(
+                    "APNG with dispose-to-0 is not supported for non-full or "
+                    "blended frames");
+              }
+              switch (dop) {
+                case 0:
+                  bundle.use_for_next_frame = true;
+                  last_base_was_none = false;
+                  all_dispose_bg = false;
+                  break;
+                case 2:
+                  bundle.use_for_next_frame = false;
+                  all_dispose_bg = false;
+                  break;
+                default:
+                  bundle.use_for_next_frame = false;
+                  last_base_was_none = true;
+              }
+              bundle.blend = bop != 0;
+              io->dec_pixels += w0 * h0;
+
+              // Convert the 8-bit RGBA rows to float planes in [0, 1].
+              Image3F sub_frame(w0, h0);
+              ImageF sub_frame_alpha(w0, h0);
+              for (size_t y = 0; y < h0; ++y) {
+                float* const JXL_RESTRICT row_r = sub_frame.PlaneRow(0, y);
+                float* const JXL_RESTRICT row_g = sub_frame.PlaneRow(1, y);
+                float* const JXL_RESTRICT row_b = sub_frame.PlaneRow(2, y);
+                float* const JXL_RESTRICT row_alpha = sub_frame_alpha.Row(y);
+                uint8_t* const f = frameRaw.rows[y];
+                for (size_t x = 0; x < w0; ++x) {
+                  // Fully transparent pixels are stored as all-zero.
+                  if (f[4 * x + 3] == 0) {
+                    row_alpha[x] = 0;
+                    row_r[x] = 0;
+                    row_g[x] = 0;
+                    row_b[x] = 0;
+                    continue;
+                  }
+                  row_r[x] = f[4 * x + 0] * (1.f / 255);
+                  row_g[x] = f[4 * x + 1] * (1.f / 255);
+                  row_b[x] = f[4 * x + 2] * (1.f / 255);
+                  row_alpha[x] = f[4 * x + 3] * (1.f / 255);
+                }
+              }
+              bundle.SetFromImage(std::move(sub_frame), ColorEncoding::SRGB());
+              bundle.SetAlpha(std::move(sub_frame_alpha),
+                              /*alpha_is_premultiplied=*/false);
+              io->frames.push_back(std::move(bundle));
+            } else {
+              delete[] chunk.p;
+              break;
+            }
+          }
+
+          if (id == kId_IEND) {
+            errorstate = false;
+            break;
+          }
+          // At this point the old frame is done. Let's start a new one.
+          // An fcTL chunk carries 26 data bytes (38 with framing); the reads
+          // below go up to offset 33, so reject anything shorter.
+          if (chunk.size < 38) {
+            delete[] chunk.p;
+            break;
+          }
+          w0 = png_get_uint_32(chunk.p + 12);
+          h0 = png_get_uint_32(chunk.p + 16);
+          x0 = png_get_uint_32(chunk.p + 20);
+          y0 = png_get_uint_32(chunk.p + 24);
+          delay_num = png_get_uint_16(chunk.p + 28);
+          delay_den = png_get_uint_16(chunk.p + 30);
+          dop = chunk.p[32];
+          bop = chunk.p[33];
+
+          if (!delay_den) delay_den = 100;
+
+          if (w0 > cMaxPNGSize || h0 > cMaxPNGSize || x0 > cMaxPNGSize ||
+              y0 > cMaxPNGSize || x0 + w0 > w || y0 + h0 > h || dop > 2 ||
+              bop > 1) {
+            delete[] chunk.p;
+            break;
+          }
+
+          if (hasInfo) {
+            // Patch the frame's width/height into the IHDR that is replayed.
+            memcpy(chunkIHDR.p + 8, chunk.p + 12, 8);
+            if (processing_start(png_ptr, info_ptr, (void*)&frameRaw, hasInfo,
+                                 chunkIHDR, chunksInfo)) {
+              delete[] chunk.p;
+              break;
+            }
+          } else
+            skipFirst = false;
+
+          if (io->frames.size() == (skipFirst ? 1 : 0)) {
+            bop = 0;
+            if (dop == 2) dop = 1;
+          }
+        } else if (id == kId_IDAT) {
+          hasInfo = true;
+          if (processing_data(png_ptr, info_ptr, chunk.p, chunk.size)) {
+            delete[] chunk.p;
+            break;
+          }
+        } else if (id == kId_fdAT && isAnimated) {
+          // fdAT = 4-byte sequence number + IDAT payload. Needs at least the
+          // sequence number, otherwise the length rewrite below underflows.
+          if (chunk.size < 16) {
+            delete[] chunk.p;
+            break;
+          }
+          // Rewrite in place as an IDAT chunk starting 4 bytes later.
+          png_save_uint_32(chunk.p + 4, chunk.size - 16);
+          memcpy(chunk.p + 8, "IDAT", 4);
+          if (processing_data(png_ptr, info_ptr, chunk.p + 4, chunk.size - 4)) {
+            delete[] chunk.p;
+            break;
+          }
+        } else if (!isAbc(chunk.p[4]) || !isAbc(chunk.p[5]) ||
+                   !isAbc(chunk.p[6]) || !isAbc(chunk.p[7])) {
+          // Chunk types must consist of ASCII letters.
+          delete[] chunk.p;
+          break;
+        } else if (!hasInfo) {
+          if (processing_data(png_ptr, info_ptr, chunk.p, chunk.size)) {
+            delete[] chunk.p;
+            break;
+          }
+          // Kept (and owned by chunksInfo) so it can be replayed for every
+          // later frame.
+          chunksInfo.push_back(chunk);
+          continue;
+        }
+        delete[] chunk.p;
+      }
+    }
+    delete[] frameRaw.rows;
+    delete[] frameRaw.p;
+  }
+
+  for (i = 0; i < chunksInfo.size(); i++) delete[] chunksInfo[i].p;
+
+  chunksInfo.clear();
+  delete[] chunkIHDR.p;
+
+  if (errorstate) return false;
+  SetIntensityTarget(io);
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_apng.h b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_apng.h
new file mode 100644
index 0000000000..53d3bfa2ac
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_apng.h
@@ -0,0 +1,27 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_CODEC_APNG_H_
+#define LIB_EXTRAS_CODEC_APNG_H_
+
+// Decodes APNG images in memory.
+
+#include <stdint.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+
+namespace jxl {
+
+// Decodes `bytes` into `io`. io->dec_hints are ignored.
+Status DecodeImageAPNG(const Span<const uint8_t> bytes, ThreadPool* pool,
+                       CodecInOut* io);
+
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_CODEC_APNG_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_exr.cc b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_exr.cc
new file mode 100644
index 0000000000..efd0c1a12c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_exr.cc
@@ -0,0 +1,350 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/codec_exr.h"
+
+#include <ImfChromaticitiesAttribute.h>
+#include <ImfIO.h>
+#include <ImfRgbaFile.h>
+#include <ImfStandardAttributes.h>
+
+#include <vector>
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+
+namespace jxl {
+
+namespace {
+
+namespace OpenEXR = OPENEXR_IMF_NAMESPACE;
+namespace Imath = IMATH_NAMESPACE;
+
+// OpenEXR::Int64 is deprecated in favor of using uint64_t directly, but using
+// uint64_t as recommended causes build failures with previous OpenEXR versions
+// on macOS, where the definition for OpenEXR::Int64 was actually not equivalent
+// to uint64_t. This alternative should work in all cases.
+using ExrInt64 = decltype(std::declval<OpenEXR::IStream>().tellg());
+
+constexpr int kExrBitsPerSample = 16;
+constexpr int kExrAlphaBits = 16;
+
+// Chooses the intensity target for a decoded EXR image. Priority:
+//   1. io.target_nits when it is non-zero (a warning is logged if it
+//      overrides a whiteLuminance value present in the header),
+//   2. the header's whiteLuminance attribute,
+//   3. kDefaultIntensityTarget (with a warning).
+float GetIntensityTarget(const CodecInOut& io,
+                         const OpenEXR::Header& exr_header) {
+  const bool user_specified = (io.target_nits != 0);
+
+  if (OpenEXR::hasWhiteLuminance(exr_header)) {
+    const float exr_luminance = OpenEXR::whiteLuminance(exr_header);
+    if (!user_specified) {
+      return exr_luminance;
+    }
+    JXL_WARNING(
+        "overriding OpenEXR whiteLuminance of %g with user-specified value "
+        "of %g",
+        exr_luminance, io.target_nits);
+    return io.target_nits;
+  }
+
+  if (user_specified) {
+    return io.target_nits;
+  }
+
+  JXL_WARNING(
+      "no OpenEXR whiteLuminance tag found and no intensity_target specified, "
+      "defaulting to %g",
+      kDefaultIntensityTarget);
+  return kDefaultIntensityTarget;
+}
+
+// Returns the thread count that `pool` passes to the init callback of
+// RunOnPool, or 1 if that callback is never invoked.
+size_t GetNumThreads(ThreadPool* pool) {
+  size_t thread_count = 1;
+  // Init callback: records the number of threads reported by the pool.
+  const auto record_thread_count = [&thread_count](size_t num_threads) {
+    thread_count = num_threads;
+    return true;
+  };
+  // Data callback: nothing to do, only the init callback matters here.
+  const auto no_op = [](const int /* task */, const int /*thread*/) {};
+  RunOnPool(pool, 0, 1, record_thread_count, no_op, "DecodeImageEXRThreads");
+  return thread_count;
+}
+
+// OpenEXR input stream that serves reads directly from a byte span.
+class InMemoryIStream : public OpenEXR::IStream {
+ public:
+  // The data pointed to by `bytes` must outlive the InMemoryIStream.
+  explicit InMemoryIStream(const Span<const uint8_t> bytes)
+      : IStream(/*fileName=*/""), bytes_(bytes) {}
+
+  bool isMemoryMapped() const override { return true; }
+
+  // Returns a pointer to the next `n` bytes and advances the position.
+  char* readMemoryMapped(const int n) override {
+    JXL_ASSERT(pos_ + n <= bytes_.size());
+    const auto* const start = bytes_.data() + pos_;
+    pos_ += n;
+    return const_cast<char*>(reinterpret_cast<const char*>(start));
+  }
+
+  // Copies the next `n` bytes into `c`; returns whether unread bytes remain.
+  bool read(char c[], const int n) override {
+    const char* const source = readMemoryMapped(n);
+    std::copy_n(source, n, c);
+    return pos_ < bytes_.size();
+  }
+
+  ExrInt64 tellg() override { return pos_; }
+
+  void seekg(const ExrInt64 pos) override {
+    JXL_ASSERT(pos + 1 <= bytes_.size());
+    pos_ = pos;
+  }
+
+ private:
+  const Span<const uint8_t> bytes_;
+  size_t pos_ = 0;
+};
+
+// OpenEXR output stream that writes into a caller-owned PaddedBytes buffer,
+// growing it as needed.
+class InMemoryOStream : public OpenEXR::OStream {
+ public:
+  // `bytes` must outlive the InMemoryOStream.
+  explicit InMemoryOStream(PaddedBytes* const bytes)
+      : OStream(/*fileName=*/""), bytes_(*bytes) {}
+
+  // Writes `n` bytes at the current position, enlarging the buffer first if
+  // the write would run past its end.
+  void write(const char c[], const int n) override {
+    const size_t end = pos_ + n;
+    if (end > bytes_.size()) {
+      bytes_.resize(end);
+    }
+    std::copy_n(c, n, bytes_.begin() + pos_);
+    pos_ = end;
+  }
+
+  ExrInt64 tellp() override { return pos_; }
+
+  // Moves the write position to `pos`, enlarging the buffer when `pos` is
+  // more than one past its current size.
+  void seekp(const ExrInt64 pos) override {
+    if (pos > bytes_.size() + 1) {
+      bytes_.resize(pos - 1);
+    }
+    pos_ = pos;
+  }
+
+ private:
+  PaddedBytes& bytes_;
+  size_t pos_ = 0;
+};
+
+}  // namespace
+
+Status DecodeImageEXR(Span<const uint8_t> bytes, ThreadPool* pool,
+                      CodecInOut* io) {
+  // Get the number of threads we should be using for OpenEXR.
+  // OpenEXR creates its own set of threads, independent from ours. `pool` is
+  // only used for converting from a buffer of OpenEXR::Rgba to Image3F.
+  // TODO(sboukortt): look into changing that with OpenEXR 2.3 which allows
+  // custom thread pools according to its changelog.
+  OpenEXR::setGlobalThreadCount(GetNumThreads(pool));
+
+  InMemoryIStream is(bytes);
+
+#ifdef __EXCEPTIONS
+  std::unique_ptr<OpenEXR::RgbaInputFile> input_ptr;
+  try {
+    input_ptr.reset(new OpenEXR::RgbaInputFile(is));
+  } catch (...) {
+    return JXL_FAILURE("OpenEXR failed to parse input");
+  }
+  OpenEXR::RgbaInputFile& input = *input_ptr;
+#else
+  OpenEXR::RgbaInputFile input(is);
+#endif
+
+  if ((input.channels() & OpenEXR::RgbaChannels::WRITE_RGB) !=
+      OpenEXR::RgbaChannels::WRITE_RGB) {
+    return JXL_FAILURE("only RGB OpenEXR files are supported");
+  }
+  const bool has_alpha = (input.channels() & OpenEXR::RgbaChannels::WRITE_A) ==
+                         OpenEXR::RgbaChannels::WRITE_A;
+
+  const float intensity_target = GetIntensityTarget(*io, input.header());
+
+  auto image_size = input.displayWindow().size();
+  // Size is computed as max - min, but both bounds are inclusive.
+  ++image_size.x;
+  ++image_size.y;
+  Image3F image(image_size.x, image_size.y);
+  ZeroFillImage(&image);
+  ImageF alpha;
+  if (has_alpha) {
+    alpha = ImageF(image_size.x, image_size.y);
+    FillImage(1.f, &alpha);
+  }
+
+  const int row_size = input.dataWindow().size().x + 1;
+  // Number of rows to read at a time.
+  // https://www.openexr.com/documentation/ReadingAndWritingImageFiles.pdf
+  // recommends reading the whole file at once.
+  const int y_chunk_size = input.displayWindow().size().y + 1;
+  std::vector<OpenEXR::Rgba> input_rows(row_size * y_chunk_size);
+  for (int start_y =
+           std::max(input.dataWindow().min.y, input.displayWindow().min.y);
+       start_y <=
+       std::min(input.dataWindow().max.y, input.displayWindow().max.y);
+       start_y += y_chunk_size) {
+    // Inclusive.
+    const int end_y = std::min(
+        start_y + y_chunk_size - 1,
+        std::min(input.dataWindow().max.y, input.displayWindow().max.y));
+    input.setFrameBuffer(
+        input_rows.data() - input.dataWindow().min.x - start_y * row_size,
+        /*xStride=*/1, /*yStride=*/row_size);
+    input.readPixels(start_y, end_y);
+    RunOnPool(
+        pool, start_y, end_y + 1, ThreadPool::SkipInit(),
+        [&](const int exr_y, const int /*thread*/) {
+          const int image_y = exr_y - input.displayWindow().min.y;
+          const OpenEXR::Rgba* const JXL_RESTRICT input_row =
+              &input_rows[(exr_y - start_y) * row_size];
+          float* const JXL_RESTRICT rows[] = {
+              image.PlaneRow(0, image_y),
+              image.PlaneRow(1, image_y),
+              image.PlaneRow(2, image_y),
+          };
+          float* const JXL_RESTRICT alpha_row =
+              has_alpha ? alpha.Row(image_y) : nullptr;
+          for (int exr_x = std::max(input.dataWindow().min.x,
+                                    input.displayWindow().min.x);
+               exr_x <=
+               std::min(input.dataWindow().max.x, input.displayWindow().max.x);
+               ++exr_x) {
+            const int image_x = exr_x - input.displayWindow().min.x;
+            const OpenEXR::Rgba& pixel =
+                input_row[exr_x - input.dataWindow().min.x];
+            rows[0][image_x] = pixel.r;
+            rows[1][image_x] = pixel.g;
+            rows[2][image_x] = pixel.b;
+            if (has_alpha) {
+              alpha_row[image_x] = pixel.a;
+            }
+          }
+        },
+        "DecodeImageEXR");
+  }
+
+  ColorEncoding color_encoding;
+  color_encoding.tf.SetTransferFunction(TransferFunction::kLinear);
+  color_encoding.SetColorSpace(ColorSpace::kRGB);
+  PrimariesCIExy primaries = ColorEncoding::SRGB().GetPrimaries();
+  CIExy white_point = ColorEncoding::SRGB().GetWhitePoint();
+  if (OpenEXR::hasChromaticities(input.header())) {
+    const auto& chromaticities = OpenEXR::chromaticities(input.header());
+    primaries.r.x = chromaticities.red.x;
+    primaries.r.y = chromaticities.red.y;
+    primaries.g.x = chromaticities.green.x;
+    primaries.g.y = chromaticities.green.y;
+    primaries.b.x = chromaticities.blue.x;
+    primaries.b.y = chromaticities.blue.y;
+    white_point.x = chromaticities.white.x;
+    white_point.y = chromaticities.white.y;
+  }
+  JXL_RETURN_IF_ERROR(color_encoding.SetPrimaries(primaries));
+  JXL_RETURN_IF_ERROR(color_encoding.SetWhitePoint(white_point));
+  JXL_RETURN_IF_ERROR(color_encoding.CreateICC());
+
+  io->metadata.m.bit_depth.bits_per_sample = kExrBitsPerSample;
+  // EXR uses binary16 or binary32 floating point format.
+  io->metadata.m.bit_depth.exponent_bits_per_sample =
+      kExrBitsPerSample == 16 ? 5 : 8;
+  io->metadata.m.bit_depth.floating_point_sample = true;
+  io->SetFromImage(std::move(image), color_encoding);
+  io->metadata.m.color_encoding = color_encoding;
+  io->metadata.m.SetIntensityTarget(intensity_target);
+  if (has_alpha) {
+    io->metadata.m.SetAlphaBits(kExrAlphaBits, /*alpha_is_premultiplied=*/true);
+    io->Main().SetAlpha(std::move(alpha), /*alpha_is_premultiplied=*/true);
+  }
+  return true;
+}
+
+Status EncodeImageEXR(const CodecInOut* io, const ColorEncoding& c_desired,
+                      ThreadPool* pool, PaddedBytes* bytes) {
+  // Writes io->Main() as linear-light OpenEXR into `bytes`. `pool` is only used
+  // for pixel conversion; OpenEXR's own thread count is sized to match it.
+  OpenEXR::setGlobalThreadCount(GetNumThreads(pool));
+
+  ColorEncoding c_linear = c_desired;
+  c_linear.tf.SetTransferFunction(TransferFunction::kLinear);
+  JXL_RETURN_IF_ERROR(c_linear.CreateICC());
+  ImageMetadata metadata = io->metadata.m;
+  ImageBundle store(&metadata);
+  const ImageBundle* linear;  // Set by TransformIfNeeded() below.
+  JXL_RETURN_IF_ERROR(
+      TransformIfNeeded(io->Main(), c_linear, pool, &store, &linear));
+
+  const bool has_alpha = io->Main().HasAlpha();
+  const bool alpha_is_premultiplied = io->Main().AlphaIsPremultiplied();
+
+  OpenEXR::Header header(io->xsize(), io->ysize());
+  const PrimariesCIExy& primaries = c_linear.HasPrimaries()
+                                        ? c_linear.GetPrimaries()
+                                        : ColorEncoding::SRGB().GetPrimaries();
+  OpenEXR::Chromaticities chromaticities;
+  chromaticities.red = Imath::V2f(primaries.r.x, primaries.r.y);
+  chromaticities.green = Imath::V2f(primaries.g.x, primaries.g.y);
+  chromaticities.blue = Imath::V2f(primaries.b.x, primaries.b.y);
+  chromaticities.white =
+      Imath::V2f(c_linear.GetWhitePoint().x, c_linear.GetWhitePoint().y);
+  OpenEXR::addChromaticities(header, chromaticities);
+  OpenEXR::addWhiteLuminance(header, io->metadata.m.IntensityTarget());
+
+  // Ensure that the destructor of RgbaOutputFile has run before we look at the
+  // size of `bytes`.
+  {
+    InMemoryOStream os(bytes);
+    OpenEXR::RgbaOutputFile output(
+        os, header, has_alpha ? OpenEXR::WRITE_RGBA : OpenEXR::WRITE_RGB);
+    // How many rows to write at once. Again, the OpenEXR documentation
+    // recommends writing the whole image in one call.
+    const int y_chunk_size = io->ysize();
+    std::vector<OpenEXR::Rgba> output_rows(io->xsize() * y_chunk_size);
+
+    for (size_t start_y = 0; start_y < io->ysize(); start_y += y_chunk_size) {
+      // Last row of this chunk (inclusive), clamped to the last image row.
+      const size_t end_y =
+          std::min(start_y + y_chunk_size - 1, io->ysize() - 1);
+      output.setFrameBuffer(output_rows.data() - start_y * io->xsize(),
+                            /*xStride=*/1, /*yStride=*/io->xsize());
+      RunOnPool(
+          pool, start_y, end_y + 1, ThreadPool::SkipInit(),
+          [&](const int y, const int /*thread*/) {
+            const float* const JXL_RESTRICT input_rows[] = {
+                linear->color().ConstPlaneRow(0, y),
+                linear->color().ConstPlaneRow(1, y),
+                linear->color().ConstPlaneRow(2, y),
+            };
+            OpenEXR::Rgba* const JXL_RESTRICT row_data =
+                &output_rows[(y - start_y) * io->xsize()];
+            if (has_alpha) {
+              const float* const JXL_RESTRICT alpha_row =
+                  io->Main().alpha().ConstRow(y);
+              if (alpha_is_premultiplied) {
+                for (size_t x = 0; x < io->xsize(); ++x) {
+                  row_data[x] =
+                      OpenEXR::Rgba(input_rows[0][x], input_rows[1][x],
+                                    input_rows[2][x], alpha_row[x]);
+                }
+              } else {  // Straight alpha: premultiply the color on the fly.
+                for (size_t x = 0; x < io->xsize(); ++x) {
+                  row_data[x] = OpenEXR::Rgba(alpha_row[x] * input_rows[0][x],
+                                              alpha_row[x] * input_rows[1][x],
+                                              alpha_row[x] * input_rows[2][x],
+                                              alpha_row[x]);
+                }
+              }
+            } else {  // No alpha channel: every pixel gets alpha 1.
+              for (size_t x = 0; x < io->xsize(); ++x) {
+                row_data[x] = OpenEXR::Rgba(input_rows[0][x], input_rows[1][x],
+                                            input_rows[2][x], 1.f);
+              }
+            }
+          },
+          "EncodeImageEXR");
+      output.writePixels(/*numScanLines=*/end_y - start_y + 1);
+    }
+  }
+
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_exr.h b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_exr.h
new file mode 100644
index 0000000000..b0da5c5b8e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_exr.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_CODEC_EXR_H_
+#define LIB_EXTRAS_CODEC_EXR_H_
+
+// Encodes and decodes OpenEXR images in memory.
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+
+namespace jxl {
+
+// Decodes `bytes` into `io`. io->dec_hints are ignored.
+Status DecodeImageEXR(Span<const uint8_t> bytes, ThreadPool* pool,
+                      CodecInOut* io);
+
+// Transforms from io->c_current to `c_desired` (with the transfer function set
+// to linear as that is the OpenEXR convention) and encodes into `bytes`.
+Status EncodeImageEXR(const CodecInOut* io, const ColorEncoding& c_desired,
+                      ThreadPool* pool, PaddedBytes* bytes);
+
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_CODEC_EXR_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_gif.cc b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_gif.cc
new file mode 100644
index 0000000000..1fb2a11ac7
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_gif.cc
@@ -0,0 +1,343 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/codec_gif.h"
+
+#include <gif_lib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/luminance.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+
+namespace {
+
+struct ReadState {
+  Span<const uint8_t> bytes;  // Input that giflib has not consumed yet.
+};
+
+struct DGifCloser {  // Deleter for GifUniquePtr: closes the decoder handle.
+  void operator()(GifFileType* const ptr) const { DGifCloseFile(ptr, nullptr); }
+};
+using GifUniquePtr = std::unique_ptr<GifFileType, DGifCloser>;
+
+// Returns true iff no sample of `alpha` is exactly 0. GIF transparency is
+// binary, so every non-zero alpha value counts as opaque.
+bool AllOpaque(const ImageF& alpha) {
+  const size_t num_cols = alpha.xsize();
+  for (size_t row_idx = 0; row_idx < alpha.ysize(); ++row_idx) {
+    const float* const samples = alpha.ConstRow(row_idx);
+    const float* const samples_end = samples + num_cols;
+    if (std::find(samples, samples_end, 0.f) != samples_end) {
+      return false;  // Found a transparent sample in this row.
+    }
+  }
+  return true;
+}
+
+}  // namespace
+
+Status DecodeImageGIF(Span<const uint8_t> bytes, ThreadPool* pool,
+                      CodecInOut* io) {
+  int error = GIF_OK;
+  ReadState state = {bytes};
+  const auto ReadFromSpan = [](GifFileType* const gif, GifByteType* const bytes,
+                               int n) {
+    ReadState* const state = reinterpret_cast<ReadState*>(gif->UserData);
+    // giflib API requires the input size `n` to be signed int.
+    if (static_cast<size_t>(n) > state->bytes.size()) {
+      n = state->bytes.size();
+    }
+    memcpy(bytes, state->bytes.data(), n);
+    state->bytes.remove_prefix(n);  // Consume what was just handed out.
+    return n;
+  };
+  GifUniquePtr gif(DGifOpen(&state, ReadFromSpan, &error));
+  if (gif == nullptr) {
+    if (error == D_GIF_ERR_NOT_GIF_FILE) {
+      // Not an error.
+      return false;
+    } else {
+      return JXL_FAILURE("Failed to read GIF: %s", GifErrorString(error));
+    }
+  }
+  error = DGifSlurp(gif.get());
+  if (error != GIF_OK) {
+    return JXL_FAILURE("Failed to read GIF: %s", GifErrorString(gif->Error));
+  }
+
+  msan::UnpoisonMemory(gif.get(), sizeof(*gif));
+  if (gif->SColorMap) {
+    msan::UnpoisonMemory(gif->SColorMap, sizeof(*gif->SColorMap));
+    msan::UnpoisonMemory(
+        gif->SColorMap->Colors,
+        sizeof(*gif->SColorMap->Colors) * gif->SColorMap->ColorCount);
+  }
+  msan::UnpoisonMemory(gif->SavedImages,
+                       sizeof(*gif->SavedImages) * gif->ImageCount);
+
+  const SizeConstraints* constraints = &io->constraints;
+
+  JXL_RETURN_IF_ERROR(
+      VerifyDimensions<uint32_t>(constraints, gif->SWidth, gif->SHeight));
+  uint64_t total_pixel_count =
+      static_cast<uint64_t>(gif->SWidth) * gif->SHeight;
+  for (int i = 0; i < gif->ImageCount; ++i) {
+    const SavedImage& image = gif->SavedImages[i];
+    uint32_t w = image.ImageDesc.Width;
+    uint32_t h = image.ImageDesc.Height;
+    JXL_RETURN_IF_ERROR(VerifyDimensions<uint32_t>(constraints, w, h));
+    uint64_t pixel_count = static_cast<uint64_t>(w) * h;
+    if (total_pixel_count + pixel_count < total_pixel_count) {  // Wrapped.
+      return JXL_FAILURE("Image too big");
+    }
+    total_pixel_count += pixel_count;
+    if (total_pixel_count > constraints->dec_max_pixels) {
+      return JXL_FAILURE("Image too big");
+    }
+  }
+
+  if (!gif->SColorMap) {  // No global palette: every frame needs its own.
+    for (int i = 0; i < gif->ImageCount; ++i) {
+      if (!gif->SavedImages[i].ImageDesc.ColorMap) {
+        return JXL_FAILURE("Missing GIF color map");
+      }
+    }
+  }
+
+  if (gif->ImageCount > 1) {
+    io->metadata.m.have_animation = true;
+    // Delays in GIF are specified in 100ths of a second.
+    io->metadata.m.animation.tps_numerator = 100;
+  }
+
+  io->frames.clear();
+  io->frames.reserve(gif->ImageCount);
+  io->dec_pixels = 0;
+
+  io->metadata.m.SetUintSamples(8);
+  io->metadata.m.color_encoding = ColorEncoding::SRGB();
+  io->metadata.m.SetAlphaBits(0);
+  (void)io->dec_hints.Foreach(
+      [](const std::string& key, const std::string& /*value*/) {
+        JXL_WARNING("GIF decoder ignoring %s hint", key.c_str());
+        return true;
+      });
+
+  Image3F canvas(gif->SWidth, gif->SHeight);
+  io->SetSize(gif->SWidth, gif->SHeight);
+  ImageF alpha(gif->SWidth, gif->SHeight);
+  GifColorType background_color;  // 8-bit channels; scaled by 1/255 on use.
+  if (gif->SColorMap == nullptr) {
+    background_color = {0, 0, 0};
+  } else {
+    if (gif->SBackGroundColor >= gif->SColorMap->ColorCount) {
+      return JXL_FAILURE("GIF specifies out-of-bounds background color");
+    }
+    background_color = gif->SColorMap->Colors[gif->SBackGroundColor];
+  }
+  FillPlane<float>((1.f / 255) * background_color.Red, &canvas.Plane(0));
+  FillPlane<float>((1.f / 255) * background_color.Green, &canvas.Plane(1));
+  FillPlane<float>((1.f / 255) * background_color.Blue, &canvas.Plane(2));
+  ZeroFillImage(&alpha);  // The canvas starts out fully transparent.
+
+  Rect previous_rect_if_restore_to_background;
+
+  bool has_alpha = false;
+  bool replace = true;
+  bool last_base_was_none = true;
+  for (int i = 0; i < gif->ImageCount; ++i) {
+    const SavedImage& image = gif->SavedImages[i];
+    msan::UnpoisonMemory(image.RasterBits, sizeof(*image.RasterBits) *
+                                               image.ImageDesc.Width *
+                                               image.ImageDesc.Height);
+    const Rect image_rect(image.ImageDesc.Left, image.ImageDesc.Top,
+                          image.ImageDesc.Width, image.ImageDesc.Height);
+    io->dec_pixels += image_rect.xsize() * image_rect.ysize();
+    Rect total_rect;
+    if (previous_rect_if_restore_to_background.xsize() != 0 ||
+        previous_rect_if_restore_to_background.ysize() != 0) {
+      const size_t xbegin = std::min(
+          image_rect.x0(), previous_rect_if_restore_to_background.x0());
+      const size_t ybegin = std::min(
+          image_rect.y0(), previous_rect_if_restore_to_background.y0());
+      const size_t xend =
+          std::max(image_rect.x0() + image_rect.xsize(),
+                   previous_rect_if_restore_to_background.x0() +
+                       previous_rect_if_restore_to_background.xsize());
+      const size_t yend =
+          std::max(image_rect.y0() + image_rect.ysize(),
+                   previous_rect_if_restore_to_background.y0() +
+                       previous_rect_if_restore_to_background.ysize());
+      total_rect = Rect(xbegin, ybegin, xend - xbegin, yend - ybegin);
+      previous_rect_if_restore_to_background = Rect();
+      replace = true;
+    } else {
+      total_rect = image_rect;
+      replace = false;
+    }
+    if (!image_rect.IsInside(canvas)) {
+      return JXL_FAILURE("GIF frame extends outside of the canvas");
+    }
+    const ColorMapObject* const color_map =
+        image.ImageDesc.ColorMap ? image.ImageDesc.ColorMap : gif->SColorMap;
+    JXL_CHECK(color_map);
+    msan::UnpoisonMemory(color_map, sizeof(*color_map));
+    msan::UnpoisonMemory(color_map->Colors,
+                         sizeof(*color_map->Colors) * color_map->ColorCount);
+    GraphicsControlBlock gcb;
+    DGifSavedExtensionToGCB(gif.get(), i, &gcb);
+    msan::UnpoisonMemory(&gcb, sizeof(gcb));
+
+    ImageBundle bundle(&io->metadata.m);
+    if (io->metadata.m.have_animation) {
+      bundle.duration = gcb.DelayTime;
+      bundle.origin.x0 = total_rect.x0();
+      bundle.origin.y0 = total_rect.y0();
+      if (last_base_was_none) {
+        replace = true;
+      }
+      bundle.blend = !replace;
+      // TODO(veluca): this could in principle be implemented.
+      if (last_base_was_none &&
+          (total_rect.x0() != 0 || total_rect.y0() != 0 ||
+           total_rect.xsize() != canvas.xsize() ||
+           total_rect.ysize() != canvas.ysize() || !replace)) {
+        return JXL_FAILURE(
+            "GIF with dispose-to-0 is not supported for non-full or "
+            "blended frames");
+      }
+      switch (gcb.DisposalMode) {
+        case DISPOSE_DO_NOT:
+        case DISPOSE_BACKGROUND:
+          bundle.use_for_next_frame = true;
+          last_base_was_none = false;
+          break;
+        case DISPOSE_PREVIOUS:
+          bundle.use_for_next_frame = false;
+          break;
+        default:
+          bundle.use_for_next_frame = false;
+          last_base_was_none = true;
+      }
+    }
+    Image3F frame = CopyImage(canvas);
+    ImageF frame_alpha = CopyImage(alpha);
+    for (size_t y = 0, byte_index = 0; y < image_rect.ysize(); ++y) {
+      float* const JXL_RESTRICT row_r = image_rect.Row(&frame.Plane(0), y);
+      float* const JXL_RESTRICT row_g = image_rect.Row(&frame.Plane(1), y);
+      float* const JXL_RESTRICT row_b = image_rect.Row(&frame.Plane(2), y);
+      float* const JXL_RESTRICT row_alpha = image_rect.Row(&frame_alpha, y);
+      for (size_t x = 0; x < image_rect.xsize(); ++x, ++byte_index) {
+        const GifByteType byte = image.RasterBits[byte_index];
+        if (byte >= color_map->ColorCount) {
+          return JXL_FAILURE("GIF color is out of bounds");
+        }
+        if (byte == gcb.TransparentColor) continue;
+        GifColorType color = color_map->Colors[byte];
+        row_alpha[x] = 1.f;
+        row_r[x] = (1.f / 255) * color.Red;
+        row_g[x] = (1.f / 255) * color.Green;
+        row_b[x] = (1.f / 255) * color.Blue;
+      }
+    }
+    Image3F sub_frame(total_rect.xsize(), total_rect.ysize());
+    ImageF sub_frame_alpha(total_rect.xsize(), total_rect.ysize());
+    bool blend_alpha = false;
+    if (replace) {
+      CopyImageTo(total_rect, frame, &sub_frame);
+      CopyImageTo(total_rect, frame_alpha, &sub_frame_alpha);
+    } else {
+      for (size_t y = 0, byte_index = 0; y < image_rect.ysize(); ++y) {
+        float* const JXL_RESTRICT row_r = sub_frame.PlaneRow(0, y);
+        float* const JXL_RESTRICT row_g = sub_frame.PlaneRow(1, y);
+        float* const JXL_RESTRICT row_b = sub_frame.PlaneRow(2, y);
+        float* const JXL_RESTRICT row_alpha = sub_frame_alpha.Row(y);
+        for (size_t x = 0; x < image_rect.xsize(); ++x, ++byte_index) {
+          const GifByteType byte = image.RasterBits[byte_index];
+          if (byte >= color_map->ColorCount) {  // Valid: [0, ColorCount).
+            return JXL_FAILURE("GIF color is out of bounds");
+          }
+          if (byte == gcb.TransparentColor) {
+            row_alpha[x] = 0;
+            row_r[x] = 0;
+            row_g[x] = 0;
+            row_b[x] = 0;
+            blend_alpha =
+                true;  // need to use alpha channel if BlendMode blend is used
+            continue;
+          }
+          GifColorType color = color_map->Colors[byte];
+          row_alpha[x] = 1.f;
+          row_r[x] = (1.f / 255) * color.Red;
+          row_g[x] = (1.f / 255) * color.Green;
+          row_b[x] = (1.f / 255) * color.Blue;
+        }
+      }
+    }
+    bundle.SetFromImage(std::move(sub_frame), ColorEncoding::SRGB());
+    if (has_alpha || !AllOpaque(frame_alpha) || blend_alpha) {
+      if (!has_alpha) {  // First frame needing alpha: backfill earlier ones.
+        has_alpha = true;
+        io->metadata.m.SetAlphaBits(8);
+        for (ImageBundle& previous_frame : io->frames) {
+          ImageF previous_alpha(previous_frame.xsize(), previous_frame.ysize());
+          FillImage(1.f, &previous_alpha);
+          previous_frame.SetAlpha(std::move(previous_alpha),
+                                  /*alpha_is_premultiplied=*/false);
+        }
+      }
+      bundle.SetAlpha(std::move(sub_frame_alpha),
+                      /*alpha_is_premultiplied=*/false);
+    }
+    io->frames.push_back(std::move(bundle));
+    switch (gcb.DisposalMode) {
+      case DISPOSE_DO_NOT:
+        canvas = std::move(frame);
+        alpha = std::move(frame_alpha);
+        break;
+
+      case DISPOSE_BACKGROUND:
+        FillPlane<float>((1.f / 255) * background_color.Red, &canvas.Plane(0),
+                         image_rect);
+        FillPlane<float>((1.f / 255) * background_color.Green, &canvas.Plane(1),
+                         image_rect);
+        FillPlane<float>((1.f / 255) * background_color.Blue, &canvas.Plane(2),
+                         image_rect);
+        FillPlane(0.f, &alpha, image_rect);
+        previous_rect_if_restore_to_background = image_rect;
+        break;
+
+      case DISPOSE_PREVIOUS:
+        break;
+
+      case DISPOSAL_UNSPECIFIED:
+      default:
+        FillPlane<float>((1.f / 255) * background_color.Red, &canvas.Plane(0));
+        FillPlane<float>((1.f / 255) * background_color.Green,
+                         &canvas.Plane(1));
+        FillPlane<float>((1.f / 255) * background_color.Blue, &canvas.Plane(2));
+        ZeroFillImage(&alpha);
+    }
+  }
+
+  SetIntensityTarget(io);
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_gif.h b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_gif.h
new file mode 100644
index 0000000000..03e0e55253
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_gif.h
@@ -0,0 +1,27 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_CODEC_GIF_H_
+#define LIB_EXTRAS_CODEC_GIF_H_
+
+// Decodes GIF images in memory.
+
+#include <stdint.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+
+namespace jxl {
+
+// Decodes `bytes` into `io`. io->dec_hints are ignored.
+Status DecodeImageGIF(const Span<const uint8_t> bytes, ThreadPool* pool,
+                      CodecInOut* io);
+
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_CODEC_GIF_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_jpg.cc b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_jpg.cc
new file mode 100644
index 0000000000..842d52a8b0
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_jpg.cc
@@ -0,0 +1,519 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/codec_jpg.h"
+
+#include <stddef.h>
+#include <stdio.h>
+
+#if JPEGXL_ENABLE_JPEG
+// After stddef/stdio
+#include <jpeglib.h>
+#include <setjmp.h>
+#include <stdint.h>
+#endif  // JPEGXL_ENABLE_JPEG
+
+#include <algorithm>
+#include <iterator>
+#include <numeric>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/time.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/jpeg/dec_jpeg_data_writer.h"
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+#include "lib/jxl/jpeg/enc_jpeg_data_reader.h"
+#include "lib/jxl/luminance.h"
+#include "lib/jxl/sanitizers.h"
+#if JPEGXL_ENABLE_SJPEG
+#include "sjpeg.h"
+#endif
+
+namespace jxl {
+
+#if JPEGXL_ENABLE_JPEG
+namespace {
+
+constexpr float kJPEGSampleMultiplier = MAXJSAMPLE;
+constexpr unsigned char kICCSignature[12] = {  // ASCII "ICC_PROFILE" + NUL.
+    0x49, 0x43, 0x43, 0x5F, 0x50, 0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x00};
+constexpr int kICCMarker = JPEG_APP0 + 2;  // APP2.
+constexpr size_t kMaxBytesInMarker = 65533;  // NOTE(review): likely 65535 - 2.
+
+constexpr unsigned char kExifSignature[6] = {0x45, 0x78, 0x69,  // "Exif\0\0".
+                                             0x66, 0x00, 0x00};
+constexpr int kExifMarker = JPEG_APP0 + 1;  // APP1.
+
+constexpr float kJPEGSampleMin = 0;
+constexpr float kJPEGSampleMax = MAXJSAMPLE;
+
+bool MarkerIsICC(const jpeg_saved_marker_ptr marker) {
+  if (marker->marker != kICCMarker) return false;  // Not an APP2 segment.
+  if (marker->data_length < sizeof kICCSignature + 2) return false;  // Short.
+  return std::equal(std::begin(kICCSignature), std::end(kICCSignature),
+                    marker->data);  // Payload must start with the signature.
+}
+bool MarkerIsExif(const jpeg_saved_marker_ptr marker) {
+  if (marker->marker != kExifMarker) return false;  // Not an APP1 segment.
+  if (marker->data_length < sizeof kExifSignature + 2) return false;  // Short.
+  return std::equal(std::begin(kExifSignature), std::end(kExifSignature),
+                    marker->data);  // Payload must start with the signature.
+}
+
+Status ReadICCProfile(jpeg_decompress_struct* const cinfo,
+                      PaddedBytes* const icc) {
+  constexpr size_t kICCSignatureSize = sizeof kICCSignature;
+  // ICC signature + uint8_t index + uint8_t max_index.
+  constexpr size_t kICCHeadSize = kICCSignatureSize + 2;
+  // Markers are 1-indexed, and we keep them that way in this vector to get a
+  // convenient 0 at the front for when we compute the offsets later.
+  std::vector<size_t> marker_lengths;
+  int num_markers = 0;  // Chunk count announced by the markers.
+  int seen_markers_count = 0;  // Distinct chunks actually found.
+  bool has_num_markers = false;  // Set once the first ICC marker is seen.
+  for (jpeg_saved_marker_ptr marker = cinfo->marker_list; marker != nullptr;
+       marker = marker->next) {
+    // marker is initialized by libjpeg, which we are not instrumenting with
+    // msan.
+    msan::UnpoisonMemory(marker, sizeof(*marker));
+    msan::UnpoisonMemory(marker->data, marker->data_length);
+    if (!MarkerIsICC(marker)) continue;
+
+    const int current_marker = marker->data[kICCSignatureSize];  // 1-based.
+    if (current_marker == 0) {
+      return JXL_FAILURE("inconsistent JPEG ICC marker numbering");
+    }
+    const int current_num_markers = marker->data[kICCSignatureSize + 1];
+    if (current_marker > current_num_markers) {
+      return JXL_FAILURE("inconsistent JPEG ICC marker numbering");
+    }
+    if (has_num_markers) {
+      if (current_num_markers != num_markers) {
+        return JXL_FAILURE("inconsistent numbers of JPEG ICC markers");
+      }
+    } else {
+      num_markers = current_num_markers;
+      has_num_markers = true;
+      marker_lengths.resize(num_markers + 1);
+    }
+
+    size_t marker_length = marker->data_length - kICCHeadSize;
+
+    if (marker_length == 0) {
+      // NB: if we allow empty chunks, then the next check is incorrect.
+      return JXL_FAILURE("Empty ICC chunk");
+    }
+
+    if (marker_lengths[current_marker] != 0) {
+      return JXL_FAILURE("duplicate JPEG ICC marker number");
+    }
+    marker_lengths[current_marker] = marker_length;
+    seen_markers_count++;
+  }
+
+  if (marker_lengths.empty()) {
+    // No ICC marker was found at all: not an error.
+    return false;
+  }
+
+  if (seen_markers_count != num_markers) {
+    JXL_DASSERT(has_num_markers);
+    return JXL_FAILURE("Incomplete set of ICC chunks");
+  }
+
+  std::vector<size_t> offsets = std::move(marker_lengths);
+  std::partial_sum(offsets.begin(), offsets.end(), offsets.begin());
+  icc->resize(offsets.back());  // Sum of all chunk payload lengths.
+
+  for (jpeg_saved_marker_ptr marker = cinfo->marker_list; marker != nullptr;
+       marker = marker->next) {
+    if (!MarkerIsICC(marker)) continue;
+    const uint8_t* first = marker->data + kICCHeadSize;
+    uint8_t current_marker = marker->data[kICCSignatureSize];
+    size_t offset = offsets[current_marker - 1];  // Where this chunk starts.
+    size_t marker_length = offsets[current_marker] - offset;
+    std::copy_n(first, marker_length, icc->data() + offset);
+  }
+
+  return true;
+}
+
+void ReadExif(jpeg_decompress_struct* const cinfo, PaddedBytes* const exif) {
+  // Copies the payload of the first Exif (APP1) marker, minus its signature,
+  // into `exif`; `exif` is left untouched when no such marker exists.
+  jpeg_saved_marker_ptr seg = cinfo->marker_list;
+  for (; seg != nullptr; seg = seg->next) {
+    // libjpeg is not msan-instrumented, so mark what it wrote as initialized.
+    msan::UnpoisonMemory(seg, sizeof(*seg));
+    msan::UnpoisonMemory(seg->data, seg->data_length);
+    if (MarkerIsExif(seg)) break;
+  }
+  if (seg == nullptr) return;
+  const auto* const payload = seg->data + sizeof kExifSignature;
+  exif->resize(seg->data_length - sizeof kExifSignature);
+  std::copy_n(payload, exif->size(), exif->data());
+}
+
+// TODO (jon): take orientation into account when writing jpeg output
+// TODO (jon): write Exif blob also in sjpeg encoding
+// TODO (jon): overwrite orientation in Exif blob to avoid double orientation
+
+void WriteICCProfile(jpeg_compress_struct* const cinfo,
+                     const PaddedBytes& icc) {
+  // Splits `icc` into consecutive APP2 chunks; each one carries the signature,
+  // its sequence number, the chunk count, and a slice of the profile.
+  constexpr size_t kHeadSize = sizeof kICCSignature + 2;
+  constexpr size_t kMaxIccBytesInMarker = kMaxBytesInMarker - kHeadSize;
+  const int chunk_count =
+      static_cast<int>(DivCeil(icc.size(), kMaxIccBytesInMarker));
+  size_t pos = 0;
+  for (int seq = 1; seq <= chunk_count; ++seq) {
+    const size_t chunk_end =
+        pos + std::min(kMaxIccBytesInMarker, icc.size() - pos);
+    jpeg_write_m_header(cinfo, kICCMarker,
+                        static_cast<unsigned int>(chunk_end - pos + kHeadSize));
+    for (size_t i = 0; i < sizeof kICCSignature; ++i) {
+      jpeg_write_m_byte(cinfo, kICCSignature[i]);
+    }
+    // Sequence numbers are 1-based.
+    jpeg_write_m_byte(cinfo, seq);
+    jpeg_write_m_byte(cinfo, chunk_count);
+    while (pos < chunk_end) jpeg_write_m_byte(cinfo, icc[pos++]);
+  }
+}
+void WriteExif(jpeg_compress_struct* const cinfo, const PaddedBytes& exif) {
+  // One APP1 marker: signature, then `exif` without its first kPrefix bytes.
+  constexpr size_t kPrefix = 4;
+  if (exif.size() < kPrefix) return;
+  jpeg_write_m_header(
+      cinfo, kExifMarker,
+      static_cast<unsigned int>(exif.size() - kPrefix + sizeof kExifSignature));
+  for (const unsigned char c : kExifSignature) jpeg_write_m_byte(cinfo, c);
+  for (size_t i = kPrefix; i < exif.size(); ++i) {
+    jpeg_write_m_byte(cinfo, exif[i]);
+  }
+}
+
+Status SetChromaSubsampling(const YCbCrChromaSubsampling& chroma_subsampling,
+                            jpeg_compress_struct* const cinfo) {
+  // Components 0 and 1 use each other's shifts; component 2 uses its own.
+  const auto& cs = chroma_subsampling;
+  for (size_t c = 0; c < 3; ++c) {
+    const size_t src = c < 2 ? 1 - c : c;
+    auto& comp = cinfo->comp_info[c];
+    comp.h_samp_factor = 1 << (cs.MaxHShift() - cs.HShift(src));
+    comp.v_samp_factor = 1 << (cs.MaxVShift() - cs.VShift(src));
+  }
+  return true;
+}
+
+void MyErrorExit(j_common_ptr cinfo) {  // Installed as libjpeg's error_exit.
+  jmp_buf* env = static_cast<jmp_buf*>(cinfo->client_data);
+  (*cinfo->err->output_message)(cinfo);  // Report the error first.
+  jpeg_destroy_decompress(reinterpret_cast<j_decompress_ptr>(cinfo));
+  longjmp(*env, 1);  // Resume at the matching setjmp(), which then sees 1.
+}
+
+void MyOutputMessage(j_common_ptr cinfo) {  // libjpeg output_message hook.
+#if JXL_DEBUG_WARNING == 1
+  char buf[JMSG_LENGTH_MAX];
+  (*cinfo->err->format_message)(cinfo, buf);
+  JXL_WARNING("%s", buf);
+#endif  // Otherwise the message is dropped without being formatted.
+}
+
+}  // namespace
+#endif  // JPEGXL_ENABLE_JPEG
+
+// Decodes the JPEG stream in `bytes` into `io`.
+//
+// Returns false (without reporting an error) when `bytes` does not start with
+// the JPEG signature. When io->dec_target is kQuantizedCoeffs the work is
+// delegated to jxl::jpeg::DecodeImageJPG. Otherwise, if JPEG support was
+// enabled at build time, the pixels are decoded with libjpeg, converted to
+// planar floats and stored in `io` together with the ICC profile (sRGB when
+// the file has none) and the Exif blob.
+//
+// `elapsed_deinterleave`, if non-null, is reset to 0 and then accumulates the
+// time spent converting interleaved JSAMPLEs to planar floats.
+// `pool` is not used by this function.
+Status DecodeImageJPG(const Span<const uint8_t> bytes, ThreadPool* pool,
+                      CodecInOut* io, double* const elapsed_deinterleave) {
+  if (elapsed_deinterleave != nullptr) *elapsed_deinterleave = 0;
+  // Don't do anything for non-JPEG files (no need to report an error)
+  if (!IsJPG(bytes)) return false;
+  const DecodeTarget target = io->dec_target;
+
+  // Use brunsli JPEG decoder to read quantized coefficients.
+  if (target == DecodeTarget::kQuantizedCoeffs) {
+    return jxl::jpeg::DecodeImageJPG(bytes, io);
+  }
+
+#if JPEGXL_ENABLE_JPEG
+  // TODO(veluca): use JPEGData also for pixels?
+
+  // We need to declare all the non-trivial destructor local variables before
+  // the call to setjmp().
+  ColorEncoding color_encoding;
+  PaddedBytes icc;
+  Image3F image;
+  std::unique_ptr<JSAMPLE[]> row;
+  // Private, mutable copy of the input that is handed to jpeg_mem_src().
+  // It is owned by this frame (not allocated inside the lambda) so that it is
+  // released on every exit path, including the longjmp() out of libjpeg.
+  std::unique_ptr<unsigned char[]> jpg_copy(new unsigned char[bytes.size()]);
+  memcpy(jpg_copy.get(), bytes.data(), bytes.size());
+  ImageBundle bundle(&io->metadata.m);
+
+  const auto try_catch_block = [&]() -> bool {
+    jpeg_decompress_struct cinfo;
+#ifdef MEMORY_SANITIZER
+    // cinfo is initialized by libjpeg, which we are not instrumenting with
+    // msan, therefore we need to initialize cinfo here.
+    memset(&cinfo, 0, sizeof(cinfo));
+#endif
+    // Setup error handling in jpeg library so we can deal with broken jpegs in
+    // the fuzzer.
+    jpeg_error_mgr jerr;
+    jmp_buf env;
+    cinfo.err = jpeg_std_error(&jerr);
+    jerr.error_exit = &MyErrorExit;
+    jerr.output_message = &MyOutputMessage;
+    if (setjmp(env)) {
+      // Reached via longjmp() from MyErrorExit, which already destroyed cinfo.
+      return false;
+    }
+    cinfo.client_data = static_cast<void*>(&env);
+
+    jpeg_create_decompress(&cinfo);
+    jpeg_mem_src(&cinfo, jpg_copy.get(), bytes.size());
+    jpeg_save_markers(&cinfo, kICCMarker, 0xFFFF);
+    jpeg_save_markers(&cinfo, kExifMarker, 0xFFFF);
+    jpeg_read_header(&cinfo, TRUE);
+    // Tears down the decompressor and produces a failure carrying `str`.
+    const auto failure = [&cinfo](const char* str) -> Status {
+      jpeg_abort_decompress(&cinfo);
+      jpeg_destroy_decompress(&cinfo);
+      return JXL_FAILURE("%s", str);
+    };
+    if (!VerifyDimensions(&io->constraints, cinfo.image_width,
+                          cinfo.image_height)) {
+      return failure("image too big");
+    }
+    // Might cause CPU-zip bomb.
+    if (cinfo.arith_code) {
+      return failure("arithmetic code JPEGs are not supported");
+    }
+    if (ReadICCProfile(&cinfo, &icc)) {
+      if (!color_encoding.SetICC(std::move(icc))) {
+        return failure("read an invalid ICC profile");
+      }
+    } else {
+      // cinfo.output_components is only computed by jpeg_start_decompress(),
+      // which has not run yet, so decide gray vs. color from the component
+      // count found in the header instead.
+      color_encoding = ColorEncoding::SRGB(cinfo.num_components == 1);
+    }
+    ReadExif(&cinfo, &io->blobs.exif);
+    io->metadata.m.SetUintSamples(BITS_IN_JSAMPLE);
+    io->metadata.m.color_encoding = color_encoding;
+    int nbcomp = cinfo.num_components;
+    if (nbcomp != 1 && nbcomp != 3) {
+      return failure("unsupported number of components in JPEG");
+    }
+    (void)io->dec_hints.Foreach(
+        [](const std::string& key, const std::string& /*value*/) {
+          JXL_WARNING("JPEG decoder ignoring %s hint", key.c_str());
+          return true;
+        });
+
+    jpeg_start_decompress(&cinfo);
+    JXL_ASSERT(cinfo.output_components == nbcomp);
+    image = Image3F(cinfo.image_width, cinfo.image_height);
+    row.reset(new JSAMPLE[cinfo.output_components * cinfo.image_width]);
+    for (size_t y = 0; y < image.ysize(); ++y) {
+      JSAMPROW rows[] = {row.get()};
+      jpeg_read_scanlines(&cinfo, rows, 1);
+      msan::UnpoisonMemory(
+          row.get(),
+          sizeof(JSAMPLE) * cinfo.output_components * cinfo.image_width);
+      // Only the interleaved-to-planar conversion below is timed.
+      auto start = Now();
+      float* const JXL_RESTRICT output_row[] = {
+          image.PlaneRow(0, y), image.PlaneRow(1, y), image.PlaneRow(2, y)};
+      if (cinfo.output_components == 1) {
+        // Grayscale: replicate the single sample into all three planes.
+        for (size_t x = 0; x < image.xsize(); ++x) {
+          output_row[0][x] = output_row[1][x] = output_row[2][x] =
+              row[x] * (1.f / kJPEGSampleMultiplier);
+        }
+      } else {  // 3 components
+        for (size_t x = 0; x < image.xsize(); ++x) {
+          for (size_t c = 0; c < 3; ++c) {
+            output_row[c][x] = row[3 * x + c] * (1.f / kJPEGSampleMultiplier);
+          }
+        }
+      }
+      auto end = Now();
+      if (elapsed_deinterleave != nullptr) {
+        *elapsed_deinterleave += end - start;
+      }
+    }
+    io->SetFromImage(std::move(image), color_encoding);
+
+    jpeg_finish_decompress(&cinfo);
+    jpeg_destroy_decompress(&cinfo);
+    io->dec_pixels = io->xsize() * io->ysize();
+    return true;
+  };
+
+  return try_catch_block();
+#else   // JPEGXL_ENABLE_JPEG
+  return JXL_FAILURE("JPEG decoding not enabled at build time.");
+#endif  // JPEGXL_ENABLE_JPEG
+}
+
+#if JPEGXL_ENABLE_JPEG
+// Encodes the pixels of `ib` as a JPEG with libjpeg and stores the result in
+// `bytes`.
+//
+// `quality` is passed to jpeg_set_quality(). `chroma_subsampling` is applied
+// only to three-component (non-gray) images. The ICC profile of `ib` is
+// written unless `ib` is sRGB, and io->blobs.exif is written as Exif.
+// Returns true on success. Note that no custom libjpeg error handler is
+// installed here, so libjpeg's default error_exit behavior applies.
+Status EncodeWithLibJpeg(const ImageBundle* ib, const CodecInOut* io,
+                         size_t quality,
+                         const YCbCrChromaSubsampling& chroma_subsampling,
+                         PaddedBytes* bytes) {
+  jpeg_compress_struct cinfo;
+  // cinfo is initialized by libjpeg, which we are not instrumenting with
+  // msan.
+  msan::UnpoisonMemory(&cinfo, sizeof(cinfo));
+  jpeg_error_mgr jerr;
+  cinfo.err = jpeg_std_error(&jerr);
+  jpeg_create_compress(&cinfo);
+  unsigned char* buffer = nullptr;
+  unsigned long size = 0;
+  jpeg_mem_dest(&cinfo, &buffer, &size);
+  // Releases the compressor and the destination buffer. Must only be used
+  // before jpeg_start_compress(): once data is being written libjpeg may
+  // replace the buffer, and `buffer` is not refreshed until
+  // jpeg_finish_compress().
+  const auto release_unstarted = [&cinfo, &buffer]() {
+    jpeg_destroy_compress(&cinfo);
+    std::free(buffer);
+  };
+  cinfo.image_width = ib->xsize();
+  cinfo.image_height = ib->ysize();
+  if (ib->IsGray()) {
+    cinfo.input_components = 1;
+    cinfo.in_color_space = JCS_GRAYSCALE;
+  } else {
+    cinfo.input_components = 3;
+    cinfo.in_color_space = JCS_RGB;
+  }
+  // Defensive: the conversion loop below indexes at most three input planes.
+  // Checked before compression starts so that nothing is leaked on failure.
+  if (cinfo.input_components > 3 || cinfo.input_components < 0) {
+    release_unstarted();
+    return JXL_FAILURE("invalid numbers of components");
+  }
+  jpeg_set_defaults(&cinfo);
+  cinfo.optimize_coding = TRUE;
+  if (cinfo.input_components == 3) {
+    Status subsampling_status =
+        SetChromaSubsampling(chroma_subsampling, &cinfo);
+    if (!subsampling_status) {
+      release_unstarted();
+      return subsampling_status;
+    }
+  }
+  jpeg_set_quality(&cinfo, quality, TRUE);
+  jpeg_start_compress(&cinfo, TRUE);
+  if (!ib->IsSRGB()) {
+    WriteICCProfile(&cinfo, ib->c_current().ICC());
+  }
+  WriteExif(&cinfo, io->blobs.exif);
+
+  const size_t num_components = static_cast<size_t>(cinfo.input_components);
+  // One interleaved scanline of JSAMPLEs, reused for every row.
+  std::unique_ptr<JSAMPLE[]> row(
+      new JSAMPLE[cinfo.input_components * cinfo.image_width]);
+  for (size_t y = 0; y < ib->ysize(); ++y) {
+    const float* const JXL_RESTRICT input_row[3] = {
+        ib->color().ConstPlaneRow(0, y), ib->color().ConstPlaneRow(1, y),
+        ib->color().ConstPlaneRow(2, y)};
+    for (size_t x = 0; x < ib->xsize(); ++x) {
+      // num_components <= 3 was verified above, so `c` is a valid plane index.
+      for (size_t c = 0; c < num_components; ++c) {
+        // Scale to the JSAMPLE range, round and clamp.
+        row[cinfo.input_components * x + c] = static_cast<JSAMPLE>(
+            std::max(std::min(kJPEGSampleMultiplier * input_row[c][x] + .5f,
+                              kJPEGSampleMax),
+                     kJPEGSampleMin));
+      }
+    }
+    JSAMPROW rows[] = {row.get()};
+    jpeg_write_scanlines(&cinfo, rows, 1);
+  }
+  jpeg_finish_compress(&cinfo);
+  jpeg_destroy_compress(&cinfo);
+  bytes->resize(size);
+  // Compressed image data is initialized by libjpeg, which we are not
+  // instrumenting with msan.
+  msan::UnpoisonMemory(buffer, size);
+  std::copy_n(buffer, size, bytes->data());
+  std::free(buffer);
+  return true;
+}
+
+// Encodes the pixels of `ib` as a JPEG with sjpeg and stores the result in
+// `bytes`. Fails when sjpeg support was not compiled in, or when
+// `chroma_subsampling` is neither 4:4:4 nor 4:2:0. The ICC profile of `ib` is
+// attached unless `ib` is sRGB.
+Status EncodeWithSJpeg(const ImageBundle* ib, size_t quality,
+                       const YCbCrChromaSubsampling& chroma_subsampling,
+                       PaddedBytes* bytes) {
+#if !JPEGXL_ENABLE_SJPEG
+  return JXL_FAILURE("JPEG XL was built without sjpeg support");
+#else
+  sjpeg::EncoderParam param(quality);
+  if (!ib->IsSRGB()) {
+    const auto& profile = ib->metadata()->color_encoding.ICC();
+    param.iccp.assign(profile.begin(), profile.end());
+  }
+  const bool is_444 = chroma_subsampling.Is444();
+  if (!is_444 && !chroma_subsampling.Is420()) {
+    return JXL_FAILURE("sjpeg does not support this chroma subsampling mode");
+  }
+  param.yuv_mode = is_444 ? SJPEG_YUV_444 : SJPEG_YUV_SHARP;
+
+  // Scales a float sample by 255, rounds it and clamps it to [0, 255].
+  const auto to_byte = [](const float sample) -> uint8_t {
+    return static_cast<uint8_t>(
+        std::max(0.f, std::min(255.f, roundf(255.f * sample))));
+  };
+
+  // Interleave the three planes into one tightly packed 8-bit buffer.
+  std::vector<uint8_t> rgb;
+  rgb.reserve(ib->xsize() * ib->ysize() * 3);
+  for (size_t y = 0; y < ib->ysize(); ++y) {
+    const float* const plane0 = ib->color().ConstPlaneRow(0, y);
+    const float* const plane1 = ib->color().ConstPlaneRow(1, y);
+    const float* const plane2 = ib->color().ConstPlaneRow(2, y);
+    for (size_t x = 0; x < ib->xsize(); ++x) {
+      rgb.push_back(to_byte(plane0[x]));
+      rgb.push_back(to_byte(plane1[x]));
+      rgb.push_back(to_byte(plane2[x]));
+    }
+  }
+
+  std::string output;
+  JXL_RETURN_IF_ERROR(sjpeg::Encode(rgb.data(), ib->xsize(), ib->ysize(),
+                                    ib->xsize() * 3, param, &output));
+  const uint8_t* const encoded_begin =
+      reinterpret_cast<const uint8_t*>(output.data());
+  const uint8_t* const encoded_end =
+      reinterpret_cast<const uint8_t*>(output.data() + output.size());
+  bytes->assign(encoded_begin, encoded_end);
+  return true;
+#endif
+}
+#endif  // JPEGXL_ENABLE_JPEG
+
+// Encodes the main frame of `io` as a JPEG into `bytes`.
+//
+// Images with alpha and `quality` values above 100 are rejected. When `target`
+// is kQuantizedCoeffs the stored JPEG data of the main frame is serialized
+// with jpeg::WriteJpeg. Otherwise (and only if JPEG support was enabled at
+// build time) the frame is transformed to io->metadata.m.color_encoding if
+// needed and handed to the backend selected by `encoder`.
+Status EncodeImageJPG(const CodecInOut* io, JpegEncoder encoder, size_t quality,
+                      YCbCrChromaSubsampling chroma_subsampling,
+                      ThreadPool* pool, PaddedBytes* bytes,
+                      const DecodeTarget target) {
+  if (io->Main().HasAlpha()) {
+    return JXL_FAILURE("alpha is not supported");
+  }
+  if (quality > 100) {
+    return JXL_FAILURE("please specify a 0-100 JPEG quality");
+  }
+
+  if (target == DecodeTarget::kQuantizedCoeffs) {
+    // Sink that appends every chunk to `bytes` and reports it fully consumed.
+    auto append_chunk = [&bytes](const uint8_t* chunk, size_t chunk_size) {
+      bytes->append(chunk, chunk + chunk_size);
+      return chunk_size;
+    };
+    return jpeg::WriteJpeg(*io->Main().jpeg_data, append_chunk);
+  }
+
+#if JPEGXL_ENABLE_JPEG
+  // `transformed` points either at the main frame or at `scratch`.
+  const ImageBundle* transformed;
+  ImageMetadata metadata = io->metadata.m;
+  ImageBundle scratch(&metadata);
+  JXL_RETURN_IF_ERROR(TransformIfNeeded(io->Main(),
+                                        io->metadata.m.color_encoding, pool,
+                                        &scratch, &transformed));
+
+  if (encoder == JpegEncoder::kLibJpeg) {
+    JXL_RETURN_IF_ERROR(EncodeWithLibJpeg(transformed, io, quality,
+                                          chroma_subsampling, bytes));
+  } else if (encoder == JpegEncoder::kSJpeg) {
+    JXL_RETURN_IF_ERROR(
+        EncodeWithSJpeg(transformed, quality, chroma_subsampling, bytes));
+  } else {
+    return JXL_FAILURE("tried to use an unknown JPEG encoder");
+  }
+
+  return true;
+#else   // JPEGXL_ENABLE_JPEG
+  return JXL_FAILURE("JPEG pixel encoding not enabled at build time");
+#endif  // JPEGXL_ENABLE_JPEG
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_jpg.h b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_jpg.h
new file mode 100644
index 0000000000..84041ac86f
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_jpg.h
@@ -0,0 +1,47 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_CODEC_JPG_H_
+#define LIB_EXTRAS_CODEC_JPG_H_
+
+// Encodes/decodes JPG pixels and metadata in memory.
+
+#include <stdint.h>
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+
+namespace jxl {
+
+// Backend that EncodeImageJPG uses when it encodes pixels.
+enum class JpegEncoder {
+  kLibJpeg,  // Encode with libjpeg.
+  kSJpeg,    // Encode with sjpeg.
+};
+
+// Returns true iff `bytes` holds at least two bytes and starts with 0xFF 0xD8.
+static inline bool IsJPG(const Span<const uint8_t> bytes) {
+  return bytes.size() >= 2 && bytes[0] == 0xFF && bytes[1] == 0xD8;
+}
+
+// Decodes `bytes` into `io`. io->dec_hints are ignored.
+// `elapsed_deinterleave`, if non-null, will be set to the time (in seconds)
+// that it took to deinterleave the raw JSAMPLEs to planar floats.
+Status DecodeImageJPG(Span<const uint8_t> bytes, ThreadPool* pool,
+                      CodecInOut* io, double* elapsed_deinterleave = nullptr);
+
+// Encodes into `bytes`.
+Status EncodeImageJPG(const CodecInOut* io, JpegEncoder encoder, size_t quality,
+                      YCbCrChromaSubsampling chroma_subsampling,
+                      ThreadPool* pool, PaddedBytes* bytes,
+                      DecodeTarget target = DecodeTarget::kPixels);
+
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_CODEC_JPG_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_pgx.cc b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_pgx.cc
new file mode 100644
index 0000000000..7904cfb4d0
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_pgx.cc
@@ -0,0 +1,358 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/codec_pgx.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/file_io.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/fields.h"  // AllDefault
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/luminance.h"
+
+namespace jxl {
+namespace {
+
+// Fields read from the textual header of a PGX file.
+struct HeaderPGX {
+  // NOTE: PGX is always grayscale
+  size_t xsize;            // Second number in the header.
+  size_t ysize;            // Third number in the header.
+  size_t bits_per_sample;  // First number in the header.
+  bool big_endian;         // True for the "ML" marker, false for "LM".
+  bool is_signed;          // True for the '-' marker, false for '+'.
+};
+
+// Parser for the textual PGX header found at the start of a byte buffer.
+// All reads are bounds-checked against the end of the buffer and all size
+// computations are overflow-safe, since the input is untrusted.
+class Parser {
+ public:
+  explicit Parser(const Span<const uint8_t> bytes)
+      : pos_(bytes.data()), end_(pos_ + bytes.size()) {}
+
+  // Parses the whole header into `header`. Returns false if the buffer does
+  // not start with "PG", or a failure if the rest of the header is malformed.
+  // Sets "pos" to the first non-header byte/pixel on success.
+  Status ParseHeader(HeaderPGX* header, const uint8_t** pos) {
+    // Callers normally guarantee at least two bytes; check anyway so that a
+    // shorter buffer is rejected instead of being read out of bounds.
+    if (end_ - pos_ < 2) return false;
+    if (pos_[0] != 'P' || pos_[1] != 'G') return false;
+    pos_ += 2;
+    return ParseHeaderPGX(header, pos);
+  }
+
+  // Exposed for testing
+  // Parses a non-empty run of decimal digits into `number` and advances past
+  // it. Fails at end of input, on a non-digit, or if the value does not fit
+  // into size_t.
+  Status ParseUnsigned(size_t* number) {
+    if (pos_ == end_) return JXL_FAILURE("PGX: reached end before number");
+    if (!IsDigit(*pos_)) return JXL_FAILURE("PGX: expected unsigned number");
+
+    const size_t kMaxValue = ~static_cast<size_t>(0);
+    *number = 0;
+    while (pos_ < end_ && IsDigit(*pos_)) {
+      const size_t digit = static_cast<size_t>(*pos_ - '0');
+      // Equivalent to *number * 10 + digit > kMaxValue, without overflowing.
+      if (*number > (kMaxValue - digit) / 10) {
+        return JXL_FAILURE("PGX: number too large");
+      }
+      *number = *number * 10 + digit;
+      ++pos_;
+    }
+
+    return true;
+  }
+
+ private:
+  static bool IsDigit(const uint8_t c) { return '0' <= c && c <= '9'; }
+  static bool IsLineBreak(const uint8_t c) { return c == '\r' || c == '\n'; }
+  static bool IsWhitespace(const uint8_t c) {
+    return IsLineBreak(c) || c == '\t' || c == ' ';
+  }
+
+  // Consumes exactly one ' ' character.
+  Status SkipSpace() {
+    if (pos_ == end_) return JXL_FAILURE("PGX: reached end before space");
+    const uint8_t c = *pos_;
+    if (c != ' ') return JXL_FAILURE("PGX: expected space");
+    ++pos_;
+    return true;
+  }
+
+  // Consumes exactly one line break.
+  Status SkipLineBreak() {
+    if (pos_ == end_) return JXL_FAILURE("PGX: reached end before line break");
+    // Line break can be either "\n" (0a) or "\r\n" (0d 0a).
+    if (*pos_ == '\n') {
+      pos_++;
+      return true;
+    } else if (*pos_ == '\r' && pos_ + 1 != end_ && *(pos_ + 1) == '\n') {
+      pos_ += 2;
+      return true;
+    }
+    return JXL_FAILURE("PGX: expected line break");
+  }
+
+  // Consumes exactly one space, tab, '\r' or '\n'.
+  Status SkipSingleWhitespace() {
+    if (pos_ == end_) return JXL_FAILURE("PGX: reached end before whitespace");
+    if (!IsWhitespace(*pos_)) return JXL_FAILURE("PGX: expected whitespace");
+    ++pos_;
+    return true;
+  }
+
+  // Parses everything after the leading "PG": endianness marker, sign marker,
+  // bit depth, width, height and the terminating line break. Also verifies
+  // that the buffer holds enough bytes for all pixels.
+  Status ParseHeaderPGX(HeaderPGX* header, const uint8_t** pos) {
+    JXL_RETURN_IF_ERROR(SkipSpace());
+    if (end_ - pos_ < 2) return JXL_FAILURE("PGX: header too small");
+    if (*pos_ == 'M' && *(pos_ + 1) == 'L') {
+      header->big_endian = true;
+    } else if (*pos_ == 'L' && *(pos_ + 1) == 'M') {
+      header->big_endian = false;
+    } else {
+      return JXL_FAILURE("PGX: invalid endianness");
+    }
+    pos_ += 2;
+    JXL_RETURN_IF_ERROR(SkipSpace());
+    if (pos_ == end_) return JXL_FAILURE("PGX: header too small");
+    if (*pos_ == '+') {
+      header->is_signed = false;
+    } else if (*pos_ == '-') {
+      header->is_signed = true;
+    } else {
+      return JXL_FAILURE("PGX: invalid signedness");
+    }
+    pos_++;
+    // Skip optional space
+    if (pos_ < end_ && *pos_ == ' ') pos_++;
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->bits_per_sample));
+    JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
+    JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
+    // 0xa, or 0xd 0xa.
+    JXL_RETURN_IF_ERROR(SkipLineBreak());
+
+    if (header->bits_per_sample > 16) {
+      return JXL_FAILURE("PGX: >16 bits not yet supported");
+    }
+    // TODO(lode): support signed integers. This may require changing the way
+    // external_image works.
+    if (header->is_signed) {
+      return JXL_FAILURE("PGX: signed not yet supported");
+    }
+
+    // bits_per_sample <= 16 here, so a sample occupies one or two bytes.
+    const size_t bytes_per_pixel = header->bits_per_sample <= 8 ? 1 : 2;
+    const size_t available = static_cast<size_t>(end_ - pos_);
+    // Checks xsize * ysize * bytes_per_pixel > available using divisions so
+    // that huge dimensions cannot wrap around and slip past the check.
+    if (header->xsize != 0 &&
+        header->ysize > available / bytes_per_pixel / header->xsize) {
+      return JXL_FAILURE("PGX: data too small");
+    }
+
+    *pos = pos_;
+    return true;
+  }
+
+  const uint8_t* pos_;
+  const uint8_t* const end_;
+};
+
+constexpr size_t kMaxHeaderSize = 200;
+
+// Writes the textual PGX header for `ib` into `header`, which must provide
+// room for at least kMaxHeaderSize chars, and stores the header length
+// (excluding the terminating NUL) in `chars_written`.
+// Fails if `ib` has alpha or is not grayscale, if `bits_per_sample` is not 8
+// or 16, or if the header could not be formatted into the buffer.
+Status EncodeHeader(const ImageBundle& ib, const size_t bits_per_sample,
+                    char* header, int* JXL_RESTRICT chars_written) {
+  if (ib.HasAlpha()) return JXL_FAILURE("PGX: can't store alpha");
+  if (!ib.IsGray()) return JXL_FAILURE("PGX: must be grayscale");
+  // TODO(lode): verify other bit depths: for other bit depths such as 1 or 4
+  // bits, have a test case to verify it works correctly. For bits > 16, we may
+  // need to change the way external_image works.
+  if (bits_per_sample != 8 && bits_per_sample != 16) {
+    return JXL_FAILURE("PGX: bits other than 8 or 16 not yet supported");
+  }
+
+  // Use ML (Big Endian), LM may not be well supported by all decoders.
+  // The length is taken from snprintf's return value rather than from "%n",
+  // which some C libraries reject or disable.
+  const int written =
+      snprintf(header, kMaxHeaderSize, "PG ML + %zu %zu %zu\n", bits_per_sample,
+               ib.xsize(), ib.ysize());
+  if (written < 0 || static_cast<size_t>(written) >= kMaxHeaderSize) {
+    return JXL_FAILURE("PGX: failed to format header");
+  }
+  *chars_written = written;
+  return true;
+}
+
+// Applies io->dec_hints to io->metadata.m.color_encoding.
+// "color_space" is parsed as a color description and must be grayscale;
+// "icc_pathname" names a file whose contents are used as the ICC profile;
+// any other hint only triggers a warning. If neither recognized hint was
+// given, the encoding is set to gray sRGB.
+Status ApplyHints(CodecInOut* io) {
+  bool have_color_space = false;
+
+  const auto apply_hint = [io, &have_color_space](
+                              const std::string& key,
+                              const std::string& value) -> Status {
+    ColorEncoding* const encoding = &io->metadata.m.color_encoding;
+    if (key == "color_space") {
+      if (!ParseDescription(value, encoding) || !encoding->CreateICC()) {
+        return JXL_FAILURE("PGX: Failed to apply color_space");
+      }
+      if (!encoding->IsGray()) {
+        return JXL_FAILURE("PGX: color_space hint must be grayscale");
+      }
+      have_color_space = true;
+      return true;
+    }
+    if (key == "icc_pathname") {
+      PaddedBytes profile;
+      JXL_RETURN_IF_ERROR(ReadFile(value, &profile));
+      JXL_RETURN_IF_ERROR(encoding->SetICC(std::move(profile)));
+      have_color_space = true;
+      return true;
+    }
+    JXL_WARNING("PGX decoder ignoring %s hint", key.c_str());
+    return true;
+  };
+  JXL_RETURN_IF_ERROR(io->dec_hints.Foreach(apply_hint));
+
+  if (have_color_space) return true;
+
+  JXL_WARNING("PGX: no color_space/icc_pathname given, assuming sRGB");
+  JXL_RETURN_IF_ERROR(io->metadata.m.color_encoding.SetSRGB(ColorSpace::kGray));
+  return true;
+}
+
+// Checks (via JXL_CHECK) that `a` and `b` differ by at most `precision`.
+template <typename T>
+void ExpectNear(T a, T b, T precision) {
+  const auto distance = std::abs(a - b);
+  JXL_CHECK(distance <= precision);
+}
+
+// Wraps the NUL-terminated string `str` (without its terminator) in a byte
+// span. The span refers to the caller's memory; nothing is copied.
+Span<const uint8_t> MakeSpan(const char* str) {
+  const uint8_t* const first = reinterpret_cast<const uint8_t*>(str);
+  const size_t length = strlen(str);
+  return Span<const uint8_t>(first, length);
+}
+
+}  // namespace
+
+// Decodes the PGX image in `bytes` into `io` as a single frame without alpha.
+// Returns false if the header cannot be parsed, and a failure if the
+// dimensions violate io->constraints, the bit depth is 0 or above 32, the
+// decoder hints cannot be applied or the pixel conversion fails.
+Status DecodeImagePGX(const Span<const uint8_t> bytes, ThreadPool* pool,
+                      CodecInOut* io) {
+  HeaderPGX header = {};
+  const uint8_t* pixels;
+  Parser parser(bytes);
+  if (!parser.ParseHeader(&header, &pixels)) return false;
+  JXL_RETURN_IF_ERROR(
+      VerifyDimensions(&io->constraints, header.xsize, header.ysize));
+  const bool valid_depth =
+      header.bits_per_sample != 0 && header.bits_per_sample <= 32;
+  if (!valid_depth) {
+    return JXL_FAILURE("PGX: bits_per_sample invalid");
+  }
+
+  JXL_RETURN_IF_ERROR(ApplyHints(io));
+  io->metadata.m.SetUintSamples(header.bits_per_sample);
+  io->metadata.m.SetAlphaBits(0);
+  io->dec_pixels = header.xsize * header.ysize;
+  io->SetSize(header.xsize, header.ysize);
+  io->frames.clear();
+  io->frames.reserve(1);
+  ImageBundle frame(&io->metadata.m);
+
+  // Everything after the header is pixel data.
+  const size_t payload_size = bytes.data() + bytes.size() - pixels;
+  const Span<const uint8_t> payload(pixels, payload_size);
+  const auto endianness =
+      header.big_endian ? JXL_BIG_ENDIAN : JXL_LITTLE_ENDIAN;
+  JXL_RETURN_IF_ERROR(ConvertFromExternal(
+      payload, header.xsize, header.ysize, io->metadata.m.color_encoding,
+      /*has_alpha=*/false,
+      /*alpha_is_premultiplied=*/false,
+      io->metadata.m.bit_depth.bits_per_sample, endianness,
+      /*flipped_y=*/false, pool, &frame));
+  io->frames.push_back(std::move(frame));
+  SetIntensityTarget(io);
+  return true;
+}
+
+// Encodes the main frame of `io` as PGX into `bytes`: the frame is
+// transformed to `c_desired` if needed, converted to big-endian integer
+// samples of `bits_per_sample` bits and prefixed with the textual header.
+// Only warns (does not fail) when metadata is non-default or `c_desired` is
+// not sRGB, since PGX cannot store either.
+Status EncodeImagePGX(const CodecInOut* io, const ColorEncoding& c_desired,
+                      size_t bits_per_sample, ThreadPool* pool,
+                      PaddedBytes* bytes) {
+  if (!Bundle::AllDefault(io->metadata.m)) {
+    JXL_WARNING("PGX encoder ignoring metadata - use a different codec");
+  }
+  if (!c_desired.IsSRGB()) {
+    JXL_WARNING(
+        "PGX encoder cannot store custom ICC profile; decoder\n"
+        "will need hint key=color_space to get the same values");
+  }
+
+  ImageBundle main_copy = io->Main().Copy();
+
+  // `converted` points either at `main_copy` or at `scratch`.
+  ImageMetadata metadata = io->metadata.m;
+  ImageBundle scratch(&metadata);
+  const ImageBundle* converted;
+  JXL_RETURN_IF_ERROR(
+      TransformIfNeeded(main_copy, c_desired, pool, &scratch, &converted));
+
+  const size_t bytes_per_sample = bits_per_sample / kBitsPerByte;
+  size_t stride = main_copy.xsize() * bytes_per_sample;
+  PaddedBytes pixels(main_copy.xsize() * main_copy.ysize() * bytes_per_sample);
+  JXL_RETURN_IF_ERROR(
+      ConvertToExternal(*converted, bits_per_sample,
+                        /*float_out=*/false,
+                        /*num_channels=*/1, JXL_BIG_ENDIAN, stride, pool,
+                        pixels.data(), pixels.size(), /*out_callback=*/nullptr,
+                        /*out_opaque=*/nullptr, metadata.GetOrientation()));
+
+  char header[kMaxHeaderSize];
+  int header_chars = 0;
+  JXL_RETURN_IF_ERROR(
+      EncodeHeader(main_copy, bits_per_sample, header, &header_chars));
+
+  // Output layout: header text immediately followed by the raw samples.
+  const size_t header_size = static_cast<size_t>(header_chars);
+  bytes->resize(header_size + pixels.size());
+  memcpy(bytes->data(), header, header_size);
+  memcpy(bytes->data() + header_chars, pixels.data(), pixels.size());
+
+  return true;
+}
+
+// Self-test: decodes two tiny in-memory 2x3 PGX images (8-bit and 16-bit big
+// endian) whose pixel bytes are ASCII text, and checks the decoded metadata
+// and sample values with JXL_CHECK.
+void TestCodecPGX() {
+  // 8-bit case: each pixel is one character of "pixels".
+  {
+    const char kSamples[] = "pixels";
+    std::string pgx = "PG ML + 8 2 3\npixels";
+
+    CodecInOut io;
+    ThreadPool* pool = nullptr;
+
+    Status ok = DecodeImagePGX(MakeSpan(pgx.c_str()), pool, &io);
+    JXL_CHECK(ok == true);
+
+    ScaleImage(255.f, io.Main().color());
+
+    JXL_CHECK(!io.metadata.m.bit_depth.floating_point_sample);
+    JXL_CHECK(io.metadata.m.bit_depth.bits_per_sample == 8);
+    JXL_CHECK(io.metadata.m.color_encoding.IsGray());
+    JXL_CHECK(io.xsize() == 2);
+    JXL_CHECK(io.ysize() == 3);
+    float eps = 1e-5;
+    // Pixels are stored row by row, two per row.
+    for (size_t i = 0; i < 6; ++i) {
+      ExpectNear<float>(kSamples[i],
+                        io.Main().color()->Plane(0).Row(i / 2)[i % 2], eps);
+    }
+  }
+
+  // 16-bit case: each pixel is a character followed by '_' (big endian).
+  {
+    const char kSamples[] = "p_i_x_e_l_s_";
+    std::string pgx = "PG ML + 16 2 3\np_i_x_e_l_s_";
+
+    CodecInOut io;
+    ThreadPool* pool = nullptr;
+
+    Status ok = DecodeImagePGX(MakeSpan(pgx.c_str()), pool, &io);
+    JXL_CHECK(ok == true);
+
+    ScaleImage(255.f, io.Main().color());
+
+    JXL_CHECK(!io.metadata.m.bit_depth.floating_point_sample);
+    JXL_CHECK(io.metadata.m.bit_depth.bits_per_sample == 16);
+    JXL_CHECK(io.metadata.m.color_encoding.IsGray());
+    JXL_CHECK(io.xsize() == 2);
+    JXL_CHECK(io.ysize() == 3);
+    float eps = 1e-7;
+    const auto& plane = io.Main().color()->Plane(0);
+    for (size_t i = 0; i < 6; ++i) {
+      ExpectNear(256.0f * kSamples[2 * i] + kSamples[2 * i + 1],
+                 plane.Row(i / 2)[i % 2] * 257, eps);
+    }
+  }
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_pgx.h b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_pgx.h
new file mode 100644
index 0000000000..deb76da0e4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_pgx.h
@@ -0,0 +1,36 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_CODEC_PGX_H_
+#define LIB_EXTRAS_CODEC_PGX_H_
+
+// Encodes/decodes PGX pixels in memory.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+
+namespace jxl {
+
+// Decodes `bytes` into `io`. io->dec_hints may specify "color_space", which
+// defaults to sRGB.
+Status DecodeImagePGX(const Span<const uint8_t> bytes, ThreadPool* pool,
+                      CodecInOut* io);
+
+// Transforms from io->c_current to `c_desired` and encodes into `bytes`.
+Status EncodeImagePGX(const CodecInOut* io, const ColorEncoding& c_desired,
+                      size_t bits_per_sample, ThreadPool* pool,
+                      PaddedBytes* bytes);
+
+void TestCodecPGX();
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_CODEC_PGX_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_png.cc b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_png.cc
new file mode 100644
index 0000000000..f6fabd865b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_png.cc
@@ -0,0 +1,872 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/codec_png.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Lodepng library:
+#include <lodepng.h>
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/file_io.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/luminance.h"
+
+namespace jxl {
+namespace {
+
+#define JXL_PNG_VERBOSE 0
+
+// Retrieves XMP and EXIF/IPTC from itext and text.
+// Retrieves XMP and EXIF/IPTC from itext and text.
+class BlobsReaderPNG {
+ public:
+  // Copies metadata found in the text chunks of `info` into `blobs`.
+  // XMP is taken from any iTXt chunk whose key contains "XML:com.adobe.xmp";
+  // tEXt chunks keyed "Raw profile type <type>" are base16-decoded and stored
+  // as exif or xmp depending on <type>. Chunks that cannot be parsed are
+  // skipped with a warning. Always returns true.
+  static Status Decode(const LodePNGInfo& info, Blobs* blobs) {
+    for (unsigned idx_itext = 0; idx_itext < info.itext_num; ++idx_itext) {
+      // We trust these are properly null-terminated by LodePNG.
+      const char* key = info.itext_keys[idx_itext];
+      const char* value = info.itext_strings[idx_itext];
+      if (strstr(key, "XML:com.adobe.xmp")) {
+        blobs->xmp.resize(strlen(value));  // safe, see above
+        memcpy(blobs->xmp.data(), value, blobs->xmp.size());
+      }
+    }
+
+    for (unsigned idx_text = 0; idx_text < info.text_num; ++idx_text) {
+      // We trust these are properly null-terminated by LodePNG.
+      const char* key = info.text_keys[idx_text];
+      const char* value = info.text_strings[idx_text];
+      std::string type;
+      PaddedBytes bytes;
+
+      // Handle text chunks annotated with key "Raw profile type ####", with
+      // #### a type, which may contain metadata.
+      const char* kKey = "Raw profile type ";
+      if (strncmp(key, kKey, strlen(kKey)) != 0) continue;
+
+      if (!MaybeDecodeBase16(key, value, &type, &bytes)) {
+        JXL_WARNING("Couldn't parse 'Raw format type' text chunk");
+        continue;
+      }
+      if (type == "exif") {
+        if (!blobs->exif.empty()) {
+          JXL_WARNING("overwriting EXIF (%zu bytes) with base16 (%zu bytes)",
+                      blobs->exif.size(), bytes.size());
+        }
+        blobs->exif = std::move(bytes);
+      } else if (type == "iptc") {
+        // TODO (jon): Deal with IPTC in some way
+      } else if (type == "8bim") {
+        // TODO (jon): Deal with 8bim in some way
+      } else if (type == "xmp") {
+        if (!blobs->xmp.empty()) {
+          JXL_WARNING("overwriting XMP (%zu bytes) with base16 (%zu bytes)",
+                      blobs->xmp.size(), bytes.size());
+        }
+        blobs->xmp = std::move(bytes);
+      } else {
+        JXL_WARNING(
+            "Unknown type in 'Raw format type' text chunk: %s: %zu bytes",
+            type.c_str(), bytes.size());
+      }
+    }
+
+    return true;
+  }
+
+ private:
+  // Converts one lowercase hexadecimal digit into its value.
+  // Returns false if invalid.
+  static JXL_INLINE Status DecodeNibble(const char c,
+                                        uint32_t* JXL_RESTRICT nibble) {
+    if ('a' <= c && c <= 'f') {
+      *nibble = 10 + c - 'a';
+    } else if ('0' <= c && c <= '9') {
+      *nibble = c - '0';
+    } else {
+      *nibble = 0;
+      return JXL_FAILURE("Invalid metadata nibble");
+    }
+    JXL_ASSERT(*nibble < 16);
+    return true;
+  }
+
+  // Parses a PNG text chunk with key of the form "Raw profile type ####", with
+  // #### a type.
+  // Returns whether it could successfully parse the content.
+  // We trust key and encoded are null-terminated because they come from
+  // LodePNG.
+  static Status MaybeDecodeBase16(const char* key, const char* encoded,
+                                  std::string* type, PaddedBytes* bytes) {
+    const char* encoded_end = encoded + strlen(encoded);
+
+    const char* kKey = "Raw profile type ";
+    if (strncmp(key, kKey, strlen(kKey)) != 0) return false;
+    *type = key + strlen(kKey);
+    const size_t kMaxTypeLen = 20;
+    if (type->length() > kMaxTypeLen) return false;  // Type too long
+
+    // Header: freeform string and number of bytes
+    unsigned long bytes_to_decode;
+    int header_len;
+    // Sized to hold the whole text, so the unbounded %[^\n] cannot overflow.
+    std::vector<char> description((encoded_end - encoded) + 1);
+    const int fields = sscanf(encoded, "\n%[^\n]\n%8lu%n", description.data(),
+                              &bytes_to_decode, &header_len);
+    if (fields != 2) return false;  // Failed to decode metadata header
+    // Every payload byte needs two characters of text, so a declared size
+    // beyond half the text length can never be satisfied. Reject it before
+    // reserving memory based on an untrusted number.
+    if (bytes_to_decode > static_cast<size_t>(encoded_end - encoded) / 2) {
+      return false;  // Declared size exceeds available data
+    }
+    JXL_ASSERT(bytes->empty());
+    bytes->reserve(bytes_to_decode);
+
+    // Encoding: base16 with newline after 72 chars.
+    const char* pos = encoded + header_len;
+    for (size_t i = 0; i < bytes_to_decode; ++i) {
+      if (i % 36 == 0) {
+        if (pos + 1 >= encoded_end) return false;  // Truncated base16 1
+        if (*pos != '\n') return false;            // Expected newline
+        ++pos;
+      }
+
+      if (pos + 2 >= encoded_end) return false;  // Truncated base16 2;
+      uint32_t nibble0, nibble1;
+      JXL_RETURN_IF_ERROR(DecodeNibble(pos[0], &nibble0));
+      JXL_RETURN_IF_ERROR(DecodeNibble(pos[1], &nibble1));
+      bytes->push_back(static_cast<uint8_t>((nibble0 << 4) + nibble1));
+      pos += 2;
+    }
+    if (pos + 1 != encoded_end) return false;  // Too many encoded bytes
+    if (pos[0] != '\n') return false;          // Incorrect metadata terminator
+    return true;
+  }
+};
+
+// Stores XMP and EXIF/IPTC into itext and text.
+// Stores XMP and EXIF/IPTC into itext and text.
+class BlobsWriterPNG {
+ public:
+  // Adds one "Raw profile type ..." text chunk to `info` for each non-empty
+  // blob, in the order exif, iptc, xmp. Fails if LodePNG rejects a chunk.
+  static Status Encode(const Blobs& blobs, LodePNGInfo* JXL_RESTRICT info) {
+    if (!blobs.exif.empty()) {
+      JXL_RETURN_IF_ERROR(EncodeBase16("exif", blobs.exif, info));
+    }
+    if (!blobs.iptc.empty()) {
+      JXL_RETURN_IF_ERROR(EncodeBase16("iptc", blobs.iptc, info));
+    }
+    if (blobs.xmp.empty()) return true;
+
+    JXL_RETURN_IF_ERROR(EncodeBase16("xmp", blobs.xmp, info));
+
+    // Below is the official way, but it does not seem to work in ImageMagick.
+    // Exiv2 and exiftool are OK with either way of encoding XMP.
+    if (/* DISABLES CODE */ (0)) {
+      const char* key = "XML:com.adobe.xmp";
+      const std::string text(reinterpret_cast<const char*>(blobs.xmp.data()),
+                             blobs.xmp.size());
+      if (lodepng_add_itext(info, key, "", "", text.c_str()) != 0) {
+        return JXL_FAILURE("Failed to add itext");
+      }
+    }
+
+    return true;
+  }
+
+ private:
+  // Returns the lowercase hexadecimal digit for a value below 16.
+  static JXL_INLINE char EncodeNibble(const uint8_t nibble) {
+    JXL_ASSERT(nibble < 16);
+    static const char kHexDigits[] = "0123456789abcdef";
+    return kHexDigits[nibble];
+  }
+
+  // Adds `bytes` to `info` as a text chunk keyed "Raw profile type <type>".
+  // The value is a header ("\n<type>\n" plus the byte count padded to eight
+  // characters) followed by the base16 payload.
+  static Status EncodeBase16(const std::string& type, const PaddedBytes& bytes,
+                             LodePNGInfo* JXL_RESTRICT info) {
+    // Encoding: base16 with newline after 72 chars.
+    const size_t num_bytes = bytes.size();
+    const size_t expected_length =
+        2 * num_bytes + DivCeil(num_bytes, size_t(36)) + 1;
+    std::string hex;
+    hex.reserve(expected_length);
+    for (size_t i = 0; i < num_bytes; ++i) {
+      // 36 bytes make 72 hexadecimal characters, i.e. one full line.
+      if (i % 36 == 0) hex += '\n';
+      hex += EncodeNibble(bytes[i] >> 4);
+      hex += EncodeNibble(bytes[i] & 0x0F);
+    }
+    hex += '\n';
+    JXL_ASSERT(hex.length() == expected_length);
+
+    char key[30];
+    snprintf(key, sizeof(key), "Raw profile type %s", type.c_str());
+
+    char header[30];
+    snprintf(header, sizeof(header), "\n%s\n%8zu", type.c_str(), bytes.size());
+
+    std::string encoded(header);
+    encoded += hex;
+    if (lodepng_add_text(info, key, encoded.c_str()) != 0) {
+      return JXL_FAILURE("Failed to add text");
+    }
+
+    return true;
+  }
+};
+
+// Retrieves ColorEncoding from PNG chunks.
+//
+// Usage: construct, call operator() once with the whole PNG file, then query
+// HaveColorProfile(). The chunk scan (Decode) records which of
+// iCCP/sRGB/gAMA/cHRM were seen; operator() then picks one source in this
+// order: synthesized BT.2100 PQ (if the iCCP name requested it), the embedded
+// ICC profile, sRGB, and finally a custom profile from gAMA/cHRM.
+class ColorEncodingReaderPNG {
+ public:
+  // Fills original->color_encoding or returns false.
+  // Also stores any eXIf payload found during the chunk scan into io->blobs.
+  // `bytes` is the complete PNG file; `is_gray` selects kGray vs kRGB for the
+  // color space and is checked against the ICC profile if one is applied.
+  Status operator()(const Span<const uint8_t> bytes, const bool is_gray,
+                    CodecInOut* io) {
+    ColorEncoding* c_original = &io->metadata.m.color_encoding;
+    JXL_RETURN_IF_ERROR(Decode(bytes, &io->blobs));
+
+    const ColorSpace color_space =
+        is_gray ? ColorSpace::kGray : ColorSpace::kRGB;
+
+    // have_pq_ is set by DecodeICC when the profile name is
+    // "ITUR_2100_PQ_FULL".
+    if (have_pq_) {
+      c_original->SetColorSpace(color_space);
+      c_original->white_point = WhitePoint::kD65;
+      c_original->primaries = Primaries::k2100;
+      c_original->tf.SetTransferFunction(TransferFunction::kPQ);
+      c_original->rendering_intent = RenderingIntent::kRelative;
+      if (c_original->CreateICC()) return true;
+      JXL_WARNING("Failed to synthesize BT.2100 PQ");
+      // Else: try the actual ICC profile.
+    }
+
+    // ICC overrides anything else if present.
+    // NOTE(review): icc_ is empty when no iCCP chunk was seen or when its
+    // decompression failed; SetICC presumably rejects that - confirm.
+    if (c_original->SetICC(std::move(icc_))) {
+      if (have_srgb_) {
+        JXL_WARNING("Invalid PNG with both sRGB and ICC; ignoring sRGB");
+      }
+      if (is_gray != c_original->IsGray()) {
+        return JXL_FAILURE("Mismatch between ICC and PNG header grayscale");
+      }
+      return true;  // it's fine to ignore gAMA/cHRM.
+    }
+
+    // PNG requires that sRGB override gAMA/cHRM.
+    if (have_srgb_) {
+      return c_original->SetSRGB(color_space, rendering_intent_);
+    }
+
+    // Try to create a custom profile:
+
+    c_original->SetColorSpace(color_space);
+
+    // Attempt to set whitepoint and primaries if there is a cHRM chunk, or else
+    // use default sRGB (the PNG then is device-dependent).
+    // In case of grayscale, do not attempt to set the primaries and ignore the
+    // ones the PNG image has (but still set the white point).
+    if (!have_chrm_ || !c_original->SetWhitePoint(white_point_) ||
+        (!is_gray && !c_original->SetPrimaries(primaries_))) {
+#if JXL_PNG_VERBOSE >= 1
+      JXL_WARNING("No (valid) cHRM, assuming sRGB");
+#endif
+      c_original->white_point = WhitePoint::kD65;
+      c_original->primaries = Primaries::kSRGB;
+    }
+
+    if (!have_gama_ || !c_original->tf.SetGamma(gamma_)) {
+#if JXL_PNG_VERBOSE >= 1
+      JXL_WARNING("No (valid) gAMA nor sRGB, assuming sRGB");
+#endif
+      c_original->tf.SetTransferFunction(TransferFunction::kSRGB);
+    }
+
+    c_original->rendering_intent = RenderingIntent::kRelative;
+    if (c_original->CreateICC()) return true;
+
+    // Last resort: give up on the gAMA/cHRM values and use plain sRGB.
+    JXL_WARNING(
+        "DATA LOSS: unable to create an ICC profile for PNG gAMA/cHRM.\n"
+        "Image pixels will be interpreted as sRGB. Please add an ICC \n"
+        "profile to the input image");
+    return c_original->SetSRGB(color_space);
+  }
+
+  // Whether the image has any color profile information (ICC chunk, sRGB
+  // chunk, cHRM chunk, and so on), or has no color information chunks at all.
+  // Note that have_icc_ is set for every parsed iCCP chunk, even if its
+  // payload failed to decompress (see DecodeICC).
+  bool HaveColorProfile() const {
+    return have_pq_ || have_srgb_ || have_gama_ || have_chrm_ || have_icc_;
+  }
+
+ private:
+  // Parses an iCCP payload: a non-empty, 0-terminated profile name (the scan
+  // fails once 80 bytes were read without a terminator), one compression
+  // method byte that must be 0, then the zlib-compressed profile.
+  // A decompression error is not reported: icc_ is left untouched, but
+  // have_icc_ is still set and true is returned.
+  Status DecodeICC(const unsigned char* const payload,
+                   const size_t payload_size) {
+    if (payload_size == 0) return JXL_FAILURE("Empty ICC payload");
+    const unsigned char* pos = payload;
+    const unsigned char* end = payload + payload_size;
+
+    // Profile name
+    if (*pos == '\0') return JXL_FAILURE("Expected ICC name");
+    for (size_t i = 0;; ++i) {
+      if (i == 80) return JXL_FAILURE("ICC profile name too long");
+      if (pos == end) return JXL_FAILURE("Not enough bytes for ICC name");
+      if (*pos++ == '\0') break;
+    }
+
+    // Special case for BT.2100 PQ (https://w3c.github.io/png-hdr-pq/) - try to
+    // synthesize the profile because table-based curves are less accurate.
+    // strcmp is safe because we already verified the string is 0-terminated.
+    if (!strcmp(reinterpret_cast<const char*>(payload), "ITUR_2100_PQ_FULL")) {
+      have_pq_ = true;
+    }
+
+    // Skip over compression method (only one is allowed)
+    if (pos == end) return JXL_FAILURE("Not enough bytes for ICC method");
+    if (*pos++ != 0) return JXL_FAILURE("Unsupported ICC method");
+
+    // Decompress
+    unsigned char* icc_buf = nullptr;
+    size_t icc_size = 0;
+    LodePNGDecompressSettings settings;
+    lodepng_decompress_settings_init(&settings);
+    const unsigned err = lodepng_zlib_decompress(
+        &icc_buf, &icc_size, pos, payload_size - (pos - payload), &settings);
+    if (err == 0) {
+      icc_.resize(icc_size);
+      memcpy(icc_.data(), icc_buf, icc_size);
+    }
+    // The decompressed buffer is released with free() on both paths.
+    free(icc_buf);
+    have_icc_ = true;
+    return true;
+  }
+
+  // Returns floating-point value from the PNG encoding (times 10^5).
+  // The 32-bit value is reinterpreted as signed before scaling.
+  static double F64FromU32(const uint32_t x) {
+    return static_cast<int32_t>(x) * 1E-5;
+  }
+
+  // Parses an sRGB chunk: exactly one byte holding a rendering intent < 4.
+  Status DecodeSRGB(const unsigned char* payload, const size_t payload_size) {
+    if (payload_size != 1) return JXL_FAILURE("Wrong sRGB size");
+    // (PNG uses the same values as ICC.)
+    if (payload[0] >= 4) return JXL_FAILURE("Invalid Rendering Intent");
+    rendering_intent_ = static_cast<RenderingIntent>(payload[0]);
+    have_srgb_ = true;
+    return true;
+  }
+
+  // Parses a gAMA chunk: one big-endian 32-bit value, stored scaled by 1E-5.
+  // The value itself is not validated here.
+  Status DecodeGAMA(const unsigned char* payload, const size_t payload_size) {
+    if (payload_size != 4) return JXL_FAILURE("Wrong gAMA size");
+    gamma_ = F64FromU32(LoadBE32(payload));
+    have_gama_ = true;
+    return true;
+  }
+
+  // Parses a cHRM chunk: eight big-endian 32-bit values in the order white
+  // point x/y, red x/y, green x/y, blue x/y, each scaled by 1E-5.
+  Status DecodeCHRM(const unsigned char* payload, const size_t payload_size) {
+    if (payload_size != 32) return JXL_FAILURE("Wrong cHRM size");
+    white_point_.x = F64FromU32(LoadBE32(payload + 0));
+    white_point_.y = F64FromU32(LoadBE32(payload + 4));
+    primaries_.r.x = F64FromU32(LoadBE32(payload + 8));
+    primaries_.r.y = F64FromU32(LoadBE32(payload + 12));
+    primaries_.g.x = F64FromU32(LoadBE32(payload + 16));
+    primaries_.g.y = F64FromU32(LoadBE32(payload + 20));
+    primaries_.b.x = F64FromU32(LoadBE32(payload + 24));
+    primaries_.b.y = F64FromU32(LoadBE32(payload + 28));
+    have_chrm_ = true;
+    return true;
+  }
+
+  // Copies an eXIf payload into blobs->exif unless the blob already stored
+  // there is strictly larger. Always returns true.
+  Status DecodeEXIF(const unsigned char* payload, const size_t payload_size,
+                    Blobs* blobs) {
+    // If we already have EXIF, keep the larger one.
+    if (blobs->exif.size() > payload_size) return true;
+    blobs->exif.resize(payload_size);
+    memcpy(blobs->exif.data(), payload, payload_size);
+    return true;
+  }
+
+  // Walks all chunks of the PNG file in `bytes` and dispatches eXIf, iCCP,
+  // sRGB, gAMA and cHRM to their decoders. Returns false for APNG files
+  // (acTL/fcTL/fdAT present) and when a chunk decoder fails. A malformed or
+  // truncated chunk only ends the scan; a chunk with a bad CRC is skipped.
+  Status Decode(const Span<const uint8_t> bytes, Blobs* blobs) {
+    // Look for colorimetry and text chunks in the PNG image. The PNG chunks
+    // begin after the PNG magic header of 8 bytes.
+    const unsigned char* chunk = bytes.data() + 8;
+    const unsigned char* end = bytes.data() + bytes.size();
+    for (;;) {
+      // chunk points to the first field of a PNG chunk. The chunk has
+      // respectively 4 bytes of length, 4 bytes type, length bytes of data,
+      // 4 bytes CRC.
+      if (chunk + 4 >= end) {
+        break;  // Regular end reached.
+      }
+
+      char type_char[5];
+      if (chunk + 8 >= end) {
+        JXL_NOTIFY_ERROR("PNG: malformed chunk");
+        break;
+      }
+      lodepng_chunk_type(type_char, chunk);
+      std::string type = type_char;
+
+      if (type == "acTL" || type == "fcTL" || type == "fdAT") {
+        // this is an APNG file, without proper handling we would just return
+        // the first frame, so for now codec_apng handles animation until the
+        // animation chunk handling is added here
+        return false;
+      }
+      if (type == "eXIf" || type == "iCCP" || type == "sRGB" ||
+          type == "gAMA" || type == "cHRM") {
+        const unsigned char* payload = lodepng_chunk_data_const(chunk);
+        const size_t payload_size = lodepng_chunk_length(chunk);
+        // The entire chunk needs also 4 bytes of CRC after the payload.
+        // The comparison is `>=`, so a chunk ending exactly at `end` is also
+        // treated as truncated.
+        if (payload + payload_size + 4 >= end) {
+          JXL_NOTIFY_ERROR("PNG: truncated chunk");
+          break;
+        }
+        if (lodepng_chunk_check_crc(chunk) != 0) {
+          JXL_NOTIFY_ERROR("CRC mismatch in unknown PNG chunk");
+          chunk = lodepng_chunk_next_const(chunk, end);
+          continue;
+        }
+
+        if (type == "eXIf") {
+          JXL_RETURN_IF_ERROR(DecodeEXIF(payload, payload_size, blobs));
+        } else if (type == "iCCP") {
+          JXL_RETURN_IF_ERROR(DecodeICC(payload, payload_size));
+        } else if (type == "sRGB") {
+          JXL_RETURN_IF_ERROR(DecodeSRGB(payload, payload_size));
+        } else if (type == "gAMA") {
+          JXL_RETURN_IF_ERROR(DecodeGAMA(payload, payload_size));
+        } else if (type == "cHRM") {
+          JXL_RETURN_IF_ERROR(DecodeCHRM(payload, payload_size));
+        }
+      }
+
+      chunk = lodepng_chunk_next_const(chunk, end);
+    }
+    return true;
+  }
+
+  // Decompressed iCCP payload; moved out of this object by operator().
+  PaddedBytes icc_;
+
+  // Set by the chunk decoders above when the corresponding chunk was parsed.
+  bool have_pq_ = false;
+  bool have_srgb_ = false;
+  bool have_gama_ = false;
+  bool have_chrm_ = false;
+  bool have_icc_ = false;
+
+  // Only valid if have_srgb_:
+  RenderingIntent rendering_intent_;
+
+  // Only valid if have_gama_:
+  double gamma_;
+
+  // Only valid if have_chrm_:
+  CIExy white_point_;
+  PrimariesCIExy primaries_;
+};
+
+// Derives io's color encoding from the decoder hints. "color_space" is parsed
+// as a description and "icc_pathname" is read as an ICC file; every other hint
+// only produces a warning. If neither of the two was given, sRGB (gray or RGB
+// according to `is_gray`) is assumed.
+Status ApplyHints(const bool is_gray, CodecInOut* io) {
+  ColorEncoding* const encoding = &io->metadata.m.color_encoding;
+  bool have_encoding = false;
+
+  const auto apply_hint = [is_gray, encoding, &have_encoding](
+                              const std::string& key,
+                              const std::string& value) -> Status {
+    if (key == "color_space") {
+      if (!(ParseDescription(value, encoding) && encoding->CreateICC())) {
+        return JXL_FAILURE("PNG: Failed to apply color_space");
+      }
+      // The hint must agree with the pixel layout of the file.
+      if (encoding->IsGray() != is_gray) {
+        return JXL_FAILURE("PNG: mismatch between file and color_space hint");
+      }
+      have_encoding = true;
+      return true;
+    }
+
+    if (key == "icc_pathname") {
+      PaddedBytes icc_bytes;
+      JXL_RETURN_IF_ERROR(ReadFile(value, &icc_bytes));
+      JXL_RETURN_IF_ERROR(encoding->SetICC(std::move(icc_bytes)));
+      have_encoding = true;
+      return true;
+    }
+
+    JXL_WARNING("PNG decoder ignoring %s hint", key.c_str());
+    return true;
+  };
+  JXL_RETURN_IF_ERROR(io->dec_hints.Foreach(apply_hint));
+
+  if (have_encoding) return true;
+
+  JXL_WARNING("PNG: no color_space/icc_pathname given, assuming sRGB");
+  const ColorSpace fallback_space =
+      is_gray ? ColorSpace::kGray : ColorSpace::kRGB;
+  JXL_RETURN_IF_ERROR(encoding->SetSRGB(fallback_space));
+  return true;
+}
+
+// Stores ColorEncoding into PNG chunks.
+//
+// All chunks are appended to the first unknown-chunk group of the
+// LodePNGInfo, so their original position in a source file is not preserved.
+class ColorEncodingWriterPNG {
+ public:
+  // Writes the chunks describing `c` into `info`:
+  //  - sRGB only, if `c` is sRGB;
+  //  - otherwise iCCP, unless `c` has fields and a gamma transfer function;
+  //  - then gAMA (when the transfer function is gamma, linear or sRGB) and
+  //    cHRM.
+  // Returns a failure if any chunk cannot be created.
+  static Status Encode(const ColorEncoding& c, LodePNGInfo* JXL_RESTRICT info) {
+    // Prefer to only write sRGB - smaller.
+    if (c.IsSRGB()) {
+      JXL_RETURN_IF_ERROR(AddSRGB(c, info));
+      // PNG recommends not including both sRGB and iCCP, so skip the latter.
+    } else if (!c.HaveFields() || !c.tf.IsGamma()) {
+      // Having a gamma value means that the source was a PNG with gAMA and
+      // without iCCP.
+      JXL_ASSERT(!c.ICC().empty());
+      JXL_RETURN_IF_ERROR(AddICC(c.ICC(), info));
+    }
+
+    // gAMA and cHRM are always allowed but will be overridden by sRGB/iCCP.
+    JXL_RETURN_IF_ERROR(MaybeAddGAMA(c, info));
+    JXL_RETURN_IF_ERROR(MaybeAddCHRM(c, info));
+    return true;
+  }
+
+ private:
+  // Appends a chunk of the given 4-character `type` carrying `payload`.
+  static Status AddChunk(const char* type, const PaddedBytes& payload,
+                         LodePNGInfo* JXL_RESTRICT info) {
+    // Ignore original location/order of chunks; place them in the first group.
+    if (lodepng_chunk_create(&info->unknown_chunks_data[0],
+                             &info->unknown_chunks_size[0], payload.size(),
+                             type, payload.data()) != 0) {
+      return JXL_FAILURE("Failed to add chunk");
+    }
+    return true;
+  }
+
+  // Adds an iCCP chunk: profile name "1", a 0 terminator, compression method
+  // 0, then the zlib-compressed `icc` bytes.
+  static Status AddICC(const PaddedBytes& icc, LodePNGInfo* JXL_RESTRICT info) {
+    LodePNGCompressSettings settings;
+    lodepng_compress_settings_init(&settings);
+    unsigned char* out = nullptr;
+    size_t out_size = 0;
+    if (lodepng_zlib_compress(&out, &out_size, icc.data(), icc.size(),
+                              &settings) != 0) {
+      return JXL_FAILURE("Failed to compress ICC");
+    }
+
+    PaddedBytes payload;
+    payload.resize(3 + out_size);
+    // TODO(janwas): use special name if PQ
+    payload[0] = '1';  // profile name
+    payload[1] = '\0';
+    payload[2] = 0;  // compression method (zlib)
+    memcpy(&payload[3], out, out_size);
+    free(out);
+
+    return AddChunk("iCCP", payload, info);
+  }
+
+  // Adds an sRGB chunk whose single byte is the rendering intent of `c`.
+  static Status AddSRGB(const ColorEncoding& c,
+                        LodePNGInfo* JXL_RESTRICT info) {
+    PaddedBytes payload;
+    payload.push_back(static_cast<uint8_t>(c.rendering_intent));
+    return AddChunk("sRGB", payload, info);
+  }
+
+  // Returns PNG encoding of floating-point value (times 10^5).
+  // Rounds in double precision: `roundf` would first narrow the product to
+  // float, which can move a value across a .5 boundary and yield a result
+  // that is off by one.
+  static uint32_t U32FromF64(const double x) {
+    return static_cast<int32_t>(round(x * 1E5));
+  }
+
+  // Adds a gAMA chunk when the transfer function has a known PNG gamma:
+  // the stored gamma, 1 for linear, or 0.45455 for sRGB. Any other transfer
+  // function writes nothing and still succeeds.
+  static Status MaybeAddGAMA(const ColorEncoding& c,
+                             LodePNGInfo* JXL_RESTRICT info) {
+    double gamma;
+    if (c.tf.IsGamma()) {
+      gamma = c.tf.GetGamma();
+    } else if (c.tf.IsLinear()) {
+      gamma = 1;
+    } else if (c.tf.IsSRGB()) {
+      gamma = 0.45455;
+    } else {
+      return true;
+    }
+
+    PaddedBytes payload(4);
+    StoreBE32(U32FromF64(gamma), payload.data());
+    return AddChunk("gAMA", payload, info);
+  }
+
+  // Adds a cHRM chunk with the white point and primaries of `c`, as eight
+  // big-endian 32-bit values (white x/y, red x/y, green x/y, blue x/y).
+  static Status MaybeAddCHRM(const ColorEncoding& c,
+                             LodePNGInfo* JXL_RESTRICT info) {
+    CIExy white_point = c.GetWhitePoint();
+    // A PNG image stores both whitepoint and primaries in the cHRM chunk, but
+    // for grayscale images we don't have primaries. It does not matter what
+    // values are stored in the PNG though (all colors are a multiple of the
+    // whitepoint), so choose default ones. See
+    // http://www.libpng.org/pub/png/spec/1.2/PNG-Chunks.html section 4.2.2.1.
+    PrimariesCIExy primaries =
+        c.IsGray() ? ColorEncoding().GetPrimaries() : c.GetPrimaries();
+
+    if (c.primaries == Primaries::kSRGB && c.white_point == WhitePoint::kD65) {
+      // For sRGB, the cHRM chunk is supposed to have very specific values which
+      // don't quite match the pre-quantized ones we have (red is off by
+      // 0.00010). Technically, this is only required for full sRGB, but for
+      // consistency, we might as well use them whenever the primaries and white
+      // point are sRGB's.
+      white_point.x = 0.31270;
+      white_point.y = 0.32900;
+      primaries.r.x = 0.64000;
+      primaries.r.y = 0.33000;
+      primaries.g.x = 0.30000;
+      primaries.g.y = 0.60000;
+      primaries.b.x = 0.15000;
+      primaries.b.y = 0.06000;
+    }
+
+    PaddedBytes payload(32);
+    StoreBE32(U32FromF64(white_point.x), &payload[0]);
+    StoreBE32(U32FromF64(white_point.y), &payload[4]);
+    StoreBE32(U32FromF64(primaries.r.x), &payload[8]);
+    StoreBE32(U32FromF64(primaries.r.y), &payload[12]);
+    StoreBE32(U32FromF64(primaries.g.x), &payload[16]);
+    StoreBE32(U32FromF64(primaries.g.y), &payload[20]);
+    StoreBE32(U32FromF64(primaries.b.x), &payload[24]);
+    StoreBE32(U32FromF64(primaries.b.y), &payload[28]);
+    return AddChunk("cHRM", payload, info);
+  }
+};
+
+// RAII - ensures state is freed even if returning early.
+// The wrapper initializes `s` on construction and cleans it up on destruction.
+// Copying is disabled: a member-wise copy would hand the same LodePNGState
+// contents to two destructors, so lodepng_state_cleanup would run twice on
+// them.
+struct PNGState {
+  PNGState() { lodepng_state_init(&s); }
+  ~PNGState() { lodepng_state_cleanup(&s); }
+
+  PNGState(const PNGState&) = delete;
+  PNGState& operator=(const PNGState&) = delete;
+
+  LodePNGState s;
+};
+
+// Reports through `is_gray` whether pixels of the given color mode can be
+// stored as grayscale. Fails (with *is_gray = false) for an unknown color
+// type.
+Status CheckGray(const LodePNGColorMode& mode, bool has_icc, bool* is_gray) {
+  const LodePNGColorType type = mode.colortype;
+
+  if (type == LCT_GREY || type == LCT_GREY_ALPHA) {
+    *is_gray = true;
+    return true;
+  }
+
+  if (type == LCT_RGB || type == LCT_RGBA) {
+    *is_gray = false;
+    return true;
+  }
+
+  if (type == LCT_PALETTE) {
+    // If an ICC profile is present, the PNG specification requires palette
+    // to be interpreted as RGB colored, not grayscale, so we must output
+    // color in that case and unfortunately can't optimize it to gray if the
+    // palette only has gray entries.
+    // Without ICC, the palette is gray iff R == G == B for every entry
+    // (entries are 4 bytes each).
+    bool all_gray = !has_icc;
+    for (size_t entry = 0; all_gray && entry < mode.palettesize; ++entry) {
+      const unsigned char red = mode.palette[entry * 4];
+      all_gray = (red == mode.palette[entry * 4 + 1]) &&
+                 (red == mode.palette[entry * 4 + 2]);
+    }
+    *is_gray = all_gray;
+    return true;
+  }
+
+  *is_gray = false;
+  return JXL_FAILURE("Unexpected PNG color type");
+}
+
+// Reports through `has_alpha` whether the given color mode carries
+// transparency. Fails (with *has_alpha = false) for an unknown color type.
+Status CheckAlpha(const LodePNGColorMode& mode, bool* has_alpha) {
+  // Color key marks a single color as transparent, whatever the color type.
+  if (mode.key_defined) {
+    *has_alpha = true;
+    return true;
+  }
+
+  const LodePNGColorType type = mode.colortype;
+
+  if (type == LCT_GREY || type == LCT_RGB) {
+    *has_alpha = false;
+    return true;
+  }
+
+  if (type == LCT_GREY_ALPHA || type == LCT_RGBA) {
+    *has_alpha = true;
+    return true;
+  }
+
+  if (type == LCT_PALETTE) {
+    // PNG palettes are always 8-bit, so any alpha entry other than 255 means
+    // the image is not fully opaque.
+    bool translucent = false;
+    for (size_t entry = 0; !translucent && entry < mode.palettesize; ++entry) {
+      translucent = (mode.palette[entry * 4 + 3] != 255);
+    }
+    *has_alpha = translucent;
+    return true;
+  }
+
+  *has_alpha = false;
+  return JXL_FAILURE("Unexpected PNG color type");
+}
+
+// Maps the (gray, alpha) pair to the matching non-palette LodePNG color type.
+LodePNGColorType MakeType(const bool is_gray, const bool has_alpha) {
+  if (has_alpha) {
+    return is_gray ? LCT_GREY_ALPHA : LCT_RGBA;
+  }
+  return is_gray ? LCT_GREY : LCT_RGB;
+}
+
+// Looks up the first chunk of the given type in the PNG file `bytes` and, if
+// there is one, lets LodePNG fold its information into `state`. A missing
+// chunk is not an error; a chunk that LodePNG rejects is.
+Status InspectChunkType(const Span<const uint8_t> bytes,
+                        const std::string& type, LodePNGState* state) {
+  const unsigned char* const file_begin = bytes.data();
+  const size_t file_size = bytes.size();
+
+  const unsigned char* const found = lodepng_chunk_find_const(
+      file_begin, file_begin + file_size, type.c_str());
+  if (!found) return true;
+
+  // LodePNG expects the chunk position as an offset from the file start.
+  if (lodepng_inspect_chunk(state, found - file_begin, file_begin,
+                            file_size) != 0) {
+    return JXL_FAILURE("Invalid chunk \"%s\" in PNG image", type.c_str());
+  }
+  return true;
+}
+
+}  // namespace
+
+// Decodes the PNG file in `bytes` into `io`: image size, bit depth, alpha,
+// metadata blobs, color encoding and pixels.
+//
+// Returns false without reporting an error when LodePNG does not recognize
+// `bytes` as a PNG, and a failure when the dimensions are rejected, a
+// PLTE/tRNS/iCCP chunk is invalid, the bit depth is unexpected or decoding
+// fails. Samples are always decoded to 8 or 16 bits, gray or RGB, with
+// optional alpha (never palette). io->dec_hints are only applied when the
+// file has no color profile chunks; otherwise each hint is reported as
+// ignored.
+Status DecodeImagePNG(const Span<const uint8_t> bytes, ThreadPool* pool,
+                      CodecInOut* io) {
+  unsigned w, h;
+  PNGState state;
+  if (lodepng_inspect(&w, &h, &state.s, bytes.data(), bytes.size()) != 0) {
+    return false;  // not an error - just wrong format
+  }
+  JXL_RETURN_IF_ERROR(VerifyDimensions(&io->constraints, w, h));
+  io->SetSize(w, h);
+  // Palette RGB values
+  if (!InspectChunkType(bytes, "PLTE", &state.s)) {
+    return false;
+  }
+  // Transparent color key, or palette transparency
+  if (!InspectChunkType(bytes, "tRNS", &state.s)) {
+    return false;
+  }
+  // ICC profile
+  if (!InspectChunkType(bytes, "iCCP", &state.s)) {
+    return false;
+  }
+  const LodePNGColorMode& color_mode = state.s.info_png.color;
+  bool has_icc = state.s.info_png.iccp_defined;
+
+  bool is_gray, has_alpha;
+  JXL_RETURN_IF_ERROR(CheckGray(color_mode, has_icc, &is_gray));
+  JXL_RETURN_IF_ERROR(CheckAlpha(color_mode, &has_alpha));
+  // We want LodePNG to promote 1/2/4 bit pixels to 8.
+  size_t bits_per_sample = std::max(color_mode.bitdepth, 8u);
+  if (bits_per_sample != 8 && bits_per_sample != 16) {
+    return JXL_FAILURE("Unexpected PNG bit depth");
+  }
+  io->metadata.m.SetUintSamples(static_cast<uint32_t>(bits_per_sample));
+  io->metadata.m.SetAlphaBits(
+      has_alpha ? io->metadata.m.bit_depth.bits_per_sample : 0);
+
+  // Always decode to 8/16-bit RGB/RGBA, not LCT_PALETTE.
+  state.s.info_raw.bitdepth = static_cast<unsigned>(bits_per_sample);
+  state.s.info_raw.colortype = MakeType(is_gray, has_alpha);
+  unsigned char* out = nullptr;
+  const unsigned err =
+      lodepng_decode(&out, &w, &h, &state.s, bytes.data(), bytes.size());
+  // Automatically call free(out) on return.
+  std::unique_ptr<unsigned char, void (*)(void*)> out_ptr{out, free};
+  if (err != 0) {
+    return JXL_FAILURE("PNG decode failed: %s", lodepng_error_text(err));
+  }
+
+  // Missing or broken metadata blobs are not fatal.
+  if (!BlobsReaderPNG::Decode(state.s.info_png, &io->blobs)) {
+    JXL_WARNING("PNG metadata may be incomplete");
+  }
+  ColorEncodingReaderPNG reader;
+  JXL_RETURN_IF_ERROR(reader(bytes, is_gray, io));
+#if JXL_PNG_VERBOSE >= 1
+  printf("PNG read %s\n", Description(io->metadata.m.color_encoding).c_str());
+#endif
+
+  // `w` and `h` are `unsigned`; widen before multiplying so that the pixel
+  // count (and the buffer size derived from it) cannot wrap around in 32-bit
+  // arithmetic for large images.
+  const size_t num_pixels = static_cast<size_t>(w) * h;
+  const size_t num_channels = (is_gray ? 1 : 3) + has_alpha;
+  const size_t out_size =
+      num_pixels * num_channels * bits_per_sample / kBitsPerByte;
+
+  const JxlEndianness endianness = JXL_BIG_ENDIAN;  // PNG requirement
+  const Span<const uint8_t> span(out, out_size);
+  const bool ok =
+      ConvertFromExternal(span, w, h, io->metadata.m.color_encoding, has_alpha,
+                          /*alpha_is_premultiplied=*/false,
+                          io->metadata.m.bit_depth.bits_per_sample, endianness,
+                          /*flipped_y=*/false, pool, &io->Main());
+  JXL_RETURN_IF_ERROR(ok);
+  io->dec_pixels = num_pixels;
+  io->metadata.m.bit_depth.bits_per_sample = io->Main().DetectRealBitdepth();
+  io->metadata.m.xyb_encoded = false;
+  SetIntensityTarget(io);
+  if (!reader.HaveColorProfile()) {
+    // No color information in the file: fall back to the caller's hints.
+    JXL_RETURN_IF_ERROR(ApplyHints(is_gray, io));
+  } else {
+    (void)io->dec_hints.Foreach(
+        [](const std::string& key, const std::string& /*value*/) {
+          JXL_WARNING("PNG decoder ignoring %s hint", key.c_str());
+          return true;
+        });
+  }
+  return true;
+}
+
+// Encodes the main image of `io` as a PNG into `bytes`, after converting it
+// to `c_desired` where needed. `bits_per_sample` is rounded to what is
+// implemented: anything above 8 becomes 16, everything else becomes 8. The
+// color encoding and the metadata blobs of `io` are written as PNG chunks.
+Status EncodeImagePNG(const CodecInOut* io, const ColorEncoding& c_desired,
+                      size_t bits_per_sample, ThreadPool* pool,
+                      PaddedBytes* bytes) {
+  // PNG can also do 4, 2, and 1 bits per sample, but it isn't implemented.
+  bits_per_sample = (bits_per_sample > 8) ? 16 : 8;
+
+  ImageBundle ib = io->Main().Copy();
+  const bool with_alpha = ib.HasAlpha();
+  const size_t alpha_bits = with_alpha ? bits_per_sample : 0;
+
+  ImageMetadata metadata = io->metadata.m;
+  ImageBundle store(&metadata);
+  const ImageBundle* transformed;
+  JXL_RETURN_IF_ERROR(
+      TransformIfNeeded(ib, c_desired, pool, &store, &transformed));
+
+  // Bytes per output row: width times the (rounded up) bytes per pixel.
+  const size_t bytes_per_pixel = DivCeil(
+      c_desired.Channels() * bits_per_sample + alpha_bits, kBitsPerByte);
+  size_t stride = ib.oriented_xsize() * bytes_per_pixel;
+  PaddedBytes raw_bytes(stride * ib.oriented_ysize());
+  JXL_RETURN_IF_ERROR(ConvertToExternal(
+      *transformed, bits_per_sample, /*float_out=*/false,
+      c_desired.Channels() + (with_alpha ? 1 : 0), JXL_BIG_ENDIAN, stride, pool,
+      raw_bytes.data(), raw_bytes.size(), /*out_callback=*/nullptr,
+      /*out_opaque=*/nullptr, metadata.GetOrientation()));
+
+  PNGState state;
+  // For maximum compatibility, still store 8-bit even if pixels are all zero.
+  state.s.encoder.auto_convert = 0;
+
+  // The raw input buffer uses the same color mode as the PNG being written.
+  LodePNGInfo* info = &state.s.info_png;
+  info->color.bitdepth = bits_per_sample;
+  info->color.colortype = MakeType(ib.IsGray(), with_alpha);
+  state.s.info_raw = info->color;
+
+  JXL_RETURN_IF_ERROR(ColorEncodingWriterPNG::Encode(c_desired, info));
+  JXL_RETURN_IF_ERROR(BlobsWriterPNG::Encode(io->blobs, info));
+
+  unsigned char* png_data = nullptr;
+  size_t png_size = 0;
+  const unsigned err =
+      lodepng_encode(&png_data, &png_size, raw_bytes.data(),
+                     ib.oriented_xsize(), ib.oriented_ysize(), &state.s);
+  // Automatically call free(png_data) on return.
+  std::unique_ptr<unsigned char, void (*)(void*)> png_owner{png_data, free};
+  if (err != 0) {
+    return JXL_FAILURE("Failed to encode PNG: %s", lodepng_error_text(err));
+  }
+
+  bytes->resize(png_size);
+  memcpy(bytes->data(), png_data, png_size);
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_png.h b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_png.h
new file mode 100644
index 0000000000..ca5c76fadf
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_png.h
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_CODEC_PNG_H_
+#define LIB_EXTRAS_CODEC_PNG_H_
+
+// Encodes/decodes PNG pixels and metadata in memory.
+
+#include <stddef.h>
+#include <stdint.h>
+
+// TODO(janwas): workaround for incorrect Win64 codegen (cause unknown)
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+
+namespace jxl {
+
+// Decodes `bytes` into `io`. io->dec_hints are only used if the PNG has no color profile chunks.
+Status DecodeImagePNG(const Span<const uint8_t> bytes, ThreadPool* pool,
+                      CodecInOut* io);
+
+// Transforms from io->c_current to `c_desired` and encodes into `bytes`.
+Status EncodeImagePNG(const CodecInOut* io, const ColorEncoding& c_desired,
+                      size_t bits_per_sample, ThreadPool* pool,
+                      PaddedBytes* bytes);
+
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_CODEC_PNG_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_pnm.cc b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_pnm.cc
new file mode 100644
index 0000000000..3914924a25
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_pnm.cc
@@ -0,0 +1,603 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/codec_pnm.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/file_io.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/fields.h"  // AllDefault
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/luminance.h"
+
+namespace jxl {
+namespace {
+
+// Header fields filled in by Parser::ParseHeader and its format-specific
+// helpers. No member has a default value here. ParseHeader itself only
+// assigns is_bit, is_yuv and is_gray (plus bits_per_sample for "P4"); the
+// remaining fields are presumably set by the per-format parsers - verify
+// before reading them on any new code path.
+struct HeaderPNM {
+  size_t xsize;
+  size_t ysize;
+  bool is_bit;   // PBM
+  bool is_gray;  // PGM
+  int is_yuv;    // Y4M: where 1 = 444, 2 = 422, 3 = 420
+  size_t bits_per_sample;
+  bool floating_point;
+  bool big_endian;
+};
+
+class Parser {
+ public:
+  explicit Parser(const Span<const uint8_t> bytes)
+      : pos_(bytes.data()), end_(pos_ + bytes.size()) {}
+
+  // Dispatches on the two magic bytes: "YU" goes to the Y4M parser, "P4",
+  // "P5" and "P6" to the PNM parser, "PF" and "Pf" to the PFM parser.
+  // Anything else returns false. Sets "pos" to the first non-header
+  // byte/pixel on success.
+  Status ParseHeader(HeaderPNM* header, const uint8_t** pos) {
+    // codec.cc ensures we have at least two bytes => no range check here.
+    const uint8_t* const magic = pos_;
+    if (magic[0] == 'Y' && magic[1] == 'U') return ParseHeaderY4M(header, pos);
+    if (magic[0] != 'P') return false;
+    const uint8_t type = magic[1];
+    pos_ = magic + 2;
+
+    header->is_bit = false;
+    header->is_yuv = 0;
+
+    if (type == '4') {
+      header->is_bit = true;
+      header->is_gray = true;
+      header->bits_per_sample = 1;
+      return ParseHeaderPNM(header, pos);
+    }
+
+    if (type == '5' || type == '6') {
+      header->is_gray = (type == '5');
+      return ParseHeaderPNM(header, pos);
+    }
+
+    // TODO(jon): P7 (PAM)
+
+    if (type == 'F' || type == 'f') {
+      header->is_gray = (type == 'f');
+      return ParseHeaderPFM(header, pos);
+    }
+
+    return false;
+  }
+
+  // Exposed for testing
+  // Parses a run of decimal digits at the current position into `*number` and
+  // advances past it. Fails if the input is exhausted, if the current byte is
+  // not a digit, or if the value does not fit into size_t (instead of silently
+  // wrapping around, which would yield bogus image dimensions).
+  Status ParseUnsigned(size_t* number) {
+    if (pos_ == end_) return JXL_FAILURE("PNM: reached end before number");
+    if (!IsDigit(*pos_)) return JXL_FAILURE("PNM: expected unsigned number");
+
+    const size_t kMaxValue = ~static_cast<size_t>(0);
+
+    *number = 0;
+    while (pos_ < end_ && IsDigit(*pos_)) {
+      const size_t digit = static_cast<size_t>(*pos_ - '0');
+      // number * 10 + digit <= kMaxValue  <=>  number <= (kMaxValue - digit) / 10
+      if (*number > (kMaxValue - digit) / 10) {
+        return JXL_FAILURE("PNM: unsigned number too large");
+      }
+      *number = *number * 10 + digit;
+      ++pos_;
+    }
+
+    return true;
+  }
+
+  // Parses an optionally signed decimal number with an optional fractional
+  // part (no exponent) into `*number` and advances past it. Fails if the
+  // input is exhausted, if it does not start with a sign or a digit, or if
+  // it ends right after the sign. `*number` is only written on success.
+  Status ParseSigned(double* number) {
+    if (pos_ == end_) return JXL_FAILURE("PNM: reached end before signed");
+
+    const uint8_t first = *pos_;
+    const bool is_neg = (first == '-');
+    const bool has_sign = is_neg || (first == '+');
+    if (!has_sign && !IsDigit(first)) {
+      return JXL_FAILURE("PNM: expected signed number");
+    }
+
+    // Skip sign
+    if (has_sign) {
+      ++pos_;
+      if (pos_ == end_) return JXL_FAILURE("PNM: reached end before digits");
+    }
+
+    // Leading digits
+    double value = 0.0;
+    for (; pos_ < end_ && IsDigit(*pos_); ++pos_) {
+      value *= 10;
+      value += *pos_ - '0';
+    }
+
+    // Decimal places?
+    if (pos_ < end_ && *pos_ == '.') {
+      ++pos_;
+      for (double place = 0.1; pos_ < end_ && IsDigit(*pos_); ++pos_) {
+        value += (*pos_ - '0') * place;
+        place *= 0.1;
+      }
+    }
+
+    *number = is_neg ? -value : value;
+    return true;
+  }
+
+ private:
+  // Character classes used by the header parser.
+  // IsDigit: ASCII '0'..'9'.
+  static bool IsDigit(const uint8_t c) { return c >= '0' && c <= '9'; }
+  // IsLineBreak: carriage return or line feed.
+  static bool IsLineBreak(const uint8_t c) { return c == '\n' || c == '\r'; }
+  // IsWhitespace: a line break, a tab or a space.
+  static bool IsWhitespace(const uint8_t c) {
+    if (IsLineBreak(c)) return true;
+    return c == ' ' || c == '\t';
+  }
+
+  // Consumes exactly one byte, which must be a space or a line feed.
+  Status SkipBlank() {
+    if (pos_ == end_) return JXL_FAILURE("PNM: reached end before blank");
+    const bool is_blank = (*pos_ == ' ' || *pos_ == '\n');
+    if (!is_blank) return JXL_FAILURE("PNM: expected blank");
+    pos_ += 1;
+    return true;
+  }
+
+  // Consumes exactly one byte, which must satisfy IsWhitespace.
+  Status SkipSingleWhitespace() {
+    if (pos_ == end_) return JXL_FAILURE("PNM: reached end before whitespace");
+    const uint8_t current = *pos_;
+    if (!IsWhitespace(current)) return JXL_FAILURE("PNM: expected whitespace");
+    pos_ += 1;
+    return true;
+  }
+
+  // Skips a separator made of whitespace and '#' comments. The current byte
+  // must be whitespace or '#'. After leading whitespace, every comment is
+  // skipped up to and including the line breaks that end it, and finally any
+  // further whitespace is skipped.
+  Status SkipWhitespace() {
+    if (pos_ == end_) return JXL_FAILURE("PNM: reached end before whitespace");
+    if (!(IsWhitespace(*pos_) || *pos_ == '#')) {
+      return JXL_FAILURE("PNM: expected whitespace/comment");
+    }
+
+    for (; pos_ < end_ && IsWhitespace(*pos_); ++pos_) {
+    }
+
+    // Comment(s)
+    while (pos_ != end_ && *pos_ == '#') {
+      // Rest of the comment line.
+      for (; pos_ != end_ && !IsLineBreak(*pos_); ++pos_) {
+      }
+      // Newline(s)
+      for (; pos_ != end_ && IsLineBreak(*pos_); ++pos_) {
+      }
+    }
+
+    for (; pos_ < end_ && IsWhitespace(*pos_); ++pos_) {
+    }
+    return true;
+  }
+
+  // Requires the next `len` bytes of input to equal the first `len` characters
+  // of `str` and consumes them. Fails, without consuming anything, when fewer
+  // than `len` bytes remain or when the bytes differ.
+  Status ExpectString(const char* str, size_t len) {
+    // Compare `len` with the number of remaining bytes instead of forming
+    // `pos_ + len`: pointer arithmetic beyond one-past-the-end is undefined
+    // behaviour, so an "overflow" test written on the pointer itself may be
+    // removed by the compiler.
+    if (pos_ > end_ || len > static_cast<size_t>(end_ - pos_) ||
+        strncmp(str, (const char*)pos_, len) != 0) {
+      return JXL_FAILURE("Y4M: expected %s", str);
+    }
+    pos_ += len;
+    return true;
+  }
+
+  // Stores the next input byte in `*out` and advances past it. Fails, leaving
+  // `*out` untouched, when the input is exhausted.
+  Status ReadChar(char* out) {
+    // The bounds check below is sufficient. A pointer-wraparound test such as
+    // `pos_ + 1 < pos_` relies on undefined behaviour and can never be true in
+    // a conforming program, so it is not performed.
+    if (pos_ >= end_) {
+      return JXL_FAILURE("Y4M: unexpected end of input");
+    }
+    *out = *pos_;
+    pos_++;
+    return true;
+  }
+
+  // Parses a YUV4MPEG2 (Y4M) stream header followed by the first frame header.
+  //
+  // The "YUV4MPEG2" signature is required at the current position. After it,
+  // fields introduced by a single space are read up to a line feed (0x0A):
+  //   W<n>, H<n>  width / height, stored in header->xsize / header->ysize
+  //   I<c>        interlacing; only 'p' is accepted
+  //   C4xx...     chroma format 444 / 422 / 420 -> header->is_yuv = 1 / 2 / 3;
+  //               whatever follows the three characters is skipped
+  //   F, A, X     value is skipped
+  // Any other field tag is a parse error. A "FRAME" line must follow; on
+  // success *pos is set to the first byte after that line.
+  // Defaults set before the fields are read: is_gray = false, is_yuv = 3 and
+  // 8 bits per sample.
+  // TODO(jon): support multi-frame y4m
+  Status ParseHeaderY4M(HeaderPNM* header, const uint8_t** pos) {
+    JXL_RETURN_IF_ERROR(ExpectString("YUV4MPEG2", 9));
+    header->is_gray = false;
+    header->is_yuv = 3;
+    // TODO(jon): check if 4:2:0 is indeed the default
+    header->bits_per_sample = 8;
+    // TODO(jon): check if there's a y4m convention for higher bit depths
+    while (pos_ < end_) {
+      char next = 0;
+      JXL_RETURN_IF_ERROR(ReadChar(&next));
+      // A line feed ends the stream header.
+      if (next == 0x0A) break;
+      // Only a space introduces a field; any other byte is skipped.
+      if (next != ' ') continue;
+      char field = 0;
+      JXL_RETURN_IF_ERROR(ReadChar(&field));
+      switch (field) {
+        case 'W':
+          JXL_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
+          break;
+        case 'H':
+          JXL_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
+          break;
+        case 'I':
+          JXL_RETURN_IF_ERROR(ReadChar(&next));
+          if (next != 'p') {
+            return JXL_FAILURE(
+                "Y4M: only progressive (no frame interlacing) allowed");
+          }
+          break;
+        case 'C': {
+          // The value must start with "444", "422" or "420".
+          char c1 = 0;
+          JXL_RETURN_IF_ERROR(ReadChar(&c1));
+          char c2 = 0;
+          JXL_RETURN_IF_ERROR(ReadChar(&c2));
+          char c3 = 0;
+          JXL_RETURN_IF_ERROR(ReadChar(&c3));
+          if (c1 != '4') return JXL_FAILURE("Y4M: invalid C param");
+          if (c2 == '4') {
+            if (c3 != '4') return JXL_FAILURE("Y4M: invalid C param");
+            header->is_yuv = 1;  // 444
+          } else if (c2 == '2') {
+            if (c3 == '2') {
+              header->is_yuv = 2;  // 422
+            } else if (c3 == '0') {
+              header->is_yuv = 3;  // 420
+            } else {
+              return JXL_FAILURE("Y4M: invalid C param");
+            }
+          } else {
+            return JXL_FAILURE("Y4M: invalid C param");
+          }
+        }
+          [[fallthrough]];
+          // no break: fallthrough because this field can have values like
+          // "C420jpeg" (we are ignoring the chroma sample location and treat
+          // everything like C420jpeg)
+        case 'F':  // Framerate in fps as numerator:denominator
+                   // TODO(jon): actually read this and set corresponding jxl
+                   // metadata
+        case 'A':  // Pixel aspect ratio (ignoring it, could perhaps adjust
+                   // intrinsic dimensions based on this?)
+        case 'X':  // Comment, ignore
+          // ignore the field value and go to next one
+          // Stops on (without consuming) the next space or line feed, so the
+          // outer loop sees the separator.
+          while (pos_ < end_) {
+            if (pos_[0] == ' ' || pos_[0] == 0x0A) break;
+            pos_++;
+          }
+          break;
+        default:
+          return JXL_FAILURE("Y4M: parse error");
+      }
+    }
+    JXL_RETURN_IF_ERROR(ExpectString("FRAME", 5));
+    // Skip the rest of the FRAME line; ReadChar fails if the input ends before
+    // a line feed is found, which terminates this loop with an error.
+    while (true) {
+      char next = 0;
+      JXL_RETURN_IF_ERROR(ReadChar(&next));
+      if (next == 0x0A) {
+        *pos = pos_;
+        return true;
+      }
+    }
+  }
+
+  // Parses the numeric part of a PBM/PGM/PPM header: width, height and, unless
+  // `header->is_bit` is set, MaxVal. Each number may be preceded by whitespace
+  // and `#` comments; exactly one whitespace byte must follow the last number.
+  //
+  // Fills xsize, ysize, floating_point (false) and big_endian (true). When
+  // MaxVal is read, bits_per_sample is set from it; when `is_bit` is set,
+  // bits_per_sample is left as the caller set it. MaxVal must be in
+  // [1, 65535]. On success `*pos` points at the first byte after the header.
+  Status ParseHeaderPNM(HeaderPNM* header, const uint8_t** pos) {
+    JXL_RETURN_IF_ERROR(SkipWhitespace());
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
+
+    JXL_RETURN_IF_ERROR(SkipWhitespace());
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
+
+    if (!header->is_bit) {
+      JXL_RETURN_IF_ERROR(SkipWhitespace());
+      size_t max_val;
+      JXL_RETURN_IF_ERROR(ParseUnsigned(&max_val));
+      if (max_val == 0 || max_val >= 65536) {
+        return JXL_FAILURE("PNM: bad MaxVal");
+      }
+      // Bits needed to represent every value in [0, MaxVal], i.e.
+      // ceil(log2(MaxVal + 1)). ceil(log2(MaxVal)) is one bit short whenever
+      // MaxVal is a power of two (MaxVal = 256 would be treated as 8-bit even
+      // though its samples do not fit in one byte) and is 0 for MaxVal = 1.
+      header->bits_per_sample = CeilLog2Nonzero(max_val + 1);
+    }
+    header->floating_point = false;
+    header->big_endian = true;
+
+    JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+
+    *pos = pos_;
+    return true;
+  }
+
+  // Parses the numeric part of a PFM header: width, height and scale, each
+  // preceded by exactly one separator byte and followed by one final
+  // whitespace byte. Samples are always 32-bit floats; on success `*pos`
+  // points at the first byte after the header.
+  Status ParseHeaderPFM(HeaderPNM* header, const uint8_t** pos) {
+    // Width.
+    JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
+
+    // Height; its separator has to be a space or a line feed.
+    JXL_RETURN_IF_ERROR(SkipBlank());
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
+
+    // Scale. Its magnitude is not used as a multiplier (all software expects
+    // the nominal range 0..1); only the sign matters: a negative value marks
+    // little-endian samples, anything else big-endian.
+    JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+    double endian_marker;
+    JXL_RETURN_IF_ERROR(ParseSigned(&endian_marker));
+    header->big_endian = (endian_marker >= 0.0);
+    header->bits_per_sample = 32;
+    header->floating_point = true;
+
+    // Single whitespace byte that terminates the header.
+    JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+
+    *pos = pos_;
+    return true;
+  }
+
+  const uint8_t* pos_;
+  const uint8_t* const end_;
+};
+
+// Size in bytes (terminating NUL included) of the buffer that EncodeHeader
+// writes into.
+constexpr size_t kMaxHeaderSize = 200;
+
+// Writes the textual PNM/PFM header for `ib` into `header`, which must hold at
+// least kMaxHeaderSize bytes, and stores the header length (without the
+// terminating NUL) in `*chars_written`.
+//
+// The format is chosen by `bits_per_sample`: 32 -> PFM ("Pf"/"PF", the scale
+// is -1.0 for little endian and 1.0 otherwise), 1 -> PBM ("P4", gray only),
+// anything else -> PGM/PPM ("P5"/"P6") with MaxVal = 2^bits_per_sample - 1.
+// Fails if `ib` has alpha, if a color image is requested as PBM, if more than
+// 16 bits are requested for PGM/PPM, or if the header could not be formatted.
+// `*chars_written` is only modified on success.
+Status EncodeHeader(const ImageBundle& ib, const size_t bits_per_sample,
+                    const bool little_endian, char* header,
+                    int* JXL_RESTRICT chars_written) {
+  if (ib.HasAlpha()) return JXL_FAILURE("PNM: can't store alpha");
+
+  // The length is taken from snprintf's return value rather than from a `%n`
+  // conversion: `%n` is rejected or disabled by several C runtimes, which
+  // would leave the length unset.
+  int written = 0;
+  if (bits_per_sample == 32) {  // PFM
+    const char type = ib.IsGray() ? 'f' : 'F';
+    const double scale = little_endian ? -1.0 : 1.0;
+    written = snprintf(header, kMaxHeaderSize, "P%c\n%zu %zu\n%.1f\n", type,
+                       ib.oriented_xsize(), ib.oriented_ysize(), scale);
+  } else if (bits_per_sample == 1) {  // PBM
+    if (!ib.IsGray()) {
+      return JXL_FAILURE("Cannot encode color as PBM");
+    }
+    written = snprintf(header, kMaxHeaderSize, "P4\n%zu %zu\n",
+                       ib.oriented_xsize(), ib.oriented_ysize());
+  } else {  // PGM/PPM
+    // Reject before shifting: `1U << bits_per_sample` is undefined for shift
+    // counts of 32 or more.
+    if (bits_per_sample > 16) return JXL_FAILURE("PNM cannot have > 16 bits");
+    const uint32_t max_val = (1U << bits_per_sample) - 1;
+    const char type = ib.IsGray() ? '5' : '6';
+    written = snprintf(header, kMaxHeaderSize, "P%c\n%zu %zu\n%u\n", type,
+                       ib.oriented_xsize(), ib.oriented_ysize(), max_val);
+  }
+  // A negative result is an encoding error; a result >= the buffer size means
+  // the header was truncated.
+  if (written < 0 || static_cast<size_t>(written) >= kMaxHeaderSize) {
+    return JXL_FAILURE("PNM: failed to write header");
+  }
+  *chars_written = written;
+  return true;
+}
+
+// Applies the decoder hints in `io->dec_hints` to the color encoding stored in
+// `io->metadata.m`:
+//   "color_space"   a description parsed by ParseDescription; it must agree
+//                   with `is_gray`
+//   "icc_pathname"  path of a file whose contents are installed as ICC profile
+// Other keys only produce a warning. If neither hint was given, sRGB (gray or
+// RGB according to `is_gray`) is assumed, with a warning.
+Status ApplyHints(const bool is_gray, CodecInOut* io) {
+  bool have_encoding = false;
+
+  const auto apply_hint = [is_gray, io, &have_encoding](
+                              const std::string& key,
+                              const std::string& value) -> Status {
+    ColorEncoding* encoding = &io->metadata.m.color_encoding;
+
+    if (key == "color_space") {
+      if (!(ParseDescription(value, encoding) && encoding->CreateICC())) {
+        return JXL_FAILURE("PNM: Failed to apply color_space");
+      }
+
+      if (io->metadata.m.color_encoding.IsGray() != is_gray) {
+        return JXL_FAILURE(
+            "PNM: mismatch between file and color_space hint");
+      }
+
+      have_encoding = true;
+      return true;
+    }
+
+    if (key == "icc_pathname") {
+      PaddedBytes icc;
+      JXL_RETURN_IF_ERROR(ReadFile(value, &icc));
+      JXL_RETURN_IF_ERROR(encoding->SetICC(std::move(icc)));
+      have_encoding = true;
+      return true;
+    }
+
+    JXL_WARNING("PNM decoder ignoring %s hint", key.c_str());
+    return true;
+  };
+  JXL_RETURN_IF_ERROR(io->dec_hints.Foreach(apply_hint));
+
+  if (have_encoding) return true;
+
+  JXL_WARNING("PNM: no color_space/icc_pathname given, assuming sRGB");
+  JXL_RETURN_IF_ERROR(io->metadata.m.color_encoding.SetSRGB(
+      is_gray ? ColorSpace::kGray : ColorSpace::kRGB));
+  return true;
+}
+
+// Wraps the characters of the NUL-terminated string `str` (terminator not
+// included) in a byte span. The span refers to `str`; nothing is copied.
+Span<const uint8_t> MakeSpan(const char* str) {
+  const size_t length = strlen(str);
+  const uint8_t* const data = reinterpret_cast<const uint8_t*>(str);
+  return Span<const uint8_t>(data, length);
+}
+
+// Mirrors all three planes of `image` top-to-bottom, in place. Needed when
+// loading/saving PFM files, whose scanlines are stored in inverted order.
+void VerticallyFlipImage(Image3F* const image) {
+  for (int c = 0; c < 3; c++) {
+    // Walk inwards from both ends, exchanging row `top` with row `bottom`; the
+    // middle row of an odd-height image stays where it is.
+    for (size_t top = 0; top < image->ysize() / 2; top++) {
+      const size_t bottom = image->ysize() - top - 1;
+      float* upper = image->PlaneRow(c, top);
+      float* lower = image->PlaneRow(c, bottom);
+      for (size_t x = 0; x < image->xsize(); ++x) {
+        const float saved = upper[x];
+        upper[x] = lower[x];
+        lower[x] = saved;
+      }
+    }
+  }
+}
+
+}  // namespace
+
+// Decodes a PNM/PFM or Y4M image from `bytes` into `io`.
+//
+// Returns false (without logging a failure) when the header cannot be parsed.
+// After the header is parsed, the dimensions are checked against
+// `io->constraints`, the color hints are applied (see ApplyHints) and the
+// sample type is set to float32 or to unsigned integers of the header's bit
+// depth; alpha is always absent.
+//  - Y4M (`header.is_yuv` > 0): the three 8-bit planes are read directly,
+//    possibly subsampled, and stored as a YCbCr image.
+//  - otherwise: the pixel data is handed to ConvertFromExternal; PFM data
+//    (32 bits per sample) is flipped vertically.
+// For integer samples the stored bit depth is finally replaced by the depth
+// detected from the decoded pixels.
+Status DecodeImagePNM(const Span<const uint8_t> bytes, ThreadPool* pool,
+                      CodecInOut* io) {
+  Parser parser(bytes);
+  HeaderPNM header = {};
+  const uint8_t* pos = nullptr;
+  if (!parser.ParseHeader(&header, &pos)) return false;
+  JXL_RETURN_IF_ERROR(
+      VerifyDimensions(&io->constraints, header.xsize, header.ysize));
+
+  if (header.bits_per_sample == 0 || header.bits_per_sample > 32) {
+    return JXL_FAILURE("PNM: bits_per_sample invalid");
+  }
+
+  JXL_RETURN_IF_ERROR(ApplyHints(header.is_gray, io));
+  if (header.floating_point) {
+    io->metadata.m.SetFloat32Samples();
+  } else {
+    io->metadata.m.SetUintSamples(header.bits_per_sample);
+  }
+  io->metadata.m.SetAlphaBits(0);
+  io->dec_pixels = header.xsize * header.ysize;
+
+  if (header.is_yuv > 0) {
+    Image3F yuvdata(header.xsize, header.ysize);
+    ImageBundle bundle(&io->metadata.m);
+    // Per-plane shifts indexed by [is_yuv - 1][plane]: 444, 422, 420.
+    const int hshift[3][3] = {{0, 0, 0}, {0, 1, 1}, {0, 1, 1}};
+    const int vshift[3][3] = {{0, 0, 0}, {0, 0, 0}, {0, 1, 1}};
+    const uint8_t* const end = bytes.data() + bytes.size();
+
+    for (size_t c = 0; c < 3; c++) {
+      for (size_t y = 0; y < header.ysize >> vshift[header.is_yuv - 1][c];
+           ++y) {
+        // File plane order 0, 1, 2 is stored in image planes 1, 0, 2.
+        float* const JXL_RESTRICT row =
+            yuvdata.PlaneRow((c == 2 ? 2 : 1 - c), y);
+        const size_t row_bytes = header.xsize >> hshift[header.is_yuv - 1][c];
+        // Compare against the number of remaining bytes instead of forming
+        // `pos + row_bytes`, which would be an out-of-range pointer (undefined
+        // behaviour) exactly when the input is truncated.
+        if (static_cast<size_t>(end - pos) < row_bytes)
+          return JXL_FAILURE("Not enough image data");
+        for (size_t x = 0; x < row_bytes; ++x) {
+          row[x] = (1.f / 255.f) * ((*pos++) - 128.f);
+        }
+      }
+    }
+    bundle.SetFromImage(std::move(yuvdata), io->metadata.m.color_encoding);
+    bundle.color_transform = ColorTransform::kYCbCr;
+
+    // Sampling factors passed to YCbCrChromaSubsampling::Set: 2 for a plane
+    // that is not shifted, 1 for a plane shifted by one.
+    YCbCrChromaSubsampling subsampling;
+    uint8_t cssh[3] = {
+        2, static_cast<uint8_t>(hshift[header.is_yuv - 1][1] ? 1 : 2),
+        static_cast<uint8_t>(hshift[header.is_yuv - 1][2] ? 1 : 2)};
+    uint8_t cssv[3] = {
+        2, static_cast<uint8_t>(vshift[header.is_yuv - 1][1] ? 1 : 2),
+        static_cast<uint8_t>(vshift[header.is_yuv - 1][2] ? 1 : 2)};
+
+    JXL_RETURN_IF_ERROR(subsampling.Set(cssh, cssv));
+
+    bundle.chroma_subsampling = subsampling;
+
+    io->Main() = std::move(bundle);
+  } else {
+    const bool flipped_y = header.bits_per_sample == 32;  // PFMs are flipped
+    const Span<const uint8_t> span(pos, bytes.data() + bytes.size() - pos);
+    JXL_RETURN_IF_ERROR(ConvertFromExternal(
+        span, header.xsize, header.ysize, io->metadata.m.color_encoding,
+        /*has_alpha=*/false, /*alpha_is_premultiplied=*/false,
+        io->metadata.m.bit_depth.bits_per_sample,
+        header.big_endian ? JXL_BIG_ENDIAN : JXL_LITTLE_ENDIAN, flipped_y, pool,
+        &io->Main()));
+  }
+  if (!header.floating_point) {
+    io->metadata.m.bit_depth.bits_per_sample = io->Main().DetectRealBitdepth();
+  }
+  io->SetSize(header.xsize, header.ysize);
+  SetIntensityTarget(io);
+  return true;
+}
+
+// Encodes the main frame of `io` as PNM/PFM into `bytes`.
+//
+// The frame is copied, flipped vertically when a floating-point (PFM) output
+// is requested, transformed to `c_desired` if needed, converted to interleaved
+// samples and written after the header produced by EncodeHeader.
+// `bits_per_sample` > 16 selects floating-point output in native byte order;
+// otherwise samples are written big-endian. Metadata other than the defaults
+// and non-sRGB color encodings cannot be stored and only trigger a warning.
+Status EncodeImagePNM(const CodecInOut* io, const ColorEncoding& c_desired,
+                      size_t bits_per_sample, ThreadPool* pool,
+                      PaddedBytes* bytes) {
+  const bool floating_point = bits_per_sample > 16;
+  // Choose native for PFM; PGM/PPM require big-endian (N/A for PBM)
+  const JxlEndianness endianness =
+      floating_point ? JXL_NATIVE_ENDIAN : JXL_BIG_ENDIAN;
+
+  // Work on a copy so that the check below does not touch io's metadata.
+  ImageMetadata metadata_copy = io->metadata.m;
+  // AllDefault sets all_default, which can cause a race condition.
+  if (!Bundle::AllDefault(metadata_copy)) {
+    JXL_WARNING("PNM encoder ignoring metadata - use a different codec");
+  }
+  if (!c_desired.IsSRGB()) {
+    JXL_WARNING(
+        "PNM encoder cannot store custom ICC profile; decoder\n"
+        "will need hint key=color_space to get the same values");
+  }
+
+  ImageBundle ib = io->Main().Copy();
+  // In case of PFM the image must be flipped upside down since that format
+  // is designed that way.
+  const ImageBundle* to_color_transform = &ib;
+  ImageBundle flipped;
+  if (floating_point) {
+    flipped = ib.Copy();
+    VerticallyFlipImage(flipped.color());
+    to_color_transform = &flipped;
+  }
+  ImageMetadata metadata = io->metadata.m;
+  // `store` receives the transformed pixels when a transform is needed;
+  // `transformed` is set by TransformIfNeeded to the bundle to encode.
+  ImageBundle store(&metadata);
+  const ImageBundle* transformed;
+  JXL_RETURN_IF_ERROR(TransformIfNeeded(*to_color_transform, c_desired, pool,
+                                        &store, &transformed));
+  // Bytes per output row.
+  // NOTE(review): the integer division rounds down, so for bit depths that
+  // are not a multiple of 8 (e.g. 1 bit with a width not divisible by 8) this
+  // is smaller than the rounded-up row size - confirm against what
+  // ConvertToExternal expects.
+  size_t stride = ib.oriented_xsize() *
+                  (c_desired.Channels() * bits_per_sample) / kBitsPerByte;
+  PaddedBytes pixels(stride * ib.oriented_ysize());
+  JXL_RETURN_IF_ERROR(ConvertToExternal(
+      *transformed, bits_per_sample, floating_point, c_desired.Channels(),
+      endianness, stride, pool, pixels.data(), pixels.size(),
+      /*out_callback=*/nullptr, /*out_opaque=*/nullptr,
+      metadata.GetOrientation()));
+
+  char header[kMaxHeaderSize];
+  int header_size = 0;
+  // Resolve "native" to the actual byte order so the header can record it.
+  bool is_little_endian = endianness == JXL_LITTLE_ENDIAN ||
+                          (endianness == JXL_NATIVE_ENDIAN && IsLittleEndian());
+  JXL_RETURN_IF_ERROR(EncodeHeader(*transformed, bits_per_sample,
+                                   is_little_endian, header, &header_size));
+
+  // Output layout: header bytes immediately followed by the pixel bytes.
+  bytes->resize(static_cast<size_t>(header_size) + pixels.size());
+  memcpy(bytes->data(), header, static_cast<size_t>(header_size));
+  memcpy(bytes->data() + header_size, pixels.data(), pixels.size());
+
+  return true;
+}
+
+// Self-test for the number parsing of Parser: checks that malformed input is
+// rejected and that valid unsigned / signed decimal numbers are parsed to the
+// expected values. Failures are reported through JXL_CHECK.
+void TestCodecPNM() {
+  // Both outputs start with values no test below expects.
+  size_t parsed_unsigned = 77777;
+  double parsed_signed = 77.77;
+// When `JXL_CRASH_ON_ERROR` is defined, a failed parse crashes instead of
+// returning an error, which would make the rejection tests themselves fail.
+// They are therefore only run when `JXL_CRASH_ON_ERROR` is not defined.
+#ifndef JXL_CRASH_ON_ERROR
+  JXL_CHECK(!Parser(MakeSpan("")).ParseUnsigned(&parsed_unsigned));
+  JXL_CHECK(!Parser(MakeSpan("+")).ParseUnsigned(&parsed_unsigned));
+  JXL_CHECK(!Parser(MakeSpan("-")).ParseUnsigned(&parsed_unsigned));
+  JXL_CHECK(!Parser(MakeSpan("A")).ParseUnsigned(&parsed_unsigned));
+
+  JXL_CHECK(!Parser(MakeSpan("")).ParseSigned(&parsed_signed));
+  JXL_CHECK(!Parser(MakeSpan("+")).ParseSigned(&parsed_signed));
+  JXL_CHECK(!Parser(MakeSpan("-")).ParseSigned(&parsed_signed));
+  JXL_CHECK(!Parser(MakeSpan("A")).ParseSigned(&parsed_signed));
+#endif
+  // Unsigned integers.
+  JXL_CHECK(Parser(MakeSpan("1")).ParseUnsigned(&parsed_unsigned));
+  JXL_CHECK(parsed_unsigned == 1);
+
+  JXL_CHECK(Parser(MakeSpan("32")).ParseUnsigned(&parsed_unsigned));
+  JXL_CHECK(parsed_unsigned == 32);
+
+  // Signed numbers, with and without sign and fractional part.
+  JXL_CHECK(Parser(MakeSpan("1")).ParseSigned(&parsed_signed));
+  JXL_CHECK(parsed_signed == 1.0);
+  JXL_CHECK(Parser(MakeSpan("+2")).ParseSigned(&parsed_signed));
+  JXL_CHECK(parsed_signed == 2.0);
+  JXL_CHECK(Parser(MakeSpan("-3")).ParseSigned(&parsed_signed));
+  JXL_CHECK(std::abs(parsed_signed + 3.0) < 1E-15);
+  JXL_CHECK(Parser(MakeSpan("3.141592")).ParseSigned(&parsed_signed));
+  JXL_CHECK(std::abs(parsed_signed - 3.141592) < 1E-15);
+  JXL_CHECK(Parser(MakeSpan("-3.141592")).ParseSigned(&parsed_signed));
+  JXL_CHECK(std::abs(parsed_signed + 3.141592) < 1E-15);
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_pnm.h b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_pnm.h
new file mode 100644
index 0000000000..9547ecc929
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_pnm.h
@@ -0,0 +1,40 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_CODEC_PNM_H_
+#define LIB_EXTRAS_CODEC_PNM_H_
+
+// Encodes/decodes PBM/PGM/PPM/PFM pixels in memory.
+
+#include <stddef.h>
+#include <stdint.h>
+
+// TODO(janwas): workaround for incorrect Win64 codegen (cause unknown)
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+
+namespace jxl {
+
+// Decodes `bytes` (PBM/PGM/PPM/PFM, or a Y4M stream's first frame) into `io`.
+// io->dec_hints may specify "color_space" or "icc_pathname"; without either,
+// the color encoding defaults to sRGB. `pool` is used for the pixel
+// conversion. Returns false if the header cannot be parsed.
+Status DecodeImagePNM(const Span<const uint8_t> bytes, ThreadPool* pool,
+                      CodecInOut* io);
+
+// Transforms from io->c_current to `c_desired` and encodes into `bytes`.
+// `bits_per_sample` selects the format: 1 -> PBM, 32 -> PFM, other values up
+// to 16 -> PGM/PPM. Images with alpha cannot be encoded.
+Status EncodeImagePNM(const CodecInOut* io, const ColorEncoding& c_desired,
+                      size_t bits_per_sample, ThreadPool* pool,
+                      PaddedBytes* bytes);
+
+// Self-test of the PNM header number parser; aborts via JXL_CHECK on failure.
+void TestCodecPNM();
+
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_CODEC_PNM_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_psd.cc b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_psd.cc
new file mode 100644
index 0000000000..37d3177e35
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_psd.cc
@@ -0,0 +1,609 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/codec_psd.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <numeric>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/file_io.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/fields.h"  // AllDefault
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/luminance.h"
+
+namespace jxl {
+namespace {
+
+// Reads a big-endian unsigned integer that is `bytes` bytes wide (1, 2, 4 or
+// 8) at `pos` and advances `pos` by `bytes`.
+//
+// `maxpos` is the STL-style end of the input. When fewer than `bytes` bytes
+// remain, or the width is not one of the supported ones, nothing is read and 0
+// is returned. `pos` is advanced in every case, so callers can detect
+// truncated input by comparing `pos` with `maxpos` afterwards.
+uint64_t get_be_int(int bytes, const uint8_t*& pos, const uint8_t* maxpos) {
+  uint64_t r = 0;
+  // Compare the width with the number of remaining bytes rather than forming
+  // `pos + bytes`: callers may already have moved `pos` beyond `maxpos` by an
+  // amount taken from the file, in which case the pointer sum could wrap
+  // around and pass a check written as `pos + bytes <= maxpos`.
+  const bool in_bounds =
+      bytes >= 0 && pos <= maxpos &&
+      static_cast<size_t>(maxpos - pos) >= static_cast<size_t>(bytes);
+  if (in_bounds) {
+    switch (bytes) {
+      case 1:
+        r = *pos;
+        break;
+      case 2:
+        r = LoadBE16(pos);
+        break;
+      case 4:
+        r = LoadBE32(pos);
+        break;
+      case 8:
+        r = LoadBE64(pos);
+        break;
+      default:
+        break;  // unsupported width: result stays 0
+    }
+  }
+  pos += bytes;
+  return r;
+}
+
+// Copies at most `n` bytes from `pos` to `out`, stopping early at `maxpos`
+// (the STL-style end, which is never read). Bytes of `out` beyond the copied
+// prefix are left untouched.
+void safe_copy(const uint8_t* JXL_RESTRICT pos,
+               const uint8_t* JXL_RESTRICT maxpos, char* JXL_RESTRICT out,
+               size_t n) {
+  const uint8_t* src = pos;
+  char* dst = out;
+  for (size_t left = n; left > 0 && src < maxpos; --left) {
+    *dst++ = *src++;
+  }
+}
+
+// Compares the `n` bytes at `pos` with `s2` using strncmp semantics.
+// maxpos is the STL-style end! The valid range is up to [pos, maxpos).
+// Returns 1 (i.e. "different") without reading anything when fewer than `n`
+// bytes are available in that range.
+int safe_strncmp(const uint8_t* pos, const uint8_t* maxpos, const char* s2,
+                 size_t n) {
+  // Compare `n` with the number of remaining bytes rather than forming
+  // `pos + n`: callers may already have moved `pos` beyond `maxpos` by an
+  // amount taken from the file, in which case the pointer sum could wrap
+  // around and pass a check written as `pos + n > maxpos`.
+  if (pos > maxpos || static_cast<size_t>(maxpos - pos) < n) return 1;
+  return strncmp((const char*)pos, s2, n);
+}
+// Verbosity level passed to JXL_DEBUG_V for the PSD decoder's debug output.
+constexpr int PSD_VERBOSITY = 1;
+
+// Decodes the channel data of one PSD layer (`is_layer`) or of the merged
+// image (`!is_layer`) starting at `pos` into `layer`, advancing `pos`.
+//
+//   chans[c]   destination of the c-th stored channel: negative = skip,
+//              0..2 = color plane, >= 3 = extra channel (index minus 3)
+//   invert[c]  when set, the stored value v is written as 1 - v
+//   w, h       dimensions of the channel data; must be positive
+//   version    scales the size of the RLE line-count entries (2 * version
+//              bytes each)
+//   colormodel for the merged image, the first `colormodel` channels are
+//              skipped so that only the following channels are decoded
+//   depth      bits per sample: 8, 16 or 32 for raw data; RLE data requires 8
+//
+// The compression method is a 2-byte big-endian value (0 = raw, 1 = RLE; any
+// other value is an error). It is read once per channel for layers and once
+// in total for the merged image.
+Status decode_layer(const uint8_t*& pos, const uint8_t* maxpos,
+                    ImageBundle& layer, std::vector<int> chans,
+                    std::vector<bool> invert, int w, int h, int version,
+                    int colormodel, bool is_layer, int depth) {
+  int compression_method = 2;
+  int nb_channels = chans.size();
+  JXL_DEBUG_V(PSD_VERBOSITY,
+              "Trying to decode layer with dimensions %ix%i and %i channels", w,
+              h, nb_channels);
+  if (w <= 0 || h <= 0) return JXL_FAILURE("PSD: empty layer");
+  for (int c = 0; c < nb_channels; c++) {
+    // skip nop byte padding
+    while (pos < maxpos && *pos == 128) pos++;
+    JXL_DEBUG_V(PSD_VERBOSITY, "Channel %i (pos %zu)", c, (size_t)pos);
+    // Merged image stores all channels together (same compression method)
+    // Layers store channel per channel
+    if (is_layer || c == 0) {
+      compression_method = get_be_int(2, pos, maxpos);
+      JXL_DEBUG_V(PSD_VERBOSITY, "compression method: %i", compression_method);
+      if (compression_method > 1 || compression_method < 0) {
+        return JXL_FAILURE("PSD: can't handle compression method %i",
+                           compression_method);
+      }
+    }
+
+    if (!is_layer && c < colormodel) {
+      // skip to the extra channels
+      if (compression_method == 0) {
+        // NOTE(review): the skip distance is computed in `int` and `pos` is
+        // not checked against maxpos here - confirm callers bound w and h.
+        pos += w * h * (depth >> 3) * colormodel;
+        c = colormodel - 1;
+        continue;
+      }
+      // RLE: sum the per-row byte counts of the color channels and step over
+      // the count entries of the remaining channels, then skip the summed
+      // amount of compressed data.
+      size_t skip_amount = 0;
+      for (int i = 0; i < nb_channels; i++) {
+        if (i < colormodel) {
+          for (int y = 0; y < h; y++) {
+            skip_amount += get_be_int(2 * version, pos, maxpos);
+          }
+        } else {
+          pos += h * 2 * version;
+        }
+      }
+      pos += skip_amount;
+      c = colormodel - 1;
+      continue;
+    }
+    if (is_layer || c == 0) {
+      // skip the line-counts, we don't need them
+      if (compression_method == 1) {
+        pos += h * (is_layer ? 1 : nb_channels) * 2 *
+               version;  // PSB uses 4 bytes per rowsize instead of 2
+      }
+    }
+    int c_id = chans[c];
+    if (c_id < 0) continue;  // skip
+    if (static_cast<unsigned int>(c_id) >= 3 + layer.extra_channels().size())
+      return JXL_FAILURE("PSD: can't handle channel id %i", c_id);
+    ImageF& ch = (c_id < 3 ? layer.color()->Plane(c_id)
+                           : layer.extra_channels()[c_id - 3]);
+
+    for (int y = 0; y < h; y++) {
+      // get_be_int advances pos even when it could not read, so running past
+      // the end of the input in the previous row is detected here.
+      if (pos > maxpos) return JXL_FAILURE("PSD: premature end of input");
+      float* const JXL_RESTRICT row = ch.Row(y);
+      if (compression_method == 0) {
+        // uncompressed is easy
+        if (depth == 8) {
+          for (int x = 0; x < w; x++) {
+            row[x] = get_be_int(1, pos, maxpos) * (1.f / 255.f);
+          }
+        } else if (depth == 16) {
+          for (int x = 0; x < w; x++) {
+            row[x] = get_be_int(2, pos, maxpos) * (1.f / 65535.f);
+          }
+        } else if (depth == 32) {
+          // The 32 bits are copied verbatim into the float (no scaling).
+          for (int x = 0; x < w; x++) {
+            uint32_t f = get_be_int(4, pos, maxpos);
+            memcpy(&row[x], &f, 4);
+          }
+        }
+      } else {
+        // RLE is not that hard
+        // NOTE(review): the check is for depth != 8 although the message says
+        // "depth>1" - confirm the intended wording.
+        if (depth != 8)
+          return JXL_FAILURE("PSD: did not expect RLE with depth>1");
+        for (int x = 0; x < w;) {
+          if (pos >= maxpos) return JXL_FAILURE("PSD: out of bounds");
+          // Control byte: <= 0 repeats the next byte (1 - rle) times, > 0
+          // copies the next (1 + rle) bytes literally, -128 does nothing.
+          int8_t rle = *pos++;
+          if (rle <= 0) {
+            if (rle == -128) continue;  // nop
+            int count = 1 - rle;
+            float v = get_be_int(1, pos, maxpos) * (1.f / 255.f);
+            while (count && x < w) {
+              row[x] = v;
+              count--;
+              x++;
+            }
+            // A run must not extend past the end of the row.
+            if (count) return JXL_FAILURE("PSD: row overflow");
+          } else {
+            int count = 1 + rle;
+            while (count && x < w) {
+              row[x] = get_be_int(1, pos, maxpos) * (1.f / 255.f);
+              count--;
+              x++;
+            }
+            if (count) return JXL_FAILURE("PSD: row overflow");
+          }
+        }
+      }
+      if (invert[c]) {
+        // sometimes 0 means full ink
+        for (int x = 0; x < w; x++) {
+          row[x] = 1.f - row[x];
+        }
+      }
+    }
+    JXL_DEBUG_V(PSD_VERBOSITY, "Channel %i read.", c);
+  }
+
+  return true;
+}
+
+}  // namespace
+
+Status DecodeImagePSD(const Span<const uint8_t> bytes, ThreadPool* pool,
+                      CodecInOut* io) {
+  const uint8_t* pos = bytes.data();
+  const uint8_t* maxpos = bytes.data() + bytes.size();
+  if (safe_strncmp(pos, maxpos, "8BPS", 4)) return false;  // not a PSD file
+  JXL_DEBUG_V(PSD_VERBOSITY, "trying psd decode");
+  pos += 4;
+  int version = get_be_int(2, pos, maxpos);
+  JXL_DEBUG_V(PSD_VERBOSITY, "Version=%i", version);
+  if (version < 1 || version > 2)
+    return JXL_FAILURE("PSD: unknown format version");
+  // PSD = version 1, PSB = version 2
+  pos += 6;
+  int nb_channels = get_be_int(2, pos, maxpos);
+  size_t ysize = get_be_int(4, pos, maxpos);
+  size_t xsize = get_be_int(4, pos, maxpos);
+  const SizeConstraints* constraints = &io->constraints;
+  JXL_RETURN_IF_ERROR(VerifyDimensions(constraints, xsize, ysize));
+  uint64_t total_pixel_count = static_cast<uint64_t>(xsize) * ysize;
+  int bitdepth = get_be_int(2, pos, maxpos);
+  if (bitdepth != 8 && bitdepth != 16 && bitdepth != 32) {
+    return JXL_FAILURE("PSD: bit depth %i invalid or not supported", bitdepth);
+  }
+  if (bitdepth == 32) {
+    io->metadata.m.SetFloat32Samples();
+  } else {
+    io->metadata.m.SetUintSamples(bitdepth);
+  }
+  int colormodel = get_be_int(2, pos, maxpos);
+  // 1 = Grayscale, 3 = RGB, 4 = CMYK
+  if (colormodel != 1 && colormodel != 3 && colormodel != 4)
+    return JXL_FAILURE("PSD: unsupported color model");
+
+  int real_nb_channels = colormodel;
+  std::vector<std::vector<float>> spotcolor;
+
+  if (get_be_int(4, pos, maxpos))
+    return JXL_FAILURE("PSD: Unsupported color mode section");
+
+  bool hasmergeddata = true;
+  bool have_alpha = false;
+  bool merged_has_alpha = false;
+  size_t metalength = get_be_int(4, pos, maxpos);
+  const uint8_t* metaoffset = pos;
+  while (pos < metaoffset + metalength) {
+    char header[5] = "????";
+    safe_copy(pos, maxpos, header, 4);
+    if (memcmp(header, "8BIM", 4) != 0) {
+      return JXL_FAILURE("PSD: Unexpected image resource header: %s", header);
+    }
+    pos += 4;
+    int id = get_be_int(2, pos, maxpos);
+    int namelength = get_be_int(1, pos, maxpos);
+    pos += namelength;
+    if (!(namelength & 1)) pos++;  // padding to even length
+    size_t blocklength = get_be_int(4, pos, maxpos);
+    // JXL_DEBUG_V(PSD_VERBOSITY, "block id: %i | block length: %zu",id,
+    // blocklength);
+    if (pos > maxpos) return JXL_FAILURE("PSD: Unexpected end of file");
+    if (id == 1039) {  // ICC profile
+      size_t delta = maxpos - pos;
+      if (delta < blocklength) {
+        return JXL_FAILURE("PSD: Invalid block length");
+      }
+      PaddedBytes icc;
+      icc.resize(blocklength);
+      memcpy(icc.data(), pos, blocklength);
+      if (!io->metadata.m.color_encoding.SetICC(std::move(icc))) {
+        return JXL_FAILURE("PSD: Invalid color profile");
+      }
+    } else if (id == 1057) {  // compatibility mode or not?
+      if (get_be_int(4, pos, maxpos) != 1) {
+        return JXL_FAILURE("PSD: expected version=1 in id=1057 resource block");
+      }
+      hasmergeddata = get_be_int(1, pos, maxpos);
+      pos++;
+      blocklength -= 6;       // already skipped these bytes
+    } else if (id == 1077) {  // spot colors
+      int version = get_be_int(4, pos, maxpos);
+      if (version != 1) {
+        return JXL_FAILURE(
+            "PSD: expected DisplayInfo version 1, got version %i", version);
+      }
+      int spotcolorcount = nb_channels - colormodel;
+      JXL_DEBUG_V(PSD_VERBOSITY, "Reading %i spot colors. %zu", spotcolorcount,
+                  blocklength);
+      for (int k = 0; k < spotcolorcount; k++) {
+        int colorspace = get_be_int(2, pos, maxpos);
+        if ((colormodel == 3 && colorspace != 0) ||
+            (colormodel == 4 && colorspace != 2)) {
+          return JXL_FAILURE(
+              "PSD: cannot handle spot colors in different color spaces than "
+              "image itself");
+        }
+        if (colorspace == 2) JXL_WARNING("PSD: K ignored in CMYK spot color");
+        std::vector<float> color;
+        color.push_back(get_be_int(2, pos, maxpos) / 65535.f);  // R or C
+        color.push_back(get_be_int(2, pos, maxpos) / 65535.f);  // G or M
+        color.push_back(get_be_int(2, pos, maxpos) / 65535.f);  // B or Y
+        color.push_back(get_be_int(2, pos, maxpos) / 65535.f);  // ignored or K
+        color.push_back(get_be_int(2, pos, maxpos) /
+                        100.f);  // solidity (alpha, basically)
+        int kind = get_be_int(1, pos, maxpos);
+        JXL_DEBUG_V(PSD_VERBOSITY, "Kind=%i", kind);
+        color.push_back(kind);
+        spotcolor.push_back(color);
+        if (kind == 2) {
+          JXL_DEBUG_V(PSD_VERBOSITY, "Actual spot color");
+        } else if (kind == 1) {
+          JXL_DEBUG_V(PSD_VERBOSITY, "Mask (alpha) channel");
+        } else if (kind == 0) {
+          JXL_DEBUG_V(PSD_VERBOSITY, "Selection (alpha) channel");
+        } else {
+          return JXL_FAILURE("PSD: Unknown extra channel type");
+        }
+      }
+      if (blocklength & 1) pos++;
+      blocklength = 0;
+    }
+    pos += blocklength;
+    if (blocklength & 1) pos++;  // padding again
+  }
+
+  size_t layerlength = get_be_int(4 * version, pos, maxpos);
+  const uint8_t* after_layers_pos = pos + layerlength;
+  if (after_layers_pos < pos) return JXL_FAILURE("PSD: invalid layer length");
+  if (layerlength) {
+    pos += 4 * version;  // don't care about layerinfolength
+    JXL_DEBUG_V(PSD_VERBOSITY, "Layer section length: %zu", layerlength);
+    int layercount = static_cast<int16_t>(get_be_int(2, pos, maxpos));
+    JXL_DEBUG_V(PSD_VERBOSITY, "Layer count: %i", layercount);
+    io->frames.clear();
+
+    if (layercount == 0) {
+      if (get_be_int(2, pos, maxpos) != 0) {
+        return JXL_FAILURE(
+            "PSD: Expected zero padding before additional layer info");
+      }
+      while (pos < after_layers_pos) {
+        if (safe_strncmp(pos, maxpos, "8BIM", 4) &&
+            safe_strncmp(pos, maxpos, "8B64", 4))
+          return JXL_FAILURE("PSD: Unexpected layer info signature");
+        pos += 4;
+        const uint8_t* tpos = pos;
+        pos += 4;
+        size_t blocklength = get_be_int(4 * version, pos, maxpos);
+        JXL_DEBUG_V(PSD_VERBOSITY, "Length=%zu", blocklength);
+        if (blocklength > 0) {
+          if (pos >= maxpos) return JXL_FAILURE("PSD: Unexpected end of file");
+          size_t delta = maxpos - pos;
+          if (delta < blocklength) {
+            return JXL_FAILURE("PSD: Invalid block length");
+          }
+        }
+        if (!safe_strncmp(tpos, maxpos, "Layr", 4) ||
+            !safe_strncmp(tpos, maxpos, "Lr16", 4) ||
+            !safe_strncmp(tpos, maxpos, "Lr32", 4)) {
+          layercount = static_cast<int16_t>(get_be_int(2, pos, maxpos));
+          if (layercount < 0) {
+            return JXL_FAILURE("PSD: Invalid layer count");
+          }
+          JXL_DEBUG_V(PSD_VERBOSITY, "Real layer count: %i", layercount);
+          if (layercount > 1) have_alpha = true;
+          break;
+        }
+        if (!safe_strncmp(tpos, maxpos, "Mtrn", 4) ||
+            !safe_strncmp(tpos, maxpos, "Mt16", 4) ||
+            !safe_strncmp(tpos, maxpos, "Mt32", 4)) {
+          JXL_DEBUG_V(PSD_VERBOSITY, "Merged layer has transparency channel");
+          if (nb_channels > real_nb_channels) {
+            have_alpha = true;
+            merged_has_alpha = true;
+          }
+        }
+        pos += blocklength;
+      }
+    } else if (layercount < 0) {
+      // negative layer count indicates merged has alpha and it is to be shown
+      if (nb_channels > real_nb_channels) {
+        have_alpha = true;
+        merged_has_alpha = true;
+      }
+      layercount = -layercount;
+    } else {
+      // multiple layers implies there is alpha
+      have_alpha = true;
+    }
+
+    ExtraChannelInfo info;
+    info.bit_depth.bits_per_sample = bitdepth;
+    info.dim_shift = 0;
+
+    if (colormodel == 4) {  // cmyk
+      info.type = ExtraChannel::kBlack;
+      io->metadata.m.extra_channel_info.push_back(info);
+    }
+    if (have_alpha) {
+      JXL_DEBUG_V(PSD_VERBOSITY, "Have alpha");
+      real_nb_channels++;
+      info.type = ExtraChannel::kAlpha;
+      info.alpha_associated =
+          false;  // true? PSD is not consistent with this, need to check
+      io->metadata.m.extra_channel_info.push_back(info);
+    }
+    if (merged_has_alpha && !spotcolor.empty() && spotcolor[0][5] == 1) {
+      // first alpha channel
+      spotcolor.erase(spotcolor.begin());
+    }
+    for (size_t i = 0; i < spotcolor.size(); i++) {
+      real_nb_channels++;
+      if (spotcolor[i][5] == 2) {
+        info.type = ExtraChannel::kSpotColor;
+        info.spot_color[0] = spotcolor[i][0];
+        info.spot_color[1] = spotcolor[i][1];
+        info.spot_color[2] = spotcolor[i][2];
+        info.spot_color[3] = spotcolor[i][4];
+      } else if (spotcolor[i][5] == 1) {
+        info.type = ExtraChannel::kAlpha;
+      } else if (spotcolor[i][5] == 0) {
+        info.type = ExtraChannel::kSelectionMask;
+      } else
+        return JXL_FAILURE("PSD: unhandled extra channel");
+      io->metadata.m.extra_channel_info.push_back(info);
+    }
+    std::vector<std::vector<int>> layer_chan_id;
+    std::vector<size_t> layer_offsets(layercount + 1, 0);
+    std::vector<bool> is_real_layer(layercount, false);
+    for (int l = 0; l < layercount; l++) {
+      ImageBundle layer(&io->metadata.m);
+      layer.duration = 0;
+      layer.blend = (l > 0);
+
+      layer.use_for_next_frame = (l + 1 < layercount);
+      layer.origin.y0 = get_be_int(4, pos, maxpos);
+      layer.origin.x0 = get_be_int(4, pos, maxpos);
+      size_t height = get_be_int(4, pos, maxpos) - layer.origin.y0;
+      size_t width = get_be_int(4, pos, maxpos) - layer.origin.x0;
+      JXL_DEBUG_V(PSD_VERBOSITY, "Layer %i: %zu x %zu at origin (%i, %i)", l,
+                  width, height, layer.origin.x0, layer.origin.y0);
+      int nb_chs = get_be_int(2, pos, maxpos);
+      JXL_DEBUG_V(PSD_VERBOSITY, "  channels: %i", nb_chs);
+      std::vector<int> chan_ids;
+      layer_offsets[l + 1] = layer_offsets[l];
+      for (int lc = 0; lc < nb_chs; lc++) {
+        int id = get_be_int(2, pos, maxpos);
+        JXL_DEBUG_V(PSD_VERBOSITY, "    id=%i", id);
+        if (id == 65535) {
+          chan_ids.push_back(colormodel);  // alpha
+        } else if (id == 65534) {
+          chan_ids.push_back(-1);  // layer mask, ignored
+        } else {
+          chan_ids.push_back(id);  // color channel
+        }
+        layer_offsets[l + 1] += get_be_int(4 * version, pos, maxpos);
+      }
+      layer_chan_id.push_back(chan_ids);
+      if (safe_strncmp(pos, maxpos, "8BIM", 4))
+        return JXL_FAILURE("PSD: Layer %i: Unexpected signature (not 8BIM)", l);
+      pos += 4;
+      if (safe_strncmp(pos, maxpos, "norm", 4)) {
+        return JXL_FAILURE(
+            "PSD: Layer %i: Cannot handle non-default blend mode", l);
+      }
+      pos += 4;
+      int opacity = get_be_int(1, pos, maxpos);
+      if (opacity < 100) {
+        JXL_WARNING(
+            "PSD: ignoring opacity of semi-transparent layer %i (opacity=%i)",
+            l, opacity);
+      }
+      pos++;  // clipping
+      int flags = get_be_int(1, pos, maxpos);
+      pos++;
+      bool invisible = (flags & 2);
+      if (invisible) {
+        if (l + 1 < layercount) {
+          layer.blend = false;
+          layer.use_for_next_frame = false;
+        } else {
+          // TODO: instead add dummy last frame?
+          JXL_WARNING("PSD: invisible top layer was made visible");
+        }
+      }
+      size_t extradata = get_be_int(4, pos, maxpos);
+      JXL_DEBUG_V(PSD_VERBOSITY, "  extradata: %zu bytes", extradata);
+      const uint8_t* after_extra = pos + extradata;
+      // TODO: deal with non-empty layer masks
+      pos += get_be_int(4, pos, maxpos);  // skip layer mask data
+      pos += get_be_int(4, pos, maxpos);  // skip layer blend range data
+      size_t namelength = get_be_int(1, pos, maxpos);
+      size_t delta = maxpos - pos;
+      if (delta < namelength) return JXL_FAILURE("PSD: Invalid block length");
+      char lname[256] = {};
+      memcpy(lname, pos, namelength);
+      lname[namelength] = 0;
+      JXL_DEBUG_V(PSD_VERBOSITY, "  name: %s", lname);
+      pos = after_extra;
+      if (width == 0 || height == 0) {
+        JXL_DEBUG_V(PSD_VERBOSITY,
+                    "  NOT A REAL LAYER");  // probably layer group
+        continue;
+      }
+      is_real_layer[l] = true;
+      JXL_RETURN_IF_ERROR(VerifyDimensions(constraints, width, height));
+      uint64_t pixel_count = static_cast<uint64_t>(width) * height;
+      if (!SafeAdd(total_pixel_count, pixel_count, total_pixel_count)) {
+        return JXL_FAILURE("Image too big");
+      }
+      if (total_pixel_count > constraints->dec_max_pixels) {
+        return JXL_FAILURE("Image too big");
+      }
+      Image3F rgb(width, height);
+      layer.SetFromImage(std::move(rgb), io->metadata.m.color_encoding);
+      std::vector<ImageF> ec;
+      for (const auto& ec_meta : layer.metadata()->extra_channel_info) {
+        ImageF extra(width, height);
+        if (ec_meta.type == ExtraChannel::kAlpha) {
+          FillPlane(1.0f, &extra, Rect(extra));  // opaque
+        } else {
+          ZeroFillPlane(&extra, Rect(extra));  // zeroes
+        }
+        ec.push_back(std::move(extra));
+      }
+      if (!ec.empty()) layer.SetExtraChannels(std::move(ec));
+      layer.name = lname;
+      io->dec_pixels += layer.xsize() * layer.ysize();
+      io->frames.push_back(std::move(layer));
+    }
+
+    std::vector<bool> invert(real_nb_channels, false);
+    int il = 0;
+    const uint8_t* bpos = pos;
+    for (int l = 0; l < layercount; l++) {
+      if (!is_real_layer[l]) continue;
+      pos = bpos + layer_offsets[l];
+      if (pos < bpos) return JXL_FAILURE("PSD: invalid layer offset");
+      JXL_DEBUG_V(PSD_VERBOSITY, "At position %i (%zu)",
+                  (int)(pos - bytes.data()), (size_t)pos);
+      ImageBundle& layer = io->frames[il++];
+      std::vector<int>& chan_id = layer_chan_id[l];
+      if (chan_id.size() > invert.size()) invert.resize(chan_id.size(), false);
+      JXL_RETURN_IF_ERROR(decode_layer(pos, maxpos, layer, chan_id, invert,
+                                       layer.xsize(), layer.ysize(), version,
+                                       colormodel, true, bitdepth));
+    }
+  } else
+    return JXL_FAILURE("PSD: no layer data found");
+
+  if (!hasmergeddata && !spotcolor.empty()) {
+    return JXL_FAILURE("PSD: extra channel data declared but not found");
+  }
+
+  if (!spotcolor.empty() || (hasmergeddata && io->frames.empty())) {
+    // PSD only has spot colors / extra alpha/mask data in the merged image
+    // We don't redundantly store the merged image, so we put it in the first
+    // layer (the next layers will kAdd zeroes to it)
+    pos = after_layers_pos;
+    bool have_only_merged = false;
+    if (io->frames.empty()) {
+      // There is only the merged image, no layers
+      ImageBundle nlayer(&io->metadata.m);
+      Image3F rgb(xsize, ysize);
+      nlayer.SetFromImage(std::move(rgb), io->metadata.m.color_encoding);
+      std::vector<ImageF> ec;
+      for (const auto& ec_meta : nlayer.metadata()->extra_channel_info) {
+        ImageF extra(xsize, ysize);
+        if (ec_meta.type == ExtraChannel::kAlpha) {
+          FillPlane(1.0f, &extra, Rect(extra));  // opaque
+        } else {
+          ZeroFillPlane(&extra, Rect(extra));  // zeroes
+        }
+        ec.push_back(std::move(extra));
+      }
+      if (!ec.empty()) nlayer.SetExtraChannels(std::move(ec));
+      io->dec_pixels += nlayer.xsize() * nlayer.ysize();
+      io->frames.push_back(std::move(nlayer));
+      have_only_merged = true;
+    }
+    ImageBundle& layer = io->frames[0];
+    std::vector<int> chan_id(real_nb_channels);
+    std::iota(chan_id.begin(), chan_id.end(), 0);
+    std::vector<bool> invert(real_nb_channels, false);
+    if (static_cast<int>(spotcolor.size()) + colormodel + 1 <
+        real_nb_channels) {
+      return JXL_FAILURE("Inconsistent layer configuration");
+    }
+    if (!merged_has_alpha) {
+      if (colormodel >= real_nb_channels) {
+        return JXL_FAILURE("Inconsistent layer configuration");
+      }
+      chan_id.erase(chan_id.begin() + colormodel);
+      invert.erase(invert.begin() + colormodel);
+    } else {
+      colormodel++;
+    }
+    for (size_t i = colormodel; i < invert.size(); i++) {
+      if (spotcolor[i - colormodel][5] == 2) invert[i] = true;
+      if (spotcolor[i - colormodel][5] == 0) invert[i] = true;
+    }
+    JXL_RETURN_IF_ERROR(decode_layer(
+        pos, maxpos, layer, chan_id, invert, layer.xsize(), layer.ysize(),
+        version, (have_only_merged ? 0 : colormodel), false, bitdepth));
+  }
+
+  if (io->frames.empty()) return JXL_FAILURE("PSD: no layers");
+
+  io->SetSize(xsize, ysize);
+
+  SetIntensityTarget(io);
+
+  return true;
+}
+
+Status EncodeImagePSD(const CodecInOut* io, const ColorEncoding& c_desired,
+                      size_t bits_per_sample, ThreadPool* pool,
+                      PaddedBytes* bytes) {  // stub: no parameter is read
+  return JXL_FAILURE("PSD encoding not yet implemented");  // always fails
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_psd.h b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_psd.h
new file mode 100644
index 0000000000..11a9fb882e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_psd.h
@@ -0,0 +1,33 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_CODEC_PSD_H_
+#define LIB_EXTRAS_CODEC_PSD_H_
+
+// Decodes Photoshop PSD/PSB, preserving the layers
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+
+namespace jxl {
+
+// Decodes `bytes` (a Photoshop PSD/PSB file) into `io`; fails on bad input.
+Status DecodeImagePSD(const Span<const uint8_t> bytes, ThreadPool* pool,
+                      CodecInOut* io);
+
+// Not implemented yet: the definition unconditionally returns a failure.
+Status EncodeImagePSD(const CodecInOut* io, const ColorEncoding& c_desired,
+                      size_t bits_per_sample, ThreadPool* pool,
+                      PaddedBytes* bytes);
+
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_CODEC_PSD_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_test.cc
new file mode 100644
index 0000000000..24426444fb
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/codec_test.cc
@@ -0,0 +1,375 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/codec.h"
+
+#include <stddef.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <random>
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "lib/extras/codec_pgx.h"
+#include "lib/extras/codec_pnm.h"
+#include "lib/jxl/base/thread_pool_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/luminance.h"
+#include "lib/jxl/testdata.h"
+
+namespace jxl {
+namespace {
+
+CodecInOut CreateTestImage(const size_t xsize, const size_t ysize,
+                           const bool is_gray, const bool add_alpha,
+                           const size_t bits_per_sample,
+                           const ColorEncoding& c_native) {
+  Image3F pixels(xsize, ysize);  // random color, or replicated gray samples
+  if (!is_gray) {
+    RandomFillImage(&pixels, 1.0f);
+  } else {
+    std::mt19937_64 engine(129);  // fixed seed: deterministic gray samples
+    std::uniform_real_distribution<float> unit(0.0f, 1.0f);
+    for (size_t row = 0; row < ysize; ++row) {
+      float* JXL_RESTRICT plane0 = pixels.PlaneRow(0, row);
+      float* JXL_RESTRICT plane1 = pixels.PlaneRow(1, row);
+      float* JXL_RESTRICT plane2 = pixels.PlaneRow(2, row);
+      for (size_t col = 0; col < xsize; ++col) {
+        plane0[col] = plane1[col] = plane2[col] = unit(engine);
+      }
+    }
+  }
+
+  CodecInOut result;
+  if (bits_per_sample != 32) {
+    result.metadata.m.SetUintSamples(bits_per_sample);
+  } else {
+    result.metadata.m.SetFloat32Samples();  // 32 bits selects float samples
+  }
+  result.metadata.m.color_encoding = c_native;
+  result.SetFromImage(std::move(pixels), c_native);
+  if (add_alpha) {
+    ImageF alpha(xsize, ysize);
+    RandomFillImage(&alpha, 1.f);
+    result.metadata.m.SetAlphaBits(bits_per_sample > 8 ? 16 : 8);
+    result.Main().SetAlpha(std::move(alpha), /*alpha_is_premultiplied=*/false);
+  }
+  return result;
+}
+
+// Writes a random image with `codec`, reads it back and compares the pixels.
+void TestRoundTrip(Codec codec, const size_t xsize, const size_t ysize,
+                   const bool is_gray, const bool add_alpha,
+                   const size_t bits_per_sample, ThreadPool* pool) {
+  // JPEG encoding is not lossless.
+  if (codec == Codec::kJPG) return;
+  if (codec == Codec::kPNM && add_alpha) return;  // PNM + alpha is skipped
+  // Our EXR codec always uses 16-bit premultiplied alpha, does not support
+  // grayscale, and somehow does not have sufficient precision for this test.
+  if (codec == Codec::kEXR) return;
+  printf("Codec %s bps:%zu gr:%d al:%d\n",
+         ExtensionFromCodec(codec, is_gray, bits_per_sample).c_str(),
+         bits_per_sample, is_gray, add_alpha);
+
+  ColorEncoding c_native;
+  c_native.SetColorSpace(is_gray ? ColorSpace::kGray : ColorSpace::kRGB);
+  // Note: this must not be wider than c_external, otherwise gamut clipping
+  // will cause large round-trip errors.
+  c_native.primaries = Primaries::kP3;
+  c_native.tf.SetTransferFunction(TransferFunction::kLinear);
+  JXL_CHECK(c_native.CreateICC());
+
+  // Generally store same color space to reduce round trip errors..
+  ColorEncoding c_external = c_native;
+  // .. unless we have enough precision for some transforms.
+  if (bits_per_sample >= 16) {
+    c_external.white_point = WhitePoint::kE;
+    c_external.primaries = Primaries::k2100;
+    c_external.tf.SetTransferFunction(TransferFunction::kSRGB);
+  }
+  JXL_CHECK(c_external.CreateICC());
+
+  const CodecInOut io = CreateTestImage(xsize, ysize, is_gray, add_alpha,
+                                        bits_per_sample, c_native);
+  const ImageBundle& ib1 = io.Main();  // reference pixels
+
+  PaddedBytes encoded;
+  JXL_CHECK(Encode(io, codec, c_external, bits_per_sample, &encoded, pool));
+
+  CodecInOut io2;  // receives the decoded copy
+  io2.target_nits = io.metadata.m.IntensityTarget();
+  // Only for PNM because PNG will warn about ignoring them.
+  if (codec == Codec::kPNM) {
+    io2.dec_hints.Add("color_space", Description(c_external));
+  }
+  JXL_CHECK(SetFromBytes(Span<const uint8_t>(encoded), &io2, pool));
+  ImageBundle& ib2 = io2.Main();
+
+  EXPECT_EQ(Description(c_external),
+            Description(io2.metadata.m.color_encoding));
+
+  // See c_external above - for low bits_per_sample the encoded space is
+  // already the same.
+  if (bits_per_sample < 16) {
+    EXPECT_EQ(Description(ib1.c_current()), Description(ib2.c_current()));
+  }
+
+  if (add_alpha) {
+    EXPECT_TRUE(SamePixels(ib1.alpha(), *ib2.alpha()));  // alpha: exact match
+  }
+
+  JXL_CHECK(ib2.TransformTo(ib1.c_current(), pool));  // to ib1's encoding
+
+  double max_l1, max_rel;  // bounds handed to VerifyRelativeError below
+  // Round-trip tolerances must be higher than in external_image_test because
+  // codecs do not support unbounded ranges.
+#if JPEGXL_ENABLE_SKCMS
+  if (bits_per_sample <= 12) {
+    max_l1 = 0.5;
+    max_rel = 6E-3;
+  } else {
+    max_l1 = 1E-3;
+    max_rel = 5E-4;
+  }
+#else  // JPEGXL_ENABLE_SKCMS
+  if (bits_per_sample <= 12) {
+    max_l1 = 0.5;
+    max_rel = 6E-3;
+  } else if (bits_per_sample == 16) {
+    max_l1 = 3E-3;
+    max_rel = 1E-4;
+  } else {
+#ifdef __ARM_ARCH
+    // pow() implementation in arm is a bit less precise than in x86 and
+    // therefore we need a bigger error margin in this case.
+    max_l1 = 1E-7;
+    max_rel = 1E-4;
+#else
+    max_l1 = 1E-7;
+    max_rel = 1E-5;
+#endif
+  }
+#endif  // JPEGXL_ENABLE_SKCMS
+
+  VerifyRelativeError(ib1.color(), *ib2.color(), max_l1, max_rel);
+}
+
+#if 0
+TEST(CodecTest, TestRoundTrip) {
+  ThreadPoolInternal pool(12);
+
+  // Tiny image: every codec/depth/gray/alpha combination is round-tripped.
+  constexpr size_t kWidth = 7, kHeight = 4;
+
+  for (const Codec format : Values<Codec>()) {
+    for (const int depth : {8, 10, 12, 16, 32}) {
+      const size_t bits = static_cast<size_t>(depth);
+      for (const bool gray : {false, true}) {
+        for (const bool alpha : {false, true}) {
+          TestRoundTrip(format, kWidth, kHeight, gray, alpha, bits, &pool);
+        }
+      }
+    }
+  }
+}
+#endif
+
+CodecInOut DecodeRoundtrip(const std::string& pathname, Codec expected_codec,
+                           ThreadPool* pool,
+                           const DecoderHints& dec_hints = DecoderHints()) {
+  const PaddedBytes file_bytes = ReadTestData(pathname);
+  CodecInOut first;
+  first.dec_hints = dec_hints;
+  JXL_CHECK(SetFromBytes(Span<const uint8_t>(file_bytes), &first, pool));
+  const ImageBundle& before = first.Main();
+
+  // A second encode/decode pass reveals metadata that Encode fails to carry.
+  PaddedBytes blob;
+  JXL_CHECK(Encode(first, expected_codec, first.metadata.m.color_encoding,
+                   first.metadata.m.bit_depth.bits_per_sample, &blob, pool));
+
+  CodecInOut second;
+  second.dec_hints = dec_hints;
+  JXL_CHECK(SetFromBytes(Span<const uint8_t>(blob), &second, pool));
+  const ImageBundle& after = second.Main();
+  const auto& enc_before = before.metadata()->color_encoding;
+  EXPECT_EQ(Description(enc_before),
+            Description(after.metadata()->color_encoding));
+  EXPECT_EQ(Description(before.c_current()), Description(after.c_current()));
+
+  // "Same" pixels? Low bit depths and non-sRGB gamuts get looser bounds.
+  const bool coarse = second.metadata.m.bit_depth.bits_per_sample <= 12;
+  double max_l1 = coarse ? 1.3 : 2E-3;
+  double max_rel = coarse ? 6E-3 : 1E-4;
+  if (enc_before.IsGray()) {
+    max_rel *= 2.0;
+  } else if (enc_before.primaries != Primaries::kSRGB) {
+    // Anything but sRGB primaries counts as a large gamut here.
+    max_l1 *= 1.5;
+    max_rel *= 3.0;
+  }
+  VerifyRelativeError(before.color(), after.color(), max_l1, max_rel);
+
+  // Mimic the encoder dropping the profile and the decoder restoring it.
+  if (!after.metadata()->color_encoding.WantICC()) {
+    second.metadata.m.color_encoding.InternalRemoveICC();
+    EXPECT_TRUE(second.metadata.m.color_encoding.CreateICC());
+  }
+
+  return second;
+}
+
+#if 0
+TEST(CodecTest, TestMetadataSRGB) {
+  ThreadPoolInternal pool(12);
+
+  // Each sample must decode to a 64x64 sRGB image with 8-bit integer samples.
+  for (const char* name :
+       {"raw.pixls/DJI-FC6310-16bit_srgb8_v4_krita.png",
+        "raw.pixls/Google-Pixel2XL-16bit_srgb8_v4_krita.png",
+        "raw.pixls/HUAWEI-EVA-L09-16bit_srgb8_dt.png",
+        "raw.pixls/Nikon-D300-12bit_srgb8_dt.png",
+        "raw.pixls/Sony-DSC-RX1RM2-14bit_srgb8_v4_krita.png"}) {
+    const CodecInOut img = DecodeRoundtrip(name, Codec::kPNG, &pool);
+    const auto& depth = img.metadata.m.bit_depth;
+    EXPECT_EQ(8, depth.bits_per_sample);
+    EXPECT_FALSE(depth.floating_point_sample);
+    EXPECT_EQ(0, depth.exponent_bits_per_sample);
+    EXPECT_EQ(64, img.xsize());
+    EXPECT_EQ(64, img.ysize());
+    EXPECT_FALSE(img.metadata.m.HasAlpha());
+
+    const ColorEncoding& encoding = img.metadata.m.color_encoding;
+    EXPECT_FALSE(encoding.ICC().empty());
+    EXPECT_EQ(ColorSpace::kRGB, encoding.GetColorSpace());
+    EXPECT_EQ(WhitePoint::kD65, encoding.white_point);
+    EXPECT_EQ(Primaries::kSRGB, encoding.primaries);
+    EXPECT_TRUE(encoding.tf.IsSRGB());
+  }
+}
+
+TEST(CodecTest, TestMetadataLinear) {
+  ThreadPoolInternal pool(12);
+
+  // Parallel tables: entry `idx` of each array describes the same test file.
+  const char* files[3] = {
+      "raw.pixls/Google-Pixel2XL-16bit_acescg_g1_v4_krita.png",
+      "raw.pixls/HUAWEI-EVA-L09-16bit_709_g1_dt.png",
+      "raw.pixls/Nikon-D300-12bit_2020_g1_dt.png",
+  };
+  const WhitePoint expected_white[3] = {WhitePoint::kCustom, WhitePoint::kD65,
+                                        WhitePoint::kD65};
+  const Primaries expected_primaries[3] = {Primaries::kCustom, Primaries::kSRGB,
+                                           Primaries::k2100};
+  for (size_t idx = 0; idx != 3; ++idx) {
+    const CodecInOut img = DecodeRoundtrip(files[idx], Codec::kPNG, &pool);
+    const auto& depth = img.metadata.m.bit_depth;
+    EXPECT_EQ(16, depth.bits_per_sample);
+    EXPECT_FALSE(depth.floating_point_sample);
+    EXPECT_EQ(0, depth.exponent_bits_per_sample);
+    EXPECT_EQ(64, img.xsize());
+    EXPECT_EQ(64, img.ysize());
+    EXPECT_FALSE(img.metadata.m.HasAlpha());
+
+    const ColorEncoding& encoding = img.metadata.m.color_encoding;
+    EXPECT_FALSE(encoding.ICC().empty());
+    EXPECT_EQ(ColorSpace::kRGB, encoding.GetColorSpace());
+    EXPECT_EQ(expected_white[idx], encoding.white_point);
+    EXPECT_EQ(expected_primaries[idx], encoding.primaries);
+    EXPECT_TRUE(encoding.tf.IsLinear());
+  }
+}
+
+TEST(CodecTest, TestMetadataICC) {
+  ThreadPoolInternal pool(12);
+
+  // Both files must decode to 64x64 images without alpha whose encoding is
+  // perceptual-intent RGB / D65 / sRGB primaries with the 709 transfer curve.
+  for (const char* name :
+       {"raw.pixls/DJI-FC6310-16bit_709_v4_krita.png",
+        "raw.pixls/Sony-DSC-RX1RM2-14bit_709_v4_krita.png"}) {
+    const CodecInOut img = DecodeRoundtrip(name, Codec::kPNG, &pool);
+    const auto bits = img.metadata.m.bit_depth.bits_per_sample;
+    EXPECT_GE(16, bits);
+    EXPECT_LE(14, bits);
+
+    EXPECT_EQ(64, img.xsize());
+    EXPECT_EQ(64, img.ysize());
+    EXPECT_FALSE(img.metadata.m.HasAlpha());
+
+    const ColorEncoding& encoding = img.metadata.m.color_encoding;
+    EXPECT_FALSE(encoding.ICC().empty());
+    EXPECT_EQ(RenderingIntent::kPerceptual, encoding.rendering_intent);
+    EXPECT_EQ(ColorSpace::kRGB, encoding.GetColorSpace());
+    EXPECT_EQ(WhitePoint::kD65, encoding.white_point);
+    EXPECT_EQ(Primaries::kSRGB, encoding.primaries);
+    EXPECT_EQ(TransferFunction::k709, encoding.tf.GetTransferFunction());
+  }
+}
+
+TEST(CodecTest, TestPNGSuite) {
+  ThreadPoolInternal pool(12);
+
+  // Ensure we can load PNG with text, japanese UTF-8, compressed text.
+  (void)DecodeRoundtrip("pngsuite/ct1n0g04.png", Codec::kPNG, &pool);
+  (void)DecodeRoundtrip("pngsuite/ctjn0g04.png", Codec::kPNG, &pool);
+  (void)DecodeRoundtrip("pngsuite/ctzn0g04.png", Codec::kPNG, &pool);
+
+  // Extract gAMA: this file must decode with a linear transfer function.
+  const CodecInOut b1 =
+      DecodeRoundtrip("pngsuite/g10n3p04.png", Codec::kPNG, &pool);
+  EXPECT_TRUE(b1.metadata.m.color_encoding.tf.IsLinear());
+
+  // Extract cHRM: this file must decode to sRGB primaries and a D65 white.
+  const CodecInOut b_p =
+      DecodeRoundtrip("pngsuite/ccwn2c08.png", Codec::kPNG, &pool);
+  EXPECT_EQ(Primaries::kSRGB, b_p.metadata.m.color_encoding.primaries);
+  EXPECT_EQ(WhitePoint::kD65, b_p.metadata.m.color_encoding.white_point);
+
+  // Extract EXIF from (new-style) dedicated chunk
+  const CodecInOut b_exif =
+      DecodeRoundtrip("pngsuite/exif2c08.png", Codec::kPNG, &pool);
+  EXPECT_EQ(978, b_exif.blobs.exif.size());  // expected EXIF blob size
+}
+#endif
+
+void VerifyWideGamutMetadata(const std::string& relative_pathname,
+                             const Primaries primaries, ThreadPool* pool) {
+  const CodecInOut dec = DecodeRoundtrip(relative_pathname, Codec::kPNG, pool);
+  const auto& depth = dec.metadata.m.bit_depth;  // expect 8-bit integers
+  EXPECT_EQ(8, depth.bits_per_sample);
+  EXPECT_FALSE(depth.floating_point_sample);
+  EXPECT_EQ(0, depth.exponent_bits_per_sample);
+
+  const ColorEncoding& encoding = dec.metadata.m.color_encoding;
+  EXPECT_FALSE(encoding.ICC().empty());
+  EXPECT_EQ(RenderingIntent::kAbsolute, encoding.rendering_intent);
+  EXPECT_EQ(ColorSpace::kRGB, encoding.GetColorSpace());
+  EXPECT_EQ(WhitePoint::kD65, encoding.white_point);
+  EXPECT_EQ(primaries, encoding.primaries);  // caller-supplied expectation
+}
+
+TEST(CodecTest, TestWideGamut) {
+  ThreadPoolInternal pool(12);  // only the P3 color-ring file is checked
+  // VerifyWideGamutMetadata("wide-gamut-tests/P3-sRGB-color-bars.png",
+  //                        Primaries::kP3, &pool);
+  VerifyWideGamutMetadata("wide-gamut-tests/P3-sRGB-color-ring.png",
+                          Primaries::kP3, &pool);
+  // VerifyWideGamutMetadata("wide-gamut-tests/R2020-sRGB-color-bars.png",
+  //                        Primaries::k2100, &pool);
+  // VerifyWideGamutMetadata("wide-gamut-tests/R2020-sRGB-color-ring.png",
+  //                        Primaries::k2100, &pool);
+}
+
+TEST(CodecTest, TestPNM) { TestCodecPNM(); }  // thin wrapper, no own checks
+TEST(CodecTest, TestPGX) { TestCodecPGX(); }  // thin wrapper, no own checks
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/time.cc b/codec/L2/demos/jxlEnc/third_partys/lib/extras/time.cc
new file mode 100644
index 0000000000..73d1b8f260
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/time.cc
@@ -0,0 +1,60 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/time.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <ctime>
+
+#include "lib/jxl/base/os_macros.h"  // for JXL_OS_*
+
+#if JXL_OS_WIN
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif  // NOMINMAX
+#include <windows.h>
+#endif  // JXL_OS_WIN
+
+#if JXL_OS_MAC
+#include <mach/mach.h>
+#include <mach/mach_time.h>
+#endif  // JXL_OS_MAC
+
+#if JXL_OS_HAIKU
+#include <OS.h>
+#endif  // JXL_OS_HAIKU
+
+namespace jxl {
+
+double Now() {
+#if JXL_OS_WIN
+  LARGE_INTEGER ticks;
+  (void)QueryPerformanceCounter(&ticks);
+  LARGE_INTEGER ticks_per_second;
+  (void)QueryPerformanceFrequency(&ticks_per_second);
+  return static_cast<double>(ticks.QuadPart) / ticks_per_second.QuadPart;
+#elif JXL_OS_MAC
+  const auto ticks = mach_absolute_time();
+  // On OSX/iOS the reading is in CPU time units, so the timebase ratio is
+  // queried (only while its denominator is still zero) to convert it back.
+  // See https://developer.apple.com/library/mac/qa/qa1398/_index.html
+  static mach_timebase_info_data_t ratio;
+  if (ratio.denom == 0) {
+    (void)mach_timebase_info(&ratio);
+  }
+  return static_cast<double>(ticks) * ratio.numer / ratio.denom * 1E-9;
+#elif JXL_OS_HAIKU
+  return static_cast<double>(system_time_nsecs()) * 1E-9;
+#else
+  timespec ts;  // seconds + nanoseconds from the monotonic clock
+  clock_gettime(CLOCK_MONOTONIC, &ts);
+  return ts.tv_sec + ts.tv_nsec * 1E-9;
+#endif
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/time.h b/codec/L2/demos/jxlEnc/third_partys/lib/extras/time.h
new file mode 100644
index 0000000000..c71414b877
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/time.h
@@ -0,0 +1,19 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_TIME_H_
+#define LIB_EXTRAS_TIME_H_
+
+// OS-specific function for timing.
+
+namespace jxl {
+
+// Returns current time [seconds] from a monotonic clock with unspecified
+// starting point - only suitable for computing elapsed time.
+double Now();  // implemented per OS in time.cc
+
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_TIME_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/tone_mapping.cc b/codec/L2/demos/jxlEnc/third_partys/lib/extras/tone_mapping.cc
new file mode 100644
index 0000000000..9bb1c0559c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/tone_mapping.cc
@@ -0,0 +1,160 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/tone_mapping.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/extras/tone_mapping.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+Status ToneMapFrame(const std::pair<float, float> display_nits,
+                    ImageBundle* const ib, ThreadPool* const pool) {
+  // Perform tone mapping as described in Report ITU-R BT.2390-8, section 5.4
+  // (pp. 23-25).
+  // https://www.itu.int/pub/R-REP-BT.2390-8-2020
+
+  HWY_FULL(float) df;
+  using V = decltype(Zero(df));  // vector type matching descriptor df
+
+  ColorEncoding linear_rec2020;  // working space: linear RGB, D65 white
+  linear_rec2020.SetColorSpace(ColorSpace::kRGB);
+  linear_rec2020.primaries = Primaries::k2100;
+  linear_rec2020.white_point = WhitePoint::kD65;
+  linear_rec2020.tf.SetTransferFunction(TransferFunction::kLinear);
+  JXL_RETURN_IF_ERROR(linear_rec2020.CreateICC());
+  JXL_RETURN_IF_ERROR(ib->TransformTo(linear_rec2020, pool));
+
+  const auto eotf_inv = [&df](const V luminance) -> V {  // nits -> PQ code
+    return TF_PQ().EncodedFromDisplay(df, luminance * Set(df, 1. / 10000));
+  };
+
+  const V pq_mastering_min =
+      eotf_inv(Set(df, ib->metadata()->tone_mapping.min_nits));
+  const V pq_mastering_max =
+      eotf_inv(Set(df, ib->metadata()->tone_mapping.intensity_target));
+  const V pq_mastering_range = pq_mastering_max - pq_mastering_min;
+  const V inv_pq_mastering_range =
+      Set(df, 1) / (pq_mastering_max - pq_mastering_min);
+  const V min_lum = (eotf_inv(Set(df, display_nits.first)) - pq_mastering_min) *
+                    inv_pq_mastering_range;
+  const V max_lum =
+      (eotf_inv(Set(df, display_nits.second)) - pq_mastering_min) *
+      inv_pq_mastering_range;
+  const V ks = MulAdd(Set(df, 1.5f), max_lum, Set(df, -0.5f));
+  const V b = min_lum;  // weight of the (1 - e2)^4 term in e3 below
+
+  const V inv_one_minus_ks = Set(df, 1) / Max(Set(df, 1e-6f), Set(df, 1) - ks);
+  const auto T = [ks, inv_one_minus_ks](const V a) {
+    return (a - ks) * inv_one_minus_ks;
+  };
+  const auto P = [&T, &df, ks, max_lum](const V b) {  // b shadows outer b
+    const V t_b = T(b);
+    const V t_b_2 = t_b * t_b;
+    const V t_b_3 = t_b_2 * t_b;
+    return MulAdd(
+        MulAdd(Set(df, 2), t_b_3, MulAdd(Set(df, -3), t_b_2, Set(df, 1))), ks,
+        MulAdd(t_b_3 + MulAdd(Set(df, -2), t_b_2, t_b), Set(df, 1) - ks,
+               MulAdd(Set(df, -2), t_b_3, Set(df, 3) * t_b_2) * max_lum));
+  };
+
+  const V inv_max_display_nits = Set(df, 1 / display_nits.second);
+
+  JXL_RETURN_IF_ERROR(RunOnPool(
+      pool, 0, ib->ysize(), ThreadPool::SkipInit(),
+      [&](const int y, const int thread) {  // one image row per task
+        float* const JXL_RESTRICT row_r = ib->color()->PlaneRow(0, y);
+        float* const JXL_RESTRICT row_g = ib->color()->PlaneRow(1, y);
+        float* const JXL_RESTRICT row_b = ib->color()->PlaneRow(2, y);
+        for (size_t x = 0; x < ib->xsize(); x += Lanes(df)) {
+          V red = Load(df, row_r + x);  // NOTE(review): assumes padded rows
+          V green = Load(df, row_g + x);
+          V blue = Load(df, row_b + x);
+          const V luminance = Set(df, ib->metadata()->IntensityTarget()) *
+                              (MulAdd(Set(df, 0.2627f), red,
+                                      MulAdd(Set(df, 0.6780f), green,
+                                             Set(df, 0.0593f) * blue)));
+          const V normalized_pq =
+              Min(Set(df, 1.f), (eotf_inv(luminance) - pq_mastering_min) *
+                                    inv_pq_mastering_range);
+          const V e2 =  // identity below ks, P() from ks upwards
+              IfThenElse(normalized_pq < ks, normalized_pq, P(normalized_pq));
+          const V one_minus_e2 = Set(df, 1) - e2;
+          const V one_minus_e2_2 = one_minus_e2 * one_minus_e2;
+          const V one_minus_e2_4 = one_minus_e2_2 * one_minus_e2_2;
+          const V e3 = MulAdd(b, one_minus_e2_4, e2);
+          const V e4 = MulAdd(e3, pq_mastering_range, pq_mastering_min);
+          const V new_luminance =
+              Min(Set(df, display_nits.second),
+                  ZeroIfNegative(Set(df, 10000) *
+                                 TF_PQ().DisplayFromEncoded(df, e4)));
+
+          const V ratio = new_luminance / luminance;  // not finite at black
+          const V multiplier = ratio *
+                               Set(df, ib->metadata()->IntensityTarget()) *
+                               inv_max_display_nits;
+
+          red *= multiplier;
+          green *= multiplier;
+          blue *= multiplier;
+
+          const V gray = new_luminance * inv_max_display_nits;
+
+          // Desaturate out-of-gamut pixels.
+          V gray_mix = Zero(df);
+          for (const V val : {red, green, blue}) {
+            const V inv_val_minus_gray = Set(df, 1) / (val - gray);
+            const V bound1 = val * inv_val_minus_gray;
+            const V bound2 = bound1 - inv_val_minus_gray;
+            const V min_bound = Min(bound1, bound2);
+            const V max_bound = Max(bound1, bound2);
+            gray_mix = Clamp(gray_mix, min_bound, max_bound);
+          }
+          gray_mix = Clamp(gray_mix, Zero(df), Set(df, 1));
+          for (V* const val : {&red, &green, &blue}) {
+            *val = IfThenElse(luminance < Set(df, 1e-6), gray,  // black: gray
+                              MulAdd(gray_mix, gray - *val, *val));
+          }
+
+          Store(red, df, row_r + x);
+          Store(green, df, row_g + x);
+          Store(blue, df, row_b + x);
+        }
+      },
+      "ToneMap"));
+
+  return true;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+namespace {
+HWY_EXPORT(ToneMapFrame);
+}
+
+Status ToneMapTo(const std::pair<float, float> display_nits,
+                 CodecInOut* const io, ThreadPool* const pool) {
+  const auto per_frame = HWY_DYNAMIC_DISPATCH(ToneMapFrame);
+  for (size_t f = 0; f < io->frames.size(); ++f) {
+    JXL_RETURN_IF_ERROR(per_frame(display_nits, &io->frames[f], pool));
+  }  // the first failing frame aborts the loop
+  io->metadata.m.SetIntensityTarget(display_nits.second);  // display maximum
+  return true;
+}
+
+}  // namespace jxl
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/tone_mapping.h b/codec/L2/demos/jxlEnc/third_partys/lib/extras/tone_mapping.h
new file mode 100644
index 0000000000..4f9feeccc6
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/tone_mapping.h
@@ -0,0 +1,18 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_TONE_MAPPING_H_
+#define LIB_EXTRAS_TONE_MAPPING_H_
+
+#include "lib/jxl/codec_in_out.h"
+
+namespace jxl {
+
+Status ToneMapTo(std::pair<float, float> display_nits, CodecInOut* io,
+                 ThreadPool* pool = nullptr);
+
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_TONE_MAPPING_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/extras/tone_mapping_gbench.cc b/codec/L2/demos/jxlEnc/third_partys/lib/extras/tone_mapping_gbench.cc
new file mode 100644
index 0000000000..c87c9fcc21
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/extras/tone_mapping_gbench.cc
@@ -0,0 +1,45 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "benchmark/benchmark.h"
+#include "lib/extras/codec.h"
+#include "lib/extras/tone_mapping.h"
+#include "lib/jxl/testdata.h"
+
+namespace jxl {
+
+static void BM_ToneMapping(benchmark::State& state) {
+  CodecInOut src;
+  const PaddedBytes png_bytes =
+      ReadTestData("imagecompression.info/flower_foveon.png");
+  JXL_CHECK(SetFromBytes(Span<const uint8_t>(png_bytes), &src));
+
+  // Pre-convert to linear Rec. 2020 up front so that `ToneMapTo` does not need
+  // to, leaving the tone mapping itself as the main thing being measured.
+  ColorEncoding rec2020_linear;
+  rec2020_linear.SetColorSpace(ColorSpace::kRGB);
+  rec2020_linear.primaries = Primaries::k2100;
+  rec2020_linear.white_point = WhitePoint::kD65;
+  rec2020_linear.tf.SetTransferFunction(TransferFunction::kLinear);
+  JXL_CHECK(rec2020_linear.CreateICC());
+  JXL_CHECK(src.TransformTo(rec2020_linear));
+
+  for (auto _ : state) {
+    // Input setup sits between PauseTiming() and ResumeTiming().
+    state.PauseTiming();
+    CodecInOut fresh;
+    fresh.SetFromImage(CopyImage(*src.Main().color()),
+                       src.Main().c_current());
+    fresh.metadata.m.SetIntensityTarget(src.metadata.m.IntensityTarget());
+    state.ResumeTiming();
+
+    JXL_CHECK(ToneMapTo({0.1, 100}, &fresh));
+  }
+
+  state.SetItemsProcessed(state.iterations() * src.xsize() * src.ysize());
+}
+BENCHMARK(BM_ToneMapping);
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/butteraugli.h b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/butteraugli.h
new file mode 100644
index 0000000000..f543413b8c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/butteraugli.h
@@ -0,0 +1,156 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @file butteraugli.h
+ * @brief Butteraugli API for JPEG XL.
+ */
+
+#ifndef JXL_BUTTERAUGLI_H_
+#define JXL_BUTTERAUGLI_H_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#include "jxl/jxl_export.h"
+#include "jxl/memory_manager.h"
+#include "jxl/parallel_runner.h"
+#include "jxl/types.h"
+
+/**
+ * Opaque structure that holds a butteraugli API.
+ *
+ * Allocated and initialized with JxlButteraugliApiCreate().
+ * Cleaned up and deallocated with JxlButteraugliApiDestroy().
+ */
+typedef struct JxlButteraugliApiStruct JxlButteraugliApi;
+
+/**
+ * Opaque structure that holds intermediary butteraugli results.
+ *
+ * Allocated and initialized with JxlButteraugliCompute().
+ * Cleaned up and deallocated with JxlButteraugliResultDestroy().
+ */
+typedef struct JxlButteraugliResultStruct JxlButteraugliResult;
+
+/**
+ * Deinitializes and frees JxlButteraugliResult instance.
+ *
+ * @param result instance to be cleaned up and deallocated.
+ */
+JXL_EXPORT void JxlButteraugliResultDestroy(JxlButteraugliResult* result);
+
+/**
+ * Creates an instance of JxlButteraugliApi and initializes it.
+ *
+ * @p memory_manager will be used for all the library dynamic allocations made
+ * from this instance. The parameter may be NULL, in which case the default
+ * allocator will be used. See jxl/memory_manager.h for details.
+ *
+ * @param memory_manager custom allocator function. It may be NULL. The memory
+ *        manager will be copied internally.
+ * @return @c NULL if the instance can not be allocated or initialized
+ * @return pointer to initialized JxlButteraugliApi otherwise
+ */
+JXL_EXPORT JxlButteraugliApi* JxlButteraugliApiCreate(
+    const JxlMemoryManager* memory_manager);
+
+/**
+ * Set the parallel runner for multithreading.
+ *
+ * @param api api instance.
+ * @param parallel_runner function pointer to runner for multithreading. A
+ * multithreaded runner should be set to reach fast performance.
+ * @param parallel_runner_opaque opaque pointer for parallel_runner.
+ */
+JXL_EXPORT void JxlButteraugliApiSetParallelRunner(
+    JxlButteraugliApi* api, JxlParallelRunner parallel_runner,
+    void* parallel_runner_opaque);
+
+/**
+ * Set the hf_asymmetry option for butteraugli.
+ *
+ * @param api api instance.
+ * @param v new hf_asymmetry value.
+ */
+JXL_EXPORT void JxlButteraugliApiSetHFAsymmetry(JxlButteraugliApi* api,
+                                                float v);
+
+/**
+ * Set the intensity_target option for butteraugli.
+ *
+ * @param api api instance.
+ * @param v new intensity_target value.
+ */
+JXL_EXPORT void JxlButteraugliApiSetIntensityTarget(JxlButteraugliApi* api,
+                                                    float v);
+
+/**
+ * Deinitializes and frees JxlButteraugliApi instance.
+ *
+ * @param api instance to be cleaned up and deallocated.
+ */
+JXL_EXPORT void JxlButteraugliApiDestroy(JxlButteraugliApi* api);
+
+/**
+ * Computes intermediary butteraugli result between an original image and a
+ * distortion.
+ *
+ * @param api api instance for this computation.
+ * @param xsize width of the compared images.
+ * @param ysize height of the compared images.
+ * @param pixel_format_orig pixel format for original image.
+ * @param buffer_orig pixel data for original image.
+ * @param size_orig size of buffer_orig in bytes.
+ * @param pixel_format_dist pixel format for distortion.
+ * @param buffer_dist pixel data for distortion.
+ * @param size_dist size of buffer_dist in bytes.
+ * @return @c NULL if the results can not be computed or initialized.
+ * @return pointer to initialized and computed intermediary result.
+ */
+JXL_EXPORT JxlButteraugliResult* JxlButteraugliCompute(
+    const JxlButteraugliApi* api, uint32_t xsize, uint32_t ysize,
+    const JxlPixelFormat* pixel_format_orig, const void* buffer_orig,
+    size_t size_orig, const JxlPixelFormat* pixel_format_dist,
+    const void* buffer_dist, size_t size_dist);
+
+/**
+ * Computes butteraugli max distance based on an intermediary butteraugli
+ * result.
+ *
+ * @param result intermediary result instance.
+ * @return max distance.
+ */
+JXL_EXPORT float JxlButteraugliResultGetMaxDistance(
+    const JxlButteraugliResult* result);
+
+/**
+ * Computes a butteraugli distance based on an intermediary butteraugli result.
+ *
+ * @param result intermediary result instance.
+ * @param pnorm pnorm to calculate.
+ * @return distance using the given pnorm.
+ */
+JXL_EXPORT float JxlButteraugliResultGetDistance(
+    const JxlButteraugliResult* result, float pnorm);
+
+/**
+ * Get a pointer to the distmap in the result.
+ *
+ * @param result intermediary result instance.
+ * @param buffer will be set to the distmap. The distance value for (x,y) will
+ * be available at buffer + y * row_stride + x.
+ * @param row_stride will be set to the row stride of the distmap.
+ */
+JXL_EXPORT void JxlButteraugliResultGetDistmap(
+    const JxlButteraugliResult* result, const float** buffer,
+    uint32_t* row_stride);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_BUTTERAUGLI_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/butteraugli_cxx.h b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/butteraugli_cxx.h
new file mode 100644
index 0000000000..c0e93ad74a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/butteraugli_cxx.h
@@ -0,0 +1,55 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/// @file butteraugli_cxx.h
+/// @brief C++ header-only helper for @ref butteraugli.h.
+///
+/// There's no binary library associated with the header since this is a header
+/// only library.
+
+#ifndef JXL_BUTTERAUGLI_CXX_H_
+#define JXL_BUTTERAUGLI_CXX_H_
+
+#include <memory>
+
+#include "jxl/butteraugli.h"
+
+#if !(defined(__cplusplus) || defined(c_plusplus))
+#error "This a C++ only header. Use jxl/butteraugli.h from C sources."
+#endif
+
+/// Struct to call JxlButteraugliApiDestroy from the JxlButteraugliApiPtr
+/// unique_ptr.
+struct JxlButteraugliApiDestroyStruct {
+  /// Calls @ref JxlButteraugliApiDestroy() on the passed api.
+  void operator()(JxlButteraugliApi* api) { JxlButteraugliApiDestroy(api); }
+};
+
+/// std::unique_ptr<> type that calls JxlButteraugliApiDestroy() when releasing
+/// the pointer.
+///
+/// Use this helper type from C++ sources to ensure the api is destroyed and
+/// its internal resources are released.
+typedef std::unique_ptr<JxlButteraugliApi, JxlButteraugliApiDestroyStruct>
+    JxlButteraugliApiPtr;
+
+/// Struct to call JxlButteraugliResultDestroy from the JxlButteraugliResultPtr
+/// unique_ptr.
+struct JxlButteraugliResultDestroyStruct {
+  /// Calls @ref JxlButteraugliResultDestroy() on the passed result object.
+  void operator()(JxlButteraugliResult* result) {
+    JxlButteraugliResultDestroy(result);
+  }
+};
+
+/// std::unique_ptr<> type that calls JxlButteraugliResultDestroy() when
+/// releasing the pointer.
+///
+/// Use this helper type from C++ sources to ensure the result object is
+/// destroyed and its internal resources are released.
+typedef std::unique_ptr<JxlButteraugliResult, JxlButteraugliResultDestroyStruct>
+    JxlButteraugliResultPtr;
+
+#endif  // JXL_BUTTERAUGLI_CXX_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/codestream_header.h b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/codestream_header.h
new file mode 100644
index 0000000000..04e40d11d9
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/codestream_header.h
@@ -0,0 +1,311 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @file codestream_header.h
+ * @brief Definitions of structs and enums for the metadata from the JPEG XL
+ * codestream headers (signature, metadata, preview dimensions, ...), excluding
+ * color encoding which is in color_encoding.h.
+ */
+
+#ifndef JXL_CODESTREAM_HEADER_H_
+#define JXL_CODESTREAM_HEADER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "jxl/color_encoding.h"
+#include "jxl/types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Image orientation metadata.
+ * Values 1..8 match the EXIF definitions.
+ * The name indicates the operation to perform to transform from the encoded
+ * image to the display image.
+ */
+typedef enum {
+  JXL_ORIENT_IDENTITY = 1,
+  JXL_ORIENT_FLIP_HORIZONTAL = 2,
+  JXL_ORIENT_ROTATE_180 = 3,
+  JXL_ORIENT_FLIP_VERTICAL = 4,
+  JXL_ORIENT_TRANSPOSE = 5,
+  JXL_ORIENT_ROTATE_90_CW = 6,
+  JXL_ORIENT_ANTI_TRANSPOSE = 7,
+  JXL_ORIENT_ROTATE_90_CCW = 8,
+} JxlOrientation;
+
+/** Given type of an extra channel.
+ */
+typedef enum {
+  JXL_CHANNEL_ALPHA,
+  JXL_CHANNEL_DEPTH,
+  JXL_CHANNEL_SPOT_COLOR,
+  JXL_CHANNEL_SELECTION_MASK,
+  JXL_CHANNEL_BLACK,
+  JXL_CHANNEL_CFA,
+  JXL_CHANNEL_THERMAL,
+  JXL_CHANNEL_RESERVED0,
+  JXL_CHANNEL_RESERVED1,
+  JXL_CHANNEL_RESERVED2,
+  JXL_CHANNEL_RESERVED3,
+  JXL_CHANNEL_RESERVED4,
+  JXL_CHANNEL_RESERVED5,
+  JXL_CHANNEL_RESERVED6,
+  JXL_CHANNEL_RESERVED7,
+  JXL_CHANNEL_UNKNOWN,
+  JXL_CHANNEL_OPTIONAL
+} JxlExtraChannelType;
+
+/** The codestream preview header */
+typedef struct {
+  /** Preview width in pixels */
+  uint32_t xsize;
+
+  /** Preview height in pixels */
+  uint32_t ysize;
+} JxlPreviewHeader;
+
+/** The codestream animation header, optionally present in the beginning of
+ * the codestream, and if it is it applies to all animation frames, unlike
+ * JxlFrameHeader which applies to an individual frame.
+ */
+typedef struct {
+  /** Numerator of ticks per second of a single animation frame time unit */
+  uint32_t tps_numerator;
+
+  /** Denominator of ticks per second of a single animation frame time unit */
+  uint32_t tps_denominator;
+
+  /** Amount of animation loops, or 0 to repeat infinitely */
+  uint32_t num_loops;
+
+  /** Whether animation time codes are present at animation frames in the
+   * codestream */
+  JXL_BOOL have_timecodes;
+} JxlAnimationHeader;
+
+/** Basic image information. This information is available from the file
+ * signature and first part of the codestream header.
+ */
+typedef struct JxlBasicInfo {
+  /* TODO(lode): need additional fields for (transcoded) JPEG? For reusable
+   * fields orientation must be read from Exif APP1. For has_icc_profile: must
+   * look up where ICC profile is guaranteed to be in a JPEG file to be able to
+   * indicate this. */
+
+  /* TODO(lode): make struct packed, and/or make this opaque struct with getter
+   * functions (still separate struct from opaque decoder) */
+
+  /** Whether the codestream is embedded in the container format. If true,
+   * metadata information and extensions may be available in addition to the
+   * codestream.
+   */
+  JXL_BOOL have_container;
+
+  /** Width of the image in pixels, before applying orientation.
+   */
+  uint32_t xsize;
+
+  /** Height of the image in pixels, before applying orientation.
+   */
+  uint32_t ysize;
+
+  /** Original image color channel bit depth.
+   */
+  uint32_t bits_per_sample;
+
+  /** Original image color channel floating point exponent bits, or 0 if they
+   * are unsigned integer. For example, if the original data is half-precision
+   * (binary16) floating point, bits_per_sample is 16 and
+   * exponent_bits_per_sample is 5, and so on for other floating point
+   * precisions.
+   */
+  uint32_t exponent_bits_per_sample;
+
+  /** Upper bound on the intensity level present in the image in nits. For
+   * unsigned integer pixel encodings, this is the brightness of the largest
+   * representable value. The image does not necessarily contain a pixel
+   * actually this bright. An encoder is allowed to set 255 for SDR images
+   * without computing a histogram.
+   */
+  float intensity_target;
+
+  /** Lower bound on the intensity level present in the image. This may be
+   * loose, i.e. lower than the actual darkest pixel. When tone mapping, a
+   * decoder will map [min_nits, intensity_target] to the display range.
+   */
+  float min_nits;
+
+  /** See the description of @see linear_below.
+   */
+  JXL_BOOL relative_to_max_display;
+
+  /** The tone mapping will leave unchanged (linear mapping) any pixels whose
+   * brightness is strictly below this. The interpretation depends on
+   * relative_to_max_display. If true, this is a ratio [0, 1] of the maximum
+   * display brightness [nits], otherwise an absolute brightness [nits].
+   */
+  float linear_below;
+
+  /** Whether the data in the codestream is encoded in the original color
+   * profile that is attached to the codestream metadata header, or is
+   * encoded in an internally supported absolute color space (which the decoder
+   * can always convert to linear or non-linear sRGB or to XYB). If the original
+   * profile is used, the decoder outputs pixel data in the color space matching
+   * that profile, but doesn't convert it to any other color space. If the
+   * original profile is not used, the decoder only outputs the data as sRGB
+   * (linear if outputting to floating point, nonlinear with standard sRGB
+   * transfer function if outputting to unsigned integers) but will not convert
+   * it to the original color profile. The decoder also does not convert to
+   * the target display color profile, but instead will always indicate which
+   * color profile the returned pixel data is encoded in when using @see
+   * JXL_COLOR_PROFILE_TARGET_DATA so that a CMS can be used to convert the
+   * data.
+   */
+  JXL_BOOL uses_original_profile;
+
+  /** Indicates a preview image exists near the beginning of the codestream.
+   * The preview itself or its dimensions are not included in the basic info.
+   */
+  JXL_BOOL have_preview;
+
+  /** Indicates animation frames exist in the codestream. The animation
+   * information is not included in the basic info.
+   */
+  JXL_BOOL have_animation;
+
+  /** Image orientation, value 1-8 matching the values used by JEITA CP-3451C
+   * (Exif version 2.3).
+   */
+  JxlOrientation orientation;
+
+  /** Number of color channels encoded in the image, this is either 1 for
+   * grayscale data, or 3 for colored data. This count does not include
+   * the alpha channel or other extra channels. To check presence of an alpha
+   * channel, such as in the case of RGBA color, check alpha_bits != 0.
+   * If and only if this is 1, the JxlColorSpace in the JxlColorEncoding is
+   * JXL_COLOR_SPACE_GRAY.
+   */
+  uint32_t num_color_channels;
+
+  /** Number of additional image channels. This includes the main alpha channel,
+   * but can also include additional channels such as depth, additional alpha
+   * channels, spot colors, and so on. Information about the extra channels
+   * can be queried with JxlDecoderGetExtraChannelInfo. The main alpha channel,
+   * if it exists, also has its information available in the alpha_bits,
+   * alpha_exponent_bits and alpha_premultiplied fields in this JxlBasicInfo.
+   */
+  uint32_t num_extra_channels;
+
+  /** Bit depth of the encoded alpha channel, or 0 if there is no alpha channel.
+   */
+  uint32_t alpha_bits;
+
+  /** Alpha channel floating point exponent bits, or 0 if they are unsigned
+   * integer.
+   */
+  uint32_t alpha_exponent_bits;
+
+  /** Whether the alpha channel is premultiplied
+   */
+  JXL_BOOL alpha_premultiplied;
+
+  /** Dimensions of encoded preview image, only used if have_preview is
+   * JXL_TRUE.
+   */
+  JxlPreviewHeader preview;
+
+  /** Animation header with global animation properties for all frames, only
+   * used if have_animation is JXL_TRUE.
+   */
+  JxlAnimationHeader animation;
+} JxlBasicInfo;
+
+/** Information for a single extra channel.
+ */
+typedef struct {
+  /** Given type of an extra channel.
+   */
+  JxlExtraChannelType type;
+
+  /** Total bits per sample for this channel.
+   */
+  uint32_t bits_per_sample;
+
+  /** Floating point exponent bits per channel, or 0 if they are unsigned
+   * integer.
+   */
+  uint32_t exponent_bits_per_sample;
+
+  /** The exponent the channel is downsampled by on each axis.
+   * TODO(lode): expand this comment to match the JPEG XL specification,
+   * specify how to upscale, how to round the size computation, and to which
+   * extra channels this field applies.
+   */
+  uint32_t dim_shift;
+
+  /** Length of the extra channel name in bytes, or 0 if no name.
+   * Excludes null termination character.
+   */
+  uint32_t name_length;
+
+  /** Whether alpha channel uses premultiplied alpha. Only applicable if
+   * type is JXL_CHANNEL_ALPHA.
+   */
+  JXL_BOOL alpha_associated;
+
+  /** Spot color of the current spot channel in linear RGBA. Only applicable if
+   * type is JXL_CHANNEL_SPOT_COLOR.
+   */
+  float spot_color[4];
+
+  /** Only applicable if type is JXL_CHANNEL_CFA.
+   * TODO(lode): add comment about the meaning of this field.
+   */
+  uint32_t cfa_channel;
+} JxlExtraChannelInfo;
+
+/* TODO(lode): add API to get the codestream header extensions. */
+/** Extensions in the codestream header. */
+typedef struct {
+  /** Extension bits. */
+  uint64_t extensions;
+} JxlHeaderExtensions;
+
+/** The header of one displayed frame. */
+typedef struct {
+  /** How long to wait after rendering in ticks. The duration in seconds of a
+   * tick is given by tps_numerator and tps_denominator in JxlAnimationHeader.
+   */
+  uint32_t duration;
+
+  /** SMPTE timecode of the current frame in form 0xHHMMSSFF, or 0. The bits are
+   * interpreted from most-significant to least-significant as hour, minute,
+   * second, and frame. If timecode is nonzero, it is strictly larger than that
+   * of a previous frame with nonzero duration. These values are only available
+   * if have_timecodes in JxlAnimationHeader is JXL_TRUE.
+   * This value is only used if have_timecodes in JxlAnimationHeader is
+   * JXL_TRUE.
+   */
+  uint32_t timecode;
+
+  /** Length of the frame name in bytes, or 0 if no name.
+   * Excludes null termination character.
+   */
+  uint32_t name_length;
+
+  /** Indicates this is the last animation frame.
+   */
+  JXL_BOOL is_last;
+} JxlFrameHeader;
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_CODESTREAM_HEADER_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/color_encoding.h b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/color_encoding.h
new file mode 100644
index 0000000000..e86dae369a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/color_encoding.h
@@ -0,0 +1,145 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @file color_encoding.h
+ * @brief Color Encoding definitions used by JPEG XL.
+ * All CIE units are for the standard 1931 2 degree observer.
+ */
+
+#ifndef JXL_COLOR_ENCODING_H_
+#define JXL_COLOR_ENCODING_H_
+
+#include <stdint.h>
+
+#include "jxl/types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Color space of the image data. */
+typedef enum {
+  /** Tristimulus RGB */
+  JXL_COLOR_SPACE_RGB,
+  /** Luminance based, the primaries in JxlColorEncoding must be ignored. This
+   * value implies that num_color_channels in JxlBasicInfo is 1, any other value
+   * implies num_color_channels is 3. */
+  JXL_COLOR_SPACE_GRAY,
+  /** XYB (opsin) color space */
+  JXL_COLOR_SPACE_XYB,
+  /** None of the other table entries describe the color space appropriately */
+  JXL_COLOR_SPACE_UNKNOWN,
+} JxlColorSpace;
+
+/** Built-in whitepoints for color encoding. Numeric values match CICP (Rec.
+ * ITU-T H.273 | ISO/IEC 23091-2:2019(E)). */
+typedef enum {
+  /** CIE Standard Illuminant D65: 0.3127, 0.3290 */
+  JXL_WHITE_POINT_D65 = 1,
+  /** Custom white point stored in JxlColorEncoding white_point. */
+  JXL_WHITE_POINT_CUSTOM = 2,
+  /** CIE Standard Illuminant E (equal-energy): 1/3, 1/3 */
+  JXL_WHITE_POINT_E = 10,
+  /** DCI-P3 from SMPTE RP 431-2: 0.314, 0.351 */
+  JXL_WHITE_POINT_DCI = 11,
+} JxlWhitePoint;
+
+/** Built-in primaries for color encoding. Numeric values match CICP (Rec. ITU-T
+ * H.273 | ISO/IEC 23091-2:2019(E)). */
+typedef enum {
+  /** The CIE xy values of the red, green and blue primaries are: 0.639998686,
+     0.330010138; 0.300003784, 0.600003357; 0.150002046, 0.059997204 */
+  JXL_PRIMARIES_SRGB = 1,
+  /** Custom primaries stored in JxlColorEncoding primaries_red_xy,
+     primaries_green_xy and primaries_blue_xy. */
+  JXL_PRIMARIES_CUSTOM = 2,
+  /** As specified in Rec. ITU-R BT.2100-1 */
+  JXL_PRIMARIES_2100 = 9,
+  /** As specified in SMPTE RP 431-2 */
+  JXL_PRIMARIES_P3 = 11,
+} JxlPrimaries;
+
+/** Built-in transfer functions for color encoding. Numeric values match CICP
+ * (Rec. ITU-T H.273 | ISO/IEC 23091-2:2019(E)) unless specified otherwise. */
+typedef enum {
+  /** As specified in Rec. ITU-R BT.709-6 */
+  JXL_TRANSFER_FUNCTION_709 = 1,
+  /** None of the other table entries describe the transfer function. */
+  JXL_TRANSFER_FUNCTION_UNKNOWN = 2,
+  /** The gamma exponent is 1 */
+  JXL_TRANSFER_FUNCTION_LINEAR = 8,
+  /** As specified in IEC 61966-2-1 sRGB */
+  JXL_TRANSFER_FUNCTION_SRGB = 13,
+  /** As specified in SMPTE ST 2084 */
+  JXL_TRANSFER_FUNCTION_PQ = 16,
+  /** As specified in SMPTE ST 428-1 */
+  JXL_TRANSFER_FUNCTION_DCI = 17,
+  /** As specified in Rec. ITU-R BT.2100-1 (HLG) */
+  JXL_TRANSFER_FUNCTION_HLG = 18,
+  /** Transfer function follows power law given by the gamma value in
+     JxlColorEncoding. Not a CICP value. */
+  JXL_TRANSFER_FUNCTION_GAMMA = 65535,
+} JxlTransferFunction;
+
+/** Rendering intent for color encoding, as specified in ISO 15076-1:2010 */
+typedef enum {
+  /** vendor-specific */
+  JXL_RENDERING_INTENT_PERCEPTUAL = 0,
+  /** media-relative */
+  JXL_RENDERING_INTENT_RELATIVE,
+  /** vendor-specific */
+  JXL_RENDERING_INTENT_SATURATION,
+  /** ICC-absolute */
+  JXL_RENDERING_INTENT_ABSOLUTE,
+} JxlRenderingIntent;
+
+/** Color encoding of the image as structured information.
+ */
+typedef struct {
+  /** Color space of the image data.
+   */
+  JxlColorSpace color_space;
+
+  /** Built-in white point. If this value is JXL_WHITE_POINT_CUSTOM, must
+   * use the numerical whitepoint values from white_point_xy.
+   */
+  JxlWhitePoint white_point;
+
+  /** Numerical whitepoint values in CIE xy space. */
+  double white_point_xy[2];
+
+  /** Built-in RGB primaries. If this value is JXL_PRIMARIES_CUSTOM, must
+   * use the numerical primaries values below. This field and the custom values
+   * below are unused and must be ignored if the color space is
+   * JXL_COLOR_SPACE_GRAY or JXL_COLOR_SPACE_XYB.
+   */
+  JxlPrimaries primaries;
+
+  /** Numerical red primary values in CIE xy space. */
+  double primaries_red_xy[2];
+
+  /** Numerical green primary values in CIE xy space. */
+  double primaries_green_xy[2];
+
+  /** Numerical blue primary values in CIE xy space. */
+  double primaries_blue_xy[2];
+
+  /** Transfer function. If JXL_TRANSFER_FUNCTION_GAMMA, see gamma below. */
+  JxlTransferFunction transfer_function;
+
+  /** Gamma value used when transfer_function is JXL_TRANSFER_FUNCTION_GAMMA
+   */
+  double gamma;
+
+  /** Rendering intent defined for the color profile. */
+  JxlRenderingIntent rendering_intent;
+} JxlColorEncoding;
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_COLOR_ENCODING_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/decode.h b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/decode.h
new file mode 100644
index 0000000000..888058682e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/decode.h
@@ -0,0 +1,888 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @file decode.h
+ * @brief Decoding API for JPEG XL.
+ */
+
+#ifndef JXL_DECODE_H_
+#define JXL_DECODE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "jxl/codestream_header.h"
+#include "jxl/color_encoding.h"
+#include "jxl/jxl_export.h"
+#include "jxl/memory_manager.h"
+#include "jxl/parallel_runner.h"
+#include "jxl/types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/**
+ * Decoder library version.
+ *
+ * @return the decoder library version as an integer:
+ * MAJOR_VERSION * 1000000 + MINOR_VERSION * 1000 + PATCH_VERSION. For example,
+ * version 1.2.3 would return 1002003.
+ */
+JXL_EXPORT uint32_t JxlDecoderVersion(void);
+
+/** The result of JxlSignatureCheck.
+ */
+typedef enum {
+  /** Not enough bytes were passed to determine if a valid signature was found.
+   */
+  JXL_SIG_NOT_ENOUGH_BYTES = 0,
+
+  /** No valid JPEGXL header was found. */
+  JXL_SIG_INVALID = 1,
+
+  /** A valid JPEG XL codestream signature was found, that is a JPEG XL image
+   * without container.
+   */
+  JXL_SIG_CODESTREAM = 2,
+
+  /** A valid container signature was found, that is a JPEG XL image embedded
+   * in a box format container.
+   */
+  JXL_SIG_CONTAINER = 3,
+} JxlSignature;
+
+/**
+ * JPEG XL signature identification.
+ *
+ * Checks if the passed buffer contains a valid JPEG XL signature. The passed @p
+ * buf of size
+ * @p len doesn't need to be a full image, only the beginning of the file.
+ *
+ * @return a flag indicating if a JPEG XL signature was found and what type.
+ *   - JXL_SIG_NOT_ENOUGH_BYTES: not enough bytes were passed to determine
+ *       if a valid signature is there.
+ *   - JXL_SIG_INVALID: no valid signature found for JPEG XL decoding.
+ *   - JXL_SIG_CODESTREAM: a valid JPEG XL codestream signature was found.
+ *   - JXL_SIG_CONTAINER: a valid JPEG XL container signature was found.
+ */
+JXL_EXPORT JxlSignature JxlSignatureCheck(const uint8_t* buf, size_t len);
+
+/**
+ * Opaque structure that holds the JPEGXL decoder.
+ *
+ * Allocated and initialized with JxlDecoderCreate().
+ * Cleaned up and deallocated with JxlDecoderDestroy().
+ */
+typedef struct JxlDecoderStruct JxlDecoder;
+
+/**
+ * Creates an instance of JxlDecoder and initializes it.
+ *
+ * @p memory_manager will be used for all the library dynamic allocations made
+ * from this instance. The parameter may be NULL, in which case the default
+ * allocator will be used. See jxl/memory_manager.h for details.
+ *
+ * @param memory_manager custom allocator function. It may be NULL. The memory
+ *        manager will be copied internally.
+ * @return @c NULL if the instance can not be allocated or initialized
+ * @return pointer to initialized JxlDecoder otherwise
+ */
+JXL_EXPORT JxlDecoder* JxlDecoderCreate(const JxlMemoryManager* memory_manager);
+
+/**
+ * Re-initializes a JxlDecoder instance, so it can be re-used for decoding
+ * another image. All state and settings are reset as if the object was
+ * newly created with JxlDecoderCreate, but the memory manager is kept.
+ *
+ * @param dec instance to be re-initialized.
+ */
+JXL_EXPORT void JxlDecoderReset(JxlDecoder* dec);
+
+/**
+ * Deinitializes and frees JxlDecoder instance.
+ *
+ * @param dec instance to be cleaned up and deallocated.
+ */
+JXL_EXPORT void JxlDecoderDestroy(JxlDecoder* dec);
+
+/**
+ * Return value for JxlDecoderProcessInput.
+ * The values from 0x40 onwards are optional informative events that can be
+ * subscribed to; they are never returned if they have not been registered
+ * with JxlDecoderSubscribeEvents.
+ */
+typedef enum {
+  /** Function call finished successfully, or decoding is finished and there is
+   * nothing more to be done.
+   */
+  JXL_DEC_SUCCESS = 0,
+
+  /** An error occurred, for example invalid input file or out of memory.
+   * TODO(lode): add function to get error information from decoder.
+   */
+  JXL_DEC_ERROR = 1,
+
+  /** The decoder needs more input bytes to continue. Before the next
+   * JxlDecoderProcessInput call, more input data must be set, by calling
+   * JxlDecoderReleaseInput (if input was set previously) and then calling
+   * JxlDecoderSetInput. JxlDecoderReleaseInput returns how many bytes are
+   * not yet processed, before a next call to JxlDecoderProcessInput all
+   * unprocessed bytes must be provided again (the address need not match, but
+   * the contents must), and more bytes must be concatenated after the
+   * unprocessed bytes.
+   */
+  JXL_DEC_NEED_MORE_INPUT = 2,
+
+  /** The decoder is able to decode a preview image and requests setting a
+   * preview output buffer using JxlDecoderSetPreviewOutBuffer. This occurs if
+   * JXL_DEC_PREVIEW_IMAGE is requested and it is possible to decode a preview
+   * image from the codestream and the preview out buffer was not yet set. There
+   * is maximum one preview image in a codestream.
+   */
+  JXL_DEC_NEED_PREVIEW_OUT_BUFFER = 3,
+
+  /** The decoder is able to decode a DC image and requests setting a DC output
+   * buffer using JxlDecoderSetDCOutBuffer. This occurs if JXL_DEC_DC_IMAGE is
+   * requested and it is possible to decode a DC image from the codestream and
+   * the DC out buffer was not yet set. This event re-occurs for new frames
+   * if there are multiple animation frames.
+   * DEPRECATED: the DC feature in this form will be removed. You can use
+   * JxlDecoderFlushImage for progressive rendering.
+   */
+  JXL_DEC_NEED_DC_OUT_BUFFER = 4,
+
+  /** The decoder requests an output buffer to store the full resolution image,
+   * which can be set with JxlDecoderSetImageOutBuffer or with
+   * JxlDecoderSetImageOutCallback. This event re-occurs for new frames if there
+   * are multiple animation frames and requires setting an output again.
+   */
+  JXL_DEC_NEED_IMAGE_OUT_BUFFER = 5,
+
+  /** Informative event by JxlDecoderProcessInput: JPEG reconstruction buffer is
+   * too small for reconstructed JPEG codestream to fit.
+   * JxlDecoderSetJPEGBuffer must be called again to make room for remaining
+   * bytes. This event may occur multiple times after
+   * JXL_DEC_JPEG_RECONSTRUCTION
+   */
+  JXL_DEC_JPEG_NEED_MORE_OUTPUT = 6,
+
+  /** Informative event by JxlDecoderProcessInput: basic information such as
+   * image dimensions and extra channels. This event occurs max once per image.
+   */
+  JXL_DEC_BASIC_INFO = 0x40,
+
+  /** Informative event by JxlDecoderProcessInput: user extensions of the
+   * codestream header. This event occurs max once per image and always later
+   * than JXL_DEC_BASIC_INFO and earlier than any pixel data.
+   */
+  JXL_DEC_EXTENSIONS = 0x80,
+
+  /** Informative event by JxlDecoderProcessInput: color encoding or ICC
+   * profile from the codestream header. This event occurs max once per image
+   * and always later than JXL_DEC_BASIC_INFO and earlier than any pixel
+   * data.
+   */
+  JXL_DEC_COLOR_ENCODING = 0x100,
+
+  /** Informative event by JxlDecoderProcessInput: Preview image, a small
+   * frame, decoded. This event can only happen if the image has a preview
+   * frame encoded. This event occurs max once for the codestream and always
+   * later than JXL_DEC_COLOR_ENCODING and before JXL_DEC_FRAME.
+   */
+  JXL_DEC_PREVIEW_IMAGE = 0x200,
+
+  /** Informative event by JxlDecoderProcessInput: Beginning of a frame.
+   * JxlDecoderGetFrameHeader can be used at this point. A note on frames:
+   * a JPEG XL image can have internal frames that are not intended to be
+   * displayed (e.g. used for compositing a final frame), but this only returns
+   * displayed frames. A displayed frame either has an animation duration or is
+   * the only or last frame in the image. This event occurs max once per
+   * displayed frame, always later than JXL_DEC_COLOR_ENCODING, and always
+   * earlier than any pixel data. While JPEG XL supports encoding a single frame
+   * as the composition of multiple internal sub-frames also called frames, this
+   * event is not indicated for the internal frames.
+   */
+  JXL_DEC_FRAME = 0x400,
+
+  /** Informative event by JxlDecoderProcessInput: DC image, 8x8 sub-sampled
+   * frame, decoded. It is not guaranteed that the decoder will always return DC
+   * separately, but when it does it will do so before outputting the full
+   * frame. JxlDecoderSetDCOutBuffer must be used after getting the basic
+   * image information to be able to get the DC pixels, if not this return
+   * status only indicates we're past this point in the codestream. This event
+   * occurs max once per frame and always later than JXL_DEC_FRAME_HEADER
+   * and other header events and earlier than full resolution pixel data.
+   * DEPRECATED: the DC feature in this form will be removed. You can use
+   * JxlDecoderFlushImage for progressive rendering.
+   */
+  JXL_DEC_DC_IMAGE = 0x800,
+
+  /** Informative event by JxlDecoderProcessInput: full frame decoded.
+   * JxlDecoderSetImageOutBuffer must be used after getting the basic image
+   * information to be able to get the image pixels, if not this return status
+   * only indicates we're past this point in the codestream. This event occurs
+   * max once per frame and always later than JXL_DEC_DC_IMAGE.
+   */
+  JXL_DEC_FULL_IMAGE = 0x1000,
+
+  /** Informative event by JxlDecoderProcessInput: JPEG reconstruction data
+   * decoded. JxlDecoderSetJPEGBuffer may be used to set a JPEG
+   * reconstruction buffer after getting the JPEG reconstruction data. If a JPEG
+   * reconstruction buffer is set a byte stream identical to the JPEG codestream
+   * used to encode the image will be written to the JPEG reconstruction buffer
+   * instead of pixels to the image out buffer. This event occurs max once per
+   * image and always before JXL_DEC_FULL_IMAGE.
+   */
+  JXL_DEC_JPEG_RECONSTRUCTION = 0x2000,
+} JxlDecoderStatus;
+
+/** Rewinds decoder to the beginning. The same input must be given again from
+ * the beginning of the file and the decoder will emit events from the beginning
+ * again. When rewinding (as opposed to JxlDecoderReset), the decoder can keep
+ * state about the image, which it can use to skip to a requested frame more
+ * efficiently with JxlDecoderSkipFrames. After rewind,
+ * JxlDecoderSubscribeEvents can be used again, and it is feasible to leave out
+ * events that were already handled before, such as JXL_DEC_BASIC_INFO and
+ * JXL_DEC_COLOR_ENCODING, since they will provide the same information as
+ * before.
+ * @param dec decoder object
+ */
+JXL_EXPORT void JxlDecoderRewind(JxlDecoder* dec);
+
+/** Makes the decoder skip the next `amount` frames. It still needs to process
+ * the input, but will not output the frame events. It can be more efficient
+ * when skipping frames, and even more so when using this after
+ * JxlDecoderRewind. If the decoder is already processing a frame (could
+ * have emitted JXL_DEC_FRAME but not yet JXL_DEC_FULL_IMAGE), it starts
+ * skipping from the next frame. If the amount is larger than the amount of
+ * frames remaining in the image, all remaining frames are skipped. Calling this
+ * function multiple times adds the amount to skip to the already existing
+ * amount.
+ * A frame here is defined as a frame that without skipping emits events such as
+ * JXL_DEC_FRAME and JXL_DEC_FULL_IMAGE. Frames that are internal to the file
+ * format but are not rendered as part of an animation, or are not the final
+ * still frame of a still image, are not counted.
+ * @param dec decoder object
+ * @param amount the amount of frames to skip
+ */
+JXL_EXPORT void JxlDecoderSkipFrames(JxlDecoder* dec, size_t amount);
+
+/**
+ * Get the default pixel format for this decoder.
+ *
+ * Requires that the decoder can produce JxlBasicInfo.
+ *
+ * @param dec JxlDecoder to query when creating the recommended pixel format.
+ * @param format JxlPixelFormat to populate with the recommended settings for
+ * the data loaded into this decoder.
+ * @return JXL_DEC_SUCCESS if no error, JXL_DEC_NEED_MORE_INPUT if the
+ * basic info isn't yet available, and JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderDefaultPixelFormat(const JxlDecoder* dec, JxlPixelFormat* format);
+
+/**
+ * Set the parallel runner for multithreading. May only be set before starting
+ * decoding.
+ *
+ * @param dec decoder object
+ * @param parallel_runner function pointer to runner for multithreading. It may
+ *        be NULL to use the default, single-threaded, runner. A multithreaded
+ *        runner should be set to reach fast performance.
+ * @param parallel_runner_opaque opaque pointer for parallel_runner.
+ * @return JXL_DEC_SUCCESS if the runner was set, JXL_DEC_ERROR
+ * otherwise (the previous runner remains set).
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetParallelRunner(JxlDecoder* dec, JxlParallelRunner parallel_runner,
+                            void* parallel_runner_opaque);
+
+/**
+ * Returns a hint indicating how many more bytes the decoder is expected to
+ * need to make JxlDecoderGetBasicInfo available after the next
+ * JxlDecoderProcessInput call. This is a suggested large enough value for
+ * the amount of bytes to provide in the next JxlDecoderSetInput call, but it is
+ * not guaranteed to be an upper bound nor a lower bound.
+ * Can be used before the first JxlDecoderProcessInput call, and is correct
+ * the first time in most cases. If not, JxlDecoderSizeHintBasicInfo can be
+ * called again to get an updated hint.
+ *
+ * @param dec decoder object
+ * @return the size hint in bytes if the basic info is not yet fully decoded.
+ * @return 0 when the basic info is already available.
+ */
+JXL_EXPORT size_t JxlDecoderSizeHintBasicInfo(const JxlDecoder* dec);
+
+/** Select for which informative events (JXL_DEC_BASIC_INFO, etc...) the
+ * decoder should return with a status. It is not required to subscribe to any
+ * events, data can still be requested from the decoder as soon as it is
+ * available. By default, the decoder is subscribed to no events
+ * (events_wanted == 0), and the decoder will then only return when it cannot
+ * continue because it needs more input data or more output buffer. This
+ * function may only be called before using JxlDecoderProcessInput.
+ *
+ * @param dec decoder object
+ * @param events_wanted bitfield of desired events.
+ * @return JXL_DEC_SUCCESS if no error, JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSubscribeEvents(JxlDecoder* dec,
+                                                      int events_wanted);
+
+/** Enables or disables preserving of original orientation. Some images are
+ * encoded with an orientation tag indicating the image is rotated and/or
+ * mirrored (here called the original orientation).
+ *
+ * *) If keep_orientation is JXL_FALSE (the default): the decoder will perform
+ * work to undo the transformation. This ensures the decoded pixels will not
+ * be rotated or mirrored. The decoder will always set the orientation field
+ * of the JxlBasicInfo to JXL_ORIENT_IDENTITY to match the returned pixel data.
+ * The decoder may also swap xsize and ysize in the JxlBasicInfo compared to the
+ * values inside of the codestream, to correctly match the decoded pixel data,
+ * e.g. when a 90 degree rotation was performed.
+ *
+ * *) If this option is JXL_TRUE: then the image is returned as-is, which may be
+ * rotated or mirrored, and the user must check the orientation field in
+ * JxlBasicInfo after decoding to correctly interpret the decoded pixel data.
+ * This may be faster to decode since the decoder doesn't have to apply the
+ * transformation, but can cause wrong display of the image if the orientation
+ * tag is not correctly taken into account by the user.
+ *
+ * By default, this option is disabled, and the decoder automatically corrects
+ * the orientation.
+ *
+ * This function must be called at the beginning, before decoding is performed.
+ *
+ * @see JxlBasicInfo for the orientation field, and @see JxlOrientation for the
+ * possible values.
+ *
+ * @param dec decoder object
+ * @param keep_orientation JXL_TRUE to enable, JXL_FALSE to disable.
+ * @return JXL_DEC_SUCCESS if no error, JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetKeepOrientation(JxlDecoder* dec, JXL_BOOL keep_orientation);
+
+/**
+ * Decodes JPEG XL file using the available bytes. Requires input has been
+ * set with JxlDecoderSetInput. After JxlDecoderProcessInput, input can
+ * optionally be released with JxlDecoderReleaseInput and then set again to
+ * next bytes in the stream. JxlDecoderReleaseInput returns how many bytes are
+ * not yet processed, before a next call to JxlDecoderProcessInput all
+ * unprocessed bytes must be provided again (the address need not match, but the
+ * contents must), and more bytes may be concatenated after the unprocessed
+ * bytes.
+ *
+ * The returned status indicates whether the decoder needs more input bytes, or
+ * more output buffer for a certain type of output data. No matter what the
+ * returned status is (other than JXL_DEC_ERROR), new information, such as
+ * JxlDecoderGetBasicInfo, may have become available after this call. When
+ * the return value is not JXL_DEC_ERROR or JXL_DEC_SUCCESS, the decoding
+ * requires more JxlDecoderProcessInput calls to continue.
+ *
+ * @param dec decoder object
+ * @return JXL_DEC_SUCCESS when decoding finished and all events handled.
+ * @return JXL_DEC_ERROR when decoding failed, e.g. invalid codestream.
+ * TODO(lode) document the input data mechanism
+ * @return JXL_DEC_NEED_MORE_INPUT more input data is necessary.
+ * @return JXL_DEC_BASIC_INFO when basic info such as image dimensions is
+ * available and this informative event is subscribed to.
+ * @return JXL_DEC_EXTENSIONS when JPEG XL codestream user extensions are
+ * available and this informative event is subscribed to.
+ * @return JXL_DEC_COLOR_ENCODING when color profile information is
+ * available and this informative event is subscribed to.
+ * @return JXL_DEC_PREVIEW_IMAGE when preview pixel information is available and
+ * output in the preview buffer.
+ * @return JXL_DEC_DC_IMAGE when DC pixel information (8x8 downscaled version
+ * of the image) is available and output in the DC buffer.
+ * @return JXL_DEC_FULL_IMAGE when all pixel information at highest detail is
+ * available and has been output in the pixel buffer.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderProcessInput(JxlDecoder* dec);
+
+/**
+ * Sets input data for JxlDecoderProcessInput. The data is owned by the caller
+ * and may be used by the decoder until JxlDecoderReleaseInput is called or
+ * the decoder is destroyed or reset so must be kept alive until then.
+ * @param dec decoder object
+ * @param data pointer to next bytes to read from
+ * @param size amount of bytes available starting from data
+ * @return JXL_DEC_ERROR if input was already set without releasing,
+ * JXL_DEC_SUCCESS otherwise
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetInput(JxlDecoder* dec,
+                                               const uint8_t* data,
+                                               size_t size);
+
+/**
+ * Releases input which was provided with JxlDecoderSetInput. Between
+ * JxlDecoderProcessInput and JxlDecoderReleaseInput, the user may not alter
+ * the data in the buffer. Calling JxlDecoderReleaseInput is required whenever
+ * any input is already set and new input needs to be added with
+ * JxlDecoderSetInput, but is not required before JxlDecoderDestroy or
+ * JxlDecoderReset. Calling JxlDecoderReleaseInput when no input is set is
+ * not an error and returns 0.
+ * @param dec decoder object
+ * @return the amount of bytes the decoder has not yet processed that are
+ * still remaining in the data set by JxlDecoderSetInput, or 0 if no input is
+ * set or JxlDecoderReleaseInput was already called. For a next call to
+ * JxlDecoderProcessInput, the buffer must start with these unprocessed bytes.
+ * This value doesn't provide information about how many bytes the decoder
+ * truly processed internally or how large the original JPEG XL codestream or
+ * file are.
+ */
+JXL_EXPORT size_t JxlDecoderReleaseInput(JxlDecoder* dec);
+
+/**
+ * Outputs the basic image information, such as image dimensions, bit depth and
+ * all other JxlBasicInfo fields, if available.
+ *
+ * @param dec decoder object
+ * @param info struct to copy the information into, or NULL to only check
+ * whether the information is available through the return value.
+ * @return JXL_DEC_SUCCESS if the value is available,
+ *    JXL_DEC_NEED_MORE_INPUT if not yet available, JXL_DEC_ERROR in case
+ *    of other error conditions.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetBasicInfo(const JxlDecoder* dec,
+                                                   JxlBasicInfo* info);
+
+/**
+ * Outputs information for extra channel at the given index. The index must be
+ * smaller than num_extra_channels in the associated JxlBasicInfo.
+ *
+ * @param dec decoder object
+ * @param index index of the extra channel to query.
+ * @param info struct to copy the information into, or NULL to only check
+ * whether the information is available through the return value.
+ * @return JXL_DEC_SUCCESS if the value is available,
+ *    JXL_DEC_NEED_MORE_INPUT if not yet available, JXL_DEC_ERROR in case
+ *    of other error conditions.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetExtraChannelInfo(
+    const JxlDecoder* dec, size_t index, JxlExtraChannelInfo* info);
+
+/**
+ * Outputs name for extra channel at the given index in UTF-8. The index must be
+ * smaller than num_extra_channels in the associated JxlBasicInfo. The buffer
+ * for name must have at least name_length + 1 bytes allocated, gotten from
+ * the associated JxlExtraChannelInfo.
+ *
+ * @param dec decoder object
+ * @param index index of the extra channel to query.
+ * @param name buffer to copy the name into
+ * @param size size of the name buffer in bytes
+ * @return JXL_DEC_SUCCESS if the value is available,
+ *    JXL_DEC_NEED_MORE_INPUT if not yet available, JXL_DEC_ERROR in case
+ *    of other error conditions.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetExtraChannelName(const JxlDecoder* dec,
+                                                          size_t index,
+                                                          char* name,
+                                                          size_t size);
+
+/** Defines which color profile to get: the profile from the codestream
+ * metadata header, which represents the color profile of the original image,
+ * or the color profile of the pixel data received from the decoder. Both are
+ * the same if the basic info has uses_original_profile set.
+ */
+typedef enum {
+  /** Get the color profile of the original image from the metadata.
+   */
+  JXL_COLOR_PROFILE_TARGET_ORIGINAL = 0,
+
+  /** Get the color profile of the pixel data the decoder outputs. */
+  JXL_COLOR_PROFILE_TARGET_DATA = 1,
+} JxlColorProfileTarget;
+
+/**
+ * Outputs the color profile as JPEG XL encoded structured data, if available.
+ * This is an alternative to an ICC Profile, which can represent a more limited
+ * amount of color spaces, but represents them exactly through enum values.
+ *
+ * It is often possible to use JxlDecoderGetColorAsICCProfile as an
+ * alternative anyway. The following scenarios are possible:
+ * - The JPEG XL image has an attached ICC Profile, in that case, the encoded
+ *   structured data is not available, this function will return an error status
+ *   and you must use JxlDecoderGetColorAsICCProfile instead.
+ * - The JPEG XL image has an encoded structured color profile, and it
+ *   represents an RGB or grayscale color space. This function will return it.
+ *   You can still use JxlDecoderGetColorAsICCProfile as well as an
+ *   alternative if desired, though depending on which RGB color space is
+ *   represented, the ICC profile may be a close approximation. It is also not
+ *   always feasible to deduce from an ICC profile which named color space it
+ *   exactly represents, if any, as it can represent any arbitrary space.
+ * - The JPEG XL image has an encoded structured color profile, and it indicates
+ *   an unknown or xyb color space. In that case,
+ *   JxlDecoderGetColorAsICCProfile is not available.
+ *
+ * If you wish to render the image using a system that supports ICC profiles,
+ * use JxlDecoderGetColorAsICCProfile first. If you're looking for a specific
+ * color space possibly indicated in the JPEG XL image, use
+ * JxlDecoderGetColorAsEncodedProfile first.
+ *
+ * @param dec decoder object
+ * @param format pixel format to output the data to. Only used for
+ * JXL_COLOR_PROFILE_TARGET_DATA, may be nullptr otherwise.
+ * @param target whether to get the original color profile from the metadata
+ *     or the color profile of the decoded pixels.
+ * @param color_encoding struct to copy the information into, or NULL to only
+ * check whether the information is available through the return value.
+ * @return JXL_DEC_SUCCESS if the data is available and returned,
+ *    JXL_DEC_NEED_MORE_INPUT if not yet available, JXL_DEC_ERROR in case
+ *    the encoded structured color profile does not exist in the codestream.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetColorAsEncodedProfile(
+    const JxlDecoder* dec, const JxlPixelFormat* format,
+    JxlColorProfileTarget target, JxlColorEncoding* color_encoding);
+
+/**
+ * Outputs the size in bytes of the ICC profile returned by
+ * JxlDecoderGetColorAsICCProfile, if available, or indicates there is none
+ * available. In most cases, the image will have an ICC profile available, but
+ * if it does not, JxlDecoderGetColorAsEncodedProfile must be used instead.
+ * @see JxlDecoderGetColorAsEncodedProfile for more information. The ICC
+ * profile is either the exact ICC profile attached to the codestream metadata,
+ * or a close approximation generated from JPEG XL encoded structured data,
+ * depending on what is encoded in the codestream.
+ *
+ * @param dec decoder object
+ * @param format pixel format to output the data to. Only used for
+ * JXL_COLOR_PROFILE_TARGET_DATA, may be nullptr otherwise.
+ * @param target whether to get the original color profile from the metadata
+ *     or the color profile of the decoded pixels.
+ * @param size variable to output the size into, or NULL to only check the
+ *    return status.
+ * @return JXL_DEC_SUCCESS if the ICC profile is available,
+ *    JXL_DEC_NEED_MORE_INPUT if the decoder has not yet received enough
+ *    input data to determine whether an ICC profile is available or what its
+ *    size is, JXL_DEC_ERROR in case the ICC profile is not available and
+ *    cannot be generated.
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderGetICCProfileSize(const JxlDecoder* dec, const JxlPixelFormat* format,
+                            JxlColorProfileTarget target, size_t* size);
+
+/**
+ * Outputs ICC profile if available. The profile is only available if
+ * JxlDecoderGetICCProfileSize returns success. The output buffer must have
+ * at least as many bytes as given by JxlDecoderGetICCProfileSize.
+ *
+ * @param dec decoder object
+ * @param format pixel format to output the data to. Only used for
+ * JXL_COLOR_PROFILE_TARGET_DATA, may be nullptr otherwise.
+ * @param target whether to get the original color profile from the metadata
+ *     or the color profile of the decoded pixels.
+ * @param icc_profile buffer to copy the ICC profile into
+ * @param size size of the icc_profile buffer in bytes
+ * @return JXL_DEC_SUCCESS if the profile was successfully returned,
+ *    JXL_DEC_NEED_MORE_INPUT if not yet available,
+ *    JXL_DEC_ERROR if the profile doesn't exist or the output size is not
+ *    large enough.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetColorAsICCProfile(
+    const JxlDecoder* dec, const JxlPixelFormat* format,
+    JxlColorProfileTarget target, uint8_t* icc_profile, size_t size);
+
+/** Sets the color profile to use for JXL_COLOR_PROFILE_TARGET_DATA for the
+ * special case when the decoder has a choice. This only has effect for a JXL
+ * image where uses_original_profile is false, and the original color profile is
+ * encoded as an ICC color profile rather than a JxlColorEncoding with known
+ * enum values. In most other cases (uses_original_profile is true, or the
+ * color profile is already given as a JxlColorEncoding), this setting is
+ * ignored and the decoder uses a profile related to the image.
+ * No matter what, the JXL_COLOR_PROFILE_TARGET_DATA must still be queried to
+ * know the actual data format of the decoded pixels after decoding.
+ *
+ * The intended use case of this function is for cases where you are using
+ * a color management system to parse the original ICC color profile
+ * (JXL_COLOR_PROFILE_TARGET_ORIGINAL), from this you know that the ICC
+ * profile represents one of the color profiles supported by JxlColorEncoding
+ * (such as sRGB, PQ or HLG): in that case it is beneficial (but not necessary)
+ * to use JxlDecoderSetPreferredColorProfile to match the parsed profile. The
+ * JXL decoder has no color management system built in, but can convert XYB
+ * color to any of the ones supported by JxlColorEncoding.
+ *
+ * Can only be set after the JXL_DEC_COLOR_ENCODING event occurred and before
+ * any other event occurred, and can affect the result of
+ * JXL_COLOR_PROFILE_TARGET_DATA (but not of JXL_COLOR_PROFILE_TARGET_ORIGINAL),
+ * so should be used after getting JXL_COLOR_PROFILE_TARGET_ORIGINAL but before
+ * getting JXL_COLOR_PROFILE_TARGET_DATA. The color_encoding must be grayscale
+ * if num_color_channels from the basic info is 1, RGB if num_color_channels
+ * from the basic info is 3.
+ *
+ * If JxlDecoderSetPreferredColorProfile is not used, then for images for which
+ * uses_original_profile is false and with ICC color profile, the decoder will
+ * choose linear sRGB for color images, linear grayscale for grayscale images.
+ * This function only sets a preference, since for other images the decoder has
+ * no choice what color profile to use, it is determined by the image.
+ *
+ * @param dec decoder object
+ * @param color_encoding the default color encoding to set
+ * @return JXL_DEC_SUCCESS if the preference was set successfully, JXL_DEC_ERROR
+ *    otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetPreferredColorProfile(
+    JxlDecoder* dec, const JxlColorEncoding* color_encoding);
+
+/**
+ * Returns the minimum size in bytes of the preview image output pixel buffer
+ * for the given format. This is the buffer for JxlDecoderSetPreviewOutBuffer.
+ * Requires the preview header information is available in the decoder.
+ *
+ * @param dec decoder object
+ * @param format format of pixels
+ * @param size output value, buffer size in bytes
+ * @return JXL_DEC_SUCCESS on success, JXL_DEC_ERROR on error, such as
+ *    information not available yet.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderPreviewOutBufferSize(
+    const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size);
+
+/**
+ * Sets the buffer to write the small resolution preview image
+ * to. The size of the buffer must be at least as large as given by
+ * JxlDecoderPreviewOutBufferSize. The buffer follows the format described by
+ * JxlPixelFormat. The preview image dimensions are given by the
+ * JxlPreviewHeader. The buffer is owned by the caller.
+ *
+ * @param dec decoder object
+ * @param format format of pixels. Object owned by user and its contents are
+ * copied internally.
+ * @param buffer buffer type to output the pixel data to
+ * @param size size of buffer in bytes
+ * @return JXL_DEC_SUCCESS on success, JXL_DEC_ERROR on error, such as
+ * size too small.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetPreviewOutBuffer(
+    JxlDecoder* dec, const JxlPixelFormat* format, void* buffer, size_t size);
+
+/**
+ * Outputs the information from the frame, such as duration when have_animation.
+ * This function can be called when JXL_DEC_FRAME occurred for the current
+ * frame, even when have_animation in the JxlBasicInfo is JXL_FALSE.
+ *
+ * @param dec decoder object
+ * @param header struct to copy the information into, or NULL to only check
+ * whether the information is available through the return value.
+ * @return JXL_DEC_SUCCESS if the value is available,
+ *    JXL_DEC_NEED_MORE_INPUT if not yet available, JXL_DEC_ERROR in case
+ *    of other error conditions.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetFrameHeader(const JxlDecoder* dec,
+                                                     JxlFrameHeader* header);
+
+/**
+ * Outputs name for the current frame. The buffer
+ * for name must have at least name_length + 1 bytes allocated, gotten from
+ * the associated JxlFrameHeader.
+ *
+ * @param dec decoder object
+ * @param name buffer to copy the name into
+ * @param size size of the name buffer in bytes, including zero termination
+ *    character, so this must be at least JxlFrameHeader.name_length + 1.
+ * @return JXL_DEC_SUCCESS if the value is available,
+ *    JXL_DEC_NEED_MORE_INPUT if not yet available, JXL_DEC_ERROR in case
+ *    of other error conditions.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetFrameName(const JxlDecoder* dec,
+                                                   char* name, size_t size);
+
+/**
+ * Returns the minimum size in bytes of the DC image output buffer
+ * for the given format. This is the buffer for JxlDecoderSetDCOutBuffer.
+ * Requires the basic image information is available in the decoder.
+ *
+ * @param dec decoder object
+ * @param format format of pixels
+ * @param size output value, buffer size in bytes
+ * @return JXL_DEC_SUCCESS on success, JXL_DEC_ERROR on error, such as
+ *    information not available yet.
+ *
+ * DEPRECATED: the DC feature in this form will be removed. You can use
+ * JxlDecoderFlushImage for progressive rendering.
+ */
+JXL_EXPORT JXL_DEPRECATED JxlDecoderStatus JxlDecoderDCOutBufferSize(
+    const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size);
+
+/**
+ * Sets the buffer to write the lower resolution (8x8 sub-sampled) DC image
+ * to. The size of the buffer must be at least as large as given by
+ * JxlDecoderDCOutBufferSize. The buffer follows the format described by
+ * JxlPixelFormat. The DC image has dimensions ceil(xsize / 8) * ceil(ysize /
+ * 8). The buffer is owned by the caller.
+ *
+ * @param dec decoder object
+ * @param format format of pixels. Object owned by user and its contents are
+ * copied internally.
+ * @param buffer buffer type to output the pixel data to
+ * @param size size of buffer in bytes
+ * @return JXL_DEC_SUCCESS on success, JXL_DEC_ERROR on error, such as
+ * size too small.
+ *
+ * DEPRECATED: the DC feature in this form will be removed. You can use
+ * JxlDecoderFlushImage for progressive rendering.
+ */
+JXL_EXPORT JXL_DEPRECATED JxlDecoderStatus JxlDecoderSetDCOutBuffer(
+    JxlDecoder* dec, const JxlPixelFormat* format, void* buffer, size_t size);
+
+/**
+ * Returns the minimum size in bytes of the image output pixel buffer for the
+ * given format. This is the buffer for JxlDecoderSetImageOutBuffer. Requires
+ * the basic image information is available in the decoder.
+ *
+ * @param dec decoder object
+ * @param format format of the pixels.
+ * @param size output value, buffer size in bytes
+ * @return JXL_DEC_SUCCESS on success, JXL_DEC_ERROR on error, such as
+ *    information not available yet.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderImageOutBufferSize(
+    const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size);
+
+/**
+ * Sets the buffer to write the full resolution image to. This can be set when
+ * the JXL_DEC_FRAME event occurs, must be set when the
+ * JXL_DEC_NEED_IMAGE_OUT_BUFFER event occurs, and applies only for the current
+ * frame. The size of the buffer must be at least as large as given by
+ * JxlDecoderImageOutBufferSize. The buffer follows the format described by
+ * JxlPixelFormat. The buffer is owned by the caller.
+ *
+ * @param dec decoder object
+ * @param format format of the pixels. Object owned by user and its contents
+ * are copied internally.
+ * @param buffer buffer type to output the pixel data to
+ * @param size size of buffer in bytes
+ * @return JXL_DEC_SUCCESS on success, JXL_DEC_ERROR on error, such as
+ * size too small.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetImageOutBuffer(
+    JxlDecoder* dec, const JxlPixelFormat* format, void* buffer, size_t size);
+
+/**
+ * Callback function type for JxlDecoderSetImageOutCallback. @see
+ * JxlDecoderSetImageOutCallback for usage.
+ *
+ * The callback may be called simultaneously by different threads when using a
+ * threaded parallel runner, on different pixels.
+ *
+ * @param opaque optional user data, as given to JxlDecoderSetImageOutCallback.
+ * @param x horizontal position of leftmost pixel of the pixel data.
+ * @param y vertical position of the pixel data.
+ * @param num_pixels amount of pixels included in the pixel data, horizontally.
+ * This is not the same as xsize of the full image, it may be smaller.
+ * @param pixels pixel data as a horizontal stripe, in the format passed to
+ * JxlDecoderSetImageOutCallback. The memory is not owned by the user, and is
+ * only valid during the time the callback is running.
+ */
+typedef void (*JxlImageOutCallback)(void* opaque, size_t x, size_t y,
+                                    size_t num_pixels, const void* pixels);
+
+/**
+ * Sets pixel output callback. This is an alternative to
+ * JxlDecoderSetImageOutBuffer. This can be set when the JXL_DEC_FRAME event
+ * occurs, must be set when the JXL_DEC_NEED_IMAGE_OUT_BUFFER event occurs, and
+ * applies only for the current frame. Only one of JxlDecoderSetImageOutBuffer
+ * or JxlDecoderSetImageOutCallback may be used for the same frame, not both at
+ * the same time.
+ *
+ * The callback will be called multiple times, to receive the image
+ * data in small chunks. The callback receives a horizontal stripe of pixel
+ * data, 1 pixel high, xsize pixels wide, called a scanline. The xsize here is
+ * not the same as the full image width, the scanline may be a partial section,
+ * and xsize may differ between calls. The user can then process and/or copy the
+ * partial scanline to an image buffer. The callback may be called
+ * simultaneously by different threads when using a threaded parallel runner, on
+ * different pixels.
+ *
+ * If JxlDecoderFlushImage is not used, then each pixel will be visited exactly
+ * once by the different callback calls, during processing with one or more
+ * JxlDecoderProcessInput calls. These pixels are decoded to full detail, they
+ * are not part of a lower resolution or lower quality progressive pass, but the
+ * final pass.
+ *
+ * If JxlDecoderFlushImage is used, then in addition each pixel will be visited
+ * zero or one times during the blocking JxlDecoderFlushImage call. Pixels
+ * visited as a result of JxlDecoderFlushImage may represent a lower resolution
+ * or lower quality intermediate progressive pass of the image. Any visited
+ * pixel will be of a quality at least as good or better than previous visits of
+ * this pixel. A pixel may be visited zero times if it cannot be decoded yet
+ * or if it was already decoded to full precision (this behavior is not
+ * guaranteed).
+ *
+ * @param dec decoder object
+ * @param format format of the pixels. Object owned by user and its contents
+ * are copied internally.
+ * @param callback the callback function receiving partial scanlines of pixel
+ * data.
+ * @param opaque optional user data, which will be passed on to the callback,
+ * may be NULL.
+ * @return JXL_DEC_SUCCESS on success, JXL_DEC_ERROR on error, such as
+ * JxlDecoderSetImageOutBuffer already set.
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetImageOutCallback(JxlDecoder* dec, const JxlPixelFormat* format,
+                              JxlImageOutCallback callback, void* opaque);
+
+/**
+ * Sets output buffer for reconstructed JPEG codestream.
+ *
+ * The data is owned by the caller
+ * and may be used by the decoder until JxlDecoderReleaseJPEGBuffer is called or
+ * the decoder is destroyed or reset so must be kept alive until then.
+ *
+ * @param dec decoder object
+ * @param data pointer to next bytes to write to
+ * @param size amount of bytes available starting from data
+ * @return JXL_DEC_ERROR if a buffer was already set without releasing it,
+ * JXL_DEC_SUCCESS otherwise
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetJPEGBuffer(JxlDecoder* dec,
+                                                    uint8_t* data, size_t size);
+
+/**
+ * Releases buffer which was provided with JxlDecoderSetJPEGBuffer.
+ *
+ * Calling JxlDecoderReleaseJPEGBuffer is required whenever
+ * a buffer is already set and a new buffer needs to be added with
+ * JxlDecoderSetJPEGBuffer, but is not required before JxlDecoderDestroy or
+ * JxlDecoderReset.
+ *
+ * Calling JxlDecoderReleaseJPEGBuffer when no buffer is set is
+ * not an error and returns 0.
+ *
+ * @param dec decoder object
+ * @return the amount of bytes the decoder has not yet written to of the data
+ * set by JxlDecoderSetJPEGBuffer, or 0 if no buffer is set or
+ * JxlDecoderReleaseJPEGBuffer was already called.
+ */
+JXL_EXPORT size_t JxlDecoderReleaseJPEGBuffer(JxlDecoder* dec);
+
+/* TODO(lode): add way to output extra channels */
+
+/**
+ * Outputs progressive step towards the decoded image so far when only partial
+ * input was received. If the flush was successful, the buffer set with
+ * JxlDecoderSetImageOutBuffer will contain partial image data.
+ *
+ * Can be called when JxlDecoderProcessInput returns JXL_DEC_NEED_MORE_INPUT,
+ * after the JXL_DEC_FRAME event already occurred and before the
+ * JXL_DEC_FULL_IMAGE event occurred for a frame.
+ *
+ * @param dec decoder object
+ * @return JXL_DEC_SUCCESS if image data was flushed to the output buffer, or
+ * JXL_DEC_ERROR when no flush was done, e.g. if not enough image data was
+ * available yet even for flush, or no output buffer was set yet. An error is
+ * not fatal, it only indicates no flushed image is available now; regular
+ * decoding can still be performed.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderFlushImage(JxlDecoder* dec);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_DECODE_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/decode_cxx.h b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/decode_cxx.h
new file mode 100644
index 0000000000..4e7315289c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/decode_cxx.h
@@ -0,0 +1,52 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/// @file decode_cxx.h
+/// @brief C++ header-only helper for @ref decode.h.
+///
+/// There's no binary library associated with the header since this is a header
+/// only library.
+
+#ifndef JXL_DECODE_CXX_H_
+#define JXL_DECODE_CXX_H_
+
+#include <memory>
+
+#include "jxl/decode.h"
+
+#if !(defined(__cplusplus) || defined(c_plusplus))
+#error "This a C++ only header. Use jxl/decode.h from C sources."
+#endif
+
+/// Struct to call JxlDecoderDestroy from the JxlDecoderPtr unique_ptr.
+struct JxlDecoderDestroyStruct {
+  /// Calls @ref JxlDecoderDestroy() on the passed decoder.
+  void operator()(JxlDecoder* decoder) { JxlDecoderDestroy(decoder); }
+};
+
+/// std::unique_ptr<> type that calls JxlDecoderDestroy() when releasing the
+/// decoder.
+///
+/// Use this helper type from C++ sources to ensure the decoder is destroyed and
+/// its internal resources are released.
+typedef std::unique_ptr<JxlDecoder, JxlDecoderDestroyStruct> JxlDecoderPtr;
+
+/// Creates an instance of JxlDecoder into a JxlDecoderPtr and initializes it.
+///
+/// This function returns a unique_ptr that will call JxlDecoderDestroy() when
+/// releasing the pointer. See @ref JxlDecoderCreate for details on the
+/// instance creation.
+///
+/// @param memory_manager custom allocator function. It may be NULL. The memory
+///        manager will be copied internally.
+/// @return a @c NULL JxlDecoderPtr if the instance can not be allocated or
+///         initialized
+/// @return initialized JxlDecoderPtr instance otherwise.
+static inline JxlDecoderPtr JxlDecoderMake(
+    const JxlMemoryManager* memory_manager) {
+  return JxlDecoderPtr(JxlDecoderCreate(memory_manager));
+}
+
+#endif  // JXL_DECODE_CXX_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/encode.h b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/encode.h
new file mode 100644
index 0000000000..3e0882cc76
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/encode.h
@@ -0,0 +1,379 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @file encode.h
+ * @brief Encoding API for JPEG XL.
+ */
+
+#ifndef JXL_ENCODE_H_
+#define JXL_ENCODE_H_
+
+#include "jxl/decode.h"
+#include "jxl/jxl_export.h"
+#include "jxl/memory_manager.h"
+#include "jxl/parallel_runner.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/**
+ * Encoder library version.
+ *
+ * @return the encoder library version as an integer:
+ * MAJOR_VERSION * 1000000 + MINOR_VERSION * 1000 + PATCH_VERSION. For example,
+ * version 1.2.3 would return 1002003.
+ */
+JXL_EXPORT uint32_t JxlEncoderVersion(void);
+
+/**
+ * Opaque structure that holds the JPEG XL encoder.
+ *
+ * Allocated and initialized with JxlEncoderCreate().
+ * Cleaned up and deallocated with JxlEncoderDestroy().
+ */
+typedef struct JxlEncoderStruct JxlEncoder;
+
+/**
+ * Opaque structure that holds frame specific encoding options for a JPEG XL
+ * encoder.
+ *
+ * Allocated and initialized with JxlEncoderOptionsCreate().
+ * Cleaned up and deallocated when the encoder is destroyed with
+ * JxlEncoderDestroy().
+ */
+typedef struct JxlEncoderOptionsStruct JxlEncoderOptions;
+
+/**
+ * Return value for multiple encoder functions.
+ */
+typedef enum {
+  /** Function call finished successfully, or encoding is finished and there is
+   * nothing more to be done.
+   */
+  JXL_ENC_SUCCESS = 0,
+
+  /** An error occurred, for example out of memory.
+   */
+  JXL_ENC_ERROR = 1,
+
+  /** The encoder needs more output buffer to continue encoding.
+   */
+  JXL_ENC_NEED_MORE_OUTPUT = 2,
+
+  /** The encoder doesn't (yet) support this.
+   */
+  JXL_ENC_NOT_SUPPORTED = 3,
+
+} JxlEncoderStatus;
+
+/**
+ * Creates an instance of JxlEncoder and initializes it.
+ *
+ * @p memory_manager will be used for all the library dynamic allocations made
+ * from this instance. The parameter may be NULL, in which case the default
+ * allocator will be used. See jxl/memory_manager.h for details.
+ *
+ * @param memory_manager custom allocator function. It may be NULL. The memory
+ *        manager will be copied internally.
+ * @return @c NULL if the instance can not be allocated or initialized
+ * @return pointer to initialized JxlEncoder otherwise
+ */
+JXL_EXPORT JxlEncoder* JxlEncoderCreate(const JxlMemoryManager* memory_manager);
+
+/**
+ * Re-initializes a JxlEncoder instance, so it can be re-used for encoding
+ * another image. All state and settings are reset as if the object was
+ * newly created with JxlEncoderCreate, but the memory manager is kept.
+ *
+ * @param enc instance to be re-initialized.
+ */
+JXL_EXPORT void JxlEncoderReset(JxlEncoder* enc);
+
+/**
+ * Deinitializes and frees JxlEncoder instance.
+ *
+ * @param enc instance to be cleaned up and deallocated.
+ */
+JXL_EXPORT void JxlEncoderDestroy(JxlEncoder* enc);
+
+/**
+ * Set the parallel runner for multithreading. May only be set before starting
+ * encoding.
+ *
+ * @param enc encoder object.
+ * @param parallel_runner function pointer to runner for multithreading. It may
+ *        be NULL to use the default, single-threaded, runner. A multithreaded
+ *        runner should be set to reach fast performance.
+ * @param parallel_runner_opaque opaque pointer for parallel_runner.
+ * @return JXL_ENC_SUCCESS if the runner was set, JXL_ENC_ERROR
+ * otherwise (the previous runner remains set).
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderSetParallelRunner(JxlEncoder* enc, JxlParallelRunner parallel_runner,
+                            void* parallel_runner_opaque);
+
+/**
+ * Encodes JPEG XL file using the available bytes. @p *avail_out indicates how
+ * many output bytes are available, and @p *next_out points to the output bytes.
+ * *avail_out will be decremented by the amount of bytes that have been
+ * processed by the encoder and *next_out will be incremented by the same
+ * amount, so *next_out will now point at the amount of *avail_out unprocessed
+ * bytes.
+ *
+ * The returned status indicates whether the encoder needs more output bytes.
+ * When the return value is not JXL_ENC_ERROR or JXL_ENC_SUCCESS, the encoding
+ * requires more JxlEncoderProcessOutput calls to continue.
+ *
+ * @param enc encoder object.
+ * @param next_out pointer to next bytes to write to.
+ * @param avail_out amount of bytes available starting from *next_out.
+ * @return JXL_ENC_SUCCESS when encoding finished and all events handled.
+ * @return JXL_ENC_ERROR when encoding failed, e.g. invalid input.
+ * @return JXL_ENC_NEED_MORE_OUTPUT more output buffer is necessary.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderProcessOutput(JxlEncoder* enc,
+                                                    uint8_t** next_out,
+                                                    size_t* avail_out);
+
+/**
+ * Sets the buffer to read JPEG encoded bytes from for the next frame to encode.
+ *
+ * If JxlEncoderSetBasicInfo has not yet been called, calling
+ * JxlEncoderAddJPEGFrame will implicitly call it with the parameters of the
+ * added JPEG frame.
+ *
+ * If JxlEncoderSetColorEncoding or JxlEncoderSetICCProfile has not yet been
+ * called, calling JxlEncoderAddJPEGFrame will implicitly call it with the
+ * parameters of the added JPEG frame.
+ *
+ * If the encoder is set to store JPEG reconstruction metadata using @ref
+ * JxlEncoderStoreJPEGMetadata and a single JPEG frame is added, it will be
+ * possible to losslessly reconstruct the JPEG codestream.
+ *
+ * @param options set of encoder options to use when encoding the frame.
+ * @param buffer bytes to read JPEG from. Owned by the caller and its contents
+ * are copied internally.
+ * @param size size of buffer in bytes.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderAddJPEGFrame(
+    const JxlEncoderOptions* options, const uint8_t* buffer, size_t size);
+
+/**
+ * Sets the buffer to read pixels from for the next image to encode. Must call
+ * JxlEncoderSetBasicInfo before JxlEncoderAddImageFrame.
+ *
+ * Currently only some pixel formats are supported:
+ * - JXL_TYPE_UINT8
+ * - JXL_TYPE_UINT16
+ * - JXL_TYPE_FLOAT, with nominal range 0..1
+ *
+ * The color profile of the pixels depends on the value of uses_original_profile
+ * in the JxlBasicInfo. If true, the pixels are assumed to be encoded in the
+ * original profile that is set with JxlEncoderSetColorEncoding or
+ * JxlEncoderSetICCProfile. If false, the pixels are assumed to be nonlinear
+ * sRGB for integer data types (JXL_TYPE_UINT8 and JXL_TYPE_UINT16), and linear
+ * sRGB for floating point data types (JXL_TYPE_FLOAT).
+ *
+ * @param options set of encoder options to use when encoding the frame.
+ * @param pixel_format format for pixels. Object owned by the caller and its
+ * contents are copied internally.
+ * @param buffer buffer type to input the pixel data from. Owned by the caller
+ * and its contents are copied internally.
+ * @param size size of buffer in bytes.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderAddImageFrame(
+    const JxlEncoderOptions* options, const JxlPixelFormat* pixel_format,
+    const void* buffer, size_t size);
+
+/**
+ * Declares that this encoder will not encode anything further.
+ *
+ * Must be called between JxlEncoderAddImageFrame/JPEGFrame of the last frame
+ * and the next call to JxlEncoderProcessOutput, or JxlEncoderProcessOutput
+ * won't output the last frame correctly.
+ *
+ * @param enc encoder object.
+ */
+JXL_EXPORT void JxlEncoderCloseInput(JxlEncoder* enc);
+
+/**
+ * Sets the original color encoding of the image encoded by this encoder. This
+ * is an alternative to JxlEncoderSetICCProfile and only one of these two must
+ * be used. This one sets the color encoding as a @ref JxlColorEncoding, while
+ * the other sets it as ICC binary data.
+ *
+ * @param enc encoder object.
+ * @param color color encoding. Object owned by the caller and its contents are
+ * copied internally.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR or
+ * JXL_ENC_NOT_SUPPORTED otherwise
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderSetColorEncoding(JxlEncoder* enc, const JxlColorEncoding* color);
+
+/**
+ * Sets the original color encoding of the image encoded by this encoder as an
+ * ICC color profile. This is an alternative to JxlEncoderSetColorEncoding and
+ * only one of these two must be used. This one sets the color encoding as ICC
+ * binary data, while the other defines it as a @ref JxlColorEncoding.
+ *
+ * @param enc encoder object.
+ * @param icc_profile bytes of the original ICC profile
+ * @param size size of the icc_profile buffer in bytes
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR or
+ * JXL_ENC_NOT_SUPPORTED otherwise
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetICCProfile(JxlEncoder* enc,
+                                                    const uint8_t* icc_profile,
+                                                    size_t size);
+
+/**
+ * Sets the global metadata of the image encoded by this encoder.
+ *
+ * @param enc encoder object.
+ * @param info global image metadata. Object owned by the caller and its
+ * contents are copied internally.
+ * @return JXL_ENC_SUCCESS if the operation was successful,
+ * JXL_ENC_ERROR or JXL_ENC_NOT_SUPPORTED otherwise
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetBasicInfo(JxlEncoder* enc,
+                                                   const JxlBasicInfo* info);
+
+/**
+ * Configure the encoder to store JPEG reconstruction metadata in the JPEG XL
+ * container.
+ *
+ * The encoder must be configured to use the JPEG XL container format using @ref
+ * JxlEncoderUseContainer for this to have any effect.
+ *
+ * If this is set to true and a single JPEG frame is added, it will be
+ * possible to losslessly reconstruct the JPEG codestream.
+ *
+ * @param enc encoder object.
+ * @param store_jpeg_metadata true if the encoder should store JPEG metadata.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderStoreJPEGMetadata(JxlEncoder* enc, JXL_BOOL store_jpeg_metadata);
+
+/**
+ * Configure the encoder to use the JPEG XL container format.
+ *
+ * Using the JPEG XL container format allows to store metadata such as JPEG
+ * reconstruction (@ref JxlEncoderStoreJPEGMetadata) or other metadata like
+ * EXIF; but it adds a few bytes to the encoded file for container headers even
+ * if there is no extra metadata.
+ *
+ * @param enc encoder object.
+ * @param use_container true if the encoder should output the JPEG XL container
+ * format.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderUseContainer(JxlEncoder* enc,
+                                                   JXL_BOOL use_container);
+
+/**
+ * Sets lossless/lossy mode for the provided options. Default is lossy.
+ *
+ * @param options set of encoder options to update with the new mode
+ * @param lossless whether the options should be lossless
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderOptionsSetLossless(JxlEncoderOptions* options, JXL_BOOL lossless);
+
+/**
+ * Set the decoding speed tier for the provided options. Minimum is 0 (highest
+ * quality), and maximum is 4 (lowest quality). Default is 0.
+ *
+ * @param options set of encoder options to update with the new decoding speed
+ * tier.
+ * @param tier the decoding speed tier to set.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderOptionsSetDecodingSpeed(JxlEncoderOptions* options, int tier);
+
+/**
+ * Sets encoder effort/speed level without affecting decoding speed. Valid
+ * values are, from faster to slower speed: 3:falcon 4:cheetah 5:hare 6:wombat
+ * 7:squirrel 8:kitten 9:tortoise. Default: squirrel (7).
+ *
+ * @param options set of encoder options to update with the new mode.
+ * @param effort the effort value to set.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderOptionsSetEffort(JxlEncoderOptions* options, int effort);
+
+/**
+ * Sets the distance level for lossy compression: target max butteraugli
+ *  distance, lower = higher quality. Range: 0 .. 15.
+ *  0.0 = mathematically lossless (however, use JxlEncoderOptionsSetLossless to
+ *  use true lossless).
+ *  1.0 = visually lossless.
+ *  Recommended range: 0.5 .. 3.0.
+ *  Default value: 1.0.
+ *  If JxlEncoderOptionsSetLossless is used, this value is unused and implied
+ *  to be 0.
+ *
+ * @param options set of encoder options to update with the new mode.
+ * @param distance the distance value to set.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderOptionsSetDistance(JxlEncoderOptions* options, float distance);
+
+/**
+ * Create a new set of encoder options, with all values initially copied from
+ * the @p source options, or set to default if @p source is NULL.
+ *
+ * The returned pointer is an opaque struct tied to the encoder and it will be
+ * deallocated by the encoder when JxlEncoderDestroy() is called. For functions
+ * taking both a @ref JxlEncoder and a @ref JxlEncoderOptions, only
+ * JxlEncoderOptions created with this function for the same encoder instance
+ * can be used.
+ *
+ * @param enc encoder object.
+ * @param source source options to copy initial values from, or NULL to get
+ * options initialized to defaults.
+ * @return the opaque struct pointer identifying a new set of encoder options.
+ */
+JXL_EXPORT JxlEncoderOptions* JxlEncoderOptionsCreate(
+    JxlEncoder* enc, const JxlEncoderOptions* source);
+
+/**
+ * Sets a color encoding to be sRGB.
+ *
+ * @param color_encoding color encoding instance.
+ * @param is_gray whether the color encoding should be gray scale or color.
+ */
+JXL_EXPORT void JxlColorEncodingSetToSRGB(JxlColorEncoding* color_encoding,
+                                          JXL_BOOL is_gray);
+
+/**
+ * Sets a color encoding to be linear sRGB.
+ *
+ * @param color_encoding color encoding instance.
+ * @param is_gray whether the color encoding should be gray scale or color.
+ */
+JXL_EXPORT void JxlColorEncodingSetToLinearSRGB(
+    JxlColorEncoding* color_encoding, JXL_BOOL is_gray);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_ENCODE_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/encode_cxx.h b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/encode_cxx.h
new file mode 100644
index 0000000000..841528f57c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/encode_cxx.h
@@ -0,0 +1,52 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/// @file encode_cxx.h
+/// @brief C++ header-only helper for @ref encode.h.
+///
+/// There's no binary library associated with the header since this is a header
+/// only library.
+
+#ifndef JXL_ENCODE_CXX_H_
+#define JXL_ENCODE_CXX_H_
+
+#include <memory>
+
+#include "jxl/encode.h"
+
+#if !(defined(__cplusplus) || defined(c_plusplus))
+#error "This a C++ only header. Use jxl/encode.h from C sources."
+#endif
+
+/// Struct to call JxlEncoderDestroy from the JxlEncoderPtr unique_ptr.
+struct JxlEncoderDestroyStruct {
+  /// Calls @ref JxlEncoderDestroy() on the passed encoder.
+  void operator()(JxlEncoder* encoder) { JxlEncoderDestroy(encoder); }
+};
+
+/// std::unique_ptr<> type that calls JxlEncoderDestroy() when releasing the
+/// encoder.
+///
+/// Use this helper type from C++ sources to ensure the encoder is destroyed and
+/// its internal resources are released.
+typedef std::unique_ptr<JxlEncoder, JxlEncoderDestroyStruct> JxlEncoderPtr;
+
+/// Creates an instance of JxlEncoder into a JxlEncoderPtr and initializes it.
+///
+/// This function returns a unique_ptr that will call JxlEncoderDestroy() when
+/// releasing the pointer. See @ref JxlEncoderCreate for details on the
+/// instance creation.
+///
+/// @param memory_manager custom allocator function. It may be NULL. The memory
+///        manager will be copied internally.
+/// @return a @c NULL JxlEncoderPtr if the instance can not be allocated or
+///         initialized
+/// @return initialized JxlEncoderPtr instance otherwise.
+static inline JxlEncoderPtr JxlEncoderMake(
+    const JxlMemoryManager* memory_manager) {
+  return JxlEncoderPtr(JxlEncoderCreate(memory_manager));
+}
+
+#endif  // JXL_ENCODE_CXX_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/memory_manager.h b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/memory_manager.h
new file mode 100644
index 0000000000..30e6f9000d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/memory_manager.h
@@ -0,0 +1,67 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @file memory_manager.h
+ * @brief Abstraction functions used by JPEG XL to allocate memory.
+ */
+
+#ifndef JXL_MEMORY_MANAGER_H_
+#define JXL_MEMORY_MANAGER_H_
+
+#include <stddef.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/**
+ * Allocating function for a memory region of a given size.
+ *
+ * Allocates a contiguous memory region of size @p size bytes. The returned
+ * memory may not be aligned to a specific size or initialized at all.
+ *
+ * @param opaque custom memory manager handle provided by the caller.
+ * @param size in bytes of the requested memory region.
+ * @returns @c 0 if the memory can not be allocated,
+ * @returns pointer to the memory otherwise.
+ */
+typedef void* (*jpegxl_alloc_func)(void* opaque, size_t size);
+
+/**
+ * Deallocating function pointer type.
+ *
+ * This function @b MUST do nothing if @p address is @c 0.
+ *
+ * @param opaque custom memory manager handle provided by the caller.
+ * @param address memory region pointer returned by ::jpegxl_alloc_func, or @c 0
+ */
+typedef void (*jpegxl_free_func)(void* opaque, void* address);
+
+/**
+ * Memory Manager struct.
+ * These functions, when provided by the caller, will be used to handle memory
+ * allocations.
+ */
+typedef struct JxlMemoryManagerStruct {
+  /** The opaque pointer that will be passed as the first parameter to all the
+   * functions in this struct. */
+  void* opaque;
+
+  /** Memory allocation function. This can be NULL if and only if also the
+   * free() member in this class is NULL. All dynamic memory will be allocated
+   * and freed with these functions if they are not NULL. */
+  jpegxl_alloc_func alloc;
+  /** Free function matching the alloc() member. */
+  jpegxl_free_func free;
+
+  /* TODO(deymo): Add cache-aligned alloc/free functions here. */
+} JxlMemoryManager;
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_MEMORY_MANAGER_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/parallel_runner.h b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/parallel_runner.h
new file mode 100644
index 0000000000..3411c994d1
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/parallel_runner.h
@@ -0,0 +1,151 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/**
+ * @file parallel_runner.h
+ */
+
+/** API for running data operations in parallel in a multi-threaded environment.
+ * This module allows the JPEG XL caller to define their own way of creating and
+ * assigning threads.
+ *
+ * The JxlParallelRunner function type defines a parallel data processing
+ * runner that may be implemented by the caller to allow the library to process
+ * in multiple threads. The multi-threaded processing in this library only
+ * requires to run the same function over each number of a range, possibly
+ * running each call in a different thread. The JPEG XL caller is responsible
+ * for implementing this logic using the thread APIs available in their system.
+ * For convenience, a C++ implementation based on std::thread is provided in
+ * jxl/thread_parallel_runner.h (part of the jxl_threads library).
+ *
+ * Thread pools usually store small numbers of heterogeneous tasks in a queue.
+ * When tasks are identical or differ only by an integer input parameter, it is
+ * much faster to store just one function of an integer parameter and call it
+ * for each value. Conventional vector-of-tasks can be run in parallel using a
+ * lambda function adapter that simply calls task_funcs[task].
+ *
+ * If no multi-threading is desired, a @c NULL value of JxlParallelRunner
+ * will use an internal implementation without multi-threading.
+ */
+
+#ifndef JXL_PARALLEL_RUNNER_H_
+#define JXL_PARALLEL_RUNNER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Return code used in the JxlParallel* functions as return value. A value
+ * of 0 means success and any other value means error. The special value
+ * JXL_PARALLEL_RET_RUNNER_ERROR can be used by the runner to indicate any
+ * other error.
+ */
+typedef int JxlParallelRetCode;
+
+/**
+ * General error returned by the JxlParallelRunInit function to indicate
+ * an error.
+ */
+#define JXL_PARALLEL_RET_RUNNER_ERROR (-1)
+
+/**
+ * Parallel run initialization callback. See JxlParallelRunner for details.
+ *
+ * This function MUST be called by the JxlParallelRunner only once, on the
+ * same thread that called JxlParallelRunner, before any parallel execution.
+ * The purpose of this call is to provide the maximum number of threads that the
+ * JxlParallelRunner will use, which can be used by JPEG XL to allocate
+ * per-thread storage if needed.
+ *
+ * @param jpegxl_opaque the @p jpegxl_opaque handle provided to
+ * JxlParallelRunner() must be passed here.
+ * @param num_threads the maximum number of threads. This value must be
+ * positive.
+ * @returns 0 if the initialization process was successful.
+ * @returns an error code if there was an error, which should be returned by
+ * JxlParallelRunner().
+ */
+typedef JxlParallelRetCode (*JxlParallelRunInit)(void* jpegxl_opaque,
+                                                 size_t num_threads);
+
+/**
+ * Parallel run data processing callback. See JxlParallelRunner for details.
+ *
+ * This function MUST be called once for every number in the range [start_range,
+ * end_range) (including start_range but not including end_range) passing this
+ * number as the @p value. Calls for different value may be executed from
+ * different threads in parallel.
+ *
+ * @param jpegxl_opaque the @p jpegxl_opaque handle provided to
+ * JxlParallelRunner() must be passed here.
+ * @param value the number in the range [start_range, end_range) of the call.
+ * @param thread_id the thread number where this function is being called from.
+ * This must be lower than the @p num_threads value passed to
+ * JxlParallelRunInit.
+ */
+typedef void (*JxlParallelRunFunction)(void* jpegxl_opaque, uint32_t value,
+                                       size_t thread_id);
+
+/**
+ * JxlParallelRunner function type. A parallel runner implementation can be
+ * provided by a JPEG XL caller to allow running computations in multiple
+ * threads. This function must call the initialization function @p init in the
+ * same thread that called it and then call the passed @p func once for every
+ * number in the range [start_range, end_range) (including start_range but not
+ * including end_range) possibly from different multiple threads in parallel.
+ *
+ * The JxlParallelRunner function does not need to be re-entrant. This means
+ * that the same JxlParallelRunner function with the same runner_opaque
+ * provided parameter will not be called from the library from either @p init or
+ * @p func in the same decoder or encoder instance. However, a single decoding
+ * or encoding instance may call the provided JxlParallelRunner multiple
+ * times for different parts of the decoding or encoding process.
+ *
+ * @returns 0 if the @p init call succeeded (returned 0) and no other error
+ * occurred in the runner code.
+ * @returns JXL_PARALLEL_RET_RUNNER_ERROR if an error occurred in the runner
+ * code, for example, setting up the threads.
+ * @return the return value of @p init() if non-zero.
+ */
+typedef JxlParallelRetCode (*JxlParallelRunner)(
+    void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+    JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range);
+
+/* The following is an example of a JxlParallelRunner that doesn't use any
+ * multi-threading. Note that this implementation doesn't store any state
+ * between multiple calls of the ExampleSequentialRunner function, so the
+ * runner_opaque value is not used.
+
+  JxlParallelRetCode ExampleSequentialRunner(void* runner_opaque,
+                                                void* jpegxl_opaque,
+                                                JxlParallelRunInit init,
+                                                JxlParallelRunFunction func,
+                                                uint32_t start_range,
+                                                uint32_t end_range) {
+    // We only use one thread (the currently running thread).
+    JxlParallelRetCode init_ret = (*init)(jpegxl_opaque, 1);
+    if (init_ret != 0) return init_ret;
+
+    // In case of other initialization error (for example when initializing the
+    // threads) one can return JXL_PARALLEL_RET_RUNNER_ERROR.
+
+    for (uint32_t i = start_range; i < end_range; i++) {
+      // Every call is in the thread number 0. These don't need to be in any
+      // order.
+      (*func)(jpegxl_opaque, i, 0);
+    }
+    return 0;
+  }
+ */
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_PARALLEL_RUNNER_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/resizable_parallel_runner.h b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/resizable_parallel_runner.h
new file mode 100644
index 0000000000..88a315dca2
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/resizable_parallel_runner.h
@@ -0,0 +1,75 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @file resizable_parallel_runner.h
+ * @brief implementation using std::thread of a resizable ::JxlParallelRunner.
+ */
+
+/** Implementation of JxlParallelRunner that can be used to enable
+ * multithreading when using the JPEG XL library. This uses std::thread
+ * internally and related synchronization functions. The number of threads
+ * created can be changed after creation of the thread pool; the threads
+ * (including the main thread) are re-used for every
+ * ResizableParallelRunner::Runner call. Only one concurrent
+ * JxlResizableParallelRunner call per instance is allowed at a time.
+ *
+ * This is a scalable, lower-overhead thread pool runner, especially suitable
+ * for data-parallel computations in the fork-join model, where clients need to
+ * know when all tasks have completed.
+ *
+ * Compared to the implementation in @ref thread_parallel_runner.h, this
+ * implementation is tuned for execution on lower-powered systems, including
+ * for example ARM CPUs with big.LITTLE computation models.
+ */
+
+#ifndef JXL_RESIZABLE_PARALLEL_RUNNER_H_
+#define JXL_RESIZABLE_PARALLEL_RUNNER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "jxl/jxl_threads_export.h"
+#include "jxl/memory_manager.h"
+#include "jxl/parallel_runner.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Parallel runner internally using std::thread. Use as JxlParallelRunner.
+ */
+JXL_THREADS_EXPORT JxlParallelRetCode JxlResizableParallelRunner(
+    void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+    JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range);
+
+/** Creates the runner for JxlResizableParallelRunner. Use as the opaque
+ * runner. The runner will execute tasks on the calling thread until
+ * @ref JxlResizableParallelRunnerSetThreads is called.
+ */
+JXL_THREADS_EXPORT void* JxlResizableParallelRunnerCreate(
+    const JxlMemoryManager* memory_manager);
+
+/** Changes the number of threads for JxlResizableParallelRunner.
+ */
+JXL_THREADS_EXPORT void JxlResizableParallelRunnerSetThreads(
+    void* runner_opaque, size_t num_threads);
+
+/** Suggests a number of threads to use for an image of given size.
+ */
+JXL_THREADS_EXPORT uint32_t
+JxlResizableParallelRunnerSuggestThreads(uint64_t xsize, uint64_t ysize);
+
+/** Destroys the runner created by JxlResizableParallelRunnerCreate.
+ */
+JXL_THREADS_EXPORT void JxlResizableParallelRunnerDestroy(void* runner_opaque);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_RESIZABLE_PARALLEL_RUNNER_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/resizable_parallel_runner_cxx.h b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/resizable_parallel_runner_cxx.h
new file mode 100644
index 0000000000..54b8b95a57
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/resizable_parallel_runner_cxx.h
@@ -0,0 +1,59 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/// @file resizable_parallel_runner_cxx.h
+/// @brief C++ header-only helper for @ref resizable_parallel_runner.h.
+///
+/// There's no binary library associated with the header since this is a header
+/// only library.
+
+#ifndef JXL_RESIZABLE_PARALLEL_RUNNER_CXX_H_
+#define JXL_RESIZABLE_PARALLEL_RUNNER_CXX_H_
+
+#include <memory>
+
+#include "jxl/resizable_parallel_runner.h"
+
+#if !(defined(__cplusplus) || defined(c_plusplus))
+#error \
+    "This a C++ only header. Use jxl/jxl_resizable_parallel_runner.h from C" \
+    "sources."
+#endif
+
+/// Struct to call JxlResizableParallelRunnerDestroy from the
+/// JxlResizableParallelRunnerPtr unique_ptr.
+struct JxlResizableParallelRunnerDestroyStruct {
+  /// Calls @ref JxlResizableParallelRunnerDestroy() on the passed runner.
+  void operator()(void* runner) { JxlResizableParallelRunnerDestroy(runner); }
+};
+
+/// std::unique_ptr<> type that calls JxlResizableParallelRunnerDestroy() when
+/// releasing the runner.
+///
+/// Use this helper type from C++ sources to ensure the runner is destroyed and
+/// its internal resources released.
+typedef std::unique_ptr<void, JxlResizableParallelRunnerDestroyStruct>
+    JxlResizableParallelRunnerPtr;
+
+/// Creates an instance of JxlResizableParallelRunner into a
+/// JxlResizableParallelRunnerPtr and initializes it.
+///
+/// This function returns a unique_ptr that will call
+/// JxlResizableParallelRunnerDestroy() when releasing the pointer. See @ref
+/// JxlResizableParallelRunnerCreate for details on the instance creation.
+///
+/// @param memory_manager custom allocator function. It may be NULL. The memory
+///        manager will be copied internally.
+/// @note The thread count is set with JxlResizableParallelRunnerSetThreads().
+/// @return a @c NULL JxlResizableParallelRunnerPtr if the instance can not be
+/// allocated or initialized
+/// @return initialized JxlResizableParallelRunnerPtr instance otherwise.
+static inline JxlResizableParallelRunnerPtr JxlResizableParallelRunnerMake(
+    const JxlMemoryManager* memory_manager) {
+  return JxlResizableParallelRunnerPtr(
+      JxlResizableParallelRunnerCreate(memory_manager));
+}
+
+#endif  // JXL_RESIZABLE_PARALLEL_RUNNER_CXX_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/thread_parallel_runner.h b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/thread_parallel_runner.h
new file mode 100644
index 0000000000..c3d8308e0c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/thread_parallel_runner.h
@@ -0,0 +1,69 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @file thread_parallel_runner.h
+ * @brief implementation using std::thread of a ::JxlParallelRunner.
+ */
+
+/** Implementation of JxlParallelRunner that can be used to enable
+ * multithreading when using the JPEG XL library. This uses std::thread
+ * internally and related synchronization functions. The number of threads
+ * created is fixed at construction time and the threads are re-used for every
+ * ThreadParallelRunner::Runner call. Only one concurrent
+ * JxlThreadParallelRunner call per instance is allowed at a time.
+ *
+ * This is a scalable, lower-overhead thread pool runner, especially suitable
+ * for data-parallel computations in the fork-join model, where clients need to
+ * know when all tasks have completed.
+ *
+ * This thread pool can efficiently load-balance millions of tasks using an
+ * atomic counter, thus avoiding per-task virtual or system calls. With 48
+ * hyperthreads and 1M tasks that add to an atomic counter, overall runtime is
+ * 10-20x higher when using std::async, and ~200x for a queue-based thread pool.
+ */
+
+#ifndef JXL_THREAD_PARALLEL_RUNNER_H_
+#define JXL_THREAD_PARALLEL_RUNNER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "jxl/jxl_threads_export.h"
+#include "jxl/memory_manager.h"
+#include "jxl/parallel_runner.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Parallel runner internally using std::thread. Use as JxlParallelRunner.
+ */
+JXL_THREADS_EXPORT JxlParallelRetCode JxlThreadParallelRunner(
+    void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+    JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range);
+
+/** Creates the runner for JxlThreadParallelRunner. Use as the opaque
+ * runner.
+ */
+JXL_THREADS_EXPORT void* JxlThreadParallelRunnerCreate(
+    const JxlMemoryManager* memory_manager, size_t num_worker_threads);
+
+/** Destroys the runner created by JxlThreadParallelRunnerCreate.
+ */
+JXL_THREADS_EXPORT void JxlThreadParallelRunnerDestroy(void* runner_opaque);
+
+/** Returns a default num_worker_threads value for
+ * JxlThreadParallelRunnerCreate.
+ */
+JXL_THREADS_EXPORT size_t JxlThreadParallelRunnerDefaultNumWorkerThreads();
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_THREAD_PARALLEL_RUNNER_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/thread_parallel_runner_cxx.h b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/thread_parallel_runner_cxx.h
new file mode 100644
index 0000000000..121c556130
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/thread_parallel_runner_cxx.h
@@ -0,0 +1,59 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/// @file thread_parallel_runner_cxx.h
+/// @brief C++ header-only helper for @ref thread_parallel_runner.h.
+///
+/// There's no binary library associated with the header since this is a header
+/// only library.
+
+#ifndef JXL_THREAD_PARALLEL_RUNNER_CXX_H_
+#define JXL_THREAD_PARALLEL_RUNNER_CXX_H_
+
+#include <memory>
+
+#include "jxl/thread_parallel_runner.h"
+
+#if !(defined(__cplusplus) || defined(c_plusplus))
+#error \
+    "This a C++ only header. Use jxl/jxl_thread_parallel_runner.h from C" \
+    "sources."
+#endif
+
+/// Struct to call JxlThreadParallelRunnerDestroy from the
+/// JxlThreadParallelRunnerPtr unique_ptr.
+struct JxlThreadParallelRunnerDestroyStruct {
+  /// Calls @ref JxlThreadParallelRunnerDestroy() on the passed runner.
+  void operator()(void* runner) { JxlThreadParallelRunnerDestroy(runner); }
+};
+
+/// std::unique_ptr<> type that calls JxlThreadParallelRunnerDestroy() when
+/// releasing the runner.
+///
+/// Use this helper type from C++ sources to ensure the runner is destroyed and
+/// its internal resources released.
+typedef std::unique_ptr<void, JxlThreadParallelRunnerDestroyStruct>
+    JxlThreadParallelRunnerPtr;
+
+/// Creates an instance of JxlThreadParallelRunner into a
+/// JxlThreadParallelRunnerPtr and initializes it.
+///
+/// This function returns a unique_ptr that will call
+/// JxlThreadParallelRunnerDestroy() when releasing the pointer. See @ref
+/// JxlThreadParallelRunnerCreate for details on the instance creation.
+///
+/// @param memory_manager custom allocator function. It may be NULL. The memory
+///        manager will be copied internally.
+/// @param num_worker_threads the number of worker threads to create.
+/// @return a @c NULL JxlThreadParallelRunnerPtr if the instance can not be
+/// allocated or initialized
+/// @return initialized JxlThreadParallelRunnerPtr instance otherwise.
+static inline JxlThreadParallelRunnerPtr JxlThreadParallelRunnerMake(
+    const JxlMemoryManager* memory_manager, size_t num_worker_threads) {
+  return JxlThreadParallelRunnerPtr(
+      JxlThreadParallelRunnerCreate(memory_manager, num_worker_threads));
+}
+
+#endif  // JXL_THREAD_PARALLEL_RUNNER_CXX_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/types.h b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/types.h
new file mode 100644
index 0000000000..58ade64347
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/include/jxl/types.h
@@ -0,0 +1,116 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @file types.h
+ * @brief Data types for the JPEG XL API, for both encoding and decoding.
+ */
+
+#ifndef JXL_TYPES_H_
+#define JXL_TYPES_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/**
+ * A portable @c bool replacement.
+ *
+ * ::JXL_BOOL is a "documentation" type: actually it is @c int, but in API it
+ * denotes a type, whose only values are ::JXL_TRUE and ::JXL_FALSE.
+ */
+#define JXL_BOOL int
+/** Portable @c true replacement. */
+#define JXL_TRUE 1
+/** Portable @c false replacement. */
+#define JXL_FALSE 0
+
+/** Data type for the sample values per channel per pixel.
+ */
+typedef enum {
+  /** Use 32-bit single-precision floating point values, with range 0.0-1.0
+   * (within gamut, may go outside this range for wide color gamut). Floating
+   * point output, either JXL_TYPE_FLOAT or JXL_TYPE_FLOAT16, is recommended
+   * for HDR and wide gamut images when color profile conversion is required. */
+  JXL_TYPE_FLOAT = 0,
+
+  /** Use 1-bit packed in uint8_t, first pixel in LSB, padded to uint8_t per
+   * row.
+   * TODO(lode): support first in MSB, other padding.
+   */
+  JXL_TYPE_BOOLEAN,
+
+  /** Use type uint8_t. May clip wide color gamut data.
+   */
+  JXL_TYPE_UINT8,
+
+  /** Use type uint16_t. May clip wide color gamut data.
+   */
+  JXL_TYPE_UINT16,
+
+  /** Use type uint32_t. May clip wide color gamut data.
+   */
+  JXL_TYPE_UINT32,
+
+  /** Use 16-bit IEEE 754 half-precision floating point values */
+  JXL_TYPE_FLOAT16,
+} JxlDataType;
+
+/** Ordering of multi-byte data.
+ */
+typedef enum {
+  /** Use the endianness of the system, either little endian or big endian,
+   * without forcing either specific endianness. Do not use if pixel data
+   * should be exported to a well defined format.
+   */
+  JXL_NATIVE_ENDIAN = 0,
+  /** Force little endian */
+  JXL_LITTLE_ENDIAN = 1,
+  /** Force big endian */
+  JXL_BIG_ENDIAN = 2,
+} JxlEndianness;
+
+/** Data type for the sample values per channel per pixel for the output buffer
+ * for pixels. This is not necessarily the same as the data type encoded in the
+ * codestream. The channels are interleaved per pixel. The pixels are
+ * organized row by row, left to right, top to bottom.
+ * TODO(lode): implement padding / alignment (row stride)
+ * TODO(lode): support different channel orders if needed (RGB, BGR, ...)
+ */
+typedef struct {
+  /** Amount of channels available in a pixel buffer.
+   * 1: single-channel data, e.g. grayscale
+   * 2: single-channel + alpha
+   * 3: trichromatic, e.g. RGB
+   * 4: trichromatic + alpha
+   * TODO(lode): this needs finetuning. It is not yet defined how the user
+   * chooses output color space. CMYK+alpha needs 5 channels.
+   */
+  uint32_t num_channels;
+
+  /** Data type of each channel.
+   */
+  JxlDataType data_type;
+
+  /** Whether multi-byte data types are represented in big endian or little
+   * endian format. This applies to JXL_TYPE_UINT16, JXL_TYPE_UINT32
+   * and JXL_TYPE_FLOAT.
+   */
+  JxlEndianness endianness;
+
+  /** Align scanlines to a multiple of align bytes, or 0 to require no
+   * alignment at all (which has the same effect as value 1)
+   */
+  size_t align;
+} JxlPixelFormat;
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_TYPES_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ac_context.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ac_context.h
new file mode 100644
index 0000000000..94e5bb7c03
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ac_context.h
@@ -0,0 +1,151 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_AC_CONTEXT_H_
+#define LIB_JXL_AC_CONTEXT_H_
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+
+namespace jxl {
+
+// Block context used for scanning order, number of non-zeros, AC coefficients.
+// Equal to the channel.
+constexpr uint32_t kDCTOrderContextStart = 0;
+
+// BlockCtxMap::NonZeroContext clamps the number of non-zeros to 64 and maps it
+// to itself below 8 and to 4 + non_zeros / 2 otherwise, so the bucket index
+// goes from 0 to 36, inclusive.
+constexpr uint32_t kNonZeroBuckets = 37;
+
+static const uint16_t kCoeffFreqContext[64] = {
+    0xBAD, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14,
+    15,    15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
+    23,    23, 23, 23, 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26,
+    27,    27, 27, 27, 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30,
+};
+
+static const uint16_t kCoeffNumNonzeroContext[64] = {
+    0xBAD, 0,   31,  62,  62,  93,  93,  93,  93,  123, 123, 123, 123,
+    152,   152, 152, 152, 152, 152, 152, 152, 180, 180, 180, 180, 180,
+    180,   180, 180, 180, 180, 180, 180, 206, 206, 206, 206, 206, 206,
+    206,   206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206,
+    206,   206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206,
+};
+
+static const uint8_t kDefaultCtxMap[39] = {
+    // Default ctx map clusters all the large transforms together.
+    0, 1, 2, 2, 3,  3,  4,  5,  6,  6,  6,  6,  6,   //
+    7, 8, 9, 9, 10, 11, 12, 13, 14, 14, 14, 14, 14,  //
+    7, 8, 9, 9, 10, 11, 12, 13, 14, 14, 14, 14, 14,  //
+};
+
+// Supremum of ZeroDensityContext(x, y) + 1, when x + y < 64.
+constexpr int kZeroDensityContextCount = 458;
+// Supremum of ZeroDensityContext(x, y) + 1.
+constexpr int kZeroDensityContextLimit = 474;
+
+/* This function is used for entropy-sources pre-clustering.
+ *
+ * Ideally, each combination of |nonzeros_left| and |k| should go to its own
+ * bucket; but it implies (64 * 63 / 2) == 2016 buckets. If there is other
+ * dimension (e.g. block context), then number of primary clusters becomes too
+ * big.
+ *
+ * To solve this problem, |nonzeros_left| and |k| values are clustered. It is
+ * known that their sum is at most 64; consequently, the total number of
+ * buckets is at most A(64) * B(64).
+ */
+// TODO(user): investigate, why disabling pre-clustering makes entropy code
+// less dense. Perhaps we would need to add HQ clustering algorithm that would
+// be able to squeeze better by spending more CPU cycles.
+static JXL_INLINE size_t ZeroDensityContext(size_t nonzeros_left, size_t k,
+                                            size_t covered_blocks,
+                                            size_t log2_covered_blocks,
+                                            size_t prev) {
+  JXL_DASSERT((1u << log2_covered_blocks) == covered_blocks);
+  nonzeros_left = (nonzeros_left + covered_blocks - 1) >> log2_covered_blocks;
+  k >>= log2_covered_blocks;
+  JXL_DASSERT(k > 0);
+  JXL_DASSERT(k < 64);
+  JXL_DASSERT(nonzeros_left > 0);
+  // Asserting nonzeros_left + k < 65 here causes crashes in debug mode with
+  // invalid input, since the (hot) decoding loop does not check this condition.
+  // As no out-of-bound memory reads are issued even if that condition is
+  // broken, we check this simpler condition which holds anyway. The decoder
+  // will still mark a file in which that condition happens as not valid at the
+  // end of the decoding loop, as `nzeros` will not be `0`.
+  JXL_DASSERT(nonzeros_left < 64);
+  return (kCoeffNumNonzeroContext[nonzeros_left] + kCoeffFreqContext[k]) * 2 +
+         prev;
+}
+
+struct BlockCtxMap {
+  std::vector<int> dc_thresholds[3];
+  std::vector<uint32_t> qf_thresholds;
+  std::vector<uint8_t> ctx_map;
+  size_t num_ctxs, num_dc_ctxs;
+
+  static_assert(3 * kNumOrders ==
+                    sizeof(kDefaultCtxMap) / sizeof *kDefaultCtxMap,
+                "Update default context map");
+
+  size_t Context(int dc_idx, uint32_t qf, size_t ord, size_t c) const {
+    size_t qf_idx = 0;
+    for (uint32_t t : qf_thresholds) {
+      if (qf > t) qf_idx++;
+    }
+    size_t idx = c < 2 ? c ^ 1 : 2;
+    idx = idx * kNumOrders + ord;
+    idx = idx * (qf_thresholds.size() + 1) + qf_idx;
+    idx = idx * num_dc_ctxs + dc_idx;
+    return ctx_map[idx];
+  }
+  // Offset of the zero-density contexts of `block_ctx`; they are laid out after
+  // the num_ctxs * kNonZeroBuckets contexts for the number of non-zeros.
+  uint32_t ZeroDensityContextsOffset(uint32_t block_ctx) const {
+    return num_ctxs * kNonZeroBuckets + kZeroDensityContextCount * block_ctx;
+  }
+
+  // Context map for AC coefficients consists of 2 blocks:
+  //  |num_ctxs x                : context for number of non-zeros in the block
+  //   kNonZeroBuckets|            computed from block context and predicted
+  //                               value (based top and left values)
+  //  |num_ctxs x                : context for AC coefficient symbols,
+  //   kZeroDensityContextCount|   computed from block context,
+  //                               number of non-zeros left and
+  //                               index in scan order
+  uint32_t NumACContexts() const {
+    return num_ctxs * (kNonZeroBuckets + kZeroDensityContextCount);
+  }
+
+  // Non-zero context is based on number of non-zeros and block context.
+  // For better clustering, contexts with same number of non-zeros are grouped.
+  inline uint32_t NonZeroContext(uint32_t non_zeros, uint32_t block_ctx) const {
+    uint32_t ctx;
+    if (non_zeros >= 64) non_zeros = 64;
+    if (non_zeros < 8) {
+      ctx = non_zeros;
+    } else {
+      ctx = 4 + non_zeros / 2;
+    }
+    return ctx * num_ctxs + block_ctx;
+  }
+
+  BlockCtxMap() {
+    ctx_map.assign(std::begin(jxl::kDefaultCtxMap),
+                   std::end(jxl::kDefaultCtxMap));
+    num_ctxs = *std::max_element(ctx_map.begin(), ctx_map.end()) + 1;
+    num_dc_ctxs = 1;
+  }
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_AC_CONTEXT_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ac_strategy.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ac_strategy.cc
new file mode 100644
index 0000000000..f262f33319
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ac_strategy.cc
@@ -0,0 +1,110 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/ac_strategy.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <numeric>  // iota
+#include <type_traits>
+#include <utility>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+// Tries to generalize zig-zag order to non-square blocks. Surprisingly, in
+// square block frequency along the (i + j == const) diagonals is roughly the
+// same. For historical reasons, consecutive diagonals are traversed
+// in alternating directions - so called "zig-zag" (or "snake") order.
+AcStrategy::CoeffOrderAndLut::CoeffOrderAndLut() {
+  for (size_t s = 0; s < AcStrategy::kNumValidStrategies; s++) {
+    const AcStrategy acs = AcStrategy::FromRawStrategy(s);
+    size_t cx = acs.covered_blocks_x();
+    size_t cy = acs.covered_blocks_y();
+    CoefficientLayout(&cy, &cx);
+    JXL_ASSERT((AcStrategy::CoeffOrderAndLut::kOffset[s + 1] -
+                AcStrategy::CoeffOrderAndLut::kOffset[s]) == cx * cy);
+    coeff_order_t* JXL_RESTRICT order_start =
+        order + AcStrategy::CoeffOrderAndLut::kOffset[s] * kDCTBlockSize;
+    coeff_order_t* JXL_RESTRICT lut_start =
+        lut + AcStrategy::CoeffOrderAndLut::kOffset[s] * kDCTBlockSize;
+
+    // CoefficientLayout ensures cx >= cy.
+    // We compute the zigzag order for a cx x cx block, then discard all the
+    // lines that are not multiple of the ratio between cx and cy.
+    size_t xs = cx / cy;
+    size_t xsm = xs - 1;
+    size_t xss = CeilLog2Nonzero(xs);
+    // First half of the block
+    size_t cur = cx * cy;
+    for (size_t i = 0; i < cx * kBlockDim; i++) {
+      for (size_t j = 0; j <= i; j++) {
+        size_t x = j;
+        size_t y = i - j;
+        if (i % 2) std::swap(x, y);
+        if ((y & xsm) != 0) continue;
+        y >>= xss;
+        size_t val = 0;
+        if (x < cx && y < cy) {
+          val = y * cx + x;
+        } else {
+          val = cur++;
+        }
+        lut_start[y * cx * kBlockDim + x] = val;
+        order_start[val] = y * cx * kBlockDim + x;
+      }
+    }
+    // Second half
+    for (size_t ip = cx * kBlockDim - 1; ip > 0; ip--) {
+      size_t i = ip - 1;
+      for (size_t j = 0; j <= i; j++) {
+        size_t x = cx * kBlockDim - 1 - (i - j);
+        size_t y = cx * kBlockDim - 1 - j;
+        if (i % 2) std::swap(x, y);
+        if ((y & xsm) != 0) continue;
+        y >>= xss;
+        size_t val = cur++;
+        lut_start[y * cx * kBlockDim + x] = val;
+        order_start[val] = y * cx * kBlockDim + x;
+      }
+    }
+  }
+}
+
+const AcStrategy::CoeffOrderAndLut* AcStrategy::CoeffOrder() {
+  static AcStrategy::CoeffOrderAndLut* order =
+      new AcStrategy::CoeffOrderAndLut();
+  return order;
+}
+
+// These definitions are needed before C++17.
+constexpr size_t AcStrategy::kMaxCoeffBlocks;
+constexpr size_t AcStrategy::kMaxBlockDim;
+constexpr size_t AcStrategy::kMaxCoeffArea;
+constexpr size_t AcStrategy::CoeffOrderAndLut::kOffset[];
+
+AcStrategyImage::AcStrategyImage(size_t xsize, size_t ysize)
+    : layers_(xsize, ysize) {
+  row_ = layers_.Row(0);
+  stride_ = layers_.PixelsPerRow();
+}
+
+size_t AcStrategyImage::CountBlocks(AcStrategy::Type type) const {
+  size_t ret = 0;
+  for (size_t y = 0; y < layers_.ysize(); y++) {
+    const uint8_t* JXL_RESTRICT row = layers_.ConstRow(y);
+    for (size_t x = 0; x < layers_.xsize(); x++) {
+      if (row[x] == ((static_cast<uint8_t>(type) << 1) | 1)) ret++;
+    }
+  }
+  return ret;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ac_strategy.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ac_strategy.h
new file mode 100644
index 0000000000..b51564594c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ac_strategy.h
@@ -0,0 +1,287 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_AC_STRATEGY_H_
+#define LIB_JXL_AC_STRATEGY_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <hwy/base.h>  // kMaxVectorSize
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image_ops.h"
+
+// Defines the different kinds of transforms, and heuristics to choose between
+// them.
+// `AcStrategy` represents what transform should be used, and which sub-block of
+// that transform we are currently in. Note that DCT4x4 is applied on all four
+// 4x4 sub-blocks of an 8x8 block.
+// `AcStrategyImage` defines which strategy should be used for each 8x8 block
+// of the image. The upper 7 bits of each byte hold the raw strategy, the
+// lowest bit is set for the first (top-left) block of that strategy.
+
+namespace jxl {
+
+class AcStrategy {
+ public:
+  // Extremal values for the number of blocks/coefficients of a single strategy.
+  static constexpr size_t kMaxCoeffBlocks = 32;
+  static constexpr size_t kMaxBlockDim = kBlockDim * kMaxCoeffBlocks;
+  // Maximum number of coefficients in a block. Guaranteed to be a multiple of
+  // the vector size.
+  static constexpr size_t kMaxCoeffArea = kMaxBlockDim * kMaxBlockDim;
+  static_assert((kMaxCoeffArea * sizeof(float)) % hwy::kMaxVectorSize == 0,
+                "Coefficient area is not a multiple of vector size");
+
+  // Raw strategy types.
+  enum Type : uint32_t {
+    // Regular block size DCT
+    DCT = 0,
+    // Encode pixels without transforming
+    IDENTITY = 1,
+    // Use 2-by-2 DCT
+    DCT2X2 = 2,
+    // Use 4-by-4 DCT
+    DCT4X4 = 3,
+    // Use 16-by-16 DCT
+    DCT16X16 = 4,
+    // Use 32-by-32 DCT
+    DCT32X32 = 5,
+    // Use 16-by-8 DCT
+    DCT16X8 = 6,
+    // Use 8-by-16 DCT
+    DCT8X16 = 7,
+    // Use 32-by-8 DCT
+    DCT32X8 = 8,
+    // Use 8-by-32 DCT
+    DCT8X32 = 9,
+    // Use 32-by-16 DCT
+    DCT32X16 = 10,
+    // Use 16-by-32 DCT
+    DCT16X32 = 11,
+    // 4x8 and 8x4 DCT
+    DCT4X8 = 12,
+    DCT8X4 = 13,
+    // Corner-DCT.
+    AFV0 = 14,
+    AFV1 = 15,
+    AFV2 = 16,
+    AFV3 = 17,
+    // Larger DCTs
+    DCT64X64 = 18,
+    DCT64X32 = 19,
+    DCT32X64 = 20,
+    DCT128X128 = 21,
+    DCT128X64 = 22,
+    DCT64X128 = 23,
+    DCT256X256 = 24,
+    DCT256X128 = 25,
+    DCT128X256 = 26,
+    // Marker for num of valid strategies.
+    kNumValidStrategies
+  };
+
+  static constexpr uint32_t TypeBit(const Type type) {
+    return 1u << static_cast<uint32_t>(type);
+  }
+
+  // Returns true if this block is the first 8x8 block (i.e. top-left) of a
+  // possibly multi-block strategy.
+  JXL_INLINE bool IsFirstBlock() const { return is_first_; }
+
+  JXL_INLINE bool IsMultiblock() const {
+    constexpr uint32_t bits =
+        TypeBit(Type::DCT16X16) | TypeBit(Type::DCT32X32) |
+        TypeBit(Type::DCT16X8) | TypeBit(Type::DCT8X16) |
+        TypeBit(Type::DCT32X8) | TypeBit(Type::DCT8X32) |
+        TypeBit(Type::DCT16X32) | TypeBit(Type::DCT32X16) |
+        TypeBit(Type::DCT32X64) | TypeBit(Type::DCT64X32) |
+        TypeBit(Type::DCT64X64) | TypeBit(DCT64X128) | TypeBit(DCT128X64) |
+        TypeBit(DCT128X128) | TypeBit(DCT128X256) | TypeBit(DCT256X128) |
+        TypeBit(DCT256X256);
+    JXL_DASSERT(Strategy() < kNumValidStrategies);
+    return ((1u << static_cast<uint32_t>(Strategy())) & bits) != 0;
+  }
+
+  // Returns the raw strategy value. Should only be used for tokenization.
+  JXL_INLINE uint8_t RawStrategy() const {
+    return static_cast<uint8_t>(strategy_);
+  }
+
+  JXL_INLINE Type Strategy() const { return strategy_; }
+
+  // Inverse check
+  static JXL_INLINE constexpr bool IsRawStrategyValid(int raw_strategy) {
+    return raw_strategy < static_cast<int32_t>(kNumValidStrategies) &&
+           raw_strategy >= 0;
+  }
+  static JXL_INLINE AcStrategy FromRawStrategy(uint8_t raw_strategy) {
+    return FromRawStrategy(static_cast<Type>(raw_strategy));
+  }
+  static JXL_INLINE AcStrategy FromRawStrategy(Type raw_strategy) {
+    JXL_DASSERT(IsRawStrategyValid(static_cast<uint32_t>(raw_strategy)));
+    return AcStrategy(raw_strategy, /*is_first=*/true);
+  }
+
+  // "Natural order" means the order of increasing of "anisotropic" frequency of
+  // continuous version of DCT basis.
+  // Round-trip, for any given strategy s:
+  //  X = NaturalCoeffOrder(s)[NaturalCoeffOrderLut(s)[X]]
+  //  X = NaturalCoeffOrderLut(s)[NaturalCoeffOrder(s)[X]]
+  JXL_INLINE const coeff_order_t* NaturalCoeffOrder() const {
+    return CoeffOrder()->order +
+           CoeffOrderAndLut::kOffset[RawStrategy()] * kDCTBlockSize;
+  }
+
+  JXL_INLINE const coeff_order_t* NaturalCoeffOrderLut() const {
+    return CoeffOrder()->lut +
+           CoeffOrderAndLut::kOffset[RawStrategy()] * kDCTBlockSize;
+  }
+
+  // Number of 8x8 blocks that this strategy covers along x. The value depends
+  // only on the strategy, not on whether this is the first block.
+  JXL_INLINE size_t covered_blocks_x() const {
+    static constexpr uint8_t kLut[] = {1, 1, 1, 1,  2, 4,  1,  2,  1,
+                                       4, 2, 4, 1,  1, 1,  1,  1,  1,
+                                       8, 4, 8, 16, 8, 16, 32, 16, 32};
+    static_assert(sizeof(kLut) / sizeof(*kLut) == kNumValidStrategies,
+                  "Update LUT");
+    return kLut[size_t(strategy_)];
+  }
+
+  JXL_INLINE size_t covered_blocks_y() const {
+    static constexpr uint8_t kLut[] = {1, 1, 1, 1,  2,  4, 2,  1,  4,
+                                       1, 4, 2, 1,  1,  1, 1,  1,  1,
+                                       8, 8, 4, 16, 16, 8, 32, 32, 16};
+    static_assert(sizeof(kLut) / sizeof(*kLut) == kNumValidStrategies,
+                  "Update LUT");
+    return kLut[size_t(strategy_)];
+  }
+
+  JXL_INLINE size_t log2_covered_blocks() const {
+    // Base-2 logarithm of covered_blocks_x() * covered_blocks_y(), per strategy.
+    static constexpr uint8_t kLog2[] = {
+        0, 0, 0, 0, 2, 4, 1, 1, 2, 2, 3, 3, 0, 0,
+        0, 0, 0, 0, 6, 5, 5, 8, 7, 7, 10, 9, 9};
+    static_assert(sizeof(kLog2) == kNumValidStrategies, "Update LUT");
+    return kLog2[static_cast<size_t>(strategy_)];
+  }
+
+  struct CoeffOrderAndLut {
+    // Start of each strategy's slice, in units of kDCTBlockSize entries; the
+    // last entry is the total. TODO(veluca): merge identical order types.
+    static constexpr size_t kOffset[kNumValidStrategies + 1] = {
+        0,  1,  2,  3,  4,  8,   24,  26,  28,  32,  36,  44,   52,   53,
+        54, 55, 56, 57, 58, 122, 154, 186, 442, 570, 698, 1722, 2234, 2746,
+    };
+    static constexpr size_t kTotalTableSize =
+        kOffset[kNumValidStrategies] * kDCTBlockSize;
+    coeff_order_t order[kTotalTableSize];  // read via NaturalCoeffOrder()
+    coeff_order_t lut[kTotalTableSize];    // read via NaturalCoeffOrderLut()
+
+   private:
+    CoeffOrderAndLut();  // private: only the friend below can construct it
+    friend class AcStrategy;
+  };
+
+ private:
+  friend class AcStrategyRow;
+  JXL_INLINE AcStrategy(Type strategy, bool is_first)
+      : strategy_(strategy), is_first_(is_first) {
+    JXL_DASSERT(IsMultiblock() || is_first == true);  // 1-block => is_first
+  }
+
+  Type strategy_;
+  bool is_first_;
+
+  static const CoeffOrderAndLut* CoeffOrder();
+};
+
+// View over one row of packed strategy bytes: (raw type << 1) | is_first.
+class AcStrategyRow {
+ public:
+  explicit AcStrategyRow(const uint8_t* row) : row_(row) {}
+  AcStrategy operator[](size_t x) const {
+    const uint8_t packed = row_[x];  // bit 0: is_first, upper bits: type
+    return AcStrategy(static_cast<AcStrategy::Type>(packed >> 1), packed & 1);
+  }
+ private:
+  const uint8_t* JXL_RESTRICT row_;
+};
+
+class AcStrategyImage {  // packed AC strategy byte for every block
+ public:
+  AcStrategyImage() = default;
+  AcStrategyImage(size_t xsize, size_t ysize);
+  AcStrategyImage(AcStrategyImage&&) = default;
+  AcStrategyImage& operator=(AcStrategyImage&&) = default;
+  // Marks every cell of `rect` as a first block of type DCT.
+  void FillDCT8(const Rect& rect) {
+    FillPlane<uint8_t>((static_cast<uint8_t>(AcStrategy::Type::DCT) << 1) | 1,
+                       &layers_, rect);
+  }
+  void FillDCT8() { FillDCT8(Rect(layers_)); }
+  // Resets every cell to the INVALID sentinel.
+  void FillInvalid() { FillImage(INVALID, &layers_); }
+  // Stores `type` at block (x, y); asserts that its footprint fits the image.
+  void Set(size_t x, size_t y, AcStrategy::Type type) {
+#if JXL_ENABLE_ASSERT
+    AcStrategy acs = AcStrategy::FromRawStrategy(type);
+#endif  // JXL_ENABLE_ASSERT
+    JXL_ASSERT(y + acs.covered_blocks_y() <= layers_.ysize());
+    JXL_ASSERT(x + acs.covered_blocks_x() <= layers_.xsize());
+    JXL_CHECK(SetNoBoundsCheck(x, y, type, /*check=*/false));
+  }
+  // Unchecked Set(); when `check` is true, fails if a covered cell is in use.
+  Status SetNoBoundsCheck(size_t x, size_t y, AcStrategy::Type type,
+                          bool check = true) {
+    AcStrategy acs = AcStrategy::FromRawStrategy(type);
+    for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+      for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+        size_t pos = (y + iy) * stride_ + x + ix;
+        if (check && row_[pos] != INVALID) {
+          return JXL_FAILURE("Invalid AC strategy: block overlap");
+        }
+        row_[pos] =
+            (static_cast<uint8_t>(type) << 1) | ((iy | ix) == 0 ? 1 : 0);
+      }
+    }
+    return true;
+  }
+  // True if cell (x, y) holds something other than the INVALID sentinel.
+  bool IsValid(size_t x, size_t y) const {
+    return row_[y * stride_ + x] != INVALID;
+  }
+
+  AcStrategyRow ConstRow(size_t y, size_t x_prefix = 0) const {
+    return AcStrategyRow(layers_.ConstRow(y) + x_prefix);
+  }
+  AcStrategyRow ConstRow(const Rect& rect, size_t y) const {
+    return ConstRow(rect.y0() + y, rect.x0());
+  }
+
+  size_t PixelsPerRow() const { return layers_.PixelsPerRow(); }
+  size_t xsize() const { return layers_.xsize(); }
+  size_t ysize() const { return layers_.ysize(); }
+
+  // Count the number of blocks of a given type.
+  size_t CountBlocks(AcStrategy::Type type) const;
+
+ private:
+  ImageB layers_;
+  uint8_t* JXL_RESTRICT row_ = nullptr;  // stays null after default construction
+  size_t stride_ = 0;                    // stays zero after default construction
+
+  // A value that does not represent a valid combined AC strategy
+  // value. Used as a sentinel.
+  static constexpr uint8_t INVALID = 0xFF;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_AC_STRATEGY_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ac_strategy_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ac_strategy_test.cc
new file mode 100644
index 0000000000..e4ceb88b43
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ac_strategy_test.cc
@@ -0,0 +1,225 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/ac_strategy.h"
+
+#include <string.h>
+
+#include <cmath>
+#include <hwy/aligned_allocator.h>
+#include <hwy/base.h>  // HWY_ALIGN_MAX
+#include <hwy/tests/test_util-inl.h>
+#include <utility>
+
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/dec_transforms_testonly.h"
+#include "lib/jxl/enc_transforms.h"
+
+namespace jxl {
+namespace {
+
+// Checks that pixels -> coefficients -> pixels is the identity.
+class AcStrategyRoundtrip : public ::hwy::TestWithParamTargetAndT<int> {
+ protected:
+  void Run() {
+    const AcStrategy::Type type = static_cast<AcStrategy::Type>(GetParam());
+    const AcStrategy acs = AcStrategy::FromRawStrategy(type);
+    const size_t kArea = AcStrategy::kMaxCoeffArea;
+    const size_t stride = acs.covered_blocks_x() * 8;
+    const unsigned num_pixels = 64u << acs.log2_covered_blocks();
+    auto mem = hwy::AllocateAligned<float>(4 * kArea);  // four work buffers
+    float* const scratch_space = mem.get();
+    float* const coeffs = scratch_space + kArea;
+    float* const idct = coeffs + kArea;
+    float* const input = idct + kArea;
+
+    // Impulse response: at most the first 1024 positions are exercised.
+    const unsigned num_impulses = std::min(1024u, num_pixels);
+    for (size_t i = 0; i < num_impulses; i++) {
+      std::fill_n(input, kArea, 0);
+      input[i] = 0.2f;
+      TransformFromPixels(type, input, stride, coeffs, scratch_space);
+      // coeffs[0] must equal the mean of the input samples.
+      ASSERT_NEAR(coeffs[0], 0.2 / num_pixels, 1e-6) << " i = " << i;
+      TransformToPixels(type, coeffs, idct, stride, scratch_space);
+      for (size_t j = 0; j < num_pixels; j++) {
+        ASSERT_NEAR(idct[j], j == i ? 0.2f : 0, 2e-6)
+            << "j = " << j << " i = " << i << " acs " << type;
+      }
+    }
+    // DC round trip: DC image -> lowest frequencies -> DC image.
+    std::fill_n(idct, kArea, 0);
+    for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
+      for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
+        std::fill_n(input, kArea, 0);
+        input[y * stride + x] = 0.2;
+        LowestFrequenciesFromDC(type, input, stride, coeffs);
+        DCFromLowestFrequencies(type, coeffs, idct, stride);
+        std::fill_n(input, kArea, 0);
+        input[y * stride + x] = 0.2;
+        for (size_t j = 0; j < num_pixels; j++) {
+          ASSERT_NEAR(idct[j], input[j], 1e-6)
+              << "j = " << j << " x = " << x << " y = " << y << " acs " << type;
+        }
+      }
+    }
+  }
+};
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(
+    AcStrategyRoundtrip,
+    ::testing::Range(0, int(AcStrategy::Type::kNumValidStrategies)));
+
+TEST_P(AcStrategyRoundtrip, Test) { Run(); }
+
+// Checks: DC -> lowest frequencies -> IDCT -> 8x8 averages gives back the DC.
+class AcStrategyRoundtripDownsample
+    : public ::hwy::TestWithParamTargetAndT<int> {
+ protected:
+  void Run() {
+    const AcStrategy::Type type = static_cast<AcStrategy::Type>(GetParam());
+    const AcStrategy acs = AcStrategy::FromRawStrategy(type);
+    const size_t kArea = AcStrategy::kMaxCoeffArea;
+    const size_t blocks_x = acs.covered_blocks_x();
+    const size_t stride = blocks_x * 8;
+
+    auto mem = hwy::AllocateAligned<float>(4 * kArea);  // four work buffers
+    float* const scratch_space = mem.get();
+    float* const coeffs = scratch_space + kArea;
+    float* const idct = coeffs + kArea;
+    float* const dc = idct + kArea;
+    std::fill_n(coeffs, kArea, 0.0f);
+    for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
+      for (size_t x = 0; x < blocks_x; x++) {
+        std::fill_n(dc, kArea, 0);
+        dc[y * stride + x] = 0.2f;
+        LowestFrequenciesFromDC(type, dc, stride, coeffs);
+        TransformToPixels(type, coeffs, idct, stride, scratch_space);
+        std::fill_n(coeffs, kArea, 0.0f);
+        std::fill_n(dc, kArea, 0);
+        dc[y * stride + x] = 0.2f;
+        // Average each 8x8 block of the IDCT output and compare with the DC.
+        for (size_t dy = 0; dy < acs.covered_blocks_y(); dy++) {
+          for (size_t dx = 0; dx < blocks_x; dx++) {
+            float sum = 0;
+            for (size_t iy = 0; iy < 8; iy++) {
+              const float* block_row = idct + (dy * 8 + iy) * stride + dx * 8;
+              for (size_t ix = 0; ix < 8; ix++) {
+                sum += block_row[ix];
+              }
+            }
+            sum /= 64.0f;
+            ASSERT_NEAR(sum, dc[dy * stride + dx], 1e-6) << "acs " << type;
+          }
+        }
+      }
+    }
+  }
+};
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(
+    AcStrategyRoundtripDownsample,
+    ::testing::Range(0, int(AcStrategy::Type::kNumValidStrategies)));
+
+TEST_P(AcStrategyRoundtripDownsample, Test) { Run(); }
+
+// Test that IDCT(block with zeros in the non-topleft corner) -> downsampled
+// IDCT is the same as IDCT -> DC(2x2) of the same block.
+class AcStrategyDownsample : public ::hwy::TestWithParamTargetAndT<int> {
+ protected:
+  void Run() {
+    const AcStrategy::Type type = static_cast<AcStrategy::Type>(GetParam());
+    const AcStrategy acs = AcStrategy::FromRawStrategy(type);
+    const size_t stride = acs.covered_blocks_x() * 8;
+    // NOTE(review): CoefficientLayout presumably reorders the two extents to
+    // match how coefficients are stored - confirm against its definition.
+    size_t cx = acs.covered_blocks_y();
+    size_t cy = acs.covered_blocks_x();
+    CoefficientLayout(&cy, &cx);
+
+    // Four disjoint buffers. coeffs must not share storage with
+    // idct_acs_downsampled: it is the input of the call that writes the latter.
+    auto mem = hwy::AllocateAligned<float>(4 * AcStrategy::kMaxCoeffArea);
+    float* scratch_space = mem.get();
+    float* idct = scratch_space + AcStrategy::kMaxCoeffArea;
+    float* idct_acs_downsampled = idct + AcStrategy::kMaxCoeffArea;
+    float* coeffs = idct_acs_downsampled + AcStrategy::kMaxCoeffArea;
+
+    for (size_t y = 0; y < cy; y++) {
+      for (size_t x = 0; x < cx; x++) {
+        std::fill_n(coeffs, AcStrategy::kMaxCoeffArea, 0);
+        coeffs[y * cx * 8 + x] = 0.2f;
+        TransformToPixels(type, coeffs, idct, stride, scratch_space);
+        std::fill_n(coeffs, AcStrategy::kMaxCoeffArea, 0);
+        coeffs[y * cx * 8 + x] = 0.2f;
+        DCFromLowestFrequencies(type, coeffs, idct_acs_downsampled, stride);
+        // Downsample: average each 8x8 block of the full-resolution IDCT.
+        for (size_t dy = 0; dy < acs.covered_blocks_y(); dy++) {
+          for (size_t dx = 0; dx < acs.covered_blocks_x(); dx++) {
+            float sum = 0;
+            for (size_t iy = 0; iy < 8; iy++) {
+              for (size_t ix = 0; ix < 8; ix++) {
+                sum += idct[(dy * 8 + iy) * stride + dx * 8 + ix];
+              }
+            }
+            sum /= 64;
+            ASSERT_NEAR(sum, idct_acs_downsampled[dy * stride + dx], 1e-6)
+                << " acs " << type;
+          }
+        }
+      }
+    }
+  }
+};
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(
+    AcStrategyDownsample,
+    ::testing::Range(0, int(AcStrategy::Type::kNumValidStrategies)));
+
+TEST_P(AcStrategyDownsample, Test) { Run(); }
+
+class AcStrategyTargetTest : public ::hwy::TestWithParamTarget {};
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(AcStrategyTargetTest);
+
+// A unit impulse at each of the 16 positions must survive AFVDCT4x4 + inverse.
+TEST_P(AcStrategyTargetTest, RoundtripAFVDCT) {
+  HWY_ALIGN_MAX float idct[16];
+  for (size_t pos = 0; pos < 16; pos++) {
+    HWY_ALIGN_MAX float pixels[16] = {};
+    HWY_ALIGN_MAX float coeffs[16] = {};
+    pixels[pos] = 1;
+    AFVDCT4x4(pixels, coeffs);
+    AFVIDCT4x4(coeffs, idct);
+    for (size_t j = 0; j < 16; j++) {
+      EXPECT_NEAR(idct[j], pixels[j], 1e-6);
+    }
+  }
+}
+
+TEST_P(AcStrategyTargetTest, BenchmarkAFV) {
+  const AcStrategy::Type type = AcStrategy::Type::AFV0;
+  HWY_ALIGN_MAX float coeffs[64] = {};  // all-zero coefficients
+  HWY_ALIGN_MAX float pixels[64] = {1};
+  HWY_ALIGN_MAX float scratch_space[64] = {};
+  for (size_t remaining = size_t{1} << 14; remaining != 0; --remaining) {
+    TransformToPixels(type, coeffs, pixels, 8, scratch_space);
+    TransformFromPixels(type, pixels, 8, coeffs, scratch_space);
+  }
+  EXPECT_NEAR(pixels[0], 0.0, 1E-6);
+}
+
+TEST_P(AcStrategyTargetTest, BenchmarkAFVDCT) {
+  HWY_ALIGN_MAX float coeffs[64] = {};
+  HWY_ALIGN_MAX float pixels[64] = {1};  // only pixels[0] is non-zero
+  for (size_t remaining = size_t{1} << 14; remaining != 0; --remaining) {
+    AFVDCT4x4(pixels, coeffs);
+    AFVIDCT4x4(coeffs, pixels);
+  }
+  EXPECT_NEAR(pixels[0], 1.0, 1E-6);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/adaptive_reconstruction_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/adaptive_reconstruction_test.cc
new file mode 100644
index 0000000000..788bb7cc7f
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/adaptive_reconstruction_test.cc
@@ -0,0 +1,184 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_reconstruct.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/test_utils.h"
+
+namespace jxl {
+namespace {
+
+const size_t xsize = 16;  // width of every generated test image
+const size_t ysize = 8;   // height of every generated test image
+
+void GenerateFlat(const float background, const float foreground,
+                  std::vector<Image3F>* images) {
+  // Appends one image per channel c: plane c is uniformly `foreground`, the
+  // other planes are uniformly `background`.
+  for (size_t c = 0; c < Image3F::kNumPlanes; ++c) {
+    Image3F flat(xsize, ysize);
+    for (size_t plane = 0; plane < 3; ++plane) {
+      const float value = (plane == c) ? foreground : background;
+      for (size_t y = 0; y < ysize; ++y) {
+        float* row = flat.PlaneRow(plane, y);
+        std::fill(row, row + xsize, value);
+      }
+    }
+    images->push_back(std::move(flat));
+  }
+}
+
+// One image per (channel, row, column): a lone foreground sample on background.
+void GeneratePoints(const float background, const float foreground,
+                    std::vector<Image3F>* images) {
+  for (size_t c = 0; c < Image3F::kNumPlanes; ++c) {
+    for (size_t y = 0; y < ysize; ++y) {
+      for (size_t x = 0; x < xsize; ++x) {
+        images->emplace_back(xsize, ysize);
+        Image3F& point = images->back();
+        FillImage(background, &point);
+        point.PlaneRow(c, y)[x] = foreground;
+      }
+    }
+  }
+}
+
+void GenerateHorzEdges(const float background, const float foreground,
+                       std::vector<Image3F>* images) {
+  // Per channel c and per row y in [1, ysize): rows [y, ysize) are foreground.
+  for (size_t c = 0; c < Image3F::kNumPlanes; ++c) {
+    for (size_t first_fg_row = 1; first_fg_row < ysize; ++first_fg_row) {
+      Image3F edge(xsize, ysize);
+      FillImage(background, &edge);
+      for (size_t iy = first_fg_row; iy < ysize; ++iy) {
+        std::fill_n(edge.PlaneRow(c, iy), xsize, foreground);
+      }
+      images->push_back(std::move(edge));
+    }
+  }
+}
+
+void GenerateVertEdges(const float background, const float foreground,
+                       std::vector<Image3F>* images) {
+  // Appends, per channel c and per column x in [1, xsize), a background image
+  // whose plane c is `foreground` from column x to the right edge.
+  for (size_t c = 0; c < Image3F::kNumPlanes; ++c) {
+    for (size_t first_fg_col = 1; first_fg_col < xsize; ++first_fg_col) {
+      Image3F edge(xsize, ysize);
+      FillImage(background, &edge);
+      // Step edge: columns [first_fg_col, xsize) of every row turn foreground.
+      for (size_t iy = 0; iy < ysize; ++iy) {
+        float* JXL_RESTRICT row = edge.PlaneRow(c, iy);
+        std::fill(row + first_fg_col, row + xsize, foreground);
+      }
+      images->push_back(std::move(edge));
+    }
+  }
+}
+
+void DumpTestImage(const char* name, const Image3F& img) {
+  // Prints the three plane values of every pixel to stderr, one row per line.
+  fprintf(stderr, "Image %s:\n", name);
+  for (size_t y = 0; y < img.ysize(); ++y) {
+    const float* p[3] = {img.ConstPlaneRow(0, y), img.ConstPlaneRow(1, y),
+                         img.ConstPlaneRow(2, y)};
+    for (size_t x = 0; x < img.xsize(); ++x) {
+      fprintf(stderr, "%5.1f|%5.1f|%5.1f ", p[0][x], p[1][x], p[2][x]);
+    }
+    fprintf(stderr, "\n");
+  }
+  fprintf(stderr, "\n");
+}
+
+// Ensures input remains unchanged by filter - verifies the edge-preserving
+// nature of the filter because inputs are piecewise constant.
+// `epf_iters` is written to the loop filter's epf_iters; gab is switched off.
+void EnsureUnchanged(const float background, const float foreground,
+                     uint32_t epf_iters) {
+  std::vector<Image3F> images;
+  GenerateFlat(background, foreground, &images);
+  GeneratePoints(background, foreground, &images);
+  GenerateHorzEdges(background, foreground, &images);
+  GenerateVertEdges(background, foreground, &images);
+
+  CodecMetadata metadata;
+  JXL_CHECK(metadata.size.Set(xsize, ysize));
+  metadata.m.xyb_encoded = false;
+  FrameHeader frame_header(&metadata);
+  // Ensure no CT is applied
+  frame_header.color_transform = ColorTransform::kNone;
+  LoopFilter& lf = frame_header.loop_filter;
+  lf.gab = false;
+  lf.epf_iters = epf_iters;
+  FrameDimensions frame_dim = frame_header.ToFrameDimensions();
+
+  jxl::PassesDecoderState state;
+  JXL_CHECK(
+      jxl::InitializePassesSharedState(frame_header, &state.shared_storage));
+  JXL_CHECK(state.Init());
+  state.InitForAC(/*pool=*/nullptr);
+
+  JXL_CHECK(state.filter_weights.Init(lf, frame_dim));
+  FillImage(-0.5f, &state.filter_weights.sigma);
+
+  for (const Image3F& in : images) {
+    // The decoder state gets a copy of the input; `out` is pre-filled with
+    // garbage so that a frame left unwritten cannot pass the comparison.
+    state.decoded = CopyImage(in);
+    ImageBundle out(&metadata.m);
+    out.SetFromImage(CopyImage(in), ColorEncoding::LinearSRGB());
+    FillImage(-99.f, out.color());  // Initialized with garbage.
+    // Call with `force_fir` set to true to force to apply filters to all of the
+    // input image.
+    JXL_CHECK(FinalizeFrameDecoding(&out, &state, /*pool=*/nullptr,
+                                    /*force_fir=*/true,
+                                    /*skip_blending=*/true));
+
+#if JXL_HIGH_PRECISION
+    VerifyRelativeError(in, *out.color(), 1E-3, 1E-4);
+#else
+    VerifyRelativeError(in, *out.color(), 1E-2, 1E-2);
+#endif
+    if (testing::Test::HasFatalFailure()) {
+      DumpTestImage("in", in);
+      DumpTestImage("out", *out.color());
+    }
+  }
+}
+
+}  // namespace
+
+class AdaptiveReconstructionTest : public testing::TestWithParam<uint32_t> {};
+// The parameter is the epf_iters value handed to EnsureUnchanged().
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(EPFItersGroup, AdaptiveReconstructionTest,
+                                   testing::Values(1, 2, 3),
+                                   testing::PrintToStringParamName());
+// Background 1, foreground 128.
+TEST_P(AdaptiveReconstructionTest, TestBright) {
+  EnsureUnchanged(1.0f, 128.0f, GetParam());
+}
+TEST_P(AdaptiveReconstructionTest, TestDark) {
+  EnsureUnchanged(128.0f, 1.0f, GetParam());  // background 128, foreground 1
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/alpha.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/alpha.cc
new file mode 100644
index 0000000000..77ac9021d7
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/alpha.cc
@@ -0,0 +1,111 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/alpha.h"
+
+#include <string.h>
+
+#include <algorithm>
+
+namespace jxl {
+// Limits x to the range [0, 1].
+static float Clamp(float x) { return std::max(std::min(1.0f, x), 0.0f); }
+
+void PerformAlphaBlending(const AlphaBlendingInputLayer& bg,
+                          const AlphaBlendingInputLayer& fg,
+                          const AlphaBlendingOutput& out, size_t num_pixels,
+                          bool alpha_is_premultiplied, bool clamp) {
+  // Composites fg over bg for r, g, b and a. With `clamp`, the foreground
+  // alpha is limited to [0, 1] before use.
+  for (size_t i = 0; i < num_pixels; ++i) {
+    const float alpha = clamp ? Clamp(fg.a[i]) : fg.a[i];
+    const float rest = 1.f - alpha;  // weight left for the background
+    if (alpha_is_premultiplied) {
+      out.r[i] = (fg.r[i] + bg.r[i] * rest);
+      out.g[i] = (fg.g[i] + bg.g[i] * rest);
+      out.b[i] = (fg.b[i] + bg.b[i] * rest);
+      out.a[i] = (1.f - rest * (1.f - bg.a[i]));
+      continue;
+    }
+    const float new_a = 1.f - rest * (1.f - bg.a[i]);
+    const float rnew_a = (new_a > 0 ? 1.f / new_a : 0.f);  // no division by 0
+    out.r[i] = (fg.r[i] * alpha + bg.r[i] * bg.a[i] * rest) * rnew_a;
+    out.g[i] = (fg.g[i] * alpha + bg.g[i] * bg.a[i] * rest) * rnew_a;
+    out.b[i] = (fg.b[i] * alpha + bg.b[i] * bg.a[i] * rest) * rnew_a;
+    out.a[i] = new_a;
+  }
+}
+void PerformAlphaBlending(const float* bg, const float* bga, const float* fg,
+                          const float* fga, float* out, size_t num_pixels,
+                          bool alpha_is_premultiplied, bool clamp) {
+  // Single-plane variant. The foreground alpha is limited to [0, 1] only when
+  // `clamp` is set, as in the multi-plane overload.
+  if (bg == bga && fg == fga) {  // the plane being blended is alpha itself
+    for (size_t x = 0; x < num_pixels; ++x) {
+      const float fa = clamp ? Clamp(fga[x]) : fga[x];
+      out[x] = (1.f - (1.f - fa) * (1.f - bga[x]));
+    }
+  } else if (alpha_is_premultiplied) {
+    for (size_t x = 0; x < num_pixels; ++x) {
+      const float fa = clamp ? Clamp(fga[x]) : fga[x];
+      out[x] = (fg[x] + bg[x] * (1.f - fa));
+    }
+  } else {
+    for (size_t x = 0; x < num_pixels; ++x) {
+      const float fa = clamp ? Clamp(fga[x]) : fga[x];
+      const float new_a = 1.f - (1.f - fa) * (1.f - bga[x]);
+      const float rnew_a = (new_a > 0 ? 1.f / new_a : 0.f);  // no division by 0
+      out[x] = (fg[x] * fa + bg[x] * bga[x] * (1.f - fa)) * rnew_a;
+    }
+  }
+}
+
+void PerformAlphaWeightedAdd(const float* bg, const float* fg, const float* fga,
+                             float* out, size_t num_pixels, bool clamp) {
+  if (fg == fga) {  // fg is the alpha plane itself: the result is just bg
+    memmove(out, bg, num_pixels * sizeof(*out));  // safe if out aliases bg
+  } else {
+    for (size_t x = 0; x < num_pixels; ++x) {  // clamp alpha only if asked
+      out[x] = bg[x] + fg[x] * (clamp ? Clamp(fga[x]) : fga[x]);
+    }
+  }
+}
+
+void PerformMulBlending(const float* bg, const float* fg, float* out,
+                        size_t num_pixels, bool clamp) {
+  // Per-sample product out = bg * fg. With `clamp`, fg is first limited to
+  // [0, 1]; otherwise it is used as-is.
+  for (size_t i = 0; i < num_pixels; ++i) {
+    float factor = fg[i];
+    if (clamp) {
+      factor = Clamp(factor);
+    }
+    out[i] = bg[i] * factor;
+  }
+}
+
+void PremultiplyAlpha(float* JXL_RESTRICT r, float* JXL_RESTRICT g,
+                      float* JXL_RESTRICT b, const float* JXL_RESTRICT a,
+                      size_t num_pixels) {
+  for (size_t i = 0; i < num_pixels; ++i) {
+    const float alpha = std::max(kSmallAlpha, a[i]);  // never below kSmallAlpha
+    r[i] = r[i] * alpha;
+    g[i] = g[i] * alpha;
+    b[i] = b[i] * alpha;
+  }
+}
+
+void UnpremultiplyAlpha(float* JXL_RESTRICT r, float* JXL_RESTRICT g,
+                        float* JXL_RESTRICT b, const float* JXL_RESTRICT a,
+                        size_t num_pixels) {
+  for (size_t i = 0; i < num_pixels; ++i) {
+    const float inv_alpha = 1.f / std::max(kSmallAlpha, a[i]);  // no div by 0
+    r[i] = r[i] * inv_alpha;
+    g[i] = g[i] * inv_alpha;
+    b[i] = b[i] * inv_alpha;
+  }
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/alpha.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/alpha.h
new file mode 100644
index 0000000000..efb76c800f
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/alpha.h
@@ -0,0 +1,66 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ALPHA_H_
+#define LIB_JXL_ALPHA_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <limits>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+// A very small value to avoid divisions by zero when converting to
+// unpremultiplied alpha. Page 21 of the technical introduction to OpenEXR
+// (https://www.openexr.com/documentation/TechnicalIntroduction.pdf) recommends
+// "a power of two" that is "less than half of the smallest positive 16-bit
+// floating-point value". That smallest value happens to be the denormal number
+// 2^-24, so 2^-26 should be a good choice.
+static constexpr float kSmallAlpha = 1.f / (1u << 26u);
+
+struct AlphaBlendingInputLayer {  // read-only planes of one blending input
+  const float* r;
+  const float* g;
+  const float* b;
+  const float* a;  // alpha plane
+};
+
+struct AlphaBlendingOutput {  // writable planes that receive the blend result
+  float* r;
+  float* g;
+  float* b;
+  float* a;  // alpha plane
+};
+
+// Note: The pointers in `out` are allowed to alias those in `bg` or `fg`.
+// No pointer shall be null.
+void PerformAlphaBlending(const AlphaBlendingInputLayer& bg,
+                          const AlphaBlendingInputLayer& fg,
+                          const AlphaBlendingOutput& out, size_t num_pixels,
+                          bool alpha_is_premultiplied, bool clamp);
+// Single plane alpha blending
+void PerformAlphaBlending(const float* bg, const float* bga, const float* fg,
+                          const float* fga, float* out, size_t num_pixels,
+                          bool alpha_is_premultiplied, bool clamp);
+
+void PerformAlphaWeightedAdd(const float* bg, const float* fg, const float* fga,
+                             float* out, size_t num_pixels, bool clamp);
+
+void PerformMulBlending(const float* bg, const float* fg, float* out,
+                        size_t num_pixels, bool clamp);
+
+void PremultiplyAlpha(float* JXL_RESTRICT r, float* JXL_RESTRICT g,
+                      float* JXL_RESTRICT b, const float* JXL_RESTRICT a,
+                      size_t num_pixels);
+void UnpremultiplyAlpha(float* JXL_RESTRICT r, float* JXL_RESTRICT g,
+                        float* JXL_RESTRICT b, const float* JXL_RESTRICT a,
+                        size_t num_pixels);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ALPHA_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/alpha_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/alpha_test.cc
new file mode 100644
index 0000000000..d90bbd37d9
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/alpha_test.cc
@@ -0,0 +1,134 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/alpha.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace jxl {
+namespace {
+
+using ::testing::_;
+using ::testing::ElementsAre;
+using ::testing::FloatNear;
+
+TEST(AlphaTest, BlendingWithNonPremultiplied) {
+  const float bg_rgb[3] = {100, 110, 120};
+  const float bg_a = 180.f / 255;
+  const float fg_rgb[3] = {25, 21, 23};
+  const float fg_a = 15420.f / 65535;
+  const float fg_a2 = 2.0f;  // deliberately above 1, for the clamped call
+  float out_rgb[3];
+  float out_a;
+  PerformAlphaBlending(
+      /*bg=*/{&bg_rgb[0], &bg_rgb[1], &bg_rgb[2], &bg_a},
+      /*fg=*/{&fg_rgb[0], &fg_rgb[1], &fg_rgb[2], &fg_a},
+      /*out=*/{&out_rgb[0], &out_rgb[1], &out_rgb[2], &out_a}, 1,
+      /*alpha_is_premultiplied=*/false, /*clamp=*/false);
+  EXPECT_THAT(out_rgb,
+              ElementsAre(FloatNear(77.2f, .05f), FloatNear(83.0f, .05f),
+                          FloatNear(90.6f, .05f)));
+  EXPECT_NEAR(out_a, 3174.f / 4095, 1e-5);
+  PerformAlphaBlending(
+      /*bg=*/{&bg_rgb[0], &bg_rgb[1], &bg_rgb[2], &bg_a},
+      /*fg=*/{&fg_rgb[0], &fg_rgb[1], &fg_rgb[2], &fg_a2},
+      /*out=*/{&out_rgb[0], &out_rgb[1], &out_rgb[2], &out_a}, 1,
+      /*alpha_is_premultiplied=*/false, /*clamp=*/true);
+  EXPECT_THAT(out_rgb, ElementsAre(FloatNear(fg_rgb[0], .05f),
+                                   FloatNear(fg_rgb[1], .05f),
+                                   FloatNear(fg_rgb[2], .05f)));
+  EXPECT_NEAR(out_a, 1.0f, 1e-5);  // alpha 2.0 was clamped to 1
+}
+
+TEST(AlphaTest, BlendingWithPremultiplied) {
+  const float bg_rgb[3] = {100, 110, 120};
+  const float bg_a = 180.f / 255;
+  const float fg_rgb[3] = {25, 21, 23};
+  const float fg_a = 15420.f / 65535;
+  const float fg_a2 = 2.0f;  // deliberately above 1, for the clamped call
+  float out_rgb[3];
+  float out_a;
+  PerformAlphaBlending(
+      /*bg=*/{&bg_rgb[0], &bg_rgb[1], &bg_rgb[2], &bg_a},
+      /*fg=*/{&fg_rgb[0], &fg_rgb[1], &fg_rgb[2], &fg_a},
+      /*out=*/{&out_rgb[0], &out_rgb[1], &out_rgb[2], &out_a}, 1,
+      /*alpha_is_premultiplied=*/true, /*clamp=*/false);
+  EXPECT_THAT(out_rgb,
+              ElementsAre(FloatNear(101.5f, .05f), FloatNear(105.1f, .05f),
+                          FloatNear(114.8f, .05f)));
+  EXPECT_NEAR(out_a, 3174.f / 4095, 1e-5);
+  PerformAlphaBlending(
+      /*bg=*/{&bg_rgb[0], &bg_rgb[1], &bg_rgb[2], &bg_a},
+      /*fg=*/{&fg_rgb[0], &fg_rgb[1], &fg_rgb[2], &fg_a2},
+      /*out=*/{&out_rgb[0], &out_rgb[1], &out_rgb[2], &out_a}, 1,
+      /*alpha_is_premultiplied=*/true, /*clamp=*/true);
+  EXPECT_THAT(out_rgb, ElementsAre(FloatNear(fg_rgb[0], .05f),
+                                   FloatNear(fg_rgb[1], .05f),
+                                   FloatNear(fg_rgb[2], .05f)));
+  EXPECT_NEAR(out_a, 1.0f, 1e-5);  // alpha 2.0 was clamped to 1
+}
+
+TEST(AlphaTest, Mul) {
+  const float bg = 100;
+  const float fg = 25;  // above 1, so clamping changes the product
+  float out;
+  PerformMulBlending(&bg, &fg, &out, 1, /*clamp=*/false);
+  EXPECT_THAT(out, FloatNear(fg * bg, .05f));
+  PerformMulBlending(&bg, &fg, &out, 1, /*clamp=*/true);
+  EXPECT_THAT(out, FloatNear(bg, .05f));  // fg was clamped to 1
+}
+
+TEST(AlphaTest, PremultiplyAndUnpremultiply) {
+  const float alpha[] = {0.f, 63.f / 255, 127.f / 255, 1.f};  // incl. zero
+  float r[] = {120, 130, 140, 150};
+  float g[] = {124, 134, 144, 154};
+  float b[] = {127, 137, 147, 157};
+
+  PremultiplyAlpha(r, g, b, alpha, 4);  // in place
+  EXPECT_THAT(
+      r, ElementsAre(FloatNear(0.f, 1e-5f), FloatNear(130 * 63.f / 255, 1e-5f),
+                     FloatNear(140 * 127.f / 255, 1e-5f), 150));
+  EXPECT_THAT(
+      g, ElementsAre(FloatNear(0.f, 1e-5f), FloatNear(134 * 63.f / 255, 1e-5f),
+                     FloatNear(144 * 127.f / 255, 1e-5f), 154));
+  EXPECT_THAT(
+      b, ElementsAre(FloatNear(0.f, 1e-5f), FloatNear(137 * 63.f / 255, 1e-5f),
+                     FloatNear(147 * 127.f / 255, 1e-5f), 157));
+
+  UnpremultiplyAlpha(r, g, b, alpha, 4);  // must restore the original values
+  EXPECT_THAT(r, ElementsAre(FloatNear(120, 1e-4f), FloatNear(130, 1e-4f),
+                             FloatNear(140, 1e-4f), FloatNear(150, 1e-4f)));
+  EXPECT_THAT(g, ElementsAre(FloatNear(124, 1e-4f), FloatNear(134, 1e-4f),
+                             FloatNear(144, 1e-4f), FloatNear(154, 1e-4f)));
+  EXPECT_THAT(b, ElementsAre(FloatNear(127, 1e-4f), FloatNear(137, 1e-4f),
+                             FloatNear(147, 1e-4f), FloatNear(157, 1e-4f)));
+}
+
+TEST(AlphaTest, UnpremultiplyAndPremultiply) {
+  const float alpha[] = {0.f, 63.f / 255, 127.f / 255, 1.f};  // incl. zero
+  float r[] = {50, 60, 70, 80};
+  float g[] = {54, 64, 74, 84};
+  float b[] = {57, 67, 77, 87};
+
+  UnpremultiplyAlpha(r, g, b, alpha, 4);  // in place
+  EXPECT_THAT(r, ElementsAre(_, FloatNear(60 * 255.f / 63, 1e-4f),  // _: a = 0
+                             FloatNear(70 * 255.f / 127, 1e-4f), 80));
+  EXPECT_THAT(g, ElementsAre(_, FloatNear(64 * 255.f / 63, 1e-4f),
+                             FloatNear(74 * 255.f / 127, 1e-4f), 84));
+  EXPECT_THAT(b, ElementsAre(_, FloatNear(67 * 255.f / 63, 1e-4f),
+                             FloatNear(77 * 255.f / 127, 1e-4f), 87));
+
+  PremultiplyAlpha(r, g, b, alpha, 4);  // must restore the original values
+  EXPECT_THAT(r, ElementsAre(FloatNear(50, 1e-4f), FloatNear(60, 1e-4f),
+                             FloatNear(70, 1e-4f), FloatNear(80, 1e-4f)));
+  EXPECT_THAT(g, ElementsAre(FloatNear(54, 1e-4f), FloatNear(64, 1e-4f),
+                             FloatNear(74, 1e-4f), FloatNear(84, 1e-4f)));
+  EXPECT_THAT(b, ElementsAre(FloatNear(57, 1e-4f), FloatNear(67, 1e-4f),
+                             FloatNear(77, 1e-4f), FloatNear(87, 1e-4f)));
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ans_common.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ans_common.cc
new file mode 100644
index 0000000000..cc0d58b446
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ans_common.cc
@@ -0,0 +1,148 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/ans_common.h"
+
+#include <numeric>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+std::vector<int> CreateFlatHistogram(int length, int total_count) {
+  // Every bucket must end up with a strictly positive count.
+  JXL_ASSERT(length > 0);
+  JXL_ASSERT(length <= total_count);
+  const int base = total_count / length;
+  const int extra = total_count % length;
+  std::vector<int> histogram(length, base);
+  // The first `extra` buckets absorb the remainder of the division.
+  for (int pos = 0; pos < extra; ++pos) histogram[pos] += 1;
+  return histogram;
+}
+
+// First, all trailing non-occurring symbols are removed from the distribution;
+// if this leaves the distribution empty, a dummy symbol with max weight is
+// added. This ensures that the resulting distribution sums to total table size.
+// Then, `entry_size` is chosen to be the largest power of two so that
+// `table_size` = ANS_TAB_SIZE/`entry_size` is at least as big as the
+// distribution size.
+// Note that each entry will only ever contain two different symbols, and
+// consecutive ranges of offsets, which allows us to use a compact
+// representation.
+// Each entry is initialized with only the (symbol=i, offset) pairs; then
+// positions for which the entry overflows (i.e. distribution[i] > entry_size)
+// or is not full are computed, and put into a stack in increasing order.
+// Missing symbols in the distribution are padded with 0 (because `table_size`
+// >= number of symbols). The `cutoff` value for each entry is initialized to
+// the number of occupied slots in that entry (i.e. `distribution[i]`). While
+// the overflowing-symbol stack is not empty (which implies that the
+// underflowing-symbol stack also is not), the top overfull and underfull
+// positions are popped from the stack; the empty slots in the underfull entry
+// are then filled with as many slots as needed from the overfull entry; such
+// slots are placed after the slots in the overfull entry, and `offsets1` is
+// computed accordingly. The formerly underfull entry is thus now neither
+// underfull nor overfull, and represents exactly two symbols. The overfull
+// entry might be either overfull or underfull, and is pushed into the
+// corresponding stack.
+void InitAliasTable(std::vector<int> distribution, uint32_t range,
+                    size_t log_alpha_size, AliasTable::Entry* JXL_RESTRICT a) {
+  while (!distribution.empty() && distribution.back() == 0) {
+    distribution.pop_back();
+  }
+  // Ensure that a valid table is always returned, even for an empty
+  // alphabet. Otherwise, a specially-crafted stream might crash the
+  // decoder.
+  if (distribution.empty()) {
+    distribution.emplace_back(range);
+  }
+  const size_t table_size = 1 << log_alpha_size;  // number of entries in `a`
+#if JXL_ENABLE_ASSERT  // `sum` is only read by the JXL_ASSERT just below
+  int sum = std::accumulate(distribution.begin(), distribution.end(), 0);
+#endif  // JXL_ENABLE_ASSERT
+  JXL_ASSERT(static_cast<uint32_t>(sum) == range);
+  // range must be a power of two
+  JXL_ASSERT((range & (range - 1)) == 0);
+  JXL_ASSERT(distribution.size() <= table_size);
+  JXL_ASSERT(table_size <= range);
+  const uint32_t entry_size = range >> log_alpha_size;  // this is exact
+  // Special case for single-symbol distributions, that ensures that the state
+  // does not change when decoding from such a distribution. Note that, since we
+  // hardcode offset0 == 0, it is not straightforward (if at all possible) to
+  // fix the general case to produce this result.
+  for (size_t sym = 0; sym < distribution.size(); sym++) {
+    if (distribution[sym] == ANS_TAB_SIZE) {
+      for (size_t i = 0; i < table_size; i++) {
+        a[i].right_value = sym;
+        a[i].cutoff = 0;  // every position maps to right_value
+        a[i].offsets1 = entry_size * i;
+        a[i].freq0 = 0;
+        a[i].freq1_xor_freq0 = ANS_TAB_SIZE;  // freq0 is 0, so this is freq1
+      }
+      return;
+    }
+  }
+  std::vector<uint32_t> underfull_posn;  // entries with cutoff < entry_size
+  std::vector<uint32_t> overfull_posn;   // entries with cutoff > entry_size
+  std::vector<uint32_t> cutoffs(1 << log_alpha_size);
+  // Initialize entries.
+  for (size_t i = 0; i < distribution.size(); i++) {
+    cutoffs[i] = distribution[i];
+    if (cutoffs[i] > entry_size) {
+      overfull_posn.push_back(i);
+    } else if (cutoffs[i] < entry_size) {
+      underfull_posn.push_back(i);
+    }
+  }
+  for (uint32_t i = distribution.size(); i < table_size; i++) {  // padding
+    cutoffs[i] = 0;
+    underfull_posn.push_back(i);
+  }
+  // Reassign overflow/underflow values.
+  while (!overfull_posn.empty()) {
+    uint32_t overfull_i = overfull_posn.back();
+    overfull_posn.pop_back();
+    JXL_ASSERT(!underfull_posn.empty());
+    uint32_t underfull_i = underfull_posn.back();
+    underfull_posn.pop_back();
+    uint32_t underfull_by = entry_size - cutoffs[underfull_i];  // free slots
+    cutoffs[overfull_i] -= underfull_by;
+    // overfull positions have their original symbols
+    a[underfull_i].right_value = overfull_i;
+    a[underfull_i].offsets1 = cutoffs[overfull_i];
+    // Slots in the right part of entry underfull_i were taken from the end
+    // of the symbols in entry overfull_i.
+    if (cutoffs[overfull_i] < entry_size) {
+      underfull_posn.push_back(overfull_i);
+    } else if (cutoffs[overfull_i] > entry_size) {
+      overfull_posn.push_back(overfull_i);
+    }
+  }
+  for (uint32_t i = 0; i < table_size; i++) {
+    // cutoffs[i] is properly initialized but the clang-analyzer doesn't infer
+    // it since it is partially initialized across two for-loops.
+    // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult)
+    if (cutoffs[i] == entry_size) {
+      a[i].right_value = i;
+      a[i].offsets1 = 0;
+      a[i].cutoff = 0;
+    } else {
+      // Note that, if cutoff is not equal to entry_size,
+      // a[i].offsets1 was initialized with (overfull cutoff) -
+      // (entry_size - a[i].cutoff). Thus, subtracting
+      // a[i].cutoff cannot make it negative.
+      a[i].offsets1 -= cutoffs[i];
+      a[i].cutoff = cutoffs[i];
+    }
+    const size_t freq0 = i < distribution.size() ? distribution[i] : 0;
+    const size_t i1 = a[i].right_value;
+    const size_t freq1 = i1 < distribution.size() ? distribution[i1] : 0;
+    a[i].freq0 = static_cast<uint16_t>(freq0);
+    a[i].freq1_xor_freq0 = static_cast<uint16_t>(freq1 ^ freq0);
+  }
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ans_common.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ans_common.h
new file mode 100644
index 0000000000..12ce1eff36
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ans_common.h
@@ -0,0 +1,143 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ANS_COMMON_H_
+#define LIB_JXL_ANS_COMMON_H_
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <hwy/cache_control.h>  // Prefetch
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Returns the precision (number of bits) that should be used to store
+// a histogram count such that Log2Floor(count) == logcount.
+static JXL_INLINE uint32_t GetPopulationCountPrecision(uint32_t logcount,
+                                                       uint32_t shift) {
+  // Upper bound: `shift` minus half of (ANS_LOG_TAB_SIZE - logcount).
+  const int cap = int(shift) - int((ANS_LOG_TAB_SIZE - logcount) >> 1);
+  const int32_t bits = std::min<int>(logcount, cap);
+  return bits < 0 ? 0 : bits;  // never negative
+}
+
+// Returns a histogram where the counts are positive, differ by at most 1,
+// and add up to total_count. The bigger counts (if any) are at the beginning
+// of the histogram.
+std::vector<int> CreateFlatHistogram(int length, int total_count);
+
+// An alias table implements a mapping from the [0, ANS_TAB_SIZE) range into
+// the [0, ANS_MAX_ALPHABET_SIZE) range, satisfying the following conditions:
+// - each symbol occurs as many times as specified by any valid distribution
+//   of frequencies of the symbols. A valid distribution here is an array of
+//   ANS_MAX_ALPHABET_SIZE that contains numbers in the range [0, ANS_TAB_SIZE],
+//   and whose sum is ANS_TAB_SIZE.
+// - lookups can be done in constant time, and also return how many smaller
+//   input values map into the same symbol, according to some well-defined order
+//   of input values.
+// - the space used by the alias table is given by a small constant times the
+//   index of the largest symbol with nonzero probability in the distribution.
+// Each of the entries in the table covers a range of `entry_size` values in the
+// [0, ANS_TAB_SIZE) range; consecutive entries represent consecutive
+// sub-ranges. In the range covered by entry `i`, the first `cutoff` values map
+// to symbol `i`, while the others map to symbol `right_value`.
+//
+// TODO(veluca): consider making the order used for computing offsets easier to
+// define - it is currently defined by the algorithm to compute the alias table.
+// Beware of breaking the implicit assumption that symbols that come after the
+// cutoff value should have an offset at least as big as the cutoff.
+
+struct AliasTable {
+  struct Symbol {
+    size_t value;   // decoded symbol
+    size_t offset;  // position among the occurrences of `value`
+    size_t freq;    // frequency of `value`
+  };
+
+// Working set size matters here (~64 tables x 256 entries).
+// offsets0 is always zero (beginning of [0] side among the same symbol).
+// offsets1 is an offset of (pos >= cutoff) side decremented by cutoff.
+#pragma pack(push, 1)
+  struct Entry {
+    uint8_t cutoff;       // < kEntrySizeMinus1 when used by ANS.
+    uint8_t right_value;  // < alphabet size.
+    uint16_t freq0;       // frequency of the entry's own symbol `i`
+
+    // Only used if `greater` (see Lookup)
+    uint16_t offsets1;         // <= ANS_TAB_SIZE
+    uint16_t freq1_xor_freq0;  // for branchless ternary in Lookup
+  };
+#pragma pack(pop)
+
+  // Dividing `value` by `entry_size` determines `i`, the entry which is
+  // responsible for the input. If the remainder `pos` is below `cutoff`, the
+  // mapped symbol is `i` with frequency `freq0`, and the returned offset is
+  // `pos` itself (the offset of this side is always zero). Otherwise the
+  // mapped symbol is `right_value`, its frequency is `freq0 ^
+  // freq1_xor_freq0`, and the returned offset is `pos + offsets1`, where
+  // `offsets1` is stored already decremented by `cutoff`.
+  // NOTE(review): memcpy below needs <cstring>; it is not included directly.
+  static JXL_INLINE Symbol Lookup(const Entry* JXL_RESTRICT table, size_t value,
+                                  size_t log_entry_size,
+                                  size_t entry_size_minus_1) {
+    const size_t i = value >> log_entry_size;
+    const size_t pos = value & entry_size_minus_1;
+
+#if JXL_BYTE_ORDER_LITTLE
+    uint64_t entry;
+    memcpy(&entry, &table[i].cutoff, sizeof(entry));  // all 8 packed bytes
+    const size_t cutoff = entry & 0xFF;              // = MOVZX
+    const size_t right_value = (entry >> 8) & 0xFF;  // = MOVZX
+    const size_t freq0 = (entry >> 16) & 0xFFFF;
+#else
+    // Generates multiple loads with complex addressing.
+    const size_t cutoff = table[i].cutoff;
+    const size_t right_value = table[i].right_value;
+    const size_t freq0 = table[i].freq0;
+#endif
+
+    const bool greater = pos >= cutoff;
+
+#if JXL_BYTE_ORDER_LITTLE
+    const uint64_t conditional = greater ? entry : 0;  // = CMOV
+    const size_t offsets1_or_0 = (conditional >> 32) & 0xFFFF;
+    const size_t freq1_xor_freq0_or_0 = conditional >> 48;
+#else
+    const size_t offsets1_or_0 = greater ? table[i].offsets1 : 0;
+    const size_t freq1_xor_freq0_or_0 = greater ? table[i].freq1_xor_freq0 : 0;
+#endif
+
+    // WARNING: moving this code may interfere with CMOV heuristics.
+    Symbol s;
+    s.value = greater ? right_value : i;
+    s.offset = offsets1_or_0 + pos;
+    s.freq = freq0 ^ freq1_xor_freq0_or_0;  // = greater ? freq1 : freq0
+    // XOR avoids implementation-defined conversion from unsigned to signed.
+    // Alternatives considered: BEXTR is 2 cycles on HSW, SET+shift causes
+    // spills, simple ternary has a long dependency chain.
+
+    return s;
+  }
+
+  static HWY_INLINE void Prefetch(const Entry* JXL_RESTRICT table, size_t value,
+                                  size_t log_entry_size) {
+    const size_t i = value >> log_entry_size;  // same index as in Lookup
+    hwy::Prefetch(table + i);
+  }
+};
+
+// Computes an alias table for a given distribution.
+void InitAliasTable(std::vector<int> distribution, uint32_t range,
+                    size_t log_alpha_size, AliasTable::Entry* JXL_RESTRICT a);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ANS_COMMON_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ans_common_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ans_common_test.cc
new file mode 100644
index 0000000000..1960c795ad
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ans_common_test.cc
@@ -0,0 +1,43 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/ans_common.h"
+
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/ans_params.h"
+
+namespace jxl {
+namespace {
+
+void VerifyAliasDistribution(const std::vector<int>& distribution,
+                             uint32_t range) {
+  constexpr size_t log_alpha_size = 8;
+  constexpr size_t log_entry_size = ANS_LOG_TAB_SIZE - 8;
+  AliasTable::Entry table[1 << log_alpha_size];
+  InitAliasTable(distribution, range, log_alpha_size, table);
+  std::vector<std::vector<uint32_t>> offsets(distribution.size());
+  for (uint32_t value = 0; value < range; value++) {
+    const AliasTable::Symbol hit = AliasTable::Lookup(
+        table, value, log_entry_size, (1 << log_entry_size) - 1);
+    offsets[hit.value].push_back(hit.offset);  // group offsets by symbol
+  }
+  for (uint32_t sym = 0; sym < distribution.size(); sym++) {
+    std::vector<uint32_t>& seen = offsets[sym];
+    ASSERT_EQ(distribution[sym], seen.size());
+    std::sort(seen.begin(), seen.end());  // expect exactly 0, 1, 2, ...
+    for (uint32_t k = 0; k < seen.size(); k++) ASSERT_EQ(seen[k], k);
+  }
+}
+
+TEST(ANSCommonTest, AliasDistributionSmoke) {  // each case sums to the range
+  VerifyAliasDistribution({ANS_TAB_SIZE / 2, ANS_TAB_SIZE / 2}, ANS_TAB_SIZE);
+  VerifyAliasDistribution({ANS_TAB_SIZE}, ANS_TAB_SIZE);  // single symbol
+  VerifyAliasDistribution({0, 0, 0, ANS_TAB_SIZE, 0}, ANS_TAB_SIZE);  // padded
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ans_params.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ans_params.h
new file mode 100644
index 0000000000..4bbc284c0b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ans_params.h
@@ -0,0 +1,36 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ANS_PARAMS_H_
+#define LIB_JXL_ANS_PARAMS_H_
+
+// Common parameters that are needed for both the ANS entropy encoding and
+// decoding methods.
+
+#include <stdint.h>
+#include <stdlib.h>
+
+namespace jxl {
+
+// TODO(veluca): decide if 12 is the best constant here (valid range is up to
+// 16). This requires recomputing the Huffman tables in {enc,dec}_ans.cc
+// 14 gives a 0.2% improvement at d1 and makes d8 slightly worse. This is
+// likely not worth the increase in encoder complexity.
+#define ANS_LOG_TAB_SIZE 12u
+#define ANS_TAB_SIZE (1 << ANS_LOG_TAB_SIZE)  // 4096
+#define ANS_TAB_MASK (ANS_TAB_SIZE - 1)       // 0xFFF
+
+// Maximum alphabet size for prefix coding and for ANS, respectively.
+#define PREFIX_MAX_ALPHABET_SIZE 4096
+#define ANS_MAX_ALPHABET_SIZE 256
+
+// Max number of bits for prefix coding.
+#define PREFIX_MAX_BITS 15
+
+#define ANS_SIGNATURE 0x13  // Initial state, used as CRC.
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ANS_PARAMS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ans_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ans_test.cc
new file mode 100644
index 0000000000..808c5f3aaa
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/ans_test.cc
@@ -0,0 +1,280 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <random>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+
+namespace jxl {
+namespace {
+
+void RoundtripTestcase(int n_histograms, int alphabet_size,
+                       const std::vector<Token>& input_values) {
+  constexpr uint16_t kMagic1 = 0x9e33;  // written before the coded data
+  constexpr uint16_t kMagic2 = 0x8b04;  // written after the coded data
+  // NOTE: the `alphabet_size` parameter is not used in this function.
+  BitWriter writer;
+  // Space for magic bytes.
+  BitWriter::Allotment allotment_magic1(&writer, 16);
+  writer.Write(16, kMagic1);
+  ReclaimAndCharge(&writer, &allotment_magic1, 0, nullptr);
+
+  std::vector<uint8_t> context_map;
+  EntropyEncodingData codes;
+  std::vector<std::vector<Token>> input_values_vec;
+  input_values_vec.push_back(input_values);
+  // Histograms are written first, followed by the tokens themselves.
+  BuildAndEncodeHistograms(HistogramParams(), n_histograms, input_values_vec,
+                           &codes, &context_map, &writer, 0, nullptr);
+  WriteTokens(input_values_vec[0], codes, context_map, &writer, 0, nullptr);
+
+  // Magic bytes + padding
+  BitWriter::Allotment allotment_magic2(&writer, 24);
+  writer.Write(16, kMagic2);
+  writer.ZeroPadToByte();
+  ReclaimAndCharge(&writer, &allotment_magic2, 0, nullptr);
+
+  // We do not truncate the output. Reading past the end reads out zeroes
+  // anyway.
+  BitReader br(writer.GetSpan());
+
+  ASSERT_EQ(br.ReadBits(16), kMagic1);
+
+  std::vector<uint8_t> dec_context_map;
+  ANSCode decoded_codes;
+  ASSERT_TRUE(
+      DecodeHistograms(&br, n_histograms, &decoded_codes, &dec_context_map));
+  ASSERT_EQ(dec_context_map, context_map);
+  ANSSymbolReader reader(&decoded_codes, &br);
+  // Each decoded value must equal the value of the matching input token.
+  for (const Token& symbol : input_values) {
+    uint32_t read_symbol =
+        reader.ReadHybridUint(symbol.context, &br, dec_context_map);
+    ASSERT_EQ(read_symbol, symbol.value);
+  }
+  ASSERT_TRUE(reader.CheckANSFinalState());
+
+  ASSERT_EQ(br.ReadBits(16), kMagic2);
+  EXPECT_TRUE(br.Close());
+}
+
+TEST(ANSTest, EmptyRoundtrip) {  // an empty token list must round-trip too
+  RoundtripTestcase(2, ANS_MAX_ALPHABET_SIZE, {});
+}
+
+TEST(ANSTest, SingleSymbolRoundtrip) {
+  for (uint32_t sym = 0; sym < ANS_MAX_ALPHABET_SIZE; ++sym)  // one token each
+    RoundtripTestcase(2, ANS_MAX_ALPHABET_SIZE, {{0, sym}});
+  // Same again with 1024 repetitions of each symbol.
+  for (uint32_t sym = 0; sym < ANS_MAX_ALPHABET_SIZE; ++sym) {
+    const std::vector<Token> repeated(1024, {0, sym});
+    RoundtripTestcase(2, ANS_MAX_ALPHABET_SIZE, repeated);
+  }
+}
+
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+constexpr size_t kReps = 10;  // fewer repetitions in sanitizer builds
+#else
+constexpr size_t kReps = 100;  // default repetition count
+#endif
+
+void RoundtripRandomStream(int alphabet_size, size_t reps = kReps,
+                           size_t num = 1 << 18) {
+  using Uniform = std::uniform_int_distribution<>;
+  constexpr int kNumHistograms = 3;
+  std::mt19937_64 rng;  // default-seeded, so every run sees the same data
+  for (size_t rep = 0; rep < reps; ++rep) {
+    std::vector<Token> symbols;
+    while (symbols.size() < num) {
+      const int context = Uniform(0, kNumHistograms - 1)(rng);  // drawn first
+      symbols.emplace_back(context, Uniform(0, alphabet_size - 1)(rng));
+    }
+    RoundtripTestcase(kNumHistograms, alphabet_size, symbols);
+  }
+}
+
+void RoundtripRandomUnbalancedStream(int alphabet_size) {
+  constexpr int kNumHistograms = 3;
+  constexpr int kPrecision = 1 << 10;  // slots per sampling table
+  std::mt19937_64 rng;
+  for (int i = 0; i < 100; i++) {  // 100 independently generated streams
+    std::vector<int> distributions[kNumHistograms];
+    for (int j = 0; j < kNumHistograms; j++) {
+      distributions[j].resize(kPrecision);
+      int symbol = 0;
+      int remaining = 1;  // slots left for the current symbol
+      for (int k = 0; k < kPrecision; k++) {
+        if (remaining == 0) {
+          if (symbol < alphabet_size - 1) symbol++;
+          // There is no meaning behind this distribution: it is just something
+          // that is nonuniform and usually does not end up with too few
+          // symbols. The distributions drawn for the different histograms
+          // should also be sufficiently dissimilar.
+          remaining =
+              std::uniform_int_distribution<>(0, (kPrecision - k) / 1)(rng);
+        }
+        distributions[j][k] = symbol;
+        remaining--;  // after a draw of 0 this goes negative: run never ends
+      }
+    }
+    std::vector<Token> symbols;  // contexts 0..2; 4 histograms are requested
+    for (int j = 0; j < 1 << 18; j++) {
+      int context = std::uniform_int_distribution<>(0, kNumHistograms - 1)(rng);
+      int value = distributions[context][std::uniform_int_distribution<>(
+          0, kPrecision - 1)(rng)];
+      symbols.emplace_back(context, value);
+    }
+    RoundtripTestcase(kNumHistograms + 1, alphabet_size, symbols);
+  }
+}
+
+TEST(ANSTest, RandomStreamRoundtrip3Small) { RoundtripRandomStream(3, 1, 16); }
+
+TEST(ANSTest, RandomStreamRoundtrip3) { RoundtripRandomStream(3); }  // defaults
+
+TEST(ANSTest, RandomStreamRoundtripBig) {  // ANS_MAX_ALPHABET_SIZE symbols
+  RoundtripRandomStream(ANS_MAX_ALPHABET_SIZE);
+}
+
+TEST(ANSTest, RandomUnbalancedStreamRoundtrip3) {  // nonuniform, 3 symbols
+  RoundtripRandomUnbalancedStream(3);
+}
+
+TEST(ANSTest, RandomUnbalancedStreamRoundtripBig) {  // nonuniform, max size
+  RoundtripRandomUnbalancedStream(ANS_MAX_ALPHABET_SIZE);
+}
+
+TEST(ANSTest, UintConfigRoundtrip) {
+  for (size_t log_alpha_size = 5; log_alpha_size <= 8; ++log_alpha_size) {
+    // All (a, b, c) with a < log_alpha_size and b + c <= a, in loop order,
+    std::vector<HybridUintConfig> sent;
+    for (size_t a = 0; a < log_alpha_size; ++a) {
+      for (size_t b = 0; b <= a; ++b) {
+        for (size_t c = 0; c + b <= a; ++c) sent.emplace_back(a, b, c);
+      }
+    }
+    sent.emplace_back(log_alpha_size, 0, 0);  // plus (log_alpha_size, 0, 0).
+    std::vector<HybridUintConfig> received(sent.size());
+    BitWriter writer;
+    BitWriter::Allotment allotment(&writer, 10 * sent.size());
+    EncodeUintConfigs(sent, &writer, log_alpha_size);
+    ReclaimAndCharge(&writer, &allotment, 0, nullptr);
+    writer.ZeroPadToByte();
+    BitReader br(writer.GetSpan());
+    EXPECT_TRUE(DecodeUintConfigs(log_alpha_size, &received, &br));
+    EXPECT_TRUE(br.Close());
+    // Field-by-field comparison of what was written and what was read back.
+    for (size_t n = 0; n < sent.size(); ++n) {
+      EXPECT_EQ(sent[n].split_token, received[n].split_token);
+      EXPECT_EQ(sent[n].msb_in_token, received[n].msb_in_token);
+      EXPECT_EQ(sent[n].lsb_in_token, received[n].lsb_in_token);
+    }
+  }
+}
+
+void TestCheckpointing(bool ans, bool lz77) {
+  std::vector<std::vector<Token>> input_values(1);
+  for (size_t i = 0; i < 1024; i++) {
+    input_values[0].push_back(Token(0, i % 4));
+  }
+  // up to lz77 window size.
+  for (size_t i = 0; i < (1 << 20) - 1022; i++) {
+    input_values[0].push_back(Token(0, (i % 5) + 4));
+  }
+  // Ensure that when the window wraps around, new values are different.
+  input_values[0].push_back(Token(0, 0));
+  for (size_t i = 0; i < 1024; i++) {
+    input_values[0].push_back(Token(0, i % 4));
+  }
+
+  std::vector<uint8_t> context_map;
+  EntropyEncodingData codes;
+  HistogramParams params;
+  params.lz77_method = lz77 ? HistogramParams::LZ77Method::kLZ77
+                            : HistogramParams::LZ77Method::kNone;
+  params.force_huffman = !ans;  // Huffman is forced whenever `ans` is false
+
+  BitWriter writer;
+  {  // the encoder is given a copy; `input_values` is compared against later
+    auto input_values_copy = input_values;
+    BuildAndEncodeHistograms(params, 1, input_values_copy, &codes, &context_map,
+                             &writer, 0, nullptr);
+    WriteTokens(input_values_copy[0], codes, context_map, &writer, 0, nullptr);
+    writer.ZeroPadToByte();
+  }
+
+  // We do not truncate the output. Reading past the end reads out zeroes
+  // anyway.
+  BitReader br(writer.GetSpan());
+  Status status = true;
+  {
+    BitReaderScopedCloser bc(&br, &status);
+
+    std::vector<uint8_t> dec_context_map;
+    ANSCode decoded_codes;
+    ASSERT_TRUE(DecodeHistograms(&br, 1, &decoded_codes, &dec_context_map));
+    ASSERT_EQ(dec_context_map, context_map);
+    ANSSymbolReader reader(&decoded_codes, &br);
+    // Every kInterval symbols: rewind to the checkpoint, re-decode, save anew.
+    ANSSymbolReader::Checkpoint checkpoint;
+    size_t br_pos;  // assigned at i == 0 below, before it is first read
+    constexpr size_t kInterval = ANSSymbolReader::kMaxCheckpointInterval - 2;
+    for (size_t i = 0; i < input_values[0].size(); i++) {
+      if (i % kInterval == 0 && i > 0) {  // replay the previous interval
+        reader.Restore(checkpoint);
+        ASSERT_TRUE(br.Close());
+        br = BitReader(writer.GetSpan());
+        br.SkipBits(br_pos);
+        for (size_t j = i - kInterval; j < i; j++) {
+          Token symbol = input_values[0][j];
+          uint32_t read_symbol =
+              reader.ReadHybridUint(symbol.context, &br, dec_context_map);
+          ASSERT_EQ(read_symbol, symbol.value) << "j = " << j;
+        }
+      }
+      if (i % kInterval == 0) {  // remember reader state and bit position
+        reader.Save(&checkpoint);
+        br_pos = br.TotalBitsConsumed();
+      }
+      Token symbol = input_values[0][i];
+      uint32_t read_symbol =
+          reader.ReadHybridUint(symbol.context, &br, dec_context_map);
+      ASSERT_EQ(read_symbol, symbol.value) << "i = " << i;
+    }
+    ASSERT_TRUE(reader.CheckANSFinalState());
+  }
+  EXPECT_TRUE(status);
+}
+
+TEST(ANSTest, TestCheckpointingANS) {  // force_huffman off, LZ77 off
+  TestCheckpointing(/*ans=*/true, /*lz77=*/false);
+}
+
+TEST(ANSTest, TestCheckpointingPrefix) {  // force_huffman on, LZ77 off
+  TestCheckpointing(/*ans=*/false, /*lz77=*/false);
+}
+
+TEST(ANSTest, TestCheckpointingANSLZ77) {  // force_huffman off, LZ77 on
+  TestCheckpointing(/*ans=*/true, /*lz77=*/true);
+}
+
+TEST(ANSTest, TestCheckpointingPrefixLZ77) {  // force_huffman on, LZ77 on
+  TestCheckpointing(/*ans=*/false, /*lz77=*/true);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/aux_out.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/aux_out.cc
new file mode 100644
index 0000000000..e83140d50b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/aux_out.cc
@@ -0,0 +1,96 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/aux_out.h"
+
+#include <stdint.h>
+
+#include <numeric>  // accumulate
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/enc_bit_writer.h"
+
+namespace jxl {
+
+void AuxOut::Print(size_t num_inputs) const {
+  if (num_inputs == 0) return;
+
+  // Percentage of `part` in `total`; 0 when `total` is 0, because dividing by
+  // zero would be undefined and print "nan"/"inf" at best.
+  const auto percent = [](double part, double total) {
+    return total != 0 ? 100.0 * part / total : 0.0;
+  };
+
+  LayerTotals all_layers;
+  for (size_t i = 0; i < layers.size(); ++i) {
+    all_layers.Assimilate(layers[i]);
+  }
+
+  printf("Average butteraugli iters: %10.2f\n",
+         num_butteraugli_iters * 1.0 / num_inputs);
+
+  // Per-layer share of the total, skipping layers that produced no bits.
+  for (size_t i = 0; i < layers.size(); ++i) {
+    if (layers[i].total_bits != 0) {
+      printf("Total layer bits %-10s\t", LayerName(i));
+      printf("%10f%%", percent(layers[i].total_bits, all_layers.total_bits));
+      layers[i].Print(num_inputs);
+    }
+  }
+  printf("Total image size           ");
+  all_layers.Print(num_inputs);
+
+  const uint32_t dc_pred_total =
+      std::accumulate(dc_pred_usage.begin(), dc_pred_usage.end(), 0u);
+  const uint32_t dc_pred_total_xb =
+      std::accumulate(dc_pred_usage_xb.begin(), dc_pred_usage_xb.end(), 0u);
+  // Printed when either predictor set was used; one total may still be zero.
+  if (dc_pred_total != 0 || dc_pred_total_xb != 0) {
+    printf("\nDC pred     Y                XB:\n");
+    for (size_t i = 0; i < dc_pred_usage.size(); ++i) {
+      printf("  %6u (%5.2f%%)    %6u (%5.2f%%)\n", dc_pred_usage[i],
+             percent(dc_pred_usage[i], dc_pred_total), dc_pred_usage_xb[i],
+             percent(dc_pred_usage_xb[i], dc_pred_total_xb));
+    }
+  }
+}
+
+void AuxOut::DumpCoeffImage(const char* label,
+                            const Image3S& coeff_image) const {
+  JXL_ASSERT(coeff_image.xsize() % 64 == 0);
+  // Every run of 64 values in a source row becomes an 8x8 tile in the output.
+  Image3S reshuffled(coeff_image.xsize() / 8, coeff_image.ysize() * 8);
+  for (size_t c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < coeff_image.ysize(); ++y) {
+      for (size_t x = 0; x < coeff_image.xsize(); x += 64) {
+        for (size_t n = 0; n < 64; ++n)
+          reshuffled.PlaneRow(c, 8 * y + n / 8)[x / 8 + n % 8] =
+              coeff_image.PlaneRow(c, y)[x + n];
+      }
+    }
+  }
+  DumpImage(label, reshuffled);
+}
+
+void ReclaimAndCharge(BitWriter* JXL_RESTRICT writer,
+                      BitWriter::Allotment* JXL_RESTRICT allotment,
+                      size_t layer, AuxOut* JXL_RESTRICT aux_out) {
+  size_t used_bits;
+  size_t unused_bits;
+  allotment->PrivateReclaim(writer, &used_bits, &unused_bits);
+
+#if 0
+  printf("Layer %s bits: max %zu used %zu unused %zu\n", LayerName(layer),
+         allotment->MaxBits(), used_bits, unused_bits);
+#endif
+
+  // With a null aux_out there are no statistics to charge the bits to.
+  if (aux_out == nullptr) return;
+  auto& totals = aux_out->layers[layer];
+  totals.total_bits += used_bits;
+  totals.histogram_bits += allotment->HistogramBits();
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/aux_out.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/aux_out.h
new file mode 100644
index 0000000000..5baf5bbc28
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/aux_out.h
@@ -0,0 +1,311 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_AUX_OUT_H_
+#define LIB_JXL_AUX_OUT_H_
+
+// Optional output information for debugging and analyzing size usage.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <array>
+#include <functional>
+#include <sstream>
+#include <string>
+#include <utility>
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/jxl_inspection.h"
+
+namespace jxl {
+
+// For LayerName and AuxOut::layers[] index. Order does not matter.
+enum {
+  kLayerHeader = 0,
+  kLayerTOC,
+  kLayerNoise,
+  kLayerQuant,
+  kLayerDequantTables,
+  kLayerOrder,
+  kLayerDC,
+  kLayerControlFields,
+  kLayerAC,
+  kLayerACTokens,
+  kLayerDictionary,
+  kLayerDots,
+  kLayerSplines,
+  kLayerLossless,
+  kLayerModularGlobal,
+  kLayerModularDcGroup,
+  kLayerModularAcGroup,
+  kLayerModularTree,
+  kLayerAlpha,
+  kLayerDepth,
+  kLayerExtraChannels,
+  kNumImageLayers
+};
+
+static inline const char* LayerName(size_t layer) {  // layer: a kLayer* id
+  switch (layer) {  // every case returns a string literal naming the layer
+    case kLayerHeader:
+      return "headers";
+    case kLayerTOC:
+      return "TOC";
+    case kLayerNoise:
+      return "noise";
+    case kLayerQuant:
+      return "quantizer";
+    case kLayerDequantTables:
+      return "quant tables";
+    case kLayerOrder:
+      return "order";
+    case kLayerDC:
+      return "DC";
+    case kLayerControlFields:
+      return "ControlFields";
+    case kLayerAC:
+      return "AC";
+    case kLayerACTokens:
+      return "ACTokens";
+    case kLayerDictionary:
+      return "dictionary";
+    case kLayerDots:
+      return "dots";
+    case kLayerSplines:
+      return "splines";
+    case kLayerLossless:
+      return "lossless";
+    case kLayerModularGlobal:
+      return "modularGlobal";
+    case kLayerModularDcGroup:
+      return "modularDcGroup";
+    case kLayerModularAcGroup:
+      return "modularAcGroup";
+    case kLayerModularTree:
+      return "modularTree";
+    case kLayerAlpha:
+      return "alpha";
+    case kLayerDepth:
+      return "depth";
+    case kLayerExtraChannels:
+      return "extra channels";
+    default:  // anything else, kNumImageLayers included, is rejected
+      JXL_ABORT("Invalid layer %zu\n", layer);
+  }
+}
+
+// Statistics gathered during compression or decompression.
+struct AuxOut {
+ private:
+  struct LayerTotals {
+    // Adds every counter of `victim` to the matching counter of this object.
+    void Assimilate(const LayerTotals& victim) {
+      num_clustered_histograms += victim.num_clustered_histograms;
+      histogram_bits += victim.histogram_bits;
+      extra_bits += victim.extra_bits;
+      total_bits += victim.total_bits;
+      clustered_entropy += victim.clustered_entropy;
+    }
+    // Prints total_bits, plus a bracketed breakdown when histogram_bits != 0
+    // (hst/ex in bytes). size_t is unsigned, hence %zu rather than %zd.
+    void Print(size_t num_inputs) const {
+      printf("%10zu", total_bits);
+      if (histogram_bits != 0) {
+        printf("   [c/i:%6.2f | hst:%8zu | ex:%8zu | h+c+e:%12.3f]",
+               num_clustered_histograms * 1.0 / num_inputs, histogram_bits >> 3,
+               extra_bits >> 3,
+               (histogram_bits + clustered_entropy + extra_bits) / 8.0);
+      }
+      printf("\n");
+    }
+    size_t num_clustered_histograms = 0;
+    size_t extra_bits = 0;
+    // Set via BitsWritten below
+    size_t histogram_bits = 0;
+    size_t total_bits = 0;
+    double clustered_entropy = 0.0;
+  };
+
+ public:
+  AuxOut() = default;
+  AuxOut(const AuxOut&) = default;
+
+  void Assimilate(const AuxOut& victim) {  // adds victim's counters to ours
+    for (size_t layer = 0; layer < layers.size(); ++layer) {
+      layers[layer].Assimilate(victim.layers[layer]);
+    }
+    for (size_t p = 0; p < dc_pred_usage.size(); ++p) {
+      dc_pred_usage_xb[p] += victim.dc_pred_usage_xb[p];
+      dc_pred_usage[p] += victim.dc_pred_usage[p];
+    }
+    num_butteraugli_iters += victim.num_butteraugli_iters;
+    num_blocks += victim.num_blocks;
+    num_dct2_blocks += victim.num_dct2_blocks;
+    num_dct4_blocks += victim.num_dct4_blocks;
+    num_dct4x8_blocks += victim.num_dct4x8_blocks;
+    num_afv_blocks += victim.num_afv_blocks;
+    num_dct8_blocks += victim.num_dct8_blocks;
+    num_dct8x16_blocks += victim.num_dct8x16_blocks;
+    num_dct8x32_blocks += victim.num_dct8x32_blocks;
+    num_dct16_blocks += victim.num_dct16_blocks;
+    num_dct16x32_blocks += victim.num_dct16x32_blocks;
+    num_dct32_blocks += victim.num_dct32_blocks;
+  }
+
+  void Print(size_t num_inputs) const;
+
+  template <typename T>
+  void DumpImage(const char* label, const Image3<T>& image) const {
+    // Dumping needs both a writer callback and a non-empty file prefix.
+    if (!dump_image || debug_prefix.empty()) return;
+    const std::string pathname = debug_prefix + label + ".png";
+    CodecInOut io;
+    // Always save to 16-bit png.
+    io.metadata.m.SetUintSamples(16);
+    io.metadata.m.color_encoding = ColorEncoding::SRGB();
+    io.SetFromImage(ConvertToFloat(image), io.metadata.m.color_encoding);
+    (void)dump_image(io, pathname);
+  }
+  // Single-plane overload: the plane is copied into all three channels.
+  template <typename T>
+  void DumpImage(const char* label, const Plane<T>& image) {
+    Image3<T> tripled(CopyImage(image), CopyImage(image), CopyImage(image));
+    DumpImage(label, tripled);
+  }
+
+  template <typename T>
+  void DumpXybImage(const char* label, const Image3<T>& image) const {
+    // Nothing to do without a writer callback and an output prefix.
+    if (!dump_image || debug_prefix.empty()) return;
+    const std::string pathname = debug_prefix + label + ".png";
+
+    // Run OpsinToLinear to obtain the linear image that gets written.
+    OpsinParams opsin_params;
+    opsin_params.Init(kDefaultIntensityTarget);
+    Image3F linear(image.xsize(), image.ysize());
+    OpsinToLinear(image, Rect(linear), nullptr, &linear, opsin_params);
+
+    // Stored with 16-bit unsigned samples, tagged as linear sRGB.
+    CodecInOut io;
+    io.metadata.m.SetUintSamples(16);
+    io.metadata.m.color_encoding = ColorEncoding::LinearSRGB();
+    io.SetFromImage(std::move(linear), io.metadata.m.color_encoding);
+    (void)dump_image(io, pathname);
+  }
+
+  // Rescales each channel independently to span 0..255 (its minimum maps to 0;
+  // a constant channel becomes all zeros). The result is a false-color RGB
+  // debug image that makes the information in non-RGB channels visible.
+  template <typename T>
+  void DumpImageNormalized(const char* label, const Image3<T>& image) const {
+    std::array<T, 3> min;
+    std::array<T, 3> max;
+    Image3MinMax(image, &min, &max);
+    Image3B normalized(image.xsize(), image.ysize());  // 8-bit output samples
+    for (size_t c = 0; c < 3; ++c) {
+      float mul = min[c] == max[c] ? 0 : (255.0f / (max[c] - min[c]));
+      for (size_t y = 0; y < image.ysize(); ++y) {
+        const T* JXL_RESTRICT row_in = image.ConstPlaneRow(c, y);
+        uint8_t* JXL_RESTRICT row_out = normalized.PlaneRow(c, y);
+        for (size_t x = 0; x < image.xsize(); ++x) {
+          row_out[x] = static_cast<uint8_t>((row_in[x] - min[c]) * mul);
+        }
+      }
+    }
+    DumpImage(label, normalized);
+  }
+
+  template <typename T>
+  void DumpPlaneNormalized(const char* label, const Plane<T>& image) const {
+    T lo;
+    T hi;
+    ImageMinMax(image, &lo, &hi);
+    const float scale = lo == hi ? 0 : (255.0f / (hi - lo));  // 0 if constant
+    Image3B normalized(image.xsize(), image.ysize());
+    for (size_t c = 0; c < 3; ++c) {  // same values written to every channel
+      for (size_t y = 0; y < image.ysize(); ++y) {
+        const T* JXL_RESTRICT row_in = image.ConstRow(y);
+        uint8_t* JXL_RESTRICT row_out = normalized.PlaneRow(c, y);
+        for (size_t x = 0; x < image.xsize(); ++x) {
+          row_out[x] = static_cast<uint8_t>((row_in[x] - lo) * scale);
+        }
+      }
+    }
+    DumpImage(label, normalized);
+  }
+
+  // This dumps coefficients as a 16-bit PNG with coefficients of a block placed
+  // in the area that would contain that block in a normal image. To view the
+  // resulting image manually, rescale intensities by using:
+  // $ convert -auto-level IMAGE.PNG - | display -
+  void DumpCoeffImage(const char* label, const Image3S& coeff_image) const;
+
+  void SetInspectorImage3F(const jxl::InspectorImage3F& inspector) {
+    inspector_image3f_ = inspector;
+  }
+
+  // Hook for inspecting intermediate data at various places of the processing
+  // pipeline. Returns true iff processing should proceed.
+  bool InspectImage3F(const char* label, const Image3F& image) {
+    if (inspector_image3f_ == nullptr) {
+      return true;  // no inspector installed: always proceed
+    }
+    return inspector_image3f_(label, image);
+  }
+
+  std::array<LayerTotals, kNumImageLayers> layers;
+  size_t num_blocks = 0;
+
+  // Number of blocks that use larger DCT (set by ac_strategy).
+  size_t num_dct2_blocks = 0;
+  size_t num_dct4_blocks = 0;
+  size_t num_dct4x8_blocks = 0;
+  size_t num_afv_blocks = 0;
+  size_t num_dct8_blocks = 0;
+  size_t num_dct8x16_blocks = 0;
+  size_t num_dct8x32_blocks = 0;
+  size_t num_dct16_blocks = 0;
+  size_t num_dct16x32_blocks = 0;
+  size_t num_dct32_blocks = 0;
+
+  std::array<uint32_t, 8> dc_pred_usage = {0};
+  std::array<uint32_t, 8> dc_pred_usage_xb = {0};
+
+  int num_butteraugli_iters = 0;
+
+  // If not empty, additional debugging information (e.g. debug images) is
+  // saved in files with this prefix.
+  std::string debug_prefix;
+
+  // By how much the decoded image was downsampled relative to the encoded
+  // image.
+  size_t downsampling = 1;
+
+  jxl::InspectorImage3F inspector_image3f_;
+
+  std::function<Status(const CodecInOut&, const std::string&)> dump_image =
+      nullptr;
+};
+
+// Lets callers skip building images that would not be written out anyway.
+static inline bool WantDebugOutput(const AuxOut* aux_out) {
+  if (aux_out == nullptr) return false;   // need a valid pointer ...
+  return !aux_out->debug_prefix.empty();  // ... and a non-empty prefix
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_AUX_OUT_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/aux_out_fwd.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/aux_out_fwd.h
new file mode 100644
index 0000000000..29b31ad87a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/aux_out_fwd.h
@@ -0,0 +1,28 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_AUX_OUT_FWD_H_
+#define LIB_JXL_AUX_OUT_FWD_H_
+
+#include <stddef.h>
+
+#include "lib/jxl/enc_bit_writer.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Helper function that ensures the `bits_written` are charged to `layer` in
+// `aux_out`. Example usage:
+//   BitWriter::Allotment allotment(&writer, max_bits);
+//   writer.Write(..); writer.Write(..);
+//   ReclaimAndCharge(&writer, &allotment, layer, aux_out);
+void ReclaimAndCharge(BitWriter* JXL_RESTRICT writer,
+                      BitWriter::Allotment* JXL_RESTRICT allotment,
+                      size_t layer, AuxOut* JXL_RESTRICT aux_out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_AUX_OUT_FWD_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/arch_macros.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/arch_macros.h
new file mode 100644
index 0000000000..a98301915e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/arch_macros.h
@@ -0,0 +1,33 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_ARCH_MACROS_H_
+#define LIB_JXL_BASE_ARCH_MACROS_H_
+
+// Defines the JXL_ARCH_* macros.
+
+namespace jxl {
+
+#if defined(__x86_64__) || defined(_M_X64)
+#define JXL_ARCH_X64 1
+#else
+#define JXL_ARCH_X64 0
+#endif
+
+#if defined(__powerpc64__) || defined(_M_PPC)
+#define JXL_ARCH_PPC 1
+#else
+#define JXL_ARCH_PPC 0
+#endif
+
+#if defined(__aarch64__) || defined(__arm__)
+#define JXL_ARCH_ARM 1
+#else
+#define JXL_ARCH_ARM 0
+#endif
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_ARCH_MACROS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/bits.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/bits.h
new file mode 100644
index 0000000000..9f86118e72
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/bits.h
@@ -0,0 +1,147 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_BITS_H_
+#define LIB_JXL_BASE_BITS_H_
+
+// Specialized instructions for processing register-sized bit arrays.
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+#if JXL_COMPILER_MSVC
+#include <intrin.h>
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace jxl {
+
+// Empty struct used as a size tag type.
+template <size_t N>
+struct SizeTag {};
+
+template <typename T>
+constexpr bool IsSigned() {
+  return T(0) > T(-1);
+}
+
+// Undefined results for x == 0.
+static JXL_INLINE JXL_MAYBE_UNUSED size_t
+Num0BitsAboveMS1Bit_Nonzero(SizeTag<4> /* tag */, const uint32_t x) {
+  JXL_DASSERT(x != 0);
+#if JXL_COMPILER_MSVC
+  unsigned long index;
+  _BitScanReverse(&index, x);
+  return 31 - index;
+#else
+  return static_cast<size_t>(__builtin_clz(x));
+#endif
+}
+static JXL_INLINE JXL_MAYBE_UNUSED size_t
+Num0BitsAboveMS1Bit_Nonzero(SizeTag<8> /* tag */, const uint64_t x) {
+  JXL_DASSERT(x != 0);
+#if JXL_COMPILER_MSVC
+#if JXL_ARCH_X64
+  unsigned long index;
+  _BitScanReverse64(&index, x);
+  return 63 - index;  // index of the top set bit -> number of zeros above it
+#else   // JXL_ARCH_X64
+  // _BitScanReverse64 not available: scan the two 32-bit halves separately.
+  uint32_t msb = static_cast<uint32_t>(x >> 32u);  // upper 32 bits
+  unsigned long index;
+  if (msb == 0) {
+    uint32_t lsb = static_cast<uint32_t>(x & 0xFFFFFFFF);  // lower 32 bits
+    _BitScanReverse(&index, lsb);
+    return 63 - index;  // 32 zeros from the upper half + (31 - index)
+  } else {
+    _BitScanReverse(&index, msb);
+    return 31 - index;
+  }
+#endif  // JXL_ARCH_X64
+#else   // not MSVC: use the compiler builtin
+  return static_cast<size_t>(__builtin_clzll(x));
+#endif
+}
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED size_t
+Num0BitsAboveMS1Bit_Nonzero(const T x) {
+  static_assert(!IsSigned<T>(), "Num0BitsAboveMS1Bit_Nonzero: use unsigned");
+  return Num0BitsAboveMS1Bit_Nonzero(SizeTag<sizeof(T)>(), x);
+}
+
+// Undefined results for x == 0.
+static JXL_INLINE JXL_MAYBE_UNUSED size_t
+Num0BitsBelowLS1Bit_Nonzero(SizeTag<4> /* tag */, const uint32_t x) {
+  JXL_DASSERT(x != 0);
+#if JXL_COMPILER_MSVC
+  unsigned long index;
+  _BitScanForward(&index, x);
+  return index;
+#else
+  return static_cast<size_t>(__builtin_ctz(x));
+#endif
+}
+static JXL_INLINE JXL_MAYBE_UNUSED size_t
+Num0BitsBelowLS1Bit_Nonzero(SizeTag<8> /* tag */, const uint64_t x) {
+  JXL_DASSERT(x != 0);
+#if JXL_COMPILER_MSVC
+#if JXL_ARCH_X64
+  unsigned long index;
+  _BitScanForward64(&index, x);
+  return index;  // index of the lowest set bit == number of zeros below it
+#else   // JXL_ARCH_X64
+  // _BitScanForward64 not available: scan the two 32-bit halves separately.
+  uint32_t lsb = static_cast<uint32_t>(x & 0xFFFFFFFF);  // lower 32 bits
+  unsigned long index;
+  if (lsb == 0) {
+    uint32_t msb = static_cast<uint32_t>(x >> 32u);  // upper 32 bits
+    _BitScanForward(&index, msb);
+    return 32 + index;  // the whole lower half is zero
+  } else {
+    _BitScanForward(&index, lsb);
+    return index;
+  }
+#endif  // JXL_ARCH_X64
+#else   // not MSVC: use the compiler builtin
+  return static_cast<size_t>(__builtin_ctzll(x));
+#endif
+}
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED size_t Num0BitsBelowLS1Bit_Nonzero(T x) {
+  static_assert(!IsSigned<T>(), "Num0BitsBelowLS1Bit_Nonzero: use unsigned");
+  return Num0BitsBelowLS1Bit_Nonzero(SizeTag<sizeof(T)>(), x);
+}
+
+// Leading-zero count; for x == 0 the result is the bit width of T.
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED size_t Num0BitsAboveMS1Bit(const T x) {
+  return (x != 0) ? Num0BitsAboveMS1Bit_Nonzero(x) : sizeof(T) * 8;
+}
+
+// Trailing-zero count; for x == 0 the result is the bit width of T.
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED size_t Num0BitsBelowLS1Bit(const T x) {
+  return (x != 0) ? Num0BitsBelowLS1Bit_Nonzero(x) : sizeof(T) * 8;
+}
+
+// Returns floor(log2(x)) for x != 0 (top bit index XOR leading-zero count).
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED size_t FloorLog2Nonzero(const T x) {
+  return (sizeof(T) * 8 - 1) ^ Num0BitsAboveMS1Bit_Nonzero(x);
+}
+
+// Returns ceil(log2(x)) for x != 0: the floor, plus one unless x is 2^k.
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED size_t CeilLog2Nonzero(const T x) {
+  const bool is_power_of_two = (x & (x - 1)) == 0;
+  const size_t rounded_down = FloorLog2Nonzero(x);
+  return is_power_of_two ? rounded_down : rounded_down + 1;
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_BITS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/byte_order.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/byte_order.h
new file mode 100644
index 0000000000..f27017d661
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/byte_order.h
@@ -0,0 +1,283 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_BYTE_ORDER_H_
+#define LIB_JXL_BASE_BYTE_ORDER_H_
+
+#include <stdint.h>
+#include <string.h>  // memcpy
+
+#include "lib/jxl/base/compiler_specific.h"
+
+#if JXL_COMPILER_MSVC
+#include <intrin.h>  // _byteswap_*
+#endif
+
+#if (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
+#define JXL_BYTE_ORDER_LITTLE 1
+#else
+// This means that we don't know that the byte order is little endian, in
+// this case we use endian-neutral code that works for both little- and
+// big-endian.
+#define JXL_BYTE_ORDER_LITTLE 0
+#endif
+
+// Returns whether the system is little-endian (least-significant byte first).
+#if JXL_BYTE_ORDER_LITTLE
+static constexpr bool IsLittleEndian() { return true; }
+#else
+static inline bool IsLittleEndian() {
+  const uint32_t multibyte = 1;
+  uint8_t byte;
+  memcpy(&byte, &multibyte, 1);
+  return byte == 1;
+}
+#endif
+
+#if JXL_COMPILER_MSVC
+#define JXL_BSWAP32(x) _byteswap_ulong(x)
+#define JXL_BSWAP64(x) _byteswap_uint64(x)
+#else
+#define JXL_BSWAP32(x) __builtin_bswap32(x)
+#define JXL_BSWAP64(x) __builtin_bswap64(x)
+#endif
+
+static JXL_INLINE uint32_t LoadBE16(const uint8_t* p) {
+  // Big-endian: p[0] is the high byte, p[1] the low byte.
+  const uint32_t hi = p[0];
+  return (hi << 8) | p[1];
+}
+
+static JXL_INLINE uint32_t LoadLE16(const uint8_t* p) {
+  // Little-endian: p[0] is the low byte, p[1] the high byte.
+  const uint32_t hi = p[1];
+  return (hi << 8) | p[0];
+}
+
+static JXL_INLINE uint32_t LoadBE24(const uint8_t* p) {
+  // Big-endian: p[0] is the most significant of the three bytes.
+  const uint32_t hi = p[0];
+  const uint32_t mid = p[1];
+  return (hi << 16) | (mid << 8) | p[2];
+}
+
+static JXL_INLINE uint32_t LoadLE24(const uint8_t* p) {
+  // Little-endian: p[2] is the most significant of the three bytes.
+  const uint32_t hi = p[2];
+  const uint32_t mid = p[1];
+  return (hi << 16) | (mid << 8) | p[0];
+}
+
+static JXL_INLINE uint32_t LoadBE32(const uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+  uint32_t big;
+  memcpy(&big, p, 4);
+  return JXL_BSWAP32(big);
+#else
+  // Byte-order-independent - can't assume this machine is big endian.
+  const uint32_t byte3 = p[0];
+  const uint32_t byte2 = p[1];
+  const uint32_t byte1 = p[2];
+  const uint32_t byte0 = p[3];
+  return (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0;
+#endif
+}
+
+static JXL_INLINE uint64_t LoadBE64(const uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+  uint64_t big;
+  memcpy(&big, p, 8);
+  return JXL_BSWAP64(big);
+#else
+  // Byte-order-independent - can't assume this machine is big endian.
+  const uint64_t byte7 = p[0];
+  const uint64_t byte6 = p[1];
+  const uint64_t byte5 = p[2];
+  const uint64_t byte4 = p[3];
+  const uint64_t byte3 = p[4];
+  const uint64_t byte2 = p[5];
+  const uint64_t byte1 = p[6];
+  const uint64_t byte0 = p[7];
+  return (byte7 << 56ull) | (byte6 << 48ull) | (byte5 << 40ull) |
+         (byte4 << 32ull) | (byte3 << 24ull) | (byte2 << 16ull) |
+         (byte1 << 8ull) | byte0;
+#endif
+}
+
+static JXL_INLINE uint32_t LoadLE32(const uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+  uint32_t little;
+  memcpy(&little, p, 4);
+  return little;
+#else
+  // Byte-order-independent - can't assume this machine is big endian.
+  const uint32_t byte0 = p[0];
+  const uint32_t byte1 = p[1];
+  const uint32_t byte2 = p[2];
+  const uint32_t byte3 = p[3];
+  return (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0;
+#endif
+}
+
+static JXL_INLINE uint64_t LoadLE64(const uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+  uint64_t little;
+  memcpy(&little, p, 8);
+  return little;
+#else
+  // Byte-order-independent - can't assume this machine is big endian.
+  const uint64_t byte0 = p[0];
+  const uint64_t byte1 = p[1];
+  const uint64_t byte2 = p[2];
+  const uint64_t byte3 = p[3];
+  const uint64_t byte4 = p[4];
+  const uint64_t byte5 = p[5];
+  const uint64_t byte6 = p[6];
+  const uint64_t byte7 = p[7];
+  return (byte7 << 56) | (byte6 << 48) | (byte5 << 40) | (byte4 << 32) |
+         (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0;
+#endif
+}
+
+static JXL_INLINE void StoreBE16(const uint32_t native, uint8_t* p) {
+  p[0] = (native >> 8) & 0xFF;
+  p[1] = native & 0xFF;
+}
+
+static JXL_INLINE void StoreLE16(const uint32_t native, uint8_t* p) {
+  p[1] = (native >> 8) & 0xFF;
+  p[0] = native & 0xFF;
+}
+
+static JXL_INLINE void StoreBE24(const uint32_t native, uint8_t* p) {
+  p[0] = (native >> 16) & 0xFF;
+  p[1] = (native >> 8) & 0xFF;
+  p[2] = native & 0xFF;
+}
+
+static JXL_INLINE void StoreLE24(const uint32_t native, uint8_t* p) {
+  p[2] = (native >> 16) & 0xFF;  // bits 16..23: most significant stored byte
+  p[1] = (native >> 8) & 0xFF;   // bits 8..15
+  p[0] = native & 0xFF;          // bits 0..7 go first (little-endian)
+}
+
+static JXL_INLINE void StoreBE32(const uint32_t native, uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+  const uint32_t big = JXL_BSWAP32(native);
+  memcpy(p, &big, 4);
+#else
+  // Byte-order-independent - can't assume this machine is big endian.
+  p[0] = native >> 24;
+  p[1] = (native >> 16) & 0xFF;
+  p[2] = (native >> 8) & 0xFF;
+  p[3] = native & 0xFF;
+#endif
+}
+
+static JXL_INLINE void StoreBE64(const uint64_t native, uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+  const uint64_t big = JXL_BSWAP64(native);
+  memcpy(p, &big, 8);
+#else
+  // Byte-order-independent - can't assume this machine is big endian.
+  p[0] = native >> 56ull;
+  p[1] = (native >> 48ull) & 0xFF;
+  p[2] = (native >> 40ull) & 0xFF;
+  p[3] = (native >> 32ull) & 0xFF;
+  p[4] = (native >> 24ull) & 0xFF;
+  p[5] = (native >> 16ull) & 0xFF;
+  p[6] = (native >> 8ull) & 0xFF;
+  p[7] = native & 0xFF;
+#endif
+}
+
+static JXL_INLINE void StoreLE32(const uint32_t native, uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+  const uint32_t little = native;
+  memcpy(p, &little, 4);
+#else
+  // Byte-order-independent - can't assume this machine is big endian.
+  p[3] = native >> 24;
+  p[2] = (native >> 16) & 0xFF;
+  p[1] = (native >> 8) & 0xFF;
+  p[0] = native & 0xFF;
+#endif
+}
+
+static JXL_INLINE void StoreLE64(const uint64_t native, uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+  const uint64_t little = native;
+  memcpy(p, &little, 8);
+#else
+  // Byte-order-independent - can't assume this machine is big endian.
+  p[7] = native >> 56;
+  p[6] = (native >> 48) & 0xFF;
+  p[5] = (native >> 40) & 0xFF;
+  p[4] = (native >> 32) & 0xFF;
+  p[3] = (native >> 24) & 0xFF;
+  p[2] = (native >> 16) & 0xFF;
+  p[1] = (native >> 8) & 0xFF;
+  p[0] = native & 0xFF;
+#endif
+}
+
+// Big/Little Endian order.
+struct OrderBE {};
+struct OrderLE {};
+
+// Wrappers for calling from generic code.
+static JXL_INLINE void Store16(OrderBE /*tag*/, const uint32_t native,
+                               uint8_t* p) {
+  return StoreBE16(native, p);
+}
+
+static JXL_INLINE void Store16(OrderLE /*tag*/, const uint32_t native,
+                               uint8_t* p) {
+  return StoreLE16(native, p);
+}
+
+static JXL_INLINE void Store24(OrderBE /*tag*/, const uint32_t native,
+                               uint8_t* p) {
+  return StoreBE24(native, p);
+}
+
+static JXL_INLINE void Store24(OrderLE /*tag*/, const uint32_t native,
+                               uint8_t* p) {
+  return StoreLE24(native, p);
+}
+static JXL_INLINE void Store32(OrderBE /*tag*/, const uint32_t native,
+                               uint8_t* p) {
+  return StoreBE32(native, p);
+}
+
+static JXL_INLINE void Store32(OrderLE /*tag*/, const uint32_t native,
+                               uint8_t* p) {
+  return StoreLE32(native, p);
+}
+
+static JXL_INLINE uint32_t Load16(OrderBE /*tag*/, const uint8_t* p) {
+  return LoadBE16(p);
+}
+
+static JXL_INLINE uint32_t Load16(OrderLE /*tag*/, const uint8_t* p) {
+  return LoadLE16(p);
+}
+
+static JXL_INLINE uint32_t Load24(OrderBE /*tag*/, const uint8_t* p) {
+  return LoadBE24(p);
+}
+
+static JXL_INLINE uint32_t Load24(OrderLE /*tag*/, const uint8_t* p) {
+  return LoadLE24(p);
+}
+static JXL_INLINE uint32_t Load32(OrderBE /*tag*/, const uint8_t* p) {
+  return LoadBE32(p);
+}
+
+static JXL_INLINE uint32_t Load32(OrderLE /*tag*/, const uint8_t* p) {
+  return LoadLE32(p);
+}
+
+#endif  // LIB_JXL_BASE_BYTE_ORDER_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/cache_aligned.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/cache_aligned.cc
new file mode 100644
index 0000000000..35ee2aee06
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/cache_aligned.cc
@@ -0,0 +1,154 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/cache_aligned.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+// Disabled: slower than malloc + alignment.
+#define JXL_USE_MMAP 0
+
+#if JXL_USE_MMAP
+#include <sys/mman.h>
+#endif
+
+#include <algorithm>  // std::max
+#include <atomic>
+#include <hwy/base.h>  // kMaxVectorSize
+#include <limits>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace {
+
+#pragma pack(push, 1)
+struct AllocationHeader {
+  void* allocated;
+  size_t allocated_size;
+  uint8_t left_padding[hwy::kMaxVectorSize];
+};
+#pragma pack(pop)
+
+std::atomic<uint64_t> num_allocations{0};
+std::atomic<uint64_t> bytes_in_use{0};
+std::atomic<uint64_t> max_bytes_in_use{0};
+
+}  // namespace
+
+// Avoids linker errors in pre-C++17 builds.
+constexpr size_t CacheAligned::kPointerSize;
+constexpr size_t CacheAligned::kCacheLineSize;
+constexpr size_t CacheAligned::kAlignment;
+constexpr size_t CacheAligned::kAlias;
+
+void CacheAligned::PrintStats() {  // reports the allocation counters
+  const uint64_t n = num_allocations.load(std::memory_order_relaxed);
+  const uint64_t peak = max_bytes_in_use.load(std::memory_order_relaxed);
+  printf("Allocations: %zu (max bytes in use: %E)\n", size_t(n), double(peak));
+}
+
+size_t CacheAligned::NextOffset() {
+  constexpr uint32_t kSlots = kAlias / kAlignment;  // distinct offsets
+  static std::atomic<uint32_t> counter{0};
+  const uint32_t ticket = counter.fetch_add(1, std::memory_order_relaxed);
+  return kAlignment * (ticket % kSlots);
+}
+
+void* CacheAligned::Allocate(const size_t payload_size, size_t offset) {
+  JXL_ASSERT(payload_size <= std::numeric_limits<size_t>::max() / 2);
+  JXL_ASSERT((offset % kAlignment == 0) && offset <= kAlias);
+
+  // What: | misalign | unused | AllocationHeader |payload
+  // Size: |<= kAlias | offset |                  |payload_size
+  //       ^allocated.^aligned.^header............^payload
+  // The header must immediately precede payload, which must remain aligned.
+  // To avoid wasting space, the header resides at the end of `unused`,
+  // which therefore cannot be empty (offset == 0).
+  if (offset == 0) {
+    offset = kAlignment;  // = round_up(sizeof(AllocationHeader), kAlignment)
+    static_assert(sizeof(AllocationHeader) <= kAlignment, "Else: round up");
+  }
+
+#if JXL_USE_MMAP
+  const size_t allocated_size = offset + payload_size;
+  const int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE;
+  void* allocated =
+      mmap(nullptr, allocated_size, PROT_READ | PROT_WRITE, flags, -1, 0);
+  if (allocated == MAP_FAILED) return nullptr;
+  const uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated);
+#else
+  const size_t allocated_size = kAlias + offset + payload_size;
+  void* allocated = malloc(allocated_size);
+  if (allocated == nullptr) return nullptr;  // allocation failure -> null
+  // Always round up even if already aligned - we already asked for kAlias
+  // extra bytes and there's no way to give them back.
+  uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated) + kAlias;
+  static_assert((kAlias & (kAlias - 1)) == 0, "kAlias must be a power of 2");
+  static_assert(kAlias >= kAlignment, "Cannot align to more than kAlias");
+  aligned &= ~(kAlias - 1);  // clear the low bits: a multiple of kAlias
+#endif
+
+#if 0
+  // No effect.
+  uintptr_t page_aligned = reinterpret_cast<uintptr_t>(allocated);
+  page_aligned &= ~(4096 - 1);
+  if (madvise(reinterpret_cast<void*>(page_aligned), allocated_size,
+              MADV_WILLNEED) != 0) {
+    JXL_NOTIFY_ERROR("madvise failed");
+  }
+#elif 0
+  // INCREASES both first and subsequent decode times.
+  if (mlock(allocated, allocated_size) != 0) {
+    JXL_NOTIFY_ERROR("mlock failed");
+  }
+#endif
+
+  // Update statistics (#allocations and max bytes in use)
+  num_allocations.fetch_add(1, std::memory_order_relaxed);
+  const uint64_t prev_bytes =
+      bytes_in_use.fetch_add(allocated_size, std::memory_order_acq_rel);
+  uint64_t expected_max = max_bytes_in_use.load(std::memory_order_acquire);
+  for (;;) {  // CAS loop: a failed exchange reloads expected_max, then retries
+    const uint64_t desired =
+        std::max(expected_max, prev_bytes + allocated_size);
+    if (max_bytes_in_use.compare_exchange_strong(expected_max, desired,
+                                                 std::memory_order_acq_rel)) {
+      break;
+    }
+  }
+
+  const uintptr_t payload = aligned + offset;  // still aligned
+
+  // Stash `allocated` and `allocated_size` inside header for use by Free().
+  AllocationHeader* header = reinterpret_cast<AllocationHeader*>(payload) - 1;
+  header->allocated = allocated;
+  header->allocated_size = allocated_size;
+
+  return JXL_ASSUME_ALIGNED(reinterpret_cast<void*>(payload), 64);
+}
+
+void CacheAligned::Free(const void* aligned_pointer) {
+  if (aligned_pointer == nullptr) {
+    return;  // nothing was allocated, nothing to release
+  }
+  const uintptr_t payload = reinterpret_cast<uintptr_t>(aligned_pointer);
+  JXL_ASSERT(payload % kAlignment == 0);
+  // Allocate() placed the header directly in front of the payload.
+  const AllocationHeader* header =
+      reinterpret_cast<const AllocationHeader*>(payload) - 1;
+
+  // fetch_sub converts the size_t to the counter's 64-bit type before
+  // subtracting, so this is also correct where size_t is only 32 bits wide.
+  bytes_in_use.fetch_sub(header->allocated_size, std::memory_order_acq_rel);
+#if JXL_USE_MMAP
+  munmap(header->allocated, header->allocated_size);
+#else
+  free(header->allocated);
+#endif
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/cache_aligned.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/cache_aligned.h
new file mode 100644
index 0000000000..e57df14837
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/cache_aligned.h
@@ -0,0 +1,74 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_CACHE_ALIGNED_H_
+#define LIB_JXL_BASE_CACHE_ALIGNED_H_
+
+// Memory allocator with support for alignment + misalignment.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+// Static-only allocation helpers whose constants depend on the cache line size.
+class CacheAligned {
+ public:
+  static void PrintStats();
+
+  static constexpr size_t kPointerSize = sizeof(void*);
+  static constexpr size_t kCacheLineSize = 64;
+  // To avoid RFOs, match L2 fill size (pairs of lines).
+  static constexpr size_t kAlignment = 2 * kCacheLineSize;
+  // Minimum multiple for which cache set conflicts and/or loads blocked by
+  // preceding stores can occur.
+  static constexpr size_t kAlias = 2048;
+
+  // Returns a 'random' (cyclical) offset suitable for Allocate.
+  static size_t NextOffset();
+
+  // Returns null or memory whose address is congruent to `offset` (mod kAlias).
+  // This reduces cache conflicts and load/store stalls, especially with large
+  // allocations that would otherwise have similar alignments. At least
+  // `payload_size` (which can be zero) bytes will be accessible.
+  static void* Allocate(size_t payload_size, size_t offset);
+  // Convenience overload: the offset is taken from NextOffset().
+  static void* Allocate(const size_t payload_size) {
+    return Allocate(payload_size, NextOffset());
+  }
+  // Releases memory obtained from Allocate(); a null pointer is ignored.
+  static void Free(const void* aligned_pointer);
+};
+
+// Stateless deleter functor, so CacheAlignedUniquePtr needs no function pointer.
+struct CacheAlignedDeleter {
+  void operator()(uint8_t* aligned_pointer) const {
+    CacheAligned::Free(aligned_pointer);
+  }
+};
+
+using CacheAlignedUniquePtr = std::unique_ptr<uint8_t[], CacheAlignedDeleter>;
+
+// Both overloads hand out raw byte storage; no constructors are invoked.
+static inline CacheAlignedUniquePtr AllocateArray(const size_t bytes) {
+  auto* const raw = static_cast<uint8_t*>(CacheAligned::Allocate(bytes));
+  return CacheAlignedUniquePtr(raw, CacheAlignedDeleter());
+}
+
+// Variant that forwards a caller-chosen `offset` to CacheAligned::Allocate.
+static inline CacheAlignedUniquePtr AllocateArray(const size_t bytes,
+                                                  const size_t offset) {
+  auto* const raw =
+      static_cast<uint8_t*>(CacheAligned::Allocate(bytes, offset));
+  return CacheAlignedUniquePtr(raw, CacheAlignedDeleter());
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_CACHE_ALIGNED_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/compiler_specific.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/compiler_specific.h
new file mode 100644
index 0000000000..b279fa0c82
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/compiler_specific.h
@@ -0,0 +1,153 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_COMPILER_SPECIFIC_H_
+#define LIB_JXL_BASE_COMPILER_SPECIFIC_H_
+
+// Macros for compiler version + nonstandard keywords, e.g. __builtin_expect.
+
+#include <stdint.h>
+
+// #if is shorter and safer than #ifdef. *_VERSION are zero if not detected,
+// otherwise 100 * major + minor version. Note that other packages check for
+// #ifdef COMPILER_MSVC, so we cannot use that same name.
+
+#ifdef _MSC_VER
+#define JXL_COMPILER_MSVC _MSC_VER
+#else
+#define JXL_COMPILER_MSVC 0
+#endif
+
+#ifdef __GNUC__
+#define JXL_COMPILER_GCC (__GNUC__ * 100 + __GNUC_MINOR__)
+#else
+#define JXL_COMPILER_GCC 0
+#endif
+
+#ifdef __clang__
+#define JXL_COMPILER_CLANG (__clang_major__ * 100 + __clang_minor__)
+// Clang pretends to be GCC for compatibility.
+#undef JXL_COMPILER_GCC
+#define JXL_COMPILER_GCC 0
+#else
+#define JXL_COMPILER_CLANG 0
+#endif
+
+#if JXL_COMPILER_MSVC
+#define JXL_RESTRICT __restrict
+#elif JXL_COMPILER_GCC || JXL_COMPILER_CLANG
+#define JXL_RESTRICT __restrict__
+#else
+#define JXL_RESTRICT
+#endif
+
+#if JXL_COMPILER_MSVC
+#define JXL_INLINE __forceinline
+#define JXL_NOINLINE __declspec(noinline)
+#else
+#define JXL_INLINE inline __attribute__((always_inline))
+#define JXL_NOINLINE __attribute__((noinline))
+#endif
+
+#if JXL_COMPILER_MSVC
+#define JXL_NORETURN __declspec(noreturn)
+#else  // Same fallback as JXL_INLINE/JXL_NOINLINE, so it is always defined.
+#define JXL_NORETURN __attribute__((noreturn))
+#endif
+
+#if JXL_COMPILER_MSVC
+#define JXL_UNREACHABLE __assume(false)
+#elif JXL_COMPILER_CLANG || JXL_COMPILER_GCC >= 405
+#define JXL_UNREACHABLE __builtin_unreachable()
+#else
+#define JXL_UNREACHABLE
+#endif
+
+#if JXL_COMPILER_MSVC
+#define JXL_MAYBE_UNUSED
+#else
+// Encountered "attribute list cannot appear here" when using the C++17
+// [[maybe_unused]], so only use the old style attribute for now.
+#define JXL_MAYBE_UNUSED __attribute__((unused))
+#endif
+
+#if JXL_COMPILER_MSVC
+// Unsupported, __assume is not the same.
+#define JXL_LIKELY(expr) expr
+#define JXL_UNLIKELY(expr) expr
+#else
+#define JXL_LIKELY(expr) __builtin_expect(!!(expr), 1)
+#define JXL_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
+#endif
+
+#if JXL_COMPILER_MSVC
+#include <intrin.h>
+
+#pragma intrinsic(_ReadWriteBarrier)
+#define JXL_COMPILER_FENCE _ReadWriteBarrier()
+#elif JXL_COMPILER_GCC || JXL_COMPILER_CLANG
+#define JXL_COMPILER_FENCE asm volatile("" : : : "memory")
+#else
+#define JXL_COMPILER_FENCE
+#endif
+
+// Returns a void* pointer which the compiler then assumes is N-byte aligned.
+// Example: float* JXL_RESTRICT aligned = (float*)JXL_ASSUME_ALIGNED(in, 32);
+//
+// The assignment semantics are required by GCC/Clang. ICC provides an in-place
+// __assume_aligned, whereas MSVC's __assume appears unsuitable.
+#if JXL_COMPILER_CLANG
+// Early versions of Clang did not support __builtin_assume_aligned.
+#define JXL_HAS_ASSUME_ALIGNED __has_builtin(__builtin_assume_aligned)
+#elif JXL_COMPILER_GCC
+#define JXL_HAS_ASSUME_ALIGNED 1
+#else
+#define JXL_HAS_ASSUME_ALIGNED 0
+#endif
+
+#if JXL_HAS_ASSUME_ALIGNED
+#define JXL_ASSUME_ALIGNED(ptr, align) __builtin_assume_aligned((ptr), (align))
+#else
+#define JXL_ASSUME_ALIGNED(ptr, align) (ptr) /* not supported */
+#endif
+
+#ifdef __has_attribute
+#define JXL_HAVE_ATTRIBUTE(x) __has_attribute(x)
+#else
+#define JXL_HAVE_ATTRIBUTE(x) 0
+#endif
+
+// Raises warnings if the function return value is unused. Should appear as the
+// first part of a function definition/declaration.
+#if JXL_HAVE_ATTRIBUTE(nodiscard)
+#define JXL_MUST_USE_RESULT [[nodiscard]]
+#elif JXL_COMPILER_CLANG && JXL_HAVE_ATTRIBUTE(warn_unused_result)
+#define JXL_MUST_USE_RESULT __attribute__((warn_unused_result))
+#else
+#define JXL_MUST_USE_RESULT
+#endif
+
+// Disable certain -fsanitize flags for functions that are expected to include
+// things like unsigned integer overflow. For example use in the function
+// declaration JXL_NO_SANITIZE("unsigned-integer-overflow") to silence unsigned
+// integer overflow ubsan messages.
+#if JXL_COMPILER_CLANG && JXL_HAVE_ATTRIBUTE(no_sanitize)
+#define JXL_NO_SANITIZE(X) __attribute__((no_sanitize(X)))
+#else
+#define JXL_NO_SANITIZE(X)
+#endif
+
+#if JXL_HAVE_ATTRIBUTE(__format__)
+#define JXL_FORMAT(idx_fmt, idx_arg) \
+  __attribute__((__format__(__printf__, idx_fmt, idx_arg)))
+#else
+#define JXL_FORMAT(idx_fmt, idx_arg)
+#endif
+
+#if JXL_COMPILER_MSVC
+using ssize_t = intptr_t;
+#endif
+
+#endif  // LIB_JXL_BASE_COMPILER_SPECIFIC_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/data_parallel.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/data_parallel.cc
new file mode 100644
index 0000000000..20a911255c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/data_parallel.cc
@@ -0,0 +1,23 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/data_parallel.h"
+
+namespace jxl {
+
+// static: single-threaded runner; every task is reported with thread id 0.
+JxlParallelRetCode ThreadPool::SequentialRunnerStatic(
+    void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+    JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) {
+  const JxlParallelRetCode status = init(jpegxl_opaque, 1);
+  if (status != 0) return status;
+  // Initialization announced one worker; run the tasks in order right here.
+  for (uint32_t task = start_range; task < end_range; ++task) {
+    func(jpegxl_opaque, task, 0);
+  }
+  return 0;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/data_parallel.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/data_parallel.h
new file mode 100644
index 0000000000..8982974009
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/data_parallel.h
@@ -0,0 +1,155 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_DATA_PARALLEL_H_
+#define LIB_JXL_BASE_DATA_PARALLEL_H_
+
+// Portable, low-overhead C++11 ThreadPool alternative to OpenMP for
+// data-parallel computations.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "jxl/parallel_runner.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+class ThreadPool {
+ public:
+  // Use this type as an InitFunc to skip the initialization step in Run().
+  // When this is used the return value of Run() is always true and does not
+  // need to be checked.
+  struct SkipInit {};
+
+  // A null `runner` selects the built-in sequential runner.
+  ThreadPool(JxlParallelRunner runner, void* runner_opaque)
+      : runner_(runner ? runner : &ThreadPool::SequentialRunnerStatic),
+        runner_opaque_(runner ? runner_opaque : static_cast<void*>(this)) {}
+
+  ThreadPool(const ThreadPool&) = delete;
+  ThreadPool& operator=(const ThreadPool&) = delete;
+
+  // Runs init_func(num_threads) followed by data_func(task, thread) on worker
+  // thread(s) for every task in [begin, end). init_func() must return a Status
+  // indicating whether the initialization succeeded.
+  // "thread" is an integer smaller than num_threads.
+  // Not thread-safe - no two calls to Run may overlap.
+  // Subsequent calls will reuse the same threads.
+  // `caller` is accepted for call sites but is not used by this overload.
+  //
+  // Precondition: begin <= end.
+  template <class InitFunc, class DataFunc>
+  Status Run(uint32_t begin, uint32_t end, const InitFunc& init_func,
+             const DataFunc& data_func, const char* caller = "") {
+    JXL_ASSERT(begin <= end);
+    if (begin == end) return true;
+    RunCallState<InitFunc, DataFunc> call_state(init_func, data_func);
+    // The runner_ uses the C convention and returns 0 on success (nonzero on
+    // error), so we convert it to a Status.
+    return (*runner_)(runner_opaque_, static_cast<void*>(&call_state),
+                      &call_state.CallInitFunc, &call_state.CallDataFunc, begin,
+                      end) == 0;
+  }
+
+  // Overload that returns bool when SkipInit is used.
+  template <class DataFunc>
+  bool Run(uint32_t begin, uint32_t end, const SkipInit /* tag */,
+           const DataFunc& data_func, const char* caller = "") {
+    return Run(begin, end, ReturnTrueInit, data_func, caller);
+  }
+
+ private:
+  static Status ReturnTrueInit(size_t num_threads) { return true; }
+
+  // class holding the state of a Run() call to pass to the runner_ as an
+  // opaque_jpegxl pointer.
+  template <class InitFunc, class DataFunc>
+  class RunCallState final {
+   public:
+    RunCallState(const InitFunc& init_func, const DataFunc& data_func)
+        : init_func_(init_func), data_func_(data_func) {}
+
+    // JxlParallelRunInit interface.
+    static int CallInitFunc(void* jpegxl_opaque, size_t num_threads) {
+      const auto* self = static_cast<const RunCallState*>(jpegxl_opaque);
+      // Returns -1 when the internal init function returns false Status to
+      // indicate an error.
+      return self->init_func_(num_threads) ? 0 : -1;
+    }
+
+    // JxlParallelRunFunction interface.
+    static void CallDataFunc(void* jpegxl_opaque, uint32_t value,
+                             size_t thread_id) {
+      const auto* self = static_cast<const RunCallState*>(jpegxl_opaque);
+      return self->data_func_(value, thread_id);
+    }
+
+   private:
+    const InitFunc& init_func_;
+    const DataFunc& data_func_;
+  };
+
+  // Default JxlParallelRunner used when no runner is provided by the
+  // caller. This runner doesn't use any threading and thread_id is always 0.
+  static JxlParallelRetCode SequentialRunnerStatic(
+      void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+      JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range);
+
+  // The caller supplied runner function and its opaque void*.
+  const JxlParallelRunner runner_;
+  void* const runner_opaque_;
+};
+
+// TODO(deymo): Convert the return value to a Status when not using SkipInit.
+template <class InitFunc, class DataFunc>
+bool RunOnPool(ThreadPool* pool, const uint32_t begin, const uint32_t end,
+               const InitFunc& init_func, const DataFunc& data_func,
+               const char* caller) {
+  if (pool != nullptr) {
+    const Status ok = pool->Run(begin, end, init_func, data_func, caller);
+    return ok;
+  }
+  // No pool supplied: use a temporary one constructed without a runner.
+  ThreadPool fallback(nullptr, nullptr);
+  const Status ok = fallback.Run(begin, end, init_func, data_func, caller);
+  return ok;
+}
+
+// Accelerates multiple unsigned 32-bit divisions with the same divisor by
+// precomputing a multiplier. This is useful for splitting a contiguous range of
+// indices (the task index) into 2D indices. Exhaustively tested on dividends
+// up to 4M with non-power of two divisors up to 2K.
+class Divider {
+ public:
+  // "d" is the divisor (what to divide by).
+  explicit Divider(const uint32_t d) : shift_(FloorLog2Nonzero(d)) {
+    // Power of two divisors (including 1) are not supported because it is more
+    // efficient to special-case them at a higher level.
+    JXL_ASSERT((d & (d - 1)) != 0);
+    // (d == 0 fails this check too, but only after FloorLog2Nonzero(0) ran.)
+    // ceil_log2 = floor_log2 + 1 because we ruled out powers of two above.
+    const uint64_t next_pow2 = 1ULL << (shift_ + 1);
+    // Computed in 64 bits, then implicitly narrowed to the 32-bit mul_.
+    mul_ = ((next_pow2 - d) << 32) / d + 1;
+  }
+
+  // "n" is the numerator (what is being divided).
+  inline uint32_t operator()(const uint32_t n) const {
+    // Algorithm from "Division by Invariant Integers using Multiplication".
+    // Its "sh1" is hardcoded to 1 because we don't need to handle d=1.
+    const uint32_t hi = (uint64_t(mul_) * n) >> 32;
+    return (hi + ((n - hi) >> 1)) >> shift_;
+  }
+
+ private:
+  uint32_t mul_;  // Multiplier precomputed by the constructor.
+  const int shift_;  // FloorLog2Nonzero(d); the final right shift.
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_DATA_PARALLEL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/descriptive_statistics.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/descriptive_statistics.cc
new file mode 100644
index 0000000000..9303f2c776
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/descriptive_statistics.cc
@@ -0,0 +1,102 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/descriptive_statistics.h"
+
+#include <stdio.h>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Merges `other` into this accumulator (count, min/max, product, moments).
+void Stats::Assimilate(const Stats& other) {
+  const int64_t total_n = n_ + other.n_;
+  if (total_n == 0) return;  // Nothing to do; prevents div by zero.
+
+  min_ = std::min(min_, other.min_);
+  max_ = std::max(max_, other.max_);
+  product_ *= other.product_;
+
+  // Count products are formed in double; int64 multiplies could overflow.
+  const double product_n = static_cast<double>(n_) * other.n_;
+  const double n2 = static_cast<double>(n_) * n_;
+  const double other_n2 = static_cast<double>(other.n_) * other.n_;
+  const double total_n2 = static_cast<double>(total_n) * total_n;
+  const double total_n3 = total_n2 * total_n;
+  // Precompute reciprocal for speed - used at least twice.
+  const double inv_total_n = 1.0 / total_n;
+  const double inv_total_n2 = 1.0 / total_n2;
+
+  const double delta = other.m1_ - m1_;
+  const double delta2 = delta * delta;
+  const double delta3 = delta * delta2;
+  const double delta4 = delta2 * delta2;
+
+  m1_ = (n_ * m1_ + other.n_ * other.m1_) * inv_total_n;
+
+  const double new_m2 = m2_ + other.m2_ + delta2 * product_n * inv_total_n;
+
+  const double new_m3 =
+      m3_ + other.m3_ + delta3 * product_n * (n_ - other.n_) * inv_total_n2 +
+      3.0 * delta * (n_ * other.m2_ - other.n_ * m2_) * inv_total_n;
+
+  m4_ += other.m4_ +
+         delta4 * product_n * (n2 - product_n + other_n2) / total_n3 +
+         6.0 * delta2 * (n2 * other.m2_ + other_n2 * m2_) * inv_total_n2 +
+         4.0 * delta * (n_ * other.m3_ - other.n_ * m3_) * inv_total_n;
+  // The m4_ update still needed the old m2_/m3_/n_, so those are stored last.
+  m2_ = new_m2;
+  m3_ = new_m3;
+  n_ = total_n;
+}
+
+// Formats the statistics not masked out by `exclude` into one line of text.
+std::string Stats::ToString(int exclude) const {
+  if (Count() == 0) return std::string("(none)");
+  char buf[300] = "";  // Stays an empty string if every field is excluded.
+  size_t pos = 0;
+  int ret;  // snprintf: length it wanted to write, or negative for error.
+  // Each step asserts that its text fit, so `sizeof(buf) - pos` cannot wrap.
+  if ((exclude & kNoCount) == 0) {
+    ret = snprintf(buf + pos, sizeof(buf) - pos, "Count=%6zu ",
+                   static_cast<size_t>(Count()));
+    JXL_ASSERT(ret > 0 && static_cast<size_t>(ret) < sizeof(buf) - pos);
+    pos += ret;
+  }
+
+  if ((exclude & kNoMeanSD) == 0) {
+    ret = snprintf(buf + pos, sizeof(buf) - pos, "Mean=%9.6f SD=%8.5f ", Mean(),
+                   StandardDeviation());
+    JXL_ASSERT(ret > 0 && static_cast<size_t>(ret) < sizeof(buf) - pos);
+    pos += ret;
+  }
+
+  if ((exclude & kNoMinMax) == 0) {
+    ret = snprintf(buf + pos, sizeof(buf) - pos, "Min=%8.5f Max=%8.5f ", Min(),
+                   Max());
+    JXL_ASSERT(ret > 0 && static_cast<size_t>(ret) < sizeof(buf) - pos);
+    pos += ret;
+  }
+
+  if ((exclude & kNoSkewKurt) == 0) {
+    ret = snprintf(buf + pos, sizeof(buf) - pos, "Skew=%5.2f Kurt=%7.2f ",
+                   Skewness(), Kurtosis());
+    JXL_ASSERT(ret > 0 && static_cast<size_t>(ret) < sizeof(buf) - pos);
+    pos += ret;
+  }
+
+  if ((exclude & kNoGeomean) == 0) {
+    ret = snprintf(buf + pos, sizeof(buf) - pos, "GeoMean=%9.6f ",
+                   GeometricMean());
+    JXL_ASSERT(ret > 0 && static_cast<size_t>(ret) < sizeof(buf) - pos);
+    pos += ret;
+  }
+
+  JXL_ASSERT(pos < sizeof(buf));
+  return buf;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/descriptive_statistics.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/descriptive_statistics.h
new file mode 100644
index 0000000000..0d1e4850e1
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/descriptive_statistics.h
@@ -0,0 +1,126 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_DESCRIPTIVE_STATISTICS_H_
+#define LIB_JXL_BASE_DESCRIPTIVE_STATISTICS_H_
+
+// For analyzing the range/distribution of scalars.
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <cmath>
+#include <string>
+
+namespace jxl {
+
+// Descriptive statistics of a variable (4 moments).
+class Stats {
+ public:
+  // Folds one sample into the count, range, product and running moments.
+  void Notify(const float x) {
+    ++n_;
+
+    min_ = std::min(min_, x);
+    max_ = std::max(max_, x);
+    product_ *= x;
+
+    // Online moments. Reference: https://goo.gl/9ha694
+    const double d = x - m1_;
+    const double d_div_n = d / n_;
+    const double d2n1_div_n = d * (n_ - 1) * d_div_n;
+    const int64_t n_poly = n_ * n_ - 3 * n_ + 3;
+    m1_ += d_div_n;
+    m4_ += d_div_n * (d_div_n * (d2n1_div_n * n_poly + 6.0 * m2_) - 4.0 * m3_);
+    m3_ += d_div_n * (d2n1_div_n * (n_ - 2) - 3.0 * m2_);
+    m2_ += d2n1_div_n;
+  }
+
+  void Assimilate(const Stats& other);
+
+  int64_t Count() const { return n_; }
+  float Min() const { return min_; }
+  float Max() const { return max_; }
+
+  double GeometricMean() const {
+    return n_ == 0 ? 0.0 : std::pow(product_, 1.0 / n_);
+  }
+
+  double Mean() const { return m1_; }
+  // Same as Mu2. Assumes n_ is large. Here and below the count is divided as
+  // a double: narrowing the 64-bit n_ to int would truncate large counts.
+  double SampleVariance() const {
+    return n_ == 0 ? 0.0 : m2_ / static_cast<double>(n_);
+  }
+  // Unbiased estimator for population variance even for smaller n_.
+  double Variance() const {
+    if (n_ == 0) return 0.0;
+    if (n_ == 1) return m2_;
+    return m2_ / static_cast<double>(n_ - 1);
+  }
+  double StandardDeviation() const { return std::sqrt(Variance()); }
+  // Near zero for normal distributions; if positive on a unimodal distribution,
+  // the right tail is fatter. Assumes n_ is large.
+  double SampleSkewness() const {
+    if (std::abs(m2_) < 1E-7) return 0.0;
+    return m3_ * std::sqrt(static_cast<double>(n_)) / std::pow(m2_, 1.5);
+  }
+  // Corrected for bias (same as Wikipedia and Minitab but not Excel).
+  double Skewness() const {
+    if (n_ == 0) return 0.0;
+    const double biased = SampleSkewness();
+    const double r = (n_ - 1.0) / n_;
+    return biased * std::pow(r, 1.5);
+  }
+  // Near zero for normal distributions; smaller values indicate fewer/smaller
+  // outliers and larger indicates more/larger outliers. Assumes n_ is large.
+  double SampleKurtosis() const {
+    if (std::abs(m2_) < 1E-7) return 0.0;
+    return m4_ * n_ / (m2_ * m2_);
+  }
+  // Corrected for bias (same as Wikipedia and Minitab but not Excel).
+  double Kurtosis() const {
+    if (n_ == 0) return 0.0;
+    const double biased = SampleKurtosis();
+    const double r = (n_ - 1.0) / n_;
+    return biased * r * r;
+  }
+
+  // Central moments, useful for "method of moments"-based parameter estimation
+  // of a mixture of two Gaussians. Assumes Count() != 0.
+  double Mu1() const { return m1_; }
+  double Mu2() const { return m2_ / static_cast<double>(n_); }
+  double Mu3() const { return m3_ / static_cast<double>(n_); }
+  double Mu4() const { return m4_ / static_cast<double>(n_); }
+
+  // Which statistics to EXCLUDE in ToString
+  enum {
+    kNoCount = 1,
+    kNoMeanSD = 2,
+    kNoMinMax = 4,
+    kNoSkewKurt = 8,
+    kNoGeomean = 16
+  };
+
+  std::string ToString(int exclude = 0) const;
+
+ private:
+  int64_t n_ = 0;  // signed for faster conversion + safe subtraction
+
+  float min_ = 1E30f;
+  float max_ = -1E30f;
+
+  double product_ = 1.0;
+
+  // Moments
+  double m1_ = 0.0;
+  double m2_ = 0.0;
+  double m3_ = 0.0;
+  double m4_ = 0.0;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_DESCRIPTIVE_STATISTICS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/file_io.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/file_io.h
new file mode 100644
index 0000000000..2ecf854e1b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/file_io.h
@@ -0,0 +1,112 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_FILE_IO_H_
+#define LIB_JXL_BASE_FILE_IO_H_
+
+// Helper functions for reading/writing files.
+
+#include <stdio.h>
+#include <sys/stat.h>
+
+#include <string>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Yields everything from the last '.' onward (dot included), or an empty
+// string when there is no dot. Hidden files such as ".bashrc" are not special-
+// cased, and a dot in an earlier path component can be picked up instead.
+static inline std::string Extension(const std::string& filename) {
+  const std::string::size_type dot = filename.find_last_of('.');
+  const bool found = dot != std::string::npos;
+  return found ? filename.substr(dot) : std::string();
+}
+
+// RAII, ensures files are closed even when returning early.
+class FileWrapper {
+ public:
+  FileWrapper(const FileWrapper& other) = delete;
+  FileWrapper& operator=(const FileWrapper& other) = delete;
+  // Opens `pathname` via fopen(); the wrapped handle is null if that fails.
+  explicit FileWrapper(const std::string& pathname, const char* mode)
+      : file_(fopen(pathname.c_str(), mode)) {}
+  // Closes the file if one was opened; a failing fclose() trips JXL_CHECK.
+  ~FileWrapper() {
+    if (file_ != nullptr) {
+      const int err = fclose(file_);
+      JXL_CHECK(err == 0);
+    }
+  }
+
+  // We intend to use FileWrapper as a replacement of FILE.
+  // NOLINTNEXTLINE(google-explicit-constructor)
+  operator FILE*() const { return file_; }
+
+ private:
+  FILE* const file_;
+};
+
+// Reads the whole regular file `pathname` into `*bytes` (resized to fit).
+template <typename ContainerType>
+static inline Status ReadFile(const std::string& pathname,
+                              ContainerType* JXL_RESTRICT bytes) {
+  FileWrapper f(pathname, "rb");
+  if (f == nullptr) return JXL_FAILURE("Failed to open file for reading");
+  // Ensure it is a regular file (looked up by path, after the open above).
+#ifdef _WIN32
+  struct __stat64 s = {};
+  const int err = _stat64(pathname.c_str(), &s);
+  const bool is_file = (s.st_mode & S_IFREG) != 0;
+#else
+  struct stat s = {};
+  const int err = stat(pathname.c_str(), &s);
+  const bool is_file = S_ISREG(s.st_mode);
+#endif
+  if (err != 0) return JXL_FAILURE("Failed to obtain file status");
+  if (!is_file) return JXL_FAILURE("Not a file");
+
+  // Get size of file in bytes; an empty file is reported as a failure.
+  const int64_t size = s.st_size;
+  if (size <= 0) return JXL_FAILURE("Empty or invalid file size");
+  bytes->resize(static_cast<size_t>(size));
+  // Keep calling fread until the buffer is full; a zero-byte read is an error.
+  size_t pos = 0;
+  while (pos < bytes->size()) {
+    // Needed in case ContainerType is std::string, whose data() is const.
+    char* bytes_writable = reinterpret_cast<char*>(&(*bytes)[0]);
+    const size_t bytes_read =
+        fread(bytes_writable + pos, 1, bytes->size() - pos, f);
+    if (bytes_read == 0) return JXL_FAILURE("Failed to read");
+    pos += bytes_read;
+  }
+  JXL_ASSERT(pos == bytes->size());
+  return true;
+}
+
+// Writes all of `bytes` to the file at `pathname`, opened in "wb" mode.
+template <typename ContainerType>
+static inline Status WriteFile(const ContainerType& bytes,
+                               const std::string& pathname) {
+  FileWrapper f(pathname, "wb");
+  if (f == nullptr) return JXL_FAILURE("Failed to open file for writing");
+
+  // Repeat until everything is written; a zero-byte write is an error.
+  size_t done = 0;
+  for (size_t n; done < bytes.size(); done += n) {
+    n = fwrite(bytes.data() + done, 1, bytes.size() - done, f);
+    if (n == 0) return JXL_FAILURE("Failed to write");
+  }
+  JXL_ASSERT(done == bytes.size());
+
+  return true;
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_FILE_IO_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/iaca.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/iaca.h
new file mode 100644
index 0000000000..e5732dae5c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/iaca.h
@@ -0,0 +1,65 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_IACA_H_
+#define LIB_JXL_BASE_IACA_H_
+
+#include "lib/jxl/base/compiler_specific.h"
+
+// IACA (Intel's Code Analyzer) analyzes instruction latencies, but only for
+// code between special markers. These functions embed such markers in an
+// executable, but only for reading via IACA - they deliberately trigger a
+// crash if executed to ensure they are removed in normal builds.
+
+#ifndef JXL_IACA_ENABLED
+#define JXL_IACA_ENABLED 0
+#endif
+
+namespace jxl {
+
+// Call before the region of interest; empty unless IACA markers are enabled.
+static JXL_INLINE void BeginIACA() {
+#if JXL_IACA_ENABLED && (JXL_COMPILER_GCC || JXL_COMPILER_CLANG)
+  asm volatile(
+      // UD2 "instruction" raises an invalid opcode exception.
+      ".byte 0x0F, 0x0B\n\t"
+      // Magic sequence recognized by IACA (MOV + addr32 fs:NOP). EBX is
+      // overwritten without being declared as a clobber, but we don't care
+      // because the code won't be run, and we want IACA to observe the same
+      // code the compiler would have generated without this marker.
+      "movl $111, %%ebx\n\t"
+      ".byte 0x64, 0x67, 0x90\n\t"
+      :
+      :
+      // (Allegedly) clobbering memory may prevent reordering.
+      : "memory");
+#endif
+}
+
+// Call after the region of interest; empty unless IACA markers are enabled.
+static JXL_INLINE void EndIACA() {
+#if JXL_IACA_ENABLED && (JXL_COMPILER_GCC || JXL_COMPILER_CLANG)
+  asm volatile(
+      // IACA end marker (MOV + addr32 fs:NOP); EBX is overwritten here too.
+      "movl $222, %%ebx\n\t"
+      ".byte 0x64, 0x67, 0x90\n\t"
+      // UD2: the same invalid-opcode bytes that BeginIACA() emits first.
+      ".byte 0x0F, 0x0B\n\t"
+      :
+      :
+      // (Allegedly) clobbering memory may prevent reordering.
+      : "memory");
+#endif
+}
+
+// Scope marker: constructor calls BeginIACA(), destructor calls EndIACA().
+struct ScopeIACA {
+  JXL_INLINE ScopeIACA() { BeginIACA(); }
+  JXL_INLINE ~ScopeIACA() { EndIACA(); }
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_IACA_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/os_macros.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/os_macros.h
new file mode 100644
index 0000000000..b230f26758
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/os_macros.h
@@ -0,0 +1,50 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_OS_MACROS_H_
+#define LIB_JXL_BASE_OS_MACROS_H_
+
+// Defines the JXL_OS_* macros.
+
+#if defined(_WIN32) || defined(_WIN64)
+#define JXL_OS_WIN 1
+#else
+#define JXL_OS_WIN 0
+#endif
+
+#ifdef __linux__
+#define JXL_OS_LINUX 1
+#else
+#define JXL_OS_LINUX 0
+#endif
+
+#ifdef __MACH__
+#define JXL_OS_MAC 1
+#else
+#define JXL_OS_MAC 0
+#endif
+
+#define JXL_OS_IOS 0
+#ifdef __APPLE__
+#include <TargetConditionals.h>
+#if TARGET_OS_IPHONE
+#undef JXL_OS_IOS
+#define JXL_OS_IOS 1
+#endif
+#endif
+
+#ifdef __FreeBSD__
+#define JXL_OS_FREEBSD 1
+#else
+#define JXL_OS_FREEBSD 0
+#endif
+
+#ifdef __HAIKU__
+#define JXL_OS_HAIKU 1
+#else
+#define JXL_OS_HAIKU 0
+#endif
+
+#endif  // LIB_JXL_BASE_OS_MACROS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/override.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/override.h
new file mode 100644
index 0000000000..1f8b657974
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/override.h
@@ -0,0 +1,29 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_OVERRIDE_H_
+#define LIB_JXL_BASE_OVERRIDE_H_
+
+// 'Trool' for command line arguments: force enable/disable, or use default.
+
+namespace jxl {
+
+// Tri-state switch: kDefault leaves a feature (typically a FrameHeader flag)
+// alone, while kOn/kOff force it.
+enum class Override : int { kOn = 1, kOff = 0, kDefault = -1 };
+
+static inline Override OverrideFromBool(bool flag) {
+  return !flag ? Override::kOff : Override::kOn;  // never kDefault
+}
+
+static inline bool ApplyOverride(Override o, bool default_condition) {
+  const bool forced = o == Override::kOn || o == Override::kOff;
+  // A forced setting wins; anything else defers to the caller's default.
+  return forced ? o == Override::kOn : default_condition;
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_OVERRIDE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/padded_bytes.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/padded_bytes.cc
new file mode 100644
index 0000000000..11e4bff6fe
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/padded_bytes.cc
@@ -0,0 +1,63 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/padded_bytes.h"
+
+namespace jxl {
+
+// Reallocates to max(capacity, 3 * capacity_ / 2, 64) bytes (plus padding),
+// keeping the first size_ bytes. On failure: data() == nullptr, sizes are 0.
+void PaddedBytes::IncreaseCapacityTo(size_t capacity) {
+  JXL_ASSERT(capacity > capacity_);
+  size_t new_capacity = std::max(capacity, 3 * capacity_ / 2);
+  new_capacity = std::max<size_t>(64, new_capacity);
+  // BitWriter writes up to 7 bytes past the end.
+  CacheAlignedUniquePtr new_data = AllocateArray(new_capacity + 8);
+  if (new_data == nullptr) {
+    // Allocation failed, discard all data to ensure this is noticed: the swap
+    // parks the old buffer in new_data (dropped on return); data() == nullptr.
+    size_ = capacity_ = 0;
+    std::swap(new_data, data_);
+    return;
+  }
+  if (data_ == nullptr) {
+    // First allocation: ensure first byte is initialized (won't be copied).
+    new_data[0] = 0;
+  } else {
+    // Subsequent resize: copy existing data to new location.
+    memcpy(new_data.get(), data_.get(), size_);
+    // Ensure that the first new byte is initialized, to allow write_bits to
+    // safely append to the newly-resized PaddedBytes.
+    new_data[size_] = 0;
+  }
+  capacity_ = new_capacity;
+  std::swap(new_data, data_);
+}
+
+// Replaces the contents with [new_begin, new_end), which may alias this
+// object's allocation. Skips the copy when resize() leaves size() == 0.
+void PaddedBytes::assign(const uint8_t* new_begin, const uint8_t* new_end) {
+  JXL_DASSERT(new_begin <= new_end);
+  const size_t new_size = static_cast<size_t>(new_end - new_begin);
+
+  // memcpy requires non-overlapping ranges, and resizing might invalidate the
+  // new range. Neither happens if the new range is completely to the left or
+  // right of the _allocated_ range (irrespective of size_).
+  const uint8_t* allocated_end = begin() + capacity_;
+  const bool outside = new_end <= begin() || new_begin >= allocated_end;
+  if (outside) {
+    resize(new_size);  // grow or shrink; size() == 0 if allocation failed
+    if (size() != 0) memcpy(data(), new_begin, size());
+    return;
+  }
+  // There is overlap. The new size cannot be larger because we own the memory
+  // and the new range cannot include anything outside the allocated range.
+  JXL_ASSERT(new_size <= capacity_);
+  // memmove allows overlap and capacity_ is sufficient.
+  memmove(data(), new_begin, new_size);
+  size_ = new_size;  // shrink
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/padded_bytes.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/padded_bytes.h
new file mode 100644
index 0000000000..1840a6c936
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/padded_bytes.h
@@ -0,0 +1,195 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_PADDED_BYTES_H_
+#define LIB_JXL_BASE_PADDED_BYTES_H_
+
+// std::vector replacement with padding to reduce bounds checks in WriteBits
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>  // memcpy
+
+#include <algorithm>  // max
+#include <initializer_list>
+#include <utility>  // swap
+
+#include "lib/jxl/base/cache_aligned.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Provides a subset of the std::vector interface with some differences:
+// - allows BitWriter to write 64 bits at a time without bounds checking;
+// - ONLY zero-initializes the first byte (required by BitWriter);
+// - ensures cache-line alignment.
+class PaddedBytes {
+ public:
+  // Required for output params.
+  PaddedBytes() : size_(0), capacity_(0) {}
+
+  explicit PaddedBytes(size_t size) : size_(size), capacity_(0) {
+    if (size != 0) IncreaseCapacityTo(size);
+  }
+
+  PaddedBytes(size_t size, uint8_t value) : size_(size), capacity_(0) {
+    if (size != 0) {
+      IncreaseCapacityTo(size);
+    }
+    if (size_ != 0) {
+      memset(data(), value, size);
+    }
+  }
+
+  PaddedBytes(const PaddedBytes& other) : size_(other.size_), capacity_(0) {
+    if (size_ != 0) IncreaseCapacityTo(size_);
+    if (data() != nullptr) memcpy(data(), other.data(), size_);
+  }
+  PaddedBytes& operator=(const PaddedBytes& other) {
+    // Self-assignment is safe.
+    resize(other.size());
+    if (data() != nullptr) memmove(data(), other.data(), size_);
+    return *this;
+  }
+
+  // default is not OK - need to set other.size_ to 0!
+  PaddedBytes(PaddedBytes&& other) noexcept
+      : size_(other.size_),
+        capacity_(other.capacity_),
+        data_(std::move(other.data_)) {
+    other.size_ = other.capacity_ = 0;
+  }
+  PaddedBytes& operator=(PaddedBytes&& other) noexcept {
+    size_ = other.size_;
+    capacity_ = other.capacity_;
+    data_ = std::move(other.data_);
+
+    if (&other != this) {
+      other.size_ = other.capacity_ = 0;
+    }
+    return *this;
+  }
+
+  void swap(PaddedBytes& other) {
+    std::swap(size_, other.size_);
+    std::swap(capacity_, other.capacity_);
+    std::swap(data_, other.data_);
+  }
+
+  void reserve(size_t capacity) {
+    if (capacity > capacity_) IncreaseCapacityTo(capacity);
+  }
+
+  // NOTE: unlike vector, this does not initialize the new data!
+  // However, we guarantee that write_bits can safely append after
+  // the resize, as we zero-initialize the first new byte of data.
+  // If size <= capacity(), does not invalidate the memory.
+  void resize(size_t size) {
+    if (size > capacity_) IncreaseCapacityTo(size);
+    size_ = (data() == nullptr) ? 0 : size;
+  }
+
+  // resize(size) plus explicit initialization of the new data with `value`.
+  void resize(size_t size, uint8_t value) {
+    size_t old_size = size_;
+    resize(size);
+    if (size_ > old_size) {
+      memset(data() + old_size, value, size_ - old_size);
+    }
+  }
+
+  // Amortized constant complexity due to exponential growth.
+  void push_back(uint8_t x) {
+    if (size_ == capacity_) {
+      IncreaseCapacityTo(capacity_ + 1);
+      if (data() == nullptr) return;
+    }
+
+    data_[size_++] = x;
+  }
+
+  size_t size() const { return size_; }
+  size_t capacity() const { return capacity_; }
+
+  uint8_t* data() { return data_.get(); }
+  const uint8_t* data() const { return data_.get(); }
+
+  // std::vector operations implemented in terms of the public interface above.
+
+  void clear() { resize(0); }
+  bool empty() const { return size() == 0; }
+
+  void assign(std::initializer_list<uint8_t> il) {
+    resize(il.size());  // size() is 0 if `il` is empty or allocation failed
+    if (size() != 0) memcpy(data(), il.begin(), size());
+  }
+
+  // Replaces data() with [new_begin, new_end); potentially reallocates.
+  void assign(const uint8_t* new_begin, const uint8_t* new_end);
+
+  uint8_t* begin() { return data(); }
+  const uint8_t* begin() const { return data(); }
+  uint8_t* end() { return begin() + size(); }
+  const uint8_t* end() const { return begin() + size(); }
+
+  uint8_t& operator[](const size_t i) {
+    BoundsCheck(i);
+    return data()[i];
+  }
+  const uint8_t& operator[](const size_t i) const {
+    BoundsCheck(i);
+    return data()[i];
+  }
+
+  uint8_t& back() {
+    JXL_ASSERT(size() != 0);
+    return data()[size() - 1];
+  }
+  const uint8_t& back() const {
+    JXL_ASSERT(size() != 0);
+    return data()[size() - 1];
+  }
+
+  template <typename T>
+  void append(const T& other) {
+    append(reinterpret_cast<const uint8_t*>(other.data()),
+           reinterpret_cast<const uint8_t*>(other.data()) + other.size());
+  }
+
+  void append(const uint8_t* begin, const uint8_t* end) {
+    const size_t old_size = size();
+    resize(old_size + (end - begin));  // size() is 0 if allocation failed
+    if (size() > old_size) memcpy(data() + old_size, begin, end - begin);
+  }
+
+ private:
+  void BoundsCheck(size_t i) const {
+    // <= is safe due to padding and required by BitWriter.
+    JXL_ASSERT(i <= size());
+  }
+
+  // Copies existing data to newly allocated "data_". If allocation fails,
+  // data() == nullptr and size_ = capacity_ = 0.
+  // The new capacity will be at least 1.5 times the old capacity. This ensures
+  // that we avoid quadratic behaviour.
+  void IncreaseCapacityTo(size_t capacity);
+
+  size_t size_;
+  size_t capacity_;
+  CacheAlignedUniquePtr data_;
+};
+
+// Copies `s` into `out` at *byte_pos and advances *byte_pos past the copy.
+template <typename T>
+static inline void Append(const T& s, PaddedBytes* out,
+                          size_t* JXL_RESTRICT byte_pos) {
+  JXL_CHECK(*byte_pos + s.size() <= out->size());  // check BEFORE writing
+  memcpy(out->data() + *byte_pos, s.data(), s.size());
+  *byte_pos += s.size();
+}
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_PADDED_BYTES_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/profiler.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/profiler.h
new file mode 100644
index 0000000000..13f95d2b7a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/profiler.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_PROFILER_H_
+#define LIB_JXL_BASE_PROFILER_H_
+
+// High precision, low overhead time measurements. Returns exact call counts and
+// total elapsed time for user-defined 'zones' (code regions, i.e. C++ scopes).
+//
+// To use the profiler you must set the JPEGXL_ENABLE_PROFILER CMake flag, which
+// defines PROFILER_ENABLED and links against the libjxl_profiler library.
+
+// If zero, this file has no effect and no measurements will be recorded.
+#ifndef PROFILER_ENABLED
+#define PROFILER_ENABLED 0
+#endif  // PROFILER_ENABLED
+
+#if PROFILER_ENABLED
+
+#include "lib/profiler/profiler.h"
+
+#else  // !PROFILER_ENABLED
+
+#define PROFILER_ZONE(name)
+#define PROFILER_FUNC
+#define PROFILER_PRINT_RESULTS()
+
+#endif  // PROFILER_ENABLED
+
+#endif  // LIB_JXL_BASE_PROFILER_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/robust_statistics.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/robust_statistics.h
new file mode 100644
index 0000000000..4e6445b7f9
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/robust_statistics.h
@@ -0,0 +1,357 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_ROBUST_STATISTICS_H_
+#define LIB_JXL_BASE_ROBUST_STATISTICS_H_
+
+// Robust statistics: Mode, Median, MedianAbsoluteDeviation.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <hwy/base.h>
+#include <limits>
+#include <type_traits>
+#include <utility>
+#include <vector>
+namespace jxl {
+
+// Geometric mean of items[0, count); the product is accumulated in double.
+template <typename T>
+T Geomean(const T* items, size_t count) {
+  double product = 1.0;
+  const T* const last = items + count;
+  for (const T* it = items; it != last; ++it) product *= *it;
+  return static_cast<T>(std::pow(product, 1.0 / count));
+}
+
+// Integer overload: halves x, rounding up for x >= 0 via (x + 1) / 2.
+template <class T, typename std::enable_if<
+                       std::numeric_limits<T>::is_integer>::type* = nullptr>
+inline T Half(T x) {
+  return (x + 1) / 2;
+}
+
+// Non-integer overload: multiplies by 0.5 because mul is faster than div.
+template <class T, typename std::enable_if<
+                       !std::numeric_limits<T>::is_integer>::type* = nullptr>
+inline T Half(T x) {
+  return x * T(0.5);
+}
+
+// Returns the median in average O(num_samples) time. Side effect: reorders
+// `samples` so that values <= median precede the middle index and values
+// >= median follow it. Precondition: num_samples != 0 (HWY_ASSERT).
+template <typename T>
+T Median(T* samples, const size_t num_samples) {
+  HWY_ASSERT(num_samples != 0);
+  T* const middle = samples + num_samples / 2;
+  std::nth_element(samples, middle, samples + num_samples);
+  // Odd count: the middle element is the median.
+  if ((num_samples & 1) != 0) return *middle;
+  // Even count: average the middle element with the largest element of the
+  // lower half (Half() rounds up for integer T).
+  const T biggest = *std::max_element(samples, middle);
+  return Half(*middle + biggest);
+}
+
+// Convenience overload for vectors; reorders *samples in the same way.
+template <typename T>
+T Median(std::vector<T>* samples) {
+  return Median(samples->data(), samples->size());
+}
+
+template <typename T>
+static inline T Median3(const T a, const T b, const T c) {
+  return std::max(std::min(a, b), std::min(c, std::max(a, b)));
+}
+
+template <typename T>
+static inline T Median5(const T a, const T b, const T c, const T d, const T e) {
+  return Median3(e, std::max(std::min(a, b), std::min(c, d)),
+                 std::min(std::max(a, b), std::max(c, d)));
+}
+
+// Robust measure of variability: the median of |samples[i] - median|.
+template <typename T>
+T MedianAbsoluteDeviation(const T* samples, const size_t num_samples,
+                          const T median) {
+  HWY_ASSERT(num_samples != 0);
+  std::vector<T> deviations;
+  deviations.reserve(num_samples);
+  for (const T* s = samples; s != samples + num_samples; ++s) {
+    deviations.push_back(std::abs(*s - median));
+  }
+  return Median(&deviations);
+}
+
+template <typename T>
+T MedianAbsoluteDeviation(const std::vector<T>& samples, const T median) {
+  return MedianAbsoluteDeviation(samples.data(), samples.size(), median);
+}
+
+// Half{Range/Sample}Mode are implementations of "Robust estimators of the mode
+// and skewness of continuous data". The mode is less affected by outliers in
+// highly-skewed distributions than the median.
+
+// Robust estimator of the mode for data given as sorted values.
+// O(N*logN), N=num_values.
+class HalfSampleMode {
+ public:
+  // Returns mode. "sorted" must be non-empty and in ascending order.
+  template <typename T>
+  T operator()(const T* const HWY_RESTRICT sorted,
+               const size_t num_values) const {
+    int64_t center = num_values / 2;
+    int64_t width = num_values;
+
+    // Zoom in on modal intervals of decreasing width. Stop before we reach
+    // width=1, i.e. single values, for which there is no "slope".
+    while (width > 2) {
+      // Round up so we can still reach the outer edges of odd widths.
+      width = Half(width);
+
+      center = CenterOfIntervalWithMinSlope(sorted, num_values, center, width);
+    }
+
+    return sorted[center];  // mode := middle value in modal interval.
+  }
+
+ private:
+  // Returns center c of the min-slope (densest) region [c-radius, c+radius].
+  template <typename T>
+  static HWY_INLINE int64_t CenterOfIntervalWithMinSlope(
+      const T* HWY_RESTRICT sorted, const int64_t total_values,
+      const int64_t center, const int64_t width) {
+    const int64_t radius = Half(width);
+
+    auto compute_slope = [radius, total_values, sorted](
+                             int64_t c, int64_t* actual_center = nullptr) {
+      // For symmetry, check 2*radius+1 values, i.e. [min, max].
+      const int64_t min = std::max(c - radius, int64_t(0));
+      const int64_t max = std::min(c + radius, total_values - 1);
+      HWY_ASSERT(min < max);
+      HWY_ASSERT(sorted[min] <=
+                 sorted[max] + std::numeric_limits<float>::epsilon());
+      const float dx = max - min + 1;
+      const float slope = (sorted[max] - sorted[min]) / dx;
+
+      if (actual_center != nullptr) {
+        // c may be out of bounds, so return center of the clamped bounds.
+        *actual_center = Half(min + max);
+      }
+      return slope;
+    };
+
+    // First pass: find min_slope over all 2*radius+1 candidate centers.
+    float min_slope = std::numeric_limits<float>::max();
+    for (int64_t c = center - radius; c <= center + radius; ++c) {
+      min_slope = std::min(min_slope, compute_slope(c));
+    }
+
+    // Second pass: candidates := centers whose slope is within 0.1% of min.
+    std::vector<int64_t> candidates;
+    for (int64_t c = center - radius; c <= center + radius; ++c) {
+      int64_t actual_center;
+      const float slope = compute_slope(c, &actual_center);
+      if (slope <= min_slope * 1.001f) {
+        candidates.push_back(actual_center);
+      }
+    }
+
+    // Several near-optimal centers: keep their median.
+    HWY_ASSERT(!candidates.empty());
+    if (candidates.size() == 1) return candidates[0];
+    return Median(&candidates);
+  }
+};
+
+// Robust estimator of the mode for data given as a CDF.
+// O(N*logN), N=num_bins.
+class HalfRangeMode {
+ public:
+  // Returns mode expressed as a histogram bin index. "cdf" must be weakly
+  // monotonically increasing, e.g. from std::partial_sum.
+  int operator()(const uint32_t* HWY_RESTRICT cdf,
+                 const size_t num_bins) const {
+    int center = num_bins / 2;
+    int width = num_bins;
+
+    // Zoom in on modal intervals of decreasing width. Stop before we reach
+    // width=1, i.e. original bins, because those are noisy.
+    while (width > 2) {
+      // Round up so we can still reach the outer edges of odd widths.
+      width = Half(width);
+
+      center = CenterOfIntervalWithMaxDensity(cdf, num_bins, center, width);
+    }
+
+    return center;  // mode := midpoint of modal interval.
+  }
+
+ private:
+  // Returns center of the densest interval [c-radius, c+radius].
+  static HWY_INLINE int CenterOfIntervalWithMaxDensity(
+      const uint32_t* HWY_RESTRICT cdf, const int total_bins, const int center,
+      const int width) {
+    const int radius = Half(width);
+
+    auto compute_density = [radius, total_bins, cdf](
+                               int c, int* actual_center = nullptr) {
+      // For symmetry, check 2*radius+1 bins, i.e. [min, max].
+      const int min = std::max(c - radius, 1);  // for -1 below
+      const int max = std::min(c + radius, total_bins - 1);
+      HWY_ASSERT(min < max);
+      HWY_ASSERT(cdf[min] <= cdf[max - 1]);
+      const int num_bins = max - min + 1;
+      // Sum over [min, max] == CDF(max) - CDF(min-1).
+      const float density = float(cdf[max] - cdf[min - 1]) / num_bins;
+
+      if (actual_center != nullptr) {
+        // c may be out of bounds, so take center of the clamped bounds.
+        *actual_center = Half(min + max);
+      }
+      return density;
+    };
+
+    // First pass: find max_density over all 2*radius+1 candidate centers.
+    float max_density = 0.0f;
+    for (int c = center - radius; c <= center + radius; ++c) {
+      max_density = std::max(max_density, compute_density(c));
+    }
+
+    // Second pass: candidates := centers with density within 0.1% of max.
+    std::vector<int> candidates;
+    for (int c = center - radius; c <= center + radius; ++c) {
+      int actual_center;
+      const float density = compute_density(c, &actual_center);
+      if (density >= max_density * 0.999f) {
+        candidates.push_back(actual_center);
+      }
+    }
+
+    // Several near-optimal centers: keep their median.
+    HWY_ASSERT(!candidates.empty());
+    if (candidates.size() == 1) return candidates[0];
+    return Median(&candidates);
+  }
+};
+
+// In-place ascending sort of integral values via a (value, count) histogram.
+// About 3x faster than std::sort for input distributions with very few unique
+// values.
+template <class T>
+void CountingSort(T* begin, T* end) {
+  // Histogram of distinct values, kept as a flat vector (similar to flat_map).
+  using ValueCount = std::pair<T, int>;
+  std::vector<ValueCount> histogram;
+  for (const T* src = begin; src != end; ++src) {
+    const T current = *src;
+    // Linear search for an existing entry holding this value.
+    auto entry = histogram.begin();
+    while (entry != histogram.end() && !(entry->first == current)) ++entry;
+    if (entry != histogram.end()) {
+      ++entry->second;
+    } else {
+      histogram.push_back(std::make_pair(current, 1));
+    }
+  }
+
+  // Order the histogram by value (pair.first).
+  std::sort(histogram.begin(), histogram.end());
+
+  // Emit each distinct value `count` times, overwriting the input range.
+  T* HWY_RESTRICT p = begin;
+  for (const ValueCount& vc : histogram) {
+    p = std::fill_n(p, vc.second, vc.first);
+  }
+  HWY_ASSERT(p == end);
+}
+
+struct Bivariate {
+  Bivariate(float x, float y) : x(x), y(y) {}
+  float x;
+  float y;
+};
+
+class Line {
+ public:
+  constexpr Line(const float slope, const float intercept)
+      : slope_(slope), intercept_(intercept) {}
+
+  constexpr float slope() const { return slope_; }
+  constexpr float intercept() const { return intercept_; }
+
+  // Robust line fit using Siegel's repeated-median algorithm.
+  explicit Line(const std::vector<Bivariate>& points) {
+    const size_t N = points.size();
+    // This straightforward N^2 implementation is OK for small N.
+    HWY_ASSERT(N < 10 * 1000);
+
+    // One for every point i.
+    std::vector<float> medians;
+    medians.reserve(N);
+
+    // One for every j != i (hence N >= 2 is required). Reused, never cleared.
+    std::vector<float> slopes(N - 1);
+
+    for (size_t i = 0; i < N; ++i) {
+      // Index within slopes[] (avoids the hole where j == i).
+      size_t idx_slope = 0;
+
+      for (size_t j = 0; j < N; ++j) {
+        if (j == i) continue;
+
+        const float dy = points[j].y - points[i].y;
+        const float dx = points[j].x - points[i].x;
+        HWY_ASSERT(std::abs(dx) > 1E-7f);  // x must be distinct
+        slopes[idx_slope++] = dy / dx;
+      }
+      HWY_ASSERT(idx_slope == N - 1);
+
+      const float median = Median(&slopes);
+      medians.push_back(median);
+    }
+
+    slope_ = Median(&medians);
+
+    // Intercept := median of y - slope * x over all points; reuses medians[].
+    for (size_t i = 0; i < N; ++i) {
+      medians[i] = points[i].y - slope_ * points[i].x;
+    }
+    intercept_ = Median(&medians);
+  }
+
+  constexpr float operator()(float x) const { return x * slope_ + intercept_; }
+
+ private:
+  float slope_;
+  float intercept_;
+};
+
+static inline void EvaluateQuality(const Line& line,
+                                   const std::vector<Bivariate>& points,
+                                   float* HWY_RESTRICT max_l1,
+                                   float* HWY_RESTRICT median_abs_deviation) {
+  // Per-point |line(x) - y| residuals, gathered for the median below.
+  std::vector<float> residuals;
+  residuals.reserve(points.size());
+
+  float worst = 0.0f;
+  for (size_t i = 0; i < points.size(); ++i) {
+    const float residual = std::abs(line(points[i].x) - points[i].y);
+    worst = std::max(worst, residual);
+    residuals.push_back(residual);
+  }
+  *max_l1 = worst;
+  *median_abs_deviation = Median(&residuals);
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_ROBUST_STATISTICS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/span.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/span.h
new file mode 100644
index 0000000000..f9e59b3710
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/span.h
@@ -0,0 +1,58 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_SPAN_H_
+#define LIB_JXL_BASE_SPAN_H_
+
+// Span (array view) is a non-owning container that provides cheap "cut"
+// operations and could be used as "ArrayLike" data source for PaddedBytes.
+
+#include <stddef.h>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+template <typename T>
+class Span {
+ public:
+  constexpr Span() noexcept : ptr_(nullptr), len_(0) {}
+  // Non-owning view over `length` elements starting at `array`.
+  constexpr Span(T* array, size_t length) noexcept
+      : ptr_(array), len_(length) {}
+  // View over all N elements of a C array.
+  template <size_t N>
+  explicit constexpr Span(T (&a)[N]) noexcept : ptr_(a), len_(N) {}
+  // View over any source exposing data()/size() with elements of sizeof(T).
+  template <typename ArrayLike>
+  explicit constexpr Span(const ArrayLike& other) noexcept
+      : ptr_(reinterpret_cast<T*>(other.data())), len_(other.size()) {
+    static_assert(sizeof(*other.data()) == sizeof(T),
+                  "Incompatible type of source.");
+  }
+
+  constexpr T* data() const noexcept { return ptr_; }
+
+  constexpr size_t size() const noexcept { return len_; }
+  // Unchecked element access.
+  constexpr T& operator[](size_t i) const noexcept {
+    // MSVC 2015 accepts this as constexpr, but not ptr_[i]
+    return *(data() + i);
+  }
+  // Drops the first n elements from the view; asserts that n <= size().
+  void remove_prefix(size_t n) noexcept {
+    JXL_ASSERT(size() >= n);
+    len_ -= n;
+    ptr_ += n;
+  }
+
+ private:
+  T* ptr_;
+  size_t len_;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_SPAN_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/status.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/status.cc
new file mode 100644
index 0000000000..9a94345912
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/status.cc
@@ -0,0 +1,46 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/status.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <string>
+
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+#include "sanitizer/common_interface_defs.h"  // __sanitizer_print_stack_trace
+#endif                                        // defined(*_SANITIZER)
+
+namespace jxl {
+
+bool Debug(const char* format, ...) {
+  va_list args;
+  va_start(args, format);
+  vfprintf(stderr, format, args);  // printf-style message to standard error
+  va_end(args);
+  return false;  // always false, regardless of what was printed
+}
+
+bool Abort() {
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+  // Under a sanitizer build, print a stack trace first. That call returns
+  // normally; the trap below is what crashes the program, and it also gives
+  // gdb a place to break.
+  __sanitizer_print_stack_trace();
+#endif  // defined(*_SANITIZER)
+
+#if JXL_COMPILER_MSVC
+  __debugbreak();
+  abort();
+#else
+  __builtin_trap();
+#endif
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/status.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/status.h
new file mode 100644
index 0000000000..e57e6b0632
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/status.h
@@ -0,0 +1,299 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_STATUS_H_
+#define LIB_JXL_BASE_STATUS_H_
+
+// Error handling: Status return type + helper macros.
+
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+// Uncomment to abort when JXL_FAILURE or JXL_STATUS with a fatal error is
+// reached:
+// #define JXL_CRASH_ON_ERROR
+
+#ifndef JXL_ENABLE_ASSERT
+#define JXL_ENABLE_ASSERT 1
+#endif
+
+#ifndef JXL_ENABLE_CHECK
+#define JXL_ENABLE_CHECK 1
+#endif
+
+// Pass -DJXL_DEBUG_ON_ERROR at compile time to print debug messages when a
+// function returns JXL_FAILURE or calls JXL_NOTIFY_ERROR. Note that this is
+// irrelevant if you also pass -DJXL_CRASH_ON_ERROR.
+#if defined(JXL_DEBUG_ON_ERROR) || defined(JXL_CRASH_ON_ERROR)
+#undef JXL_DEBUG_ON_ERROR
+#define JXL_DEBUG_ON_ERROR 1
+#else  // JXL_DEBUG_ON_ERROR || JXL_CRASH_ON_ERROR
+#ifdef NDEBUG
+#define JXL_DEBUG_ON_ERROR 0
+#else  // NDEBUG
+#define JXL_DEBUG_ON_ERROR 1
+#endif  // NDEBUG
+#endif  // JXL_DEBUG_ON_ERROR || JXL_CRASH_ON_ERROR
+
+// Pass -DJXL_DEBUG_ON_ALL_ERROR at compile time to print debug messages on
+// all error (fatal and non-fatal) status. This implies JXL_DEBUG_ON_ERROR.
+#if defined(JXL_DEBUG_ON_ALL_ERROR)
+#undef JXL_DEBUG_ON_ALL_ERROR
+#define JXL_DEBUG_ON_ALL_ERROR 1
+// JXL_DEBUG_ON_ALL_ERROR implies JXL_DEBUG_ON_ERROR too.
+#undef JXL_DEBUG_ON_ERROR
+#define JXL_DEBUG_ON_ERROR 1
+#else  // JXL_DEBUG_ON_ALL_ERROR
+#define JXL_DEBUG_ON_ALL_ERROR 0
+#endif  // JXL_DEBUG_ON_ALL_ERROR
+
+// The Verbose level for the library
+#ifndef JXL_DEBUG_V_LEVEL
+#define JXL_DEBUG_V_LEVEL 0
+#endif  // JXL_DEBUG_V_LEVEL
+
+// Pass -DJXL_DEBUG_ON_ABORT=0 to disable the debug messages on JXL_ASSERT,
+// JXL_CHECK and JXL_ABORT.
+#ifndef JXL_DEBUG_ON_ABORT
+#define JXL_DEBUG_ON_ABORT 1
+#endif  // JXL_DEBUG_ON_ABORT
+
+// Print a debug message on standard error. You should use the JXL_DEBUG macro
+// instead of calling Debug directly. This function returns false, so it can be
+// used as a return value in JXL_FAILURE.
+JXL_FORMAT(1, 2)
+bool Debug(const char* format, ...);
+
+// Print a debug message on standard error if "enabled" is true. "enabled" is
+// normally a macro that evaluates to 0 or 1 at compile time, so the Debug
+// function is never called and optimized out in release builds. Note that the
+// arguments are compiled but not evaluated when enabled is false. The format
+// string must be an explicit string literal in the call, for example:
+//   JXL_DEBUG(JXL_DEBUG_MYMODULE, "my module message: %d", some_var);
+// Add a header at the top of your module's .cc or .h file (depending on whether
+// you have JXL_DEBUG calls from the .h as well) like this:
+//   #ifndef JXL_DEBUG_MYMODULE
+//   #define JXL_DEBUG_MYMODULE 0
+//   #endif  // JXL_DEBUG_MYMODULE
+#define JXL_DEBUG(enabled, format, ...)                         \
+  do {                                                          \
+    if (enabled) {                                              \
+      ::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, \
+                   ##__VA_ARGS__);                              \
+    }                                                           \
+  } while (0)
+
+// JXL_DEBUG version that prints the debug message if the global verbose level
+// defined at compile time by JXL_DEBUG_V_LEVEL is greater than or equal to the
+// passed level.
+#define JXL_DEBUG_V(level, format, ...) \
+  JXL_DEBUG(level <= JXL_DEBUG_V_LEVEL, format, ##__VA_ARGS__)
+
+// Warnings (via JXL_WARNING) are enabled by default in debug builds (opt and
+// debug).
+#ifdef JXL_DEBUG_WARNING
+#undef JXL_DEBUG_WARNING
+#define JXL_DEBUG_WARNING 1
+#else  // JXL_DEBUG_WARNING
+#ifdef NDEBUG
+#define JXL_DEBUG_WARNING 0
+#else  // NDEBUG
+#define JXL_DEBUG_WARNING 1
+#endif  // NDEBUG
+#endif  // JXL_DEBUG_WARNING
+#define JXL_WARNING(format, ...) \
+  JXL_DEBUG(JXL_DEBUG_WARNING, format, ##__VA_ARGS__)
+
+// Exits the program after printing a stack trace when possible.
+JXL_NORETURN bool Abort();
+
+// Exits the program after printing file/line plus a formatted string.
+#define JXL_ABORT(format, ...)                                              \
+  ((JXL_DEBUG_ON_ABORT) && ::jxl::Debug(("%s:%d: JXL_ABORT: " format "\n"), \
+                                        __FILE__, __LINE__, ##__VA_ARGS__), \
+   ::jxl::Abort())
+
+// Does not guarantee running the code, use only for debug mode checks.
+#if JXL_ENABLE_ASSERT
+#define JXL_ASSERT(condition)                                      \
+  do {                                                             \
+    if (!(condition)) {                                            \
+      JXL_DEBUG(JXL_DEBUG_ON_ABORT, "JXL_ASSERT: %s", #condition); \
+      ::jxl::Abort();                                              \
+    }                                                              \
+  } while (0)
+#else
+#define JXL_ASSERT(condition) \
+  do {                        \
+  } while (0)
+#endif
+
+// Define JXL_IS_DEBUG_BUILD that denotes asan, msan and other debug builds,
+// but not opt or release.
+#ifndef JXL_IS_DEBUG_BUILD
+#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) ||         \
+    defined(MEMORY_SANITIZER) || defined(THREAD_SANITIZER) || \
+    defined(__clang_analyzer__)
+#define JXL_IS_DEBUG_BUILD 1
+#else
+#define JXL_IS_DEBUG_BUILD 0
+#endif
+#endif  //  JXL_IS_DEBUG_BUILD
+
+// Same as above, but only runs in debug builds (builds where NDEBUG is not
+// defined). This is useful for slower asserts that we want to run more rarely
+// than usual. These will run on asan, msan and other debug builds, but not in
+// opt or release.
+#if JXL_IS_DEBUG_BUILD
+#define JXL_DASSERT(condition)                                      \
+  do {                                                              \
+    if (!(condition)) {                                             \
+      JXL_DEBUG(JXL_DEBUG_ON_ABORT, "JXL_DASSERT: %s", #condition); \
+      ::jxl::Abort();                                               \
+    }                                                               \
+  } while (0)
+#else
+#define JXL_DASSERT(condition) \
+  do {                         \
+  } while (0)
+#endif
+
+// Always evaluates the condition; aborts on failure if JXL_ENABLE_CHECK is set.
+#if JXL_ENABLE_CHECK
+#define JXL_CHECK(condition)                                      \
+  do {                                                            \
+    if (!(condition)) {                                           \
+      JXL_DEBUG(JXL_DEBUG_ON_ABORT, "JXL_CHECK: %s", #condition); \
+      ::jxl::Abort();                                             \
+    }                                                             \
+  } while (0)
+#else  // checks disabled: still evaluate the condition, ignore its result
+#define JXL_CHECK(condition) \
+  do {                       \
+    (void)(condition);       \
+  } while (0)
+#endif  // JXL_ENABLE_CHECK
+
+// Evaluates to a jxl::Status built from `status` (a StatusCode or Status);
+// StatusMessage() prints "file:line: message" to stderr when enabled.
+#define JXL_STATUS(status, format, ...)                                        \
+  ::jxl::StatusMessage(::jxl::Status(status), "%s:%d: " format "\n", __FILE__, \
+                       __LINE__, ##__VA_ARGS__)
+
+// Reports a kGenericError through JXL_STATUS and discards the resulting Status.
+// Only useful for debug builds or when building with JXL_CRASH_ON_ERROR.
+#define JXL_NOTIFY_ERROR(format, ...)                                      \
+  (void)JXL_STATUS(::jxl::StatusCode::kGenericError, "JXL_ERROR: " format, \
+                   ##__VA_ARGS__)
+
+// Logs the message via JXL_STATUS() and evaluates to a kGenericError Status.
+// JXL_STATUS() already yields that Status; the explicit value after the comma
+// operator helps with clang-tidy inference and potentially with optimizations.
+#define JXL_FAILURE(format, ...)                                              \
+  ((void)JXL_STATUS(::jxl::StatusCode::kGenericError, "JXL_FAILURE: " format, \
+                    ##__VA_ARGS__),                                           \
+   ::jxl::Status(::jxl::StatusCode::kGenericError))
+
+// Evaluates `status` exactly once, so it is safe to use in non-debug code. If
+// the result is an error (fatal or non-fatal), reports it via StatusMessage()
+// and returns that Status from the enclosing function.
+#define JXL_RETURN_IF_ERROR(status)                                       \
+  do {                                                                    \
+    ::jxl::Status jxl_return_if_error_status = (status);                  \
+    if (!jxl_return_if_error_status) {                                    \
+      (void)::jxl::StatusMessage(                                         \
+          jxl_return_if_error_status,                                     \
+          "%s:%d: JXL_RETURN_IF_ERROR code=%d: %s\n", __FILE__, __LINE__, \
+          static_cast<int>(jxl_return_if_error_status.code()), #status);  \
+      return jxl_return_if_error_status;                                  \
+    }                                                                     \
+  } while (0)
+
+// Like JXL_RETURN_IF_ERROR, but without calling StatusMessage. Intended for
+// bundles (see fields.h), which have numerous call sites (-> relevant for code
+// size) and do not want excessive messages when decoding partial headers.
+#define JXL_QUIET_RETURN_IF_ERROR(status)                \
+  do {                                                   \
+    ::jxl::Status jxl_return_if_error_status = (status); \
+    if (!jxl_return_if_error_status) {                   \
+      return jxl_return_if_error_status;                 \
+    }                                                    \
+  } while (0)
+
+enum class StatusCode : int32_t {
+  // Non-fatal errors (negative values).
+  kNotEnoughBytes = -1,
+
+  // The only non-error status code.
+  kOk = 0,
+
+  // Fatal errors (positive values).
+  kGenericError = 1,
+};
+
+// Drop-in replacement for bool that raises compiler warnings if not used
+// after being returned from a function. Example:
+// Status LoadFile(...) { return true; } is more compact than
+// bool JXL_MUST_USE_RESULT LoadFile(...) { return true; }
+// In case of error, the status can carry an extra error code in its value which
+// is split between fatal and non-fatal error codes.
+class JXL_MUST_USE_RESULT Status {
+ public:
+  // We want implicit constructor from bool to allow returning "true" or "false"
+  // on a function when using Status. "true" means kOk while "false" means a
+  // generic fatal error.
+  // NOLINTNEXTLINE(google-explicit-constructor)
+  constexpr Status(bool ok)
+      : code_(ok ? StatusCode::kOk : StatusCode::kGenericError) {}
+
+  // NOLINTNEXTLINE(google-explicit-constructor)
+  constexpr Status(StatusCode code) : code_(code) {}
+
+  // Implicit cast to bool for checking return values: true only for kOk.
+  // NOLINTNEXTLINE(google-explicit-constructor)
+  constexpr operator bool() const { return code_ == StatusCode::kOk; }
+
+  constexpr StatusCode code() const { return code_; }
+
+  // Returns whether the status code is a fatal error (strictly positive code).
+  constexpr bool IsFatalError() const {
+    return static_cast<int32_t>(code_) > 0;
+  }
+
+ private:
+  StatusCode code_;
+};
+
+// Helper function that prints the debug message (or aborts) when needed and
+// returns `status` unchanged.
+inline JXL_FORMAT(2, 3) Status
+    StatusMessage(const Status status, const char* format, ...) {
+  // This block will be optimized out when JXL_DEBUG_ON_ERROR and
+  // JXL_DEBUG_ON_ALL_ERROR are both disabled.
+  if ((JXL_DEBUG_ON_ERROR && status.IsFatalError()) ||
+      (JXL_DEBUG_ON_ALL_ERROR && !status)) {
+    va_list args;
+    va_start(args, format);
+    vfprintf(stderr, format, args);
+    va_end(args);
+  }
+#ifdef JXL_CRASH_ON_ERROR
+  // JXL_CRASH_ON_ERROR means to Abort() only on fatal errors.
+  if (status.IsFatalError()) {
+    Abort();
+  }
+#endif  // JXL_CRASH_ON_ERROR
+  return status;
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_STATUS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/thread_pool_internal.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/thread_pool_internal.h
new file mode 100644
index 0000000000..6e23a335a7
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/base/thread_pool_internal.h
@@ -0,0 +1,52 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_THREAD_POOL_INTERNAL_H_
+#define LIB_JXL_BASE_THREAD_POOL_INTERNAL_H_
+
+#include <stddef.h>
+
+#include <cmath>
+
+#include "jxl/parallel_runner.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/threads/thread_parallel_runner_internal.h"
+
+namespace jxl {
+
+// Helper class to pass an internal ThreadPool-like object using threads. This
+// is only suitable for tests or tools that access the internal API of JPEG XL.
+// In other cases the caller will provide a JxlParallelRunner() for handling
+// this. This class uses jpegxl::ThreadParallelRunner (from jpegxl_threads
+// library). For interface details check jpegxl::ThreadParallelRunner.
+class ThreadPoolInternal : public ThreadPool {
+ public:
+  // Starts the given number of worker threads and blocks until they are ready.
+  // "num_worker_threads" defaults to one per hyperthread. If zero, all tasks
+  // run on the main thread.
+  explicit ThreadPoolInternal(
+      int num_worker_threads = std::thread::hardware_concurrency())
+      : ThreadPool(&jpegxl::ThreadParallelRunner::Runner,
+                   static_cast<void*>(&runner_)),
+        runner_(num_worker_threads) {}
+  // Not copyable: the base class was given the address of this runner_.
+  ThreadPoolInternal(const ThreadPoolInternal&) = delete;
+  ThreadPoolInternal& operator=(const ThreadPoolInternal&) = delete;
+
+  size_t NumThreads() const { return runner_.NumThreads(); }
+  size_t NumWorkerThreads() const { return runner_.NumWorkerThreads(); }
+
+  template <class Func>
+  void RunOnEachThread(const Func& func) {
+    runner_.RunOnEachThread(func);
+  }
+
+ private:
+  jpegxl::ThreadParallelRunner runner_;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BASE_THREAD_POOL_INTERNAL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/bit_reader_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/bit_reader_test.cc
new file mode 100644
index 0000000000..c962853190
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/bit_reader_test.cc
@@ -0,0 +1,260 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+#include <random>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/thread_pool_internal.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_bit_writer.h"
+
+namespace jxl {
+namespace {
+
+TEST(BitReaderTest, ExtendsWithZeroes) {
+  for (size_t size = 4; size < 32; ++size) {
+    std::vector<uint8_t> data(size, 0xff);
+
+    for (size_t n_bytes = 0; n_bytes < size; n_bytes++) {
+      BitReader br(Span<const uint8_t>(data.data(), n_bytes));
+      // Read all the declared bits; each is 1 because the buffer is 0xff-filled.
+      for (size_t i = 0; i < n_bytes * kBitsPerByte; i++) {
+        ASSERT_EQ(br.ReadBits(1), 1) << "n_bytes=" << n_bytes << " i=" << i;
+      }
+
+      // Peek more than the declared size - all will be zero. Cannot consume.
+      for (size_t i = 0; i < BitReader::kMaxBitsPerCall; i++) {
+        ASSERT_EQ(br.PeekBits(i), 0)
+            << "size=" << size << " n_bytes=" << n_bytes << " i=" << i;
+      }
+
+      EXPECT_TRUE(br.Close());
+    }
+  }
+}
+
+struct Symbol {
+  uint32_t num_bits;  // field width, as passed to Write() / ReadBits()
+  uint32_t value;     // field contents that were written
+};
+
+// Values written by BitWriter are read back unchanged by BitReader.
+TEST(BitReaderTest, TestRoundTrip) {
+  ThreadPoolInternal pool(8);
+  pool.Run(0, 1000, ThreadPool::SkipInit(),
+           [](const int task, const int /* thread */) {
+             constexpr size_t kBitBudget = 8000;
+             BitWriter bit_writer;
+             BitWriter::Allotment allotment(&bit_writer, kBitBudget);
+
+             std::vector<Symbol> written;
+             written.reserve(1000);
+
+             std::mt19937 gen(55537 + 129 * task);
+             std::uniform_int_distribution<> width(1, 32);  // inclusive bounds
+
+             while (true) {
+               const uint32_t n = width(gen);
+               if (bit_writer.BitsWritten() + n > kBitBudget) break;
+               const Symbol sym = {n, static_cast<uint32_t>(gen() >> (32 - n))};
+               written.push_back(sym);
+               bit_writer.Write(sym.num_bits, sym.value);
+             }
+
+             bit_writer.ZeroPadToByte();
+             ReclaimAndCharge(&bit_writer, &allotment, 0, nullptr);
+             BitReader bit_reader(bit_writer.GetSpan());
+             for (const Symbol& sym : written) {
+               EXPECT_EQ(sym.value, bit_reader.ReadBits(sym.num_bits));
+             }
+             EXPECT_TRUE(bit_reader.Close());
+           });
+}
+
+// SkipBits(n) leaves the reader in the same state as reading n single bits.
+TEST(BitReaderTest, TestSkip) {
+  ThreadPoolInternal pool(8);
+  pool.Run(
+      0, 96, ThreadPool::SkipInit(),
+      [](const int task, const int /* thread */) {
+        constexpr size_t kSize = 100;
+
+        for (size_t skip = 0; skip < 128; ++skip) {
+          BitWriter writer;
+          BitWriter::Allotment allotment(&writer, kSize * kBitsPerByte);
+          // Start with "task" 1-bits.
+          for (int i = 0; i < task; ++i) {
+            writer.Write(1, 1);
+          }
+
+          // Write "skip" 0-bits that we will skip over.
+          for (size_t i = 0; i < skip; ++i) {
+            writer.Write(1, 0);
+          }
+
+          // Write terminator bits '101' (value 5 in 3 bits).
+          writer.Write(3, 5);
+          EXPECT_EQ(task + skip + 3, writer.BitsWritten());
+          writer.ZeroPadToByte();
+          AuxOut aux_out;
+          ReclaimAndCharge(&writer, &allotment, 0, &aux_out);
+          EXPECT_LT(aux_out.layers[0].total_bits, kSize * 8);
+
+          BitReader reader1(writer.GetSpan());
+          BitReader reader2(writer.GetSpan());
+          // Verify initial 1-bits with both readers.
+          for (int i = 0; i < task; ++i) {
+            EXPECT_EQ(1, reader1.ReadBits(1));
+            EXPECT_EQ(1, reader2.ReadBits(1));
+          }
+
+          // reader1 uses SkipBits; reader2 manually reads "skip" bits.
+          reader1.SkipBits(skip);
+          for (size_t i = 0; i < skip; ++i) {
+            EXPECT_EQ(0, reader2.ReadBits(1)) << " skip=" << skip << " i=" << i;
+          }
+          EXPECT_EQ(reader1.TotalBitsConsumed(), reader2.TotalBitsConsumed());
+
+          // Ensure both readers see the terminator bits.
+          EXPECT_EQ(5, reader1.ReadBits(3));
+          EXPECT_EQ(5, reader2.ReadBits(3));
+
+          EXPECT_TRUE(reader1.Close());
+          EXPECT_TRUE(reader2.Close());
+        }
+      });
+}
+
+// Verifies byte order and different groupings of bits.
+TEST(BitReaderTest, TestOrder) {
+  constexpr size_t kMaxBits = 16;
+
+  // u(1) - bits written into LSBs of first byte: 5 ones, 5 zeros, 6 ones.
+  {
+    BitWriter writer;
+    BitWriter::Allotment allotment(&writer, kMaxBits);
+    for (size_t i = 0; i < 5; ++i) {
+      writer.Write(1, 1);
+    }
+    for (size_t i = 0; i < 5; ++i) {
+      writer.Write(1, 0);
+    }
+    for (size_t i = 0; i < 6; ++i) {
+      writer.Write(1, 1);
+    }
+
+    writer.ZeroPadToByte();
+    ReclaimAndCharge(&writer, &allotment, 0, nullptr);
+    BitReader reader(writer.GetSpan());
+    EXPECT_EQ(0x1F, reader.ReadFixedBits<8>());
+    EXPECT_EQ(0xFC, reader.ReadFixedBits<8>());
+    EXPECT_TRUE(reader.Close());
+  }
+
+  // u(8) - bytes are read back in the same order they were written.
+  {
+    BitWriter writer;
+    BitWriter::Allotment allotment(&writer, kMaxBits);
+    writer.Write(8, 0xF8);
+    writer.Write(8, 0x3F);
+
+    writer.ZeroPadToByte();
+    ReclaimAndCharge(&writer, &allotment, 0, nullptr);
+    BitReader reader(writer.GetSpan());
+    EXPECT_EQ(0xF8, reader.ReadFixedBits<8>());
+    EXPECT_EQ(0x3F, reader.ReadFixedBits<8>());
+    EXPECT_TRUE(reader.Close());
+  }
+
+  // u(16) - little-endian bytes: the low byte 0x3F comes out first.
+  {
+    BitWriter writer;
+    BitWriter::Allotment allotment(&writer, kMaxBits);
+    writer.Write(16, 0xF83F);
+
+    writer.ZeroPadToByte();
+    ReclaimAndCharge(&writer, &allotment, 0, nullptr);
+    BitReader reader(writer.GetSpan());
+    EXPECT_EQ(0x3F, reader.ReadFixedBits<8>());
+    EXPECT_EQ(0xF8, reader.ReadFixedBits<8>());
+    EXPECT_TRUE(reader.Close());
+  }
+
+  // Non-byte-aligned, mixed sizes: fields are packed LSB-first across bytes.
+  {
+    BitWriter writer;
+    BitWriter::Allotment allotment(&writer, kMaxBits);
+    writer.Write(1, 1);
+    writer.Write(3, 6);
+    writer.Write(8, 0xDB);
+    writer.Write(4, 8);
+
+    writer.ZeroPadToByte();
+    ReclaimAndCharge(&writer, &allotment, 0, nullptr);
+    BitReader reader(writer.GetSpan());
+    EXPECT_EQ(0xBD, reader.ReadFixedBits<8>());
+    EXPECT_EQ(0x8D, reader.ReadFixedBits<8>());
+    EXPECT_TRUE(reader.Close());
+  }
+}
+
+TEST(BitReaderTest, TotalCountersTest) {
+  uint8_t buf[8] = {1, 2, 3, 4};  // remaining four bytes are zero-initialized
+  BitReader reader(Span<const uint8_t>(buf, sizeof(buf)));
+
+  EXPECT_EQ(sizeof(buf), reader.TotalBytes());
+  EXPECT_EQ(0, reader.TotalBitsConsumed());
+  reader.ReadFixedBits<1>();
+  EXPECT_EQ(1, reader.TotalBitsConsumed());
+
+  reader.ReadFixedBits<10>();  // read crosses the first byte boundary
+  EXPECT_EQ(11, reader.TotalBitsConsumed());
+
+  reader.ReadFixedBits<4>();
+  EXPECT_EQ(15, reader.TotalBitsConsumed());
+
+  reader.ReadFixedBits<1>();
+  EXPECT_EQ(16, reader.TotalBitsConsumed());
+
+  reader.ReadFixedBits<16>();
+  EXPECT_EQ(32, reader.TotalBitsConsumed());
+
+  EXPECT_TRUE(reader.Close());
+}
+
+TEST(BitReaderTest, MoveTest) {
+  uint8_t buf[8] = {1, 2, 3, 4};
+  BitReader reader2;
+  {
+    BitReader reader1(Span<const uint8_t>(buf, sizeof(buf)));
+
+    EXPECT_EQ(0, reader1.TotalBitsConsumed());
+    reader1.ReadFixedBits<16>();
+    EXPECT_EQ(16, reader1.TotalBitsConsumed());
+
+    reader2 = std::move(reader1);
+    // From this point reader1 is invalid, but we can continue to use reader2
+    // and we don't need to call Close() on reader1.
+  }
+
+  EXPECT_EQ(16, reader2.TotalBitsConsumed());  // position survived the move
+  EXPECT_EQ(3U, reader2.ReadFixedBits<8>());   // third byte of buf
+  EXPECT_EQ(24, reader2.TotalBitsConsumed());
+
+  EXPECT_TRUE(reader2.Close());
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/bits_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/bits_test.cc
new file mode 100644
index 0000000000..9c109cb772
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/bits_test.cc
@@ -0,0 +1,79 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/bits.h"
+
+#include "gtest/gtest.h"
+
+namespace jxl {
+namespace {
+
+TEST(BitsTest, TestNumZeroBits) {
+  // Zero input is well-defined: the result is the full bit width of the type.
+  EXPECT_EQ(32, Num0BitsAboveMS1Bit(0u));
+  EXPECT_EQ(64, Num0BitsAboveMS1Bit(0ull));
+  EXPECT_EQ(32, Num0BitsBelowLS1Bit(0u));
+  EXPECT_EQ(64, Num0BitsBelowLS1Bit(0ull));
+
+  EXPECT_EQ(31, Num0BitsAboveMS1Bit(1u));
+  EXPECT_EQ(30, Num0BitsAboveMS1Bit(2u));
+  EXPECT_EQ(63, Num0BitsAboveMS1Bit(1ull));
+  EXPECT_EQ(62, Num0BitsAboveMS1Bit(2ull));
+
+  EXPECT_EQ(0, Num0BitsBelowLS1Bit(1u));
+  EXPECT_EQ(0, Num0BitsBelowLS1Bit(1ull));
+  EXPECT_EQ(1, Num0BitsBelowLS1Bit(2u));
+  EXPECT_EQ(1, Num0BitsBelowLS1Bit(2ull));
+
+  EXPECT_EQ(0, Num0BitsAboveMS1Bit(0x80000000u));  // only the top bit is set
+  EXPECT_EQ(0, Num0BitsAboveMS1Bit(0x8000000000000000ull));
+  EXPECT_EQ(31, Num0BitsBelowLS1Bit(0x80000000u));
+  EXPECT_EQ(63, Num0BitsBelowLS1Bit(0x8000000000000000ull));
+}
+
+TEST(BitsTest, TestFloorLog2) {
+  // Expected floor(log2(v)) for v = 1..7, indexed by v - 1.
+  constexpr int kWant[7] = {0, 1, 1, 2, 2, 2, 2};
+  for (uint32_t v = 1; v < 8; ++v) {
+    EXPECT_EQ(kWant[v - 1], FloorLog2Nonzero(v)) << " " << v;
+    EXPECT_EQ(kWant[v - 1], FloorLog2Nonzero(uint64_t{v})) << " " << v;
+  }
+
+  EXPECT_EQ(31, FloorLog2Nonzero(0x80000000u));
+  EXPECT_EQ(31, FloorLog2Nonzero(0x80000001u));
+  EXPECT_EQ(31, FloorLog2Nonzero(0xFFFFFFFFu));
+
+  EXPECT_EQ(31, FloorLog2Nonzero(0x80000000ull));
+  EXPECT_EQ(31, FloorLog2Nonzero(0x80000001ull));
+  EXPECT_EQ(31, FloorLog2Nonzero(0xFFFFFFFFull));
+
+  EXPECT_EQ(63, FloorLog2Nonzero(0x8000000000000000ull));
+  EXPECT_EQ(63, FloorLog2Nonzero(0x8000000000000001ull));
+  EXPECT_EQ(63, FloorLog2Nonzero(0xFFFFFFFFFFFFFFFFull));
+}
+
+TEST(BitsTest, TestCeilLog2) {
+  // Expected ceil(log2(v)) for v = 1..7, indexed by v - 1.
+  constexpr int kWant[7] = {0, 1, 2, 2, 3, 3, 3};
+  for (uint32_t v = 1; v < 8; ++v) {
+    EXPECT_EQ(kWant[v - 1], CeilLog2Nonzero(v)) << " " << v;
+    EXPECT_EQ(kWant[v - 1], CeilLog2Nonzero(uint64_t{v})) << " " << v;
+  }
+
+  EXPECT_EQ(31, CeilLog2Nonzero(0x80000000u));
+  EXPECT_EQ(32, CeilLog2Nonzero(0x80000001u));
+  EXPECT_EQ(32, CeilLog2Nonzero(0xFFFFFFFFu));
+
+  EXPECT_EQ(31, CeilLog2Nonzero(0x80000000ull));
+  EXPECT_EQ(32, CeilLog2Nonzero(0x80000001ull));
+  EXPECT_EQ(32, CeilLog2Nonzero(0xFFFFFFFFull));
+
+  EXPECT_EQ(63, CeilLog2Nonzero(0x8000000000000000ull));
+  EXPECT_EQ(64, CeilLog2Nonzero(0x8000000000000001ull));
+  EXPECT_EQ(64, CeilLog2Nonzero(0xFFFFFFFFFFFFFFFFull));
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/blending.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/blending.cc
new file mode 100644
index 0000000000..6cf2502bf9
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/blending.cc
@@ -0,0 +1,383 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/blending.h"
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+// Returns whether the frame described by dec_state->shared->frame_header
+// has to go through the blending step.
+bool ImageBlender::NeedsBlending(PassesDecoderState* dec_state) {
+  const auto& header = (*dec_state->shared).frame_header;
+  // Only regular and skip-progressive frames are ever blended.
+  const auto type = header.frame_type;
+  if (type != FrameType::kRegularFrame && type != FrameType::kSkipProgressive) {
+    return false;
+  }
+  // True iff color and every extra channel use plain replacement.
+  bool only_replace = header.blending_info.mode == BlendMode::kReplace;
+  for (const auto& ec : header.extra_channel_blending_info) {
+    only_replace = only_replace && ec.mode == BlendMode::kReplace;
+  }
+  // Replacing the full frame needs no blending; everything else does.
+  const bool full_frame = !header.custom_size_or_origin;
+  const bool nothing_to_do = full_frame && only_replace;
+  return !nothing_to_do;
+}
+
+Status ImageBlender::PrepareBlending(
+    PassesDecoderState* dec_state, FrameOrigin foreground_origin,
+    size_t foreground_xsize, size_t foreground_ysize,
+    const std::vector<ExtraChannelInfo>* extra_channel_info,
+    const ColorEncoding& frame_color_encoding, const Rect& frame_rect,
+    Image3F* output, const Rect& output_rect,
+    std::vector<ImageF>* output_extra_channels,
+    std::vector<Rect> output_extra_channels_rects) {
+  const PassesSharedState& state = *dec_state->shared;
+  info_ = state.frame_header.blending_info;
+
+  ec_info_ = &state.frame_header.extra_channel_blending_info;
+
+  extra_channel_info_ = extra_channel_info;
+  output_ = output;
+  output_rect_ = output_rect;
+  output_extra_channels_ = output_extra_channels;
+  output_extra_channels_rects_ = std::move(output_extra_channels_rects);
+
+  size_t image_xsize = state.frame_header.nonserialized_metadata->xsize();
+  size_t image_ysize = state.frame_header.nonserialized_metadata->ysize();
+
+  // The rect in the canvas that needs to be updated (clamped below).
+  cropbox_ = frame_rect;
+  // The rect of this frame that overlaps with the canvas (recomputed below).
+  overlap_ = cropbox_;
+  o_ = foreground_origin;
+  o_.x0 -= frame_rect.x0();  // make the origin relative to frame_rect
+  o_.y0 -= frame_rect.y0();
+  int x0 = (o_.x0 >= 0 ? o_.x0 : 0);
+  int y0 = (o_.y0 >= 0 ? o_.y0 : 0);
+  int xsize = foreground_xsize;
+  if (o_.x0 < 0) xsize += o_.x0;  // drop the part left of frame_rect
+  int ysize = foreground_ysize;
+  if (o_.y0 < 0) ysize += o_.y0;  // drop the part above frame_rect
+  xsize = Clamp1(xsize, 0, (int)cropbox_.xsize() - x0);
+  ysize = Clamp1(ysize, 0, (int)cropbox_.ysize() - y0);
+  cropbox_ = Rect(x0, y0, xsize, ysize);
+  x0 = (o_.x0 < 0 ? -o_.x0 : 0);  // same region, in foreground coordinates
+  y0 = (o_.y0 < 0 ? -o_.y0 : 0);
+  overlap_ = Rect(x0, y0, xsize, ysize);
+
+  // Background image: the reference frame selected by info_.source.
+  ImageBundle& bg = *state.reference_frames[info_.source].frame;
+  if (bg.xsize() == 0 && bg.ysize() == 0) {
+    // There is no background, assume it to be all zeroes.
+    ImageBundle empty(&state.metadata->m);
+    Image3F color(image_xsize, image_ysize);
+    ZeroFillImage(&color);
+    empty.SetFromImage(std::move(color), frame_color_encoding);
+    if (!output_extra_channels_->empty()) {
+      std::vector<ImageF> ec;
+      for (size_t i = 0; i < output_extra_channels_->size(); ++i) {
+        ImageF eci(image_xsize, image_ysize);
+        ZeroFillImage(&eci);
+        ec.push_back(std::move(eci));
+      }
+      empty.SetExtraChannels(std::move(ec));
+    }
+    bg = std::move(empty);
+  } else if (state.reference_frames[info_.source].ib_is_in_xyb) {
+    return JXL_FAILURE(
+        "Trying to blend XYB reference frame %i and non-XYB frame",
+        info_.source);
+  }
+
+  if (bg.xsize() < image_xsize || bg.ysize() < image_ysize ||
+      bg.origin.x0 != 0 || bg.origin.y0 != 0) {
+    return JXL_FAILURE("Trying to use a %zux%zu crop as a background",
+                       bg.xsize(), bg.ysize());
+  }
+  if (state.metadata->m.xyb_encoded) {
+    if (!dec_state->output_encoding_info.color_encoding_is_original) {
+      return JXL_FAILURE("Blending in unsupported color space");
+    }
+  }
+
+  if (!overlap_.IsInside(Rect(0, 0, foreground_xsize, foreground_ysize))) {
+    return JXL_FAILURE("Trying to use a %zux%zu crop as a foreground",
+                       foreground_xsize, foreground_ysize);
+  }
+
+  if (!cropbox_.IsInside(bg)) {
+    return JXL_FAILURE(
+        "Trying blend %zux%zu to (%zu,%zu), but background is %zux%zu",
+        cropbox_.xsize(), cropbox_.ysize(), cropbox_.x0(), cropbox_.y0(),
+        bg.xsize(), bg.ysize());
+  }
+
+  CopyImageTo(frame_rect, *bg.color(), output_rect, output);  // seed with bg
+  for (size_t i = 0; i < ec_info_->size(); ++i) {
+    const auto& eci = (*ec_info_)[i];
+    const auto& src = *state.reference_frames[eci.source].frame;
+    if (src.xsize() == 0 && src.ysize() == 0) {
+      ZeroFillPlane(&(*output_extra_channels_)[i],
+                    output_extra_channels_rects_[i]);
+    } else {
+      if (src.extra_channels()[i].xsize() < image_xsize ||
+          src.extra_channels()[i].ysize() < image_ysize || src.origin.x0 != 0 ||
+          src.origin.y0 != 0) {
+        return JXL_FAILURE(
+            "Invalid size %zux%zu or origin %+d%+d for extra channel %zu of "
+            "reference frame %zu, expected at least %zux%zu+0+0",
+            src.extra_channels()[i].xsize(), src.extra_channels()[i].ysize(),
+            static_cast<int>(src.origin.x0), static_cast<int>(src.origin.y0), i,
+            static_cast<size_t>(eci.source), image_xsize, image_ysize);
+      }
+      CopyImageTo(frame_rect, src.extra_channels()[i],
+                  output_extra_channels_rects_[i],
+                  &(*output_extra_channels_)[i]);
+    }
+  }
+
+  return true;
+}
+
+ImageBlender::RectBlender ImageBlender::PrepareRect(
+    const Rect& rect, const Image3F& foreground,
+    const std::vector<ImageF>& extra_channels, const Rect& input_rect) const {
+  JXL_DASSERT(rect.xsize() == input_rect.xsize());
+  JXL_DASSERT(rect.ysize() == input_rect.ysize());
+  JXL_DASSERT(input_rect.IsInside(foreground));
+
+  RectBlender blender(false);  // done_ starts out false
+  blender.extra_channel_info_ = extra_channel_info_;
+
+  blender.current_overlap_ = rect.Intersection(overlap_);
+  if (blender.current_overlap_.xsize() == 0 ||
+      blender.current_overlap_.ysize() == 0) {
+    blender.done_ = true;  // empty intersection: nothing to blend
+    return blender;
+  }
+
+  blender.current_cropbox_ =
+      Rect(o_.x0 + blender.current_overlap_.x0(),
+           o_.y0 + blender.current_overlap_.y0(),
+           blender.current_overlap_.xsize(), blender.current_overlap_.ysize());
+
+  // Turn current_overlap_ from being relative to the full foreground to being
+  // relative to the rect or input_rect.
+  blender.current_overlap_ =
+      Rect(blender.current_overlap_.x0() - rect.x0(),
+           blender.current_overlap_.y0() - rect.y0(),
+           blender.current_overlap_.xsize(), blender.current_overlap_.ysize());
+
+  // And this one is relative to the `foreground` subimage.
+  const Rect input_overlap(blender.current_overlap_.x0() + input_rect.x0(),
+                           blender.current_overlap_.y0() + input_rect.y0(),
+                           blender.current_overlap_.xsize(),
+                           blender.current_overlap_.ysize());
+
+  blender.blending_info_.resize(extra_channels.size() + 1);
+  auto make_blending = [&](const BlendingInfo& info, PatchBlending* pb) {
+    pb->alpha_channel = info.alpha_channel;
+    pb->clamp = info.clamp;
+    switch (info.mode) {
+      case BlendMode::kReplace: {
+        pb->mode = PatchBlendMode::kReplace;
+        break;
+      }
+      case BlendMode::kAdd: {
+        pb->mode = PatchBlendMode::kAdd;
+        break;
+      }
+      case BlendMode::kMul: {
+        pb->mode = PatchBlendMode::kMul;
+        break;
+      }
+      case BlendMode::kBlend: {
+        pb->mode = PatchBlendMode::kBlendAbove;
+        break;
+      }
+      case BlendMode::kAlphaWeightedAdd: {
+        pb->mode = PatchBlendMode::kAlphaWeightedAddAbove;
+        break;
+      }
+      default: {
+        JXL_ABORT("Invalid blend mode");  // should have failed to decode
+      }
+    }
+  };
+  make_blending(info_, &blender.blending_info_[0]);  // [0]: color channels
+  for (size_t i = 0; i < extra_channels.size(); i++) {
+    make_blending((*ec_info_)[i], &blender.blending_info_[1 + i]);
+  }
+
+  Rect cropbox_row = blender.current_cropbox_.Line(0);
+  Rect overlap_row = input_overlap.Line(0);
+  const auto num_ptrs = 3 + extra_channels.size();  // 3 color planes + extras
+  blender.fg_ptrs_.reserve(num_ptrs);
+  blender.fg_strides_.reserve(num_ptrs);
+  blender.bg_ptrs_.reserve(num_ptrs);
+  blender.bg_strides_.reserve(num_ptrs);
+  for (size_t c = 0; c < 3; c++) {
+    blender.fg_ptrs_.push_back(overlap_row.ConstPlaneRow(foreground, c, 0));
+    blender.fg_strides_.push_back(foreground.PixelsPerRow());
+    blender.bg_ptrs_.push_back(
+        cropbox_row.Translate(output_rect_.x0(), output_rect_.y0())
+            .PlaneRow(output_, c, 0));
+    blender.bg_strides_.push_back(output_->PixelsPerRow());
+  }
+  for (size_t c = 0; c < extra_channels.size(); c++) {
+    blender.fg_ptrs_.push_back(overlap_row.ConstRow(extra_channels[c], 0));
+    blender.fg_strides_.push_back(extra_channels[c].PixelsPerRow());
+    blender.bg_ptrs_.push_back(
+        cropbox_row
+            .Translate(output_extra_channels_rects_[c].x0(),
+                       output_extra_channels_rects_[c].y0())
+            .Row(&(*output_extra_channels_)[c], 0));
+    blender.bg_strides_.push_back((*output_extra_channels_)[c].PixelsPerRow());
+  }
+
+  return blender;
+}
+
+Status PerformBlending(
+    const float* const* bg, const float* const* fg, float* const* out,
+    size_t xsize, const PatchBlending& color_blending,
+    const PatchBlending* ec_blending,
+    const std::vector<ExtraChannelInfo>& extra_channel_info) {
+  bool has_alpha = false;  // is any extra channel of type kAlpha?
+  size_t num_ec = extra_channel_info.size();
+  for (size_t i = 0; i < num_ec; i++) {
+    if (extra_channel_info[i].type == jxl::ExtraChannel::kAlpha) {
+      has_alpha = true;
+      break;
+    }
+  }
+  ImageF tmp(xsize, 3 + num_ec);  // scratch rows, since out may alias bg
+  // Blend extra channels first so that we use the pre-blending alpha.
+  for (size_t i = 0; i < num_ec; i++) {
+    if (ec_blending[i].mode == PatchBlendMode::kAdd) {
+      for (size_t x = 0; x < xsize; x++) {
+        tmp.Row(3 + i)[x] = bg[3 + i][x] + fg[3 + i][x];
+      }
+    } else if (ec_blending[i].mode == PatchBlendMode::kBlendAbove) {
+      size_t alpha = ec_blending[i].alpha_channel;
+      bool is_premultiplied = extra_channel_info[alpha].alpha_associated;
+      PerformAlphaBlending(bg[3 + i], bg[3 + alpha], fg[3 + i], fg[3 + alpha],
+                           tmp.Row(3 + i), xsize, is_premultiplied,
+                           ec_blending[i].clamp);
+    } else if (ec_blending[i].mode == PatchBlendMode::kBlendBelow) {
+      size_t alpha = ec_blending[i].alpha_channel;
+      bool is_premultiplied = extra_channel_info[alpha].alpha_associated;
+      PerformAlphaBlending(fg[3 + i], fg[3 + alpha], bg[3 + i], bg[3 + alpha],
+                           tmp.Row(3 + i), xsize, is_premultiplied,
+                           ec_blending[i].clamp);
+    } else if (ec_blending[i].mode == PatchBlendMode::kAlphaWeightedAddAbove) {
+      size_t alpha = ec_blending[i].alpha_channel;
+      PerformAlphaWeightedAdd(bg[3 + i], fg[3 + i], fg[3 + alpha],
+                              tmp.Row(3 + i), xsize, ec_blending[i].clamp);
+    } else if (ec_blending[i].mode == PatchBlendMode::kAlphaWeightedAddBelow) {
+      size_t alpha = ec_blending[i].alpha_channel;
+      PerformAlphaWeightedAdd(fg[3 + i], bg[3 + i], bg[3 + alpha],
+                              tmp.Row(3 + i), xsize, ec_blending[i].clamp);
+    } else if (ec_blending[i].mode == PatchBlendMode::kMul) {
+      PerformMulBlending(bg[3 + i], fg[3 + i], tmp.Row(3 + i), xsize,
+                         ec_blending[i].clamp);
+    } else if (ec_blending[i].mode == PatchBlendMode::kReplace) {
+      memcpy(tmp.Row(3 + i), fg[3 + i], xsize * sizeof(**fg));
+    } else if (ec_blending[i].mode == PatchBlendMode::kNone) {
+      memcpy(tmp.Row(3 + i), bg[3 + i], xsize * sizeof(**fg));
+    } else {
+      JXL_ABORT("Unreachable");
+    }
+  }
+  size_t alpha = color_blending.alpha_channel;
+
+  if (color_blending.mode == PatchBlendMode::kAdd ||
+      (color_blending.mode == PatchBlendMode::kAlphaWeightedAddAbove &&
+       !has_alpha) ||
+      (color_blending.mode == PatchBlendMode::kAlphaWeightedAddBelow &&
+       !has_alpha)) {
+    for (int p = 0; p < 3; p++) {
+      float* sum_row = tmp.Row(p);  // named to avoid shadowing parameter `out`
+      for (size_t x = 0; x < xsize; x++) {
+        sum_row[x] = bg[p][x] + fg[p][x];
+      }
+    }
+  } else if (color_blending.mode == PatchBlendMode::kBlendAbove
+             // blend without alpha is just replace
+             && has_alpha) {
+    bool is_premultiplied = extra_channel_info[alpha].alpha_associated;
+    PerformAlphaBlending(
+        {bg[0], bg[1], bg[2], bg[3 + alpha]},
+        {fg[0], fg[1], fg[2], fg[3 + alpha]},
+        {tmp.Row(0), tmp.Row(1), tmp.Row(2), tmp.Row(3 + alpha)}, xsize,
+        is_premultiplied, color_blending.clamp);
+  } else if (color_blending.mode == PatchBlendMode::kBlendBelow
+             // blend without alpha is just replace
+             && has_alpha) {
+    bool is_premultiplied = extra_channel_info[alpha].alpha_associated;
+    PerformAlphaBlending(
+        {fg[0], fg[1], fg[2], fg[3 + alpha]},
+        {bg[0], bg[1], bg[2], bg[3 + alpha]},
+        {tmp.Row(0), tmp.Row(1), tmp.Row(2), tmp.Row(3 + alpha)}, xsize,
+        is_premultiplied, color_blending.clamp);
+  } else if (color_blending.mode == PatchBlendMode::kAlphaWeightedAddAbove) {
+    JXL_DASSERT(has_alpha);
+    for (size_t c = 0; c < 3; c++) {
+      PerformAlphaWeightedAdd(bg[c], fg[c], fg[3 + alpha], tmp.Row(c), xsize,
+                              color_blending.clamp);
+    }
+  } else if (color_blending.mode == PatchBlendMode::kAlphaWeightedAddBelow) {
+    JXL_DASSERT(has_alpha);
+    for (size_t c = 0; c < 3; c++) {
+      PerformAlphaWeightedAdd(fg[c], bg[c], bg[3 + alpha], tmp.Row(c), xsize,
+                              color_blending.clamp);
+    }
+  } else if (color_blending.mode == PatchBlendMode::kMul) {
+    for (int p = 0; p < 3; p++) {
+      PerformMulBlending(bg[p], fg[p], tmp.Row(p), xsize, color_blending.clamp);
+    }
+  } else if (color_blending.mode == PatchBlendMode::kReplace ||
+             color_blending.mode == PatchBlendMode::kBlendAbove ||
+             color_blending.mode == PatchBlendMode::kBlendBelow) {  // kReplace
+    for (size_t p = 0; p < 3; p++) {
+      memcpy(tmp.Row(p), fg[p], xsize * sizeof(**fg));
+    }
+  } else if (color_blending.mode == PatchBlendMode::kNone) {
+    for (size_t p = 0; p < 3; p++) {
+      memcpy(tmp.Row(p), bg[p], xsize * sizeof(**fg));
+    }
+  } else {
+    JXL_ABORT("Unreachable");
+  }
+  for (size_t i = 0; i < 3 + num_ec; i++) {  // publish every blended row
+    memcpy(out[i], tmp.Row(i), xsize * sizeof(**out));
+  }
+  return true;
+}
+
+Status ImageBlender::RectBlender::DoBlending(size_t y) {
+  if (done_) return true;  // PrepareRect found nothing to blend
+  const auto first_row = current_overlap_.y0();
+  const auto end_row = first_row + current_overlap_.ysize();
+  if (y < first_row || y >= end_row) return true;  // row outside the overlap
+  const size_t row = y - first_row;
+  fg_row_ptrs_.resize(fg_ptrs_.size());
+  bg_row_ptrs_.resize(bg_ptrs_.size());
+  for (size_t plane = 0; plane < fg_row_ptrs_.size(); ++plane) {
+    fg_row_ptrs_[plane] = fg_ptrs_[plane] + row * fg_strides_[plane];
+    bg_row_ptrs_[plane] = bg_ptrs_[plane] + row * bg_strides_[plane];
+  }
+  return PerformBlending(bg_row_ptrs_.data(), fg_row_ptrs_.data(),
+                         bg_row_ptrs_.data(), current_overlap_.xsize(),
+                         blending_info_[0], blending_info_.data() + 1,
+                         *extra_channel_info_);
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/blending.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/blending.h
new file mode 100644
index 0000000000..5e60b146bf
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/blending.h
@@ -0,0 +1,91 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BLENDING_H_
+#define LIB_JXL_BLENDING_H_
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+Status PerformBlending(const float* const* bg, const float* const* fg,
+                       float* const* out, size_t xsize,  // row length in floats
+                       const PatchBlending& color_blending,
+                       const PatchBlending* ec_blending,
+                       const std::vector<ExtraChannelInfo>& extra_channel_info);
+
+class ImageBlender {
+ public:
+  class RectBlender {
+   public:
+    // Does the blending for a given row of the rect passed to
+    // ImageBlender::PrepareRect. Rows outside the overlap just return true.
+    Status DoBlending(size_t y);
+
+    // If this returns true, then nothing needs to be done for this rect and
+    // DoBlending can be skipped (but does not have to).
+    bool done() const { return done_; }
+
+   private:
+    friend class ImageBlender;  // the constructor below is private
+    explicit RectBlender(bool done) : done_(done) {}
+
+    bool done_;  // when true, DoBlending returns immediately
+    Rect current_overlap_;  // DoBlending handles rows [y0, y0 + ysize) of it
+    Rect current_cropbox_;
+    const std::vector<ExtraChannelInfo>* extra_channel_info_;
+    std::vector<const float*> fg_ptrs_;  // per-channel base pointers
+    std::vector<size_t> fg_strides_;  // in floats: row r is ptr + r * stride
+    std::vector<float*> bg_ptrs_;  // per-channel base pointers
+    std::vector<size_t> bg_strides_;  // in floats: row r is ptr + r * stride
+    std::vector<const float*> fg_row_ptrs_;  // scratch, set by DoBlending
+    std::vector<float*> bg_row_ptrs_;  // scratch, set by DoBlending
+    std::vector<PatchBlending> blending_info_;  // [0] color, [1..] extra chans
+  };
+
+  static bool NeedsBlending(PassesDecoderState* dec_state);
+
+  Status PrepareBlending(
+      PassesDecoderState* dec_state, FrameOrigin foreground_origin,
+      size_t foreground_xsize, size_t foreground_ysize,
+      const std::vector<ExtraChannelInfo>* extra_channel_info,
+      const ColorEncoding& frame_color_encoding, const Rect& frame_rect,
+      Image3F* output, const Rect& output_rect,
+      std::vector<ImageF>* output_extra_channels,
+      std::vector<Rect> output_extra_channels_rects);
+  // rect is relative to the full decoded foreground.
+  // But foreground here can be a subset of the full foreground, and input_rect
+  // indicates where that rect is in that subset. For example, if rect =
+  // Rect(10, 10, 20, 20), and foreground is subrect (7, 7, 30, 30) of the full
+  // foreground, then input_rect should be (3, 3, 20, 20), because that is where
+  // rect is relative to the foreground crop.
+  ImageBlender::RectBlender PrepareRect(
+      const Rect& rect, const Image3F& foreground,
+      const std::vector<ImageF>& extra_channels, const Rect& input_rect) const;
+
+  // If this returns true, then it is not necessary to call further methods on
+  // this ImageBlender to achieve blending, although it is not forbidden either
+  // (those methods will just return immediately in that case).
+  bool done() const { return done_; }
+
+ private:
+  BlendingInfo info_;
+  const std::vector<ExtraChannelInfo>* extra_channel_info_;
+  // Destination, as well as background before DoBlending is called.
+  Image3F* output_;
+  Rect output_rect_;
+  std::vector<ImageF>* output_extra_channels_;
+  std::vector<Rect> output_extra_channels_rects_;
+  Rect cropbox_;
+  Rect overlap_;
+  bool done_ = false;
+  const std::vector<BlendingInfo>* ec_info_;
+  FrameOrigin o_{};
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BLENDING_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/blending_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/blending_test.cc
new file mode 100644
index 0000000000..4ce66c2f17
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/blending_test.cc
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/blending.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "lib/extras/codec.h"
+#include "lib/jxl/dec_file.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/testdata.h"
+
+namespace jxl {
+namespace {
+
+using ::testing::SizeIs;
+
+TEST(BlendingTest, Crops) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes compressed =
+      ReadTestData("jxl/blending/cropped_traffic_light.jxl");
+  DecompressParams dparams;
+  CodecInOut decoded;
+  ASSERT_TRUE(DecodeFile(dparams, compressed, &decoded, pool));
+  ASSERT_THAT(decoded.frames, SizeIs(4));
+
+  // Each decoded frame is checked with SamePixels against its reference PNG.
+  for (size_t index = 0; index < decoded.frames.size(); ++index) {
+    const ImageBundle& decoded_frame = decoded.frames[index];
+    std::ostringstream reference_name;
+    reference_name << "jxl/blending/cropped_traffic_light_frame-" << index
+                   << ".png";
+    const PaddedBytes reference_bytes = ReadTestData(reference_name.str());
+    CodecInOut reference;
+    ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(reference_bytes), &reference));
+    EXPECT_TRUE(SamePixels(decoded_frame.color(), *reference.Main().color()));
+  }
+}
+
+TEST(BlendingTest, Offset) {
+  const PaddedBytes background_bytes = ReadTestData("jxl/splines.png");
+  CodecInOut background;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(background_bytes), &background));
+  const PaddedBytes foreground_bytes =
+      ReadTestData("jxl/grayscale_patches.png");
+  CodecInOut foreground;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(foreground_bytes), &foreground));
+  // Decoder state: kReplace blending, background stored as reference frame 0.
+  ImageBlender blender;
+  CodecMetadata nonserialized_metadata;
+  ASSERT_TRUE(
+      nonserialized_metadata.size.Set(background.xsize(), background.ysize()));
+  PassesSharedState state;
+  state.frame_header.blending_info.mode = BlendMode::kReplace;
+  state.frame_header.blending_info.source = 0;
+  state.frame_header.nonserialized_metadata = &nonserialized_metadata;
+  state.metadata = &background.metadata;
+  state.reference_frames[0].frame = &background.Main();
+  PassesDecoderState dec_state;
+  dec_state.shared = &state;
+  const FrameOrigin foreground_origin = {-50, -50};  // negative origin
+  ImageBundle output(&background.metadata.m);
+  output.SetFromImage(Image3F(background.xsize(), background.ysize()),
+                      background.Main().c_current());
+  ASSERT_TRUE(blender.PrepareBlending(
+      &dec_state, foreground_origin, foreground.xsize(), foreground.ysize(),
+      &nonserialized_metadata.m.extra_channel_info,
+      background.Main().c_current(), Rect(background), output.color(),
+      Rect(*output.color()), {}, {}));
+  // Feed the foreground in kStep x kStep tiles, clipped to the foreground.
+  static constexpr int kStep = 20;
+  for (size_t x0 = 0; x0 < foreground.xsize(); x0 += kStep) {
+    for (size_t y0 = 0; y0 < foreground.ysize(); y0 += kStep) {
+      const Rect rect =
+          Rect(x0, y0, kStep, kStep).Intersection(Rect(foreground.Main()));
+      Image3F foreground_crop(rect.xsize(), rect.ysize());
+      CopyImageTo(rect, *foreground.Main().color(), Rect(foreground_crop),
+                  &foreground_crop);
+      auto rect_blender =
+          blender.PrepareRect(rect, foreground_crop, {}, Rect(foreground_crop));
+      for (size_t y = 0; y < rect.ysize(); ++y) {
+        ASSERT_TRUE(rect_blender.DoBlending(y));
+      }
+    }
+  }
+  // Compare the blended output with the stored reference image.
+  const PaddedBytes expected_bytes =
+      ReadTestData("jxl/blending/grayscale_patches_on_splines.png");
+  CodecInOut expected;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(expected_bytes), &expected));
+  VerifyRelativeError(*expected.Main().color(), *output.color(), 1. / (2 * 255),
+                      0);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli/butteraugli.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli/butteraugli.cc
new file mode 100644
index 0000000000..fc1ef2875c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli/butteraugli.cc
@@ -0,0 +1,2139 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com)
+//
+// The physical architecture of butteraugli is based on the following naming
+// convention:
+//   * Opsin - dynamics of the photosensitive chemicals in the retina
+//             with their immediate electrical processing
+//   * Xyb - hybrid opponent/trichromatic color space
+//     x is roughly red-subtract-green.
+//     y is yellow.
+//     b is blue.
+//     Xyb values are computed from Opsin mixing, not directly from rgb.
+//   * Mask - for visual masking
+//   * Hf - color modeling for spatially high-frequency features
+//   * Lf - color modeling for spatially low-frequency features
+//   * Diffmap - to cluster and build an image of error between the images
+//   * Blur - to hold the smoothing code
+
+#include "lib/jxl/butteraugli/butteraugli.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <new>
+#include <vector>
+
+#if PROFILER_ENABLED
+#include <chrono>
+#endif  // PROFILER_ENABLED
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/butteraugli/butteraugli.cc"
+#include <hwy/foreach_target.h>
+
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/gauss_blur.h"
+#include "lib/jxl/image_ops.h"
+
+#ifndef JXL_BUTTERAUGLI_ONCE
+#define JXL_BUTTERAUGLI_ONCE
+
+namespace jxl {
+
+std::vector<float> ComputeKernel(float sigma) {
+  const float kRadiusPerSigma = 2.25;  // Accuracy increases with this factor.
+  const int radius = std::max<int>(1, kRadiusPerSigma * std::fabs(sigma));
+  const double exp_scale = -1.0 / (2.0 * sigma * sigma);
+  std::vector<float> taps(2 * radius + 1);  // unnormalized Gaussian samples
+  for (int d = -radius; d <= radius; ++d) {
+    taps[d + radius] = std::exp(exp_scale * d * d);
+  }
+  return taps;
+}
+
+void ConvolveBorderColumn(const ImageF& in, const std::vector<float>& kernel,
+                          const size_t x, float* BUTTERAUGLI_RESTRICT row_out) {
+  const size_t radius = kernel.size() / 2;
+  // Clamp the kernel support [x - radius, x + radius] to the image columns.
+  const int first = x < radius ? 0 : x - radius;
+  const int last = std::min<int>(in.xsize() - 1, x + radius);
+  // Renormalize by the weight of the taps that remain inside the image.
+  float weight_sum = 0.0f;
+  for (int j = first; j <= last; ++j) weight_sum += kernel[j - x + radius];
+  const float inv_weight = 1.0f / weight_sum;
+  for (size_t y = 0; y < in.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT src = in.Row(y);
+    float acc = 0.0f;
+    for (int j = first; j <= last; ++j) {
+      acc += src[j] * kernel[j - x + radius];
+    }
+    row_out[y] = acc * inv_weight;
+  }
+}
+
+// Horizontally convolves `in` with `kernel`; writes the transposed result.
+void ConvolutionWithTranspose(const ImageF& in,
+                              const std::vector<float>& kernel,
+                              ImageF* BUTTERAUGLI_RESTRICT out) {
+  PROFILER_FUNC;
+  JXL_CHECK(out->xsize() == in.ysize());
+  JXL_CHECK(out->ysize() == in.xsize());
+  const size_t len = kernel.size();
+  const size_t offset = len / 2;
+  float weight_no_border = 0.0f;
+  for (size_t j = 0; j < len; ++j) {
+    weight_no_border += kernel[j];
+  }
+  const float scale_no_border = 1.0f / weight_no_border;
+  const size_t border1 = std::min(in.xsize(), offset);
+  const size_t border2 = in.xsize() > offset ? in.xsize() - offset : 0;
+  std::vector<float> scaled_kernel(len / 2 + 1);  // first half, mirrored below
+  for (size_t i = 0; i <= len / 2; ++i) {
+    scaled_kernel[i] = kernel[i] * scale_no_border;
+  }
+
+  // middle: columns [border1, border2), where all taps are inside the row
+  switch (len) {
+#if 1  // speed-optimized version
+    case 7: {
+      PROFILER_ZONE("conv7");
+      const float sk0 = scaled_kernel[0];
+      const float sk1 = scaled_kernel[1];
+      const float sk2 = scaled_kernel[2];
+      const float sk3 = scaled_kernel[3];
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+        for (size_t x = border1; x < border2; ++x, ++row_in) {
+          const float sum0 = (row_in[0] + row_in[6]) * sk0;
+          const float sum1 = (row_in[1] + row_in[5]) * sk1;
+          const float sum2 = (row_in[2] + row_in[4]) * sk2;
+          const float sum = (row_in[3]) * sk3 + sum0 + sum1 + sum2;
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          row_out[y] = sum;
+        }
+      }
+    } break;
+    case 13: {
+      PROFILER_ZONE("conv13");
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+        for (size_t x = border1; x < border2; ++x, ++row_in) {
+          float sum0 = (row_in[0] + row_in[12]) * scaled_kernel[0];
+          float sum1 = (row_in[1] + row_in[11]) * scaled_kernel[1];
+          float sum2 = (row_in[2] + row_in[10]) * scaled_kernel[2];
+          float sum3 = (row_in[3] + row_in[9]) * scaled_kernel[3];
+          sum0 += (row_in[4] + row_in[8]) * scaled_kernel[4];
+          sum1 += (row_in[5] + row_in[7]) * scaled_kernel[5];
+          const float sum = (row_in[6]) * scaled_kernel[6];
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          row_out[y] = sum + sum0 + sum1 + sum2 + sum3;
+        }
+      }
+      break;
+    }
+    case 15: {
+      PROFILER_ZONE("conv15");
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+        for (size_t x = border1; x < border2; ++x, ++row_in) {
+          float sum0 = (row_in[0] + row_in[14]) * scaled_kernel[0];
+          float sum1 = (row_in[1] + row_in[13]) * scaled_kernel[1];
+          float sum2 = (row_in[2] + row_in[12]) * scaled_kernel[2];
+          float sum3 = (row_in[3] + row_in[11]) * scaled_kernel[3];
+          sum0 += (row_in[4] + row_in[10]) * scaled_kernel[4];
+          sum1 += (row_in[5] + row_in[9]) * scaled_kernel[5];
+          sum2 += (row_in[6] + row_in[8]) * scaled_kernel[6];
+          const float sum = (row_in[7]) * scaled_kernel[7];
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          row_out[y] = sum + sum0 + sum1 + sum2 + sum3;
+        }
+      }
+      break;
+    }
+    case 25: {
+      PROFILER_ZONE("conv25");
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+        for (size_t x = border1; x < border2; ++x, ++row_in) {
+          float sum0 = (row_in[0] + row_in[24]) * scaled_kernel[0];
+          float sum1 = (row_in[1] + row_in[23]) * scaled_kernel[1];
+          float sum2 = (row_in[2] + row_in[22]) * scaled_kernel[2];
+          float sum3 = (row_in[3] + row_in[21]) * scaled_kernel[3];
+          sum0 += (row_in[4] + row_in[20]) * scaled_kernel[4];
+          sum1 += (row_in[5] + row_in[19]) * scaled_kernel[5];
+          sum2 += (row_in[6] + row_in[18]) * scaled_kernel[6];
+          sum3 += (row_in[7] + row_in[17]) * scaled_kernel[7];
+          sum0 += (row_in[8] + row_in[16]) * scaled_kernel[8];
+          sum1 += (row_in[9] + row_in[15]) * scaled_kernel[9];
+          sum2 += (row_in[10] + row_in[14]) * scaled_kernel[10];
+          sum3 += (row_in[11] + row_in[13]) * scaled_kernel[11];
+          const float sum = (row_in[12]) * scaled_kernel[12];
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          row_out[y] = sum + sum0 + sum1 + sum2 + sum3;
+        }
+      }
+      break;
+    }
+    case 33: {
+      PROFILER_ZONE("conv33");
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+        for (size_t x = border1; x < border2; ++x, ++row_in) {
+          float sum0 = (row_in[0] + row_in[32]) * scaled_kernel[0];
+          float sum1 = (row_in[1] + row_in[31]) * scaled_kernel[1];
+          float sum2 = (row_in[2] + row_in[30]) * scaled_kernel[2];
+          float sum3 = (row_in[3] + row_in[29]) * scaled_kernel[3];
+          sum0 += (row_in[4] + row_in[28]) * scaled_kernel[4];
+          sum1 += (row_in[5] + row_in[27]) * scaled_kernel[5];
+          sum2 += (row_in[6] + row_in[26]) * scaled_kernel[6];
+          sum3 += (row_in[7] + row_in[25]) * scaled_kernel[7];
+          sum0 += (row_in[8] + row_in[24]) * scaled_kernel[8];
+          sum1 += (row_in[9] + row_in[23]) * scaled_kernel[9];
+          sum2 += (row_in[10] + row_in[22]) * scaled_kernel[10];
+          sum3 += (row_in[11] + row_in[21]) * scaled_kernel[11];
+          sum0 += (row_in[12] + row_in[20]) * scaled_kernel[12];
+          sum1 += (row_in[13] + row_in[19]) * scaled_kernel[13];
+          sum2 += (row_in[14] + row_in[18]) * scaled_kernel[14];
+          sum3 += (row_in[15] + row_in[17]) * scaled_kernel[15];
+          const float sum = (row_in[16]) * scaled_kernel[16];
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          row_out[y] = sum + sum0 + sum1 + sum2 + sum3;
+        }
+      }
+      break;
+    }
+    case 37: {
+      PROFILER_ZONE("conv37");
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+        for (size_t x = border1; x < border2; ++x, ++row_in) {
+          float sum0 = (row_in[0] + row_in[36]) * scaled_kernel[0];
+          float sum1 = (row_in[1] + row_in[35]) * scaled_kernel[1];
+          float sum2 = (row_in[2] + row_in[34]) * scaled_kernel[2];
+          float sum3 = (row_in[3] + row_in[33]) * scaled_kernel[3];
+          sum0 += (row_in[4] + row_in[32]) * scaled_kernel[4];
+          sum0 += (row_in[5] + row_in[31]) * scaled_kernel[5];
+          sum0 += (row_in[6] + row_in[30]) * scaled_kernel[6];
+          sum0 += (row_in[7] + row_in[29]) * scaled_kernel[7];
+          sum0 += (row_in[8] + row_in[28]) * scaled_kernel[8];
+          sum1 += (row_in[9] + row_in[27]) * scaled_kernel[9];
+          sum2 += (row_in[10] + row_in[26]) * scaled_kernel[10];
+          sum3 += (row_in[11] + row_in[25]) * scaled_kernel[11];
+          sum0 += (row_in[12] + row_in[24]) * scaled_kernel[12];
+          sum1 += (row_in[13] + row_in[23]) * scaled_kernel[13];
+          sum2 += (row_in[14] + row_in[22]) * scaled_kernel[14];
+          sum3 += (row_in[15] + row_in[21]) * scaled_kernel[15];
+          sum0 += (row_in[16] + row_in[20]) * scaled_kernel[16];
+          sum1 += (row_in[17] + row_in[19]) * scaled_kernel[17];
+          const float sum = (row_in[18]) * scaled_kernel[18];
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          row_out[y] = sum + sum0 + sum1 + sum2 + sum3;
+        }
+      }
+      break;
+    }
+    default:
+      printf("Warning: Unexpected kernel size! %zu\n", len);
+#else
+    default:
+#endif
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y);
+        for (size_t x = border1; x < border2; ++x) {
+          const int d = x - offset;
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          float sum = 0.0f;
+          size_t j;
+          for (j = 0; j <= len / 2; ++j) {
+            sum += row_in[d + j] * scaled_kernel[j];
+          }
+          for (; j < len; ++j) {
+            sum += row_in[d + j] * scaled_kernel[len - 1 - j];
+          }
+          row_out[y] = sum;
+        }
+      }
+  }
+  // left border: ConvolveBorderColumn renormalizes by the in-image weights
+  for (size_t x = 0; x < border1; ++x) {
+    ConvolveBorderColumn(in, kernel, x, out->Row(x));
+  }
+
+  // right border: same per-column renormalization as the left border
+  for (size_t x = border2; x < in.xsize(); ++x) {
+    ConvolveBorderColumn(in, kernel, x, out->Row(x));
+  }
+}
+
+// Separate horizontal and vertical (next function) convolution passes.
+void BlurHorizontalConv(const ImageF& in, const intptr_t xbegin,
+                        const intptr_t xend, const intptr_t ybegin,
+                        const intptr_t yend, const std::vector<float>& kernel,
+                        ImageF* out) {
+  if (xbegin >= xend || ybegin >= yend) return;
+  const intptr_t xsize = in.xsize();
+  const intptr_t ysize = in.ysize();
+  JXL_ASSERT(0 <= xbegin && xend <= xsize);
+  JXL_ASSERT(0 <= ybegin && yend <= ysize);
+  (void)xsize;
+  (void)ysize;
+  const intptr_t radius = kernel.size() / 2;
+
+  for (intptr_t y = ybegin; y < yend; ++y) {
+    const float* JXL_RESTRICT src = in.Row(y);
+    float* JXL_RESTRICT dst = out->Row(y);
+    for (intptr_t x = xbegin; x < xend; ++x) {
+      float acc = 0.0f;
+      float used_weight = 0.0f;
+      // Taps falling outside the row are skipped; renormalize by the rest.
+      for (intptr_t tap = 0; tap <= 2 * radius; ++tap) {
+        const intptr_t in_x = x + tap - radius;
+        if (in_x < 0 || in_x >= xsize) continue;
+        acc += src[in_x] * kernel[tap];
+        used_weight += kernel[tap];
+      }
+      dst[x] = acc / used_weight;
+    }
+  }
+}
+
+void BlurVerticalConv(const ImageF& in, const intptr_t xbegin,
+                      const intptr_t xend, const intptr_t ybegin,
+                      const intptr_t yend, const std::vector<float>& kernel,
+                      ImageF* out) {
+  if (xbegin >= xend || ybegin >= yend) return;
+  const intptr_t xsize = in.xsize();
+  const intptr_t ysize = in.ysize();
+  JXL_ASSERT(0 <= xbegin && xend <= xsize);
+  JXL_ASSERT(0 <= ybegin && yend <= ysize);
+  (void)xsize;
+  const intptr_t radius = kernel.size() / 2;
+  for (intptr_t y = ybegin; y < yend; ++y) {
+    float* JXL_RESTRICT dst = out->Row(y);
+    for (intptr_t x = xbegin; x < xend; ++x) {
+      float acc = 0.0f;
+      float used_weight = 0.0f;
+      // Taps falling outside the column are skipped; renormalize by the rest.
+      for (intptr_t tap = 0; tap <= 2 * radius; ++tap) {
+        const intptr_t in_y = y + tap - radius;
+        if (in_y < 0 || in_y >= ysize) continue;
+        acc += in.ConstRow(in_y)[x] * kernel[tap];
+        used_weight += kernel[tap];
+      }
+      dst[x] = acc / used_weight;
+    }
+  }
+}
+
+// A blur somewhat similar to a 2D Gaussian blur.
+// See: https://en.wikipedia.org/wiki/Gaussian_blur
+//
+// This is a bottleneck because the sigma can be quite large (>7). We can use
+// gauss_blur.cc (runtime independent of sigma, closer to a 4*sigma truncated
+// Gaussian and our 2.25 in ComputeKernel), but its boundary conditions are
+// zero-valued. This leads to noticeable differences at the edges of diffmaps.
+// We retain a special case for 5x5 kernels (even faster than gauss_blur),
+// optionally use gauss_blur followed by fixup of the borders for large images,
+// or fall back to the previous truncated FIR followed by a transpose.
+void Blur(const ImageF& in, float sigma, const ButteraugliParams& params,
+          BlurTemp* temp, ImageF* out) {
+  std::vector<float> kernel = ComputeKernel(sigma);
+  // Separable5 does an in-place convolution, so this fast path is not safe if
+  // in aliases out.
+  if (kernel.size() == 5 && &in != out) {
+    float sum_weights = 0.0f;
+    for (const float w : kernel) {
+      sum_weights += w;
+    }
+    const float scale = 1.0f / sum_weights;
+    const float w0 = kernel[2] * scale;  // center tap, normalized
+    const float w1 = kernel[1] * scale;  // one sample from the center
+    const float w2 = kernel[0] * scale;  // two samples from the center
+    const WeightsSeparable5 weights = {
+        {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)},
+        {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)},
+    };
+    Separable5(in, Rect(in), weights, /*pool=*/nullptr, out);
+    return;
+  }
+
+  const bool fast_gauss = params.approximate_border;
+  const bool kBorderFixup = fast_gauss && false;  // always false: fixup is off
+  // Fast+fixup is actually slower for small images that are all border.
+  const bool too_small_for_fast_gauss =
+      kBorderFixup &&
+      in.xsize() * in.ysize() < 9 * kernel.size() * kernel.size();
+  // If fast gaussian is disabled, use previous transposed convolution.
+  if (!fast_gauss || too_small_for_fast_gauss) {
+    ImageF* JXL_RESTRICT temp_t = temp->GetTransposed(in);
+    ConvolutionWithTranspose(in, kernel, temp_t);  // result is transposed
+    ConvolutionWithTranspose(*temp_t, kernel, out);  // transposed back
+    return;
+  }
+  auto rg = CreateRecursiveGaussian(sigma);
+  ImageF* JXL_RESTRICT temp_ = temp->Get(in);
+  ThreadPool* null_pool = nullptr;
+  FastGaussian(rg, in, null_pool, temp_, out);
+  // The branch below never runs while kBorderFixup is hard-wired to false.
+  if (kBorderFixup) {
+    // Produce rg_radius extra pixels around each border
+    const intptr_t rg_radius = rg->radius;
+    const intptr_t radius = kernel.size() / 2;
+    const intptr_t xsize = in.xsize();
+    const intptr_t ysize = in.ysize();
+    const intptr_t yend_top = std::min(rg_radius + radius, ysize);
+    const intptr_t ybegin_bottom =
+        std::max(intptr_t(0), ysize - rg_radius - radius);
+    // Top (requires radius extra for the vertical pass)
+    BlurHorizontalConv(in, 0, xsize, 0, yend_top, kernel, temp_);
+    // Bottom
+    BlurHorizontalConv(in, 0, xsize, ybegin_bottom, ysize, kernel, temp_);
+    // Left/right columns between top and bottom
+    const intptr_t xbegin_right = std::max(intptr_t(0), xsize - rg_radius);
+    const intptr_t xend_left = std::min(rg_radius, xsize);
+    BlurHorizontalConv(in, 0, xend_left, yend_top, ybegin_bottom, kernel,
+                       temp_);
+    BlurHorizontalConv(in, xbegin_right, xsize, yend_top, ybegin_bottom, kernel,
+                       temp_);
+
+    // Entire left/right columns
+    BlurVerticalConv(*temp_, 0, xend_left, 0, ysize, kernel, out);
+    BlurVerticalConv(*temp_, xbegin_right, xsize, 0, ysize, kernel, out);
+    // Top/bottom between left/right
+    const intptr_t ybegin_bottom2 = std::max(intptr_t(0), ysize - rg_radius);
+    const intptr_t yend_top2 = std::min(rg_radius, ysize);
+    BlurVerticalConv(*temp_, xend_left, xbegin_right, 0, yend_top2, kernel,
+                     out);
+    BlurVerticalConv(*temp_, xend_left, xbegin_right, ybegin_bottom2, ysize,
+                     kernel, out);
+  }
+}
+
+// Allows PaddedMaltaUnit to call either function via overloading.
+struct MaltaTagLF {};  // empty tag type used only for overload selection
+struct MaltaTag {};  // empty tag type used only for overload selection
+
+}  // namespace jxl
+
+#endif  // JXL_BUTTERAUGLI_ONCE
+
+#include <hwy/highway.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Vec;
+
+template <class D, class V>
+HWY_INLINE V MaximumClamp(D d, V v, double kMaxVal) {
+  static const double kMul = 0.724216145665;
+  const V limit = Set(d, kMaxVal);
+  const V slope = Set(d, kMul);
+  // Past +/-limit, only the excess over the limit is kept, scaled by kMul.
+  const V above = MulAdd(v - limit, slope, limit);
+  const V below = MulSub(v + limit, slope, limit);
+  const V upper_clamped = IfThenElse(v >= limit, above, v);
+  return IfThenElse(v < Neg(limit), below, upper_clamped);
+}
+
+// Dead zone around zero: x in [-kw, kw] becomes 0, the rest moves in by kw.
+template <class D, class V>
+HWY_INLINE V RemoveRangeAroundZero(const D d, const double kw, const V x) {
+  const auto lim = Set(d, kw);
+  return IfThenElse(x > lim, x - lim, IfThenElseZero(x < Neg(lim), x + lim));
+}
+
+// Boost around zero: x in [-kw, kw] is doubled, the rest moves out by kw.
+template <class D, class V>
+HWY_INLINE V AmplifyRangeAroundZero(const D d, const double kw, const V x) {
+  const auto lim = Set(d, kw);
+  return IfThenElse(x > lim, x + lim, IfThenElse(x < Neg(lim), x - lim, x + x));
+}
+
+// Maps low-frequency XYB to the 'vals' space, which visual masking can turn
+// into an L2-norm (Euclidean, normalized) space. x and y are scaled by fixed
+// gains; b first has a fixed multiple of y added, then its own gain applied.
+template <class D, class V>
+HWY_INLINE void XybLowFreqToVals(const D d, const V& x, const V& y,
+                                 const V& b_arg, V* HWY_RESTRICT valx,
+                                 V* HWY_RESTRICT valy, V* HWY_RESTRICT valb) {
+  static const double kXGain = 32.2217497012;
+  static const double kYGain = 13.7697791434;
+  static const double kBGain = 47.504615728;
+  static const double kYToB = -0.362267051518;
+  const V x_gain = Set(d, kXGain);
+  const V y_gain = Set(d, kYGain);
+  const V b_gain = Set(d, kBGain);
+  const V y_to_b = Set(d, kYToB);
+  const V b_mixed = MulAdd(y_to_b, y, b_arg);
+  *valb = b_mixed * b_gain;
+  *valx = x * x_gain;
+  *valy = y * y_gain;
+}
+
+void SuppressXByY(const ImageF& in_x, const ImageF& in_y, const double yw,
+                  ImageF* HWY_RESTRICT out) {
+  JXL_DASSERT(SameSize(in_x, in_y) && SameSize(in_x, *out));
+  const HWY_FULL(float) d;
+  static const double kBase = 0.653020556257;
+  const auto base_v = Set(d, kBase);
+  const auto rest_v = Set(d, 1.0 - kBase);
+  const auto yw_v = Set(d, yw);
+
+  const size_t width = in_x.xsize();
+  const size_t height = in_x.ysize();
+  for (size_t y = 0; y < height; ++y) {
+    const float* HWY_RESTRICT src_x = in_x.ConstRow(y);
+    const float* HWY_RESTRICT src_y = in_y.ConstRow(y);
+    float* HWY_RESTRICT dst = out->Row(y);
+    for (size_t x = 0; x < width; x += Lanes(d)) {
+      const auto vx = Load(d, src_x + x);
+      const auto vy = Load(d, src_y + x);
+      // gain = kBase + (1 - kBase) * yw / (yw + vy * vy)
+      const auto ratio = yw_v / MulAdd(vy, vy, yw_v);
+      const auto gain = MulAdd(ratio, rest_v, base_v);
+      Store(gain * vx, d, dst + x);
+    }
+  }
+}
+
+static void SeparateFrequencies(size_t xsize, size_t ysize,
+                                const ButteraugliParams& params,
+                                BlurTemp* blur_temp, const Image3F& xyb,
+                                PsychoImage& ps) {
+  PROFILER_FUNC;
+  const HWY_FULL(float) d;
+
+  // Extract lf ...
+  static const double kSigmaLf = 7.15593339443;
+  static const double kSigmaHf = 3.22489901262;
+  static const double kSigmaUhf = 1.56416327805;
+  ps.mf = Image3F(xsize, ysize);
+  ps.hf[0] = ImageF(xsize, ysize);
+  ps.hf[1] = ImageF(xsize, ysize);
+  ps.lf = Image3F(xyb.xsize(), xyb.ysize());
+  ps.mf = Image3F(xyb.xsize(), xyb.ysize());
+  for (int i = 0; i < 3; ++i) {
+    Blur(xyb.Plane(i), kSigmaLf, params, blur_temp, &ps.lf.Plane(i));
+
+    // ... and keep everything else in mf.
+    for (size_t y = 0; y < ysize; ++y) {
+      const float* BUTTERAUGLI_RESTRICT row_xyb = xyb.PlaneRow(i, y);
+      const float* BUTTERAUGLI_RESTRICT row_lf = ps.lf.ConstPlaneRow(i, y);
+      float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(i, y);
+      for (size_t x = 0; x < xsize; x += Lanes(d)) {
+        const auto mf = Load(d, row_xyb + x) - Load(d, row_lf + x);
+        Store(mf, d, row_mf + x);
+      }
+    }
+    if (i == 2) {
+      Blur(ps.mf.Plane(i), kSigmaHf, params, blur_temp, &ps.mf.Plane(i));
+      break;
+    }
+    // Divide mf into mf and hf.
+    for (size_t y = 0; y < ysize; ++y) {
+      float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(i, y);
+      float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[i].Row(y);
+      for (size_t x = 0; x < xsize; x += Lanes(d)) {
+        Store(Load(d, row_mf + x), d, row_hf + x);
+      }
+    }
+    Blur(ps.mf.Plane(i), kSigmaHf, params, blur_temp, &ps.mf.Plane(i));
+    static const double kRemoveMfRange = 0.29;
+    static const double kAddMfRange = 0.1;
+    if (i == 0) {
+      for (size_t y = 0; y < ysize; ++y) {
+        float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(0, y);
+        float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[0].Row(y);
+        for (size_t x = 0; x < xsize; x += Lanes(d)) {
+          auto mf = Load(d, row_mf + x);
+          auto hf = Load(d, row_hf + x) - mf;
+          mf = RemoveRangeAroundZero(d, kRemoveMfRange, mf);
+          Store(mf, d, row_mf + x);
+          Store(hf, d, row_hf + x);
+        }
+      }
+    } else {
+      for (size_t y = 0; y < ysize; ++y) {
+        float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(1, y);
+        float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[1].Row(y);
+        for (size_t x = 0; x < xsize; x += Lanes(d)) {
+          auto mf = Load(d, row_mf + x);
+          auto hf = Load(d, row_hf + x) - mf;
+
+          mf = AmplifyRangeAroundZero(d, kAddMfRange, mf);
+          Store(mf, d, row_mf + x);
+          Store(hf, d, row_hf + x);
+        }
+      }
+    }
+  }
+
+  // Temporarily used as output of SuppressXByY
+  ps.uhf[0] = ImageF(xsize, ysize);
+  ps.uhf[1] = ImageF(xsize, ysize);
+
+  // Suppress red-green by intensity change in the high freq channels.
+  static const double suppress = 46.0;
+  SuppressXByY(ps.hf[0], ps.hf[1], suppress, &ps.uhf[0]);
+  // hf is the SuppressXByY output, uhf will be written below.
+  ps.hf[0].Swap(ps.uhf[0]);
+
+  for (int i = 0; i < 2; ++i) {
+    // Divide hf into hf and uhf.
+    for (size_t y = 0; y < ysize; ++y) {
+      float* BUTTERAUGLI_RESTRICT row_uhf = ps.uhf[i].Row(y);
+      float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[i].Row(y);
+      for (size_t x = 0; x < xsize; ++x) {
+        row_uhf[x] = row_hf[x];
+      }
+    }
+    Blur(ps.hf[i], kSigmaUhf, params, blur_temp, &ps.hf[i]);
+    static const double kRemoveHfRange = 1.5;
+    static const double kAddHfRange = 0.132;
+    static const double kRemoveUhfRange = 0.04;
+    static const double kMaxclampHf = 28.4691806922;
+    static const double kMaxclampUhf = 5.19175294647;
+    static double kMulYHf = 2.155;
+    static double kMulYUhf = 2.69313763794;
+    if (i == 0) {
+      for (size_t y = 0; y < ysize; ++y) {
+        float* BUTTERAUGLI_RESTRICT row_uhf = ps.uhf[0].Row(y);
+        float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[0].Row(y);
+        for (size_t x = 0; x < xsize; x += Lanes(d)) {
+          auto hf = Load(d, row_hf + x);
+          auto uhf = Load(d, row_uhf + x) - hf;
+          hf = RemoveRangeAroundZero(d, kRemoveHfRange, hf);
+          uhf = RemoveRangeAroundZero(d, kRemoveUhfRange, uhf);
+          Store(hf, d, row_hf + x);
+          Store(uhf, d, row_uhf + x);
+        }
+      }
+    } else {
+      for (size_t y = 0; y < ysize; ++y) {
+        float* BUTTERAUGLI_RESTRICT row_uhf = ps.uhf[1].Row(y);
+        float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[1].Row(y);
+        for (size_t x = 0; x < xsize; x += Lanes(d)) {
+          auto hf = Load(d, row_hf + x);
+          hf = MaximumClamp(d, hf, kMaxclampHf);
+
+          auto uhf = Load(d, row_uhf + x) - hf;
+          uhf = MaximumClamp(d, uhf, kMaxclampUhf);
+          uhf *= Set(d, kMulYUhf);
+          Store(uhf, d, row_uhf + x);
+
+          hf *= Set(d, kMulYHf);
+          hf = AmplifyRangeAroundZero(d, kAddHfRange, hf);
+          Store(hf, d, row_hf + x);
+        }
+      }
+    }
+  }
+  // Modify range around zero code only concerns the high frequency
+  // planes and only the X and Y channels.
+  // Convert low freq xyb to vals space so that we can do a simple squared sum
+  // diff on the low frequencies later.
+  for (size_t y = 0; y < ysize; ++y) {
+    float* BUTTERAUGLI_RESTRICT row_x = ps.lf.PlaneRow(0, y);
+    float* BUTTERAUGLI_RESTRICT row_y = ps.lf.PlaneRow(1, y);
+    float* BUTTERAUGLI_RESTRICT row_b = ps.lf.PlaneRow(2, y);
+    for (size_t x = 0; x < xsize; x += Lanes(d)) {
+      auto valx = Undefined(d);
+      auto valy = Undefined(d);
+      auto valb = Undefined(d);
+      XybLowFreqToVals(d, Load(d, row_x + x), Load(d, row_y + x),
+                       Load(d, row_b + x), &valx, &valy, &valb);
+      Store(valx, d, row_x + x);
+      Store(valy, d, row_y + x);
+      Store(valb, d, row_b + x);
+    }
+  }
+}
+
+// Malta filter unit, sparse variant selected by MaltaTagLF.
+//
+// Evaluates 16 directional line kernels centered on `d`. Each kernel sums the
+// center sample and four neighbors (five samples in total); the function
+// returns the sum of the squares of these 16 line sums.
+//
+//   df: vector descriptor; every sample is fetched with an unaligned LoadU.
+//   d:  pointer to the center sample. Offsets of up to 4 columns to either
+//       side and up to 4 rows above/below are read, so that window must be
+//       addressable by the caller.
+//   xs: distance in floats between vertically adjacent samples (row stride).
+template <class D>
+Vec<D> MaltaUnit(MaltaTagLF /*tag*/, const D df,
+                 const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) {
+  // Three rows; combined with +-xs below to reach rows -4 and +4.
+  const intptr_t xs3 = 3 * xs;
+
+  const auto center = LoadU(df, d);
+
+  // x grows, y constant
+  const auto sum_yconst = LoadU(df, d - 4) + LoadU(df, d - 2) + center +
+                          LoadU(df, d + 2) + LoadU(df, d + 4);
+  // Will return this, sum of all line kernels
+  auto retval = sum_yconst * sum_yconst;
+  {
+    // y grows, x constant
+    auto sum = LoadU(df, d - xs3 - xs) + LoadU(df, d - xs - xs) + center +
+               LoadU(df, d + xs + xs) + LoadU(df, d + xs3 + xs);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // both grow
+    auto sum = LoadU(df, d - xs3 - 3) + LoadU(df, d - xs - xs - 2) + center +
+               LoadU(df, d + xs + xs + 2) + LoadU(df, d + xs3 + 3);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // y grows, x shrinks
+    auto sum = LoadU(df, d - xs3 + 3) + LoadU(df, d - xs - xs + 2) + center +
+               LoadU(df, d + xs + xs - 2) + LoadU(df, d + xs3 - 3);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // y grows -4 to 4, x shrinks 1 -> -1
+    auto sum = LoadU(df, d - xs3 - xs + 1) + LoadU(df, d - xs - xs + 1) +
+               center + LoadU(df, d + xs + xs - 1) +
+               LoadU(df, d + xs3 + xs - 1);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    //  y grows -4 to 4, x grows -1 -> 1
+    auto sum = LoadU(df, d - xs3 - xs - 1) + LoadU(df, d - xs - xs - 1) +
+               center + LoadU(df, d + xs + xs + 1) +
+               LoadU(df, d + xs3 + xs + 1);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // x grows -4 to 4, y grows -1 to 1
+    auto sum = LoadU(df, d - 4 - xs) + LoadU(df, d - 2 - xs) + center +
+               LoadU(df, d + 2 + xs) + LoadU(df, d + 4 + xs);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // x grows -4 to 4, y shrinks 1 to -1
+    auto sum = LoadU(df, d - 4 + xs) + LoadU(df, d - 2 + xs) + center +
+               LoadU(df, d + 2 - xs) + LoadU(df, d + 4 - xs);
+    retval = MulAdd(sum, sum, retval);
+  }
+  // In the diagrams below, the leading digit is the row index, '0' in row 4
+  // marks the center sample and '*' marks the other samples of the kernel.
+  {
+    /* 0_________
+       1__*______
+       2___*_____
+       3_________
+       4____0____
+       5_________
+       6_____*___
+       7______*__
+       8_________ */
+    auto sum = LoadU(df, d - xs3 - 2) + LoadU(df, d - xs - xs - 1) + center +
+               LoadU(df, d + xs + xs + 1) + LoadU(df, d + xs3 + 2);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1______*__
+       2_____*___
+       3_________
+       4____0____
+       5_________
+       6___*_____
+       7__*______
+       8_________ */
+    auto sum = LoadU(df, d - xs3 + 2) + LoadU(df, d - xs - xs + 1) + center +
+               LoadU(df, d + xs + xs - 1) + LoadU(df, d + xs3 - 2);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1_________
+       2_*_______
+       3__*______
+       4____0____
+       5______*__
+       6_______*_
+       7_________
+       8_________ */
+    auto sum = LoadU(df, d - xs - xs - 3) + LoadU(df, d - xs - 2) + center +
+               LoadU(df, d + xs + 2) + LoadU(df, d + xs + xs + 3);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1_________
+       2_______*_
+       3______*__
+       4____0____
+       5__*______
+       6_*_______
+       7_________
+       8_________ */
+    auto sum = LoadU(df, d - xs - xs + 3) + LoadU(df, d - xs + 2) + center +
+               LoadU(df, d + xs - 2) + LoadU(df, d + xs + xs - 3);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1_________
+       2________*
+       3______*__
+       4____0____
+       5__*______
+       6*________
+       7_________
+       8_________ */
+
+    auto sum = LoadU(df, d + xs + xs - 4) + LoadU(df, d + xs - 2) + center +
+               LoadU(df, d - xs + 2) + LoadU(df, d - xs - xs + 4);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1_________
+       2*________
+       3__*______
+       4____0____
+       5______*__
+       6________*
+       7_________
+       8_________ */
+    auto sum = LoadU(df, d - xs - xs - 4) + LoadU(df, d - xs - 2) + center +
+               LoadU(df, d + xs + 2) + LoadU(df, d + xs + xs + 4);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0__*______
+       1_________
+       2___*_____
+       3_________
+       4____0____
+       5_________
+       6_____*___
+       7_________
+       8______*__ */
+    auto sum = LoadU(df, d - xs3 - xs - 2) + LoadU(df, d - xs - xs - 1) +
+               center + LoadU(df, d + xs + xs + 1) +
+               LoadU(df, d + xs3 + xs + 2);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0______*__
+       1_________
+       2_____*___
+       3_________
+       4____0____
+       5_________
+       6___*_____
+       7_________
+       8__*______ */
+    auto sum = LoadU(df, d - xs3 - xs + 2) + LoadU(df, d - xs - xs + 1) +
+               center + LoadU(df, d + xs + xs - 1) +
+               LoadU(df, d + xs3 + xs - 2);
+    retval = MulAdd(sum, sum, retval);
+  }
+  return retval;
+}
+
+// Malta filter unit, dense variant selected by MaltaTag.
+//
+// Evaluates 16 directional line kernels centered on `d`. Each kernel sums the
+// center sample and six or eight neighbors; the function returns the sum of
+// the squares of these 16 line sums.
+//
+//   df: vector descriptor; every sample is fetched with an unaligned LoadU.
+//   d:  pointer to the center sample. Offsets of up to 4 columns to either
+//       side and up to 4 rows above/below are read, so that window must be
+//       addressable by the caller.
+//   xs: distance in floats between vertically adjacent samples (row stride).
+//
+// NOTE(review): the last two kernels (the two near-vertical diagrams at the
+// end) use exactly the same offsets as the "x grows -1 -> 1" and
+// "x shrinks 1 -> -1" kernels earlier in the function, so those two lines are
+// counted twice. Confirm whether this is intentional before changing it.
+template <class D>
+Vec<D> MaltaUnit(MaltaTag /*tag*/, const D df,
+                 const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) {
+  // Three rows; combined with +-xs below to reach rows -4 and +4.
+  const intptr_t xs3 = 3 * xs;
+
+  const auto center = LoadU(df, d);
+
+  // x grows, y constant
+  const auto sum_yconst = LoadU(df, d - 4) + LoadU(df, d - 3) +
+                          LoadU(df, d - 2) + LoadU(df, d - 1) + center +
+                          LoadU(df, d + 1) + LoadU(df, d + 2) +
+                          LoadU(df, d + 3) + LoadU(df, d + 4);
+  // Will return this, sum of all line kernels
+  auto retval = sum_yconst * sum_yconst;
+
+  {
+    // y grows, x constant
+    auto sum = LoadU(df, d - xs3 - xs) + LoadU(df, d - xs3) +
+               LoadU(df, d - xs - xs) + LoadU(df, d - xs) + center +
+               LoadU(df, d + xs) + LoadU(df, d + xs + xs) + LoadU(df, d + xs3) +
+               LoadU(df, d + xs3 + xs);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // both grow
+    auto sum = LoadU(df, d - xs3 - 3) + LoadU(df, d - xs - xs - 2) +
+               LoadU(df, d - xs - 1) + center + LoadU(df, d + xs + 1) +
+               LoadU(df, d + xs + xs + 2) + LoadU(df, d + xs3 + 3);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // y grows, x shrinks
+    auto sum = LoadU(df, d - xs3 + 3) + LoadU(df, d - xs - xs + 2) +
+               LoadU(df, d - xs + 1) + center + LoadU(df, d + xs - 1) +
+               LoadU(df, d + xs + xs - 2) + LoadU(df, d + xs3 - 3);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // y grows -4 to 4, x shrinks 1 -> -1
+    auto sum = LoadU(df, d - xs3 - xs + 1) + LoadU(df, d - xs3 + 1) +
+               LoadU(df, d - xs - xs + 1) + LoadU(df, d - xs) + center +
+               LoadU(df, d + xs) + LoadU(df, d + xs + xs - 1) +
+               LoadU(df, d + xs3 - 1) + LoadU(df, d + xs3 + xs - 1);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    //  y grows -4 to 4, x grows -1 -> 1
+    auto sum = LoadU(df, d - xs3 - xs - 1) + LoadU(df, d - xs3 - 1) +
+               LoadU(df, d - xs - xs - 1) + LoadU(df, d - xs) + center +
+               LoadU(df, d + xs) + LoadU(df, d + xs + xs + 1) +
+               LoadU(df, d + xs3 + 1) + LoadU(df, d + xs3 + xs + 1);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // x grows -4 to 4, y grows -1 to 1
+    auto sum = LoadU(df, d - 4 - xs) + LoadU(df, d - 3 - xs) +
+               LoadU(df, d - 2 - xs) + LoadU(df, d - 1) + center +
+               LoadU(df, d + 1) + LoadU(df, d + 2 + xs) +
+               LoadU(df, d + 3 + xs) + LoadU(df, d + 4 + xs);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    // x grows -4 to 4, y shrinks 1 to -1
+    auto sum = LoadU(df, d - 4 + xs) + LoadU(df, d - 3 + xs) +
+               LoadU(df, d - 2 + xs) + LoadU(df, d - 1) + center +
+               LoadU(df, d + 1) + LoadU(df, d + 2 - xs) +
+               LoadU(df, d + 3 - xs) + LoadU(df, d + 4 - xs);
+    retval = MulAdd(sum, sum, retval);
+  }
+  // In the diagrams below, the leading digit is the row index, '0' in row 4
+  // marks the center sample and '*' marks the other samples of the kernel.
+  {
+    /* 0_________
+       1__*______
+       2___*_____
+       3___*_____
+       4____0____
+       5_____*___
+       6_____*___
+       7______*__
+       8_________ */
+    auto sum = LoadU(df, d - xs3 - 2) + LoadU(df, d - xs - xs - 1) +
+               LoadU(df, d - xs - 1) + center + LoadU(df, d + xs + 1) +
+               LoadU(df, d + xs + xs + 1) + LoadU(df, d + xs3 + 2);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1______*__
+       2_____*___
+       3_____*___
+       4____0____
+       5___*_____
+       6___*_____
+       7__*______
+       8_________ */
+    auto sum = LoadU(df, d - xs3 + 2) + LoadU(df, d - xs - xs + 1) +
+               LoadU(df, d - xs + 1) + center + LoadU(df, d + xs - 1) +
+               LoadU(df, d + xs + xs - 1) + LoadU(df, d + xs3 - 2);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1_________
+       2_*_______
+       3__**_____
+       4____0____
+       5_____**__
+       6_______*_
+       7_________
+       8_________ */
+    auto sum = LoadU(df, d - xs - xs - 3) + LoadU(df, d - xs - 2) +
+               LoadU(df, d - xs - 1) + center + LoadU(df, d + xs + 1) +
+               LoadU(df, d + xs + 2) + LoadU(df, d + xs + xs + 3);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1_________
+       2_______*_
+       3_____**__
+       4____0____
+       5__**_____
+       6_*_______
+       7_________
+       8_________ */
+    auto sum = LoadU(df, d - xs - xs + 3) + LoadU(df, d - xs + 2) +
+               LoadU(df, d - xs + 1) + center + LoadU(df, d + xs - 1) +
+               LoadU(df, d + xs - 2) + LoadU(df, d + xs + xs - 3);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1_________
+       2_________
+       3______***
+       4___*0*___
+       5***______
+       6_________
+       7_________
+       8_________ */
+
+    auto sum = LoadU(df, d + xs - 4) + LoadU(df, d + xs - 3) +
+               LoadU(df, d + xs - 2) + LoadU(df, d - 1) + center +
+               LoadU(df, d + 1) + LoadU(df, d - xs + 2) +
+               LoadU(df, d - xs + 3) + LoadU(df, d - xs + 4);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_________
+       1_________
+       2_________
+       3***______
+       4___*0*___
+       5______***
+       6_________
+       7_________
+       8_________ */
+    auto sum = LoadU(df, d - xs - 4) + LoadU(df, d - xs - 3) +
+               LoadU(df, d - xs - 2) + LoadU(df, d - 1) + center +
+               LoadU(df, d + 1) + LoadU(df, d + xs + 2) +
+               LoadU(df, d + xs + 3) + LoadU(df, d + xs + 4);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0___*_____
+       1___*_____
+       2___*_____
+       3____*____
+       4____0____
+       5____*____
+       6_____*___
+       7_____*___
+       8_____*___ */
+    auto sum = LoadU(df, d - xs3 - xs - 1) + LoadU(df, d - xs3 - 1) +
+               LoadU(df, d - xs - xs - 1) + LoadU(df, d - xs) + center +
+               LoadU(df, d + xs) + LoadU(df, d + xs + xs + 1) +
+               LoadU(df, d + xs3 + 1) + LoadU(df, d + xs3 + xs + 1);
+    retval = MulAdd(sum, sum, retval);
+  }
+  {
+    /* 0_____*___
+       1_____*___
+       2_____*___
+       3____*____
+       4____0____
+       5____*____
+       6___*_____
+       7___*_____
+       8___*_____ */
+    auto sum = LoadU(df, d - xs3 - xs + 1) + LoadU(df, d - xs3 + 1) +
+               LoadU(df, d - xs - xs + 1) + LoadU(df, d - xs) + center +
+               LoadU(df, d + xs) + LoadU(df, d + xs + xs - 1) +
+               LoadU(df, d + xs3 - 1) + LoadU(df, d + xs3 + xs - 1);
+    retval = MulAdd(sum, sum, retval);
+  }
+  return retval;
+}
+
+// Returns the single-lane MaltaUnit response for pixel (x0, y0) of the packed
+// image "diffs". Pixels that are at least 4 samples away from every border
+// are evaluated in place, without bounds checks. For all other pixels the
+// 9x9 neighborhood is first copied into a zero-padded scratch tile, and
+// MaltaUnit runs on the center of that tile instead.
+template <class Tag>
+static BUTTERAUGLI_INLINE float PaddedMaltaUnit(const ImageF& diffs,
+                                                const size_t x0,
+                                                const size_t y0) {
+  const HWY_CAPPED(float, 1) df;
+  const float* BUTTERAUGLI_RESTRICT d = diffs.ConstRow(y0) + x0;
+  const bool is_interior = x0 >= 4 && y0 >= 4 && x0 < (diffs.xsize() - 4) &&
+                           y0 < (diffs.ysize() - 4);
+  if (is_interior) {
+    return GetLane(MaltaUnit(Tag(), df, d, diffs.PixelsPerRow()));
+  }
+
+  PROFILER_ZONE("Padded Malta");
+  // 9 rows of 9 samples each; the row pitch is rounded up to 12 floats.
+  float borderimage[12 * 9];
+  // Start from an all-zero tile so that only in-bounds pixels need copying.
+  std::fill(borderimage, borderimage + 12 * 9, 0.0f);
+  for (int dy = 0; dy < 9; ++dy) {
+    const int y = y0 + dy - 4;
+    const bool row_inside = !(y < 0 || static_cast<size_t>(y) >= diffs.ysize());
+    if (!row_inside) continue;
+
+    const float* row_diffs = diffs.ConstRow(y);
+    float* tile_row = borderimage + dy * 12;
+    for (int dx = 0; dx < 9; ++dx) {
+      const int x = x0 + dx - 4;
+      if (x >= 0 && static_cast<size_t>(x) < diffs.xsize()) {
+        tile_row[dx] = row_diffs[x];
+      }
+    }
+  }
+  // Row 4, column 4 of the tile corresponds to (x0, y0).
+  return GetLane(MaltaUnit(Tag(), df, &borderimage[4 * 12 + 4], 12));
+}
+
+// Fills *diffs with a normalized difference of lum0 and lum1, then adds the
+// MaltaUnit response of every pixel of *diffs to plane `c` of *block_diff_ac.
+//
+// Per pixel, *diffs first receives the symmetric term scaler * (lum0 - lum1).
+// When lum1 falls outside the band [0.55, 1.05] * |lum0| (on the side of
+// lum0's sign), an additional `impact` term is applied with the sign of
+// lum0 - lum1.
+//
+// The first and last four rows, the first `aligned_x` columns and the row
+// tail go through PaddedMaltaUnit; the rest is processed a vector at a time.
+template <class Tag>
+static void MaltaDiffMapT(const Tag tag, const ImageF& lum0, const ImageF& lum1,
+                          const double w_0gt1, const double w_0lt1,
+                          const double norm1, const double len,
+                          const double mulli, ImageF* HWY_RESTRICT diffs,
+                          Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+  JXL_DASSERT(SameSize(lum0, lum1) && SameSize(lum0, *diffs));
+  const size_t width = lum0.xsize();
+  const size_t height = lum0.ysize();
+
+  const float kWeight0 = 0.5;
+  const float kWeight1 = 0.33;
+
+  const double kernel_len = len * 2 + 1;
+  const double w_pre0gt1 = mulli * std::sqrt(kWeight0 * w_0gt1) / kernel_len;
+  const double w_pre0lt1 = mulli * std::sqrt(kWeight1 * w_0lt1) / kernel_len;
+  const float norm2_0gt1 = w_pre0gt1 * norm1;
+  const float norm2_0lt1 = w_pre0lt1 * norm1;
+  const float norm1_f = static_cast<float>(norm1);
+
+  for (size_t y = 0; y < height; ++y) {
+    const float* HWY_RESTRICT row0 = lum0.ConstRow(y);
+    const float* HWY_RESTRICT row1 = lum1.ConstRow(y);
+    float* HWY_RESTRICT row_diffs = diffs->Row(y);
+    for (size_t x = 0; x < width; ++x) {
+      const float v0 = row0[x];
+      const float v1 = row1[x];
+      const float absval = 0.5f * (std::abs(v0) + std::abs(v1));
+      const float diff = v0 - v1;
+      // Both scalers share the same denominator.
+      const float denom = norm1_f + absval;
+      const float scaler = norm2_0gt1 / denom;
+      const float scaler2 = norm2_0lt1 / denom;
+
+      // Primary symmetric quadratic objective.
+      row_diffs[x] = scaler * diff;
+
+      // Secondary half-open quadratic objectives.
+      const double fabs0 = std::fabs(v0);
+      const double too_small = 0.55 * fabs0;
+      const double too_big = 1.05 * fabs0;
+
+      double impact = 0.0;
+      bool has_impact = true;
+      if (v0 < 0) {
+        if (v1 > -too_small) {
+          impact = scaler2 * (v1 + too_small);
+        } else if (v1 < -too_big) {
+          impact = scaler2 * (-v1 - too_big);
+        } else {
+          has_impact = false;
+        }
+      } else {
+        if (v1 < too_small) {
+          impact = scaler2 * (too_small - v1);
+        } else if (v1 > too_big) {
+          impact = scaler2 * (v1 - too_big);
+        } else {
+          has_impact = false;
+        }
+      }
+      // The impact follows the sign of the primary difference.
+      if (has_impact) {
+        if (diff < 0) {
+          row_diffs[x] -= impact;
+        } else {
+          row_diffs[x] += impact;
+        }
+      }
+    }
+  }
+
+  const HWY_FULL(float) df;
+  const size_t lanes = Lanes(df);
+  const size_t aligned_x = std::max(size_t(4), lanes);
+  const intptr_t stride = diffs->PixelsPerRow();
+
+  size_t y = 0;
+  // Top: every pixel is within 4 rows of the border.
+  for (; y < 4; ++y) {
+    float* BUTTERAUGLI_RESTRICT row_out = block_diff_ac->PlaneRow(c, y);
+    for (size_t x = 0; x < width; ++x) {
+      row_out[x] += PaddedMaltaUnit<Tag>(*diffs, x, y);
+    }
+  }
+
+  // Middle: padded head, vectorized body, padded tail.
+  for (; y < height - 4; ++y) {
+    const float* BUTTERAUGLI_RESTRICT row_in = diffs->ConstRow(y);
+    float* BUTTERAUGLI_RESTRICT row_out = block_diff_ac->PlaneRow(c, y);
+    size_t x = 0;
+    while (x < aligned_x) {
+      row_out[x] += PaddedMaltaUnit<Tag>(*diffs, x, y);
+      ++x;
+    }
+    // Stop while a full vector plus the 4-sample right margin still fits.
+    while (x + lanes + 4 <= width) {
+      auto acc = Load(df, row_out + x);
+      acc += MaltaUnit(Tag(), df, row_in + x, stride);
+      Store(acc, df, row_out + x);
+      x += lanes;
+    }
+    while (x < width) {
+      row_out[x] += PaddedMaltaUnit<Tag>(*diffs, x, y);
+      ++x;
+    }
+  }
+
+  // Bottom: remaining rows are within 4 rows of the border.
+  for (; y < height; ++y) {
+    float* BUTTERAUGLI_RESTRICT row_out = block_diff_ac->PlaneRow(c, y);
+    for (size_t x = 0; x < width; ++x) {
+      row_out[x] += PaddedMaltaUnit<Tag>(*diffs, x, y);
+    }
+  }
+}
+
+// Need non-template wrapper functions for HWY_EXPORT.
+// This one forwards every argument unchanged to MaltaDiffMapT, selecting the
+// MaltaTag kernel set.
+void MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
+                  const double w_0lt1, const double norm1, const double len,
+                  const double mulli, ImageF* HWY_RESTRICT diffs,
+                  Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+  MaltaDiffMapT(MaltaTag(), lum0, lum1, w_0gt1, w_0lt1, norm1, len, mulli,
+                diffs, block_diff_ac, c);
+}
+
+// Non-template wrapper that forwards every argument unchanged to
+// MaltaDiffMapT, selecting the MaltaTagLF kernel set.
+void MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
+                    const double w_0lt1, const double norm1, const double len,
+                    const double mulli, ImageF* HWY_RESTRICT diffs,
+                    Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+  MaltaDiffMapT(MaltaTagLF(), lum0, lum1, w_0gt1, w_0lt1, norm1, len, mulli,
+                diffs, block_diff_ac, c);
+}
+
+// For every pixel of xyb, writes sqrt(mul * |xyb| + bias) - sqrt(bias) into
+// the same position of *out, where bias = mul * bias_arg. A zero input thus
+// maps to zero.
+void DiffPrecompute(const ImageF& xyb, float mul, float bias_arg, ImageF* out) {
+  PROFILER_FUNC;
+  const float bias = mul * bias_arg;
+  const float sqrt_bias = sqrt(bias);
+  const size_t width = xyb.xsize();
+  const size_t height = xyb.ysize();
+  for (size_t row = 0; row < height; ++row) {
+    const float* BUTTERAUGLI_RESTRICT src = xyb.Row(row);
+    float* BUTTERAUGLI_RESTRICT dst = out->Row(row);
+    for (size_t col = 0; col < width; ++col) {
+      // The bias makes sqrt behave more linearly.
+      const float biased = mul * std::abs(src[col]) + bias;
+      dst[col] = sqrt(biased) - sqrt_bias;
+    }
+  }
+}
+
+// std::log(80.0) / std::log(255.0);
+constexpr float kIntensityTargetNormalizationHack = 0.79079917404f;
+// Quality threshold, pre-multiplied by the normalization factor above.
+static const float kInternalGoodQualityThreshold =
+    17.1984479671f * kIntensityTargetNormalizationHack;
+// Reciprocal of the threshold (computed in double, stored as float). MaskY()
+// and MaskDcY() multiply their result by this factor before squaring.
+static const float kGlobalScale = 1.0 / kInternalGoodQualityThreshold;
+
+// Inserts v into the running triple (min0, min1, min2).
+// Nothing happens unless v < min2. Otherwise v takes the first slot it is
+// smaller than (checked in the order min0, min1, min2) and the values from
+// that slot onward shift down by one, dropping the old min2.
+void StoreMin3(const float v, float& min0, float& min1, float& min2) {
+  if (!(v < min2)) return;
+
+  if (v < min0) {
+    // New overall minimum: shift everything down.
+    min2 = min1;
+    min1 = min0;
+    min0 = v;
+    return;
+  }
+  if (v < min1) {
+    // Fits between min0 and min1.
+    min2 = min1;
+    min1 = v;
+    return;
+  }
+  // Only smaller than min2.
+  min2 = v;
+}
+
+// Look for smooth areas near the area of degradation.
+// If the areas are generally smooth, don't do masking.
+//
+// For every pixel of `from`, starts with min0 = pixel and min1 = min2 =
+// 2 * pixel, then feeds StoreMin3 with those of the eight neighbors at
+// distance kStep (horizontal, vertical and diagonal) that lie inside the
+// image. The weighted sum 0.45 * min0 + 0.3 * min1 + 0.25 * min2 is written
+// to the same position of *to, which must be at least as large as `from`.
+void FuzzyErosion(const ImageF& from, ImageF* to) {
+  const size_t xsize = from.xsize();
+  const size_t ysize = from.ysize();
+  static const size_t kStep = 3;
+  for (size_t y = 0; y < ysize; ++y) {
+    // Bounds are tested as `pos + kStep < size`: the form `size - kStep`
+    // wraps around in unsigned arithmetic for images smaller than kStep and
+    // would let out-of-bounds neighbors through.
+    const bool has_above = y >= kStep;
+    const bool has_below = y + kStep < ysize;
+    for (size_t x = 0; x < xsize; ++x) {
+      float min0 = from.Row(y)[x];
+      float min1 = 2 * min0;
+      float min2 = min1;
+      // The neighbor order below is kept fixed because StoreMin3 can start
+      // from an unsorted triple (negative pixels), where order matters.
+      if (x >= kStep) {
+        const size_t left = x - kStep;
+        StoreMin3(from.Row(y)[left], min0, min1, min2);
+        if (has_above) {
+          StoreMin3(from.Row(y - kStep)[left], min0, min1, min2);
+        }
+        if (has_below) {
+          StoreMin3(from.Row(y + kStep)[left], min0, min1, min2);
+        }
+      }
+      if (x + kStep < xsize) {
+        const size_t right = x + kStep;
+        StoreMin3(from.Row(y)[right], min0, min1, min2);
+        if (has_above) {
+          StoreMin3(from.Row(y - kStep)[right], min0, min1, min2);
+        }
+        if (has_below) {
+          StoreMin3(from.Row(y + kStep)[right], min0, min1, min2);
+        }
+      }
+      if (has_above) {
+        StoreMin3(from.Row(y - kStep)[x], min0, min1, min2);
+      }
+      if (has_below) {
+        StoreMin3(from.Row(y + kStep)[x], min0, min1, min2);
+      }
+      to->Row(y)[x] = (0.45f * min0 + 0.3f * min1 + 0.25f * min2);
+    }
+  }
+}
+
+// Compute values of local frequency and dc masking based on the activity
+// in the two images. diff_ac may be null.
+//
+// Both inputs go through DiffPrecompute, Blur and FuzzyErosion. *mask is
+// reallocated to the size of mask0 and receives the eroded result for mask1.
+// If diff_ac is non-null, 10 times the squared difference of the two blurred
+// images is added to it.
+void Mask(const ImageF& mask0, const ImageF& mask1,
+          const ButteraugliParams& params, BlurTemp* blur_temp,
+          ImageF* BUTTERAUGLI_RESTRICT mask,
+          ImageF* BUTTERAUGLI_RESTRICT diff_ac) {
+  // Only X and Y components are involved in masking. B's influence
+  // is considered less important in the high frequency area, and we
+  // don't model masking from lower frequency signals.
+  PROFILER_FUNC;
+  static const float kMul = 6.19424080439;
+  static const float kBias = 12.61050594197;
+  static const float kRadius = 2.7;
+  static const float kMaskToErrorMul = 10.0;
+
+  const size_t xsize = mask0.xsize();
+  const size_t ysize = mask0.ysize();
+  *mask = ImageF(xsize, ysize);
+
+  ImageF diff0(xsize, ysize);
+  ImageF diff1(xsize, ysize);
+  ImageF blurred0(xsize, ysize);
+  ImageF blurred1(xsize, ysize);
+  DiffPrecompute(mask0, kMul, kBias, &diff0);
+  DiffPrecompute(mask1, kMul, kBias, &diff1);
+  // diff0/diff1 are reused as the erosion outputs once they have been blurred.
+  Blur(diff0, kRadius, params, blur_temp, &blurred0);
+  FuzzyErosion(blurred0, &diff0);
+  Blur(diff1, kRadius, params, blur_temp, &blurred1);
+  FuzzyErosion(blurred1, &diff1);
+
+  for (size_t y = 0; y < ysize; ++y) {
+    const float* row_eroded1 = diff1.Row(y);
+    float* row_mask = mask->Row(y);
+    for (size_t x = 0; x < xsize; ++x) {
+      row_mask[x] = row_eroded1[x];
+    }
+    if (diff_ac == nullptr) continue;
+
+    const float* row_blurred0 = blurred0.Row(y);
+    const float* row_blurred1 = blurred1.Row(y);
+    float* row_ac = diff_ac->Row(y);
+    for (size_t x = 0; x < xsize; ++x) {
+      const float diff = row_blurred0[x] - row_blurred1[x];
+      row_ac[x] += kMaskToErrorMul * diff * diff;
+    }
+  }
+}
+
+// `diff_ac` may be null.
+// Builds one activity image per input from the X and Y planes of its hf and
+// uhf bands (Euclidean norm of the weighted X and Y terms) and hands both to
+// Mask(), which fills *mask and, if given, updates *diff_ac.
+void MaskPsychoImage(const PsychoImage& pi0, const PsychoImage& pi1,
+                     const size_t xsize, const size_t ysize,
+                     const ButteraugliParams& params, Image3F* temp,
+                     BlurTemp* blur_temp, ImageF* BUTTERAUGLI_RESTRICT mask,
+                     ImageF* BUTTERAUGLI_RESTRICT diff_ac) {
+  // Weight of X (uhf + hf), of Y uhf and of Y hf, respectively.
+  static const float kMulX = 8.75000241361f;
+  static const float kMulYUhf = 0.620978104816f;
+  static const float kMulYHf = 0.307585098253f;
+
+  ImageF mask0(xsize, ysize);
+  ImageF mask1(xsize, ysize);
+  // Silly and unoptimized approach here. TODO(jyrki): rework this.
+  for (size_t y = 0; y < ysize; ++y) {
+    const float* BUTTERAUGLI_RESTRICT row_x_hf0 = pi0.hf[0].Row(y);
+    const float* BUTTERAUGLI_RESTRICT row_x_hf1 = pi1.hf[0].Row(y);
+    const float* BUTTERAUGLI_RESTRICT row_x_uhf0 = pi0.uhf[0].Row(y);
+    const float* BUTTERAUGLI_RESTRICT row_x_uhf1 = pi1.uhf[0].Row(y);
+    const float* BUTTERAUGLI_RESTRICT row_y_hf0 = pi0.hf[1].Row(y);
+    const float* BUTTERAUGLI_RESTRICT row_y_hf1 = pi1.hf[1].Row(y);
+    const float* BUTTERAUGLI_RESTRICT row_y_uhf0 = pi0.uhf[1].Row(y);
+    const float* BUTTERAUGLI_RESTRICT row_y_uhf1 = pi1.uhf[1].Row(y);
+    float* BUTTERAUGLI_RESTRICT row0 = mask0.Row(y);
+    float* BUTTERAUGLI_RESTRICT row1 = mask1.Row(y);
+    for (size_t x = 0; x < xsize; ++x) {
+      const float xdiff0 = (row_x_uhf0[x] + row_x_hf0[x]) * kMulX;
+      const float ydiff0 = row_y_uhf0[x] * kMulYUhf + row_y_hf0[x] * kMulYHf;
+      const float energy0 = xdiff0 * xdiff0 + ydiff0 * ydiff0;
+      row0[x] = sqrt(energy0);
+
+      const float xdiff1 = (row_x_uhf1[x] + row_x_hf1[x]) * kMulX;
+      const float ydiff1 = row_y_uhf1[x] * kMulYUhf + row_y_hf1[x] * kMulYHf;
+      const float energy1 = xdiff1 * xdiff1 + ydiff1 * ydiff1;
+      row1[x] = sqrt(energy1);
+    }
+  }
+  Mask(mask0, mask1, params, blur_temp, mask, diff_ac);
+}
+
+// Returns (kGlobalScale * (1 + mul / (scaler * delta + offset)))^2 using the
+// constants below.
+double MaskY(double delta) {
+  static const double kOffset = 0.829591754942;
+  static const double kScaler = 0.451936922203;
+  static const double kMul = 2.5485944793;
+  const double ratio = kMul / ((kScaler * delta) + kOffset);
+  const double scaled = kGlobalScale * (1.0 + ratio);
+  return scaled * scaled;
+}
+
+// Returns (kGlobalScale * (1 + mul / (scaler * delta + offset)))^2 using the
+// DC-specific constants below.
+double MaskDcY(double delta) {
+  static const double kOffset = 0.20025578522;
+  static const double kScaler = 3.87449418804;
+  static const double kMul = 0.505054525019;
+  const double ratio = kMul / ((kScaler * delta) + kOffset);
+  const double scaled = kGlobalScale * (1.0 + ratio);
+  return scaled * scaled;
+}
+
+// Returns the sum of the three entries of `color`, each multiplied by `mask`.
+// The products are accumulated left to right.
+inline float MaskColor(const float color[3], const float mask) {
+  float total = color[0] * mask;
+  total = total + color[1] * mask;
+  total = total + color[2] * mask;
+  return total;
+}
+
+// Diffmap := sqrt of sum{diff images multiplied by X and Y/B masks}
+//
+// For every pixel, the mask value is turned into an AC weight (MaskY) and a
+// DC weight (MaskDcY). The three DC and three AC differences, with channel 0
+// additionally scaled by xmul, are weighted and summed via MaskColor, and the
+// square root of the total is stored in *result. mask and *result must have
+// the same size.
+void CombineChannelsToDiffmap(const ImageF& mask, const Image3F& block_diff_dc,
+                              const Image3F& block_diff_ac, float xmul,
+                              ImageF* result) {
+  PROFILER_FUNC;
+  JXL_CHECK(SameSize(mask, *result));
+  const size_t width = mask.xsize();
+  const size_t height = mask.ysize();
+  for (size_t y = 0; y < height; ++y) {
+    float* BUTTERAUGLI_RESTRICT row_out = result->Row(y);
+    for (size_t x = 0; x < width; ++x) {
+      const float val = mask.Row(y)[x];
+      const float maskval = MaskY(val);
+      const float dc_maskval = MaskDcY(val);
+      // Only channel 0 is scaled by xmul.
+      const float diff_dc[3] = {
+          block_diff_dc.PlaneRow(0, y)[x] * xmul,
+          block_diff_dc.PlaneRow(1, y)[x],
+          block_diff_dc.PlaneRow(2, y)[x],
+      };
+      const float diff_ac[3] = {
+          block_diff_ac.PlaneRow(0, y)[x] * xmul,
+          block_diff_ac.PlaneRow(1, y)[x],
+          block_diff_ac.PlaneRow(2, y)[x],
+      };
+      row_out[x] =
+          sqrt(MaskColor(diff_dc, dc_maskval) + MaskColor(diff_ac, maskval));
+    }
+  }
+}
+
+// Adds weighted L2 difference between i0 and i1 to diffmap.
+// Plane `c` of *diffmap is updated in place with w * (i0 - i1)^2 per pixel.
+// NOTE(review): rows are processed in whole vectors with aligned loads and
+// stores; this presumably relies on rows being padded to a multiple of
+// Lanes(d) - confirm against the image allocator.
+static void L2Diff(const ImageF& i0, const ImageF& i1, const float w,
+                   Image3F* BUTTERAUGLI_RESTRICT diffmap, size_t c) {
+  // A zero weight contributes nothing, so the plane is left untouched.
+  if (w == 0) return;
+
+  const HWY_FULL(float) d;
+  const size_t lanes = Lanes(d);
+  const size_t width = i0.xsize();
+  const size_t height = i0.ysize();
+  const auto weight = Set(d, w);
+
+  for (size_t y = 0; y < height; ++y) {
+    const float* BUTTERAUGLI_RESTRICT src0 = i0.ConstRow(y);
+    const float* BUTTERAUGLI_RESTRICT src1 = i1.ConstRow(y);
+    float* BUTTERAUGLI_RESTRICT acc = diffmap->PlaneRow(c, y);
+
+    for (size_t x = 0; x < width; x += lanes) {
+      const auto delta = Load(d, src0 + x) - Load(d, src1 + x);
+      const auto squared = delta * delta;
+      const auto previous = Load(d, acc + x);
+      Store(MulAdd(squared, weight, previous), d, acc + x);
+    }
+  }
+}
+
+// Initializes diffmap to the weighted L2 difference between i0 and i1.
+// Plane `c` of *diffmap is overwritten with w * (i0 - i1)^2 per pixel.
+//
+// With w == 0 the plane is explicitly set to zero. Returning early instead
+// would leave whatever the plane held before (possibly uninitialized
+// memory), contradicting the "initializes" contract.
+// NOTE(review): rows are processed in whole vectors with aligned loads and
+// stores; this presumably relies on rows being padded to a multiple of
+// Lanes(d) - confirm against the image allocator.
+static void SetL2Diff(const ImageF& i0, const ImageF& i1, const float w,
+                      Image3F* BUTTERAUGLI_RESTRICT diffmap, size_t c) {
+  const HWY_FULL(float) d;
+
+  if (w == 0) {
+    // Zero weight: the result is zero everywhere, the inputs are not read.
+    const auto zero = Zero(d);
+    for (size_t y = 0; y < i0.ysize(); ++y) {
+      float* BUTTERAUGLI_RESTRICT row_diff = diffmap->PlaneRow(c, y);
+      for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) {
+        Store(zero, d, row_diff + x);
+      }
+    }
+    return;
+  }
+
+  const auto weight = Set(d, w);
+
+  for (size_t y = 0; y < i0.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT row0 = i0.ConstRow(y);
+    const float* BUTTERAUGLI_RESTRICT row1 = i1.ConstRow(y);
+    float* BUTTERAUGLI_RESTRICT row_diff = diffmap->PlaneRow(c, y);
+
+    for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) {
+      const auto diff = Load(d, row0 + x) - Load(d, row1 + x);
+      const auto diff2 = diff * diff;
+      Store(diff2 * weight, d, row_diff + x);
+    }
+  }
+}
+
+// i0 is the original image.
+// i1 is the deformed copy.
+//
+// Adds two terms to plane `c` of *diffmap:
+//  - the squared difference (i0 - i1)^2, weighted by 0.8 * w_0gt1;
+//  - a squared penalty, weighted by 0.8 * w_0lt1, that is non-zero when i1
+//    lies outside the band [0.4 * |i0|, |i0|] on the side of i0's sign.
+// Does nothing when both weights are zero.
+static void L2DiffAsymmetric(const ImageF& i0, const ImageF& i1, float w_0gt1,
+                             float w_0lt1,
+                             Image3F* BUTTERAUGLI_RESTRICT diffmap, size_t c) {
+  const bool nothing_to_add = (w_0gt1 == 0) && (w_0lt1 == 0);
+  if (nothing_to_add) return;
+
+  const HWY_FULL(float) d;
+  const size_t lanes = Lanes(d);
+  const size_t width = i0.xsize();
+  const size_t height = i0.ysize();
+  const auto weight_sym = Set(d, w_0gt1 * 0.8);
+  const auto weight_asym = Set(d, w_0lt1 * 0.8);
+  const auto small_ratio = Set(d, 0.4);
+
+  for (size_t y = 0; y < height; ++y) {
+    const float* BUTTERAUGLI_RESTRICT src0 = i0.Row(y);
+    const float* BUTTERAUGLI_RESTRICT src1 = i1.Row(y);
+    float* BUTTERAUGLI_RESTRICT acc = diffmap->PlaneRow(c, y);
+
+    for (size_t x = 0; x < width; x += lanes) {
+      const auto orig = Load(d, src0 + x);
+      const auto dist = Load(d, src1 + x);
+
+      // Secondary half-open quadratic objectives: bounds of the allowed band.
+      const auto magnitude = Abs(orig);
+      const auto lower = small_ratio * magnitude;
+      const auto upper = magnitude;
+
+      // Distance outside the band when the original is negative ...
+      const auto penalty_neg =
+          IfThenElse(dist > Neg(lower), dist + lower,
+                     IfThenElseZero(dist < Neg(upper), Neg(dist) - upper));
+      // ... and when it is non-negative.
+      const auto penalty_pos =
+          IfThenElse(dist < lower, lower - dist,
+                     IfThenElseZero(dist > upper, dist - upper));
+      const auto penalty = IfThenElse(orig < Zero(d), penalty_neg, penalty_pos);
+
+      // Primary symmetric quadratic objective.
+      const auto delta = orig - dist;
+      auto total = MulAdd(delta * delta, weight_sym, Load(d, acc + x));
+      total += weight_asym * penalty * penalty;
+      Store(total, d, acc + x);
+    }
+  }
+}
+
+// A simple HDR compatible gamma function: an affine function of
+// FastLog2f(ZeroIfNegative(v) + bias).
+template <class DF, class V>
+V Gamma(const DF df, V v) {
+  // The curve is defined with std::log but only FastLog2f is available, so
+  // the ln(2) factor is folded into the multiplier.
+  const auto scale = Set(df, 19.245013259874995f * 0.693147180559945f);
+  const auto offset = Set(df, -23.16046239805755);
+  const auto bias = Set(df, 9.9710635769299145);
+
+  // Negative inputs should be rare but may lead to a NaN in the log, which
+  // is undesirable. Negative photons do not exist, so clamp them to zero.
+  const auto non_negative = ZeroIfNegative(v);
+
+  // Folding the affine step into a custom Log2 polynomial would gain little.
+  return MulAdd(scale, FastLog2f(df, non_negative + bias), offset);
+}
+
+// Photopsin absorbance model (https://en.wikipedia.org/wiki/Photopsin): each
+// output is a weighted sum of the three inputs plus a bias term. When Clamp
+// is true, every output is additionally floored at its own bias.
+template <bool Clamp, class DF, class V>
+BUTTERAUGLI_INLINE void OpsinAbsorbance(const DF df, const V& in0, const V& in1,
+                                        const V& in2, V* JXL_RESTRICT out0,
+                                        V* JXL_RESTRICT out1,
+                                        V* JXL_RESTRICT out2) {
+  // One row per output: the weights of in0, in1 and in2, then the bias.
+  static const double kMix[3][4] = {
+      {0.29956550340058319, 0.63373087833825936, 0.077705617820981968,
+       1.7557483643287353},
+      {0.22158691104574774, 0.69391388044116142, 0.0987313588422,
+       1.7557483643287353},
+      {0.02, 0.02, 0.20480129041026129, 12.226454707163354},
+  };
+
+  const V w00 = Set(df, kMix[0][0]);
+  const V w01 = Set(df, kMix[0][1]);
+  const V w02 = Set(df, kMix[0][2]);
+  const V bias0 = Set(df, kMix[0][3]);
+  const V w10 = Set(df, kMix[1][0]);
+  const V w11 = Set(df, kMix[1][1]);
+  const V w12 = Set(df, kMix[1][2]);
+  const V bias1 = Set(df, kMix[1][3]);
+  const V w20 = Set(df, kMix[2][0]);
+  const V w21 = Set(df, kMix[2][1]);
+  const V w22 = Set(df, kMix[2][2]);
+  const V bias2 = Set(df, kMix[2][3]);
+
+  // Same left-to-right summation order for every output.
+  *out0 = w00 * in0 + w01 * in1 + w02 * in2 + bias0;
+  *out1 = w10 * in0 + w11 * in1 + w12 * in2 + bias1;
+  *out2 = w20 * in0 + w21 * in1 + w22 * in2 + bias2;
+
+  if (Clamp) {
+    // Keep each output at or above its bias.
+    *out0 = Max(*out0, bias0);
+    *out1 = Max(*out1, bias1);
+    *out2 = Max(*out2, bias2);
+  }
+}
+
+// Returns the opsin dynamics image of `rgb`; `blurred` is only scratch space.
+Image3F OpsinDynamicsImage(const Image3F& rgb, const ButteraugliParams& params,
+                           Image3F* blurred, BlurTemp* blur_temp) {
+  PROFILER_FUNC;
+  Image3F xyb(rgb.xsize(), rgb.ysize());
+  const double kSigma = 1.2;  // Sigma passed to Blur for all three planes.
+  Blur(rgb.Plane(0), kSigma, params, blur_temp, &blurred->Plane(0));
+  Blur(rgb.Plane(1), kSigma, params, blur_temp, &blurred->Plane(1));
+  Blur(rgb.Plane(2), kSigma, params, blur_temp, &blurred->Plane(2));
+  const HWY_FULL(float) df;
+  const auto intensity_target_multiplier = Set(df, params.intensity_target);
+  for (size_t y = 0; y < rgb.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT row_r = rgb.ConstPlaneRow(0, y);
+    const float* BUTTERAUGLI_RESTRICT row_g = rgb.ConstPlaneRow(1, y);
+    const float* BUTTERAUGLI_RESTRICT row_b = rgb.ConstPlaneRow(2, y);
+    const float* BUTTERAUGLI_RESTRICT row_blurred_r =
+        blurred->ConstPlaneRow(0, y);
+    const float* BUTTERAUGLI_RESTRICT row_blurred_g =
+        blurred->ConstPlaneRow(1, y);
+    const float* BUTTERAUGLI_RESTRICT row_blurred_b =
+        blurred->ConstPlaneRow(2, y);
+    float* BUTTERAUGLI_RESTRICT row_out_x = xyb.PlaneRow(0, y);
+    float* BUTTERAUGLI_RESTRICT row_out_y = xyb.PlaneRow(1, y);
+    float* BUTTERAUGLI_RESTRICT row_out_b = xyb.PlaneRow(2, y);
+    const auto min = Set(df, 1e-4f);  // Floor for pre_mixed* and sensitivity*.
+    for (size_t x = 0; x < rgb.xsize(); x += Lanes(df)) {
+      auto sensitivity0 = Undefined(df);
+      auto sensitivity1 = Undefined(df);
+      auto sensitivity2 = Undefined(df);
+      {
+        // Calculate sensitivity based on the smoothed image gamma derivative.
+        auto pre_mixed0 = Undefined(df);
+        auto pre_mixed1 = Undefined(df);
+        auto pre_mixed2 = Undefined(df);
+        OpsinAbsorbance<true>(
+            df, Load(df, row_blurred_r + x) * intensity_target_multiplier,
+            Load(df, row_blurred_g + x) * intensity_target_multiplier,
+            Load(df, row_blurred_b + x) * intensity_target_multiplier,
+            &pre_mixed0, &pre_mixed1, &pre_mixed2);
+        pre_mixed0 = Max(pre_mixed0, min);  // Keeps the divisors below positive.
+        pre_mixed1 = Max(pre_mixed1, min);
+        pre_mixed2 = Max(pre_mixed2, min);
+        sensitivity0 = Gamma(df, pre_mixed0) / pre_mixed0;
+        sensitivity1 = Gamma(df, pre_mixed1) / pre_mixed1;
+        sensitivity2 = Gamma(df, pre_mixed2) / pre_mixed2;
+        sensitivity0 = Max(sensitivity0, min);
+        sensitivity1 = Max(sensitivity1, min);
+        sensitivity2 = Max(sensitivity2, min);
+      }
+      auto cur_mixed0 = Undefined(df);
+      auto cur_mixed1 = Undefined(df);
+      auto cur_mixed2 = Undefined(df);
+      OpsinAbsorbance<false>(df,
+                             Load(df, row_r + x) * intensity_target_multiplier,
+                             Load(df, row_g + x) * intensity_target_multiplier,
+                             Load(df, row_b + x) * intensity_target_multiplier,
+                             &cur_mixed0, &cur_mixed1, &cur_mixed2);
+      cur_mixed0 *= sensitivity0;  // Unblurred mix scaled by the blurred sensitivity.
+      cur_mixed1 *= sensitivity1;
+      cur_mixed2 *= sensitivity2;
+      // This is a kludge. The negative values should be zeroed away before
+      // blurring. Ideally there would be no negative values in the first place.
+      const auto min01 = Set(df, 1.7557483643287353f);
+      const auto min2 = Set(df, 12.226454707163354f);
+      cur_mixed0 = Max(cur_mixed0, min01);
+      cur_mixed1 = Max(cur_mixed1, min01);
+      cur_mixed2 = Max(cur_mixed2, min2);
+
+      Store(cur_mixed0 - cur_mixed1, df, row_out_x + x);  // Plane 0: difference.
+      Store(cur_mixed0 + cur_mixed1, df, row_out_y + x);  // Plane 1: sum.
+      Store(cur_mixed2, df, row_out_b + x);               // Plane 2: third mix.
+    }
+  }
+  return xyb;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(SeparateFrequencies);       // Local function.
+HWY_EXPORT(MaskPsychoImage);           // Local function.
+HWY_EXPORT(L2DiffAsymmetric);          // Local function.
+HWY_EXPORT(L2Diff);                    // Local function.
+HWY_EXPORT(SetL2Diff);                 // Local function.
+HWY_EXPORT(CombineChannelsToDiffmap);  // Local function.
+HWY_EXPORT(MaltaDiffMap);              // Local function.
+HWY_EXPORT(MaltaDiffMapLF);            // Local function.
+HWY_EXPORT(OpsinDynamicsImage);        // Local function.
+
+#if BUTTERAUGLI_ENABLE_CHECKS
+
+static inline bool IsNan(const float x) {  // Exponent all ones, mantissa != 0.
+  uint32_t raw;
+  memcpy(&raw, &x, sizeof(raw));
+  const uint32_t kExponentMask = 0x7F800000;
+  return (raw & kExponentMask) == kExponentMask && (raw & 0x7FFFFF) != 0;
+}
+
+static inline bool IsNan(const double x) {  // Exponent all ones, mantissa != 0.
+  uint64_t raw;
+  memcpy(&raw, &x, sizeof(raw));
+  return (raw & 0x7ff0000000000000ULL) == 0x7ff0000000000000ULL &&
+         (raw & 0x000fffffffffffffULL) != 0;
+}
+
+// If `image` holds a NaN, prints the first one's position and calls exit(1).
+static inline void CheckImage(const ImageF& image, const char* name) {
+  PROFILER_FUNC;
+  const size_t xs = image.xsize(), ys = image.ysize();
+  for (size_t y = 0; y < ys; ++y) {
+    const float* BUTTERAUGLI_RESTRICT pixels = image.Row(y);
+    for (size_t x = 0; x < xs; ++x) {
+      if (!IsNan(pixels[x])) continue;
+      printf("NAN: Image %s @ %zu,%zu (of %zu,%zu)\n", name, x, y, xs, ys);
+      exit(1);
+    }
+  }
+}
+
+#define CHECK_NAN(x, str) /* if x is NaN: print line and str, then abort() */ \
+  do {                                   \
+    if (IsNan(x)) {                      \
+      printf("%d: %s\n", __LINE__, str); \
+      abort();                           \
+    }                                    \
+  } while (0)
+
+#define CHECK_IMAGE(image, name) CheckImage(image, name)
+
+#else  // BUTTERAUGLI_ENABLE_CHECKS
+
+#define CHECK_NAN(x, str)
+#define CHECK_IMAGE(image, name)
+
+#endif  // BUTTERAUGLI_ENABLE_CHECKS
+
+// Calculate a 2x2 subsampled image for purposes of recursive butteraugli at
+// multiresolution. Each output pixel is the average of the input pixels that
+// map onto it (fewer than four along an odd-sized right or bottom edge).
+static Image3F SubSample2x(const Image3F& in) {
+  const size_t in_xs = in.xsize();
+  const size_t in_ys = in.ysize();
+  const size_t out_xs = (in_xs + 1) / 2;
+  const size_t out_ys = (in_ys + 1) / 2;
+  Image3F out(out_xs, out_ys);
+  for (size_t c = 0; c < 3; ++c) {
+    // Clear the plane, then add a quarter of every source pixel.
+    for (size_t oy = 0; oy < out_ys; ++oy) {
+      float* row_out = out.PlaneRow(c, oy);
+      for (size_t ox = 0; ox < out_xs; ++ox) row_out[ox] = 0;
+    }
+    for (size_t y = 0; y < in_ys; ++y) {
+      const float* row_in = in.PlaneRow(c, y);
+      float* row_out = out.PlaneRow(c, y / 2);
+      for (size_t x = 0; x < in_xs; ++x) row_out[x / 2] += 0.25f * row_in[x];
+    }
+    // Odd width/height: the last column/row received only half as many
+    // samples, so double it to keep the result an average.
+    if ((in_xs & 1) != 0) {
+      for (size_t oy = 0; oy < out_ys; ++oy) {
+        out.PlaneRow(c, oy)[out_xs - 1] *= 2.0f;
+      }
+    }
+    if ((in_ys & 1) != 0) {
+      float* last_row = out.PlaneRow(c, out_ys - 1);
+      for (size_t ox = 0; ox < out_xs; ++ox) last_row[ox] *= 2.0f;
+    }
+  }
+  return out;
+}
+
+// Adds `w` times the 2x supersampled `src` to `dest`, after damping `dest`.
+static void AddSupersampled2x(const ImageF& src, float w, ImageF& dest) {
+  static const double kHeuristicMixingValue = 0.3;  // Fewer errors when averaged.
+  const double damping = 1.0 - kHeuristicMixingValue * w;
+  for (size_t y = 0; y < dest.ysize(); ++y) {
+    float* row_dest = dest.Row(y);
+    for (size_t x = 0; x < dest.xsize(); ++x) {
+      row_dest[x] *= damping;
+      row_dest[x] += w * src.Row(y / 2)[x / 2];
+    }
+  }
+}
+
+Image3F* ButteraugliComparator::Temp() const {  // Claims the shared temp_.
+  const bool busy = temp_in_use_.test_and_set(std::memory_order_acq_rel);
+  JXL_ASSERT(!busy);  // temp_ must not already be claimed.
+  (void)busy;
+  return &temp_;
+}
+
+void ButteraugliComparator::ReleaseTemp() const { temp_in_use_.clear(); }  // Clears the flag set by Temp().
+
+// Stores the frequency decomposition of the reference image rgb0 in pi0_ and
+// recursively builds sub_, a comparator for the 2x subsampled rgb0.
+ButteraugliComparator::ButteraugliComparator(const Image3F& rgb0,
+                                             const ButteraugliParams& params)
+    : xsize_(rgb0.xsize()),
+      ysize_(rgb0.ysize()),
+      params_(params),
+      temp_(xsize_, ysize_) {
+  // Images below 8x8 are left unanalysed; this also ends the recursion.
+  const bool too_small = xsize_ < 8 || ysize_ < 8;
+  if (too_small) return;
+
+  Image3F opsin0 = HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)(
+      rgb0, params, Temp(), &blur_temp_);
+  ReleaseTemp();
+  HWY_DYNAMIC_DISPATCH(SeparateFrequencies)
+  (xsize_, ysize_, params_, &blur_temp_, opsin0, pi0_);
+
+  // Different resolutions are handled by this recursive construction.
+  sub_.reset(new ButteraugliComparator(SubSample2x(rgb0), params));
+}
+
+void ButteraugliComparator::Mask(ImageF* BUTTERAUGLI_RESTRICT mask) const {
+  HWY_DYNAMIC_DISPATCH(MaskPsychoImage)  // pi0_ is passed as both inputs.
+  (pi0_, pi0_, xsize_, ysize_, params_, Temp(), &blur_temp_, mask, nullptr);
+  ReleaseTemp();  // Pairs with the Temp() call in the argument list above.
+}
+
+// Fills `result` with the difference map between the constructor's image and
+// rgb1, then blends in the map computed at half resolution via sub_.
+void ButteraugliComparator::Diffmap(const Image3F& rgb1, ImageF& result) const {
+  PROFILER_FUNC;
+  if (xsize_ < 8 || ysize_ < 8) {
+    ZeroFillImage(&result);
+    return;
+  }
+  const Image3F opsin1 = HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)(
+      rgb1, params_, Temp(), &blur_temp_);
+  ReleaseTemp();
+  DiffmapOpsinDynamicsImage(opsin1, result);
+
+  // No half-resolution comparator, or one below 8x8: nothing to blend in.
+  if (!sub_ || sub_->xsize_ < 8 || sub_->ysize_ < 8) return;
+  const Image3F half_opsin1 = HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)(
+      SubSample2x(rgb1), params_, sub_->Temp(), &sub_->blur_temp_);
+  sub_->ReleaseTemp();
+  ImageF half_result;
+  sub_->DiffmapOpsinDynamicsImage(half_opsin1, half_result);
+  AddSupersampled2x(half_result, 0.5, result);
+}
+
+void ButteraugliComparator::DiffmapOpsinDynamicsImage(const Image3F& xyb1,
+                                                      ImageF& result) const {
+  PROFILER_FUNC;
+  if (xsize_ >= 8 && ysize_ >= 8) {
+    PsychoImage bands1;  // Frequency decomposition of xyb1.
+    HWY_DYNAMIC_DISPATCH(SeparateFrequencies)
+    (xsize_, ysize_, params_, &blur_temp_, xyb1, bands1);
+    result = ImageF(xsize_, ysize_);
+    DiffmapPsychoImage(bands1, result);
+  } else {  // Below 8x8 no comparison is made; `result` is zero-filled.
+    ZeroFillImage(&result);
+  }
+}
+
+namespace {
+
+// Forwards to the dispatched MaltaDiffMap, adding its fixed len and mulli.
+void MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
+                  const double w_0lt1, const double norm1,
+                  ImageF* HWY_RESTRICT diffs,
+                  Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+  PROFILER_FUNC;
+  static const double kLen = 3.75, kMulli = 0.39905817637;
+  HWY_DYNAMIC_DISPATCH(MaltaDiffMap)
+  (lum0, lum1, w_0gt1, w_0lt1, norm1, kLen, kMulli, diffs, block_diff_ac, c);
+}
+
+// Forwards to the dispatched MaltaDiffMapLF, adding its fixed len and mulli.
+void MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
+                    const double w_0lt1, const double norm1,
+                    ImageF* HWY_RESTRICT diffs,
+                    Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+  PROFILER_FUNC;
+  static const double kLen = 3.75, kMulli = 0.611612573796;
+  HWY_DYNAMIC_DISPATCH(MaltaDiffMapLF)
+  (lum0, lum1, w_0gt1, w_0lt1, norm1, kLen, kMulli, diffs, block_diff_ac, c);
+}
+
+}  // namespace
+
+void ButteraugliComparator::DiffmapPsychoImage(const PsychoImage& pi1,
+                                               ImageF& diffmap) const {
+  PROFILER_FUNC;
+  if (xsize_ < 8 || ysize_ < 8) {  // Below 8x8 no comparison is made.
+    ZeroFillImage(&diffmap);
+    return;
+  }
+
+  const float hf_asymmetry_ = params_.hf_asymmetry;
+  const float xmul_ = params_.xmul;
+
+  ImageF diffs(xsize_, ysize_);  // Handed to every Malta call below.
+  Image3F block_diff_ac(xsize_, ysize_);
+  ZeroFillImage(&block_diff_ac);
+  static const double wUhfMalta = 1.10039032555;
+  static const double norm1Uhf = 71.7800275169;
+  MaltaDiffMap(pi0_.uhf[1], pi1.uhf[1], wUhfMalta * hf_asymmetry_,
+               wUhfMalta / hf_asymmetry_, norm1Uhf, &diffs, &block_diff_ac, 1);
+
+  static const double wUhfMaltaX = 173.5;
+  static const double norm1UhfX = 5.0;
+  MaltaDiffMap(pi0_.uhf[0], pi1.uhf[0], wUhfMaltaX * hf_asymmetry_,
+               wUhfMaltaX / hf_asymmetry_, norm1UhfX, &diffs, &block_diff_ac,
+               0);
+
+  static const double wHfMalta = 18.7237414387;
+  static const double norm1Hf = 4498534.45232;
+  MaltaDiffMapLF(pi0_.hf[1], pi1.hf[1], wHfMalta * std::sqrt(hf_asymmetry_),
+                 wHfMalta / std::sqrt(hf_asymmetry_), norm1Hf, &diffs,
+                 &block_diff_ac, 1);
+
+  static const double wHfMaltaX = 6923.99476109;
+  static const double norm1HfX = 8051.15833247;
+  MaltaDiffMapLF(pi0_.hf[0], pi1.hf[0], wHfMaltaX * std::sqrt(hf_asymmetry_),
+                 wHfMaltaX / std::sqrt(hf_asymmetry_), norm1HfX, &diffs,
+                 &block_diff_ac, 0);
+
+  static const double wMfMalta = 37.0819870399;
+  static const double norm1Mf = 130262059.556;
+  MaltaDiffMapLF(pi0_.mf.Plane(1), pi1.mf.Plane(1), wMfMalta, wMfMalta, norm1Mf,
+                 &diffs, &block_diff_ac, 1);
+
+  static const double wMfMaltaX = 8246.75321353;
+  static const double norm1MfX = 1009002.70582;
+  MaltaDiffMapLF(pi0_.mf.Plane(0), pi1.mf.Plane(0), wMfMaltaX, wMfMaltaX,
+                 norm1MfX, &diffs, &block_diff_ac, 0);
+
+  static const double wmul[9] = {  // [c]: hf, [3 + c]: mf, [6 + c]: lf weight.
+      400.0,         1.50815703118,  0,
+      2150.0,        10.6195433239,  16.2176043152,
+      29.2353797994, 0.844626970982, 0.703646627719,
+  };
+  Image3F block_diff_dc(xsize_, ysize_);  // Written by SetL2Diff for each plane.
+  for (size_t c = 0; c < 3; ++c) {
+    if (c < 2) {  // No blue channel error accumulated at HF.
+      HWY_DYNAMIC_DISPATCH(L2DiffAsymmetric)
+      (pi0_.hf[c], pi1.hf[c], wmul[c] * hf_asymmetry_, wmul[c] / hf_asymmetry_,
+       &block_diff_ac, c);
+    }
+    HWY_DYNAMIC_DISPATCH(L2Diff)
+    (pi0_.mf.Plane(c), pi1.mf.Plane(c), wmul[3 + c], &block_diff_ac, c);
+    HWY_DYNAMIC_DISPATCH(SetL2Diff)
+    (pi0_.lf.Plane(c), pi1.lf.Plane(c), wmul[6 + c], &block_diff_dc, c);
+  }
+
+  ImageF mask;  // Output of MaskPsychoImage below.
+  HWY_DYNAMIC_DISPATCH(MaskPsychoImage)
+  (pi0_, pi1, xsize_, ysize_, params_, Temp(), &blur_temp_, &mask,
+   &block_diff_ac.Plane(1));
+  ReleaseTemp();  // Pairs with the Temp() call in the argument list above.
+
+  HWY_DYNAMIC_DISPATCH(CombineChannelsToDiffmap)
+  (mask, block_diff_dc, block_diff_ac, xmul_, &diffmap);
+}
+
+double ButteraugliScoreFromDiffmap(const ImageF& diffmap,
+                                   const ButteraugliParams* params) {
+  PROFILER_FUNC;
+  const size_t xs = diffmap.xsize();
+  const size_t ys = diffmap.ysize();
+  // In approximate-border mode, pixels near the edge are skipped because
+  // FastGauss' zero-valued-boundary behavior likely affects them. Diffmaps
+  // at most 16 pixels wide or high are scored in full.
+  const bool skip_border = params != nullptr && params->approximate_border;
+  const size_t border = (skip_border && xs > 16 && ys > 16) ? 8 : 0;
+  float peak = 0.0f;
+  for (size_t y = border; y < ys - border; ++y) {
+    const float* BUTTERAUGLI_RESTRICT row = diffmap.ConstRow(y);
+    for (size_t x = border; x < xs - border; ++x) {
+      peak = std::max(peak, row[x]);
+    }
+  }
+  return peak;
+}
+
+bool ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1,
+                        double hf_asymmetry, double xmul, ImageF& diffmap) {
+  ButteraugliParams params;  // Fields not assigned below keep their defaults.
+  params.hf_asymmetry = hf_asymmetry;
+  params.xmul = xmul;
+  return ButteraugliDiffmap(rgb0, rgb1, params, diffmap);  // Params overload.
+}
+
+// Writes the difference map of rgb0 vs. rgb1 to `diffmap`; false on bad sizes.
+bool ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1,
+                        const ButteraugliParams& params, ImageF& diffmap) {
+  PROFILER_FUNC;
+  const size_t xsize = rgb0.xsize();
+  const size_t ysize = rgb0.ysize();
+  if (xsize < 1 || ysize < 1) {
+    return JXL_FAILURE("Zero-sized image");
+  }
+  if (!SameSize(rgb0, rgb1)) {
+    return JXL_FAILURE("Size mismatch");
+  }
+  static const size_t kMax = 8;
+  if (xsize < kMax || ysize < kMax) {
+    // Butteraugli values for images narrower or shorter than 8 pixels are
+    // nonsensical, but computing something is likely less disruptive than
+    // giving up. Temporarily replicate the edges outwards to reach 8 x 8.
+    const size_t xborder = xsize < kMax ? (kMax - xsize) / 2 : 0;
+    const size_t yborder = ysize < kMax ? (kMax - ysize) / 2 : 0;
+    const size_t xscaled = std::max<size_t>(kMax, xsize);
+    const size_t yscaled = std::max<size_t>(kMax, ysize);
+    Image3F scaled0(xscaled, yscaled);
+    Image3F scaled1(xscaled, yscaled);
+    for (int i = 0; i < 3; ++i) {
+      for (size_t y = 0; y < yscaled; ++y) {
+        // Unsigned: test before subtracting, `y - yborder` would wrap around.
+        const size_t y2 = std::min(ysize - 1, y > yborder ? y - yborder : 0);
+        for (size_t x = 0; x < xscaled; ++x) {
+          const size_t x2 = std::min(xsize - 1, x > xborder ? x - xborder : 0);
+          scaled0.PlaneRow(i, y)[x] = rgb0.PlaneRow(i, y2)[x2];
+          scaled1.PlaneRow(i, y)[x] = rgb1.PlaneRow(i, y2)[x2];
+        }
+      }
+    }
+    ImageF diffmap_scaled;
+    if (!ButteraugliDiffmap(scaled0, scaled1, params, diffmap_scaled)) {
+      return false;  // diffmap_scaled was not filled; do not read from it.
+    }
+    diffmap = ImageF(xsize, ysize);
+    for (size_t y = 0; y < ysize; ++y) {
+      for (size_t x = 0; x < xsize; ++x) {
+        diffmap.Row(y)[x] = diffmap_scaled.Row(y + yborder)[x + xborder];
+      }
+    }
+    return true;
+  }
+  ButteraugliComparator butteraugli(rgb0, params);
+  butteraugli.Diffmap(rgb1, diffmap);
+  return true;
+}
+
+bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1,
+                          float hf_asymmetry, float xmul, ImageF& diffmap,
+                          double& diffvalue) {
+  ButteraugliParams params;  // Fields not assigned below keep their defaults.
+  params.hf_asymmetry = hf_asymmetry;
+  params.xmul = xmul;
+  return ButteraugliInterface(rgb0, rgb1, params, diffmap, diffvalue);
+}
+
+// Fills `diffmap`, then derives `diffvalue` from it; returns false (leaving
+// `diffvalue` unassigned) when ButteraugliDiffmap() fails.
+bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1,
+                          const ButteraugliParams& params, ImageF& diffmap,
+                          double& diffvalue) {
+#if PROFILER_ENABLED
+  const auto t_begin = std::chrono::steady_clock::now();
+#endif
+  if (!ButteraugliDiffmap(rgb0, rgb1, params, diffmap)) return false;
+#if PROFILER_ENABLED
+  const std::chrono::duration<double> elapsed =
+      std::chrono::steady_clock::now() - t_begin;
+  const size_t mp = rgb0.xsize() * rgb0.ysize();
+  printf("diff MP/s %f\n", mp / elapsed.count() * 1E-6);
+#endif
+  diffvalue = ButteraugliScoreFromDiffmap(diffmap, &params);
+  return true;
+}
+
+// Maps a butteraugli score to a fuzzy class value in [0 .. 2.0]; the value
+// is `scaler` at score == 1.0 and tends to 0 for large scores.
+double ButteraugliFuzzyClass(double score) {
+  static const double fuzzy_width_up = 4.8;
+  static const double fuzzy_width_down = 4.8;
+  static const double m0 = 2.0;
+  static const double scaler = 0.7777;
+  const bool below_one = score < 1.0;
+  const double width = below_one ? fuzzy_width_down : fuzzy_width_up;
+  // Logistic curve that equals 1 at score == 1.
+  double val = m0 / (1.0 + exp((score - 1.0) * width));
+  if (!below_one) {
+    return val * scaler;  // val in [0 .. scaler]
+  }
+  val -= 1.0;           // from [1 .. 2] to [0 .. 1]
+  val *= 2.0 - scaler;  // from [0 .. 1] to [0 .. 2.0 - scaler]
+  val += scaler;        // from [0 .. 2.0 - scaler] to [scaler .. 2.0]
+  return val;
+}
+
+// #define PRINT_OUT_NORMALIZATION
+
+// Bisection search for the score at which ButteraugliFuzzyClass() equals
+// `seek`; the step starts at 1.0 and is halved until it drops below 1e-10.
+double ButteraugliFuzzyInverse(double seek) {
+  double pos = 0;
+  double step = 1.0;
+  while (step >= 1e-10) {
+    // A class value below `seek` moves pos down, anything else moves it up.
+    const bool go_down = ButteraugliFuzzyClass(pos) < seek;
+    pos += go_down ? -step : step;
+    step *= 0.5;
+  }
+#ifdef PRINT_OUT_NORMALIZATION
+  if (seek == 1.0) {
+    fprintf(stderr, "Fuzzy inverse %g\n", pos);
+  }
+#endif
+  return pos;
+}
+
+#ifdef PRINT_OUT_NORMALIZATION
+static double print_out_normalization = ButteraugliFuzzyInverse(1.0);
+#endif
+
+namespace {
+
+// Maps `score` to a heat-map colour in rgb[0..2]: scores below good_threshold
+// use the first 30% of the table, scores below bad_threshold the next 15%.
+void ScoreToRgb(double score, double good_threshold, double bad_threshold,
+                float rgb[3]) {
+  static const double heatmap[12][3] = {  // Built once, not for every call.
+      {0, 0, 0},       {0, 0, 1},
+      {0, 1, 1},       {0, 1, 0},  // Good level
+      {1, 1, 0},       {1, 0, 0},  // Bad level
+      {1, 0, 1},       {0.5, 0.5, 1.0},
+      {1.0, 0.5, 0.5},  // Pastel colors for the very bad quality range.
+      {1.0, 1.0, 0.5}, {1, 1, 1},
+      {1, 1, 1},  // Last color repeated to have a solid range of white.
+  };
+  if (score < good_threshold) {
+    score = (score / good_threshold) * 0.3;
+  } else if (score < bad_threshold) {
+    score = 0.3 +
+            (score - good_threshold) / (bad_threshold - good_threshold) * 0.15;
+  } else {
+    score = 0.45 + (score - bad_threshold) / (bad_threshold * 12) * 0.5;
+  }
+  static const int kTableSize = sizeof(heatmap) / sizeof(heatmap[0]);
+  score = std::min<double>(std::max<double>(score * (kTableSize - 1), 0.0),
+                           kTableSize - 2);
+  const int ix = std::min(std::max(0, static_cast<int>(score)), kTableSize - 2);
+  const double mix = score - ix;  // Blend factor between rows ix and ix + 1.
+  for (int i = 0; i < 3; ++i) {
+    rgb[i] = pow(mix * heatmap[ix + 1][i] + (1 - mix) * heatmap[ix][i], 0.5);
+  }
+}
+
+}  // namespace
+
+// Renders `distmap` as a colour image by passing every value through
+// ScoreToRgb() with the given thresholds.
+Image3F CreateHeatMapImage(const ImageF& distmap, double good_threshold,
+                           double bad_threshold) {
+  const size_t xs = distmap.xsize();
+  const size_t ys = distmap.ysize();
+  Image3F heatmap(xs, ys);
+  for (size_t y = 0; y < ys; ++y) {
+    const float* BUTTERAUGLI_RESTRICT row_in = distmap.ConstRow(y);
+    float* rows_out[3] = {heatmap.PlaneRow(0, y), heatmap.PlaneRow(1, y),
+                          heatmap.PlaneRow(2, y)};
+    for (size_t x = 0; x < xs; ++x) {
+      float rgb[3];
+      ScoreToRgb(row_in[x], good_threshold, bad_threshold, rgb);
+      for (size_t c = 0; c < 3; ++c) rows_out[c][x] = rgb[c];
+    }
+  }
+  return heatmap;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli/butteraugli.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli/butteraugli.h
new file mode 100644
index 0000000000..d029722d13
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli/butteraugli.h
@@ -0,0 +1,220 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com)
+
+#ifndef LIB_JXL_BUTTERAUGLI_BUTTERAUGLI_H_
+#define LIB_JXL_BUTTERAUGLI_BUTTERAUGLI_H_
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <atomic>
+#include <cmath>
+#include <memory>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+#define BUTTERAUGLI_ENABLE_CHECKS 0
+#define BUTTERAUGLI_RESTRICT JXL_RESTRICT
+
+// This is the main interface to butteraugli image similarity
+// analysis function.
+
+namespace jxl {
+
+struct ButteraugliParams {
+  // Multiplier for penalizing new HF artifacts more than blurring away
+  // features. 1.0=neutral.
+  float hf_asymmetry = 1.0f;
+
+  // Multiplier for the psychovisual difference in the X channel.
+  float xmul = 1.0f;
+
+  // Number of nits that correspond to 1.0f input values.
+  float intensity_target = 80.0f;
+
+  bool approximate_border = false;  // Score skips an 8-pixel diffmap border.
+};
+
+// ButteraugliInterface defines the public interface for butteraugli.
+//
+// It calculates the difference between rgb0 and rgb1.
+//
+// rgb0 and rgb1 contain the images. rgb0[c][px] and rgb1[c][px] contain
+// the red image for c == 0, green for c == 1, blue for c == 2. Location index
+// px is calculated as y * xsize + x.
+//
+// Value of pixels of images rgb0 and rgb1 need to be represented as raw
+// intensity. Most image formats store gamma corrected intensity in pixel
+// values. This gamma correction has to be removed, by applying the following
+// function to values in the 0-1 range:
+// butteraugli_val = pow(input_val, gamma);
+// A typical value of gamma is 2.2. It is usually stored in the image header.
+// Take care not to confuse that value with its inverse. The gamma value should
+// be always greater than one.
+// Butteraugli does not work as intended if the caller does not perform
+// gamma correction.
+//
+// hf_asymmetry is a multiplier for penalizing new HF artifacts more than
+// blurring away features (1.0 -> neutral).
+//
+// diffmap will contain an image of the size xsize * ysize, containing
+// localized differences for values px (indexed with the px the same as rgb0
+// and rgb1). diffvalue will give a global score of similarity.
+//
+// A diffvalue smaller than kButteraugliGood indicates that images can be
+// observed as the same image.
+// diffvalue larger than kButteraugliBad indicates that a difference between
+// the images can be observed.
+// A diffvalue between kButteraugliGood and kButteraugliBad indicates that
+// a subtle difference can be observed between the images.
+//
+// Returns true on success.
+bool ButteraugliInterface(const Image3F &rgb0, const Image3F &rgb1,
+                          const ButteraugliParams &params, ImageF &diffmap,
+                          double &diffvalue);
+
+// Deprecated (calls the previous function)
+bool ButteraugliInterface(const Image3F &rgb0, const Image3F &rgb1,
+                          float hf_asymmetry, float xmul, ImageF &diffmap,
+                          double &diffvalue);
+
+// Converts the butteraugli score into fuzzy class values that are continuous
+// at the class boundary. The class boundary location is based on human
+// raters, but the slope is arbitrary. Particularly, it does not reflect
+// the expectation value of probabilities of the human raters. It is just
+// expected that a smoother class boundary will allow for higher-level
+// optimization algorithms to work faster.
+//
+// Returns 2.0 for a perfect match, and 1.0 for 'ok', 0.0 for bad. Because the
+// scoring is fuzzy, a butteraugli score of 0.96 would return a class of
+// around 1.9.
+double ButteraugliFuzzyClass(double score);
+
+// Input values should be in range 0 (bad) to 2 (good). Use
+// kButteraugliNormalization as normalization.
+double ButteraugliFuzzyInverse(double seek);
+
+// Implementation details, don't use anything below or your code will
+// break in the future.
+
+#ifdef _MSC_VER
+#define BUTTERAUGLI_INLINE __forceinline
+#else
+#define BUTTERAUGLI_INLINE inline
+#endif
+
+#ifdef __clang__
+// Early versions of Clang did not support __builtin_assume_aligned.
+#define BUTTERAUGLI_HAS_ASSUME_ALIGNED __has_builtin(__builtin_assume_aligned)
+#elif defined(__GNUC__)
+#define BUTTERAUGLI_HAS_ASSUME_ALIGNED 1
+#else
+#define BUTTERAUGLI_HAS_ASSUME_ALIGNED 0
+#endif
+
+// Returns a void* that the compiler then assumes is N-byte aligned. Example:
+// float* JXL_RESTRICT aligned = (float*)BUTTERAUGLI_ASSUME_ALIGNED(in, 32);
+//
+// The assignment semantics are required by GCC/Clang. ICC provides an in-place
+// __assume_aligned, whereas MSVC's __assume appears unsuitable.
+#if BUTTERAUGLI_HAS_ASSUME_ALIGNED
+#define BUTTERAUGLI_ASSUME_ALIGNED(ptr, align) \
+  __builtin_assume_aligned((ptr), (align))
+#else
+#define BUTTERAUGLI_ASSUME_ALIGNED(ptr, align) (ptr)
+#endif  // BUTTERAUGLI_HAS_ASSUME_ALIGNED
+
+struct PsychoImage {  // Per-band images; filled by SeparateFrequencies().
+  ImageF uhf[2];  // XY: [0] = X, [1] = Y
+  ImageF hf[2];   // XY: [0] = X, [1] = Y
+  Image3F mf;     // XYB planes 0..2
+  Image3F lf;     // XYB planes 0..2
+};
+
+// Depending on implementation, Blur needs either a normal or a transposed
+// scratch image. Both live here and are allocated only on first request to
+// reduce memory usage.
+struct BlurTemp {
+  // Returns the scratch image, sized like `in` when it is first allocated.
+  ImageF *Get(const ImageF &in) {
+    const bool unallocated = (temp.xsize() == 0);
+    if (unallocated) temp = ImageF(in.xsize(), in.ysize());
+    return &temp;
+  }
+
+  // Like Get(), but allocated with in's width and height swapped.
+  ImageF *GetTransposed(const ImageF &in) {
+    const bool unallocated = (transposed_temp.xsize() == 0);
+    if (unallocated) transposed_temp = ImageF(in.ysize(), in.xsize());
+    return &transposed_temp;
+  }
+
+  ImageF temp;             // Empty (xsize() == 0) until Get() is called.
+  ImageF transposed_temp;  // Empty until GetTransposed() is called.
+};
+
+class ButteraugliComparator {
+ public:
+  // Butteraugli is calibrated at xmul = 1.0. We add a multiplier here so that
+  // we can test the hypothesis that a higher weighting of the X channel would
+  // improve results at higher Butteraugli values.
+  ButteraugliComparator(const Image3F &rgb0, const ButteraugliParams &params);
+  virtual ~ButteraugliComparator() = default;
+
+  // Computes the butteraugli map between the original image given in the
+  // constructor and the distorted image given here.
+  void Diffmap(const Image3F &rgb1, ImageF &result) const;
+
+  // Same as above, but OpsinDynamicsImage() was already applied.
+  void DiffmapOpsinDynamicsImage(const Image3F &xyb1, ImageF &result) const;
+
+  // Same as above, but the frequency decomposition was already applied.
+  void DiffmapPsychoImage(const PsychoImage &pi1, ImageF &diffmap) const;
+
+  void Mask(ImageF *BUTTERAUGLI_RESTRICT mask) const;  // Uses only pi0_.
+
+ private:
+  Image3F *Temp() const;     // Claims temp_; asserts it was not in use.
+  void ReleaseTemp() const;  // Clears temp_in_use_.
+
+  const size_t xsize_;
+  const size_t ysize_;
+  ButteraugliParams params_;
+  PsychoImage pi0_;  // Frequency bands of the image given to the constructor.
+
+  // Shared temporary image storage to reduce the number of allocations;
+  // obtained via Temp(), must call ReleaseTemp when no longer needed.
+  mutable Image3F temp_;
+  mutable std::atomic_flag temp_in_use_ = ATOMIC_FLAG_INIT;
+
+  mutable BlurTemp blur_temp_;
+  std::unique_ptr<ButteraugliComparator> sub_;  // For the 2x subsampled image.
+};
+
+// Deprecated.
+bool ButteraugliDiffmap(const Image3F &rgb0, const Image3F &rgb1,
+                        double hf_asymmetry, double xmul, ImageF &diffmap);
+
+bool ButteraugliDiffmap(const Image3F &rgb0, const Image3F &rgb1,
+                        const ButteraugliParams &params, ImageF &diffmap);
+
+double ButteraugliScoreFromDiffmap(const ImageF &diffmap,
+                                   const ButteraugliParams *params = nullptr);
+
+// Generate rgb-representation of the distance between two images.
+Image3F CreateHeatMapImage(const ImageF &distmap, double good_threshold,
+                           double bad_threshold);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_BUTTERAUGLI_BUTTERAUGLI_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli_test.cc
new file mode 100644
index 0000000000..98ec7888aa
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli_test.cc
@@ -0,0 +1,102 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "jxl/butteraugli.h"
+
+#include "gtest/gtest.h"
+#include "jxl/butteraugli_cxx.h"
+#include "lib/jxl/test_utils.h"
+
+TEST(ButteraugliTest, Lossless) {  // Identical inputs must score exactly 0.
+  const uint32_t xsize = 171, ysize = 219;
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+  JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  JxlButteraugliApiPtr api(JxlButteraugliApiCreate(nullptr));
+  ASSERT_TRUE(api != nullptr);  // Create returns nullptr on allocation failure.
+  JxlButteraugliResultPtr result(JxlButteraugliCompute(
+      api.get(), xsize, ysize, &pixel_format, pixels.data(), pixels.size(),
+      &pixel_format, pixels.data(), pixels.size()));
+  ASSERT_TRUE(result != nullptr);  // Compute returns nullptr on failure.
+  EXPECT_EQ(0.0, JxlButteraugliResultGetDistance(result.get(), 8.0));
+}
+
+TEST(ButteraugliTest, Distmap) {  // Identical inputs: every distmap pixel is 0.
+  const uint32_t xsize = 171, ysize = 219;
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+  JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  JxlButteraugliApiPtr api(JxlButteraugliApiCreate(nullptr));
+  ASSERT_TRUE(api != nullptr);
+  JxlButteraugliResultPtr result(JxlButteraugliCompute(
+      api.get(), xsize, ysize, &pixel_format, pixels.data(), pixels.size(),
+      &pixel_format, pixels.data(), pixels.size()));
+  ASSERT_TRUE(result != nullptr);  // Guard: the getters below dereference it.
+  EXPECT_EQ(0.0, JxlButteraugliResultGetDistance(result.get(), 8.0));
+  const float* distmap;
+  uint32_t row_stride;  // Distance between rows, in float elements.
+  JxlButteraugliResultGetDistmap(result.get(), &distmap, &row_stride);
+  for (uint32_t y = 0; y < ysize; y++) {
+    for (uint32_t x = 0; x < xsize; x++) {
+      EXPECT_EQ(0.0, distmap[y * row_stride + x]);
+    }
+  }
+}
+
+TEST(ButteraugliTest, Distorted) {
+  const uint32_t xsize = 171, ysize = 219;
+  std::vector<uint8_t> orig_pixels =
+      jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+  std::vector<uint8_t> dist_pixels =
+      jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+  dist_pixels[0] += 128;  // Perturb one byte so the images are not identical.
+  JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  JxlButteraugliApiPtr api(JxlButteraugliApiCreate(nullptr));
+  ASSERT_TRUE(api != nullptr);
+  JxlButteraugliResultPtr result(JxlButteraugliCompute(
+      api.get(), xsize, ysize, &pixel_format, orig_pixels.data(),
+      orig_pixels.size(), &pixel_format, dist_pixels.data(),
+      dist_pixels.size()));
+  ASSERT_TRUE(result != nullptr);  // Compute returns nullptr on failure.
+  EXPECT_NE(0.0, JxlButteraugliResultGetDistance(result.get(), 8.0));
+}
+
+TEST(ButteraugliTest, Api) {  // Each parameter change must change the score.
+  const uint32_t xsize = 171, ysize = 219;
+  std::vector<uint8_t> orig_pixels =
+      jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+  std::vector<uint8_t> dist_pixels =
+      jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+  dist_pixels[0] += 128;
+  JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  JxlButteraugliApiPtr api(JxlButteraugliApiCreate(nullptr));
+  ASSERT_TRUE(api != nullptr);
+  JxlButteraugliApiSetHFAsymmetry(api.get(), 1.0f);
+  JxlButteraugliApiSetIntensityTarget(api.get(), 250.0f);
+  JxlButteraugliResultPtr result(JxlButteraugliCompute(
+      api.get(), xsize, ysize, &pixel_format, orig_pixels.data(),
+      orig_pixels.size(), &pixel_format, dist_pixels.data(),
+      dist_pixels.size()));
+  ASSERT_TRUE(result != nullptr);  // Compute returns nullptr on failure.
+  double distance0 = JxlButteraugliResultGetDistance(result.get(), 8.0);
+
+  JxlButteraugliApiSetHFAsymmetry(api.get(), 2.0f);
+  result.reset(JxlButteraugliCompute(api.get(), xsize, ysize, &pixel_format,
+                                     orig_pixels.data(), orig_pixels.size(),
+                                     &pixel_format, dist_pixels.data(),
+                                     dist_pixels.size()));
+  ASSERT_TRUE(result != nullptr);
+  double distance1 = JxlButteraugliResultGetDistance(result.get(), 8.0);
+  EXPECT_NE(distance0, distance1);
+
+  JxlButteraugliApiSetIntensityTarget(api.get(), 80.0f);
+  result.reset(JxlButteraugliCompute(api.get(), xsize, ysize, &pixel_format,
+                                     orig_pixels.data(), orig_pixels.size(),
+                                     &pixel_format, dist_pixels.data(),
+                                     dist_pixels.size()));
+  ASSERT_TRUE(result != nullptr);
+  double distance2 = JxlButteraugliResultGetDistance(result.get(), 8.0);
+  EXPECT_NE(distance1, distance2);
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli_wrapper.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli_wrapper.cc
new file mode 100644
index 0000000000..a2d2bc3c93
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/butteraugli_wrapper.cc
@@ -0,0 +1,207 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <atomic>
+
+#include "jxl/butteraugli.h"
+#include "jxl/parallel_runner.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/butteraugli/butteraugli.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_butteraugli_pnorm.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/memory_manager_internal.h"
+
+namespace {
+
+void SetMetadataFromPixelFormat(const JxlPixelFormat* pixel_format,
+                                jxl::ImageMetadata* metadata) {
+  // Sample type per data type; alpha_bits stays 0 for an unlisted data type.
+  uint32_t alpha_bits = 0;
+  switch (pixel_format->data_type) {
+    case JXL_TYPE_UINT8:
+      metadata->SetUintSamples(8);
+      alpha_bits = 8;
+      break;
+    case JXL_TYPE_UINT16:
+      metadata->SetUintSamples(16);
+      alpha_bits = 16;
+      break;
+    case JXL_TYPE_UINT32:
+      metadata->SetUintSamples(32);
+      alpha_bits = 16;
+      break;
+    case JXL_TYPE_FLOAT16:
+      metadata->SetFloat16Samples();
+      alpha_bits = 16;
+      break;
+    case JXL_TYPE_FLOAT:
+      metadata->SetFloat32Samples();
+      alpha_bits = 16;
+      break;
+    case JXL_TYPE_BOOLEAN:
+      metadata->SetUintSamples(2);
+      alpha_bits = 2;
+      break;
+  }
+  const auto channels = pixel_format->num_channels;
+  if (channels == 2 || channels == 4) metadata->SetAlphaBits(alpha_bits);
+}
+
+}  // namespace
+
+struct JxlButteraugliResultStruct {  // Output of one JxlButteraugliCompute().
+  JxlMemoryManager memory_manager;  // Copied from the API; used to free this.
+
+  jxl::ImageF distmap;            // Filled by jxl::ButteraugliDistance().
+  jxl::ButteraugliParams params;  // Parameters the distmap was computed with.
+};
+
+struct JxlButteraugliApiStruct {
+  // Multiplier for penalizing new HF artifacts more than blurring away
+  // features. 1.0=neutral.
+  float hf_asymmetry = 1.0f;
+
+  // Multiplier for the psychovisual difference in the X channel.
+  float xmul = 1.0f;
+
+  // Number of nits that correspond to 1.0f input values.
+  float intensity_target = jxl::kDefaultIntensityTarget;
+
+  bool approximate_border = false;  // Forwarded as params.approximate_border.
+
+  JxlMemoryManager memory_manager;  // Assigned by JxlButteraugliApiCreate().
+  std::unique_ptr<jxl::ThreadPool> thread_pool{nullptr};  // Null until set.
+};
+
+JxlButteraugliApi* JxlButteraugliApiCreate(
+    const JxlMemoryManager* memory_manager) {
+  // Build a local manager from the caller's one; bail out if it is rejected.
+  JxlMemoryManager resolved;
+  if (!jxl::MemoryManagerInit(&resolved, memory_manager)) return nullptr;
+  void* const storage =
+      jxl::MemoryManagerAlloc(&resolved, sizeof(JxlButteraugliApi));
+  if (storage == nullptr) return nullptr;
+  // Construct in place and remember the manager, so that
+  // JxlButteraugliApiDestroy can release the storage through it.
+  JxlButteraugliApi* const api = new (storage) JxlButteraugliApi();
+  api->memory_manager = resolved;
+  return api;
+}
+
+void JxlButteraugliApiSetParallelRunner(JxlButteraugliApi* api,
+                                        JxlParallelRunner parallel_runner,
+                                        void* parallel_runner_opaque) {
+  api->thread_pool = jxl::make_unique<jxl::ThreadPool>(parallel_runner,
+                                                       parallel_runner_opaque);
+}  // Replaces any pool set earlier; later Compute calls pass it on.
+
+void JxlButteraugliApiSetHFAsymmetry(JxlButteraugliApi* api, float v) {
+  api->hf_asymmetry = v;  // Stored only; read by the next JxlButteraugliCompute.
+}
+
+void JxlButteraugliApiSetIntensityTarget(JxlButteraugliApi* api, float v) {
+  api->intensity_target = v;  // Stored only; read by the next Compute call.
+}
+
+void JxlButteraugliApiDestroy(JxlButteraugliApi* api) {
+  if (!api) return;  // Destroying a null handle is a no-op.
+  // Copy the manager out of *api first: it must outlive the destructor call.
+  JxlMemoryManager local_memory_manager = api->memory_manager;
+  api->~JxlButteraugliApi();  // Explicit: a custom free function is used.
+  jxl::MemoryManagerFree(&local_memory_manager, api);
+}
+
+JxlButteraugliResult* JxlButteraugliCompute(
+    const JxlButteraugliApi* api, uint32_t xsize, uint32_t ysize,
+    const JxlPixelFormat* pixel_format_orig, const void* buffer_orig,
+    size_t size_orig, const JxlPixelFormat* pixel_format_dist,
+    const void* buffer_dist, size_t size_dist) {  // No null checks on inputs.
+  jxl::ImageMetadata orig_metadata;  // orig_ib is handed a pointer to this.
+  SetMetadataFromPixelFormat(pixel_format_orig, &orig_metadata);
+  jxl::ImageBundle orig_ib(&orig_metadata);
+  jxl::ColorEncoding c_current;  // JXL_TYPE_FLOAT => linear sRGB, else sRGB.
+  if (pixel_format_orig->data_type == JXL_TYPE_FLOAT) {
+    c_current =
+        jxl::ColorEncoding::LinearSRGB(pixel_format_orig->num_channels < 3);
+  } else {
+    c_current = jxl::ColorEncoding::SRGB(pixel_format_orig->num_channels < 3);
+  }
+  if (!jxl::BufferToImageBundle(*pixel_format_orig, xsize, ysize, buffer_orig,
+                                size_orig, api->thread_pool.get(), c_current,
+                                &orig_ib)) {
+    return nullptr;  // Original buffer could not be converted.
+  }
+
+  jxl::ImageMetadata dist_metadata;  // Same steps for the distorted image.
+  SetMetadataFromPixelFormat(pixel_format_dist, &dist_metadata);
+  jxl::ImageBundle dist_ib(&dist_metadata);
+  if (pixel_format_dist->data_type == JXL_TYPE_FLOAT) {
+    c_current =
+        jxl::ColorEncoding::LinearSRGB(pixel_format_dist->num_channels < 3);
+  } else {
+    c_current = jxl::ColorEncoding::SRGB(pixel_format_dist->num_channels < 3);
+  }
+  if (!jxl::BufferToImageBundle(*pixel_format_dist, xsize, ysize, buffer_dist,
+                                size_dist, api->thread_pool.get(), c_current,
+                                &dist_ib)) {
+    return nullptr;  // Distorted buffer could not be converted.
+  }
+
+  void* alloc = jxl::MemoryManagerAlloc(&api->memory_manager,
+                                        sizeof(JxlButteraugliResult));
+  if (!alloc) return nullptr;
+  // Placement new constructor on allocated memory
+  JxlButteraugliResult* result = new (alloc) JxlButteraugliResult();
+  result->memory_manager = api->memory_manager;  // Needed later by Destroy.
+  result->params.hf_asymmetry = api->hf_asymmetry;
+  result->params.xmul = api->xmul;
+  result->params.intensity_target = api->intensity_target;
+  result->params.approximate_border = api->approximate_border;
+  jxl::ButteraugliDistance(orig_ib, dist_ib, result->params, &result->distmap,
+                           api->thread_pool.get());  // Return value unused.
+
+  return result;  // Caller frees it with JxlButteraugliResultDestroy().
+}
+
+float JxlButteraugliResultGetDistance(const JxlButteraugliResult* result,
+                                      float pnorm) {
+  return static_cast<float>(  // p-norm summary of the stored distmap.
+      jxl::ComputeDistanceP(result->distmap, result->params, pnorm));
+}
+
+void JxlButteraugliResultGetDistmap(const JxlButteraugliResult* result,
+                                    const float** buffer,
+                                    uint32_t* row_stride) {
+  *buffer = result->distmap.Row(0);  // Points into *result; not a copy.
+  *row_stride = result->distmap.PixelsPerRow();  // Narrowed to uint32_t.
+}
+
+float JxlButteraugliResultGetMaxDistance(const JxlButteraugliResult* result) {
+  // Largest distmap value; 0 if the map is empty or holds nothing positive.
+  float largest = 0.0;
+  for (uint32_t y = 0; y < result->distmap.ysize(); y++) {
+    const float* row = result->distmap.ConstRow(y);
+    for (uint32_t x = 0; x < result->distmap.xsize(); x++) {
+      if (row[x] > largest) largest = row[x];
+    }
+  }
+  return largest;
+}
+
+void JxlButteraugliResultDestroy(JxlButteraugliResult* result) {
+  if (!result) return;  // Destroying a null handle is a no-op.
+  // Copy the manager out of *result first: it must outlive the destructor.
+  JxlMemoryManager local_memory_manager = result->memory_manager;
+  result->~JxlButteraugliResult();  // Explicit: a custom free function is used.
+  jxl::MemoryManagerFree(&local_memory_manager, result);
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/byte_order_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/byte_order_test.cc
new file mode 100644
index 0000000000..c1ea19f312
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/byte_order_test.cc
@@ -0,0 +1,53 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/byte_order.h"
+
+#include "gtest/gtest.h"
+
+namespace jxl {
+namespace {
+
+TEST(ByteOrderTest, TestRoundTripBE16) {  // BE round-trips; LE read differs.
+  const uint32_t in = 0x1234;
+  uint8_t buf[2];
+  StoreBE16(in, buf);
+  EXPECT_EQ(in, LoadBE16(buf));
+  EXPECT_NE(in, LoadLE16(buf));  // Holds because the two bytes differ.
+}
+
+TEST(ByteOrderTest, TestRoundTripLE16) {  // LE round-trips; BE read differs.
+  const uint32_t in = 0x1234;
+  uint8_t buf[2];
+  StoreLE16(in, buf);
+  EXPECT_EQ(in, LoadLE16(buf));
+  EXPECT_NE(in, LoadBE16(buf));
+}
+
+TEST(ByteOrderTest, TestRoundTripBE32) {  // BE round-trips; LE read differs.
+  const uint32_t in = 0xFEDCBA98u;
+  uint8_t buf[4];
+  StoreBE32(in, buf);
+  EXPECT_EQ(in, LoadBE32(buf));
+  EXPECT_NE(in, LoadLE32(buf));
+}
+
+TEST(ByteOrderTest, TestRoundTripLE32) {  // LE round-trips; BE read differs.
+  const uint32_t in = 0xFEDCBA98u;
+  uint8_t buf[4];
+  StoreLE32(in, buf);
+  EXPECT_EQ(in, LoadLE32(buf));
+  EXPECT_NE(in, LoadBE32(buf));
+}
+
+TEST(ByteOrderTest, TestRoundTripLE64) {  // Round-trip only; no BE64 check here.
+  const uint64_t in = 0xFEDCBA9876543210ull;
+  uint8_t buf[8];
+  StoreLE64(in, buf);
+  EXPECT_EQ(in, LoadLE64(buf));
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/chroma_from_luma.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/chroma_from_luma.cc
new file mode 100644
index 0000000000..63d21cbb4b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/chroma_from_luma.cc
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/chroma_from_luma.h"
+
+namespace jxl {
+
+ColorCorrelationMap::ColorCorrelationMap(size_t xsize, size_t ysize, bool XYB)
+    : ytox_map(DivCeil(xsize, kColorTileDim), DivCeil(ysize, kColorTileDim)),
+      ytob_map(DivCeil(xsize, kColorTileDim), DivCeil(ysize, kColorTileDim)) {
+  ZeroFillImage(&ytox_map);  // Both maps hold one entry per color tile, all 0.
+  ZeroFillImage(&ytob_map);
+  if (!XYB) {
+    base_correlation_b_ = 0;  // Non-XYB: drop the default Y->B base term.
+  }
+  RecomputeDCFactors();  // Keep dc_factors_ in sync with the base values.
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/chroma_from_luma.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/chroma_from_luma.h
new file mode 100644
index 0000000000..cf2f90e43d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/chroma_from_luma.h
@@ -0,0 +1,151 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_CHROMA_FROM_LUMA_H_
+#define LIB_JXL_CHROMA_FROM_LUMA_H_
+
+// Chroma-from-luma, computed using heuristics to determine the best linear
+// model for the X and B channels from the Y channel.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quant_weights.h"
+
+namespace jxl {
+
+// Tile is the rectangular grid of blocks that share color correlation
+// parameters ("factor_x/b" such that residual_b = blue - Y * factor_b).
+static constexpr size_t kColorTileDim = 64;
+
+static_assert(kColorTileDim % kBlockDim == 0,
+              "Color tile dim should be divisible by block dim");
+static constexpr size_t kColorTileDimInBlocks = kColorTileDim / kBlockDim;
+
+static_assert(kGroupDimInBlocks % kColorTileDimInBlocks == 0,
+              "Group dim should be divisible by color tile dim");
+
+static constexpr uint8_t kDefaultColorFactor = 84;
+
+// JPEG DCT coefficients are at most 1024. CfL constants are at most 127, and
+// the ratio of two entries in a JPEG quantization table is at most 255. Thus,
+// since the CfL denominator is 84, this leaves 12 bits of mantissa to be used.
+// For extra caution, we use 11.
+static constexpr uint8_t kCFLFixedPointPrecision = 11;
+
+static constexpr U32Enc kColorFactorDist(Val(kDefaultColorFactor), Val(256),
+                                         BitsOffset(8, 2), BitsOffset(16, 258));
+
+struct ColorCorrelationMap {
+  ColorCorrelationMap() = default;
+  // xsize/ysize are in pixels
+  // set XYB=false to do something close to no-op cmap (needed for now since
+  // cmap is mandatory)
+  ColorCorrelationMap(size_t xsize, size_t ysize, bool XYB = true);
+
+  float YtoXRatio(int32_t x_factor) const {  // base_x + x_factor * color_scale
+    return base_correlation_x_ + x_factor * color_scale_;
+  }
+
+  float YtoBRatio(int32_t b_factor) const {  // base_b + b_factor * color_scale
+    return base_correlation_b_ + b_factor * color_scale_;
+  }
+
+  Status DecodeDC(BitReader* br) {  // Reads factor, base terms and DC factors.
+    if (br->ReadFixedBits<1>() == 1) {
+      // All default.
+      return true;
+    }
+    SetColorFactor(U32Coder::Read(kColorFactorDist, br));
+    JXL_RETURN_IF_ERROR(F16Coder::Read(br, &base_correlation_x_));
+    if (std::abs(base_correlation_x_) > 4.0f) {
+      return JXL_FAILURE("Base X correlation is out of range");
+    }
+    JXL_RETURN_IF_ERROR(F16Coder::Read(br, &base_correlation_b_));
+    if (std::abs(base_correlation_b_) > 4.0f) {
+      return JXL_FAILURE("Base B correlation is out of range");
+    }
+    ytox_dc_ = static_cast<int>(br->ReadFixedBits<kBitsPerByte>()) +
+               std::numeric_limits<int8_t>::min();  // Re-center by int8 min.
+    ytob_dc_ = static_cast<int>(br->ReadFixedBits<kBitsPerByte>()) +
+               std::numeric_limits<int8_t>::min();  // Re-center by int8 min.
+    RecomputeDCFactors();
+    return true;
+  }
+
+  // We consider a CfL map to be JPEG-reconstruction-compatible if base
+  // correlation is 0, no DC correlation is used, and we use the default color
+  // factor.
+  bool IsJPEGCompatible() const {
+    return base_correlation_x_ == 0 && base_correlation_b_ == 0 &&
+           ytob_dc_ == 0 && ytox_dc_ == 0 &&
+           color_factor_ == kDefaultColorFactor;
+  }
+
+  int32_t RatioJPEG(int32_t factor) const {  // Fixed-point factor / default.
+    return factor * (1 << kCFLFixedPointPrecision) / kDefaultColorFactor;
+  }
+
+  void SetColorFactor(uint32_t factor) {  // NOTE(review): 0 gives an inf scale.
+    color_factor_ = factor;
+    color_scale_ = 1.0f / color_factor_;
+    RecomputeDCFactors();
+  }
+
+  void SetYToBDC(int32_t ytob_dc) {
+    ytob_dc_ = ytob_dc;
+    RecomputeDCFactors();
+  }
+  void SetYToXDC(int32_t ytox_dc) {
+    ytox_dc_ = ytox_dc;
+    RecomputeDCFactors();
+  }
+
+  int32_t GetYToXDC() const { return ytox_dc_; }
+  int32_t GetYToBDC() const { return ytob_dc_; }
+  float GetColorFactor() const { return color_factor_; }  // uint32_t -> float.
+  float GetBaseCorrelationX() const { return base_correlation_x_; }
+  float GetBaseCorrelationB() const { return base_correlation_b_; }
+
+  const float* DCFactors() const { return dc_factors_; }  // 4 floats.
+
+  void RecomputeDCFactors() {  // Called after every change to the inputs.
+    dc_factors_[0] = YtoXRatio(ytox_dc_);
+    dc_factors_[2] = YtoBRatio(ytob_dc_);
+  }
+
+  ImageSB ytox_map;  // Sized in color tiles by the (xsize, ysize) constructor.
+  ImageSB ytob_map;
+
+ private:
+  float dc_factors_[4] = {};  // Only [0] and [2] are written; [1], [3] stay 0.
+  // range of factor: -1.51 to +1.52
+  uint32_t color_factor_ = kDefaultColorFactor;
+  float color_scale_ = 1.0f / color_factor_;  // Always 1 / color_factor_.
+  float base_correlation_x_ = 0.0f;
+  float base_correlation_b_ = kYToBRatio;
+  int32_t ytox_dc_ = 0;
+  int32_t ytob_dc_ = 0;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_CHROMA_FROM_LUMA_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/codec_in_out.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/codec_in_out.h
new file mode 100644
index 0000000000..2c2b767a66
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/codec_in_out.h
@@ -0,0 +1,253 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_CODEC_IN_OUT_H_
+#define LIB_JXL_CODEC_IN_OUT_H_
+
+// Holds inputs/outputs for decoding/encoding images.
+
+#include <stddef.h>
+
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/luminance.h"
+
+namespace jxl {
+
+// Per-channel interval, used to convert between (full-range) external and
+// (bounded or unbounded) temp values. See external_image.cc for the definitions
+// of temp/external.
+struct CodecInterval {  // Stored as (min, width) rather than (min, max).
+  CodecInterval() = default;
+  constexpr CodecInterval(float min, float max) : min(min), width(max - min) {}
+  // Defaults for temp.
+  float min = 0.0f;
+  float width = 1.0f;  // max - min
+};
+
+struct SizeConstraints {
+  // Upper limit on pixel dimensions/area, enforced by VerifyDimensions
+  // (called from decoders). Fuzzers set smaller values to limit memory use.
+  uint32_t dec_max_xsize = 0xFFFFFFFFu;  // Inclusive limit on width.
+  uint32_t dec_max_ysize = 0xFFFFFFFFu;  // Inclusive limit on height.
+  uint64_t dec_max_pixels = 0xFFFFFFFFu;  // Might be up to ~0ull
+};
+
+template <typename T,
+          class = typename std::enable_if<std::is_unsigned<T>::value>::type>
+Status VerifyDimensions(const SizeConstraints* constraints, T xs, T ys) {
+  if (!constraints) return true;  // No constraints: even 0x0 is accepted.
+
+  if (xs == 0 || ys == 0) return JXL_FAILURE("Empty image.");
+  if (xs > constraints->dec_max_xsize) return JXL_FAILURE("Image too wide.");
+  if (ys > constraints->dec_max_ysize) return JXL_FAILURE("Image too tall.");
+
+  const uint64_t num_pixels = static_cast<uint64_t>(xs) * ys;  // 64-bit area.
+  if (num_pixels > constraints->dec_max_pixels) {
+    return JXL_FAILURE("Image too big.");
+  }
+
+  return true;
+}
+
+using CodecIntervals = std::array<CodecInterval, 4>;  // RGB[A] or Y[A]
+
+// Allows passing arbitrary metadata to decoders (required for PNM).
+class DecoderHints {
+ public:
+  // key=color_space, value=Description(c/pp): specify the ColorEncoding of
+  //   the pixels for decoding. Otherwise, if the codec did not obtain an ICC
+  //   profile from the image, assume sRGB.
+  //
+  // Strings are taken from the command line, so avoid spaces for convenience.
+  void Add(const std::string& key, const std::string& value) {
+    kv_.emplace_back(key, value);  // Appends; duplicate keys are not merged.
+  }
+
+  // Calls `func(key, value)` for each key/value in the order they were added,
+  // returning a failure Status immediately if `func` returns false.
+  template <class Func>
+  Status Foreach(const Func& func) const {
+    for (const KeyValue& kv : kv_) {
+      Status ok = func(kv.key, kv.value);
+      if (!ok) {
+        return JXL_FAILURE("DecoderHints::Foreach returned false");
+      }
+    }
+    return true;  // Also the result when no hints were added.
+  }
+
+ private:
+  // Splitting into key/value avoids parsing in each codec.
+  struct KeyValue {
+    KeyValue(std::string key, std::string value)
+        : key(std::move(key)), value(std::move(value)) {}
+
+    std::string key;
+    std::string value;
+  };
+
+  std::vector<KeyValue> kv_;  // In insertion order.
+};
+
+// Optional text/EXIF metadata.
+struct Blobs {
+  PaddedBytes exif;
+  PaddedBytes iptc;
+  PaddedBytes jumbf;
+  PaddedBytes xmp;
+};
+
+// For Codec::kJPG, convert between JPEG and pixels or between JPEG and
+// quantized DCT coefficients
+// For pixel data, the nominal range is 0..1.
+enum class DecodeTarget { kPixels, kQuantizedCoeffs };
+
+// Holds a preview, a main image or one or more frames, plus the inputs/outputs
+// to/from decoding/encoding.
+class CodecInOut {
+ public:
+  CodecInOut() : preview_frame(&metadata.m) {  // Starts with one empty frame.
+    frames.reserve(1);
+    frames.emplace_back(&metadata.m);
+  }
+
+  // Move-only.
+  CodecInOut(CodecInOut&&) = default;  // NOTE(review): do moved bundles keep
+  CodecInOut& operator=(CodecInOut&&) = default;  // the old &metadata.m?
+
+  size_t LastStillFrame() const {  // First frame with duration > 0, else last.
+    JXL_DASSERT(frames.size() > 0);
+    size_t last = 0;
+    for (size_t i = 0; i < frames.size(); i++) {
+      last = i;
+      if (frames[i].duration > 0) break;
+    }
+    return last;
+  }
+
+  ImageBundle& Main() { return frames[LastStillFrame()]; }
+  const ImageBundle& Main() const { return frames[LastStillFrame()]; }
+
+  // If c_current.IsGray(), all planes must be identical.
+  void SetFromImage(Image3F&& color, const ColorEncoding& c_current) {
+    Main().SetFromImage(std::move(color), c_current);
+    SetIntensityTarget(this);
+    SetSize(Main().xsize(), Main().ysize());  // Keep metadata.size in sync.
+  }
+
+  void SetSize(size_t xsize, size_t ysize) {
+    JXL_CHECK(metadata.size.Set(xsize, ysize));  // Aborts if Set() fails.
+  }
+
+  void CheckMetadata() const {  // Every bundle must point at metadata.m.
+    JXL_CHECK(metadata.m.bit_depth.bits_per_sample != 0);
+    JXL_CHECK(!metadata.m.color_encoding.ICC().empty());
+
+    if (preview_frame.xsize() != 0) preview_frame.VerifyMetadata();
+    JXL_CHECK(preview_frame.metadata() == &metadata.m);
+
+    for (const ImageBundle& ib : frames) {
+      ib.VerifyMetadata();
+      JXL_CHECK(ib.metadata() == &metadata.m);
+    }
+  }
+
+  size_t xsize() const { return metadata.size.xsize(); }
+  size_t ysize() const { return metadata.size.ysize(); }
+  void ShrinkTo(size_t xsize, size_t ysize) {
+    // preview is unaffected.
+    for (ImageBundle& ib : frames) {
+      ib.ShrinkTo(xsize, ysize);
+    }
+    SetSize(xsize, ysize);
+  }
+
+  // Calls TransformTo for each ImageBundle (preview/frames).
+  Status TransformTo(const ColorEncoding& c_desired,
+                     ThreadPool* pool = nullptr) {
+    if (metadata.m.have_preview) {
+      JXL_RETURN_IF_ERROR(preview_frame.TransformTo(c_desired, pool));
+    }
+    for (ImageBundle& ib : frames) {
+      JXL_RETURN_IF_ERROR(ib.TransformTo(c_desired, pool));
+    }
+    return true;
+  }
+  // Calls PremultiplyAlpha for each ImageBundle (preview/frames).
+  void PremultiplyAlpha() {
+    ExtraChannelInfo* eci = metadata.m.Find(ExtraChannel::kAlpha);
+    if (eci == nullptr || eci->alpha_associated) return;  // nothing to do
+    if (metadata.m.have_preview) {
+      preview_frame.PremultiplyAlpha();
+    }
+    for (ImageBundle& ib : frames) {
+      ib.PremultiplyAlpha();
+    }
+    eci->alpha_associated = true;  // Makes a repeated call a no-op.
+    return;
+  }
+
+  // -- DECODER INPUT:
+
+  SizeConstraints constraints;
+  // Used to set c_current for codecs that lack color space metadata.
+  DecoderHints dec_hints;
+  // Decode to pixels or keep JPEG as quantized DCT coefficients
+  DecodeTarget dec_target = DecodeTarget::kPixels;
+
+  // Intended white luminance, in nits (cd/m^2).
+  // It is used by codecs that do not know the absolute luminance of their
+  // images. For those codecs, decoders map from white to this luminance. There
+  // is no other way of knowing the target brightness for those codecs - depends
+  // on source material. 709 typically targets 100 nits, BT.2100 PQ up to 10K,
+  // but HDR content is more typically mastered to 4K nits. Codecs that do know
+  // the absolute luminance of their images will typically ignore it as a
+  // decoder input. The corresponding decoder output and encoder input is the
+  // intensity target in the metadata. ALL decoders MUST set that metadata
+  // appropriately, but it does not have to be identical to this hint. Encoders
+  // for codecs that do not encode absolute luminance levels should use that
+  // metadata to decide on what to map to white. Encoders for codecs that *do*
+  // encode absolute luminance levels may use it to decide on encoding values,
+  // but not in a way that would affect the range of interpreted luminance.
+  //
+  // 0 means that it is up to the codec to decide on a reasonable value to use.
+
+  float target_nits = 0;
+
+  // -- DECODER OUTPUT:
+
+  // Total number of pixels decoded (may differ from #frames * xsize * ysize
+  // if frames are cropped)
+  uint64_t dec_pixels = 0;
+
+  // -- DECODER OUTPUT, ENCODER INPUT:
+
+  // Metadata stored into / retrieved from bitstreams.
+
+  Blobs blobs;
+
+  CodecMetadata metadata;  // applies to preview and all frames
+
+  // If metadata.have_preview:
+  ImageBundle preview_frame;
+
+  std::vector<ImageBundle> frames;  // size=1 if !metadata.have_animation
+
+  bool use_sjpeg = false;
+  // If the image should be written to a JPEG, use this quality for encoding.
+  size_t jpeg_quality;  // NOTE(review): not initialized here; set before use.
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_CODEC_IN_OUT_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order.cc
new file mode 100644
index 0000000000..e87728339d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order.cc
@@ -0,0 +1,154 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/coeff_order.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/lehmer_code.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+void SetDefaultOrder(AcStrategy acs, coeff_order_t* JXL_RESTRICT order) {
+  PROFILER_FUNC;
+
+  // Fill `order` with the strategy's natural coefficient order, unchanged:
+  // kDCTBlockSize entries for each block the strategy covers.
+  const size_t covered = acs.covered_blocks_x() * acs.covered_blocks_y();
+  const coeff_order_t* const natural = acs.NaturalCoeffOrder();
+  std::copy_n(natural, kDCTBlockSize * covered, order);
+}
+
+uint32_t CoeffOrderContext(uint32_t val) {
+  uint32_t token, unused_nbits, unused_bits;  // Only the token picks a context.
+  HybridUintConfig(0, 0, 0).Encode(val, &token, &unused_nbits, &unused_bits);
+  return std::min(token, kPermutationContexts - 1);  // Clamp to last context.
+}
+
+namespace {
+Status ReadPermutation(size_t skip, size_t size, coeff_order_t* order,
+                       BitReader* br, ANSSymbolReader* reader,
+                       const std::vector<uint8_t>& context_map) {
+  std::vector<LehmerT> lehmer(size);  // Entries outside [skip, end) stay 0.
+  // temp space needs to be as large as the next power of 2, so doubling the
+  // allocated size is enough.
+  std::vector<uint32_t> temp(size * 2);
+  // Sum in 64 bits: narrowing to uint32_t could wrap an oversized count to a
+  // small value that slips past the bound check below.
+  const uint64_t end = skip + static_cast<uint64_t>(reader->ReadHybridUint(
+                                  CoeffOrderContext(size), br, context_map));
+  if (end > size) return JXL_FAILURE("Invalid permutation size");
+  uint32_t last = 0;
+  for (size_t i = skip; i < end; ++i) {
+    lehmer[i] =
+        reader->ReadHybridUint(CoeffOrderContext(last), br, context_map);
+    last = lehmer[i];
+    if (lehmer[i] + i >= size) {
+      return JXL_FAILURE("Invalid lehmer code");
+    }
+  }
+  if (order == nullptr) return true;  // Bits are consumed; nothing is stored.
+  DecodeLehmerCode(lehmer.data(), temp.data(), size, order);
+  return true;
+}
+
+}  // namespace
+
+Status DecodePermutation(size_t skip, size_t size, coeff_order_t* order,
+                         BitReader* br) {
+  // Self-contained variant: the histograms are read from `br` right here.
+  ANSCode ans_code;
+  std::vector<uint8_t> ctx_map;
+  JXL_RETURN_IF_ERROR(
+      DecodeHistograms(br, kPermutationContexts, &ans_code, &ctx_map));
+  ANSSymbolReader symbols(&ans_code, br);
+  JXL_RETURN_IF_ERROR(
+      ReadPermutation(skip, size, order, br, &symbols, ctx_map));
+  // Reject streams whose final ANS state does not check out.
+  if (!symbols.CheckANSFinalState()) return JXL_FAILURE("Invalid ANS stream");
+  return true;
+}
+
+namespace {
+
+Status DecodeCoeffOrder(AcStrategy acs, coeff_order_t* order, BitReader* br,
+                        ANSSymbolReader* reader,
+                        const std::vector<uint8_t>& context_map) {
+  PROFILER_FUNC;
+  const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
+  const size_t size = kDCTBlockSize * llf;  // Total entries for this strategy.
+
+  JXL_RETURN_IF_ERROR(  // llf is passed as `skip`: leading entries not coded.
+      ReadPermutation(llf, size, order, br, reader, context_map));
+  if (order == nullptr) return true;  // Stream advanced; nothing to store.
+  const coeff_order_t* natural_coeff_order = acs.NaturalCoeffOrder();
+  for (size_t k = 0; k < size; ++k) {
+    order[k] = natural_coeff_order[order[k]];  // Compose with natural order.
+  }
+  return true;
+}
+
+}  // namespace
+
+Status DecodeCoeffOrders(uint16_t used_orders, uint32_t used_acs,
+                         coeff_order_t* order, BitReader* br) {
+  uint16_t computed = 0;  // Bitmask of order buckets already handled.
+  std::vector<uint8_t> context_map;
+  ANSCode code;
+  std::unique_ptr<ANSSymbolReader> reader;  // Stays null if used_orders == 0.
+  // Bitstream does not have histograms if no coefficient order is used.
+  if (used_orders != 0) {
+    JXL_RETURN_IF_ERROR(
+        DecodeHistograms(br, kPermutationContexts, &code, &context_map));
+    reader = make_unique<ANSSymbolReader>(&code, br);
+  }
+  uint32_t acs_mask = 0;  // used_acs translated from strategies to buckets.
+  for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+    if ((used_acs & (1 << o)) == 0) continue;
+    acs_mask |= 1 << kStrategyOrder[o];
+  }
+  for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+    uint8_t ord = kStrategyOrder[o];
+    if (computed & (1 << ord)) continue;  // One pass per bucket only.
+    computed |= 1 << ord;
+    AcStrategy acs = AcStrategy::FromRawStrategy(o);
+    bool used = (acs_mask & (1 << ord)) != 0;
+    if ((used_orders & (1 << ord)) == 0) {
+      // No need to set the default order if no ACS uses this order.
+      if (used) {
+        for (size_t c = 0; c < 3; c++) {
+          SetDefaultOrder(acs, &order[CoeffOrderOffset(ord, c)]);
+        }
+      }
+    } else {
+      for (size_t c = 0; c < 3; c++) {  // Coded order: always read, maybe kept.
+        coeff_order_t* dest = used ? &order[CoeffOrderOffset(ord, c)] : nullptr;
+        JXL_RETURN_IF_ERROR(
+            DecodeCoeffOrder(acs, dest, br, reader.get(), context_map));
+      }
+    }
+  }
+  if (used_orders && !reader->CheckANSFinalState()) {
+    return JXL_FAILURE("Invalid ANS stream");
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order.h
new file mode 100644
index 0000000000..c600b7b3bf
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order.h
@@ -0,0 +1,66 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COEFF_ORDER_H_
+#define LIB_JXL_COEFF_ORDER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_bit_reader.h"
+
+namespace jxl {
+
+// Start offset of each (order, channel) slot, in units of kDCTBlockSize.
+static constexpr size_t kCoeffOrderOffset[] = {
+    0,    1,    2,    3,    4,    5,    6,    10,   14,   18,
+    34,   50,   66,   68,   70,   72,   76,   80,   84,   92,
+    100,  108,  172,  236,  300,  332,  364,  396,  652,  908,
+    1164, 1292, 1420, 1548, 2572, 3596, 4620, 5132, 5644, 6156,
+};  // 3 * kNumOrders + 1 entries; the last one is the end of the final slot.
+static_assert(3 * kNumOrders + 1 ==
+                  sizeof(kCoeffOrderOffset) / sizeof(*kCoeffOrderOffset),
+              "Update this array when adding or removing order types.");
+
+static constexpr size_t CoeffOrderOffset(size_t order, size_t c) {
+  return kCoeffOrderOffset[3 * order + c] * kDCTBlockSize;  // slot (order, c)
+}
+
+static constexpr size_t kCoeffOrderMaxSize =
+    kCoeffOrderOffset[3 * kNumOrders] * kDCTBlockSize;  // end of the last slot
+
+// Order bucket of each AC strategy, indexed by raw strategy value. Strategies
+// with different natural orders must have different buckets.
+constexpr uint8_t kStrategyOrder[] = {
+    0, 1, 1, 1, 2, 3, 4, 4, 5,  5,  6,  6,  1,  1,
+    1, 1, 1, 1, 7, 8, 8, 9, 10, 10, 11, 12, 12,
+};
+
+static_assert(AcStrategy::kNumValidStrategies ==
+                  sizeof(kStrategyOrder) / sizeof(*kStrategyOrder),
+              "Update this array when adding or removing AC strategies.");
+
+constexpr uint32_t kPermutationContexts = 8;
+
+uint32_t CoeffOrderContext(uint32_t val);
+
+void SetDefaultOrder(AcStrategy acs, coeff_order_t* JXL_RESTRICT order);
+
+Status DecodeCoeffOrders(uint16_t used_orders, uint32_t used_acs,
+                         coeff_order_t* order, BitReader* br);
+
+Status DecodePermutation(size_t skip, size_t size, coeff_order_t* order,
+                         BitReader* br);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_COEFF_ORDER_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order_fwd.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order_fwd.h
new file mode 100644
index 0000000000..700e9a83d4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order_fwd.h
@@ -0,0 +1,47 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COEFF_ORDER_FWD_H_
+#define LIB_JXL_COEFF_ORDER_FWD_H_
+
+// Breaks circular dependency between ac_strategy and coeff_order.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "base/compiler_specific.h"
+
+namespace jxl {
+
+// Needs at least 16 bits. A 32-bit type speeds up DecodeAC by 2% at the cost of
+// more memory.
+using coeff_order_t = uint32_t;
+
+// Maximum number of orders to be used. Note that this needs to be multiplied by
+// the number of channels. One per "size class" (plus one extra for DCT8),
+// shared between transforms of size XxY and of size YxX.
+constexpr uint8_t kNumOrders = 13;
+
+// Coefficient blocks are laid out with the smaller dimension as the row
+// count, so this returns min(rows, columns).
+JXL_INLINE constexpr size_t CoefficientRows(size_t rows, size_t columns) {
+  return columns < rows ? columns : rows;
+}
+
+JXL_INLINE constexpr size_t CoefficientColumns(size_t rows, size_t columns) {
+  return columns > rows ? columns : rows;  // max(rows, columns)
+}
+
+JXL_INLINE void CoefficientLayout(size_t* JXL_RESTRICT rows,
+                                  size_t* JXL_RESTRICT columns) {
+  // Snapshot both inputs before overwriting either of them in place.
+  const size_t in_rows = *rows, in_columns = *columns;
+  *rows = CoefficientRows(in_rows, in_columns);
+  *columns = CoefficientColumns(in_rows, in_columns);
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_COEFF_ORDER_FWD_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order_test.cc
new file mode 100644
index 0000000000..2408905001
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/coeff_order_test.cc
@@ -0,0 +1,101 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/coeff_order.h"
+
+#include <stdio.h>
+
+#include <algorithm>
+#include <numeric>  // iota
+#include <random>
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_coeff_order.h"
+
+namespace jxl {
+namespace {
+
+void RoundtripPermutation(coeff_order_t* perm, coeff_order_t* out, size_t len,
+                          size_t* size) {
+  BitWriter writer;  // Receives the encoded form of `perm`.
+  EncodePermutation(perm, 0, len, &writer, 0, nullptr);
+  writer.ZeroPadToByte();
+  Status status = true;
+  {  // Scoped so `closer` goes out of scope before `status` is checked.
+    BitReader reader(writer.GetSpan());
+    BitReaderScopedCloser closer(&reader, &status);
+    ASSERT_TRUE(DecodePermutation(0, len, out, &reader));
+  }
+  ASSERT_TRUE(status);
+  *size = writer.GetSpan().size();  // Length of the zero-padded encoding.
+}
+
+enum Permutation { kIdentity, kFewSwaps, kFewSlides, kRandom };
+
+constexpr size_t kNumReps = 128;
+constexpr size_t kSwaps = 32;
+
+void TestPermutation(Permutation kind, size_t len) {
+  // Builds the permutation selected by `kind`, then round-trips it kNumReps
+  // times. The engine is default-seeded, so the draws repeat across runs.
+  std::vector<coeff_order_t> perm(len);
+  std::iota(perm.begin(), perm.end(), 0);
+  std::mt19937 rng;
+  std::uniform_int_distribution<size_t> pick(0, len - 1);
+  switch (kind) {
+    case kIdentity:
+      break;
+    case kFewSwaps:
+      for (size_t rep = 0; rep < kSwaps; rep++) {
+        const size_t a = pick(rng);
+        const size_t b = pick(rng);
+        std::swap(perm[a], perm[b]);
+      }
+      break;
+    case kFewSlides:
+      for (size_t rep = 0; rep < kSwaps; rep++) {
+        const size_t a = pick(rng);
+        const size_t b = pick(rng);
+        // Move the element at min(a, b) to max(a, b); the rest shift left.
+        std::rotate(perm.begin() + std::min(a, b),
+                    perm.begin() + std::min(a, b) + 1,
+                    perm.begin() + std::max(a, b) + 1);
+      }
+      break;
+    case kRandom:
+      std::shuffle(perm.begin(), perm.end(), rng);
+      break;
+  }
+  std::vector<coeff_order_t> out(len);
+  size_t size = 0;
+  for (size_t rep = 0; rep < kNumReps; rep++) {
+    RoundtripPermutation(perm.data(), out.data(), len, &size);
+    for (size_t idx = 0; idx < len; idx++) EXPECT_EQ(perm[idx], out[idx]);
+  }
+  printf("Encoded size: %zu\n", size);
+}
+
+TEST(CoeffOrderTest, IdentitySmall) { TestPermutation(kIdentity, 256); }
+TEST(CoeffOrderTest, FewSlidesSmall) { TestPermutation(kFewSlides, 256); }
+TEST(CoeffOrderTest, FewSwapsSmall) { TestPermutation(kFewSwaps, 256); }
+TEST(CoeffOrderTest, RandomSmall) { TestPermutation(kRandom, 256); }
+
+TEST(CoeffOrderTest, IdentityMedium) { TestPermutation(kIdentity, 1 << 12); }
+TEST(CoeffOrderTest, FewSlidesMedium) { TestPermutation(kFewSlides, 1 << 12); }
+TEST(CoeffOrderTest, FewSwapsMedium) { TestPermutation(kFewSwaps, 1 << 12); }
+TEST(CoeffOrderTest, RandomMedium) { TestPermutation(kRandom, 1 << 12); }
+
+TEST(CoeffOrderTest, IdentityBig) { TestPermutation(kIdentity, 1 << 16); }
+TEST(CoeffOrderTest, FewSlidesBig) { TestPermutation(kFewSlides, 1 << 16); }
+TEST(CoeffOrderTest, FewSwapsBig) { TestPermutation(kFewSwaps, 1 << 16); }
+TEST(CoeffOrderTest, RandomBig) { TestPermutation(kRandom, 1 << 16); }
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/color_encoding_internal.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/color_encoding_internal.cc
new file mode 100644
index 0000000000..0a3899839b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/color_encoding_internal.cc
@@ -0,0 +1,782 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/color_encoding_internal.h"
+
+#include <errno.h>
+
+#include <array>
+#include <cmath>
+
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/linalg.h"
+
+namespace jxl {
+namespace {
+
+// Highest reasonable value for the gamma of a transfer curve.
+constexpr uint32_t kMaxGamma = 8192;
+
+// These tokens are part of the Description() string format - do not change.
+
+std::string ToString(ColorSpace color_space) {
+  switch (color_space) {
+    case ColorSpace::kRGB:
+      return "RGB";
+    case ColorSpace::kGray:
+      return "Gra";
+    case ColorSpace::kXYB:
+      return "XYB";
+    case ColorSpace::kUnknown:
+      return "CS?";
+  }
+  // Not reached for valid enumerators; the visitor rejects invalid values.
+  JXL_ABORT("Invalid ColorSpace %u", static_cast<uint32_t>(color_space));
+}
+
+std::string ToString(WhitePoint white_point) {
+  switch (white_point) {
+    case WhitePoint::kD65:
+      return "D65";
+    case WhitePoint::kCustom:
+      return "Cst";  // same token as Primaries::kCustom
+    case WhitePoint::kE:
+      return "EER";
+    case WhitePoint::kDCI:
+      return "DCI";
+  }
+  // Not reached for valid enumerators; the visitor rejects invalid values.
+  JXL_ABORT("Invalid WhitePoint %u", static_cast<uint32_t>(white_point));
+}
+
+std::string ToString(Primaries primaries) {
+  switch (primaries) {
+    case Primaries::kSRGB:
+      return "SRG";
+    case Primaries::k2100:
+      return "202";
+    case Primaries::kP3:
+      return "DCI";  // same token as WhitePoint::kDCI
+    case Primaries::kCustom:
+      return "Cst";  // same token as WhitePoint::kCustom
+  }
+  // Not reached for valid enumerators; the visitor rejects invalid values.
+  JXL_ABORT("Invalid Primaries %u", static_cast<uint32_t>(primaries));
+}
+
+std::string ToString(TransferFunction transfer_function) {
+  switch (transfer_function) {
+    case TransferFunction::kSRGB:
+      return "SRG";  // same token as Primaries::kSRGB
+    case TransferFunction::kLinear:
+      return "Lin";
+    case TransferFunction::k709:
+      return "709";
+    case TransferFunction::kPQ:
+      return "PeQ";
+    case TransferFunction::kHLG:
+      return "HLG";
+    case TransferFunction::kDCI:
+      return "DCI";
+    case TransferFunction::kUnknown:
+      return "TF?";
+  }
+  // Not reached for valid enumerators; the visitor rejects invalid values.
+  JXL_ABORT("Invalid TransferFunction %u",
+            static_cast<uint32_t>(transfer_function));
+}
+
+std::string ToString(RenderingIntent rendering_intent) {
+  switch (rendering_intent) {
+    case RenderingIntent::kPerceptual:
+      return "Per";
+    case RenderingIntent::kRelative:
+      return "Rel";
+    case RenderingIntent::kSaturation:
+      return "Sat";
+    case RenderingIntent::kAbsolute:
+      return "Abs";
+  }
+  // Not reached for valid enumerators; the visitor rejects invalid values.
+  JXL_ABORT("Invalid RenderingIntent %u",
+            static_cast<uint32_t>(rendering_intent));
+}
+
+template <typename Enum>
+Status ParseEnum(const std::string& token, Enum* value) {
+  // Scans the values from Values<Enum>(); *value is untouched on failure.
+  for (Enum e : Values<Enum>()) {
+    if (ToString(e) == token) {
+      *value = e;
+      return true;
+    }
+  }
+  return false;
+}
+
+// Splits `*input` on `separator`; each Next() call yields one token.
+class Tokenizer {
+ public:
+  Tokenizer(const std::string* input, char separator)
+      : input_(input), separator_(separator) {}
+
+  // Stores the next token in *next. Fails if that token is empty, which
+  // includes the case where the whole input has already been consumed.
+  Status Next(std::string* JXL_RESTRICT next) {
+    const size_t end = input_->find(separator_, start_);
+    *next = input_->substr(start_, end - start_);  // npos: rest of string
+    if (next->empty()) return JXL_FAILURE("Missing token");
+    // After the last token stay at the end: npos + 1 would wrap around to 0.
+    start_ = (end == std::string::npos) ? input_->size() : end + 1;
+    return true;
+  }
+
+ private:
+  const std::string* const input_;  // not owned
+  const char separator_;
+  size_t start_ = 0;  // of next token
+};
+
+// Parses the leading numeric prefix of `num` into *d. Fails if nothing could
+// be converted, if the result is NaN, or if strtod reported ERANGE.
+Status ParseDouble(const std::string& num, double* JXL_RESTRICT d) {
+  const char* const text = num.c_str();
+  char* parse_end;
+  errno = 0;  // Cleared so that ERANGE below can only come from strtod.
+  *d = strtod(text, &parse_end);
+  if ((*d == 0.0 && parse_end == text) || std::isnan(*d)) {
+    return JXL_FAILURE("Invalid double: %s", text);
+  }
+  if (errno == ERANGE) {
+    return JXL_FAILURE("Double out of range: %s", text);
+  }
+  return true;
+}
+
+Status ParseDouble(Tokenizer* tokenizer, double* JXL_RESTRICT d) {
+  std::string token;  // next token of `tokenizer`, parsed as a double below
+  JXL_RETURN_IF_ERROR(tokenizer->Next(&token));
+  return ParseDouble(token, d);
+}
+
+Status ParseColorSpace(Tokenizer* JXL_RESTRICT tokenizer,
+                       ColorEncoding* JXL_RESTRICT c) {
+  // The color space is the next token and must match a ColorSpace token.
+  std::string token;
+  JXL_RETURN_IF_ERROR(tokenizer->Next(&token));
+  ColorSpace parsed;
+  if (!ParseEnum(token, &parsed)) {
+    return JXL_FAILURE("Unknown ColorSpace %s", token.c_str());
+  }
+  c->SetColorSpace(parsed);
+  return true;
+}
+
+Status ParseWhitePoint(Tokenizer* JXL_RESTRICT tokenizer,
+                       ColorEncoding* JXL_RESTRICT c) {
+  // An implicit white point has no token in the description.
+  if (c->ImplicitWhitePoint()) return true;
+  std::string token;
+  JXL_RETURN_IF_ERROR(tokenizer->Next(&token));
+  // First try a named white point ...
+  if (ParseEnum(token, &c->white_point)) return true;
+  // ... otherwise expect custom coordinates written as "x;y".
+  CIExy custom;
+  Tokenizer coords(&token, ';');
+  JXL_RETURN_IF_ERROR(ParseDouble(&coords, &custom.x));
+  JXL_RETURN_IF_ERROR(ParseDouble(&coords, &custom.y));
+  if (c->SetWhitePoint(custom)) return true;
+  return JXL_FAILURE("Invalid white point %s", token.c_str());
+}
+
+Status ParsePrimaries(Tokenizer* JXL_RESTRICT tokenizer,
+                      ColorEncoding* JXL_RESTRICT c) {
+  // Encodings without primaries have no token in the description.
+  if (!c->HasPrimaries()) return true;
+  std::string token;
+  JXL_RETURN_IF_ERROR(tokenizer->Next(&token));
+  // A named set of primaries is tried before custom coordinates.
+  if (ParseEnum(token, &c->primaries)) return true;
+  // Custom primaries: six ';'-separated doubles, ordered rx;ry;gx;gy;bx;by.
+  PrimariesCIExy custom;
+  Tokenizer coords(&token, ';');
+  double* const fields[] = {&custom.r.x, &custom.r.y, &custom.g.x,
+                            &custom.g.y, &custom.b.x, &custom.b.y};
+  for (double* field : fields) {
+    JXL_RETURN_IF_ERROR(ParseDouble(&coords, field));
+  }
+
+  if (c->SetPrimaries(custom)) return true;
+  return JXL_FAILURE("Invalid primaries %s", token.c_str());
+}
+
+Status ParseRenderingIntent(Tokenizer* JXL_RESTRICT tokenizer,
+                            ColorEncoding* JXL_RESTRICT c) {
+  std::string str;
+  JXL_RETURN_IF_ERROR(tokenizer->Next(&str));
+  if (ParseEnum(str, &c->rendering_intent)) return true;
+  // No trailing newline, matching every other failure message in this file.
+  return JXL_FAILURE("Invalid RenderingIntent %s", str.c_str());
+}
+
+Status ParseTransferFunction(Tokenizer* JXL_RESTRICT tokenizer,
+                             ColorEncoding* JXL_RESTRICT c) {
+  // An implicit transfer function has no token in the description.
+  if (c->tf.SetImplicit()) return true;
+
+  std::string token;
+  JXL_RETURN_IF_ERROR(tokenizer->Next(&token));
+  TransferFunction named;
+  if (ParseEnum(token, &named)) {
+    c->tf.SetTransferFunction(named);
+    return true;
+  }
+  // Otherwise accept "g<number>"; Next() never yields an empty token.
+  if (token[0] == 'g') {
+    double gamma;
+    JXL_RETURN_IF_ERROR(ParseDouble(token.substr(1), &gamma));
+    if (c->tf.SetGamma(gamma)) return true;
+  }
+  return JXL_FAILURE("Invalid gamma %s", token.c_str());
+}
+
+// Custom xy coordinates are stored as int32 fixed point in units of 1E-6.
+static double F64FromCustomxyI32(const int32_t i) { return i * 1E-6; }
+static Status F64ToCustomxyI32(const double f, int32_t* JXL_RESTRICT i) {
+  const bool in_range = (-4 <= f && f <= 4);  // false for NaN as well
+  if (!in_range) return JXL_FAILURE("F64 out of bounds for CustomxyI32");
+  *i = static_cast<int32_t>(roundf(f * 1E6));  // roundf: float precision
+  return true;
+}
+
+}  // namespace
+
+CIExy Customxy::Get() const {
+  CIExy result;  // members x, y are fixed point; convert each to double
+  result.x = F64FromCustomxyI32(x);
+  result.y = F64FromCustomxyI32(y);
+  return result;
+}
+
+Status Customxy::Set(const CIExy& xy) {
+  JXL_RETURN_IF_ERROR(F64ToCustomxyI32(xy.x, &x));  // range-checked convert
+  JXL_RETURN_IF_ERROR(F64ToCustomxyI32(xy.y, &y));
+  size_t extension_bits, total_bits;  // outputs of CanEncode; not used here
+  if (!Bundle::CanEncode(*this, &extension_bits, &total_bits)) {
+    return JXL_FAILURE("Unable to encode XY %f %f", xy.x, xy.y);
+  }
+  return true;
+}
+
+bool CustomTransferFunction::SetImplicit() {
+  // Only the XYB color space implies a transfer function (gamma 1/3); all
+  // other color spaces report false and leave this object untouched.
+  if (nonserialized_color_space != ColorSpace::kXYB) return false;
+  if (!SetGamma(1.0 / 3)) JXL_ASSERT(false);
+  return true;
+}
+
+// Sets the transfer function from a gamma exponent in [1 / kMaxGamma, 1].
+Status CustomTransferFunction::SetGamma(double gamma) {
+  // Negated range test so NaN is rejected before the integer conversion.
+  if (!(gamma >= (1.0f / kMaxGamma) && gamma <= 1.0)) {
+    return JXL_FAILURE("Invalid gamma %f", gamma);
+  }
+  have_gamma_ = false;
+  if (ApproxEq(gamma, 1.0)) {
+    transfer_function_ = TransferFunction::kLinear;
+    return true;
+  }
+  if (ApproxEq(gamma, 1.0 / 2.6)) {
+    transfer_function_ = TransferFunction::kDCI;
+    return true;
+  }
+  // Don't translate 0.45.. to kSRGB nor k709 - that might change pixel
+  // values because those curves also have a linear part.
+  have_gamma_ = true;
+  gamma_ = roundf(gamma * kGammaMul);
+  transfer_function_ = TransferFunction::kUnknown;
+  return true;
+}
+
+namespace {
+
+std::array<ColorEncoding, 2> CreateC2(const Primaries pr,
+                                      const TransferFunction tf) {
+  std::array<ColorEncoding, 2> c2;  // [0] = RGB variant, [1] = gray variant
+
+  {
+    ColorEncoding* c_rgb = c2.data() + 0;
+    c_rgb->SetColorSpace(ColorSpace::kRGB);
+    c_rgb->white_point = WhitePoint::kD65;
+    c_rgb->primaries = pr;
+    c_rgb->tf.SetTransferFunction(tf);
+    JXL_CHECK(c_rgb->CreateICC());  // ICC is built from the fields set above.
+  }
+
+  {
+    ColorEncoding* c_gray = c2.data() + 1;
+    c_gray->SetColorSpace(ColorSpace::kGray);
+    c_gray->white_point = WhitePoint::kD65;
+    c_gray->primaries = pr;
+    c_gray->tf.SetTransferFunction(tf);
+    JXL_CHECK(c_gray->CreateICC());
+  }
+
+  return c2;
+}
+
+}  // namespace
+
+const ColorEncoding& ColorEncoding::SRGB(bool is_gray) {
+  static std::array<ColorEncoding, 2> c2 =  // function-local static pair
+      CreateC2(Primaries::kSRGB, TransferFunction::kSRGB);
+  return c2[is_gray];  // [0] = RGB variant, [1] = gray variant
+}
+const ColorEncoding& ColorEncoding::LinearSRGB(bool is_gray) {
+  static std::array<ColorEncoding, 2> c2 =  // function-local static pair
+      CreateC2(Primaries::kSRGB, TransferFunction::kLinear);
+  return c2[is_gray];  // [0] = RGB variant, [1] = gray variant
+}
+
+CIExy ColorEncoding::GetWhitePoint() const {
+  JXL_DASSERT(have_fields_);
+  CIExy xy;  // Filled in below for the named (non-custom) white points.
+  switch (white_point) {
+    case WhitePoint::kCustom:
+      return white_.Get();  // stored custom coordinates
+
+    case WhitePoint::kD65:
+      xy.x = 0.3127;
+      xy.y = 0.3290;
+      return xy;
+
+    case WhitePoint::kDCI:
+      // From https://ieeexplore.ieee.org/document/7290729 C.2 page 11
+      xy.x = 0.314;
+      xy.y = 0.351;
+      return xy;
+
+    case WhitePoint::kE:
+      xy.x = xy.y = 1.0 / 3;
+      return xy;
+  }
+  JXL_ABORT("Invalid WhitePoint %u", static_cast<uint32_t>(white_point));
+}
+
+Status ColorEncoding::SetWhitePoint(const CIExy& xy) {
+  JXL_DASSERT(have_fields_);
+  if (xy.x == 0.0 || xy.y == 0.0) {
+    return JXL_FAILURE("Invalid white point %f %f", xy.x, xy.y);
+  }
+  // Snap to a named white point when the coordinates approximately match one.
+  struct Named { WhitePoint id; double x, y; };
+  const Named named[] = {{WhitePoint::kD65, 0.3127, 0.3290},
+                         {WhitePoint::kE, 1.0 / 3, 1.0 / 3},
+                         {WhitePoint::kDCI, 0.314, 0.351}};
+  for (const Named& n : named) {
+    if (ApproxEq(xy.x, n.x) && ApproxEq(xy.y, n.y)) {
+      white_point = n.id;
+      return true;
+    }
+  }
+  // Anything else is stored as custom coordinates.
+  white_point = WhitePoint::kCustom;
+  return white_.Set(xy);
+}
+
+PrimariesCIExy ColorEncoding::GetPrimaries() const {
+  JXL_DASSERT(have_fields_);
+  JXL_ASSERT(HasPrimaries());
+  PrimariesCIExy xy;  // Filled in by whichever case matches `primaries`.
+  switch (primaries) {
+    case Primaries::kCustom:
+      xy.r = red_.Get();  // stored custom coordinates
+      xy.g = green_.Get();
+      xy.b = blue_.Get();
+      return xy;
+
+    case Primaries::kSRGB:
+      xy.r.x = 0.639998686;
+      xy.r.y = 0.330010138;
+      xy.g.x = 0.300003784;
+      xy.g.y = 0.600003357;
+      xy.b.x = 0.150002046;
+      xy.b.y = 0.059997204;
+      return xy;
+
+    case Primaries::k2100:
+      xy.r.x = 0.708;
+      xy.r.y = 0.292;
+      xy.g.x = 0.170;
+      xy.g.y = 0.797;
+      xy.b.x = 0.131;
+      xy.b.y = 0.046;
+      return xy;
+
+    case Primaries::kP3:
+      xy.r.x = 0.680;
+      xy.r.y = 0.320;
+      xy.g.x = 0.265;
+      xy.g.y = 0.690;
+      xy.b.x = 0.150;
+      xy.b.y = 0.060;
+      return xy;
+  }
+  JXL_ABORT("Invalid Primaries %u", static_cast<uint32_t>(primaries));
+}
+
+Status ColorEncoding::SetPrimaries(const PrimariesCIExy& xy) {
+  JXL_DASSERT(have_fields_);
+  JXL_ASSERT(HasPrimaries());
+  // Flattened as rx, ry, gx, gy, bx, by so the checks below can loop.
+  const double coords[6] = {xy.r.x, xy.r.y, xy.g.x, xy.g.y, xy.b.x, xy.b.y};
+  for (const double coord : coords) {
+    if (coord == 0.0) {
+      return JXL_FAILURE("Invalid primaries %f %f %f %f %f %f", xy.r.x, xy.r.y,
+                         xy.g.x, xy.g.y, xy.b.x, xy.b.y);
+    }
+  }
+  // Snap to a named set when all six coordinates approximately match it.
+  struct Named { Primaries id; double coords[6]; };
+  const Named kNamed[] = {
+      {Primaries::kSRGB, {0.64, 0.33, 0.30, 0.60, 0.15, 0.06}},
+      {Primaries::k2100, {0.708, 0.292, 0.170, 0.797, 0.131, 0.046}},
+      {Primaries::kP3, {0.680, 0.320, 0.265, 0.690, 0.150, 0.060}},
+  };
+  for (const Named& named : kNamed) {
+    bool matches = true;
+    for (size_t i = 0; matches && i < 6; ++i) {
+      if (!ApproxEq(coords[i], named.coords[i])) matches = false;
+    }
+    if (matches) {
+      primaries = named.id;
+      return true;
+    }
+  }
+  // No match: store the coordinates as custom primaries.
+  primaries = Primaries::kCustom;
+  JXL_RETURN_IF_ERROR(red_.Set(xy.r));
+  JXL_RETURN_IF_ERROR(green_.Set(xy.g));
+  JXL_RETURN_IF_ERROR(blue_.Set(xy.b));
+  return true;
+}
+
+Status ColorEncoding::CreateICC() {
+  // Discard whatever InternalRemoveICC() clears, then try to build icc_ from
+  // this encoding via MaybeCreateProfile().
+  InternalRemoveICC();
+  if (MaybeCreateProfile(*this, &icc_)) return true;
+  return JXL_FAILURE("Failed to create profile from fields");
+}
+
+std::string Description(const ColorEncoding& c_in) {
+  // Work on a copy: the Implicit* calls below need a mutable encoding.
+  ColorEncoding c = c_in;
+  // Appends "x;y" for one pair of chromaticity coordinates.
+  const auto append_xy = [](const CIExy& xy, std::string* out) {
+    *out += ToString(xy.x) + ';';
+    *out += ToString(xy.y);
+  };
+  std::string d = ToString(c.GetColorSpace());
+
+  if (!c.ImplicitWhitePoint()) {
+    d += '_';
+    if (c.white_point == WhitePoint::kCustom) {
+      append_xy(c.GetWhitePoint(), &d);
+    } else {
+      d += ToString(c.white_point);
+    }
+  }
+
+  if (c.HasPrimaries()) {
+    d += '_';
+    if (c.primaries == Primaries::kCustom) {
+      const PrimariesCIExy pr = c.GetPrimaries();
+      append_xy(pr.r, &d);
+      d += ';';
+      append_xy(pr.g, &d);
+      d += ';';
+      append_xy(pr.b, &d);
+    } else {
+      d += ToString(c.primaries);
+    }
+  }
+
+  d += '_';
+  d += ToString(c.rendering_intent);
+
+  if (!c.tf.SetImplicit()) {
+    d += '_';
+    if (c.tf.IsGamma()) {
+      d += 'g';
+      d += ToString(c.tf.GetGamma());
+    } else {
+      d += ToString(c.tf.GetTransferFunction());
+    }
+  }
+  return d;
+}
+
+Status ParseDescription(const std::string& description,
+                        ColorEncoding* JXL_RESTRICT c) {
+  Tokenizer tokenizer(&description, '_');  // fields are separated by '_'
+  JXL_RETURN_IF_ERROR(ParseColorSpace(&tokenizer, c));
+  JXL_RETURN_IF_ERROR(ParseWhitePoint(&tokenizer, c));  // may read no token
+  JXL_RETURN_IF_ERROR(ParsePrimaries(&tokenizer, c));   // may read no token
+  JXL_RETURN_IF_ERROR(ParseRenderingIntent(&tokenizer, c));
+  JXL_RETURN_IF_ERROR(ParseTransferFunction(&tokenizer, c));  // may read none
+  return true;
+}
+
+Customxy::Customxy() { Bundle::Init(this); }
+Status Customxy::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  // x and y are visited identically, x first: the signed value is packed into
+  // an unsigned one, visited as a U32 with 19/19/20/21-bit alternatives (the
+  // last three offset), and unpacked again.
+  for (int32_t* coord : {&x, &y}) {
+    uint32_t u = PackSigned(*coord);
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Bits(19), BitsOffset(19, 524288),
+                                           BitsOffset(20, 1048576),
+                                           BitsOffset(21, 2097152), 0, &u));
+    *coord = UnpackSigned(u);
+  }
+  return true;
+}
+
+CustomTransferFunction::CustomTransferFunction() { Bundle::Init(this); }
+Status CustomTransferFunction::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  if (visitor->Conditional(!SetImplicit())) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_gamma_));
+
+    if (visitor->Conditional(have_gamma_)) {
+      // Gamma is represented as a 24-bit int, the exponent used is
+      // gamma_ / kGammaMul. Valid exponents are (0, 1]; on the low end they
+      // are additionally limited to 1 / kMaxGamma.
+      JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(24, kGammaMul, &gamma_));
+      if (gamma_ > kGammaMul ||
+          static_cast<uint64_t>(gamma_) * kMaxGamma < kGammaMul) {
+        return JXL_FAILURE("Invalid gamma %u", gamma_);
+      }
+    }
+
+    if (visitor->Conditional(!have_gamma_)) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->Enum(TransferFunction::kSRGB, &transfer_function_));
+    }
+  }
+
+  return true;
+}
+
+ColorEncoding::ColorEncoding() { Bundle::Init(this); }
+Status ColorEncoding::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  if (visitor->AllDefault(*this, &all_default)) {
+    // Overwrite all serialized fields, but not any nonserialized_*.
+    visitor->SetDefault(this);
+    return true;
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &want_icc_));
+
+  // Always send even if want_icc_ because this affects decoding.
+  // We can skip the white point/primaries because they do not.
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Enum(ColorSpace::kRGB, &color_space_));
+
+  if (visitor->Conditional(!WantICC())) {
+    // Serialize enums. NOTE: we set the defaults to the most common values so
+    // ImageMetadata.all_default is true in the common case.
+
+    if (visitor->Conditional(!ImplicitWhitePoint())) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->Enum(WhitePoint::kD65, &white_point));
+      if (visitor->Conditional(white_point == WhitePoint::kCustom)) {
+        JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&white_));
+      }
+    }
+
+    if (visitor->Conditional(HasPrimaries())) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->Enum(Primaries::kSRGB, &primaries));
+      if (visitor->Conditional(primaries == Primaries::kCustom)) {
+        JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&red_));
+        JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&green_));
+        JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&blue_));
+      }
+    }
+
+    JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&tf));
+
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->Enum(RenderingIntent::kRelative, &rendering_intent));
+
+    // We didn't have ICC, so all fields should be known.
+    if (color_space_ == ColorSpace::kUnknown || tf.IsUnknown()) {
+      return JXL_FAILURE(
+          "No ICC but cs %u and tf %u%s",
+          static_cast<unsigned int>(color_space_),
+          tf.IsGamma() ? 0
+                       : static_cast<unsigned int>(tf.GetTransferFunction()),
+          tf.IsGamma() ? "(gamma)" : "");
+    }
+
+    JXL_RETURN_IF_ERROR(CreateICC());  // Rebuild icc_ from the visited fields.
+  }
+
+  if (WantICC() && visitor->IsReading()) {
+    // Haven't called SetICC() yet, do nothing.
+  } else {
+    if (ICC().empty()) return JXL_FAILURE("Empty ICC");  // all other cases
+  }
+
+  return true;
+}
+
+void ConvertInternalToExternalColorEncoding(const ColorEncoding& internal,
+                                            JxlColorEncoding* external) {
+  // Enum fields are converted with plain casts of their numeric values.
+  external->color_space = static_cast<JxlColorSpace>(internal.GetColorSpace());
+  external->rendering_intent =
+      static_cast<JxlRenderingIntent>(internal.rendering_intent);
+
+  external->white_point = static_cast<JxlWhitePoint>(internal.white_point);
+  const CIExy wp = internal.GetWhitePoint();
+  external->white_point_xy[0] = wp.x;
+  external->white_point_xy[1] = wp.y;
+
+  // Primaries are only filled in for RGB and unknown color spaces.
+  const bool has_primaries = external->color_space == JXL_COLOR_SPACE_RGB ||
+                             external->color_space == JXL_COLOR_SPACE_UNKNOWN;
+  if (has_primaries) {
+    external->primaries = static_cast<JxlPrimaries>(internal.primaries);
+    const PrimariesCIExy pr = internal.GetPrimaries();
+    external->primaries_red_xy[0] = pr.r.x;
+    external->primaries_red_xy[1] = pr.r.y;
+    external->primaries_green_xy[0] = pr.g.x;
+    external->primaries_green_xy[1] = pr.g.y;
+    external->primaries_blue_xy[0] = pr.b.x;
+    external->primaries_blue_xy[1] = pr.b.y;
+  }
+
+  // A gamma curve is reported through `gamma`; named curves leave it at 0.
+  const bool is_gamma = internal.tf.IsGamma();
+  external->transfer_function =
+      is_gamma ? JXL_TRANSFER_FUNCTION_GAMMA
+               : static_cast<JxlTransferFunction>(
+                     internal.tf.GetTransferFunction());
+  external->gamma = is_gamma ? internal.tf.GetGamma() : 0;
+}
+
+// Fills `internal` from the public API struct; fails on invalid values.
+Status ConvertExternalToInternalColorEncoding(const JxlColorEncoding& external,
+                                              ColorEncoding* internal) {
+  internal->SetColorSpace(static_cast<ColorSpace>(external.color_space));
+  CIExy wp;
+  wp.x = external.white_point_xy[0];
+  wp.y = external.white_point_xy[1];
+  JXL_RETURN_IF_ERROR(internal->SetWhitePoint(wp));
+  if (external.color_space == JXL_COLOR_SPACE_RGB ||
+      external.color_space == JXL_COLOR_SPACE_UNKNOWN) {
+    internal->primaries = static_cast<Primaries>(external.primaries);
+    PrimariesCIExy primaries;
+    primaries.r.x = external.primaries_red_xy[0];
+    primaries.r.y = external.primaries_red_xy[1];
+    primaries.g.x = external.primaries_green_xy[0];
+    primaries.g.y = external.primaries_green_xy[1];
+    primaries.b.x = external.primaries_blue_xy[0];
+    primaries.b.y = external.primaries_blue_xy[1];
+    JXL_RETURN_IF_ERROR(internal->SetPrimaries(primaries));
+  }
+  // `tf` replaces internal->tf wholesale, so it must carry the color space
+  // itself; SetImplicit() depends on it.
+  CustomTransferFunction tf;
+  tf.nonserialized_color_space = internal->GetColorSpace();
+  if (external.transfer_function == JXL_TRANSFER_FUNCTION_GAMMA) {
+    JXL_RETURN_IF_ERROR(tf.SetGamma(external.gamma));
+  } else {
+    tf.SetTransferFunction(
+        static_cast<TransferFunction>(external.transfer_function));
+  }
+  internal->tf = tf;
+  internal->rendering_intent =
+      static_cast<RenderingIntent>(external.rendering_intent);
+  return true;
+}
+
+/* Bradford chromatic adaptation matrix and its inverse. */
+static const float kBradford[9] = {
+    0.8951f, 0.2664f, -0.1614f, -0.7502f, 1.7135f,
+    0.0367f, 0.0389f, -0.0685f, 1.0296f,
+};
+
+static const float kBradfordInv[9] = {
+    0.9869929f, -0.1470543f, 0.1599627f, 0.4323053f, 0.5183603f,
+    0.0492912f, -0.0085287f, 0.0400428f, 0.9684867f,
+};
+
+// Writes to `matrix` the Bradford adaptation from white point (wx, wy) to D50.
+Status AdaptToXYZD50(float wx, float wy, float matrix[9]) {
+  if (wx < 0 || wx > 1 || wy <= 0 || wy > 1) {
+    // Out of range values can cause division through zero
+    // further down with the bradford adaptation too.
+    return JXL_FAILURE("Invalid white point");
+  }
+  float w[3] = {wx / wy, 1.0f, (1.0f - wx - wy) / wy};
+  // 1 / tiny float can still overflow
+  JXL_RETURN_IF_ERROR(std::isfinite(w[0]) && std::isfinite(w[2]));
+  float w50[3] = {0.96422f, 1.0f, 0.82521f};
+  float lms[3];
+  float lms50[3];
+  MatMul(kBradford, w, 3, 3, 1, lms);
+  MatMul(kBradford, w50, 3, 3, 1, lms50);
+  // In-range inputs can still yield a zero LMS component; dividing by it
+  // would put inf/NaN into the scaling matrix below.
+  if (lms[0] == 0 || lms[1] == 0 || lms[2] == 0) {
+    return JXL_FAILURE("Invalid white point");
+  }
+  float a[9] = {
+      lms50[0] / lms[0], 0, 0, 0, lms50[1] / lms[1], 0, 0, 0, lms50[2] / lms[2],
+  };
+  float b[9];
+  MatMul(a, kBradford, 3, 3, 3, b);
+  MatMul(kBradfordInv, b, 3, 3, 3, matrix);
+  return true;
+}
+
+// Writes to `matrix` the 3x3 result for the given primaries and white point.
+Status PrimariesToXYZD50(float rx, float ry, float gx, float gy, float bx,
+                         float by, float wx, float wy, float matrix[9]) {
+  if (wx < 0 || wx > 1 || wy <= 0 || wy > 1) {
+    return JXL_FAILURE("Invalid white point");
+  }
+  // TODO(lode): also require rx, ry, gx, gy, bx, to be in range 0-1? ICC
+  // profiles in theory forbid negative XYZ values, but in practice the ACES P0
+  // color space uses a negative y for the blue primary.
+  // Layout: the x values of r, g, b, then their y values, then z = 1 - x - y.
+  float primaries[9] = {
+      rx, gx, bx, ry, gy, by, 1.0f - rx - ry, 1.0f - gx - gy, 1.0f - bx - by};
+  float primaries_inv[9];
+  for (size_t i = 0; i < 9; ++i) primaries_inv[i] = primaries[i];
+  JXL_RETURN_IF_ERROR(Inv3x3Matrix(primaries_inv));
+  float white[3] = {wx / wy, 1.0f, (1.0f - wx - wy) / wy};
+  // 1 / tiny float can still overflow
+  JXL_RETURN_IF_ERROR(std::isfinite(white[0]) && std::isfinite(white[2]));
+  float scale[3];
+  MatMul(primaries_inv, white, 3, 3, 1, scale);
+
+  // Diagonal matrix holding the three `scale` components.
+  float diag[9] = {
+      scale[0], 0, 0, 0, scale[1], 0, 0, 0, scale[2],
+  };
+  float to_xyz[9];
+  MatMul(primaries, diag, 3, 3, 3, to_xyz);
+
+  float adapt[9];
+  JXL_RETURN_IF_ERROR(AdaptToXYZD50(wx, wy, adapt));
+  MatMul(adapt, to_xyz, 3, 3, 3, matrix);
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/color_encoding_internal.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/color_encoding_internal.h
new file mode 100644
index 0000000000..13ee3b433f
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/color_encoding_internal.h
@@ -0,0 +1,462 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COLOR_ENCODING_INTERNAL_H_
+#define LIB_JXL_COLOR_ENCODING_INTERNAL_H_
+
+// Metadata for color space conversions.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <cmath>  // std::abs
+#include <ostream>
+#include <string>
+#include <vector>
+
+#include "jxl/color_encoding.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+// (All CIE units are for the standard 1931 2 degree observer)
+
+// Color space the color pixel data is encoded in. The color pixel data is
+// 3-channel in all cases except in case of kGray, where it uses only 1 channel.
+// This also determines the amount of channels used in modular encoding.
+enum class ColorSpace : uint32_t {
+  // Trichromatic color data. This also includes CMYK if a kBlack
+  // ExtraChannelInfo is present. This implies, if there is an ICC profile, that
+  // the ICC profile uses a 3-channel color space if no kBlack extra channel is
+  // present, or uses color space 'CMYK' if a kBlack extra channel is present.
+  kRGB,
+  // Single-channel data. This implies, if there is an ICC profile, that the ICC
+  // profile also represents single-channel data and has the appropriate color
+  // space ('GRAY').
+  kGray,
+  // Like kRGB, but implies fixed values for primaries etc.
+  kXYB,
+  // For non-RGB/gray data, e.g. from non-electro-optical sensors. Otherwise
+  // the same conditions as kRGB apply.
+  kUnknown
+};
+
+static inline const char* EnumName(ColorSpace /*unused*/) {
+  return "ColorSpace";
+}
+static inline constexpr uint64_t EnumBits(ColorSpace /*unused*/) {
+  // OR of MakeBit() over every ColorSpace enumerator.
+  return MakeBit(ColorSpace::kRGB) | MakeBit(ColorSpace::kGray) |
+         MakeBit(ColorSpace::kXYB) | MakeBit(ColorSpace::kUnknown);
+}
+
+// Values from CICP ColourPrimaries.
+enum class WhitePoint : uint32_t {
+  kD65 = 1,     // sRGB/BT.709/Display P3/BT.2020
+  kCustom = 2,  // Actual values encoded in separate fields
+  kE = 10,      // XYZ
+  kDCI = 11,    // DCI-P3
+};
+
+static inline const char* EnumName(WhitePoint /*unused*/) {
+  return "WhitePoint";
+}
+static inline constexpr uint64_t EnumBits(WhitePoint /*unused*/) {
+  return MakeBit(WhitePoint::kD65) | MakeBit(WhitePoint::kCustom) |
+         MakeBit(WhitePoint::kE) | MakeBit(WhitePoint::kDCI);
+}
+
+// Values from CICP ColourPrimaries
+enum class Primaries : uint32_t {
+  kSRGB = 1,    // Same as BT.709
+  kCustom = 2,  // Actual values encoded in separate fields
+  k2100 = 9,    // Same as BT.2020
+  kP3 = 11,
+};
+
+static inline const char* EnumName(Primaries /*unused*/) { return "Primaries"; }
+static inline constexpr uint64_t EnumBits(Primaries /*unused*/) {
+  // OR of MakeBit() over every Primaries enumerator.
+  return MakeBit(Primaries::kSRGB) | MakeBit(Primaries::kCustom) |
+         MakeBit(Primaries::k2100) | MakeBit(Primaries::kP3);
+}
+
+// Values from CICP TransferCharacteristics
+enum TransferFunction : uint32_t {
+  k709 = 1,
+  kUnknown = 2,
+  kLinear = 8,
+  kSRGB = 13,
+  kPQ = 16,   // from BT.2100
+  kDCI = 17,  // from SMPTE RP 431-2 reference projector
+  kHLG = 18,  // from BT.2100
+};
+
+static inline const char* EnumName(TransferFunction /*unused*/) {
+  return "TransferFunction";
+}
+static inline constexpr uint64_t EnumBits(TransferFunction /*unused*/) {
+  return MakeBit(TransferFunction::k709) | MakeBit(TransferFunction::kUnknown) |
+         MakeBit(TransferFunction::kLinear) | MakeBit(TransferFunction::kSRGB) |
+         MakeBit(TransferFunction::kPQ) | MakeBit(TransferFunction::kDCI) |
+         MakeBit(TransferFunction::kHLG);
+}
+
+enum class RenderingIntent : uint32_t {
+  // Values match ICC sRGB encodings.
+  kPerceptual = 0,  // good for photos, requires a profile with LUT.
+  kRelative,        // good for logos.
+  kSaturation,      // perhaps useful for CG with fully saturated colors.
+  kAbsolute,        // leaves white point unchanged; good for proofing.
+};
+
+static inline const char* EnumName(RenderingIntent /*unused*/) {
+  return "RenderingIntent";
+}
+static inline constexpr uint64_t EnumBits(RenderingIntent /*unused*/) {
+  using RI = RenderingIntent;
+  return MakeBit(RI::kPerceptual) | MakeBit(RI::kRelative) |
+         MakeBit(RI::kSaturation) | MakeBit(RI::kAbsolute);
+}
+
+// Chromaticity (Y is omitted because it is 1 for primaries/white points)
+struct CIExy {
+  double x = 0.0;
+  double y = 0.0;
+};
+
+struct PrimariesCIExy {
+  CIExy r;
+  CIExy g;
+  CIExy b;
+};
+
+// Serializable form of CIExy.
+struct Customxy : public Fields {
+  Customxy();
+  const char* Name() const override { return "Customxy"; }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  CIExy Get() const;
+  // Returns false if x or y do not fit in the encoding.
+  Status Set(const CIExy& xy);
+
+  int32_t x;
+  int32_t y;
+};
+
+struct CustomTransferFunction : public Fields {
+  CustomTransferFunction();
+  const char* Name() const override { return "CustomTransferFunction"; }
+
+  // If nonserialized_color_space has an implicit transfer function, sets the
+  // fields and returns true; otherwise returns false and changes nothing.
+  bool SetImplicit();
+
+  // Gamma: only used for PNG inputs
+  bool IsGamma() const { return have_gamma_; }
+  double GetGamma() const {
+    JXL_ASSERT(IsGamma());
+    return gamma_ * 1E-7;  // (0, 1)
+  }
+  Status SetGamma(double gamma);
+
+  // Enumerated transfer function; the getter asserts that no gamma is stored.
+  TransferFunction GetTransferFunction() const {
+    JXL_ASSERT(!IsGamma());
+    return transfer_function_;
+  }
+  // Stores `tf` and switches away from the gamma representation.
+  void SetTransferFunction(const TransferFunction tf) {
+    transfer_function_ = tf;
+    have_gamma_ = false;
+  }
+
+  // The predicates below are all false while a gamma value is stored.
+  bool IsUnknown() const {
+    return !IsGamma() && transfer_function_ == TransferFunction::kUnknown;
+  }
+  bool IsSRGB() const {
+    return !IsGamma() && transfer_function_ == TransferFunction::kSRGB;
+  }
+  bool IsLinear() const {
+    return !IsGamma() && transfer_function_ == TransferFunction::kLinear;
+  }
+  bool IsPQ() const {
+    return !IsGamma() && transfer_function_ == TransferFunction::kPQ;
+  }
+  bool IsHLG() const {
+    return !IsGamma() && transfer_function_ == TransferFunction::kHLG;
+  }
+  bool Is709() const {
+    return !IsGamma() && transfer_function_ == TransferFunction::k709;
+  }
+  bool IsDCI() const {
+    return !IsGamma() && transfer_function_ == TransferFunction::kDCI;
+  }
+  // True if both use the same representation and its stored value is equal.
+  bool IsSame(const CustomTransferFunction& other) const {
+    if (IsGamma() != other.IsGamma()) return false;
+    return IsGamma() ? gamma_ == other.gamma_
+                     : transfer_function_ == other.transfer_function_;
+  }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // Must be set before calling VisitFields!
+  ColorSpace nonserialized_color_space = ColorSpace::kRGB;
+
+ private:
+  static constexpr uint32_t kGammaMul = 10000000;
+
+  bool have_gamma_;
+
+  // OETF exponent to go from linear to gamma-compressed.
+  uint32_t gamma_;  // Only used if have_gamma_.
+
+  // Can be kUnknown.
+  TransferFunction transfer_function_;  // Only used if !have_gamma_.
+};
+
+// Compact encoding of data required to interpret and translate pixels to a
+// known color space. Stored in Metadata. Thread-compatible.
+struct ColorEncoding : public Fields {
+  ColorEncoding();
+  const char* Name() const override { return "ColorEncoding"; }
+
+  // Returns ready-to-use color encodings (initialized on-demand).
+  static const ColorEncoding& SRGB(bool is_gray = false);
+  static const ColorEncoding& LinearSRGB(bool is_gray = false);
+
+  // Returns true if an ICC profile was successfully created from fields.
+  // Must be called after modifying fields. Defined in color_management.cc.
+  Status CreateICC();
+
+  // Returns non-empty and valid ICC profile, unless:
+  // - between calling InternalRemoveICC() and CreateICC() in tests;
+  // - WantICC() == true and SetICC() was not yet called;
+  // - after a failed call to SetSRGB(), SetICC(), or CreateICC().
+  const PaddedBytes& ICC() const { return icc_; }
+
+  // Internal only, do not call except from tests.
+  void InternalRemoveICC() { icc_.clear(); }
+
+  // Returns true if `icc` is assigned and decoded successfully. If so,
+  // subsequent WantICC() will return true until DecideIfWantICC() changes it.
+  // Returning false indicates data has been lost.
+  Status SetICC(PaddedBytes&& icc) {
+    if (icc.empty()) return false;
+    icc_ = std::move(icc);
+    if (!SetFieldsFromICC()) {
+      InternalRemoveICC();
+      return false;
+    }
+
+    want_icc_ = true;
+    return true;
+  }
+
+  // Sets the raw ICC profile bytes, without parsing the ICC, and without
+  // updating the direct fields such as whitepoint, primaries and color
+  // space. Functions to get and set fields, such as SetWhitePoint, cannot be
+  // used anymore after this and functions such as IsSRGB return false no matter
+  // what the contents of the icc profile.
+  Status SetICCRaw(PaddedBytes&& icc) {
+    if (icc.empty()) return false;
+    icc_ = std::move(icc);
+
+    want_icc_ = true;
+    have_fields_ = false;
+    return true;
+  }
+
+  // Returns whether to send the ICC profile in the codestream.
+  bool WantICC() const { return want_icc_; }
+
+  // Return whether the direct fields are set, if false but ICC is set, only
+  // raw ICC bytes are known.
+  bool HaveFields() const { return have_fields_; }
+
+  // Causes WantICC() to return false if ICC() can be reconstructed from fields.
+  // Defined in color_management.cc.
+  void DecideIfWantICC();
+
+  bool IsGray() const { return color_space_ == ColorSpace::kGray; }
+  size_t Channels() const { return IsGray() ? 1 : 3; }
+
+  // Returns whether `primaries` is meaningful (false for gray and XYB).
+  bool HasPrimaries() const {
+    return !IsGray() && color_space_ != ColorSpace::kXYB;
+  }
+
+  // Returns true after setting the field to a value defined by color_space,
+  // otherwise false and leaves the field unchanged.
+  bool ImplicitWhitePoint() {
+    if (color_space_ == ColorSpace::kXYB) {
+      white_point = WhitePoint::kD65;
+      return true;
+    }
+    return false;
+  }
+
+  // Returns whether the color space is known to be sRGB. If a raw unparsed ICC
+  // profile is set without the fields being set, this returns false, even if
+  // the content of the ICC profile would match sRGB.
+  bool IsSRGB() const {
+    if (!have_fields_) return false;
+    if (!IsGray() && color_space_ != ColorSpace::kRGB) return false;
+    if (white_point != WhitePoint::kD65) return false;
+    if (primaries != Primaries::kSRGB) return false;
+    if (!tf.IsSRGB()) return false;
+    return true;
+  }
+
+  // Returns whether the color space is known to be linear sRGB. If a raw
+  // unparsed ICC profile is set without the fields being set, this returns
+  // false, even if the content of the ICC profile would match linear sRGB.
+  bool IsLinearSRGB() const {
+    if (!have_fields_) return false;
+    if (!IsGray() && color_space_ != ColorSpace::kRGB) return false;
+    if (white_point != WhitePoint::kD65) return false;
+    if (primaries != Primaries::kSRGB) return false;
+    if (!tf.IsLinear()) return false;
+    return true;
+  }
+
+  // Drops the ICC, switches to sRGB with intent `ri`, then recreates the ICC.
+  Status SetSRGB(const ColorSpace cs,
+                 const RenderingIntent ri = RenderingIntent::kRelative) {
+    InternalRemoveICC();
+    JXL_ASSERT(cs == ColorSpace::kGray || cs == ColorSpace::kRGB);
+    SetColorSpace(cs);  // also syncs tf.nonserialized_color_space
+    white_point = WhitePoint::kD65;
+    primaries = Primaries::kSRGB;
+    tf.SetTransferFunction(TransferFunction::kSRGB);
+    rendering_intent = ri;
+    return CreateICC();
+  }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // Accessors ensure tf.nonserialized_color_space is updated at the same time.
+  ColorSpace GetColorSpace() const { return color_space_; }
+  void SetColorSpace(const ColorSpace cs) {
+    color_space_ = cs;
+    tf.nonserialized_color_space = cs;
+  }
+
+  CIExy GetWhitePoint() const;
+  Status SetWhitePoint(const CIExy& xy);
+
+  PrimariesCIExy GetPrimaries() const;
+  Status SetPrimaries(const PrimariesCIExy& xy);
+
+  // Checks if the color spaces (including white point / primaries) are the
+  // same, but ignores the transfer function, rendering intent and ICC bytes.
+  bool SameColorSpace(const ColorEncoding& other) const {
+    if (color_space_ != other.color_space_) return false;
+
+    if (white_point != other.white_point) return false;
+    if (white_point == WhitePoint::kCustom) {
+      if (white_.x != other.white_.x || white_.y != other.white_.y)
+        return false;
+    }
+
+    if (HasPrimaries() != other.HasPrimaries()) return false;
+    if (HasPrimaries()) {
+      if (primaries != other.primaries) return false;
+      if (primaries == Primaries::kCustom) {
+        if (red_.x != other.red_.x || red_.y != other.red_.y) return false;
+        if (green_.x != other.green_.x || green_.y != other.green_.y)
+          return false;
+        if (blue_.x != other.blue_.x || blue_.y != other.blue_.y) return false;
+      }
+    }
+    return true;
+  }
+
+  // Checks if the color space and transfer function are the same, ignoring
+  // rendering intent and ICC bytes
+  bool SameColorEncoding(const ColorEncoding& other) const {
+    return SameColorSpace(other) && tf.IsSame(other.tf);
+  }
+
+  mutable bool all_default;
+
+  // Only valid if HaveFields()
+  WhitePoint white_point;
+  Primaries primaries;  // Only valid if HasPrimaries()
+  CustomTransferFunction tf;
+  RenderingIntent rendering_intent;
+
+ private:
+  // Returns true if all fields have been initialized (possibly to kUnknown).
+  // Returns false if the ICC profile is invalid or decoding it fails.
+  // Defined in color_management.cc.
+  Status SetFieldsFromICC();
+
+  // If true, the codestream contains an ICC profile and we do not serialize
+  // fields. Otherwise, fields are serialized and we create an ICC profile.
+  bool want_icc_;
+
+  // When false, fields such as white_point and tf are invalid and must not be
+  // used. This occurs after setting a raw bytes-only ICC profile, only the
+  // ICC bytes may be used. The color_space_ field is still valid.
+  bool have_fields_ = true;
+
+  PaddedBytes icc_;  // Valid ICC profile
+
+  ColorSpace color_space_;  // Can be kUnknown
+
+  // Only used if white_point == kCustom.
+  Customxy white_;
+
+  // Only used if primaries == kCustom.
+  Customxy red_;
+  Customxy green_;
+  Customxy blue_;
+};
+
+// Returns whether the two inputs are approximately equal.
+static inline bool ApproxEq(const double a, const double b,
+#if JPEGXL_ENABLE_SKCMS
+                            double max_l1 = 1E-3) {
+#else
+                            double max_l1 = 8E-5) {
+#endif
+  // Threshold should be sufficient for ICC's 15-bit fixed-point numbers.
+  // We have seen differences of 7.1E-5 with lcms2 and 1E-3 with skcms.
+  return std::abs(a - b) <= max_l1;
+}
+
+// Returns a representation of the ColorEncoding fields (not icc).
+// Example description: "RGB_D65_SRG_Rel_Lin"
+std::string Description(const ColorEncoding& c);
+Status ParseDescription(const std::string& description,
+                        ColorEncoding* JXL_RESTRICT c);
+
+static inline std::ostream& operator<<(std::ostream& os,
+                                       const ColorEncoding& c) {
+  return os << Description(c);
+}
+
+void ConvertInternalToExternalColorEncoding(const jxl::ColorEncoding& internal,
+                                            JxlColorEncoding* external);
+
+Status ConvertExternalToInternalColorEncoding(const JxlColorEncoding& external,
+                                              jxl::ColorEncoding* internal);
+
+Status PrimariesToXYZD50(float rx, float ry, float gx, float gy, float bx,
+                         float by, float wx, float wy, float matrix[9]);
+Status AdaptToXYZD50(float wx, float wy, float matrix[9]);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_COLOR_ENCODING_INTERNAL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/color_encoding_internal_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/color_encoding_internal_test.cc
new file mode 100644
index 0000000000..16393813aa
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/color_encoding_internal_test.cc
@@ -0,0 +1,174 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/color_encoding_internal.h"
+
+#include <stdio.h>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/encode_internal.h"
+#include "lib/jxl/test_utils.h"
+
+namespace jxl {
+namespace {
+
+TEST(ColorEncodingTest, RoundTripAll) {
+  for (const test::ColorEncodingDescriptor& cdesc : test::AllEncodings()) {
+    const ColorEncoding c_original = test::ColorEncodingFromDescriptor(cdesc);
+    // Verify Set(Get) yields the same white point/primaries/gamma.
+    {
+      ColorEncoding c;
+      EXPECT_TRUE(c.SetWhitePoint(c_original.GetWhitePoint()));
+      EXPECT_EQ(c_original.white_point, c.white_point);
+    }
+    {
+      ColorEncoding c;
+      EXPECT_TRUE(c.SetPrimaries(c_original.GetPrimaries()));
+      EXPECT_EQ(c_original.primaries, c.primaries);
+    }
+    if (c_original.tf.IsGamma()) {
+      ColorEncoding c;
+      EXPECT_TRUE(c.tf.SetGamma(c_original.tf.GetGamma()));
+      EXPECT_TRUE(c_original.tf.IsSame(c.tf));
+    }
+
+    // Verify ParseDescription(Description) yields the same ColorEncoding
+    {
+      const std::string description = Description(c_original);
+      printf("%s\n", description.c_str());
+      ColorEncoding c;
+      EXPECT_TRUE(ParseDescription(description, &c));
+      EXPECT_TRUE(c_original.SameColorEncoding(c));
+    }
+  }
+}
+
+// Verify Set(Get) for specific custom values
+
+TEST(ColorEncodingTest, NanGamma) {
+  const std::string description = "Gra_2_Per_gnan";
+  ColorEncoding c;
+  EXPECT_FALSE(ParseDescription(description, &c));
+}
+
+TEST(ColorEncodingTest, CustomWhitePoint) {
+  ColorEncoding c;
+  // Nonsensical values
+  CIExy xy_in;
+  xy_in.x = 0.8;
+  xy_in.y = 0.01;
+  EXPECT_TRUE(c.SetWhitePoint(xy_in));
+  const CIExy xy = c.GetWhitePoint();
+
+  ColorEncoding c2;
+  EXPECT_TRUE(c2.SetWhitePoint(xy));
+  EXPECT_TRUE(c.SameColorSpace(c2));
+}
+
+TEST(ColorEncodingTest, CustomPrimaries) {
+  ColorEncoding c;
+  PrimariesCIExy xy_in;
+  // Nonsensical values
+  xy_in.r.x = -0.01;
+  xy_in.r.y = 0.2;
+  xy_in.g.x = 0.4;
+  xy_in.g.y = 0.401;
+  xy_in.b.x = 1.1;
+  xy_in.b.y = -1.2;
+  EXPECT_TRUE(c.SetPrimaries(xy_in));
+  const PrimariesCIExy xy = c.GetPrimaries();
+
+  ColorEncoding c2;
+  EXPECT_TRUE(c2.SetPrimaries(xy));
+  EXPECT_TRUE(c.SameColorSpace(c2));
+}
+
+TEST(ColorEncodingTest, CustomGamma) {
+  ColorEncoding c;
+#ifndef JXL_CRASH_ON_ERROR
+  EXPECT_FALSE(c.tf.SetGamma(0.0));
+  EXPECT_FALSE(c.tf.SetGamma(-1E-6));
+  EXPECT_FALSE(c.tf.SetGamma(1.001));
+#endif
+  EXPECT_TRUE(c.tf.SetGamma(1.0));
+  EXPECT_FALSE(c.tf.IsGamma());
+  EXPECT_TRUE(c.tf.IsLinear());
+
+  EXPECT_TRUE(c.tf.SetGamma(0.123));
+  EXPECT_TRUE(c.tf.IsGamma());
+  const double gamma = c.tf.GetGamma();
+
+  ColorEncoding c2;
+  EXPECT_TRUE(c2.tf.SetGamma(gamma));
+  EXPECT_TRUE(c.SameColorEncoding(c2));
+  EXPECT_TRUE(c2.tf.IsGamma());
+}
+
+TEST(ColorEncodingTest, InternalExternalConversion) {
+  ColorEncoding source_internal;
+  JxlColorEncoding external;
+  ColorEncoding destination_internal;
+  // Next rand() value mapped linearly onto [0.25, 0.75].
+  const auto random_coord = []() -> double {
+    return (float(rand()) / float((RAND_MAX)) * 0.5) + 0.25;
+  };
+
+  for (int i = 0; i < 100; i++) {
+    source_internal.SetColorSpace(static_cast<ColorSpace>(rand() % 4));
+    CIExy wp;
+    wp.x = random_coord();
+    wp.y = random_coord();
+    EXPECT_TRUE(source_internal.SetWhitePoint(wp));
+    if (source_internal.HasPrimaries()) {
+      PrimariesCIExy primaries;
+      primaries.r.x = random_coord();
+      primaries.r.y = random_coord();
+      primaries.g.x = random_coord();
+      primaries.g.y = random_coord();
+      primaries.b.x = random_coord();
+      primaries.b.y = random_coord();
+      EXPECT_TRUE(source_internal.SetPrimaries(primaries));
+    }
+    CustomTransferFunction tf;
+    EXPECT_TRUE(tf.SetGamma(random_coord()));
+    source_internal.tf = tf;
+    source_internal.rendering_intent = static_cast<RenderingIntent>(rand() % 4);
+
+    ConvertInternalToExternalColorEncoding(source_internal, &external);
+    EXPECT_TRUE(ConvertExternalToInternalColorEncoding(external,
+                                                       &destination_internal));
+
+    EXPECT_EQ(source_internal.GetColorSpace(),
+              destination_internal.GetColorSpace());
+    EXPECT_EQ(source_internal.white_point, destination_internal.white_point);
+    EXPECT_EQ(source_internal.GetWhitePoint().x,
+              destination_internal.GetWhitePoint().x);
+    EXPECT_EQ(source_internal.GetWhitePoint().y,
+              destination_internal.GetWhitePoint().y);
+    if (source_internal.HasPrimaries()) {
+      const PrimariesCIExy src = source_internal.GetPrimaries();
+      const PrimariesCIExy dst = destination_internal.GetPrimaries();
+      EXPECT_EQ(src.r.x, dst.r.x);
+      EXPECT_EQ(src.r.y, dst.r.y);
+      EXPECT_EQ(src.g.x, dst.g.x);
+      EXPECT_EQ(src.g.y, dst.g.y);
+      EXPECT_EQ(src.b.x, dst.b.x);
+      EXPECT_EQ(src.b.y, dst.b.y);
+    }
+    EXPECT_EQ(source_internal.tf.IsGamma(), destination_internal.tf.IsGamma());
+    if (source_internal.tf.IsGamma()) {
+      EXPECT_EQ(source_internal.tf.GetGamma(),
+                destination_internal.tf.GetGamma());
+    } else {
+      EXPECT_EQ(source_internal.tf.GetTransferFunction(),
+                destination_internal.tf.GetTransferFunction());
+    }
+    EXPECT_EQ(source_internal.rendering_intent,
+              destination_internal.rendering_intent);
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/color_management.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/color_management.cc
new file mode 100644
index 0000000000..feb5140d9a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/color_management.cc
@@ -0,0 +1,433 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Defined by build system; this avoids IDE warnings. Must come before
+// color_management.h (affects header definitions).
+#ifndef JPEGXL_ENABLE_SKCMS
+#define JPEGXL_ENABLE_SKCMS 0
+#endif
+
+#include "lib/jxl/color_management.h"
+
+#include <math.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <memory>
+#include <string>
+#include <utility>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/color_management.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/linalg.h"  // MatMul, Inv3x3Matrix
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// NOTE: this is only used to provide a reasonable ICC profile that other
+// software can read. Our own transforms use ExtraTF instead because that is
+// more precise and supports unbounded mode.
+std::vector<uint16_t> CreateTableCurve(uint32_t N, const ExtraTF tf) {
+  JXL_ASSERT(N <= 4096);  // ICC MFT2 only allows 4K entries
+  JXL_ASSERT(tf == ExtraTF::kPQ || tf == ExtraTF::kHLG);
+  const float last = static_cast<float>(N - 1);  // Index that maps to 1.0.
+  // No point using float - LCMS converts to 16-bit for A2B/MFT.
+  std::vector<uint16_t> table(N);
+  for (uint32_t idx = 0; idx < N; ++idx) {
+    const float x = static_cast<float>(idx) / last;
+    const double dx = x;
+    // LCMS requires EOTF (e.g. 2.4 exponent).
+    double y = (tf == ExtraTF::kHLG) ? TF_HLG().DisplayFromEncoded(dx)
+                                     : TF_PQ().DisplayFromEncoded(dx);
+    JXL_ASSERT(y >= 0.0);
+    // Clamp to table range (necessary for HLG); 1.0 maps to 0xFFFF.
+    if (y > 1.0) y = 1.0;
+    table[idx] = static_cast<uint16_t>(roundf(y * 65535.0));
+  }
+  return table;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(CreateTableCurve);  // Local function.
+
+Status CIEXYZFromWhiteCIExy(const CIExy& xy, float XYZ[3]) {
+  // y is the divisor below, so a (near-)zero value is rejected.
+  if (std::abs(xy.y) < 1e-12) return JXL_FAILURE("Y value is too small");
+  const float scale = 1 / xy.y;  // Target Y = 1.
+  XYZ[1] = 1;
+  XYZ[0] = xy.x * scale;
+  XYZ[2] = (1 - xy.x - xy.y) * scale;
+  return true;
+}
+
+namespace {
+
+// NOTE: this is only used to provide a reasonable ICC profile that other
+// software can read. Our own transforms use ExtraTF instead because that is
+// more precise and supports unbounded mode.
+template <class Func>
+std::vector<uint16_t> CreateTableCurve(uint32_t N, const Func& func) {
+  JXL_ASSERT(N <= 4096);  // ICC MFT2 only allows 4K entries
+  // No point using float - LCMS converts to 16-bit for A2B/MFT.
+  std::vector<uint16_t> table(N);
+  const float last = static_cast<float>(N - 1);  // Index that maps to 1.0.
+  for (uint32_t idx = 0; idx < N; ++idx) {
+    const float x = static_cast<float>(idx) / last;
+    // LCMS requires EOTF (e.g. 2.4 exponent).
+    double y = func.DisplayFromEncoded(static_cast<double>(x));
+    JXL_ASSERT(y >= 0.0);
+    // Clamp to table range (necessary for HLG); 1.0 maps to 0xFFFF.
+    if (y > 1.0) y = 1.0;
+    table[idx] = static_cast<uint16_t>(roundf(y * 65535.0));
+  }
+  return table;
+}
+
+Status CreateICCChadMatrix(CIExy w, float result[9]) {
+  // WhitePoint can not be pitch-black.
+  if (w.y == 0) return JXL_FAILURE("Invalid WhitePoint");
+  // Adapt into a scratch matrix so `result` is only written on success.
+  float m[9];
+  JXL_RETURN_IF_ERROR(AdaptToXYZD50(w.x, w.y, m));
+  for (int i = 0; i < 9; ++i) result[i] = m[i];
+  return true;
+}
+
+// Creates RGB to XYZ matrix given RGB primaries and whitepoint in xy.
+Status CreateICCRGBMatrix(CIExy r, CIExy g, CIExy b, CIExy w, float result[9]) {
+  float m[9];  // Scratch output; `result` stays untouched on failure.
+  JXL_RETURN_IF_ERROR(
+      PrimariesToXYZD50(r.x, r.y, g.x, g.y, b.x, b.y, w.x, w.y, m));
+  for (int i = 0; i < 9; ++i) result[i] = m[i];
+  return true;
+}
+
+void WriteICCUint32(uint32_t value, size_t pos, PaddedBytes* JXL_RESTRICT icc) {
+  // Grow the buffer if needed, then store the four bytes high byte first.
+  if (icc->size() < pos + 4) icc->resize(pos + 4);
+  for (size_t i = 0; i < 4; ++i) {
+    (*icc)[pos + i] = (value >> (24u - 8u * i)) & 255;
+  }
+}
+
+void WriteICCUint16(uint16_t value, size_t pos, PaddedBytes* JXL_RESTRICT icc) {
+  if (icc->size() < pos + 2) icc->resize(pos + 2);
+  (*icc)[pos] = value >> 8u;  // High byte first; it is at most 255.
+  (*icc)[pos + 1] = value & 255;
+}
+
+// Writes a 4-character tag at `pos`, growing `icc` first if it is too short.
+void WriteICCTag(const char* value, size_t pos, PaddedBytes* JXL_RESTRICT icc) {
+  if (icc->size() < pos + 4) icc->resize(pos + 4);
+  for (size_t i = 0; i < 4; ++i) (*icc)[pos + i] = value[i];
+}
+
+Status WriteICCS15Fixed16(float value, size_t pos,
+                          PaddedBytes* JXL_RESTRICT icc) {
+  // Stores `value` at `pos` as value * 65536 in a big-endian 32-bit word.
+  // "nextafterf" for 32768.0f towards zero are 32767.998046875,
+  // 32767.99609375, 32767.994140625; even the first value works well,...
+  bool ok = (-32767.995f <= value) && (value <= 32767.995f);
+  if (!ok) return JXL_FAILURE("ICC value is out of range / NaN");
+  // Round to nearest: truncating `x + 0.5f` toward zero is off by one unit for
+  // negative values (-1.0f would become -65535 instead of -65536).
+  const int32_t i = static_cast<int32_t>(lroundf(value * 65536.0f));
+  WriteICCUint32(static_cast<uint32_t>(i), pos, icc);  // two's complement
+  return true;
+}
+
+Status CreateICCHeader(const ColorEncoding& c,
+                       PaddedBytes* JXL_RESTRICT header) {
+  // TODO(lode): choose color management engine name, e.g. "skia" if
+  // integrated in skia.
+  static const char* kCmm = "jxl ";
+  // Placeholder timestamp as year, month, day, hour, minute, second.
+  // TODO(lode): encode actual date and time
+  static const uint16_t kDateTime[6] = {2019, 12, 1, 0, 0, 0};
+  // Mandatory D50 white point of profile connection space
+  static const uint32_t kD50[3] = {0x0000f6d6, 0x00010000, 0x0000d32d};
+
+  // The header occupies 128 bytes; fields below are written at fixed offsets.
+  header->resize(128, 0);
+
+  WriteICCUint32(0, 0, header);  // size, correct value filled in at end
+  WriteICCTag(kCmm, 4, header);
+  WriteICCUint32(0x04300000u, 8, header);
+  WriteICCTag("mntr", 12, header);
+  const char* color_space_tag = c.IsGray() ? "GRAY" : "RGB ";
+  WriteICCTag(color_space_tag, 16, header);
+  WriteICCTag("XYZ ", 20, header);
+
+  // Six 16-bit date/time fields at offsets 24..34.
+  for (size_t i = 0; i < 6; ++i) {
+    WriteICCUint16(kDateTime[i], 24 + 2 * i, header);
+  }
+
+  WriteICCTag("acsp", 36, header);
+  WriteICCTag("APPL", 40, header);
+  // flags, device manufacturer, device model, device attributes (2 words)
+  for (size_t pos = 44; pos <= 60; pos += 4) {
+    WriteICCUint32(0, pos, header);
+  }
+  WriteICCUint32(static_cast<uint32_t>(c.rendering_intent), 64, header);
+
+  // White point words go to offsets 68, 72 and 76.
+  for (size_t i = 0; i < 3; ++i) {
+    WriteICCUint32(kD50[i], 68 + 4 * i, header);
+  }
+
+  WriteICCTag(kCmm, 80, header);
+
+  return true;
+}
+
+void AddToICCTagTable(const char* tag, size_t offset, size_t size,
+                      PaddedBytes* JXL_RESTRICT tagtable,
+                      std::vector<size_t>* offsets) {
+  const size_t entry = tagtable->size();  // Start of the new 12-byte entry.
+  WriteICCTag(tag, entry, tagtable);
+  WriteICCUint32(0, entry + 4, tagtable);  // writing true offset deferred
+  WriteICCUint32(size, entry + 8, tagtable);
+  offsets->push_back(offset);
+}
+
+void FinalizeICCTag(PaddedBytes* JXL_RESTRICT tags, size_t* offset,
+                    size_t* size) {
+  // Zero-pad the tag data to a multiple of four bytes.
+  while (tags->size() % 4 != 0) tags->push_back(0);
+  // Advance past the previous tag; the new one spans the rest of `tags`.
+  *offset = *offset + *size;
+  *size = tags->size() - *offset;
+}
+
+// The input text must be ASCII, writing other characters to UTF-16 is not
+// implemented.
+void CreateICCMlucTag(const std::string& text, PaddedBytes* JXL_RESTRICT tags) {
+  WriteICCTag("mluc", tags->size(), tags);
+  WriteICCUint32(0, tags->size(), tags);
+  WriteICCUint32(1, tags->size(), tags);
+  WriteICCUint32(12, tags->size(), tags);
+  WriteICCTag("enUS", tags->size(), tags);
+  WriteICCUint32(text.size() * 2, tags->size(), tags);  // two bytes per char
+  WriteICCUint32(28, tags->size(), tags);
+  // Each character becomes a 16-bit unit: a zero byte, then the character.
+  for (const char ch : text) {
+    WriteICCUint16(static_cast<uint8_t>(ch), tags->size(), tags);
+  }
+}
+
+Status CreateICCXYZTag(float xyz[3], PaddedBytes* JXL_RESTRICT tags) {
+  WriteICCTag("XYZ ", tags->size(), tags);  // 4-character type tag
+  WriteICCUint32(0, tags->size(), tags);    // followed by four zero bytes
+  for (const float* v = xyz; v != xyz + 3; ++v) {
+    JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(*v, tags->size(), tags));
+  }
+  return true;
+}
+
+Status CreateICCChadTag(float chad[9], PaddedBytes* JXL_RESTRICT tags) {
+  WriteICCTag("sf32", tags->size(), tags);  // 4-character type tag
+  WriteICCUint32(0, tags->size(), tags);    // followed by four zero bytes
+  for (const float* v = chad; v != chad + 9; ++v) {
+    JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(*v, tags->size(), tags));
+  }
+  return true;
+}
+
+void CreateICCCurvCurvTag(const std::vector<uint16_t>& curve,
+                          PaddedBytes* JXL_RESTRICT tags) {
+  const size_t start = tags->size();
+  tags->resize(start + 12 + curve.size() * 2, 0);  // prefix + 2 bytes/entry
+  WriteICCTag("curv", start, tags);
+  WriteICCUint32(0, start + 4, tags);
+  WriteICCUint32(curve.size(), start + 8, tags);
+  for (size_t i = 0, out = start + 12; i < curve.size(); ++i, out += 2) {
+    WriteICCUint16(curve[i], out, tags);
+  }
+}
+
+Status CreateICCCurvParaTag(std::vector<float> params, size_t curve_type,
+                            PaddedBytes* JXL_RESTRICT tags) {
+  WriteICCTag("para", tags->size(), tags);
+  WriteICCUint32(0, tags->size(), tags);
+  WriteICCUint16(curve_type, tags->size(), tags);  // function type
+  WriteICCUint16(0, tags->size(), tags);           // zero 16-bit filler
+  for (const float value : params) {
+    JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(value, tags->size(), tags));
+  }
+  return true;
+}
+}  // namespace
+
+// Synthesizes an ICC profile for `c` into `*icc` (header, tag table, tag data).
+// Returns false (not an error) if the color space or transfer function is
+// unknown; fails for XYB and for any color space other than RGB/gray.
+Status MaybeCreateProfile(const ColorEncoding& c,
+                          PaddedBytes* JXL_RESTRICT icc) {
+  PaddedBytes header, tagtable, tags;
+
+  if (c.GetColorSpace() == ColorSpace::kUnknown || c.tf.IsUnknown()) {
+    return false;  // Not an error
+  }
+
+  switch (c.GetColorSpace()) {
+    case ColorSpace::kRGB:
+    case ColorSpace::kGray:
+      break;  // OK
+    case ColorSpace::kXYB:
+      return JXL_FAILURE("XYB ICC not yet implemented");
+    default:
+      return JXL_FAILURE("Invalid CS %u",
+                         static_cast<unsigned int>(c.GetColorSpace()));
+  }
+
+  JXL_RETURN_IF_ERROR(CreateICCHeader(c, &header));
+
+  std::vector<size_t> offsets;
+  // tag count, deferred to later
+  WriteICCUint32(0, tagtable.size(), &tagtable);
+
+  size_t tag_offset = 0, tag_size = 0;
+
+  CreateICCMlucTag(Description(c), &tags);
+  FinalizeICCTag(&tags, &tag_offset, &tag_size);
+  AddToICCTagTable("desc", tag_offset, tag_size, &tagtable, &offsets);
+
+  const std::string copyright =
+      "Copyright 2019 Google LLC, CC-BY-SA 3.0 Unported "
+      "license(https://creativecommons.org/licenses/by-sa/3.0/legalcode)";
+  CreateICCMlucTag(copyright, &tags);
+  FinalizeICCTag(&tags, &tag_offset, &tag_size);
+  AddToICCTagTable("cprt", tag_offset, tag_size, &tagtable, &offsets);
+
+  // TODO(eustas): isn't it the other way round: gray image has d50 WhitePoint?
+  if (c.IsGray()) {
+    float wtpt[3];
+    JXL_RETURN_IF_ERROR(CIEXYZFromWhiteCIExy(c.GetWhitePoint(), wtpt));
+    JXL_RETURN_IF_ERROR(CreateICCXYZTag(wtpt, &tags));
+  } else {
+    float d50[3] = {0.964203, 1.0, 0.824905};
+    JXL_RETURN_IF_ERROR(CreateICCXYZTag(d50, &tags));
+  }
+  FinalizeICCTag(&tags, &tag_offset, &tag_size);
+  AddToICCTagTable("wtpt", tag_offset, tag_size, &tagtable, &offsets);
+
+  if (!c.IsGray()) {
+    // Chromatic adaptation matrix
+    float chad[9];
+    JXL_RETURN_IF_ERROR(CreateICCChadMatrix(c.GetWhitePoint(), chad));
+
+    const PrimariesCIExy primaries = c.GetPrimaries();
+    float m[9];
+    JXL_RETURN_IF_ERROR(CreateICCRGBMatrix(primaries.r, primaries.g,
+                                           primaries.b, c.GetWhitePoint(), m));
+    float r[3] = {m[0], m[3], m[6]};
+    float g[3] = {m[1], m[4], m[7]};
+    float b[3] = {m[2], m[5], m[8]};
+
+    JXL_RETURN_IF_ERROR(CreateICCChadTag(chad, &tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("chad", tag_offset, tag_size, &tagtable, &offsets);
+
+    JXL_RETURN_IF_ERROR(CreateICCXYZTag(r, &tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("rXYZ", tag_offset, tag_size, &tagtable, &offsets);
+
+    JXL_RETURN_IF_ERROR(CreateICCXYZTag(g, &tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("gXYZ", tag_offset, tag_size, &tagtable, &offsets);
+
+    JXL_RETURN_IF_ERROR(CreateICCXYZTag(b, &tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("bXYZ", tag_offset, tag_size, &tagtable, &offsets);
+  }
+
+  if (c.tf.IsGamma()) {
+    float gamma = 1.0 / c.tf.GetGamma();
+    JXL_RETURN_IF_ERROR(
+        CreateICCCurvParaTag({gamma, 1.0, 0.0, 1.0, 0.0}, 3, &tags));
+  } else {
+    switch (c.tf.GetTransferFunction()) {
+      case TransferFunction::kHLG:
+        CreateICCCurvCurvTag(
+            HWY_DYNAMIC_DISPATCH(CreateTableCurve)(4096, ExtraTF::kHLG), &tags);
+        break;
+      case TransferFunction::kPQ:
+        CreateICCCurvCurvTag(
+            HWY_DYNAMIC_DISPATCH(CreateTableCurve)(4096, ExtraTF::kPQ), &tags);
+        break;
+      case TransferFunction::kSRGB:
+        JXL_RETURN_IF_ERROR(CreateICCCurvParaTag(
+            {2.4, 1.0 / 1.055, 0.055 / 1.055, 1.0 / 12.92, 0.04045}, 3, &tags));
+        break;
+      case TransferFunction::k709:
+        JXL_RETURN_IF_ERROR(CreateICCCurvParaTag(
+            {1.0 / 0.45, 1.0 / 1.099, 0.099 / 1.099, 1.0 / 4.5, 0.081}, 3,
+            &tags));
+        break;
+      case TransferFunction::kLinear:
+        JXL_RETURN_IF_ERROR(
+            CreateICCCurvParaTag({1.0, 1.0, 0.0, 1.0, 0.0}, 3, &tags));
+        break;
+      case TransferFunction::kDCI:
+        JXL_RETURN_IF_ERROR(
+            CreateICCCurvParaTag({2.6, 1.0, 0.0, 1.0, 0.0}, 3, &tags));
+        break;
+      default:
+        JXL_ABORT("Unknown TF %d",
+                  static_cast<int>(c.tf.GetTransferFunction()));
+    }
+  }
+  FinalizeICCTag(&tags, &tag_offset, &tag_size);
+  if (c.IsGray()) {
+    AddToICCTagTable("kTRC", tag_offset, tag_size, &tagtable, &offsets);
+  } else {
+    AddToICCTagTable("rTRC", tag_offset, tag_size, &tagtable, &offsets);
+    AddToICCTagTable("gTRC", tag_offset, tag_size, &tagtable, &offsets);
+    AddToICCTagTable("bTRC", tag_offset, tag_size, &tagtable, &offsets);
+  }
+
+  // Tag count
+  WriteICCUint32(offsets.size(), 0, &tagtable);
+  for (size_t i = 0; i < offsets.size(); i++) {
+    WriteICCUint32(offsets[i] + header.size() + tagtable.size(), 4 + 12 * i + 4,
+                   &tagtable);
+  }
+
+  // ICC profile size
+  WriteICCUint32(header.size() + tagtable.size() + tags.size(), 0, &header);
+
+  *icc = header;
+  icc->append(tagtable);
+  icc->append(tags);
+
+  // No profile ID/checksum is computed here: the scratch copy that zeroed
+  // bytes 44..47 and 64..67 for that purpose was never read, so it is gone.
+
+  return true;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/color_management.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/color_management.h
new file mode 100644
index 0000000000..f728fe589a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/color_management.h
@@ -0,0 +1,38 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COLOR_MANAGEMENT_H_
+#define LIB_JXL_COLOR_MANAGEMENT_H_
+
+// ICC profiles and color space conversions.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+// Curve selector; color_management.cc passes kPQ/kHLG to CreateTableCurve.
+enum class ExtraTF {
+  kNone,
+  kPQ,
+  kHLG,
+  kSRGB,
+};
+// Builds an ICC profile for `c` into *icc; false if none can be synthesized.
+Status MaybeCreateProfile(const ColorEncoding& c,
+                          PaddedBytes* JXL_RESTRICT icc);
+// NOTE(review): presumably converts white point `xy` to CIE XYZ - confirm.
+Status CIEXYZFromWhiteCIExy(const CIExy& xy, float XYZ[3]);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_COLOR_MANAGEMENT_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/color_management_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/color_management_test.cc
new file mode 100644
index 0000000000..0747e5c9e3
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/color_management_test.cc
@@ -0,0 +1,237 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/color_management.h"
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <new>
+#include <string>
+#include <utility>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/file_io.h"
+#include "lib/jxl/base/thread_pool_internal.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testdata.h"
+
+namespace jxl {
+
+std::ostream& operator<<(std::ostream& os, const CIExy& xy) {
+  os << "{x=" << xy.x << ", y=" << xy.y << "}";
+  return os;  // chainable, like every stream inserter
+}
+std::ostream& operator<<(std::ostream& os, const PrimariesCIExy& primaries) {
+  os << "{r=" << primaries.r << ", g=" << primaries.g;  // CIExy printer above
+  return os << ", b=" << primaries.b << "}";
+}
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::FloatNear;
+
+// Small enough to be fast. If changed, must update Generate*.
+static constexpr size_t kWidth = 16;
+
+struct Globals {  // shared inputs/outputs/pool, built once on first use
+  // TODO(deymo): Make this a const.
+  static Globals* GetInstance() {
+    static Globals ret;
+    return &ret;
+  }
+
+ private:
+  static constexpr size_t kNumThreads = 0;  // only have a single row.
+
+  Globals() : pool(kNumThreads) {
+    in_gray = GenerateGray();
+    in_color = GenerateColor();
+    out_gray = ImageF(kWidth, 1);
+    out_color = ImageF(kWidth * 3, 1);
+
+    c_native = ColorEncoding::LinearSRGB(/*is_gray=*/false);
+    c_gray = ColorEncoding::LinearSRGB(/*is_gray=*/true);
+  }
+  // One row of kWidth samples ramping linearly from 0 to 1.
+  static ImageF GenerateGray() {
+    ImageF gray(kWidth, 1);
+    float* JXL_RESTRICT row = gray.Row(0);
+    // Increasing left to right
+    for (uint32_t x = 0; x < kWidth; ++x) {
+      row[x] = x * 1.0f / (kWidth - 1);  // [0, 1]
+    }
+    return gray;
+  }
+  // One row of kWidth interleaved RGB pixels covering the cases noted below.
+  static ImageF GenerateColor() {
+    ImageF image(kWidth * 3, 1);
+    float* JXL_RESTRICT interleaved = image.Row(0);
+    std::fill(interleaved, interleaved + kWidth * 3, 0.0f);
+
+    // [0, 4): neutral
+    for (int32_t x = 0; x < 4; ++x) {
+      interleaved[3 * x + 0] = x * 1.0f / 3;  // [0, 1]
+      interleaved[3 * x + 2] = interleaved[3 * x + 1] = interleaved[3 * x + 0];
+    }
+
+    // [4, 13): pure RGB with low/medium/high saturation
+    for (int32_t c = 0; c < 3; ++c) {
+      interleaved[3 * (4 + c) + c] = 0.08f + c * 0.01f;
+      interleaved[3 * (7 + c) + c] = 0.75f + c * 0.01f;
+      interleaved[3 * (10 + c) + c] = 1.0f;
+    }
+
+    // [13, 16): impure, not quite saturated RGB
+    interleaved[3 * 13 + 0] = 0.86f;
+    interleaved[3 * 13 + 2] = interleaved[3 * 13 + 1] = 0.16f;
+    interleaved[3 * 14 + 1] = 0.87f;
+    interleaved[3 * 14 + 2] = interleaved[3 * 14 + 0] = 0.16f;
+    interleaved[3 * 15 + 2] = 0.88f;
+    interleaved[3 * 15 + 1] = interleaved[3 * 15 + 0] = 0.16f;
+
+    return image;
+  }
+
+ public:
+  ThreadPoolInternal pool;
+
+  // ImageF so we can use VerifyRelativeError; color images are interleaved RGB.
+  ImageF in_gray;
+  ImageF in_color;
+  ImageF out_gray;
+  ImageF out_color;
+  ColorEncoding c_native;
+  ColorEncoding c_gray;
+};
+
+class ColorManagementTest
+    : public ::testing::TestWithParam<test::ColorEncodingDescriptor> {
+ public:
+  static void VerifySameFields(const ColorEncoding& c,
+                               const ColorEncoding& c2) {
+    ASSERT_EQ(c.rendering_intent, c2.rendering_intent);
+    ASSERT_EQ(c.GetColorSpace(), c2.GetColorSpace());
+    ASSERT_EQ(c.white_point, c2.white_point);
+    if (c.HasPrimaries()) {
+      ASSERT_EQ(c.primaries, c2.primaries);
+    }
+    ASSERT_TRUE(c.tf.IsSame(c2.tf));
+  }
+
+  // "Same" pixels after converting g->c_native -> c -> g->c_native.
+  static void VerifyPixelRoundTrip(const ColorEncoding& c) {
+    Globals* g = Globals::GetInstance();
+    const ColorEncoding& c_native = c.IsGray() ? g->c_gray : g->c_native;
+    ColorSpaceTransform xform_fwd;
+    ColorSpaceTransform xform_rev;
+    ASSERT_TRUE(xform_fwd.Init(c_native, c, kDefaultIntensityTarget, kWidth,
+                               g->pool.NumThreads()));
+    ASSERT_TRUE(xform_rev.Init(c, c_native, kDefaultIntensityTarget, kWidth,
+                               g->pool.NumThreads()));
+    // Forward into xform_fwd's destination buffer, then back into `out`.
+    const size_t thread = 0;
+    const ImageF& in = c.IsGray() ? g->in_gray : g->in_color;
+    ImageF* JXL_RESTRICT out = c.IsGray() ? &g->out_gray : &g->out_color;
+    DoColorSpaceTransform(&xform_fwd, thread, in.Row(0),
+                          xform_fwd.BufDst(thread));
+    DoColorSpaceTransform(&xform_rev, thread, xform_fwd.BufDst(thread),
+                          out->Row(0));
+    // Tolerances depend on the CMS backend; gray gets a looser relative bound.
+#if JPEGXL_ENABLE_SKCMS
+    double max_l1 = 7E-4;
+    double max_rel = 4E-7;
+#else
+    double max_l1 = 5E-5;
+    // Most are lower; reached 3E-7 with D60 AP0.
+    double max_rel = 4E-7;
+#endif
+    if (c.IsGray()) max_rel = 2E-5;
+    VerifyRelativeError(in, *out, max_l1, max_rel);
+  }
+};
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(ColorManagementTestInstantiation,
+                                   ColorManagementTest,
+                                   ::testing::ValuesIn(test::AllEncodings()));
+
+// Exercises the ColorManagement interface for ALL ColorEncoding synthesizable
+// via enums.
+TEST_P(ColorManagementTest, VerifyAllProfiles) {
+  ColorEncoding c = ColorEncodingFromDescriptor(GetParam());
+  printf("%s\n", Description(c).c_str());
+
+  // Can create profile.
+  ASSERT_TRUE(c.CreateICC());
+
+  // Can set an equivalent ColorEncoding from the generated ICC profile.
+  ColorEncoding c3;
+  ASSERT_TRUE(c3.SetICC(PaddedBytes(c.ICC())));
+  VerifySameFields(c, c3);
+  // Pixels must also survive conversion to `c` and back.
+  VerifyPixelRoundTrip(c);
+}
+
+testing::Matcher<CIExy> CIExyIs(const double x, const double y) {
+  using testing::Field;
+  constexpr double kTol = 1e-4;  // absolute tolerance per coordinate
+  return testing::AllOf(Field(&CIExy::x, testing::DoubleNear(x, kTol)),
+                        Field(&CIExy::y, testing::DoubleNear(y, kTol)));
+}
+
+testing::Matcher<PrimariesCIExy> PrimariesAre(
+    const testing::Matcher<CIExy>& r, const testing::Matcher<CIExy>& g,
+    const testing::Matcher<CIExy>& b) {
+  using testing::Field;
+  using P = PrimariesCIExy;  // one field matcher per primary
+  return testing::AllOf(Field(&P::r, r), Field(&P::g, g), Field(&P::b, b));
+}
+
+TEST_F(ColorManagementTest, sRGBChromaticity) {
+  const ColorEncoding sRGB = ColorEncoding::SRGB();  // built-in encoding
+  EXPECT_THAT(sRGB.GetWhitePoint(), CIExyIs(0.3127, 0.3290));
+  EXPECT_THAT(sRGB.GetPrimaries(),
+              PrimariesAre(CIExyIs(0.64, 0.33), CIExyIs(0.30, 0.60),
+                           CIExyIs(0.15, 0.06)));
+}
+
+TEST_F(ColorManagementTest, D2700Chromaticity) {
+  PaddedBytes icc = ReadTestData("jxl/color_management/sRGB-D2700.icc");
+  ColorEncoding sRGB_D2700;
+  ASSERT_TRUE(sRGB_D2700.SetICC(std::move(icc)));
+  // Checks the values the encoding reports after SetICC on that profile.
+  EXPECT_THAT(sRGB_D2700.GetWhitePoint(), CIExyIs(0.45986, 0.41060));
+  // The illuminant-relative chromaticities of this profile's primaries are the
+  // same as for sRGB. It is the PCS-relative chromaticities that would be
+  // different.
+  EXPECT_THAT(sRGB_D2700.GetPrimaries(),
+              PrimariesAre(CIExyIs(0.64, 0.33), CIExyIs(0.30, 0.60),
+                           CIExyIs(0.15, 0.06)));
+}
+
+TEST_F(ColorManagementTest, D2700ToSRGB) {
+  PaddedBytes icc = ReadTestData("jxl/color_management/sRGB-D2700.icc");
+  ColorEncoding sRGB_D2700;
+  ASSERT_TRUE(sRGB_D2700.SetICC(std::move(icc)));
+  // Transforms a single RGB triple from the loaded profile to built-in sRGB.
+  ColorSpaceTransform transform;
+  ASSERT_TRUE(transform.Init(sRGB_D2700, ColorEncoding::SRGB(),
+                             kDefaultIntensityTarget, 1, 1));
+  const float sRGB_D2700_values[3] = {0.863, 0.737, 0.490};
+  float sRGB_values[3];
+  DoColorSpaceTransform(&transform, 0, sRGB_D2700_values, sRGB_values);
+  EXPECT_THAT(sRGB_values,
+              ElementsAre(FloatNear(0.914, 1e-3), FloatNear(0.745, 1e-3),
+                          FloatNear(0.601, 1e-3)));
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/common.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/common.h
new file mode 100644
index 0000000000..a71216ecca
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/common.h
@@ -0,0 +1,194 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COMMON_H_
+#define LIB_JXL_COMMON_H_
+
+// Shared constants and helper functions.
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include <limits>  // numeric_limits
+#include <memory>  // unique_ptr
+#include <string>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+#ifndef JXL_HIGH_PRECISION
+#define JXL_HIGH_PRECISION 1
+#endif
+
+// Macro that defines whether support for decoding JXL files to JPEG is enabled.
+#ifndef JPEGXL_ENABLE_TRANSCODE_JPEG
+#define JPEGXL_ENABLE_TRANSCODE_JPEG 1
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+namespace jxl {
+// Some enums and typedefs used by more than one header file.
+
+constexpr size_t kBitsPerByte = 8;  // more clear than CHAR_BIT
+
+constexpr inline size_t RoundUpBitsToByteMultiple(size_t bits) {
+  return (bits + 7) / 8 * 8;  // next multiple of 8 bits, result still in bits
+}
+
+constexpr inline size_t RoundUpToBlockDim(size_t dim) {
+  return (dim + 7) / 8 * 8;  // next multiple of 8
+}
+
+static inline bool JXL_MAYBE_UNUSED SafeAdd(const uint64_t a, const uint64_t b,
+                                            uint64_t& sum) {
+  sum = a + b;        // wraps modulo 2^64 on overflow
+  return !(sum < a);  // a wrapped sum is smaller than both operands
+}
+
+template <typename T1, typename T2>
+constexpr inline T1 DivCeil(T1 a, T2 b) {
+  return (a + b - 1) / b;  // ceil(a / b) for a >= 0, b > 0, barring overflow
+}
+
+// Rounds `what` up to a multiple of `align`; `align` may be any non-zero value.
+constexpr inline size_t RoundUpTo(size_t what, size_t align) {
+  return (what + align - 1) / align * align;
+}
+
+constexpr double kPi = 3.14159265358979323846264338327950288;
+
+// Reasonable default for sRGB, matches common monitors. We map white to this
+// many nits (cd/m^2) by default. Butteraugli was tuned for 250 nits, which is
+// very close.
+static constexpr float kDefaultIntensityTarget = 255;
+
+template <typename T>
+constexpr T Pi(T multiplier) {  // multiplier * kPi, converted back to T
+  return static_cast<T>(multiplier * kPi);
+}
+
+// Block is the square grid of pixels to which an "energy compaction"
+// transformation (e.g. DCT) is applied. Each block has its own AC quantizer.
+constexpr size_t kBlockDim = 8;
+
+constexpr size_t kDCTBlockSize = kBlockDim * kBlockDim;
+
+constexpr size_t kGroupDim = 256;
+static_assert(kGroupDim % kBlockDim == 0,
+              "Group dim should be divisible by block dim");
+constexpr size_t kGroupDimInBlocks = kGroupDim / kBlockDim;
+
+// Maximum number of passes in an image.
+constexpr size_t kMaxNumPasses = 11;
+
+// Maximum number of reference frames.
+constexpr size_t kMaxNumReferenceFrames = 4;
+
+// Dimensions of a frame, in pixels, and other derived dimensions.
+// Computed from FrameHeader; call Set() before reading any field.
+// TODO(veluca): add extra channels.
+struct FrameDimensions {
+  void Set(size_t xsize, size_t ysize, size_t group_size_shift,
+           size_t max_hshift, size_t max_vshift, bool modular_mode,
+           size_t upsampling) {
+    group_dim = (kGroupDim >> 1) << group_size_shift;  // 128 << shift
+    dc_group_dim = group_dim * kBlockDim;
+    xsize_upsampled = xsize;
+    ysize_upsampled = ysize;
+    this->xsize = DivCeil(xsize, upsampling);
+    this->ysize = DivCeil(ysize, upsampling);
+    xsize_blocks = DivCeil(this->xsize, kBlockDim << max_hshift) << max_hshift;
+    ysize_blocks = DivCeil(this->ysize, kBlockDim << max_vshift) << max_vshift;
+    xsize_padded = xsize_blocks * kBlockDim;
+    ysize_padded = ysize_blocks * kBlockDim;
+    if (modular_mode) {
+      // Modular mode doesn't have any padding.
+      xsize_padded = this->xsize;
+      ysize_padded = this->ysize;
+    }
+    xsize_upsampled_padded = xsize_padded * upsampling;
+    ysize_upsampled_padded = ysize_padded * upsampling;
+    xsize_groups = DivCeil(this->xsize, group_dim);
+    ysize_groups = DivCeil(this->ysize, group_dim);
+    xsize_dc_groups = DivCeil(xsize_blocks, group_dim);  // counted in blocks
+    ysize_dc_groups = DivCeil(ysize_blocks, group_dim);
+    num_groups = xsize_groups * ysize_groups;
+    num_dc_groups = xsize_dc_groups * ysize_dc_groups;
+  }
+
+  // Image size without any upsampling, i.e. original_size / upsampling.
+  size_t xsize;
+  size_t ysize;
+  // Original image size.
+  size_t xsize_upsampled;
+  size_t ysize_upsampled;
+  // Image size after upsampling the padded image.
+  size_t xsize_upsampled_padded;
+  size_t ysize_upsampled_padded;
+  // Image size after padding to a multiple of kBlockDim (if VarDCT mode).
+  size_t xsize_padded;
+  size_t ysize_padded;
+  // Image size in kBlockDim blocks.
+  size_t xsize_blocks;
+  size_t ysize_blocks;
+  // Image size in number of groups.
+  size_t xsize_groups;
+  size_t ysize_groups;
+  // Image size in number of DC groups.
+  size_t xsize_dc_groups;
+  size_t ysize_dc_groups;
+  // Number of AC or DC groups.
+  size_t num_groups;
+  size_t num_dc_groups;
+  // Size of a group.
+  size_t group_dim;
+  size_t dc_group_dim;
+};
+
+// C++11 has no std::make_unique: define jxl's own; newer standards reuse std's.
+#if __cplusplus < 201402L
+template <typename T, typename... Args>
+std::unique_ptr<T> make_unique(Args&&... args) {
+  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
+}
+#else
+using std::make_unique;
+#endif
+
+template <typename T>
+JXL_INLINE T Clamp1(T val, T low, T hi) {
+  return val < low ? low : val > hi ? hi : val;  // low is checked before hi
+}
+
+// Encodes X >= 0 as 2 * X and -X < 0 as 2 * X - 1: 0, -1, 1, -2 -> 0, 1, 2, 3.
+constexpr uint32_t PackSigned(int32_t value)
+    JXL_NO_SANITIZE("unsigned-integer-overflow") {
+  return (static_cast<uint32_t>(value) << 1) ^
+         ((static_cast<uint32_t>(~value) >> 31) - 1);  // 0 or all-ones mask
+}
+
+// Reverse to PackSigned, i.e. UnpackSigned(PackSigned(X)) == X.
+constexpr intptr_t UnpackSigned(size_t value) {  // even: v/2, odd: -(v/2)-1
+  return static_cast<intptr_t>((value >> 1) ^ (((~value) & 1) - 1));
+}
+
+// Formats a number as text: "%g" for floating point, decimal for integers.
+template <typename T>
+std::string ToString(T n) {
+  // T(0.1) truncates to zero for integer types; T(-1) wraps for unsigned ones.
+  const bool is_floating = T(0.1) != T(0);
+  const bool is_unsigned = T(-1) > T(0);
+  char buf[32] = {};
+  if (is_floating) {
+    snprintf(buf, sizeof(buf), "%g", static_cast<double>(n));
+  } else if (is_unsigned) {
+    snprintf(buf, sizeof(buf), "%llu", static_cast<unsigned long long>(n));
+  } else {
+    snprintf(buf, sizeof(buf), "%lld", static_cast<long long>(n));
+  }
+  return buf;
+}
+}  // namespace jxl
+
+#endif  // LIB_JXL_COMMON_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/compressed_dc.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/compressed_dc.cc
new file mode 100644
index 0000000000..bac580acaa
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/compressed_dc.cc
@@ -0,0 +1,312 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/compressed_dc.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/compressed_dc.cc"
+#include <hwy/aligned_allocator.h>
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/image.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+using D = HWY_FULL(float);
+using DScalar = HWY_CAPPED(float, 1);
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::Vec;
+
+// TODO(veluca): optimize constants.
+const float w1 = 0.20345139757231578f;
+const float w2 = 0.0334829185968739f;
+const float w0 = 1.0f - 4.0f * (w1 + w2);
+
+template <class V>
+V MaxWorkaround(V a, V b) {  // lane-wise maximum of a and b
+#if (HWY_TARGET == HWY_AVX3) && HWY_COMPILER_CLANG <= 800
+  // Prevents "Do not know how to split the result of this operator" error
+  return IfThenElse(a > b, a, b);
+#else
+  return Max(a, b);
+#endif
+}
+
+// One channel at column x: loads *mc, computes *sm, and raises *gap if needed.
+template <typename D>
+JXL_INLINE void ComputePixelChannel(const D d, const float dc_factor,
+                                    const float* JXL_RESTRICT row_top,
+                                    const float* JXL_RESTRICT row,
+                                    const float* JXL_RESTRICT row_bottom,
+                                    Vec<D>* JXL_RESTRICT mc,
+                                    Vec<D>* JXL_RESTRICT sm,
+                                    Vec<D>* JXL_RESTRICT gap, size_t x) {
+  const auto tl = LoadU(d, row_top + x - 1);
+  const auto tc = Load(d, row_top + x);
+  const auto tr = LoadU(d, row_top + x + 1);
+  // Middle row; the center samples are also handed back through *mc.
+  const auto ml = LoadU(d, row + x - 1);
+  *mc = Load(d, row + x);
+  const auto mr = LoadU(d, row + x + 1);
+  // Bottom row.
+  const auto bl = LoadU(d, row_bottom + x - 1);
+  const auto bc = Load(d, row_bottom + x);
+  const auto br = LoadU(d, row_bottom + x + 1);
+  // 3x3 kernel weights: center w0, edge neighbors w1, diagonal neighbors w2.
+  const auto w_center = Set(d, w0);
+  const auto w_side = Set(d, w1);
+  const auto w_corner = Set(d, w2);
+  const auto corner = tl + tr + bl + br;
+  const auto side = ml + mr + tc + bc;
+  *sm = corner * w_corner + side * w_side + *mc * w_center;
+  // Keep the largest |center - smoothed| / dc_factor seen so far in *gap.
+  const auto dc_quant = Set(d, dc_factor);
+  *gap = MaxWorkaround(*gap, Abs((*mc - *sm) / dc_quant));
+}
+
+// Blends original and smoothed DC at column x for all three channels.
+template <typename D>
+JXL_INLINE void ComputePixel(
+    const float* JXL_RESTRICT dc_factors,
+    const float* JXL_RESTRICT* JXL_RESTRICT rows_top,
+    const float* JXL_RESTRICT* JXL_RESTRICT rows,
+    const float* JXL_RESTRICT* JXL_RESTRICT rows_bottom,
+    float* JXL_RESTRICT* JXL_RESTRICT out_rows, size_t x) {
+  const D d;
+  auto mc_x = Undefined(d);
+  auto mc_y = Undefined(d);
+  auto mc_b = Undefined(d);
+  auto sm_x = Undefined(d);
+  auto sm_y = Undefined(d);
+  auto sm_b = Undefined(d);
+  auto gap = Set(d, 0.5f);  // floor for the max taken over the three channels
+  ComputePixelChannel(d, dc_factors[0], rows_top[0], rows[0], rows_bottom[0],
+                      &mc_x, &sm_x, &gap, x);
+  ComputePixelChannel(d, dc_factors[1], rows_top[1], rows[1], rows_bottom[1],
+                      &mc_y, &sm_y, &gap, x);
+  ComputePixelChannel(d, dc_factors[2], rows_top[2], rows[2], rows_bottom[2],
+                      &mc_b, &sm_b, &gap, x);
+  auto factor = MulAdd(Set(d, -4.0f), gap, Set(d, 3.0f));  // 3 - 4 * gap
+  factor = ZeroIfNegative(factor);  // with gap >= 0.5 this lies in [0, 1]
+  auto out = MulAdd(sm_x - mc_x, factor, mc_x);
+  Store(out, d, out_rows[0] + x);
+  out = MulAdd(sm_y - mc_y, factor, mc_y);
+  Store(out, d, out_rows[1] + x);
+  out = MulAdd(sm_b - mc_b, factor, mc_b);
+  Store(out, d, out_rows[2] + x);
+}
+
+// Replaces *dc with a smoothed copy; the outermost rows/columns are copied.
+void AdaptiveDCSmoothing(const float* dc_factors, Image3F* dc,
+                         ThreadPool* pool) {
+  const size_t xsize = dc->xsize();
+  const size_t ysize = dc->ysize();
+  if (ysize <= 2 || xsize <= 2) return;  // no interior pixels to smooth
+
+  // TODO(veluca): use tile-based processing?
+  // TODO(veluca): decide if changes to the y channel should be propagated to
+  // the x and b channels through color correlation.
+  JXL_ASSERT(w1 + w2 < 0.25f);
+
+  PROFILER_FUNC;
+  Image3F smoothed(xsize, ysize);
+  // Fill in borders that the loop below will not. First and last are unused.
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t y : {size_t(0), ysize - 1}) {
+      memcpy(smoothed.PlaneRow(c, y), dc->PlaneRow(c, y),
+             xsize * sizeof(float));
+    }
+  }
+  auto process_row = [&](int y, int /*thread*/) {
+    const float* JXL_RESTRICT rows_top[3]{
+        dc->ConstPlaneRow(0, y - 1),
+        dc->ConstPlaneRow(1, y - 1),
+        dc->ConstPlaneRow(2, y - 1),
+    };
+    const float* JXL_RESTRICT rows[3] = {
+        dc->ConstPlaneRow(0, y),
+        dc->ConstPlaneRow(1, y),
+        dc->ConstPlaneRow(2, y),
+    };
+    const float* JXL_RESTRICT rows_bottom[3] = {
+        dc->ConstPlaneRow(0, y + 1),
+        dc->ConstPlaneRow(1, y + 1),
+        dc->ConstPlaneRow(2, y + 1),
+    };
+    float* JXL_RESTRICT rows_out[3] = {
+        smoothed.PlaneRow(0, y),
+        smoothed.PlaneRow(1, y),
+        smoothed.PlaneRow(2, y),
+    };
+    for (size_t x : {size_t(0), xsize - 1}) {
+      for (size_t c = 0; c < 3; c++) {
+        rows_out[c][x] = rows[c][x];
+      }
+    }
+
+    size_t x = 1;
+    // First pixels
+    const size_t N = Lanes(D());
+    for (; x < std::min(N, xsize - 1); x++) {
+      ComputePixel<DScalar>(dc_factors, rows_top, rows, rows_bottom, rows_out,
+                            x);
+    }
+    // Full vectors.
+    for (; x + N <= xsize - 1; x += N) {
+      ComputePixel<D>(dc_factors, rows_top, rows, rows_bottom, rows_out, x);
+    }
+    // Last pixels.
+    for (; x < xsize - 1; x++) {
+      ComputePixel<DScalar>(dc_factors, rows_top, rows, rows_bottom, rows_out,
+                            x);
+    }
+  };
+  RunOnPool(pool, 1, ysize - 1, ThreadPool::SkipInit(), process_row,
+            "DCSmoothingRow");
+  dc->Swap(smoothed);
+}
+
+// DC dequantization: scales quantized DC from `in` into *dc, fills *quant_dc.
+void DequantDC(const Rect& r, Image3F* dc, ImageB* quant_dc, const Image& in,
+               const float* dc_factors, float mul, const float* cfl_factors,
+               YCbCrChromaSubsampling chroma_subsampling,
+               const BlockCtxMap& bctx) {
+  const HWY_FULL(float) df;
+  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
+  if (chroma_subsampling.Is444()) {
+    const auto fac_x = Set(df, dc_factors[0] * mul);
+    const auto fac_y = Set(df, dc_factors[1] * mul);
+    const auto fac_b = Set(df, dc_factors[2] * mul);
+    const auto cfl_fac_x = Set(df, cfl_factors[0]);
+    const auto cfl_fac_b = Set(df, cfl_factors[2]);
+    for (size_t y = 0; y < r.ysize(); y++) {
+      float* dec_row_x = r.PlaneRow(dc, 0, y);
+      float* dec_row_y = r.PlaneRow(dc, 1, y);
+      float* dec_row_b = r.PlaneRow(dc, 2, y);
+      const int32_t* quant_row_x = in.channel[1].plane.Row(y);
+      const int32_t* quant_row_y = in.channel[0].plane.Row(y);
+      const int32_t* quant_row_b = in.channel[2].plane.Row(y);
+      for (size_t x = 0; x < r.xsize(); x += Lanes(di)) {
+        const auto in_q_x = Load(di, quant_row_x + x);
+        const auto in_q_y = Load(di, quant_row_y + x);
+        const auto in_q_b = Load(di, quant_row_b + x);
+        const auto in_x = ConvertTo(df, in_q_x) * fac_x;
+        const auto in_y = ConvertTo(df, in_q_y) * fac_y;
+        const auto in_b = ConvertTo(df, in_q_b) * fac_b;
+        Store(in_y, df, dec_row_y + x);
+        Store(MulAdd(in_y, cfl_fac_x, in_x), df, dec_row_x + x);
+        Store(MulAdd(in_y, cfl_fac_b, in_b), df, dec_row_b + x);
+      }
+    }
+  } else {
+    for (size_t c : {1, 0, 2}) {
+      Rect rect(r.x0() >> chroma_subsampling.HShift(c),
+                r.y0() >> chroma_subsampling.VShift(c),
+                r.xsize() >> chroma_subsampling.HShift(c),
+                r.ysize() >> chroma_subsampling.VShift(c));
+      const auto fac = Set(df, dc_factors[c] * mul);
+      const Channel& ch = in.channel[c < 2 ? c ^ 1 : c];  // 0 <-> 1 swapped
+      for (size_t y = 0; y < rect.ysize(); y++) {
+        const int32_t* quant_row = ch.plane.Row(y);
+        float* row = rect.PlaneRow(dc, c, y);
+        for (size_t x = 0; x < rect.xsize(); x += Lanes(di)) {
+          const auto in_q = Load(di, quant_row + x);
+          const auto in = ConvertTo(df, in_q) * fac;
+          Store(in, df, row + x);
+        }
+      }
+    }
+  }
+  if (bctx.num_dc_ctxs <= 1) {  // single context: bucket 0 everywhere
+    for (size_t y = 0; y < r.ysize(); y++) {
+      uint8_t* qdc_row = r.Row(quant_dc, y);
+      memset(qdc_row, 0, sizeof(*qdc_row) * r.xsize());
+    }
+  } else {
+    for (size_t y = 0; y < r.ysize(); y++) {
+      uint8_t* qdc_row_val = r.Row(quant_dc, y);
+      const int32_t* quant_row_x =
+          in.channel[1].plane.Row(y >> chroma_subsampling.VShift(0));
+      const int32_t* quant_row_y =
+          in.channel[0].plane.Row(y >> chroma_subsampling.VShift(1));
+      const int32_t* quant_row_b =
+          in.channel[2].plane.Row(y >> chroma_subsampling.VShift(2));
+      for (size_t x = 0; x < r.xsize(); x++) {
+        int bucket_x = 0, bucket_y = 0, bucket_b = 0;
+        for (int t : bctx.dc_thresholds[0]) {
+          if (quant_row_x[x >> chroma_subsampling.HShift(0)] > t) bucket_x++;
+        }
+        for (int t : bctx.dc_thresholds[1]) {
+          if (quant_row_y[x >> chroma_subsampling.HShift(1)] > t) bucket_y++;
+        }
+        for (int t : bctx.dc_thresholds[2]) {
+          if (quant_row_b[x >> chroma_subsampling.HShift(2)] > t) bucket_b++;
+        }
+        int bucket = bucket_x;  // mixed radix, digits in the order x, b, y
+        bucket *= bctx.dc_thresholds[2].size() + 1;
+        bucket += bucket_b;
+        bucket *= bctx.dc_thresholds[1].size() + 1;
+        bucket += bucket_y;
+        qdc_row_val[x] = bucket;
+      }
+    }
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+// Entry points in jxl:: that forward through HWY_DYNAMIC_DISPATCH.
+HWY_EXPORT(DequantDC);
+HWY_EXPORT(AdaptiveDCSmoothing);
+void AdaptiveDCSmoothing(const float* dc_factors, Image3F* dc,
+                         ThreadPool* pool) {
+  return HWY_DYNAMIC_DISPATCH(AdaptiveDCSmoothing)(dc_factors, dc, pool);
+}
+
+void DequantDC(const Rect& r, Image3F* dc, ImageB* quant_dc, const Image& in,
+               const float* dc_factors, float mul, const float* cfl_factors,
+               YCbCrChromaSubsampling chroma_subsampling,
+               const BlockCtxMap& bctx) {
+  return HWY_DYNAMIC_DISPATCH(DequantDC)(r, dc, quant_dc, in, dc_factors, mul,
+                                         cfl_factors, chroma_subsampling, bctx);
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/compressed_dc.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/compressed_dc.h
new file mode 100644
index 0000000000..b06e5931f0
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/compressed_dc.h
@@ -0,0 +1,34 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COMPRESSED_DC_H_
+#define LIB_JXL_COMPRESSED_DC_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/modular/modular_image.h"
+
+// DC handling functions: encoding and decoding of DC to and from bitstream, and
+// related function to initialize the per-group decoder cache.
+
+namespace jxl {
+
+// Smooth DC in already-smooth areas, to counteract banding.
+void AdaptiveDCSmoothing(const float* dc_factors, Image3F* dc,
+                         ThreadPool* pool);
+
+void DequantDC(const Rect& r, Image3F* dc, ImageB* quant_dc, const Image& in,
+               const float* dc_factors, float mul, const float* cfl_factors,
+               YCbCrChromaSubsampling chroma_subsampling,
+               const BlockCtxMap& bctx);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_COMPRESSED_DC_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/compressed_image_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/compressed_image_test.cc
new file mode 100644
index 0000000000..7546127616
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/compressed_image_test.cc
@@ -0,0 +1,102 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <algorithm>
+#include <string>
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "lib/extras/codec.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/base/thread_pool_internal.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/gaborish.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/passes_state.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/testdata.h"
+
+namespace jxl {
+namespace {
+
+// Verifies that RoundtripImage reconstructs with low butteraugli distance.
+void RunRGBRoundTrip(float distance, bool fast) {
+  ThreadPoolInternal pool(4);
+
+  // Load the reference PNG and crop it, because this test can only handle a
+  // single group.
+  const PaddedBytes png_bytes =
+      ReadTestData("wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut io;
+  JXL_CHECK(SetFromBytes(Span<const uint8_t>(png_bytes), &io, &pool));
+  io.ShrinkTo(std::min(io.xsize(), kGroupDim), std::min(io.ysize(), kGroupDim));
+
+  // Convert to XYB, pad to a multiple of kBlockDim, apply inverse Gaborish.
+  Image3F xyb(io.xsize(), io.ysize());
+  (void)ToXYB(io.Main(), &pool, &xyb);
+  xyb = PadImageToMultiple(xyb, kBlockDim);
+  GaborishInverse(&xyb, 1.0f, &pool);
+
+  CompressParams cparams;
+  cparams.butteraugli_distance = distance;
+  if (fast) cparams.speed_tier = SpeedTier::kWombat;
+
+  JXL_CHECK(io.metadata.size.Set(xyb.xsize(), xyb.ysize()));
+  FrameHeader frame_header(&io.metadata);
+  frame_header.color_transform = ColorTransform::kXYB;
+  auto& loop_filter = frame_header.loop_filter;
+  loop_filter.epf_iters = 0;
+  // Use custom weights for Gaborish (the same pair for X, Y and B).
+  loop_filter.gab_custom = true;
+  loop_filter.gab_x_weight1 = 0.11501538179658321f;
+  loop_filter.gab_x_weight2 = 0.089979079587015454f;
+  loop_filter.gab_y_weight1 = 0.11501538179658321f;
+  loop_filter.gab_y_weight2 = 0.089979079587015454f;
+  loop_filter.gab_b_weight1 = 0.11501538179658321f;
+  loop_filter.gab_b_weight2 = 0.089979079587015454f;
+
+  PassesEncoderState enc_state;
+  auto& shared = enc_state.shared;
+  JXL_CHECK(InitializePassesSharedState(frame_header, &shared));
+  shared.quantizer.SetQuant(4.0f, 4.0f, &shared.raw_quant_field);
+  shared.ac_strategy.FillDCT8();
+  enc_state.cparams = cparams;
+  ZeroFillImage(&shared.epf_sharpness);
+
+  CodecInOut decoded;
+  decoded.Main() = RoundtripImage(xyb, &enc_state, &pool);
+  decoded.metadata.m.color_encoding = decoded.Main().c_current();
+
+  EXPECT_LE(ButteraugliDistance(io, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, &pool),
+            1.2);
+}
+
+TEST(CompressedImageTest, RGBRoundTrip_1) { RunRGBRoundTrip(1.0, false); }
+
+TEST(CompressedImageTest, RGBRoundTrip_1_fast) { RunRGBRoundTrip(1.0, true); }
+
+TEST(CompressedImageTest, RGBRoundTrip_2) { RunRGBRoundTrip(2.0, false); }
+
+TEST(CompressedImageTest, RGBRoundTrip_2_fast) { RunRGBRoundTrip(2.0, true); }
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/convolve-inl.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/convolve-inl.h
new file mode 100644
index 0000000000..255bb9d051
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/convolve-inl.h
@@ -0,0 +1,119 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JXL_CONVOLVE_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_CONVOLVE_INL_H_
+#undef LIB_JXL_CONVOLVE_INL_H_
+#else
+#define LIB_JXL_CONVOLVE_INL_H_
+#endif
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/status.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Broadcast;
+#if HWY_TARGET != HWY_SCALAR
+using hwy::HWY_NAMESPACE::CombineShiftRightBytes;
+#endif
+using hwy::HWY_NAMESPACE::Vec;
+
+// Synthesizes mirrored left neighbors from a vector of center pixels.
+class Neighbors {
+ public:
+  using D = HWY_CAPPED(float, 16);
+  using V = Vec<D>;
+
+  // Returns l[i] == c[Mirror(i - 1)]: lanes move up by one, lane 0 repeats.
+  HWY_INLINE HWY_MAYBE_UNUSED static V FirstL1(const V c) {
+#if HWY_CAP_GE256
+    const D d;
+    HWY_ALIGN constexpr int32_t lanes[16] = {0, 0, 1, 2,  3,  4,  5,  6,
+                                             7, 8, 9, 10, 11, 12, 13, 14};
+    const auto indices = SetTableIndices(d, lanes);
+    // c = PONM'LKJI
+    return TableLookupLanes(c, indices);  // ONML'KJII
+#elif HWY_TARGET == HWY_SCALAR
+    return c;  // Same (the first mirrored value is the last valid one)
+#else  // 128 bit
+    // c = LKJI
+#if HWY_ARCH_X86
+    return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(2, 1, 0, 0))};  // KJII
+#else
+    const D d;
+    // TODO(deymo): Figure out if this can be optimized using a single vsri
+    // instruction to convert LKJI to KJII.
+    HWY_ALIGN constexpr int lanes[4] = {0, 0, 1, 2};  // KJII
+    const auto indices = SetTableIndices(d, lanes);
+    return TableLookupLanes(c, indices);
+#endif
+#endif
+  }
+
+  // Returns l[i] == c[Mirror(i - 2)]: lanes 0 and 1 become c[1] and c[0].
+  HWY_INLINE HWY_MAYBE_UNUSED static V FirstL2(const V c) {
+#if HWY_CAP_GE256
+    const D d;
+    HWY_ALIGN constexpr int32_t lanes[16] = {1, 0, 0, 1, 2,  3,  4,  5,
+                                             6, 7, 8, 9, 10, 11, 12, 13};
+    const auto indices = SetTableIndices(d, lanes);
+    // c = PONM'LKJI
+    return TableLookupLanes(c, indices);  // NMLK'JIIJ
+#elif HWY_TARGET == HWY_SCALAR
+    const D d;
+    JXL_ASSERT(false);  // unsupported, avoid calling this.
+    return Zero(d);
+#else  // 128 bit
+    // c = LKJI
+#if HWY_ARCH_X86
+    return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(1, 0, 0, 1))};  // JIIJ
+#else
+    const D d;
+    HWY_ALIGN constexpr int lanes[4] = {1, 0, 0, 1};  // JIIJ
+    const auto indices = SetTableIndices(d, lanes);
+    return TableLookupLanes(c, indices);
+#endif
+#endif
+  }
+
+  // Returns l[i] == c[Mirror(i - 3)]: lanes 0..2 become c[2], c[1], c[0].
+  HWY_INLINE HWY_MAYBE_UNUSED static V FirstL3(const V c) {
+#if HWY_CAP_GE256
+    const D d;
+    HWY_ALIGN constexpr int32_t lanes[16] = {2, 1, 0, 0, 1, 2,  3,  4,
+                                             5, 6, 7, 8, 9, 10, 11, 12};
+    const auto indices = SetTableIndices(d, lanes);
+    // c = PONM'LKJI
+    return TableLookupLanes(c, indices);  // MLKJ'IIJK
+#elif HWY_TARGET == HWY_SCALAR
+    const D d;
+    JXL_ASSERT(false);  // unsupported, avoid calling this.
+    return Zero(d);
+#else  // 128 bit
+    // c = LKJI
+#if HWY_ARCH_X86
+    return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(0, 0, 1, 2))};  // IIJK
+#else
+    const D d;
+    HWY_ALIGN constexpr int lanes[4] = {2, 1, 0, 0};  // IIJK
+    const auto indices = SetTableIndices(d, lanes);
+    return TableLookupLanes(c, indices);
+#endif
+#endif
+  }
+};
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_CONVOLVE_INL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/convolve.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/convolve.cc
new file mode 100644
index 0000000000..cc7fc3f90e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/convolve.cc
@@ -0,0 +1,1332 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/convolve.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/convolve.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/common.h"  // RoundUpTo
+#include "lib/jxl/convolve-inl.h"
+#include "lib/jxl/image_ops.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Vec;
+
+// Scalar 1x5 weighted sum around (ix, iy) using [wx2 wx1 wx0 wx1 wx2].
+template <class WrapY>
+static float WeightedSumBorder(const ImageF& in, const WrapY wrap_y,
+                               const int64_t ix, const int64_t iy,
+                               const size_t xsize, const size_t ysize,
+                               const float wx0, const float wx1,
+                               const float wx2) {
+  const WrapMirror mirror_x;  // applied to every neighbor x coordinate
+  const float* JXL_RESTRICT pixels = in.ConstRow(wrap_y(iy, ysize));
+  const float center = pixels[ix];
+  const float left1 = pixels[mirror_x(ix - 1, xsize)];
+  const float right1 = pixels[mirror_x(ix + 1, xsize)];
+  const float left2 = pixels[mirror_x(ix - 2, xsize)];
+  const float right2 = pixels[mirror_x(ix + 2, xsize)];
+  const float outer = wx2 * (left2 + right2);
+  const float inner = wx1 * (left1 + right1);
+  const float middle = wx0 * center;
+  return outer + inner + middle;
+}
+
+template <class WrapY, class V>
+static V WeightedSum(const ImageF& in, const WrapY wrap_y, const size_t ix,
+                     const int64_t iy, const size_t ysize, const V wx0,
+                     const V wx1, const V wx2) {
+  const HWY_FULL(float) d;  // vector analogue of WeightedSumBorder
+  const float* JXL_RESTRICT mid = in.ConstRow(wrap_y(iy, ysize)) + ix;
+  const auto c0 = Load(d, mid);  // aligned load; the neighbors use LoadU
+  const auto l1 = LoadU(d, mid - 1);
+  const auto r1 = LoadU(d, mid + 1);
+  const auto l2 = LoadU(d, mid - 2);
+  const auto r2 = LoadU(d, mid + 2);
+  const auto outer = wx2 * (l2 + r2);
+  const auto inner = wx1 * (l1 + r1);
+  const auto middle = wx0 * c0;
+  return outer + inner + middle;
+}
+
+// Computes the symmetric 5x5 convolution result for the single pixel (ix, iy).
+template <class WrapY>
+float Symmetric5Border(const ImageF& in, const Rect& rect, const int64_t ix,
+                       const int64_t iy, const WeightsSymmetric5& weights) {
+  // Weights named by (|dy|, |dx|) offset from the center of the kernel.
+  const float k00 = weights.c[0];
+  const float k01 = weights.r[0];
+  const float k02 = weights.R[0];
+  const float k11 = weights.d[0];
+  const float k12 = weights.L[0];
+  const float k22 = weights.D[0];
+  const size_t xs = rect.xsize();
+  const size_t ys = rect.ysize();
+  const WrapY wrap_y;
+
+  // Rows at and above iy accumulate into `top`, rows below into `bot`.
+  float top = WeightedSumBorder(in, wrap_y, ix, iy, xs, ys, k00, k01, k02);
+
+  top += WeightedSumBorder(in, wrap_y, ix, iy - 2, xs, ys, k02, k12, k22);
+  float bot = WeightedSumBorder(in, wrap_y, ix, iy + 2, xs, ys, k02, k12, k22);
+
+  top += WeightedSumBorder(in, wrap_y, ix, iy - 1, xs, ys, k01, k11, k12);
+  bot += WeightedSumBorder(in, wrap_y, ix, iy + 1, xs, ys, k01, k11, k12);
+
+  return top + bot;
+}
+
+// Computes one vector of symmetric 5x5 results starting at (ix, iy).
+template <class WrapY>
+static void Symmetric5Interior(const ImageF& in, const Rect& rect,
+                               const int64_t ix, const int64_t iy,
+                               const WeightsSymmetric5& weights,
+                               float* JXL_RESTRICT row_out) {
+  const HWY_FULL(float) d;
+  const size_t ys = rect.ysize();
+  const WrapY wrap_y;
+  // Weights named by (|dy|, |dx|) offset from the center of the kernel.
+  const auto k00 = LoadDup128(d, weights.c);
+  const auto k01 = LoadDup128(d, weights.r);
+  const auto k02 = LoadDup128(d, weights.R);
+  const auto k11 = LoadDup128(d, weights.d);
+  const auto k12 = LoadDup128(d, weights.L);
+  const auto k22 = LoadDup128(d, weights.D);
+
+  // Rows at and above iy accumulate into `top`, rows below into `bot`.
+  auto top = WeightedSum(in, wrap_y, ix, iy, ys, k00, k01, k02);
+
+  top += WeightedSum(in, wrap_y, ix, iy - 2, ys, k02, k12, k22);
+  auto bot = WeightedSum(in, wrap_y, ix, iy + 2, ys, k02, k12, k22);
+
+  top += WeightedSum(in, wrap_y, ix, iy - 1, ys, k01, k11, k12);
+  bot += WeightedSum(in, wrap_y, ix, iy + 1, ys, k01, k11, k12);
+
+  Store(top + bot, d, row_out + ix);
+}
+
+template <class WrapY>
+static void Symmetric5Row(const ImageF& in, const Rect& rect, const int64_t iy,
+                          const WeightsSymmetric5& weights,
+                          float* JXL_RESTRICT row_out) {
+  const int64_t kRadius = 2;
+  const HWY_FULL(float) d;
+  const size_t N = Lanes(d);
+  const size_t xsize = rect.xsize();
+  const size_t aligned_x = RoundUpTo(kRadius, N);
+  const size_t left_end = std::min(aligned_x, xsize);
+  size_t x = 0;
+  for (; x < left_end; ++x) {  // left border, one pixel at a time
+    row_out[x] = Symmetric5Border<WrapY>(in, rect, x, iy, weights);
+  }
+  for (; x + N + kRadius <= xsize; x += N) {  // whole vectors
+    Symmetric5Interior<WrapY>(in, rect, x, iy, weights, row_out);
+  }
+  for (; x < xsize; ++x) {  // right border and leftovers, one pixel at a time
+    row_out[x] = Symmetric5Border<WrapY>(in, rect, x, iy, weights);
+  }
+}
+
+static JXL_NOINLINE void Symmetric5BorderRow(const ImageF& in, const Rect& rect,
+                                             const int64_t iy,
+                                             const WeightsSymmetric5& weights,
+                                             float* JXL_RESTRICT row_out) {
+  Symmetric5Row<WrapMirror>(in, rect, iy, weights, row_out);  // WrapMirror in y
+}
+
+#if HWY_TARGET != HWY_SCALAR
+
+// Returns indices for SetTableIndices such that TableLookupLanes on the
+// rightmost unaligned vector (rightmost sample in its most-significant lane)
+// returns the mirrored values, with the mirror outside the last valid sample.
+static inline const int32_t* MirrorLanes(const size_t mod) {
+  const HWY_CAPPED(float, 16) d;
+  constexpr size_t kN = MaxLanes(d);
+
+  // For mod = `image width mod 16` 0..15:
+  // last full vec     mirrored (mem order)  loadedVec  mirrorVec  idxVec
+  // 0123456789abcdef| fedcba9876543210      fed..210   012..def   012..def
+  // 0123456789abcdef|0 0fedcba98765432      0fe..321   234..f00   123..eff
+  // 0123456789abcdef|01 10fedcba987654      10f..432   456..110   234..ffe
+  // 0123456789abcdef|012 210fedcba9876      210..543   67..2210   34..ffed
+  // 0123456789abcdef|0123 3210fedcba98      321..654   8..33210   4..ffedc
+  // 0123456789abcdef|01234 43210fedcba
+  // 0123456789abcdef|012345 543210fedc
+  // 0123456789abcdef|0123456 6543210fe
+  // 0123456789abcdef|01234567 76543210
+  // 0123456789abcdef|012345678 8765432
+  // 0123456789abcdef|0123456789 987654
+  // 0123456789abcdef|0123456789A A9876
+  // 0123456789abcdef|0123456789AB BA98
+  // 0123456789abcdef|0123456789ABC CBA
+  // 0123456789abcdef|0123456789ABCD DC
+  // 0123456789abcdef|0123456789ABCDE E      EDC..10f   EED..210   ffe..321
+#if HWY_CAP_GE512
+  HWY_ALIGN static constexpr int32_t idx_lanes[2 * kN - 1] = {
+      1,  2,  3,  4,  5,  6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15,  //
+      14, 13, 12, 11, 10, 9, 8, 7, 6, 5,  4,  3,  2,  1,  0};
+#elif HWY_CAP_GE256
+  HWY_ALIGN static constexpr int32_t idx_lanes[2 * kN - 1] = {
+      1, 2, 3, 4, 5, 6, 7, 7,  //
+      6, 5, 4, 3, 2, 1, 0};
+#else  // 128-bit
+  HWY_ALIGN static constexpr int32_t idx_lanes[2 * kN - 1] = {1, 2, 3, 3,  //
+                                                              2, 1, 0};
+#endif
+  // mod == kN - 1 starts at {1, 2, ..}; mod == 0 starts at the reversed tail.
+  return idx_lanes + kN - 1 - mod;
+}
+
+#endif  // HWY_TARGET != HWY_SCALAR
+
+namespace strategy {
+
+struct StrategyBase {
+  using D = HWY_CAPPED(float, 16);  // float vector tag capped at 16 lanes
+  using V = Vec<D>;  // vector type matching tag D
+};
+
+// 3x3 convolution by symmetric kernel with a single scan through the input.
+class Symmetric3 : public StrategyBase {
+ public:
+  static constexpr int64_t kRadius = 1;
+
+  // Only accesses pixels in [0, xsize). kSizeModN is presumably xsize % N.
+  template <size_t kSizeModN, class WrapRow>
+  static JXL_INLINE void ConvolveRow(const float* const JXL_RESTRICT row_m,
+                                     const size_t xsize, const int64_t stride,
+                                     const WrapRow& wrap_row,
+                                     const WeightsSymmetric3& weights,
+                                     float* const JXL_RESTRICT row_out) {
+    const D d;
+    // t, m, b = top, middle, bottom row;
+    const float* const JXL_RESTRICT row_t = wrap_row(row_m - stride, stride);
+    const float* const JXL_RESTRICT row_b = wrap_row(row_m + stride, stride);
+
+    // Must load in advance - compiler doesn't understand LoadDup128 and
+    // schedules them too late.
+    const V w0 = LoadDup128(d, weights.c);
+    const V w1 = LoadDup128(d, weights.r);
+    const V w2 = LoadDup128(d, weights.d);
+
+    // l, c, r = left, center, right. Leftmost vector: need FirstL1.
+    {
+      const V tc = LoadU(d, row_t + 0);
+      const V mc = LoadU(d, row_m + 0);
+      const V bc = LoadU(d, row_b + 0);
+      const V tl = Neighbors::FirstL1(tc);
+      const V tr = LoadU(d, row_t + 0 + 1);
+      const V ml = Neighbors::FirstL1(mc);
+      const V mr = LoadU(d, row_m + 0 + 1);
+      const V bl = Neighbors::FirstL1(bc);
+      const V br = LoadU(d, row_b + 0 + 1);
+      const V conv =
+          WeightedSum(tl, tc, tr, ml, mc, mr, bl, bc, br, w0, w1, w2);
+      Store(conv, d, row_out + 0);
+    }
+
+    // Loop as long as we can load enough new values:
+    const size_t N = Lanes(d);
+    size_t x = N;
+    for (; x + N + kRadius <= xsize; x += N) {
+      const auto conv = ConvolveValid(row_t, row_m, row_b, x, w0, w1, w2);
+      Store(conv, d, row_out + x);
+    }
+
+    // For final (partial) vector:
+    const V tc = LoadU(d, row_t + x);
+    const V mc = LoadU(d, row_m + x);
+    const V bc = LoadU(d, row_b + x);
+
+    V tr, mr, br;
+#if HWY_TARGET == HWY_SCALAR
+    tr = tc;  // Single-lane => mirrored right neighbor = center value.
+    mr = mc;
+    br = bc;
+#else
+    if (kSizeModN == 0) {
+      // The above loop didn't handle the last vector because it needs an
+      // additional right neighbor (generated via mirroring).
+      auto mirror = SetTableIndices(d, MirrorLanes(N - 1));
+      tr = TableLookupLanes(tc, mirror);
+      mr = TableLookupLanes(mc, mirror);
+      br = TableLookupLanes(bc, mirror);
+    } else {
+      auto mirror = SetTableIndices(d, MirrorLanes((xsize % N) - 1));
+      // Loads last valid value into uppermost lane and mirrors.
+      tr = TableLookupLanes(LoadU(d, row_t + xsize - N), mirror);
+      mr = TableLookupLanes(LoadU(d, row_m + xsize - N), mirror);
+      br = TableLookupLanes(LoadU(d, row_b + xsize - N), mirror);
+    }
+#endif
+
+    const V tl = LoadU(d, row_t + x - 1);
+    const V ml = LoadU(d, row_m + x - 1);
+    const V bl = LoadU(d, row_b + x - 1);
+    const V conv = WeightedSum(tl, tc, tr, ml, mc, mr, bl, bc, br, w0, w1, w2);
+    Store(conv, d, row_out + x);
+  }
+
+ private:
+  // Returns sum{x_i * w_i}: w0 for mc, w1 for tc/bc/ml/mr, w2 for the corners.
+  template <class V>
+  static JXL_INLINE V WeightedSum(const V tl, const V tc, const V tr,
+                                  const V ml, const V mc, const V mr,
+                                  const V bl, const V bc, const V br,
+                                  const V w0, const V w1, const V w2) {
+    const V sum_tb = tc + bc;
+
+    // Faster than 5 mul + 4 FMA.
+    const V mul0 = mc * w0;
+    const V sum_lr = ml + mr;
+
+    const V x1 = sum_tb + sum_lr;
+    const V mul1 = MulAdd(x1, w1, mul0);
+
+    const V sum_t2 = tl + tr;
+    const V sum_b2 = bl + br;
+    const V x2 = sum_t2 + sum_b2;
+    const V mul2 = MulAdd(x2, w2, mul1);
+    return mul2;
+  }
+
+  static JXL_INLINE V ConvolveValid(const float* JXL_RESTRICT row_t,
+                                    const float* JXL_RESTRICT row_m,
+                                    const float* JXL_RESTRICT row_b,
+                                    const int64_t x, const V w0, const V w1,
+                                    const V w2) {
+    const D d;  // all nine neighbor vectors are loaded directly, no mirroring
+    const V tc = LoadU(d, row_t + x);
+    const V mc = LoadU(d, row_m + x);
+    const V bc = LoadU(d, row_b + x);
+    const V tl = LoadU(d, row_t + x - 1);
+    const V tr = LoadU(d, row_t + x + 1);
+    const V ml = LoadU(d, row_m + x - 1);
+    const V mr = LoadU(d, row_m + x + 1);
+    const V bl = LoadU(d, row_b + x - 1);
+    const V br = LoadU(d, row_b + x + 1);
+    return WeightedSum(tl, tc, tr, ml, mc, mr, bl, bc, br, w0, w1, w2);
+  }
+};
+
+// 5x5 convolution by separable kernel with a single scan through the input.
+// This is more cache-efficient than separate horizontal/vertical passes, and
+// possibly faster (given enough registers) than tiling and/or transposing.
+//
+// Overview: imagine a 5x5 window around a central pixel. First convolve the
+// rows by multiplying the pixels with the corresponding weights from
+// WeightsSeparable5.horz[abs(x_offset) * 4]. Then multiply each of these
+// intermediate results by the corresponding vertical weight, i.e.
+// vert[abs(y_offset) * 4]. Finally, store the sum of these values as the
+// convolution result at the position of the central pixel in the output.
+//
+// Each of these operations uses SIMD vectors. The central pixel and most
+// importantly the output are aligned, so neighboring pixels (e.g. x_offset=1)
+// require unaligned loads. Because weights are supplied in identical groups of
+// 4, we can use LoadDup128 to load them (slightly faster).
+//
+// Uses mirrored boundary handling. Until x >= kRadius, the horizontal
+// convolution uses Neighbors class to shuffle vectors as if each of its lanes
+// had been loaded from the mirrored offset. Similarly, the last full vector to
+// write uses mirroring. In the case of scalar vectors, Neighbors is not usable
+// and the value is loaded directly. Otherwise, the number of valid pixels
+// modulo the vector size enables a small optimization: for smaller offsets,
+// a non-mirrored load is sufficient.
+class Separable5 : public StrategyBase {
+ public:
+  static constexpr int64_t kRadius = 2;  // 5 taps: center plus 2 per side
+
+  template <size_t kSizeModN, class WrapRow>
+  static JXL_INLINE void ConvolveRow(const float* const JXL_RESTRICT row_m,
+                                     const size_t xsize, const int64_t stride,
+                                     const WrapRow& wrap_row,
+                                     const WeightsSeparable5& weights,
+                                     float* const JXL_RESTRICT row_out) {
+    const D d;
+    const int64_t neg_stride = -stride;  // allows LEA addressing.
+    const float* const JXL_RESTRICT row_t2 =
+        wrap_row(row_m + 2 * neg_stride, stride);
+    const float* const JXL_RESTRICT row_t1 =
+        wrap_row(row_m + 1 * neg_stride, stride);
+    const float* const JXL_RESTRICT row_b1 =
+        wrap_row(row_m + 1 * stride, stride);
+    const float* const JXL_RESTRICT row_b2 =
+        wrap_row(row_m + 2 * stride, stride);
+
+    const V wh0 = LoadDup128(d, weights.horz + 0 * 4);
+    const V wh1 = LoadDup128(d, weights.horz + 1 * 4);
+    const V wh2 = LoadDup128(d, weights.horz + 2 * 4);
+    const V wv0 = LoadDup128(d, weights.vert + 0 * 4);
+    const V wv1 = LoadDup128(d, weights.vert + 1 * 4);
+    const V wv2 = LoadDup128(d, weights.vert + 2 * 4);
+
+    size_t x = 0;
+
+    // More than one iteration for scalars.
+    for (; x < kRadius; x += Lanes(d)) {
+      const V conv0 = HorzConvolveFirst(row_m, x, xsize, wh0, wh1, wh2) * wv0;
+
+      const V conv1t = HorzConvolveFirst(row_t1, x, xsize, wh0, wh1, wh2);
+      const V conv1b = HorzConvolveFirst(row_b1, x, xsize, wh0, wh1, wh2);
+      const V conv1 = MulAdd(conv1t + conv1b, wv1, conv0);
+
+      const V conv2t = HorzConvolveFirst(row_t2, x, xsize, wh0, wh1, wh2);
+      const V conv2b = HorzConvolveFirst(row_b2, x, xsize, wh0, wh1, wh2);
+      const V conv2 = MulAdd(conv2t + conv2b, wv2, conv1);
+      Store(conv2, d, row_out + x);
+    }
+
+    // Main loop: load inputs without padding
+    for (; x + Lanes(d) + kRadius <= xsize; x += Lanes(d)) {
+      const V conv0 = HorzConvolve(row_m + x, wh0, wh1, wh2) * wv0;
+
+      const V conv1t = HorzConvolve(row_t1 + x, wh0, wh1, wh2);
+      const V conv1b = HorzConvolve(row_b1 + x, wh0, wh1, wh2);
+      const V conv1 = MulAdd(conv1t + conv1b, wv1, conv0);
+
+      const V conv2t = HorzConvolve(row_t2 + x, wh0, wh1, wh2);
+      const V conv2b = HorzConvolve(row_b2 + x, wh0, wh1, wh2);
+      const V conv2 = MulAdd(conv2t + conv2b, wv2, conv1);
+      Store(conv2, d, row_out + x);
+    }
+
+    // Last full vector to write (the above loop handled mod >= kRadius)
+#if HWY_TARGET == HWY_SCALAR
+    while (x < xsize) {
+#else
+    if (kSizeModN < kRadius) {
+#endif
+      const V conv0 =
+          HorzConvolveLast<kSizeModN>(row_m, x, xsize, wh0, wh1, wh2) * wv0;
+
+      const V conv1t =
+          HorzConvolveLast<kSizeModN>(row_t1, x, xsize, wh0, wh1, wh2);
+      const V conv1b =
+          HorzConvolveLast<kSizeModN>(row_b1, x, xsize, wh0, wh1, wh2);
+      const V conv1 = MulAdd(conv1t + conv1b, wv1, conv0);
+
+      const V conv2t =
+          HorzConvolveLast<kSizeModN>(row_t2, x, xsize, wh0, wh1, wh2);
+      const V conv2b =
+          HorzConvolveLast<kSizeModN>(row_b2, x, xsize, wh0, wh1, wh2);
+      const V conv2 = MulAdd(conv2t + conv2b, wv2, conv1);
+      Store(conv2, d, row_out + x);
+      x += Lanes(d);
+    }
+
+    // If mod = 0, the above vector was the last.
+    if (kSizeModN != 0) {
+      for (; x < xsize; ++x) {
+        float mul = 0.0f;
+        for (int64_t dy = -kRadius; dy <= kRadius; ++dy) {
+          const float wy = weights.vert[std::abs(dy) * 4];
+          const float* clamped_row = wrap_row(row_m + dy * stride, stride);
+          for (int64_t dx = -kRadius; dx <= kRadius; ++dx) {
+            const float wx = weights.horz[std::abs(dx) * 4];
+            const int64_t clamped_x = Mirror(x + dx, xsize);
+            mul += clamped_row[clamped_x] * wx * wy;
+          }
+        }
+        row_out[x] = mul;
+      }
+    }
+  }
+
+ private:
+  // Same as HorzConvolve for the first/last vector in a row.
+  static JXL_INLINE V HorzConvolveFirst(const float* const JXL_RESTRICT row,
+                                        const int64_t x, const int64_t xsize,
+                                        const V wh0, const V wh1, const V wh2) {
+    const D d;
+    const V c = LoadU(d, row + x);
+    const V mul0 = c * wh0;
+
+#if HWY_TARGET == HWY_SCALAR
+    const V l1 = LoadU(d, row + Mirror(x - 1, xsize));
+    const V l2 = LoadU(d, row + Mirror(x - 2, xsize));
+#else
+    (void)xsize;
+    const V l1 = Neighbors::FirstL1(c);
+    const V l2 = Neighbors::FirstL2(c);
+#endif
+
+    const V r1 = LoadU(d, row + x + 1);
+    const V r2 = LoadU(d, row + x + 2);
+
+    const V mul1 = MulAdd(l1 + r1, wh1, mul0);
+    const V mul2 = MulAdd(l2 + r2, wh2, mul1);
+    return mul2;
+  }
+
+  template <size_t kSizeModN>
+  static JXL_INLINE V HorzConvolveLast(const float* const JXL_RESTRICT row,
+                                       const int64_t x, const int64_t xsize,
+                                       const V wh0, const V wh1, const V wh2) {
+    const D d;
+    const V c = LoadU(d, row + x);
+    const V mul0 = c * wh0;
+
+    const V l1 = LoadU(d, row + x - 1);
+    const V l2 = LoadU(d, row + x - 2);
+
+    V r1, r2;
+#if HWY_TARGET == HWY_SCALAR
+    r1 = LoadU(d, row + Mirror(x + 1, xsize));
+    r2 = LoadU(d, row + Mirror(x + 2, xsize));
+#else
+    const size_t N = Lanes(d);
+    if (kSizeModN == 0) {
+      r2 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 2)));
+      r1 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 1)));
+    } else {  // == 1
+      const auto last = LoadU(d, row + xsize - N);
+      r2 = TableLookupLanes(last, SetTableIndices(d, MirrorLanes(N - 1)));
+      r1 = last;
+    }
+#endif
+
+    // Sum of pixels with Manhattan distance i, multiplied by weights[i].
+    const V sum1 = l1 + r1;
+    const V mul1 = MulAdd(sum1, wh1, mul0);
+    const V sum2 = l2 + r2;
+    const V mul2 = MulAdd(sum2, wh2, mul1);
+    return mul2;
+  }
+
+  // Requires kRadius valid pixels before/after pos.
+  static JXL_INLINE V HorzConvolve(const float* const JXL_RESTRICT pos,
+                                   const V wh0, const V wh1, const V wh2) {
+    const D d;
+    const V c = LoadU(d, pos);
+    const V mul0 = c * wh0;
+
+    // Loading anew is faster than combining vectors.
+    const V l1 = LoadU(d, pos - 1);
+    const V r1 = LoadU(d, pos + 1);
+    const V l2 = LoadU(d, pos - 2);
+    const V r2 = LoadU(d, pos + 2);
+    // Sum of pixels with Manhattan distance i, multiplied by weights[i].
+    const V sum1 = l1 + r1;
+    const V mul1 = MulAdd(sum1, wh1, mul0);
+    const V sum2 = l2 + r2;
+    const V mul2 = MulAdd(sum2, wh2, mul1);
+    return mul2;
+  }
+};  // class Separable5
+
+// 7x7 convolution by separable kernel with a single scan through the input.
+// Extended version of Separable5, see documentation there.
+class Separable7 : public StrategyBase {
+ public:
+  static constexpr int64_t kRadius = 3;
+
+  // Computes one output row of the 7x7 separable convolution.
+  //
+  // row_m is the centre input row; the three rows above and below it are
+  // addressed as row_m -/+ k * stride and passed through wrap_row before use.
+  // For every x, a 7-tap horizontal convolution (weights.horz) is evaluated on
+  // each of the seven rows and the per-row results are combined with
+  // weights.vert. Results are stored to row_out.
+  //
+  // kSizeModN is a compile-time hint about the row width that selects how the
+  // right end of the row is finished (see the last two sections below).
+  template <size_t kSizeModN, class WrapRow>
+  static JXL_INLINE void ConvolveRow(const float* const JXL_RESTRICT row_m,
+                                     const size_t xsize, const int64_t stride,
+                                     const WrapRow& wrap_row,
+                                     const WeightsSeparable7& weights,
+                                     float* const JXL_RESTRICT row_out) {
+    const D d;
+    const int64_t neg_stride = -stride;  // allows LEA addressing.
+    // Rows above (t1..t3) and below (b1..b3) the centre row.
+    const float* const JXL_RESTRICT row_t3 =
+        wrap_row(row_m + 3 * neg_stride, stride);
+    const float* const JXL_RESTRICT row_t2 =
+        wrap_row(row_m + 2 * neg_stride, stride);
+    const float* const JXL_RESTRICT row_t1 =
+        wrap_row(row_m + 1 * neg_stride, stride);
+    const float* const JXL_RESTRICT row_b1 =
+        wrap_row(row_m + 1 * stride, stride);
+    const float* const JXL_RESTRICT row_b2 =
+        wrap_row(row_m + 2 * stride, stride);
+    const float* const JXL_RESTRICT row_b3 =
+        wrap_row(row_m + 3 * stride, stride);
+
+    // Weight for distance i starts at index i * 4 of horz/vert.
+    const V wh0 = LoadDup128(d, weights.horz + 0 * 4);
+    const V wh1 = LoadDup128(d, weights.horz + 1 * 4);
+    const V wh2 = LoadDup128(d, weights.horz + 2 * 4);
+    const V wh3 = LoadDup128(d, weights.horz + 3 * 4);
+    const V wv0 = LoadDup128(d, weights.vert + 0 * 4);
+    const V wv1 = LoadDup128(d, weights.vert + 1 * 4);
+    const V wv2 = LoadDup128(d, weights.vert + 2 * 4);
+    const V wv3 = LoadDup128(d, weights.vert + 3 * 4);
+
+    size_t x = 0;
+
+    // Left edge: HorzConvolveFirst supplies the left neighbours that would
+    // otherwise lie before the start of the row.
+    // More than one iteration for scalars.
+    for (; x < kRadius; x += Lanes(d)) {
+      const V conv0 =
+          HorzConvolveFirst(row_m, x, xsize, wh0, wh1, wh2, wh3) * wv0;
+
+      const V conv1t = HorzConvolveFirst(row_t1, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv1b = HorzConvolveFirst(row_b1, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv1 = MulAdd(conv1t + conv1b, wv1, conv0);
+
+      const V conv2t = HorzConvolveFirst(row_t2, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv2b = HorzConvolveFirst(row_b2, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv2 = MulAdd(conv2t + conv2b, wv2, conv1);
+
+      const V conv3t = HorzConvolveFirst(row_t3, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv3b = HorzConvolveFirst(row_b3, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv3 = MulAdd(conv3t + conv3b, wv3, conv2);
+
+      Store(conv3, d, row_out + x);
+    }
+
+    // Main loop: load inputs without padding
+    // (runs while a full vector plus kRadius pixels to its right fit in xsize).
+    for (; x + Lanes(d) + kRadius <= xsize; x += Lanes(d)) {
+      const V conv0 = HorzConvolve(row_m + x, wh0, wh1, wh2, wh3) * wv0;
+
+      const V conv1t = HorzConvolve(row_t1 + x, wh0, wh1, wh2, wh3);
+      const V conv1b = HorzConvolve(row_b1 + x, wh0, wh1, wh2, wh3);
+      const V conv1 = MulAdd(conv1t + conv1b, wv1, conv0);
+
+      const V conv2t = HorzConvolve(row_t2 + x, wh0, wh1, wh2, wh3);
+      const V conv2b = HorzConvolve(row_b2 + x, wh0, wh1, wh2, wh3);
+      const V conv2 = MulAdd(conv2t + conv2b, wv2, conv1);
+
+      const V conv3t = HorzConvolve(row_t3 + x, wh0, wh1, wh2, wh3);
+      const V conv3b = HorzConvolve(row_b3 + x, wh0, wh1, wh2, wh3);
+      const V conv3 = MulAdd(conv3t + conv3b, wv3, conv2);
+
+      Store(conv3, d, row_out + x);
+    }
+
+    // Last full vector to write (the above loop handled mod >= kRadius)
+    // On the scalar target this is a loop that finishes the whole row; on
+    // vector targets it runs at most once.
+#if HWY_TARGET == HWY_SCALAR
+    while (x < xsize) {
+#else
+    if (kSizeModN < kRadius) {
+#endif
+      const V conv0 =
+          HorzConvolveLast<kSizeModN>(row_m, x, xsize, wh0, wh1, wh2, wh3) *
+          wv0;
+
+      const V conv1t =
+          HorzConvolveLast<kSizeModN>(row_t1, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv1b =
+          HorzConvolveLast<kSizeModN>(row_b1, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv1 = MulAdd(conv1t + conv1b, wv1, conv0);
+
+      const V conv2t =
+          HorzConvolveLast<kSizeModN>(row_t2, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv2b =
+          HorzConvolveLast<kSizeModN>(row_b2, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv2 = MulAdd(conv2t + conv2b, wv2, conv1);
+
+      const V conv3t =
+          HorzConvolveLast<kSizeModN>(row_t3, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv3b =
+          HorzConvolveLast<kSizeModN>(row_b3, x, xsize, wh0, wh1, wh2, wh3);
+      const V conv3 = MulAdd(conv3t + conv3b, wv3, conv2);
+
+      Store(conv3, d, row_out + x);
+      x += Lanes(d);
+    }
+
+    // If mod = 0, the above vector was the last.
+    // Otherwise the remaining pixels are computed one at a time, mirroring x
+    // via Mirror() and passing each source row through wrap_row.
+    if (kSizeModN != 0) {
+      for (; x < xsize; ++x) {
+        float mul = 0.0f;
+        for (int64_t dy = -kRadius; dy <= kRadius; ++dy) {
+          const float wy = weights.vert[std::abs(dy) * 4];
+          const float* clamped_row = wrap_row(row_m + dy * stride, stride);
+          for (int64_t dx = -kRadius; dx <= kRadius; ++dx) {
+            const float wx = weights.horz[std::abs(dx) * 4];
+            const int64_t clamped_x = Mirror(x + dx, xsize);
+            mul += clamped_row[clamped_x] * wx * wy;
+          }
+        }
+        row_out[x] = mul;
+      }
+    }
+  }
+
+ private:
+  // Horizontal 7-tap convolution for a vector at the start of a row.
+  // Right neighbours are loaded directly from row + x + k. Left neighbours
+  // are either loaded from Mirror()-ed positions (scalar target) or derived
+  // from the centre vector via Neighbors::FirstL1/2/3 (vector targets).
+  static JXL_INLINE V HorzConvolveFirst(const float* const JXL_RESTRICT row,
+                                        const int64_t x, const int64_t xsize,
+                                        const V wh0, const V wh1, const V wh2,
+                                        const V wh3) {
+    const D d;
+    const V center = LoadU(d, row + x);
+
+#if HWY_TARGET == HWY_SCALAR
+    const V left1 = LoadU(d, row + Mirror(x - 1, xsize));
+    const V left2 = LoadU(d, row + Mirror(x - 2, xsize));
+    const V left3 = LoadU(d, row + Mirror(x - 3, xsize));
+#else
+    (void)xsize;
+    const V left1 = Neighbors::FirstL1(center);
+    const V left2 = Neighbors::FirstL2(center);
+    const V left3 = Neighbors::FirstL3(center);
+#endif
+
+    const V right1 = LoadU(d, row + x + 1);
+    const V right2 = LoadU(d, row + x + 2);
+    const V right3 = LoadU(d, row + x + 3);
+
+    // Accumulate centre tap first, then each symmetric pair by distance.
+    V acc = center * wh0;
+    acc = MulAdd(left1 + right1, wh1, acc);
+    acc = MulAdd(left2 + right2, wh2, acc);
+    acc = MulAdd(left3 + right3, wh3, acc);
+    return acc;
+  }
+
+  // Horizontal 7-tap convolution for a vector at the end of a row.
+  // Left neighbours are loaded directly from row + x - k. Right neighbours
+  // are loaded from Mirror()-ed positions on the scalar target; on vector
+  // targets they are built from the centre vector or from the final vector
+  // of the row (row + xsize - N), depending on kSizeModN.
+  template <size_t kSizeModN>
+  static JXL_INLINE V HorzConvolveLast(const float* const JXL_RESTRICT row,
+                                       const int64_t x, const int64_t xsize,
+                                       const V wh0, const V wh1, const V wh2,
+                                       const V wh3) {
+    const D d;
+    const V center = LoadU(d, row + x);
+
+    const V left1 = LoadU(d, row + x - 1);
+    const V left2 = LoadU(d, row + x - 2);
+    const V left3 = LoadU(d, row + x - 3);
+
+    V right1, right2, right3;
+#if HWY_TARGET == HWY_SCALAR
+    right1 = LoadU(d, row + Mirror(x + 1, xsize));
+    right2 = LoadU(d, row + Mirror(x + 2, xsize));
+    right3 = LoadU(d, row + Mirror(x + 3, xsize));
+#else
+    const size_t N = Lanes(d);
+    if (kSizeModN == 0) {
+      // All three right neighbours are lane permutations of the centre vector.
+      right1 =
+          TableLookupLanes(center, SetTableIndices(d, MirrorLanes(N - 1)));
+      right2 =
+          TableLookupLanes(center, SetTableIndices(d, MirrorLanes(N - 2)));
+      right3 =
+          TableLookupLanes(center, SetTableIndices(d, MirrorLanes(N - 3)));
+    } else if (kSizeModN == 1) {
+      const auto tail = LoadU(d, row + xsize - N);
+      right1 = tail;
+      right2 = TableLookupLanes(tail, SetTableIndices(d, MirrorLanes(N - 1)));
+      right3 = TableLookupLanes(tail, SetTableIndices(d, MirrorLanes(N - 2)));
+    } else /* kSizeModN >= 2 */ {
+      const auto tail = LoadU(d, row + xsize - N);
+      right1 = LoadU(d, row + x + 1);
+      right2 = tail;
+      right3 = TableLookupLanes(tail, SetTableIndices(d, MirrorLanes(N - 1)));
+    }
+#endif
+
+    // Centre tap, then each symmetric pair (distance i) times its weight.
+    V acc = center * wh0;
+    acc = MulAdd(left1 + right1, wh1, acc);
+    acc = MulAdd(left2 + right2, wh2, acc);
+    acc = MulAdd(left3 + right3, wh3, acc);
+    return acc;
+  }
+
+  // Fast path for interior pixels: returns a vector whose lane i holds the
+  // horizontal 7-tap result for pixel pos + i. All loads are unaligned and
+  // unmirrored, so pos - 3 .. pos + 3 (plus one vector) must be readable.
+  static JXL_INLINE V HorzConvolve(const float* const JXL_RESTRICT pos,
+                                   const V wh0, const V wh1, const V wh2,
+                                   const V wh3) {
+    const D d;
+
+    // TODO(janwas): better to Combine
+    const V center = LoadU(d, pos);
+    const V pair1 = LoadU(d, pos - 1) + LoadU(d, pos + 1);
+    const V pair2 = LoadU(d, pos - 2) + LoadU(d, pos + 2);
+    const V pair3 = LoadU(d, pos - 3) + LoadU(d, pos + 3);
+
+    // pairK is the sum of the two pixels at distance K; scale by weight K.
+    V acc = center * wh0;
+    acc = MulAdd(pair1, wh1, acc);
+    acc = MulAdd(pair2, wh2, acc);
+    acc = MulAdd(pair3, wh3, acc);
+    return acc;
+  }
+};  // class Separable7
+
+}  // namespace strategy
+
+// Single entry point for convolution.
+// "Strategy" (Direct*/Separable*) decides kernel size and how to evaluate it.
+template <class Strategy>
+class ConvolveT {
+  static constexpr int64_t kRadius = Strategy::kRadius;
+  using Simd = HWY_CAPPED(float, 16);
+
+ public:
+  // Smallest rect width that Run() accepts; narrower inputs must use the
+  // Slow* fallbacks.
+  static size_t MinWidth() {
+#if HWY_TARGET == HWY_SCALAR
+    // First/Last use mirrored loads of up to +/- kRadius.
+    const size_t min_width = 2 * kRadius;
+#else
+    // One full vector plus kRadius pixels beside it.
+    const size_t min_width = Lanes(Simd()) + kRadius;
+#endif
+    return min_width;
+  }
+
+  // Convolves `rect` of `in` into `out`. "Image" is ImageF or Image3F.
+  // Requires out to match rect in size and rect.xsize() >= MinWidth(); both
+  // are enforced with JXL_CHECK. The row width modulo the vector lane count
+  // (capped at 3) is turned into the kSizeModN template argument.
+  template <class Image, class Weights>
+  static void Run(const Image& in, const Rect& rect, const Weights& weights,
+                  ThreadPool* pool, Image* out) {
+    PROFILER_ZONE("ConvolveT::Run");
+    JXL_CHECK(SameSize(rect, *out));
+    JXL_CHECK(rect.xsize() >= MinWidth());
+
+    static_assert(int64_t(kRadius) <= 3,
+                  "Must handle [0, kRadius) and >= kRadius");
+    const size_t size_mod_n = rect.xsize() % Lanes(Simd());
+    if (size_mod_n == 0) {
+      RunRows<0>(in, rect, weights, pool, out);
+    } else if (size_mod_n == 1) {
+      RunRows<1>(in, rect, weights, pool, out);
+    } else if (size_mod_n == 2) {
+      RunRows<2>(in, rect, weights, pool, out);
+    } else {
+      // Any remainder >= 3 shares one instantiation.
+      RunRows<3>(in, rect, weights, pool, out);
+    }
+  }
+
+ private:
+  // Convolves a single row by forwarding all arguments unchanged to
+  // Strategy::ConvolveRow<kSizeModN>. `in` is the centre input row, `stride`
+  // the distance between rows, `wrap_row` the row-pointer wrapping policy and
+  // `out` the destination row.
+  template <size_t kSizeModN, class WrapRow, class Weights>
+  static JXL_INLINE void RunRow(const float* JXL_RESTRICT in,
+                                const size_t xsize, const int64_t stride,
+                                const WrapRow& wrap_row, const Weights& weights,
+                                float* JXL_RESTRICT out) {
+    Strategy::template ConvolveRow<kSizeModN>(in, xsize, stride, wrap_row,
+                                              weights, out);
+  }
+
+  // ImageF: convolves rows [ybegin, yend) sequentially, using a
+  // WrapRowMirror policy built from `in` and rect.ysize() for neighbouring
+  // rows (presumably mirroring rows that fall outside the image -- confirm
+  // against WrapRowMirror).
+  template <size_t kSizeModN, class Weights>
+  static JXL_INLINE void RunBorderRows(const ImageF& in, const Rect& rect,
+                                       const int64_t ybegin, const int64_t yend,
+                                       const Weights& weights, ImageF* out) {
+    const WrapRowMirror mirror_rows(in, rect.ysize());
+    const int64_t row_stride = in.PixelsPerRow();
+    int64_t row = ybegin;
+    while (row < yend) {
+      RunRow<kSizeModN>(rect.ConstRow(in, row), rect.xsize(), row_stride,
+                        mirror_rows, weights, out->Row(row));
+      ++row;
+    }
+  }
+
+  // Image3F: same as the ImageF overload, applied to each of the three planes
+  // for every row in [ybegin, yend). A WrapRowMirror is built per plane.
+  template <size_t kSizeModN, class Weights>
+  static JXL_INLINE void RunBorderRows(const Image3F& in, const Rect& rect,
+                                       const int64_t ybegin, const int64_t yend,
+                                       const Weights& weights, Image3F* out) {
+    const int64_t row_stride = in.PixelsPerRow();
+    int64_t row = ybegin;
+    while (row < yend) {
+      for (size_t plane = 0; plane < 3; ++plane) {
+        const WrapRowMirror mirror_rows(in.Plane(plane), rect.ysize());
+        RunRow<kSizeModN>(rect.ConstPlaneRow(in, plane, row), rect.xsize(),
+                          row_stride, mirror_rows, weights,
+                          out->PlaneRow(plane, row));
+      }
+      ++row;
+    }
+  }
+
+  // ImageF: convolves rows [ybegin, yend) via RunOnPool, one task per row.
+  // Rows are addressed with WrapRowUnchanged, i.e. without any row wrapping.
+  template <size_t kSizeModN, class Weights>
+  static JXL_INLINE void RunInteriorRows(const ImageF& in, const Rect& rect,
+                                         const int64_t ybegin,
+                                         const int64_t yend,
+                                         const Weights& weights,
+                                         ThreadPool* pool, ImageF* out) {
+    const int64_t stride = in.PixelsPerRow();
+    const auto convolve_row = [&](const int y, int /*thread*/) HWY_ATTR {
+      RunRow<kSizeModN>(rect.ConstRow(in, y), rect.xsize(), stride,
+                        WrapRowUnchanged(), weights, out->Row(y));
+    };
+    RunOnPool(pool, ybegin, yend, ThreadPool::SkipInit(), convolve_row,
+              "Convolve");
+  }
+
+  // Image3F: convolves rows [ybegin, yend) via RunOnPool, one task per row;
+  // each task processes the three planes in order with WrapRowUnchanged.
+  template <size_t kSizeModN, class Weights>
+  static JXL_INLINE void RunInteriorRows(const Image3F& in, const Rect& rect,
+                                         const int64_t ybegin,
+                                         const int64_t yend,
+                                         const Weights& weights,
+                                         ThreadPool* pool, Image3F* out) {
+    const int64_t stride = in.PixelsPerRow();
+    const auto convolve_row = [&](const int y, int /*thread*/) HWY_ATTR {
+      for (size_t plane = 0; plane < 3; ++plane) {
+        RunRow<kSizeModN>(rect.ConstPlaneRow(in, plane, y), rect.xsize(),
+                          stride, WrapRowUnchanged(), weights,
+                          out->PlaneRow(plane, y));
+      }
+    };
+    RunOnPool(pool, ybegin, yend, ThreadPool::SkipInit(), convolve_row,
+              "Convolve3");
+  }
+
+  // Convolves every row of `rect`, split into three passes:
+  //   1. top border rows    [0, min(kRadius, ysize))      -> RunBorderRows
+  //   2. interior rows      [kRadius, ysize - kRadius)    -> RunInteriorRows
+  //   3. bottom border rows [max(kRadius, ysize - kRadius), ysize)
+  //                                                       -> RunBorderRows
+  // Pass 2 only runs when ysize > 2 * kRadius and pass 3 only when
+  // ysize > kRadius. The bottom pass starts no earlier than kRadius so that
+  // rows already produced by the top pass are not computed a second time
+  // when kRadius < ysize < 2 * kRadius.
+  template <size_t kSizeModN, class Image, class Weights>
+  static JXL_INLINE void RunRows(const Image& in, const Rect& rect,
+                                 const Weights& weights, ThreadPool* pool,
+                                 Image* out) {
+    const int64_t ysize = rect.ysize();
+    // Local copy avoids binding the static constexpr member by reference.
+    const int64_t radius = int64_t(kRadius);
+
+    RunBorderRows<kSizeModN>(in, rect, 0, std::min(radius, ysize), weights,
+                             out);
+    if (ysize > 2 * radius) {
+      RunInteriorRows<kSizeModN>(in, rect, radius, ysize - radius, weights,
+                                 pool, out);
+    }
+    if (ysize > radius) {
+      const int64_t bottom_begin = std::max(radius, ysize - radius);
+      RunBorderRows<kSizeModN>(in, rect, bottom_begin, ysize, weights, out);
+    }
+  }
+};
+
+// 3x3 symmetric convolution of `rect` into `out`. Uses the vectorized
+// ConvolveT path when the rect is at least MinWidth() wide, otherwise the
+// SlowSymmetric3 reference implementation.
+void Symmetric3(const ImageF& in, const Rect& rect,
+                const WeightsSymmetric3& weights, ThreadPool* pool,
+                ImageF* out) {
+  using Vectorized = ConvolveT<strategy::Symmetric3>;
+  const bool too_narrow = rect.xsize() < Vectorized::MinWidth();
+  if (too_narrow) {
+    SlowSymmetric3(in, rect, weights, pool, out);
+    return;
+  }
+  Vectorized::Run(in, rect, weights, pool, out);
+}
+
+// Symmetric5 is implemented further below, without ConvolveT.
+
+// 5x5 separable convolution (single plane). Uses the vectorized ConvolveT
+// path when the rect is at least MinWidth() wide, otherwise SlowSeparable5.
+void Separable5(const ImageF& in, const Rect& rect,
+                const WeightsSeparable5& weights, ThreadPool* pool,
+                ImageF* out) {
+  using Vectorized = ConvolveT<strategy::Separable5>;
+  const bool too_narrow = rect.xsize() < Vectorized::MinWidth();
+  if (too_narrow) {
+    SlowSeparable5(in, rect, weights, pool, out);
+    return;
+  }
+  Vectorized::Run(in, rect, weights, pool, out);
+}
+// 5x5 separable convolution (three planes). Uses the vectorized ConvolveT
+// path when the rect is at least MinWidth() wide, otherwise SlowSeparable5.
+void Separable5_3(const Image3F& in, const Rect& rect,
+                  const WeightsSeparable5& weights, ThreadPool* pool,
+                  Image3F* out) {
+  using Vectorized = ConvolveT<strategy::Separable5>;
+  const bool too_narrow = rect.xsize() < Vectorized::MinWidth();
+  if (too_narrow) {
+    SlowSeparable5(in, rect, weights, pool, out);
+    return;
+  }
+  Vectorized::Run(in, rect, weights, pool, out);
+}
+
+// 7x7 separable convolution (single plane). Uses the vectorized ConvolveT
+// path when the rect is at least MinWidth() wide, otherwise SlowSeparable7.
+void Separable7(const ImageF& in, const Rect& rect,
+                const WeightsSeparable7& weights, ThreadPool* pool,
+                ImageF* out) {
+  using Vectorized = ConvolveT<strategy::Separable7>;
+  const bool too_narrow = rect.xsize() < Vectorized::MinWidth();
+  if (too_narrow) {
+    SlowSeparable7(in, rect, weights, pool, out);
+    return;
+  }
+  Vectorized::Run(in, rect, weights, pool, out);
+}
+// 7x7 separable convolution (three planes). Uses the vectorized ConvolveT
+// path when the rect is at least MinWidth() wide, otherwise SlowSeparable7.
+void Separable7_3(const Image3F& in, const Rect& rect,
+                  const WeightsSeparable7& weights, ThreadPool* pool,
+                  Image3F* out) {
+  using Vectorized = ConvolveT<strategy::Separable7>;
+  const bool too_narrow = rect.xsize() < Vectorized::MinWidth();
+  if (too_narrow) {
+    SlowSeparable7(in, rect, weights, pool, out);
+    return;
+  }
+  Vectorized::Run(in, rect, weights, pool, out);
+}
+
+// Semi-vectorized (interior pixels only); called directly like slow::, unlike
+// the fully vectorized strategies above.
+void Symmetric5(const ImageF& in, const Rect& rect,
+                const WeightsSymmetric5& weights, ThreadPool* pool,
+                ImageF* JXL_RESTRICT out) {
+  PROFILER_FUNC;
+
+  const size_t ysize = rect.ysize();
+  // One task per row. Rows within two rows of the top or bottom edge go
+  // through Symmetric5BorderRow; all others use the WrapUnchanged row kernel.
+  const auto convolve_row = [&](const int task, int /*thread*/) {
+    const int64_t iy = task;
+    const bool interior = iy >= 2 && iy < static_cast<ssize_t>(ysize) - 2;
+    if (interior) {
+      Symmetric5Row<WrapUnchanged>(in, rect, iy, weights, out->Row(iy));
+    } else {
+      Symmetric5BorderRow(in, rect, iy, weights, out->Row(iy));
+    }
+  };
+  RunOnPool(pool, 0, static_cast<uint32_t>(ysize), ThreadPool::SkipInit(),
+            convolve_row, "Symmetric5x5Convolution");
+}
+
+// Three-plane variant of Symmetric5: one task per row, each task processing
+// planes 0..2 in order with either the border or the interior row kernel.
+void Symmetric5_3(const Image3F& in, const Rect& rect,
+                  const WeightsSymmetric5& weights, ThreadPool* pool,
+                  Image3F* JXL_RESTRICT out) {
+  PROFILER_FUNC;
+
+  const size_t ysize = rect.ysize();
+  const auto convolve_row = [&](const int task, int /*thread*/) {
+    const size_t iy = task;
+    // Unsigned comparison, exactly as in the original condition.
+    const bool interior = iy >= 2 && iy < ysize - 2;
+    for (size_t plane = 0; plane < 3; ++plane) {
+      if (interior) {
+        Symmetric5Row<WrapUnchanged>(in.Plane(plane), rect, iy, weights,
+                                     out->PlaneRow(plane, iy));
+      } else {
+        Symmetric5BorderRow(in.Plane(plane), rect, iy, weights,
+                            out->PlaneRow(plane, iy));
+      }
+    }
+  };
+  RunOnPool(pool, 0, static_cast<uint32_t>(ysize), ThreadPool::SkipInit(),
+            convolve_row, "Symmetric5x5Convolution3");
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+// Public entry point: forwards to the per-target Symmetric3 implementation
+// chosen by HWY_DYNAMIC_DISPATCH.
+HWY_EXPORT(Symmetric3);
+void Symmetric3(const ImageF& in, const Rect& rect,
+                const WeightsSymmetric3& weights, ThreadPool* pool,
+                ImageF* out) {
+  HWY_DYNAMIC_DISPATCH(Symmetric3)(in, rect, weights, pool, out);
+}
+
+// Public entry point: forwards to the per-target Symmetric5 implementation
+// chosen by HWY_DYNAMIC_DISPATCH.
+HWY_EXPORT(Symmetric5);
+void Symmetric5(const ImageF& in, const Rect& rect,
+                const WeightsSymmetric5& weights, ThreadPool* pool,
+                ImageF* JXL_RESTRICT out) {
+  HWY_DYNAMIC_DISPATCH(Symmetric5)(in, rect, weights, pool, out);
+}
+
+// Public entry point: forwards to the per-target Symmetric5_3 implementation
+// chosen by HWY_DYNAMIC_DISPATCH.
+HWY_EXPORT(Symmetric5_3);
+void Symmetric5_3(const Image3F& in, const Rect& rect,
+                  const WeightsSymmetric5& weights, ThreadPool* pool,
+                  Image3F* JXL_RESTRICT out) {
+  HWY_DYNAMIC_DISPATCH(Symmetric5_3)(in, rect, weights, pool, out);
+}
+
+// Public entry point: forwards to the per-target Separable5 implementation
+// chosen by HWY_DYNAMIC_DISPATCH.
+HWY_EXPORT(Separable5);
+void Separable5(const ImageF& in, const Rect& rect,
+                const WeightsSeparable5& weights, ThreadPool* pool,
+                ImageF* out) {
+  HWY_DYNAMIC_DISPATCH(Separable5)(in, rect, weights, pool, out);
+}
+
+// Public entry point: forwards to the per-target Separable5_3 implementation
+// chosen by HWY_DYNAMIC_DISPATCH.
+HWY_EXPORT(Separable5_3);
+void Separable5_3(const Image3F& in, const Rect& rect,
+                  const WeightsSeparable5& weights, ThreadPool* pool,
+                  Image3F* out) {
+  HWY_DYNAMIC_DISPATCH(Separable5_3)(in, rect, weights, pool, out);
+}
+
+// Public entry point: forwards to the per-target Separable7 implementation
+// chosen by HWY_DYNAMIC_DISPATCH.
+HWY_EXPORT(Separable7);
+void Separable7(const ImageF& in, const Rect& rect,
+                const WeightsSeparable7& weights, ThreadPool* pool,
+                ImageF* out) {
+  HWY_DYNAMIC_DISPATCH(Separable7)(in, rect, weights, pool, out);
+}
+
+// Public entry point: forwards to the per-target Separable7_3 implementation
+// chosen by HWY_DYNAMIC_DISPATCH.
+HWY_EXPORT(Separable7_3);
+void Separable7_3(const Image3F& in, const Rect& rect,
+                  const WeightsSeparable7& weights, ThreadPool* pool,
+                  Image3F* out) {
+  HWY_DYNAMIC_DISPATCH(Separable7_3)(in, rect, weights, pool, out);
+}
+
+//------------------------------------------------------------------------------
+// Kernels
+
+// Returns a 3x3 symmetric lowpass kernel that concentrates energy in
+// low-frequency components (e.g. for antialiasing).
+const WeightsSymmetric3& WeightsSymmetric3Lowpass() {
+  // Computed by research/convolve_weights.py's cubic spline approximations of
+  // prolate spheroidal wave functions.
+  constexpr float kCenter = 0.36208932f;
+  constexpr float kAdjacent = 0.12820096f;
+  constexpr float kDiagonal = 0.03127668f;
+  // Field order is c, r, d; every weight is replicated four times.
+  static constexpr WeightsSymmetric3 kLowpass = {
+      {HWY_REP4(kCenter)}, {HWY_REP4(kAdjacent)}, {HWY_REP4(kDiagonal)}};
+  return kLowpass;
+}
+
+// Returns a 5x5 separable lowpass kernel; the same taps (distances 0, 1, 2)
+// are used for the horizontal and the vertical direction.
+const WeightsSeparable5& WeightsSeparable5Lowpass() {
+  constexpr float kTap0 = 0.41714928f;
+  constexpr float kTap1 = 0.25539268f;
+  constexpr float kTap2 = 0.03603267f;
+  static constexpr WeightsSeparable5 kLowpass = {
+      /*horz=*/{HWY_REP4(kTap0), HWY_REP4(kTap1), HWY_REP4(kTap2)},
+      /*vert=*/{HWY_REP4(kTap0), HWY_REP4(kTap1), HWY_REP4(kTap2)}};
+  return kLowpass;
+}
+
+// Returns a 5x5 symmetric lowpass kernel. Constants are named after the
+// WeightsSymmetric5 fields they initialize (declaration order c, r, R, d, D,
+// L); every weight is replicated four times.
+const WeightsSymmetric5& WeightsSymmetric5Lowpass() {
+  constexpr float kC = 0.1740135f;
+  constexpr float kR = 0.1065369f;
+  constexpr float kBigR = 0.0150310f;
+  constexpr float kD = 0.0652254f;
+  constexpr float kBigD = 0.0012984f;
+  constexpr float kL = 0.0092025f;
+  static constexpr WeightsSymmetric5 kLowpass = {
+      {HWY_REP4(kC)}, {HWY_REP4(kR)},    {HWY_REP4(kBigR)},
+      {HWY_REP4(kD)}, {HWY_REP4(kBigD)}, {HWY_REP4(kL)}};
+  return kLowpass;
+}
+
+// Returns a 5x5 separable kernel with identical horizontal and vertical taps
+// for distances 0, 1, 2 (presumably a Gaussian with sigma = 1, going by the
+// name -- confirm against the weight derivation).
+const WeightsSeparable5& WeightsSeparable5Gaussian1() {
+  constexpr float kTap0 = 0.38774f;
+  constexpr float kTap1 = 0.24477f;
+  constexpr float kTap2 = 0.06136f;
+  static constexpr WeightsSeparable5 kGaussian = {
+      /*horz=*/{HWY_REP4(kTap0), HWY_REP4(kTap1), HWY_REP4(kTap2)},
+      /*vert=*/{HWY_REP4(kTap0), HWY_REP4(kTap1), HWY_REP4(kTap2)}};
+  return kGaussian;
+}
+
+// Returns a 5x5 separable kernel with identical horizontal and vertical taps
+// for distances 0, 1, 2 (presumably a Gaussian with sigma = 2, going by the
+// name -- confirm against the weight derivation).
+const WeightsSeparable5& WeightsSeparable5Gaussian2() {
+  constexpr float kTap0 = 0.250301f;
+  constexpr float kTap1 = 0.221461f;
+  constexpr float kTap2 = 0.153388f;
+  static constexpr WeightsSeparable5 kGaussian = {
+      /*horz=*/{HWY_REP4(kTap0), HWY_REP4(kTap1), HWY_REP4(kTap2)},
+      /*vert=*/{HWY_REP4(kTap0), HWY_REP4(kTap1), HWY_REP4(kTap2)}};
+  return kGaussian;
+}
+
+//------------------------------------------------------------------------------
+// Slow
+
+namespace {
+
+// Reference 3x3 symmetric convolution of the pixel at (ix, iy).
+// WrapX / WrapY are functors mapping a possibly out-of-range coordinate and
+// the corresponding size to a valid coordinate. Rows are read straight from
+// `in` via in.ConstRow().
+template <class WrapX, class WrapY>
+float SlowSymmetric3Pixel(const ImageF& in, const int64_t ix, const int64_t iy,
+                          const int64_t xsize, const int64_t ysize,
+                          const WeightsSymmetric3& weights) {
+  float acc = 0.0f;
+
+  // ky walks the three kernel rows; the middle row uses (c, r), the outer
+  // rows use (r, d) as (centre-column, side-column) weights.
+  for (int64_t ky = -1; ky <= 1; ky++) {
+    const bool middle_row = (ky == 0);
+    const float w_center = middle_row ? weights.c[0] : weights.r[0];
+    const float w_sides = middle_row ? weights.r[0] : weights.d[0];
+
+    const int64_t src_y = WrapY()(iy + ky, ysize);
+    const float* JXL_RESTRICT src = in.ConstRow(static_cast<size_t>(src_y));
+
+    const int64_t left = WrapX()(ix - 1, xsize);
+    const int64_t right = WrapX()(ix + 1, xsize);
+    acc += src[ix] * w_center + (src[left] + src[right]) * w_sides;
+  }
+  return acc;
+}
+
+// Reference 3x3 symmetric convolution of row iy into row_out.
+// The first and last pixel use WrapMirror horizontally; all pixels in
+// between use WrapUnchanged. WrapY is forwarded for the vertical direction.
+template <class WrapY>
+void SlowSymmetric3Row(const ImageF& in, const int64_t iy, const int64_t xsize,
+                       const int64_t ysize, const WeightsSymmetric3& weights,
+                       float* JXL_RESTRICT row_out) {
+  const int64_t last = xsize - 1;
+
+  // Left edge.
+  row_out[0] =
+      SlowSymmetric3Pixel<WrapMirror, WrapY>(in, 0, iy, xsize, ysize, weights);
+
+  // Interior pixels: both horizontal neighbours exist.
+  int64_t ix = 1;
+  while (ix < last) {
+    row_out[ix] = SlowSymmetric3Pixel<WrapUnchanged, WrapY>(in, ix, iy, xsize,
+                                                            ysize, weights);
+    ix++;
+  }
+
+  // Right edge.
+  row_out[last] = SlowSymmetric3Pixel<WrapMirror, WrapY>(in, last, iy, xsize,
+                                                         ysize, weights);
+}
+
+}  // namespace
+
+// Reference (non-vectorized) 3x3 symmetric convolution, one pool task per
+// row. The first and last row use WrapMirror vertically, all others
+// WrapUnchanged. Only rect's size is used here; rows are read from `in`
+// directly by the row helper.
+void SlowSymmetric3(const ImageF& in, const Rect& rect,
+                    const WeightsSymmetric3& weights, ThreadPool* pool,
+                    ImageF* JXL_RESTRICT out) {
+  PROFILER_FUNC;
+
+  const int64_t xsize = static_cast<int64_t>(rect.xsize());
+  const int64_t ysize = static_cast<int64_t>(rect.ysize());
+  const int64_t kRadius = 1;
+
+  const auto convolve_row = [&](const int task, int /*thread*/) {
+    const int64_t iy = task;
+    float* JXL_RESTRICT out_row = out->Row(static_cast<size_t>(iy));
+
+    const bool interior = iy >= kRadius && iy < ysize - kRadius;
+    if (interior) {
+      SlowSymmetric3Row<WrapUnchanged>(in, iy, xsize, ysize, weights, out_row);
+    } else {
+      SlowSymmetric3Row<WrapMirror>(in, iy, xsize, ysize, weights, out_row);
+    }
+  };
+  RunOnPool(pool, 0, static_cast<uint32_t>(ysize), ThreadPool::SkipInit(),
+            convolve_row, "SlowSymmetric3");
+}
+
+// Three-plane variant of the reference 3x3 symmetric convolution: one pool
+// task per row, each task handling planes 0..2 in order.
+void SlowSymmetric3(const Image3F& in, const Rect& rect,
+                    const WeightsSymmetric3& weights, ThreadPool* pool,
+                    Image3F* JXL_RESTRICT out) {
+  PROFILER_FUNC;
+
+  const int64_t xsize = static_cast<int64_t>(rect.xsize());
+  const int64_t ysize = static_cast<int64_t>(rect.ysize());
+  const int64_t kRadius = 1;
+
+  const auto convolve_row = [&](const int task, int /*thread*/) {
+    const int64_t iy = task;
+    const size_t oy = static_cast<size_t>(iy);
+
+    const bool interior = iy >= kRadius && iy < ysize - kRadius;
+    for (size_t plane = 0; plane < 3; ++plane) {
+      if (interior) {
+        SlowSymmetric3Row<WrapUnchanged>(in.Plane(plane), iy, xsize, ysize,
+                                         weights, out->PlaneRow(plane, oy));
+      } else {
+        SlowSymmetric3Row<WrapMirror>(in.Plane(plane), iy, xsize, ysize,
+                                      weights, out->PlaneRow(plane, oy));
+      }
+    }
+  };
+  RunOnPool(pool, 0, static_cast<uint32_t>(ysize), ThreadPool::SkipInit(),
+            convolve_row, "SlowSymmetric3");
+}
+
+namespace {
+
+// Reference implementation of one output pixel for a separable kernel of the
+// given radius. Source coordinates are mapped through WrapMirror in both
+// directions and read relative to `rect`. The weight for distance k is taken
+// from index k * 4 of horz_weights / vert_weights.
+float SlowSeparablePixel(const ImageF& in, const Rect& rect, const int64_t x,
+                         const int64_t y, const int64_t radius,
+                         const float* JXL_RESTRICT horz_weights,
+                         const float* JXL_RESTRICT vert_weights) {
+  const WrapMirror mirror;
+  const size_t width = rect.xsize();
+  const size_t height = rect.ysize();
+
+  float acc = 0.0f;
+  for (int dy = -radius; dy <= radius; ++dy) {
+    const size_t src_y = mirror(y + dy, height);
+    JXL_CHECK(src_y < height);
+    const float* const JXL_RESTRICT src = rect.ConstRow(in, src_y);
+    const float weight_y = vert_weights[std::abs(dy) * 4];
+    for (int dx = -radius; dx <= radius; ++dx) {
+      const size_t src_x = mirror(x + dx, width);
+      JXL_CHECK(src_x < width);
+      const float weight_x = horz_weights[std::abs(dx) * 4];
+      acc += src[src_x] * weight_x * weight_y;
+    }
+  }
+  return acc;
+}
+
+}  // namespace
+
+// Reference 5x5 separable convolution (radius 2): one pool task per row,
+// every pixel computed by SlowSeparablePixel.
+void SlowSeparable5(const ImageF& in, const Rect& rect,
+                    const WeightsSeparable5& weights, ThreadPool* pool,
+                    ImageF* out) {
+  PROFILER_FUNC;
+  const float* horz = &weights.horz[0];
+  const float* vert = &weights.vert[0];
+
+  const size_t ysize = rect.ysize();
+  const auto convolve_row = [&](const int task, int /*thread*/) {
+    const int64_t y = task;
+
+    float* const JXL_RESTRICT dst = out->Row(y);
+    size_t x = 0;
+    while (x < rect.xsize()) {
+      dst[x] = SlowSeparablePixel(in, rect, x, y, /*radius=*/2, horz, vert);
+      ++x;
+    }
+  };
+  RunOnPool(pool, 0, static_cast<uint32_t>(ysize), ThreadPool::SkipInit(),
+            convolve_row, "SlowSeparable5");
+}
+
+// Three-plane variant: runs the single-plane SlowSeparable5 on planes 0, 1
+// and 2 in turn.
+void SlowSeparable5(const Image3F& in, const Rect& rect,
+                    const WeightsSeparable5& weights, ThreadPool* pool,
+                    Image3F* out) {
+  size_t plane = 0;
+  while (plane < 3) {
+    SlowSeparable5(in.Plane(plane), rect, weights, pool, &out->Plane(plane));
+    ++plane;
+  }
+}
+
+// Reference 7x7 separable convolution (radius 3): one pool task per row,
+// every pixel computed by SlowSeparablePixel.
+void SlowSeparable7(const ImageF& in, const Rect& rect,
+                    const WeightsSeparable7& weights, ThreadPool* pool,
+                    ImageF* out) {
+  PROFILER_FUNC;
+  const float* horz = &weights.horz[0];
+  const float* vert = &weights.vert[0];
+
+  const size_t ysize = rect.ysize();
+  const auto convolve_row = [&](const int task, int /*thread*/) {
+    const int64_t y = task;
+
+    float* const JXL_RESTRICT dst = out->Row(y);
+    size_t x = 0;
+    while (x < rect.xsize()) {
+      dst[x] = SlowSeparablePixel(in, rect, x, y, /*radius=*/3, horz, vert);
+      ++x;
+    }
+  };
+  RunOnPool(pool, 0, static_cast<uint32_t>(ysize), ThreadPool::SkipInit(),
+            convolve_row, "SlowSeparable7");
+}
+
+// Three-plane variant: runs the single-plane SlowSeparable7 on planes 0, 1
+// and 2 in turn.
+void SlowSeparable7(const Image3F& in, const Rect& rect,
+                    const WeightsSeparable7& weights, ThreadPool* pool,
+                    Image3F* out) {
+  size_t plane = 0;
+  while (plane < 3) {
+    SlowSeparable7(in.Plane(plane), rect, weights, pool, &out->Plane(plane));
+    ++plane;
+  }
+}
+
+// Reference sparse 5x5 Laplacian: for every pixel, adds the four samples at
+// distance 2 (above, below, left, right; coordinates mapped through
+// WrapMirror) and subtracts four times the centre sample. `out` must have
+// the same size as `rect`. One pool task per row.
+void SlowLaplacian5(const ImageF& in, const Rect& rect, ThreadPool* pool,
+                    ImageF* out) {
+  PROFILER_FUNC;
+  JXL_CHECK(SameSize(rect, *out));
+
+  const size_t xsize = rect.xsize();
+  const size_t ysize = rect.ysize();
+  const WrapMirror wrap;
+
+  const auto laplacian_row = [&](const int task, int /*thread*/) {
+    const int64_t y = task;
+
+    const float* const JXL_RESTRICT above =
+        rect.ConstRow(in, wrap(y - 2, ysize));
+    const float* const JXL_RESTRICT middle = rect.ConstRow(in, y);
+    const float* const JXL_RESTRICT below =
+        rect.ConstRow(in, wrap(y + 2, ysize));
+    float* const JXL_RESTRICT dst = out->Row(y);
+
+    for (int64_t x = 0; static_cast<size_t>(x) < xsize; ++x) {
+      const int64_t left = wrap(x - 2, xsize);
+      const int64_t right = wrap(x + 2, xsize);
+      float sum = 0.0f;
+      sum += /*                 */ 1.0f * above[x];
+      sum += 1.0f * middle[left] - 4.0f * middle[x] + 1.0f * middle[right];
+      sum += /*                 */ 1.0f * below[x];
+      dst[x] = sum;
+    }
+  };
+  RunOnPool(pool, 0, static_cast<uint32_t>(ysize), ThreadPool::SkipInit(),
+            laplacian_row, "SlowLaplacian5");
+}
+
+// Three-plane variant: runs the single-plane SlowLaplacian5 on planes 0, 1
+// and 2 in turn.
+void SlowLaplacian5(const Image3F& in, const Rect& rect, ThreadPool* pool,
+                    Image3F* out) {
+  size_t plane = 0;
+  while (plane < 3) {
+    SlowLaplacian5(in.Plane(plane), rect, pool, &out->Plane(plane));
+    ++plane;
+  }
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/convolve.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/convolve.h
new file mode 100644
index 0000000000..c2e2ae42fb
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/convolve.h
@@ -0,0 +1,131 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_CONVOLVE_H_
+#define LIB_JXL_CONVOLVE_H_
+
+// 2D convolution.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// No valid values outside [0, xsize), but the strategy may still safely load
+// the preceding vector, and/or round xsize up to the vector lane count. This
+// avoids needing PadImage.
+// Requires xsize >= kConvolveLanes + kConvolveMaxRadius.
+static constexpr size_t kConvolveMaxRadius = 3;
+
+// Weights must already be normalized.
+
+struct WeightsSymmetric3 {
+  // d r d (each replicated 4x)
+  // r c r
+  // d r d
+  float c[4];  // center tap
+  float r[4];  // taps directly left/right/above/below the center
+  float d[4];  // diagonal (corner) taps
+};
+
+struct WeightsSymmetric5 {
+  // The lower-right quadrant is: c r R  (each replicated 4x)
+  //                              r d L
+  //                              R L D
+  float c[4];  // offset (0, 0): center
+  float r[4];  // offsets (1, 0) and (0, 1)
+  float R[4];  // offsets (2, 0) and (0, 2)
+  float d[4];  // offset (1, 1)
+  float D[4];  // offset (2, 2)
+  float L[4];  // offsets (2, 1) and (1, 2)
+};
+
+// Weights for separable 5x5 filters (typically but not necessarily the same
+// values for horizontal and vertical directions). The kernel must already be
+// normalized, but note that values for negative offsets are omitted, so the
+// given values do not sum to 1.
+struct WeightsSeparable5 {
+  // Horizontal 1D, distances 0..2 (each replicated 4x)
+  float horz[3 * 4];
+  float vert[3 * 4];  // Vertical 1D, same layout as horz.
+};
+
+// Weights for separable 7x7 filters (typically but not necessarily the same
+// values for horizontal and vertical directions). The kernel must already be
+// normalized, but note that values for negative offsets are omitted, so the
+// given values do not sum to 1.
+//
+// NOTE: for >= 7x7 Gaussian kernels, it is faster to use FastGaussian instead,
+// at least when images exceed the L1 cache size.
+struct WeightsSeparable7 {
+  // Horizontal 1D, distances 0..3 (each replicated 4x)
+  float horz[4 * 4];
+  float vert[4 * 4];  // Vertical 1D, same layout as horz.
+};
+
+const WeightsSymmetric3& WeightsSymmetric3Lowpass();
+const WeightsSeparable5& WeightsSeparable5Lowpass();
+const WeightsSymmetric5& WeightsSymmetric5Lowpass();
+
+void SlowSymmetric3(const ImageF& in, const Rect& rect,
+                    const WeightsSymmetric3& weights, ThreadPool* pool,
+                    ImageF* JXL_RESTRICT out);
+void SlowSymmetric3(const Image3F& in, const Rect& rect,
+                    const WeightsSymmetric3& weights, ThreadPool* pool,
+                    Image3F* JXL_RESTRICT out);
+
+void SlowSeparable5(const ImageF& in, const Rect& rect,
+                    const WeightsSeparable5& weights, ThreadPool* pool,
+                    ImageF* out);
+void SlowSeparable5(const Image3F& in, const Rect& rect,
+                    const WeightsSeparable5& weights, ThreadPool* pool,
+                    Image3F* out);
+
+void SlowSeparable7(const ImageF& in, const Rect& rect,
+                    const WeightsSeparable7& weights, ThreadPool* pool,
+                    ImageF* out);
+void SlowSeparable7(const Image3F& in, const Rect& rect,
+                    const WeightsSeparable7& weights, ThreadPool* pool,
+                    Image3F* out);
+
+void SlowLaplacian5(const ImageF& in, const Rect& rect, ThreadPool* pool,
+                    ImageF* out);
+void SlowLaplacian5(const Image3F& in, const Rect& rect, ThreadPool* pool,
+                    Image3F* out);
+
+void Symmetric3(const ImageF& in, const Rect& rect,
+                const WeightsSymmetric3& weights, ThreadPool* pool,
+                ImageF* out);
+
+void Symmetric5(const ImageF& in, const Rect& rect,
+                const WeightsSymmetric5& weights, ThreadPool* pool,
+                ImageF* JXL_RESTRICT out);
+
+void Symmetric5_3(const Image3F& in, const Rect& rect,
+                  const WeightsSymmetric5& weights, ThreadPool* pool,
+                  Image3F* JXL_RESTRICT out);
+
+void Separable5(const ImageF& in, const Rect& rect,
+                const WeightsSeparable5& weights, ThreadPool* pool,
+                ImageF* out);
+
+void Separable5_3(const Image3F& in, const Rect& rect,
+                  const WeightsSeparable5& weights, ThreadPool* pool,
+                  Image3F* out);
+
+void Separable7(const ImageF& in, const Rect& rect,
+                const WeightsSeparable7& weights, ThreadPool* pool,
+                ImageF* out);
+
+void Separable7_3(const Image3F& in, const Rect& rect,
+                  const WeightsSeparable7& weights, ThreadPool* pool,
+                  Image3F* out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_CONVOLVE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/convolve_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/convolve_test.cc
new file mode 100644
index 0000000000..45e7e45f10
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/convolve_test.cc
@@ -0,0 +1,250 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/convolve.h"
+
+#include <time.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/convolve_test.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+#include <hwy/nanobenchmark.h>
+#include <hwy/tests/test_util-inl.h>
+#include <random>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/thread_pool_internal.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+
+#ifndef JXL_DEBUG_CONVOLVE
+#define JXL_DEBUG_CONVOLVE 0
+#endif
+
+#include "lib/jxl/convolve-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+void TestNeighbors() {  // Checks Neighbors::FirstL1/L2/L3 on v = Iota(d, 0).
+  const Neighbors::D d;
+  const Neighbors::V v = Iota(d, 0);
+  HWY_ALIGN float actual[hwy::kTestMaxVectorSize / sizeof(float)] = {0};
+
+  HWY_ALIGN float first_l1[hwy::kTestMaxVectorSize / sizeof(float)] = {
+      0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14};
+  Store(Neighbors::FirstL1(v), d, actual);
+  const size_t N = Lanes(d);  // only the first N lanes are compared below
+  EXPECT_EQ(std::vector<float>(first_l1, first_l1 + N),
+            std::vector<float>(actual, actual + N));
+
+#if HWY_TARGET != HWY_SCALAR  // FirstL2/FirstL3 are not checked on HWY_SCALAR
+  HWY_ALIGN float first_l2[hwy::kTestMaxVectorSize / sizeof(float)] = {
+      1, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13};
+  Store(Neighbors::FirstL2(v), d, actual);  // reuses the `actual` buffer
+  EXPECT_EQ(std::vector<float>(first_l2, first_l2 + N),
+            std::vector<float>(actual, actual + N));
+
+  HWY_ALIGN float first_l3[hwy::kTestMaxVectorSize / sizeof(float)] = {
+      2, 1, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  Store(Neighbors::FirstL3(v), d, actual);  // reuses the `actual` buffer
+  EXPECT_EQ(std::vector<float>(first_l3, first_l3 + N),
+            std::vector<float>(actual, actual + N));
+#endif  // HWY_TARGET != HWY_SCALAR
+}
+
+// Compares Symmetric3 with SlowSymmetric3 on a random xsize x ysize image.
+template <class Random>
+void VerifySymmetric3(const size_t xsize, const size_t ysize, ThreadPool* pool,
+                      Random* rng) {
+  // Fill the input via GeneratorRandom, which is constructed from rng.
+  ImageF input(xsize, ysize);
+  GenerateImage(GeneratorRandom<float, Random>(rng, 1.0f), &input);
+
+  const Rect whole(0, 0, xsize, ysize);
+  const WeightsSymmetric3& kernel = WeightsSymmetric3Lowpass();
+  ImageF fast_result(xsize, ysize);
+  Symmetric3(input, whole, kernel, pool, &fast_result);
+  ImageF slow_result(xsize, ysize);
+  SlowSymmetric3(input, whole, kernel, pool, &slow_result);
+  // Comparison order: the Symmetric3 output first, then the slow output.
+  VerifyRelativeError(fast_result, slow_result, 1E-5f, 1E-5f);
+}
+
+// Checks that Symmetric5 and Separable5 (lowpass weights) agree within 1E-5.
+template <class Random>
+void VerifySymmetric5(const size_t xsize, const size_t ysize, ThreadPool* pool,
+                      Random* rng) {
+  ImageF input(xsize, ysize);
+  GenerateImage(GeneratorRandom<float, Random>(rng, 1.0f), &input);
+
+  // First operand of the comparison: Separable5 over Rect(input).
+  ImageF separable_result(xsize, ysize);
+  Separable5(input, Rect(input), WeightsSeparable5Lowpass(), pool,
+             &separable_result);
+  // Second operand: Symmetric5 over an explicitly constructed full-size rect.
+  ImageF symmetric_result(xsize, ysize);
+  const Rect whole(0, 0, xsize, ysize);
+  Symmetric5(input, whole, WeightsSymmetric5Lowpass(), pool, &symmetric_result);
+  VerifyRelativeError(separable_result, symmetric_result, 1E-5f, 1E-5f);
+}
+
+// Compares Separable5 with SlowSeparable5 on a random xsize x ysize image.
+template <class Random>
+void VerifySeparable5(const size_t xsize, const size_t ysize, ThreadPool* pool,
+                      Random* rng) {
+  ImageF input(xsize, ysize);
+  GenerateImage(GeneratorRandom<float, Random>(rng, 1.0f), &input);
+
+  const WeightsSeparable5& kernel = WeightsSeparable5Lowpass();
+
+  // Separable5 runs over Rect(input); its output is the first comparand.
+  ImageF fast_result(xsize, ysize);
+  Separable5(input, Rect(input), kernel, pool, &fast_result);
+  // SlowSeparable5 runs over an explicitly constructed full-size rect.
+  ImageF slow_result(xsize, ysize);
+  SlowSeparable5(input, Rect(0, 0, xsize, ysize), kernel, pool, &slow_result);
+  VerifyRelativeError(fast_result, slow_result, 1E-5f, 1E-5f);
+}
+
+// Compares Separable7 with SlowSeparable7 on a random xsize x ysize image.
+template <class Random>
+void VerifySeparable7(const size_t xsize, const size_t ysize, ThreadPool* pool,
+                      Random* rng) {
+  ImageF input(xsize, ysize);
+  GenerateImage(GeneratorRandom<float, Random>(rng, 1.0f), &input);
+
+  // Gaussian sigma 1.0; the same four taps fill both initializer groups.
+  const WeightsSeparable7 kernel = {
+      {HWY_REP4(0.383103f), HWY_REP4(0.241843f), HWY_REP4(0.060626f),
+       HWY_REP4(0.00598f)},
+      {HWY_REP4(0.383103f), HWY_REP4(0.241843f), HWY_REP4(0.060626f),
+       HWY_REP4(0.00598f)}};
+
+  // The slow path supplies the first comparand, so it is evaluated first.
+  ImageF slow_result(xsize, ysize);
+  SlowSeparable7(input, Rect(0, 0, xsize, ysize), kernel, pool, &slow_result);
+
+  ImageF fast_result(xsize, ysize);
+  Separable7(input, Rect(input), kernel, pool, &fast_result);
+  VerifyRelativeError(slow_result, fast_result, 1E-5f, 1E-5f);
+}
+
+// For all xsize/ysize and kernels: runs TestNeighbors, then every Verify*.
+void TestConvolve() {
+  TestNeighbors();
+
+  ThreadPoolInternal pool(4);  // outer pool: each task handles one xsize
+  pool.Run(kConvolveMaxRadius, 40, ThreadPool::SkipInit(),
+           [](const int task, int /*thread*/) {
+             const size_t xsize = task;
+             std::mt19937_64 rng(129 + 13 * xsize);  // seed depends on xsize only
+
+             ThreadPool* null_pool = nullptr;
+             ThreadPoolInternal pool3(3);  // inner pool, built once per task
+             for (size_t ysize = kConvolveMaxRadius; ysize < 16; ++ysize) {
+               JXL_DEBUG(JXL_DEBUG_CONVOLVE,
+                         "%zu x %zu (target %d)===============================",
+                         xsize, ysize, HWY_TARGET);
+
+               // Each kernel is verified twice: null pool, then pool3.
+               VerifySymmetric3(xsize, ysize, null_pool, &rng);
+               VerifySymmetric3(xsize, ysize, &pool3, &rng);
+
+               JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sym5------------------");
+               VerifySymmetric5(xsize, ysize, null_pool, &rng);
+               VerifySymmetric5(xsize, ysize, &pool3, &rng);
+
+               JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sep5------------------");
+               VerifySeparable5(xsize, ysize, null_pool, &rng);
+               VerifySeparable5(xsize, ysize, &pool3, &rng);
+
+               JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sep7------------------");
+               VerifySeparable7(xsize, ysize, null_pool, &rng);
+               VerifySeparable7(xsize, ysize, &pool3, &rng);
+             }
+           });
+}
+
+// Measures durations, verifies results, prints timings. `unpredictable1`
+// must have value 1 (unknown to the compiler to prevent elision).
+template <class Conv>
+void BenchmarkConv(const char* caption, const Conv& conv,
+                   const hwy::FuncInput unpredictable1) {
+  const size_t kNumInputs = 1;
+  const hwy::FuncInput inputs[kNumInputs] = {unpredictable1};
+  hwy::Result results[kNumInputs];
+
+  const size_t kDim = 160;  // in+out fit in L2
+  ImageF in(kDim, kDim);
+  ZeroFillImage(&in);
+  in.Row(kDim / 2)[kDim / 2] = unpredictable1;  // only the center sample is set
+  ImageF out(kDim, kDim);
+
+  hwy::Params p;
+  p.verbose = false;
+  p.max_evals = 7;
+  p.target_rel_mad = 0.002;
+  const size_t num_results = MeasureClosure(
+      [&in, &conv, &out](const hwy::FuncInput input) {
+        conv(in, &out);
+        return out.Row(input)[0];  // returns a value read from the output
+      },
+      inputs, kNumInputs, results, p);
+  if (num_results != kNumInputs) {  // logged only; the loop below still runs
+    fprintf(stderr, "MeasureClosure failed.\n");
+  }
+  for (size_t i = 0; i < num_results; ++i) {
+    const double seconds = static_cast<double>(results[i].ticks) /
+                           hwy::platform::InvariantTicksPerSecond();
+    printf("%12s: %7.2f MP/s (MAD=%4.2f%%)\n", caption,
+           kDim * kDim * 1E-6 / seconds,  // megapixels per second
+           static_cast<double>(results[i].variability) * 100.0);
+  }
+}
+
+struct ConvSymmetric3 {  // Functor: Symmetric3 lowpass with a null pool.
+  void operator()(const ImageF& in, ImageF* JXL_RESTRICT out) const {
+    Symmetric3(in, Rect(in), WeightsSymmetric3Lowpass(),
+               static_cast<ThreadPool*>(nullptr), out);
+  }
+};
+
+struct ConvSeparable5 {  // Functor: Separable5 lowpass with a null pool.
+  void operator()(const ImageF& in, ImageF* JXL_RESTRICT out) const {
+    Separable5(in, Rect(in), WeightsSeparable5Lowpass(),
+               static_cast<ThreadPool*>(nullptr), out);
+  }
+};
+
+void BenchmarkAll() {  // Currently a no-op: the body below is compiled out.
+#if 0  // disabled to avoid test timeouts, run manually on demand
+  const hwy::FuncInput unpredictable1 = time(nullptr) != 1234;
+  BenchmarkConv("Symmetric3", ConvSymmetric3(), unpredictable1);
+  BenchmarkConv("Separable5", ConvSeparable5(), unpredictable1);
+#endif
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE  // NOTE(review): presumably set for one per-target pass - confirm
+namespace jxl {
+
+class ConvolveTest : public hwy::TestWithParamTarget {};  // empty fixture
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(ConvolveTest);
+
+HWY_EXPORT_AND_TEST_P(ConvolveTest, TestConvolve);
+
+HWY_EXPORT_AND_TEST_P(ConvolveTest, BenchmarkAll);  // body is under `#if 0`
+
+}  // namespace jxl
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/data_parallel_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/data_parallel_test.cc
new file mode 100644
index 0000000000..63db1f8ca0
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/data_parallel_test.cc
@@ -0,0 +1,111 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/data_parallel.h"
+
+#include "gtest/gtest.h"
+#include "lib/jxl/base/thread_pool_internal.h"
+#include "lib/jxl/test_utils.h"
+
+namespace jxl {
+namespace {
+
+class DataParallelTest : public ::testing::Test {
+ protected:
+  // A fake runner used to verify that DataParallel calls the client-provided
+  // runner function; it records every argument it receives on the fixture.
+  static int FakeRunner(void* runner_opaque, void* jpegxl_opaque,
+                        JxlParallelRunInit init, JxlParallelRunFunction func,
+                        uint32_t start_range, uint32_t end_range) {
+    DataParallelTest* self = static_cast<DataParallelTest*>(runner_opaque);
+    self->runner_called_++;
+    self->jpegxl_opaque_ = jpegxl_opaque;
+    self->init_ = init;
+    self->func_ = func;
+    self->start_range_ = start_range;
+    self->end_range_ = end_range;
+    return self->runner_return_;  // never invokes init or func itself
+  }
+
+  ThreadPool pool_{&DataParallelTest::FakeRunner, this};  // this = runner_opaque
+
+  // Number of times FakeRunner() was called.
+  int runner_called_ = 0;
+
+  // Parameters passed to FakeRunner.
+  void* jpegxl_opaque_ = nullptr;
+  JxlParallelRunInit init_ = nullptr;
+  JxlParallelRunFunction func_ = nullptr;
+  uint32_t start_range_ = -1;  // -1 wraps to the maximum uint32_t value
+  uint32_t end_range_ = -1;  // -1 wraps to the maximum uint32_t value
+
+  // Return value that FakeRunner will return.
+  int runner_return_ = 0;
+};
+
+// NOTE(review): neither this typedef nor TestInit is used by the tests below.
+typedef int (*JxlParallelRunInit)();
+int TestInit(void* jpegxl_opaque, size_t num_threads) { return 0; }
+
+}  // namespace
+
+TEST_F(DataParallelTest, RunnerCalledParamenters) {  // args reach FakeRunner
+  EXPECT_TRUE(pool_.Run(
+      1234, 5678, [](const size_t num_threads) { return true; },
+      [](const int task, const int thread) { return; }));
+  EXPECT_EQ(1, runner_called_);  // exactly one runner invocation per Run()
+  EXPECT_NE(nullptr, init_);
+  EXPECT_NE(nullptr, func_);
+  EXPECT_NE(nullptr, jpegxl_opaque_);
+  EXPECT_EQ(1234u, start_range_);  // range is forwarded unchanged
+  EXPECT_EQ(5678u, end_range_);
+}
+
+TEST_F(DataParallelTest, RunnerFailurePropagates) {  // failures yield false
+  runner_return_ = -1;  // FakeRunner return value.
+  EXPECT_FALSE(pool_.Run(
+      1234, 5678, [](const size_t num_threads) { return false; },
+      [](const int task, const int thread) { return; }));
+  EXPECT_FALSE(RunOnPool(  // null pool: the failing init lambda is the only failure
+      nullptr, 1234, 5678, [](const size_t num_threads) { return false; },
+      [](const int task, const int thread) { return; }, "Test"));
+}
+
+TEST_F(DataParallelTest, RunnerNotCalledOnEmptyRange) {  // begin == end case
+  runner_return_ = -1;  // FakeRunner return value.
+  EXPECT_TRUE(pool_.Run(  // succeeds although runner and init would both fail
+      123, 123, [](const size_t num_threads) { return false; },
+      [](const int task, const int thread) { return; }));
+  EXPECT_TRUE(RunOnPool(
+      nullptr, 123, 123, [](const size_t num_threads) { return false; },
+      [](const int task, const int thread) { return; }, "Test"));
+  // We don't call the external runner when the range is empty. We don't even
+  // need to call the init function.
+  EXPECT_EQ(0, runner_called_);
+}
+
+// The TestDivider is slow when compiled in debug mode.
+TEST_F(DataParallelTest, JXL_SLOW_TEST(TestDivider)) {
+  jxl::ThreadPoolInternal pool(8);
+  // 1, 2 are powers of two.
+  pool.Run(3, 2 * 1024, ThreadPool::SkipInit(),  // the task value is divisor d
+           [](const int d, const int thread) {
+             // powers of two are not supported.
+             if ((d & (d - 1)) == 0) return;
+
+             const Divider div(d);
+#ifdef NDEBUG
+             const int max_dividend = 4 * 1024 * 1024;
+#else
+             const int max_dividend = 2 * 1024 + 1;  // smaller sweep without NDEBUG
+#endif
+             for (int x = 0; x < max_dividend; ++x) {
+               const int q = div(x);
+               ASSERT_EQ(x / d, q) << x << "/" << d;  // must match integer division
+             }
+           });
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct-inl.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct-inl.h
new file mode 100644
index 0000000000..ecc3935a5d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct-inl.h
@@ -0,0 +1,361 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Fast SIMD floating-point (I)DCT, any power of two.
+
+#if defined(LIB_JXL_DCT_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_DCT_INL_H_
+#undef LIB_JXL_DCT_INL_H_
+#else
+#define LIB_JXL_DCT_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/dct_block-inl.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/transpose-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+template <size_t SZ>
+struct FVImpl {  // Maps a lane cap SZ to a float vector descriptor type.
+  using type = HWY_CAPPED(float, SZ);
+};
+
+template <>
+struct FVImpl<0> {  // SZ == 0 selects the HWY_FULL(float) descriptor instead.
+  using type = HWY_FULL(float);
+};
+
+template <size_t SZ>
+using FV = typename FVImpl<SZ>::type;  // shorthand used throughout this file
+
+// Implementation of Lowest Complexity Self Recursive Radix-2 DCT II/III
+// Algorithms, by Siriani M. Perera and Jianhua Liu.
+
+template <size_t N, size_t SZ>
+struct CoeffBundle {  // Static helpers on N rows of SZ floats (row i at i * SZ).
+  static void AddReverse(const float* JXL_RESTRICT ain1,
+                         const float* JXL_RESTRICT ain2,
+                         float* JXL_RESTRICT aout) {
+    for (size_t i = 0; i < N; i++) {  // aout[i] = ain1[i] + ain2[N - 1 - i]
+      auto in1 = Load(FV<SZ>(), ain1 + i * SZ);
+      auto in2 = Load(FV<SZ>(), ain2 + (N - i - 1) * SZ);
+      Store(in1 + in2, FV<SZ>(), aout + i * SZ);
+    }
+  }
+  static void SubReverse(const float* JXL_RESTRICT ain1,
+                         const float* JXL_RESTRICT ain2,
+                         float* JXL_RESTRICT aout) {
+    for (size_t i = 0; i < N; i++) {  // aout[i] = ain1[i] - ain2[N - 1 - i]
+      auto in1 = Load(FV<SZ>(), ain1 + i * SZ);
+      auto in2 = Load(FV<SZ>(), ain2 + (N - i - 1) * SZ);
+      Store(in1 - in2, FV<SZ>(), aout + i * SZ);
+    }
+  }
+  static void B(float* JXL_RESTRICT coeff) {  // in-place update of rows 0..N-2
+    auto sqrt2 = Set(FV<SZ>(), square_root<2>::value);
+    auto in1 = Load(FV<SZ>(), coeff);
+    auto in2 = Load(FV<SZ>(), coeff + SZ);
+    Store(MulAdd(in1, sqrt2, in2), FV<SZ>(), coeff);  // row0 = row0*sqrt2 + row1
+    for (size_t i = 1; i + 1 < N; i++) {  // row i += row i+1; last row untouched
+      auto in1 = Load(FV<SZ>(), coeff + i * SZ);
+      auto in2 = Load(FV<SZ>(), coeff + (i + 1) * SZ);
+      Store(in1 + in2, FV<SZ>(), coeff + i * SZ);
+    }
+  }
+  static void BTranspose(float* JXL_RESTRICT coeff) {
+    for (size_t i = N - 1; i > 0; i--) {  // descending: row i-1 is read unmodified
+      auto in1 = Load(FV<SZ>(), coeff + i * SZ);
+      auto in2 = Load(FV<SZ>(), coeff + (i - 1) * SZ);
+      Store(in1 + in2, FV<SZ>(), coeff + i * SZ);
+    }
+    auto sqrt2 = Set(FV<SZ>(), square_root<2>::value);
+    auto in1 = Load(FV<SZ>(), coeff);
+    Store(in1 * sqrt2, FV<SZ>(), coeff);  // row 0 is scaled last
+  }
+  // Ideally optimized away by compiler (except the multiply).
+  static void InverseEvenOdd(const float* JXL_RESTRICT ain,
+                             float* JXL_RESTRICT aout) {
+    for (size_t i = 0; i < N / 2; i++) {  // first half -> even output rows
+      auto in1 = Load(FV<SZ>(), ain + i * SZ);
+      Store(in1, FV<SZ>(), aout + 2 * i * SZ);
+    }
+    for (size_t i = N / 2; i < N; i++) {  // second half -> odd output rows
+      auto in1 = Load(FV<SZ>(), ain + i * SZ);
+      Store(in1, FV<SZ>(), aout + (2 * (i - N / 2) + 1) * SZ);
+    }
+  }
+  // Ideally optimized away by compiler.
+  static void ForwardEvenOdd(const float* JXL_RESTRICT ain, size_t ain_stride,
+                             float* JXL_RESTRICT aout) {
+    for (size_t i = 0; i < N / 2; i++) {  // even input rows -> first half
+      auto in1 = LoadU(FV<SZ>(), ain + 2 * i * ain_stride);
+      Store(in1, FV<SZ>(), aout + i * SZ);
+    }
+    for (size_t i = N / 2; i < N; i++) {  // odd input rows -> second half
+      auto in1 = LoadU(FV<SZ>(), ain + (2 * (i - N / 2) + 1) * ain_stride);
+      Store(in1, FV<SZ>(), aout + i * SZ);
+    }
+  }
+  // Invoked on full vector.
+  static void Multiply(float* JXL_RESTRICT coeff) {
+    for (size_t i = 0; i < N / 2; i++) {  // only rows N/2 .. N-1 are scaled
+      auto in1 = Load(FV<SZ>(), coeff + (N / 2 + i) * SZ);
+      auto mul = Set(FV<SZ>(), WcMultipliers<N>::kMultipliers[i]);
+      Store(in1 * mul, FV<SZ>(), coeff + (N / 2 + i) * SZ);
+    }
+  }
+  static void MultiplyAndAdd(const float* JXL_RESTRICT coeff,
+                             float* JXL_RESTRICT out, size_t out_stride) {
+    for (size_t i = 0; i < N / 2; i++) {  // writes output rows i and N - 1 - i
+      auto mul = Set(FV<SZ>(), WcMultipliers<N>::kMultipliers[i]);
+      auto in1 = Load(FV<SZ>(), coeff + i * SZ);
+      auto in2 = Load(FV<SZ>(), coeff + (N / 2 + i) * SZ);
+      auto out1 = MulAdd(mul, in2, in1);  // in1 + mul * in2
+      auto out2 = NegMulAdd(mul, in2, in1);  // in1 - mul * in2
+      StoreU(out1, FV<SZ>(), out + i * out_stride);
+      StoreU(out2, FV<SZ>(), out + (N - i - 1) * out_stride);
+    }
+  }
+  template <typename Block>
+  static void LoadFromBlock(const Block& in, size_t off,
+                            float* JXL_RESTRICT coeff) {
+    for (size_t i = 0; i < N; i++) {  // copies row i at offset off into coeff
+      Store(in.LoadPart(FV<SZ>(), i, off), FV<SZ>(), coeff + i * SZ);
+    }
+  }
+  template <typename Block>
+  static void StoreToBlockAndScale(const float* JXL_RESTRICT coeff,
+                                   const Block& out, size_t off) {
+    auto mul = Set(FV<SZ>(), 1.0f / N);  // every stored row is scaled by 1/N
+    for (size_t i = 0; i < N; i++) {
+      out.StorePart(FV<SZ>(), mul * Load(FV<SZ>(), coeff + i * SZ), i, off);
+    }
+  }
+};
+
+template <size_t N, size_t SZ>
+struct DCT1DImpl;  // In-place transform of N rows of SZ floats; defined below.
+
+template <size_t SZ>
+struct DCT1DImpl<1, SZ> {  // N == 1: nothing to do
+  JXL_INLINE void operator()(float* JXL_RESTRICT mem) {}
+};
+
+template <size_t SZ>
+struct DCT1DImpl<2, SZ> {  // N == 2: row0 = a + b, row1 = a - b
+  JXL_INLINE void operator()(float* JXL_RESTRICT mem) {
+    auto in1 = Load(FV<SZ>(), mem);
+    auto in2 = Load(FV<SZ>(), mem + SZ);
+    Store(in1 + in2, FV<SZ>(), mem);
+    Store(in1 - in2, FV<SZ>(), mem + SZ);
+  }
+};
+
+template <size_t N, size_t SZ>
+struct DCT1DImpl {  // General case: recurses twice on N / 2 via tmp.
+  void operator()(float* JXL_RESTRICT mem) {
+    // This is relatively small (4kB with 64-DCT and AVX-512)
+    HWY_ALIGN float tmp[N * SZ];
+    CoeffBundle<N / 2, SZ>::AddReverse(mem, mem + N / 2 * SZ, tmp);
+    DCT1DImpl<N / 2, SZ>()(tmp);  // first half of tmp
+    CoeffBundle<N / 2, SZ>::SubReverse(mem, mem + N / 2 * SZ, tmp + N / 2 * SZ);
+    CoeffBundle<N, SZ>::Multiply(tmp);  // scales the second half of tmp only
+    DCT1DImpl<N / 2, SZ>()(tmp + N / 2 * SZ);  // second half of tmp
+    CoeffBundle<N / 2, SZ>::B(tmp + N / 2 * SZ);
+    CoeffBundle<N, SZ>::InverseEvenOdd(tmp, mem);  // interleave back into mem
+  }
+};
+
+template <size_t N, size_t SZ>
+struct IDCT1DImpl;  // Strided from -> to transform of N rows; defined below.
+
+template <size_t SZ>
+struct IDCT1DImpl<1, SZ> {  // N == 1: copies one vector; strides are unused
+  JXL_INLINE void operator()(const float* from, size_t from_stride, float* to,
+                             size_t to_stride) {
+    StoreU(LoadU(FV<SZ>(), from), FV<SZ>(), to);
+  }
+};
+
+template <size_t SZ>
+struct IDCT1DImpl<2, SZ> {  // N == 2: to row0 = a + b, to row1 = a - b
+  JXL_INLINE void operator()(const float* from, size_t from_stride, float* to,
+                             size_t to_stride) {
+    JXL_DASSERT(from_stride >= SZ);
+    JXL_DASSERT(to_stride >= SZ);
+    auto in1 = LoadU(FV<SZ>(), from);
+    auto in2 = LoadU(FV<SZ>(), from + from_stride);
+    StoreU(in1 + in2, FV<SZ>(), to);
+    StoreU(in1 - in2, FV<SZ>(), to + to_stride);
+  }
+};
+
+template <size_t N, size_t SZ>
+struct IDCT1DImpl {  // General case: recurses twice on N / 2 via tmp.
+  void operator()(const float* from, size_t from_stride, float* to,
+                  size_t to_stride) {
+    JXL_DASSERT(from_stride >= SZ);
+    JXL_DASSERT(to_stride >= SZ);
+    // This is relatively small (4kB with 64-DCT and AVX-512)
+    HWY_ALIGN float tmp[N * SZ];
+    CoeffBundle<N, SZ>::ForwardEvenOdd(from, from_stride, tmp);
+    IDCT1DImpl<N / 2, SZ>()(tmp, SZ, tmp, SZ);  // in place, first half of tmp
+    CoeffBundle<N / 2, SZ>::BTranspose(tmp + N / 2 * SZ);
+    IDCT1DImpl<N / 2, SZ>()(tmp + N / 2 * SZ, SZ, tmp + N / 2 * SZ, SZ);
+    CoeffBundle<N, SZ>::MultiplyAndAdd(tmp, to, to_stride);  // writes all N rows
+  }
+};
+
+template <size_t N, size_t M_or_0, typename FromBlock, typename ToBlock>
+void DCT1DWrapper(const FromBlock& from, const ToBlock& to, size_t Mp) {
+  size_t M = M_or_0 != 0 ? M_or_0 : Mp;  // M_or_0 == 0: use runtime width Mp
+  constexpr size_t SZ = MaxLanes(FV<M_or_0>());
+  HWY_ALIGN float tmp[N * SZ];
+  for (size_t i = 0; i < M; i += Lanes(FV<M_or_0>())) {  // i = column offset
+    // TODO(veluca): consider removing the temporary memory here (as is done in
+    // IDCT), if it turns out that some compilers don't optimize away the loads
+    // and this is performance-critical.
+    CoeffBundle<N, SZ>::LoadFromBlock(from, i, tmp);
+    DCT1DImpl<N, SZ>()(tmp);  // in-place transform of the N gathered rows
+    CoeffBundle<N, SZ>::StoreToBlockAndScale(tmp, to, i);  // scaled by 1/N
+  }
+}
+
+template <size_t N, size_t M_or_0, typename FromBlock, typename ToBlock>
+void IDCT1DWrapper(const FromBlock& from, const ToBlock& to, size_t Mp) {
+  size_t M = M_or_0 != 0 ? M_or_0 : Mp;  // M_or_0 == 0: use runtime width Mp
+  constexpr size_t SZ = MaxLanes(FV<M_or_0>());
+  for (size_t i = 0; i < M; i += Lanes(FV<M_or_0>())) {  // i = column offset
+    IDCT1DImpl<N, SZ>()(from.Address(0, i), from.Stride(), to.Address(0, i),
+                        to.Stride());  // works directly on block memory
+  }
+}
+
+template <size_t N, size_t M, typename = void>
+struct DCT1D {  // Default: wrapper instantiated with compile-time width M.
+  template <typename FromBlock, typename ToBlock>
+  void operator()(const FromBlock& from, const ToBlock& to) {
+    return DCT1DWrapper<N, M>(from, to, M);
+  }
+};
+
+template <size_t N, size_t M>  // Selected when M > MaxLanes(FV<0>()).
+struct DCT1D<N, M, typename std::enable_if<(M > MaxLanes(FV<0>()))>::type> {
+  template <typename FromBlock, typename ToBlock>
+  void operator()(const FromBlock& from, const ToBlock& to) {
+    return NoInlineWrapper(DCT1DWrapper<N, 0, FromBlock, ToBlock>, from, to, M);
+  }
+};
+
+template <size_t N, size_t M, typename = void>
+struct IDCT1D {  // Default: wrapper instantiated with compile-time width M.
+  template <typename FromBlock, typename ToBlock>
+  void operator()(const FromBlock& from, const ToBlock& to) {
+    return IDCT1DWrapper<N, M>(from, to, M);
+  }
+};
+
+template <size_t N, size_t M>  // Selected when M > MaxLanes(FV<0>()).
+struct IDCT1D<N, M, typename std::enable_if<(M > MaxLanes(FV<0>()))>::type> {
+  template <typename FromBlock, typename ToBlock>
+  void operator()(const FromBlock& from, const ToBlock& to) {
+    return NoInlineWrapper(IDCT1DWrapper<N, 0, FromBlock, ToBlock>, from, to,
+                           M);  // M is passed as the runtime width Mp
+  }
+};
+
+// Computes the NxN transposed-scaled-DCT (tsDCT) of `from` and writes it to
+// `to`; `to` also holds the first-pass output. NOTE(review): the original note
+// required the block to be HWY_ALIGN'ed - presumably `to` and scratch_space.
+// See also DCTSlow, ComputeDCT
+template <size_t N>
+struct ComputeTransposedScaledDCT {
+  // scratch_space must be aligned, and should have space for N*N floats.
+  template <class From>
+  HWY_MAYBE_UNUSED void operator()(const From& from, float* JXL_RESTRICT to,
+                                   float* JXL_RESTRICT scratch_space) {
+    float* JXL_RESTRICT block = scratch_space;
+    DCT1D<N, N>()(from, DCTTo(to, N));  // first 1-D pass: from -> to
+    Transpose<N, N>::Run(DCTFrom(to, N), DCTTo(block, N));  // to -> scratch
+    DCT1D<N, N>()(DCTFrom(block, N), DCTTo(to, N));  // second pass: scratch -> to
+  }
+};
+
+// Computes the NxN transposed-scaled-iDCT (tsIDCT) of `from` and writes it to
+// `to`. The contents of `from` are overwritten as intermediate storage.
+//
+// See also IDCTSlow, ComputeIDCT.
+
+template <size_t N>
+struct ComputeTransposedScaledIDCT {
+  // scratch_space must be aligned, and should have space for N*N floats.
+  template <class To>
+  HWY_MAYBE_UNUSED void operator()(float* JXL_RESTRICT from, const To& to,
+                                   float* JXL_RESTRICT scratch_space) {
+    float* JXL_RESTRICT block = scratch_space;
+    IDCT1D<N, N>()(DCTFrom(from, N), DCTTo(block, N));  // from -> scratch
+    Transpose<N, N>::Run(DCTFrom(block, N), DCTTo(from, N));  // clobbers from
+    IDCT1D<N, N>()(DCTFrom(from, N), to);  // second pass: from -> to
+  }
+};
+// Computes the scaled DCT of a ROWS x COLS block read from `from` into `to`.
+// Used for rectangular blocks; only the ROWS < COLS path transposes back.
+template <size_t ROWS, size_t COLS>
+struct ComputeScaledDCT {
+  // scratch_space must be aligned, and should have space for ROWS*COLS
+  // floats.
+  template <class From>
+  HWY_MAYBE_UNUSED void operator()(const From& from, float* to,
+                                   float* JXL_RESTRICT scratch_space) {
+    float* JXL_RESTRICT block = scratch_space;
+    if (ROWS < COLS) {  // result ends in `to` with stride COLS
+      DCT1D<ROWS, COLS>()(from, DCTTo(block, COLS));
+      Transpose<ROWS, COLS>::Run(DCTFrom(block, COLS), DCTTo(to, ROWS));
+      DCT1D<COLS, ROWS>()(DCTFrom(to, ROWS), DCTTo(block, ROWS));
+      Transpose<COLS, ROWS>::Run(DCTFrom(block, ROWS), DCTTo(to, COLS));
+    } else {  // no final transpose: result ends in `to` with stride ROWS
+      DCT1D<ROWS, COLS>()(from, DCTTo(to, COLS));
+      Transpose<ROWS, COLS>::Run(DCTFrom(to, COLS), DCTTo(block, ROWS));
+      DCT1D<COLS, ROWS>()(DCTFrom(block, ROWS), DCTTo(to, ROWS));
+    }
+  }
+};
+// Computes the scaled IDCT of the coefficients in `from` and writes it to
+// `to`. Used for rectangular blocks; `from` is overwritten along the way.
+template <size_t ROWS, size_t COLS>
+struct ComputeScaledIDCT {
+  // scratch_space must be aligned, and should have space for ROWS*COLS
+  // floats.
+  template <class To>
+  HWY_MAYBE_UNUSED void operator()(float* JXL_RESTRICT from, const To& to,
+                                   float* JXL_RESTRICT scratch_space) {
+    float* JXL_RESTRICT block = scratch_space;
+    // Reverse the steps done in ComputeScaledDCT.
+    if (ROWS < COLS) {  // input is read with stride COLS
+      Transpose<ROWS, COLS>::Run(DCTFrom(from, COLS), DCTTo(block, ROWS));
+      IDCT1D<COLS, ROWS>()(DCTFrom(block, ROWS), DCTTo(from, ROWS));
+      Transpose<COLS, ROWS>::Run(DCTFrom(from, ROWS), DCTTo(block, COLS));
+      IDCT1D<ROWS, COLS>()(DCTFrom(block, COLS), to);
+    } else {  // input is read with stride ROWS (no leading transpose)
+      IDCT1D<COLS, ROWS>()(DCTFrom(from, ROWS), DCTTo(block, ROWS));
+      Transpose<COLS, ROWS>::Run(DCTFrom(block, ROWS), DCTTo(from, COLS));
+      IDCT1D<ROWS, COLS>()(DCTFrom(from, COLS), to);
+    }
+  }
+};
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+#endif  // LIB_JXL_DCT_INL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct_block-inl.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct_block-inl.h
new file mode 100644
index 0000000000..179647838d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct_block-inl.h
@@ -0,0 +1,108 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Adapters for DCT input/output: from/to contiguous blocks or image rows.
+
+#if defined(LIB_JXL_DCT_BLOCK_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_DCT_BLOCK_INL_H_
+#undef LIB_JXL_DCT_BLOCK_INL_H_
+#else
+#define LIB_JXL_DCT_BLOCK_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/status.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Vec;
+
+// Block: (x, y) <-> (N * y + x)
+// Lines: (x, y) <-> (stride * y + x)
+//
+// I.e. Block is a specialization of Lines with fixed stride.
+//
+// FromXXX should implement Read and LoadPart (reads a vector).
+// ToXXX should implement Write and StorePart (writes a vector).
+
+template <size_t N>
+using BlockDesc = HWY_CAPPED(float, N);  // float descriptor capped at N lanes
+
+// Here and in the following, the descriptor type D passed to LoadPart/StorePart
+// determines how many values are loaded/stored (Lanes(D())). Needed because we
+// want to handle 4x4 sub-blocks of 16x16 blocks.
+class DCTFrom {  // Read-only view: element (row, i) is data[row * stride + i].
+ public:
+  DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {}
+
+  template <typename D>
+  HWY_INLINE Vec<D> LoadPart(D, const size_t row, size_t i) const {
+    JXL_DASSERT(Lanes(D()) <= stride_);
+    // Since these functions are used also for DC, no alignment at all is
+    // guaranteed in the case of floating blocks.
+    // TODO(veluca): consider using a different class for DC-to-LF and
+    // DC-from-LF, or copying DC values to/from a temporary aligned location.
+    return LoadU(D(), Address(row, i));  // unaligned load of Lanes(D()) values
+  }
+
+  HWY_INLINE float Read(const size_t row, const size_t i) const {
+    return *Address(row, i);  // single scalar element
+  }
+
+  HWY_INLINE const float* Address(const size_t row,
+                                            const size_t i) const {
+    return data_ + row * stride_ + i;
+  }
+
+  size_t Stride() const { return stride_; }  // distance between rows, in floats
+
+ private:
+  size_t stride_;
+  const float* JXL_RESTRICT data_;
+};
+
+class DCTTo {  // Writable view: element (row, i) is data[row * stride + i].
+ public:
+  DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {}
+
+  template <typename D>
+  HWY_INLINE void StorePart(D, const Vec<D>& v, const size_t row,
+                            size_t i) const {
+    JXL_DASSERT(Lanes(D()) <= stride_);
+    // Since these functions are used also for DC, no alignment at all is
+    // guaranteed in the case of floating blocks.
+    // TODO(veluca): consider using a different class for DC-to-LF and
+    // DC-from-LF, or copying DC values to/from a temporary aligned location.
+    StoreU(v, D(), Address(row, i));  // unaligned store of Lanes(D()) values
+  }
+
+  HWY_INLINE void Write(float v, const size_t row, const size_t i) const {
+    *Address(row, i) = v;  // single scalar element
+  }
+
+  HWY_INLINE float* Address(const size_t row, const size_t i) const {
+    return data_ + row * stride_ + i;
+  }
+
+  size_t Stride() const { return stride_; }  // distance between rows, in floats
+
+ private:
+  size_t stride_;
+  float* JXL_RESTRICT data_;
+};
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_DCT_BLOCK_INL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct_for_test.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct_for_test.h
new file mode 100644
index 0000000000..8e32aa7eff
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct_for_test.h
@@ -0,0 +1,99 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DCT_FOR_TEST_H_
+#define LIB_JXL_DCT_FOR_TEST_H_
+
+// Unoptimized DCT only for use in tests.
+
+#include <string.h>  // memcpy
+
+#include <cmath>
+#include <vector>
+
+#include "lib/jxl/common.h"  // Pi
+
+namespace jxl {
+
+namespace test {
+static inline double alpha(int u) { return u != 0 ? 1.0 : 0.7071067811865475; }
+
+// N-DCT on M columns, divided by sqrt(N). Matches the definition in the spec.
+template <size_t N, size_t M>
+void DCT1D(double block[N * M], double out[N * M]) {
+  std::vector<double> matrix(N * N);  // matrix[N * u + y]: input y -> output u
+  const double scale = std::sqrt(2.0) / N;
+  for (size_t y = 0; y < N; y++) {
+    for (size_t u = 0; u < N; u++) {
+      matrix[N * u + y] = alpha(u) * std::cos((y + 0.5) * u * Pi(1.0 / N)) * scale;
+    }
+  }
+  for (size_t x = 0; x < M; x++) {  // block/out are row-major: N rows of M
+    for (size_t u = 0; u < N; u++) {
+      out[M * u + x] = 0;
+      for (size_t y = 0; y < N; y++) {
+        out[M * u + x] += matrix[N * u + y] * block[M * y + x];
+      }
+    }
+  }
+}
+
+// N-IDCT on M columns, multiplied by sqrt(N). Matches the definition in the
+// spec. `block` and `out` are row-major with N rows of M values each.
+template <size_t N, size_t M>
+void IDCT1D(double block[N * M], double out[N * M]) {
+  std::vector<double> matrix(N * N);
+  const double scale = std::sqrt(2.0);
+  for (size_t y = 0; y < N; y++) {
+    for (size_t u = 0; u < N; u++) {
+      // Transpose of DCT matrix. std::cos matches the std::sqrt used above.
+      matrix[N * y + u] = alpha(u) * std::cos((y + 0.5) * u * Pi(1.0 / N)) * scale;
+    }
+  }
+  for (size_t x = 0; x < M; x++) {
+    for (size_t u = 0; u < N; u++) {
+      out[M * u + x] = 0;
+      for (size_t y = 0; y < N; y++) {
+        out[M * u + x] += matrix[N * u + y] * block[M * y + x];
+      }
+    }
+  }
+}
+
+template <size_t N, size_t M>
+void TransposeBlock(double in[N * M], double out[M * N]) {
+  // `in` is read as N rows of M values; `out` is written as M rows of N.
+  for (size_t src = 0; src < N * M; src++) {
+    const size_t row = src / M, col = src % M;
+    out[col * N + row] = in[src];
+  }
+}
+}  // namespace test
+
+// Untransposed 2-D DCT of an N x N block, computed in place.
+template <size_t N>
+void DCTSlow(double block[N * N]) {
+  std::vector<double> scratch(N * N);
+  // Two identical passes: 1-D DCT into scratch, then transpose back.
+  for (int pass = 0; pass < 2; ++pass) {
+    test::DCT1D<N, N>(block, scratch.data());
+    test::TransposeBlock<N, N>(scratch.data(), block);
+  }
+}
+
+// Untransposed 2-D IDCT of an N x N block, computed in place.
+template <size_t N>
+void IDCTSlow(double block[N * N]) {
+  std::vector<double> scratch(N * N);
+  // Two identical passes: 1-D IDCT into scratch, then transpose back.
+  for (int pass = 0; pass < 2; ++pass) {
+    test::IDCT1D<N, N>(block, scratch.data());
+    test::TransposeBlock<N, N>(scratch.data(), block);
+  }
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DCT_FOR_TEST_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct_scales.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct_scales.cc
new file mode 100644
index 0000000000..f9e89a6014
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct_scales.cc
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dct_scales.h"
+
+namespace jxl {
+
+// Out-of-class definitions of the static constexpr arrays from dct_scales.h.
+constexpr float DCTResampleScales<1, 8>::kScales[];
+constexpr float DCTResampleScales<2, 16>::kScales[];
+constexpr float DCTResampleScales<4, 32>::kScales[];
+constexpr float DCTResampleScales<8, 64>::kScales[];
+constexpr float DCTResampleScales<16, 128>::kScales[];
+constexpr float DCTResampleScales<32, 256>::kScales[];
+constexpr float DCTResampleScales<8, 1>::kScales[];
+constexpr float DCTResampleScales<16, 2>::kScales[];
+constexpr float DCTResampleScales<32, 4>::kScales[];
+constexpr float DCTResampleScales<64, 8>::kScales[];
+constexpr float DCTResampleScales<128, 16>::kScales[];
+constexpr float DCTResampleScales<256, 32>::kScales[];
+constexpr float WcMultipliers<4>::kMultipliers[];
+constexpr float WcMultipliers<8>::kMultipliers[];
+constexpr float WcMultipliers<16>::kMultipliers[];
+constexpr float WcMultipliers<32>::kMultipliers[];
+constexpr float WcMultipliers<64>::kMultipliers[];
+constexpr float WcMultipliers<128>::kMultipliers[];
+constexpr float WcMultipliers<256>::kMultipliers[];
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct_scales.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct_scales.h
new file mode 100644
index 0000000000..9ec670aedc
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct_scales.h
@@ -0,0 +1,390 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DCT_SCALES_H_
+#define LIB_JXL_DCT_SCALES_H_
+
+// Scaling factors.
+
+#include <stddef.h>
+
+namespace jxl {
+template <size_t V>
+struct square_root {  // compile-time sqrt(V); only meaningful for powers of two
+  static constexpr float value = square_root<V / 4>::value * 2;  // sqrt(V) = 2 * sqrt(V / 4)
+};
+
+template <>
+struct square_root<1> {  // base case reached from even powers of two
+  static constexpr float value = 1.0f;
+};
+
+template <>
+struct square_root<2> {  // base case reached from odd powers of two
+  static constexpr float value = 1.4142135623730951f;
+};
+
+// For n != 0, the n-th basis function of an N-DCT, evaluated in pixel k, has a
+// value of cos((k+1/2) n/N pi). When downsampling by 2x, we average
+// the values for pixel k and k+1 to get the value for pixel (k/2), thus we get
+//
+// [cos((k+1/2) n/N pi) + cos((k+3/2) n/N pi)]/2 =
+// cos(n/(2N) pi) cos((k+1) n/N pi) =
+// cos(n/(2N) pi) cos(((k/2)+1/2) n/(N/2) pi)
+//
+// which is exactly the same as the value of pixel k/2 of a N/2-sized DCT,
+// except for the cos(n/(2N) pi) scaling factor (which does *not*
+// depend on the pixel). Thus, when using the lower-frequency coefficients of a
+// DCT-N to compute a DCT-(N/2), they should be scaled by this constant. Scaling
+// factors for a DCT-(N/4) etc can then be obtained by successive
+// multiplications. The structs below contain the above-mentioned scaling
+// factors.
+//
+// Python code for the tables below (N is the FROM size; N // 8 == TO entries):
+//
+// for i in range(N // 8):
+//    v = math.cos(i / (2 * N) * math.pi)
+//    v *= math.cos(i / (N) * math.pi)
+//    v *= math.cos(i / (N / 2) * math.pi)
+//    print(v, end=", ")
+
+template <size_t FROM, size_t TO>
+struct DCTResampleScales;
+
+template <>
+struct DCTResampleScales<8, 1> {  // scales for DCT-8 -> DCT-1 (1 entry)
+  static constexpr float kScales[] = {
+      1.000000000000000000,
+  };
+};
+
+template <>
+struct DCTResampleScales<16, 2> {  // scales for DCT-16 -> DCT-2 (2 entries)
+  static constexpr float kScales[] = {
+      1.000000000000000000,
+      0.901764195028874394,
+  };
+};
+
+template <>
+struct DCTResampleScales<32, 4> {  // scales for DCT-32 -> DCT-4 (4 entries)
+  static constexpr float kScales[] = {
+      1.000000000000000000,
+      0.974886821136879522,
+      0.901764195028874394,
+      0.787054918159101335,
+  };
+};
+
+template <>
+struct DCTResampleScales<64, 8> {  // scales for DCT-64 -> DCT-8 (8 entries)
+  static constexpr float kScales[] = {
+      1.0000000000000000, 0.9936866130906366, 0.9748868211368796,
+      0.9440180941651672, 0.9017641950288744, 0.8490574973847023,
+      0.7870549181591013, 0.7171081282466044,
+  };
+};
+
+template <>
+struct DCTResampleScales<128, 16> {  // scales for DCT-128 -> DCT-16 (16 entries)
+  static constexpr float kScales[] = {
+      1.0,
+      0.9984194528776054,
+      0.9936866130906366,
+      0.9858278282666936,
+      0.9748868211368796,
+      0.9609244059440204,
+      0.9440180941651672,
+      0.9242615922757944,
+      0.9017641950288744,
+      0.8766500784429904,
+      0.8490574973847023,
+      0.8191378932865928,
+      0.7870549181591013,
+      0.7529833816270532,
+      0.7171081282466044,
+      0.6796228528314651,
+  };
+};
+
+template <>
+struct DCTResampleScales<256, 32> {  // scales for DCT-256 -> DCT-32 (32 entries)
+  static constexpr float kScales[] = {
+      1.0,
+      0.9996047255830407,
+      0.9984194528776054,
+      0.9964458326264695,
+      0.9936866130906366,
+      0.9901456355893141,
+      0.9858278282666936,
+      0.9807391980963174,
+      0.9748868211368796,
+      0.9682788310563117,
+      0.9609244059440204,
+      0.9528337534340876,
+      0.9440180941651672,
+      0.9344896436056892,
+      0.9242615922757944,
+      0.913348084400198,
+      0.9017641950288744,
+      0.8895259056651056,
+      0.8766500784429904,
+      0.8631544288990163,
+      0.8490574973847023,
+      0.8343786191696513,
+      0.8191378932865928,
+      0.8033561501721485,
+      0.7870549181591013,
+      0.7702563888779096,
+      0.7529833816270532,
+      0.7352593067735488,
+      0.7171081282466044,
+      0.6985543251889097,
+      0.6796228528314651,
+      0.6603391026591464,
+  };
+};
+
+// Inverses of the above.
+template <>
+struct DCTResampleScales<1, 8> {  // element-wise reciprocals of <8, 1>::kScales
+  static constexpr float kScales[] = {
+      1.000000000000000000,
+  };
+};
+
+template <>
+struct DCTResampleScales<2, 16> {  // element-wise reciprocals of <16, 2>::kScales
+  static constexpr float kScales[] = {
+      1.000000000000000000,
+      1.108937353592731823,
+  };
+};
+
+template <>
+struct DCTResampleScales<4, 32> {  // element-wise reciprocals of <32, 4>::kScales
+  static constexpr float kScales[] = {
+      1.000000000000000000,
+      1.025760096781116015,
+      1.108937353592731823,
+      1.270559368765487251,
+  };
+};
+
+template <>
+struct DCTResampleScales<8, 64> {  // element-wise reciprocals of <64, 8>::kScales
+  static constexpr float kScales[] = {
+      1.0000000000000000, 1.0063534990068217, 1.0257600967811158,
+      1.0593017296817173, 1.1089373535927318, 1.1777765381970435,
+      1.2705593687654873, 1.3944898413647777,
+  };
+};
+
+template <>
+struct DCTResampleScales<16, 128> {  // element-wise reciprocals of <128, 16>::kScales
+  static constexpr float kScales[] = {
+      1.0,
+      1.0015830492062623,
+      1.0063534990068217,
+      1.0143759095928793,
+      1.0257600967811158,
+      1.0406645869480142,
+      1.0593017296817173,
+      1.0819447744633812,
+      1.1089373535927318,
+      1.1407059950032632,
+      1.1777765381970435,
+      1.2207956782315876,
+      1.2705593687654873,
+      1.3280505578213306,
+      1.3944898413647777,
+      1.4714043176061107,
+  };
+};
+
+template <>
+struct DCTResampleScales<32, 256> {  // element-wise reciprocals of <256, 32>::kScales
+  static constexpr float kScales[] = {
+      1.0,
+      1.0003954307206069,
+      1.0015830492062623,
+      1.0035668445360069,
+      1.0063534990068217,
+      1.009952439375063,
+      1.0143759095928793,
+      1.0196390660647288,
+      1.0257600967811158,
+      1.0327603660498115,
+      1.0406645869480142,
+      1.049501024072585,
+      1.0593017296817173,
+      1.0701028169146336,
+      1.0819447744633812,
+      1.0948728278734026,
+      1.1089373535927318,
+      1.124194353004584,
+      1.1407059950032632,
+      1.158541237256391,
+      1.1777765381970435,
+      1.1984966740820495,
+      1.2207956782315876,
+      1.244777922949508,
+      1.2705593687654873,
+      1.2982690107339132,
+      1.3280505578213306,
+      1.3600643892400104,
+      1.3944898413647777,
+      1.4315278911623237,
+      1.4714043176061107,
+      1.5143734423314616,
+  };
+};
+
+// Constants for DCT implementation. Generated by the following snippet:
+// for i in range(N // 2):
+//    print(1.0 / (2 * math.cos((i + 0.5) * math.pi / N)), end=", ")
+template <size_t N>
+struct WcMultipliers;
+
+template <>
+struct WcMultipliers<4> {  // 2 entries: 1 / (2 cos((i + 0.5) pi / 4))
+  static constexpr float kMultipliers[] = {
+      0.541196100146197,
+      1.3065629648763764,
+  };
+};
+
+template <>
+struct WcMultipliers<8> {  // 4 entries: 1 / (2 cos((i + 0.5) pi / 8))
+  static constexpr float kMultipliers[] = {
+      0.5097955791041592,
+      0.6013448869350453,
+      0.8999762231364156,
+      2.5629154477415055,
+  };
+};
+
+template <>
+struct WcMultipliers<16> {  // 8 entries: 1 / (2 cos((i + 0.5) pi / 16))
+  static constexpr float kMultipliers[] = {
+      0.5024192861881557, 0.5224986149396889, 0.5669440348163577,
+      0.6468217833599901, 0.7881546234512502, 1.060677685990347,
+      1.7224470982383342, 5.101148618689155,
+  };
+};
+
+template <>
+struct WcMultipliers<32> {  // 16 entries: 1 / (2 cos((i + 0.5) pi / 32))
+  static constexpr float kMultipliers[] = {
+      0.5006029982351963, 0.5054709598975436, 0.5154473099226246,
+      0.5310425910897841, 0.5531038960344445, 0.5829349682061339,
+      0.6225041230356648, 0.6748083414550057, 0.7445362710022986,
+      0.8393496454155268, 0.9725682378619608, 1.1694399334328847,
+      1.4841646163141662, 2.057781009953411,  3.407608418468719,
+      10.190008123548033,
+  };
+};
+template <>
+struct WcMultipliers<64> {  // 32 entries: 1 / (2 cos((i + 0.5) pi / 64))
+  static constexpr float kMultipliers[] = {
+      0.500150636020651,  0.5013584524464084, 0.5037887256810443,
+      0.5074711720725553, 0.5124514794082247, 0.5187927131053328,
+      0.52657731515427,   0.535909816907992,  0.5469204379855088,
+      0.5597698129470802, 0.57465518403266,   0.5918185358574165,
+      0.6115573478825099, 0.6342389366884031, 0.6603198078137061,
+      0.6903721282002123, 0.7251205223771985, 0.7654941649730891,
+      0.8127020908144905, 0.8683447152233481, 0.9345835970364075,
+      1.0144082649970547, 1.1120716205797176, 1.233832737976571,
+      1.3892939586328277, 1.5939722833856311, 1.8746759800084078,
+      2.282050068005162,  2.924628428158216,  4.084611078129248,
+      6.796750711673633,  20.373878167231453,
+  };
+};
+template <>
+struct WcMultipliers<128> {  // 64 entries: 1 / (2 cos((i + 0.5) pi / 128))
+  static constexpr float kMultipliers[] = {
+      0.5000376519155477, 0.5003390374428216, 0.5009427176380873,
+      0.5018505174842379, 0.5030651913013697, 0.5045904432216454,
+      0.5064309549285542, 0.5085924210498143, 0.5110815927066812,
+      0.5139063298475396, 0.5170756631334912, 0.5205998663018917,
+      0.524490540114724,  0.5287607092074876, 0.5334249333971333,
+      0.538499435291984,  0.5440022463817783, 0.549953374183236,
+      0.5563749934898856, 0.5632916653417023, 0.5707305880121454,
+      0.5787218851348208, 0.5872989370937893, 0.5964987630244563,
+      0.606362462272146,  0.6169357260050706, 0.6282694319707711,
+      0.6404203382416639, 0.6534518953751283, 0.6674352009263413,
+      0.6824501259764195, 0.6985866506472291, 0.7159464549705746,
+      0.7346448236478627, 0.7548129391165311, 0.776600658233963,
+      0.8001798956216941, 0.8257487738627852, 0.8535367510066064,
+      0.8838110045596234, 0.9168844461846523, 0.9531258743921193,
+      0.9929729612675466, 1.036949040910389,  1.0856850642580145,
+      1.1399486751015042, 1.2006832557294167, 1.2690611716991191,
+      1.346557628206286,  1.4350550884414341, 1.5369941008524954,
+      1.6555965242641195, 1.7952052190778898, 1.961817848571166,
+      2.163957818751979,  2.4141600002500763, 2.7316450287739396,
+      3.147462191781909,  3.7152427383269746, 4.5362909369693565,
+      5.827688377844654,  8.153848602466814,  13.58429025728446,
+      40.744688103351834,
+  };
+};
+
+template <>
+struct WcMultipliers<256> {  // 128 entries: 1 / (2 cos((i + 0.5) pi / 256))
+  static constexpr float kMultipliers[128] = {
+      0.5000094125358878, 0.500084723455784,  0.5002354020255269,
+      0.5004615618093246, 0.5007633734146156, 0.5011410648064231,
+      0.5015949217281668, 0.502125288230386,  0.5027325673091954,
+      0.5034172216566842, 0.5041797745258774, 0.5050208107132756,
+      0.5059409776624396, 0.5069409866925212, 0.5080216143561264,
+      0.509183703931388,  0.5104281670536573, 0.5117559854927805,
+      0.5131682130825206, 0.5146659778093218, 0.516250484068288,
+      0.5179230150949777, 0.5196849355823947, 0.5215376944933958,
+      0.5234828280796439, 0.52552196311921,   0.5276568203859896,
+      0.5298892183652453, 0.5322210772308335, 0.5346544231010253,
+      0.537191392591309,  0.5398342376841637, 0.5425853309375497,
+      0.545447171055775,  0.5484223888484947, 0.551513753605893,
+      0.554724179920619,  0.5580567349898085, 0.5615146464335654,
+      0.5651013106696203, 0.5688203018875696, 0.5726753816701664,
+      0.5766705093136241, 0.5808098529038624, 0.5850978012111273,
+      0.58953897647151,   0.5941382481306648, 0.5989007476325463,
+      0.6038318843443582, 0.6089373627182432, 0.614223200800649,
+      0.6196957502119484, 0.6253617177319102, 0.6312281886412079,
+      0.6373026519855411, 0.6435930279473415, 0.6501076975307724,
+      0.6568555347890955, 0.6638459418498757, 0.6710888870233562,
+      0.6785949463131795, 0.6863753486870501, 0.6944420255086364,
+      0.7028076645818034, 0.7114857693151208, 0.7204907235796304,
+      0.7298378629074134, 0.7395435527641373, 0.749625274727372,
+      0.7601017215162176, 0.7709929019493761, 0.7823202570613161,
+      0.7941067887834509, 0.8063772028037925, 0.8191580674598145,
+      0.83247799080191,   0.8463678182968619, 0.860860854031955,
+      0.8759931087426972, 0.8918035785352535, 0.9083345588266809,
+      0.9256319988042384, 0.9437459026371479, 0.962730784794803,
+      0.9826461881778968, 1.0035572754078206, 1.0255355056139732,
+      1.048659411496106,  1.0730154944316674, 1.0986992590905857,
+      1.1258164135986009, 1.1544842669978943, 1.184833362908442,
+      1.217009397314603,  1.2511754798461228, 1.287514812536712,
+      1.326233878832723,  1.3675662599582539, 1.411777227500661,
+      1.459169302866857,  1.5100890297227016, 1.5649352798258847,
+      1.6241695131835794, 1.6883285509131505, 1.7580406092704062,
+      1.8340456094306077, 1.9172211551275689, 2.0086161135167564,
+      2.1094945286246385, 2.22139377701127,   2.346202662531156,
+      2.486267909203593,  2.644541877144861,  2.824791402350551,
+      3.0318994541759925, 3.2723115884254845, 3.5547153325075804,
+      3.891107790700307,  4.298537526449054,  4.802076008665048,
+      5.440166215091329,  6.274908408039339,  7.413566756422303,
+      9.058751453879703,  11.644627325175037, 16.300023088031555,
+      27.163977662448232, 81.48784219222516,
+  };
+};
+
+// Returns DCTResampleScales<FROM, TO>::kScales[x]; x is not range-checked.
+template <size_t FROM, size_t TO>
+constexpr float DCTTotalResampleScale(size_t x) {
+  return DCTResampleScales<FROM, TO>::kScales[x];
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DCT_SCALES_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct_test.cc
new file mode 100644
index 0000000000..a51a3178c9
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct_test.cc
@@ -0,0 +1,390 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <string.h>
+
+#include <cmath>
+#include <numeric>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dct_test.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+#include <hwy/tests/test_util-inl.h>
+
+#include "lib/jxl/base/thread_pool_internal.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct-inl.h"
+#include "lib/jxl/dct_for_test.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/test_utils.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// In-place NxN DCT of `block`, which must be HWY_ALIGN'ed.
+//
+// Runs ComputeTransposedScaledDCT into a temporary and then transposes the
+// result back into `block`. NOTE(review): nothing in this function rescales
+// the coefficients; confirm which scaling callers expect.
+template <size_t N>
+void ComputeDCT(float block[N * N]) {
+  HWY_ALIGN float scratch[N * N];
+  HWY_ALIGN float transposed[N * N];
+  ComputeTransposedScaledDCT<N>()(DCTFrom(block, N), transposed, scratch);
+
+  // Undo the transposition; `block` receives the final coefficients.
+  Transpose<N, N>::Run(DCTFrom(transposed, N), DCTTo(block, N));
+}
+
+// Computes the in-place NxN iDCT of block (N is size_t, as in ComputeDCT).
+// Requires that block is HWY_ALIGN'ed.
+template <size_t N>
+void ComputeIDCT(float block[N * N]) {
+  HWY_ALIGN float tmp_block[N * N];
+  HWY_ALIGN float scratch_space[N * N];
+  // Transpose the input into tmp_block, which feeds the transposed iDCT.
+  Transpose<N, N>::Run(DCTFrom(block, N), DCTTo(tmp_block, N));
+
+  ComputeTransposedScaledIDCT<N>()(tmp_block, DCTTo(block, N), scratch_space);
+}
+
+// Fills an NxN block with its own row-major indices, transposes it and checks
+// that position (y, x) of the result holds the index of (x, y).
+template <size_t N>
+void TransposeTestT(float accuracy) {
+  HWY_ALIGN float input[N * N];
+  HWY_ALIGN float output[N * N];
+  DCTTo writer(input, N);
+  for (size_t y = 0; y < N; ++y) {
+    for (size_t x = 0; x < N; ++x) writer.Write(y * N + x, y, x);
+  }
+  // Transpose into a separate buffer.
+  Transpose<N, N>::Run(DCTFrom(input, N), DCTTo(output, N));
+  DCTFrom reader(output, N);
+  for (size_t y = 0; y < N; ++y) {
+    for (size_t x = 0; x < N; ++x) {
+      const float expected = x * N + y;
+      const float actual = reader.Read(y, x);
+      EXPECT_NEAR(expected, actual, accuracy) << "x = " << x << ", y = " << y;
+    }
+  }
+}
+
+void TransposeTest() {  // runs TransposeTestT for N = 8, 16 and 32
+  TransposeTestT<8>(1e-7f);
+  TransposeTestT<16>(1e-7f);
+  TransposeTestT<32>(1e-7f);
+}
+
+// Checks that IDCT1D(DCT1D(I)) reproduces the NxN identity within `accuracy`.
+template <size_t N>
+void ColumnDctRoundtripT(float accuracy) {
+  // Only a single column is of interest, but dct.h has a built-in lower limit
+  // on the number of columns it processes, so a full NxN block is transformed
+  // instead. The input is the identity matrix, which also exercises all N
+  // basis vectors at once.
+  HWY_ALIGN float identity[N * N];
+  DCTTo identity_out(identity, N);
+  DCTFrom identity_in(identity, N);
+  for (size_t row = 0; row < N; ++row) {
+    for (size_t col = 0; col < N; ++col) {
+      identity_out.Write((row == col) ? 1.0f : 0.0f, row, col);
+    }
+  }
+
+  // A separate buffer holds the coefficients: running the (I)DCT on the same
+  // memory block seems to trigger a compiler bug on ARMv7 with clang6.
+  HWY_ALIGN float coeffs[N * N];
+  DCTTo coeffs_out(coeffs, N);
+  DCTFrom coeffs_in(coeffs, N);
+
+  DCT1D<N, N>()(identity_in, coeffs_out);
+  IDCT1D<N, N>()(coeffs_in, identity_out);
+
+  for (size_t row = 0; row < N; ++row) {
+    for (size_t col = 0; col < N; ++col) {
+      const float expected = (row == col) ? 1.0f : 0.0f;
+      const float actual = identity_in.Read(row, col);
+      EXPECT_NEAR(expected, actual, accuracy) << " i=" << row << ", j=" << col;
+    }
+  }
+}
+
+void ColumnDctRoundtrip() {  // runs ColumnDctRoundtripT for N = 8, 16 and 32
+  ColumnDctRoundtripT<8>(1e-6f);
+  ColumnDctRoundtripT<16>(1e-6f);
+  ColumnDctRoundtripT<32>(1e-6f);
+}
+
+// Checks ComputeDCT against DCTSlow on unit impulses i in [start, end).
+template <size_t N>
+void TestDctAccuracy(float accuracy, size_t start = 0, size_t end = N * N) {
+  for (size_t impulse = start; impulse < end; ++impulse) {
+    double reference[N * N] = {0.0};
+    HWY_ALIGN float actual[N * N] = {0.0f};
+    reference[impulse] = 1.0;
+    actual[impulse] = 1.0;
+    DCTSlow<N>(reference);
+    ComputeDCT<N>(actual);
+    for (size_t k = 0; k < N * N; ++k) {
+      EXPECT_NEAR(actual[k], reference[k], accuracy / N)
+          << "i = " << impulse << ", k = " << k << ", N = " << N;
+    }
+  }
+}
+
+// Checks ComputeIDCT against IDCTSlow on unit impulses i in [start, end).
+template <size_t N>
+void TestIdctAccuracy(float accuracy, size_t start = 0, size_t end = N * N) {
+  for (size_t impulse = start; impulse < end; ++impulse) {
+    double reference[N * N] = {0.0};
+    HWY_ALIGN float actual[N * N] = {0.0f};
+    reference[impulse] = 1.0;
+    actual[impulse] = 1.0;
+    IDCTSlow<N>(reference);
+    ComputeIDCT<N>(actual);
+    for (size_t k = 0; k < N * N; ++k) {
+      EXPECT_NEAR(actual[k], reference[k], accuracy * N)
+          << "i = " << impulse << ", k = " << k << ", N = " << N;
+    }
+  }
+}
+
+// Checks that ComputeDCT(ComputeIDCT(e_i)) == e_i for every unit impulse e_i
+// of an NxN block; the impulses are handed out through RunOnPool.
+template <size_t N>
+void TestInverseT(float accuracy) {
+  enum { kBlockSize = N * N };
+  const auto check_impulse = [accuracy](const int task, int /*thread*/) {
+    const size_t impulse = static_cast<size_t>(task);
+    HWY_ALIGN float block[kBlockSize] = {0.0f};
+    block[impulse] = 1.0;
+
+    ComputeIDCT<N>(block);
+    ComputeDCT<N>(block);
+    for (size_t k = 0; k < kBlockSize; ++k) {
+      EXPECT_NEAR(block[k], (k == impulse) ? 1.0f : 0.0f, accuracy)
+          << "i = " << impulse << ", k = " << k;
+    }
+  };
+  ThreadPoolInternal pool(N < 32 ? 0 : 8);
+  RunOnPool(&pool, 0, kBlockSize, ThreadPool::SkipInit(), check_impulse,
+            "TestInverse");
+}
+
+void InverseTest() {  // runs TestInverseT for N = 8, 16 and 32
+  TestInverseT<8>(1e-6f);
+  TestInverseT<16>(1e-6f);
+  TestInverseT<32>(3e-6f);  // looser tolerance than the smaller sizes
+}
+
+// Adjointness check: with M = ComputeIDCT and M^\dagger = ComputeDCT (up to the
+// factors of N applied below), verifies <e_i, M e_j> = <M^\dagger e_i, e_j>,
+// i.e. (M e_j)_i = (M^\dagger e_i)_j, for i in [start, end) and all j.
+template <size_t N>
+void TestDctTranspose(float accuracy, size_t start = 0, size_t end = N * N) {
+  for (size_t i = start; i < end; ++i) {
+    for (size_t j = 0; j < N * N; ++j) {
+      // idct_ej := M e_j
+      HWY_ALIGN float idct_ej[N * N] = {0.0f};
+      idct_ej[j] = 1.0;
+      ComputeIDCT<N>(idct_ej);
+      // dct_ei := M^\dagger e_i
+      HWY_ALIGN float dct_ei[N * N] = {0.0f};
+      dct_ei[i] = 1.0;
+      ComputeDCT<N>(dct_ei);
+
+      EXPECT_NEAR(idct_ej[i] / N, dct_ei[j] * N, accuracy)
+          << "i = " << i << ", j = " << j;
+    }
+  }
+}
+
+// Checks that IDCTSlow(DCTSlow(e_i)) == e_i for unit impulses i in [start, end).
+template <size_t N>
+void TestSlowInverse(float accuracy, size_t start = 0, size_t end = N * N) {
+  for (size_t impulse = start; impulse < end; ++impulse) {
+    double block[N * N] = {0.0f};
+    block[impulse] = 1.0;
+
+    DCTSlow<N>(block);
+    IDCTSlow<N>(block);
+
+    for (size_t k = 0; k < N * N; ++k) {
+      const float want = (k == impulse) ? 1.0f : 0.0f;
+      EXPECT_NEAR(block[k], want, accuracy) << "i = " << impulse << ", k = " << k;
+    }
+  }
+}
+
+// Round-trips every unit impulse of a ROWS x COLS block through
+// ComputeScaledDCT and ComputeScaledIDCT and expects the impulse back.
+template <size_t ROWS, size_t COLS>
+void TestRectInverseT(float accuracy) {
+  constexpr size_t kNumPixels = ROWS * COLS;
+  for (size_t impulse = 0; impulse < kNumPixels; ++impulse) {
+    HWY_ALIGN float pixels[kNumPixels] = {0.0f};
+    HWY_ALIGN float coeffs[kNumPixels] = {0.0f};
+    HWY_ALIGN float restored[kNumPixels] = {0.0f};
+    HWY_ALIGN float scratch[kNumPixels * 2];
+    pixels[impulse] = 1.0;
+    ComputeScaledDCT<ROWS, COLS>()(DCTFrom(pixels, COLS), coeffs, scratch);
+    ComputeScaledIDCT<ROWS, COLS>()(coeffs, DCTTo(restored, COLS), scratch);
+    for (size_t k = 0; k < kNumPixels; ++k) {
+      EXPECT_NEAR(restored[k], (k == impulse) ? 1.0f : 0.0f, accuracy)
+          << "i = " << impulse << ", k = " << k << " ROWS = " << ROWS
+          << " COLS = " << COLS;
+    }
+  }
+}
+
+void TestRectInverse() {  // round-trip check for each rectangular block shape
+  TestRectInverseT<16, 32>(1e-6f);  // shapes with ROWS < COLS
+  TestRectInverseT<8, 32>(1e-6f);
+  TestRectInverseT<8, 16>(1e-6f);
+  TestRectInverseT<4, 8>(1e-6f);
+  TestRectInverseT<2, 4>(1e-6f);
+  TestRectInverseT<1, 4>(1e-6f);
+  TestRectInverseT<1, 2>(1e-6f);
+
+  TestRectInverseT<32, 16>(1e-6f);  // the same shapes with ROWS and COLS swapped
+  TestRectInverseT<32, 8>(1e-6f);
+  TestRectInverseT<16, 8>(1e-6f);
+  TestRectInverseT<8, 4>(1e-6f);
+  TestRectInverseT<4, 2>(1e-6f);
+  TestRectInverseT<4, 1>(1e-6f);
+  TestRectInverseT<2, 1>(1e-6f);
+}
+
+// Checks that the ROWS x COLS scaled DCT of an impulse and the COLS x ROWS
+// scaled DCT of the transposed impulse yield identical coefficient arrays.
+template <size_t ROWS, size_t COLS>
+void TestRectTransposeT(float accuracy) {
+  constexpr size_t kBlockSize = ROWS * COLS;
+  // Both outputs are read with min(ROWS, COLS) rows and max(ROWS, COLS) cols.
+  constexpr size_t OUT_ROWS = ROWS < COLS ? ROWS : COLS;
+  constexpr size_t OUT_COLS = ROWS < COLS ? COLS : ROWS;
+  HWY_ALIGN float scratch[kBlockSize * 2];
+  for (size_t px = 0; px < COLS; ++px) {
+    for (size_t py = 0; py < ROWS; ++py) {
+      HWY_ALIGN float impulse[kBlockSize] = {0.0f};
+      HWY_ALIGN float impulse_t[kBlockSize] = {0.0f};
+      HWY_ALIGN float coeffs[kBlockSize] = {0.0f};
+      HWY_ALIGN float coeffs_t[kBlockSize] = {0.0f};
+      impulse[py * COLS + px] = 1;
+      impulse_t[px * ROWS + py] = 1;
+      ComputeScaledDCT<ROWS, COLS>()(DCTFrom(impulse, COLS), coeffs, scratch);
+      ComputeScaledDCT<COLS, ROWS>()(DCTFrom(impulse_t, ROWS), coeffs_t, scratch);
+      for (size_t x = 0; x < OUT_COLS; ++x) {
+        for (size_t y = 0; y < OUT_ROWS; ++y) {
+          const size_t idx = y * OUT_COLS + x;
+          EXPECT_NEAR(coeffs[idx], coeffs_t[idx], accuracy)
+              << " px = " << px << ", py = " << py << ", x = " << x
+              << ", y = " << y;
+        }
+      }
+    }
+  }
+}
+
+void TestRectTranspose() {  // transpose-consistency check per block shape
+  TestRectTransposeT<16, 32>(1e-6f);
+  TestRectTransposeT<8, 32>(1e-6f);
+  TestRectTransposeT<8, 16>(1e-6f);
+  TestRectTransposeT<4, 8>(1e-6f);
+  TestRectTransposeT<2, 4>(1e-6f);
+  TestRectTransposeT<1, 4>(1e-6f);
+  TestRectTransposeT<1, 2>(1e-6f);
+
+  // TestRectTransposeT<16, 8> is intentionally not run: it is identical to
+  // the <8, 16> case above.
+}
+
+void TestDctAccuracyShard(size_t shard) {  // shard selects a slice of the N = 32 run
+  if (shard == 0) {  // sizes below 32 run in full, but only in shard 0
+    TestDctAccuracy<1>(1.1E-7f);
+    TestDctAccuracy<2>(1.1E-7f);
+    TestDctAccuracy<4>(1.1E-7f);
+    TestDctAccuracy<8>(1.1E-7f);
+    TestDctAccuracy<16>(1.3E-7f);
+  }
+  TestDctAccuracy<32>(1.1E-7f, 32 * shard, 32 * (shard + 1));  // impulses [32 * shard, 32 * shard + 32)
+}
+
+void TestIdctAccuracyShard(size_t shard) {  // shard selects a slice of the N = 32 run
+  if (shard == 0) {  // sizes below 32 run in full, but only in shard 0
+    TestIdctAccuracy<1>(1E-7f);
+    TestIdctAccuracy<2>(1E-7f);
+    TestIdctAccuracy<4>(1E-7f);
+    TestIdctAccuracy<8>(1E-7f);
+    TestIdctAccuracy<16>(1E-7f);
+  }
+  TestIdctAccuracy<32>(1E-7f, 32 * shard, 32 * (shard + 1));  // impulses [32 * shard, 32 * shard + 32)
+}
+
+void TestDctTransposeShard(size_t shard) {  // shard selects a slice of the N = 32 run
+  if (shard == 0) {  // sizes below 32 run in full, but only in shard 0
+    TestDctTranspose<8>(1E-6f);
+    TestDctTranspose<16>(1E-6f);
+  }
+  TestDctTranspose<32>(3E-6f, 32 * shard, 32 * (shard + 1));  // rows i in [32 * shard, 32 * shard + 32)
+}
+
+void TestSlowInverseShard(size_t shard) {  // shard selects a slice of the N = 32 run
+  if (shard == 0) {  // sizes below 32 run in full, but only in shard 0
+    TestSlowInverse<1>(1E-5f);
+    TestSlowInverse<2>(1E-5f);
+    TestSlowInverse<4>(1E-5f);
+    TestSlowInverse<8>(1E-5f);
+    TestSlowInverse<16>(1E-5f);
+  }
+  TestSlowInverse<32>(1E-5f, 32 * shard, 32 * (shard + 1));  // impulses [32 * shard, 32 * shard + 32)
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+class TransposeTest : public hwy::TestWithParamTarget {};  // fixture for the unsharded tests below
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(TransposeTest);
+
+HWY_EXPORT_AND_TEST_P(TransposeTest, TransposeTest);  // presumably one test per listed function - confirm against Highway's test macros
+HWY_EXPORT_AND_TEST_P(TransposeTest, InverseTest);
+HWY_EXPORT_AND_TEST_P(TransposeTest, ColumnDctRoundtrip);
+HWY_EXPORT_AND_TEST_P(TransposeTest, TestRectInverse);
+HWY_EXPORT_AND_TEST_P(TransposeTest, TestRectTranspose);
+
+// Tests in the DctShardedTest class are sharded for N=32; the uint32_t
+class DctShardedTest : public ::hwy::TestWithParamTargetAndT<uint32_t> {};  // parameter is the shard index
+
+std::vector<uint32_t> ShardRange(uint32_t n) {  // returns the shard indices to instantiate
+#ifdef JXL_DISABLE_SLOW_TESTS
+  JXL_ASSERT(n > 6);  // guarantees n - 1 differs from the fixed picks below
+  std::vector<uint32_t> ret = {0, 1, 3, 5, n - 1};  // reduced sample of shards
+#else
+  std::vector<uint32_t> ret(n);
+  std::iota(ret.begin(), ret.end(), 0);  // all shards: 0, 1, ..., n - 1
+#endif  // JXL_DISABLE_SLOW_TESTS
+  return ret;
+}
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(DctShardedTest,
+                                      ::testing::ValuesIn(ShardRange(32)));  // one parameter value per shard index
+
+HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestDctAccuracyShard);  // each listed function takes the shard index
+HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestIdctAccuracyShard);
+HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestDctTransposeShard);
+HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestSlowInverseShard);
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct_util.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct_util.h
new file mode 100644
index 0000000000..fb6ce3b971
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dct_util.h
@@ -0,0 +1,86 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DCT_UTIL_H_
+#define LIB_JXL_DCT_UTIL_H_
+
+#include <stddef.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+union ACPtr {  // untagged: holds either an int32_t* or an int16_t*; the union does not record which
+  int32_t* ptr32;
+  int16_t* ptr16;
+  ACPtr() = default;  // leaves the stored pointer uninitialized
+  explicit ACPtr(int16_t* p) : ptr16(p) {}
+  explicit ACPtr(int32_t* p) : ptr32(p) {}
+};
+
+union ConstACPtr {  // const-pointee counterpart of ACPtr; also untagged
+  const int32_t* ptr32;
+  const int16_t* ptr16;
+  ConstACPtr() = default;  // leaves the stored pointer uninitialized
+  explicit ConstACPtr(const int16_t* p) : ptr16(p) {}
+  explicit ConstACPtr(const int32_t* p) : ptr32(p) {}
+};
+
+enum class ACType { k16 = 0, k32 = 1 };  // returned by ACImage::Type(); ACImageT maps sizeof(T) == 2 to k16, otherwise k32
+
+class ACImage {  // abstract interface; ACImageT<T> below is the implementation in this header
+ public:
+  virtual ~ACImage() = default;
+  virtual ACType Type() const = 0;  // tells callers which ACPtr/ConstACPtr member to read
+  virtual ACPtr PlaneRow(size_t c, size_t y, size_t xbase) = 0;  // row y of plane c, offset by xbase elements
+  virtual ConstACPtr PlaneRow(size_t c, size_t y, size_t xbase) const = 0;
+  virtual size_t PixelsPerRow() const = 0;
+  virtual void ZeroFill() = 0;
+  virtual void ZeroFillPlane(size_t c) = 0;
+  virtual bool IsEmpty() const = 0;
+};
+
+// ACImage implementation backed by an Image3<T>; T is int16_t or int32_t.
+template <typename T>
+class ACImageT final : public ACImage {
+ public:
+  ACImageT() = default;
+  ACImageT(size_t xsize, size_t ysize) {
+    constexpr bool kIs16 = std::is_same<T, int16_t>::value;
+    constexpr bool kIs32 = std::is_same<T, int32_t>::value;
+    static_assert(kIs16 || kIs32, "ACImage must be either 32- or 16- bit");
+    img_ = Image3<T>(xsize, ysize);
+  }
+  // Reports the element width, derived from sizeof(T).
+  ACType Type() const override {
+    if (sizeof(T) == 2) return ACType::k16;
+    return ACType::k32;
+  }
+  // Row y of plane c, advanced by xbase elements.
+  ACPtr PlaneRow(size_t c, size_t y, size_t xbase) override {
+    return ACPtr(img_.PlaneRow(c, y) + xbase);
+  }
+  ConstACPtr PlaneRow(size_t c, size_t y, size_t xbase) const override {
+    return ConstACPtr(img_.PlaneRow(c, y) + xbase);
+  }
+  size_t PixelsPerRow() const override { return img_.PixelsPerRow(); }
+  void ZeroFill() override { ZeroFillImage(&img_); }
+  void ZeroFillPlane(size_t c) override { ZeroFillImage(&img_.Plane(c)); }
+  // True when either dimension of the backing image is zero.
+  bool IsEmpty() const override {
+    return !(img_.xsize() != 0 && img_.ysize() != 0);
+  }
+ private:
+  Image3<T> img_;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DCT_UTIL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_ans.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_ans.cc
new file mode 100644
index 0000000000..06709d7404
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_ans.cc
@@ -0,0 +1,375 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_ans.h"
+
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/ans_common.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_context_map.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+namespace {
+
+// Reads a variable-length integer in [0..255], consuming 1 to 11 bits.
+inline int DecodeVarLenUint8(BitReader* input) {
+  // A cleared leading flag bit encodes the value 0 with no further data.
+  if (!input->ReadFixedBits<1>()) return 0;
+  // Three bits give the width of the mantissa that follows.
+  const int width = static_cast<int>(input->ReadFixedBits<3>());
+  // Width 0 encodes the value 1; no mantissa bits are read in that case.
+  if (width == 0) return 1;
+  // Otherwise the value is the mantissa plus an implicit leading one bit.
+  const int mantissa = static_cast<int>(input->ReadBits(width));
+  return mantissa + (1 << width);
+}
+
+// Reads a variable-length integer in [0..65535], consuming 1 to 20 bits.
+inline int DecodeVarLenUint16(BitReader* input) {
+  // A cleared leading flag bit encodes the value 0 with no further data.
+  if (!input->ReadFixedBits<1>()) return 0;
+  // Four bits give the width of the mantissa that follows.
+  const int width = static_cast<int>(input->ReadFixedBits<4>());
+  // Width 0 encodes the value 1; no mantissa bits are read in that case.
+  if (width == 0) return 1;
+  // Otherwise the value is the mantissa plus an implicit leading one bit.
+  const int mantissa = static_cast<int>(input->ReadBits(width));
+  return mantissa + (1 << width);
+}
+
+// Reads one ANS histogram into `counts`. The simple and general encodings are
+// built so that the entries sum to 1 << precision_bits; the flat encoding is
+// delegated to CreateFlatHistogram. Fails on streams detected as malformed.
+// `counts` is resized, not cleared: the caller passes an empty vector.
+Status ReadHistogram(int precision_bits, std::vector<int>* counts,
+                     BitReader* input) {
+  int simple_code = input->ReadBits(1);
+  if (simple_code == 1) {
+    int symbols[2] = {0};
+    int max_symbol = 0;
+    const int num_symbols = input->ReadBits(1) + 1;
+    for (int i = 0; i < num_symbols; ++i) {
+      symbols[i] = DecodeVarLenUint8(input);
+      if (symbols[i] > max_symbol) max_symbol = symbols[i];
+    }
+    counts->resize(max_symbol + 1);
+    if (num_symbols == 1) {
+      (*counts)[symbols[0]] = 1 << precision_bits;
+    } else {
+      if (symbols[0] == symbols[1]) {  // corrupt data
+        return false;
+      }
+      (*counts)[symbols[0]] = input->ReadBits(precision_bits);
+      (*counts)[symbols[1]] = (1 << precision_bits) - (*counts)[symbols[0]];
+    }
+  } else {
+    int is_flat = input->ReadBits(1);
+    if (is_flat == 1) {
+      int alphabet_size = DecodeVarLenUint8(input) + 1;
+      if (alphabet_size == 0) {
+        return JXL_FAILURE("Invalid alphabet size for flat histogram.");
+      }
+      *counts = CreateFlatHistogram(alphabet_size, 1 << precision_bits);
+      return true;
+    }
+    uint32_t shift;
+    {
+      // TODO(veluca): speed up reading with table lookups.
+      int upper_bound_log = FloorLog2Nonzero(ANS_LOG_TAB_SIZE + 1);
+      int log = 0;
+      for (; log < upper_bound_log; log++) {
+        if (input->ReadFixedBits<1>() == 0) break;
+      }
+      shift = (input->ReadBits(log) | (1 << log)) - 1;
+      if (shift > ANS_LOG_TAB_SIZE + 1) {
+        return JXL_FAILURE("Invalid shift value");
+      }
+    }
+    int length = DecodeVarLenUint8(input) + 3;
+    counts->resize(length);
+    int total_count = 0;
+    // Indexed by the next 7 bits: {bits to consume, decoded log-count symbol}.
+    static const uint8_t huff[128][2] = {
+        {3, 10}, {7, 12}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+        {3, 10}, {4, 4},  {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+        {3, 10}, {5, 0},  {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+        {3, 10}, {4, 4},  {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+        {3, 10}, {6, 11}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+        {3, 10}, {4, 4},  {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+        {3, 10}, {5, 0},  {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+        {3, 10}, {4, 4},  {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+        {3, 10}, {7, 13}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+        {3, 10}, {4, 4},  {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+        {3, 10}, {5, 0},  {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+        {3, 10}, {4, 4},  {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+        {3, 10}, {6, 11}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+        {3, 10}, {4, 4},  {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+        {3, 10}, {5, 0},  {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+        {3, 10}, {4, 4},  {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+    };
+
+    std::vector<int> logcounts(counts->size());
+    int omit_log = -1;
+    int omit_pos = -1;
+    // This array remembers which symbols have an RLE length.
+    std::vector<int> same(counts->size(), 0);
+    for (size_t i = 0; i < logcounts.size(); ++i) {
+      input->Refill();  // for PeekFixedBits + Consume
+      int idx = input->PeekFixedBits<7>();
+      input->Consume(huff[idx][0]);
+      logcounts[i] = huff[idx][1];
+      // The RLE symbol.
+      if (logcounts[i] == ANS_LOG_TAB_SIZE + 1) {
+        int rle_length = DecodeVarLenUint8(input);
+        same[i] = rle_length + 5;
+        i += rle_length + 3;
+        continue;
+      }
+      if (logcounts[i] > omit_log) {
+        omit_log = logcounts[i];
+        omit_pos = i;
+      }
+    }
+    // No explicit log-count was read, e.g. the whole alphabet was RLE-coded.
+    if (omit_pos < 0) return JXL_FAILURE("Invalid histogram.");
+    // An RLE run cannot follow the omitted symbol: its count is not known yet.
+    if (static_cast<size_t>(omit_pos) + 1 < logcounts.size() &&
+        logcounts[omit_pos + 1] == ANS_LOG_TAB_SIZE + 1) {
+      return JXL_FAILURE("Invalid histogram.");
+    }
+    int prev = 0;
+    int numsame = 0;
+    for (size_t i = 0; i < logcounts.size(); ++i) {
+      if (same[i]) {
+        // RLE sequence: repeat the previous count for the next iterations.
+        numsame = same[i] - 1;
+        prev = i > 0 ? (*counts)[i - 1] : 0;
+      }
+      if (numsame > 0) {
+        (*counts)[i] = prev;
+        numsame--;
+      } else {
+        int code = logcounts[i];
+        // omit_pos may not be negative at this point (checked before).
+        if (i == static_cast<size_t>(omit_pos)) {
+          continue;
+        } else if (code == 0) {
+          continue;
+        } else if (code == 1) {
+          (*counts)[i] = 1;
+        } else {
+          int bitcount = GetPopulationCountPrecision(code - 1, shift);
+          (*counts)[i] = (1 << (code - 1)) +
+                         (input->ReadBits(bitcount) << (code - 1 - bitcount));
+        }
+      }
+      total_count += (*counts)[i];
+    }
+    (*counts)[omit_pos] = (1 << precision_bits) - total_count;
+    if ((*counts)[omit_pos] <= 0) {
+      // Explicit counts leave no room (at least 1) for the omitted symbol.
+      return JXL_FAILURE("Invalid histogram count.");
+    }
+  }
+  return true;
+}
+
+}  // namespace
+
+// Reads `num_histograms` entropy codes into `result`: prefix (Huffman) tables
+// if result->use_prefix_code is set (by the caller), else ANS alias tables.
+Status DecodeANSCodes(const size_t num_histograms,
+                      const size_t max_alphabet_size, BitReader* in,
+                      ANSCode* result) {
+  result->degenerate_symbols.resize(num_histograms, -1);
+  if (result->use_prefix_code) {
+    JXL_ASSERT(max_alphabet_size <= 1 << PREFIX_MAX_BITS);
+    result->huffman_data.resize(num_histograms);
+    // Kept 32-bit: DecodeVarLenUint16() + 1 can be 65536, which a uint16_t
+    // would wrap to 0 and slip past the max_alphabet_size check below.
+    std::vector<uint32_t> alphabet_sizes(num_histograms);
+    for (size_t c = 0; c < num_histograms; c++) {
+      alphabet_sizes[c] = DecodeVarLenUint16(in) + 1;
+      if (alphabet_sizes[c] > max_alphabet_size) {
+        return JXL_FAILURE("Alphabet size is too long: %u", alphabet_sizes[c]);
+      }
+    }
+    for (size_t c = 0; c < num_histograms; c++) {
+      if (alphabet_sizes[c] > 1) {
+        if (!result->huffman_data[c].ReadFromBitStream(alphabet_sizes[c], in)) {
+          if (!in->AllReadsWithinBounds()) {
+            return JXL_STATUS(StatusCode::kNotEnoughBytes,
+                              "Not enough bytes for huffman code");
+          }
+          return JXL_FAILURE(
+              "Invalid huffman tree number %zu, alphabet size %u", c,
+              alphabet_sizes[c]);
+        }
+      } else {
+        // 0-bit codes do not require extension tables.
+        result->huffman_data[c].table_.clear();
+        result->huffman_data[c].table_.resize(1u << kHuffmanTableBits);
+      }
+      for (const auto& h : result->huffman_data[c].table_) {
+        if (h.bits <= kHuffmanTableBits) result->UpdateMaxNumBits(c, h.value);
+      }
+    }
+  } else {
+    JXL_ASSERT(max_alphabet_size <= ANS_MAX_ALPHABET_SIZE);
+    result->alias_tables =
+        AllocateArray(num_histograms * (1 << result->log_alpha_size) *
+                      sizeof(AliasTable::Entry));
+    AliasTable::Entry* alias_tables =
+        reinterpret_cast<AliasTable::Entry*>(result->alias_tables.get());
+    for (size_t c = 0; c < num_histograms; ++c) {
+      std::vector<int> counts;
+      if (!ReadHistogram(ANS_LOG_TAB_SIZE, &counts, in)) {
+        return JXL_FAILURE("Invalid histogram bitstream.");
+      }
+      if (counts.size() > max_alphabet_size) {
+        return JXL_FAILURE("Alphabet size is too long: %zu", counts.size());
+      }
+      while (!counts.empty() && counts.back() == 0) {
+        counts.pop_back();
+      }
+      for (size_t s = 0; s < counts.size(); s++) {
+        if (counts[s] != 0) result->UpdateMaxNumBits(c, s);
+      }
+      // InitAliasTable "fixes" empty counts to contain degenerate "0" symbol.
+      int degenerate_symbol = counts.empty() ? 0 : (counts.size() - 1);
+      for (int s = 0; s < degenerate_symbol; ++s) {
+        if (counts[s] != 0) {
+          degenerate_symbol = -1;
+          break;
+        }
+      }
+      result->degenerate_symbols[c] = degenerate_symbol;
+      InitAliasTable(counts, ANS_TAB_SIZE, result->log_alpha_size,
+                     alias_tables + c * (1 << result->log_alpha_size));
+    }
+  }
+  return true;
+}
+// Reads one HybridUintConfig (split_exponent, then optional msb/lsb counts).
+Status DecodeUintConfig(size_t log_alpha_size, HybridUintConfig* uint_config,
+                        BitReader* br) {
+  br->Refill();
+  const size_t split_exponent =
+      br->ReadBits(CeilLog2Nonzero(log_alpha_size + 1));
+  size_t msb_in_token = 0, lsb_in_token = 0;
+  // When split_exponent == log_alpha_size the msb/lsb fields are not coded.
+  if (split_exponent != log_alpha_size) {
+    msb_in_token = br->ReadBits(CeilLog2Nonzero(split_exponent + 1));
+    // Reject now: the value below sizes the next read.
+    if (msb_in_token > split_exponent) {
+      return JXL_FAILURE("Invalid HybridUintConfig");
+    }
+    const size_t lsb_width = CeilLog2Nonzero(split_exponent - msb_in_token + 1);
+    lsb_in_token = br->ReadBits(lsb_width);
+  }
+  if (msb_in_token + lsb_in_token > split_exponent) {
+    return JXL_FAILURE("Invalid HybridUintConfig");
+  }
+  *uint_config = HybridUintConfig(split_exponent, msb_in_token, lsb_in_token);
+  return true;
+}
+
+Status DecodeUintConfigs(size_t log_alpha_size,
+                         std::vector<HybridUintConfig>* uint_config,
+                         BitReader* br) {
+  // TODO(veluca): RLE?
+  // Entries are decoded one by one, in index order, via DecodeUintConfig.
+  for (HybridUintConfig& cfg : *uint_config) {
+    JXL_RETURN_IF_ERROR(DecodeUintConfig(log_alpha_size, &cfg, br));
+  }
+  return true;
+}
+
+LZ77Params::LZ77Params() { Bundle::Init(this); }  // Delegates to Bundle::Init.
+Status LZ77Params::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &enabled));
+  if (!visitor->Conditional(enabled)) return true;  // Skip the other fields.
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(224), Val(512), Val(4096),
+                                         BitsOffset(15, 8), 224, &min_symbol));
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(3), Val(4), BitsOffset(2, 5),
+                                         BitsOffset(8, 9), 3, &min_length));
+  return true;
+}
+
+void ANSCode::UpdateMaxNumBits(size_t ctx, size_t symbol) {
+  // LZ77 length symbols (outside the distance context) are rebased to
+  // min_symbol and measured with the dedicated length config.
+  const bool is_lz77_length = lz77.enabled &&
+                              lz77.nonserialized_distance_context != ctx &&
+                              symbol >= lz77.min_symbol;
+  if (is_lz77_length) symbol -= lz77.min_symbol;
+  const HybridUintConfig& cfg =
+      is_lz77_length ? lz77.length_uint_config : uint_config[ctx];
+  const size_t split_exponent = cfg.split_exponent;
+  const size_t split_token = cfg.split_token;
+  const size_t in_token_bits =
+      static_cast<size_t>(cfg.msb_in_token) + cfg.lsb_in_token;
+  // Symbols below split_token only need split_exponent bits.
+  size_t needed_bits = split_exponent;
+  if (symbol >= split_token) {
+    const uint32_t n_extra_bits = split_exponent - in_token_bits +
+                                  ((symbol - split_token) >> in_token_bits);
+    needed_bits = in_token_bits + n_extra_bits + 1;
+  }
+  max_num_bits = std::max(max_num_bits, needed_bits);
+}
+
+Status DecodeHistograms(BitReader* br, size_t num_contexts, ANSCode* code,
+                        std::vector<uint8_t>* context_map, bool disallow_lz77) {
+  PROFILER_FUNC;
+  JXL_RETURN_IF_ERROR(Bundle::Read(br, &code->lz77));
+  if (code->lz77.enabled) {
+    num_contexts++;  // Extra last context; becomes the distance context.
+    JXL_RETURN_IF_ERROR(DecodeUintConfig(/*log_alpha_size=*/8,
+                                         &code->lz77.length_uint_config, br));
+  }
+  if (code->lz77.enabled && disallow_lz77) {
+    return JXL_FAILURE("Using LZ77 when explicitly disallowed");
+  }
+  size_t num_histograms = 1;
+  context_map->resize(num_contexts);
+  if (num_contexts > 1) {
+    JXL_RETURN_IF_ERROR(DecodeContextMap(context_map, &num_histograms, br));
+  }  // NOTE(review): back() below needs num_contexts >= 1 -- confirm callers.
+  code->lz77.nonserialized_distance_context = context_map->back();
+  code->use_prefix_code = br->ReadFixedBits<1>();
+  if (code->use_prefix_code) {
+    code->log_alpha_size = PREFIX_MAX_BITS;
+  } else {
+    code->log_alpha_size = br->ReadFixedBits<2>() + 5;  // 5..8
+  }
+  code->uint_config.resize(num_histograms);  // One config per histogram.
+  JXL_RETURN_IF_ERROR(
+      DecodeUintConfigs(code->log_alpha_size, &code->uint_config, br));
+  const size_t max_alphabet_size = 1 << code->log_alpha_size;
+  JXL_RETURN_IF_ERROR(
+      DecodeANSCodes(num_histograms, max_alphabet_size, br, code));
+  // When using LZ77, flat codes might result in valid codestreams with
+  // histograms that potentially allow very large bit counts.
+  // TODO(veluca): in principle, a valid codestream might contain a histogram
+  // that could allow very large numbers of bits that is never used during ANS
+  // decoding. There's no benefit to doing that, though.
+  if (!code->lz77.enabled && code->max_num_bits > 32) {
+    // Just emit a warning as there are many opportunities for false positives.
+    JXL_WARNING("Histogram can represent numbers that are too large: %zu\n",
+                code->max_num_bits);
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_ans.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_ans.h
new file mode 100644
index 0000000000..15273a8156
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_ans.h
@@ -0,0 +1,432 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_ANS_H_
+#define LIB_JXL_DEC_ANS_H_
+
+// Library to decode the ANS population counts from the bit-stream and build a
+// decoding table from them.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <cstring>
+#include <vector>
+
+#include "lib/jxl/ans_common.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/cache_aligned.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_huffman.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+class ANSSymbolReader;
+
+// Experiments show that best performance is typically achieved for a
+// split-exponent of 3 or 4. Trend seems to be that '4' is better
+// for large-ish pictures, and '3' better for rather small-ish pictures.
+// This is plausible - the more special symbols we have, the better
+// statistics we need to get a benefit out of them.
+
+// Our hybrid-encoding scheme has dedicated tokens for the smallest
+// (1 << split_exponent) numbers, and for the rest
+// encodes (number of bits) + (msb_in_token sub-leading binary digits) +
+// (lsb_in_token lowest binary digits) in the token, with the remaining bits
+// then being encoded as data.
+//
+// Example with split_exponent = 4, msb_in_token = 2, lsb_in_token = 0.
+//
+// Numbers N in [0 .. 15]:
+//   These get represented as (token=N, bits='').
+// Numbers N >= 16:
+//   If n is such that 2**n <= N < 2**(n+1),
+//   and m = N - 2**n is the 'mantissa',
+//   these get represented as:
+// (token=split_token +
+//        ((n - split_exponent) * 4) +
+//        (m >> (n - msb_in_token)),
+//  bits=m & ((1 << (n - msb_in_token)) - 1))
+// Specifically, we would get:
+// N = 0 - 15:          (token=N, nbits=0, bits='')
+// N = 16 (10000):      (token=16, nbits=2, bits='00')
+// N = 17 (10001):      (token=16, nbits=2, bits='01')
+// N = 20 (10100):      (token=17, nbits=2, bits='00')
+// N = 24 (11000):      (token=18, nbits=2, bits='00')
+// N = 28 (11100):      (token=19, nbits=2, bits='00')
+// N = 32 (100000):     (token=20, nbits=3, bits='000')
+// N = 65535:           (token=63, nbits=13, bits='1111111111111')
+struct HybridUintConfig {
+  uint32_t split_exponent;
+  uint32_t split_token;  // Set to 1 << split_exponent by the constructor.
+  uint32_t msb_in_token;
+  uint32_t lsb_in_token;
+  JXL_INLINE void Encode(uint32_t value, uint32_t* JXL_RESTRICT token,
+                         uint32_t* JXL_RESTRICT nbits,
+                         uint32_t* JXL_RESTRICT bits) const {
+    if (value < split_token) {  // Small values are their own token.
+      *token = value;
+      *nbits = 0;
+      *bits = 0;
+    } else {
+      uint32_t n = FloorLog2Nonzero(value);
+      uint32_t m = value - (1u << n);  // Unsigned shift: n may be 31.
+      *token = split_token +
+               ((n - split_exponent) << (msb_in_token + lsb_in_token)) +
+               ((m >> (n - msb_in_token)) << lsb_in_token) +
+               (m & ((1u << lsb_in_token) - 1));
+      *nbits = n - msb_in_token - lsb_in_token;
+      *bits = (value >> lsb_in_token) & ((1UL << *nbits) - 1);
+    }
+  }
+  // split_token is derived; msb + lsb <= split_exponent is debug-asserted.
+  explicit HybridUintConfig(uint32_t split_exponent = 4,
+                            uint32_t msb_in_token = 2,
+                            uint32_t lsb_in_token = 0)
+      : split_exponent(split_exponent),
+        split_token(1u << split_exponent),
+        msb_in_token(msb_in_token),
+        lsb_in_token(lsb_in_token) {
+    JXL_DASSERT(split_exponent >= msb_in_token + lsb_in_token);
+  }
+};
+
+struct LZ77Params : public Fields {
+  LZ77Params();
+  const char* Name() const override { return "LZ77Params"; }
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+  bool enabled;
+
+  // Symbols at or above min_symbol use a special hybrid uint encoding and
+  // represent a length, to be added to min_length.
+  uint32_t min_symbol;
+  uint32_t min_length;
+
+  // Not serialized by VisitFields.
+  HybridUintConfig length_uint_config{0, 0, 0};
+  // Not serialized either; DecodeHistograms sets it to context_map->back().
+  size_t nonserialized_distance_context;
+};
+
+static constexpr size_t kWindowSize = 1 << 20;  // LZ77 window, in uint32_t.
+static constexpr size_t kNumSpecialDistances = 120;  // Rows in the table below.
+// Special distance codes from WebP lossless: dist = [0] + multiplier * [1].
+static constexpr int8_t kSpecialDistances[kNumSpecialDistances][2] = {
+    {0, 1},  {1, 0},  {1, 1},  {-1, 1}, {0, 2},  {2, 0},  {1, 2},  {-1, 2},
+    {2, 1},  {-2, 1}, {2, 2},  {-2, 2}, {0, 3},  {3, 0},  {1, 3},  {-1, 3},
+    {3, 1},  {-3, 1}, {2, 3},  {-2, 3}, {3, 2},  {-3, 2}, {0, 4},  {4, 0},
+    {1, 4},  {-1, 4}, {4, 1},  {-4, 1}, {3, 3},  {-3, 3}, {2, 4},  {-2, 4},
+    {4, 2},  {-4, 2}, {0, 5},  {3, 4},  {-3, 4}, {4, 3},  {-4, 3}, {5, 0},
+    {1, 5},  {-1, 5}, {5, 1},  {-5, 1}, {2, 5},  {-2, 5}, {5, 2},  {-5, 2},
+    {4, 4},  {-4, 4}, {3, 5},  {-3, 5}, {5, 3},  {-5, 3}, {0, 6},  {6, 0},
+    {1, 6},  {-1, 6}, {6, 1},  {-6, 1}, {2, 6},  {-2, 6}, {6, 2},  {-6, 2},
+    {4, 5},  {-4, 5}, {5, 4},  {-5, 4}, {3, 6},  {-3, 6}, {6, 3},  {-6, 3},
+    {0, 7},  {7, 0},  {1, 7},  {-1, 7}, {5, 5},  {-5, 5}, {7, 1},  {-7, 1},
+    {4, 6},  {-4, 6}, {6, 4},  {-6, 4}, {2, 7},  {-2, 7}, {7, 2},  {-7, 2},
+    {3, 7},  {-3, 7}, {7, 3},  {-7, 3}, {5, 6},  {-5, 6}, {6, 5},  {-6, 5},
+    {8, 0},  {4, 7},  {-4, 7}, {7, 4},  {-7, 4}, {8, 1},  {8, 2},  {6, 6},
+    {-6, 6}, {8, 3},  {5, 7},  {-5, 7}, {7, 5},  {-7, 5}, {8, 4},  {6, 7},
+    {-6, 7}, {7, 6},  {-7, 6}, {8, 5},  {7, 7},  {-7, 7}, {8, 6},  {8, 7}};
+
+struct ANSCode {
+  CacheAlignedUniquePtr alias_tables;  // ANS mode: filled by DecodeANSCodes.
+  std::vector<HuffmanDecodingData> huffman_data;  // Prefix mode: one per code.
+  std::vector<HybridUintConfig> uint_config;  // One per histogram.
+  std::vector<int> degenerate_symbols;  // -1 unless a single symbol is found.
+  bool use_prefix_code;  // No initializer; DecodeHistograms sets it.
+  uint8_t log_alpha_size;  // for ANS.
+  LZ77Params lz77;
+  // Maximum number of bits necessary to represent the result of a
+  // ReadHybridUint call done with this ANSCode.
+  size_t max_num_bits = 0;
+  void UpdateMaxNumBits(size_t ctx, size_t symbol);
+};
+
+class ANSSymbolReader {
+ public:
+  // Invalid symbol reader, to be overwritten; members hold inert defaults.
+  ANSSymbolReader() = default;
+  ANSSymbolReader(const ANSCode* code, BitReader* JXL_RESTRICT br,
+                  size_t distance_multiplier = 0)
+      : alias_tables_(
+            reinterpret_cast<AliasTable::Entry*>(code->alias_tables.get())),
+        huffman_data_(code->huffman_data.data()),
+        use_prefix_code_(code->use_prefix_code),
+        configs(code->uint_config.data()) {
+    if (!use_prefix_code_) {
+      state_ = static_cast<uint32_t>(br->ReadFixedBits<32>());
+      log_alpha_size_ = code->log_alpha_size;
+      log_entry_size_ = ANS_LOG_TAB_SIZE - code->log_alpha_size;
+      entry_size_minus_1_ = (1 << log_entry_size_) - 1;
+    } else {
+      state_ = (ANS_SIGNATURE << 16u);
+    }
+    if (!code->lz77.enabled) return;
+    // a std::vector incurs unacceptable decoding speed loss because of
+    // initialization.
+    lz77_window_storage_ = AllocateArray(kWindowSize * sizeof(uint32_t));
+    lz77_window_ = reinterpret_cast<uint32_t*>(lz77_window_storage_.get());
+    lz77_ctx_ = code->lz77.nonserialized_distance_context;
+    lz77_length_uint_ = code->lz77.length_uint_config;
+    lz77_threshold_ = code->lz77.min_symbol;
+    lz77_min_length_ = code->lz77.min_length;
+    num_special_distances_ =
+        distance_multiplier == 0 ? 0 : kNumSpecialDistances;
+    for (size_t i = 0; i < num_special_distances_; i++) {
+      int dist = kSpecialDistances[i][0];
+      dist += static_cast<int>(distance_multiplier) * kSpecialDistances[i][1];
+      if (dist < 1) dist = 1;
+      special_distances_[i] = dist;
+    }
+  }
+
+  JXL_INLINE size_t ReadSymbolANSWithoutRefill(const size_t histo_idx,
+                                               BitReader* JXL_RESTRICT br) {
+    const uint32_t res = state_ & (ANS_TAB_SIZE - 1u);
+
+    const AliasTable::Entry* table =
+        &alias_tables_[histo_idx << log_alpha_size_];
+    const AliasTable::Symbol symbol =
+        AliasTable::Lookup(table, res, log_entry_size_, entry_size_minus_1_);
+    state_ = symbol.freq * (state_ >> ANS_LOG_TAB_SIZE) + symbol.offset;
+
+#if 1
+    // Branchless version is about equally fast on SKX.
+    const uint32_t new_state =
+        (state_ << 16u) | static_cast<uint32_t>(br->PeekFixedBits<16>());
+    const bool normalize = state_ < (1u << 16u);
+    state_ = normalize ? new_state : state_;
+    br->Consume(normalize ? 16 : 0);
+#else
+    if (JXL_UNLIKELY(state_ < (1u << 16u))) {
+      state_ = (state_ << 16u) | br->PeekFixedBits<16>();
+      br->Consume(16);
+    }
+#endif
+    const uint32_t next_res = state_ & (ANS_TAB_SIZE - 1u);
+    AliasTable::Prefetch(table, next_res, log_entry_size_);
+
+    return symbol.value;
+  }
+
+  JXL_INLINE size_t ReadSymbolHuffWithoutRefill(const size_t histo_idx,
+                                                BitReader* JXL_RESTRICT br) {
+    return huffman_data_[histo_idx].ReadSymbol(br);
+  }
+
+  JXL_INLINE size_t ReadSymbolWithoutRefill(const size_t histo_idx,
+                                            BitReader* JXL_RESTRICT br) {
+    // TODO(veluca): hoist if in hotter loops.
+    if (JXL_UNLIKELY(use_prefix_code_)) {
+      return ReadSymbolHuffWithoutRefill(histo_idx, br);
+    }
+    return ReadSymbolANSWithoutRefill(histo_idx, br);
+  }
+
+  JXL_INLINE size_t ReadSymbol(const size_t histo_idx,
+                               BitReader* JXL_RESTRICT br) {
+    br->Refill();
+    return ReadSymbolWithoutRefill(histo_idx, br);
+  }
+
+  bool CheckANSFinalState() { return state_ == (ANS_SIGNATURE << 16u); }
+
+  template <typename BitReader>
+  static JXL_INLINE uint32_t ReadHybridUintConfig(
+      const HybridUintConfig& config, size_t token, BitReader* br) {
+    size_t split_token = config.split_token;
+    size_t msb_in_token = config.msb_in_token;
+    size_t lsb_in_token = config.lsb_in_token;
+    size_t split_exponent = config.split_exponent;
+    // Fast-track version of hybrid integer decoding.
+    if (token < split_token) return token;
+    uint32_t nbits = split_exponent - (msb_in_token + lsb_in_token) +
+                     ((token - split_token) >> (msb_in_token + lsb_in_token));
+    // Max amount of bits for ReadBits is 32 and max valid left shift is 29
+    // bits. However, for speed no error is propagated here, instead limit the
+    // nbits size. If nbits > 29, the code stream is invalid, but no error is
+    // returned.
+    // Note that in most cases we will emit an error if the histogram allows
+    // representing numbers that would cause invalid shifts, but we need to
+    // keep this check as when LZ77 is enabled it might make sense to have an
+    // histogram that could in principle cause invalid shifts.
+    nbits &= 31u;
+    uint32_t low = token & ((1 << lsb_in_token) - 1);
+    token >>= lsb_in_token;
+    const size_t bits = br->PeekBits(nbits);
+    br->Consume(nbits);
+    size_t ret = (((((1 << msb_in_token) | (token & ((1 << msb_in_token) - 1)))
+                    << nbits) |
+                   bits)
+                  << lsb_in_token) |
+                 low;
+    // TODO(eustas): mark BitReader as unhealthy if nbits > 29 or ret does not
+    //               fit uint32_t
+    return static_cast<uint32_t>(ret);
+  }
+
+  // Takes a *clustered* idx.
+  size_t ReadHybridUintClustered(size_t ctx, BitReader* JXL_RESTRICT br) {
+    if (JXL_UNLIKELY(num_to_copy_ > 0)) {
+      size_t ret = lz77_window_[(copy_pos_++) & kWindowMask];
+      num_to_copy_--;
+      lz77_window_[(num_decoded_++) & kWindowMask] = ret;
+      return ret;
+    }
+    br->Refill();  // covers ReadSymbolWithoutRefill + PeekBits
+    size_t token = ReadSymbolWithoutRefill(ctx, br);
+    if (JXL_UNLIKELY(token >= lz77_threshold_)) {
+      num_to_copy_ =
+          ReadHybridUintConfig(lz77_length_uint_, token - lz77_threshold_, br) +
+          lz77_min_length_;
+      br->Refill();  // covers ReadSymbolWithoutRefill + PeekBits
+      // Distance code.
+      size_t dist_tok = ReadSymbolWithoutRefill(lz77_ctx_, br);
+      size_t distance = ReadHybridUintConfig(configs[lz77_ctx_], dist_tok, br);
+      if (JXL_LIKELY(distance < num_special_distances_)) {
+        distance = special_distances_[distance];
+      } else {
+        distance = distance + 1 - num_special_distances_;
+      }
+      if (JXL_UNLIKELY(distance > num_decoded_)) {
+        distance = num_decoded_;
+      }
+      if (JXL_UNLIKELY(distance > kWindowSize)) {
+        distance = kWindowSize;
+      }
+      copy_pos_ = num_decoded_ - distance;
+      if (JXL_UNLIKELY(distance == 0)) {
+        JXL_DASSERT(lz77_window_ != nullptr);
+        // distance 0 -> num_decoded_ == copy_pos_ == 0
+        size_t to_fill = std::min<size_t>(num_to_copy_, kWindowSize);
+        memset(lz77_window_, 0, to_fill * sizeof(lz77_window_[0]));
+      }
+      // TODO(eustas): overflow; mark BitReader as unhealthy
+      if (num_to_copy_ < lz77_min_length_) return 0;
+      return ReadHybridUintClustered(ctx, br);  // will trigger a copy.
+    }
+    size_t ret = ReadHybridUintConfig(configs[ctx], token, br);
+    if (lz77_window_) lz77_window_[(num_decoded_++) & kWindowMask] = ret;
+    return ret;
+  }
+
+  JXL_INLINE size_t ReadHybridUint(size_t ctx, BitReader* JXL_RESTRICT br,
+                                   const std::vector<uint8_t>& context_map) {
+    return ReadHybridUintClustered(context_map[ctx], br);
+  }
+
+  // ctx is a *clustered* context!
+  // True only in ANS mode when the looked-up symbol has freq == ANS_TAB_SIZE
+  // and is a plain literal; the LZ77 window then records it `count` times.
+  bool IsSingleValueAndAdvance(size_t ctx, uint32_t* value, size_t count) {
+    // TODO(veluca): No optimization for Huffman mode yet.
+    if (use_prefix_code_) return false;
+    // TODO(eustas): propagate "degenerate_symbol" to simplify this method.
+    const uint32_t res = state_ & (ANS_TAB_SIZE - 1u);
+    const AliasTable::Entry* table = &alias_tables_[ctx << log_alpha_size_];
+    AliasTable::Symbol symbol =
+        AliasTable::Lookup(table, res, log_entry_size_, entry_size_minus_1_);
+    if (symbol.freq != ANS_TAB_SIZE) return false;
+    if (configs[ctx].split_token <= symbol.value) return false;
+    if (symbol.value >= lz77_threshold_) return false;
+    *value = symbol.value;
+    if (lz77_window_) {
+      for (size_t i = 0; i < count; i++) {
+        lz77_window_[(num_decoded_++) & kWindowMask] = symbol.value;
+      }
+    }
+    return true;
+  }
+
+  static constexpr size_t kMaxCheckpointInterval = 512;
+  struct Checkpoint {
+    uint32_t state;
+    uint32_t num_to_copy;
+    uint32_t copy_pos;
+    uint32_t num_decoded;
+    uint32_t lz77_window[kMaxCheckpointInterval];
+  };
+  void Save(Checkpoint* checkpoint) {
+    checkpoint->state = state_;
+    checkpoint->num_decoded = num_decoded_;
+    checkpoint->num_to_copy = num_to_copy_;
+    checkpoint->copy_pos = copy_pos_;
+    if (lz77_window_) {
+      size_t win_start = num_decoded_ & kWindowMask;
+      size_t win_end = (num_decoded_ + kMaxCheckpointInterval) & kWindowMask;
+      if (win_end > win_start) {
+        memcpy(checkpoint->lz77_window, lz77_window_ + win_start,
+               (win_end - win_start) * sizeof(*lz77_window_));
+      } else {
+        memcpy(checkpoint->lz77_window, lz77_window_ + win_start,
+               (kWindowSize - win_start) * sizeof(*lz77_window_));
+        memcpy(checkpoint->lz77_window + (kWindowSize - win_start),
+               lz77_window_, win_end * sizeof(*lz77_window_));
+      }
+    }
+  }
+  void Restore(const Checkpoint& checkpoint) {
+    state_ = checkpoint.state;
+    JXL_DASSERT(num_decoded_ <=
+                checkpoint.num_decoded + kMaxCheckpointInterval);
+    num_decoded_ = checkpoint.num_decoded;
+    num_to_copy_ = checkpoint.num_to_copy;
+    copy_pos_ = checkpoint.copy_pos;
+    if (lz77_window_) {
+      size_t win_start = num_decoded_ & kWindowMask;
+      size_t win_end = (num_decoded_ + kMaxCheckpointInterval) & kWindowMask;
+      if (win_end > win_start) {
+        memcpy(lz77_window_ + win_start, checkpoint.lz77_window,
+               (win_end - win_start) * sizeof(*lz77_window_));
+      } else {
+        memcpy(lz77_window_ + win_start, checkpoint.lz77_window,
+               (kWindowSize - win_start) * sizeof(*lz77_window_));
+        memcpy(lz77_window_, checkpoint.lz77_window + (kWindowSize - win_start),
+               win_end * sizeof(*lz77_window_));
+      }
+    }
+  }
+
+ private:
+  const AliasTable::Entry* JXL_RESTRICT alias_tables_ = nullptr;  // not owned
+  const HuffmanDecodingData* huffman_data_ = nullptr;
+  bool use_prefix_code_ = false;
+  uint32_t state_ = ANS_SIGNATURE << 16u;
+  const HybridUintConfig* JXL_RESTRICT configs = nullptr;
+  uint32_t log_alpha_size_ = 0;
+  uint32_t log_entry_size_ = 0;
+  uint32_t entry_size_minus_1_ = 0;
+
+  // LZ77 structures and constants.
+  static constexpr size_t kWindowMask = kWindowSize - 1;
+  CacheAlignedUniquePtr lz77_window_storage_;
+  uint32_t* lz77_window_ = nullptr;
+  uint32_t num_decoded_ = 0;
+  uint32_t num_to_copy_ = 0;
+  uint32_t copy_pos_ = 0;
+  uint32_t lz77_ctx_ = 0;
+  uint32_t lz77_min_length_ = 0;
+  uint32_t lz77_threshold_ = 1 << 20;  // bigger than any symbol.
+  HybridUintConfig lz77_length_uint_;
+  uint32_t special_distances_[kNumSpecialDistances];
+  uint32_t num_special_distances_ = 0;  // entries of special_distances_ in use
+};
+
+Status DecodeHistograms(BitReader* br, size_t num_contexts, ANSCode* code,
+                        std::vector<uint8_t>* context_map,
+                        bool disallow_lz77 = false);
+
+// Exposed for tests.
+Status DecodeUintConfigs(size_t log_alpha_size,
+                         std::vector<HybridUintConfig>* uint_config,
+                         BitReader* br);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_ANS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_bit_reader.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_bit_reader.h
new file mode 100644
index 0000000000..df70284e3b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_bit_reader.h
@@ -0,0 +1,354 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_BIT_READER_H_
+#define LIB_JXL_DEC_BIT_READER_H_
+
+// Bounds-checked bit reader; 64-bit buffer with support for deferred refills
+// and switching to reading byte-aligned words.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>  // memcpy
+
+#ifdef __BMI2__
+#include <immintrin.h>
+#endif
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+
+namespace jxl {
+
+// Reads bits previously written to memory by BitWriter. Uses unaligned 8-byte
+// little-endian loads.
+class BitReader {
+ public:
+  static constexpr size_t kMaxBitsPerCall = 56;
+
+  // Constructs an invalid BitReader, to be overwritten before usage.
+  BitReader()
+      : buf_(0),
+        bits_in_buf_(0),
+        next_byte_{nullptr},
+        end_minus_8_{nullptr},
+        first_byte_(nullptr) {}
+  BitReader(const BitReader&) = delete;
+
+  // bytes need not be aligned nor padded!
+  template <class ArrayLike>
+  explicit BitReader(const ArrayLike& bytes)
+      : buf_(0),
+        bits_in_buf_(0),
+        next_byte_(bytes.data()),
+        // Assumes first_byte_ >= 8.
+        end_minus_8_(bytes.data() - 8 + bytes.size()),
+        first_byte_(bytes.data()) {
+    Refill();
+  }
+  ~BitReader() {
+    // Close() must be called before destroying an initialized bit reader.
+    // Invalid bit readers will have a nullptr in first_byte_.
+    JXL_ASSERT(close_called_ || !first_byte_);
+  }
+
+  // Move assignment takes over the complete state of the other BitReader
+  // (including its out-of-bounds bookkeeping) and invalidates it, such that
+  // it is irrelevant if we call Close() on it or not.
+  BitReader& operator=(BitReader&& other) noexcept {
+    // The current instance must already be closed (or invalid) at this point.
+    JXL_ASSERT(close_called_ || !first_byte_);
+    JXL_DASSERT(!other.close_called_);
+    buf_ = other.buf_;
+    bits_in_buf_ = other.bits_in_buf_;
+    next_byte_ = other.next_byte_;
+    end_minus_8_ = other.end_minus_8_;
+    first_byte_ = other.first_byte_;
+    overread_bytes_ = other.overread_bytes_;
+    close_called_ = other.close_called_;
+    checked_out_of_bounds_bits_ = other.checked_out_of_bounds_bits_;
+
+    other.first_byte_ = nullptr;
+    other.next_byte_ = nullptr;
+    return *this;
+  }
+  BitReader& operator=(const BitReader& other) = delete;
+
+  // For time-critical reads, refills can be shared by multiple reads.
+  // Based on variant 4 (plus bounds-checking), see
+  // fgiesen.wordpress.com/2018/02/20/reading-bits-in-far-too-many-ways-part-2/
+  JXL_INLINE void Refill() {
+    if (JXL_UNLIKELY(next_byte_ > end_minus_8_)) {
+      BoundsCheckedRefill();
+    } else {
+      // It's safe to load 64 bits; insert valid (possibly nonzero) bits above
+      // bits_in_buf_. The shift requires bits_in_buf_ < 64.
+      buf_ |= LoadLE64(next_byte_) << bits_in_buf_;
+
+      // Advance by bytes fully absorbed into the buffer.
+      next_byte_ += (63 - bits_in_buf_) >> 3;
+
+      // We absorbed a multiple of 8 bits, so the lower 3 bits of bits_in_buf_
+      // must remain unchanged, otherwise the next refill's shifted bits will
+      // not align with buf_. Set the three upper bits so the result >= 56.
+      bits_in_buf_ |= 56;
+      JXL_DASSERT(56 <= bits_in_buf_ && bits_in_buf_ < 64);
+    }
+  }
+
+  // Returns the bits that would be returned by Read without calling Consume().
+  // It is legal to PEEK at more bits than present in the bitstream (required
+  // by Huffman), and those bits will be zero.
+  template <size_t N>
+  JXL_INLINE uint64_t PeekFixedBits() const {
+    static_assert(N <= kMaxBitsPerCall, "Reading too many bits in one call.");
+    JXL_DASSERT(!close_called_);
+    return buf_ & ((1ULL << N) - 1);
+  }
+
+  JXL_INLINE uint64_t PeekBits(size_t nbits) const {
+    JXL_DASSERT(nbits <= kMaxBitsPerCall);
+    JXL_DASSERT(!close_called_);
+
+    // Slightly faster but requires BMI2. It is infeasible to make the many
+    // callers reside between begin/end_target, especially because only the
+    // callers in dec_ans are time-critical. Therefore only enabled if the
+    // entire binary is compiled for (and thus requires) BMI2.
+#if defined(__BMI2__) && defined(__x86_64__)
+    return _bzhi_u64(buf_, nbits);
+#else
+    const uint64_t mask = (1ULL << nbits) - 1;
+    return buf_ & mask;
+#endif
+  }
+
+  // Removes bits from the buffer. Need not match the previous Peek size, but
+  // the buffer must contain at least num_bits (this prevents consuming more
+  // than the total number of bits).
+  JXL_INLINE void Consume(size_t num_bits) {
+    JXL_DASSERT(!close_called_);
+    JXL_DASSERT(bits_in_buf_ >= num_bits);
+#ifdef JXL_CRASH_ON_ERROR
+    // When JXL_CRASH_ON_ERROR is defined, it is a fatal error to read more bits
+    // than available in the stream. A non-zero overread_bytes_ implies that
+    // next_byte_ is already at the end of the stream, so we don't need to
+    // check that.
+    JXL_ASSERT(bits_in_buf_ >= num_bits + overread_bytes_ * kBitsPerByte);
+#endif
+    bits_in_buf_ -= num_bits;
+    buf_ >>= num_bits;
+  }
+
+  JXL_INLINE uint64_t ReadBits(size_t nbits) {
+    JXL_DASSERT(!close_called_);
+    Refill();
+    const uint64_t bits = PeekBits(nbits);
+    Consume(nbits);
+    return bits;
+  }
+
+  template <size_t N>
+  JXL_INLINE uint64_t ReadFixedBits() {
+    JXL_DASSERT(!close_called_);
+    Refill();
+    const uint64_t bits = PeekFixedBits<N>();
+    Consume(N);
+    return bits;
+  }
+
+  // Equivalent to calling ReadFixedBits(1) `skip` times, but much faster.
+  // `skip` is typically large.
+  void SkipBits(size_t skip) {
+    JXL_DASSERT(!close_called_);
+    // Buffer is large enough - don't zero buf_ below.
+    if (JXL_UNLIKELY(skip <= bits_in_buf_)) {
+      Consume(skip);
+      return;
+    }
+
+    // First deduct what we can satisfy from the buffer
+    skip -= bits_in_buf_;
+    bits_in_buf_ = 0;
+    // Not enough to call Consume - that may leave some bits in the buffer
+    // which were previously ABOVE bits_in_buf.
+    buf_ = 0;
+
+    // Skip whole bytes
+    const size_t whole_bytes = skip / kBitsPerByte;
+    skip %= kBitsPerByte;
+    if (JXL_UNLIKELY(whole_bytes >
+                     static_cast<size_t>(end_minus_8_ + 8 - next_byte_))) {
+      // This is already an overflow condition (skipping past the end of the bit
+      // stream). However if we increase next_byte_ too much we risk overflowing
+      // that value and potentially making it valid again (next_byte_ < end).
+      // This will set next_byte_ to the end of the stream and still consume
+      // some bits in overread_bytes_, however the TotalBitsConsumed() will be
+      // incorrect (still larger than the TotalBytes()).
+      next_byte_ = end_minus_8_ + 8;
+      skip += kBitsPerByte;
+    } else {
+      next_byte_ += whole_bytes;
+    }
+
+    Refill();
+    Consume(skip);
+  }
+
+  size_t TotalBitsConsumed() const {
+    const size_t bytes_read = static_cast<size_t>(next_byte_ - first_byte_);
+    return (bytes_read + overread_bytes_) * kBitsPerByte - bits_in_buf_;
+  }
+
+  Status JumpToByteBoundary() {
+    const size_t remainder = TotalBitsConsumed() % kBitsPerByte;
+    if (remainder == 0) return true;
+    if (JXL_UNLIKELY(ReadBits(kBitsPerByte - remainder) != 0)) {
+      return JXL_FAILURE("Non-zero padding bits");
+    }
+    return true;
+  }
+
+  // For interoperability with other bitreaders (for resuming at
+  // non-byte-aligned positions).
+  const uint8_t* FirstByte() const { return first_byte_; }
+  size_t TotalBytes() const {
+    return static_cast<size_t>(end_minus_8_ + 8 - first_byte_);
+  }
+
+  // Returns span of the remaining (unconsumed) bytes, e.g. for passing to
+  // external decoders such as Brotli.
+  Span<const uint8_t> GetSpan() const {
+    JXL_DASSERT(first_byte_ != nullptr);
+    JXL_ASSERT(TotalBitsConsumed() % kBitsPerByte == 0);
+    const size_t offset = TotalBitsConsumed() / kBitsPerByte;  // no remainder
+    JXL_ASSERT(offset <= TotalBytes());
+    return Span<const uint8_t>(first_byte_ + offset, TotalBytes() - offset);
+  }
+
+  // Returns whether all the bits read so far have been within the input bounds.
+  // When reading past the EOF, the Read*() and Consume() functions return zeros
+  // but flag a failure when calling Close() without checking this function.
+  Status AllReadsWithinBounds() {
+    // Mark up to which point the user checked the out of bounds condition. If
+    // the user handles the condition at higher level (e.g. fetch more bytes
+    // from network, return a custom JXL_FAILURE, ...), Close() should not
+    // output a debug error (which would break tests with JXL_CRASH_ON_ERROR
+    // even when legitimately handling the situation at higher level). This is
+    // used by Bundle::CanRead.
+    checked_out_of_bounds_bits_ = TotalBitsConsumed();
+    if (TotalBitsConsumed() > TotalBytes() * kBitsPerByte) {
+      return false;
+    }
+    return true;
+  }
+
+  // Close the bit reader and return whether all the previous reads were
+  // successful. Close must be called once.
+  Status Close() {
+    JXL_DASSERT(!close_called_);
+    close_called_ = true;
+    if (!first_byte_) return true;
+    if (TotalBitsConsumed() > checked_out_of_bounds_bits_ &&
+        TotalBitsConsumed() > TotalBytes() * kBitsPerByte) {
+      return JXL_FAILURE("Read more bits than available in the bit_reader");
+    }
+    return true;
+  }
+
+ private:
+  // Separate function avoids inlining this relatively cold code into callers.
+  JXL_NOINLINE void BoundsCheckedRefill() {
+    PROFILER_FUNC;
+    const uint8_t* end = end_minus_8_ + 8;
+
+    // Read whole bytes until we have [56, 64) bits (same as LoadLE64)
+    for (; bits_in_buf_ < 64 - kBitsPerByte; bits_in_buf_ += kBitsPerByte) {
+      if (next_byte_ >= end) break;
+      buf_ |= static_cast<uint64_t>(*next_byte_++) << bits_in_buf_;
+    }
+    JXL_DASSERT(bits_in_buf_ < 64);
+
+    // Add extra bytes as 0 at the end of the stream in buf_. If
+    // these bits are read, Close() will return a failure.
+    size_t extra_bytes = (63 - bits_in_buf_) / kBitsPerByte;
+    overread_bytes_ += extra_bytes;
+    bits_in_buf_ += extra_bytes * kBitsPerByte;
+
+    JXL_DASSERT(bits_in_buf_ < 64);
+    JXL_DASSERT(bits_in_buf_ >= 56);
+  }
+
+  JXL_NOINLINE uint32_t BoundsCheckedReadByteAlignedWord() {
+    if (next_byte_ + 1 < end_minus_8_ + 8) {
+      uint32_t ret = LoadLE16(next_byte_);
+      next_byte_ += 2;
+      return ret;
+    }
+    overread_bytes_ += 2;
+    return 0;
+  }
+
+  uint64_t buf_;
+  size_t bits_in_buf_;  // [0, 64)
+  const uint8_t* JXL_RESTRICT next_byte_;
+  const uint8_t* end_minus_8_;  // for refill bounds check
+  const uint8_t* first_byte_;   // for GetSpan
+
+  // Number of bytes past the end that were loaded into the buf_. These bytes
+  // are not read from memory, but instead assumed 0. It is an error (likely due
+  // to an invalid stream) to Consume() more bits than specified in the range
+  // passed to the constructor.
+  uint64_t overread_bytes_{0};
+  bool close_called_{false};
+
+  uint64_t checked_out_of_bounds_bits_{0};
+};
+
+// Closes a BitReader when the BitReaderScopedCloser goes out of scope. When
+// closing the bit reader, if the status result was a failure it stores this
+// failure in the passed Status pointer. Typical usage:
+//
+// Status ret = true;
+// {
+//   BitReader reader(...);
+//   BitReaderScopedCloser reader_closer(&reader, &ret);
+//
+//   // ... code that can return errors here ...
+// }
+// // ... more code that doesn't use the BitReader.
+// return ret;
+
+class BitReaderScopedCloser {
+ public:
+  BitReaderScopedCloser(BitReader* reader, Status* status)
+      : reader_(reader), status_(status) {
+    JXL_DASSERT(reader_ != nullptr);
+    JXL_DASSERT(status_ != nullptr);
+  }
+  BitReaderScopedCloser(const BitReaderScopedCloser&) = delete;
+  // Closes the reader if still attached; a failure is stored in *status_.
+  ~BitReaderScopedCloser() {
+    if (reader_ == nullptr) return;
+    Status close_result = reader_->Close();
+    if (!close_result) *status_ = close_result;
+  }
+  void CloseAndSuppressError() {
+    JXL_ASSERT(reader_ != nullptr);
+    (void)reader_->Close();
+    reader_ = nullptr;
+  }
+
+ private:
+  BitReader* reader_;  // nullptr once CloseAndSuppressError() was called
+  Status* status_;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_BIT_READER_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_cache.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_cache.cc
new file mode 100644
index 0000000000..e40a97fcb9
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_cache.cc
@@ -0,0 +1,170 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_cache.h"
+
+#include "lib/jxl/dec_reconstruct.h"
+
+namespace jxl {
+
+void PassesDecoderState::EnsureBordersStorage() {
+  if (!EagerFinalizeImageRect()) return;
+  const FrameDimensions& dim = shared->frame_dim;
+  const size_t padding = FinalizeRectPadding();
+  const size_t strip_h = 2 * padding;
+  const size_t strip_w = padding + group_border_assigner.PaddingX(padding);
+  // Two strips per group row / group column; reallocate only on size change.
+  const Rect horizontal(0, 0, dim.xsize_padded, strip_h * dim.ysize_groups * 2);
+  if (!SameSize(horizontal, borders_horizontal)) {
+    borders_horizontal = Image3F(horizontal.xsize(), horizontal.ysize());
+  }
+  const Rect vertical(0, 0, strip_w * dim.xsize_groups * 2, dim.ysize_padded);
+  if (!SameSize(vertical, borders_vertical)) {
+    borders_vertical = Image3F(vertical.xsize(), vertical.ysize());
+  }
+}
+
+namespace {
+// Copies the four outer strips of a decoded group plane into the border
+// storage. `block_rect` is the group's position in blocks, `hshift`/`vshift`
+// the channel's subsampling shifts and `padding` the per-side padding that
+// determines the strip thickness. `plane_in` holds the group's pixels starting
+// at (kGroupDataXBorder, kGroupDataYBorder).
+void SaveBorders(const Rect& block_rect, size_t hshift, size_t vshift,
+                 size_t padding, const ImageF& plane_in,
+                 ImageF* border_storage_h, ImageF* border_storage_v) {
+  // Position of the group's first pixel inside the padded plane_in buffer.
+  constexpr size_t kXOff = PassesDecoderState::kGroupDataXBorder;
+  constexpr size_t kYOff = PassesDecoderState::kGroupDataYBorder;
+  // Extent of the group in this channel, accounting for chroma subsampling.
+  const auto xdiv = 1 << hshift;
+  const auto ydiv = 1 << vshift;
+  const size_t x0 = DivCeil(block_rect.x0() * kBlockDim, xdiv);
+  const size_t y0 = DivCeil(block_rect.y0() * kBlockDim, ydiv);
+  const size_t xs =
+      DivCeil((block_rect.x0() + block_rect.xsize()) * kBlockDim, xdiv) - x0;
+  const size_t ys =
+      DivCeil((block_rect.y0() + block_rect.ysize()) * kBlockDim, ydiv) - y0;
+  // Every group row/column owns two consecutive strips in the storage images.
+  const size_t slot_h = 2 * (block_rect.y0() / kGroupDimInBlocks);
+  const size_t slot_v = 2 * (block_rect.x0() / kGroupDimInBlocks);
+  // TODO(veluca): this is too much with chroma upsampling. It's just
+  // inefficient though.
+  const size_t strip_w = padding + GroupBorderAssigner::PaddingX(padding);
+  const size_t strip_h = padding + padding;
+  // Top strip, then bottom strip.
+  CopyImageTo(Rect(kXOff, kYOff, xs, strip_h), plane_in,
+              Rect(x0, slot_h * strip_h, xs, strip_h), border_storage_h);
+  CopyImageTo(Rect(kXOff, kYOff + ys - strip_h, xs, strip_h), plane_in,
+              Rect(x0, (slot_h + 1) * strip_h, xs, strip_h), border_storage_h);
+  // Left strip, then right strip.
+  CopyImageTo(Rect(kXOff, kYOff, strip_w, ys), plane_in,
+              Rect(slot_v * strip_w, y0, strip_w, ys), border_storage_v);
+  CopyImageTo(Rect(kXOff + xs - strip_w, kYOff, strip_w, ys), plane_in,
+              Rect((slot_v + 1) * strip_w, y0, strip_w, ys), border_storage_v);
+}
+
+void LoadBorders(const Rect& block_rect, size_t hshift, size_t vshift,
+                 const FrameDimensions& frame_dim, size_t padding,
+                 const ImageF& border_storage_h, const ImageF& border_storage_v,
+                 const Rect& r, ImageF* plane_out) {
+  constexpr size_t kXOff = PassesDecoderState::kGroupDataXBorder;
+  constexpr size_t kYOff = PassesDecoderState::kGroupDataYBorder;
+  const auto xdiv = 1 << hshift;
+  const auto ydiv = 1 << vshift;
+  // Extent of the current group in this channel.
+  const size_t x0 = DivCeil(block_rect.x0() * kBlockDim, xdiv);
+  const size_t y0 = DivCeil(block_rect.y0() * kBlockDim, ydiv);
+  const size_t x1 =
+      DivCeil((block_rect.x0() + block_rect.xsize()) * kBlockDim, xdiv);
+  const size_t y1 =
+      DivCeil((block_rect.y0() + block_rect.ysize()) * kBlockDim, ydiv);
+  // First of the two strip slots owned by this group row / group column.
+  const size_t slot_h = 2 * (block_rect.y0() / kGroupDimInBlocks);
+  const size_t slot_v = 2 * (block_rect.x0() / kGroupDimInBlocks);
+  const size_t borderx = GroupBorderAssigner::PaddingX(padding);
+  const size_t bordery = padding;
+  const size_t strip_w = padding + borderx;
+  const size_t strip_h = padding + bordery;
+  // Limits of the area to copy from, in image coordinates: r extended by the
+  // border on each side, except where r touches the padded frame boundary.
+  const size_t r_x1 = r.x0() + r.xsize();
+  const size_t r_y1 = r.y0() + r.ysize();
+  JXL_DASSERT(r.x0() == 0 || r.x0() >= borderx);
+  JXL_DASSERT(r.y0() == 0 || r.y0() >= bordery);
+  const size_t x0src = DivCeil(r.x0() == 0 ? r.x0() : r.x0() - borderx, xdiv);
+  const size_t y0src = DivCeil(r.y0() == 0 ? r.y0() : r.y0() - bordery, ydiv);
+  const size_t x1src =
+      DivCeil(r_x1 + (r_x1 == frame_dim.xsize_padded ? 0 : borderx), xdiv);
+  const size_t y1src =
+      DivCeil(r_y1 + (r_y1 == frame_dim.ysize_padded ? 0 : bordery), ydiv);
+  const size_t ws = x1src - x0src;
+  const size_t hs = y1src - y0src;
+  if (y0src < y0) {
+    // Rows above: bottom strip saved by the previous group row.
+    CopyImageTo(Rect(x0src, (slot_h - 1) * strip_h, ws, strip_h),
+                border_storage_h,
+                Rect(kXOff + x0src - x0, kYOff - strip_h, ws, strip_h),
+                plane_out);
+  }
+  if (y1src > y1) {
+    // Rows below: top strip saved by the next group row.
+    CopyImageTo(Rect(x0src, (slot_h + 2) * strip_h, ws, strip_h),
+                border_storage_h,
+                Rect(kXOff + x0src - x0, kYOff + y1 - y0, ws, strip_h),
+                plane_out);
+  }
+  if (x0src < x0) {
+    // Columns to the left: right strip saved by the previous group column.
+    CopyImageTo(Rect((slot_v - 1) * strip_w, y0src, strip_w, hs),
+                border_storage_v,
+                Rect(kXOff - strip_w, kYOff + y0src - y0, strip_w, hs),
+                plane_out);
+  }
+  if (x1src > x1) {
+    // Columns to the right: left strip saved by the next group column.
+    CopyImageTo(Rect((slot_v + 2) * strip_w, y0src, strip_w, hs),
+                border_storage_v,
+                Rect(kXOff + x1 - x0, kYOff + y0src - y0, strip_w, hs),
+                plane_out);
+  }
+}
+
+}  // namespace
+
+Status PassesDecoderState::FinalizeGroup(size_t group_idx, size_t thread,
+                                         Image3F* pixel_data,
+                                         ImageBundle* output) {
+  const Rect block_rect = shared->BlockGroupRect(group_idx);
+  const YCbCrChromaSubsampling& cs = shared->frame_header.chroma_subsampling;
+  const size_t padding = FinalizeRectPadding();
+  // Store this group's borders in the border storage, one plane at a time.
+  for (size_t c = 0; c < 3; c++) {
+    SaveBorders(block_rect, cs.HShift(c), cs.VShift(c), padding,
+                pixel_data->Plane(c), &borders_horizontal.Plane(c),
+                &borders_vertical.Plane(c));
+  }
+  // GroupDone() fills in the rects that are finalized by the loop below.
+  size_t num_rects = 0;
+  Rect rects[GroupBorderAssigner::kMaxToFinalize];
+  group_border_assigner.GroupDone(group_idx, padding, rects, &num_rects);
+  for (size_t i = 0; i < num_rects; i++) {
+    const Rect& r = rects[i];
+    for (size_t c = 0; c < 3; c++) {
+      LoadBorders(block_rect, cs.HShift(c), cs.VShift(c), shared->frame_dim,
+                  padding, borders_horizontal.Plane(c),
+                  borders_vertical.Plane(c), r, &pixel_data->Plane(c));
+    }
+    // Same area as r, expressed relative to the padded pixel_data buffer.
+    Rect local_rect(kGroupDataXBorder + r.x0() - block_rect.x0() * kBlockDim,
+                    kGroupDataYBorder + r.y0() - block_rect.y0() * kBlockDim,
+                    r.xsize(), r.ysize());
+    JXL_RETURN_IF_ERROR(FinalizeImageRect(pixel_data, local_rect, {}, this,
+                                          thread, output, r));
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_cache.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_cache.h
new file mode 100644
index 0000000000..85322aa3b5
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_cache.h
@@ -0,0 +1,411 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_CACHE_H_
+#define LIB_JXL_DEC_CACHE_H_
+
+#include <stdint.h>
+
+#include <hwy/base.h>  // HWY_ALIGN_MAX
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/dec_group_border.h"
+#include "lib/jxl/dec_noise.h"
+#include "lib/jxl/dec_upsample.h"
+#include "lib/jxl/filters.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/passes_state.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+
+// Per-frame decoder state. All the images here should be accessed through a
+// group rect (either with block units or pixel units).
+struct PassesDecoderState {
+  PassesSharedState shared_storage;
+  // Allows avoiding copies for encoder loop.
+  const PassesSharedState* JXL_RESTRICT shared = &shared_storage;
+
+  // Upsamplers for all the possible upsampling factors (2 to 8).
+  Upsampler upsamplers[3];
+
+  // Storage for RNG output for noise synthesis.
+  Image3F noise;
+
+  // Storage for pre-color-transform output for displayed
+  // save_before_color_transform frames.
+  Image3F pre_color_transform_frame;
+  // Non-empty (contains originals) if extra-channels were cropped.
+  std::vector<ImageF> pre_color_transform_ec;
+
+  // For ANS decoding.
+  std::vector<ANSCode> code;
+  std::vector<std::vector<uint8_t>> context_map;
+
+  // Multiplier to be applied to the quant matrices of the x channel.
+  float x_dm_multiplier;
+  float b_dm_multiplier;
+
+  // Decoded image.
+  Image3F decoded;
+  std::vector<ImageF> extra_channels;
+
+  // Borders between groups. Only allocated if `decoded` is *not* allocated.
+  // We also store the extremal borders for simplicity. Horizontal borders are
+  // stored in an image as wide as the main frame, in top-to-bottom order (top
+  // border of a group first, followed by the bottom border, followed by top
+  // border of the next group). Vertical borders are similarly stored.
+  Image3F borders_horizontal;
+  Image3F borders_vertical;
+
+  // RGB8 output buffer. If not nullptr, image data will be written to this
+  // buffer instead of being written to the output ImageBundle. The image data
+  // is assumed to have the stride given by `rgb_stride`, hence row `i` starts
+  // at position `i * rgb_stride`.
+  uint8_t* rgb_output;
+  size_t rgb_stride = 0;
+
+  // Whether to use int16 float-XYB-to-uint8-srgb conversion.
+  bool fast_xyb_srgb8_conversion;
+
+  // If true, rgb_output or callback output is RGBA using 4 instead of 3 bytes
+  // per pixel.
+  bool rgb_output_is_rgba;
+
+  // Callback for line-by-line output.
+  std::function<void(const float*, size_t, size_t, size_t)> pixel_callback;
+  // Buffer of upsampling * kApplyImageFeaturesTileDim ones.
+  std::vector<float> opaque_alpha;
+  // One row per thread
+  std::vector<std::vector<float>> pixel_callback_rows;
+
+  // Seed for noise, to have different noise per-frame.
+  size_t noise_seed = 0;
+
+  // Keep track of the transform types used.
+  std::atomic<uint32_t> used_acs{0};
+
+  // Storage for coefficients if in "accumulate" mode.
+  std::unique_ptr<ACImage> coefficients = make_unique<ACImageT<int32_t>>(0, 0);
+
+  // Filter application pipeline used by ApplyImageFeatures. One entry is needed
+  // per thread.
+  std::vector<FilterPipeline> filter_pipelines;
+
+  // Input weights used by the filters. These are shared from multiple threads
+  // but are read-only for the filter application.
+  FilterWeights filter_weights;
+
+  // Manages the status of borders.
+  GroupBorderAssigner group_border_assigner;
+
+  // TODO(veluca): this should eventually become "iff no global modular
+  // transform was applied".
+  bool EagerFinalizeImageRect() const {
+    return shared->frame_header.encoding == FrameEncoding::kVarDCT &&
+           shared->frame_header.nonserialized_metadata->m.extra_channel_info
+               .empty();
+  }
+
+  // Amount of padding that will be accessed, in all directions, outside a rect
+  // during a call to FinalizeImageRect().
+  size_t FinalizeRectPadding() const {
+    size_t padding = shared->frame_header.loop_filter.Padding();
+    padding += shared->frame_header.upsampling == 1 ? 0 : 2;
+    JXL_DASSERT(padding <= kMaxFinalizeRectPadding);
+    for (auto ups : shared->frame_header.extra_channel_upsampling) {
+      if (ups > 1) {
+        padding = std::max(padding, size_t{2});
+      }
+    }
+    // We could be making a distinction between h and w padding here, but it is
+    // likely not worth it.
+    if (!shared->frame_header.chroma_subsampling.Is444()) {
+      padding = std::max(padding / 2 + 1, padding);
+    }
+    return padding;
+  }
+
+  // Storage for intermediate data during FinalizeRect steps.
+  // TODO(veluca): these buffers are larger than strictly necessary.
+  std::vector<Image3F> filter_input_storage;
+  std::vector<Image3F> padded_upsampling_input_storage;
+  std::vector<Image3F> upsampling_input_storage;
+  size_t upsampler_arena_size = 0;
+  std::vector<hwy::AlignedFreeUniquePtr<float[]>> upsampler_storage;
+  // We keep four arrays, one per upsampling level, to reduce memory usage in
+  // the common case of no upsampling.
+  std::vector<Image3F> output_pixel_data_storage[4] = {};
+  std::vector<ImageF> ec_temp_images;
+  std::vector<ImageF> ycbcr_temp_images;
+  std::vector<Image3F> ycbcr_out_images;
+
+  // Buffer for decoded pixel data for a group.
+  std::vector<Image3F> group_data;
+  static constexpr size_t kGroupDataYBorder = kMaxFinalizeRectPadding * 2;
+  static constexpr size_t kGroupDataXBorder =
+      RoundUpToBlockDim(kMaxFinalizeRectPadding) * 2 + kBlockDim;
+
+  void EnsureStorage(size_t num_threads) {
+    // We need one filter_storage per thread, ensure we have at least that many.
+    if (shared->frame_header.loop_filter.epf_iters != 0 ||
+        shared->frame_header.loop_filter.gab) {
+      if (filter_pipelines.size() < num_threads) {
+        filter_pipelines.resize(num_threads);
+      }
+    }
+    // We allocate filter_input_storage unconditionally to ensure that the image
+    // is allocated if we need it for DC upsampling.
+    for (size_t _ = filter_input_storage.size(); _ < num_threads; _++) {
+      // Extra padding along the x dimension to ensure memory accesses don't
+      // load out-of-bounds pixels.
+      filter_input_storage.emplace_back(
+          kApplyImageFeaturesTileDim + 2 * kGroupDataXBorder,
+          kApplyImageFeaturesTileDim + 2 * kGroupDataYBorder);
+    }
+    if (shared->frame_header.upsampling != 1) {
+      for (size_t _ = upsampling_input_storage.size(); _ < num_threads; _++) {
+        // At this point, we only need up to 2 pixels of border per side for
+        // upsampling, but we add an extra border for aligned access.
+        upsampling_input_storage.emplace_back(
+            kApplyImageFeaturesTileDim + 2 * kBlockDim,
+            kApplyImageFeaturesTileDim + 4);
+        padded_upsampling_input_storage.emplace_back(
+            kApplyImageFeaturesTileDim + 2 * kBlockDim,
+            kApplyImageFeaturesTileDim + 4);
+      }
+    }
+    const size_t arena_size = Upsampler::GetArenaSize(
+        kApplyImageFeaturesTileDim * shared->frame_header.upsampling);
+    if (arena_size > upsampler_arena_size) upsampler_storage.clear();
+    for (size_t _ = upsampler_storage.size(); _ < num_threads; _++) {
+      upsampler_storage.emplace_back(hwy::AllocateAligned<float>(arena_size));
+    }
+    upsampler_arena_size = arena_size;
+    for (size_t _ = group_data.size(); _ < num_threads; _++) {
+      group_data.emplace_back(kGroupDim + 2 * kGroupDataXBorder,
+                              kGroupDim + 2 * kGroupDataYBorder);
+#if MEMORY_SANITIZER
+      // Avoid errors due to loading vectors on the outermost padding.
+      FillImage(msan::kSanitizerSentinel, &group_data.back());
+#endif
+    }
+    if (!shared->frame_header.chroma_subsampling.Is444()) {
+      for (size_t _ = ycbcr_temp_images.size(); _ < num_threads; _++) {
+        ycbcr_temp_images.emplace_back(kGroupDim + 2 * kGroupDataXBorder,
+                                       kGroupDim + 2 * kGroupDataYBorder);
+        ycbcr_out_images.emplace_back(kGroupDim + 2 * kGroupDataXBorder,
+                                      kGroupDim + 2 * kGroupDataYBorder);
+      }
+    }
+    if (rgb_output || pixel_callback) {
+      size_t log2_upsampling = CeilLog2Nonzero(shared->frame_header.upsampling);
+      for (size_t _ = output_pixel_data_storage[log2_upsampling].size();
+           _ < num_threads; _++) {
+        output_pixel_data_storage[log2_upsampling].emplace_back(
+            kApplyImageFeaturesTileDim << log2_upsampling,
+            kApplyImageFeaturesTileDim << log2_upsampling);
+      }
+      opaque_alpha.resize(
+          kApplyImageFeaturesTileDim * shared->frame_header.upsampling, 1.0f);
+      if (pixel_callback) {
+        pixel_callback_rows.resize(num_threads);
+        for (size_t i = 0; i < pixel_callback_rows.size(); ++i) {
+          pixel_callback_rows[i].resize(kApplyImageFeaturesTileDim *
+                                        shared->frame_header.upsampling *
+                                        (rgb_output_is_rgba ? 4 : 3));
+        }
+      }
+    }
+    if (shared->metadata->m.num_extra_channels * num_threads >
+        ec_temp_images.size()) {
+      ec_temp_images.resize(shared->metadata->m.num_extra_channels *
+                            num_threads);
+    }
+    for (size_t i = 0; i < shared->metadata->m.num_extra_channels; i++) {
+      if (shared->frame_header.extra_channel_upsampling[i] == 1) continue;
+      // We need up to 2 pixels of padding on each side. On the x axis, we round
+      // up padding so that 0 starts at a multiple of kBlockDim.
+      size_t xs = kApplyImageFeaturesTileDim * shared->frame_header.upsampling /
+                      shared->frame_header.extra_channel_upsampling[i] +
+                  2 * kBlockDim;
+      size_t ys = kApplyImageFeaturesTileDim * shared->frame_header.upsampling /
+                      shared->frame_header.extra_channel_upsampling[i] +
+                  4;
+      for (size_t t = 0; t < num_threads; t++) {
+        auto& eti =
+            ec_temp_images[t * shared->metadata->m.num_extra_channels + i];
+        if (eti.xsize() < xs || eti.ysize() < ys) {
+          eti = ImageF(xs, ys);
+        }
+      }
+    }
+  }
+
+  // Information for colour conversions.
+  OutputEncodingInfo output_encoding_info;
+
+  // Resets the per-frame decoder state and derives the values that depend on
+  // the frame header, frame dimensions and metadata reachable through *shared.
+  // Returns an error if the loop-filter weights cannot be initialized; in that
+  // case the filter pipelines and upsamplers are left untouched.
+  Status Init() {
+    const auto& frame_header = shared->frame_header;
+
+    // Dequantization multipliers for the X and B channels: (1 / 1.25) raised
+    // to (qm_scale - 2).
+    const float inv_base = 1 / (1.25f);
+    x_dm_multiplier = std::pow(inv_base, frame_header.x_qm_scale - 2.0f);
+    b_dm_multiplier = std::pow(inv_base, frame_header.b_qm_scale - 2.0f);
+
+    // No output sink is attached and no AC strategy has been seen yet.
+    used_acs = 0;
+    fast_xyb_srgb8_conversion = false;
+    rgb_output_is_rgba = false;
+    pixel_callback = nullptr;
+    rgb_output = nullptr;
+
+    group_border_assigner.Init(shared->frame_dim);
+
+    const LoopFilter& loop_filter = frame_header.loop_filter;
+    JXL_RETURN_IF_ERROR(filter_weights.Init(loop_filter, shared->frame_dim));
+
+    // De-initialize FilterPipelines.
+    for (auto& pipeline : filter_pipelines) {
+      pipeline.num_filters = 0;
+    }
+
+    // One upsampler per supported factor: 2, 4 and 8.
+    for (size_t i = 0; i < 3; i++) {
+      upsamplers[i].Init(2 << i, shared->metadata->transform_data);
+    }
+    return true;
+  }
+
+  // Initialize the decoder state after all of DC is decoded.
+  //
+  // In order, this:
+  //  - sizes shared_storage.coeff_orders for the AC strategies recorded in
+  //    used_acs and the number of passes of the frame,
+  //  - generates and high-pass filters the noise image when the frame header
+  //    has the kNoise flag set,
+  //  - calls EnsureBordersStorage(),
+  //  - allocates `decoded` unless EagerFinalizeImageRect() returns true.
+  // `pool` is used to parallelize noise generation and filtering.
+  void InitForAC(ThreadPool* pool) {
+    // The per-pass coefficient order size is the largest value of
+    // kCoeffOrderOffset[3 * (ord + 1)] * kDCTBlockSize over the strategies
+    // whose bit is set in used_acs.
+    shared_storage.coeff_order_size = 0;
+    for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+      if (((1 << o) & used_acs) == 0) continue;
+      uint8_t ord = kStrategyOrder[o];
+      shared_storage.coeff_order_size =
+          std::max(kCoeffOrderOffset[3 * (ord + 1)] * kDCTBlockSize,
+                   shared_storage.coeff_order_size);
+    }
+    // The storage only ever grows; a smaller requirement reuses the existing
+    // allocation.
+    size_t sz = shared_storage.frame_header.passes.num_passes *
+                shared_storage.coeff_order_size;
+    if (sz > shared_storage.coeff_orders.size()) {
+      shared_storage.coeff_orders.resize(sz);
+    }
+    if (shared->frame_header.flags & FrameHeader::kNoise) {
+      // The noise image covers the upsampled, padded frame and is filled one
+      // kGroupDim x kGroupDim tile at a time.
+      noise = Image3F(shared->frame_dim.xsize_upsampled_padded,
+                      shared->frame_dim.ysize_upsampled_padded);
+      size_t num_x_groups = DivCeil(noise.xsize(), kGroupDim);
+      size_t num_y_groups = DivCeil(noise.ysize(), kGroupDim);
+      PROFILER_ZONE("GenerateNoise");
+      // Each tile gets its own seed (noise_seed + group_index), so the result
+      // does not depend on which thread processes which tile.
+      auto generate_noise = [&](int group_index, int _) {
+        size_t gx = group_index % num_x_groups;
+        size_t gy = group_index / num_x_groups;
+        // NOTE(review): the trailing image-size arguments presumably clamp the
+        // tile to the image bounds - confirm against Rect's constructor.
+        Rect rect(gx * kGroupDim, gy * kGroupDim, kGroupDim, kGroupDim,
+                  noise.xsize(), noise.ysize());
+        RandomImage3(noise_seed + group_index, rect, &noise);
+      };
+      RunOnPool(pool, 0, num_x_groups * num_y_groups, ThreadPool::SkipInit(),
+                generate_noise, "Generate noise");
+      {
+        PROFILER_ZONE("High pass noise");
+        // 4 * (1 - box kernel)
+        WeightsSymmetric5 weights{{HWY_REP4(-3.84)}, {HWY_REP4(0.16)},
+                                  {HWY_REP4(0.16)},  {HWY_REP4(0.16)},
+                                  {HWY_REP4(0.16)},  {HWY_REP4(0.16)}};
+        // TODO(veluca): avoid copy.
+        // TODO(veluca): avoid having a full copy of the image in main memory.
+        // Each plane is filtered into noise_tmp and then swapped back in, so
+        // noise_tmp is reused as the destination for the next plane.
+        ImageF noise_tmp(noise.xsize(), noise.ysize());
+        for (size_t c = 0; c < 3; c++) {
+          Symmetric5(noise.Plane(c), Rect(noise), weights, pool, &noise_tmp);
+          std::swap(noise.Plane(c), noise_tmp);
+        }
+        // Advance the seed by the number of groups in the frame so the next
+        // call starts from a different seed.
+        noise_seed += shared->frame_dim.num_groups;
+      }
+    }
+    EnsureBordersStorage();
+    if (!EagerFinalizeImageRect()) {
+      // decoded must be padded to a multiple of kBlockDim rows since the last
+      // rows may be used by the filters even if they are outside the frame
+      // dimension.
+      decoded = Image3F(shared->frame_dim.xsize_padded,
+                        shared->frame_dim.ysize_padded);
+    }
+#if MEMORY_SANITIZER
+    // Avoid errors due to loading vectors on the outermost padding.
+    FillImage(msan::kSanitizerSentinel, &decoded);
+#endif
+  }
+
+  // Declaration only; the definition is not part of this header. Called by
+  // InitForAC() before `decoded` is (re)allocated.
+  void EnsureBordersStorage();
+
+  // Declaration only; the definition is not part of this header.
+  // NOTE(review): presumably finalizes the pixels of group `group_idx` from
+  // `pixel_data` into `output` using per-thread storage selected by `thread`
+  // - confirm against the definition.
+  Status FinalizeGroup(size_t group_idx, size_t thread, Image3F* pixel_data,
+                       ImageBundle* output);
+};
+
+// Scratch storage needed while decoding a single group. Instances are meant to
+// be reused, which reduces memory allocations for large images because only
+// min(#threads, #groups) instances get initialized.
+struct GroupDecCache {
+  // Prepares the cache for `num_passes` passes and for every AC strategy whose
+  // bit is set in `used_acs`. Existing buffers are kept when they are already
+  // large enough, so repeated calls only allocate when requirements grow.
+  void InitOnce(size_t num_passes, size_t used_acs) {
+    PROFILER_FUNC;
+
+    for (size_t pass = 0; pass < num_passes; pass++) {
+      if (num_nzeroes[pass].xsize() != 0) continue;
+      // Allocate enough for a whole group - partial groups on the
+      // right/bottom border just use a subset. The valid size is passed via
+      // Rect.
+      num_nzeroes[pass] = Image3I(kGroupDimInBlocks, kGroupDimInBlocks);
+    }
+
+    // Number of coefficients of the largest strategy that is in use.
+    size_t required_area = 0;
+    for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+      if ((used_acs & (1 << o)) == 0) continue;
+      AcStrategy acs = AcStrategy::FromRawStrategy(o);
+      const size_t blocks = acs.covered_blocks_x() * acs.covered_blocks_y();
+      const size_t area = blocks * kDCTBlockSize;
+      if (area > required_area) required_area = area;
+    }
+
+    if (required_area > max_block_area_) {
+      max_block_area_ = required_area;
+      // We need 3x float blocks for dequantized coefficients and 1x for scratch
+      // space for transforms.
+      float_memory_ = hwy::AllocateAligned<float>(max_block_area_ * 4);
+      // We need 3x int32 or int16 blocks for quantized coefficients.
+      int32_memory_ = hwy::AllocateAligned<int32_t>(max_block_area_ * 3);
+      int16_memory_ = hwy::AllocateAligned<int16_t>(max_block_area_ * 3);
+    }
+
+    // Publish the raw pointers. The transform scratch area is the fourth
+    // block of the float allocation, right after the three coefficient blocks.
+    dec_group_qblock16 = int16_memory_.get();
+    dec_group_qblock = int32_memory_.get();
+    dec_group_block = float_memory_.get();
+    scratch_space = dec_group_block + max_block_area_ * 3;
+  }
+
+  // Scratch space used by DecGroupImpl().
+  float* dec_group_block;
+  int32_t* dec_group_qblock;
+  int16_t* dec_group_qblock16;
+
+  // For TransformToPixels.
+  float* scratch_space;
+  // Note that scratch_space is never used at the same time as dec_group_qblock.
+  // Moreover, only one of dec_group_qblock and dec_group_qblock16 is ever used.
+  // TODO(veluca): figure out if we can save allocations.
+
+  // AC decoding
+  Image3I num_nzeroes[kMaxNumPasses];
+
+ private:
+  // Backing allocations for the public pointers above, plus the block area
+  // (in coefficients) they were sized for.
+  hwy::AlignedFreeUniquePtr<float[]> float_memory_;
+  hwy::AlignedFreeUniquePtr<int32_t[]> int32_memory_;
+  hwy::AlignedFreeUniquePtr<int16_t[]> int16_memory_;
+  size_t max_block_area_ = 0;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_CACHE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_context_map.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_context_map.cc
new file mode 100644
index 0000000000..f7fc3d27a4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_context_map.cc
@@ -0,0 +1,105 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_context_map.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/entropy_coder.h"
+
+namespace jxl {
+
+namespace {
+
+// Moves v[index] to position 0 and shifts the entries that were in front of
+// it one slot towards the back. Entries after `index` are not touched.
+void MoveToFront(uint8_t* v, uint8_t index) {
+  const uint8_t moved = v[index];
+  for (size_t pos = index; pos > 0; --pos) {
+    v[pos] = v[pos - 1];
+  }
+  v[0] = moved;
+}
+
+// Undoes a move-to-front transform in place. Each of the first `v_len` bytes
+// of `v` is read as a position in a recency list that starts out as
+// 0, 1, ..., 255; it is replaced by the value found at that position, and that
+// value is then moved to the front of the list.
+void InverseMoveToFrontTransform(uint8_t* v, int v_len) {
+  uint8_t recency[256];
+  for (int value = 0; value < 256; ++value) {
+    recency[value] = static_cast<uint8_t>(value);
+  }
+  for (int pos = 0; pos < v_len; ++pos) {
+    const uint8_t rank = v[pos];
+    v[pos] = recency[rank];
+    // Rank 0 is already at the front, so there is nothing to move.
+    if (rank != 0) MoveToFront(recency, rank);
+  }
+}
+
+// Returns true iff every entry of `context_map` is smaller than `num_htrees`
+// and every histogram index in [0, num_htrees) occurs at least once.
+bool VerifyContextMap(const std::vector<uint8_t>& context_map,
+                      const size_t num_htrees) {
+  std::vector<bool> seen(num_htrees);
+  size_t num_distinct = 0;
+  for (const uint8_t htree : context_map) {
+    if (htree >= num_htrees) {
+      return JXL_FAILURE("Invalid histogram index in context map.");
+    }
+    if (seen[htree]) continue;
+    seen[htree] = true;
+    ++num_distinct;
+  }
+  if (num_distinct == num_htrees) return true;
+  return JXL_FAILURE("Incomplete context map.");
+}
+
+}  // namespace
+
+// Decodes a context map from `input` into `*context_map`, whose size must
+// already equal the number of context ids, and stores the number of
+// histograms it references in `*num_htrees`.
+//
+// Two encodings are handled: a "simple" one with a fixed number of bits per
+// entry (0 bits meaning all entries are 0), and an entropy-coded one that is
+// optionally followed by an inverse move-to-front transform.
+//
+// Returns false if `*context_map` is empty, if the histograms cannot be
+// decoded, if a decoded cluster id is not below kMaxClusters, if the ANS
+// final state check fails, or if the resulting map does not reference every
+// histogram index below `*num_htrees`.
+bool DecodeContextMap(std::vector<uint8_t>* context_map, size_t* num_htrees,
+                      BitReader* input) {
+  // std::max_element() below returns end() for an empty range, and
+  // dereferencing that is undefined behaviour. Reject the degenerate case
+  // before consuming any bits.
+  if (context_map->empty()) {
+    return JXL_FAILURE("Empty context map");
+  }
+  bool is_simple = input->ReadFixedBits<1>();
+  if (is_simple) {
+    int bits_per_entry = input->ReadFixedBits<2>();
+    if (bits_per_entry != 0) {
+      for (size_t i = 0; i < context_map->size(); i++) {
+        (*context_map)[i] = input->ReadBits(bits_per_entry);
+      }
+    } else {
+      std::fill(context_map->begin(), context_map->end(), 0);
+    }
+  } else {
+    bool use_mtf = input->ReadFixedBits<1>();
+    ANSCode code;
+    std::vector<uint8_t> dummy_ctx_map;
+    // Usage of LZ77 is disallowed if decoding only two symbols. This doesn't
+    // make sense in non-malicious bitstreams, and could cause a stack overflow
+    // in malicious bitstreams by making every context map require its own
+    // context map.
+    JXL_RETURN_IF_ERROR(
+        DecodeHistograms(input, 1, &code, &dummy_ctx_map,
+                         /*disallow_lz77=*/context_map->size() <= 2));
+    ANSSymbolReader reader(&code, input);
+    size_t i = 0;
+    while (i < context_map->size()) {
+      uint32_t sym = reader.ReadHybridUint(0, input, dummy_ctx_map);
+      // Entries are stored as uint8_t, so larger ids cannot be represented.
+      if (sym >= kMaxClusters) {
+        return JXL_FAILURE("Invalid cluster ID");
+      }
+      (*context_map)[i] = sym;
+      i++;
+    }
+    if (!reader.CheckANSFinalState()) {
+      return JXL_FAILURE("Invalid context map");
+    }
+    if (use_mtf) {
+      InverseMoveToFrontTransform(context_map->data(), context_map->size());
+    }
+  }
+  // The histogram count is one more than the largest index used; the
+  // verification then checks that no index below it is missing.
+  *num_htrees = *std::max_element(context_map->begin(), context_map->end()) + 1;
+  return VerifyContextMap(*context_map, *num_htrees);
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_context_map.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_context_map.h
new file mode 100644
index 0000000000..1db2317827
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_context_map.h
@@ -0,0 +1,30 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_CONTEXT_MAP_H_
+#define LIB_JXL_DEC_CONTEXT_MAP_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/dec_bit_reader.h"
+
+namespace jxl {
+
+// Context map uses uint8_t.
+constexpr size_t kMaxClusters = 256;
+
+// Reads the context map from the bit stream. On calling this function,
+// context_map->size() must be the number of possible context ids.
+// Sets *num_htrees to the number of different histogram ids in
+// *context_map.
+// Returns false when the encoded map is invalid, e.g. when a decoded cluster
+// id is not below kMaxClusters, when the entropy-coded stream fails its final
+// state check, or when some histogram id below *num_htrees is never used.
+bool DecodeContextMap(std::vector<uint8_t>* context_map, size_t* num_htrees,
+                      BitReader* input);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_CONTEXT_MAP_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_external_image.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_external_image.cc
new file mode 100644
index 0000000000..bb9196da23
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_external_image.cc
@@ -0,0 +1,494 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_external_image.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_external_image.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/cache_aligned.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// Converts `num` samples from `in` to unsigned integers in `out`. Inputs are
+// limited to [0, 1] (NaN becomes 0) and scaled by `mul`.
+//
+// For bits_per_sample == 32 a scalar loop is used. Otherwise whole vectors
+// are loaded and stored, so up to RoundUpTo(num, Lanes) elements of both
+// buffers are accessed.
+void FloatToU32(const float* in, uint32_t* out, size_t num, float mul,
+                size_t bits_per_sample) {
+  // TODO(eustas): investigate 24..31 bpp cases.
+  if (bits_per_sample == 32) {
+    // Conversion to real 32-bit *unsigned* integers requires more intermediate
+    // precision than what is given by the usual f32 -> i32 conversion
+    // instructions, so we run the non-SIMD path for those.
+    const uint32_t max_value = (1ull << bits_per_sample) - 1;
+    for (size_t i = 0; i < num; i++) {
+      const float sample = in[i];
+      // Default covers negative inputs and, because both comparisons below
+      // are false for it, NaN.
+      uint32_t converted = 0;
+      if (sample >= 1.0f) {
+        converted = max_value;
+      } else if (sample >= 0.0f) {
+        converted = static_cast<uint32_t>(sample * mul + 0.5f);
+      }
+      out[i] = converted;
+    }
+    return;
+  }
+
+  // General SIMD case for less than 32 bits output.
+  const HWY_FULL(float) df;
+  const hwy::HWY_NAMESPACE::Rebind<uint32_t, decltype(df)> du;
+  const size_t lanes = Lanes(df);
+
+  // Unpoison accessing partially-uninitialized vectors with memory sanitizer.
+  // This is because we run NearestInt() on the vector, which triggers msan
+  // even if it is safe to do so since the values are not mixed between lanes.
+  const size_t padded_num = RoundUpTo(num, lanes);
+  msan::UnpoisonMemory(in + num, sizeof(in[0]) * (padded_num - num));
+
+  const auto lower = Zero(df);
+  const auto upper = Set(df, 1.0f);
+  const auto scale = Set(df, mul);
+  for (size_t x = 0; x < num; x += lanes) {
+    // Clamp turns NaN to 'min'.
+    const auto clamped = Clamp(Load(df, in + x), lower, upper);
+    const auto rounded = NearestInt(clamped * scale);
+    Store(BitCast(du, rounded), du, out + x);
+  }
+
+  // Poison back the output.
+  msan::PoisonMemory(out + num, sizeof(out[0]) * (padded_num - num));
+}
+
+// Converts `num` floats from `in` to half precision values in `out`. Whole
+// vectors are loaded and stored, so up to RoundUpTo(num, Lanes) elements of
+// both buffers are accessed.
+void FloatToF16(const float* in, hwy::float16_t* out, size_t num) {
+  const HWY_FULL(float) df;
+  const hwy::HWY_NAMESPACE::Rebind<hwy::float16_t, decltype(df)> df16;
+  const size_t lanes = Lanes(df);
+
+  // Unpoison accessing partially-uninitialized vectors with memory sanitizer.
+  // This is because we run DemoteTo() on the vector which triggers msan.
+  const size_t padded_num = RoundUpTo(num, lanes);
+  msan::UnpoisonMemory(in + num, sizeof(in[0]) * (padded_num - num));
+
+  for (size_t x = 0; x < num; x += lanes) {
+    const auto halves = DemoteTo(df16, Load(df, in + x));
+    Store(halves, df16, out + x);
+  }
+
+  // Poison back the output.
+  msan::PoisonMemory(out + num, sizeof(out[0]) * (padded_num - num));
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace jxl {
+namespace {
+
+// Stores the 32-bit representation of `value` at `p` in big endian byte
+// order. The bits are copied with memcpy to avoid type-punning through a
+// pointer cast.
+void StoreBEFloat(float value, uint8_t* p) {
+  uint32_t bits;
+  memcpy(&bits, &value, sizeof(bits));
+  StoreBE32(bits, p);
+}
+
+// Stores the 32-bit representation of `value` at `p` in little endian byte
+// order. The bits are copied with memcpy to avoid type-punning through a
+// pointer cast.
+void StoreLEFloat(float value, uint8_t* p) {
+  uint32_t bits;
+  memcpy(&bits, &value, sizeof(bits));
+  StoreLE32(bits, p);
+}
+
+// Calls row_func(y) once for every row index y in [0, ysize), distributing
+// the rows over `pool`.
+template <typename RowFunc>
+void ForEachRowUndoOrientation(jxl::ThreadPool* pool, size_t ysize,
+                               const RowFunc& row_func) {
+  RunOnPool(
+      pool, 0, static_cast<uint32_t>(ysize), ThreadPool::SkipInit(),
+      [&](const int task, int /*thread*/) {
+        const int64_t y = task;
+        row_func(y);
+      },
+      "UndoOrientation");
+}
+
+// Writes into `out` the plane obtained by undoing `undo_orientation` on
+// `image`. `out` is reallocated with the resulting dimensions, which are
+// swapped relative to `image` for the transposing orientations. For
+// orientations not listed below (including identity) `out` is left untouched,
+// so the orientation passed in may not be identity.
+// TODO(lode): SIMDify where possible
+template <typename T>
+void UndoOrientation(jxl::Orientation undo_orientation, const Plane<T>& image,
+                     Plane<T>& out, jxl::ThreadPool* pool) {
+  const size_t xsize = image.xsize();
+  const size_t ysize = image.ysize();
+
+  if (undo_orientation == Orientation::kFlipHorizontal) {
+    // Same row, columns mirrored.
+    out = Plane<T>(xsize, ysize);
+    ForEachRowUndoOrientation(pool, ysize, [&](const int64_t y) {
+      const T* JXL_RESTRICT src = image.Row(y);
+      T* JXL_RESTRICT dst = out.Row(y);
+      for (size_t x = 0; x < xsize; ++x) {
+        dst[xsize - x - 1] = src[x];
+      }
+    });
+  } else if (undo_orientation == Orientation::kRotate180) {
+    // Rows and columns both mirrored.
+    out = Plane<T>(xsize, ysize);
+    ForEachRowUndoOrientation(pool, ysize, [&](const int64_t y) {
+      const T* JXL_RESTRICT src = image.Row(y);
+      T* JXL_RESTRICT dst = out.Row(ysize - y - 1);
+      for (size_t x = 0; x < xsize; ++x) {
+        dst[xsize - x - 1] = src[x];
+      }
+    });
+  } else if (undo_orientation == Orientation::kFlipVertical) {
+    // Rows mirrored, columns kept.
+    out = Plane<T>(xsize, ysize);
+    ForEachRowUndoOrientation(pool, ysize, [&](const int64_t y) {
+      const T* JXL_RESTRICT src = image.Row(y);
+      T* JXL_RESTRICT dst = out.Row(ysize - y - 1);
+      for (size_t x = 0; x < xsize; ++x) {
+        dst[x] = src[x];
+      }
+    });
+  } else if (undo_orientation == Orientation::kTranspose) {
+    // (x, y) -> (y, x); output dimensions are swapped.
+    out = Plane<T>(ysize, xsize);
+    ForEachRowUndoOrientation(pool, ysize, [&](const int64_t y) {
+      const T* JXL_RESTRICT src = image.Row(y);
+      for (size_t x = 0; x < xsize; ++x) {
+        out.Row(x)[y] = src[x];
+      }
+    });
+  } else if (undo_orientation == Orientation::kRotate90) {
+    // Input row y becomes output column ysize - y - 1.
+    out = Plane<T>(ysize, xsize);
+    ForEachRowUndoOrientation(pool, ysize, [&](const int64_t y) {
+      const T* JXL_RESTRICT src = image.Row(y);
+      for (size_t x = 0; x < xsize; ++x) {
+        out.Row(x)[ysize - y - 1] = src[x];
+      }
+    });
+  } else if (undo_orientation == Orientation::kAntiTranspose) {
+    // Transpose with both output axes mirrored.
+    out = Plane<T>(ysize, xsize);
+    ForEachRowUndoOrientation(pool, ysize, [&](const int64_t y) {
+      const T* JXL_RESTRICT src = image.Row(y);
+      for (size_t x = 0; x < xsize; ++x) {
+        out.Row(xsize - x - 1)[ysize - y - 1] = src[x];
+      }
+    });
+  } else if (undo_orientation == Orientation::kRotate270) {
+    // Input row y becomes output column y, with the output rows mirrored.
+    out = Plane<T>(ysize, xsize);
+    ForEachRowUndoOrientation(pool, ysize, [&](const int64_t y) {
+      const T* JXL_RESTRICT src = image.Row(y);
+      for (size_t x = 0; x < xsize; ++x) {
+        out.Row(xsize - x - 1)[y] = src[x];
+      }
+    });
+  }
+}
+}  // namespace
+
+HWY_EXPORT(FloatToU32);
+HWY_EXPORT(FloatToF16);
+
+namespace {
+
+// Signature of the per-sample writers that StoreUintRow can be instantiated
+// with.
+using StoreFuncType = void(uint32_t value, uint8_t* dest);
+
+// Interleaves one row of `num_channels` planar channels into `out`: for each
+// of the `xsize` pixels, the samples of all channels are written back to back
+// with StoreFunc, `bytes_per_sample` bytes apart.
+template <StoreFuncType StoreFunc>
+void StoreUintRow(uint32_t* JXL_RESTRICT* rows_u32, size_t num_channels,
+                  size_t xsize, size_t bytes_per_sample,
+                  uint8_t* JXL_RESTRICT out) {
+  // Samples are emitted in strictly increasing address order, so a running
+  // offset replaces the per-sample index computation.
+  size_t offset = 0;
+  for (size_t x = 0; x < xsize; ++x) {
+    for (size_t c = 0; c < num_channels; c++) {
+      StoreFunc(rows_u32[c][x], out + offset);
+      offset += bytes_per_sample;
+    }
+  }
+}
+
+// Interleaves one row of `num_channels` planar float channels into `out`: for
+// each of the `xsize` pixels, the samples of all channels are written back to
+// back with StoreFunc, sizeof(float) bytes apart.
+template <void(StoreFunc)(float, uint8_t*)>
+void StoreFloatRow(const float* JXL_RESTRICT* rows_in, size_t num_channels,
+                   size_t xsize, uint8_t* JXL_RESTRICT out) {
+  // Samples are emitted in strictly increasing address order, so a running
+  // offset replaces the per-sample index computation.
+  size_t offset = 0;
+  for (size_t x = 0; x < xsize; ++x) {
+    for (size_t c = 0; c < num_channels; c++) {
+      StoreFunc(rows_in[c][x], out + offset);
+      offset += sizeof(float);
+    }
+  }
+}
+
+// Writes the low 8 bits of `value` to *dest.
+void JXL_INLINE Store8(uint32_t value, uint8_t* dest) {
+  *dest = static_cast<uint8_t>(value & 0xff);
+}
+
+}  // namespace
+
+// Writes `ib` as interleaved samples, either directly into `out_image` (rows
+// `stride` bytes apart) or row by row through `out_callback`; exactly one of
+// the two must be provided.
+//
+// bits_per_sample: 2..32 for integer output, 16 or 32 when float_out is true.
+//   Packed 1-bit output is not supported.
+// num_channels: 1 or 2 select a single color channel, 3 or 4 select three;
+//   2 and 4 additionally append alpha (all ones if `ib` has no alpha).
+// stride: bytes per output row; must be at least the interleaved row size.
+// out_size: size in bytes of `out_image`; checked against the last byte that
+//   will be written. Not used when `out_callback` is set.
+// undo_orientation: orientation to undo before writing; for transposing
+//   orientations the output width and height are swapped.
+//
+// Premultiplied alpha is undone on a copy of the color channels before
+// conversion. Returns an error for unsupported parameters or buffers that are
+// too small.
+Status ConvertToExternal(const jxl::ImageBundle& ib, size_t bits_per_sample,
+                         bool float_out, size_t num_channels,
+                         JxlEndianness endianness, size_t stride,
+                         jxl::ThreadPool* pool, void* out_image,
+                         size_t out_size, JxlImageOutCallback out_callback,
+                         void* out_opaque, jxl::Orientation undo_orientation) {
+  if (bits_per_sample < 1 || bits_per_sample > 32) {
+    return JXL_FAILURE("Invalid bits_per_sample value.");
+  }
+  if (!!out_image == !!out_callback) {
+    return JXL_FAILURE(
+        "Must provide either an out_image or an out_callback, but not both.");
+  }
+  // TODO(deymo): Implement 1-bit per pixel packed in 8 samples per byte.
+  if (bits_per_sample == 1) {
+    return JXL_FAILURE("packed 1-bit per sample is not yet supported");
+  }
+  size_t xsize = ib.xsize();
+  size_t ysize = ib.ysize();
+
+  bool want_alpha = num_channels == 2 || num_channels == 4;
+  size_t color_channels = num_channels <= 2 ? 1 : 3;
+
+  // bytes_per_channel is only valid for bits_per_sample > 1.
+  const size_t bytes_per_channel = DivCeil(bits_per_sample, jxl::kBitsPerByte);
+  const size_t bytes_per_pixel = num_channels * bytes_per_channel;
+
+  const Image3F* color = &ib.color();
+  Image3F temp_color, unpremul;
+  const ImageF* alpha = ib.HasAlpha() ? &ib.alpha() : nullptr;
+  ImageF temp_alpha;
+
+  // In callback mode every thread gets its own `stride`-byte row buffer that
+  // is handed to the callback after the row has been converted.
+  std::vector<std::vector<uint8_t>> row_out_callback;
+  auto InitOutCallback = [&](size_t num_threads) {
+    if (out_callback) {
+      row_out_callback.resize(num_threads);
+      for (size_t i = 0; i < num_threads; ++i) {
+        row_out_callback[i].resize(stride);
+      }
+    }
+  };
+
+  // Undo premultiplication on a copy so that `ib` itself stays unchanged.
+  if (ib.AlphaIsPremultiplied() && ib.HasAlpha()) {
+    unpremul = Image3F(color->xsize(), color->ysize());
+    CopyImageTo(*color, &unpremul);
+    for (size_t y = 0; y < unpremul.ysize(); y++) {
+      UnpremultiplyAlpha(unpremul.PlaneRow(0, y), unpremul.PlaneRow(1, y),
+                         unpremul.PlaneRow(2, y), alpha->Row(y),
+                         unpremul.xsize());
+    }
+    color = &unpremul;
+  }
+  if (undo_orientation != Orientation::kIdentity) {
+    Image3F transformed;
+    for (size_t c = 0; c < color_channels; ++c) {
+      UndoOrientation(undo_orientation, color->Plane(c), transformed.Plane(c),
+                      pool);
+    }
+    transformed.Swap(temp_color);
+    color = &temp_color;
+    if (ib.HasAlpha()) {
+      UndoOrientation(undo_orientation, *alpha, temp_alpha, pool);
+      alpha = &temp_alpha;
+    }
+
+    // The orientation may have swapped width and height.
+    xsize = color->xsize();
+    ysize = color->ysize();
+  }
+
+  if (stride < bytes_per_pixel * xsize) {
+    return JXL_FAILURE(
+        "stride is smaller than scanline width in bytes: %zu vs %zu", stride,
+        bytes_per_pixel * xsize);
+  }
+  // When writing straight into out_image, the last row ends at
+  // (ysize - 1) * stride + bytes_per_pixel * xsize. Refuse buffers that are
+  // too small for that instead of writing past their end. Empty images write
+  // nothing, so they are exempt (this also avoids ysize - 1 wrapping around).
+  if (out_image && xsize > 0 && ysize > 0 &&
+      out_size < (ysize - 1) * stride + bytes_per_pixel * xsize) {
+    return JXL_FAILURE("out_size is too small to store image");
+  }
+
+  const bool little_endian =
+      endianness == JXL_LITTLE_ENDIAN ||
+      (endianness == JXL_NATIVE_ENDIAN && IsLittleEndian());
+
+  // Stand-in alpha row used for every output row when alpha is requested but
+  // the image has none.
+  ImageF ones;
+  if (want_alpha && !ib.HasAlpha()) {
+    ones = ImageF(xsize, 1);
+    FillImage(1.0f, &ones);
+  }
+
+  if (float_out) {
+    if (bits_per_sample == 16) {
+      bool swap_endianness = little_endian != IsLittleEndian();
+      // Per-thread planar half-float rows: num_channels rows per thread.
+      Plane<hwy::float16_t> f16_cache;
+      RunOnPool(
+          pool, 0, static_cast<uint32_t>(ysize),
+          [&](size_t num_threads) {
+            f16_cache =
+                Plane<hwy::float16_t>(xsize, num_channels * num_threads);
+            InitOutCallback(num_threads);
+            return true;
+          },
+          [&](const int task, int thread) {
+            const int64_t y = task;
+            const float* JXL_RESTRICT row_in[4];
+            size_t c = 0;
+            for (; c < color_channels; c++) {
+              row_in[c] = color->PlaneRow(c, y);
+            }
+            if (want_alpha) {
+              row_in[c++] = ib.HasAlpha() ? alpha->Row(y) : ones.Row(0);
+            }
+            JXL_ASSERT(c == num_channels);
+            hwy::float16_t* JXL_RESTRICT row_f16[4];
+            for (size_t r = 0; r < c; r++) {
+              row_f16[r] = f16_cache.Row(r + thread * num_channels);
+              HWY_DYNAMIC_DISPATCH(FloatToF16)
+              (row_in[r], row_f16[r], xsize);
+            }
+            uint8_t* row_out =
+                out_callback
+                    ? row_out_callback[thread].data()
+                    : &(reinterpret_cast<uint8_t*>(out_image))[stride * y];
+            // interleave the one scanline
+            hwy::float16_t* row_f16_out =
+                reinterpret_cast<hwy::float16_t*>(row_out);
+            for (size_t x = 0; x < xsize; x++) {
+              for (size_t r = 0; r < c; r++) {
+                row_f16_out[x * num_channels + r] = row_f16[r][x];
+              }
+            }
+            // Samples were written in native byte order; swap the two bytes
+            // of each sample if the other order was requested.
+            if (swap_endianness) {
+              size_t size = xsize * num_channels * 2;
+              for (size_t i = 0; i < size; i += 2) {
+                std::swap(row_out[i + 0], row_out[i + 1]);
+              }
+            }
+            if (out_callback) {
+              (*out_callback)(out_opaque, 0, y, xsize, row_out);
+            }
+          },
+          "ConvertF16");
+    } else if (bits_per_sample == 32) {
+      RunOnPool(
+          pool, 0, static_cast<uint32_t>(ysize),
+          [&](size_t num_threads) {
+            InitOutCallback(num_threads);
+            return true;
+          },
+          [&](const int task, int thread) {
+            const int64_t y = task;
+            uint8_t* row_out =
+                out_callback
+                    ? row_out_callback[thread].data()
+                    : &(reinterpret_cast<uint8_t*>(out_image))[stride * y];
+            const float* JXL_RESTRICT row_in[4];
+            size_t c = 0;
+            for (; c < color_channels; c++) {
+              row_in[c] = color->PlaneRow(c, y);
+            }
+            if (want_alpha) {
+              row_in[c++] = ib.HasAlpha() ? alpha->Row(y) : ones.Row(0);
+            }
+            JXL_ASSERT(c == num_channels);
+            if (little_endian) {
+              StoreFloatRow<StoreLEFloat>(row_in, c, xsize, row_out);
+            } else {
+              StoreFloatRow<StoreBEFloat>(row_in, c, xsize, row_out);
+            }
+            if (out_callback) {
+              (*out_callback)(out_opaque, 0, y, xsize, row_out);
+            }
+          },
+          "ConvertFloat");
+    } else {
+      return JXL_FAILURE("float other than 16-bit and 32-bit not supported");
+    }
+  } else {
+    // Multiplier to convert from floating point 0-1 range to the integer
+    // range.
+    float mul = (1ull << bits_per_sample) - 1;
+    // Per-thread planar integer rows: num_channels rows per thread.
+    Plane<uint32_t> u32_cache;
+    RunOnPool(
+        pool, 0, static_cast<uint32_t>(ysize),
+        [&](size_t num_threads) {
+          u32_cache = Plane<uint32_t>(xsize, num_channels * num_threads);
+          InitOutCallback(num_threads);
+          return true;
+        },
+        [&](const int task, int thread) {
+          const int64_t y = task;
+          uint8_t* row_out =
+              out_callback
+                  ? row_out_callback[thread].data()
+                  : &(reinterpret_cast<uint8_t*>(out_image))[stride * y];
+          const float* JXL_RESTRICT row_in[4];
+          size_t c = 0;
+          for (; c < color_channels; c++) {
+            row_in[c] = color->PlaneRow(c, y);
+          }
+          if (want_alpha) {
+            row_in[c++] = ib.HasAlpha() ? alpha->Row(y) : ones.Row(0);
+          }
+          JXL_ASSERT(c == num_channels);
+          uint32_t* JXL_RESTRICT row_u32[4];
+          for (size_t r = 0; r < c; r++) {
+            row_u32[r] = u32_cache.Row(r + thread * num_channels);
+            // row_u32[] is a per-thread temporary row storage, this isn't
+            // intended to be initialized on a previous run.
+            msan::PoisonMemory(row_u32[r], xsize * sizeof(row_u32[r][0]));
+            HWY_DYNAMIC_DISPATCH(FloatToU32)
+            (row_in[r], row_u32[r], xsize, mul, bits_per_sample);
+          }
+          // Pick the writer matching bytes_per_channel (1, 2, 3 or 4 bytes).
+          // TODO(deymo): add bits_per_sample == 1 case here.
+          if (bits_per_sample <= 8) {
+            StoreUintRow<Store8>(row_u32, c, xsize, 1, row_out);
+          } else if (bits_per_sample <= 16) {
+            if (little_endian) {
+              StoreUintRow<StoreLE16>(row_u32, c, xsize, 2, row_out);
+            } else {
+              StoreUintRow<StoreBE16>(row_u32, c, xsize, 2, row_out);
+            }
+          } else if (bits_per_sample <= 24) {
+            if (little_endian) {
+              StoreUintRow<StoreLE24>(row_u32, c, xsize, 3, row_out);
+            } else {
+              StoreUintRow<StoreBE24>(row_u32, c, xsize, 3, row_out);
+            }
+          } else {
+            if (little_endian) {
+              StoreUintRow<StoreLE32>(row_u32, c, xsize, 4, row_out);
+            } else {
+              StoreUintRow<StoreBE32>(row_u32, c, xsize, 4, row_out);
+            }
+          }
+          if (out_callback) {
+            (*out_callback)(out_opaque, 0, y, xsize, row_out);
+          }
+        },
+        "ConvertUint");
+  }
+
+  return true;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_external_image.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_external_image.h
new file mode 100644
index 0000000000..aed8764411
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_external_image.h
@@ -0,0 +1,45 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_EXTERNAL_IMAGE_H_
+#define LIB_JXL_DEC_EXTERNAL_IMAGE_H_
+
+// Interleaved image for color transforms and Codec.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "jxl/decode.h"
+#include "jxl/types.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+// Converts ib to interleaved void* pixel buffer with the given format.
+// bits_per_sample: for integer output any value from 2 to 32 is accepted
+// (samples take 1, 2, 3 or 4 bytes for up to 8, 16, 24 or 32 bits); if
+// float_out is true it must be 16 or 32. 1 is not yet implemented.
+// num_channels: must be 1, 2, 3 or 4 for gray, gray+alpha, RGB, RGB+alpha.
+// This supports the features needed for the C API and does not perform
+// color space conversion.
+// TODO(lode): support 1-bit output (bits_per_sample == 1)
+// TODO(lode): support rectangle crop.
+// stride_out is output scanline size in bytes, must be >=
+// output_xsize * output_bytes_per_pixel.
+// Exactly one of out_image and out_callback must be set: either the rows are
+// written into out_image, or each converted row is passed to out_callback
+// together with out_opaque.
+// undo_orientation is an EXIF orientation to undo. Depending on the
+// orientation, the output xsize and ysize are swapped compared to input
+// xsize and ysize.
+Status ConvertToExternal(const jxl::ImageBundle& ib, size_t bits_per_sample,
+                         bool float_out, size_t num_channels,
+                         JxlEndianness endianness, size_t stride_out,
+                         jxl::ThreadPool* thread_pool, void* out_image,
+                         size_t out_size, JxlImageOutCallback out_callback,
+                         void* out_opaque, jxl::Orientation undo_orientation);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_EXTERNAL_IMAGE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_external_image_gbench.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_external_image_gbench.cc
new file mode 100644
index 0000000000..283a97529a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_external_image_gbench.cc
@@ -0,0 +1,56 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "benchmark/benchmark.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+namespace {
+
+// Decoder case: measures how fast an internal float image with alpha is
+// interleaved into an 8-bit RGBA buffer by ConvertToExternal. The benchmark
+// argument is used as both width and height of the (zero-filled) image.
+void BM_DecExternalImage_ConvertImageRGBA(benchmark::State& state) {
+  const size_t kNumIter = 5;
+  const size_t num_channels = 4;
+  const size_t xsize = state.range();
+  const size_t ysize = state.range();
+
+  // Build a zero-filled sRGB bundle with a non-premultiplied alpha channel.
+  ImageMetadata metadata;
+  metadata.SetAlphaBits(8);
+  ImageBundle bundle(&metadata);
+  Image3F color_planes(xsize, ysize);
+  ZeroFillImage(&color_planes);
+  bundle.SetFromImage(std::move(color_planes), ColorEncoding::SRGB());
+  ImageF alpha_plane(xsize, ysize);
+  ZeroFillImage(&alpha_plane);
+  bundle.SetAlpha(std::move(alpha_plane), /*alpha_is_premultiplied=*/false);
+
+  // One byte per sample, rows tightly packed.
+  const size_t row_bytes = xsize * num_channels;
+  std::vector<uint8_t> pixels(row_bytes * ysize);
+
+  for (auto _ : state) {
+    for (size_t iter = 0; iter < kNumIter; ++iter) {
+      JXL_CHECK(ConvertToExternal(
+          bundle,
+          /*bits_per_sample=*/8,
+          /*float_out=*/false, num_channels, JXL_NATIVE_ENDIAN,
+          /*stride*/ row_bytes,
+          /*thread_pool=*/nullptr, pixels.data(), pixels.size(),
+          /*out_callback=*/nullptr, /*out_opaque=*/nullptr,
+          /*undo_orientation=*/jxl::Orientation::kIdentity));
+    }
+  }
+
+  // Pixels per second.
+  state.SetItemsProcessed(kNumIter * state.iterations() * xsize * ysize);
+  state.SetBytesProcessed(kNumIter * state.iterations() * pixels.size());
+}
+
+BENCHMARK(BM_DecExternalImage_ConvertImageRGBA)
+    ->RangeMultiplier(2)
+    ->Range(256, 2048);
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_file.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_file.cc
new file mode 100644
index 0000000000..2ee1f66ffd
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_file.cc
@@ -0,0 +1,186 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_file.h"
+
+#include <stddef.h>
+
+#include <utility>
+#include <vector>
+
+#include "jxl/decode.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_frame.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/icc_codec.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/jpeg/dec_jpeg_data_writer.h"
+
+namespace jxl {
+namespace {
+
+// Reads the size header, image metadata and transform data into io->metadata.
+Status DecodeHeaders(BitReader* reader, CodecInOut* io) {
+  auto& meta = io->metadata;
+  JXL_RETURN_IF_ERROR(ReadSizeHeader(reader, &meta.size));
+  JXL_RETURN_IF_ERROR(ReadImageMetadata(reader, &meta.m));
+  // Copy the XYB flag into the transform data before that bundle is read.
+  auto& transform = meta.transform_data;
+  transform.nonserialized_xyb_encoded = meta.m.xyb_encoded;
+  JXL_RETURN_IF_ERROR(Bundle::Read(reader, &transform));
+  return true;
+}
+
+}  // namespace
+
+// Handles the optional preview frame: decodes it into `preview`, skips it, or
+// does nothing, depending on dparams.preview and metadata.m.have_preview.
+Status DecodePreview(const DecompressParams& dparams,
+                     const CodecMetadata& metadata,
+                     BitReader* JXL_RESTRICT reader, ThreadPool* pool,
+                     ImageBundle* JXL_RESTRICT preview, uint64_t* dec_pixels,
+                     const SizeConstraints* constraints) {
+  const auto mode = dparams.preview;
+  if (!metadata.m.have_preview) {
+    // No preview in the file; that is an error only if one was demanded.
+    if (mode != Override::kOn) return true;
+    return JXL_FAILURE("preview == kOn but no preview present");
+  }
+
+  // A preview exists: align the reader before skipping or decoding it.
+  JXL_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+  if (mode == Override::kOff) {
+    JXL_RETURN_IF_ERROR(SkipFrame(metadata, reader, /*is_preview=*/true));
+    return true;
+  }
+
+  // Default or kOn: decode the preview.
+  PassesDecoderState dec_state;
+  JXL_RETURN_IF_ERROR(dec_state.output_encoding_info.Set(
+      metadata, ColorEncoding::LinearSRGB(metadata.m.color_encoding.IsGray())));
+  JXL_RETURN_IF_ERROR(DecodeFrame(dparams, &dec_state, pool, reader, preview,
+                                  metadata, constraints, /*is_preview=*/true));
+  if (dec_pixels != nullptr) {
+    // Add the upsampled preview area to the caller's pixel counter.
+    const auto& dim = dec_state.shared->frame_dim;
+    *dec_pixels += dim.xsize_upsampled * dim.ysize_upsampled;
+  }
+  return true;
+}
+
+// Decodes the JPEG XL bitstream in `file` into `io`. To avoid the complexity
+// of file I/O and buffering, we assume the bitstream is loaded (or for large
+// images/sequences: mapped into) memory. With dparams.allow_partial_files, a
+// frame that fails to decode ends decoding and earlier frames are kept.
+Status DecodeFile(const DecompressParams& dparams,
+                  const Span<const uint8_t> file, CodecInOut* JXL_RESTRICT io,
+                  ThreadPool* pool) {
+  PROFILER_ZONE("DecodeFile uninstrumented");
+
+  // Marker: reject inputs that do not start with a JPEG XL signature.
+  JxlSignature signature = JxlSignatureCheck(file.data(), file.size());
+  if (signature == JXL_SIG_NOT_ENOUGH_BYTES || signature == JXL_SIG_INVALID) {
+    return JXL_FAILURE("File does not start with known JPEG XL signature");
+  }
+
+  std::unique_ptr<jpeg::JPEGData> jpeg_data = nullptr;
+  if (dparams.keep_dct) {
+    if (io->Main().jpeg_data == nullptr) {
+      return JXL_FAILURE("Caller must set jpeg_data");
+    }
+    jpeg_data = std::move(io->Main().jpeg_data);
+  }
+
+  Status ret = true;
+  {
+    BitReader reader(file);
+    BitReaderScopedCloser reader_closer(&reader, &ret);
+    (void)reader.ReadFixedBits<16>();  // skip marker
+
+    JXL_RETURN_IF_ERROR(DecodeHeaders(&reader, io));
+    JXL_RETURN_IF_ERROR(VerifyDimensions(&io->constraints, io->metadata.xsize(),
+                                         io->metadata.ysize()));
+
+    if (io->metadata.m.color_encoding.WantICC()) {
+      PaddedBytes icc;
+      JXL_RETURN_IF_ERROR(ReadICC(&reader, &icc));
+      JXL_RETURN_IF_ERROR(io->metadata.m.color_encoding.SetICC(std::move(icc)));
+    }
+    // Set ICC profile in jpeg_data.
+    if (jpeg_data) {
+      JXL_RETURN_IF_ERROR(jpeg::SetJPEGDataFromICC(
+          io->metadata.m.color_encoding.ICC(), jpeg_data.get()));
+    }
+
+    JXL_RETURN_IF_ERROR(DecodePreview(dparams, io->metadata, &reader, pool,
+                                      &io->preview_frame, &io->dec_pixels,
+                                      &io->constraints));
+
+    // Only necessary if no ICC and no preview.
+    JXL_RETURN_IF_ERROR(reader.JumpToByteBoundary());
+    if (io->metadata.m.have_animation && dparams.keep_dct) {
+      return JXL_FAILURE("Cannot decode to JPEG an animation");
+    }
+
+    PassesDecoderState dec_state;
+    JXL_RETURN_IF_ERROR(dec_state.output_encoding_info.Set(
+        io->metadata,
+        ColorEncoding::LinearSRGB(io->metadata.m.color_encoding.IsGray())));
+
+    io->frames.clear();
+    Status dec_ok(false);
+    do {
+      io->frames.emplace_back(&io->metadata.m);
+      if (jpeg_data) {
+        io->frames.back().jpeg_data = std::move(jpeg_data);
+      }
+      // Skip frames that are not displayed.
+      do {
+        dec_ok =
+            DecodeFrame(dparams, &dec_state, pool, &reader, &io->frames.back(),
+                        io->metadata, &io->constraints);
+        if (!dparams.allow_partial_files) {
+          JXL_RETURN_IF_ERROR(dec_ok);
+        } else if (!dec_ok) {
+          io->frames.pop_back();
+          break;
+        }
+      } while (dec_state.shared->frame_header.frame_type !=
+                   FrameType::kRegularFrame &&
+               dec_state.shared->frame_header.frame_type !=
+                   FrameType::kSkipProgressive);
+      // A failed partial decode popped its frame above (possibly leaving
+      // `frames` empty), so only count pixels of a frame that was decoded.
+      if (dec_ok) {
+        io->dec_pixels += io->frames.back().xsize() * io->frames.back().ysize();
+      }
+    } while (!dec_state.shared->frame_header.is_last && dec_ok);
+
+    if (io->frames.empty()) return JXL_FAILURE("Not enough data.");
+
+    if (dparams.check_decompressed_size && !dparams.allow_partial_files &&
+        dparams.max_downsampling == 1) {
+      if (reader.TotalBitsConsumed() != file.size() * kBitsPerByte) {
+        return JXL_FAILURE("DecodeFile reader position not at EOF.");
+      }
+    }
+    // Suppress errors when decoding partial files with DC frames.
+    if (!reader.AllReadsWithinBounds() && dparams.allow_partial_files) {
+      reader_closer.CloseAndSuppressError();
+    }
+
+    io->CheckMetadata();
+    // reader is closed here.
+  }
+  return ret;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_file.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_file.h
new file mode 100644
index 0000000000..cd04d5d4c7
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_file.h
@@ -0,0 +1,48 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_FILE_H_
+#define LIB_JXL_DEC_FILE_H_
+
+// Top-level interface for JXL decoding.
+
+#include <stdint.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/dec_params.h"
+
+namespace jxl {
+
+// Decodes the preview frame (the first frame in the file), if present, into
+// `preview`; it is skipped instead if dparams.preview is Override::kOff. With
+// no preview in the metadata, does nothing, or fails if Override::kOn is set.
+Status DecodePreview(const DecompressParams& dparams,
+                     const CodecMetadata& metadata,
+                     BitReader* JXL_RESTRICT reader, ThreadPool* pool,
+                     ImageBundle* JXL_RESTRICT preview, uint64_t* dec_pixels,
+                     const SizeConstraints* constraints);
+
+// Implementation detail: currently decodes to linear sRGB. The contract is:
+// `io` appears 'identical' (modulo compression artifacts) to the encoder input
+// in a color-aware viewer. Note that `io->metadata.m.color_encoding`
+// identifies the color space that was passed to the encoder; clients that want
+// that same encoding must call `io->TransformTo` afterwards.
+Status DecodeFile(const DecompressParams& params,
+                  const Span<const uint8_t> file, CodecInOut* io,
+                  ThreadPool* pool = nullptr);
+
+static inline Status DecodeFile(const DecompressParams& params,
+                                const PaddedBytes& file, CodecInOut* io,
+                                ThreadPool* pool = nullptr) {
+  return DecodeFile(params, Span<const uint8_t>(file), io, pool);
+}  // PaddedBytes convenience overload; forwards to the Span overload.
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_FILE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_frame.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_frame.cc
new file mode 100644
index 0000000000..39da487dd4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_frame.cc
@@ -0,0 +1,1010 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_frame.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <atomic>
+#include <hwy/aligned_allocator.h>
+#include <numeric>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_group.h"
+#include "lib/jxl/dec_modular.h"
+#include "lib/jxl/dec_params.h"
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/dec_reconstruct.h"
+#include "lib/jxl/dec_upsample.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/filters.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/luminance.h"
+#include "lib/jxl/passes_state.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/splines.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+
+namespace {
+// Reads the global DC information of a frame (quantizer, block context map,
+// cmap DC data) into state->shared_storage. `pool` is currently unused.
+Status DecodeGlobalDCInfo(BitReader* reader, bool is_jpeg,
+                          PassesDecoderState* state, ThreadPool* pool) {
+  PROFILER_FUNC;
+  auto& shared = state->shared_storage;
+  JXL_RETURN_IF_ERROR(shared.quantizer.Decode(reader));
+  JXL_RETURN_IF_ERROR(DecodeBlockCtxMap(reader, &shared.block_ctx_map));
+  JXL_RETURN_IF_ERROR(shared.cmap.DecodeDC(reader));
+
+  // Pre-compute info for decoding a group.
+  if (is_jpeg) {
+    shared.quantizer.ClearDCMul();  // Don't dequant DC
+  }
+
+  shared.ac_strategy.FillInvalid();
+  return true;
+}
+}  // namespace
+
+Status DecodeFrameHeader(BitReader* JXL_RESTRICT reader,
+                         FrameHeader* JXL_RESTRICT frame_header) {
+  JXL_ASSERT(frame_header->nonserialized_metadata != nullptr);  // precondition
+  JXL_RETURN_IF_ERROR(ReadFrameHeader(reader, frame_header));
+  return true;  // the header was read into *frame_header
+}
+
+// Skips one whole frame (header, TOC and group data) without decoding it.
+Status SkipFrame(const CodecMetadata& metadata, BitReader* JXL_RESTRICT reader,
+                 bool is_preview) {
+  FrameHeader header(&metadata);
+  header.nonserialized_is_preview = is_preview;
+  JXL_RETURN_IF_ERROR(DecodeFrameHeader(reader, &header));
+
+  // Read the TOC; only the total size of all groups is used afterwards.
+  const FrameDimensions dims = header.ToFrameDimensions();
+  const size_t num_entries =
+      NumTocEntries(dims.num_groups, dims.num_dc_groups,
+                    header.passes.num_passes, /*has_ac_global=*/true);
+  std::vector<uint64_t> offsets;
+  std::vector<uint32_t> sizes;
+  uint64_t total_bytes;
+  JXL_RETURN_IF_ERROR(
+      ReadGroupOffsets(num_entries, reader, &offsets, &sizes, &total_bytes));
+
+  // Pretend all groups are read, then check we did not pass the stream end.
+  reader->SkipBits(total_bytes * kBitsPerByte);
+  const auto stream_bits = reader->TotalBytes() * kBitsPerByte;
+  if (reader->TotalBitsConsumed() > stream_bits) {
+    return JXL_FAILURE("Group code extends after stream end");
+  }
+  return true;
+}
+
+// Returns a reader for section `index` (`reader` itself if 1 group, 1 pass).
+static BitReader* GetReaderForSection(
+    size_t num_groups, size_t num_passes, size_t group_codes_begin,
+    const std::vector<uint64_t>& group_offsets,
+    const std::vector<uint32_t>& group_sizes, BitReader* JXL_RESTRICT reader,
+    BitReader* JXL_RESTRICT store, size_t index) {
+  if (num_groups == 1 && num_passes == 1) return reader;
+  const size_t group_offset = group_codes_begin + group_offsets[index];
+  const size_t next_group_offset =
+      group_codes_begin + group_offsets[index] + group_sizes[index];
+  // Must hold: group_codes_begin <= group_offset <= next_group_offset <= size
+  JXL_DASSERT(group_codes_begin <= group_offset);
+  JXL_DASSERT(group_offset <= next_group_offset);
+  JXL_DASSERT(next_group_offset <= reader->TotalBytes());
+  const size_t group_size = next_group_offset - group_offset;
+  const size_t remaining_size = reader->TotalBytes() - group_offset;
+  const size_t size = std::min(group_size + 8, remaining_size);  // 8 B slack
+  *store =
+      BitReader(Span<const uint8_t>(reader->FirstByte() + group_offset, size));
+  return store;
+}
+
+// Decodes one frame from `reader` into `decoded`. Every section that lies
+// fully inside the stream is handed to the FrameDecoder; missing sections are
+// an error unless dparams.allow_partial_files is set.
+Status DecodeFrame(const DecompressParams& dparams,
+                   PassesDecoderState* dec_state, ThreadPool* JXL_RESTRICT pool,
+                   BitReader* JXL_RESTRICT reader, ImageBundle* decoded,
+                   const CodecMetadata& metadata,
+                   const SizeConstraints* constraints, bool is_preview) {
+  PROFILER_ZONE("DecodeFrame uninstrumented");
+
+  FrameDecoder frame_decoder(dec_state, metadata, pool);
+  frame_decoder.SetFrameSizeLimits(constraints);
+  JXL_RETURN_IF_ERROR(frame_decoder.InitFrame(
+      reader, decoded, is_preview, dparams.allow_partial_files,
+      dparams.allow_partial_files && dparams.allow_more_progressive_steps));
+
+  // Handling of progressive decoding.
+  {
+    const FrameHeader& frame_header = frame_decoder.GetFrameHeader();
+    size_t max_passes = dparams.max_passes;
+    size_t max_downsampling = std::max(
+        dparams.max_downsampling >> (frame_header.dc_level * 3), size_t(1));
+    // TODO(veluca): deal with downsamplings >= 8.
+    if (max_downsampling >= 8) {
+      max_passes = 0;
+    } else {
+      for (uint32_t i = 0; i < frame_header.passes.num_downsample; ++i) {
+        if (max_downsampling >= frame_header.passes.downsample[i] &&
+            max_passes > frame_header.passes.last_pass[i]) {
+          max_passes = frame_header.passes.last_pass[i] + 1;
+        }
+      }
+    }
+    // Do not use downsampling for kReferenceOnly frames.
+    if (frame_header.frame_type == FrameType::kReferenceOnly) {
+      max_passes = frame_header.passes.num_passes;
+    }
+    max_passes = std::min<size_t>(max_passes, frame_header.passes.num_passes);
+    frame_decoder.SetMaxPasses(max_passes);
+  }
+  frame_decoder.SetRenderSpotcolors(dparams.render_spotcolors);
+
+  size_t processed_bytes = reader->TotalBitsConsumed() / kBitsPerByte;
+  Status close_ok = true;
+  std::vector<std::unique_ptr<BitReader>> section_readers;
+  {
+    std::vector<std::unique_ptr<BitReaderScopedCloser>> section_closers;
+    std::vector<FrameDecoder::SectionInfo> section_info;
+    std::vector<FrameDecoder::SectionStatus> section_status;
+    size_t bytes_to_skip = 0;
+    for (size_t i = 0; i < frame_decoder.NumSections(); i++) {
+      size_t b = frame_decoder.SectionOffsets()[i];
+      size_t e = b + frame_decoder.SectionSizes()[i];
+      bytes_to_skip += e - b;
+      size_t pos = reader->TotalBitsConsumed() / kBitsPerByte;
+      if (pos + e <= reader->TotalBytes()) {
+        auto br = make_unique<BitReader>(
+            Span<const uint8_t>(reader->FirstByte() + b + pos, e - b));
+        section_info.emplace_back(FrameDecoder::SectionInfo{br.get(), i});
+        section_closers.emplace_back(
+            make_unique<BitReaderScopedCloser>(br.get(), &close_ok));
+        section_readers.emplace_back(std::move(br));
+      } else if (!dparams.allow_partial_files) {
+        return JXL_FAILURE("Premature end of stream.");
+      }
+    }
+    // Skip over the to-be-decoded sections.
+    reader->SkipBits(kBitsPerByte * bytes_to_skip);
+    section_status.resize(section_info.size());
+
+    JXL_RETURN_IF_ERROR(frame_decoder.ProcessSections(
+        section_info.data(), section_info.size(), section_status.data()));
+
+    for (size_t i = 0; i < section_status.size(); i++) {
+      auto s = section_status[i];
+      if (s == FrameDecoder::kDone) {
+        // section_status[i] describes section_info[i]; its section id differs
+        // from i once an earlier section was left out above.
+        processed_bytes += frame_decoder.SectionSizes()[section_info[i].id];
+        continue;
+      }
+      if (dparams.allow_more_progressive_steps && s == FrameDecoder::kPartial) {
+        continue;
+      }
+      if (dparams.max_downsampling > 1 && s == FrameDecoder::kSkipped) {
+        continue;
+      }
+      return JXL_FAILURE("Invalid section %zu status: %d", section_info[i].id,
+                         s);
+    }
+  }
+  JXL_RETURN_IF_ERROR(close_ok);
+  JXL_RETURN_IF_ERROR(frame_decoder.FinalizeFrame());
+  decoded->SetDecodedBytes(processed_bytes);
+  return true;
+}
+
+// Begins a frame: reads its header and TOC from `br` and resets decoder state.
+Status FrameDecoder::InitFrame(BitReader* JXL_RESTRICT br, ImageBundle* decoded,
+                               bool is_preview, bool allow_partial_frames,
+                               bool allow_partial_dc_global) {
+  PROFILER_FUNC;
+  decoded_ = decoded;
+  JXL_ASSERT(is_finalized_);
+
+  allow_partial_frames_ = allow_partial_frames;
+  allow_partial_dc_global_ = allow_partial_dc_global;
+
+  // Reset the dequantization matrices to their default values.
+  dec_state_->shared_storage.matrices = DequantMatrices();
+
+  frame_header_.nonserialized_is_preview = is_preview;
+  JXL_RETURN_IF_ERROR(DecodeFrameHeader(br, &frame_header_));
+  frame_dim_ = frame_header_.ToFrameDimensions();
+
+  const size_t num_passes = frame_header_.passes.num_passes;
+  const size_t xsize = frame_dim_.xsize;
+  const size_t ysize = frame_dim_.ysize;
+  const size_t num_groups = frame_dim_.num_groups;
+
+  // Check validity of frame dimensions.
+  JXL_RETURN_IF_ERROR(VerifyDimensions(constraints_, xsize, ysize));
+
+  // `decoded` may keep other dimensions from a non-kRegularFrame; must reset.
+  decoded->RemoveColor();
+  decoded->ClearExtraChannels();
+
+  // Read TOC.
+  uint64_t groups_total_size;
+  const bool has_ac_global = true;
+  const size_t toc_entries = NumTocEntries(num_groups, frame_dim_.num_dc_groups,
+                                           num_passes, has_ac_global);
+  JXL_RETURN_IF_ERROR(ReadGroupOffsets(toc_entries, br, &section_offsets_,
+                                       &section_sizes_, &groups_total_size));
+
+  JXL_DASSERT((br->TotalBitsConsumed() % kBitsPerByte) == 0);
+  const size_t group_codes_begin = br->TotalBitsConsumed() / kBitsPerByte;
+  JXL_DASSERT(!section_offsets_.empty());
+
+  // Overflow check.
+  if (group_codes_begin + groups_total_size < group_codes_begin) {
+    return JXL_FAILURE("Invalid group codes");
+  }
+
+  if (!frame_header_.chroma_subsampling.Is444() &&
+      !(frame_header_.flags & FrameHeader::kSkipAdaptiveDCSmoothing) &&
+      frame_header_.encoding == FrameEncoding::kVarDCT) {
+    return JXL_FAILURE(
+        "Non-444 chroma subsampling is not allowed when adaptive DC "
+        "smoothing is enabled");
+  }
+  JXL_RETURN_IF_ERROR(
+      InitializePassesSharedState(frame_header_, &dec_state_->shared_storage));
+  JXL_RETURN_IF_ERROR(dec_state_->Init());
+  modular_frame_decoder_.Init(frame_dim_);
+
+  if (decoded->IsJPEG()) {  // size the JPEG components for this frame
+    if (frame_header_.encoding == FrameEncoding::kModular) {
+      return JXL_FAILURE("Cannot output JPEG from Modular");
+    }
+    jpeg::JPEGData* jpeg_data = decoded->jpeg_data.get();
+    size_t num_components = jpeg_data->components.size();
+    if (num_components != 1 && num_components != 3) {
+      return JXL_FAILURE("Invalid number of components");
+    }
+    if (frame_header_.nonserialized_metadata->m.xyb_encoded) {
+      return JXL_FAILURE("Cannot decode to JPEG an XYB image");
+    }
+    auto jpeg_c_map = JpegOrder(ColorTransform::kYCbCr, num_components == 1);
+    decoded->jpeg_data->width = frame_dim_.xsize;
+    decoded->jpeg_data->height = frame_dim_.ysize;
+    for (size_t c = 0; c < num_components; c++) {
+      auto& component = jpeg_data->components[jpeg_c_map[c]];
+      component.width_in_blocks =
+          frame_dim_.xsize_blocks >> frame_header_.chroma_subsampling.HShift(c);
+      component.height_in_blocks =
+          frame_dim_.ysize_blocks >> frame_header_.chroma_subsampling.VShift(c);
+      component.h_samp_factor =
+          1 << frame_header_.chroma_subsampling.RawHShift(c);
+      component.v_samp_factor =
+          1 << frame_header_.chroma_subsampling.RawVShift(c);
+      component.coeffs.resize(component.width_in_blocks *
+                              component.height_in_blocks * jxl::kDCTBlockSize);
+    }
+  }
+
+  // Clear the state.
+  decoded_dc_global_ = false;
+  decoded_ac_global_ = false;
+  is_finalized_ = false;
+  finalized_dc_ = false;
+  decoded_dc_groups_.clear();
+  decoded_dc_groups_.resize(frame_dim_.num_dc_groups);
+  decoded_passes_per_ac_group_.clear();
+  decoded_passes_per_ac_group_.resize(frame_dim_.num_groups, 0);
+  processed_section_.clear();
+  processed_section_.resize(section_offsets_.size());  // one entry per section
+  max_passes_ = frame_header_.passes.num_passes;  // start with all passes
+  num_renders_ = 0;
+
+  return true;
+}
+
+// Decodes the DC global section: optional patches/splines/noise, DC dequant
+// data, VarDCT global DC info, then the modular global info.
+Status FrameDecoder::ProcessDCGlobal(BitReader* br) {
+  PROFILER_FUNC;
+  PassesSharedState& shared = dec_state_->shared_storage;
+  auto& features = shared.image_features;
+  if (shared.frame_header.flags & FrameHeader::kPatches) {
+    bool uses_extra_channels = false;
+    JXL_RETURN_IF_ERROR(features.patches.Decode(br, frame_dim_.xsize_padded,
+                                                frame_dim_.ysize_padded,
+                                                &uses_extra_channels));
+    if (uses_extra_channels && frame_header_.upsampling != 1) {
+      for (size_t ecups : frame_header_.extra_channel_upsampling) {
+        if (ecups == frame_header_.upsampling) continue;
+        return JXL_FAILURE(
+            "Cannot use extra channels in patches if color channels are "
+            "subsampled differently from extra channels");
+      }
+    }
+  } else {
+    features.patches.Clear();
+  }
+  if (shared.frame_header.flags & FrameHeader::kSplines) {
+    JXL_RETURN_IF_ERROR(
+        features.splines.Decode(br, frame_dim_.xsize * frame_dim_.ysize));
+  }
+  if (shared.frame_header.flags & FrameHeader::kNoise) {
+    JXL_RETURN_IF_ERROR(DecodeNoise(br, &features.noise_params));
+  }
+  JXL_RETURN_IF_ERROR(shared.matrices.DecodeDC(br));
+  if (frame_header_.encoding == FrameEncoding::kVarDCT) {
+    JXL_RETURN_IF_ERROR(
+        jxl::DecodeGlobalDCInfo(br, decoded_->IsJPEG(), dec_state_, pool_));
+  }
+  Status dec_status = modular_frame_decoder_.DecodeGlobalInfo(
+      br, frame_header_, allow_partial_dc_global_);
+  if (dec_status.IsFatalError()) return dec_status;
+  // Only a fully successful decode marks the DC global section as done.
+  if (dec_status) decoded_dc_global_ = true;
+  return dec_status;
+}
+
+Status FrameDecoder::ProcessDCGroup(size_t dc_group_id, BitReader* br) {
+  PROFILER_FUNC;  // profiles the decoding of DC group `dc_group_id`
+  const size_t gx = dc_group_id % frame_dim_.xsize_dc_groups;  // group column
+  const size_t gy = dc_group_id / frame_dim_.xsize_dc_groups;  // group row
+  const LoopFilter& lf = dec_state_->shared->frame_header.loop_filter;
+  if (frame_header_.encoding == FrameEncoding::kVarDCT &&
+      !(frame_header_.flags & FrameHeader::kUseDcFrame)) {
+    JXL_RETURN_IF_ERROR(
+        modular_frame_decoder_.DecodeVarDCTDC(dc_group_id, br, dec_state_));
+  }
+  const Rect mrect(gx * frame_dim_.dc_group_dim, gy * frame_dim_.dc_group_dim,
+                   frame_dim_.dc_group_dim, frame_dim_.dc_group_dim);
+  JXL_RETURN_IF_ERROR(modular_frame_decoder_.DecodeGroup(
+      mrect, br, 3, 1000, ModularStreamId::ModularDC(dc_group_id),
+      /*zerofill=*/false));  // 3 and 1000 are the min/max shift arguments
+  if (frame_header_.encoding == FrameEncoding::kVarDCT) {
+    JXL_RETURN_IF_ERROR(
+        modular_frame_decoder_.DecodeAcMetadata(dc_group_id, br, dec_state_));
+  } else if (lf.epf_iters > 0) {  // non-VarDCT with EPF: one constant sigma
+    FillImage(kInvSigmaNum / lf.epf_sigma_for_modular,
+              &dec_state_->filter_weights.sigma);
+  }
+  decoded_dc_groups_[dc_group_id] = true;  // mark this DC group as decoded
+  return true;
+}
+
+// Adaptive DC smoothing, if enabled; must run between DC and AC group decoding.
+void FrameDecoder::FinalizeDC() {
+  const bool smoothing_enabled =
+      frame_header_.encoding == FrameEncoding::kVarDCT &&
+      !(frame_header_.flags & FrameHeader::kSkipAdaptiveDCSmoothing) &&
+      !(frame_header_.flags & FrameHeader::kUseDcFrame);
+  if (smoothing_enabled) {
+    AdaptiveDCSmoothing(dec_state_->shared->quantizer.MulDC(),
+                        &dec_state_->shared_storage.dc_storage, pool_);
+  }
+  finalized_dc_ = true;
+}
+
+// Allocates the color image (unless output goes to an RGB buffer or a pixel
+// callback) and one plane per extra channel, then records the frame origin.
+void FrameDecoder::AllocateOutput() {
+  const CodecMetadata& metadata = *frame_header_.nonserialized_metadata;
+  if (dec_state_->rgb_output == nullptr && !dec_state_->pixel_callback) {
+    decoded_->SetFromImage(Image3F(frame_dim_.xsize_upsampled_padded,
+                                   frame_dim_.ysize_upsampled_padded),
+                           dec_state_->output_encoding_info.color_encoding);
+  }
+  auto& planes = dec_state_->extra_channels;
+  planes.clear();
+  for (size_t ec = 0; ec < metadata.m.num_extra_channels; ++ec) {
+    uint32_t ecups = frame_header_.extra_channel_upsampling[ec];
+    planes.emplace_back(DivCeil(frame_dim_.xsize_upsampled_padded, ecups),
+                        DivCeil(frame_dim_.ysize_upsampled_padded, ecups));
+#if MEMORY_SANITIZER
+    // Avoid errors due to loading vectors on the outermost padding.
+    const auto y_end = DivCeil(frame_dim_.ysize_upsampled_padded, ecups);
+    const auto x_begin = DivCeil(frame_dim_.xsize_upsampled, ecups);
+    const auto x_end = DivCeil(frame_dim_.xsize_upsampled_padded, ecups);
+    for (size_t y = 0; y < y_end; y++) {
+      for (size_t x = x_begin; x < x_end; x++) {
+        planes.back().Row(y)[x] = msan::kSanitizerSentinel;
+      }
+    }
+#endif
+  }
+  decoded_->origin = dec_state_->shared->frame_header.frame_origin;
+}
+
+// Decodes the AC global section (for VarDCT: dequant matrices, coefficient
+// orders and histograms) and sets up JPEG and pre-color-transform outputs.
+Status FrameDecoder::ProcessACGlobal(BitReader* br) {
+  JXL_CHECK(finalized_dc_);
+  JXL_CHECK(decoded_->HasColor() || dec_state_->rgb_output != nullptr ||
+            !!dec_state_->pixel_callback);
+
+  // Decode AC group.
+  if (frame_header_.encoding == FrameEncoding::kVarDCT) {
+    JXL_RETURN_IF_ERROR(dec_state_->shared_storage.matrices.Decode(
+        br, &modular_frame_decoder_));
+    size_t num_histo_bits =
+        CeilLog2Nonzero(dec_state_->shared->frame_dim.num_groups);
+    dec_state_->shared_storage.num_histograms =
+        1 + br->ReadBits(num_histo_bits);
+    dec_state_->code.resize(kMaxNumPasses);
+    dec_state_->context_map.resize(kMaxNumPasses);
+    // Read coefficient orders and histograms.
+    size_t max_num_bits_ac = 0;
+    for (size_t i = 0;
+         i < dec_state_->shared_storage.frame_header.passes.num_passes; i++) {
+      uint16_t used_orders = U32Coder::Read(kOrderEnc, br);
+      JXL_RETURN_IF_ERROR(DecodeCoeffOrders(
+          used_orders, dec_state_->used_acs,
+          &dec_state_->shared_storage
+               .coeff_orders[i * dec_state_->shared_storage.coeff_order_size],
+          br));
+      size_t num_contexts =
+          dec_state_->shared->num_histograms *
+          dec_state_->shared_storage.block_ctx_map.NumACContexts();
+      JXL_RETURN_IF_ERROR(DecodeHistograms(
+          br, num_contexts, &dec_state_->code[i], &dec_state_->context_map[i]));
+      // Add extra values to enable the cheat in hot loop of DecodeACVarBlock.
+      dec_state_->context_map[i].resize(
+          num_contexts + kZeroDensityContextLimit - kZeroDensityContextCount);
+      max_num_bits_ac =
+          std::max(max_num_bits_ac, dec_state_->code[i].max_num_bits);
+    }
+    max_num_bits_ac += CeilLog2Nonzero(
+        dec_state_->shared_storage.frame_header.passes.num_passes);
+    // 16-bit buffer for decoding to JPEG are not implemented.
+    // TODO(veluca): figure out the exact limit - 16 should still work with
+    // 16-bit buffers, but we are excluding it for safety.
+    bool use_16_bit = max_num_bits_ac < 16 && !decoded_->IsJPEG();
+    bool store = frame_header_.passes.num_passes > 1;
+    size_t xs = store ? kGroupDim * kGroupDim : 0;
+    size_t ys = store ? frame_dim_.num_groups : 0;
+    if (use_16_bit) {
+      dec_state_->coefficients = make_unique<ACImageT<int16_t>>(xs, ys);
+    } else {
+      dec_state_->coefficients = make_unique<ACImageT<int32_t>>(xs, ys);
+    }
+    if (store) {
+      dec_state_->coefficients->ZeroFill();
+    }
+  }
+
+  // Set JPEG decoding data.
+  if (decoded_->IsJPEG()) {
+    decoded_->color_transform = frame_header_.color_transform;
+    decoded_->chroma_subsampling = frame_header_.chroma_subsampling;
+    const std::vector<QuantEncoding>& qe =
+        dec_state_->shared_storage.matrices.encodings();
+    if (qe.empty() || qe[0].mode != QuantEncoding::Mode::kQuantModeRAW ||
+        std::abs(qe[0].qraw.qtable_den - 1.f / (8 * 255)) > 1e-8f) {
+      return JXL_FAILURE(
+          "Quantization table is not a JPEG quantization table.");
+    }
+    jpeg::JPEGData* jpeg_data = decoded_->jpeg_data.get();
+    size_t num_components = jpeg_data->components.size();
+    bool is_gray = (num_components == 1);
+    auto jpeg_c_map = JpegOrder(frame_header_.color_transform, is_gray);
+    for (size_t c = 0; c < num_components; c++) {
+      // TODO(eustas): why 1-st quant table for gray?
+      size_t quant_c = is_gray ? 1 : c;
+      size_t qpos = jpeg_data->components[jpeg_c_map[c]].quant_idx;
+      // Reject any out-of-range quant table index before writing to it.
+      JXL_CHECK(qpos < jpeg_data->quant.size());
+      for (size_t x = 0; x < 8; x++) {
+        for (size_t y = 0; y < 8; y++) {
+          jpeg_data->quant[qpos].values[x * 8 + y] =
+              (*qe[0].qraw.qtable)[quant_c * 64 + y * 8 + x];
+        }
+      }
+    }
+  }
+  // Set memory buffer for pre-color-transform frame, if needed.
+  if (frame_header_.needs_color_transform() &&
+      frame_header_.save_before_color_transform) {
+    dec_state_->pre_color_transform_frame =
+        Image3F(frame_dim_.xsize_upsampled, frame_dim_.ysize_upsampled);
+  } else {
+    // Clear it so that a previously moved-from image is never used.
+    dec_state_->pre_color_transform_frame = Image3F();
+  }
+  decoded_ac_global_ = true;
+  return true;
+}
+
+Status FrameDecoder::ProcessACGroup(size_t ac_group_id,
+                                    BitReader* JXL_RESTRICT* br,
+                                    size_t num_passes, size_t thread,
+                                    bool force_draw, bool dc_only) {
+  PROFILER_ZONE("process_group");  // decodes num_passes passes of one group
+  const size_t gx = ac_group_id % frame_dim_.xsize_groups;  // group column
+  const size_t gy = ac_group_id / frame_dim_.xsize_groups;  // group row
+  const size_t x = gx * frame_dim_.group_dim;  // group origin (x)
+  const size_t y = gy * frame_dim_.group_dim;  // group origin (y)
+
+  if (frame_header_.encoding == FrameEncoding::kVarDCT) {
+    group_dec_caches_[thread].InitOnce(frame_header_.passes.num_passes,
+                                       dec_state_->used_acs);
+    JXL_RETURN_IF_ERROR(DecodeGroup(
+        br, num_passes, ac_group_id, dec_state_, &group_dec_caches_[thread],
+        thread, decoded_, decoded_passes_per_ac_group_[ac_group_id], force_draw,
+        dc_only));
+  }
+
+  // don't limit to image dimensions here (is done in DecodeGroup)
+  const Rect mrect(x, y, frame_dim_.group_dim, frame_dim_.group_dim);
+  for (size_t i = 0; i < frame_header_.passes.num_passes; i++) {
+    int minShift, maxShift;  // outputs of the call below
+    frame_header_.passes.GetDownsamplingBracket(i, minShift, maxShift);
+    if (i >= decoded_passes_per_ac_group_[ac_group_id] &&
+        i < decoded_passes_per_ac_group_[ac_group_id] + num_passes) {
+      JXL_RETURN_IF_ERROR(modular_frame_decoder_.DecodeGroup(
+          mrect, br[i - decoded_passes_per_ac_group_[ac_group_id]], minShift,
+          maxShift, ModularStreamId::ModularAC(ac_group_id, i),
+          /*zerofill=*/false));  // pass i is one of those supplied in `br`
+    } else if (i >= decoded_passes_per_ac_group_[ac_group_id] + num_passes &&
+               force_draw) {  // later passes: zero-fill when forced to draw
+      JXL_RETURN_IF_ERROR(modular_frame_decoder_.DecodeGroup(
+          mrect, nullptr, minShift, maxShift,
+          ModularStreamId::ModularAC(ac_group_id, i), /*zerofill=*/true));
+    }
+  }
+  decoded_passes_per_ac_group_[ac_group_id] += num_passes;  // record progress
+  return true;
+}
+
+Status FrameDecoder::ProcessSections(const SectionInfo* sections, size_t num,
+                                     SectionStatus* section_status) {
+  if (num == 0) return true;  // Nothing to process
+  std::fill(section_status, section_status + num, SectionStatus::kSkipped);
+  size_t dc_global_sec = num;  // `num` means "not present in this call"
+  size_t ac_global_sec = num;
+  std::vector<size_t> dc_group_sec(frame_dim_.num_dc_groups, num);
+  std::vector<std::vector<size_t>> ac_group_sec(
+      frame_dim_.num_groups,
+      std::vector<size_t>(frame_header_.passes.num_passes, num));
+  std::vector<size_t> num_ac_passes(frame_dim_.num_groups);
+  if (frame_dim_.num_groups == 1 && frame_header_.passes.num_passes == 1) {
+    JXL_ASSERT(num == 1);
+    JXL_ASSERT(sections[0].id == 0);  // one section carries every part
+    if (processed_section_[0] == false) {
+      processed_section_[0] = true;
+      ac_group_sec[0].resize(1);
+      dc_global_sec = ac_global_sec = dc_group_sec[0] = ac_group_sec[0][0] = 0;
+      num_ac_passes[0] = 1;
+    } else {
+      section_status[0] = SectionStatus::kDuplicate;
+    }
+  } else {
+    size_t ac_global_index = frame_dim_.num_dc_groups + 1;  // ids below: DC
+    for (size_t i = 0; i < num; i++) {
+      JXL_ASSERT(sections[i].id < processed_section_.size());
+      if (processed_section_[sections[i].id]) {
+        section_status[i] = SectionStatus::kDuplicate;
+        continue;
+      }
+      if (sections[i].id == 0) {
+        dc_global_sec = i;
+      } else if (sections[i].id < ac_global_index) {
+        dc_group_sec[sections[i].id - 1] = i;
+      } else if (sections[i].id == ac_global_index) {
+        ac_global_sec = i;
+      } else {
+        size_t ac_idx = sections[i].id - ac_global_index - 1;
+        size_t acg = ac_idx % frame_dim_.num_groups;  // AC group index
+        size_t acp = ac_idx / frame_dim_.num_groups;  // pass index
+        if (acp >= frame_header_.passes.num_passes) {
+          return JXL_FAILURE("Invalid section ID");
+        }
+        if (acp >= max_passes_) {
+          continue;  // stays kSkipped and is not marked as processed
+        }
+        ac_group_sec[acg][acp] = i;
+      }
+      processed_section_[sections[i].id] = true;
+    }
+    // Count the new passes of each group, stopping at the first missing one.
+    for (size_t g = 0; g < ac_group_sec.size(); g++) {
+      size_t j = 0;
+      for (; j + decoded_passes_per_ac_group_[g] < max_passes_; j++) {
+        if (ac_group_sec[g][j + decoded_passes_per_ac_group_[g]] == num) {
+          break;
+        }
+      }
+      num_ac_passes[g] = j;
+    }
+  }
+  if (dc_global_sec != num) {
+    Status dc_global_status = ProcessDCGlobal(sections[dc_global_sec].br);
+    if (dc_global_status.IsFatalError()) return dc_global_status;
+    if (dc_global_status) {
+      section_status[dc_global_sec] = SectionStatus::kDone;
+    } else {
+      section_status[dc_global_sec] = SectionStatus::kPartial;
+    }
+  }
+
+  std::atomic<bool> has_error{false};  // set from the pool callbacks below
+  if (decoded_dc_global_) {
+    RunOnPool(
+        pool_, 0, dc_group_sec.size(), ThreadPool::SkipInit(),
+        [this, &dc_group_sec, &num, &sections, &section_status, &has_error](
+            size_t i, size_t thread) {
+          if (dc_group_sec[i] != num) {
+            if (!ProcessDCGroup(i, sections[dc_group_sec[i]].br)) {
+              has_error = true;
+            } else {
+              section_status[dc_group_sec[i]] = SectionStatus::kDone;
+            }
+          }
+        },
+        "DecodeDCGroup");
+  }
+  if (has_error) return JXL_FAILURE("Error in DC group");
+
+  if (*std::min_element(decoded_dc_groups_.begin(), decoded_dc_groups_.end()) ==
+          true &&
+      !finalized_dc_) {  // every DC group is decoded: finalize DC once
+    FinalizeDC();
+    AllocateOutput();
+  }
+
+  if (finalized_dc_) dec_state_->EnsureBordersStorage();
+  if (finalized_dc_ && ac_global_sec != num && !decoded_ac_global_) {
+    dec_state_->InitForAC(pool_);
+    JXL_RETURN_IF_ERROR(ProcessACGlobal(sections[ac_global_sec].br));
+    section_status[ac_global_sec] = SectionStatus::kDone;
+  }
+
+  if (decoded_ac_global_) {
+    // The decoded image requires padding for filtering. ProcessACGlobal added
+    // the padding, however when Flush is used, the image is shrunk to the
+    // output size. Add the padding back here. This is a cheap operation
+    // since the image has the original allocated size. The memory and original
+    // size are already there, but for safety we require the indicated xsize and
+    // ysize dimensions match the working area, see PlaneRowBoundsCheck.
+    decoded_->ShrinkTo(frame_dim_.xsize_upsampled_padded,
+                       frame_dim_.ysize_upsampled_padded);
+
+    // Mark all the AC groups that we received as not complete yet.
+    for (size_t i = 0; i < ac_group_sec.size(); i++) {
+      if (num_ac_passes[i] == 0) continue;
+      dec_state_->group_border_assigner.ClearDone(i);
+    }
+
+    RunOnPool(
+        pool_, 0, ac_group_sec.size(),
+        [this](size_t num_threads) {
+          PrepareStorage(num_threads, decoded_passes_per_ac_group_.size());
+          return true;
+        },
+        [this, &ac_group_sec, &num_ac_passes, &num, &sections, &section_status,
+         &has_error](size_t g, size_t thread) {
+          if (num_ac_passes[g] == 0) {  // no new AC pass, nothing to do.
+            return;
+          }
+          (void)num;  // presumably for builds where JXL_ASSERT is a no-op
+          size_t first_pass = decoded_passes_per_ac_group_[g];
+          BitReader* JXL_RESTRICT readers[kMaxNumPasses];
+          for (size_t i = 0; i < num_ac_passes[g]; i++) {
+            JXL_ASSERT(ac_group_sec[g][first_pass + i] != num);
+            readers[i] = sections[ac_group_sec[g][first_pass + i]].br;
+          }
+          if (!ProcessACGroup(g, readers, num_ac_passes[g],
+                              GetStorageLocation(thread, g),
+                              /*force_draw=*/false, /*dc_only=*/false)) {
+            has_error = true;
+          } else {
+            for (size_t i = 0; i < num_ac_passes[g]; i++) {
+              section_status[ac_group_sec[g][first_pass + i]] =
+                  SectionStatus::kDone;
+            }
+          }
+        },
+        "DecodeGroup");
+  }
+  if (has_error) return JXL_FAILURE("Error in AC group");
+
+  for (size_t i = 0; i < num; i++) {
+    if (section_status[i] == SectionStatus::kSkipped ||
+        section_status[i] == SectionStatus::kPartial) {
+      processed_section_[sections[i].id] = false;  // may be submitted again
+    }
+  }
+  return true;
+}
+
+Status FrameDecoder::Flush() {
+  bool has_blending = frame_header_.blending_info.mode != BlendMode::kReplace ||
+                      frame_header_.custom_size_or_origin;
+  for (const auto& blending_info_ec :
+       frame_header_.extra_channel_blending_info) {
+    if (blending_info_ec.mode != BlendMode::kReplace) has_blending = true;
+  }
+  // No early Flush() if blending is enabled: fails until the frame is final.
+  if (has_blending && !is_finalized_) {
+    return false;
+  }
+  // No early Flush() - nothing to do - if the frame is a kSkipProgressive
+  // frame.
+  if (frame_header_.frame_type == FrameType::kSkipProgressive &&
+      !is_finalized_) {
+    return true;
+  }
+  if (decoded_->IsJPEG()) {
+    // Nothing to do.
+    return true;
+  }
+  uint32_t completely_decoded_ac_pass = *std::min_element(
+      decoded_passes_per_ac_group_.begin(), decoded_passes_per_ac_group_.end());
+  if (completely_decoded_ac_pass < frame_header_.passes.num_passes) {
+    // We don't have all AC yet: force a draw of all the missing areas.
+    // Mark every group that still misses passes as not done.
+    for (size_t i = 0; i < decoded_passes_per_ac_group_.size(); i++) {
+      if (decoded_passes_per_ac_group_[i] == frame_header_.passes.num_passes)
+        continue;
+      dec_state_->group_border_assigner.ClearDone(i);
+    }
+    std::atomic<bool> has_error{false};  // set from the pool callback below
+    RunOnPool(
+        pool_, 0, decoded_passes_per_ac_group_.size(),
+        [this](size_t num_threads) {
+          PrepareStorage(num_threads, decoded_passes_per_ac_group_.size());
+          return true;
+        },
+        [this, &has_error](size_t g, size_t thread) {
+          if (decoded_passes_per_ac_group_[g] ==
+              frame_header_.passes.num_passes) {
+            // This group was drawn already, nothing to do.
+            return;
+          }
+          BitReader* JXL_RESTRICT readers[kMaxNumPasses] = {};  // all null
+          bool ok = ProcessACGroup(
+              g, readers, /*num_passes=*/0, GetStorageLocation(thread, g),
+              /*force_draw=*/true, /*dc_only=*/!decoded_ac_global_);
+          if (!ok) has_error = true;
+        },
+        "ForceDrawGroup");
+    if (has_error) {
+      return JXL_FAILURE("Drawing groups failed");
+    }
+  }
+  // TODO(veluca): the rest of this function should be removed once we have full
+  // support for per-group decoding.
+
+  // undo global modular transforms and copy int pixel buffers to float ones
+  JXL_RETURN_IF_ERROR(
+      modular_frame_decoder_.FinalizeDecoding(dec_state_, pool_, decoded_));
+
+  JXL_RETURN_IF_ERROR(FinalizeFrameDecoding(decoded_, dec_state_, pool_,
+                                            /*force_fir=*/false,
+                                            /*skip_blending=*/false));
+
+  num_renders_++;  // counts only flushes that got this far without error
+  return true;
+}
+
+int FrameDecoder::SavedAs(const FrameHeader& header) {
+  // DC frames map to mask values 16, 32, 64, 128: one bit per DC level.
+  if (header.frame_type == FrameType::kDCFrame) {
+    return 16 << (header.dc_level - 1);
+  }
+  // Referenceable frames map to mask values 1, 2, 4, 8: one bit per slot.
+  if (header.CanBeReferenced()) {
+    return 1 << header.save_as_reference;
+  }
+  return 0;  // Not stored anywhere.
+}
+
+int FrameDecoder::References() const {
+  // A finalized frame reports no dependencies.
+  if (is_finalized_) return 0;
+  // Neither does a frame that is not completely decoded: both global
+  // sections, every DC group and `max_passes_` passes of every AC group.
+  if (!decoded_dc_global_ || !decoded_ac_global_) return 0;
+  const auto min_dc_done =
+      *std::min_element(decoded_dc_groups_.begin(), decoded_dc_groups_.end());
+  if (min_dc_done != 1) return 0;
+  const auto min_ac_passes =
+      *std::min_element(decoded_passes_per_ac_group_.begin(),
+                        decoded_passes_per_ac_group_.end());
+  if (min_ac_passes < max_passes_) return 0;
+
+  int mask = 0;
+
+  // Blending: one bit per reference slot used as a blend source.
+  if (frame_header_.frame_type == FrameType::kRegularFrame ||
+      frame_header_.frame_type == FrameType::kSkipProgressive) {
+    const bool cropped = frame_header_.custom_size_or_origin;
+    const bool blends_color =
+        frame_header_.blending_info.mode != BlendMode::kReplace;
+    if (cropped || blends_color) {
+      mask |= (1 << frame_header_.blending_info.source);
+    }
+    for (const auto& ec_info : frame_header_.extra_channel_blending_info) {
+      const bool blends_ec = ec_info.mode != BlendMode::kReplace;
+      if (cropped || blends_ec) mask |= (1 << ec_info.source);
+    }
+  }
+
+  // Patches: the patches object reports its own reference mask.
+  if (frame_header_.flags & FrameHeader::kPatches) {
+    mask |= dec_state_->shared->image_features.patches.GetReferences();
+  }
+
+  // DC level: a frame using a DC frame reads from the next DC level, whose
+  // mask values are 16, 32, 64, 128.
+  if (frame_header_.flags & FrameHeader::kUseDcFrame) {
+    const int source_level = frame_header_.dc_level + 1;
+    mask |= (16 << (source_level - 1));
+  }
+  return mask;
+}
+
+Status FrameDecoder::FinalizeFrame() {
+  if (is_finalized_) {
+    return JXL_FAILURE("FinalizeFrame called multiple times");
+  }
+  is_finalized_ = true;  // set up front, so it stays set if a step below fails
+  if (decoded_->IsJPEG()) {
+    // Nothing to do.
+    return true;
+  }
+  if (!finalized_dc_) {
+    // We don't have all of DC: EPF might not behave correctly (and is not
+    // particularly useful anyway on upsampling results), so we disable it.
+    dec_state_->shared_storage.frame_header.loop_filter.epf_iters = 0;
+  }
+  if ((!decoded_dc_global_ || !decoded_ac_global_ ||
+       *std::min_element(decoded_dc_groups_.begin(),
+                         decoded_dc_groups_.end()) != 1 ||
+       *std::min_element(decoded_passes_per_ac_group_.begin(),
+                         decoded_passes_per_ac_group_.end()) < max_passes_) &&
+      !allow_partial_frames_) {
+    return JXL_FAILURE(
+        "FinalizeFrame called before the frame was fully decoded");
+  }
+
+  if (!finalized_dc_) {
+    JXL_ASSERT(allow_partial_frames_);
+    AllocateOutput();
+    dec_state_->InitForAC(nullptr);  // no thread pool is passed here
+  }
+
+  JXL_RETURN_IF_ERROR(Flush());
+
+  if (dec_state_->shared->frame_header.CanBeReferenced()) {
+    size_t id = dec_state_->shared->frame_header.save_as_reference;
+    auto& reference_frame = dec_state_->shared_storage.reference_frames[id];
+    if (dec_state_->pre_color_transform_frame.xsize() == 0) {
+      reference_frame.storage = decoded_->Copy();  // save the decoded frame
+    } else {
+      reference_frame.storage = ImageBundle(decoded_->metadata());
+      reference_frame.storage.SetFromImage(
+          std::move(dec_state_->pre_color_transform_frame),
+          decoded_->c_current());
+      if (decoded_->HasExtraChannels()) {
+        const std::vector<ImageF>* ecs = &dec_state_->pre_color_transform_ec;
+        if (ecs->empty()) ecs = &decoded_->extra_channels();
+        std::vector<ImageF> extra_channels;
+        for (const auto& ec : *ecs) {
+          extra_channels.push_back(CopyImage(ec));
+        }
+        reference_frame.storage.SetExtraChannels(std::move(extra_channels));
+      }
+    }
+    reference_frame.frame = &reference_frame.storage;
+    reference_frame.ib_is_in_xyb =
+        dec_state_->shared->frame_header.save_before_color_transform;
+    if (!dec_state_->shared->frame_header.save_before_color_transform) {
+      const CodecMetadata* metadata =
+          dec_state_->shared->frame_header.nonserialized_metadata;
+      if (reference_frame.frame->xsize() < metadata->xsize() ||
+          reference_frame.frame->ysize() < metadata->ysize()) {
+        return JXL_FAILURE(
+            "trying to save a reference frame that is too small: %zux%zu "
+            "instead of %zux%zu",
+            reference_frame.frame->xsize(), reference_frame.frame->ysize(),
+            metadata->xsize(), metadata->ysize());
+      }
+      reference_frame.storage.ShrinkTo(metadata->xsize(), metadata->ysize());
+    }
+  }
+  if (frame_header_.nonserialized_is_preview) {
+    // Fix possible larger image size (multiple of kBlockDim)
+    // TODO(lode): verify if and when that happens.
+    decoded_->ShrinkTo(frame_dim_.xsize, frame_dim_.ysize);
+  } else if (!decoded_->IsJPEG()) {  // NOTE(review): JPEG returned above
+    // A kRegularFrame is blended with the other frames, and thus results in a
+    // coalesced frame of size equal to image dimensions. Other frames are not
+    // blended, thus their final size is the size that was defined in the
+    // frame_header.
+    if (frame_header_.frame_type == kRegularFrame ||
+        frame_header_.frame_type == kSkipProgressive) {
+      decoded_->ShrinkTo(
+          dec_state_->shared->frame_header.nonserialized_metadata->xsize(),
+          dec_state_->shared->frame_header.nonserialized_metadata->ysize());
+    } else {
+      // xsize_upsampled is the actual frame size, after any upsampling has been
+      // applied.
+      decoded_->ShrinkTo(frame_dim_.xsize_upsampled,
+                         frame_dim_.ysize_upsampled);
+    }
+  }
+
+  if (render_spotcolors_) {
+    for (size_t i = 0; i < decoded_->extra_channels().size(); i++) {
+      // Don't use Find() because there may be multiple spot color channels.
+      const ExtraChannelInfo& eci = decoded_->metadata()->extra_channel_info[i];
+      if (eci.type == ExtraChannel::kOptional) {
+        continue;
+      }
+      if (eci.type == ExtraChannel::kUnknown ||
+          (int(ExtraChannel::kReserved0) <= int(eci.type) &&
+           int(eci.type) <= int(ExtraChannel::kReserved7))) {
+        return JXL_FAILURE(
+            "Unknown extra channel (bits %u, shift %u, name '%s')\n",
+            eci.bit_depth.bits_per_sample, eci.dim_shift, eci.name.c_str());
+      }
+      if (eci.type == ExtraChannel::kSpotColor) {
+        float scale = eci.spot_color[3];  // 4th component scales the mix
+        for (size_t c = 0; c < 3; c++) {
+          for (size_t y = 0; y < decoded_->ysize(); y++) {
+            float* JXL_RESTRICT p = decoded_->color()->Plane(c).Row(y);
+            const float* JXL_RESTRICT s =
+                decoded_->extra_channels()[i].ConstRow(y);
+            for (size_t x = 0; x < decoded_->xsize(); x++) {
+              float mix = scale * s[x];  // weight of the spot color
+              p[x] = mix * eci.spot_color[c] + (1.0 - mix) * p[x];
+            }
+          }
+        }
+      }
+    }
+  }
+  if (dec_state_->shared->frame_header.dc_level != 0) {
+    dec_state_->shared_storage
+        .dc_frames[dec_state_->shared->frame_header.dc_level - 1] =
+        std::move(*decoded_->color());  // color planes become the DC frame
+    decoded_->RemoveColor();
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_frame.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_frame.h
new file mode 100644
index 0000000000..0c86feb8ab
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_frame.h
@@ -0,0 +1,281 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_FRAME_H_
+#define LIB_JXL_DEC_FRAME_H_
+
+#include <stdint.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/blending.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_modular.h"
+#include "lib/jxl/dec_params.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+// TODO(veluca): remove DecodeFrameHeader once the API migrates to FrameDecoder.
+
+// `frame_header` must have nonserialized_metadata and
+// nonserialized_is_preview set.
+Status DecodeFrameHeader(BitReader* JXL_RESTRICT reader,
+                         FrameHeader* JXL_RESTRICT frame_header);
+
+// Decodes a frame. Groups may be processed in parallel by `pool`.
+// NOTE(review): earlier wording referred to `io` and `c_decoded`, which are
+// not parameters here; the size limits presumably come from `constraints`.
+// Also updates `dec_state` with the new frame header.
+// `metadata` is the metadata that applies to all frames of the codestream
+// `decoded->metadata` must already be set and must match metadata.m.
+Status DecodeFrame(const DecompressParams& dparams,
+                   PassesDecoderState* dec_state, ThreadPool* JXL_RESTRICT pool,
+                   BitReader* JXL_RESTRICT reader, ImageBundle* decoded,
+                   const CodecMetadata& metadata,
+                   const SizeConstraints* constraints, bool is_preview = false);
+
+// Leaves reader in the same state as DecodeFrame would. Used to skip preview.
+// NOTE(review): unlike DecodeFrame, this takes no `dec_state` argument.
+Status SkipFrame(const CodecMetadata& metadata, BitReader* JXL_RESTRICT reader,
+                 bool is_preview = false);
+
+// TODO(veluca): implement "forced drawing".
+class FrameDecoder {
+ public:
+  // All parameters must outlive the FrameDecoder.
+  FrameDecoder(PassesDecoderState* dec_state, const CodecMetadata& metadata,
+               ThreadPool* pool)
+      : dec_state_(dec_state), pool_(pool), frame_header_(&metadata) {}
+
+  // `constraints` must outlive the FrameDecoder if not null, or stay alive
+  // until the next call to SetFrameSizeLimits.
+  void SetFrameSizeLimits(const SizeConstraints* constraints) {
+    constraints_ = constraints;
+  }
+  void SetRenderSpotcolors(bool rsc) { render_spotcolors_ = rsc; }
+
+  // Read FrameHeader and table of contents from the given BitReader.
+  // Also checks frame dimensions for their limits, and sets the output
+  // image buffer.
+  // TODO(veluca): remove the `allow_partial_frames` flag - this should be moved
+  // to callers.
+  Status InitFrame(BitReader* JXL_RESTRICT br, ImageBundle* decoded,
+                   bool is_preview, bool allow_partial_frames,
+                   bool allow_partial_dc_global);
+
+  struct SectionInfo {
+    BitReader* JXL_RESTRICT br;  // reader over the data of this section only
+    size_t id;                   // index of the section
+  };
+
+  enum SectionStatus {
+    // Processed correctly.
+    kDone = 0,
+    // Skipped because other required sections were not yet processed.
+    kSkipped = 1,
+    // Skipped because the section was already processed.
+    kDuplicate = 2,
+    // Only partially decoded: the section will need to be processed again.
+    kPartial = 3,
+  };
+
+  // Processes `num` sections; each SectionInfo contains the index
+  // of the section and a BitReader that only contains the data of the section.
+  // `section_status` should point to `num` elements, and will be filled with
+  // information about whether each section was processed or not.
+  // A section is a part of the encoded file that is indexed by the TOC.
+  Status ProcessSections(const SectionInfo* sections, size_t num,
+                         SectionStatus* section_status);
+
+  // Flushes all the data decoded so far to pixels.
+  Status Flush();
+
+  // Runs final operations once a frame data is decoded.
+  // Must be called exactly once per frame, after all calls to ProcessSections.
+  Status FinalizeFrame();
+
+  // Returns dependencies of this frame on reference ids as a bit mask: bits 0-3
+  // indicate reference frame 0-3 for patches and blending, bits 4-7 indicate DC
+  // frames this frame depends on. Only returns a valid result after all calls
+  // to ProcessSections are finished and before FinalizeFrame.
+  int References() const;
+
+  // Returns reference id of storage location where this frame is stored as a
+  // bit flag, or 0 if not stored.
+  // Matches the bit mask used by References(): bits 0-3 indicate it is stored
+  // for patching or blending, bits 4-7 indicate DC frame.
+  // Unlike References, can be run at any time as
+  // soon as the frame header is known.
+  static int SavedAs(const FrameHeader& header);
+
+  // Returns offset of this section after the end of the TOC. The end of the TOC
+  // is the byte position of the bit reader after InitFrame was called.
+  const std::vector<uint64_t>& SectionOffsets() const {
+    return section_offsets_;
+  }
+  const std::vector<uint32_t>& SectionSizes() const { return section_sizes_; }
+  size_t NumSections() const { return section_sizes_.size(); }
+
+  // TODO(veluca): remove once we remove --downsampling flag.
+  void SetMaxPasses(size_t max_passes) { max_passes_ = max_passes; }
+  const FrameHeader& GetFrameHeader() const { return frame_header_; }
+
+  // Returns whether a DC image has been decoded, accessible at low resolution
+  // at passes.shared_storage.dc_storage
+  bool HasDecodedDC() const {
+    return frame_header_.encoding == FrameEncoding::kVarDCT && finalized_dc_;
+  }
+
+  // Sets the buffer to which uint8 sRGB pixels will be decoded. This is not
+  // supported for all images. If it succeeds, HasRGBBuffer() will return true.
+  // If it does not succeed, the image is decoded to the ImageBundle passed to
+  // InitFrame instead.
+  // If an output callback is set, this function *may not* be called.
+  //
+  // @param undo_orientation: if true, indicates the frame decoder should apply
+  // the exif orientation to bring the image to the intended display
+  // orientation. Performing this operation is not yet supported, so this
+  // results in not setting the buffer if the image has a non-identity EXIF
+  // orientation. When outputting to the ImageBundle, no orientation is undone.
+  void MaybeSetRGB8OutputBuffer(uint8_t* rgb_output, size_t stride,
+                                bool is_rgba, bool undo_orientation) const {
+    if (!CanDoLowMemoryPath(undo_orientation)) return;
+    dec_state_->rgb_output = rgb_output;
+    dec_state_->rgb_output_is_rgba = is_rgba;
+    dec_state_->rgb_stride = stride;
+    JXL_ASSERT(dec_state_->pixel_callback == nullptr);
+#if !JXL_HIGH_PRECISION
+    if (decoded_->metadata()->xyb_encoded &&
+        dec_state_->output_encoding_info.color_encoding.IsSRGB() &&
+        dec_state_->output_encoding_info.all_default_opsin &&
+        HasFastXYBTosRGB8() && frame_header_.needs_color_transform()) {
+      dec_state_->fast_xyb_srgb8_conversion = true;
+    }
+#endif
+  }
+
+  // Same as MaybeSetRGB8OutputBuffer, but with a float callback. This is not
+  // supported for all images. If it succeeds, HasRGBBuffer() will return true.
+  // If it does not succeed, the image is decoded to the ImageBundle passed to
+  // InitFrame instead.
+  // If a RGB8 output buffer is set, this function *may not* be called.
+  //
+  // @param undo_orientation: if true, indicates the frame decoder should apply
+  // the exif orientation to bring the image to the intended display
+  // orientation. Performing this operation is not yet supported, so this
+  // results in not setting the buffer if the image has a non-identity EXIF
+  // orientation. When outputting to the ImageBundle, no orientation is undone.
+  void MaybeSetFloatCallback(
+      const std::function<void(const float* pixels, size_t x, size_t y,
+                               size_t num_pixels)>& cb,
+      bool is_rgba, bool undo_orientation) const {
+    if (!CanDoLowMemoryPath(undo_orientation)) return;
+    dec_state_->pixel_callback = cb;
+    dec_state_->rgb_output_is_rgba = is_rgba;
+    JXL_ASSERT(dec_state_->rgb_output == nullptr);
+  }
+
+  // Returns true if the rgb output buffer passed by MaybeSetRGB8OutputBuffer
+  // has been/will be populated by Flush() / FinalizeFrame(), or if a pixel
+  // callback has been used.
+  bool HasRGBBuffer() const {
+    return dec_state_->rgb_output != nullptr ||
+           dec_state_->pixel_callback != nullptr;
+  }
+
+ private:
+  Status ProcessDCGlobal(BitReader* br);
+  Status ProcessDCGroup(size_t dc_group_id, BitReader* br);
+  void FinalizeDC();
+  void AllocateOutput();
+  Status ProcessACGlobal(BitReader* br);
+  Status ProcessACGroup(size_t ac_group_id, BitReader* JXL_RESTRICT* br,
+                        size_t num_passes, size_t thread, bool force_draw,
+                        bool dc_only);
+
+  // Allocates storage for parallel decoding using up to `num_threads` threads
+  // of up to `num_tasks` tasks. The value of `thread` passed to
+  // `GetStorageLocation` must be smaller than the `num_threads` value passed
+  // here. The value of `task` passed to `GetStorageLocation` must be smaller
+  // than the value of `num_tasks` passed here.
+  void PrepareStorage(size_t num_threads, size_t num_tasks) {
+    size_t storage_size = std::min(num_threads, num_tasks);
+    if (storage_size > group_dec_caches_.size()) {
+      group_dec_caches_.resize(storage_size);
+    }
+    dec_state_->EnsureStorage(storage_size);
+    use_task_id_ = num_threads > num_tasks;  // fewer tasks than threads
+  }
+
+  size_t GetStorageLocation(size_t thread, size_t task) {
+    if (use_task_id_) return task;
+    return thread;
+  }
+
+  // If the image has default exif orientation (or has an orientation but should
+  // not be undone) and no blending, the current frame cannot be referenced by
+  // future frames, there are no spot colors to be rendered, and alpha is not
+  // premultiplied, then low memory options can be used
+  // (uint8 output buffer or float pixel callback).
+  // TODO(veluca): reduce this set of restrictions.
+  bool CanDoLowMemoryPath(bool undo_orientation) const {
+    if (undo_orientation &&
+        decoded_->metadata()->GetOrientation() != Orientation::kIdentity) {
+      return false;
+    }
+    if (ImageBlender::NeedsBlending(dec_state_)) return false;
+    if (frame_header_.CanBeReferenced()) return false;
+    if (render_spotcolors_ &&
+        decoded_->metadata()->Find(ExtraChannel::kSpotColor)) {
+      return false;
+    }
+    if (decoded_->AlphaIsPremultiplied()) return false;
+    return true;
+  }
+
+  PassesDecoderState* dec_state_;
+  ThreadPool* pool_;
+  std::vector<uint64_t> section_offsets_;
+  std::vector<uint32_t> section_sizes_;
+  size_t max_passes_;  // NOTE(review): no default; presumably set by InitFrame
+  // TODO(veluca): figure out the duplication between these and dec_state_.
+  FrameHeader frame_header_;
+  FrameDimensions frame_dim_;
+  ImageBundle* decoded_;  // NOTE(review): no default; see InitFrame `decoded`
+  ModularFrameDecoder modular_frame_decoder_;
+  bool allow_partial_frames_;
+  bool allow_partial_dc_global_;
+  bool render_spotcolors_ = true;
+
+  std::vector<uint8_t> processed_section_;  // nonzero once a section is taken
+  std::vector<uint8_t> decoded_passes_per_ac_group_;  // passes done per group
+  std::vector<uint8_t> decoded_dc_groups_;  // presumably 1 once group is done
+  bool decoded_dc_global_;
+  bool decoded_ac_global_;
+  bool finalized_dc_ = true;  // NOTE(review): starts true; see InitFrame
+  bool is_finalized_ = true;  // NOTE(review): starts true; see InitFrame
+  size_t num_renders_ = 0;    // incremented at the end of Flush()
+
+  std::vector<GroupDecCache> group_dec_caches_;
+
+  // Frame size limits.
+  const SizeConstraints* constraints_ = nullptr;
+
+  // Whether or not the task id should be used for storage indexing, instead of
+  // the thread id.
+  bool use_task_id_ = false;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_FRAME_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group.cc
new file mode 100644
index 0000000000..ce917765d9
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group.cc
@@ -0,0 +1,774 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_group.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <memory>
+#include <utility>
+
+#include "lib/jxl/frame_header.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_group.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_reconstruct.h"
+#include "lib/jxl/dec_transforms-inl.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer-inl.h"
+#include "lib/jxl/quantizer.h"
+
+#ifndef LIB_JXL_DEC_GROUP_CC
+#define LIB_JXL_DEC_GROUP_CC
+namespace jxl {
+
+// Interface through which DecodeGroupImpl obtains the quantized blocks of a group.
+class GetBlock {
+ public:
+  virtual void StartRow(size_t by) = 0;  // called before the blocks of row by
+  virtual Status LoadBlock(size_t bx, size_t by, const AcStrategy& acs,
+                           size_t size, size_t log2_covered_blocks,
+                           ACPtr block[3], ACType ac_type) = 0;  // adds into block[c]
+  virtual ~GetBlock() {}
+};
+
+// Controls how much work DecodeGroupImpl does after loading the blocks.
+enum DrawMode {
+  // Load blocks, then turn them into pixels (or JPEG coefficients).
+  kDraw = 0,
+  // Only load blocks; DecodeGroupImpl returns without producing output.
+  kDontDraw = 1,
+  // Skip the block loop entirely (no loading, IDCT or dequantization) and only
+  // run the group finalization. DecodeGroup selects it when it got no passes.
+  kOnlyImageFeatures = 2,
+};
+
+}  // namespace jxl
+#endif  // LIB_JXL_DEC_GROUP_CC
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::ShiftRight;
+
+using D = HWY_FULL(float);  // vector tag for float lanes
+using DU = HWY_FULL(uint32_t);  // not referenced elsewhere in this file
+using DI = HWY_FULL(int32_t);  // vector tag for int32_t lanes
+using DI16 = Rebind<int16_t, DI>;  // int16_t tag paired with DI for Promote/Demote
+constexpr D d;
+constexpr DI di;
+constexpr DI16 di16;
+
+// Transposes a row-major 8x8 block. TODO(veluca): consider SIMDfying.
+void Transpose8x8InPlace(int32_t* JXL_RESTRICT block) {
+  for (size_t row = 1; row < 8; ++row) {
+    for (size_t col = 0; col < row; ++col) {
+      std::swap(block[row * 8 + col], block[col * 8 + row]);
+    }
+  }
+}
+
+// Dequantizes one vector of coefficients (lanes [k, k + Lanes(d))) for all
+// three channels and applies chroma-from-luma to X and B.
+template <ACType ac_type>
+void DequantLane(Vec<D> scaled_dequant_x, Vec<D> scaled_dequant_y,
+                 Vec<D> scaled_dequant_b,
+                 const float* JXL_RESTRICT dequant_matrices, size_t dq_ofs,
+                 size_t size, size_t k, Vec<D> x_cc_mul, Vec<D> b_cc_mul,
+                 const float* JXL_RESTRICT biases, ACPtr qblock[3],
+                 float* JXL_RESTRICT block) {
+  // Per-channel multipliers: dequant matrix entries times the scaled step.
+  const float* weights = dequant_matrices + dq_ofs + k;
+  const auto mul_x = Load(d, weights) * scaled_dequant_x;
+  const auto mul_y = Load(d, weights + size) * scaled_dequant_y;
+  const auto mul_b = Load(d, weights + 2 * size) * scaled_dequant_b;
+
+  // One vector of quantized coefficients per channel, as int32 lanes.
+  Vec<DI> q_x;
+  Vec<DI> q_y;
+  Vec<DI> q_b;
+  if (ac_type != ACType::k16) {
+    q_x = Load(di, qblock[0].ptr32 + k);
+    q_y = Load(di, qblock[1].ptr32 + k);
+    q_b = Load(di, qblock[2].ptr32 + k);
+  } else {
+    // 16-bit storage: widen to int32 lanes.
+    q_x = PromoteTo(di, Load(di16, qblock[0].ptr16 + k));
+    q_y = PromoteTo(di, Load(di16, qblock[1].ptr16 + k));
+    q_b = PromoteTo(di, Load(di16, qblock[2].ptr16 + k));
+  }
+
+  // Bias-adjusted values times the multipliers, before chroma-from-luma.
+  const auto raw_x = AdjustQuantBias(di, 0, q_x, biases) * mul_x;
+  const auto out_y = AdjustQuantBias(di, 1, q_y, biases) * mul_y;
+  const auto raw_b = AdjustQuantBias(di, 2, q_b, biases) * mul_b;
+
+  // X and B additionally receive x_cc_mul * Y and b_cc_mul * Y.
+  Store(MulAdd(x_cc_mul, out_y, raw_x), d, block + k);
+  Store(out_y, d, block + size + k);
+  Store(MulAdd(b_cc_mul, out_y, raw_b), d, block + 2 * size + k);
+}
+
+template <ACType ac_type>
+void DequantBlock(const AcStrategy& acs, float inv_global_scale, int quant,
+                  float x_dm_multiplier, float b_dm_multiplier, Vec<D> x_cc_mul,
+                  Vec<D> b_cc_mul, size_t kind, size_t size,
+                  const Quantizer& quantizer,
+                  const float* JXL_RESTRICT dequant_matrices,
+                  size_t covered_blocks, const size_t* sbx,
+                  const float* JXL_RESTRICT* JXL_RESTRICT dc_row,
+                  size_t dc_stride, const float* JXL_RESTRICT biases,
+                  ACPtr qblock[3], float* JXL_RESTRICT block) {
+  PROFILER_FUNC;
+  // Step shared by all channels; X and B carry an extra multiplier each.
+  const float step = inv_global_scale / quant;
+  const auto step_x = Set(d, step * x_dm_multiplier);
+  const auto step_y = Set(d, step);
+  const auto step_b = Set(d, step * b_dm_multiplier);
+  const size_t matrix_offset = quantizer.DequantMatrixOffset(kind, 0);
+  const size_t num_coeffs = covered_blocks * kDCTBlockSize;
+  // Dequantize one vector of coefficients per iteration, all three channels.
+  for (size_t k = 0; k < num_coeffs; k += Lanes(d)) {
+    DequantLane<ac_type>(step_x, step_y, step_b, dequant_matrices,
+                         matrix_offset, size, k, x_cc_mul, b_cc_mul, biases,
+                         qblock, block);
+  }
+  // Derive the lowest frequencies of each channel from its DC values.
+  for (size_t c = 0; c < 3; ++c) {
+    LowestFrequenciesFromDC(acs.Strategy(), dc_row[c] + sbx[c], dc_stride,
+                            block + c * size);
+  }
+}
+
+// Decodes one group. Blocks come from `get_block`; depending on `draw` they
+// are dequantized and inverse-transformed to pixels, or (when `decoded` is a
+// JPEG) written as JPEG coefficients. Fails on invalid or unsupported input.
+Status DecodeGroupImpl(GetBlock* JXL_RESTRICT get_block,
+                       GroupDecCache* JXL_RESTRICT group_dec_cache,
+                       PassesDecoderState* JXL_RESTRICT dec_state,
+                       size_t thread, size_t group_idx, ImageBundle* decoded,
+                       DrawMode draw) {
+  // TODO(veluca): investigate cache usage in this function.
+  PROFILER_FUNC;
+  constexpr size_t kGroupDataXBorder = PassesDecoderState::kGroupDataXBorder;
+  constexpr size_t kGroupDataYBorder = PassesDecoderState::kGroupDataYBorder;
+
+  const Rect block_rect = dec_state->shared->BlockGroupRect(group_idx);
+  const AcStrategyImage& ac_strategy = dec_state->shared->ac_strategy;
+
+  const size_t xsize_blocks = block_rect.xsize();
+  const size_t ysize_blocks = block_rect.ysize();
+
+  const size_t dc_stride = dec_state->shared->dc->PixelsPerRow();
+
+  const float inv_global_scale = dec_state->shared->quantizer.InvGlobalScale();
+  const float* JXL_RESTRICT dequant_matrices =
+      dec_state->shared->quantizer.DequantMatrix(0, 0);
+
+  const YCbCrChromaSubsampling& cs =
+      dec_state->shared->frame_header.chroma_subsampling;
+
+  // Row stride of whichever buffer receives the IDCT output below.
+  const size_t idct_stride = dec_state->EagerFinalizeImageRect()
+                                 ? dec_state->group_data[thread].PixelsPerRow()
+                                 : dec_state->decoded.PixelsPerRow();
+
+  HWY_ALIGN int32_t scaled_qtable[64 * 3];  // only filled in JPEG mode
+
+  ACType ac_type = dec_state->coefficients->Type();
+  auto dequant_block = ac_type == ACType::k16 ? DequantBlock<ACType::k16>
+                                              : DequantBlock<ACType::k32>;
+  // Whether or not coefficients should be stored for future usage, and/or read
+  // from past usage.
+  bool accumulate = !dec_state->coefficients->IsEmpty();
+  // Offset of the current block in the group.
+  size_t offset = 0;
+
+  std::array<int, 3> jpeg_c_map;  // only set and read in JPEG mode
+  bool jpeg_is_gray = false;
+  std::array<int, 3> dcoff = {};
+
+  // TODO(veluca): all of this should be done only once per image.
+  if (decoded->IsJPEG()) {
+    if (!dec_state->shared->cmap.IsJPEGCompatible()) {
+      return JXL_FAILURE("The CfL map is not JPEG-compatible");
+    }
+    jpeg_is_gray = (decoded->jpeg_data->components.size() == 1);
+    jpeg_c_map = JpegOrder(dec_state->shared->frame_header.color_transform,
+                           jpeg_is_gray);
+    const std::vector<QuantEncoding>& qe =
+        dec_state->shared->matrices.encodings();
+    if (qe.empty() || qe[0].mode != QuantEncoding::Mode::kQuantModeRAW ||
+        std::abs(qe[0].qraw.qtable_den - 1.f / (8 * 255)) > 1e-8f) {
+      return JXL_FAILURE(
+          "Quantization table is not a JPEG quantization table.");
+    }
+    for (size_t c = 0; c < 3; c++) {
+      for (size_t i = 0; i < 64; i++) {
+        // Transpose the matrix, as it will be used on the transposed block.
+        int n = qe[0].qraw.qtable->at(64 + i);
+        int d = qe[0].qraw.qtable->at(64 * c + i);
+        if (n <= 0 || d <= 0 || n >= 65536 || d >= 65536) {
+          return JXL_FAILURE("Invalid JPEG quantization table");
+        }
+        scaled_qtable[64 * c + (i % 8) * 8 + (i / 8)] =
+            (1 << kCFLFixedPointPrecision) * n / d;
+      }
+      // Entry 64 * c was validated above (i == 0), so this cannot divide by 0.
+      if (dec_state->shared->frame_header.color_transform ==
+          ColorTransform::kNone) {
+        dcoff[c] = 1024 / (*qe[0].qraw.qtable)[64 * c];
+      }
+    }
+  }
+
+  size_t hshift[3] = {cs.HShift(0), cs.HShift(1), cs.HShift(2)};
+  size_t vshift[3] = {cs.VShift(0), cs.VShift(1), cs.VShift(2)};
+  Rect r[3];  // block_rect scaled down by each channel's subsampling shifts
+  for (size_t i = 0; i < 3; i++) {
+    r[i] =
+        Rect(block_rect.x0() >> hshift[i], block_rect.y0() >> vshift[i],
+             block_rect.xsize() >> hshift[i], block_rect.ysize() >> vshift[i]);
+  }
+
+  for (size_t by = 0; by < ysize_blocks; ++by) {
+    if (draw == kOnlyImageFeatures) break;  // no blocks are loaded in this mode
+    get_block->StartRow(by);
+    size_t sby[3] = {by >> vshift[0], by >> vshift[1], by >> vshift[2]};
+
+    const int32_t* JXL_RESTRICT row_quant =
+        block_rect.ConstRow(dec_state->shared->raw_quant_field, by);
+
+    const float* JXL_RESTRICT dc_rows[3] = {
+        r[0].ConstPlaneRow(*dec_state->shared->dc, 0, sby[0]),
+        r[1].ConstPlaneRow(*dec_state->shared->dc, 1, sby[1]),
+        r[2].ConstPlaneRow(*dec_state->shared->dc, 2, sby[2]),
+    };
+
+    const size_t ty = (block_rect.y0() + by) / kColorTileDimInBlocks;
+    AcStrategyRow acs_row = ac_strategy.ConstRow(block_rect, by);
+
+    const int8_t* JXL_RESTRICT row_cmap[3] = {
+        dec_state->shared->cmap.ytox_map.ConstRow(ty),
+        nullptr,
+        dec_state->shared->cmap.ytob_map.ConstRow(ty),
+    };
+
+    float* JXL_RESTRICT idct_row[3];
+    int16_t* JXL_RESTRICT jpeg_row[3];  // only set in JPEG mode
+    for (size_t c = 0; c < 3; c++) {
+      if (dec_state->EagerFinalizeImageRect()) {
+        idct_row[c] = dec_state->group_data[thread].PlaneRow(
+                          c, sby[c] * kBlockDim + kGroupDataYBorder) +
+                      kGroupDataXBorder;
+      } else {
+        idct_row[c] =
+            dec_state->decoded.PlaneRow(c, (r[c].y0() + sby[c]) * kBlockDim) +
+            r[c].x0() * kBlockDim;
+      }
+      if (decoded->IsJPEG()) {
+        auto& component = decoded->jpeg_data->components[jpeg_c_map[c]];
+        jpeg_row[c] =
+            component.coeffs.data() +
+            (component.width_in_blocks * (r[c].y0() + sby[c]) + r[c].x0()) *
+                kDCTBlockSize;
+      }
+    }
+
+    size_t bx = 0;
+    for (size_t tx = 0; tx < DivCeil(xsize_blocks, kColorTileDimInBlocks);
+         tx++) {
+      size_t abs_tx = tx + block_rect.x0() / kColorTileDimInBlocks;
+      auto x_cc_mul =
+          Set(d, dec_state->shared->cmap.YtoXRatio(row_cmap[0][abs_tx]));
+      auto b_cc_mul =
+          Set(d, dec_state->shared->cmap.YtoBRatio(row_cmap[2][abs_tx]));
+      // Increment bx by llf_x because those iterations would otherwise
+      // immediately continue (!IsFirstBlock). Reduces mispredictions.
+      for (; bx < xsize_blocks && bx < (tx + 1) * kColorTileDimInBlocks;) {
+        size_t sbx[3] = {bx >> hshift[0], bx >> hshift[1], bx >> hshift[2]};
+        AcStrategy acs = acs_row[bx];
+        const size_t llf_x = acs.covered_blocks_x();
+
+        // Can only happen in the second or lower rows of a varblock.
+        if (JXL_UNLIKELY(!acs.IsFirstBlock())) {
+          bx += llf_x;
+          continue;
+        }
+        PROFILER_ZONE("DecodeGroupImpl inner");
+        const size_t log2_covered_blocks = acs.log2_covered_blocks();
+
+        const size_t covered_blocks = 1 << log2_covered_blocks;
+        const size_t size = covered_blocks * kDCTBlockSize;
+
+        ACPtr qblock[3];
+        if (accumulate) {
+          for (size_t c = 0; c < 3; c++) {
+            qblock[c] = dec_state->coefficients->PlaneRow(c, group_idx, offset);
+          }
+        } else {
+          // No point in reading from bitstream without accumulating and not
+          // drawing.
+          JXL_ASSERT(draw == kDraw);
+          if (ac_type == ACType::k16) {
+            memset(group_dec_cache->dec_group_qblock16, 0,
+                   size * 3 * sizeof(int16_t));
+            for (size_t c = 0; c < 3; c++) {
+              qblock[c].ptr16 = group_dec_cache->dec_group_qblock16 + c * size;
+            }
+          } else {
+            memset(group_dec_cache->dec_group_qblock, 0,
+                   size * 3 * sizeof(int32_t));
+            for (size_t c = 0; c < 3; c++) {
+              qblock[c].ptr32 = group_dec_cache->dec_group_qblock + c * size;
+            }
+          }
+        }
+        JXL_RETURN_IF_ERROR(get_block->LoadBlock(
+            bx, by, acs, size, log2_covered_blocks, qblock, ac_type));
+        offset += size;
+        if (draw == kDontDraw) {
+          bx += llf_x;
+          continue;
+        }
+
+        if (JXL_UNLIKELY(decoded->IsJPEG())) {
+          if (acs.Strategy() != AcStrategy::Type::DCT) {
+            return JXL_FAILURE(
+                "Can only decode to JPEG if only DCT-8 is used.");
+          }
+
+          HWY_ALIGN int32_t transposed_dct_y[64];
+          for (size_t c : {1, 0, 2}) {
+            // Propagate only Y for grayscale.
+            if (jpeg_is_gray && c != 1) continue;
+            if ((sbx[c] << hshift[c] != bx) || (sby[c] << vshift[c] != by)) {
+              continue;
+            }
+            int16_t* JXL_RESTRICT jpeg_pos =
+                jpeg_row[c] + sbx[c] * kDCTBlockSize;
+            // JPEG XL is transposed, JPEG is not.
+            auto transposed_dct = qblock[c].ptr32;
+            Transpose8x8InPlace(transposed_dct);
+            // No CfL - no need to store the y block converted to integers.
+            if (!cs.Is444() ||
+                (row_cmap[0][abs_tx] == 0 && row_cmap[2][abs_tx] == 0)) {
+              for (size_t i = 0; i < 64; i += Lanes(d)) {
+                const auto ini = Load(di, transposed_dct + i);
+                const auto ini16 = DemoteTo(di16, ini);
+                StoreU(ini16, di16, jpeg_pos + i);
+              }
+            } else if (c == 1) {
+              // Y channel: save for restoring X/B, but nothing else to do.
+              for (size_t i = 0; i < 64; i += Lanes(d)) {
+                const auto ini = Load(di, transposed_dct + i);
+                Store(ini, di, transposed_dct_y + i);
+                const auto ini16 = DemoteTo(di16, ini);
+                StoreU(ini16, di16, jpeg_pos + i);
+              }
+            } else {
+              // transposed_dct_y contains the y channel block, transposed.
+              const auto scale = Set(
+                  di, dec_state->shared->cmap.RatioJPEG(row_cmap[c][abs_tx]));
+              const auto round = Set(di, 1 << (kCFLFixedPointPrecision - 1));
+              for (size_t i = 0; i < 64; i += Lanes(d)) {
+                auto in = Load(di, transposed_dct + i);
+                auto in_y = Load(di, transposed_dct_y + i);
+                auto qt = Load(di, scaled_qtable + c * size + i);
+                auto coeff_scale =
+                    ShiftRight<kCFLFixedPointPrecision>(qt * scale + round);
+                auto cfl_factor = ShiftRight<kCFLFixedPointPrecision>(
+                    in_y * coeff_scale + round);
+                StoreU(DemoteTo(di16, in + cfl_factor), di16, jpeg_pos + i);
+              }
+            }
+            jpeg_pos[0] =
+                Clamp1<float>(dc_rows[c][sbx[c]] - dcoff[c], -2047, 2047);
+          }
+        } else {
+          HWY_ALIGN float* const block = group_dec_cache->dec_group_block;
+          // Dequantize and add predictions.
+          dequant_block(
+              acs, inv_global_scale, row_quant[bx], dec_state->x_dm_multiplier,
+              dec_state->b_dm_multiplier, x_cc_mul, b_cc_mul, acs.RawStrategy(),
+              size, dec_state->shared->quantizer, dequant_matrices,
+              acs.covered_blocks_y() * acs.covered_blocks_x(), sbx, dc_rows,
+              dc_stride,
+              dec_state->output_encoding_info.opsin_params.quant_biases, qblock,
+              block);
+
+          for (size_t c : {1, 0, 2}) {
+            if ((sbx[c] << hshift[c] != bx) || (sby[c] << vshift[c] != by)) {
+              continue;
+            }
+            // IDCT
+            float* JXL_RESTRICT idct_pos = idct_row[c] + sbx[c] * kBlockDim;
+            TransformToPixels(acs.Strategy(), block + c * size, idct_pos,
+                              idct_stride, group_dec_cache->scratch_space);
+          }
+        }
+        bx += llf_x;
+      }
+    }
+  }
+  if (draw == kDontDraw) return true;
+  // No ApplyImageFeatures in JPEG mode or when we need to delay it.
+  if (!decoded->IsJPEG() && dec_state->EagerFinalizeImageRect()) {
+    JXL_RETURN_IF_ERROR(dec_state->FinalizeGroup(
+        group_idx, thread, &dec_state->group_data[thread], decoded));
+  }
+  return true;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+namespace {
+// Decodes one pass of quantized AC coefficients of a varblock from `br` and
+// adds them into `block`. Scan positions below covered_blocks are not written.
+template <ACType ac_type>
+Status DecodeACVarBlock(size_t ctx_offset, size_t log2_covered_blocks,
+                        int32_t* JXL_RESTRICT row_nzeros,
+                        const int32_t* JXL_RESTRICT row_nzeros_top,
+                        size_t nzeros_stride, size_t c, size_t bx, size_t by,
+                        size_t lbx, AcStrategy acs,
+                        const coeff_order_t* JXL_RESTRICT coeff_order,
+                        BitReader* JXL_RESTRICT br,
+                        ANSSymbolReader* JXL_RESTRICT decoder,
+                        const std::vector<uint8_t>& context_map,
+                        const uint8_t* qdc_row, const int32_t* qf_row,
+                        const BlockCtxMap& block_ctx_map, ACPtr block,
+                        size_t shift = 0) {
+  PROFILER_FUNC;
+  // Equal to number of LLF coefficients.
+  const size_t covered_blocks = 1 << log2_covered_blocks;
+  const size_t size = covered_blocks * kDCTBlockSize;
+  int32_t predicted_nzeros =
+      PredictFromTopAndLeft(row_nzeros_top, row_nzeros, bx, 32);
+
+  size_t ord = kStrategyOrder[acs.RawStrategy()];
+  const coeff_order_t* JXL_RESTRICT order =
+      &coeff_order[CoeffOrderOffset(ord, c)];
+
+  size_t block_ctx = block_ctx_map.Context(qdc_row[lbx], qf_row[bx], ord, c);
+  const int32_t nzero_ctx =
+      block_ctx_map.NonZeroContext(predicted_nzeros, block_ctx) + ctx_offset;
+
+  size_t nzeros = decoder->ReadHybridUint(nzero_ctx, br, context_map);
+  if (nzeros + covered_blocks > size) {  // more nonzeros than non-LLF slots
+    return JXL_FAILURE("Invalid AC: nzeros too large");
+  }
+  for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
+    for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
+      row_nzeros[bx + x + y * nzeros_stride] =
+          (nzeros + covered_blocks - 1) >> log2_covered_blocks;  // rounded up
+    }
+  }
+
+  const size_t histo_offset =
+      ctx_offset + block_ctx_map.ZeroDensityContextsOffset(block_ctx);
+
+  // Skip LLF (start at k = covered_blocks); stop once all nonzeros are read.
+  {
+    PROFILER_ZONE("AcDecSkipLLF, reader");
+    size_t prev = (nzeros > size / 16 ? 0 : 1);
+    for (size_t k = covered_blocks; k < size && nzeros != 0; ++k) {
+      const size_t ctx =
+          histo_offset + ZeroDensityContext(nzeros, k, covered_blocks,
+                                            log2_covered_blocks, prev);
+      const size_t u_coeff = decoder->ReadHybridUint(ctx, br, context_map);
+      // Hand-rolled version of UnpackSigned (even u -> u / 2, odd u ->
+      // -(u + 1) / 2), shifting before the conversion to signed integer to
+      // avoid undefined behavior of shifting negative numbers.
+      const size_t magnitude = u_coeff >> 1;
+      const size_t neg_sign = (~u_coeff) & 1;
+      const intptr_t coeff =
+          static_cast<intptr_t>((magnitude ^ (neg_sign - 1)) << shift);
+      if (ac_type == ACType::k16) {
+        block.ptr16[order[k]] += coeff;
+      } else {
+        block.ptr32[order[k]] += coeff;
+      }
+      prev = static_cast<size_t>(u_coeff != 0);  // 1 iff this one was nonzero
+      nzeros -= prev;
+    }
+    if (JXL_UNLIKELY(nzeros != 0)) {
+      return JXL_FAILURE(
+          "Invalid AC: nzeros not 0. Block (%zu, %zu), channel %zu", bx, by, c);
+    }
+  }
+  return true;
+}
+
+// Structs used by DecodeGroupImpl to get a quantized block.
+// GetBlockFromBitstream uses ANS decoding (and thus keeps track of row
+// pointers in row_nzeros), GetBlockFromEncoder simply reads the coefficient
+// image provided by the encoder.
+
+struct GetBlockFromBitstream : public GetBlock {
+  void StartRow(size_t by) override {
+    qf_row = rect.ConstRow(*qf, by);
+    // Does not depend on the channel, so it is set once instead of per channel.
+    quant_dc_row = quant_dc->ConstRow(rect.y0() + by) + rect.x0();
+    for (size_t c = 0; c < 3; c++) {
+      size_t sby = by >> vshift[c];
+      for (size_t i = 0; i < num_passes; i++) {
+        row_nzeros[i][c] = group_dec_cache->num_nzeroes[i].PlaneRow(c, sby);
+        row_nzeros_top[i][c] =
+            sby == 0 ? nullptr
+                     : group_dec_cache->num_nzeroes[i].ConstPlaneRow(c, sby - 1);
+      }
+    }
+  }
+  // Decodes all passes of the block at (bx, by) into block[c], per channel.
+  Status LoadBlock(size_t bx, size_t by, const AcStrategy& acs, size_t size,
+                   size_t log2_covered_blocks, ACPtr block[3],
+                   ACType ac_type) override {
+    auto decode_ac_varblock = ac_type == ACType::k16
+                                  ? DecodeACVarBlock<ACType::k16>
+                                  : DecodeACVarBlock<ACType::k32>;
+    for (size_t c : {1, 0, 2}) {
+      size_t sbx = bx >> hshift[c];
+      size_t sby = by >> vshift[c];
+      // A subsampled channel has no block at positions that are not aligned.
+      if (JXL_UNLIKELY((sbx << hshift[c] != bx) || (sby << vshift[c] != by))) {
+        continue;
+      }
+
+      for (size_t pass = 0; JXL_UNLIKELY(pass < num_passes); pass++) {
+        JXL_RETURN_IF_ERROR(decode_ac_varblock(
+            ctx_offset[pass], log2_covered_blocks, row_nzeros[pass][c],
+            row_nzeros_top[pass][c], nzeros_stride, c, sbx, sby, bx, acs,
+            &coeff_orders[pass * coeff_order_size], readers[pass],
+            &decoders[pass], context_map[pass], quant_dc_row, qf_row,
+            *block_ctx_map, block[c], shift_for_pass[pass]));
+      }
+    }
+    return true;
+  }
+  // Binds the per-pass state, starting at `first_pass` in the shared tables.
+  Status Init(BitReader* JXL_RESTRICT* JXL_RESTRICT readers, size_t num_passes,
+              size_t group_idx, size_t histo_selector_bits, const Rect& rect,
+              GroupDecCache* JXL_RESTRICT group_dec_cache,
+              PassesDecoderState* dec_state, size_t first_pass) {
+    for (size_t i = 0; i < 3; i++) {
+      hshift[i] = dec_state->shared->frame_header.chroma_subsampling.HShift(i);
+      vshift[i] = dec_state->shared->frame_header.chroma_subsampling.VShift(i);
+    }
+    this->coeff_order_size = dec_state->shared->coeff_order_size;
+    this->coeff_orders =
+        dec_state->shared->coeff_orders.data() + first_pass * coeff_order_size;
+    this->context_map = dec_state->context_map.data() + first_pass;
+    this->readers = readers;
+    this->num_passes = num_passes;
+    this->shift_for_pass =
+        dec_state->shared->frame_header.passes.shift + first_pass;
+    this->group_dec_cache = group_dec_cache;
+    this->rect = rect;
+    block_ctx_map = &dec_state->shared->block_ctx_map;
+    qf = &dec_state->shared->raw_quant_field;
+    quant_dc = &dec_state->shared->quant_dc;
+
+    for (size_t pass = 0; pass < num_passes; pass++) {
+      // Select which histogram set to use among those of the current pass.
+      size_t cur_histogram = 0;
+      if (histo_selector_bits != 0) {
+        cur_histogram = readers[pass]->ReadBits(histo_selector_bits);
+      }
+      if (cur_histogram >= dec_state->shared->num_histograms) {
+        return JXL_FAILURE("Invalid histogram selector");
+      }
+      ctx_offset[pass] = cur_histogram * block_ctx_map->NumACContexts();
+
+      decoders[pass] =
+          ANSSymbolReader(&dec_state->code[pass + first_pass], readers[pass]);
+    }
+    // All passes must share one row stride, since a single value is kept.
+    nzeros_stride = group_dec_cache->num_nzeroes[0].PixelsPerRow();
+    for (size_t i = 0; i < num_passes; i++) {
+      JXL_ASSERT(
+          nzeros_stride ==
+          static_cast<size_t>(group_dec_cache->num_nzeroes[i].PixelsPerRow()));
+    }
+    return true;
+  }
+  // Set by Init(); the *_row pointers are refreshed by every StartRow().
+  const uint32_t* shift_for_pass = nullptr;  // not owned
+  const coeff_order_t* JXL_RESTRICT coeff_orders;
+  size_t coeff_order_size;
+  const std::vector<uint8_t>* JXL_RESTRICT context_map;
+  ANSSymbolReader decoders[kMaxNumPasses];  // final state checked by DecodeGroup
+  BitReader* JXL_RESTRICT* JXL_RESTRICT readers;
+  size_t num_passes;
+  size_t ctx_offset[kMaxNumPasses];  // selected histogram * NumACContexts()
+  size_t nzeros_stride;
+  int32_t* JXL_RESTRICT row_nzeros[kMaxNumPasses][3];
+  const int32_t* JXL_RESTRICT row_nzeros_top[kMaxNumPasses][3];  // null on row 0
+  GroupDecCache* JXL_RESTRICT group_dec_cache;
+  const BlockCtxMap* block_ctx_map;
+  const ImageI* qf;
+  const ImageB* quant_dc;
+  const int32_t* qf_row;
+  const uint8_t* quant_dc_row;
+  Rect rect;
+  size_t hshift[3], vshift[3];
+};
+
+struct GetBlockFromEncoder : public GetBlock {
+  void StartRow(size_t by) override {}
+
+  // Adds every pass' stored coefficients, scaled by 1 << shift, into block[c].
+  Status LoadBlock(size_t bx, size_t by, const AcStrategy& acs, size_t size,
+                   size_t log2_covered_blocks, ACPtr block[3],
+                   ACType ac_type) override {
+    JXL_DASSERT(ac_type == ACType::k32);
+    for (size_t c = 0; c < 3; ++c) {
+      int32_t* dst = block[c].ptr32;
+      for (size_t pass = 0; pass < quantized_ac->size(); ++pass) {
+        // TODO(veluca): SIMD.
+        const int32_t* src = rows[pass][c] + offset;
+        const int scale = 1 << shift_for_pass[pass];
+        for (size_t k = 0; k < size; ++k) dst[k] += src[k] * scale;
+      }
+    }
+    offset += size;
+    return true;
+  }
+
+  GetBlockFromEncoder(const std::vector<std::unique_ptr<ACImage>>& ac,
+                      size_t group_idx, const uint32_t* shift_for_pass)
+      : quantized_ac(&ac), shift_for_pass(shift_for_pass) {
+    // TODO(veluca): not supported with chroma subsampling.
+    for (size_t pass = 0; pass < quantized_ac->size(); ++pass) {
+      JXL_CHECK((*quantized_ac)[pass]->Type() == ACType::k32);
+      for (size_t c = 0; c < 3; ++c) {
+        rows[pass][c] = (*quantized_ac)[pass]->PlaneRow(c, group_idx, 0).ptr32;
+      }
+    }
+  }
+
+  const std::vector<std::unique_ptr<ACImage>>* JXL_RESTRICT quantized_ac;
+  size_t offset = 0;  // coefficients already handed out by LoadBlock
+  const int32_t* JXL_RESTRICT rows[kMaxNumPasses][3];  // [pass][channel]
+  const uint32_t* shift_for_pass = nullptr;  // not owned
+};
+
+HWY_EXPORT(DecodeGroupImpl);
+
+}  // namespace
+
+Status DecodeGroup(BitReader* JXL_RESTRICT* JXL_RESTRICT readers,
+                   size_t num_passes, size_t group_idx,
+                   PassesDecoderState* JXL_RESTRICT dec_state,
+                   GroupDecCache* JXL_RESTRICT group_dec_cache, size_t thread,
+                   ImageBundle* JXL_RESTRICT decoded, size_t first_pass,
+                   bool force_draw, bool dc_only) {
+  PROFILER_FUNC;
+
+  // Render when this call includes the frame's last pass, or when forced.
+  const bool reaches_last_pass =
+      num_passes + first_pass ==
+      dec_state->shared->frame_header.passes.num_passes;
+  DrawMode draw = (reaches_last_pass || force_draw) ? kDraw : kDontDraw;
+
+  if (draw == kDraw && num_passes == 0 && first_pass == 0) {
+    // We reuse filter_input_storage here as it is not currently in use.
+    auto& dc_copy = dec_state->filter_input_storage[thread];
+    const Rect src_rect = dec_state->shared->BlockGroupRect(group_idx);
+    const Rect copy_rect(kBlockDim, 2, src_rect.xsize(), src_rect.ysize());
+    CopyImageToWithPadding(src_rect, *dec_state->shared->dc, 2, copy_rect,
+                           &dc_copy);
+    EnsurePaddingInPlace(&dc_copy, copy_rect, src_rect,
+                         dec_state->shared->frame_dim.xsize_blocks,
+                         dec_state->shared->frame_dim.ysize_blocks, 2, 2);
+    // Destination: group-local buffer if finalizing eagerly, else full image.
+    const Rect frame_rect(src_rect.x0() * 8, src_rect.y0() * 8,
+                          src_rect.xsize() * 8, src_rect.ysize() * 8);
+    const bool eager = dec_state->EagerFinalizeImageRect();
+    Image3F* upsampling_dst =
+        eager ? &dec_state->group_data[thread] : &dec_state->decoded;
+    const Rect dst_rect =
+        eager ? Rect(PassesDecoderState::kGroupDataXBorder,
+                     PassesDecoderState::kGroupDataYBorder, frame_rect.xsize(),
+                     frame_rect.ysize())
+              : frame_rect;
+    const ssize_t image_y_offset = static_cast<ssize_t>(src_rect.y0()) -
+                                   static_cast<ssize_t>(copy_rect.y0());
+    dec_state->upsamplers[2].UpsampleRect(
+        dc_copy, copy_rect, upsampling_dst, dst_rect, image_y_offset,
+        dec_state->shared->frame_dim.ysize_blocks,
+        dec_state->upsampler_storage[thread].get());
+    draw = kOnlyImageFeatures;
+  }
+
+  size_t histo_selector_bits = 0;
+  if (!dc_only) {
+    JXL_ASSERT(dec_state->shared->num_histograms > 0);
+    histo_selector_bits = CeilLog2Nonzero(dec_state->shared->num_histograms);
+  } else {
+    JXL_ASSERT(num_passes == 0);
+  }
+
+  GetBlockFromBitstream get_block;
+  JXL_RETURN_IF_ERROR(
+      get_block.Init(readers, num_passes, group_idx, histo_selector_bits,
+                     dec_state->shared->BlockGroupRect(group_idx),
+                     group_dec_cache, dec_state, first_pass));
+  JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(DecodeGroupImpl)(
+      &get_block, group_dec_cache, dec_state, thread, group_idx, decoded,
+      draw));
+
+  for (size_t pass = 0; pass < num_passes; ++pass) {
+    const bool ok = get_block.decoders[pass].CheckANSFinalState();
+    if (!ok) return JXL_FAILURE("ANS checksum failure.");
+  }
+  return true;
+}
+
+Status DecodeGroupForRoundtrip(const std::vector<std::unique_ptr<ACImage>>& ac,
+                               size_t group_idx,
+                               PassesDecoderState* JXL_RESTRICT dec_state,
+                               GroupDecCache* JXL_RESTRICT group_dec_cache,
+                               size_t thread, ImageBundle* JXL_RESTRICT decoded,
+                               AuxOut* aux_out) {
+  PROFILER_FUNC;
+
+  // Blocks are read from the coefficient images in `ac`, not from a bitstream.
+  const uint32_t* pass_shifts = dec_state->shared->frame_header.passes.shift;
+  GetBlockFromEncoder get_block(ac, group_idx, pass_shifts);
+  // One bit set for each of the kNumValidStrategies strategies.
+  const auto all_strategies = (1u << AcStrategy::kNumValidStrategies) - 1;
+  group_dec_cache->InitOnce(/*num_passes=*/0, /*used_acs=*/all_strategies);
+  // This path always requests rendering (kDraw).
+  return HWY_DYNAMIC_DISPATCH(DecodeGroupImpl)(
+      &get_block, group_dec_cache, dec_state, thread, group_idx, decoded, kDraw);
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group.h
new file mode 100644
index 0000000000..a7b868d3a4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group.h
@@ -0,0 +1,47 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_GROUP_H_
+#define LIB_JXL_DEC_GROUP_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_params.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+
+// Decodes the AC group `group_idx` from `num_passes` bit readers (one reader
+// per pass, starting at pass `first_pass`) into `decoded`, using
+// `group_dec_cache` as per-thread scratch state. Returns an error status if
+// decoding fails, including when the ANS final-state check of any pass fails.
+// NOTE(review): the exact effect of `force_draw` and `dc_only` is determined
+// in the definition (dec_group.cc) - confirm there before relying on it.
+Status DecodeGroup(BitReader* JXL_RESTRICT* JXL_RESTRICT readers,
+                   size_t num_passes, size_t group_idx,
+                   PassesDecoderState* JXL_RESTRICT dec_state,
+                   GroupDecCache* JXL_RESTRICT group_dec_cache, size_t thread,
+                   ImageBundle* JXL_RESTRICT decoded, size_t first_pass,
+                   bool force_draw, bool dc_only);
+
+// Decodes the group `group_idx` from the encoder-side AC coefficient images
+// `ac` rather than from a bitstream (used for encoder round trips). The group
+// is always drawn into `decoded`.
+Status DecodeGroupForRoundtrip(const std::vector<std::unique_ptr<ACImage>>& ac,
+                               size_t group_idx,
+                               PassesDecoderState* JXL_RESTRICT dec_state,
+                               GroupDecCache* JXL_RESTRICT group_dec_cache,
+                               size_t thread, ImageBundle* JXL_RESTRICT decoded,
+                               AuxOut* aux_out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_GROUP_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group_border.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group_border.cc
new file mode 100644
index 0000000000..2e08578730
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group_border.cc
@@ -0,0 +1,183 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_group_border.h"
+
+#include <atomic>
+
+namespace jxl {
+
+// Stores the frame dimensions and allocates one atomic status byte per group
+// corner: a grid of (xsize_groups + 1) x (ysize_groups + 1) corners.
+void GroupBorderAssigner::Init(const FrameDimensions& frame_dim) {
+  frame_dim_ = frame_dim;
+  const size_t corners_per_row = frame_dim_.xsize_groups + 1;
+  const size_t corner_rows = frame_dim_.ysize_groups + 1;
+  counters_.reset(new std::atomic<uint8_t>[corners_per_row * corner_rows]);
+
+  // Corners on the image border have no group on the outer side, so the
+  // bits of those missing groups are pre-set. Afterwards every corner can be
+  // treated uniformly as "complete once all four bits are set".
+  for (size_t cy = 0; cy < corner_rows; cy++) {
+    const bool top_edge = (cy == 0);
+    const bool bottom_edge = (cy == frame_dim_.ysize_groups);
+    for (size_t cx = 0; cx < corners_per_row; cx++) {
+      uint8_t preset = 0;
+      if (cx == 0) preset |= kTopLeft | kBottomLeft;
+      if (cx == frame_dim_.xsize_groups) preset |= kTopRight | kBottomRight;
+      if (top_edge) preset |= kTopLeft | kTopRight;
+      if (bottom_edge) preset |= kBottomLeft | kBottomRight;
+      counters_[cy * corners_per_row + cx] = preset;
+    }
+  }
+}
+
+// Clears this group's "done" bit on each of its four surrounding corners so
+// that the group can be processed again.
+void GroupBorderAssigner::ClearDone(size_t group_id) {
+  const size_t corners_per_row = frame_dim_.xsize_groups + 1;
+  const size_t gx = group_id % frame_dim_.xsize_groups;
+  const size_t gy = group_id / frame_dim_.xsize_groups;
+  // Index of the corner above-left of the group; the corner below it is
+  // exactly one corner row further.
+  const size_t upper_left = gy * corners_per_row + gx;
+  const size_t lower_left = upper_left + corners_per_row;
+  // Seen from a corner, the group lies in the opposite quadrant: it is the
+  // bottom-right neighbour of its own top-left corner, and so on.
+  counters_[upper_left].fetch_and(~kBottomRight);
+  counters_[upper_left + 1].fetch_and(~kBottomLeft);
+  counters_[lower_left].fetch_and(~kTopRight);
+  counters_[lower_left + 1].fetch_and(~kTopLeft);
+}
+
+// Looking at each corner between groups, we can guarantee that the four
+// involved groups will agree between each other regarding the order in which
+// each of the four groups terminated. Thus, the last of the four groups
+// gets the responsibility of handling the corner. For borders, every border
+// is assigned to its top corner (for vertical borders) or to its left corner
+// (for horizontal borders): the order as seen on those corners will decide who
+// handles that border.
+
+// Marks group `group_id` as finished on its four surrounding corners and
+// computes which rectangles (the group's interior plus any border strips and
+// corners this group has become responsible for) can now be finalized.
+//
+//   group_id          index of the group, row-major over xsize_groups.
+//   padding           border size in the vertical direction; horizontally
+//                     PaddingX(padding) is used.
+//   rects_to_finalize output array; receives at most kMaxToFinalize rects.
+//   num_to_finalize   output; number of rects written.
+void GroupBorderAssigner::GroupDone(size_t group_id, size_t padding,
+                                    Rect* rects_to_finalize,
+                                    size_t* num_to_finalize) {
+  size_t x = group_id % frame_dim_.xsize_groups;
+  size_t y = group_id / frame_dim_.xsize_groups;
+  // Rect of this group measured in blocks, clamped to the frame size.
+  Rect block_rect(x * frame_dim_.group_dim / kBlockDim,
+                  y * frame_dim_.group_dim / kBlockDim,
+                  frame_dim_.group_dim / kBlockDim,
+                  frame_dim_.group_dim / kBlockDim, frame_dim_.xsize_blocks,
+                  frame_dim_.ysize_blocks);
+
+  // Indices of the four corners of this group in the corner grid.
+  size_t top_left_idx = y * (frame_dim_.xsize_groups + 1) + x;
+  size_t top_right_idx = y * (frame_dim_.xsize_groups + 1) + x + 1;
+  size_t bottom_right_idx = (y + 1) * (frame_dim_.xsize_groups + 1) + x + 1;
+  size_t bottom_left_idx = (y + 1) * (frame_dim_.xsize_groups + 1) + x;
+
+  // Atomically sets `bit` on corner `idx` and returns the corner's status
+  // including that bit.
+  auto fetch_status = [this](size_t idx, uint8_t bit) {
+    // Note that the acq-rel semantics of this fetch are actually needed to
+    // ensure that the pixel data of the group is already written to memory.
+    size_t status = counters_[idx].fetch_or(bit);
+    JXL_DASSERT((bit & status) == 0);
+    return bit | status;
+  };
+
+  // Seen from each corner, this group occupies the opposite quadrant.
+  size_t top_left_status = fetch_status(top_left_idx, kBottomRight);
+  size_t top_right_status = fetch_status(top_right_idx, kBottomLeft);
+  size_t bottom_right_status = fetch_status(bottom_right_idx, kTopLeft);
+  size_t bottom_left_status = fetch_status(bottom_left_idx, kTopRight);
+
+  size_t padx = PaddingX(padding);
+  size_t pady = padding;
+
+  size_t x1 = block_rect.x0() + block_rect.xsize();
+  size_t y1 = block_rect.y0() + block_rect.ysize();
+
+  bool is_last_group_x = frame_dim_.xsize_groups == x + 1;
+  bool is_last_group_y = frame_dim_.ysize_groups == y + 1;
+
+  // Start of border of neighbouring group, end of border of this group, start
+  // of border of this group (on the other side), end of border of next group.
+  // At the image edges there is no border: both positions collapse to the
+  // edge (0 or the padded frame size), giving a zero-width strip.
+  size_t xpos[4] = {
+      block_rect.x0() == 0 ? 0 : block_rect.x0() * kBlockDim - padx,
+      block_rect.x0() == 0 ? 0 : block_rect.x0() * kBlockDim + padx,
+      is_last_group_x ? frame_dim_.xsize_padded : x1 * kBlockDim - padx,
+      is_last_group_x ? frame_dim_.xsize_padded : x1 * kBlockDim + padx};
+  size_t ypos[4] = {
+      block_rect.y0() == 0 ? 0 : block_rect.y0() * kBlockDim - pady,
+      block_rect.y0() == 0 ? 0 : block_rect.y0() * kBlockDim + pady,
+      is_last_group_y ? frame_dim_.ysize_padded : y1 * kBlockDim - pady,
+      is_last_group_y ? frame_dim_.ysize_padded : y1 * kBlockDim + pady};
+
+  *num_to_finalize = 0;
+  // Appends the rect spanning xpos[x0]..xpos[x1] by ypos[y0]..ypos[y1];
+  // the arguments are indices into xpos/ypos, not coordinates. Empty rects
+  // are dropped.
+  auto append_rect = [&](size_t x0, size_t x1, size_t y0, size_t y1) {
+    Rect rect(xpos[x0], ypos[y0], xpos[x1] - xpos[x0], ypos[y1] - ypos[y0]);
+    if (rect.xsize() == 0 || rect.ysize() == 0) return;
+    JXL_DASSERT(*num_to_finalize < kMaxToFinalize);
+    rects_to_finalize[(*num_to_finalize)++] = rect;
+  };
+
+  // Because of how group borders are assigned, it is impossible that we need to
+  // process the left and right side of some area but not the center area. Thus,
+  // we compute the first/last part to process in every horizontal strip and
+  // merge them together. We first collect a mask of what parts should be
+  // processed.
+  // We do this horizontally rather than vertically because horizontal borders
+  // are larger.
+  bool available_parts_mask[3][3] = {};  // [x][y]
+  // Center
+  available_parts_mask[1][1] = true;
+  // Corners
+  // A corner area is ours only if this group was the last of the four to
+  // finish, i.e. all four bits are now set.
+  if (top_left_status == 0xF) available_parts_mask[0][0] = true;
+  if (top_right_status == 0xF) available_parts_mask[2][0] = true;
+  if (bottom_right_status == 0xF) available_parts_mask[2][2] = true;
+  if (bottom_left_status == 0xF) available_parts_mask[0][2] = true;
+  // Other borders
+  // A border is ours if the neighbouring group across it had already set its
+  // bit on the corner that border is assigned to.
+  if (top_left_status & kTopRight) available_parts_mask[1][0] = true;
+  if (top_left_status & kBottomLeft) available_parts_mask[0][1] = true;
+  if (top_right_status & kBottomRight) available_parts_mask[2][1] = true;
+  if (bottom_left_status & kBottomRight) available_parts_mask[1][2] = true;
+
+  // Collect horizontal ranges.
+  // For each of the three horizontal strips, [first, second) is the range of
+  // available columns; kNoSegment in both fields means the strip is empty.
+  constexpr size_t kNoSegment = 3;
+  std::pair<size_t, size_t> horizontal_segments[3] = {{kNoSegment, kNoSegment},
+                                                      {kNoSegment, kNoSegment},
+                                                      {kNoSegment, kNoSegment}};
+  for (size_t y = 0; y < 3; y++) {
+    for (size_t x = 0; x < 3; x++) {
+      if (!available_parts_mask[x][y]) continue;
+      JXL_DASSERT(horizontal_segments[y].second == kNoSegment ||
+                  horizontal_segments[y].second == x);
+      JXL_DASSERT((horizontal_segments[y].first == kNoSegment) ==
+                  (horizontal_segments[y].second == kNoSegment));
+      if (horizontal_segments[y].first == kNoSegment) {
+        horizontal_segments[y].first = x;
+      }
+      horizontal_segments[y].second = x + 1;
+    }
+  }
+  // Merge vertically adjacent strips that cover the same column range, so
+  // that as few rects as possible are emitted.
+  if (horizontal_segments[0] == horizontal_segments[1] &&
+      horizontal_segments[0] == horizontal_segments[2]) {
+    append_rect(horizontal_segments[0].first, horizontal_segments[0].second, 0,
+                3);
+  } else if (horizontal_segments[0] == horizontal_segments[1]) {
+    append_rect(horizontal_segments[0].first, horizontal_segments[0].second, 0,
+                2);
+    append_rect(horizontal_segments[2].first, horizontal_segments[2].second, 2,
+                3);
+  } else if (horizontal_segments[1] == horizontal_segments[2]) {
+    append_rect(horizontal_segments[0].first, horizontal_segments[0].second, 0,
+                1);
+    append_rect(horizontal_segments[1].first, horizontal_segments[1].second, 1,
+                3);
+  } else {
+    append_rect(horizontal_segments[0].first, horizontal_segments[0].second, 0,
+                1);
+    append_rect(horizontal_segments[1].first, horizontal_segments[1].second, 1,
+                2);
+    append_rect(horizontal_segments[2].first, horizontal_segments[2].second, 2,
+                3);
+  }
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group_border.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group_border.h
new file mode 100644
index 0000000000..67af6afd7d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_group_border.h
@@ -0,0 +1,60 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_GROUP_BORDER_H_
+#define LIB_JXL_DEC_GROUP_BORDER_H_
+
+#include <stddef.h>
+
+#include <atomic>
+
+#include "lib/jxl/base/arch_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// Decides, for groups that may finish in any order, which group is
+// responsible for finalizing each border area shared with its neighbours.
+// State is one atomic status byte per group corner.
+class GroupBorderAssigner {
+ public:
+  // Prepare the GroupBorderAssigner to handle a given frame.
+  void Init(const FrameDimensions& frame_dim);
+  // Marks the group `group_id` as done, and writes the (at most
+  // kMaxToFinalize) rects to run FinalizeImageRect on into
+  // `rects_to_finalize`, storing their count in `*num_to_finalize`.
+  // `padding` is the border size used vertically; horizontally it is rounded
+  // up with PaddingX().
+  void GroupDone(size_t group_id, size_t padding, Rect* rects_to_finalize,
+                 size_t* num_to_finalize);
+  // Marks a group as not-done, for running re-paints.
+  void ClearDone(size_t group_id);
+
+  // Upper bound on the number of rects GroupDone() writes.
+  static constexpr size_t kMaxToFinalize = 3;
+
+  // Vectors on ARM NEON are never wider than 4 floats, so rounding to multiples
+  // of 4 is enough.
+#if defined(__ARM_NEON) || defined(__ARM_NEON__)
+  static constexpr size_t kPaddingXRound = 4;
+#else
+  static constexpr size_t kPaddingXRound = kBlockDim;
+#endif
+
+  // Returns the necessary amount of padding for the X axis.
+  static size_t PaddingX(size_t padding) {
+    return RoundUpTo(padding, kPaddingXRound);
+  }
+
+ private:
+  FrameDimensions frame_dim_;
+  // One status byte per group corner, laid out row-major with
+  // (xsize_groups + 1) corners per row. Each byte is a bit set of the
+  // constants below, recording which of the four adjacent groups are done.
+  std::unique_ptr<std::atomic<uint8_t>[]> counters_;
+
+  // Constants to identify group positions relative to the corners.
+  static constexpr uint8_t kTopLeft = 0x01;
+  static constexpr uint8_t kTopRight = 0x02;
+  static constexpr uint8_t kBottomRight = 0x04;
+  static constexpr uint8_t kBottomLeft = 0x08;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_GROUP_BORDER_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_huffman.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_huffman.cc
new file mode 100644
index 0000000000..05b275773a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_huffman.cc
@@ -0,0 +1,255 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_huffman.h"
+
+#include <string.h> /* for memset */
+
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/huffman_table.h"
+
+namespace jxl {
+
+static const int kCodeLengthCodes = 18;
+static const uint8_t kCodeLengthCodeOrder[kCodeLengthCodes] = {
+    1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+};
+static const uint8_t kDefaultCodeLength = 8;
+static const uint8_t kCodeLengthRepeatCode = 16;
+
+// Reads `num_symbols` Huffman code lengths from `br` into `code_lengths`.
+//
+// The lengths are themselves prefix-coded with the "code length code"
+// described by `code_length_code_lengths` (kCodeLengthCodes entries).
+// Decoded symbols below kCodeLengthRepeatCode (16) are literal lengths;
+// symbol 16 repeats the previous non-zero length and the remaining symbol
+// repeats zero.
+//
+// Returns 0 on failure (invalid code-length code, a repeat running past
+// `num_symbols`, or lengths that do not form a complete prefix code) and a
+// non-zero value on success.
+int ReadHuffmanCodeLengths(const uint8_t* code_length_code_lengths,
+                           int num_symbols, uint8_t* code_lengths,
+                           BitReader* br) {
+  int symbol = 0;
+  uint8_t prev_code_len = kDefaultCodeLength;
+  int repeat = 0;
+  uint8_t repeat_code_len = 0;
+  // Remaining code space in units of 2^-15; a complete code ends at exactly 0.
+  int space = 32768;
+  // 5-bit lookup table for the code length code.
+  HuffmanCode table[32];
+
+  uint16_t counts[16] = {0};
+  for (int i = 0; i < kCodeLengthCodes; ++i) {
+    ++counts[code_length_code_lengths[i]];
+  }
+  if (!BuildHuffmanTable(table, 5, code_length_code_lengths, kCodeLengthCodes,
+                         &counts[0])) {
+    return 0;
+  }
+
+  while (symbol < num_symbols && space > 0) {
+    const HuffmanCode* p = table;
+    uint8_t code_len;
+    br->Refill();
+    p += br->PeekFixedBits<5>();
+    br->Consume(p->bits);
+    code_len = (uint8_t)p->value;
+    if (code_len < kCodeLengthRepeatCode) {
+      // Literal code length.
+      repeat = 0;
+      code_lengths[symbol++] = code_len;
+      if (code_len != 0) {
+        prev_code_len = code_len;
+        space -= 32768u >> code_len;
+      }
+    } else {
+      // Repeat code: 2 extra bits for symbol 16, 3 extra bits for symbol 17.
+      const int extra_bits = code_len - 14;
+      int old_repeat;
+      int repeat_delta;
+      uint8_t new_len = 0;
+      if (code_len == kCodeLengthRepeatCode) {
+        new_len = prev_code_len;
+      }
+      if (repeat_code_len != new_len) {
+        repeat = 0;
+        repeat_code_len = new_len;
+      }
+      old_repeat = repeat;
+      // Consecutive repeat codes of the same kind extend the previous run:
+      // the earlier count is scaled up before the new extra bits are added.
+      if (repeat > 0) {
+        repeat -= 2;
+        repeat <<= extra_bits;
+      }
+      repeat += (int)br->ReadBits(extra_bits) + 3;
+      // Only the newly added part of the run still has to be written.
+      repeat_delta = repeat - old_repeat;
+      if (symbol + repeat_delta > num_symbols) {
+        return 0;
+      }
+      memset(&code_lengths[symbol], repeat_code_len, (size_t)repeat_delta);
+      symbol += repeat_delta;
+      if (repeat_code_len != 0) {
+        space -= repeat_delta << (15 - repeat_code_len);
+      }
+    }
+  }
+  // The lengths must use up the code space exactly.
+  if (space != 0) {
+    return 0;
+  }
+  // Symbols not reached before the code became complete get length 0.
+  memset(&code_lengths[symbol], 0, (size_t)(num_symbols - symbol));
+  return true;
+}
+
+// Reads a "simple" prefix code (1 to 4 explicitly listed symbols) from `br`
+// and fills `table` with a lookup table of 1 << kHuffmanTableBits entries.
+// Returns false if a symbol is outside the alphabet or appears twice.
+static JXL_INLINE bool ReadSimpleCode(size_t alphabet_size, BitReader* br,
+                                      HuffmanCode* table) {
+  // Number of bits needed to represent any symbol of the alphabet.
+  size_t max_bits =
+      (alphabet_size > 1u) ? FloorLog2Nonzero(alphabet_size - 1u) + 1 : 0;
+
+  size_t num_symbols = br->ReadFixedBits<2>() + 1;
+
+  uint16_t symbols[4] = {0};
+  for (size_t i = 0; i < num_symbols; ++i) {
+    uint16_t symbol = br->ReadBits(max_bits);
+    if (symbol >= alphabet_size) {
+      return false;
+    }
+    symbols[i] = symbol;
+  }
+
+  // Reject duplicated symbols.
+  for (size_t i = 0; i < num_symbols - 1; ++i) {
+    for (size_t j = i + 1; j < num_symbols; ++j) {
+      if (symbols[i] == symbols[j]) return false;
+    }
+  }
+
+  // With 4 symbols there are two possible code shapes; one extra bit selects
+  // between them. From here on num_symbols == 5 denotes the second shape
+  // (code lengths 1, 2, 3, 3) rather than a fifth symbol.
+  if (num_symbols == 4) num_symbols += br->ReadFixedBits<1>();
+
+  const auto swap_symbols = [&symbols](size_t i, size_t j) {
+    uint16_t t = symbols[j];
+    symbols[j] = symbols[i];
+    symbols[i] = t;
+  };
+
+  // Each table entry is {code length in bits, symbol}.
+  size_t table_size = 1;
+  switch (num_symbols) {
+    case 1:
+      table[0] = {0, symbols[0]};
+      break;
+    case 2:
+      if (symbols[0] > symbols[1]) swap_symbols(0, 1);
+      table[0] = {1, symbols[0]};
+      table[1] = {1, symbols[1]};
+      table_size = 2;
+      break;
+    case 3:
+      if (symbols[1] > symbols[2]) swap_symbols(1, 2);
+      table[0] = {1, symbols[0]};
+      table[2] = {1, symbols[0]};
+      table[1] = {2, symbols[1]};
+      table[3] = {2, symbols[2]};
+      table_size = 4;
+      break;
+    case 4: {
+      // All four symbols get 2-bit codes, assigned in sorted symbol order.
+      for (size_t i = 0; i < 3; ++i) {
+        for (size_t j = i + 1; j < 4; ++j) {
+          if (symbols[i] > symbols[j]) swap_symbols(i, j);
+        }
+      }
+      table[0] = {2, symbols[0]};
+      table[2] = {2, symbols[1]};
+      table[1] = {2, symbols[2]};
+      table[3] = {2, symbols[3]};
+      table_size = 4;
+      break;
+    }
+    case 5: {
+      if (symbols[2] > symbols[3]) swap_symbols(2, 3);
+      table[0] = {1, symbols[0]};
+      table[1] = {2, symbols[1]};
+      table[2] = {1, symbols[0]};
+      table[3] = {3, symbols[2]};
+      table[4] = {1, symbols[0]};
+      table[5] = {2, symbols[1]};
+      table[6] = {1, symbols[0]};
+      table[7] = {3, symbols[3]};
+      table_size = 8;
+      break;
+    }
+    default: {
+      // Unreachable.
+      return false;
+    }
+  }
+
+  // Replicate the small table by repeated doubling until it covers all
+  // 1 << kHuffmanTableBits lookup indices.
+  const uint32_t goal_size = 1u << kHuffmanTableBits;
+  while (table_size != goal_size) {
+    memcpy(&table[table_size], &table[0],
+           (size_t)table_size * sizeof(table[0]));
+    table_size <<= 1;
+  }
+
+  return true;
+}
+
+// Reads a prefix code for an alphabet of `alphabet_size` symbols from `br`
+// and builds the two-level decoding table in `table_`.
+//
+// Two encodings are supported, selected by the first two bits:
+//   * value 1: a "simple" code with 1 to 4 explicitly listed symbols;
+//   * values 0, 2, 3: a full code, whose code lengths are themselves
+//     prefix-coded; the value is the number of leading code-length-code
+//     entries that are skipped (left at zero).
+//
+// Returns false if `alphabet_size` exceeds 1 << PREFIX_MAX_BITS or if the
+// stream does not describe a valid code.
+bool HuffmanDecodingData::ReadFromBitStream(size_t alphabet_size,
+                                            BitReader* br) {
+  if (alphabet_size > (1 << PREFIX_MAX_BITS)) return false;
+
+  /* simple_code_or_skip is used as follows:
+     1 for simple code;
+     0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */
+  uint32_t simple_code_or_skip = br->ReadFixedBits<2>();
+  if (simple_code_or_skip == 1u) {
+    table_.resize(1u << kHuffmanTableBits);
+    return ReadSimpleCode(alphabet_size, br, table_.data());
+  }
+
+  std::vector<uint8_t> code_lengths(alphabet_size, 0);
+  uint8_t code_length_code_lengths[kCodeLengthCodes] = {0};
+  // Remaining code space of the code length code, in units of 1/32.
+  int space = 32;
+  int num_codes = 0;
+  /* Static Huffman code for the code length code lengths */
+  static const HuffmanCode huff[16] = {
+      {2, 0}, {2, 4}, {2, 3}, {3, 2}, {2, 0}, {2, 4}, {2, 3}, {4, 1},
+      {2, 0}, {2, 4}, {2, 3}, {3, 2}, {2, 0}, {2, 4}, {2, 3}, {4, 5},
+  };
+  for (size_t i = simple_code_or_skip; i < kCodeLengthCodes && space > 0; ++i) {
+    const int code_len_idx = kCodeLengthCodeOrder[i];
+    const HuffmanCode* p = huff;
+    uint8_t v;
+    br->Refill();
+    p += br->PeekFixedBits<4>();
+    br->Consume(p->bits);
+    v = (uint8_t)p->value;
+    code_length_code_lengths[code_len_idx] = v;
+    if (v != 0) {
+      space -= (32u >> v);
+      ++num_codes;
+    }
+  }
+  // The code length code must be complete, or consist of a single symbol.
+  // code_lengths.data() is used instead of &code_lengths[0] because the
+  // vector is empty when alphabet_size == 0 and indexing an empty vector is
+  // undefined behaviour; ReadHuffmanCodeLengths then fails without touching
+  // the buffer.
+  bool ok = (num_codes == 1 || space == 0) &&
+            ReadHuffmanCodeLengths(code_length_code_lengths, alphabet_size,
+                                   code_lengths.data(), br);
+
+  if (!ok) return false;
+  // Histogram of code lengths, required by BuildHuffmanTable.
+  uint16_t counts[16] = {0};
+  for (size_t i = 0; i < alphabet_size; ++i) {
+    ++counts[code_lengths[i]];
+  }
+  // Reserve room for the table before building it, then shrink to the size
+  // BuildHuffmanTable reports.
+  // NOTE(review): 376 is presumably the worst-case extra space needed for
+  // second-level tables - confirm against BuildHuffmanTable.
+  table_.resize(alphabet_size + 376);
+  uint32_t table_size =
+      BuildHuffmanTable(table_.data(), kHuffmanTableBits, code_lengths.data(),
+                        alphabet_size, &counts[0]);
+  table_.resize(table_size);
+  return (table_size > 0);
+}
+
+// Reads one prefix-coded symbol from `br` using the two-level table in
+// `table_`: the first kHuffmanTableBits bits index the root table; codes
+// longer than that continue in a second-level table.
+uint16_t HuffmanDecodingData::ReadSymbol(BitReader* br) const {
+  const HuffmanCode* entry = table_.data() + br->PeekBits(kHuffmanTableBits);
+  const size_t root_entry_bits = entry->bits;
+  if (root_entry_bits > kHuffmanTableBits) {
+    // The root entry is a link: its value is the offset to the second-level
+    // table, which is indexed by the remaining bits of the code.
+    br->Consume(kHuffmanTableBits);
+    const size_t second_level_bits = root_entry_bits - kHuffmanTableBits;
+    entry += entry->value;
+    entry += br->PeekBits(second_level_bits);
+  }
+  br->Consume(entry->bits);
+  return entry->value;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_huffman.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_huffman.h
new file mode 100644
index 0000000000..162c3e309c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_huffman.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_HUFFMAN_H_
+#define LIB_JXL_DEC_HUFFMAN_H_
+
+#include <memory>
+#include <vector>
+
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/huffman_table.h"
+
+namespace jxl {
+
+static constexpr size_t kHuffmanTableBits = 8u;
+
+// A prefix (Huffman) code read from the bitstream, stored as a lookup table.
+struct HuffmanDecodingData {
+  // Decodes the Huffman code lengths from the bit-stream and fills `table_`
+  // (resizing it as needed) with the corresponding 2-level Huffman decoding
+  // table. Returns false if the Huffman code lengths cannot be decoded.
+  bool ReadFromBitStream(size_t alphabet_size, BitReader* br);
+
+  // Decodes the next symbol from the bit-stream using `table_`.
+  uint16_t ReadSymbol(BitReader* br) const;
+
+  // Root table of 1 << kHuffmanTableBits entries, followed by any
+  // second-level tables.
+  std::vector<HuffmanCode> table_;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_HUFFMAN_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_modular.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_modular.cc
new file mode 100644
index 0000000000..64773eb4e4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_modular.cc
@@ -0,0 +1,592 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_modular.h"
+
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/frame_header.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_modular.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Rebind;
+
+// Writes (row_in[x] + row_in_Y[x]) * factor, converted to float, to
+// row_out[x]. Processes whole vectors, so it may read and write up to one
+// vector past `xsize`.
+void MultiplySum(const size_t xsize,
+                 const pixel_type* const JXL_RESTRICT row_in,
+                 const pixel_type* const JXL_RESTRICT row_in_Y,
+                 const float factor, float* const JXL_RESTRICT row_out) {
+  const HWY_FULL(float) df;
+  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
+  const auto scale = Set(df, factor);
+  const size_t step = Lanes(di);
+  for (size_t x = 0; x < xsize; x += step) {
+    const auto lhs = Load(di, row_in + x);
+    const auto rhs = Load(di, row_in_Y + x);
+    Store(ConvertTo(df, lhs + rhs) * scale, df, row_out + x);
+  }
+}
+
+// Converts one row of integer samples to float, scales it by `factor` and
+// writes the same result to row `y` of all three planes of `decoded`.
+// The channel argument is ignored.
+void RgbFromSingle(const size_t xsize,
+                   const pixel_type* const JXL_RESTRICT row_in,
+                   const float factor, Image3F* decoded, size_t /*c*/,
+                   size_t y) {
+  const HWY_FULL(float) df;
+  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
+
+  float* const JXL_RESTRICT dst_r = decoded->PlaneRow(0, y);
+  float* const JXL_RESTRICT dst_g = decoded->PlaneRow(1, y);
+  float* const JXL_RESTRICT dst_b = decoded->PlaneRow(2, y);
+
+  const auto scale = Set(df, factor);
+  const size_t step = Lanes(di);
+  for (size_t x = 0; x < xsize; x += step) {
+    const auto scaled = ConvertTo(df, Load(di, row_in + x)) * scale;
+    Store(scaled, df, dst_r + x);
+    Store(scaled, df, dst_g + x);
+    Store(scaled, df, dst_b + x);
+  }
+}
+
+// Converts one row of integer samples to float, scales it by `factor` and
+// writes it to row `y` of plane `c` of `decoded`.
+// Same signature as RgbFromSingle so we can assign to the same pointer.
+void SingleFromSingle(const size_t xsize,
+                      const pixel_type* const JXL_RESTRICT row_in,
+                      const float factor, Image3F* decoded, size_t c,
+                      size_t y) {
+  const HWY_FULL(float) df;
+  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
+
+  float* const JXL_RESTRICT dst = decoded->PlaneRow(c, y);
+
+  const auto scale = Set(df, factor);
+  const size_t step = Lanes(di);
+  for (size_t x = 0; x < xsize; x += step) {
+    const auto scaled = ConvertTo(df, Load(di, row_in + x)) * scale;
+    Store(scaled, df, dst + x);
+  }
+}
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(MultiplySum);       // Local function
+HWY_EXPORT(RgbFromSingle);     // Local function
+HWY_EXPORT(SingleFromSingle);  // Local function
+
+// Converts a row of custom [bits]-bit floats (with [exp_bits] exponent bits),
+// stored as integers, back to binary32 floats.
+//
+//   row_in    xsize samples holding the custom float bit patterns.
+//   row_out   receives xsize binary32 values.
+//   bits      total width of the custom format (sign + exponent + mantissa).
+//   exp_bits  number of exponent bits; must be 8 when bits == 32.
+//
+// NOTE(review): an all-ones source exponent is not special-cased here, and
+// mant_shift becomes negative if bits - exp_bits - 1 > 23 - presumably the
+// caller validates the format; confirm.
+void int_to_float(const pixel_type* const JXL_RESTRICT row_in,
+                  float* const JXL_RESTRICT row_out, const size_t xsize,
+                  const int bits, const int exp_bits) {
+  if (bits == 32) {
+    // Already binary32: copy the bit patterns unchanged.
+    JXL_ASSERT(sizeof(pixel_type) == sizeof(float));
+    JXL_ASSERT(exp_bits == 8);
+    memcpy(row_out, row_in, xsize * sizeof(float));
+    return;
+  }
+  int exp_bias = (1 << (exp_bits - 1)) - 1;
+  int sign_shift = bits - 1;
+  int mant_bits = bits - exp_bits - 1;
+  // Left shift that aligns the source mantissa with binary32's 23 bits.
+  int mant_shift = 23 - mant_bits;
+  for (size_t x = 0; x < xsize; ++x) {
+    uint32_t f;
+    memcpy(&f, &row_in[x], 4);
+    int signbit = (f >> sign_shift);
+    // Drop the sign bit (and anything above it).
+    f &= (1 << sign_shift) - 1;
+    if (f == 0) {
+      row_out[x] = (signbit ? -0.f : 0.f);
+      continue;
+    }
+    int exp = (f >> mant_bits);
+    int mantissa = (f & ((1 << mant_bits) - 1));
+    mantissa <<= mant_shift;
+    // Try to normalize only if there is space for maneuver.
+    if (exp == 0 && exp_bits < 8) {
+      // subnormal number
+      // f != 0 and exp == 0 imply a non-zero mantissa, so this terminates.
+      while ((mantissa & 0x800000) == 0) {
+        mantissa <<= 1;
+        exp--;
+      }
+      exp++;
+      // remove leading 1 because it is implicit now
+      mantissa &= 0x7fffff;
+    }
+    exp -= exp_bias;
+    // broke up the arbitrary float into its parts, now reassemble into
+    // binary32
+    exp += 127;
+    JXL_ASSERT(exp >= 0);
+    f = (signbit ? 0x80000000 : 0);
+    f |= (exp << 23);
+    f |= mantissa;
+    memcpy(&row_out[x], &f, 4);
+  }
+}
+
+// Reads the global modular section of a frame: the optional global MA tree
+// and its histograms, followed by the global modular stream. Sets up
+// `full_image` with one channel per colour channel (only when the frame
+// itself is modular-encoded) plus one per extra channel.
+//
+// With `allow_truncated_group` set, a non-fatal decode error is not treated
+// as a failure; the status is still returned to the caller after
+// `full_image` has been stored.
+Status ModularFrameDecoder::DecodeGlobalInfo(BitReader* reader,
+                                             const FrameHeader& frame_header,
+                                             bool allow_truncated_group) {
+  bool decode_color = frame_header.encoding == FrameEncoding::kModular;
+  const auto& metadata = frame_header.nonserialized_metadata->m;
+  bool is_gray = metadata.color_encoding.IsGray();
+  size_t nb_chans = 3;
+  if (is_gray && frame_header.color_transform == ColorTransform::kNone) {
+    nb_chans = 1;
+  }
+  bool has_tree = reader->ReadBits(1);
+  if (has_tree) {
+    // Bound the tree size by the number of samples (plus a constant).
+    size_t tree_size_limit =
+        1024 + frame_dim.xsize * frame_dim.ysize * nb_chans;
+    JXL_RETURN_IF_ERROR(DecodeTree(reader, &tree, tree_size_limit));
+    JXL_RETURN_IF_ERROR(
+        DecodeHistograms(reader, (tree.size() + 1) / 2, &code, &context_map));
+  }
+  do_color = decode_color;
+  // When the colour channels are not modular-encoded, only extra channels
+  // are stored in the modular image.
+  if (!do_color) nb_chans = 0;
+  size_t nb_extra = metadata.extra_channel_info.size();
+
+  bool fp = metadata.bit_depth.floating_point_sample;
+
+  // bits_per_sample is just metadata for XYB images.
+  if (metadata.bit_depth.bits_per_sample >= 32 && do_color &&
+      frame_header.color_transform != ColorTransform::kXYB) {
+    if (metadata.bit_depth.bits_per_sample == 32 && fp == false) {
+      return JXL_FAILURE("uint32_t not supported in dec_modular");
+    } else if (metadata.bit_depth.bits_per_sample > 32) {
+      return JXL_FAILURE("bits_per_sample > 32 not supported");
+    }
+  }
+
+  Image gi(frame_dim.xsize, frame_dim.ysize, metadata.bit_depth.bits_per_sample,
+           nb_chans + nb_extra);
+
+  // For YCbCr, shrink each colour channel according to its chroma
+  // subsampling shifts.
+  if (frame_header.color_transform == ColorTransform::kYCbCr) {
+    for (size_t c = 0; c < nb_chans; c++) {
+      gi.channel[c].hshift = frame_header.chroma_subsampling.HShift(c);
+      gi.channel[c].vshift = frame_header.chroma_subsampling.VShift(c);
+      size_t xsize_shifted =
+          DivCeil(frame_dim.xsize, 1 << gi.channel[c].hshift);
+      size_t ysize_shifted =
+          DivCeil(frame_dim.ysize, 1 << gi.channel[c].vshift);
+      gi.channel[c].shrink(xsize_shifted, ysize_shifted);
+    }
+  }
+
+  // Extra channels follow the colour channels; each is sized by its own
+  // upsampling factor, and its shift is expressed relative to the frame's
+  // upsampling.
+  for (size_t ec = 0, c = nb_chans; ec < nb_extra; ec++, c++) {
+    size_t ecups = frame_header.extra_channel_upsampling[ec];
+    gi.channel[c].shrink(DivCeil(frame_dim.xsize_upsampled, ecups),
+                         DivCeil(frame_dim.ysize_upsampled, ecups));
+    gi.channel[c].hshift = gi.channel[c].vshift =
+        CeilLog2Nonzero(ecups) - CeilLog2Nonzero(frame_header.upsampling);
+  }
+
+  ModularOptions options;
+  options.max_chan_size = frame_dim.group_dim;
+  options.group_dim = frame_dim.group_dim;
+  Status dec_status = ModularGenericDecompress(
+      reader, gi, &global_header, ModularStreamId::Global().ID(frame_dim),
+      &options,
+      /*undo_transforms=*/-2, &tree, &code, &context_map,
+      allow_truncated_group);
+  // Without truncation tolerance any error fails; with it, only fatal ones.
+  if (!allow_truncated_group) JXL_RETURN_IF_ERROR(dec_status);
+  if (dec_status.IsFatalError()) {
+    return JXL_FAILURE("Failed to decode global modular info");
+  }
+
+  // TODO(eustas): are we sure this can be done after partial decode?
+  // Record whether any non-meta channel is smaller than a group in both
+  // dimensions.
+  have_something = false;
+  for (size_t c = 0; c < gi.channel.size(); c++) {
+    Channel& gic = gi.channel[c];
+    if (c >= gi.nb_meta_channels && gic.w < frame_dim.group_dim &&
+        gic.h < frame_dim.group_dim)
+      have_something = true;
+  }
+  full_image = std::move(gi);
+  return dec_status;
+}
+
+// Decodes (or zero-fills) the part of `full_image` covered by `rect` for one
+// modular DC or AC group.
+//
+// Only channels that come after the leading run of channels fitting within a
+// single group, and whose min(hshift, vshift) lies in [minShift, maxShift],
+// take part. For each such channel `rect` is scaled down by the channel's
+// shifts; channels whose scaled rect is empty are skipped.
+//
+//   rect      region of the frame covered by this group.
+//   reader    bitstream positioned at the group's modular stream; not read
+//             when `zerofill` is set.
+//   stream    stream identifier; must be of kind kModularDC or kModularAC.
+//   zerofill  if true, nothing is decoded and the selected regions are set
+//             to zero instead.
+//
+// Returns an error if the group's modular stream fails to decode.
+Status ModularFrameDecoder::DecodeGroup(const Rect& rect, BitReader* reader,
+                                        int minShift, int maxShift,
+                                        const ModularStreamId& stream,
+                                        bool zerofill) {
+  JXL_DASSERT(stream.kind == ModularStreamId::kModularDC ||
+              stream.kind == ModularStreamId::kModularAC);
+  const size_t xsize = rect.xsize();
+  const size_t ysize = rect.ysize();
+  // start at the first bigger-than-groupsize non-metachannel
+  size_t c = full_image.nb_meta_channels;
+  for (; c < full_image.channel.size(); c++) {
+    Channel& fc = full_image.channel[c];
+    if (fc.w > frame_dim.group_dim || fc.h > frame_dim.group_dim) break;
+  }
+  const size_t beginc = c;
+
+  // True if the channel's resolution level belongs to this group's pass.
+  const auto in_shift_range = [minShift, maxShift](const Channel& ch) {
+    const int shift = std::min(ch.hshift, ch.vshift);
+    return shift >= minShift && shift <= maxShift;
+  };
+  // `rect` scaled to the channel's resolution and clamped to the channel.
+  const auto channel_rect = [&rect](const Channel& ch) {
+    return Rect(rect.x0() >> ch.hshift, rect.y0() >> ch.vshift,
+                rect.xsize() >> ch.hshift, rect.ysize() >> ch.vshift, ch.w,
+                ch.h);
+  };
+
+  if (zerofill) {
+    // Nothing to decode, so no scratch group image is needed: clear the
+    // destination regions directly.
+    for (c = beginc; c < full_image.channel.size(); c++) {
+      Channel& fc = full_image.channel[c];
+      if (!in_shift_range(fc)) continue;
+      Rect r = channel_rect(fc);
+      if (r.xsize() == 0 || r.ysize() == 0) continue;
+      for (size_t y = 0; y < r.ysize(); ++y) {
+        pixel_type* const JXL_RESTRICT row_out = r.Row(&fc.plane, y);
+        memset(row_out, 0, r.xsize() * sizeof(*row_out));
+      }
+    }
+    return true;
+  }
+
+  // Scratch image holding one channel per selected channel, sized to the
+  // scaled group rect.
+  Image gi(xsize, ysize, full_image.bitdepth, 0);
+  for (c = beginc; c < full_image.channel.size(); c++) {
+    Channel& fc = full_image.channel[c];
+    if (!in_shift_range(fc)) continue;
+    Rect r = channel_rect(fc);
+    if (r.xsize() == 0 || r.ysize() == 0) continue;
+    Channel gc(r.xsize(), r.ysize());
+    gc.hshift = fc.hshift;
+    gc.vshift = fc.vshift;
+    gi.channel.emplace_back(std::move(gc));
+  }
+
+  ModularOptions options;
+  if (!ModularGenericDecompress(
+          reader, gi, /*header=*/nullptr, stream.ID(frame_dim), &options,
+          /*undo_transforms=*/-1, &tree, &code, &context_map))
+    return JXL_FAILURE("Failed to decode modular group");
+
+  // Copy the decoded channels back into place. The selection below matches
+  // the one used to build `gi`, so `gic` walks gi.channel in step.
+  size_t gic = 0;
+  for (c = beginc; c < full_image.channel.size(); c++) {
+    Channel& fc = full_image.channel[c];
+    if (!in_shift_range(fc)) continue;
+    Rect r = channel_rect(fc);
+    if (r.xsize() == 0 || r.ysize() == 0) continue;
+    for (size_t y = 0; y < r.ysize(); ++y) {
+      pixel_type* const JXL_RESTRICT row_out = r.Row(&fc.plane, y);
+      const pixel_type* const JXL_RESTRICT row_in = gi.channel[gic].Row(y);
+      for (size_t x = 0; x < r.xsize(); ++x) {
+        row_out[x] = row_in[x];
+      }
+    }
+    gic++;
+  }
+  return true;
+}
+Status ModularFrameDecoder::DecodeVarDCTDC(size_t group_id, BitReader* reader,
+                                           PassesDecoderState* dec_state) {
+  // Decodes the three quantized DC planes of DC group `group_id` from `reader`
+  // and passes them to DequantDC() together with the dequantization inputs.
+  // TODO(eustas): investigate if we could reduce the impact of
+  //               EvalRationalPolynomial; generally speaking, the limit is
+  //               2**(128/(3*magic)), where 128 comes from IEEE 754 exponent,
+  //               3 comes from XybToRgb that cubes the values, and "magic" is
+  //               the sum of all other contributions. 2**18 is known to lead
+  //               to NaN on input found by fuzzing (see commit message).
+  const auto& shared = *dec_state->shared;
+  const auto& subsampling = shared.frame_header.chroma_subsampling;
+  const Rect dc_rect = shared.DCGroupRect(group_id);
+  Image dc_image(dc_rect.xsize(), dc_rect.ysize(), full_image.bitdepth, 3);
+  const size_t stream_id = ModularStreamId::VarDCTDC(group_id).ID(frame_dim);
+  reader->Refill();
+  const float dc_scale = 1.0f / (1 << reader->ReadFixedBits<2>());  // 2^-k
+  for (size_t c = 0; c < 3; ++c) {
+    Channel& plane = dc_image.channel[c == 2 ? 2 : 1 - c];  // 1, 0, 2
+    plane.w >>= subsampling.HShift(c);
+    plane.h >>= subsampling.VShift(c);
+    plane.shrink();
+  }
+  ModularOptions options;
+  if (!ModularGenericDecompress(
+          reader, dc_image, /*header=*/nullptr, stream_id, &options,
+          /*undo_transforms=*/-1, &tree, &code, &context_map))
+    return JXL_FAILURE("Failed to decode modular DC group");
+  DequantDC(dc_rect, &dec_state->shared_storage.dc_storage,
+            &dec_state->shared_storage.quant_dc, dc_image,
+            shared.quantizer.MulDC(), dc_scale, shared.cmap.DCFactors(),
+            subsampling, shared.block_ctx_map);
+  return true;
+}
+
+Status ModularFrameDecoder::DecodeAcMetadata(size_t group_id, BitReader* reader,
+                                             PassesDecoderState* dec_state) {
+  const Rect r = dec_state->shared->DCGroupRect(group_id);
+  size_t upper_bound = r.xsize() * r.ysize();  // Number of positions in `r`.
+  reader->Refill();
+  size_t count = reader->ReadBits(CeilLog2Nonzero(upper_bound)) + 1;  // >= 1
+  size_t stream_id = ModularStreamId::ACMetadata(group_id).ID(frame_dim);
+  // Channels: 0 = YToX, 1 = YToB, 2 = ACS (row 0) + QF (row 1), 3 = EPF.
+  Image image(r.xsize(), r.ysize(), full_image.bitdepth, 4);
+  static_assert(kColorTileDimInBlocks == 8, "Color tile size changed");
+  Rect cr(r.x0() >> 3, r.y0() >> 3, (r.xsize() + 7) >> 3, (r.ysize() + 7) >> 3);
+  image.channel[0] = Channel(cr.xsize(), cr.ysize(), 3, 3);  // Per color tile.
+  image.channel[1] = Channel(cr.xsize(), cr.ysize(), 3, 3);  // Per color tile.
+  image.channel[2] = Channel(count, 2, 0, 0);  // `count` (ACS, QF) pairs.
+  ModularOptions options;
+  if (!ModularGenericDecompress(
+          reader, image, /*header=*/nullptr, stream_id, &options,
+          /*undo_transforms=*/-1, &tree, &code, &context_map)) {
+    return JXL_FAILURE("Failed to decode AC metadata");
+  }
+  ConvertPlaneAndClamp(Rect(image.channel[0].plane), image.channel[0].plane, cr,
+                       &dec_state->shared_storage.cmap.ytox_map);
+  ConvertPlaneAndClamp(Rect(image.channel[1].plane), image.channel[1].plane, cr,
+                       &dec_state->shared_storage.cmap.ytob_map);
+  size_t num = 0;  // Next unread entry of channel 2.
+  bool is444 = dec_state->shared->frame_header.chroma_subsampling.Is444();
+  auto& ac_strategy = dec_state->shared_storage.ac_strategy;
+  size_t xlim = std::min(ac_strategy.xsize(), r.x0() + r.xsize());
+  size_t ylim = std::min(ac_strategy.ysize(), r.y0() + r.ysize());
+  uint32_t local_used_acs = 0;  // Bit i is set once raw strategy i was read.
+  for (size_t iy = 0; iy < r.ysize(); iy++) {
+    size_t y = r.y0() + iy;
+    int* row_qf = r.Row(&dec_state->shared_storage.raw_quant_field, iy);
+    uint8_t* row_epf = r.Row(&dec_state->shared_storage.epf_sharpness, iy);
+    int* row_in_1 = image.channel[2].plane.Row(0);   // Raw AC strategies.
+    int* row_in_2 = image.channel[2].plane.Row(1);   // Raw quant field values.
+    int* row_in_3 = image.channel[3].plane.Row(iy);  // EPF sharpness.
+    for (size_t ix = 0; ix < r.xsize(); ix++) {
+      size_t x = r.x0() + ix;
+      int sharpness = row_in_3[ix];
+      if (sharpness < 0 || sharpness >= LoopFilter::kEpfSharpEntries) {
+        return JXL_FAILURE("Corrupted sharpness field");
+      }
+      row_epf[ix] = sharpness;
+      if (ac_strategy.IsValid(x, y)) {
+        continue;  // Presumably covered by an earlier block; no entry is read.
+      }
+
+      if (num >= count) return JXL_FAILURE("Corrupted stream");
+
+      if (!AcStrategy::IsRawStrategyValid(row_in_1[num])) {
+        return JXL_FAILURE("Invalid AC strategy");
+      }
+      local_used_acs |= 1u << row_in_1[num];
+      AcStrategy acs = AcStrategy::FromRawStrategy(row_in_1[num]);
+      if ((acs.covered_blocks_x() > 1 || acs.covered_blocks_y() > 1) &&
+          !is444) {
+        return JXL_FAILURE(
+            "AC strategy not compatible with chroma subsampling");
+      }
+      // Ensure that blocks do not overflow *AC* groups.
+      size_t next_x_ac_block = (x / kGroupDimInBlocks + 1) * kGroupDimInBlocks;
+      size_t next_y_ac_block = (y / kGroupDimInBlocks + 1) * kGroupDimInBlocks;
+      size_t next_x_dct_block = x + acs.covered_blocks_x();
+      size_t next_y_dct_block = y + acs.covered_blocks_y();
+      if (next_x_dct_block > next_x_ac_block || next_x_dct_block > xlim) {
+        return JXL_FAILURE("Invalid AC strategy, x overflow");
+      }
+      if (next_y_dct_block > next_y_ac_block || next_y_dct_block > ylim) {
+        return JXL_FAILURE("Invalid AC strategy, y overflow");
+      }
+      JXL_RETURN_IF_ERROR(
+          ac_strategy.SetNoBoundsCheck(x, y, AcStrategy::Type(row_in_1[num])));
+      row_qf[ix] =  // Clamped to [1, Quantizer::kQuantMax].
+          1 + std::max(0, std::min(Quantizer::kQuantMax - 1, row_in_2[num]));
+      num++;
+    }
+  }
+  dec_state->used_acs |= local_used_acs;  // Merge into the decoder-wide mask.
+  if (dec_state->shared->frame_header.loop_filter.epf_iters > 0) {
+    ComputeSigma(r, dec_state);
+  }
+  return true;
+}
+
+Status ModularFrameDecoder::FinalizeDecoding(PassesDecoderState* dec_state,
+                                             jxl::ThreadPool* pool,
+                                             ImageBundle* output) {
+  Image& gi = full_image;
+  size_t xsize = gi.w;
+  size_t ysize = gi.h;
+
+  const auto& frame_header = dec_state->shared->frame_header;
+  const auto* metadata = frame_header.nonserialized_metadata;
+
+  // Don't use threads if total image size is smaller than a group
+  if (xsize * ysize < frame_dim.group_dim * frame_dim.group_dim) pool = nullptr;
+
+  // Undo the global transforms
+  gi.undo_transforms(global_header.wp_header, -1, pool);
+  if (gi.error) return JXL_FAILURE("Undoing transforms failed");
+
+  auto& decoded = dec_state->decoded;
+
+  int c = 0;  // Color plane index; later indexes extra channels in gi.
+  if (do_color) {
+    const bool rgb_from_gray =
+        metadata->m.color_encoding.IsGray() &&
+        frame_header.color_transform == ColorTransform::kNone;
+    const bool fp = metadata->m.bit_depth.floating_point_sample;
+
+    for (; c < 3; c++) {
+      float factor = full_image.bitdepth < 32
+                         ? 1.f / ((1u << full_image.bitdepth) - 1)
+                         : 0;  // Factor is 0 for bitdepth >= 32.
+      int c_in = c;  // Channel of `gi` that feeds output plane c.
+      if (frame_header.color_transform == ColorTransform::kXYB) {
+        factor = dec_state->shared->matrices.DCQuants()[c];
+        // XYB is encoded as YX(B-Y)
+        if (c < 2) c_in = 1 - c;
+      } else if (rgb_from_gray) {
+        c_in = 0;  // Gray: the only color channel is channel 0.
+      }
+      // TODO(eustas): could we detect it on earlier stage?
+      if (gi.channel[c_in].w == 0 || gi.channel[c_in].h == 0) {
+        return JXL_FAILURE("Empty image");
+      }
+      size_t xsize_shifted = DivCeil(xsize, 1 << gi.channel[c_in].hshift);
+      size_t ysize_shifted = DivCeil(ysize, 1 << gi.channel[c_in].vshift);
+      if (ysize_shifted != gi.channel[c_in].h ||
+          xsize_shifted != gi.channel[c_in].w) {
+        return JXL_FAILURE("Dimension mismatch");
+      }
+      if (frame_header.color_transform == ColorTransform::kXYB && c == 2) {
+        JXL_ASSERT(!fp);  // Float samples are not expected together with XYB.
+        RunOnPool(
+            pool, 0, ysize_shifted, jxl::ThreadPool::SkipInit(),
+            [&](const int task, const int thread) {
+              const size_t y = task;
+              const pixel_type* const JXL_RESTRICT row_in =
+                  gi.channel[c_in].Row(y);
+              const pixel_type* const JXL_RESTRICT row_in_Y =
+                  gi.channel[0].Row(y);
+              float* const JXL_RESTRICT row_out = decoded.PlaneRow(c, y);
+              HWY_DYNAMIC_DISPATCH(MultiplySum)
+              (xsize_shifted, row_in, row_in_Y, factor, row_out);
+            },
+            "ModularIntToFloat");
+      } else if (fp) {
+        int bits = metadata->m.bit_depth.bits_per_sample;
+        int exp_bits = metadata->m.bit_depth.exponent_bits_per_sample;
+        RunOnPool(
+            pool, 0, ysize_shifted, jxl::ThreadPool::SkipInit(),
+            [&](const int task, const int thread) {
+              const size_t y = task;
+              const pixel_type* const JXL_RESTRICT row_in =
+                  gi.channel[c_in].Row(y);
+              float* const JXL_RESTRICT row_out = decoded.PlaneRow(c, y);
+              int_to_float(row_in, row_out, xsize_shifted, bits, exp_bits);
+            },
+            "ModularIntToFloat_losslessfloat");
+      } else {
+        RunOnPool(
+            pool, 0, ysize_shifted, jxl::ThreadPool::SkipInit(),
+            [&](const int task, const int thread) {
+              const size_t y = task;
+              const pixel_type* const JXL_RESTRICT row_in =
+                  gi.channel[c_in].Row(y);
+              if (rgb_from_gray) {
+                HWY_DYNAMIC_DISPATCH(RgbFromSingle)
+                (xsize_shifted, row_in, factor, &decoded, c, y);
+              } else {
+                HWY_DYNAMIC_DISPATCH(SingleFromSingle)
+                (xsize_shifted, row_in, factor, &decoded, c, y);
+              }
+            },
+            "ModularIntToFloat");
+      }
+      if (rgb_from_gray) {
+        break;  // The gray path runs only once, for c == 0.
+      }
+    }
+    if (rgb_from_gray) {
+      c = 1;  // Only gi.channel[0] was consumed by the color loop.
+    }
+  }
+  for (size_t ec = 0; ec < dec_state->extra_channels.size(); ec++, c++) {
+    const ExtraChannelInfo& eci = output->metadata()->extra_channel_info[ec];
+    int bits = eci.bit_depth.bits_per_sample;
+    int exp_bits = eci.bit_depth.exponent_bits_per_sample;
+    bool fp = eci.bit_depth.floating_point_sample;
+    JXL_ASSERT(fp || bits < 32);
+    const float mul = fp ? 0 : (1.0f / ((1u << bits) - 1));  // Unused if fp.
+    size_t ecups = frame_header.extra_channel_upsampling[ec];
+    const size_t ec_xsize = DivCeil(frame_dim.xsize_upsampled, ecups);
+    const size_t ec_ysize = DivCeil(frame_dim.ysize_upsampled, ecups);
+    for (size_t y = 0; y < ec_ysize; ++y) {
+      float* const JXL_RESTRICT row_out = dec_state->extra_channels[ec].Row(y);
+      const pixel_type* const JXL_RESTRICT row_in = gi.channel[c].Row(y);
+      if (fp) {
+        int_to_float(row_in, row_out, ec_xsize, bits, exp_bits);
+      } else {
+        for (size_t x = 0; x < ec_xsize; ++x) {
+          row_out[x] = row_in[x] * mul;
+        }
+      }
+    }
+  }
+  return true;
+}
+
+static constexpr const float kAlmostZero = 1e-8f;
+
+Status ModularFrameDecoder::DecodeQuantTable(
+    size_t required_size_x, size_t required_size_y, BitReader* br,
+    QuantEncoding* encoding, size_t idx,
+    ModularFrameDecoder* modular_frame_decoder) {
+  // Reads the denominator, then three planes of required_size_x by
+  // required_size_y raw table entries, which are stored plane after plane.
+  auto& raw = encoding->qraw;
+  JXL_RETURN_IF_ERROR(F16Coder::Read(br, &raw.qtable_den));
+  // This also rejects zero and negative denominators.
+  if (raw.qtable_den < kAlmostZero) {
+    return JXL_FAILURE("Invalid qtable_den: value too small");
+  }
+  Image image(required_size_x, required_size_y, 8, 3);
+  ModularOptions options;
+  if (modular_frame_decoder == nullptr) {
+    // No frame decoder: decode without a global tree, as stream 0.
+    JXL_RETURN_IF_ERROR(ModularGenericDecompress(br, image, /*header=*/nullptr,
+                                                 0, &options,
+                                                 /*undo_transforms=*/-1));
+  } else {
+    const size_t stream_id =
+        ModularStreamId::QuantTable(idx).ID(modular_frame_decoder->frame_dim);
+    JXL_RETURN_IF_ERROR(ModularGenericDecompress(
+        br, image, /*header=*/nullptr, stream_id, &options,
+        /*undo_transforms=*/-1, &modular_frame_decoder->tree,
+        &modular_frame_decoder->code, &modular_frame_decoder->context_map));
+  }
+  if (raw.qtable == nullptr) raw.qtable = new std::vector<int>();
+  std::vector<int>& table = *raw.qtable;
+  table.resize(required_size_x * required_size_y * 3);
+  size_t pos = 0;  // Equals (c * ysize + y) * xsize + x inside the loops.
+  for (size_t c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < required_size_y; ++y) {
+      const int* JXL_RESTRICT row = image.channel[c].Row(y);
+      for (size_t x = 0; x < required_size_x; ++x) {
+        table[pos++] = row[x];
+        if (row[x] <= 0) return JXL_FAILURE("Invalid raw quantization table");
+      }
+    }
+  }
+  return true;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_modular.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_modular.h
new file mode 100644
index 0000000000..7ae2418471
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_modular.h
@@ -0,0 +1,125 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_MODULAR_H_
+#define LIB_JXL_DEC_MODULAR_H_
+
+#include <stddef.h>
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_params.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+// Names one modular sub-stream of a frame; ID() maps it to a flat index.
+// Order of the ids: [global][VarDCT DC groups][modular DC groups]
+// [AC metadata groups][quant tables][modular AC groups, pass after pass].
+struct ModularStreamId {
+  enum Kind {
+    kGlobalData,
+    kVarDCTDC,
+    kModularDC,
+    kACMetadata,
+    kQuantTable,
+    kModularAC
+  };
+  Kind kind;
+  size_t quant_table_id;  // Only for kQuantTable.
+  size_t group_id;        // DC or AC group id.
+  size_t pass_id;         // Only for kModularAC.
+  // Flat index of this stream for a frame with dimensions `frame_dim`.
+  size_t ID(const FrameDimensions& frame_dim) const {
+    const size_t num_dc = frame_dim.num_dc_groups;
+    switch (kind) {
+      case kGlobalData:
+        return 0;
+      case kVarDCTDC:
+        return 1 + group_id;
+      case kModularDC:
+        return 1 + num_dc + group_id;
+      case kACMetadata:
+        return 1 + 2 * num_dc + group_id;
+      case kQuantTable:
+        return 1 + 3 * num_dc + quant_table_id;
+      case kModularAC:
+        return 1 + 3 * num_dc + DequantMatrices::kNum +
+               frame_dim.num_groups * pass_id + group_id;
+    }
+    return 0;  // Any other `kind` value maps to 0.
+  }
+  // Factories; fields that a kind does not use are set to 0.
+  static ModularStreamId Global() {
+    return {kGlobalData, 0, 0, 0};
+  }
+  static ModularStreamId VarDCTDC(size_t group_id) {
+    return {kVarDCTDC, 0, group_id, 0};
+  }
+  static ModularStreamId ModularDC(size_t group_id) {
+    return {kModularDC, 0, group_id, 0};
+  }
+  static ModularStreamId ACMetadata(size_t group_id) {
+    return {kACMetadata, 0, group_id, 0};
+  }
+  static ModularStreamId QuantTable(size_t quant_table_id) {
+    JXL_ASSERT(quant_table_id < DequantMatrices::kNum);
+    return {kQuantTable, quant_table_id, 0, 0};
+  }
+  static ModularStreamId ModularAC(size_t group_id, size_t pass_id) {
+    return {kModularAC, 0, group_id, pass_id};
+  }
+  // Id of AC group 0 of pass `passes`, i.e. the number of ids below it.
+  static size_t Num(const FrameDimensions& frame_dim, size_t passes) {
+    return ModularAC(0, passes).ID(frame_dim);
+  }
+};
+
+class ModularFrameDecoder {  // Decodes the modular streams of one frame.
+ public:
+  void Init(const FrameDimensions& frame_dim) { this->frame_dim = frame_dim; }
+  Status DecodeGlobalInfo(BitReader* reader, const FrameHeader& frame_header,
+                          bool allow_truncated_group = false);
+  Status DecodeGroup(const Rect& rect, BitReader* reader, int minShift,
+                     int maxShift, const ModularStreamId& stream,
+                     bool zerofill);  // If set, writes zeros; reader unused.
+  // Decodes a VarDCT DC group (`group_id`) from the given `reader`.
+  Status DecodeVarDCTDC(size_t group_id, BitReader* reader,
+                        PassesDecoderState* dec_state);
+  // Decodes a VarDCT AC Metadata group (`group_id`) from the given `reader`.
+  Status DecodeAcMetadata(size_t group_id, BitReader* reader,
+                          PassesDecoderState* dec_state);
+  // Decodes a RAW quant table from `br` into the given `encoding`, of size
+  // `required_size_x x required_size_y`. If `modular_frame_decoder` is passed,
+  // its global tree is used, otherwise no global tree is used.
+  static Status DecodeQuantTable(size_t required_size_x, size_t required_size_y,
+                                 BitReader* br, QuantEncoding* encoding,
+                                 size_t idx,
+                                 ModularFrameDecoder* modular_frame_decoder);
+  Status FinalizeDecoding(PassesDecoderState* dec_state, jxl::ThreadPool* pool,
+                          ImageBundle* output);
+  bool have_dc() const { return have_something; }
+
+ private:
+  Image full_image;  // Whole-frame image that decoded groups are copied into.
+  FrameDimensions frame_dim;
+  bool do_color = false;        // Initialized so it is never indeterminate.
+  bool have_something = false;  // Keeps have_dc() defined before any decode.
+  Tree tree;                    // Passed to every group decode, with `code`
+  ANSCode code;                 // and `context_map`.
+  std::vector<uint8_t> context_map;
+  GroupHeader global_header;  // Its wp_header is used to undo transforms.
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_MODULAR_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_noise.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_noise.cc
new file mode 100644
index 0000000000..240b8aff21
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_noise.cc
@@ -0,0 +1,295 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_noise.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <numeric>
+#include <utility>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_noise.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/xorshift128plus-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Vec;
+
+using D = HWY_CAPPED(float, kBlockDim);
+using DI = hwy::HWY_NAMESPACE::Rebind<int, D>;
+using DI8 = hwy::HWY_NAMESPACE::Repartition<uint8_t, D>;
+
+// Turns one vector of random 32-bit words into floats in [1, 2).
+// NOTE: the convolution kernel sums to 0, so it makes no difference whether
+// the inputs are in [0, 1) or in [1, 2).
+void BitsToFloat(const uint32_t* JXL_RESTRICT random_bits,
+                 float* JXL_RESTRICT floats) {
+  const HWY_FULL(uint32_t) du;
+  const HWY_FULL(float) df;
+  // The top 23 bits of each word become the mantissa...
+  const auto mantissa = ShiftRight<9>(Load(du, random_bits));
+  // ...and 0x3F800000 (the bit pattern of 1.0f) supplies sign and exponent.
+  const auto bits_of_one = Set(du, 0x3F800000);
+  Store(BitCast(df, mantissa | bits_of_one), df, floats);
+}
+
+void RandomImage(Xorshift128Plus* rng, const Rect& rect,
+                 ImageF* JXL_RESTRICT noise) {
+  // Fills `rect` of `noise` with random floats produced by BitsToFloat().
+  // One Fill() yields kFloatsPerBatch floats, which may exceed the vector
+  // size, hence a batch is consumed in vector-sized steps.
+  constexpr size_t kFloatsPerBatch =
+      Xorshift128Plus::N * sizeof(uint64_t) / sizeof(float);
+  HWY_ALIGN uint64_t batch[Xorshift128Plus::N];
+  const uint32_t* const batch_bits = reinterpret_cast<const uint32_t*>(batch);
+  const HWY_FULL(float) df;
+  const size_t lanes = Lanes(df);
+  const size_t width = rect.xsize();
+  const size_t height = rect.ysize();
+  // Number of pixels per row that are covered by entire batches.
+  const size_t full_end = width - width % kFloatsPerBatch;
+
+  for (size_t y = 0; y < height; ++y) {
+    float* JXL_RESTRICT row = rect.Row(noise, y);
+
+    // Only entire batches (avoids exceeding the image padding).
+    for (size_t x = 0; x < full_end; x += kFloatsPerBatch) {
+      rng->Fill(batch);
+      for (size_t i = 0; i < kFloatsPerBatch; i += lanes) {
+        BitsToFloat(batch_bits + i, row + x + i);
+      }
+    }
+
+    // Any remaining pixels, rounded up to vectors (safe due to padding). The
+    // generator is advanced once per row even when nothing remains.
+    rng->Fill(batch);
+    for (size_t x = full_end; x < width; x += lanes) {
+      BitsToFloat(batch_bits + (x - full_end), row + x);
+    }
+  }
+}
+
+// Clamps every lane of `x` to [0, max_value]; `d` is not used.
+template <class D, class V>
+static HWY_INLINE V Clamp0ToMax(D d, const V x, const V max_value) {
+  return ZeroIfNegative(Min(x, max_value));
+}
+
+// Strength given by `eval` at x, clamped to [0, 1].
+// x is in [0+delta, 1+delta], delta ~= 0.06
+template <class StrengthEval>
+typename StrengthEval::V NoiseStrength(const StrengthEval& eval,
+                                       const typename StrengthEval::V x) {
+  const auto upper_limit = Set(D(), 1.0f);
+  return Clamp0ToMax(D(), eval(x), upper_limit);
+}
+
+// TODO(veluca): SIMD-fy.
+class StrengthEvalLut {  // Piecewise-linear lookup into NoiseParams::lut.
+ public:
+  using V = Vec<D>;
+
+  explicit StrengthEvalLut(const NoiseParams& noise_params)
+#if HWY_TARGET == HWY_SCALAR
+      : noise_params_(noise_params)
+#endif
+  {
+#if HWY_TARGET != HWY_SCALAR
+    uint32_t lut[8];  // Bit patterns of the float LUT entries.
+    memcpy(lut, noise_params.lut, sizeof(lut));
+    for (size_t i = 0; i < 8; i++) {
+      low16_lut[2 * i] = (lut[i] >> 0) & 0xFF;        // Byte 0.
+      low16_lut[2 * i + 1] = (lut[i] >> 8) & 0xFF;    // Byte 1.
+      high16_lut[2 * i] = (lut[i] >> 16) & 0xFF;      // Byte 2.
+      high16_lut[2 * i + 1] = (lut[i] >> 24) & 0xFF;  // Byte 3.
+    }
+#endif
+  }
+
+  V operator()(const V vx) const {  // Interpolated LUT value at vx.
+    constexpr size_t kScale = NoiseParams::kNumNoisePoints - 2;
+    auto scaled_vx = Max(Zero(D()), vx * Set(D(), kScale));
+    auto floor_x = Floor(scaled_vx);
+    auto frac_x = scaled_vx - floor_x;
+    floor_x = IfThenElse(scaled_vx >= Set(D(), kScale), Set(D(), kScale - 1),
+                         floor_x);
+    frac_x = IfThenElse(scaled_vx >= Set(D(), kScale), Set(D(), 1), frac_x);
+    auto floor_x_int = ConvertTo(DI(), floor_x);
+#if HWY_TARGET == HWY_SCALAR
+    auto low = Set(D(), noise_params_.lut[floor_x_int.raw]);
+    auto hi = Set(D(), noise_params_.lut[floor_x_int.raw + 1]);
+#else
+    // Set each lane's bytes to {0, 0, 2x+1, 2x}.
+    auto floorx_indices_low =
+        floor_x_int * Set(DI(), 0x0202) + Set(DI(), 0x0100);
+    // Set each lane's bytes to {2x+1, 2x, 0, 0}.
+    auto floorx_indices_hi =
+        floor_x_int * Set(DI(), 0x02020000) + Set(DI(), 0x01000000);
+    // Load both byte tables and the masks for the low and high 16 bits.
+    auto low16 = BitCast(DI(), LoadDup128(DI8(), low16_lut));
+    auto lowm = Set(DI(), 0xFFFF);
+    auto hi16 = BitCast(DI(), LoadDup128(DI8(), high16_lut));
+    auto him = Set(DI(), 0xFFFF0000);
+    // low = noise_params.lut[floor_x]
+    auto low =
+        BitCast(D(), (TableLookupBytes(low16, floorx_indices_low) & lowm) |
+                         (TableLookupBytes(hi16, floorx_indices_hi) & him));
+    // hi = noise_params.lut[floor_x+1]
+    floorx_indices_low += Set(DI(), 0x0202);
+    floorx_indices_hi += Set(DI(), 0x02020000);
+    auto hi =
+        BitCast(D(), (TableLookupBytes(low16, floorx_indices_low) & lowm) |
+                         (TableLookupBytes(hi16, floorx_indices_hi) & him));
+#endif
+    return MulAdd(hi - low, frac_x, low);  // low + (hi - low) * frac_x.
+  }
+
+ private:
+#if HWY_TARGET != HWY_SCALAR
+  // noise_params.lut transformed into two 16-bit lookup tables.
+  HWY_ALIGN uint8_t high16_lut[16];
+  HWY_ALIGN uint8_t low16_lut[16];
+#else
+  const NoiseParams& noise_params_;
+#endif
+};
+
+template <class D>
+void AddNoiseToRGB(const D d, const Vec<D> rnd_noise_r,
+                   const Vec<D> rnd_noise_g, const Vec<D> rnd_noise_cor,
+                   const Vec<D> noise_strength_g, const Vec<D> noise_strength_r,
+                   float ytox, float ytob, float* JXL_RESTRICT out_x,
+                   float* JXL_RESTRICT out_y, float* JXL_RESTRICT out_b) {
+  // Each channel mixes 1/128 of its own noise with 127/128 of the shared
+  // (correlated) noise, scaled by the per-channel strength.
+  const auto own_weight = Set(d, 0.0078125f);     // 1/128
+  const auto shared_weight = Set(d, 0.9921875f);  // 127/128
+  const auto noise_r = own_weight * rnd_noise_r * noise_strength_r +
+                       shared_weight * rnd_noise_cor * noise_strength_r;
+  const auto noise_g = own_weight * rnd_noise_g * noise_strength_g +
+                       shared_weight * rnd_noise_cor * noise_strength_g;
+  const auto noise_sum = noise_r + noise_g;
+  const auto noise_diff = noise_r - noise_g;
+
+  // All three inputs are loaded before anything is written back.
+  const auto x_in = Load(d, out_x);
+  const auto y_in = Load(d, out_y);
+  const auto b_in = Load(d, out_b);
+  // X gets the difference plus ytox times the sum, Y the sum, B ytob times it.
+  Store(x_in + (noise_diff + Set(d, ytox) * noise_sum), d, out_x);
+  Store(y_in + noise_sum, d, out_y);
+  Store(b_in + Set(d, ytob) * noise_sum, d, out_b);
+}
+
+void AddNoise(const NoiseParams& noise_params, const Rect& noise_rect,
+              const Image3F& noise, const Rect& opsin_rect,
+              const ColorCorrelationMap& cmap, Image3F* opsin) {
+  if (!noise_params.HasAny()) return;  // `opsin` is left untouched.
+  const StrengthEvalLut noise_model(noise_params);
+  D d;
+  const auto half = Set(d, 0.5f);
+
+  const size_t xsize = opsin_rect.xsize();
+  const size_t ysize = opsin_rect.ysize();
+
+  // With the prior subtract-random Laplacian approximation, rnd_* ranges were
+  // about [-1.5, 1.6]; Laplacian3 about doubles this to [-3.6, 3.6], so the
+  // normalizer is half of what it was before (0.5).
+  const auto norm_const = Set(d, 0.22f);
+
+  float ytox = cmap.YtoXRatio(0);
+  float ytob = cmap.YtoBRatio(0);
+
+  const size_t xsize_v = RoundUpTo(xsize, Lanes(d));  // Whole vectors.
+
+  for (size_t y = 0; y < ysize; ++y) {
+    float* JXL_RESTRICT row_x = opsin_rect.PlaneRow(opsin, 0, y);
+    float* JXL_RESTRICT row_y = opsin_rect.PlaneRow(opsin, 1, y);
+    float* JXL_RESTRICT row_b = opsin_rect.PlaneRow(opsin, 2, y);
+    const float* JXL_RESTRICT row_rnd_r = noise_rect.ConstPlaneRow(noise, 0, y);
+    const float* JXL_RESTRICT row_rnd_g = noise_rect.ConstPlaneRow(noise, 1, y);
+    const float* JXL_RESTRICT row_rnd_c = noise_rect.ConstPlaneRow(noise, 2, y);
+    // The last vector may read the row padding [xsize, xsize_v). Floor() in
+    // StrengthEvalLut inspects it; the other ops are arithmetic or shuffles.
+    msan::UnpoisonMemory(row_x + xsize, (xsize_v - xsize) * sizeof(float));
+    msan::UnpoisonMemory(row_y + xsize, (xsize_v - xsize) * sizeof(float));
+    for (size_t x = 0; x < xsize; x += Lanes(d)) {  // One vector per step.
+      const auto vx = Load(d, row_x + x);
+      const auto vy = Load(d, row_y + x);
+      const auto in_g = vy - vx;
+      const auto in_r = vy + vx;
+      const auto noise_strength_g = NoiseStrength(noise_model, in_g * half);
+      const auto noise_strength_r = NoiseStrength(noise_model, in_r * half);
+      const auto addit_rnd_noise_red = Load(d, row_rnd_r + x) * norm_const;
+      const auto addit_rnd_noise_green = Load(d, row_rnd_g + x) * norm_const;
+      const auto addit_rnd_noise_correlated =
+          Load(d, row_rnd_c + x) * norm_const;
+      AddNoiseToRGB(D(), addit_rnd_noise_red, addit_rnd_noise_green,
+                    addit_rnd_noise_correlated, noise_strength_g,
+                    noise_strength_r, ytox, ytob, row_x + x, row_y + x,
+                    row_b + x);
+    }
+    msan::PoisonMemory(row_x + xsize, (xsize_v - xsize) * sizeof(float));
+    msan::PoisonMemory(row_y + xsize, (xsize_v - xsize) * sizeof(float));
+    msan::PoisonMemory(row_b + xsize, (xsize_v - xsize) * sizeof(float));
+  }
+}
+
+void RandomImage3(size_t seed, const Rect& rect, Image3F* JXL_RESTRICT noise) {
+  // A single generator feeds planes 0, 1 and 2, in that order.
+  HWY_ALIGN Xorshift128Plus rng(seed);
+  for (size_t c = 0; c < 3; ++c)
+    RandomImage(&rng, rect, &noise->Plane(c));
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(AddNoise);  // Makes the per-target versions dispatchable.
+void AddNoise(const NoiseParams& noise_params, const Rect& noise_rect,
+              const Image3F& noise, const Rect& opsin_rect,
+              const ColorCorrelationMap& cmap, Image3F* opsin) {
+  HWY_DYNAMIC_DISPATCH(AddNoise)
+  (noise_params, noise_rect, noise, opsin_rect, cmap, opsin);
+}
+
+HWY_EXPORT(RandomImage3);  // Makes the per-target versions dispatchable.
+void RandomImage3(size_t seed, const Rect& rect, Image3F* JXL_RESTRICT noise) {
+  HWY_DYNAMIC_DISPATCH(RandomImage3)(seed, rect, noise);
+}
+
+void DecodeFloatParam(float precision, float* val, BitReader* br) {
+  // The stored value is a 10-bit unsigned integer, divided by `precision`.
+  *val = static_cast<int>(br->ReadFixedBits<10>()) / precision;
+}
+
+Status DecodeNoise(BitReader* br, NoiseParams* noise_params) {
+  // Every LUT entry is read the same way, in array order; always succeeds.
+  for (float& entry : noise_params->lut)
+    DecodeFloatParam(kNoisePrecision, &entry, br);
+  return true;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_noise.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_noise.h
new file mode 100644
index 0000000000..f7135e7c5a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_noise.h
@@ -0,0 +1,36 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_NOISE_H_
+#define LIB_JXL_DEC_NOISE_H_
+
+// Noise synthesis. Currently disabled.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/noise.h"
+
+namespace jxl {
+
+// Adds noise to `opsin_rect` of the opsin image, using the random values
+// stored in `noise_rect` of `noise`. Does nothing if !noise_params.HasAny().
+void AddNoise(const NoiseParams& noise_params, const Rect& noise_rect,
+              const Image3F& noise, const Rect& opsin_rect,
+              const ColorCorrelationMap& cmap, Image3F* opsin);
+
+void RandomImage3(size_t seed, const Rect& rect, Image3F* JXL_RESTRICT noise);
+
+// Reads noise_params->lut from `br`. Only call if FrameHeader.flags.kNoise.
+Status DecodeNoise(BitReader* br, NoiseParams* noise_params);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_NOISE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_params.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_params.h
new file mode 100644
index 0000000000..e3131e6cb9
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_params.h
@@ -0,0 +1,62 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_PARAMS_H_
+#define LIB_JXL_DEC_PARAMS_H_
+
+// Parameters and flags that govern JXL decompression.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <limits>
+
+#include "lib/jxl/base/override.h"
+
+namespace jxl {
+
+struct DecompressParams {
+  // If true, checks at the end of decoding that all of the compressed data
+  // was consumed by the decoder.
+  bool check_decompressed_size = true;
+
+  // If true, skip dequant and iDCT and decode to JPEG (only if possible)
+  bool keep_dct = false;
+  // If true, render spot colors (otherwise only returned as extra channels)
+  bool render_spotcolors = true;
+
+  // Cannot be kOn because it needs encoder support.
+  Override preview = Override::kDefault;
+
+  // How many passes to decode at most. By default, decode everything.
+  uint32_t max_passes = std::numeric_limits<uint32_t>::max();
+  // Alternatively, one can specify the maximum tolerable downscaling factor
+  // with respect to the full size of the image. By default, nothing less than
+  // the full size is requested.
+  size_t max_downsampling = 1;
+
+  // Try to decode as much as possible of a truncated codestream, but only whole
+  // sections at a time.
+  bool allow_partial_files = false;
+  // Allow even more progression.
+  bool allow_more_progressive_steps = false;
+  // Field-wise equality over every member above; takes a reference (no copy).
+  bool operator==(const DecompressParams& other) const {
+    return check_decompressed_size == other.check_decompressed_size &&
+           keep_dct == other.keep_dct &&
+           render_spotcolors == other.render_spotcolors &&
+           preview == other.preview && max_passes == other.max_passes &&
+           max_downsampling == other.max_downsampling &&
+           allow_partial_files == other.allow_partial_files &&
+           allow_more_progressive_steps == other.allow_more_progressive_steps;
+  }
+  bool operator!=(const DecompressParams& other) const {
+    return !(*this == other);
+  }
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_PARAMS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_patch_dictionary.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_patch_dictionary.cc
new file mode 100644
index 0000000000..d1b84f24db
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_patch_dictionary.cc
@@ -0,0 +1,238 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_patch_dictionary.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <random>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/blending.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_frame.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/patch_dictionary_internal.h"
+
+namespace jxl {
+
+constexpr int kMaxPatches = 1 << 24;  // cap on each patch count Decode reads
+
+Status PatchDictionary::Decode(BitReader* br, size_t xsize, size_t ysize,
+                               bool* uses_extra_channels) {
+  // Parses the dictionary from `br` into positions_ and rebuilds the row
+  // cache. Patches must fit in xsize x ysize. `*uses_extra_channels` is only
+  // ever set to true here: on alpha blending or a non-kNone extra channel.
+  positions_.clear();
+  std::vector<uint8_t> context_map;
+  ANSCode code;
+  JXL_RETURN_IF_ERROR(
+      DecodeHistograms(br, kNumPatchDictionaryContexts, &code, &context_map));
+  ANSSymbolReader decoder(&code, br);
+  auto read_num = [&](size_t context) -> size_t {
+    return decoder.ReadHybridUint(context, br, context_map);
+  };
+  // Overflow-safe equivalent of `start + len <= limit`.
+  auto fits = [](size_t start, size_t len, size_t limit) {
+    return start <= limit && len <= limit - start;
+  };
+
+  size_t num_ref_patch = read_num(kNumRefPatchContext);
+  // TODO(veluca): does this make sense?
+  if (num_ref_patch > kMaxPatches) {
+    return JXL_FAILURE("Too many patches in dictionary");
+  }
+
+  for (size_t id = 0; id < num_ref_patch; id++) {
+    PatchReferencePosition ref_pos;
+    ref_pos.ref = read_num(kReferenceFrameContext);
+    if (ref_pos.ref >= kMaxNumReferenceFrames ||
+        shared_->reference_frames[ref_pos.ref].frame->xsize() == 0) {
+      return JXL_FAILURE("Invalid reference frame ID");
+    }
+    if (!shared_->reference_frames[ref_pos.ref].ib_is_in_xyb) {
+      return JXL_FAILURE(
+          "Patches cannot use frames saved post color transforms");
+    }
+    const ImageBundle& ib = *shared_->reference_frames[ref_pos.ref].frame;
+    ref_pos.x0 = read_num(kPatchReferencePositionContext);
+    ref_pos.y0 = read_num(kPatchReferencePositionContext);
+    ref_pos.xsize = read_num(kPatchSizeContext) + 1;
+    ref_pos.ysize = read_num(kPatchSizeContext) + 1;
+    if (!fits(ref_pos.x0, ref_pos.xsize, ib.xsize()) ||
+        !fits(ref_pos.y0, ref_pos.ysize, ib.ysize())) {
+      return JXL_FAILURE("Invalid position specified in reference frame");
+    }
+    size_t id_count = read_num(kPatchCountContext) + 1;
+    if (id_count > kMaxPatches) {
+      return JXL_FAILURE("Too many patches in dictionary");
+    }
+    positions_.reserve(positions_.size() + id_count);
+    for (size_t i = 0; i < id_count; i++) {
+      PatchPosition pos;
+      pos.ref_pos = ref_pos;
+      if (i == 0) {
+        pos.x = read_num(kPatchPositionContext);
+        pos.y = read_num(kPatchPositionContext);
+      } else {
+        // Signed delta from the previous patch. A delta below -prev would
+        // wrap the unsigned sum and could slip past the checks below.
+        const PatchPosition& prev = positions_.back();
+        const ssize_t dx = UnpackSigned(read_num(kPatchOffsetContext));
+        const ssize_t dy = UnpackSigned(read_num(kPatchOffsetContext));
+        if ((dx < 0 && size_t(0) - size_t(dx) > prev.x) ||
+            (dy < 0 && size_t(0) - size_t(dy) > prev.y)) {
+          return JXL_FAILURE("Invalid patch: negative coordinate");
+        }
+        pos.x = prev.x + dx;
+        pos.y = prev.y + dy;
+      }
+      if (!fits(pos.x, ref_pos.xsize, xsize)) {
+        return JXL_FAILURE("Invalid patch x: at %zu + %zu > %zu", pos.x,
+                           ref_pos.xsize, xsize);
+      }
+      if (!fits(pos.y, ref_pos.ysize, ysize)) {
+        return JXL_FAILURE("Invalid patch y: at %zu + %zu > %zu", pos.y,
+                           ref_pos.ysize, ysize);
+      }
+      const size_t num_ec = shared_->metadata->m.extra_channel_info.size();
+      for (size_t ch = 0; ch < num_ec + 1; ch++) {
+        uint32_t blend_mode = read_num(kPatchBlendModeContext);
+        if (blend_mode >= uint32_t(PatchBlendMode::kNumBlendModes)) {
+          return JXL_FAILURE("Invalid patch blend mode: %u", blend_mode);
+        }
+        PatchBlending info;
+        info.mode = static_cast<PatchBlendMode>(blend_mode);
+        if (UsesAlpha(info.mode) ||
+            (info.mode != PatchBlendMode::kNone && ch > 0)) {
+          *uses_extra_channels = true;
+        }
+        info.alpha_channel = 0;
+        if (UsesAlpha(info.mode) && num_ec > 1) {
+          info.alpha_channel = read_num(kPatchAlphaChannelContext);
+          if (info.alpha_channel >= num_ec) {
+            return JXL_FAILURE(
+                "Invalid alpha channel for blending: %u out of %u\n",
+                info.alpha_channel, (uint32_t)num_ec);
+          }
+        }
+        info.clamp = UsesClamp(info.mode) && read_num(kPatchClampContext) != 0;
+        pos.blending.push_back(info);
+      }
+      positions_.push_back(std::move(pos));
+    }
+  }
+
+  if (!decoder.CheckANSFinalState()) {
+    return JXL_FAILURE("ANS checksum failure.");
+  }
+  if (!HasAny()) {
+    return JXL_FAILURE("Decoded patch dictionary but got none");
+  }
+
+  ComputePatchCache();
+  return true;
+}
+
+int PatchDictionary::GetReferences() const {
+  int mask = 0;  // bit k is set when some patch reads reference frame k
+  for (const PatchPosition& patch : positions_) {
+    mask |= (1 << static_cast<int>(patch.ref_pos.ref));
+  }
+  return mask;
+}
+
+void PatchDictionary::ComputePatchCache() {
+  patch_starts_.clear();
+  sorted_patches_.clear();
+  if (positions_.empty()) return;  // both caches stay empty
+  std::vector<std::pair<size_t, size_t>> sorted_patches_y;  // (row, patch idx)
+  for (size_t i = 0; i < positions_.size(); i++) {
+    const PatchPosition& pos = positions_[i];
+    for (size_t y = pos.y; y < pos.y + pos.ref_pos.ysize; y++) {
+      sorted_patches_y.emplace_back(y, i);  // one entry per covered row
+    }
+  }
+  // Pairs sort by row first and patch index second, so patches that share a
+  // row keep their positions_ order; needed for blend modes other than kAdd.
+  std::sort(sorted_patches_y.begin(), sorted_patches_y.end());
+  patch_starts_.resize(sorted_patches_y.back().first + 2,
+                       sorted_patches_y.size());  // default = one past the end
+  sorted_patches_.resize(sorted_patches_y.size());
+  for (size_t i = 0; i < sorted_patches_y.size(); i++) {
+    sorted_patches_[i] = sorted_patches_y[i].second;
+    patch_starts_[sorted_patches_y[i].first] =
+        std::min(patch_starts_[sorted_patches_y[i].first], i);  // first index
+  }
+  for (size_t i = patch_starts_.size() - 1; i > 0; i--) {  // suffix minimum
+    patch_starts_[i - 1] = std::min(patch_starts_[i], patch_starts_[i - 1]);
+  }
+}
+
+Status PatchDictionary::AddTo(Image3F* opsin, const Rect& opsin_rect,
+                              float* const* extra_channels,
+                              const Rect& image_rect) const {
+  JXL_CHECK(SameSize(opsin_rect, image_rect));
+  size_t num_ec = shared_->metadata->m.num_extra_channels;
+  std::vector<const float*> fg_ptrs(3 + num_ec);  // rows read from the patch
+  std::vector<float*> bg_ptrs(3 + num_ec);        // rows of the destination
+  for (size_t y = image_rect.y0(); y < image_rect.y0() + image_rect.ysize();
+       y++) {
+    if (y + 1 >= patch_starts_.size()) continue;  // no cache entry for row y
+    for (size_t id = patch_starts_[y]; id < patch_starts_[y + 1]; id++) {
+      const PatchPosition& pos = positions_[sorted_patches_[id]];
+      size_t by = pos.y;
+      size_t bx = pos.x;
+      size_t xsize = pos.ref_pos.xsize;
+      JXL_DASSERT(y >= by);
+      JXL_DASSERT(y < by + pos.ref_pos.ysize);
+      size_t iy = y - by;  // row offset inside the patch
+      size_t ref = pos.ref_pos.ref;  // next: skip patches outside the x range
+      if (bx >= image_rect.x0() + image_rect.xsize()) continue;
+      if (bx + xsize < image_rect.x0()) continue;
+      size_t x0 = std::max(bx, image_rect.x0());  // clipped span is [x0, x1)
+      size_t x1 = std::min(bx + xsize, image_rect.x0() + image_rect.xsize());
+      for (size_t c = 0; c < 3; c++) {
+        fg_ptrs[c] =
+            shared_->reference_frames[ref].frame->color()->ConstPlaneRow(
+                c, pos.ref_pos.y0 + iy) +
+            pos.ref_pos.x0 + x0 - bx;  // column x0 expressed in patch space
+        bg_ptrs[c] = opsin_rect.PlaneRow(opsin, c, y - image_rect.y0()) + x0 -
+                     image_rect.x0();
+      }
+      for (size_t i = 0; i < num_ec; i++) {  // extra channels follow the colors
+        fg_ptrs[3 + i] =
+            shared_->reference_frames[ref].frame->extra_channels()[i].ConstRow(
+                pos.ref_pos.y0 + iy) +
+            pos.ref_pos.x0 + x0 - bx;
+        bg_ptrs[3 + i] = extra_channels[i] + x0 - image_rect.x0();
+      }
+      JXL_RETURN_IF_ERROR(  // bg_ptrs is passed as both 1st and 3rd argument
+          PerformBlending(bg_ptrs.data(), fg_ptrs.data(), bg_ptrs.data(),
+                          x1 - x0, pos.blending[0], pos.blending.data() + 1,
+                          shared_->metadata->m.extra_channel_info));
+    }
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_patch_dictionary.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_patch_dictionary.h
new file mode 100644
index 0000000000..8e3c4d0349
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_patch_dictionary.h
@@ -0,0 +1,200 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_PATCH_DICTIONARY_H_
+#define LIB_JXL_DEC_PATCH_DICTIONARY_H_
+
+// Chooses reference patches, and avoids encoding them once per occurrence.
+
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include <tuple>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/opsin_params.h"
+
+namespace jxl {
+
+constexpr size_t kMaxPatchSize = 32;  // QuantizedPatch holds its square
+
+enum class PatchBlendMode : uint8_t {
+  // The new values are the old ones. Useful to skip some channels.
+  kNone = 0,
+  // The new values (in the crop) replace the old ones: sample = new
+  kReplace = 1,
+  // The new values (in the crop) get added to the old ones: sample = old + new
+  kAdd = 2,
+  // The new values (in the crop) get multiplied by the old ones:
+  // sample = old * new
+  // This blend mode is only supported if BlendColorSpace is kEncoded. The
+  // range of the new value matters for multiplication purposes, and its
+  // nominal range of 0..1 is computed the same way as this is done for the
+  // alpha values in kBlend and kAlphaWeightedAdd.
+  kMul = 3,
+  // The new values (in the crop) replace the old ones if alpha>0:
+  // For first alpha channel:
+  // alpha = old + new * (1 - old)
+  // For other channels if !alpha_associated:
+  // sample = ((1 - new_alpha) * old * old_alpha + new_alpha * new) / alpha
+  // For other channels if alpha_associated:
+  // sample = (1 - new_alpha) * old + new
+  // The alpha formula applies to the alpha used for the division in the other
+  // channels formula, and applies to the alpha channel itself if its
+  // blend_channel value matches itself.
+  // If using kBlendAbove, new is the patch and old is the original image; if
+  // using kBlendBelow, the meaning is inverted.
+  kBlendAbove = 4,
+  kBlendBelow = 5,
+  // The new values (in the crop) are added to the old ones if alpha>0:
+  // For first alpha channel: sample = old + new * (1 - old)
+  // For other channels: sample = old + alpha * new
+  kAlphaWeightedAddAbove = 6,
+  kAlphaWeightedAddBelow = 7,
+  kNumBlendModes,  // count of valid modes; not a mode itself
+};
+
+inline bool UsesAlpha(PatchBlendMode mode) {  // true for the four alpha modes
+  return mode == PatchBlendMode::kBlendAbove ||
+         mode == PatchBlendMode::kBlendBelow ||
+         mode == PatchBlendMode::kAlphaWeightedAddAbove ||
+         mode == PatchBlendMode::kAlphaWeightedAddBelow;
+}
+inline bool UsesClamp(PatchBlendMode mode) {  // alpha modes plus kMul
+  return UsesAlpha(mode) || mode == PatchBlendMode::kMul;
+}
+
+struct PatchBlending {
+  PatchBlendMode mode;     // how the patch combines with existing samples
+  uint32_t alpha_channel;  // extra-channel index; Decode sets 0 when unused
+  bool clamp;              // Decode sets false when !UsesClamp(mode)
+};
+
+struct QuantizedPatch {
+  size_t xsize = 0;  // zero-initialized so comparing a fresh patch is defined
+  size_t ysize = 0;
+  QuantizedPatch() {
+    for (size_t i = 0; i < 3; i++) {
+      pixels[i].resize(kMaxPatchSize * kMaxPatchSize);
+      fpixels[i].resize(kMaxPatchSize * kMaxPatchSize);
+    }
+  }
+  std::vector<int8_t> pixels[3] = {};
+  // Not compared. Used only to retrieve original pixels to construct the
+  // reference image.
+  std::vector<float> fpixels[3] = {};
+  bool operator==(const QuantizedPatch& other) const {
+    if (xsize != other.xsize) return false;
+    if (ysize != other.ysize) return false;
+    for (size_t c = 0; c < 3; c++) {  // only the first xsize * ysize samples
+      if (memcmp(pixels[c].data(), other.pixels[c].data(),
+                 sizeof(int8_t) * xsize * ysize) != 0)
+        return false;
+    }
+    return true;
+  }
+  // Orders by xsize, then ysize, then the raw bytes of each channel in turn.
+  bool operator<(const QuantizedPatch& other) const {
+    if (xsize != other.xsize) return xsize < other.xsize;
+    if (ysize != other.ysize) return ysize < other.ysize;
+    for (size_t c = 0; c < 3; c++) {
+      int cmp = memcmp(pixels[c].data(), other.pixels[c].data(),
+                       sizeof(int8_t) * xsize * ysize);
+      if (cmp > 0) return false;
+      if (cmp < 0) return true;
+    }
+    return false;
+  }
+};
+
+// Pair (patch, occurrences); an occurrence is presumably (x, y) - TODO confirm.
+using PatchInfo =
+    std::pair<QuantizedPatch, std::vector<std::pair<uint32_t, uint32_t>>>;
+
+// Position and size of the patch in the reference frame.
+struct PatchReferencePosition {
+  size_t ref, x0, y0, xsize, ysize;  // ref: index into reference_frames
+  bool operator<(const PatchReferencePosition& oth) const {  // lexicographic
+    return std::make_tuple(ref, x0, y0, xsize, ysize) <
+           std::make_tuple(oth.ref, oth.x0, oth.y0, oth.xsize, oth.ysize);
+  }
+  bool operator==(const PatchReferencePosition& oth) const {
+    return !(*this < oth) && !(oth < *this);  // equal when neither is smaller
+  }
+};
+
+struct PatchPosition {
+  // Position of top-left corner of the patch in the image.
+  size_t x, y;
+  // Different blend mode for color and extra channels.
+  std::vector<PatchBlending> blending;
+  PatchReferencePosition ref_pos;
+  bool operator<(const PatchPosition& oth) const {  // `blending` is ignored
+    return std::make_tuple(ref_pos, x, y) <
+           std::make_tuple(oth.ref_pos, oth.x, oth.y);
+  }
+};
+
+struct PassesSharedState;
+
+// Encoder-side helper class to encode the PatchesDictionary.
+class PatchDictionaryEncoder;
+
+class PatchDictionary {
+ public:
+  PatchDictionary() = default;
+  // Must be called before Decode/AddTo, which dereference the shared state.
+  void SetPassesSharedState(const PassesSharedState* shared) {
+    shared_ = shared;
+  }
+
+  bool HasAny() const { return !positions_.empty(); }
+  // Replaces the current patches with those read from `br`.
+  Status Decode(BitReader* br, size_t xsize, size_t ysize,
+                bool* uses_extra_channels);
+
+  void Clear() {
+    positions_.clear();
+    ComputePatchCache();
+  }
+
+  // Only adds patches that belong to the `image_rect` area of the decoded
+  // image, writing them to the `opsin_rect` area of `opsin`.
+  Status AddTo(Image3F* opsin, const Rect& opsin_rect,
+               float* const* extra_channels, const Rect& image_rect) const;
+
+  // Returns dependencies of this patch dictionary on reference frame ids as a
+  // bit mask: bits 0-3 indicate reference frame 0-3.
+  int GetReferences() const;
+
+ private:
+  friend class PatchDictionaryEncoder;
+
+  const PassesSharedState* shared_ = nullptr;  // not owned; set via setter
+  std::vector<PatchPosition> positions_;
+
+  // Patch occurrences sorted by y.
+  std::vector<size_t> sorted_patches_;
+  // Index of the first patch for each y value.
+  std::vector<size_t> patch_starts_;
+
+  // Patch IDs in position [patch_starts_[y], patch_starts_[y + 1]) of
+  // sorted_patches_ are all the patches that intersect the horizontal line at
+  // y.
+  // The relative order of patches that affect the same pixels is the same -
+  // important when applying patches is noncommutative.
+
+  // Recomputes sorted_patches_ and patch_starts_ after updating positions_.
+  void ComputePatchCache();
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_PATCH_DICTIONARY_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_reconstruct.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_reconstruct.cc
new file mode 100644
index 0000000000..b295728ba4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_reconstruct.cc
@@ -0,0 +1,1242 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_reconstruct.h"
+
+#include <atomic>
+#include <utility>
+
+#include "lib/jxl/filters.h"
+#include "lib/jxl/image_ops.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_reconstruct.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/blending.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_noise.h"
+#include "lib/jxl/dec_upsample.h"
+#include "lib/jxl/dec_xyb-inl.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/passes_state.h"
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/transfer_functions-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+template <typename Op>
+void DoUndoXYBInPlace(Image3F* idct, const Rect& rect, Op op,
+                      const OutputEncodingInfo& output_encoding_info) {
+  // TODO(eustas): should it still be capped?
+  const HWY_CAPPED(float, GroupBorderAssigner::kPaddingXRound) d;
+  const size_t xsize = rect.xsize();
+  const size_t xsize_v = RoundUpTo(xsize, Lanes(d));  // whole-vector width
+  // The size of `rect` might not be a multiple of Lanes(d), but is guaranteed
+  // to be a multiple of kBlockDim or at the margin of the image.
+  for (size_t y = 0; y < rect.ysize(); y++) {
+    float* JXL_RESTRICT row0 = rect.PlaneRow(idct, 0, y);
+    float* JXL_RESTRICT row1 = rect.PlaneRow(idct, 1, y);
+    float* JXL_RESTRICT row2 = rect.PlaneRow(idct, 2, y);
+    // All calculations are lane-wise, still some might require value-dependent
+    // behaviour (e.g. NearestInt). Temporarily unpoison the last vector tail.
+    msan::UnpoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::UnpoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::UnpoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize));
+    for (size_t x = 0; x < rect.xsize(); x += Lanes(d)) {  // full vectors only
+      const auto in_opsin_x = Load(d, row0 + x);
+      const auto in_opsin_y = Load(d, row1 + x);
+      const auto in_opsin_b = Load(d, row2 + x);
+      JXL_COMPILER_FENCE;
+      auto linear_r = Undefined(d);
+      auto linear_g = Undefined(d);
+      auto linear_b = Undefined(d);
+      XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b,
+               output_encoding_info.opsin_params, &linear_r, &linear_g,
+               &linear_b);
+      Store(op.Transform(d, linear_r), d, row0 + x);  // overwrites the input
+      Store(op.Transform(d, linear_g), d, row1 + x);
+      Store(op.Transform(d, linear_b), d, row2 + x);
+    }
+    msan::PoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::PoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::PoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize));
+  }
+}
+
+struct OpLinear {  // identity: returns the linear value unchanged
+  template <typename D, typename T>
+  T Transform(D d, const T& linear) {
+    return linear;
+  }
+};
+
+struct OpRgb {  // sRGB encoding of a linear value
+  template <typename D, typename T>
+  T Transform(D d, const T& linear) {
+#if JXL_HIGH_PRECISION
+    return TF_SRGB().EncodedFromDisplay(d, linear);
+#else
+    return FastLinearToSRGB(d, linear);  // used when JXL_HIGH_PRECISION is 0
+#endif
+  }
+};
+
+struct OpPq {  // applies TF_PQ's EncodedFromDisplay
+  template <typename D, typename T>
+  T Transform(D d, const T& linear) {
+    return TF_PQ().EncodedFromDisplay(d, linear);
+  }
+};
+
+struct OpHlg {  // applies TF_HLG's EncodedFromDisplay
+  template <typename D, typename T>
+  T Transform(D d, const T& linear) {
+    return TF_HLG().EncodedFromDisplay(d, linear);
+  }
+};
+
+struct Op709 {  // applies TF_709's EncodedFromDisplay
+  template <typename D, typename T>
+  T Transform(D d, const T& linear) {
+    return TF_709().EncodedFromDisplay(d, linear);
+  }
+};
+
+struct OpGamma {  // power-law encoding: linear ^ inverse_gamma
+  const float inverse_gamma;  // exponent passed to FastPowf
+  template <typename D, typename T>
+  T Transform(D d, const T& linear) {
+    return IfThenZeroElse(linear <= Set(d, 1e-5f),  // lanes <= 1e-5 become 0
+                          FastPowf(d, linear, Set(d, inverse_gamma)));
+  }
+};
+
+Status UndoXYBInPlace(Image3F* idct, const Rect& rect,
+                      const OutputEncodingInfo& output_encoding_info) {
+  PROFILER_ZONE("UndoXYB");
+  // Pick the per-lane Op that matches the output transfer function.
+  if (output_encoding_info.color_encoding.tf.IsLinear()) {
+    DoUndoXYBInPlace(idct, rect, OpLinear(), output_encoding_info);
+  } else if (output_encoding_info.color_encoding.tf.IsSRGB()) {
+    DoUndoXYBInPlace(idct, rect, OpRgb(), output_encoding_info);
+  } else if (output_encoding_info.color_encoding.tf.IsPQ()) {
+    DoUndoXYBInPlace(idct, rect, OpPq(), output_encoding_info);
+  } else if (output_encoding_info.color_encoding.tf.IsHLG()) {
+    DoUndoXYBInPlace(idct, rect, OpHlg(), output_encoding_info);
+  } else if (output_encoding_info.color_encoding.tf.Is709()) {
+    DoUndoXYBInPlace(idct, rect, Op709(), output_encoding_info);
+  } else if (output_encoding_info.color_encoding.tf.IsGamma() ||
+             output_encoding_info.color_encoding.tf.IsDCI()) {
+    OpGamma op = {output_encoding_info.inverse_gamma};  // gamma and DCI share
+    DoUndoXYBInPlace(idct, rect, op, output_encoding_info);
+  } else {
+    // This is a programming error.
+    JXL_ABORT("Invalid target encoding");
+  }
+  return true;
+}
+
+template <typename D, typename V>
+void StoreRGBA(D d, V r, V g, V b, V a, bool alpha, size_t n, size_t extra,
+               uint8_t* buf) {  // writes interleaved RGB or RGBA bytes to buf
+#if HWY_TARGET == HWY_SCALAR  // one pixel per call; n and extra are unused
+  buf[0] = r.raw;
+  buf[1] = g.raw;
+  buf[2] = b.raw;
+  if (alpha) {
+    buf[3] = a.raw;
+  }
+#elif HWY_TARGET == HWY_NEON
+  if (alpha) {
+    uint8x8x4_t data = {r.raw, g.raw, b.raw, a.raw};
+    if (extra >= 8) {  // direct 8-pixel interleaved store into buf
+      vst4_u8(buf, data);
+    } else {  // store to a temporary, then copy only n pixels
+      uint8_t tmp[8 * 4];
+      vst4_u8(tmp, data);
+      memcpy(buf, tmp, n * 4);
+    }
+  } else {
+    uint8x8x3_t data = {r.raw, g.raw, b.raw};
+    if (extra >= 8) {
+      vst3_u8(buf, data);
+    } else {
+      uint8_t tmp[8 * 3];
+      vst3_u8(tmp, data);
+      memcpy(buf, tmp, n * 3);
+    }
+  }
+#else
+  // TODO(veluca): implement this for x86.
+  size_t mul = alpha ? 4 : 3;  // bytes per output pixel
+  HWY_ALIGN uint8_t bytes[16];  // staging buffer for one channel's lanes
+  Store(r, d, bytes);
+  for (size_t i = 0; i < n; i++) {
+    buf[mul * i] = bytes[i];
+  }
+  Store(g, d, bytes);
+  for (size_t i = 0; i < n; i++) {
+    buf[mul * i + 1] = bytes[i];
+  }
+  Store(b, d, bytes);
+  for (size_t i = 0; i < n; i++) {
+    buf[mul * i + 2] = bytes[i];
+  }
+  if (alpha) {
+    Store(a, d, bytes);
+    for (size_t i = 0; i < n; i++) {
+      buf[4 * i + 3] = bytes[i];  // mul is 4 whenever alpha is true
+    }
+  }
+#endif
+}
+
+// Converts a floating point image to an 8-bit RGB(A) buffer, clamping to
+// [0, 1] and scaling by 255. Alpha is taken from `alpha_in` when it is
+// non-null and is fully opaque (255) otherwise; it is stored only if is_rgba.
+void FloatToRGBA8(const Image3F& input, const Rect& input_rect, bool is_rgba,
+                  const ImageF* alpha_in, const Rect& alpha_rect,
+                  const Rect& output_buf_rect, uint8_t* JXL_RESTRICT output_buf,
+                  size_t stride) {
+  size_t bytes = is_rgba ? 4 : 3;  // bytes per output pixel
+  for (size_t y = 0; y < output_buf_rect.ysize(); y++) {
+    const float* JXL_RESTRICT row_in_r = input_rect.ConstPlaneRow(input, 0, y);
+    const float* JXL_RESTRICT row_in_g = input_rect.ConstPlaneRow(input, 1, y);
+    const float* JXL_RESTRICT row_in_b = input_rect.ConstPlaneRow(input, 2, y);
+    const float* JXL_RESTRICT row_in_a =
+        alpha_in ? alpha_rect.ConstRow(*alpha_in, y) : nullptr;
+    size_t base_ptr =  // byte offset of this row's first pixel in output_buf
+        (y + output_buf_rect.y0()) * stride + bytes * output_buf_rect.x0();
+    using D = HWY_CAPPED(float, 4);
+    const D d;
+    D::Rebind<uint32_t> du;
+    auto zero = Zero(d);
+    auto one = Set(d, 1.0f);
+    auto mul = Set(d, 255.0f);
+
+    // All calculations are lane-wise, still some might require value-dependent
+    // behaviour (e.g. NearestInt). Temporarily unpoison the last vector tail.
+    size_t xsize = output_buf_rect.xsize();
+    size_t xsize_v = RoundUpTo(xsize, Lanes(d));
+    msan::UnpoisonMemory(row_in_r + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::UnpoisonMemory(row_in_g + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::UnpoisonMemory(row_in_b + xsize, sizeof(float) * (xsize_v - xsize));
+    if (row_in_a)
+      msan::UnpoisonMemory(row_in_a + xsize, sizeof(float) * (xsize_v - xsize));
+    for (size_t x = 0; x < xsize; x += Lanes(d)) {
+      auto rf = Clamp(zero, Load(d, row_in_r + x), one) * mul;
+      auto gf = Clamp(zero, Load(d, row_in_g + x), one) * mul;
+      auto bf = Clamp(zero, Load(d, row_in_b + x), one) * mul;
+      auto af = row_in_a ? Clamp(zero, Load(d, row_in_a + x), one) * mul
+                         : Set(d, 255.0f);
+      auto r8 = U8FromU32(BitCast(du, NearestInt(rf)));
+      auto g8 = U8FromU32(BitCast(du, NearestInt(gf)));
+      auto b8 = U8FromU32(BitCast(du, NearestInt(bf)));
+      auto a8 = U8FromU32(BitCast(du, NearestInt(af)));
+      size_t n = output_buf_rect.xsize() - x;  // pixels left in this row
+      if (JXL_LIKELY(n >= Lanes(d))) {  // full vector: store Lanes(d) pixels
+        StoreRGBA(D::Rebind<uint8_t>(), r8, g8, b8, a8, is_rgba, Lanes(d), n,
+                  output_buf + base_ptr + bytes * x);
+      } else {  // row tail: store only the n remaining pixels
+        StoreRGBA(D::Rebind<uint8_t>(), r8, g8, b8, a8, is_rgba, n, n,
+                  output_buf + base_ptr + bytes * x);
+      }
+    }
+    msan::PoisonMemory(row_in_r + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::PoisonMemory(row_in_g + xsize, sizeof(float) * (xsize_v - xsize));
+    msan::PoisonMemory(row_in_b + xsize, sizeof(float) * (xsize_v - xsize));
+    if (row_in_a)
+      msan::PoisonMemory(row_in_a + xsize, sizeof(float) * (xsize_v - xsize));
+  }
+}
+
+// Upsamples the plane_in image horizontally (if hs=1) and vertically (if vs=1)
+// into the output plane_out image.
+// The output region "rect" in plane_out and a border around it of lf.Padding()
+// will be generated, as long as those pixels fall inside the image frame.
+// Otherwise the border pixels that fall outside the image frame in plane_out
+// are undefined.
+// "rect" is an area inside the plane_out image which corresponds to the
+// "frame_rect" area in the frame. plane_in and plane_out both are expected to
+// have a padding of kGroupDataXBorder and kGroupDataYBorder on either side of
+// X and Y coordinates. This means that when upsampling vertically the plane_out
+// row `kGroupDataXBorder + N` will be generated from the plane_in row
+// `kGroupDataXBorder + N / 2` (and a previous or next row).
+void DoYCbCrUpsampling(size_t hs, size_t vs, ImageF* plane_in, const Rect& rect,
+                       const Rect& frame_rect, const FrameDimensions& frame_dim,
+                       ImageF* plane_out, const LoopFilter& lf, ImageF* temp) {
+  JXL_DASSERT(SameSize(rect, frame_rect));
+  JXL_DASSERT(hs <= 1 && vs <= 1);
+  // The pixel in (xoff, yoff) is the origin of the downsampling coordinate
+  // system.
+  size_t xoff = PassesDecoderState::kGroupDataXBorder;
+  size_t yoff = PassesDecoderState::kGroupDataYBorder;
+
+  // This X,Y range is the intersection between the requested "rect" expanded
+  // with a lf.Padding() all around and the image frame translated to the
+  // coordinate system used by plane_out.
+  // All the pixels in the [x0, x1) x [y0, y1) range must be defined in the
+  // plane_out output at the end.
+  const size_t y0 = rect.y0() - std::min<size_t>(lf.Padding(), frame_rect.y0());
+  const size_t y1 = rect.y0() +
+                    std::min(frame_rect.y0() + rect.ysize() + lf.Padding(),
+                             frame_dim.ysize_padded) -
+                    frame_rect.y0();
+
+  const size_t x0 = rect.x0() - std::min<size_t>(lf.Padding(), frame_rect.x0());
+  const size_t x1 = rect.x0() +
+                    std::min(frame_rect.x0() + rect.xsize() + lf.Padding(),
+                             frame_dim.xsize_padded) -
+                    frame_rect.x0();
+
+  if (hs == 0 && vs == 0) {
+    Rect r(x0, y0, x1 - x0, y1 - y0);
+    JXL_CHECK_IMAGE_INITIALIZED(*plane_in, r);
+    CopyImageTo(r, *plane_in, r, plane_out);
+    return;
+  }
+  // Prepare padding if we are on a border.
+  // Copy the whole row/column here: it is likely similarly fast and ensures
+  // that we don't forget some parts of padding.
+  if (frame_rect.x0() == 0) {
+    for (size_t y = 0; y < plane_in->ysize(); y++) {
+      plane_in->Row(y)[rect.x0() - 1] = plane_in->Row(y)[rect.x0()];
+    }
+  }
+  if (frame_rect.x0() + x1 - rect.x0() >= frame_dim.xsize_padded) {
+    ssize_t borderx = static_cast<ssize_t>(x1 - xoff + hs) / (1 << hs) + xoff;
+    for (size_t y = 0; y < plane_in->ysize(); y++) {
+      plane_in->Row(y)[borderx] = plane_in->Row(y)[borderx - 1];
+    }
+  }
+  if (frame_rect.y0() == 0) {
+    memcpy(plane_in->Row(rect.y0() - 1), plane_in->Row(rect.y0()),
+           plane_in->xsize() * sizeof(float));
+  }
+  if (frame_rect.y0() + y1 - rect.y0() >= frame_dim.ysize_padded) {
+    ssize_t bordery = static_cast<ssize_t>(y1 - yoff + vs) / (1 << vs) + yoff;
+    memcpy(plane_in->Row(bordery), plane_in->Row(bordery - 1),
+           plane_in->xsize() * sizeof(float));
+  }
+  if (hs == 1) {
+    // Limited to 4 for Interleave*.
+    HWY_CAPPED(float, 4) d;
+    auto threefour = Set(d, 0.75f);
+    auto onefour = Set(d, 0.25f);
+    size_t orig_y0 = y0;
+    size_t orig_y1 = y1;
+    if (vs != 0) {
+      orig_y0 = (y0 >> 1) + (yoff >> 1) - 1;
+      orig_y1 = (y1 >> 1) + (yoff >> 1) + 1;
+    }
+    for (size_t y = orig_y0; y < orig_y1; y++) {
+      const float* in = plane_in->Row(y);
+      float* out = temp->Row(y);
+      for (size_t x = x0 / (2 * Lanes(d)) * 2 * Lanes(d);
+           x < RoundUpTo(x1, 2 * Lanes(d)); x += 2 * Lanes(d)) {
+        size_t ox = (x >> 1) + (xoff >> 1);
+        auto current = Load(d, in + ox) * threefour;
+        auto prev = LoadU(d, in + ox - 1);
+        auto next = LoadU(d, in + ox + 1);
+        auto left = MulAdd(onefour, prev, current);
+        auto right = MulAdd(onefour, next, current);
+#if HWY_TARGET == HWY_SCALAR
+        Store(left, d, out + x);
+        Store(right, d, out + x + 1);
+#else
+        Store(InterleaveLower(left, right), d, out + x);
+        Store(InterleaveUpper(left, right), d, out + x + Lanes(d));
+#endif
+      }
+    }
+  } else {
+    CopyImageTo(*plane_in, temp);
+  }
+  if (vs == 1) {
+    HWY_FULL(float) d;
+    auto threefour = Set(d, 0.75f);
+    auto onefour = Set(d, 0.25f);
+    for (size_t y = y0; y < y1; y++) {
+      size_t oy1 = (y >> 1) + (yoff >> 1);
+      if ((y & 1) == 1) oy1++;
+      size_t oy0 = oy1 - 1;
+      const float* in0 = temp->Row(oy0);
+      const float* in1 = temp->Row(oy1);
+      float* out = plane_out->Row(y);
+      if ((y & 1) == 1) {
+        for (size_t x = x0 / Lanes(d) * Lanes(d); x < RoundUpTo(x1, Lanes(d));
+             x += Lanes(d)) {
+          auto i0 = Load(d, in0 + x);
+          auto i1 = Load(d, in1 + x);
+          auto o = MulAdd(i0, threefour, i1 * onefour);
+          Store(o, d, out + x);
+        }
+      } else {
+        for (size_t x = x0 / Lanes(d) * Lanes(d); x < RoundUpTo(x1, Lanes(d));
+             x += Lanes(d)) {
+          auto i0 = Load(d, in0 + x);
+          auto i1 = Load(d, in1 + x);
+          auto o = MulAdd(i0, onefour, i1 * threefour);
+          Store(o, d, out + x);
+        }
+      }
+    }
+  } else {
+    CopyImageTo(*temp, plane_out);
+  }
+
+  // The output must be initialized including the lf.Padding() around the image
+  // for all the pixels that fall inside the image frame.
+  JXL_CHECK_IMAGE_INITIALIZED(*plane_out, Rect(x0, y0, x1 - x0, y1 - y0));
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(UndoXYBInPlace);
+HWY_EXPORT(FloatToRGBA8);
+HWY_EXPORT(DoYCbCrUpsampling);
+
+void UndoXYB(const Image3F& src, Image3F* dst,
+             const OutputEncodingInfo& output_info, ThreadPool* pool) {
+  CopyImageTo(src, dst);  // Rows of `dst` are then converted in place.
+  pool->Run(0, src.ysize(), ThreadPool::SkipInit(), [&](int y, int /*thread*/) {
+    const Rect row_rect = Rect(*dst).Line(y);
+    JXL_CHECK(HWY_DYNAMIC_DISPATCH(UndoXYBInPlace)(dst, row_rect, output_info));
+  });
+}
+
+namespace {
+Rect ScaleRectForEC(Rect in, const FrameHeader& frame_header, size_t ec) {
+  // Maps v to DivCeil(v * upsampling, extra_channel_upsampling[ec]).
+  const size_t num = frame_header.upsampling;
+  const size_t den = frame_header.extra_channel_upsampling[ec];
+  const auto map = [&](size_t v) { return DivCeil(v * num, den); };
+  return Rect(map(in.x0()), map(in.y0()), map(in.xsize()), map(in.ysize()));
+}
+
+// Row-by-row variant of EnsurePaddingInPlace: call Init() once, then Process()
+// or Process3() for every row index in the [*y0, *y1) range set by Init().
+class EnsurePaddingInPlaceRowByRow {
+  void Init(const Rect& rect, const Rect& image_rect, size_t image_xsize,
+            size_t image_ysize, size_t xpadding, size_t ypadding, ssize_t* y0,
+            ssize_t* y1) {
+    // *y0 and *y1 are row indices relative to rect.y0().
+    JXL_DASSERT(SameSize(rect, image_rect));
+    *y0 = -std::min(image_rect.y0(), ypadding);  // <= 0: rows above rect.
+    *y1 = rect.ysize() + std::min(ypadding, image_ysize - image_rect.ysize() -
+                                                image_rect.y0());
+    if (image_rect.x0() >= xpadding &&
+        image_rect.x0() + image_rect.xsize() + xpadding <= image_xsize) {
+      // Both horizontal borders are inside the image: nothing to fill.
+      strategy_ = kSkip;
+    } else if (image_xsize >= 2 * xpadding) {
+      strategy_ = kFast;
+    } else {
+      strategy_ = kSlow;
+    }
+    y0_ = rect.y0();  // Process(y) touches image row y + y0_.
+    JXL_DASSERT(rect.x0() >= xpadding);
+    x0_ = x1_ = rect.x0() - xpadding;
+    // Within xpadding of the left border: [x0_, x1_) is filled by mirroring.
+    if (image_rect.x0() < xpadding) x1_ = rect.x0() - image_rect.x0();
+    x2_ = x3_ = rect.x0() + rect.xsize() + xpadding;
+    // Within xpadding of the right border: [x2_, x3_) is filled by mirroring.
+    if (image_rect.x0() + image_rect.xsize() + xpadding > image_xsize) {
+      x2_ = rect.x0() + image_xsize - image_rect.x0();
+    }
+    JXL_DASSERT(image_xsize == (x2_ - x1_) ||
+                (x1_ - x0_ <= x2_ - x1_ && x3_ - x2_ <= x2_ - x1_));
+  }
+
+ public:
+  void Init(Image3F* img, const Rect& rect, const Rect& image_rect,
+            size_t image_xsize, size_t image_ysize, size_t xpadding,
+            size_t ypadding, ssize_t* y0, ssize_t* y1) {
+    Init(rect, image_rect, image_xsize, image_ysize, xpadding, ypadding, y0,
+         y1);
+    img3_ = img;
+    JXL_DASSERT(x3_ <= img->xsize());
+  }
+  void Init(ImageF* img, const Rect& rect, const Rect& image_rect,
+            size_t image_xsize, size_t image_ysize, size_t xpadding,
+            size_t ypadding, ssize_t* y0, ssize_t* y1) {
+    Init(rect, image_rect, image_xsize, image_ysize, xpadding, ypadding, y0,
+         y1);
+    img_ = img;
+    JXL_DASSERT(x3_ <= img->xsize());
+  }
+  // To be called once row `y` of the input is available, for every y in
+  // [*y0, *y1). Pads all three planes of the Image3F passed to Init().
+  void Process3(ssize_t y) {
+    JXL_DASSERT(img3_);
+    for (size_t c = 0; c < 3; c++) {
+      img_ = &img3_->Plane(c);
+      Process(y);
+    }
+  }
+  void Process(ssize_t y) {  // Single-plane version; pads row y of img_.
+    JXL_DASSERT(img_);
+    switch (strategy_) {
+      case kSkip:
+        break;
+      case kFast: {
+        // Image is wide enough that a single Mirror() step is sufficient.
+        float* JXL_RESTRICT row = img_->Row(y + y0_);
+        for (size_t x = x0_; x < x1_; x++) {
+          row[x] = row[2 * x1_ - x - 1];
+        }
+        for (size_t x = x2_; x < x3_; x++) {
+          row[x] = row[2 * x2_ - x - 1];
+        }
+        break;
+      }
+      case kSlow: {
+        // Slow case for small images: indices below are relative to x1_.
+        float* JXL_RESTRICT row = img_->Row(y + y0_) + x1_;
+        for (ssize_t x = x0_ - x1_; x < 0; x++) {
+          *(row + x) = row[Mirror(x, x2_ - x1_)];
+        }
+        for (size_t x = x2_ - x1_; x < x3_ - x1_; x++) {
+          *(row + x) = row[Mirror(x, x2_ - x1_)];
+        }
+        break;
+      }
+    }
+  }
+
+ private:
+  // Initialized to silence spurious compiler warnings.
+  Image3F* img3_ = nullptr;  // Set by the Image3F overload of Init().
+  ImageF* img_ = nullptr;    // Plane currently being padded.
+  // Will fill [x0_, x1_) and [x2_, x3_) on every row.
+  // The [x1_, x2_) range contains valid image pixels. We guarantee that either
+  // x1_ - x0_ <= x2_ - x1_, (and similarly for x2_, x3_), or that the [x1_,
+  // x2_) contains a full horizontal line of the original image.
+  size_t x0_ = 0, x1_ = 0, x2_ = 0, x3_ = 0;
+  size_t y0_ = 0;  // rect.y0(), added to y in Process().
+  // kSlow: use calls to Mirror(), for the case where the border might be larger
+  // than the image.
+  // kFast: directly use the result of Mirror() when it can be computed in a
+  // single iteration.
+  // kSkip: do nothing.
+  enum Strategy { kFast, kSlow, kSkip };
+  Strategy strategy_ = kSkip;
+};
+}  // namespace
+
+void EnsurePaddingInPlace(Image3F* img, const Rect& rect,
+                          const Rect& image_rect, size_t image_xsize,
+                          size_t image_ysize, size_t xpadding,
+                          size_t ypadding) {
+  // Whole-rect variant: pads every row in the range reported by Init().
+  EnsurePaddingInPlaceRowByRow padder;
+  ssize_t row_begin, row_end;
+  padder.Init(img, rect, image_rect, image_xsize, image_ysize, xpadding,
+              ypadding, &row_begin, &row_end);
+  ssize_t row = row_begin;
+  while (row < row_end) padder.Process3(row++);
+}
+
+Status FinalizeImageRect(
+    Image3F* input_image, const Rect& input_rect,
+    const std::vector<std::pair<ImageF*, Rect>>& extra_channels,
+    PassesDecoderState* dec_state, size_t thread,
+    ImageBundle* JXL_RESTRICT output_image, const Rect& frame_rect) {
+  const ImageFeatures& image_features = dec_state->shared->image_features;
+  const FrameHeader& frame_header = dec_state->shared->frame_header;
+  const ImageMetadata& metadata = frame_header.nonserialized_metadata->m;
+  const LoopFilter& lf = frame_header.loop_filter;
+  const FrameDimensions& frame_dim = dec_state->shared->frame_dim;
+  JXL_DASSERT(frame_rect.xsize() <= kApplyImageFeaturesTileDim);
+  JXL_DASSERT(frame_rect.ysize() <= kApplyImageFeaturesTileDim);
+  JXL_DASSERT(input_rect.xsize() == frame_rect.xsize());
+  JXL_DASSERT(input_rect.ysize() == frame_rect.ysize());
+  JXL_DASSERT(frame_rect.x0() % GroupBorderAssigner::kPaddingXRound == 0);
+  JXL_DASSERT(frame_rect.xsize() % GroupBorderAssigner::kPaddingXRound == 0 ||
+              frame_rect.xsize() + frame_rect.x0() == frame_dim.xsize ||
+              frame_rect.xsize() + frame_rect.x0() == frame_dim.xsize_padded);
+
+  // +----------------------------- STEP 1 ------------------------------+
+  // | Compute the rects on which patches and splines will be applied.   |
+  // | In case we are applying upsampling, we need to apply patches on a |
+  // | slightly larger image.                                            |
+  // +-------------------------------------------------------------------+
+
+  // If we are applying upsampling, we need 2 more pixels around the actual rect
+  // for border. Thus, we also need to apply patches and splines to those
+  // pixels. We compute here
+  // - The portion of the frame on which image features (IF) are applied.
+  //   (rect_for_if)
+  // - The rect where that pixel data is stored in upsampling_input_storage.
+  //   (rect_for_if_storage)
+  // - The rect where the pixel data that we need to upsample is stored.
+  //   (rect_for_upsampling)
+  // - The source rect for the pixel data in `input_image`. It is assumed that,
+  //   if `frame_rect` is not on an image border, `input_image:input_rect` has
+  //   enough border available. (rect_for_if_input)
+
+  Image3F* output_color =
+      dec_state->rgb_output == nullptr && dec_state->pixel_callback == nullptr
+          ? output_image->color()
+          : nullptr;
+
+  Image3F* storage_for_if = output_color;
+  Rect rect_for_if = frame_rect;
+  Rect rect_for_if_storage = frame_rect;
+  Rect rect_for_upsampling = frame_rect;
+  Rect rect_for_if_input = input_rect;
+  // The same as rect_for_if_input but in the frame coordinates.
+  Rect frame_rect_for_ycbcr_upsampling = frame_rect;
+  size_t extra_rows_t = 0;  // Rows added above frame_rect: 0 or 2.
+  size_t extra_rows_b = 0;  // Rows added below frame_rect: 0, 1 or 2.
+  if (frame_header.upsampling != 1) {
+    size_t ifbx0 = 0;
+    size_t ifbx1 = 0;
+    size_t ifby0 = 0;
+    size_t ifby1 = 0;
+    if (frame_rect.x0() >= 2) {
+      JXL_DASSERT(input_rect.x0() >= 2);
+      ifbx0 = 2;
+    }
+    if (frame_rect.y0() >= 2) {
+      JXL_DASSERT(input_rect.y0() >= 2);
+      extra_rows_t = ifby0 = 2;
+    }
+    for (size_t extra : {1, 2}) {  // Ends with the largest border that fits.
+      if (frame_rect.x0() + frame_rect.xsize() + extra <=
+          dec_state->shared->frame_dim.xsize_padded) {
+        JXL_DASSERT(input_rect.x0() + input_rect.xsize() + extra <=
+                    input_image->xsize());
+        ifbx1 = extra;
+      }
+      if (frame_rect.y0() + frame_rect.ysize() + extra <=
+          dec_state->shared->frame_dim.ysize_padded) {
+        JXL_DASSERT(input_rect.y0() + input_rect.ysize() + extra <=
+                    input_image->ysize());
+        extra_rows_b = ifby1 = extra;
+      }
+    }
+    rect_for_if = Rect(frame_rect.x0() - ifbx0, frame_rect.y0() - ifby0,
+                       frame_rect.xsize() + ifbx0 + ifbx1,
+                       frame_rect.ysize() + ifby0 + ifby1);
+    // Storage for pixel data does not necessarily start at (0, 0) as we need to
+    // have the left border of rect_for_upsampling aligned to a multiple of
+    // GroupBorderAssigner::kPaddingXRound.
+    rect_for_if_storage =
+        Rect(kBlockDim + RoundUpTo(ifbx0, GroupBorderAssigner::kPaddingXRound) -
+                 ifbx0,
+             kBlockDim, rect_for_if.xsize(), rect_for_if.ysize());
+    rect_for_upsampling =
+        Rect(kBlockDim + RoundUpTo(ifbx0, GroupBorderAssigner::kPaddingXRound),
+             kBlockDim + ifby0, frame_rect.xsize(), frame_rect.ysize());
+    rect_for_if_input =
+        Rect(input_rect.x0() - ifbx0, input_rect.y0() - ifby0,
+             rect_for_if_storage.xsize(), rect_for_if_storage.ysize());
+    frame_rect_for_ycbcr_upsampling =
+        Rect(frame_rect.x0() - ifbx0, frame_rect.y0() - ifby0,
+             rect_for_if_input.xsize(), rect_for_if_input.ysize());
+    storage_for_if = &dec_state->upsampling_input_storage[thread];
+  }
+
+  // +--------------------------- STEP 1.5 ------------------------------+
+  // | Perform YCbCr upsampling if needed.                               |
+  // +-------------------------------------------------------------------+
+
+  Image3F* input = input_image;
+  if (!frame_header.chroma_subsampling.Is444()) {
+    for (size_t c = 0; c < 3; c++) {
+      size_t vs = frame_header.chroma_subsampling.VShift(c);
+      size_t hs = frame_header.chroma_subsampling.HShift(c);
+      // The per-thread output is used for the first time here. Poison the temp
+      // image on this thread to prevent leaking initialized data from a
+      // previous run in this thread in msan builds.
+      msan::PoisonImage(dec_state->ycbcr_out_images[thread].Plane(c));
+      HWY_DYNAMIC_DISPATCH(DoYCbCrUpsampling)
+      (hs, vs, &input_image->Plane(c), rect_for_if_input,
+       frame_rect_for_ycbcr_upsampling, frame_dim,
+       &dec_state->ycbcr_out_images[thread].Plane(c), lf,
+       &dec_state->ycbcr_temp_images[thread]);
+    }
+    input = &dec_state->ycbcr_out_images[thread];
+  }
+
+  // Variables for upsampling and filtering.
+  Rect upsampled_frame_rect(frame_rect.x0() * frame_header.upsampling,
+                            frame_rect.y0() * frame_header.upsampling,
+                            frame_rect.xsize() * frame_header.upsampling,
+                            frame_rect.ysize() * frame_header.upsampling);
+  Rect full_frame_rect(0, 0, frame_dim.xsize_upsampled,
+                       frame_dim.ysize_upsampled);
+  upsampled_frame_rect = upsampled_frame_rect.Crop(full_frame_rect);
+  EnsurePaddingInPlaceRowByRow ensure_padding_upsampling;
+  ssize_t ensure_padding_upsampling_y0 = 0;
+  ssize_t ensure_padding_upsampling_y1 = 0;
+
+  EnsurePaddingInPlaceRowByRow ensure_padding_filter;
+  FilterPipeline* fp = nullptr;  // Set iff lf.epf_iters != 0 || lf.gab.
+  ssize_t ensure_padding_filter_y0 = 0;
+  ssize_t ensure_padding_filter_y1 = 0;
+  if (lf.epf_iters != 0 || lf.gab) {
+    fp = &dec_state->filter_pipelines[thread];
+  }
+
+  // +----------------------------- STEP 2 ------------------------------+
+  // | Change rects and buffer to not use `output_image` if direct       |
+  // | output to rgb8 is requested.                                      |
+  // +-------------------------------------------------------------------+
+  Image3F* output_pixel_data_storage = output_color;
+  Rect upsampled_frame_rect_for_storage = upsampled_frame_rect;
+  if (dec_state->rgb_output || dec_state->pixel_callback) {
+    size_t log2_upsampling = CeilLog2Nonzero(frame_header.upsampling);
+    if (storage_for_if == output_color) {
+      storage_for_if =
+          &dec_state->output_pixel_data_storage[log2_upsampling][thread];
+      rect_for_if_storage =
+          Rect(0, 0, rect_for_if_storage.xsize(), rect_for_if_storage.ysize());
+    }
+    output_pixel_data_storage =
+        &dec_state->output_pixel_data_storage[log2_upsampling][thread];
+    upsampled_frame_rect_for_storage =
+        Rect(0, 0, upsampled_frame_rect.xsize(), upsampled_frame_rect.ysize());
+    if (frame_header.upsampling == 1 && fp == nullptr) {
+      upsampled_frame_rect_for_storage = rect_for_if_storage =
+          rect_for_if_input;
+      output_pixel_data_storage = storage_for_if = input;
+    }
+  }
+  // Set up alpha channel; `alpha` stays null unless `ec` is a valid index.
+  const size_t ec =
+      metadata.Find(ExtraChannel::kAlpha) - metadata.extra_channel_info.data();
+  const ImageF* alpha = nullptr;
+  Rect alpha_rect = upsampled_frame_rect;
+  if (ec < metadata.extra_channel_info.size()) {
+    JXL_ASSERT(ec < extra_channels.size());
+    if (frame_header.extra_channel_upsampling[ec] == 1) {
+      alpha = extra_channels[ec].first;
+      alpha_rect = extra_channels[ec].second;
+    } else {
+      alpha = &output_image->extra_channels()[ec];
+      alpha_rect = upsampled_frame_rect;
+    }
+  }
+
+  // +----------------------------- STEP 3 ------------------------------+
+  // | Set up upsampling and upsample extra channels.                    |
+  // +-------------------------------------------------------------------+
+  Upsampler* color_upsampler = nullptr;
+  if (frame_header.upsampling != 1) {
+    color_upsampler =
+        &dec_state->upsamplers[CeilLog2Nonzero(frame_header.upsampling) - 1];
+    ensure_padding_upsampling.Init(
+        storage_for_if, rect_for_upsampling, frame_rect, frame_dim.xsize_padded,
+        frame_dim.ysize_padded, 2, 2, &ensure_padding_upsampling_y0,
+        &ensure_padding_upsampling_y1);
+  }
+
+  std::vector<std::pair<ImageF*, Rect>> extra_channels_for_patches;
+  std::vector<EnsurePaddingInPlaceRowByRow> ec_padding;
+
+  bool late_ec_upsample = frame_header.upsampling != 1;
+  for (auto ecups : frame_header.extra_channel_upsampling) {
+    if (ecups != frame_header.upsampling) {
+      // If patches are applied, either frame_header.upsampling == 1 or
+      // late_ec_upsample is true.
+      late_ec_upsample = false;
+    }
+  }
+
+  ssize_t ensure_padding_upsampling_ec_y0 = 0;
+  ssize_t ensure_padding_upsampling_ec_y1 = 0;
+
+  // TODO(veluca) do not upsample extra channels to a full-image-sized buffer if
+  // we are not outputting to an ImageBundle.
+  if (!late_ec_upsample) {
+    // Upsample extra channels first if not all channels have the same
+    // upsampling factor.
+    for (size_t ec = 0; ec < extra_channels.size(); ec++) {
+      size_t ecups = frame_header.extra_channel_upsampling[ec];
+      if (ecups == 1) {
+        extra_channels_for_patches.push_back(extra_channels[ec]);
+        continue;
+      }
+      ssize_t ensure_padding_y0, ensure_padding_y1;
+      EnsurePaddingInPlaceRowByRow ensure_padding;
+      Rect ec_image_rect = ScaleRectForEC(frame_rect, frame_header, ec);
+      size_t ecxs = DivCeil(frame_dim.xsize_upsampled,
+                            frame_header.extra_channel_upsampling[ec]);
+      size_t ecys = DivCeil(frame_dim.ysize_upsampled,
+                            frame_header.extra_channel_upsampling[ec]);
+      ensure_padding.Init(extra_channels[ec].first, extra_channels[ec].second,
+                          ec_image_rect, ecxs, ecys, 2, 2, &ensure_padding_y0,
+                          &ensure_padding_y1);
+      for (ssize_t y = ensure_padding_y0; y < ensure_padding_y1; y++) {
+        ensure_padding.Process(y);
+      }
+      Upsampler& upsampler =
+          dec_state->upsamplers[CeilLog2Nonzero(
+                                    frame_header.extra_channel_upsampling[ec]) -
+                                1];
+      upsampler.UpsampleRect(
+          *extra_channels[ec].first, extra_channels[ec].second,
+          &output_image->extra_channels()[ec], upsampled_frame_rect,
+          static_cast<ssize_t>(ec_image_rect.y0()) -
+              static_cast<ssize_t>(extra_channels[ec].second.y0()),
+          ecys, dec_state->upsampler_storage[thread].get());
+      extra_channels_for_patches.emplace_back(
+          &output_image->extra_channels()[ec], upsampled_frame_rect);
+    }
+  } else {
+    // Upsample extra channels last if color channels are upsampled and all the
+    // extra channels have the same upsampling as them.
+    ec_padding.resize(extra_channels.size());
+    for (size_t ec = 0; ec < extra_channels.size(); ec++) {
+      // Add a border to the extra channel rect for when patches are applied.
+      // This ensures that the correct row is accessed (y values for patches are
+      // relative to rect_for_if, not to input_rect).
+      // As the rect is extended by 0 or 2 pixels, and the patches input has,
+      // accordingly, the same padding, this is safe.
+      Rect r(extra_channels[ec].second.x0() + rect_for_upsampling.x0() -
+                 rect_for_if_storage.x0(),
+             extra_channels[ec].second.y0() + rect_for_upsampling.y0() -
+                 rect_for_if_storage.y0(),
+             extra_channels[ec].second.xsize() + rect_for_if_storage.xsize() -
+                 rect_for_upsampling.xsize(),
+             extra_channels[ec].second.ysize() + rect_for_if_storage.ysize() -
+                 rect_for_upsampling.ysize());
+      extra_channels_for_patches.emplace_back(extra_channels[ec].first, r);
+      ec_padding[ec].Init(extra_channels[ec].first, extra_channels[ec].second,
+                          frame_rect, frame_dim.xsize, frame_dim.ysize, 2, 2,
+                          &ensure_padding_upsampling_ec_y0,
+                          &ensure_padding_upsampling_ec_y1);
+    }
+  }
+
+  // Initialized to a valid non-null ptr to avoid UB if arithmetic is done with
+  // the pointer value (which would then not be used).
+  std::vector<float*> ec_ptrs_for_patches(extra_channels.size(),
+                                          input->PlaneRow(0, 0));
+
+  // +----------------------------- STEP 4 ------------------------------+
+  // | Set up the filter pipeline.                                       |
+  // +-------------------------------------------------------------------+
+  if (fp) {
+    ensure_padding_filter.Init(
+        input, rect_for_if_input, rect_for_if, frame_dim.xsize_padded,
+        frame_dim.ysize_padded, lf.Padding(), lf.Padding(),
+        &ensure_padding_filter_y0, &ensure_padding_filter_y1);
+
+    fp = PrepareFilterPipeline(dec_state, rect_for_if, *input,
+                               rect_for_if_input, frame_dim.ysize_padded,
+                               thread, storage_for_if, rect_for_if_storage);
+  }
+
+  // +----------------------------- STEP 5 ------------------------------+
+  // | Run the prepared pipeline of operations.                          |
+  // +-------------------------------------------------------------------+
+
+  // y values are relative to rect_for_if.
+  // Automatic mirroring in fp->ApplyFiltersRow() implies that we should ensure
+  // that padding for the first lines of the image is already present before
+  // calling ApplyFiltersRow() with "virtual" rows.
+  // Here we rely on the fact that virtual rows at the beginning of the image
+  // are only present if input_rect.y0() == 0.
+  ssize_t first_ensure_padding_y = ensure_padding_filter_y0;
+  if (frame_rect.y0() == 0) {  // NOTE(review): comment above names input_rect.
+    JXL_DASSERT(ensure_padding_filter_y0 == 0);
+    first_ensure_padding_y =
+        std::min<ssize_t>(lf.Padding(), ensure_padding_filter_y1);
+    for (ssize_t y = 0; y < first_ensure_padding_y; y++) {
+      ensure_padding_filter.Process3(y);
+    }
+  }
+
+  for (ssize_t y = -lf.Padding();
+       y < static_cast<ssize_t>(lf.Padding() + rect_for_if.ysize()); y++) {
+    if (fp) {
+      if (y >= first_ensure_padding_y && y < ensure_padding_filter_y1) {
+        ensure_padding_filter.Process3(y);
+      }
+      fp->ApplyFiltersRow(lf, dec_state->filter_weights, y);
+    } else if (output_pixel_data_storage != input) {
+      for (size_t c = 0; c < 3; c++) {
+        memcpy(rect_for_if_storage.PlaneRow(storage_for_if, c, y),
+               rect_for_if_input.ConstPlaneRow(*input, c, y),
+               rect_for_if_input.xsize() * sizeof(float));
+      }
+    }
+    if (y < static_cast<ssize_t>(lf.Padding())) continue;
+    // At this point, row `y - lf.Padding()` of `rect_for_if` has been produced
+    // by the filters.
+    ssize_t available_y = y - lf.Padding();
+    if (frame_header.upsampling == 1) {
+      for (size_t i = 0; i < extra_channels.size(); i++) {
+        ec_ptrs_for_patches[i] = extra_channels_for_patches[i].second.Row(
+            extra_channels_for_patches[i].first, available_y);
+      }
+    }
+    JXL_RETURN_IF_ERROR(image_features.patches.AddTo(
+        storage_for_if, rect_for_if_storage.Line(available_y),
+        ec_ptrs_for_patches.data(), rect_for_if.Line(available_y)));
+    JXL_RETURN_IF_ERROR(image_features.splines.AddTo(
+        storage_for_if, rect_for_if_storage.Line(available_y),
+        rect_for_if.Line(available_y), dec_state->shared->cmap));
+    size_t num_ys = 1;  // Number of output rows handled in this iteration.
+    if (frame_header.upsampling != 1) {
+      // Upsampling `y` values are relative to `rect_for_upsampling`, not to
+      // `rect_for_if`.
+      ssize_t shifted_y = available_y - extra_rows_t;
+      if (shifted_y >= ensure_padding_upsampling_y0 &&
+          shifted_y < ensure_padding_upsampling_y1) {
+        ensure_padding_upsampling.Process3(shifted_y);
+      }
+      if (late_ec_upsample && shifted_y >= ensure_padding_upsampling_ec_y0 &&
+          shifted_y < ensure_padding_upsampling_ec_y1) {
+        for (size_t ec = 0; ec < extra_channels.size(); ec++) {
+          ec_padding[ec].Process(shifted_y);
+        }
+      }
+      // Upsampling will access two rows of border, so the first upsampling
+      // output will be available after shifted_y is at least 2, *unless* image
+      // height is <= 2.
+      if (shifted_y < 2 &&
+          shifted_y + 1 != static_cast<ssize_t>(frame_rect.ysize())) {
+        continue;
+      }
+      // Value relative to upsampled_frame_rect.
+      size_t input_y = std::max<ssize_t>(shifted_y - 2, 0);
+      size_t upsampled_available_y = frame_header.upsampling * input_y;
+      size_t num_input_rows = 1;
+      // If we are going to mirror the last output rows, then we already have 3
+      // input lines ready. This happens iff we did not extend rect_for_if on
+      // the bottom *and* we are at the last `y` value.
+      if (extra_rows_b != 2 &&
+          static_cast<size_t>(y) + 1 == lf.Padding() + rect_for_if.ysize()) {
+        num_input_rows = 3;
+      }
+      num_input_rows = std::min(num_input_rows, frame_dim.ysize_padded);
+      num_ys = num_input_rows * frame_header.upsampling;
+
+      if (static_cast<size_t>(upsampled_available_y) >=
+          upsampled_frame_rect.ysize()) {
+        continue;
+      }
+
+      if (upsampled_available_y + num_ys >= upsampled_frame_rect.ysize()) {
+        num_ys = upsampled_frame_rect.ysize() - upsampled_available_y;
+      }
+
+      // Upsampler takes care of mirroring, and checks "physical" boundaries.
+      Rect upsample_input_rect = rect_for_upsampling.Lines(input_y, 1);
+      color_upsampler->UpsampleRect(
+          *storage_for_if, upsample_input_rect, output_pixel_data_storage,
+          upsampled_frame_rect_for_storage.Lines(upsampled_available_y, num_ys),
+          static_cast<ssize_t>(frame_rect.y0()) -
+              static_cast<ssize_t>(rect_for_upsampling.y0()),
+          frame_dim.ysize_padded, dec_state->upsampler_storage[thread].get());
+      if (late_ec_upsample) {
+        for (size_t ec = 0; ec < extra_channels.size(); ec++) {
+          // Upsampler takes care of mirroring, and checks "physical"
+          // boundaries.
+          Rect upsample_ec_input_rect =
+              extra_channels[ec].second.Lines(input_y, 1);
+          color_upsampler->UpsampleRect(
+              *extra_channels[ec].first, upsample_ec_input_rect,
+              &output_image->extra_channels()[ec],
+              upsampled_frame_rect.Lines(upsampled_available_y, num_ys),
+              static_cast<ssize_t>(frame_rect.y0()) -
+                  static_cast<ssize_t>(extra_channels[ec].second.y0()),
+              frame_dim.ysize, dec_state->upsampler_storage[thread].get());
+        }
+      }
+      available_y = upsampled_available_y;
+    }
+
+    if (static_cast<size_t>(available_y) >= upsampled_frame_rect.ysize()) {
+      continue;
+    }
+
+    // The image data is now unconditionally in
+    // `output_pixel_data_storage:upsampled_frame_rect_for_storage`.
+    if (frame_header.flags & FrameHeader::kNoise) {
+      PROFILER_ZONE("AddNoise");
+      AddNoise(image_features.noise_params,
+               upsampled_frame_rect.Lines(available_y, num_ys),
+               dec_state->noise,
+               upsampled_frame_rect_for_storage.Lines(available_y, num_ys),
+               dec_state->shared_storage.cmap, output_pixel_data_storage);
+    }
+
+    if (dec_state->pre_color_transform_frame.xsize() != 0) {
+      for (size_t c = 0; c < 3; c++) {
+        for (size_t y = available_y; y < available_y + num_ys; y++) {
+          float* JXL_RESTRICT row_out = upsampled_frame_rect.PlaneRow(
+              &dec_state->pre_color_transform_frame, c, y);
+          const float* JXL_RESTRICT row_in =
+              upsampled_frame_rect_for_storage.ConstPlaneRow(
+                  *output_pixel_data_storage, c, y);
+          memcpy(row_out, row_in,
+                 upsampled_frame_rect.xsize() * sizeof(*row_in));
+        }
+      }
+    }
+
+    // We skip the color transform entirely if save_before_color_transform and
+    // the frame is not supposed to be displayed.
+
+    if (dec_state->fast_xyb_srgb8_conversion) {
+      FastXYBTosRGB8(
+          *output_pixel_data_storage,
+          upsampled_frame_rect_for_storage.Lines(available_y, num_ys),
+          upsampled_frame_rect.Lines(available_y, num_ys)
+              .Crop(Rect(0, 0, frame_dim.xsize, frame_dim.ysize)),
+          alpha, alpha_rect.Lines(available_y, num_ys),
+          dec_state->rgb_output_is_rgba, dec_state->rgb_output, frame_dim.xsize,
+          dec_state->rgb_stride);
+    } else {
+      if (frame_header.needs_color_transform()) {
+        if (frame_header.color_transform == ColorTransform::kXYB) {
+          JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(UndoXYBInPlace)(
+              output_pixel_data_storage,
+              upsampled_frame_rect_for_storage.Lines(available_y, num_ys),
+              dec_state->output_encoding_info));
+        } else if (frame_header.color_transform == ColorTransform::kYCbCr) {
+          YcbcrToRgb(
+              *output_pixel_data_storage, output_pixel_data_storage,
+              upsampled_frame_rect_for_storage.Lines(available_y, num_ys));
+        }
+      }
+
+      // TODO(veluca): all blending should happen here.
+
+      if (dec_state->rgb_output != nullptr) {
+        HWY_DYNAMIC_DISPATCH(FloatToRGBA8)
+        (*output_pixel_data_storage,
+         upsampled_frame_rect_for_storage.Lines(available_y, num_ys),
+         dec_state->rgb_output_is_rgba, alpha,
+         alpha_rect.Lines(available_y, num_ys),
+         upsampled_frame_rect.Lines(available_y, num_ys)
+             .Crop(Rect(0, 0, frame_dim.xsize, frame_dim.ysize)),
+         dec_state->rgb_output, dec_state->rgb_stride);
+      }
+      if (dec_state->pixel_callback != nullptr) {
+        Rect alpha_line_rect = alpha_rect.Lines(available_y, num_ys);
+        Rect color_input_line_rect =
+            upsampled_frame_rect_for_storage.Lines(available_y, num_ys);
+        Rect image_line_rect =
+            upsampled_frame_rect.Lines(available_y, num_ys)
+                .Crop(Rect(0, 0, frame_dim.xsize, frame_dim.ysize));
+        const float* line_buffers[4];
+        for (size_t iy = 0; iy < image_line_rect.ysize(); iy++) {
+          for (size_t c = 0; c < 3; c++) {
+            line_buffers[c] = color_input_line_rect.ConstPlaneRow(
+                *output_pixel_data_storage, c, iy);
+          }
+          if (alpha) {
+            line_buffers[3] = alpha_line_rect.ConstRow(*alpha, iy);
+          } else {
+            line_buffers[3] = dec_state->opaque_alpha.data();
+          }
+          std::vector<float>& interleaved =
+              dec_state->pixel_callback_rows[thread];
+          size_t j = 0;
+          for (size_t i = 0; i < image_line_rect.xsize(); i++) {
+            interleaved[j++] = line_buffers[0][i];
+            interleaved[j++] = line_buffers[1][i];
+            interleaved[j++] = line_buffers[2][i];
+            if (dec_state->rgb_output_is_rgba) {
+              interleaved[j++] = line_buffers[3][i];
+            }
+          }
+          dec_state->pixel_callback(interleaved.data(), image_line_rect.x0(),
+                                    image_line_rect.y0() + iy,
+                                    image_line_rect.xsize());
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
+Status FinalizeFrameDecoding(ImageBundle* decoded,
+                             PassesDecoderState* dec_state, ThreadPool* pool,
+                             bool force_fir, bool skip_blending) {
+  const FrameHeader& frame_header = dec_state->shared->frame_header;
+  const FrameDimensions& frame_dim = dec_state->shared->frame_dim;
+
+  // FinalizeImageRect was not yet run, or we are forcing a run.
+  if (!dec_state->EagerFinalizeImageRect() || force_fir) {
+    std::vector<Rect> rects_to_process;  // kGroupDim-stepped tiles, padded frame
+    for (size_t y = 0; y < frame_dim.ysize_padded; y += kGroupDim) {
+      for (size_t x = 0; x < frame_dim.xsize_padded; x += kGroupDim) {
+        Rect rect(x, y, kGroupDim, kGroupDim, frame_dim.xsize_padded,
+                  frame_dim.ysize_padded);
+        if (rect.xsize() == 0 || rect.ysize() == 0) continue;
+        rects_to_process.push_back(rect);
+      }
+    }
+    const auto allocate_storage = [&](size_t num_threads) {  // pool init hook
+      dec_state->EnsureStorage(num_threads);
+      return true;
+    };
+
+    {  // Hand extra channels to `decoded`: moved if not upsampled, else fresh.
+      std::vector<ImageF> ecs;
+      const ImageMetadata& metadata = frame_header.nonserialized_metadata->m;
+      for (size_t i = 0; i < metadata.num_extra_channels; i++) {
+        if (frame_header.extra_channel_upsampling[i] == 1) {
+          ecs.push_back(std::move(dec_state->extra_channels[i]));
+        } else {
+          ecs.emplace_back(frame_dim.xsize_upsampled_padded,
+                           frame_dim.ysize_upsampled_padded);
+        }
+      }
+      decoded->SetExtraChannels(std::move(ecs));
+    }
+
+    std::atomic<bool> apply_features_ok{true};  // cleared by any failing rect
+    auto run_apply_features = [&](size_t rect_id, size_t thread) {
+      size_t xstart = PassesDecoderState::kGroupDataXBorder;
+      size_t ystart = PassesDecoderState::kGroupDataYBorder;
+      for (size_t c = 0; c < 3; c++) {  // color planes, chroma-subsampled
+        Rect rh(rects_to_process[rect_id].x0() >>
+                    frame_header.chroma_subsampling.HShift(c),
+                rects_to_process[rect_id].y0() >>
+                    frame_header.chroma_subsampling.VShift(c),
+                rects_to_process[rect_id].xsize() >>
+                    frame_header.chroma_subsampling.HShift(c),
+                rects_to_process[rect_id].ysize() >>
+                    frame_header.chroma_subsampling.VShift(c));
+        Rect group_data_rect(xstart, ystart, rh.xsize(), rh.ysize());
+        // Poison the image in this thread to prevent leaking initialized data
+        // from a previous run in this thread in msan builds.
+        msan::PoisonImage(dec_state->group_data[thread].Plane(c));
+        CopyImageToWithPadding(
+            rh, dec_state->decoded.Plane(c), dec_state->FinalizeRectPadding(),
+            group_data_rect, &dec_state->group_data[thread].Plane(c));
+      }
+      Rect group_data_rect(xstart, ystart, rects_to_process[rect_id].xsize(),
+                           rects_to_process[rect_id].ysize());
+      std::vector<std::pair<ImageF*, Rect>> ec_rects;
+      ec_rects.reserve(decoded->extra_channels().size());
+      for (size_t i = 0; i < decoded->extra_channels().size(); i++) {
+        Rect r = ScaleRectForEC(rects_to_process[rect_id], frame_header, i);
+        if (frame_header.extra_channel_upsampling[i] != 1) {
+          Rect ec_input_rect(kBlockDim, 2, r.xsize(), r.ysize());
+          auto eti =
+              &dec_state
+                   ->ec_temp_images[thread * decoded->extra_channels().size() +
+                                    i];
+          // Poison the temp image on this thread to prevent leaking initialized
+          // data from a previous run in this thread in msan builds.
+          msan::PoisonImage(*eti);
+          CopyImageToWithPadding(r, dec_state->extra_channels[i],
+                                 /*padding=*/2, ec_input_rect, eti);
+          ec_rects.emplace_back(eti, ec_input_rect);
+        } else {
+          ec_rects.emplace_back(&decoded->extra_channels()[i], r);
+        }
+      }
+      if (!FinalizeImageRect(&dec_state->group_data[thread], group_data_rect,
+                             ec_rects, dec_state, thread, decoded,
+                             rects_to_process[rect_id])) {
+        apply_features_ok = false;
+      }
+    };
+
+    JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, rects_to_process.size(),
+                                  allocate_storage, run_apply_features,
+                                  "ApplyFeatures"));  // pool errors are fatal
+    if (!apply_features_ok) {
+      return JXL_FAILURE("FinalizeImageRect failed");
+    }
+  }
+
+  const size_t xsize = frame_dim.xsize_upsampled;
+  const size_t ysize = frame_dim.ysize_upsampled;
+
+  decoded->ShrinkTo(xsize, ysize);
+  if (dec_state->pre_color_transform_frame.xsize() != 0) {
+    dec_state->pre_color_transform_frame.ShrinkTo(xsize, ysize);
+  }
+
+  if (!skip_blending && ImageBlender::NeedsBlending(dec_state)) {
+    if (dec_state->pre_color_transform_frame.xsize() != 0) {
+      // Extra channels are going to be modified. Make a copy.
+      dec_state->pre_color_transform_ec.clear();
+      for (const auto& ec : decoded->extra_channels()) {
+        dec_state->pre_color_transform_ec.emplace_back(CopyImage(ec));
+      }
+    }
+    ImageBlender blender;
+    ImageBundle foreground = std::move(*decoded);  // `decoded` is rebuilt below
+    decoded->SetFromImage(Image3F(frame_header.nonserialized_metadata->xsize(),
+                                  frame_header.nonserialized_metadata->ysize()),
+                          foreground.c_current());
+    std::vector<Rect> extra_channels_rects;
+    decoded->extra_channels().reserve(foreground.extra_channels().size());
+    extra_channels_rects.reserve(foreground.extra_channels().size());
+    for (size_t i = 0; i < foreground.extra_channels().size(); ++i) {
+      decoded->extra_channels().emplace_back(
+          frame_header.nonserialized_metadata->xsize(),
+          frame_header.nonserialized_metadata->ysize());
+      extra_channels_rects.emplace_back(decoded->extra_channels().back());
+    }
+    JXL_RETURN_IF_ERROR(blender.PrepareBlending(
+        dec_state, foreground.origin, foreground.xsize(), foreground.ysize(),
+        &frame_header.nonserialized_metadata->m.extra_channel_info,
+        foreground.c_current(), Rect(*decoded->color()),
+        /*output=*/decoded->color(), Rect(*decoded->color()),
+        &decoded->extra_channels(), std::move(extra_channels_rects)));
+
+    std::vector<Rect> rects_to_process;  // kGroupDim-stepped tiles, unpadded
+    for (size_t y = 0; y < frame_dim.ysize; y += kGroupDim) {
+      for (size_t x = 0; x < frame_dim.xsize; x += kGroupDim) {
+        Rect rect(x, y, kGroupDim, kGroupDim, frame_dim.xsize, frame_dim.ysize);
+        if (rect.xsize() == 0 || rect.ysize() == 0) continue;
+        rects_to_process.push_back(rect);
+      }
+    }
+
+    std::atomic<bool> blending_ok{true};  // cleared by any failing row blend
+    JXL_RETURN_IF_ERROR(RunOnPool(
+        pool, 0, rects_to_process.size(), ThreadPool::SkipInit(),
+        [&](size_t i, size_t /*thread*/) {
+          const Rect& rect = rects_to_process[i];
+          auto rect_blender = blender.PrepareRect(
+              rect, *foreground.color(), foreground.extra_channels(), rect);
+          for (size_t y = 0; y < rect.ysize(); ++y) {
+            if (!rect_blender.DoBlending(y)) {
+              blending_ok = false;
+              return;
+            }
+          }
+        },
+        "Blend"));
+    JXL_RETURN_IF_ERROR(blending_ok.load());
+  }
+
+  return true;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_reconstruct.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_reconstruct.h
new file mode 100644
index 0000000000..4fa9179b37
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_reconstruct.h
@@ -0,0 +1,69 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_RECONSTRUCT_H_
+#define LIB_JXL_DEC_RECONSTRUCT_H_
+
+#include <stddef.h>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+
+namespace jxl {
+
+// Finalizes the decoding of a frame by applying image features if necessary,
+// doing color transforms (unless the frame header specifies
+// `SaveBeforeColorTransform()`) and applying upsampling.
+//
+// Writes pixels in the appropriate colorspace to `decoded`, shrinking it if
+// necessary.
+// `skip_blending` is necessary because the encoder butteraugli loop does not
+// (yet) handle blending.
+// TODO(veluca): remove the "force_fir" parameter, and call EPF directly in
+// those use cases where this is needed.
+Status FinalizeFrameDecoding(ImageBundle* JXL_RESTRICT decoded,
+                             PassesDecoderState* dec_state, ThreadPool* pool,
+                             bool force_fir, bool skip_blending);
+
+// Renders the `frame_rect` portion of the final image to `output_image`
+// (unless the frame is upsampled - in which case, `frame_rect` is scaled
+// accordingly). `input_rect` should have the same shape. `input_rect` always
+// refers to the non-padded pixels. `frame_rect.x0()` is guaranteed to be a
+// multiple of GroupBorderAssigner::kPaddingRoundX. `frame_rect.xsize()` is
+// either a multiple of GroupBorderAssigner::kPaddingRoundX, or is such that
+// `frame_rect.x0() + frame_rect.xsize() == frame_dim.xsize`. `input_image`
+// may be mutated by adding padding. If `frame_rect` is on an image border, the
+// input will be padded. Otherwise, appropriate padding must already be present.
+Status FinalizeImageRect(
+    Image3F* input_image, const Rect& input_rect,
+    const std::vector<std::pair<ImageF*, Rect>>& extra_channels,
+    PassesDecoderState* dec_state, size_t thread,
+    ImageBundle* JXL_RESTRICT output_image, const Rect& frame_rect);
+
+// Fills padding around `img:rect` in the x direction by mirroring. Padding is
+// applied so that a full border of xpadding and ypadding is available, except
+// if `image_rect` points to an area of the full image that touches the top or
+// the bottom. It is expected that padding is already in place for inputs such
+// that the corresponding image_rect is not at an image border.
+void EnsurePaddingInPlace(Image3F* img, const Rect& rect,
+                          const Rect& image_rect, size_t image_xsize,
+                          size_t image_ysize, size_t xpadding, size_t ypadding);
+
+// For DC in the API.
+void UndoXYB(const Image3F& src, Image3F* dst,
+             const OutputEncodingInfo& output_info, ThreadPool* pool);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_RECONSTRUCT_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_render_pipeline.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_render_pipeline.h
new file mode 100644
index 0000000000..9496770a6d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_render_pipeline.h
@@ -0,0 +1,91 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_RENDER_PIPELINE_H_
+#define LIB_JXL_DEC_RENDER_PIPELINE_H_
+
+#include <stdint.h>
+
+#include "lib/jxl/filters.h"
+
+namespace jxl {
+
+// The first pixel in the input to RenderPipelineStage will be located at
+// this position. Pixels before this position may be accessed as padding.
+constexpr size_t kRenderPipelineXOffset = 16;
+
+enum class RenderPipelineChannelMode {
+  kIgnored = 0,  // stage gets null output rows for this channel
+  kInPlace = 1,  // null output rows too; presumably edited in the input rows
+  kInOut = 2,  // presumably read from input rows, written to output rows
+};
+
+class RenderPipelineStage {
+ public:
+  // `input` points to `2*MaxPaddingY() + 1` pointers, each of which points to
+  // `3+num_non_color_channels` pointer-to-row. So, `input[MaxPaddingY()][0]` is
+  // the pointer to the center row of the first color channel.
+  //  `MaxPaddingY()` is the maximum value returned by `GetPaddingY()`;
+  //  typically, this is a constant.
+  // `output` points to `1<<MaxShiftY()` pointers, each of which points to
+  // `3+num_non_color_channels` pointer-to-row. So, `output[0][3]` is the
+  //  pointer to the top row of the first non-color channel.
+  //  `MaxShiftY()` is defined similarly to `MaxPaddingY()`.
+  //  `xsize` represents the total number of pixels to be processed in the input
+  //  row. `xpos` and `ypos` represent the position of the first pixel in the
+  //  center row in the input.
+  virtual void ProcessRow(float* JXL_RESTRICT** input,
+                          float* JXL_RESTRICT** output, size_t xsize,
+                          size_t xpos, size_t ypos) const = 0;
+  virtual ~RenderPipelineStage() {}
+
+  // Amount of padding required by each channel in the various directions.
+  // The value for c=0 indicates padding required for color channels, subsequent
+  // values refer to padding for non-color channels, in order.
+  virtual size_t GetPaddingX(size_t c) const = 0;
+  virtual size_t GetPaddingY(size_t c) const = 0;
+
+  // Log2 of the number of columns/rows of output that this stage will produce
+  // for the given channel.
+  virtual size_t ShiftX(size_t c) const = 0;
+  virtual size_t ShiftY(size_t c) const = 0;
+
+  // How each channel will be processed. If this method returns kIgnored or
+  // kInPlace for a given channel, then the corresponding pointer-to-row values
+  // in the output of ProcessRow will be null for that channel, and
+  // `GetPaddingX`, `GetPaddingY`, `ShiftX` and `ShiftY` for that channel must
+  // return 0.
+  virtual RenderPipelineChannelMode GetChannelMode(size_t c) const = 0;
+};
+
+class RenderPipeline {  // placeholder: every method below aborts when called
+ public:
+  // Initial shifts for the channels (following the same convention as
+  // RenderPipelineStage for naming the channels).
+  void Init(const std::vector<std::pair<size_t, size_t>>& channel_shifts) {
+    JXL_ABORT("Not implemented");  // `channel_shifts` is not used yet
+  }
+
+  // Adds a stage to the pipeline. The shifts for all the channels that are not
+  // kIgnored by the stage must be identical at this point.
+  void AddStage(std::unique_ptr<RenderPipelineStage> stage) {
+    JXL_ABORT("Not implemented");  // `stage` is not used yet
+  }
+
+  // Finalizes setup of the pipeline. Shifts for all channels should be 0 at
+  // this point.
+  void Finalize() { JXL_ABORT("Not implemented"); }
+
+  // Allocates storage to run with `num` threads.
+  void PrepareForThreads(size_t num) { JXL_ABORT("Not implemented"); }
+
+  // TBD: run the pipeline for a given input, on a given thread.
+  // void Run(Image3F* color_data, ImageF* ec_data, const Rect& input_rect,
+  // size_t thread, size_t xpos, size_t ypos) {}
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_RENDER_PIPELINE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_transforms-inl.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_transforms-inl.h
new file mode 100644
index 0000000000..c9aebc6b99
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_transforms-inl.h
@@ -0,0 +1,867 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
+#undef LIB_JXL_DEC_TRANSFORMS_INL_H_
+#else
+#define LIB_JXL_DEC_TRANSFORMS_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dct-inl.h"
+#include "lib/jxl/dct_scales.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+template <size_t ROWS, size_t COLS>
+struct DoDCT {  // general case: forwards to ComputeScaledDCT<ROWS, COLS>
+  template <typename From>
+  void operator()(const From& from, float* JXL_RESTRICT to,
+                  float* JXL_RESTRICT scratch_space) {
+    ComputeScaledDCT<ROWS, COLS>()(from, to, scratch_space);
+  }
+};
+
+template <size_t N>
+struct DoDCT<N, N> {  // square case: forwards to ComputeTransposedScaledDCT<N>
+  template <typename From>
+  void operator()(const From& from, float* JXL_RESTRICT to,
+                  float* JXL_RESTRICT scratch_space) {
+    ComputeTransposedScaledDCT<N>()(from, to, scratch_space);
+  }
+};
+
+// Computes the lowest-frequency LF_ROWSxLF_COLS-sized square in output, which
+// is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the
+// input block.
+template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
+          size_t ROWS, size_t COLS>
+JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
+                                  float* output, const size_t output_stride) {
+  static_assert(LF_ROWS == ROWS,
+                "ReinterpretingDCT should only be called with LF == N");
+  static_assert(LF_COLS == COLS,
+                "ReinterpretingDCT should only be called with LF == N");
+  HWY_ALIGN float block[ROWS * COLS];  // DCT result, rescaled into `output`
+
+  // ROWS, COLS <= 8, so we can put scratch space on the stack.
+  HWY_ALIGN float scratch_space[ROWS * COLS];
+  DoDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block, scratch_space);
+  if (ROWS < COLS) {  // `block` is read with a row pitch of COLS
+    for (size_t y = 0; y < LF_ROWS; y++) {
+      for (size_t x = 0; x < LF_COLS; x++) {
+        output[y * output_stride + x] =
+            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
+            DCTTotalResampleScale<COLS, DCT_COLS>(x);
+      }
+    }
+  } else {  // row pitch ROWS, bounds swapped (presumably transposed DCT output)
+    for (size_t y = 0; y < LF_COLS; y++) {
+      for (size_t x = 0; x < LF_ROWS; x++) {
+        output[y * output_stride + x] =
+            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
+            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
+      }
+    }
+  }
+}
+
+template <size_t S>
+void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
+  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
+  static_assert(S % 2 == 0, "S should be even");
+  float temp[kDCTBlockSize];  // staging buffer; copied to `out` at the end
+  constexpr size_t num_2x2 = S / 2;
+  for (size_t y = 0; y < num_2x2; y++) {
+    for (size_t x = 0; x < num_2x2; x++) {
+      float c00 = block[y * kBlockDim + x];  // one value per S/2 x S/2 quadrant
+      float c01 = block[y * kBlockDim + num_2x2 + x];
+      float c10 = block[(y + num_2x2) * kBlockDim + x];
+      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
+      float r00 = c00 + c01 + c10 + c11;  // signed sums of the four values
+      float r01 = c00 + c01 - c10 - c11;
+      float r10 = c00 - c01 + c10 - c11;
+      float r11 = c00 - c01 - c10 + c11;
+      temp[y * 2 * kBlockDim + x * 2] = r00;  // results fill a 2x2 cell
+      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
+      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
+      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
+    }
+  }
+  for (size_t y = 0; y < S; y++) {  // copy the top-left SxS of temp to `out`
+    for (size_t x = 0; x < S; x++) {
+      out[y * stride_out + x] = temp[y * kBlockDim + x];
+    }
+  }
+}
+
+void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
+  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {  // [coeff j][pixel i]
+      {
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+          0.25,
+      },
+      {
+          0.876902929799142f,
+          0.2206518106944235f,
+          -0.10140050393753763f,
+          -0.1014005039375375f,
+          0.2206518106944236f,
+          -0.10140050393753777f,
+          -0.10140050393753772f,
+          -0.10140050393753763f,
+          -0.10140050393753758f,
+          -0.10140050393753769f,
+          -0.1014005039375375f,
+          -0.10140050393753768f,
+          -0.10140050393753768f,
+          -0.10140050393753759f,
+          -0.10140050393753763f,
+          -0.10140050393753741f,
+      },
+      {
+          0.0,
+          0.0,
+          0.40670075830260755f,
+          0.44444816619734445f,
+          0.0,
+          0.0,
+          0.19574399372042936f,
+          0.2929100136981264f,
+          -0.40670075830260716f,
+          -0.19574399372042872f,
+          0.0,
+          0.11379074460448091f,
+          -0.44444816619734384f,
+          -0.29291001369812636f,
+          -0.1137907446044814f,
+          0.0,
+      },
+      {
+          0.0,
+          0.0,
+          -0.21255748058288748f,
+          0.3085497062849767f,
+          0.0,
+          0.4706702258572536f,
+          -0.1621205195722993f,
+          0.0,
+          -0.21255748058287047f,
+          -0.16212051957228327f,
+          -0.47067022585725277f,
+          -0.1464291867126764f,
+          0.3085497062849487f,
+          0.0,
+          -0.14642918671266536f,
+          0.4251149611657548f,
+      },
+      {
+          0.0,
+          -0.7071067811865474f,
+          0.0,
+          0.0,
+          0.7071067811865476f,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+      },
+      {
+          -0.4105377591765233f,
+          0.6235485373547691f,
+          -0.06435071657946274f,
+          -0.06435071657946266f,
+          0.6235485373547694f,
+          -0.06435071657946284f,
+          -0.0643507165794628f,
+          -0.06435071657946274f,
+          -0.06435071657946272f,
+          -0.06435071657946279f,
+          -0.06435071657946266f,
+          -0.06435071657946277f,
+          -0.06435071657946277f,
+          -0.06435071657946273f,
+          -0.06435071657946274f,
+          -0.0643507165794626f,
+      },
+      {
+          0.0,
+          0.0,
+          -0.4517556589999482f,
+          0.15854503551840063f,
+          0.0,
+          -0.04038515160822202f,
+          0.0074182263792423875f,
+          0.39351034269210167f,
+          -0.45175565899994635f,
+          0.007418226379244351f,
+          0.1107416575309343f,
+          0.08298163094882051f,
+          0.15854503551839705f,
+          0.3935103426921022f,
+          0.0829816309488214f,
+          -0.45175565899994796f,
+      },
+      {
+          0.0,
+          0.0,
+          -0.304684750724869f,
+          0.5112616136591823f,
+          0.0,
+          0.0,
+          -0.290480129728998f,
+          -0.06578701549142804f,
+          0.304684750724884f,
+          0.2904801297290076f,
+          0.0,
+          -0.23889773523344604f,
+          -0.5112616136592012f,
+          0.06578701549142545f,
+          0.23889773523345467f,
+          0.0,
+      },
+      {
+          0.0,
+          0.0,
+          0.3017929516615495f,
+          0.25792362796341184f,
+          0.0,
+          0.16272340142866204f,
+          0.09520022653475037f,
+          0.0,
+          0.3017929516615503f,
+          0.09520022653475055f,
+          -0.16272340142866173f,
+          -0.35312385449816297f,
+          0.25792362796341295f,
+          0.0,
+          -0.3531238544981624f,
+          -0.6035859033230976f,
+      },
+      {
+          0.0,
+          0.0,
+          0.40824829046386274f,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          -0.4082482904638628f,
+          -0.4082482904638635f,
+          0.0,
+          0.0,
+          -0.40824829046386296f,
+          0.0,
+          0.4082482904638634f,
+          0.408248290463863f,
+          0.0,
+      },
+      {
+          0.0,
+          0.0,
+          0.1747866975480809f,
+          0.0812611176717539f,
+          0.0,
+          0.0,
+          -0.3675398009862027f,
+          -0.307882213957909f,
+          -0.17478669754808135f,
+          0.3675398009862011f,
+          0.0,
+          0.4826689115059883f,
+          -0.08126111767175039f,
+          0.30788221395790305f,
+          -0.48266891150598584f,
+          0.0,
+      },
+      {
+          0.0,
+          0.0,
+          -0.21105601049335784f,
+          0.18567180916109802f,
+          0.0,
+          0.0,
+          0.49215859013738733f,
+          -0.38525013709251915f,
+          0.21105601049335806f,
+          -0.49215859013738905f,
+          0.0,
+          0.17419412659916217f,
+          -0.18567180916109904f,
+          0.3852501370925211f,
+          -0.1741941265991621f,
+          0.0,
+      },
+      {
+          0.0,
+          0.0,
+          -0.14266084808807264f,
+          -0.3416446842253372f,
+          0.0,
+          0.7367497537172237f,
+          0.24627107722075148f,
+          -0.08574019035519306f,
+          -0.14266084808807344f,
+          0.24627107722075137f,
+          0.14883399227113567f,
+          -0.04768680350229251f,
+          -0.3416446842253373f,
+          -0.08574019035519267f,
+          -0.047686803502292804f,
+          -0.14266084808807242f,
+      },
+      {
+          0.0,
+          0.0,
+          -0.13813540350758585f,
+          0.3302282550303788f,
+          0.0,
+          0.08755115000587084f,
+          -0.07946706605909573f,
+          -0.4613374887461511f,
+          -0.13813540350758294f,
+          -0.07946706605910261f,
+          0.49724647109535086f,
+          0.12538059448563663f,
+          0.3302282550303805f,
+          -0.4613374887461554f,
+          0.12538059448564315f,
+          -0.13813540350758452f,
+      },
+      {
+          0.0,
+          0.0,
+          -0.17437602599651067f,
+          0.0702790691196284f,
+          0.0,
+          -0.2921026642334881f,
+          0.3623817333531167f,
+          0.0,
+          -0.1743760259965108f,
+          0.36238173335311646f,
+          0.29210266423348785f,
+          -0.4326608024727445f,
+          0.07027906911962818f,
+          0.0,
+          -0.4326608024727457f,
+          0.34875205199302267f,
+      },
+      {
+          0.0,
+          0.0,
+          0.11354987314994337f,
+          -0.07417504595810355f,
+          0.0,
+          0.19402893032594343f,
+          -0.435190496523228f,
+          0.21918684838857466f,
+          0.11354987314994257f,
+          -0.4351904965232251f,
+          0.5550443808910661f,
+          -0.25468277124066463f,
+          -0.07417504595810233f,
+          0.2191868483885728f,
+          -0.25468277124066413f,
+          0.1135498731499429f,
+      },
+  };
+
+  const HWY_CAPPED(float, 16) d;  // vector tag capped at 16 float lanes
+  for (size_t i = 0; i < 16; i += Lanes(d)) {  // Lanes(d) output pixels per pass
+    auto pixel = Zero(d);
+    for (size_t j = 0; j < 16; j++) {  // pixels[i] = sum_j coeffs[j]*basis[j][i]
+      auto cf = Set(d, coeffs[j]);
+      auto basis = Load(d, k4x4AFVBasis[j] + i);
+      pixel = MulAdd(cf, basis, pixel);
+    }
+    Store(pixel, d, pixels + i);
+  }
+}
+
+template <size_t afv_kind>
+void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
+                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
+  HWY_ALIGN float scratch_space[4 * 8];
+  size_t afv_x = afv_kind & 1;  // AFV 4x4 is written at column offset afv_x * 4
+  size_t afv_y = afv_kind / 2;  // AFV 4x4 is written at row offset afv_y * 4
+  float dcs[3] = {};  // DC terms of the three sub-transforms below
+  float block00 = coefficients[0];
+  float block01 = coefficients[1];
+  float block10 = coefficients[8];
+  dcs[0] = (block00 + block10 + block01) * 4.0f;
+  dcs[1] = (block00 + block10 - block01);
+  dcs[2] = block00 - block10;
+  // IAFV: (even, even) positions.
+  HWY_ALIGN float coeff[4 * 4];
+  coeff[0] = dcs[0];
+  for (size_t iy = 0; iy < 4; iy++) {
+    for (size_t ix = 0; ix < 4; ix++) {
+      if (ix == 0 && iy == 0) continue;  // slot 0 already holds the DC term
+      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
+    }
+  }
+  HWY_ALIGN float block[4 * 8];  // reused as input for the two IDCTs further down
+  AFVIDCT4x4(coeff, block);
+  for (size_t iy = 0; iy < 4; iy++) {  // copy out; an axis is mirrored if its afv_* is 1
+    for (size_t ix = 0; ix < 4; ix++) {
+      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
+          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
+    }
+  }
+  // IDCT4x4 in (odd, even) positions.
+  block[0] = dcs[1];
+  for (size_t iy = 0; iy < 4; iy++) {
+    for (size_t ix = 0; ix < 4; ix++) {
+      if (ix == 0 && iy == 0) continue;  // slot 0 already holds the DC term
+      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
+    }
+  }
+  ComputeTransposedScaledIDCT<4>()(
+      block,
+      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
+            pixels_stride),  // the 4x4 horizontally next to the AFV one
+      scratch_space);
+  // IDCT4x8.
+  block[0] = dcs[2];
+  for (size_t iy = 0; iy < 4; iy++) {
+    for (size_t ix = 0; ix < 8; ix++) {
+      if (ix == 0 && iy == 0) continue;  // slot 0 already holds the DC term
+      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
+    }
+  }
+  ComputeScaledIDCT<4, 8>()(
+      block,
+      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
+      scratch_space);  // fills the four rows not used by the AFV 4x4
+}
+
+HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategy::Type strategy,
+                                        float* JXL_RESTRICT coefficients,
+                                        float* JXL_RESTRICT pixels,
+                                        size_t pixels_stride,
+                                        float* scratch_space) {
+  using Type = AcStrategy::Type;
+  switch (strategy) {
+    case Type::IDENTITY: {
+      PROFILER_ZONE("IDCT Identity");
+      float dcs[4] = {};
+      float block00 = coefficients[0];
+      float block01 = coefficients[1];
+      float block10 = coefficients[8];
+      float block11 = coefficients[9];
+      dcs[0] = block00 + block01 + block10 + block11;
+      dcs[1] = block00 + block01 - block10 - block11;
+      dcs[2] = block00 - block01 + block10 - block11;
+      dcs[3] = block00 - block01 - block10 + block11;
+      for (size_t y = 0; y < 2; y++) {
+        for (size_t x = 0; x < 2; x++) {
+          float block_dc = dcs[y * 2 + x];
+          float residual_sum = 0;
+          for (size_t iy = 0; iy < 4; iy++) {
+            for (size_t ix = 0; ix < 4; ix++) {
+              if (ix == 0 && iy == 0) continue;
+              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
+            }
+          }
+          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
+              block_dc - residual_sum * (1.0f / 16);
+          for (size_t iy = 0; iy < 4; iy++) {
+            for (size_t ix = 0; ix < 4; ix++) {
+              if (ix == 1 && iy == 1) continue;
+              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
+                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
+                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
+            }
+          }
+          pixels[y * 4 * pixels_stride + x * 4] =
+              coefficients[(y + 2) * 8 + x + 2] +
+              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
+        }
+      }
+      break;
+    }
+    case Type::DCT8X4: {
+      PROFILER_ZONE("IDCT 8x4");
+      float dcs[2] = {};
+      float block0 = coefficients[0];
+      float block1 = coefficients[8];
+      dcs[0] = block0 + block1;
+      dcs[1] = block0 - block1;
+      for (size_t x = 0; x < 2; x++) {
+        HWY_ALIGN float block[4 * 8];
+        block[0] = dcs[x];
+        for (size_t iy = 0; iy < 4; iy++) {
+          for (size_t ix = 0; ix < 8; ix++) {
+            if (ix == 0 && iy == 0) continue;
+            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
+          }
+        }
+        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
+                                  scratch_space);
+      }
+      break;
+    }
+    case Type::DCT4X8: {
+      PROFILER_ZONE("IDCT 4x8");
+      float dcs[2] = {};
+      float block0 = coefficients[0];
+      float block1 = coefficients[8];
+      dcs[0] = block0 + block1;
+      dcs[1] = block0 - block1;
+      for (size_t y = 0; y < 2; y++) {
+        HWY_ALIGN float block[4 * 8];
+        block[0] = dcs[y];
+        for (size_t iy = 0; iy < 4; iy++) {
+          for (size_t ix = 0; ix < 8; ix++) {
+            if (ix == 0 && iy == 0) continue;
+            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
+          }
+        }
+        ComputeScaledIDCT<4, 8>()(
+            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
+            scratch_space);
+      }
+      break;
+    }
+    case Type::DCT4X4: {
+      PROFILER_ZONE("IDCT 4");
+      float dcs[4] = {};
+      float block00 = coefficients[0];
+      float block01 = coefficients[1];
+      float block10 = coefficients[8];
+      float block11 = coefficients[9];
+      dcs[0] = block00 + block01 + block10 + block11;
+      dcs[1] = block00 + block01 - block10 - block11;
+      dcs[2] = block00 - block01 + block10 - block11;
+      dcs[3] = block00 - block01 - block10 + block11;
+      for (size_t y = 0; y < 2; y++) {
+        for (size_t x = 0; x < 2; x++) {
+          HWY_ALIGN float block[4 * 4];
+          block[0] = dcs[y * 2 + x];
+          for (size_t iy = 0; iy < 4; iy++) {
+            for (size_t ix = 0; ix < 4; ix++) {
+              if (ix == 0 && iy == 0) continue;
+              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
+            }
+          }
+          ComputeTransposedScaledIDCT<4>()(
+              block,
+              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
+              scratch_space);
+        }
+      }
+      break;
+    }
+    case Type::DCT2X2: {
+      PROFILER_ZONE("IDCT 2");
+      HWY_ALIGN float coeffs[kDCTBlockSize];
+      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
+      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
+      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
+      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
+      for (size_t y = 0; y < kBlockDim; y++) {
+        for (size_t x = 0; x < kBlockDim; x++) {
+          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
+        }
+      }
+      break;
+    }
+    case Type::DCT16X16: {
+      PROFILER_ZONE("IDCT 16");
+      ComputeTransposedScaledIDCT<16>()(
+          coefficients, DCTTo(pixels, pixels_stride), scratch_space);
+      break;
+    }
+    case Type::DCT16X8: {
+      PROFILER_ZONE("IDCT 16x8");
+      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT8X16: {
+      PROFILER_ZONE("IDCT 8x16");
+      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT32X8: {
+      PROFILER_ZONE("IDCT 32x8");
+      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT8X32: {
+      PROFILER_ZONE("IDCT 8x32");
+      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT32X16: {
+      PROFILER_ZONE("IDCT 32x16");
+      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
+                                  scratch_space);
+      break;
+    }
+    case Type::DCT16X32: {
+      PROFILER_ZONE("IDCT 16x32");
+      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
+                                  scratch_space);
+      break;
+    }
+    case Type::DCT32X32: {
+      PROFILER_ZONE("IDCT 32");
+      ComputeTransposedScaledIDCT<32>()(
+          coefficients, DCTTo(pixels, pixels_stride), scratch_space);
+      break;
+    }
+    case Type::DCT: {
+      PROFILER_ZONE("IDCT 8");
+      ComputeTransposedScaledIDCT<8>()(
+          coefficients, DCTTo(pixels, pixels_stride), scratch_space);
+      break;
+    }
+    case Type::AFV0: {
+      PROFILER_ZONE("IAFV0");
+      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
+      break;
+    }
+    case Type::AFV1: {
+      PROFILER_ZONE("IAFV1");
+      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
+      break;
+    }
+    case Type::AFV2: {
+      PROFILER_ZONE("IAFV2");
+      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
+      break;
+    }
+    case Type::AFV3: {
+      PROFILER_ZONE("IAFV3");
+      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
+      break;
+    }
+    case Type::DCT64X32: {
+      PROFILER_ZONE("IDCT 64x32");
+      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
+                                  scratch_space);
+      break;
+    }
+    case Type::DCT32X64: {
+      PROFILER_ZONE("IDCT 32x64");
+      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
+                                  scratch_space);
+      break;
+    }
+    case Type::DCT64X64: {
+      PROFILER_ZONE("IDCT 64");
+      ComputeTransposedScaledIDCT<64>()(
+          coefficients, DCTTo(pixels, pixels_stride), scratch_space);
+      break;
+    }
+    case Type::DCT128X64: {
+      PROFILER_ZONE("IDCT 128x64");
+      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
+                                   scratch_space);
+      break;
+    }
+    case Type::DCT64X128: {
+      PROFILER_ZONE("IDCT 64x128");
+      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
+                                   scratch_space);
+      break;
+    }
+    case Type::DCT128X128: {
+      PROFILER_ZONE("IDCT 128");
+      ComputeTransposedScaledIDCT<128>()(
+          coefficients, DCTTo(pixels, pixels_stride), scratch_space);
+      break;
+    }
+    case Type::DCT256X128: {
+      PROFILER_ZONE("IDCT 256x128");
+      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
+                                    scratch_space);
+      break;
+    }
+    case Type::DCT128X256: {
+      PROFILER_ZONE("IDCT 128x256");
+      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
+                                    scratch_space);
+      break;
+    }
+    case Type::DCT256X256: {
+      PROFILER_ZONE("IDCT 256");
+      ComputeTransposedScaledIDCT<256>()(
+          coefficients, DCTTo(pixels, pixels_stride), scratch_space);
+      break;
+    }
+    case Type::kNumValidStrategies:
+      JXL_ABORT("Invalid strategy");
+  }
+}
+
+HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategy::Type strategy,
+                                              const float* dc, size_t dc_stride,
+                                              float* llf) {  // Fills `llf` from the DC samples in `dc`; how depends on `strategy`.
+  using Type = AcStrategy::Type;
+  switch (strategy) {  // Strategies spanning several kBlockDim blocks forward to ReinterpretingDCT.
+    case Type::DCT16X8: {
+      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
+                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
+          dc, dc_stride, llf, 2 * kBlockDim);  // In every case the last argument is kBlockDim times max(ROWS, COLS); NOTE(review): presumably the llf stride - confirm.
+      break;
+    }
+    case Type::DCT8X16: {
+      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
+                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
+          dc, dc_stride, llf, 2 * kBlockDim);
+      break;
+    }
+    case Type::DCT16X16: {
+      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
+                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
+          dc, dc_stride, llf, 2 * kBlockDim);
+      break;
+    }
+    case Type::DCT32X8: {
+      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
+                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
+          dc, dc_stride, llf, 4 * kBlockDim);
+      break;
+    }
+    case Type::DCT8X32: {
+      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
+          dc, dc_stride, llf, 4 * kBlockDim);
+      break;
+    }
+    case Type::DCT32X16: {
+      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
+                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
+          dc, dc_stride, llf, 4 * kBlockDim);
+      break;
+    }
+    case Type::DCT16X32: {
+      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
+          dc, dc_stride, llf, 4 * kBlockDim);
+      break;
+    }
+    case Type::DCT32X32: {
+      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
+          dc, dc_stride, llf, 4 * kBlockDim);
+      break;
+    }
+    case Type::DCT64X32: {
+      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
+          dc, dc_stride, llf, 8 * kBlockDim);
+      break;
+    }
+    case Type::DCT32X64: {
+      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
+                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
+          dc, dc_stride, llf, 8 * kBlockDim);
+      break;
+    }
+    case Type::DCT64X64: {
+      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
+                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
+          dc, dc_stride, llf, 8 * kBlockDim);
+      break;
+    }
+    case Type::DCT128X64: {
+      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
+                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
+          dc, dc_stride, llf, 16 * kBlockDim);
+      break;
+    }
+    case Type::DCT64X128: {
+      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
+                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
+          dc, dc_stride, llf, 16 * kBlockDim);
+      break;
+    }
+    case Type::DCT128X128: {
+      ReinterpretingDCT<
+          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
+          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
+          dc, dc_stride, llf, 16 * kBlockDim);
+      break;
+    }
+    case Type::DCT256X128: {
+      ReinterpretingDCT<
+          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
+          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
+          dc, dc_stride, llf, 32 * kBlockDim);
+      break;
+    }
+    case Type::DCT128X256: {
+      ReinterpretingDCT<
+          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
+          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
+          dc, dc_stride, llf, 32 * kBlockDim);
+      break;
+    }
+    case Type::DCT256X256: {
+      ReinterpretingDCT<
+          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
+          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
+          dc, dc_stride, llf, 32 * kBlockDim);
+      break;
+    }
+    case Type::DCT:
+    case Type::DCT2X2:
+    case Type::DCT4X4:
+    case Type::DCT4X8:
+    case Type::DCT8X4:
+    case Type::AFV0:
+    case Type::AFV1:
+    case Type::AFV2:
+    case Type::AFV3:
+    case Type::IDENTITY:
+      llf[0] = dc[0];  // These strategies copy exactly one value: dc[0] goes to llf[0] unchanged.
+      break;
+    case Type::kNumValidStrategies:
+      JXL_ABORT("Invalid strategy");  // The enumerator count itself is never a usable strategy.
+  };
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_DEC_TRANSFORMS_INL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_transforms_testonly.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_transforms_testonly.cc
new file mode 100644
index 0000000000..9ee80c59dc
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_transforms_testonly.cc
@@ -0,0 +1,41 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_transforms_testonly.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_transforms_testonly.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/dec_transforms-inl.h"
+
+namespace jxl {
+
+#if HWY_ONCE
+HWY_EXPORT(TransformToPixels);  // NOTE(review): per Highway docs, HWY_EXPORT must precede HWY_DYNAMIC_DISPATCH of the same name.
+void TransformToPixels(AcStrategy::Type strategy,  // Non-inlined entry point: forwards every argument to the dispatched implementation.
+                       float* JXL_RESTRICT coefficients,
+                       float* JXL_RESTRICT pixels, size_t pixels_stride,
+                       float* scratch_space) {
+  return HWY_DYNAMIC_DISPATCH(TransformToPixels)(strategy, coefficients, pixels,  // Returns void; `return` only forwards the call.
+                                                 pixels_stride, scratch_space);
+}
+
+HWY_EXPORT(LowestFrequenciesFromDC);  // NOTE(review): per Highway docs, HWY_EXPORT must precede HWY_DYNAMIC_DISPATCH of the same name.
+void LowestFrequenciesFromDC(const jxl::AcStrategy::Type strategy,  // Non-inlined entry point: forwards every argument to the dispatched implementation.
+                             const float* dc, size_t dc_stride, float* llf) {
+  return HWY_DYNAMIC_DISPATCH(LowestFrequenciesFromDC)(strategy, dc, dc_stride,  // Returns void; `return` only forwards the call.
+                                                       llf);
+}
+
+HWY_EXPORT(AFVIDCT4x4);  // NOTE(review): per Highway docs, HWY_EXPORT must precede HWY_DYNAMIC_DISPATCH of the same name.
+void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {  // Non-inlined entry point for the dispatched AFVIDCT4x4.
+  return HWY_DYNAMIC_DISPATCH(AFVIDCT4x4)(coeffs, pixels);  // Returns void; `return` only forwards the call.
+}
+#endif  // HWY_ONCE
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_transforms_testonly.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_transforms_testonly.h
new file mode 100644
index 0000000000..97c4ca543d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_transforms_testonly.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_TRANSFORMS_TESTONLY_H_
+#define LIB_JXL_DEC_TRANSFORMS_TESTONLY_H_
+
+// Facade for (non-inlined) inverse integral transforms.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+void TransformToPixels(AcStrategy::Type strategy,  // Runs the inverse transform selected by `strategy`, writing into `pixels`.
+                       float* JXL_RESTRICT coefficients,
+                       float* JXL_RESTRICT pixels, size_t pixels_stride,
+                       float* JXL_RESTRICT scratch_space);  // Defined in dec_transforms_testonly.cc via HWY_DYNAMIC_DISPATCH.
+
+// Equivalent of the above for DC image.
+void LowestFrequenciesFromDC(const jxl::AcStrategy::Type strategy,
+                             const float* dc, size_t dc_stride, float* llf);  // Defined in dec_transforms_testonly.cc via HWY_DYNAMIC_DISPATCH.
+
+void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels);  // Defined in dec_transforms_testonly.cc via HWY_DYNAMIC_DISPATCH.
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_TRANSFORMS_TESTONLY_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc
new file mode 100644
index 0000000000..9c7a5e5a92
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.cc
@@ -0,0 +1,375 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_upsample.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_upsample.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/image_ops.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+void InitKernel(const float* weights, CacheAlignedUniquePtr* kernel_storage,
+                size_t N, size_t x_repeat) {  // Expands the compact `weights` table into zero-padded kernel slices stored in *kernel_storage.
+  const size_t NX = N * x_repeat;  // Kernel columns per row: N for each of the x_repeat repeated cells.
+  const size_t N2 = N / 2;
+  HWY_FULL(float) df;
+  const size_t V = Lanes(df);
+  const size_t num_kernels = N * NX;
+
+  constexpr const size_t M = 2 * Upsampler::filter_radius() + 1;  // Filter diameter.
+  const size_t MX = M + x_repeat - 1;  // Input window width covering all x_repeat shifted cells.
+  const size_t num_coeffs = M * MX;
+
+  // Pad kernel slices to vector size.
+  const size_t stride = RoundUpTo(num_kernels, V);
+  *kernel_storage = AllocateArray(stride * sizeof(float) * num_coeffs);
+  float* kernels = reinterpret_cast<float*>(kernel_storage->get());
+  memset(kernels, 0, stride * sizeof(float) * num_coeffs);  // Entries skipped by `continue` below keep this zero weight.
+
+  for (size_t offset = 0; offset < num_coeffs; ++offset) {
+    size_t iy = offset / MX;  // Row of this coefficient inside the M x MX input window.
+    size_t ix = offset % MX;  // Column of this coefficient inside the M x MX input window.
+    for (size_t kernel = 0; kernel < num_kernels; ++kernel) {
+      size_t ky = kernel / NX;
+      size_t kx_ = kernel % NX;
+      size_t kx = kx_ % N;  // Column within a single N x N cell.
+      size_t shift = kx_ / N;  // Which repeated cell (0 .. x_repeat - 1) this kernel belongs to.
+      if ((ix < shift) || (ix - shift >= M)) continue;  // 0 weight from memset.
+      // Only weights for top-left 1 / 4 of kernels are specified; other 3 / 4
+      // kernels are produced by vertical and horizontal mirroring.
+      size_t j = (ky < N2) ? (iy + M * ky) : ((M - 1 - iy) + M * (N - 1 - ky));
+      size_t i = (kx < N2) ? (ix - shift + M * kx)
+                           : ((M - 1 - (ix - shift)) + M * (N - 1 - kx));
+      // (y, x) = sorted(i, j)
+      // the matrix built of kernel matrices as blocks is symmetric.
+      size_t y = std::min(i, j);
+      size_t x = std::max(i, j);
+      // Take the weight from "triangle" coordinates.
+      float weight = weights[M * N2 * y - y * (y - 1) / 2 + x - y];  // For y == 0 the unsigned y * (y - 1) term is still 0.
+      kernels[offset * stride + kernel] = weight;  // Layout: one stride-long slice per input coefficient, kernels contiguous within it.
+    }
+  }
+}
+
+template <size_t N, size_t x_repeat>
+void Upsample(const ImageF& src, const Rect& src_rect, ImageF* dst,
+              const Rect& dst_rect, const float* kernels,
+              ssize_t image_y_offset, size_t image_ysize, float* arena) {  // `arena` is caller-provided scratch, carved into the buffers below.
+  constexpr const size_t M = 2 * Upsampler::filter_radius() + 1;
+  constexpr const size_t M2 = M / 2;
+  JXL_DASSERT(src_rect.x0() >= M2);  // Needs M2 source columns available to the left of src_rect.
+  const size_t src_x_limit = src_rect.x0() + src_rect.xsize() + M2;
+  JXL_DASSERT(src_x_limit <= src.xsize());  // ... and M2 columns to the right.
+  JXL_ASSERT(DivCeil(dst_rect.xsize(), N) <= src_rect.xsize());
+  // TODO(eustas): add proper (src|dst) ysize check that accounts for mirroring.
+
+  constexpr const size_t MX = M + x_repeat - 1;
+  constexpr const size_t num_coeffs = M * MX;
+
+  constexpr const size_t NX = N * x_repeat;
+
+  HWY_FULL(float) df;
+  const size_t V = Lanes(df);
+  const size_t num_kernels = N * NX;
+  const size_t stride = RoundUpTo(num_kernels, V);  // Same slice stride that InitKernel used when building `kernels`.
+
+  const size_t rsx = DivCeil(dst_rect.xsize(), N);  // Source columns needed to cover the destination width.
+  const size_t dsx = rsx + 2 * M2;  // ... plus M2 border columns on each side.
+  // Round-down to complete vectors.
+  const size_t dsx_v = V * (dsx / V);
+
+  float* JXL_RESTRICT in = arena;  // Linearized M x MX window of input pixels.
+  arena += RoundUpTo(num_coeffs, V);
+  float* JXL_RESTRICT out = arena;  // One result per kernel: N rows of NX outputs.
+  arena += stride;
+  float* JXL_RESTRICT raw_min_row = arena;  // Per-source-column minimum over the M rows.
+  arena += RoundUpTo(dsx + V, V);
+  float* JXL_RESTRICT raw_max_row = arena;  // Per-source-column maximum over the M rows.
+  arena += RoundUpTo(dsx + V, V);
+  float* JXL_RESTRICT min_row = arena;  // Lower clamp bound for each output pixel.
+  arena += RoundUpTo(rsx * N + V, V);
+  float* JXL_RESTRICT max_row = arena;  // Upper clamp bound for each output pixel.
+  arena += RoundUpTo(rsx * N + V, V);
+
+  memset(raw_min_row + dsx_v, 0, sizeof(float) * (V + dsx - dsx_v));  // Zero the tail plus V floats of padding that the unaligned loads below may touch.
+  memset(raw_max_row + dsx_v, 0, sizeof(float) * (V + dsx - dsx_v));
+  memset(min_row + dst_rect.xsize(), 0, sizeof(float) * V);
+  memset(max_row + dst_rect.xsize(), 0, sizeof(float) * V);
+
+  // For min/max reduction.
+  const size_t span_tail_len = M % V;
+  const bool has_span_tail = (span_tail_len != 0);
+  JXL_ASSERT(has_span_tail || V <= M);
+  const size_t span_start = has_span_tail ? 0 : V;
+  const size_t span_tail_start = M - span_tail_len;
+  const auto span_tail_mask = Iota(df, 0) < Set(df, span_tail_len);  // True for the first span_tail_len lanes.
+
+  // sx and sy correspond to offset in source image.
+  // x and y correspond to top-left pixel offset in upsampled output image.
+  for (size_t y = 0; y < dst_rect.ysize(); y += N) {
+    const float* src_rows[M];
+    const size_t sy = y / N;
+    const ssize_t top = static_cast<ssize_t>(sy + src_rect.y0() - M2);  // Signed on purpose; out-of-range rows are mirrored below.
+    for (size_t iy = 0; iy < M; iy++) {
+      const ssize_t image_y = top + iy + image_y_offset;
+      src_rows[iy] = src.Row(Mirror(image_y, image_ysize) - image_y_offset);  // Mirror in full-frame coordinates, then convert back to a row of `src`.
+    }
+    const size_t sx0 = src_rect.x0() - M2;
+    for (size_t sx = 0; sx < dsx_v; sx += V) {  // Vectorized column-wise min/max over the M rows.
+      static_assert(M == 5, "Filter diameter is expected to be 5");
+      const auto r0 = LoadU(df, src_rows[0] + sx0 + sx);
+      const auto r1 = LoadU(df, src_rows[1] + sx0 + sx);
+      const auto r2 = LoadU(df, src_rows[2] + sx0 + sx);
+      const auto r3 = LoadU(df, src_rows[3] + sx0 + sx);
+      const auto r4 = LoadU(df, src_rows[4] + sx0 + sx);
+      const auto min0 = Min(r0, r1);
+      const auto max0 = Max(r0, r1);
+      const auto min1 = Min(r2, r3);
+      const auto max1 = Max(r2, r3);
+      const auto min2 = Min(min0, r4);
+      const auto max2 = Max(max0, r4);
+      Store(Min(min1, min2), df, raw_min_row + sx);
+      Store(Max(max1, max2), df, raw_max_row + sx);
+    }
+    for (size_t sx = dsx_v; sx < dsx; sx++) {  // Scalar version of the loop above for the remaining columns.
+      static_assert(M == 5, "Filter diameter is expected to be 5");
+      const auto r0 = src_rows[0][sx0 + sx];
+      const auto r1 = src_rows[1][sx0 + sx];
+      const auto r2 = src_rows[2][sx0 + sx];
+      const auto r3 = src_rows[3][sx0 + sx];
+      const auto r4 = src_rows[4][sx0 + sx];
+      const auto min0 = std::min(r0, r1);
+      const auto max0 = std::max(r0, r1);
+      const auto min1 = std::min(r2, r3);
+      const auto max1 = std::max(r2, r3);
+      const auto min2 = std::min(min0, r4);
+      const auto max2 = std::max(max0, r4);
+      raw_min_row[sx] = std::min(min1, min2);
+      raw_max_row[sx] = std::max(max1, max2);
+    }
+
+    for (size_t sx = 0; sx < rsx; sx++) {  // Reduce M neighbouring columns to the bounds of one source pixel.
+      decltype(Zero(df)) min, max;
+      if (has_span_tail) {
+        auto dummy = Set(df, raw_min_row[sx]);  // Filler for masked-off lanes; also used for `max` below.
+        min = IfThenElse(span_tail_mask,
+                         LoadU(df, raw_min_row + sx + span_tail_start), dummy);
+        max = IfThenElse(span_tail_mask,
+                         LoadU(df, raw_max_row + sx + span_tail_start), dummy);
+      } else {
+        min = LoadU(df, raw_min_row + sx);
+        max = LoadU(df, raw_max_row + sx);
+      }
+      for (size_t fx = span_start; fx < span_tail_start; fx += V) {
+        min = Min(LoadU(df, raw_min_row + sx + fx), min);
+        max = Max(LoadU(df, raw_max_row + sx + fx), max);
+      }
+      min = MinOfLanes(min);  // NOTE(review): per Highway docs the reduced value is broadcast to all lanes - the stores below rely on that.
+      max = MaxOfLanes(max);
+      for (size_t lx = 0; lx < N; lx += V) {
+        StoreU(min, df, min_row + N * sx + lx);  // The same bound applies to all N output columns of this source pixel.
+        StoreU(max, df, max_row + N * sx + lx);
+      }
+    }
+
+    for (size_t x = 0; x < dst_rect.xsize(); x += NX) {
+      const size_t sx = x / N;
+      const size_t xbase = sx + sx0;
+      // Copy input pixels for "linearization".
+      for (size_t iy = 0; iy < M; iy++) {
+        memcpy(in + MX * iy, src_rows[iy] + xbase, MX * sizeof(float));
+      }
+      if (x_repeat > 1) {
+        // Even if filter coeffs contain 0 at "undefined" values, the result
+        // might be undefined, because NaN will poison the sum.
+        if (JXL_UNLIKELY(xbase + MX > src_x_limit)) {
+          for (size_t iy = 0; iy < M; iy++) {
+            for (size_t ix = src_x_limit - xbase; ix < MX; ++ix) {
+              in[MX * iy + ix] = 0.0f;
+            }
+          }
+        }
+      }
+      constexpr size_t U = 4;  // Unroll factor.
+      constexpr size_t tail = num_coeffs & ~(U - 1);  // Largest multiple of U not exceeding num_coeffs.
+      constexpr size_t tail_length = num_coeffs - tail;
+      for (size_t kernel_idx = 0; kernel_idx < num_kernels; kernel_idx += V) {
+        const float* JXL_RESTRICT kernel_base = kernels + kernel_idx;
+        decltype(Zero(df)) results[U];  // U independent partial sums.
+        for (size_t i = 0; i < U; i++) {
+          results[i] = Set(df, in[i]) * Load(df, kernel_base + i * stride);
+        }
+        for (size_t i = U; i < tail; i += U) {
+          for (size_t j = 0; j < U; ++j) {
+            results[j] =
+                MulAdd(Set(df, in[i + j]),
+                       Load(df, kernel_base + (i + j) * stride), results[j]);
+          }
+        }
+        for (size_t i = 0; i < tail_length; ++i) {  // Coefficients left over after the unrolled loop.
+          results[i] =
+              MulAdd(Set(df, in[tail + i]),
+                     Load(df, kernel_base + (tail + i) * stride), results[i]);
+        }
+        auto result = results[0];
+        for (size_t i = 1; i < U; ++i) result += results[i];  // Combine the U partial sums.
+        Store(result, df, out + kernel_idx);
+      }
+      const size_t oy_max = std::min<size_t>(dst_rect.ysize(), y + N);
+      const size_t ox_max = std::min<size_t>(dst_rect.xsize(), x + NX);
+      const size_t copy_len = ox_max - x;
+      const size_t copy_last = RoundUpTo(copy_len, V);
+      if (JXL_LIKELY(x + copy_last <= dst_rect.xsize())) {  // Vector path: whole-vector stores stay inside dst_rect's width.
+        for (size_t dx = 0; dx < copy_len; dx += V) {
+          auto min = LoadU(df, min_row + x + dx);
+          auto max = LoadU(df, max_row + x + dx);
+          float* pixels = out;
+          for (size_t oy = sy * N; oy < oy_max; ++oy, pixels += NX) {
+            StoreU(Clamp(LoadU(df, pixels + dx), min, max), df,
+                   dst_rect.Row(dst, oy) + x + dx);
+          }
+        }
+      } else {  // Scalar path for the last, partial group of columns.
+        for (size_t dx = 0; dx < copy_len; dx++) {
+          auto min = min_row[x + dx];
+          auto max = max_row[x + dx];
+          float* pixels = out;
+          for (size_t oy = sy * N; oy < oy_max; ++oy, pixels += NX) {
+            dst_rect.Row(dst, oy)[x + dx] = Clamp1(pixels[dx], min, max);
+          }
+        }
+      }
+    }
+  }
+}
+
+}  // namespace
+
+void UpsampleRect(size_t upsampling, const float* kernels, const ImageF& src,
+                  const Rect& src_rect, ImageF* dst, const Rect& dst_rect,
+                  ssize_t image_y_offset, size_t image_ysize, float* arena,
+                  size_t x_repeat) {  // Picks the Upsample<N, x_repeat> instantiation matching the runtime arguments.
+  if (upsampling == 1) return;  // 1x: nothing to do; dst is left untouched.
+  if (upsampling == 2) {
+    if (x_repeat == 1) {
+      Upsample</*N=*/2, /*x_repeat=*/1>(src, src_rect, dst, dst_rect, kernels,
+                                        image_y_offset, image_ysize, arena);
+    } else if (x_repeat == 2) {
+      Upsample</*N=*/2, /*x_repeat=*/2>(src, src_rect, dst, dst_rect, kernels,
+                                        image_y_offset, image_ysize, arena);
+    } else if (x_repeat == 4) {
+      Upsample</*N=*/2, /*x_repeat=*/4>(src, src_rect, dst, dst_rect, kernels,
+                                        image_y_offset, image_ysize, arena);
+    } else {
+      JXL_ABORT("Not implemented");
+    }
+  } else if (upsampling == 4) {
+    JXL_ASSERT(x_repeat == 1);  // x_repeat > 1 is only instantiated for 2x.
+    Upsample</*N=*/4, /*x_repeat=*/1>(src, src_rect, dst, dst_rect, kernels,
+                                      image_y_offset, image_ysize, arena);
+  } else if (upsampling == 8) {
+    JXL_ASSERT(x_repeat == 1);  // x_repeat > 1 is only instantiated for 2x.
+    Upsample</*N=*/8, /*x_repeat=*/1>(src, src_rect, dst, dst_rect, kernels,
+                                      image_y_offset, image_ysize, arena);
+  } else {
+    JXL_ABORT("Not implemented");  // Any factor other than 1, 2, 4 or 8 ends up here.
+  }
+}
+
+size_t NumLanes() {  // Reports Lanes() of a full float vector for the target this copy is compiled for.
+  HWY_FULL(float) df;
+  return Lanes(df);
+}
+
+void Init(size_t upsampling, CacheAlignedUniquePtr* kernel_storage,
+          const CustomTransformData& data, size_t x_repeat) {  // Validates both factors, then builds the kernel table for `upsampling` into *kernel_storage.
+  if (upsampling == 0 || (upsampling & (upsampling - 1)) != 0 ||  // 0 passes the v & (v - 1) power-of-two test, so reject it explicitly.
+      upsampling > Upsampler::max_upsampling()) {
+    JXL_ABORT("Invalid upsample");
+  }
+  if (x_repeat == 0 || (x_repeat & (x_repeat - 1)) != 0 ||  // Same zero guard as above.
+      x_repeat > Upsampler::max_x_repeat()) {
+    JXL_ABORT("Invalid x_repeat");
+  }
+
+  // No-op upsampling.
+  if (upsampling == 1) return;  // *kernel_storage is left untouched in this case.
+  const float* weights = (upsampling == 2)   ? data.upsampling2_weights
+                         : (upsampling == 4) ? data.upsampling4_weights
+                                             : data.upsampling8_weights;  // After validation only 8 can reach this branch.
+  InitKernel(weights, kernel_storage, upsampling, x_repeat);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+namespace {  // Exports for the three functions that the Upsampler methods below call through HWY_DYNAMIC_DISPATCH.
+HWY_EXPORT(NumLanes);
+HWY_EXPORT(Init);
+HWY_EXPORT(UpsampleRect);
+}  // namespace
+
+void Upsampler::Init(size_t upsampling, const CustomTransformData& data) {  // Stores the factor, picks x_repeat_ from the lane count, then builds the kernels.
+  upsampling_ = upsampling;
+  size_t V = HWY_DYNAMIC_DISPATCH(NumLanes)();
+  x_repeat_ = 1;
+  if (upsampling_ == 2) {
+    // 2 * 2 = 4 kernels; repeat the cell if more lanes are available.
+    if (V >= 8) x_repeat_ = 2;
+    if (V >= 16) x_repeat_ = 4;  // Overrides the previous line when both conditions hold.
+  }
+  HWY_DYNAMIC_DISPATCH(Init)(upsampling, &kernel_storage_, data, x_repeat_);
+}
+
+size_t Upsampler::GetArenaSize(size_t max_dst_xsize) {  // Returns a float count sized from max_upsampling() and max_x_repeat(), not from the current configuration.
+  size_t V = HWY_DYNAMIC_DISPATCH(NumLanes)();
+  constexpr const size_t M2 = Upsampler::filter_radius();
+  constexpr const size_t M = 2 * M2 + 1;
+  constexpr size_t X = max_x_repeat();
+  constexpr const size_t MX = M + X - 1;
+  constexpr const size_t N = max_upsampling();
+  // TODO(eustas): raw_(min|max)_row and (min|max)_row could overlap almost
+  // completely.
+  return RoundUpTo(N * N * X, V) + RoundUpTo(M * MX, V) +  // Correspond to the `out` and `in` buffers carved out in Upsample().
+         2 * RoundUpTo(DivCeil(max_dst_xsize, 8) * 4 + 2 * M2 + V, V) +  // raw_min_row / raw_max_row; NOTE(review): DivCeil(x, 8) * 4 looks like a bound on source columns at 2x - confirm.
+         2 * RoundUpTo(max_dst_xsize + V, V);  // min_row / max_row.
+}
+
+void Upsampler::UpsampleRect(const ImageF& src, const Rect& src_rect,  // Single-plane variant: forwards to the dispatched UpsampleRect.
+                             ImageF* dst, const Rect& dst_rect,
+                             ssize_t image_y_offset, size_t image_ysize,
+                             float* arena) const {
+  JXL_CHECK(arena);  // Scratch memory is mandatory, even for 1x.
+  HWY_DYNAMIC_DISPATCH(UpsampleRect)
+  (upsampling_, reinterpret_cast<float*>(kernel_storage_.get()), src, src_rect,  // Passes the kernel table built by Init() as floats.
+   dst, dst_rect, image_y_offset, image_ysize, arena, x_repeat_);
+}
+
+void Upsampler::UpsampleRect(const Image3F& src, const Rect& src_rect,  // Three-plane variant: runs the single-plane overload on each plane.
+                             Image3F* dst, const Rect& dst_rect,
+                             ssize_t image_y_offset, size_t image_ysize,
+                             float* arena) const {
+  PROFILER_FUNC;
+  for (size_t c = 0; c < 3; c++) {  // The same arena is reused for each plane in turn.
+    UpsampleRect(src.Plane(c), src_rect, &dst->Plane(c), dst_rect,
+                 image_y_offset, image_ysize, arena);
+  }
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.h
new file mode 100644
index 0000000000..036acdfcba
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_upsample.h
@@ -0,0 +1,57 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_UPSAMPLE_H_
+#define LIB_JXL_DEC_UPSAMPLE_H_
+
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_metadata.h"
+
+namespace jxl {
+
+struct Upsampler {
+  void Init(size_t upsampling, const CustomTransformData& data);  // Must run before UpsampleRect; builds the kernel table for `upsampling`.
+
+  // Only 1x, 2x, 4x and 8x upsampling is supported.
+  static constexpr size_t max_upsampling() { return 8; }
+
+  // To produce N x N upsampled pixels the [-2..2]x[-2..2] neighborhood of
+  // input pixel is taken and dot-multiplied with N x N corresponding "kernels".
+  // Thus the "kernel" is a 5 x 5 matrix of weights.
+  static constexpr size_t filter_radius() { return 2; }
+
+  // Calculate multiple upsampled cells at the same time.
+  // Kernels are transposed - several kernels are multiplied by input
+  // at the same time.  In case of 2x upsampling there are only 4 kernels.
+  // If current target supports SIMD vectors longer than 4 floats, to reduce
+  // the wasted multiplications we increase the effective kernel count.
+  static constexpr size_t max_x_repeat() { return 4; }
+
+  // Get the size of "arena" required for UpsampleRect;
+  // "arena" should be an aligned piece of memory with at least `GetArenaSize()`
+  // float values accessible.
+  static size_t GetArenaSize(size_t max_dst_xsize);
+
+  // The caller must guarantee that `src:src_rect` has two pixels of padding
+  // available on each side of the x dimension. `image_ysize` is the total
+  // height of the frame that the source area belongs to (not the buffer);
+  // `image_y_offset` is the difference between `src.y0()` and the corresponding
+  // y value in the full frame.
+  void UpsampleRect(const Image3F& src, const Rect& src_rect, Image3F* dst,
+                    const Rect& dst_rect, ssize_t image_y_offset,
+                    size_t image_ysize, float* arena) const;
+  void UpsampleRect(const ImageF& src, const Rect& src_rect, ImageF* dst,
+                    const Rect& dst_rect, ssize_t image_y_offset,
+                    size_t image_ysize, float* arena) const;
+
+ private:
+  size_t upsampling_ = 1;  // Set by Init(); 1 makes UpsampleRect a no-op.
+  size_t x_repeat_ = 1;  // Cells computed per step; Init() raises it only for 2x upsampling.
+  CacheAlignedUniquePtr kernel_storage_ = {nullptr};  // Kernel table filled in by Init().
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_UPSAMPLE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_xyb-inl.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_xyb-inl.h
new file mode 100644
index 0000000000..df16ce897a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_xyb-inl.h
@@ -0,0 +1,351 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// XYB -> linear sRGB helper function.
+
+#if defined(LIB_JXL_DEC_XYB_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_DEC_XYB_INL_H_
+#undef LIB_JXL_DEC_XYB_INL_H_
+#else
+#define LIB_JXL_DEC_XYB_INL_H_
+#endif
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/dec_xyb.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Broadcast;
+
+// Inverts the pixel-wise RGB->XYB conversion in OpsinDynamicsImage() (including
+// the gamma mixing and simple gamma). Avoids clamping to [0, 1] - out of (sRGB)
+// gamut values may be in-gamut after transforming to a wider space.
+// "inverse_matrix" points to 9 broadcasted vectors, which are the 3x3 entries
+// of the (row-major) opsin absorbance matrix inverse. Pre-multiplying its
+// entries by c is equivalent to multiplying linear_* by c afterwards.
+template <class D, class V>
+HWY_INLINE HWY_MAYBE_UNUSED void XybToRgb(D d, const V opsin_x, const V opsin_y,
+                                          const V opsin_b,
+                                          const OpsinParams& opsin_params,
+                                          V* const HWY_RESTRICT linear_r,
+                                          V* const HWY_RESTRICT linear_g,
+                                          V* const HWY_RESTRICT linear_b) {
+  // Per-channel bias added back after cubing. The scalar target splats each
+  // value with Set; other targets load all biases once and broadcast a lane.
+#if HWY_TARGET == HWY_SCALAR
+  const auto post_bias_r = Set(d, opsin_params.opsin_biases[0]);
+  const auto post_bias_g = Set(d, opsin_params.opsin_biases[1]);
+  const auto post_bias_b = Set(d, opsin_params.opsin_biases[2]);
+#else
+  const auto post_bias_rgb = LoadDup128(d, opsin_params.opsin_biases);
+  const auto post_bias_r = Broadcast<0>(post_bias_rgb);
+  const auto post_bias_g = Broadcast<1>(post_bias_rgb);
+  const auto post_bias_b = Broadcast<2>(post_bias_rgb);
+#endif
+
+  // XYB -> gamma-domain RGB: unmix X/Y, then subtract the cube-root biases.
+  const float* HWY_RESTRICT cbrt_bias = opsin_params.opsin_biases_cbrt;
+  const auto cbrt_r = (opsin_y + opsin_x) - Set(d, cbrt_bias[0]);
+  const auto cbrt_g = (opsin_y - opsin_x) - Set(d, cbrt_bias[1]);
+  const auto cbrt_b = opsin_b - Set(d, cbrt_bias[2]);
+
+  // Undo the gamma compression by cubing; MulAdd folds the last multiply and
+  // the bias addition into one call.
+  const auto mixed_r = MulAdd(cbrt_r * cbrt_r, cbrt_r, post_bias_r);
+  const auto mixed_g = MulAdd(cbrt_g * cbrt_g, cbrt_g, post_bias_g);
+  const auto mixed_b = MulAdd(cbrt_b * cbrt_b, cbrt_b, post_bias_b);
+
+  // Unmix with the 3x3 inverse matrix. Entry k is read from offset 4 * k, and
+  // each output channel accumulates its r, g and b terms in that order.
+  const float* HWY_RESTRICT m = opsin_params.inverse_opsin_matrix;
+  auto out_r = LoadDup128(d, &m[0 * 4]) * mixed_r;
+  out_r = MulAdd(LoadDup128(d, &m[1 * 4]), mixed_g, out_r);
+  out_r = MulAdd(LoadDup128(d, &m[2 * 4]), mixed_b, out_r);
+  auto out_g = LoadDup128(d, &m[3 * 4]) * mixed_r;
+  out_g = MulAdd(LoadDup128(d, &m[4 * 4]), mixed_g, out_g);
+  out_g = MulAdd(LoadDup128(d, &m[5 * 4]), mixed_b, out_g);
+  auto out_b = LoadDup128(d, &m[6 * 4]) * mixed_r;
+  out_b = MulAdd(LoadDup128(d, &m[7 * 4]), mixed_g, out_b);
+  out_b = MulAdd(LoadDup128(d, &m[8 * 4]), mixed_b, out_b);
+  *linear_r = out_r;
+  *linear_g = out_g;
+  *linear_b = out_b;
+}
+
+static inline HWY_MAYBE_UNUSED bool HasFastXYBTosRGB8() {
+#if HWY_TARGET != HWY_NEON
+  return false;  // FastXYBTosRGB8() only aborts when built for this target.
+#else
+  return true;  // NEON: the fixed-point FastXYBTosRGB8() path is compiled in.
+#endif
+}
+
+static inline HWY_MAYBE_UNUSED void FastXYBTosRGB8(
+    const Image3F& input, const Rect& input_rect, const Rect& output_buf_rect,
+    const ImageF* alpha, const Rect& alpha_rect, bool is_rgba,
+    uint8_t* JXL_RESTRICT output_buf, size_t xsize, size_t output_stride) {
+  // Fixed-point XYB -> sRGB8 conversion. NEON-only, hence the raw intrinsics.
+#if HWY_TARGET == HWY_NEON
+  // WARNING: doing fixed point arithmetic correctly is very complicated.
+  // Changes to this function should be thoroughly tested.
+
+  // Note that the input is assumed to have 13 bits of mantissa, and the output
+  // will have 14 bits.
+  auto srgb_tf = [&](int16x8_t v16) {
+    int16x8_t clz = vclzq_s16(v16);
+    // Convert to [0.25, 0.5) range.
+    int16x8_t v025_05_16 = vqshlq_s16(v16, vqsubq_s16(clz, vdupq_n_s16(2)));
+
+    // third degree polynomial approximation between 0.25 and 0.5
+    // of 1.055/2^(7/2.4) * x^(1/2.4) / 32.
+    // poly ~ ((0.95x-1.75)*x+1.72)*x+0.29
+    // We actually compute ~ ((0.47x-0.87)*x+0.86)*(2x)+0.29 as 1.75 and 1.72
+    // overflow our fixed point representation.
+
+    int16x8_t twov = vqaddq_s16(v025_05_16, v025_05_16);
+
+    // 0.47 * x
+    int16x8_t step1 = vqrdmulhq_n_s16(v025_05_16, 15706);
+    // - 0.87
+    int16x8_t step2 = vsubq_s16(step1, vdupq_n_s16(28546));
+    // * x
+    int16x8_t step3 = vqrdmulhq_s16(step2, v025_05_16);
+    // + 0.86
+    int16x8_t step4 = vaddq_s16(step3, vdupq_n_s16(28302));
+    // * 2x
+    int16x8_t step5 = vqrdmulhq_s16(step4, twov);
+    // + 0.29
+    int16x8_t mul16 = vaddq_s16(step5, vdupq_n_s16(9485));
+
+    int16x8_t exp16 = vsubq_s16(vdupq_n_s16(11), clz);
+    // Compute 2**(1/2.4*exp16)/32. Values of exp16 that would overflow are
+    // capped to 1.
+    // Generated with the following Python script:
+    // a = []
+    // b = []
+    //
+    // for i in range(0, 16):
+    //   v = 2**(5/12.*i)
+    //   v /= 16
+    //   v *= 256 * 128
+    //   v = int(v)
+    //   a.append(v // 256)
+    //   b.append(v % 256)
+    //
+    // print(", ".join("0x%02x" % x for x in a))
+    //
+    // print(", ".join("0x%02x" % x for x in b))
+
+    HWY_ALIGN constexpr uint8_t k2to512powersm1div32_high[16] = {
+        0x08, 0x0a, 0x0e, 0x13, 0x19, 0x21, 0x2d, 0x3c,
+        0x50, 0x6b, 0x8f, 0x8f, 0x8f, 0x8f, 0x8f, 0x8f,
+    };
+    HWY_ALIGN constexpr uint8_t k2to512powersm1div32_low[16] = {
+        0x00, 0xad, 0x41, 0x06, 0x65, 0xe7, 0x41, 0x68,
+        0xa2, 0xa2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    };
+    // Using the highway implementation here since vqtbl1q is aarch64-only.
+    using hwy::HWY_NAMESPACE::Vec128;
+    uint8x16_t pow_low =
+        TableLookupBytes(
+            Vec128<uint8_t, 16>(vld1q_u8(k2to512powersm1div32_low)),
+            Vec128<uint8_t, 16>(vreinterpretq_u8_s16(exp16)))
+            .raw;
+    uint8x16_t pow_high =
+        TableLookupBytes(
+            Vec128<uint8_t, 16>(vld1q_u8(k2to512powersm1div32_high)),
+            Vec128<uint8_t, 16>(vreinterpretq_u8_s16(exp16)))
+            .raw;
+    int16x8_t pow16 = vreinterpretq_s16_u16(vsliq_n_u16(
+        vreinterpretq_u16_u8(pow_low), vreinterpretq_u16_u8(pow_high), 8));
+
+    // approximation of v * 12.92, divided by 2
+    // Note that our input is using 13 mantissa bits instead of 15.
+    int16x8_t v16_linear = vrshrq_n_s16(vmulq_n_s16(v16, 826), 5);
+    // 1.055*pow(v, 1/2.4) - 0.055, divided by 2
+    auto v16_pow = vsubq_s16(vqrdmulhq_s16(mul16, pow16), vdupq_n_s16(901));
+    // > 0.0031308f (note that v16 has 13 mantissa bits)
+    return vbslq_s16(vcgeq_s16(v16, vdupq_n_s16(26)), v16_pow, v16_linear);
+  };
+  for (size_t y = 0; y < output_buf_rect.ysize(); y++) {
+    const float* JXL_RESTRICT row_in_x = input_rect.ConstPlaneRow(input, 0, y);
+    const float* JXL_RESTRICT row_in_y = input_rect.ConstPlaneRow(input, 1, y);
+    const float* JXL_RESTRICT row_in_b = input_rect.ConstPlaneRow(input, 2, y);
+    const float* JXL_RESTRICT row_in_a =
+        alpha == nullptr ? nullptr : alpha_rect.ConstRow(*alpha, y);
+    size_t cnt = !is_rgba ? 3 : 4;  // bytes per output pixel
+    size_t base_ptr =
+        (y + output_buf_rect.y0()) * output_stride + output_buf_rect.x0() * cnt;
+    for (size_t x = 0; x < output_buf_rect.xsize(); x += 8) {
+      // Normal ranges for xyb for in-gamut sRGB colors:
+      // x: -0.015386 0.028100
+      // y: 0.000000 0.845308
+      // b: 0.000000 0.845308
+
+      // We actually want x * 8 to have some extra precision.
+      // TODO(veluca): consider different approaches here, like vld1q_f32_x2.
+      float32x4_t opsin_x_left = vld1q_f32(row_in_x + x);
+      int16x4_t opsin_x16_times8_left =
+          vqmovn_s32(vcvtq_n_s32_f32(opsin_x_left, 18));
+      float32x4_t opsin_x_right =
+          vld1q_f32(row_in_x + x + (x + 4 < output_buf_rect.xsize() ? 4 : 0));
+      int16x4_t opsin_x16_times8_right =
+          vqmovn_s32(vcvtq_n_s32_f32(opsin_x_right, 18));
+      int16x8_t opsin_x16_times8 =
+          vcombine_s16(opsin_x16_times8_left, opsin_x16_times8_right);
+      // Note: each *_right load repeats the left one when <= 4 pixels remain.
+      float32x4_t opsin_y_left = vld1q_f32(row_in_y + x);
+      int16x4_t opsin_y16_left = vqmovn_s32(vcvtq_n_s32_f32(opsin_y_left, 15));
+      float32x4_t opsin_y_right =
+          vld1q_f32(row_in_y + x + (x + 4 < output_buf_rect.xsize() ? 4 : 0));
+      int16x4_t opsin_y16_right =
+          vqmovn_s32(vcvtq_n_s32_f32(opsin_y_right, 15));
+      int16x8_t opsin_y16 = vcombine_s16(opsin_y16_left, opsin_y16_right);
+
+      float32x4_t opsin_b_left = vld1q_f32(row_in_b + x);
+      int16x4_t opsin_b16_left = vqmovn_s32(vcvtq_n_s32_f32(opsin_b_left, 15));
+      float32x4_t opsin_b_right =
+          vld1q_f32(row_in_b + x + (x + 4 < output_buf_rect.xsize() ? 4 : 0));
+      int16x4_t opsin_b16_right =
+          vqmovn_s32(vcvtq_n_s32_f32(opsin_b_right, 15));
+      int16x8_t opsin_b16 = vcombine_s16(opsin_b16_left, opsin_b16_right);
+
+      int16x8_t neg_bias16 = vdupq_n_s16(-124);        // -0.0037930732552754493
+      int16x8_t neg_bias_cbrt16 = vdupq_n_s16(-5110);  // -0.155954201
+      int16x8_t neg_bias_half16 = vdupq_n_s16(-62);
+
+      // Color space: XYB -> RGB
+      // Compute ((y+x-bias_cbrt)^3-(y-x-bias_cbrt)^3)/2,
+      // ((y+x-bias_cbrt)^3+(y-x-bias_cbrt)^3)/2+bias, (b-bias_cbrt)^3+bias.
+      // Note that ignoring x2 in the formulas below (as x << y) results in
+      // errors of at least 3 in the final sRGB values.
+      int16x8_t opsin_yp16 = vqsubq_s16(opsin_y16, neg_bias_cbrt16);
+      int16x8_t ysq16 = vqrdmulhq_s16(opsin_yp16, opsin_yp16);
+      int16x8_t twentyfourx16 = vmulq_n_s16(opsin_x16_times8, 3);
+      int16x8_t twentyfourxy16 = vqrdmulhq_s16(opsin_yp16, twentyfourx16);
+      int16x8_t threexsq16 =
+          vrshrq_n_s16(vqrdmulhq_s16(opsin_x16_times8, twentyfourx16), 6);
+
+      // We can ignore x^3 here. Note that this is multiplied by 8.
+      int16x8_t mixed_rmg16 = vqrdmulhq_s16(twentyfourxy16, opsin_yp16);
+
+      int16x8_t mixed_rpg_sos_half = vhaddq_s16(ysq16, threexsq16);
+      int16x8_t mixed_rpg16 = vhaddq_s16(
+          vqrdmulhq_s16(opsin_yp16, mixed_rpg_sos_half), neg_bias_half16);
+
+      int16x8_t gamma_b16 = vqsubq_s16(opsin_b16, neg_bias_cbrt16);
+      int16x8_t gamma_bsq16 = vqrdmulhq_s16(gamma_b16, gamma_b16);
+      int16x8_t gamma_bcb16 = vqrdmulhq_s16(gamma_bsq16, gamma_b16);
+      int16x8_t mixed_b16 = vqaddq_s16(gamma_bcb16, neg_bias16);
+      // mixed_rpg and mixed_b are in 0-1 range.
+      // mixed_rmg has a smaller range (-0.035 to 0.035 for valid sRGB). Note
+      // that at this point it is already multiplied by 8.
+
+      // We multiply all the mixed values by 1/4 (i.e. shift them to 13-bit
+      // fixed point) to ensure intermediate quantities are in range. Note that
+      // r-g is not shifted, and was x8 before here; this corresponds to a x32
+      // overall multiplicative factor and ensures that all the matrix constants
+      // are in 0-1 range.
+      // Similarly, mixed_rpg16 is already multiplied by 1/4 because of the two
+      // vhadd + using neg_bias_half.
+      mixed_b16 = vshrq_n_s16(mixed_b16, 2);
+
+      // Unmix (multiply by 3x3 inverse_matrix)
+      // For increased precision, we use a matrix for converting from
+      // ((mixed_r - mixed_g)/2, (mixed_r + mixed_g)/2, mixed_b) to rgb. This
+      // avoids cancellation effects when computing (y+x)^3-(y-x)^3.
+      // We compute mixed_rpg - mixed_b because the (1+c)*mixed_rpg - c *
+      // mixed_b pattern is repeated frequently in the code below. This allows
+      // us to save a multiply per channel, and removes the presence of
+      // some constants above 1. Moreover, mixed_rpg - mixed_b is in (-1, 1)
+      // range, so the subtraction is safe.
+      // All the magic-looking constants here are derived by computing the
+      // inverse opsin matrix for the transformation modified as described
+      // above.
+
+      // Precomputation common to multiple color values.
+      int16x8_t mixed_rpgmb16 = vqsubq_s16(mixed_rpg16, mixed_b16);
+      int16x8_t mixed_rpgmb_times_016 = vqrdmulhq_n_s16(mixed_rpgmb16, 5394);
+      int16x8_t mixed_rg16 = vqaddq_s16(mixed_rpgmb_times_016, mixed_rpg16);
+
+      // R
+      int16x8_t linear_r16 =
+          vqaddq_s16(mixed_rg16, vqrdmulhq_n_s16(mixed_rmg16, 21400));
+
+      // G
+      int16x8_t linear_g16 =
+          vqaddq_s16(mixed_rg16, vqrdmulhq_n_s16(mixed_rmg16, -7857));
+
+      // B
+      int16x8_t linear_b16 = vqrdmulhq_n_s16(mixed_rpgmb16, -30996);
+      linear_b16 = vqaddq_s16(linear_b16, mixed_b16);
+      linear_b16 = vqaddq_s16(linear_b16, vqrdmulhq_n_s16(mixed_rmg16, -6525));
+
+      // Apply SRGB transfer function.
+      int16x8_t r = srgb_tf(linear_r16);
+      int16x8_t g = srgb_tf(linear_g16);
+      int16x8_t b = srgb_tf(linear_b16);
+      // (v - v / 256) >> 6 ~= v * 255 / 2^14: map the 14-bit values to 0..255.
+      uint8x8_t r8 =
+          vqmovun_s16(vrshrq_n_s16(vsubq_s16(r, vshrq_n_s16(r, 8)), 6));
+      uint8x8_t g8 =
+          vqmovun_s16(vrshrq_n_s16(vsubq_s16(g, vshrq_n_s16(g, 8)), 6));
+      uint8x8_t b8 =
+          vqmovun_s16(vrshrq_n_s16(vsubq_s16(b, vshrq_n_s16(b, 8)), 6));
+
+      size_t n = output_buf_rect.xsize() - x;  // pixels left in this row
+      if (is_rgba) {
+        float32x4_t a_f32_left =
+            row_in_a ? vld1q_f32(row_in_a + x) : vdupq_n_f32(1.0f);
+        float32x4_t a_f32_right =
+            row_in_a ? vld1q_f32(row_in_a + x +
+                                 (x + 4 < output_buf_rect.xsize() ? 4 : 0))
+                     : vdupq_n_f32(1.0f);
+        int16x4_t a16_left = vqmovn_s32(vcvtq_n_s32_f32(a_f32_left, 8));
+        int16x4_t a16_right = vqmovn_s32(vcvtq_n_s32_f32(a_f32_right, 8));
+        uint8x8_t a8 = vqmovun_s16(vcombine_s16(a16_left, a16_right));
+        uint8_t* buf = output_buf + base_ptr + 4 * x;
+        uint8x8x4_t data = {r8, g8, b8, a8};
+        if (n >= 8) {
+          vst4_u8(buf, data);
+        } else {  // row tail: store all 8 to scratch, copy only n pixels out
+          uint8_t tmp[8 * 4];
+          vst4_u8(tmp, data);
+          memcpy(buf, tmp, n * 4);
+        }
+      } else {
+        uint8_t* buf = output_buf + base_ptr + 3 * x;
+        uint8x8x3_t data = {r8, g8, b8};
+        if (n >= 8) {
+          vst3_u8(buf, data);
+        } else {  // row tail: store all 8 to scratch, copy only n pixels out
+          uint8_t tmp[8 * 3];
+          vst3_u8(tmp, data);
+          memcpy(buf, tmp, n * 3);
+        }
+      }
+    }
+  }
+#else
+  (void)input;
+  (void)input_rect;
+  (void)output_buf_rect;
+  (void)output_buf;
+  (void)xsize;
+  JXL_ABORT("Unreachable");
+#endif
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_DEC_XYB_INL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_xyb.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_xyb.cc
new file mode 100644
index 0000000000..26e10037e6
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_xyb.cc
@@ -0,0 +1,290 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_xyb.h"
+
+#include <string.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_xyb.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_group_border.h"
+#include "lib/jxl/dec_xyb-inl.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/sanitizers.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Broadcast;
+
+void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool,
+                          const OpsinParams& opsin_params) {
+  PROFILER_FUNC;
+  JXL_CHECK_IMAGE_INITIALIZED(*inout, Rect(*inout));
+
+  // Each task converts one image row, overwriting the XYB samples in place.
+  const size_t width = inout->xsize();  // unpadded width
+  const auto convert_row = [&](const int task, int /*thread*/) {
+    const size_t row = task;
+    const HWY_FULL(float) d;
+
+    // Fetch the three plane pointers once per row rather than per vector.
+    float* JXL_RESTRICT plane0 = inout->PlaneRow(0, row);
+    float* JXL_RESTRICT plane1 = inout->PlaneRow(1, row);
+    float* JXL_RESTRICT plane2 = inout->PlaneRow(2, row);
+
+    for (size_t x = 0; x < width; x += Lanes(d)) {
+      const auto xyb_x = Load(d, plane0 + x);
+      const auto xyb_y = Load(d, plane1 + x);
+      const auto xyb_b = Load(d, plane2 + x);
+      JXL_COMPILER_FENCE;
+      // Placeholders only: XybToRgb receives them as its three outputs.
+      auto out_r = Undefined(d);
+      auto out_g = Undefined(d);
+      auto out_b = Undefined(d);
+      XybToRgb(d, xyb_x, xyb_y, xyb_b, opsin_params, &out_r, &out_g, &out_b);
+
+      Store(out_r, d, plane0 + x);
+      Store(out_g, d, plane1 + x);
+      Store(out_b, d, plane2 + x);
+    }
+  };
+  // Dispatch one task per image row.
+  RunOnPool(pool, 0, inout->ysize(), ThreadPool::SkipInit(), convert_row,
+            "OpsinToLinear");
+}
+
+// Same, but not in-place.
+void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool,
+                   Image3F* JXL_RESTRICT linear,
+                   const OpsinParams& opsin_params) {
+  PROFILER_FUNC;
+
+  JXL_ASSERT(SameSize(rect, *linear));
+  JXL_CHECK_IMAGE_INITIALIZED(opsin, rect);
+
+  // Each task reads one row of opsin:rect and writes the same row of linear.
+  const auto convert_row = [&](const int task, int /*thread*/) {
+    const size_t row = static_cast<size_t>(task);
+    const HWY_FULL(float) d;
+
+    // Source rows are addressed through rect; destination rows are not.
+    const float* JXL_RESTRICT src_x = rect.ConstPlaneRow(opsin, 0, row);
+    const float* JXL_RESTRICT src_y = rect.ConstPlaneRow(opsin, 1, row);
+    const float* JXL_RESTRICT src_b = rect.ConstPlaneRow(opsin, 2, row);
+    float* JXL_RESTRICT dst_r = linear->PlaneRow(0, row);
+    float* JXL_RESTRICT dst_g = linear->PlaneRow(1, row);
+    float* JXL_RESTRICT dst_b = linear->PlaneRow(2, row);
+
+    for (size_t x = 0; x < rect.xsize(); x += Lanes(d)) {
+      const auto xyb_x = Load(d, src_x + x);
+      const auto xyb_y = Load(d, src_y + x);
+      const auto xyb_b = Load(d, src_b + x);
+      JXL_COMPILER_FENCE;
+      // Placeholders only: XybToRgb receives them as its three outputs.
+      auto out_r = Undefined(d);
+      auto out_g = Undefined(d);
+      auto out_b = Undefined(d);
+      XybToRgb(d, xyb_x, xyb_y, xyb_b, opsin_params, &out_r, &out_g, &out_b);
+
+      Store(out_r, d, dst_r + x);
+      Store(out_g, d, dst_g + x);
+      Store(out_b, d, dst_b + x);
+    }
+  };
+  // Dispatch one task per row of rect.
+  RunOnPool(pool, 0, static_cast<int>(rect.ysize()), ThreadPool::SkipInit(),
+            convert_row, "OpsinToLinear(Rect)");
+  JXL_CHECK_IMAGE_INITIALIZED(*linear, rect);
+}
+
+// Transform YCbCr to RGB.
+// Could be performed in-place (i.e. Y, Cb and Cr could alias R, G and B).
+void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect) {
+  JXL_CHECK_IMAGE_INITIALIZED(ycbcr, rect);
+  const HWY_CAPPED(float, GroupBorderAssigner::kPaddingXRound) df;
+  const size_t lanes = Lanes(df);  // Pixels handled per inner iteration.
+
+  // Nothing to convert for an empty rectangle.
+  const size_t width = rect.xsize();
+  const size_t height = rect.ysize();
+  if (width == 0 || height == 0) return;
+
+  // Full-range BT.601 as defined by JFIF Clause 7:
+  // https://www.itu.int/rec/T-REC-T.871-201105-I/en
+  const auto luma_offset = Set(df, 128.0f / 255);
+  const auto cr_to_r = Set(df, 1.402f);
+  const auto cr_to_g = Set(df, -0.299f * 1.402f / 0.587f);
+  const auto cb_to_g = Set(df, -0.114f * 1.772f / 0.587f);
+  const auto cb_to_b = Set(df, 1.772f);
+
+  for (size_t row = 0; row < height; ++row) {
+    // Input planes are read as (Cb, Y, Cr); output planes are (R, G, B).
+    const float* in_cb = rect.ConstPlaneRow(ycbcr, 0, row);
+    const float* in_y = rect.ConstPlaneRow(ycbcr, 1, row);
+    const float* in_cr = rect.ConstPlaneRow(ycbcr, 2, row);
+    float* out_r = rect.PlaneRow(rgb, 0, row);
+    float* out_g = rect.PlaneRow(rgb, 1, row);
+    float* out_b = rect.PlaneRow(rgb, 2, row);
+    for (size_t x = 0; x < width; x += lanes) {
+      const auto luma = Load(df, in_y + x) + luma_offset;
+      const auto cb = Load(df, in_cb + x);
+      const auto cr = Load(df, in_cr + x);
+      // All three inputs are loaded before the first store below.
+      Store(cr_to_r * cr + luma, df, out_r + x);
+      Store(cr_to_g * cr + cb_to_g * cb + luma, df, out_g + x);
+      Store(cb_to_b * cb + luma, df, out_b + x);
+    }
+  }
+  JXL_CHECK_IMAGE_INITIALIZED(*rgb, rect);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(OpsinToLinearInplace);  // table used by HWY_DYNAMIC_DISPATCH below
+void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool,
+                          const OpsinParams& opsin_params) {
+  HWY_DYNAMIC_DISPATCH(OpsinToLinearInplace)(inout, pool, opsin_params);
+}
+
+HWY_EXPORT(OpsinToLinear);  // table used by HWY_DYNAMIC_DISPATCH below
+void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool,
+                   Image3F* JXL_RESTRICT linear,
+                   const OpsinParams& opsin_params) {
+  // Forward every argument unchanged to the selected per-target variant.
+  HWY_DYNAMIC_DISPATCH(OpsinToLinear)(opsin, rect, pool, linear, opsin_params);
+}
+
+HWY_EXPORT(YcbcrToRgb);  // table used by HWY_DYNAMIC_DISPATCH below
+void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect) {
+  HWY_DYNAMIC_DISPATCH(YcbcrToRgb)(ycbcr, rgb, rect);
+}
+
+HWY_EXPORT(HasFastXYBTosRGB8);  // table used by HWY_DYNAMIC_DISPATCH below
+bool HasFastXYBTosRGB8() { return HWY_DYNAMIC_DISPATCH(HasFastXYBTosRGB8)(); }
+
+HWY_EXPORT(FastXYBTosRGB8);  // table used by HWY_DYNAMIC_DISPATCH below
+void FastXYBTosRGB8(const Image3F& input, const Rect& input_rect,
+                    const Rect& output_buf_rect, const ImageF* alpha,
+                    const Rect& alpha_rect, bool is_rgba,
+                    uint8_t* JXL_RESTRICT output_buf, size_t xsize,
+                    size_t output_stride) {
+  // Forward every argument unchanged to the selected per-target variant.
+  HWY_DYNAMIC_DISPATCH(FastXYBTosRGB8)(input, input_rect, output_buf_rect, alpha,
+      alpha_rect, is_rgba, output_buf, xsize, output_stride);
+}
+
+void OpsinParams::Init(float intensity_target) {
+  // Default inverse opsin matrix, laid out for SIMD use by the helper.
+  InitSIMDInverseMatrix(GetOpsinAbsorbanceInverseMatrix(), inverse_opsin_matrix,
+                        intensity_target);
+  // Default biases, followed by the cube root of each opsin bias.
+  memcpy(opsin_biases, kNegOpsinAbsorbanceBiasRGB,
+         sizeof(kNegOpsinAbsorbanceBiasRGB));
+  memcpy(quant_biases, kDefaultQuantBias, sizeof(kDefaultQuantBias));
+  for (size_t c = 0; c != 4; ++c) opsin_biases_cbrt[c] = cbrtf(opsin_biases[c]);
+}
+
+Status OutputEncodingInfo::Set(const CodecMetadata& metadata,
+                               const ColorEncoding& default_enc) {
+  const auto& im = metadata.transform_data.opsin_inverse_matrix;
+  float inverse_matrix[9];  // local copy; may be re-targeted below
+  memcpy(inverse_matrix, im.inverse_matrix, sizeof(inverse_matrix));
+  float intensity_target = metadata.m.IntensityTarget();
+  if (metadata.m.xyb_encoded) {
+    const auto& orig_color_encoding = metadata.m.color_encoding;
+    color_encoding = default_enc;  // kept whenever a check below bails out
+    // Figure out if we can output to this color encoding.
+    do {  // runs once; `break` leaves the default_enc fallback in place
+      if (!orig_color_encoding.HaveFields()) break;
+      // TODO(veluca): keep in sync with dec_reconstruct.cc
+      if (!orig_color_encoding.tf.IsPQ() && !orig_color_encoding.tf.IsSRGB() &&
+          !orig_color_encoding.tf.IsGamma() &&
+          !orig_color_encoding.tf.IsLinear() &&
+          !orig_color_encoding.tf.IsHLG() && !orig_color_encoding.tf.IsDCI() &&
+          !orig_color_encoding.tf.Is709()) {
+        break;
+      }
+      if (orig_color_encoding.tf.IsGamma()) {
+        inverse_gamma = orig_color_encoding.tf.GetGamma();
+      }
+      if (orig_color_encoding.tf.IsDCI()) {
+        inverse_gamma = 1.0f / 2.6f;
+      }
+      if (orig_color_encoding.IsGray() &&
+          orig_color_encoding.white_point != WhitePoint::kD65) {
+        // TODO(veluca): figure out what should happen here.
+        break;
+      }
+      // Non-sRGB/D65 color output: fold the primaries change into the matrix.
+      if ((orig_color_encoding.primaries != Primaries::kSRGB ||
+           orig_color_encoding.white_point != WhitePoint::kD65) &&
+          !orig_color_encoding.IsGray()) {
+        all_default_opsin = false;
+        float srgb_to_xyzd50[9];
+        const auto& srgb = ColorEncoding::SRGB(/*is_gray=*/false);
+        JXL_CHECK(PrimariesToXYZD50(
+            srgb.GetPrimaries().r.x, srgb.GetPrimaries().r.y,
+            srgb.GetPrimaries().g.x, srgb.GetPrimaries().g.y,
+            srgb.GetPrimaries().b.x, srgb.GetPrimaries().b.y,
+            srgb.GetWhitePoint().x, srgb.GetWhitePoint().y, srgb_to_xyzd50));
+        float xyzd50_to_original[9];
+        JXL_RETURN_IF_ERROR(PrimariesToXYZD50(
+            orig_color_encoding.GetPrimaries().r.x,
+            orig_color_encoding.GetPrimaries().r.y,
+            orig_color_encoding.GetPrimaries().g.x,
+            orig_color_encoding.GetPrimaries().g.y,
+            orig_color_encoding.GetPrimaries().b.x,
+            orig_color_encoding.GetPrimaries().b.y,
+            orig_color_encoding.GetWhitePoint().x,
+            orig_color_encoding.GetWhitePoint().y, xyzd50_to_original));
+        JXL_RETURN_IF_ERROR(Inv3x3Matrix(xyzd50_to_original));
+        float srgb_to_original[9];
+        MatMul(xyzd50_to_original, srgb_to_xyzd50, 3, 3, 3, srgb_to_original);
+        MatMul(srgb_to_original, im.inverse_matrix, 3, 3, 3, inverse_matrix);
+      }
+      color_encoding = orig_color_encoding;
+      color_encoding_is_original = true;
+      if (color_encoding.tf.IsPQ()) {
+        intensity_target = 10000;  // PQ output overrides the metadata value
+      }
+    } while (false);
+  } else {
+    color_encoding = metadata.m.color_encoding;  // not XYB: used as-is
+  }
+  if (std::abs(intensity_target - 255.0) > 0.1f || !im.all_default) {
+    all_default_opsin = false;
+  }
+  InitSIMDInverseMatrix(inverse_matrix, opsin_params.inverse_opsin_matrix,
+                        intensity_target);
+  std::copy(std::begin(im.opsin_biases), std::end(im.opsin_biases),
+            opsin_params.opsin_biases);
+  for (int i = 0; i < 3; ++i) {
+    opsin_params.opsin_biases_cbrt[i] = cbrtf(opsin_params.opsin_biases[i]);
+  }
+  opsin_params.opsin_biases_cbrt[3] = opsin_params.opsin_biases[3] = 1;
+  std::copy(std::begin(im.quant_biases), std::end(im.quant_biases),
+            opsin_params.quant_biases);
+  return true;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_xyb.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_xyb.h
new file mode 100644
index 0000000000..affdef11c1
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/dec_xyb.h
@@ -0,0 +1,71 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_XYB_H_
+#define LIB_JXL_DEC_XYB_H_
+
+// XYB -> linear sRGB.
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_metadata.h"
+#include "lib/jxl/opsin_params.h"
+
+namespace jxl {
+
+// Parameters for XYB->sRGB conversion.
+struct OpsinParams {
+  float inverse_opsin_matrix[9 * 4];  // 3x3 matrix, four floats per entry
+  float opsin_biases[4];
+  float opsin_biases_cbrt[4];  // cube roots matching opsin_biases
+  float quant_biases[4];
+  void Init(float intensity_target);  // (re)initializes all four arrays
+};
+
+struct OutputEncodingInfo {
+  ColorEncoding color_encoding;
+  // Used for Gamma and DCI transfer functions; Set() only assigns it for those.
+  float inverse_gamma = 1.0f;  // defined value, so copies never read garbage
+  // Contains an opsin matrix that converts to the primaries of the output
+  // encoding.
+  OpsinParams opsin_params;
+  // default_enc is used for xyb encoded image with ICC profile, in other
+  // cases it has no effect. Use linear sRGB or grayscale if ICC profile is
+  // not matched (not parsed or no matching ColorEncoding exists)
+  Status Set(const CodecMetadata& metadata, const ColorEncoding& default_enc);
+  bool all_default_opsin = true;  // Set() clears this for non-default params
+  bool color_encoding_is_original = false;  // true once Set() keeps the original
+};
+
+// Converts `inout` (not padded) from opsin to linear sRGB in-place. Called from
+// per-pass postprocessing, hence parallelized.
+void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool,
+                          const OpsinParams& opsin_params);
+
+// Converts `opsin:rect` (opsin may be padded, rect.x0 must be vector-aligned)
+// to linear sRGB. Called from whole-frame encoder, hence parallelized.
+void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool,
+                   Image3F* JXL_RESTRICT linear,
+                   const OpsinParams& opsin_params);
+
+// Bt.601 to match JPEG/JFIF. Inputs are _signed_ YCbCr values suitable for DCT,
+// see F.1.1.3 of T.81 (because our data type is float, there is no need to add
+// a bias to make the values unsigned).
+void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect);
+
+bool HasFastXYBTosRGB8();
+void FastXYBTosRGB8(const Image3F& input, const Rect& input_rect,
+                    const Rect& output_buf_rect, const ImageF* alpha,
+                    const Rect& alpha_rect, bool is_rgba,
+                    uint8_t* JXL_RESTRICT output_buf, size_t xsize,
+                    size_t output_stride);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_XYB_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/decode.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/decode.cc
new file mode 100644
index 0000000000..78c7d8d8e8
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/decode.cc
@@ -0,0 +1,2217 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "jxl/decode.h"
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/dec_frame.h"
+#include "lib/jxl/dec_modular.h"
+#include "lib/jxl/dec_reconstruct.h"
+#include "lib/jxl/decode_to_jpeg.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/icc_codec.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/memory_manager_internal.h"
+#include "lib/jxl/toc.h"
+
+#ifndef JPEGXL_MAJOR_VERSION
+#define JPEGXL_MAJOR_VERSION 0
+#define JPEGXL_MINOR_VERSION 5
+#define JPEGXL_PATCH_VERSION 0
+#endif
+
+namespace {
+
+// If set (by fuzzer) then some operations will fail, if those would require
+// allocating large objects. Actual memory usage might be two orders of
+// magnitude bigger.
+// TODO(eustas): this is a poor man's replacement for the memory-manager
+//               approach; remove once the memory manager actually works.
+size_t memory_limit_base_ = 0;
+size_t cpu_limit_base_ = 0;
+size_t used_cpu_base_ = 0;
+
+// True if an xsize x ysize image fits the pixel budget (no budget: always).
+bool CheckSizeLimit(size_t xsize, size_t ysize) {
+  const size_t limit = memory_limit_base_;
+  if (limit == 0 || xsize == 0 || ysize == 0) return true;
+  const size_t num_pixels = xsize * ysize;
+  const bool wrapped = num_pixels / xsize != ysize;  // product overflowed
+  return !wrapped && num_pixels <= limit;
+}
+
+// Returns true when a + b exceeds size. A sum that wraps around size_t is
+// also reported as out of bounds.
+bool OutOfBounds(size_t a, size_t b, size_t size) {
+  const size_t end = a + b;
+  const bool wrapped = end < a;
+  return wrapped || end > size;
+}
+
+// Returns true when a + b + c exceeds size. If either partial sum wraps around
+// size_t, the range is reported as out of bounds as well.
+bool OutOfBounds(size_t a, size_t b, size_t c, size_t size) {
+  const size_t ab = a + b;
+  if (ab < b) return true;  // a + b wrapped
+  const size_t abc = ab + c;
+  if (abc < c) return true;  // (a + b) + c wrapped
+  return abc > size;
+}
+
+// Returns true if a + b + c cannot be represented in size_t.
+bool SumOverflows(size_t a, size_t b, size_t c) {
+  const size_t ab = a + b;
+  if (ab < b) return true;  // first addition wrapped
+  const size_t abc = ab + c;
+  return abc < c;  // second addition wrapped
+}
+
+JXL_INLINE size_t InitialBasicInfoSizeHint() {
+  // Bytes that precede the codestream in the container format when the
+  // codestream is the first box after the signature and filetype boxes:
+  // 12 bytes signature box + 20 bytes filetype box + 16 bytes codestream box
+  // length + name + optional XLBox length.
+  constexpr size_t kContainerHeaderBytes = 48;
+
+  // Worst-case size of the basic info in the codestream header, i.e.
+  // everything up to and including extra_channel_bits: around 2 bytes
+  // signature + 8 bytes SizeHeader + 31 bytes ColorEncoding + 4 bytes rest of
+  // ImageMetadata + 5 bytes part of ImageMetadata2.
+  // TODO(lode): recompute and update this value when alpha_bits is moved to
+  // extra channels info.
+  constexpr size_t kMaxCodestreamBasicInfoBytes = 50;
+
+  return kContainerHeaderBytes + kMaxCodestreamBasicInfoBytes;
+}
+
+// Like JXL_FAILURE, but evaluates to JXL_DEC_ERROR. Logs file:line plus the
+// message (only if JXL_DEBUG_ON_ERROR), or logs and aborts if crash-on-error.
+#ifdef JXL_CRASH_ON_ERROR
+#define JXL_API_ERROR(format, ...)                                           \
+  (::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__), \
+   ::jxl::Abort(), JXL_DEC_ERROR)
+#else  // JXL_CRASH_ON_ERROR
+#define JXL_API_ERROR(format, ...)                                             \
+  (((JXL_DEBUG_ON_ERROR) &&                                                    \
+    ::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__)), \
+   JXL_DEC_ERROR)
+#endif  // JXL_CRASH_ON_ERROR
+
+JxlDecoderStatus ConvertStatus(JxlDecoderStatus status) { return status; }
+// Overload for jxl::Status: any failure is reported as JXL_DEC_ERROR.
+JxlDecoderStatus ConvertStatus(jxl::Status status) {
+  return status ? JXL_DEC_SUCCESS : JXL_DEC_ERROR;
+}
+
+// Detects the JPEG XL signature in buf, starting at offset *pos (len is the
+// total size of buf). On a codestream or container match, *pos is advanced
+// past the signature; in all other cases *pos is left untouched. Reports
+// JXL_SIG_NOT_ENOUGH_BYTES when more input is needed to decide.
+JxlSignature ReadSignature(const uint8_t* buf, size_t len, size_t* pos) {
+  if (*pos >= len) return JXL_SIG_NOT_ENOUGH_BYTES;
+
+  // View of the not-yet-consumed input; `avail` is at least 1 from here on.
+  const uint8_t* sig = buf + *pos;
+  const size_t avail = len - *pos;
+
+  // JPEG XL codestream: 0xff 0x0a
+  if (sig[0] == 0xff) {
+    if (avail < 2) return JXL_SIG_NOT_ENOUGH_BYTES;
+    if (sig[1] != jxl::kCodestreamMarker) return JXL_SIG_INVALID;
+    *pos += 2;
+    return JXL_SIG_CODESTREAM;
+  }
+
+  // JPEG XL container: a 12-byte signature box whose first byte is 0.
+  if (sig[0] == 0) {
+    // Bytes 1..11 of the signature box (byte 0 was matched above).
+    static const uint8_t kBoxTail[11] = {0,   0,   0xC, 'J',  'X', 'L',
+                                         ' ', 0xD, 0xA, 0x87, 0xA};
+    if (avail < 12) return JXL_SIG_NOT_ENOUGH_BYTES;
+    for (size_t i = 0; i < 11; ++i) {
+      if (sig[1 + i] != kBoxTail[i]) return JXL_SIG_INVALID;
+    }
+    *pos += 12;
+    return JXL_SIG_CONTAINER;
+  }
+
+  // Any other leading byte is neither a codestream nor a container.
+  return JXL_SIG_INVALID;
+}
+
+}  // namespace
+
+uint32_t JxlDecoderVersion(void) {  // major * 1000000 + minor * 1000 + patch
+  return (JPEGXL_MAJOR_VERSION * 1000 + JPEGXL_MINOR_VERSION) * 1000 +
+         JPEGXL_PATCH_VERSION;
+}
+
+JxlSignature JxlSignatureCheck(const uint8_t* buf, size_t len) {
+  size_t pos = 0;  // check at the start of buf; the advanced pos is discarded
+  return ReadSignature(buf, len, &pos);
+}
+
+namespace {
+
+// Bits per sample of data_type, or 0 for a value outside JxlDataType.
+size_t BitsPerChannel(JxlDataType data_type) {
+  switch (data_type) {
+    case JXL_TYPE_BOOLEAN:
+      return 1;
+    case JXL_TYPE_UINT8:
+      return 8;
+    case JXL_TYPE_UINT16:
+    case JXL_TYPE_FLOAT16:
+      return 16;
+    case JXL_TYPE_UINT32:
+    case JXL_TYPE_FLOAT:
+      return 32;
+      // Deliberately no default, so the compiler can flag unhandled types.
+  }
+  // Only reached for a value that matches none of the enumerators.
+  return 0;
+}
+
+enum class DecoderStage : uint32_t {  // Lifecycle of a decoder object.
+  kInited,    // Decoder created, no JxlDecoderProcessInput called yet
+  kStarted,   // Running JxlDecoderProcessInput calls
+  kFinished,  // Everything done, nothing left to process
+  kError,     // Error occurred, decoder object no longer usable
+};
+
+enum class FrameStage : uint32_t {  // What must be done next for a frame.
+  kHeader,      // Must parse frame header. dec->frame_start must be set up
+                // correctly already.
+  kTOC,         // Must parse TOC
+  kFull,        // Must parse full pixels
+  kFullOutput,  // Must output full pixels
+};
+
+// Manages the sections for the FrameDecoder based on input bytes received.
+struct Sections {
+  // sections_begin = position in the frame where the sections begin, after
+  // the frame header and TOC, so sections_begin = sum of frame header size and
+  // TOC size.
+  Sections(jxl::FrameDecoder* frame_dec, size_t frame_size,
+           size_t sections_begin)
+      : frame_dec_(frame_dec),
+        frame_size_(frame_size),
+        sections_begin_(sections_begin) {}
+  // Neither copyable nor movable: section_info holds owning raw pointers.
+  Sections(const Sections&) = delete;
+  Sections& operator=(const Sections&) = delete;
+  Sections(Sections&&) = delete;
+  Sections& operator=(Sections&&) = delete;
+
+  ~Sections() {
+    // Avoid memory leaks if the JXL decoder quits early and doesn't end up
+    // calling CloseInput(). The status returned by CloseInput() is ignored.
+    CloseInput();
+  }
+
+  // frame_dec_ must have been Inited already, but not yet done ProcessSections.
+  JxlDecoderStatus Init() {
+    section_received.resize(frame_dec_->NumSections(), 0);
+
+    const auto& offsets = frame_dec_->SectionOffsets();
+    const auto& sizes = frame_dec_->SectionSizes();
+
+    // Ensure every section ends within frame_size_ and no offset sum overflows.
+    for (size_t i = 0; i < frame_dec_->NumSections(); i++) {
+      if (OutOfBounds(sections_begin_, offsets[i], sizes[i], frame_size_)) {
+        return JXL_API_ERROR("section out of bounds");
+      }
+    }
+
+    return JXL_DEC_SUCCESS;
+  }
+
+  // Sets the input data for the frame. The frame pointer must point to the
+  // beginning of the frame, size is the amount of bytes gotten so far and
+  // should increase with next calls until the full frame is loaded.
+  // TODO(lode): allow caller to provide only later chunks of memory when
+  // earlier sections are fully processed already.
+  void SetInput(const uint8_t* frame, size_t size) {
+    const auto& offsets = frame_dec_->SectionOffsets();
+    const auto& sizes = frame_dec_->SectionSizes();
+    // Queue every not-yet-received section that is fully contained in size.
+    for (size_t i = 0; i < frame_dec_->NumSections(); i++) {
+      if (section_received[i]) continue;
+      if (!OutOfBounds(sections_begin_, offsets[i], sizes[i], size)) {
+        section_received[i] = 1;
+        section_info.emplace_back(jxl::FrameDecoder::SectionInfo{nullptr, i});
+        section_status.emplace_back();
+      }
+    }
+    // Reset all the bitreaders, because the address of the frame pointer may
+    // change, even if it always represents the same frame start.
+    for (size_t i = 0; i < section_info.size(); i++) {
+      size_t id = section_info[i].id;
+      JXL_ASSERT(section_info[i].br == nullptr);  // no reader may still be open
+      section_info[i].br = new jxl::BitReader(jxl::Span<const uint8_t>(
+          frame + sections_begin_ + offsets[id], sizes[id]));
+    }
+  }
+  // Closes and deletes all open bit readers; errors if any read out of bounds.
+  JxlDecoderStatus CloseInput() {
+    bool out_of_bounds = false;
+    for (size_t i = 0; i < section_info.size(); i++) {
+      if (!section_info[i].br) continue;
+      if (!section_info[i].br->AllReadsWithinBounds()) {
+        // Mark out of bounds section, but keep closing and deleting the next
+        // ones as well.
+        out_of_bounds = true;
+      }
+      JXL_ASSERT(section_info[i].br->Close());
+      delete section_info[i].br;
+      section_info[i].br = nullptr;
+    }
+    if (out_of_bounds) {
+      // If any bit reader indicates out of bounds, it's an error, not just
+      // needing more input, since we ensure only bit readers containing
+      // a complete section are provided to the FrameDecoder.
+      return JXL_API_ERROR("frame out of bounds");
+    }
+    return JXL_DEC_SUCCESS;
+  }
+
+  // Not managed by us.
+  jxl::FrameDecoder* frame_dec_;
+
+  size_t frame_size_;
+  size_t sections_begin_;
+  // SetInput() appends to section_info and section_status in pairs.
+  std::vector<jxl::FrameDecoder::SectionInfo> section_info;
+  std::vector<jxl::FrameDecoder::SectionStatus> section_status;
+  std::vector<char> section_received;  // [i] is 1 once section i was queued
+};
+
+/*
+Given the list of frame references to storage slots, and the storage slots in
+which each frame is saved, computes which frames are required to decode the
+frame at the given index and any frames after it. The frames on which this
+depends are returned as a vector of their indices, in no particular order. The
+given index must be smaller than saved_as.size(), and references.size() must
+equal saved_as.size(). Any frames beyond saved_as and references are considered
+unknown future frames and must be treated as if something depends on them.
+*/
+std::vector<size_t> GetFrameDependencies(size_t index,
+                                         const std::vector<int>& saved_as,
+                                         const std::vector<int>& references) {
+  JXL_ASSERT(references.size() == saved_as.size());
+  JXL_ASSERT(index < references.size());
+
+  std::vector<size_t> result;
+
+  constexpr size_t kNumStorage = 8;
+
+  // value which indicates nothing is stored in this storage slot
+  const size_t invalid = references.size();
+  // for each of the 8 storage slots, a vector that translates frame index to
+  // frame stored in this storage slot at this point, that is, the last
+  // frame that was stored in this slot before or at this index.
+  std::array<std::vector<size_t>, kNumStorage> storage;
+  for (size_t s = 0; s < kNumStorage; ++s) {
+    storage[s].resize(saved_as.size());
+    int mask = 1 << s;  // bit s of a saved_as/references value means slot s
+    size_t id = invalid;
+    for (size_t i = 0; i < saved_as.size(); ++i) {
+      if (saved_as[i] & mask) {
+        id = i;
+      }
+      storage[s][i] = id;
+    }
+  }
+
+  std::vector<char> seen(index + 1, 0);  // visited frames are all <= index
+  std::vector<size_t> stack;
+  stack.push_back(index);
+  seen[index] = 1;
+
+  // For frames after index, assume they can depend on any of the 8 storage
+  // slots, so push the frame for each stored reference to the stack and result.
+  // All frames after index are treated as having unknown references and with
+  // the possibility that there are more frames after the last known.
+  // TODO(lode): take values of saved_as and references after index, and a
+  // input flag indicating if they are all frames of the image, to further
+  // optimize this.
+  for (size_t s = 0; s < kNumStorage; ++s) {
+    size_t frame_ref = storage[s][index];
+    if (frame_ref == invalid) continue;
+    if (seen[frame_ref]) continue;
+    stack.push_back(frame_ref);
+    seen[frame_ref] = 1;
+    result.push_back(frame_ref);
+  }
+  // Depth-first walk over the references of every frame collected so far.
+  while (!stack.empty()) {
+    size_t frame_index = stack.back();
+    stack.pop_back();
+    if (frame_index == 0) continue;  // first frame cannot have references
+    for (size_t s = 0; s < kNumStorage; ++s) {
+      int mask = 1 << s;
+      if (!(references[frame_index] & mask)) continue;
+      size_t frame_ref = storage[s][frame_index - 1];  // slot content before it
+      if (frame_ref == invalid) continue;
+      if (seen[frame_ref]) continue;
+      stack.push_back(frame_ref);
+      seen[frame_ref] = 1;
+      result.push_back(frame_ref);
+    }
+  }
+
+  return result;
+}
+
+}  // namespace
+
+// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
+struct JxlDecoderStruct {
+  JxlDecoderStruct() = default;
+  // Most fields have no initializer here; JxlDecoderReset() assigns them.
+  JxlMemoryManager memory_manager;
+  std::unique_ptr<jxl::ThreadPool> thread_pool;
+
+  DecoderStage stage;  // see DecoderStage
+
+  // Status of progression, internal.
+  bool got_signature;
+  bool first_codestream_seen;
+  // Indicates we know that we've seen the last codestream, however this is not
+  // guaranteed to be true for the last box because a jxl file may have multiple
+  // "jxlp" boxes and it is possible (and permitted) that the last one is not a
+  // final box that uses size 0 to indicate the end.
+  bool last_codestream_seen;
+  bool got_basic_info;
+  size_t header_except_icc_bits = 0;  // To skip everything before ICC.
+  bool got_all_headers;               // Codestream metadata headers.
+  bool post_headers;                  // Already decoding pixels.
+  jxl::ICCReader icc_reader;
+
+  // This means either we actually got the preview image, or determined we
+  // cannot get it or there is none.
+  bool got_preview_image;
+
+  // Position of next_in in the original file including box format if present
+  // (as opposed to position in the codestream)
+  size_t file_pos;
+  size_t box_begin;
+  size_t box_end;
+  bool skip_box;
+  // Begin and end of the content of the current codestream box. This could be
+  // a partial codestream box.
+  // codestream_begin 0 is used to indicate the begin is not yet known.
+  // codestream_end 0 is used to indicate uncapped (until end of file, for the
+  // last box if this box doesn't indicate its actual size).
+  // Not used if the file is a direct codestream.
+  size_t codestream_begin;
+  size_t codestream_end;
+
+  // Settings
+  bool keep_orientation;
+
+  // Bitfield, for which informative events (JXL_DEC_BASIC_INFO, etc...) the
+  // decoder returns a status. By default, do not return for any of the events,
+  // only return when the decoder cannot continue because it needs more input or
+  // output data.
+  int events_wanted;
+  int orig_events_wanted;  // value as subscribed; restored by JxlDecoderRewind
+
+  // Fields for reading the basic info from the header.
+  size_t basic_info_size_hint;
+  bool have_container;
+
+  // Whether the preview out buffer was set. It is possible for the buffer to
+  // be nullptr and buffer_set to be true, indicating it was deliberately
+  // set to nullptr.
+  bool preview_out_buffer_set;
+  // Idem for the image buffer.
+  bool image_out_buffer_set;
+
+  // Owned by the caller, buffers for DC image and full resolution images
+  void* preview_out_buffer;
+  void* image_out_buffer;
+  JxlImageOutCallback image_out_callback;
+  void* image_out_opaque;
+
+  size_t preview_out_size;
+  size_t image_out_size;
+
+  // TODO(lode): merge these?
+  JxlPixelFormat preview_out_format;
+  JxlPixelFormat image_out_format;
+
+  jxl::CodecMetadata metadata;
+  std::unique_ptr<jxl::ImageBundle> ib;
+  // ColorEncoding to use for xyb encoded image with ICC profile.
+  jxl::ColorEncoding default_enc;
+
+  std::unique_ptr<jxl::PassesDecoderState> passes_state;
+  std::unique_ptr<jxl::FrameDecoder> frame_dec;
+  std::unique_ptr<Sections> sections;
+  // The FrameDecoder is initialized, and not yet finalized
+  bool frame_dec_in_progress;
+
+  // Header of the current frame. NOTE(review): the original comment refers to
+  // a `got_toc` flag that is not a member of this struct; it said this is then
+  // the header of the last frame of the current still series (displayed frame).
+  std::unique_ptr<jxl::FrameHeader> frame_header;
+
+  // Start of the current frame being processed, as offset from the beginning of
+  // the codestream.
+  size_t frame_start;
+  size_t frame_size;
+  FrameStage frame_stage;
+  // The currently processed frame is the last of the current composite still,
+  // and so must be returned as pixels
+  bool is_last_of_still;
+  // The currently processed frame is the last of the codestream
+  bool is_last_total;
+  // How many frames to skip.
+  size_t skip_frames;
+  // Skipping the current frame. May be false if skip_frames was just set to
+  // a positive value while already processing a current frame, then
+  // skipping_frame will be enabled only for the next frame.
+  bool skipping_frame;
+
+  // Amount of internal frames and external frames started. External frames are
+  // user-visible frames, internal frames includes all external frames and
+  // also invisible frames such as patches, blending-only and dc_level frames.
+  size_t internal_frames;
+  size_t external_frames;
+
+  // For each internal frame, which storage locations it references, and which
+  // storage locations it is stored in, using the bit mask as defined in
+  // FrameDecoder::References and FrameDecoder::SaveAs.
+  std::vector<int> frame_references;
+  std::vector<int> frame_saved_as;
+
+  // Translates external frame index to internal frame index. The external
+  // index is the index of user-visible frames. The internal index can be larger
+  // since non-visible frames (such as frames with patches, ...) are included.
+  std::vector<size_t> frame_external_to_internal;
+
+  // Whether the frame with internal index is required to decode the frame
+  // being skipped to or any frames after that. If no skipping is active,
+  // this vector is ignored. If the current internal frame index is beyond this
+  // vector, it must be treated as a required frame.
+  std::vector<char> frame_required;
+
+  // Codestream input data is stored here, when the decoder takes in and stores
+  // the user input bytes. If the decoder does not do that (e.g. in one-shot
+  // case), this field is unused.
+  // TODO(lode): avoid needing this field once the C++ decoder doesn't need
+  // all bytes at once, to save memory. Find alternative to std::vector doubling
+  // strategy to prevent some memory usage.
+  std::vector<uint8_t> codestream;
+
+  jxl::JxlToJpegDecoder jpeg_decoder;
+
+  // Position in the actual codestream, which codestream.begin() points to.
+  // Non-zero once earlier parts of the codestream vector have been erased.
+  size_t codestream_pos;
+
+  // Statistics which CodecInOut can keep
+  uint64_t dec_pixels;
+  // Input cursor and remaining byte count (presumably caller-owned; confirm).
+  const uint8_t* next_in;
+  size_t avail_in;
+};
+
+// TODO(zond): Make this depend on the data loaded into the decoder.
+JxlDecoderStatus JxlDecoderDefaultPixelFormat(const JxlDecoder* dec,
+                                              JxlPixelFormat* format) {
+  if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT;
+  // Fixed default; presumably {num_channels, data_type, endianness, align}.
+  *format = {4, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+  return JXL_DEC_SUCCESS; }
+
+// Puts every state field assigned below back to its initial value. Also run by
+// JxlDecoderCreate and JxlDecoderRewind; dec->memory_manager is not touched.
+void JxlDecoderReset(JxlDecoder* dec) {
+  dec->thread_pool.reset();
+  dec->stage = DecoderStage::kInited;
+  dec->got_signature = false;
+  dec->first_codestream_seen = false;
+  dec->last_codestream_seen = false;
+  dec->got_basic_info = false;
+  dec->header_except_icc_bits = 0;
+  dec->got_all_headers = false;
+  dec->post_headers = false;
+  dec->icc_reader.Reset();
+  dec->got_preview_image = false;
+  dec->file_pos = 0;
+  dec->box_begin = 0;
+  dec->box_end = 0;
+  dec->skip_box = false;
+  dec->codestream_pos = 0;
+  dec->codestream_begin = 0;
+  dec->codestream_end = 0;
+  dec->keep_orientation = false;
+  dec->events_wanted = 0;
+  dec->orig_events_wanted = 0;
+  dec->basic_info_size_hint = InitialBasicInfoSizeHint();
+  dec->have_container = false;
+  dec->preview_out_buffer_set = false;
+  dec->image_out_buffer_set = false;
+  dec->preview_out_buffer = nullptr;
+  dec->image_out_buffer = nullptr;
+  dec->image_out_callback = nullptr;
+  dec->image_out_opaque = nullptr;
+  dec->preview_out_size = 0;
+  dec->image_out_size = 0;
+  dec->dec_pixels = 0;
+  dec->next_in = nullptr;
+  dec->avail_in = 0;
+
+  dec->passes_state.reset(nullptr);
+  dec->frame_dec.reset(nullptr);
+  dec->sections.reset(nullptr);
+  dec->frame_dec_in_progress = false;
+  dec->ib.reset();
+  dec->metadata = jxl::CodecMetadata();
+  dec->frame_header.reset(new jxl::FrameHeader(&dec->metadata));
+  dec->codestream.clear();
+  dec->frame_stage = FrameStage::kHeader;
+  dec->frame_start = 0;
+  dec->frame_size = 0;
+  dec->is_last_of_still = false;
+  dec->is_last_total = false;
+  dec->skip_frames = 0;
+  dec->skipping_frame = false;
+  dec->internal_frames = 0;
+  dec->external_frames = 0;
+  dec->frame_references.clear();
+  dec->frame_saved_as.clear();
+  dec->frame_external_to_internal.clear();
+  dec->frame_required.clear();
+}
+
+// Allocates a decoder with a manager initialized from memory_manager via
+// jxl::MemoryManagerInit, then resets it. Returns nullptr on failure.
+JxlDecoder* JxlDecoderCreate(const JxlMemoryManager* memory_manager) {
+  JxlMemoryManager manager;
+  if (!jxl::MemoryManagerInit(&manager, memory_manager)) {
+    return nullptr;
+  }
+  void* raw = jxl::MemoryManagerAlloc(&manager, sizeof(JxlDecoder));
+  if (raw == nullptr) return nullptr;
+  // Construct in place on the allocated storage, and keep the manager in the
+  // object: JxlDecoderDestroy frees the decoder through it.
+  JxlDecoder* dec = new (raw) JxlDecoder();
+  dec->memory_manager = manager;
+  JxlDecoderReset(dec);
+  return dec;
+}
+
+void JxlDecoderDestroy(JxlDecoder* dec) {
+  if (dec == nullptr) return;  // destroying nullptr is a no-op
+  // Copy the manager first: members must not be read after the destructor
+  // has run, because the object's lifetime has ended by then.
+  JxlMemoryManager memory_manager = dec->memory_manager;
+  dec->~JxlDecoder();  // explicit call, since a custom free function is used
+  jxl::MemoryManagerFree(&memory_manager, dec); }
+
+// Rewinds to the start, keeping settings, events and frame bookkeeping.
+void JxlDecoderRewind(JxlDecoder* dec) {
+  const bool saved_keep_orientation = dec->keep_orientation;
+  const int saved_events = dec->orig_events_wanted;
+  // Park the frame bookkeeping in locals, out of reach of JxlDecoderReset.
+  std::vector<int> references, saved_as;
+  std::vector<size_t> external_to_internal;
+  std::vector<char> required;
+  dec->frame_references.swap(references);
+  dec->frame_saved_as.swap(saved_as);
+  dec->frame_external_to_internal.swap(external_to_internal);
+  dec->frame_required.swap(required);
+  JxlDecoderReset(dec);
+  dec->keep_orientation = saved_keep_orientation;
+  dec->orig_events_wanted = saved_events;
+  dec->events_wanted = saved_events;
+  dec->frame_references.swap(references);
+  dec->frame_saved_as.swap(saved_as);
+  dec->frame_external_to_internal.swap(external_to_internal);
+  dec->frame_required.swap(required);
+}
+
+// Adds `amount` to the number of frames to skip and recomputes which already
+// known frames are still required to decode the frame that is skipped to.
+void JxlDecoderSkipFrames(JxlDecoder* dec, size_t amount) {
+  // The amount is accumulated rather than overwritten. Making it smaller is
+  // impossible, because the decoder may already have skipped frames that
+  // earlier frames would need. Setting a larger absolute value is not well
+  // defined either: if this is called while frames are already being
+  // skipped, the caller cannot know how many frames were skipped internally
+  // so far.
+  dec->skip_frames += amount;
+  dec->frame_required.clear();
+
+  // Only a frame that was already seen before a rewind has known
+  // dependencies; otherwise frame_required stays empty.
+  const size_t target = dec->external_frames + dec->skip_frames;
+  if (target >= dec->frame_external_to_internal.size()) return;
+  const size_t internal = dec->frame_external_to_internal[target];
+  if (internal >= dec->frame_saved_as.size()) return;
+
+  // Flag every frame that the target, or any frame after it, depends on.
+  const std::vector<size_t> deps = GetFrameDependencies(
+      internal, dec->frame_saved_as, dec->frame_references);
+  dec->frame_required.resize(internal + 1, 0);
+  for (size_t dep : deps) {
+    JXL_ASSERT(dep < dec->frame_required.size());
+    dec->frame_required[dep] = 1;
+  }
+}
+
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetParallelRunner(JxlDecoder* dec, JxlParallelRunner parallel_runner,
+                            void* parallel_runner_opaque) {
+  if (dec->thread_pool) return JXL_API_ERROR("parallel runner already set");
+  dec->thread_pool.reset(  // pool built from the caller's runner and opaque
+      new jxl::ThreadPool(parallel_runner, parallel_runner_opaque));
+  return JXL_DEC_SUCCESS;
+}
+
+size_t JxlDecoderSizeHintBasicInfo(const JxlDecoder* dec) {
+  // Once the basic info has been read, the hint is 0.
+  return dec->got_basic_info ? 0 : dec->basic_info_size_hint;
+}
+
+// Selects the informative events to report; allowed only before decoding.
+JxlDecoderStatus JxlDecoderSubscribeEvents(JxlDecoder* dec, int events_wanted) {
+  // Cannot subscribe to events after having started.
+  if (dec->stage != DecoderStage::kInited) return JXL_DEC_ERROR;
+  // Bits 0..5 (mask 63) are not informative events and cannot be subscribed.
+  constexpr int kNonInformativeBits = 63;
+  if ((events_wanted & kNonInformativeBits) != 0) return JXL_DEC_ERROR;
+  dec->orig_events_wanted = events_wanted;
+  dec->events_wanted = events_wanted;
+  return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderSetKeepOrientation(JxlDecoder* dec,
+                                              JXL_BOOL keep_orientation) {
+  if (dec->stage != DecoderStage::kInited) {  // option is fixed once started
+    return JXL_API_ERROR("Must set keep_orientation option before starting");
+  }
+  dec->keep_orientation = !!keep_orientation;  // normalize JXL_BOOL to bool
+  return JXL_DEC_SUCCESS;
+}
+
+namespace jxl {
+namespace {
+
+template <class T>
+bool CanRead(Span<const uint8_t> data, BitReader* reader, T* JXL_RESTRICT t) {
+  // Probe on a scratch reader: Bundle::CanRead advances the reader it gets.
+  BitReader probe(data);
+  probe.SkipBits(reader->TotalBitsConsumed());
+  const bool readable = Bundle::CanRead(&probe, t);
+  JXL_ASSERT(probe.Close());
+  return readable;
+}
+
+// Reads bundle *t from reader. Yields JXL_DEC_NEED_MORE_INPUT when CanRead
+// reports that the bundle cannot be read from data yet (reader is not
+// advanced in that case), JXL_DEC_ERROR when Bundle::Read fails, and
+// JXL_DEC_SUCCESS otherwise.
+template <class T>
+JxlDecoderStatus ReadBundle(Span<const uint8_t> data, BitReader* reader,
+                            T* JXL_RESTRICT t) {
+  // Dry run first, on a copy of the reader position.
+  if (!CanRead(data, reader, t)) return JXL_DEC_NEED_MORE_INPUT;
+  // Real read; a failure at this point is reported as an error.
+  const bool parsed = Bundle::Read(reader, t);
+  return parsed ? JXL_DEC_SUCCESS : JXL_DEC_ERROR;
+}
+
+#define JXL_API_RETURN_IF_ERROR(expr)               \
+  {                                                 \
+    JxlDecoderStatus status_ = ConvertStatus(expr); \
+    if (status_ != JXL_DEC_SUCCESS) return status_; \
+  }  // NOTE(review): bare block, not do/while(0); avoid in unbraced if/else
+
+std::unique_ptr<BitReader, std::function<void(BitReader*)>> GetBitReader(
+    Span<const uint8_t> span) {
+  using Deleter = std::function<void(BitReader*)>;
+  // Check bounds and ignore Close()'s result before deleting, so an invalid
+  // codestream cannot abort the program and return paths need not call
+  // AllReadsWithinBounds() themselves.
+  const Deleter close_and_delete = [](BitReader* br) {
+    (void)br->AllReadsWithinBounds();
+    (void)br->Close();
+    delete br;
+  };
+  return std::unique_ptr<BitReader, Deleter>(new BitReader(span),
+                                             close_and_delete);
+}
+
+JxlDecoderStatus JxlDecoderReadBasicInfo(JxlDecoder* dec, const uint8_t* in,
+                                         size_t size) {
+  size_t pos = 0;
+  // Fills dec->metadata.size and the basic-info part of dec->metadata.m.
+  // Check and skip the codestream signature
+  JxlSignature signature = ReadSignature(in, size, &pos);
+  if (signature == JXL_SIG_NOT_ENOUGH_BYTES) {
+    return JXL_DEC_NEED_MORE_INPUT;
+  }
+  if (signature == JXL_SIG_CONTAINER) {
+    // There is a container signature where we expect a codestream, container
+    // is handled at a higher level already.
+    return JXL_API_ERROR("invalid: nested container");
+  }
+  if (signature != JXL_SIG_CODESTREAM) {
+    return JXL_API_ERROR("invalid signature");
+  }
+
+  Span<const uint8_t> span(in + pos, size - pos);
+  auto reader = GetBitReader(span);
+  JXL_API_RETURN_IF_ERROR(ReadBundle(span, reader.get(), &dec->metadata.size));
+  // The flag set here is cleared again only if the read below succeeds.
+  dec->metadata.m.nonserialized_only_parse_basic_info = true;
+  JXL_API_RETURN_IF_ERROR(ReadBundle(span, reader.get(), &dec->metadata.m));
+  dec->metadata.m.nonserialized_only_parse_basic_info = false;
+  dec->got_basic_info = true;
+  dec->basic_info_size_hint = 0;
+  // got_basic_info stays set even if the size check below rejects the image.
+  if (!CheckSizeLimit(dec->metadata.size.xsize(), dec->metadata.size.ysize())) {
+    return JXL_API_ERROR("image is too large");
+  }
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Reads all codestream headers (but not frame headers)
+JxlDecoderStatus JxlDecoderReadAllHeaders(JxlDecoder* dec, const uint8_t* in,
+                                          size_t size) {
+  size_t pos = 0;
+  // Check and skip the codestream signature. NOTE(review): a result of
+  // JXL_SIG_NOT_ENOUGH_BYTES is reported as "invalid signature" here.
+  JxlSignature signature = ReadSignature(in, size, &pos);
+  if (signature == JXL_SIG_CONTAINER) {
+    return JXL_API_ERROR("invalid: nested container");
+  }
+  if (signature != JXL_SIG_CODESTREAM) {
+    return JXL_API_ERROR("invalid signature");
+  }
+
+  Span<const uint8_t> span(in + pos, size - pos);
+  auto reader = GetBitReader(span);
+
+  if (dec->header_except_icc_bits != 0) {
+    // Headers were decoded already.
+    reader->SkipBits(dec->header_except_icc_bits);
+  } else {
+    SizeHeader dummy_size_header;
+    JXL_API_RETURN_IF_ERROR(ReadBundle(span, reader.get(), &dummy_size_header));
+
+    // We already decoded the metadata to dec->metadata.m, no reason to
+    // overwrite it, use a dummy metadata instead.
+    ImageMetadata dummy_metadata;
+    JXL_API_RETURN_IF_ERROR(ReadBundle(span, reader.get(), &dummy_metadata));
+
+    JXL_API_RETURN_IF_ERROR(
+        ReadBundle(span, reader.get(), &dec->metadata.transform_data));
+  }
+  // Remember the bit offset reached, so a later call can skip these headers.
+  dec->header_except_icc_bits = reader->TotalBitsConsumed();
+
+  if (dec->metadata.m.color_encoding.WantICC()) {
+    jxl::Status status = dec->icc_reader.Init(reader.get(), memory_limit_base_);
+    // Always check AllReadsWithinBounds, not all of the C++ decoder
+    // implementation handles reader out of bounds correctly yet (e.g. context
+    // map). Not checking AllReadsWithinBounds can cause reader->Close() to
+    // trigger an assert, but the library must not quit on invalid codestreams.
+    if (!reader->AllReadsWithinBounds()) {
+      return JXL_DEC_NEED_MORE_INPUT;
+    }
+    if (!status) {
+      if (status.code() == StatusCode::kNotEnoughBytes) {
+        return JXL_DEC_NEED_MORE_INPUT;
+      }
+      // Other non-successful status is an error
+      return JXL_DEC_ERROR;
+    }
+    PaddedBytes icc;
+    status = dec->icc_reader.Process(reader.get(), &icc);
+    if (!status) {
+      if (status.code() == StatusCode::kNotEnoughBytes) {
+        return JXL_DEC_NEED_MORE_INPUT;
+      }
+      // Other non-successful status is an error
+      return JXL_DEC_ERROR;
+    }
+    if (!dec->metadata.m.color_encoding.SetICCRaw(std::move(icc))) {
+      return JXL_DEC_ERROR;
+    }
+  }
+
+  dec->got_all_headers = true;
+  JXL_API_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+  // The first frame starts at the byte boundary after the headers.
+  dec->frame_start = pos + reader->TotalBitsConsumed() / jxl::kBitsPerByte;
+
+  if (!dec->passes_state) {
+    dec->passes_state.reset(new jxl::PassesDecoderState());
+  }
+  // Default: linear sRGB, grayscale if the image's color encoding is gray.
+  dec->default_enc =
+      ColorEncoding::LinearSRGB(dec->metadata.m.color_encoding.IsGray());
+
+  JXL_API_RETURN_IF_ERROR(dec->passes_state->output_encoding_info.Set(
+      dec->metadata, dec->default_enc));
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Row stride in bytes for `format`; a given frame overrides the image size.
+static size_t GetStride(const JxlDecoder* dec, const JxlPixelFormat& format,
+                        const jxl::ImageBundle* frame = nullptr) {
+  size_t width;
+  if (frame != nullptr) {
+    width = dec->keep_orientation ? frame->xsize() : frame->oriented_xsize();
+  } else if (!dec->keep_orientation && dec->metadata.m.orientation > 4) {
+    width = dec->metadata.ysize();  // orientation > 4: rows span ysize
+  } else {
+    width = dec->metadata.xsize();
+  }
+  const size_t row_bytes = width * (BitsPerChannel(format.data_type) *
+                                    format.num_channels / jxl::kBitsPerByte);
+  if (format.align <= 1) return row_bytes;
+  return jxl::DivCeil(row_bytes, format.align) * format.align;
+}
+
+static JxlDecoderStatus ConvertImageInternal(const JxlDecoder* dec,
+                                             const jxl::ImageBundle& frame,
+                                             const JxlPixelFormat& format,
+                                             void* out_image, size_t out_size,
+                                             JxlImageOutCallback out_callback,
+                                             void* out_opaque) {
+  // Converts `frame` into the caller's pixel layout (buffer or callback).
+  // TODO(lode): handle mismatch of RGB/grayscale color profiles and pixel data
+  // color/grayscale format
+  const size_t row_stride = GetStride(dec, format, &frame);
+  const bool is_float = format.data_type == JXL_TYPE_FLOAT ||
+                        format.data_type == JXL_TYPE_FLOAT16;
+
+  // Only undo the stored orientation when the caller did not ask to keep it.
+  jxl::Orientation orientation_to_undo = jxl::Orientation::kIdentity;
+  if (!dec->keep_orientation) {
+    orientation_to_undo = dec->metadata.m.GetOrientation();
+  }
+
+  JXL_DASSERT(!dec->frame_dec || !dec->frame_dec->HasRGBBuffer());
+  jxl::Status converted = jxl::ConvertToExternal(
+      frame, BitsPerChannel(format.data_type), is_float, format.num_channels,
+      format.endianness, row_stride, dec->thread_pool.get(), out_image,
+      out_size, out_callback, out_opaque, orientation_to_undo);
+  if (!converted) return JXL_DEC_ERROR;
+  return JXL_DEC_SUCCESS;
+}
+
+// Parses the FrameHeader and the total frame_size, given the initial bytes
+// of the frame up to and including the TOC; the frame starts at in[pos].
+// Returns JXL_DEC_NEED_MORE_INPUT if the header or TOC is truncated and an
+// error if either is invalid or the frame is too large. If saved_as is not
+// null it receives FrameDecoder::SavedAs for the parsed header.
+// TODO(lode): merge this with FrameDecoder
+JxlDecoderStatus ParseFrameHeader(jxl::FrameHeader* frame_header,
+                                  const uint8_t* in, size_t size, size_t pos,
+                                  bool is_preview, size_t* frame_size,
+                                  int* saved_as) {
+  if (pos >= size) return JXL_DEC_NEED_MORE_INPUT;
+  Span<const uint8_t> span(in + pos, size - pos);
+  auto reader = GetBitReader(span);
+
+  frame_header->nonserialized_is_preview = is_preview;
+  jxl::Status status = DecodeFrameHeader(reader.get(), frame_header);
+  if (status.code() == StatusCode::kNotEnoughBytes) {
+    // TODO(lode): prevent asking for way too much input bytes in case of
+    // invalid header that the decoder thinks is a very long user extension
+    // instead. Example: fields can currently print something like this:
+    // "../lib/jxl/fields.cc:416: Skipping 71467322-bit extension(s)"
+    // Maybe fields.cc should return error in the above case rather than
+    // print a message.
+    return JXL_DEC_NEED_MORE_INPUT;
+  } else if (!status) {
+    return JXL_API_ERROR("invalid frame header");
+  }
+
+  // Check the size limit only once the header is known to be complete and
+  // valid, so a truncated header is never rejected on partially read fields.
+  jxl::FrameDimensions frame_dim = frame_header->ToFrameDimensions();
+  if (!CheckSizeLimit(frame_dim.xsize_upsampled_padded,
+                      frame_dim.ysize_upsampled_padded)) {
+    return JXL_API_ERROR("frame is too large");
+  }
+
+  // Read TOC.
+  uint64_t groups_total_size;
+  const bool has_ac_global = true;
+  const size_t toc_entries =
+      NumTocEntries(frame_dim.num_groups, frame_dim.num_dc_groups,
+                    frame_header->passes.num_passes, has_ac_global);
+
+  std::vector<uint64_t> group_offsets;
+  std::vector<uint32_t> group_sizes;
+  status = ReadGroupOffsets(toc_entries, reader.get(), &group_offsets,
+                            &group_sizes, &groups_total_size);
+
+  // TODO(lode): we're actually relying on AllReadsWithinBounds() here
+  // instead of on status.code(), change the internal TOC C++ code to
+  // correctly set the status.code() instead so we can rely on that one.
+  if (!reader->AllReadsWithinBounds() ||
+      status.code() == StatusCode::kNotEnoughBytes) {
+    return JXL_DEC_NEED_MORE_INPUT;
+  } else if (!status) {
+    return JXL_API_ERROR("invalid toc entries");
+  }
+
+  JXL_DASSERT((reader->TotalBitsConsumed() % kBitsPerByte) == 0);
+  JXL_API_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+  size_t header_size = (reader->TotalBitsConsumed() >> 3);
+  *frame_size = header_size + groups_total_size;
+  if (saved_as != nullptr) *saved_as = FrameDecoder::SavedAs(*frame_header);
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Decodes headers, preview and frames from `in`, returning at each wanted event, on error, or when more input is needed. TODO(eustas): no CodecInOut -> no image size reinforcement -> possible OOM.
+JxlDecoderStatus JxlDecoderProcessInternal(JxlDecoder* dec, const uint8_t* in,
+                                           size_t size) {
+  // If no parallel runner is set, use the default
+  // TODO(lode): move this initialization to an appropriate location once the
+  // runner is used to decode pixels.
+  if (!dec->thread_pool) {
+    dec->thread_pool.reset(new jxl::ThreadPool(nullptr, nullptr));
+  }
+
+  // No matter what events are wanted, the basic info is always required.
+  if (!dec->got_basic_info) {
+    JxlDecoderStatus status = JxlDecoderReadBasicInfo(dec, in, size);
+    if (status != JXL_DEC_SUCCESS) return status;
+  }
+
+  if (dec->events_wanted & JXL_DEC_BASIC_INFO) {
+    dec->events_wanted &= ~JXL_DEC_BASIC_INFO;  // report only once
+    return JXL_DEC_BASIC_INFO;
+  }
+
+  if (!dec->got_all_headers) {
+    JxlDecoderStatus status = JxlDecoderReadAllHeaders(dec, in, size);
+    if (status != JXL_DEC_SUCCESS) return status;
+  }
+
+  if (dec->events_wanted & JXL_DEC_EXTENSIONS) {
+    dec->events_wanted &= ~JXL_DEC_EXTENSIONS;
+    if (dec->metadata.m.extensions != 0) {  // silent if none present
+      return JXL_DEC_EXTENSIONS;
+    }
+  }
+
+  if (dec->events_wanted & JXL_DEC_COLOR_ENCODING) {
+    dec->events_wanted &= ~JXL_DEC_COLOR_ENCODING;
+    return JXL_DEC_COLOR_ENCODING;
+  }
+
+  dec->post_headers = true;
+
+  // Decode to pixels, only if required for the events the user wants.
+  if (!dec->got_preview_image) {
+    // Parse the preview, or at least its TOC to be able to skip the frame, if
+    // any frame or image decoding is desired.
+    bool parse_preview =
+        (dec->events_wanted &
+         (JXL_DEC_PREVIEW_IMAGE | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+
+    if (!dec->metadata.m.have_preview) {
+      // There is no preview, mark this as done and go to next step
+      dec->got_preview_image = true;
+    } else if (!parse_preview) {
+      // No preview parsing needed, mark this step as done
+      dec->got_preview_image = true;
+    } else {
+      // Want to decode the preview, not just skip the frame
+      bool want_preview = (dec->events_wanted & JXL_DEC_PREVIEW_IMAGE);
+      size_t frame_size;
+      size_t pos = dec->frame_start;
+      dec->frame_header.reset(new FrameHeader(&dec->metadata));
+      JxlDecoderStatus status = ParseFrameHeader(dec->frame_header.get(), in,
+                                                 size, pos, true, &frame_size,
+                                                 /*saved_as=*/nullptr);
+      if (status != JXL_DEC_SUCCESS) return status;
+      if (OutOfBounds(pos, frame_size, size)) {
+        return JXL_DEC_NEED_MORE_INPUT;
+      }
+
+      if (want_preview && !dec->preview_out_buffer_set) {
+        return JXL_DEC_NEED_PREVIEW_OUT_BUFFER;
+      }
+
+      jxl::Span<const uint8_t> compressed(in + dec->frame_start,
+                                          size - dec->frame_start);
+      auto reader = GetBitReader(compressed);
+      jxl::DecompressParams dparams;
+      dparams.preview = want_preview ? jxl::Override::kOn : jxl::Override::kOff;
+      jxl::ImageBundle ib(&dec->metadata.m);
+      PassesDecoderState preview_dec_state;
+      JXL_API_RETURN_IF_ERROR(preview_dec_state.output_encoding_info.Set(
+          dec->metadata,
+          ColorEncoding::LinearSRGB(dec->metadata.m.color_encoding.IsGray())));
+      if (!DecodeFrame(dparams, &preview_dec_state, dec->thread_pool.get(),
+                       reader.get(), &ib, dec->metadata,
+                       /*constraints=*/nullptr,
+                       /*is_preview=*/true)) {
+        return JXL_API_ERROR("decoding preview failed");
+      }
+
+      // Set frame_start to the first non-preview frame.
+      dec->frame_start += DivCeil(reader->TotalBitsConsumed(), kBitsPerByte);
+      dec->got_preview_image = true;
+
+      if (want_preview) {
+        if (dec->preview_out_buffer) {
+          JxlDecoderStatus status = ConvertImageInternal(
+              dec, ib, dec->preview_out_format, dec->preview_out_buffer,
+              dec->preview_out_size, /*out_callback=*/nullptr,
+              /*out_opaque=*/nullptr);
+          if (status != JXL_DEC_SUCCESS) return status;
+        }
+        return JXL_DEC_PREVIEW_IMAGE;
+      }
+    }
+  }
+
+  // Handle frames: each pass resumes the current frame at dec->frame_stage.
+  for (;;) {
+    if (!(dec->events_wanted & (JXL_DEC_FULL_IMAGE | JXL_DEC_FRAME))) {
+      break;
+    }
+    if (dec->frame_stage == FrameStage::kHeader && dec->is_last_total) {
+      break;
+    }
+
+    if (dec->frame_stage == FrameStage::kHeader) {
+      size_t pos = dec->frame_start - dec->codestream_pos;  // offset in `in`
+      if (pos >= size) {
+        return JXL_DEC_NEED_MORE_INPUT;
+      }
+      dec->frame_header.reset(new FrameHeader(&dec->metadata));
+      int saved_as = 0;
+      JxlDecoderStatus status =
+          ParseFrameHeader(dec->frame_header.get(), in, size, pos,
+                           /*is_preview=*/false, &dec->frame_size, &saved_as);
+      if (status != JXL_DEC_SUCCESS) return status;
+
+      // is last in entire codestream
+      dec->is_last_total = dec->frame_header->is_last;
+      // is last of current still
+      dec->is_last_of_still =
+          dec->is_last_total || dec->frame_header->animation_frame.duration > 0;
+
+      const size_t internal_frame_index = dec->internal_frames;
+      const size_t external_frame_index = dec->external_frames;
+      if (dec->is_last_of_still) dec->external_frames++;
+      dec->internal_frames++;
+
+      dec->frame_stage = FrameStage::kTOC;  // header done; TOC stage is next
+
+      if (dec->skip_frames > 0) {
+        dec->skipping_frame = true;
+        if (dec->is_last_of_still) {
+          dec->skip_frames--;
+        }
+      } else {
+        dec->skipping_frame = false;
+      }
+
+      if (external_frame_index >= dec->frame_external_to_internal.size()) {
+        dec->frame_external_to_internal.push_back(internal_frame_index);
+        JXL_ASSERT(dec->frame_external_to_internal.size() ==
+                   external_frame_index + 1);
+      }
+
+      if (internal_frame_index >= dec->frame_saved_as.size()) {
+        dec->frame_saved_as.push_back(saved_as);
+        JXL_ASSERT(dec->frame_saved_as.size() == internal_frame_index + 1);
+
+        // add the value 0xff (which means all references) to new slots: we only
+        // know the references of the frame at FinalizeFrame, and fill in the
+        // correct values there. As long as this information is not known, the
+        // worst case where the frame depends on all storage slots is assumed.
+        dec->frame_references.push_back(0xff);
+        JXL_ASSERT(dec->frame_references.size() == internal_frame_index + 1);
+      }
+
+      if (dec->skipping_frame) {
+        // Whether this frame could be referenced by any future frame: either
+        // because it's a frame saved for blending or patches, or because it's
+        // a DC frame.
+        bool referenceable =
+            dec->frame_header->CanBeReferenced() ||
+            dec->frame_header->frame_type == FrameType::kDCFrame;
+        if (internal_frame_index < dec->frame_required.size() &&
+            !dec->frame_required[internal_frame_index]) {
+          referenceable = false;
+        }
+        if (!referenceable) {
+          // Skip all decoding for this frame, since the user is skipping this
+          // frame and no future frames can reference it.
+          dec->frame_stage = FrameStage::kHeader;
+          dec->frame_start += dec->frame_size;
+          continue;
+        }
+      }
+
+      if ((dec->events_wanted & JXL_DEC_FRAME) && dec->is_last_of_still) {
+        // Only return this for the last of a series of stills: patches frames
+        // etc... before this one do not contain the correct information such
+        // as animation timing, ...
+        if (!dec->skipping_frame) {
+          return JXL_DEC_FRAME;
+        }
+      }
+    }
+
+    if (dec->frame_stage == FrameStage::kTOC) {
+      size_t pos = dec->frame_start - dec->codestream_pos;
+      if (pos >= size) {
+        return JXL_DEC_NEED_MORE_INPUT;
+      }
+      Span<const uint8_t> span(in + pos, size - pos);
+      auto reader = GetBitReader(span);
+
+      if (!dec->passes_state) {
+        dec->passes_state.reset(new jxl::PassesDecoderState());
+      }
+      if (!dec->ib) {
+        dec->ib.reset(new jxl::ImageBundle(&dec->metadata.m));
+      }
+
+      dec->frame_dec.reset(new FrameDecoder(
+          dec->passes_state.get(), dec->metadata, dec->thread_pool.get()));
+
+      // If JPEG reconstruction is wanted and possible, set the jpeg_data of
+      // the ImageBundle.
+      if (!dec->jpeg_decoder.SetImageBundleJpegData(dec->ib.get()))
+        return JXL_DEC_ERROR;
+
+      jxl::Status status = dec->frame_dec->InitFrame(
+          reader.get(), dec->ib.get(), /*is_preview=*/false,
+          /*allow_partial_frames=*/false, /*allow_partial_dc_global=*/false);
+      if (!status) JXL_API_RETURN_IF_ERROR(status);
+
+      size_t sections_begin =
+          DivCeil(reader->TotalBitsConsumed(), kBitsPerByte);
+
+      dec->sections.reset(
+          new Sections(dec->frame_dec.get(), dec->frame_size, sections_begin));
+      JXL_API_RETURN_IF_ERROR(dec->sections->Init());
+
+      // If we don't need pixels, we can skip actually decoding the frames
+      // (kFull / kFullOut). By not updating frame_stage, none of
+      // these stages will execute, and the loop will continue from the next
+      // frame.
+      if (dec->events_wanted & JXL_DEC_FULL_IMAGE) {
+        dec->frame_dec_in_progress = true;
+        dec->frame_stage = FrameStage::kFull;
+      }
+    }
+
+    bool return_full_image = false;
+
+    if (dec->frame_stage == FrameStage::kFull) {
+      if (dec->events_wanted & JXL_DEC_FULL_IMAGE) {
+        if (!dec->image_out_buffer_set && (!dec->jpeg_decoder.IsOutputSet() ||
+                                           dec->ib->jpeg_data == nullptr) &&
+            dec->is_last_of_still) {
+          // TODO(lode): remove the dec->is_last_of_still condition if the
+          // frame decoder needs the image buffer as working space for decoding
+          // non-visible or blending frames too
+          if (!dec->skipping_frame) {
+            return JXL_DEC_NEED_IMAGE_OUT_BUFFER;
+          }
+        }
+      }
+
+      if (dec->image_out_buffer_set && !!dec->image_out_buffer &&
+          dec->image_out_format.data_type == JXL_TYPE_UINT8 &&
+          dec->image_out_format.num_channels >= 3) {
+        bool is_rgba = dec->image_out_format.num_channels == 4;
+        dec->frame_dec->MaybeSetRGB8OutputBuffer(
+            reinterpret_cast<uint8_t*>(dec->image_out_buffer),
+            GetStride(dec, dec->image_out_format), is_rgba,
+            !dec->keep_orientation);
+      }
+
+      const bool little_endian =
+          dec->image_out_format.endianness == JXL_LITTLE_ENDIAN ||
+          (dec->image_out_format.endianness == JXL_NATIVE_ENDIAN &&
+           IsLittleEndian());
+      bool swap_endianness = little_endian != IsLittleEndian();
+
+      // TODO(lode): Support more formats than just native endian float32 for
+      // the low-memory callback path
+      if (dec->image_out_buffer_set && !!dec->image_out_callback &&
+          dec->image_out_format.data_type == JXL_TYPE_FLOAT &&
+          dec->image_out_format.num_channels >= 3 && !swap_endianness &&
+          dec->frame_dec_in_progress) {
+        bool is_rgba = dec->image_out_format.num_channels == 4;
+        dec->frame_dec->MaybeSetFloatCallback(
+            [dec](const float* pixels, size_t x, size_t y, size_t num_pixels) {
+              dec->image_out_callback(dec->image_out_opaque, x, y, num_pixels,
+                                      pixels);
+            },
+            is_rgba, !dec->keep_orientation);
+      }
+
+      size_t pos = dec->frame_start - dec->codestream_pos;
+      if (pos >= size) {
+        return JXL_DEC_NEED_MORE_INPUT;
+      }
+      dec->sections->SetInput(in + pos, size - pos);
+
+      if (cpu_limit_base_ != 0) {
+        FrameDimensions frame_dim = dec->frame_header->ToFrameDimensions();
+        // No overflow, checked in ParseHeader.
+        size_t num_pixels = frame_dim.xsize * frame_dim.ysize;
+        if (used_cpu_base_ + num_pixels < used_cpu_base_) {  // sum wrapped
+          return JXL_API_ERROR("used too much CPU");
+        }
+        used_cpu_base_ += num_pixels;
+        if (used_cpu_base_ > cpu_limit_base_) {
+          return JXL_API_ERROR("used too much CPU");
+        }
+      }
+
+      jxl::Status status =
+          dec->frame_dec->ProcessSections(dec->sections->section_info.data(),
+                                          dec->sections->section_info.size(),
+                                          dec->sections->section_status.data());
+      JXL_API_RETURN_IF_ERROR(dec->sections->CloseInput());
+      if (status.IsFatalError()) {
+        return JXL_API_ERROR("decoding frame failed");
+      }
+
+      // TODO(lode): allow next_in to move forward if sections from the
+      // beginning of the stream have been processed
+
+      if (status.code() == StatusCode::kNotEnoughBytes ||
+          dec->sections->section_info.size() < dec->frame_dec->NumSections()) {
+        // Not all sections have been processed yet
+        return JXL_DEC_NEED_MORE_INPUT;
+      }
+
+      size_t internal_index = dec->internal_frames - 1;  // frame being decoded
+      JXL_ASSERT(dec->frame_references.size() > internal_index);
+      // Always fill this in, even if it was already written, it could be that
+      // this frame was skipped before and set to 255, while only now we know
+      // the true value.
+      dec->frame_references[internal_index] = dec->frame_dec->References();
+      if (!dec->frame_dec->FinalizeFrame()) {
+        return JXL_API_ERROR("decoding frame failed");
+      }
+      dec->frame_dec_in_progress = false;
+      dec->frame_stage = FrameStage::kFullOutput;
+    }
+
+    if (dec->frame_stage == FrameStage::kFullOutput) {
+      if (dec->is_last_of_still) {
+        if (dec->events_wanted & JXL_DEC_FULL_IMAGE) {
+          dec->events_wanted &= ~JXL_DEC_FULL_IMAGE;
+          return_full_image = true;
+        }
+
+        // Frame finished, restore the events_wanted with the per-frame events
+        // from orig_events_wanted, in case there is a next frame.
+        dec->events_wanted |=
+            (dec->orig_events_wanted & (JXL_DEC_FULL_IMAGE | JXL_DEC_FRAME));
+
+        // If no output buffer was set, we merely return the JXL_DEC_FULL_IMAGE
+        // status without outputting pixels.
+        if (dec->jpeg_decoder.IsOutputSet() && dec->ib->jpeg_data != nullptr) {
+          JxlDecoderStatus status =
+              dec->jpeg_decoder.WriteOutput(*dec->ib->jpeg_data);
+          if (status != JXL_DEC_SUCCESS) return status;
+        } else if (return_full_image && dec->image_out_buffer_set) {
+          if (!dec->frame_dec->HasRGBBuffer()) {
+            // Copy pixels if desired.
+            JxlDecoderStatus status = ConvertImageInternal(
+                dec, *dec->ib, dec->image_out_format, dec->image_out_buffer,
+                dec->image_out_size, dec->image_out_callback,
+                dec->image_out_opaque);
+            if (status != JXL_DEC_SUCCESS) return status;
+          }
+          dec->image_out_buffer_set = false;
+        }
+      }
+    }
+
+    // The pixels have been output or are not needed, do not keep them in
+    // memory here.
+    dec->ib.reset();
+    dec->frame_stage = FrameStage::kHeader;  // ready for the next frame
+    dec->frame_start += dec->frame_size;  // skip past this frame's bytes
+    if (return_full_image && !dec->skipping_frame) {
+      return JXL_DEC_FULL_IMAGE;
+    }
+  }
+
+  dec->stage = DecoderStage::kFinished;
+  // Return success, this means there is nothing more to do.
+  return JXL_DEC_SUCCESS;
+}
+
+}  // namespace
+}  // namespace jxl
+
+JxlDecoderStatus JxlDecoderSetInput(JxlDecoder* dec, const uint8_t* data,
+                                    size_t size) {
+  // Input that is still attached must be released before new input is set.
+  if (dec->next_in != nullptr) return JXL_DEC_ERROR;
+  dec->avail_in = size;
+  dec->next_in = data;
+  return JXL_DEC_SUCCESS;
+}
+
+size_t JxlDecoderReleaseInput(JxlDecoder* dec) {
+  const size_t unprocessed = dec->avail_in;  // bytes not yet consumed
+  dec->avail_in = 0;
+  dec->next_in = nullptr;
+  return unprocessed;
+}
+
+JxlDecoderStatus JxlDecoderSetJPEGBuffer(JxlDecoder* dec, uint8_t* data,
+                                         size_t size) {
+  return dec->jpeg_decoder.SetOutputBuffer(data, size);  // result passed through
+}
+
+size_t JxlDecoderReleaseJPEGBuffer(JxlDecoder* dec) {
+  return dec->jpeg_decoder.ReleaseOutputBuffer();  // result passed through
+}
+
+JxlDecoderStatus JxlDecoderProcessInput(JxlDecoder* dec) {
+  const uint8_t** next_in = &dec->next_in;
+  size_t* avail_in = &dec->avail_in;
+  if (dec->stage == DecoderStage::kInited) {
+    dec->stage = DecoderStage::kStarted;
+  }
+  if (dec->stage == DecoderStage::kError) {
+    return JXL_API_ERROR(
+        "Cannot keep using decoder after it encountered an error, use "
+        "JxlDecoderReset to reset it");
+  }
+  if (dec->stage == DecoderStage::kFinished) {
+    return JXL_API_ERROR(
+        "Cannot keep using decoder after it finished, use JxlDecoderReset to "
+        "reset it");
+  }
+
+  if (!dec->got_signature) {
+    JxlSignature sig = JxlSignatureCheck(*next_in, *avail_in);
+    if (sig == JXL_SIG_INVALID) return JXL_API_ERROR("invalid signature");
+    if (sig == JXL_SIG_NOT_ENOUGH_BYTES) return JXL_DEC_NEED_MORE_INPUT;
+
+    dec->got_signature = true;
+
+    if (sig == JXL_SIG_CONTAINER) {
+      dec->have_container = 1;
+    }
+  }
+
+  // Available codestream bytes, may differ from *avail_in if there is another
+  // box behind the current position, in the dec->have_container case.
+  size_t csize = *avail_in;
+
+  if (dec->have_container) {
+    /*
+    Process bytes as follows:
+    *) find the box(es) containing the codestream
+    *) support codestream split over multiple partial boxes
+    *) avoid copying bytes to the codestream vector if the decoding will be
+     one-shot, when the user already provided everything contiguously in
+     memory
+    *) copy to codestream vector, and update next_in so user can delete the data
+    on their side, once we know it's not oneshot. This relieves the user from
+    continuing to store the data.
+    *) also copy to codestream if one-shot but the codestream is split across
+    multiple boxes: this copying can be avoided in the future if the C++
+    decoder is updated for streaming, but for now it requires all consecutive
+    data at once.
+    */
+
+    if (dec->skip_box) {
+      // Amount of remaining bytes in the box that is being skipped.
+      size_t remaining = dec->box_end - dec->file_pos;
+      if (*avail_in < remaining) {
+        // Don't have the full box yet, skip all we have so far
+        dec->file_pos += *avail_in;
+        *next_in += *avail_in;
+        *avail_in -= *avail_in;
+        return JXL_DEC_NEED_MORE_INPUT;
+      } else {
+        // Full box available, skip all its remaining bytes
+        dec->file_pos += remaining;
+        *next_in += remaining;
+        *avail_in -= remaining;
+        dec->skip_box = false;
+      }
+    }
+
+    if (dec->first_codestream_seen && !dec->last_codestream_seen &&
+        dec->codestream_end != 0 && dec->file_pos < dec->codestream_end &&
+        dec->file_pos + *avail_in >= dec->codestream_end &&
+        !dec->codestream.empty()) {
+      // dec->file_pos in a codestream, not in surrounding box format bytes, but
+      // the end of the current codestream part is in the current input, and
+      // boxes that can contain a next part of the codestream could be present.
+      // Therefore, store the known codestream part, and ensure processing of
+      // boxes below will trigger. This is only done if
+      // !dec->codestream.empty(), that is, we're already streaming.
+
+      // Size of the codestream, excluding potential boxes that come after it.
+      csize = *avail_in;
+      if (dec->codestream_end && csize > dec->codestream_end - dec->file_pos) {
+        csize = dec->codestream_end - dec->file_pos;
+      }
+      dec->codestream.insert(dec->codestream.end(), *next_in, *next_in + csize);
+      dec->file_pos += csize;
+      *next_in += csize;
+      *avail_in -= csize;
+    }
+
+    if (dec->jpeg_decoder.IsParsingBox()) {
+      // We are inside a JPEG reconstruction box.
+      JxlDecoderStatus recon_result =
+          dec->jpeg_decoder.Process(next_in, avail_in);
+      if (recon_result == JXL_DEC_JPEG_RECONSTRUCTION) {
+        // If successful JPEG reconstruction, return the success if the user
+        // cares about it, otherwise continue.
+        if (dec->events_wanted & recon_result) {
+          dec->events_wanted &= ~recon_result;
+          return recon_result;
+        }
+      } else {
+        // If anything else, return the result.
+        return recon_result;
+      }
+    }
+
+    if (!dec->last_codestream_seen &&
+        (dec->codestream_begin == 0 ||
+         (dec->codestream_end != 0 && dec->file_pos >= dec->codestream_end))) {
+      size_t pos = 0;
+      // after this for loop, either we should be in a part of the data that is
+      // codestream (not boxes), or have returned that we need more input.
+      for (;;) {
+        const uint8_t* in = *next_in;
+        size_t size = *avail_in;
+        if (size == pos) {
+          // If the remaining size is 0, we are exactly after a full box. We
+          // can't know for sure if this is the last box or not since more bytes
+          // can follow, but do not return NEED_MORE_INPUT, instead break and
+          // let the codestream-handling code determine if we need more.
+          break;
+        }
+        if (OutOfBounds(pos, 8, size)) {
+          dec->basic_info_size_hint =
+              InitialBasicInfoSizeHint() + pos + 8 - dec->file_pos;
+          return JXL_DEC_NEED_MORE_INPUT;
+        }
+        size_t box_start = pos;
+        // Box size, including this header itself.
+        uint64_t box_size = LoadBE32(in + pos);
+        char type[5] = {0};
+        memcpy(type, in + pos + 4, 4);
+        pos += 8;
+        if (box_size == 1) {
+          if (OutOfBounds(pos, 8, size)) return JXL_DEC_NEED_MORE_INPUT;
+          box_size = LoadBE64(in + pos);
+          pos += 8;
+        }
+        size_t header_size = pos - box_start;
+        if (box_size > 0 && box_size < header_size) {
+          return JXL_API_ERROR("invalid box size");
+        }
+        if (SumOverflows(dec->file_pos, pos, box_size)) {
+          return JXL_API_ERROR("Box size overflow");
+        }
+        size_t contents_size =
+            (box_size == 0) ? 0 : (box_size - pos + box_start);
+
+        dec->box_begin = box_start;
+        dec->box_end = dec->file_pos + box_start + box_size;
+        if (strcmp(type, "jxlc") == 0 || strcmp(type, "jxlp") == 0) {
+          size_t codestream_size = contents_size;
+          // Whether this is the last codestream box, either when it is a jxlc
+          // box, or when it is a jxlp box that has the final bit set.
+          // The codestream is either contained within a single jxlc box, or
+          // within one or more jxlp boxes. The final jxlp box is marked as last
+          // by setting the high bit of its 4-byte box-index value.
+          bool last_codestream = false;
+          if (strcmp(type, "jxlp") == 0) {
+            if (OutOfBounds(pos, 4, size)) return JXL_DEC_NEED_MORE_INPUT;
+            if (box_size != 0 && contents_size < 4) {
+              return JXL_API_ERROR("jxlp box too small to contain index");
+            }
+            codestream_size -= 4;
+            size_t jxlp_index = LoadBE32(in + pos);
+            pos += 4;
+            // The high bit of jxlp_index indicates whether this is the last
+            // jxlp box.
+            if (jxlp_index & 0x80000000) last_codestream = true;
+          } else if (strcmp(type, "jxlc") == 0) {
+            last_codestream = true;
+          }
+          if (!last_codestream && box_size == 0) {
+            return JXL_API_ERROR(
+                "final box has unbounded size, but is a non-final codestream "
+                "box");
+          }
+          dec->first_codestream_seen = true;
+          if (last_codestream) dec->last_codestream_seen = true;
+          if (dec->codestream_begin != 0 && dec->codestream.empty()) {
+            // We've already seen a codestream part, so it's a stream spanning
+            // multiple boxes.
+            // We have no choice but to copy contents to the codestream
+            // vector to make it a contiguous stream for the C++ decoder.
+            // This appends the previous codestream box that we had seen to
+            // dec->codestream.
+            if (dec->codestream_begin < dec->file_pos) {
+              return JXL_API_ERROR("earlier codestream box out of range");
+            }
+            size_t begin = dec->codestream_begin - dec->file_pos;
+            size_t end = dec->codestream_end - dec->file_pos;
+            JXL_ASSERT(end <= *avail_in);
+            dec->codestream.insert(dec->codestream.end(), *next_in + begin,
+                                   *next_in + end);
+          }
+          dec->codestream_begin = dec->file_pos + pos;
+          dec->codestream_end =
+              (box_size == 0) ? 0 : (dec->codestream_begin + codestream_size);
+          size_t avail_codestream_size =
+              (box_size == 0)
+                  ? (size - pos)
+                  : std::min<size_t>(size - pos, box_size - pos + box_start);
+          // If already appending codestream, append what we have here too
+          if (!dec->codestream.empty()) {
+            size_t begin = pos;
+            size_t end =
+                std::min<size_t>(*avail_in, begin + avail_codestream_size);
+            dec->codestream.insert(dec->codestream.end(), *next_in + begin,
+                                   *next_in + end);
+            pos += (end - begin);
+            dec->file_pos += pos;
+            *next_in += pos;
+            *avail_in -= pos;
+            pos = 0;
+            // TODO(lode): check if this should break always instead, and
+            // process what we have of the codestream so far, to support
+            // progressive decoding, and get events such as basic info faster.
+            // The user could have given 1.5 boxes here, and the first one could
+            // contain useful parts of codestream that can already be processed.
+            // Similar to several other exact avail_size checks. This may not
+            // need to be changed here, but instead at the point in this for
+            // loop where it returns "NEED_MORE_INPUT", it could instead break
+            // and allow decoding what we have of the codestream so far.
+            if (*avail_in == 0) break;
+          } else {
+            // skip only the header, so next_in points to the start of this new
+            // codestream part, for the one-shot case where user data is not
+            // (yet) copied to dec->codestream.
+            dec->file_pos += pos;
+            *next_in += pos;
+            *avail_in -= pos;
+            pos = 0;
+            // Update pos to be after the box contents with codestream
+            if (avail_codestream_size == *avail_in) {
+              break;  // the rest is codestream, this loop is done
+            }
+            pos += avail_codestream_size;
+          }
+        } else if ((JPEGXL_ENABLE_TRANSCODE_JPEG) &&
+                   (dec->orig_events_wanted & JXL_DEC_JPEG_RECONSTRUCTION) &&
+                   strcmp(type, "jbrd") == 0) {
+          // This is a new JPEG reconstruction metadata box.
+          dec->jpeg_decoder.StartBox(box_size, contents_size);
+          dec->file_pos += pos;
+          *next_in += pos;
+          *avail_in -= pos;
+          pos = 0;
+          JxlDecoderStatus recon_result =
+              dec->jpeg_decoder.Process(next_in, avail_in);
+          if (recon_result == JXL_DEC_JPEG_RECONSTRUCTION) {
+            // If successful JPEG reconstruction, return the success if the user
+            // cares about it, otherwise continue.
+            if (dec->events_wanted & recon_result) {
+              dec->events_wanted &= ~recon_result;
+              return recon_result;
+            }
+          } else {
+            // If anything else, return the result.
+            return recon_result;
+          }
+        } else {
+          if (box_size == 0) {
+            // Final box with unknown size, but it's not a codestream box, so
+            // nothing more to do.
+            if (!dec->first_codestream_seen) {
+              return JXL_API_ERROR("didn't find any codestream box");
+            }
+            break;
+          }
+          if (OutOfBounds(pos, contents_size, size)) {
+            dec->skip_box = true;
+            dec->file_pos += pos;
+            *next_in += pos;
+            *avail_in -= pos;
+            // Indicate how many more bytes needed starting from *next_in.
+            dec->basic_info_size_hint = InitialBasicInfoSizeHint() + pos +
+                                        contents_size - dec->file_pos;
+            return JXL_DEC_NEED_MORE_INPUT;
+          }
+          pos += contents_size;
+          if (!(dec->codestream.empty() && dec->first_codestream_seen)) {
+            // Last box no longer needed since we have copied the codestream
+            // buffer, remove from input so user can release memory.
+            dec->file_pos += pos;
+            *next_in += pos;
+            *avail_in -= pos;
+            pos = 0;
+          }
+        }
+      }
+    }
+
+    // Size of the codestream, excluding potential boxes that come after it.
+    csize = *avail_in;
+    if (dec->codestream_end && csize > dec->codestream_end - dec->file_pos) {
+      csize = dec->codestream_end - dec->file_pos;
+    }
+  }
+
+  // Whether we are taking the input directly from the user (oneshot case,
+  // without copying bytes), or appending parts of input to dec->codestream
+  // (streaming)
+  bool detected_streaming = !dec->codestream.empty();
+  JxlDecoderStatus result;
+  JXL_DASSERT(csize <= *avail_in);
+
+  if (detected_streaming) {
+    dec->codestream.insert(dec->codestream.end(), *next_in, *next_in + csize);
+    dec->file_pos += csize;
+    *next_in += csize;
+    *avail_in -= csize;
+    result = jxl::JxlDecoderProcessInternal(dec, dec->codestream.data(),
+                                            dec->codestream.size());
+  } else {
+    // No data copied to codestream buffer yet, the user input may contain the
+    // full codestream.
+    result = jxl::JxlDecoderProcessInternal(dec, *next_in, csize);
+    // Copy the user's input bytes to the codestream once we are able to and
+    // it is needed. Before we got the basic info, we're still parsing the box
+    // format instead. If the result is not JXL_DEC_NEED_MORE_INPUT, then
+    // there is no reason yet to copy since the user may have a full buffer
+    // allowing one-shot. Once JXL_DEC_NEED_MORE_INPUT occurred at least once,
+    // start copying over the codestream bytes and allow user to free them
+    // instead. Next call, detected_streaming will be true.
+    if (dec->got_basic_info && result == JXL_DEC_NEED_MORE_INPUT) {
+      dec->codestream.insert(dec->codestream.end(), *next_in, *next_in + csize);
+      dec->file_pos += csize;
+      *next_in += csize;
+      *avail_in -= csize;
+    }
+  }
+
+  return result;
+}
+
+// Fills `info` with the basic image properties parsed from the codestream
+// headers. Returns JXL_DEC_NEED_MORE_INPUT until the basic info is decoded;
+// a null `info` is allowed and only reports whether the data is available.
+JxlDecoderStatus JxlDecoderGetBasicInfo(const JxlDecoder* dec,
+                                        JxlBasicInfo* info) {
+  if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT;
+  if (info == nullptr) return JXL_DEC_SUCCESS;
+
+  const jxl::ImageMetadata& m = dec->metadata.m;
+  info->have_container = dec->have_container;
+  info->xsize = dec->metadata.size.xsize();
+  info->ysize = dec->metadata.size.ysize();
+  info->uses_original_profile = !m.xyb_encoded;
+  info->bits_per_sample = m.bit_depth.bits_per_sample;
+  info->exponent_bits_per_sample = m.bit_depth.exponent_bits_per_sample;
+
+  info->have_preview = m.have_preview;
+  info->have_animation = m.have_animation;
+  // TODO(janwas): intrinsic_size
+
+  // Unless the caller asked to keep the stored orientation, report the
+  // identity orientation, swapping width and height for transposing ones.
+  info->orientation = static_cast<JxlOrientation>(m.orientation);
+  if (!dec->keep_orientation) {
+    const bool transposed = info->orientation >= JXL_ORIENT_TRANSPOSE;
+    if (transposed) std::swap(info->xsize, info->ysize);
+    info->orientation = JXL_ORIENT_IDENTITY;
+  }
+
+  // Tone mapping parameters.
+  info->intensity_target = m.IntensityTarget();
+  info->min_nits = m.tone_mapping.min_nits;
+  info->relative_to_max_display = m.tone_mapping.relative_to_max_display;
+  info->linear_below = m.tone_mapping.linear_below;
+
+  // Alpha properties; all zero when there is no alpha extra channel.
+  const jxl::ExtraChannelInfo* alpha = m.Find(jxl::ExtraChannel::kAlpha);
+  const bool has_alpha = (alpha != nullptr);
+  info->alpha_bits = has_alpha ? alpha->bit_depth.bits_per_sample : 0;
+  info->alpha_exponent_bits =
+      has_alpha ? alpha->bit_depth.exponent_bits_per_sample : 0;
+  info->alpha_premultiplied = has_alpha ? alpha->alpha_associated : 0;
+
+  const bool is_gray =
+      m.color_encoding.GetColorSpace() == jxl::ColorSpace::kGray;
+  info->num_color_channels = is_gray ? 1 : 3;
+  info->num_extra_channels = m.num_extra_channels;
+
+  // Preview and animation details are only written when present.
+  if (info->have_preview) {
+    info->preview.xsize = m.preview_size.xsize();
+    info->preview.ysize = m.preview_size.ysize();
+  }
+
+  if (info->have_animation) {
+    const auto& anim = m.animation;
+    info->animation.tps_numerator = anim.tps_numerator;
+    info->animation.tps_denominator = anim.tps_denominator;
+    info->animation.num_loops = anim.num_loops;
+    info->animation.have_timecodes = anim.have_timecodes;
+  }
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Writes the properties of extra channel `index` into `info`. Returns
+// JXL_DEC_NEED_MORE_INPUT before the basic info is decoded and JXL_DEC_ERROR
+// when `index` is not a valid extra channel index.
+JxlDecoderStatus JxlDecoderGetExtraChannelInfo(const JxlDecoder* dec,
+                                               size_t index,
+                                               JxlExtraChannelInfo* info) {
+  if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT;
+  const auto& all_channels = dec->metadata.m.extra_channel_info;
+  if (index >= all_channels.size()) return JXL_DEC_ERROR;  // out of bounds
+
+  const jxl::ExtraChannelInfo& ec = all_channels[index];
+  const auto& depth = ec.bit_depth;
+  info->type = static_cast<JxlExtraChannelType>(ec.type);
+  info->bits_per_sample = depth.bits_per_sample;
+  // Exponent bits are reported as 0 unless the samples are floating point.
+  if (depth.floating_point_sample) {
+    info->exponent_bits_per_sample = depth.exponent_bits_per_sample;
+  } else {
+    info->exponent_bits_per_sample = 0;
+  }
+  info->dim_shift = ec.dim_shift;
+  info->name_length = ec.name.size();
+  info->alpha_associated = ec.alpha_associated;
+  for (size_t i = 0; i < 4; ++i) info->spot_color[i] = ec.spot_color[i];
+  info->cfa_channel = ec.cfa_channel;
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Copies the null-terminated name of extra channel `index` into `name`, a
+// buffer of `size` bytes. Returns JXL_DEC_ERROR for an invalid index or when
+// the buffer cannot hold the name plus its terminator.
+JxlDecoderStatus JxlDecoderGetExtraChannelName(const JxlDecoder* dec,
+                                               size_t index, char* name,
+                                               size_t size) {
+  if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT;
+  const auto& all_channels = dec->metadata.m.extra_channel_info;
+  if (index >= all_channels.size()) return JXL_DEC_ERROR;  // out of bounds
+
+  const auto& channel_name = all_channels[index].name;
+  // Bytes to copy: the characters plus the null-termination character.
+  const size_t needed = channel_name.size() + 1;
+  if (needed > size) return JXL_DEC_ERROR;
+
+  memcpy(name, channel_name.c_str(), needed);
+  return JXL_DEC_SUCCESS;
+}
+
+namespace {
+
+// Stores in `*encoding` the jxl::ColorEncoding for `target`: the decoder's
+// output encoding for JXL_COLOR_PROFILE_TARGET_DATA on an XYB-encoded image,
+// otherwise the encoding from the codestream header metadata. `format` is
+// not consulted. Returns JXL_DEC_NEED_MORE_INPUT, leaving `*encoding`
+// untouched, until all headers have been decoded.
+JxlDecoderStatus GetColorEncodingForTarget(
+    const JxlDecoder* dec, const JxlPixelFormat* format,
+    JxlColorProfileTarget target, const jxl::ColorEncoding** encoding) {
+  if (!dec->got_all_headers) return JXL_DEC_NEED_MORE_INPUT;
+  const bool use_output_encoding =
+      dec->metadata.m.xyb_encoded && target == JXL_COLOR_PROFILE_TARGET_DATA;
+  *encoding = use_output_encoding
+                  ? &dec->passes_state->output_encoding_info.color_encoding
+                  : &dec->metadata.m.color_encoding;
+  return JXL_DEC_SUCCESS;
+}
+}  // namespace
+
+// Exports the color encoding for `target` as a JxlColorEncoding struct.
+// Returns JXL_DEC_ERROR when that encoding wants an ICC profile, meaning no
+// encoded profile is available. `color_encoding` may be null.
+JxlDecoderStatus JxlDecoderGetColorAsEncodedProfile(
+    const JxlDecoder* dec, const JxlPixelFormat* format,
+    JxlColorProfileTarget target, JxlColorEncoding* color_encoding) {
+  const jxl::ColorEncoding* internal = nullptr;
+  const JxlDecoderStatus lookup =
+      GetColorEncodingForTarget(dec, format, target, &internal);
+  if (lookup != JXL_DEC_SUCCESS) return lookup;
+  // Indicate no encoded profile available.
+  if (internal->WantICC()) return JXL_DEC_ERROR;
+  if (color_encoding != nullptr) {
+    ConvertInternalToExternalColorEncoding(*internal, color_encoding);
+  }
+  return JXL_DEC_SUCCESS;
+}
+
+// Reports through `size` (which may be null) the byte size of the ICC
+// profile for `target`. Returns JXL_DEC_ERROR when the selected encoding
+// wants an ICC profile but the metadata color space is unknown or XYB.
+JxlDecoderStatus JxlDecoderGetICCProfileSize(const JxlDecoder* dec,
+                                             const JxlPixelFormat* format,
+                                             JxlColorProfileTarget target,
+                                             size_t* size) {
+  const jxl::ColorEncoding* internal = nullptr;
+  const JxlDecoderStatus lookup =
+      GetColorEncodingForTarget(dec, format, target, &internal);
+  if (lookup != JXL_DEC_SUCCESS) return lookup;
+
+  if (internal->WantICC()) {
+    switch (dec->metadata.m.color_encoding.GetColorSpace()) {
+      case jxl::ColorSpace::kUnknown:
+      case jxl::ColorSpace::kXYB:
+        // This indicates there's no ICC profile available
+        // TODO(lode): for the XYB case, do we want to craft an ICC profile
+        // that represents XYB as an RGB profile? It may be possible, but
+        // not with only 1D transfer functions.
+        return JXL_DEC_ERROR;
+      default:
+        break;
+    }
+  }
+  if (size != nullptr) *size = internal->ICC().size();
+  return JXL_DEC_SUCCESS;
+}
+
+// Copies the ICC profile for `target` into `icc_profile`, a buffer of
+// `size` bytes. Fails when the buffer is smaller than the profile.
+JxlDecoderStatus JxlDecoderGetColorAsICCProfile(const JxlDecoder* dec,
+                                                const JxlPixelFormat* format,
+                                                JxlColorProfileTarget target,
+                                                uint8_t* icc_profile,
+                                                size_t size) {
+  // This also checks the NEED_MORE_INPUT and the unknown/xyb cases
+  size_t required;
+  JxlDecoderStatus st =
+      JxlDecoderGetICCProfileSize(dec, format, target, &required);
+  if (st != JXL_DEC_SUCCESS) return st;
+  if (required > size) return JXL_API_ERROR("ICC profile output too small");
+
+  const jxl::ColorEncoding* internal = nullptr;
+  st = GetColorEncodingForTarget(dec, format, target, &internal);
+  if (st != JXL_DEC_SUCCESS) return st;
+  const auto& icc = internal->ICC();
+  memcpy(icc_profile, icc.data(), icc.size());
+  return JXL_DEC_SUCCESS;
+}
+
+namespace {
+
+// Validates `format` for output buffer size computations and stores the
+// number of bits per channel sample in `*bits`. Returns
+// JXL_DEC_NEED_MORE_INPUT while the basic info is not available, or an API
+// error for an unsupported channel count or data type.
+JxlDecoderStatus PrepareSizeCheck(const JxlDecoder* dec,
+                                  const JxlPixelFormat* format, size_t* bits) {
+  // Don't know image dimensions yet, cannot check for valid size.
+  if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT;
+
+  if (format->num_channels > 4) {
+    return JXL_API_ERROR("More than 4 channels not supported");
+  }
+  if (format->num_channels < 3 && !dec->metadata.m.color_encoding.IsGray()) {
+    return JXL_API_ERROR("Grayscale output not possible for color image");
+  }
+
+  switch (format->data_type) {
+    case JXL_TYPE_BOOLEAN:
+      return JXL_API_ERROR("Boolean data type not yet supported");
+    case JXL_TYPE_UINT32:
+      return JXL_API_ERROR("uint32 data type not yet supported");
+    default:
+      break;
+  }
+
+  *bits = BitsPerChannel(format->data_type);
+  if (*bits == 0) return JXL_API_ERROR("Invalid data type");
+  return JXL_DEC_SUCCESS;
+}
+}  // namespace
+
+// Flushes the frame currently being decoded and, unless the frame decoder
+// has an RGB buffer, converts `dec->ib` into the image out buffer. Returns
+// JXL_DEC_ERROR whenever one of the preconditions below does not hold.
+JxlDecoderStatus JxlDecoderFlushImage(JxlDecoder* dec) {
+  // An output buffer, section info and a frame in progress are required.
+  if (!dec->image_out_buffer) return JXL_DEC_ERROR;
+  if (!dec->sections || dec->sections->section_info.empty()) {
+    return JXL_DEC_ERROR;
+  }
+  if (!dec->frame_dec || !dec->frame_dec_in_progress) return JXL_DEC_ERROR;
+
+  // FrameDecoder::Flush currently requires DC to have been decoded already
+  // to work correctly.
+  if (!dec->frame_dec->HasDecodedDC()) return JXL_DEC_ERROR;
+
+  // Flushing does not yet work correctly if the frame uses modular encoding.
+  const bool is_vardct =
+      dec->frame_header->encoding == jxl::FrameEncoding::kVarDCT;
+  if (!is_vardct) return JXL_DEC_ERROR;
+
+  // Flushing does not yet work correctly if there are extra channels, which
+  // use modular
+  if (dec->metadata.m.num_extra_channels > 0) return JXL_DEC_ERROR;
+
+  if (!dec->frame_dec->Flush()) return JXL_DEC_ERROR;
+
+  // No conversion is performed when the frame decoder has an RGB buffer.
+  if (dec->frame_dec->HasRGBBuffer()) return JXL_DEC_SUCCESS;
+
+  // Temporarily shrink `dec->ib` to the actual size of the full image to call
+  // ConvertImageInternal.
+  const size_t saved_xsize = dec->ib->xsize();
+  const size_t saved_ysize = dec->ib->ysize();
+  dec->ib->ShrinkTo(dec->metadata.size.xsize(), dec->metadata.size.ysize());
+
+  const JxlDecoderStatus converted = jxl::ConvertImageInternal(
+      dec, *dec->ib, dec->image_out_format, dec->image_out_buffer,
+      dec->image_out_size,
+      /*out_callback=*/nullptr, /*out_opaque=*/nullptr);
+
+  // Put the previous dimensions back whatever the conversion result was.
+  dec->ib->ShrinkTo(saved_xsize, saved_ysize);
+  return converted;
+}
+
+// Minimum preview output buffer size for `format`: row size (rounded up to
+// whole bytes, then to `format->align`) times the oriented preview height.
+JXL_EXPORT JxlDecoderStatus JxlDecoderPreviewOutBufferSize(
+    const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size) {
+  size_t bits;
+  const JxlDecoderStatus check = PrepareSizeCheck(dec, format, &bits);
+  if (check != JXL_DEC_SUCCESS) return check;
+  const size_t w = dec->metadata.oriented_preview_xsize(dec->keep_orientation);
+  const size_t h = dec->metadata.oriented_preview_ysize(dec->keep_orientation);
+  size_t stride =
+      jxl::DivCeil(w * format->num_channels * bits, jxl::kBitsPerByte);
+  if (format->align > 1) {
+    stride = jxl::DivCeil(stride, format->align) * format->align;
+  }
+  *size = stride * h;
+  return JXL_DEC_SUCCESS;
+}
+
+// Registers `buffer` (`size` bytes) as the preview image destination. Fails
+// if no preview is expected or the format / buffer size is rejected.
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetPreviewOutBuffer(
+    JxlDecoder* dec, const JxlPixelFormat* format, void* buffer, size_t size) {
+  const bool preview_expected =
+      dec->got_basic_info && dec->metadata.m.have_preview &&
+      (dec->orig_events_wanted & JXL_DEC_PREVIEW_IMAGE);
+  if (!preview_expected) {
+    return JXL_API_ERROR("No preview out buffer needed at this time");
+  }
+  // This also checks whether the format is valid and supported and basic info
+  // is available.
+  size_t required;
+  const JxlDecoderStatus check =
+      JxlDecoderPreviewOutBufferSize(dec, format, &required);
+  if (check != JXL_DEC_SUCCESS) return check;
+  if (size < required) return JXL_DEC_ERROR;
+  dec->preview_out_format = *format;
+  dec->preview_out_buffer = buffer;
+  dec->preview_out_size = size;
+  dec->preview_out_buffer_set = true;
+  return JXL_DEC_SUCCESS;
+}
+
+// Minimum DC output buffer size for `format`, computed on the oriented image
+// dimensions divided (rounding up) by jxl::kBlockDim.
+JXL_EXPORT JxlDecoderStatus JxlDecoderDCOutBufferSize(
+    const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size) {
+  size_t bits;
+  const JxlDecoderStatus check = PrepareSizeCheck(dec, format, &bits);
+  if (check != JXL_DEC_SUCCESS) return check;
+  const size_t w = jxl::DivCeil(
+      dec->metadata.oriented_xsize(dec->keep_orientation), jxl::kBlockDim);
+  const size_t h = jxl::DivCeil(
+      dec->metadata.oriented_ysize(dec->keep_orientation), jxl::kBlockDim);
+  size_t stride =
+      jxl::DivCeil(w * format->num_channels * bits, jxl::kBitsPerByte);
+  if (format->align > 1) {
+    stride = jxl::DivCeil(stride, format->align) * format->align;
+  }
+  *size = stride * h;
+  return JXL_DEC_SUCCESS;
+}
+
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetDCOutBuffer(
+    JxlDecoder* dec, const JxlPixelFormat* format, void* buffer, size_t size) {
+  // Deprecated feature: every argument is ignored and no buffer is stored.
+  return JXL_DEC_SUCCESS;
+}
+
+// Minimum full-image output buffer size for `format`: row size (rounded up
+// to whole bytes, then to `format->align`) times the oriented image height.
+JXL_EXPORT JxlDecoderStatus JxlDecoderImageOutBufferSize(
+    const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size) {
+  size_t bits;
+  const JxlDecoderStatus check = PrepareSizeCheck(dec, format, &bits);
+  if (check != JXL_DEC_SUCCESS) return check;
+
+  const auto w = dec->metadata.oriented_xsize(dec->keep_orientation);
+  size_t stride =
+      jxl::DivCeil(w * format->num_channels * bits, jxl::kBitsPerByte);
+  if (format->align > 1) {
+    stride = jxl::DivCeil(stride, format->align) * format->align;
+  }
+  *size = stride * dec->metadata.oriented_ysize(dec->keep_orientation);
+  return JXL_DEC_SUCCESS;
+}
+
+// Registers `buffer` (`size` bytes) as the full image destination. Fails if
+// no image is expected, a callback is in use, or format/size are rejected.
+JxlDecoderStatus JxlDecoderSetImageOutBuffer(JxlDecoder* dec,
+                                             const JxlPixelFormat* format,
+                                             void* buffer, size_t size) {
+  const bool image_expected =
+      dec->got_basic_info && (dec->orig_events_wanted & JXL_DEC_FULL_IMAGE);
+  if (!image_expected) {
+    return JXL_API_ERROR("No image out buffer needed at this time");
+  }
+  if (dec->image_out_buffer_set && dec->image_out_callback != nullptr) {
+    return JXL_API_ERROR(
+        "Cannot change from image out callback to image out buffer");
+  }
+  // Also validates `format` and that the basic info is available.
+  size_t required;
+  const JxlDecoderStatus check =
+      JxlDecoderImageOutBufferSize(dec, format, &required);
+  if (check != JXL_DEC_SUCCESS) return check;
+  if (size < required) return JXL_DEC_ERROR;
+  dec->image_out_format = *format;
+  dec->image_out_buffer = buffer;
+  dec->image_out_size = size;
+  dec->image_out_buffer_set = true;
+  return JXL_DEC_SUCCESS;
+}
+
+// Registers `callback` and its user data `opaque` as the image out callback.
+// Fails when an image out buffer was set before or `format` is rejected.
+JxlDecoderStatus JxlDecoderSetImageOutCallback(JxlDecoder* dec,
+                                               const JxlPixelFormat* format,
+                                               JxlImageOutCallback callback,
+                                               void* opaque) {
+  if (dec->image_out_buffer_set && dec->image_out_buffer != nullptr) {
+    return JXL_API_ERROR(
+        "Cannot change from image out buffer to image out callback");
+  }
+
+  // Only the validation matters here; the bit count itself is not used.
+  size_t unused_bits;
+  const JxlDecoderStatus check = PrepareSizeCheck(dec, format, &unused_bits);
+  if (check != JXL_DEC_SUCCESS) return check;
+  dec->image_out_format = *format;
+  dec->image_out_callback = callback;
+  dec->image_out_opaque = opaque;
+  dec->image_out_buffer_set = true;
+  return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderGetFrameHeader(const JxlDecoder* dec,
+                                          JxlFrameHeader* header) {
+  if (!dec->frame_header || dec->frame_stage == FrameStage::kHeader) {
+    return JXL_API_ERROR("no frame header available");
+  }
+  const auto& metadata = dec->metadata.m;
+  if (metadata.have_animation) {
+    header->duration = dec->frame_header->animation_frame.duration;
+    if (metadata.animation.have_timecodes) {
+      header->timecode = dec->frame_header->animation_frame.timecode;
+    }
+  }
+  header->name_length = dec->frame_header->name.size();
+  header->is_last = dec->frame_header->is_last;
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Copies the current frame's null-terminated name into `name` (`size` bytes).
+JxlDecoderStatus JxlDecoderGetFrameName(const JxlDecoder* dec, char* name,
+                                        size_t size) {
+  if (!dec->frame_header || dec->frame_stage == FrameStage::kHeader) {
+    return JXL_API_ERROR("no frame header available");
+  }
+  const auto& frame_name = dec->frame_header->name;
+  if (size < frame_name.size() + 1) {  // name plus terminator must fit
+    return JXL_API_ERROR("too small frame name output buffer");
+  }
+  memcpy(name, frame_name.c_str(), frame_name.size() + 1);
+  return JXL_DEC_SUCCESS;
+}
+
+// Sets `color_encoding` as the decoder's preferred output color profile.
+// Gray-ness must match the image; unknown and XYB spaces are rejected.
+JxlDecoderStatus JxlDecoderSetPreferredColorProfile(
+    JxlDecoder* dec, const JxlColorEncoding* color_encoding) {
+  if (!dec->got_all_headers) {
+    return JXL_API_ERROR("color info not yet available");
+  }
+  if (dec->post_headers) {
+    return JXL_API_ERROR("too late to set the color encoding");
+  }
+  const auto cs = color_encoding->color_space;
+  if (dec->metadata.m.color_encoding.IsGray() != (cs == JXL_COLOR_SPACE_GRAY)) {
+    return JXL_API_ERROR("grayscale mismatch");
+  }
+  if (cs == JXL_COLOR_SPACE_UNKNOWN || cs == JXL_COLOR_SPACE_XYB) {
+    return JXL_API_ERROR("only RGB or grayscale output supported");
+  }
+  JXL_API_RETURN_IF_ERROR(ConvertExternalToInternalColorEncoding(
+      *color_encoding, &dec->default_enc));
+  JXL_API_RETURN_IF_ERROR(dec->passes_state->output_encoding_info.Set(
+      dec->metadata, dec->default_enc));
+  return JXL_DEC_SUCCESS;
+}
+
+// "Package-private" hook, used only by the fuzzer to avoid running cases
+// that are too memory / CPU hungry. The limits are applied at mid-level API;
+// in the future the high-level API would also include the means of
+// limiting / throttling memory / CPU usage.
+void SetDecoderMemoryLimitBase_(size_t memory_limit_base) {
+  // Allow 5 x max_image_size processing units; every frame is accounted
+  // as W x H CPU processing units, so there could be numerous small frames
+  // or few larger ones.
+  cpu_limit_base_ = 5 * memory_limit_base;
+  memory_limit_base_ = memory_limit_base;
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/decode_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/decode_test.cc
new file mode 100644
index 0000000000..f1acc4a1e0
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/decode_test.cc
@@ -0,0 +1,2926 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "jxl/decode.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "jxl/decode_cxx.h"
+#include "jxl/resizable_parallel_runner_cxx.h"
+#include "jxl/thread_parallel_runner_cxx.h"
+#include "lib/extras/codec.h"
+#include "lib/extras/codec_jpg.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/file_io.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/dec_file.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_gamma_correct.h"
+#include "lib/jxl/enc_icc_codec.h"
+#include "lib/jxl/encode_internal.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/icc_codec.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testdata.h"
+#include "tools/box/box.h"
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+void AppendU32BE(uint32_t u32, jxl::PaddedBytes* bytes) {
+  // Append the four bytes of `u32`, most significant byte first.
+  for (int shift = 24; shift >= 0; shift -= 8) {
+    bytes->push_back(u32 >> shift);
+  }
+}
+
+bool Near(double expected, double value, double max_dist) {
+  // Inclusive comparison of the absolute difference against `max_dist`.
+  return (expected > value ? expected - value : value - expected) <= max_dist;
+}
+
+// Reads 4 big-endian bytes at `p` and returns them reinterpreted as a float.
+float LoadBEFloat(const uint8_t* p) {
+  const uint32_t raw_bits = LoadBE32(p);
+  float value;
+  memcpy(&value, &raw_bits, sizeof(raw_bits));
+  return value;
+}
+
+// Reads 4 little-endian bytes at `p` and returns them reinterpreted as a float.
+float LoadLEFloat(const uint8_t* p) {
+  const uint32_t raw_bits = LoadLE32(p);
+  float value;
+  memcpy(&value, &raw_bits, sizeof(raw_bits));
+  return value;
+}
+
+// Decodes half-precision bits (1 sign, 5 exponent, 10 mantissa) into a float.
+// Based on highway scalar implementation, for testing
+float LoadFloat16(uint16_t bits16) {
+  const uint32_t sign = bits16 >> 15;
+  const uint32_t exp16 = (bits16 >> 10) & 0x1F;
+  const uint32_t frac16 = bits16 & 0x3FF;
+
+  float result;
+  if (exp16 == 0) {
+    // Subnormal or zero
+    const float magnitude = (1.0f / 16384) * (frac16 * (1.0f / 1024));
+    result = sign ? -magnitude : magnitude;
+  } else {
+    // Normalized: rebias the exponent (15 -> 127), widen the mantissa
+    // (10 -> 23 bits) and assemble the 32-bit representation directly.
+    const uint32_t bits32 =
+        (sign << 31) | ((exp16 + (127 - 15)) << 23) | (frac16 << (23 - 10));
+    memcpy(&result, &bits32, 4);
+  }
+  return result;
+}
+
+float LoadLEFloat16(const uint8_t* p) {
+  // Read 2 little-endian bytes, then decode them with LoadFloat16.
+  return LoadFloat16(LoadLE16(p));
+}
+
+float LoadBEFloat16(const uint8_t* p) {
+  // Read 2 big-endian bytes, then decode them with LoadFloat16.
+  return LoadFloat16(LoadBE16(p));
+}
+
+// Returns how many bits of precision a sample of `data_type` provides.
+// Boolean and integer types report their full bit width; floating point
+// types report their mantissa precision.
+size_t GetPrecision(JxlDataType data_type) {
+  // Boolean and unsigned integer sample types.
+  if (data_type == JXL_TYPE_BOOLEAN) return 1;
+  if (data_type == JXL_TYPE_UINT8) return 8;
+  if (data_type == JXL_TYPE_UINT16) return 16;
+  if (data_type == JXL_TYPE_UINT32) return 32;
+
+  // Floating point mantissa precision
+  if (data_type == JXL_TYPE_FLOAT) return 24;
+  if (data_type == JXL_TYPE_FLOAT16) return 11;
+
+  // Only reached for a value that is none of the data types handled above;
+  // no value is returned on this path.
+  JXL_ASSERT(false);  // unknown type
+}
+
+// Returns the number of data bits of one sample of `data_type`.
+// Integer and floating point types of equal width share a case.
+size_t GetDataBits(JxlDataType data_type) {
+  switch (data_type) {
+    case JXL_TYPE_BOOLEAN:
+      return 1;
+    case JXL_TYPE_UINT8:
+      return 8;
+    case JXL_TYPE_UINT16:
+    case JXL_TYPE_FLOAT16:
+      return 16;
+    case JXL_TYPE_UINT32:
+    case JXL_TYPE_FLOAT:
+      return 32;
+  }
+  JXL_ASSERT(false);  // unknown type
+}
+
+// Selects how the test codestream is wrapped in container boxes.
+enum CodeStreamBoxFormat {
+  // No box format at all: the pure codestream only
+  kCSBF_None,
+  // One codestream box whose actual size is given in the box
+  kCSBF_Single,
+  // One codestream box with box size 0 (final box running to end)
+  kCSBF_Single_Zero_Terminated,
+  // One codestream box, with another, unknown box behind it
+  kCSBF_Single_other,
+  // Multiple partial codestream boxes
+  kCSBF_Multi,
+  // Multiple partial codestream boxes, the final one with box size 0
+  // (running to end)
+  kCSBF_Multi_Zero_Terminated,
+  // Multiple partial codestream boxes, terminated by a non-codestream box
+  kCSBF_Multi_Other_Terminated,
+  // Multiple partial codestream boxes, terminated by a non-codestream box
+  // that has its size set to 0 (running to end)
+  kCSBF_Multi_Other_Zero_Terminated,
+  // Multiple partial codestream boxes, the first of which has content of
+  // zero length
+  kCSBF_Multi_First_Empty,
+  // Not a format: used for counting the number of enum entries
+  kCSBF_NUM_ENTRIES,
+};
+
+// Returns an ICC profile output by the JPEG XL decoder for RGB_D65_SRG_Rel_Lin,
+// but with rXYZ, bXYZ and gXYZ (the RGB primaries) deliberately switched to a
+// different order, so that the profile does not match any known profile and
+// the encoder cannot encode it in a compact struct instead.
+jxl::PaddedBytes GetIccTestProfile() {
+  const uint8_t* profile = reinterpret_cast<const uint8_t*>(
+      "\0\0\3\200lcms\0040\0\0mntrRGB XYZ "
+      "\a\344\0\a\0\27\0\21\0$"
+      "\0\37acspAPPL\0\0\0\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\0\0\366"
+      "\326\0\1\0\0\0\0\323-lcms\372c\207\36\227\200{"
+      "\2\232s\255\327\340\0\n\26\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+      "\0\0\0\0\0\0\0\0\rdesc\0\0\1 "
+      "\0\0\0Bcprt\0\0\1d\0\0\1\0wtpt\0\0\2d\0\0\0\24chad\0\0\2x\0\0\0,"
+      "bXYZ\0\0\2\244\0\0\0\24gXYZ\0\0\2\270\0\0\0\24rXYZ\0\0\2\314\0\0\0\24rTR"
+      "C\0\0\2\340\0\0\0 gTRC\0\0\2\340\0\0\0 bTRC\0\0\2\340\0\0\0 "
+      "chrm\0\0\3\0\0\0\0$dmnd\0\0\3$\0\0\0("
+      "dmdd\0\0\3L\0\0\0002mluc\0\0\0\0\0\0\0\1\0\0\0\fenUS\0\0\0&"
+      "\0\0\0\34\0R\0G\0B\0_\0D\0006\0005\0_\0S\0R\0G\0_\0R\0e\0l\0_"
+      "\0L\0i\0n\0\0mluc\0\0\0\0\0\0\0\1\0\0\0\fenUS\0\0\0\344\0\0\0\34\0C\0o\0"
+      "p\0y\0r\0i\0g\0h\0t\0 \0002\0000\0001\08\0 \0G\0o\0o\0g\0l\0e\0 "
+      "\0L\0L\0C\0,\0 \0C\0C\0-\0B\0Y\0-\0S\0A\0 \0003\0.\0000\0 "
+      "\0U\0n\0p\0o\0r\0t\0e\0d\0 "
+      "\0l\0i\0c\0e\0n\0s\0e\0(\0h\0t\0t\0p\0s\0:\0/\0/"
+      "\0c\0r\0e\0a\0t\0i\0v\0e\0c\0o\0m\0m\0o\0n\0s\0.\0o\0r\0g\0/"
+      "\0l\0i\0c\0e\0n\0s\0e\0s\0/\0b\0y\0-\0s\0a\0/\0003\0.\0000\0/"
+      "\0l\0e\0g\0a\0l\0c\0o\0d\0e\0)XYZ "
+      "\0\0\0\0\0\0\366\326\0\1\0\0\0\0\323-"
+      "sf32\0\0\0\0\0\1\fB\0\0\5\336\377\377\363%"
+      "\0\0\a\223\0\0\375\220\377\377\373\241\377\377\375\242\0\0\3\334\0\0\300"
+      "nXYZ \0\0\0\0\0\0o\240\0\08\365\0\0\3\220XYZ "
+      "\0\0\0\0\0\0$\237\0\0\17\204\0\0\266\304XYZ "
+      "\0\0\0\0\0\0b\227\0\0\267\207\0\0\30\331para\0\0\0\0\0\3\0\0\0\1\0\0\0\1"
+      "\0\0\0\0\0\0\0\1\0\0\0\0\0\0chrm\0\0\0\0\0\3\0\0\0\0\243\327\0\0T|"
+      "\0\0L\315\0\0\231\232\0\0&"
+      "g\0\0\17\\mluc\0\0\0\0\0\0\0\1\0\0\0\fenUS\0\0\0\f\0\0\0\34\0G\0o\0o\0g"
+      "\0l\0emluc\0\0\0\0\0\0\0\1\0\0\0\fenUS\0\0\0\26\0\0\0\34\0I\0m\0a\0g\0e"
+      "\0 \0c\0o\0d\0e\0c\0\0");
+  size_t profile_size = 896;  // explicit: the literal has embedded NULs
+  jxl::PaddedBytes icc_profile;
+  icc_profile.assign(profile, profile + profile_size);
+  return icc_profile;
+}
+
+}  // namespace
+
+namespace jxl {
+namespace {
+
+// Builds a JPEG XL test file from raw pixels: encodes them and, depending on
+// add_container, wraps the resulting codestream in the box-based container.
+//
+// pixels: big-endian samples, 16 bits per sample, or 8 bits per sample when
+// jpeg_codestream is given. Treated as grayscale when num_channels <= 2 and as
+// RGB otherwise.
+// num_channels: an even value makes the encoded image contain an alpha
+// channel, except when jpeg_codestream is given (alpha is then never used).
+// add_container: kCSBF_None returns the bare codestream; every other value
+// selects one of the box layouts assembled at the end of this function.
+// add_preview: if true, a copy of the main frame passed through
+// ShrinkTo(xsize / 7, ysize / 7) is attached as preview frame.
+// add_icc_profile: if false, encodes the image as sRGB using the JXL fields,
+// for grayscale or RGB images. If true, encodes the image using the ICC profile
+// returned by GetIccTestProfile, without the JXL fields; this requires the
+// image to be RGB, not grayscale.
+// Providing jpeg_codestream will populate the jpeg_codestream with compressed
+// JPEG bytes, and make it possible to reconstruct those exact JPEG bytes using
+// the return value _if_ add_container indicates a box format.
+PaddedBytes CreateTestJXLCodestream(
+    Span<const uint8_t> pixels, size_t xsize, size_t ysize, size_t num_channels,
+    const CompressParams& cparams, CodeStreamBoxFormat add_container,
+    JxlOrientation orientation, bool add_preview, bool add_icc_profile = false,
+    PaddedBytes* jpeg_codestream = nullptr) {
+  // Compress the pixels with JPEG XL.
+  bool grayscale = (num_channels <= 2);
+  bool include_alpha = !(num_channels & 1) && jpeg_codestream == nullptr;
+  size_t bitdepth = jpeg_codestream == nullptr ? 16 : 8;
+  CodecInOut io;
+  io.SetSize(xsize, ysize);
+  ColorEncoding color_encoding =
+      jxl::ColorEncoding::SRGB(/*is_gray=*/grayscale);
+  if (add_icc_profile) {
+    // the hardcoded ICC profile we attach requires RGB.
+    EXPECT_EQ(false, grayscale);
+    EXPECT_TRUE(color_encoding.SetICC(GetIccTestProfile()));
+  }
+  ThreadPool pool(nullptr, nullptr);
+  io.metadata.m.SetUintSamples(bitdepth);
+  if (include_alpha) {
+    io.metadata.m.SetAlphaBits(bitdepth);
+  }
+  // Make the grayscale-ness of the io metadata color_encoding and the packed
+  // image match.
+  io.metadata.m.color_encoding = color_encoding;
+  EXPECT_TRUE(ConvertFromExternal(
+      pixels, xsize, ysize, color_encoding, /*has_alpha=*/include_alpha,
+      /*alpha_is_premultiplied=*/false, bitdepth, JXL_BIG_ENDIAN,
+      /*flipped_y=*/false, &pool, &io.Main()));
+  jxl::PaddedBytes jpeg_data;
+  if (jpeg_codestream != nullptr) {
+#if JPEGXL_ENABLE_JPEG
+    // Round-trip the pixels through a real JPEG so that the encoder input
+    // carries JPEG data; the JPEG bytes are handed back to the caller and the
+    // serialized JPEG data is kept for the 'jbrd' box below.
+    jxl::PaddedBytes jpeg_bytes;
+    EXPECT_TRUE(EncodeImageJPG(&io, jxl::JpegEncoder::kLibJpeg, /*quality=*/70,
+                               jxl::YCbCrChromaSubsampling(), &pool,
+                               &jpeg_bytes, jxl::DecodeTarget::kPixels));
+    jpeg_codestream->append(jpeg_bytes.data(),
+                            jpeg_bytes.data() + jpeg_bytes.size());
+    EXPECT_TRUE(jxl::jpeg::DecodeImageJPG(
+        jxl::Span<const uint8_t>(jpeg_bytes.data(), jpeg_bytes.size()), &io));
+    EXPECT_TRUE(EncodeJPEGData(*io.Main().jpeg_data, &jpeg_data));
+    io.metadata.m.xyb_encoded = false;
+#else   // JPEGXL_ENABLE_JPEG
+    JXL_ABORT(
+        "unable to create reconstructible JPEG without JPEG support enabled");
+#endif  // JPEGXL_ENABLE_JPEG
+  }
+  if (add_preview) {
+    io.preview_frame = io.Main().Copy();
+    io.preview_frame.ShrinkTo(xsize / 7, ysize / 7);
+    io.metadata.m.have_preview = true;
+    EXPECT_TRUE(io.metadata.m.preview_size.Set(io.preview_frame.xsize(),
+                                               io.preview_frame.ysize()));
+  }
+  io.metadata.m.orientation = orientation;
+  AuxOut aux_out;
+  PaddedBytes compressed;
+  PassesEncoderState enc_state;
+  EXPECT_TRUE(
+      EncodeFile(cparams, &io, &enc_state, &compressed, &aux_out, &pool));
+
+  // Without a container the bare codestream is the result.
+  if (add_container == kCSBF_None) return compressed;
+
+  // Header with signature box and ftyp box.
+  const uint8_t header[] = {0,    0,    0,    0xc,  0x4a, 0x58, 0x4c, 0x20,
+                            0xd,  0xa,  0x87, 0xa,  0,    0,    0,    0x14,
+                            0x66, 0x74, 0x79, 0x70, 0x6a, 0x78, 0x6c, 0x20,
+                            0,    0,    0,    0,    0x6a, 0x78, 0x6c, 0x20};
+  // Unknown box, could be a box added by user, decoder must be able to skip
+  // over it. Type is set to 'unkn', size to 24, contents to 16 0's.
+  const uint8_t unknown[] = {0, 0, 0, 0x18, 0x75, 0x6e, 0x6b, 0x6e,
+                             0, 0, 0, 0,    0,    0,    0,    0,
+                             0, 0, 0, 0,    0,    0,    0,    0};
+  // same as the unknown box, but with size set to 0, this can only be a final
+  // box
+  const uint8_t unknown_end[] = {0, 0, 0, 0, 0x75, 0x6e, 0x6b, 0x6e,
+                                 0, 0, 0, 0, 0,    0,    0,    0,
+                                 0, 0, 0, 0, 0,    0,    0,    0};
+
+  // Every container layout starts with the fixed header, followed by the
+  // 'jbrd' box holding the serialized JPEG data when a JPEG was requested.
+  PaddedBytes c;
+  c.append(header, header + sizeof(header));
+  if (jpeg_codestream != nullptr) {
+    jxl::AppendBoxHeader(jxl::MakeBoxType("jbrd"), jpeg_data.size(), false, &c);
+    c.append(jpeg_data.data(), jpeg_data.data() + jpeg_data.size());
+  }
+
+  const bool is_multi = add_container == kCSBF_Multi ||
+                        add_container == kCSBF_Multi_Zero_Terminated ||
+                        add_container == kCSBF_Multi_Other_Terminated ||
+                        add_container == kCSBF_Multi_Other_Zero_Terminated ||
+                        add_container == kCSBF_Multi_First_Empty;
+
+  if (is_multi) {
+    // Split the codestream in three parts, each stored in its own 'jxlp' box.
+    size_t third = compressed.size() / 3;
+    std::vector<uint8_t> compressed0(compressed.data(),
+                                     compressed.data() + third);
+    std::vector<uint8_t> compressed1(compressed.data() + third,
+                                     compressed.data() + 2 * third);
+    std::vector<uint8_t> compressed2(compressed.data() + 2 * third,
+                                     compressed.data() + compressed.size());
+
+    // Appends the 12-byte header of a 'jxlp' box: box size, box type and the
+    // part index.
+    const auto append_jxlp_header = [&c](size_t box_size, uint32_t index) {
+      AppendU32BE(box_size, &c);
+      c.push_back('j');
+      c.push_back('x');
+      c.push_back('l');
+      c.push_back('p');
+      AppendU32BE(index, &c);
+    };
+
+    uint32_t jxlp_index = 0;
+    if (add_container == kCSBF_Multi_First_Empty) {
+      // Dummy (empty) codestream part
+      append_jxlp_header(12, jxlp_index++);
+    }
+    // First codestream part
+    append_jxlp_header(compressed0.size() + 12, jxlp_index++);
+    c.append(compressed0.data(), compressed0.data() + compressed0.size());
+    // A few non-codestream boxes in between
+    c.append(unknown, unknown + sizeof(unknown));
+    c.append(unknown, unknown + sizeof(unknown));
+    // Dummy (empty) codestream part
+    append_jxlp_header(12, jxlp_index++);
+    // Second codestream part
+    append_jxlp_header(compressed1.size() + 12, jxlp_index++);
+    c.append(compressed1.data(), compressed1.data() + compressed1.size());
+    // Third (final) codestream part; the high bit of the index marks it as the
+    // last one. Only the zero-terminated variant may use box size 0 ("runs to
+    // the end of the file"): all other variants store the real size, so that a
+    // box appended afterwards is parsed as a separate box instead of being
+    // swallowed into the codestream part.
+    const bool last_size_zero = add_container == kCSBF_Multi_Zero_Terminated;
+    append_jxlp_header(last_size_zero ? 0 : (compressed2.size() + 12),
+                       jxlp_index++ | 0x80000000);
+    c.append(compressed2.data(), compressed2.data() + compressed2.size());
+    if (add_container == kCSBF_Multi_Other_Terminated) {
+      c.append(unknown, unknown + sizeof(unknown));
+    }
+    if (add_container == kCSBF_Multi_Other_Zero_Terminated) {
+      c.append(unknown_end, unknown_end + sizeof(unknown_end));
+    }
+  } else {
+    // Single 'jxlc' box holding the whole codestream; size 0 means the box
+    // runs to the end of the file.
+    AppendU32BE(add_container == kCSBF_Single_Zero_Terminated
+                    ? 0
+                    : (compressed.size() + 8),
+                &c);
+    c.push_back('j');
+    c.push_back('x');
+    c.push_back('l');
+    c.push_back('c');
+    c.append(compressed.data(), compressed.data() + compressed.size());
+    if (add_container == kCSBF_Single_other) {
+      c.append(unknown, unknown + sizeof(unknown));
+    }
+  }
+  compressed.swap(c);
+
+  return compressed;
+}
+
+// Decodes one-shot with the API for non-streaming decoding tests.
+//
+// dec: decoder to use; it is not destroyed by this function.
+// compressed: the complete input, handed to the decoder in one piece.
+// format: pixel format requested for both the preview and the full image.
+// use_callback: if true, pixels are received through the image out callback,
+// otherwise through an image out buffer.
+// set_buffer_early: if true, JXL_DEC_FRAME is subscribed to and the output is
+// configured when that event arrives instead of waiting for
+// JXL_DEC_NEED_IMAGE_OUT_BUFFER.
+// use_resizable_runner: selects the resizable parallel runner instead of the
+// fixed thread pool runner.
+// Returns the decoded pixels of the full image. All checks use non-fatal
+// EXPECT_* macros, so execution continues after a failure; locals that are
+// filled in by the decoder are therefore initialized up front.
+std::vector<uint8_t> DecodeWithAPI(JxlDecoder* dec,
+                                   Span<const uint8_t> compressed,
+                                   const JxlPixelFormat& format,
+                                   bool use_callback, bool set_buffer_early,
+                                   bool use_resizable_runner) {
+  JxlThreadParallelRunnerPtr runner_fixed;
+  JxlResizableParallelRunnerPtr runner_resizable;
+  JxlParallelRunner runner_fn;
+  void* runner;
+
+  if (use_resizable_runner) {
+    runner_resizable = JxlResizableParallelRunnerMake(nullptr);
+    runner = runner_resizable.get();
+    runner_fn = JxlResizableParallelRunner;
+  } else {
+    runner_fixed = JxlThreadParallelRunnerMake(
+        nullptr, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+    runner = runner_fixed.get();
+    runner_fn = JxlThreadParallelRunner;
+  }
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetParallelRunner(dec, runner_fn, runner));
+
+  EXPECT_EQ(
+      JXL_DEC_SUCCESS,
+      JxlDecoderSubscribeEvents(
+          dec, JXL_DEC_BASIC_INFO | (set_buffer_early ? JXL_DEC_FRAME : 0) |
+                   JXL_DEC_PREVIEW_IMAGE | JXL_DEC_FULL_IMAGE));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  // Zero-initialized: if one of the queries below fails, the test reports the
+  // failure and carries on with an empty buffer rather than reading
+  // indeterminate values.
+  size_t buffer_size = 0;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+  JxlBasicInfo info = {};
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  if (use_resizable_runner) {
+    JxlResizableParallelRunnerSetThreads(
+        runner,
+        JxlResizableParallelRunnerSuggestThreads(info.xsize, info.ysize));
+  }
+
+  std::vector<uint8_t> pixels(buffer_size);
+  size_t bytes_per_pixel =
+      format.num_channels * GetDataBits(format.data_type) / jxl::kBitsPerByte;
+  // Row stride of the output buffer, rounded up to the requested alignment.
+  size_t stride = bytes_per_pixel * info.xsize;
+  if (format.align > 1) {
+    stride = jxl::DivCeil(stride, format.align) * format.align;
+  }
+  // Copies a run of num_pixels pixels starting at (x, y) into `pixels`.
+  auto callback = [&](size_t x, size_t y, size_t num_pixels,
+                      const void* pixels_row) {
+    memcpy(pixels.data() + stride * y + bytes_per_pixel * x, pixels_row,
+           num_pixels * bytes_per_pixel);
+  };
+
+  JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+
+  // A preview is optional: decode it into a scratch buffer when the decoder
+  // asks for one, then continue with the main image.
+  std::vector<uint8_t> preview;
+  if (status == JXL_DEC_NEED_PREVIEW_OUT_BUFFER) {
+    size_t preview_size = 0;
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderPreviewOutBufferSize(dec, &format, &preview_size));
+    preview.resize(preview_size);
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetPreviewOutBuffer(dec, &format, preview.data(),
+                                            preview.size()));
+    EXPECT_EQ(JXL_DEC_PREVIEW_IMAGE, JxlDecoderProcessInput(dec));
+
+    status = JxlDecoderProcessInput(dec);
+  }
+
+  if (set_buffer_early) {
+    EXPECT_EQ(JXL_DEC_FRAME, status);
+  } else {
+    EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, status);
+  }
+
+  if (use_callback) {
+    // The C API takes a plain function pointer plus an opaque pointer; the
+    // captureless lambda forwards to the capturing `callback` above.
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetImageOutCallback(
+                  dec, &format,
+                  [](void* opaque, size_t x, size_t y, size_t xsize,
+                     const void* pixels_row) {
+                    auto cb = static_cast<decltype(&callback)>(opaque);
+                    (*cb)(x, y, xsize, pixels_row);
+                  },
+                  /*opaque=*/&callback));
+  } else {
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                   dec, &format, pixels.data(), pixels.size()));
+  }
+
+  EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+
+  // After the full image is gotten, JxlDecoderProcessInput should return
+  // success to indicate all is done.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+  return pixels;
+}
+
+// Decodes one-shot with the API for non-streaming decoding tests.
+// Convenience overload: creates a decoder with the default memory manager,
+// forwards all arguments to the overload taking a decoder, and destroys the
+// decoder again before returning the decoded pixels.
+std::vector<uint8_t> DecodeWithAPI(Span<const uint8_t> compressed,
+                                   const JxlPixelFormat& format,
+                                   bool use_callback, bool set_buffer_early,
+                                   bool use_resizable_runner) {
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+  std::vector<uint8_t> pixels =
+      DecodeWithAPI(dec, compressed, format, use_callback, set_buffer_early,
+                    use_resizable_runner);
+  JxlDecoderDestroy(dec);
+  return pixels;
+}
+
+}  // namespace
+}  // namespace jxl
+
+namespace {
+
+// Procedure to convert pixels to double precision, not efficient, but
+// well-controlled for testing. It uses double, to be able to represent all
+// precisions needed for the maximum data types the API supports: uint32_t
+// integers, and, single precision float. The values are in range 0-1 for SDR.
+//
+// pixels: image rows in the layout described by format (channel count, sample
+// type, endianness and row alignment).
+// Returns xsize * ysize * 4 values in R, G, B, A order. For grayscale formats
+// (1 or 2 channels) the gray value is replicated into R, G and B; for formats
+// without alpha (1 or 3 channels) A is set to fully opaque.
+std::vector<double> ConvertToRGBA32(const uint8_t* pixels, size_t xsize,
+                                    size_t ysize,
+                                    const JxlPixelFormat& format) {
+  std::vector<double> result(xsize * ysize * 4);
+  size_t num_channels = format.num_channels;
+  bool gray = num_channels == 1 || num_channels == 2;
+  bool alpha = num_channels == 2 || num_channels == 4;
+
+  size_t stride =
+      xsize * jxl::DivCeil(GetDataBits(format.data_type) * num_channels,
+                           jxl::kBitsPerByte);
+  if (format.align > 1) stride = jxl::RoundUpTo(stride, format.align);
+
+  // In every branch below, `i` is the byte offset of the pixel in the input
+  // and `j` the index of its R value in the result; alpha is always the last
+  // sample of the pixel.
+  if (format.data_type == JXL_TYPE_BOOLEAN) {
+    for (size_t y = 0; y < ysize; ++y) {
+      // Samples are single bits, read sequentially from the start of the row.
+      jxl::BitReader br(jxl::Span<const uint8_t>(pixels + stride * y, stride));
+      for (size_t x = 0; x < xsize; ++x) {
+        size_t j = (y * xsize + x) * 4;
+        double r = br.ReadBits(1);
+        double g = gray ? r : br.ReadBits(1);
+        double b = gray ? r : br.ReadBits(1);
+        double a = alpha ? br.ReadBits(1) : 1;
+        result[j + 0] = r;
+        result[j + 1] = g;
+        result[j + 2] = b;
+        result[j + 3] = a;
+      }
+      JXL_CHECK(br.Close());
+    }
+  } else if (format.data_type == JXL_TYPE_UINT8) {
+    double mul = 1.0 / 255.0;  // Multiplier to bring to 0-1.0 range
+    for (size_t y = 0; y < ysize; ++y) {
+      for (size_t x = 0; x < xsize; ++x) {
+        size_t j = (y * xsize + x) * 4;
+        size_t i = y * stride + x * num_channels;
+        double r = pixels[i];
+        double g = gray ? r : pixels[i + 1];
+        double b = gray ? r : pixels[i + 2];
+        double a = alpha ? pixels[i + num_channels - 1] : 255;
+        result[j + 0] = r * mul;
+        result[j + 1] = g * mul;
+        result[j + 2] = b * mul;
+        result[j + 3] = a * mul;
+      }
+    }
+  } else if (format.data_type == JXL_TYPE_UINT16) {
+    double mul = 1.0 / 65535.0;  // Multiplier to bring to 0-1.0 range
+    for (size_t y = 0; y < ysize; ++y) {
+      for (size_t x = 0; x < xsize; ++x) {
+        size_t j = (y * xsize + x) * 4;
+        size_t i = y * stride + x * num_channels * 2;
+        double r, g, b, a;
+        if (format.endianness == JXL_BIG_ENDIAN) {
+          r = (pixels[i + 0] << 8) + pixels[i + 1];
+          g = gray ? r : (pixels[i + 2] << 8) + pixels[i + 3];
+          b = gray ? r : (pixels[i + 4] << 8) + pixels[i + 5];
+          a = alpha ? (pixels[i + num_channels * 2 - 2] << 8) +
+                          pixels[i + num_channels * 2 - 1]
+                    : 65535;
+        } else {
+          r = (pixels[i + 1] << 8) + pixels[i + 0];
+          g = gray ? r : (pixels[i + 3] << 8) + pixels[i + 2];
+          b = gray ? r : (pixels[i + 5] << 8) + pixels[i + 4];
+          a = alpha ? (pixels[i + num_channels * 2 - 1] << 8) +
+                          pixels[i + num_channels * 2 - 2]
+                    : 65535;
+        }
+        result[j + 0] = r * mul;
+        result[j + 1] = g * mul;
+        result[j + 2] = b * mul;
+        result[j + 3] = a * mul;
+      }
+    }
+  } else if (format.data_type == JXL_TYPE_UINT32) {
+    double mul = 1.0 / 4294967295.0;  // Multiplier to bring to 0-1.0 range
+    for (size_t y = 0; y < ysize; ++y) {
+      for (size_t x = 0; x < xsize; ++x) {
+        size_t j = (y * xsize + x) * 4;
+        size_t i = y * stride + x * num_channels * 4;
+        double r, g, b, a;
+        // Samples are 4 bytes wide, so the alpha sample (the last one of the
+        // pixel) starts at byte offset num_channels * 4 - 4.
+        if (format.endianness == JXL_BIG_ENDIAN) {
+          r = LoadBE32(pixels + i);
+          g = gray ? r : LoadBE32(pixels + i + 4);
+          b = gray ? r : LoadBE32(pixels + i + 8);
+          a = alpha ? LoadBE32(pixels + i + num_channels * 4 - 4) : 4294967295;
+
+        } else {
+          r = LoadLE32(pixels + i);
+          g = gray ? r : LoadLE32(pixels + i + 4);
+          b = gray ? r : LoadLE32(pixels + i + 8);
+          a = alpha ? LoadLE32(pixels + i + num_channels * 4 - 4) : 4294967295;
+        }
+        result[j + 0] = r * mul;
+        result[j + 1] = g * mul;
+        result[j + 2] = b * mul;
+        result[j + 3] = a * mul;
+      }
+    }
+  } else if (format.data_type == JXL_TYPE_FLOAT) {
+    // Float samples are stored as-is, without rescaling.
+    for (size_t y = 0; y < ysize; ++y) {
+      for (size_t x = 0; x < xsize; ++x) {
+        size_t j = (y * xsize + x) * 4;
+        size_t i = y * stride + x * num_channels * 4;
+        double r, g, b, a;
+        if (format.endianness == JXL_BIG_ENDIAN) {
+          r = LoadBEFloat(pixels + i);
+          g = gray ? r : LoadBEFloat(pixels + i + 4);
+          b = gray ? r : LoadBEFloat(pixels + i + 8);
+          a = alpha ? LoadBEFloat(pixels + i + num_channels * 4 - 4) : 1.0;
+        } else {
+          r = LoadLEFloat(pixels + i);
+          g = gray ? r : LoadLEFloat(pixels + i + 4);
+          b = gray ? r : LoadLEFloat(pixels + i + 8);
+          a = alpha ? LoadLEFloat(pixels + i + num_channels * 4 - 4) : 1.0;
+        }
+        result[j + 0] = r;
+        result[j + 1] = g;
+        result[j + 2] = b;
+        result[j + 3] = a;
+      }
+    }
+  } else if (format.data_type == JXL_TYPE_FLOAT16) {
+    for (size_t y = 0; y < ysize; ++y) {
+      for (size_t x = 0; x < xsize; ++x) {
+        size_t j = (y * xsize + x) * 4;
+        size_t i = y * stride + x * num_channels * 2;
+        double r, g, b, a;
+        if (format.endianness == JXL_BIG_ENDIAN) {
+          r = LoadBEFloat16(pixels + i);
+          g = gray ? r : LoadBEFloat16(pixels + i + 2);
+          b = gray ? r : LoadBEFloat16(pixels + i + 4);
+          a = alpha ? LoadBEFloat16(pixels + i + num_channels * 2 - 2) : 1.0;
+        } else {
+          r = LoadLEFloat16(pixels + i);
+          g = gray ? r : LoadLEFloat16(pixels + i + 2);
+          b = gray ? r : LoadLEFloat16(pixels + i + 4);
+          a = alpha ? LoadLEFloat16(pixels + i + num_channels * 2 - 2) : 1.0;
+        }
+        result[j + 0] = r;
+        result[j + 1] = g;
+        result[j + 2] = b;
+        result[j + 3] = a;
+      }
+    }
+  } else {
+    JXL_ASSERT(false);  // Unsupported type
+  }
+  return result;
+}
+
+// Returns the number of pixels which differ between the two pictures. Image a
+// is the image before roundtrip, image b the image after roundtrip. There are
+// more strict requirements for the alpha channel and grayscale values of the
+// output image.
+//
+// a, b: pixel data of xsize by ysize pixels, laid out according to format_a
+// and format_b respectively.
+// The allowed per-channel distance is derived from the lower of the two
+// sample precisions.
+size_t ComparePixels(const uint8_t* a, const uint8_t* b, size_t xsize,
+                     size_t ysize, const JxlPixelFormat& format_a,
+                     const JxlPixelFormat& format_b) {
+  // Convert both images to equal full precision for comparison.
+  std::vector<double> a_full = ConvertToRGBA32(a, xsize, ysize, format_a);
+  std::vector<double> b_full = ConvertToRGBA32(b, xsize, ysize, format_b);
+  bool gray_a = format_a.num_channels < 3;
+  bool gray_b = format_b.num_channels < 3;
+  bool alpha_a = !(format_a.num_channels & 1);
+  bool alpha_b = !(format_b.num_channels & 1);
+  size_t bits_a = GetPrecision(format_a.data_type);
+  size_t bits_b = GetPrecision(format_b.data_type);
+  size_t bits = std::min(bits_a, bits_b);
+  // How much distance is allowed in case of pixels with lower bit depths, given
+  // that the double precision float images use range 0-1.0.
+  // E.g. in case of 1-bit this is 0.5 since 0.499 must map to 0 and 0.501 must
+  // map to 1.
+  double precision = 0.5 / ((1ull << bits) - 1ull);
+  if (format_a.data_type == JXL_TYPE_FLOAT16 ||
+      format_b.data_type == JXL_TYPE_FLOAT16) {
+    // Lower the precision for float16, because it currently looks like the
+    // scalar and wasm implementations of hwy have 1 less bit of precision
+    // than the x86 implementations.
+    // TODO(lode): Set the required precision back to 11 bits when possible.
+    precision = 0.5 / ((1ull << (bits - 1)) - 1ull);
+  }
+  size_t numdiff = 0;
+  for (size_t y = 0; y < ysize; y++) {
+    for (size_t x = 0; x < xsize; x++) {
+      size_t i = (y * xsize + x) * 4;
+      bool ok = true;
+      if (gray_a || gray_b) {
+        // Only one channel carries information; compare the first one.
+        if (!Near(a_full[i + 0], b_full[i + 0], precision)) ok = false;
+        // If the input was grayscale and the output not, then the output must
+        // have all channels equal.
+        if (gray_a && (b_full[i + 0] != b_full[i + 1] ||
+                       b_full[i + 0] != b_full[i + 2])) {
+          ok = false;
+        }
+      } else {
+        if (!Near(a_full[i + 0], b_full[i + 0], precision) ||
+            !Near(a_full[i + 1], b_full[i + 1], precision) ||
+            !Near(a_full[i + 2], b_full[i + 2], precision)) {
+          ok = false;
+        }
+      }
+      if (alpha_a && alpha_b) {
+        if (!Near(a_full[i + 3], b_full[i + 3], precision)) ok = false;
+      } else {
+        // If the input had no alpha channel, the output should be opaque
+        // after roundtrip.
+        if (alpha_b && !Near(1.0, b_full[i + 3], precision)) ok = false;
+      }
+      if (!ok) numdiff++;
+    }
+  }
+  return numdiff;
+}
+
+}  // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Checks JxlSignatureCheck against a table of byte sequences paired with the
+// signature classification expected for each of them.
+TEST(DecodeTest, JxlSignatureCheckTest) {
+  using Bytes = std::vector<uint8_t>;
+  const std::vector<std::pair<int, Bytes>> cases = {
+      // No JPEGXL header starts with 'a'.
+      {JXL_SIG_INVALID, {'a'}},
+      {JXL_SIG_INVALID, {'a', 'b', 'c', 'd', 'e', 'f'}},
+
+      // Empty file is not enough bytes.
+      {JXL_SIG_NOT_ENOUGH_BYTES, {}},
+
+      // JPEGXL headers.
+      {JXL_SIG_NOT_ENOUGH_BYTES, {0xff}},  // Part of a signature.
+      {JXL_SIG_INVALID, {0xff, 0xD8}},     // JPEG-1
+      {JXL_SIG_CODESTREAM, {0xff, 0x0a}},
+
+      // JPEGXL container file.
+      {JXL_SIG_CONTAINER,
+       {0, 0, 0, 0xc, 'J', 'X', 'L', ' ', 0xD, 0xA, 0x87, 0xA}},
+      // Ending with invalid byte.
+      {JXL_SIG_INVALID, {0, 0, 0, 0xc, 'J', 'X', 'L', ' ', 0xD, 0xA, 0x87, 0}},
+      // Part of signature.
+      {JXL_SIG_NOT_ENOUGH_BYTES,
+       {0, 0, 0, 0xc, 'J', 'X', 'L', ' ', 0xD, 0xA, 0x87}},
+      {JXL_SIG_NOT_ENOUGH_BYTES, {0}},
+  };
+  for (const auto& entry : cases) {
+    const int expected = entry.first;
+    const Bytes& bytes = entry.second;
+    EXPECT_EQ(expected, JxlSignatureCheck(bytes.data(), bytes.size()))
+        << "Where test data is " << ::testing::PrintToString(bytes);
+  }
+}
+
+// A decoder created without a memory manager (nullptr) must be constructed
+// successfully and be destroyable.
+TEST(DecodeTest, DefaultAllocTest) {
+  JxlDecoder* const decoder = JxlDecoderCreate(/*memory_manager=*/nullptr);
+  EXPECT_NE(nullptr, decoder);
+  JxlDecoderDestroy(decoder);
+}
+
+// Creates a decoder with a user-supplied memory manager that counts its
+// invocations, and checks that creation allocates through it and that
+// destruction frees through it.
+TEST(DecodeTest, CustomAllocTest) {
+  struct CalledCounters {
+    int allocs = 0;
+    int frees = 0;
+  };
+  CalledCounters calls;
+
+  // The counters travel through the opaque pointer of the memory manager.
+  JxlMemoryManager manager;
+  manager.opaque = &calls;
+  manager.alloc = [](void* opaque, size_t size) {
+    auto* stats = reinterpret_cast<CalledCounters*>(opaque);
+    stats->allocs++;
+    return malloc(size);
+  };
+  manager.free = [](void* opaque, void* address) {
+    auto* stats = reinterpret_cast<CalledCounters*>(opaque);
+    stats->frees++;
+    free(address);
+  };
+
+  JxlDecoder* const decoder = JxlDecoderCreate(&manager);
+  EXPECT_NE(nullptr, decoder);
+  // Creation must have allocated at least once and freed nothing yet.
+  EXPECT_LE(1, calls.allocs);
+  EXPECT_EQ(0, calls.frees);
+  JxlDecoderDestroy(decoder);
+  // Destruction must have released memory through the custom manager.
+  EXPECT_LE(1, calls.frees);
+}
+
+// Setting a null parallel runner with a null opaque pointer must be accepted
+// by the decoder.
+// TODO(lode): add multi-threaded test when multithreaded pixel decoding from
+// API is implemented.
+TEST(DecodeTest, DefaultParallelRunnerTest) {
+  JxlDecoder* const decoder = JxlDecoderCreate(nullptr);
+  EXPECT_NE(nullptr, decoder);
+  const JxlDecoderStatus set_runner_status =
+      JxlDecoderSetParallelRunner(decoder, nullptr, nullptr);
+  EXPECT_EQ(JXL_DEC_SUCCESS, set_runner_status);
+  JxlDecoderDestroy(decoder);
+}
+
+// Creates the header of a JPEG XL file with various custom parameters for
+// testing. Only the headers are produced, no frame data follows.
+// xsize, ysize: image dimensions to store in the SizeHeader, max 512.
+// bits_per_sample: unsigned integer sample bit depth to set in the metadata.
+// orientation: image orientation to set in the metadata.
+// alpha_bits: alpha extra channel bits to set in the metadata. If non-0, the
+//   alpha channel is also given the name "alpha_test".
+// xyb_encoded: value of the xyb_encoded flag to set in the metadata.
+// have_container: add box container format around the codestream.
+// metadata_default: if true, ImageMetadata is left at its defaults and
+//   bits_per_sample, orientation, alpha_bits and xyb_encoded are ignored.
+// insert_extra_box: insert an extra box before the codestream box, making the
+//   header farther away from the front than is ideal. Only used if
+//   have_container.
+// icc_profile: if non-empty, set as ICC profile of the color encoding and
+//   written after the metadata.
+std::vector<uint8_t> GetTestHeader(size_t xsize, size_t ysize,
+                                   size_t bits_per_sample, size_t orientation,
+                                   size_t alpha_bits, bool xyb_encoded,
+                                   bool have_container, bool metadata_default,
+                                   bool insert_extra_box,
+                                   const jxl::PaddedBytes& icc_profile) {
+  jxl::BitWriter writer;
+  jxl::BitWriter::Allotment allotment(&writer, 65536);  // Large enough
+
+  // Emits a sequence of whole bytes through the bit writer.
+  const auto write_bytes = [&writer](const std::vector<uint8_t>& bytes) {
+    for (const uint8_t byte : bytes) {
+      writer.Write(8, byte);
+    }
+  };
+
+  if (have_container) {
+    // Signature box.
+    write_bytes({0, 0, 0, 0xc, 'J', 'X', 'L', ' ', 0xd, 0xa, 0x87, 0xa});
+    // File type box.
+    write_bytes({0,   0,   0, 0x14, 'f', 't', 'y', 'p', 'j', 'x',
+                 'l', ' ', 0, 0,    0,   0,   'j', 'x', 'l', ' '});
+    if (insert_extra_box) {
+      // Box of type 'test' with declared size 0xff: the 8 header bytes are
+      // followed by zeros filling up the remainder of the box.
+      write_bytes({0, 0, 0, 0xff, 't', 'e', 's', 't'});
+      const size_t num_padding_bytes = 255 - 8;
+      for (size_t k = 0; k < num_padding_bytes; ++k) {
+        writer.Write(8, 0);
+      }
+    }
+    // Beginning of codestream box, with an arbitrary size certainly large
+    // enough to contain the header
+    write_bytes({0, 0, 0, 0xff, 'j', 'x', 'l', 'c'});
+  }
+
+  // JXL signature
+  writer.Write(8, 0xff);
+  writer.Write(8, 0x0a);
+
+  // SizeHeader
+  jxl::CodecMetadata metadata;
+  EXPECT_TRUE(metadata.size.Set(xsize, ysize));
+  EXPECT_TRUE(WriteSizeHeader(metadata.size, &writer, 0, nullptr));
+
+  // Image metadata, customized unless the defaults were requested.
+  if (!metadata_default) {
+    metadata.m.SetUintSamples(bits_per_sample);
+    metadata.m.orientation = orientation;
+    metadata.m.SetAlphaBits(alpha_bits);
+    metadata.m.xyb_encoded = xyb_encoded;
+    if (alpha_bits != 0) {
+      metadata.m.extra_channel_info[0].name = "alpha_test";
+    }
+  }
+
+  const bool with_icc = !icc_profile.empty();
+  if (with_icc) {
+    // SetICC consumes its argument, so hand it a copy of the profile.
+    jxl::PaddedBytes copy = icc_profile;
+    EXPECT_TRUE(metadata.m.color_encoding.SetICC(std::move(copy)));
+  }
+
+  EXPECT_TRUE(jxl::Bundle::Write(metadata.m, &writer, 0, nullptr));
+  metadata.transform_data.nonserialized_xyb_encoded = metadata.m.xyb_encoded;
+  EXPECT_TRUE(jxl::Bundle::Write(metadata.transform_data, &writer, 0, nullptr));
+
+  if (with_icc) {
+    EXPECT_TRUE(metadata.m.color_encoding.WantICC());
+    EXPECT_TRUE(jxl::WriteICC(icc_profile, &writer, 0, nullptr));
+  }
+
+  writer.ZeroPadToByte();
+  ReclaimAndCharge(&writer, &allotment, 0, nullptr);
+  const auto written = writer.GetSpan();
+  return std::vector<uint8_t>(written.data(), written.data() + written.size());
+}
+
+// Feeds ever longer prefixes of two generated headers to a fresh decoder and
+// checks that the basic info becomes available, with the expected contents,
+// once the complete header is given, and is not available for short prefixes.
+TEST(DecodeTest, BasicInfoTest) {
+  // Index 0: bare codestream. Index 1: container with different parameters.
+  const size_t kNumCases = 2;
+  size_t xsize[2] = {50, 33};
+  size_t ysize[2] = {50, 77};
+  size_t bits_per_sample[2] = {8, 23};
+  size_t orientation[2] = {3, 5};
+  size_t alpha_bits[2] = {0, 8};
+  size_t have_container[2] = {0, 1};
+  bool xyb_encoded = false;
+
+  std::vector<std::vector<uint8_t>> test_samples;
+  for (size_t c = 0; c < kNumCases; ++c) {
+    test_samples.push_back(GetTestHeader(
+        xsize[c], ysize[c], bits_per_sample[c], orientation[c], alpha_bits[c],
+        xyb_encoded, have_container[c], /*metadata_default=*/false,
+        /*insert_extra_box=*/false, {}));
+  }
+
+  for (size_t i = 0; i < test_samples.size(); ++i) {
+    const std::vector<uint8_t>& data = test_samples[i];
+    // Test decoding too small header first, until we reach the final byte.
+    for (size_t size = 0; size <= data.size(); ++size) {
+      const bool whole_header = (size == data.size());
+
+      // Test with a new decoder for each tested byte size.
+      JxlDecoder* dec = JxlDecoderCreate(nullptr);
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO));
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), size));
+      JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+
+      JxlBasicInfo info;
+      bool have_basic_info = !JxlDecoderGetBasicInfo(dec, &info);
+
+      if (whole_header) {
+        EXPECT_EQ(JXL_DEC_BASIC_INFO, status);
+
+        // All header bytes given so the decoder must have the basic info.
+        EXPECT_TRUE(have_basic_info);
+        EXPECT_EQ(have_container[i], info.have_container);
+        EXPECT_EQ(alpha_bits[i], info.alpha_bits);
+        // Orientations 5..8 swap the dimensions
+        const bool swapped = orientation[i] >= 5;
+        if (swapped) {
+          EXPECT_EQ(xsize[i], info.ysize);
+          EXPECT_EQ(ysize[i], info.xsize);
+        } else {
+          EXPECT_EQ(xsize[i], info.xsize);
+          EXPECT_EQ(ysize[i], info.ysize);
+        }
+        // The API should set the orientation to identity by default since it
+        // already applies the transformation internally by default.
+        EXPECT_EQ(1, info.orientation);
+
+        EXPECT_EQ(3, info.num_color_channels);
+
+        if (alpha_bits[i] == 0) {
+          EXPECT_EQ(0, info.num_extra_channels);
+        } else {
+          // Expect an extra channel
+          EXPECT_EQ(1, info.num_extra_channels);
+          JxlExtraChannelInfo extra;
+          EXPECT_EQ(0, JxlDecoderGetExtraChannelInfo(dec, 0, &extra));
+          EXPECT_EQ(alpha_bits[i], extra.bits_per_sample);
+          EXPECT_EQ(JXL_CHANNEL_ALPHA, extra.type);
+          EXPECT_EQ(0, extra.alpha_associated);
+          // Verify the name "alpha_test" given to the alpha channel
+          EXPECT_EQ(10, extra.name_length);
+          char name[11];
+          EXPECT_EQ(0,
+                    JxlDecoderGetExtraChannelName(dec, 0, name, sizeof(name)));
+          EXPECT_EQ(std::string("alpha_test"), std::string(name));
+        }
+
+        EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+      } else if (size + 2 < data.size()) {
+        // If we did not give the full header, the basic info should not be
+        // available. Allow a few bytes of slack due to some bits for default
+        // opsinmatrix/extension bits.
+        EXPECT_FALSE(have_basic_info);
+        EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, status);
+      }
+
+      // Test that decoder doesn't allow setting a setting required at beginning
+      // unless it's reset
+      EXPECT_EQ(JXL_DEC_ERROR,
+                JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO));
+      JxlDecoderReset(dec);
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO));
+
+      JxlDecoderDestroy(dec);
+    }
+  }
+}
+
+// Decodes the basic info of a generated 33x77 header with 8-bit alpha and
+// checks that the image out buffer size reported for 4-channel uint8 output
+// equals xsize * ysize * 4 bytes.
+TEST(DecodeTest, BufferSizeTest) {
+  const size_t xsize = 33;
+  const size_t ysize = 77;
+  const size_t bits_per_sample = 8;
+  const size_t orientation = 1;
+  const size_t alpha_bits = 8;
+  const bool have_container = false;
+  const bool xyb_encoded = false;
+
+  const std::vector<uint8_t> header =
+      GetTestHeader(xsize, ysize, bits_per_sample, orientation, alpha_bits,
+                    xyb_encoded, have_container, /*metadata_default=*/false,
+                    /*insert_extra_box=*/false, {});
+
+  JxlDecoder* const decoder = JxlDecoderCreate(nullptr);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(decoder, JXL_DEC_BASIC_INFO));
+  // The whole header is available at once.
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(decoder, header.data(), header.size()));
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(decoder));
+
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(decoder, &info));
+  EXPECT_EQ(xsize, info.xsize);
+  EXPECT_EQ(ysize, info.ysize);
+
+  JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+  size_t image_out_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(decoder, &format, &image_out_size));
+  EXPECT_EQ(xsize * ysize * 4, image_out_size);
+
+  JxlDecoderDestroy(decoder);
+}
+
+TEST(DecodeTest, BasicInfoSizeHintTest) {
+  // The file under test has a box inserted before the codestream (something
+  // that is normally not recommended), so the decoder's initial size hint is
+  // too small.
+  const size_t xsize = 50;
+  const size_t ysize = 50;
+  const size_t bits_per_sample = 16;
+  const size_t orientation = 1;
+  const size_t alpha_bits = 0;
+  const bool xyb_encoded = false;
+  const std::vector<uint8_t> data = GetTestHeader(
+      xsize, ysize, bits_per_sample, orientation, alpha_bits, xyb_encoded,
+      /*have_container=*/true, /*metadata_default=*/false,
+      /*insert_extra_box=*/true, {});
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO));
+
+  // Sanity check of the test itself: the file was constructed on purpose to
+  // be larger than the first hint by having that extra box.
+  const size_t first_hint = JxlDecoderSizeHintBasicInfo(dec);
+  EXPECT_LT(first_hint, data.size());
+
+  // Pretend that only as many bytes as indicated by the hint are available.
+  const uint8_t* next_in = data.data();
+  size_t avail_in = std::min(first_hint, data.size());
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+  EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+  // Basic info cannot be available yet due to the extra inserted box.
+  EXPECT_NE(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, nullptr));
+
+  const size_t num_read = avail_in - JxlDecoderReleaseInput(dec);
+  EXPECT_LT(num_read, data.size());
+
+  // The new hint is a hint for the next avail_in, so the bytes already
+  // processed are added before comparing. It must exceed the first hint
+  // because the decoder now knows there is a box in between.
+  const size_t second_hint = JxlDecoderSizeHintBasicInfo(dec);
+  EXPECT_GT(second_hint + num_read, first_hint);
+  avail_in = std::min<size_t>(second_hint, data.size() - num_read);
+  next_in += num_read;
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+
+  // Only one box was added in between and the decoder should have known its
+  // size, so the second hint is expected to have been sufficient.
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+
+  // Also test if the basic info is correct.
+  EXPECT_EQ(1, info.have_container);
+  EXPECT_EQ(xsize, info.xsize);
+  EXPECT_EQ(ysize, info.ysize);
+  EXPECT_EQ(orientation, info.orientation);
+  EXPECT_EQ(bits_per_sample, info.bits_per_sample);
+
+  JxlDecoderDestroy(dec);
+}
+
+// Builds a test header via GetTestHeader for a 50x50 image with 16 bits per
+// sample, identity orientation and no alpha, without container or extra box,
+// carrying the given ICC profile.
+std::vector<uint8_t> GetIccTestHeader(const jxl::PaddedBytes& icc_profile,
+                                      bool xyb_encoded) {
+  const size_t kSize = 50;
+  const size_t kBitsPerSample = 16;
+  const size_t kOrientation = 1;
+  const size_t kAlphaBits = 0;
+  return GetTestHeader(/*xsize=*/kSize, /*ysize=*/kSize, kBitsPerSample,
+                       kOrientation, kAlphaBits, xyb_encoded,
+                       /*have_container=*/false, /*metadata_default=*/false,
+                       /*insert_extra_box=*/false, icc_profile);
+}
+
+// Tests the case where pixels and metadata ICC profile are the same
+// (xyb_encoded is false): both the ORIGINAL and the DATA target must report
+// an ICC profile of the same size as the one put in the header, and no
+// encoded (enum-based) profile.
+TEST(DecodeTest, IccProfileTestOriginal) {
+  jxl::PaddedBytes icc_profile = GetIccTestProfile();
+  bool xyb_encoded = false;
+  std::vector<uint8_t> data = GetIccTestHeader(icc_profile, xyb_encoded);
+  JxlPixelFormat format = {4, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(
+                dec, JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING));
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), data.size()));
+
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+
+  // Expect the opposite of xyb_encoded for uses_original_profile
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  EXPECT_EQ(JXL_TRUE, info.uses_original_profile);
+
+  EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+
+  // the encoded color profile expected to be not available, since the image
+  // has an ICC profile instead
+  EXPECT_EQ(JXL_DEC_ERROR,
+            JxlDecoderGetColorAsEncodedProfile(
+                dec, &format, JXL_COLOR_PROFILE_TARGET_ORIGINAL, nullptr));
+
+  // Initialized because the status check is non-fatal: if the size query
+  // fails, the comparisons below would otherwise read an indeterminate value.
+  size_t dec_profile_size = 0;
+  EXPECT_EQ(
+      JXL_DEC_SUCCESS,
+      JxlDecoderGetICCProfileSize(
+          dec, &format, JXL_COLOR_PROFILE_TARGET_ORIGINAL, &dec_profile_size));
+
+  // Check that can get return status with NULL size
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetICCProfileSize(
+                dec, &format, JXL_COLOR_PROFILE_TARGET_ORIGINAL, nullptr));
+
+  // The profiles must be equal. This requires they have equal size, and if
+  // they do, we can get the profile and compare the contents.
+  EXPECT_EQ(icc_profile.size(), dec_profile_size);
+  if (icc_profile.size() == dec_profile_size) {
+    jxl::PaddedBytes icc_profile2(icc_profile.size());
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderGetColorAsICCProfile(
+                  dec, &format, JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+                  icc_profile2.data(), icc_profile2.size()));
+    EXPECT_EQ(icc_profile, icc_profile2);
+  }
+
+  // the data is not xyb_encoded, so same result expected for the pixel data
+  // color profile
+  EXPECT_EQ(JXL_DEC_ERROR,
+            JxlDecoderGetColorAsEncodedProfile(
+                dec, &format, JXL_COLOR_PROFILE_TARGET_DATA, nullptr));
+
+  // Reset so a failing query cannot be masked by the size left over from the
+  // ORIGINAL target above.
+  dec_profile_size = 0;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetICCProfileSize(
+                                 dec, &format, JXL_COLOR_PROFILE_TARGET_DATA,
+                                 &dec_profile_size));
+  EXPECT_EQ(icc_profile.size(), dec_profile_size);
+
+  JxlDecoderDestroy(dec);
+}
+
+// Tests the case where pixels and metadata ICC profile are different
+// (xyb_encoded is true): the ORIGINAL target must return the ICC profile put
+// in the header, while the DATA target is described by an encoded profile
+// and by a generated ICC profile that differs from the original one.
+TEST(DecodeTest, IccProfileTestXybEncoded) {
+  jxl::PaddedBytes icc_profile = GetIccTestProfile();
+  bool xyb_encoded = true;
+  std::vector<uint8_t> data = GetIccTestHeader(icc_profile, xyb_encoded);
+  JxlPixelFormat format = {4, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+  JxlPixelFormat format_int = {4, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(
+                dec, JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), data.size()));
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+
+  // Expect the opposite of xyb_encoded for uses_original_profile
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  EXPECT_EQ(JXL_FALSE, info.uses_original_profile);
+
+  EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+
+  // the encoded color profile expected to be not available, since the image
+  // has an ICC profile instead
+  EXPECT_EQ(JXL_DEC_ERROR,
+            JxlDecoderGetColorAsEncodedProfile(
+                dec, &format, JXL_COLOR_PROFILE_TARGET_ORIGINAL, nullptr));
+
+  // Check that can get return status with NULL size
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetICCProfileSize(
+                dec, &format, JXL_COLOR_PROFILE_TARGET_ORIGINAL, nullptr));
+
+  // Initialized because the status check is non-fatal: if the size query
+  // fails, the comparisons below would otherwise read an indeterminate value.
+  size_t dec_profile_size = 0;
+  EXPECT_EQ(
+      JXL_DEC_SUCCESS,
+      JxlDecoderGetICCProfileSize(
+          dec, &format, JXL_COLOR_PROFILE_TARGET_ORIGINAL, &dec_profile_size));
+
+  // The profiles must be equal. This requires they have equal size, and if
+  // they do, we can get the profile and compare the contents.
+  EXPECT_EQ(icc_profile.size(), dec_profile_size);
+  if (icc_profile.size() == dec_profile_size) {
+    jxl::PaddedBytes icc_profile2(icc_profile.size());
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderGetColorAsICCProfile(
+                  dec, &format, JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+                  icc_profile2.data(), icc_profile2.size()));
+    EXPECT_EQ(icc_profile, icc_profile2);
+  }
+
+  // Data is xyb_encoded, so the data profile is a different profile, encoded
+  // as structured profile.
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetColorAsEncodedProfile(
+                dec, &format, JXL_COLOR_PROFILE_TARGET_DATA, nullptr));
+  // Zero-initialized so that a failing getter leads to deterministic
+  // comparison failures instead of reads of indeterminate fields.
+  JxlColorEncoding pixel_encoding = {};
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetColorAsEncodedProfile(
+                dec, &format, JXL_COLOR_PROFILE_TARGET_DATA, &pixel_encoding));
+  EXPECT_EQ(JXL_PRIMARIES_SRGB, pixel_encoding.primaries);
+  // The API returns LINEAR by default when the colorspace cannot be represented
+  // by enum values.
+  EXPECT_EQ(JXL_TRANSFER_FUNCTION_LINEAR, pixel_encoding.transfer_function);
+
+  // Test the same but with integer format.
+  EXPECT_EQ(
+      JXL_DEC_SUCCESS,
+      JxlDecoderGetColorAsEncodedProfile(
+          dec, &format_int, JXL_COLOR_PROFILE_TARGET_DATA, &pixel_encoding));
+  EXPECT_EQ(JXL_PRIMARIES_SRGB, pixel_encoding.primaries);
+  EXPECT_EQ(JXL_TRANSFER_FUNCTION_LINEAR, pixel_encoding.transfer_function);
+
+  // Test after setting the preferred color profile to non-linear sRGB:
+  // for XYB images with ICC profile, this setting is expected to take effect.
+  jxl::ColorEncoding temp_jxl_srgb = jxl::ColorEncoding::SRGB(false);
+  JxlColorEncoding pixel_encoding_srgb;
+  ConvertInternalToExternalColorEncoding(temp_jxl_srgb, &pixel_encoding_srgb);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetPreferredColorProfile(dec, &pixel_encoding_srgb));
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetColorAsEncodedProfile(
+                dec, &format, JXL_COLOR_PROFILE_TARGET_DATA, &pixel_encoding));
+  EXPECT_EQ(JXL_TRANSFER_FUNCTION_SRGB, pixel_encoding.transfer_function);
+
+  // The decoder can also output this as a generated ICC profile anyway, and
+  // we're certain that it will differ from the above defined profile since
+  // the sRGB data should not have swapped R/G/B primaries.
+
+  // Reset so that the non-zero check below cannot pass on the size left over
+  // from the ORIGINAL target if this query fails.
+  dec_profile_size = 0;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetICCProfileSize(
+                                 dec, &format, JXL_COLOR_PROFILE_TARGET_DATA,
+                                 &dec_profile_size));
+  // We don't need to dictate exactly what size the generated ICC profile
+  // must be (since there are many ways to represent the same color space),
+  // but it should not be zero.
+  EXPECT_NE(0u, dec_profile_size);
+  if (0 != dec_profile_size) {
+    jxl::PaddedBytes icc_profile2(dec_profile_size);
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile(
+                                   dec, &format, JXL_COLOR_PROFILE_TARGET_DATA,
+                                   icc_profile2.data(), icc_profile2.size()));
+    // expected not equal
+    EXPECT_NE(icc_profile, icc_profile2);
+  }
+
+  JxlDecoderDestroy(dec);
+}
+
+// Test decoding ICC from partial files byte for byte.
+// This test must pass also if JXL_CRASH_ON_ERROR is enabled, that is, the
+// decoding of the ANS histogram and stream of the encoded ICC profile must also
+// handle the case of not enough input bytes with StatusCode::kNotEnoughBytes
+// rather than fatal error status codes.
+//
+// The input is fed one additional byte at a time whenever the decoder asks
+// for more input; the basic info and color encoding events must each be seen
+// exactly once, in that order, before the decoder reports success.
+TEST(DecodeTest, ICCPartialTest) {
+  jxl::PaddedBytes icc_profile = GetIccTestProfile();
+  std::vector<uint8_t> data = GetIccTestHeader(icc_profile, false);
+  JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+
+  const uint8_t* next_in = data.data();
+  size_t avail_in = 0;
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(
+                dec, JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING));
+
+  bool seen_basic_info = false;
+  bool seen_color_encoding = false;
+  // Number of bytes of `data` handed to the decoder so far.
+  size_t total_size = 0;
+
+  for (;;) {
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+    JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+    // Advance past whatever the decoder consumed; the unconsumed tail is
+    // offered again in the next iteration.
+    size_t remaining = JxlDecoderReleaseInput(dec);
+    EXPECT_LE(remaining, avail_in);
+    next_in += avail_in - remaining;
+    avail_in = remaining;
+    if (status == JXL_DEC_NEED_MORE_INPUT) {
+      if (total_size >= data.size()) {
+        // End of partial codestream with codestream headers and ICC profile
+        // reached, it should not require more input since full image is not
+        // requested.
+        // ADD_FAILURE() instead of FAIL(): FAIL() returns from the test body
+        // immediately, which would make the break unreachable and skip
+        // JxlDecoderDestroy below.
+        ADD_FAILURE();
+        break;
+      }
+      // Feed one more byte, never going past the end of the data.
+      const size_t increment = std::min<size_t>(1, data.size() - total_size);
+      total_size += increment;
+      avail_in += increment;
+    } else if (status == JXL_DEC_BASIC_INFO) {
+      EXPECT_FALSE(seen_basic_info);
+      seen_basic_info = true;
+    } else if (status == JXL_DEC_COLOR_ENCODING) {
+      EXPECT_TRUE(seen_basic_info);
+      EXPECT_FALSE(seen_color_encoding);
+      seen_color_encoding = true;
+
+      // Sanity check that the ICC profile was decoded correctly.
+      // Initialized because the status check is non-fatal and the size is
+      // compared afterwards even if the query fails.
+      size_t dec_profile_size = 0;
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderGetICCProfileSize(dec, &format,
+                                            JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+                                            &dec_profile_size));
+      EXPECT_EQ(icc_profile.size(), dec_profile_size);
+
+    } else if (status == JXL_DEC_SUCCESS) {
+      EXPECT_TRUE(seen_color_encoding);
+      break;
+    } else {
+      // We do not expect any other events or errors. Record the failure
+      // without returning so that the decoder is still destroyed.
+      ADD_FAILURE();
+      break;
+    }
+  }
+
+  EXPECT_TRUE(seen_basic_info);
+  EXPECT_TRUE(seen_color_encoding);
+
+  JxlDecoderDestroy(dec);
+}
+
+// Parameters of one instance of the value-parameterized pixel roundtrip test
+// (DecodeTestParam.PixelTest): how the input image is built and encoded, and
+// how it is decoded through the API.
+struct PixelTestConfig {
+  // Input image definition.
+  // The input has 1 (grayscale) or 3 color channels, plus one alpha channel
+  // if include_alpha is set.
+  bool grayscale;
+  bool include_alpha;
+  size_t xsize;
+  size_t ysize;
+  // Forwarded to CreateTestJXLCodestream as its add_preview argument.
+  bool add_preview;
+  // Output format.
+  // These three fields form the JxlPixelFormat requested from the decoder.
+  JxlEndianness endianness;
+  JxlDataType data_type;
+  uint32_t output_channels;
+  // Container options.
+  CodeStreamBoxFormat add_container;
+  // Decoding mode.
+  // These three flags are forwarded unchanged to jxl::DecodeWithAPI.
+  bool use_callback;
+  bool set_buffer_early;
+  bool use_resizable_runner;
+  // Exif orientation, 1-8
+  JxlOrientation orientation;
+  // When set, the test calls JxlDecoderSetKeepOrientation(dec, JXL_TRUE) and
+  // compares against the original pixels without applying the orientation.
+  bool keep_orientation;
+};
+
+// Fixture for the value-parameterized pixel tests; each instance receives one
+// PixelTestConfig through GetParam().
+class DecodeTestParam : public ::testing::TestWithParam<PixelTestConfig> {};
+
+// Losslessly encodes a generated test image as described by the test
+// parameter, decodes it through the API in the requested output format and
+// checks that the decoded pixels match the original ones (after applying the
+// orientation to the original when the decoder is expected to apply it).
+TEST_P(DecodeTestParam, PixelTest) {
+  const PixelTestConfig config = GetParam();
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+  if (config.keep_orientation) {
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetKeepOrientation(dec, JXL_TRUE));
+  }
+
+  // Build the 16-bit big-endian source image.
+  const uint32_t color_channels = config.grayscale ? 1 : 3;
+  const uint32_t orig_channels =
+      color_channels + (config.include_alpha ? 1 : 0);
+  std::vector<uint8_t> pixels =
+      jxl::test::GetSomeTestImage(config.xsize, config.ysize, orig_channels, 0);
+  JxlPixelFormat format_orig = {orig_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN,
+                                0};
+
+  // Lossless to verify pixels exactly after roundtrip.
+  jxl::CompressParams cparams;
+  cparams.SetLossless();
+  jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), config.xsize,
+      config.ysize, orig_channels, cparams, config.add_container,
+      config.orientation, config.add_preview);
+
+  JxlPixelFormat format = {config.output_channels, config.data_type,
+                           config.endianness, 0};
+
+  // Orientations above 4 swap width and height, unless the decoder was told
+  // to keep the stored orientation.
+  const bool applies_orientation = !config.keep_orientation;
+  const bool swap_xy = applies_orientation && (config.orientation > 4);
+  const size_t xsize = swap_xy ? config.ysize : config.xsize;
+  const size_t ysize = swap_xy ? config.xsize : config.ysize;
+
+  std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+      dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+      format, config.use_callback, config.set_buffer_early,
+      config.use_resizable_runner);
+  JxlDecoderReset(dec);
+
+  const size_t num_pixels = config.xsize * config.ysize;
+  EXPECT_EQ(num_pixels * config.output_channels *
+                GetDataBits(config.data_type) / jxl::kBitsPerByte,
+            pixels2.size());
+
+  // If an orientation transformation is expected, to compare the pixels, also
+  // apply this transformation to the original pixels. ConvertToExternal is
+  // used to achieve this, with a temporary conversion to CodecInOut and back.
+  if (applies_orientation && config.orientation > 1) {
+    jxl::Span<const uint8_t> bytes(pixels.data(), pixels.size());
+    jxl::ColorEncoding color_encoding =
+        jxl::ColorEncoding::SRGB(config.grayscale);
+
+    jxl::CodecInOut io;
+    if (config.include_alpha) {
+      io.metadata.m.SetAlphaBits(16);
+    }
+    io.SetSize(config.xsize, config.ysize);
+
+    EXPECT_TRUE(ConvertFromExternal(
+        bytes, config.xsize, config.ysize, color_encoding, config.include_alpha,
+        /*alpha_is_premultiplied=*/false, 16, JXL_BIG_ENDIAN,
+        /*flipped_y=*/false, nullptr, &io.Main()));
+
+    // Clear the buffer before it is overwritten with the oriented image.
+    for (uint8_t& byte : pixels) {
+      byte = 0;
+    }
+    // Row stride of the oriented image: 2 bytes per 16-bit sample.
+    const size_t stride = xsize * 2 * orig_channels;
+    EXPECT_TRUE(ConvertToExternal(
+        io.Main(), 16,
+        /*float_out=*/false, orig_channels, JXL_BIG_ENDIAN, stride, nullptr,
+        pixels.data(), pixels.size(), nullptr, nullptr,
+        static_cast<jxl::Orientation>(config.orientation)));
+  }
+
+  EXPECT_EQ(0, ComparePixels(pixels.data(), pixels2.data(), xsize, ysize,
+                             format_orig, format));
+
+  JxlDecoderDestroy(dec);
+}
+
+// Builds the list of configurations for the parameterized pixel test. Each
+// section below varies one aspect (output format, container format, preview,
+// early buffers, resizable runner, orientation) while keeping the others at a
+// fixed baseline.
+std::vector<PixelTestConfig> GeneratePixelTests() {
+  std::vector<PixelTestConfig> configs;
+
+  // Input channel layout together with the number of output channels.
+  struct ChannelInfo {
+    bool grayscale;
+    bool include_alpha;
+    size_t output_channels;
+  };
+  const ChannelInfo ch_info[] = {
+      {false, true, 4},   // RGBA -> RGBA
+      {true, false, 1},   // G -> G
+      {true, true, 1},    // GA -> G
+      {true, true, 2},    // GA -> GA
+      {false, false, 3},  // RGB -> RGB
+      {false, true, 3},   // RGBA -> RGB
+      {false, false, 4},  // RGB -> RGBA
+  };
+
+  struct OutputFormat {
+    JxlEndianness endianness;
+    JxlDataType data_type;
+  };
+  const OutputFormat out_formats[] = {
+      {JXL_NATIVE_ENDIAN, JXL_TYPE_UINT8},
+      {JXL_LITTLE_ENDIAN, JXL_TYPE_UINT16},
+      {JXL_BIG_ENDIAN, JXL_TYPE_UINT16},
+      {JXL_NATIVE_ENDIAN, JXL_TYPE_FLOAT16},
+      {JXL_LITTLE_ENDIAN, JXL_TYPE_FLOAT},
+      {JXL_BIG_ENDIAN, JXL_TYPE_FLOAT},
+  };
+
+  // Baseline used by every section except the output format one:
+  // RGBA -> RGBA decoded to native-endian uint8.
+  const ChannelInfo rgba = ch_info[0];
+  const OutputFormat u8 = out_formats[0];
+
+  // Appends one configuration assembled from the given arguments.
+  auto add_config = [&configs](ChannelInfo ch, size_t xsize, size_t ysize,
+                               bool preview, CodeStreamBoxFormat box,
+                               JxlOrientation orientation,
+                               bool keep_orientation, OutputFormat format,
+                               bool use_callback, bool set_buffer_early,
+                               bool resizable_runner) {
+    PixelTestConfig config;
+    config.grayscale = ch.grayscale;
+    config.include_alpha = ch.include_alpha;
+    config.xsize = xsize;
+    config.ysize = ysize;
+    config.add_preview = preview;
+    config.endianness = format.endianness;
+    config.data_type = format.data_type;
+    config.output_channels = static_cast<uint32_t>(ch.output_channels);
+    config.add_container = box;
+    config.use_callback = use_callback;
+    config.set_buffer_early = set_buffer_early;
+    config.use_resizable_runner = resizable_runner;
+    config.orientation = orientation;
+    config.keep_orientation = keep_orientation;
+    configs.push_back(config);
+  };
+
+  // Test output formats and methods.
+  for (const ChannelInfo& ch : ch_info) {
+    for (bool use_callback : {false, true}) {
+      for (const OutputFormat& fmt : out_formats) {
+        add_config(ch, 301, 33, /*add_preview=*/false,
+                   CodeStreamBoxFormat::kCSBF_None, JXL_ORIENT_IDENTITY,
+                   /*keep_orientation=*/false, fmt, use_callback,
+                   /*set_buffer_early=*/false, /*resizable_runner=*/false);
+      }
+    }
+  }
+
+  // Test codestream formats. Entry 0 is skipped here.
+  for (size_t box = 1; box < kCSBF_NUM_ENTRIES; ++box) {
+    add_config(rgba, 77, 33, /*add_preview=*/false,
+               static_cast<CodeStreamBoxFormat>(box), JXL_ORIENT_IDENTITY,
+               /*keep_orientation=*/false, u8,
+               /*use_callback=*/false,
+               /*set_buffer_early=*/false, /*resizable_runner=*/false);
+  }
+
+  // Test previews.
+  for (bool add_preview : {false, true}) {
+    add_config(rgba, 77, 33, add_preview, CodeStreamBoxFormat::kCSBF_None,
+               JXL_ORIENT_IDENTITY, /*keep_orientation=*/false, u8,
+               /*use_callback=*/false, /*set_buffer_early=*/false,
+               /*resizable_runner=*/false);
+  }
+
+  // Test setting buffers early.
+  add_config(rgba, 300, 33, /*add_preview=*/false,
+             CodeStreamBoxFormat::kCSBF_None, JXL_ORIENT_IDENTITY,
+             /*keep_orientation=*/false, u8,
+             /*use_callback=*/false, /*set_buffer_early=*/true,
+             /*resizable_runner=*/false);
+
+  // Test using the resizable runner, doubling both dimensions each time.
+  for (size_t shift = 0; shift < 4; shift++) {
+    add_config(rgba, 300 << shift, 33 << shift, /*add_preview=*/false,
+               CodeStreamBoxFormat::kCSBF_None, JXL_ORIENT_IDENTITY,
+               /*keep_orientation=*/false, u8,
+               /*use_callback=*/false, /*set_buffer_early=*/false,
+               /*resizable_runner=*/true);
+  }
+
+  // Test orientations, each one both with and without keep_orientation.
+  for (int orientation = 1; orientation <= 8; ++orientation) {
+    for (bool keep_orientation : {false, true}) {
+      add_config(rgba, 280, 12, /*add_preview=*/false,
+                 CodeStreamBoxFormat::kCSBF_None,
+                 static_cast<JxlOrientation>(orientation), keep_orientation,
+                 u8,
+                 /*use_callback=*/false, /*set_buffer_early=*/true,
+                 /*resizable_runner=*/false);
+    }
+  }
+
+  return configs;
+}
+
+// Streams a compact, alphanumeric description of a pixel test configuration:
+// size, input and output channel layout, sample type, endianness (for
+// multi-byte samples) and a suffix for every non-default option.
+std::ostream& operator<<(std::ostream& os, const PixelTestConfig& c) {
+  // Indexed by channel count.
+  const char* const channel_names[] = {"", "G", "GA", "RGB", "RGBA"};
+  const size_t input_channels =
+      (c.grayscale ? 1 : 3) + (c.include_alpha ? 1 : 0);
+  os << c.xsize << "x" << c.ysize;
+  os << channel_names[input_channels] << "to"
+     << channel_names[c.output_channels];
+
+  // Short name of the output sample type; stays empty for unlisted values.
+  const char* type_name = "";
+  switch (c.data_type) {
+    case JXL_TYPE_UINT8:
+      type_name = "u8";
+      break;
+    case JXL_TYPE_UINT16:
+      type_name = "u16";
+      break;
+    case JXL_TYPE_FLOAT:
+      type_name = "f32";
+      break;
+    case JXL_TYPE_FLOAT16:
+      type_name = "f16";
+      break;
+    case JXL_TYPE_UINT32:
+      type_name = "u32";
+      break;
+    case JXL_TYPE_BOOLEAN:
+      type_name = "b";
+      break;
+  }
+  os << type_name;
+
+  // Endianness is only shown for samples wider than one byte; native
+  // endianness adds nothing.
+  const bool multi_byte = GetDataBits(c.data_type) > jxl::kBitsPerByte;
+  if (multi_byte && c.endianness != JXL_NATIVE_ENDIAN) {
+    if (c.endianness == JXL_BIG_ENDIAN) {
+      os << "BE";
+    } else if (c.endianness == JXL_LITTLE_ENDIAN) {
+      os << "LE";
+    }
+  }
+
+  if (c.add_container != CodeStreamBoxFormat::kCSBF_None) {
+    os << "Box" << static_cast<size_t>(c.add_container);
+  }
+  if (c.add_preview) {
+    os << "Preview";
+  }
+  if (c.use_callback) {
+    os << "Callback";
+  }
+  if (c.set_buffer_early) {
+    os << "EarlyBuffer";
+  }
+  if (c.use_resizable_runner) {
+    os << "ResizableRunner";
+  }
+  if (c.orientation != 1) {
+    os << "O" << c.orientation;
+  }
+  if (c.keep_orientation) {
+    os << "Keep";
+  }
+  return os;
+}
+
+// Returns the test name suffix for a parameterized pixel test: the streamed
+// form of its PixelTestConfig.
+std::string PixelTestDescription(
+    const testing::TestParamInfo<DecodeTestParam::ParamType>& info) {
+  std::ostringstream description;
+  description << info.param;
+  return description.str();
+}
+
+// Instantiates DecodeTestParam once per configuration returned by
+// GeneratePixelTests(), naming each instance with PixelTestDescription.
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(DecodeTest, DecodeTestParam,
+                                   testing::ValuesIn(GeneratePixelTests()),
+                                   PixelTestDescription);
+
+// Losslessly encodes an RGBA test image with an ICC profile attached (no
+// container, no preview) and checks that decoding it to 3 and 4 channels in
+// uint8, uint16 and float output formats reproduces the original pixels.
+TEST(DecodeTest, PixelTestWithICCProfileLossless) {
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+  size_t xsize = 123, ysize = 77;
+  size_t num_pixels = xsize * ysize;
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+  JxlPixelFormat format_orig = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  jxl::CompressParams cparams;
+  // Lossless to verify pixels exactly after roundtrip.
+  cparams.SetLossless();
+  // The same codestream is decoded by every variant below.
+  jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 4,
+      cparams, kCSBF_None, JXL_ORIENT_IDENTITY, /*add_preview=*/false,
+      /*add_icc_profile=*/true);
+
+  // Output variants. For variation, the uint16 one uses the callback and sets
+  // the output buffer early; the other two use neither.
+  struct OutputVariant {
+    JxlDataType data_type;
+    size_t bytes_per_sample;
+    bool use_callback;
+    bool set_buffer_early;
+  };
+  const OutputVariant variants[] = {
+      {JXL_TYPE_UINT8, 1, false, false},
+      {JXL_TYPE_UINT16, 2, true, true},
+      {JXL_TYPE_FLOAT, 4, false, false},
+  };
+
+  for (uint32_t channels = 3; channels <= 4; ++channels) {
+    for (const OutputVariant& variant : variants) {
+      JxlPixelFormat format = {channels, variant.data_type, JXL_LITTLE_ENDIAN,
+                               0};
+
+      std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+          dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+          format, variant.use_callback, variant.set_buffer_early,
+          /*use_resizable_runner=*/false);
+      // Reset so the same decoder instance can be reused for the next variant.
+      JxlDecoderReset(dec);
+      EXPECT_EQ(num_pixels * channels * variant.bytes_per_sample,
+                pixels2.size());
+      EXPECT_EQ(0, ComparePixels(pixels.data(), pixels2.data(), xsize, ysize,
+                                 format_orig, format));
+    }
+  }
+
+  JxlDecoderDestroy(dec);
+}
+
+// Encodes an RGB test image with an ICC profile attached using default
+// (lossy) parameters, decodes it to float and checks that the Butteraugli
+// distance to the original stays within 2.4.
+TEST(DecodeTest, PixelTestWithICCProfileLossy) {
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+  const size_t xsize = 123;
+  const size_t ysize = 77;
+  const size_t num_pixels = xsize * ysize;
+  const uint32_t channels = 3;
+
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+  JxlPixelFormat format_orig = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  jxl::CompressParams cparams;
+  jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+      cparams, kCSBF_None, JXL_ORIENT_IDENTITY, /*add_preview=*/false,
+      /*add_icc_profile=*/true);
+
+  JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+  std::vector<uint8_t> decoded = jxl::DecodeWithAPI(
+      dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+      format, /*use_callback=*/false, /*set_buffer_early=*/true,
+      /*use_resizable_runner=*/false);
+  JxlDecoderReset(dec);
+  EXPECT_EQ(num_pixels * channels * 4, decoded.size());
+
+  // The input pixels use the profile matching GetIccTestProfile, since
+  // add_icc_profile was set to true for CreateTestJXLCodestream.
+  jxl::ColorEncoding encoding_orig;
+  EXPECT_TRUE(encoding_orig.SetICC(GetIccTestProfile()));
+  jxl::Span<const uint8_t> span_orig(pixels.data(), pixels.size());
+  jxl::CodecInOut io_orig;
+  io_orig.SetSize(xsize, ysize);
+  EXPECT_TRUE(ConvertFromExternal(
+      span_orig, xsize, ysize, encoding_orig,
+      /*has_alpha=*/false, false, 16, format_orig.endianness,
+      /*flipped_y=*/false, /*pool=*/nullptr, &io_orig.Main()));
+
+  // The output pixels are expected to be in the same colorspace as the input
+  // profile, as the profile can be represented by enum values.
+  jxl::ColorEncoding encoding_decoded = encoding_orig;
+  jxl::Span<const uint8_t> span_decoded(decoded.data(), decoded.size());
+  jxl::CodecInOut io_decoded;
+  io_decoded.SetSize(xsize, ysize);
+  EXPECT_TRUE(ConvertFromExternal(
+      span_decoded, xsize, ysize, encoding_decoded,
+      /*has_alpha=*/false, false, 32, format.endianness,
+      /*flipped_y=*/false, /*pool=*/nullptr, &io_decoded.Main()));
+
+  jxl::ButteraugliParams ba;
+  EXPECT_LE(ButteraugliDistance(io_orig, io_decoded, ba, /*distmap=*/nullptr,
+                                nullptr),
+            2.4f);
+
+  JxlDecoderDestroy(dec);
+}
+
+// Tests the case of lossy sRGB image without alpha channel, decoded to RGB8
+// and to RGBA8. In both cases the Butteraugli distance between the original
+// and the decoded image must stay within 2.4.
+TEST(DecodeTest, PixelTestOpaqueSrgbLossy) {
+  for (unsigned channels = 3; channels <= 4; channels++) {
+    JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+    size_t xsize = 123, ysize = 77;
+    size_t num_pixels = xsize * ysize;
+    std::vector<uint8_t> pixels =
+        jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+    JxlPixelFormat format_orig = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+    jxl::CompressParams cparams;
+    jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+        jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+        cparams, kCSBF_None, JXL_ORIENT_IDENTITY, /*add_preview=*/false,
+        /*add_icc_profile=*/false);
+
+    JxlPixelFormat format = {channels, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+
+    std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+        dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+        format, /*use_callback=*/true, /*set_buffer_early=*/false,
+        /*use_resizable_runner=*/false);
+    JxlDecoderReset(dec);
+    EXPECT_EQ(num_pixels * channels, pixels2.size());
+
+    // The input pixels are interpreted as sRGB: no ICC profile is attached,
+    // since add_icc_profile is false for CreateTestJXLCodestream above.
+    jxl::ColorEncoding color_encoding0 = jxl::ColorEncoding::SRGB(false);
+    jxl::Span<const uint8_t> span0(pixels.data(), pixels.size());
+    jxl::CodecInOut io0;
+    io0.SetSize(xsize, ysize);
+    EXPECT_TRUE(ConvertFromExternal(
+        span0, xsize, ysize, color_encoding0,
+        /*has_alpha=*/false, false, 16, format_orig.endianness,
+        /*flipped_y=*/false, /*pool=*/nullptr, &io0.Main()));
+
+    // The decoded pixels are sRGB as well; with 4 output channels they carry
+    // an alpha channel that the original does not have.
+    jxl::ColorEncoding color_encoding1 = jxl::ColorEncoding::SRGB(false);
+    jxl::Span<const uint8_t> span1(pixels2.data(), pixels2.size());
+    jxl::CodecInOut io1;
+    const bool has_alpha = (channels == 4);
+    if (has_alpha) {
+      // Declare the alpha channel so the RGBA8 buffer can be imported.
+      io1.metadata.m.SetAlphaBits(8);
+      io1.SetSize(xsize, ysize);
+    }
+    EXPECT_TRUE(ConvertFromExternal(
+        span1, xsize, ysize, color_encoding1, has_alpha, false, 8,
+        format.endianness,
+        /*flipped_y=*/false, /*pool=*/nullptr, &io1.Main()));
+    if (has_alpha) {
+      // Drop the alpha channel again before the comparison, since the
+      // original image has none.
+      io1.metadata.m.SetAlphaBits(0);
+      io1.Main().ClearExtraChannels();
+    }
+
+    jxl::ButteraugliParams ba;
+    EXPECT_LE(ButteraugliDistance(io0, io1, ba, /*distmap=*/nullptr, nullptr),
+              2.4f);
+
+    JxlDecoderDestroy(dec);
+  }
+}
+
+// Lossy round trip of an opaque sRGB image with noise synthesis forced on. The
+// codestream is decoded through the API once as RGB8 and once as RGBA8.
+TEST(DecodeTest, PixelTestOpaqueSrgbLossyNoise) {
+  for (unsigned channels = 3; channels <= 4; channels++) {
+    size_t xsize = 512, ysize = 300;
+    const bool want_alpha = (channels == 4);
+    // Encoder side: 16-bit big-endian RGB source, no container, no preview and
+    // no ICC profile, so both sides are interpreted as plain sRGB below.
+    std::vector<uint8_t> source =
+        jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+    JxlPixelFormat source_format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+    jxl::CompressParams cparams;
+    cparams.noise = jxl::Override::kOn;
+    jxl::PaddedBytes codestream = jxl::CreateTestJXLCodestream(
+        jxl::Span<const uint8_t>(source.data(), source.size()), xsize, ysize, 3,
+        cparams, kCSBF_None, JXL_ORIENT_IDENTITY, /*add_preview=*/false,
+        /*add_icc_profile=*/false);
+
+    // Decoder side: 8-bit output with the channel count under test.
+    JxlDecoder* dec = JxlDecoderCreate(NULL);
+    JxlPixelFormat format = {channels, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+    std::vector<uint8_t> decoded = jxl::DecodeWithAPI(
+        dec, jxl::Span<const uint8_t>(codestream.data(), codestream.size()),
+        format, /*use_callback=*/false, /*set_buffer_early=*/true,
+        /*use_resizable_runner=*/false);
+    JxlDecoderReset(dec);
+    EXPECT_EQ(xsize * ysize * channels, decoded.size());
+
+    // Reference image, rebuilt from the 16-bit source samples.
+    jxl::ColorEncoding source_encoding = jxl::ColorEncoding::SRGB(false);
+    jxl::Span<const uint8_t> source_span(source.data(), source.size());
+    jxl::CodecInOut io_ref;
+    io_ref.SetSize(xsize, ysize);
+    EXPECT_TRUE(ConvertFromExternal(
+        source_span, xsize, ysize, source_encoding,
+        /*has_alpha=*/false, false, 16, source_format.endianness,
+        /*flipped_y=*/false, /*pool=*/nullptr, &io_ref.Main()));
+
+    // Decoded image; for RGBA8 the alpha channel is dropped after conversion.
+    jxl::ColorEncoding decoded_encoding = jxl::ColorEncoding::SRGB(false);
+    jxl::Span<const uint8_t> decoded_span(decoded.data(), decoded.size());
+    jxl::CodecInOut io_dec;
+    if (want_alpha) {
+      io_dec.metadata.m.SetAlphaBits(8);
+      io_dec.SetSize(xsize, ysize);
+    }
+    EXPECT_TRUE(ConvertFromExternal(
+        decoded_span, xsize, ysize, decoded_encoding,
+        /*has_alpha=*/want_alpha, false, 8, format.endianness,
+        /*flipped_y=*/false, /*pool=*/nullptr, &io_dec.Main()));
+    if (want_alpha) {
+      io_dec.metadata.m.SetAlphaBits(0);
+      io_dec.Main().ClearExtraChannels();
+    }
+
+    // The noisy lossy reconstruction must stay within the distance budget.
+    jxl::ButteraugliParams ba;
+    const auto distance =
+        ButteraugliDistance(io_ref, io_dec, ba, /*distmap=*/nullptr, nullptr);
+    EXPECT_LE(distance, 2.6f);
+    JxlDecoderDestroy(dec);
+  }
+}
+
+// Streams the test codestream, in each box format, to the decoder in small
+// increments and checks the events and the output: the reconstructed JPEG
+// bytes if reconstructible_jpeg is set, else the losslessly decoded pixels.
+void TestPartialStream(bool reconstructible_jpeg) {
+  size_t xsize = 123, ysize = 77;
+  uint32_t channels = reconstructible_jpeg ? 3 : 4;
+  std::vector<uint8_t> pixels =
+      jxl::test::GetSomeTestImage(xsize, ysize, channels, 0);
+  JxlPixelFormat format_orig = {channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  jxl::CompressParams cparams;
+  if (reconstructible_jpeg) {
+    cparams.color_transform = jxl::ColorTransform::kNone;
+  } else {
+    // Lossless to verify pixels exactly after roundtrip.
+    cparams.SetLossless();
+  }
+
+  std::vector<uint8_t> pixels2(pixels.size());
+
+  jxl::PaddedBytes jpeg_output(64);
+  size_t used_jpeg_output = 0;
+
+  std::vector<jxl::PaddedBytes> codestreams(kCSBF_NUM_ENTRIES);
+  std::vector<jxl::PaddedBytes> jpeg_codestreams(kCSBF_NUM_ENTRIES);
+  for (size_t i = 0; i < kCSBF_NUM_ENTRIES; ++i) {
+    CodeStreamBoxFormat add_container = (CodeStreamBoxFormat)i;
+    codestreams[i] = jxl::CreateTestJXLCodestream(
+        jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+        channels, cparams, add_container, JXL_ORIENT_IDENTITY,
+        /*add_preview=*/true,
+        /*add_icc_profile=*/false,
+        reconstructible_jpeg ? &jpeg_codestreams[i] : nullptr);
+  }
+
+  // Test multiple step sizes, to test different combinations of the streaming
+  // box parsing.
+  std::vector<size_t> increments = {1, 3, 17, 23, 120, 700, 1050};
+
+  for (size_t index = 0; index < increments.size(); index++) {
+    for (size_t i = 0; i < kCSBF_NUM_ENTRIES; ++i) {
+      if (reconstructible_jpeg &&
+          (CodeStreamBoxFormat)i == CodeStreamBoxFormat::kCSBF_None) {
+        continue;
+      }
+      const jxl::PaddedBytes& data = codestreams[i];
+      const uint8_t* next_in = data.data();
+      size_t avail_in = 0;
+
+      JxlDecoder* dec = JxlDecoderCreate(nullptr);
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSubscribeEvents(
+                    dec, JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE |
+                             JXL_DEC_JPEG_RECONSTRUCTION));
+
+      bool seen_basic_info = false;
+      bool seen_full_image = false;
+      bool seen_jpeg_recon = false;
+
+      size_t total_size = 0;
+
+      // Unexpected states use ADD_FAILURE() + break instead of FAIL(): FAIL()
+      // returns from this function, skipping JxlDecoderDestroy below.
+      for (;;) {
+        EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+        JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+        size_t remaining = JxlDecoderReleaseInput(dec);
+        EXPECT_LE(remaining, avail_in);
+        next_in += avail_in - remaining;
+        avail_in = remaining;
+        if (status == JXL_DEC_NEED_MORE_INPUT) {
+          if (total_size >= data.size()) {
+            // End of test data reached, it should have successfully decoded the
+            // image now.
+            ADD_FAILURE() << "decoder wants input beyond the end of the data";
+            break;
+          }
+
+          size_t increment = increments[index];
+          // End of the file reached, should be the final test.
+          if (total_size + increment > data.size()) {
+            increment = data.size() - total_size;
+          }
+          total_size += increment;
+          avail_in += increment;
+        } else if (status == JXL_DEC_BASIC_INFO) {
+          // This event should happen exactly once
+          EXPECT_FALSE(seen_basic_info);
+          if (seen_basic_info) break;
+          seen_basic_info = true;
+          JxlBasicInfo info;
+          EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+          EXPECT_EQ(info.xsize, xsize);
+          EXPECT_EQ(info.ysize, ysize);
+        } else if (status == JXL_DEC_JPEG_RECONSTRUCTION) {
+          // This event is expected before both basic info and the full image.
+          EXPECT_FALSE(seen_basic_info);
+          EXPECT_FALSE(seen_full_image);
+          EXPECT_EQ(JXL_DEC_SUCCESS,
+                    JxlDecoderSetJPEGBuffer(dec, jpeg_output.data(),
+                                            jpeg_output.size()));
+          seen_jpeg_recon = true;
+        } else if (status == JXL_DEC_JPEG_NEED_MORE_OUTPUT) {
+          EXPECT_TRUE(seen_jpeg_recon);
+          used_jpeg_output =
+              jpeg_output.size() - JxlDecoderReleaseJPEGBuffer(dec);
+          jpeg_output.resize(jpeg_output.size() * 2);
+          EXPECT_EQ(JXL_DEC_SUCCESS,
+                    JxlDecoderSetJPEGBuffer(
+                        dec, jpeg_output.data() + used_jpeg_output,
+                        jpeg_output.size() - used_jpeg_output));
+        } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+          EXPECT_EQ(JXL_DEC_SUCCESS,
+                    JxlDecoderSetImageOutBuffer(
+                        dec, &format_orig, pixels2.data(), pixels2.size()));
+        } else if (status == JXL_DEC_FULL_IMAGE) {
+          // This event should happen exactly once
+          EXPECT_FALSE(seen_full_image);
+          if (seen_full_image) break;
+          // This event should happen after basic info
+          EXPECT_TRUE(seen_basic_info);
+          seen_full_image = true;
+          if (reconstructible_jpeg) {
+            used_jpeg_output =
+                jpeg_output.size() - JxlDecoderReleaseJPEGBuffer(dec);
+            EXPECT_EQ(used_jpeg_output, jpeg_codestreams[i].size());
+            EXPECT_EQ(0, memcmp(jpeg_output.data(), jpeg_codestreams[i].data(),
+                                used_jpeg_output));
+          } else {
+            EXPECT_EQ(pixels, pixels2);
+          }
+        } else if (status == JXL_DEC_SUCCESS) {
+          EXPECT_TRUE(seen_full_image);
+          break;
+        } else {
+          // We do not expect any other events or errors
+          ADD_FAILURE() << "unexpected decoder status " << (int)status;
+          break;
+        }
+      }
+
+      // Ensure the decoder emitted the basic info and full image events
+      EXPECT_TRUE(seen_basic_info);
+      EXPECT_TRUE(seen_full_image);
+
+      JxlDecoderDestroy(dec);
+    }
+  }
+}
+
+// Runs the partial-stream check for pixel output (reconstructible_jpeg=false):
+// an incomplete file must yield JXL_DEC_NEED_MORE_INPUT, not an error.
+TEST(DecodeTest, PixelPartialTest) { TestPartialStream(false); }
+
+#if JPEGXL_ENABLE_JPEG
+// Runs the partial-stream check for reconstructed JPEG bytes instead of pixels.
+TEST(DecodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGPartialTest)) {
+  TestPartialStream(/*reconstructible_jpeg=*/true);
+}
+#endif  // JPEGXL_ENABLE_JPEG
+
+// JXL_DEC_DC_IMAGE is deprecated: the event can still be subscribed to, but
+// the decoder no longer implements it.
+TEST(DecodeTest, DCNotGettableTest) {
+  // 68-byte codestream of a 1x1 pixel JXL image.
+  const std::string codestream(
+      "\377\n\0\20\260\23\0H\200("
+      "\0\334\0U\17\0\0\250P\31e\334\340\345\\\317\227\37:,"
+      "\246m\\gh\253m\vK\22E\306\261I\252C&pH\22\353 "
+      "\363\6\22\bp\0\200\237\34\231W2d\255$\1",
+      68);
+  const uint8_t* input = reinterpret_cast<const uint8_t*>(codestream.data());
+
+  // Subscribe to the deprecated DC event in addition to basic info.
+  JxlDecoder* dec = JxlDecoderCreate(NULL);
+  const int events = JXL_DEC_BASIC_INFO | JXL_DEC_DC_IMAGE;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events));
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(dec, input, codestream.size()));
+
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+
+  // A 1x1 pixel image has a single group, from which the decoder cannot get a
+  // DC size, so no DC is returned at all. No full image was requested either,
+  // hence the next call is expected to finish with success.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+  JxlDecoderDestroy(dec);
+}
+
+// Decodes only the preview through the API and compares it with the top-left
+// crop of the source pixels that is expected to have been stored as preview.
+TEST(DecodeTest, PreviewTest) {
+  size_t xsize = 77, ysize = 120;
+  std::vector<uint8_t> source = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+
+  jxl::CompressParams cparams;
+  jxl::PaddedBytes codestream = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(source.data(), source.size()), xsize, ysize, 3,
+      cparams, kCSBF_Multi, JXL_ORIENT_IDENTITY, /*add_preview=*/true);
+
+  // Only basic info and the preview image are subscribed to.
+  JxlDecoder* dec = JxlDecoderCreate(NULL);
+  const int events = JXL_DEC_BASIC_INFO | JXL_DEC_PREVIEW_IMAGE;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events));
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(dec, codestream.data(), codestream.size()));
+
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  JxlPixelFormat format = {3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+  size_t buffer_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderPreviewOutBufferSize(dec, &format, &buffer_size));
+
+  // The preview is expected to be a top-left crop whose dimensions are the
+  // floor of 1/7th of the image dimensions.
+  const size_t preview_xsize = xsize / 7;
+  const size_t preview_ysize = ysize / 7;
+  const size_t preview_bytes = preview_xsize * preview_ysize * 3;
+  EXPECT_EQ(preview_xsize, info.preview.xsize);
+  EXPECT_EQ(preview_ysize, info.preview.ysize);
+  EXPECT_EQ(preview_bytes, buffer_size);
+
+  // The decoder must ask for a preview buffer before delivering the preview.
+  EXPECT_EQ(JXL_DEC_NEED_PREVIEW_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+  std::vector<uint8_t> preview(preview_bytes);
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetPreviewOutBuffer(
+                                 dec, &format, preview.data(), preview.size()));
+
+  EXPECT_EQ(JXL_DEC_PREVIEW_IMAGE, JxlDecoderProcessInput(dec));
+
+  // Convert both sides to float planes. `expected` takes the top-left crop of
+  // the source, reading the first byte of each 2-byte sample (6 bytes per
+  // pixel); `actual` takes the decoded preview (3 bytes per pixel).
+  jxl::Image3F expected(preview_xsize, preview_ysize);
+  jxl::Image3F actual(preview_xsize, preview_ysize);
+
+  for (size_t y = 0; y < preview_ysize; y++) {
+    for (size_t x = 0; x < preview_xsize; x++) {
+      // Byte offsets of pixel (x, y) in the 16-bit source and 8-bit preview.
+      const size_t source_pos = (y * xsize + x) * 6;
+      const size_t preview_pos = (y * preview_xsize + x) * 3;
+      for (size_t c = 0; c < 3; c++) {
+        expected.PlaneRow(c, y)[x] =
+            (1.f / 255) * (source[source_pos + 2 * c]);
+        actual.PlaneRow(c, y)[x] = (1.f / 255) * (preview[preview_pos + c]);
+      }
+    }
+  }
+
+  // Wrap both images as sRGB for the Butteraugli comparison.
+  jxl::CodecInOut io_expected;
+  io_expected.SetFromImage(std::move(expected),
+                           jxl::ColorEncoding::SRGB(false));
+  jxl::CodecInOut io_actual;
+  io_actual.SetFromImage(std::move(actual), jxl::ColorEncoding::SRGB(false));
+
+  jxl::ButteraugliParams ba;
+  // TODO(lode): this ButteraugliDistance silently returns 0 (dangerous for
+  // tests) if xsize or ysize is < 8, no matter how different the images, a tiny
+  // size that could happen for a preview. ButteraugliDiffmap does support
+  // smaller than 8x8, but jxl's ButteraugliDistance does not. Perhaps move
+  // butteraugli's <8x8 handling from ButteraugliDiffmap to
+  // ButteraugliComparator::Diffmap in butteraugli.cc.
+  const auto distance = ButteraugliDistance(io_expected, io_actual, ba,
+                                            /*distmap=*/nullptr, nullptr);
+  EXPECT_LE(distance, 1.4f);
+
+  JxlDecoderDestroy(dec);
+}
+
+// Checks output size and pixels when decoding with a 17-byte row alignment.
+TEST(DecodeTest, AlignTest) {
+  size_t xsize = 123, ysize = 77;
+  std::vector<uint8_t> source = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+  JxlPixelFormat source_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  jxl::CompressParams cparams;
+  cparams.SetLossless();  // Lossless to verify pixels exactly after roundtrip.
+  jxl::PaddedBytes codestream = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(source.data(), source.size()), xsize, ysize, 4,
+      cparams, kCSBF_None, JXL_ORIENT_IDENTITY, /*add_preview=*/false);
+  jxl::Span<const uint8_t> input(codestream.data(), codestream.size());
+
+  size_t align = 17;
+  JxlPixelFormat format = {3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, align};
+  // On purpose not using jxl::RoundUpTo to test it independently.
+  size_t expected_bytes = (1 * 3 * xsize + align - 1) / align * align * ysize;
+
+  for (int pass = 0; pass < 2; ++pass) {
+    std::vector<uint8_t> decoded = jxl::DecodeWithAPI(
+        input, format, /*use_callback=*/pass == 1, /*set_buffer_early=*/false,
+        /*use_resizable_runner=*/false);
+    EXPECT_EQ(expected_bytes, decoded.size());
+    EXPECT_EQ(0, ComparePixels(source.data(), decoded.data(), xsize, ysize,
+                               source_format, format));
+  }
+}
+
+// Encodes a lossless two-frame animation and decodes it with all input
+// available up front, checking every frame's header fields and pixels.
+TEST(DecodeTest, AnimationTest) {
+  size_t xsize = 123, ysize = 77;
+  static const size_t num_frames = 2;
+  std::vector<uint8_t> frames[2];
+  for (size_t i = 0; i < num_frames; ++i) {
+    frames[i] = jxl::test::GetSomeTestImage(xsize, ysize, 3, i);
+  }
+  JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  jxl::CodecInOut io;
+  io.SetSize(xsize, ysize);
+  io.metadata.m.SetUintSamples(16);
+  io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+  io.metadata.m.have_animation = true;
+  io.frames.clear();
+  io.frames.reserve(num_frames);
+  io.SetSize(xsize, ysize);
+
+  // Frame i is stored with a duration of 5 + i.
+  std::vector<uint32_t> frame_durations(num_frames);
+  for (size_t i = 0; i < num_frames; ++i) {
+    frame_durations[i] = 5 + i;
+    jxl::ImageBundle bundle(&io.metadata.m);
+    jxl::Span<const uint8_t> frame_bytes(frames[i].data(), frames[i].size());
+    EXPECT_TRUE(ConvertFromExternal(
+        frame_bytes, xsize, ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+        /*has_alpha=*/false, /*alpha_is_premultiplied=*/false,
+        /*bits_per_sample=*/16, JXL_BIG_ENDIAN, /*flipped_y=*/false,
+        /*pool=*/nullptr, &bundle));
+    bundle.duration = frame_durations[i];
+    io.frames.push_back(std::move(bundle));
+  }
+
+  // Lossless to verify pixels exactly after roundtrip.
+  jxl::CompressParams cparams;
+  cparams.SetLossless();
+  jxl::AuxOut aux_out;
+  jxl::PaddedBytes compressed;
+  jxl::PassesEncoderState enc_state;
+  EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed, &aux_out,
+                              nullptr));
+
+  // Decode and test the animation frames
+  JxlDecoder* dec = JxlDecoderCreate(NULL);
+  void* runner = JxlThreadParallelRunnerCreate(
+      NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetParallelRunner(dec, JxlThreadParallelRunner, runner));
+
+  const int events = JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events));
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  size_t buffer_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+
+  // Per frame the decoder is expected to report JXL_DEC_FRAME, then request an
+  // output buffer, then report JXL_DEC_FULL_IMAGE.
+  for (size_t i = 0; i < num_frames; ++i) {
+    const bool is_final_frame = (i + 1 == num_frames);
+    std::vector<uint8_t> decoded(buffer_size);
+
+    EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+    JxlFrameHeader header;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &header));
+    EXPECT_EQ(frame_durations[i], header.duration);
+    EXPECT_EQ(0, header.name_length);
+    // For now, test with empty name, there's currently no easy way to encode
+    // a jxl file with a frame name because ImageBundle doesn't have a
+    // jxl::FrameHeader to set the name in. We can test the null termination
+    // character though.
+    char name;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameName(dec, &name, 1));
+    EXPECT_EQ(0, name);
+    EXPECT_EQ(is_final_frame, header.is_last);
+
+    EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetImageOutBuffer(dec, &format, decoded.data(),
+                                          decoded.size()));
+
+    EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(0, ComparePixels(frames[i].data(), decoded.data(), xsize, ysize,
+                               format, format));
+  }
+
+  // After all frames were decoded, JxlDecoderProcessInput should return
+  // success to indicate all is done.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+  JxlThreadParallelRunnerDestroy(runner);
+  JxlDecoderDestroy(dec);
+}
+
+// Decodes a lossless two-frame animation while supplying the input in 16-byte
+// steps, checking the order of the decoder events and the decoded pixels.
+TEST(DecodeTest, AnimationTestStreaming) {
+  size_t xsize = 123, ysize = 77;
+  static const size_t num_frames = 2;
+  std::vector<uint8_t> frames[2];
+  frames[0] = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+  frames[1] = jxl::test::GetSomeTestImage(xsize, ysize, 3, 1);
+  JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  jxl::CodecInOut io;
+  io.SetSize(xsize, ysize);
+  io.metadata.m.SetUintSamples(16);
+  io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+  io.metadata.m.have_animation = true;
+  io.frames.clear();
+  io.frames.reserve(num_frames);
+  io.SetSize(xsize, ysize);
+
+  std::vector<uint32_t> frame_durations(num_frames);
+  for (size_t i = 0; i < num_frames; ++i) {
+    frame_durations[i] = 5 + i;
+    jxl::ImageBundle bundle(&io.metadata.m);
+    EXPECT_TRUE(ConvertFromExternal(
+        jxl::Span<const uint8_t>(frames[i].data(), frames[i].size()), xsize,
+        ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false), /*has_alpha=*/false,
+        /*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16,
+        JXL_BIG_ENDIAN, /*flipped_y=*/false, /*pool=*/nullptr, &bundle));
+    bundle.duration = frame_durations[i];
+    io.frames.push_back(std::move(bundle));
+  }
+
+  jxl::CompressParams cparams;
+  cparams.SetLossless();  // Lossless to verify pixels exactly after roundtrip.
+  jxl::AuxOut aux_out;
+  jxl::PaddedBytes compressed;
+  jxl::PassesEncoderState enc_state;
+  EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed, &aux_out,
+                              nullptr));
+
+  // Decode and test the animation frames
+  const size_t step_size = 16;
+  JxlDecoder* dec = JxlDecoderCreate(NULL);
+  const uint8_t* next_in = compressed.data();
+  size_t avail_in = 0;
+  size_t frame_headers_seen = 0;
+  size_t frames_seen = 0;
+  bool seen_basic_info = false;
+
+  void* runner = JxlThreadParallelRunnerCreate(
+      NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetParallelRunner(dec, JxlThreadParallelRunner, runner));
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(
+                dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+
+  std::vector<uint8_t> frames2[2];
+  for (size_t i = 0; i < num_frames; ++i) {
+    frames2[i].resize(frames[i].size());
+  }
+  size_t total_in = 0;
+  size_t loop_count = 0;
+
+  // Failures in this loop use ADD_FAILURE() + break rather than FAIL(): FAIL()
+  // returns from the test body, which would leak the runner and the decoder.
+  for (;;) {
+    if (loop_count++ > compressed.size()) {
+      fprintf(stderr, "Too many loops\n");
+      ADD_FAILURE();
+      break;
+    }
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+    auto status = JxlDecoderProcessInput(dec);
+    size_t remaining = JxlDecoderReleaseInput(dec);
+    EXPECT_LE(remaining, avail_in);
+    next_in += avail_in - remaining;
+    avail_in = remaining;
+    if (status == JXL_DEC_SUCCESS) {
+      break;
+    } else if (status == JXL_DEC_ERROR) {
+      ADD_FAILURE();
+      break;
+    } else if (status == JXL_DEC_NEED_MORE_INPUT) {
+      if (total_in >= compressed.size()) {
+        fprintf(stderr, "Already gave all input data\n");
+        ADD_FAILURE();
+        break;
+      }
+      size_t amount = step_size;
+      if (total_in + amount > compressed.size()) {
+        amount = compressed.size() - total_in;
+      }
+      avail_in += amount;
+      total_in += amount;
+    } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+      if (frames_seen >= num_frames) {
+        ADD_FAILURE() << "Output buffer requested for an unexpected frame";
+        break;
+      }
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                     dec, &format, frames2[frames_seen].data(),
+                                     frames2[frames_seen].size()));
+    } else if (status == JXL_DEC_BASIC_INFO) {
+      EXPECT_EQ(false, seen_basic_info);
+      seen_basic_info = true;
+      JxlBasicInfo info;
+      EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+      EXPECT_EQ(xsize, info.xsize);
+      EXPECT_EQ(ysize, info.ysize);
+    } else if (status == JXL_DEC_FRAME) {
+      EXPECT_EQ(true, seen_basic_info);
+      frame_headers_seen++;
+    } else if (status == JXL_DEC_FULL_IMAGE) {
+      frames_seen++;
+      EXPECT_EQ(frame_headers_seen, frames_seen);
+    } else {
+      fprintf(stderr, "Unexpected status: %d\n", (int)status);
+      ADD_FAILURE();
+      break;
+    }
+  }
+
+  EXPECT_EQ(true, seen_basic_info);
+  EXPECT_EQ(num_frames, frames_seen);
+  EXPECT_EQ(num_frames, frame_headers_seen);
+  for (size_t i = 0; i < num_frames; ++i) {
+    EXPECT_EQ(frames[i], frames2[i]);
+  }
+
+  JxlThreadParallelRunnerDestroy(runner);
+  JxlDecoderDestroy(dec);
+}
+
+// Decodes a lossless 16-frame animation twice: first skipping 5 frames ahead
+// between two frames, then, after a rewind, skipping 3 frames from within a
+// frame whose JXL_DEC_FRAME event was already delivered.
+TEST(DecodeTest, SkipFrameTest) {
+  size_t xsize = 90, ysize = 120;
+  constexpr size_t num_frames = 16;
+  std::vector<uint8_t> frames[num_frames];
+  std::vector<uint32_t> frame_durations(num_frames);
+  for (size_t i = 0; i < num_frames; ++i) {
+    frames[i] = jxl::test::GetSomeTestImage(xsize, ysize, 3, i);
+    frame_durations[i] = 5 + i;
+  }
+
+  JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  jxl::CodecInOut io;
+  io.SetSize(xsize, ysize);
+  io.metadata.m.SetUintSamples(16);
+  io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+  io.metadata.m.have_animation = true;
+  io.frames.clear();
+  io.frames.reserve(num_frames);
+  io.SetSize(xsize, ysize);
+
+  for (size_t i = 0; i < num_frames; ++i) {
+    jxl::ImageBundle bundle(&io.metadata.m);
+    // Mark some frames as referenceable, others not.
+    if (i % 2 == 1) bundle.use_for_next_frame = true;
+
+    jxl::Span<const uint8_t> frame_bytes(frames[i].data(), frames[i].size());
+    EXPECT_TRUE(ConvertFromExternal(
+        frame_bytes, xsize, ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+        /*has_alpha=*/false, /*alpha_is_premultiplied=*/false,
+        /*bits_per_sample=*/16, JXL_BIG_ENDIAN, /*flipped_y=*/false,
+        /*pool=*/nullptr, &bundle));
+    bundle.duration = frame_durations[i];
+    io.frames.push_back(std::move(bundle));
+  }
+
+  // Lossless to verify pixels exactly after roundtrip.
+  jxl::CompressParams cparams;
+  cparams.SetLossless();
+  jxl::AuxOut aux_out;
+  jxl::PaddedBytes compressed;
+  jxl::PassesEncoderState enc_state;
+  EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed, &aux_out,
+                              nullptr));
+
+  // Decode and test the animation frames
+
+  JxlDecoder* dec = JxlDecoderCreate(NULL);
+  // The thread pool uses the runner's default number of worker threads.
+  void* runner = JxlThreadParallelRunnerCreate(
+      NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetParallelRunner(dec, JxlThreadParallelRunner, runner));
+
+  const int frame_events = JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO | frame_events));
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+
+  // The output buffer size is queried once and reused for every frame.
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  size_t buffer_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+
+  // Decodes the next frame the decoder delivers and checks it against source
+  // frame i. A nonzero skip_after_frame_event requests that many frames to be
+  // skipped right after the JXL_DEC_FRAME event of this frame.
+  auto decode_and_check = [&](size_t i, size_t skip_after_frame_event) {
+    std::vector<uint8_t> decoded(buffer_size);
+
+    EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+    // Since this is after JXL_DEC_FRAME but before JXL_DEC_FULL_IMAGE, this
+    // should only skip the next frame, not the currently processed one.
+    if (skip_after_frame_event != 0) {
+      JxlDecoderSkipFrames(dec, skip_after_frame_event);
+    }
+
+    JxlFrameHeader header;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &header));
+    EXPECT_EQ(frame_durations[i], header.duration);
+
+    EXPECT_EQ(i + 1 == num_frames, header.is_last);
+
+    EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetImageOutBuffer(dec, &format, decoded.data(),
+                                          decoded.size()));
+
+    EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(0, ComparePixels(frames[i].data(), decoded.data(), xsize, ysize,
+                               format, format));
+  };
+
+  // First pass: frames 0-2 are decoded, frames 3-7 are skipped, and frames
+  // 8-15 are decoded again.
+  for (size_t i = 0; i < num_frames; ++i) {
+    if (i == 3) {
+      // Requested between two frames, so the skip starts at the next frame.
+      JxlDecoderSkipFrames(dec, 5);
+      i += 5;
+    }
+    decode_and_check(i, /*skip_after_frame_event=*/0);
+  }
+
+  // After all frames were decoded, JxlDecoderProcessInput should return
+  // success to indicate all is done.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+  // Test rewinding the decoder and skipping different frames
+
+  // Basic info is not subscribed to again after the rewind.
+  JxlDecoderRewind(dec);
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, frame_events));
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+
+  // Second pass: frame 9 is decoded, the 3 frames after it are skipped, and
+  // decoding continues with frame 13.
+  for (size_t i = 0; i < num_frames; ++i) {
+    const size_t skip = (i == 9) ? 3 : 0;
+    decode_and_check(i, skip);
+    // Step over the frames that were skipped after this one.
+    i += skip;
+  }
+
+  JxlThreadParallelRunnerDestroy(runner);
+  JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, SkipFrameWithBlendingTest) {
+  size_t xsize = 90, ysize = 120;
+  constexpr size_t num_frames = 16;
+  std::vector<uint8_t> frames[num_frames];
+  JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  jxl::CodecInOut io;
+  io.SetSize(xsize, ysize);
+  io.metadata.m.SetUintSamples(16);
+  io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+  io.metadata.m.have_animation = true;
+  io.frames.clear();
+  io.frames.reserve(num_frames);
+  io.SetSize(xsize, ysize);
+
+  std::vector<uint32_t> frame_durations(num_frames);
+
+  for (size_t i = 0; i < num_frames; ++i) {
+    if (i < 5) {
+      std::vector<uint8_t> frame_internal =
+          jxl::test::GetSomeTestImage(xsize, ysize, 3, i * 2 + 1);
+      // An internal frame with 0 duration, and use_for_next_frame, this is a
+      // frame that is not rendered and not output by the API, but on which the
+      // rendered frames depend
+      jxl::ImageBundle bundle_internal(&io.metadata.m);
+      EXPECT_TRUE(ConvertFromExternal(
+          jxl::Span<const uint8_t>(frame_internal.data(),
+                                   frame_internal.size()),
+          xsize, ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+          /*has_alpha=*/false,
+          /*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16,
+          JXL_BIG_ENDIAN, /*flipped_y=*/false, /*pool=*/nullptr,
+          &bundle_internal));
+      bundle_internal.duration = 0;
+      bundle_internal.use_for_next_frame = true;
+      io.frames.push_back(std::move(bundle_internal));
+    }
+
+    std::vector<uint8_t> frame =
+        jxl::test::GetSomeTestImage(xsize, ysize, 3, i * 2);
+    // Actual rendered frame
+    frame_durations[i] = 5 + i;
+    jxl::ImageBundle bundle(&io.metadata.m);
+    EXPECT_TRUE(ConvertFromExternal(
+        jxl::Span<const uint8_t>(frame.data(), frame.size()), xsize, ysize,
+        jxl::ColorEncoding::SRGB(/*is_gray=*/false), /*has_alpha=*/false,
+        /*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16,
+        JXL_BIG_ENDIAN, /*flipped_y=*/false, /*pool=*/nullptr, &bundle));
+    bundle.duration = frame_durations[i];
+    // Create some variation in which frames depend on which.
+    if (i != 3 && i != 9 && i != 10) {
+      bundle.use_for_next_frame = true;
+    }
+    if (i != 12) {
+      bundle.blend = true;
+      // Choose a blend mode that depends on the pixels of the saved frame and
+      // doesn't use alpha
+      bundle.blendmode = jxl::BlendMode::kMul;
+    }
+    io.frames.push_back(std::move(bundle));
+  }
+
+  jxl::CompressParams cparams;
+  cparams.SetLossless();  // Lossless to verify pixels exactly after roundtrip.
+  jxl::AuxOut aux_out;
+  jxl::PaddedBytes compressed;
+  jxl::PassesEncoderState enc_state;
+  EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed, &aux_out,
+                              nullptr));
+
+  // Independently decode all frames without any skipping, to create the
+  // expected blended frames, for the actual tests below to compare with.
+  {
+    JxlDecoder* dec = JxlDecoderCreate(NULL);
+    const uint8_t* next_in = compressed.data();
+    size_t avail_in = compressed.size();
+
+    void* runner = JxlThreadParallelRunnerCreate(
+        NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetParallelRunner(
+                                   dec, JxlThreadParallelRunner, runner));
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSubscribeEvents(dec, JXL_DEC_FULL_IMAGE));
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+    for (size_t i = 0; i < num_frames; ++i) {
+      EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+      frames[i].resize(xsize * ysize * 6);
+      EXPECT_EQ(JXL_DEC_SUCCESS,
+                JxlDecoderSetImageOutBuffer(dec, &format, frames[i].data(),
+                                            frames[i].size()));
+      EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+    }
+
+    // After all frames were decoded, JxlDecoderProcessInput should return
+    // success to indicate all is done.
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+    JxlThreadParallelRunnerDestroy(runner);
+    JxlDecoderDestroy(dec);
+  }
+
+  JxlDecoder* dec = JxlDecoderCreate(NULL);
+  const uint8_t* next_in = compressed.data();
+  size_t avail_in = compressed.size();
+
+  void* runner = JxlThreadParallelRunnerCreate(
+      NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetParallelRunner(dec, JxlThreadParallelRunner, runner));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(
+                dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  size_t buffer_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+
+  for (size_t i = 0; i < num_frames; ++i) {
+    std::vector<uint8_t> pixels(buffer_size);
+
+    EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+    JxlFrameHeader frame_header;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+    EXPECT_EQ(frame_durations[i], frame_header.duration);
+
+    EXPECT_EQ(i + 1 == num_frames, frame_header.is_last);
+
+    EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                   dec, &format, pixels.data(), pixels.size()));
+
+    EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(0, ComparePixels(frames[i].data(), pixels.data(), xsize, ysize,
+                               format, format));
+
+    // Test rewinding mid-way, not decoding all frames.
+    if (i == 8) {
+      break;
+    }
+  }
+
+  JxlDecoderRewind(dec);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec, JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+
+  for (size_t i = 0; i < num_frames; ++i) {
+    if (i == 3) {
+      JxlDecoderSkipFrames(dec, 5);
+      i += 5;
+    }
+    std::vector<uint8_t> pixels(buffer_size);
+
+    EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+    JxlFrameHeader frame_header;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+    EXPECT_EQ(frame_durations[i], frame_header.duration);
+
+    EXPECT_EQ(i + 1 == num_frames, frame_header.is_last);
+
+    EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                   dec, &format, pixels.data(), pixels.size()));
+
+    EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(0, ComparePixels(frames[i].data(), pixels.data(), xsize, ysize,
+                               format, format));
+  }
+
+  // After all frames were decoded, JxlDecoderProcessInput should return
+  // success to indicate all is done.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+  // Test rewinding the decoder and skipping different frames
+
+  JxlDecoderRewind(dec);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec, JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+
+  for (size_t i = 0; i < num_frames; ++i) {
+    int test_skipping = (i == 9) ? 3 : 0;
+    std::vector<uint8_t> pixels(buffer_size);
+
+    EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+    // Since this is after JXL_DEC_FRAME but before JXL_DEC_FULL_IMAGE, this
+    // should only skip the next frame, not the currently processed one.
+    if (test_skipping) JxlDecoderSkipFrames(dec, test_skipping);
+
+    JxlFrameHeader frame_header;
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+    EXPECT_EQ(frame_durations[i], frame_header.duration);
+
+    EXPECT_EQ(i + 1 == num_frames, frame_header.is_last);
+
+    EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+    EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                   dec, &format, pixels.data(), pixels.size()));
+
+    EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+    EXPECT_EQ(0, ComparePixels(frames[i].data(), pixels.data(), xsize, ysize,
+                               format, format));
+
+    if (test_skipping) i += test_skipping;
+  }
+
+  JxlThreadParallelRunnerDestroy(runner);
+  JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, FlushTest) {
+  // The image has to span several groups; otherwise there are no progressive
+  // stages to flush.
+  const size_t width = 333;
+  const size_t height = 300;
+  const uint32_t channel_count = 3;
+  const std::vector<uint8_t> source_pixels =
+      jxl::test::GetSomeTestImage(width, height, channel_count, 0);
+  jxl::CompressParams params;
+  const jxl::PaddedBytes codestream = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(source_pixels.data(), source_pixels.size()),
+      width, height, channel_count, params, kCSBF_None, JXL_ORIENT_IDENTITY,
+      /*add_preview=*/true);
+  JxlPixelFormat format = {channel_count, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+  // Destination for the decoded pixels, sized like the source image.
+  std::vector<uint8_t> decoded(source_pixels.size());
+
+  JxlDecoder* decoder = JxlDecoderCreate(nullptr);
+
+  const int wanted_events =
+      JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(decoder, wanted_events));
+
+  // Flushing only works once the complete DC of the image is available. The
+  // DC takes up more than 50% of the image generated here, so feeding three
+  // quarters of the stream first is enough.
+  const size_t partial_size = codestream.size() * 3 / 4;
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(decoder, codestream.data(), partial_size));
+
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(decoder));
+  JxlBasicInfo basic_info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(decoder, &basic_info));
+  EXPECT_EQ(basic_info.xsize, width);
+  EXPECT_EQ(basic_info.ysize, height);
+
+  EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(decoder));
+
+  // Without an output buffer there is nothing to flush into.
+  EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(decoder));
+
+  size_t required_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(decoder, &format, &required_size));
+  EXPECT_EQ(decoded.size(), required_size);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetImageOutBuffer(decoder, &format, decoded.data(),
+                                        decoded.size()));
+
+  // The frame itself is only processed by the JxlDecoderProcessInput call
+  // that follows JXL_DEC_FRAME, so one more call is needed (ending in
+  // JXL_DEC_NEED_MORE_INPUT) even though the bytes were already supplied.
+  EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(decoder));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(decoder));
+
+  // Only the sequence of events is verified. The flushed pixels should look
+  // like the input image with less fine detail, but they are not compared.
+
+  EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(decoder));
+
+  // Resume from the first byte the decoder has not consumed yet.
+  const size_t bytes_consumed = partial_size - JxlDecoderReleaseInput(decoder);
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetInput(decoder, codestream.data() + bytes_consumed,
+                               codestream.size() - bytes_consumed));
+  EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(decoder));
+
+  JxlDecoderDestroy(decoder);
+}
+
+// Decodes `container` with JPEG reconstruction enabled and checks that the
+// reconstructed JPEG bytestream is byte-identical to `jpeg_bytes`.
+//
+// The output buffer deliberately starts small (128 bytes) and is doubled each
+// time the decoder reports JXL_DEC_JPEG_NEED_MORE_OUTPUT, so the
+// release/resize/re-set buffer protocol is exercised as well.
+void VerifyJPEGReconstruction(const jxl::PaddedBytes& container,
+                              const jxl::PaddedBytes& jpeg_bytes) {
+  JxlDecoderPtr dec = JxlDecoderMake(nullptr);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(
+                dec.get(), JXL_DEC_JPEG_RECONSTRUCTION | JXL_DEC_FULL_IMAGE));
+  // Check the status like every other decoder call here, so that a rejected
+  // input is reported at its source rather than as a later event mismatch.
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec.get(), container.data(),
+                                                container.size()));
+  EXPECT_EQ(JXL_DEC_JPEG_RECONSTRUCTION, JxlDecoderProcessInput(dec.get()));
+  std::vector<uint8_t> reconstructed_buffer(128);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetJPEGBuffer(dec.get(), reconstructed_buffer.data(),
+                                    reconstructed_buffer.size()));
+  size_t used = 0;
+  JxlDecoderStatus process_result = JXL_DEC_JPEG_NEED_MORE_OUTPUT;
+  while (process_result == JXL_DEC_JPEG_NEED_MORE_OUTPUT) {
+    // Releasing the buffer returns the number of bytes left unused; the
+    // buffer must be released before the vector is resized because resizing
+    // may move the storage.
+    used = reconstructed_buffer.size() - JxlDecoderReleaseJPEGBuffer(dec.get());
+    reconstructed_buffer.resize(reconstructed_buffer.size() * 2);
+    // Continue writing right after the bytes produced so far.
+    EXPECT_EQ(
+        JXL_DEC_SUCCESS,
+        JxlDecoderSetJPEGBuffer(dec.get(), reconstructed_buffer.data() + used,
+                                reconstructed_buffer.size() - used));
+    process_result = JxlDecoderProcessInput(dec.get());
+  }
+  ASSERT_EQ(JXL_DEC_FULL_IMAGE, process_result);
+  used = reconstructed_buffer.size() - JxlDecoderReleaseJPEGBuffer(dec.get());
+  // The size must match before comparing, otherwise memcmp could read past
+  // the end of the shorter buffer.
+  ASSERT_EQ(used, jpeg_bytes.size());
+  EXPECT_EQ(0, memcmp(reconstructed_buffer.data(), jpeg_bytes.data(), used));
+}
+
+#if JPEGXL_ENABLE_JPEG
+TEST(DecodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGReconstructTestCodestream)) {
+  const size_t width = 123;
+  const size_t height = 77;
+  const size_t channel_count = 3;
+  const std::vector<uint8_t> source_pixels =
+      jxl::test::GetSomeTestImage(width, height, channel_count, /*seed=*/0);
+
+  jxl::CompressParams params;
+  params.color_transform = jxl::ColorTransform::kNone;
+
+  // CreateTestJXLCodestream fills `expected_jpeg` through its last argument;
+  // those bytes are what the reconstruction is compared against.
+  jxl::PaddedBytes expected_jpeg;
+  const jxl::PaddedBytes jxl_file = jxl::CreateTestJXLCodestream(
+      jxl::Span<const uint8_t>(source_pixels.data(), source_pixels.size()),
+      width, height, channel_count, params, kCSBF_Single, JXL_ORIENT_IDENTITY,
+      /*add_preview=*/true,
+      /*add_icc_profile=*/false, &expected_jpeg);
+
+  VerifyJPEGReconstruction(jxl_file, expected_jpeg);
+}
+#endif  // JPEGXL_ENABLE_JPEG
+
+TEST(DecodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGReconstructionTest)) {
+  // Load a real JPEG from the test data and decode it into a CodecInOut.
+  const std::string source_path =
+      "imagecompression.info/flower_foveon.png.im_q85_420.jpg";
+  const jxl::PaddedBytes original_jpeg = jxl::ReadTestData(source_path);
+  jxl::CodecInOut io;
+  ASSERT_TRUE(
+      jxl::jpeg::DecodeImageJPG(jxl::Span<const uint8_t>(original_jpeg), &io));
+  io.metadata.m.xyb_encoded = false;
+
+  // Write the codestream by hand: headers, byte alignment, then one frame.
+  jxl::BitWriter bit_writer;
+  ASSERT_TRUE(WriteHeaders(&io.metadata, &bit_writer, nullptr));
+  bit_writer.ZeroPadToByte();
+  jxl::CompressParams params;
+  params.color_transform = jxl::ColorTransform::kNone;
+  jxl::PassesEncoderState encoder_state;
+  ASSERT_TRUE(jxl::EncodeFrame(params, jxl::FrameInfo{}, &io.metadata,
+                               io.Main(), &encoder_state,
+                               /*pool=*/nullptr, &bit_writer,
+                               /*aux_out=*/nullptr));
+
+  // Serialize the JPEG reconstruction data attached to the main frame.
+  jxl::PaddedBytes reconstruction_data;
+  ASSERT_TRUE(EncodeJPEGData(*io.Main().jpeg_data, &reconstruction_data));
+
+  // Assemble the container: signature header, a sized "jbrd" box holding the
+  // reconstruction data, then a "jxlc" box holding the codestream.
+  jxl::PaddedBytes file_bytes;
+  file_bytes.append(jxl::kContainerHeader,
+                    jxl::kContainerHeader + sizeof(jxl::kContainerHeader));
+  jxl::AppendBoxHeader(jxl::MakeBoxType("jbrd"), reconstruction_data.size(),
+                       false, &file_bytes);
+  file_bytes.append(reconstruction_data.data(),
+                    reconstruction_data.data() + reconstruction_data.size());
+  jxl::AppendBoxHeader(jxl::MakeBoxType("jxlc"), 0, true, &file_bytes);
+  jxl::PaddedBytes frame_bytes = std::move(bit_writer).TakeBytes();
+  file_bytes.append(frame_bytes.data(),
+                    frame_bytes.data() + frame_bytes.size());
+
+  // Decoding the container must reproduce the original JPEG file exactly.
+  VerifyJPEGReconstruction(file_bytes, original_jpeg);
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/decode_to_jpeg.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/decode_to_jpeg.cc
new file mode 100644
index 0000000000..4bab82abb3
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/decode_to_jpeg.cc
@@ -0,0 +1,77 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/decode_to_jpeg.h"
+
+namespace jxl {
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+
+// Consumes bytes of the JPEG reconstruction box from *next_in / *avail_in and
+// tries to decode them once enough data is present.
+//
+// Returns JXL_DEC_JPEG_RECONSTRUCTION when the box was decoded (and leaves the
+// "inside box" state), JXL_DEC_NEED_MORE_INPUT when more bytes are required
+// (the bytes seen so far are kept in buffer_), and JXL_DEC_ERROR when decoding
+// failed fatally or a complete fixed-size box could not be decoded.
+JxlDecoderStatus JxlToJpegDecoder::Process(const uint8_t** next_in,
+                                           size_t* avail_in) {
+  if (!inside_box_) {
+    JXL_ABORT(
+        "processing of JPEG reconstruction data outside JPEG reconstruction "
+        "box");
+  }
+
+  // A box that runs until EOF takes everything offered; a sized box takes at
+  // most the bytes it is still missing.
+  const size_t take =
+      box_until_eof_ ? *avail_in
+                     : std::min<size_t>(*avail_in, box_size_ - buffer_.size());
+  Span<const uint8_t> to_decode(*next_in, take);
+  *next_in += take;
+  *avail_in -= take;
+
+  // Appends the given bytes to the bytes carried over between calls.
+  const auto stash = [this](Span<const uint8_t> bytes) {
+    buffer_.insert(buffer_.end(), bytes.data(), bytes.data() + bytes.size());
+  };
+
+  const bool had_buffered_data = !buffer_.empty();
+  if (had_buffered_data) {
+    // Earlier calls left data behind: join the new bytes with it and decode
+    // from the combined buffer.
+    stash(to_decode);
+    to_decode = Span<const uint8_t>(buffer_.data(), buffer_.size());
+  }
+
+  if (!box_until_eof_ && to_decode.size() > box_size_) {
+    JXL_ABORT("JPEG reconstruction data to decode larger than expected");
+  }
+
+  const bool may_be_complete =
+      box_until_eof_ || to_decode.size() == box_size_;
+  if (!may_be_complete) {
+    // Sized box that is still short of data: keep the bytes unless they are
+    // already in the buffer.
+    if (!had_buffered_data) stash(to_decode);
+    return JXL_DEC_NEED_MORE_INPUT;
+  }
+
+  jpeg_data_ = make_unique<jpeg::JPEGData>();
+  const auto status = jpeg::DecodeJPEGData(to_decode, jpeg_data_.get());
+  if (status.IsFatalError()) return JXL_DEC_ERROR;
+  if (status) {
+    // Decoded successfully: stop tracking the box before emitting the event.
+    inside_box_ = false;
+    return JXL_DEC_JPEG_RECONSTRUCTION;
+  }
+
+  // A sized box that is complete yet does not decode is an error.
+  if (!box_until_eof_) return JXL_DEC_ERROR;
+
+  // For an until-EOF box a failed decode is taken to mean the data is still
+  // incomplete; keep the bytes unless they are already in the buffer.
+  if (!had_buffered_data) stash(to_decode);
+  return JXL_DEC_NEED_MORE_INPUT;
+}
+
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/decode_to_jpeg.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/decode_to_jpeg.h
new file mode 100644
index 0000000000..86f0a66da4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/decode_to_jpeg.h
@@ -0,0 +1,173 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DECODE_TO_JPEG_H_
+#define LIB_JXL_DECODE_TO_JPEG_H_
+
+// JPEG XL to JPEG bytes decoder logic. The JxlToJpegDecoder class keeps track
+// of the decoder state needed to parse the JPEG reconstruction box and provide
+// the reconstructed JPEG to the output buffer.
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <memory>
+#include <vector>
+
+#include "jxl/decode.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"  // JPEGXL_ENABLE_TRANSCODE_JPEG
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/jpeg/dec_jpeg_data.h"
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+#include "lib/jxl/jpeg/dec_jpeg_data_writer.h"
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+namespace jxl {
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+
+// Tracks the state needed to parse a JPEG reconstruction box and to write the
+// reconstructed JPEG bytes into a caller-provided output buffer.
+class JxlToJpegDecoder {
+ public:
+  // Returns whether an output buffer is set.
+  bool IsOutputSet() const { return next_out_ != nullptr; }
+
+  // Returns whether the decoder is currently parsing a JPEG reconstruction
+  // box.
+  bool IsParsingBox() const { return inside_box_; }
+
+  // Returns the parsed jpeg::JPEGData, or nullptr if there is none. Ownership
+  // stays with this object.
+  const jpeg::JPEGData* JpegData() const { return jpeg_data_.get(); }
+
+  // Return the parsed jpeg::JPEGData object and removes it from the
+  // JxlToJpegDecoder. The caller takes ownership of the returned pointer.
+  jpeg::JPEGData* ReleaseJpegData() { return jpeg_data_.release(); }
+
+  // Sets the output buffer used when producing JPEG output.
+  // Returns JXL_DEC_ERROR if a buffer is already set; it has to be released
+  // with ReleaseOutputBuffer() first.
+  JxlDecoderStatus SetOutputBuffer(uint8_t* data, size_t size) {
+    if (next_out_) return JXL_DEC_ERROR;
+    next_out_ = data;
+    avail_size_ = size;
+    return JXL_DEC_SUCCESS;
+  }
+
+  // Releases the buffer set with SetOutputBuffer().
+  // Returns the number of bytes of that buffer that were still available.
+  size_t ReleaseOutputBuffer() {
+    size_t result = avail_size_;
+    next_out_ = nullptr;
+    avail_size_ = 0;
+    return result;
+  }
+
+  // Begins parsing a new JPEG reconstruction box. A box_size of 0 means the
+  // box extends until the end of the input; otherwise contents_size is the
+  // number of content bytes to expect.
+  void StartBox(uint64_t box_size, size_t contents_size) {
+    // A new box implies that we clear the buffer.
+    buffer_.clear();
+    inside_box_ = true;
+    // Always assign the flag: if an until-EOF box was started earlier on this
+    // object, a later sized box must not inherit the "until EOF" state.
+    box_until_eof_ = (box_size == 0);
+    if (!box_until_eof_) {
+      box_size_ = contents_size;
+    }
+  }
+
+  // Consumes data from next_in/avail_in to reconstruct JPEG data.
+  // Uses box_size_, inside_box_ and box_until_eof_ to calculate how much to
+  // consume. Potentially stores unparsed data in buffer_.
+  // Potentially populates jpeg_data_. Potentially updates inside_box_.
+  JxlDecoderStatus Process(const uint8_t** next_in, size_t* avail_in);
+
+  // Sets the JpegData of the ImageBundle passed if there is anything to set.
+  // Releases the JpegData from this decoder if set.
+  // Returns false if applying the ICC profile to the JPEG data fails.
+  Status SetImageBundleJpegData(ImageBundle* ib) {
+    if (IsOutputSet() && jpeg_data_ != nullptr) {
+      if (!jpeg::SetJPEGDataFromICC(ib->metadata()->color_encoding.ICC(),
+                                    jpeg_data_.get())) {
+        return false;
+      }
+      ib->jpeg_data.reset(jpeg_data_.release());
+    }
+    return true;
+  }
+
+  // Writes the JPEG bytestream for jpeg_data into the output buffer.
+  // Returns JXL_DEC_SUCCESS and advances the output position on success,
+  // JXL_DEC_JPEG_NEED_MORE_OUTPUT if writing failed with the buffer
+  // exhausted, and JXL_DEC_ERROR for any other failure. On failure the output
+  // position is left untouched.
+  JxlDecoderStatus WriteOutput(const jpeg::JPEGData& jpeg_data) {
+    // Work on copies so that next_out_/avail_size_ only change on success.
+    uint8_t* tmp_next_out = next_out_;
+    size_t tmp_avail_size = avail_size_;
+    // Copies as much as fits and reports how many bytes were taken.
+    auto write = [&tmp_next_out, &tmp_avail_size](const uint8_t* buf,
+                                                  size_t len) {
+      size_t to_write = std::min<size_t>(tmp_avail_size, len);
+      memcpy(tmp_next_out, buf, to_write);
+      tmp_next_out += to_write;
+      tmp_avail_size -= to_write;
+      return to_write;
+    };
+    Status write_result = jpeg::WriteJpeg(jpeg_data, write);
+    if (!write_result) {
+      if (tmp_avail_size == 0) {
+        return JXL_DEC_JPEG_NEED_MORE_OUTPUT;
+      }
+      return JXL_DEC_ERROR;
+    }
+    next_out_ = tmp_next_out;
+    avail_size_ = tmp_avail_size;
+    return JXL_DEC_SUCCESS;
+  }
+
+ private:
+  // Content of the most recently parsed JPEG reconstruction box if any.
+  std::vector<uint8_t> buffer_;
+
+  // Decoded content of the most recently parsed JPEG reconstruction box is
+  // stored here.
+  std::unique_ptr<jpeg::JPEGData> jpeg_data_;
+
+  // True if the decoder is currently reading bytes inside a JPEG reconstruction
+  // box.
+  bool inside_box_ = false;
+
+  // True if the JPEG reconstruction box had undefined size (all remaining
+  // bytes).
+  bool box_until_eof_ = false;
+  // Size of most recently parsed JPEG reconstruction box contents.
+  size_t box_size_ = 0;
+
+  // Next bytes to write JPEG reconstruction to.
+  uint8_t* next_out_ = nullptr;
+  // Available bytes to write JPEG reconstruction to.
+  size_t avail_size_ = 0;
+};
+
+#else
+
+// Fake class that disables support for decoding JPEG XL to JPEG. It keeps the
+// same interface as the real decoder, but never parses a box, never holds
+// JPEG data and rejects any output buffer.
+class JxlToJpegDecoder {
+ public:
+  bool IsOutputSet() const { return false; }
+  bool IsParsingBox() const { return false; }
+
+  const jpeg::JPEGData* JpegData() const { return nullptr; }
+  jpeg::JPEGData* ReleaseJpegData() { return nullptr; }
+
+  JxlDecoderStatus SetOutputBuffer(uint8_t* /* data */, size_t /* size */) {
+    return JXL_DEC_ERROR;
+  }
+  size_t ReleaseOutputBuffer() { return 0; }
+
+  void StartBox(uint64_t /* box_size */, size_t /* contents_size */) {}
+
+  // Parameter names are commented out like in the other stubs, so that the
+  // unused arguments do not trigger unused-parameter warnings.
+  JxlDecoderStatus Process(const uint8_t** /* next_in */,
+                           size_t* /* avail_in */) {
+    return JXL_DEC_ERROR;
+  }
+
+  Status SetImageBundleJpegData(ImageBundle* /* ib */) { return true; }
+
+  JxlDecoderStatus WriteOutput(const jpeg::JPEGData& /* jpeg_data */) {
+    return JXL_DEC_SUCCESS;
+  }
+};
+
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DECODE_TO_JPEG_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/descriptive_statistics_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/descriptive_statistics_test.cc
new file mode 100644
index 0000000000..7891c728e2
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/descriptive_statistics_test.cc
@@ -0,0 +1,152 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/descriptive_statistics.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <random>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/noise_distributions.h"
+
+namespace jxl {
+namespace {
+
+// Assigns x to one of two streams so we can later test Assimilate.
+template <typename Random>
+void NotifyEither(float x, Random* rng, Stats* JXL_RESTRICT stats1,
+                  Stats* JXL_RESTRICT stats2) {
+  // Draw one value from the generator; bit 7 of it selects the accumulator
+  // that receives x.
+  const bool use_first = ((*rng)() & 128) != 0;
+  Stats* const chosen = use_first ? stats1 : stats2;
+  chosen->Notify(x);
+}
+
+TEST(StatsTest, TestGaussian) {
+  const float expected_mean = 5.0f;
+  const float expected_stddev = 4.0f;
+
+  // `combined` sees every sample; `part_a` and `part_b` each see a share and
+  // are merged afterwards.
+  Stats combined;
+  Stats part_a, part_b;
+  NoiseGaussian noise(expected_stddev);
+  std::mt19937 rng(129);
+  for (size_t n = 0; n < 1000 * 1000; ++n) {
+    const float sample = noise(expected_mean, &rng);
+    combined.Notify(sample);
+    // Note: the split decision draws from the same generator as the samples.
+    NotifyEither(sample, &rng, &part_a, &part_b);
+  }
+
+  // Moments of the single accumulator.
+  EXPECT_NEAR(expected_mean, combined.Mean(), 0.01);
+  EXPECT_NEAR(expected_stddev, combined.StandardDeviation(), 0.02);
+  EXPECT_NEAR(0.0, combined.Skewness(), 0.02);
+  EXPECT_NEAR(0.0, combined.Kurtosis() - 3, 0.02);
+  printf("%s\n", combined.ToString().c_str());
+
+  // Merging the two partial accumulators must give the same moments.
+  part_a.Assimilate(part_b);
+  EXPECT_NEAR(expected_mean, part_a.Mean(), 0.01);
+  EXPECT_NEAR(expected_stddev, part_a.StandardDeviation(), 0.02);
+  EXPECT_NEAR(0.0, part_a.Skewness(), 0.02);
+  EXPECT_NEAR(0.0, part_a.Kurtosis() - 3, 0.02);
+}
+
+TEST(StatsTest, TestUniform) {
+  // `combined` sees every sample; `part_a` and `part_b` each see a share and
+  // are merged afterwards.
+  Stats combined;
+  Stats part_a, part_b;
+  NoiseUniform noise(0, 256);
+  // Separate generators for the samples and for the split decision.
+  std::mt19937 sample_rng(129), split_rng(65537);
+  for (size_t n = 0; n < 1000 * 1000; ++n) {
+    const float sample = noise(0.0f, &sample_rng);
+    combined.Notify(sample);
+    NotifyEither(sample, &split_rng, &part_a, &part_b);
+  }
+
+  EXPECT_NEAR(128.0, combined.Mean(), 0.05);
+  EXPECT_NEAR(0.0, combined.Min(), 0.01);
+  EXPECT_NEAR(256.0, combined.Max(), 0.01);
+  EXPECT_NEAR(70, combined.StandardDeviation(), 10);
+  // A uniform distribution has no outliers, hence negative excess kurtosis.
+  EXPECT_NEAR(-1.2, combined.Kurtosis() - 3, 0.1);
+  printf("%s\n", combined.ToString().c_str());
+
+  // Merging the two partial accumulators must give the same results.
+  part_a.Assimilate(part_b);
+  EXPECT_NEAR(128.0, part_a.Mean(), 0.05);
+  EXPECT_NEAR(0.0, part_a.Min(), 0.01);
+  EXPECT_NEAR(256.0, part_a.Max(), 0.01);
+  EXPECT_NEAR(70, part_a.StandardDeviation(), 10);
+}
+
+TEST(StatsTest, CompareCentralMomentsAgainstTwoPass) {
+  // Repeat with different seeds so that the tolerances are not tuned to one
+  // particular set of samples.
+  for (int rep = 0; rep < 200; ++rep) {
+    // A uniform distribution keeps outliers away.
+    NoiseUniform noise(0, 256);
+    std::mt19937 sample_rng(129 + 13 * rep), split_rng(65537);
+
+    // Few samples, so the population-vs-sample bias is noticeable.
+    const size_t kSamples = 20;
+
+    // Pass 1: draw the samples and compute their mean.
+    std::vector<float> samples;
+    samples.reserve(kSamples);
+    double total = 0.0;
+    for (size_t n = 0; n < kSamples; ++n) {
+      const float value = noise(0.0f, &sample_rng);
+      samples.push_back(value);
+      total += value;
+    }
+    const double mean = total / kSamples;
+
+    // Pass 2: accumulate powers of the deviation from the mean, and feed the
+    // same samples to the streaming accumulators under test.
+    Stats combined;
+    Stats part_a, part_b;
+    double sum2 = 0.0;
+    double sum3 = 0.0;
+    double sum4 = 0.0;
+    for (const double x : samples) {
+      const double d = x - mean;
+      sum2 += d * d;
+      sum3 += d * d * d;
+      sum4 += d * d * d * d;
+
+      combined.Notify(x);
+      NotifyEither(x, &split_rng, &part_a, &part_b);
+    }
+    const double mu1 = mean;
+    const double mu2 = sum2 / kSamples;
+    const double mu3 = sum3 / kSamples;
+    const double mu4 = sum4 / kSamples;
+
+    // Mu1 is compared with the mean; Mu2..Mu4 with the two-pass central
+    // moments computed above.
+    EXPECT_NEAR(mu1, combined.Mu1(), 1E-13);
+    EXPECT_NEAR(mu2, combined.Mu2(), 1E-11);
+    EXPECT_NEAR(mu3, combined.Mu3(), 1E-9);
+    EXPECT_NEAR(mu4, combined.Mu4(), 1E-6);
+
+    // Merging the two partial accumulators must give the same moments.
+    part_a.Assimilate(part_b);
+    EXPECT_NEAR(mu1, part_a.Mu1(), 1E-13);
+    EXPECT_NEAR(mu2, part_a.Mu2(), 1E-11);
+    EXPECT_NEAR(mu3, part_a.Mu3(), 1E-9);
+    EXPECT_NEAR(mu4, part_a.Mu4(), 1E-6);
+
+    const double sample_variance = mu2;
+    // Correction factor for the sampling bias.
+    const double r = (kSamples - 1.0) / kSamples;
+    const double skewness = mu3 * pow(r / mu2, 1.5);
+    const double kurtosis = mu4 * pow(r / mu2, 2.0);
+
+    EXPECT_NEAR(sample_variance, combined.SampleVariance(),
+                sample_variance * 1E-12);
+    // Skewness may be negative, so the tolerance uses its magnitude.
+    EXPECT_NEAR(skewness, combined.Skewness(), std::abs(skewness * 1E-11));
+    EXPECT_NEAR(kurtosis, combined.Kurtosis(), kurtosis * 1E-12);
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/docs/color_management.md b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/docs/color_management.md
new file mode 100644
index 0000000000..56f4a2856c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/docs/color_management.md
@@ -0,0 +1,68 @@
+# Color Management
+
+[TOC]
+
+<!--*
+# Document freshness: For more information, see go/fresh-source.
+freshness: { owner: 'janwas' reviewed: '2019-02-01' }
+*-->
+
+## Why
+
+The vast majority of web images are still sRGB. However, wide-gamut material is
+increasingly being produced (photography, cinema, 4K). Screens covering most of
+the Adobe RGB gamut are readily available and some also cover most of DCI P3
+(iPhone, Pixel2) or even BT.2020.
+
+Currently, after a camera records a very saturated red pixel, most raw
+processors would clip it to the rather small sRGB gamut before saving as JPEG.
+In keeping with our high-quality goal, we prevent such loss by allowing wider
+input color spaces.
+
+## Which color space
+
+Even wide gamuts could be expressed relative to the sRGB primaries, but the
+resulting coordinates may be outside the valid 0..1 range. Surprisingly, such
+'unbounded' coordinates can be passed through color transforms provided the
+transfer functions are expressed as parametric functions (not lookup tables).
+However, most image file formats (including PNG and PNM) lack min/max metadata
+and thus do not support unbounded coordinates.
+
+Instead, we need a larger working gamut to ensure most pixel coordinates are
+within bounds and thus not clipped. However, larger gamuts result in lower
+precision/resolution when using <= 16 bit encodings (as opposed to 32-bit float
+in PFM). BT.2100 or DCI P3 appear to be good compromises.
+
+## CMS library
+
+Transforms with unbounded pixels are desirable because they reduce round-trip
+error in tests. This requires parametric curves, which are only supported for
+the common sRGB case in ICC v4 profiles. ArgyllCMS does not support v4. The
+other popular open-source CMS is LittleCMS. It is also used by color-managed
+editors (Krita/darktable), which increases the chances of interoperability.
+However, LCMS has race conditions and overflow issues that prevent fuzzing. We
+will later switch to the newer skcms. Note that this library does not intend to
+support multiProcessElements, so HDR transfer functions cannot be represented
+accurately. Thus in the long term, we will probably migrate away from ICC
+profiles entirely.
+
+## Which viewer
+
+On Linux, Krita and darktable support loading our PNG output images and their
+ICC profile.
+
+## How to compress/decompress
+
+### Embedded ICC profile
+
+-   Create an 8-bit or 16-bit PNG with an iCCP chunk, e.g. using darktable.
+-   Pass it to `cjxl`, then `djxl` with no special arguments. The decoded output
+    will have the same bit depth (can override with `--output_bit_depth`) and
+    color space.
+
+### Images without metadata (e.g. HDR)
+
+-   Create a PGM/PPM/PFM file in a known color space.
+-   Invoke `cjxl` with `-x color_space=RGB_D65_202_Rel_Lin` (linear 2020). For
+    details/possible values, see color_encoding.cc `Description`.
+-   Invoke `djxl` as above with no special arguments.
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ac_strategy.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ac_strategy.cc
new file mode 100644
index 0000000000..507e022cdc
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ac_strategy.cc
@@ -0,0 +1,1099 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_ac_strategy.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstdio>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_ac_strategy.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_transforms-inl.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/fast_math-inl.h"
+
+// Some of the floating point constants in this file and in other
+// files in the libjxl project have been obtained using the
+// tools/optimizer/simplex_fork.py tool. It is a variation of
+// Nelder-Mead optimization, and we generally try to minimize
+// BPP * pnorm aggregate as reported by the benchmark_xl tool,
+// but occasionally the values are optimized by using additional
+// constraints such as maintaining a certain density, or ratio of
+// popularity of integral transforms. Jyrki visually reviews all
+// such changes and often makes manual adjustments to maintain good
+// visual quality in cases where butteraugli was not sufficiently
+// sensitive to some kind of degradation. Unfortunately image quality
+// is still more of an art than science.
+
+// This must come before the begin/end_target, but HWY_ONCE is only true
+// after that, so use an "include guard".
+#ifndef LIB_JXL_ENC_AC_STRATEGY_
+#define LIB_JXL_ENC_AC_STRATEGY_
+// Parameters of the heuristic are marked with an OPTIMIZE comment.
+namespace jxl {
+
+// Debugging utilities.
+
+// Debug palette: linear sRGB color (3 bytes) used to draw each AC strategy.
+const uint8_t* TypeColor(const uint8_t& raw_strategy) {
+  JXL_ASSERT(AcStrategy::IsRawStrategyValid(raw_strategy));
+  static_assert(AcStrategy::kNumValidStrategies == 27, "Change colors");
+  static constexpr uint8_t kPalette[][3] = {
+      {0xFF, 0xFF, 0x00},  // DCT8
+      {0xFF, 0x80, 0x80},  // HORNUSS
+      {0xFF, 0x80, 0x80},  // DCT2x2
+      {0xFF, 0x80, 0x80},  // DCT4x4
+      {0x80, 0xFF, 0x00},  // DCT16x16
+      {0x00, 0xC0, 0x00},  // DCT32x32
+      {0xC0, 0xFF, 0x00},  // DCT16x8
+      {0xC0, 0xFF, 0x00},  // DCT8x16
+      {0x00, 0xFF, 0x00},  // DCT32x8
+      {0x00, 0xFF, 0x00},  // DCT8x32
+      {0x00, 0xFF, 0x00},  // DCT32x16
+      {0x00, 0xFF, 0x00},  // DCT16x32
+      {0xFF, 0x80, 0x00},  // DCT4x8
+      {0xFF, 0x80, 0x00},  // DCT8x4
+      {0xFF, 0xFF, 0x80},  // AFV0
+      {0xFF, 0xFF, 0x80},  // AFV1
+      {0xFF, 0xFF, 0x80},  // AFV2
+      {0xFF, 0xFF, 0x80},  // AFV3
+      {0x00, 0xC0, 0xFF},  // DCT64x64
+      {0x00, 0xFF, 0xFF},  // DCT64x32
+      {0x00, 0xFF, 0xFF},  // DCT32x64
+      {0x00, 0x40, 0xFF},  // DCT128x128
+      {0x00, 0x80, 0xFF},  // DCT128x64
+      {0x00, 0x80, 0xFF},  // DCT64x128
+      {0x00, 0x00, 0xC0},  // DCT256x256
+      {0x00, 0x00, 0xFF},  // DCT256x128
+      {0x00, 0x00, 0xFF},  // DCT128x256
+  };
+  return kPalette[raw_strategy];
+}
+
+// Returns the 8x8 (row-major, 64-entry) overlay mask that DumpAcStrategy draws
+// on blocks using the given strategy; nonzero entries are drawn darker.
+// Strategies that cover more than one block have an all-zero mask.
+const uint8_t* TypeMask(const uint8_t& raw_strategy) {
+  JXL_ASSERT(AcStrategy::IsRawStrategyValid(raw_strategy));
+  static_assert(AcStrategy::kNumValidStrategies == 27, "Add masks");
+  // implicitly, first row and column is made dark
+  // The table is sized explicitly so that strategies without an initializer
+  // below (DCT64x64 and larger) get zero-filled rows; with an unsized table
+  // kMask[raw_strategy] would point past the end for those strategies.
+  static constexpr uint8_t kMask[AcStrategy::kNumValidStrategies][64] = {
+      {
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+      },                           // DCT8
+      {
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 1, 0, 0, 1, 0, 0,  //
+          0, 0, 1, 0, 0, 1, 0, 0,  //
+          0, 0, 1, 1, 1, 1, 0, 0,  //
+          0, 0, 1, 0, 0, 1, 0, 0,  //
+          0, 0, 1, 0, 0, 1, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+      },                           // HORNUSS
+      {
+          1, 1, 1, 1, 1, 1, 1, 1,  //
+          1, 0, 1, 0, 1, 0, 1, 0,  //
+          1, 1, 1, 1, 1, 1, 1, 1,  //
+          1, 0, 1, 0, 1, 0, 1, 0,  //
+          1, 1, 1, 1, 1, 1, 1, 1,  //
+          1, 0, 1, 0, 1, 0, 1, 0,  //
+          1, 1, 1, 1, 1, 1, 1, 1,  //
+          1, 0, 1, 0, 1, 0, 1, 0,  //
+      },                           // 2x2
+      {
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          1, 1, 1, 1, 1, 1, 1, 1,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+      },                           // 4x4
+      {}, {}, {}, {},  // DCT16x16, DCT32x32, DCT16x8, DCT8x16 (unused)
+      {}, {}, {}, {},  // DCT32x8, DCT8x32, DCT32x16, DCT16x32 (unused)
+      {
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          1, 1, 1, 1, 1, 1, 1, 1,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+      },                           // DCT4x8
+      {
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+      },                           // DCT8x4
+      {
+          1, 1, 1, 1, 1, 0, 0, 0,  //
+          1, 1, 1, 1, 0, 0, 0, 0,  //
+          1, 1, 1, 0, 0, 0, 0, 0,  //
+          1, 1, 0, 0, 0, 0, 0, 0,  //
+          1, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+      },                           // AFV0
+      {
+          0, 0, 0, 0, 1, 1, 1, 1,  //
+          0, 0, 0, 0, 0, 1, 1, 1,  //
+          0, 0, 0, 0, 0, 0, 1, 1,  //
+          0, 0, 0, 0, 0, 0, 0, 1,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+      },                           // AFV1
+      {
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          1, 0, 0, 0, 0, 0, 0, 0,  //
+          1, 1, 0, 0, 0, 0, 0, 0,  //
+          1, 1, 1, 0, 0, 0, 0, 0,  //
+          1, 1, 1, 1, 0, 0, 0, 0,  //
+      },                           // AFV2
+      {
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 1,  //
+          0, 0, 0, 0, 0, 0, 1, 1,  //
+          0, 0, 0, 0, 0, 1, 1, 1,  //
+      },                           // AFV3
+  };
+  return kMask[raw_strategy];
+}
+
+// Debug helper: renders `ac_strategy` as an xsize x ysize color image and
+// hands it to aux_out->DumpImage() under `tag`. Every pixel gets the
+// TypeColor() of its block; single-block transforms additionally get their
+// TypeMask() pattern, and each transform gets darker top and left edges.
+void DumpAcStrategy(const AcStrategyImage& ac_strategy, size_t xsize,
+                    size_t ysize, const char* tag, AuxOut* aux_out) {
+  Image3F canvas(xsize, ysize);
+  // Pass 1: flood every pixel with the color of the strategy covering it.
+  for (size_t y = 0; y < ysize; y++) {
+    const AcStrategyRow strategies = ac_strategy.ConstRow(y / kBlockDim);
+    for (size_t c = 0; c < 3; c++) {
+      float* JXL_RESTRICT out = canvas.PlaneRow(c, y);
+      for (size_t x = 0; x < xsize; x++) {
+        out[x] = TypeColor(strategies[x / kBlockDim].RawStrategy())[c] / 255.f;
+      }
+    }
+  }
+  // Pass 2: overlay masks and block edges, one channel at a time.
+  const size_t stride = canvas.PixelsPerRow();
+  const size_t ysize_blocks = DivCeil(ysize, kBlockDim);
+  const size_t xsize_blocks = DivCeil(xsize, kBlockDim);
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t by = 0; by < ysize_blocks; by++) {
+      const size_t y0 = by * kBlockDim;
+      float* JXL_RESTRICT row = canvas.PlaneRow(c, y0);
+      const AcStrategyRow strategies = ac_strategy.ConstRow(by);
+      for (size_t bx = 0; bx < xsize_blocks; bx++) {
+        const size_t x0 = bx * kBlockDim;
+        AcStrategy acs = strategies[bx];
+        if (!acs.IsFirstBlock()) continue;
+        const uint8_t shade = TypeColor(acs.RawStrategy())[c];
+        const uint8_t* JXL_RESTRICT mask = TypeMask(acs.RawStrategy());
+        const size_t w = kBlockDim * acs.covered_blocks_x();
+        const size_t h = kBlockDim * acs.covered_blocks_y();
+        // The mask pattern is only drawn for transforms covering one block.
+        if (w == kBlockDim && h == kBlockDim) {
+          for (size_t iy = 0; iy < h && y0 + iy < ysize; iy++) {
+            for (size_t ix = 0; ix < w && x0 + ix < xsize; ix++) {
+              if (mask[iy * kBlockDim + ix] == 0) continue;
+              row[iy * stride + x0 + ix] = shade / 800.f;
+            }
+          }
+        }
+        // Top edge, then left edge, both clipped to the image.
+        for (size_t ix = 0; ix < w && x0 + ix < xsize; ix++) {
+          row[x0 + ix] = shade / 350.f;
+        }
+        for (size_t iy = 0; iy < h && y0 + iy < ysize; iy++) {
+          row[iy * stride + x0] = shade / 350.f;
+        }
+      }
+    }
+  }
+  aux_out->DumpImage(tag, canvas);
+}
+
+}  // namespace jxl
+#endif  // LIB_JXL_ENC_AC_STRATEGY_
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// Scans block row `y` over [start_x, end_x), hopping from one transform's
+// first block to the next. Returns true as soon as the scan lands on a block
+// that is not the first block of its transform; returns false otherwise, and
+// also for out-of-range coordinates or when `y` is a multiple of 8.
+bool MultiBlockTransformCrossesHorizontalBoundary(
+    const AcStrategyImage& ac_strategy, size_t start_x, size_t y,
+    size_t end_x) {
+  const size_t xsize = ac_strategy.xsize();
+  // Nothing crosses 64x64 boundaries, and the memory on the other side
+  // of the 64x64 block may still be uninitialized, so y % 8 == 0 is never
+  // inspected.
+  if (start_x >= xsize || y >= ac_strategy.ysize() || y % 8 == 0) {
+    return false;
+  }
+  const size_t last_x = std::min(end_x, xsize);
+  const AcStrategyRow row = ac_strategy.ConstRow(y);
+  // The first multiblock might start before start_x: trace backwards to its
+  // IsFirstBlock() block, but never past the enclosing 8-block-aligned column.
+  size_t x = start_x;
+  for (const size_t x_min = start_x & ~7; x != x_min; --x) {
+    if (row[x].IsFirstBlock()) break;
+  }
+  // Hop from first block to first block across the row.
+  while (x < last_x) {
+    if (!row[x].IsFirstBlock()) return true;
+    x += row[x].covered_blocks_x();
+  }
+  return false;
+}
+
+// Column counterpart of the horizontal check: scans block column `x` over
+// [start_y, end_y), hopping from one transform's first block to the next.
+// Returns true as soon as the scan lands on a block that is not the first
+// block of its transform; returns false otherwise, and also for out-of-range
+// coordinates or when `x` is a multiple of 8.
+bool MultiBlockTransformCrossesVerticalBoundary(
+    const AcStrategyImage& ac_strategy, size_t x, size_t start_y,
+    size_t end_y) {
+  const size_t ysize = ac_strategy.ysize();
+  // Nothing crosses 64x64 boundaries, and the memory on the other side
+  // of the 64x64 block may still be uninitialized, so x % 8 == 0 is never
+  // inspected.
+  if (x >= ac_strategy.xsize() || start_y >= ysize || x % 8 == 0) {
+    return false;
+  }
+  const size_t last_y = std::min(end_y, ysize);
+  // The first multiblock might start before start_y: trace backwards to its
+  // IsFirstBlock() block, but never past the enclosing 8-block-aligned row.
+  size_t y = start_y;
+  for (const size_t y_min = start_y & ~7; y != y_min; --y) {
+    if (ac_strategy.ConstRow(y)[x].IsFirstBlock()) break;
+  }
+
+  // Hop from first block to first block down the column.
+  while (y < last_y) {
+    AcStrategy acs = ac_strategy.ConstRow(y)[x];
+    if (!acs.IsFirstBlock()) return true;
+    y += acs.covered_blocks_y();
+  }
+  return false;
+}
+
+float EstimateEntropy(const AcStrategy& acs, size_t x, size_t y,  // x, y: pixels
+                      const ACSConfig& config,  // returns the estimated cost of using acs
+                      const float* JXL_RESTRICT cmap_factors, float* block,
+                      float* scratch_space, uint32_t* quantized) {  // quantized: unused here
+  const size_t size = (1 << acs.log2_covered_blocks()) * kDCTBlockSize;  // per channel
+
+  // Apply transform: channel c's coefficients are written to block + size * c.
+  for (size_t c = 0; c < 3; c++) {
+    float* JXL_RESTRICT block_c = block + size * c;
+    TransformFromPixels(acs.Strategy(), &config.Pixel(c, x, y),
+                        config.src_stride, block_c, scratch_space);
+  }
+
+  HWY_FULL(float) df;
+
+  const size_t num_blocks = acs.covered_blocks_x() * acs.covered_blocks_y();
+  float quant_norm8 = 0;  // aggregated quant field value over the covered blocks
+  float masking = 0;      // aggregated masking weight applied to the info loss
+  if (num_blocks == 1) {
+    // When it is only one 8x8, we don't need aggregation of values.
+    quant_norm8 = config.Quant(x / 8, y / 8);
+    masking = 2.0f * config.Masking(x / 8, y / 8);
+  } else if (num_blocks == 2) {
+    // Taking max instead of 8th norm seems to work
+    // better for smallest blocks up to 16x8. Jyrki couldn't get
+    // improvements in trying the same for 16x16 blocks.
+    if (acs.covered_blocks_y() == 2) {
+      quant_norm8 =
+          std::max(config.Quant(x / 8, y / 8), config.Quant(x / 8, y / 8 + 1));
+      masking = 2.0f * std::max(config.Masking(x / 8, y / 8),
+                                config.Masking(x / 8, y / 8 + 1));
+    } else {
+      quant_norm8 =
+          std::max(config.Quant(x / 8, y / 8), config.Quant(x / 8 + 1, y / 8));
+      masking = 2.0f * std::max(config.Masking(x / 8, y / 8),
+                                config.Masking(x / 8 + 1, y / 8));
+    }
+  } else {
+    float masking_norm2 = 0;
+    float masking_max = 0;
+    // Load QF value, calculate empirical heuristic on masking field
+    // for weighting the information loss. Information loss manifests
+    // itself as ringing, and masking could hide it.
+    for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+      for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+        float qval = config.Quant(x / 8 + ix, y / 8 + iy);
+        qval *= qval;  // qval^2
+        qval *= qval;  // qval^4
+        quant_norm8 += qval * qval;  // accumulates qval^8
+        float maskval = config.Masking(x / 8 + ix, y / 8 + iy);
+        masking_max = std::max<float>(masking_max, maskval);
+        masking_norm2 += maskval * maskval;
+      }
+    }
+    quant_norm8 /= num_blocks;
+    quant_norm8 = FastPowf(quant_norm8, 1.0f / 8.0f);  // 8th root of the mean of qval^8
+    masking_norm2 = sqrt(masking_norm2 / num_blocks);  // root mean square of the masking values
+    // This is a highly empirical formula.
+    masking = (masking_norm2 + masking_max);
+  }
+  const auto q = Set(df, quant_norm8);  // broadcast quantizer; a second `q` shadows it in the loop
+
+  // Compute entropy.
+  float entropy = config.base_entropy;
+  auto info_loss = Zero(df);   // per-lane sum of |val - round(val)|
+  auto info_loss2 = Zero(df);  // per-lane sum of (val - round(val))^2
+
+  for (size_t c = 0; c < 3; c++) {
+    const float* inv_matrix = config.dequant->InvMatrix(acs.RawStrategy(), c);
+    const auto cmap_factor = Set(df, cmap_factors[c]);
+
+    auto entropy_v = Zero(df);
+    auto nzeros_v = Zero(df);  // per-lane count of nonzero rounded coefficients
+    auto cost1 = Set(df, config.cost1);
+    auto cost2 = Set(df, config.cost2);
+    auto cost_delta = Set(df, config.cost_delta);
+    for (size_t i = 0; i < num_blocks * kDCTBlockSize; i += Lanes(df)) {
+      const auto in = Load(df, block + c * size + i);
+      const auto in_y = Load(df, block + size + i) * cmap_factor;  // channel 1, scaled
+      const auto im = Load(df, inv_matrix + i);
+      const auto val = (in - in_y) * im * q;  // uses the outer q (quantizer)
+      const auto rval = Round(val);
+      const auto diff = AbsDiff(val, rval);
+      info_loss += diff;
+      info_loss2 += diff * diff;
+      const auto q = Abs(rval);  // NOTE: shadows the outer q; from here q = |rounded value|
+      const auto q_is_zero = q == Zero(df);
+      entropy_v += IfThenElseZero(q >= Set(df, 1.5f), cost2);
+      // We used to have q * C here, but that cost model seems to
+      // be punishing large values more than necessary. Sqrt tries
+      // to avoid large values less aggressively. Having high accuracy
+      // around zero is most important at low qualities, and there
+      // we have directly specified costs for 0, 1, and 2.
+      entropy_v += Sqrt(q) * cost_delta;
+      nzeros_v += IfThenZeroElse(q_is_zero, Set(df, 1.0f));
+    }
+    entropy_v += nzeros_v * cost1;
+
+    entropy += GetLane(SumOfLanes(entropy_v));
+    size_t num_nzeros = GetLane(SumOfLanes(nzeros_v));  // float count converted to size_t
+    // Add #bit of num_nonzeros, as an estimate of the cost for encoding the
+    // number of non-zeros of the block.
+    size_t nbits = CeilLog2Nonzero(num_nzeros + 1) + 1;
+    // Also add #bit of #bit of num_nonzeros, to estimate the ANS cost, with a
+    // bias.
+    entropy += config.zeros_mul * (CeilLog2Nonzero(nbits + 17) + nbits);
+  }
+  float ret =  // entropy plus the masking-weighted information loss terms
+      entropy +
+      masking *
+          ((config.info_loss_multiplier * GetLane(SumOfLanes(info_loss))) +
+           (config.info_loss_multiplier2 *
+            sqrt(num_blocks * GetLane(SumOfLanes(info_loss2)))));
+  return ret;
+}
+
+uint8_t FindBest8x8Transform(size_t x, size_t y, int encoding_speed_tier,
+                             const ACSConfig& config,
+                             const float* JXL_RESTRICT cmap_factors,
+                             AcStrategyImage* JXL_RESTRICT ac_strategy,  // not used here
+                             float* block, float* scratch_space,
+                             uint32_t* quantized, float* entropy_out) {
+  struct TransformTry8x8 {  // one candidate single-block transform
+    AcStrategy::Type type;
+    int encoding_speed_tier_max_limit;  // skipped when encoding_speed_tier exceeds this
+    float entropy_add;  // adjusted cost = entropy_add + entropy_mul * EstimateEntropy()
+    float entropy_mul;
+  };
+  static const TransformTry8x8 kTransforms8x8[] = {
+      {
+          AcStrategy::Type::DCT,
+          9,
+          3.0f,
+          0.745f,
+      },
+      {
+          AcStrategy::Type::DCT4X4,
+          5,
+          4.0f,
+          1.0179946967008329f,
+      },
+      {
+          AcStrategy::Type::DCT2X2,
+          4,
+          4.0f,
+          0.76721119707580943f,
+      },
+      {
+          AcStrategy::Type::DCT4X8,
+          5,
+          0.0f,
+          0.700754622182473063f,
+      },
+      {
+          AcStrategy::Type::DCT8X4,
+          5,
+          0.0f,
+          0.700754622182473063f,
+      },
+      {
+          AcStrategy::Type::IDENTITY,
+          5,
+          8.0f,
+          0.81217614513585534f,
+      },
+      {
+          AcStrategy::Type::AFV0,
+          4,
+          3.0f,
+          0.70086131125719425f,
+      },
+      {
+          AcStrategy::Type::AFV1,
+          4,
+          3.0f,
+          0.70086131125719425f,
+      },
+      {
+          AcStrategy::Type::AFV2,
+          4,
+          3.0f,
+          0.70086131125719425f,
+      },
+      {
+          AcStrategy::Type::AFV3,
+          4,
+          3.0f,
+          0.70086131125719425f,
+      },
+  };
+  double best = 1e30;  // lowest adjusted cost seen so far
+  uint8_t best_tx = kTransforms8x8[0].type;  // DCT is returned if every candidate is skipped
+  for (auto tx : kTransforms8x8) {
+    if (tx.encoding_speed_tier_max_limit < encoding_speed_tier) {
+      continue;
+    }
+    AcStrategy acs = AcStrategy::FromRawStrategy(tx.type);
+    float entropy = EstimateEntropy(acs, x, y, config, cmap_factors, block,
+                                    scratch_space, quantized);
+    entropy = tx.entropy_add + tx.entropy_mul * entropy;
+    if (entropy < best) {  // strict: on a tie the earlier table entry wins
+      best_tx = tx.type;
+      best = entropy;
+    }
+  }
+  *entropy_out = best;  // remains 1e30 if no candidate was evaluated
+  return best_tx;
+}
+
+// Tries to replace the transforms currently covering a candidate area with
+// the single transform `acs_raw`, if that lowers the estimated entropy.
+// bx, by addresses the 64x64 block at 8x8 subresolution
+// cx, cy addresses the left, upper 8x8 block position of the candidate
+// transform.
+void TryMergeAcs(AcStrategy::Type acs_raw, size_t bx, size_t by, size_t cx,
+                 size_t cy, const ACSConfig& config,
+                 const float* JXL_RESTRICT cmap_factors,
+                 AcStrategyImage* JXL_RESTRICT ac_strategy,
+                 const float entropy_mul, const uint8_t candidate_priority,
+                 uint8_t* priority, float* JXL_RESTRICT entropy_estimate,
+                 float* block, float* scratch_space, uint32_t* quantized) {
+  const AcStrategy acs = AcStrategy::FromRawStrategy(acs_raw);
+  // Sum the current cost; give up if any cell already belongs to a transform
+  // of equal or higher priority (invalid overlap, e.g. DCT64X32 vs. DCT32X64).
+  float cost_now = 0;
+  for (size_t iy = 0; iy < acs.covered_blocks_y(); ++iy) {
+    const size_t base = (cy + iy) * 8 + cx;
+    for (size_t ix = 0; ix < acs.covered_blocks_x(); ++ix) {
+      if (priority[base + ix] >= candidate_priority) return;
+      cost_now += entropy_estimate[base + ix];
+    }
+  }
+  const float cost_merged =
+      entropy_mul * EstimateEntropy(acs, (bx + cx) * 8, (by + cy) * 8, config,
+                                    cmap_factors, block, scratch_space,
+                                    quantized);
+  if (cost_merged >= cost_now) return;
+  // Accept: claim every covered cell and keep the whole cost in the first one.
+  for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+    for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+      entropy_estimate[(cy + iy) * 8 + cx + ix] = 0;
+      priority[(cy + iy) * 8 + cx + ix] = candidate_priority;
+    }
+  }
+  ac_strategy->Set(bx + cx, by + cy, acs_raw);
+  entropy_estimate[cy * 8 + cx] = cost_merged;
+}
+
+// Zeroes the cells covered by `acs_raw`, then stores `entropy` at (cx, cy).
+static void SetEntropyForTransform(size_t cx, size_t cy,
+                                   const AcStrategy::Type acs_raw,
+                                   float entropy,
+                                   float* JXL_RESTRICT entropy_estimate) {
+  const AcStrategy acs = AcStrategy::FromRawStrategy(acs_raw);
+  float* const first = entropy_estimate + cy * 8 + cx;
+  for (size_t dy = 0; dy < acs.covered_blocks_y(); ++dy) {
+    std::fill_n(first + dy * 8, acs.covered_blocks_x(), 0.0f);
+  }
+  *first = entropy;
+}
+
+// Square DCT for a side of `blocks` 8x8 blocks: 2 -> 16x16, 4 -> 32x32;
+// any other value -> 64x64.
+AcStrategy::Type AcsSquare(size_t blocks) {
+  switch (blocks) {
+    case 2: return AcStrategy::Type::DCT16X16;
+    case 4: return AcStrategy::Type::DCT32X32;
+    default: return AcStrategy::Type::DCT64X64;
+  }
+}
+
+// Transform for one half of a `blocks`-sized square divided by a vertical
+// line: 2 -> DCT16X8, 4 -> DCT32X16; any other value -> DCT64X32.
+AcStrategy::Type AcsVerticalSplit(size_t blocks) {
+  switch (blocks) {
+    case 2: return AcStrategy::Type::DCT16X8;
+    case 4: return AcStrategy::Type::DCT32X16;
+    default: return AcStrategy::Type::DCT64X32;
+  }
+}
+
+// Transform for one half of a `blocks`-sized square divided by a horizontal
+// line: 2 -> DCT8X16, 4 -> DCT16X32; any other value -> DCT32X64.
+AcStrategy::Type AcsHorizontalSplit(size_t blocks) {
+  switch (blocks) {
+    case 2: return AcStrategy::Type::DCT8X16;
+    case 4: return AcStrategy::Type::DCT16X32;
+    default: return AcStrategy::Type::DCT32X64;
+  }
+}
+
+// The following function tries to merge smaller transforms into
+// squares and the rectangles originating from a single middle division
+// (horizontal or vertical) fairly.
+//
+// This is now generalized to squares of blocks x blocks size, where a
+// block is 8x8 pixels. (bx + cx, by + cy) is the square's top-left block.
+void FindBestFirstLevelDivisionForSquare(
+    size_t blocks, bool allow_square_transform, size_t bx, size_t by, size_t cx,
+    size_t cy, const ACSConfig& config, const float* JXL_RESTRICT cmap_factors,
+    AcStrategyImage* JXL_RESTRICT ac_strategy, const float entropy_mul_JXK,
+    const float entropy_mul_JXJ, float* JXL_RESTRICT entropy_estimate,
+    float* block, float* scratch_space, uint32_t* quantized) {
+  // We denote J for the larger dimension here, and K for the smaller.
+  // For example, for 32x32 block splitting, J would be 32, K 16.
+  const size_t blocks_half = blocks / 2;
+  const AcStrategy::Type acs_rawJXK = AcsVerticalSplit(blocks);
+  const AcStrategy::Type acs_rawKXJ = AcsHorizontalSplit(blocks);
+  const AcStrategy::Type acs_rawJXJ = AcsSquare(blocks);
+  const AcStrategy acsJXK = AcStrategy::FromRawStrategy(acs_rawJXK);
+  const AcStrategy acsKXJ = AcStrategy::FromRawStrategy(acs_rawKXJ);
+  const AcStrategy acsJXJ = AcStrategy::FromRawStrategy(acs_rawJXJ);
+  AcStrategyRow row0 = ac_strategy->ConstRow(by + cy + 0);            // top row of the square
+  AcStrategyRow row1 = ac_strategy->ConstRow(by + cy + blocks_half);  // first row of lower half
+  // Let's check if we can consider a JXJ block here at all.
+  // This is not necessary in the basic use of hierarchically merging
+  // blocks in the simplest possible way, but is needed when we try other
+  // 'floating' options of merging, possibly after a simple hierarchical
+  // merge has been explored.
+  if (MultiBlockTransformCrossesHorizontalBoundary(*ac_strategy, bx + cx,
+                                                   by + cy, bx + cx + blocks) ||
+      MultiBlockTransformCrossesHorizontalBoundary(
+          *ac_strategy, bx + cx, by + cy + blocks, bx + cx + blocks) ||
+      MultiBlockTransformCrossesVerticalBoundary(*ac_strategy, bx + cx, by + cy,
+                                                 by + cy + blocks) ||
+      MultiBlockTransformCrossesVerticalBoundary(*ac_strategy, bx + cx + blocks,
+                                                 by + cy, by + cy + blocks)) {
+    return;  // not suitable for JxJ analysis, some transforms leak out.
+  }
+  // For floating transforms there may be
+  // already blocks selected that make either or both JXK and
+  // KXJ not feasible for this location.
+  const bool allow_JXK = !MultiBlockTransformCrossesVerticalBoundary(
+      *ac_strategy, bx + cx + blocks_half, by + cy, by + cy + blocks);
+  const bool allow_KXJ = !MultiBlockTransformCrossesHorizontalBoundary(
+      *ac_strategy, bx + cx, by + cy + blocks_half, bx + cx + blocks);
+  // Current entropies aggregated on NxN resolution.
+  float entropy[2][2] = {};  // indexed [vertical half][horizontal half] of the square
+  for (size_t dy = 0; dy < blocks; ++dy) {
+    for (size_t dx = 0; dx < blocks; ++dx) {
+      entropy[dy / blocks_half][dx / blocks_half] +=
+          entropy_estimate[(cy + dy) * 8 + (cx + dx)];
+    }
+  }
+  // Candidates that are not evaluated keep float max, so std::min() below ignores them.
+  float entropy_JXK_left = std::numeric_limits<float>::max();
+  float entropy_JXK_right = std::numeric_limits<float>::max();
+  float entropy_KXJ_top = std::numeric_limits<float>::max();
+  float entropy_KXJ_bottom = std::numeric_limits<float>::max();
+  float entropy_JXJ = std::numeric_limits<float>::max();
+  if (allow_JXK) {
+    if (row0[bx + cx + 0].RawStrategy() != acs_rawJXK) {  // skip if already this transform
+      entropy_JXK_left =
+          entropy_mul_JXK *
+          EstimateEntropy(acsJXK, (bx + cx + 0) * 8, (by + cy + 0) * 8, config,
+                          cmap_factors, block, scratch_space, quantized);
+    }
+    if (row0[bx + cx + blocks_half].RawStrategy() != acs_rawJXK) {
+      entropy_JXK_right =
+          entropy_mul_JXK * EstimateEntropy(acsJXK, (bx + cx + blocks_half) * 8,
+                                            (by + cy + 0) * 8, config,
+                                            cmap_factors, block, scratch_space,
+                                            quantized);
+    }
+  }
+  if (allow_KXJ) {
+    if (row0[bx + cx].RawStrategy() != acs_rawKXJ) {
+      entropy_KXJ_top =
+          entropy_mul_JXK *  // the same multiplier is used for JXK and KXJ
+          EstimateEntropy(acsKXJ, (bx + cx + 0) * 8, (by + cy + 0) * 8, config,
+                          cmap_factors, block, scratch_space, quantized);
+    }
+    if (row1[bx + cx].RawStrategy() != acs_rawKXJ) {
+      entropy_KXJ_bottom =
+          entropy_mul_JXK * EstimateEntropy(acsKXJ, (bx + cx + 0) * 8,
+                                            (by + cy + blocks_half) * 8, config,
+                                            cmap_factors, block, scratch_space,
+                                            quantized);
+    }
+  }
+  if (allow_square_transform) {
+    // We control the exploration of the square transform separately so that
+    // we can turn it off at high decoding speeds for 32x32, but still allow
+    // exploring 16x32 and 32x16.
+    entropy_JXJ = entropy_mul_JXJ * EstimateEntropy(acsJXJ, (bx + cx + 0) * 8,
+                                                    (by + cy + 0) * 8, config,
+                                                    cmap_factors, block,
+                                                    scratch_space, quantized);
+  }
+
+  // Test if this block should have JXK or KXJ transforms,
+  // because it can have only one or the other.
+  float costJxN = std::min(entropy_JXK_left, entropy[0][0] + entropy[1][0]) +
+                  std::min(entropy_JXK_right, entropy[0][1] + entropy[1][1]);
+  float costNxJ = std::min(entropy_KXJ_top, entropy[0][0] + entropy[0][1]) +
+                  std::min(entropy_KXJ_bottom, entropy[1][0] + entropy[1][1]);
+  if (entropy_JXJ < costJxN && entropy_JXJ < costNxJ) {  // square beats both splits
+    ac_strategy->Set(bx + cx, by + cy, acs_rawJXJ);
+    SetEntropyForTransform(cx, cy, acs_rawJXJ, entropy_JXJ, entropy_estimate);
+  } else if (costJxN < costNxJ) {  // left/right halves; each is set only if it improves
+    if (entropy_JXK_left < entropy[0][0] + entropy[1][0]) {
+      ac_strategy->Set(bx + cx, by + cy, acs_rawJXK);
+      SetEntropyForTransform(cx, cy, acs_rawJXK, entropy_JXK_left,
+                             entropy_estimate);
+    }
+    if (entropy_JXK_right < entropy[0][1] + entropy[1][1]) {
+      ac_strategy->Set(bx + cx + blocks_half, by + cy, acs_rawJXK);
+      SetEntropyForTransform(cx + blocks_half, cy, acs_rawJXK,
+                             entropy_JXK_right, entropy_estimate);
+    }
+  } else {  // top/bottom halves; each is set only if it improves
+    if (entropy_KXJ_top < entropy[0][0] + entropy[0][1]) {
+      ac_strategy->Set(bx + cx, by + cy, acs_rawKXJ);
+      SetEntropyForTransform(cx, cy, acs_rawKXJ, entropy_KXJ_top,
+                             entropy_estimate);
+    }
+    if (entropy_KXJ_bottom < entropy[1][0] + entropy[1][1]) {
+      ac_strategy->Set(bx + cx, by + cy + blocks_half, acs_rawKXJ);
+      SetEntropyForTransform(cx, cy + blocks_half, acs_rawKXJ,
+                             entropy_KXJ_bottom, entropy_estimate);
+    }
+  }
+}
+
+void ProcessRectACS(PassesEncoderState* JXL_RESTRICT enc_state,
+                    const ACSConfig& config, const Rect& rect) {
+  // Main philosophy here:
+  // 1. First find best 8x8 transform for each area.
+  // 2. Merging them into larger transforms where possible, but
+  // starting from the smallest transforms (16x8 and 8x16).
+  // Additional complication: 16x8 and 8x16 are considered
+  // simultaneously and fairly against each other.
+  // We are looking at 64x64 squares since the YtoX and YtoB
+  // maps happen to be at that resolution, and having
+  // integral transforms cross these boundaries leads to
+  // additional complications.
+  const CompressParams& cparams = enc_state->cparams;
+  const float butteraugli_target = cparams.butteraugli_distance;
+  AcStrategyImage* ac_strategy = &enc_state->shared.ac_strategy;
+  // TODO(veluca): reuse allocations
+  auto mem = hwy::AllocateAligned<float>(5 * AcStrategy::kMaxCoeffArea);
+  auto qmem = hwy::AllocateAligned<uint32_t>(AcStrategy::kMaxCoeffArea);
+  uint32_t* JXL_RESTRICT quantized = qmem.get();
+  float* JXL_RESTRICT block = mem.get();
+  float* JXL_RESTRICT scratch_space = mem.get() + 3 * AcStrategy::kMaxCoeffArea;
+  size_t bx = rect.x0();
+  size_t by = rect.y0();
+  JXL_ASSERT(rect.xsize() <= 8);
+  JXL_ASSERT(rect.ysize() <= 8);
+  size_t tx = bx / kColorTileDimInBlocks;
+  size_t ty = by / kColorTileDimInBlocks;
+  const float cmap_factors[3] = {
+      enc_state->shared.cmap.YtoXRatio(
+          enc_state->shared.cmap.ytox_map.ConstRow(ty)[tx]),
+      0.0f,
+      enc_state->shared.cmap.YtoBRatio(
+          enc_state->shared.cmap.ytob_map.ConstRow(ty)[tx]),
+  };
+  if (cparams.speed_tier > SpeedTier::kHare) return;
+  // First compute the best 8x8 transform for each square. Later, we do not
+  // experiment with different combinations, but only use the best of the 8x8s
+  // when DCT8X8 is specified in the tree search.
+  // 8x8 transforms have 10 variants, but every larger transform is just a DCT.
+  float entropy_estimate[64] = {};
+  // Favor all 8x8 transforms (against 16x8 and larger transforms)) at
+  // low butteraugli_target distances.
+  static const float k8x8mul1 = -0.55;
+  static const float k8x8mul2 = 1.0735757687292623f;
+  static const float k8x8base = 1.4;
+  const float mul8x8 = k8x8mul2 + k8x8mul1 / (butteraugli_target + k8x8base);
+  for (size_t iy = 0; iy < rect.ysize(); iy++) {
+    for (size_t ix = 0; ix < rect.xsize(); ix++) {
+      float entropy = 0.0;
+      const uint8_t best_of_8x8s = FindBest8x8Transform(
+          8 * (bx + ix), 8 * (by + iy), static_cast<int>(cparams.speed_tier),
+          config, cmap_factors, ac_strategy, block, scratch_space, quantized,
+          &entropy);
+      ac_strategy->Set(bx + ix, by + iy,
+                       static_cast<AcStrategy::Type>(best_of_8x8s));
+      entropy_estimate[iy * 8 + ix] = entropy * mul8x8;
+    }
+  }
+  // Merge when a larger transform is better than the previously
+  // searched best combination of 8x8 transforms.
+  struct MergeTry {
+    AcStrategy::Type type;
+    uint8_t priority;
+    uint8_t decoding_speed_tier_max_limit;
+    uint8_t encoding_speed_tier_max_limit;
+    float entropy_mul;
+  };
+  static const float k8X16mul1 = -0.55;
+  static const float k8X16mul2 = 0.9019587899705066;
+  static const float k8X16base = 1.6;
+  const float entropy_mul16X8 =
+      k8X16mul2 + k8X16mul1 / (butteraugli_target + k8X16base);
+  //  const float entropy_mul16X8 = mul8X16 * 0.91195782912371126f;
+
+  static const float k16X16mul1 = -0.35;
+  static const float k16X16mul2 = 0.82098067020252011;
+  static const float k16X16base = 2.0;
+  const float entropy_mul16X16 =
+      k16X16mul2 + k16X16mul1 / (butteraugli_target + k16X16base);
+  //  const float entropy_mul16X16 = mul16X16 * 0.83183417727960129f;
+
+  static const float k32X16mul1 = -0.1;
+  static const float k32X16mul2 = 0.86098067020252011;
+  static const float k32X16base = 2.5;
+  const float entropy_mul16X32 =
+      k32X16mul2 + k32X16mul1 / (butteraugli_target + k32X16base);
+
+  const float entropy_mul32X32 = 0.9188333021616017f;
+  const float entropy_mul64X64 = 1.50f;
+  // TODO(jyrki): Consider this feedback in further changes:
+  // Also effectively when the multipliers for smaller blocks are
+  // below 1, this raises the bar for the bigger blocks even higher
+  // in that sense these constants are not independent (e.g. changing
+  // the constant for DCT16x32 by -5% (making it more likely) also
+  // means that DCT32x32 becomes harder to do when starting from
+  // two DCT16x32s). It might be better to make them more independent,
+  // e.g. by not applying the multiplier when storing the new entropy
+  // estimates in TryMergeToACSCandidate().
+  const MergeTry kTransformsForMerge[9] = {
+      {AcStrategy::Type::DCT16X8, 2, 4, 5, entropy_mul16X8},
+      {AcStrategy::Type::DCT8X16, 2, 4, 5, entropy_mul16X8},
+      // FindBestFirstLevelDivisionForSquare looks for DCT16X16 and its
+      // subdivisions. {AcStrategy::Type::DCT16X16, 3, entropy_mul16X16},
+      {AcStrategy::Type::DCT16X32, 4, 4, 4, entropy_mul16X32},
+      {AcStrategy::Type::DCT32X16, 4, 4, 4, entropy_mul16X32},
+      // FindBestFirstLevelDivisionForSquare looks for DCT32X32 and its
+      // subdivisions. {AcStrategy::Type::DCT32X32, 5, 1, 5,
+      // 0.9822994906548809f},
+      // TODO(jyrki): re-enable 64x32 and 64x64 if/when possible.
+      {AcStrategy::Type::DCT64X32, 6, 1, 3, 1.27f},
+      {AcStrategy::Type::DCT32X64, 6, 1, 3, 1.27f},
+      // {AcStrategy::Type::DCT64X64, 8, 1, 3, 2.0846542128012948f},
+  };
+  /*
+  These sizes not yet included in merge heuristic:
+  set(AcStrategy::Type::DCT32X8, 0.0f, 2.261390410971102f);
+  set(AcStrategy::Type::DCT8X32, 0.0f, 2.261390410971102f);
+  set(AcStrategy::Type::DCT128X128, 0.0f, 1.0f);
+  set(AcStrategy::Type::DCT128X64, 0.0f, 0.73f);
+  set(AcStrategy::Type::DCT64X128, 0.0f, 0.73f);
+  set(AcStrategy::Type::DCT256X256, 0.0f, 1.0f);
+  set(AcStrategy::Type::DCT256X128, 0.0f, 0.73f);
+  set(AcStrategy::Type::DCT128X256, 0.0f, 0.73f);
+  */
+
+  // Priority is a tricky kludge to avoid collisions so that transforms
+  // don't overlap.
+  uint8_t priority[64] = {};
+  for (auto tx : kTransformsForMerge) {
+    if (tx.decoding_speed_tier_max_limit < cparams.decoding_speed_tier) {
+      continue;
+    }
+    AcStrategy acs = AcStrategy::FromRawStrategy(tx.type);
+    for (size_t cy = 0; cy + acs.covered_blocks_y() - 1 < rect.ysize();
+         cy += acs.covered_blocks_y()) {
+      for (size_t cx = 0; cx + acs.covered_blocks_x() - 1 < rect.xsize();
+           cx += acs.covered_blocks_x()) {
+        if (cy + 7 < rect.ysize() && cx + 7 < rect.xsize()) {
+          if (cparams.decoding_speed_tier < 4 &&
+              tx.type == AcStrategy::Type::DCT32X64) {
+            // We handle both DCT8X16 and DCT16X8 at the same time.
+            if ((cy | cx) % 8 == 0) {
+              FindBestFirstLevelDivisionForSquare(
+                  8, true, bx, by, cx, cy, config, cmap_factors, ac_strategy,
+                  tx.entropy_mul, entropy_mul64X64, entropy_estimate, block,
+                  scratch_space, quantized);
+            }
+            continue;
+          } else if (tx.type == AcStrategy::Type::DCT32X16) {
+            // We handled both DCT8X16 and DCT16X8 at the same time,
+            // and that is above. The last column and last row,
+            // when the last column or last row is odd numbered,
+            // are still handled by TryMergeAcs.
+            continue;
+          }
+        }
+        if ((tx.type == AcStrategy::Type::DCT16X32 && cy % 4 != 0) ||
+            (tx.type == AcStrategy::Type::DCT32X16 && cx % 4 != 0)) {
+          // already covered by FindBest32X32
+          continue;
+        }
+
+        if (cy + 3 < rect.ysize() && cx + 3 < rect.xsize()) {
+          if (tx.type == AcStrategy::Type::DCT16X32) {
+            // We handle both DCT8X16 and DCT16X8 at the same time.
+            bool enable_32x32 = cparams.decoding_speed_tier < 4;
+            if ((cy | cx) % 4 == 0) {
+              FindBestFirstLevelDivisionForSquare(
+                  4, enable_32x32, bx, by, cx, cy, config, cmap_factors,
+                  ac_strategy, tx.entropy_mul, entropy_mul32X32,
+                  entropy_estimate, block, scratch_space, quantized);
+            }
+            continue;
+          } else if (tx.type == AcStrategy::Type::DCT32X16) {
+            // We handled both DCT8X16 and DCT16X8 at the same time,
+            // and that is above. The last column and last row,
+            // when the last column or last row is odd numbered,
+            // are still handled by TryMergeAcs.
+            continue;
+          }
+        }
+        if ((tx.type == AcStrategy::Type::DCT16X32 && cy % 4 != 0) ||
+            (tx.type == AcStrategy::Type::DCT32X16 && cx % 4 != 0)) {
+          // already covered by FindBest32X32
+          continue;
+        }
+        if (cy + 1 < rect.ysize() && cx + 1 < rect.xsize()) {
+          if (tx.type == AcStrategy::Type::DCT8X16) {
+            // We handle both DCT8X16 and DCT16X8 at the same time.
+            if ((cy | cx) % 2 == 0) {
+              FindBestFirstLevelDivisionForSquare(
+                  2, true, bx, by, cx, cy, config, cmap_factors, ac_strategy,
+                  tx.entropy_mul, entropy_mul16X16, entropy_estimate, block,
+                  scratch_space, quantized);
+            }
+            continue;
+          } else if (tx.type == AcStrategy::Type::DCT16X8) {
+            // We handled both DCT8X16 and DCT16X8 at the same time,
+            // and that is above. The last column and last row,
+            // when the last column or last row is odd numbered,
+            // are still handled by TryMergeAcs.
+            continue;
+          }
+        }
+        if ((tx.type == AcStrategy::Type::DCT8X16 && cy % 2 == 1) ||
+            (tx.type == AcStrategy::Type::DCT16X8 && cx % 2 == 1)) {
+          // already covered by FindBestFirstLevelDivisionForSquare
+          continue;
+        }
+        // All other merge sizes are handled here.
+        // Some of the DCT16X8s and DCT8X16s will still leak through here
+        // when there is an odd number of 8x8 blocks, then the last row
+        // and column will get their DCT16X8s and DCT8X16s through the
+        // normal integral transform merging process.
+        TryMergeAcs(tx.type, bx, by, cx, cy, config, cmap_factors, ac_strategy,
+                    tx.entropy_mul, tx.priority, &priority[0], entropy_estimate,
+                    block, scratch_space, quantized);
+      }
+    }
+  }
+  // Here we still try to do some non-aligned matching, find a few more
+  // 16X8, 8X16 and 16X16s between the non-2-aligned blocks.
+  if (cparams.speed_tier >= SpeedTier::kHare) {
+    return;
+  }
+  for (int ii = 0; ii < 3; ++ii) {
+    for (size_t cy = 1 - (ii == 1); cy + 1 < rect.ysize(); cy += 2) {
+      for (size_t cx = 1 - (ii == 2); cx + 1 < rect.xsize(); cx += 2) {
+        FindBestFirstLevelDivisionForSquare(
+            2, true, bx, by, cx, cy, config, cmap_factors, ac_strategy,
+            entropy_mul16X8, entropy_mul16X16, entropy_estimate, block,
+            scratch_space, quantized);
+      }
+    }
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(ProcessRectACS);
+
+// Binds the heuristics to `enc_state` and fills `config` with the row pointers,
+// strides and cost constants read by the per-rect strategy search.
+void AcStrategyHeuristics::Init(const Image3F& src,
+                                PassesEncoderState* enc_state) {
+  this->enc_state = enc_state;
+  config.dequant = &enc_state->shared.matrices;
+  const CompressParams& cparams = enc_state->cparams;
+  const float butteraugli_target = cparams.butteraugli_distance;
+  // Image row pointers and strides.
+  config.quant_field_row = enc_state->initial_quant_field.Row(0);
+  config.quant_field_stride = enc_state->initial_quant_field.PixelsPerRow();
+  auto& mask = enc_state->initial_quant_masking;
+  if (mask.xsize() > 0 && mask.ysize() > 0) {
+    config.masking_field_row = mask.Row(0);
+    config.masking_field_stride = mask.PixelsPerRow();
+  } else {
+    // No masking image: store a well-defined null rather than leaving the
+    // two members indeterminate (nothing else initializes them here).
+    config.masking_field_row = nullptr;
+    config.masking_field_stride = 0;
+  }
+  config.src_rows[0] = src.ConstPlaneRow(0, 0);
+  config.src_rows[1] = src.ConstPlaneRow(1, 0);
+  config.src_rows[2] = src.ConstPlaneRow(2, 0);
+  config.src_stride = src.PixelsPerRow();
+  // Entropy estimate = estimated number of bits used by the block + information
+  // loss due to quantization; these multipliers set their relative weights.
+  config.info_loss_multiplier = 138.0f;
+  config.info_loss_multiplier2 = 50.46839691767866;
+  // TODO(jyrki): explore base_entropy setting more. A small value (0?) works
+  // better at high distance, a larger one may help at low distance/high bpp.
+  config.base_entropy = 0.0;
+  config.zeros_mul = 7.565053364251793f;
+  // Lots of +1 and -1 coefficients at high quality, so favor them; at low
+  // qualities zeros matter more and +1 / -1 are already quite harmful.
+  float slope = std::min<float>(1.0f, butteraugli_target * (1.0f / 3));
+  config.cost1 = 1 + slope * 8.8703248061477744f;
+  config.cost2 = 4.4628149885273363f;
+  config.cost_delta = 5.3359184934516337f;
+  JXL_ASSERT(enc_state->shared.ac_strategy.xsize() ==
+             enc_state->shared.frame_dim.xsize_blocks);
+  JXL_ASSERT(enc_state->shared.ac_strategy.ysize() ==
+             enc_state->shared.frame_dim.ysize_blocks);
+}
+
+// Selects AC strategies for `rect`: DCT8 everywhere when speed_tier >= kCheetah,
+// otherwise the dynamically dispatched ProcessRectACS search.
+void AcStrategyHeuristics::ProcessRect(const Rect& rect) {
+  PROFILER_FUNC;
+  const bool dct8_only = enc_state->cparams.speed_tier >= SpeedTier::kCheetah;
+  if (!dct8_only) {
+    HWY_DYNAMIC_DISPATCH(ProcessRectACS)(enc_state, config, rect);
+    return;
+  }
+  enc_state->shared.ac_strategy.FillDCT8(rect);
+}
+
+// Records how many blocks use each AC strategy type into `aux_out` (skipped
+// when it is null), then dumps the strategy image if debug output is wanted.
+void AcStrategyHeuristics::Finalize(AuxOut* aux_out) {
+  using Type = AcStrategy::Type;
+  const auto& ac_strategy = enc_state->shared.ac_strategy;
+  // Shorthand: number of blocks counted for one strategy type.
+  const auto count = [&ac_strategy](Type type) {
+    return ac_strategy.CountBlocks(type);
+  };
+
+  // Accounting.
+  if (aux_out != nullptr) {
+    // NOTE(review): the dct2 / dct4 counters are fed from the 32x64 + 64x32 and
+    // 64x64 types, scaled by 32 and 64 - confirm this naming is intended.
+    aux_out->num_dct2_blocks =
+        32 * (count(Type::DCT32X64) + count(Type::DCT64X32));
+    aux_out->num_dct4_blocks = 64 * count(Type::DCT64X64);
+    // Every other counter is a plain sum over a family of related types.
+    aux_out->num_dct4x8_blocks = count(Type::DCT4X8) + count(Type::DCT8X4);
+    aux_out->num_afv_blocks = count(Type::AFV0) + count(Type::AFV1) +
+                              count(Type::AFV2) + count(Type::AFV3);
+    aux_out->num_dct8_blocks = count(Type::DCT);
+    aux_out->num_dct8x16_blocks = count(Type::DCT8X16) + count(Type::DCT16X8);
+    aux_out->num_dct8x32_blocks = count(Type::DCT8X32) + count(Type::DCT32X8);
+    aux_out->num_dct16_blocks = count(Type::DCT16X16);
+    aux_out->num_dct16x32_blocks =
+        count(Type::DCT16X32) + count(Type::DCT32X16);
+    aux_out->num_dct32_blocks = count(Type::DCT32X32);
+  }
+
+  // Debug output.
+  if (!WantDebugOutput(aux_out)) {
+    return;
+  }
+  const auto& dim = enc_state->shared.frame_dim;
+  DumpAcStrategy(ac_strategy, dim.xsize, dim.ysize, "ac_strategy", aux_out);
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ac_strategy.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ac_strategy.h
new file mode 100644
index 0000000000..6cf82d524c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ac_strategy.h
@@ -0,0 +1,79 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_AC_STRATEGY_H_
+#define LIB_JXL_ENC_AC_STRATEGY_H_
+
+#include <stdint.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/quant_weights.h"
+
+// `FindBestAcStrategy` uses heuristics to choose which AC strategy should be
+// used in each block, as well as the initial quantization field.
+
+namespace jxl {
+
+// AC strategy selection: utility struct.
+
+struct ACSConfig {
+  const DequantMatrices* JXL_RESTRICT dequant;
+  float info_loss_multiplier, info_loss_multiplier2;
+  // Base pointers / element strides; indexed below as row[by * stride + bx].
+  float* JXL_RESTRICT quant_field_row;
+  size_t quant_field_stride;
+  float* JXL_RESTRICT masking_field_row;
+  size_t masking_field_stride;
+  const float* JXL_RESTRICT src_rows[3];
+  size_t src_stride;
+  // Cost for 1 (-1), 2 (-2) explicitly, cost for others computed with cost1 +
+  // cost2 + sqrt(q) * cost_delta.
+  float cost1, cost2, cost_delta;
+  float base_entropy, zeros_mul;
+  // Sample of source plane `c` at (x, y).
+  const float& Pixel(size_t c, size_t x, size_t y) const {
+    return src_rows[c][x + y * src_stride];
+  }
+  float Masking(size_t bx, size_t by) const {
+    const float masking = masking_field_row[bx + by * masking_field_stride];
+    JXL_DASSERT(masking > 0);
+    return masking;
+  }
+  float Quant(size_t bx, size_t by) const {
+    const float quant = quant_field_row[bx + by * quant_field_stride];
+    JXL_DASSERT(quant > 0);
+    return quant;
+  }
+  void SetQuant(size_t bx, size_t by, float value) const {
+    JXL_DASSERT(value > 0);
+    quant_field_row[bx + by * quant_field_stride] = value;
+  }
+};
+
+struct AcStrategyHeuristics {
+  void Init(const Image3F& src, PassesEncoderState* enc_state);  // fills config
+  void ProcessRect(const Rect& rect);  // chooses strategies for one rect
+  void Finalize(AuxOut* aux_out);      // block stats; aux_out may be null
+  ACSConfig config;                    // populated by Init()
+  PassesEncoderState* enc_state;       // stored by Init(); presumably not owned
+};
+
+// Debug: Finalize() calls this with tag "ac_strategy" when debug output is wanted.
+void DumpAcStrategy(const AcStrategyImage& ac_strategy, size_t xsize,
+                    size_t ysize, const char* tag, AuxOut* aux_out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_AC_STRATEGY_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_adaptive_quantization.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_adaptive_quantization.cc
new file mode 100644
index 0000000000..10f99b9c99
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_adaptive_quantization.cc
@@ -0,0 +1,1054 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_adaptive_quantization.h"
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <string>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_adaptive_quantization.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/butteraugli/butteraugli.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_group.h"
+#include "lib/jxl/dec_reconstruct.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_group.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_transforms-inl.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/gauss_blur.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quant_weights.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Rebind;
+
+// The following functions modulate an exponent (out_val) and return the updated
+// value. Their descriptor is limited to 8 lanes for 8x8 blocks.
+
+// Hack for mask estimation. Eventually replace this code with butteraugli's
+// masking.
+float ComputeMaskForAcStrategyUse(const float out_val) {
+  // Reciprocal falloff: kNumerator / (out_val + kShift).
+  constexpr float kNumerator = 1.0f, kShift = 0.4f;
+  return kNumerator / (kShift + out_val);
+}
+
+template <class D, class V>
+V ComputeMask(const D d, const V out_val) {
+  const auto one = Set(d, 1.0f);
+  const auto input_scale = Set(d, 0.74760422233706747f);
+  const auto base = Set(d, -0.74174993f);
+  // Weight and denominator offset of each of the three reciprocal terms.
+  const auto weight_a = Set(d, 12.906028311180409f);
+  const auto offset_a = Set(d, 305.04035728311436f);
+  const auto weight_b = Set(d, 5.0220313103171232f);
+  const auto offset_b = Set(d, 2.1925739705298404f);
+  const auto weight_c = Set(d, 3.2353257320940401f);
+  const auto offset_c = Set(d, 0.25f) * offset_b;
+
+  // Avoid division by zero.
+  const auto scaled = Max(out_val * input_scale, Set(d, 1e-3f));
+  const auto term_a = one / (scaled + offset_a);
+  const auto term_b = one / MulAdd(scaled, scaled, offset_b);
+  const auto term_c = one / MulAdd(scaled, scaled, offset_c);
+  // TODO(jyrki): a log or two here could make sense. Butteraugli effectively
+  // uses log(log(x + C)) for this: one log for saturating visual masking, and
+  // another to counter the modulation values here being exponential.
+  const auto sum_ab = MulAdd(weight_a, term_a, weight_b * term_b);
+  return base + MulAdd(weight_c, term_c, sum_ab);
+}
+
+// For converting full vectors to a subset. Assumes `vfull` lanes are identical.
+template <class D, class VFull>
+Vec<D> CapTo(const D d, VFull vfull) {
+  using Lane = typename D::T;
+  const HWY_FULL(Lane) full_tag;
+  HWY_ALIGN Lane buffer[MaxLanes(full_tag)];
+  Store(vfull, full_tag, buffer);  // spill every lane of the full vector
+  return Load(d, buffer);          // reload only the lanes covered by `d`
+}
+
+// mul and mul2 represent a scaling difference between jxl and butteraugli.
+static const float kSGmul = 226.0480446705883f;
+static const float kSGmul2 = 1.0f / 73.377132366608819f;
+static const float kLog2 = 0.693147181f;  // ln(2)
+// Includes correction factor for std::log -> log2.
+static const float kSGRetMul = kSGmul2 * 18.6580932135f * kLog2;
+static const float kSGVOffset = 7.14672470003f;  // offset inside SimpleGamma's log
+
+template <bool invert, typename D, typename V>
+V RatioOfDerivativesOfCubicRootToSimpleGamma(const D d, V v) {
+  // jxl's opsin space is the cubic root of photons (v * v * v relates to the
+  // photon count); butteraugli's psychovisual space is SimpleGamma(v * v * v).
+  // This ratio lets quantization move from jxl's opsin space to butteraugli's
+  // log-gamma space. `invert` selects num / den instead of den / num.
+  const auto num_scale = Set(d, kSGRetMul * 3 * kSGmul);
+  const auto den_scale = Set(d, kLog2 * kSGmul);
+  const auto den_offset = Set(d, kSGVOffset * kLog2);
+
+  const auto clamped = ZeroIfNegative(v);  // negative inputs are treated as 0
+  const auto squared = clamped * clamped;
+  const auto numerator = num_scale * squared;
+  // den_scale * v^3 + den_offset, computed as MulAdd(den_scale * v, v^2, ...).
+  const auto denominator = MulAdd(den_scale * clamped, squared, den_offset);
+
+  if (invert) return numerator / denominator;
+  return denominator / numerator;
+}
+
+template <bool invert = false>
+static float RatioOfDerivativesOfCubicRootToSimpleGamma(float v) {
+  // Scalar convenience overload: run the vector version on a single lane.
+  const HWY_CAPPED(float, 1) d1;
+  const auto ratio =
+      RatioOfDerivativesOfCubicRootToSimpleGamma<invert>(d1, Load(d1, &v));
+  return GetLane(ratio);
+}
+
+// TODO(veluca): this function computes an approximation of the derivative of
+// SimpleGamma with (f(x+eps)-f(x))/eps. Consider two-sided approximation or
+// exact derivatives. For reference, SimpleGamma was:
+/*
+template <typename D, typename V>
+V SimpleGamma(const D d, V v) {
+  // A simple HDR compatible gamma function.
+  const auto mul = Set(d, kSGmul);
+  const auto kRetMul = Set(d, kSGRetMul);
+  const auto kRetAdd = Set(d, kSGmul2 * -20.2789020414f);
+  const auto kVOffset = Set(d, kSGVOffset);
+
+  v *= mul;
+
+  // This should happen rarely, but may lead to a NaN, which is rather
+  // undesirable. Since negative photons don't exist we solve the NaNs by
+  // clamping here.
+  // TODO(veluca): with FastLog2f, this no longer leads to NaNs.
+  v = ZeroIfNegative(v);
+  return kRetMul * FastLog2f(d, v + kVOffset) + kRetAdd;
+}
+*/
+
+template <class D, class V>
+V GammaModulation(const D d, const size_t x, const size_t y,
+                  const ImageF& xyb_x, const ImageF& xyb_y, const V out_val) {
+  // Averages the inverted gamma-derivative ratio over the 8x8 block whose
+  // top-left pixel is (x, y) and adds a multiple of its log2 to `out_val`.
+  const float kBias = 0.16f;
+  JXL_DASSERT(kBias > kOpsinAbsorbanceBias[0]);
+  JXL_DASSERT(kBias > kOpsinAbsorbanceBias[1]);
+  JXL_DASSERT(kBias > kOpsinAbsorbanceBias[2]);
+  const auto bias_v = Set(d, kBias);
+  const auto half_v = Set(d, 0.5f);
+  auto ratio_sum = Zero(d);
+  for (size_t row = y; row < y + 8; ++row) {
+    const float* const JXL_RESTRICT x_row = xyb_x.Row(row);
+    const float* const JXL_RESTRICT y_row = xyb_y.Row(row);
+    for (size_t col = x; col < x + 8; col += Lanes(d)) {
+      const auto in_x = Load(d, x_row + col);
+      const auto in_y = Load(d, y_row + col) + bias_v;
+      // The two signals are the difference and the sum of biased Y and X.
+      const auto inv_ratio_r =
+          RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/true>(
+              d, in_y - in_x);
+      const auto inv_ratio_g =
+          RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/true>(
+              d, in_y + in_x);
+      ratio_sum += half_v * (inv_ratio_r + inv_ratio_g);
+    }
+  }
+  // Mean over the 64 pixels of the block.
+  const auto mean_ratio = SumOfLanes(ratio_sum) * Set(d, 1.0f / 64);
+  // Ideally -1.0, but likely optimal correction adds some entropy, so slightly
+  // less than that. ln(2) is folded in: we want std::log but have FastLog2f.
+  const auto kGam = Set(d, -0.15526878023684174f * 0.693147180559945f);
+  return MulAdd(kGam, FastLog2f(d, mean_ratio), out_val);
+}
+
+// Change precision in 8x8 blocks that have high frequency content.
+template <class D, class V>
+V HfModulation(const D d, const size_t x, const size_t y, const ImageF& xyb,
+               const V out_val) {
+  // Zero out the invalid differences for the rightmost value per row.
+  const Rebind<uint32_t, D> du;
+  HWY_ALIGN constexpr uint32_t kMaskRight[kBlockDim] = {~0u, ~0u, ~0u, ~0u,
+                                                        ~0u, ~0u, ~0u, 0};
+  // (x, y) is the top-left pixel of the 8x8 block being measured.
+  auto sum = Zero(d);  // sum of absolute differences with right and below
+
+  for (size_t dy = 0; dy < 8; ++dy) {
+    const float* JXL_RESTRICT row_in = xyb.Row(y + dy) + x;
+    const float* JXL_RESTRICT row_in_next =
+        dy == 7 ? row_in : xyb.Row(y + dy + 1) + x;
+    // The last block row is compared with itself, so it adds no "below" term.
+    // In SCALAR, there is no guarantee of having extra row padding.
+    // Hence, we need to ensure we don't access pixels outside the row itself.
+    // In SIMD modes, however, rows are padded, so it's safe to access one
+    // garbage value after the row. The vector then gets masked with kMaskRight
+    // to remove the influence of that value.
+#if HWY_TARGET != HWY_SCALAR
+    for (size_t dx = 0; dx < 8; dx += Lanes(d)) {
+#else
+    for (size_t dx = 0; dx < 7; dx += Lanes(d)) {
+#endif
+      const auto p = Load(d, row_in + dx);
+      const auto pr = LoadU(d, row_in + dx + 1);  // right neighbours (unaligned)
+      const auto mask = BitCast(d, Load(du, kMaskRight + dx));
+      sum += And(mask, AbsDiff(p, pr));
+      // NOTE(review): the scalar `dx < 7` bound also skips column 7 here.
+      const auto pd = Load(d, row_in_next + dx);
+      sum += AbsDiff(p, pd);
+    }
+  }
+  // 112 = (8 rows x 7 right) + (7 rows x 8 below) differences in the SIMD path.
+  sum = SumOfLanes(sum);
+  return MulAdd(sum, Set(d, -2.0052193233688884f / 112), out_val);
+}
+
+void PerBlockModulations(const float butteraugli_target, const ImageF& xyb_x,
+                         const ImageF& xyb_y, const float scale,
+                         const Rect& rect, ImageF* out) {
+  JXL_ASSERT(SameSize(xyb_x, xyb_y));
+  JXL_ASSERT(DivCeil(xyb_x.xsize(), kBlockDim) == out->xsize());
+  JXL_ASSERT(DivCeil(xyb_x.ysize(), kBlockDim) == out->ysize());
+
+  // `dampen` is 1 below the ramp start, falls linearly across the ramp and is
+  // clamped at 0 beyond its end; it blends each value towards `base_level`.
+  const float ramp_start = 7.0f;
+  const float ramp_end = 14.0f;
+  float dampen = 1.0f;
+  if (butteraugli_target >= ramp_start) {
+    const float ramp_pos =
+        (butteraugli_target - ramp_start) / (ramp_end - ramp_start);
+    dampen = 1.0f - ramp_pos;
+    if (dampen < 0) dampen = 0;
+  }
+  const float base_level = 0.5f * scale;
+  const float mul = scale * dampen;
+  const float add = (1.0f - dampen) * base_level;
+  const HWY_CAPPED(float, kBlockDim) df;
+  const size_t by_end = rect.y0() + rect.ysize();
+  const size_t bx_end = rect.x0() + rect.xsize();
+  for (size_t by = rect.y0(); by < by_end; ++by) {
+    float* const JXL_RESTRICT row_out = out->Row(by);
+    for (size_t bx = rect.x0(); bx < bx_end; ++bx) {
+      // Everything up to FastPow2f modulates an exponent, because we want a
+      // multiplicative quantization field. 1.442695041 is log2(e).
+      auto exponent = ComputeMask(df, Set(df, row_out[bx]));
+      exponent = HfModulation(df, bx * 8, by * 8, xyb_y, exponent);
+      exponent = GammaModulation(df, bx * 8, by * 8, xyb_x, xyb_y, exponent);
+      row_out[bx] = FastPow2f(GetLane(exponent) * 1.442695041f) * mul + add;
+    }
+  }
+}
+
+template <typename D, typename V>
+V MaskingSqrt(const D d, V v) {
+  // Computes 0.25 * sqrt(v * sqrt(kMul * 1e8) + kLogOffset) per lane.
+  static const float kMul = 211.50759899638012f;
+  static const float kLogOffset = 26.481471032459346f;
+  const auto slope = Sqrt(Set(d, kMul * 1e8));
+  return Set(d, 0.25f) * Sqrt(MulAdd(v, slope, Set(d, kLogOffset)));
+}
+
+float MaskingSqrt(const float v) {
+  // Scalar overload: push `v` through the vector version on a single lane.
+  const HWY_CAPPED(float, 1) d1;
+  return GetLane(MaskingSqrt(d1, Load(d1, &v)));
+}
+
+void StoreMin4(const float v, float& min0, float& min1, float& min2,
+               float& min3) {
+  // Inserts `v` into the quadruple (min0..min3), which is expected to be in
+  // ascending order, and drops the old min3.
+  // A no-op unless v < min3.
+  if (!(v < min3)) return;
+
+  // Slot that `v` will occupy, tested from the smallest entry upwards.
+  const int slot = v < min0 ? 0 : (v < min1 ? 1 : (v < min2 ? 2 : 3));
+
+  // Shift the entries from that slot onwards up by one, largest first, so
+  // that no value is overwritten before it has been moved.
+  if (slot < 3) min3 = min2;
+  if (slot < 2) min2 = min1;
+  if (slot < 1) min1 = min0;
+
+  // Finally store `v` in its slot.
+  float* const slots[4] = {&min0, &min1, &min2, &min3};
+  *slots[slot] = v;
+}
+
+// Look for smooth areas near the area of degradation.
+// If the areas are generally smooth, don't do masking.
+// Output is downsampled 2x.
+void FuzzyErosion(const Rect& from_rect, const ImageF& from,
+                  const Rect& to_rect, ImageF* to) {
+  constexpr int kStep = 1;
+  static_assert(kStep == 1, "Step must be 1");
+  JXL_ASSERT(to_rect.xsize() * 2 == from_rect.xsize());
+  JXL_ASSERT(to_rect.ysize() * 2 == from_rect.ysize());
+  // Weights of the centre and of the 4 smallest samples of its 3x3 window.
+  static const float kMulC = 0.05f;
+  static const float kMul0 = 0.05f;
+  static const float kMul1 = 0.05f;
+  static const float kMul2 = 0.05f;
+  static const float kMul3 = 0.05f;
+  const size_t width = from.xsize();
+  const size_t height = from.ysize();
+  const auto order = [](float& lo, float& hi) {
+    if (lo > hi) std::swap(lo, hi);
+  };
+  for (size_t fy = 0; fy < from_rect.ysize(); ++fy) {
+    const size_t y = from_rect.y0() + fy;
+    const float* row_above = from.Row(y >= kStep ? y - kStep : y);
+    const float* row_mid = from.Row(y);
+    const float* row_below = from.Row(y + kStep < height ? y + kStep : y);
+    float* out_row = to_rect.Row(to, fy / 2);
+    for (size_t fx = 0; fx < from_rect.xsize(); ++fx) {
+      const size_t x = from_rect.x0() + fx;
+      const size_t left = x >= kStep ? x - kStep : x;
+      const size_t right = x + kStep < width ? x + kStep : x;
+      // Seed with four of the nine samples and sort them ascending.
+      float min0 = row_mid[x];
+      float min1 = row_mid[left];
+      float min2 = row_mid[right];
+      float min3 = row_above[left];
+      order(min0, min1);
+      order(min0, min2);
+      order(min0, min3);
+      order(min1, min2);
+      order(min1, min3);
+      order(min2, min3);
+      // Fold in the remaining five samples, keeping the four smallest.
+      const float rest[5] = {row_above[x], row_above[right], row_below[left],
+                             row_below[x], row_below[right]};
+      for (const float sample : rest) {
+        StoreMin4(sample, min0, min1, min2, min3);
+      }
+      const float v = kMulC * row_mid[x] + kMul0 * min0 + kMul1 * min1 +
+                      kMul2 * min2 + kMul3 * min3;
+      // Each output pixel accumulates its 2x2 group of input pixels.
+      const bool first_of_group = fx % 2 == 0 && fy % 2 == 0;
+      out_row[fx / 2] = first_of_group ? v : out_row[fx / 2] + v;
+    }
+  }
+}
+
+struct AdaptiveQuantizationImpl {
+  void Init(const Image3F& xyb) {  // Allocates aq_map: one value per block.
+    JXL_DASSERT(xyb.xsize() % kBlockDim == 0);
+    JXL_DASSERT(xyb.ysize() % kBlockDim == 0);
+    const size_t xsize = xyb.xsize();
+    const size_t ysize = xyb.ysize();
+    aq_map = ImageF(xsize / kBlockDim, ysize / kBlockDim);
+  }
+  void PrepareBuffers(size_t num_threads) {  // Per-thread scratch buffers.
+    diff_buffer = ImageF(kEncTileDim + 8, num_threads);
+    for (size_t i = pre_erosion.size(); i < num_threads; i++) {
+      pre_erosion.emplace_back(kEncTileDimInBlocks * 2 + 2,
+                               kEncTileDimInBlocks * 2 + 2);
+    }
+  }
+
+  void ComputeTile(float butteraugli_target, float scale, const Image3F& xyb,
+                   const Rect& rect, const int thread, ImageF* mask) {
+    PROFILER_ZONE("aq DiffPrecompute");  // `rect` is given in 8x8 blocks.
+    const size_t xsize = xyb.xsize();
+    const size_t ysize = xyb.ysize();
+
+    // The XYB gamma is 3.0 to be able to decode faster with two muls.
+    // Butteraugli's gamma matches the gamma of the human eye, around 2.6.
+    // We approximate the gamma difference by adding one cubic root into
+    // the adaptive quantization. This gives us a total gamma of 2.6666
+    // for quantization uses.
+    const float match_gamma_offset = 0.019;
+
+    const HWY_FULL(float) df;
+    const float kXMul = 23.426802998210313f;  // Weight of the plane-0 term.
+    const auto kXMulv = Set(df, kXMul);
+
+    size_t y_start = rect.y0() * 8;  // Block -> pixel coordinates.
+    size_t y_end = y_start + rect.ysize() * 8;
+
+    size_t x0 = rect.x0() * 8;
+    size_t x1 = x0 + rect.xsize() * 8;
+    if (x0 != 0) x0 -= 4;  // Add a 4-pixel border, except at the image edges.
+    if (x1 != xyb.xsize()) x1 += 4;
+    if (y_start != 0) y_start -= 4;
+    if (y_end != xyb.ysize()) y_end += 4;
+    pre_erosion[thread].ShrinkTo((x1 - x0) / 4, (y_end - y_start) / 4);
+
+    // Computes image (padded to multiple of 8x8) of local pixel differences.
+    // Subsamples both directions by 4.
+    for (size_t y = y_start; y < y_end; ++y) {
+      size_t y2 = y + 1 < ysize ? y + 1 : y;  // Neighbor rows, clamped.
+      size_t y1 = y > 0 ? y - 1 : y;
+
+      const float* row_in = xyb.PlaneRow(1, y);
+      const float* row_in1 = xyb.PlaneRow(1, y1);
+      const float* row_in2 = xyb.PlaneRow(1, y2);
+      const float* row_x_in = xyb.PlaneRow(0, y);
+      const float* row_x_in1 = xyb.PlaneRow(0, y1);
+      const float* row_x_in2 = xyb.PlaneRow(0, y2);
+      float* JXL_RESTRICT row_out = diff_buffer.Row(thread);
+
+      auto scalar_pixel = [&](size_t x) {  // One pixel, x neighbors clamped.
+        const size_t x2 = x + 1 < xsize ? x + 1 : x;
+        const size_t x1 = x > 0 ? x - 1 : x;
+        const float base =
+            0.25f * (row_in2[x] + row_in1[x] + row_in[x1] + row_in[x2]);
+        const float gammac = RatioOfDerivativesOfCubicRootToSimpleGamma(
+            row_in[x] + match_gamma_offset);
+        float diff = gammac * (row_in[x] - base);
+        diff *= diff;
+        const float base_x =
+            0.25f * (row_x_in2[x] + row_x_in1[x] + row_x_in[x1] + row_x_in[x2]);
+        float diff_x = gammac * (row_x_in[x] - base_x);
+        diff_x *= diff_x;
+        diff += kXMul * diff_x;
+        diff = MaskingSqrt(diff);
+        if ((y % 4) != 0) {  // Sum each group of 4 rows; its first row resets.
+          row_out[x - x0] += diff;
+        } else {
+          row_out[x - x0] = diff;
+        }
+      };
+
+      size_t x = x0;
+      // First pixel of the row (has no left neighbor, so it is done scalar).
+      if (x0 == 0) {
+        scalar_pixel(x0);
+        ++x;
+      }
+      // SIMD: same computation as scalar_pixel, Lanes(df) pixels at a time.
+      const auto match_gamma_offset_v = Set(df, match_gamma_offset);
+      const auto quarter = Set(df, 0.25f);
+      for (; x + 1 + Lanes(df) < x1; x += Lanes(df)) {
+        const auto in = LoadU(df, row_in + x);
+        const auto in_r = LoadU(df, row_in + x + 1);
+        const auto in_l = LoadU(df, row_in + x - 1);
+        const auto in_t = LoadU(df, row_in2 + x);
+        const auto in_b = LoadU(df, row_in1 + x);
+        auto base = quarter * (in_r + in_l + in_t + in_b);
+        auto gammacv =
+            RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/false>(
+                df, in + match_gamma_offset_v);
+        auto diff = gammacv * (in - base);
+        diff *= diff;
+
+        const auto in_x = LoadU(df, row_x_in + x);
+        const auto in_x_r = LoadU(df, row_x_in + x + 1);
+        const auto in_x_l = LoadU(df, row_x_in + x - 1);
+        const auto in_x_t = LoadU(df, row_x_in2 + x);
+        const auto in_x_b = LoadU(df, row_x_in1 + x);
+        auto base_x = quarter * (in_x_r + in_x_l + in_x_t + in_x_b);
+        auto diff_x = gammacv * (in_x - base_x);
+        diff_x *= diff_x;
+        diff += kXMulv * diff_x;
+        diff = MaskingSqrt(df, diff);
+        if ((y & 3) != 0) {
+          diff += LoadU(df, row_out + x - x0);
+        }
+        StoreU(diff, df, row_out + x - x0);
+      }
+      // Scalar tail for the pixels the SIMD loop did not reach.
+      for (; x < x1; ++x) {
+        scalar_pixel(x);
+      }
+      if (y % 4 == 3) {  // Group of 4 rows done: reduce 4 columns to 1.
+        float* row_dout = pre_erosion[thread].Row((y - y_start) / 4);
+        for (size_t x = 0; x < (x1 - x0) / 4; x++) {
+          row_dout[x] = (row_out[x * 4] + row_out[x * 4 + 1] +
+                         row_out[x * 4 + 2] + row_out[x * 4 + 3]) *
+                        0.25f;
+        }
+      }
+    }
+    Rect from_rect(x0 % 8 == 0 ? 0 : 1, y_start % 8 == 0 ? 0 : 1,
+                   rect.xsize() * 2, rect.ysize() * 2);  // Skips the border.
+    FuzzyErosion(from_rect, pre_erosion[thread], rect, &aq_map);
+    for (size_t y = 0; y < rect.ysize(); ++y) {
+      const float* aq_map_row = rect.ConstRow(aq_map, y);
+      float* mask_row = rect.Row(mask, y);
+      for (size_t x = 0; x < rect.xsize(); ++x) {
+        mask_row[x] = ComputeMaskForAcStrategyUse(aq_map_row[x]);
+      }
+    }
+    PerBlockModulations(butteraugli_target, xyb.Plane(0), xyb.Plane(1), scale,
+                        rect, &aq_map);
+  }
+  std::vector<ImageF> pre_erosion;  // One 4x-subsampled tile image per thread.
+  ImageF aq_map;                    // One value per block.
+  ImageF diff_buffer;               // One row per thread.
+};
+
+ImageF AdaptiveQuantizationMap(const float butteraugli_target,
+                               const Image3F& xyb,
+                               const FrameDimensions& frame_dim, float scale,
+                               ThreadPool* pool, ImageF* mask) {
+  PROFILER_ZONE("aq AdaptiveQuantMap");
+
+  const size_t tiles_x = DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks);
+  const size_t tiles_y = DivCeil(frame_dim.ysize_blocks, kEncTileDimInBlocks);
+  AdaptiveQuantizationImpl impl;
+  impl.Init(xyb);
+  *mask = ImageF(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+  // Sizes the per-thread scratch buffers once the thread count is known.
+  const auto init_threads = [&](size_t num_threads) {
+    impl.PrepareBuffers(num_threads);
+    return true;
+  };
+  // Handles one encoder tile; tiles are numbered row-major by `tile_index`.
+  const auto process_tile = [&](const int tile_index, int thread) {
+    const size_t tile_col = tile_index % tiles_x;
+    const size_t tile_row = tile_index / tiles_x;
+    const size_t x_begin = tile_col * kEncTileDimInBlocks;
+    const size_t y_begin = tile_row * kEncTileDimInBlocks;
+    // Tiles in the last row/column are clipped to the frame size in blocks.
+    const size_t x_end =
+        std::min(x_begin + kEncTileDimInBlocks, frame_dim.xsize_blocks);
+    const size_t y_end =
+        std::min(y_begin + kEncTileDimInBlocks, frame_dim.ysize_blocks);
+    Rect tile(x_begin, y_begin, x_end - x_begin, y_end - y_begin);
+    impl.ComputeTile(butteraugli_target, scale, xyb, tile, thread, mask);
+  };
+  RunOnPool(pool, 0, tiles_x * tiles_y, init_threads, process_tile,
+            "AQ DiffPrecompute");
+
+  return std::move(impl).aq_map;
+}
+
+}  // namespace
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(AdaptiveQuantizationMap);
+
+namespace {
+bool FLAGS_log_search_state = false;  // If true, logs per-iteration stats.
+// If true, prints the quantization maps at each iteration.
+bool FLAGS_dump_quant_state = false;
+
+void DumpHeatmap(const AuxOut* aux_out, const std::string& label,
+                 const ImageF& image, float good_threshold,
+                 float bad_threshold) {
+  char name[200];  // "<label><5-digit iteration count>", truncated to fit.
+  snprintf(name, sizeof(name), "%s%05d", label.c_str(),
+           aux_out->num_butteraugli_iters);
+  Image3F colored = CreateHeatMapImage(image, good_threshold, bad_threshold);
+  aux_out->DumpImage(name, colored);
+}
+
+void DumpHeatmaps(const AuxOut* aux_out, float ba_target,
+                  const ImageF& quant_field, const ImageF& tile_heatmap,
+                  const ImageF& bt_diffmap) {
+  if (!WantDebugOutput(aux_out)) return;
+  const size_t width = quant_field.xsize();
+  const size_t height = quant_field.ysize();
+  ImageF reciprocal(width, height);  // 1 / quant_field; entries never zero
+  for (size_t row = 0; row < height; ++row) {
+    const float* JXL_RESTRICT src = quant_field.ConstRow(row);
+    float* JXL_RESTRICT dst = reciprocal.Row(row);
+    for (size_t col = 0; col < width; ++col) dst[col] = 1.0f / src[col];
+  }
+  DumpHeatmap(aux_out, "quant_heatmap", reciprocal, 4.0f * ba_target,
+              6.0f * ba_target);
+  DumpHeatmap(aux_out, "tile_heatmap", tile_heatmap, ba_target,
+              1.5f * ba_target);
+  // Thresholds match the heat maps produced by the command line tool.
+  DumpHeatmap(aux_out, "bt_diffmap", bt_diffmap, ButteraugliFuzzyInverse(1.5),
+              ButteraugliFuzzyInverse(0.5));
+}
+
+// Weighted 16-norm of `distmap` per varblock, stored at each covered block.
+ImageF TileDistMap(const ImageF& distmap, int tile_size, int margin,
+                   const AcStrategyImage& ac_strategy) {
+  PROFILER_FUNC;
+  // Weights for samples on the edge / corner of the (margin-extended) window.
+  constexpr float kBorderMul = 0.98f;
+  constexpr float kCornerMul = 0.7f;
+  // 16th norm is less than the max norm, we reduce the difference
+  // with this normalization factor.
+  constexpr float kTileNorm = 1.2f;
+  const int tiles_wide = (distmap.xsize() + tile_size - 1) / tile_size;
+  const int tiles_high = (distmap.ysize() + tile_size - 1) / tile_size;
+  ImageF result(tiles_wide, tiles_high);
+  const size_t result_stride = result.PixelsPerRow();
+  for (int ty = 0; ty < tiles_high; ++ty) {
+    AcStrategyRow strategies = ac_strategy.ConstRow(ty);
+    float* JXL_RESTRICT out_row = result.Row(ty);
+    for (int tx = 0; tx < tiles_wide; ++tx) {
+      AcStrategy acs = strategies[tx];
+      if (!acs.IsFirstBlock()) continue;
+      // Pixel window covered by this varblock, grown by `margin` and clipped.
+      const int span_x = acs.covered_blocks_x() * tile_size;
+      const int span_y = acs.covered_blocks_y() * tile_size;
+      const int top = std::max<int>(0, tile_size * ty - margin);
+      const int bottom =
+          std::min<int>(distmap.ysize(), tile_size * ty + span_y + margin);
+      const int left = std::max<int>(0, tile_size * tx - margin);
+      const int right =
+          std::min<int>(distmap.xsize(), tile_size * tx + span_x + margin);
+      float weighted_sum = 0.0;
+      double weight_total = 0;
+      for (int py = top; py < bottom; ++py) {
+        const bool edge_row = margin != 0 && (py == top || py == bottom - 1);
+        const float* const JXL_RESTRICT src = distmap.Row(py);
+        for (int px = left; px < right; ++px) {
+          const bool edge_col = margin != 0 && (px == left || px == right - 1);
+          float weight = 1.0;
+          if (edge_row && edge_col) {
+            weight = kCornerMul;
+          } else if (edge_row || edge_col) {
+            weight = kBorderMul;
+          }
+          // Raise the sample to the 16th power by squaring four times.
+          const float p2 = src[px] * src[px];
+          const float p4 = p2 * p2;
+          const float p8 = p4 * p4;
+          const float p16 = p8 * p8;
+          weighted_sum += weight * p16;
+          weight_total += weight;
+        }
+      }
+      if (weight_total == 0) weight_total = 1;
+      const float tile_dist =
+          kTileNorm * std::pow(weighted_sum / weight_total, 1.0f / 16.0f);
+      out_row[tx] = tile_dist;
+      // Replicate the value over every block the varblock covers.
+      for (size_t by = 0; by < acs.covered_blocks_y(); by++) {
+        for (size_t bx = 0; bx < acs.covered_blocks_x(); bx++) {
+          out_row[tx + result_stride * by + bx] = tile_dist;
+        }
+      }
+    }
+  }
+  return result;
+}
+
+constexpr float kDcQuantPow = 0.57f;
+static const float kDcQuant = 1.12f;
+static const float kAcQuant = 0.787f;
+
+// Iteratively refines enc_state->initial_quant_field. Each round applies the
+// field to the quantizer, round-trips `opsin` through the decoder, measures
+// the Butteraugli distance to `linear`, and raises the field in every tile
+// whose distance exceeds cparams.butteraugli_distance. `aux_out` is only
+// used for debug dumps and for counting the Butteraugli iterations.
+void FindBestQuantization(const ImageBundle& linear, const Image3F& opsin,
+                          PassesEncoderState* enc_state, ThreadPool* pool,
+                          AuxOut* aux_out) {
+  const CompressParams& cparams = enc_state->cparams;
+  Quantizer& quantizer = enc_state->shared.quantizer;
+  ImageI& raw_quant_field = enc_state->shared.raw_quant_field;
+  ImageF& quant_field = enc_state->initial_quant_field;
+
+  const float butteraugli_target = cparams.butteraugli_distance;
+  ButteraugliParams params = cparams.ba_params;
+  params.intensity_target = linear.metadata()->IntensityTarget();
+  // Hack the default intensity target value to be 80.0, the intensity
+  // target of sRGB images and a more reasonable viewing default than
+  // JPEG XL file format's default.
+  if (fabs(params.intensity_target - 255.0f) < 1e-3) {
+    params.intensity_target = 80.0f;
+  }
+  JxlButteraugliComparator comparator(params);
+  JXL_CHECK(comparator.SetReferenceImage(linear));
+  bool lower_is_better =
+      (comparator.GoodQualityScore() < comparator.BadQualityScore());
+  const float initial_quant_dc = InitialQuantDC(butteraugli_target);
+  AdjustQuantField(enc_state->shared.ac_strategy, Rect(quant_field),
+                   &quant_field);
+  ImageF tile_distmap;
+  ImageF initial_quant_field = CopyImage(quant_field);
+
+  // Bound the search range; qf_higher / qf_lower < 253 is asserted below.
+  float initial_qf_min, initial_qf_max;
+  ImageMinMax(initial_quant_field, &initial_qf_min, &initial_qf_max);
+  float initial_qf_ratio = initial_qf_max / initial_qf_min;
+  float qf_max_deviation_low = std::sqrt(250 / initial_qf_ratio);
+  float asymmetry = 2;
+  if (qf_max_deviation_low < asymmetry) asymmetry = qf_max_deviation_low;
+  float qf_lower = initial_qf_min / (asymmetry * qf_max_deviation_low);
+  float qf_higher = initial_qf_max * (qf_max_deviation_low / asymmetry);
+
+  JXL_ASSERT(qf_higher / qf_lower < 253);
+
+  constexpr int kOriginalComparisonRound = 1;
+  // Per-round exponent applied to tiles already within the target
+  // (diff <= 1); kPowMod scales it with (butteraugli_target - 1).
+  static const double kPow[8] = {0.2, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+  static const double kPowMod[8] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+  int iters = cparams.max_butteraugli_iters;
+  if (iters > 7) iters = 7;
+  if (cparams.speed_tier != SpeedTier::kTortoise) iters = 2;
+  for (int i = 0; i < iters + 1; ++i) {
+    if (FLAGS_dump_quant_state) {
+      printf("\nQuantization field:\n");
+      for (size_t y = 0; y < quant_field.ysize(); ++y) {
+        for (size_t x = 0; x < quant_field.xsize(); ++x) {
+          printf(" %.5f", quant_field.Row(y)[x]);
+        }
+        printf("\n");
+      }
+    }
+    quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field);
+    // Decoded result of the current field; compared to the reference above.
+    ImageBundle decoded = RoundtripImage(opsin, enc_state, pool);
+    PROFILER_ZONE("enc Butteraugli");
+    float score;
+    ImageF diffmap;
+    JXL_CHECK(comparator.CompareWith(decoded, &diffmap, &score));
+    if (!lower_is_better) {
+      score = -score;
+      diffmap = ScaleImage(-1.0f, diffmap);
+    }
+    tile_distmap = TileDistMap(diffmap, 8, 0, enc_state->shared.ac_strategy);
+    if (WantDebugOutput(aux_out)) {
+      aux_out->DumpImage(("dec" + ToString(i)).c_str(), *decoded.color());
+      DumpHeatmaps(aux_out, butteraugli_target, quant_field, tile_distmap,
+                   diffmap);
+    }
+    if (aux_out != nullptr) ++aux_out->num_butteraugli_iters;
+    if (FLAGS_log_search_state) {
+      float minval, maxval;
+      ImageMinMax(quant_field, &minval, &maxval);
+      printf("\nButteraugli iter: %d/%d\n", i, iters);
+      printf("Butteraugli distance: %f\n", score);
+      printf("quant range: %f ... %f  DC quant: %f\n", minval, maxval,
+             initial_quant_dc);
+      if (FLAGS_dump_quant_state) {
+        quantizer.DumpQuantizationMap(raw_quant_field);
+      }
+    }
+
+    // The last round only measures; no further adjustment is applied.
+    if (i == iters) break;
+
+    if (i == kOriginalComparisonRound) {
+      // Don't allow optimization to make the quant field a lot worse than
+      // what the initial guess was. This allows the AC field to have enough
+      // precision to reduce the oscillations due to the dc reconstruction.
+      double kInitMul = 0.6;
+      const double kOneMinusInitMul = 1.0 - kInitMul;
+      for (size_t y = 0; y < quant_field.ysize(); ++y) {
+        float* const JXL_RESTRICT row_q = quant_field.Row(y);
+        const float* const JXL_RESTRICT row_init = initial_quant_field.Row(y);
+        for (size_t x = 0; x < quant_field.xsize(); ++x) {
+          double clamp = kOneMinusInitMul * row_q[x] + kInitMul * row_init[x];
+          if (row_q[x] < clamp) {
+            row_q[x] = clamp;
+            if (row_q[x] > qf_higher) row_q[x] = qf_higher;
+            if (row_q[x] < qf_lower) row_q[x] = qf_lower;
+          }
+        }
+      }
+    }
+
+    double cur_pow = 0.0;
+    if (i < 7) {
+      cur_pow = kPow[i] + (butteraugli_target - 1.0) * kPowMod[i];
+      if (cur_pow < 0) cur_pow = 0;
+    }
+    if (cur_pow == 0.0) {
+      for (size_t y = 0; y < quant_field.ysize(); ++y) {
+        const float* const JXL_RESTRICT row_dist = tile_distmap.Row(y);
+        float* const JXL_RESTRICT row_q = quant_field.Row(y);
+        for (size_t x = 0; x < quant_field.xsize(); ++x) {
+          const float diff = row_dist[x] / butteraugli_target;
+          if (diff > 1.0f) {
+            float old = row_q[x];
+            row_q[x] *= diff;
+            int qf_old = old * quantizer.InvGlobalScale() + 0.5;
+            int qf_new = row_q[x] * quantizer.InvGlobalScale() + 0.5;
+            if (qf_old == qf_new) {
+              row_q[x] = old + quantizer.Scale();
+            }
+          }
+          if (row_q[x] > qf_higher) row_q[x] = qf_higher;
+          if (row_q[x] < qf_lower) row_q[x] = qf_lower;
+        }
+      }
+    } else {
+      for (size_t y = 0; y < quant_field.ysize(); ++y) {
+        const float* const JXL_RESTRICT row_dist = tile_distmap.Row(y);
+        float* const JXL_RESTRICT row_q = quant_field.Row(y);
+        for (size_t x = 0; x < quant_field.xsize(); ++x) {
+          const float diff = row_dist[x] / butteraugli_target;
+          if (diff <= 1.0f) {
+            row_q[x] *= std::pow(diff, cur_pow);
+          } else {
+            float old = row_q[x];
+            row_q[x] *= diff;
+            int qf_old = old * quantizer.InvGlobalScale() + 0.5;
+            int qf_new = row_q[x] * quantizer.InvGlobalScale() + 0.5;
+            if (qf_old == qf_new) {
+              row_q[x] = old + quantizer.Scale();
+            }
+          }
+          if (row_q[x] > qf_higher) row_q[x] = qf_higher;
+          if (row_q[x] < qf_lower) row_q[x] = qf_lower;
+        }
+      }
+    }
+  }
+  quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field);
+}
+
+void FindBestQuantizationMaxError(const Image3F& opsin,
+                                  PassesEncoderState* enc_state,
+                                  ThreadPool* pool, AuxOut* aux_out) {
+  // TODO(veluca): this only works if opsin is in XYB. The current encoder does
+  // not have code paths that produce non-XYB opsin here.
+  JXL_CHECK(enc_state->shared.frame_header.color_transform ==
+            ColorTransform::kXYB);
+  const CompressParams& cparams = enc_state->cparams;
+  const auto& frame_dim = enc_state->shared.frame_dim;
+  Quantizer& quantizer = enc_state->shared.quantizer;
+  ImageI& raw_quant_field = enc_state->shared.raw_quant_field;
+  ImageF& quant_field = enc_state->initial_quant_field;
+
+  // TODO(veluca): better choice of this value.
+  const float initial_quant_dc =
+      16 * std::sqrt(0.1f / cparams.butteraugli_distance);
+  AdjustQuantField(enc_state->shared.ac_strategy, Rect(quant_field),
+                   &quant_field);
+
+  // Per-channel reciprocals of the allowed error, so errors become ratios.
+  float inv_max_err[3];
+  for (size_t c = 0; c < 3; c++) inv_max_err[c] = 1.0f / cparams.max_error[c];
+
+  for (int i = 0; i < cparams.max_butteraugli_iters + 1; ++i) {
+    quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field);
+    if (aux_out) aux_out->DumpXybImage(("ops" + ToString(i)).c_str(), opsin);
+    ImageBundle decoded = RoundtripImage(opsin, enc_state, pool);
+    if (aux_out) {
+      aux_out->DumpXybImage(("dec" + ToString(i)).c_str(), *decoded.color());
+    }
+    for (size_t by = 0; by < frame_dim.ysize_blocks; by++) {
+      AcStrategyRow strategies = enc_state->shared.ac_strategy.ConstRow(by);
+      for (size_t bx = 0; bx < frame_dim.xsize_blocks; bx++) {
+        AcStrategy acs = strategies[bx];
+        if (!acs.IsFirstBlock()) continue;
+        // Pixel extent of the varblock that starts at block (bx, by).
+        const size_t y_first = by * kBlockDim;
+        const size_t y_limit = (by + acs.covered_blocks_y()) * kBlockDim;
+        const size_t x_first = bx * kBlockDim;
+        const size_t x_limit = (bx + acs.covered_blocks_x()) * kBlockDim;
+        float worst = 0;  // largest error / max_error within the varblock
+        for (size_t c = 0; c < 3; c++) {
+          for (size_t y = y_first; y < y_limit; y++) {
+            if (y >= decoded.ysize()) continue;
+            const float* JXL_RESTRICT in_row = opsin.ConstPlaneRow(c, y);
+            const float* JXL_RESTRICT dec_row =
+                decoded.color()->ConstPlaneRow(c, y);
+            for (size_t x = x_first; x < x_limit; x++) {
+              if (x >= decoded.xsize()) continue;
+              worst = std::max(
+                  std::abs(in_row[x] - dec_row[x]) * inv_max_err[c], worst);
+            }
+          }
+        }
+        // Aim for an error in [max_error/2, max_error]: grow the qf in
+        // proportion to an excess, shrink it only when below half of it.
+        float qf_mul = 1.0f;
+        if (worst < 0.5f) {
+          qf_mul = worst * 2.0f;
+        } else if (worst > 1.0f) {
+          qf_mul = worst;
+        }
+        for (size_t qy = by; qy < by + acs.covered_blocks_y(); qy++) {
+          float* JXL_RESTRICT quant_field_row = quant_field.Row(qy);
+          for (size_t qx = bx; qx < bx + acs.covered_blocks_x(); qx++) {
+            quant_field_row[qx] *= qf_mul;
+          }
+        }
+      }
+    }
+  }
+  quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field);
+}
+
+}  // namespace
+
+void AdjustQuantField(const AcStrategyImage& ac_strategy, const Rect& rect,
+                      ImageF* quant_field) {
+  // Every multi-block (non-8x8) transform gets a single quant value: the
+  // maximum over the 8x8 blocks it covers.
+  const size_t row_stride = quant_field->PixelsPerRow();
+  for (size_t y = 0; y < rect.ysize(); ++y) {
+    AcStrategyRow strategies = ac_strategy.ConstRow(rect, y);
+    float* JXL_RESTRICT base = rect.Row(quant_field, y);
+    for (size_t x = 0; x < rect.xsize(); ++x) {
+      AcStrategy acs = strategies[x];
+      if (!acs.IsFirstBlock()) continue;
+      JXL_ASSERT(x + acs.covered_blocks_x() <= quant_field->xsize());
+      JXL_ASSERT(y + acs.covered_blocks_y() <= quant_field->ysize());
+      const size_t blocks_x = acs.covered_blocks_x();
+      const size_t blocks_y = acs.covered_blocks_y();
+      float peak = base[x];  // first pass: maximum over the covered blocks
+      for (size_t dy = 0; dy < blocks_y; dy++) {
+        const float* cells = base + x + dy * row_stride;
+        for (size_t i = 0; i < blocks_x; i++) peak = std::max(cells[i], peak);
+      }
+      for (size_t dy = 0; dy < blocks_y; dy++) {  // second pass: write back
+        float* cells = base + x + dy * row_stride;
+        for (size_t i = 0; i < blocks_x; i++) cells[i] = peak;
+      }
+    }
+  }
+}
+
+float InitialQuantDC(float butteraugli_target) {
+  const float kDcMul = 2.9;  // Butteraugli target where non-linearity kicks in.
+  // Power-law compression of the target, normalized so kDcMul maps to itself.
+  const float compressed =
+      kDcMul * std::pow((1.0f / kDcMul) * butteraugli_target, kDcQuantPow);
+  // The DC target never exceeds the AC target nor drops below half of it.
+  const float capped = std::min<float>(butteraugli_target, compressed);
+  const float dc_target = std::max<float>(0.5f * butteraugli_target, capped);
+  // We want the maximum DC value to be at most 2**15 * kInvDCQuant / quant_dc.
+  // The maximum DC value might not be in the kXybRange because of inverse
+  // gaborish, so slack is added to the maximum theoretical quant (64).
+  return std::min(kDcQuant / dc_target, 50.f);
+}
+
+ImageF InitialQuantField(const float butteraugli_target, const Image3F& opsin,
+                         const FrameDimensions& frame_dim, ThreadPool* pool,
+                         float rescale, ImageF* mask) {
+  PROFILER_FUNC;
+  const float ac_scale = (kAcQuant / butteraugli_target) * rescale;
+  return HWY_DYNAMIC_DISPATCH(AdaptiveQuantizationMap)(
+      butteraugli_target, opsin, frame_dim, ac_scale, pool, mask);
+}
+
+void FindBestQuantizer(const ImageBundle* linear, const Image3F& opsin,
+                       PassesEncoderState* enc_state, ThreadPool* pool,
+                       AuxOut* aux_out, double rescale) {
+  if (enc_state->cparams.max_error_mode) {
+    PROFILER_ZONE("enc find best maxerr");
+    FindBestQuantizationMaxError(opsin, enc_state, pool, aux_out);
+    return;
+  }
+  // Normal encoding to a butteraugli score, only for Kitten or slower tiers.
+  if (enc_state->cparams.speed_tier > SpeedTier::kKitten) return;
+  PROFILER_ZONE("enc find best2");
+  FindBestQuantization(*linear, opsin, enc_state, pool, aux_out);
+}
+
+ImageBundle RoundtripImage(const Image3F& opsin, PassesEncoderState* enc_state,
+                           ThreadPool* pool) {
+  PROFILER_ZONE("enc roundtrip");
+  std::unique_ptr<PassesDecoderState> dec_state =
+      jxl::make_unique<PassesDecoderState>();
+  JXL_CHECK(dec_state->output_encoding_info.Set(
+      *enc_state->shared.metadata,
+      ColorEncoding::LinearSRGB(
+          enc_state->shared.metadata->m.color_encoding.IsGray())));
+  dec_state->shared = &enc_state->shared;  // Decoder reuses the shared state.
+  JXL_ASSERT(opsin.ysize() % kBlockDim == 0);
+
+  const size_t xsize_groups = DivCeil(opsin.xsize(), kGroupDim);
+  const size_t ysize_groups = DivCeil(opsin.ysize(), kGroupDim);
+  const size_t num_groups = xsize_groups * ysize_groups;
+
+  size_t num_special_frames = enc_state->special_frames.size();
+
+  std::unique_ptr<ModularFrameEncoder> modular_frame_encoder =  // Unused below.
+      jxl::make_unique<ModularFrameEncoder>(enc_state->shared.frame_header,
+                                            enc_state->cparams);
+  // InitializePassesEncoder(opsin, pool, enc_state, modular_frame_encoder.get(),
+  //                         nullptr);
+  JXL_CHECK(dec_state->Init());
+  dec_state->InitForAC(pool);
+
+  ImageBundle decoded(&enc_state->shared.metadata->m);
+  decoded.origin = enc_state->shared.frame_header.frame_origin;
+  decoded.SetFromImage(Image3F(opsin.xsize(), opsin.ysize()),
+                       dec_state->output_encoding_info.color_encoding);
+
+  // Same as dec_state->shared->frame_header.nonserialized_metadata->m
+  const ImageMetadata& metadata = *decoded.metadata();
+  if (!metadata.extra_channel_info.empty()) {
+    // Add dummy extra channels to the dec_state: FinalizeFrameDecoding moves
+    // these extra channels to the ImageBundle, and requires that the number
+    // of extra channels matches its metadata()->extra_channel_info.size().
+    // Normally we'd place these extra channels in the ImageBundle, but in this
+    // case FinalizeFrameDecoding is the one that does this.
+    std::vector<ImageF> extra_channels;
+    extra_channels.reserve(metadata.extra_channel_info.size());
+    for (size_t i = 0; i < metadata.extra_channel_info.size(); i++) {
+      extra_channels.emplace_back(decoded.xsize(), decoded.ysize());
+      // Must initialize the image with data to not affect blending with
+      // uninitialized memory.
+      ZeroFillImage(&extra_channels.back());
+    }
+    dec_state->extra_channels = std::move(extra_channels);
+  }
+
+  hwy::AlignedUniquePtr<GroupDecCache[]> group_dec_caches;
+  const auto allocate_storage = [&](size_t num_threads) {
+    dec_state->EnsureStorage(num_threads);
+    group_dec_caches = hwy::MakeUniqueAlignedArray<GroupDecCache>(num_threads);
+    return true;
+  };
+  const auto process_group = [&](const int group_index, const int thread) {
+    if (dec_state->shared->frame_header.loop_filter.epf_iters > 0) {
+      ComputeSigma(dec_state->shared->BlockGroupRect(group_index),
+                   dec_state.get());
+    }
+    JXL_CHECK(DecodeGroupForRoundtrip(
+        enc_state->coeffs, group_index, dec_state.get(),
+        &group_dec_caches[thread], thread, &decoded, nullptr));
+  };
+  RunOnPool(pool, 0, num_groups, allocate_storage, process_group, "AQ loop");
+
+  // Fine to use JXL_CHECK instead of error handling, since this only happens
+  // on the encoder side where we can't be fed with invalid data.
+  JXL_CHECK(FinalizeFrameDecoding(&decoded, dec_state.get(), pool,
+                                  /*force_fir=*/false, /*skip_blending=*/true));
+  // Ensure we don't create any new special frames.
+  enc_state->special_frames.resize(num_special_frames);
+
+  return decoded;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_adaptive_quantization.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_adaptive_quantization.h
new file mode 100644
index 0000000000..d9666f42b1
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_adaptive_quantization.h
@@ -0,0 +1,65 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_ADAPTIVE_QUANTIZATION_H_
+#define LIB_JXL_ENC_ADAPTIVE_QUANTIZATION_H_
+
+#include <stddef.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+
+// Heuristics to find a good quantizer for a given image. InitialQuantField
+// produces a quantization field (i.e. relative quantization amounts for each
+// block) out of an opsin-space image. `InitialQuantField` uses heuristics,
+// `FindBestQuantizer` (in non-fast mode) will run multiple encoding-decoding
+// steps and try to improve the given quant field.
+
+namespace jxl {
+
+// Computes the decoded image for a given set of compression parameters. Mainly
+// used in the FindBestQuantization loops and in some tests.
+// TODO(veluca): this doesn't seem the best possible file for this function.
+ImageBundle RoundtripImage(const Image3F& opsin, PassesEncoderState* enc_state,
+                           ThreadPool* pool);
+
+// Returns an image subsampled by kBlockDim in each direction. If the value
+// at pixel (x,y) in the returned image is greater than 1.0, it means that
+// more fine-grained quantization should be used in the corresponding block
+// of the input image, while a value less than 1.0 indicates that less
+// fine-grained quantization should be enough. Returns a mask, too, which
+// can later be used to make better decisions about ac strategy.
+ImageF InitialQuantField(float butteraugli_target, const Image3F& opsin,
+                         const FrameDimensions& frame_dim, ThreadPool* pool,
+                         float rescale, ImageF* initial_quant_mask);
+
+float InitialQuantDC(float butteraugli_target);
+
+void AdjustQuantField(const AcStrategyImage& ac_strategy, const Rect& rect,
+                      ImageF* quant_field);
+
+// Iteratively adjusts enc_state->initial_quant_field and applies it to the
+// quantizer in enc_state->shared. Only acts in max-error mode or in Kitten
+// mode or slower; otherwise this is a no-op. `rescale` is currently unused.
+// `linear` is only used in Kitten mode or slower.
+void FindBestQuantizer(const ImageBundle* linear, const Image3F& opsin,
+                       PassesEncoderState* enc_state, ThreadPool* pool,
+                       AuxOut* aux_out, double rescale = 1.0);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_ADAPTIVE_QUANTIZATION_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ans.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ans.cc
new file mode 100644
index 0000000000..48bc745f65
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ans.cc
@@ -0,0 +1,1622 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_ans.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <limits>
+#include <numeric>
+#include <type_traits>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ans_common.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/enc_cluster.h"
+#include "lib/jxl/enc_context_map.h"
+#include "lib/jxl/enc_huffman.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+
+namespace {
+
+bool ans_fuzzer_friendly_ = false;
+
+static const int kMaxNumSymbolsForSmallCode = 4;
+
+void ANSBuildInfoTable(const ANSHistBin* counts, const AliasTable::Entry* table,
+                       size_t alphabet_size, size_t log_alpha_size,
+                       ANSEncSymbolInfo* info) {
+  const size_t entry_bits = ANS_LOG_TAB_SIZE - log_alpha_size;
+  const size_t entry_mask = (1 << entry_bits) - 1;
+  // At least one entry is built so that empty streams get a valid table.
+  const size_t num_entries = std::max<size_t>(1, alphabet_size);
+  for (size_t sym = 0; sym < num_entries; ++sym) {
+    ANSEncSymbolInfo& entry = info[sym];
+    const ANSHistBin freq = sym != alphabet_size ? counts[sym] : ANS_TAB_SIZE;
+    entry.freq_ = static_cast<uint16_t>(freq);
+#ifdef USE_MULT_BY_RECIPROCAL
+    // A zero frequency means the symbol should not occur; use a dummy value.
+    entry.ifreq_ =
+        freq == 0 ? 1 : ((1ull << RECIPROCAL_PRECISION) + entry.freq_ - 1) / entry.freq_;
+#endif
+    entry.reverse_map_.resize(freq);
+  }
+  // Record, for every table position, which slot of which symbol it maps to.
+  for (int pos = 0; pos < ANS_TAB_SIZE; pos++) {
+    const AliasTable::Symbol hit =
+        AliasTable::Lookup(table, pos, entry_bits, entry_mask);
+    info[hit.value].reverse_map_[hit.offset] = pos;
+  }
+}
+
+float EstimateDataBits(const ANSHistBin* histogram, const ANSHistBin* counts,
+                       size_t len) {
+  // Adds up, for each symbol that occurs, occurrences times its bit cost.
+  float bits = 0.0f;
+  int histogram_sum = 0;
+  int counts_sum = 0;
+  for (size_t sym = 0; sym < len; ++sym) {
+    histogram_sum += histogram[sym];
+    counts_sum += counts[sym];
+    if (histogram[sym] <= 0) continue;
+    JXL_ASSERT(counts[sym] > 0);
+    // Cost of one occurrence: ANS_LOG_TAB_SIZE - log2(count), at least zero.
+    const float symbol_bits =
+        std::max(0.0f, ANS_LOG_TAB_SIZE - FastLog2f(counts[sym]));
+    bits += histogram[sym] * symbol_bits;
+  }
+  // A non-empty histogram must come with counts that fill the whole table.
+  if (histogram_sum > 0) { JXL_ASSERT(counts_sum == ANS_TAB_SIZE); }
+  return bits;
+}
+
+float EstimateDataBitsFlat(const ANSHistBin* histogram, size_t len) {
+  // A flat code spends log2(len) bits (never less than zero) on every symbol.
+  const float bits_per_symbol = std::max(FastLog2f(len), 0.0f);
+  // Total number of occurrences over the first `len` histogram bins.
+  const int num_occurrences =
+      std::accumulate(histogram, histogram + len, 0);
+  return num_occurrences * bits_per_symbol;
+}
+
+// Static Huffman code for encoding logcounts. The last symbol is used as RLE
+// sequence.
+static const uint8_t kLogCountBitLengths[ANS_LOG_TAB_SIZE + 2] = {
+    5, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 6, 7, 7,
+};
+static const uint8_t kLogCountSymbols[ANS_LOG_TAB_SIZE + 2] = {
+    17, 11, 15, 3, 9, 7, 4, 2, 5, 6, 0, 33, 1, 65,
+};
+
+// Gives the spacing of the representable counts around "count", i.e. the gap
+// between the nearest representable values below and above it.
+static int SmallestIncrement(uint32_t count, uint32_t shift) {
+  const int top_bit = count != 0 ? FloorLog2Nonzero(count) : -1;
+  const int dropped = top_bit - GetPopulationCountPrecision(top_bit, shift);
+  return dropped >= 0 ? (1 << dropped) : 1;
+}
+
+template <bool minimize_error_of_sum>  // true: round against the running sum error; false: against targets[n]
+bool RebalanceHistogram(const float* targets, int max_symbol, int table_size,
+                        uint32_t shift, int* omit_pos, ANSHistBin* counts) {  // false if the corrected entry ends up <= 0
+  int sum = 0;  // total of the integer counts assigned so far
+  float sum_nonrounded = 0.0;  // total of the matching float targets
+  int remainder_pos = 0;  // stays 0 if all of them are handled in first loop
+  int remainder_log = -1;  // largest floor(log2(count)) seen in the second loop
+  for (int n = 0; n < max_symbol; ++n) {  // first loop: targets in (0, 1) become 1
+    if (targets[n] > 0 && targets[n] < 1.0f) {
+      counts[n] = 1;
+      sum_nonrounded += targets[n];
+      sum += counts[n];
+    }
+  }
+  const float discount_ratio =
+      (table_size - sum) / (table_size - sum_nonrounded);  // scales the targets >= 1 below
+  JXL_ASSERT(discount_ratio > 0);
+  JXL_ASSERT(discount_ratio <= 1.0f);
+  // Invariant for minimize_error_of_sum == true:
+  // abs(sum - sum_nonrounded)
+  //   <= SmallestIncrement(max(targets[])) + max_symbol
+  for (int n = 0; n < max_symbol; ++n) {  // second loop: targets >= 1
+    if (targets[n] >= 1.0f) {
+      sum_nonrounded += targets[n];
+      counts[n] =
+          static_cast<ANSHistBin>(targets[n] * discount_ratio);  // truncate
+      if (counts[n] == 0) counts[n] = 1;
+      if (counts[n] == table_size) counts[n] = table_size - 1;
+      // Round the count to the closest nonzero multiple of SmallestIncrement
+      // (when minimize_error_of_sum is false) or one of two closest so as to
+      // keep the sum as close as possible to sum_nonrounded.
+      int inc = SmallestIncrement(counts[n], shift);
+      counts[n] -= counts[n] & (inc - 1);  // round down to a multiple of inc
+      // TODO(robryk): Should we rescale targets[n]?
+      const float target =
+          minimize_error_of_sum ? (sum_nonrounded - sum) : targets[n];
+      if (counts[n] == 0 ||
+          (target > counts[n] + inc / 2 && counts[n] + inc < table_size)) {
+        counts[n] += inc;  // round up instead, staying below table_size
+      }
+      sum += counts[n];
+      const int count_log = FloorLog2Nonzero(static_cast<uint32_t>(counts[n]));
+      if (count_log > remainder_log) {
+        remainder_pos = n;  // first symbol with the largest count_log so far
+        remainder_log = count_log;
+      }
+    }
+  }
+  JXL_ASSERT(remainder_pos != -1);
+  // NOTE: This is the only place where counts could go negative. We could
+  // detect that, return false and make ANSHistBin uint32_t.
+  counts[remainder_pos] -= sum - table_size;  // brings the running total to table_size
+  *omit_pos = remainder_pos;
+  return counts[remainder_pos] > 0;
+}
+
+Status NormalizeCounts(ANSHistBin* counts, int* omit_pos, const int length,
+                       const int precision_bits, uint32_t shift,
+                       int* num_symbols, int* symbols) {
+  // Rescales `counts` in place towards a total of 1 << precision_bits.
+  const int32_t table_size = 1 << precision_bits;
+  uint64_t sum_of_counts = 0;
+  int used_symbols = 0;
+  int end_symbol = 0;  // one past the last symbol with a positive count
+  for (int idx = 0; idx < length; ++idx) {
+    sum_of_counts += counts[idx];
+    if (counts[idx] <= 0) continue;
+    // Only the first kMaxNumSymbolsForSmallCode used symbols are recorded.
+    if (used_symbols < kMaxNumSymbolsForSmallCode) symbols[used_symbols] = idx;
+    ++used_symbols;
+    end_symbol = idx + 1;
+  }
+  *num_symbols = used_symbols;
+  if (used_symbols == 0) return true;  // empty histogram, nothing to do
+  if (used_symbols == 1) {
+    // A single used symbol takes the whole table.
+    counts[symbols[0]] = table_size;
+    return true;
+  }
+  if (used_symbols > table_size) {
+    return JXL_FAILURE("Too many entries in an ANS histogram");
+  }
+
+  const float scale = 1.f * table_size / sum_of_counts;
+  std::vector<float> targets(end_symbol);
+  for (size_t idx = 0; idx < targets.size(); ++idx) {
+    targets[idx] = scale * counts[idx];
+  }
+  // Try the plain rounding first. If that fails to keep the histogram positive
+  // wherever the original one was, retry with the alternative mechanism.
+  const bool rebalanced =
+      RebalanceHistogram<false>(&targets[0], end_symbol, table_size, shift,
+                                omit_pos, counts) ||
+      RebalanceHistogram<true>(&targets[0], end_symbol, table_size, shift,
+                               omit_pos, counts);
+  if (!rebalanced) {
+    return JXL_FAILURE("Logic error: couldn't rebalance a histogram");
+  }
+  return true;
+}
+
+struct SizeWriter {  // Writer stand-in that only tallies how much would be written.
+  size_t size = 0;  // running total of the `num` arguments passed to Write()
+  void Write(size_t num, size_t /*bits*/) { size = size + num; }
+};
+
+template <typename Writer>
+void StoreVarLenUint8(size_t n, Writer* writer) {
+  JXL_DASSERT(n <= 255);
+  // Encoding: a single flag bit tells whether n is non-zero. If it is, the
+  // position of the leading one follows in 3 bits, and after that the bits
+  // of n below the leading one.
+  writer->Write(1, n == 0 ? 0 : 1);
+  if (n == 0) return;
+  const size_t exponent = FloorLog2Nonzero(n);
+  writer->Write(3, exponent);
+  writer->Write(exponent, n - (1ULL << exponent));
+}
+
+template <typename Writer>
+void StoreVarLenUint16(size_t n, Writer* writer) {
+  JXL_DASSERT(n <= 65535);
+  // One bit says whether n is zero. For a non-zero n it is followed by
+  // floor(log2(n)) stored in 4 bits and then by that many low bits of n
+  // (n with its leading one removed).
+  writer->Write(1, n == 0 ? 0 : 1);
+  if (n == 0) return;
+  const size_t top_bit = FloorLog2Nonzero(n);
+  writer->Write(4, top_bit);
+  writer->Write(top_bit, n - (1ULL << top_bit));
+}
+
+template <typename Writer>  // Writer must provide Write(number_of_bits, value)
+bool EncodeCounts(const ANSHistBin* counts, const int alphabet_size,
+                  const int omit_pos, const int num_symbols, uint32_t shift,
+                  const int* symbols, Writer* writer) {  // returns false only if `length` is too large to store
+  bool ok = true;
+  if (num_symbols <= 2) {
+    // Small tree marker to encode 1-2 symbols.
+    writer->Write(1, 1);
+    if (num_symbols == 0) {
+      writer->Write(1, 0);
+      StoreVarLenUint8(0, writer);
+    } else {
+      writer->Write(1, num_symbols - 1);
+      for (int i = 0; i < num_symbols; ++i) {
+        StoreVarLenUint8(symbols[i], writer);
+      }
+    }
+    if (num_symbols == 2) {
+      writer->Write(ANS_LOG_TAB_SIZE, counts[symbols[0]]);  // only the first symbol's count is stored
+    }
+  } else {
+    // Mark non-small tree.
+    writer->Write(1, 0);
+    // Mark non-flat histogram.
+    writer->Write(1, 0);
+
+    // Precompute sequences for RLE encoding. Contains the number of identical
+    // values starting at a given index. Only contains the value at the first
+    // element of the series.
+    std::vector<uint32_t> same(alphabet_size, 0);
+    int last = 0;
+    for (int i = 1; i < alphabet_size; i++) {
+      // Store the sequence length once different symbol reached, or we're at
+      // the end, or the length is longer than we can encode, or we are at
+      // the omit_pos. We don't support including the omit_pos in an RLE
+      // sequence because this value may use a different amount of log2 bits
+      // than standard, it is too complex to handle in the decoder.
+      if (counts[i] != counts[last] || i + 1 == alphabet_size ||
+          (i - last) >= 255 || i == omit_pos || i == omit_pos + 1) {
+        same[last] = (i - last);
+        last = i + 1;
+      }
+    }
+
+    int length = 0;  // one past the last index that is omit_pos or has a positive count
+    std::vector<int> logcounts(alphabet_size);
+    int omit_log = 0;  // logcount that will be stored for the omit_pos entry
+    for (int i = 0; i < alphabet_size; ++i) {
+      JXL_ASSERT(counts[i] <= ANS_TAB_SIZE);
+      JXL_ASSERT(counts[i] >= 0);
+      if (i == omit_pos) {
+        length = i + 1;
+      } else if (counts[i] > 0) {
+        logcounts[i] = FloorLog2Nonzero(static_cast<uint32_t>(counts[i])) + 1;
+        length = i + 1;
+        if (i < omit_pos) {
+          omit_log = std::max(omit_log, logcounts[i] + 1);
+        } else {
+          omit_log = std::max(omit_log, logcounts[i]);
+        }
+      }
+    }
+    logcounts[omit_pos] = omit_log;
+
+    // Elias gamma-like code for shift. Only difference is that if the number
+    // of bits to be encoded is equal to FloorLog2(ANS_LOG_TAB_SIZE+1), we skip
+    // the terminating 0 in unary coding.
+    int upper_bound_log = FloorLog2Nonzero(ANS_LOG_TAB_SIZE + 1);
+    int log = FloorLog2Nonzero(shift + 1);
+    writer->Write(log, (1 << log) - 1);
+    if (log != upper_bound_log) writer->Write(1, 0);
+    writer->Write(log, ((1 << log) - 1) & (shift + 1));
+
+    // Since num_symbols >= 3, we know that length >= 3, therefore we encode
+    // length - 3.
+    if (length - 3 > 255) {
+      // Pretend that everything is OK, but complain about correctness later.
+      StoreVarLenUint8(255, writer);
+      ok = false;
+    } else {
+      StoreVarLenUint8(length - 3, writer);
+    }
+
+    // The logcount values are encoded with a static Huffman code.
+    static const size_t kMinReps = 4;
+    size_t rep = ANS_LOG_TAB_SIZE + 1;  // index of the RLE entry in the kLogCount* tables
+    for (int i = 0; i < length; ++i) {  // first pass: logcount codes and RLE markers
+      if (i > 0 && same[i - 1] > kMinReps) {
+        // Encode the RLE symbol and skip the repeated ones.
+        writer->Write(kLogCountBitLengths[rep], kLogCountSymbols[rep]);
+        StoreVarLenUint8(same[i - 1] - kMinReps - 1, writer);
+        i += same[i - 1] - 2;
+        continue;
+      }
+      writer->Write(kLogCountBitLengths[logcounts[i]],
+                    kLogCountSymbols[logcounts[i]]);
+    }
+    for (int i = 0; i < length; ++i) {  // second pass: remaining bits of each count
+      if (i > 0 && same[i - 1] > kMinReps) {
+        // Skip symbols encoded by RLE.
+        i += same[i - 1] - 2;
+        continue;
+      }
+      if (logcounts[i] > 1 && i != omit_pos) {
+        int bitcount = GetPopulationCountPrecision(logcounts[i] - 1, shift);
+        int drop_bits = logcounts[i] - 1 - bitcount;
+        JXL_CHECK((counts[i] & ((1 << drop_bits) - 1)) == 0);  // the dropped low bits must already be zero
+        writer->Write(bitcount, (counts[i] >> drop_bits) - (1 << bitcount));
+      }
+    }
+  }
+  return ok;
+}
+
+void EncodeFlatHistogram(const int alphabet_size, BitWriter* writer) {
+  // Header flags, one bit each: 0 = not a small tree, 1 = uniform histogram.
+  writer->Write(1, 0);
+  writer->Write(1, 1);
+  JXL_ASSERT(alphabet_size > 0);
+  // The alphabet size follows, stored minus one.
+  const size_t size_minus_one = alphabet_size - 1;
+  StoreVarLenUint8(size_minus_one, writer);
+}
+
+float ComputeHistoAndDataCost(const ANSHistBin* histogram, size_t alphabet_size,
+                              uint32_t method) {
+  // method 0 selects the flat code; any other value means shift = method - 1.
+  const bool is_flat = (method == 0);
+  if (is_flat) {
+    const float data_bits = EstimateDataBitsFlat(histogram, alphabet_size);
+    return ANS_LOG_TAB_SIZE + 2 + data_bits;
+  }
+  const uint32_t shift = method - 1;
+  int omit_pos = 0;
+  int num_symbols;
+  int symbols[kMaxNumSymbolsForSmallCode] = {};
+  std::vector<ANSHistBin> normalized(histogram, histogram + alphabet_size);
+  JXL_CHECK(NormalizeCounts(normalized.data(), &omit_pos, alphabet_size,
+                            ANS_LOG_TAB_SIZE, shift, &num_symbols, symbols));
+  // Dry run: only the header size is wanted, so the status result is dropped.
+  SizeWriter header;
+  (void)EncodeCounts(normalized.data(), alphabet_size, omit_pos, num_symbols,
+                     shift, symbols, &header);
+  return header.size + EstimateDataBits(histogram, normalized.data(), alphabet_size);
+}
+
+uint32_t ComputeBestMethod(
+    const ANSHistBin* histogram, size_t alphabet_size, float* cost,
+    HistogramParams::ANSHistogramStrategy ans_histogram_strategy) {
+  using Strategy = HistogramParams::ANSHistogramStrategy;
+  // kPrecise tries every shift, others every second one; kFast stops early.
+  const uint32_t step = ans_histogram_strategy == Strategy::kPrecise ? 1 : 2;
+  const bool stop_early = ans_histogram_strategy == Strategy::kFast;
+  size_t best_method = 0;  // method 0 (flat code) is the baseline
+  float best_cost = ComputeHistoAndDataCost(histogram, alphabet_size, 0);
+  for (uint32_t shift = 0; shift <= ANS_LOG_TAB_SIZE; shift += step) {
+    const float shift_cost =
+        ComputeHistoAndDataCost(histogram, alphabet_size, shift + 1);
+    if (shift_cost < best_cost) {
+      best_method = shift + 1;
+      best_cost = shift_cost;
+    } else if (stop_early) {
+      break;
+    }
+  }
+  *cost = best_cost;
+  return best_method;
+}
+
+}  // namespace
+
+// Returns an estimate of the cost (in bits) of encoding this histogram and the
+// corresponding data; fills `info` and, if `writer` is non-null, writes the code.
+size_t BuildAndStoreANSEncodingData(
+    HistogramParams::ANSHistogramStrategy ans_histogram_strategy,
+    const ANSHistBin* histogram, size_t alphabet_size, size_t log_alpha_size,
+    bool use_prefix_code, ANSEncSymbolInfo* info, BitWriter* writer) {
+  if (use_prefix_code) {
+    if (alphabet_size <= 1) return 0;  // trivial alphabet: nothing written, `info` untouched
+    std::vector<uint32_t> histo(alphabet_size);
+    for (size_t i = 0; i < alphabet_size; i++) {
+      histo[i] = histogram[i];
+      JXL_CHECK(histogram[i] >= 0);
+    }
+    size_t cost = 0;
+    {
+      std::vector<uint8_t> depths(alphabet_size);
+      std::vector<uint16_t> bits(alphabet_size);
+      BitWriter tmp_writer;
+      BitWriter* w = writer ? writer : &tmp_writer;  // scratch writer when only the cost is wanted
+      size_t start = w->BitsWritten();
+      BitWriter::Allotment allotment(
+          w, 8 * alphabet_size + 8);  // safe upper bound
+      BuildAndStoreHuffmanTree(histo.data(), alphabet_size, depths.data(),
+                               bits.data(), w);
+      ReclaimAndCharge(w, &allotment, 0, /*aux_out=*/nullptr);
+
+      for (size_t i = 0; i < alphabet_size; i++) {
+        info[i].bits = depths[i] == 0 ? 0 : bits[i];
+        info[i].depth = depths[i];
+      }
+      cost = w->BitsWritten() - start;  // size of the stored Huffman tree
+    }
+    // Estimate data cost.
+    for (size_t i = 0; i < alphabet_size; i++) {
+      cost += histogram[i] * info[i].depth;
+    }
+    return cost;
+  }
+  JXL_ASSERT(alphabet_size <= ANS_TAB_SIZE);
+  // Ensure we ignore trailing zeros in the histogram.
+  if (alphabet_size != 0) {
+    size_t largest_symbol = 0;
+    for (size_t i = 0; i < alphabet_size; i++) {
+      if (histogram[i] != 0) largest_symbol = i;
+    }
+    alphabet_size = largest_symbol + 1;
+  }
+  float cost;
+  uint32_t method = ComputeBestMethod(histogram, alphabet_size, &cost,
+                                      ans_histogram_strategy);
+  JXL_ASSERT(cost >= 0);
+  int num_symbols;
+  int symbols[kMaxNumSymbolsForSmallCode] = {};
+  std::vector<ANSHistBin> counts(histogram, histogram + alphabet_size);
+  if (!counts.empty()) {
+    size_t sum = 0;
+    for (size_t i = 0; i < counts.size(); i++) {
+      sum += counts[i];
+    }
+    if (sum == 0) {
+      counts[0] = ANS_TAB_SIZE;  // all-zero histogram: symbol 0 gets the whole table
+    }
+  }
+  if (method == 0) {  // method 0 is the flat histogram
+    counts = CreateFlatHistogram(alphabet_size, ANS_TAB_SIZE);
+    AliasTable::Entry a[ANS_MAX_ALPHABET_SIZE];
+    InitAliasTable(counts, ANS_TAB_SIZE, log_alpha_size, a);
+    ANSBuildInfoTable(counts.data(), a, alphabet_size, log_alpha_size, info);
+    if (writer != nullptr) {
+      EncodeFlatHistogram(alphabet_size, writer);
+    }
+    return cost;  // float estimate, converted by the size_t return type
+  }
+  int omit_pos = 0;
+  uint32_t shift = method - 1;  // non-flat methods encode shift + 1
+  JXL_CHECK(NormalizeCounts(counts.data(), &omit_pos, alphabet_size,
+                            ANS_LOG_TAB_SIZE, shift, &num_symbols, symbols));
+  AliasTable::Entry a[ANS_MAX_ALPHABET_SIZE];
+  InitAliasTable(counts, ANS_TAB_SIZE, log_alpha_size, a);
+  ANSBuildInfoTable(counts.data(), a, alphabet_size, log_alpha_size, info);
+  if (writer != nullptr) {
+    bool ok = EncodeCounts(counts.data(), alphabet_size, omit_pos, num_symbols,
+                           shift, symbols, writer);
+    (void)ok;  // presumably avoids an unused-variable warning if JXL_DASSERT compiles out
+    JXL_DASSERT(ok);
+  }
+  return cost;  // float estimate, converted by the size_t return type
+}
+
+float ANSPopulationCost(const ANSHistBin* data, size_t alphabet_size) {
+  float estimated_cost;  // filled in below; the chosen method itself is ignored
+  const auto strategy = HistogramParams::ANSHistogramStrategy::kFast;
+  (void)ComputeBestMethod(data, alphabet_size, &estimated_cost, strategy);
+  return estimated_cost;
+}
+
+template <typename Writer>
+void EncodeUintConfig(const HybridUintConfig uint_config, Writer* writer,
+                      size_t log_alpha_size) {
+  // The split exponent comes first, sized to hold values up to log_alpha_size.
+  const size_t split_bits = CeilLog2Nonzero(log_alpha_size + 1);
+  writer->Write(split_bits, uint_config.split_exponent);
+  // With a split exponent of log_alpha_size the msb/lsb fields don't matter.
+  if (uint_config.split_exponent == log_alpha_size) return;
+  const size_t msb_bits = CeilLog2Nonzero(uint_config.split_exponent + 1);
+  writer->Write(msb_bits, uint_config.msb_in_token);
+  const size_t lsb_bits = CeilLog2Nonzero(uint_config.split_exponent -
+                                          uint_config.msb_in_token + 1);
+  writer->Write(lsb_bits, uint_config.lsb_in_token);
+}
+template <typename Writer>
+void EncodeUintConfigs(const std::vector<HybridUintConfig>& uint_config,
+                       Writer* writer, size_t log_alpha_size) {
+  // Configs are written back to back, in order. TODO(veluca): RLE?
+  for (const HybridUintConfig& config : uint_config) {
+    EncodeUintConfig(config, writer, log_alpha_size);
+  }
+}
+template void EncodeUintConfigs(const std::vector<HybridUintConfig>&,
+                                BitWriter*, size_t);
+
+namespace {
+
+void ChooseUintConfigs(const HistogramParams& params,  // picks one HybridUintConfig per clustered histogram
+                       const std::vector<std::vector<Token>>& tokens,
+                       const std::vector<uint8_t>& context_map,
+                       std::vector<Histogram>* clustered_histograms,
+                       EntropyEncodingData* codes, size_t* log_alpha_size) {
+  codes->uint_config.resize(clustered_histograms->size());
+  if (params.uint_method == HistogramParams::HybridUintMethod::kNone) return;  // histograms and *log_alpha_size left as is
+  if (params.uint_method == HistogramParams::HybridUintMethod::kContextMap) {
+    codes->uint_config.clear();
+    codes->uint_config.resize(clustered_histograms->size(),
+                              HybridUintConfig(2, 0, 1));
+    return;
+  }
+
+  // Brute-force method that tries a few options.
+  std::vector<HybridUintConfig> configs;
+  if (params.uint_method == HistogramParams::HybridUintMethod::kBest) {
+    configs = {
+        HybridUintConfig(4, 2, 0),  // default
+        HybridUintConfig(4, 1, 0),  // less precise
+        HybridUintConfig(4, 2, 1),  // add sign
+        HybridUintConfig(4, 2, 2),  // add sign+parity
+        HybridUintConfig(4, 1, 2),  // add parity but less msb
+        // Same as above, but more direct coding.
+        HybridUintConfig(5, 2, 0), HybridUintConfig(5, 1, 0),
+        HybridUintConfig(5, 2, 1), HybridUintConfig(5, 2, 2),
+        HybridUintConfig(5, 1, 2),
+        // Same as above, but less direct coding.
+        HybridUintConfig(3, 2, 0), HybridUintConfig(3, 1, 0),
+        HybridUintConfig(3, 2, 1), HybridUintConfig(3, 1, 2),
+        // For near-lossless.
+        HybridUintConfig(4, 1, 3), HybridUintConfig(5, 1, 4),
+        HybridUintConfig(5, 2, 3), HybridUintConfig(6, 1, 5),
+        HybridUintConfig(6, 2, 4), HybridUintConfig(6, 0, 0),
+        // Other
+        HybridUintConfig(0, 0, 0),   // varlenuint
+        HybridUintConfig(2, 0, 1),   // works well for ctx map
+        HybridUintConfig(7, 0, 0),   // direct coding
+        HybridUintConfig(8, 0, 0),   // direct coding
+        HybridUintConfig(9, 0, 0),   // direct coding
+        HybridUintConfig(10, 0, 0),  // direct coding
+        HybridUintConfig(11, 0, 0),  // direct coding
+        HybridUintConfig(12, 0, 0),  // direct coding
+    };
+  } else if (params.uint_method == HistogramParams::HybridUintMethod::kFast) {
+    configs = {
+        HybridUintConfig(4, 2, 0),  // default
+        HybridUintConfig(4, 1, 2),  // add parity but less msb
+        HybridUintConfig(0, 0, 0),  // smallest histograms
+        HybridUintConfig(2, 0, 1),  // works well for ctx map
+    };
+  }
+
+  std::vector<float> costs(clustered_histograms->size(),
+                           std::numeric_limits<float>::max());  // best cost found so far per histogram
+  std::vector<uint32_t> extra_bits(clustered_histograms->size());
+  std::vector<uint8_t> is_valid(clustered_histograms->size());
+  size_t max_alpha =
+      codes->use_prefix_code ? PREFIX_MAX_ALPHABET_SIZE : ANS_MAX_ALPHABET_SIZE;
+  for (HybridUintConfig cfg : configs) {  // each candidate is evaluated on every histogram
+    std::fill(is_valid.begin(), is_valid.end(), true);
+    std::fill(extra_bits.begin(), extra_bits.end(), 0);
+
+    for (size_t i = 0; i < clustered_histograms->size(); i++) {
+      (*clustered_histograms)[i].Clear();
+    }
+    for (size_t i = 0; i < tokens.size(); ++i) {
+      for (size_t j = 0; j < tokens[i].size(); ++j) {
+        const Token token = tokens[i][j];
+        // TODO(veluca): do not ignore lz77 commands.
+        if (token.is_lz77_length) continue;
+        size_t histo = context_map[token.context];
+        uint32_t tok, nbits, bits;
+        cfg.Encode(token.value, &tok, &nbits, &bits);
+        if (tok >= max_alpha ||
+            (codes->lz77.enabled && tok >= codes->lz77.min_symbol)) {
+          is_valid[histo] = false;  // cfg yields a token outside the usable range here
+          continue;
+        }
+        extra_bits[histo] += nbits;
+        (*clustered_histograms)[histo].Add(tok);
+      }
+    }
+
+    for (size_t i = 0; i < clustered_histograms->size(); i++) {
+      if (!is_valid[i]) continue;
+      float cost = (*clustered_histograms)[i].PopulationCost() + extra_bits[i];
+      if (cost < costs[i]) {
+        codes->uint_config[i] = cfg;
+        costs[i] = cost;
+      }
+    }
+  }
+
+  // Rebuild histograms with the chosen configs; lz77 length tokens are included.
+  for (size_t i = 0; i < clustered_histograms->size(); i++) {
+    (*clustered_histograms)[i].Clear();
+  }
+  *log_alpha_size = 4;  // starting value; grown below until every token fits
+  for (size_t i = 0; i < tokens.size(); ++i) {
+    for (size_t j = 0; j < tokens[i].size(); ++j) {
+      const Token token = tokens[i][j];
+      uint32_t tok, nbits, bits;
+      size_t histo = context_map[token.context];
+      (token.is_lz77_length ? codes->lz77.length_uint_config
+                            : codes->uint_config[histo])
+          .Encode(token.value, &tok, &nbits, &bits);
+      tok += token.is_lz77_length ? codes->lz77.min_symbol : 0;  // lz77 lengths are offset by min_symbol
+      (*clustered_histograms)[histo].Add(tok);
+      while (tok >= (1u << *log_alpha_size)) (*log_alpha_size)++;
+    }
+  }
+#if JXL_ENABLE_ASSERT
+  size_t max_log_alpha_size = codes->use_prefix_code ? PREFIX_MAX_BITS : 8;
+  JXL_ASSERT(*log_alpha_size <= max_log_alpha_size);
+#endif
+}
+
+class HistogramBuilder {  // Collects one histogram per context and builds entropy codes from them.
+ public:
+  explicit HistogramBuilder(const size_t num_contexts)
+      : histograms_(num_contexts) {}
+
+  void VisitSymbol(int symbol, size_t histo_idx) {  // adds `symbol` to histogram `histo_idx`
+    JXL_DASSERT(histo_idx < histograms_.size());
+    histograms_[histo_idx].Add(symbol);
+  }
+
+  // NOTE: `layer` is only for clustered_entropy; caller does ReclaimAndCharge.
+  size_t BuildAndStoreEntropyCodes(  // returns the accumulated cost estimate
+      const HistogramParams& params,
+      const std::vector<std::vector<Token>>& tokens, EntropyEncodingData* codes,
+      std::vector<uint8_t>* context_map, bool use_prefix_code,
+      BitWriter* writer, size_t layer, AuxOut* aux_out) const {  // writer may be null: cost estimation only
+    size_t cost = 0;
+    codes->encoding_info.clear();
+    std::vector<Histogram> clustered_histograms(histograms_);
+    context_map->resize(histograms_.size());
+    if (histograms_.size() > 1) {
+      if (!ans_fuzzer_friendly_) {
+        std::vector<uint32_t> histogram_symbols;
+        ClusterHistograms(params, histograms_, histograms_.size(),
+                          kClustersLimit, &clustered_histograms,
+                          &histogram_symbols);
+        for (size_t c = 0; c < histograms_.size(); ++c) {
+          (*context_map)[c] = static_cast<uint8_t>(histogram_symbols[c]);
+        }
+      } else {
+        // Qualified call: unqualified `fill` would only resolve through ADL.
+        std::fill(context_map->begin(), context_map->end(), 0);
+        size_t max_symbol = 0;
+        for (const Histogram& h : histograms_) {
+          max_symbol = std::max(h.data_.size(), max_symbol);
+        }
+        size_t num_symbols = 1 << CeilLog2Nonzero(max_symbol + 1);
+        clustered_histograms.resize(1);
+        clustered_histograms[0].Clear();
+        for (size_t i = 0; i < num_symbols; i++) {
+          clustered_histograms[0].Add(i);
+        }
+      }
+      if (writer != nullptr) {
+        EncodeContextMap(*context_map, clustered_histograms.size(), writer);
+      }
+    }
+    if (aux_out != nullptr) {
+      for (size_t i = 0; i < clustered_histograms.size(); ++i) {
+        aux_out->layers[layer].clustered_entropy +=
+            clustered_histograms[i].ShannonEntropy();
+      }
+    }
+    codes->use_prefix_code = use_prefix_code;
+    size_t log_alpha_size = codes->lz77.enabled ? 8 : 7;  // Sane default.
+    if (ans_fuzzer_friendly_) {
+      codes->uint_config.clear();
+      codes->uint_config.resize(1, HybridUintConfig(7, 0, 0));
+    } else {
+      ChooseUintConfigs(params, tokens, *context_map, &clustered_histograms,
+                        codes, &log_alpha_size);
+    }
+    if (log_alpha_size < 5) log_alpha_size = 5;  // written below as log_alpha_size - 5 in 2 bits
+    SizeWriter size_writer;  // Used if writer == nullptr to estimate costs.
+    cost += 1;  // the use_prefix_code flag bit
+    if (writer) writer->Write(1, use_prefix_code);
+
+    if (use_prefix_code) {
+      log_alpha_size = PREFIX_MAX_BITS;
+    } else {
+      cost += 2;  // the two bits that store log_alpha_size - 5
+    }
+    if (writer == nullptr) {
+      EncodeUintConfigs(codes->uint_config, &size_writer, log_alpha_size);
+    } else {
+      if (!use_prefix_code) writer->Write(2, log_alpha_size - 5);
+      EncodeUintConfigs(codes->uint_config, writer, log_alpha_size);
+    }
+    if (use_prefix_code) {
+      for (size_t c = 0; c < clustered_histograms.size(); ++c) {
+        size_t num_symbol = 1;  // one past the last non-zero bin, at least 1
+        for (size_t i = 0; i < clustered_histograms[c].data_.size(); i++) {
+          if (clustered_histograms[c].data_[i]) num_symbol = i + 1;
+        }
+        if (writer) {
+          StoreVarLenUint16(num_symbol - 1, writer);
+        } else {
+          StoreVarLenUint16(num_symbol - 1, &size_writer);
+        }
+      }
+    }
+    cost += size_writer.size;  // stays 0 unless writer == nullptr
+    for (size_t c = 0; c < clustered_histograms.size(); ++c) {
+      size_t num_symbol = 1;  // one past the last non-zero bin, at least 1
+      for (size_t i = 0; i < clustered_histograms[c].data_.size(); i++) {
+        if (clustered_histograms[c].data_[i]) num_symbol = i + 1;
+      }
+      codes->encoding_info.emplace_back();
+      codes->encoding_info.back().resize(std::max<size_t>(1, num_symbol));
+
+      BitWriter::Allotment allotment(writer, 256 + num_symbol * 24);
+      cost += BuildAndStoreANSEncodingData(
+          params.ans_histogram_strategy, clustered_histograms[c].data_.data(),
+          num_symbol, log_alpha_size, use_prefix_code,
+          codes->encoding_info.back().data(), writer);
+      allotment.FinishedHistogram(writer);
+      ReclaimAndCharge(writer, &allotment, layer, aux_out);
+    }
+    return cost;
+  }
+
+  const Histogram& Histo(size_t i) const { return histograms_[i]; }  // no bounds check
+
+ private:
+  std::vector<Histogram> histograms_;  // one histogram per context
+};
+
+class SymbolCostEstimator {  // Per-context table of estimated bit costs for each symbol.
+ public:
+  SymbolCostEstimator(size_t num_contexts, bool force_huffman,
+                      const std::vector<std::vector<Token>>& tokens,
+                      const LZ77Params& lz77) {
+    HistogramBuilder builder(num_contexts);
+    // Build histograms for estimating lz77 savings.
+    HybridUintConfig uint_config;  // default-constructed config, used for non-lz77 tokens
+    for (size_t i = 0; i < tokens.size(); ++i) {
+      for (size_t j = 0; j < tokens[i].size(); ++j) {
+        const Token token = tokens[i][j];
+        uint32_t tok, nbits, bits;
+        (token.is_lz77_length ? lz77.length_uint_config : uint_config)
+            .Encode(token.value, &tok, &nbits, &bits);
+        tok += token.is_lz77_length ? lz77.min_symbol : 0;  // lz77 lengths are offset by min_symbol
+        builder.VisitSymbol(tok, token.context);
+      }
+    }
+    max_alphabet_size_ = 0;
+    for (size_t i = 0; i < num_contexts; i++) {
+      max_alphabet_size_ =
+          std::max(max_alphabet_size_, builder.Histo(i).data_.size());
+    }
+    bits_.resize(num_contexts * max_alphabet_size_);  // row-major: one row of costs per context
+    // TODO(veluca): SIMD?
+    add_symbol_cost_.resize(num_contexts);
+    for (size_t i = 0; i < num_contexts; i++) {
+      float inv_total = 1.0f / (builder.Histo(i).total_count_ + 1e-8f);  // epsilon avoids dividing by zero
+      float total_cost = 0;
+      for (size_t j = 0; j < builder.Histo(i).data_.size(); j++) {
+        size_t cnt = builder.Histo(i).data_[j];
+        float cost = 0;  // stays 0 when this symbol accounts for the whole context
+        if (cnt != 0 && cnt != builder.Histo(i).total_count_) {
+          cost = -FastLog2f(cnt * inv_total);
+          if (force_huffman) cost = std::ceil(cost);
+        } else if (cnt == 0) {
+          cost = ANS_LOG_TAB_SIZE;  // Highest possible cost.
+        }
+        bits_[i * max_alphabet_size_ + j] = cost;
+        total_cost += cost * builder.Histo(i).data_[j];
+      }
+      // Penalty for adding a lz77 symbol to this context (only used for static
+      // cost model). Higher penalty for contexts that have a very low
+      // per-symbol entropy.
+      add_symbol_cost_[i] = std::max(0.0f, 6.0f - total_cost * inv_total);
+    }
+  }
+  float Bits(size_t ctx, size_t sym) const {  // stored cost of `sym` in `ctx`; no bounds check
+    return bits_[ctx * max_alphabet_size_ + sym];
+  }
+  float LenCost(size_t ctx, size_t len, const LZ77Params& lz77) const {  // extra bits + token cost of an lz77 length
+    uint32_t nbits, bits, tok;
+    lz77.length_uint_config.Encode(len, &tok, &nbits, &bits);
+    tok += lz77.min_symbol;
+    return nbits + Bits(ctx, tok);
+  }
+  float DistCost(size_t len, const LZ77Params& lz77) const {  // `len` is encoded as the distance value here
+    uint32_t nbits, bits, tok;
+    HybridUintConfig().Encode(len, &tok, &nbits, &bits);
+    return nbits + Bits(lz77.nonserialized_distance_context, tok);
+  }
+  float AddSymbolCost(size_t idx) const { return add_symbol_cost_[idx]; }
+
+ private:
+  size_t max_alphabet_size_;  // row stride of bits_
+  std::vector<float> bits_;  // num_contexts * max_alphabet_size_ cost entries
+  std::vector<float> add_symbol_cost_;  // one penalty value per context
+};
+
+void ApplyLZ77_RLE(const HistogramParams& params, size_t num_contexts,
+                   const std::vector<std::vector<Token>>& tokens,
+                   LZ77Params& lz77,
+                   std::vector<std::vector<Token>>& tokens_lz77) {
+  // RLE-only pass: only runs repeating the previous value are replaced. TODO(veluca): tune heuristics here.
+  SymbolCostEstimator sce(num_contexts, params.force_huffman, tokens, lz77);
+  float bit_decrease = 0;  // Sum of the estimated savings of all replacements.
+  size_t total_symbols = 0;
+  tokens_lz77.resize(tokens.size());
+  std::vector<float> sym_cost;
+  HybridUintConfig uint_config;
+  for (size_t stream = 0; stream < tokens.size(); stream++) {
+    size_t distance_multiplier =
+        params.image_widths.size() > stream ? params.image_widths[stream] : 0;
+    const auto& in = tokens[stream];
+    auto& out = tokens_lz77[stream];
+    total_symbols += in.size();
+    // Cumulative sum of bit costs: sym_cost[k] estimates the cost of in[0..k).
+    sym_cost.resize(in.size() + 1);
+    for (size_t i = 0; i < in.size(); i++) {
+      uint32_t tok, nbits, unused_bits;
+      uint_config.Encode(in[i].value, &tok, &nbits, &unused_bits);
+      sym_cost[i + 1] = sce.Bits(in[i].context, tok) + nbits + sym_cost[i];
+    }
+    out.reserve(in.size());
+    for (size_t i = 0; i < in.size(); i++) {
+      size_t num_to_copy = 0;
+      size_t distance_symbol = 0;  // Symbol used for the run when no distance multiplier is set.
+      if (distance_multiplier != 0) {
+        distance_symbol = 1;  // Special distance 1 if enabled.
+        JXL_DASSERT(kSpecialDistances[1][0] == 1);
+        JXL_DASSERT(kSpecialDistances[1][1] == 0);
+      }
+      if (i > 0) {
+        for (; i + num_to_copy < in.size(); num_to_copy++) {
+          if (in[i + num_to_copy].value != in[i - 1].value) {  // Only values are compared, not contexts.
+            break;
+          }
+        }
+      }
+      if (num_to_copy == 0) {
+        out.push_back(in[i]);
+        continue;
+      }
+      float cost = sym_cost[i + num_to_copy] - sym_cost[i];
+      // This unsigned subtraction might wrap around, but that's OK: lz77_len is only used when num_to_copy >= lz77.min_length.
+      size_t lz77_len = num_to_copy - lz77.min_length;
+      float lz77_cost = num_to_copy >= lz77.min_length
+                            ? CeilLog2Nonzero(lz77_len + 1) + 1
+                            : 0;
+      if (num_to_copy < lz77.min_length || cost <= lz77_cost) {
+        for (size_t j = 0; j < num_to_copy; j++) {
+          out.push_back(in[i + j]);
+        }
+        i += num_to_copy - 1;
+        continue;
+      }
+      // Output the LZ77 length
+      out.emplace_back(in[i].context, lz77_len);
+      out.back().is_lz77_length = true;
+      i += num_to_copy - 1;
+      bit_decrease += cost - lz77_cost;
+      // Output the LZ77 copy distance.
+      out.emplace_back(lz77.nonserialized_distance_context, distance_symbol);
+    }
+  }
+
+  if (bit_decrease > total_symbols * 0.2 + 16) {  // Enable LZ77 only when the estimated saving clears this threshold.
+    lz77.enabled = true;
+  }
+}
+
+// Hash chain for LZ77 matching over the token values of one stream (token contexts are not stored).
+struct HashChain {
+  size_t size_;  // Number of tokens in the stream.
+  std::vector<uint32_t> data_;  // Copy of the token values.
+
+  unsigned hash_num_values_ = 32768;
+  unsigned hash_mask_ = hash_num_values_ - 1;
+  unsigned hash_shift_ = 5;
+
+  std::vector<int> head;  // head[hash]: last window position stored for that hash, or -1.
+  std::vector<uint32_t> chain;  // chain[wpos]: earlier window position with the same hash; own index = none.
+  std::vector<int> val;  // val[wpos]: hash recorded at that window position, or -1.
+
+  // Speed up repetitions of zero (same head/chain layout, keyed by numzeros).
+  std::vector<int> headz;
+  std::vector<uint32_t> chainz;
+  std::vector<uint32_t> zeros;  // zeros[wpos]: value of numzeros recorded at that position.
+  uint32_t numzeros = 0;
+
+  size_t window_size_;
+  size_t window_mask_;  // window_size_ - 1; positions are wrapped with `& window_mask_`.
+  size_t min_length_;
+  size_t max_length_;
+
+  // Map of special distance codes (distance -> index into kSpecialDistances).
+  std::unordered_map<int, int> special_dist_table_;
+  size_t num_special_distances_ = 0;
+
+  uint32_t maxchainlength = 256;  // Max candidates per search; window_size_ to allow all
+
+  HashChain(const Token* data, size_t size, size_t window_size,
+            size_t min_length, size_t max_length, size_t distance_multiplier)
+      : size_(size),
+        window_size_(window_size),
+        window_mask_(window_size - 1),  // Used as a bit mask: window_size is expected to be a power of two.
+        min_length_(min_length),
+        max_length_(max_length) {
+    data_.resize(size);
+    for (size_t i = 0; i < size; i++) {
+      data_[i] = data[i].value;
+    }
+
+    head.resize(hash_num_values_, -1);
+    val.resize(window_size_, -1);
+    chain.resize(window_size_);
+    for (uint32_t i = 0; i < window_size_; ++i) {
+      chain[i] = i;  // same value as index indicates uninitialized
+    }
+
+    zeros.resize(window_size_);
+    headz.resize(window_size_ + 1, -1);
+    chainz.resize(window_size_);
+    for (uint32_t i = 0; i < window_size_; ++i) {
+      chainz[i] = i;
+    }
+    // Translate distance to special distance code.
+    if (distance_multiplier) {
+      // Count down, so if due to small distance multiplier multiple distances
+      // map to the same code, the smallest code will be used in the end.
+      for (int i = kNumSpecialDistances - 1; i >= 0; --i) {
+        int xi = kSpecialDistances[i][0];
+        int yi = kSpecialDistances[i][1];
+        int distance = yi * distance_multiplier + xi;
+        // Ensure that we map distance 1 to the lowest symbols.
+        if (distance < 1) distance = 1;
+        special_dist_table_[distance] = i;
+      }
+      num_special_distances_ = kNumSpecialDistances;
+    }
+  }
+
+  uint32_t GetHash(size_t pos) const {  // Hash of the three values starting at pos; 0 near the end of the data.
+    uint32_t result = 0;
+    if (pos + 2 < size_) {
+      // TODO(lode): take the MSB's of the uint32_t values into account as well,
+      // given that the hash code itself is less than 32 bits.
+      result ^= (uint32_t)(data_[pos + 0] << 0u);
+      result ^= (uint32_t)(data_[pos + 1] << hash_shift_);
+      result ^= (uint32_t)(data_[pos + 2] << (hash_shift_ * 2));
+    } else {
+      // No need to compute hash of last 2 values, the length 2 is too short.
+      return 0;
+    }
+    return result & hash_mask_;
+  }
+
+  uint32_t CountZeros(size_t pos, uint32_t prevzeros) const {  // Zero-run length at pos, capped to the window; reuses prevzeros when it is non-zero.
+    size_t end = pos + window_size_;
+    if (end > size_) end = size_;
+    if (prevzeros > 0) {
+      if (prevzeros >= window_mask_ && data_[end - 1] == 0 &&
+          end == pos + window_size_) {
+        return prevzeros;
+      } else {
+        return prevzeros - 1;
+      }
+    }
+    uint32_t num = 0;
+    while (pos + num < end && data_[pos + num] == 0) num++;
+    return num;
+  }
+
+  void Update(size_t pos) {  // Inserts position pos into both the hash chain and the zero-run chain.
+    uint32_t hashval = GetHash(pos);
+    uint32_t wpos = pos & window_mask_;
+
+    val[wpos] = (int)hashval;
+    if (head[hashval] != -1) chain[wpos] = head[hashval];
+    head[hashval] = wpos;
+
+    if (pos > 0 && data_[pos] != data_[pos - 1]) numzeros = 0;
+    numzeros = CountZeros(pos, numzeros);
+
+    zeros[wpos] = numzeros;
+    if (headz[numzeros] != -1) chainz[wpos] = headz[numzeros];
+    headz[numzeros] = wpos;
+  }
+
+  void Update(size_t pos, size_t len) {  // Inserts positions pos .. pos + len - 1 in order.
+    for (size_t i = 0; i < len; i++) {
+      Update(pos + i);
+    }
+  }
+
+  template <typename CB>
+  void FindMatches(size_t pos, int max_dist, const CB& found_match) const {  // Calls found_match(len, dist_symbol) per accepted candidate; max_dist is not used in the body.
+    uint32_t wpos = pos & window_mask_;
+    uint32_t hashval = GetHash(pos);
+    uint32_t hashpos = chain[wpos];
+
+    int prev_dist = 0;
+    int end = std::min<int>(pos + max_length_, size_);
+    uint32_t chainlength = 0;
+    uint32_t best_len = 0;
+    for (;;) {
+      int dist = (hashpos <= wpos) ? (wpos - hashpos)
+                                   : (wpos - hashpos + window_mask_ + 1);
+      if (dist < prev_dist) break;
+      prev_dist = dist;
+      uint32_t len = 0;
+      if (dist > 0) {
+        int i = pos;
+        int j = pos - dist;
+        if (numzeros > 3) {
+          int r = std::min<int>(numzeros - 1, zeros[hashpos]);
+          if (i + r >= end) r = end - i - 1;
+          i += r;
+          j += r;
+        }
+        while (i < end && data_[i] == data_[j]) {
+          i++;
+          j++;
+        }
+        len = i - pos;
+        // This can trigger even if the new length is slightly smaller than the
+        // best length, because it is possible for a slightly cheaper distance
+        // symbol to occur.
+        if (len >= min_length_ && len + 2 >= best_len) {
+          auto it = special_dist_table_.find(dist);
+          int dist_symbol = (it == special_dist_table_.end())
+                                ? (num_special_distances_ + dist - 1)
+                                : it->second;
+          found_match(len, dist_symbol);
+          if (len > best_len) best_len = len;
+        }
+      }
+
+      chainlength++;
+      if (chainlength >= maxchainlength) break;
+
+      if (numzeros >= 3 && len > numzeros) {
+        if (hashpos == chainz[hashpos]) break;
+        hashpos = chainz[hashpos];
+        if (zeros[hashpos] != numzeros) break;
+      } else {
+        if (hashpos == chain[hashpos]) break;
+        hashpos = chain[hashpos];
+        if (val[hashpos] != (int)hashval) break;  // outdated hash value
+      }
+    }
+  }
+  void FindMatch(size_t pos, int max_dist, size_t* result_dist_symbol,
+                 size_t* result_len) const {  // Keeps the longest match; ties go to the smaller distance symbol.
+    *result_dist_symbol = 0;
+    *result_len = 1;  // Result when no match is reported.
+    FindMatches(pos, max_dist, [&](size_t len, size_t dist_symbol) {
+      if (len > *result_len ||
+          (len == *result_len && *result_dist_symbol > dist_symbol)) {
+        *result_len = len;
+        *result_dist_symbol = dist_symbol;
+      }
+    });
+  }
+};
+
+float LenCost(size_t len) {
+  constexpr float kCostTable[] = {  // Fixed per-token cost estimates, in bits.
+      2.797667318563126,  3.213177690381199,  2.5706009246743737,
+      2.408392498667534,  2.829649191872326,  3.3923087753324577,
+      4.029267451554331,  4.415576699706408,  4.509357574741465,
+      9.21481543803004,   10.020590190114898, 11.858671627804766,
+      12.45853300490526,  11.713105831990857, 12.561996324849314,
+      13.775477692278367, 13.174027068768641,
+  };
+  constexpr size_t kTableSize = sizeof(kCostTable) / sizeof(kCostTable[0]);
+  uint32_t token, num_extra_bits, extra_bits;
+  HybridUintConfig(1, 0, 0).Encode(len, &token, &num_extra_bits, &extra_bits);
+  const size_t index = std::min<size_t>(token, kTableSize - 1);  // Clamp to the last entry.
+  return kCostTable[index] + num_extra_bits;
+}
+
+// TODO(veluca): this does not take into account usage or non-usage of distance
+// multipliers.
+float DistCost(size_t dist) {
+  constexpr float kCostTable[] = {  // Fixed per-token cost estimates, in bits.
+      6.368282626312716,  5.680793277090298,  8.347404197105247,
+      7.641619201599141,  6.914328374119438,  7.959808291537444,
+      8.70023120759855,   8.71378518934703,   9.379132523982769,
+      9.110472749092708,  9.159029569270908,  9.430936766731973,
+      7.278284055315169,  7.8278514904267755, 10.026641158289236,
+      9.976049229827066,  9.64351607048908,   9.563403863480442,
+      10.171474111762747, 10.45950155077234,  9.994813912104219,
+      10.322524683741156, 8.465808729388186,  8.756254166066853,
+      10.160930174662234, 10.247329273413435, 10.04090403724809,
+      10.129398517544082, 9.342311691539546,  9.07608009102374,
+      10.104799540677513, 10.378079384990906, 10.165828974075072,
+      10.337595322341553, 7.940557464567944,  10.575665823319431,
+      11.023344321751955, 10.736144698831827, 11.118277044595054,
+      7.468468230648442,  10.738305230932939, 10.906980780216568,
+      10.163468216353817, 10.17805759656433,  11.167283670483565,
+      11.147050200274544, 10.517921919244333, 10.651764778156886,
+      10.17074446448919,  11.217636876224745, 11.261630721139484,
+      11.403140815247259, 10.892472096873417, 11.1859607804481,
+      8.017346947551262,  7.895143720278828,  11.036577113822025,
+      11.170562110315794, 10.326988722591086, 10.40872184751056,
+      11.213498225466386, 11.30580635516863,  10.672272515665442,
+      10.768069466228063, 11.145257364153565, 11.64668307145549,
+      10.593156194627339, 11.207499484844943, 10.767517766396908,
+      10.826629811407042, 10.737764794499988, 10.6200448518045,
+      10.191315385198092, 8.468384171390085,  11.731295299170432,
+      11.824619886654398, 10.41518844301179,  10.16310536548649,
+      10.539423685097576, 10.495136599328031, 10.469112847728267,
+      11.72057686174922,  10.910326337834674, 11.378921834673758,
+      11.847759036098536, 11.92071647623854,  10.810628276345282,
+      11.008601085273893, 11.910326337834674, 11.949212023423133,
+      11.298614839104337, 11.611603659010392, 10.472930394619985,
+      11.835564720850282, 11.523267392285337, 12.01055816679611,
+      8.413029688994023,  11.895784139536406, 11.984679534970505,
+      11.220654278717394, 11.716311684833672, 10.61036646226114,
+      10.89849965960364,  10.203762898863669, 10.997560826267238,
+      11.484217379438984, 11.792836176993665, 12.24310468755171,
+      11.464858097919262, 12.212747017409377, 11.425595666074955,
+      11.572048533398757, 12.742093965163013, 11.381874288645637,
+      12.191870445817015, 11.683156920035426, 11.152442115262197,
+      11.90303691580457,  11.653292787169159, 11.938615382266098,
+      16.970641701570223, 16.853602280380002, 17.26240782594733,
+      16.644655390108507, 17.14310889757499,  16.910935455445955,
+      17.505678976959697, 17.213498225466388, 2.4162310293553024,
+      3.494587244462329,  3.5258600986408344, 3.4959806589517095,
+      3.098390886949687,  3.343454654302911,  3.588847442290287,
+      4.14614790111827,   5.152948641990529,  7.433696808092598,
+      9.716311684833672,
+  };
+  constexpr size_t kTableSize = sizeof(kCostTable) / sizeof(kCostTable[0]);
+  uint32_t token, num_extra_bits, extra_bits;
+  HybridUintConfig(7, 0, 0).Encode(dist, &token, &num_extra_bits, &extra_bits);
+  const size_t index = std::min<size_t>(token, kTableSize - 1);  // Clamp to the last entry.
+  return kCostTable[index] + num_extra_bits;
+}
+
+void ApplyLZ77_LZ77(const HistogramParams& params, size_t num_contexts,
+                    const std::vector<std::vector<Token>>& tokens,
+                    LZ77Params& lz77,
+                    std::vector<std::vector<Token>>& tokens_lz77) {
+  // Greedy LZ77 with one-step lazy matching, using HashChain per stream. TODO(veluca): tune heuristics here.
+  SymbolCostEstimator sce(num_contexts, params.force_huffman, tokens, lz77);
+  float bit_decrease = 0;  // Sum of the estimated savings of all accepted matches.
+  size_t total_symbols = 0;
+  tokens_lz77.resize(tokens.size());
+  HybridUintConfig uint_config;
+  std::vector<float> sym_cost;
+  for (size_t stream = 0; stream < tokens.size(); stream++) {
+    size_t distance_multiplier =
+        params.image_widths.size() > stream ? params.image_widths[stream] : 0;
+    const auto& in = tokens[stream];
+    auto& out = tokens_lz77[stream];
+    total_symbols += in.size();
+    // Cumulative sum of bit costs: sym_cost[k] estimates the cost of in[0..k).
+    sym_cost.resize(in.size() + 1);
+    for (size_t i = 0; i < in.size(); i++) {
+      uint32_t tok, nbits, unused_bits;
+      uint_config.Encode(in[i].value, &tok, &nbits, &unused_bits);
+      sym_cost[i + 1] = sce.Bits(in[i].context, tok) + nbits + sym_cost[i];
+    }
+
+    out.reserve(in.size());
+    size_t max_distance = in.size();
+    size_t min_length = lz77.min_length;
+    JXL_ASSERT(min_length >= 3);
+    size_t max_length = in.size();
+
+    // Use next power of two as window size (capped once it reaches kWindowSize).
+    size_t window_size = 1;
+    while (window_size < max_distance && window_size < kWindowSize) {
+      window_size <<= 1;
+    }
+
+    HashChain chain(in.data(), in.size(), window_size, min_length, max_length,
+                    distance_multiplier);
+    size_t len, dist_symbol;
+
+    const size_t max_lazy_match_len = 256;  // 0 to disable lazy matching
+
+    // Whether the next symbol was already updated (to test lazy matching)
+    bool already_updated = false;
+    for (size_t i = 0; i < in.size(); i++) {
+      out.push_back(in[i]);  // Pushed as a literal first; rewritten in place if a match is accepted.
+      if (!already_updated) chain.Update(i);
+      already_updated = false;
+      chain.FindMatch(i, max_distance, &dist_symbol, &len);
+      if (len >= min_length) {
+        if (len < max_lazy_match_len && i + 1 < in.size()) {
+          // Try length at next symbol lazy matching
+          chain.Update(i + 1);
+          already_updated = true;
+          size_t len2, dist_symbol2;
+          chain.FindMatch(i + 1, max_distance, &dist_symbol2, &len2);
+          if (len2 > len) {
+            // Use the lazy match. Add literal, and use the next length starting
+            // from the next token.
+            ++i;
+            already_updated = false;
+            len = len2;
+            dist_symbol = dist_symbol2;
+            out.push_back(in[i]);
+          }
+        }
+
+        float cost = sym_cost[i + len] - sym_cost[i];  // Estimated cost of the same tokens as literals.
+        size_t lz77_len = len - lz77.min_length;
+        float lz77_cost = LenCost(lz77_len) + DistCost(dist_symbol) +
+                          sce.AddSymbolCost(out.back().context);
+
+        if (lz77_cost <= cost) {
+          out.back().value = len - min_length;
+          out.back().is_lz77_length = true;
+          out.emplace_back(lz77.nonserialized_distance_context, dist_symbol);
+          bit_decrease += cost - lz77_cost;
+        } else {
+          // LZ77 match ignored, and symbol already pushed. Push all other
+          // symbols and skip.
+          for (size_t j = 1; j < len; j++) {
+            out.push_back(in[i + j]);
+          }
+        }
+
+        if (already_updated) {  // Position i + 1 is already in the chain; insert the rest of the match.
+          chain.Update(i + 2, len - 2);
+          already_updated = false;
+        } else {
+          chain.Update(i + 1, len - 1);
+        }
+        i += len - 1;
+      } else {
+        // Literal, already pushed
+      }
+    }
+  }
+
+  if (bit_decrease > total_symbols * 0.2 + 16) {  // Enable LZ77 only when the estimated saving clears this threshold.
+    lz77.enabled = true;
+  }
+}
+
+void ApplyLZ77_Optimal(const HistogramParams& params, size_t num_contexts,
+                       const std::vector<std::vector<Token>>& tokens,
+                       LZ77Params& lz77,
+                       std::vector<std::vector<Token>>& tokens_lz77) {
+  std::vector<std::vector<Token>> tokens_for_cost_estimate;
+  ApplyLZ77_LZ77(params, num_contexts, tokens, lz77, tokens_for_cost_estimate);
+  // If greedy-LZ77 does not give better compression than no-lz77, no reason to
+  // run the optimal matching. tokens_lz77 is left untouched in that case.
+  if (!lz77.enabled) return;
+  SymbolCostEstimator sce(num_contexts + 1, params.force_huffman,  // One extra context, for the distance tokens.
+                          tokens_for_cost_estimate, lz77);
+  tokens_lz77.resize(tokens.size());
+  HybridUintConfig uint_config;
+  std::vector<float> sym_cost;
+  std::vector<uint32_t> dist_symbols;
+  for (size_t stream = 0; stream < tokens.size(); stream++) {
+    size_t distance_multiplier =
+        params.image_widths.size() > stream ? params.image_widths[stream] : 0;
+    const auto& in = tokens[stream];
+    auto& out = tokens_lz77[stream];
+    // Cumulative sum of bit costs: sym_cost[k] estimates the cost of in[0..k).
+    sym_cost.resize(in.size() + 1);
+    for (size_t i = 0; i < in.size(); i++) {
+      uint32_t tok, nbits, unused_bits;
+      uint_config.Encode(in[i].value, &tok, &nbits, &unused_bits);
+      sym_cost[i + 1] = sce.Bits(in[i].context, tok) + nbits + sym_cost[i];
+    }
+
+    out.reserve(in.size());
+    size_t max_distance = in.size();
+    size_t min_length = lz77.min_length;
+    JXL_ASSERT(min_length >= 3);
+    size_t max_length = in.size();
+
+    // Use next power of two as window size (capped once it reaches kWindowSize).
+    size_t window_size = 1;
+    while (window_size < max_distance && window_size < kWindowSize) {
+      window_size <<= 1;
+    }
+
+    HashChain chain(in.data(), in.size(), window_size, min_length, max_length,
+                    distance_multiplier);
+
+    struct MatchInfo {
+      uint32_t len;  // Number of input tokens covered by the last step (1 for a literal).
+      uint32_t dist_symbol;  // 0 for a literal, otherwise distance symbol + 1.
+      uint32_t ctx;
+      float total_cost = std::numeric_limits<float>::max();
+    };
+    // prefix_costs[n]: cheapest way found so far to encode the first n symbols.
+    std::vector<MatchInfo> prefix_costs(in.size() + 1);
+    prefix_costs[0].total_cost = 0;
+
+    size_t rle_length = 0;
+    size_t skip_lz77 = 0;
+    for (size_t i = 0; i < in.size(); i++) {
+      chain.Update(i);
+      float lit_cost =
+          prefix_costs[i].total_cost + sym_cost[i + 1] - sym_cost[i];
+      if (prefix_costs[i + 1].total_cost > lit_cost) {
+        prefix_costs[i + 1].dist_symbol = 0;
+        prefix_costs[i + 1].len = 1;
+        prefix_costs[i + 1].ctx = in[i].context;
+        prefix_costs[i + 1].total_cost = lit_cost;
+      }
+      if (skip_lz77 > 0) {
+        skip_lz77--;
+        continue;
+      }
+      dist_symbols.clear();  // dist_symbols[len]: smallest distance symbol reported for that length.
+      chain.FindMatches(i, max_distance,
+                        [&dist_symbols](size_t len, size_t dist_symbol) {
+                          if (dist_symbols.size() <= len) {
+                            dist_symbols.resize(len + 1, dist_symbol);
+                          }
+                          if (dist_symbol < dist_symbols[len]) {
+                            dist_symbols[len] = dist_symbol;
+                          }
+                        });
+      if (dist_symbols.size() <= min_length) continue;
+      {
+        size_t best_cost = dist_symbols.back();  // Despite the name: smallest distance symbol among lengths >= j.
+        for (size_t j = dist_symbols.size() - 1; j >= min_length; j--) {
+          if (dist_symbols[j] < best_cost) {
+            best_cost = dist_symbols[j];
+          }
+          dist_symbols[j] = best_cost;
+        }
+      }
+      for (size_t j = min_length; j < dist_symbols.size(); j++) {
+        // Cost model that uses results from lazy LZ77.
+        float lz77_cost = sce.LenCost(in[i].context, j - min_length, lz77) +
+                          sce.DistCost(dist_symbols[j], lz77);
+        float cost = prefix_costs[i].total_cost + lz77_cost;
+        if (prefix_costs[i + j].total_cost > cost) {
+          prefix_costs[i + j].len = j;
+          prefix_costs[i + j].dist_symbol = dist_symbols[j] + 1;  // Stored +1 so that 0 keeps meaning "literal".
+          prefix_costs[i + j].ctx = in[i].context;
+          prefix_costs[i + j].total_cost = cost;
+        }
+      }
+      // We are in a RLE sequence: skip all the symbols except the first 8 and
+      // the last 8. This avoids quadratic costs for sequences with long runs of
+      // the same symbol.
+      if ((dist_symbols.back() == 0 && distance_multiplier == 0) ||
+          (dist_symbols.back() == 1 && distance_multiplier != 0)) {
+        rle_length++;
+      } else {
+        rle_length = 0;
+      }
+      if (rle_length >= 8 && dist_symbols.size() > 9) {
+        skip_lz77 = dist_symbols.size() - 10;
+        rle_length = 0;
+      }
+    }
+    size_t pos = in.size();  // Walk the chosen steps backwards; `out` is reversed afterwards.
+    while (pos > 0) {
+      bool is_lz77_length = prefix_costs[pos].dist_symbol != 0;
+      if (is_lz77_length) {
+        size_t dist_symbol = prefix_costs[pos].dist_symbol - 1;
+        out.emplace_back(lz77.nonserialized_distance_context, dist_symbol);
+      }
+      size_t val = is_lz77_length ? prefix_costs[pos].len - min_length
+                                  : in[pos - 1].value;
+      out.emplace_back(prefix_costs[pos].ctx, val);
+      out.back().is_lz77_length = is_lz77_length;
+      pos -= prefix_costs[pos].len;
+    }
+    std::reverse(out.begin(), out.end());
+  }
+}
+
+void ApplyLZ77(const HistogramParams& params, size_t num_contexts,
+               const std::vector<std::vector<Token>>& tokens, LZ77Params& lz77,
+               std::vector<std::vector<Token>>& tokens_lz77) {
+  lz77.enabled = false;  // Off by default; the RLE and LZ77 passes turn it on when the estimated saving is large enough.
+  lz77.min_symbol =
+      params.force_huffman ? std::min(PREFIX_MAX_ALPHABET_SIZE - 32, 512) : 224;
+  switch (params.lz77_method) {  // Dispatch to the requested LZ77 variant.
+    case HistogramParams::LZ77Method::kNone:
+      return;
+    case HistogramParams::LZ77Method::kRLE:
+      ApplyLZ77_RLE(params, num_contexts, tokens, lz77, tokens_lz77);
+      return;
+    case HistogramParams::LZ77Method::kLZ77:
+      ApplyLZ77_LZ77(params, num_contexts, tokens, lz77, tokens_lz77);
+      return;
+    case HistogramParams::LZ77Method::kOptimal:
+      ApplyLZ77_Optimal(params, num_contexts, tokens, lz77, tokens_lz77);
+      return;
+  }
+  JXL_ABORT("Not implemented");
+}
+}  // namespace
+
+size_t BuildAndEncodeHistograms(const HistogramParams& params,
+                                size_t num_contexts,
+                                std::vector<std::vector<Token>>& tokens,
+                                EntropyEncodingData* codes,
+                                std::vector<uint8_t>* context_map,
+                                BitWriter* writer, size_t layer,
+                                AuxOut* aux_out) {  // Applies LZ77 (may replace `tokens`), builds histograms and stores the entropy codes.
+  size_t total_bits = 0;  // Return value.
+  codes->lz77.nonserialized_distance_context = num_contexts;  // Distance tokens use a context numbered after the regular ones.
+  std::vector<std::vector<Token>> tokens_lz77;
+  ApplyLZ77(params, num_contexts, tokens, codes->lz77, tokens_lz77);
+  if (ans_fuzzer_friendly_) {
+    codes->lz77.length_uint_config = HybridUintConfig(10, 0, 0);
+    codes->lz77.min_symbol = 2048;
+  }
+
+  const size_t max_contexts = std::min(num_contexts, kClustersLimit);
+  BitWriter::Allotment allotment(writer,
+                                 128 + num_contexts * 40 + max_contexts * 96);
+  if (writer) {  // NOTE(review): bits written by Bundle::Write are not added to total_bits, unlike the CanEncode branch — confirm this is intended.
+    JXL_CHECK(Bundle::Write(codes->lz77, writer, layer, aux_out));
+  } else {
+    size_t ebits, bits;
+    JXL_CHECK(Bundle::CanEncode(codes->lz77, &ebits, &bits));
+    total_bits += bits;
+  }
+  if (codes->lz77.enabled) {
+    if (writer) {
+      size_t b = writer->BitsWritten();
+      EncodeUintConfig(codes->lz77.length_uint_config, writer,
+                       /*log_alpha_size=*/8);
+      total_bits += writer->BitsWritten() - b;
+    } else {
+      SizeWriter size_writer;
+      EncodeUintConfig(codes->lz77.length_uint_config, &size_writer,
+                       /*log_alpha_size=*/8);
+      total_bits += size_writer.size;
+    }
+    num_contexts += 1;  // Account for the distance context.
+    tokens = std::move(tokens_lz77);  // The caller's tokens are replaced by the LZ77 version.
+  }
+  size_t total_tokens = 0;
+  // Build histograms.
+  HistogramBuilder builder(num_contexts);
+  HybridUintConfig uint_config;  //  Default config for clustering.
+  // Unless we are using the kContextMap histogram option.
+  if (params.uint_method == HistogramParams::HybridUintMethod::kContextMap) {
+    uint_config = HybridUintConfig(2, 0, 1);
+  }
+  if (ans_fuzzer_friendly_) {
+    uint_config = HybridUintConfig(10, 0, 0);
+  }
+  for (size_t i = 0; i < tokens.size(); ++i) {
+    for (size_t j = 0; j < tokens[i].size(); ++j) {
+      const Token token = tokens[i][j];
+      total_tokens++;
+      uint32_t tok, nbits, bits;
+      (token.is_lz77_length ? codes->lz77.length_uint_config : uint_config)
+          .Encode(token.value, &tok, &nbits, &bits);
+      tok += token.is_lz77_length ? codes->lz77.min_symbol : 0;  // LZ77 length tokens are offset by min_symbol.
+      builder.VisitSymbol(tok, token.context);
+    }
+  }
+
+  bool use_prefix_code =
+      params.force_huffman || total_tokens < 100 ||
+      params.clustering == HistogramParams::ClusteringType::kFastest ||
+      ans_fuzzer_friendly_;
+  if (!use_prefix_code) {  // Also use prefix codes when every histogram has (near-)zero entropy.
+    bool all_singleton = true;
+    for (size_t i = 0; i < num_contexts; i++) {
+      if (builder.Histo(i).ShannonEntropy() >= 1e-5) {
+        all_singleton = false;
+      }
+    }
+    if (all_singleton) {
+      use_prefix_code = true;
+    }
+  }
+
+  // Encode histograms.
+  total_bits += builder.BuildAndStoreEntropyCodes(params, tokens, codes,
+                                                  context_map, use_prefix_code,
+                                                  writer, layer, aux_out);
+  allotment.FinishedHistogram(writer);
+  ReclaimAndCharge(writer, &allotment, layer, aux_out);
+
+  if (aux_out != nullptr) {
+    aux_out->layers[layer].num_clustered_histograms +=
+        codes->encoding_info.size();
+  }
+  return total_bits;
+}
+
+size_t WriteTokens(const std::vector<Token>& tokens,
+                   const EntropyEncodingData& codes,
+                   const std::vector<uint8_t>& context_map, BitWriter* writer) {
+  // Entropy-codes `tokens` into `writer`; returns the number of raw extra bits.
+  size_t num_extra_bits = 0;
+  if (codes.use_prefix_code) {
+    for (const Token& token : tokens) {
+      uint32_t tok, nbits, bits;
+      size_t histo = context_map[token.context];
+      (token.is_lz77_length ? codes.lz77.length_uint_config
+                            : codes.uint_config[histo])
+          .Encode(token.value, &tok, &nbits, &bits);
+      tok += token.is_lz77_length ? codes.lz77.min_symbol : 0;
+      // Combine two calls to the BitWriter. Equivalent to:
+      //   writer->Write(info.depth, info.bits); writer->Write(nbits, bits);
+      // `bits` is widened first: a 32-bit shift would drop its high bits.
+      const ANSEncSymbolInfo& info = codes.encoding_info[histo][tok];
+      uint64_t data = info.bits;
+      data |= static_cast<uint64_t>(bits) << info.depth;
+      writer->Write(info.depth + nbits, data);
+      num_extra_bits += nbits;
+    }
+    return num_extra_bits;
+  }
+  std::vector<uint64_t> out;  // Flushed bit groups, in the order they were produced.
+  std::vector<uint8_t> out_nbits;  // Bit count of each entry of `out`.
+  out.reserve(tokens.size());
+  out_nbits.reserve(tokens.size());
+  uint64_t allbits = 0;
+  size_t numallbits = 0;
+  // Writes in *reversed* order.
+  auto addbits = [&](size_t bits, size_t nbits) {
+    JXL_DASSERT(bits >> nbits == 0);
+    if (JXL_UNLIKELY(numallbits + nbits > BitWriter::kMaxBitsPerCall)) {
+      out.push_back(allbits);
+      out_nbits.push_back(numallbits);
+      numallbits = allbits = 0;
+    }
+    allbits <<= nbits;
+    allbits |= bits;
+    numallbits += nbits;
+  };
+  const int end = tokens.size();
+  ANSCoder ans;
+  for (int i = end - 1; i >= 0; --i) {  // Tokens are fed to the ANS coder last to first.
+    const Token token = tokens[i];
+    const uint8_t histo = context_map[token.context];
+    uint32_t tok, nbits, bits;
+    (token.is_lz77_length ? codes.lz77.length_uint_config
+                          : codes.uint_config[histo])
+        .Encode(tokens[i].value, &tok, &nbits, &bits);
+    tok += token.is_lz77_length ? codes.lz77.min_symbol : 0;
+    const ANSEncSymbolInfo& info = codes.encoding_info[histo][tok];
+    // Extra bits first as this is reversed.
+    addbits(bits, nbits);
+    num_extra_bits += nbits;
+    uint8_t ans_nbits = 0;
+    uint32_t ans_bits = ans.PutSymbol(info, &ans_nbits);
+    addbits(ans_bits, ans_nbits);
+  }
+  const uint32_t state = ans.GetState();
+  writer->Write(32, state);  // Final ANS state goes out first.
+  writer->Write(numallbits, allbits);
+  for (int i = out.size(); i > 0; --i) {  // Then the flushed groups, newest first.
+    writer->Write(out_nbits[i - 1], out[i - 1]);
+  }
+  return num_extra_bits;
+}
+
+void WriteTokens(const std::vector<Token>& tokens,
+                 const EntropyEncodingData& codes,
+                 const std::vector<uint8_t>& context_map, BitWriter* writer,
+                 size_t layer, AuxOut* aux_out) {
+  // Allotment: 32 bits per token plus a fixed 32 * 1024 * 4 bits of slack.
+  BitWriter::Allotment allotment(writer, 32 * tokens.size() + 32 * 1024 * 4);
+  const size_t extra_bit_total = WriteTokens(tokens, codes, context_map, writer);
+  ReclaimAndCharge(writer, &allotment, layer, aux_out);
+  if (aux_out == nullptr) return;  // No statistics requested.
+  aux_out->layers[layer].extra_bits += extra_bit_total;
+}
+
+void SetANSFuzzerFriendly(bool ans_fuzzer_friendly) {  // Sets ans_fuzzer_friendly_; has no effect unless JXL_IS_DEBUG_BUILD is true.
+#if JXL_IS_DEBUG_BUILD  // Guard against accidental / malicious changes.
+  ans_fuzzer_friendly_ = ans_fuzzer_friendly;
+#endif
+}
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ans.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ans.h
new file mode 100644
index 0000000000..9614ede9c6
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ans.h
@@ -0,0 +1,142 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_ANS_H_
+#define LIB_JXL_ENC_ANS_H_
+
+// Library to encode the ANS population counts to the bit-stream and encode
+// symbols based on the respective distributions.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jxl/ans_common.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/enc_ans_params.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/huffman_table.h"
+
+namespace jxl {
+
+#define USE_MULT_BY_RECIPROCAL
+
+// precision must be equal to:  #bits(state_) + #bits(freq)
+#define RECIPROCAL_PRECISION (32 + ANS_LOG_TAB_SIZE)
+
+// Data structure representing one element of the encoding table built
+// from a distribution.
+// TODO(veluca): split this up, or use a union.
+struct ANSEncSymbolInfo {
+  // ANS (all three fields below are read by ANSCoder::PutSymbol).
+  uint16_t freq_;  // divisor applied to the coder state
+  std::vector<uint16_t> reverse_map_;  // indexed by (state % freq_)
+#ifdef USE_MULT_BY_RECIPROCAL
+  uint64_t ifreq_;  // multiplied with the state in place of dividing by freq_
+#endif
+  // Prefix coding.
+  uint8_t depth;  // presumably the code length in bits -- TODO confirm
+  uint16_t bits;  // presumably the code word -- TODO confirm
+};
+
+class ANSCoder {
+ public:
+  ANSCoder() : state_(ANS_SIGNATURE << 16) {}
+
+  // Encodes symbol `t` into the state. When the state is too large for that,
+  // its low 16 bits are flushed first: they are returned with *nbits == 16.
+  // Otherwise returns 0 with *nbits == 0.
+  uint32_t PutSymbol(const ANSEncSymbolInfo& t, uint8_t* nbits) {
+    const bool flush = (state_ >> (32 - ANS_LOG_TAB_SIZE)) >= t.freq_;
+    const uint32_t flushed = flush ? (state_ & 0xffff) : 0;
+    *nbits = flush ? 16 : 0;
+    if (flush) state_ >>= 16;
+#ifdef USE_MULT_BY_RECIPROCAL
+    // Quotient state_ / freq_ via the precomputed reciprocal (64-bit product).
+    const uint32_t quot = (state_ * t.ifreq_) >> RECIPROCAL_PRECISION;
+    const uint32_t rem = state_ - quot * t.freq_;
+    state_ = (quot << ANS_LOG_TAB_SIZE) + t.reverse_map_[rem];
+#else
+    const uint32_t quot = state_ / t.freq_;
+    state_ = (quot << ANS_LOG_TAB_SIZE) + t.reverse_map_[state_ % t.freq_];
+#endif
+    return flushed;
+  }
+
+  uint32_t GetState() const { return state_; }
+
+ private:
+  uint32_t state_;
+};
+
+// RebalanceHistogram requires a signed type.
+using ANSHistBin = int32_t;
+
+struct EntropyEncodingData {
+  std::vector<std::vector<ANSEncSymbolInfo>> encoding_info;  // [histogram][symbol]
+  bool use_prefix_code;  // presumably prefix coding instead of ANS -- confirm
+  std::vector<HybridUintConfig> uint_config;  // indexed by histogram
+  LZ77Params lz77;  // WriteTokens reads length_uint_config and min_symbol
+};
+
+// Integer to be encoded by an entropy coder, either ANS or Huffman.
+struct Token {
+  Token(uint32_t c, uint32_t value)  // always clears is_lz77_length
+      : is_lz77_length(false), context(c), value(value) {}
+  uint32_t is_lz77_length : 1;  // selects the LZ77 length config in WriteTokens
+  uint32_t context : 31;  // index into the context map
+  uint32_t value;  // the integer to encode
+};
+
+// Returns an estimate of the number of bits required to encode the given
+// histogram (header bits plus data bits).
+float ANSPopulationCost(const ANSHistBin* data, size_t alphabet_size);
+
+// Apply context clustering, compute histograms and encode them. Returns an
+// estimate of the total bits used for encoding the stream. If `writer` ==
+// nullptr, the bit estimate will not take into account the context map (which
+// does not get written if `num_contexts` == 1).
+size_t BuildAndEncodeHistograms(const HistogramParams& params,
+                                size_t num_contexts,
+                                std::vector<std::vector<Token>>& tokens,
+                                EntropyEncodingData* codes,
+                                std::vector<uint8_t>* context_map,
+                                BitWriter* writer, size_t layer,
+                                AuxOut* aux_out);
+
+// Write the tokens to `writer`; the bits used are charged to `layer`.
+void WriteTokens(const std::vector<Token>& tokens,
+                 const EntropyEncodingData& codes,
+                 const std::vector<uint8_t>& context_map, BitWriter* writer,
+                 size_t layer, AuxOut* aux_out);
+
+// Same as above, but assumes allotment created by caller.
+size_t WriteTokens(const std::vector<Token>& tokens,
+                   const EntropyEncodingData& codes,
+                   const std::vector<uint8_t>& context_map, BitWriter* writer);
+
+// Exposed for tests; to be used with Writer=BitWriter only.
+template <typename Writer>
+void EncodeUintConfigs(const std::vector<HybridUintConfig>& uint_config,
+                       Writer* writer, size_t log_alpha_size);
+extern template void EncodeUintConfigs(const std::vector<HybridUintConfig>&,
+                                       BitWriter*, size_t);
+
+// Globally set the option to create fuzzer-friendly ANS streams. Negatively
+// impacts compression. Not thread-safe.
+void SetANSFuzzerFriendly(bool ans_fuzzer_friendly);
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_ANS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ans_params.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ans_params.h
new file mode 100644
index 0000000000..6f7cd897cc
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ans_params.h
@@ -0,0 +1,75 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_ANS_PARAMS_H_
+#define LIB_JXL_ENC_ANS_PARAMS_H_
+
+// Encoder-only parameter needed for ANS entropy encoding methods.
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "lib/jxl/enc_params.h"
+
+namespace jxl {
+
+struct HistogramParams {
+  enum class ClusteringType {
+    kFastest,  // At most 4 clusters.
+    kFast,
+    kBest,
+  };
+
+  enum class HybridUintMethod {
+    kNone,        // always kHybridUint420Config.
+    kFast,        // try only a couple of options.
+    kContextMap,  // quick choice used for the context map.
+    kBest,
+  };
+
+  enum class LZ77Method {
+    kNone,     // lz77 is not attempted.
+    kRLE,      // run-length encoding only.
+    kLZ77,     // lz77 with backward references.
+    kOptimal,  // lz77 with optimal-matching parsing.
+  };
+
+  enum class ANSHistogramStrategy {
+    kFast,         // Subset of methods, with early exit.
+    kApproximate,  // Subset of methods.
+    kPrecise,      // Every method.
+  };
+
+  HistogramParams() = default;
+
+  HistogramParams(SpeedTier tier, size_t num_ctx) {
+    // Only the fields assigned here change; the rest keep their defaults.
+    // NOTE: `num_ctx` is accepted but not used.
+    const bool above_falcon = tier > SpeedTier::kFalcon;
+    const bool above_tortoise = tier > SpeedTier::kTortoise;
+    if (above_falcon) {
+      clustering = ClusteringType::kFastest;
+      lz77_method = LZ77Method::kNone;
+    } else {
+      clustering =
+          above_tortoise ? ClusteringType::kFast : ClusteringType::kBest;
+    }
+    if (above_tortoise) uint_method = HybridUintMethod::kNone;
+    const bool approximate = tier >= SpeedTier::kSquirrel;
+    if (approximate) ans_histogram_strategy = ANSHistogramStrategy::kApproximate;
+  }
+
+  ClusteringType clustering = ClusteringType::kBest;
+  HybridUintMethod uint_method = HybridUintMethod::kBest;
+  LZ77Method lz77_method = LZ77Method::kRLE;
+  ANSHistogramStrategy ans_histogram_strategy = ANSHistogramStrategy::kPrecise;
+  std::vector<size_t> image_widths;
+  size_t max_histograms = ~0;  // int -1 converted to size_t: the maximum value
+  bool force_huffman = false;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_ANS_PARAMS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ar_control_field.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ar_control_field.cc
new file mode 100644
index 0000000000..f43340eda4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ar_control_field.cc
@@ -0,0 +1,318 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_ar_control_field.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <algorithm>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_ar_control_field.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+void ProcessTile(const Image3F& opsin, PassesEncoderState* enc_state,
+                 const Rect& rect,
+                 ArControlFieldHeuristics::TempImages* temp_image) {
+  constexpr size_t N = kBlockDim;
+  ImageB* JXL_RESTRICT epf_sharpness = &enc_state->shared.epf_sharpness;  // out
+  ImageF* JXL_RESTRICT quant = &enc_state->initial_quant_field;
+  JXL_ASSERT(
+      epf_sharpness->xsize() == enc_state->shared.frame_dim.xsize_blocks &&
+      epf_sharpness->ysize() == enc_state->shared.frame_dim.ysize_blocks);
+  // If any of these conditions holds, set the whole rect to 4 and stop.
+  if (enc_state->cparams.butteraugli_distance < kMinButteraugliForDynamicAR ||
+      enc_state->cparams.speed_tier > SpeedTier::kWombat ||
+      enc_state->shared.frame_header.loop_filter.epf_iters == 0) {
+    FillPlane(static_cast<uint8_t>(4), epf_sharpness, rect);
+    return;
+  }
+
+  // Likely better to have a higher X weight, like:
+  // const float kChannelWeights[3] = {47.0f, 4.35f, 0.287f};
+  const float kChannelWeights[3] = {4.35f, 4.35f, 0.287f};
+  const float kChannelWeightsLapNeg[3] = {-0.125f * kChannelWeights[0],
+                                          -0.125f * kChannelWeights[1],
+                                          -0.125f * kChannelWeights[2]};
+  const size_t sharpness_stride =
+      static_cast<size_t>(epf_sharpness->PixelsPerRow());
+  // Block-coordinate bounds of the tile: rows [by0, by1), columns [bx0, bx1).
+  size_t by0 = rect.y0();
+  size_t by1 = rect.y0() + rect.ysize();
+  size_t bx0 = rect.x0();
+  size_t bx1 = rect.x0() + rect.xsize();
+  temp_image->InitOnce();
+  ImageF& laplacian_sqrsum = temp_image->laplacian_sqrsum;
+  // Calculate the L2 of the 3x3 Laplacian in an integral transform
+  // (for example 32x32 dct). This relates to transforms ability
+  // to propagate artefacts.
+  size_t y0 = by0 == 0 ? 2 : 0;
+  size_t y1 = by1 * N + 4 <= opsin.ysize() + 2 ? (by1 - by0) * N + 4
+                                               : opsin.ysize() + 2 - by0 * N;
+  size_t x0 = bx0 == 0 ? 2 : 0;
+  size_t x1 = bx1 * N + 4 <= opsin.xsize() + 2 ? (bx1 - bx0) * N + 4
+                                               : opsin.xsize() + 2 - bx0 * N;
+  HWY_FULL(float) df;
+  for (size_t y = y0; y < y1; y++) {
+    float* JXL_RESTRICT laplacian_sqrsum_row = laplacian_sqrsum.Row(y);
+    size_t cy = y + by0 * N - 2;  // buffer row y -> image row (2-pixel offset)
+    const float* JXL_RESTRICT in_row_t[3];
+    const float* JXL_RESTRICT in_row[3];
+    const float* JXL_RESTRICT in_row_b[3];
+    for (size_t c = 0; c < 3; c++) {
+      in_row_t[c] = opsin.PlaneRow(c, cy > 0 ? cy - 1 : cy);
+      in_row[c] = opsin.PlaneRow(c, cy);
+      in_row_b[c] = opsin.PlaneRow(c, cy + 1 < opsin.ysize() ? cy + 1 : cy);
+    }
+    auto compute_laplacian_scalar = [&](size_t x) {
+      size_t cx = x + bx0 * N - 2;
+      const size_t prevX = cx >= 1 ? cx - 1 : cx;
+      const size_t nextX = cx + 1 < opsin.xsize() ? cx + 1 : cx;
+      float sumsqr = 0;
+      for (size_t c = 0; c < 3; c++) {
+        float laplacian =
+            kChannelWeights[c] * in_row[c][cx] +
+            kChannelWeightsLapNeg[c] *
+                (in_row[c][prevX] + in_row[c][nextX] + in_row_b[c][prevX] +
+                 in_row_b[c][cx] + in_row_b[c][nextX] + in_row_t[c][prevX] +
+                 in_row_t[c][cx] + in_row_t[c][nextX]);
+        sumsqr += laplacian * laplacian;
+      }
+      laplacian_sqrsum_row[x] = sumsqr;
+    };
+    size_t x = x0;
+    for (; x + bx0 * N < 3; x++) {
+      compute_laplacian_scalar(x);
+    }
+    // Interior. One extra pixel of border as the last pixel is special.
+    for (; x + Lanes(df) <= x1 && x + Lanes(df) + bx0 * N - 1 <= opsin.xsize();
+         x += Lanes(df)) {
+      size_t cx = x + bx0 * N - 2;
+      auto sumsqr = Zero(df);
+      for (size_t c = 0; c < 3; c++) {
+        auto laplacian =
+            LoadU(df, in_row[c] + cx) * Set(df, kChannelWeights[c]);
+        auto sum_oth0 = LoadU(df, in_row[c] + cx - 1);
+        auto sum_oth1 = LoadU(df, in_row[c] + cx + 1);
+        auto sum_oth2 = LoadU(df, in_row_t[c] + cx - 1);
+        auto sum_oth3 = LoadU(df, in_row_t[c] + cx);
+        sum_oth0 += LoadU(df, in_row_t[c] + cx + 1);
+        sum_oth1 += LoadU(df, in_row_b[c] + cx - 1);
+        sum_oth2 += LoadU(df, in_row_b[c] + cx);
+        sum_oth3 += LoadU(df, in_row_b[c] + cx + 1);
+        sum_oth0 += sum_oth1;
+        sum_oth2 += sum_oth3;
+        sum_oth0 += sum_oth2;
+        laplacian =
+            MulAdd(Set(df, kChannelWeightsLapNeg[c]), sum_oth0, laplacian);
+        sumsqr = MulAdd(laplacian, laplacian, sumsqr);
+      }
+      StoreU(sumsqr, df, laplacian_sqrsum_row + x);
+    }
+    for (; x < x1; x++) {
+      compute_laplacian_scalar(x);
+    }
+  }
+  HWY_CAPPED(float, 4) df4;
+  // Calculate the L2 of the 3x3 Laplacian in 4x4 blocks within the area
+  // of the integral transform. Sample them within the integral transform
+  // with two offsets (0,0) and (-2, -2) pixels (sqrsum_00 and sqrsum_22,
+  //  respectively).
+  ImageF& sqrsum_00 = temp_image->sqrsum_00;
+  size_t sqrsum_00_stride = sqrsum_00.PixelsPerRow();
+  float* JXL_RESTRICT sqrsum_00_row = sqrsum_00.Row(0);
+  for (size_t y = 0; y < (by1 - by0) * 2; y++) {
+    const float* JXL_RESTRICT rows_in[4];
+    for (size_t iy = 0; iy < 4; iy++) {
+      rows_in[iy] = laplacian_sqrsum.ConstRow(y * 4 + iy + 2);
+    }
+    float* JXL_RESTRICT row_out = sqrsum_00_row + y * sqrsum_00_stride;
+    for (size_t x = 0; x < (bx1 - bx0) * 2; x++) {
+      auto sum = Zero(df4);
+      for (size_t iy = 0; iy < 4; iy++) {
+        for (size_t ix = 0; ix < 4; ix += Lanes(df4)) {
+          sum += LoadU(df4, rows_in[iy] + x * 4 + ix + 2);
+        }
+      }
+      row_out[x] = GetLane(Sqrt(SumOfLanes(sum))) * (1.0f / 4.0f);
+    }
+  }
+  // Indexing iy and ix is a bit tricky as we include a 2 pixel border
+  // around the block for evenness calculations. This is similar to what
+  // we did in guetzli for the observability of artefacts, except there
+  // the element is a sliding 5x5, not sparsely sampled 4x4 box like here.
+  ImageF& sqrsum_22 = temp_image->sqrsum_22;
+  size_t sqrsum_22_stride = sqrsum_22.PixelsPerRow();
+  float* JXL_RESTRICT sqrsum_22_row = sqrsum_22.Row(0);
+  for (size_t y = 0; y < (by1 - by0) * 2 + 1; y++) {
+    const float* JXL_RESTRICT rows_in[4];
+    for (size_t iy = 0; iy < 4; iy++) {
+      rows_in[iy] = laplacian_sqrsum.ConstRow(y * 4 + iy);
+    }
+    float* JXL_RESTRICT row_out = sqrsum_22_row + y * sqrsum_22_stride;
+    // ignore pixels outside the image.
+    // Y coordinates are relative to by0*8+y*4.
+    size_t sy = y * 4 + by0 * 8 > 0 ? 0 : 2;
+    size_t ey = y * 4 + by0 * 8 + 4 <= opsin.ysize() + 2
+                    ? 4
+                    : opsin.ysize() - y * 4 - by0 * 8 + 2;
+    for (size_t x = 0; x < (bx1 - bx0) * 2 + 1; x++) {
+      // ignore pixels outside the image.
+      // X coordinates are relative to bx0*8.
+      size_t sx = x * 4 + bx0 * 8 > 0 ? x * 4 : x * 4 + 2;
+      size_t ex = x * 4 + bx0 * 8 + 4 <= opsin.xsize() + 2
+                      ? x * 4 + 4
+                      : opsin.xsize() - bx0 * 8 + 2;
+      if (ex - sx == 4 && ey - sy == 4) {
+        auto sum = Zero(df4);
+        for (size_t iy = 0; iy < 4; iy++) {
+          for (size_t ix = 0; ix < 4; ix += Lanes(df4)) {
+            sum += Load(df4, rows_in[iy] + sx + ix);
+          }
+        }
+        row_out[x] = GetLane(Sqrt(SumOfLanes(sum))) * (1.0f / 4.0f);
+      } else {
+        float sum = 0;
+        for (size_t iy = sy; iy < ey; iy++) {
+          for (size_t ix = sx; ix < ex; ix++) {
+            sum += rows_in[iy][ix];
+          }
+        }
+        row_out[x] = std::sqrt(sum / ((ex - sx) * (ey - sy)));
+      }
+    }
+  }
+  for (size_t by = by0; by < by1; by++) {
+    AcStrategyRow acs_row = enc_state->shared.ac_strategy.ConstRow(by);
+    uint8_t* JXL_RESTRICT out_row = epf_sharpness->Row(by);
+    float* JXL_RESTRICT quant_row = quant->Row(by);
+    for (size_t bx = bx0; bx < bx1; bx++) {
+      AcStrategy acs = acs_row[bx];
+      if (!acs.IsFirstBlock()) continue;
+      // The errors are going to be linear to the quantization value in this
+      // locality. We only have access to the initial quant field here.
+      float quant_val = 1.0f / quant_row[bx];
+      // sq00/sq22 read the 4x4-sum buffers relative to this block's origin.
+      const auto sq00 = [&](size_t y, size_t x) {
+        return sqrsum_00_row[((by - by0) * 2 + y) * sqrsum_00_stride +
+                             (bx - bx0) * 2 + x];
+      };
+      const auto sq22 = [&](size_t y, size_t x) {
+        return sqrsum_22_row[((by - by0) * 2 + y) * sqrsum_22_stride +
+                             (bx - bx0) * 2 + x];
+      };
+      float sqrsum_integral_transform = 0;
+      for (size_t iy = 0; iy < acs.covered_blocks_y() * 2; iy++) {
+        for (size_t ix = 0; ix < acs.covered_blocks_x() * 2; ix++) {
+          sqrsum_integral_transform += sq00(iy, ix) * sq00(iy, ix);
+        }
+      }
+      sqrsum_integral_transform /=
+          4 * acs.covered_blocks_x() * acs.covered_blocks_y();
+      sqrsum_integral_transform = std::sqrt(sqrsum_integral_transform);
+      // If masking is high or amplitude of the artefacts is low, then no
+      // smoothing is needed.
+      for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+        for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+          // Five 4x4 blocks for masking estimation, all within the
+          // 8x8 area.
+          float minval_1 = std::min(sq00(2 * iy + 0, 2 * ix + 0),
+                                    sq00(2 * iy + 0, 2 * ix + 1));
+          float minval_2 = std::min(sq00(2 * iy + 1, 2 * ix + 0),
+                                    sq00(2 * iy + 1, 2 * ix + 1));
+          float minval = std::min(minval_1, minval_2);
+          minval = std::min(minval, sq22(2 * iy + 1, 2 * ix + 1));
+          // Nine more 4x4 blocks for masking estimation, includes
+          // the 2 pixel area around the 8x8 block being controlled.
+          float minval2_1 = std::min(sq22(2 * iy + 0, 2 * ix + 0),
+                                     sq22(2 * iy + 0, 2 * ix + 1));
+          float minval2_2 = std::min(sq22(2 * iy + 0, 2 * ix + 2),
+                                     sq22(2 * iy + 1, 2 * ix + 0));
+          float minval2_3 = std::min(sq22(2 * iy + 1, 2 * ix + 1),
+                                     sq22(2 * iy + 1, 2 * ix + 2));
+          float minval2_4 = std::min(sq22(2 * iy + 2, 2 * ix + 0),
+                                     sq22(2 * iy + 2, 2 * ix + 1));
+          float minval2_5 = std::min(minval2_1, minval2_2);
+          float minval2_6 = std::min(minval2_3, minval2_4);
+          float minval2 = std::min(minval2_5, minval2_6);
+          minval2 = std::min(minval2, sq22(2 * iy + 2, 2 * ix + 2));
+          float minval3 = std::min(minval, minval2);
+          minval *= 0.125f;
+          minval += 0.625f * minval3;
+          minval +=
+              0.125f * std::min(1.5f * minval3, sq22(2 * iy + 1, 2 * ix + 1));
+          minval += 0.125f * minval2;
+          // Larger kBias, less smoothing for low intensity changes.
+          float kDeltaLimit = 3.2;
+          float bias = 0.0625f * quant_val;
+          float delta =
+              (sqrsum_integral_transform + (kDeltaLimit + 0.05) * bias) /
+              (minval + bias);
+          int out = 4;
+          if (delta > kDeltaLimit) {
+            out = 4;  // smooth
+          } else {
+            out = 0;
+          }
+          // 'threshold' is separate from 'bias' for easier tuning of these
+          // heuristics.
+          float threshold = 0.0625f * quant_val;
+          const float kSmoothLimit = 0.085f;
+          float smooth = 0.20f * (sq00(2 * iy + 0, 2 * ix + 0) +
+                                  sq00(2 * iy + 0, 2 * ix + 1) +
+                                  sq00(2 * iy + 1, 2 * ix + 0) +
+                                  sq00(2 * iy + 1, 2 * ix + 1) + minval);
+          if (smooth < kSmoothLimit * threshold) {
+            out = 4;
+          }
+          out_row[bx + sharpness_stride * iy + ix] = out;  // row by+iy, col bx+ix
+        }
+      }
+    }
+  }
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(ProcessTile);
+
+void ArControlFieldHeuristics::RunRect(const Rect& block_rect,
+                                       const Image3F& opsin,
+                                       PassesEncoderState* enc_state,
+                                       size_t thread) {
+  TempImages* scratch = &temp_images[thread];  // this thread's buffers
+  HWY_DYNAMIC_DISPATCH(ProcessTile)(opsin, enc_state, block_rect, scratch);
+}
+
+}  // namespace jxl
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ar_control_field.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ar_control_field.h
new file mode 100644
index 0000000000..ae9d399b92
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_ar_control_field.h
@@ -0,0 +1,49 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_AR_CONTROL_FIELD_H_
+#define LIB_JXL_ENC_AR_CONTROL_FIELD_H_
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/quant_weights.h"
+
+namespace jxl {
+
+struct ArControlFieldHeuristics {
+  // Scratch images for one worker thread, allocated lazily by InitOnce().
+  struct TempImages {
+    void InitOnce() {
+      const bool allocated = laplacian_sqrsum.xsize() != 0;
+      if (allocated) return;
+      const auto quarter = kEncTileDim / 4;
+      laplacian_sqrsum = ImageF(kEncTileDim + 4, kEncTileDim + 4);
+      sqrsum_00 = ImageF(quarter, quarter);
+      sqrsum_22 = ImageF(quarter + 1, quarter + 1);
+    }
+    ImageF laplacian_sqrsum;
+    ImageF sqrsum_00;
+    ImageF sqrsum_22;
+  };
+  // Gives each of `num_threads` threads its own TempImages entry.
+  void PrepareForThreads(size_t num_threads) { temp_images.resize(num_threads); }
+  // Processes `block_rect` using the scratch entry temp_images[thread].
+  void RunRect(const Rect& block_rect, const Image3F& opsin,
+               PassesEncoderState* enc_state, size_t thread);
+
+  std::vector<TempImages> temp_images;
+  ImageB* epf_sharpness;
+  ImageF* quant;
+  bool all_default;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_AR_CONTROL_FIELD_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_bit_writer.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_bit_writer.cc
new file mode 100644
index 0000000000..50e13f3883
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_bit_writer.cc
@@ -0,0 +1,379 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_bit_writer.h"
+
+#include <string.h>  // memcpy
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/dec_bit_reader.h"
+
+namespace jxl {
+
+BitWriter::Allotment::Allotment(BitWriter* JXL_RESTRICT writer, size_t max_bits)
+    : max_bits_(max_bits) {
+  if (!writer) return;  // nothing to reserve without a writer
+  prev_bits_written_ = writer->BitsWritten();
+  // Grow the storage by enough whole bytes to hold `max_bits` more bits.
+  const size_t extra_bytes = DivCeil(max_bits, kBitsPerByte);
+  writer->storage_.resize(writer->storage_.size() + extra_bytes);
+  parent_ = writer->current_allotment_;  // remember the enclosing allotment
+  writer->current_allotment_ = this;
+}
+
+BitWriter::Allotment::~Allotment() {
+  // Reclaiming is mandatory: without it the unused part of the reserved
+  // storage would never be given back, so a missing call is treated as a bug.
+  if (called_) return;
+  JXL_ABORT("Did not call Allotment::ReclaimUnused");
+}
+
+void BitWriter::Allotment::FinishedHistogram(BitWriter* JXL_RESTRICT writer) {
+  if (writer == nullptr) return;     // nothing to measure without a writer
+  JXL_ASSERT(!called_);              // Call before ReclaimUnused
+  JXL_ASSERT(histogram_bits_ == 0);  // Do not call twice
+  JXL_ASSERT(writer->BitsWritten() >= prev_bits_written_);
+  histogram_bits_ = writer->BitsWritten() - prev_bits_written_;  // bits so far
+}
+
+void BitWriter::Allotment::PrivateReclaim(BitWriter* JXL_RESTRICT writer,
+                                          size_t* JXL_RESTRICT used_bits,
+                                          size_t* JXL_RESTRICT unused_bits) {
+  JXL_ASSERT(!called_);  // reclaiming twice is a bug
+  called_ = true;
+  // Without a writer there is nothing to measure; outputs are left untouched.
+  if (!writer) return;
+
+  // Bits consumed since this allotment's baseline, and the unspent remainder.
+  JXL_ASSERT(writer->BitsWritten() >= prev_bits_written_);
+  const size_t used = *used_bits = writer->BitsWritten() - prev_bits_written_;
+  JXL_ASSERT(*used_bits <= max_bits_);
+  *unused_bits = max_bits_ - used;
+
+  // Give back only whole unused bytes (the division truncates).
+  const size_t unused_bytes = *unused_bits / kBitsPerByte;
+  JXL_ASSERT(writer->storage_.size() >= unused_bytes);
+  writer->storage_.resize(writer->storage_.size() - unused_bytes);
+  writer->current_allotment_ = parent_;
+  // Shift every ancestor's baseline so these bits are not charged twice.
+  for (auto up = parent_; up != nullptr; up = up->parent_) {
+    up->prev_bits_written_ += used;
+  }
+}
+
+void BitWriter::AppendByteAligned(const Span<const uint8_t>& span) {
+  const size_t num_bytes = span.size();
+  if (num_bytes == 0) return;
+  // Payload plus one trailing zero byte.
+  storage_.resize(storage_.size() + num_bytes + 1);
+  JXL_ASSERT(BitsWritten() % kBitsPerByte == 0);  // bytewise copy needs this
+  size_t pos = BitsWritten() / kBitsPerByte;
+  memcpy(storage_.data() + pos, span.data(), num_bytes);
+  pos += num_bytes;
+  storage_[pos++] = 0;  // for next Write
+  JXL_ASSERT(pos <= storage_.size());
+  bits_written_ += num_bytes * kBitsPerByte;
+}
+
+void BitWriter::AppendByteAligned(const BitWriter& other) {
+  JXL_ASSERT(other.BitsWritten() % kBitsPerByte == 0);
+  JXL_ASSERT(other.BitsWritten() / kBitsPerByte != 0);
+  // `other` holds a whole, non-zero number of bytes (asserted above).
+  AppendByteAligned(other.GetSpan());
+}
+
+void BitWriter::AppendByteAligned(const std::vector<BitWriter>& others) {
+  // First pass: total payload size, so storage_ is resized only once.
+  size_t total_bytes = 0;
+  for (size_t i = 0; i < others.size(); ++i) {
+    const BitWriter& writer = others[i];
+    JXL_ASSERT(writer.BitsWritten() % kBitsPerByte == 0);
+    total_bytes += writer.BitsWritten() / kBitsPerByte;
+  }
+  // Nothing to append. This happens e.g. when per-group writers were created
+  // but never written to (lossless images with no alpha).
+  if (total_bytes == 0) return;
+
+  // Payload plus one trailing zero byte.
+  storage_.resize(storage_.size() + total_bytes + 1);
+
+  // Second pass: both sides are byte-aligned, so copy whole bytes back to back.
+  JXL_ASSERT(BitsWritten() % kBitsPerByte == 0);
+  size_t pos = BitsWritten() / kBitsPerByte;
+  for (size_t i = 0; i < others.size(); ++i) {
+    const Span<const uint8_t> span = others[i].GetSpan();
+    memcpy(storage_.data() + pos, span.data(), span.size());
+    pos += span.size();
+  }
+  storage_[pos++] = 0;  // for next Write
+  JXL_ASSERT(pos <= storage_.size());
+  bits_written_ += total_bytes * kBitsPerByte;
+}
+
+// TODO(lode): avoid code duplication
+void BitWriter::AppendByteAligned(
+    const std::vector<std::unique_ptr<BitWriter>>& others) {
+  // First pass: total payload size, so storage_ is resized only once.
+  size_t total_bytes = 0;
+  for (size_t i = 0; i < others.size(); ++i) {
+    const std::unique_ptr<BitWriter>& writer = others[i];
+    JXL_ASSERT(writer->BitsWritten() % kBitsPerByte == 0);
+    total_bytes += writer->BitsWritten() / kBitsPerByte;
+  }
+  // Nothing to append. This happens e.g. when per-group writers were created
+  // but never written to (lossless images with no alpha).
+  if (total_bytes == 0) return;
+
+  // Payload plus one trailing zero byte.
+  storage_.resize(storage_.size() + total_bytes + 1);
+
+  // Second pass: both sides are byte-aligned, so copy whole bytes back to back.
+  JXL_ASSERT(BitsWritten() % kBitsPerByte == 0);
+  size_t pos = BitsWritten() / kBitsPerByte;
+  for (size_t i = 0; i < others.size(); ++i) {
+    const Span<const uint8_t> span = others[i]->GetSpan();
+    memcpy(storage_.data() + pos, span.data(), span.size());
+    pos += span.size();
+  }
+  storage_[pos++] = 0;  // for next Write
+  JXL_ASSERT(pos <= storage_.size());
+  bits_written_ += total_bytes * kBitsPerByte;
+}
+
+BitWriter& BitWriter::operator+=(const BitWriter& other) {
+  // Required for correctness, otherwise owned[bits_written_] is out of bounds.
+  if (other.bits_written_ == 0) return *this;
+  const size_t other_bytes = DivCeil(other.bits_written_, kBitsPerByte);
+  const size_t prev_bytes = storage_.size();
+  storage_.resize(prev_bytes + other_bytes + 1);  // extra zero padding
+  // Fast path: this writer is byte-aligned, so whole bytes are memcpy'd.
+  if (bits_written_ % kBitsPerByte == 0) {
+    // Only copy fully-initialized bytes.
+    const size_t full_bytes = other.bits_written_ / kBitsPerByte;  // truncated
+    memcpy(&storage_[bits_written_ / kBitsPerByte], other.storage_.data(),
+           full_bytes);
+    storage_[bits_written_ / kBitsPerByte + full_bytes] = 0;  // for next Write
+    bits_written_ += full_bytes * kBitsPerByte;
+
+    const size_t leftovers = other.bits_written_ % kBitsPerByte;
+    if (leftovers != 0) {
+      BitReader reader(Span<const uint8_t>(other.storage_.data() + full_bytes,
+                                           other_bytes - full_bytes));
+      Write(leftovers, reader.ReadBits(leftovers));
+      JXL_CHECK(reader.Close());
+    }
+    return *this;
+  }
+  // Slow path: read `other` back and re-emit it in chunks of at most N bits.
+  constexpr size_t N = kMaxBitsPerCall < BitReader::kMaxBitsPerCall
+                           ? kMaxBitsPerCall
+                           : BitReader::kMaxBitsPerCall;
+
+  // Do not use GetSpan because other may not be byte-aligned.
+  BitReader reader(other.storage_);
+  size_t i = 0;
+  for (; i + N <= other.bits_written_; i += N) {
+    Write(N, reader.ReadFixedBits<N>());
+  }
+  const size_t leftovers = other.bits_written_ - i;  // fewer than N bits remain
+  if (leftovers != 0) {
+    Write(leftovers, reader.ReadBits(leftovers));
+  }
+  JXL_CHECK(reader.Close());
+  return *this;
+}
+
+#ifndef DISABLE_ACC_BIT_WRITER
+void BitWriter::init(size_t cnt) {  // `cnt` = number of parts
+  bits_streams.resize(cnt);
+  nbits_streams.resize(cnt);
+  cur_part = 0;  // writes are queued under part 0 until update_part()
+}
+
+void BitWriter::update_part(size_t cnt){
+  cur_part=cnt;  // later Write() calls are queued under part `cnt`
+}
+#endif
+// Example: let's assume that 3 bits (Rs below) have been written already:
+// BYTE+0       BYTE+1       BYTE+2
+// 0000 0RRR    ???? ????    ???? ????
+//
+// Now, we could write up to 5 bits by just shifting them left by 3 bits and
+// OR'ing to BYTE+0.
+//
+// For n > 5 bits, we write the lowest 5 bits as above, then write the next
+// lowest bits into BYTE+1 starting from its lower bits and so on.
+#ifndef DISABLE_ACC_BIT_WRITER
+void BitWriter::Write(size_t n_bits, uint64_t bits) {
+  JXL_DASSERT((bits >> n_bits) == 0);
+  JXL_DASSERT(n_bits <= kMaxBitsPerCall);
+  // Queue the write under the current part instead of touching storage_.
+  nbits_streams[cur_part].push(n_bits);
+  bits_streams[cur_part].push(bits);
+/* Direct write into storage_, disabled in this variant:
+  uint8_t* p = &storage_[bits_written_ / kBitsPerByte];
+  const size_t bits_in_first_byte = bits_written_ % kBitsPerByte;
+  bits <<= bits_in_first_byte;
+#if JXL_BYTE_ORDER_LITTLE
+  uint64_t v = *p;
+  // Last (partial) or next byte to write must be zero-initialized!
+  // PaddedBytes initializes the first, and Write/Append maintain this.
+  JXL_DASSERT(v >> bits_in_first_byte == 0);
+  v |= bits;
+  memcpy(p, &v, sizeof(v));  // Write bytes: possibly more than n_bits/8
+#else
+  *p++ |= static_cast<uint8_t>(bits & 0xFF);
+  for (size_t bits_left_to_write = n_bits + bits_in_first_byte;
+       bits_left_to_write >= 9; bits_left_to_write -= 8) {
+    bits >>= 8;
+    *p++ = static_cast<uint8_t>(bits & 0xFF);
+  }
+  *p = 0;
+#endif*/
+  bits_written_ += n_bits;  // counted now although the bits are only queued
+}
+#else
+void BitWriter::Write(size_t n_bits, uint64_t bits) {
+  JXL_DASSERT((bits >> n_bits) == 0);
+  JXL_DASSERT(n_bits <= kMaxBitsPerCall);
+  const size_t bits_in_first_byte = bits_written_ % kBitsPerByte;
+  uint8_t* dst = &storage_[bits_written_ / kBitsPerByte];
+  bits <<= bits_in_first_byte;  // line the value up behind the bits in *dst
+#if JXL_BYTE_ORDER_LITTLE
+  // *dst may hold earlier bits only below bits_in_first_byte; everything above
+  // must still be zero so that OR-ing the shifted value in is lossless.
+  uint64_t v = *dst;
+  JXL_DASSERT(v >> bits_in_first_byte == 0);
+  v |= bits;
+  memcpy(dst, &v, sizeof(v));  // 8-byte store; bytes past the data become 0
+#else
+  // Byte-wise store: finish the partial byte, then emit whole bytes.
+  *dst++ |= static_cast<uint8_t>(bits & 0xFF);
+  for (size_t left = n_bits + bits_in_first_byte; left >= 9; left -= 8) {
+    bits >>= 8;
+    *dst++ = static_cast<uint8_t>(bits & 0xFF);
+  }
+  *dst = 0;  // leave the following byte zeroed for the next call
+#endif
+  bits_written_ += n_bits;
+}
+#endif
+
+#ifndef DISABLE_ACC_BIT_WRITER
+// Flushes the queued writes of the parts listed in `seq`, in that order, into
+// storage_, continuing at the bit position where the previous flush stopped.
+// NOTE(review): storage_ is resized to bits_written_ bytes, not bits/8 - confirm.
+void BitWriter::Finalize(std::vector<int> seq) {
+  storage_.resize(bits_written_);
+  size_t bits_written = old_bits_written_;
+  for (size_t i = 0; i < seq.size(); i++) {
+    const size_t part = static_cast<size_t>(seq[i]);  // negative ints wrap high
+    JXL_DASSERT(part < nbits_streams.size() && part < bits_streams.size());
+    std::queue<size_t>& nbits_queue = nbits_streams[part];
+    std::queue<uint64_t>& bits_queue = bits_streams[part];
+    while (!nbits_queue.empty() && !bits_queue.empty()) {
+      const size_t n_bits = nbits_queue.front();
+      uint64_t bits = bits_queue.front();
+      nbits_queue.pop();
+      bits_queue.pop();
+      JXL_DASSERT((bits >> n_bits) == 0);
+      JXL_DASSERT(n_bits <= kMaxBitsPerCall);
+      uint8_t* p = &storage_[bits_written / kBitsPerByte];
+      const size_t bits_in_first_byte = bits_written % kBitsPerByte;
+      bits <<= bits_in_first_byte;
+#if JXL_BYTE_ORDER_LITTLE
+      uint64_t v = *p;
+      // Last (partial) or next byte to write must be zero-initialized!
+      JXL_DASSERT(v >> bits_in_first_byte == 0);
+      v |= bits;
+      memcpy(p, &v, sizeof(v));  // Write bytes: possibly more than n_bits/8
+#else
+      *p++ |= static_cast<uint8_t>(bits & 0xFF);
+      for (size_t bits_left_to_write = n_bits + bits_in_first_byte;
+           bits_left_to_write >= 9; bits_left_to_write -= 8) {
+        bits >>= 8;
+        *p++ = static_cast<uint8_t>(bits & 0xFF);
+      }
+      *p = 0;  // keep the following byte zeroed for the next store
+#endif
+      bits_written += n_bits;
+    }
+  }
+  JXL_DASSERT(bits_written == bits_written_);  // everything queued was flushed
+  old_bits_written_ = bits_written_;
+}
+
+// Flushes every queued write into storage_, part 0 first, then part 1, ...;
+// continues at the bit position where the previous flush stopped.
+// NOTE(review): storage_ is resized to bits_written_ bytes, not bits/8 - confirm.
+void BitWriter::Finalize() {
+  storage_.resize(bits_written_);
+  size_t bits_written = old_bits_written_;
+  for (size_t i = 0; i < nbits_streams.size(); i++) {
+    std::queue<size_t>& nbits_queue = nbits_streams[i];
+    std::queue<uint64_t>& bits_queue = bits_streams[i];
+    while (!nbits_queue.empty() && !bits_queue.empty()) {
+      const size_t n_bits = nbits_queue.front();
+      uint64_t bits = bits_queue.front();
+      nbits_queue.pop();
+      bits_queue.pop();
+      JXL_DASSERT((bits >> n_bits) == 0);
+      JXL_DASSERT(n_bits <= kMaxBitsPerCall);
+
+      uint8_t* p = &storage_[bits_written / kBitsPerByte];
+      const size_t bits_in_first_byte = bits_written % kBitsPerByte;
+      bits <<= bits_in_first_byte;
+#if JXL_BYTE_ORDER_LITTLE
+      uint64_t v = *p;
+      // Last (partial) or next byte to write must be zero-initialized!
+      // PaddedBytes initializes the first, and Write/Append maintain this.
+      JXL_DASSERT(v >> bits_in_first_byte == 0);
+      v |= bits;
+      memcpy(p, &v, sizeof(v));  // Write bytes: possibly more than n_bits/8
+#else
+      *p++ |= static_cast<uint8_t>(bits & 0xFF);
+      for (size_t bits_left_to_write = n_bits + bits_in_first_byte;
+           bits_left_to_write >= 9; bits_left_to_write -= 8) {
+        bits >>= 8;
+        *p++ = static_cast<uint8_t>(bits & 0xFF);
+      }
+      *p = 0;  // keep the following byte zeroed for the next store
+#endif
+      bits_written += n_bits;
+    }
+  }
+  JXL_DASSERT(bits_written == bits_written_);  // everything queued was flushed
+  old_bits_written_ = bits_written_;
+}
+#endif
+BitWriter& BitWriter::operator+=(const PaddedBytes& other) {
+  const size_t other_bytes = other.size();
+  // Required for correctness, otherwise owned[bits_written_] is out of bounds.
+  if (other_bytes == 0) return *this;
+  const size_t other_bits = other_bytes * kBitsPerByte;
+  // NOTE(review): the aligned path stores directly, bypassing Write() - confirm.
+  storage_.resize(storage_.size() + other_bytes + 1);  // +1: zeroed byte below
+  if (bits_written_ % kBitsPerByte == 0) {  // byte-aligned: plain byte copy
+    memcpy(&storage_[bits_written_ / kBitsPerByte], other.data(), other_bytes);
+    storage_[bits_written_ / kBitsPerByte + other_bytes] = 0;  // for next Write
+    bits_written_ += other_bits;
+    return *this;
+  }
+  constexpr size_t N = kMaxBitsPerCall < BitReader::kMaxBitsPerCall
+                           ? kMaxBitsPerCall
+                           : BitReader::kMaxBitsPerCall;
+  // Unaligned: read `other` back and re-write it through Write(), N bits a time.
+  BitReader reader(other);
+  size_t i = 0;
+  for (; i + N <= other_bits; i += N) {
+    Write(N, reader.ReadFixedBits<N>());
+  }
+  const size_t leftovers = other_bits - i;  // fewer than N bits remain (maybe 0)
+  Write(leftovers, reader.ReadBits(leftovers));
+  JXL_CHECK(reader.Close());
+  return *this;
+}
+
+}  // namespace jxl
\ No newline at end of file
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_bit_writer.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_bit_writer.h
new file mode 100644
index 0000000000..750a12b88e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_bit_writer.h
@@ -0,0 +1,182 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_BIT_WRITER_H_
+#define LIB_JXL_ENC_BIT_WRITER_H_
+
+// BitWriter class: unbuffered writes using unaligned 64-bit stores.
+#include "hls_stream.h"
+#include "ap_int.h"
+#include <queue>
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+
+#include "xlnx_cfg.h"
+
+namespace jxl {
+
+struct BitWriter {  // Appends bit fields to a byte buffer, LSB-first per byte.
+  // Upper bound on `n_bits` in each call to Write. We shift a 64-bit word by
+  // 7 bits (max already valid bits in the last byte) and at least 1 bit is
+  // needed to zero-initialize the bit-stream ahead (i.e. if 7 bits are valid
+  // and we write 57 bits, then the next write will access a byte that was not
+  // yet zero-initialized).
+  static constexpr size_t kMaxBitsPerCall = 56;
+
+#ifdef DISABLE_ACC_BIT_WRITER
+  BitWriter() : bits_written_(0) {}
+#else
+  size_t cur_part;  // index of the part Write() currently queues into
+  std::vector<std::queue<uint64_t> > bits_streams;  // per part: queued values
+  std::vector<std::queue<size_t> > nbits_streams;   // per part: their widths
+
+  BitWriter() : bits_written_(0) {
+    cur_part = 0;
+    old_bits_written_ = 0;
+    bits_streams.resize(1);
+    nbits_streams.resize(1);
+  }
+#endif
+
+  // Disallow copying - may lead to bugs.
+  BitWriter(const BitWriter&) = delete;
+  BitWriter& operator=(const BitWriter&) = delete;
+  BitWriter(BitWriter&&) = default;
+  BitWriter& operator=(BitWriter&&) = default;
+
+#ifdef DISABLE_ACC_BIT_WRITER
+  explicit BitWriter(PaddedBytes&& donor)
+      : bits_written_(donor.size() * kBitsPerByte),
+        storage_(std::move(donor)) {}
+#else
+  explicit BitWriter(PaddedBytes&& donor)
+      : cur_part(0), bits_streams(1), nbits_streams(1),
+        bits_written_(donor.size() * kBitsPerByte),
+        // The donated bytes count as flushed; donor is moved-from afterwards.
+        old_bits_written_(bits_written_),
+        storage_(std::move(donor)) {}
+#endif
+
+  size_t BitsWritten() const { return bits_written_; }
+
+  Span<const uint8_t> GetSpan() const {
+    // Callers must ensure byte alignment to avoid uninitialized bits.
+    JXL_ASSERT(bits_written_ % kBitsPerByte == 0);
+    return Span<const uint8_t>(storage_.data(), bits_written_ / kBitsPerByte);
+  }
+
+  // Example usage: bytes = std::move(writer).TakeBytes(); Useful for the
+  // top-level encoder which returns PaddedBytes, not a BitWriter.
+  // *this must be an rvalue reference and is invalid afterwards.
+  PaddedBytes&& TakeBytes() && {
+    // Callers must ensure byte alignment to avoid uninitialized bits.
+    JXL_ASSERT(bits_written_ % kBitsPerByte == 0);
+    storage_.resize(bits_written_ / kBitsPerByte);
+    return std::move(storage_);
+  }
+
+  // Must be byte-aligned before calling.
+  void AppendByteAligned(const Span<const uint8_t>& span);
+  // NOTE: no allotment needed, the other BitWriters have already been charged.
+  void AppendByteAligned(const BitWriter& other);
+  void AppendByteAligned(const std::vector<std::unique_ptr<BitWriter>>& others);
+  void AppendByteAligned(const std::vector<BitWriter>& others);
+
+  class Allotment {
+   public:
+    // Expands a BitWriter's storage. Must happen before calling Write or
+    // ZeroPadToByte. Must call ReclaimUnused after writing to reclaim the
+    // unused storage so that BitWriter memory use remains tightly bounded.
+    Allotment(BitWriter* JXL_RESTRICT writer, size_t max_bits);
+    ~Allotment();
+
+    size_t MaxBits() const { return max_bits_; }
+
+    // Call after writing a histogram, but before ReclaimUnused.
+    void FinishedHistogram(BitWriter* JXL_RESTRICT writer);
+
+    size_t HistogramBits() const {
+      JXL_ASSERT(called_);
+      return histogram_bits_;
+    }
+
+    // Do not call directly - use ::ReclaimAndCharge instead, which ensures
+    // the bits are charged to a layer.
+    void PrivateReclaim(BitWriter* JXL_RESTRICT writer,
+                        size_t* JXL_RESTRICT used_bits,
+                        size_t* JXL_RESTRICT unused_bits);
+
+   private:
+    size_t prev_bits_written_;
+    const size_t max_bits_;
+    size_t histogram_bits_ = 0;
+    bool called_ = false;
+    Allotment* parent_;
+  };
+
+  // WARNING: think twice before using this. Concatenating two BitWriters that
+  // pad to bytes is NOT the same as one contiguous BitWriter.
+  BitWriter& operator+=(const BitWriter& other);
+
+  // TODO(janwas): remove once all callers use BitWriter
+  BitWriter& operator+=(const PaddedBytes& other);
+
+  // Writes bits into bytes in increasing addresses, and within a byte
+  // least-significant-bit first.
+  //
+  // The function can write up to 56 bits in one go.
+#ifdef DISABLE_ACC_BIT_WRITER
+  void Write(size_t n_bits, uint64_t bits);
+#else
+  void init(size_t cnt);         // resizes to `cnt` parts, selects part 0
+  void update_part(size_t cnt);  // selects the part that Write() queues into
+  void Write(size_t n_bits, uint64_t bits);  // queues only; see Finalize
+  void Finalize(std::vector<int> seq);  // flushes the listed parts, in order
+  void Finalize();                      // flushes all parts, in index order
+#endif
+
+  // This should only rarely be used - e.g. when the current location will be
+  // referenced via byte offset (TOCs point to groups), or byte-aligned reading
+  // is required for speed. WARNING: this interacts badly with operator+=,
+  // see above.
+  void ZeroPadToByte() {
+    const size_t remainder_bits =
+        RoundUpBitsToByteMultiple(bits_written_) - bits_written_;
+    if (remainder_bits == 0) return;
+    Write(remainder_bits, 0);
+    JXL_ASSERT(bits_written_ % kBitsPerByte == 0);
+  }
+
+  // TODO(janwas): remove? only called from ANS
+  void RewindStorage(const size_t pos0) {
+    JXL_ASSERT(pos0 <= bits_written_);
+    bits_written_ = pos0;
+    static const uint8_t kRewindMasks[8] = {0x0, 0x1,  0x3,  0x7,
+                                            0xf, 0x1f, 0x3f, 0x7f};
+    storage_[pos0 >> 3] &= kRewindMasks[pos0 & 7];
+  }
+
+ private:
+  size_t bits_written_;
+#ifndef DISABLE_ACC_BIT_WRITER
+  size_t old_bits_written_;  // bits already flushed to storage_ by Finalize
+#endif
+  PaddedBytes storage_;
+  Allotment* current_allotment_ = nullptr;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_BIT_WRITER_H_
\ No newline at end of file
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_comparator.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_comparator.cc
new file mode 100644
index 0000000000..e253509466
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_comparator.cc
@@ -0,0 +1,93 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_butteraugli_comparator.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jxl/color_management.h"
+
+namespace jxl {
+
+JxlButteraugliComparator::JxlButteraugliComparator(
+    const ButteraugliParams& params)
+    : params_(params) {}  // copies params; no reference image is set yet
+
+Status JxlButteraugliComparator::SetReferenceImage(const ImageBundle& ref) {
+  // TransformIfNeeded sets `linear`; presumably it points at `ref` itself or
+  // at `scratch` holding a linear-sRGB version (confirm in its definition).
+  ImageMetadata scratch_metadata = *ref.metadata();
+  ImageBundle scratch(&scratch_metadata);
+  const ImageBundle* linear;
+  const auto& target = ColorEncoding::LinearSRGB(ref.IsGray());
+  if (!TransformIfNeeded(ref, target, /*pool=*/nullptr, &scratch, &linear)) {
+    return false;
+  }
+  comparator_.reset(new ButteraugliComparator(linear->color(), params_));
+  // Remember the reference dimensions; CompareWith checks against them.
+  xsize_ = ref.xsize();
+  ysize_ = ref.ysize();
+  return true;
+}
+
+Status JxlButteraugliComparator::CompareWith(const ImageBundle& actual,
+                                             ImageF* diffmap, float* score) {
+  if (comparator_ == nullptr) {
+    return JXL_FAILURE("Must set reference image first");
+  }
+  if (!(xsize_ == actual.xsize() && ysize_ == actual.ysize())) {
+    return JXL_FAILURE("Images must have same size");
+  }
+  // Same preparation as for the reference: ask for a linear-sRGB view.
+  ImageMetadata tmp_metadata = *actual.metadata();
+  ImageBundle tmp(&tmp_metadata);
+  const ImageBundle* linear;
+  const auto& target = ColorEncoding::LinearSRGB(actual.IsGray());
+  if (!TransformIfNeeded(actual, target, /*pool=*/nullptr, &tmp, &linear)) {
+    return false;
+  }
+  // Compute into a local image; it is swapped into *diffmap at the end.
+  ImageF local_diffmap(xsize_, ysize_);
+  comparator_->Diffmap(linear->color(), local_diffmap);
+  // Both outputs are optional; a null pointer skips that output.
+  if (score) {
+    *score = ButteraugliScoreFromDiffmap(local_diffmap, &params_);
+  }
+  if (diffmap) {
+    diffmap->Swap(local_diffmap);
+  }
+
+  return true;
+}
+
+float JxlButteraugliComparator::GoodQualityScore() const {
+  return ButteraugliFuzzyInverse(1.5);  // fixed value: same result on every call
+}
+
+float JxlButteraugliComparator::BadQualityScore() const {
+  return ButteraugliFuzzyInverse(0.5);  // fixed value: same result on every call
+}
+
+float ButteraugliDistance(const ImageBundle& rgb0, const ImageBundle& rgb1,
+                          const ButteraugliParams& params, ImageF* distmap,
+                          ThreadPool* pool) {
+  JxlButteraugliComparator butteraugli(params);  // comparator for ComputeScore
+  return ComputeScore(rgb0, rgb1, &butteraugli, distmap, pool);
+}
+
+float ButteraugliDistance(const CodecInOut& rgb0, const CodecInOut& rgb1,
+                          const ButteraugliParams& params, ImageF* distmap,
+                          ThreadPool* pool) {
+  JxlButteraugliComparator butteraugli(params);
+  JXL_ASSERT(rgb0.frames.size() == rgb1.frames.size());
+  float worst = 0.0f;  // largest per-frame score so far
+  for (size_t f = 0; f != rgb0.frames.size(); ++f) {
+    worst = std::max(worst, ComputeScore(rgb0.frames[f], rgb1.frames[f],
+                                         &butteraugli, distmap, pool));
+  }
+  return worst;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_comparator.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_comparator.h
new file mode 100644
index 0000000000..48a1d8950e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_comparator.h
@@ -0,0 +1,56 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_BUTTERAUGLI_COMPARATOR_H_
+#define LIB_JXL_ENC_BUTTERAUGLI_COMPARATOR_H_
+
+#include <stddef.h>
+
+#include <memory>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/butteraugli/butteraugli.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/enc_comparator.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+class JxlButteraugliComparator : public Comparator {  // uses ButteraugliComparator
+ public:
+  explicit JxlButteraugliComparator(const ButteraugliParams& params);
+  // Must succeed before CompareWith is called; CompareWith fails otherwise.
+  Status SetReferenceImage(const ImageBundle& ref) override;
+  // Fails unless `actual` has the reference size; diffmap/score may be null.
+  Status CompareWith(const ImageBundle& actual, ImageF* diffmap,
+                     float* score) override;
+
+  float GoodQualityScore() const override;
+  float BadQualityScore() const override;
+
+ private:
+  ButteraugliParams params_;
+  std::unique_ptr<ButteraugliComparator> comparator_;  // set by SetReferenceImage
+  size_t xsize_ = 0;  // reference image width, 0 until a reference is set
+  size_t ysize_ = 0;  // reference image height, 0 until a reference is set
+};
+
+// Returns the butteraugli distance between rgb0 and rgb1.
+// If distmap is not null, it must be the same size as rgb0 and rgb1.
+float ButteraugliDistance(const ImageBundle& rgb0, const ImageBundle& rgb1,
+                          const ButteraugliParams& params,
+                          ImageF* distmap = nullptr,
+                          ThreadPool* pool = nullptr);
+
+float ButteraugliDistance(const CodecInOut& rgb0, const CodecInOut& rgb1,
+                          const ButteraugliParams& params,
+                          ImageF* distmap = nullptr,
+                          ThreadPool* pool = nullptr);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_BUTTERAUGLI_COMPARATOR_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_pnorm.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_pnorm.cc
new file mode 100644
index 0000000000..7c3fb9c287
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_pnorm.cc
@@ -0,0 +1,212 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_butteraugli_pnorm.h"
+
+#include <math.h>
+#include <stdlib.h>
+
+#include <atomic>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_butteraugli_pnorm.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Rebind;
+
+double ComputeDistanceP(const ImageF& distmap, const ButteraugliParams& params,
+                        double p) {  // returns the mean of the p, 2p, 4p norms
+  PROFILER_FUNC;
+  // In approximate-border mode, skip pixels on the border likely to be affected
+  // by FastGauss' zero-valued-boundary behavior. The border is less than half
+  // the largest-diameter kernel (37x37 pixels), and 0 if the image is tiny.
+  // NOTE: chosen such that it is vector-aligned.
+  size_t border = (params.approximate_border) ? 8 : 0;
+  if (distmap.xsize() <= 2 * border || distmap.ysize() <= 2 * border) {
+    border = 0;
+  }
+  // Normalization uses the full pixel count, including any skipped border.
+  const double onePerPixels = 1.0 / (distmap.ysize() * distmap.xsize());
+  if (std::abs(p - 3.0) < 1E-6) {  // vectorized path, taken only for p == 3
+    double sum1[3] = {0.0};  // scalar row tails: sums of d^3, d^6, d^12
+
+// Prefer double if possible, but otherwise use float rather than scalar.
+#if HWY_CAP_FLOAT64
+    using T = double;
+    const Rebind<float, HWY_FULL(double)> df;
+#else
+    using T = float;
+#endif
+    const HWY_FULL(T) d;
+    constexpr size_t N = MaxLanes(HWY_FULL(T)());
+    // Manually aligned storage to avoid asan crash on clang-7 due to
+    // unaligned spill.
+    HWY_ALIGN T sum_totals0[N] = {0};
+    HWY_ALIGN T sum_totals1[N] = {0};
+    HWY_ALIGN T sum_totals2[N] = {0};
+
+    for (size_t y = border; y < distmap.ysize() - border; ++y) {
+      const float* JXL_RESTRICT row = distmap.ConstRow(y);
+
+      auto sums0 = Zero(d);
+      auto sums1 = Zero(d);
+      auto sums2 = Zero(d);
+
+      size_t x = border;
+      for (; x + Lanes(d) <= distmap.xsize() - border; x += Lanes(d)) {
+#if HWY_CAP_FLOAT64
+        const auto d1 = PromoteTo(d, Load(df, row + x));
+#else
+        const auto d1 = Load(d, row + x);
+#endif
+        const auto d2 = d1 * d1 * d1;  // d^3
+        sums0 += d2;
+        const auto d3 = d2 * d2;  // d^6
+        sums1 += d3;
+        const auto d4 = d3 * d3;  // d^12
+        sums2 += d4;
+      }
+      // Fold this row's vector sums into the running per-lane totals.
+      Store(sums0 + Load(d, sum_totals0), d, sum_totals0);
+      Store(sums1 + Load(d, sum_totals1), d, sum_totals1);
+      Store(sums2 + Load(d, sum_totals2), d, sum_totals2);
+
+      for (; x < distmap.xsize() - border; ++x) {  // scalar remainder of the row
+        const double d1 = row[x];
+        double d2 = d1 * d1 * d1;
+        sum1[0] += d2;
+        d2 *= d2;
+        sum1[1] += d2;
+        d2 *= d2;
+        sum1[2] += d2;
+      }
+    }
+    double v = 0;  // accumulates the three roots, averaged below
+    v += pow(
+        onePerPixels * (sum1[0] + GetLane(SumOfLanes(Load(d, sum_totals0)))),
+        1.0 / (p * 1.0));
+    v += pow(
+        onePerPixels * (sum1[1] + GetLane(SumOfLanes(Load(d, sum_totals1)))),
+        1.0 / (p * 2.0));
+    v += pow(
+        onePerPixels * (sum1[2] + GetLane(SumOfLanes(Load(d, sum_totals2)))),
+        1.0 / (p * 4.0));
+    v /= 3.0;
+    return v;
+  } else {
+    static std::atomic<int> once{0};  // makes the warning below print only once
+    if (once.fetch_add(1, std::memory_order_relaxed) == 0) {
+      JXL_WARNING("WARNING: using slow ComputeDistanceP");
+    }
+    double sum1[3] = {0.0};
+    for (size_t y = border; y < distmap.ysize() - border; ++y) {
+      const float* JXL_RESTRICT row = distmap.ConstRow(y);
+      for (size_t x = border; x < distmap.xsize() - border; ++x) {
+        double d2 = std::pow(row[x], p);  // d^p; squared twice below: d^2p, d^4p
+        sum1[0] += d2;
+        d2 *= d2;
+        sum1[1] += d2;
+        d2 *= d2;
+        sum1[2] += d2;
+      }
+    }
+    double v = 0;
+    for (int i = 0; i < 3; ++i) {
+      v += pow(onePerPixels * (sum1[i]), 1.0 / (p * (1 << i)));
+    }
+    v /= 3.0;
+    return v;
+  }
+}
+
+// TODO(lode): take alpha into account when needed
+double ComputeDistance2(const ImageBundle& ib1, const ImageBundle& ib2) {
+  PROFILER_FUNC;
+  // Convert to sRGB - closer to perception than linear.
+  const Image3F* srgb1 = &ib1.color();
+  Image3F copy1;  // only filled when ib1 is not already sRGB
+  if (!ib1.IsSRGB()) {
+    JXL_CHECK(ib1.CopyTo(Rect(ib1), ColorEncoding::SRGB(ib1.IsGray()), &copy1));
+    srgb1 = &copy1;
+  }
+  const Image3F* srgb2 = &ib2.color();
+  Image3F copy2;  // only filled when ib2 is not already sRGB
+  if (!ib2.IsSRGB()) {
+    JXL_CHECK(ib2.CopyTo(Rect(ib2), ColorEncoding::SRGB(ib2.IsGray()), &copy2));
+    srgb2 = &copy2;
+  }
+
+  JXL_CHECK(SameSize(*srgb1, *srgb2));
+
+  // TODO(veluca): SIMD.
+  float yuvmatrix[3][3] = {{0.299, 0.587, 0.114},
+                           {-0.14713, -0.28886, 0.436},
+                           {0.615, -0.51499, -0.10001}};
+  double sum_of_squares[3] = {};  // per output channel, summed over all pixels
+  for (size_t y = 0; y < srgb1->ysize(); ++y) {
+    const float* JXL_RESTRICT row1[3];
+    const float* JXL_RESTRICT row2[3];
+    for (size_t j = 0; j < 3; j++) {
+      row1[j] = srgb1->ConstPlaneRow(j, y);
+      row2[j] = srgb2->ConstPlaneRow(j, y);
+    }
+    for (size_t x = 0; x < srgb1->xsize(); ++x) {
+      float cdiff[3] = {};
+      // YUV conversion is linear, so we can run it on the difference.
+      for (size_t j = 0; j < 3; j++) {
+        cdiff[j] = row1[j][x] - row2[j][x];
+      }
+      float yuvdiff[3] = {};
+      for (size_t j = 0; j < 3; j++) {
+        for (size_t k = 0; k < 3; k++) {
+          yuvdiff[j] += yuvmatrix[j][k] * cdiff[k];
+        }
+      }
+      for (size_t j = 0; j < 3; j++) {
+        sum_of_squares[j] += yuvdiff[j] * yuvdiff[j];
+      }
+    }
+  }
+  // Weighted PSNR as in JPEG-XL: chroma counts 1/8.
+  const float weights[3] = {6.0f / 8, 1.0f / 8, 1.0f / 8};
+  // Avoid squaring the weight - 1/64 is too extreme.
+  double norm = 0;
+  for (size_t i = 0; i < 3; i++) {
+    norm += std::sqrt(sum_of_squares[i]) * weights[i];
+  }
+  // This function returns distance *squared*.
+  return norm * norm;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(ComputeDistanceP);
+double ComputeDistanceP(const ImageF& distmap, const ButteraugliParams& params,
+                        double p) {  // forwards to the HWY_EXPORTed version
+  return HWY_DYNAMIC_DISPATCH(ComputeDistanceP)(distmap, params, p);
+}
+
+HWY_EXPORT(ComputeDistance2);
+double ComputeDistance2(const ImageBundle& ib1, const ImageBundle& ib2) {
+  return HWY_DYNAMIC_DISPATCH(ComputeDistance2)(ib1, ib2);  // HWY_EXPORTed above
+}
+
+}  // namespace jxl
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_pnorm.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_pnorm.h
new file mode 100644
index 0000000000..5579c0adee
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_butteraugli_pnorm.h
@@ -0,0 +1,24 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_BUTTERAUGLI_PNORM_H_
+#define LIB_JXL_ENC_BUTTERAUGLI_PNORM_H_
+
+#include <stdint.h>
+
+#include "lib/jxl/butteraugli/butteraugli.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+// Computes p-norm given the butteraugli distmap.
+double ComputeDistanceP(const ImageF& distmap, const ButteraugliParams& params,
+                        double p);
+
+double ComputeDistance2(const ImageBundle& ib1, const ImageBundle& ib2);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_BUTTERAUGLI_PNORM_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cache.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cache.cc
new file mode 100644
index 0000000000..038a706d02
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cache.cc
@@ -0,0 +1,198 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_cache.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <type_traits>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_frame.h"
+#include "lib/jxl/enc_frame.h"
+#include "lib/jxl/enc_group.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/passes_state.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+
+void InitializePassesEncoder(const Image3F& opsin, ThreadPool* pool,
+                             PassesEncoderState* enc_state,
+                             ModularFrameEncoder* modular_frame_encoder,
+                             AuxOut* aux_out) {
+  PROFILER_FUNC;
+  // Fills enc_state for one frame: coefficients, then DC, then AC metadata.
+  PassesSharedState& JXL_RESTRICT shared = enc_state->shared;
+
+  enc_state->histogram_idx.resize(shared.frame_dim.num_groups);
+
+  enc_state->x_qm_multiplier =
+      std::pow(1.25f, shared.frame_header.x_qm_scale - 2.0f);
+  enc_state->b_qm_multiplier =
+      std::pow(1.25f, shared.frame_header.b_qm_scale - 2.0f);
+  // Grow or shrink coeffs so it holds exactly one entry per pass.
+  if (enc_state->coeffs.size() < shared.frame_header.passes.num_passes) {
+    enc_state->coeffs.reserve(shared.frame_header.passes.num_passes);
+    for (size_t i = enc_state->coeffs.size();
+         i < shared.frame_header.passes.num_passes; i++) {
+      // Allocate enough coefficients for each group on every row.
+      enc_state->coeffs.emplace_back(make_unique<ACImageT<int32_t>>(
+          kGroupDim * kGroupDim, shared.frame_dim.num_groups));
+    }
+  }
+  while (enc_state->coeffs.size() > shared.frame_header.passes.num_passes) {
+    enc_state->coeffs.pop_back();
+  }
+
+  Image3F dc(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
+  RunOnPool(
+      pool, 0, shared.frame_dim.num_groups, ThreadPool::SkipInit(),
+      [&](size_t group_idx, size_t _) {
+        ComputeCoefficients(group_idx, enc_state, opsin, &dc);
+      },
+      "Compute coeffs");
+
+  if (shared.frame_header.flags & FrameHeader::kUseDcFrame) {
+    CompressParams cparams = enc_state->cparams;  // copy, adjusted for the DC frame
+    // Guess a distance that produces good initial results.
+    cparams.butteraugli_distance =
+        std::max(kMinButteraugliDistance,
+                 enc_state->cparams.butteraugli_distance * 0.1f);
+    cparams.dots = Override::kOff;
+    cparams.noise = Override::kOff;
+    cparams.patches = Override::kOff;
+    cparams.gaborish = Override::kOff;
+    cparams.epf = 0;
+    cparams.max_error_mode = true;
+    cparams.resampling = 1;
+    cparams.ec_resampling = 1;
+    for (size_t c = 0; c < 3; c++) {
+      cparams.max_error[c] = shared.quantizer.MulDC()[c];
+    }
+    JXL_ASSERT(cparams.progressive_dc > 0);
+    cparams.progressive_dc--;
+    // The DC frame will have alpha=0. Don't erase its contents.
+    cparams.keep_invisible = Override::kOn;
+    // No EPF or Gaborish in DC frames.
+    cparams.epf = 0;  // already set to the same value above
+    cparams.gaborish = Override::kOff;  // already set to the same value above
+    // Use kVarDCT in max_error_mode for intermediate progressive DC,
+    // and kModular for the smallest DC (first in the bitstream)
+    if (cparams.progressive_dc == 0) {
+      cparams.modular_mode = true;
+      cparams.quality_pair.first = cparams.quality_pair.second =
+          99.f - enc_state->cparams.butteraugli_distance * 0.2f;
+    }
+    ImageBundle ib(&shared.metadata->m);
+    // This is a lie - dc is in XYB
+    // (but EncodeFrame will skip RGB->XYB conversion anyway)
+    ib.SetFromImage(
+        std::move(dc),
+        ColorEncoding::LinearSRGB(shared.metadata->m.color_encoding.IsGray()));
+    if (!ib.metadata()->extra_channel_info.empty()) {
+      // Add dummy extra channels to the patch image: dc_level frames do not yet
+      // support extra channels, but the codec expects that the amount of extra
+      // channels in frames matches that in the metadata of the codestream.
+      std::vector<ImageF> extra_channels;
+      extra_channels.reserve(ib.metadata()->extra_channel_info.size());
+      for (size_t i = 0; i < ib.metadata()->extra_channel_info.size(); i++) {
+        extra_channels.emplace_back(ib.xsize(), ib.ysize());
+        // Must initialize the image with data to not affect blending with
+        // uninitialized memory.
+        // TODO(lode): dc_level must copy and use the real extra channels
+        // instead.
+        ZeroFillImage(&extra_channels.back());
+      }
+      ib.SetExtraChannels(std::move(extra_channels));
+    }
+    std::unique_ptr<PassesEncoderState> state =
+        jxl::make_unique<PassesEncoderState>();
+
+    auto special_frame = std::unique_ptr<BitWriter>(new BitWriter());
+    FrameInfo dc_frame_info;
+    dc_frame_info.frame_type = FrameType::kDCFrame;
+    dc_frame_info.dc_level = shared.frame_header.dc_level + 1;
+    dc_frame_info.ib_needs_color_transform = false;
+    dc_frame_info.save_before_color_transform = true;  // Implicitly true
+    // TODO(lode): the EncodeFrame / DecodeFrame pair here is likely broken in
+    // case of dc_level >= 3, since EncodeFrame may output multiple frames
+    // to the bitwriter, while DecodeFrame reads only one.
+    JXL_CHECK(EncodeFrame(cparams, dc_frame_info, shared.metadata, ib,
+                          state.get(), pool, special_frame.get(), nullptr));
+    const Span<const uint8_t> encoded = special_frame->GetSpan();
+    enc_state->special_frames.emplace_back(std::move(special_frame));
+    // Decode the frame just encoded; its DC is copied into shared.dc_storage.
+    BitReader br(encoded);
+    ImageBundle decoded(&shared.metadata->m);
+    std::unique_ptr<PassesDecoderState> dec_state =
+        jxl::make_unique<PassesDecoderState>();
+    JXL_CHECK(dec_state->output_encoding_info.Set(
+        *shared.metadata,
+        ColorEncoding::LinearSRGB(shared.metadata->m.color_encoding.IsGray())));
+    JXL_CHECK(DecodeFrame({}, dec_state.get(), pool, &br, &decoded,
+                          *shared.metadata, /*constraints=*/nullptr));
+    // TODO(lode): shared.frame_header.dc_level should be equal to
+    // dec_state.shared->frame_header.dc_level - 1 here, since above we set
+    // dc_frame_info.dc_level = shared.frame_header.dc_level + 1, and
+    // dc_frame_info.dc_level is used by EncodeFrame. However, if EncodeFrame
+    // outputs multiple frames, this assumption could be wrong.
+    shared.dc_storage =
+        CopyImage(dec_state->shared->dc_frames[shared.frame_header.dc_level]);
+    ZeroFillImage(&shared.quant_dc);
+    shared.dc = &shared.dc_storage;
+    JXL_CHECK(br.Close());
+  } else {  // no DC frame: DC goes through the modular frame encoder
+    auto compute_dc_coeffs = [&](int group_index, int /* thread */) {
+      modular_frame_encoder->AddVarDCTDC(
+          dc, group_index,
+          enc_state->cparams.butteraugli_distance >= 2.0f &&
+              enc_state->cparams.speed_tier < SpeedTier::kFalcon,
+          enc_state);
+    };
+    RunOnPool(pool, 0, shared.frame_dim.num_dc_groups, ThreadPool::SkipInit(),
+              compute_dc_coeffs, "Compute DC coeffs");
+    // TODO(veluca): this is only useful in tests and if inspection is enabled.
+    if (!(shared.frame_header.flags & FrameHeader::kSkipAdaptiveDCSmoothing)) {
+      AdaptiveDCSmoothing(shared.quantizer.MulDC(), &shared.dc_storage, pool);
+    }
+  }
+  auto compute_ac_meta = [&](int group_index, int /* thread */) {
+    modular_frame_encoder->AddACMetadata(group_index, /*jpeg_transcode=*/false,
+                                         enc_state);
+  };
+  RunOnPool(pool, 0, shared.frame_dim.num_dc_groups, ThreadPool::SkipInit(),
+            compute_ac_meta, "Compute AC Metadata");
+
+  if (aux_out != nullptr) {  // aux_out is optional
+    aux_out->InspectImage3F("compressed_image:InitializeFrameEncCache:dc_dec",
+                            shared.dc_storage);
+  }
+}
+
+void EncCache::InitOnce() {
+  PROFILER_FUNC;
+  // num_nzeroes is allocated only while its xsize is still 0.
+  const bool already_allocated = num_nzeroes.xsize() != 0;
+  if (already_allocated) return;
+  num_nzeroes = Image3I(kGroupDimInBlocks, kGroupDimInBlocks);
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cache.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cache.h
new file mode 100644
index 0000000000..4c78893d75
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cache.h
@@ -0,0 +1,116 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_CACHE_H_
+#define LIB_JXL_ENC_CACHE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_heuristics.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/passes_state.h"
+#include "lib/jxl/progressive_split.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+
+// Contains encoder state.
+struct PassesEncoderState {
+  PassesSharedState shared;
+
+  ImageF initial_quant_field;    // Invalid in Falcon mode.
+  ImageF initial_quant_masking;  // Invalid in Falcon mode.
+
+  // Per-pass DCT coefficients for the image. One row per group.
+  std::vector<std::unique_ptr<ACImage>> coeffs;
+
+  // Raw data for special (reference+DC) frames.
+  std::vector<std::unique_ptr<BitWriter>> special_frames;
+
+  // For splitting into passes.
+  ProgressiveSplitter progressive_splitter;
+
+  CompressParams cparams;
+  // Element type of `passes` below: tokens, context map and entropy codes.
+  struct PassData {
+    std::vector<std::vector<Token>> ac_tokens;
+    std::vector<uint8_t> context_map;
+    EntropyEncodingData codes;
+  };
+
+  std::vector<PassData> passes;
+  std::vector<uint8_t> histogram_idx;
+
+  // Coefficient orders that are non-default.
+  std::vector<uint32_t> used_orders;
+
+  // Multiplier to be applied to the quant matrices of the x channel.
+  float x_qm_multiplier = 1.0f;
+  float b_qm_multiplier = 1.0f;  // presumably the b-channel analogue; confirm
+
+  // Heuristics to be used by the encoder; DefaultEncoderHeuristics by default.
+  std::unique_ptr<EncoderHeuristics> heuristics =
+      make_unique<DefaultEncoderHeuristics>();
+};
+
+// Initialize per-frame information.
+class ModularFrameEncoder;
+
+// XLNX_MODIFY: the previous declaration is kept commented out below.
+/*void InitializePassesEncoder(const Image3F& opsin, ThreadPool* pool,
+                             PassesEncoderState* passes_enc_state,
+                             ModularFrameEncoder* modular_frame_encoder,
+                             AuxOut* aux_out);*/
+
+void InitializePassesEncoder(const Image3F& opsin, ThreadPool* pool,
+                             PassesEncoderState* passes_enc_state,
+                             ModularFrameEncoder* modular_frame_encoder,
+                             AuxOut* aux_out,
+                             // ---- begin HLS interface ----
+                             size_t xsize, size_t ysize,
+                             std::vector<std::vector<float>>& dctIDT,
+                             std::vector<std::vector<float>>& dct2x2,
+                             std::vector<std::vector<float>>& dct4x4,
+                             std::vector<std::vector<float>>& dct8x8,
+                             std::vector<std::vector<float>>& dct16x16,
+                             std::vector<std::vector<float>>& dct32x32,
+
+                             std::vector<std::vector<float>>& dcIDT,
+                             std::vector<std::vector<float>>& dc2x2,
+                             std::vector<std::vector<float>>& dc4x4,
+                             std::vector<std::vector<float>>& dc8x8,
+                             std::vector<std::vector<float>>& dc16x16,
+                             std::vector<std::vector<float>>& dc32x32
+                             // ---- end HLS interface ----
+                             );
+
+// Working area for ComputeCoefficients (per-group!)
+struct EncCache {
+  // Allocates num_nzeroes on the first call; later calls leave it untouched.
+  void InitOnce();
+
+  // TokenizeCoefficients
+  Image3I num_nzeroes;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_CACHE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_chroma_from_luma.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_chroma_from_luma.cc
new file mode 100644
index 0000000000..e5c3f38991
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_chroma_from_luma.cc
@@ -0,0 +1,375 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_chroma_from_luma.h"
+
+#include <float.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_chroma_from_luma.cc"
+#include <hwy/aligned_allocator.h>
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_transforms-inl.h"
+#include "lib/jxl/enc_transforms-inl.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/quantizer.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+static HWY_FULL(float) df;
+
+struct CFLFunction {
+  static constexpr float kCoeff = 1.f / 3;  // the 1/3 factor of f below
+  static constexpr float kThres = 100.0f;  // residuals with |v| >= this add 0
+  static constexpr float kInvColorFactor = 1.0f / kDefaultColorFactor;
+  CFLFunction(const float* values_m, const float* values_s, size_t num,
+              float base, float distance_mul)
+      : values_m(values_m),
+        values_s(values_s),
+        num(num),
+        base(base),
+        distance_mul(distance_mul) {}
+
+  // Returns f'(x), where f is 1/3 * sum ((|color residual| + 1)^2-1) +
+  // distance_mul * x^2 * num. *fpeps / *fmeps get the value at x+eps / x-eps.
+  float Compute(float x, float eps, float* fpeps, float* fmeps) const {
+    float first_derivative = 2 * distance_mul * num * x;
+    float first_derivative_peps = 2 * distance_mul * num * (x + eps);
+    float first_derivative_meps = 2 * distance_mul * num * (x - eps);
+
+    const auto inv_color_factor = Set(df, kInvColorFactor);
+    const auto thres = Set(df, kThres);
+    const auto coeffx2 = Set(df, kCoeff * 2.0f);
+    const auto one = Set(df, 1.0f);
+    const auto zero = Set(df, 0.0f);
+    const auto base_v = Set(df, base);
+    const auto x_v = Set(df, x);
+    const auto xpe_v = Set(df, x + eps);
+    const auto xme_v = Set(df, x - eps);
+    auto fd_v = Zero(df);
+    auto fdpe_v = Zero(df);
+    auto fdme_v = Zero(df);
+    JXL_ASSERT(num % Lanes(df) == 0);
+    // NOTE(review): the av (unperturbed) mask gates all three sums below.
+    for (size_t i = 0; i < num; i += Lanes(df)) {
+      // color residual = ax + b
+      const auto a = inv_color_factor * Load(df, values_m + i);
+      const auto b = base_v * Load(df, values_m + i) - Load(df, values_s + i);
+      const auto v = a * x_v + b;
+      const auto vpe = a * xpe_v + b;
+      const auto vme = a * xme_v + b;
+      const auto av = Abs(v);
+      const auto avpe = Abs(vpe);
+      const auto avme = Abs(vme);
+      auto d = coeffx2 * (av + one) * a;
+      auto dpe = coeffx2 * (avpe + one) * a;
+      auto dme = coeffx2 * (avme + one) * a;
+      d = IfThenElse(v < zero, zero - d, d);
+      dpe = IfThenElse(vpe < zero, zero - dpe, dpe);
+      dme = IfThenElse(vme < zero, zero - dme, dme);
+      fd_v += IfThenElse(av >= thres, zero, d);
+      fdpe_v += IfThenElse(av >= thres, zero, dpe);
+      fdme_v += IfThenElse(av >= thres, zero, dme);
+    }
+
+    *fpeps = first_derivative_peps + GetLane(SumOfLanes(fdpe_v));
+    *fmeps = first_derivative_meps + GetLane(SumOfLanes(fdme_v));
+    return first_derivative + GetLane(SumOfLanes(fd_v));
+  }
+
+  const float* JXL_RESTRICT values_m;
+  const float* JXL_RESTRICT values_s;
+  size_t num;  // must be a multiple of Lanes(df); asserted in Compute()
+  float base;
+  float distance_mul;
+};
+
+int32_t FindBestMultiplier(const float* values_m, const float* values_s,
+                           size_t num, float base, float distance_mul,
+                           bool fast) {
+  // With no samples there is nothing to fit.
+  if (num == 0) return 0;
+
+  float x = 0;
+  if (fast) {
+    // Closed-form estimate from the accumulated a*a and a*b sums.
+    static constexpr float kInvColorFactor = 1.0f / kDefaultColorFactor;
+    const auto inv_color_factor = Set(df, kInvColorFactor);
+    const auto base_v = Set(df, base);
+    auto sum_aa = Zero(df);
+    auto sum_ab = Zero(df);
+    for (size_t pos = 0; pos < num; pos += Lanes(df)) {
+      // color residual = ax + b
+      const auto a = inv_color_factor * Load(df, values_m + pos);
+      const auto b =
+          base_v * Load(df, values_m + pos) - Load(df, values_s + pos);
+      sum_aa = MulAdd(a, a, sum_aa);
+      sum_ab = MulAdd(a, b, sum_ab);
+    }
+    // The denominator carries the "+ distance_mul * x^2 * num" term.
+    const float numerator = -GetLane(SumOfLanes(sum_ab));
+    x = numerator / (GetLane(SumOfLanes(sum_aa)) + num * distance_mul * 0.5f);
+  } else {
+    constexpr float eps = 1;
+    constexpr float kClamp = 20.0f;
+    CFLFunction fn(values_m, values_s, num, base, distance_mul);
+    // At most 20 Newton steps. The second derivative is a finite difference
+    // of the first derivative at x +- eps, as exact derivatives are too noisy.
+    for (size_t iter = 0; iter < 20; iter++) {
+      float deriv_plus, deriv_minus;
+      const float deriv = fn.Compute(x, eps, &deriv_plus, &deriv_minus);
+      const float curvature = (deriv_plus - deriv_minus) / (2 * eps);
+      const float step = deriv / curvature;
+      x -= std::min(kClamp, std::max(-kClamp, step));
+      if (std::abs(step) < 3e-3) break;
+    }
+  }
+  return std::max(-128.0f, std::min(127.0f, roundf(x)));
+}
+
+void InitDCStorage(size_t num_blocks, ImageF* dc_values) {
+  // Row layout (as filled in by ComputeTile): 0 = Y scaled for X, 1 = X,
+  // 2 = Y scaled for B, 3 = B. The width is rounded up to whole vectors.
+  const size_t lanes = Lanes(df);
+  *dc_values = ImageF(RoundUpTo(num_blocks, lanes), 4);
+  JXL_ASSERT(dc_values->xsize() != 0);
+
+  // Zero-fill the last `lanes` entries of every row.
+  const size_t row_len = dc_values->xsize();
+  for (size_t row = 0; row < 4; row++) {
+    float* values = dc_values->Row(row);
+    for (size_t col = row_len - lanes; col < row_len; col++) {
+      values[col] = 0;
+    }
+  }
+}
+
+void ComputeDC(const ImageF& dc_values, bool fast, int* dc_x, int* dc_b) {
+  // Fits one multiplier for X and one for B over every stored DC value.
+  constexpr float kDistanceMultiplierDC = 1e-5f;
+  const size_t num = dc_values.xsize();
+  // Rows 0/1 feed the X fit (base 0), rows 2/3 the B fit (base kYToBRatio).
+  *dc_x = FindBestMultiplier(dc_values.Row(0), dc_values.Row(1), num,
+                             /*base=*/0.0f, kDistanceMultiplierDC, fast);
+  // Same solver and distance multiplier, different rows and base.
+  *dc_b = FindBestMultiplier(dc_values.Row(2), dc_values.Row(3), num,
+                             /*base=*/kYToBRatio, kDistanceMultiplierDC, fast);
+}
+
+void ComputeTile(const Image3F& opsin, const DequantMatrices& dequant,
+                 const AcStrategyImage* ac_strategy, const Quantizer* quantizer,
+                 const Rect& r, bool fast, bool use_dct8, ImageSB* map_x,
+                 ImageSB* map_b, ImageF* dc_values, float* mem) {
+  static_assert(kEncTileDimInBlocks == kColorTileDimInBlocks,
+                "Invalid color tile dim");
+  size_t xsize_blocks = opsin.xsize() / kBlockDim;
+  constexpr float kDistanceMultiplierAC = 1e-3f;
+  // The rect r is in block units; pixel rows are y * kBlockDim below.
+  const size_t y0 = r.y0();
+  const size_t x0 = r.x0();
+  const size_t x1 = r.x0() + r.xsize();
+  const size_t y1 = r.y0() + r.ysize();
+  // Coordinates of this tile in the per-tile output maps.
+  int ty = y0 / kColorTileDimInBlocks;
+  int tx = x0 / kColorTileDimInBlocks;
+
+  int8_t* JXL_RESTRICT row_out_x = map_x->Row(ty);
+  int8_t* JXL_RESTRICT row_out_b = map_b->Row(ty);
+
+  float* JXL_RESTRICT dc_values_yx = dc_values->Row(0);
+  float* JXL_RESTRICT dc_values_x = dc_values->Row(1);
+  float* JXL_RESTRICT dc_values_yb = dc_values->Row(2);
+  float* JXL_RESTRICT dc_values_b = dc_values->Row(3);
+  // Split the caller's mem into consecutive buffers (layout checked below).
+  // All are aligned.
+  float* HWY_RESTRICT block_y = mem;
+  float* HWY_RESTRICT block_x = block_y + AcStrategy::kMaxCoeffArea;
+  float* HWY_RESTRICT block_b = block_x + AcStrategy::kMaxCoeffArea;
+  float* HWY_RESTRICT coeffs_yx = block_b + AcStrategy::kMaxCoeffArea;
+  float* HWY_RESTRICT coeffs_x = coeffs_yx + kColorTileDim * kColorTileDim;
+  float* HWY_RESTRICT coeffs_yb = coeffs_x + kColorTileDim * kColorTileDim;
+  float* HWY_RESTRICT coeffs_b = coeffs_yb + kColorTileDim * kColorTileDim;
+  float* HWY_RESTRICT scratch_space = coeffs_b + kColorTileDim * kColorTileDim;
+  JXL_DASSERT(scratch_space + 2 * AcStrategy::kMaxCoeffArea ==
+              block_y + CfLHeuristics::kItemsPerThread);
+
+  // Small (~256 bytes each)
+  HWY_ALIGN_MAX float
+      dc_y[AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks] = {};
+  HWY_ALIGN_MAX float
+      dc_x[AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks] = {};
+  HWY_ALIGN_MAX float
+      dc_b[AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks] = {};
+  size_t num_ac = 0;  // floats stored so far in each coeffs_* buffer
+
+  for (size_t y = y0; y < y1; ++y) {
+    const float* JXL_RESTRICT row_y = opsin.ConstPlaneRow(1, y * kBlockDim);
+    const float* JXL_RESTRICT row_x = opsin.ConstPlaneRow(0, y * kBlockDim);
+    const float* JXL_RESTRICT row_b = opsin.ConstPlaneRow(2, y * kBlockDim);
+    size_t stride = opsin.PixelsPerRow();
+
+    for (size_t x = x0; x < x1; x++) {
+      AcStrategy acs = use_dct8
+                           ? AcStrategy::FromRawStrategy(AcStrategy::Type::DCT)
+                           : ac_strategy->ConstRow(y)[x];
+      if (!acs.IsFirstBlock()) continue;
+      size_t xs = acs.covered_blocks_x();
+      TransformFromPixels(acs.Strategy(), row_y + x * kBlockDim, stride,
+                          block_y, scratch_space);
+      DCFromLowestFrequencies(acs.Strategy(), block_y, dc_y, xs);
+      TransformFromPixels(acs.Strategy(), row_x + x * kBlockDim, stride,
+                          block_x, scratch_space);
+      DCFromLowestFrequencies(acs.Strategy(), block_x, dc_x, xs);
+      TransformFromPixels(acs.Strategy(), row_b + x * kBlockDim, stride,
+                          block_b, scratch_space);
+      DCFromLowestFrequencies(acs.Strategy(), block_b, dc_b, xs);
+      const float* const JXL_RESTRICT qm_x =
+          dequant.InvMatrix(acs.Strategy(), 0);
+      const float* const JXL_RESTRICT qm_b =
+          dequant.InvMatrix(acs.Strategy(), 2);
+      // Why does a constant seem to work better than
+      // raw_quant_field->Row(y)[x] ?
+      float q = use_dct8 ? 1 : quantizer->Scale() * 400.0f;
+      float q_dc_x = use_dct8 ? 1 : 1.0f / quantizer->GetInvDcStep(0);
+      float q_dc_b = use_dct8 ? 1 : 1.0f / quantizer->GetInvDcStep(2);
+
+      // Copy DCs in dc_values.
+      for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+        for (size_t ix = 0; ix < xs; ix++) {
+          dc_values_yx[(iy + y) * xsize_blocks + ix + x] =
+              dc_y[iy * xs + ix] * q_dc_x;
+          dc_values_x[(iy + y) * xsize_blocks + ix + x] =
+              dc_x[iy * xs + ix] * q_dc_x;
+          dc_values_yb[(iy + y) * xsize_blocks + ix + x] =
+              dc_y[iy * xs + ix] * q_dc_b;
+          dc_values_b[(iy + y) * xsize_blocks + ix + x] =
+              dc_b[iy * xs + ix] * q_dc_b;
+        }
+      }
+      // NOTE(review): the test uses the tile origin x0/y0, not x/y - confirm.
+      // Do not use this block for computing AC CfL.
+      if (acs.covered_blocks_x() + x0 > x1 ||
+          acs.covered_blocks_y() + y0 > y1) {
+        continue;
+      }
+
+      // Copy AC coefficients in the local block. The order in which
+      // coefficients get stored does not matter.
+      size_t cx = acs.covered_blocks_x();
+      size_t cy = acs.covered_blocks_y();
+      CoefficientLayout(&cy, &cx);
+      // Zero out LFs. This introduces terms in the optimization loop that
+      // don't affect the result, as they are all 0, but allow for simpler
+      // SIMDfication.
+      for (size_t iy = 0; iy < cy; iy++) {
+        for (size_t ix = 0; ix < cx; ix++) {
+          block_y[cx * kBlockDim * iy + ix] = 0;
+          block_x[cx * kBlockDim * iy + ix] = 0;
+          block_b[cx * kBlockDim * iy + ix] = 0;
+        }
+      }
+      const auto qv = Set(df, q);
+      for (size_t i = 0; i < cx * cy * 64; i += Lanes(df)) {
+        const auto b_y = Load(df, block_y + i);
+        const auto b_x = Load(df, block_x + i);
+        const auto b_b = Load(df, block_b + i);
+        const auto qqm_x = qv * Load(df, qm_x + i);
+        const auto qqm_b = qv * Load(df, qm_b + i);
+        Store(b_y * qqm_x, df, coeffs_yx + num_ac);
+        Store(b_x * qqm_x, df, coeffs_x + num_ac);
+        Store(b_y * qqm_b, df, coeffs_yb + num_ac);
+        Store(b_b * qqm_b, df, coeffs_b + num_ac);
+        num_ac += Lanes(df);
+      }
+    }
+  }
+  JXL_CHECK(num_ac % Lanes(df) == 0);
+  row_out_x[tx] = FindBestMultiplier(coeffs_yx, coeffs_x, num_ac, 0.0f,
+                                     kDistanceMultiplierAC, fast);
+  row_out_b[tx] = FindBestMultiplier(coeffs_yb, coeffs_b, num_ac, kYToBRatio,
+                                     kDistanceMultiplierAC, fast);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(InitDCStorage);
+HWY_EXPORT(ComputeDC);
+HWY_EXPORT(ComputeTile);
+
+void CfLHeuristics::Init(const Image3F& opsin) {
+  // One DC slot per whole block: both dimensions are truncated to block units.
+  const size_t blocks_wide = opsin.xsize() / kBlockDim;
+  const size_t blocks_high = opsin.ysize() / kBlockDim;
+  HWY_DYNAMIC_DISPATCH(InitDCStorage)(blocks_wide * blocks_high, &dc_values);
+}
+
+void CfLHeuristics::ComputeTile(const Rect& r, const Image3F& opsin,
+                                const DequantMatrices& dequant,
+                                const AcStrategyImage* ac_strategy,
+                                const Quantizer* quantizer, bool fast,
+                                size_t thread, ColorCorrelationMap* cmap) {
+  float* const scratch = mem.get() + thread * kItemsPerThread;  // thread slice
+  HWY_DYNAMIC_DISPATCH(ComputeTile)  // null ac_strategy => use_dct8
+  (opsin, dequant, ac_strategy, quantizer, r, fast, ac_strategy == nullptr,
+   &cmap->ytox_map, &cmap->ytob_map, &dc_values, scratch);
+}
+
+void CfLHeuristics::ComputeDC(bool fast, ColorCorrelationMap* cmap) {
+  // Filled in by the dispatched kernel, then published to cmap (B, then X).
+  int32_t x_factor = 0;
+  int32_t b_factor = 0;
+  HWY_DYNAMIC_DISPATCH(ComputeDC)(dc_values, fast, &x_factor, &b_factor);
+  cmap->SetYToBDC(b_factor); cmap->SetYToXDC(x_factor);
+}
+
+void ColorCorrelationMapEncodeDC(ColorCorrelationMap* map, BitWriter* writer,
+                                 size_t layer, AuxOut* aux_out) {
+  const float color_factor = map->GetColorFactor();
+  const float base_x = map->GetBaseCorrelationX();
+  const float base_b = map->GetBaseCorrelationB();
+  const int32_t ytox_dc = map->GetYToXDC();
+  const int32_t ytob_dc = map->GetYToBDC();
+  const bool all_default = ytox_dc == 0 && ytob_dc == 0 &&
+                           color_factor == kDefaultColorFactor &&
+                           base_x == 0.0f && base_b == kYToBRatio;
+  BitWriter::Allotment allotment(writer, 1 + 2 * kBitsPerByte + 12 + 32);
+  // A leading 1 bit means "everything is default" and nothing else follows.
+  writer->Write(1, all_default ? 1 : 0);
+  if (!all_default) {
+    JXL_CHECK(U32Coder::Write(kColorFactorDist, color_factor, writer));
+    JXL_CHECK(F16Coder::Write(base_x, writer));
+    JXL_CHECK(F16Coder::Write(base_b, writer));
+    // The DC factors are written as bytes, offset by the int8_t minimum.
+    writer->Write(kBitsPerByte, ytox_dc - std::numeric_limits<int8_t>::min());
+    writer->Write(kBitsPerByte, ytob_dc - std::numeric_limits<int8_t>::min());
+  }
+  ReclaimAndCharge(writer, &allotment, layer, aux_out);
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_chroma_from_luma.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_chroma_from_luma.h
new file mode 100644
index 0000000000..a097774030
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_chroma_from_luma.h
@@ -0,0 +1,67 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_CHROMA_FROM_LUMA_H_
+#define LIB_JXL_ENC_CHROMA_FROM_LUMA_H_
+
+// Chroma-from-luma, computed using heuristics to determine the best linear
+// model for the X and B channels from the Y channel.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quant_weights.h"
+
+namespace jxl {
+
+void ColorCorrelationMapEncodeDC(ColorCorrelationMap* map, BitWriter* writer,
+                                 size_t layer, AuxOut* aux_out);
+
+struct CfLHeuristics {
+  void Init(const Image3F& opsin);  // allocates dc_values
+  // Allocates kItemsPerThread floats of scratch for each of num_threads.
+  void PrepareForThreads(size_t num_threads) {
+    mem = hwy::AllocateAligned<float>(num_threads * kItemsPerThread);
+  }
+  // Writes the tile's factors into cmap; `thread` selects the scratch slice.
+  void ComputeTile(const Rect& r, const Image3F& opsin,
+                   const DequantMatrices& dequant,
+                   const AcStrategyImage* ac_strategy,
+                   const Quantizer* quantizer, bool fast, size_t thread,
+                   ColorCorrelationMap* cmap);
+  // Stores the global DC factors in cmap via SetYToXDC / SetYToBDC.
+  void ComputeDC(bool fast, ColorCorrelationMap* cmap);
+
+  ImageF dc_values;  // 4 rows, allocated by Init()
+  hwy::AlignedFreeUniquePtr<float[]> mem;  // set by PrepareForThreads()
+
+  // Working set is too large for stack; allocate dynamically.
+  constexpr static size_t kItemsPerThread =
+      AcStrategy::kMaxCoeffArea * 3        // Blocks
+      + kColorTileDim * kColorTileDim * 4  // AC coeff storage
+      + AcStrategy::kMaxCoeffArea * 2;     // Scratch space
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_CHROMA_FROM_LUMA_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cluster.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cluster.cc
new file mode 100644
index 0000000000..1f12a29881
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cluster.cc
@@ -0,0 +1,310 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_cluster.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <map>
+#include <memory>
+#include <numeric>
+#include <queue>
+#include <tuple>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_cluster.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/fast_math-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+template <class V>
+V Entropy(V count, V inv_total, V total) {
+  const HWY_CAPPED(float, Histogram::kRounding) d;
+  // 0 - count * log2(count * inv_total), zeroed in lanes where count == total.
+  const auto bits = Set(d, 0.0f) - count * FastLog2f(d, inv_total * count);
+  return IfThenZeroElse(count == total, bits);
+}
+
+void HistogramEntropy(const Histogram& a) {
+  a.entropy_ = 0.0f;  // stays 0 for an empty histogram
+  if (a.total_count_ == 0) return;
+
+  const HWY_CAPPED(float, Histogram::kRounding) df;
+  const HWY_CAPPED(int32_t, Histogram::kRounding) di;
+
+  const auto inv_tot = Set(df, 1.0f / a.total_count_);
+  auto entropy_lanes = Zero(df);
+  auto total = Set(df, a.total_count_);
+  // Sum Entropy() over all bins, one vector of counts at a time.
+  for (size_t i = 0; i < a.data_.size(); i += Lanes(di)) {
+    const auto counts = LoadU(di, &a.data_[i]);
+    entropy_lanes += Entropy(ConvertTo(df, counts), inv_tot, total);
+  }
+  a.entropy_ += GetLane(SumOfLanes(entropy_lanes));
+}
+
+float HistogramDistance(const Histogram& a, const Histogram& b) {
+  if (a.total_count_ == 0 || b.total_count_ == 0) return 0;
+  // Entropy of the summed histogram minus the stored a.entropy_, b.entropy_.
+  const HWY_CAPPED(float, Histogram::kRounding) df;
+  const HWY_CAPPED(int32_t, Histogram::kRounding) di;
+
+  const auto inv_tot = Set(df, 1.0f / (a.total_count_ + b.total_count_));
+  auto distance_lanes = Zero(df);
+  auto total = Set(df, a.total_count_ + b.total_count_);
+  // Past the end of the shorter histogram its counts are taken as zero.
+  for (size_t i = 0; i < std::max(a.data_.size(), b.data_.size());
+       i += Lanes(di)) {
+    const auto a_counts =
+        a.data_.size() > i ? LoadU(di, &a.data_[i]) : Zero(di);
+    const auto b_counts =
+        b.data_.size() > i ? LoadU(di, &b.data_[i]) : Zero(di);
+    const auto counts = ConvertTo(df, a_counts + b_counts);
+    distance_lanes += Entropy(counts, inv_tot, total);
+  }
+  const float total_distance = GetLane(SumOfLanes(distance_lanes));
+  return total_distance - a.entropy_ - b.entropy_;
+}
+
+// First step of a k-means clustering with a fancy distance metric.
+void FastClusterHistograms(const std::vector<Histogram>& in,
+                           const size_t num_contexts_in, size_t max_histograms,
+                           float min_distance, std::vector<Histogram>* out,
+                           std::vector<uint32_t>* histogram_symbols) {
+  PROFILER_FUNC;
+  size_t largest_idx = 0;
+  std::vector<uint32_t> nonempty_histograms;
+  nonempty_histograms.reserve(in.size());
+  for (size_t i = 0; i < num_contexts_in; i++) {
+    if (in[i].total_count_ == 0) continue;
+    HistogramEntropy(in[i]);
+    if (in[i].total_count_ > in[largest_idx].total_count_) {
+      largest_idx = i;
+    }
+    nonempty_histograms.push_back(i);
+  }
+  // No symbols.
+  if (nonempty_histograms.empty()) {
+    out->resize(1);
+    histogram_symbols->clear();
+    histogram_symbols->resize(in.size(), 0);
+    return;
+  }
+  largest_idx = std::find(nonempty_histograms.begin(),
+                          nonempty_histograms.end(), largest_idx) -
+                nonempty_histograms.begin();
+  size_t num_contexts = nonempty_histograms.size();
+  out->clear();
+  out->reserve(max_histograms);
+  std::vector<float> dists(num_contexts, std::numeric_limits<float>::max());
+  histogram_symbols->clear();
+  histogram_symbols->resize(in.size(), max_histograms);  // = not yet assigned
+  // Seed clusters greedily, each time taking the histogram farthest from them.
+  while (out->size() < max_histograms && out->size() < num_contexts) {
+    (*histogram_symbols)[nonempty_histograms[largest_idx]] = out->size();
+    out->push_back(in[nonempty_histograms[largest_idx]]);
+    largest_idx = 0;
+    for (size_t i = 0; i < num_contexts; i++) {
+      dists[i] = std::min(
+          HistogramDistance(in[nonempty_histograms[i]], out->back()), dists[i]);
+      // Avoid repeating histograms
+      if ((*histogram_symbols)[nonempty_histograms[i]] != max_histograms) {
+        continue;
+      }
+      if (dists[i] > dists[largest_idx]) largest_idx = i;
+    }
+    if (dists[largest_idx] < min_distance) break;
+  }
+  // Merge every remaining non-empty histogram into its nearest cluster.
+  for (size_t i = 0; i < num_contexts_in; i++) {
+    if ((*histogram_symbols)[i] != max_histograms) continue;
+    if (in[i].total_count_ == 0) {
+      (*histogram_symbols)[i] = 0;
+      continue;
+    }
+    size_t best = 0;
+    float best_dist = HistogramDistance(in[i], (*out)[best]);
+    for (size_t j = 1; j < out->size(); j++) {
+      float dist = HistogramDistance(in[i], (*out)[j]);
+      if (dist < best_dist) {
+        best = j;
+        best_dist = dist;
+      }
+    }
+    (*out)[best].AddHistogram(in[i]);
+    HistogramEntropy((*out)[best]);
+    (*histogram_symbols)[i] = best;
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(FastClusterHistograms);  // Local function
+HWY_EXPORT(HistogramEntropy);       // Local function
+
+float Histogram::ShannonEntropy() const {
+  HWY_DYNAMIC_DISPATCH(HistogramEntropy)(*this);  // recomputes entropy_
+  return entropy_;
+}
+
+namespace {
+// -----------------------------------------------------------------------------
+// Histogram refinement
+
+// Renumbers the clusters by order of first appearance in *symbols, permuting
+// *out to match and dropping histograms that no symbol refers to.
+void HistogramReindex(std::vector<Histogram>* out,
+                      std::vector<uint32_t>* symbols) {
+  const std::vector<Histogram> original(*out);
+  std::map<int, int> remap;  // old cluster index -> new cluster index
+  int num_used = 0;
+  for (const uint32_t old_index : *symbols) {
+    // emplace() inserts (and reports true) only for a cluster not seen yet.
+    if (remap.emplace(old_index, num_used).second) {
+      (*out)[num_used] = original[old_index];
+      ++num_used;
+    }
+  }
+  out->resize(num_used);
+  for (uint32_t& symbol : *symbols) {
+    symbol = remap[symbol];
+  }
+}
+
+}  // namespace
+
+// Clusters similar histograms in 'in' together, the selected histograms are
+// placed in 'out', and for each index in 'in', *histogram_symbols will
+// indicate which of the 'out' histograms is the best approximation.
+void ClusterHistograms(const HistogramParams params,
+                       const std::vector<Histogram>& in,
+                       const size_t num_contexts, size_t max_histograms,
+                       std::vector<Histogram>* out,
+                       std::vector<uint32_t>* histogram_symbols) {
+  constexpr float kMinDistanceForDistinctFast = 64.0f;
+  constexpr float kMinDistanceForDistinctBest = 16.0f;
+  max_histograms = std::min(max_histograms, params.max_histograms);
+  if (params.clustering == HistogramParams::ClusteringType::kFastest) {
+    HWY_DYNAMIC_DISPATCH(FastClusterHistograms)
+    (in, num_contexts, 4, kMinDistanceForDistinctFast, out, histogram_symbols);
+  } else if (params.clustering == HistogramParams::ClusteringType::kFast) {
+    HWY_DYNAMIC_DISPATCH(FastClusterHistograms)
+    (in, num_contexts, max_histograms, kMinDistanceForDistinctFast, out,
+     histogram_symbols);
+  } else {
+    PROFILER_FUNC;
+    HWY_DYNAMIC_DISPATCH(FastClusterHistograms)
+    (in, num_contexts, max_histograms, kMinDistanceForDistinctBest, out,
+     histogram_symbols);
+    for (size_t i = 0; i < out->size(); i++) {
+      (*out)[i].entropy_ =
+          ANSPopulationCost((*out)[i].data_.data(), (*out)[i].data_.size());
+    }
+    uint32_t next_version = 2;
+    std::vector<uint32_t> version(out->size(), 1);
+    std::vector<uint32_t> renumbering(out->size());
+    std::iota(renumbering.begin(), renumbering.end(), 0);
+    // version[i] == 0 marks a cluster that has been merged into another one.
+    // Try to pair up clusters if doing so reduces the total cost.
+
+    struct HistogramPair {
+      // validity of a pair: p.version == max(version[i], version[j])
+      float cost;
+      uint32_t first;
+      uint32_t second;
+      uint32_t version;
+      // We use > because priority queues sort in *decreasing* order, but we
+      // want lower cost elements to appear first.
+      bool operator<(const HistogramPair& other) const {
+        return std::make_tuple(cost, first, second, version) >
+               std::make_tuple(other.cost, other.first, other.second,
+                               other.version);
+      }
+    };
+
+    // Create list of all pairs by increasing merging cost.
+    std::priority_queue<HistogramPair> pairs_to_merge;
+    for (uint32_t i = 0; i < out->size(); i++) {
+      for (uint32_t j = i + 1; j < out->size(); j++) {
+        Histogram histo;
+        histo.AddHistogram((*out)[i]);
+        histo.AddHistogram((*out)[j]);
+        float cost = ANSPopulationCost(histo.data_.data(), histo.data_.size()) -
+                     (*out)[i].entropy_ - (*out)[j].entropy_;
+        // Avoid enqueueing pairs that are not advantageous to merge.
+        if (cost >= 0) continue;
+        pairs_to_merge.push(
+            HistogramPair{cost, i, j, std::max(version[i], version[j])});
+      }
+    }
+
+    // Merge the best pair to merge, add new pairs that get formed as a
+    // consequence.
+    while (!pairs_to_merge.empty()) {
+      uint32_t first = pairs_to_merge.top().first;
+      uint32_t second = pairs_to_merge.top().second;
+      uint32_t ver = pairs_to_merge.top().version;
+      pairs_to_merge.pop();
+      if (ver != std::max(version[first], version[second]) ||
+          version[first] == 0 || version[second] == 0) {
+        continue;
+      }
+      (*out)[first].AddHistogram((*out)[second]);
+      (*out)[first].entropy_ = ANSPopulationCost((*out)[first].data_.data(),
+                                                 (*out)[first].data_.size());
+      for (size_t i = 0; i < renumbering.size(); i++) {
+        if (renumbering[i] == second) {
+          renumbering[i] = first;
+        }
+      }
+      version[second] = 0;
+      version[first] = next_version++;
+      for (uint32_t j = 0; j < out->size(); j++) {
+        if (j == first) continue;
+        if (version[j] == 0) continue;
+        Histogram histo;
+        histo.AddHistogram((*out)[first]);
+        histo.AddHistogram((*out)[j]);
+        float cost = ANSPopulationCost(histo.data_.data(), histo.data_.size()) -
+                     (*out)[first].entropy_ - (*out)[j].entropy_;
+        // Avoid enqueueing pairs that are not advantageous to merge.
+        if (cost >= 0) continue;
+        pairs_to_merge.push(
+            HistogramPair{cost, std::min(first, j), std::max(first, j),
+                          std::max(version[first], version[j])});
+      }
+    }
+    std::vector<uint32_t> reverse_renumbering(out->size(), -1);
+    size_t num_alive = 0;
+    for (size_t i = 0; i < out->size(); i++) {
+      if (version[i] == 0) continue;
+      (*out)[num_alive++] = (*out)[i];
+      reverse_renumbering[i] = num_alive - 1;
+    }
+    out->resize(num_alive);
+    for (size_t i = 0; i < histogram_symbols->size(); i++) {
+      (*histogram_symbols)[i] =
+          reverse_renumbering[renumbering[(*histogram_symbols)[i]]];
+    }
+  }
+
+  // Convert the context map to a canonical form.
+  HistogramReindex(out, histogram_symbols);
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cluster.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cluster.h
new file mode 100644
index 0000000000..622a567950
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_cluster.h
@@ -0,0 +1,61 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Functions for clustering similar histograms together.
+
+#ifndef LIB_JXL_ENC_CLUSTER_H_
+#define LIB_JXL_ENC_CLUSTER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/enc_ans.h"
+
+namespace jxl {
+
+// Histogram of symbol counts with a cached (possibly stale) entropy value.
+struct Histogram {
+  // entropy_ is zeroed so that copies never carry an indeterminate float.
+  Histogram() : total_count_(0), entropy_(0.0f) {}
+  void Clear() {  // Note: entropy_ is not reset here.
+    data_.clear();
+    total_count_ = 0;
+  }
+  // Counts one occurrence of `symbol`; grows data_ in multiples of kRounding.
+  void Add(size_t symbol) {
+    if (data_.size() <= symbol) {
+      data_.resize(DivCeil(symbol + 1, kRounding) * kRounding);
+    }
+    ++data_[symbol];
+    ++total_count_;
+  }
+  // Adds the bins and total of `other` to this histogram (entropy_ excluded).
+  void AddHistogram(const Histogram& other) {
+    if (other.data_.size() > data_.size()) data_.resize(other.data_.size());
+    for (size_t i = 0; i < other.data_.size(); ++i) {
+      data_[i] += other.data_[i];
+    }
+    total_count_ += other.total_count_;
+  }
+  float PopulationCost() const {
+    return ANSPopulationCost(data_.data(), data_.size());
+  }
+  float ShannonEntropy() const;
+  std::vector<ANSHistBin> data_;
+  size_t total_count_;
+  mutable float entropy_;  // WARNING: not kept up-to-date.
+  static constexpr size_t kRounding = 8;
+};
+
+void ClusterHistograms(HistogramParams params, const std::vector<Histogram>& in,
+                       size_t num_contexts, size_t max_histograms,
+                       std::vector<Histogram>* out,
+                       std::vector<uint32_t>* histogram_symbols);
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_CLUSTER_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_coeff_order.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_coeff_order.cc
new file mode 100644
index 0000000000..81315a0787
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_coeff_order.cc
@@ -0,0 +1,274 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/lehmer_code.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+uint32_t ComputeUsedOrders(const SpeedTier speed,
+                           const AcStrategyImage& ac_strategy,
+                           const Rect& rect) {
+  // Tiny strategy images (both dimensions under 5) keep the default orders.
+  const bool is_small = ac_strategy.xsize() < 5 && ac_strategy.ysize() < 5;
+  if (is_small) return 0;
+
+  // At kFalcon and faster only DCT8 (order 0) is used: bitfield is 1.
+  if (speed >= SpeedTier::kFalcon) return 1;
+
+  // Largest order index whose coefficient order may be customized; blocks
+  // bigger than 32x32 map to higher indices and are left alone.
+  constexpr int kMaxCustomOrder = 6;
+  const size_t num_rows = rect.ysize();
+  const size_t num_cols = rect.xsize();
+  uint32_t used_mask = 0;
+  // TODO(veluca): precompute when doing DCT.
+  for (size_t row = 0; row < num_rows; ++row) {
+    AcStrategyRow strategies = ac_strategy.ConstRow(rect, row);
+    for (size_t col = 0; col < num_cols; ++col) {
+      const int order_id = kStrategyOrder[strategies[col].RawStrategy()];
+      if (order_id <= kMaxCustomOrder) used_mask |= 1u << order_id;
+    }
+  }
+  return used_mask;
+}
+
+void ComputeCoeffOrder(SpeedTier speed, const ACImage& acs,
+                       const AcStrategyImage& ac_strategy,
+                       const FrameDimensions& frame_dim, uint32_t& used_orders,
+                       coeff_order_t* JXL_RESTRICT order) {
+  std::vector<int32_t> num_zeros(kCoeffOrderMaxSize);  // zero count per slot
+  // If compressing at high speed and only using 8x8 DCTs, only consider a
+  // subset of blocks.
+  double block_fraction = 1.0f;
+  // TODO(veluca): figure out why sampling blocks if non-8x8s are used makes
+  // encoding significantly less dense.
+  if (speed >= SpeedTier::kSquirrel && used_orders == 1) {
+    block_fraction = 0.5f;
+  }
+  // No need to compute number of zero coefficients if all orders are the
+  // default.
+  if (used_orders != 0) {
+    uint64_t threshold =
+        (std::numeric_limits<uint64_t>::max() >> 32) * block_fraction;
+    uint64_t s[2] = {0x94D049BB133111EBull, 0xBF58476D1CE4E5B9ull};
+    // Xorshift128+ adapted from xorshift128+-inl.h. The fixed seed above makes
+    // the sampling sequence the same on every call.
+    auto use_sample = [&]() {  // true when the top 32 bits are <= threshold
+      auto s1 = s[0];
+      const auto s0 = s[1];
+      const auto bits = s1 + s0;  // b, c
+      s[0] = s0;
+      s1 ^= s1 << 23;
+      s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
+      s[1] = s1;
+      return (bits >> 32) <= threshold;
+    };
+
+    // Count number of zero coefficients, separately for each DCT band.
+    // TODO(veluca): precompute when doing DCT.
+    for (size_t group_index = 0; group_index < frame_dim.num_groups;
+         group_index++) {
+      const size_t gx = group_index % frame_dim.xsize_groups;
+      const size_t gy = group_index / frame_dim.xsize_groups;
+      const Rect rect(gx * kGroupDimInBlocks, gy * kGroupDimInBlocks,
+                      kGroupDimInBlocks, kGroupDimInBlocks,
+                      frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+      ConstACPtr rows[3];
+      ACType type = acs.Type();
+      for (size_t c = 0; c < 3; c++) {
+        rows[c] = acs.PlaneRow(c, group_index, 0);
+      }
+      size_t ac_offset = 0;  // offset into rows[c] for the current block
+
+      // TODO(veluca): SIMDfy.
+      for (size_t by = 0; by < rect.ysize(); ++by) {
+        AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
+        for (size_t bx = 0; bx < rect.xsize(); ++bx) {
+          AcStrategy acs = acs_row[bx];  // shadows the ACImage parameter `acs`
+          if (!acs.IsFirstBlock()) continue;
+          if (!use_sample()) continue;
+          size_t size = kDCTBlockSize << acs.log2_covered_blocks();
+          for (size_t c = 0; c < 3; ++c) {
+            const size_t order_offset =
+                CoeffOrderOffset(kStrategyOrder[acs.RawStrategy()], c);
+            if (type == ACType::k16) {
+              for (size_t k = 0; k < size; k++) {
+                bool is_zero = rows[c].ptr16[ac_offset + k] == 0;
+                num_zeros[order_offset + k] += is_zero ? 1 : 0;
+              }
+            } else {
+              for (size_t k = 0; k < size; k++) {
+                bool is_zero = rows[c].ptr32[ac_offset + k] == 0;
+                num_zeros[order_offset + k] += is_zero ? 1 : 0;
+              }
+            }
+            // Ensure LLFs are first in the order: their count is forced to -1,
+            // overwriting whatever was accumulated above.
+            size_t cx = acs.covered_blocks_x();
+            size_t cy = acs.covered_blocks_y();
+            CoefficientLayout(&cy, &cx);
+            for (size_t iy = 0; iy < cy; iy++) {
+              for (size_t ix = 0; ix < cx; ix++) {
+                num_zeros[order_offset + iy * kBlockDim * cx + ix] = -1;
+              }
+            }
+          }
+          // NOTE(review): not reached for blocks rejected by use_sample(), so
+          // ac_offset does not advance past them - confirm this is intended.
+          ac_offset += size;
+        }
+      }
+    }
+  }
+  struct PosAndCount {
+    uint32_t pos;
+    uint32_t count;
+  };
+  auto mem = hwy::AllocateAligned<PosAndCount>(AcStrategy::kMaxCoeffArea);
+
+  uint16_t computed = 0;  // bitmask of order indices already processed
+  for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+    uint8_t ord = kStrategyOrder[o];
+    if (computed & (1 << ord)) continue;
+    computed |= 1 << ord;
+    AcStrategy acs = AcStrategy::FromRawStrategy(o);
+    size_t sz = kDCTBlockSize * acs.covered_blocks_x() * acs.covered_blocks_y();
+    // Ensure natural coefficient order is not permuted if the order is
+    // not transmitted.
+    if ((1 << ord) & ~used_orders) {
+      for (size_t c = 0; c < 3; c++) {
+        size_t offset = CoeffOrderOffset(ord, c);
+        JXL_DASSERT(CoeffOrderOffset(ord, c + 1) - offset == sz);
+        SetDefaultOrder(AcStrategy::FromRawStrategy(o), &order[offset]);
+      }
+      continue;
+    }
+    const coeff_order_t* natural_coeff_order = acs.NaturalCoeffOrder();
+
+    bool is_nondefault = false;  // set if any channel deviates from natural
+    for (uint8_t c = 0; c < 3; c++) {
+      // Apply zig-zag order.
+      PosAndCount* pos_and_val = mem.get();
+      size_t offset = CoeffOrderOffset(ord, c);
+      JXL_DASSERT(CoeffOrderOffset(ord, c + 1) - offset == sz);
+      float inv_sqrt_sz = 1.0f / std::sqrt(sz);
+      for (size_t i = 0; i < sz; ++i) {
+        size_t pos = natural_coeff_order[i];
+        pos_and_val[i].pos = pos;
+        // We don't care for the exact number -> quantize number of zeros,
+        // to get less permuted order.
+        pos_and_val[i].count = num_zeros[offset + pos] * inv_sqrt_sz + 0.1f;
+      }
+
+      // Stable-sort -> elements with same number of zeros will preserve their
+      // order.
+      auto comparator = [](const PosAndCount& a, const PosAndCount& b) -> bool {
+        return a.count < b.count;
+      };
+      std::stable_sort(pos_and_val, pos_and_val + sz, comparator);
+
+      // Grab indices.
+      for (size_t i = 0; i < sz; ++i) {
+        order[offset + i] = pos_and_val[i].pos;
+        is_nondefault |= natural_coeff_order[i] != pos_and_val[i].pos;
+      }
+    }
+    if (!is_nondefault) {  // sorted order equals natural: mark order unused
+      used_orders &= ~(1 << ord);
+    }
+  }
+}
+
+namespace {
+
+// Tokenizes the Lehmer code of `order`: a count, then entries [skip, stop).
+void TokenizePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
+                         size_t size, std::vector<Token>* tokens) {
+  std::vector<LehmerT> code(size);
+  std::vector<uint32_t> scratch(size + 1);
+  ComputeLehmerCode(order, scratch.data(), size, code.data());
+  // Trailing zero entries are trimmed; only the remaining count is emitted.
+  size_t stop = size;
+  for (; stop > skip && code[stop - 1] == 0; --stop) {
+  }
+  tokens->emplace_back(CoeffOrderContext(size), stop - skip);
+  for (size_t idx = skip; idx < stop; ++idx) {
+    const uint32_t prev = (idx == skip) ? 0 : code[idx - 1];
+    tokens->emplace_back(CoeffOrderContext(prev), code[idx]);
+  }
+}
+
+}  // namespace
+
+void EncodePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
+                       size_t size, BitWriter* writer, int layer,
+                       AuxOut* aux_out) {
+  std::vector<std::vector<Token>> streams(1);  // exactly one token stream
+  TokenizePermutation(order, skip, size, &streams.front());
+  std::vector<uint8_t> ctx_map;
+  EntropyEncodingData codes;  // built below, then passed to WriteTokens
+  BuildAndEncodeHistograms(HistogramParams(), kPermutationContexts, streams,
+                           &codes, &ctx_map, writer, layer, aux_out);
+  WriteTokens(streams.front(), codes, ctx_map, writer, layer, aux_out);
+}
+
+namespace {
+void EncodeCoeffOrder(const coeff_order_t* JXL_RESTRICT order, AcStrategy acs,
+                      std::vector<Token>* tokens, coeff_order_t* order_zigzag) {
+  // The leading `num_llf` entries (one per covered block) are skipped when
+  // tokenizing; everything is first mapped through the natural-order LUT.
+  const coeff_order_t* lut = acs.NaturalCoeffOrderLut();
+  const size_t num_llf = acs.covered_blocks_x() * acs.covered_blocks_y();
+  const size_t num_coeffs = kDCTBlockSize * num_llf;
+  for (size_t k = 0; k != num_coeffs; ++k) order_zigzag[k] = lut[order[k]];
+  TokenizePermutation(order_zigzag, num_llf, num_coeffs, tokens);
+}
+}  // namespace
+
+void EncodeCoeffOrders(uint16_t used_orders,
+                       const coeff_order_t* JXL_RESTRICT order,
+                       BitWriter* writer, size_t layer,
+                       AuxOut* JXL_RESTRICT aux_out) {
+  auto buf = hwy::AllocateAligned<coeff_order_t>(AcStrategy::kMaxCoeffArea);
+  std::vector<std::vector<Token>> streams(1);
+  uint16_t visited = 0;  // One bit per order index that was already handled.
+  for (uint8_t raw = 0; raw < AcStrategy::kNumValidStrategies; ++raw) {
+    const uint8_t ord = kStrategyOrder[raw];
+    const uint16_t bit = static_cast<uint16_t>(1 << ord);
+    if (visited & bit) continue;  // Index shared with an earlier strategy.
+    visited |= bit;
+    if ((used_orders & bit) == 0) continue;
+    const AcStrategy acs = AcStrategy::FromRawStrategy(raw);
+    for (size_t c = 0; c < 3; ++c) {
+      EncodeCoeffOrder(&order[CoeffOrderOffset(ord, c)], acs, &streams[0],
+                       buf.get());
+    }
+  }
+  // Do not write anything if no order is used.
+  if (used_orders == 0) return;
+  std::vector<uint8_t> ctx_map;
+  EntropyEncodingData codes;
+  BuildAndEncodeHistograms(HistogramParams(), kPermutationContexts, streams,
+                           &codes, &ctx_map, writer, layer, aux_out);
+  WriteTokens(streams[0], codes, ctx_map, writer, layer, aux_out);
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_coeff_order.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_coeff_order.h
new file mode 100644
index 0000000000..5eee746592
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_coeff_order.h
@@ -0,0 +1,52 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_COEFF_ORDER_H_
+#define LIB_JXL_ENC_COEFF_ORDER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_params.h"
+
+namespace jxl {
+
+// Orders that are actually used in part of image. `rect` is in block units.
+uint32_t ComputeUsedOrders(SpeedTier speed, const AcStrategyImage& ac_strategy,
+                           const Rect& rect);
+
+// Modify zig-zag order, so that DCT bands with more zeros go later.
+// Order of DCT bands with same number of zeros is untouched, so
+// permutation will be cheaper to encode.
+void ComputeCoeffOrder(SpeedTier speed, const ACImage& acs,
+                       const AcStrategyImage& ac_strategy,
+                       const FrameDimensions& frame_dim, uint32_t& used_orders,
+                       coeff_order_t* JXL_RESTRICT order);
+
+void EncodeCoeffOrders(uint16_t used_orders,
+                       const coeff_order_t* JXL_RESTRICT order,
+                       BitWriter* writer, size_t layer,
+                       AuxOut* JXL_RESTRICT aux_out);
+
+// Encoding/decoding of a single permutation. `size`: number of elements in the
+// permutation. `skip`: number of elements to skip from the *beginning* of the
+// permutation.
+void EncodePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
+                       size_t size, BitWriter* writer, int layer,
+                       AuxOut* aux_out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_COEFF_ORDER_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_color_management.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_color_management.cc
new file mode 100644
index 0000000000..ff7dbe557b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_color_management.cc
@@ -0,0 +1,886 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Defined by build system; this avoids IDE warnings. Must come before
+// color_management.h (affects header definitions).
+#ifndef JPEGXL_ENABLE_SKCMS
+#define JPEGXL_ENABLE_SKCMS 0
+#endif
+
+#include "lib/jxl/enc_color_management.h"
+
+#include <math.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <utility>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_color_management.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/linalg.h"
+#include "lib/jxl/transfer_functions-inl.h"
+#if JPEGXL_ENABLE_SKCMS
+#include "skcms.h"
+#else  // JPEGXL_ENABLE_SKCMS
+#include "lcms2.h"
+#include "lcms2_plugin.h"
+#endif  // JPEGXL_ENABLE_SKCMS
+
+#define JXL_CMS_VERBOSE 0
+
+// Define these only once. We can't use HWY_ONCE here because it is defined as
+// 1 only on the last pass.
+#ifndef LIB_JXL_ENC_COLOR_MANAGEMENT_CC_
+#define LIB_JXL_ENC_COLOR_MANAGEMENT_CC_
+
+namespace jxl {
+#if JPEGXL_ENABLE_SKCMS
+struct ColorSpaceTransform::SkcmsICC {
+  // Parsed skcms_ICCProfiles retain pointers to the original data.
+  PaddedBytes icc_src_, icc_dst_;  // raw ICC bytes kept next to the profiles
+  skcms_ICCProfile profile_src_, profile_dst_;  // used by skcms_Transform
+};
+#endif  // JPEGXL_ENABLE_SKCMS
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_COLOR_MANAGEMENT_CC_
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+#if JXL_CMS_VERBOSE >= 2
+const size_t kX = 0;  // pixel index, multiplied by 3 for RGB
+#endif
+
+// xform_src = UndoGammaCompression(buf_src).
+void BeforeTransform(ColorSpaceTransform* t, const float* buf_src,
+                     float* xform_src) {
+  switch (t->preprocess_) {
+    case ExtraTF::kNone:
+      JXL_DASSERT(false);  // unreachable
+      break;
+
+    case ExtraTF::kPQ: {
+      // By default, PQ content has an intensity target of 10000, stored
+      // exactly.
+      HWY_FULL(float) df;
+      const auto multiplier = Set(df, t->intensity_target_ == 10000.f
+                                          ? 1.0f
+                                          : 10000.f / t->intensity_target_);
+      // NOTE(review): steps by whole vectors; presumably both buffers are
+      // padded to a multiple of Lanes(df) - confirm against the allocation.
+      for (size_t i = 0; i < t->buf_src_.xsize(); i += Lanes(df)) {
+        const auto val = Load(df, buf_src + i);
+        const auto result = multiplier * TF_PQ().DisplayFromEncoded(df, val);
+        Store(result, df, xform_src + i);
+      }
+#if JXL_CMS_VERBOSE >= 2
+      printf("pre in %.4f %.4f %.4f undoPQ %.4f %.4f %.4f\n", buf_src[3 * kX],
+             buf_src[3 * kX + 1], buf_src[3 * kX + 2], xform_src[3 * kX],
+             xform_src[3 * kX + 1], xform_src[3 * kX + 2]);
+#endif
+      break;
+    }
+
+    case ExtraTF::kHLG:  // scalar path, evaluated in double precision
+      for (size_t i = 0; i < t->buf_src_.xsize(); ++i) {
+        xform_src[i] = static_cast<float>(
+            TF_HLG().DisplayFromEncoded(static_cast<double>(buf_src[i])));
+      }
+#if JXL_CMS_VERBOSE >= 2
+      printf("pre in %.4f %.4f %.4f undoHLG %.4f %.4f %.4f\n", buf_src[3 * kX],
+             buf_src[3 * kX + 1], buf_src[3 * kX + 2], xform_src[3 * kX],
+             xform_src[3 * kX + 1], xform_src[3 * kX + 2]);
+#endif
+      break;
+
+    case ExtraTF::kSRGB:  // vectorized like kPQ, but without a multiplier
+      HWY_FULL(float) df;
+      for (size_t i = 0; i < t->buf_src_.xsize(); i += Lanes(df)) {
+        const auto val = Load(df, buf_src + i);
+        const auto result = TF_SRGB().DisplayFromEncoded(val);
+        Store(result, df, xform_src + i);
+      }
+#if JXL_CMS_VERBOSE >= 2
+      printf("pre in %.4f %.4f %.4f undoSRGB %.4f %.4f %.4f\n", buf_src[3 * kX],
+             buf_src[3 * kX + 1], buf_src[3 * kX + 2], xform_src[3 * kX],
+             xform_src[3 * kX + 1], xform_src[3 * kX + 2]);
+#endif
+      break;
+  }
+}
+
+// Applies gamma compression in-place.
+void AfterTransform(ColorSpaceTransform* t, float* JXL_RESTRICT buf_dst) {
+  switch (t->postprocess_) {
+    case ExtraTF::kNone:
+      JXL_DASSERT(false);  // unreachable
+      break;
+    case ExtraTF::kPQ: {
+      HWY_FULL(float) df;
+      const auto multiplier = Set(df, t->intensity_target_ == 10000.f
+                                          ? 1.0f
+                                          : t->intensity_target_ * 1e-4f);
+      // Input is scaled by `multiplier` before PQ encoding, in whole vectors.
+      for (size_t i = 0; i < t->buf_dst_.xsize(); i += Lanes(df)) {
+        const auto val = Load(df, buf_dst + i);
+        const auto result = TF_PQ().EncodedFromDisplay(df, multiplier * val);
+        Store(result, df, buf_dst + i);
+      }
+#if JXL_CMS_VERBOSE >= 2
+      printf("after PQ enc %.4f %.4f %.4f\n", buf_dst[3 * kX],
+             buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]);
+#endif
+      break;
+    }
+    case ExtraTF::kHLG:  // scalar path, evaluated in double precision
+      for (size_t i = 0; i < t->buf_dst_.xsize(); ++i) {
+        buf_dst[i] = static_cast<float>(
+            TF_HLG().EncodedFromDisplay(static_cast<double>(buf_dst[i])));
+      }
+#if JXL_CMS_VERBOSE >= 2
+      printf("after HLG enc %.4f %.4f %.4f\n", buf_dst[3 * kX],
+             buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]);
+#endif
+      break;
+    case ExtraTF::kSRGB:  // vectorized, overwrites buf_dst in place
+      HWY_FULL(float) df;
+      for (size_t i = 0; i < t->buf_dst_.xsize(); i += Lanes(df)) {
+        const auto val = Load(df, buf_dst + i);
+        const auto result =
+            TF_SRGB().EncodedFromDisplay(HWY_FULL(float)(), val);
+        Store(result, df, buf_dst + i);
+      }
+#if JXL_CMS_VERBOSE >= 2
+      printf("after SRGB enc %.4f %.4f %.4f\n", buf_dst[3 * kX],
+             buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]);
+#endif
+      break;
+  }
+}
+
+void DoColorSpaceTransform(ColorSpaceTransform* t, const size_t thread,
+                           const float* buf_src, float* buf_dst) {
+  // No lock needed.
+
+  float* xform_src = const_cast<float*>(buf_src);  // Read-only.
+  if (t->preprocess_ != ExtraTF::kNone) {
+    xform_src = t->buf_src_.Row(thread);  // Writable buffer.
+    BeforeTransform(t, buf_src, xform_src);
+  }
+
+#if JXL_CMS_VERBOSE >= 2
+  // Save inputs for printing before in-place transforms overwrite them.
+  const float in0 = xform_src[3 * kX + 0];
+  const float in1 = xform_src[3 * kX + 1];
+  const float in2 = xform_src[3 * kX + 2];
+#endif
+
+  if (t->skip_lcms_) {  // no CMS call: the (preprocessed) row is copied as-is
+    if (buf_dst != xform_src) {
+      memcpy(buf_dst, xform_src, t->buf_dst_.xsize() * sizeof(*buf_dst));
+    }  // else: in-place, no need to copy
+  } else {
+#if JPEGXL_ENABLE_SKCMS
+    JXL_CHECK(skcms_Transform(
+        xform_src, skcms_PixelFormat_RGB_fff, skcms_AlphaFormat_Opaque,
+        &t->skcms_icc_->profile_src_, buf_dst, skcms_PixelFormat_RGB_fff,
+        skcms_AlphaFormat_Opaque, &t->skcms_icc_->profile_dst_, t->xsize_));
+#else   // JPEGXL_ENABLE_SKCMS
+    cmsDoTransform(t->lcms_transform_, xform_src, buf_dst,
+                   static_cast<cmsUInt32Number>(t->xsize_));
+#endif  // JPEGXL_ENABLE_SKCMS
+  }
+#if JXL_CMS_VERBOSE >= 2
+  printf("xform skip%d: %.4f %.4f %.4f (%p) -> (%p) %.4f %.4f %.4f\n",
+         t->skip_lcms_, in0, in1, in2, xform_src, buf_dst, buf_dst[3 * kX],
+         buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]);
+#endif
+
+  if (t->postprocess_ != ExtraTF::kNone) {
+    AfterTransform(t, buf_dst);  // applied in place on the output row
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(DoColorSpaceTransform);
+void DoColorSpaceTransform(ColorSpaceTransform* t, size_t thread,
+                           const float* buf_src, float* buf_dst) {
+  // Forwards to the per-target implementation selected by Highway.
+  HWY_DYNAMIC_DISPATCH(DoColorSpaceTransform)(t, thread, buf_src, buf_dst);
+}
+
+namespace {
+
+#define JXL_CMS_OLD_VERSION 0
+
+// cms functions (even *THR) are not thread-safe, except cmsDoTransform.
+// To ensure all functions are covered without frequent lock-taking nor risk of
+// recursive lock, we lock in the top-level APIs.
+static std::mutex& LcmsMutex() {
+  static std::mutex lcms_lock;  // function-local static shared by all callers
+  return lcms_lock;
+}
+
+#if JPEGXL_ENABLE_SKCMS
+
+JXL_MUST_USE_RESULT CIExy CIExyFromXYZ(const float XYZ[3]) {
+  const float inv_sum = 1.f / (XYZ[0] + XYZ[1] + XYZ[2]);  // 1 / (X + Y + Z)
+  CIExy chroma;
+  chroma.x = XYZ[0] * inv_sum;
+  chroma.y = XYZ[1] * inv_sum;
+  return chroma;
+}
+
+#else  // JPEGXL_ENABLE_SKCMS
+// (LCMS interface requires xyY but we omit the Y for white points/primaries.)
+
+JXL_MUST_USE_RESULT CIExy CIExyFromxyY(const cmsCIExyY& xyY) {
+  CIExy result;  // Only x and y are copied; xyY.Y is dropped.
+  result.y = xyY.y;
+  result.x = xyY.x;
+  return result;
+}
+
+JXL_MUST_USE_RESULT CIExy CIExyFromXYZ(const cmsCIEXYZ& XYZ) {
+  cmsCIExyY converted;  // Written by LCMS, then reduced to (x, y).
+  cmsXYZ2xyY(&converted, &XYZ);
+  return CIExyFromxyY(converted);
+}
+
+JXL_MUST_USE_RESULT cmsCIEXYZ D50_XYZ() {
+  const cmsCIEXYZ d50 = {0.96420288, 1.0, 0.82490540};  // quantized, as in ICC
+  return d50;
+}
+
+JXL_MUST_USE_RESULT cmsCIExyY xyYFromCIExy(const CIExy& xy) {
+  // CIExy carries no Y here, so the third component is fixed at 1.0.
+  return {xy.x, xy.y, 1.0};
+}
+
+// RAII
+
+struct ProfileDeleter {
+  void operator()(void* p) { cmsCloseProfile(p); }  // releases an LCMS profile
+};
+using Profile = std::unique_ptr<void, ProfileDeleter>;  // owning profile handle
+
+struct TransformDeleter {
+  void operator()(void* p) { cmsDeleteTransform(p); }  // releases a transform
+};
+using Transform = std::unique_ptr<void, TransformDeleter>;  // owning handle
+
+struct CurveDeleter {
+  void operator()(cmsToneCurve* p) { cmsFreeToneCurve(p); }  // frees the curve
+};
+using Curve = std::unique_ptr<cmsToneCurve, CurveDeleter>;  // owning tone curve
+
+Status CreateProfileXYZ(const cmsContext context,
+                        Profile* JXL_RESTRICT profile) {
+  profile->reset(cmsCreateXYZProfileTHR(context));  // `profile` takes ownership
+  if (!*profile) return JXL_FAILURE("Failed to create XYZ");
+  return true;
+}
+
+#endif  // !JPEGXL_ENABLE_SKCMS
+
+#if JPEGXL_ENABLE_SKCMS
+// IMPORTANT: icc must outlive profile.
+Status DecodeProfile(const PaddedBytes& icc, skcms_ICCProfile* const profile) {
+  // A successfully parsed profile keeps pointers into `icc`.
+  const bool parsed = skcms_Parse(icc.data(), icc.size(), profile);
+  if (parsed) return true;
+  return JXL_FAILURE("Failed to parse ICC profile with %zu bytes",
+                     icc.size());
+}
+#else  // JPEGXL_ENABLE_SKCMS
+Status DecodeProfile(const cmsContext context, const PaddedBytes& icc,
+                     Profile* profile) {
+  // Ownership of the new handle passes to `profile`, which first releases any
+  // handle it held before.
+  cmsHPROFILE opened = cmsOpenProfileFromMemTHR(context, icc.data(), icc.size());
+  profile->reset(opened);
+  if (opened == nullptr) return JXL_FAILURE("Failed to decode profile");
+  return true;
+}
+#endif  // JPEGXL_ENABLE_SKCMS
+
+#if JPEGXL_ENABLE_SKCMS
+
+ColorSpace ColorSpaceFromProfile(const skcms_ICCProfile& profile) {
+  // Only the RGB and gray data color spaces map to a known ColorSpace; any
+  // other signature is reported as kUnknown.
+  const auto signature = profile.data_color_space;
+
+  if (signature == skcms_Signature_RGB) return ColorSpace::kRGB;
+  if (signature == skcms_Signature_Gray) return ColorSpace::kGray;
+
+  return ColorSpace::kUnknown;
+}
+
+// "profile1" is pre-decoded to save time in DetectTransferFunction.
+Status ProfileEquivalentToICC(const skcms_ICCProfile& profile1,
+                              const PaddedBytes& icc) {
+  skcms_ICCProfile parsed;  // parsed from `icc`; fails below if unparsable
+  JXL_RETURN_IF_ERROR(skcms_Parse(icc.data(), icc.size(), &parsed));
+  return skcms_ApproximatelyEqualProfiles(&profile1, &parsed);
+}
+
+// vector_out := matmul(matrix, vector_in)
+void MatrixProduct(const skcms_Matrix3x3& matrix, const float vector_in[3],
+                   float vector_out[3]) {
+  for (int row = 0; row < 3; ++row) {
+    const float* coeffs = matrix.vals[row];
+    float& acc = vector_out[row];  // accumulated directly in the output slot
+    acc = 0;
+    for (int col = 0; col < 3; ++col) acc += coeffs[col] * vector_in[col];
+  }
+}
+
+// Returns white point that was specified when creating the profile.
+JXL_MUST_USE_RESULT Status UnadaptedWhitePoint(const skcms_ICCProfile& profile,
+                                               CIExy* out) {
+  float wtpt_XYZ[3];
+  if (!skcms_GetWTPT(&profile, wtpt_XYZ)) {
+    return JXL_FAILURE("ICC profile does not contain WhitePoint tag");
+  }
+  skcms_Matrix3x3 chad;
+  const bool has_chad = skcms_GetCHAD(&profile, &chad);
+  if (has_chad) {
+    // The media white point was adapted to the PCS white point with this
+    // matrix; apply its inverse to undo the adaptation.
+    skcms_Matrix3x3 chad_inv;
+    if (!skcms_Matrix3x3_invert(&chad, &chad_inv)) {
+      return JXL_FAILURE("Non-invertible ChromaticAdaptation matrix");
+    }
+    float restored_XYZ[3];
+    MatrixProduct(chad_inv, wtpt_XYZ, restored_XYZ);
+    *out = CIExyFromXYZ(restored_XYZ);
+  } else {
+    // No chromatic adaptation matrix: the white point is already unadapted.
+    *out = CIExyFromXYZ(wtpt_XYZ);
+  }
+  return true;
+}
+
+// Recovers the unadapted RGB primaries of `profile` and stores them in `c`.
+Status IdentifyPrimaries(const skcms_ICCProfile& profile,
+                         const CIExy& wp_unadapted, ColorEncoding* c) {
+  if (!c->HasPrimaries()) return true;
+
+  skcms_Matrix3x3 CHAD, inverse_CHAD;
+  if (skcms_GetCHAD(&profile, &CHAD)) {
+    JXL_RETURN_IF_ERROR(skcms_Matrix3x3_invert(&CHAD, &inverse_CHAD));
+  } else {  // No CHAD tag: scale from D50 to wp_unadapted in LMS space.
+    static constexpr skcms_Matrix3x3 kLMSFromXYZ = {
+        {{0.8951, 0.2664, -0.1614},
+         {-0.7502, 1.7135, 0.0367},
+         {0.0389, -0.0685, 1.0296}}};
+    static constexpr skcms_Matrix3x3 kXYZFromLMS = {
+        {{0.9869929, -0.1470543, 0.1599627},
+         {0.4323053, 0.5183603, 0.0492912},
+         {-0.0085287, 0.0400428, 0.9684867}}};
+    static constexpr float kWpD50XYZ[3] = {0.96420288, 1.0, 0.82490540};
+    float wp_unadapted_XYZ[3], wp_D50_LMS[3], wp_unadapted_LMS[3];
+    JXL_RETURN_IF_ERROR(CIEXYZFromWhiteCIExy(wp_unadapted, wp_unadapted_XYZ));
+    MatrixProduct(kLMSFromXYZ, kWpD50XYZ, wp_D50_LMS);
+    MatrixProduct(kLMSFromXYZ, wp_unadapted_XYZ, wp_unadapted_LMS);
+    inverse_CHAD = {{{wp_unadapted_LMS[0] / wp_D50_LMS[0], 0, 0},
+                     {0, wp_unadapted_LMS[1] / wp_D50_LMS[1], 0},
+                     {0, 0, wp_unadapted_LMS[2] / wp_D50_LMS[2]}}};
+    inverse_CHAD = skcms_Matrix3x3_concat(&kXYZFromLMS, &inverse_CHAD);
+    inverse_CHAD = skcms_Matrix3x3_concat(&inverse_CHAD, &kLMSFromXYZ);
+  }
+  // Map each unit RGB primary to D50 XYZ, then undo the adaptation.
+  float XYZ[3], unadapted_XYZ[3];
+  PrimariesCIExy primaries;
+  CIExy* const chromaticities[] = {&primaries.r, &primaries.g, &primaries.b};
+  for (int i = 0; i < 3; ++i) {
+    float RGB[3] = {};
+    RGB[i] = 1;
+    // A failed transform would leave XYZ uninitialized, so propagate it.
+    JXL_RETURN_IF_ERROR(skcms_Transform(
+        RGB, skcms_PixelFormat_RGB_fff, skcms_AlphaFormat_Opaque, &profile, XYZ,
+        skcms_PixelFormat_RGB_fff, skcms_AlphaFormat_Opaque,
+        skcms_XYZD50_profile(), 1));
+    MatrixProduct(inverse_CHAD, XYZ, unadapted_XYZ);
+    *chromaticities[i] = CIExyFromXYZ(unadapted_XYZ);
+  }
+  return c->SetPrimaries(primaries);
+}
+
+void DetectTransferFunction(const skcms_ICCProfile& profile,
+                            ColorEncoding* JXL_RESTRICT c) {
+  if (c->tf.SetImplicit()) return;
+
+  // Try every known transfer function (a profile cannot be created from
+  // kUnknown); keep the first whose synthesized profile matches `profile`.
+  for (TransferFunction candidate : Values<TransferFunction>()) {
+    if (candidate == TransferFunction::kUnknown) continue;
+    c->tf.SetTransferFunction(candidate);
+
+    skcms_ICCProfile synthesized;
+    PaddedBytes icc;
+    if (!MaybeCreateProfile(*c, &icc)) continue;
+    if (!DecodeProfile(icc, &synthesized)) continue;
+    if (skcms_ApproximatelyEqualProfiles(&profile, &synthesized)) return;
+  }
+
+  // No candidate matched.
+  c->tf.SetTransferFunction(TransferFunction::kUnknown);
+}
+
+#else  // JPEGXL_ENABLE_SKCMS
+
+uint32_t Type32(const ColorEncoding& c) {
+  // LCMS *_FLT pixel format matching the channel layout of `c`.
+  return c.IsGray() ? TYPE_GRAY_FLT : TYPE_RGB_FLT;
+}
+
+uint32_t Type64(const ColorEncoding& c) {
+  // LCMS *_DBL pixel format matching the channel layout of `c`.
+  return c.IsGray() ? TYPE_GRAY_DBL : TYPE_RGB_DBL;
+}
+
+ColorSpace ColorSpaceFromProfile(const Profile& profile) {
+  // Only the RGB and gray data color spaces map to a known ColorSpace; any
+  // other signature is reported as kUnknown.
+  const auto signature = cmsGetColorSpace(profile.get());
+
+  if (signature == cmsSigRgbData) return ColorSpace::kRGB;
+  if (signature == cmsSigGrayData) return ColorSpace::kGray;
+
+  return ColorSpace::kUnknown;
+}
+
+// "profile1" is pre-decoded to save time in DetectTransferFunction.
+Status ProfileEquivalentToICC(const cmsContext context, const Profile& profile1,
+                              const PaddedBytes& icc, const ColorEncoding& c) {
+  const uint32_t type_src = Type64(c);  // inputs are fed as doubles
+
+  Profile profile2;
+  JXL_RETURN_IF_ERROR(DecodeProfile(context, icc, &profile2));
+
+  Profile profile_xyz;
+  JXL_RETURN_IF_ERROR(CreateProfileXYZ(context, &profile_xyz));
+
+  const uint32_t intent = INTENT_RELATIVE_COLORIMETRIC;
+  const uint32_t flags = cmsFLAGS_NOOPTIMIZE | cmsFLAGS_BLACKPOINTCOMPENSATION |
+                         cmsFLAGS_HIGHRESPRECALC;
+  Transform xform1(cmsCreateTransformTHR(context, profile1.get(), type_src,
+                                         profile_xyz.get(), TYPE_XYZ_DBL,
+                                         intent, flags));
+  Transform xform2(cmsCreateTransformTHR(context, profile2.get(), type_src,
+                                         profile_xyz.get(), TYPE_XYZ_DBL,
+                                         intent, flags));
+  if (xform1 == nullptr || xform2 == nullptr) {
+    return JXL_FAILURE("Failed to create transform");
+  }
+
+  double in[3];
+  double out1[3];
+  double out2[3];
+
+  // Uniformly spaced samples from very dark to almost fully bright.
+  const double init = 1E-3;
+  const double step = 0.2;
+
+  if (c.IsGray()) {
+    // Finer sampling; only in[0] is set and only out*[0] is compared.
+    for (in[0] = init; in[0] < 1.0; in[0] += step / 8) {
+      cmsDoTransform(xform1.get(), in, out1, 1);
+      cmsDoTransform(xform2.get(), in, out2, 1);
+      if (!ApproxEq(out1[0], out2[0], 2E-4)) {
+        return false;
+      }
+    }
+  } else {
+    for (in[0] = init; in[0] < 1.0; in[0] += step) {
+      for (in[1] = init; in[1] < 1.0; in[1] += step) {
+        for (in[2] = init; in[2] < 1.0; in[2] += step) {
+          cmsDoTransform(xform1.get(), in, out1, 1);
+          cmsDoTransform(xform2.get(), in, out2, 1);
+          for (size_t i = 0; i < 3; ++i) {
+            if (!ApproxEq(out1[i], out2[i], 2E-4)) {
+              return false;  // first differing XYZ component ends the check
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return true;  // every sampled input mapped to matching XYZ in both profiles
+}
+
+// Recovers the white point the profile was created with by sending (1, 1, 1)
+// through an absolute-colorimetric transform to XYZ. cmsSigMediaWhitePointTag
+// is not used because its interpretation differs between ICC versions.
+JXL_MUST_USE_RESULT cmsCIEXYZ UnadaptedWhitePoint(const cmsContext context,
+                                                  const Profile& profile,
+                                                  const ColorEncoding& c) {
+  cmsCIEXYZ white = {1.0, 1.0, 1.0};  // Returned as-is if LCMS setup fails.
+
+  Profile xyz_profile;
+  if (!CreateProfileXYZ(context, &xyz_profile)) return white;
+  // The arrays below hold one entry per profile in the chain.
+  cmsHPROFILE chain[2] = {profile.get(), xyz_profile.get()};
+  // Leave white point unchanged - that is what we're trying to extract.
+  cmsUInt32Number chain_intents[2] = {INTENT_ABSOLUTE_COLORIMETRIC,
+                                      INTENT_ABSOLUTE_COLORIMETRIC};
+  cmsBool bpc[2] = {0, 0};
+  cmsFloat64Number adaptation_states[2] = {0.0, 0.0};
+  // Only transforming a single pixel, so skip expensive optimizations.
+  const cmsUInt32Number flags = cmsFLAGS_NOOPTIMIZE | cmsFLAGS_HIGHRESPRECALC;
+  Transform xform(cmsCreateExtendedTransform(
+      context, 2, chain, bpc, chain_intents, adaptation_states, nullptr, 0,
+      Type64(c), TYPE_XYZ_DBL, flags));
+  if (!xform) return white;  // TODO(lode): return error
+
+  // xy are relative, so magnitude does not matter if we ignore output Y.
+  const cmsFloat64Number unit[3] = {1.0, 1.0, 1.0};
+  cmsDoTransform(xform.get(), unit, &white.X, 1);
+  return white;
+}
+
+// Derives the RGB primaries from the profile's colorant tags and stores them
+// in `c`. Succeeds without changes if `c` has no primaries or the profile's
+// color space is unknown.
+Status IdentifyPrimaries(const Profile& profile, const cmsCIEXYZ& wp_unadapted,
+                         ColorEncoding* c) {
+  if (!c->HasPrimaries()) return true;
+  if (ColorSpaceFromProfile(profile) == ColorSpace::kUnknown) return true;
+
+  // These were adapted to the profile illuminant before storing in the profile.
+  const cmsTagSignature tags[3] = {cmsSigRedColorantTag, cmsSigGreenColorantTag,
+                                   cmsSigBlueColorantTag};
+  const cmsCIEXYZ* adapted[3];
+  for (size_t i = 0; i < 3; ++i) {
+    adapted[i] =
+        static_cast<const cmsCIEXYZ*>(cmsReadTag(profile.get(), tags[i]));
+  }
+  if (adapted[0] == nullptr || adapted[1] == nullptr || adapted[2] == nullptr) {
+    return JXL_FAILURE("Failed to retrieve colorants");
+  }
+  // TODO(janwas): no longer assume Bradford and D50.
+  // Undo the chromatic adaptation.
+  const cmsCIEXYZ d50 = D50_XYZ();
+  cmsCIEXYZ rgb_xyz[3];
+  for (size_t i = 0; i < 3; ++i) {
+    cmsAdaptToIlluminant(&rgb_xyz[i], &d50, &wp_unadapted, adapted[i]);
+  }
+  const PrimariesCIExy rgb = {CIExyFromXYZ(rgb_xyz[0]), CIExyFromXYZ(rgb_xyz[1]),
+                              CIExyFromXYZ(rgb_xyz[2])};
+  return c->SetPrimaries(rgb);
+}
+
+// Tries every known transfer function and keeps the first one for which the
+// profile generated from `c` is equivalent to `profile`; else sets kUnknown.
+void DetectTransferFunction(const cmsContext context, const Profile& profile,
+                            ColorEncoding* JXL_RESTRICT c) {
+  if (c->tf.SetImplicit()) return;
+
+  for (TransferFunction candidate : Values<TransferFunction>()) {
+    // Can only create profile from known transfer function.
+    if (candidate == TransferFunction::kUnknown) continue;
+
+    c->tf.SetTransferFunction(candidate);
+
+    PaddedBytes candidate_icc;
+    if (!MaybeCreateProfile(*c, &candidate_icc)) continue;
+    if (ProfileEquivalentToICC(context, profile, candidate_icc, *c)) return;
+  }
+
+  c->tf.SetTransferFunction(TransferFunction::kUnknown);
+}
+
+void ErrorHandler(cmsContext context, cmsUInt32Number code, const char* text) {
+  JXL_WARNING("LCMS error %u: %s", code, text);  // `context` is not used.
+}
+
+// Returns the calling thread's LCMS context, creating it on first use and
+// routing LCMS log messages through ErrorHandler.
+cmsContext GetContext() {
+  static thread_local void* context_;
+  if (context_ != nullptr) return static_cast<cmsContext>(context_);
+
+  context_ = cmsCreateContext(nullptr, nullptr);
+  JXL_ASSERT(context_ != nullptr);
+  cmsSetLogErrorHandlerTHR(static_cast<cmsContext>(context_), &ErrorHandler);
+  return static_cast<cmsContext>(context_);
+}
+
+#endif  // JPEGXL_ENABLE_SKCMS
+
+}  // namespace
+
+// All functions that call lcms directly (except ColorSpaceTransform::Run) must
+// lock LcmsMutex().
+
+Status ColorEncoding::SetFieldsFromICC() {
+  // In case parsing fails, mark the ColorEncoding as invalid.
+  SetColorSpace(ColorSpace::kUnknown);
+  tf.SetTransferFunction(TransferFunction::kUnknown);
+
+  if (icc_.empty()) return JXL_FAILURE("Empty ICC profile");
+
+#if JPEGXL_ENABLE_SKCMS
+  if (icc_.size() < 128) {  // The fixed-size ICC header is 128 bytes.
+    return JXL_FAILURE("ICC file too small");
+  }
+
+  skcms_ICCProfile profile;
+  JXL_RETURN_IF_ERROR(skcms_Parse(icc_.data(), icc_.size(), &profile));
+
+  // skcms does not return the rendering intent, so get it from the file. It
+  // is encoded as big-endian 32-bit integer in bytes 64..67.
+  uint32_t rendering_intent32 = icc_[67];
+  if (rendering_intent32 > 3 || icc_[64] != 0 || icc_[65] != 0 ||
+      icc_[66] != 0) {
+    return JXL_FAILURE("Invalid rendering intent %u\n", rendering_intent32);
+  }
+
+  SetColorSpace(ColorSpaceFromProfile(profile));
+
+  CIExy wp_unadapted;
+  JXL_RETURN_IF_ERROR(UnadaptedWhitePoint(profile, &wp_unadapted));
+  JXL_RETURN_IF_ERROR(SetWhitePoint(wp_unadapted));
+
+  // Relies on color_space.
+  JXL_RETURN_IF_ERROR(IdentifyPrimaries(profile, wp_unadapted, this));
+
+  // Relies on color_space/white point/primaries being set already.
+  DetectTransferFunction(profile, this);
+  // ICC and RenderingIntent have the same values (0..3).
+  rendering_intent = static_cast<RenderingIntent>(rendering_intent32);
+#else   // JPEGXL_ENABLE_SKCMS
+
+  std::lock_guard<std::mutex> guard(LcmsMutex());
+  const cmsContext context = GetContext();
+
+  Profile profile;
+  JXL_RETURN_IF_ERROR(DecodeProfile(context, icc_, &profile));
+
+  const cmsUInt32Number rendering_intent32 =
+      cmsGetHeaderRenderingIntent(profile.get());
+  if (rendering_intent32 > 3) {
+    return JXL_FAILURE("Invalid rendering intent %u\n", rendering_intent32);
+  }
+
+  SetColorSpace(ColorSpaceFromProfile(profile));
+
+  const cmsCIEXYZ wp_unadapted = UnadaptedWhitePoint(context, profile, *this);
+  JXL_RETURN_IF_ERROR(SetWhitePoint(CIExyFromXYZ(wp_unadapted)));
+
+  // Relies on color_space.
+  JXL_RETURN_IF_ERROR(IdentifyPrimaries(profile, wp_unadapted, this));
+
+  // Relies on color_space/white point/primaries being set already.
+  DetectTransferFunction(context, profile, this);
+
+  // ICC and RenderingIntent have the same values (0..3).
+  rendering_intent = static_cast<RenderingIntent>(rendering_intent32);
+#endif  // JPEGXL_ENABLE_SKCMS
+
+  return true;
+}
+
+void ColorEncoding::DecideIfWantICC() {
+  PaddedBytes icc_new;  // Profile regenerated from this encoding's fields.
+  bool equivalent;
+#if JPEGXL_ENABLE_SKCMS
+  skcms_ICCProfile profile;
+  if (!DecodeProfile(ICC(), &profile)) return;
+  if (!MaybeCreateProfile(*this, &icc_new)) return;
+  equivalent = ProfileEquivalentToICC(profile, icc_new);
+#else   // JPEGXL_ENABLE_SKCMS
+  const cmsContext context = GetContext();
+  Profile profile;
+  if (!DecodeProfile(context, ICC(), &profile)) return;
+  if (!MaybeCreateProfile(*this, &icc_new)) return;
+  equivalent = ProfileEquivalentToICC(context, profile, icc_new, *this);
+#endif  // JPEGXL_ENABLE_SKCMS
+  // NOTE(review): unlike SetFieldsFromICC, LcmsMutex() is not locked here.
+  // Successfully created a profile => reconstruction should be equivalent.
+  JXL_ASSERT(equivalent);
+  want_icc_ = false;  // Early returns above leave want_icc_ unchanged.
+}
+
+ColorSpaceTransform::~ColorSpaceTransform() {
+#if !JPEGXL_ENABLE_SKCMS  // lcms_transform_ is null if Init made no transform.
+  std::lock_guard<std::mutex> guard(LcmsMutex());
+  if (lcms_transform_ != nullptr) TransformDeleter()(lcms_transform_);
+#endif
+}
+
+ColorSpaceTransform::ColorSpaceTransform()
+#if JPEGXL_ENABLE_SKCMS
+    : skcms_icc_(new SkcmsICC())  // Profile storage that Init() fills in.
+#endif  // JPEGXL_ENABLE_SKCMS
+{
+}
+
+Status ColorSpaceTransform::Init(const ColorEncoding& c_src,
+                                 const ColorEncoding& c_dst,
+                                 float intensity_target, size_t xsize,
+                                 const size_t num_threads) {
+  std::lock_guard<std::mutex> guard(LcmsMutex());
+#if JXL_CMS_VERBOSE
+  printf("%s -> %s\n", Description(c_src).c_str(), Description(c_dst).c_str());
+#endif
+
+#if JPEGXL_ENABLE_SKCMS
+  skcms_icc_->icc_src_ = c_src.ICC();
+  skcms_icc_->icc_dst_ = c_dst.ICC();
+  JXL_RETURN_IF_ERROR(
+      DecodeProfile(skcms_icc_->icc_src_, &skcms_icc_->profile_src_));
+  JXL_RETURN_IF_ERROR(
+      DecodeProfile(skcms_icc_->icc_dst_, &skcms_icc_->profile_dst_));
+#else   // JPEGXL_ENABLE_SKCMS
+  const cmsContext context = GetContext();
+  Profile profile_src, profile_dst;
+  JXL_RETURN_IF_ERROR(DecodeProfile(context, c_src.ICC(), &profile_src));
+  JXL_RETURN_IF_ERROR(DecodeProfile(context, c_dst.ICC(), &profile_dst));
+#endif  // JPEGXL_ENABLE_SKCMS
+
+  skip_lcms_ = false;
+  if (c_src.SameColorEncoding(c_dst)) {
+    skip_lcms_ = true;
+#if JXL_CMS_VERBOSE
+    printf("Skip CMS\n");
+#endif
+  }
+
+  // Special-case for BT.2100 HLG/PQ and SRGB <=> linear. The PQ test must use
+  // the `intensity_target` argument: intensity_target_ is only assigned below.
+  const bool src_linear = c_src.tf.IsLinear();
+  const bool dst_linear = c_dst.tf.IsLinear();
+  if (((c_src.tf.IsPQ() || c_src.tf.IsHLG()) && dst_linear) ||
+      ((c_dst.tf.IsPQ() || c_dst.tf.IsHLG()) && src_linear) ||
+      ((c_src.tf.IsPQ() != c_dst.tf.IsPQ()) && intensity_target != 10000) ||
+      (c_src.tf.IsSRGB() && dst_linear) || (c_dst.tf.IsSRGB() && src_linear)) {
+    // Construct new profiles as if the data were already/still linear.
+    ColorEncoding c_linear_src = c_src;
+    ColorEncoding c_linear_dst = c_dst;
+    c_linear_src.tf.SetTransferFunction(TransferFunction::kLinear);
+    c_linear_dst.tf.SetTransferFunction(TransferFunction::kLinear);
+    PaddedBytes icc_src, icc_dst;
+#if JPEGXL_ENABLE_SKCMS
+    skcms_ICCProfile new_src, new_dst;
+#else  // JPEGXL_ENABLE_SKCMS
+    Profile new_src, new_dst;
+#endif  // JPEGXL_ENABLE_SKCMS
+    // Only enable ExtraTF if profile creation succeeded.
+    if (MaybeCreateProfile(c_linear_src, &icc_src) &&
+        MaybeCreateProfile(c_linear_dst, &icc_dst) &&
+#if JPEGXL_ENABLE_SKCMS
+        DecodeProfile(icc_src, &new_src) && DecodeProfile(icc_dst, &new_dst)) {
+#else   // JPEGXL_ENABLE_SKCMS
+        DecodeProfile(context, icc_src, &new_src) &&
+        DecodeProfile(context, icc_dst, &new_dst)) {
+#endif  // JPEGXL_ENABLE_SKCMS
+      if (c_src.SameColorSpace(c_dst)) {
+        skip_lcms_ = true;
+      }
+#if JXL_CMS_VERBOSE
+      printf("Special linear <-> HLG/PQ/sRGB; skip=%d\n", skip_lcms_);
+#endif
+#if JPEGXL_ENABLE_SKCMS
+      skcms_icc_->icc_src_ = PaddedBytes();
+      skcms_icc_->profile_src_ = new_src;
+      skcms_icc_->icc_dst_ = PaddedBytes();
+      skcms_icc_->profile_dst_ = new_dst;
+#else   // JPEGXL_ENABLE_SKCMS
+      profile_src.swap(new_src);
+      profile_dst.swap(new_dst);
+#endif  // JPEGXL_ENABLE_SKCMS
+      if (!c_src.tf.IsLinear()) {
+        preprocess_ = c_src.tf.IsSRGB()
+                          ? ExtraTF::kSRGB
+                          : (c_src.tf.IsPQ() ? ExtraTF::kPQ : ExtraTF::kHLG);
+      }
+      if (!c_dst.tf.IsLinear()) {
+        postprocess_ = c_dst.tf.IsSRGB()
+                           ? ExtraTF::kSRGB
+                           : (c_dst.tf.IsPQ() ? ExtraTF::kPQ : ExtraTF::kHLG);
+      }
+    } else {
+      JXL_WARNING("Failed to create extra linear profiles");
+    }
+  }
+
+#if JPEGXL_ENABLE_SKCMS
+  if (!skcms_MakeUsableAsDestination(&skcms_icc_->profile_dst_)) {
+    return JXL_FAILURE(
+        "Failed to make %s usable as a color transform destination",
+        Description(c_dst).c_str());
+  }
+#endif  // JPEGXL_ENABLE_SKCMS
+
+  // Not including alpha channel (copied separately).
+  const size_t channels_src = c_src.Channels();
+  const size_t channels_dst = c_dst.Channels();
+  JXL_CHECK(channels_src == channels_dst);
+#if JXL_CMS_VERBOSE
+  printf("Channels: %zu; Threads: %zu\n", channels_src, num_threads);
+#endif
+
+#if !JPEGXL_ENABLE_SKCMS
+  // Type includes color space (XYZ vs RGB), so can be different.
+  const uint32_t type_src = Type32(c_src);
+  const uint32_t type_dst = Type32(c_dst);
+  const uint32_t intent = static_cast<uint32_t>(c_dst.rendering_intent);
+  // Use cmsFLAGS_NOCACHE to disable the 1-pixel cache and make calling
+  // cmsDoTransform() thread-safe.
+  const uint32_t flags = cmsFLAGS_NOCACHE | cmsFLAGS_BLACKPOINTCOMPENSATION |
+                         cmsFLAGS_HIGHRESPRECALC;
+  lcms_transform_ =
+      cmsCreateTransformTHR(context, profile_src.get(), type_src,
+                            profile_dst.get(), type_dst, intent, flags);
+  if (lcms_transform_ == nullptr) {
+    return JXL_FAILURE("Failed to create transform");
+  }
+#endif  // !JPEGXL_ENABLE_SKCMS
+
+  // Ideally LCMS would convert directly from External to Image3. However,
+  // cmsDoTransformLineStride only accepts 32-bit BytesPerPlaneIn, whereas our
+  // planes can be more than 4 GiB apart. Hence, transform inputs/outputs must
+  // be interleaved. Per-pixel cmsDoTransform calls are expensive (indirect
+  // call), so we transform rows using per-thread buffers (the rows of an image,
+  // to avoid separate allocations). LCMS apparently cannot mix <= 16 bit inputs
+  // with 32-bit outputs (or vice versa), so input and output are both float.
+#if JPEGXL_ENABLE_SKCMS
+  // SkiaCMS doesn't support grayscale float buffers, so we create space for RGB
+  // float buffers anyway.
+  buf_src_ = ImageF(xsize * 3, num_threads);
+  buf_dst_ = ImageF(xsize * 3, num_threads);
+#else
+  buf_src_ = ImageF(xsize * channels_src, num_threads);
+  buf_dst_ = ImageF(xsize * channels_dst, num_threads);
+#endif
+  intensity_target_ = intensity_target;
+  xsize_ = xsize;
+  return true;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_color_management.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_color_management.h
new file mode 100644
index 0000000000..9dbce855bf
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_color_management.h
@@ -0,0 +1,70 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_COLOR_MANAGEMENT_H_
+#define LIB_JXL_ENC_COLOR_MANAGEMENT_H_
+
+// ICC profiles and color space conversions.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// Run is thread-safe.
+class ColorSpaceTransform {
+ public:
+  ColorSpaceTransform();
+  ~ColorSpaceTransform();
+
+  // Cannot copy (owns the skcms/lcms transform state released by the dtor).
+  ColorSpaceTransform(const ColorSpaceTransform&) = delete;
+  ColorSpaceTransform& operator=(const ColorSpaceTransform&) = delete;
+
+  // "Constructor"; allocates for up to `num_threads`, or returns false.
+  // `intensity_target` is used for conversion to and from PQ, which is absolute
+  // (1 always represents 10000 cd/m²) and thus needs scaling in linear space if
+  // 1 is to represent another luminance level instead.
+  Status Init(const ColorEncoding& c_src, const ColorEncoding& c_dst,
+              float intensity_target, size_t xsize, size_t num_threads);
+
+  float* BufSrc(const size_t thread) { return buf_src_.Row(thread); }
+
+  float* BufDst(const size_t thread) { return buf_dst_.Row(thread); }
+
+#if JPEGXL_ENABLE_SKCMS
+  struct SkcmsICC;
+  std::unique_ptr<SkcmsICC> skcms_icc_;
+#else
+  void* lcms_transform_ = nullptr;  // Stays null until Init() creates it.
+#endif
+
+  ImageF buf_src_;
+  ImageF buf_dst_;
+  float intensity_target_ = 0.0f;  // Overwritten by Init().
+  size_t xsize_ = 0;               // Overwritten by Init().
+  bool skip_lcms_ = false;
+  ExtraTF preprocess_ = ExtraTF::kNone;
+  ExtraTF postprocess_ = ExtraTF::kNone;
+};
+
+// buf_X can either be from BufX() or caller-allocated, interleaved storage.
+// `thread` must be less than the `num_threads` passed to Init.
+// `t` is non-const because buf_* may be modified.
+void DoColorSpaceTransform(ColorSpaceTransform* t, size_t thread,
+                           const float* buf_src, float* buf_dst);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_COLOR_MANAGEMENT_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_comparator.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_comparator.cc
new file mode 100644
index 0000000000..f5b25f876a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_comparator.cc
@@ -0,0 +1,140 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_comparator.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/enc_gamma_correct.h"
+
+namespace jxl {
+namespace {
+
+// Blends plane `c` of linear `in` over a gray background, writing linear output.
+// Mixing is done in gamma-compressed (sRGB) space with weight `alpha` per pixel;
+// alpha <= 0 yields the background and alpha >= 1 copies the input unchanged.
+// `in` and `*out` may be the same image, so their rows must not be JXL_RESTRICT.
+void AlphaBlend(const Image3F& in, const size_t c, float background_linear,
+                const ImageF& alpha, Image3F* out) {
+  const float background = LinearToSrgb8Direct(background_linear);
+
+  for (size_t y = 0; y < out->ysize(); ++y) {
+    const float* JXL_RESTRICT row_a = alpha.ConstRow(y);
+    const float* row_i = in.ConstPlaneRow(c, y);
+    float* row_o = out->PlaneRow(c, y);
+    for (size_t x = 0; x < out->xsize(); ++x) {
+      const float a = row_a[x];
+      if (a <= 0.f) {
+        row_o[x] = background_linear;
+      } else if (a >= 1.f) {
+        row_o[x] = row_i[x];
+      } else {
+        const float w_fg = a;
+        const float w_bg = 1.0f - w_fg;
+        const float fg = w_fg * LinearToSrgb8Direct(row_i[x]);
+        const float bg = w_bg * background;
+        row_o[x] = Srgb8ToLinearDirect(fg + bg);
+      }
+    }
+  }
+}
+
+// Returns &linear if `ib` has no alpha, else `copy` filled with the blend.
+const Image3F* AlphaBlend(const ImageBundle& ib, const Image3F& linear,
+                          float background_linear, Image3F* copy) {
+  if (!ib.HasAlpha()) return &linear;  // No alpha => all opaque.
+
+  *copy = Image3F(linear.xsize(), linear.ysize());
+  for (size_t plane = 0; plane < 3; ++plane) {
+    AlphaBlend(linear, plane, background_linear, ib.alpha(), copy);
+  }
+  return copy;
+}
+
+// In-place variant: blends the three color planes of `io_linear_srgb` over the
+// background using its own alpha channel; no-op when the bundle has no alpha.
+void AlphaBlend(float background_linear, ImageBundle* io_linear_srgb) {
+  if (!io_linear_srgb->HasAlpha()) return;  // No alpha => all opaque.
+  for (size_t plane = 0; plane < 3; ++plane) {
+    AlphaBlend(*io_linear_srgb->color(), plane, background_linear,
+               *io_linear_srgb->alpha(), io_linear_srgb->color());
+  }
+}
+
+float ComputeScoreImpl(const ImageBundle& rgb0, const ImageBundle& rgb1,
+                       Comparator* comparator, ImageF* distmap) {
+  JXL_CHECK(comparator->SetReferenceImage(rgb0));  // rgb0 is the reference.
+  float score;
+  JXL_CHECK(comparator->CompareWith(rgb1, distmap, &score));  // sets score
+  return score;
+}
+
+}  // namespace
+
+float ComputeScore(const ImageBundle& rgb0, const ImageBundle& rgb1,
+                   Comparator* comparator, ImageF* diffmap, ThreadPool* pool) {
+  PROFILER_FUNC;
+  // Convert to linear sRGB (unless already in that space); aborts on failure.
+  ImageMetadata metadata0 = *rgb0.metadata();
+  ImageBundle store0(&metadata0);
+  const ImageBundle* linear_srgb0;
+  JXL_CHECK(TransformIfNeeded(rgb0, ColorEncoding::LinearSRGB(rgb0.IsGray()),
+                              pool, &store0, &linear_srgb0));
+  ImageMetadata metadata1 = *rgb1.metadata();
+  ImageBundle store1(&metadata1);
+  const ImageBundle* linear_srgb1;
+  JXL_CHECK(TransformIfNeeded(rgb1, ColorEncoding::LinearSRGB(rgb1.IsGray()),
+                              pool, &store1, &linear_srgb1));
+
+  // No alpha: skip blending, only need a single call to Butteraugli.
+  if (!rgb0.HasAlpha() && !rgb1.HasAlpha()) {
+    return ComputeScoreImpl(*linear_srgb0, *linear_srgb1, comparator, diffmap);
+  }
+
+  // Blend copies of both images onto black and white backgrounds and score
+  // each pair separately.
+  const float black = 0.0f;
+  ImageBundle blended_black0 = linear_srgb0->Copy();
+  ImageBundle blended_black1 = linear_srgb1->Copy();
+  AlphaBlend(black, &blended_black0);
+  AlphaBlend(black, &blended_black1);
+
+  const float white = 1.0f;
+  ImageBundle blended_white0 = linear_srgb0->Copy();
+  ImageBundle blended_white1 = linear_srgb1->Copy();
+
+  AlphaBlend(white, &blended_white0);
+  AlphaBlend(white, &blended_white1);
+
+  ImageF diffmap_black, diffmap_white;
+  const float dist_black = ComputeScoreImpl(blended_black0, blended_black1,
+                                            comparator, &diffmap_black);
+  const float dist_white = ComputeScoreImpl(blended_white0, blended_white1,
+                                            comparator, &diffmap_white);
+
+  // diffmap and return value are the max over the black/white results.
+  if (diffmap != nullptr) {
+    const size_t xsize = rgb0.xsize();
+    const size_t ysize = rgb0.ysize();
+    *diffmap = ImageF(xsize, ysize);  // assumes both maps match rgb0 size
+    for (size_t y = 0; y < ysize; ++y) {
+      const float* JXL_RESTRICT row_black = diffmap_black.ConstRow(y);
+      const float* JXL_RESTRICT row_white = diffmap_white.ConstRow(y);
+      float* JXL_RESTRICT row_out = diffmap->Row(y);
+      for (size_t x = 0; x < xsize; ++x) {
+        row_out[x] = std::max(row_black[x], row_white[x]);
+      }
+    }
+  }
+  return std::max(dist_black, dist_white);
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_comparator.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_comparator.h
new file mode 100644
index 0000000000..e348a4e8eb
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_comparator.h
@@ -0,0 +1,52 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_COMPARATOR_H_
+#define LIB_JXL_ENC_COMPARATOR_H_
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+class Comparator {
+ public:
+  virtual ~Comparator() = default;
+
+  // Sets the reference image, the first to compare.
+  // Image must be in linear sRGB (gamma expanded), with 0.0f-1.0f spanning
+  // the range from standard black point to standard white point; values
+  // outside that range are permitted.
+  virtual Status SetReferenceImage(const ImageBundle& ref) = 0;
+
+  // Sets the actual image (with loss), the second to compare.
+  // Image must be in linear sRGB (gamma expanded), with 0.0f-1.0f spanning
+  // the range from standard black point to standard white point; values
+  // outside that range are permitted.
+  // In diffmap it outputs the local score per pixel, while in score it outputs
+  // a single score. Either one may be set to nullptr to not compute it.
+  virtual Status CompareWith(const ImageBundle& actual, ImageF* diffmap,
+                             float* score) = 0;
+
+  // Quality thresholds for diffmap and score values.
+  // The good score must represent a value where the images are considered to
+  // be perceptually indistinguishable (but not identical).
+  // The bad value must be larger than good to indicate "lower means better"
+  // and smaller than good to indicate "higher means better".
+  virtual float GoodQualityScore() const = 0;
+  virtual float BadQualityScore() const = 0;
+};
+
+// Computes the score given images in any RGB color model, optionally with
+// alpha channel.
+float ComputeScore(const ImageBundle& rgb0, const ImageBundle& rgb1,
+                   Comparator* comparator, ImageF* diffmap = nullptr,
+                   ThreadPool* pool = nullptr);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_COMPARATOR_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_context_map.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_context_map.cc
new file mode 100644
index 0000000000..d7ae8e4a6b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_context_map.cc
@@ -0,0 +1,139 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Library to encode the context map.
+
+#include "lib/jxl/enc_context_map.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <cstddef>
+#include <vector>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/entropy_coder.h"
+
+namespace jxl {
+
+namespace {
+
+// Linear search: returns the index of the first element equal to `value`, or
+// v.size() if there is none.
+size_t IndexOf(const std::vector<uint8_t>& v, uint8_t value) {
+  size_t pos = 0;
+  while (pos < v.size() && v[pos] != value) ++pos;
+  return pos;
+}
+
+// Moves the element at `index` to position 0 and shifts the elements that were
+// in front of it back by one; the rest of `*v` is untouched.
+void MoveToFront(std::vector<uint8_t>* v, size_t index) {
+  const auto first = v->begin();
+  // A left rotation of [0, index] by `index` puts (*v)[index] at the front.
+  std::rotate(first, first + index, first + index + 1);
+}
+
+// Replaces each symbol by its rank in a recency list initialized to 0..max(v).
+std::vector<uint8_t> MoveToFrontTransform(const std::vector<uint8_t>& v) {
+  if (v.empty()) return v;
+  const size_t alphabet_size = 1 + *std::max_element(v.begin(), v.end());
+  std::vector<uint8_t> mtf(alphabet_size), ranks(v.size());
+  for (size_t s = 0; s < alphabet_size; ++s) mtf[s] = s;
+  for (size_t pos = 0; pos < v.size(); ++pos) {
+    const size_t index = IndexOf(mtf, v[pos]);
+    JXL_ASSERT(index < mtf.size());
+    ranks[pos] = static_cast<uint8_t>(index);
+    MoveToFront(&mtf, index);  // The most recent symbol gets rank 0.
+  }
+  return ranks;
+}
+
+}  // namespace
+
+void EncodeContextMap(const std::vector<uint8_t>& context_map,
+                      size_t num_histograms, BitWriter* writer) {
+  if (num_histograms == 1) {
+    // Simple code
+    writer->Write(1, 1);
+    // 0 bits per entry.
+    writer->Write(2, 0);
+    return;
+  }
+  // Otherwise pick the cheapest of: raw symbols, MTF symbols, fixed-width.
+  std::vector<uint8_t> transformed_symbols = MoveToFrontTransform(context_map);
+  std::vector<std::vector<Token>> tokens(1), mtf_tokens(1);
+  EntropyEncodingData codes;
+  std::vector<uint8_t> dummy_context_map;
+  for (size_t i = 0; i < context_map.size(); i++) {
+    tokens[0].emplace_back(0, context_map[i]);
+  }
+  for (size_t i = 0; i < transformed_symbols.size(); i++) {
+    mtf_tokens[0].emplace_back(0, transformed_symbols[i]);
+  }
+  HistogramParams params;
+  params.uint_method = HistogramParams::HybridUintMethod::kContextMap;
+  size_t ans_cost = BuildAndEncodeHistograms(
+      params, 1, tokens, &codes, &dummy_context_map, nullptr, 0, nullptr);
+  size_t mtf_cost = BuildAndEncodeHistograms(
+      params, 1, mtf_tokens, &codes, &dummy_context_map, nullptr, 0, nullptr);
+  bool use_mtf = mtf_cost < ans_cost;  // Costs from the two writer-less runs.
+  // Rebuild token list from the cheaper of the two symbol streams.
+  tokens[0].clear();
+  for (size_t i = 0; i < transformed_symbols.size(); i++) {
+    tokens[0].emplace_back(0,
+                           use_mtf ? transformed_symbols[i] : context_map[i]);
+  }
+  size_t entry_bits = CeilLog2Nonzero(num_histograms);  // written in 2 bits
+  size_t simple_cost = entry_bits * context_map.size();
+  if (entry_bits < 4 && simple_cost < ans_cost && simple_cost < mtf_cost) {
+    writer->Write(1, 1);
+    writer->Write(2, entry_bits);
+    for (size_t i = 0; i < context_map.size(); i++) {
+      writer->Write(entry_bits, context_map[i]);
+    }
+  } else {
+    writer->Write(1, 0);
+    writer->Write(1, use_mtf);  // Use/don't use MTF.
+    BuildAndEncodeHistograms(params, 1, tokens, &codes, &dummy_context_map,
+                             writer, 0, nullptr);
+    WriteTokens(tokens[0], codes, dummy_context_map, writer);
+  }
+}
+
+// Writes a 1 bit for the default map, else a 0 bit, thresholds and ctx_map.
+void EncodeBlockCtxMap(const BlockCtxMap& block_ctx_map, BitWriter* writer,
+                       AuxOut* aux_out) {
+  const auto& dc_thresholds = block_ctx_map.dc_thresholds;
+  const auto& qf_thresholds = block_ctx_map.qf_thresholds;
+  const auto& ctx_map = block_ctx_map.ctx_map;
+  const size_t num_thresholds = dc_thresholds[0].size() +
+                                dc_thresholds[1].size() +
+                                dc_thresholds[2].size() + qf_thresholds.size();
+  BitWriter::Allotment allotment(
+      writer, num_thresholds * 34 + 1 + 4 + 4 + ctx_map.size() * 10 + 1024);
+  const bool is_default =
+      num_thresholds == 0 && ctx_map.size() == 21 &&
+      std::equal(ctx_map.begin(), ctx_map.end(), jxl::kDefaultCtxMap);
+  writer->Write(1, is_default ? 1 : 0);
+  if (!is_default) {
+    for (size_t c = 0; c < 3; ++c) {
+      writer->Write(4, dc_thresholds[c].size());
+      for (int i : dc_thresholds[c]) {
+        JXL_CHECK(U32Coder::Write(kDCThresholdDist, PackSigned(i), writer));
+      }
+    }
+    writer->Write(4, qf_thresholds.size());
+    for (uint32_t i : qf_thresholds) {
+      JXL_CHECK(U32Coder::Write(kQFThresholdDist, i - 1, writer));
+    }
+    EncodeContextMap(ctx_map, block_ctx_map.num_ctxs, writer);
+  }
+  ReclaimAndCharge(writer, &allotment, kLayerAC, aux_out);
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_context_map.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_context_map.h
new file mode 100644
index 0000000000..7f6c624380
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_context_map.h
@@ -0,0 +1,33 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_CONTEXT_MAP_H_
+#define LIB_JXL_ENC_CONTEXT_MAP_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/enc_bit_writer.h"
+
+namespace jxl {
+
+// Max limit is 255 because encoding assumes numbers < 255
+// More clusters can help compression, but makes encode/decode somewhat slower
+static const size_t kClustersLimit = 128;
+
+// Encodes the given context map to the bit stream. The number of different
+// histogram ids is given by num_histograms.
+void EncodeContextMap(const std::vector<uint8_t>& context_map,
+                      size_t num_histograms, BitWriter* writer);
+
+void EncodeBlockCtxMap(const BlockCtxMap& block_ctx_map, BitWriter* writer,
+                       AuxOut* aux_out);
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_CONTEXT_MAP_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_detect_dots.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_detect_dots.cc
new file mode 100644
index 0000000000..a2285df362
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_detect_dots.cc
@@ -0,0 +1,620 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_detect_dots.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <cstdio>
+#include <utility>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_detect_dots.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/linalg.h"
+#include "lib/jxl/optimize.h"
+
+// Set JXL_DEBUG_DOT_DETECT to 1 to enable debugging.
+#ifndef JXL_DEBUG_DOT_DETECT
+#define JXL_DEBUG_DOT_DETECT 0
+#endif
+
+#if JXL_DEBUG_DOT_DETECT
+#include "lib/jxl/aux_out.h"
+#endif
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// Per-pixel weighted sum of squared differences between forig and smooth.
+ImageF SumOfSquareDifferences(const Image3F& forig, const Image3F& smooth,
+                              ThreadPool* pool) {
+  const HWY_FULL(float) df;
+  // Per-channel weights; channels 0 and 2 are multiplied by zero.
+  const auto weight0 = Set(df, 0.0f);
+  const auto weight1 = Set(df, 10.0f);
+  const auto weight2 = Set(df, 0.0f);
+
+  ImageF energy(forig.xsize(), forig.ysize());
+  // One task per image row; every task writes only its own output row.
+  const auto process_row = [&](const int task, const int thread) {
+    const size_t y = static_cast<size_t>(task);
+    const float* JXL_RESTRICT in0 = forig.Plane(0).ConstRow(y);
+    const float* JXL_RESTRICT in1 = forig.Plane(1).ConstRow(y);
+    const float* JXL_RESTRICT in2 = forig.Plane(2).ConstRow(y);
+    const float* JXL_RESTRICT blur0 = smooth.Plane(0).ConstRow(y);
+    const float* JXL_RESTRICT blur1 = smooth.Plane(1).ConstRow(y);
+    const float* JXL_RESTRICT blur2 = smooth.Plane(2).ConstRow(y);
+    float* JXL_RESTRICT out = energy.Row(y);
+
+    // Steps by whole vectors, so the last load/store may run past xsize().
+    for (size_t x = 0; x < forig.xsize(); x += Lanes(df)) {
+      const auto diff0 = Load(df, in0 + x) - Load(df, blur0 + x);
+      const auto diff1 = Load(df, in1 + x) - Load(df, blur1 + x);
+      const auto diff2 = Load(df, in2 + x) - Load(df, blur2 + x);
+      // Square first, then weight (same operation order per lane).
+      const auto term0 = (diff0 * diff0) * weight0;
+      const auto term1 = (diff1 * diff1) * weight1;
+      const auto term2 = (diff2 * diff2) * weight2;
+      Store(term0 + term1 + term2, df, out + x);
+    }
+  };
+  RunOnPool(pool, 0, forig.ysize(), ThreadPool::SkipInit(), process_row,
+            "ComputeEnergyImage");
+  return energy;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(SumOfSquareDifferences);  // Local function
+
+const int kEllipseWindowSize = 5;
+
+namespace {
+struct GaussianEllipse {  // Parameters and fit diagnostics of one dot model.
+  double x;                         // position in x
+  double y;                         // position in y
+  double sigma_x;                   // scale in x (divides the squared offset)
+  double sigma_y;                   // scale in y (divides the squared offset)
+  double angle;                     // ellipse rotation in radians
+  std::array<double, 3> intensity;  // intensity in each channel
+
+  // The following variables do not need to be encoded
+  double l2_loss;  // error after the Gaussian was fit (mean squared residual)
+  double l1_loss;  // mean absolute residual of the fit
+  double ridge_loss;              // the l2_loss plus regularization term
+  double custom_loss;             // experimental custom loss
+  std::array<double, 3> bgColor;  // best background color
+  size_t neg_pixels;  // number of negative pixels when subtracting dot
+  std::array<double, 3> neg_value;  // debt due to channel truncation
+};
+double DotGaussianModel(double dx, double dy, double ct, double st,
+                        double sigma_x, double sigma_y, double intensity) {
+  // Rotate the offset (dx, dy) into the ellipse frame; ct/st = cos/sin(angle).
+  const double along = ct * dx + st * dy;
+  const double across = -st * dx + ct * dy;
+  const double dist = (along * along / sigma_x) + (across * across / sigma_y);
+  return intensity * exp(-0.5 * dist);
+}
+
+constexpr bool kOptimizeBackground = true;
+
+// Gaussian kernel weights that smooth noise but preserve dots.
+const WeightsSeparable5& WeightsSeparable5Gaussian0_65() {
+  // Presumably the center tap and the taps at distance 1 and 2 (kTap0 +
+  // 2 * kTap1 + 2 * kTap2 is ~1); both rows of the struct get the same values.
+  constexpr float kTap0 = 0.558311f, kTap1 = 0.210395f, kTap2 = 0.010449f;
+  static constexpr WeightsSeparable5 kKernel = {
+      {HWY_REP4(kTap0), HWY_REP4(kTap1), HWY_REP4(kTap2)},
+      {HWY_REP4(kTap0), HWY_REP4(kTap1), HWY_REP4(kTap2)}};
+  return kKernel;
+}
+
+// (Iterated) Gaussian kernel weights that remove dots.
+const WeightsSeparable5& WeightsSeparable5Gaussian3() {
+  // Presumably the center tap and the taps at distance 1 and 2 (kTap0 +
+  // 2 * kTap1 + 2 * kTap2 is ~1); both rows of the struct get the same values.
+  constexpr float kTap0 = 0.222338f, kTap1 = 0.210431f, kTap2 = 0.1784f;
+  static constexpr WeightsSeparable5 kKernel = {
+      {HWY_REP4(kTap0), HWY_REP4(kTap1), HWY_REP4(kTap2)},
+      {HWY_REP4(kTap0), HWY_REP4(kTap1), HWY_REP4(kTap2)}};
+  return kKernel;
+}
+
+ImageF ComputeEnergyImage(const Image3F& orig, Image3F* smooth,
+                          ThreadPool* pool) {
+  PROFILER_FUNC;
+  const size_t xs = orig.xsize();
+  const size_t ys = orig.ysize();
+
+  // Scratch planes plus the caller's output, all sized like the input.
+  Image3F denoised(xs, ys);
+  Image3F once(xs, ys);
+  *smooth = Image3F(xs, ys);
+  // Light filter: noise is smoothed while dots are preserved.
+  Separable5_3(orig, Rect(orig), WeightsSeparable5Gaussian0_65(), pool,
+               &denoised);
+  // Dot-removing filter applied twice in a row; the result lands in *smooth.
+  const WeightsSeparable5& wide = WeightsSeparable5Gaussian3();
+  Separable5_3(orig, Rect(orig), wide, pool, &once);
+  Separable5_3(once, Rect(once), wide, pool, smooth);
+
+#if JXL_DEBUG_DOT_DETECT
+  AuxOut aux;
+  aux.debug_prefix = "/tmp/sebastian/";
+  aux.DumpImage("filtered", denoised);
+  aux.DumpImage("sm", *smooth);
+#endif
+  return HWY_DYNAMIC_DISPATCH(SumOfSquareDifferences)(denoised, *smooth, pool);
+}
+
+struct Pixel {
+  int x;  // column
+  int y;  // row
+};
+// Component-wise sum; used to offset a pixel by a neighbor delta.
+Pixel operator+(const Pixel& a, const Pixel& b) {
+  return {a.x + b.x, a.y + b.y};
+}
+
+// Maximum area in pixels of an ellipse
+const size_t kMaxCCSize = 1000;
+
+// Collects the 8-connected component of img around seed into *pixels. A
+// neighbor joins when its value exceeds threshold and is then zeroed in img.
+// Returns false (partial result) once more than kMaxCCSize pixels are found.
+bool ExtractComponent(ImageF* img, std::vector<Pixel>* pixels,
+                      const Pixel& seed, double threshold) {
+  PROFILER_FUNC;
+  // Each of the eight offsets exactly once; {-1, 0} is the left neighbor.
+  static const std::vector<Pixel> neighbors{{1, -1}, {1, 0},   {1, 1},  {0, -1},
+                                            {0, 1},  {-1, -1}, {-1, 1}, {-1, 0}};
+  std::vector<Pixel> q{seed};  // LIFO: the traversal is depth-first
+  while (!q.empty()) {
+    Pixel current = q.back();
+    q.pop_back();
+    pixels->push_back(current);
+    if (pixels->size() > kMaxCCSize) return false;
+    for (const Pixel& delta : neighbors) {
+      Pixel child = current + delta;
+      if (child.x >= 0 && static_cast<size_t>(child.x) < img->xsize() &&
+          child.y >= 0 && static_cast<size_t>(child.y) < img->ysize() &&
+          img->Row(child.y)[child.x] > threshold) {
+        img->Row(child.y)[child.x] = 0.0;  // mark visited: enqueue only once
+        q.push_back(child);
+      }
+    }
+  }
+  return true;
+}
+
+inline bool PointInRect(const Rect& r, const Pixel& p) {
+  // Negative coordinates convert to very large unsigned values.
+  const size_t px = static_cast<size_t>(p.x), py = static_cast<size_t>(p.y);
+  if (px < r.x0() || px >= (r.x0() + r.xsize())) return false;
+  return py >= r.y0() && py < (r.y0() + r.ysize());
+}
+
+struct ConnectedComponent {
+  // Takes ownership of `pixels`; a non-const rvalue reference is required
+  // for the move below to actually move instead of copying the vector.
+  ConnectedComponent(const Rect& bounds, std::vector<Pixel>&& pixels)
+      : bounds(bounds), pixels(std::move(pixels)) {}
+  Rect bounds;                // rectangle the statistics are computed for
+  std::vector<Pixel> pixels;  // pixels that make up the component
+  float maxEnergy;            // largest energy in the extended window (>= 0)
+  float meanEnergy;           // mean of the energy inside bounds
+  float varEnergy;            // variance of the energy inside bounds
+  float meanBg;               // mean of the energy in the margin around bounds
+  float varBg;                // variance of the energy in that margin
+  float score;                // (meanEnergy - meanBg) / sqrt(varBg)
+  Pixel mode;                 // location of maxEnergy
+
+  // Computes all statistics above from `energy` over bounds enlarged by
+  // `extra` pixels on every side (positions outside the image are skipped).
+  // NOTE(review): nOut == 0 or varBg <= 0 yields a NaN/inf score - confirm.
+  void CompStats(const ImageF& energy, int extra) {
+    PROFILER_FUNC;
+    maxEnergy = meanEnergy = varEnergy = meanBg = varBg = 0.0;
+    int nIn = 0, nOut = 0;
+    mode = Pixel{0, 0};
+    for (int sy = -extra; sy < (static_cast<int>(bounds.ysize()) + extra);
+         sy++) {
+      int y = sy + static_cast<int>(bounds.y0());
+      if (y < 0 || static_cast<size_t>(y) >= energy.ysize()) continue;
+      const float* JXL_RESTRICT erow = energy.ConstRow(y);
+      for (int sx = -extra; sx < (static_cast<int>(bounds.xsize()) + extra);
+           sx++) {
+        int x = sx + static_cast<int>(bounds.x0());
+        if (x < 0 || static_cast<size_t>(x) >= energy.xsize()) continue;
+        if (erow[x] > maxEnergy) {
+          maxEnergy = erow[x];
+          mode.x = x;
+          mode.y = y;
+        }
+        if (PointInRect(bounds, Pixel{x, y})) {
+          meanEnergy += erow[x];
+          varEnergy += erow[x] * erow[x];
+          nIn++;
+        } else {
+          meanBg += erow[x];
+          varBg += erow[x] * erow[x];
+          nOut++;
+        }
+      }
+    }
+    // Turn the accumulated sums into mean and variance (E[x^2] - E[x]^2).
+    meanEnergy = meanEnergy / nIn;
+    meanBg = meanBg / nOut;
+    varEnergy = (varEnergy / nIn) - meanEnergy * meanEnergy;
+    varBg = (varBg / nOut) - meanBg * meanBg;
+    score = (meanEnergy - meanBg) / std::sqrt(varBg);
+  }
+};
+
+Rect BoundingRectangle(const std::vector<Pixel>& pixels) {
+  PROFILER_FUNC;
+  JXL_ASSERT(!pixels.empty());
+  // Start both extremes at the first pixel, then widen them over the rest.
+  int min_x = pixels[0].x, max_x = pixels[0].x;
+  int min_y = pixels[0].y, max_y = pixels[0].y;
+  for (size_t i = 1; i < pixels.size(); ++i) {
+    if (pixels[i].x < min_x) min_x = pixels[i].x;
+    if (pixels[i].x > max_x) max_x = pixels[i].x;
+    if (pixels[i].y < min_y) min_y = pixels[i].y;
+    if (pixels[i].y > max_y) max_y = pixels[i].y;
+  }
+  return Rect(min_x, min_y, max_x - min_x + 1, max_y - min_y + 1);
+}
+
+// Finds candidate dots: each pixel of `energy` above t_high seeds a component
+// grown over pixels above t_low. Components that overflow kMaxCCSize, whose
+// bounding box reaches maxWindow, or that score below minScore are dropped.
+std::vector<ConnectedComponent> FindCC(const ImageF& energy, double t_low,
+                                       double t_high, uint32_t maxWindow,
+                                       double minScore) {
+  PROFILER_FUNC;
+  const int kExtraRect = 4;  // margin used for the background statistics
+  ImageF img = CopyImage(energy);  // scratch copy: visited pixels get zeroed
+  std::vector<ConnectedComponent> ans;
+  for (size_t y = 0; y < img.ysize(); y++) {
+    // Not JXL_RESTRICT: ExtractComponent writes to this row through img.
+    float* row = img.Row(y);
+    for (size_t x = 0; x < img.xsize(); x++) {
+      if (!(row[x] > t_high)) continue;
+      std::vector<Pixel> pixels;
+      row[x] = 0.0;
+      bool success = ExtractComponent(
+          &img, &pixels, Pixel{static_cast<int>(x), static_cast<int>(y)},
+          t_low);
+      if (!success) continue;
+#if JXL_DEBUG_DOT_DETECT
+      for (const Pixel& p : pixels) fprintf(stderr, "(%d,%d) ", p.x, p.y);
+      fprintf(stderr, "\n");
+#endif  // JXL_DEBUG_DOT_DETECT
+      Rect bounds = BoundingRectangle(pixels);
+      if (bounds.xsize() >= maxWindow || bounds.ysize() >= maxWindow) continue;
+      ConnectedComponent cc{bounds, std::move(pixels)};
+      cc.CompStats(energy, kExtraRect);
+      if (cc.score < minScore) continue;
+      JXL_DEBUG(JXL_DEBUG_DOT_DETECT,
+                "cc mode: (%d,%d), max: %f, bgMean: %f bgVar: "
+                "%f bound:(%zu,%zu,%zu,%zu)\n",
+                cc.mode.x, cc.mode.y, cc.maxEnergy, cc.meanBg, cc.varBg,
+                cc.bounds.x0(), cc.bounds.y0(), cc.bounds.xsize(),
+                cc.bounds.ysize());
+      ans.push_back(std::move(cc));  // avoid copying the pixel list
+    }
+  }
+  return ans;
+}
+
+// TODO (sggonzalez): Adapt this function for the different color spaces or
+// remove it if the color space with the best performance does not need it
+void ComputeDotLosses(GaussianEllipse* ellipse, const ConnectedComponent& cc,
+                      const Image3F& img, const Image3F& background) {
+  PROFILER_FUNC;  // Fills the loss and neg_* fields of *ellipse.
+  const int rectBounds = 2;  // margin around cc.bounds, in pixels
+  const double kIntensityR = 0.0;   // 0.015; weight of the intensity penalty
+  const double kSigmaR = 0.0;       // 0.01; weight of the sigma penalty
+  const double kZeroEpsilon = 0.1;  // Tolerance to consider a value negative
+  double ct = cos(ellipse->angle), st = sin(ellipse->angle);
+  const std::array<double, 3> channelGains{1.0, 1.0, 1.0};
+  int N = 0;  // samples accumulated over all channels
+  ellipse->l1_loss = 0.0;
+  ellipse->l2_loss = 0.0;
+  ellipse->neg_pixels = 0;
+  ellipse->neg_value.fill(0.0);
+  double distMeanModeSq = (cc.mode.x - ellipse->x) * (cc.mode.x - ellipse->x) +
+                          (cc.mode.y - ellipse->y) * (cc.mode.y - ellipse->y);
+  ellipse->custom_loss = 0.0;
+  for (int c = 0; c < 3; c++) {
+    for (int sy = -rectBounds;
+         sy < (static_cast<int>(cc.bounds.ysize()) + rectBounds); sy++) {
+      int y = sy + cc.bounds.y0();
+      if (y < 0 || static_cast<size_t>(y) >= img.ysize()) continue;
+      const float* JXL_RESTRICT row = img.ConstPlaneRow(c, y);
+      // bgrow is only used if kOptimizeBackground is false.
+      // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
+      const float* JXL_RESTRICT bgrow = background.ConstPlaneRow(c, y);
+      for (int sx = -rectBounds;
+           sx < (static_cast<int>(cc.bounds.xsize()) + rectBounds); sx++) {
+        int x = sx + cc.bounds.x0();
+        if (x < 0 || static_cast<size_t>(x) >= img.xsize()) continue;
+        double target = row[x];
+        double dotDelta = DotGaussianModel(
+            x - ellipse->x, y - ellipse->y, ct, st, ellipse->sigma_x,
+            ellipse->sigma_y, ellipse->intensity[c]);
+        if (dotDelta > target + kZeroEpsilon) {  // model exceeds the pixel
+          ellipse->neg_pixels++;
+          ellipse->neg_value[c] += dotDelta - target;
+        }
+        double bkg = kOptimizeBackground ? ellipse->bgColor[c] : bgrow[x];
+        double pred = bkg + dotDelta;
+        double diff = target - pred;
+        double l2 = channelGains[c] * diff * diff;
+        double l1 = channelGains[c] * std::fabs(diff);
+        ellipse->l2_loss += l2;
+        ellipse->l1_loss += l1;
+        double w = DotGaussianModel(x - cc.mode.x, y - cc.mode.y, 1.0, 0.0,
+                                    1.0 + ellipse->sigma_x,
+                                    1.0 + ellipse->sigma_y, 1.0);
+        ellipse->custom_loss += w * l2;  // l2 weighted by distance to cc.mode
+        N++;
+      }
+    }
+  }
+  ellipse->l2_loss /= N;  // sums become means over the N samples
+  ellipse->custom_loss /= N;
+  ellipse->custom_loss += 20.0 * distMeanModeSq + ellipse->neg_value[1];
+  ellipse->l1_loss /= N;
+  double ridgeTerm = kSigmaR * ellipse->sigma_x + kSigmaR * ellipse->sigma_y;
+  for (int c = 0; c < 3; c++) {
+    ridgeTerm += kIntensityR * ellipse->intensity[c] * ellipse->intensity[c];
+  }
+  ellipse->ridge_loss = ellipse->l2_loss + ridgeTerm;
+}
+
+GaussianEllipse FitGaussianFast(const ConnectedComponent& cc,
+                                const ImageF& energy, const Image3F& img,
+                                const Image3F& background) {
+  PROFILER_FUNC;  // Closed-form fit in a window centered on cc.mode.
+  constexpr bool leastSqIntensity = true;
+  constexpr double kEpsilon = 1e-6;
+  GaussianEllipse ans;
+  constexpr int kRectBounds = (kEllipseWindowSize >> 1);  // half window
+
+  // Compute the weighted 1st and 2nd moments in the window around cc.mode
+  double sum = 0.0;
+  int N = 0;
+  std::array<double, 3> m1{0.0, 0.0, 0.0};  // sums of w*x, w*y ([2] unused)
+  std::array<double, 3> m2{0.0, 0.0, 0.0};  // sums of w*x*x, w*x*y, w*y*y
+  std::array<double, 3> color{0.0, 0.0, 0.0};    // img - background at cc.mode
+  std::array<double, 3> bgColor{0.0, 0.0, 0.0};  // background sum, then mean
+
+  JXL_DEBUG(JXL_DEBUG_DOT_DETECT, "%zu %zu %zu %zu\n", cc.bounds.x0(),
+            cc.bounds.y0(), cc.bounds.xsize(), cc.bounds.ysize());
+  for (int c = 0; c < 3; c++) {
+    color[c] = img.ConstPlaneRow(c, cc.mode.y)[cc.mode.x] -
+               background.ConstPlaneRow(c, cc.mode.y)[cc.mode.x];
+  }
+  double sign = (color[1] > 0) ? 1 : -1;  // dot polarity in channel 1
+  for (int sy = -kRectBounds; sy <= kRectBounds; sy++) {
+    int y = sy + cc.mode.y;
+    if (y < 0 || static_cast<size_t>(y) >= energy.ysize()) continue;
+    const float* JXL_RESTRICT row = img.ConstPlaneRow(1, y);
+    const float* JXL_RESTRICT bgrow = background.ConstPlaneRow(1, y);
+    for (int sx = -kRectBounds; sx <= kRectBounds; sx++) {
+      int x = sx + cc.mode.x;
+      if (x < 0 || static_cast<size_t>(x) >= energy.xsize()) continue;
+      double w = std::max(kEpsilon, sign * (row[x] - bgrow[x]));  // weight
+      sum += w;
+
+      m1[0] += w * x;
+      m1[1] += w * y;
+      m2[0] += w * x * x;
+      m2[1] += w * x * y;
+      m2[2] += w * y * y;
+      for (int c = 0; c < 3; c++) {
+        bgColor[c] += background.ConstPlaneRow(c, y)[x];
+      }
+      N++;
+    }
+  }
+  JXL_CHECK(N > 0);
+
+  for (int i = 0; i < 3; i++) {
+    m1[i] /= sum;
+    m2[i] /= sum;
+    bgColor[i] /= N;
+  }
+
+  // Some magic constants
+  constexpr double kSigmaMult = 1.0;
+  constexpr std::array<double, 3> kScaleMult{1.1, 1.1, 1.1};
+
+  // Now set the parameters of the Gaussian
+  ans.x = m1[0];
+  ans.y = m1[1];
+  for (int j = 0; j < 3; j++) {
+    ans.intensity[j] = kScaleMult[j] * color[j];
+  }
+  // NOTE(review): D is declared (1, 2) but read as d[0], d[1] of row 0.
+  ImageD Sigma(2, 2), D(1, 2), U(2, 2);
+  Sigma.Row(0)[0] = m2[0] - m1[0] * m1[0];
+  Sigma.Row(1)[1] = m2[2] - m1[1] * m1[1];
+  Sigma.Row(0)[1] = Sigma.Row(1)[0] = m2[1] - m1[0] * m1[1];
+  ConvertToDiagonal(Sigma, &D, &U);
+  const double* JXL_RESTRICT d = D.ConstRow(0);
+  const double* JXL_RESTRICT u = U.ConstRow(1);
+  int p1 = 0, p2 = 1;  // after the swap below, p1 indexes the larger d entry
+  if (d[0] < d[1]) std::swap(p1, p2);
+  ans.sigma_x = kSigmaMult * d[p1];
+  ans.sigma_y = kSigmaMult * d[p2];
+  ans.angle = std::atan2(u[p1], u[p2]);
+  ans.l2_loss = 0.0;
+  ans.bgColor = bgColor;
+  if (leastSqIntensity) {
+    GaussianEllipse* ellipse = &ans;
+    double ct = cos(ans.angle), st = sin(ans.angle);
+    // Estimate intensity with least squares (fixed background)
+    for (int c = 0; c < 3; c++) {
+      double gg = 0.0;
+      double gd = 0.0;
+      int yc = static_cast<int>(cc.mode.y);
+      int xc = static_cast<int>(cc.mode.x);
+      for (int y = yc - kRectBounds; y <= yc + kRectBounds; y++) {
+        if (y < 0 || static_cast<size_t>(y) >= img.ysize()) continue;
+        const float* JXL_RESTRICT row = img.ConstPlaneRow(c, y);
+        const float* JXL_RESTRICT bgrow = background.ConstPlaneRow(c, y);
+        for (int x = xc - kRectBounds; x <= xc + kRectBounds; x++) {
+          if (x < 0 || static_cast<size_t>(x) >= img.xsize()) continue;
+          double target = row[x] - bgrow[x];
+          double gaussian =
+              DotGaussianModel(x - ellipse->x, y - ellipse->y, ct, st,
+                               ellipse->sigma_x, ellipse->sigma_y, 1.0);
+          gg += gaussian * gaussian;
+          gd += gaussian * target;
+        }
+      }
+      ans.intensity[c] = gd / (gg + 1e-6);  // Regularized least squares
+    }
+  }
+  ComputeDotLosses(&ans, cc, img, background);
+  return ans;
+}
+
+GaussianEllipse FitGaussian(const ConnectedComponent& cc, const ImageF& energy,
+                            const Image3F& img, const Image3F& background) {
+  GaussianEllipse ellipse = FitGaussianFast(cc, energy, img, background);
+  double& angle = ellipse.angle;
+  // Canonical form: sigma_x >= sigma_y, angle wrapped into [0, kPi].
+  if (ellipse.sigma_y > ellipse.sigma_x) {
+    angle += kPi / 2.0;
+    std::swap(ellipse.sigma_x, ellipse.sigma_y);
+  }
+  angle -= kPi * std::floor(angle / kPi);
+  if (fabs(angle) < 1e-6 || fabs(angle - kPi) < 1e-6) angle = 0.0;
+  JXL_CHECK(ellipse.angle >= 0 && ellipse.angle <= kPi &&
+            ellipse.sigma_x >= ellipse.sigma_y);
+  JXL_DEBUG(JXL_DEBUG_DOT_DETECT,
+            "Ellipse mu=(%lf,%lf) sigma=(%lf,%lf) angle=%lf "
+            "intensity=(%lf,%lf,%lf) bg=(%lf,%lf,%lf) l2_loss=%lf "
+            "custom_loss=%lf, neg_pix=%zu, neg_v=(%lf,%lf,%lf)\n",
+            ellipse.x, ellipse.y, ellipse.sigma_x, ellipse.sigma_y,
+            ellipse.angle, ellipse.intensity[0], ellipse.intensity[1],
+            ellipse.intensity[2], ellipse.bgColor[0], ellipse.bgColor[1],
+            ellipse.bgColor[2], ellipse.l2_loss, ellipse.custom_loss,
+            ellipse.neg_pixels, ellipse.neg_value[0], ellipse.neg_value[1],
+            ellipse.neg_value[2]);
+  return ellipse;
+}
+
+}  // namespace
+
+std::vector<PatchInfo> DetectGaussianEllipses(
+    const Image3F& opsin, const GaussianDetectParams& params,
+    const EllipseQuantParams& qParams, ThreadPool* pool) {
+  PROFILER_FUNC;  // Returns one patch per accepted dot of opsin.
+  std::vector<PatchInfo> dots;
+  Image3F smooth(opsin.xsize(), opsin.ysize());  // filled by the call below
+  ImageF energy = ComputeEnergyImage(opsin, &smooth, pool);
+#if JXL_DEBUG_DOT_DETECT
+  AuxOut aux;
+  aux.debug_prefix = "/tmp/sebastian/";
+  aux.DumpXybImage("smooth", smooth);
+  aux.DumpPlaneNormalized("energy", energy);
+#endif  // JXL_DEBUG_DOT_DETECT
+  std::vector<ConnectedComponent> components = FindCC(
+      energy, params.t_low, params.t_high, params.maxWinSize, params.minScore);
+  size_t numCC =
+      std::min(params.maxCC, (components.size() * params.percCC) / 100);
+  if (components.size() > numCC) {  // keep only the numCC best-scoring ones
+    std::sort(
+        components.begin(), components.end(),
+        [](const ConnectedComponent& a, const ConnectedComponent& b) -> bool {
+          return a.score > b.score;
+        });
+    components.erase(components.begin() + numCC, components.end());
+  }
+  for (const auto& cc : components) {
+    GaussianEllipse ellipse = FitGaussian(cc, energy, opsin, smooth);
+    if (ellipse.x < 0.0 ||
+        std::ceil(ellipse.x) >= static_cast<double>(opsin.xsize()) ||
+        ellipse.y < 0.0 ||
+        std::ceil(ellipse.y) >= static_cast<double>(opsin.ysize())) {
+      continue;  // fitted center lies outside the image
+    }
+    if (ellipse.neg_pixels > params.maxNegPixels) continue;
+    double intensity = 0.21 * ellipse.intensity[0] +
+                       0.72 * ellipse.intensity[1] +
+                       0.07 * ellipse.intensity[2];
+    double intensitySq = intensity * intensity;
+    // for (int c = 0; c < 3; c++) {
+    //  intensitySq += ellipse.intensity[c] * ellipse.intensity[c];
+    //}
+    double sqDistMeanMode = (ellipse.x - cc.mode.x) * (ellipse.x - cc.mode.x) +
+                            (ellipse.y - cc.mode.y) * (ellipse.y - cc.mode.y);
+    if (ellipse.l2_loss < params.maxL2Loss &&
+        ellipse.custom_loss < params.maxCustomLoss &&
+        intensitySq > (params.minIntensity * params.minIntensity) &&
+        sqDistMeanMode < params.maxDistMeanMode * params.maxDistMeanMode) {
+      size_t x0 = cc.bounds.x0();
+      size_t y0 = cc.bounds.y0();
+      dots.emplace_back();  // one entry per accepted dot
+      dots.back().second.emplace_back(x0, y0);  // top-left of cc.bounds
+      QuantizedPatch& patch = dots.back().first;
+      patch.xsize = cc.bounds.xsize();
+      patch.ysize = cc.bounds.ysize();
+      for (size_t y = 0; y < patch.ysize; y++) {
+        for (size_t x = 0; x < patch.xsize; x++) {
+          for (size_t c = 0; c < 3; c++) {  // store opsin - smooth
+            patch.fpixels[c][y * patch.xsize + x] =
+                opsin.ConstPlaneRow(c, y0 + y)[x0 + x] -
+                smooth.ConstPlaneRow(c, y0 + y)[x0 + x];
+          }
+        }
+      }
+    }
+  }
+#if JXL_DEBUG_DOT_DETECT
+  JXL_DEBUG(JXL_DEBUG_DOT_DETECT, "Candidates: %zu, Dots: %zu\n",
+            components.size(), dots.size());
+  ApplyGaussianEllipses(&smooth, dots, 1.0);
+  aux.DumpXybImage("draw", smooth);
+  ApplyGaussianEllipses(&smooth, dots, -1.0);
+
+  auto qdots = QuantizeGaussianEllipses(dots, qParams);
+  auto deq = DequantizeGaussianEllipses(qdots, qParams);
+  ApplyGaussianEllipses(&smooth, deq, 1.0);
+  aux.DumpXybImage("qdraw", smooth);
+  ApplyGaussianEllipses(&smooth, deq, -1.0);
+#endif  // JXL_DEBUG_DOT_DETECT
+  return dots;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_detect_dots.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_detect_dots.h
new file mode 100644
index 0000000000..6e06a164fd
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_detect_dots.h
@@ -0,0 +1,66 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// We attempt to remove dots, or speckle from images using Gaussian blur.
+#ifndef LIB_JXL_ENC_DETECT_DOTS_H_
+#define LIB_JXL_ENC_DETECT_DOTS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+struct GaussianDetectParams {
+  double t_high = 0;  // at least one pixel must have larger energy than t_high
+  double t_low = 0;   // all pixels must have a larger energy than t_low
+  uint32_t maxWinSize = 0;  // discard dots larger than this containing window
+  double maxL2Loss = 0;        // Upper bound on the fit's l2_loss
+  double maxCustomLoss = 0;    // Upper bound on the fit's custom_loss
+  double minIntensity = 0;     // If the intensity is too low, discard it
+  double maxDistMeanMode = 0;  // The mean and the mode must be close
+  size_t maxNegPixels = 0;     // Maximum number of negative pixels
+  double minScore = 0;  // Minimum component score; a double so it can be fractional
+  size_t maxCC = 50;   // Maximum number of CC to keep
+  size_t percCC = 15;  // Percentage in [0,100] of CC to keep
+};
+
+// Ellipse quantization params; filled positionally, so member order matters.
+struct EllipseQuantParams {
+  size_t xsize;      // Image size in x
+  size_t ysize;      // Image size in y
+  size_t qPosition;  // Position quantization delta
+  // Quantization for the Gaussian sigma parameters
+  double minSigma;
+  double maxSigma;
+  size_t qSigma;  // number of quantization levels
+  // Quantization for the rotation angle (between -pi and pi)
+  size_t qAngle;
+  // Quantization for the intensity (one entry per channel)
+  std::array<double, 3> minIntensity;
+  std::array<double, 3> maxIntensity;
+  std::array<size_t, 3> qIntensity;  // number of quantization levels
+  // Extra parameters for the encoding
+  bool subtractQuantized;  // Should we subtract quantized or detected dots?
+  float ytox;  // set from cmap.YtoXRatio(0) in FindDotDictionary
+  float ytob;  // set from cmap.YtoBRatio(0) in FindDotDictionary
+
+  void QuantPositionSize(size_t* xsize, size_t* ysize) const;
+};
+
+// Detects dots in XYB image.
+std::vector<PatchInfo> DetectGaussianEllipses(
+    const Image3F& opsin, const GaussianDetectParams& params,
+    const EllipseQuantParams& qParams, ThreadPool* pool);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_DETECT_DOTS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_dot_dictionary.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_dot_dictionary.cc
new file mode 100644
index 0000000000..40440c4aa4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_dot_dictionary.cc
@@ -0,0 +1,72 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_dot_dictionary.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include <array>
+#include <utility>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_detect_dots.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// Private implementation of Dictionary Encode/Decode
+namespace {
+
+/* Quantization constants for Ellipse dots */
+const size_t kEllipsePosQ = 2;        // Quantization level for the position
+const double kEllipseMinSigma = 0.1;  // Minimum sigma value
+const double kEllipseMaxSigma = 3.1;  // Maximum Sigma value
+const size_t kEllipseSigmaQ = 16;     // Number of quantization levels for sigma
+const size_t kEllipseAngleQ = 8;      // Quantization level for the angle
+// TODO: fix these values.
+const std::array<double, 3> kEllipseMinIntensity{-0.05, 0.0, -0.5};
+const std::array<double, 3> kEllipseMaxIntensity{0.05, 1.0, 0.4};
+const std::array<size_t, 3> kEllipseIntensityQ{10, 36, 10};
+}  // namespace
+
+std::vector<PatchInfo> FindDotDictionary(const CompressParams& cparams,
+                                         const Image3F& opsin,
+                                         const ColorCorrelationMap& cmap,
+                                         ThreadPool* pool) {
+  // Bail out with no dots unless detection is enabled for these parameters.
+  const bool by_distance =
+      cparams.butteraugli_distance >= kMinButteraugliForDots;
+  if (!ApplyOverride(cparams.dots, by_distance)) return {};
+  GaussianDetectParams detect;
+  detect.t_high = 0.04;
+  detect.t_low = 0.02;
+  detect.maxWinSize = 5;
+  detect.maxL2Loss = 0.005;
+  detect.maxCustomLoss = 300;
+  detect.minIntensity = 0.12;
+  detect.maxDistMeanMode = 1.0;
+  detect.maxNegPixels = 0;
+  detect.minScore = 12.0;
+  detect.maxCC = 100;
+  detect.percCC = 100;
+  // Positional initialization: the order must match the member declarations.
+  const EllipseQuantParams quant{
+      opsin.xsize(),      opsin.ysize(),        kEllipsePosQ,
+      kEllipseMinSigma,   kEllipseMaxSigma,     kEllipseSigmaQ,
+      kEllipseAngleQ,     kEllipseMinIntensity, kEllipseMaxIntensity,
+      kEllipseIntensityQ, kEllipsePosQ <= 5,    cmap.YtoXRatio(0),
+      cmap.YtoBRatio(0)};
+  return DetectGaussianEllipses(opsin, detect, quant, pool);
+}
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_dot_dictionary.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_dot_dictionary.h
new file mode 100644
index 0000000000..f89791e4b1
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_dot_dictionary.h
@@ -0,0 +1,35 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_DOT_DICTIONARY_H_
+#define LIB_JXL_ENC_DOT_DICTIONARY_H_
+
+// Dots are stored in a dictionary to avoid storing similar dots multiple
+// times.
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+std::vector<PatchInfo> FindDotDictionary(const CompressParams& cparams,
+                                         const Image3F& opsin,
+                                         const ColorCorrelationMap& cmap,
+                                         ThreadPool* pool);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_DOT_DICTIONARY_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_entropy_coder.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_entropy_coder.cc
new file mode 100644
index 0000000000..0946300972
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_entropy_coder.cc
@@ -0,0 +1,268 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_entropy_coder.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_entropy_coder.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_context_map.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// Returns the number of non-zero coefficients in `block`, not counting LLF.
+// We cannot rely on block[] being all-zero bits, so first truncate to integer.
+// Also writes a per-8x8-block share of that count starting at nzeros_pos.
+int32_t NumNonZeroExceptLLF(const size_t cx, const size_t cy,
+                            const AcStrategy acs, const size_t covered_blocks,
+                            const size_t log2_covered_blocks,
+                            const int32_t* JXL_RESTRICT block,
+                            const size_t nzeros_stride,
+                            int32_t* JXL_RESTRICT nzeros_pos) {
+  const HWY_CAPPED(int32_t, kBlockDim) di;
+
+  const auto zero = Zero(di);
+  // Add FF..FF for every zero coefficient, negate to get #zeros.
+  auto neg_sum_zero = zero;
+
+  {
+    // Mask sufficient for one row of coefficients.
+    HWY_ALIGN const int32_t
+        llf_mask_lanes[AcStrategy::kMaxCoeffBlocks * (1 + kBlockDim)] = {
+            -1, -1, -1, -1};
+    // First cx=1,2,4 elements are FF..FF, others 0.
+    const int32_t* llf_mask_pos =
+        llf_mask_lanes + AcStrategy::kMaxCoeffBlocks - cx;
+
+    // Rows with LLF: mask out the LLF
+    for (size_t y = 0; y < cy; y++) {
+      for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
+        const auto llf_mask = LoadU(di, llf_mask_pos + x);
+
+        // LLF counts as zero so we don't include it in nzeros.
+        const auto coef =
+            AndNot(llf_mask, Load(di, &block[y * cx * kBlockDim + x]));
+
+        neg_sum_zero += VecFromMask(di, coef == zero);
+      }
+    }
+  }
+
+  // Remaining rows: no mask
+  for (size_t y = cy; y < cy * kBlockDim; y++) {
+    for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
+      const auto coef = Load(di, &block[y * cx * kBlockDim + x]);
+      neg_sum_zero += VecFromMask(di, coef == zero);
+    }
+  }
+
+  // We want area - sum_zero, add because neg_sum_zero is already negated.
+  const int32_t nzeros =
+      int32_t(cx * cy * kDCTBlockSize) + GetLane(SumOfLanes(neg_sum_zero));
+
+  // Ceiling of nzeros / covered_blocks, assuming covered_blocks is 2^log2.
+  const int32_t shifted_nzeros = static_cast<int32_t>(
+      (nzeros + covered_blocks - 1) >> log2_covered_blocks);
+  // Need non-canonicalized dimensions, hence acs rather than cx/cy.
+  for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
+    for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
+      nzeros_pos[x + y * nzeros_stride] = shifted_nzeros;
+    }
+  }
+
+  return nzeros;
+}
+
+// Specialization for 8x8, where only top-left is LLF/DC: returns the number of
+// non-zero coefficients except DC. ~1% overall speedup vs. NumNonZeroExceptLLF.
+int32_t NumNonZero8x8ExceptDC(const int32_t* JXL_RESTRICT block,
+                              int32_t* JXL_RESTRICT nzeros_pos) {
+  const HWY_CAPPED(int32_t, kBlockDim) di;
+
+  const auto zero = Zero(di);
+  // Add FF..FF for every zero coefficient, negate to get #zeros.
+  auto neg_sum_zero = zero;
+
+  {
+    // First row has DC, so mask
+    const size_t y = 0;
+    HWY_ALIGN const int32_t dc_mask_lanes[kBlockDim] = {-1};
+
+    for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
+      const auto dc_mask = Load(di, dc_mask_lanes + x);
+
+      // DC counts as zero so we don't include it in nzeros.
+      const auto coef = AndNot(dc_mask, Load(di, &block[y * kBlockDim + x]));
+
+      neg_sum_zero += VecFromMask(di, coef == zero);
+    }
+  }
+
+  // Remaining rows: no mask
+  for (size_t y = 1; y < kBlockDim; y++) {
+    for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
+      const auto coef = Load(di, &block[y * kBlockDim + x]);
+      neg_sum_zero += VecFromMask(di, coef == zero);
+    }
+  }
+
+  // We want 64 - sum_zero, add because neg_sum_zero is already negated.
+  const int32_t nzeros =
+      int32_t(kDCTBlockSize) + GetLane(SumOfLanes(neg_sum_zero));
+  // Only one block is covered, so the count is stored as-is.
+  *nzeros_pos = nzeros;
+
+  return nzeros;
+}
+
+// The number of nonzeros of each block is predicted from the top and the left
+// blocks, with opportune scaling to take into account the number of blocks of
+// each strategy.  The predicted number of nonzeros divided by two is used as a
+// context; if this number is above 63, a specific context is used.  If the
+// number of nonzeros of a strategy is above 63, it is written directly using a
+// fixed number of bits (that depends on the size of the strategy).
+void TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
+                          const Rect& rect,
+                          const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
+                          const AcStrategyImage& ac_strategy,
+                          YCbCrChromaSubsampling cs,
+                          Image3I* JXL_RESTRICT tmp_num_nzeroes,
+                          std::vector<Token>* JXL_RESTRICT output,
+                          const ImageB& qdc, const ImageI& qf,
+                          const BlockCtxMap& block_ctx_map) {
+  const size_t xsize_blocks = rect.xsize();
+  const size_t ysize_blocks = rect.ysize();
+
+  // TODO(user): update the estimate: usually fewer coefficients are used.
+  output->reserve(output->size() +
+                  3 * xsize_blocks * ysize_blocks * kDCTBlockSize);
+  // Per-channel read position in ac_rows[c]; advanced by `size` per block.
+  size_t offset[3] = {};
+  const size_t nzeros_stride = tmp_num_nzeroes->PixelsPerRow();
+  for (size_t by = 0; by < ysize_blocks; ++by) {
+    size_t sby[3] = {by >> cs.VShift(0), by >> cs.VShift(1),
+                     by >> cs.VShift(2)};
+    int32_t* JXL_RESTRICT row_nzeros[3] = {
+        tmp_num_nzeroes->PlaneRow(0, sby[0]),
+        tmp_num_nzeroes->PlaneRow(1, sby[1]),
+        tmp_num_nzeroes->PlaneRow(2, sby[2]),
+    };
+    const int32_t* JXL_RESTRICT row_nzeros_top[3] = {
+        sby[0] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(0, sby[0] - 1),
+        sby[1] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(1, sby[1] - 1),
+        sby[2] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(2, sby[2] - 1),
+    };
+    const uint8_t* JXL_RESTRICT row_qdc =
+        qdc.ConstRow(rect.y0() + by) + rect.x0();
+    const int32_t* JXL_RESTRICT row_qf = rect.ConstRow(qf, by);
+    AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
+    for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+      AcStrategy acs = acs_row[bx];
+      if (!acs.IsFirstBlock()) continue;
+      size_t sbx[3] = {bx >> cs.HShift(0), bx >> cs.HShift(1),
+                       bx >> cs.HShift(2)};
+      size_t cx = acs.covered_blocks_x();
+      size_t cy = acs.covered_blocks_y();
+      const size_t covered_blocks = cx * cy;  // = #LLF coefficients
+      const size_t log2_covered_blocks =
+          Num0BitsBelowLS1Bit_Nonzero(covered_blocks);
+      const size_t size = covered_blocks * kDCTBlockSize;
+
+      CoefficientLayout(&cy, &cx);  // swap cx/cy to canonical order
+      // Visit channels 1, 0, 2; skip those not aligned to their subsampling.
+      for (int c : {1, 0, 2}) {
+        if (sbx[c] << cs.HShift(c) != bx) continue;
+        if (sby[c] << cs.VShift(c) != by) continue;
+        const int32_t* JXL_RESTRICT block = ac_rows[c] + offset[c];
+
+        int32_t nzeros =
+            (covered_blocks == 1)
+                ? NumNonZero8x8ExceptDC(block, row_nzeros[c] + sbx[c])
+                : NumNonZeroExceptLLF(cx, cy, acs, covered_blocks,
+                                      log2_covered_blocks, block, nzeros_stride,
+                                      row_nzeros[c] + sbx[c]);
+
+        int ord = kStrategyOrder[acs.RawStrategy()];
+        const coeff_order_t* JXL_RESTRICT order =
+            &orders[CoeffOrderOffset(ord, c)];
+
+        int32_t predicted_nzeros =
+            PredictFromTopAndLeft(row_nzeros_top[c], row_nzeros[c], sbx[c], 32);
+        size_t block_ctx =
+            block_ctx_map.Context(row_qdc[bx], row_qf[sbx[c]], ord, c);
+        const int32_t nzero_ctx =
+            block_ctx_map.NonZeroContext(predicted_nzeros, block_ctx);
+        // First token of the block: the non-zero count in its context.
+        output->emplace_back(nzero_ctx, nzeros);
+        const size_t histo_offset =
+            block_ctx_map.ZeroDensityContextsOffset(block_ctx);
+        // Skip LLF (k starts at covered_blocks); stop once nzeros reaches 0.
+        size_t prev = (nzeros > static_cast<ssize_t>(size / 16) ? 0 : 1);
+        for (size_t k = covered_blocks; k < size && nzeros != 0; ++k) {
+          int32_t coeff = block[order[k]];
+          size_t ctx =
+              histo_offset + ZeroDensityContext(nzeros, k, covered_blocks,
+                                                log2_covered_blocks, prev);
+          uint32_t u_coeff = PackSigned(coeff);
+          output->emplace_back(ctx, u_coeff);
+          prev = coeff != 0;
+          nzeros -= prev;
+        }
+        JXL_DASSERT(nzeros == 0);
+        offset[c] += size;
+      }
+    }
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(TokenizeCoefficients);
+// Public entry point: passes every argument through, unchanged, to the
+// implementation selected by HWY_DYNAMIC_DISPATCH.
+void TokenizeCoefficients(
+    const coeff_order_t* JXL_RESTRICT orders, const Rect& rect,
+    const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
+    const AcStrategyImage& ac_strategy, YCbCrChromaSubsampling cs,
+    Image3I* JXL_RESTRICT tmp_num_nzeroes,
+    std::vector<Token>* JXL_RESTRICT output, const ImageB& qdc,
+    const ImageI& qf, const BlockCtxMap& block_ctx_map) {
+  HWY_DYNAMIC_DISPATCH(TokenizeCoefficients)(
+      orders, rect, ac_rows, ac_strategy, cs, tmp_num_nzeroes, output, qdc, qf,
+      block_ctx_map);
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_entropy_coder.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_entropy_coder.h
new file mode 100644
index 0000000000..7dfc71c726
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_entropy_coder.h
@@ -0,0 +1,46 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_ENTROPY_CODER_H_
+#define LIB_JXL_ENC_ENTROPY_CODER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"  // BlockCtxMap
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/frame_header.h"  // YCbCrChromaSubsampling
+#include "lib/jxl/image.h"
+
+// Entropy coding and context modeling of DC and AC coefficients, as well as AC
+// strategy and quantization field.
+
+namespace jxl {
+
+// Appends to `output` the tokens for the quantized AC coefficients in ac_rows.
+// Only the subset "rect" [in units of blocks] within all images is processed;
+// non-zero counts are stored in tmp_num_nzeroes. See also DecodeACVarBlock.
+void TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
+                          const Rect& rect,
+                          const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
+                          const AcStrategyImage& ac_strategy,
+                          YCbCrChromaSubsampling cs,
+                          Image3I* JXL_RESTRICT tmp_num_nzeroes,
+                          std::vector<Token>* JXL_RESTRICT output,
+                          const ImageB& qdc, const ImageI& qf,
+                          const BlockCtxMap& block_ctx_map);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_ENTROPY_CODER_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image.cc
new file mode 100644
index 0000000000..f1eb155e71
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image.cc
@@ -0,0 +1,283 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_external_image.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+
+namespace jxl {
+namespace {
+
+// Reads a big-endian 32-bit value at `p` and returns its bits as a float.
+float LoadBEFloat(const uint8_t* p) {
+  const uint32_t bits = LoadBE32(p);
+  float result;
+  memcpy(&result, &bits, sizeof(bits));
+  return result;
+}
+
+// Reads a little-endian 32-bit value at `p` and returns its bits as a float.
+float LoadLEFloat(const uint8_t* p) {
+  const uint32_t bits = LoadLE32(p);
+  float result;
+  memcpy(&result, &bits, sizeof(bits));
+  return result;
+}
+
+// Signature shared by the integer sample loaders passed to LoadFloatRow.
+typedef uint32_t(LoadFuncType)(const uint8_t* p);
+// Sets row_out[x] = mul * LoadFunc(in + x * bytes_per_pixel) for x < xsize.
+template <LoadFuncType LoadFunc>
+void JXL_INLINE LoadFloatRow(float* JXL_RESTRICT row_out, const uint8_t* in,
+                             float mul, size_t xsize, size_t bytes_per_pixel) {
+  for (size_t x = 0, offset = 0; x < xsize; ++x, offset += bytes_per_pixel) {
+    row_out[x] = mul * LoadFunc(in + offset);
+  }
+}
+
+uint32_t JXL_INLINE Load8(const uint8_t* p) { return p[0]; }
+
+}  // namespace
+
+Status ConvertFromExternal(Span<const uint8_t> bytes, size_t xsize,
+                           size_t ysize, const ColorEncoding& c_current,
+                           bool has_alpha, bool alpha_is_premultiplied,
+                           size_t bits_per_sample, JxlEndianness endianness,
+                           bool flipped_y, ThreadPool* pool, ImageBundle* ib) {
+  if (bits_per_sample < 1 || bits_per_sample > 32) {
+    return JXL_FAILURE("Invalid bits_per_sample value.");
+  }
+  // TODO(deymo): Implement 1-bit per sample as 8 samples per byte. In
+  // any other case we use DivCeil(bits_per_sample, 8) bytes per pixel per
+  // channel.
+  if (bits_per_sample == 1) {
+    return JXL_FAILURE("packed 1-bit per sample is not yet supported");
+  }
+
+  const size_t color_channels = c_current.Channels();
+  const size_t channels = color_channels + has_alpha;
+
+  // bytes_per_channel and bytes_per_pixel are only valid for
+  // bits_per_sample > 1.
+  const size_t bytes_per_channel = DivCeil(bits_per_sample, jxl::kBitsPerByte);
+  const size_t bytes_per_pixel = channels * bytes_per_channel;
+  // Input rows are tightly packed, row_size bytes each (no padding).
+  const size_t row_size = xsize * bytes_per_pixel;
+  if (ysize && bytes.size() / ysize < row_size) {
+    return JXL_FAILURE("Buffer size is too small");
+  }
+
+  const bool little_endian =
+      endianness == JXL_LITTLE_ENDIAN ||
+      (endianness == JXL_NATIVE_ENDIAN && IsLittleEndian());
+
+  const uint8_t* const in = bytes.data();
+
+  Image3F color(xsize, ysize);
+  ImageF alpha;
+  if (has_alpha) {
+    alpha = ImageF(xsize, ysize);
+  }
+
+  // Matches the old behavior of PackedImage.
+  // TODO(sboukortt): make this a parameter.
+  const bool float_in = bits_per_sample == 32;
+  // Maps an input row index to its output row (mirrored when flipped_y).
+  const auto get_y = [flipped_y, ysize](const size_t y) {
+    return flipped_y ? ysize - 1 - y : y;
+  };
+
+  if (float_in) {
+    if (bits_per_sample != 32) {
+      return JXL_FAILURE("non-32-bit float not supported");
+    }
+    for (size_t c = 0; c < color_channels; ++c) {
+      RunOnPool(
+          pool, 0, static_cast<uint32_t>(ysize), ThreadPool::SkipInit(),
+          [&](const int task, int /*thread*/) {
+            const size_t y = get_y(task);
+            size_t i =
+                row_size * task + (c * bits_per_sample / jxl::kBitsPerByte);
+            float* JXL_RESTRICT row_out = color.PlaneRow(c, y);
+            if (little_endian) {
+              for (size_t x = 0; x < xsize; ++x) {
+                row_out[x] = LoadLEFloat(in + i);
+                i += bytes_per_pixel;
+              }
+            } else {
+              for (size_t x = 0; x < xsize; ++x) {
+                row_out[x] = LoadBEFloat(in + i);
+                i += bytes_per_pixel;
+              }
+            }
+          },
+          "ConvertRGBFloat");
+    }
+  } else {
+    // Multiplier to convert from the integer range to floating point 0-1 range.
+    float mul = 1. / ((1ull << bits_per_sample) - 1);
+    for (size_t c = 0; c < color_channels; ++c) {
+      RunOnPool(
+          pool, 0, static_cast<uint32_t>(ysize), ThreadPool::SkipInit(),
+          [&](const int task, int /*thread*/) {
+            const size_t y = get_y(task);
+            size_t i = row_size * task + c * bytes_per_channel;
+            float* JXL_RESTRICT row_out = color.PlaneRow(c, y);
+            // TODO(deymo): add bits_per_sample == 1 case here. Also maybe
+            // implement masking if bits_per_sample is not a multiple of 8.
+            if (bits_per_sample <= 8) {
+              LoadFloatRow<Load8>(row_out, in + i, mul, xsize, bytes_per_pixel);
+            } else if (bits_per_sample <= 16) {
+              if (little_endian) {
+                LoadFloatRow<LoadLE16>(row_out, in + i, mul, xsize,
+                                       bytes_per_pixel);
+              } else {
+                LoadFloatRow<LoadBE16>(row_out, in + i, mul, xsize,
+                                       bytes_per_pixel);
+              }
+            } else if (bits_per_sample <= 24) {
+              if (little_endian) {
+                LoadFloatRow<LoadLE24>(row_out, in + i, mul, xsize,
+                                       bytes_per_pixel);
+              } else {
+                LoadFloatRow<LoadBE24>(row_out, in + i, mul, xsize,
+                                       bytes_per_pixel);
+              }
+            } else {
+              if (little_endian) {
+                LoadFloatRow<LoadLE32>(row_out, in + i, mul, xsize,
+                                       bytes_per_pixel);
+              } else {
+                LoadFloatRow<LoadBE32>(row_out, in + i, mul, xsize,
+                                       bytes_per_pixel);
+              }
+            }
+          },
+          "ConvertRGBUint");
+    }
+  }
+  // Single color channel: replicate plane 0 into planes 1 and 2.
+  if (color_channels == 1) {
+    CopyImageTo(color.Plane(0), &color.Plane(1));
+    CopyImageTo(color.Plane(0), &color.Plane(2));
+  }
+
+  ib->SetFromImage(std::move(color), c_current);
+  // Alpha is the sample that follows the color channels in each pixel.
+  if (has_alpha) {
+    if (float_in) {
+      if (bits_per_sample != 32) {
+        return JXL_FAILURE("non-32-bit float not supported");
+      }
+      RunOnPool(
+          pool, 0, static_cast<uint32_t>(ysize), ThreadPool::SkipInit(),
+          [&](const int task, int /*thread*/) {
+            const size_t y = get_y(task);
+            size_t i = row_size * task +
+                       (color_channels * bits_per_sample / jxl::kBitsPerByte);
+            float* JXL_RESTRICT row_out = alpha.Row(y);
+            if (little_endian) {
+              for (size_t x = 0; x < xsize; ++x) {
+                row_out[x] = LoadLEFloat(in + i);
+                i += bytes_per_pixel;
+              }
+            } else {
+              for (size_t x = 0; x < xsize; ++x) {
+                row_out[x] = LoadBEFloat(in + i);
+                i += bytes_per_pixel;
+              }
+            }
+          },
+          "ConvertAlphaFloat");
+    } else {
+      float mul = 1. / ((1ull << bits_per_sample) - 1);
+      RunOnPool(
+          pool, 0, static_cast<uint32_t>(ysize), ThreadPool::SkipInit(),
+          [&](const int task, int /*thread*/) {
+            const size_t y = get_y(task);
+            size_t i = row_size * task + color_channels * bytes_per_channel;
+            float* JXL_RESTRICT row_out = alpha.Row(y);
+            // TODO(deymo): add bits_per_sample == 1 case here. Also maybe
+            // implement masking if bits_per_sample is not a multiple of 8.
+            if (bits_per_sample <= 8) {
+              LoadFloatRow<Load8>(row_out, in + i, mul, xsize, bytes_per_pixel);
+            } else if (bits_per_sample <= 16) {
+              if (little_endian) {
+                LoadFloatRow<LoadLE16>(row_out, in + i, mul, xsize,
+                                       bytes_per_pixel);
+              } else {
+                LoadFloatRow<LoadBE16>(row_out, in + i, mul, xsize,
+                                       bytes_per_pixel);
+              }
+            } else if (bits_per_sample <= 24) {
+              if (little_endian) {
+                LoadFloatRow<LoadLE24>(row_out, in + i, mul, xsize,
+                                       bytes_per_pixel);
+              } else {
+                LoadFloatRow<LoadBE24>(row_out, in + i, mul, xsize,
+                                       bytes_per_pixel);
+              }
+            } else {
+              if (little_endian) {
+                LoadFloatRow<LoadLE32>(row_out, in + i, mul, xsize,
+                                       bytes_per_pixel);
+              } else {
+                LoadFloatRow<LoadBE32>(row_out, in + i, mul, xsize,
+                                       bytes_per_pixel);
+              }
+            }
+          },
+          "ConvertAlphaUint");
+    }
+
+    ib->SetAlpha(std::move(alpha), alpha_is_premultiplied);
+  }
+
+  return true;
+}
+
+// Converts `buffer` (`size` bytes, laid out per `pixel_format`) into `ib` via
+// ConvertFromExternal. Fails for data types other than float, uint8 and uint16.
+Status BufferToImageBundle(const JxlPixelFormat& pixel_format, uint32_t xsize,
+                           uint32_t ysize, const void* buffer, size_t size,
+                           jxl::ThreadPool* pool,
+                           const jxl::ColorEncoding& c_current,
+                           jxl::ImageBundle* ib) {
+  size_t bitdepth;
+  // TODO(zond): Make this accept more than float and uint8/16.
+  if (pixel_format.data_type == JXL_TYPE_FLOAT) {
+    bitdepth = 32;
+  } else if (pixel_format.data_type == JXL_TYPE_UINT8) {
+    bitdepth = 8;
+  } else if (pixel_format.data_type == JXL_TYPE_UINT16) {
+    bitdepth = 16;
+  } else {
+    return JXL_FAILURE("unsupported bitdepth");
+  }
+
+  // The buffer is only read, so stay const rather than casting const away.
+  JXL_RETURN_IF_ERROR(ConvertFromExternal(
+      jxl::Span<const uint8_t>(static_cast<const uint8_t*>(buffer), size),
+      xsize, ysize, c_current,
+      /*has_alpha=*/pixel_format.num_channels == 2 ||
+          pixel_format.num_channels == 4,
+      /*alpha_is_premultiplied=*/false, bitdepth, pixel_format.endianness,
+      /*flipped_y=*/false, pool, ib));
+  ib->VerifyMetadata();
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image.h
new file mode 100644
index 0000000000..f943fc54ef
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image.h
@@ -0,0 +1,50 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_EXTERNAL_IMAGE_H_
+#define LIB_JXL_ENC_EXTERNAL_IMAGE_H_
+
+// Interleaved image for color transforms and Codec.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "jxl/types.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+// Bytes in one interleaved row: each sample takes whole bytes, except that a
+// 1-bit row is DivCeil(xsize, kBitsPerByte) bytes regardless of `channels`.
+constexpr size_t RowSize(size_t xsize, size_t channels,
+                         size_t bits_per_sample) {
+  return bits_per_sample != 1
+             ? xsize * channels * DivCeil(bits_per_sample, kBitsPerByte)
+             : DivCeil(xsize, kBitsPerByte);
+}
+
+// Converts the interleaved pixel buffer `bytes` into `ib` (the opposite of
+// ConvertToExternal()). Fails for bits_per_sample outside 2..32 or short input.
+Status ConvertFromExternal(Span<const uint8_t> bytes, size_t xsize,
+                           size_t ysize, const ColorEncoding& c_current,
+                           bool has_alpha, bool alpha_is_premultiplied,
+                           size_t bits_per_sample, JxlEndianness endianness,
+                           bool flipped_y, ThreadPool* pool, ImageBundle* ib);
+// Like ConvertFromExternal, with bit depth and alpha taken from pixel_format.
+Status BufferToImageBundle(const JxlPixelFormat& pixel_format, uint32_t xsize,
+                           uint32_t ysize, const void* buffer, size_t size,
+                           jxl::ThreadPool* pool,
+                           const jxl::ColorEncoding& c_current,
+                           jxl::ImageBundle* ib);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_EXTERNAL_IMAGE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image_gbench.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image_gbench.cc
new file mode 100644
index 0000000000..2af942b7f5
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image_gbench.cc
@@ -0,0 +1,49 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "benchmark/benchmark.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+namespace {
+
+// Benchmarks the encoder-side conversion: deinterleaving a zero-filled 8-bit
+// RGBA buffer into an ImageBundle.
+void BM_EncExternalImage_ConvertImageRGBA(benchmark::State& state) {
+  const size_t kNumIter = 5;
+  size_t xsize = state.range();
+  size_t ysize = state.range();
+  std::vector<uint8_t> interleaved(xsize * ysize * 4);
+
+  ImageMetadata im;
+  im.SetAlphaBits(8);
+  ImageBundle ib(&im);
+
+  for (auto _ : state) {
+    for (size_t remaining = kNumIter; remaining != 0; --remaining) {
+      const Span<const uint8_t> bytes(interleaved.data(), interleaved.size());
+      JXL_CHECK(ConvertFromExternal(bytes, xsize, ysize,
+                                    /*c_current=*/ColorEncoding::SRGB(),
+                                    /*has_alpha=*/true,
+                                    /*alpha_is_premultiplied=*/false,
+                                    /*bits_per_sample=*/8, JXL_NATIVE_ENDIAN,
+                                    /*flipped_y=*/false,
+                                    /*pool=*/nullptr, &ib));
+    }
+  }
+
+  // Pixels per second.
+  state.SetItemsProcessed(kNumIter * state.iterations() * xsize * ysize);
+  state.SetBytesProcessed(kNumIter * state.iterations() * interleaved.size());
+}
+
+// Square inputs with sides 256, 512, 1024 and 2048.
+BENCHMARK(BM_EncExternalImage_ConvertImageRGBA)
+    ->RangeMultiplier(2)
+    ->Range(256, 2048);
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image_test.cc
new file mode 100644
index 0000000000..3f3ac8988c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_external_image_test.cc
@@ -0,0 +1,49 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_external_image.h"
+
+#include <array>
+#include <new>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/thread_pool_internal.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+
+namespace jxl {
+namespace {
+
+#if !defined(JXL_CRASH_ON_ERROR)
+// ConvertFromExternal must fail when the buffer is too small for the image and
+// succeed once it is exactly large enough.
+TEST(ExternalImageTest, InvalidSize) {
+  ImageMetadata im;
+  im.SetAlphaBits(8);
+  ImageBundle ib(&im);
+
+  // Sized for 10x100 pixels at 8 bytes per pixel.
+  const uint8_t buf[10 * 100 * 8] = {};
+  // Runs the conversion on the first num_bytes bytes of buf.
+  const auto convert = [&buf, &ib](size_t num_bytes) {
+    return ConvertFromExternal(
+        Span<const uint8_t>(buf, num_bytes), /*xsize=*/10, /*ysize=*/100,
+        /*c_current=*/ColorEncoding::SRGB(), /*has_alpha=*/true,
+        /*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16,
+        JXL_BIG_ENDIAN, /*flipped_y=*/false, nullptr, &ib);
+  };
+
+  // Far too small, one byte short, and exactly the required size.
+  EXPECT_FALSE(convert(10));
+  EXPECT_FALSE(convert(sizeof(buf) - 1));
+  EXPECT_TRUE(convert(sizeof(buf)));
+}
+#endif
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_fast_heuristics.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_fast_heuristics.cc
new file mode 100644
index 0000000000..16f7670c1a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_fast_heuristics.cc
@@ -0,0 +1,361 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <numeric>
+#include <string>
+
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/enc_ac_strategy.h"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_ar_control_field.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_heuristics.h"
+#include "lib/jxl/enc_noise.h"
+#include "lib/jxl/gaborish.h"
+#include "lib/jxl/gauss_blur.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_fast_heuristics.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+using DF4 = HWY_CAPPED(float, 4);
+DF4 df4;
+HWY_FULL(float) df;
+
+// Fast heuristics for lossy frames. From a high-frequency residual of `opsin`
+// (difference to a Gaussian blur) this fills shared.cmap.ytox_map / ytob_map,
+// shared.ac_strategy and shared.epf_sharpness, and installs a quant field
+// via shared.quantizer.SetQuantField. Requires butteraugli_distance > 0
+// (JXL_CHECK). `modular_frame_encoder` and `linear` are not used here.
+// `aux_out` may be null: the debug plane dumps are then skipped.
+Status Heuristics(PassesEncoderState* enc_state,
+                  ModularFrameEncoder* modular_frame_encoder,
+                  const ImageBundle* linear, Image3F* opsin, ThreadPool* pool,
+                  AuxOut* aux_out) {
+  PROFILER_ZONE("JxlLossyFrameHeuristics uninstrumented");
+  CompressParams& cparams = enc_state->cparams;
+  PassesSharedState& shared = enc_state->shared;
+  const FrameDimensions& frame_dim = enc_state->shared.frame_dim;
+  JXL_CHECK(cparams.butteraugli_distance > 0);
+
+  // TODO(veluca): make this tiled.
+  if (shared.frame_header.loop_filter.gab) {
+    GaborishInverse(opsin, 0.9908511000000001f, pool);
+  }
+  // Compute image of high frequencies by removing a blurred version.
+  // TODO(veluca): certainly can be made faster, and use less memory...
+  constexpr size_t pad = 16;
+  Image3F padded = PadImageMirror(*opsin, pad, pad);
+  // Make the image (X, Y, B-Y)
+  // TODO(veluca): SubtractFrom is not parallel *and* not SIMD-fied.
+  SubtractFrom(padded.Plane(1), &padded.Plane(2));
+  // Ensure that OOB access for CfL does nothing. Not necessary if doing things
+  // properly...
+  Image3F hf(padded.xsize() + 64, padded.ysize());
+  ZeroFillImage(&hf);
+  hf.ShrinkTo(padded.xsize(), padded.ysize());
+  ImageF temp(padded.xsize(), padded.ysize());
+  // TODO(veluca): consider some faster blurring method.
+  auto g = CreateRecursiveGaussian(11.415258091746161);
+  for (size_t c = 0; c < 3; c++) {
+    FastGaussian(g, padded.Plane(c), pool, &temp, &hf.Plane(c));
+    SubtractFrom(padded.Plane(c), &hf.Plane(c));
+  }
+  // TODO(veluca): DC CfL?
+  size_t xcolortiles = DivCeil(frame_dim.xsize_blocks, kColorTileDimInBlocks);
+  size_t ycolortiles = DivCeil(frame_dim.ysize_blocks, kColorTileDimInBlocks);
+  RunOnPool(
+      pool, 0, xcolortiles * ycolortiles, ThreadPool::SkipInit(),
+      [&](size_t tile_id, size_t _) {
+        size_t tx = tile_id % xcolortiles;
+        size_t ty = tile_id / xcolortiles;
+        size_t x0 = tx * kColorTileDim;
+        size_t x1 = std::min(x0 + kColorTileDim, hf.xsize());
+        size_t y0 = ty * kColorTileDim;
+        size_t y1 = std::min(y0 + kColorTileDim, hf.ysize());
+        for (size_t c : {0, 2}) {
+          static constexpr float kInvColorFactor = 1.0f / kDefaultColorFactor;
+          auto ca = Zero(df);
+          auto cb = Zero(df);
+          const auto inv_color_factor = Set(df, kInvColorFactor);
+          for (size_t y = y0; y < y1; y++) {
+            const float* row_m = hf.PlaneRow(1, y);
+            const float* row_s = hf.PlaneRow(c, y);
+            for (size_t x = x0; x < x1; x += Lanes(df)) {
+              // color residual = ax + b
+              const auto a = inv_color_factor * Load(df, row_m + x);
+              const auto b = Zero(df) - Load(df, row_s + x);
+              ca = MulAdd(a, a, ca);
+              cb = MulAdd(a, b, cb);
+            }
+          }
+          float best =
+              -GetLane(SumOfLanes(cb)) / (GetLane(SumOfLanes(ca)) + 1e-9f);
+          int8_t& res = (c == 0 ? shared.cmap.ytox_map : shared.cmap.ytob_map)
+                            .Row(ty)[tx];
+          res = std::max(-128.0f, std::min(127.0f, roundf(best)));
+        }
+      },
+      "CfL");
+  // 4x4 pooling of |hf|: `pooled` keeps the maximum, `summed` the sum.
+  Image3F pooled(frame_dim.xsize_padded / 4, frame_dim.ysize_padded / 4);
+  Image3F summed(frame_dim.xsize_padded / 4, frame_dim.ysize_padded / 4);
+  RunOnPool(
+      pool, 0, frame_dim.ysize_padded / 4, ThreadPool::SkipInit(),
+      [&](size_t y, size_t _) {
+        for (size_t c = 0; c < 3; c++) {
+          float* JXL_RESTRICT row_out = pooled.PlaneRow(c, y);
+          float* JXL_RESTRICT row_out_avg = summed.PlaneRow(c, y);
+          const float* JXL_RESTRICT row_in[4];
+          for (size_t iy = 0; iy < 4; iy++) {
+            row_in[iy] = hf.PlaneRow(c, 4 * y + pad + iy);
+          }
+          for (size_t x = 0; x < frame_dim.xsize_padded / 4; x++) {
+            auto max = Zero(df4);
+            auto sum = Zero(df4);
+            for (size_t iy = 0; iy < 4; iy++) {
+              for (size_t ix = 0; ix < 4; ix += Lanes(df4)) {
+                const auto nn = Abs(Load(df4, row_in[iy] + x * 4 + ix + pad));
+                sum += nn;
+                max = IfThenElse(max > nn, max, nn);
+              }
+            }
+            row_out_avg[x] = GetLane(SumOfLanes(sum));
+            row_out[x] = GetLane(MaxOfLanes(max));
+          }
+        }
+      },
+      "MaxPool");
+  // TODO(veluca): better handling of the border
+  // TODO(veluca): consider some faster blurring method.
+  // TODO(veluca): parallelize.
+  // Remove noise from the resulting image.
+  auto g2 = CreateRecursiveGaussian(2.0849544429861884);
+  constexpr size_t pad2 = 16;
+  Image3F summed_pad = PadImageMirror(summed, pad2, pad2);
+  ImageF tmp_out(summed_pad.xsize(), summed_pad.ysize());
+  ImageF tmp2(summed_pad.xsize(), summed_pad.ysize());
+  Image3F pooled_pad = PadImageMirror(pooled, pad2, pad2);
+  for (size_t c = 0; c < 3; c++) {
+    FastGaussian(g2, summed_pad.Plane(c), pool, &tmp2, &tmp_out);
+    const auto unblurred_multiplier = Set(df, 0.5f);
+    for (size_t y = 0; y < summed.ysize(); y++) {
+      float* row = summed.PlaneRow(c, y);
+      const float* row_blur = tmp_out.Row(y + pad2);
+      for (size_t x = 0; x < summed.xsize(); x += Lanes(df)) {
+        const auto b = Load(df, row_blur + x + pad2);
+        const auto o = Load(df, row + x) * unblurred_multiplier;
+        const auto m = IfThenElse(b > o, b, o);
+        Store(m, df, row + x);
+      }
+    }
+  }
+  for (size_t c = 0; c < 3; c++) {
+    FastGaussian(g2, pooled_pad.Plane(c), pool, &tmp2, &tmp_out);
+    const auto unblurred_multiplier = Set(df, 0.5f);
+    for (size_t y = 0; y < pooled.ysize(); y++) {
+      float* row = pooled.PlaneRow(c, y);
+      const float* row_blur = tmp_out.Row(y + pad2);
+      for (size_t x = 0; x < pooled.xsize(); x += Lanes(df)) {
+        const auto b = Load(df, row_blur + x + pad2);
+        const auto o = Load(df, row + x) * unblurred_multiplier;
+        const auto m = IfThenElse(b > o, b, o);
+        Store(m, df, row + x);
+      }
+    }
+  }
+  const static float kChannelMul[3] = {
+      7.9644294909680253f,
+      0.5700000183257159f,
+      0.20267448837597055f,
+  };
+  ImageF pooledhf44(pooled.xsize(), pooled.ysize());
+  for (size_t y = 0; y < pooled.ysize(); y++) {
+    const float* row_in_x = pooled.ConstPlaneRow(0, y);
+    const float* row_in_y = pooled.ConstPlaneRow(1, y);
+    const float* row_in_b = pooled.ConstPlaneRow(2, y);
+    float* row_out = pooledhf44.Row(y);
+    for (size_t x = 0; x < pooled.xsize(); x += Lanes(df)) {
+      auto v = Set(df, kChannelMul[0]) * Load(df, row_in_x + x);
+      v = MulAdd(Set(df, kChannelMul[1]), Load(df, row_in_y + x), v);
+      v = MulAdd(Set(df, kChannelMul[2]), Load(df, row_in_b + x), v);
+      Store(v, df, row_out + x);
+    }
+  }
+  ImageF summedhf44(summed.xsize(), summed.ysize());
+  for (size_t y = 0; y < summed.ysize(); y++) {
+    const float* row_in_x = summed.ConstPlaneRow(0, y);
+    const float* row_in_y = summed.ConstPlaneRow(1, y);
+    const float* row_in_b = summed.ConstPlaneRow(2, y);
+    float* row_out = summedhf44.Row(y);
+    for (size_t x = 0; x < summed.xsize(); x += Lanes(df)) {
+      auto v = Set(df, kChannelMul[0]) * Load(df, row_in_x + x);
+      v = MulAdd(Set(df, kChannelMul[1]), Load(df, row_in_y + x), v);
+      v = MulAdd(Set(df, kChannelMul[2]), Load(df, row_in_b + x), v);
+      Store(v, df, row_out + x);
+    }
+  }
+  // Debug output only; a member call through a null AuxOut would be UB.
+  if (aux_out) aux_out->DumpPlaneNormalized("pooledhf44", pooledhf44);
+  if (aux_out) aux_out->DumpPlaneNormalized("summedhf44", summedhf44);
+
+  static const float kDcQuantMul = 0.88170190420916206;
+  static const float kAcQuantMul = 2.5165738934721524;
+
+  float dc_quant = kDcQuantMul * InitialQuantDC(cparams.butteraugli_distance);
+  float ac_quant_base = kAcQuantMul / cparams.butteraugli_distance;
+  ImageF quant_field(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+
+  static_assert(kColorTileDim == 64, "Fix the code below");
+  auto mmacs = [&](size_t bx, size_t by, AcStrategy acs, float& min,
+                   float& max) {
+    min = 1e10;
+    max = 0;
+    for (size_t y = 2 * by; y < 2 * (by + acs.covered_blocks_y()); y++) {
+      const float* row = summedhf44.Row(y);
+      for (size_t x = 2 * bx; x < 2 * (bx + acs.covered_blocks_x()); x++) {
+        min = std::min(min, row[x]);
+        max = std::max(max, row[x]);
+      }
+    }
+  };
+  // Multipliers for allowed range of summedhf44.
+  std::pair<AcStrategy::Type, float> candidates[] = {
+    // The order is such that, in case of ties, 8x8 is favoured over 4x4 which
+    // is favoured over 2x2. Similarly, we prefer square transforms over
+    // same-area rectangular ones.
+    {AcStrategy::Type::DCT2X2, 1.5f},
+    {AcStrategy::Type::DCT4X4, 1.4f},
+    {AcStrategy::Type::DCT4X8, 1.2f},
+    {AcStrategy::Type::DCT8X4, 1.2f},
+    {AcStrategy::Type::AFV0,
+     1.15f},  // doesn't really work with these heuristics
+    {AcStrategy::Type::AFV1, 1.15f},
+    {AcStrategy::Type::AFV2, 1.15f},
+    {AcStrategy::Type::AFV3, 1.15f},
+    {AcStrategy::Type::DCT, 1.0f},
+    {AcStrategy::Type::DCT16X8, 0.8f},
+    {AcStrategy::Type::DCT8X16, 0.8f},
+    {AcStrategy::Type::DCT16X16, 0.2f},
+    {AcStrategy::Type::DCT16X32, 0.2f},
+    {AcStrategy::Type::DCT32X16, 0.2f},
+    {AcStrategy::Type::DCT32X32, 0.2f},
+    {AcStrategy::Type::DCT32X64, 0.1f},
+    {AcStrategy::Type::DCT64X32, 0.1f},
+    {AcStrategy::Type::DCT64X64, 0.04f},
+    // TODO(veluca): figure out if we want 4x8 and/or AFV.
+  };
+  float max_range = 1e-8f + 0.5f * std::pow(cparams.butteraugli_distance, 0.5f);
+  // Change quant field and sharpness amounts based on (pooled|summed)hf44, and
+  // compute block sizes.
+  // TODO(veluca): maybe this could be done per group: it would allow choosing
+  // floating blocks better.
+  RunOnPool(
+      pool, 0, xcolortiles * ycolortiles, ThreadPool::SkipInit(),
+      [&](size_t tile_id, size_t _) {
+        size_t tx = tile_id % xcolortiles;
+        size_t ty = tile_id / xcolortiles;
+        size_t x0 = tx * kColorTileDim / kBlockDim;
+        size_t x1 = std::min(x0 + kColorTileDimInBlocks, quant_field.xsize());
+        size_t y0 = ty * kColorTileDim / kBlockDim;
+        size_t y1 = std::min(y0 + kColorTileDimInBlocks, quant_field.ysize());
+        size_t qf_stride = quant_field.PixelsPerRow();
+        size_t epf_stride = shared.epf_sharpness.PixelsPerRow();
+        // Per-tile flags, indexed row * 8 + column: block already covered.
+        bool chosen_mask[64] = {};
+        for (size_t y = y0; y < y1; y++) {
+          uint8_t* epf_row = shared.epf_sharpness.Row(y);
+          float* qf_row = quant_field.Row(y);
+          for (size_t x = x0; x < x1; x++) {
+            if (chosen_mask[(y - y0) * 8 + (x - x0)]) continue;
+            // Default to DCT8 just in case something funny happens in the loop
+            // below.
+            AcStrategy::Type best = AcStrategy::DCT;
+            size_t best_covered = 1;
+            float qf = ac_quant_base;
+            for (size_t i = 0; i < sizeof(candidates) / sizeof(*candidates);
+                 i++) {
+              AcStrategy acs = AcStrategy::FromRawStrategy(candidates[i].first);
+              if (y + acs.covered_blocks_y() > y1) continue;
+              if (x + acs.covered_blocks_x() > x1) continue;
+              bool fits = true;
+              for (size_t iy = y; iy < y + acs.covered_blocks_y(); iy++) {
+                for (size_t ix = x; ix < x + acs.covered_blocks_x(); ix++) {
+                  if (chosen_mask[(iy - y0) * 8 + (ix - x0)]) {
+                    fits = false;
+                    break;
+                  }
+                }
+              }
+              if (!fits) continue;
+              float min, max;
+              mmacs(x, y, acs, min, max);
+              if (max - min > max_range * candidates[i].second) continue;
+              size_t cb = acs.covered_blocks_x() * acs.covered_blocks_y();
+              if (cb >= best_covered) {
+                best_covered = cb;
+                best = candidates[i].first;
+                // TODO(veluca): make this better.
+                qf = ac_quant_base /
+                     (3.9312946339134007f + 2.6011435675118082f * min);
+              }
+            }
+            shared.ac_strategy.Set(x, y, best);
+            AcStrategy acs = AcStrategy::FromRawStrategy(best);
+            for (size_t iy = y; iy < y + acs.covered_blocks_y(); iy++) {
+              for (size_t ix = x; ix < x + acs.covered_blocks_x(); ix++) {
+                chosen_mask[(iy - y0) * 8 + (ix - x0)] = 1;
+                qf_row[ix + (iy - y) * qf_stride] = qf;
+              }
+            }
+            // TODO: EPF sharpness is a constant 4 for every covered block.
+            for (size_t iy = y; iy < y + acs.covered_blocks_y(); iy++) {
+              for (size_t ix = x; ix < x + acs.covered_blocks_x(); ix++) {
+                epf_row[ix + (iy - y) * epf_stride] = 4;
+              }
+            }
+          }
+        }
+      },
+      "QF+ACS+EPF");
+  // Debug output only; skipped when no AuxOut was supplied.
+  if (aux_out) aux_out->DumpPlaneNormalized("qf", quant_field);
+  if (aux_out) aux_out->DumpPlaneNormalized("epf", shared.epf_sharpness);
+  DumpAcStrategy(shared.ac_strategy, frame_dim.xsize_padded,
+                 frame_dim.ysize_padded, "acs", aux_out);
+
+  shared.quantizer.SetQuantField(dc_quant, quant_field,
+                                 &shared.raw_quant_field);
+
+  return true;
+}
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(Heuristics);
+// Forwards all arguments to Heuristics via HWY_DYNAMIC_DISPATCH.
+Status FastEncoderHeuristics::LossyFrameHeuristics(
+    PassesEncoderState* enc_state, ModularFrameEncoder* modular_frame_encoder,
+    const ImageBundle* linear, Image3F* opsin, ThreadPool* pool,
+    AuxOut* aux_out) {
+  return HWY_DYNAMIC_DISPATCH(Heuristics)(enc_state, modular_frame_encoder,
+                                          linear, opsin, pool, aux_out);
+}
+}  // namespace jxl
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_file.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_file.cc
new file mode 100644
index 0000000000..d4f94c74d7
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_file.cc
@@ -0,0 +1,279 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_file.h"
+
+#include <stddef.h>
+
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_frame.h"
+#include "lib/jxl/enc_icc_codec.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+namespace {
+
+// DC + 'Very Low Frequency'. Used for saliency_num_progressive_steps == 1.
+PassDefinition progressive_passes_dc_vlf[] = {
+    {/*num_coefficients=*/2, /*shift=*/0, /*salient_only=*/false,
+     /*suitable_for_downsampling_of_at_least=*/4}};
+// Adds a num_coefficients=3 pass. Used for saliency_num_progressive_steps == 2.
+PassDefinition progressive_passes_dc_lf[] = {
+    {/*num_coefficients=*/2, /*shift=*/0, /*salient_only=*/false,
+     /*suitable_for_downsampling_of_at_least=*/4},
+    {/*num_coefficients=*/3, /*shift=*/0, /*salient_only=*/false,
+     /*suitable_for_downsampling_of_at_least=*/2}};
+// Adds a salient-only pass. Used for 3 steps, or 4 with saliency_threshold 0.
+PassDefinition progressive_passes_dc_lf_salient_ac[] = {
+    {/*num_coefficients=*/2, /*shift=*/0, /*salient_only=*/false,
+     /*suitable_for_downsampling_of_at_least=*/4},
+    {/*num_coefficients=*/3, /*shift=*/0, /*salient_only=*/false,
+     /*suitable_for_downsampling_of_at_least=*/2},
+    {/*num_coefficients=*/8, /*shift=*/0, /*salient_only=*/true,
+     /*suitable_for_downsampling_of_at_least=*/0}};
+// Adds a final pass with salient_only=false. Used for 4 steps otherwise.
+PassDefinition progressive_passes_dc_lf_salient_ac_other_ac[] = {
+    {/*num_coefficients=*/2, /*shift=*/0, /*salient_only=*/false,
+     /*suitable_for_downsampling_of_at_least=*/4},
+    {/*num_coefficients=*/3, /*shift=*/0, /*salient_only=*/false,
+     /*suitable_for_downsampling_of_at_least=*/2},
+    {/*num_coefficients=*/8, /*shift=*/0, /*salient_only=*/true,
+     /*suitable_for_downsampling_of_at_least=*/0},
+    {/*num_coefficients=*/8, /*shift=*/0, /*salient_only=*/false,
+     /*suitable_for_downsampling_of_at_least=*/0}};
+// Two passes (shift=1, then shift=0). Used for cparams.qprogressive_mode.
+PassDefinition progressive_passes_dc_quant_ac_full_ac[] = {
+    {/*num_coefficients=*/8, /*shift=*/1, /*salient_only=*/false,
+     /*suitable_for_downsampling_of_at_least=*/2},
+    {/*num_coefficients=*/8, /*shift=*/0, /*salient_only=*/false,
+     /*suitable_for_downsampling_of_at_least=*/0},
+};
+
+constexpr uint16_t kExifOrientationTag = 274;
+
+// Parses the Exif data just enough to extract any render-impacting info
+// (currently only the orientation tag). Invalid or unparseable Exif data is
+// treated as a no-op. The IFD offset is held in 64 bits so that the size
+// check below cannot wrap around for offsets close to 2^32.
+// TODO (jon): tag 1 can be used to represent Adobe RGB 1998 if it has value
+// "R03"; also set intrinsic dimensions according to
+// https://discourse.wicg.io/t/proposal-exif-image-resolution-auto-and-from-image/4326/24
+void InterpretExif(const PaddedBytes& exif, CodecMetadata* metadata) {
+  if (exif.size() < 12) return;  // not enough bytes for a valid exif blob
+  const uint8_t* t = exif.data();
+  bool bigendian = false;
+  if (LoadLE32(t) == 0x2A004D4D) {
+    bigendian = true;
+  } else if (LoadLE32(t) != 0x002A4949) {
+    return;  // not a valid tiff header
+  }
+  t += 4;
+  const uint64_t offset = (bigendian ? LoadBE32(t) : LoadLE32(t));
+  if (exif.size() < 12 + offset + 2 || offset < 8) return;
+  t += offset - 4;
+  uint16_t nb_tags = (bigendian ? LoadBE16(t) : LoadLE16(t));
+  t += 2;
+  while (nb_tags > 0) {
+    if (exif.data() + exif.size() - t <= 12) return;  // no full entry left
+    uint16_t tag = (bigendian ? LoadBE16(t) : LoadLE16(t));
+    t += 2;
+    uint16_t type = (bigendian ? LoadBE16(t) : LoadLE16(t));
+    t += 2;
+    uint32_t count = (bigendian ? LoadBE32(t) : LoadLE32(t));
+    t += 4;
+    uint16_t value = (bigendian ? LoadBE16(t) : LoadLE16(t));
+    t += 4;
+    if (tag == kExifOrientationTag) {
+      if (type == 3 && count == 1) {
+        if (value >= 1 && value <= 8) {
+          metadata->m.orientation = value;
+        }
+      }
+    }
+    nb_tags--;
+  }
+}
+
+// Fills `metadata` from `io` and `cparams`: copies io->metadata, sets the size
+// (scaled by cparams.resampling if already downsampled), decides on ICC,
+// records whether XYB is used and applies Exif info. Fails if size.Set fails.
+Status PrepareCodecMetadataFromIO(const CompressParams& cparams,
+                                  const CodecInOut* io,
+                                  CodecMetadata* metadata) {
+  *metadata = io->metadata;
+
+  const size_t ups = cparams.already_downsampled ? cparams.resampling : 1;
+  const size_t xsize = io->xsize() * ups;
+  const size_t ysize = io->ysize() * ups;
+  JXL_RETURN_IF_ERROR(metadata->size.Set(xsize, ysize));
+
+  // Keep the ICC profile in lossless modular mode (a reconstructed profile may
+  // differ slightly due to quantization) and in JPEG reconstruction mode
+  // (byte-exact profiles are needed); otherwise let the encoding decide.
+  const bool lossless_modular =
+      cparams.modular_mode && cparams.quality_pair.first == 100.0f;
+  const bool keep_icc = lossless_modular || io->Main().IsJPEG();
+  if (!keep_icc) metadata->m.color_encoding.DecideIfWantICC();
+
+  metadata->m.xyb_encoded = (cparams.color_transform == ColorTransform::kXYB);
+  InterpretExif(io->blobs.exif, metadata);
+  return true;
+}
+
+}  // namespace
+
+// Encodes `ib` as the preview frame into a scratch writer and, if any bits
+// were produced, appends them byte-aligned to `writer`.
+Status EncodePreview(const CompressParams& cparams, const ImageBundle& ib,
+                     const CodecMetadata* metadata, ThreadPool* pool,
+                     BitWriter* JXL_RESTRICT writer) {
+  BitWriter scratch;
+  // TODO(janwas): also support generating preview by downsampling
+  if (ib.HasColor()) {
+    AuxOut preview_aux;
+    PassesEncoderState enc_state;
+    // TODO(lode): check if we want all extra channels and matching xyb_encoded
+    // for the preview, such that using the main ImageMetadata object for
+    // encoding this frame is warranted.
+    FrameInfo preview_info;
+    preview_info.is_preview = true;
+    JXL_RETURN_IF_ERROR(EncodeFrame(cparams, preview_info, metadata, ib,
+                                    &enc_state, pool, &scratch, &preview_aux));
+    scratch.ZeroPadToByte();
+  }
+
+  // Nothing was written to the scratch writer: leave `writer` untouched.
+  if (scratch.BitsWritten() == 0) return true;
+  writer->ZeroPadToByte();
+  writer->AppendByteAligned(scratch);
+  return true;
+}
+
+Status WriteHeaders(CodecMetadata* metadata, BitWriter* writer,
+                    AuxOut* aux_out) {
+  // Marker/signature: 0xFF followed by kCodestreamMarker (2 x 8 bits).
+  BitWriter::Allotment allotment(writer, 16);
+  writer->Write(8, 0xFF);
+  writer->Write(8, kCodestreamMarker);
+  ReclaimAndCharge(writer, &allotment, kLayerHeader, aux_out);
+  // Size header, written from metadata->size.
+  JXL_RETURN_IF_ERROR(
+      WriteSizeHeader(metadata->size, writer, kLayerHeader, aux_out));
+  // Image metadata, written from metadata->m.
+  JXL_RETURN_IF_ERROR(
+      WriteImageMetadata(metadata->m, writer, kLayerHeader, aux_out));
+  // Mirror xyb_encoded into the transform data before serializing it.
+  metadata->transform_data.nonserialized_xyb_encoded = metadata->m.xyb_encoded;
+  JXL_RETURN_IF_ERROR(
+      Bundle::Write(metadata->transform_data, writer, kLayerHeader, aux_out));
+
+  return true;
+}
+
+// Encodes every frame of `io` into `compressed`: signature and headers first,
+// then optional ICC and preview, then each frame (forwarding `xclbinPath`).
+Status EncodeFile(const CompressParams& cparams_orig, const CodecInOut* io,
+                  PassesEncoderState* passes_enc_state, PaddedBytes* compressed,
+                  AuxOut* aux_out, ThreadPool* pool, std::string xclbinPath) {
+  io->CheckMetadata();
+  BitWriter writer;
+
+  // Adopt the original image's color transform (YCbCr or XYB), if it has one.
+  CompressParams cparams = cparams_orig;
+  if (io->Main().color_transform != ColorTransform::kNone) {
+    cparams.color_transform = io->Main().color_transform;
+  }
+
+  std::unique_ptr<CodecMetadata> metadata = jxl::make_unique<CodecMetadata>();
+  CodecMetadata* const meta = metadata.get();
+  JXL_RETURN_IF_ERROR(PrepareCodecMetadataFromIO(cparams, io, meta));
+  JXL_RETURN_IF_ERROR(WriteHeaders(meta, &writer, aux_out));
+
+  // Only send ICC (at least several hundred bytes) if fields aren't enough.
+  if (meta->m.color_encoding.WantICC()) {
+    JXL_RETURN_IF_ERROR(
+        WriteICC(meta->m.color_encoding.ICC(), &writer, kLayerHeader, aux_out));
+  }
+
+  if (meta->m.have_preview) {
+    JXL_RETURN_IF_ERROR(
+        EncodePreview(cparams, io->preview_frame, meta, pool, &writer));
+  }
+
+  // Each frame should start on byte boundaries.
+  writer.ZeroPadToByte();
+
+  if (cparams.progressive_mode || cparams.qprogressive_mode) {
+    auto& split = passes_enc_state->progressive_splitter;
+    if (cparams.saliency_map != nullptr) {
+      split.SetSaliencyMap(cparams.saliency_map);
+    }
+    split.SetSaliencyThreshold(cparams.saliency_threshold);
+    if (cparams.qprogressive_mode) {
+      split.SetProgressiveMode(
+          ProgressiveMode{progressive_passes_dc_quant_ac_full_ac});
+    } else {
+      switch (cparams.saliency_num_progressive_steps) {
+        case 1:
+          split.SetProgressiveMode(ProgressiveMode{progressive_passes_dc_vlf});
+          break;
+        case 2:
+          split.SetProgressiveMode(ProgressiveMode{progressive_passes_dc_lf});
+          break;
+        case 3:
+          split.SetProgressiveMode(
+              ProgressiveMode{progressive_passes_dc_lf_salient_ac});
+          break;
+        case 4:
+          // No need for a 4th pass if saliency-threshold regards everything
+          // as salient.
+          if (cparams.saliency_threshold == 0.0f) {
+            split.SetProgressiveMode(
+                ProgressiveMode{progressive_passes_dc_lf_salient_ac});
+          } else {
+            split.SetProgressiveMode(
+                ProgressiveMode{progressive_passes_dc_lf_salient_ac_other_ac});
+          }
+          break;
+        default:
+          return JXL_FAILURE("Invalid saliency_num_progressive_steps.");
+      }
+    }
+  }
+  for (size_t frame_idx = 0; frame_idx < io->frames.size(); frame_idx++) {
+    FrameInfo frame_info;
+    frame_info.is_last = (frame_idx + 1 == io->frames.size());
+    if (io->frames[frame_idx].use_for_next_frame) {
+      frame_info.save_as_reference = 1;
+    }
+    JXL_RETURN_IF_ERROR(EncodeFrame(cparams, frame_info, meta,
+                                    io->frames[frame_idx], passes_enc_state,
+                                    pool, &writer, aux_out, xclbinPath));
+  }
+
+  // Clean up passes_enc_state in case it gets reused.
+  for (size_t slot = 0; slot < 4; slot++) {
+    passes_enc_state->shared.dc_frames[slot] = Image3F();
+    passes_enc_state->shared.reference_frames[slot].storage = ImageBundle();
+  }
+
+  *compressed = std::move(writer).TakeBytes();
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_file.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_file.h
new file mode 100644
index 0000000000..12b5c37b4b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_file.h
@@ -0,0 +1,52 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_FILE_H_
+#define LIB_JXL_ENC_FILE_H_
+
+// Facade for JXL encoding.
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+
+namespace jxl {
+
+// Write preview from `ib`.
+Status EncodePreview(const CompressParams& cparams, const ImageBundle& ib,
+                     const CodecMetadata* metadata, ThreadPool* pool,
+                     BitWriter* JXL_RESTRICT writer);
+
+// Write headers from the CodecMetadata. Also may modify nonserialized_...
+// fields of the metadata.
+Status WriteHeaders(CodecMetadata* metadata, BitWriter* writer,
+                    AuxOut* aux_out);
+
+// Compresses pixels from `io` (given in any ColorEncoding).
+// `io->metadata.m.original` must be set.
+Status EncodeFile(const CompressParams& params, const CodecInOut* io,
+                  PassesEncoderState* passes_enc_state, PaddedBytes* compressed,
+                  AuxOut* aux_out = nullptr, ThreadPool* pool = nullptr,
+                  std::string xclbinPath = "");
+
+// Backwards-compatible interface; forwards all arguments. Not for new code.
+// TODO(deymo): Remove this function once we migrate users to C encoder API.
+struct FrameEncCache {};
+JXL_INLINE Status EncodeFile(const CompressParams& params, const CodecInOut* io,
+                             FrameEncCache* /* unused */,
+                             PaddedBytes* compressed, AuxOut* aux_out = nullptr,
+                             ThreadPool* pool = nullptr, std::string xclbinPath = "") {
+  PassesEncoderState state;
+  return EncodeFile(params, io, &state, compressed, aux_out, pool, xclbinPath);
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_FILE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_frame.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_frame.cc
new file mode 100644
index 0000000000..40e40d8d59
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_frame.cc
@@ -0,0 +1,1418 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_frame.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <limits>
+#include <numeric>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_chroma_from_luma.h"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/enc_context_map.h"
+#include "lib/jxl/enc_entropy_coder.h"
+#include "lib/jxl/enc_group.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_noise.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/enc_toc.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/gaborish.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+namespace {
+// Assigns every AC group a histogram cluster in enc_state->histogram_idx.
+void ClusterGroups(PassesEncoderState* enc_state) {
+  if (enc_state->shared.frame_header.passes.num_passes > 1) {
+    // TODO(veluca): implement this for progressive modes.
+    return;
+  }
+  // This only considers pass 0 for now.
+  std::vector<uint8_t> context_map;
+  EntropyEncodingData codes;
+  auto& ac = enc_state->passes[0].ac_tokens;
+  size_t limit = std::ceil(std::sqrt(ac.size()));  // Max number of clusters.
+  if (limit == 1) return;  // A single group: nothing to cluster.
+  size_t num_contexts = enc_state->shared.block_ctx_map.NumACContexts();
+  std::vector<float> costs(ac.size());  // Estimated cost of each group alone.
+  HistogramParams params;
+  params.uint_method = HistogramParams::HybridUintMethod::kNone;
+  params.lz77_method = HistogramParams::LZ77Method::kNone;
+  params.ans_histogram_strategy =
+      HistogramParams::ANSHistogramStrategy::kApproximate;
+  size_t max = 0;  // Index of the costliest group seen so far.
+  auto token_cost = [&](std::vector<std::vector<Token>>& tokens, size_t num_ctx,
+                        bool estimate = true) {
+    // TODO(veluca): not estimating is very expensive.
+    BitWriter writer;
+    size_t c = BuildAndEncodeHistograms(
+        params, num_ctx, tokens, &codes, &context_map,
+        estimate ? nullptr : &writer, 0, /*aux_out=*/0);
+    if (estimate) return c;
+    for (size_t i = 0; i < tokens.size(); i++) {
+      WriteTokens(tokens[i], codes, context_map, &writer, 0, nullptr);
+    }
+    return writer.BitsWritten();  // Exact size: histograms plus tokens.
+  };
+  for (size_t i = 0; i < ac.size(); i++) {
+    std::vector<std::vector<Token>> tokens{ac[i]};
+    costs[i] =
+        token_cost(tokens, enc_state->shared.block_ctx_map.NumACContexts());
+    if (costs[i] > costs[max]) {
+      max = i;
+    }
+  }
+  auto dist = [&](int i, int j) {  // Joint cost minus the individual costs.
+    std::vector<std::vector<Token>> tokens{ac[i], ac[j]};
+    return token_cost(tokens, num_contexts) - costs[i] - costs[j];
+  };
+  std::vector<size_t> out{max};  // Cluster centres; starts with the costliest.
+  std::vector<size_t> old_map(ac.size());  // Written below, never read.
+  std::vector<float> dists(ac.size());  // Distance to the closest centre.
+  size_t farthest = 0;
+  for (size_t i = 0; i < ac.size(); i++) {
+    if (i == max) continue;
+    dists[i] = dist(max, i);
+    if (dists[i] > dists[farthest]) {
+      farthest = i;
+    }
+  }
+  // Promote the farthest group to a new centre, up to `limit` centres.
+  while (dists[farthest] > 0 && out.size() < limit) {
+    out.push_back(farthest);
+    dists[farthest] = 0;
+    enc_state->histogram_idx[farthest] = out.size() - 1;
+    for (size_t i = 0; i < ac.size(); i++) {
+      float d = dist(out.back(), i);
+      if (d < dists[i]) {  // Group i is closer to the new centre: move it.
+        dists[i] = d;
+        old_map[i] = enc_state->histogram_idx[i];
+        enc_state->histogram_idx[i] = out.size() - 1;
+      }
+      if (dists[i] > dists[farthest]) {
+        farthest = i;
+      }
+    }
+  }
+  // `remap` starts as the identity, so this first pass changes nothing.
+  std::vector<size_t> remap(out.size());
+  std::iota(remap.begin(), remap.end(), 0);
+  for (size_t i = 0; i < enc_state->histogram_idx.size(); i++) {
+    enc_state->histogram_idx[i] = remap[enc_state->histogram_idx[i]];
+  }
+  auto remap_cost = [&](std::vector<size_t> remap) {  // Exact cost of a remap.
+    std::vector<size_t> re_remap(remap.size(), remap.size());
+    size_t r = 0;
+    for (size_t i = 0; i < remap.size(); i++) {
+      if (re_remap[remap[i]] == remap.size()) {
+        re_remap[remap[i]] = r++;
+      }
+      remap[i] = re_remap[remap[i]];
+    }
+    auto tokens = ac;  // Work on a copy; `ac` itself is only changed at the end.
+    size_t max_hist = 0;
+    for (size_t i = 0; i < tokens.size(); i++) {
+      for (size_t j = 0; j < tokens[i].size(); j++) {
+        size_t hist = remap[enc_state->histogram_idx[i]];
+        tokens[i][j].context += hist * num_contexts;
+        max_hist = std::max(hist + 1, max_hist);
+      }
+    }
+    return token_cost(tokens, max_hist * num_contexts, /*estimate=*/false);
+  };
+  // Merge each cluster into the later cluster that lowers the exact cost most.
+  for (size_t src = 0; src < out.size(); src++) {
+    float cost = remap_cost(remap);
+    size_t best = src;
+    for (size_t j = src + 1; j < out.size(); j++) {
+      if (remap[src] == remap[j]) continue;
+      auto remap_c = remap;
+      std::replace(remap_c.begin(), remap_c.end(), remap[src], remap[j]);
+      float c = remap_cost(remap_c);
+      if (c < cost) {
+        best = j;
+        cost = c;
+      }
+    }
+    if (src != best) {
+      std::replace(remap.begin(), remap.end(), remap[src], remap[best]);
+    }
+  }
+  std::vector<size_t> re_remap(remap.size(), remap.size());  // Renumber 0..r-1.
+  size_t r = 0;
+  for (size_t i = 0; i < remap.size(); i++) {
+    if (re_remap[remap[i]] == remap.size()) {
+      re_remap[remap[i]] = r++;
+    }
+    remap[i] = re_remap[remap[i]];
+  }
+  // Publish the cluster count, the per-group index and the shifted contexts.
+  enc_state->shared.num_histograms =
+      *std::max_element(remap.begin(), remap.end()) + 1;
+  for (size_t i = 0; i < enc_state->histogram_idx.size(); i++) {
+    enc_state->histogram_idx[i] = remap[enc_state->histogram_idx[i]];
+  }
+  for (size_t i = 0; i < ac.size(); i++) {
+    for (size_t j = 0; j < ac[i].size(); j++) {
+      ac[i][j].context += enc_state->histogram_idx[i] * num_contexts;
+    }
+  }
+}
+
+uint64_t FrameFlagsFromParams(const CompressParams& cparams) {
+  const float dist = cparams.butteraugli_distance;
+
+  // Noise is off by default at low butteraugli distances: the original noise
+  // is then stored within the compressed image and adding more makes things
+  // worse. cparams.noise can override that default, and a positive photon
+  // noise ISO turns the flag on as well.
+  const bool noise_by_default = dist >= kMinButteraugliForNoise;
+  const bool use_noise = ApplyOverride(cparams.noise, noise_by_default) ||
+                         cparams.photon_noise_iso > 0;
+
+  // A DC frame is requested only for progressive DC outside modular mode.
+  const bool dc_frame = cparams.progressive_dc > 0 && !cparams.modular_mode;
+
+  uint64_t flags = 0;
+  if (use_noise) flags |= FrameHeader::kNoise;
+  if (dc_frame) flags |= FrameHeader::kUseDcFrame;
+  return flags;
+}
+
+Status LoopFilterFromParams(const CompressParams& cparams,
+                            FrameHeader* JXL_RESTRICT frame_header) {
+  LoopFilter& lf = frame_header->loop_filter;
+  const bool is_modular = frame_header->encoding == FrameEncoding::kModular;
+  const bool is_vardct = frame_header->encoding == FrameEncoding::kVarDCT;
+
+  // Gaborish defaults to enabled for VarDCT frames at Hare speed or slower,
+  // provided the decoding speed tier is below 4. cparams.gaborish may override
+  // this default.
+  const bool gab_by_default = cparams.speed_tier <= SpeedTier::kHare &&
+                              is_vardct && cparams.decoding_speed_tier < 4;
+  lf.gab = ApplyOverride(cparams.gaborish, gab_by_default);
+
+  // EPF iterations: an explicit cparams.epf wins. Otherwise modular frames and
+  // decoding speed tiers >= 3 get none, and other frames get one iteration per
+  // distance threshold reached (tier 2 skips the lowest threshold).
+  if (cparams.epf != -1) {
+    lf.epf_iters = cparams.epf;
+  } else {
+    lf.epf_iters = 0;
+    if (!is_modular && cparams.decoding_speed_tier < 3) {
+      constexpr float kThresholds[3] = {0.7, 1.5, 4.0};
+      const size_t first = cparams.decoding_speed_tier == 2 ? 1 : 0;
+      for (size_t i = first; i < 3; i++) {
+        if (cparams.butteraugli_distance >= kThresholds[i]) lf.epf_iters++;
+      }
+    }
+  }
+
+  // Strength of EPF in modular mode; a lossy palette forces it to 1.
+  if (is_modular) {
+    if (cparams.quality_pair.first < 100) {
+      // TODO(veluca): this formula is nonsense.
+      lf.epf_sigma_for_modular =
+          20.0f * (1.0f - cparams.quality_pair.first / 100);
+    }
+    if (cparams.lossy_palette) lf.epf_sigma_for_modular = 1.0f;
+  }
+
+  return true;
+}
+// Fills `frame_header` from `cparams`, `frame_info` and the image bundle `ib`.
+Status MakeFrameHeader(const CompressParams& cparams,
+                       const ProgressiveSplitter& progressive_splitter,
+                       const FrameInfo& frame_info, const ImageBundle& ib,
+                       FrameHeader* JXL_RESTRICT frame_header) {
+  frame_header->nonserialized_is_preview = frame_info.is_preview;
+  frame_header->is_last = frame_info.is_last;
+  frame_header->save_before_color_transform =
+      frame_info.save_before_color_transform;
+  frame_header->frame_type = frame_info.frame_type;
+  frame_header->name = ib.name;
+  // The pass description is filled in by the progressive splitter.
+  progressive_splitter.InitPasses(&frame_header->passes);
+
+  if (cparams.modular_mode) {
+    frame_header->encoding = FrameEncoding::kModular;
+    frame_header->group_size_shift = cparams.modular_group_size_shift;
+  }
+
+  frame_header->chroma_subsampling = ib.chroma_subsampling;
+  if (ib.IsJPEG()) {
+    // We are transcoding a JPEG, so encoding and colour transform are fixed.
+    frame_header->encoding = FrameEncoding::kVarDCT;
+    frame_header->color_transform = ib.color_transform;
+  } else {
+    frame_header->color_transform = cparams.color_transform;
+    if (!cparams.modular_mode &&
+        (frame_header->chroma_subsampling.MaxHShift() != 0 ||
+         frame_header->chroma_subsampling.MaxVShift() != 0)) {
+      return JXL_FAILURE(
+          "Chroma subsampling is not supported in VarDCT mode when not "
+          "recompressing JPEGs");
+    }
+  }
+
+  frame_header->flags = FrameFlagsFromParams(cparams);
+  // Noise is not supported in the Modular encoder for now.
+  if (frame_header->encoding != FrameEncoding::kVarDCT) {
+    frame_header->UpdateFlag(false, FrameHeader::Flags::kNoise);
+  }
+
+  JXL_RETURN_IF_ERROR(LoopFilterFromParams(cparams, frame_header));
+
+  frame_header->dc_level = frame_info.dc_level;
+  if (frame_header->dc_level > 2) {
+    // With 3 or more progressive_dc frames, the implementation does not yet
+    // work, see enc_cache.cc.
+    return JXL_FAILURE("progressive_dc > 2 is not yet supported");
+  }
+  if (cparams.progressive_dc > 0 &&
+      (cparams.ec_resampling != 1 || cparams.resampling != 1)) {
+    return JXL_FAILURE("Resampling not supported with DC frames");
+  }
+  if (cparams.resampling != 1 && cparams.resampling != 2 &&
+      cparams.resampling != 4 && cparams.resampling != 8) {
+    return JXL_FAILURE("Invalid resampling factor");
+  }
+  if (cparams.ec_resampling != 1 && cparams.ec_resampling != 2 &&
+      cparams.ec_resampling != 4 && cparams.ec_resampling != 8) {
+    return JXL_FAILURE("Invalid ec_resampling factor");
+  }
+  // Resized frames: record origin and size; flag them if they are non-default.
+  if (frame_info.frame_type != FrameType::kDCFrame) {
+    frame_header->frame_origin = ib.origin;
+    size_t ups = 1;
+    if (cparams.already_downsampled) ups = cparams.resampling;
+    frame_header->frame_size.xsize = ib.xsize() * ups;
+    frame_header->frame_size.ysize = ib.ysize() * ups;
+    if (ib.origin.x0 != 0 || ib.origin.y0 != 0 ||
+        frame_header->frame_size.xsize != frame_header->default_xsize() ||
+        frame_header->frame_size.ysize != frame_header->default_ysize()) {
+      frame_header->custom_size_or_origin = true;
+    }
+  }
+  // Upsampling: every extra channel gets the same ec_resampling factor.
+  frame_header->upsampling = cparams.resampling;
+  const std::vector<ExtraChannelInfo>& extra_channels =
+      frame_header->nonserialized_metadata->m.extra_channel_info;
+  frame_header->extra_channel_upsampling.clear();
+  frame_header->extra_channel_upsampling.resize(extra_channels.size(),
+                                                cparams.ec_resampling);
+  frame_header->save_as_reference = frame_info.save_as_reference;
+
+  // Set blending-related information.
+  if (ib.blend || frame_header->custom_size_or_origin) {
+    // Set blend_channel to the first alpha channel. These values are only
+    // encoded in case a blend mode involving alpha is used and there are more
+    // than one extra channels.
+    size_t index = 0;
+    if (extra_channels.size() > 1) {
+      for (size_t i = 0; i < extra_channels.size(); i++) {
+        if (extra_channels[i].type == ExtraChannel::kAlpha) {
+          index = i;
+          break;
+        }
+      }
+    }
+    frame_header->blending_info.alpha_channel = index;
+    frame_header->blending_info.mode =
+        ib.blend ? ib.blendmode : BlendMode::kReplace;
+    // previous frames are saved with ID 1.
+    frame_header->blending_info.source = 1;
+    for (size_t i = 0; i < extra_channels.size(); i++) {
+      frame_header->extra_channel_blending_info[i].alpha_channel = index;
+      BlendMode default_blend = ib.blendmode;
+      if (extra_channels[i].type != ExtraChannel::kBlack && i != index) {
+        // K needs to be blended, spot colors and other stuff gets added
+        default_blend = BlendMode::kAdd;
+      }
+      frame_header->extra_channel_blending_info[i].mode =
+          ib.blend ? default_blend : BlendMode::kReplace;
+      frame_header->extra_channel_blending_info[i].source = 1;
+    }
+  }
+
+  frame_header->animation_frame.duration = ib.duration;
+
+  // TODO(veluca): timecode.
+
+  return true;
+}
+
+// Invisible (alpha = 0) pixels tend to be a mess in optimized PNGs.
+// Since they have no visual impact whatsoever, we can replace them with
+// something that compresses better and reduces artifacts near the edges.
+// If `lossless` is set, invisible pixels are simply set to 0. Otherwise each
+// one becomes a weighted average of the pixel to the left, the pixel to the
+// topright, and its non-invisible neighbours (which get extra weight). `image`
+// is updated in place, so the left pixel may itself be an already smeared one.
+// This produces downward-blurry smears, with in the upwards direction only a
+// 1px edge duplication. Smearing in all directions would probably be better,
+// but needs an alpha-weighed convolution with a large kernel (maybe overkill).
+void SimplifyInvisible(Image3F* image, const ImageF& alpha, bool lossless) {
+  for (size_t c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < image->ysize(); ++y) {
+      float* JXL_RESTRICT row = image->PlaneRow(c, y);
+      const float* JXL_RESTRICT prow =
+          (y > 0 ? image->PlaneRow(c, y - 1) : nullptr);
+      const float* JXL_RESTRICT nrow =
+          (y + 1 < image->ysize() ? image->PlaneRow(c, y + 1) : nullptr);
+      const float* JXL_RESTRICT a = alpha.Row(y);
+      const float* JXL_RESTRICT pa = (y > 0 ? alpha.Row(y - 1) : nullptr);
+      const float* JXL_RESTRICT na =
+          (y + 1 < image->ysize() ? alpha.Row(y + 1) : nullptr);
+      for (size_t x = 0; x < image->xsize(); ++x) {
+        if (a[x] == 0) {
+          if (lossless) {
+            row[x] = 0;
+            continue;
+          }
+          float d = 0.f;  // Total weight accumulated into row[x].
+          row[x] = 0;
+          if (x > 0) {
+            row[x] += row[x - 1];  // Left pixel, possibly already smeared.
+            d++;
+            if (a[x - 1] > 0.f) {
+              row[x] += row[x - 1];
+              d++;
+            }
+          }
+          if (x + 1 < image->xsize()) {
+            if (y > 0) {
+              row[x] += prow[x + 1];  // Topright pixel, visible or not.
+              d++;
+            }
+            if (a[x + 1] > 0.f) {
+              row[x] += 2.f * row[x + 1];
+              d += 2.f;
+            }
+            if (y > 0 && pa[x + 1] > 0.f) {
+              row[x] += 2.f * prow[x + 1];
+              d += 2.f;
+            }
+            if (y + 1 < image->ysize() && na[x + 1] > 0.f) {
+              row[x] += 2.f * nrow[x + 1];
+              d += 2.f;
+            }
+          }
+          if (y > 0 && pa[x] > 0.f) {
+            row[x] += 2.f * prow[x];
+            d += 2.f;
+          }
+          if (y + 1 < image->ysize() && na[x] > 0.f) {
+            row[x] += 2.f * nrow[x];
+            d += 2.f;
+          }
+          if (d > 1.f) row[x] /= d;  // d is 0 or 1 otherwise: nothing to do.
+        }
+      }
+    }
+  }
+}
+
+}  // namespace
+
+class LossyFrameEncoder {
+ public:
+  LossyFrameEncoder(const CompressParams& cparams,
+                    const FrameHeader& frame_header,
+                    PassesEncoderState* JXL_RESTRICT enc_state,
+                    ThreadPool* pool, AuxOut* aux_out)  // Pointers are stored.
+      : enc_state_(enc_state), pool_(pool), aux_out_(aux_out) {
+    JXL_CHECK(InitializePassesSharedState(frame_header, &enc_state_->shared,
+                                          /*encoder=*/true));
+    enc_state_->cparams = cparams;  // Copied into the encoder state.
+    enc_state_->passes.clear();     // Start without any per-pass data.
+  }
+  // Runs the lossy heuristics on `opsin` and tokenizes the AC coefficients.
+  Status ComputeEncodingData(const ImageBundle* linear,
+                             Image3F* JXL_RESTRICT opsin, ThreadPool* pool,
+                             ModularFrameEncoder* modular_frame_encoder,
+                             BitWriter* JXL_RESTRICT writer,
+                             FrameHeader* frame_header) {
+    PROFILER_ZONE("ComputeEncodingData uninstrumented");
+    JXL_ASSERT((opsin->xsize() % kBlockDim) == 0 &&
+               (opsin->ysize() % kBlockDim) == 0);
+    PassesSharedState& shared = enc_state_->shared;
+    // x_qm_scale is 1 plus the number of distance steps that are exceeded.
+    if (!enc_state_->cparams.max_error_mode) {
+      float x_qm_scale_steps[3] = {0.65f, 1.25f, 9.0f};
+      shared.frame_header.x_qm_scale = 1;
+      for (float x_qm_scale_step : x_qm_scale_steps) {
+        if (enc_state_->cparams.butteraugli_distance > x_qm_scale_step) {
+          shared.frame_header.x_qm_scale++;
+        }
+      }
+    }
+
+    JXL_RETURN_IF_ERROR(enc_state_->heuristics->LossyFrameHeuristics(
+        enc_state_, modular_frame_encoder, linear, opsin, pool_, aux_out_));
+    // Note: the member pool_ is used throughout, not the `pool` argument.
+    InitializePassesEncoder(*opsin, pool_, enc_state_, modular_frame_encoder,
+                            aux_out_);
+    // One token vector per group for every pass.
+    enc_state_->passes.resize(enc_state_->progressive_splitter.GetNumPasses());
+    for (PassesEncoderState::PassData& pass : enc_state_->passes) {
+      pass.ac_tokens.resize(shared.frame_dim.num_groups);
+    }
+
+    ComputeAllCoeffOrders(shared.frame_dim);
+    shared.num_histograms = 1;
+    // Init callback: one group cache per thread count passed in by RunOnPool.
+    const auto tokenize_group_init = [&](const size_t num_threads) {
+      group_caches_.resize(num_threads);
+      return true;
+    };
+    const auto tokenize_group = [&](const int group_index, const int thread) {
+      // Tokenize coefficients.
+      const Rect rect = shared.BlockGroupRect(group_index);
+      for (size_t idx_pass = 0; idx_pass < enc_state_->passes.size();
+           idx_pass++) {
+        JXL_ASSERT(enc_state_->coeffs[idx_pass]->Type() == ACType::k32);
+        const int32_t* JXL_RESTRICT ac_rows[3] = {
+            enc_state_->coeffs[idx_pass]->PlaneRow(0, group_index, 0).ptr32,
+            enc_state_->coeffs[idx_pass]->PlaneRow(1, group_index, 0).ptr32,
+            enc_state_->coeffs[idx_pass]->PlaneRow(2, group_index, 0).ptr32,
+        };
+        // Ensure group cache is initialized.
+        group_caches_[thread].InitOnce();
+        TokenizeCoefficients(
+            &shared.coeff_orders[idx_pass * shared.coeff_order_size], rect,
+            ac_rows, shared.ac_strategy, frame_header->chroma_subsampling,
+            &group_caches_[thread].num_nzeroes,
+            &enc_state_->passes[idx_pass].ac_tokens[group_index],
+            enc_state_->shared.quant_dc, enc_state_->shared.raw_quant_field,
+            enc_state_->shared.block_ctx_map);
+      }
+    };
+    RunOnPool(pool_, 0, shared.frame_dim.num_groups, tokenize_group_init,
+              tokenize_group, "TokenizeGroup");
+    // Hand the (possibly updated) shared frame header back to the caller.
+    *frame_header = shared.frame_header;
+    return true;
+  }
+
+  Status ComputeJPEGTranscodingData(const jpeg::JPEGData& jpeg_data,
+                                    ModularFrameEncoder* modular_frame_encoder,
+                                    FrameHeader* frame_header) {
+    PROFILER_ZONE("ComputeJPEGTranscodingData uninstrumented");
+    PassesSharedState& shared = enc_state_->shared;
+
+    frame_header->x_qm_scale = 2;
+    frame_header->b_qm_scale = 2;
+
+    FrameDimensions frame_dim = frame_header->ToFrameDimensions();
+
+    const size_t xsize = frame_dim.xsize_padded;
+    const size_t ysize = frame_dim.ysize_padded;
+    const size_t xsize_blocks = frame_dim.xsize_blocks;
+    const size_t ysize_blocks = frame_dim.ysize_blocks;
+
+    // no-op chroma from luma
+    shared.cmap = ColorCorrelationMap(xsize, ysize, false);
+    shared.ac_strategy.FillDCT8();
+    FillImage(uint8_t(0), &shared.epf_sharpness);
+
+    enc_state_->coeffs.clear();
+    enc_state_->coeffs.emplace_back(make_unique<ACImageT<int32_t>>(
+        kGroupDim * kGroupDim, frame_dim.num_groups));
+
+    // convert JPEG quantization table to a Quantizer object
+    float dcquantization[3];
+    std::vector<QuantEncoding> qe(DequantMatrices::kNum,
+                                  QuantEncoding::Library(0));
+
+    auto jpeg_c_map = JpegOrder(frame_header->color_transform,
+                                jpeg_data.components.size() == 1);
+
+    std::vector<int> qt(192);
+    for (size_t c = 0; c < 3; c++) {
+      size_t jpeg_c = jpeg_c_map[c];
+      const int* quant =
+          jpeg_data.quant[jpeg_data.components[jpeg_c].quant_idx].values.data();
+
+      dcquantization[c] = 255 * 8.0f / quant[0];
+      for (size_t y = 0; y < 8; y++) {
+        for (size_t x = 0; x < 8; x++) {
+          // JPEG XL transposes the DCT, JPEG doesn't.
+          qt[c * 64 + 8 * x + y] = quant[8 * y + x];
+        }
+      }
+    }
+    DequantMatricesSetCustomDC(&shared.matrices, dcquantization);
+    float dcquantization_r[3] = {1.0f / dcquantization[0],
+                                 1.0f / dcquantization[1],
+                                 1.0f / dcquantization[2]};
+
+    qe[AcStrategy::Type::DCT] = QuantEncoding::RAW(qt);
+    DequantMatricesSetCustom(&shared.matrices, qe, modular_frame_encoder);
+
+    // Ensure that InvGlobalScale() is 1.
+    shared.quantizer = Quantizer(&shared.matrices, 1, kGlobalScaleDenom);
+    // Recompute MulDC() and InvMulDC().
+    shared.quantizer.RecomputeFromGlobalScale();
+
+    // Per-block dequant scaling should be 1.
+    FillImage(static_cast<int>(shared.quantizer.InvGlobalScale()),
+              &shared.raw_quant_field);
+
+    std::vector<int32_t> scaled_qtable(192);
+    for (size_t c = 0; c < 3; c++) {
+      for (size_t i = 0; i < 64; i++) {
+        scaled_qtable[64 * c + i] =
+            (1 << kCFLFixedPointPrecision) * qt[64 + i] / qt[64 * c + i];
+      }
+    }
+
+    auto jpeg_row = [&](size_t c, size_t y) {
+      return jpeg_data.components[jpeg_c_map[c]].coeffs.data() +
+             jpeg_data.components[jpeg_c_map[c]].width_in_blocks *
+                 kDCTBlockSize * y;
+    };
+
+    Image3F dc = Image3F(xsize_blocks, ysize_blocks);
+    bool DCzero =
+        (shared.frame_header.color_transform == ColorTransform::kYCbCr);
+    // Compute chroma-from-luma for AC (doesn't seem to be useful for DC)
+    if (frame_header->chroma_subsampling.Is444() &&
+        enc_state_->cparams.force_cfl_jpeg_recompression &&
+        jpeg_data.components.size() == 3) {
+      for (size_t c : {0, 2}) {
+        ImageSB* map = (c == 0 ? &shared.cmap.ytox_map : &shared.cmap.ytob_map);
+        const float kScale = kDefaultColorFactor;
+        const int kOffset = 127;
+        const float kBase =
+            c == 0 ? shared.cmap.YtoXRatio(0) : shared.cmap.YtoBRatio(0);
+        const float kZeroThresh =
+            kScale * kZeroBiasDefault[c] *
+            0.9999f;  // just epsilon less for better rounding
+
+        auto process_row = [&](int task, int thread) {
+          size_t ty = task;
+          int8_t* JXL_RESTRICT row_out = map->Row(ty);
+          for (size_t tx = 0; tx < map->xsize(); ++tx) {
+            const size_t y0 = ty * kColorTileDimInBlocks;
+            const size_t x0 = tx * kColorTileDimInBlocks;
+            const size_t y1 = std::min(frame_dim.ysize_blocks,
+                                       (ty + 1) * kColorTileDimInBlocks);
+            const size_t x1 = std::min(frame_dim.xsize_blocks,
+                                       (tx + 1) * kColorTileDimInBlocks);
+            int32_t d_num_zeros[257] = {0};
+            // TODO(veluca): this needs SIMD + fixed point adaptation, and/or
+            // conversion to the new CfL algorithm.
+            for (size_t y = y0; y < y1; ++y) {
+              const int16_t* JXL_RESTRICT row_m = jpeg_row(1, y);
+              const int16_t* JXL_RESTRICT row_s = jpeg_row(c, y);
+              for (size_t x = x0; x < x1; ++x) {
+                for (size_t coeffpos = 1; coeffpos < kDCTBlockSize;
+                     coeffpos++) {
+                  const float scaled_m =
+                      row_m[x * kDCTBlockSize + coeffpos] *
+                      scaled_qtable[64 * c + coeffpos] *
+                      (1.0f / (1 << kCFLFixedPointPrecision));
+                  const float scaled_s =
+                      kScale * row_s[x * kDCTBlockSize + coeffpos] +
+                      (kOffset - kBase * kScale) * scaled_m;
+                  if (std::abs(scaled_m) > 1e-8f) {
+                    float from, to;
+                    if (scaled_m > 0) {
+                      from = (scaled_s - kZeroThresh) / scaled_m;
+                      to = (scaled_s + kZeroThresh) / scaled_m;
+                    } else {
+                      from = (scaled_s + kZeroThresh) / scaled_m;
+                      to = (scaled_s - kZeroThresh) / scaled_m;
+                    }
+                    if (from < 0.0f) {
+                      from = 0.0f;
+                    }
+                    if (to > 255.0f) {
+                      to = 255.0f;
+                    }
+                    // Instead of clamping the both values
+                    // we just check that range is sane.
+                    if (from <= to) {
+                      d_num_zeros[static_cast<int>(std::ceil(from))]++;
+                      d_num_zeros[static_cast<int>(std::floor(to + 1))]--;
+                    }
+                  }
+                }
+              }
+            }
+            int best = 0;
+            int32_t best_sum = 0;
+            FindIndexOfSumMaximum(d_num_zeros, 256, &best, &best_sum);
+            int32_t offset_sum = 0;
+            for (int i = 0; i < 256; ++i) {
+              if (i <= kOffset) {
+                offset_sum += d_num_zeros[i];
+              }
+            }
+            row_out[tx] = 0;
+            if (best_sum > offset_sum + 1) {
+              row_out[tx] = best - kOffset;
+            }
+          }
+        };
+
+        RunOnPool(pool_, 0, map->ysize(), ThreadPool::SkipInit(), process_row,
+                  "FindCorrelation");
+      }
+    }
+    if (!frame_header->chroma_subsampling.Is444()) {
+      ZeroFillImage(&dc);
+      enc_state_->coeffs[0]->ZeroFill();
+    }
+    // JPEG DC is from -1024 to 1023.
+    std::vector<size_t> dc_counts[3] = {};
+    dc_counts[0].resize(2048);
+    dc_counts[1].resize(2048);
+    dc_counts[2].resize(2048);
+    size_t total_dc[3] = {};
+    for (size_t c : {1, 0, 2}) {
+      if (jpeg_data.components.size() == 1 && c != 1) {
+        enc_state_->coeffs[0]->ZeroFillPlane(c);
+        ZeroFillImage(&dc.Plane(c));
+        // Ensure no division by 0.
+        dc_counts[c][1024] = 1;
+        total_dc[c] = 1;
+        continue;
+      }
+      size_t hshift = frame_header->chroma_subsampling.HShift(c);
+      size_t vshift = frame_header->chroma_subsampling.VShift(c);
+      ImageSB& map = (c == 0 ? shared.cmap.ytox_map : shared.cmap.ytob_map);
+      for (size_t group_index = 0; group_index < frame_dim.num_groups;
+           group_index++) {
+        const size_t gx = group_index % frame_dim.xsize_groups;
+        const size_t gy = group_index / frame_dim.xsize_groups;
+        size_t offset = 0;
+        int32_t* JXL_RESTRICT ac =
+            enc_state_->coeffs[0]->PlaneRow(c, group_index, 0).ptr32;
+        for (size_t by = gy * kGroupDimInBlocks;
+             by < ysize_blocks && by < (gy + 1) * kGroupDimInBlocks; ++by) {
+          if ((by >> vshift) << vshift != by) continue;
+          const int16_t* JXL_RESTRICT inputjpeg = jpeg_row(c, by >> vshift);
+          const int16_t* JXL_RESTRICT inputjpegY = jpeg_row(1, by);
+          float* JXL_RESTRICT fdc = dc.PlaneRow(c, by >> vshift);
+          const int8_t* JXL_RESTRICT cm =
+              map.ConstRow(by / kColorTileDimInBlocks);
+          for (size_t bx = gx * kGroupDimInBlocks;
+               bx < xsize_blocks && bx < (gx + 1) * kGroupDimInBlocks; ++bx) {
+            if ((bx >> hshift) << hshift != bx) continue;
+            size_t base = (bx >> hshift) * kDCTBlockSize;
+            int idc;
+            if (DCzero) {
+              idc = inputjpeg[base];
+            } else {
+              idc = inputjpeg[base] + 1024 / qt[c * 64];
+            }
+            dc_counts[c][std::min(static_cast<uint32_t>(idc + 1024),
+                                  uint32_t(2047))]++;
+            total_dc[c]++;
+            fdc[bx >> hshift] = idc * dcquantization_r[c];
+            if (c == 1 || !enc_state_->cparams.force_cfl_jpeg_recompression ||
+                !frame_header->chroma_subsampling.Is444()) {
+              for (size_t y = 0; y < 8; y++) {
+                for (size_t x = 0; x < 8; x++) {
+                  ac[offset + y * 8 + x] = inputjpeg[base + x * 8 + y];
+                }
+              }
+            } else {
+              const int32_t scale =
+                  shared.cmap.RatioJPEG(cm[bx / kColorTileDimInBlocks]);
+
+              for (size_t y = 0; y < 8; y++) {
+                for (size_t x = 0; x < 8; x++) {
+                  int Y = inputjpegY[kDCTBlockSize * bx + x * 8 + y];
+                  int QChroma = inputjpeg[kDCTBlockSize * bx + x * 8 + y];
+                  // Fixed-point multiply of CfL scale with quant table ratio
+                  // first, and Y value second.
+                  int coeff_scale = (scale * scaled_qtable[64 * c + y * 8 + x] +
+                                     (1 << (kCFLFixedPointPrecision - 1))) >>
+                                    kCFLFixedPointPrecision;
+                  int cfl_factor = (Y * coeff_scale +
+                                    (1 << (kCFLFixedPointPrecision - 1))) >>
+                                   kCFLFixedPointPrecision;
+                  int QCR = QChroma - cfl_factor;
+                  ac[offset + y * 8 + x] = QCR;
+                }
+              }
+            }
+            offset += 64;
+          }
+        }
+      }
+    }
+
+    auto& dct = enc_state_->shared.block_ctx_map.dc_thresholds;
+    auto& num_dc_ctxs = enc_state_->shared.block_ctx_map.num_dc_ctxs;
+    enc_state_->shared.block_ctx_map.num_dc_ctxs = 1;
+    for (size_t i = 0; i < 3; i++) {
+      dct[i].clear();
+      int num_thresholds = (CeilLog2Nonzero(total_dc[i]) - 10) / 2;
+      // up to 3 buckets per channel:
+      // dark/medium/bright, yellow/unsat/blue, green/unsat/red
+      num_thresholds = std::min(std::max(num_thresholds, 0), 2);
+      size_t cumsum = 0;
+      size_t cut = total_dc[i] / (num_thresholds + 1);
+      for (int j = 0; j < 2048; j++) {
+        cumsum += dc_counts[i][j];
+        if (cumsum > cut) {
+          dct[i].push_back(j - 1025);
+          cut = total_dc[i] * (dct[i].size() + 1) / (num_thresholds + 1);
+        }
+      }
+      num_dc_ctxs *= dct[i].size() + 1;
+    }
+
+    auto& ctx_map = enc_state_->shared.block_ctx_map.ctx_map;
+    ctx_map.clear();
+    ctx_map.resize(3 * kNumOrders * num_dc_ctxs, 0);
+
+    int lbuckets = (dct[1].size() + 1);
+    for (size_t i = 0; i < num_dc_ctxs; i++) {
+      // up to 9 contexts for luma
+      ctx_map[i] = i / lbuckets;
+      // up to 3 contexts for chroma
+      ctx_map[kNumOrders * num_dc_ctxs + i] =
+          num_dc_ctxs / lbuckets + (i % lbuckets);
+      ctx_map[2 * kNumOrders * num_dc_ctxs + i] =
+          num_dc_ctxs / lbuckets + (i % lbuckets);
+    }
+    enc_state_->shared.block_ctx_map.num_ctxs =
+        *std::max_element(ctx_map.begin(), ctx_map.end()) + 1;
+
+    enc_state_->histogram_idx.resize(shared.frame_dim.num_groups);
+
+    // disable DC frame for now
+    shared.frame_header.UpdateFlag(false, FrameHeader::kUseDcFrame);
+    auto compute_dc_coeffs = [&](int group_index, int /* thread */) {
+      modular_frame_encoder->AddVarDCTDC(dc, group_index, /*nl_dc=*/false,
+                                         enc_state_);
+      modular_frame_encoder->AddACMetadata(group_index, /*jpeg_transcode=*/true,
+                                           enc_state_);
+    };
+    RunOnPool(pool_, 0, shared.frame_dim.num_dc_groups, ThreadPool::SkipInit(),
+              compute_dc_coeffs, "Compute DC coeffs");
+
+    // Must happen before WriteFrameHeader!
+    shared.frame_header.UpdateFlag(true, FrameHeader::kSkipAdaptiveDCSmoothing);
+
+    enc_state_->passes.resize(enc_state_->progressive_splitter.GetNumPasses());
+    for (PassesEncoderState::PassData& pass : enc_state_->passes) {
+      pass.ac_tokens.resize(shared.frame_dim.num_groups);
+    }
+
+    JXL_CHECK(enc_state_->passes.size() ==
+              1);  // skipping coeff splitting so need to have only one pass
+
+    ComputeAllCoeffOrders(frame_dim);
+    shared.num_histograms = 1;
+
+    const auto tokenize_group_init = [&](const size_t num_threads) {
+      group_caches_.resize(num_threads);
+      return true;
+    };
+    const auto tokenize_group = [&](const int group_index, const int thread) {
+      // Tokenize coefficients.
+      const Rect rect = shared.BlockGroupRect(group_index);
+      for (size_t idx_pass = 0; idx_pass < enc_state_->passes.size();
+           idx_pass++) {
+        JXL_ASSERT(enc_state_->coeffs[idx_pass]->Type() == ACType::k32);
+        const int32_t* JXL_RESTRICT ac_rows[3] = {
+            enc_state_->coeffs[idx_pass]->PlaneRow(0, group_index, 0).ptr32,
+            enc_state_->coeffs[idx_pass]->PlaneRow(1, group_index, 0).ptr32,
+            enc_state_->coeffs[idx_pass]->PlaneRow(2, group_index, 0).ptr32,
+        };
+        // Ensure group cache is initialized.
+        group_caches_[thread].InitOnce();
+        TokenizeCoefficients(
+            &shared.coeff_orders[idx_pass * shared.coeff_order_size], rect,
+            ac_rows, shared.ac_strategy, frame_header->chroma_subsampling,
+            &group_caches_[thread].num_nzeroes,
+            &enc_state_->passes[idx_pass].ac_tokens[group_index],
+            enc_state_->shared.quant_dc, enc_state_->shared.raw_quant_field,
+            enc_state_->shared.block_ctx_map);
+      }
+    };
+    RunOnPool(pool_, 0, shared.frame_dim.num_groups, tokenize_group_init,
+              tokenize_group, "TokenizeGroup");
+    *frame_header = shared.frame_header;
+    return true;
+  }
+
+  // Writes the DC-global data: quantizer DC and global scale, the block context
+  // map and the DC part of the color correlation map. `frame_header` is unused.
+  Status EncodeGlobalDCInfo(const FrameHeader& frame_header,
+                            BitWriter* writer) const {
+    auto& shared = enc_state_->shared;
+    JXL_RETURN_IF_ERROR(shared.quantizer.Encode(writer, kLayerQuant, aux_out_));
+    EncodeBlockCtxMap(shared.block_ctx_map, writer, aux_out_);
+    ColorCorrelationMapEncodeDC(&shared.cmap, writer, kLayerDC, aux_out_);
+    return true;
+  }
+
+  // Writes the AC-global data: dequantization matrices, histogram count and,
+  // for every pass, the coefficient orders followed by the histograms.
+  Status EncodeGlobalACInfo(BitWriter* writer,
+                            ModularFrameEncoder* modular_frame_encoder) {
+    auto& shared = enc_state_->shared;
+    const auto& cparams = enc_state_->cparams;
+    JXL_RETURN_IF_ERROR(DequantMatricesEncode(&shared.matrices, writer,
+                                              kLayerDequantTables, aux_out_,
+                                              modular_frame_encoder));
+    if (cparams.speed_tier <= SpeedTier::kTortoise) {
+      ClusterGroups(enc_state_);
+    }
+
+    // num_histograms - 1 is omitted entirely when the bit count is zero.
+    const size_t histo_bits = CeilLog2Nonzero(shared.frame_dim.num_groups);
+    if (histo_bits != 0) {
+      BitWriter::Allotment allotment(writer, histo_bits);
+      writer->Write(histo_bits, shared.num_histograms - 1);
+      ReclaimAndCharge(writer, &allotment, kLayerAC, aux_out_);
+    }
+
+    for (size_t pass = 0;
+         pass < enc_state_->progressive_splitter.GetNumPasses(); ++pass) {
+      // Coefficient orders: the used-orders value first, then the orders.
+      const auto& used_orders = enc_state_->used_orders[pass];
+      size_t order_bits = 0;
+      JXL_RETURN_IF_ERROR(
+          U32Coder::CanEncode(kOrderEnc, used_orders, &order_bits));
+      BitWriter::Allotment allotment(writer, order_bits);
+      JXL_CHECK(U32Coder::Write(kOrderEnc, used_orders, writer));
+      ReclaimAndCharge(writer, &allotment, kLayerOrder, aux_out_);
+      EncodeCoeffOrders(used_orders,
+                        &shared.coeff_orders[pass * shared.coeff_order_size],
+                        writer, kLayerOrder, aux_out_);
+
+      // Histograms: LZ77 is switched off when speed_tier > kTortoise, and at
+      // most 6 histograms are used when decoding_speed_tier >= 1.
+      const auto num_ac_ctxs = shared.block_ctx_map.NumACContexts();
+      HistogramParams hist_params(cparams.speed_tier, num_ac_ctxs);
+      if (cparams.speed_tier > SpeedTier::kTortoise) {
+        hist_params.lz77_method = HistogramParams::LZ77Method::kNone;
+      }
+      if (cparams.decoding_speed_tier >= 1) hist_params.max_histograms = 6;
+      PassesEncoderState::PassData& data = enc_state_->passes[pass];
+      BuildAndEncodeHistograms(
+          hist_params, shared.num_histograms * num_ac_ctxs, data.ac_tokens,
+          &data.codes, &data.context_map, writer, kLayerAC, aux_out_);
+    }
+
+    return true;
+  }
+
+  Status EncodeACGroup(size_t pass, size_t group_index, BitWriter* group_code,
+                       AuxOut* local_aux_out) {
+    const auto histo = enc_state_->histogram_idx[group_index];  // per group
+    return EncodeGroupTokenizedCoefficients(
+        group_index, pass, histo, *enc_state_, group_code, local_aux_out);
+  }
+
+  PassesEncoderState* State() { return enc_state_; }  // Returns enc_state_.
+
+ private:
+  // Fills enc_state_->used_orders and shared.coeff_orders for every pass.
+  void ComputeAllCoeffOrders(const FrameDimensions& frame_dim) {
+    PROFILER_FUNC;
+    auto& shared = enc_state_->shared;
+    const auto speed = enc_state_->cparams.speed_tier;
+    const size_t num_passes = enc_state_->progressive_splitter.GetNumPasses();
+    enc_state_->used_orders.resize(num_passes);
+    for (size_t pass = 0; pass < num_passes; ++pass) {
+      auto& used = enc_state_->used_orders[pass];
+      // Falcon or faster: no reordering, `used` keeps its current value.
+      if (speed < SpeedTier::kFalcon) {
+        used = ComputeUsedOrders(speed, shared.ac_strategy,
+                                 Rect(shared.raw_quant_field));
+      }
+      ComputeCoeffOrder(speed, *enc_state_->coeffs[pass], shared.ac_strategy,
+                        frame_dim, used,
+                        &shared.coeff_orders[pass * shared.coeff_order_size]);
+    }
+  }
+
+  // Finds the largest running (prefix) sum of array[0..len): *sum receives it
+  // and *idx the index where it is first reached; both are 0 if none is > 0.
+  template <typename V, typename R>
+  static inline void FindIndexOfSumMaximum(const V* array, const size_t len,
+                                           R* idx, V* sum) {
+    JXL_ASSERT(len > 0);
+    V running = 0, best = 0;
+    R best_idx = 0;
+    for (size_t i = 0; i < len; ++i) {
+      running += array[i];
+      if (!(running > best)) continue;
+      best = running;
+      best_idx = i;
+    }
+    *idx = best_idx;
+    *sum = best;
+  }
+
+  PassesEncoderState* JXL_RESTRICT enc_state_;
+  ThreadPool* pool_;
+  AuxOut* aux_out_;
+  std::vector<EncCache> group_caches_;  // Resized to one entry per thread.
+};
+
+// Encodes one frame of `ib` into `writer`: the frame header, the group offsets
+// and all group sections. Fails on invalid parameters or an empty image.
+Status EncodeFrame(const CompressParams& cparams_orig,
+                   const FrameInfo& frame_info, const CodecMetadata* metadata,
+                   const ImageBundle& ib, PassesEncoderState* passes_enc_state,
+                   ThreadPool* pool, BitWriter* writer, AuxOut* aux_out) {
+  ib.VerifyMetadata();
+
+  passes_enc_state->special_frames.clear();
+
+  CompressParams cparams = cparams_orig;
+
+  if (cparams.progressive_dc < 0) {
+    if (cparams.progressive_dc != -1) {
+      return JXL_FAILURE("Invalid progressive DC setting value (%d)",
+                         cparams.progressive_dc);
+    }
+    cparams.progressive_dc = 0;
+    // Auto mode (-1): enable progressive_dc for lower qualities only.
+    if (cparams.butteraugli_distance >=
+        kMinButteraugliDistanceForProgressiveDc) {
+      cparams.progressive_dc = 1;
+    }
+  }
+  if (cparams.ec_resampling < cparams.resampling) {
+    cparams.ec_resampling = cparams.resampling;
+  }
+  if (cparams.resampling > 1) cparams.progressive_dc = 0;
+
+  if (frame_info.dc_level + cparams.progressive_dc > 4) {
+    return JXL_FAILURE("Too many levels of progressive DC");
+  }
+
+  if (cparams.butteraugli_distance != 0 &&
+      cparams.butteraugli_distance < kMinButteraugliDistance) {
+    return JXL_FAILURE("Butteraugli distance is too low (%f)",
+                       cparams.butteraugli_distance);
+  }
+  if (cparams.butteraugli_distance > 0.9f && cparams.modular_mode == false &&
+      cparams.quality_pair.first == 100) {
+    // In case the color image is lossy, make the alpha slightly lossy too.
+    cparams.quality_pair.first =
+        std::max(90.f, 99.f - 0.3f * cparams.butteraugli_distance);
+  }
+
+  if (ib.IsJPEG()) {
+    cparams.gaborish = Override::kOff;
+    cparams.epf = 0;
+    cparams.modular_mode = false;
+  }
+
+  if (ib.xsize() == 0 || ib.ysize() == 0) return JXL_FAILURE("Empty image");
+
+  // Assert that this metadata is correctly set up for the compression params,
+  // this should have been done by enc_file.cc
+  JXL_ASSERT(metadata->m.xyb_encoded ==
+             (cparams.color_transform == ColorTransform::kXYB));
+  std::unique_ptr<FrameHeader> frame_header =
+      jxl::make_unique<FrameHeader>(metadata);
+  JXL_RETURN_IF_ERROR(MakeFrameHeader(cparams,
+                                      passes_enc_state->progressive_splitter,
+                                      frame_info, ib, frame_header.get()));
+  // Check that if the codestream header says xyb_encoded, the color_transform
+  // matches the requirement. This is checked from the cparams here, even though
+  // optimally we'd be able to check this against what has actually been written
+  // in the main codestream header, but since ib is a const object and the data
+  // written to the main codestream header is (in modified form) in ib, the
+  // encoder cannot indicate this fact in the ib's metadata.
+  if (cparams_orig.color_transform == ColorTransform::kXYB) {
+    if (frame_header->color_transform != ColorTransform::kXYB) {
+      return JXL_FAILURE(
+          "The color transform of frames must be xyb if the codestream is xyb "
+          "encoded");
+    }
+  } else {
+    if (frame_header->color_transform == ColorTransform::kXYB) {
+      return JXL_FAILURE(
+          "The color transform of frames cannot be xyb if the codestream is "
+          "not xyb encoded");
+    }
+  }
+
+  FrameDimensions frame_dim = frame_header->ToFrameDimensions();
+
+  const size_t num_groups = frame_dim.num_groups;
+
+  Image3F opsin;
+  const ColorEncoding& c_linear = ColorEncoding::LinearSRGB(ib.IsGray());
+  std::unique_ptr<ImageMetadata> metadata_linear =
+      jxl::make_unique<ImageMetadata>();
+  metadata_linear->xyb_encoded =
+      (cparams.color_transform == ColorTransform::kXYB);
+  metadata_linear->color_encoding = c_linear;
+  ImageBundle linear_storage(metadata_linear.get());
+
+  std::vector<AuxOut> aux_outs;
+  // LossyFrameEncoder stores a reference to a std::function<Status(size_t)>
+  // so we need to keep the std::function<Status(size_t)> being referenced
+  // alive while lossy_frame_encoder is used. We could make resize_aux_outs a
+  // lambda type by making LossyFrameEncoder a template instead, but this is
+  // simpler.
+  const std::function<Status(size_t)> resize_aux_outs =
+      [&aux_outs, aux_out](size_t num_threads) -> Status {
+    if (aux_out != nullptr) {
+      size_t old_size = aux_outs.size();
+      for (size_t i = num_threads; i < old_size; i++) {
+        aux_out->Assimilate(aux_outs[i]);
+      }
+      aux_outs.resize(num_threads);
+      // Each thread needs these INPUTS. Don't copy the entire AuxOut
+      // because it may contain stats which would be Assimilated multiple
+      // times below.
+      for (size_t i = old_size; i < aux_outs.size(); i++) {
+        aux_outs[i].dump_image = aux_out->dump_image;
+        aux_outs[i].debug_prefix = aux_out->debug_prefix;
+      }
+    }
+    return true;
+  };
+
+  LossyFrameEncoder lossy_frame_encoder(cparams, *frame_header,
+                                        passes_enc_state, pool, aux_out);
+  std::unique_ptr<ModularFrameEncoder> modular_frame_encoder =
+      jxl::make_unique<ModularFrameEncoder>(*frame_header, cparams);
+
+  const std::vector<ImageF>* extra_channels = &ib.extra_channels();
+  std::vector<ImageF> extra_channels_storage;
+
+  if (ib.IsJPEG()) {
+    JXL_RETURN_IF_ERROR(lossy_frame_encoder.ComputeJPEGTranscodingData(
+        *ib.jpeg_data, modular_frame_encoder.get(), frame_header.get()));
+  } else if (!lossy_frame_encoder.State()->heuristics->HandlesColorConversion(
+                 cparams, ib) ||
+             frame_header->encoding != FrameEncoding::kVarDCT) {
+    // Allocating a large enough image avoids a copy when padding.
+    opsin =
+        Image3F(RoundUpToBlockDim(ib.xsize()), RoundUpToBlockDim(ib.ysize()));
+    opsin.ShrinkTo(ib.xsize(), ib.ysize());
+
+    const bool want_linear = frame_header->encoding == FrameEncoding::kVarDCT &&
+                             cparams.speed_tier <= SpeedTier::kKitten;
+    const ImageBundle* JXL_RESTRICT ib_or_linear = &ib;
+
+    if (frame_header->color_transform == ColorTransform::kXYB &&
+        frame_info.ib_needs_color_transform) {
+      // linear_storage would only be used by the Butteraugli loop (passing
+      // linear sRGB avoids a color conversion there). Otherwise, don't
+      // fill it to reduce memory usage.
+      ib_or_linear =
+          ToXYB(ib, pool, &opsin, want_linear ? &linear_storage : nullptr);
+    } else {  // RGB or YCbCr: don't do anything (forward YCbCr is not
+              // implemented, this is only used when the input is already in
+              // YCbCr)
+              // If encoding a special DC or reference frame, don't do anything:
+              // input is already in XYB.
+      CopyImageTo(ib.color(), &opsin);
+    }
+    bool lossless = (frame_header->encoding == FrameEncoding::kModular &&
+                     cparams.quality_pair.first == 100);
+    if (ib.HasAlpha() && !ib.AlphaIsPremultiplied() &&
+        !ApplyOverride(cparams.keep_invisible, lossless) &&
+        cparams.ec_resampling == cparams.resampling) {
+      // Simplify invisible pixels.
+      SimplifyInvisible(&opsin, ib.alpha(), lossless);
+      if (want_linear) {
+        SimplifyInvisible(const_cast<Image3F*>(&ib_or_linear->color()),
+                          ib.alpha(), lossless);
+      }
+    }
+    if (aux_out != nullptr) {
+      JXL_RETURN_IF_ERROR(
+          aux_out->InspectImage3F("enc_frame:OpsinDynamicsImage", opsin));
+    }
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+      PadImageToBlockMultipleInPlace(&opsin);
+      JXL_RETURN_IF_ERROR(lossy_frame_encoder.ComputeEncodingData(
+          ib_or_linear, &opsin, pool, modular_frame_encoder.get(), writer,
+          frame_header.get()));
+    } else if (frame_header->upsampling != 1 && !cparams.already_downsampled) {
+      // In VarDCT mode, LossyFrameHeuristics takes care of running downsampling
+      // after noise, if necessary.
+      DownsampleImage(&opsin, frame_header->upsampling);
+    }
+  } else {
+    JXL_RETURN_IF_ERROR(lossy_frame_encoder.ComputeEncodingData(
+        &ib, &opsin, pool, modular_frame_encoder.get(), writer,
+        frame_header.get()));
+  }
+  if (cparams.ec_resampling != 1 && !cparams.already_downsampled) {
+    extra_channels = &extra_channels_storage;
+    for (size_t i = 0; i < ib.extra_channels().size(); i++) {
+      extra_channels_storage.emplace_back(CopyImage(ib.extra_channels()[i]));
+      DownsampleImage(&extra_channels_storage.back(), cparams.ec_resampling);
+    }
+  }
+  // Needs to happen *AFTER* VarDCT-ComputeEncodingData.
+  JXL_RETURN_IF_ERROR(modular_frame_encoder->ComputeEncodingData(
+      *frame_header, *ib.metadata(), &opsin, *extra_channels,
+      lossy_frame_encoder.State(), pool, aux_out,
+      /* do_color=*/frame_header->encoding == FrameEncoding::kModular));
+
+  writer->AppendByteAligned(lossy_frame_encoder.State()->special_frames);
+  frame_header->UpdateFlag(
+      lossy_frame_encoder.State()->shared.image_features.patches.HasAny(),
+      FrameHeader::kPatches);
+  frame_header->UpdateFlag(
+      lossy_frame_encoder.State()->shared.image_features.splines.HasAny(),
+      FrameHeader::kSplines);
+  JXL_RETURN_IF_ERROR(WriteFrameHeader(*frame_header, writer, aux_out));
+
+  const size_t num_passes =
+      passes_enc_state->progressive_splitter.GetNumPasses();
+
+  // DC global info + DC groups + AC global info + AC groups * num_passes.
+  const bool has_ac_global = true;
+  std::vector<BitWriter> group_codes(NumTocEntries(frame_dim.num_groups,
+                                                   frame_dim.num_dc_groups,
+                                                   num_passes, has_ac_global));
+  const size_t global_ac_index = frame_dim.num_dc_groups + 1;
+  const bool is_small_image = frame_dim.num_groups == 1 && num_passes == 1;
+  const auto get_output = [&](const size_t index) {
+    return &group_codes[is_small_image ? 0 : index];  // Small image: 1 writer.
+  };
+  auto ac_group_code = [&](size_t pass, size_t group) {
+    return get_output(AcGroupIndex(pass, group, frame_dim.num_groups,
+                                   frame_dim.num_dc_groups, has_ac_global));
+  };
+
+  if (frame_header->flags & FrameHeader::kPatches) {
+    PatchDictionaryEncoder::Encode(
+        lossy_frame_encoder.State()->shared.image_features.patches,
+        get_output(0), kLayerDictionary, aux_out);
+  }
+
+  if (frame_header->flags & FrameHeader::kSplines) {
+    EncodeSplines(lossy_frame_encoder.State()->shared.image_features.splines,
+                  get_output(0), kLayerSplines, HistogramParams(), aux_out);
+  }
+
+  if (frame_header->flags & FrameHeader::kNoise) {
+    EncodeNoise(lossy_frame_encoder.State()->shared.image_features.noise_params,
+                get_output(0), kLayerNoise, aux_out);
+  }
+
+  JXL_RETURN_IF_ERROR(
+      DequantMatricesEncodeDC(&lossy_frame_encoder.State()->shared.matrices,
+                              get_output(0), kLayerDequantTables, aux_out));
+  if (frame_header->encoding == FrameEncoding::kVarDCT) {
+    JXL_RETURN_IF_ERROR(
+        lossy_frame_encoder.EncodeGlobalDCInfo(*frame_header, get_output(0)));
+  }
+  JXL_RETURN_IF_ERROR(
+      modular_frame_encoder->EncodeGlobalInfo(get_output(0), aux_out));
+  JXL_RETURN_IF_ERROR(modular_frame_encoder->EncodeStream(
+      get_output(0), aux_out, kLayerModularGlobal, ModularStreamId::Global()));
+
+  const auto process_dc_group = [&](const int group_index, const int thread) {
+    AuxOut* my_aux_out = aux_out ? &aux_outs[thread] : nullptr;
+    BitWriter* output = get_output(group_index + 1);
+    if (frame_header->encoding == FrameEncoding::kVarDCT &&
+        !(frame_header->flags & FrameHeader::kUseDcFrame)) {
+      BitWriter::Allotment allotment(output, 2);
+      output->Write(2, modular_frame_encoder->extra_dc_precision[group_index]);
+      ReclaimAndCharge(output, &allotment, kLayerDC, my_aux_out);
+      JXL_CHECK(modular_frame_encoder->EncodeStream(
+          output, my_aux_out, kLayerDC,
+          ModularStreamId::VarDCTDC(group_index)));
+    }
+    JXL_CHECK(modular_frame_encoder->EncodeStream(
+        output, my_aux_out, kLayerModularDcGroup,
+        ModularStreamId::ModularDC(group_index)));
+    if (frame_header->encoding == FrameEncoding::kVarDCT) {
+      const Rect& rect =
+          lossy_frame_encoder.State()->shared.DCGroupRect(group_index);
+      size_t nb_bits = CeilLog2Nonzero(rect.xsize() * rect.ysize());
+      if (nb_bits != 0) {
+        BitWriter::Allotment allotment(output, nb_bits);
+        output->Write(nb_bits,
+                      modular_frame_encoder->ac_metadata_size[group_index] - 1);
+        ReclaimAndCharge(output, &allotment, kLayerControlFields, my_aux_out);
+      }
+      JXL_CHECK(modular_frame_encoder->EncodeStream(
+          output, my_aux_out, kLayerControlFields,
+          ModularStreamId::ACMetadata(group_index)));
+    }
+  };
+  RunOnPool(pool, 0, frame_dim.num_dc_groups, resize_aux_outs, process_dc_group,
+            "EncodeDCGroup");
+
+  if (frame_header->encoding == FrameEncoding::kVarDCT) {
+    JXL_RETURN_IF_ERROR(lossy_frame_encoder.EncodeGlobalACInfo(
+        get_output(global_ac_index), modular_frame_encoder.get()));
+  }
+
+  std::atomic<int> num_errors{0};  // Incremented by failing group tasks.
+  const auto process_group = [&](const int group_index, const int thread) {
+    AuxOut* my_aux_out = aux_out ? &aux_outs[thread] : nullptr;
+
+    for (size_t i = 0; i < num_passes; i++) {
+      if (frame_header->encoding == FrameEncoding::kVarDCT) {
+        if (!lossy_frame_encoder.EncodeACGroup(
+                i, group_index, ac_group_code(i, group_index), my_aux_out)) {
+          num_errors.fetch_add(1, std::memory_order_relaxed);
+          return;
+        }
+      }
+      // Write all modular encoded data (color?, alpha, depth, extra channels)
+      if (!modular_frame_encoder->EncodeStream(
+              ac_group_code(i, group_index), my_aux_out, kLayerModularAcGroup,
+              ModularStreamId::ModularAC(group_index, i))) {
+        num_errors.fetch_add(1, std::memory_order_relaxed);
+        return;
+      }
+    }
+  };
+  RunOnPool(pool, 0, num_groups, resize_aux_outs, process_group,
+            "EncodeGroupCoefficients");
+
+  // Resizing aux_outs to 0 also Assimilates the array.
+  static_cast<void>(resize_aux_outs(0));
+  JXL_RETURN_IF_ERROR(num_errors.load(std::memory_order_relaxed) == 0);
+
+  for (BitWriter& bw : group_codes) {
+    bw.ZeroPadToByte();  // end of group.
+  }
+
+  std::vector<coeff_order_t>* permutation_ptr = nullptr;
+  std::vector<coeff_order_t> permutation;
+  if (cparams.centerfirst && !(num_passes == 1 && num_groups == 1)) {
+    permutation_ptr = &permutation;
+    // Don't permute global DC/AC or DC.
+    permutation.resize(global_ac_index + 1);
+    std::iota(permutation.begin(), permutation.end(), 0);
+    std::vector<coeff_order_t> ac_group_order(num_groups);
+    std::iota(ac_group_order.begin(), ac_group_order.end(), 0);
+    size_t group_dim = frame_dim.group_dim;
+
+    // The center of the image is taken from cparams.center_x / center_y when
+    // given; otherwise it defaults to the middle of the image.
+
+    int64_t imag_cx;
+    if (cparams.center_x != static_cast<size_t>(-1)) {
+      JXL_RETURN_IF_ERROR(cparams.center_x < ib.xsize());
+      imag_cx = cparams.center_x;
+    } else {
+      imag_cx = ib.xsize() / 2;
+    }
+
+    int64_t imag_cy;
+    if (cparams.center_y != static_cast<size_t>(-1)) {
+      JXL_RETURN_IF_ERROR(cparams.center_y < ib.ysize());
+      imag_cy = cparams.center_y;
+    } else {
+      imag_cy = ib.ysize() / 2;
+    }
+
+    // The center of the group containing the center of the image.
+    int64_t cx = (imag_cx / group_dim) * group_dim + group_dim / 2;
+    int64_t cy = (imag_cy / group_dim) * group_dim + group_dim / 2;
+    // This identifies in what area of the central group the center of the image
+    // lies in.
+    double direction = -std::atan2(imag_cy - cy, imag_cx - cx);
+    // This identifies the side of the central group the center of the image
+    // lies closest to. This can take values 0, 1, 2, 3 corresponding to left,
+    // bottom, right, top.
+    int64_t side = std::fmod((direction + 5 * kPi / 4), 2 * kPi) * 2 / kPi;
+    auto get_distance_from_center = [&](size_t gid) {
+      Rect r = passes_enc_state->shared.GroupRect(gid);
+      int64_t gcx = r.x0() + group_dim / 2;
+      int64_t gcy = r.y0() + group_dim / 2;
+      int64_t dx = gcx - cx;
+      int64_t dy = gcy - cy;
+      // The angle is determined by taking atan2 and adding an appropriate
+      // starting point depending on the side we want to start on.
+      double angle = std::remainder(
+          std::atan2(dy, dx) + kPi / 4 + side * (kPi / 2), 2 * kPi);
+      // Concentric squares in clockwise order.
+      return std::make_pair(std::max(std::abs(dx), std::abs(dy)), angle);
+    };
+    std::sort(ac_group_order.begin(), ac_group_order.end(),
+              [&](coeff_order_t a, coeff_order_t b) {
+                return get_distance_from_center(a) <
+                       get_distance_from_center(b);
+              });
+    std::vector<coeff_order_t> inv_ac_group_order(ac_group_order.size(), 0);
+    for (size_t i = 0; i < ac_group_order.size(); i++) {
+      inv_ac_group_order[ac_group_order[i]] = i;
+    }
+    for (size_t i = 0; i < num_passes; i++) {
+      size_t pass_start = permutation.size();
+      for (coeff_order_t v : inv_ac_group_order) {
+        permutation.push_back(pass_start + v);
+      }
+    }
+    std::vector<BitWriter> new_group_codes(group_codes.size());
+    for (size_t i = 0; i < permutation.size(); i++) {
+      new_group_codes[permutation[i]] = std::move(group_codes[i]);
+    }
+    group_codes = std::move(new_group_codes);
+  }
+
+  JXL_RETURN_IF_ERROR(
+      WriteGroupOffsets(group_codes, permutation_ptr, writer, aux_out));
+  writer->AppendByteAligned(group_codes);
+  writer->ZeroPadToByte();  // end of frame.
+
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_frame.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_frame.h
new file mode 100644
index 0000000000..60e1c0ff65
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_frame.h
@@ -0,0 +1,52 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_FRAME_H_
+#define LIB_JXL_ENC_FRAME_H_
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+// Information needed for encoding a frame that is not contained elsewhere and
+// does not belong to `cparams`.
+struct FrameInfo {
+  // TODO(veluca): consider adding more parameters, such as custom patches.
+  bool save_before_color_transform = false;
+  // True if the input image bundle still has to be converted to the codestream
+  // colorspace (as deduced by cparams); false if it is already in that space.
+  // TODO(veluca): this is a hack - ImageBundle doesn't have a simple way to say
+  // "this is already in XYB".
+  bool ib_needs_color_transform = true;
+  FrameType frame_type = FrameType::kRegularFrame;
+  size_t dc_level = 0;  // EncodeFrame requires dc_level + progressive_dc <= 4.
+  // Only used for kRegularFrame.
+  bool is_last = true;
+  bool is_preview = false;
+  // Information for storing this frame for future use (only for non-DC frames).
+  size_t save_as_reference = 0;
+};
+
+// Encodes a single frame (including its header) into a byte stream. Groups may
+// be processed in parallel by `pool`. `metadata` is the ImageMetadata encoded
+// in the codestream and must be used for the FrameHeaders; do not use
+// ib.metadata. NOTE(review): `xclbinPath` is presumably the FPGA xclbin path.
+Status EncodeFrame(const CompressParams& cparams_orig,
+                   const FrameInfo& frame_info, const CodecMetadata* metadata,
+                   const ImageBundle& ib, PassesEncoderState* passes_enc_state,
+                   ThreadPool* pool, BitWriter* writer, AuxOut* aux_out,
+                   std::string xclbinPath = "");
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_FRAME_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_gamma_correct.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_gamma_correct.h
new file mode 100644
index 0000000000..0db7012bbe
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_gamma_correct.h
@@ -0,0 +1,36 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_GAMMA_CORRECT_H_
+#define LIB_JXL_ENC_GAMMA_CORRECT_H_
+
+// Deprecated: sRGB transfer function. Use color_management.h instead.
+
+#include <cmath>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+namespace jxl {
+
+// Converts an sRGB-encoded value to linear; inputs are clamped to [0, 1].
+static JXL_INLINE double Srgb8ToLinearDirect(double srgb) {
+  return srgb <= 0.0       ? 0.0
+         : srgb <= 0.04045 ? srgb / 12.92
+         : srgb >= 1.0     ? 1.0
+                           : std::pow((srgb + 0.055) / 1.055, 2.4);
+}
+
+// Converts a linear value to sRGB encoding; inputs are clamped to [0, 1].
+static JXL_INLINE double LinearToSrgb8Direct(double linear) {
+  return linear <= 0.0         ? 0.0
+         : linear >= 1.0       ? 1.0
+         : linear <= 0.0031308 ? linear * 12.92
+                               : std::pow(linear, 1.0 / 2.4) * 1.055 - 0.055;
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_GAMMA_CORRECT_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_group.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_group.cc
new file mode 100644
index 0000000000..91357dc9b7
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_group.cc
@@ -0,0 +1,342 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_group.h"
+
+#include <utility>
+
+#include "hwy/aligned_allocator.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_group.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_transforms-inl.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_transforms-inl.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/quantizer-inl.h"
+#include "lib/jxl/quantizer.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// NOTE: caller takes care of extracting quant from rect of RawQuantField.
+void QuantizeBlockAC(const Quantizer& quantizer, const bool error_diffusion,
+                     size_t c, int32_t quant, float qm_multiplier,
+                     size_t quant_kind, size_t xsize, size_t ysize,
+                     const float* JXL_RESTRICT block_in,
+                     int32_t* JXL_RESTRICT block_out) {
+  PROFILER_FUNC;  // Quantizes block_in into block_out for channel c.
+  const float* JXL_RESTRICT qm = quantizer.InvDequantMatrix(quant_kind, c);
+  const float qac = quantizer.Scale() * quant;
+  // Not SIMD-fied for now.
+  float thres[4] = {0.5f, 0.6f, 0.6f, 0.65f};  // zeroing threshold per quadrant
+  if (c != 1) {  // channels other than 1: 0.75 in every quadrant except 0
+    for (int i = 1; i < 4; ++i) {
+      thres[i] = 0.75f;
+    }
+  }
+
+  if (!error_diffusion) {  // SIMD path: scale, threshold, round, store
+    HWY_CAPPED(float, kBlockDim) df;
+    HWY_CAPPED(int32_t, kBlockDim) di;
+    HWY_CAPPED(uint32_t, kBlockDim) du;
+    const auto quant = Set(df, qac * qm_multiplier);  // shadows the parameter
+
+    for (size_t y = 0; y < ysize * kBlockDim; y++) {
+      // yfix is 2 for rows in the lower half of the block, 0 otherwise.
+      size_t yfix = static_cast<size_t>(y >= ysize * kBlockDim / 2) * 2;
+      const size_t off = y * kBlockDim * xsize;
+      for (size_t x = 0; x < xsize * kBlockDim; x += Lanes(df)) {
+        auto thr = Zero(df);
+        if (xsize == 1) {
+          // Per-lane choice: lanes at x >= 4 take the right-half threshold.
+          HWY_ALIGN uint32_t kMask[kBlockDim] = {0,   0,   0,   0,
+                                                 ~0u, ~0u, ~0u, ~0u};
+          const auto mask = MaskFromVec(BitCast(df, Load(du, kMask + x)));
+          thr =
+              IfThenElse(mask, Set(df, thres[yfix + 1]), Set(df, thres[yfix]));
+        } else {
+          // Same for all lanes in the vector.
+          thr = Set(
+              df,
+              thres[yfix + static_cast<size_t>(x >= xsize * kBlockDim / 2)]);
+        }
+
+        const auto q = Load(df, qm + off + x) * quant;
+        const auto in = Load(df, block_in + off + x);
+        const auto val = q * in;
+        const auto nzero_mask = Abs(val) >= thr;  // below threshold -> 0
+        const auto v = ConvertTo(di, IfThenElseZero(nzero_mask, Round(val)));
+        Store(v, di, block_out + off + x);
+      }
+    }
+    return;
+  }
+
+retry:  // scalar path; re-entered after thresholds have been lowered
+  int hfNonZeros[4] = {};       // per quadrant: sum of |quantized value|
+  float hfError[4] = {};        // per quadrant: sum of |val| - |quantized|
+  float hfMaxError[4] = {};     // per quadrant: largest single error
+  size_t hfMaxErrorIx[4] = {};  // per quadrant: position of that error
+  for (size_t y = 0; y < ysize * kBlockDim; y++) {
+    for (size_t x = 0; x < xsize * kBlockDim; x++) {
+      const size_t pos = y * kBlockDim * xsize + x;
+      if (x < xsize && y < ysize) {  // top-left xsize x ysize corner
+        // Ensure block is initialized
+        block_out[pos] = 0;
+        continue;
+      }
+      const size_t hfix = (static_cast<size_t>(y >= ysize * kBlockDim / 2) * 2 +
+                           static_cast<size_t>(x >= xsize * kBlockDim / 2));
+      const float val = block_in[pos] * (qm[pos] * qac * qm_multiplier);
+      float v = (std::abs(val) < thres[hfix]) ? 0 : rintf(val);
+      const float error = std::abs(val) - std::abs(v);
+      hfError[hfix] += error;
+      if (hfMaxError[hfix] < error) {
+        hfMaxError[hfix] = error;
+        hfMaxErrorIx[hfix] = pos;
+      }
+      if (v != 0.0f) {
+        hfNonZeros[hfix] += std::abs(v);  // accumulates magnitude, not a count
+      }
+      block_out[pos] = static_cast<int32_t>(rintf(v));
+    }
+  }
+  if (c != 1) return;  // the refinement below only applies to channel 1
+  // TODO(veluca): include AFV?
+  const size_t kPartialBlockKinds =
+      (1 << AcStrategy::Type::IDENTITY) | (1 << AcStrategy::Type::DCT2X2) |
+      (1 << AcStrategy::Type::DCT4X4) | (1 << AcStrategy::Type::DCT4X8) |
+      (1 << AcStrategy::Type::DCT8X4);
+  if ((1 << quant_kind) & kPartialBlockKinds) return;  // nor to these kinds
+  float hfErrorLimit = 0.1f * (xsize * ysize) * kDCTBlockSize * 0.25f;
+  bool goretry = false;
+  for (int i = 1; i < 4; ++i) {
+    // Large accumulated error but little quantized energy in this quadrant.
+    if (hfError[i] >= hfErrorLimit &&
+        hfNonZeros[i] <= (xsize + ysize) * 0.25f) {
+      if (thres[i] >= 0.4f) {  // lower bound keeps the retry loop finite
+        thres[i] -= 0.01f;
+        goretry = true;
+      }
+    }
+  }
+  if (goretry) goto retry;
+  for (int i = 1; i < 4; ++i) {
+    // Still all-zero with large error: force one +-1 at the worst position.
+    if (hfError[i] >= hfErrorLimit && hfNonZeros[i] == 0) {
+      const size_t pos = hfMaxErrorIx[i];
+      if (hfMaxError[i] >= 0.4f) {
+        block_out[pos] = block_in[pos] > 0.0f ? 1.0f : -1.0f;
+      }
+    }
+  }
+}
+
+// Quantizes `inout` (channel 1) into `quantized`, then overwrites `inout` with
+// its dequantized value. NOTE: caller extracts quant from the RawQuantField.
+void QuantizeRoundtripYBlockAC(const Quantizer& quantizer,
+                               const bool error_diffusion, int32_t quant,
+                               size_t quant_kind, size_t xsize, size_t ysize,
+                               const float* JXL_RESTRICT biases,
+                               float* JXL_RESTRICT inout,
+                               int32_t* JXL_RESTRICT quantized) {
+  QuantizeBlockAC(quantizer, error_diffusion, /*c=*/1, quant,
+                  /*qm_multiplier=*/1.0f, quant_kind, xsize, ysize, inout,
+                  quantized);
+  PROFILER_ZONE("enc quant adjust bias");
+  const float* JXL_RESTRICT dequant_matrix =
+      quantizer.DequantMatrix(quant_kind, 1);
+  HWY_CAPPED(float, kDCTBlockSize) df;
+  HWY_CAPPED(int32_t, kDCTBlockSize) di;
+  const size_t num_coeffs = kDCTBlockSize * xsize * ysize;
+  const auto inv_qac = Set(df, quantizer.inv_quant_ac(quant));
+  for (size_t k = 0; k < num_coeffs; k += Lanes(df)) {
+    const auto q_int = Load(di, quantized + k);
+    const auto q_biased = AdjustQuantBias(di, 1, q_int, biases);
+    Store(q_biased * Load(df, dequant_matrix + k) * inv_qac, df, inout + k);
+  }
+}
+
+void ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
+                         const Image3F& opsin, Image3F* dc) {
+  PROFILER_FUNC;  // Transforms and quantizes all blocks of one group.
+  const Rect block_group_rect = enc_state->shared.BlockGroupRect(group_idx);
+  const Rect group_rect = enc_state->shared.GroupRect(group_idx);
+  const Rect cmap_rect(
+      block_group_rect.x0() / kColorTileDimInBlocks,
+      block_group_rect.y0() / kColorTileDimInBlocks,
+      DivCeil(block_group_rect.xsize(), kColorTileDimInBlocks),
+      DivCeil(block_group_rect.ysize(), kColorTileDimInBlocks));
+
+  const size_t xsize_blocks = block_group_rect.xsize();
+  const size_t ysize_blocks = block_group_rect.ysize();
+
+  const size_t dc_stride = static_cast<size_t>(dc->PixelsPerRow());
+  const size_t opsin_stride = static_cast<size_t>(opsin.PixelsPerRow());
+
+  const ImageI& full_quant_field = enc_state->shared.raw_quant_field;
+  const CompressParams& cparams = enc_state->cparams;
+  // mem: quantized planes; fmem: three float planes followed by scratch space.
+  // TODO(veluca): consider strategies to reduce this memory.
+  auto mem = hwy::AllocateAligned<int32_t>(3 * AcStrategy::kMaxCoeffArea);
+  auto fmem = hwy::AllocateAligned<float>(5 * AcStrategy::kMaxCoeffArea);
+  float* JXL_RESTRICT scratch_space =
+      fmem.get() + 3 * AcStrategy::kMaxCoeffArea;
+  {
+    // Only use error diffusion in Squirrel mode or slower.
+    const bool error_diffusion = cparams.speed_tier <= SpeedTier::kSquirrel;
+    constexpr HWY_CAPPED(float, kDCTBlockSize) d;
+
+    // Per pass and channel: destination row of this group's coefficients.
+    int32_t* JXL_RESTRICT coeffs[kMaxNumPasses][3] = {};
+    size_t num_passes = enc_state->progressive_splitter.GetNumPasses();
+    JXL_DASSERT(num_passes > 0);
+    for (size_t i = 0; i < num_passes; i++) {
+      // TODO(veluca): 16-bit quantized coeffs are not implemented yet.
+      JXL_ASSERT(enc_state->coeffs[i]->Type() == ACType::k32);
+      for (size_t c = 0; c < 3; c++) {
+        coeffs[i][c] = enc_state->coeffs[i]->PlaneRow(c, group_idx, 0).ptr32;
+      }
+    }
+
+    HWY_ALIGN float* coeffs_in = fmem.get();
+    HWY_ALIGN int32_t* quantized = mem.get();
+
+    size_t offset = 0;  // advances by `size` per transformed block
+
+    for (size_t by = 0; by < ysize_blocks; ++by) {
+      const int32_t* JXL_RESTRICT row_quant_ac =
+          block_group_rect.ConstRow(full_quant_field, by);
+      size_t ty = by / kColorTileDimInBlocks;
+      const int8_t* JXL_RESTRICT row_cmap[3] = {
+          cmap_rect.ConstRow(enc_state->shared.cmap.ytox_map, ty),
+          nullptr,  // channel 1 has no correlation row
+          cmap_rect.ConstRow(enc_state->shared.cmap.ytob_map, ty),
+      };
+      const float* JXL_RESTRICT opsin_rows[3] = {
+          group_rect.ConstPlaneRow(opsin, 0, by * kBlockDim),
+          group_rect.ConstPlaneRow(opsin, 1, by * kBlockDim),
+          group_rect.ConstPlaneRow(opsin, 2, by * kBlockDim),
+      };
+      float* JXL_RESTRICT dc_rows[3] = {
+          block_group_rect.PlaneRow(dc, 0, by),
+          block_group_rect.PlaneRow(dc, 1, by),
+          block_group_rect.PlaneRow(dc, 2, by),
+      };
+      AcStrategyRow ac_strategy_row =
+          enc_state->shared.ac_strategy.ConstRow(block_group_rect, by);
+      for (size_t tx = 0; tx < DivCeil(xsize_blocks, kColorTileDimInBlocks);
+           tx++) {
+        // Correlation factors are constant within one color tile.
+        const auto x_factor =
+            Set(d, enc_state->shared.cmap.YtoXRatio(row_cmap[0][tx]));
+        const auto b_factor =
+            Set(d, enc_state->shared.cmap.YtoBRatio(row_cmap[2][tx]));
+        for (size_t bx = tx * kColorTileDimInBlocks;
+             bx < xsize_blocks && bx < (tx + 1) * kColorTileDimInBlocks; ++bx) {
+          const AcStrategy acs = ac_strategy_row[bx];
+          if (!acs.IsFirstBlock()) continue;
+
+          size_t xblocks = acs.covered_blocks_x();
+          size_t yblocks = acs.covered_blocks_y();
+
+          CoefficientLayout(&yblocks, &xblocks);
+
+          // Planes are packed at stride `size`: X at 0, Y at size, B at 2*size.
+          size_t size = kDCTBlockSize * xblocks * yblocks;
+
+          // DCT Y channel, roundtrip-quantize it and set DC.
+          const int32_t quant_ac = row_quant_ac[bx];
+          TransformFromPixels(acs.Strategy(), opsin_rows[1] + bx * kBlockDim,
+                              opsin_stride, coeffs_in + size, scratch_space);
+          DCFromLowestFrequencies(acs.Strategy(), coeffs_in + size,
+                                  dc_rows[1] + bx, dc_stride);
+          QuantizeRoundtripYBlockAC(
+              enc_state->shared.quantizer, error_diffusion, quant_ac,
+              acs.RawStrategy(), xblocks, yblocks, kDefaultQuantBias,
+              coeffs_in + size, quantized + size);
+          // coeffs_in + size now holds the dequantized channel-1 values.
+          // DCT X and B channels
+          for (size_t c : {0, 2}) {
+            TransformFromPixels(acs.Strategy(), opsin_rows[c] + bx * kBlockDim,
+                                opsin_stride, coeffs_in + c * size,
+                                scratch_space);
+          }
+
+          // Unapply color correlation
+          for (size_t k = 0; k < size; k += Lanes(d)) {
+            const auto in_x = Load(d, coeffs_in + k);
+            const auto in_y = Load(d, coeffs_in + size + k);
+            const auto in_b = Load(d, coeffs_in + 2 * size + k);
+            const auto out_x = in_x - x_factor * in_y;
+            const auto out_b = in_b - b_factor * in_y;
+            Store(out_x, d, coeffs_in + k);
+            Store(out_b, d, coeffs_in + 2 * size + k);
+          }
+
+          // Quantize X and B channels and set DC.
+          for (size_t c : {0, 2}) {
+            QuantizeBlockAC(enc_state->shared.quantizer, error_diffusion, c,
+                            quant_ac,
+                            c == 0 ? enc_state->x_qm_multiplier
+                                   : enc_state->b_qm_multiplier,
+                            acs.RawStrategy(), xblocks, yblocks,
+                            coeffs_in + c * size, quantized + c * size);
+            DCFromLowestFrequencies(acs.Strategy(), coeffs_in + c * size,
+                                    dc_rows[c] + bx, dc_stride);
+          }
+          // Distribute the quantized block over the per-pass buffers.
+          enc_state->progressive_splitter.SplitACCoefficients(
+              quantized, size, acs, bx, by, offset, coeffs);
+          offset += size;
+        }
+      }
+    }
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(ComputeCoefficients);
+void ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
+                         const Image3F& opsin, Image3F* dc) {
+  // Forward to the per-target implementation selected by Highway at runtime.
+  HWY_DYNAMIC_DISPATCH(ComputeCoefficients)(group_idx, enc_state, opsin, dc);
+}
+
+Status EncodeGroupTokenizedCoefficients(size_t group_idx, size_t pass_idx,
+                                        size_t histogram_idx,
+                                        const PassesEncoderState& enc_state,
+                                        BitWriter* writer, AuxOut* aux_out) {
+  // Writes the histogram selector (if needed), then the group's AC tokens.
+  const size_t num_histograms = enc_state.shared.num_histograms;
+  // num_histograms is 0 only for lossless. CeilLog2Nonzero presumably requires
+  // a nonzero argument (per its name), so that case is mapped to 0 bits.
+  JXL_ASSERT(num_histograms == 0 || histogram_idx < num_histograms);
+  const size_t histo_selector_bits =
+      num_histograms == 0 ? 0 : CeilLog2Nonzero(num_histograms);
+  if (histo_selector_bits != 0) {
+    BitWriter::Allotment allotment(writer, histo_selector_bits);
+    writer->Write(histo_selector_bits, histogram_idx);
+    ReclaimAndCharge(writer, &allotment, kLayerAC, aux_out);
+  }
+  WriteTokens(enc_state.passes[pass_idx].ac_tokens[group_idx],
+              enc_state.passes[pass_idx].codes,
+              enc_state.passes[pass_idx].context_map, writer, kLayerACTokens,
+              aux_out);
+  return true;  // no failure path in this function
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_group.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_group.h
new file mode 100644
index 0000000000..62468ddf95
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_group.h
@@ -0,0 +1,30 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_GROUP_H_
+#define LIB_JXL_ENC_GROUP_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+
+namespace jxl {
+
+// Computes the quantized coefficients of group `group_idx` and fills `dc`.
+void ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
+                         const Image3F& opsin, Image3F* dc);
+// Writes the histogram selector and AC tokens of one group/pass to `writer`.
+Status EncodeGroupTokenizedCoefficients(size_t group_idx, size_t pass_idx,
+                                        size_t histogram_idx,
+                                        const PassesEncoderState& enc_state,
+                                        BitWriter* writer, AuxOut* aux_out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_GROUP_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_heuristics.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_heuristics.cc
new file mode 100644
index 0000000000..3324e50778
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_heuristics.cc
@@ -0,0 +1,435 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_heuristics.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <numeric>
+#include <string>
+
+#include "lib/jxl/enc_ac_strategy.h"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_ar_control_field.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_chroma_from_luma.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_noise.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_photon_noise.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/gaborish.h"
+
+namespace jxl {
+namespace {
+void FindBestBlockEntropyModel(PassesEncoderState& enc_state) {
+  // Chooses enc_state.shared.block_ctx_map: a fixed two-context map when
+  // decoding_speed_tier >= 1, otherwise a map clustered from quant-field and
+  // AC-strategy statistics. Falcon-or-faster tiers and small images: no-op.
+  if (enc_state.cparams.decoding_speed_tier >= 1) {
+    static constexpr uint8_t kSimpleCtxMap[] = {
+        // Cluster all blocks together
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  //
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  //
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  //
+    };
+    static_assert(
+        3 * kNumOrders == sizeof(kSimpleCtxMap) / sizeof *kSimpleCtxMap,
+        "Update simple context map");
+
+    // Bind by reference: a by-value copy would silently discard the update.
+    auto& bcm = enc_state.shared.block_ctx_map;
+    bcm.ctx_map.assign(std::begin(kSimpleCtxMap), std::end(kSimpleCtxMap));
+    bcm.num_ctxs = 2;
+    bcm.num_dc_ctxs = 1;
+    return;
+  }
+  if (enc_state.cparams.speed_tier >= SpeedTier::kFalcon) {
+    return;
+  }
+  const ImageI& rqf = enc_state.shared.raw_quant_field;
+  // No need to change context modeling for small images.
+  size_t tot = rqf.xsize() * rqf.ysize();
+  size_t size_for_ctx_model =
+      (1 << 10) * enc_state.cparams.butteraugli_distance;
+  if (tot < size_for_ctx_model) return;
+
+  struct OccCounters {
+    // count the occurrences of each qf value and each strategy type.
+    OccCounters(const ImageI& rqf, const AcStrategyImage& ac_strategy) {
+      for (size_t y = 0; y < rqf.ysize(); y++) {
+        const int32_t* qf_row = rqf.Row(y);
+        AcStrategyRow acs_row = ac_strategy.ConstRow(y);
+        for (size_t x = 0; x < rqf.xsize(); x++) {
+          int ord = kStrategyOrder[acs_row[x].RawStrategy()];
+          int qf = qf_row[x] - 1;  // presumably raw values are in [1, 256]
+          qf_counts[qf]++;
+          qf_ord_counts[ord][qf]++;
+          ord_counts[ord]++;
+        }
+      }
+    }
+
+    size_t qf_counts[256] = {};
+    size_t qf_ord_counts[kNumOrders][256] = {};
+    size_t ord_counts[kNumOrders] = {};
+  };
+  // The OccCounters struct is too big to allocate on the stack.
+  std::unique_ptr<OccCounters> counters(
+      new OccCounters(rqf, enc_state.shared.ac_strategy));
+
+  // Splitting the context model according to the quantization field seems to
+  // mostly benefit only large images.
+  size_t size_for_qf_split = (1 << 13) * enc_state.cparams.butteraugli_distance;
+  size_t num_qf_segments = tot < size_for_qf_split ? 1 : 2;
+  std::vector<uint32_t>& qft = enc_state.shared.block_ctx_map.qf_thresholds;
+  qft.clear();
+  // Divide the quant field in up to num_qf_segments segments.
+  size_t cumsum = 0;
+  size_t next = 1;
+  size_t last_cut = 256;
+  size_t cut = tot * next / num_qf_segments;
+  for (uint32_t j = 0; j < 256; j++) {
+    cumsum += counters->qf_counts[j];
+    if (cumsum > cut) {
+      if (j != 0) qft.push_back(j);
+      last_cut = j;
+      while (cumsum > cut) {
+        next++;
+        cut = tot * next / num_qf_segments;
+      }
+    } else if (next > qft.size() + 1) {
+      if (j - 1 == last_cut && j != 0) qft.push_back(j);
+    }
+  }
+
+  // Count the occurrences of each segment.
+  std::vector<size_t> counts(kNumOrders * (qft.size() + 1));
+  size_t qft_pos = 0;
+  for (size_t j = 0; j < 256; j++) {
+    if (qft_pos < qft.size() && j == qft[qft_pos]) {
+      qft_pos++;
+    }
+    for (size_t i = 0; i < kNumOrders; i++) {
+      counts[qft_pos + i * (qft.size() + 1)] += counters->qf_ord_counts[i][j];
+    }
+  }
+
+  // Repeatedly merge the lowest-count pair.
+  std::vector<uint8_t> remap((qft.size() + 1) * kNumOrders);
+  std::iota(remap.begin(), remap.end(), 0);
+  std::vector<uint8_t> clusters(remap);
+  size_t nb_clusters = Clamp1((int)(tot / size_for_ctx_model / 2), 4, 8);
+  // This is O(n^2 log n), but n <= 14.
+  while (clusters.size() > nb_clusters) {
+    std::sort(clusters.begin(), clusters.end(),
+              [&](int a, int b) { return counts[a] > counts[b]; });
+    counts[clusters[clusters.size() - 2]] += counts[clusters.back()];
+    counts[clusters.back()] = 0;
+    remap[clusters.back()] = clusters[clusters.size() - 2];
+    clusters.pop_back();
+  }
+  for (size_t i = 0; i < remap.size(); i++) {
+    while (remap[remap[i]] != remap[i]) {  // follow merge chains to the root
+      remap[i] = remap[remap[i]];
+    }
+  }
+  // Relabel starting from 0.
+  std::vector<uint8_t> remap_remap(remap.size(), remap.size());
+  size_t num = 0;
+  for (size_t i = 0; i < remap.size(); i++) {
+    if (remap_remap[remap[i]] == remap.size()) {
+      remap_remap[remap[i]] = num++;
+    }
+    remap[i] = remap_remap[remap[i]];
+  }
+  // Write the block context map.
+  auto& ctx_map = enc_state.shared.block_ctx_map.ctx_map;
+  ctx_map = remap;
+  ctx_map.resize(remap.size() * 3);
+  for (size_t i = remap.size(); i < remap.size() * 3; i++) {
+    ctx_map[i] = remap[i % remap.size()] + num;  // two more copies, shifted
+  }
+  enc_state.shared.block_ctx_map.num_ctxs =
+      *std::max_element(ctx_map.begin(), ctx_map.end()) + 1;
+}
+
+// Returns the target size in bytes: the explicit target size when given,
+// otherwise one derived from the target bitrate, otherwise 0.
+size_t TargetSize(const CompressParams& cparams,
+                  const FrameDimensions& frame_dim) {
+  const bool has_explicit_size = cparams.target_size > 0;
+  if (has_explicit_size) return cparams.target_size;
+  if (!(cparams.target_bitrate > 0.0)) return 0;
+  // Bitrate times pixel count, converted from bits to bytes; the added 0.5
+  // rounds to nearest when the value is truncated to size_t on return.
+  const auto rounded_bytes =
+      0.5 + cparams.target_bitrate * frame_dim.xsize * frame_dim.ysize /
+                kBitsPerByte;
+  return rounded_bytes;
+}
+}  // namespace
+
+void FindBestDequantMatrices(const CompressParams& cparams,
+                             const Image3F& opsin,
+                             ModularFrameEncoder* modular_frame_encoder,
+                             DequantMatrices* dequant_matrices) {
+  // TODO(veluca): quant matrices for no-gaborish.
+  // TODO(veluca): heuristics for in-bitstream quant tables.
+  // Start from the default matrices; `opsin` is currently unused.
+  *dequant_matrices = DequantMatrices();
+  if (!cparams.max_error_mode) return;
+  // In max-error mode all quantization matrices get constant numerators:
+  // the reciprocal of the per-channel maximum error.
+  const float inv_err[3] = {1.0f / cparams.max_error[0],
+                            1.0f / cparams.max_error[1],
+                            1.0f / cparams.max_error[2]};
+  float weights[3][1] = {{inv_err[0]}, {inv_err[1]}, {inv_err[2]}};
+  DctQuantWeightParams dct_params(weights);
+  std::vector<QuantEncoding> encodings(DequantMatrices::kNum,
+                                       QuantEncoding::DCT(dct_params));
+  DequantMatricesSetCustom(dequant_matrices, encodings,
+                           modular_frame_encoder);
+  float dc_weights[3] = {inv_err[0], inv_err[1], inv_err[2]};
+  DequantMatricesSetCustomDC(dequant_matrices, dc_weights);
+}
+
+bool DefaultEncoderHeuristics::HandlesColorConversion(
+    const CompressParams& cparams, const ImageBundle& ib) {
+  return !(cparams.noise == Override::kOn || cparams.patches == Override::kOn ||
+           cparams.speed_tier < SpeedTier::kWombat || cparams.resampling != 1 ||
+           cparams.color_transform != ColorTransform::kXYB ||
+           cparams.modular_mode || ib.HasAlpha());  // any blocker => false
+}
+
+Status DefaultEncoderHeuristics::LossyFrameHeuristics(
+    PassesEncoderState* enc_state, ModularFrameEncoder* modular_frame_encoder,
+    const ImageBundle* original_pixels, Image3F* opsin, ThreadPool* pool,
+    AuxOut* aux_out) {  // Fills *enc_state for a lossy frame; modifies *opsin.
+  PROFILER_ZONE("JxlLossyFrameHeuristics uninstrumented");
+
+  CompressParams& cparams = enc_state->cparams;  // mutable: target_size is set
+  PassesSharedState& shared = enc_state->shared;
+
+  // Compute parameters for noise synthesis.
+  if (shared.frame_header.flags & FrameHeader::kNoise) {
+    PROFILER_ZONE("enc GetNoiseParam");
+    if (cparams.photon_noise_iso > 0) {
+      shared.image_features.noise_params = SimulatePhotonNoise(
+          opsin->xsize(), opsin->ysize(), cparams.photon_noise_iso);
+    } else {
+      // Don't start at zero amplitude since adding noise is expensive -- it
+      // significantly slows down decoding, and this is unlikely to
+      // completely go away even with advanced optimizations. After the
+      // kNoiseModelingRampUpDistanceRange we have reached the full level,
+      // i.e. noise is no longer represented by the compressed image, so we
+      // can add full noise by the noise modeling itself.
+      static const float kNoiseModelingRampUpDistanceRange = 0.6;
+      static const float kNoiseLevelAtStartOfRampUp = 0.25;
+      static const float kNoiseRampupStart = 1.0;
+      // TODO(user) test and properly select quality_coef with smooth
+      // filter
+      float quality_coef = 1.0f;
+      const float rampup = (cparams.butteraugli_distance - kNoiseRampupStart) /
+                           kNoiseModelingRampUpDistanceRange;
+      if (rampup < 1.0f) {
+        quality_coef = kNoiseLevelAtStartOfRampUp +
+                       (1.0f - kNoiseLevelAtStartOfRampUp) * rampup;
+      }
+      if (rampup < 0.0f) {
+        // NOTE(review): reuses the distance constant (1.0) as a coefficient.
+        quality_coef = kNoiseRampupStart;
+      }
+      if (!GetNoiseParameter(*opsin, &shared.image_features.noise_params,
+                             quality_coef)) {
+        // No noise parameters found: clear the noise flag in the header.
+        shared.frame_header.flags &= ~FrameHeader::kNoise;
+      }
+    }
+  }
+  if (enc_state->shared.frame_header.upsampling != 1 &&
+      !cparams.already_downsampled) {
+    // In VarDCT mode, LossyFrameHeuristics takes care of running downsampling
+    // after noise, if necessary.
+    DownsampleImage(opsin, cparams.resampling);
+    PadImageToBlockMultipleInPlace(opsin);
+  }
+
+  const FrameDimensions& frame_dim = enc_state->shared.frame_dim;
+  size_t target_size = TargetSize(cparams, frame_dim);
+  size_t opsin_target_size = target_size;
+  if (cparams.target_size > 0 || cparams.target_bitrate > 0.0) {
+    cparams.target_size = opsin_target_size;  // written back to enc_state
+  } else if (cparams.butteraugli_distance < 0) {
+    return JXL_FAILURE("Expected non-negative distance");
+  }
+
+  // NOTE: the spline and patch steps below are commented out in this copy.
+  // Find and subtract splines.
+  //  if (cparams.speed_tier <= SpeedTier::kSquirrel) {
+  //    shared.image_features.splines = FindSplines(*opsin);
+  //    JXL_RETURN_IF_ERROR(
+  //        shared.image_features.splines.SubtractFrom(opsin, shared.cmap));
+  //  }
+
+  // Find and subtract patches/dots.
+  //  if (ApplyOverride(cparams.patches,
+  //                    cparams.speed_tier <= SpeedTier::kSquirrel)) {
+  //    FindBestPatchDictionary(*opsin, enc_state, pool, aux_out);
+  //    PatchDictionaryEncoder::SubtractFrom(shared.image_features.patches,
+  //    opsin);
+  //  }
+
+  static const float kAcQuant = 0.79f;
+  const float quant_dc = InitialQuantDC(cparams.butteraugli_distance);
+  Quantizer& quantizer = enc_state->shared.quantizer;
+  // We don't know the quant field yet, but for computing the global scale
+  // assuming that it will be the same as for Falcon mode is good enough.
+  quantizer.ComputeGlobalScaleAndQuant(
+      quant_dc, kAcQuant / cparams.butteraugli_distance, 0);
+
+  // TODO(veluca): we can now run all the code from here to FindBestQuantizer
+  // (excluded) one rect at a time. Do that.
+
+  // Dependency graph:
+  //
+  // input: either XYB or input image
+  //
+  // input image -> XYB [optional]
+  // XYB -> initial quant field
+  // XYB -> Gaborished XYB
+  // Gaborished XYB -> CfL1
+  // initial quant field, Gaborished XYB, CfL1 -> ACS
+  // initial quant field, ACS, Gaborished XYB -> EPF control field
+  // initial quant field -> adjusted initial quant field
+  // adjusted initial quant field, ACS -> raw quant field
+  // raw quant field, ACS, Gaborished XYB -> CfL2
+  //
+  // output: Gaborished XYB, CfL, ACS, raw quant field, EPF control field.
+
+  ArControlFieldHeuristics ar_heuristics;
+  AcStrategyHeuristics acs_heuristics;
+  CfLHeuristics cfl_heuristics;
+
+  if (!opsin->xsize()) {  // empty opsin: do the XYB conversion here
+    JXL_ASSERT(HandlesColorConversion(cparams, *original_pixels));
+    *opsin = Image3F(RoundUpToBlockDim(original_pixels->xsize()),
+                     RoundUpToBlockDim(original_pixels->ysize()));
+    opsin->ShrinkTo(original_pixels->xsize(), original_pixels->ysize());
+    ToXYB(*original_pixels, pool, opsin, /*linear=*/nullptr);
+    PadImageToBlockMultipleInPlace(opsin);
+  }
+
+  // Compute an initial estimate of the quantization field.
+  // Call InitialQuantField only in Hare mode or slower. Otherwise, rely
+  // on simple heuristics in FindBestAcStrategy, or set a constant for Falcon
+  // mode.
+  if (cparams.speed_tier > SpeedTier::kHare || cparams.uniform_quant > 0) {
+    enc_state->initial_quant_field =
+        ImageF(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
+    float q = cparams.uniform_quant > 0
+                  ? cparams.uniform_quant
+                  : kAcQuant / cparams.butteraugli_distance;
+    FillImage(q, &enc_state->initial_quant_field);
+  } else {
+    // Call this here, as it relies on pre-gaborish values.
+    float butteraugli_distance_for_iqf = cparams.butteraugli_distance;
+    if (!shared.frame_header.loop_filter.gab) {
+      butteraugli_distance_for_iqf *= 0.73f;
+    }
+    enc_state->initial_quant_field = InitialQuantField(
+        butteraugli_distance_for_iqf, *opsin, shared.frame_dim, pool, 1.0f,
+        &enc_state->initial_quant_masking);
+  }
+
+  // TODO(veluca): do something about animations.
+
+  // Apply inverse-gaborish.
+  if (shared.frame_header.loop_filter.gab) {
+    GaborishInverse(opsin, 0.9908511000000001f, pool);
+  }
+
+  cfl_heuristics.Init(*opsin);
+  acs_heuristics.Init(*opsin, enc_state);
+
+  // Runs the per-tile heuristics; `tid` indexes tiles in row-major order.
+  auto process_tile = [&](size_t tid, size_t thread) {
+    size_t n_enc_tiles =
+        DivCeil(enc_state->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks);
+    size_t tx = tid % n_enc_tiles;
+    size_t ty = tid / n_enc_tiles;
+    size_t by0 = ty * kEncTileDimInBlocks;
+    size_t by1 = std::min((ty + 1) * kEncTileDimInBlocks,
+                          enc_state->shared.frame_dim.ysize_blocks);
+    size_t bx0 = tx * kEncTileDimInBlocks;
+    size_t bx1 = std::min((tx + 1) * kEncTileDimInBlocks,
+                          enc_state->shared.frame_dim.xsize_blocks);
+    Rect r(bx0, by0, bx1 - bx0, by1 - by0);  // tile rectangle, in blocks
+
+    // In Squirrel mode or slower, compute a first color correlation map now,
+    // before the AC strategy and quantizer are known (both passed as nullptr).
+    if (cparams.speed_tier <= SpeedTier::kSquirrel) {
+      cfl_heuristics.ComputeTile(r, *opsin, enc_state->shared.matrices,
+                                 /*ac_strategy=*/nullptr,
+                                 /*quantizer=*/nullptr, /*fast=*/false, thread,
+                                 &enc_state->shared.cmap);
+    }
+
+    // Choose block sizes.
+    acs_heuristics.ProcessRect(r);
+
+    // Choose amount of post-processing smoothing.
+    // TODO(veluca): should this go *after* AdjustQuantField?
+    ar_heuristics.RunRect(r, *opsin, enc_state, thread);
+
+    // Always set the initial quant field, so we can compute the CfL map with
+    // more accuracy. The initial quant field might change in slower modes, but
+    // adjusting the quant field with butteraugli when all the other encoding
+    // parameters are fixed is likely a more reliable choice anyway.
+    AdjustQuantField(enc_state->shared.ac_strategy, r,
+                     &enc_state->initial_quant_field);
+    quantizer.SetQuantFieldRect(enc_state->initial_quant_field, r,
+                                &enc_state->shared.raw_quant_field);
+
+    // Compute a non-default CfL map if we are at Hare speed, or slower.
+    if (cparams.speed_tier <= SpeedTier::kHare) {
+      cfl_heuristics.ComputeTile(
+          r, *opsin, enc_state->shared.matrices, &enc_state->shared.ac_strategy,
+          &enc_state->shared.quantizer,
+          /*fast=*/cparams.speed_tier >= SpeedTier::kWombat, thread,
+          &enc_state->shared.cmap);
+    }
+  };
+  RunOnPool(pool, 0, DivCeil(enc_state->shared.frame_dim.xsize_blocks,
+                             kEncTileDimInBlocks) *
+                         DivCeil(enc_state->shared.frame_dim.ysize_blocks,
+                                 kEncTileDimInBlocks),
+            [&](const size_t num_threads) {
+              ar_heuristics.PrepareForThreads(num_threads);
+              cfl_heuristics.PrepareForThreads(num_threads);
+              return true;
+            },
+            process_tile, "Enc Heuristics");  // any returned status is ignored
+
+  acs_heuristics.Finalize(aux_out);
+  if (cparams.speed_tier <= SpeedTier::kHare) {
+    cfl_heuristics.ComputeDC(/*fast=*/cparams.speed_tier >= SpeedTier::kWombat,
+                             &enc_state->shared.cmap);
+  }
+
+  FindBestDequantMatrices(cparams, *opsin, modular_frame_encoder,
+                          &enc_state->shared.matrices);
+
+  // Refine quantization levels.
+  FindBestQuantizer(original_pixels, *opsin, enc_state, pool, aux_out);
+
+  // Choose a context model that depends on the amount of quantization for AC.
+  if (cparams.speed_tier < SpeedTier::kFalcon) {
+    FindBestBlockEntropyModel(*enc_state);
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_heuristics.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_heuristics.h
new file mode 100644
index 0000000000..559603a619
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_heuristics.h
@@ -0,0 +1,87 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_HEURISTICS_H_
+#define LIB_JXL_ENC_HEURISTICS_H_
+
+// Hook for custom encoder heuristics (VarDCT only for now).
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/modular/encoding/enc_ma.h"
+
+namespace jxl {
+
+struct PassesEncoderState;
+class ImageBundle;
+class ModularFrameEncoder;
+
+class EncoderHeuristics {  // interface implemented by the heuristics below
+ public:
+  virtual ~EncoderHeuristics() = default;  // virtual: polymorphic base class
+  // Initializes encoder structures in `enc_state` using the original image data
+  // in `original_pixels`, and the XYB image data in `opsin`. Also modifies the
+  // `opsin` image by applying Gaborish, and doing other modifications if
+  // necessary. `pool` is used for running the computations on multiple threads.
+  // `aux_out` collects statistics and can be used to print debug images.
+  virtual Status LossyFrameHeuristics(
+      PassesEncoderState* enc_state, ModularFrameEncoder* modular_frame_encoder,
+      const ImageBundle* original_pixels, Image3F* opsin, ThreadPool* pool,
+      AuxOut* aux_out) = 0;  // pure virtual: no default implementation
+
+  // Custom fixed tree for lossless mode. Must set `tree` to a valid tree if
+  // the function returns true.
+  virtual bool CustomFixedTreeLossless(const FrameDimensions& frame_dim,
+                                       Tree* tree) {
+    return false;  // default: no custom tree, `tree` is left untouched
+  }
+
+  // If this method returns `true`, the `opsin` parameter to
+  // LossyFrameHeuristics will not be initialized, and should be initialized
+  // during the call. Moreover, `original_pixels` may not be in a linear
+  // colorspace (but will be the same as the `ib` value passed to this
+  // function).
+  virtual bool HandlesColorConversion(const CompressParams& cparams,
+                                      const ImageBundle& ib) {
+    return false;  // default: no color conversion handling is claimed
+  }
+};
+
+class DefaultEncoderHeuristics : public EncoderHeuristics {
+ public:
+  Status LossyFrameHeuristics(PassesEncoderState* enc_state,
+                              ModularFrameEncoder* modular_frame_encoder,
+                              const ImageBundle* original_pixels,
+                              Image3F* opsin, ThreadPool* pool,
+                              AuxOut* aux_out) override;  // defined out of line
+  bool HandlesColorConversion(const CompressParams& cparams,
+                              const ImageBundle& ib) override;
+};  // CustomFixedTreeLossless is not overridden: the base default applies
+
+class FastEncoderHeuristics : public EncoderHeuristics {
+ public:  // `linear` below is the parameter the base calls `original_pixels`
+  Status LossyFrameHeuristics(PassesEncoderState* enc_state,
+                              ModularFrameEncoder* modular_frame_encoder,
+                              const ImageBundle* linear, Image3F* opsin,
+                              ThreadPool* pool, AuxOut* aux_out) override;
+};  // the other two virtuals keep the base defaults (both return false)
+
+// Exposed here since it may be used by other EncoderHeuristics implementations
+// outside this project.
+void FindBestDequantMatrices(const CompressParams& cparams,
+                             const Image3F& opsin,
+                             ModularFrameEncoder* modular_frame_encoder,
+                             DequantMatrices* dequant_matrices);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_HEURISTICS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_huffman.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_huffman.cc
new file mode 100644
index 0000000000..04b5669982
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_huffman.cc
@@ -0,0 +1,214 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_huffman.h"
+
+#include <algorithm>
+#include <memory>
+
+#include "lib/jxl/huffman_tree.h"
+
+namespace jxl {
+
+namespace {
+
+constexpr int kCodeLengthCodes = 18;
+
+// Writes the depths of the code over the code length alphabet (18 symbols) to
+// `writer`: first a 2-bit count of skipped leading entries, then each stored
+// depth with a fixed prefix code, in the order given by kOrder.
+void StoreHuffmanTreeOfHuffmanTreeToBitMask(const int num_codes,
+                                            const uint8_t* code_length_bitdepth,
+                                            BitWriter* writer) {
+  // Transmission order of the code length symbols.
+  static const uint8_t kOrder[kCodeLengthCodes] = {
+      1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+  // The bit lengths of the Huffman code over the code length alphabet
+  // are compressed with the following static Huffman code:
+  //   Symbol   Code
+  //   ------   ----
+  //   0          00
+  //   1        1110
+  //   2         110
+  //   3          01
+  //   4          10
+  //   5        1111
+  static const uint8_t kDepthCode[6] = {0, 7, 3, 2, 1, 15};
+  static const uint8_t kDepthCodeLen[6] = {2, 4, 3, 2, 2, 4};
+
+  // With more than one code in use, trailing zero depths are not stored.
+  size_t end = kCodeLengthCodes;
+  if (num_codes > 1) {
+    while (end > 0 && code_length_bitdepth[kOrder[end - 1]] == 0) {
+      --end;
+    }
+  }
+
+  // Leading zero depths are skipped as well, but only as a run of exactly
+  // two or three entries.
+  size_t begin = 0;
+  if (code_length_bitdepth[kOrder[0]] == 0 &&
+      code_length_bitdepth[kOrder[1]] == 0) {
+    begin = (code_length_bitdepth[kOrder[2]] == 0) ? 3 : 2;
+  }
+  writer->Write(2, begin);
+
+  for (size_t pos = begin; pos < end; ++pos) {
+    const size_t depth = code_length_bitdepth[kOrder[pos]];
+    writer->Write(kDepthCodeLen[depth], kDepthCode[depth]);
+  }
+}
+
+// Emits each entry of the compact tree with its code-length code; entries 16
+// and 17 are followed by 2 resp. 3 extra bits from `huffman_tree_extra_bits`.
+void StoreHuffmanTreeToBitMask(const size_t huffman_tree_size,
+                               const uint8_t* huffman_tree,
+                               const uint8_t* huffman_tree_extra_bits,
+                               const uint8_t* code_length_bitdepth,
+                               const uint16_t* code_length_bitdepth_symbols,
+                               BitWriter* writer) {
+  for (size_t pos = 0; pos < huffman_tree_size; ++pos) {
+    const size_t id = huffman_tree[pos];
+    writer->Write(code_length_bitdepth[id], code_length_bitdepth_symbols[id]);
+    // Only symbols 16 and 17 carry extra bits.
+    size_t num_extra_bits = 0;
+    if (id == 16) num_extra_bits = 2;
+    if (id == 17) num_extra_bits = 3;
+    if (num_extra_bits != 0) {
+      writer->Write(num_extra_bits, huffman_tree_extra_bits[pos]);
+    }
+  }
+}
+
+// Writes a "simple" Huffman code for 2 to 4 used symbols: the marker value 1,
+// NSYM - 1, then the symbols in order of increasing depth on `max_bits` bits
+// each, and in the four-symbol form a final tree-select bit.
+// Note that `symbols` is reordered in place.
+void StoreSimpleHuffmanTree(const uint8_t* depths, size_t symbols[4],
+                            size_t num_symbols, size_t max_bits,
+                            BitWriter* writer) {
+  writer->Write(2, 1);                // value of 1 indicates a simple code
+  writer->Write(2, num_symbols - 1);  // NSYM - 1
+
+  // Exchange sort by depth. The resulting order, ties included, determines
+  // the order in which the symbols are written below.
+  for (size_t lo = 0; lo < num_symbols; lo++) {
+    for (size_t hi = lo + 1; hi < num_symbols; hi++) {
+      if (depths[symbols[hi]] < depths[symbols[lo]]) {
+        std::swap(symbols[hi], symbols[lo]);
+      }
+    }
+  }
+
+  // Two and three symbols are written as such; every other count takes the
+  // four-symbol path (the caller in this file passes 2, 3 or 4).
+  const bool four_symbol_form = num_symbols != 2 && num_symbols != 3;
+  const size_t num_written = four_symbol_form ? 4 : num_symbols;
+  for (size_t k = 0; k < num_written; k++) {
+    writer->Write(max_bits, symbols[k]);
+  }
+  if (four_symbol_form) {
+    // tree-select
+    writer->Write(1, depths[symbols[0]] == 1 ? 1 : 0);
+  }
+}
+
+// Stores the code given by `depths` in the general (non-simple) form.
+// num = alphabet size, depths = symbol depths
+void StoreHuffmanTree(const uint8_t* depths, size_t num, BitWriter* writer) {
+  // Write the Huffman tree into the compact representation.
+  std::unique_ptr<uint8_t[]> arena(new uint8_t[2 * num]);  // tree | extra bits
+  uint8_t* huffman_tree = arena.get();
+  uint8_t* huffman_tree_extra_bits = arena.get() + num;
+  size_t huffman_tree_size = 0;
+  WriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree,
+                   huffman_tree_extra_bits);
+
+  // Calculate the statistics of the Huffman tree in the compact representation.
+  uint32_t huffman_tree_histogram[kCodeLengthCodes] = {0};
+  for (size_t i = 0; i < huffman_tree_size; ++i) {
+    ++huffman_tree_histogram[huffman_tree[i]];
+  }
+
+  int num_codes = 0;  // distinct symbols in the compact tree, capped at 2
+  int code = 0;       // the first such symbol (the only one if num_codes == 1)
+  for (int i = 0; i < kCodeLengthCodes; ++i) {
+    if (huffman_tree_histogram[i]) {
+      if (num_codes == 0) {
+        code = i;
+        num_codes = 1;
+      } else if (num_codes == 1) {
+        num_codes = 2;
+        break;
+      }
+    }
+  }
+
+  // Calculate a second Huffman code (limit argument 5, presumably the maximum
+  // depth - TODO confirm) that is used to compress the compact tree above.
+  uint8_t code_length_bitdepth[kCodeLengthCodes] = {0};
+  uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = {0};
+  CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes, 5,
+                    &code_length_bitdepth[0]);
+  ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes,
+                            &code_length_bitdepth_symbols[0]);
+
+  // Now, we have all the data, let's start storing it
+  StoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth,
+                                         writer);
+
+  if (num_codes == 1) {  // only after the depths above have been written
+    code_length_bitdepth[code] = 0;  // the single symbol then costs 0 bits
+  }
+
+  // Store the real huffman tree now.
+  StoreHuffmanTreeToBitMask(huffman_tree_size, huffman_tree,
+                            huffman_tree_extra_bits, &code_length_bitdepth[0],
+                            code_length_bitdepth_symbols, writer);
+}
+
+}  // namespace
+
+// Writes the prefix code for `histogram` (`length` entries) to `writer`. With
+// at most one used symbol only that symbol is written and `depth` and `bits`
+// are not touched; otherwise they are filled in by CreateHuffmanTree and
+// ConvertBitDepthsToSymbols before the code is stored.
+void BuildAndStoreHuffmanTree(const uint32_t* histogram, const size_t length,
+                              uint8_t* depth, uint16_t* bits,
+                              BitWriter* writer) {
+  // Count the used symbols and remember the first four of them. The scan
+  // stops at the sixth used symbol, so `num_used` ends up as at most 5.
+  size_t num_used = 0;
+  size_t first_four[4] = {0};
+  for (size_t sym = 0; sym < length; sym++) {
+    if (histogram[sym] == 0) continue;
+    if (num_used > 4) break;
+    if (num_used < 4) first_four[num_used] = sym;
+    num_used++;
+  }
+
+  // Bit width of the largest symbol value, length - 1.
+  size_t max_bits = 0;
+  for (size_t rest = length - 1; rest != 0; rest >>= 1) ++max_bits;
+
+  if (num_used <= 1) {
+    // Output symbol bits and depths are initialized with 0, nothing to do.
+    writer->Write(4, 1);
+    writer->Write(max_bits, first_four[0]);
+    return;
+  }
+
+  CreateHuffmanTree(histogram, length, 15, depth);
+  ConvertBitDepthsToSymbols(depth, length, bits);
+
+  // Up to four used symbols fit the simple form; more need the full tree.
+  if (num_used > 4) {
+    StoreHuffmanTree(depth, length, writer);
+  } else {
+    StoreSimpleHuffmanTree(depth, first_four, num_used, max_bits, writer);
+  }
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_huffman.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_huffman.h
new file mode 100644
index 0000000000..d7a66584e8
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_huffman.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_HUFFMAN_H_
+#define LIB_JXL_ENC_HUFFMAN_H_
+
+#include "lib/jxl/enc_bit_writer.h"
+
+namespace jxl {
+
+// Builds a Huffman tree for the given histogram, and encodes it into writer
+// in a format that can be read by HuffmanDecodingData::ReadFromBitstream.
+// An allotment for `writer` must already have been created by the caller.
+void BuildAndStoreHuffmanTree(const uint32_t* histogram, size_t length,
+                              uint8_t* depth, uint16_t* bits,
+                              BitWriter* writer);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_HUFFMAN_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_icc_codec.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_icc_codec.cc
new file mode 100644
index 0000000000..4ec17c6b22
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_icc_codec.cc
@@ -0,0 +1,430 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_icc_codec.h"
+
+#include <stdint.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/icc_codec_common.h"
+
+namespace jxl {
+namespace {
+
+// Appends `value` to `output` as a varint (7 bits per byte, low bits first,
+// bit 7 set on every byte but the last), starting at index *output_pos and
+// advancing it. Returns false instead of writing at or past `output_size`.
+bool EncodeVarInt(uint64_t value, size_t output_size, size_t* output_pos,
+                  uint8_t* output) {
+  while (value > 127) {  // more than 7 bits left: set the next byte flag
+    // Index output_size is already out of bounds, hence >= and not >.
+    if (*output_pos >= output_size) return false;
+    output[(*output_pos)++] = ((uint8_t)(value & 127)) | 128;
+    value >>= 7;  // remove the seven bits we just wrote
+  }
+  if (*output_pos >= output_size) return false;
+  output[(*output_pos)++] = ((uint8_t)value) & 127;
+  return true;
+}
+
+void EncodeVarInt(uint64_t value, PaddedBytes* data) {  // appends to `data`
+  size_t pos = data->size();
+  data->resize(data->size() + 10);  // a 64-bit varint takes up to 10 bytes
+  JXL_CHECK(EncodeVarInt(value, data->size(), &pos, data->data()));
+  data->resize(pos);  // trim to the bytes actually written
+}
+
+// De-interleaves `size` bytes of `data` in place. With width 2, for example,
+// "AaBbCcDd" becomes "ABCDabcd", which splits UTF-16 text into all its high
+// order bytes followed by all its low order bytes.
+// Put differently, this transposes a matrix that has `width` columns and
+// ceil(size / width) rows and is given in scanline order. When size is less
+// than width * height the last spots of the bottom row are missing; they are
+// transposed along with the filled spots, so they end up at the bottom of the
+// rightmost column. The output is the transposed matrix in scanline order
+// with the missing spots skipped over (this may occur at multiple positions).
+void Unshuffle(uint8_t* data, size_t size, size_t width) {
+  const size_t num_rows = (size + width - 1) / width;  // rows of the input
+  PaddedBytes transposed(size);
+  size_t out = 0;       // output index for the current input byte
+  size_t restarts = 0;  // how often `out` ran past the end so far
+  for (size_t in = 0; in < size; in++) {
+    transposed[out] = data[in];
+    out += num_rows;
+    if (out >= size) {
+      restarts++;
+      out = restarts;
+    }
+  }
+
+  for (size_t k = 0; k < size; k++) data[k] = transposed[k];
+}
+
+// Appends the residuals data[*pos + i] - prediction, for i in [0, num), to
+// `result`, unshuffles them when width > 1, and advances *pos by num. The
+// encoder must handle any byte stream, so a failure here is an internal bug.
+Status PredictAndShuffle(size_t stride, size_t width, int order, size_t num,
+                         const uint8_t* data, size_t size, size_t* pos,
+                         PaddedBytes* result) {
+  JXL_RETURN_IF_ERROR(CheckOutOfBounds(*pos, num, size));
+  // Required by the specification, see decoder: stride * 4 must be < *pos.
+  if (*pos == 0 || stride > ((*pos - 1u) >> 2u)) {
+    return JXL_FAILURE("Invalid stride");
+  }
+  if (stride * 4 > *pos) return JXL_FAILURE("Too large stride");
+  const size_t first_new = result->size();
+  for (size_t k = 0; k < num; k++) {
+    const uint8_t guess =
+        LinearPredictICCValue(data, *pos, k, stride, width, order);
+    result->push_back(data[*pos + k] - guess);
+  }
+  *pos += num;
+  if (width > 1) Unshuffle(result->data() + first_new, num, width);
+  return true;
+}
+}  // namespace
+
+// Appends to `result` a transformed form of the `size`-byte profile `icc`:
+// varint(size), varint(number of command bytes), the commands, then the data
+// (header residuals, literals, prediction residuals). It is not meant to be
+// smaller, only easier to compress for the entropy coder (see WriteICC).
+Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result) {
+  PaddedBytes commands;
+  PaddedBytes data;
+
+  EncodeVarInt(size, result);
+
+  // Header
+  PaddedBytes header = ICCInitialHeaderPrediction();
+  EncodeUint32(0, size, &header);
+  for (size_t i = 0; i < kICCHeaderSize && i < size; i++) {
+    ICCPredictHeader(icc, size, header.data(), i);
+    data.push_back(icc[i] - header[i]);
+  }
+  if (size <= kICCHeaderSize) {
+    EncodeVarInt(0, result);  // 0 commands
+    for (size_t i = 0; i < data.size(); i++) {
+      result->push_back(data[i]);
+    }
+    return true;
+  }
+
+  std::vector<Tag> tags;
+  std::vector<size_t> tagstarts;
+  std::vector<size_t> tagsizes;
+  std::map<size_t, size_t> tagmap;
+
+  // Tag list
+  size_t pos = kICCHeaderSize;
+  if (pos + 4 <= size) {
+    uint64_t numtags = DecodeUint32(icc, size, pos);
+    pos += 4;
+    EncodeVarInt(numtags + 1, &commands);  // 0 is reserved for "no tag list"
+    uint64_t prevtagstart = kICCHeaderSize + numtags * 12;
+    uint32_t prevtagsize = 0;
+    for (size_t i = 0; i < numtags; i++) {
+      if (pos + 12 > size) break;
+
+      Tag tag = DecodeKeyword(icc, size, pos + 0);
+      uint32_t tagstart = DecodeUint32(icc, size, pos + 4);
+      uint32_t tagsize = DecodeUint32(icc, size, pos + 8);
+      pos += 12;
+
+      tags.push_back(tag);
+      tagstarts.push_back(tagstart);
+      tagsizes.push_back(tagsize);
+      tagmap[tagstart] = tags.size() - 1;
+
+      uint8_t tagcode = kCommandTagUnknown;
+      for (size_t j = 0; j < kNumTagStrings; j++) {
+        if (tag == *kTagStrings[j]) {
+          tagcode = j + kCommandTagStringFirst;
+          break;
+        }
+      }
+
+      if (tag == kRtrcTag && pos + 24 < size) {
+        bool ok = true;
+        ok &= DecodeKeyword(icc, size, pos + 0) == kGtrcTag;
+        ok &= DecodeKeyword(icc, size, pos + 12) == kBtrcTag;
+        if (ok) {
+          for (size_t i = 0; i < 8; i++) {  // NOTE: shadows the tag index `i`
+            if (icc[pos - 8 + i] != icc[pos + 4 + i]) ok = false;
+            if (icc[pos - 8 + i] != icc[pos + 16 + i]) ok = false;
+          }
+        }
+        if (ok) {
+          tagcode = kCommandTagTRC;
+          pos += 24;
+          i += 2;  // the G and B entries were consumed as well
+        }
+      }
+
+      if (tag == kRxyzTag && pos + 24 < size) {
+        bool ok = true;
+        ok &= DecodeKeyword(icc, size, pos + 0) == kGxyzTag;
+        ok &= DecodeKeyword(icc, size, pos + 12) == kBxyzTag;
+        uint32_t offsetr = tagstart;
+        uint32_t offsetg = DecodeUint32(icc, size, pos + 4);
+        uint32_t offsetb = DecodeUint32(icc, size, pos + 16);
+        uint32_t sizer = tagsize;
+        uint32_t sizeg = DecodeUint32(icc, size, pos + 8);
+        uint32_t sizeb = DecodeUint32(icc, size, pos + 20);
+        ok &= sizer == 20;
+        ok &= sizeg == 20;
+        ok &= sizeb == 20;
+        ok &= (offsetg == offsetr + 20);
+        ok &= (offsetb == offsetr + 40);
+        if (ok) {
+          tagcode = kCommandTagXYZ;
+          pos += 24;
+          i += 2;  // the G and B entries were consumed as well
+        }
+      }
+
+      uint8_t command = tagcode;
+      uint64_t predicted_tagstart = prevtagstart + prevtagsize;
+      if (predicted_tagstart != tagstart) command |= kFlagBitOffset;
+      size_t predicted_tagsize = prevtagsize;
+      if (tag == kRxyzTag || tag == kGxyzTag || tag == kBxyzTag ||
+          tag == kKxyzTag || tag == kWtptTag || tag == kBkptTag ||
+          tag == kLumiTag) {
+        predicted_tagsize = 20;
+      }
+      if (predicted_tagsize != tagsize) command |= kFlagBitSize;
+      commands.push_back(command);
+      if (tagcode == 1) {  // presumably kCommandTagUnknown == 1 - TODO confirm
+        AppendKeyword(tag, &data);
+      }
+      if (command & kFlagBitOffset) EncodeVarInt(tagstart, &commands);
+      if (command & kFlagBitSize) EncodeVarInt(tagsize, &commands);
+
+      prevtagstart = tagstart;
+      prevtagsize = tagsize;
+    }
+  }
+  // Indicate end of tag list or varint indicating there's none
+  commands.push_back(0);
+
+  // Main content
+  // The main content in a valid ICC profile contains tagged elements, with the
+  // tag types (4 letter names) given by the tag list above, and the tag list
+  // pointing to the start and indicating the size of each tagged element. It is
+  // allowed for tagged elements to overlap, e.g. the curve for R, G and B could
+  // all point to the same one.
+  Tag tag;
+  size_t tagstart = 0, tagsize = 0, clutstart = 0;
+
+  size_t last0 = pos;
+  // This loop appends commands to the output, processing some sub-section of a
+  // current tagged element each time. We need to keep track of the tagtype of
+  // the current element, and update it when we encounter the boundary of a
+  // next one.
+  // It is not required that the input data is a valid ICC profile, if the
+  // encoder does not recognize the data it will still be able to output bytes
+  // but will not predict as well.
+  while (pos <= size) {  // `<=`: the pass at pos == size flushes the tail
+    size_t last1 = pos;
+    PaddedBytes commands_add;
+    PaddedBytes data_add;
+
+    // This means the loop brought the position beyond the tag end.
+    if (pos > tagstart + tagsize) {
+      tag = {0, 0, 0, 0};  // nonsensical value
+    }
+
+    if (commands_add.empty() && data_add.empty() && tagmap.count(pos) &&
+        pos + 4 <= size) {
+      size_t index = tagmap[pos];
+      tag = DecodeKeyword(icc, size, pos);
+      tagstart = tagstarts[index];
+      tagsize = tagsizes[index];
+
+      if (tag == kMlucTag && pos + tagsize <= size && tagsize > 8 &&
+          icc[pos + 4] == 0 && icc[pos + 5] == 0 && icc[pos + 6] == 0 &&
+          icc[pos + 7] == 0) {
+        size_t num = tagsize - 8;
+        commands_add.push_back(kCommandTypeStartFirst + 3);
+        pos += 8;
+        commands_add.push_back(kCommandShuffle2);
+        EncodeVarInt(num, &commands_add);
+        size_t start = data_add.size();
+        for (size_t i = 0; i < num; i++) {
+          data_add.push_back(icc[pos]);
+          pos++;
+        }
+        Unshuffle(data_add.data() + start, num, 2);
+      }
+
+      if (tag == kCurvTag && pos + tagsize <= size && tagsize > 8 &&
+          icc[pos + 4] == 0 && icc[pos + 5] == 0 && icc[pos + 6] == 0 &&
+          icc[pos + 7] == 0) {
+        size_t num = tagsize - 8;
+        if (num > 16 && num < (1 << 28) && pos + num <= size && pos > 0) {
+          commands_add.push_back(kCommandTypeStartFirst + 5);
+          pos += 8;
+          commands_add.push_back(kCommandPredict);
+          int order = 1, width = 2, stride = width;
+          commands_add.push_back((order << 2) | (width - 1));
+          EncodeVarInt(num, &commands_add);
+          JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc,
+                                                size, &pos, &data_add));
+        }
+      }
+    }
+
+    if (tag == kMab_Tag || tag == kMba_Tag) {
+      Tag subTag = DecodeKeyword(icc, size, pos);
+      if (pos + 12 < size && (subTag == kCurvTag || subTag == kVcgtTag) &&
+          DecodeUint32(icc, size, pos + 4) == 0) {
+        uint32_t num = DecodeUint32(icc, size, pos + 8) * 2;
+        if (num > 16 && num < (1 << 28) && pos + 12 + num <= size) {
+          pos += 12;
+          last1 = pos;
+          commands_add.push_back(kCommandPredict);
+          int order = 1, width = 2, stride = width;
+          commands_add.push_back((order << 2) | (width - 1));
+          EncodeVarInt(num, &commands_add);
+          JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc,
+                                                size, &pos, &data_add));
+        }
+      }
+
+      if (pos == tagstart + 24 && pos + 4 < size) {
+        // Note that this value can be remembered for next iterations of the
+        // loop, so the "pos == clutstart" if below can trigger during a later
+        // iteration.
+        clutstart = tagstart + DecodeUint32(icc, size, pos);
+      }
+
+      if (pos == clutstart && clutstart + 16 < size) {
+        size_t numi = icc[tagstart + 8];
+        size_t numo = icc[tagstart + 9];
+        size_t width = icc[clutstart + 16];
+        size_t stride = width * numo;
+        size_t num = width * numo;
+        for (size_t i = 0; i < numi && clutstart + i < size; i++) {
+          num *= icc[clutstart + i];
+        }
+        if ((width == 1 || width == 2) && num > 64 && num < (1 << 28) &&
+            pos + num <= size && pos > stride * 4) {
+          commands_add.push_back(kCommandPredict);
+          int order = 1;
+          uint8_t flags =
+              (order << 2) | (width - 1) | (stride == width ? 0 : 16);
+          commands_add.push_back(flags);
+          if (flags & 16) EncodeVarInt(stride, &commands_add);
+          EncodeVarInt(num, &commands_add);
+          JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc,
+                                                size, &pos, &data_add));
+        }
+      }
+    }
+
+    if (commands_add.empty() && data_add.empty() && tag == kGbd_Tag &&
+        pos == tagstart + 8 && pos + tagsize - 8 <= size && pos > 16 &&
+        tagsize > 8) {
+      size_t width = 4, order = 0, stride = width;
+      size_t num = tagsize - 8;
+      uint8_t flags = (order << 2) | (width - 1) | (stride == width ? 0 : 16);
+      commands_add.push_back(kCommandPredict);
+      commands_add.push_back(flags);
+      if (flags & 16) EncodeVarInt(stride, &commands_add);
+      EncodeVarInt(num, &commands_add);
+      JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc,
+                                            size, &pos, &data_add));
+    }
+
+    if (commands_add.empty() && data_add.empty() && pos + 20 <= size) {
+      Tag subTag = DecodeKeyword(icc, size, pos);
+      if (subTag == kXyz_Tag && DecodeUint32(icc, size, pos + 4) == 0) {
+        commands_add.push_back(kCommandXYZ);
+        pos += 8;
+        for (size_t j = 0; j < 12; j++) data_add.push_back(icc[pos++]);
+      }
+    }
+
+    if (commands_add.empty() && data_add.empty() && pos + 8 <= size) {
+      if (DecodeUint32(icc, size, pos + 4) == 0) {
+        Tag subTag = DecodeKeyword(icc, size, pos);
+        for (size_t i = 0; i < kNumTypeStrings; i++) {
+          if (subTag == *kTypeStrings[i]) {
+            commands_add.push_back(kCommandTypeStartFirst + i);
+            pos += 8;
+            break;
+          }
+        }
+      }
+    }
+
+    if (!(commands_add.empty() && data_add.empty()) || pos == size) {
+      if (last0 < last1) {
+        commands.push_back(kCommandInsert);
+        EncodeVarInt(last1 - last0, &commands);
+        while (last0 < last1) {
+          data.push_back(icc[last0++]);
+        }
+      }
+      for (size_t i = 0; i < commands_add.size(); i++) {
+        commands.push_back(commands_add[i]);
+      }
+      for (size_t i = 0; i < data_add.size(); i++) {
+        data.push_back(data_add[i]);
+      }
+      last0 = pos;
+    }
+    if (commands_add.empty() && data_add.empty()) {
+      pos++;  // unrecognized byte: it goes out via a later kCommandInsert
+    }
+  }
+
+  EncodeVarInt(commands.size(), result);
+  for (size_t i = 0; i < commands.size(); i++) {
+    result->push_back(commands[i]);
+  }
+  for (size_t i = 0; i < data.size(); i++) {
+    result->push_back(data[i]);
+  }
+
+  return true;
+}
+
+// Writes the U64-coded size of PredictICC(icc), then that stream as tokens.
+Status WriteICC(const PaddedBytes& icc, BitWriter* JXL_RESTRICT writer,
+                size_t layer, AuxOut* JXL_RESTRICT aux_out) {
+  if (icc.empty()) return JXL_FAILURE("ICC must be non-empty");
+  PaddedBytes enc;
+  JXL_RETURN_IF_ERROR(PredictICC(icc.data(), icc.size(), &enc));
+  BitWriter::Allotment allotment(writer, 128);
+  JXL_RETURN_IF_ERROR(U64Coder::Write(enc.size(), writer));
+  ReclaimAndCharge(writer, &allotment, layer, aux_out);
+  std::vector<std::vector<Token>> tokens(1);
+  for (size_t k = 0; k < enc.size(); k++) {  // one token per byte of `enc`
+    const uint8_t prev1 = k > 0 ? enc[k - 1] : 0;  // 0 when there is no byte
+    const uint8_t prev2 = k > 1 ? enc[k - 2] : 0;
+    tokens[0].emplace_back(ICCANSContext(k, prev1, prev2), enc[k]);
+  }
+  HistogramParams params;
+  params.force_huffman = true;
+  params.lz77_method = enc.size() >= 4096 ? HistogramParams::LZ77Method::kLZ77
+                                          : HistogramParams::LZ77Method::kOptimal;
+  EntropyEncodingData code;
+  std::vector<uint8_t> context_map;
+  BuildAndEncodeHistograms(params, kNumICCContexts, tokens, &code, &context_map,
+                           writer, layer, aux_out);
+  WriteTokens(tokens[0], code, context_map, writer, layer, aux_out);
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_icc_codec.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_icc_codec.h
new file mode 100644
index 0000000000..2480e3ae9a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_icc_codec.h
@@ -0,0 +1,33 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_ICC_CODEC_H_
+#define LIB_JXL_ENC_ICC_CODEC_H_
+
+// Compressed representation of ICC profiles.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_bit_writer.h"
+
+namespace jxl {
+
+// Encodes `icc` into `writer`. Returns an error if `icc.empty()`.
+Status WriteICC(const PaddedBytes& icc, BitWriter* JXL_RESTRICT writer,
+                size_t layer, AuxOut* JXL_RESTRICT aux_out);
+
+// Exposed only for testing
+Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_ICC_CODEC_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_image_bundle.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_image_bundle.cc
new file mode 100644
index 0000000000..5aac244f5a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_image_bundle.cc
@@ -0,0 +1,170 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_image_bundle.h"
+
+#include <limits>
+#include <utility>
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/luminance.h"
+
+namespace jxl {
+
+namespace {
+
+// Copies ib:rect, converts it from ib->c_current() to c_desired, and stores
+// the result in `out` (reallocated if too small, otherwise shrunk to rect).
+// Returns failure if RunOnPool does (presumably incl. a failed transform init).
+template <typename T>
+Status CopyToT(const ImageMetadata* metadata, const ImageBundle* ib,
+               const Rect& rect, const ColorEncoding& c_desired,
+               ThreadPool* pool, Image3<T>* out) {
+  PROFILER_FUNC;
+  static_assert(
+      std::is_same<T, float>::value || std::numeric_limits<T>::min() == 0,
+      "CopyToT implemented only for float and unsigned types");
+  ColorSpaceTransform c_transform;
+  // Changing IsGray is probably a bug.
+  JXL_CHECK(ib->IsGray() == c_desired.IsGray());
+  // With skcms enabled the single-channel path below is never taken.
+#if JPEGXL_ENABLE_SKCMS
+  bool is_gray = false;
+#else
+  bool is_gray = ib->IsGray();
+#endif
+  if (out->xsize() < rect.xsize() || out->ysize() < rect.ysize()) {
+    *out = Image3<T>(rect.xsize(), rect.ysize());
+  } else {
+    out->ShrinkTo(rect.xsize(), rect.ysize());
+  }
+  // Do not report success when the pool run (or its init callback) failed.
+  JXL_RETURN_IF_ERROR(RunOnPool(
+      pool, 0, rect.ysize(),
+      [&](size_t num_threads) {
+        return c_transform.Init(ib->c_current(), c_desired,
+                                metadata->IntensityTarget(), rect.xsize(),
+                                num_threads);
+      },
+      [&](const int y, const int thread) {
+        float* mutable_src_buf = c_transform.BufSrc(thread);
+        const float* src_buf = mutable_src_buf;
+        // Interleave input.
+        if (is_gray) {
+          src_buf = rect.ConstPlaneRow(ib->color(), 0, y);
+        } else {
+          const float* JXL_RESTRICT row_in0 =
+              rect.ConstPlaneRow(ib->color(), 0, y);
+          const float* JXL_RESTRICT row_in1 =
+              rect.ConstPlaneRow(ib->color(), 1, y);
+          const float* JXL_RESTRICT row_in2 =
+              rect.ConstPlaneRow(ib->color(), 2, y);
+          for (size_t x = 0; x < rect.xsize(); x++) {
+            mutable_src_buf[3 * x + 0] = row_in0[x];
+            mutable_src_buf[3 * x + 1] = row_in1[x];
+            mutable_src_buf[3 * x + 2] = row_in2[x];
+          }
+        }
+        float* JXL_RESTRICT dst_buf = c_transform.BufDst(thread);
+        DoColorSpaceTransform(&c_transform, thread, src_buf, dst_buf);
+        T* JXL_RESTRICT row_out0 = out->PlaneRow(0, y);
+        T* JXL_RESTRICT row_out1 = out->PlaneRow(1, y);
+        T* JXL_RESTRICT row_out2 = out->PlaneRow(2, y);
+        // De-interleave output and convert type.
+        if (std::is_same<float, T>::value) {  // deinterleave to float.
+          if (is_gray) {
+            for (size_t x = 0; x < rect.xsize(); x++) {
+              row_out0[x] = row_out1[x] = row_out2[x] = dst_buf[x];
+            }
+          } else {
+            for (size_t x = 0; x < rect.xsize(); x++) {
+              row_out0[x] = dst_buf[3 * x + 0];
+              row_out1[x] = dst_buf[3 * x + 1];
+              row_out2[x] = dst_buf[3 * x + 2];
+            }
+          }
+        } else {
+          // Convert to T, doing clamping.
+          float max = std::numeric_limits<T>::max();
+          auto cvt = [max](float in) {
+            float v = std::max(0.0f, std::min(max, in * max));
+            return static_cast<T>(v < 0 ? v - 0.5f : v + 0.5f);
+          };
+          if (is_gray) {
+            for (size_t x = 0; x < rect.xsize(); x++) {
+              row_out0[x] = row_out1[x] = row_out2[x] = cvt(dst_buf[x]);
+            }
+          } else {
+            for (size_t x = 0; x < rect.xsize(); x++) {
+              row_out0[x] = cvt(dst_buf[3 * x + 0]);
+              row_out1[x] = cvt(dst_buf[3 * x + 1]);
+              row_out2[x] = cvt(dst_buf[3 * x + 2]);
+            }
+          }
+        }
+      },
+      "Colorspace transform"));
+  return true;
+}
+
+}  // namespace
+
+Status ImageBundle::TransformTo(const ColorEncoding& c_desired,
+                                ThreadPool* pool) {  // In-place on color_.
+  PROFILER_FUNC;
+  JXL_RETURN_IF_ERROR(CopyTo(Rect(color_), c_desired, &color_, pool));
+  c_current_ = c_desired;  // Reached only if the conversion succeeded.
+  return true;
+}
+
+Status ImageBundle::CopyTo(const Rect& rect, const ColorEncoding& c_desired,
+                           Image3B* out, ThreadPool* pool) const {
+  return CopyToT(metadata_, this, rect, c_desired, pool, out);
+}  // Image3B overload: CopyToT deduces T from the type of `out`.
+Status ImageBundle::CopyTo(const Rect& rect, const ColorEncoding& c_desired,
+                           Image3F* out, ThreadPool* pool) const {
+  return CopyToT(metadata_, this, rect, c_desired, pool, out);
+}  // Image3F overload: same forwarding, different output image type.
+
+Status ImageBundle::CopyToSRGB(const Rect& rect, Image3B* out,
+                               ThreadPool* pool) const {  // Convert to sRGB.
+  return CopyTo(rect, ColorEncoding::SRGB(IsGray()), out, pool);
+}  // IsGray() is forwarded so the sRGB target matches this bundle.
+
+Status TransformIfNeeded(const ImageBundle& in, const ColorEncoding& c_desired,
+                         ThreadPool* pool, ImageBundle* store,
+                         const ImageBundle** out) {
+  // Fast path: encodings already match, so expose the input as-is.
+  if (in.c_current().SameColorEncoding(c_desired)) {
+    *out = &in;
+    return true;
+  }
+  // TODO(janwas): avoid copying via createExternal+copyBackToIO
+  // instead of copy+createExternal+copyBackToIO
+  store->SetFromImage(CopyImage(in.color()), in.c_current());
+
+  // Must at least copy the alpha channel for use by external_image.
+  if (in.HasExtraChannels()) {
+    std::vector<ImageF> copied_channels;
+    for (const ImageF& channel : in.extra_channels()) {
+      copied_channels.emplace_back(CopyImage(channel));
+    }
+    store->SetExtraChannels(std::move(copied_channels));
+  }
+
+  // Convert the copy held by `store`; `*out` is only set on success.
+  if (!store->TransformTo(c_desired, pool)) return false;
+  *out = store;
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_image_bundle.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_image_bundle.h
new file mode 100644
index 0000000000..f5cd007296
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_image_bundle.h
@@ -0,0 +1,25 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_IMAGE_BUNDLE_H_
+#define LIB_JXL_ENC_IMAGE_BUNDLE_H_
+
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+// Does color transformation from in.c_current() to c_desired if the color
+// encodings are different, or nothing if they are already the same.
+// If color transformation is done, stores the transformed values into store and
+// sets the out pointer to store, else leaves store untouched and sets the out
+// pointer to &in.
+// Returns false if color transform fails.
+Status TransformIfNeeded(const ImageBundle& in, const ColorEncoding& c_desired,
+                         ThreadPool* pool, ImageBundle* store,
+                         const ImageBundle** out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_IMAGE_BUNDLE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_modular.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_modular.cc
new file mode 100644
index 0000000000..4767017ad7
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_modular.cc
@@ -0,0 +1,1633 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_modular.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+#include <limits>
+#include <queue>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cluster.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/gaborish.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/encoding/enc_encoding.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/encoding/ma_common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/modular/transform/enc_transform.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+
+namespace {
+// Squeeze default quantization factors.
+// These quantization factors are for -Q 50 (other qualities simply scale the
+// factors; things are rounded down and obviously cannot get below 1).
+static const float squeeze_quality_factor =
+    0.35;  // for easy tweaking of the quality range (decrease this number for
+           // higher quality)
+static const float squeeze_luma_factor =
+    1.1;  // for easy tweaking of the balance between luma (or anything
+          // non-chroma) and chroma (decrease this number for higher quality
+          // luma)
+static const float squeeze_quality_factor_xyb = 2.4f;
+static const float squeeze_xyb_qtable[3][16] = {
+    {163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28, 0.64, 0.32, 0.16,
+     0.08, 0.04, 0.02, 0.01, 0.005},  // Y
+    {1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5,
+     0.5},  // X
+    {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5,
+     0.5},  // B-Y
+};
+
+static const float squeeze_luma_qtable[16] = {
+    163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28,
+    0.64,   0.32,  0.16,  0.08,  0.04,  0.02, 0.01, 0.005};
+// For 8-bit input, the range of YCoCg chroma is -255..255, so basically this
+// does 4:2:0 subsampling (the two most fine-grained layers get quantized away).
+static const float squeeze_chroma_qtable[16] = {
+    1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5, 0.5};
+
+// Builds a tree that bisects the sorted `cutoffs` on `property`, breadth
+// first; every leaf predicts with `pred`. `cutoffs` must be sorted.
+Tree MakeFixedTree(int property, const std::vector<int32_t>& cutoffs,
+                   Predictor pred, size_t num_pixels) {
+  const size_t log_px = CeilLog2Nonzero(num_pixels);
+  // Reduce fixed tree height when encoding small images.
+  const size_t min_gap = log_px < 14 ? 8 * (14 - log_px) : 0;
+  struct Range {
+    size_t begin, end, pos;  // cutoffs[begin, end) is handled by tree[pos].
+  };
+  std::queue<Range> pending;
+  Tree tree;
+  // Leaf IDs will be set by roundtrip decoding the tree.
+  tree.push_back(PropertyDecisionNode::Leaf(pred));
+  pending.push(Range{0, cutoffs.size(), 0});
+  // Each iteration handles the front range and pops it afterwards.
+  for (; !pending.empty(); pending.pop()) {
+    const Range cur = pending.front();
+    if (cur.begin + min_gap >= cur.end) continue;  // Too narrow: stays a leaf.
+    const uint32_t mid = (cur.begin + cur.end) / 2;
+    const size_t first_child = tree.size();
+    tree[cur.pos] =
+        PropertyDecisionNode::Split(property, cutoffs[mid], first_child);
+    // Children are placeholder leaves for [mid + 1, end) and [begin, mid).
+    pending.push(Range{mid + 1, cur.end, first_child});
+    tree.push_back(PropertyDecisionNode::Leaf(pred));
+    pending.push(Range{cur.begin, mid, first_child + 1});
+    tree.push_back(PropertyDecisionNode::Leaf(pred));
+  }
+  return tree;
+}
+
+Tree PredefinedTree(ModularOptions::TreeKind tree_kind, size_t total_pixels) {
+  if (tree_kind == ModularOptions::TreeKind::kJpegTranscodeACMeta) {
+    // All the data is 0, so no need for a fancy tree.
+    return {PropertyDecisionNode::Leaf(Predictor::Zero)};
+  }
+  if (tree_kind == ModularOptions::TreeKind::kFalconACMeta) {
+    // All the data is 0 except the quant field. TODO(veluca): make that 0 too.
+    return {PropertyDecisionNode::Leaf(Predictor::Left)};
+  }
+  if (tree_kind == ModularOptions::TreeKind::kACMeta) {
+    // Small image: a single leaf with the Left predictor.
+    if (total_pixels < 1024) {
+      return {PropertyDecisionNode::Leaf(Predictor::Left)};
+    }
+    Tree tree;
+    // 0: c > 1
+    tree.push_back(PropertyDecisionNode::Split(0, 1, 1));
+    // 1: c > 2
+    tree.push_back(PropertyDecisionNode::Split(0, 2, 3));
+    // 2: c > 0
+    tree.push_back(PropertyDecisionNode::Split(0, 0, 5));
+    // 3: EPF control field (all 0 or 4), top > 0
+    tree.push_back(PropertyDecisionNode::Split(6, 0, 21));
+    // 4: ACS+QF, y > 0
+    tree.push_back(PropertyDecisionNode::Split(2, 0, 7));
+    // 5: CfL x
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Gradient));
+    // 6: CfL b
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Gradient));
+    // 7: QF: split according to the left quant value.
+    tree.push_back(PropertyDecisionNode::Split(7, 5, 9));
+    // 8: ACS: split in 4 segments (8x8 from 0 to 3, large square 4-5, large
+    // rectangular 6-11, 8x8 12+), according to previous ACS value.
+    tree.push_back(PropertyDecisionNode::Split(7, 5, 15));
+    // 9-14: QF subtree (two splits followed by four Left leaves).
+    tree.push_back(PropertyDecisionNode::Split(7, 11, 11));
+    tree.push_back(PropertyDecisionNode::Split(7, 3, 13));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left));
+    // 15-20: ACS subtree (two splits followed by four Zero leaves).
+    tree.push_back(PropertyDecisionNode::Split(7, 11, 17));
+    tree.push_back(PropertyDecisionNode::Split(7, 3, 19));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+    // 21-26: EPF subtree, left > 0 (two splits followed by four Zero leaves).
+    tree.push_back(PropertyDecisionNode::Split(7, 0, 23));
+    tree.push_back(PropertyDecisionNode::Split(7, 0, 25));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+    tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+    return tree;
+  }
+  if (tree_kind == ModularOptions::TreeKind::kWPFixedDC) {
+    std::vector<int32_t> cutoffs = {
+        -500, -392, -255, -191, -127, -95, -63, -47, -31, -23, -15,
+        -11,  -7,   -4,   -3,   -1,   0,   1,   3,   5,   7,   11,
+        15,   23,   31,   47,   63,   95,  127, 191, 255, 392, 500};
+    return MakeFixedTree(kNumNonrefProperties - weighted::kNumProperties,
+                         cutoffs, Predictor::Weighted, total_pixels);
+  }
+  if (tree_kind == ModularOptions::TreeKind::kGradientFixedDC) {
+    std::vector<int32_t> cutoffs = {  // Same values as for kWPFixedDC.
+        -500, -392, -255, -191, -127, -95, -63, -47, -31, -23, -15,
+        -11,  -7,   -4,   -3,   -1,   0,   1,   3,   5,   7,   11,
+        15,   23,   31,   47,   63,   95,  127, 191, 255, 392, 500};
+    return MakeFixedTree(kGradientProp, cutoffs, Predictor::Gradient,
+                         total_pixels);
+  }
+  JXL_ABORT("Unreachable");  // No other TreeKind is handled here.
+  return {};
+}
+
+// Appends to `tree` a tree that dispatches on stream_id (property 1) to
+// trees[begin, end), with thresholds taken from `tree_splits`.
+void MergeTrees(const std::vector<Tree>& trees,
+                const std::vector<size_t>& tree_splits, size_t begin,
+                size_t end, Tree* tree) {
+  JXL_ASSERT(trees.size() + 1 == tree_splits.size());
+  JXL_ASSERT(end > begin);
+  JXL_ASSERT(end <= trees.size());
+  if (end - begin == 1) {
+    // Single tree: append it, shifting all child indices by the insertion
+    // offset. Leaf IDs become wrong, but subsequent roundtripping fixes them.
+    const size_t offset = tree->size();
+    tree->insert(tree->end(), trees[begin].begin(), trees[begin].end());
+    for (size_t i = offset; i < tree->size(); i++) {
+      auto& node = (*tree)[i];
+      node.lchild += offset;
+      node.rchild += offset;
+    }
+    return;
+  }
+  const size_t mid = (begin + end) / 2;
+  const size_t threshold = tree_splits[mid] - 1;
+  const size_t node_pos = tree->size();
+  tree->emplace_back(1 /*stream_id*/, threshold, 0, 0, Predictor::Zero, 0, 1);
+  (*tree)[node_pos].lchild = tree->size();
+  MergeTrees(trees, tree_splits, mid, end, tree);
+  (*tree)[node_pos].rchild = tree->size();
+  MergeTrees(trees, tree_splits, begin, mid, tree);
+}
+
+// Rounds every sample of `ch` to the nearest multiple of `q`, in place.
+// Negative samples are mirrored so that rounding is symmetric around zero.
+void QuantizeChannel(Channel& ch, const int q) {
+  if (q == 1) return;  // Every integer is already a multiple of 1.
+  const int half = q / 2;
+  for (size_t y = 0; y < ch.plane.ysize(); y++) {
+    pixel_type* samples = ch.plane.Row(y);
+    for (size_t x = 0; x < ch.plane.xsize(); x++) {
+      const pixel_type v = samples[x];
+      samples[x] = v < 0 ? -((-v + half) / q) * q : ((v + half) / q) * q;
+    }
+  }
+}
+
+// Converts binary32 floats that correspond to a custom [bits]-bit float (with
+// [exp_bits] exponent bits) to a [bits]-bit integer representation that should
+// fit in pixel_type. If !fp: row_out[x] = row_in[x] * factor + 0.5f instead.
+Status float_to_int(const float* const row_in, pixel_type* const row_out,
+                    size_t xsize, unsigned int bits, unsigned int exp_bits,
+                    bool fp, float factor) {
+  JXL_ASSERT(sizeof(pixel_type) * 8 >= bits);
+  if (!fp) {  // Integer samples: scale, add 0.5 and convert.
+    for (size_t x = 0; x < xsize; ++x) {
+      row_out[x] = row_in[x] * factor + 0.5f;
+    }
+    return true;
+  }
+  if (bits == 32 && fp) {  // Already binary32: copy the raw bits.
+    JXL_ASSERT(exp_bits == 8);
+    memcpy((void*)row_out, (const void*)row_in, 4 * xsize);
+    return true;
+  }
+
+  int exp_bias = (1 << (exp_bits - 1)) - 1;  // Bias of the target format.
+  int max_exp = (1 << exp_bits) - 1;
+  uint32_t sign = (1u << (bits - 1));  // Sign bit of the target format.
+  int mant_bits = bits - exp_bits - 1;
+  int mant_shift = 23 - mant_bits;  // Source mantissa field is 23 bits wide.
+  for (size_t x = 0; x < xsize; ++x) {
+    uint32_t f;
+    memcpy(&f, &row_in[x], 4);  // Read the float's bit pattern.
+    int signbit = (f >> 31);
+    f &= 0x7fffffff;  // Drop the sign bit.
+    if (f == 0) {  // +/-0: only the sign is kept.
+      row_out[x] = (signbit ? sign : 0);
+      continue;
+    }
+    int exp = (f >> 23) - 127;  // Unbiased binary32 exponent.
+    if (exp == 128) return JXL_FAILURE("Inf/NaN not allowed");
+    int mantissa = (f & 0x007fffff);
+    // broke up the binary32 into its parts, now reassemble into
+    // arbitrary float
+    exp += exp_bias;
+    if (exp < 0) {  // will become a subnormal number
+      // add implicit leading 1 to mantissa
+      mantissa |= 0x00800000;
+      if (exp < -mant_bits) {
+        return JXL_FAILURE(
+            "Invalid float number: %g cannot be represented with %i "
+            "exp_bits and %i mant_bits (exp %i)",
+            row_in[x], exp_bits, mant_bits, exp);
+      }
+      mantissa >>= 1 - exp;  // Shift right to compensate for exponent 0.
+      exp = 0;
+    }
+    // exp should be representable in exp_bits, otherwise input was
+    // invalid
+    if (exp > max_exp) return JXL_FAILURE("Invalid float exponent");
+    if (mantissa & ((1 << mant_shift) - 1)) {  // Low bits would be dropped.
+      return JXL_FAILURE("%g is losing precision (mant: %x)", row_in[x],
+                         mantissa);
+    }
+    mantissa >>= mant_shift;
+    f = (signbit ? sign : 0);  // Assemble sign | exponent | mantissa.
+    f |= (exp << mant_bits);
+    f |= mantissa;
+    row_out[x] = (pixel_type)f;
+  }
+  return true;
+}
+}  // namespace
+
+ModularFrameEncoder::ModularFrameEncoder(const FrameHeader& frame_header,
+                                         const CompressParams& cparams_orig)
+    : frame_dim(frame_header.ToFrameDimensions()), cparams(cparams_orig) {
+  size_t num_streams =
+      ModularStreamId::Num(frame_dim, frame_header.passes.num_passes);
+  if (cparams.modular_mode &&
+      cparams.quality_pair == std::pair<float, float>{100.0, 100.0}) {
+    switch (cparams.decoding_speed_tier) {  // Only for 100/100 modular mode.
+      case 0:
+        break;
+      case 1:
+        cparams.options.wp_tree_mode = ModularOptions::TreeMode::kWPOnly;
+        break;
+      case 2: {
+        cparams.options.wp_tree_mode = ModularOptions::TreeMode::kGradientOnly;
+        cparams.options.predictor = Predictor::Gradient;
+        break;
+      }
+      case 3: {  // LZ77, no Gradient.
+        cparams.options.nb_repeats = 0;
+        cparams.options.predictor = Predictor::Gradient;
+        break;
+      }
+      default: {  // LZ77, no predictor.
+        cparams.options.nb_repeats = 0;
+        cparams.options.predictor = Predictor::Zero;
+        break;
+      }
+    }
+  }
+  stream_images.resize(num_streams);  // One entry per modular stream.
+  if (cquality > 100) cquality = quality;  // NOTE(review): >100 = unset?
+
+  // use a sensible default if nothing explicit is specified:
+  // Squeeze for lossy, no squeeze for lossless
+  if (cparams.responsive < 0) {
+    if (quality == 100) {
+      cparams.responsive = 0;
+    } else {
+      cparams.responsive = 1;
+    }
+  }
+
+  if (cparams.speed_tier > SpeedTier::kWombat) {
+    cparams.options.splitting_heuristics_node_threshold = 192;
+  } else {
+    cparams.options.splitting_heuristics_node_threshold = 96;
+  }
+  {
+    // Set properties.
+    std::vector<uint32_t> prop_order;
+    if (cparams.responsive) {
+      // Properties in order of their likelihood of being useful for Squeeze
+      // residuals.
+      prop_order = {0, 1, 4, 5, 6, 7, 8, 15, 9, 10, 11, 12, 13, 14, 2, 3};
+    } else {
+      // Same, but for the non-Squeeze case.
+      prop_order = {0, 1, 15, 9, 10, 11, 12, 13, 14, 2, 3, 4, 5, 6, 7, 8};
+    }
+    switch (cparams.speed_tier) {  // Picks a prefix of prop_order per tier.
+      case SpeedTier::kSquirrel:
+        cparams.options.splitting_heuristics_properties.assign(
+            prop_order.begin(), prop_order.begin() + 8);
+        cparams.options.max_property_values = 32;
+        break;
+      case SpeedTier::kKitten:
+        cparams.options.splitting_heuristics_properties.assign(
+            prop_order.begin(), prop_order.begin() + 10);
+        cparams.options.max_property_values = 64;
+        break;
+      case SpeedTier::kTortoise:
+        cparams.options.splitting_heuristics_properties = prop_order;
+        cparams.options.max_property_values = 256;
+        break;
+      default:
+        cparams.options.splitting_heuristics_properties.assign(
+            prop_order.begin(), prop_order.begin() + 6);
+        cparams.options.max_property_values = 16;
+        break;
+    }
+    if (cparams.speed_tier > SpeedTier::kTortoise) {
+      // Gradient in previous channels.
+      for (int i = 0; i < cparams.options.max_properties; i++) {
+        cparams.options.splitting_heuristics_properties.push_back(
+            kNumNonrefProperties + i * 4 + 3);
+      }
+    } else {
+      // All the extra properties in Tortoise mode.
+      for (int i = 0; i < cparams.options.max_properties * 4; i++) {
+        cparams.options.splitting_heuristics_properties.push_back(
+            kNumNonrefProperties + i);
+      }
+    }
+  }
+
+  if (cparams.options.predictor == static_cast<Predictor>(-1)) {
+    // no explicit predictor(s) given, set a good default
+    if ((cparams.speed_tier <= SpeedTier::kTortoise ||
+         cparams.modular_mode == false) &&
+        quality == 100 && cparams.responsive == false) {
+      // TODO(veluca): allow all predictors that don't break residual
+      // multipliers in lossy mode.
+      cparams.options.predictor = Predictor::Variable;
+    } else if (cparams.responsive) {
+      // zero predictor for Squeeze residues
+      cparams.options.predictor = Predictor::Zero;
+    } else if (quality < 100) {
+      // If not responsive and lossy. TODO(veluca): use near_lossless instead?
+      cparams.options.predictor = Predictor::Gradient;
+    } else if (cparams.speed_tier < SpeedTier::kFalcon) {
+      // try median and weighted predictor for anything else
+      cparams.options.predictor = Predictor::Best;
+    } else if (cparams.speed_tier == SpeedTier::kFalcon) {
+      // just weighted predictor in falcon mode
+      cparams.options.predictor = Predictor::Weighted;
+    } else if (cparams.speed_tier > SpeedTier::kFalcon) {
+      // just gradient predictor in thunder mode
+      cparams.options.predictor = Predictor::Gradient;
+    }
+  }
+  tree_splits.push_back(0);  // Stream-ID boundaries, starting at 0.
+  if (cparams.modular_mode == false) {
+    cparams.options.fast_decode_multiplier = 1.0f;
+    tree_splits.push_back(ModularStreamId::VarDCTDC(0).ID(frame_dim));
+    tree_splits.push_back(ModularStreamId::ModularDC(0).ID(frame_dim));
+    tree_splits.push_back(ModularStreamId::ACMetadata(0).ID(frame_dim));
+    tree_splits.push_back(ModularStreamId::QuantTable(0).ID(frame_dim));
+    tree_splits.push_back(ModularStreamId::ModularAC(0, 0).ID(frame_dim));
+    ac_metadata_size.resize(frame_dim.num_dc_groups);
+    extra_dc_precision.resize(frame_dim.num_dc_groups);
+  }
+  tree_splits.push_back(num_streams);  // Last boundary: the stream count.
+  cparams.options.max_chan_size = frame_dim.group_dim;
+  cparams.options.group_dim = frame_dim.group_dim;
+
+  // TODO(veluca): figure out how to use different predictor sets per channel.
+  stream_options.resize(num_streams, cparams.options);
+}
+
+bool do_transform(Image& image, const Transform& tr,
+                  const weighted::Header& wp_header,
+                  jxl::ThreadPool* pool = nullptr) {
+  Transform applied = tr;  // Working copy handed to TransformForward.
+  if (!TransformForward(applied, image, wp_header, pool)) return false;
+  image.transform.push_back(applied);  // Record only transforms that ran.
+  return true;
+}
+
+Status ModularFrameEncoder::ComputeEncodingData(
+    const FrameHeader& frame_header, const ImageMetadata& metadata,
+    Image3F* JXL_RESTRICT color, const std::vector<ImageF>& extra_channels,
+    PassesEncoderState* JXL_RESTRICT enc_state, ThreadPool* pool,
+    AuxOut* aux_out, bool do_color) {
+  const FrameDimensions& frame_dim = enc_state->shared.frame_dim;
+
+  if (do_color && frame_header.loop_filter.gab) {
+    GaborishInverse(color, 0.9908511000000001f, pool);
+  }
+
+  if (do_color && metadata.bit_depth.bits_per_sample <= 16 &&
+      cparams.speed_tier < SpeedTier::kCheetah) {
+    FindBestPatchDictionary(*color, enc_state, nullptr, aux_out,
+                            cparams.color_transform == ColorTransform::kXYB);
+    PatchDictionaryEncoder::SubtractFrom(
+        enc_state->shared.image_features.patches, color);
+  }
+
+  // Convert ImageBundle to modular Image object
+  const size_t xsize = frame_dim.xsize;
+  const size_t ysize = frame_dim.ysize;
+
+  int nb_chans = 3;
+  if (metadata.color_encoding.IsGray() &&
+      cparams.color_transform == ColorTransform::kNone) {
+    nb_chans = 1;
+  }
+  if (!do_color) nb_chans = 0;
+
+  nb_chans += extra_channels.size();
+
+  bool fp = metadata.bit_depth.floating_point_sample;
+
+  // bits_per_sample is just metadata for XYB images.
+  if (metadata.bit_depth.bits_per_sample >= 32 && do_color &&
+      cparams.color_transform != ColorTransform::kXYB) {
+    if (metadata.bit_depth.bits_per_sample == 32 && fp == false) {
+      return JXL_FAILURE("uint32_t not supported in enc_modular");
+    } else if (metadata.bit_depth.bits_per_sample > 32) {
+      return JXL_FAILURE("bits_per_sample > 32 not supported");
+    }
+  }
+
+  Image& gi = stream_images[0];
+  gi = Image(xsize, ysize, metadata.bit_depth.bits_per_sample, nb_chans);
+  int c = 0;
+  if (cparams.color_transform == ColorTransform::kXYB &&
+      cparams.modular_mode == true) {
+    static const float enc_factors[3] = {32768.0f, 2048.0f, 2048.0f};
+    DequantMatricesSetCustomDC(&enc_state->shared.matrices, enc_factors);
+  }
+  pixel_type maxval = gi.bitdepth < 32 ? (1u << gi.bitdepth) - 1 : 0;
+  if (do_color) {
+    for (; c < 3; c++) {
+      if (metadata.color_encoding.IsGray() &&
+          cparams.color_transform == ColorTransform::kNone &&
+          c != (cparams.color_transform == ColorTransform::kXYB ? 1 : 0))
+        continue;
+      int c_out = c;
+      // XYB is encoded as YX(B-Y)
+      if (cparams.color_transform == ColorTransform::kXYB && c < 2)
+        c_out = 1 - c_out;
+      float factor = maxval;
+      if (cparams.color_transform == ColorTransform::kXYB)
+        factor = enc_state->shared.matrices.InvDCQuant(c);
+      if (c == 2 && cparams.color_transform == ColorTransform::kXYB) {
+        JXL_ASSERT(!fp);
+        for (size_t y = 0; y < ysize; ++y) {
+          const float* const JXL_RESTRICT row_in = color->PlaneRow(c, y);
+          pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y);
+          pixel_type* const JXL_RESTRICT row_Y = gi.channel[0].Row(y);
+          for (size_t x = 0; x < xsize; ++x) {
+            row_out[x] = row_in[x] * factor + 0.5f;
+            row_out[x] -= row_Y[x];
+          }
+        }
+      } else {
+        int bits = metadata.bit_depth.bits_per_sample;
+        int exp_bits = metadata.bit_depth.exponent_bits_per_sample;
+        gi.channel[c_out].hshift =
+            enc_state->shared.frame_header.chroma_subsampling.HShift(c);
+        gi.channel[c_out].vshift =
+            enc_state->shared.frame_header.chroma_subsampling.VShift(c);
+        size_t xsize_shifted = DivCeil(xsize, 1 << gi.channel[c_out].hshift);
+        size_t ysize_shifted = DivCeil(ysize, 1 << gi.channel[c_out].vshift);
+        gi.channel[c_out].shrink(xsize_shifted, ysize_shifted);
+        for (size_t y = 0; y < ysize_shifted; ++y) {
+          const float* const JXL_RESTRICT row_in = color->PlaneRow(c, y);
+          pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y);
+          JXL_RETURN_IF_ERROR(float_to_int(row_in, row_out, xsize_shifted, bits,
+                                           exp_bits, fp, factor));
+        }
+      }
+    }
+    if (metadata.color_encoding.IsGray() &&
+        cparams.color_transform == ColorTransform::kNone)
+      c = 1;
+  }
+
+  for (size_t ec = 0; ec < extra_channels.size(); ec++, c++) {
+    const ExtraChannelInfo& eci = metadata.extra_channel_info[ec];
+    size_t ecups = frame_header.extra_channel_upsampling[ec];
+    gi.channel[c].shrink(DivCeil(frame_dim.xsize_upsampled, ecups),
+                         DivCeil(frame_dim.ysize_upsampled, ecups));
+    gi.channel[c].hshift = gi.channel[c].vshift =
+        CeilLog2Nonzero(ecups) - CeilLog2Nonzero(frame_header.upsampling);
+
+    int bits = eci.bit_depth.bits_per_sample;
+    int exp_bits = eci.bit_depth.exponent_bits_per_sample;
+    bool fp = eci.bit_depth.floating_point_sample;
+    float factor = (fp ? 1 : ((1u << eci.bit_depth.bits_per_sample) - 1));
+    for (size_t y = 0; y < gi.channel[c].plane.ysize(); ++y) {
+      const float* const JXL_RESTRICT row_in = extra_channels[ec].Row(y);
+      pixel_type* const JXL_RESTRICT row_out = gi.channel[c].Row(y);
+      JXL_RETURN_IF_ERROR(float_to_int(row_in, row_out,
+                                       gi.channel[c].plane.xsize(), bits,
+                                       exp_bits, fp, factor));
+    }
+  }
+  JXL_ASSERT(c == nb_chans);
+
+  // Set options and apply transformations
+
+  if (quality < 100) {
+    if (cparams.palette_colors != 0) {
+      JXL_DEBUG_V(3, "Lossy encode, not doing palette transforms");
+    }
+    if (cparams.color_transform == ColorTransform::kXYB) {
+      cparams.channel_colors_pre_transform_percent = 0;
+    }
+    cparams.channel_colors_percent = 0;
+    cparams.palette_colors = 0;
+    cparams.lossy_palette = false;
+  }
+
+  // if few colors, do all-channel palette before trying channel palette
+  // Logic is as follows:
+  // - if you can make a palette with few colors (arbitrary threshold: 200),
+  //   then you can also make channel palettes, but they will just be extra
+  //   signaling cost for almost no benefit
+  // - if the palette needs more colors, then channel palette might help to
+  //   reduce palette signaling cost
+  if (cparams.palette_colors != 0 && cparams.speed_tier < SpeedTier::kFalcon) {
+    // all-channel palette (e.g. RGBA)
+    if (gi.channel.size() > 1) {
+      Transform maybe_palette(TransformId::kPalette);
+      maybe_palette.begin_c = gi.nb_meta_channels;
+      maybe_palette.num_c = gi.channel.size() - gi.nb_meta_channels;
+      maybe_palette.nb_colors =
+          std::min(std::min(200, (int)(xsize * ysize / 8)),
+                   std::abs(cparams.palette_colors) / 16);
+      maybe_palette.ordered_palette = cparams.palette_colors >= 0;
+      maybe_palette.lossy_palette = false;
+      do_transform(gi, maybe_palette, weighted::Header(), pool);
+    }
+  }
+
+  // Global channel palette
+  if (cparams.channel_colors_pre_transform_percent > 0 &&
+      !cparams.lossy_palette &&
+      (cparams.speed_tier <= SpeedTier::kThunder ||
+       (do_color && metadata.bit_depth.bits_per_sample > 8))) {
+    // single channel palette (like FLIF's ChannelCompact)
+    size_t nb_channels = gi.channel.size() - gi.nb_meta_channels;
+    for (size_t i = 0; i < nb_channels; i++) {
+      int min, max;
+      compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max);
+      int64_t colors = max - min + 1;
+      JXL_DEBUG_V(10, "Channel %zu: range=%i..%i", i, min, max);
+      Transform maybe_palette_1(TransformId::kPalette);
+      maybe_palette_1.begin_c = i + gi.nb_meta_channels;
+      maybe_palette_1.num_c = 1;
+      // simple heuristic: if less than X percent of the values in the range
+      // actually occur, it is probably worth it to do a compaction
+      // (but only if the channel palette is less than 6% the size of the
+      // image itself)
+      maybe_palette_1.nb_colors = std::min(
+          (int)(xsize * ysize / 16),
+          (int)(cparams.channel_colors_pre_transform_percent / 100. * colors));
+      if (do_transform(gi, maybe_palette_1, weighted::Header(), pool)) {
+        // effective bit depth is lower, adjust quantization accordingly
+        compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max);
+        if (max < maxval) maxval = max;
+      }
+    }
+  }
+
+  // Global palette
+  if ((cparams.palette_colors != 0 || cparams.lossy_palette) &&
+      cparams.speed_tier < SpeedTier::kFalcon) {
+    // all-channel palette (e.g. RGBA)
+    if (gi.channel.size() - gi.nb_meta_channels > 1) {
+      Transform maybe_palette(TransformId::kPalette);
+      maybe_palette.begin_c = gi.nb_meta_channels;
+      maybe_palette.num_c = gi.channel.size() - gi.nb_meta_channels;
+      maybe_palette.nb_colors =
+          std::min((int)(xsize * ysize / 8), std::abs(cparams.palette_colors));
+      maybe_palette.ordered_palette = cparams.palette_colors >= 0;
+      maybe_palette.lossy_palette =
+          (cparams.lossy_palette && maybe_palette.num_c == 3);
+      if (maybe_palette.lossy_palette) {
+        maybe_palette.predictor = Predictor::Average4;
+      }
+      // TODO(veluca): use a custom weighted header if using the weighted
+      // predictor.
+      do_transform(gi, maybe_palette, weighted::Header(), pool);
+    }
+    // all-minus-one-channel palette (RGB with separate alpha, or CMY with
+    // separate K)
+    if (gi.channel.size() - gi.nb_meta_channels > 3) {
+      Transform maybe_palette_3(TransformId::kPalette);
+      maybe_palette_3.begin_c = gi.nb_meta_channels;
+      maybe_palette_3.num_c = gi.channel.size() - gi.nb_meta_channels - 1;
+      maybe_palette_3.nb_colors =
+          std::min((int)(xsize * ysize / 8), std::abs(cparams.palette_colors));
+      maybe_palette_3.ordered_palette = cparams.palette_colors >= 0;
+      maybe_palette_3.lossy_palette = cparams.lossy_palette;
+      if (maybe_palette_3.lossy_palette) {
+        maybe_palette_3.predictor = Predictor::Average4;
+      }
+      do_transform(gi, maybe_palette_3, weighted::Header(), pool);
+    }
+  }
+
+  if (cparams.color_transform == ColorTransform::kNone && do_color && !fp &&
+      gi.channel.size() - gi.nb_meta_channels >= 3) {
+    if (cparams.colorspace == 1 ||
+        (cparams.colorspace < 0 &&
+         (quality < 100 || cparams.speed_tier > SpeedTier::kHare))) {
+      Transform ycocg{TransformId::kRCT};
+      ycocg.rct_type = 6;
+      ycocg.begin_c = gi.nb_meta_channels;
+      do_transform(gi, ycocg, weighted::Header(), pool);
+    } else if (cparams.colorspace >= 2) {
+      Transform sg(TransformId::kRCT);
+      sg.begin_c = gi.nb_meta_channels;
+      sg.rct_type = cparams.colorspace - 2;
+      do_transform(gi, sg, weighted::Header(), pool);
+    }
+  }
+
+  if (cparams.responsive && !gi.channel.empty()) {
+    do_transform(gi, Transform(TransformId::kSqueeze), weighted::Header(),
+                 pool);  // use default squeezing
+  }
+
+  std::vector<uint32_t> quants;
+
+  if (quality < 100 || cquality < 100) {
+    quants.resize(gi.channel.size(), 1);
+    JXL_DEBUG_V(
+        2,
+        "Adding quantization constants corresponding to luma quality %.2f "
+        "and chroma quality %.2f",
+        quality, cquality);
+    if (!cparams.responsive) {
+      JXL_DEBUG_V(1,
+                  "Warning: lossy compression without Squeeze "
+                  "transform is just color quantization.");
+      quality = (400 + quality) / 5;
+      cquality = (400 + cquality) / 5;
+    }
+
+    // convert 'quality' to quantization scaling factor
+    if (quality > 50) {
+      quality = 200.0 - quality * 2.0;
+    } else {
+      quality = 900.0 - quality * 16.0;
+    }
+    if (cquality > 50) {
+      cquality = 200.0 - cquality * 2.0;
+    } else {
+      cquality = 900.0 - cquality * 16.0;
+    }
+    if (cparams.color_transform != ColorTransform::kXYB) {
+      quality *= 0.01f * maxval / 255.f;
+      cquality *= 0.01f * maxval / 255.f;
+    } else {
+      quality *= 0.01f;
+      cquality *= 0.01f;
+    }
+
+    if (cparams.options.nb_repeats == 0) {
+      return JXL_FAILURE("nb_repeats = 0 not supported with modular lossy!");
+    }
+    for (uint32_t i = gi.nb_meta_channels; i < gi.channel.size(); i++) {
+      Channel& ch = gi.channel[i];
+      int shift = ch.hshift + ch.vshift;  // number of pixel halvings
+      if (shift > 16) shift = 16;
+      if (shift > 0) shift--;
+      int q;
+      // assuming default Squeeze here
+      int component = ((i - gi.nb_meta_channels) % nb_chans);
+      // last 4 channels are final chroma residuals
+      if (nb_chans > 2 && i >= gi.channel.size() - 4) {
+        component = 1;
+      }
+
+      if (cparams.color_transform == ColorTransform::kXYB && component < 3) {
+        q = (component == 0 ? quality : cquality) * squeeze_quality_factor_xyb *
+            squeeze_xyb_qtable[component][shift];
+      } else {
+        if (cparams.colorspace != 0 && component > 0 && component < 3) {
+          q = cquality * squeeze_quality_factor * squeeze_chroma_qtable[shift];
+        } else {
+          q = quality * squeeze_quality_factor * squeeze_luma_factor *
+              squeeze_luma_qtable[shift];
+        }
+      }
+      if (q < 1) q = 1;
+      QuantizeChannel(gi.channel[i], q);
+      quants[i] = q;
+    }
+  }
+
+  // Fill other groups.
+  struct GroupParams {
+    Rect rect;
+    int minShift;
+    int maxShift;
+    ModularStreamId id;
+  };
+  std::vector<GroupParams> stream_params;
+
+  stream_options[0] = cparams.options;
+
+  // DC
+  for (size_t group_id = 0; group_id < frame_dim.num_dc_groups; group_id++) {
+    const size_t gx = group_id % frame_dim.xsize_dc_groups;
+    const size_t gy = group_id / frame_dim.xsize_dc_groups;
+    const Rect rect(gx * frame_dim.dc_group_dim, gy * frame_dim.dc_group_dim,
+                    frame_dim.dc_group_dim, frame_dim.dc_group_dim);
+    // minShift==3 because (frame_dim.dc_group_dim >> 3) == frame_dim.group_dim
+    // maxShift==1000 is infinity
+    stream_params.push_back(
+        GroupParams{rect, 3, 1000, ModularStreamId::ModularDC(group_id)});
+  }
+  // AC global -> nothing.
+  // AC
+  for (size_t group_id = 0; group_id < frame_dim.num_groups; group_id++) {
+    const size_t gx = group_id % frame_dim.xsize_groups;
+    const size_t gy = group_id / frame_dim.xsize_groups;
+    const Rect mrect(gx * frame_dim.group_dim, gy * frame_dim.group_dim,
+                     frame_dim.group_dim, frame_dim.group_dim);
+    for (size_t i = 0; i < enc_state->progressive_splitter.GetNumPasses();
+         i++) {
+      int maxShift, minShift;
+      frame_header.passes.GetDownsamplingBracket(i, minShift, maxShift);
+      stream_params.push_back(GroupParams{
+          mrect, minShift, maxShift, ModularStreamId::ModularAC(group_id, i)});
+    }
+  }
+  gi_channel.resize(stream_images.size());
+
+  RunOnPool(
+      pool, 0, stream_params.size(), ThreadPool::SkipInit(),
+      [&](size_t i, size_t _) {
+        stream_options[stream_params[i].id.ID(frame_dim)] = cparams.options;
+        JXL_CHECK(PrepareStreamParams(
+            stream_params[i].rect, cparams, stream_params[i].minShift,
+            stream_params[i].maxShift, stream_params[i].id, do_color));
+      },
+      "ChooseParams");
+  {
+    // Clear out channels that have been copied to groups.
+    Image& full_image = stream_images[0];
+    size_t c = full_image.nb_meta_channels;
+    for (; c < full_image.channel.size(); c++) {
+      Channel& fc = full_image.channel[c];
+      if (fc.w > frame_dim.group_dim || fc.h > frame_dim.group_dim) break;
+    }
+    for (; c < full_image.channel.size(); c++) {
+      full_image.channel[c].plane = ImageI();
+    }
+  }
+
+  if (!quants.empty()) {
+    for (uint32_t stream_id = 0; stream_id < stream_images.size();
+         stream_id++) {
+      // skip non-modular stream_ids
+      if (stream_id > 0 && gi_channel[stream_id].empty()) continue;
+      Image& image = stream_images[stream_id];
+      const ModularOptions& options = stream_options[stream_id];
+      for (uint32_t i = image.nb_meta_channels; i < image.channel.size(); i++) {
+        if (i >= image.nb_meta_channels &&
+            (image.channel[i].w > options.max_chan_size ||
+             image.channel[i].h > options.max_chan_size)) {
+          continue;
+        }
+        if (stream_id > 0 && gi_channel[stream_id].empty()) continue;
+        size_t ch_id = stream_id == 0
+                           ? i
+                           : gi_channel[stream_id][i - image.nb_meta_channels];
+        uint32_t q = quants[ch_id];
+        // Inform the tree splitting heuristics that each channel in each group
+        // used this quantization factor. This will produce a tree with the
+        // given multipliers.
+        if (multiplier_info.empty() ||
+            multiplier_info.back().range[1][0] != stream_id ||
+            multiplier_info.back().multiplier != q) {
+          StaticPropRange range;
+          range[0] = {i, i + 1};
+          range[1] = {stream_id, stream_id + 1};
+          multiplier_info.push_back({range, (uint32_t)q});
+        } else {
+          // Previous channel in the same group had the same quantization
+          // factor. Don't provide two different ranges, as that creates
+          // unnecessary nodes.
+          multiplier_info.back().range[0][1] = i + 1;
+        }
+      }
+    }
+    // Merge group+channel settings that have the same channels and quantization
+    // factors, to avoid unnecessary nodes.
+    std::sort(multiplier_info.begin(), multiplier_info.end(),
+              [](ModularMultiplierInfo a, ModularMultiplierInfo b) {
+                return std::make_tuple(a.range, a.multiplier) <
+                       std::make_tuple(b.range, b.multiplier);
+              });
+    size_t new_num = 1;
+    for (size_t i = 1; i < multiplier_info.size(); i++) {
+      ModularMultiplierInfo& prev = multiplier_info[new_num - 1];
+      ModularMultiplierInfo& cur = multiplier_info[i];
+      if (prev.range[0] == cur.range[0] && prev.multiplier == cur.multiplier &&
+          prev.range[1][1] == cur.range[1][0]) {
+        prev.range[1][1] = cur.range[1][1];
+      } else {
+        multiplier_info[new_num++] = multiplier_info[i];
+      }
+    }
+    multiplier_info.resize(new_num);
+  }
+
+  JXL_RETURN_IF_ERROR(ValidateChannelDimensions(gi, stream_options[0]));
+
+  return PrepareEncoding(pool, enc_state->shared.frame_dim,
+                         enc_state->heuristics.get(), aux_out);
+}
+
+Status ModularFrameEncoder::PrepareEncoding(ThreadPool* pool,
+                                            const FrameDimensions& frame_dim,
+                                            EncoderHeuristics* heuristics,
+                                            AuxOut* aux_out) {
+  if (!tree.empty()) return true;  // A tree is already present: nothing to do.
+  // Compute the tree (custom, per-chunk learned/predefined, or fixed), tokenize
+  // it, then tokenize every stream against it. Per-stream outputs sized first.
+  size_t num_streams = stream_images.size();
+  stream_headers.resize(num_streams);
+  tokens.resize(num_streams);
+
+  if (heuristics->CustomFixedTreeLossless(frame_dim, &tree)) {
+    // Using a fixed tree; presumably the call above filled in `tree` - confirm.
+  } else if (cparams.speed_tier < SpeedTier::kFalcon || quality != 100 ||
+             !cparams.modular_mode) {
+    // Avoid creating a tree with leaves that don't correspond to any pixels.
+    std::vector<size_t> useful_splits;
+    useful_splits.reserve(tree_splits.size());  // NOTE(review): the loop bound below wraps if tree_splits is empty.
+    for (size_t chunk = 0; chunk < tree_splits.size() - 1; chunk++) {
+      bool has_pixels = false;
+      size_t start = tree_splits[chunk];     // First stream index of the chunk.
+      size_t stop = tree_splits[chunk + 1];  // One past its last stream index.
+      for (size_t i = start; i < stop; i++) {
+        for (const Channel& c : stream_images[i].channel) {
+          if (c.w && c.h) has_pixels = true;  // Any non-degenerate channel.
+        }
+      }
+      if (has_pixels) {
+        useful_splits.push_back(tree_splits[chunk]);
+      }
+    }
+    // Don't do anything if modular mode does not have any pixels in this image
+    if (useful_splits.empty()) return true;
+    useful_splits.push_back(tree_splits.back());  // Upper bound of last chunk.
+
+    std::atomic_flag invalid_force_wp = ATOMIC_FLAG_INIT;  // Set by a worker whose sample setup fails.
+
+    std::vector<Tree> trees(useful_splits.size() - 1);  // One tree per chunk.
+    RunOnPool(
+        pool, 0, useful_splits.size() - 1, ThreadPool::SkipInit(),
+        [&](size_t chunk, size_t _) {
+          // TODO(veluca): parallelize more.
+          size_t total_pixels = 0;
+          uint32_t start = useful_splits[chunk];
+          uint32_t stop = useful_splits[chunk + 1];
+          uint32_t max_c = 0;  // Largest channel count among the chunk's streams.
+          if (stream_options[start].tree_kind !=
+              ModularOptions::TreeKind::kLearn) {  // Not learning: predefined tree.
+            for (size_t i = start; i < stop; i++) {
+              for (const Channel& ch : stream_images[i].channel) {
+                total_pixels += ch.w * ch.h;
+              }
+            }
+            trees[chunk] =
+                PredefinedTree(stream_options[start].tree_kind, total_pixels);
+            return;
+          }
+          TreeSamples tree_samples;  // Configured from the chunk's first stream.
+          if (!tree_samples.SetPredictor(stream_options[start].predictor,
+                                         stream_options[start].wp_tree_mode)) {
+            invalid_force_wp.test_and_set(std::memory_order_acq_rel);
+            return;
+          }
+          if (!tree_samples.SetProperties(
+                  stream_options[start].splitting_heuristics_properties,
+                  stream_options[start].wp_tree_mode)) {
+            invalid_force_wp.test_and_set(std::memory_order_acq_rel);
+            return;
+          }
+          std::vector<pixel_type> pixel_samples;
+          std::vector<pixel_type> diff_samples;
+          std::vector<uint32_t> group_pixel_count;
+          std::vector<uint32_t> channel_pixel_count;
+          for (size_t i = start; i < stop; i++) {
+            max_c = std::max<uint32_t>(stream_images[i].channel.size(), max_c);
+            CollectPixelSamples(stream_images[i], stream_options[i], i,
+                                group_pixel_count, channel_pixel_count,
+                                pixel_samples, diff_samples);
+          }
+          StaticPropRange range;
+          range[0] = {0, max_c};     // Channel index range of this chunk.
+          range[1] = {start, stop};  // Stream index range of this chunk.
+          auto local_multiplier_info = multiplier_info;  // Per-worker copy.
+          // The collected samples feed the property pre-quantization below.
+          tree_samples.PreQuantizeProperties(
+              range, local_multiplier_info, group_pixel_count,
+              channel_pixel_count, pixel_samples, diff_samples,
+              stream_options[start].max_property_values);
+          for (size_t i = start; i < stop; i++) {
+            JXL_CHECK(ModularGenericCompress(
+                stream_images[i], stream_options[i], /*writer=*/nullptr,
+                /*aux_out=*/nullptr, 0, i, &tree_samples, &total_pixels));  // Null writer; presumably only gathers samples - confirm.
+          }
+
+          // TODO(veluca): parallelize more.
+          trees[chunk] =
+              LearnTree(std::move(tree_samples), total_pixels,
+                        stream_options[start], local_multiplier_info, range);
+        },
+        "LearnTrees");
+    if (invalid_force_wp.test_and_set(std::memory_order_acq_rel)) {  // True iff some worker set the flag.
+      return JXL_FAILURE("PrepareEncoding: force_no_wp with {Weighted}");
+    }
+    tree.clear();
+    MergeTrees(trees, useful_splits, 0, useful_splits.size() - 1, &tree);  // Combine per-chunk trees into `tree`.
+  } else {
+    // Fixed tree, chosen by speed tier; needs the pixel count of all streams.
+    size_t total_pixels = 0;
+    for (const Image& img : stream_images) {
+      for (const Channel& ch : img.channel) {
+        total_pixels += ch.w * ch.h;
+      }
+    }
+    if (cparams.speed_tier <= SpeedTier::kFalcon) {
+      tree = PredefinedTree(ModularOptions::TreeKind::kWPFixedDC, total_pixels);
+    } else if (cparams.speed_tier <= SpeedTier::kThunder) {
+      tree = PredefinedTree(ModularOptions::TreeKind::kGradientFixedDC,
+                            total_pixels);
+    } else {
+      tree = {PropertyDecisionNode::Leaf(Predictor::Gradient)};  // Single-leaf tree.
+    }
+  }
+  tree_tokens.resize(1);
+  tree_tokens[0].clear();
+  Tree decoded_tree;
+  TokenizeTree(tree, &tree_tokens[0], &decoded_tree);  // Fills tree_tokens[0] and decoded_tree.
+  JXL_ASSERT(tree.size() == decoded_tree.size());
+  tree = std::move(decoded_tree);  // Streams are tokenized against this version.
+
+  if (WantDebugOutput(aux_out)) {
+    PrintTree(tree, aux_out->debug_prefix + "/global_tree");
+  }
+
+  image_widths.resize(num_streams);
+  RunOnPool(
+      pool, 0, num_streams, ThreadPool::SkipInit(),
+      [&](size_t stream_id, size_t _) {
+        AuxOut my_aux_out;  // Task-local; only two fields are copied from aux_out.
+        if (aux_out) {
+          my_aux_out.dump_image = aux_out->dump_image;
+          my_aux_out.debug_prefix = aux_out->debug_prefix;
+        }
+        tokens[stream_id].clear();
+        JXL_CHECK(ModularGenericCompress(
+            stream_images[stream_id], stream_options[stream_id],
+            /*writer=*/nullptr, &my_aux_out, 0, stream_id,
+            /*tree_samples=*/nullptr,
+            /*total_pixels=*/nullptr,
+            /*tree=*/&tree, /*header=*/&stream_headers[stream_id],
+            /*tokens=*/&tokens[stream_id],
+            /*widths=*/&image_widths[stream_id]));
+      },
+      "ComputeTokens");
+  return true;
+}
+
+Status ModularFrameEncoder::EncodeGlobalInfo(BitWriter* writer,
+                                             AuxOut* aux_out) {
+  // A flag bit announces the tree; none if using brotli or not modular mode.
+  BitWriter::Allotment allotment(writer, 1);
+  const bool has_tree = !tree_tokens.empty() && !tree_tokens[0].empty();
+  writer->Write(1, has_tree ? 1 : 0);
+  ReclaimAndCharge(writer, &allotment, kLayerModularTree, aux_out);
+  if (!has_tree) return true;
+
+  // Choose the histogram parameters from the encoder/decoder speed tiers.
+  HistogramParams params;
+  const auto tier = cparams.speed_tier;
+  if (tier > SpeedTier::kKitten) {
+    params.clustering = HistogramParams::ClusteringType::kFast;
+    params.ans_histogram_strategy =
+        tier > SpeedTier::kThunder
+            ? HistogramParams::ANSHistogramStrategy::kFast
+            : HistogramParams::ANSHistogramStrategy::kApproximate;
+    if (!(cparams.decoding_speed_tier >= 3 && cparams.modular_mode)) {
+      params.lz77_method = HistogramParams::LZ77Method::kNone;
+    } else if (tier >= SpeedTier::kFalcon) {
+      params.lz77_method = HistogramParams::LZ77Method::kRLE;
+    } else {
+      params.lz77_method = HistogramParams::LZ77Method::kLZ77;
+    }
+    // Near-lossless DC, as well as modular mode, require choosing hybrid uint
+    // more carefully.
+    const bool nonzero_dc_precision =
+        !extra_dc_precision.empty() && extra_dc_precision[0] != 0;
+    const bool slow_modular =
+        cparams.modular_mode && tier < SpeedTier::kCheetah;
+    params.uint_method = (nonzero_dc_precision || slow_modular)
+                             ? HistogramParams::HybridUintMethod::kFast
+                             : HistogramParams::HybridUintMethod::kNone;
+  } else if (tier <= SpeedTier::kTortoise) {
+    params.lz77_method = HistogramParams::LZ77Method::kOptimal;
+  } else {
+    params.lz77_method = HistogramParams::LZ77Method::kLZ77;
+  }
+  if (cparams.decoding_speed_tier >= 1) params.max_histograms = 12;
+
+  // The tree goes first: its histograms, then its tokens.
+  BuildAndEncodeHistograms(params, kNumTreeContexts, tree_tokens, &code,
+                           &context_map, writer, kLayerModularTree, aux_out);
+  WriteTokens(tree_tokens[0], code, context_map, writer, kLayerModularTree,
+              aux_out);
+  // Then the histograms for the stream tokens, now with the image widths set.
+  params.image_widths = image_widths;
+  BuildAndEncodeHistograms(params, (tree.size() + 1) / 2, tokens, &code,
+                           &context_map, writer, kLayerModularGlobal, aux_out);
+  return true;
+}
+
+Status ModularFrameEncoder::EncodeStream(BitWriter* writer, AuxOut* aux_out,
+                                         size_t layer,
+                                         const ModularStreamId& stream) {
+  // Writes one stream's header followed by its tokens.
+  const size_t idx = stream.ID(frame_dim);
+  // Image with no channels, header never gets decoded.
+  if (stream_images[idx].channel.empty()) return true;
+  JXL_RETURN_IF_ERROR(
+      Bundle::Write(stream_headers[idx], writer, layer, aux_out));
+  WriteTokens(tokens[idx], code, context_map, writer, layer, aux_out);
+  return true;
+}
+
+namespace {
+float EstimateWPCost(const Image& img, size_t i) {  // Cost estimate for `img` with the predictor header set up by PredictorMode(i).
+  size_t extra_bits = 0;  // Running sum of `nbits` over all residuals.
+  float histo_cost = 0;   // Running sum of per-context histogram entropies.
+  HybridUintConfig config;
+  int32_t cutoffs[] = {-500, -392, -255, -191, -127, -95, -63, -47, -31,
+                       -23,  -15,  -11,  -7,   -4,   -3,  -1,  0,   1,
+                       3,    5,    7,    11,   15,   23,  31,  47,  63,
+                       95,   127,  191,  255,  392,  500};
+  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;  // One more context than cutoffs.
+  Histogram histo[nc] = {};
+  weighted::Header wp_header;
+  PredictorMode(i, &wp_header);
+  for (const Channel& ch : img.channel) {
+    const intptr_t onerow = ch.plane.PixelsPerRow();  // Used below to step to the row above.
+    weighted::State wp_state(wp_header, ch.w, ch.h);  // Fresh state per channel.
+    Properties properties(1);  // Only properties[0] is read below.
+    for (size_t y = 0; y < ch.h; y++) {
+      const pixel_type* JXL_RESTRICT r = ch.Row(y);
+      for (size_t x = 0; x < ch.w; x++) {
+        size_t offset = 0;
+        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);  // First column: pixel above, or 0 at the origin.
+        pixel_type_w top = (y ? *(r + x - onerow) : left);  // First row: falls back to left.
+        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
+        pixel_type_w topright =
+            (x + 1 < ch.w && y ? *(r + x + 1 - onerow) : top);
+        pixel_type_w toptop = (y > 1 ? *(r + x - onerow - onerow) : top);
+        pixel_type guess = wp_state.Predict</*compute_properties=*/true>(
+            x, y, ch.w, top, left, topright, topleft, toptop, &properties,
+            offset);
+        size_t ctx = 0;
+        for (int c : cutoffs) {
+          ctx += c >= properties[0];  // ctx = number of cutoffs >= properties[0].
+        }
+        pixel_type res = r[x] - guess;  // Prediction residual.
+        uint32_t token, nbits, bits;
+        config.Encode(PackSigned(res), &token, &nbits, &bits);
+        histo[ctx].Add(token);
+        extra_bits += nbits;
+        wp_state.UpdateErrors(r[x], x, y, ch.w);  // Feed the actual value back into the predictor state.
+      }
+    }
+    for (size_t h = 0; h < nc; h++) {  // Histograms are costed and reset once per channel.
+      histo_cost += histo[h].ShannonEntropy();
+      histo[h].Clear();
+    }
+  }
+  return histo_cost + extra_bits;  // Presumably a size estimate in bits - confirm.
+}
+
+float EstimateCost(const Image& img) {  // Cost estimate for `img` using the ClampedGradient predictor.
+  // TODO(veluca): consider SIMDfication of this code.
+  size_t extra_bits = 0;  // Running sum of `nbits` over all residuals.
+  float histo_cost = 0;   // Running sum of per-context histogram entropies.
+  HybridUintConfig config;
+  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
+                        47, 63, 95, 127, 191, 255, 392, 500};
+  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;  // One more context than cutoffs.
+  Histogram histo[nc] = {};
+  for (const Channel& ch : img.channel) {
+    const intptr_t onerow = ch.plane.PixelsPerRow();  // Used below to step to the row above.
+    for (size_t y = 0; y < ch.h; y++) {
+      const pixel_type* JXL_RESTRICT r = ch.Row(y);
+      for (size_t x = 0; x < ch.w; x++) {
+        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);  // First column: pixel above, or 0 at the origin.
+        pixel_type_w top = (y ? *(r + x - onerow) : left);  // First row: falls back to left.
+        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
+        size_t maxdiff = std::max(std::max(left, top), topleft) -
+                         std::min(std::min(left, top), topleft);  // Spread (max - min) of the three neighbours.
+        size_t ctx = 0;
+        for (uint32_t c : cutoffs) {
+          ctx += c > maxdiff;  // ctx = number of cutoffs strictly above maxdiff.
+        }
+        pixel_type res = r[x] - ClampedGradient(top, left, topleft);  // Prediction residual.
+        uint32_t token, nbits, bits;
+        config.Encode(PackSigned(res), &token, &nbits, &bits);
+        histo[ctx].Add(token);
+        extra_bits += nbits;
+      }
+    }
+    for (size_t h = 0; h < nc; h++) {  // Histograms are costed and reset once per channel.
+      histo_cost += histo[h].ShannonEntropy();
+      histo[h].Clear();
+    }
+  }
+  return histo_cost + extra_bits;  // Presumably a size estimate in bits - confirm.
+}
+
+}  // namespace
+
+Status ModularFrameEncoder::PrepareStreamParams(const Rect& rect,
+                                                const CompressParams& cparams,
+                                                int minShift, int maxShift,
+                                                const ModularStreamId& stream,
+                                                bool do_color) {  // Fills this stream's image from `rect` of the full image, then picks per-group transforms and wp_mode.
+  size_t stream_id = stream.ID(frame_dim);
+  JXL_ASSERT(stream_id != 0);  // Stream 0 is the full image read from below.
+  Image& full_image = stream_images[0];
+  const size_t xsize = rect.xsize();
+  const size_t ysize = rect.ysize();
+  Image& gi = stream_images[stream_id];
+  gi = Image(xsize, ysize, full_image.bitdepth, 0);  // Reset; channels are appended below.
+  // start at the first bigger-than-frame_dim.group_dim non-metachannel
+  size_t c = full_image.nb_meta_channels;
+  for (; c < full_image.channel.size(); c++) {
+    Channel& fc = full_image.channel[c];
+    if (fc.w > frame_dim.group_dim || fc.h > frame_dim.group_dim) break;
+  }
+  for (; c < full_image.channel.size(); c++) {  // Continues from the channel found above.
+    Channel& fc = full_image.channel[c];
+    int shift = std::min(fc.hshift, fc.vshift);
+    if (shift > maxShift) continue;  // Only shifts within [minShift, maxShift] belong to this stream.
+    if (shift < minShift) continue;
+    Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift,
+           rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h);  // Presumably clamped to fc.w x fc.h - confirm.
+    if (r.xsize() == 0 || r.ysize() == 0) continue;
+    gi_channel[stream_id].push_back(c);  // Record the source channel index.
+    Channel gc(r.xsize(), r.ysize());
+    gc.hshift = fc.hshift;
+    gc.vshift = fc.vshift;
+    for (size_t y = 0; y < r.ysize(); ++y) {  // Copy the cropped region row by row.
+      const pixel_type* const JXL_RESTRICT row_in = r.ConstRow(fc.plane, y);
+      pixel_type* const JXL_RESTRICT row_out = gc.Row(y);
+      for (size_t x = 0; x < r.xsize(); ++x) {
+        row_out[x] = row_in[x];
+      }
+    }
+    gi.channel.emplace_back(std::move(gc));
+  }
+
+  // Do some per-group transforms
+
+  float quality = cparams.quality_pair.first;
+
+  // Local palette
+  // TODO(veluca): make this work with quantize-after-prediction in lossy mode.
+  if (quality == 100 && cparams.palette_colors != 0 &&
+      cparams.speed_tier < SpeedTier::kCheetah) {
+    // all-channel palette (e.g. RGBA)
+    if (gi.channel.size() - gi.nb_meta_channels > 1) {
+      Transform maybe_palette(TransformId::kPalette);
+      maybe_palette.begin_c = gi.nb_meta_channels;
+      maybe_palette.num_c = gi.channel.size() - gi.nb_meta_channels;
+      maybe_palette.nb_colors = std::abs(cparams.palette_colors);
+      maybe_palette.ordered_palette = cparams.palette_colors >= 0;  // The sign of palette_colors selects ordering.
+      do_transform(gi, maybe_palette, weighted::Header());
+    }
+    // all-minus-one-channel palette (RGB with separate alpha, or CMY with
+    // separate K)
+    if (gi.channel.size() - gi.nb_meta_channels > 3) {
+      Transform maybe_palette_3(TransformId::kPalette);
+      maybe_palette_3.begin_c = gi.nb_meta_channels;
+      maybe_palette_3.num_c = gi.channel.size() - gi.nb_meta_channels - 1;
+      maybe_palette_3.nb_colors = std::abs(cparams.palette_colors);
+      maybe_palette_3.ordered_palette = cparams.palette_colors >= 0;
+      maybe_palette_3.lossy_palette = cparams.lossy_palette;
+      if (maybe_palette_3.lossy_palette) {
+        maybe_palette_3.predictor = Predictor::Weighted;
+      }
+      do_transform(gi, maybe_palette_3, weighted::Header());
+    }
+  }
+
+  // Local channel palette
+  if (cparams.channel_colors_percent > 0 && quality == 100 &&
+      !cparams.lossy_palette && cparams.speed_tier < SpeedTier::kCheetah) {
+    // single channel palette (like FLIF's ChannelCompact)
+    size_t nb_channels = gi.channel.size() - gi.nb_meta_channels;
+    for (size_t i = 0; i < nb_channels; i++) {
+      int min, max;
+      compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max);
+      int colors = max - min + 1;  // Size of the channel's value range.
+      JXL_DEBUG_V(10, "Channel %zu: range=%i..%i", i, min, max);
+      Transform maybe_palette_1(TransformId::kPalette);
+      maybe_palette_1.begin_c = i + gi.nb_meta_channels;
+      maybe_palette_1.num_c = 1;
+      // simple heuristic: if less than X percent of the values in the range
+      // actually occur, it is probably worth it to do a compaction
+      // (but only if the channel palette is less than 80% the size of the
+      // image itself)
+      maybe_palette_1.nb_colors =
+          std::min((int)(xsize * ysize * 0.8),
+                   (int)(cparams.channel_colors_percent / 100. * colors));
+      do_transform(gi, maybe_palette_1, weighted::Header());
+    }
+  }
+
+  // lossless and no specific color transform specified: try Nothing, YCoCg,
+  // and 17 RCTs
+  if (cparams.color_transform == ColorTransform::kNone && quality == 100 &&
+      cparams.colorspace < 0 && gi.channel.size() - gi.nb_meta_channels >= 3 &&
+      cparams.responsive == false && do_color &&
+      cparams.speed_tier <= SpeedTier::kHare) {
+    Transform sg(TransformId::kRCT);
+    sg.begin_c = gi.nb_meta_channels;
+    size_t nb_rcts_to_try = 0;  // How many candidates from the list below get evaluated.
+    switch (cparams.speed_tier) {
+      case SpeedTier::kLightning:
+      case SpeedTier::kThunder:
+      case SpeedTier::kFalcon:
+      case SpeedTier::kCheetah:
+        nb_rcts_to_try = 0;  // Just do global YCoCg
+        break;
+      case SpeedTier::kHare:
+        nb_rcts_to_try = 4;
+        break;
+      case SpeedTier::kWombat:
+        nb_rcts_to_try = 5;
+        break;
+      case SpeedTier::kSquirrel:
+        nb_rcts_to_try = 7;
+        break;
+      case SpeedTier::kKitten:
+        nb_rcts_to_try = 9;
+        break;
+      case SpeedTier::kTortoise:
+        nb_rcts_to_try = 19;
+        break;
+    }
+    float best_cost = std::numeric_limits<float>::max();
+    size_t best_rct = 0;  // Stays 0 if no candidate is tried.
+    // These should be 19 actually different transforms; the remaining ones
+    // are equivalent to one of these (note that the first two are do-nothing
+    // and YCoCg) modulo channel reordering (which only matters in the case of
+    // MA-with-prev-channels-properties) and/or sign (e.g. RmG vs GmR)
+    for (int i : {0 * 7 + 0, 0 * 7 + 6, 0 * 7 + 5, 1 * 7 + 3, 3 * 7 + 5,
+                  5 * 7 + 5, 1 * 7 + 5, 2 * 7 + 5, 1 * 7 + 1, 0 * 7 + 4,
+                  1 * 7 + 2, 2 * 7 + 1, 2 * 7 + 2, 2 * 7 + 3, 4 * 7 + 4,
+                  4 * 7 + 5, 0 * 7 + 2, 0 * 7 + 1, 0 * 7 + 3}) {
+      if (nb_rcts_to_try == 0) break;
+      int num_transforms_to_keep = gi.transform.size();  // Transform count before this trial.
+      sg.rct_type = i;
+      do_transform(gi, sg, weighted::Header());
+      float cost = EstimateCost(gi);  // Score the candidate on the transformed image.
+      if (cost < best_cost) {
+        best_rct = i;
+        best_cost = cost;
+      }
+      nb_rcts_to_try--;
+      // Ensure we do not clamp channels to their supposed range, as this
+      // otherwise breaks in the presence of patches.
+      gi.undo_transforms(weighted::Header(), num_transforms_to_keep == 0
+                                                 ? -1
+                                                 : num_transforms_to_keep);
+    }
+    // Apply the best RCT to the image for future encoding.
+    sg.rct_type = best_rct;
+    do_transform(gi, sg, weighted::Header());
+  } else {
+    // No need to try anything, just use the default options.
+  }
+  size_t nb_wp_modes = 1;  // Number of candidate wp_mode values; more at slower tiers.
+  if (cparams.speed_tier <= SpeedTier::kTortoise) {
+    nb_wp_modes = 5;
+  } else if (cparams.speed_tier <= SpeedTier::kKitten) {
+    nb_wp_modes = 2;
+  }
+  if (nb_wp_modes > 1 &&
+      (stream_options[stream_id].predictor == Predictor::Weighted ||
+       stream_options[stream_id].predictor == Predictor::Best ||
+       stream_options[stream_id].predictor == Predictor::Variable)) {
+    float best_cost = std::numeric_limits<float>::max();
+    stream_options[stream_id].wp_mode = 0;
+    for (size_t i = 0; i < nb_wp_modes; i++) {  // Keep the mode with the lowest estimated cost.
+      float cost = EstimateWPCost(gi, i);
+      if (cost < best_cost) {
+        best_cost = cost;
+        stream_options[stream_id].wp_mode = i;
+      }
+    }
+  }
+  return true;
+}
+
+// Near-lossless quantization of one sample against the weighted predictor.
+// `value * inv_factor` is compared with the prediction PredictNoTreeWP makes
+// at (x, y) from `qrow`; a prediction error that rounds into [-2, 2] is kept
+// as is, a larger one is rounded to an even number. Returns the prediction
+// plus the quantized error. `c` is not used.
+int QuantizeWP(const int32_t* qrow, size_t onerow, size_t c, size_t x, size_t y,
+               size_t w, weighted::State* wp_state, float value,
+               float inv_factor) {
+  const float scaled = value * inv_factor;
+  const PredictionResult prediction =
+      PredictNoTreeWP(w, qrow + x, onerow, x, y, Predictor::Weighted, wp_state);
+  const float error = scaled - prediction.guess;
+  int quantized_error = roundf(error);
+  const bool is_large = quantized_error > 2 || quantized_error < -2;
+  if (is_large) {
+    // Halve, round, double: large errors land on multiples of two.
+    quantized_error = roundf(error * 0.5) * 2;
+  }
+  return quantized_error + prediction.guess;
+}
+
+// Same near-lossless scheme as the weighted-predictor variant, but the
+// prediction comes from PredictNoTreeNoWP with the Gradient predictor.
+// Returns prediction + quantized prediction error, where errors outside
+// [-2, 2] are rounded to an even number. `c` is not used.
+int QuantizeGradient(const int32_t* qrow, size_t onerow, size_t c, size_t x,
+                     size_t y, size_t w, float value, float inv_factor) {
+  const float target = value * inv_factor;
+  const PredictionResult gradient_pred =
+      PredictNoTreeNoWP(w, qrow + x, onerow, x, y, Predictor::Gradient);
+  const float delta = target - gradient_pred.guess;
+  const int rounded = roundf(delta);
+  const bool keep_exact = !(rounded > 2 || rounded < -2);
+  const int residual =
+      keep_exact ? rounded : static_cast<int>(roundf(delta * 0.5) * 2);
+  return residual + gradient_pred.guess;
+}
+
+// Quantizes the VarDCT DC image of DC group `group_index` into the modular
+// stream image for that group, then dequantizes it back into
+// `enc_state->shared` via DequantDC.
+//
+// Input channel c is stored in modular channel `c < 2 ? c ^ 1 : c`, i.e.
+// input channels 0 and 1 are swapped. Channels are visited in the order
+// {1, 0, 2} so that modular channel 0 (input channel 1) is already quantized
+// when the 4:4:4 paths read it back for the other two channels.
+//
+// Four paths:
+//  * nl_dc with the kGradientFixedDC tree: QuantizeGradient per sample.
+//  * nl_dc otherwise: QuantizeWP per sample, one weighted::State per channel.
+//  * no nl_dc, 4:4:4: plain rounding.
+//  * no nl_dc, subsampled chroma: plain rounding on per-channel shifted
+//    rectangles, without the cross-channel subtraction.
+void ModularFrameEncoder::AddVarDCTDC(const Image3F& dc, size_t group_index,
+                                      bool nl_dc,
+                                      PassesEncoderState* enc_state) {
+  const Rect r = enc_state->shared.DCGroupRect(group_index);
+  // One extra bit of DC precision when near-lossless DC is requested.
+  extra_dc_precision[group_index] = nl_dc ? 1 : 0;
+  float mul = 1 << extra_dc_precision[group_index];
+
+  size_t stream_id = ModularStreamId::VarDCTDC(group_index).ID(frame_dim);
+  stream_options[stream_id].max_chan_size = 0xFFFFFF;
+  stream_options[stream_id].predictor = Predictor::Weighted;
+  stream_options[stream_id].wp_tree_mode = ModularOptions::TreeMode::kWPOnly;
+  if (cparams.speed_tier >= SpeedTier::kSquirrel) {
+    stream_options[stream_id].tree_kind = ModularOptions::TreeKind::kWPFixedDC;
+  }
+  // A requested decoding speed tier overrides the choice made just above.
+  if (cparams.decoding_speed_tier >= 1) {
+    stream_options[stream_id].tree_kind =
+        ModularOptions::TreeKind::kGradientFixedDC;
+  }
+
+  // NOTE(review): presumably (width, height, bit depth 8, 3 channels) --
+  // confirm against the Image constructor.
+  stream_images[stream_id] = Image(r.xsize(), r.ysize(), 8, 3);
+  if (nl_dc && stream_options[stream_id].tree_kind ==
+                   ModularOptions::TreeKind::kGradientFixedDC) {
+    // Near-lossless DC, gradient predictor.
+    JXL_ASSERT(enc_state->shared.frame_header.chroma_subsampling.Is444());
+    for (size_t c : {1, 0, 2}) {
+      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
+      float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
+      float cfl_factor = enc_state->shared.cmap.DCFactors()[c];
+      for (size_t y = 0; y < r.ysize(); y++) {
+        int32_t* quant_row =
+            stream_images[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
+        size_t stride = stream_images[stream_id]
+                            .channel[c < 2 ? c ^ 1 : c]
+                            .plane.PixelsPerRow();
+        const float* row = r.ConstPlaneRow(dc, c, y);
+        if (c == 1) {
+          for (size_t x = 0; x < r.xsize(); x++) {
+            quant_row[x] = QuantizeGradient(quant_row, stride, c, x, y,
+                                            r.xsize(), row[x], inv_factor);
+          }
+        } else {
+          // Subtract the part explained by the already-quantized modular
+          // channel 0 (scaled by y_factor * cfl_factor) before quantizing.
+          int32_t* quant_row_y =
+              stream_images[stream_id].channel[0].plane.Row(y);
+          for (size_t x = 0; x < r.xsize(); x++) {
+            quant_row[x] = QuantizeGradient(
+                quant_row, stride, c, x, y, r.xsize(),
+                row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor);
+          }
+        }
+      }
+    }
+  } else if (nl_dc) {
+    // Near-lossless DC, weighted predictor.
+    JXL_ASSERT(enc_state->shared.frame_header.chroma_subsampling.Is444());
+    for (size_t c : {1, 0, 2}) {
+      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
+      float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
+      float cfl_factor = enc_state->shared.cmap.DCFactors()[c];
+      // Fresh predictor state for every channel, built with a
+      // default-constructed header.
+      weighted::Header header;
+      weighted::State wp_state(header, r.xsize(), r.ysize());
+      for (size_t y = 0; y < r.ysize(); y++) {
+        int32_t* quant_row =
+            stream_images[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
+        size_t stride = stream_images[stream_id]
+                            .channel[c < 2 ? c ^ 1 : c]
+                            .plane.PixelsPerRow();
+        const float* row = r.ConstPlaneRow(dc, c, y);
+        if (c == 1) {
+          for (size_t x = 0; x < r.xsize(); x++) {
+            quant_row[x] = QuantizeWP(quant_row, stride, c, x, y, r.xsize(),
+                                      &wp_state, row[x], inv_factor);
+            // Feed the stored value back so the predictor state follows the
+            // quantized data.
+            wp_state.UpdateErrors(quant_row[x], x, y, r.xsize());
+          }
+        } else {
+          // Same cross-channel subtraction as in the gradient path.
+          int32_t* quant_row_y =
+              stream_images[stream_id].channel[0].plane.Row(y);
+          for (size_t x = 0; x < r.xsize(); x++) {
+            quant_row[x] = QuantizeWP(
+                quant_row, stride, c, x, y, r.xsize(), &wp_state,
+                row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor);
+            wp_state.UpdateErrors(quant_row[x], x, y, r.xsize());
+          }
+        }
+      }
+    }
+  } else if (enc_state->shared.frame_header.chroma_subsampling.Is444()) {
+    // Plain rounding, 4:4:4.
+    for (size_t c : {1, 0, 2}) {
+      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
+      float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
+      float cfl_factor = enc_state->shared.cmap.DCFactors()[c];
+      for (size_t y = 0; y < r.ysize(); y++) {
+        int32_t* quant_row =
+            stream_images[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
+        const float* row = r.ConstPlaneRow(dc, c, y);
+        if (c == 1) {
+          for (size_t x = 0; x < r.xsize(); x++) {
+            quant_row[x] = roundf(row[x] * inv_factor);
+          }
+        } else {
+          int32_t* quant_row_y =
+              stream_images[stream_id].channel[0].plane.Row(y);
+          for (size_t x = 0; x < r.xsize(); x++) {
+            quant_row[x] =
+                roundf((row[x] - quant_row_y[x] * (y_factor * cfl_factor)) *
+                       inv_factor);
+          }
+        }
+      }
+    }
+  } else {
+    // Subsampled chroma: each channel has its own rectangle, obtained by
+    // shifting the group rectangle by that channel's HShift / VShift. The
+    // modular channel is resized to match; no cross-channel subtraction.
+    for (size_t c : {1, 0, 2}) {
+      Rect rect(
+          r.x0() >> enc_state->shared.frame_header.chroma_subsampling.HShift(c),
+          r.y0() >> enc_state->shared.frame_header.chroma_subsampling.VShift(c),
+          r.xsize() >>
+              enc_state->shared.frame_header.chroma_subsampling.HShift(c),
+          r.ysize() >>
+              enc_state->shared.frame_header.chroma_subsampling.VShift(c));
+      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
+      size_t ys = rect.ysize();
+      size_t xs = rect.xsize();
+      Channel& ch = stream_images[stream_id].channel[c < 2 ? c ^ 1 : c];
+      ch.w = xs;
+      ch.h = ys;
+      ch.shrink();
+      for (size_t y = 0; y < ys; y++) {
+        int32_t* quant_row = ch.plane.Row(y);
+        const float* row = rect.ConstPlaneRow(dc, c, y);
+        for (size_t x = 0; x < xs; x++) {
+          quant_row[x] = roundf(row[x] * inv_factor);
+        }
+      }
+    }
+  }
+
+  // Reconstruct the DC from the quantized stream image; 1.0 / mul is passed
+  // to account for the extra precision applied above.
+  DequantDC(r, &enc_state->shared.dc_storage, &enc_state->shared.quant_dc,
+            stream_images[stream_id], enc_state->shared.quantizer.MulDC(),
+            1.0 / mul, enc_state->shared.cmap.DCFactors(),
+            enc_state->shared.frame_header.chroma_subsampling,
+            enc_state->shared.block_ctx_map);
+}
+
+// Builds the modular image holding the AC metadata of DC group `group_index`
+// and picks the tree kind for its stream. Channels 0 and 1 receive the
+// clamped ytox / ytob maps, channel 2 packs the raw AC strategy (row 0) and
+// the quant field minus one (row 1) of every first block, and channel 3 the
+// EPF sharpness. The number of packed entries is stored in
+// `ac_metadata_size[group_index]`.
+void ModularFrameEncoder::AddACMetadata(size_t group_index, bool jpeg_transcode,
+                                        PassesEncoderState* enc_state) {
+  auto& shared = enc_state->shared;
+  const Rect r = shared.DCGroupRect(group_index);
+  size_t stream_id = ModularStreamId::ACMetadata(group_index).ID(frame_dim);
+
+  ModularOptions& options = stream_options[stream_id];
+  options.max_chan_size = 0xFFFFFF;
+  options.wp_tree_mode = ModularOptions::TreeMode::kNoWP;
+  if (jpeg_transcode) {
+    options.tree_kind = ModularOptions::TreeKind::kJpegTranscodeACMeta;
+  } else if (cparams.speed_tier >= SpeedTier::kFalcon) {
+    options.tree_kind = ModularOptions::TreeKind::kFalconACMeta;
+  } else if (cparams.speed_tier > SpeedTier::kKitten) {
+    options.tree_kind = ModularOptions::TreeKind::kACMeta;
+  }
+  // If we are using a non-constant CfL field, and are in a slow enough mode,
+  // re-enable tree computation for it.
+  const bool relearn_tree = cparams.speed_tier < SpeedTier::kSquirrel &&
+                            cparams.force_cfl_jpeg_recompression;
+  if (relearn_tree) {
+    options.tree_kind = ModularOptions::TreeKind::kLearn;
+  }
+
+  // YToX, YToB, ACS + QF, EPF
+  Image& image = stream_images[stream_id];
+  image = Image(r.xsize(), r.ysize(), 8, 4);
+  static_assert(kColorTileDimInBlocks == 8, "Color tile size changed");
+  // Group rectangle expressed in color tiles (8 blocks per tile side).
+  Rect cr(r.x0() >> 3, r.y0() >> 3, (r.xsize() + 7) >> 3, (r.ysize() + 7) >> 3);
+  image.channel[0] = Channel(cr.xsize(), cr.ysize(), 3, 3);
+  image.channel[1] = Channel(cr.xsize(), cr.ysize(), 3, 3);
+  // Two rows with room for one entry per block; the width is trimmed below.
+  image.channel[2] = Channel(r.xsize() * r.ysize(), 2, 0, 0);
+  ConvertPlaneAndClamp(cr, shared.cmap.ytox_map, Rect(image.channel[0].plane),
+                       &image.channel[0].plane);
+  ConvertPlaneAndClamp(cr, shared.cmap.ytob_map, Rect(image.channel[1].plane),
+                       &image.channel[1].plane);
+
+  size_t packed_count = 0;
+  for (size_t y = 0; y < r.ysize(); y++) {
+    AcStrategyRow acs_in = shared.ac_strategy.ConstRow(r, y);
+    const int* qf_in = r.ConstRow(shared.raw_quant_field, y);
+    const uint8_t* epf_in = r.ConstRow(shared.epf_sharpness, y);
+    int* acs_out = image.channel[2].plane.Row(0);
+    int* qf_out = image.channel[2].plane.Row(1);
+    int* epf_out = image.channel[3].plane.Row(y);
+    for (size_t x = 0; x < r.xsize(); x++) {
+      epf_out[x] = epf_in[x];
+      // Strategy and quant field are stored once per transform, at its
+      // first block.
+      if (acs_in[x].IsFirstBlock()) {
+        acs_out[packed_count] = acs_in[x].RawStrategy();
+        qf_out[packed_count] = qf_in[x] - 1;
+        packed_count++;
+      }
+    }
+  }
+  image.channel[2].w = packed_count;
+  ac_metadata_size[group_index] = packed_count;
+}
+
+// Writes a RAW quantization table: first the denominator as F16, then the
+// table itself. With a non-null `modular_frame_encoder` the table stream
+// previously stored under `idx` is emitted; otherwise the three
+// `size_x` x `size_y` planes of `encoding.qraw.qtable` are compressed
+// directly with default modular options.
+void ModularFrameEncoder::EncodeQuantTable(
+    size_t size_x, size_t size_y, BitWriter* writer,
+    const QuantEncoding& encoding, size_t idx,
+    ModularFrameEncoder* modular_frame_encoder) {
+  JXL_ASSERT(encoding.qraw.qtable != nullptr);
+  JXL_ASSERT(size_x * size_y * 3 == encoding.qraw.qtable->size());
+  JXL_CHECK(F16Coder::Write(encoding.qraw.qtable_den, writer));
+  if (modular_frame_encoder) {
+    JXL_CHECK(modular_frame_encoder->EncodeStream(
+        writer, nullptr, 0, ModularStreamId::QuantTable(idx)));
+    return;
+  }
+
+  // The table is laid out plane by plane, row by row, so a single running
+  // index visits it in exactly the order the image is filled.
+  const auto& table = *encoding.qraw.qtable;
+  Image image(size_x, size_y, 8, 3);
+  size_t next = 0;
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t y = 0; y < size_y; y++) {
+      int* JXL_RESTRICT out = image.channel[c].Row(y);
+      for (size_t x = 0; x < size_x; x++) {
+        out[x] = table[next++];
+      }
+    }
+  }
+  ModularOptions cfopts;
+  JXL_CHECK(ModularGenericCompress(image, cfopts, writer));
+}
+
+// Copies the RAW quantization table in `encoding` (three `size_x` x `size_y`
+// planes, stored plane by plane and row by row) into the stream image
+// reserved for quant table `idx`.
+void ModularFrameEncoder::AddQuantTable(size_t size_x, size_t size_y,
+                                        const QuantEncoding& encoding,
+                                        size_t idx) {
+  size_t stream_id = ModularStreamId::QuantTable(idx).ID(frame_dim);
+  JXL_ASSERT(encoding.qraw.qtable != nullptr);
+  JXL_ASSERT(size_x * size_y * 3 == encoding.qraw.qtable->size());
+  Image& image = stream_images[stream_id];
+  image = Image(size_x, size_y, 8, 3);
+  const size_t plane_size = size_x * size_y;
+  for (size_t c = 0; c < 3; c++) {
+    const size_t plane_base = c * plane_size;
+    for (size_t y = 0; y < size_y; y++) {
+      const size_t row_base = plane_base + y * size_x;
+      int* JXL_RESTRICT dst = image.channel[c].Row(y);
+      for (size_t x = 0; x < size_x; x++) {
+        dst[x] = (*encoding.qraw.qtable)[row_base + x];
+      }
+    }
+  }
+}
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_modular.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_modular.h
new file mode 100644
index 0000000000..30a6610d6b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_modular.h
@@ -0,0 +1,94 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_MODULAR_H_
+#define LIB_JXL_ENC_MODULAR_H_
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_modular.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+// Encoder-side state for the modular sub-streams of a frame. Each stream has
+// one entry in `stream_images` and `stream_options`, indexed by
+// `ModularStreamId::ID(frame_dim)`.
+class ModularFrameEncoder {
+ public:
+  ModularFrameEncoder(const FrameHeader& frame_header,
+                      const CompressParams& cparams_orig);
+  Status ComputeEncodingData(const FrameHeader& frame_header,
+                             const ImageMetadata& metadata,
+                             Image3F* JXL_RESTRICT color,
+                             const std::vector<ImageF>& extra_channels,
+                             PassesEncoderState* JXL_RESTRICT enc_state,
+                             ThreadPool* pool, AuxOut* aux_out, bool do_color);
+  // Encodes global info (tree + histograms) in the `writer`.
+  Status EncodeGlobalInfo(BitWriter* writer, AuxOut* aux_out);
+  // Encodes a specific modular image (identified by `stream`) in the `writer`,
+  // assigning bits to the provided `layer`.
+  Status EncodeStream(BitWriter* writer, AuxOut* aux_out, size_t layer,
+                      const ModularStreamId& stream);
+  // Creates a modular image for a given DC group of VarDCT mode. `dc` is the
+  // input DC image, not quantized; the group is specified by `group_index`, and
+  // `nl_dc` decides whether to apply a near-lossless processing to the DC or
+  // not.
+  void AddVarDCTDC(const Image3F& dc, size_t group_index, bool nl_dc,
+                   PassesEncoderState* enc_state);
+  // Creates a modular image for the AC metadata of the given group
+  // (`group_index`).
+  void AddACMetadata(size_t group_index, bool jpeg_transcode,
+                     PassesEncoderState* enc_state);
+  // Encodes a RAW quantization table in `writer`. If `modular_frame_encoder` is
+  // null, the quantization table in `encoding` is used, with dimensions `size_x
+  // x size_y`. Otherwise, the table with ID `idx` is encoded from the given
+  // `modular_frame_encoder`.
+  static void EncodeQuantTable(size_t size_x, size_t size_y, BitWriter* writer,
+                               const QuantEncoding& encoding, size_t idx,
+                               ModularFrameEncoder* modular_frame_encoder);
+  // Stores a quantization table for future usage with `EncodeQuantTable`.
+  void AddQuantTable(size_t size_x, size_t size_y,
+                     const QuantEncoding& encoding, size_t idx);
+
+  // Per DC group: number of (AC strategy, quant field) entries packed by
+  // `AddACMetadata`, one per first block.
+  std::vector<size_t> ac_metadata_size;
+  // Per DC group: 1 when `AddVarDCTDC` was called with `nl_dc`, else 0; the
+  // DC quantization scale is multiplied by `1 << extra_dc_precision`.
+  std::vector<uint8_t> extra_dc_precision;
+
+  // Image and options of every modular stream, indexed by stream id.
+  std::vector<Image> stream_images;
+  std::vector<ModularOptions> stream_options;
+
+  Tree tree;
+  std::vector<std::vector<Token>> tree_tokens;
+  std::vector<GroupHeader> stream_headers;
+  std::vector<std::vector<Token>> tokens;
+  EntropyEncodingData code;
+  std::vector<uint8_t> context_map;
+  FrameDimensions frame_dim;
+  CompressParams cparams;
+  // Keep these two declared after `cparams`: default member initializers run
+  // in declaration order, so `cparams` must be initialized first.
+  float quality = cparams.quality_pair.first;
+  float cquality = cparams.quality_pair.second;
+  std::vector<size_t> tree_splits;
+  std::vector<ModularMultiplierInfo> multiplier_info;
+  std::vector<std::vector<uint32_t>> gi_channel;
+  std::vector<size_t> image_widths;
+
+ private:
+  Status PrepareEncoding(ThreadPool* pool, const FrameDimensions& frame_dim,
+                         EncoderHeuristics* heuristics,
+                         AuxOut* aux_out = nullptr);
+  Status PrepareStreamParams(const Rect& rect, const CompressParams& cparams,
+                             int minShift, int maxShift,
+                             const ModularStreamId& stream, bool do_color);
+
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_MODULAR_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_noise.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_noise.cc
new file mode 100644
index 0000000000..383b7922f9
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_noise.cc
@@ -0,0 +1,378 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_noise.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <numeric>
+#include <utility>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/robust_statistics.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/optimize.h"
+
+namespace jxl {
+namespace {
+
+using OptimizeArray = optimize::Array<double, NoiseParams::kNumNoisePoints>;
+
+// Texture score of the `block_size` x `block_size` block whose top-left
+// corner is (x, y). A 3x4 patch anchored at offset (2, 2) inside the block is
+// compared, by sum of absolute differences on 0.5 * (plane 1 + plane 0), with
+// the 3x4 patch at every admissible position in the block. The score is the
+// mean of the smallest half of those SAD values.
+float GetScoreSumsOfAbsoluteDifferences(const Image3F& opsin, const int x,
+                                        const int y, const int block_size) {
+  constexpr int kPatchWidth = 3;
+  constexpr int kPatchHeight = 4;
+  constexpr int kCenterOffset = 2;
+  const int positions_x = block_size - kPatchWidth;
+  const int positions_y = block_size - kPatchHeight;
+  const int num_sad = positions_x * positions_y;
+
+  // Average of the first two planes at image position (col, row).
+  const auto intensity = [&opsin](const int col, const int row) {
+    return 0.5f * (opsin.PlaneRow(1, row)[col] + opsin.PlaneRow(0, row)[col]);
+  };
+
+  std::vector<float> sad(num_sad, 0);
+  int filled = 0;
+  for (int y_bl = 0; y_bl < positions_y; ++y_bl) {
+    for (int x_bl = 0; x_bl < positions_x; ++x_bl) {
+      // Compare the patch at (x_bl, y_bl) with the center patch.
+      float patch_sad = 0;
+      for (int cy = 0; cy < kPatchHeight; ++cy) {
+        for (int cx = 0; cx < kPatchWidth; ++cx) {
+          const float wnd = intensity(x + x_bl + cx, y + y_bl + cy);
+          const float center =
+              intensity(x + kCenterOffset + cx, y + kCenterOffset + cy);
+          patch_sad += std::abs(center - wnd);
+        }
+      }
+      sad[filled++] = patch_sad;
+    }
+  }
+
+  // As with ROAD (rank order absolute distance), only the smallest half of
+  // the SAD values is kept; patch SAD is used instead of single-pixel
+  // differences because it is more robust.
+  const int num_kept = num_sad / 2;
+  std::sort(sad.begin(), sad.end());
+  const float kept_sum =
+      std::accumulate(sad.begin(), sad.begin() + num_kept, 0.0f);
+  return kept_sum / num_kept;
+}
+
+// Histogram with kBins counters. A sample is converted to int and clamped to
+// [0, kBins - 1] to select its bin.
+class NoiseHistogram {
+ public:
+  static constexpr int kBins = 256;
+
+  NoiseHistogram() { std::fill(bins, bins + kBins, 0); }
+
+  // Adds one sample to the bin that `x` maps to.
+  void Increment(const float x) { bins[Index(x)] += 1; }
+  // Count of the bin that `x` maps to.
+  int Get(const float x) const { return bins[Index(x)]; }
+  // Count of bin number `bin`; the index is not range-checked.
+  int Bin(const size_t bin) const { return bins[bin]; }
+
+  // Debug helper: prints one bin count per line to stdout.
+  void Print() const {
+    for (unsigned int bin : bins) {
+      // The counts are unsigned, so the conversion must be %u (not %d).
+      printf("%u\n", bin);
+    }
+  }
+
+  // Mode estimate computed by HalfRangeMode from the cumulative bin counts.
+  int Mode() const {
+    uint32_t cdf[kBins];
+    std::partial_sum(bins, bins + kBins, cdf);
+    return HalfRangeMode()(cdf, kBins);
+  }
+
+  // Approximate position (in bin units) of quantile `q01`.
+  // NOTE(review): `total` is seeded with 1, so it is the sample count plus
+  // one, and after the loop `excess` is `target - sum`, which is negative
+  // whenever the loop exited through `break`. The interpolation weight below
+  // therefore looks suspicious; confirm the intended formula before relying
+  // on this function. It is left unchanged here.
+  double Quantile(double q01) const {
+    const int64_t total = std::accumulate(bins, bins + kBins, int64_t{1});
+    const int64_t target = static_cast<int64_t>(q01 * total);
+    // Until sum >= target:
+    int64_t sum = 0;
+    size_t i = 0;
+    for (; i < kBins; ++i) {
+      sum += bins[i];
+      // Exact match: assume middle of bin i
+      if (sum == target) {
+        return i + 0.5;
+      }
+      if (sum > target) break;
+    }
+
+    // Next non-empty bin (in case histogram is sparsely filled)
+    size_t next = i + 1;
+    while (next < kBins && bins[next] == 0) {
+      ++next;
+    }
+
+    // Linear interpolation according to how far into next we went
+    const double excess = target - sum;
+    const double weight_next = bins[Index(next)] / excess;
+    return ClampX(next * weight_next + i * (1.0 - weight_next));
+  }
+
+  // Inter-quartile range
+  double IQR() const { return Quantile(0.75) - Quantile(0.25); }
+
+ private:
+  // Clamps `x` to the valid bin range [0, kBins - 1].
+  template <typename T>
+  T ClampX(const T x) const {
+    return std::min(std::max(T(0), x), T(kBins - 1));
+  }
+  // Bin index for sample `x`: converted to int, then clamped.
+  size_t Index(const float x) const { return ClampX(static_cast<int>(x)); }
+
+  uint32_t bins[kBins];
+};
+
+// Computes the SAD texture score of every complete `block_s` x `block_s`
+// block of `opsin`, in raster order, and records `score * num_bin` in
+// `sad_histogram`. Partial blocks at the right and bottom edges are ignored.
+std::vector<float> GetSADScoresForPatches(const Image3F& opsin,
+                                          const size_t block_s,
+                                          const size_t num_bin,
+                                          NoiseHistogram* sad_histogram) {
+  const size_t blocks_y = opsin.ysize() / block_s;
+  const size_t blocks_x = opsin.xsize() / block_s;
+  std::vector<float> sad_scores(blocks_y * blocks_x, 0.0f);
+
+  for (size_t by = 0; by < blocks_y; ++by) {
+    float* score_row = sad_scores.data() + by * blocks_x;
+    for (size_t bx = 0; bx < blocks_x; ++bx) {
+      const float score = GetScoreSumsOfAbsoluteDifferences(
+          opsin, bx * block_s, by * block_s, block_s);
+      score_row[bx] = score;
+      sad_histogram->Increment(score * num_bin);
+    }
+  }
+  return sad_scores;
+}
+
+// Returns the SAD value at which `histogram` peaks. `num_bin` must be the
+// scale factor the SAD scores were multiplied by before being added to the
+// histogram; dividing the mode bin by it converts the bin index back to SAD
+// units. (For the existing caller num_bin equals NoiseHistogram::kBins, so
+// the result is unchanged.)
+float GetSADThreshold(const NoiseHistogram& histogram, const int num_bin) {
+  // Here we assume that the most common SAD value belongs to "flat" patches.
+  // However, some images might contain regular texture and generate a second
+  // strong peak in the histogram.
+  // TODO(user) handle bimodal and heavy-tailed case
+  const int mode = histogram.Mode();
+  return static_cast<float>(mode) / num_bin;
+}
+
+// loss = sum asym * (F(x) - nl)^2 + kReg * num_points * sum (w[i] - w[i+1])^2
+// where asym = 1 if F(x) < nl, kAsym if F(x) > nl.
+// Loss functor handed to the optimizer; owns a copy of the noise samples.
+struct LossFunction {
+  explicit LossFunction(std::vector<NoiseLevel> nl0) : nl(std::move(nl0)) {}
+
+  // Evaluates the loss at `w`. `*df` is overwritten with minus one half of
+  // the loss gradient with respect to `w` (the factor 2 of the squared terms
+  // is dropped). With `skip_regularization` the smoothness term is left out
+  // of both the returned loss and `*df`.
+  double Compute(const OptimizeArray& w, OptimizeArray* df,
+                 bool skip_regularization = false) const {
+    constexpr double kReg = 0.005;
+    constexpr double kAsym = 1.1;
+    double loss_function = 0;
+    for (size_t i = 0; i < w.size(); i++) {
+      (*df)[i] = 0;
+    }
+    for (auto ind : nl) {
+      // The model value at this intensity is the linear interpolation
+      // between the two neighbouring LUT entries.
+      std::pair<int, float> pos = IndexAndFrac(ind.intensity);
+      JXL_DASSERT(pos.first >= 0 && static_cast<size_t>(pos.first) <
+                                        NoiseParams::kNumNoisePoints - 1);
+      double low = w[pos.first];
+      double hi = w[pos.first + 1];
+      double val = low * (1.0f - pos.second) + hi * pos.second;
+      double dist = val - ind.noise_level;
+      if (dist > 0) {
+        // Model above the measurement: weighted by kAsym.
+        loss_function += kAsym * dist * dist;
+        (*df)[pos.first] -= kAsym * (1.0f - pos.second) * dist;
+        (*df)[pos.first + 1] -= kAsym * pos.second * dist;
+      } else {
+        loss_function += dist * dist;
+        (*df)[pos.first] -= (1.0f - pos.second) * dist;
+        (*df)[pos.first + 1] -= pos.second * dist;
+      }
+    }
+    if (skip_regularization) return loss_function;
+    // Smoothness term: penalizes differences between adjacent LUT entries,
+    // scaled by the number of samples.
+    for (size_t i = 0; i + 1 < w.size(); i++) {
+      double diff = w[i] - w[i + 1];
+      loss_function += kReg * nl.size() * diff * diff;
+      (*df)[i] -= kReg * diff * nl.size();
+      (*df)[i + 1] += kReg * diff * nl.size();
+    }
+    return loss_function;
+  }
+
+  std::vector<NoiseLevel> nl;
+};
+
+// Fits the noise LUT of `noise_params` to the samples in `noise_level`.
+// Every LUT entry starts at the average measured noise level and is refined
+// with the scaled conjugate gradient optimizer. If there are no samples, or
+// the mean unregularized loss of the fit exceeds kMaxError, `noise_params` is
+// cleared instead. Fitted entries are clamped to be non-negative.
+void OptimizeNoiseParameters(const std::vector<NoiseLevel>& noise_level,
+                             NoiseParams* noise_params) {
+  constexpr double kMaxError = 1e-3;
+  static const double kPrecision = 1e-8;
+  static const int kMaxIter = 40;
+
+  // Nothing to fit. Without this guard the average and the mean loss below
+  // are both 0 / 0 = NaN, the NaN loss is not caught by the kMaxError check,
+  // and the LUT is filled from the NaN-seeded optimizer result.
+  if (noise_level.empty()) {
+    noise_params->Clear();
+    return;
+  }
+
+  float avg = 0;
+  for (const NoiseLevel& nl : noise_level) {
+    avg += nl.noise_level;
+  }
+  avg /= noise_level.size();
+
+  LossFunction loss_function(noise_level);
+  OptimizeArray parameter_vector;
+  for (size_t i = 0; i < parameter_vector.size(); i++) {
+    parameter_vector[i] = avg;
+  }
+
+  parameter_vector = optimize::OptimizeWithScaledConjugateGradientMethod(
+      loss_function, parameter_vector, kPrecision, kMaxIter);
+
+  // Only the loss value is needed here; `df` is scratch output for Compute.
+  OptimizeArray df = parameter_vector;
+  float loss = loss_function.Compute(parameter_vector, &df,
+                                     /*skip_regularization=*/true) /
+               noise_level.size();
+
+  // Approximation went too badly: escape with no noise at all.
+  if (loss > kMaxError) {
+    noise_params->Clear();
+    return;
+  }
+
+  for (size_t i = 0; i < parameter_vector.size(); i++) {
+    noise_params->lut[i] = std::max(parameter_vector[i], 0.0);
+  }
+}
+
+// Measures intensity and noise level of every complete `block_s` x `block_s`
+// patch of `opsin` whose texture strength is at most `threshold`.
+// `texture_strength` must hold one score per complete patch, in the same
+// raster order as the loops below. For each selected patch the intensity is
+// the mean of 0.5 * (plane 1 + plane 0) and the noise level is the mean
+// absolute response of the 3x3 kLaplFilter on that same signal. Filter taps
+// that would fall outside the patch are mirrored back into it.
+std::vector<NoiseLevel> GetNoiseLevel(
+    const Image3F& opsin, const std::vector<float>& texture_strength,
+    const float threshold, const size_t block_s) {
+  std::vector<NoiseLevel> noise_level_per_intensity;
+
+  const int filt_size = 1;
+  static const float kLaplFilter[filt_size * 2 + 1][filt_size * 2 + 1] = {
+      {-0.25f, -1.0f, -0.25f},
+      {-1.0f, 5.0f, -1.0f},
+      {-0.25f, -1.0f, -0.25f},
+  };
+
+  // The noise model is built based on channel 0.5 * (X+Y) as we notice that it
+  // is similar to the model 0.5 * (Y-X)
+  size_t patch_index = 0;
+
+  for (size_t y = 0; y + block_s <= opsin.ysize(); y += block_s) {
+    for (size_t x = 0; x + block_s <= opsin.xsize(); x += block_s) {
+      if (texture_strength[patch_index] <= threshold) {
+        // Calculate mean value
+        float mean_int = 0;
+        for (size_t y_bl = 0; y_bl < block_s; ++y_bl) {
+          for (size_t x_bl = 0; x_bl < block_s; ++x_bl) {
+            mean_int += 0.5f * (opsin.PlaneRow(1, y + y_bl)[x + x_bl] +
+                                opsin.PlaneRow(0, y + y_bl)[x + x_bl]);
+          }
+        }
+        mean_int /= block_s * block_s;
+
+        // Calculate Noise level
+        float noise_level = 0;
+        size_t count = 0;
+        for (size_t y_bl = 0; y_bl < block_s; ++y_bl) {
+          for (size_t x_bl = 0; x_bl < block_s; ++x_bl) {
+            float filtered_value = 0;
+            for (int y_f = -1 * filt_size; y_f <= filt_size; ++y_f) {
+              if ((static_cast<ssize_t>(y_bl) + y_f) >= 0 &&
+                  (y_bl + y_f) < block_s) {
+                // Tap row lies inside the patch.
+                for (int x_f = -1 * filt_size; x_f <= filt_size; ++x_f) {
+                  if ((static_cast<ssize_t>(x_bl) + x_f) >= 0 &&
+                      (x_bl + x_f) < block_s) {
+                    filtered_value +=
+                        0.5f *
+                        (opsin.PlaneRow(1, y + y_bl + y_f)[x + x_bl + x_f] +
+                         opsin.PlaneRow(0, y + y_bl + y_f)[x + x_bl + x_f]) *
+                        kLaplFilter[y_f + filt_size][x_f + filt_size];
+                  } else {
+                    // Tap column outside the patch: mirror it (x - x_f).
+                    filtered_value +=
+                        0.5f *
+                        (opsin.PlaneRow(1, y + y_bl + y_f)[x + x_bl - x_f] +
+                         opsin.PlaneRow(0, y + y_bl + y_f)[x + x_bl - x_f]) *
+                        kLaplFilter[y_f + filt_size][x_f + filt_size];
+                  }
+                }
+              } else {
+                // Tap row outside the patch: mirror it (y - y_f).
+                for (int x_f = -1 * filt_size; x_f <= filt_size; ++x_f) {
+                  if ((static_cast<ssize_t>(x_bl) + x_f) >= 0 &&
+                      (x_bl + x_f) < block_s) {
+                    filtered_value +=
+                        0.5f *
+                        (opsin.PlaneRow(1, y + y_bl - y_f)[x + x_bl + x_f] +
+                         opsin.PlaneRow(0, y + y_bl - y_f)[x + x_bl + x_f]) *
+                        kLaplFilter[y_f + filt_size][x_f + filt_size];
+                  } else {
+                    // Both the tap row and the tap column are mirrored.
+                    filtered_value +=
+                        0.5f *
+                        (opsin.PlaneRow(1, y + y_bl - y_f)[x + x_bl - x_f] +
+                         opsin.PlaneRow(0, y + y_bl - y_f)[x + x_bl - x_f]) *
+                        kLaplFilter[y_f + filt_size][x_f + filt_size];
+                  }
+                }
+              }
+            }
+            noise_level += std::abs(filtered_value);
+            ++count;
+          }
+        }
+        noise_level /= count;
+        NoiseLevel nl;
+        nl.intensity = mean_int;
+        nl.noise_level = noise_level;
+        noise_level_per_intensity.push_back(nl);
+      }
+      // Advance for every patch, selected or not, to stay aligned with
+      // `texture_strength`.
+      ++patch_index;
+    }
+  }
+  return noise_level_per_intensity;
+}
+
+// Writes `val` as a 10-bit unsigned integer: `val * precision`, rounded to
+// nearest by adding 0.5 and truncating. `val` must be non-negative and the
+// quantized value must be below 1 << 10.
+void EncodeFloatParam(float val, float precision, BitWriter* writer) {
+  JXL_ASSERT(val >= 0);
+  const float biased = val * precision + 0.5f;
+  const int absval_quant = static_cast<int>(biased);
+  JXL_ASSERT(absval_quant < (1 << 10));
+  writer->Write(10, absval_quant);
+}
+
+}  // namespace
+
+// Estimates the noise LUT of `opsin` into `noise_params` and returns whether
+// the result contains any noise. When the SAD threshold derived from the
+// patch histogram is above 0.15 or not positive, `noise_params` is cleared
+// and false is returned. Otherwise the fitted LUT is scaled by
+// `quality_coef * 1.4`.
+Status GetNoiseParameter(const Image3F& opsin, NoiseParams* noise_params,
+                         float quality_coef) {
+  // The size of a patch in decoder might be different from encoder's patch
+  // size.
+  // For encoder: the patch size should be big enough to estimate
+  //              noise level, but, at the same time, it should be not too big
+  //              to be able to estimate intensity value of the patch
+  const size_t block_s = 8;
+  const size_t kNumBin = 256;
+
+  NoiseHistogram sad_histogram;
+  const std::vector<float> sad_scores =
+      GetSADScoresForPatches(opsin, block_s, kNumBin, &sad_histogram);
+  const float sad_threshold = GetSADThreshold(sad_histogram, kNumBin);
+
+  // If threshold is too large, the image has a strong pattern. This pattern
+  // fools our model and it will add too much noise. Therefore, we do not add
+  // noise for such images
+  const bool unusable_threshold =
+      sad_threshold > 0.15f || sad_threshold <= 0.0f;
+  if (unusable_threshold) {
+    noise_params->Clear();
+    return false;
+  }
+
+  const std::vector<NoiseLevel> samples =
+      GetNoiseLevel(opsin, sad_scores, sad_threshold, block_s);
+  OptimizeNoiseParameters(samples, noise_params);
+
+  // The gain is computed in double precision, as the literal 1.4 is a double.
+  const double gain = quality_coef * 1.4;
+  for (float& lut_entry : noise_params->lut) {
+    lut_entry *= gain;
+  }
+  return noise_params->HasAny();
+}
+
+// Writes every entry of `noise_params.lut` with EncodeFloatParam and charges
+// the bits to `layer`. `noise_params` must contain noise (asserted).
+void EncodeNoise(const NoiseParams& noise_params, BitWriter* writer,
+                 size_t layer, AuxOut* aux_out) {
+  JXL_ASSERT(noise_params.HasAny());
+
+  // Upper bound on the bits reserved for the LUT: 16 per noise point.
+  const auto max_bits = NoiseParams::kNumNoisePoints * 16;
+  BitWriter::Allotment allotment(writer, max_bits);
+  for (const float lut_entry : noise_params.lut) {
+    EncodeFloatParam(lut_entry, kNoisePrecision, writer);
+  }
+  ReclaimAndCharge(writer, &allotment, layer, aux_out);
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_noise.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_noise.h
new file mode 100644
index 0000000000..15fb07a8c8
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_noise.h
@@ -0,0 +1,33 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_NOISE_H_
+#define LIB_JXL_ENC_NOISE_H_
+
+// Noise parameter estimation.
+
+#include <stddef.h>
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/noise.h"
+
+namespace jxl {
+
+// Get parameters of the noise for NoiseParams model
+// Returns whether a valid noise model (with HasAny()) is set.
+// If the SAD threshold estimated from the image is above 0.15 or not
+// positive, `noise_params` is cleared and false is returned. Otherwise the
+// fitted LUT entries are multiplied by `quality_coef * 1.4`.
+Status GetNoiseParameter(const Image3F& opsin, NoiseParams* noise_params,
+                         float quality_coef);
+
+// `noise_params` must not be empty: the implementation asserts
+// `noise_params.HasAny()`. The caller must also set FrameHeader.flags.kNoise.
+void EncodeNoise(const NoiseParams& noise_params, BitWriter* writer,
+                 size_t layer, AuxOut* aux_out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_NOISE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_params.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_params.h
new file mode 100644
index 0000000000..78a3a7cee0
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_params.h
@@ -0,0 +1,270 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_PARAMS_H_
+#define LIB_JXL_ENC_PARAMS_H_
+
+// Parameters and flags that govern JXL compression.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/butteraugli/butteraugli.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+enum class SpeedTier {  // ParseSpeedTier maps number n to value 10 - n.
+  // Turns on FindBestQuantizationHQ loop. Equivalent to "guetzli" mode.
+  kTortoise = 1,
+  // Turns on FindBestQuantization butteraugli loop.
+  kKitten = 2,
+  // Turns on dots, patches, and spline detection by default, as well as full
+  // context clustering. Default.
+  kSquirrel = 3,
+  // Turns on error diffusion and full AC strategy heuristics. Equivalent to
+  // "fast" mode.
+  kWombat = 4,
+  // Turns on gaborish by default, non-default cmap, initial quant field.
+  kHare = 5,
+  // Turns on simple heuristics for AC strategy, quant field, and clustering;
+  // also enables coefficient reordering.
+  kCheetah = 6,
+  // Turns off most encoder features. Does context clustering.
+  // Modular: uses fixed tree with Weighted predictor.
+  kFalcon = 7,
+  // Currently fastest possible setting for VarDCT.
+  // Modular: uses fixed tree with Gradient predictor.
+  kThunder = 8,
+  // VarDCT: same as kThunder.
+  // Modular: no tree, Gradient predictor, fast histograms.
+  kLightning = 9
+};
+
+// Parses `s` as a speed tier name ("lightning" ... "tortoise", or the aliases
+// "fast" / "guetzli") or as a number n, which selects the tier valued 10 - n.
+// Returns true and sets `*out` on success; otherwise returns false and leaves
+// `*out` unchanged. Numbers must span the whole string ("3abc" is rejected).
+inline bool ParseSpeedTier(const std::string& s, SpeedTier* out) {
+  static const struct {
+    const char* name;
+    SpeedTier tier;
+  } kNamedTiers[] = {
+      {"lightning", SpeedTier::kLightning},
+      {"thunder", SpeedTier::kThunder},
+      {"falcon", SpeedTier::kFalcon},
+      {"cheetah", SpeedTier::kCheetah},
+      {"hare", SpeedTier::kHare},
+      {"fast", SpeedTier::kWombat}, {"wombat", SpeedTier::kWombat},
+      {"squirrel", SpeedTier::kSquirrel},
+      {"kitten", SpeedTier::kKitten},
+      {"guetzli", SpeedTier::kTortoise}, {"tortoise", SpeedTier::kTortoise},
+  };
+  for (const auto& entry : kNamedTiers) {
+    if (s == entry.name) {
+      *out = entry.tier;
+      return true;
+    }
+  }
+  // Numeric form: base-0 strtoull; `end` shows where parsing stopped, so an
+  // empty number or trailing characters can be told apart from a full parse.
+  char* end = nullptr;
+  const size_t st = 10 - static_cast<size_t>(strtoull(s.c_str(), &end, 0));
+  if (end == s.c_str() || *end != '\0' ||  // No digits, or trailing junk.
+      st > static_cast<size_t>(SpeedTier::kLightning) ||
+      st < static_cast<size_t>(SpeedTier::kTortoise)) {
+    return false;
+  }
+  *out = SpeedTier(st);
+  return true;
+}
+
+// Returns the lowercase name of `speed_tier`, or "INVALID" if it matches no
+// SpeedTier enumerator.
+inline const char* SpeedTierName(SpeedTier speed_tier) {
+  // Enumerator/name pairs; unmatched values fall through to "INVALID".
+  static const struct {
+    SpeedTier tier;
+    const char* name;
+  } kTierNames[] = {
+      {SpeedTier::kLightning, "lightning"},
+      {SpeedTier::kThunder, "thunder"},
+      {SpeedTier::kFalcon, "falcon"},
+      {SpeedTier::kCheetah, "cheetah"},
+      {SpeedTier::kHare, "hare"},
+      {SpeedTier::kWombat, "wombat"},
+      {SpeedTier::kSquirrel, "squirrel"},
+      {SpeedTier::kKitten, "kitten"},
+      {SpeedTier::kTortoise, "tortoise"},
+  };
+  for (const auto& entry : kTierNames) {
+    if (entry.tier == speed_tier) return entry.name;
+  }
+  return "INVALID";
+}
+
+// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
+struct CompressParams {  // Encoder settings with their defaults.
+  float butteraugli_distance = 1.0f;
+  size_t target_size = 0;
+  float target_bitrate = 0.0f;
+
+  // 0.0 means search for the adaptive quantization map that matches the
+  // butteraugli distance, positive values mean quantize everywhere with that
+  // value.
+  float uniform_quant = 0.0f;
+  float quant_border_bias = 0.0f;
+
+  // Try to achieve a maximum pixel-by-pixel error on each channel.
+  bool max_error_mode = false;
+  float max_error[3] = {0.0, 0.0, 0.0};
+
+  SpeedTier speed_tier = SpeedTier::kSquirrel;
+
+  // 0 = default.
+  // 1 = slightly worse quality.
+  // 4 = fastest speed, lowest quality
+  // TODO(veluca): hook this up to the C API.
+  size_t decoding_speed_tier = 0;
+
+  int max_butteraugli_iters = 4;
+
+  int max_butteraugli_iters_guetzli_mode = 100;
+
+  ColorTransform color_transform = ColorTransform::kXYB;
+  YCbCrChromaSubsampling chroma_subsampling;
+
+  // If true, the "modular mode options" members below are used.
+  bool modular_mode = false;
+
+  // Change group size in modular mode (0=128, 1=256, 2=512, 3=1024).
+  size_t modular_group_size_shift = 1;
+
+  Override preview = Override::kDefault;
+  Override noise = Override::kDefault;
+  Override dots = Override::kDefault;
+  Override patches = Override::kDefault;
+  Override gaborish = Override::kDefault;
+  int epf = -1;  // NOTE(review): -1 presumably means "auto" — confirm.
+
+  // Progressive mode.
+  bool progressive_mode = false;
+
+  // Quantized-progressive mode.
+  bool qprogressive_mode = false;
+
+  // Put center groups first in the bitstream.
+  bool centerfirst = false;
+
+  // Pixel coordinates of the center. First group will contain that center.
+  size_t center_x = static_cast<size_t>(-1);
+  size_t center_y = static_cast<size_t>(-1);
+
+  int progressive_dc = -1;  // NOTE(review): -1 presumably "auto" — confirm.
+
+  // If on: preserve color of invisible pixels (if off: don't care)
+  // Default: on for lossless, off for lossy
+  Override keep_invisible = Override::kDefault;
+
+  // Progressive-mode saliency.
+  //
+  // How many progressive saliency-encoding steps to perform.
+  // - 1: Encode only DC and lowest-frequency AC. Does not need a saliency-map.
+  // - 2: Encode only DC+LF, dropping all HF AC data.
+  //      Does not need a saliency-map.
+  // - 3: Encode DC+LF+{salient HF}, dropping all non-salient HF data.
+  // - 4: Encode DC+LF+{salient HF}+{other HF}.
+  // - 5: Encode DC+LF+{quantized HF}+{low HF bits}.
+  size_t saliency_num_progressive_steps = 3;
+  // Every saliency-heatmap cell with saliency >= threshold will be considered
+  // as 'salient'. The default value of 0.0 will consider every AC-block
+  // as salient, hence not require a saliency-map, and not actually generate
+  // a 4th progressive step.
+  float saliency_threshold = 0.0f;
+  // Saliency-map (owned by caller).
+  ImageF* saliency_map = nullptr;
+
+  // Input and output file name. Will be used to provide pluggable saliency
+  // extractor with paths.
+  const char* file_in = nullptr;
+  const char* file_out = nullptr;
+
+  // Currently unused as of 2020-01.
+  bool clear_metadata = false;
+
+  // Prints extra information during/after encoding.
+  bool verbose = false;
+
+  ButteraugliParams ba_params;
+
+  // Force usage of CfL when doing JPEG recompression. This can have unexpected
+  // effects on the decoded pixels, while still being JPEG-compliant and
+  // allowing reconstruction of the original JPEG.
+  bool force_cfl_jpeg_recompression = true;
+
+  // Set the noise to what it would approximately be if shooting at the nominal
+  // exposure for a given ISO setting on a 35mm camera.
+  float photon_noise_iso = 0;
+
+  // modular mode options below
+  ModularOptions options;
+  int responsive = -1;
+  // A pair of <quality, cquality>.
+  std::pair<float, float> quality_pair{100.f, 100.f};
+  int colorspace = -1;
+  // Use Global channel palette if #colors < this percentage of range
+  float channel_colors_pre_transform_percent = 95.f;
+  // Use Local channel palette if #colors < this percentage of range
+  float channel_colors_percent = 80.f;
+  int palette_colors = 1 << 10;  // up to 10-bit palette is probably worthwhile
+  bool lossy_palette = false;
+
+  // Returns whether these params match the state established by SetLossless().
+  bool IsLossless() const {
+    return modular_mode && quality_pair.first == 100 &&
+           quality_pair.second == 100 &&
+           color_transform == jxl::ColorTransform::kNone;
+  }
+
+  // Sets the parameters required to make the codec lossless.
+  void SetLossless() {
+    modular_mode = true;
+    quality_pair.first = 100;
+    quality_pair.second = 100;
+    color_transform = jxl::ColorTransform::kNone;
+  }
+
+  bool use_new_heuristics = false;
+
+  // Down/upsample the image before encoding / after decoding by this factor.
+  size_t resampling = 1;
+  size_t ec_resampling = 1;
+  // Skip the downsampling before encoding if this is true.
+  bool already_downsampled = false;
+};
+
+static constexpr float kMinButteraugliForDynamicAR = 0.5f;
+static constexpr float kMinButteraugliForDots = 3.0f;  // Dots default-on floor.
+static constexpr float kMinButteraugliToSubtractOriginalPatches = 3.0f;
+static constexpr float kMinButteraugliDistanceForProgressiveDc = 4.5f;
+
+// Always off (NOTE(review): presumably an unreachable threshold — confirm).
+static constexpr float kMinButteraugliForNoise = 99.0f;
+
+// Minimum butteraugli distance the encoder accepts.
+static constexpr float kMinButteraugliDistance = 0.01f;
+
+// Tile size for encoder-side processing. Must be equal to color tile dim in the
+// current implementation.
+static constexpr size_t kEncTileDim = 64;
+static constexpr size_t kEncTileDimInBlocks = kEncTileDim / kBlockDim;
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_PARAMS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_patch_dictionary.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_patch_dictionary.cc
new file mode 100644
index 0000000000..5973acd63d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_patch_dictionary.cc
@@ -0,0 +1,836 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_patch_dictionary.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <random>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_frame.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_dot_dictionary.h"
+#include "lib/jxl/enc_frame.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/patch_dictionary_internal.h"
+
+namespace jxl {
+
+// static
+void PatchDictionaryEncoder::Encode(const PatchDictionary& pdic,
+                                    BitWriter* writer, size_t layer,
+                                    AuxOut* aux_out) {
+  JXL_ASSERT(pdic.HasAny());
+  std::vector<std::vector<Token>> tokens(1);
+  const auto emit = [&tokens](int context, size_t value) {
+    tokens[0].emplace_back(context, value);
+  };
+  // Positions are consumed as maximal runs of consecutive entries that share
+  // one ref_pos; this returns the index one past the run starting at `begin`.
+  const auto& positions = pdic.positions_;
+  const auto run_end = [&positions](size_t begin) -> size_t {
+    size_t end = begin;
+    while (end < positions.size() &&
+           positions[end].ref_pos == positions[begin].ref_pos) {
+      end++;
+    }
+    return end;
+  };
+  // First token: the number of runs that follow.
+  size_t num_ref_patch = 0;
+  for (size_t i = 0; i < positions.size(); i = run_end(i)) {
+    num_ref_patch++;
+  }
+  emit(kNumRefPatchContext, num_ref_patch);
+  for (size_t first = 0; first < positions.size();) {
+    const size_t last = run_end(first);
+    const size_t num = last - first;
+    JXL_ASSERT(num > 0);
+    const auto& ref_pos = positions[first].ref_pos;
+    emit(kReferenceFrameContext, ref_pos.ref);
+    emit(kPatchReferencePositionContext, ref_pos.x0);
+    emit(kPatchReferencePositionContext, ref_pos.y0);
+    emit(kPatchSizeContext, ref_pos.xsize - 1);
+    emit(kPatchSizeContext, ref_pos.ysize - 1);
+    emit(kPatchCountContext, num - 1);
+    for (size_t j = first; j < last; j++) {
+      const PatchPosition& pos = positions[j];
+      if (j == first) {
+        // The run's first position is absolute; later ones are deltas.
+        emit(kPatchPositionContext, pos.x);
+        emit(kPatchPositionContext, pos.y);
+      } else {
+        const PatchPosition& prev = positions[j - 1];
+        emit(kPatchOffsetContext, PackSigned(pos.x - prev.x));
+        emit(kPatchOffsetContext, PackSigned(pos.y - prev.y));
+      }
+      const auto& extra_channels = pdic.shared_->metadata->m.extra_channel_info;
+      JXL_ASSERT(extra_channels.size() + 1 == pos.blending.size());
+      for (size_t k = 0; k < extra_channels.size() + 1; k++) {
+        const PatchBlending& info = pos.blending[k];
+        emit(kPatchBlendModeContext, static_cast<uint32_t>(info.mode));
+        if (UsesAlpha(info.mode) && extra_channels.size() > 1) {
+          emit(kPatchAlphaChannelContext, info.alpha_channel);
+        }
+        if (UsesClamp(info.mode)) {
+          emit(kPatchClampContext, info.clamp);
+        }
+      }
+    }
+    first = last;
+  }
+
+  EntropyEncodingData codes;
+  std::vector<uint8_t> context_map;
+  BuildAndEncodeHistograms(HistogramParams(), kNumPatchDictionaryContexts,
+                           tokens, &codes, &context_map, writer, layer,
+                           aux_out);
+  WriteTokens(tokens[0], codes, context_map, writer, layer, aux_out);
+}
+
+// static
+// Removes the contribution of every patch in `pdic` from `opsin`, row by row:
+// kAdd patches are subtracted, kReplace patches zero the covered pixels, and
+// kNone patches leave the image untouched. Any other mode hits JXL_ABORT.
+void PatchDictionaryEncoder::SubtractFrom(const PatchDictionary& pdic,
+                                          Image3F* opsin) {
+  // TODO(veluca): this can likely be optimized knowing it runs on full images.
+  for (size_t y = 0; y < opsin->ysize(); y++) {
+    // Rows without a [y, y + 1] pair in patch_starts_ have nothing to subtract.
+    if (y + 1 >= pdic.patch_starts_.size()) continue;
+    float* JXL_RESTRICT rows[3];
+    for (size_t c = 0; c < 3; c++) rows[c] = opsin->PlaneRow(c, y);
+    const size_t id_begin = pdic.patch_starts_[y];
+    const size_t id_end = pdic.patch_starts_[y + 1];
+    for (size_t id = id_begin; id < id_end; id++) {
+      const PatchPosition& pos = pdic.positions_[pdic.sorted_patches_[id]];
+      const size_t bx = pos.x;
+      const size_t by = pos.y;
+      const size_t width = pos.ref_pos.xsize;
+      JXL_DASSERT(y >= by);
+      JXL_DASSERT(y < by + pos.ref_pos.ysize);
+      // Row of the reference frame that lands on image row y.
+      const size_t ref_y = pos.ref_pos.y0 + (y - by);
+      const auto ref_color =
+          pdic.shared_->reference_frames[pos.ref_pos.ref].frame->color();
+      const float* JXL_RESTRICT ref_rows[3];
+      for (size_t c = 0; c < 3; c++) {
+        ref_rows[c] = ref_color->ConstPlaneRow(c, ref_y) + pos.ref_pos.x0;
+      }
+      for (size_t ix = 0; ix < width; ix++) {
+        for (size_t c = 0; c < 3; c++) {
+          switch (pos.blending[0].mode) {
+            case PatchBlendMode::kAdd:
+              rows[c][bx + ix] -= ref_rows[c][ix];
+              break;
+            case PatchBlendMode::kReplace:
+              rows[c][bx + ix] = 0;
+              break;
+            case PatchBlendMode::kNone:
+              break;  // Nothing to do.
+            default:
+              JXL_ABORT("Blending mode %u not yet implemented",
+                        (uint32_t)pos.blending[0].mode);
+          }
+        }
+      }
+    }
+  }
+}
+
+namespace {
+
+// Per-channel quantization steps and similarity weights used when comparing
+// and quantizing patch pixels; the constants depend on whether data is XYB.
+struct PatchColorspaceInfo {
+  float kChannelDequant[3];
+  float kChannelWeights[3];
+
+  explicit PatchColorspaceInfo(bool is_xyb) {
+    // Pick the XYB or non-XYB constant set, channel by channel.
+    const float xyb_dequant[3] = {0.01615, 0.08875, 0.1922};
+    const float xyb_weights[3] = {30.0, 3.0, 1.0};
+    const float other_dequant[3] = {20.0f / 255, 22.0f / 255, 20.0f / 255};
+    const float other_weights[3] = {0.017 * 255, 0.02 * 255, 0.017 * 255};
+    for (size_t c = 0; c < 3; c++) {
+      kChannelDequant[c] = is_xyb ? xyb_dequant[c] : other_dequant[c];
+      kChannelWeights[c] = is_xyb ? xyb_weights[c] : other_weights[c];
+    }
+  }
+
+  // Expresses `val` in units of channel c's quantization step.
+  float ScaleForQuantization(float val, size_t c) {
+    return val / kChannelDequant[c];
+  }
+
+  // Scaled value truncated toward zero.
+  int Quantize(float val, size_t c) {
+    const float scaled = ScaleForQuantization(val, c);
+    return truncf(scaled);
+  }
+
+  // True when the weighted L1 distance between v1 and v2 is <= threshold.
+  bool is_similar_v(const float v1[3], const float v2[3], float threshold) {
+    float weighted_l1 = 0;
+    for (size_t c = 0; c < 3; c++) {
+      weighted_l1 += std::fabs(v1[c] - v2[c]) * kChannelWeights[c];
+    }
+    return weighted_l1 <= threshold;
+  }
+};
+// Finds small components on uniform background that occur at least twice.
+std::vector<PatchInfo> FindTextLikePatches(
+    const Image3F& opsin, const PassesEncoderState* JXL_RESTRICT state,
+    ThreadPool* pool, AuxOut* aux_out, bool is_xyb) {
+  if (state->cparams.patches == Override::kOff) return {};
+
+  PatchColorspaceInfo pci(is_xyb);
+  float kSimilarThreshold = 0.8f;  // Weighted-L1 bound for the flood fill.
+  // Compares the pixels at p1/p2 ((x, y) pairs) in a planar 3-channel buffer.
+  auto is_similar_impl = [&pci](std::pair<uint32_t, uint32_t> p1,
+                                std::pair<uint32_t, uint32_t> p2,
+                                const float* JXL_RESTRICT rows[3],
+                                size_t stride, float threshold) {
+    float v1[3], v2[3];
+    for (size_t c = 0; c < 3; c++) {
+      v1[c] = rows[c][p1.second * stride + p1.first];
+      v2[c] = rows[c][p2.second * stride + p2.first];
+    }
+    return pci.is_similar_v(v1, v2, threshold);
+  };
+
+  std::atomic<bool> has_screenshot_areas{false};
+  const size_t opsin_stride = opsin.PixelsPerRow();
+  const float* JXL_RESTRICT opsin_rows[3] = {opsin.ConstPlaneRow(0, 0),
+                                             opsin.ConstPlaneRow(1, 0),
+                                             opsin.ConstPlaneRow(2, 0)};
+  // Near-exact match test: every channel differs by at most 1e-4.
+  auto is_same = [&opsin_rows, opsin_stride](std::pair<uint32_t, uint32_t> p1,
+                                             std::pair<uint32_t, uint32_t> p2) {
+    for (size_t c = 0; c < 3; c++) {
+      float v1 = opsin_rows[c][p1.second * opsin_stride + p1.first];
+      float v2 = opsin_rows[c][p2.second * opsin_stride + p2.first];
+      if (std::fabs(v1 - v2) > 1e-4) {
+        return false;
+      }
+    }
+    return true;
+  };
+
+  auto is_similar = [&](std::pair<uint32_t, uint32_t> p1,
+                        std::pair<uint32_t, uint32_t> p2) {
+    return is_similar_impl(p1, p2, opsin_rows, opsin_stride, kSimilarThreshold);
+  };
+
+  constexpr int64_t kPatchSide = 4;
+  constexpr int64_t kExtraSide = 4;
+
+  // Look for kPatchSide size squares, naturally aligned, that all have the same
+  // pixel values.
+  ImageB is_screenshot_like(DivCeil(opsin.xsize(), kPatchSide),
+                            DivCeil(opsin.ysize(), kPatchSide));
+  ZeroFillImage(&is_screenshot_like);
+  uint8_t* JXL_RESTRICT screenshot_row = is_screenshot_like.Row(0);
+  const size_t screenshot_stride = is_screenshot_like.PixelsPerRow();
+  const auto process_row = [&](uint64_t y, int _) {  // y: block row.
+    for (uint64_t x = 0; x < opsin.xsize() / kPatchSide; x++) {
+      bool all_same = true;
+      for (size_t iy = 0; iy < static_cast<size_t>(kPatchSide); iy++) {
+        for (size_t ix = 0; ix < static_cast<size_t>(kPatchSide); ix++) {
+          size_t cx = x * kPatchSide + ix;
+          size_t cy = y * kPatchSide + iy;
+          if (!is_same({cx, cy}, {x * kPatchSide, y * kPatchSide})) {
+            all_same = false;
+            break;
+          }
+        }
+      }
+      if (!all_same) continue;
+      size_t num = 0;       // In-bounds pixels in the enlarged window.
+      size_t num_same = 0;  // ...of which match the block's top-left pixel.
+      for (int64_t iy = -kExtraSide; iy < kExtraSide + kPatchSide; iy++) {
+        for (int64_t ix = -kExtraSide; ix < kExtraSide + kPatchSide; ix++) {
+          int64_t cx = x * kPatchSide + ix;
+          int64_t cy = y * kPatchSide + iy;
+          if (cx < 0 || static_cast<uint64_t>(cx) >= opsin.xsize() ||  //
+              cy < 0 || static_cast<uint64_t>(cy) >= opsin.ysize()) {
+            continue;
+          }
+          num++;
+          if (is_same({cx, cy}, {x * kPatchSide, y * kPatchSide})) num_same++;
+        }
+      }
+      // Too few equal pixels nearby (fewer than 7/8 of the window).
+      if (num_same * 8 < num * 7) continue;
+      screenshot_row[y * screenshot_stride + x] = 1;
+      has_screenshot_areas = true;
+    }
+  };
+  RunOnPool(pool, 0, opsin.ysize() / kPatchSide, ThreadPool::SkipInit(),
+            process_row, "IsScreenshotLike");
+
+  // TODO(veluca): also parallelize the rest of this function.
+  if (WantDebugOutput(aux_out)) {
+    aux_out->DumpPlaneNormalized("screenshot_like", is_screenshot_like);
+  }
+
+  constexpr int kSearchRadius = 1;
+
+  if (!ApplyOverride(state->cparams.patches, has_screenshot_areas)) {
+    return {};
+  }
+
+  // Search for "similar enough" pixels near the screenshot-like areas.
+  ImageB is_background(opsin.xsize(), opsin.ysize());
+  ZeroFillImage(&is_background);
+  Image3F background(opsin.xsize(), opsin.ysize());
+  ZeroFillImage(&background);
+  constexpr size_t kDistanceLimit = 50;  // Max |dx| + |dy| from the seed.
+  float* JXL_RESTRICT background_rows[3] = {
+      background.PlaneRow(0, 0),
+      background.PlaneRow(1, 0),
+      background.PlaneRow(2, 0),
+  };
+  const size_t background_stride = background.PixelsPerRow();
+  uint8_t* JXL_RESTRICT is_background_row = is_background.Row(0);
+  const size_t is_background_stride = is_background.PixelsPerRow();
+  std::vector<
+      std::pair<std::pair<uint32_t, uint32_t>, std::pair<uint32_t, uint32_t>>>
+      queue;  // Entries: (pixel to mark as background, seed it grew from).
+  size_t queue_front = 0;
+  for (size_t y = 0; y < opsin.ysize(); y++) {
+    for (size_t x = 0; x < opsin.xsize(); x++) {
+      if (!screenshot_row[screenshot_stride * (y / kPatchSide) +
+                          (x / kPatchSide)])
+        continue;
+      queue.push_back({{x, y}, {x, y}});
+    }
+  }
+  while (queue.size() != queue_front) {
+    std::pair<uint32_t, uint32_t> cur = queue[queue_front].first;
+    std::pair<uint32_t, uint32_t> src = queue[queue_front].second;
+    queue_front++;
+    if (is_background_row[cur.second * is_background_stride + cur.first])
+      continue;
+    is_background_row[cur.second * is_background_stride + cur.first] = 1;
+    for (size_t c = 0; c < 3; c++) {
+      background_rows[c][cur.second * background_stride + cur.first] =
+          opsin_rows[c][src.second * opsin_stride + src.first];
+    }
+    for (int dx = -kSearchRadius; dx <= kSearchRadius; dx++) {
+      for (int dy = -kSearchRadius; dy <= kSearchRadius; dy++) {
+        if (dx == 0 && dy == 0) continue;
+        int next_first = cur.first + dx;
+        int next_second = cur.second + dy;
+        if (next_first < 0 || next_second < 0 ||
+            static_cast<uint32_t>(next_first) >= opsin.xsize() ||
+            static_cast<uint32_t>(next_second) >= opsin.ysize()) {
+          continue;
+        }
+        if (static_cast<uint32_t>(
+                std::abs(next_first - static_cast<int>(src.first)) +
+                std::abs(next_second - static_cast<int>(src.second))) >
+            kDistanceLimit) {
+          continue;
+        }
+        std::pair<uint32_t, uint32_t> next{next_first, next_second};
+        if (is_similar(src, next)) {
+          if (!screenshot_row[next.second / kPatchSide * screenshot_stride +
+                              next.first / kPatchSide] ||
+              is_same(src, next)) {
+            if (!is_background_row[next.second * is_background_stride +
+                                   next.first])
+              queue.emplace_back(next, src);
+          }
+        }
+      }
+    }
+  }
+  queue.clear();
+
+  ImageF ccs;
+  std::mt19937 rng;
+  std::uniform_real_distribution<float> dist(0.5, 1.0);
+  bool paint_ccs = false;
+  if (WantDebugOutput(aux_out)) {
+    aux_out->DumpPlaneNormalized("is_background", is_background);
+    if (is_xyb) {
+      aux_out->DumpXybImage("background", background);
+    } else {
+      aux_out->DumpImage("background", background);
+    }
+    ccs = ImageF(opsin.xsize(), opsin.ysize());
+    ZeroFillImage(&ccs);
+    paint_ccs = true;
+  }
+
+  constexpr float kVerySimilarThreshold = 0.03f;
+  constexpr float kHasSimilarThreshold = 0.03f;
+
+  const float* JXL_RESTRICT const_background_rows[3] = {
+      background_rows[0], background_rows[1], background_rows[2]};
+  auto is_similar_b = [&](std::pair<int, int> p1, std::pair<int, int> p2) {
+    return is_similar_impl(p1, p2, const_background_rows, background_stride,
+                           kVerySimilarThreshold);
+  };
+
+  constexpr int kMinPeak = 2;  // Min |quantized value| a patch must reach.
+  constexpr int kHasSimilarRadius = 2;  // Margin around the bounding box.
+
+  std::vector<PatchInfo> info;
+
+  // Find small CC outside the "similar enough" areas, compute bounding boxes,
+  // and run heuristics to exclude some patches.
+  ImageB visited(opsin.xsize(), opsin.ysize());
+  ZeroFillImage(&visited);
+  uint8_t* JXL_RESTRICT visited_row = visited.Row(0);
+  const size_t visited_stride = visited.PixelsPerRow();
+  std::vector<std::pair<uint32_t, uint32_t>> cc;
+  std::vector<std::pair<uint32_t, uint32_t>> stack;
+  for (size_t y = 0; y < opsin.ysize(); y++) {
+    for (size_t x = 0; x < opsin.xsize(); x++) {
+      if (is_background_row[y * is_background_stride + x]) continue;
+      cc.clear();
+      stack.clear();
+      stack.emplace_back(x, y);
+      size_t min_x = x;
+      size_t max_x = x;
+      size_t min_y = y;
+      size_t max_y = y;
+      std::pair<uint32_t, uint32_t> reference;  // First background neighbor.
+      bool found_border = false;
+      bool all_similar = true;
+      while (!stack.empty()) {
+        std::pair<uint32_t, uint32_t> cur = stack.back();
+        stack.pop_back();
+        if (visited_row[cur.second * visited_stride + cur.first]) continue;
+        visited_row[cur.second * visited_stride + cur.first] = 1;
+        if (cur.first < min_x) min_x = cur.first;
+        if (cur.first > max_x) max_x = cur.first;
+        if (cur.second < min_y) min_y = cur.second;
+        if (cur.second > max_y) max_y = cur.second;
+        if (paint_ccs) {
+          cc.push_back(cur);
+        }
+        for (int dx = -kSearchRadius; dx <= kSearchRadius; dx++) {
+          for (int dy = -kSearchRadius; dy <= kSearchRadius; dy++) {
+            if (dx == 0 && dy == 0) continue;
+            int next_first = static_cast<int32_t>(cur.first) + dx;
+            int next_second = static_cast<int32_t>(cur.second) + dy;
+            if (next_first < 0 || next_second < 0 ||
+                static_cast<uint32_t>(next_first) >= opsin.xsize() ||
+                static_cast<uint32_t>(next_second) >= opsin.ysize()) {
+              continue;
+            }
+            std::pair<uint32_t, uint32_t> next{next_first, next_second};
+            if (!is_background_row[next.second * is_background_stride +
+                                   next.first]) {
+              stack.push_back(next);
+            } else {
+              if (!found_border) {
+                reference = next;
+                found_border = true;
+              } else {
+                if (!is_similar_b(next, reference)) all_similar = false;
+              }
+            }
+          }
+        }
+      }
+      if (!found_border || !all_similar || max_x - min_x >= kMaxPatchSize ||
+          max_y - min_y >= kMaxPatchSize) {
+        continue;
+      }
+      size_t bpos = background_stride * reference.second + reference.first;
+      float ref[3] = {background_rows[0][bpos], background_rows[1][bpos],
+                      background_rows[2][bpos]};
+      bool has_similar = false;
+      for (size_t iy = std::max<int>(
+               static_cast<int32_t>(min_y) - kHasSimilarRadius, 0);
+           iy < std::min(max_y + kHasSimilarRadius + 1, opsin.ysize()); iy++) {
+        for (size_t ix = std::max<int>(
+                 static_cast<int32_t>(min_x) - kHasSimilarRadius, 0);
+             ix < std::min(max_x + kHasSimilarRadius + 1, opsin.xsize());
+             ix++) {
+          size_t opos = opsin_stride * iy + ix;
+          float px[3] = {opsin_rows[0][opos], opsin_rows[1][opos],
+                         opsin_rows[2][opos]};
+          if (pci.is_similar_v(ref, px, kHasSimilarThreshold)) {
+            has_similar = true;
+          }
+        }
+      }
+      if (!has_similar) continue;
+      info.emplace_back();
+      info.back().second.emplace_back(min_x, min_y);
+      QuantizedPatch& patch = info.back().first;
+      patch.xsize = max_x - min_x + 1;
+      patch.ysize = max_y - min_y + 1;
+      int max_value = 0;
+      for (size_t c : {1, 0, 2}) {
+        for (size_t iy = min_y; iy <= max_y; iy++) {
+          for (size_t ix = min_x; ix <= max_x; ix++) {
+            size_t offset = (iy - min_y) * patch.xsize + ix - min_x;
+            patch.fpixels[c][offset] =
+                opsin_rows[c][iy * opsin_stride + ix] - ref[c];
+            int val = pci.Quantize(patch.fpixels[c][offset], c);
+            patch.pixels[c][offset] = val;
+            if (std::abs(val) > max_value) max_value = std::abs(val);
+          }
+        }
+      }
+      if (max_value < kMinPeak) {
+        info.pop_back();
+        continue;
+      }
+      if (paint_ccs) {
+        float cc_color = dist(rng);
+        for (std::pair<uint32_t, uint32_t> p : cc) {
+          ccs.Row(p.second)[p.first] = cc_color;
+        }
+      }
+    }
+  }
+
+  if (paint_ccs) {
+    JXL_ASSERT(WantDebugOutput(aux_out));
+    aux_out->DumpPlaneNormalized("ccs", ccs);
+  }
+  if (info.empty()) {
+    return {};
+  }
+
+  // Merge equal patches (sorted, so adjacent); drop those seen too rarely.
+  constexpr size_t kMinPatchOccurences = 2;
+  std::sort(info.begin(), info.end());
+  size_t unique = 0;
+  for (size_t i = 1; i < info.size(); i++) {
+    if (info[i].first == info[unique].first) {
+      info[unique].second.insert(info[unique].second.end(),
+                                 info[i].second.begin(), info[i].second.end());
+    } else {
+      if (info[unique].second.size() >= kMinPatchOccurences) {
+        unique++;
+      }
+      info[unique] = info[i];
+    }
+  }
+  if (info[unique].second.size() >= kMinPatchOccurences) {
+    unique++;
+  }
+  info.resize(unique);
+
+  size_t max_patch_size = 0;
+
+  for (size_t i = 0; i < info.size(); i++) {
+    size_t pixels = info[i].first.xsize * info[i].first.ysize;
+    if (pixels > max_patch_size) max_patch_size = pixels;
+  }
+
+  // don't use patches if all patches are smaller than this
+  constexpr size_t kMinMaxPatchSize = 20;
+  if (max_patch_size < kMinMaxPatchSize) return {};
+
+  // Ensure that the specified set of patches doesn't produce out-of-bounds
+  // pixels.
+  // TODO(veluca): figure out why this is still necessary even with RCTs that
+  // don't depend on bit depth.
+  if (state->cparams.modular_mode && state->cparams.quality_pair.first >= 100) {
+    constexpr size_t kMaxPatchArea = kMaxPatchSize * kMaxPatchSize;
+    std::vector<float> min_then_max_px(2 * kMaxPatchArea);
+    for (size_t i = 0; i < info.size(); i++) {
+      for (size_t c = 0; c < 3; c++) {
+        float* JXL_RESTRICT min_px = min_then_max_px.data();
+        float* JXL_RESTRICT max_px = min_px + kMaxPatchArea;
+        std::fill(min_px, min_px + kMaxPatchArea, 1);
+        std::fill(max_px, max_px + kMaxPatchArea, 0);
+        size_t xsize = info[i].first.xsize;
+        for (size_t j = 0; j < info[i].second.size(); j++) {
+          size_t bx = info[i].second[j].first;
+          size_t by = info[i].second[j].second;
+          for (size_t iy = 0; iy < info[i].first.ysize; iy++) {
+            for (size_t ix = 0; ix < xsize; ix++) {
+              float v = opsin_rows[c][(by + iy) * opsin_stride + bx + ix];
+              if (v < min_px[iy * xsize + ix]) min_px[iy * xsize + ix] = v;
+              if (v > max_px[iy * xsize + ix]) max_px[iy * xsize + ix] = v;
+            }
+          }
+        }
+        for (size_t iy = 0; iy < info[i].first.ysize; iy++) {
+          for (size_t ix = 0; ix < xsize; ix++) {
+            float smallest = min_px[iy * xsize + ix];
+            float biggest = max_px[iy * xsize + ix];
+            JXL_ASSERT(smallest <= biggest);
+            float& out = info[i].first.fpixels[c][iy * xsize + ix];
+            // Clamp fpixels so that subtracting the patch never creates a
+            // negative value, or a value above 1.
+            JXL_ASSERT(biggest - 1 <= smallest);
+            out = std::max(smallest, out);
+            out = std::min(biggest - 1.f, out);
+          }
+        }
+      }
+    }
+  }
+  return info;
+}
+
+}  // namespace
+
+void FindBestPatchDictionary(const Image3F& opsin,
+                             PassesEncoderState* JXL_RESTRICT state,
+                             ThreadPool* pool, AuxOut* aux_out, bool is_xyb) {
+  state->shared.image_features.patches = PatchDictionary();  // Start from an empty dictionary.
+  state->shared.image_features.patches.SetPassesSharedState(&state->shared);
+
+  std::vector<PatchInfo> info =
+      FindTextLikePatches(opsin, state, pool, aux_out, is_xyb);
+
+  // TODO(veluca): this doesn't work if both dots and patches are enabled.
+  // For now, since dots and patches are not likely to occur in the same kind of
+  // images, disable dots if some patches were found.
+  if (info.empty() &&
+      ApplyOverride(
+          state->cparams.dots,
+          state->cparams.speed_tier <= SpeedTier::kSquirrel &&
+              state->cparams.butteraugli_distance >= kMinButteraugliForDots)) {
+    info = FindDotDictionary(state->cparams, opsin, state->shared.cmap, pool);
+  }
+
+  if (info.empty()) return;  // Neither patches nor dots: the dictionary stays empty.
+
+  std::sort(
+      info.begin(), info.end(), [&](const PatchInfo& a, const PatchInfo& b) {
+        return a.first.xsize * a.first.ysize > b.first.xsize * b.first.ysize;
+      });  // Largest area first, so the biggest patches are placed first below.
+
+  size_t max_x_size = 0;
+  size_t max_y_size = 0;
+  size_t total_pixels = 0;
+
+  for (size_t i = 0; i < info.size(); i++) {
+    size_t pixels = info[i].first.xsize * info[i].first.ysize;
+    if (max_x_size < info[i].first.xsize) max_x_size = info[i].first.xsize;
+    if (max_y_size < info[i].first.ysize) max_y_size = info[i].first.ysize;
+    total_pixels += pixels;
+  }
+
+  // Bin-packing & conversion of patches: every patch gets the first free spot found in a reference frame that is enlarged by kBinPackingSlackness (plus one pixel) per attempt until all patches fit.
+  constexpr float kBinPackingSlackness = 1.05f;
+  size_t ref_xsize = std::max<float>(max_x_size, std::sqrt(total_pixels));
+  size_t ref_ysize = std::max<float>(max_y_size, std::sqrt(total_pixels));
+  std::vector<std::pair<size_t, size_t>> ref_positions(info.size());
+  // TODO(veluca): allow partial overlaps of patches that have the same pixels.
+  size_t max_y = 0;
+  do {
+    max_y = 0;
+    // Increase packed image size.
+    ref_xsize = ref_xsize * kBinPackingSlackness + 1;
+    ref_ysize = ref_ysize * kBinPackingSlackness + 1;
+
+    ImageB occupied(ref_xsize, ref_ysize);
+    ZeroFillImage(&occupied);
+    uint8_t* JXL_RESTRICT occupied_rows = occupied.Row(0);
+    size_t occupied_stride = occupied.PixelsPerRow();
+
+    bool success = true;
+    // For every patch...
+    for (size_t patch = 0; patch < info.size(); patch++) {
+      size_t x0 = 0;
+      size_t y0 = 0;
+      size_t xsize = info[patch].first.xsize;
+      size_t ysize = info[patch].first.ysize;
+      bool found = false;
+      // For every possible start position ...
+      for (; y0 + ysize <= ref_ysize; y0++) {
+        x0 = 0;
+        for (; x0 + xsize <= ref_xsize; x0++) {
+          bool has_occupied_pixel = false;
+          size_t x = x0;
+          // Check if it is possible to place the patch in this position in the
+          // reference frame.
+          for (size_t y = y0; y < y0 + ysize; y++) {
+            x = x0;
+            for (; x < x0 + xsize; x++) {
+              if (occupied_rows[y * occupied_stride + x]) {
+                has_occupied_pixel = true;
+                break;
+              }
+            }
+          }  // end of positioning check
+          if (!has_occupied_pixel) {
+            found = true;
+            break;
+          }
+          x0 = x;  // Jump to next pixel after the occupied one. NOTE(review): x is the value left by the last row scanned, since the break above only leaves the inner loop.
+        }
+        if (found) break;
+      }  // end of start position checking
+
+      // We didn't find a possible position: repeat from the beginning with a
+      // larger reference frame size.
+      if (!found) {
+        success = false;
+        break;
+      }
+
+      // We found a position: mark the corresponding positions in the reference
+      // image as used.
+      ref_positions[patch] = {x0, y0};
+      for (size_t y = y0; y < y0 + ysize; y++) {
+        for (size_t x = x0; x < x0 + xsize; x++) {
+          occupied_rows[y * occupied_stride + x] = true;
+        }
+      }
+      max_y = std::max(max_y, y0 + ysize);
+    }
+
+    if (success) break;
+  } while (true);
+
+  JXL_ASSERT(ref_ysize >= max_y);
+
+  ref_ysize = max_y;  // Keep only the rows that actually hold a patch.
+
+  Image3F reference_frame(ref_xsize, ref_ysize);
+  // TODO(veluca): figure out a better way to fill the image.
+  ZeroFillImage(&reference_frame);
+  std::vector<PatchPosition> positions;
+  float* JXL_RESTRICT ref_rows[3] = {
+      reference_frame.PlaneRow(0, 0),
+      reference_frame.PlaneRow(1, 0),
+      reference_frame.PlaneRow(2, 0),
+  };
+  size_t ref_stride = reference_frame.PixelsPerRow();
+
+  for (size_t i = 0; i < info.size(); i++) {
+    PatchReferencePosition ref_pos;
+    ref_pos.xsize = info[i].first.xsize;
+    ref_pos.ysize = info[i].first.ysize;
+    ref_pos.x0 = ref_positions[i].first;
+    ref_pos.y0 = ref_positions[i].second;
+    ref_pos.ref = 0;  // Every patch points at reference slot 0, the slot filled at the end of this function.
+    for (size_t y = 0; y < ref_pos.ysize; y++) {
+      for (size_t x = 0; x < ref_pos.xsize; x++) {
+        for (size_t c = 0; c < 3; c++) {
+          ref_rows[c][(y + ref_pos.y0) * ref_stride + x + ref_pos.x0] =
+              info[i].first.fpixels[c][y * ref_pos.xsize + x];
+        }
+      }
+    }
+    // Add color channels, ignore other channels.
+    std::vector<PatchBlending> blending_info(
+        state->shared.metadata->m.extra_channel_info.size() + 1,
+        PatchBlending{PatchBlendMode::kNone, 0, false});
+    blending_info[0].mode = PatchBlendMode::kAdd;
+    for (const auto& pos : info[i].second) {
+      positions.emplace_back(
+          PatchPosition{pos.first, pos.second, blending_info, ref_pos});
+    }
+  }
+
+  CompressParams cparams = state->cparams;
+  cparams.resampling = 1;
+  cparams.ec_resampling = 1;
+  // Recursive application of patches could create very weird issues.
+  cparams.patches = Override::kOff;
+  cparams.dots = Override::kOff;
+  cparams.noise = Override::kOff;
+  cparams.modular_mode = true;
+  cparams.responsive = 0;
+  cparams.progressive_dc = 0;
+  cparams.progressive_mode = false;
+  cparams.qprogressive_mode = false;
+  // Use gradient predictor and not Predictor::Best.
+  cparams.options.predictor = Predictor::Gradient;
+  // TODO(veluca): possibly change heuristics here.
+  if (!cparams.modular_mode) {  // NOTE(review): always false, because cparams.modular_mode was set to true above; only the else branch ever runs. Possibly state->cparams.modular_mode was intended - confirm.
+    cparams.quality_pair.first = cparams.quality_pair.second =
+        80 - cparams.butteraugli_distance * 12;
+  } else {
+    cparams.quality_pair.first = (100 + 3 * cparams.quality_pair.first) * 0.25f;
+    cparams.quality_pair.second =
+        (100 + 3 * cparams.quality_pair.second) * 0.25f;
+  }
+  FrameInfo patch_frame_info;
+  patch_frame_info.save_as_reference = 0;  // always saved.
+  patch_frame_info.frame_type = FrameType::kReferenceOnly;
+  patch_frame_info.save_before_color_transform = true;
+
+  ImageBundle ib(&state->shared.metadata->m);
+  // TODO(veluca): metadata.color_encoding is a lie: ib is in XYB, but there is
+  // no simple way to express that yet.
+  patch_frame_info.ib_needs_color_transform = false;
+  patch_frame_info.save_as_reference = 0;  // NOTE(review): repeats the assignment made a few lines above.
+  ib.SetFromImage(std::move(reference_frame),
+                  state->shared.metadata->m.color_encoding);
+  if (!ib.metadata()->extra_channel_info.empty()) {
+    // Add dummy extra channels to the patch image: patches do not yet support
+    // extra channels, but the codec expects that the amount of extra channels
+    // in frames matches that in the metadata of the codestream.
+    std::vector<ImageF> extra_channels;
+    extra_channels.reserve(ib.metadata()->extra_channel_info.size());
+    for (size_t i = 0; i < ib.metadata()->extra_channel_info.size(); i++) {
+      extra_channels.emplace_back(ib.xsize(), ib.ysize());
+      // Must initialize the image with data to not affect blending with
+      // uninitialized memory.
+      // TODO(lode): patches must copy and use the real extra channels instead.
+      FillImage(1.0f, &extra_channels.back());
+    }
+    ib.SetExtraChannels(std::move(extra_channels));
+  }
+
+  PassesEncoderState roundtrip_state;
+  auto special_frame = std::unique_ptr<BitWriter>(new BitWriter());
+  JXL_CHECK(EncodeFrame(cparams, patch_frame_info, state->shared.metadata, ib,
+                        &roundtrip_state, pool, special_frame.get(), nullptr));
+  const Span<const uint8_t> encoded = special_frame->GetSpan();
+  state->special_frames.emplace_back(std::move(special_frame));
+  if (cparams.butteraugli_distance < kMinButteraugliToSubtractOriginalPatches) {
+    BitReader br(encoded);  // Decode the frame just encoded and keep the decoded pixels as reference frame 0.
+    ImageBundle decoded(&state->shared.metadata->m);
+    PassesDecoderState dec_state;
+    JXL_CHECK(dec_state.output_encoding_info.Set(
+        *state->shared.metadata,
+        ColorEncoding::LinearSRGB(
+            state->shared.metadata->m.color_encoding.IsGray())));
+    JXL_CHECK(DecodeFrame({}, &dec_state, pool, &br, &decoded,
+                          *state->shared.metadata, /*constraints=*/nullptr));
+    JXL_CHECK(br.Close());
+    state->shared.reference_frames[0] =
+        std::move(dec_state.shared_storage.reference_frames[0]);
+  } else {
+    state->shared.reference_frames[0].storage = std::move(ib);  // Keep the pre-encoding patch image as reference frame 0.
+  }
+  state->shared.reference_frames[0].frame =
+      &state->shared.reference_frames[0].storage;
+  // TODO(veluca): this assumes that applying patches is commutative, which is
+  // not true for all blending modes. This code only produces kAdd patches, so
+  // this works out.
+  std::sort(positions.begin(), positions.end());
+  PatchDictionaryEncoder::SetPositions(&state->shared.image_features.patches,
+                                       std::move(positions));
+}
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_patch_dictionary.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_patch_dictionary.h
new file mode 100644
index 0000000000..f26016f8de
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_patch_dictionary.h
@@ -0,0 +1,56 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_PATCH_DICTIONARY_H_
+#define LIB_JXL_ENC_PATCH_DICTIONARY_H_
+
+// Chooses reference patches, and avoids encoding them once per occurrence.
+
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include <tuple>
+#include <vector>
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/opsin_params.h"
+
+namespace jxl {
+
+// Friend class of PatchDictionary: static encoder-side helpers operating on a PatchDictionary.
+class PatchDictionaryEncoder {
+ public:
+  // Only call if HasAny().
+  static void Encode(const PatchDictionary& pdic, BitWriter* writer,
+                     size_t layer, AuxOut* aux_out);
+
+  static void SetPositions(PatchDictionary* pdic,
+                           std::vector<PatchPosition> positions) {
+    pdic->positions_ = std::move(positions);  // The dictionary takes over the passed list.
+    pdic->ComputePatchCache();  // Presumably refreshes data derived from positions_ - see PatchDictionary.
+  }
+
+  static void SubtractFrom(const PatchDictionary& pdic, Image3F* opsin);
+};
+
+void FindBestPatchDictionary(const Image3F& opsin,
+                             PassesEncoderState* JXL_RESTRICT state,
+                             ThreadPool* pool, AuxOut* aux_out,
+                             bool is_xyb = true);  // Resets state->shared.image_features.patches and refills it from what is found in opsin.
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_PATCH_DICTIONARY_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_photon_noise.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_photon_noise.cc
new file mode 100644
index 0000000000..3786ef5cf5
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_photon_noise.cc
@@ -0,0 +1,89 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_photon_noise.h"
+
+namespace jxl {
+
+namespace {
+
+// Assumes a daylight-like spectrum.
+// https://www.strollswithmydog.com/effective-quantum-efficiency-of-sensor/#:~:text=11%2C260%20photons/um%5E2/lx-s
+constexpr float kPhotonsPerLxSPerUm2 = 11260;
+
+// Order of magnitude for cameras in the 2010-2020 decade, taking the CFA into
+// account.
+constexpr float kEffectiveQuantumEfficiency = 0.20;
+
+// TODO(sboukortt): reevaluate whether these are good defaults, notably whether
+// it would be worth making read noise higher at lower ISO settings.
+constexpr float kPhotoResponseNonUniformity = 0.005;
+constexpr float kInputReferredReadNoise = 3;
+
+// Assumes a 35mm sensor.
+constexpr float kSensorAreaUm2 = 36000.f * 24000;
+
+template <typename T>
+inline constexpr T Square(const T value) {  // value^2
+  return value * value;
+}
+template <typename T>
+inline constexpr T Cube(const T value) {  // value^3, evaluated as (value * value) * value
+  return Square(value) * value;
+}
+
+}  // namespace
+
+NoiseParams SimulatePhotonNoise(const size_t xsize, const size_t ysize,
+                                const float iso) {
+  // Opsin absorbance bias of channel 1 and its cube root, used below to map a
+  // Y value back to linear light.
+  const float bias = kOpsinAbsorbanceBias[1];
+  const float bias_cbrt = std::cbrt(bias);
+
+  // Focal plane exposure H for 18% of kDefaultIntensityTarget, in lx·s,
+  // derived from ISO = 10 lx·s ÷ H.
+  const float exposure_18 = 10 / iso;
+  const float um2_per_pixel = kSensorAreaUm2 / (xsize * ysize);
+  const float electrons_18 = kEffectiveQuantumEfficiency *
+                             kPhotonsPerLxSPerUm2 * exposure_18 * um2_per_pixel;
+
+  NoiseParams result;
+  for (size_t idx = 0; idx < NoiseParams::kNumNoisePoints; ++idx) {
+    // The LUT entry stands for XYB = (0, 2·position, 2·position).
+    const float position = idx / (NoiseParams::kNumNoisePoints - 2.f);
+    const float y = 2 * position;
+
+    // Linear intensity, where 1 is the default intensity target.
+    const float linear = std::max(0.f, Cube(y - bias_cbrt) + bias);
+    const float electrons = electrons_18 * (linear / 0.18f);
+
+    // Noise in electrons rms: quadrature sum of read noise, photon shot noise
+    // (sqrt(S), hence added unsquared) and photo response non-uniformity.
+    // https://doi.org/10.1117/3.725073
+    const float read_noise_sq = Square(kInputReferredReadNoise);
+    const float prnu_sq = Square(kPhotoResponseNonUniformity * electrons);
+    const float noise_electrons =
+        std::sqrt(read_noise_sq + electrons + prnu_sq);
+
+    // Convert back to linear units, then to the opsin domain via the
+    // derivative of the cube-root transfer.
+    const float noise_linear = noise_electrons * (0.18f / electrons_18);
+    const float opsin_slope = (1.f / 3) / Square(std::cbrt(linear - bias));
+    const float noise_opsin = noise_linear * opsin_slope;
+
+    // TODO(sboukortt): verify more thoroughly whether the denominator is
+    // correct.
+    const float denominator =
+        0.22f             // norm_const
+        * std::sqrt(2.f)  // red_noise + green_noise
+        * 1.13f;  // standard deviation of a plane of generated noise
+    result.lut[idx] = Clamp1(noise_opsin / denominator, 0.f, 1.f);
+  }
+
+  return result;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_photon_noise.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_photon_noise.h
new file mode 100644
index 0000000000..f43e14d560
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_photon_noise.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_PHOTON_NOISE_H_
+#define LIB_JXL_ENC_PHOTON_NOISE_H_
+
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/noise.h"
+
+namespace jxl {
+
+// Constructs a NoiseParams representing the noise that would be seen at the
+// nominal exposure for ISO `iso` on a last-decade (as of 2021) color camera
+// with a 36×24mm sensor (“35mm format”) divided into `xsize` × `ysize` pixels.
+NoiseParams SimulatePhotonNoise(size_t xsize, size_t ysize, float iso);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_PHOTON_NOISE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_photon_noise_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_photon_noise_test.cc
new file mode 100644
index 0000000000..3790fdee99
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_photon_noise_test.cc
@@ -0,0 +1,50 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_photon_noise.h"
+
+#include "gmock/gmock.h"
+
+namespace jxl {
+namespace {
+
+using ::testing::FloatNear;
+using ::testing::Pointwise;
+
+MATCHER(AreApproximatelyEqual, "") {
+  constexpr float kAbsoluteTolerance = 1e-6;
+  const float got = std::get<0>(arg);  // arg is an (actual, expected) tuple.
+  const float want = std::get<1>(arg);
+  const auto near_want = FloatNear(want, kAbsoluteTolerance);
+  return testing::ExplainMatchResult(near_want, got, result_listener);
+}
+
+TEST(EncPhotonNoiseTest, LUTs) {  // Pins the generated LUT for a few image sizes and ISO values.
+  EXPECT_THAT(
+      SimulatePhotonNoise(/*xsize=*/6000, /*ysize=*/4000, /*iso=*/100).lut,
+      Pointwise(AreApproximatelyEqual(),
+                {0.00259652, 0.0139648, 0.00681551, 0.00632582, 0.00694917,
+                 0.00803922, 0.00934574, 0.0107607}));
+  EXPECT_THAT(
+      SimulatePhotonNoise(/*xsize=*/6000, /*ysize=*/4000, /*iso=*/800).lut,
+      Pointwise(AreApproximatelyEqual(),
+                {0.02077220, 0.0420923, 0.01820690, 0.01439020, 0.01293670,
+                 0.01254030, 0.01277390, 0.0134161}));
+  EXPECT_THAT(
+      SimulatePhotonNoise(/*xsize=*/6000, /*ysize=*/4000, /*iso=*/6400).lut,
+      Pointwise(AreApproximatelyEqual(),
+                {0.1661770, 0.1691120, 0.05309080, 0.03963960, 0.03357410,
+                 0.03001650, 0.02776740, 0.0263478}));
+
+  // Same ISO as the previous case but 4000x3000 instead of 6000x4000 pixels: lower when measured on a per-pixel basis as there are fewer of them.
+  EXPECT_THAT(
+      SimulatePhotonNoise(/*xsize=*/4000, /*ysize=*/3000, /*iso=*/6400).lut,
+      Pointwise(AreApproximatelyEqual(),
+                {0.0830886, 0.1008720, 0.0367748, 0.0280305, 0.0240236,
+                 0.0218040, 0.0205771, 0.0200058}));
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_quant_weights.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_quant_weights.cc
new file mode 100644
index 0000000000..33d0e47bae
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_quant_weights.cc
@@ -0,0 +1,203 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_quant_weights.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <utility>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+namespace {
+
+Status EncodeDctParams(const DctQuantWeightParams& params, BitWriter* writer) {
+  const size_t num_bands = params.num_distance_bands;
+  JXL_ASSERT(num_bands >= 1);  // The count is written as num_bands - 1.
+  writer->Write(DctQuantWeightParams::kLog2MaxDistanceBands, num_bands - 1);
+  for (size_t ch = 0; ch < 3; ++ch) {
+    for (size_t band = 0; band < num_bands; ++band) {  // Only band 0 is scaled by 1/64.
+      JXL_RETURN_IF_ERROR(F16Coder::Write(
+          params.distance_bands[ch][band] * (band ? 1.0f : 1 / 64.0f), writer));
+    }
+  }
+  return true;
+}
+
+Status EncodeQuant(const QuantEncoding& encoding, size_t idx, size_t size_x,
+                   size_t size_y, BitWriter* writer,
+                   ModularFrameEncoder* modular_frame_encoder) {
+  writer->Write(kLog2NumQuantModes, encoding.mode);  // The mode is written first; the payload below depends on it.
+  size_x *= kBlockDim;  // Both sizes are multiplied by kBlockDim; only the RAW case reads them.
+  size_y *= kBlockDim;
+  switch (encoding.mode) {
+    case QuantEncoding::kQuantModeLibrary: {
+      writer->Write(kCeilLog2NumPredefinedTables, encoding.predefined);
+      break;
+    }
+    case QuantEncoding::kQuantModeID: {
+      for (size_t c = 0; c < 3; c++) {
+        for (size_t i = 0; i < 3; i++) {
+          JXL_RETURN_IF_ERROR(
+              F16Coder::Write(encoding.idweights[c][i] * (1.0f / 64), writer));
+        }
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT2: {
+      for (size_t c = 0; c < 3; c++) {
+        for (size_t i = 0; i < 6; i++) {
+          JXL_RETURN_IF_ERROR(F16Coder::Write(
+              encoding.dct2weights[c][i] * (1.0f / 64), writer));
+        }
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT4X8: {
+      for (size_t c = 0; c < 3; c++) {
+        JXL_RETURN_IF_ERROR(
+            F16Coder::Write(encoding.dct4x8multipliers[c], writer));
+      }
+      JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params, writer));
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT4: {
+      for (size_t c = 0; c < 3; c++) {
+        for (size_t i = 0; i < 2; i++) {
+          JXL_RETURN_IF_ERROR(
+              F16Coder::Write(encoding.dct4multipliers[c][i], writer));
+        }
+      }
+      JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params, writer));
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT: {
+      JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params, writer));
+      break;
+    }
+    case QuantEncoding::kQuantModeRAW: {
+      ModularFrameEncoder::EncodeQuantTable(size_x, size_y, writer, encoding,
+                                            idx, modular_frame_encoder);
+      break;
+    }
+    case QuantEncoding::kQuantModeAFV: {
+      for (size_t c = 0; c < 3; c++) {
+        for (size_t i = 0; i < 9; i++) {  // The first six weights are scaled by 1/64, the last three are written unscaled.
+          JXL_RETURN_IF_ERROR(F16Coder::Write(
+              encoding.afv_weights[c][i] * (i < 6 ? 1.0f / 64 : 1.0f), writer));
+        }
+        JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params, writer));  // NOTE(review): both DctParams are written once per channel here, unlike the DCT4 and DCT4X8 cases above - confirm against the decoder.
+        JXL_RETURN_IF_ERROR(
+            EncodeDctParams(encoding.dct_params_afv_4x4, writer));
+      }
+      break;
+    }
+  }
+  return true;
+}
+
+}  // namespace
+
+Status DequantMatricesEncode(const DequantMatrices* matrices, BitWriter* writer,
+                             size_t layer, AuxOut* aux_out,
+                             ModularFrameEncoder* modular_frame_encoder) {
+  const std::vector<QuantEncoding>& encodings = matrices->encodings();
+
+  // The set is "all default" when every entry is library table 0; in that case
+  // a single 1 bit is written and nothing else.
+  bool all_default = true;
+  for (const QuantEncoding& encoding : encodings) {
+    const bool is_default = encoding.mode == QuantEncoding::kQuantModeLibrary &&
+                            encoding.predefined == 0;
+    all_default = all_default && is_default;
+  }
+  // TODO(janwas): better bound
+  BitWriter::Allotment allotment(writer, 512 * 1024);
+  writer->Write(1, all_default);
+  const size_t num_tables = all_default ? 0 : encodings.size();
+  for (size_t idx = 0; idx < num_tables; ++idx) {
+    JXL_RETURN_IF_ERROR(EncodeQuant(
+        encodings[idx], idx, DequantMatrices::required_size_x[idx],
+        DequantMatrices::required_size_y[idx], writer, modular_frame_encoder));
+  }
+  ReclaimAndCharge(writer, &allotment, layer, aux_out);
+  return true;
+}
+
+Status DequantMatricesEncodeDC(const DequantMatrices* matrices,
+                               BitWriter* writer, size_t layer,
+                               AuxOut* aux_out) {
+  // Defaults are signalled by a single 1 bit; otherwise a 0 bit is followed by
+  // the three DC quantizers, each multiplied by 128 and written as F16.
+  const float* dc_quant = matrices->DCQuants();
+  const bool all_default = dc_quant[0] == kDCQuant[0] &&
+                           dc_quant[1] == kDCQuant[1] &&
+                           dc_quant[2] == kDCQuant[2];
+
+  BitWriter::Allotment allotment(writer, 1 + sizeof(float) * kBitsPerByte * 3);
+  writer->Write(1, all_default);
+  const size_t num_channels = all_default ? 0 : 3;
+  for (size_t channel = 0; channel < num_channels; ++channel) {
+    const float scaled = dc_quant[channel] * 128.0f;
+    JXL_RETURN_IF_ERROR(F16Coder::Write(scaled, writer));
+  }
+  ReclaimAndCharge(writer, &allotment, layer, aux_out);
+  return true;
+}
+
+void DequantMatricesSetCustomDC(DequantMatrices* matrices, const float* dc) {
+  matrices->SetDCQuant(dc);
+  // Encode and re-decode the DC quantizers so they match what a decoder sees.
+  BitWriter roundtrip_writer;
+  JXL_CHECK(DequantMatricesEncodeDC(matrices, &roundtrip_writer, 0, nullptr));
+  roundtrip_writer.ZeroPadToByte();
+  BitReader reader(roundtrip_writer.GetSpan());
+  // Encoder-only code path: a failure here indicates a programmer error.
+  JXL_CHECK(matrices->DecodeDC(&reader));
+  JXL_CHECK(reader.Close());
+}
+
+void DequantMatricesSetCustom(DequantMatrices* matrices,
+                              const std::vector<QuantEncoding>& encodings,
+                              ModularFrameEncoder* encoder) {
+  JXL_ASSERT(encodings.size() == DequantMatrices::kNum);
+  matrices->SetEncodings(encodings);
+  // Raw tables are additionally registered with the modular frame encoder.
+  for (size_t idx = 0; idx < encodings.size(); ++idx) {
+    if (encodings[idx].mode != QuantEncodingInternal::kQuantModeRAW) continue;
+    const size_t xsize = DequantMatrices::required_size_x[idx] * kBlockDim;
+    const size_t ysize = DequantMatrices::required_size_y[idx] * kBlockDim;
+    encoder->AddQuantTable(xsize, ysize, encodings[idx], idx);
+  }
+  // Encode and immediately decode the matrices so that the encoder ends up
+  // with the same values as the decoder. The modular en/decoder is not
+  // passed, as it only changes entropy and not values.
+  BitWriter roundtrip_writer;
+  JXL_CHECK(DequantMatricesEncode(matrices, &roundtrip_writer, 0, nullptr));
+  roundtrip_writer.ZeroPadToByte();
+  BitReader reader(roundtrip_writer.GetSpan());
+  // Encoder-only code path: a failure here indicates a programmer error.
+  JXL_CHECK(matrices->Decode(&reader));
+  JXL_CHECK(reader.Close());
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_quant_weights.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_quant_weights.h
new file mode 100644
index 0000000000..89033d8cbb
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_quant_weights.h
@@ -0,0 +1,29 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_QUANT_WEIGHTS_H_
+#define LIB_JXL_ENC_QUANT_WEIGHTS_H_
+
+#include "lib/jxl/quant_weights.h"
+
+namespace jxl {
+
+Status DequantMatricesEncode(
+    const DequantMatrices* matrices, BitWriter* writer, size_t layer,
+    AuxOut* aux_out, ModularFrameEncoder* modular_frame_encoder = nullptr);  // Writes an "all default" bit, then every table encoding if any is non-default.
+Status DequantMatricesEncodeDC(const DequantMatrices* matrices,
+                               BitWriter* writer, size_t layer,
+                               AuxOut* aux_out);  // Writes an "all default" bit, then the three DC quantizers if any differs from kDCQuant.
+// For consistency with QuantEncoding, higher values correspond to more
+// precision. Sets the DC quantizers, then round-trips them through encode/decode.
+void DequantMatricesSetCustomDC(DequantMatrices* matrices, const float* dc);
+
+void DequantMatricesSetCustom(DequantMatrices* matrices,
+                              const std::vector<QuantEncoding>& encodings,
+                              ModularFrameEncoder* encoder);  // Sets the encodings, adds RAW tables to `encoder`, then round-trips through encode/decode.
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_QUANT_WEIGHTS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_splines.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_splines.cc
new file mode 100644
index 0000000000..cdb797dc6a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_splines.cc
@@ -0,0 +1,96 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <algorithm>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/splines.h"
+
+namespace jxl {
+
+class QuantizedSplineEncoder {
+ public:
+  // Only call if HasAny(). Appends one spline's tokens: control point count,
+  // packed control points, then 32 DCT values per color channel and for sigma.
+  static void Tokenize(const QuantizedSpline& spline,
+                       std::vector<Token>* const tokens) {
+    const auto& control_points = spline.control_points_;
+    tokens->emplace_back(kNumControlPointsContext, control_points.size());
+    for (const auto& cp : control_points) {
+      tokens->emplace_back(kControlPointsContext, PackSigned(cp.first));
+      tokens->emplace_back(kControlPointsContext, PackSigned(cp.second));
+    }
+    const int* const coefficient_sets[4] = {
+        spline.color_dct_[0], spline.color_dct_[1], spline.color_dct_[2],
+        spline.sigma_dct_};
+    for (const int* coefficients : coefficient_sets) {
+      for (int k = 0; k < 32; ++k) {
+        tokens->emplace_back(kDCTContext, PackSigned(coefficients[k]));
+      }
+    }
+  }
+};
+
+namespace {
+
+void EncodeAllStartingPoints(const std::vector<Spline::Point>& points,
+                             std::vector<Token>* tokens) {
+  int64_t prev_x = 0;
+  int64_t prev_y = 0;
+  for (size_t idx = 0; idx < points.size(); ++idx) {
+    const int64_t x = lroundf(points[idx].x);
+    const int64_t y = lroundf(points[idx].y);
+    if (idx != 0) {  // Later points: sign-packed difference to the previous rounded point.
+      tokens->emplace_back(kStartingPositionContext, PackSigned(x - prev_x));
+      tokens->emplace_back(kStartingPositionContext, PackSigned(y - prev_y));
+    } else {  // First point: the rounded coordinates themselves.
+      tokens->emplace_back(kStartingPositionContext, x);
+      tokens->emplace_back(kStartingPositionContext, y);
+    }
+    prev_x = x;
+    prev_y = y;
+  }
+}
+
+}  // namespace
+
+void EncodeSplines(const Splines& splines, BitWriter* writer,
+                   const size_t layer, const HistogramParams& histogram_params,
+                   AuxOut* aux_out) {
+  JXL_ASSERT(splines.HasAny());
+
+  // All spline tokens go into a single stream, in this order: spline count
+  // minus one, starting points, quantization adjustment, per-spline data.
+  std::vector<std::vector<Token>> token_streams(1);
+  std::vector<Token>& stream = token_streams[0];
+  const auto& quantized = splines.QuantizedSplines();
+  stream.emplace_back(kNumSplinesContext, quantized.size() - 1);
+  EncodeAllStartingPoints(splines.StartingPoints(), &stream);
+  stream.emplace_back(kQuantizationAdjustmentContext,
+                      PackSigned(splines.GetQuantizationAdjustment()));
+  for (const QuantizedSpline& quantized_spline : quantized) {
+    QuantizedSplineEncoder::Tokenize(quantized_spline, &stream);
+  }
+
+  EntropyEncodingData codes;
+  std::vector<uint8_t> context_map;
+  BuildAndEncodeHistograms(histogram_params, kNumSplineContexts, token_streams,
+                           &codes, &context_map, writer, layer, aux_out);
+  WriteTokens(stream, codes, context_map, writer, layer, aux_out);
+}
+
+Splines FindSplines(const Image3F& opsin) {
+  // TODO: implement spline detection. Until then `opsin` is not read and an empty Splines is returned.
+  return Splines();
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_splines.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_splines.h
new file mode 100644
index 0000000000..732d77ac2c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_splines.h
@@ -0,0 +1,39 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_SPLINES_H_
+#define LIB_JXL_ENC_SPLINES_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/splines.h"
+
+namespace jxl {
+
+// Tokenizes `splines` and writes the entropy-coded result to `writer`. Only call if splines.HasAny().
+void EncodeSplines(const Splines& splines, BitWriter* writer,
+                   const size_t layer, const HistogramParams& histogram_params,
+                   AuxOut* aux_out);
+
+Splines FindSplines(const Image3F& opsin);  // Currently a stub: always returns an empty Splines.
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_SPLINES_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_toc.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_toc.cc
new file mode 100644
index 0000000000..c877b0c837
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_toc.cc
@@ -0,0 +1,46 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_toc.h"
+
+#include <stdint.h>
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+// Emits the table of contents for `group_codes` into `writer`:
+//   1. one bit telling whether a permutation follows,
+//   2. the permutation itself (only if one was supplied and there is at least
+//      one group),
+//   3. zero padding to a byte boundary,
+//   4. the size in bytes of every group, in order,
+//   5. zero padding to a byte boundary.
+// Every group must contain a whole number of bytes. Returns an error as soon
+// as one size entry cannot be written.
+Status WriteGroupOffsets(const std::vector<BitWriter>& group_codes,
+                         const std::vector<coeff_order_t>* permutation,
+                         BitWriter* JXL_RESTRICT writer, AuxOut* aux_out) {
+  const size_t num_groups = group_codes.size();
+  BitWriter::Allotment allotment(writer, MaxBits(num_groups));
+
+  // Don't write a permutation at all for an empty group_codes.
+  const bool has_permutation = permutation != nullptr && num_groups != 0;
+  if (!has_permutation) {
+    writer->Write(1, 0);  // no permutation
+  } else {
+    writer->Write(1, 1);  // permutation
+    JXL_DASSERT(permutation->size() == num_groups);
+    EncodePermutation(permutation->data(), /*skip=*/0, permutation->size(),
+                      writer, /* layer= */ 0, aux_out);
+  }
+  writer->ZeroPadToByte();  // before TOC entries
+
+  for (const BitWriter& group : group_codes) {
+    JXL_ASSERT(group.BitsWritten() % kBitsPerByte == 0);
+    const size_t group_size = group.BitsWritten() / kBitsPerByte;
+    JXL_RETURN_IF_ERROR(U32Coder::Write(kTocDist, group_size, writer));
+  }
+  writer->ZeroPadToByte();  // before first group
+
+  ReclaimAndCharge(writer, &allotment, kLayerTOC, aux_out);
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_toc.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_toc.h
new file mode 100644
index 0000000000..dc81a5d12e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_toc.h
@@ -0,0 +1,29 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_TOC_H_
+#define LIB_JXL_ENC_TOC_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_bit_writer.h"
+
+namespace jxl {
+
+// Writes the group offsets. If the permutation vector is nullptr, the identity
+// permutation will be used.
+//
+// The output consists of a one-bit permutation flag, the encoded permutation
+// (only when `permutation` is non-null and `group_codes` is non-empty), and
+// then the size in bytes of each entry of `group_codes`; the writer is
+// zero-padded to a byte boundary before and after the size entries.
+// Each BitWriter in `group_codes` must hold a whole number of bytes, and a
+// supplied permutation is expected to have one entry per group.
+// Returns an error status if a size entry cannot be written.
+Status WriteGroupOffsets(const std::vector<BitWriter>& group_codes,
+                         const std::vector<coeff_order_t>* permutation,
+                         BitWriter* JXL_RESTRICT writer, AuxOut* aux_out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_TOC_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_transforms-inl.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_transforms-inl.h
new file mode 100644
index 0000000000..c2f8e61105
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_transforms-inl.h
@@ -0,0 +1,844 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
+#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
+#else
+#define LIB_JXL_ENC_TRANSFORMS_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dct-inl.h"
+#include "lib/jxl/dct_scales.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// Inverse-DCT helper for a ROWS x COLS block: forwards its arguments to
+// ComputeScaledIDCT<ROWS, COLS>. Square blocks select the DoIDCT<N, N>
+// specialization instead of this primary template.
+template <size_t ROWS, size_t COLS>
+struct DoIDCT {
+  // `from` holds the input coefficients, `to` receives the result and
+  // `scratch_space` is working memory; all three are passed through unchanged.
+  // The struct has no data members, so the call operator is const, matching
+  // the DoIDCT<N, N> specialization and allowing use on const objects.
+  template <typename To>
+  void operator()(float* JXL_RESTRICT from, const To& to,
+                  float* JXL_RESTRICT scratch_space) const {
+    ComputeScaledIDCT<ROWS, COLS>()(from, to, scratch_space);
+  }
+};
+
+// Square case of DoIDCT: an N x N block is handed to
+// ComputeTransposedScaledIDCT<N> rather than ComputeScaledIDCT.
+template <size_t N>
+struct DoIDCT<N, N> {
+  // Arguments are forwarded as-is: input coefficients, output sink, scratch.
+  template <typename Sink>
+  void operator()(float* JXL_RESTRICT from, const Sink& to,
+                  float* JXL_RESTRICT scratch_space) const {
+    auto square_idct = ComputeTransposedScaledIDCT<N>();
+    square_idct(from, to, scratch_space);
+  }
+};
+
+// Inverse of ReinterpretingDCT.
+// Copies LF_ROWS x LF_COLS low-frequency values from `input` (rows
+// `input_stride` floats apart) into a zero-initialized ROWS x COLS block,
+// multiplying each one by the DCTTotalResampleScale factors for its two
+// coordinates, then runs DoIDCT<ROWS, COLS> on that block and writes the
+// result to `output` (via DCTTo with `output_stride`).
+template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
+          size_t ROWS, size_t COLS>
+HWY_INLINE void ReinterpretingIDCT(const float* input,
+                                   const size_t input_stride, float* output,
+                                   const size_t output_stride) {
+  HWY_ALIGN float block[ROWS * COLS] = {};
+  if (ROWS >= COLS) {
+    // The roles of rows and columns are swapped relative to the other branch.
+    // NOTE(review): presumably the input is stored transposed for these
+    // shapes - confirm against the forward transform.
+    for (size_t r = 0; r < LF_COLS; ++r) {
+      const float* src_row = input + r * input_stride;
+      float* dst_row = block + r * ROWS;
+      for (size_t c = 0; c < LF_ROWS; ++c) {
+        dst_row[c] = src_row[c] * DCTTotalResampleScale<DCT_COLS, COLS>(r) *
+                     DCTTotalResampleScale<DCT_ROWS, ROWS>(c);
+      }
+    }
+  } else {
+    for (size_t r = 0; r < LF_ROWS; ++r) {
+      const float* src_row = input + r * input_stride;
+      float* dst_row = block + r * COLS;
+      for (size_t c = 0; c < LF_COLS; ++c) {
+        dst_row[c] = src_row[c] * DCTTotalResampleScale<DCT_ROWS, ROWS>(r) *
+                     DCTTotalResampleScale<DCT_COLS, COLS>(c);
+      }
+    }
+  }
+
+  // Scratch space is kept on the stack next to `block`; each of the two
+  // buffers holds ROWS * COLS floats (this file instantiates up to 32 x 32).
+  HWY_ALIGN float scratch_space[ROWS * COLS];
+  DoIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride), scratch_space);
+}
+
+// One level of the 2x2 transform on the top-left S x S corner of `block`
+// (rows `stride` floats apart). Every 2x2 group of inputs produces four
+// values, each a signed sum of the four inputs scaled by 0.25; they are
+// placed in the four (S/2) x (S/2) quadrants of the top-left S x S corner of
+// `out`, whose rows are kBlockDim floats apart. Results are staged in a
+// temporary first, so `out` may be the same buffer as `block`.
+template <size_t S>
+void DCT2TopBlock(const float* block, size_t stride, float* out) {
+  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
+  static_assert(S % 2 == 0, "S should be even");
+  constexpr size_t kHalf = S / 2;
+  float staged[kDCTBlockSize];
+  for (size_t by = 0; by < kHalf; ++by) {
+    const float* upper = block + (by * 2) * stride;
+    const float* lower = block + (by * 2 + 1) * stride;
+    float* top_quadrants = staged + by * kBlockDim;
+    float* bottom_quadrants = staged + (by + kHalf) * kBlockDim;
+    for (size_t bx = 0; bx < kHalf; ++bx) {
+      const float c00 = upper[bx * 2];
+      const float c01 = upper[bx * 2 + 1];
+      const float c10 = lower[bx * 2];
+      const float c11 = lower[bx * 2 + 1];
+
+      float r00 = c00 + c01 + c10 + c11;
+      r00 *= 0.25f;
+      float r01 = c00 + c01 - c10 - c11;
+      r01 *= 0.25f;
+      float r10 = c00 - c01 + c10 - c11;
+      r10 *= 0.25f;
+      float r11 = c00 - c01 - c10 + c11;
+      r11 *= 0.25f;
+
+      top_quadrants[bx] = r00;
+      top_quadrants[kHalf + bx] = r01;
+      bottom_quadrants[bx] = r10;
+      bottom_quadrants[kHalf + bx] = r11;
+    }
+  }
+  // Publish the S x S result only after every input has been read.
+  for (size_t row = 0; row < S; ++row) {
+    const float* src = staged + row * kBlockDim;
+    float* dst = out + row * kBlockDim;
+    for (size_t col = 0; col < S; ++col) {
+      dst[col] = src[col];
+    }
+  }
+}
+
+// Computes 16 output coefficients from 16 input values as a matrix product
+// with a fixed basis:
+//   coeffs[i] = sum over j in [0, 16) of pixels[j] * k4x4AFVBasisTranspose[j][i]
+// `pixels` and `coeffs` each refer to 16 floats and must not overlap
+// (both are declared JXL_RESTRICT).
+void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
+  // Row j holds the 16 weights with which input j contributes to the 16
+  // outputs.
+  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
+      {
+          0.2500000000000000,
+          0.8769029297991420f,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          -0.4105377591765233f,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+      },
+      {
+          0.2500000000000000,
+          0.2206518106944235f,
+          0.0000000000000000,
+          0.0000000000000000,
+          -0.7071067811865474f,
+          0.6235485373547691f,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375376f,
+          0.4067007583026075f,
+          -0.2125574805828875f,
+          0.0000000000000000,
+          -0.0643507165794627f,
+          -0.4517556589999482f,
+          -0.3046847507248690f,
+          0.3017929516615495f,
+          0.4082482904638627f,
+          0.1747866975480809f,
+          -0.2110560104933578f,
+          -0.1426608480880726f,
+          -0.1381354035075859f,
+          -0.1743760259965107f,
+          0.1135498731499434f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375375f,
+          0.4444481661973445f,
+          0.3085497062849767f,
+          0.0000000000000000f,
+          -0.0643507165794627f,
+          0.1585450355184006f,
+          0.5112616136591823f,
+          0.2579236279634118f,
+          0.0000000000000000,
+          0.0812611176717539f,
+          0.1856718091610980f,
+          -0.3416446842253372f,
+          0.3302282550303788f,
+          0.0702790691196284f,
+          -0.0741750459581035f,
+      },
+      {
+          0.2500000000000000,
+          0.2206518106944236f,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.7071067811865476f,
+          0.6235485373547694f,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375378f,
+          0.0000000000000000,
+          0.4706702258572536f,
+          0.0000000000000000,
+          -0.0643507165794628f,
+          -0.0403851516082220f,
+          0.0000000000000000,
+          0.1627234014286620f,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.7367497537172237f,
+          0.0875511500058708f,
+          -0.2921026642334881f,
+          0.1940289303259434f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375377f,
+          0.1957439937204294f,
+          -0.1621205195722993f,
+          0.0000000000000000,
+          -0.0643507165794628f,
+          0.0074182263792424f,
+          -0.2904801297289980f,
+          0.0952002265347504f,
+          0.0000000000000000,
+          -0.3675398009862027f,
+          0.4921585901373873f,
+          0.2462710772207515f,
+          -0.0794670660590957f,
+          0.3623817333531167f,
+          -0.4351904965232280f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375376f,
+          0.2929100136981264f,
+          0.0000000000000000,
+          0.0000000000000000,
+          -0.0643507165794627f,
+          0.3935103426921017f,
+          -0.0657870154914280f,
+          0.0000000000000000,
+          -0.4082482904638628f,
+          -0.3078822139579090f,
+          -0.3852501370925192f,
+          -0.0857401903551931f,
+          -0.4613374887461511f,
+          0.0000000000000000,
+          0.2191868483885747f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375376f,
+          -0.4067007583026072f,
+          -0.2125574805828705f,
+          0.0000000000000000,
+          -0.0643507165794627f,
+          -0.4517556589999464f,
+          0.3046847507248840f,
+          0.3017929516615503f,
+          -0.4082482904638635f,
+          -0.1747866975480813f,
+          0.2110560104933581f,
+          -0.1426608480880734f,
+          -0.1381354035075829f,
+          -0.1743760259965108f,
+          0.1135498731499426f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375377f,
+          -0.1957439937204287f,
+          -0.1621205195722833f,
+          0.0000000000000000,
+          -0.0643507165794628f,
+          0.0074182263792444f,
+          0.2904801297290076f,
+          0.0952002265347505f,
+          0.0000000000000000,
+          0.3675398009862011f,
+          -0.4921585901373891f,
+          0.2462710772207514f,
+          -0.0794670660591026f,
+          0.3623817333531165f,
+          -0.4351904965232251f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375375f,
+          0.0000000000000000,
+          -0.4706702258572528f,
+          0.0000000000000000,
+          -0.0643507165794627f,
+          0.1107416575309343f,
+          0.0000000000000000,
+          -0.1627234014286617f,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.1488339922711357f,
+          0.4972464710953509f,
+          0.2921026642334879f,
+          0.5550443808910661f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375377f,
+          0.1137907446044809f,
+          -0.1464291867126764f,
+          0.0000000000000000,
+          -0.0643507165794628f,
+          0.0829816309488205f,
+          -0.2388977352334460f,
+          -0.3531238544981630f,
+          -0.4082482904638630f,
+          0.4826689115059883f,
+          0.1741941265991622f,
+          -0.0476868035022925f,
+          0.1253805944856366f,
+          -0.4326608024727445f,
+          -0.2546827712406646f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375377f,
+          -0.4444481661973438f,
+          0.3085497062849487f,
+          0.0000000000000000,
+          -0.0643507165794628f,
+          0.1585450355183970f,
+          -0.5112616136592012f,
+          0.2579236279634129f,
+          0.0000000000000000,
+          -0.0812611176717504f,
+          -0.1856718091610990f,
+          -0.3416446842253373f,
+          0.3302282550303805f,
+          0.0702790691196282f,
+          -0.0741750459581023f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375376f,
+          -0.2929100136981264f,
+          0.0000000000000000,
+          0.0000000000000000,
+          -0.0643507165794627f,
+          0.3935103426921022f,
+          0.0657870154914254f,
+          0.0000000000000000,
+          0.4082482904638634f,
+          0.3078822139579031f,
+          0.3852501370925211f,
+          -0.0857401903551927f,
+          -0.4613374887461554f,
+          0.0000000000000000,
+          0.2191868483885728f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375376f,
+          -0.1137907446044814f,
+          -0.1464291867126654f,
+          0.0000000000000000,
+          -0.0643507165794627f,
+          0.0829816309488214f,
+          0.2388977352334547f,
+          -0.3531238544981624f,
+          0.4082482904638630f,
+          -0.4826689115059858f,
+          -0.1741941265991621f,
+          -0.0476868035022928f,
+          0.1253805944856431f,
+          -0.4326608024727457f,
+          -0.2546827712406641f,
+      },
+      {
+          0.2500000000000000,
+          -0.1014005039375374f,
+          0.0000000000000000,
+          0.4251149611657548f,
+          0.0000000000000000,
+          -0.0643507165794626f,
+          -0.4517556589999480f,
+          0.0000000000000000,
+          -0.6035859033230976f,
+          0.0000000000000000,
+          0.0000000000000000,
+          0.0000000000000000,
+          -0.1426608480880724f,
+          -0.1381354035075845f,
+          0.3487520519930227f,
+          0.1135498731499429f,
+      },
+  };
+
+  // Each outer iteration produces Lanes(d) consecutive outputs starting at
+  // index i; `d` is a float vector descriptor capped at 16 lanes.
+  const HWY_CAPPED(float, 16) d;
+  for (size_t i = 0; i < 16; i += Lanes(d)) {
+    auto scalar = Zero(d);
+    for (size_t j = 0; j < 16; j++) {
+      // Broadcast input j and accumulate it against its basis weights.
+      auto px = Set(d, pixels[j]);
+      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
+      scalar = MulAdd(px, basis, scalar);
+    }
+    Store(scalar, d, coeffs + i);
+  }
+}
+
+// Coefficient layout, with positions written as (column, row):
+//  - (even, even) positions hold AFV coefficients
+//  - (odd, even) positions hold DCT4x4 coefficients
+//  - (any, odd) positions hold DCT4x8 coefficients
+//
+// Transforms the 8x8 block at `pixels` (rows `pixels_stride` floats apart)
+// into the 64 floats at `coefficients` (rows of 8). `afv_kind` selects the
+// 4x4 corner that gets the AFV transform: bit 0 set means the right half,
+// the value divided by two being 1 means the lower half. The 4x4 square
+// beside that corner (same rows) gets a 4x4 DCT and the remaining 4x8 half of
+// the block gets a 4x8 DCT.
+template <size_t afv_kind>
+void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
+                            size_t pixels_stride,
+                            float* JXL_RESTRICT coefficients) {
+  HWY_ALIGN float scratch_space[4 * 8 * 2];
+  size_t afv_x = afv_kind & 1;
+  size_t afv_y = afv_kind / 2;
+  HWY_ALIGN float block[4 * 8];
+  // Gather the selected corner into block[0..15], mirrored along every axis
+  // on which the corner is the far one (afv_x == 1 / afv_y == 1).
+  for (size_t iy = 0; iy < 4; iy++) {
+    for (size_t ix = 0; ix < 4; ix++) {
+      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
+          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
+    }
+  }
+  // AFV coefficients in (even, even) positions.
+  HWY_ALIGN float coeff[4 * 4];
+  AFVDCT4x4(block, coeff);
+  for (size_t iy = 0; iy < 4; iy++) {
+    for (size_t ix = 0; ix < 4; ix++) {
+      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
+    }
+  }
+  // 4x4 DCT of the block with same y and different x.
+  ComputeTransposedScaledDCT<4>()(
+      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
+              pixels_stride),
+      block, scratch_space);
+  // ... in (odd, even) positions. The result is 4 values wide, so exactly
+  // four columns are copied; going further would read entries of `block`
+  // that hold no DCT output and write into the odd rows filled below.
+  for (size_t iy = 0; iy < 4; iy++) {
+    for (size_t ix = 0; ix < 4; ix++) {
+      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
+    }
+  }
+  // 4x8 DCT of the other half of the block.
+  ComputeScaledDCT<4, 8>()(
+      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
+      block, scratch_space);
+  // ... in the odd rows, all eight columns.
+  for (size_t iy = 0; iy < 4; iy++) {
+    for (size_t ix = 0; ix < 8; ix++) {
+      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
+    }
+  }
+  // Combine the first coefficient of each of the three transforms (AFV at
+  // index 0, DCT4x4 at index 1, DCT4x8 at index 8) into the three
+  // lowest-frequency outputs.
+  float block00 = coefficients[0] * 0.25f;
+  float block01 = coefficients[1];
+  float block10 = coefficients[8];
+  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
+  coefficients[1] = (block00 - block01) * 0.5f;
+  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
+}
+
+// Forward transform of one block for the given `strategy`: reads `pixels`
+// (rows `pixels_stride` floats apart) and writes the result to
+// `coefficients`. `scratch_space` is working memory passed on to the DCT
+// routines; the IDENTITY, DCT2X2 and AFV cases do not touch it.
+// Aborts if `strategy` is kNumValidStrategies.
+HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategy::Type strategy,
+                                          const float* JXL_RESTRICT pixels,
+                                          size_t pixels_stride,
+                                          float* JXL_RESTRICT coefficients,
+                                          float* JXL_RESTRICT scratch_space) {
+  using Type = AcStrategy::Type;
+  switch (strategy) {
+    case Type::IDENTITY: {
+      PROFILER_ZONE("DCT Identity");
+      // The 8x8 block is handled as four 4x4 quadrants (x, y); the outputs of
+      // the quadrants are interleaved in `coefficients`.
+      for (size_t y = 0; y < 2; y++) {
+        for (size_t x = 0; x < 2; x++) {
+          // Mean of the 16 pixels of this quadrant.
+          float block_dc = 0;
+          for (size_t iy = 0; iy < 4; iy++) {
+            for (size_t ix = 0; ix < 4; ix++) {
+              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
+            }
+          }
+          block_dc *= 1.0f / 16;
+          // Every pixel except (1, 1) is stored as its difference to the
+          // pixel at (1, 1) of the same quadrant.
+          for (size_t iy = 0; iy < 4; iy++) {
+            for (size_t ix = 0; ix < 4; ix++) {
+              if (ix == 1 && iy == 1) continue;
+              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
+                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
+                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
+            }
+          }
+          // The slot skipped above takes over the value computed for (0, 0),
+          // whose own slot then receives the quadrant mean.
+          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
+          coefficients[y * 8 + x] = block_dc;
+        }
+      }
+      // 2x2 butterfly over the four quadrant means.
+      float block00 = coefficients[0];
+      float block01 = coefficients[1];
+      float block10 = coefficients[8];
+      float block11 = coefficients[9];
+      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
+      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
+      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
+      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
+      break;
+    }
+    case Type::DCT8X4: {
+      PROFILER_ZONE("DCT 8x4");
+      // One 8x4 DCT per 4-column half (x); the rows of the two results are
+      // interleaved in the output.
+      for (size_t x = 0; x < 2; x++) {
+        HWY_ALIGN float block[4 * 8];
+        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
+                                 scratch_space);
+        for (size_t iy = 0; iy < 4; iy++) {
+          for (size_t ix = 0; ix < 8; ix++) {
+            // Store transposed.
+            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
+          }
+        }
+      }
+      // Replace the two first coefficients by their half-sum and
+      // half-difference.
+      float block0 = coefficients[0];
+      float block1 = coefficients[8];
+      coefficients[0] = (block0 + block1) * 0.5f;
+      coefficients[8] = (block0 - block1) * 0.5f;
+      break;
+    }
+    case Type::DCT4X8: {
+      PROFILER_ZONE("DCT 4x8");
+      // One 4x8 DCT per 4-row half (y); the rows of the two results are
+      // interleaved in the output.
+      for (size_t y = 0; y < 2; y++) {
+        HWY_ALIGN float block[4 * 8];
+        ComputeScaledDCT<4, 8>()(
+            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
+            scratch_space);
+        for (size_t iy = 0; iy < 4; iy++) {
+          for (size_t ix = 0; ix < 8; ix++) {
+            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
+          }
+        }
+      }
+      // Replace the two first coefficients by their half-sum and
+      // half-difference.
+      float block0 = coefficients[0];
+      float block1 = coefficients[8];
+      coefficients[0] = (block0 + block1) * 0.5f;
+      coefficients[8] = (block0 - block1) * 0.5f;
+      break;
+    }
+    case Type::DCT4X4: {
+      PROFILER_ZONE("DCT 4");
+      // One 4x4 DCT per quadrant (x, y); the four results are interleaved in
+      // both directions.
+      for (size_t y = 0; y < 2; y++) {
+        for (size_t x = 0; x < 2; x++) {
+          HWY_ALIGN float block[4 * 4];
+          ComputeTransposedScaledDCT<4>()(
+              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
+              block, scratch_space);
+          for (size_t iy = 0; iy < 4; iy++) {
+            for (size_t ix = 0; ix < 4; ix++) {
+              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
+            }
+          }
+        }
+      }
+      // 2x2 butterfly over the first coefficient of each quadrant.
+      float block00 = coefficients[0];
+      float block01 = coefficients[1];
+      float block10 = coefficients[8];
+      float block11 = coefficients[9];
+      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
+      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
+      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
+      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
+      break;
+    }
+    case Type::DCT2X2: {
+      PROFILER_ZONE("DCT 2");
+      // Three levels of the 2x2 transform on the top-left 8x8, 4x4 and 2x2
+      // corners; the last two run in place on `coefficients`.
+      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
+      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
+      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
+      break;
+    }
+    // The remaining DCT cases apply a single DCT of the named size: square
+    // sizes go through ComputeTransposedScaledDCT, rectangular ones through
+    // ComputeScaledDCT.
+    case Type::DCT16X16: {
+      PROFILER_ZONE("DCT 16");
+      ComputeTransposedScaledDCT<16>()(DCTFrom(pixels, pixels_stride),
+                                       coefficients, scratch_space);
+      break;
+    }
+    case Type::DCT16X8: {
+      PROFILER_ZONE("DCT 16x8");
+      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                scratch_space);
+      break;
+    }
+    case Type::DCT8X16: {
+      PROFILER_ZONE("DCT 8x16");
+      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                scratch_space);
+      break;
+    }
+    case Type::DCT32X8: {
+      PROFILER_ZONE("DCT 32x8");
+      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                scratch_space);
+      break;
+    }
+    case Type::DCT8X32: {
+      PROFILER_ZONE("DCT 8x32");
+      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                scratch_space);
+      break;
+    }
+    case Type::DCT32X16: {
+      PROFILER_ZONE("DCT 32x16");
+      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT16X32: {
+      PROFILER_ZONE("DCT 16x32");
+      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT32X32: {
+      PROFILER_ZONE("DCT 32");
+      ComputeTransposedScaledDCT<32>()(DCTFrom(pixels, pixels_stride),
+                                       coefficients, scratch_space);
+      break;
+    }
+    case Type::DCT: {
+      PROFILER_ZONE("DCT 8");
+      ComputeTransposedScaledDCT<8>()(DCTFrom(pixels, pixels_stride),
+                                      coefficients, scratch_space);
+      break;
+    }
+    // AFV0..AFV3 differ only in the afv_kind template argument.
+    case Type::AFV0: {
+      PROFILER_ZONE("AFV0");
+      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
+      break;
+    }
+    case Type::AFV1: {
+      PROFILER_ZONE("AFV1");
+      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
+      break;
+    }
+    case Type::AFV2: {
+      PROFILER_ZONE("AFV2");
+      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
+      break;
+    }
+    case Type::AFV3: {
+      PROFILER_ZONE("AFV3");
+      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
+      break;
+    }
+    case Type::DCT64X64: {
+      PROFILER_ZONE("DCT 64x64");
+      ComputeTransposedScaledDCT<64>()(DCTFrom(pixels, pixels_stride),
+                                       coefficients, scratch_space);
+      break;
+    }
+    case Type::DCT64X32: {
+      PROFILER_ZONE("DCT 64x32");
+      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT32X64: {
+      PROFILER_ZONE("DCT 32x64");
+      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                 scratch_space);
+      break;
+    }
+    case Type::DCT128X128: {
+      PROFILER_ZONE("DCT 128x128");
+      ComputeTransposedScaledDCT<128>()(DCTFrom(pixels, pixels_stride),
+                                        coefficients, scratch_space);
+      break;
+    }
+    case Type::DCT128X64: {
+      PROFILER_ZONE("DCT 128x64");
+      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                  scratch_space);
+      break;
+    }
+    case Type::DCT64X128: {
+      PROFILER_ZONE("DCT 64x128");
+      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                  scratch_space);
+      break;
+    }
+    case Type::DCT256X256: {
+      PROFILER_ZONE("DCT 256x256");
+      ComputeTransposedScaledDCT<256>()(DCTFrom(pixels, pixels_stride),
+                                        coefficients, scratch_space);
+      break;
+    }
+    case Type::DCT256X128: {
+      PROFILER_ZONE("DCT 256x128");
+      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                   scratch_space);
+      break;
+    }
+    case Type::DCT128X256: {
+      PROFILER_ZONE("DCT 128x256");
+      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
+                                   scratch_space);
+      break;
+    }
+    // Not a real strategy; reaching it is treated as a fatal error.
+    case Type::kNumValidStrategies:
+      JXL_ABORT("Invalid strategy");
+  }
+}
+
+// Derives the DC values of one transformed block from its lowest-frequency
+// coefficients and writes them to `dc` (rows `dc_stride` floats apart).
+//
+// For every strategy larger than kBlockDim in at least one dimension, the
+// work is done by ReinterpretingIDCT with LF_ROWS x LF_COLS equal to the
+// block size divided by kBlockDim; the stride passed for `block` is the
+// larger of the two block dimensions. For the strategies grouped at the end
+// of the switch a single value is produced: dc[0] = block[0].
+// Aborts if `strategy` is kNumValidStrategies.
+HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategy::Type strategy,
+                                              const float* block, float* dc,
+                                              size_t dc_stride) {
+  using Type = AcStrategy::Type;
+  switch (strategy) {
+    case Type::DCT16X8: {
+      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
+                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
+          block, 2 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT8X16: {
+      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
+                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
+          block, 2 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT16X16: {
+      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
+                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
+          block, 2 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT32X8: {
+      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
+                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
+          block, 4 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT8X32: {
+      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
+          block, 4 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT32X16: {
+      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
+                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
+          block, 4 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT16X32: {
+      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
+          block, 4 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT32X32: {
+      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
+          block, 4 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT64X32: {
+      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
+          block, 8 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT32X64: {
+      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
+                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
+          block, 8 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT64X64: {
+      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
+                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
+          block, 8 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT128X64: {
+      ReinterpretingIDCT<
+          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
+          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
+          block, 16 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT64X128: {
+      ReinterpretingIDCT<
+          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
+          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
+          block, 16 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT128X128: {
+      ReinterpretingIDCT<
+          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
+          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
+          block, 16 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT256X128: {
+      ReinterpretingIDCT<
+          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
+          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
+          block, 32 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT128X256: {
+      ReinterpretingIDCT<
+          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
+          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
+          block, 32 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    case Type::DCT256X256: {
+      ReinterpretingIDCT<
+          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
+          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
+          block, 32 * kBlockDim, dc, dc_stride);
+      break;
+    }
+    // These strategies yield exactly one DC value, which is the first
+    // coefficient of the block.
+    case Type::DCT:
+    case Type::DCT2X2:
+    case Type::DCT4X4:
+    case Type::DCT4X8:
+    case Type::DCT8X4:
+    case Type::AFV0:
+    case Type::AFV1:
+    case Type::AFV2:
+    case Type::AFV3:
+    case Type::IDENTITY:
+      dc[0] = block[0];
+      break;
+    // Not a real strategy; reaching it is treated as a fatal error.
+    case Type::kNumValidStrategies:
+      JXL_ABORT("Invalid strategy");
+  }
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_transforms.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_transforms.cc
new file mode 100644
index 0000000000..8978ba1dcb
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_transforms.cc
@@ -0,0 +1,41 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_transforms.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_transforms.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/enc_transforms-inl.h"
+
+namespace jxl {
+
+#if HWY_ONCE
+HWY_EXPORT(TransformFromPixels);  // Per-target table used by the wrapper.
+void TransformFromPixels(const AcStrategy::Type strategy,
+                         const float* JXL_RESTRICT pixels, size_t pixels_stride,
+                         float* JXL_RESTRICT coefficients,
+                         float* JXL_RESTRICT scratch_space) {  // as declared
+  HWY_DYNAMIC_DISPATCH(TransformFromPixels)(strategy, pixels, pixels_stride,
+                                            coefficients, scratch_space);
+}
+
+HWY_EXPORT(DCFromLowestFrequencies);  // Per-target table used by the wrapper.
+void DCFromLowestFrequencies(AcStrategy::Type strategy, const float* block,
+                             float* dc, size_t dc_stride) {
+  // Void call: there is no value to propagate, so no `return` is needed.
+  HWY_DYNAMIC_DISPATCH(DCFromLowestFrequencies)(strategy, block, dc, dc_stride);
+}
+
+HWY_EXPORT(AFVDCT4x4);  // Per-target table used by the wrapper.
+void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
+  HWY_DYNAMIC_DISPATCH(AFVDCT4x4)(pixels, coeffs);  // void: nothing to return
+}
+#endif  // HWY_ONCE
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_transforms.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_transforms.h
new file mode 100644
index 0000000000..039ccc3893
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_transforms.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_TRANSFORMS_H_
+#define LIB_JXL_ENC_TRANSFORMS_H_
+
+// Facade for (non-inlined) integral transforms.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+void TransformFromPixels(const AcStrategy::Type strategy,
+                         const float* JXL_RESTRICT pixels, size_t pixels_stride,
+                         float* JXL_RESTRICT coefficients,
+                         float* JXL_RESTRICT scratch_space);
+
+// Equivalent of the above for DC image.
+void DCFromLowestFrequencies(AcStrategy::Type strategy, const float* block,
+                             float* dc, size_t dc_stride);
+
+void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_TRANSFORMS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_xyb.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_xyb.cc
new file mode 100644
index 0000000000..57383b1b8e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_xyb.cc
@@ -0,0 +1,437 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_xyb.h"
+
+#include <algorithm>
+#include <cstdlib>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_xyb.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/transfer_functions-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::ShiftRight;
+
+// Returns cbrt(x) + add with 6 ulp max error. Requires x >= 0.
+// Modified from vectormath_exp.h, Apache 2 license.
+// https://www.agner.org/optimize/vectorclass.zip
+template <class V>
+V CubeRootAndAdd(const V x, const V add) {
+  const HWY_FULL(float) df;
+  const HWY_FULL(int32_t) di;
+
+  const auto kExpBias = Set(di, 0x54800000);  // cast(1.) + cast(1.) / 3
+  const auto kExpMul = Set(di, 0x002AAAAA);   // shifted 1/3
+  const auto k1_3 = Set(df, 1.0f / 3);
+  const auto k4_3 = Set(df, 4.0f / 3);
+
+  const auto xa = x;  // assume inputs never negative
+  const auto xa_3 = k1_3 * xa;  // x / 3, reused by every loop iteration
+
+  // Multiply exponent by -1/3: integer-domain first guess for x^(-1/3).
+  const auto m1 = BitCast(di, xa);
+  // Special case for 0. 0 is represented with an exponent of 0, so the
+  // "kExpBias - 1/3 * exp" below gives the wrong result. The IfThenZeroElse()
+  // sets those values as 0, which prevents having NaNs in the computations
+  // below.
+  const auto m2 =
+      IfThenZeroElse(m1 == Zero(di), kExpBias - (ShiftRight<23>(m1)) * kExpMul);
+  auto r = BitCast(df, m2);  // initial estimate of x^(-1/3)
+
+  // Newton-Raphson iterations: r <- (4/3) * r - (x/3) * r^4
+  for (int i = 0; i < 3; i++) {
+    const auto r2 = r * r;
+    r = NegMulAdd(xa_3, r2 * r2, k4_3 * r);
+  }
+  // Final iteration, written as r <- r + (r - x * r^4) / 3
+  auto r2 = r * r;
+  r = MulAdd(k1_3, NegMulAdd(xa, r2 * r2, r), r);
+  r2 = r * r;              // ~= x^(-2/3)
+  r = MulAdd(r2, x, add);  // x^(-2/3) * x + add ~= cbrt(x) + add
+
+  return r;
+}
+
+// Ensures infinity norm is bounded: max |approx - cbrtf(x)| for x in [0, 20).
+void TestCubeRoot() {
+  const HWY_FULL(float) d;
+  float max_err = 0.0f;
+  for (uint64_t x5 = 0; x5 < 2000000; x5++) {
+    const float x = x5 * 1E-5f;  // samples are 1E-5 apart
+    const float expected = cbrtf(x);
+    HWY_ALIGN float approx[MaxLanes(d)];
+    Store(CubeRootAndAdd(Set(d, x), Zero(d)), d, approx);
+
+    // All lanes got the same input, so they must agree (within 1.2E-7).
+    for (size_t i = 1; i < Lanes(d); ++i) {
+      JXL_ASSERT(std::abs(approx[0] - approx[i]) <= 1.2E-7f);
+    }
+
+    const float err = std::abs(approx[0] - expected);
+    max_err = std::max(max_err, err);
+  }
+  // printf("max err %e\n", max_err);
+  JXL_ASSERT(max_err < 8E-7f);
+}
+
+// Three rows of (premultiplied weights . (r, g, b)) + bias, on SIMD vectors.
+template <class V>
+JXL_INLINE void OpsinAbsorbance(const V r, const V g, const V b,
+                                const float* JXL_RESTRICT premul_absorb,
+                                V* JXL_RESTRICT mixed0, V* JXL_RESTRICT mixed1,
+                                V* JXL_RESTRICT mixed2) {
+  const float* bias = &kOpsinAbsorbanceBias[0];
+  const HWY_FULL(float) df;
+  const size_t lanes = Lanes(df);  // each weight is pre-broadcast to a vector
+  const auto w00 = Load(df, premul_absorb + 0 * lanes);
+  const auto w01 = Load(df, premul_absorb + 1 * lanes);
+  const auto w02 = Load(df, premul_absorb + 2 * lanes);
+  const auto w10 = Load(df, premul_absorb + 3 * lanes);
+  const auto w11 = Load(df, premul_absorb + 4 * lanes);
+  const auto w12 = Load(df, premul_absorb + 5 * lanes);
+  const auto w20 = Load(df, premul_absorb + 6 * lanes);
+  const auto w21 = Load(df, premul_absorb + 7 * lanes);
+  const auto w22 = Load(df, premul_absorb + 8 * lanes);
+  *mixed0 = MulAdd(w00, r, MulAdd(w01, g, MulAdd(w02, b, Set(df, bias[0]))));
+  *mixed1 = MulAdd(w10, r, MulAdd(w11, g, MulAdd(w12, b, Set(df, bias[1]))));
+  *mixed2 = MulAdd(w20, r, MulAdd(w21, g, MulAdd(w22, b, Set(df, bias[2]))));
+}
+
+template <class V>
+void StoreXYB(const V r, V g, const V b, float* JXL_RESTRICT valx,
+              float* JXL_RESTRICT valy, float* JXL_RESTRICT valz) {
+  const HWY_FULL(float) df;
+  const V kHalf = Set(df, 0.5f);
+  Store(kHalf * (r - g), df, valx);  // X: half the difference of r and g
+  Store(kHalf * (r + g), df, valy);  // Y: half the sum of r and g
+  Store(b, df, valz);                // B: stored unchanged
+}
+
+// Maps one vector of linear RGB samples to XYB and stores the result.
+template <class V>
+void LinearRGBToXYB(const V r, const V g, const V b,
+                    const float* JXL_RESTRICT premul_absorb,
+                    float* JXL_RESTRICT valx, float* JXL_RESTRICT valy,
+                    float* JXL_RESTRICT valz) {
+  const HWY_FULL(float) df;
+  const size_t lanes = Lanes(df);
+
+  V absorbed0, absorbed1, absorbed2;
+  OpsinAbsorbance(r, g, b, premul_absorb, &absorbed0, &absorbed1, &absorbed2);
+
+  // The absorbances should be non-negative even for wide-gamut input; clamp
+  // to zero before the cube root, then add entries 9..11 of the table.
+  const V gamma0 = CubeRootAndAdd(ZeroIfNegative(absorbed0),
+                                  Load(df, premul_absorb + 9 * lanes));
+  const V gamma1 = CubeRootAndAdd(ZeroIfNegative(absorbed1),
+                                  Load(df, premul_absorb + 10 * lanes));
+  const V gamma2 = CubeRootAndAdd(ZeroIfNegative(absorbed2),
+                                  Load(df, premul_absorb + 11 * lanes));
+  StoreXYB(gamma0, gamma1, gamma2, valx, valy, valz);
+  // For wide-gamut inputs, r/g/b and valx (but not y/z) are often negative.
+}
+
+// sRGB-encoded to linear; both use the codec.h scaling (0-1 if in-gamut).
+template <class V>
+V LinearFromSRGB(V encoded) {
+  return TF_SRGB().DisplayFromEncoded(encoded);
+}
+
+void LinearSRGBToXYB(const Image3F& linear,
+                     const float* JXL_RESTRICT premul_absorb, ThreadPool* pool,
+                     Image3F* JXL_RESTRICT xyb) {
+  const HWY_FULL(float) df;
+  const size_t width = linear.xsize();
+  const uint32_t num_rows = static_cast<uint32_t>(linear.ysize());
+
+  // One task per image row; each step converts Lanes(df) pixels.
+  const auto convert_row = [&](const int task, const int /*thread*/) {
+    const size_t y = static_cast<size_t>(task);
+    const float* JXL_RESTRICT src_r = linear.ConstPlaneRow(0, y);
+    const float* JXL_RESTRICT src_g = linear.ConstPlaneRow(1, y);
+    const float* JXL_RESTRICT src_b = linear.ConstPlaneRow(2, y);
+    float* JXL_RESTRICT dst_x = xyb->PlaneRow(0, y);
+    float* JXL_RESTRICT dst_y = xyb->PlaneRow(1, y);
+    float* JXL_RESTRICT dst_b = xyb->PlaneRow(2, y);
+
+    for (size_t x = 0; x < width; x += Lanes(df)) {
+      LinearRGBToXYB(Load(df, src_r + x), Load(df, src_g + x),
+                     Load(df, src_b + x), premul_absorb, dst_x + x, dst_y + x,
+                     dst_b + x);
+    }
+  };
+
+  RunOnPool(pool, 0, num_rows, ThreadPool::SkipInit(), convert_row,
+            "LinearToXYB");
+}
+
+void SRGBToXYB(const Image3F& srgb, const float* JXL_RESTRICT premul_absorb,
+               ThreadPool* pool, Image3F* JXL_RESTRICT xyb) {
+  const HWY_FULL(float) df;
+  const size_t width = srgb.xsize();
+  const uint32_t num_rows = static_cast<uint32_t>(srgb.ysize());
+  // One task per image row; sRGB samples are linearized on the fly.
+  const auto convert_row = [&](const int task, const int /*thread*/) {
+    const size_t y = static_cast<size_t>(task);
+    const float* JXL_RESTRICT src_r = srgb.ConstPlaneRow(0, y);
+    const float* JXL_RESTRICT src_g = srgb.ConstPlaneRow(1, y);
+    const float* JXL_RESTRICT src_b = srgb.ConstPlaneRow(2, y);
+    float* JXL_RESTRICT dst_x = xyb->PlaneRow(0, y);
+    float* JXL_RESTRICT dst_y = xyb->PlaneRow(1, y);
+    float* JXL_RESTRICT dst_b = xyb->PlaneRow(2, y);
+
+    for (size_t x = 0; x < width; x += Lanes(df)) {
+      const auto lin_r = LinearFromSRGB(Load(df, src_r + x));
+      const auto lin_g = LinearFromSRGB(Load(df, src_g + x));
+      const auto lin_b = LinearFromSRGB(Load(df, src_b + x));
+      LinearRGBToXYB(lin_r, lin_g, lin_b, premul_absorb, dst_x + x, dst_y + x,
+                     dst_b + x);
+    }
+  };
+  RunOnPool(pool, 0, num_rows, ThreadPool::SkipInit(), convert_row,
+            "SRGBToXYB");
+}
+
+void SRGBToXYBAndLinear(const Image3F& srgb,
+                        const float* JXL_RESTRICT premul_absorb,
+                        ThreadPool* pool, Image3F* JXL_RESTRICT xyb,
+                        Image3F* JXL_RESTRICT linear) {
+  const HWY_FULL(float) df;
+  const size_t width = srgb.xsize();
+  const uint32_t num_rows = static_cast<uint32_t>(srgb.ysize());
+
+  // One task per image row; the linearized samples are kept in `linear` and
+  // are also converted to XYB.
+  const auto convert_row = [&](const int task, const int /*thread*/) {
+    const size_t y = static_cast<size_t>(task);
+    const float* JXL_RESTRICT src_r = srgb.ConstPlaneRow(0, y);
+    const float* JXL_RESTRICT src_g = srgb.ConstPlaneRow(1, y);
+    const float* JXL_RESTRICT src_b = srgb.ConstPlaneRow(2, y);
+
+    float* JXL_RESTRICT lin_row_r = linear->PlaneRow(0, y);
+    float* JXL_RESTRICT lin_row_g = linear->PlaneRow(1, y);
+    float* JXL_RESTRICT lin_row_b = linear->PlaneRow(2, y);
+
+    float* JXL_RESTRICT dst_x = xyb->PlaneRow(0, y);
+    float* JXL_RESTRICT dst_y = xyb->PlaneRow(1, y);
+    float* JXL_RESTRICT dst_b = xyb->PlaneRow(2, y);
+
+    for (size_t x = 0; x < width; x += Lanes(df)) {
+      const auto lin_r = LinearFromSRGB(Load(df, src_r + x));
+      const auto lin_g = LinearFromSRGB(Load(df, src_g + x));
+      const auto lin_b = LinearFromSRGB(Load(df, src_b + x));
+      Store(lin_r, df, lin_row_r + x);
+      Store(lin_g, df, lin_row_g + x);
+      Store(lin_b, df, lin_row_b + x);
+      LinearRGBToXYB(lin_r, lin_g, lin_b, premul_absorb, dst_x + x, dst_y + x,
+                     dst_b + x);
+    }
+  };
+  RunOnPool(pool, 0, num_rows, ThreadPool::SkipInit(), convert_row,
+            "SRGBToXYBAndLinear");
+}
+
+// This is different from Butteraugli's OpsinDynamicsImage() in the sense that
+// it does not contain a sensitivity multiplier based on the blurred image.
+const ImageBundle* ToXYB(const ImageBundle& in, ThreadPool* pool,
+                         Image3F* JXL_RESTRICT xyb,
+                         ImageBundle* const JXL_RESTRICT linear) {
+  PROFILER_FUNC;
+
+  const size_t xsize = in.xsize();
+  const size_t ysize = in.ysize();
+  JXL_ASSERT(SameSize(in, *xyb));  // the caller must pre-size xyb
+
+  const HWY_FULL(float) d;
+  // Pre-broadcasted constants: 9 scaled matrix entries, then 3 x -cbrt(bias).
+  HWY_ALIGN float premul_absorb[MaxLanes(d) * 12];
+  const size_t N = Lanes(d);
+  for (size_t i = 0; i < 9; ++i) {
+    const auto absorb = Set(d, kOpsinAbsorbanceMatrix[i] *
+                                   (in.metadata()->IntensityTarget() / 255.0f));
+    Store(absorb, d, premul_absorb + i * N);
+  }
+  for (size_t i = 0; i < 3; ++i) {
+    const auto neg_bias_cbrt = Set(d, -cbrtf(kOpsinAbsorbanceBias[i]));
+    Store(neg_bias_cbrt, d, premul_absorb + (9 + i) * N);
+  }
+
+  const bool want_linear = linear != nullptr;  // caller also wants linear sRGB
+
+  const ColorEncoding& c_linear_srgb = ColorEncoding::LinearSRGB(in.IsGray());
+  // Linear sRGB inputs are rare but can be useful for the fastest encoders, for
+  // which undoing the sRGB transfer function would be a large part of the cost.
+  if (c_linear_srgb.SameColorEncoding(in.c_current())) {
+    LinearSRGBToXYB(in.color(), premul_absorb, pool, xyb);
+    // This only happens if kitten or slower, moving ImageBundle might be
+    // possible but the encoder is much slower than this copy.
+    if (want_linear) {
+      *linear = in.Copy();
+      return linear;
+    }
+    return &in;  // `in` itself is the linear image
+  }
+
+  // Common case: already sRGB, can avoid the color transform
+  if (in.IsSRGB()) {
+    // Common case: can avoid allocating/copying
+    if (!want_linear) {
+      SRGBToXYB(in.color(), premul_absorb, pool, xyb);
+      return &in;
+    }
+
+    // Slow encoder also wants linear sRGB.
+    linear->SetFromImage(Image3F(xsize, ysize), c_linear_srgb);
+    SRGBToXYBAndLinear(in.color(), premul_absorb, pool, xyb, linear->color());
+    return linear;
+  }
+
+  // General case: not sRGB, need color transform.
+  ImageBundle linear_storage;  // Local storage only used if !want_linear.
+
+  ImageBundle* linear_storage_ptr;
+  if (want_linear) {
+    // Caller asked for linear, use that storage directly.
+    linear_storage_ptr = linear;
+  } else {
+    // Caller didn't ask for linear, create our own local storage.
+    // OK to reuse metadata, it will not be changed.
+    linear_storage = ImageBundle(const_cast<ImageMetadata*>(in.metadata()));
+    linear_storage_ptr = &linear_storage;
+  }
+
+  const ImageBundle* ptr;
+  JXL_CHECK(
+      TransformIfNeeded(in, c_linear_srgb, pool, linear_storage_ptr, &ptr));
+  // If no transform was necessary, should have taken the above codepath.
+  JXL_ASSERT(ptr == linear_storage_ptr);
+
+  LinearSRGBToXYB(*linear_storage_ptr->color(), premul_absorb, pool, xyb);
+  return want_linear ? linear : &in;  // never the local linear_storage
+}
+
+// Transform RGB to YCbCr.
+// Could be performed in-place (i.e. Y, Cb and Cr could alias R, G and B).
+void RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane,
+                const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane,
+                ImageF* cr_plane, ThreadPool* pool) {
+  const HWY_FULL(float) df;
+  const size_t S = Lanes(df);  // Step.
+
+  const size_t xsize = r_plane.xsize();
+  const size_t ysize = r_plane.ysize();
+  if ((xsize == 0) || (ysize == 0)) return;  // empty image: nothing to do
+
+  // Full-range BT.601 as defined by JFIF Clause 7:
+  // https://www.itu.int/rec/T-REC-T.871-201105-I/en
+  const auto k128 = Set(df, 128.0f / 255);
+  const auto kR = Set(df, 0.299f);  // NTSC luma
+  const auto kG = Set(df, 0.587f);
+  const auto kB = Set(df, 0.114f);
+  const auto kAmpR = Set(df, 0.701f);
+  const auto kAmpB = Set(df, 0.886f);
+  const auto kDiffR = kAmpR + kR;
+  const auto kDiffB = kAmpB + kB;
+  const auto kNormR = Set(df, 1.0f) / (kAmpR + kG + kB);
+  const auto kNormB = Set(df, 1.0f) / (kR + kG + kAmpB);
+
+  constexpr size_t kGroupArea = kGroupDim * kGroupDim;
+  const size_t lines_per_group = DivCeil(kGroupArea, xsize);  // rows per task
+  const size_t num_stripes = DivCeil(ysize, lines_per_group);
+  const auto transform = [&](int idx, int /* thread*/) {  // one stripe of rows
+    const size_t y0 = idx * lines_per_group;
+    const size_t y1 = std::min<size_t>(y0 + lines_per_group, ysize);
+    for (size_t y = y0; y < y1; ++y) {
+      const float* r_row = r_plane.ConstRow(y);
+      const float* g_row = g_plane.ConstRow(y);
+      const float* b_row = b_plane.ConstRow(y);
+      float* y_row = y_plane->Row(y);
+      float* cb_row = cb_plane->Row(y);
+      float* cr_row = cr_plane->Row(y);
+      for (size_t x = 0; x < xsize; x += S) {
+        const auto r = Load(df, r_row + x);
+        const auto g = Load(df, g_row + x);
+        const auto b = Load(df, b_row + x);
+        const auto r_base = r * kR;
+        const auto r_diff = r * kDiffR;
+        const auto g_base = g * kG;
+        const auto b_base = b * kB;
+        const auto b_diff = b * kDiffB;
+        const auto y_base = r_base + g_base + b_base;
+        const auto y_vec = y_base - k128;  // luma shifted down by 128/255
+        const auto cb_vec = (b_diff - y_base) * kNormB;
+        const auto cr_vec = (r_diff - y_base) * kNormR;
+        Store(y_vec, df, y_row + x);
+        Store(cb_vec, df, cb_row + x);
+        Store(cr_vec, df, cr_row + x);
+      }
+    }
+  };
+  RunOnPool(pool, 0, static_cast<int>(num_stripes), ThreadPool::SkipInit(),
+            transform, "RgbToYcbCr");
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(ToXYB);  // Per-target table used by the wrapper.
+const ImageBundle* ToXYB(const ImageBundle& in, ThreadPool* pool,
+                         Image3F* JXL_RESTRICT xyb,
+                         ImageBundle* JXL_RESTRICT linear) {
+  return HWY_DYNAMIC_DISPATCH(ToXYB)(in, pool, xyb, linear);  // null allowed
+}
+
+HWY_EXPORT(RgbToYcbcr);  // Per-target table used by the wrapper.
+void RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane,
+                const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane,
+                ImageF* cr_plane, ThreadPool* pool) {
+  HWY_DYNAMIC_DISPATCH(RgbToYcbcr)(r_plane, g_plane, b_plane, y_plane, cb_plane,
+                                   cr_plane, pool);
+}
+
+HWY_EXPORT(TestCubeRoot);  // Per-target table used by the wrapper.
+void TestCubeRoot() { HWY_DYNAMIC_DISPATCH(TestCubeRoot)(); }
+
+// DEPRECATED. Converts an 8-bit sRGB image to XYB, single-threaded.
+Image3F OpsinDynamicsImage(const Image3B& srgb8) {
+  ImageMetadata metadata;
+  metadata.SetUintSamples(8);
+  metadata.color_encoding = ColorEncoding::SRGB();
+  ImageBundle ib(&metadata);
+  ib.SetFromImage(ConvertToFloat(srgb8), metadata.color_encoding);
+  JXL_CHECK(ib.TransformTo(ColorEncoding::LinearSRGB(ib.IsGray())));
+  ThreadPool* null_pool = nullptr;
+  Image3F xyb(srgb8.xsize(), srgb8.ysize());
+
+  // Only `xyb` is returned, so do not request (and copy) a linear image.
+  (void)ToXYB(ib, null_pool, &xyb, /*linear=*/nullptr);
+  return xyb;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_xyb.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_xyb.h
new file mode 100644
index 0000000000..f30ae2f68b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/enc_xyb.h
@@ -0,0 +1,45 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_XYB_H_
+#define LIB_JXL_ENC_XYB_H_
+
+// Converts to XYB color space.
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+// Converts any color space to XYB. If `linear` is not null, returns `linear`
+// after filling it with a linear sRGB copy of `in`. Otherwise, returns `&in`.
+//
+// NOTE this return value can avoid an extra color conversion if `in` would
+// later be passed to JxlButteraugliComparator.
+const ImageBundle* ToXYB(const ImageBundle& in, ThreadPool* pool,
+                         Image3F* JXL_RESTRICT xyb,
+                         ImageBundle* JXL_RESTRICT linear = nullptr);
+
+// Bt.601 to match JPEG/JFIF. Outputs _signed_ YCbCr values suitable for DCT,
+// see F.1.1.3 of T.81 (because our data type is float, there is no need to add
+// a bias to make the values unsigned).
+void RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane,
+                const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane,
+                ImageF* cr_plane, ThreadPool* pool);
+
+// DEPRECATED, used by opsin_image_wrapper.
+Image3F OpsinDynamicsImage(const Image3B& srgb8);
+
+// For opsin_image_test.
+void TestCubeRoot();
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENC_XYB_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/encode.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/encode.cc
new file mode 100644
index 0000000000..f4e94d1412
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/encode.cc
@@ -0,0 +1,471 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "jxl/encode.h"
+
+#include <algorithm>
+#include <cstring>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_icc_codec.h"
+#include "lib/jxl/encode_internal.h"
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+
+#define JPEGXL_MAJOR_VERSION 0
+#define JPEGXL_MINOR_VERSION 5
+#define JPEGXL_PATCH_VERSION 0
+
+// Debug-printing failure macro similar to JXL_FAILURE, but for the status code
+// JXL_ENC_ERROR
+#ifdef JXL_CRASH_ON_ERROR
+#define JXL_API_ERROR(format, ...)                                           \
+  (::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__), \
+   ::jxl::Abort(), JXL_ENC_ERROR)
+#else  // JXL_CRASH_ON_ERROR
+#define JXL_API_ERROR(format, ...)                                             \
+  (((JXL_DEBUG_ON_ERROR) &&                                                    \
+    ::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__)), \
+   JXL_ENC_ERROR)
+#endif  // JXL_CRASH_ON_ERROR
+
+namespace jxl {}  // namespace jxl
+
+uint32_t JxlEncoderVersion(void) {  // major * 10^6 + minor * 10^3 + patch
+  constexpr int kUpper = JPEGXL_MAJOR_VERSION * 1000 + JPEGXL_MINOR_VERSION;
+  return kUpper * 1000 + JPEGXL_PATCH_VERSION;
+}
+
+JxlEncoderStatus JxlEncoderStruct::RefillOutputByteQueue() {
+  jxl::MemoryManagerUniquePtr<jxl::JxlEncoderQueuedFrame> input_frame =
+      std::move(input_frame_queue[0]);  // NOTE(review): assumes non-empty
+  input_frame_queue.erase(input_frame_queue.begin());
+
+  // TODO(zond): If the frame queue is empty and the input_closed is true,
+  // then mark this frame as the last.
+
+  jxl::BitWriter writer;
+
+  if (!wrote_bytes) {  // nothing emitted yet: write the file-level headers
+    if (use_container) {
+      output_byte_queue.insert(
+          output_byte_queue.end(), jxl::kContainerHeader,
+          jxl::kContainerHeader + sizeof(jxl::kContainerHeader));
+      if (store_jpeg_metadata && jpeg_metadata.size() > 0) {  // "jbrd" box
+        jxl::AppendBoxHeader(jxl::MakeBoxType("jbrd"), jpeg_metadata.size(),
+                             false, &output_byte_queue);
+        output_byte_queue.insert(output_byte_queue.end(), jpeg_metadata.begin(),
+                                 jpeg_metadata.end());
+      }
+    }
+    if (!WriteHeaders(&metadata, &writer, nullptr)) {
+      return JXL_ENC_ERROR;
+    }
+    // Only send ICC (at least several hundred bytes) if fields aren't enough.
+    if (metadata.m.color_encoding.WantICC()) {
+      if (!jxl::WriteICC(metadata.m.color_encoding.ICC(), &writer,
+                         jxl::kLayerHeader, nullptr)) {
+        return JXL_ENC_ERROR;
+      }
+    }
+
+    // TODO(lode): preview should be added here if a preview image is added
+
+    // Each frame should start on byte boundaries.
+    writer.ZeroPadToByte();
+  }
+
+  // TODO(zond): Handle progressive mode like EncodeFile does it.
+  // TODO(zond): Handle animation like EncodeFile does it, by checking if
+  //             JxlEncoderCloseInput has been called and if the frame queue is
+  //             empty (to see if it's the last animation frame).
+
+  if (metadata.m.xyb_encoded) {
+    input_frame->option_values.cparams.color_transform =
+        jxl::ColorTransform::kXYB;
+  } else {
+    // TODO(zond): Figure out when to use kYCbCr instead.
+    input_frame->option_values.cparams.color_transform =
+        jxl::ColorTransform::kNone;
+  }
+
+  jxl::PassesEncoderState enc_state;
+  if (!jxl::EncodeFrame(input_frame->option_values.cparams, jxl::FrameInfo{},
+                        &metadata, input_frame->frame, &enc_state,
+                        thread_pool.get(), &writer,
+                        /*aux_out=*/nullptr)) {
+    return JXL_ENC_ERROR;
+  }
+
+  jxl::PaddedBytes bytes = std::move(writer).TakeBytes();
+
+  if (use_container && !wrote_bytes) {  // first frame: open the "jxlc" box
+    if (input_closed && input_frame_queue.empty()) {  // only frame: size known
+      jxl::AppendBoxHeader(jxl::MakeBoxType("jxlc"), bytes.size(),
+                           /*unbounded=*/false, &output_byte_queue);
+    } else {
+      jxl::AppendBoxHeader(jxl::MakeBoxType("jxlc"), 0, /*unbounded=*/true,
+                           &output_byte_queue);
+    }
+  }
+
+  output_byte_queue.insert(output_byte_queue.end(), bytes.data(),
+                           bytes.data() + bytes.size());
+  wrote_bytes = true;  // later frames skip the headers above
+
+  last_used_cparams = input_frame->option_values.cparams;
+
+  return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderSetColorEncoding(JxlEncoder* enc,
+                                            const JxlColorEncoding* color) {
+  // The color encoding may be set only once per encoder.
+  if (enc->color_encoding_set) return JXL_ENC_ERROR;
+
+  // Convert straight into the encoder's metadata.
+  auto* internal = &enc->metadata.m.color_encoding;
+  if (!jxl::ConvertExternalToInternalColorEncoding(*color, internal)) {
+    return JXL_ENC_ERROR;
+  }
+  enc->color_encoding_set = true;
+  return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderSetICCProfile(JxlEncoder* enc,
+                                         const uint8_t* icc_profile,
+                                         size_t size) {
+  // The color encoding may be set only once per encoder.
+  if (enc->color_encoding_set) return JXL_ENC_ERROR;
+
+  // Take a private copy of the caller's bytes and hand it to the metadata.
+  jxl::PaddedBytes profile_copy;
+  profile_copy.assign(icc_profile, icc_profile + size);
+  auto& encoding = enc->metadata.m.color_encoding;
+  if (!encoding.SetICCRaw(std::move(profile_copy))) return JXL_ENC_ERROR;
+
+  enc->color_encoding_set = true;
+  return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderSetBasicInfo(JxlEncoder* enc,
+                                        const JxlBasicInfo* info) {
+  // Validates `info` and copies it into the encoder metadata. Values that
+  // were already stored are not rolled back when a later check fails.
+
+  if (!enc->metadata.size.Set(info->xsize, info->ysize)) {
+    return JXL_ENC_ERROR;
+  }
+
+  // Color samples. Floating point is accepted only as 32 bits with an
+  // 8-bit exponent; unsigned integers only with 8, 16 or 32 bits.
+  const auto sample_bits = info->bits_per_sample;
+  if (info->exponent_bits_per_sample) {
+    if (info->exponent_bits_per_sample != 8) return JXL_ENC_NOT_SUPPORTED;
+    if (sample_bits != 32) {
+      return JXL_ENC_NOT_SUPPORTED;
+    }
+    enc->metadata.m.SetFloat32Samples();
+  } else if (sample_bits == 8 || sample_bits == 16 || sample_bits == 32) {
+    enc->metadata.m.SetUintSamples(sample_bits);
+  } else {
+    return JXL_ENC_ERROR;
+  }
+
+  // Alpha. A floating-point alpha channel is not supported.
+  const auto alpha_bits = info->alpha_bits;
+  if (alpha_bits > 0 && info->alpha_exponent_bits > 0) {
+    return JXL_ENC_NOT_SUPPORTED;
+  }
+  if (alpha_bits == 16 || alpha_bits == 32) {
+    // A request for 32 bits is stored as 16 bits, like one for 16 bits.
+    enc->metadata.m.SetAlphaBits(16);
+  } else if (alpha_bits == 8) {
+    enc->metadata.m.SetAlphaBits(alpha_bits);
+  } else if (alpha_bits != 0) {
+    // Any other non-zero depth is rejected. For zero, nothing is stored
+    // and the alpha settings of the metadata stay as they were.
+    return JXL_ENC_ERROR;
+  }
+
+  // XYB is used unless the caller asked to keep the original profile.
+  enc->metadata.m.xyb_encoded = !info->uses_original_profile;
+  enc->basic_info_set = true;
+  return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderOptions* JxlEncoderOptionsCreate(JxlEncoder* enc,
+                                           const JxlEncoderOptions* source) {
+  auto owned =
+      jxl::MemoryManagerMakeUnique<JxlEncoderOptions>(&enc->memory_manager);
+  if (!owned) return nullptr;
+  JxlEncoderOptions* const options = owned.get();  // borrowed; enc owns it
+  options->enc = enc;
+  if (source == nullptr) {
+    options->values.lossless = false;
+  } else {
+    options->values = source->values;
+  }
+  enc->encoder_options.emplace_back(std::move(owned));
+  return options;
+}
+
+JxlEncoderStatus JxlEncoderOptionsSetLossless(JxlEncoderOptions* options,
+                                              const JXL_BOOL lossless) {
+  options->values.lossless = lossless;  // stored as-is, without validation
+  return JXL_ENC_SUCCESS;               // this setter never fails
+}
+
+JxlEncoderStatus JxlEncoderOptionsSetEffort(JxlEncoderOptions* options,
+                                            const int effort) {
+  // Only efforts 3..9 are accepted; they map to SpeedTier values 7..1.
+  if (!(3 <= effort && effort <= 9)) return JXL_ENC_ERROR;
+  const int tier = 10 - effort;
+  options->values.cparams.speed_tier = static_cast<jxl::SpeedTier>(tier);
+  return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderOptionsSetDistance(JxlEncoderOptions* options,
+                                              float distance) {
+  // Accept only [0, 15]. The check is written positively so that NaN, for
+  // which every ordered comparison is false, is rejected and not stored.
+  if (!(distance >= 0 && distance <= 15)) return JXL_ENC_ERROR;
+  options->values.cparams.butteraugli_distance = distance;
+  return JXL_ENC_SUCCESS;
+}
+
+JxlEncoder* JxlEncoderCreate(const JxlMemoryManager* memory_manager) {
+  JxlMemoryManager resolved;
+  if (!jxl::MemoryManagerInit(&resolved, memory_manager)) return nullptr;
+
+  // The encoder lives in storage obtained from the manager, hence the
+  // placement new; JxlEncoderDestroy() is its counterpart.
+  void* const storage = jxl::MemoryManagerAlloc(&resolved, sizeof(JxlEncoder));
+  if (storage == nullptr) return nullptr;
+
+  JxlEncoder* const encoder = new (storage) JxlEncoder();
+  // Keep a copy of the manager inside the encoder for later allocations.
+  encoder->memory_manager = resolved;
+  return encoder;
+}
+
+void JxlEncoderReset(JxlEncoder* enc) {
+  enc->thread_pool.reset();
+  enc->input_frame_queue.clear();
+  enc->encoder_options.clear();  // JxlEncoderOptions handed out become invalid
+  enc->output_byte_queue.clear();
+  enc->wrote_bytes = false;  // the next frame will emit the headers again
+  enc->metadata = jxl::CodecMetadata();
+  enc->last_used_cparams = jxl::CompressParams();
+  enc->input_closed = false;
+  enc->basic_info_set = false;
+  enc->color_encoding_set = false;  // NOTE(review): use_container is kept
+}
+
+void JxlEncoderDestroy(JxlEncoder* enc) {
+  if (!enc) return;
+  // Copy the manager first: it must not be read via `enc` after destruction.
+  JxlMemoryManager memory_manager = enc->memory_manager;
+  enc->~JxlEncoder();  // Explicit call since a custom free function is used.
+  jxl::MemoryManagerFree(&memory_manager, enc);
+}
+
+JxlEncoderStatus JxlEncoderUseContainer(JxlEncoder* enc,
+                                        JXL_BOOL use_container) {
+  enc->use_container = static_cast<bool>(use_container);
+  return JXL_ENC_SUCCESS;  // only stores the flag; never fails
+}
+
+JxlEncoderStatus JxlEncoderStoreJPEGMetadata(JxlEncoder* enc,
+                                             JXL_BOOL store_jpeg_metadata) {
+  enc->store_jpeg_metadata = static_cast<bool>(store_jpeg_metadata);
+  return JXL_ENC_SUCCESS;  // only stores the flag; never fails
+}
+
+JxlEncoderStatus JxlEncoderSetParallelRunner(JxlEncoder* enc,
+                                             JxlParallelRunner parallel_runner,
+                                             void* parallel_runner_opaque) {
+  if (enc->thread_pool) return JXL_API_ERROR("parallel runner already set");
+  // The pool wraps the caller's runner and is allocated through the encoder's
+  // memory manager; a null result is reported as an error.
+  auto& pool = enc->thread_pool;
+  pool = jxl::MemoryManagerMakeUnique<jxl::ThreadPool>(
+      &enc->memory_manager, parallel_runner, parallel_runner_opaque);
+  return pool ? JXL_ENC_SUCCESS : JXL_ENC_ERROR;
+}
+
+// Queues one frame taken from an existing JPEG bitstream.
+//
+// `buffer`/`size` are handed to jxl::jpeg::DecodeImageJPG. Color encoding and
+// basic info are derived from the decoded JPEG when the caller has not set
+// them on the encoder.
+//
+// Returns JXL_ENC_ERROR when input is already closed, the JPEG cannot be
+// decoded, the derived metadata is rejected, the encoder is configured for
+// XYB, the JPEG reconstruction data cannot be serialized, or the queued frame
+// cannot be allocated. Returns JXL_ENC_SUCCESS once the frame is queued.
+JxlEncoderStatus JxlEncoderAddJPEGFrame(const JxlEncoderOptions* options,
+                                        const uint8_t* buffer, size_t size) {
+  if (options->enc->input_closed) {
+    return JXL_ENC_ERROR;
+  }
+
+  jxl::CodecInOut io;
+  if (!jxl::jpeg::DecodeImageJPG(jxl::Span<const uint8_t>(buffer, size), &io)) {
+    return JXL_ENC_ERROR;
+  }
+
+  // No color encoding supplied by the caller: take it from the JPEG data and
+  // write it straight into the encoder metadata.
+  if (!options->enc->color_encoding_set) {
+    if (!SetColorEncodingFromJpegData(
+            *io.Main().jpeg_data, &options->enc->metadata.m.color_encoding)) {
+      return JXL_ENC_ERROR;
+    }
+  }
+
+  // No basic info supplied by the caller: describe the JPEG as 8-bit integer
+  // samples without alpha, using the JPEG's own dimensions.
+  if (!options->enc->basic_info_set) {
+    JxlBasicInfo basic_info;
+    basic_info.exponent_bits_per_sample = 0;
+    basic_info.bits_per_sample = 8;
+    basic_info.alpha_bits = 0;
+    basic_info.alpha_exponent_bits = 0;
+    basic_info.xsize = io.Main().jpeg_data->width;
+    basic_info.ysize = io.Main().jpeg_data->height;
+    basic_info.uses_original_profile = true;
+    if (JxlEncoderSetBasicInfo(options->enc, &basic_info) != JXL_ENC_SUCCESS) {
+      return JXL_ENC_ERROR;
+    }
+  }
+
+  if (options->enc->metadata.m.xyb_encoded) {
+    // Can't XYB encode a lossless JPEG.
+    return JXL_ENC_ERROR;
+  }
+
+  // Optionally serialize the JPEG reconstruction data and keep the bytes on
+  // the encoder.
+  if (options->enc->store_jpeg_metadata) {
+    // NOTE(review): EncodeJPEGData is given a copy, presumably because it may
+    // modify its input -- confirm against its declaration.
+    jxl::jpeg::JPEGData data_in = *io.Main().jpeg_data;
+    jxl::PaddedBytes jpeg_data;
+    if (!EncodeJPEGData(data_in, &jpeg_data)) {
+      return JXL_ENC_ERROR;
+    }
+    options->enc->jpeg_metadata = std::vector<uint8_t>(
+        jpeg_data.data(), jpeg_data.data() + jpeg_data.size());
+  }
+
+  auto queued_frame = jxl::MemoryManagerMakeUnique<jxl::JxlEncoderQueuedFrame>(
+      &options->enc->memory_manager,
+      // JxlEncoderQueuedFrame is a struct with no constructors, so we use the
+      // default move constructor there.
+      jxl::JxlEncoderQueuedFrame{options->values,
+                                 jxl::ImageBundle(&options->enc->metadata.m)});
+  if (!queued_frame) {
+    return JXL_ENC_ERROR;
+  }
+  // Move the decoded pixels and the JPEG-specific state out of `io` into the
+  // queued frame.
+  queued_frame->frame.SetFromImage(std::move(*io.Main().color()),
+                                   io.Main().c_current());
+  queued_frame->frame.jpeg_data = std::move(io.Main().jpeg_data);
+  queued_frame->frame.color_transform = io.Main().color_transform;
+  queued_frame->frame.chroma_subsampling = io.Main().chroma_subsampling;
+
+  // `lossless` is stored next to cparams (not inside them), so it is applied
+  // to this frame's copy of the parameters here.
+  if (options->values.lossless) {
+    queued_frame->option_values.cparams.SetLossless();
+  }
+
+  options->enc->input_frame_queue.emplace_back(std::move(queued_frame));
+  return JXL_ENC_SUCCESS;
+}
+
+// Queues one frame of raw pixels described by `pixel_format`.
+//
+// Both basic info and color encoding must already be set on the encoder, and
+// input must not have been closed. The image dimensions are taken from the
+// encoder metadata, not from the arguments.
+//
+// Returns JXL_ENC_ERROR for a missing precondition, a failed allocation,
+// JXL_TYPE_FLOAT16 input, or a buffer that jxl::BufferToImageBundle rejects.
+// Returns JXL_ENC_SUCCESS once the frame is queued.
+JxlEncoderStatus JxlEncoderAddImageFrame(const JxlEncoderOptions* options,
+                                         const JxlPixelFormat* pixel_format,
+                                         const void* buffer, size_t size) {
+  if (!options->enc->basic_info_set || !options->enc->color_encoding_set) {
+    return JXL_ENC_ERROR;
+  }
+
+  if (options->enc->input_closed) {
+    return JXL_ENC_ERROR;
+  }
+
+  auto queued_frame = jxl::MemoryManagerMakeUnique<jxl::JxlEncoderQueuedFrame>(
+      &options->enc->memory_manager,
+      // JxlEncoderQueuedFrame is a struct with no constructors, so we use the
+      // default move constructor there.
+      jxl::JxlEncoderQueuedFrame{options->values,
+                                 jxl::ImageBundle(&options->enc->metadata.m)});
+  if (!queued_frame) {
+    return JXL_ENC_ERROR;
+  }
+
+  if (pixel_format->data_type == JXL_TYPE_FLOAT16) {
+    // float16 is currently only supported in the decoder
+    return JXL_ENC_ERROR;
+  }
+
+  // Color space in which the supplied samples are interpreted. With XYB
+  // encoding, float samples are treated as linear sRGB and all other sample
+  // types as sRGB (gray variants when there are fewer than three channels);
+  // otherwise the encoder's configured color encoding is used.
+  jxl::ColorEncoding c_current;
+  if (options->enc->metadata.m.xyb_encoded) {
+    if (pixel_format->data_type == JXL_TYPE_FLOAT) {
+      c_current =
+          jxl::ColorEncoding::LinearSRGB(pixel_format->num_channels < 3);
+    } else {
+      c_current = jxl::ColorEncoding::SRGB(pixel_format->num_channels < 3);
+    }
+  } else {
+    c_current = options->enc->metadata.m.color_encoding;
+  }
+
+  // Convert the caller's buffer into the queued frame's image bundle.
+  if (!jxl::BufferToImageBundle(*pixel_format, options->enc->metadata.xsize(),
+                                options->enc->metadata.ysize(), buffer, size,
+                                options->enc->thread_pool.get(), c_current,
+                                &(queued_frame->frame))) {
+    return JXL_ENC_ERROR;
+  }
+
+  // `lossless` is stored next to cparams (not inside them), so it is applied
+  // to this frame's copy of the parameters here.
+  if (options->values.lossless) {
+    queued_frame->option_values.cparams.SetLossless();
+  }
+
+  options->enc->input_frame_queue.emplace_back(std::move(queued_frame));
+  return JXL_ENC_SUCCESS;
+}
+
+// Marks the encoder's input as closed. JxlEncoderAddImageFrame and
+// JxlEncoderAddJPEGFrame return JXL_ENC_ERROR once this flag is set.
+void JxlEncoderCloseInput(JxlEncoder* enc) {
+  enc->input_closed = true;
+}
+
+// Writes as much encoded output as fits into the caller's buffer.
+//
+// `*next_out` and `*avail_out` describe the remaining space; both are advanced
+// by the number of bytes written. Returns JXL_ENC_NEED_MORE_OUTPUT while
+// encoded bytes or queued frames remain, JXL_ENC_SUCCESS when both queues are
+// empty, and JXL_ENC_ERROR if encoding a queued frame fails.
+JxlEncoderStatus JxlEncoderProcessOutput(JxlEncoder* enc, uint8_t** next_out,
+                                         size_t* avail_out) {
+  for (;;) {
+    const bool have_bytes = !enc->output_byte_queue.empty();
+    const bool have_frames = !enc->input_frame_queue.empty();
+    if (*avail_out == 0 || (!have_bytes && !have_frames)) {
+      break;
+    }
+
+    if (!have_bytes) {
+      // Only frames are pending: encode the next one to refill the byte queue.
+      if (enc->RefillOutputByteQueue() != JXL_ENC_SUCCESS) {
+        return JXL_ENC_ERROR;
+      }
+      continue;
+    }
+
+    // Hand over as many pending bytes as the caller has room for.
+    const size_t chunk = std::min(*avail_out, enc->output_byte_queue.size());
+    memcpy(static_cast<void*>(*next_out), enc->output_byte_queue.data(),
+           chunk);
+    *next_out += chunk;
+    *avail_out -= chunk;
+    const auto queue_begin = enc->output_byte_queue.begin();
+    enc->output_byte_queue.erase(queue_begin, queue_begin + chunk);
+  }
+
+  const bool all_flushed =
+      enc->output_byte_queue.empty() && enc->input_frame_queue.empty();
+  return all_flushed ? JXL_ENC_SUCCESS : JXL_ENC_NEED_MORE_OUTPUT;
+}
+
+// Stores the requested decoding speed tier in the options' compress params.
+// Tiers outside [0, 4] are rejected with JXL_ENC_ERROR and leave the options
+// untouched.
+JxlEncoderStatus JxlEncoderOptionsSetDecodingSpeed(JxlEncoderOptions* options,
+                                                   int tier) {
+  const bool tier_in_range = (tier >= 0) && (tier <= 4);
+  if (!tier_in_range) {
+    return JXL_ENC_ERROR;
+  }
+
+  options->values.cparams.decoding_speed_tier = tier;
+  return JXL_ENC_SUCCESS;
+}
+
+// Fills `*color_encoding` with the external form of the library's sRGB
+// encoding; `is_gray` is forwarded to jxl::ColorEncoding::SRGB.
+void JxlColorEncodingSetToSRGB(JxlColorEncoding* color_encoding,
+                               JXL_BOOL is_gray) {
+  const jxl::ColorEncoding& internal_srgb = jxl::ColorEncoding::SRGB(is_gray);
+  ConvertInternalToExternalColorEncoding(internal_srgb, color_encoding);
+}
+
+// Fills `*color_encoding` with the external form of the library's linear sRGB
+// encoding; `is_gray` is forwarded to jxl::ColorEncoding::LinearSRGB.
+void JxlColorEncodingSetToLinearSRGB(JxlColorEncoding* color_encoding,
+                                     JXL_BOOL is_gray) {
+  const jxl::ColorEncoding& internal_linear =
+      jxl::ColorEncoding::LinearSRGB(is_gray);
+  ConvertInternalToExternalColorEncoding(internal_linear, color_encoding);
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/encode_internal.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/encode_internal.h
new file mode 100644
index 0000000000..f4ade2872a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/encode_internal.h
@@ -0,0 +1,120 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+#ifndef LIB_JXL_ENCODE_INTERNAL_H_
+#define LIB_JXL_ENCODE_INTERNAL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+#include <vector>
+
+#include "jxl/encode.h"
+#include "jxl/memory_manager.h"
+#include "jxl/parallel_runner.h"
+#include "jxl/types.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/enc_frame.h"
+#include "lib/jxl/memory_manager_internal.h"
+
+namespace jxl {
+
+// Plain value bundle holding the per-frame encoder settings. A copy of it is
+// stored in every JxlEncoderQueuedFrame.
+typedef struct JxlEncoderOptionsValuesStruct {
+  // lossless is a separate setting from cparams because it is a combination
+  // setting that overrides multiple settings inside of cparams.
+  bool lossless;
+  jxl::CompressParams cparams;
+} JxlEncoderOptionsValues;
+
+// One frame waiting in the encoder's input queue, paired with the option
+// values that were in effect when it was added.
+typedef struct JxlEncoderQueuedFrame {
+  // Settings copied from the JxlEncoderOptions used to add the frame.
+  JxlEncoderOptionsValues option_values;
+  // The frame's image data.
+  jxl::ImageBundle frame;
+} JxlEncoderQueuedFrame;
+
+typedef std::array<uint8_t, 4> BoxType;
+
+// Builds a BoxType from a four-character string literal. The literal's
+// terminating NUL (the fifth array element) is not copied.
+constexpr BoxType MakeBoxType(const char (&type)[5]) {
+  return BoxType{{static_cast<uint8_t>(type[0]), static_cast<uint8_t>(type[1]),
+                  static_cast<uint8_t>(type[2]),
+                  static_cast<uint8_t>(type[3])}};
+}
+
+// Fixed bytes that open a container file: a 12-byte (0xc) "JXL " box whose
+// payload is 0xd 0xa 0x87 0xa, followed by a 20-byte (0x14) "ftyp" box whose
+// payload is "jxl ", four zero bytes, and "jxl " again.
+constexpr unsigned char kContainerHeader[] = {
+    0,   0,   0, 0xc, 'J',  'X', 'L', ' ', 0xd, 0xa, 0x87,
+    0xa, 0,   0, 0,   0x14, 'f', 't', 'y', 'p', 'j', 'x',
+    'l', ' ', 0, 0,   0,    0,   'j', 'x', 'l', ' '};
+
+namespace {
+// Grows `*vec` by `size` zero-valued elements and returns a pointer to the
+// first of the newly added elements.
+template <typename T>
+uint8_t* Extend(T* vec, size_t size) {
+  const size_t previous_size = vec->size();
+  vec->resize(previous_size + size, 0);
+  return vec->data() + previous_size;
+}
+}  // namespace
+
+// Appends a JXL container box header with given type, size, and unbounded
+// properties to output.
+//
+// `size` is the size of the box contents only. The size written into the
+// header covers the whole box, i.e. the contents plus the header itself:
+// 8 bytes for the compact form, 16 bytes when the 64-bit extended size field
+// is needed. If `unbounded` is true a size of 0 is written and `size` is
+// ignored.
+template <typename T>
+void AppendBoxHeader(const jxl::BoxType& type, size_t size, bool unbounded,
+                     T* output) {
+  uint64_t box_size = 0;
+  bool large_size = false;
+  if (!unbounded) {
+    // Widen before adding so the sum cannot wrap where size_t is 32 bits.
+    box_size = static_cast<uint64_t>(size) + 8;
+    if (box_size >= 0x100000000ull) {
+      large_size = true;
+      // The extended size field is part of the header, and therefore of the
+      // box, so it must be counted as well.
+      box_size += 8;
+    }
+  }
+
+  // A 32-bit size of 1 signals that the real size follows the type as a
+  // 64-bit value.
+  StoreBE32(large_size ? 1u : static_cast<uint32_t>(box_size),
+            Extend(output, 4));
+
+  for (size_t i = 0; i < 4; i++) {
+    output->push_back(*(type.data() + i));
+  }
+
+  if (large_size) {
+    StoreBE64(box_size, Extend(output, 8));
+  }
+}
+
+}  // namespace jxl
+
+// Complete state of one encoder instance (the type behind the public
+// JxlEncoder handle).
+struct JxlEncoderStruct {
+  // Memory manager used for the encoder's own allocations. Declared first:
+  // the thread_pool deleter below is built from its address.
+  JxlMemoryManager memory_manager;
+  // Thread pool installed by JxlEncoderSetParallelRunner; null until then.
+  jxl::MemoryManagerUniquePtr<jxl::ThreadPool> thread_pool{
+      nullptr, jxl::MemoryManagerDeleteHelper(&memory_manager)};
+  // Options objects owned by this encoder.
+  std::vector<jxl::MemoryManagerUniquePtr<JxlEncoderOptions>> encoder_options;
+
+  // Frames added by the caller and not yet encoded.
+  std::vector<jxl::MemoryManagerUniquePtr<jxl::JxlEncoderQueuedFrame>>
+      input_frame_queue;
+  // Encoded bytes not yet handed to the caller by JxlEncoderProcessOutput.
+  std::vector<uint8_t> output_byte_queue;
+
+  // Set by JxlEncoderUseContainer.
+  bool use_container = false;
+  // Set by JxlEncoderStoreJPEGMetadata.
+  bool store_jpeg_metadata = false;
+  jxl::CodecMetadata metadata;
+  // Serialized JPEG reconstruction data, filled by JxlEncoderAddJPEGFrame
+  // when store_jpeg_metadata is true.
+  std::vector<uint8_t> jpeg_metadata;
+
+  bool wrote_bytes = false;
+  jxl::CompressParams last_used_cparams;
+
+  // Set by JxlEncoderCloseInput; further frames are rejected afterwards.
+  bool input_closed = false;
+  bool basic_info_set = false;
+  bool color_encoding_set = false;
+
+  // Takes the first frame in the input_frame_queue, encodes it, and appends the
+  // bytes to the output_byte_queue.
+  JxlEncoderStatus RefillOutputByteQueue();
+
+  // Appends the bytes of a JXL box header with the provided type and size to
+  // the end of the output_byte_queue. If unbounded is true, the size won't be
+  // added to the header and the box will be assumed to continue until EOF.
+  void AppendBoxHeader(const jxl::BoxType& type, size_t size, bool unbounded);
+};
+
+// Options handle (the type behind the public JxlEncoderOptions): the option
+// values plus a pointer to the encoder they are used with.
+struct JxlEncoderOptionsStruct {
+  // Encoder that frames added with these options are queued on.
+  JxlEncoder* enc;
+  // Settings copied into each frame queued with these options.
+  jxl::JxlEncoderOptionsValues values;
+};
+
+#endif  // LIB_JXL_ENCODE_INTERNAL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/encode_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/encode_test.cc
new file mode 100644
index 0000000000..22425a8292
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/encode_test.cc
@@ -0,0 +1,597 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "jxl/encode.h"
+
+#include "gtest/gtest.h"
+#include "jxl/encode_cxx.h"
+#include "lib/extras/codec.h"
+#include "lib/jxl/dec_file.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/encode_internal.h"
+#include "lib/jxl/jpeg/dec_jpeg_data.h"
+#include "lib/jxl/jpeg/dec_jpeg_data_writer.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testdata.h"
+
+// A pixel frame offered after JxlEncoderCloseInput must be rejected, even
+// though basic info and color encoding are both set successfully.
+TEST(EncodeTest, AddFrameAfterCloseInputTest) {
+  JxlEncoderPtr encoder = JxlEncoderMake(nullptr);
+  EXPECT_NE(nullptr, encoder.get());
+
+  JxlEncoderCloseInput(encoder.get());
+
+  const size_t width = 64;
+  const size_t height = 64;
+  JxlPixelFormat format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  std::vector<uint8_t> samples =
+      jxl::test::GetSomeTestImage(width, height, 4, 0);
+
+  jxl::CodecInOut input_io =
+      jxl::test::SomeTestImageToCodecInOut(samples, 4, width, height);
+
+  JxlBasicInfo info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&info, &format);
+  info.xsize = width;
+  info.ysize = height;
+  info.uses_original_profile = false;
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(encoder.get(), &info));
+
+  JxlColorEncoding srgb;
+  JxlColorEncodingSetToSRGB(&srgb, /*is_gray=*/format.num_channels < 3);
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetColorEncoding(encoder.get(), &srgb));
+
+  JxlEncoderOptions* frame_options =
+      JxlEncoderOptionsCreate(encoder.get(), nullptr);
+  const JxlEncoderStatus add_status = JxlEncoderAddImageFrame(
+      frame_options, &format, samples.data(), samples.size());
+  EXPECT_EQ(JXL_ENC_ERROR, add_status);
+}
+
+// A JPEG frame offered after JxlEncoderCloseInput must be rejected.
+TEST(EncodeTest, AddJPEGAfterCloseTest) {
+  JxlEncoderPtr encoder = JxlEncoderMake(nullptr);
+  EXPECT_NE(nullptr, encoder.get());
+
+  JxlEncoderCloseInput(encoder.get());
+
+  const std::string jpeg_path =
+      "imagecompression.info/flower_foveon.png.im_q85_420.jpg";
+  const jxl::PaddedBytes jpeg_bytes = jxl::ReadTestData(jpeg_path);
+  // The fixture itself has to be a decodable image for the test to mean
+  // anything.
+  jxl::CodecInOut decoded_fixture;
+  ASSERT_TRUE(SetFromBytes(jxl::Span<const uint8_t>(jpeg_bytes),
+                           &decoded_fixture, /*pool=*/nullptr));
+
+  JxlEncoderOptions* frame_options =
+      JxlEncoderOptionsCreate(encoder.get(), nullptr);
+
+  const JxlEncoderStatus add_status = JxlEncoderAddJPEGFrame(
+      frame_options, jpeg_bytes.data(), jpeg_bytes.size());
+  EXPECT_EQ(JXL_ENC_ERROR, add_status);
+}
+
+// A pixel frame must be rejected while the color encoding is still unset,
+// even though basic info has been provided.
+TEST(EncodeTest, AddFrameBeforeColorEncodingTest) {
+  JxlEncoderPtr encoder = JxlEncoderMake(nullptr);
+  EXPECT_NE(nullptr, encoder.get());
+
+  const size_t width = 64;
+  const size_t height = 64;
+  JxlPixelFormat format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  std::vector<uint8_t> samples =
+      jxl::test::GetSomeTestImage(width, height, 4, 0);
+
+  jxl::CodecInOut input_io =
+      jxl::test::SomeTestImageToCodecInOut(samples, 4, width, height);
+
+  JxlBasicInfo info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&info, &format);
+  info.xsize = width;
+  info.ysize = height;
+  info.uses_original_profile = false;
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(encoder.get(), &info));
+
+  JxlEncoderOptions* frame_options =
+      JxlEncoderOptionsCreate(encoder.get(), nullptr);
+  const JxlEncoderStatus add_status = JxlEncoderAddImageFrame(
+      frame_options, &format, samples.data(), samples.size());
+  EXPECT_EQ(JXL_ENC_ERROR, add_status);
+}
+
+// A pixel frame must be rejected while basic info is still unset, even though
+// the color encoding has been provided.
+TEST(EncodeTest, AddFrameBeforeBasicInfoTest) {
+  JxlEncoderPtr encoder = JxlEncoderMake(nullptr);
+  EXPECT_NE(nullptr, encoder.get());
+
+  const size_t width = 64;
+  const size_t height = 64;
+  JxlPixelFormat format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  std::vector<uint8_t> samples =
+      jxl::test::GetSomeTestImage(width, height, 4, 0);
+
+  jxl::CodecInOut input_io =
+      jxl::test::SomeTestImageToCodecInOut(samples, 4, width, height);
+
+  JxlColorEncoding srgb;
+  JxlColorEncodingSetToSRGB(&srgb, /*is_gray=*/format.num_channels < 3);
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetColorEncoding(encoder.get(), &srgb));
+
+  JxlEncoderOptions* frame_options =
+      JxlEncoderOptionsCreate(encoder.get(), nullptr);
+  const JxlEncoderStatus add_status = JxlEncoderAddImageFrame(
+      frame_options, &format, samples.data(), samples.size());
+  EXPECT_EQ(JXL_ENC_ERROR, add_status);
+}
+
+// Creating an encoder without a custom memory manager yields a usable handle
+// that can be destroyed again.
+TEST(EncodeTest, DefaultAllocTest) {
+  JxlEncoder* const encoder = JxlEncoderCreate(nullptr);
+  EXPECT_NE(nullptr, encoder);
+  JxlEncoderDestroy(encoder);
+}
+
+// An encoder created with a custom memory manager must route its allocation
+// and its release through the supplied callbacks.
+TEST(EncodeTest, CustomAllocTest) {
+  struct CalledCounters {
+    int allocs = 0;
+    int frees = 0;
+  } counters;
+
+  // Counting wrappers around malloc/free; the counters travel via `opaque`.
+  JxlMemoryManager counting_manager;
+  counting_manager.opaque = &counters;
+  counting_manager.alloc = [](void* opaque, size_t size) {
+    CalledCounters* const stats = static_cast<CalledCounters*>(opaque);
+    stats->allocs++;
+    return malloc(size);
+  };
+  counting_manager.free = [](void* opaque, void* address) {
+    CalledCounters* const stats = static_cast<CalledCounters*>(opaque);
+    stats->frees++;
+    free(address);
+  };
+
+  {
+    // While the encoder is alive: at least one allocation, nothing freed yet.
+    JxlEncoderPtr encoder = JxlEncoderMake(&counting_manager);
+    EXPECT_NE(nullptr, encoder.get());
+    EXPECT_LE(1, counters.allocs);
+    EXPECT_EQ(0, counters.frees);
+  }
+  // Leaving the scope destroyed the encoder, which must have freed memory.
+  EXPECT_LE(1, counters.frees);
+}
+
+// Setting a null parallel runner on a fresh encoder is accepted.
+TEST(EncodeTest, DefaultParallelRunnerTest) {
+  JxlEncoderPtr encoder = JxlEncoderMake(nullptr);
+  EXPECT_NE(nullptr, encoder.get());
+  const JxlEncoderStatus status =
+      JxlEncoderSetParallelRunner(encoder.get(), nullptr, nullptr);
+  EXPECT_EQ(JXL_ENC_SUCCESS, status);
+}
+
+// Encodes one xsize x ysize test image with `options` on `enc`, decodes the
+// result and checks that the Butteraugli distance to the input is at most 3.0.
+// `enc` must not yet have basic info or color encoding set; its input is
+// closed by this helper.
+void VerifyFrameEncoding(size_t xsize, size_t ysize, JxlEncoder* enc,
+                         const JxlEncoderOptions* options) {
+  JxlPixelFormat format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  std::vector<uint8_t> samples =
+      jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+
+  jxl::CodecInOut reference_io =
+      jxl::test::SomeTestImageToCodecInOut(samples, 4, xsize, ysize);
+
+  JxlBasicInfo info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&info, &format);
+  info.xsize = xsize;
+  info.ysize = ysize;
+  // Lossless encoding keeps the original profile; lossy encoding does not.
+  info.uses_original_profile = options->values.lossless ? true : false;
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &info));
+
+  JxlColorEncoding srgb;
+  JxlColorEncodingSetToSRGB(&srgb, /*is_gray=*/format.num_channels < 3);
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetColorEncoding(enc, &srgb));
+
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderAddImageFrame(options, &format, samples.data(),
+                                    samples.size()));
+  JxlEncoderCloseInput(enc);
+
+  // Drain the encoder, doubling the output buffer whenever it asks for more.
+  std::vector<uint8_t> compressed(64);
+  uint8_t* next_out = compressed.data();
+  size_t avail_out = compressed.size();
+  JxlEncoderStatus status;
+  for (;;) {
+    status = JxlEncoderProcessOutput(enc, &next_out, &avail_out);
+    if (status != JXL_ENC_NEED_MORE_OUTPUT) break;
+    const size_t written = next_out - compressed.data();
+    compressed.resize(compressed.size() * 2);
+    next_out = compressed.data() + written;
+    avail_out = compressed.size() - written;
+  }
+  compressed.resize(next_out - compressed.data());
+  EXPECT_EQ(JXL_ENC_SUCCESS, status);
+
+  jxl::DecompressParams dparams;
+  jxl::CodecInOut decoded_io;
+  const jxl::Span<const uint8_t> encoded(compressed.data(), compressed.size());
+  EXPECT_TRUE(jxl::DecodeFile(dparams, encoded, &decoded_io, /*pool=*/nullptr));
+
+  jxl::ButteraugliParams ba;
+  EXPECT_LE(ButteraugliDistance(reference_io, decoded_io, ba,
+                                /*distmap=*/nullptr, nullptr),
+            3.0f);
+}
+
+// Convenience overload that runs the check on a 63 x 129 image.
+void VerifyFrameEncoding(JxlEncoder* enc, const JxlEncoderOptions* options) {
+  const size_t default_xsize = 63;
+  const size_t default_ysize = 129;
+  VerifyFrameEncoding(default_xsize, default_ysize, enc, options);
+}
+
+// Round-trips one frame through a fresh encoder with default options.
+TEST(EncodeTest, FrameEncodingTest) {
+  JxlEncoderPtr encoder = JxlEncoderMake(nullptr);
+  EXPECT_NE(nullptr, encoder.get());
+  const JxlEncoderOptions* default_options =
+      JxlEncoderOptionsCreate(encoder.get(), nullptr);
+  VerifyFrameEncoding(encoder.get(), default_options);
+}
+
+// After JxlEncoderReset the same encoder must be able to encode a second,
+// differently sized image from scratch.
+TEST(EncodeTest, EncoderResetTest) {
+  JxlEncoderPtr encoder = JxlEncoderMake(nullptr);
+  EXPECT_NE(nullptr, encoder.get());
+
+  const JxlEncoderOptions* first_options =
+      JxlEncoderOptionsCreate(encoder.get(), nullptr);
+  VerifyFrameEncoding(50, 200, encoder.get(), first_options);
+
+  JxlEncoderReset(encoder.get());
+
+  // Options are created anew: the reset discarded the previous ones.
+  const JxlEncoderOptions* second_options =
+      JxlEncoderOptionsCreate(encoder.get(), nullptr);
+  VerifyFrameEncoding(157, 77, encoder.get(), second_options);
+}
+
+// Exercises the option setters: accepted values must reach the compress
+// params actually used for encoding, and out-of-range values must be
+// rejected. Each case uses its own encoder.
+TEST(EncodeTest, OptionsTest) {
+  // Effort 5 maps to the "hare" speed tier.
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderOptions* options = JxlEncoderOptionsCreate(enc.get(), NULL);
+    EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderOptionsSetEffort(options, 5));
+    VerifyFrameEncoding(enc.get(), options);
+    EXPECT_EQ(jxl::SpeedTier::kHare, enc->last_used_cparams.speed_tier);
+  }
+
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderOptions* options = JxlEncoderOptionsCreate(enc.get(), NULL);
+    // Lower than currently supported values
+    EXPECT_EQ(JXL_ENC_ERROR, JxlEncoderOptionsSetEffort(options, 2));
+    // Higher than currently supported values
+    EXPECT_EQ(JXL_ENC_ERROR, JxlEncoderOptionsSetEffort(options, 10));
+  }
+
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderOptions* options = JxlEncoderOptionsCreate(enc.get(), NULL);
+    EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderOptionsSetLossless(options, JXL_TRUE));
+    VerifyFrameEncoding(enc.get(), options);
+    EXPECT_EQ(true, enc->last_used_cparams.IsLossless());
+  }
+
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderOptions* options = JxlEncoderOptionsCreate(enc.get(), NULL);
+    EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderOptionsSetDistance(options, 0.5));
+    VerifyFrameEncoding(enc.get(), options);
+    EXPECT_EQ(0.5, enc->last_used_cparams.butteraugli_distance);
+  }
+
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    // Same sanity check as every other case in this test.
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderOptions* options = JxlEncoderOptionsCreate(enc.get(), NULL);
+    // Disallowed negative distance
+    EXPECT_EQ(JXL_ENC_ERROR, JxlEncoderOptionsSetDistance(options, -1));
+  }
+
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderOptions* options = JxlEncoderOptionsCreate(enc.get(), NULL);
+    EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderOptionsSetDecodingSpeed(options, 2));
+    VerifyFrameEncoding(enc.get(), options);
+    EXPECT_EQ(2, enc->last_used_cparams.decoding_speed_tier);
+  }
+
+  {
+    JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+    EXPECT_NE(nullptr, enc.get());
+    JxlEncoderOptions* options = JxlEncoderOptionsCreate(enc.get(), NULL);
+    // Decoding speed tiers are limited to [0, 4].
+    EXPECT_EQ(JXL_ENC_ERROR, JxlEncoderOptionsSetDecodingSpeed(options, -1));
+    EXPECT_EQ(JXL_ENC_ERROR, JxlEncoderOptionsSetDecodingSpeed(options, 5));
+  }
+}
+
+namespace {
+// Returns a copy of buf from offset to offset+size, or a new zeroed vector of
+// `size` bytes if that range is not fully inside buf. A range that ends
+// exactly at the end of buf is in bounds.
+const std::vector<uint8_t> SliceSpan(const jxl::Span<const uint8_t>& buf,
+                                     size_t offset, size_t size) {
+  // Compare against the remaining length instead of forming offset + size, so
+  // the bounds check itself cannot wrap around.
+  if (offset > buf.size() || size > buf.size() - offset) {
+    return std::vector<uint8_t>(size, 0);
+  }
+  return std::vector<uint8_t>(buf.data() + offset, buf.data() + offset + size);
+}
+
+// Minimal parser for a single container box, used to inspect encoder output.
+struct Box {
+  // The type of the box.
+  // If "uuid", use extended_type instead
+  char type[4] = {0, 0, 0, 0};
+
+  // The extended_type is only used when type == "uuid".
+  // Extended types are not used in JXL. However, the box format itself
+  // supports this so they are handled correctly.
+  char extended_type[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+  // Box data.
+  jxl::Span<const uint8_t> data = jxl::Span<const uint8_t>(nullptr, 0);
+
+  // If the size is not given, the datasize extends to the end of the file.
+  // If this field is false, the size field is not encoded when the box is
+  // serialized.
+  bool data_size_given = true;
+
+  // If successful, returns true and sets `in` to be the rest data (if any).
+  // If `in` contains a box with a size larger than `in.size()`, or is too
+  // short to hold the header of an unbounded box, will not modify `in`, and
+  // will return true but the data `Span<uint8_t>` will remain set to nullptr.
+  // If unsuccessful, returns error and doesn't modify `in`.
+  jxl::Status Decode(jxl::Span<const uint8_t>* in) {
+    // Total box_size including this header itself.
+    uint64_t box_size = LoadBE32(SliceSpan(*in, 0, 4).data());
+    size_t pos = 4;
+
+    memcpy(type, SliceSpan(*in, pos, 4).data(), 4);
+    pos += 4;
+
+    if (box_size == 1) {
+      // If the size is 1, it indicates extended size read from 64-bit integer.
+      box_size = LoadBE64(SliceSpan(*in, pos, 8).data());
+      pos += 8;
+    }
+
+    if (!memcmp("uuid", type, 4)) {
+      memcpy(extended_type, SliceSpan(*in, pos, 16).data(), 16);
+      pos += 16;
+    }
+
+    // This is the end of the box header, the box data begins here. Handle
+    // the data size now.
+    const size_t header_size = pos;
+
+    if (box_size != 0) {
+      if (box_size < header_size) {
+        return JXL_FAILURE("Invalid box size");
+      }
+      if (box_size > in->size()) {
+        // The box is fine, but the input is too short.
+        return true;
+      }
+      data_size_given = true;
+      data = jxl::Span<const uint8_t>(in->data() + header_size,
+                                      box_size - header_size);
+    } else {
+      if (in->size() < header_size) {
+        // Not even the header is complete (SliceSpan handed back zeros for
+        // the missing part). Treat it like any other short input rather than
+        // letting in->size() - header_size wrap around.
+        return true;
+      }
+      data_size_given = false;
+      data = jxl::Span<const uint8_t>(in->data() + header_size,
+                                      in->size() - header_size);
+    }
+
+    *in = jxl::Span<const uint8_t>(in->data() + header_size + data.size(),
+                                   in->size() - header_size - data.size());
+    return true;
+  }
+};
+
+// Minimal parser for a whole container file: validates the leading "JXL "
+// signature box and "ftyp" box, then collects every following box.
+struct Container {
+  // Boxes found after the signature and ftyp boxes, in file order.
+  std::vector<Box> boxes;
+
+  // If successful, returns true and sets `in` to be the rest data (if any).
+  // If unsuccessful, returns error and doesn't modify `in`.
+  jxl::Status Decode(jxl::Span<const uint8_t>* in) {
+    boxes.clear();
+
+    // First box: type "JXL " with the 4-byte payload 0xd 0xa 0x87 0xa.
+    Box signature_box;
+    JXL_RETURN_IF_ERROR(signature_box.Decode(in));
+    if (memcmp("JXL ", signature_box.type, 4) != 0) {
+      return JXL_FAILURE("Invalid magic signature");
+    }
+    if (signature_box.data.size() != 4)
+      return JXL_FAILURE("Invalid magic signature");
+    if (signature_box.data[0] != 0xd || signature_box.data[1] != 0xa ||
+        signature_box.data[2] != 0x87 || signature_box.data[3] != 0xa) {
+      return JXL_FAILURE("Invalid magic signature");
+    }
+
+    // Second box: type "ftyp" with a fixed 12-byte payload.
+    Box ftyp_box;
+    JXL_RETURN_IF_ERROR(ftyp_box.Decode(in));
+    if (memcmp("ftyp", ftyp_box.type, 4) != 0) {
+      return JXL_FAILURE("Invalid ftyp");
+    }
+    if (ftyp_box.data.size() != 12) return JXL_FAILURE("Invalid ftyp");
+    const char* expected = "jxl \0\0\0\0jxl ";
+    if (memcmp(expected, ftyp_box.data.data(), 12) != 0)
+      return JXL_FAILURE("Invalid ftyp");
+
+    // Everything else: keep decoding boxes until the input is used up.
+    while (in->size() > 0) {
+      Box box = {};
+      JXL_RETURN_IF_ERROR(box.Decode(in));
+      if (box.data.data() == nullptr) {
+        // The decoding encountered a box, but not enough data yet.
+        return true;
+      }
+      boxes.emplace_back(box);
+    }
+
+    return true;
+  }
+};
+
+}  // namespace
+
+// With container output enabled, a single encoded frame must come out as a
+// well-formed container whose first box is a "jxlc" box with an explicit
+// size.
+TEST(EncodeTest, SingleFrameBoundedJXLCTest) {
+  JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+  EXPECT_NE(nullptr, enc.get());
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseContainer(enc.get(), JXL_TRUE));
+  JxlEncoderOptions* options = JxlEncoderOptionsCreate(enc.get(), NULL);
+
+  size_t xsize = 71;
+  size_t ysize = 23;
+  JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+  std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+
+  JxlBasicInfo basic_info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+  basic_info.xsize = xsize;
+  basic_info.ysize = ysize;
+  basic_info.uses_original_profile = false;
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+  JxlColorEncoding color_encoding;
+  JxlColorEncodingSetToSRGB(&color_encoding,
+                            /*is_gray=*/false);
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderAddImageFrame(options, &pixel_format, pixels.data(),
+                                    pixels.size()));
+  JxlEncoderCloseInput(enc.get());
+
+  // Drain the encoder, doubling the output buffer whenever it asks for more.
+  std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+  uint8_t* next_out = compressed.data();
+  size_t avail_out = compressed.size() - (next_out - compressed.data());
+  JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+  while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+    process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out);
+    if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+      size_t offset = next_out - compressed.data();
+      compressed.resize(compressed.size() * 2);
+      next_out = compressed.data() + offset;
+      avail_out = compressed.size() - offset;
+    }
+  }
+  compressed.resize(next_out - compressed.data());
+  EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+
+  Container container = {};
+  jxl::Span<const uint8_t> encoded_span =
+      jxl::Span<const uint8_t>(compressed.data(), compressed.size());
+  EXPECT_TRUE(container.Decode(&encoded_span));
+  EXPECT_EQ(0, encoded_span.size());
+  // Stop here if no box was decoded; indexing boxes[0] would otherwise read
+  // past the end of an empty vector instead of reporting a test failure.
+  ASSERT_FALSE(container.boxes.empty());
+  EXPECT_EQ(0, memcmp("jxlc", container.boxes[0].type, 4));
+  EXPECT_EQ(true, container.boxes[0].data_size_given);
+}
+
+// Transcodes a JPEG with container output and stored JPEG metadata, then
+// rebuilds the JPEG from the "jbrd" and "jxlc" boxes and requires the result
+// to be byte-identical to the original file.
+TEST(EncodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGReconstructionTest)) {
+  const std::string jpeg_path =
+      "imagecompression.info/flower_foveon.png.im_q85_420.jpg";
+  const jxl::PaddedBytes orig = jxl::ReadTestData(jpeg_path);
+  jxl::CodecInOut orig_io;
+  ASSERT_TRUE(
+      SetFromBytes(jxl::Span<const uint8_t>(orig), &orig_io, /*pool=*/nullptr));
+
+  JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+  JxlEncoderOptions* options = JxlEncoderOptionsCreate(enc.get(), NULL);
+
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseContainer(enc.get(), JXL_TRUE));
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderStoreJPEGMetadata(enc.get(), JXL_TRUE));
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderAddJPEGFrame(options, orig.data(), orig.size()));
+  JxlEncoderCloseInput(enc.get());
+
+  // Drain the encoder, doubling the output buffer whenever it asks for more.
+  std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+  uint8_t* next_out = compressed.data();
+  size_t avail_out = compressed.size() - (next_out - compressed.data());
+  JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+  while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+    process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out);
+    if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+      size_t offset = next_out - compressed.data();
+      compressed.resize(compressed.size() * 2);
+      next_out = compressed.data() + offset;
+      avail_out = compressed.size() - offset;
+    }
+  }
+  compressed.resize(next_out - compressed.data());
+  EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+
+  Container container = {};
+  jxl::Span<const uint8_t> encoded_span =
+      jxl::Span<const uint8_t>(compressed.data(), compressed.size());
+  EXPECT_TRUE(container.Decode(&encoded_span));
+  EXPECT_EQ(0, encoded_span.size());
+  // Both boxes are indexed below; stop instead of reading past the end of the
+  // vector if the container holds fewer than two.
+  ASSERT_LE(2u, container.boxes.size());
+  EXPECT_EQ(0, memcmp("jbrd", container.boxes[0].type, 4));
+  EXPECT_EQ(0, memcmp("jxlc", container.boxes[1].type, 4));
+
+  jxl::CodecInOut decoded_io;
+  decoded_io.Main().jpeg_data = jxl::make_unique<jxl::jpeg::JPEGData>();
+  EXPECT_TRUE(jxl::jpeg::DecodeJPEGData(container.boxes[0].data,
+                                        decoded_io.Main().jpeg_data.get()));
+
+  jxl::DecompressParams dparams;
+  dparams.keep_dct = true;
+  EXPECT_TRUE(
+      jxl::DecodeFile(dparams, container.boxes[1].data, &decoded_io, nullptr));
+
+  std::vector<uint8_t> decoded_jpeg_bytes;
+  auto write = [&decoded_jpeg_bytes](const uint8_t* buf, size_t len) {
+    decoded_jpeg_bytes.insert(decoded_jpeg_bytes.end(), buf, buf + len);
+    return len;
+  };
+  EXPECT_TRUE(jxl::jpeg::WriteJpeg(*decoded_io.Main().jpeg_data, write));
+
+  // The sizes must match before the contents are compared: memcmp below reads
+  // orig.size() bytes from both buffers.
+  ASSERT_EQ(decoded_jpeg_bytes.size(), orig.size());
+  EXPECT_EQ(0, memcmp(decoded_jpeg_bytes.data(), orig.data(), orig.size()));
+}
+
+// Transcodes a JPEG frame in all four combinations of "basic info set by the
+// caller or not" and "color encoding set by the caller or not", and checks
+// that the decoded result stays within a Butteraugli distance of 2.5 of the
+// original image.
+TEST(EncodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGFrameTest)) {
+  for (int skip_basic_info = 0; skip_basic_info < 2; skip_basic_info++) {
+    for (int skip_color_encoding = 0; skip_color_encoding < 2;
+         skip_color_encoding++) {
+      const std::string jpeg_path =
+          "imagecompression.info/flower_foveon.png.im_q85_420.jpg";
+      const jxl::PaddedBytes orig = jxl::ReadTestData(jpeg_path);
+      jxl::CodecInOut orig_io;
+      ASSERT_TRUE(SetFromBytes(jxl::Span<const uint8_t>(orig), &orig_io,
+                               /*pool=*/nullptr));
+
+      // A fresh encoder per combination so no state carries over.
+      JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+      JxlEncoderOptions* options = JxlEncoderOptionsCreate(enc.get(), NULL);
+
+      if (!skip_basic_info) {
+        JxlBasicInfo basic_info;
+        basic_info.exponent_bits_per_sample = 0;
+        basic_info.bits_per_sample = 8;
+        basic_info.alpha_bits = 0;
+        basic_info.alpha_exponent_bits = 0;
+        basic_info.xsize = orig_io.xsize();
+        basic_info.ysize = orig_io.ysize();
+        basic_info.uses_original_profile = true;
+        EXPECT_EQ(JXL_ENC_SUCCESS,
+                  JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+      }
+      if (!skip_color_encoding) {
+        JxlColorEncoding color_encoding;
+        JxlColorEncodingSetToSRGB(&color_encoding, /*is_gray=*/false);
+        EXPECT_EQ(JXL_ENC_SUCCESS,
+                  JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+      }
+      EXPECT_EQ(JXL_ENC_SUCCESS,
+                JxlEncoderAddJPEGFrame(options, orig.data(), orig.size()));
+      JxlEncoderCloseInput(enc.get());
+
+      // Drain the encoder, doubling the output buffer whenever it asks for
+      // more.
+      std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+      uint8_t* next_out = compressed.data();
+      size_t avail_out = compressed.size() - (next_out - compressed.data());
+      JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+      while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+        process_result =
+            JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out);
+        if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+          size_t offset = next_out - compressed.data();
+          compressed.resize(compressed.size() * 2);
+          next_out = compressed.data() + offset;
+          avail_out = compressed.size() - offset;
+        }
+      }
+      compressed.resize(next_out - compressed.data());
+      EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+
+      jxl::DecompressParams dparams;
+      jxl::CodecInOut decoded_io;
+      EXPECT_TRUE(jxl::DecodeFile(
+          dparams,
+          jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+          &decoded_io, /*pool=*/nullptr));
+
+      jxl::ButteraugliParams ba;
+      EXPECT_LE(ButteraugliDistance(orig_io, decoded_io, ba,
+                                    /*distmap=*/nullptr, nullptr),
+                2.5f);
+    }
+  }
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/entropy_coder.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/entropy_coder.cc
new file mode 100644
index 0000000000..40edd10445
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/entropy_coder.cc
@@ -0,0 +1,70 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/entropy_coder.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_context_map.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+Status DecodeBlockCtxMap(BitReader* br, BlockCtxMap* block_ctx_map) {
+  // One leading bit selects the default-constructed map; nothing else is read.
+  if (br->ReadFixedBits<1>()) {
+    *block_ctx_map = BlockCtxMap();
+    return true;
+  }
+  auto& dc_thresholds = block_ctx_map->dc_thresholds;
+  auto& qf_thresholds = block_ctx_map->qf_thresholds;
+  auto& ctx_map = block_ctx_map->ctx_map;
+  // Each DC threshold list of size n multiplies the DC context count by n + 1.
+  block_ctx_map->num_dc_ctxs = 1;
+  for (size_t c = 0; c < 3; ++c) {
+    auto& thresholds = dc_thresholds[c];
+    thresholds.resize(br->ReadFixedBits<4>());
+    block_ctx_map->num_dc_ctxs *= thresholds.size() + 1;
+    for (int& value : thresholds) {
+      value = UnpackSigned(U32Coder::Read(kDCThresholdDist, br));
+    }
+  }
+  qf_thresholds.resize(br->ReadFixedBits<4>());
+  for (uint32_t& value : qf_thresholds) {
+    value = U32Coder::Read(kQFThresholdDist, br) + 1;
+  }
+  if (block_ctx_map->num_dc_ctxs * (qf_thresholds.size() + 1) > 64) {
+    return JXL_FAILURE("Invalid block context map: too big");
+  }
+  ctx_map.resize(3 * kNumOrders * block_ctx_map->num_dc_ctxs *
+                 (qf_thresholds.size() + 1));
+  JXL_RETURN_IF_ERROR(DecodeContextMap(&ctx_map, &block_ctx_map->num_ctxs, br));
+  if (block_ctx_map->num_ctxs > 16) {
+    return JXL_FAILURE("Invalid block context map: too many distinct contexts");
+  }
+  return true;
+}
+
+// constexpr uint8_t jxl::kDefaultCtxMap[];  // from ac_context.h
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/entropy_coder.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/entropy_coder.h
new file mode 100644
index 0000000000..e4afa7a631
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/entropy_coder.h
@@ -0,0 +1,45 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENTROPY_CODER_H_
+#define LIB_JXL_ENTROPY_CODER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/field_encodings.h"
+
+// Entropy coding and context modeling of DC and AC coefficients, as well as AC
+// strategy and quantization field.
+
+namespace jxl {
+
+static JXL_INLINE int32_t PredictFromTopAndLeft(
+    const int32_t* const JXL_RESTRICT row_top,
+    const int32_t* const JXL_RESTRICT row, size_t x, int32_t default_val) {
+  // Predicts column x from its top/left neighbours; default_val if none exist.
+  if (x == 0) return row_top == nullptr ? default_val : row_top[x];
+  if (row_top == nullptr) return row[x - 1];
+  // Sum in 64 bits: an int32_t sum of two large neighbours would overflow
+  // (undefined behaviour). Results are unchanged wherever the sum fit before.
+  const int64_t sum = static_cast<int64_t>(row_top[x]) + row[x - 1] + 1;
+  return static_cast<int32_t>(sum / 2);
+}
+
+static constexpr U32Enc kDCThresholdDist(Bits(4), BitsOffset(8, 16),
+                                         BitsOffset(16, 272),
+                                         BitsOffset(32, 65808));
+// U32Enc for DC thresholds (above) and for QF thresholds (below).
+static constexpr U32Enc kQFThresholdDist(Bits(2), BitsOffset(3, 4),
+                                         BitsOffset(5, 12), BitsOffset(8, 44));
+
+Status DecodeBlockCtxMap(BitReader* br, BlockCtxMap* block_ctx_map);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ENTROPY_CODER_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/entropy_coder_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/entropy_coder_test.cc
new file mode 100644
index 0000000000..cce1713d2b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/entropy_coder_test.cc
@@ -0,0 +1,70 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// TODO(deymo): Move these tests to dec_ans.h and common.h
+
+#include <stdint.h>
+
+#include <random>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+
+namespace jxl {
+namespace {
+
+TEST(EntropyCoderTest, PackUnpack) {
+  for (int32_t value = -31; value <= 31; ++value) {  // whole range [-31, 31]
+    const uint32_t code = PackSigned(value);
+    EXPECT_LT(code, 63);
+    const int32_t decoded = UnpackSigned(code);
+    EXPECT_EQ(value, decoded);
+  }
+}
+
+struct DummyBitReader {
+  uint32_t nbits, bits;  // expected PeekBits() width, and the value it returns
+  uint32_t PeekBits(uint32_t requested) {
+    EXPECT_EQ(requested, nbits);
+    return bits;
+  }
+  void Consume(uint32_t /*count*/) {}  // no-op: nothing to advance
+};
+
+void HybridUintRoundtrip(HybridUintConfig config, size_t limit = 1 << 24) {
+  // Encode 2^20 pseudo-random integers from [0, limit] (fixed seed), then
+  // check that decoding each (token, nbits, bits) triple restores the input.
+  constexpr size_t kCount = 1 << 20;
+  std::mt19937 rng(0);
+  std::uniform_int_distribution<uint32_t> dist(0, limit);
+  std::vector<uint32_t> values(kCount), tokens(kCount);
+  std::vector<uint32_t> widths(kCount), payloads(kCount);
+  for (size_t k = 0; k < kCount; ++k) {
+    values[k] = dist(rng);
+    config.Encode(values[k], &tokens[k], &widths[k], &payloads[k]);
+  }
+  for (size_t k = 0; k < kCount; ++k) {
+    DummyBitReader reader{widths[k], payloads[k]};
+    EXPECT_EQ(values[k],
+              ANSSymbolReader::ReadHybridUintConfig(config, tokens[k], &reader));
+  }
+}
+
+TEST(HybridUintTest, Test000) {  // limit spelled out; equals the default
+  HybridUintRoundtrip(HybridUintConfig{0, 0, 0}, /*limit=*/1 << 24);
+}
+TEST(HybridUintTest, Test411) {
+  HybridUintRoundtrip(HybridUintConfig{4, 1, 1}, /*limit=*/1 << 24);
+}
+TEST(HybridUintTest, Test420) {
+  HybridUintRoundtrip(HybridUintConfig{4, 2, 0}, /*limit=*/1 << 24);
+}
+TEST(HybridUintTest, Test421) {  // the only case with a reduced value range
+  HybridUintRoundtrip(HybridUintConfig{4, 2, 1}, /*limit=*/256);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/epf.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/epf.cc
new file mode 100644
index 0000000000..1701203d8d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/epf.cc
@@ -0,0 +1,684 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Edge-preserving smoothing: weighted average based on L1 patch similarity.
+
+#include "lib/jxl/epf.h"
+
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <atomic>
+#include <numeric>  // std::accumulate
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/epf.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/filters.h"
+#include "lib/jxl/filters_internal.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Vec;
+
+// The EPF logic treats 8x8 blocks as one unit, each with their own sigma.
+// It should be possible to do two blocks at a time in AVX3 vectors, at some
+// increase in complexity (broadcasting sigma0/1 to lanes 0..7 and 8..15).
+using DF = HWY_CAPPED(float, GroupBorderAssigner::kPaddingXRound);
+using DU = HWY_CAPPED(uint32_t, GroupBorderAssigner::kPaddingXRound);
+
+// kInvSigmaNum / 0.3; Epf*Row copy pixels unfiltered where row_sigma < this.
+constexpr float kMinSigma = -3.90524291751269967465540850526868f;
+// Tag value passed to the Highway ops (Load, Store, Set, ...) in this file.
+DF df;
+
+JXL_INLINE Vec<DF> Weight(Vec<DF> sad, Vec<DF> inv_sigma, Vec<DF> thres) {
+  // (sad * inv_sigma + 1)^2, zeroed in lanes where the unsquared value <= thres.
+  const auto base = MulAdd(sad, inv_sigma, Set(DF(), 1.0f));
+  return IfThenZeroElse(base <= thres, base * base);
+}
+
+template <bool aligned>
+JXL_INLINE void AddPixelStep1(int row, const FilterRows& rows, size_t x,
+                              Vec<DF> sad, Vec<DF> inv_sigma,
+                              const LoopFilter& lf, Vec<DF>* JXL_RESTRICT X,
+                              Vec<DF>* JXL_RESTRICT Y, Vec<DF>* JXL_RESTRICT B,
+                              Vec<DF>* JXL_RESTRICT w) {
+  // Adds neighbour (row, x), weighted via Weight(sad, ...), to X/Y/B and *w.
+  const float* src_x = rows.GetInputRow(row, 0) + x;
+  const float* src_y = rows.GetInputRow(row, 1) + x;
+  const float* src_b = rows.GetInputRow(row, 2) + x;
+  const auto nx = aligned ? Load(DF(), src_x) : LoadU(DF(), src_x);
+  const auto ny = aligned ? Load(DF(), src_y) : LoadU(DF(), src_y);
+  const auto nb = aligned ? Load(DF(), src_b) : LoadU(DF(), src_b);
+  const auto wgt = Weight(sad, inv_sigma, Set(df, lf.epf_pass1_zeroflush));
+  *w += wgt;
+  *X = MulAdd(wgt, nx, *X);
+  *Y = MulAdd(wgt, ny, *Y);
+  *B = MulAdd(wgt, nb, *B);
+}
+
+template <bool aligned>
+JXL_INLINE void AddPixelStep2(int row, const FilterRows& rows, size_t x,
+                              Vec<DF> rx, Vec<DF> ry, Vec<DF> rb,
+                              Vec<DF> inv_sigma, const LoopFilter& lf,
+                              Vec<DF>* JXL_RESTRICT X, Vec<DF>* JXL_RESTRICT Y,
+                              Vec<DF>* JXL_RESTRICT B,
+                              Vec<DF>* JXL_RESTRICT w) {
+  // Neighbour pixel at (row, x); `aligned` selects Load over LoadU.
+  const float* src_x = rows.GetInputRow(row, 0) + x;
+  const float* src_y = rows.GetInputRow(row, 1) + x;
+  const float* src_b = rows.GetInputRow(row, 2) + x;
+  const auto nx = aligned ? Load(DF(), src_x) : LoadU(DF(), src_x);
+  const auto ny = aligned ? Load(DF(), src_y) : LoadU(DF(), src_y);
+  const auto nb = aligned ? Load(DF(), src_b) : LoadU(DF(), src_b);
+
+  // Channel-scaled L1 distance between the neighbour and (rx, ry, rb).
+  auto dist = AbsDiff(nx, rx) * Set(df, lf.epf_channel_scale[0]);
+  dist = MulAdd(AbsDiff(ny, ry), Set(df, lf.epf_channel_scale[1]), dist);
+  dist = MulAdd(AbsDiff(nb, rb), Set(df, lf.epf_channel_scale[2]), dist);
+  const auto wgt = Weight(dist, inv_sigma, Set(df, lf.epf_pass2_zeroflush));
+  *w += wgt;
+  *X = MulAdd(wgt, nx, *X);
+  *Y = MulAdd(wgt, ny, *Y);
+  *B = MulAdd(wgt, nb, *B);
+}
+
+template <class D, class V>
+void GaborishVector(const D df, const float* JXL_RESTRICT row_t,
+                    const float* JXL_RESTRICT row_m,
+                    const float* JXL_RESTRICT row_b, const V w0, const V w1,
+                    const V w2, float* JXL_RESTRICT row_out) {
+// The filter's x0 is aligned to blocks only (8 floats = 32 bytes), so with
+// wider vectors every load is unaligned; the Store is aligned by the caller.
+#undef LoadMaybeU
+#if HWY_CAP_GE512
+#define LoadMaybeU LoadU
+#else
+#define LoadMaybeU Load
+#endif
+  // 3x3 neighbourhood; only the same-column loads may be aligned.
+  const auto up = LoadMaybeU(df, row_t);
+  const auto up_left = LoadU(df, row_t - 1);
+  const auto up_right = LoadU(df, row_t + 1);
+  const auto mid = LoadMaybeU(df, row_m);
+  const auto left = LoadU(df, row_m - 1);
+  const auto right = LoadU(df, row_m + 1);
+  const auto down = LoadMaybeU(df, row_b);
+  const auto down_left = LoadU(df, row_b - 1);
+  const auto down_right = LoadU(df, row_b + 1);
+  // w0 * centre + w1 * edge neighbours + w2 * corner neighbours.
+  const auto edges = (left + right) + (up + down);
+  const auto corners = (up_left + up_right) + (down_left + down_right);
+  const auto filtered = MulAdd(corners, w2, MulAdd(edges, w1, mid * w0));
+  Store(filtered, df, row_out);
+}
+
+void GaborishRow(const FilterRows& rows, const LoopFilter& /* lf */,
+                 const FilterWeights& filter_weights, size_t x0, size_t x1,
+                 size_t /*sigma_x_offset*/, size_t /* image_y_mod_8 */) {
+  JXL_DASSERT(x0 % Lanes(df) == 0);
+  // Runs GaborishVector per channel from column x0 until x1 is reached.
+  const float* JXL_RESTRICT gab_weights = filter_weights.gab_weights;
+  for (size_t c = 0; c < 3; c++) {
+    const float* JXL_RESTRICT row_t = rows.GetInputRow(-1, c);  // row above
+    const float* JXL_RESTRICT row_m = rows.GetInputRow(0, c);   // current row
+    const float* JXL_RESTRICT row_b = rows.GetInputRow(1, c);   // row below
+    float* JXL_RESTRICT row_out = rows.GetOutputRow(c);
+
+    size_t ix = x0;  // next column to filter
+
+#if HWY_CAP_GE512
+    const HWY_FULL(float) dfull;  // Gaborish is not block-dependent.
+
+    // For AVX3, x0 might only be aligned to 8, not 16; if so, do a capped
+    // vector first to ensure full (Store-only!) alignment, then full vectors.
+    const uintptr_t addr = reinterpret_cast<uintptr_t>(row_out + ix);
+    if ((addr % 64) != 0 && ix < x1) {
+      const auto w0 = Set(df, gab_weights[3 * c + 0]);
+      const auto w1 = Set(df, gab_weights[3 * c + 1]);
+      const auto w2 = Set(df, gab_weights[3 * c + 2]);
+      GaborishVector(df, row_t + ix, row_m + ix, row_b + ix, w0, w1, w2,
+                     row_out + ix);
+      ix += Lanes(df);
+    }
+    // Full-width vectors for as long as a whole one still ends at or before x1.
+    const auto wfull0 = Set(dfull, gab_weights[3 * c + 0]);
+    const auto wfull1 = Set(dfull, gab_weights[3 * c + 1]);
+    const auto wfull2 = Set(dfull, gab_weights[3 * c + 2]);
+    for (; ix + Lanes(dfull) <= x1; ix += Lanes(dfull)) {
+      GaborishVector(dfull, row_t + ix, row_m + ix, row_b + ix, wfull0, wfull1,
+                     wfull2, row_out + ix);
+    }
+#endif
+
+    // Non-AVX3 loop, or last capped vector for AVX3, if necessary
+    const auto w0 = Set(df, gab_weights[3 * c + 0]);
+    const auto w1 = Set(df, gab_weights[3 * c + 1]);
+    const auto w2 = Set(df, gab_weights[3 * c + 2]);
+    for (; ix < x1; ix += Lanes(df)) {
+      GaborishVector(df, row_t + ix, row_m + ix, row_b + ix, w0, w1, w2,
+                     row_out + ix);
+    }
+  }
+}
+
+// Step 0: 5x5 plus-shaped kernel with 5 SADs per pixel (3x3
+// plus-shaped). So this makes this filter a 7x7 filter.
+void Epf0Row(const FilterRows& rows, const LoopFilter& lf,
+             const FilterWeights& filter_weights, size_t x0, size_t x1,
+             size_t sigma_x_offset, size_t image_y_mod_8) {
+  JXL_DASSERT(x0 % Lanes(df) == 0);
+  const float* JXL_RESTRICT row_sigma = rows.GetSigmaRow();
+  // Note: filter_weights is not referenced in this function.
+  float sm = lf.epf_pass0_sigma_scale;
+  float bsm = sm * lf.epf_border_sad_mul;
+  // Per-column multipliers: bsm at both ends of the array, sm in between.
+  HWY_ALIGN float sad_mul[kBlockDim] = {bsm, sm, sm, sm, sm, sm, sm, bsm};
+  // When image_y_mod_8 is 0 or kBlockDim - 1, every column gets bsm instead.
+  if (image_y_mod_8 == 0 || image_y_mod_8 == kBlockDim - 1) {
+    for (size_t i = 0; i < kBlockDim; i += Lanes(df)) {
+      Store(Set(df, bsm), df, sad_mul + i);
+    }
+  }
+
+  for (size_t x = x0; x < x1; x += Lanes(df)) {
+    size_t bx = (x + sigma_x_offset) / kBlockDim;  // index into row_sigma
+    size_t ix = (x + sigma_x_offset) % kBlockDim;  // index into sad_mul
+    if (row_sigma[bx] < kMinSigma) {  // below threshold: copy input to output
+      for (size_t c = 0; c < 3; c++) {
+        auto px = Load(df, rows.GetInputRow(0, c) + x);
+        Store(px, df, rows.GetOutputRow(c) + x);
+      }
+      continue;
+    }
+    // This vector `sm` shadows the float `sm` declared above.
+    const auto sm = Load(df, sad_mul + ix);
+    const auto inv_sigma = Set(DF(), row_sigma[bx]) * sm;
+
+    decltype(Zero(df)) sads[12];
+    for (size_t i = 0; i < 12; i++) sads[i] = Zero(df);
+    constexpr std::array<int, 2> sads_off[12] = {
+        {-2, 0}, {-1, -1}, {-1, 0}, {-1, 1}, {0, -2}, {0, -1},
+        {0, 1},  {0, 2},   {1, -1}, {1, 0},  {1, 1},  {2, 0},
+    };  // {row, column} offsets of the 12 neighbours
+
+    // compute sads
+    // TODO(veluca): consider unrolling and optimizing this.
+    for (size_t c = 0; c < 3; c++) {
+      auto scale = Set(df, lf.epf_channel_scale[c]);
+      for (size_t i = 0; i < 12; i++) {
+        auto sad = Zero(df);
+        constexpr std::array<int, 2> plus_off[] = {
+            {0, 0}, {-1, 0}, {0, -1}, {1, 0}, {0, 1}};
+        for (size_t j = 0; j < 5; j++) {
+          const auto r11 = LoadU(
+              df, rows.GetInputRow(plus_off[j][0], c) + x + plus_off[j][1]);
+          const auto c11 =
+              LoadU(df, rows.GetInputRow(sads_off[i][0] + plus_off[j][0], c) +
+                            x + sads_off[i][1] + plus_off[j][1]);
+          sad += AbsDiff(r11, c11);
+        }
+        sads[i] = MulAdd(sad, scale, sads[i]);
+      }
+    }
+    const auto x_cc = LoadU(df, rows.GetInputRow(0, 0) + x);
+    const auto y_cc = LoadU(df, rows.GetInputRow(0, 1) + x);
+    const auto b_cc = LoadU(df, rows.GetInputRow(0, 2) + x);
+
+    auto w = Set(df, 1);  // the centre pixel enters the sums with weight 1
+    auto X = x_cc;
+    auto Y = y_cc;
+    auto B = b_cc;
+
+    for (size_t i = 0; i < 12; i++) {
+      AddPixelStep1</*aligned=*/false>(/*row=*/sads_off[i][0], rows,
+                                       x + sads_off[i][1], sads[i], inv_sigma,
+                                       lf, &X, &Y, &B, &w);
+    }
+    // Divide the weighted sums by the total weight w.
+#if JXL_HIGH_PRECISION
+    auto inv_w = Set(df, 1.0f) / w;
+#else
+    auto inv_w = ApproximateReciprocal(w);
+#endif
+    Store(X * inv_w, df, rows.GetOutputRow(0) + x);
+    Store(Y * inv_w, df, rows.GetOutputRow(1) + x);
+    Store(B * inv_w, df, rows.GetOutputRow(2) + x);
+  }
+}
+
+// Step 1: 3x3 plus-shaped kernel with 5 SADs per pixel (also 3x3
+// plus-shaped). So this makes this filter a 5x5 filter.
+void Epf1Row(const FilterRows& rows, const LoopFilter& lf,
+             const FilterWeights& filter_weights, size_t x0, size_t x1,
+             size_t sigma_x_offset, size_t image_y_mod_8) {
+  JXL_DASSERT(x0 % Lanes(df) == 0);
+  const float* JXL_RESTRICT row_sigma = rows.GetSigmaRow();
+  // Note: filter_weights is not referenced in this function.
+  float sm = 1.0f;
+  float bsm = sm * lf.epf_border_sad_mul;
+  // Per-column multipliers: bsm at both ends of the array, sm in between.
+  HWY_ALIGN float sad_mul[kBlockDim] = {bsm, sm, sm, sm, sm, sm, sm, bsm};
+  // When image_y_mod_8 is 0 or kBlockDim - 1, every column gets bsm instead.
+  if (image_y_mod_8 == 0 || image_y_mod_8 == kBlockDim - 1) {
+    for (size_t i = 0; i < kBlockDim; i += Lanes(df)) {
+      Store(Set(df, bsm), df, sad_mul + i);
+    }
+  }
+
+  for (size_t x = x0; x < x1; x += Lanes(df)) {
+    size_t bx = (x + sigma_x_offset) / kBlockDim;  // index into row_sigma
+    size_t ix = (x + sigma_x_offset) % kBlockDim;  // index into sad_mul
+    if (row_sigma[bx] < kMinSigma) {  // below threshold: copy input to output
+      for (size_t c = 0; c < 3; c++) {
+        auto px = Load(df, rows.GetInputRow(0, c) + x);
+        Store(px, df, rows.GetOutputRow(c) + x);
+      }
+      continue;
+    }
+    // This vector `sm` shadows the float `sm` declared above.
+    const auto sm = Load(df, sad_mul + ix);
+    const auto inv_sigma = Set(DF(), row_sigma[bx]) * sm;
+    auto sad0 = Zero(df);
+    auto sad1 = Zero(df);
+    auto sad2 = Zero(df);
+    auto sad3 = Zero(df);
+    // sad0..sad3 are later used for the top, left, right and bottom neighbour.
+    // compute sads
+    for (size_t c = 0; c < 3; c++) {
+      // center px = 22, px above = 21
+      auto t = Undefined(df);
+
+      const auto p20 = Load(df, rows.GetInputRow(-2, c) + x);
+      const auto p21 = Load(df, rows.GetInputRow(-1, c) + x);
+      auto sad0c = AbsDiff(p20, p21);  // SAD 2, 1
+
+      const auto p11 = LoadU(df, rows.GetInputRow(-1, c) + x - 1);
+      auto sad1c = AbsDiff(p11, p21);  // SAD 1, 2
+
+      const auto p31 = LoadU(df, rows.GetInputRow(-1, c) + x + 1);
+      auto sad2c = AbsDiff(p31, p21);  // SAD 3, 2
+
+      const auto p02 = LoadU(df, rows.GetInputRow(0, c) + x - 2);
+      const auto p12 = LoadU(df, rows.GetInputRow(0, c) + x - 1);
+      sad1c += AbsDiff(p02, p12);  // SAD 1, 2
+      sad0c += AbsDiff(p11, p12);  // SAD 2, 1
+
+      const auto p22 = LoadU(df, rows.GetInputRow(0, c) + x);
+      t = AbsDiff(p12, p22);
+      sad1c += t;  // SAD 1, 2
+      sad2c += t;  // SAD 3, 2
+      t = AbsDiff(p22, p21);
+      auto sad3c = t;  // SAD 2, 3
+      sad0c += t;      // SAD 2, 1
+
+      const auto p32 = LoadU(df, rows.GetInputRow(0, c) + x + 1);
+      sad0c += AbsDiff(p31, p32);  // SAD 2, 1
+      t = AbsDiff(p22, p32);
+      sad1c += t;  // SAD 1, 2
+      sad2c += t;  // SAD 3, 2
+
+      const auto p42 = LoadU(df, rows.GetInputRow(0, c) + x + 2);
+      sad2c += AbsDiff(p42, p32);  // SAD 3, 2
+
+      const auto p13 = LoadU(df, rows.GetInputRow(1, c) + x - 1);
+      sad3c += AbsDiff(p13, p12);  // SAD 2, 3
+
+      const auto p23 = Load(df, rows.GetInputRow(1, c) + x);
+      t = AbsDiff(p22, p23);
+      sad0c += t;                  // SAD 2, 1
+      sad3c += t;                  // SAD 2, 3
+      sad1c += AbsDiff(p13, p23);  // SAD 1, 2
+
+      const auto p33 = LoadU(df, rows.GetInputRow(1, c) + x + 1);
+      sad2c += AbsDiff(p33, p23);  // SAD 3, 2
+      sad3c += AbsDiff(p33, p32);  // SAD 2, 3
+
+      const auto p24 = Load(df, rows.GetInputRow(2, c) + x);
+      sad3c += AbsDiff(p24, p23);  // SAD 2, 3
+      // Fold this channel's SADs into the totals, scaled per channel.
+      auto scale = Set(df, lf.epf_channel_scale[c]);
+      sad0 = MulAdd(sad0c, scale, sad0);
+      sad1 = MulAdd(sad1c, scale, sad1);
+      sad2 = MulAdd(sad2c, scale, sad2);
+      sad3 = MulAdd(sad3c, scale, sad3);
+    }
+    const auto x_cc = Load(df, rows.GetInputRow(0, 0) + x);
+    const auto y_cc = Load(df, rows.GetInputRow(0, 1) + x);
+    const auto b_cc = Load(df, rows.GetInputRow(0, 2) + x);
+
+    auto w = Set(df, 1);  // the centre pixel enters the sums with weight 1
+    auto X = x_cc;
+    auto Y = y_cc;
+    auto B = b_cc;
+
+    // Top row
+    AddPixelStep1</*aligned=*/true>(/*row=*/-1, rows, x, sad0, inv_sigma, lf,
+                                    &X, &Y, &B, &w);
+    // Center
+    AddPixelStep1</*aligned=*/false>(/*row=*/0, rows, x - 1, sad1, inv_sigma,
+                                     lf, &X, &Y, &B, &w);
+    AddPixelStep1</*aligned=*/false>(/*row=*/0, rows, x + 1, sad2, inv_sigma,
+                                     lf, &X, &Y, &B, &w);
+    // Bottom
+    AddPixelStep1</*aligned=*/true>(/*row=*/1, rows, x, sad3, inv_sigma, lf, &X,
+                                    &Y, &B, &w);
+#if JXL_HIGH_PRECISION
+    auto inv_w = Set(df, 1.0f) / w;
+#else
+    auto inv_w = ApproximateReciprocal(w);
+#endif
+    Store(X * inv_w, df, rows.GetOutputRow(0) + x);
+    Store(Y * inv_w, df, rows.GetOutputRow(1) + x);
+    Store(B * inv_w, df, rows.GetOutputRow(2) + x);
+  }
+}
+
+// Step 2: 3x3 plus-shaped kernel with a single reference pixel, run on
+// the output of the previous step.
+void Epf2Row(const FilterRows& rows, const LoopFilter& lf,
+             const FilterWeights& filter_weights, size_t x0, size_t x1,
+             size_t sigma_x_offset, size_t image_y_mod_8) {
+  JXL_DASSERT(x0 % Lanes(df) == 0);
+  const float* JXL_RESTRICT row_sigma = rows.GetSigmaRow();
+  // Note: filter_weights is not referenced in this function.
+  float sm = lf.epf_pass2_sigma_scale;
+  float bsm = sm * lf.epf_border_sad_mul;
+  // Per-column multipliers: bsm at both ends of the array, sm in between.
+  HWY_ALIGN float sad_mul[kBlockDim] = {bsm, sm, sm, sm, sm, sm, sm, bsm};
+  // When image_y_mod_8 is 0 or kBlockDim - 1, every column gets bsm instead.
+  if (image_y_mod_8 == 0 || image_y_mod_8 == kBlockDim - 1) {
+    for (size_t i = 0; i < kBlockDim; i += Lanes(df)) {
+      Store(Set(df, bsm), df, sad_mul + i);
+    }
+  }
+
+  for (size_t x = x0; x < x1; x += Lanes(df)) {
+    size_t bx = (x + sigma_x_offset) / kBlockDim;  // index into row_sigma
+    size_t ix = (x + sigma_x_offset) % kBlockDim;  // index into sad_mul
+
+    if (row_sigma[bx] < kMinSigma) {  // below threshold: copy input to output
+      for (size_t c = 0; c < 3; c++) {
+        auto px = Load(df, rows.GetInputRow(0, c) + x);
+        Store(px, df, rows.GetOutputRow(c) + x);
+      }
+      continue;
+    }
+    // This vector `sm` shadows the float `sm` declared above.
+    const auto sm = Load(df, sad_mul + ix);
+    const auto inv_sigma = Set(DF(), row_sigma[bx]) * sm;
+    // The centre pixel is the reference every neighbour is compared against.
+    const auto x_cc = Load(df, rows.GetInputRow(0, 0) + x);
+    const auto y_cc = Load(df, rows.GetInputRow(0, 1) + x);
+    const auto b_cc = Load(df, rows.GetInputRow(0, 2) + x);
+
+    auto w = Set(df, 1);  // the centre pixel enters the sums with weight 1
+    auto X = x_cc;
+    auto Y = y_cc;
+    auto B = b_cc;
+
+    // Top row
+    AddPixelStep2</*aligned=*/true>(/*row=*/-1, rows, x, x_cc, y_cc, b_cc,
+                                    inv_sigma, lf, &X, &Y, &B, &w);
+    // Center
+    AddPixelStep2</*aligned=*/false>(/*row=*/0, rows, x - 1, x_cc, y_cc, b_cc,
+                                     inv_sigma, lf, &X, &Y, &B, &w);
+    AddPixelStep2</*aligned=*/false>(/*row=*/0, rows, x + 1, x_cc, y_cc, b_cc,
+                                     inv_sigma, lf, &X, &Y, &B, &w);
+    // Bottom
+    AddPixelStep2</*aligned=*/true>(/*row=*/1, rows, x, x_cc, y_cc, b_cc,
+                                    inv_sigma, lf, &X, &Y, &B, &w);
+    // Divide the weighted sums by the total weight w.
+#if JXL_HIGH_PRECISION
+    auto inv_w = Set(df, 1.0f) / w;
+#else
+    auto inv_w = ApproximateReciprocal(w);
+#endif
+    Store(X * inv_w, df, rows.GetOutputRow(0) + x);
+    Store(Y * inv_w, df, rows.GetOutputRow(1) + x);
+    Store(B * inv_w, df, rows.GetOutputRow(2) + x);
+  }
+}
+
+constexpr FilterDefinition kGaborishFilter{&GaborishRow, 1};  // 2nd: .border
+constexpr FilterDefinition kEpf0Filter{&Epf0Row, 3};
+constexpr FilterDefinition kEpf1Filter{&Epf1Row, 2};
+constexpr FilterDefinition kEpf2Filter{&Epf2Row, 1};
+
+void FilterPipelineInit(FilterPipeline* fp, const LoopFilter& lf,
+                        const Image3F& in, const Rect& in_rect,
+                        const Rect& image_rect, size_t image_ysize,
+                        Image3F* out, const Rect& out_rect) {
+  // Resets *fp, appends the steps enabled in lf, wires the pipeline's input
+  // and output images, and derives every step's column range and border.
+  JXL_DASSERT(lf.gab || lf.epf_iters > 0);
+
+  fp->num_filters = 0;
+  fp->storage_rows_used = 0;
+  // Sigma is needed exactly when at least one EPF iteration is requested.
+  fp->compute_sigma = lf.epf_iters > 0;
+  // The first step reads straight from the input image.
+  fp->filters[0].SetInput(&in, in_rect, image_rect, image_ysize);
+
+  // Step order is Gaborish, EPF0, EPF1, EPF2. epf_iters == 1 enables EPF1,
+  // == 2 enables EPF1 and EPF2, == 3 enables all three; other values none.
+  const bool three_iters = lf.epf_iters == 3;
+  const bool two_iters = lf.epf_iters == 2;
+  if (lf.gab) fp->AddStep<kGaborishFilter.border>(kGaborishFilter);
+  if (three_iters) fp->AddStep<kEpf0Filter.border>(kEpf0Filter);
+  if (lf.epf_iters == 1 || two_iters || three_iters) {
+    fp->AddStep<kEpf1Filter.border>(kEpf1Filter);
+  }
+  if (two_iters || three_iters) {
+    fp->AddStep<kEpf2Filter.border>(kEpf2Filter);
+  }
+
+  // At least one step is expected to have been added above.
+  JXL_DASSERT(fp->num_filters > 0);
+
+  // The last step writes directly into the caller's output image.
+  fp->filters[fp->num_filters - 1].SetOutput(out, out_rect);
+
+  // Visit the steps last-to-first, accumulating the border columns that the
+  // steps after each one consume.
+  size_t col_border = 0;
+  for (int i = fp->num_filters - 1; i >= 0; i--) {
+    auto& step = fp->filters[i];
+    // [x0, x1) is the range of valid output columns this step must produce:
+    // the image columns widened by col_border on both sides. The step is run
+    // over a larger range whose ends are multiples of Lanes(df).
+    const size_t x0 =
+        FilterPipeline::FilterStep::MaxLeftPadding(image_rect.x0()) -
+        col_border;
+    const size_t x1 =
+        FilterPipeline::FilterStep::MaxLeftPadding(image_rect.x0()) +
+        image_rect.xsize() + col_border;
+    step.filter_x0 = x0 - x0 % Lanes(df);
+    step.filter_x1 = RoundUpTo(x1, Lanes(df));
+
+    // Border that the following steps need from this step's output.
+    step.output_col_border = col_border;
+    col_border += step.filter_def.border;
+  }
+
+  fp->total_border = col_border;
+  JXL_ASSERT(fp->total_border == lf.Padding());
+  JXL_ASSERT(fp->total_border <= kMaxFilterBorder);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(FilterPipelineInit);  // Local function
+
+// Mirror n floats starting at *p and store them before p.
+JXL_INLINE void LeftMirror(float* p, size_t n) {
+  // Writes p[-1 - i] = p[i] for i in [0, n): the n floats at p, reversed,
+  // land in the n slots immediately before p.
+  std::reverse_copy(p, p + n, p - n);
+}
+
+// Mirror n floats starting at *(p - n) and store them at *p.
+JXL_INLINE void RightMirror(float* p, size_t n) {
+  // Writes p[i] = p[-1 - i] for i in [0, n): the n floats just before p,
+  // reversed, land in the n slots starting at p.
+  std::reverse_copy(p - n, p, p);
+}
+
+void ComputeSigma(const Rect& block_rect, PassesDecoderState* state) {
+  const LoopFilter& lf = state->shared->frame_header.loop_filter;
+  JXL_CHECK(lf.epf_iters > 0);
+  const AcStrategyImage& ac_strategy = state->shared->ac_strategy;
+  const float quant_scale = state->shared->quantizer.Scale();
+  // Strides used below to step from the current row to rows iy further down.
+  const size_t sigma_stride = state->filter_weights.sigma.PixelsPerRow();
+  const size_t sharpness_stride = state->shared->epf_sharpness.PixelsPerRow();
+  // Writes 1 / sigma into filter_weights.sigma for every block of block_rect.
+  for (size_t by = 0; by < block_rect.ysize(); ++by) {
+    float* JXL_RESTRICT sigma_row =
+        block_rect.Row(&state->filter_weights.sigma, by);
+    const uint8_t* JXL_RESTRICT sharpness_row =
+        block_rect.ConstRow(state->shared->epf_sharpness, by);
+    AcStrategyRow acs_row = ac_strategy.ConstRow(block_rect, by);
+    const int* const JXL_RESTRICT row_quant =
+        block_rect.ConstRow(state->shared->raw_quant_field, by);
+
+    for (size_t bx = 0; bx < block_rect.xsize(); bx++) {
+      AcStrategy acs = acs_row[bx];
+      size_t llf_x = acs.covered_blocks_x();
+      if (!acs.IsFirstBlock()) continue;  // handled from the first block
+      // quant_scale is smaller for low quality.
+      // quant_scale is roughly 0.08 / butteraugli score.
+      //
+      // row_quant is smaller for low quality.
+      // row_quant is a quantization multiplier of form 1.0 /
+      // row_quant[bx]
+      //
+      // lf.epf_quant_mul is a parameter taken from the loop filter (lf).
+      // kInvSigmaNum is a constant.
+      float sigma_quant =
+          lf.epf_quant_mul / (quant_scale * row_quant[bx] * kInvSigmaNum);
+      for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+        for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+          float sigma =
+              sigma_quant *
+              lf.epf_sharp_lut[sharpness_row[bx + ix + iy * sharpness_stride]];
+          // Avoid infinities.
+          sigma = std::min(-1e-4f, sigma);  // TODO(veluca): remove this.
+          sigma_row[bx + ix + kSigmaPadding +
+                    (iy + kSigmaPadding) * sigma_stride] = 1.0f / sigma;
+        }
+      }
+      // TODO(veluca): remove this padding.
+      // Left padding with mirroring.
+      if (bx + block_rect.x0() == 0) {
+        for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+          LeftMirror(
+              sigma_row + kSigmaPadding + (iy + kSigmaPadding) * sigma_stride,
+              kSigmaBorder);
+        }
+      }
+      // Right padding with mirroring.
+      if (bx + block_rect.x0() + llf_x ==
+          state->shared->frame_dim.xsize_blocks) {
+        for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+          RightMirror(sigma_row + kSigmaPadding + bx + llf_x +
+                          (iy + kSigmaPadding) * sigma_stride,
+                      kSigmaBorder);
+        }
+      }
+      // Offsets for row copying, in blocks.
+      size_t offset_before = bx + block_rect.x0() == 0 ? 1 : bx + kSigmaPadding;
+      size_t offset_after =
+          bx + block_rect.x0() + llf_x == state->shared->frame_dim.xsize_blocks
+              ? kSigmaPadding + llf_x + bx + kSigmaBorder
+              : kSigmaPadding + llf_x + bx;
+      size_t num = offset_after - offset_before;  // entries copied per row
+      // Above
+      if (by + block_rect.y0() == 0) {
+        for (size_t iy = 0; iy < kSigmaBorder; iy++) {
+          memcpy(
+              sigma_row + offset_before +
+                  (kSigmaPadding - 1 - iy) * sigma_stride,
+              sigma_row + offset_before + (kSigmaPadding + iy) * sigma_stride,
+              num * sizeof(*sigma_row));
+        }
+      }
+      // Below
+      if (by + block_rect.y0() + acs.covered_blocks_y() ==
+          state->shared->frame_dim.ysize_blocks) {
+        for (size_t iy = 0; iy < kSigmaBorder; iy++) {
+          memcpy(
+              sigma_row + offset_before +
+                  sigma_stride * (acs.covered_blocks_y() + kSigmaPadding + iy),
+              sigma_row + offset_before +
+                  sigma_stride *
+                      (acs.covered_blocks_y() + kSigmaPadding - 1 - iy),
+              num * sizeof(*sigma_row));
+        }
+      }
+    }
+  }
+}
+
+FilterPipeline* PrepareFilterPipeline(
+    PassesDecoderState* dec_state, const Rect& image_rect, const Image3F& input,
+    const Rect& input_rect, size_t image_ysize, size_t thread,
+    Image3F* JXL_RESTRICT out, const Rect& output_rect) {
+  // Initialises the pipeline stored for `thread` and returns a pointer to it.
+  const LoopFilter& lf = dec_state->shared->frame_header.loop_filter;
+
+  // SIMD requires image_rect, input_rect and output_rect to share the same
+  // x0 remainder modulo kPaddingXRound; the remainder itself may be non-zero.
+  JXL_DASSERT(image_rect.x0() % GroupBorderAssigner::kPaddingXRound ==
+              input_rect.x0() % GroupBorderAssigner::kPaddingXRound);
+  JXL_DASSERT(image_rect.x0() % GroupBorderAssigner::kPaddingXRound ==
+              output_rect.x0() % GroupBorderAssigner::kPaddingXRound);
+  // Left of the image the input must hold the filter padding plus the
+  // rounding to GroupBorderAssigner::kPaddingXRound.
+  JXL_DASSERT(input_rect.x0() >=
+              input_rect.x0() % GroupBorderAssigner::kPaddingXRound +
+                  lf.Padding());
+  JXL_DASSERT(image_rect.xsize() == input_rect.xsize());
+  JXL_DASSERT(image_rect.xsize() == output_rect.xsize());
+
+  FilterPipeline& pipeline = dec_state->filter_pipelines[thread];
+  pipeline.image_rect = image_rect;
+  HWY_DYNAMIC_DISPATCH(FilterPipelineInit)
+  (&pipeline, lf, input, input_rect, image_rect, image_ysize, out, output_rect);
+  return &pipeline;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/epf.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/epf.h
new file mode 100644
index 0000000000..a2fd9d16f4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/epf.h
@@ -0,0 +1,55 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_EPF_H_
+#define LIB_JXL_EPF_H_
+
+// Fast SIMD "in-loop" edge preserving filter (adaptive, nonlinear).
+
+#include <stddef.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/filters.h"
+#include "lib/jxl/passes_state.h"
+
+namespace jxl {
+
+// 4 * (sqrt(0.5)-1), so that Weight(sigma) = 0.5.
+static constexpr float kInvSigmaNum = -1.1715728752538099024f;
+
+// Fills the `state->filter_weights.sigma` image with the precomputed sigma
+// values in the area inside `block_rect`. Accesses the AC strategy, quant field
+// and epf_sharpness fields in the corresponding positions.
+void ComputeSigma(const Rect& block_rect, PassesDecoderState* state);
+
+// Applies Gaborish + EPF to the given `image_rect` part of the image (used to
+// select the sigma values). Input pixels are taken from `input:input_rect`, and
+// the filtering result is written to `out:output_rect`. `dec_state->sigma` must
+// be padded with `kMaxFilterPadding/kBlockDim` values along the x axis.
+// All rects must have the same alignment modulo
+// GroupBorderAssigner::kPaddingXRound pixels.
+// `input_rect`, `output_rect` and `image_rect` must all have the same size.
+// At least `lf.Padding()` pixels must be accessible and contain valid values
+// outside of `image_rect` in `input`. Also, depending on the implementation,
+// more pixels in the input up to a vector size boundary should be accessible
+// but may contain uninitialized data.
+//
+// This function only prepares and returns the pipeline; to perform the
+// filtering process it must be called on all rows from -lf.Padding() to
+// image_rect.ysize() + lf.Padding().
+//
+// Note: if the output_rect x0 or x1 are not a multiple of kPaddingXRound more
+// pixels with potentially uninitialized data will be written to the output left
+// and right of the requested rect up to a multiple of kPaddingXRound pixels.
+FilterPipeline* PrepareFilterPipeline(
+    PassesDecoderState* dec_state, const Rect& image_rect, const Image3F& input,
+    const Rect& input_rect, size_t image_ysize, size_t thread,
+    Image3F* JXL_RESTRICT out, const Rect& output_rect);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_EPF_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/fast_math-inl.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/fast_math-inl.h
new file mode 100644
index 0000000000..60be66829a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/fast_math-inl.h
@@ -0,0 +1,175 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Fast SIMD math ops (log2, encoder only, cos, erf for splines)
+
+#if defined(LIB_JXL_FAST_MATH_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_FAST_MATH_INL_H_
+#undef LIB_JXL_FAST_MATH_INL_H_
+#else
+#define LIB_JXL_FAST_MATH_INL_H_
+#endif
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/common.h"
+#include "lib/jxl/rational_polynomial-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::ShiftLeft;
+using hwy::HWY_NAMESPACE::ShiftRight;
+
+// Computes base-2 logarithm like std::log2. Undefined if negative / NaN.
+// L1 error ~3.9E-6
+template <class DF, class V>
+V FastLog2f(const DF df, V x) {
+  // 2,2 rational polynomial approximation of std::log1p(x) / std::log(2).
+  HWY_ALIGN const float p[4 * (2 + 1)] = {HWY_REP4(-1.8503833400518310E-06f),
+                                          HWY_REP4(1.4287160470083755E+00f),
+                                          HWY_REP4(7.4245873327820566E-01f)};
+  HWY_ALIGN const float q[4 * (2 + 1)] = {HWY_REP4(9.9032814277590719E-01f),
+                                          HWY_REP4(1.0096718572241148E+00f),
+                                          HWY_REP4(1.7409343003366853E-01f)};
+
+  const Rebind<int32_t, DF> di;
+  const auto x_bits = BitCast(di, x);
+
+  // Range reduction to [-1/3, 1/3] - 3 integer, 2 float ops
+  const auto exp_bits = x_bits - Set(di, 0x3f2aaaab);  // = 2/3
+  // Shifted exponent = log2; also used to clear mantissa.
+  const auto exp_shifted = ShiftRight<23>(exp_bits);
+  const auto mantissa = BitCast(df, x_bits - ShiftLeft<23>(exp_shifted));
+  const auto exp_val = ConvertTo(df, exp_shifted);
+  return EvalRationalPolynomial(df, mantissa - Set(df, 1.0f), p, q) + exp_val;
+}
+
+// max relative error ~3e-7
+template <class DF, class V>
+V FastPow2f(const DF df, V x) {
+  const Rebind<int32_t, DF> di;
+  auto floorx = Floor(x);
+  auto exp = BitCast(df, ShiftLeft<23>(ConvertTo(di, floorx) + Set(di, 127)));
+  auto frac = x - floorx;
+  auto num = frac + Set(df, 1.01749063e+01);
+  num = MulAdd(num, frac, Set(df, 4.88687798e+01));
+  num = MulAdd(num, frac, Set(df, 9.85506591e+01));
+  num = num * exp;
+  auto den = MulAdd(frac, Set(df, 2.10242958e-01), Set(df, -2.22328856e-02));
+  den = MulAdd(den, frac, Set(df, -1.94414990e+01));
+  den = MulAdd(den, frac, Set(df, 9.85506633e+01));
+  return num / den;
+}
+
+// Computes base**exponent as 2**(log2(base) * exponent); max rel. error ~3e-5
+template <class DF, class V>
+V FastPowf(const DF df, V base, V exponent) {
+  return FastPow2f(df, FastLog2f(df, base) * exponent);
+}
+
+// Vectorized approximation of std::cos.
+// L1 error 7e-5.
+template <class DF, class V>
+V FastCosf(const DF df, V x) {
+  // Step 1: reduce the argument to [0, 2pi)
+  const auto two_pi = Set(df, kPi * 2.0f);
+  const auto inv_two_pi = Set(df, 0.5f / kPi);
+  const auto period_start = Floor(x * inv_two_pi) * two_pi;
+  const auto wrapped = x - period_start;
+  // Step 2: fold to [0, pi]
+  const auto folded = Min(wrapped, two_pi - wrapped);
+  // Step 3: fold to [0, pi/2], remembering which lanes were mirrored
+  const auto mirrored = folded >= Set(df, kPi / 2.0f);
+  const auto acute = IfThenElse(mirrored, Set(df, kPi) - folded, folded);
+  // Step 4: Taylor-like approximation, scaled by 2**0.75 to make angle
+  // duplication steps faster, on x/4.
+  const auto quarter = acute * Set(df, 0.25f);
+  const auto quarter2 = quarter * quarter;
+  const auto quarter4 = quarter2 * quarter2;
+  const auto low_terms =
+      MulAdd(quarter2, Set(df, -0.84087373), Set(df, 1.68179268));
+  const auto cos_quarter = MulAdd(quarter4, Set(df, 0.06960438), low_terms);
+  // Step 5: two angle-duplication steps.
+  const auto cos_half =
+      MulAdd(cos_quarter, cos_quarter, Set(df, -1.414213562));
+  const auto cos_acute = MulAdd(cos_half, cos_half, Set(df, -1));
+  // Step 6: flip the sign bit of the lanes that were mirrored in step 3.
+  const Rebind<uint32_t, DF> du;
+  auto sign_mask = ShiftLeft<31>(BitCast(du, VecFromMask(df, mirrored)));
+  return BitCast(df, sign_mask ^ BitCast(du, cos_acute));
+}
+
+// Computes the error function like std::erf.
+// L1 error 7e-4.
+template <class DF, class V>
+V FastErff(const DF df, V x) {
+  // Formula from
+  // https://en.wikipedia.org/wiki/Error_function#Numerical_approximations
+  // but constants have been recomputed.
+  const auto xle0 = x <= Zero(df);
+  const auto absx = Abs(x);
+  // Compute 1 - 1 / ((((x * a + b) * x + c) * x + d) * x + 1)**4
+  const auto denom1 =
+      MulAdd(absx, Set(df, 7.77394369e-02), Set(df, 2.05260015e-04));
+  const auto denom2 = MulAdd(denom1, absx, Set(df, 2.32120216e-01));
+  const auto denom3 = MulAdd(denom2, absx, Set(df, 2.77820801e-01));
+  const auto denom4 = MulAdd(denom3, absx, Set(df, 1.0f));
+  const auto denom5 = denom4 * denom4;
+  const auto inv_denom5 = Set(df, 1.0f) / denom5;
+  const auto result = NegMulAdd(inv_denom5, inv_denom5, Set(df, 1.0f));
+  // Change sign if needed.
+  const Rebind<uint32_t, DF> du;
+  auto signbit = ShiftLeft<31>(BitCast(du, VecFromMask(df, xle0)));
+  return BitCast(df, signbit ^ BitCast(du, result));
+}
+
+inline float FastLog2f(float f) {  // Scalar overload of the vector version.
+  HWY_CAPPED(float, 1) D;
+  return GetLane(FastLog2f(D, Set(D, f)));
+}
+
+inline float FastPow2f(float f) {  // Scalar overload of the vector version.
+  HWY_CAPPED(float, 1) D;
+  return GetLane(FastPow2f(D, Set(D, f)));
+}
+
+inline float FastPowf(float b, float e) {  // Scalar overload: b raised to e.
+  HWY_CAPPED(float, 1) D;
+  return GetLane(FastPowf(D, Set(D, b), Set(D, e)));
+}
+
+inline float FastCosf(float f) {  // Scalar overload of the vector version.
+  HWY_CAPPED(float, 1) D;
+  return GetLane(FastCosf(D, Set(D, f)));
+}
+
+inline float FastErff(float f) {  // Scalar overload of the vector version.
+  HWY_CAPPED(float, 1) D;
+  return GetLane(FastErff(D, Set(D, f)));
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_FAST_MATH_INL_H_
+
+#if HWY_ONCE
+
+namespace jxl {  // Scalar entry points forwarding via HWY_STATIC_DISPATCH.
+inline float FastLog2f(float f) { return HWY_STATIC_DISPATCH(FastLog2f)(f); }
+inline float FastPow2f(float f) { return HWY_STATIC_DISPATCH(FastPow2f)(f); }
+inline float FastPowf(float b, float e) {
+  return HWY_STATIC_DISPATCH(FastPowf)(b, e);
+}
+inline float FastCosf(float f) { return HWY_STATIC_DISPATCH(FastCosf)(f); }
+inline float FastErff(float f) { return HWY_STATIC_DISPATCH(FastErff)(f); }
+}  // namespace jxl
+
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/fast_math_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/fast_math_test.cc
new file mode 100644
index 0000000000..50c3bbb03a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/fast_math_test.cc
@@ -0,0 +1,280 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <random>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/fast_math_test.cc"
+#include <hwy/foreach_target.h>
+
+#include "lib/jxl/dec_xyb-inl.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+// Test utils
+#include <hwy/highway.h>
+#include <hwy/tests/test_util-inl.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+HWY_NOINLINE void TestFastLog2() {
+  constexpr size_t kNumTrials = 1 << 23;
+  std::mt19937 gen(1);  // fixed seed keeps the inputs reproducible
+  std::uniform_real_distribution<float> inputs(1e-7f, 1e3f);
+  float worst = 0;  // largest absolute error seen so far
+  HWY_FULL(float) d;
+  for (size_t trial = 0; trial != kNumTrials; ++trial) {
+    const float f = inputs(gen);
+    const float reference = std::log2(f);
+    const float actual = GetLane(FastLog2f(d, Set(d, f)));
+    const float abs_err = std::abs(reference - actual);
+    EXPECT_LT(abs_err, 2.9E-6) << "f = " << f;
+    worst = std::max(worst, abs_err);
+  }
+  printf("max abs err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFastPow2() {
+  constexpr size_t kNumTrials = 1 << 23;
+  std::mt19937 gen(1);  // fixed seed keeps the inputs reproducible
+  std::uniform_real_distribution<float> inputs(-100, 100);
+  float worst = 0;  // largest relative error seen so far
+  HWY_FULL(float) d;
+  for (size_t trial = 0; trial != kNumTrials; ++trial) {
+    const float f = inputs(gen);
+    const auto approx_v = FastPow2f(d, Set(d, f));
+    const float expected = std::pow(2, f);
+    const float actual = GetLane(approx_v);
+    const float rel_err = std::abs(expected - actual) / expected;
+    EXPECT_LT(rel_err, 3.1E-7) << "f = " << f;
+    worst = std::max(worst, rel_err);
+  }
+  printf("max rel err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFastPow() {
+  constexpr size_t kNumTrials = 1 << 23;
+  std::mt19937 gen(1);  // fixed seed keeps the inputs reproducible
+  std::uniform_real_distribution<float> bases(1e-3f, 1e3f);
+  std::uniform_real_distribution<float> exponents(-10, 10);
+  float worst = 0;  // largest relative error seen so far
+  HWY_FULL(float) d;
+  for (size_t trial = 0; trial != kNumTrials; ++trial) {
+    const float b = bases(gen);  // base is drawn first, then the exponent
+    const float e = exponents(gen);
+    const float expected = std::pow(b, e);
+    const auto approx_v = FastPowf(d, Set(d, b), Set(d, e));
+    const float actual = GetLane(approx_v);
+    const float rel_err = std::abs(expected - actual) / expected;
+    EXPECT_LT(rel_err, 3E-5) << "b = " << b << " e = " << e;
+    worst = std::max(worst, rel_err);
+  }
+  printf("max rel err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFastCos() {
+  constexpr size_t kNumTrials = 1 << 23;
+  std::mt19937 gen(1);  // fixed seed keeps the inputs reproducible
+  std::uniform_real_distribution<float> inputs(-1e3f, 1e3f);
+  float worst = 0;  // largest absolute error seen so far
+  HWY_FULL(float) d;
+  for (size_t trial = 0; trial != kNumTrials; ++trial) {
+    const float f = inputs(gen);
+    const float reference = std::cos(f);
+    const float actual = GetLane(FastCosf(d, Set(d, f)));
+    const float abs_err = std::abs(reference - actual);
+    EXPECT_LT(abs_err, 7E-5) << "f = " << f;
+    worst = std::max(worst, abs_err);
+  }
+  printf("max abs err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFastErf() {
+  constexpr size_t kNumTrials = 1 << 23;
+  std::mt19937 gen(1);  // fixed seed keeps the inputs reproducible
+  std::uniform_real_distribution<float> inputs(-5.f, 5.f);
+  float worst = 0;  // largest absolute error seen so far
+  HWY_FULL(float) d;
+  for (size_t trial = 0; trial != kNumTrials; ++trial) {
+    const float f = inputs(gen);
+    const float reference = std::erf(f);
+    const float actual = GetLane(FastErff(d, Set(d, f)));
+    const float abs_err = std::abs(reference - actual);
+    EXPECT_LT(abs_err, 7E-4) << "f = " << f;
+    worst = std::max(worst, abs_err);
+  }
+  printf("max abs err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFastSRGB() {
+  constexpr size_t kNumTrials = 1 << 23;
+  std::mt19937 gen(1);  // fixed seed keeps the inputs reproducible
+  std::uniform_real_distribution<float> inputs(0.0f, 1.0f);
+  float worst = 0;  // largest absolute error seen so far
+  HWY_FULL(float) d;
+  for (size_t trial = 0; trial != kNumTrials; ++trial) {
+    const float f = inputs(gen);
+    const auto input_v = Set(d, f);
+    const float actual = GetLane(FastLinearToSRGB(d, input_v));
+    const float expected = GetLane(TF_SRGB().EncodedFromDisplay(d, input_v));
+    const float abs_err = std::abs(expected - actual);
+    EXPECT_LT(abs_err, 1.2E-4) << "f = " << f;
+    worst = std::max(worst, abs_err);
+  }
+  printf("max abs err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFastPQEFD() {
+  constexpr size_t kNumTrials = 1 << 23;
+  std::mt19937 gen(1);  // fixed seed keeps the inputs reproducible
+  std::uniform_real_distribution<float> inputs(0.0f, 1.0f);
+  float worst = 0;  // largest vector-vs-scalar difference seen so far
+  HWY_FULL(float) d;
+  for (size_t trial = 0; trial != kNumTrials; ++trial) {
+    const float f = inputs(gen);
+    const float expected = TF_PQ().EncodedFromDisplay(f);
+    const float actual = GetLane(TF_PQ().EncodedFromDisplay(d, Set(d, f)));
+    const float abs_err = std::abs(expected - actual);
+    EXPECT_LT(abs_err, 7e-7) << "f = " << f;
+    worst = std::max(worst, abs_err);
+  }
+  printf("max abs err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFastHLGEFD() {
+  constexpr size_t kNumTrials = 1 << 23;
+  std::mt19937 gen(1);  // fixed seed keeps the inputs reproducible
+  std::uniform_real_distribution<float> inputs(0.0f, 1.0f);
+  float worst = 0;  // largest vector-vs-scalar difference seen so far
+  HWY_FULL(float) d;
+  for (size_t trial = 0; trial != kNumTrials; ++trial) {
+    const float f = inputs(gen);
+    const float expected = TF_HLG().EncodedFromDisplay(f);
+    const float actual = GetLane(TF_HLG().EncodedFromDisplay(d, Set(d, f)));
+    const float abs_err = std::abs(expected - actual);
+    EXPECT_LT(abs_err, 5e-7) << "f = " << f;
+    worst = std::max(worst, abs_err);
+  }
+  printf("max abs err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFast709EFD() {
+  constexpr size_t kNumTrials = 1 << 23;
+  std::mt19937 gen(1);  // fixed seed keeps the inputs reproducible
+  std::uniform_real_distribution<float> inputs(0.0f, 1.0f);
+  float worst = 0;  // largest vector-vs-scalar difference seen so far
+  HWY_FULL(float) d;
+  for (size_t trial = 0; trial != kNumTrials; ++trial) {
+    const float f = inputs(gen);
+    const float expected = TF_709().EncodedFromDisplay(f);
+    const float actual = GetLane(TF_709().EncodedFromDisplay(d, Set(d, f)));
+    const float abs_err = std::abs(expected - actual);
+    EXPECT_LT(abs_err, 2e-6) << "f = " << f;
+    worst = std::max(worst, abs_err);
+  }
+  printf("max abs err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFastPQDFE() {
+  constexpr size_t kNumTrials = 1 << 23;
+  std::mt19937 gen(1);  // fixed seed keeps the inputs reproducible
+  std::uniform_real_distribution<float> inputs(0.0f, 1.0f);
+  float worst = 0;  // largest vector-vs-scalar difference seen so far
+  HWY_FULL(float) d;
+  for (size_t trial = 0; trial != kNumTrials; ++trial) {
+    const float f = inputs(gen);
+    const float expected = TF_PQ().DisplayFromEncoded(f);
+    const float actual = GetLane(TF_PQ().DisplayFromEncoded(d, Set(d, f)));
+    const float abs_err = std::abs(expected - actual);
+    EXPECT_LT(abs_err, 3E-6) << "f = " << f;
+    worst = std::max(worst, abs_err);
+  }
+  printf("max abs err %e\n", static_cast<double>(worst));
+}
+
+HWY_NOINLINE void TestFastXYB() {
+  if (!HasFastXYBTosRGB8()) return;  // nothing to test without the fast path
+  ImageMetadata metadata;
+  ImageBundle bundle(&metadata);  // not named `ib`: the blue loop index is
+  int scaling = 1;
+  int n = 256 * scaling;
+  float inv_scaling = 1.0f / scaling;
+  const int kChunk = 32;
+  // The image is divided in chunks to reduce total memory usage.
+  for (int cr = 0; cr < n; cr += kChunk) {
+    for (int cg = 0; cg < n; cg += kChunk) {
+      for (int cb = 0; cb < n; cb += kChunk) {
+        Image3F chunk(kChunk * kChunk, kChunk);
+        for (int ir = 0; ir < kChunk; ir++) {
+          for (int ig = 0; ig < kChunk; ig++) {
+            for (int ib = 0; ib < kChunk; ib++) {
+              float r = (cr + ir) * inv_scaling;
+              float g = (cg + ig) * inv_scaling;
+              float b = (cb + ib) * inv_scaling;
+              chunk.PlaneRow(0, ir)[ig * kChunk + ib] = r * (1.0f / 255);
+              chunk.PlaneRow(1, ir)[ig * kChunk + ib] = g * (1.0f / 255);
+              chunk.PlaneRow(2, ir)[ig * kChunk + ib] = b * (1.0f / 255);
+            }
+          }
+        }
+        bundle.SetFromImage(std::move(chunk), ColorEncoding::SRGB());
+        Image3F xyb(kChunk * kChunk, kChunk);
+        std::vector<uint8_t> roundtrip(kChunk * kChunk * kChunk * 3);
+        ToXYB(bundle, nullptr, &xyb);
+        jxl::HWY_NAMESPACE::FastXYBTosRGB8(
+            xyb, Rect(xyb), Rect(xyb), nullptr, Rect(), /*is_rgba=*/false,
+            roundtrip.data(), xyb.xsize(), xyb.xsize() * 3);
+        for (int ir = 0; ir < kChunk; ir++) {
+          for (int ig = 0; ig < kChunk; ig++) {
+            for (int ib = 0; ib < kChunk; ib++) {
+              float r = (cr + ir) * inv_scaling;
+              float g = (cg + ig) * inv_scaling;
+              float b = (cb + ib) * inv_scaling;
+              size_t idx = ir * kChunk * kChunk + ig * kChunk + ib;
+              int rr = roundtrip[3 * idx];
+              int rg = roundtrip[3 * idx + 1];
+              int rb = roundtrip[3 * idx + 2];  // std::abs: avoid C abs(int)
+              EXPECT_LT(std::abs(r - rr), 2) << "expected " << r << " got " << rr;
+              EXPECT_LT(std::abs(g - rg), 2) << "expected " << g << " got " << rg;
+              EXPECT_LT(std::abs(b - rb), 2) << "expected " << b << " got " << rb;
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {  // Registers the tests above with the per-target fixture.
+
+class FastMathTargetTest : public hwy::TestWithParamTarget {};
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(FastMathTargetTest);
+
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastLog2);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastPow2);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastPow);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastCos);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastErf);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastSRGB);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastPQDFE);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastPQEFD);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastHLGEFD);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFast709EFD);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastXYB);
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/field_encodings.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/field_encodings.h
new file mode 100644
index 0000000000..00d0880c71
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/field_encodings.h
@@ -0,0 +1,123 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_FIELD_ENCODINGS_H_
+#define LIB_JXL_FIELD_ENCODINGS_H_
+
+// Constants needed to encode/decode fields; avoids including the full fields.h.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "hwy/base.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+class Visitor;
+class Fields {  // Abstract interface: a bundle that a Visitor can traverse.
+ public:
+  virtual ~Fields() = default;
+  virtual const char* Name() const = 0;  // Identifying name of the bundle.
+  virtual Status VisitFields(Visitor* JXL_RESTRICT visitor) = 0;
+};
+
+// Distribution of U32 values for one particular selector. Represents either a
+// power of two-sized range, or a single value. A separate type ensures this is
+// only passed to the U32Enc ctor.
+struct U32Distr {
+  // No need to validate - all `d` are legitimate.
+  constexpr explicit U32Distr(uint32_t d) : d(d) {}
+
+  static constexpr uint32_t kDirect = 0x80000000u;  // Top bit: direct value.
+
+  constexpr bool IsDirect() const { return (d & kDirect) != 0; }
+
+  // Only call if IsDirect(): returns the value stored in the low 31 bits.
+  constexpr uint32_t Direct() const { return d & (kDirect - 1); }
+
+  // Only call if !IsDirect(). Bits 0..4: ExtraBits() - 1; bits 5..30: Offset().
+  constexpr size_t ExtraBits() const { return (d & 0x1F) + 1; }
+  constexpr uint32_t Offset() const { return (d >> 5) & 0x3FFFFFF; }
+
+  uint32_t d;  // Packed representation; see the accessors above.
+};
+
+// Direct-coded 31-bit `value`; occupies only 2 bits in the bitstream.
+constexpr U32Distr Val(uint32_t value) {
+  return U32Distr(U32Distr::kDirect | value);
+}
+
+// (Value - `offset`) will be signaled in `bits` extra bits.
+constexpr U32Distr BitsOffset(uint32_t bits, uint32_t offset) {
+  return U32Distr(((offset & 0x3FFFFFF) << 5) | ((bits - 1) & 0x1F));
+}
+
+// Value will be signaled in `bits` extra bits (BitsOffset with offset 0).
+constexpr U32Distr Bits(uint32_t bits) { return BitsOffset(bits, 0); }
+
+// Holds the four U32Distr of a U32 field. See U32Coder docs in fields.h.
+class U32Enc {
+ public:
+  constexpr U32Enc(const U32Distr d0, const U32Distr d1, const U32Distr d2,
+                   const U32Distr d3)
+      : d_{d0, d1, d2, d3} {}
+
+  // Returns the U32Distr at `selector` = 0..3, least-significant first.
+  U32Distr GetDistr(const uint32_t selector) const {
+    JXL_ASSERT(selector < 4);
+    return d_[selector];
+  }
+
+ private:
+  U32Distr d_[4];  // Indexed by selector, in constructor argument order.
+};
+
+// Returns a 64-bit mask with only bit `index` set (0 = least significant).
+template <typename T>
+static inline constexpr uint64_t MakeBit(T index) {
+  return uint64_t{1} << static_cast<uint32_t>(index);
+}
+
+// Lists every possible value of an Enum type, in increasing order. Relies on
+// an EnumBits() overload for the Enum that returns a bit array of its values,
+// which implies values must be in [0, 64).
+template <typename Enum>
+std::vector<Enum> Values() {
+  const uint64_t all_bits = EnumBits(Enum());
+
+  std::vector<Enum> result;
+  result.reserve(hwy::PopCount(all_bits));
+
+  // Walk the set bits from lowest to highest; each bit position is one value.
+  // The loop step `rest &= rest - 1` clears the least-significant set bit.
+  for (uint64_t rest = all_bits; rest != 0; rest &= rest - 1) {
+    const int position = Num0BitsBelowLS1Bit_Nonzero(rest);
+    result.push_back(static_cast<Enum>(position));
+  }
+  return result;
+}
+
+// Checks that `value` is one of Values<Enum>(), i.e. its bit is in EnumBits.
+template <class Enum>
+Status EnumValid(const Enum value) {
+  const uint32_t raw = static_cast<uint32_t>(value);
+  if (raw >= 64) return JXL_FAILURE("Value %u too large for %s\n", raw,
+                                    EnumName(Enum()));
+
+  // The bit at position `raw` must be set in the enum's value mask.
+  const bool known = (EnumBits(Enum()) & MakeBit(value)) != 0;
+  if (!known) return JXL_FAILURE("Invalid value %u for %s\n", raw,
+                                 EnumName(Enum()));
+
+  return true;
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_FIELD_ENCODINGS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/fields.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/fields.cc
new file mode 100644
index 0000000000..7f00c44610
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/fields.cc
@@ -0,0 +1,985 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/fields.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <cmath>
+
+#include "hwy/base.h"
+#include "lib/jxl/base/bits.h"
+
+namespace jxl {
+
+namespace {
+
+// Tracks, per nesting level, whether a bundle's extensions are not-begun,
+// active or ended. Bundles may nest, so the states form a stack.
+class ExtensionStates {
+ public:
+  void Push() {
+    // The freshly exposed low bit is 0, i.e. the new level starts not-begun.
+    begun_stack_ = begun_stack_ << 1;
+    ended_stack_ = ended_stack_ << 1;
+  }
+
+  // Drops the current level; the caller must check IsEnded beforehand.
+  void Pop() {
+    begun_stack_ = begun_stack_ >> 1;
+    ended_stack_ = ended_stack_ >> 1;
+  }
+
+  // True if the current state is active or ended.
+  Status IsBegun() const { return (begun_stack_ & 1) == 1; }
+  // True if the current state is ended (neither not-begun nor active).
+  Status IsEnded() const { return (ended_stack_ & 1) == 1; }
+
+  void Begin() {
+    JXL_ASSERT(!IsBegun());
+    JXL_ASSERT(!IsEnded());
+    ++begun_stack_;
+  }
+
+  void End() {
+    JXL_ASSERT(IsBegun());
+    JXL_ASSERT(!IsEnded());
+    ++ended_stack_;
+  }
+
+ private:
+  // The least-significant bit of each word describes the current level.
+  uint64_t begun_stack_ = 0;
+  uint64_t ended_stack_ = 0;
+};
+
+// Visitors generate Init/AllDefault/Read/Write logic for all fields. Each
+// bundle's VisitFields member function calls visitor->U32 etc. We do not
+// overload operator() because a function name is easier to search for.
+
+class VisitorBase : public Visitor {  // Shared depth/extension bookkeeping.
+ public:
+  explicit VisitorBase(bool print_bundles = false)
+      : print_bundles_(print_bundles) {}
+  ~VisitorBase() override { JXL_ASSERT(depth_ == 0); }
+
+  // This is the only call site of Fields::VisitFields. Adds tracing and
+  // ensures EndExtensions was called.
+  Status Visit(Fields* fields, const char* visitor_name) override {
+    fputs(visitor_name, stdout);  // No newline; no effect if empty
+    if (print_bundles_) {
+      Trace("%s\n", fields->Name());
+    }
+
+    depth_ += 1;
+    JXL_ASSERT(depth_ <= Bundle::kMaxExtensions);
+    extension_states_.Push();
+
+    const Status ok = fields->VisitFields(this);
+
+    if (ok) {
+      // If VisitFields called BeginExtensions, must also call
+      // EndExtensions.
+      JXL_ASSERT(!extension_states_.IsBegun() || extension_states_.IsEnded());
+    } else {
+      // Failed, undefined state: don't care whether EndExtensions was
+      // called.
+    }
+
+    extension_states_.Pop();
+    JXL_ASSERT(depth_ != 0);
+    depth_ -= 1;
+
+    return ok;
+  }
+
+  // For visitors accepting a const Fields, we need to const-cast so we can
+  // call the non-const Fields::VisitFields. NOTE: `t` is not modified except
+  // for the `all_default` field by CanEncodeVisitor.
+  Status VisitConst(const Fields& t, const char* message) {
+    return Visit(const_cast<Fields*>(&t), message);
+  }
+
+  // Derived types (overridden by InitVisitor because it is unsafe to read
+  // from *value there)
+
+  Status Bool(bool default_value, bool* JXL_RESTRICT value) override {
+    uint32_t bits = *value ? 1 : 0;
+    JXL_RETURN_IF_ERROR(Bits(1, static_cast<uint32_t>(default_value), &bits));
+    JXL_DASSERT(bits <= 1);
+    *value = bits == 1;
+    return true;
+  }
+
+  // Visits the `extensions` field (U64, default 0) and marks the state begun.
+  // Called before any conditional visit based on "extensions".
+  // Overridden by ReadVisitor, CanEncodeVisitor and WriteVisitor.
+  Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) override {
+    JXL_RETURN_IF_ERROR(U64(0, extensions));
+
+    extension_states_.Begin();
+    return true;
+  }
+
+  // Called after all extension fields (if any). Although non-extension
+  // fields could be visited afterward, we prefer the convention that
+  // extension fields are always the last to be visited. Overridden by
+  // ReadVisitor.
+  Status EndExtensions() override {
+    extension_states_.End();
+    return true;
+  }
+
+ protected:
+  // Prints indentation, <format>.
+  JXL_FORMAT(2, 3)  // 1-based plus one because member function
+  void Trace(const char* format, ...) const {
+    // Indentation.
+    printf("%*s", static_cast<int>(2 * depth_), "");
+
+    va_list args;
+    va_start(args, format);
+    vfprintf(stdout, format, args);
+    va_end(args);
+  }
+
+ private:
+  size_t depth_ = 0;  // for indentation.
+  ExtensionStates extension_states_;
+  const bool print_bundles_;
+};
+
+struct InitVisitor : public VisitorBase {  // Sets fields to their defaults.
+  Status Bits(const size_t /*unused*/, const uint32_t default_value,
+              uint32_t* JXL_RESTRICT value) override {
+    *value = default_value;
+    return true;
+  }
+
+  Status U32(const U32Enc /*unused*/, const uint32_t default_value,
+             uint32_t* JXL_RESTRICT value) override {
+    *value = default_value;
+    return true;
+  }
+
+  Status U64(const uint64_t default_value,
+             uint64_t* JXL_RESTRICT value) override {
+    *value = default_value;
+    return true;
+  }
+
+  Status Bool(bool default_value, bool* JXL_RESTRICT value) override {
+    *value = default_value;  // Writes without reading the old *value.
+    return true;
+  }
+
+  Status F16(const float default_value, float* JXL_RESTRICT value) override {
+    *value = default_value;
+    return true;
+  }
+
+  // Always visit conditional fields to ensure they are initialized.
+  Status Conditional(bool /*condition*/) override { return true; }
+
+  Status AllDefault(const Fields& /*fields*/,
+                    bool* JXL_RESTRICT all_default) override {
+    // Just initialize this field and don't skip initializing others.
+    JXL_RETURN_IF_ERROR(Bool(true, all_default));
+    return false;  // false: the caller goes on to visit the other fields.
+  }
+
+  Status VisitNested(Fields* /*fields*/) override {
+    // Avoid re-initializing nested bundles (their ctors already called
+    // Bundle::Init for their fields).
+    return true;
+  }
+
+  const char* VisitorName() override { return "InitVisitor"; }
+};
+
+// Similar to InitVisitor, but does not override VisitNested to skip nested
+struct SetDefaultVisitor : public VisitorBase {  // bundles (sets them too).
+  Status Bits(const size_t /*unused*/, const uint32_t default_value,
+              uint32_t* JXL_RESTRICT value) override {
+    *value = default_value;
+    return true;
+  }
+
+  Status U32(const U32Enc /*unused*/, const uint32_t default_value,
+             uint32_t* JXL_RESTRICT value) override {
+    *value = default_value;
+    return true;
+  }
+
+  Status U64(const uint64_t default_value,
+             uint64_t* JXL_RESTRICT value) override {
+    *value = default_value;
+    return true;
+  }
+
+  Status Bool(bool default_value, bool* JXL_RESTRICT value) override {
+    *value = default_value;  // Writes without reading the old *value.
+    return true;
+  }
+
+  Status F16(const float default_value, float* JXL_RESTRICT value) override {
+    *value = default_value;
+    return true;
+  }
+
+  // Always visit conditional fields to ensure they are initialized.
+  Status Conditional(bool /*condition*/) override { return true; }
+
+  Status AllDefault(const Fields& /*fields*/,
+                    bool* JXL_RESTRICT all_default) override {
+    // Just initialize this field and don't skip initializing others.
+    JXL_RETURN_IF_ERROR(Bool(true, all_default));
+    return false;  // false: the caller goes on to visit the other fields.
+  }
+
+  const char* VisitorName() override { return "SetDefaultVisitor"; }
+};
+
+class AllDefaultVisitor : public VisitorBase {
+ public:
+  explicit AllDefaultVisitor(bool print_all_default)
+      : VisitorBase(print_all_default), verbose_(print_all_default) {}
+
+  Status Bits(const size_t bits, const uint32_t default_value,
+              uint32_t* JXL_RESTRICT value) override {
+    if (verbose_) {
+      Trace("  u(%zu) = %u, default %u\n", bits, *value, default_value);
+    }
+
+    if (*value != default_value) all_default_ = false;
+    return true;
+  }
+
+  Status U32(const U32Enc /*unused*/, const uint32_t default_value,
+             uint32_t* JXL_RESTRICT value) override {
+    if (verbose_) {
+      Trace("  U32 = %u, default %u\n", *value, default_value);
+    }
+
+    if (*value != default_value) all_default_ = false;
+    return true;
+  }
+
+  Status U64(const uint64_t default_value,
+             uint64_t* JXL_RESTRICT value) override {
+    if (verbose_) {
+      Trace("  U64 = %" PRIu64 ", default %" PRIu64 "\n", *value,
+            default_value);
+    }
+
+    if (*value != default_value) all_default_ = false;
+    return true;
+  }
+
+  Status F16(const float default_value, float* JXL_RESTRICT value) override {
+    if (verbose_) {
+      Trace("  F16 = %.6f, default %.6f\n", static_cast<double>(*value),
+            static_cast<double>(default_value));
+    }
+    if (!(std::abs(*value - default_value) < 1E-6f)) all_default_ = false;
+    return true;
+  }
+
+  Status AllDefault(const Fields& /*fields*/,
+                    bool* JXL_RESTRICT /*all_default*/) override {
+    // Never short-circuit: every field must be visited to compute the result.
+    return false;
+  }
+
+  bool AllDefault() const { return all_default_; }
+
+  const char* VisitorName() override { return "AllDefaultVisitor"; }
+
+ private:
+  const bool verbose_;       // Trace each compared field when set.
+  bool all_default_ = true;  // Cleared by the first non-default field.
+};
+
+class ReadVisitor : public VisitorBase {  // Decodes fields from a BitReader.
+ public:
+  ReadVisitor(BitReader* reader, bool print_read)
+      : VisitorBase(print_read), print_read_(print_read), reader_(reader) {}
+
+  Status Bits(const size_t bits, const uint32_t /*default_value*/,
+              uint32_t* JXL_RESTRICT value) override {
+    *value = BitsCoder::Read(bits, reader_);
+    if (!reader_->AllReadsWithinBounds()) {
+      return JXL_STATUS(StatusCode::kNotEnoughBytes,
+                        "Not enough bytes for header");
+    }
+    if (print_read_) Trace("  u(%zu) = %u\n", bits, *value);
+    return true;
+  }
+
+  Status U32(const U32Enc dist, const uint32_t /*default_value*/,
+             uint32_t* JXL_RESTRICT value) override {
+    *value = U32Coder::Read(dist, reader_);
+    if (!reader_->AllReadsWithinBounds()) {
+      return JXL_STATUS(StatusCode::kNotEnoughBytes,
+                        "Not enough bytes for header");
+    }
+    if (print_read_) Trace("  U32 = %u\n", *value);
+    return true;
+  }
+
+  Status U64(const uint64_t /*default_value*/,
+             uint64_t* JXL_RESTRICT value) override {
+    *value = U64Coder::Read(reader_);
+    if (!reader_->AllReadsWithinBounds()) {
+      return JXL_STATUS(StatusCode::kNotEnoughBytes,
+                        "Not enough bytes for header");
+    }
+    if (print_read_) Trace("  U64 = %" PRIu64 "\n", *value);
+    return true;
+  }
+
+  Status F16(const float /*default_value*/,
+             float* JXL_RESTRICT value) override {
+    ok_ &= F16Coder::Read(reader_, value);  // Recorded, not returned: see OK().
+    if (!reader_->AllReadsWithinBounds()) {
+      return JXL_STATUS(StatusCode::kNotEnoughBytes,
+                        "Not enough bytes for header");
+    }
+    if (print_read_) Trace("  F16 = %f\n", static_cast<double>(*value));
+    return true;
+  }
+
+  void SetDefault(Fields* fields) override { Bundle::SetDefault(fields); }
+
+  bool IsReading() const override { return true; }
+
+  // Reads one U64 size per extension that is present. Fails if the input is
+  // truncated or if the summed sizes overflow.
+  Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) override {
+    JXL_QUIET_RETURN_IF_ERROR(VisitorBase::BeginExtensions(extensions));
+    if (*extensions == 0) return true;
+
+    // For each nonzero bit, i.e. extension that is present:
+    for (uint64_t remaining_extensions = *extensions; remaining_extensions != 0;
+         remaining_extensions &= remaining_extensions - 1) {
+      const size_t idx_extension =
+          Num0BitsBelowLS1Bit_Nonzero(remaining_extensions);
+      // Read additional U64 (one per extension) indicating the number of bits
+      // (allows skipping individual extensions).
+      JXL_RETURN_IF_ERROR(U64(0, &extension_bits_[idx_extension]));
+      if (!SafeAdd(total_extension_bits_, extension_bits_[idx_extension],
+                   total_extension_bits_)) {
+        return JXL_FAILURE("Extension bits overflowed, invalid codestream");
+      }
+    }
+    // Used by EndExtensions to skip past any _remaining_ extensions.
+    pos_after_ext_size_ = reader_->TotalBitsConsumed();
+    JXL_ASSERT(pos_after_ext_size_ != 0);
+    return true;
+  }
+
+  Status EndExtensions() override {
+    JXL_QUIET_RETURN_IF_ERROR(VisitorBase::EndExtensions());
+    // Happens if extensions == 0: don't read size, done.
+    if (pos_after_ext_size_ == 0) return true;
+
+    // Not enough bytes as set by BeginExtensions or earlier. Do not return
+    // this as a JXL_FAILURE or false (which can also propagate to error
+    // through e.g. JXL_RETURN_IF_ERROR), since this may be used while
+    // silently checking whether there are enough bytes. If this case must be
+    // treated as an error, reader_->Close() will do this, just like is already
+    // done for non-extension fields.
+    if (!enough_bytes_) return true;
+
+    // Skip new fields this (old?) decoder didn't know about, if any.
+    const size_t bits_read = reader_->TotalBitsConsumed();
+    uint64_t end;
+    if (!SafeAdd(pos_after_ext_size_, total_extension_bits_, end)) {
+      return JXL_FAILURE("Invalid extension size, caused overflow");
+    }
+    if (bits_read > end) {
+      return JXL_FAILURE("Read more extension bits than budgeted");
+    }
+    const size_t remaining_bits = end - bits_read;
+    if (remaining_bits != 0) {
+      JXL_WARNING("Skipping %zu-bit extension(s)", remaining_bits);
+      reader_->SkipBits(remaining_bits);
+      if (!reader_->AllReadsWithinBounds()) {
+        return JXL_STATUS(StatusCode::kNotEnoughBytes,
+                          "Not enough bytes for header");
+      }
+    }
+    return true;
+  }
+
+  Status OK() const { return ok_; }
+
+  const char* VisitorName() override { return "ReadVisitor"; }
+
+ private:
+  const bool print_read_;
+
+  // Whether any error other than not enough bytes occurred.
+  bool ok_ = true;
+
+  // Whether there are enough input bytes to read from.
+  bool enough_bytes_ = true;  // NOTE(review): no visible code clears this.
+  BitReader* const reader_;
+  // May be 0 even if the corresponding extension is present.
+  uint64_t extension_bits_[Bundle::kMaxExtensions] = {0};
+  uint64_t total_extension_bits_ = 0;
+  size_t pos_after_ext_size_ = 0;  // 0 iff extensions == 0.
+};
+
+class MaxBitsVisitor : public VisitorBase {  // Sums worst-case field sizes.
+ public:
+  Status Bits(const size_t bits, const uint32_t /*default_value*/,
+              uint32_t* JXL_RESTRICT /*value*/) override {
+    max_bits_ += BitsCoder::MaxEncodedBits(bits);
+    return true;
+  }
+
+  Status U32(const U32Enc enc, const uint32_t /*default_value*/,
+             uint32_t* JXL_RESTRICT /*value*/) override {
+    max_bits_ += U32Coder::MaxEncodedBits(enc);
+    return true;
+  }
+
+  Status U64(const uint64_t /*default_value*/,
+             uint64_t* JXL_RESTRICT /*value*/) override {
+    max_bits_ += U64Coder::MaxEncodedBits();
+    return true;
+  }
+
+  Status F16(const float /*default_value*/,
+             float* JXL_RESTRICT /*value*/) override {
+    max_bits_ += F16Coder::MaxEncodedBits();
+    return true;
+  }
+
+  Status AllDefault(const Fields& /*fields*/,
+                    bool* JXL_RESTRICT all_default) override {
+    JXL_RETURN_IF_ERROR(Bool(true, all_default));
+    return false;  // For max bits, assume nothing is default
+  }
+
+  // Always visit conditional fields to get a (loose) upper bound.
+  Status Conditional(bool /*condition*/) override { return true; }
+
+  Status BeginExtensions(uint64_t* JXL_RESTRICT /*extensions*/) override {
+    // Skip - extensions are not included in "MaxBits" because their length
+    // is potentially unbounded.
+    return true;
+  }
+
+  Status EndExtensions() override { return true; }  // Nothing to finish.
+
+  size_t MaxBits() const { return max_bits_; }  // Sum over visited fields.
+
+  const char* VisitorName() override { return "MaxBitsVisitor"; }
+
+ private:
+  size_t max_bits_ = 0;  // Running total, in bits.
+};
+
+class CanEncodeVisitor : public VisitorBase {  // Validates and sizes fields.
+ public:
+  explicit CanEncodeVisitor(bool print_sizes)
+      : VisitorBase(print_sizes), print_sizes_(print_sizes) {}
+
+  Status Bits(const size_t bits, const uint32_t /*default_value*/,
+              uint32_t* JXL_RESTRICT value) override {
+    size_t encoded_bits = 0;
+    ok_ &= BitsCoder::CanEncode(bits, *value, &encoded_bits);
+    if (print_sizes_) Trace("u(%zu) = %u\n", bits, *value);
+    encoded_bits_ += encoded_bits;
+    return true;  // Failures are latched in ok_ and surface in GetSizes.
+  }
+
+  Status U32(const U32Enc enc, const uint32_t /*default_value*/,
+             uint32_t* JXL_RESTRICT value) override {
+    size_t encoded_bits = 0;
+    ok_ &= U32Coder::CanEncode(enc, *value, &encoded_bits);
+    if (print_sizes_) Trace("U32(%zu) = %u\n", encoded_bits, *value);
+    encoded_bits_ += encoded_bits;
+    return true;
+  }
+
+  Status U64(const uint64_t /*default_value*/,
+             uint64_t* JXL_RESTRICT value) override {
+    size_t encoded_bits = 0;
+    ok_ &= U64Coder::CanEncode(*value, &encoded_bits);
+    if (print_sizes_) {
+      Trace("U64(%zu) = %" PRIu64 "\n", encoded_bits, *value);
+    }
+    encoded_bits_ += encoded_bits;
+    return true;
+  }
+
+  Status F16(const float /*default_value*/,
+             float* JXL_RESTRICT value) override {
+    size_t encoded_bits = 0;
+    ok_ &= F16Coder::CanEncode(*value, &encoded_bits);
+    if (print_sizes_) {
+      Trace("F16(%zu) = %.6f\n", encoded_bits, static_cast<double>(*value));
+    }
+    encoded_bits_ += encoded_bits;
+    return true;
+  }
+
+  Status AllDefault(const Fields& fields,
+                    bool* JXL_RESTRICT all_default) override {
+    *all_default = Bundle::AllDefault(fields);  // Recompute from the fields.
+    JXL_RETURN_IF_ERROR(Bool(true, all_default));
+    return *all_default;  // True lets the caller skip the remaining fields.
+  }
+
+  Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) override {
+    JXL_QUIET_RETURN_IF_ERROR(VisitorBase::BeginExtensions(extensions));
+    extensions_ = *extensions;
+    if (*extensions != 0) {
+      JXL_ASSERT(pos_after_ext_ == 0);
+      pos_after_ext_ = encoded_bits_;
+      JXL_ASSERT(pos_after_ext_ != 0);  // visited "extensions"
+    }
+    return true;
+  }
+  // EndExtensions = default.
+
+  Status GetSizes(size_t* JXL_RESTRICT extension_bits,
+                  size_t* JXL_RESTRICT total_bits) {
+    JXL_RETURN_IF_ERROR(ok_);  // Some visited field had no valid encoding.
+    *extension_bits = 0;
+    *total_bits = encoded_bits_;
+    // Only if extension field was nonzero will we encode their sizes.
+    if (pos_after_ext_ != 0) {
+      JXL_ASSERT(encoded_bits_ >= pos_after_ext_);
+      *extension_bits = encoded_bits_ - pos_after_ext_;
+      // Also need to encode *extension_bits and bill it to *total_bits.
+      size_t encoded_bits = 0;
+      ok_ &= U64Coder::CanEncode(*extension_bits, &encoded_bits);
+      *total_bits += encoded_bits;
+
+      // TODO(janwas): support encoding individual extension sizes. We
+      // currently ascribe all bits to the first and send zeros for the
+      // others.
+      for (size_t i = 1; i < hwy::PopCount(extensions_); ++i) {
+        encoded_bits = 0;
+        ok_ &= U64Coder::CanEncode(0, &encoded_bits);
+        *total_bits += encoded_bits;
+      }
+    }
+    return true;
+  }
+
+  const char* VisitorName() override { return "CanEncodeVisitor"; }
+
+ private:
+  const bool print_sizes_;
+  bool ok_ = true;  // Cleared once any CanEncode call fails.
+  size_t encoded_bits_ = 0;  // Running total over the visited fields.
+  uint64_t extensions_ = 0;  // Copy of the bundle's extensions bitfield.
+  // Snapshot of encoded_bits_ after visiting the extension field, but NOT
+  // including the hidden extension sizes.
+  uint64_t pos_after_ext_ = 0;
+};
+
+class WriteVisitor : public VisitorBase {  // Encodes fields to a BitWriter.
+ public:
+  WriteVisitor(const size_t extension_bits, BitWriter* JXL_RESTRICT writer)
+      : extension_bits_(extension_bits), writer_(writer) {}
+
+  Status Bits(const size_t bits, const uint32_t /*default_value*/,
+              uint32_t* JXL_RESTRICT value) override {
+    ok_ &= BitsCoder::Write(bits, *value, writer_);  // Checked via OK().
+    return true;
+  }
+  Status U32(const U32Enc enc, const uint32_t /*default_value*/,
+             uint32_t* JXL_RESTRICT value) override {
+    ok_ &= U32Coder::Write(enc, *value, writer_);
+    return true;
+  }
+
+  Status U64(const uint64_t /*default_value*/,
+             uint64_t* JXL_RESTRICT value) override {
+    ok_ &= U64Coder::Write(*value, writer_);
+    return true;
+  }
+
+  Status F16(const float /*default_value*/,
+             float* JXL_RESTRICT value) override {
+    ok_ &= F16Coder::Write(*value, writer_);
+    return true;
+  }
+
+  Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) override {
+    JXL_QUIET_RETURN_IF_ERROR(VisitorBase::BeginExtensions(extensions));
+    if (*extensions == 0) {
+      JXL_ASSERT(extension_bits_ == 0);
+      return true;
+    }
+    // TODO(janwas): extend API to pass in array of extension_bits, one per
+    // extension. We currently ascribe all bits to the first extension, but
+    // this is only an encoder limitation. NOTE: extension_bits_ can be zero
+    // if an extension does not require any additional fields.
+    ok_ &= U64Coder::Write(extension_bits_, writer_);
+    // For each nonzero bit except the lowest/first (already written):
+    for (uint64_t remaining_extensions = *extensions & (*extensions - 1);
+         remaining_extensions != 0;
+         remaining_extensions &= remaining_extensions - 1) {
+      ok_ &= U64Coder::Write(0, writer_);
+    }
+    return true;
+  }
+  // EndExtensions = default.
+
+  Status OK() const { return ok_; }  // False if any coder Write failed.
+
+  const char* VisitorName() override { return "WriteVisitor"; }
+
+ private:
+  const size_t extension_bits_;  // Size written for the first extension.
+  BitWriter* JXL_RESTRICT writer_;
+  bool ok_ = true;
+};
+
+}  // namespace
+
+void Bundle::Init(Fields* fields) {  // Non-recursive reset to defaults.
+  const char* trace_label = PrintVisitors() ? "-- Init\n" : "";
+  InitVisitor init_visitor;
+  const Status visited = init_visitor.Visit(fields, trace_label);
+  if (!visited) { JXL_ABORT("Init should never fail"); }
+}
+void Bundle::SetDefault(Fields* fields) {  // Recursive reset to defaults.
+  const char* trace_label = PrintVisitors() ? "-- SetDefault\n" : "";
+  SetDefaultVisitor default_visitor;
+  const Status visited = default_visitor.Visit(fields, trace_label);
+  if (!visited) { JXL_ABORT("SetDefault should never fail"); }
+}
+bool Bundle::AllDefault(const Fields& fields) {  // True iff all == default.
+  const bool tracing = PrintVisitors() || PrintAllDefault();
+  const char* trace_label = tracing ? "[[AllDefault\n" : "";
+  AllDefaultVisitor checker(/*print_all_default=*/PrintAllDefault());
+  if (!checker.VisitConst(fields, trace_label)) {
+    JXL_ABORT("AllDefault should never fail");
+  }
+  const bool is_all_default = checker.AllDefault();
+  if (PrintAllDefault()) printf("  %d]]\n", is_all_default);
+  return is_all_default;
+}
+size_t Bundle::MaxBits(const Fields& fields) {
+  MaxBitsVisitor visitor;
+#if JXL_ENABLE_ASSERT
+  Status ret =  // Keep the result so that the assertion below can test it.
+#else
+  (void)  // Otherwise explicitly discard the result.
+#endif  // JXL_ENABLE_ASSERT
+      visitor.VisitConst(fields, PrintVisitors() ? "-- MaxBits\n" : "");
+  JXL_ASSERT(ret);  // NOTE(review): must not evaluate `ret` in the #else case.
+  return visitor.MaxBits();
+}
+Status Bundle::CanEncode(const Fields& fields, size_t* extension_bits,
+                         size_t* total_bits) {
+  const char* label = (PrintSizes() || PrintVisitors()) ? "[[CanEncode\n" : "";
+  CanEncodeVisitor sizer(/*print_sizes=*/PrintSizes());
+  JXL_QUIET_RETURN_IF_ERROR(sizer.VisitConst(fields, label));  // Tally bits.
+  JXL_QUIET_RETURN_IF_ERROR(sizer.GetSizes(extension_bits, total_bits));
+  if (PrintSizes()) printf("  %zu]]\n", *total_bits);
+  return true;  // Both outputs were filled in by GetSizes.
+}
+Status Bundle::Read(BitReader* reader, Fields* fields) {
+  const char* trace_label = PrintVisitors() ? "-- Read\n" : "";
+  ReadVisitor decoder(reader, /*print_read=*/PrintRead());
+  JXL_RETURN_IF_ERROR(decoder.Visit(fields, trace_label));
+  return decoder.OK();  // Surfaces F16 decode failures latched by the visit.
+}
+bool Bundle::CanRead(BitReader* reader, Fields* fields) {
+  // Only "ran out of input" counts as unreadable. Any other outcome, errors
+  // included, means enough bytes were available to reach a verdict; call
+  // Read() to learn which error (if any) occurred.
+  const char* trace_label = PrintVisitors() ? "-- Read\n" : "";
+  ReadVisitor probe(reader, /*print_read=*/PrintRead());
+  return probe.Visit(fields, trace_label).code() != StatusCode::kNotEnoughBytes;
+}
+Status Bundle::Write(const Fields& fields, BitWriter* writer, size_t layer,
+                     AuxOut* aux_out) {
+  // Validate the fields first; this also yields the exact sizes in bits.
+  size_t ext_size_bits, bundle_size_bits;
+  JXL_RETURN_IF_ERROR(CanEncode(fields, &ext_size_bits, &bundle_size_bits));
+  const char* trace_label = PrintVisitors() ? "-- Write\n" : "";
+  BitWriter::Allotment reservation(writer, bundle_size_bits);
+  WriteVisitor emitter(ext_size_bits, writer);
+  JXL_RETURN_IF_ERROR(emitter.VisitConst(fields, trace_label));
+  JXL_RETURN_IF_ERROR(emitter.OK());
+  ReclaimAndCharge(writer, &reservation, layer, aux_out);
+  return true;
+}
+
+size_t U32Coder::MaxEncodedBits(const U32Enc enc) {
+  // 2 selector bits plus the widest payload; direct values have no payload.
+  size_t widest_payload = 0;
+  for (uint32_t s = 0; s < 4; ++s) {
+    const U32Distr distr = enc.GetDistr(s);
+    if (!distr.IsDirect()) {
+      const size_t payload = distr.ExtraBits();
+      if (payload > widest_payload) widest_payload = payload;
+    }
+  }
+  return 2 + widest_payload;
+}
+
+Status U32Coder::CanEncode(const U32Enc enc, const uint32_t value,
+                           size_t* JXL_RESTRICT encoded_bits) {
+  uint32_t sel;  // Required by ChooseSelector; not otherwise used here.
+  size_t bits;
+  const Status feasible = ChooseSelector(enc, value, &sel, &bits);
+  *encoded_bits = feasible ? bits : 0;  // Zero size if no selector fits.
+  return feasible;
+}
+
+uint32_t U32Coder::Read(const U32Enc enc, BitReader* JXL_RESTRICT reader) {
+  // A 2-bit selector picks one of the four distributions in `enc`.
+  const uint32_t sel = reader->ReadFixedBits<2>();
+  const U32Distr distr = enc.GetDistr(sel);
+  if (distr.IsDirect()) return distr.Direct();  // No payload to read.
+  // Otherwise the value is a payload of ExtraBits() bits plus an offset.
+  const uint32_t payload = reader->ReadBits(distr.ExtraBits());
+  return payload + distr.Offset();
+}
+
+// Fails, writing nothing, if no distribution in `enc` can represent `value`.
+Status U32Coder::Write(const U32Enc enc, const uint32_t value,
+                       BitWriter* JXL_RESTRICT writer) {
+  uint32_t chosen;
+  size_t encoded_size;
+  JXL_RETURN_IF_ERROR(ChooseSelector(enc, value, &chosen, &encoded_size));
+
+  // The 2-bit selector always comes first.
+  writer->Write(2, chosen);
+
+  const U32Distr distr = enc.GetDistr(chosen);
+  // A direct distribution is fully described by its selector.
+  if (distr.IsDirect()) return true;
+  const uint32_t base = distr.Offset();
+  JXL_ASSERT(value >= base);
+  writer->Write(encoded_size - 2, value - base);
+  return true;
+}
+
+Status U32Coder::ChooseSelector(const U32Enc enc, const uint32_t value,
+                                uint32_t* JXL_RESTRICT selector,
+                                size_t* JXL_RESTRICT total_bits) {
+#if JXL_ENABLE_ASSERT
+  const size_t bits_required = 32 - Num0BitsAboveMS1Bit(value);
+#endif  // JXL_ENABLE_ASSERT
+  JXL_ASSERT(bits_required <= 32);
+
+  // Sentinel cost: larger than any real encoding, so it doubles as the
+  // "nothing feasible found yet" marker.
+  constexpr size_t kNoEncoding = 64;
+  *selector = 0;
+  *total_bits = kNoEncoding;
+
+  // The distributions are not assumed to be ordered by cost, so all four are
+  // examined and the cheapest feasible one wins.
+  for (uint32_t candidate = 0; candidate < 4; ++candidate) {
+    const U32Distr distr = enc.GetDistr(candidate);
+    if (distr.IsDirect()) {
+      if (distr.Direct() != value) continue;
+      // An exact direct match costs only the selector; nothing can beat it.
+      *selector = candidate;
+      *total_bits = 2;
+      return true;
+    }
+    const size_t payload = distr.ExtraBits();
+    const uint32_t base = distr.Offset();
+    // Representable range is [base, base + 2^payload).
+    const bool in_range = value >= base && value < base + (1ULL << payload);
+    const size_t cost = 2 + payload;
+    if (in_range && cost < *total_bits) {
+      *selector = candidate;
+      *total_bits = cost;
+    }
+  }
+
+  if (*total_bits == kNoEncoding) {
+    return JXL_FAILURE("No feasible selector for %u", value);
+  }
+  return true;
+}
+
+uint64_t U64Coder::Read(BitReader* JXL_RESTRICT reader) {
+  // A 2-bit selector chooses between three short forms and a varint.
+  switch (reader->ReadFixedBits<2>()) {
+    case 0:
+      return 0;
+    case 1:  // 4-bit payload: 1..16
+      return 1 + reader->ReadFixedBits<4>();
+    case 2:  // 8-bit payload: 17..272
+      return 17 + reader->ReadFixedBits<8>();
+    default:
+      break;
+  }
+
+  // Varint: a 12-bit group, then 8-bit groups each preceded by a "more" bit;
+  // the group at bit 60 is only 4 bits wide and ends the sequence implicitly.
+  uint64_t value = reader->ReadFixedBits<12>();
+
+  for (uint64_t pos = 12; reader->ReadFixedBits<1>(); pos += 8) {
+    if (pos == 60) {
+      value |= static_cast<uint64_t>(reader->ReadFixedBits<4>()) << pos;
+      break;
+    }
+    value |= static_cast<uint64_t>(reader->ReadFixedBits<8>()) << pos;
+  }
+
+  return value;
+}
+
+// Always returns true: every uint64_t value has an encoding.
+Status U64Coder::Write(uint64_t value, BitWriter* JXL_RESTRICT writer) {
+  if (value == 0) {  // Selector 0: no payload.
+    writer->Write(2, 0);
+    return true;
+  }
+  if (value <= 16) {  // Selector 1: 4-bit payload, values 1..16.
+    writer->Write(2, 1);
+    writer->Write(4, value - 1);
+    return true;
+  }
+  if (value <= 272) {  // Selector 2: 8-bit payload, values 17..272.
+    writer->Write(2, 2);
+    writer->Write(8, value - 17);
+    return true;
+  }
+
+  // Selector 3: varint. The low 12 bits go first, followed by 8-bit groups
+  // that are each announced by a 1 bit.
+  writer->Write(2, 3);
+  writer->Write(12, value & 4095);
+  value >>= 12;
+  for (int consumed = 12; value > 0 && consumed < 60; consumed += 8) {
+    writer->Write(1, 1);
+    writer->Write(8, value & 255);
+    value >>= 8;
+  }
+
+  if (value == 0) {
+    // No bits remain: terminate the varint with a 0 bit.
+    writer->Write(1, 0);
+  } else {
+    // Bits 60..63 remain: announce them and write the final 4-bit group,
+    // which needs no terminator because the sequence cannot continue.
+    writer->Write(1, 1);
+    writer->Write(4, value & 15);
+  }
+  return true;
+}
+
+// Never fails; its purpose is to report the encoded size of `value`.
+Status U64Coder::CanEncode(uint64_t value, size_t* JXL_RESTRICT encoded_bits) {
+  if (value == 0) {
+    *encoded_bits = 2;  // selector only
+    return true;
+  }
+  if (value <= 16) {
+    *encoded_bits = 2 + 4;  // selector + 4 payload bits
+    return true;
+  }
+  if (value <= 272) {
+    *encoded_bits = 2 + 8;  // selector + 8 payload bits
+    return true;
+  }
+  // Varint: selector + 12 payload bits, then 1 continuation + 8 payload bits
+  // for every further group that starts below bit 60.
+  size_t bits = 2 + 12;
+  value >>= 12;
+  for (int consumed = 12; value > 0 && consumed < 60; consumed += 8) {
+    bits += 1 + 8;
+    value >>= 8;
+  }
+  // Ends with a lone stop bit, or a continuation bit plus the last 4 bits.
+  bits += (value > 0) ? (1 + 4) : 1;
+  *encoded_bits = bits;
+  return true;
+}
+
+Status F16Coder::Read(BitReader* JXL_RESTRICT reader,
+                      float* JXL_RESTRICT value) {
+  // binary16 layout: 1 sign bit, 5 exponent bits, 10 mantissa bits.
+  const uint32_t half = reader->ReadFixedBits<16>();
+  const uint32_t sign_bit = half >> 15;
+  const uint32_t exp16 = (half >> 10) & 0x1F;
+  const uint32_t frac16 = half & 0x3FF;
+  if (JXL_UNLIKELY(exp16 == 31)) {
+    return JXL_FAILURE("F16 infinity or NaN are not supported");
+  }
+
+  if (JXL_UNLIKELY(exp16 == 0)) {
+    // Zero or subnormal: magnitude is frac16 / 1024 * 2^-14.
+    const float magnitude = (1.0f / 16384) * (frac16 * (1.0f / 1024));
+    *value = sign_bit ? -magnitude : magnitude;
+    return true;
+  }
+
+  // Normal number: rebias the exponent (15 -> 127) and widen the mantissa
+  // (10 -> 23 bits), then reinterpret the pattern as a binary32.
+  const uint32_t bits32 =
+      (sign_bit << 31) | ((exp16 + (127 - 15)) << 23) | (frac16 << (23 - 10));
+  memcpy(value, &bits32, sizeof(bits32));
+  return true;
+}
+
+Status F16Coder::Write(float value, BitWriter* JXL_RESTRICT writer) {
+  uint32_t bits32;
+  memcpy(&bits32, &value, sizeof(bits32));  // Access the binary32 pattern.
+  const uint32_t sign = bits32 >> 31;
+  const uint32_t biased_exp32 = (bits32 >> 23) & 0xFF;
+  const uint32_t mantissa32 = bits32 & 0x7FFFFF;
+
+  const int32_t exp = static_cast<int32_t>(biased_exp32) - 127;  // Unbiased.
+  if (JXL_UNLIKELY(exp > 15)) {  // Also true for binary32 Inf/NaN (exp 128).
+    return JXL_FAILURE("Too big to encode, CanEncode should return false");
+  }
+
+  // Tiny or zero => zero.
+  if (exp < -24) {
+    writer->Write(16, 0);  // The sign is dropped: all 16 bits are zero.
+    return true;
+  }
+
+  uint32_t biased_exp16, mantissa16;
+
+  // exp = [-24, -15] => subnormal: make the implicit leading 1 explicit.
+  if (JXL_UNLIKELY(exp < -14)) {
+    biased_exp16 = 0;
+    const uint32_t sub_exp = static_cast<uint32_t>(-14 - exp);
+    JXL_ASSERT(1 <= sub_exp && sub_exp < 11);
+    mantissa16 = (1 << (10 - sub_exp)) + (mantissa32 >> (13 + sub_exp));
+  } else {
+    // exp = [-14, 15]
+    biased_exp16 = static_cast<uint32_t>(exp + 15);
+    JXL_ASSERT(1 <= biased_exp16 && biased_exp16 < 31);
+    mantissa16 = mantissa32 >> 13;  // Truncates the low 13 mantissa bits.
+  }
+
+  JXL_ASSERT(mantissa16 < 1024);
+  const uint32_t bits16 = (sign << 15) | (biased_exp16 << 10) | mantissa16;
+  JXL_ASSERT(bits16 < 0x10000);
+  writer->Write(16, bits16);
+  return true;
+}
+
+Status F16Coder::CanEncode(float value, size_t* JXL_RESTRICT encoded_bits) {
+  *encoded_bits = MaxEncodedBits();  // Fixed size, set even on failure.
+  if (!std::isfinite(value)) {
+    return JXL_FAILURE("Should not attempt to store NaN and infinity");
+  }
+  return std::abs(value) <= 65504.0f;  // 65504 is the largest finite binary16.
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/fields.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/fields.h
new file mode 100644
index 0000000000..244b96ff73
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/fields.h
@@ -0,0 +1,300 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_FIELDS_H_
+#define LIB_JXL_FIELDS_H_
+
+// Forward/backward-compatible 'bundles' with auto-serialized 'fields'.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cinttypes>
+#include <cmath>  // abs
+#include <cstdarg>
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+// Integer coders: BitsCoder (raw), U32Coder (table), U64Coder (varint).
+
+// Reads/writes a given (fixed) number of bits <= 32.
+class BitsCoder {
+ public:
+  static size_t MaxEncodedBits(const size_t bits) { return bits; }
+
+  static Status CanEncode(const size_t bits, const uint32_t value,
+                          size_t* JXL_RESTRICT encoded_bits) {
+    *encoded_bits = bits;  // Reported even if the range check below fails.
+    if (value >= (1ULL << bits)) {  // 64-bit shift so that bits == 32 works.
+      return JXL_FAILURE("Value %u too large for %zu bits", value, bits);
+    }
+    return true;
+  }
+
+  static uint32_t Read(const size_t bits, BitReader* JXL_RESTRICT reader) {
+    return reader->ReadBits(bits);
+  }
+
+  // Returns false, writing nothing, if `value` does not fit in `bits` bits.
+  static Status Write(const size_t bits, const uint32_t value,
+                      BitWriter* JXL_RESTRICT writer) {
+    if (value >= (1ULL << bits)) {
+      return JXL_FAILURE("Value %u too large to encode in %zu bits", value,
+                         bits);
+    }
+    writer->Write(bits, value);
+    return true;
+  }
+};
+
+// Encodes u32 using a lookup table and/or extra bits, governed by a per-field
+// encoding `enc` which consists of four distributions `d` chosen via a 2-bit
+// selector (least significant = 0). Each d may have two modes:
+// - direct: if d.IsDirect(), the value is d.Direct();
+// - offset: the value is derived from d.ExtraBits() extra bits plus d.Offset();
+// This encoding is denser than Exp-Golomb or Gamma codes when both small and
+// large values occur.
+//
+// Examples:
+// Direct: U32Enc(Val(8), Val(16), Val(32), Bits(6)), value 32 => 10b.
+// Offset: U32Enc(Val(0), BitsOffset(1, 1), BitsOffset(2, 3), BitsOffset(8, 8))
+//   defines the following prefix code:
+//   00 -> 0
+//   01x -> 1..2
+//   10xx -> 3..7
+//   11xxxxxxxx -> 8..263
+class U32Coder {
+ public:
+  static size_t MaxEncodedBits(U32Enc enc);  // 2 + largest ExtraBits().
+  static Status CanEncode(U32Enc enc, uint32_t value,
+                          size_t* JXL_RESTRICT encoded_bits);
+  static uint32_t Read(U32Enc enc, BitReader* JXL_RESTRICT reader);
+
+  // Returns false if no distribution in `enc` can represent `value`.
+  static Status Write(U32Enc enc, uint32_t value,
+                      BitWriter* JXL_RESTRICT writer);
+
+ private:
+  static Status ChooseSelector(U32Enc enc, uint32_t value,
+                               uint32_t* JXL_RESTRICT selector,
+                               size_t* JXL_RESTRICT total_bits);
+};
+
+// Encodes 64-bit unsigned integers with a fixed distribution, taking 2 bits
+// to encode 0, 6 bits to encode 1 to 16, 10 bits to encode 17 to 272, 15 bits
+// to encode up to 4095, and on the order of log2(value) * 1.125 bits for
+// larger values.
+class U64Coder {
+ public:
+  static constexpr size_t MaxEncodedBits() {
+    return 2 + 12 + 6 * (8 + 1) + (4 + 1);  // selector, 12, 6 groups, tail
+  }
+
+  static uint64_t Read(BitReader* JXL_RESTRICT reader);
+
+  // Always returns true: every uint64_t value has a valid encoding.
+  static Status Write(uint64_t value, BitWriter* JXL_RESTRICT writer);
+
+  // Can always encode, but useful because it also returns bit size.
+  static Status CanEncode(uint64_t value, size_t* JXL_RESTRICT encoded_bits);
+};
+
+// IEEE 754 half-precision (binary16). Refuses to read/write NaN/Inf.
+class F16Coder {
+ public:
+  static constexpr size_t MaxEncodedBits() { return 16; }
+
+  // Returns false if the 16 bits read encode NaN or infinity.
+  static Status Read(BitReader* JXL_RESTRICT reader, float* JXL_RESTRICT value);
+
+  // Returns false if the value is too large to encode (incl. NaN/infinity).
+  static Status Write(float value, BitWriter* JXL_RESTRICT writer);
+  static Status CanEncode(float value, size_t* JXL_RESTRICT encoded_bits);
+};
+
+// A "bundle" is a forward- and backward compatible collection of fields.
+// They are used for SizeHeader/FrameHeader/GroupHeader. Bundles can be
+// extended by appending(!) fields. Optional fields may be omitted from the
+// bitstream by conditionally visiting them. When reading new bitstreams with
+// old code, we skip unknown fields at the end of the bundle. This requires
+// storing the amount of extra appended bits, and that fields are visited in
+// chronological order of being added to the format, because old decoders
+// cannot skip some future fields and resume reading old fields. Similarly,
+// new readers query bits in an "extensions" field to skip (groups of) fields
+// not present in old bitstreams. Note that each bundle must include an
+// "extensions" field prior to freezing the format, otherwise it cannot be
+// extended.
+//
+// To ensure interoperability, there will be no opaque fields.
+//
+// HOWTO:
+// - basic usage: define a struct with member variables ("fields") and a
+//   VisitFields(v) member function that calls v->U32/Bool etc. for each
+//   field, specifying their default values. The ctor must call
+//   Bundle::Init(this).
+//
+// - print a trace of visitors: ensure each bundle has a static Name() member
+//   function, and change Bundle::Print* to return true.
+//
+// - optional fields: in VisitFields, add if (v->Conditional(your_condition))
+//   { v->Bool(default, &field); }. This prevents reading/writing field
+//   if !your_condition, which is typically computed from a prior field.
+//   WARNING: to ensure all fields are initialized, do not add an else branch;
+//   instead add another if (v->Conditional(!your_condition)).
+//
+// - repeated fields: for dynamic sizes, use e.g. std::vector and in
+//   VisitFields, if (v->IsReading()) field.resize(size) before accessing field.
+//   For static or bounded sizes, use an array or std::array. In all cases,
+//   simply visit each array element as if it were a normal field.
+//
+// - nested bundles: add a bundle as a normal field and in VisitFields call
+//   JXL_RETURN_IF_ERROR(v->VisitNested(&nested));
+//
+// - allow future extensions: define a "uint64_t extensions" field and call
+//   v->BeginExtensions(&extensions) after visiting all non-extension fields,
+//   and `return v->EndExtensions();` after the last extension field.
+//
+// - encode an entire bundle in one bit if ALL its fields equal their default
+//   values: add a "mutable bool all_default" field and as the first visitor:
+//   if (v->AllDefault(*this, &all_default)) {
+//     // Overwrite all serialized fields, but not any nonserialized_*.
+//     v->SetDefault(this);
+//     return true;
+//   }
+//   Note: if extensions are present, AllDefault() == false.
+
+class Bundle {  // Static-only collection of operations on Fields bundles.
+ public:
+  static constexpr size_t kMaxExtensions = 64;  // bits in u64
+
+  // Debug switch (edit to return true): print the type of each visitor called.
+  static constexpr bool PrintVisitors() { return false; }
+  // Debug switch: print default value for each field and AllDefault result.
+  static constexpr bool PrintAllDefault() { return false; }
+  // Debug switch: print values decoded for each field in Read.
+  static constexpr bool PrintRead() { return false; }
+  // Debug switch: print size for each field and CanEncode total_bits.
+  static constexpr bool PrintSizes() { return false; }
+
+  // Initializes fields to the default values. Does not recurse into nested
+  // fields: this function is intended to be called in the constructors, so
+  // each nested field will already have initialized itself.
+  static void Init(Fields* JXL_RESTRICT fields);
+
+  // Similar to Init, but recursive to nested fields.
+  static void SetDefault(Fields* JXL_RESTRICT fields);
+
+  // Returns whether ALL fields (including `extensions`, if present) are equal
+  // to their default value.
+  static bool AllDefault(const Fields& fields);
+
+  // Returns max number of bits required to encode the bundle type of `fields`.
+  static size_t MaxBits(const Fields& fields);
+
+  // Returns whether all of a bundle's fields can be encoded, i.e. they have a
+  // valid representation. If so, "*total_bits" is the exact number of bits
+  // required (presumably "*extension_bits" is the extensions' share - TODO confirm). Called by Write.
+  static Status CanEncode(const Fields& fields,
+                          size_t* JXL_RESTRICT extension_bits,
+                          size_t* JXL_RESTRICT total_bits);
+
+  static Status Read(BitReader* reader, Fields* JXL_RESTRICT fields);  // Decodes `fields` from `reader`.
+
+  // Returns whether enough bits are available to fully read this bundle using
+  // Read. Also returns true in case of a codestream error (other than not being
+  // large enough): that means enough bits are available to determine there's an
+  // error, use Read to get such error status.
+  // NOTE: this advances the BitReader, a different one pointing back at the
+  // original bit position in the codestream must be created to use Read after
+  // this.
+  static bool CanRead(BitReader* reader, Fields* JXL_RESTRICT fields);
+
+  static Status Write(const Fields& fields, BitWriter* JXL_RESTRICT writer,
+                      size_t layer, AuxOut* aux_out);  // NOTE(review): aux_out appears to record the bits written under `layer`.
+
+ private:  // (no private members)
+};
+
+// Different subclasses of Visitor are passed to implementations of Fields
+// throughout their lifetime. Templates used to be used for this, but dynamic
+// polymorphism produces more compact executables than template reification did.
+class Visitor {
+ public:
+  virtual ~Visitor() = default;
+  virtual Status Visit(Fields* fields, const char* visitor_name) = 0;  // VisitNested() forwards here with an empty name.
+
+  virtual Status Bool(bool default_value, bool* JXL_RESTRICT value) = 0;
+  virtual Status U32(U32Enc, uint32_t, uint32_t*) = 0;  // (encoding, default_value, value)
+
+  // Helper to construct U32Enc from four U32Distr, then forwards to the U32 above.
+  Status U32(const U32Distr d0, const U32Distr d1, const U32Distr d2,
+             const U32Distr d3, const uint32_t default_value,
+             uint32_t* JXL_RESTRICT value) {
+    return U32(U32Enc(d0, d1, d2, d3), default_value, value);
+  }
+
+  template <typename EnumT>
+  Status Enum(const EnumT default_value, EnumT* JXL_RESTRICT value) {
+    uint32_t u32 = static_cast<uint32_t>(*value);  // Visited as a U32 with the encoding below:
+    // 00 -> 0
+    // 01 -> 1
+    // 10xxxx -> 2..17
+    // 11yyyyyy -> 18..81
+    JXL_RETURN_IF_ERROR(U32(Val(0), Val(1), BitsOffset(4, 2), BitsOffset(6, 18),
+                            static_cast<uint32_t>(default_value), &u32));
+    *value = static_cast<EnumT>(u32);  // Stored before validation.
+    return EnumValid(*value);  // Result of EnumValid() for the stored value.
+  }
+
+  virtual Status Bits(size_t bits, uint32_t default_value,
+                      uint32_t* JXL_RESTRICT value) = 0;
+  virtual Status U64(uint64_t default_value, uint64_t* JXL_RESTRICT value) = 0;
+  virtual Status F16(float default_value, float* JXL_RESTRICT value) = 0;
+
+  // Returns whether VisitFields should visit some subsequent fields.
+  // "condition" is typically from prior fields, e.g. flags.
+  // Overridden by InitVisitor and MaxBitsVisitor; the default returns it as-is.
+  virtual Status Conditional(bool condition) { return condition; }
+
+  // Overridden by InitVisitor, AllDefaultVisitor and CanEncodeVisitor.
+  virtual Status AllDefault(const Fields& /*fields*/,
+                            bool* JXL_RESTRICT all_default) {
+    JXL_RETURN_IF_ERROR(Bool(true, all_default));  // Visited as a Bool field whose default is true.
+    return *all_default;
+  }
+
+  virtual void SetDefault(Fields* /*fields*/) {
+    // Do nothing by default, this is overridden by ReadVisitor.
+  }
+
+  // Returns the result of visiting a nested Bundle.
+  // Overridden by InitVisitor.
+  virtual Status VisitNested(Fields* fields) { return Visit(fields, ""); }
+
+  // Overridden by ReadVisitor. Enables dynamically-sized fields.
+  virtual bool IsReading() const { return false; }
+
+  virtual Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) = 0;  // Called after all non-extension fields.
+  virtual Status EndExtensions() = 0;  // Called after the last extension field.
+
+  // For debugging
+  virtual const char* VisitorName() = 0;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_FIELDS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/fields_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/fields_test.cc
new file mode 100644
index 0000000000..78d372dfb3
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/fields_test.cc
@@ -0,0 +1,434 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/fields.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+
+namespace jxl {
+namespace {
+
+// Ensures `value` round-trips (CanEncode, Write, Read) in exactly `expected_bits_written` bits.
+void TestU32Coder(const uint32_t value, const size_t expected_bits_written) {
+  U32Coder coder;
+  const U32Enc enc(Val(0), Bits(4), Val(0x7FFFFFFF), Bits(32));  // The four distributions under test.
+
+  BitWriter writer;
+  BitWriter::Allotment allotment(
+      &writer, RoundUpBitsToByteMultiple(U32Coder::MaxEncodedBits(enc)));
+
+  size_t precheck_pos;  // Size predicted by CanEncode, before anything is written.
+  EXPECT_TRUE(coder.CanEncode(enc, value, &precheck_pos));
+  EXPECT_EQ(expected_bits_written, precheck_pos);
+
+  EXPECT_TRUE(coder.Write(enc, value, &writer));
+  EXPECT_EQ(expected_bits_written, writer.BitsWritten());  // Checked before byte padding is added.
+  writer.ZeroPadToByte();
+  ReclaimAndCharge(&writer, &allotment, 0, nullptr);
+
+  BitReader reader(writer.GetSpan());
+  const uint32_t decoded_value = coder.Read(enc, &reader);
+  EXPECT_EQ(value, decoded_value);
+  EXPECT_TRUE(reader.Close());
+}
+
+TEST(FieldsTest, U32CoderTest) {  // Encoding is (Val(0), Bits(4), Val(0x7FFFFFFF), Bits(32)), see TestU32Coder.
+  TestU32Coder(0, 2);  // Val(0): 2 bits.
+  TestU32Coder(1, 6);  // Bits(4): 2 + 4 bits.
+  TestU32Coder(15, 6);  // Largest value that fits Bits(4).
+  TestU32Coder(0x7FFFFFFF, 2);  // Val(0x7FFFFFFF): 2 bits.
+  TestU32Coder(128, 34);  // Bits(32): 2 + 32 bits.
+  TestU32Coder(0x7FFFFFFEu, 34);
+  TestU32Coder(0x80000000u, 34);
+  TestU32Coder(0xFFFFFFFFu, 34);
+}
+
+void TestU64Coder(const uint64_t value, const size_t expected_bits_written) {  // U64 counterpart of TestU32Coder.
+  U64Coder coder;
+
+  BitWriter writer;
+  BitWriter::Allotment allotment(
+      &writer, RoundUpBitsToByteMultiple(U64Coder::MaxEncodedBits()));
+
+  size_t precheck_pos;  // Size predicted by CanEncode, before anything is written.
+  EXPECT_TRUE(coder.CanEncode(value, &precheck_pos));
+  EXPECT_EQ(expected_bits_written, precheck_pos);
+
+  EXPECT_TRUE(coder.Write(value, &writer));
+  EXPECT_EQ(expected_bits_written, writer.BitsWritten());  // Checked before byte padding is added.
+
+  writer.ZeroPadToByte();
+  ReclaimAndCharge(&writer, &allotment, 0, nullptr);
+
+  BitReader reader(writer.GetSpan());
+  const uint64_t decoded_value = coder.Read(&reader);
+  EXPECT_EQ(value, decoded_value);
+  EXPECT_TRUE(reader.Close());
+}
+
+TEST(FieldsTest, U64CoderTest) {  // Checks values at and around the boundary of every encoded size.
+  // Values that should take 2 bits (selector 00): 0
+  TestU64Coder(0, 2);
+
+  // Values that should take 6 bits (2 for selector, 4 for value): 1..16
+  TestU64Coder(1, 6);
+  TestU64Coder(2, 6);
+  TestU64Coder(8, 6);
+  TestU64Coder(15, 6);
+  TestU64Coder(16, 6);
+
+  // Values that should take 10 bits (2 for selector, 8 for value): 17..272
+  TestU64Coder(17, 10);
+  TestU64Coder(18, 10);
+  TestU64Coder(100, 10);
+  TestU64Coder(271, 10);
+  TestU64Coder(272, 10);
+
+  // Values that should take 15 bits (2 for selector, 12 for value, 1 for varint
+  // end): (0)..273..4095
+  TestU64Coder(273, 15);
+  TestU64Coder(274, 15);
+  TestU64Coder(1000, 15);
+  TestU64Coder(4094, 15);
+  TestU64Coder(4095, 15);
+
+  // Values that should take 24 bits (of which 20 actual value): (0)..4096..1048575
+  TestU64Coder(4096, 24);
+  TestU64Coder(4097, 24);
+  TestU64Coder(10000, 24);
+  TestU64Coder(1048574, 24);
+  TestU64Coder(1048575, 24);
+
+  // Values that should take 33 bits (of which 28 actual value): (0)..1048576..268435455
+  TestU64Coder(1048576, 33);
+  TestU64Coder(1048577, 33);
+  TestU64Coder(10000000, 33);
+  TestU64Coder(268435454, 33);
+  TestU64Coder(268435455, 33);
+
+  // Values that should take 42 bits (of which 36 actual value): (0)..268435456..68719476735
+  TestU64Coder(268435456ull, 42);
+  TestU64Coder(268435457ull, 42);
+  TestU64Coder(1000000000ull, 42);
+  TestU64Coder(68719476734ull, 42);
+  TestU64Coder(68719476735ull, 42);
+
+  // Values that should take 51 bits (of which 44 actual value): (0)..68719476736..17592186044415
+  TestU64Coder(68719476736ull, 51);
+  TestU64Coder(68719476737ull, 51);
+  TestU64Coder(1000000000000ull, 51);
+  TestU64Coder(17592186044414ull, 51);
+  TestU64Coder(17592186044415ull, 51);
+
+  // Values that should take 60 bits (of which 52 actual value):
+  // (0)..17592186044416..4503599627370495
+  TestU64Coder(17592186044416ull, 60);
+  TestU64Coder(17592186044417ull, 60);
+  TestU64Coder(100000000000000ull, 60);
+  TestU64Coder(4503599627370494ull, 60);
+  TestU64Coder(4503599627370495ull, 60);
+
+  // Values that should take 69 bits (of which 60 actual value):
+  // (0)..4503599627370496..1152921504606846975
+  TestU64Coder(4503599627370496ull, 69);
+  TestU64Coder(4503599627370497ull, 69);
+  TestU64Coder(10000000000000000ull, 69);
+  TestU64Coder(1152921504606846974ull, 69);
+  TestU64Coder(1152921504606846975ull, 69);
+
+  // Values that should take 73 bits (of which 64 actual value):
+  // (0)..1152921504606846976..18446744073709551615
+  TestU64Coder(1152921504606846976ull, 73);
+  TestU64Coder(1152921504606846977ull, 73);
+  TestU64Coder(10000000000000000000ull, 73);
+  TestU64Coder(18446744073709551614ull, 73);
+  TestU64Coder(18446744073709551615ull, 73);
+}
+
+Status TestF16Coder(const float value) {  // Returns false iff `value` cannot be encoded; otherwise checks the round trip.
+  F16Coder coder;
+
+  size_t max_encoded_bits;
+  // It is not a fatal error if it can't be encoded.
+  if (!coder.CanEncode(value, &max_encoded_bits)) return false;
+  EXPECT_EQ(F16Coder::MaxEncodedBits(), max_encoded_bits);  // Every encodable value reports the maximum size.
+
+  BitWriter writer;
+  BitWriter::Allotment allotment(&writer,
+                                 RoundUpBitsToByteMultiple(max_encoded_bits));
+
+  EXPECT_TRUE(coder.Write(value, &writer));
+  EXPECT_EQ(F16Coder::MaxEncodedBits(), writer.BitsWritten());
+  writer.ZeroPadToByte();
+  ReclaimAndCharge(&writer, &allotment, 0, nullptr);
+
+  BitReader reader(writer.GetSpan());
+  float decoded_value;
+  EXPECT_TRUE(coder.Read(&reader, &decoded_value));
+  // All values we test can be represented exactly.
+  EXPECT_EQ(value, decoded_value);
+  EXPECT_TRUE(reader.Close());
+  return true;
+}
+
+TEST(FieldsTest, F16CoderTest) {
+  for (float sign : {-1.0f, 1.0f}) {
+    // (assuming IEEE binary16: magnitudes below 2^-14 = 1/16384 would be subnormal; 65504 is the largest finite value)
+    for (float mag : {0.0f, 0.5f, 1.0f, 2.0f, 2.5f, 16.015625f, 1.0f / 4096,
+                      1.0f / 16384, 65504.0f}) {
+      EXPECT_TRUE(TestF16Coder(sign * mag));
+    }
+  }
+
+  // Out of range: TestF16Coder returns false when CanEncode rejects the value.
+  EXPECT_FALSE(TestF16Coder(65504.01f));
+  EXPECT_FALSE(TestF16Coder(-65505.0f));
+}
+
+// Ensures Read(Write()) returns the same SizeHeader fields for several sizes.
+TEST(FieldsTest, TestRoundtripSize) {
+  for (int i = 0; i < 8; i++) {
+    SizeHeader size;
+    ASSERT_TRUE(size.Set(123 + 77 * i, 7 + i));
+
+    size_t extension_bits = 999, total_bits = 999;  // Initialize as garbage.
+    ASSERT_TRUE(Bundle::CanEncode(size, &extension_bits, &total_bits));
+    EXPECT_EQ(0, extension_bits);  // CanEncode must overwrite the garbage value.
+
+    BitWriter writer;
+    ASSERT_TRUE(WriteSizeHeader(size, &writer, 0, nullptr));
+    EXPECT_EQ(total_bits, writer.BitsWritten());  // Predicted size matches what was written.
+    writer.ZeroPadToByte();
+
+    SizeHeader size2;
+    BitReader reader(writer.GetSpan());
+    ASSERT_TRUE(ReadSizeHeader(&reader, &size2));
+    EXPECT_EQ(total_bits, reader.TotalBitsConsumed());  // Reader consumed exactly the written bits.
+    EXPECT_TRUE(reader.Close());
+
+    EXPECT_EQ(size.xsize(), size2.xsize());
+    EXPECT_EQ(size.ysize(), size2.ysize());
+  }
+}
+
+// Ensure all values (origins -1000..18999, sizes 0..19999) can be reached by the encoding.
+TEST(FieldsTest, TestCropRect) {
+  CodecMetadata metadata;
+  for (int32_t i = -1000; i < 19000; ++i) {
+    FrameHeader f(&metadata);
+    f.custom_size_or_origin = true;
+    f.frame_origin.x0 = i;  // Negative origins are exercised too.
+    f.frame_origin.y0 = i;
+    f.frame_size.xsize = 1000 + i;
+    f.frame_size.ysize = 1000 + i;
+    size_t extension_bits = 0, total_bits = 0;
+    ASSERT_TRUE(Bundle::CanEncode(f, &extension_bits, &total_bits));
+    EXPECT_EQ(0, extension_bits);
+    EXPECT_GE(total_bits, 9);  // Lower bound only; exact size is not checked.
+  }
+}
+TEST(FieldsTest, TestPreview) {  // Every square preview size in 1..4359 must be encodable.
+  // (div8 cannot represent 4360, but !div8 can go a little higher)
+  for (uint32_t i = 1; i < 4360; ++i) {
+    PreviewHeader p;
+    ASSERT_TRUE(p.Set(i, i));
+    size_t extension_bits = 0, total_bits = 0;
+    ASSERT_TRUE(Bundle::CanEncode(p, &extension_bits, &total_bits));
+    EXPECT_EQ(0, extension_bits);
+    EXPECT_GE(total_bits, 6);  // Lower bound only; exact size is not checked.
+  }
+}
+
+// Ensures Read(Write()) returns the same FrameHeader `extensions` and `flags`.
+TEST(FieldsTest, TestRoundtripFrame) {
+  CodecMetadata metadata;
+  FrameHeader h(&metadata);
+  h.extensions = 0x800;  // Extension flag set; the check below expects it to add no extension bits.
+
+  size_t extension_bits = 999, total_bits = 999;  // Initialize as garbage.
+  ASSERT_TRUE(Bundle::CanEncode(h, &extension_bits, &total_bits));
+  EXPECT_EQ(0, extension_bits);
+  BitWriter writer;
+  ASSERT_TRUE(WriteFrameHeader(h, &writer, nullptr));
+  EXPECT_EQ(total_bits, writer.BitsWritten());  // Predicted size matches what was written.
+  writer.ZeroPadToByte();
+
+  FrameHeader h2(&metadata);
+  BitReader reader(writer.GetSpan());
+  ASSERT_TRUE(ReadFrameHeader(&reader, &h2));
+  EXPECT_EQ(total_bits, reader.TotalBitsConsumed());  // Reader consumed exactly the written bits.
+  EXPECT_TRUE(reader.Close());
+
+  EXPECT_EQ(h.extensions, h2.extensions);
+  EXPECT_EQ(h.flags, h2.flags);
+}
+
+#ifndef JXL_CRASH_ON_ERROR  // NOTE(review): presumably a failing Status would abort in such builds - confirm.
+// Ensure out-of-bounds values cause CanEncode to report an error.
+TEST(FieldsTest, TestOutOfRange) {
+  SizeHeader h;
+  ASSERT_TRUE(h.Set(0xFFFFFFFFull, 0xFFFFFFFFull));  // Set() itself accepts the values.
+  size_t extension_bits = 999, total_bits = 999;  // Initialize as garbage.
+  ASSERT_FALSE(Bundle::CanEncode(h, &extension_bits, &total_bits));
+}
+#endif  // JXL_CRASH_ON_ERROR
+
+struct OldBundle : public Fields {  // "Old" format: three fields and no extension fields.
+  OldBundle() { Bundle::Init(this); }
+  const char* Name() const override { return "OldBundle"; }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override {
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Val(1), Bits(2), Bits(3), Bits(4), 1, &old_small));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->F16(1.125f, &old_f));
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Bits(7), Bits(12), Bits(16), Bits(32), 0, &old_large));
+
+    JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions));  // No extension fields are visited before EndExtensions.
+    return visitor->EndExtensions();
+  }
+
+  uint32_t old_small;  // Default 1.
+  float old_f;  // Default 1.125f.
+  uint32_t old_large;  // Default 0.
+  uint64_t extensions;
+};
+
+struct NewBundle : public Fields {  // "New" format: OldBundle's fields plus two extension groups.
+  NewBundle() { Bundle::Init(this); }
+  const char* Name() const override { return "NewBundle"; }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override {
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Val(1), Bits(2), Bits(3), Bits(4), 1, &old_small));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->F16(1.125f, &old_f));
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Bits(7), Bits(12), Bits(16), Bits(32), 0, &old_large));
+
+    JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions));
+    if (visitor->Conditional(extensions & 1)) {  // First extension group.
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Val(2), Bits(2), Bits(3), Bits(4), 2, &new_small));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(-2.0f, &new_f));
+    }
+    if (visitor->Conditional(extensions & 2)) {  // Second extension group.
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Bits(9), Bits(12), Bits(16), Bits(32), 0, &new_large));
+    }
+    return visitor->EndExtensions();
+  }
+
+  uint32_t old_small;
+  float old_f;
+  uint32_t old_large;
+  uint64_t extensions;
+
+  // If extensions & 1 (initializers repeat the defaults given in VisitFields)
+  uint32_t new_small = 2;
+  float new_f = -2.0f;
+  // If extensions & 2
+  uint32_t new_large = 0;
+};
+
+TEST(FieldsTest, TestNewDecoderOldData) {  // An OldBundle stream must be readable as a NewBundle.
+  OldBundle old_bundle;
+  old_bundle.old_large = 123;
+  old_bundle.old_f = 3.75f;
+  old_bundle.extensions = 0;
+
+  // Write to bit stream
+  const size_t kMaxOutBytes = 999;
+  BitWriter writer;
+  // Make sure values are initialized by code under test.
+  size_t extension_bits = 12345, total_bits = 12345;
+  ASSERT_TRUE(Bundle::CanEncode(old_bundle, &extension_bits, &total_bits));
+  ASSERT_LE(total_bits, kMaxOutBytes * kBitsPerByte);
+  EXPECT_EQ(0, extension_bits);
+  AuxOut aux_out;
+  ASSERT_TRUE(Bundle::Write(old_bundle, &writer, kLayerHeader, &aux_out));
+
+  BitWriter::Allotment allotment(&writer,
+                                 kMaxOutBytes * kBitsPerByte - total_bits);
+  writer.Write(20, 0xA55A);  // sentinel: read back below, right after the bundle
+  writer.ZeroPadToByte();
+  ReclaimAndCharge(&writer, &allotment, kLayerHeader, nullptr);
+
+  ASSERT_LE(writer.GetSpan().size(), kMaxOutBytes);
+  BitReader reader(writer.GetSpan());
+  NewBundle new_bundle;
+  ASSERT_TRUE(Bundle::Read(&reader, &new_bundle));
+  EXPECT_EQ(reader.TotalBitsConsumed(),
+            aux_out.layers[kLayerHeader].total_bits);
+  EXPECT_EQ(reader.ReadBits(20), 0xA55A);  // Sentinel must follow the bundle immediately.
+  EXPECT_TRUE(reader.Close());
+
+  // Old fields are the same in both
+  EXPECT_EQ(old_bundle.extensions, new_bundle.extensions);
+  EXPECT_EQ(old_bundle.old_small, new_bundle.old_small);
+  EXPECT_EQ(old_bundle.old_f, new_bundle.old_f);
+  EXPECT_EQ(old_bundle.old_large, new_bundle.old_large);
+  // New fields match their defaults
+  EXPECT_EQ(2, new_bundle.new_small);
+  EXPECT_EQ(-2.0f, new_bundle.new_f);
+  EXPECT_EQ(0, new_bundle.new_large);
+}
+
+TEST(FieldsTest, TestOldDecoderNewData) {  // A NewBundle stream must be readable as an OldBundle.
+  NewBundle new_bundle;
+  new_bundle.old_large = 123;
+  new_bundle.extensions = 3;  // Bits 0 and 1: both extension groups of NewBundle are present.
+  new_bundle.new_f = 999.0f;
+  new_bundle.new_large = 456;
+
+  // Write to bit stream
+  constexpr size_t kMaxOutBytes = 999;
+  BitWriter writer;
+  // Make sure values are initialized by code under test.
+  size_t extension_bits = 12345, total_bits = 12345;
+  ASSERT_TRUE(Bundle::CanEncode(new_bundle, &extension_bits, &total_bits));
+  EXPECT_NE(0, extension_bits);
+  AuxOut aux_out;
+  ASSERT_TRUE(Bundle::Write(new_bundle, &writer, kLayerHeader, &aux_out));
+  ASSERT_LE(aux_out.layers[kLayerHeader].total_bits,
+            kMaxOutBytes * kBitsPerByte);
+
+  BitWriter::Allotment allotment(
+      &writer,
+      kMaxOutBytes * kBitsPerByte - aux_out.layers[kLayerHeader].total_bits);
+  // Ensure Read skips the additional fields: the sentinel is read back below.
+  writer.Write(20, 0xA55A);  // sentinel
+  writer.ZeroPadToByte();
+  ReclaimAndCharge(&writer, &allotment, kLayerHeader, nullptr);
+
+  BitReader reader(writer.GetSpan());
+  OldBundle old_bundle;
+  ASSERT_TRUE(Bundle::Read(&reader, &old_bundle));
+  EXPECT_EQ(reader.TotalBitsConsumed(),
+            aux_out.layers[kLayerHeader].total_bits);
+  EXPECT_EQ(reader.ReadBits(20), 0xA55A);  // Sentinel must follow the bundle immediately.
+  EXPECT_TRUE(reader.Close());
+
+  // Old fields are the same in both
+  EXPECT_EQ(new_bundle.extensions, old_bundle.extensions);
+  EXPECT_EQ(new_bundle.old_small, old_bundle.old_small);
+  EXPECT_EQ(new_bundle.old_f, old_bundle.old_f);
+  EXPECT_EQ(new_bundle.old_large, old_bundle.old_large);
+  // (Can't check new fields because old decoder doesn't know about them)
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/filters.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/filters.cc
new file mode 100644
index 0000000000..9cb62c1e94
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/filters.cc
@@ -0,0 +1,112 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/filters.h"
+
+#include <cmath>
+
+#include "lib/jxl/base/profiler.h"
+
+namespace jxl {
+
+Status FilterWeights::Init(const LoopFilter& lf,
+                           const FrameDimensions& frame_dim) {
+  if (lf.epf_iters > 0) {  // The sigma image is only allocated when EPF runs.
+    sigma = ImageF(frame_dim.xsize_blocks + 2 * kSigmaPadding,
+                   frame_dim.ysize_blocks + 2 * kSigmaPadding);  // One value per block, padded on all sides.
+  }
+  if (lf.gab) {  // Gaborish weights are only computed (and validated) when enabled.
+    JXL_RETURN_IF_ERROR(GaborishWeights(lf));
+  }
+  return true;
+}
+
+Status FilterWeights::GaborishWeights(const LoopFilter& lf) {  // Fills and normalizes gab_weights; fails on a near-zero kernel sum.
+  const float kZeroEpsilon = 1e-6;
+
+  gab_weights[0] = 1;  // Layout: three entries per channel, center weight first.
+  gab_weights[1] = lf.gab_x_weight1;
+  gab_weights[2] = lf.gab_x_weight2;
+  gab_weights[3] = 1;
+  gab_weights[4] = lf.gab_y_weight1;
+  gab_weights[5] = lf.gab_y_weight2;
+  gab_weights[6] = 1;
+  gab_weights[7] = lf.gab_b_weight1;
+  gab_weights[8] = lf.gab_b_weight2;
+  // Normalize each channel by: center + 4 * (weight1 + weight2).
+  for (size_t c = 0; c < 3; c++) {
+    const float div = gab_weights[3 * c] +
+                      4 * (gab_weights[3 * c + 1] + gab_weights[3 * c + 2]);
+    if (std::abs(div) < kZeroEpsilon) {  // Avoid dividing by (almost) zero below.
+      return JXL_FAILURE("Gaborish weights lead to near 0 unnormalized kernel");
+    }
+    const float mul = 1.0f / div;
+    gab_weights[3 * c] *= mul;
+    gab_weights[3 * c + 1] *= mul;
+    gab_weights[3 * c + 2] *= mul;
+  }
+  return true;
+}
+
+void FilterPipeline::ApplyFiltersRow(const LoopFilter& lf,
+                                     const FilterWeights& filter_weights,
+                                     ssize_t y) {
+  PROFILER_ZONE("Gaborish+EPF");
+  JXL_DASSERT(num_filters != 0);  // Must be initialized.
+
+  JXL_ASSERT(y < static_cast<ssize_t>(image_rect.ysize() + lf.Padding()));
+
+  // The minimum value of the center row "y" needed to process the current
+  // filter. Starts at -lf.Padding() and grows by each filter's border.
+  ssize_t rows_needed = -static_cast<ssize_t>(lf.Padding());
+
+  // We pass `image_rect.x0() - image_rect.x0() % kBlockDim` as the x0 for
+  // the row_sigma, so to go from an `x` value in the filter to the
+  // corresponding value in row_sigma we use the fact that we mapped
+  // image_rect.x0() in the original image to MaxLeftPadding(image_rect.x0()) in
+  // the input/output rows seen by the filters:
+  // x_in_sigma_row =
+  //    ((x - (image_rect.x0() % kPaddingXRound) + image_rect.x0()) -
+  //     (image_rect.x0() - image_rect.x0() % kBlockDim)) / kBlockDim
+  // x_in_sigma_row =
+  //   (x - image_rect.x0() % kPaddingXRound + image_rect.x0() % kBlockDim) / kBlockDim
+  const size_t sigma_x_offset =
+      image_rect.x0() % kBlockDim -
+      image_rect.x0() % GroupBorderAssigner::kPaddingXRound;  // NOTE(review): unsigned; wraps if the second term is larger - presumably intended, confirm.
+
+  for (size_t i = 0; i < num_filters; i++) {
+    const FilterStep& filter = filters[i];
+
+    rows_needed += filter.filter_def.border;
+
+    // After this "y" points to the rect row for the center of the filter.
+    y -= filter.filter_def.border;
+    if (y < rows_needed) return;  // Row not available yet: skip this filter and all later ones.
+
+    // Apply filter to the given region.
+    FilterRows rows(filter.filter_def.border);
+    filter.set_input_rows(filter, &rows, y);
+    filter.set_output_rows(filter, &rows, y);
+
+    // The "y" coordinate used for the sigma image in EPF1. Sigma is padded
+    // with kMaxFilterPadding (or kMaxFilterPadding/kBlockDim rows in sigma)
+    // above and below.
+    const size_t sigma_y = kMaxFilterPadding + image_rect.y0() + y;
+    // Point `rows` at the sigma row, only when compute_sigma is set. The x0
+    // passed is image_rect.x0() rounded down to a multiple of kBlockDim.
+    if (compute_sigma) {
+      rows.SetSigma(filter_weights.sigma, sigma_y,
+                    image_rect.x0() - image_rect.x0() % kBlockDim);
+    }
+
+    filter.filter_def.apply(rows, lf, filter_weights, filter.filter_x0,
+                            filter.filter_x1, sigma_x_offset,
+                            sigma_y % kBlockDim);
+  }
+
+  JXL_DASSERT(rows_needed == 0);  // Holds when the filter borders sum to lf.Padding().
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/filters.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/filters.h
new file mode 100644
index 0000000000..1dad66fc42
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/filters.h
@@ -0,0 +1,348 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_FILTERS_H_
+#define LIB_JXL_FILTERS_H_
+
+#include <stddef.h>
+
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_group_border.h"
+#include "lib/jxl/filters_internal.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+
+struct FilterWeights {  // Per-frame weights used by the gaborish and EPF loop filters.
+  // Initialize the FilterWeights for the passed LoopFilter and FrameDimensions.
+  // Returns an error if the weights are invalid.
+  Status Init(const LoopFilter& lf, const FrameDimensions& frame_dim);
+
+  // Normalized weights for gaborish, in XYB order, each weight for Manhattan
+  // distance of 0, 1 and 2 respectively (i.e. three consecutive entries per channel).
+  float gab_weights[9];
+
+  // Sigma values for EPF, if enabled.
+  // Note that, for speed reasons, this is actually kInvSigmaNum / sigma.
+  ImageF sigma;
+
+ private:
+  Status GaborishWeights(const LoopFilter& lf);  // Fills gab_weights; called by Init.
+};
+
+static constexpr size_t kMaxFinalizeRectPadding = 9;
+
+// Line-based EPF only needs to keep in cache 21 lines of the image, so 256 is
+// sufficient for everything to fit in the L2 cache. We add
+// 2 * RoundUpToBlockDim(kMaxFinalizeRectPadding) pixels as we might have up to
+// two extra borders on each side.
+constexpr size_t kApplyImageFeaturesTileDim =
+    256 + 2 * RoundUpToBlockDim(kMaxFinalizeRectPadding);
+
+// The maximum row storage needed by the filtering pipeline. This is the sum of
+// the number of input rows needed by each step.
+constexpr size_t kTotalStorageRows = 7 + 5 + 3;  // max is EPF0 + EPF1 + EPF2.
+
+// The maximum sum of all the borders in a chain of filters.
+constexpr size_t kMaxFilterBorder = 1 * kBlockDim;
+
+// The maximum horizontal filter padding ever needed to apply a chain of
+// filters. Intermediate storage must have at least this much padding on both
+// the left and right sides. This value must be a multiple of kBlockDim.
+constexpr size_t kMaxFilterPadding = kMaxFilterBorder + kBlockDim;
+static_assert(kMaxFilterPadding % kBlockDim == 0,
+              "kMaxFilterPadding must be a multiple of block size.");
+
+// Same as kMaxFilterBorder and kMaxFilterPadding, but in units of blocks (sigma pixels).
+constexpr size_t kSigmaBorder = kMaxFilterBorder / kBlockDim;
+constexpr size_t kSigmaPadding = kMaxFilterPadding / kBlockDim;
+
+// Utility struct to define input/output rows of row-based loop filters.
+constexpr size_t kMaxBorderSize = 3;  // Largest border_size a FilterRows accepts.
+struct FilterRows {
+  explicit FilterRows(int border_size) : border_size_(border_size) {
+    JXL_DASSERT(border_size <= static_cast<int>(kMaxBorderSize));
+  }
+
+  JXL_INLINE const float* GetInputRow(int row, size_t c) const {
+    // Check that row is within range (row 0 is the center row).
+    JXL_DASSERT(-border_size_ <= row && row <= border_size_);
+    return rows_in_[c] + offsets_in_[kMaxBorderSize + row];
+  }
+
+  float* GetOutputRow(size_t c) const { return rows_out_[c]; }
+
+  const float* GetSigmaRow() const {
+    JXL_DASSERT(row_sigma_ != nullptr);  // SetSigma() must have been called first.
+    return row_sigma_;
+  }
+
+  template <typename RowMap>
+  void SetInput(const Image3F& in, size_t y_offset, ssize_t y0, ssize_t x0,
+                ssize_t full_image_y_offset = 0, ssize_t image_ysize = 0) {
+    RowMap row_map(full_image_y_offset, image_ysize);  // Maps a requested row to the row actually read.
+    for (size_t c = 0; c < 3; c++) {
+      rows_in_[c] = in.ConstPlaneRow(c, 0);  // Base pointer: row 0 of plane c.
+    }
+    for (int32_t i = -border_size_; i <= border_size_; i++) {
+      size_t y = row_map(y0 + i);
+      offsets_in_[i + kMaxBorderSize] =
+          static_cast<ssize_t>((y + y_offset) * in.PixelsPerRow()) + x0;  // Offset from the base pointer.
+    }
+  }
+
+  template <typename RowMap>
+  void SetOutput(Image3F* out, size_t y_offset, ssize_t y0, ssize_t x0) {
+    size_t y = RowMap()(y0);  // Default-constructed RowMap, unlike SetInput.
+    for (size_t c = 0; c < 3; c++) {
+      rows_out_[c] = out->PlaneRow(c, y + y_offset) + x0;
+    }
+  }
+
+  // Sets the sigma row for the given y0, x0 input image position. Sigma images
+  // have one pixel per input image block, although they are padded with two
+  // blocks (pixels in sigma) on each one of the four sides. The (x0, y0) values
+  // should include this padding.
+  void SetSigma(const ImageF& sigma, size_t y0, size_t x0) {
+    JXL_DASSERT(x0 % kBlockDim == 0);
+    row_sigma_ = sigma.ConstRow(y0 / kBlockDim) + x0 / kBlockDim;  // Pixel coordinates converted to block coordinates.
+  }
+
+ private:
+  // Base pointer to each one of the planes.
+  const float* JXL_RESTRICT rows_in_[3];
+
+  // Offset to the pixel x0 at the different rows. offsets_in_[kMaxBorderSize]
+  // references the center row, regardless of the border_size_. Only the center
+  // row, border_size_ before and border_size_ after are initialized. The offset
+  // is relative to the base pointer in rows_in_.
+  ssize_t offsets_in_[2 * kMaxBorderSize + 1];
+
+  float* JXL_RESTRICT rows_out_[3];  // Output row pointer per plane, already offset by x0.
+
+  const float* JXL_RESTRICT row_sigma_{nullptr};  // Null until SetSigma() is called.
+
+  const int border_size_;
+};
+
+// Definition of a filter. This specifies the function to be used to apply the
+// filter and its border (row and column padding) requirement.
+struct FilterDefinition {
+  // Function to apply the filter to a given row. The filter constant parameters
+  // are passed in LoopFilter lf and filter_weights. `sigma_x_offset` is needed
+  // to offset the `x0` value so that it will cause correct accesses to
+  // rows.GetSigmaRow(): there is just one sigma value per 8 pixels, and if the
+  // image rectangle is not aligned to multiples of 8 pixels, we need to
+  // compensate for the difference between x0 and the image position modulo 8.
+  void (*apply)(const FilterRows& rows, const LoopFilter& lf,
+                const FilterWeights& filter_weights, size_t x0, size_t x1,
+                size_t sigma_x_offset, size_t image_y_mod_8);
+
+  // Number of source image rows and cols before and after an input pixel needed
+  // to compute the output of the filter. For a 3x3 convolution this border will
+  // be only 1.
+  size_t border;
+};
+
+// A chain of filters to be applied to a source image. This instance must be
+// initialized by the FilterPipelineInit() function before it can be used.
+class FilterPipeline {
+ public:
+  FilterPipeline() : FilterPipeline(kApplyImageFeaturesTileDim) {}
+  explicit FilterPipeline(size_t max_rect_xsize)
+      : storage{max_rect_xsize + 2 * kMaxFilterPadding +
+                    GroupBorderAssigner::kPaddingXRound,
+                kTotalStorageRows} {
+#if MEMORY_SANITIZER
+    // The padding of the storage may be used uninitialized since we process
+    // multiple SIMD lanes at a time, aligned to a multiple of lanes.
+    // For example, in a hypothetical 3-step filter process where all filters
+    // use 1 pixel border the first filter needs to process 2 pixels more on
+    // each side than the requested rect.x0(), rect.xsize(), while the second
+    // filter needs to process 1 more pixel on each side, however for
+    // performance reasons both will process Lanes(df) more pixels on each
+    // side assuming this Lanes(df) value is more than one. In that case the
+    // second filter will be using one pixel of uninitialized data to generate
+    // an output pixel that won't affect the final output but may cause msan
+    // failures. For this reason we initialize the padding region.
+    for (size_t c = 0; c < 3; c++) {
+      for (size_t y = 0; y < storage.ysize(); y++) {
+        float* row = storage.PlaneRow(c, y);
+        std::fill(row, row + kMaxFilterPadding, msan::kSanitizerSentinel);
+        std::fill(row + storage.xsize() - kMaxFilterPadding,
+                  row + storage.xsize(), msan::kSanitizerSentinel);
+      }
+    }
+#endif  // MEMORY_SANITIZER
+  }
+
+  FilterPipeline(const FilterPipeline&) = delete;
+  FilterPipeline(FilterPipeline&&) = default;
+
+  // Apply the filter chain to a given row. To apply the filter chain to a whole
+  // image this must be called for `image_rect.ysize() + 2 * total_border`
+  // values of `y`, in increasing order, starting from `y = -total_border`.
+  // `image_rect` is the value passed to FilterPipelineInit().
+  void ApplyFiltersRow(const LoopFilter& lf,
+                       const FilterWeights& filter_weights, ssize_t y);
+
+  struct FilterStep {
+    // We don't map self.input_rect.x0() directly to kMaxFilterPadding in
+    // input/output row since they might have a different alignment, instead we
+    // keep the alignment modulo kPaddingXRound.
+    static size_t MaxLeftPadding(size_t image_rect_x0) {
+      return kMaxFilterPadding +
+             image_rect_x0 % GroupBorderAssigner::kPaddingXRound;
+    }
+
+    // Sets the input of the filter step as an image region.
+    void SetInput(const Image3F* im_input, const Rect& input_rect,
+                  const Rect& image_rect, size_t image_ysize) {
+      input = im_input;
+      this->input_rect = input_rect;
+      this->image_rect = image_rect;
+      this->image_ysize = image_ysize;
+      JXL_DASSERT(SameSize(input_rect, image_rect));
+      set_input_rows = [](const FilterStep& self, FilterRows* rows,
+                          ssize_t y0) {
+        ssize_t full_image_y_offset =
+            static_cast<ssize_t>(self.image_rect.y0()) -
+            static_cast<ssize_t>(self.input_rect.y0());
+        // Bind the rows exactly once. The x offset is MaxLeftPadding() and not
+        // the bare kMaxFilterPadding so that the alignment modulo
+        // kPaddingXRound is kept, consistent with the offset SetOutput() uses
+        // for the output rows.
+        rows->SetInput<RowMapMirror>(
+            *(self.input), 0, self.input_rect.y0() + y0,
+            self.input_rect.x0() - MaxLeftPadding(self.input_rect.x0()),
+            full_image_y_offset, self.image_ysize);
+      };
+    }
+
+    // Sets the input of the filter step as the temporary cyclic storage with
+    // num_rows rows. The value image_rect.x0() during application will be
+    // mapped to "kMaxFilterPadding + alignment" regardless of the rect being
+    // processed.
+    template <size_t num_rows>
+    void SetInputCyclicStorage(const Image3F* storage, size_t offset_rows) {
+      input = storage;
+      input_y_offset = offset_rows;
+      set_input_rows = [](const FilterStep& self, FilterRows* rows,
+                          ssize_t y0) {
+        rows->SetInput<RowMapMod<num_rows>>(*(self.input), self.input_y_offset,
+                                            y0, 0);
+      };
+    }
+
+    // Sets the output of the filter step as the temporary cyclic storage with
+    // num_rows rows. The value image_rect.x0() during application will be
+    // mapped to "kMaxFilterPadding + alignment" regardless of the rect being
+    // processed.
+    template <size_t num_rows>
+    void SetOutputCyclicStorage(Image3F* storage, size_t offset_rows) {
+      output = storage;
+      output_y_offset = offset_rows;
+      set_output_rows = [](const FilterStep& self, FilterRows* rows,
+                           ssize_t y0) {
+        rows->SetOutput<RowMapMod<num_rows>>(self.output, self.output_y_offset,
+                                             y0, 0);
+      };
+    }
+
+    // Set the output of the filter step as the output image. The value
+    // rect.x0() will be mapped to the same value in the output image.
+    void SetOutput(Image3F* im_output, const Rect& output_rect) {
+      output = im_output;
+      this->output_rect = output_rect;
+      set_output_rows = [](const FilterStep& self, FilterRows* rows,
+                           ssize_t y0) {
+        rows->SetOutput<RowMapId>(self.output, 0, self.output_rect.y0() + y0,
+                                  static_cast<ssize_t>(self.output_rect.x0()) -
+                                      MaxLeftPadding(self.output_rect.x0()));
+      };
+    }
+
+    // The input and output image buffers for the current filter step. Note that
+    // the rows used from these images depends on the module used in
+    // set_input_rows and set_output_rows functions.
+    const Image3F* input;
+    size_t input_y_offset = 0;
+    Image3F* output;
+    size_t output_y_offset = 0;
+
+    // Input/output rect for the first/last steps of the filter.
+    Rect input_rect;
+    Rect output_rect;
+
+    // Information to properly do RowMapMirror().
+    Rect image_rect;
+    size_t image_ysize;
+
+    // Functions that compute the list of rows needed to process a region for
+    // the given row and starting column.
+    void (*set_input_rows)(const FilterStep&, FilterRows* rows, ssize_t y0);
+    void (*set_output_rows)(const FilterStep&, FilterRows* rows, ssize_t y0);
+
+    // Actual filter descriptor.
+    FilterDefinition filter_def;
+
+    // Range of output pixels of the step. The filter [x0, x1) range is always
+    // a multiple of Lanes(df) and is large enough to contain the input and
+    // border needed by the next stages, but values outside that range may be
+    // undefined values. Coordinates are relative to the FilterRows pointers.
+    size_t filter_x0, filter_x1;
+
+    // Number of extra horizontal pixels needed on each side of the output of
+    // this filter to produce the requested rect at the end of the chain. This
+    // value is always 0 for the last filter of the chain but it depends on the
+    // actual filter chain used in other cases.
+    size_t output_col_border;
+  };
+
+  template <size_t border>
+  void AddStep(const FilterDefinition& filter_def) {
+    JXL_DASSERT(num_filters < kMaxFilters);
+    filters[num_filters].filter_def = filter_def;
+
+    if (num_filters > 0) {
+      // If it is not the first step we need to set the previous step output to
+      // a portion of the cyclic storage. We only need as many rows as the
+      // input of the current stage.
+      constexpr size_t num_rows = 2 * border + 1;
+      filters[num_filters - 1].SetOutputCyclicStorage<num_rows>(
+          &storage, storage_rows_used);
+      filters[num_filters].SetInputCyclicStorage<num_rows>(&storage,
+                                                           storage_rows_used);
+      storage_rows_used += num_rows;
+      JXL_DASSERT(storage_rows_used <= kTotalStorageRows);
+    }
+    num_filters++;
+  }
+
+  // Tile storage for ApplyImageFeatures steps. Different groups of rows of this
+  // image are used for the intermediate steps.
+  Image3F storage;
+  size_t storage_rows_used = 0;
+
+  static const size_t kMaxFilters = 4;
+  FilterStep filters[kMaxFilters];
+  size_t num_filters = 0;
+
+  // Whether we need to compute the sigma_row_ during application.
+  bool compute_sigma = false;
+
+  // Rect to be processed in the image coordinates. This doesn't include any
+  // padding needed to produce the output.
+  Rect image_rect;
+
+  // The total border needed to process this pipeline.
+  size_t total_border = 0;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_FILTERS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/filters_internal.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/filters_internal.h
new file mode 100644
index 0000000000..4ad90faaf2
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/filters_internal.h
@@ -0,0 +1,55 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_FILTERS_INTERNAL_H_
+#define LIB_JXL_FILTERS_INTERNAL_H_
+
+#include <stddef.h>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+// Row map that reflects rows falling outside the frame back into [0,
+// image_ysize). Rows are given relative to a region whose first row is row
+// `full_image_y_offset` of the full frame, and are returned the same way.
+struct RowMapMirror {
+  RowMapMirror(ssize_t full_image_y_offset, size_t image_ysize)
+      : full_image_y_offset_(full_image_y_offset), image_ysize_(image_ysize) {}
+  size_t operator()(ssize_t y) {
+    const ssize_t frame_y = y + full_image_y_offset_;
+    return Mirror(frame_y, image_ysize_) - full_image_y_offset_;
+  }
+  ssize_t full_image_y_offset_;
+  size_t image_ysize_;
+};
+
+// Row map for cyclic storage: wraps a row in the range [-16, \inf) into the
+// range [0, m) by taking it modulo m.
+template <size_t m>
+struct RowMapMod {
+  RowMapMod() = default;
+  RowMapMod(ssize_t /*full_image_y_offset*/, size_t /*image_ysize*/) {}
+  size_t operator()(ssize_t y) {
+    JXL_DASSERT(y >= -16);
+    // kBias is a compile-time multiple of m that is at least 16; adding it
+    // makes the dividend non-negative without changing the result modulo m.
+    constexpr size_t kBias = m > 16 ? m : 16 * m;
+    return static_cast<size_t>(y + kBias) % m;
+  }
+};
+
+// Row map that returns its argument unchanged; rows must be non-negative.
+struct RowMapId {
+  size_t operator()(ssize_t y) {
+    JXL_DASSERT(y >= 0);
+    return static_cast<size_t>(y);
+  }
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_FILTERS_INTERNAL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/filters_internal_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/filters_internal_test.cc
new file mode 100644
index 0000000000..c47269d194
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/filters_internal_test.cc
@@ -0,0 +1,50 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/filters_internal.h"
+
+#include "gtest/gtest.h"
+
+namespace jxl {
+
+class FiltersInternalTest : public ::testing::Test {};  // Empty fixture.
+
+// Checks how RowMapMod<5> maps row indices into [0, 5).
+TEST(FiltersInternalTest, RowMapModTest) {
+  RowMapMod<5> row_map;
+  // Rows already inside the range map to themselves.
+  EXPECT_EQ(0, row_map(0));
+  EXPECT_EQ(4, row_map(4));
+
+  // Rows at or past the modulus wrap around.
+  EXPECT_EQ(0, row_map(5));
+  EXPECT_EQ(1, row_map(11));
+
+  // Negative rows wrap as well, also when more than one period below zero.
+  EXPECT_EQ(4, row_map(-1));
+  EXPECT_EQ(2, row_map(-8));
+}
+
+// Checks the row mirroring done by RowMapMirror at both image edges.
+TEST(FiltersInternalTest, RowMapMirrorTest) {
+  RowMapMirror mirror(0, 10);  // No frame offset, image size of 10 rows.
+
+  EXPECT_EQ(2, mirror(-3));
+  EXPECT_EQ(1, mirror(-2));
+  EXPECT_EQ(0, mirror(-1));
+
+  EXPECT_EQ(0, mirror(0));
+  EXPECT_EQ(9, mirror(9));
+
+  EXPECT_EQ(9, mirror(10));
+  EXPECT_EQ(8, mirror(11));
+  EXPECT_EQ(7, mirror(12));
+
+  // Mirroring repeats for rows arbitrarily far past the image.
+  EXPECT_EQ(1, mirror(21));
+  EXPECT_EQ(1, mirror(41));
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/frame_header.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/frame_header.cc
new file mode 100644
index 0000000000..bee1070350
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/frame_header.cc
@@ -0,0 +1,376 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/frame_header.h"
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+
+constexpr uint8_t YCbCrChromaSubsampling::kHShift[];  // out-of-class definition
+constexpr uint8_t YCbCrChromaSubsampling::kVShift[];  // out-of-class definition
+
+static Status VisitBlendMode(Visitor* JXL_RESTRICT visitor,
+                             BlendMode default_value, BlendMode* blend_mode) {
+  uint32_t raw_mode = static_cast<uint32_t>(*blend_mode);
+  // Three direct values, then a 2-bit field offset by 3.
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+      Val(static_cast<uint32_t>(BlendMode::kReplace)),
+      Val(static_cast<uint32_t>(BlendMode::kAdd)),
+      Val(static_cast<uint32_t>(BlendMode::kBlend)), BitsOffset(2, 3),
+      static_cast<uint32_t>(default_value), &raw_mode));
+  // Values above 4 are rejected as invalid blend modes.
+  if (!(raw_mode <= 4)) return JXL_FAILURE("Invalid blend_mode");
+
+  *blend_mode = static_cast<BlendMode>(raw_mode);
+  return true;
+}
+
+static Status VisitFrameType(Visitor* JXL_RESTRICT visitor,
+                             FrameType default_value, FrameType* frame_type) {
+  uint32_t raw_type = static_cast<uint32_t>(*frame_type);
+  // Each of the four U32 selectors is a fixed frame type value.
+  JXL_QUIET_RETURN_IF_ERROR(
+      visitor->U32(Val(static_cast<uint32_t>(FrameType::kRegularFrame)),
+                   Val(static_cast<uint32_t>(FrameType::kDCFrame)),
+                   Val(static_cast<uint32_t>(FrameType::kReferenceOnly)),
+                   Val(static_cast<uint32_t>(FrameType::kSkipProgressive)),
+                   static_cast<uint32_t>(default_value), &raw_type));
+  *frame_type = static_cast<FrameType>(raw_type);
+  return true;
+}
+
+BlendingInfo::BlendingInfo() { Bundle::Init(this); }  // all init is in Bundle
+
+Status BlendingInfo::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  JXL_QUIET_RETURN_IF_ERROR(
+      VisitBlendMode(visitor, BlendMode::kReplace, &mode));
+  // Condition shared by the alpha_channel and clamp fields below.
+  const bool has_alpha_mode =
+      nonserialized_num_extra_channels > 0 &&
+      (mode == BlendMode::kBlend || mode == BlendMode::kAlphaWeightedAdd);
+  if (visitor->Conditional(has_alpha_mode)) {
+    // Up to 11 alpha channels for blending.
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+        Val(0), Val(1), Val(2), BitsOffset(3, 3), 0, &alpha_channel));
+    if (visitor->IsReading() &&
+        alpha_channel >= nonserialized_num_extra_channels) {
+      return JXL_FAILURE("Invalid alpha channel for blending");
+    }
+  }
+  // Clamping is visited for the alpha-based modes and for kMul.
+  if (visitor->Conditional(has_alpha_mode || mode == BlendMode::kMul)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &clamp));
+  }
+  // 'old' frame for blending; only needed for partial frames or non-kReplace.
+  const bool needs_source =
+      mode != BlendMode::kReplace || nonserialized_is_partial_frame;
+  if (visitor->Conditional(needs_source)) {
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Val(0), Val(1), Val(2), Val(3), 0, &source));
+  }
+  return true;
+}
+
+AnimationFrame::AnimationFrame(const CodecMetadata* metadata)
+    : nonserialized_metadata(metadata) {
+  Bundle::Init(this);  // nonserialized_metadata is already set here
+}
+Status AnimationFrame::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  const CodecMetadata* const meta = nonserialized_metadata;
+  // Both fields are only visited when the metadata enables them.
+  const bool animated = meta != nullptr && meta->m.have_animation;
+  if (visitor->Conditional(animated)) {
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Val(0), Val(1), Bits(8), Bits(32), 0, &duration));
+  }
+  const bool timecoded = meta != nullptr && meta->m.animation.have_timecodes;
+  if (visitor->Conditional(timecoded)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(32, 0, &timecode));
+  }
+  return true;
+}
+
+YCbCrChromaSubsampling::YCbCrChromaSubsampling() { Bundle::Init(this); }
+Passes::Passes() { Bundle::Init(this); }  // as above: all init is in Bundle
+Status Passes::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  JXL_QUIET_RETURN_IF_ERROR(
+      visitor->U32(Val(1), Val(2), Val(3), BitsOffset(3, 4), 1, &num_passes));
+  JXL_ASSERT(num_passes <= kMaxNumPasses);  // Cannot happen when reading
+  // Everything below is only visited when num_passes != 1.
+  if (visitor->Conditional(num_passes != 1)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+        Val(0), Val(1), Val(2), BitsOffset(1, 3), 0, &num_downsample));
+    JXL_ASSERT(num_downsample <= 4);  // 1,2,4,8
+    if (num_downsample > num_passes) {
+      return JXL_FAILURE("num_downsample %u > num_passes %u", num_downsample,
+                         num_passes);
+    }
+    // One 2-bit shift per pass except the last; the last shift is set to 0.
+    for (uint32_t i = 0; i < num_passes - 1; i++) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(2, 0, &shift[i]));
+    }
+    shift[num_passes - 1] = 0;
+    // All downsample[] values are visited before any last_pass[] value.
+    for (uint32_t i = 0; i < num_downsample; ++i) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Val(1), Val(2), Val(4), Val(8), 1, &downsample[i]));
+    }
+    for (uint32_t i = 0; i < num_downsample; ++i) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Val(0), Val(1), Val(2), Bits(3), 0, &last_pass[i]));
+      if (last_pass[i] >= num_passes) {
+        return JXL_FAILURE("last_pass %u >= num_passes %u", last_pass[i],
+                           num_passes);
+      }
+    }
+  }
+
+  return true;
+}
+FrameHeader::FrameHeader(const CodecMetadata* metadata)
+    : animation_frame(metadata), nonserialized_metadata(metadata) {
+  Bundle::Init(this);  // both members above already hold `metadata`
+}
+
+Status ReadFrameHeader(BitReader* JXL_RESTRICT reader,
+                       FrameHeader* JXL_RESTRICT frame) {
+  return Bundle::Read(reader, frame);  // thin wrapper over Bundle::Read()
+}
+
+Status WriteFrameHeader(const FrameHeader& frame,
+                        BitWriter* JXL_RESTRICT writer, AuxOut* aux_out) {
+  return Bundle::Write(frame, writer, kLayerHeader, aux_out);  // thin wrapper
+}
+
+Status FrameHeader::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  if (visitor->AllDefault(*this, &all_default)) {
+    // Overwrite all serialized fields, but not any nonserialized_*.
+    visitor->SetDefault(this);
+    return true;
+  }
+  // Not all-default: visit the individual fields one after another.
+  JXL_QUIET_RETURN_IF_ERROR(
+      VisitFrameType(visitor, FrameType::kRegularFrame, &frame_type));
+  if (visitor->IsReading() && nonserialized_is_preview &&
+      frame_type != kRegularFrame) {
+    return JXL_FAILURE("Only regular frame could be a preview");
+  }
+
+  // FrameEncoding: serialized as a single bool (true selects kModular).
+  bool is_modular = (encoding == FrameEncoding::kModular);
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &is_modular));
+  encoding = (is_modular ? FrameEncoding::kModular : FrameEncoding::kVarDCT);
+
+  // Flags
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U64(0, &flags));
+
+  // Color transform: kXYB is implied when xyb_encoded, else one bool is used.
+  bool xyb_encoded = nonserialized_metadata == nullptr ||
+                     nonserialized_metadata->m.xyb_encoded;
+
+  bool fp = nonserialized_metadata != nullptr &&
+            nonserialized_metadata->m.bit_depth.floating_point_sample;
+
+  if (xyb_encoded) {
+    if (is_modular && fp) {
+      return JXL_FAILURE(
+          "Floating point samples is not supported with XYB color encoding");
+    }
+    color_transform = ColorTransform::kXYB;
+  } else {
+    // The bool selects kYCbCr when true and kNone when false.
+    bool alternate = color_transform == ColorTransform::kYCbCr;
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &alternate));
+    color_transform =
+        (alternate ? ColorTransform::kYCbCr : ColorTransform::kNone);
+  }
+
+  // Chroma subsampling for YCbCr, if no DC frame is used.
+  if (visitor->Conditional(color_transform == ColorTransform::kYCbCr &&
+                           ((flags & kUseDcFrame) == 0))) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&chroma_subsampling));
+  }
+
+  size_t num_extra_channels =
+      nonserialized_metadata != nullptr
+          ? nonserialized_metadata->m.extra_channel_info.size()
+          : 0;
+
+  // Upsampling, only visited when kUseDcFrame is not set.
+  if (visitor->Conditional((flags & kUseDcFrame) == 0)) {
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Val(1), Val(2), Val(4), Val(8), 1, &upsampling));
+    if (nonserialized_metadata != nullptr &&
+        visitor->Conditional(num_extra_channels != 0)) {
+      const std::vector<ExtraChannelInfo>& extra_channels =
+          nonserialized_metadata->m.extra_channel_info;
+      extra_channel_upsampling.resize(extra_channels.size(), 1);
+      for (size_t i = 0; i < extra_channels.size(); ++i) {
+        uint32_t dim_shift =
+            nonserialized_metadata->m.extra_channel_info[i].dim_shift;
+        uint32_t& ec_upsampling = extra_channel_upsampling[i];
+        ec_upsampling >>= dim_shift;
+        JXL_QUIET_RETURN_IF_ERROR(
+            visitor->U32(Val(1), Val(2), Val(4), Val(8), 1, &ec_upsampling));
+        ec_upsampling <<= dim_shift;
+        if (ec_upsampling < upsampling) {
+          return JXL_FAILURE(
+              "EC upsampling (%u) < color upsampling (%u), which is invalid.",
+              ec_upsampling, upsampling);
+        }
+        if (ec_upsampling > 8) {
+          return JXL_FAILURE("EC upsampling too large (%u)", ec_upsampling);
+        }
+      }
+    } else {
+      extra_channel_upsampling.clear();
+    }
+  }
+
+  // Modular- or VarDCT-specific data.
+  if (visitor->Conditional(encoding == FrameEncoding::kModular)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(2, 1, &group_size_shift));
+  }
+  if (visitor->Conditional(encoding == FrameEncoding::kVarDCT &&
+                           color_transform == ColorTransform::kXYB)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 3, &x_qm_scale));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 2, &b_qm_scale));
+  } else {
+    x_qm_scale = b_qm_scale = 2;  // noop
+  }
+
+  // Passes are not visited for kReferenceOnly frames.
+  if (visitor->Conditional(frame_type != FrameType::kReferenceOnly)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&passes));
+  }
+
+  if (visitor->Conditional(frame_type == FrameType::kDCFrame)) {
+    // Up to 4 pyramid levels - for up to 16384x downsampling.
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Val(1), Val(2), Val(3), Val(4), 1, &dc_level));
+  }
+  if (frame_type != FrameType::kDCFrame) {
+    dc_level = 0;
+  }
+
+  bool is_partial_frame = false;
+  if (visitor->Conditional(frame_type != FrameType::kDCFrame)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &custom_size_or_origin));
+    if (visitor->Conditional(custom_size_or_origin)) {
+      const U32Enc enc(Bits(8), BitsOffset(11, 256), BitsOffset(14, 2304),
+                       BitsOffset(30, 18688));
+      // Frame offset, only if kRegularFrame or kSkipProgressive.
+      if (visitor->Conditional(frame_type == FrameType::kRegularFrame ||
+                               frame_type == FrameType::kSkipProgressive)) {
+        uint32_t ux0 = PackSigned(frame_origin.x0);
+        uint32_t uy0 = PackSigned(frame_origin.y0);
+        JXL_QUIET_RETURN_IF_ERROR(visitor->U32(enc, 0, &ux0));
+        JXL_QUIET_RETURN_IF_ERROR(visitor->U32(enc, 0, &uy0));
+        frame_origin.x0 = UnpackSigned(ux0);
+        frame_origin.y0 = UnpackSigned(uy0);
+      }
+      // Frame size
+      JXL_QUIET_RETURN_IF_ERROR(visitor->U32(enc, 0, &frame_size.xsize));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->U32(enc, 0, &frame_size.ysize));
+      int32_t image_xsize = default_xsize();
+      int32_t image_ysize = default_ysize();
+      if (frame_type == FrameType::kRegularFrame ||
+          frame_type == FrameType::kSkipProgressive) {
+        is_partial_frame |= frame_origin.x0 > 0;
+        is_partial_frame |= frame_origin.y0 > 0;
+        is_partial_frame |= (static_cast<int32_t>(frame_size.xsize) +
+                             frame_origin.x0) < image_xsize;
+        is_partial_frame |= (static_cast<int32_t>(frame_size.ysize) +
+                             frame_origin.y0) < image_ysize;
+      }
+    }
+  }
+
+  // Blending info, animation info and whether this is the last frame or not.
+  if (visitor->Conditional(frame_type == FrameType::kRegularFrame ||
+                           frame_type == FrameType::kSkipProgressive)) {
+    blending_info.nonserialized_num_extra_channels = num_extra_channels;
+    blending_info.nonserialized_is_partial_frame = is_partial_frame;
+    JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&blending_info));
+    bool replace_all = (blending_info.mode == BlendMode::kReplace);
+    extra_channel_blending_info.resize(num_extra_channels);
+    for (size_t i = 0; i < num_extra_channels; i++) {
+      auto& ec_blending_info = extra_channel_blending_info[i];
+      ec_blending_info.nonserialized_is_partial_frame = is_partial_frame;
+      ec_blending_info.nonserialized_num_extra_channels = num_extra_channels;
+      JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&ec_blending_info));
+      replace_all &= (ec_blending_info.mode == BlendMode::kReplace);
+    }
+    if (visitor->IsReading() && nonserialized_is_preview) {
+      if (!replace_all || custom_size_or_origin) {
+        return JXL_FAILURE("Preview is not compatible with blending");
+      }
+    }
+    if (visitor->Conditional(nonserialized_metadata != nullptr &&
+                             nonserialized_metadata->m.have_animation)) {
+      animation_frame.nonserialized_metadata = nonserialized_metadata;
+      JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&animation_frame));
+    }
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(true, &is_last));
+  }
+  if (frame_type != FrameType::kRegularFrame) {
+    is_last = false;
+  }
+
+  // ID that can be used to refer to this frame. 0 for a non-zero-duration
+  // frame means that it will not be referenced. Not necessary for the last
+  // frame.
+  if (visitor->Conditional(frame_type != kDCFrame && !is_last)) {
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Val(0), Val(1), Val(2), Val(3), 0, &save_as_reference));
+  }
+
+  // If this frame is not blended on another frame post-color-transform, it may
+  // be stored for reference either before or after the color transform.
+  // Otherwise, or for a kRegular frame that does not cover the full frame (the
+  // rest comes from a post-color-transform frame), it must be blended after.
+  // NOTE(review): the kReferenceOnly branch below dereferences
+  // nonserialized_metadata without the nullptr check used elsewhere here.
+  if (frame_type != FrameType::kDCFrame) {
+    if (visitor->Conditional(CanBeReferenced() &&
+                             blending_info.mode == BlendMode::kReplace &&
+                             !is_partial_frame &&
+                             (frame_type == FrameType::kRegularFrame ||
+                              frame_type == FrameType::kSkipProgressive))) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->Bool(false, &save_before_color_transform));
+    } else if (visitor->Conditional(frame_type == FrameType::kReferenceOnly)) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->Bool(true, &save_before_color_transform));
+      if (!save_before_color_transform &&
+          (frame_size.xsize < nonserialized_metadata->xsize() ||
+           frame_size.ysize < nonserialized_metadata->ysize() ||
+           frame_origin.x0 != 0 || frame_origin.y0 != 0)) {
+        return JXL_FAILURE(
+            "non-patch reference frame with invalid crop: %zux%zu%+d%+d",
+            static_cast<size_t>(frame_size.xsize),
+            static_cast<size_t>(frame_size.ysize),
+            static_cast<int>(frame_origin.x0),
+            static_cast<int>(frame_origin.y0));
+      }
+    }
+  } else {
+    save_before_color_transform = true;
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(VisitNameString(visitor, &name));
+
+  loop_filter.nonserialized_is_modular = is_modular;
+  JXL_RETURN_IF_ERROR(visitor->VisitNested(&loop_filter));
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions));
+  // Extensions: in chronological order of being added to the format.
+  return visitor->EndExtensions();
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/frame_header.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/frame_header.h
new file mode 100644
index 0000000000..dab0267adf
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/frame_header.h
@@ -0,0 +1,492 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_FRAME_HEADER_H_
+#define LIB_JXL_FRAME_HEADER_H_
+
+// Frame header with backward and forward-compatible extension capability and
+// compressed integer fields.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image_metadata.h"
+#include "lib/jxl/loop_filter.h"
+
+namespace jxl {
+
+// Visits a length-prefixed name. Also used by extra channel names.
+static inline Status VisitNameString(Visitor* JXL_RESTRICT visitor,
+                                     std::string* name) {
+  uint32_t name_length = static_cast<uint32_t>(name->length());
+  // Allows layer name lengths up to 1071 bytes
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(0), Bits(4), BitsOffset(5, 16),
+                                         BitsOffset(10, 48), 0, &name_length));
+  if (visitor->IsReading()) name->resize(name_length);
+  for (size_t i = 0; i < name_length; i++) {
+    // Go through uint8_t: where char is signed, a byte >= 0x80 would otherwise
+    // sign-extend into a value that does not fit the 8-bit field.
+    uint32_t c = static_cast<uint8_t>((*name)[i]);
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(8, 0, &c));
+    (*name)[i] = static_cast<char>(c);
+  }
+  return true;
+}
+
+enum class FrameEncoding : uint32_t {
+  kVarDCT = 0,
+  kModular = 1,
+};
+
+enum class ColorTransform : uint32_t {
+  // Values are encoded with XYB. May only be used if ImageBundle::xyb_encoded.
+  kXYB = 0,
+  // Values are encoded according to the attached color profile. May only be
+  // used if !ImageBundle::xyb_encoded.
+  kNone = 1,
+  // As kNone, but transformed to YCbCr; same !ImageBundle::xyb_encoded rule.
+  kYCbCr = 2,
+};
+
+// Channel order for JPEG data (semantics presumed from the name -- TODO
+// confirm with callers): all zeros for gray input, {1, 0, 2} for kYCbCr and
+// {0, 1, 2} otherwise. Non-gray kXYB input trips the assertion.
+inline std::array<int, 3> JpegOrder(ColorTransform ct, bool is_gray) {
+  using Order = std::array<int, 3>;
+  if (is_gray) return Order{0, 0, 0};
+  // Past this point the input is known not to be gray.
+  JXL_ASSERT(ct != ColorTransform::kXYB);
+  const bool is_ycbcr = (ct == ColorTransform::kYCbCr);
+  return is_ycbcr ? Order{1, 0, 2} : Order{0, 1, 2};
+}
+
+// Chroma subsampling for the three channels, stored internally in the order
+// (Cb, Y, Cr) -- see Set(). channel_mode_[c] indexes kHShift/kVShift, whose
+// entries are the log2 sampling factors; HShift()/VShift() give how far channel
+// c is below the most densely sampled channel (0 = not downsampled).
+struct YCbCrChromaSubsampling : public Fields {
+  YCbCrChromaSubsampling();
+  const char* Name() const override { return "YCbCrChromaSubsampling"; }
+  size_t HShift(size_t c) const { return maxhs_ - kHShift[channel_mode_[c]]; }
+  size_t VShift(size_t c) const { return maxvs_ - kVShift[channel_mode_[c]]; }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override {
+    // TODO(veluca): consider allowing 4x downsamples
+    for (size_t i = 0; i < 3; i++) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(2, 0, &channel_mode_[i]));
+    }
+    Recompute();
+    return true;
+  }
+
+  uint8_t MaxHShift() const { return maxhs_; }
+  uint8_t MaxVShift() const { return maxvs_; }
+
+  uint8_t RawHShift(size_t c) const { return kHShift[channel_mode_[c]]; }
+  uint8_t RawVShift(size_t c) const { return kVShift[channel_mode_[c]]; }
+
+  // Uses JPEG channel order (Y, Cb, Cr).
+  Status Set(const uint8_t* hsample, const uint8_t* vsample) {
+    for (size_t c = 0; c < 3; c++) {
+      size_t cjpeg = c < 2 ? c ^ 1 : c;
+      size_t i = 0;
+      for (; i < 4; i++) {
+        if (1 << kHShift[i] == hsample[cjpeg] &&
+            1 << kVShift[i] == vsample[cjpeg]) {
+          channel_mode_[c] = i;
+          break;
+        }
+      }
+      if (i == 4) {
+        return JXL_FAILURE("Invalid subsample mode");
+      }
+    }
+    Recompute();
+    return true;
+  }
+
+  bool Is444() const {
+    for (size_t c : {0, 2}) {
+      if (channel_mode_[c] != channel_mode_[1]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // The predicates below are phrased via HShift()/VShift() so that they agree
+  // with Set(): both chroma channels (0 and 2) are downsampled 2x2 (4:2:0),
+  // horizontally (4:2:2) or vertically (4:4:0), while luma (1) is not.
+  bool Is420() const {
+    return HShift(0) == 1 && VShift(0) == 1 &&  // Cb
+           HShift(2) == 1 && VShift(2) == 1 &&  // Cr
+           HShift(1) == 0 && VShift(1) == 0;    // Y
+  }
+
+  bool Is422() const {
+    return HShift(0) == 1 && VShift(0) == 0 &&  // Cb
+           HShift(2) == 1 && VShift(2) == 0 &&  // Cr
+           HShift(1) == 0 && VShift(1) == 0;    // Y
+  }
+
+  bool Is440() const {
+    return HShift(0) == 0 && VShift(0) == 1 &&  // Cb
+           HShift(2) == 0 && VShift(2) == 1 &&  // Cr
+           HShift(1) == 0 && VShift(1) == 0;    // Y
+  }
+
+ private:
+  void Recompute() {
+    maxhs_ = 0;
+    maxvs_ = 0;
+    for (size_t i = 0; i < 3; i++) {
+      maxhs_ = std::max(maxhs_, kHShift[channel_mode_[i]]);
+      maxvs_ = std::max(maxvs_, kVShift[channel_mode_[i]]);
+    }
+  }
+  static constexpr uint8_t kHShift[4] = {0, 1, 1, 0};
+  static constexpr uint8_t kVShift[4] = {0, 1, 0, 1};
+  uint32_t channel_mode_[3];
+  uint8_t maxhs_;
+  uint8_t maxvs_;
+};
+
+// Indicates how to combine the current frame with a previously-saved one. Can
+// be independently controlled for color and extra channels. Formulas are
+// indicative and treat alpha as if it is in range 0.0-1.0. In descriptions
+// below, alpha channel is the extra channel of type alpha used for blending
+// according to the blend_channel, or fully opaque if there is no alpha channel.
+// The blending specified here is used for performing blending *after* color
+// transforms - in linear sRGB if blending a XYB-encoded frame on another
+// XYB-encoded frame, in sRGB if blending a frame with kColorSpace == kSRGB, or
+// in the original colorspace otherwise. Blending in XYB or YCbCr is done by
+// using patches.
+enum class BlendMode {
+  // The new values (in the crop) replace the old ones: sample = new
+  kReplace = 0,
+  // The new values (in the crop) get added to the old ones: sample = old + new
+  kAdd = 1,
+  // The new values (in the crop) replace the old ones if alpha>0:
+  // For the alpha channel that is used as source:
+  // alpha = old + new * (1 - old)
+  // For other channels if !alpha_associated:
+  // sample = ((1 - new_alpha) * old * old_alpha + new_alpha * new) / alpha
+  // For other channels if alpha_associated:
+  // sample = (1 - new_alpha) * old + new
+  // The alpha formula applies to the alpha used for the division in the other
+  // channels formula, and applies to the alpha channel itself if its
+  // blend_channel value matches itself.
+  kBlend = 2,
+  // The new values (in the crop) are added to the old ones if alpha>0:
+  // For the alpha channel that is used as source:
+  // sample = old + new * (1 - old)
+  // For other channels: sample = old + alpha * new
+  kAlphaWeightedAdd = 3,
+  // The new values (in the crop) get multiplied by the old ones:
+  // sample = old * new
+  // The range of the new value matters for multiplication purposes, and its
+  // nominal range of 0..1 is computed the same way as this is done for the
+  // alpha values in kBlend and kAlphaWeightedAdd.
+  // If using kMul as a blend mode for color channels, no color transform is
+  // performed on the current frame.
+  kMul = 4,
+};
+
+struct BlendingInfo : public Fields {
+  BlendingInfo();
+  const char* Name() const override { return "BlendingInfo"; }
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+  BlendMode mode;
+  // Index of the extra channel to use as alpha channel for blending. Only
+  // encoded for blend modes that involve alpha and if there is more than one
+  // extra channel.
+  uint32_t alpha_channel;
+  // Clamp alpha or channel values to 0-1 range.
+  bool clamp;
+  // Frame ID to copy from (0-3). Only encoded if mode is not kReplace.
+  uint32_t source;
+
+  size_t nonserialized_num_extra_channels = 0;
+  bool nonserialized_is_partial_frame = false;
+};
+
+// Origin of the current frame. Not present for frames of type kOnlyPatches.
+struct FrameOrigin {
+  int32_t x0;  // can be negative.
+  int32_t y0;  // can be negative.
+};
+
+struct FrameSize {  // Size of the current frame.
+  uint32_t xsize;
+  uint32_t ysize;
+};
+
+// Per-frame animation fields: display duration and timecode.
+struct AnimationFrame : public Fields {
+  explicit AnimationFrame(const CodecMetadata* metadata);
+  const char* Name() const override { return "AnimationFrame"; }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // How long to wait [in ticks, see Animation{}] after rendering.
+  // May be 0 if the current frame serves as a foundation for another frame.
+  uint32_t duration;
+
+  uint32_t timecode;  // 0xHHMMSSFF
+
+  // Must be set to the CodecMetadata acting as the full codestream header,
+  // with correct xyb_encoded, list of extra channels, etc...
+  const CodecMetadata* nonserialized_metadata = nullptr;
+};
+
+// Pass layout for decoding to lower resolutions. kRegular frames only.
+struct Passes : public Fields {
+  Passes();
+  const char* Name() const override { return "Passes"; }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // Outputs the [minShift, maxShift] downsampling-shift bracket of `pass`.
+  void GetDownsamplingBracket(size_t pass, int& minShift, int& maxShift) const {
+    maxShift = 2;
+    minShift = 0;
+    for (size_t cur = 0;; ++cur) {
+      for (uint32_t k = 0; k < num_downsample; ++k) {
+        if (cur > last_pass[k]) continue;
+        if (downsample[k] == 8) minShift = 3;
+        else if (downsample[k] == 4) minShift = 2;
+        else if (downsample[k] == 2) minShift = 1;
+        else if (downsample[k] == 1) minShift = 0;
+      }
+      if (cur == num_passes - 1) minShift = 0;
+      if (cur == pass) return;
+      maxShift = minShift - 1;
+      minShift = 0;
+    }
+  }
+
+  uint32_t num_passes;      // <= kMaxNumPasses
+  uint32_t num_downsample;  // <= num_passes
+
+  // Arrays holding num_downsample (downsample, last_pass) pairs. The pairs
+  // 1/num_passes-1 and 8/0 need not be specified; they are implicit.
+  uint32_t downsample[kMaxNumPasses];
+  uint32_t last_pass[kMaxNumPasses];
+  // Shift value of each pass; implicitly assumed to be 0 for the last
+  // pass.
+  uint32_t shift[kMaxNumPasses];
+};
+
+enum FrameType {
+  // A "regular" frame: might be a crop, and will be blended on a previous
+  // frame, if any, and displayed or blended in future frames.
+  kRegularFrame = 0,
+  // A DC frame: this frame is downsampled and will be *only* used as the DC of
+  // a future frame and, possibly, for previews. Cannot be cropped, blended, or
+  // referenced by patches or blending modes. Frames that *use* a DC frame
+  // cannot have non-default sizes either.
+  kDCFrame = 1,
+  // A reference-only (patches source) frame: only used as a source frame for
+  // taking patches. Can be cropped, but cannot have non-(0, 0) x0 and y0.
+  kReferenceOnly = 2,
+  // Same as kRegularFrame, but not used for progressive rendering. This also
+  // implies no early display of DC.
+  kSkipProgressive = 3,
+};
+
+// Image/frame := one or more of these, where the last has is_last = true.
+// Starts at a byte-aligned address "a"; the next pass starts at "a + size".
+struct FrameHeader : public Fields {
+  // Optional postprocessing steps. These flags are the source of truth;
+  // Override must set/clear them rather than change their meaning. Values
+  // chosen such that typical flags == 0 (encoded in only two bits).
+  enum Flags {
+    // Often but not always off => low bit value:
+
+    // Inject noise into decoded output.
+    kNoise = 1,
+
+    // Overlay patches.
+    kPatches = 2,
+
+    // 4, 8 = reserved for future sometimes-off
+
+    // Overlay splines.
+    kSplines = 16,
+
+    kUseDcFrame = 32,  // Implies kSkipAdaptiveDCSmoothing.
+
+    // 64 = reserved for future often-off
+
+    // Almost always on => negated:
+
+    kSkipAdaptiveDCSmoothing = 128,
+  };
+
+  explicit FrameHeader(const CodecMetadata* metadata);
+  const char* Name() const override { return "FrameHeader"; }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // Sets/clears `flag` based upon `condition`.
+  void UpdateFlag(const bool condition, const uint64_t flag) {
+    if (condition) {
+      flags |= flag;
+    } else {
+      flags &= ~flag;
+    }
+  }
+
+  // Returns true if this frame is supposed to be saved for future usage by
+  // other frames.
+  bool CanBeReferenced() const {
+    // DC frames cannot be referenced. The last frame cannot be referenced. A
+    // duration 0 frame makes little sense if it is not referenced. A
+    // non-duration 0 frame may or may not be referenced.
+    return !is_last && frame_type != FrameType::kDCFrame &&
+           (animation_frame.duration == 0 || save_as_reference != 0);
+  }
+
+  mutable bool all_default;
+
+  // Always present
+  FrameEncoding encoding;
+  // Some versions of UBSAN complain in VisitFrameType if not initialized.
+  FrameType frame_type = FrameType::kRegularFrame;
+
+  uint64_t flags;
+
+  ColorTransform color_transform;
+  YCbCrChromaSubsampling chroma_subsampling;
+
+  uint32_t group_size_shift;  // only if encoding == kModular;
+
+  uint32_t x_qm_scale;  // only if VarDCT and color_transform == kXYB
+  uint32_t b_qm_scale;  // only if VarDCT and color_transform == kXYB
+
+  std::string name;
+
+  // Skipped for kReferenceOnly.
+  Passes passes;
+
+  // Skipped for kDCFrame
+  bool custom_size_or_origin;
+  FrameSize frame_size;
+
+  // upsampling factors for color and extra channels.
+  // Upsampling is always performed before applying any inverse color transform.
+  // Skipped (1) if kUseDcFrame
+  uint32_t upsampling;
+  std::vector<uint32_t> extra_channel_upsampling;
+
+  // Only for kRegular frames.
+  FrameOrigin frame_origin;
+
+  BlendingInfo blending_info;
+  std::vector<BlendingInfo> extra_channel_blending_info;
+
+  // Animation info for this frame.
+  AnimationFrame animation_frame;
+
+  // This is the last frame.
+  bool is_last;
+
+  // ID to refer to this frame with. 0-3, not present if kDCFrame.
+  // 0 has a special meaning for kRegular frames of nonzero duration: it defines
+  // a frame that will not be referenced in the future.
+  uint32_t save_as_reference;
+
+  // Whether to save this frame before or after the color transform. A frame
+  // that is saved before the color transform can only be used for blending
+  // through patches. On the contrary, a frame that is saved after the color
+  // transform can only be used for blending through blending modes.
+  // Irrelevant for extra channel blending. Can only be true if
+  // blending_info.mode == kReplace and this is not a partial kRegularFrame; if
+  // this is a DC frame, it is always true.
+  bool save_before_color_transform;
+
+  uint32_t dc_level;  // 1-4 if kDCFrame (0 otherwise).
+
+  // Must be set to the CodecMetadata acting as the full codestream header,
+  // with correct xyb_encoded, list of extra channels, etc...
+  const CodecMetadata* nonserialized_metadata = nullptr;
+
+  // NOTE: This is ignored by AllDefault.
+  LoopFilter loop_filter;
+
+  bool nonserialized_is_preview = false;
+
+  size_t default_xsize() const {
+    if (!nonserialized_metadata) return 0;
+    if (nonserialized_is_preview) {
+      return nonserialized_metadata->m.preview_size.xsize();
+    }
+    return nonserialized_metadata->xsize();
+  }
+
+  size_t default_ysize() const {
+    if (!nonserialized_metadata) return 0;
+    if (nonserialized_is_preview) {
+      return nonserialized_metadata->m.preview_size.ysize();
+    }
+    return nonserialized_metadata->ysize();
+  }
+
+  FrameDimensions ToFrameDimensions() const {
+    size_t xsize = default_xsize();
+    size_t ysize = default_ysize();
+    // A zero frame_size component means "use the default size".
+    xsize = frame_size.xsize ? frame_size.xsize : xsize;
+    ysize = frame_size.ysize ? frame_size.ysize : ysize;
+    // Each DC level divides both dimensions by 8 (1 << 3), rounding up.
+    if (dc_level != 0) {
+      xsize = DivCeil(xsize, 1 << (3 * dc_level));
+      ysize = DivCeil(ysize, 1 << (3 * dc_level));
+    }
+
+    FrameDimensions frame_dim;
+    frame_dim.Set(xsize, ysize, group_size_shift,
+                  chroma_subsampling.MaxHShift(),
+                  chroma_subsampling.MaxVShift(),
+                  encoding == FrameEncoding::kModular, upsampling);
+    return frame_dim;
+  }
+
+  // True if a color transform should be applied to this frame.
+  bool needs_color_transform() const {
+    return !save_before_color_transform ||
+           frame_type == FrameType::kRegularFrame ||
+           frame_type == FrameType::kSkipProgressive;
+  }
+
+  uint64_t extensions;
+};
+
+Status ReadFrameHeader(BitReader* JXL_RESTRICT reader,
+                       FrameHeader* JXL_RESTRICT frame);
+
+Status WriteFrameHeader(const FrameHeader& frame,
+                        BitWriter* JXL_RESTRICT writer, AuxOut* aux_out);
+
+// Shared by enc/dec. 5F and 13 are by far the most common for d1/2/4/8, 0
+// ensures low overhead for small images.
+static constexpr U32Enc kOrderEnc =
+    U32Enc(Val(0x5F), Val(0x13), Val(0), Bits(kNumOrders));
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_FRAME_HEADER_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gaborish.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gaborish.cc
new file mode 100644
index 0000000000..6a187c46eb
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gaborish.cc
@@ -0,0 +1,70 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/gaborish.h"
+
+#include <stddef.h>
+
+#include <hwy/base.h>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+void GaborishInverse(Image3F* in_out, float mul, ThreadPool* pool) {
+  JXL_ASSERT(mul >= 0.0f);
+  // Filters every plane of `in_out` in place with one normalized symmetric 5x5
+  // kernel: center 1, other taps `mul` * kGaborish[i]. Only an approximation.
+  // One or even two 3x3, and rank-1 (separable) 5x5 are insufficient.
+  constexpr float kGaborish[5] = {
+      -0.092359145662814029f,  -0.039253623634014627f, 0.016176494530216929f,
+      0.00083458437774987476f, 0.004512465323949319f,
+  };
+  /*
+    better would be:
+      1.0 - mul * (4 * (kGaborish[0] + kGaborish[1] +
+                        kGaborish[2] + kGaborish[4]) +
+                   8 * (kGaborish[3]));
+  */
+  WeightsSymmetric5 weights = {{HWY_REP4(1.0f)},
+                               {HWY_REP4(mul * kGaborish[0])},
+                               {HWY_REP4(mul * kGaborish[2])},
+                               {HWY_REP4(mul * kGaborish[1])},
+                               {HWY_REP4(mul * kGaborish[4])},
+                               {HWY_REP4(mul * kGaborish[3])}};
+  double sum = static_cast<double>(weights.c[0]);
+  sum += 4 * weights.r[0];
+  sum += 4 * weights.R[0];
+  sum += 4 * weights.d[0];
+  sum += 4 * weights.D[0];
+  sum += 8 * weights.L[0];
+  const float normalize = static_cast<float>(1.0 / sum);  // 1 / (all 25 taps)
+  for (size_t i = 0; i < 4; ++i) {
+    weights.c[i] *= normalize;
+    weights.r[i] *= normalize;
+    weights.R[i] *= normalize;
+    weights.d[i] *= normalize;
+    weights.D[i] *= normalize;
+    weights.L[i] *= normalize;
+  }
+
+  // Reduce memory footprint by only allocating a single plane and swapping it
+  // into the output Image3F. Better still would be tiling.
+  // Note that we cannot *allocate* a plane, as doing so might cause Image3F to
+  // have planes of different stride. Instead, we copy one plane in a temporary
+  // image and reuse the existing planes of the in/out image.
+  ImageF temp = CopyImage(in_out->Plane(2));
+  Symmetric5(in_out->Plane(0), Rect(*in_out), weights, pool, &in_out->Plane(2));
+  Symmetric5(in_out->Plane(1), Rect(*in_out), weights, pool, &in_out->Plane(0));
+  Symmetric5(temp, Rect(*in_out), weights, pool, &in_out->Plane(1));
+  // Now planes 0, 1, 2 hold the filtered former planes 1, 2, 0.
+  in_out->Plane(0).Swap(in_out->Plane(1));
+  // Now 2, 1, 0; the final swap restores the order 0, 1, 2.
+  in_out->Plane(0).Swap(in_out->Plane(2));
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gaborish.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gaborish.h
new file mode 100644
index 0000000000..e43411dd9c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gaborish.h
@@ -0,0 +1,26 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_GABORISH_H_
+#define LIB_JXL_GABORISH_H_
+
+// Linear smoothing (3x3 convolution) for deblocking without too much blur.
+
+#include <stdint.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// Used in encoder to reduce the impact of the decoder's smoothing.
+// This is not exact. Works in-place to reduce memory use.
+// The input is typically in XYB space.
+void GaborishInverse(Image3F* in_out, float mul, ThreadPool* pool);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_GABORISH_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gaborish_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gaborish_test.cc
new file mode 100644
index 0000000000..55b17a060a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gaborish_test.cc
@@ -0,0 +1,71 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/gaborish.h"
+
+#include <hwy/base.h>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+
+namespace jxl {
+namespace {
+
+// Builds a 3x3 symmetric kernel with center weight 1; weight1 and weight2 need
+// not be normalized because every tap is rescaled here.
+WeightsSymmetric3 GaborishKernel(float weight1, float weight2) {
+  constexpr float kCenter = 1.0f;
+  // Normalization factor: center plus four taps of each other weight.
+  const float scale = 1.0f / (kCenter + 4 * (weight1 + weight2));
+  const float center = kCenter * scale;
+  const float tap1 = weight1 * scale;
+  const float tap2 = weight2 * scale;
+  return {{HWY_REP4(center)}, {HWY_REP4(tap1)}, {HWY_REP4(tap2)}};
+}
+
+// Convolves `in` into the equally sized `out` with GaborishKernel's weights.
+void ConvolveGaborish(const ImageF& in, float weight1, float weight2,
+                      ThreadPool* pool, ImageF* JXL_RESTRICT out) {
+  JXL_CHECK(SameSize(in, *out));
+  const WeightsSymmetric3 kernel = GaborishKernel(weight1, weight2);
+  Symmetric3(in, Rect(in), kernel, pool, out);
+}
+
+void TestRoundTrip(const Image3F& in, float max_l1) {
+  Image3F filtered(in.xsize(), in.ysize());
+  ThreadPool* no_pool = nullptr;
+  for (size_t c = 0; c < 3; ++c) {  // weights 0, 0: only the center tap remains
+    ConvolveGaborish(in.Plane(c), 0, 0, no_pool, &filtered.Plane(c));
+  }
+  GaborishInverse(&filtered, 0.92718927264540152f, no_pool);
+  VerifyRelativeError(in, filtered, max_l1, 1E-4f);
+}
+
+TEST(GaborishTest, TestZero) {  // all-zero image, zero tolerance
+  Image3F zeros(20, 20);
+  ZeroFillImage(&zeros);
+  TestRoundTrip(zeros, /*max_l1=*/0.0f);
+}
+
+// Disabled: large difference.
+#if 0
+TEST(GaborishTest, TestDirac) {
+  Image3F dirac(20, 20);
+  ZeroFillImage(&dirac);
+  dirac.PlaneRow(1, 10)[10] = 10.0f;
+  TestRoundTrip(dirac, /*max_l1=*/0.26f);
+}
+#endif
+
+TEST(GaborishTest, TestFlat) {  // constant image of ones
+  Image3F ones(20, 20);
+  FillImage(1.0f, &ones);
+  TestRoundTrip(ones, /*max_l1=*/1E-5f);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gamma_correct_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gamma_correct_test.cc
new file mode 100644
index 0000000000..d17ce899ba
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gamma_correct_test.cc
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdlib.h>
+
+#include <algorithm>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/enc_gamma_correct.h"
+
+namespace jxl {
+namespace {
+
+TEST(GammaCorrectTest, TestLinearToSrgbEdgeCases) {
+  EXPECT_EQ(0, LinearToSrgb8Direct(0.0));
+  EXPECT_NEAR(0, LinearToSrgb8Direct(1E-6f), 2E-5);
+  EXPECT_EQ(0, LinearToSrgb8Direct(-1E-6f));  // Below 0: expect exactly 0.
+  EXPECT_EQ(0, LinearToSrgb8Direct(-1E6));
+  EXPECT_NEAR(1, LinearToSrgb8Direct(1 - 1E-6f), 1E-5);
+  EXPECT_EQ(1, LinearToSrgb8Direct(1 + 1E-6f));  // Above 1: expect exactly 1.
+  EXPECT_EQ(1, LinearToSrgb8Direct(1E6));
+}
+
+TEST(GammaCorrectTest, TestRoundTrip) {
+  // NOLINTNEXTLINE(clang-analyzer-security.FloatLoopCounter)
+  for (double linear = 0.0; linear <= 1.0; linear += 1E-7) {  // ~1e7 steps
+    const double srgb = LinearToSrgb8Direct(linear);
+    const double linear2 = Srgb8ToLinearDirect(srgb);
+    ASSERT_LT(std::abs(linear - linear2), 2E-13)
+        << "linear = " << linear << ", linear2 = " << linear2;
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gauss_blur.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gauss_blur.cc
new file mode 100644
index 0000000000..b6550819ee
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gauss_blur.cc
@@ -0,0 +1,616 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/gauss_blur.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <cmath>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/gauss_blur.cc"
+#include <hwy/cache_control.h>
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/linalg.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Broadcast;
+#if HWY_TARGET != HWY_SCALAR
+using hwy::HWY_NAMESPACE::ShiftLeftLanes;
+#endif
+using hwy::HWY_NAMESPACE::Vec;
+
+void FastGaussian1D(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+                    const float* JXL_RESTRICT in, intptr_t width,
+                    float* JXL_RESTRICT out) {
+  // Recursive 1D Gaussian: each output is the sum of three second-order
+  // recurrences (suffixes _1, _3, _5). Although each output depends on previous
+  // outputs, we can unroll up to 4x by precomputing up to fourth powers of the
+  // constants; beyond that, precision may suffer. Macro: tested in #if below.
+#define JXL_GAUSS_MAX_LANES 4
+  using D = HWY_CAPPED(float, JXL_GAUSS_MAX_LANES);
+  using V = Vec<D>;
+  const D d;
+  const V mul_in_1 = Load(d, rg->mul_in + 0 * 4);
+  const V mul_in_3 = Load(d, rg->mul_in + 1 * 4);
+  const V mul_in_5 = Load(d, rg->mul_in + 2 * 4);
+  const V mul_prev_1 = Load(d, rg->mul_prev + 0 * 4);
+  const V mul_prev_3 = Load(d, rg->mul_prev + 1 * 4);
+  const V mul_prev_5 = Load(d, rg->mul_prev + 2 * 4);
+  const V mul_prev2_1 = Load(d, rg->mul_prev2 + 0 * 4);
+  const V mul_prev2_3 = Load(d, rg->mul_prev2 + 1 * 4);
+  const V mul_prev2_5 = Load(d, rg->mul_prev2 + 2 * 4);
+  V prev_1 = Zero(d);
+  V prev_3 = Zero(d);
+  V prev_5 = Zero(d);
+  V prev2_1 = Zero(d);
+  V prev2_3 = Zero(d);
+  V prev2_5 = Zero(d);
+
+  const intptr_t N = rg->radius;
+
+  intptr_t n = -N + 1;
+  // Left side with bounds checks and only write output after n >= 0.
+  const intptr_t first_aligned = RoundUpTo(N + 1, Lanes(d));
+  for (; n < std::min(first_aligned, width); ++n) {
+    const intptr_t left = n - N - 1;
+    const intptr_t right = n + N - 1;
+    const float left_val = left >= 0 ? in[left] : 0.0f;
+    const float right_val = right < width ? in[right] : 0.0f;
+    const V sum = Set(d, left_val + right_val);
+
+    // (Only processing a single lane here, no need to broadcast)
+    V out_1 = sum * mul_in_1;
+    V out_3 = sum * mul_in_3;
+    V out_5 = sum * mul_in_5;
+
+    out_1 = MulAdd(mul_prev2_1, prev2_1, out_1);
+    out_3 = MulAdd(mul_prev2_3, prev2_3, out_3);
+    out_5 = MulAdd(mul_prev2_5, prev2_5, out_5);
+    prev2_1 = prev_1;
+    prev2_3 = prev_3;
+    prev2_5 = prev_5;
+
+    out_1 = MulAdd(mul_prev_1, prev_1, out_1);
+    out_3 = MulAdd(mul_prev_3, prev_3, out_3);
+    out_5 = MulAdd(mul_prev_5, prev_5, out_5);
+    prev_1 = out_1;
+    prev_3 = out_3;
+    prev_5 = out_5;
+
+    if (n >= 0) {
+      out[n] = GetLane(out_1 + out_3 + out_5);
+    }
+  }
+
+  // The above loop is effectively scalar but it is convenient to use the same
+  // prev/prev2 variables, so broadcast to each lane before the unrolled loop.
+#if HWY_TARGET != HWY_SCALAR && JXL_GAUSS_MAX_LANES > 1
+  prev2_1 = Broadcast<0>(prev2_1);
+  prev2_3 = Broadcast<0>(prev2_3);
+  prev2_5 = Broadcast<0>(prev2_5);
+  prev_1 = Broadcast<0>(prev_1);
+  prev_3 = Broadcast<0>(prev_3);
+  prev_5 = Broadcast<0>(prev_5);
+#endif
+
+  // Unrolled, no bounds checking needed.
+  for (; n < width - N + 1 - (JXL_GAUSS_MAX_LANES - 1); n += Lanes(d)) {
+    const V sum = LoadU(d, in + n - N - 1) + LoadU(d, in + n + N - 1);
+
+    // To get a vector of output(s), we multiply broadcasted vectors (of each
+    // input plus the two previous outputs) and add them all together.
+    // Incremental broadcasting and shifting is expected to be cheaper than
+    // horizontal adds or transposing 4x4 values because they run on a different
+    // port, concurrently with the FMA.
+    const V in0 = Broadcast<0>(sum);
+    V out_1 = in0 * mul_in_1;
+    V out_3 = in0 * mul_in_3;
+    V out_5 = in0 * mul_in_5;
+
+#if HWY_TARGET != HWY_SCALAR && JXL_GAUSS_MAX_LANES >= 2
+    const V in1 = Broadcast<1>(sum);
+    out_1 = MulAdd(ShiftLeftLanes<1>(mul_in_1), in1, out_1);
+    out_3 = MulAdd(ShiftLeftLanes<1>(mul_in_3), in1, out_3);
+    out_5 = MulAdd(ShiftLeftLanes<1>(mul_in_5), in1, out_5);
+
+#if JXL_GAUSS_MAX_LANES >= 4
+    const V in2 = Broadcast<2>(sum);
+    out_1 = MulAdd(ShiftLeftLanes<2>(mul_in_1), in2, out_1);
+    out_3 = MulAdd(ShiftLeftLanes<2>(mul_in_3), in2, out_3);
+    out_5 = MulAdd(ShiftLeftLanes<2>(mul_in_5), in2, out_5);
+
+    const V in3 = Broadcast<3>(sum);
+    out_1 = MulAdd(ShiftLeftLanes<3>(mul_in_1), in3, out_1);
+    out_3 = MulAdd(ShiftLeftLanes<3>(mul_in_3), in3, out_3);
+    out_5 = MulAdd(ShiftLeftLanes<3>(mul_in_5), in3, out_5);
+#endif
+#endif
+
+    out_1 = MulAdd(mul_prev2_1, prev2_1, out_1);
+    out_3 = MulAdd(mul_prev2_3, prev2_3, out_3);
+    out_5 = MulAdd(mul_prev2_5, prev2_5, out_5);
+
+    out_1 = MulAdd(mul_prev_1, prev_1, out_1);
+    out_3 = MulAdd(mul_prev_3, prev_3, out_3);
+    out_5 = MulAdd(mul_prev_5, prev_5, out_5);
+#if HWY_TARGET == HWY_SCALAR || JXL_GAUSS_MAX_LANES == 1
+    prev2_1 = prev_1;
+    prev2_3 = prev_3;
+    prev2_5 = prev_5;
+    prev_1 = out_1;
+    prev_3 = out_3;
+    prev_5 = out_5;
+#else
+    prev2_1 = Broadcast<JXL_GAUSS_MAX_LANES - 2>(out_1);
+    prev2_3 = Broadcast<JXL_GAUSS_MAX_LANES - 2>(out_3);
+    prev2_5 = Broadcast<JXL_GAUSS_MAX_LANES - 2>(out_5);
+    prev_1 = Broadcast<JXL_GAUSS_MAX_LANES - 1>(out_1);
+    prev_3 = Broadcast<JXL_GAUSS_MAX_LANES - 1>(out_3);
+    prev_5 = Broadcast<JXL_GAUSS_MAX_LANES - 1>(out_5);
+#endif
+
+    Store(out_1 + out_3 + out_5, d, out + n);
+  }
+
+  // Remainder handling with bounds checks
+  for (; n < width; ++n) {
+    const intptr_t left = n - N - 1;
+    const intptr_t right = n + N - 1;
+    const float left_val = left >= 0 ? in[left] : 0.0f;
+    const float right_val = right < width ? in[right] : 0.0f;
+    const V sum = Set(d, left_val + right_val);
+
+    // (Only processing a single lane here, no need to broadcast)
+    V out_1 = sum * mul_in_1;
+    V out_3 = sum * mul_in_3;
+    V out_5 = sum * mul_in_5;
+
+    out_1 = MulAdd(mul_prev2_1, prev2_1, out_1);
+    out_3 = MulAdd(mul_prev2_3, prev2_3, out_3);
+    out_5 = MulAdd(mul_prev2_5, prev2_5, out_5);
+    prev2_1 = prev_1;
+    prev2_3 = prev_3;
+    prev2_5 = prev_5;
+
+    out_1 = MulAdd(mul_prev_1, prev_1, out_1);
+    out_3 = MulAdd(mul_prev_3, prev_3, out_3);
+    out_5 = MulAdd(mul_prev_5, prev_5, out_5);
+    prev_1 = out_1;
+    prev_3 = out_3;
+    prev_5 = out_5;
+
+    out[n] = GetLane(out_1 + out_3 + out_5);
+  }
+}
+
+// Ring buffer is for n, n-1, n-2; round up to 4 for faster modulo.
+constexpr size_t kMod = 4;
+
+// No-op output functor: ignores all arguments, avoiding a store during warmup.
+struct OutputNone {
+  template <class V>
+  void operator()(const V& /*unused*/, float* JXL_RESTRICT /*pos*/,
+                  ptrdiff_t /*offset*/) const {}
+};
+
+// Common case (every VerticalBlock call except warmup): store the output.
+struct OutputStore {
+  template <class V>
+  void operator()(const V& out, float* JXL_RESTRICT pos,
+                  ptrdiff_t offset) const {
+    const HWY_FULL(float) d;  // Store; Stream only pays off beyond cache size.
+    Store(out, d, pos + offset);
+  }
+};
+
+// Input functor for the top/bottom borders: only one row contributes there, so
+// no addition is needed. pos may point to all zeros for rows outside the image.
+struct SingleInput {
+  explicit SingleInput(const float* pos) : pos_(pos) {}
+  Vec<HWY_FULL(float)> operator()(const size_t offset) const {
+    const HWY_FULL(float) d;
+    return Load(d, pos_ + offset);
+  }
+  const float* pos_;
+};
+
+// Input functor for the middle of the image: loads the same offset from two
+// rows (one above, one below) and returns their sum.
+class TwoInputs {
+ public:
+  TwoInputs(const float* pos1, const float* pos2) : pos1_(pos1), pos2_(pos2) {}
+  Vec<HWY_FULL(float)> operator()(const size_t offset) const {
+    const HWY_FULL(float) d;
+    const auto first = Load(d, pos1_ + offset);
+    return first + Load(d, pos2_ + offset);
+  }
+
+ private:
+  const float* pos1_;
+  const float* pos2_;
+};
+
+// Advances the recursion by one row for a block of kVectors consecutive full
+// vectors (one cache line, or a single vector at the right boundary): updates
+// the three ring-buffered filter states and passes their sum to `output`.
+template <size_t kVectors, class V, class Input, class Output>
+void VerticalBlock(const V& d1_1, const V& d1_3, const V& d1_5, const V& n2_1,
+                   const V& n2_3, const V& n2_5, const Input& input,
+                   size_t& ctr, float* ring_buffer, const Output output,
+                   float* JXL_RESTRICT out_pos) {
+  const HWY_FULL(float) d;
+  // Lanes per vector, i.e. the stride between consecutive Load/Store addresses.
+  constexpr size_t kVN = MaxLanes(HWY_FULL(float)());
+  constexpr size_t kLanes = kVectors * kVN;  // lanes in one block
+
+  float* JXL_RESTRICT y_1 = ring_buffer + 0 * kLanes * kMod;
+  float* JXL_RESTRICT y_3 = ring_buffer + 1 * kLanes * kMod;
+  float* JXL_RESTRICT y_5 = ring_buffer + 2 * kLanes * kMod;
+
+  const size_t n_0 = (++ctr) % kMod;      // slot written by this call
+  const size_t n_1 = (ctr - 1) % kMod;    // slot of the previous output
+  const size_t n_2 = (ctr - 2) % kMod;    // wraparound is fine: kMod | 2^64
+
+  for (size_t idx_vec = 0; idx_vec < kVectors; ++idx_vec) {
+    const V sum = input(idx_vec * kVN);
+
+    const V y_n1_1 = Load(d, y_1 + kLanes * n_1 + idx_vec * kVN);
+    const V y_n1_3 = Load(d, y_3 + kLanes * n_1 + idx_vec * kVN);
+    const V y_n1_5 = Load(d, y_5 + kLanes * n_1 + idx_vec * kVN);
+    const V y_n2_1 = Load(d, y_1 + kLanes * n_2 + idx_vec * kVN);
+    const V y_n2_3 = Load(d, y_3 + kLanes * n_2 + idx_vec * kVN);
+    const V y_n2_5 = Load(d, y_5 + kLanes * n_2 + idx_vec * kVN);
+    // (35)
+    const V y1 = MulAdd(n2_1, sum, NegMulSub(d1_1, y_n1_1, y_n2_1));
+    const V y3 = MulAdd(n2_3, sum, NegMulSub(d1_3, y_n1_3, y_n2_3));
+    const V y5 = MulAdd(n2_5, sum, NegMulSub(d1_5, y_n1_5, y_n2_5));
+    Store(y1, d, y_1 + kLanes * n_0 + idx_vec * kVN);
+    Store(y3, d, y_3 + kLanes * n_0 + idx_vec * kVN);
+    Store(y5, d, y_5 + kLanes * n_0 + idx_vec * kVN);
+    output(y1 + y3 + y5, out_pos, idx_vec * kVN);
+  }
+  // NOTE: flushing cache line out_pos hurts performance - less so with
+  // clflushopt than clflush but still a significant slowdown.
+}
+
+// Reads/writes one block (kVectors full vectors) in each row, top to bottom.
+template <size_t kVectors>
+void VerticalStrip(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+                   const ImageF& in, const size_t x, ImageF* JXL_RESTRICT out) {
+  // We're iterating vertically, so use multiple full-length vectors (each lane
+  // is one column of row n).
+  using D = HWY_FULL(float);
+  using V = Vec<D>;
+  const D d;
+  // Must equal the lane count used by VerticalBlock and FastGaussianVertical.
+  constexpr size_t kVN = MaxLanes(D());
+  constexpr size_t kLanes = kVectors * kVN;  // strip width in columns
+#if HWY_TARGET == HWY_SCALAR
+  const V d1_1 = Set(d, rg->d1[0 * 4]);
+  const V d1_3 = Set(d, rg->d1[1 * 4]);
+  const V d1_5 = Set(d, rg->d1[2 * 4]);
+  const V n2_1 = Set(d, rg->n2[0 * 4]);
+  const V n2_3 = Set(d, rg->n2[1 * 4]);
+  const V n2_5 = Set(d, rg->n2[2 * 4]);
+#else
+  const V d1_1 = LoadDup128(d, rg->d1 + 0 * 4);
+  const V d1_3 = LoadDup128(d, rg->d1 + 1 * 4);
+  const V d1_5 = LoadDup128(d, rg->d1 + 2 * 4);
+  const V n2_1 = LoadDup128(d, rg->n2 + 0 * 4);
+  const V n2_3 = LoadDup128(d, rg->n2 + 1 * 4);
+  const V n2_5 = LoadDup128(d, rg->n2 + 2 * 4);
+#endif
+
+  const size_t N = rg->radius;
+  const size_t ysize = in.ysize();
+
+  size_t ctr = 0;
+  HWY_ALIGN float ring_buffer[3 * kLanes * kMod] = {0};
+  HWY_ALIGN static constexpr float zero[kLanes] = {0};
+
+  // Warmup: top is out of bounds (zero padded), bottom is usually in-bounds.
+  ssize_t n = -static_cast<ssize_t>(N) + 1;
+  for (; n < 0; ++n) {
+    // bottom is always non-negative since n is initialized in -N + 1.
+    const size_t bottom = n + N - 1;
+    VerticalBlock<kVectors>(
+        d1_1, d1_3, d1_5, n2_1, n2_3, n2_5,
+        SingleInput(bottom < ysize ? in.ConstRow(bottom) + x : zero), ctr,
+        ring_buffer, OutputNone(), nullptr);
+  }
+  JXL_DASSERT(n >= 0);
+
+  // Start producing output; top is still out of bounds.
+  for (; static_cast<size_t>(n) < std::min(N + 1, ysize); ++n) {
+    const size_t bottom = n + N - 1;
+    VerticalBlock<kVectors>(
+        d1_1, d1_3, d1_5, n2_1, n2_3, n2_5,
+        SingleInput(bottom < ysize ? in.ConstRow(bottom) + x : zero), ctr,
+        ring_buffer, OutputStore(), out->Row(n) + x);
+  }
+
+  // Interior outputs with prefetching and without bounds checks.
+  constexpr size_t kPrefetchRows = 8;
+  for (; n < static_cast<ssize_t>(ysize - N + 1 - kPrefetchRows); ++n) {
+    const size_t top = n - N - 1;
+    const size_t bottom = n + N - 1;
+    VerticalBlock<kVectors>(
+        d1_1, d1_3, d1_5, n2_1, n2_3, n2_5,
+        TwoInputs(in.ConstRow(top) + x, in.ConstRow(bottom) + x), ctr,
+        ring_buffer, OutputStore(), out->Row(n) + x);
+    hwy::Prefetch(in.ConstRow(top + kPrefetchRows) + x);
+    hwy::Prefetch(in.ConstRow(bottom + kPrefetchRows) + x);
+  }
+
+  // Bottom border without prefetching and with bounds checks.
+  for (; static_cast<size_t>(n) < ysize; ++n) {
+    const size_t top = n - N - 1;
+    const size_t bottom = n + N - 1;
+    VerticalBlock<kVectors>(
+        d1_1, d1_3, d1_5, n2_1, n2_3, n2_5,
+        TwoInputs(in.ConstRow(top) + x,
+                  bottom < ysize ? in.ConstRow(bottom) + x : zero),
+        ctr, ring_buffer, OutputStore(), out->Row(n) + x);
+  }
+}
+
+// Runs the 1D vertical scan over all columns, one column per vector lane.
+// Not yet parallelized: the pool argument is ignored.
+void FastGaussianVertical(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+                          const ImageF& in, ThreadPool* /*pool*/,
+                          ImageF* JXL_RESTRICT out) {
+  PROFILER_FUNC;
+  JXL_CHECK(SameSize(in, *out));
+
+  constexpr size_t kLanesPerVector = MaxLanes(HWY_FULL(float)());
+  constexpr size_t kLanesPerLine = 64 / sizeof(float);
+  constexpr size_t kVectorsPerLine = kLanesPerLine / kLanesPerVector;
+  const size_t xsize = in.xsize();
+  size_t x = 0;
+  for (; x + kLanesPerLine <= xsize; x += kLanesPerLine) {  // whole lines
+    VerticalStrip<kVectorsPerLine>(rg, in, x, out);
+  }
+  for (; x < xsize; x += kLanesPerVector) {  // remainder, one vector at a time
+    VerticalStrip<1>(rg, in, x, out);
+  }
+}
+
+// Convolves every row of `in` horizontally with `kernel`, keeps one sample per
+// `res` pixels (starting at x = res / 2) and stores the result transposed.
+// TODO(veluca): consider replacing with FastGaussian.
+ImageF ConvolveXSampleAndTranspose(const ImageF& in,
+                                   const std::vector<float>& kernel,
+                                   const size_t res) {
+  JXL_ASSERT(kernel.size() % 2 == 1);
+  JXL_ASSERT(in.xsize() % res == 0);
+  const size_t xsize = in.xsize();
+  const size_t first_x = res / 2;
+  ImageF out(in.ysize(), xsize / res);
+  const int r = kernel.size() / 2;
+  HWY_FULL(float) df;
+  // Row copy with r border pixels per side plus slack for full-vector loads.
+  std::vector<float> row_tmp(xsize + 2 * r + Lanes(df));
+  float* const JXL_RESTRICT rowp = &row_tmp[r];
+  // Kernel copy, zero-padded so the last vector load stays in bounds.
+  std::vector<float> padded_k = kernel;
+  padded_k.resize(padded_k.size() + Lanes(df));
+  const float* const kernelp = &padded_k[r];
+  // Scalar path used for samples near the left/right border; the source index
+  // is clamped to [0, xsize].
+  const auto scalar_sample = [&](const size_t x) {
+    float sum = 0.0f;
+    for (int i = -r; i <= r; ++i) {
+      const int idx = std::min<int>(static_cast<int>(x) + i, xsize);
+      sum += rowp[std::max<int>(0, idx)] * kernelp[i];
+    }
+    return sum;
+  };
+  for (size_t y = 0; y < in.ysize(); ++y) {
+    ExtrapolateBorders(in.Row(y), rowp, xsize, r);
+    size_t x = first_x, ox = 0;
+    for (; x < static_cast<uint32_t>(r) && x < xsize; x += res, ++ox) {
+      out.Row(ox)[y] = scalar_sample(x);
+    }
+    for (; x + r < xsize; x += res, ++ox) {  // interior: vectorized dot product
+      auto sum = Zero(df);
+      for (int i = -r; i <= r; i += Lanes(df)) {
+        sum = MulAdd(LoadU(df, rowp + x + i), LoadU(df, kernelp + i), sum);
+      }
+      out.Row(ox)[y] = GetLane(SumOfLanes(sum));
+    }
+    for (; x < xsize; x += res, ++ox) out.Row(ox)[y] = scalar_sample(x);
+  }
+  return out;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(FastGaussian1D);
+HWY_EXPORT(ConvolveXSampleAndTranspose);
+void FastGaussian1D(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+                    const float* JXL_RESTRICT in, intptr_t width,
+                    float* JXL_RESTRICT out) {
+  HWY_DYNAMIC_DISPATCH(FastGaussian1D)(rg, in, width, out);  // runtime dispatch
+}
+
+HWY_EXPORT(FastGaussianVertical);  // Local function.
+
+// Copies row_in to row_out and mirrors `radius` pixels past each end (without
+// repeating the edge pixel); mirror sources are clamped to [0, xsize - 1].
+void ExtrapolateBorders(const float* const JXL_RESTRICT row_in,
+                        float* const JXL_RESTRICT row_out, const int xsize,
+                        const int radius) {
+  const int last = xsize - 1;
+  memcpy(row_out, row_in, xsize * sizeof(row_out[0]));
+  for (int dist = 1; dist <= radius; ++dist) {
+    row_out[-dist] = row_in[std::min(dist, last)];
+    row_out[last + dist] = row_in[std::max(0, last - dist)];
+  }
+}
+
+// Public ImageF overload: forwards to the dynamically dispatched version.
+ImageF ConvolveXSampleAndTranspose(
+    const ImageF& in, const std::vector<float>& kernel, const size_t res) {
+  return HWY_DYNAMIC_DISPATCH(ConvolveXSampleAndTranspose)(in, kernel, res);
+}
+
+// Image3F overload: runs the ImageF version on each of the three planes.
+Image3F ConvolveXSampleAndTranspose(
+    const Image3F& in, const std::vector<float>& kernel, const size_t res) {
+  return Image3F(ConvolveXSampleAndTranspose(in.Plane(0), kernel, res),
+                 ConvolveXSampleAndTranspose(in.Plane(1), kernel, res),
+                 ConvolveXSampleAndTranspose(in.Plane(2), kernel, res));
+}
+
+ImageF ConvolveAndSample(const ImageF& in, const std::vector<float>& kernel,
+                         const size_t res) {
+  // Two passes; each transposes, so the result has the input's orientation.
+  return ConvolveXSampleAndTranspose(
+      ConvolveXSampleAndTranspose(in, kernel, res), kernel, res);
+}
+
+// Implements "Recursive Implementation of the Gaussian Filter Using Truncated
+// Cosine Functions" by Charalampidis [2016]. (N) refers to equations therein.
+hwy::AlignedUniquePtr<RecursiveGaussian> CreateRecursiveGaussian(double sigma) {
+  PROFILER_FUNC;
+  auto rg = hwy::MakeUniqueAligned<RecursiveGaussian>();
+  constexpr double kPi = 3.141592653589793238;
+
+  const double radius = roundf(3.2795 * sigma + 0.2546);  // (57), "N"
+
+  // Table I, first row
+  const double pi_div_2r = kPi / (2.0 * radius);
+  const double omega[3] = {pi_div_2r, 3.0 * pi_div_2r, 5.0 * pi_div_2r};
+
+  // (37), k={1,3,5}
+  const double p_1 = +1.0 / std::tan(0.5 * omega[0]);
+  const double p_3 = -1.0 / std::tan(0.5 * omega[1]);
+  const double p_5 = +1.0 / std::tan(0.5 * omega[2]);
+
+  // (44), k={1,3,5}
+  const double r_1 = +p_1 * p_1 / std::sin(omega[0]);
+  const double r_3 = -p_3 * p_3 / std::sin(omega[1]);
+  const double r_5 = +p_5 * p_5 / std::sin(omega[2]);
+
+  // (50), k={1,3,5}
+  const double neg_half_sigma2 = -0.5 * sigma * sigma;
+  const double recip_radius = 1.0 / radius;
+  double rho[3];
+  for (size_t i = 0; i < 3; ++i) {
+    rho[i] = std::exp(neg_half_sigma2 * omega[i] * omega[i]) * recip_radius;
+  }
+
+  // second part of (52), k1,k2 = 1,3; 3,5; 5,1
+  const double D_13 = p_1 * r_3 - r_1 * p_3;
+  const double D_35 = p_3 * r_5 - r_3 * p_5;
+  const double D_51 = p_5 * r_1 - r_5 * p_1;
+
+  // (52), k=5
+  const double recip_d13 = 1.0 / D_13;
+  const double zeta_15 = D_35 * recip_d13;
+  const double zeta_35 = D_51 * recip_d13;
+
+  double A[9] = {p_1,     p_3,     p_5,  //
+                 r_1,     r_3,     r_5,  //  (56)
+                 zeta_15, zeta_35, 1};
+  JXL_CHECK(Inv3x3Matrix(A));  // presumably inverts A in place - TODO confirm
+  const double gamma[3] = {1, radius * radius - sigma * sigma,  // (55)
+                           zeta_15 * rho[0] + zeta_35 * rho[1] + rho[2]};
+  double beta[3];
+  MatMul(A, gamma, 3, 3, 1, beta);  // (53)
+
+  // Sanity check: correctly solved for beta (IIR filter weights are normalized)
+  const double sum = beta[0] * p_1 + beta[1] * p_3 + beta[2] * p_5;  // (39)
+  JXL_ASSERT(std::abs(sum - 1) < 1E-12);
+  (void)sum;  // presumably for builds where JXL_ASSERT does not use its argument
+
+  rg->radius = static_cast<int>(radius);  // already integral (rounded above)
+
+  double n2[3];
+  double d1[3];
+  for (size_t i = 0; i < 3; ++i) {
+    n2[i] = -beta[i] * std::cos(omega[i] * (radius + 1.0));  // (33)
+    d1[i] = -2.0 * std::cos(omega[i]);                       // (33)
+
+    for (size_t lane = 0; lane < 4; ++lane) {  // 4 copies each, for LoadDup128
+      rg->n2[4 * i + lane] = static_cast<float>(n2[i]);
+      rg->d1[4 * i + lane] = static_cast<float>(d1[i]);
+    }
+
+    const double d_2 = d1[i] * d1[i];
+
+    // Obtained by expanding (35) for four consecutive outputs via sympy:
+    // n, d, p, pp = symbols('n d p pp')
+    // i0, i1, i2, i3 = symbols('i0 i1 i2 i3')
+    // o0, o1, o2, o3 = symbols('o0 o1 o2 o3')
+    // o0 = n*i0 - d*p - pp
+    // o1 = n*i1 - d*o0 - p
+    // o2 = n*i2 - d*o1 - o0
+    // o3 = n*i3 - d*o2 - o1
+    // Then expand(o3) and gather terms for p(prev), pp(prev2) etc.
+    rg->mul_prev[4 * i + 0] = -d1[i];
+    rg->mul_prev[4 * i + 1] = d_2 - 1.0;
+    rg->mul_prev[4 * i + 2] = -d_2 * d1[i] + 2.0 * d1[i];
+    rg->mul_prev[4 * i + 3] = d_2 * d_2 - 3.0 * d_2 + 1.0;
+    rg->mul_prev2[4 * i + 0] = -1.0;
+    rg->mul_prev2[4 * i + 1] = d1[i];
+    rg->mul_prev2[4 * i + 2] = -d_2 + 1.0;
+    rg->mul_prev2[4 * i + 3] = d_2 * d1[i] - 2.0 * d1[i];
+    rg->mul_in[4 * i + 0] = n2[i];
+    rg->mul_in[4 * i + 1] = -d1[i] * n2[i];
+    rg->mul_in[4 * i + 2] = d_2 * n2[i] - n2[i];
+    rg->mul_in[4 * i + 3] = -d_2 * d1[i] * n2[i] + 2.0 * d1[i] * n2[i];
+  }
+  return rg;
+}
+
+namespace {
+
+// Applies the 1D horizontal scan (FastGaussian1D) to each row of `in`,
+// writing the blurred row to the same row of `out`.
+void FastGaussianHorizontal(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+                            const ImageF& in, ThreadPool* pool,
+                            ImageF* JXL_RESTRICT out) {
+  PROFILER_FUNC;
+  JXL_CHECK(SameSize(in, *out));
+
+  const intptr_t xsize = in.xsize();
+  // Task indices run over [0, ysize): each task handles exactly one row.
+  RunOnPool(
+      pool, 0, in.ysize(), ThreadPool::SkipInit(),
+      [&](const int task, const int /*thread*/) {
+        const size_t y = task;
+        FastGaussian1D(rg, in.ConstRow(y), xsize, out->Row(y));
+      },
+      "FastGaussianHorizontal");
+}
+
+}  // namespace
+
+void FastGaussian(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+                  const ImageF& in, ThreadPool* pool, ImageF* JXL_RESTRICT temp,
+                  ImageF* JXL_RESTRICT out) {
+  FastGaussianHorizontal(rg, in, pool, temp);  // rows: in -> temp
+  HWY_DYNAMIC_DISPATCH(FastGaussianVertical)(rg, *temp, pool, out);  // cols
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gauss_blur.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gauss_blur.h
new file mode 100644
index 0000000000..fb4741f03a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gauss_blur.h
@@ -0,0 +1,94 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_GAUSS_BLUR_H_
+#define LIB_JXL_GAUSS_BLUR_H_
+
+#include <stddef.h>
+
+#include <cmath>
+#include <hwy/aligned_allocator.h>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// Returns 2 * radius + 1 samples of exp(-i^2 / (2 * sigma^2)) for i in
+// [-radius, radius], divided by their sum so that the weights add up to 1.
+template <typename T>
+std::vector<T> GaussianKernel(int radius, T sigma) {
+  JXL_ASSERT(sigma > 0.0);
+  std::vector<T> weights(2 * radius + 1);
+  const T scaler = -1.0 / (2 * sigma * sigma);
+  double total = 0.0;
+  for (int i = -radius; i <= radius; ++i) {
+    const T w = std::exp(scaler * i * i);
+    weights[i + radius] = w;
+    total += w;
+  }
+  for (T& w : weights) w /= total;
+  return weights;
+}
+
+// All convolution functions below apply mirroring of the input on the borders
+// in the following way:
+//
+//     input: [a0 a1 a2 ...  aN]
+//     mirrored input: [aR ... a1 | a0 a1 a2 .... aN | aN-1 ... aN-R]
+//
+// where R is the radius of the kernel (i.e. kernel size is 2*R+1).
+
+// REQUIRES: in.xsize() and in.ysize() are integer multiples of res.
+ImageF ConvolveAndSample(const ImageF& in, const std::vector<float>& kernel,
+                         const size_t res);
+
+// Private, used by test.
+void ExtrapolateBorders(const float* const JXL_RESTRICT row_in,
+                        float* const JXL_RESTRICT row_out, const int xsize,
+                        const int radius);
+
+// Precomputed filter coefficients. Only for use by CreateRecursiveGaussian
+#pragma pack(push, 1)
+struct RecursiveGaussian {
+  // For k={1,3,5} in that order, each broadcasted 4x for LoadDup128. Used only
+  // for vertical passes.
+  float n2[3 * 4];
+  float d1[3 * 4];
+
+  // We unroll horizontal passes 4x - one output per lane. These are each lane's
+  // multiplier for the previous output (relative to the first of the four
+  // outputs). Indexing: 4 * 0..2 (for {1,3,5}) + 0..3 for the lane index.
+  float mul_prev[3 * 4];
+  // Ditto for the second to last output.
+  float mul_prev2[3 * 4];
+
+  // We multiply a vector of inputs 0..3 by a vector shifted from this array.
+  // in=0 uses all 4 (nonzero) terms; for in=3, the lower three lanes are 0.
+  float mul_in[3 * 4];
+
+  size_t radius;  // "N": rounded 3.2795 * sigma + 0.2546
+};
+#pragma pack(pop)
+
+// Precomputation for FastGaussian*; users may use the same pointer/storage in
+// subsequent calls to FastGaussian* with the same sigma.
+hwy::AlignedUniquePtr<RecursiveGaussian> CreateRecursiveGaussian(double sigma);
+
+// 1D Gaussian with zero-pad boundary handling and runtime independent of sigma.
+void FastGaussian1D(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+                    const float* JXL_RESTRICT in, intptr_t width,
+                    float* JXL_RESTRICT out);
+
+// 2D Gaussian with zero-pad boundary handling and runtime independent of sigma.
+void FastGaussian(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+                  const ImageF& in, ThreadPool* pool, ImageF* JXL_RESTRICT temp,
+                  ImageF* JXL_RESTRICT out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_GAUSS_BLUR_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gauss_blur_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gauss_blur_test.cc
new file mode 100644
index 0000000000..cdde77e1ff
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gauss_blur_test.cc
@@ -0,0 +1,610 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/gauss_blur.h"
+
+#include <cmath>
+#include <hwy/targets.h>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "lib/extras/time.h"
+#include "lib/jxl/base/robust_statistics.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+
+namespace jxl {
+
+bool NearEdge(const int64_t width, const int64_t peak) {
+  constexpr int64_t kMargin = 10;  // ~3*sigma; negligible truncation beyond it
+  return peak < kMargin || peak > width - kMargin;
+}
+
+// Walks outwards from `peak`, one offset at a time, until either mirrored
+// position leaves [0, width). At each offset the curve must not rise towards
+// the right (within tolerance) and must equal its mirror image on the left.
+void VerifySymmetric(const int64_t width, const int64_t peak,
+                     const float* out) {
+  // Looser tolerance when the peak is near an edge (see NearEdge).
+  const double tolerance = NearEdge(width, peak) ? 0.015 : 6E-7;
+  for (int64_t i = 1; peak - i >= 0 && peak + i < width; ++i) {
+    EXPECT_GT(out[peak + i - 1] + tolerance, out[peak + i]);  // descending
+    EXPECT_NEAR(out[peak - i], out[peak + i], tolerance);     // symmetric
+  }
+}
+
+void TestImpulseResponse(size_t width, size_t peak) {
+  const auto rg3 = CreateRecursiveGaussian(3.0);
+  const auto rg4 = CreateRecursiveGaussian(4.0);
+  const auto rg5 = CreateRecursiveGaussian(5.0);
+
+  const size_t padded = width + 3;  // Extra padding for 4x unrolling
+  auto in = hwy::AllocateAligned<float>(padded);
+  memset(in.get(), 0, sizeof(float) * padded);
+  in[peak] = 1.0f;
+
+  auto out3 = hwy::AllocateAligned<float>(padded);
+  auto out4 = hwy::AllocateAligned<float>(padded);
+  auto out5 = hwy::AllocateAligned<float>(padded);
+  FastGaussian1D(rg3, in.get(), width, out3.get());
+  FastGaussian1D(rg4, out3.get(), width, out4.get());
+  FastGaussian1D(rg5, in.get(), width, out5.get());
+
+  for (const float* response : {out3.get(), out4.get(), out5.get()}) {
+    VerifySymmetric(width, peak, response);
+  }
+
+  // Wider kernel has flatter peak
+  EXPECT_LT(out5[peak] + 0.05, out3[peak]);
+
+  // Gauss3 o Gauss4 ~= Gauss5
+  const double tolerance = NearEdge(width, peak) ? 0.04 : 0.01;
+  for (size_t i = 0; i < width; ++i) {
+    EXPECT_NEAR(out4[i], out5[i], tolerance);
+  }
+}
+
+// Runs the impulse-response checks with the impulse at every position of a
+// row of `width` samples.
+void TestImpulseResponseForWidth(size_t width) {
+  for (size_t peak = 0; peak < width; ++peak) TestImpulseResponse(width, peak);
+}
+
+// Widths: tiny even (10), small odd (15), power of two (32), power of two - 1
+// (31) and power of two + 1 (33).
+TEST(GaussBlurTest, ImpulseResponse) {
+  for (const size_t width : {10, 15, 32, 31, 33}) {
+    TestImpulseResponseForWidth(width);
+  }
+}
+
+ImageF Convolve(const ImageF& in, const std::vector<float>& kernel) {
+  return ConvolveAndSample(in, kernel, 1);  // res = 1
+}
+
+// Higher-precision (double accumulation) version for the accuracy test.
+ImageF ConvolveAndTransposeF64(const ImageF& in,
+                               const std::vector<double>& kernel) {
+  JXL_ASSERT(kernel.size() % 2 == 1);
+  const size_t xsize = in.xsize();
+  ImageF out(in.ysize(), xsize);
+  const int r = kernel.size() / 2;
+  std::vector<float> row_tmp(xsize + 2 * r);
+  float* const JXL_RESTRICT rowp = &row_tmp[r];
+  const double* const kernelp = &kernel[r];
+  for (size_t y = 0; y < in.ysize(); ++y) {
+    ExtrapolateBorders(in.Row(y), rowp, xsize, r);
+    for (size_t x = 0; x < xsize; ++x) {
+      double sum = 0.0;
+      for (int i = -r; i <= r; ++i) {
+        const int idx = std::min<int>(static_cast<int>(x) + i, xsize);
+        sum += rowp[std::max<int>(0, idx)] * kernelp[i];
+      }
+      out.Row(x)[y] = static_cast<float>(sum);  // transposed write
+    }
+  }
+  return out;
+}
+
+ImageF ConvolveF64(const ImageF& in, const std::vector<double>& kernel) {
+  // Two transposing passes, so the result has the input's orientation.
+  return ConvolveAndTransposeF64(ConvolveAndTransposeF64(in, kernel), kernel);
+}
+
+// Compares FastGaussian against the FIR Convolve on a centered unit impulse.
+void TestDirac2D(size_t xsize, size_t ysize, double sigma) {
+  ImageF impulse(xsize, ysize);
+  ZeroFillImage(&impulse);
+  // We anyway ignore the border below, so might as well choose the middle.
+  impulse.Row(ysize / 2)[xsize / 2] = 1.0f;
+
+  const int radius = static_cast<int>(4 * sigma);
+  const ImageF expected =
+      Convolve(impulse, GaussianKernel(radius, static_cast<float>(sigma)));
+
+  ImageF temp(xsize, ysize);
+  ImageF actual(xsize, ysize);
+  const auto rg = CreateRecursiveGaussian(sigma);
+  FastGaussian(rg, impulse, /*pool=*/nullptr, &temp, &actual);
+
+  const size_t border = 2 * sigma;
+  const double max_l1 = sigma < 1.5 ? 5E-3 : 6E-4;
+  VerifyRelativeError(expected, actual, max_l1, 1E-8, border);
+}
+
+// Runs TestDirac2D for every combination of the listed widths, heights and
+// sigmas.
+TEST(GaussBlurTest, Test2D) {
+  const size_t kDimensions[] = {6, 15, 17, 64, 50, 49};
+  const double kSigmas[] = {1.0, 2.5, 3.6, 7.0};
+  for (const size_t xsize : kDimensions) {
+    for (const size_t ysize : kDimensions) {
+      for (const double sigma : kSigmas) TestDirac2D(xsize, ysize, sigma);
+    }
+  }
+}
+
+// Slow (44 sec). To run, remove the disabled prefix. Only prints the error.
+TEST(GaussBlurTest, DISABLED_SlowTestDirac1D) {
+  const double sigma = 7.0;
+  const auto rg = CreateRecursiveGaussian(sigma);
+
+  // IPOL accuracy test uses 10^-15 tolerance, this is 2*10^-11.
+  const size_t radius = static_cast<size_t>(7 * sigma);
+  const std::vector<double> kernel = GaussianKernel(radius, sigma);
+
+  const size_t length = 16384;
+  ImageF inputs(length, 1);
+  ZeroFillImage(&inputs);
+
+  auto outputs = hwy::AllocateAligned<float>(length);
+
+  // One per output position, accumulated over all impulse (center) positions
+  auto sum_abs_err = hwy::AllocateAligned<double>(length);
+  std::fill(sum_abs_err.get(), sum_abs_err.get() + length, 0.0);
+
+  for (size_t center = radius; center < length - radius; ++center) {
+    inputs.Row(0)[center - 1] = 0.0f;  // reset last peak, entire array now 0
+    inputs.Row(0)[center] = 1.0f;
+    FastGaussian1D(rg, inputs.Row(0), length, outputs.get());
+
+    const ImageF outputs_fir = ConvolveF64(inputs, kernel);  // FIR reference
+
+    for (size_t i = 0; i < length; ++i) {
+      const float abs_err = std::abs(outputs[i] - outputs_fir.Row(0)[i]);
+      sum_abs_err[i] += static_cast<double>(abs_err);
+    }
+  }
+
+  const double max_abs_err =
+      *std::max_element(sum_abs_err.get(), sum_abs_err.get() + length);
+  printf("Max abs err: %.8e\n", max_abs_err);
+}
+
+void TestRandom(size_t xsize, size_t ysize, float min, float max, double sigma,
+                double max_l1, double max_rel) {
+  printf("%4zu x %4zu %4.1f %4.1f sigma %.1f\n", xsize, ysize, min, max, sigma);
+  const size_t seed = 65537 + xsize * 129 + ysize;  // varies with image size
+  ImageF noise(xsize, ysize);
+  RandomFillImage(&noise, min, max, seed);
+  // FastGaussian/Convolve handle borders differently, so keep those pixels 0.
+  const size_t border = 4 * sigma;
+  SetBorder(border, 0.0f, &noise);
+
+  const int radius = static_cast<int>(4 * sigma);
+  const ImageF expected =
+      Convolve(noise, GaussianKernel(radius, static_cast<float>(sigma)));
+
+  ImageF temp(xsize, ysize);
+  ImageF actual(xsize, ysize);
+  const auto rg = CreateRecursiveGaussian(sigma);
+  FastGaussian(rg, noise, /*pool=*/nullptr, &temp, &actual);
+
+  VerifyRelativeError(expected, actual, max_l1, max_rel, border);
+}
+
+// Runs TestRandom on single-row, single-column and two-dimensional images.
+void TestRandomForSizes(float min, float max, double sigma) {
+  const double l1 = 5E-3, rel = 3E-3;  // base tolerances, scaled up for 2D
+  TestRandom(128, 1, min, max, sigma, l1, rel);
+  TestRandom(1, 128, min, max, sigma, l1, rel);
+  TestRandom(30, 201, min, max, sigma, l1 * 1.6, rel * 1.2);
+  TestRandom(201, 30, min, max, sigma, l1 * 1.6, rel * 1.2);
+  TestRandom(201, 201, min, max, sigma, l1 * 2.0, rel * 1.2);
+}
+
+TEST(GaussBlurTest, TestRandom) {
+  // Each {min, max} value range is tested with sigma 3 and then sigma 7.
+  const float kRanges[3][2] = {
+      {0.0f, 10.0f},   // small non-negative
+      {-4.0f, -1.0f},  // small negative
+      {-6.0f, 6.0f},   // mixed positive/negative
+  };
+  for (const auto& range : kRanges) {
+    for (const float sigma : {3.0f, 7.0f}) {
+      TestRandomForSizes(range[0], range[1], sigma);
+    }
+  }
+}
+
+TEST(GaussBlurTest, TestSign) {
+  const size_t xsize = 500;
+  const size_t ysize = 606;
+  ImageF in(xsize, ysize);
+
+  ZeroFillImage(&in);
+  const float center[33 * 33] = {
+      -0.128445f, -0.098473f, -0.121883f, -0.093601f, 0.095665f,  -0.271332f,
+      -0.705475f, -1.324005f, -2.020741f, -1.329464f, 1.834064f,  4.787300f,
+      5.834560f,  5.272720f,  3.967960f,  3.547935f,  3.432732f,  3.383015f,
+      3.239326f,  3.290806f,  3.298954f,  3.397808f,  3.359730f,  3.533844f,
+      3.511856f,  3.436787f,  3.428310f,  3.460209f,  3.550011f,  3.590942f,
+      3.593109f,  3.560005f,  3.443165f,  0.089741f,  0.179230f,  -0.032997f,
+      -0.182610f, 0.005669f,  -0.244759f, -0.395123f, -0.514961f, -1.003529f,
+      -1.798656f, -2.377975f, 0.222191f,  3.957664f,  5.946804f,  5.543129f,
+      4.290096f,  3.621010f,  3.407257f,  3.392494f,  3.345367f,  3.391903f,
+      3.441605f,  3.429260f,  3.444969f,  3.507130f,  3.518612f,  3.443111f,
+      3.475948f,  3.536148f,  3.470333f,  3.628311f,  3.600243f,  3.292892f,
+      -0.226730f, -0.573616f, -0.762165f, -0.398739f, -0.189842f, -0.275921f,
+      -0.446739f, -0.550037f, -0.461033f, -0.724792f, -1.448349f, -1.814064f,
+      -0.491032f, 2.817703f,  5.213242f,  5.675629f,  4.864548f,  3.876324f,
+      3.535587f,  3.530312f,  3.413765f,  3.386261f,  3.404854f,  3.383472f,
+      3.420830f,  3.326496f,  3.257877f,  3.362152f,  3.489609f,  3.619587f,
+      3.555805f,  3.423164f,  3.309708f,  -0.483940f, -0.502926f, -0.592983f,
+      -0.492527f, -0.413616f, -0.482555f, -0.475506f, -0.447990f, -0.338120f,
+      -0.189072f, -0.376427f, -0.910828f, -1.878044f, -1.937927f, 1.423218f,
+      4.871609f,  5.767548f,  5.103741f,  3.983868f,  3.633003f,  3.458263f,
+      3.507309f,  3.247021f,  3.220612f,  3.326061f,  3.352814f,  3.291061f,
+      3.322739f,  3.444302f,  3.506207f,  3.556839f,  3.529575f,  3.457024f,
+      -0.408161f, -0.431343f, -0.454369f, -0.356419f, -0.380924f, -0.399452f,
+      -0.439476f, -0.412189f, -0.306816f, -0.008213f, -0.325813f, -0.537842f,
+      -0.984100f, -1.805332f, -2.028198f, 0.773205f,  4.423046f,  5.604839f,
+      5.231617f,  4.080299f,  3.603008f,  3.498741f,  3.517010f,  3.333897f,
+      3.381336f,  3.342617f,  3.369686f,  3.434155f,  3.490452f,  3.607029f,
+      3.555298f,  3.702297f,  3.618679f,  -0.503609f, -0.578564f, -0.419014f,
+      -0.239883f, 0.269836f,  0.022984f,  -0.455067f, -0.621777f, -0.304176f,
+      -0.163792f, -0.490250f, -0.466637f, -0.391792f, -0.657940f, -1.498035f,
+      -1.895836f, 0.036537f,  3.462456f,  5.586445f,  5.658791f,  4.434784f,
+      3.423435f,  3.318848f,  3.202328f,  3.532764f,  3.436687f,  3.354881f,
+      3.356941f,  3.382645f,  3.503902f,  3.512867f,  3.632366f,  3.537312f,
+      -0.274734f, -0.658829f, -0.726532f, -0.281254f, 0.053196f,  -0.064991f,
+      -0.608517f, -0.720966f, -0.070602f, -0.111320f, -0.440956f, -0.492180f,
+      -0.488762f, -0.569283f, -1.012741f, -1.582779f, -2.101479f, -1.392380f,
+      2.451153f,  5.555855f,  6.096313f,  5.230045f,  4.068172f,  3.404274f,
+      3.392586f,  3.326065f,  3.156670f,  3.284828f,  3.347012f,  3.319252f,
+      3.352310f,  3.610790f,  3.499847f,  -0.150600f, -0.314445f, -0.093575f,
+      -0.057384f, 0.053688f,  -0.189255f, -0.263515f, -0.318653f, 0.053246f,
+      0.080627f,  -0.119553f, -0.152454f, -0.305420f, -0.404869f, -0.385944f,
+      -0.689949f, -1.204914f, -1.985748f, -1.711361f, 1.260658f,  4.626896f,
+      5.888351f,  5.450989f,  4.070587f,  3.539200f,  3.383492f,  3.296318f,
+      3.267334f,  3.436028f,  3.463005f,  3.502625f,  3.522282f,  3.403763f,
+      -0.348049f, -0.302303f, -0.137016f, -0.041737f, -0.164001f, -0.358849f,
+      -0.469627f, -0.428291f, -0.375797f, -0.246346f, -0.118950f, -0.084229f,
+      -0.205681f, -0.241199f, -0.391796f, -0.323151f, -0.241211f, -0.834137f,
+      -1.684219f, -1.972137f, 0.448399f,  4.019985f,  5.648144f,  5.647846f,
+      4.295094f,  3.641884f,  3.374790f,  3.197342f,  3.425545f,  3.507481f,
+      3.478065f,  3.430889f,  3.341900f,  -1.016304f, -0.959221f, -0.909466f,
+      -0.810715f, -0.590729f, -0.594467f, -0.646721f, -0.629364f, -0.528561f,
+      -0.551819f, -0.301086f, -0.149101f, -0.060146f, -0.162220f, -0.326210f,
+      -0.156548f, -0.036293f, -0.426098f, -1.145470f, -1.628998f, -2.003052f,
+      -1.142891f, 2.885162f,  5.652863f,  5.718426f,  4.911140f,  3.234222f,
+      3.473373f,  3.577183f,  3.271603f,  3.410435f,  3.505489f,  3.434032f,
+      -0.508911f, -0.438797f, -0.437450f, -0.627426f, -0.511745f, -0.304874f,
+      -0.274246f, -0.261841f, -0.228466f, -0.342491f, -0.528206f, -0.490082f,
+      -0.516350f, -0.361694f, -0.398514f, -0.276020f, -0.210369f, -0.355938f,
+      -0.402622f, -0.538864f, -1.249573f, -2.100105f, -0.996178f, 1.886410f,
+      4.929745f,  5.630871f,  5.444199f,  4.042740f,  3.739189f,  3.691399f,
+      3.391956f,  3.469696f,  3.431232f,  0.204849f,  0.205433f,  -0.131927f,
+      -0.367908f, -0.374378f, -0.126820f, -0.186951f, -0.228565f, -0.081776f,
+      -0.143143f, -0.379230f, -0.598701f, -0.458019f, -0.295586f, -0.407730f,
+      -0.245853f, -0.043140f, 0.024242f,  -0.038998f, -0.044151f, -0.425991f,
+      -1.240753f, -1.943146f, -2.174755f, 0.523415f,  4.376751f,  5.956558f,
+      5.850082f,  4.403152f,  3.517399f,  3.560753f,  3.554836f,  3.471985f,
+      -0.508503f, -0.109783f, 0.057747f,  0.190079f,  -0.257153f, -0.591980f,
+      -0.666771f, -0.525391f, -0.293060f, -0.489731f, -0.304855f, -0.259644f,
+      -0.367825f, -0.346977f, -0.292889f, -0.215652f, -0.120705f, -0.176010f,
+      -0.422905f, -0.114647f, -0.289749f, -0.374203f, -0.606754f, -1.127949f,
+      -1.994583f, -0.588058f, 3.415840f,  5.603470f,  5.811581f,  4.959423f,
+      3.721760f,  3.710499f,  3.785461f,  -0.554588f, -0.565517f, -0.434578f,
+      -0.012482f, -0.284660f, -0.699795f, -0.957535f, -0.755135f, -0.382034f,
+      -0.321552f, -0.287571f, -0.279537f, -0.314972f, -0.256287f, -0.372818f,
+      -0.316017f, -0.287975f, -0.365639f, -0.512589f, -0.420692f, -0.436485f,
+      -0.295353f, -0.451958f, -0.755459f, -1.272358f, -2.301353f, -1.776161f,
+      1.572483f,  4.826286f,  5.741898f,  5.162853f,  4.028049f,  3.686325f,
+      -0.495590f, -0.664413f, -0.760044f, -0.152634f, -0.286480f, -0.340462f,
+      0.076477f,  0.187706f,  -0.068787f, -0.293491f, -0.361145f, -0.292515f,
+      -0.140671f, -0.190723f, -0.333302f, -0.368168f, -0.192581f, -0.154499f,
+      -0.236544f, -0.124405f, -0.208321f, -0.465607f, -0.883080f, -1.104813f,
+      -1.210567f, -1.415665f, -1.924683f, -1.634758f, 0.601017f,  4.276672f,
+      5.501350f,  5.331257f,  3.809288f,  -0.727722f, -0.533619f, -0.511524f,
+      -0.470688f, -0.610710f, -0.575130f, -0.311115f, -0.090420f, -0.297676f,
+      -0.646118f, -0.742805f, -0.485050f, -0.330910f, -0.275417f, -0.357037f,
+      -0.425598f, -0.481876f, -0.488941f, -0.393551f, -0.051105f, -0.090755f,
+      -0.328674f, -0.536369f, -0.533684f, -0.336960f, -0.689194f, -1.187195f,
+      -1.860954f, -2.290253f, -0.424774f, 3.050060f,  5.083332f,  5.291920f,
+      -0.343605f, -0.190975f, -0.303692f, -0.456512f, -0.681820f, -0.690693f,
+      -0.416729f, -0.286446f, -0.442055f, -0.709148f, -0.569160f, -0.382423f,
+      -0.402321f, -0.383362f, -0.366413f, -0.290718f, -0.110069f, -0.220280f,
+      -0.279018f, -0.255424f, -0.262081f, -0.487556f, -0.444492f, -0.250500f,
+      -0.119583f, -0.291557f, -0.537781f, -1.104073f, -1.737091f, -1.697441f,
+      -0.323456f, 2.042049f,  4.605103f,  -0.310631f, -0.279568f, -0.012695f,
+      -0.160130f, -0.358746f, -0.421101f, -0.559677f, -0.474136f, -0.416565f,
+      -0.561817f, -0.534672f, -0.519157f, -0.767197f, -0.605831f, -0.186523f,
+      0.219872f,  0.264984f,  -0.193432f, -0.363182f, -0.467472f, -0.462009f,
+      -0.571053f, -0.522476f, -0.315903f, -0.237427f, -0.147320f, -0.100201f,
+      -0.237568f, -0.763435f, -1.242043f, -2.135159f, -1.409485f, 1.236370f,
+      -0.474247f, -0.517906f, -0.410217f, -0.542244f, -0.795986f, -0.590004f,
+      -0.388863f, -0.462921f, -0.810627f, -0.778637f, -0.512486f, -0.718025f,
+      -0.710854f, -0.482513f, -0.318233f, -0.194962f, -0.220116f, -0.421673f,
+      -0.534233f, -0.403339f, -0.389332f, -0.407303f, -0.437355f, -0.469730f,
+      -0.359600f, -0.352745f, -0.466755f, -0.414585f, -0.430756f, -0.656822f,
+      -1.237038f, -2.046097f, -1.574898f, -0.593815f, -0.582165f, -0.336098f,
+      -0.372612f, -0.554386f, -0.410603f, -0.428276f, -0.647644f, -0.640720f,
+      -0.582207f, -0.414112f, -0.435547f, -0.435505f, -0.332561f, -0.248116f,
+      -0.340221f, -0.277855f, -0.352699f, -0.377319f, -0.230850f, -0.313267f,
+      -0.446270f, -0.346237f, -0.420422f, -0.530781f, -0.400341f, -0.463661f,
+      -0.209091f, -0.056705f, -0.011772f, -0.169388f, -0.736275f, -1.463017f,
+      -0.752701f, -0.668865f, -0.329765f, -0.299347f, -0.245667f, -0.286999f,
+      -0.520420f, -0.675438f, -0.255753f, 0.141357f,  -0.079639f, -0.419476f,
+      -0.374069f, -0.046253f, 0.116116f,  -0.145847f, -0.380371f, -0.563412f,
+      -0.638634f, -0.310116f, -0.260914f, -0.508404f, -0.465508f, -0.527824f,
+      -0.370979f, -0.305595f, -0.244694f, -0.254490f, 0.009968f,  -0.050201f,
+      -0.331219f, -0.614960f, -0.788208f, -0.483242f, -0.367516f, -0.186951f,
+      -0.180031f, 0.129711f,  -0.127811f, -0.384750f, -0.499542f, -0.418613f,
+      -0.121635f, 0.203197f,  -0.167290f, -0.397270f, -0.355461f, -0.218746f,
+      -0.376785f, -0.521698f, -0.721581f, -0.845741f, -0.535439f, -0.220882f,
+      -0.309067f, -0.555248f, -0.690342f, -0.664948f, -0.390102f, 0.020355f,
+      -0.130447f, -0.173252f, -0.170059f, -0.633663f, -0.956001f, -0.621696f,
+      -0.388302f, -0.342262f, -0.244370f, -0.386948f, -0.401421f, -0.172979f,
+      -0.206163f, -0.450058f, -0.525789f, -0.549274f, -0.349251f, -0.474613f,
+      -0.667976f, -0.435600f, -0.175369f, -0.196877f, -0.202976f, -0.242481f,
+      -0.258369f, -0.189133f, -0.395397f, -0.765499f, -0.944016f, -0.850967f,
+      -0.631561f, -0.152493f, -0.046432f, -0.262066f, -0.195919f, 0.048218f,
+      0.084972f,  0.039902f,  0.000618f,  -0.404430f, -0.447456f, -0.418076f,
+      -0.631935f, -0.717415f, -0.502888f, -0.530514f, -0.747826f, -0.704041f,
+      -0.674969f, -0.516853f, -0.418446f, -0.327740f, -0.308815f, -0.481636f,
+      -0.440083f, -0.481720f, -0.341053f, -0.283897f, -0.324368f, -0.352829f,
+      -0.434349f, -0.545589f, -0.533104f, -0.472755f, -0.570496f, -0.557735f,
+      -0.708176f, -0.493332f, -0.194416f, -0.186249f, -0.256710f, -0.271835f,
+      -0.304752f, -0.431267f, -0.422398f, -0.646725f, -0.680801f, -0.249031f,
+      -0.058567f, -0.213890f, -0.383949f, -0.540291f, -0.549877f, -0.225567f,
+      -0.037174f, -0.499874f, -0.641010f, -0.628044f, -0.390549f, -0.311497f,
+      -0.542313f, -0.569565f, -0.473408f, -0.331245f, -0.357197f, -0.285599f,
+      -0.200157f, -0.201866f, -0.124428f, -0.346016f, -0.392311f, -0.264496f,
+      -0.285370f, -0.436974f, -0.523483f, -0.410461f, -0.267925f, -0.055016f,
+      -0.382458f, -0.319771f, -0.049927f, 0.124329f,  0.266102f,  -0.106606f,
+      -0.773647f, -0.973053f, -0.708206f, -0.486137f, -0.319923f, -0.493900f,
+      -0.490860f, -0.324986f, -0.147346f, -0.146088f, -0.161758f, -0.084396f,
+      -0.379494f, 0.041626f,  -0.113361f, -0.277767f, 0.083366f,  0.126476f,
+      0.139057f,  0.038040f,  0.038162f,  -0.242126f, -0.411736f, -0.370049f,
+      -0.455357f, -0.039257f, 0.264442f,  -0.271492f, -0.425346f, -0.514847f,
+      -0.448650f, -0.580399f, -0.652603f, -0.774803f, -0.692524f, -0.579578f,
+      -0.465206f, -0.386265f, -0.458012f, -0.446594f, -0.284893f, -0.345448f,
+      -0.350876f, -0.440350f, -0.360378f, -0.270428f, 0.237213f,  -0.063602f,
+      -0.364529f, -0.179867f, 0.078197f,  0.117947f,  -0.093410f, -0.359119f,
+      -0.480961f, -0.540638f, -0.436287f, -0.598576f, -0.253735f, -0.060093f,
+      -0.549145f, -0.808327f, -0.698593f, -0.595764f, -0.582508f, -0.497353f,
+      -0.480892f, -0.584240f, -0.665791f, -0.690903f, -0.743446f, -0.796677f,
+      -0.782391f, -0.649010f, -0.628139f, -0.880848f, -0.829361f, -0.373272f,
+      -0.223667f, 0.174572f,  -0.348743f, -0.798901f, -0.692307f, -0.607609f,
+      -0.401455f, -0.480919f, -0.450798f, -0.435413f, -0.322338f, -0.228382f,
+      -0.450466f, -0.504440f, -0.477402f, -0.662224f, -0.583397f, -0.217445f,
+      -0.157459f, -0.079584f, -0.226168f, -0.488720f, -0.669624f, -0.666878f,
+      -0.565311f, -0.549625f, -0.364601f, -0.497627f, -0.736897f, -0.763023f,
+      -0.741020f, -0.404503f, 0.184814f,  -0.075315f, -0.281513f, -0.532906f,
+      -0.405800f, -0.313438f, -0.536652f, -0.403381f, 0.011967f,  0.103310f,
+      -0.269848f, -0.508656f, -0.445923f, -0.644859f, -0.617870f, -0.500927f,
+      -0.371559f, -0.125580f, 0.028625f,  -0.154713f, -0.442024f, -0.492764f,
+      -0.199371f, 0.236305f,  0.225925f,  0.075577f,  -0.285812f, -0.437145f,
+      -0.374260f, -0.156693f, -0.129635f, -0.243206f, -0.123058f, 0.162148f,
+      -0.313152f, -0.337982f, -0.358421f, 0.040070f,  0.038925f,  -0.333313f,
+      -0.351662f, 0.023014f,  0.091362f,  -0.282890f, -0.373253f, -0.389050f,
+      -0.532707f, -0.423347f, -0.349968f, -0.287045f, -0.202442f, -0.308430f,
+      -0.222801f, -0.106323f, -0.056358f, 0.027222f,  0.390732f,  0.033558f,
+      -0.160088f, -0.382217f, -0.535282f, -0.515900f, -0.022736f, 0.165665f,
+      -0.111408f, -0.233784f, -0.312357f, -0.541885f, -0.480022f, -0.482513f,
+      -0.246254f, 0.132244f,  0.090134f,  0.234634f,  -0.089249f, -0.460854f,
+      -0.515457f, -0.450874f, -0.311031f, -0.387680f, -0.360554f, -0.179241f,
+      -0.283817f, -0.475815f, -0.246399f, -0.388958f, -0.551140f, -0.496239f,
+      -0.559879f, -0.379761f, -0.254288f, -0.395111f, -0.613018f, -0.459427f,
+      -0.263580f, -0.268929f, 0.080826f,  0.115616f,  -0.097324f, -0.325310f,
+      -0.480450f, -0.313286f, -0.310371f, -0.517361f, -0.288288f, -0.112679f,
+      -0.173241f, -0.221664f, -0.039452f, -0.107578f, -0.089630f, -0.483768f,
+      -0.571087f, -0.497108f, -0.321533f, -0.375492f, -0.540363f, -0.406815f,
+      -0.388512f, -0.514561f, -0.540192f, -0.402412f, -0.232246f, -0.304749f,
+      -0.383724f, -0.679596f, -0.685463f, -0.694538f, -0.642937f, -0.425789f,
+      0.103271f,  -0.194862f, -0.487999f, -0.717281f, -0.681850f, -0.709286f,
+      -0.615398f, -0.554245f, -0.254681f, -0.049950f, -0.002914f, -0.095383f,
+      -0.370911f, -0.564224f, -0.242714f};
+  const size_t xtest = xsize / 2;
+  const size_t ytest = ysize / 2;
+
+  for (intptr_t dy = -16; dy <= 16; ++dy) {
+    float* row = in.Row(ytest + dy);
+    for (intptr_t dx = -16; dx <= 16; ++dx)
+      row[xtest + dx] = center[(dy + 16) * 33 + (dx + 16)];
+  }
+
+  const double sigma = 7.155933;
+
+  ImageF temp(xsize, ysize);
+  ImageF out_rg(xsize, ysize);
+  const auto rg = CreateRecursiveGaussian(sigma);
+  ThreadPool* null_pool = nullptr;
+  FastGaussian(rg, in, null_pool, &temp, &out_rg);
+
+  ImageF out_old;
+  {
+    const std::vector<float> kernel =
+        GaussianKernel(static_cast<int>(4 * sigma), static_cast<float>(sigma));
+    printf("old kernel size %zu\n", kernel.size());
+    out_old = Convolve(in, kernel);
+  }
+
+  printf("rg %.4f old %.4f\n", out_rg.Row(ytest)[xtest],
+         out_old.Row(ytest)[xtest]);
+}
+
+// Returns throughput in megapixels/sec. "div" divides the number of
+// repetitions to shorten slow benchmarks. func(xsize, ysize) must return the
+// elapsed time of one run.
+template <class Func>
+double Measure(const size_t xsize, const size_t ysize, int div,
+               const Func& func) {
+  // Far fewer repetitions when a sanitizer is enabled.
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+  const int max_reps = 10;
+#else
+  const int max_reps = 2000;
+#endif
+  const int reps = std::max(2, max_reps / div);  // Geomean skips one sample.
+  std::vector<double> elapsed;
+  for (int rep = 0; rep < reps; ++rep) elapsed.push_back(func(xsize, ysize));
+
+  const double megapixels = xsize * ysize * 1E-6;
+  if (reps <= 50) {
+    // Too few samples for the mode: average all but the first (noisier) run.
+    const double geomean = Geomean(elapsed.data() + 1, elapsed.size() - 1);
+    return megapixels / geomean;
+  }
+  // Enough samples for the mode; the samples are sorted first.
+  std::sort(elapsed.begin(), elapsed.end());
+  const double mode = jxl::HalfSampleMode()(elapsed.data(), elapsed.size());
+  return megapixels / mode;
+}
+
+void Benchmark1D() {  // Times FastGaussian1D on one row and prints the result.
+  // Uncomment to disable SIMD and force a scalar implementation
+  // hwy::DisableTargets(~HWY_SCALAR);
+
+  const size_t length = 16384;  // (same value used for running IPOL benchmark)
+  const double sigma = 7.0;     // (from Butteraugli application)
+  // NOTE: MSVC and clang disagree on the required captures, so use =.
+  const double mps_rg1 =
+      Measure(length, 1, 1, [=](size_t /*xsize*/, size_t /*ysize*/) {
+        ImageF in(length, 1);
+        const float expected = length;
+        FillImage(expected, &in);
+
+        ImageF temp(length, 1);
+        ImageF out(length, 1);
+        const auto rg = CreateRecursiveGaussian(sigma);
+        const double t0 = Now();  // Only the FastGaussian1D call is timed.
+        FastGaussian1D(rg, in.Row(0), length, out.Row(0));
+        const double t1 = Now();
+        // Prevent optimizing out; also checks the sample at the row center.
+        const float actual = out.ConstRow(0)[length / 2];
+        const float rel_err = std::abs(actual - expected) / expected;
+        EXPECT_LT(rel_err, 9E-5);
+        return t1 - t0;
+      });
+  // Report milliseconds per run for comparison with IPOL benchmark
+  const double milliseconds = (1E-6 * length) / mps_rg1 * 1E3;
+  printf("%5zu @%.1f: rg 1D %e\n", length, sigma, milliseconds);
+}
+
+void Benchmark(size_t xsize, size_t ysize, double sigma) {  // Prints a CSV row.
+  // Uncomment to run AVX2
+  // hwy::DisableTargets(HWY_AVX3);
+  // Recursive Gaussian (FastGaussian), full repetition count (div = 1).
+  const double mps_rg =
+      Measure(xsize, ysize, 1, [sigma](size_t xsize, size_t ysize) {
+        ImageF in(xsize, ysize);
+        const float expected = xsize + ysize;
+        FillImage(expected, &in);
+
+        ImageF temp(xsize, ysize);
+        ImageF out(xsize, ysize);
+        const auto rg = CreateRecursiveGaussian(sigma);
+        ThreadPool* null_pool = nullptr;
+        const double t0 = Now();  // Only the FastGaussian call is timed.
+        FastGaussian(rg, in, null_pool, &temp, &out);
+        const double t1 = Now();
+        // Prevent optimizing out; also checks the sample at the image center.
+        const float actual = out.ConstRow(ysize / 2)[xsize / 2];
+        const float rel_err = std::abs(actual - expected) / expected;
+        EXPECT_LT(rel_err, 9E-5);
+        return t1 - t0;
+      });
+  // FIR convolution (Convolve with a GaussianKernel), div = 100.
+  const double mps_fir =
+      Measure(xsize, ysize, 100, [sigma](size_t xsize, size_t ysize) {
+        ImageF in(xsize, ysize);
+        const float expected = xsize + ysize;
+        FillImage(expected, &in);
+        const std::vector<float> kernel = GaussianKernel(
+            static_cast<int>(4 * sigma), static_cast<float>(sigma));
+        const double t0 = Now();  // Kernel creation is not timed.
+        const ImageF out = Convolve(in, kernel);
+        const double t1 = Now();
+
+        // Prevent optimizing out; also checks the sample at the image center.
+        const float actual = out.ConstRow(ysize / 2)[xsize / 2];
+        const float rel_err = std::abs(actual - expected) / expected;
+        EXPECT_LT(rel_err, 5E-6);
+        return t1 - t0;
+      });
+  // Separable7 with fixed weights (independent of sigma), div = 10.
+  const double mps_simd7 =
+      Measure(xsize, ysize, 10, [](size_t xsize, size_t ysize) {
+        ImageF in(xsize, ysize);
+        const float expected = xsize + ysize;
+        FillImage(expected, &in);
+        ImageF out(xsize, ysize);
+        // Gaussian with sigma 1; the same four weights are given twice.
+        const WeightsSeparable7 weights = {
+            {HWY_REP4(0.383103f), HWY_REP4(0.241843f), HWY_REP4(0.060626f),
+             HWY_REP4(0.00598f)},
+            {HWY_REP4(0.383103f), HWY_REP4(0.241843f), HWY_REP4(0.060626f),
+             HWY_REP4(0.00598f)}};
+        ThreadPool* null_pool = nullptr;
+        const double t0 = Now();  // Only the Separable7 call is timed.
+        Separable7(in, Rect(in), weights, null_pool, &out);
+        const double t1 = Now();
+
+        // Prevent optimizing out; also checks the sample at the image center.
+        const float actual = out.ConstRow(ysize / 2)[xsize / 2];
+        const float rel_err = std::abs(actual - expected) / expected;
+        EXPECT_LT(rel_err, 5E-6);
+        return t1 - t0;
+      });
+  // CSV: xsize, ysize, then megapixels/sec of FIR, Separable7, recursive.
+  printf("%zu,%zu,%.1f,%.1f,%.1f\n", xsize, ysize, mps_fir, mps_simd7, mps_rg);
+}
+
+TEST(GaussBlurTest, BenchmarkTest) {  // Quick run on one small image.
+  Benchmark1D();
+  Benchmark(77, 177, 7);  // xsize 77, ysize 177, sigma 7
+}
+
+TEST(GaussBlurTest, DISABLED_SlowBenchmark) {
+  Benchmark1D();
+
+  // Euler's gamma as a nothing-up-my-sleeve number: each size is the
+  // previous one divided by gamma (truncated), so sizes are unlikely to
+  // interact with cache properties.
+  const float gamma = 0.57721566;
+  constexpr size_t kNumSizes = 4;
+  size_t sizes[kNumSizes] = {128};
+  for (size_t i = 1; i < kNumSizes; ++i) {
+    sizes[i] = static_cast<size_t>(sizes[i - 1] / gamma);
+  }
+
+  // Every square size, followed by both orientations of each adjacent pair.
+  for (size_t i = 0; i < kNumSizes; ++i) {
+    Benchmark(sizes[i], sizes[i], 7);
+    if (i + 1 == kNumSizes) break;
+    Benchmark(sizes[i], sizes[i + 1], 7);
+    Benchmark(sizes[i + 1], sizes[i], 7);
+  }
+
+  Benchmark(1920, 1080, 7);
+
+  PROFILER_PRINT_RESULTS();
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gradient_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gradient_test.cc
new file mode 100644
index 0000000000..332684ae4c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/gradient_test.cc
@@ -0,0 +1,205 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <math.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/thread_pool_internal.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_file.h"
+#include "lib/jxl/dec_params.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+namespace {
+
+// Signed, unnormalized distance of point (x, y) to the line p0..p1.
+double PointLineDist(double x0, double y0, double x1, double y1, double x,
+                     double y) {
+  const double dx = x1 - x0, dy = y1 - y0;  // Direction of the line.
+  return dy * x - dx * y + x1 * y0 - y1 * x0;
+}
+
+// Generates a test image with a linear gradient between two colors, given in
+// hex as 0xRRGGBB. The color is constant along the direction at "angle"
+// degrees, i.e. (sin(angle), cos(angle)), and changes perpendicular to it.
+Image3F GenerateTestGradient(uint32_t color0, uint32_t color1, double angle,
+                             size_t xsize, size_t ysize) {
+  Image3F image(xsize, ysize);
+  // Reference line through the image center.
+  const double radians = angle / 360.0 * 2.0 * kPi;
+  const double x0 = xsize / 2;
+  const double y0 = ysize / 2;
+  const double x1 = x0 + std::sin(radians);
+  const double y1 = y0 + std::cos(radians);
+  // Normalizer: the larger distance of the two top image corners.
+  const double maxdist =
+      std::max<double>(fabs(PointLineDist(x0, y0, x1, y1, 0, 0)),
+                       fabs(PointLineDist(x0, y0, x1, y1, xsize, 0)));
+
+  for (size_t c = 0; c < 3; ++c) {
+    const size_t shift = 8 * (2 - c);  // Red is the most significant byte.
+    const float c0 = (color0 >> shift) & 255;
+    const float c1 = (color1 >> shift) & 255;
+    for (size_t y = 0; y < ysize; ++y) {
+      float* row = image.PlaneRow(c, y);
+      for (size_t x = 0; x < xsize; ++x) {
+        const double dist = PointLineDist(x0, y0, x1, y1, x, y);
+        const double v = (dist / maxdist + 1.0) / 2.0;  // Weight of color1.
+        row[x] = static_cast<float>(c0 * (1.0 - v) + c1 * v);
+      }
+    }
+  }
+  return image;
+}
+
+// For each interior pixel, computes the max of the horizontal and vertical
+// second derivative: the absolute difference of the left and right deltas
+// (top/bottom for vertical). Radius is 1 pixel and only two directions are
+// checked, which approximates well enough. Border pixels copy the adjacent
+// value; with at most two rows/columns a first derivative is used instead.
+static ImageF Gradient2(const ImageF& image) {
+  size_t xsize = image.xsize();
+  size_t ysize = image.ysize();
+  ImageF image2(image.xsize(), image.ysize());
+  for (size_t y = 1; y + 1 < ysize; y++) {  // Interior rows only.
+    const auto* JXL_RESTRICT row0 = image.Row(y - 1);
+    const auto* JXL_RESTRICT row1 = image.Row(y);
+    const auto* JXL_RESTRICT row2 = image.Row(y + 1);
+    auto* row_out = image2.Row(y);
+    for (size_t x = 1; x + 1 < xsize; x++) {  // Interior columns only.
+      float ddx = (row1[x] - row1[x - 1]) - (row1[x + 1] - row1[x]);
+      float ddy = (row1[x] - row0[x]) - (row2[x] - row1[x]);
+      row_out[x] = std::max(fabsf(ddx), fabsf(ddy));
+    }
+  }
+  // Copy to the first and last row (corners are handled by the column pass)
+  if (ysize > 2) {
+    auto* JXL_RESTRICT row0 = image2.Row(0);
+    const auto* JXL_RESTRICT row1 = image2.Row(1);
+    const auto* JXL_RESTRICT row2 = image2.Row(ysize - 2);
+    auto* JXL_RESTRICT row3 = image2.Row(ysize - 1);
+    for (size_t x = 1; x + 1 < xsize; x++) {
+      row0[x] = row1[x];
+      row3[x] = row2[x];
+    }
+  } else {
+    const auto* row0_in = image.Row(0);
+    const auto* row1_in = image.Row(ysize - 1);
+    auto* row0_out = image2.Row(0);
+    auto* row1_out = image2.Row(ysize - 1);
+    for (size_t x = 1; x + 1 < xsize; x++) {
+      // Image too short (at most two rows), take first derivative instead
+      row0_out[x] = row1_out[x] = fabsf(row0_in[x] - row1_in[x]);
+    }
+  }
+  if (xsize > 2) {  // First and last column, for every row.
+    for (size_t y = 0; y < ysize; y++) {
+      auto* row = image2.Row(y);
+      row[0] = row[1];
+      row[xsize - 1] = row[xsize - 2];
+    }
+  } else {
+    for (size_t y = 0; y < ysize; y++) {
+      const auto* JXL_RESTRICT row_in = image.Row(y);
+      auto* row_out = image2.Row(y);
+      // Image too narrow, take first derivative instead
+      row_out[0] = row_out[xsize - 1] = fabsf(row_in[0] - row_in[xsize - 1]);
+    }
+  }
+  return image2;
+}
+
+static Image3F Gradient2(const Image3F& image) {  // Gradient2 of each plane.
+  return Image3F(Gradient2(image.Plane(0)), Gradient2(image.Plane(1)),
+                 Gradient2(image.Plane(2)));
+}
+
+// Tests that a roundtrip with jxl on a gradient image doesn't cause banding.
+// The banding check only runs if use_gradient is true; pass false to skip it
+// while debugging.
+// "angle" is in degrees, colors can be given in hex as 0xRRGGBB. fast_mode
+// selects SpeedTier::kSquirrel instead of the default speed tier.
+void TestGradient(ThreadPool* pool, uint32_t color0, uint32_t color1,
+                  size_t xsize, size_t ysize, float angle, bool fast_mode,
+                  float butteraugli_distance, bool use_gradient = true) {
+  // Encoder settings.
+  CompressParams cparams;
+  cparams.butteraugli_distance = butteraugli_distance;
+  if (fast_mode) cparams.speed_tier = SpeedTier::kSquirrel;
+
+  // Encoder input: the synthetic gradient, tagged as 8-bit sRGB.
+  CodecInOut io;
+  io.metadata.m.SetUintSamples(8);
+  io.metadata.m.color_encoding = ColorEncoding::SRGB();
+  io.SetFromImage(GenerateTestGradient(color0, color1, angle, xsize, ysize),
+                  io.metadata.m.color_encoding);
+
+  // Compress without collecting auxiliary output (aux_out is null).
+  PaddedBytes compressed;
+  PassesEncoderState enc_state;
+  AuxOut* aux_out = nullptr;
+  EXPECT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, aux_out, pool));
+
+  // Decode into a second CodecInOut, then transform the result to the color
+  // encoding recorded in its own metadata.
+  DecompressParams dparams;
+  CodecInOut io2;
+  EXPECT_TRUE(DecodeFile(dparams, compressed, &io2, pool));
+  EXPECT_TRUE(io2.Main().TransformTo(io2.metadata.m.color_encoding, pool));
+
+  if (!use_gradient) return;
+
+  // Test that the gradient map worked. For that, we take a second derivative
+  // of the image with Gradient2 to measure how linear the change is in x and
+  // y direction. For a well handled gradient, we expect max values around
+  // 0.1, while if there is noticeable banding, which means the gradient map
+  // failed, the values are around 0.5-1.0 (regardless of
+  // butteraugli_distance).
+  Image3F second_derivative = Gradient2(*io2.Main().color());
+  std::array<float, 3> image_max;
+  Image3Max(second_derivative, &image_max);
+
+  // TODO(jyrki): These values used to work with 0.2, 0.2, 0.2.
+  EXPECT_LE(image_max[0], 3.15);
+  EXPECT_LE(image_max[1], 1.72);
+  EXPECT_LE(image_max[2], 5.05);
+}
+
+static constexpr bool fast_mode = true;  // Passed as TestGradient's fast_mode.
+
+TEST(GradientTest, SteepGradient) {
+  ThreadPoolInternal pool(8);  // NOTE(review): presumably 8 threads - confirm.
+  // Relatively steep gradients, colors from the sky of stp.png
+  TestGradient(&pool, 0xd99d58, 0x889ab1, 512, 512, 90, fast_mode, 3.0);
+}  // Args above: 512x512 image, angle 90 degrees, butteraugli distance 3.0.
+
+TEST(GradientTest, SubtleGradient) {
+  ThreadPoolInternal pool(8);  // NOTE(review): presumably 8 threads - confirm.
+  // Very subtle gradient
+  TestGradient(&pool, 0xb89b7b, 0xa89b8d, 512, 512, 90, fast_mode, 4.0);
+}  // Args above: 512x512 image, angle 90 degrees, butteraugli distance 4.0.
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/headers.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/headers.cc
new file mode 100644
index 0000000000..41e8595bc4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/headers.cc
@@ -0,0 +1,212 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/headers.h"
+
+#include "lib/jxl/common.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+namespace {
+
+struct Rational {  // The fraction num / den.
+  uint32_t num;
+  uint32_t den;
+
+  constexpr explicit Rational(uint32_t numerator, uint32_t denominator)
+      : num(numerator), den(denominator) {}
+
+  // Returns floor(multiplicand * num / den), narrowed to 32 bits.
+  constexpr uint32_t MulTruncate(uint32_t multiplicand) const {
+    return static_cast<uint32_t>(uint64_t{multiplicand} * num / den);
+  }
+};
+
+Rational FixedAspectRatios(uint32_t ratio) {  // ratio: 1-based, in [1, 7].
+  JXL_ASSERT(0 != ratio && ratio < 8);
+  // Other candidates (not in the table): 7/5, 14/9, 16/10, 5/3, 21/9, 12/5
+  constexpr Rational kRatios[7] = {Rational(1, 1),    // square
+                                   Rational(12, 10),  // 6:5
+                                   Rational(4, 3),    // camera
+                                   Rational(3, 2),    // mobile camera
+                                   Rational(16, 9),   // camera/display
+                                   Rational(5, 4),    // 5:4
+                                   Rational(2, 1)};   // 2:1
+  return kRatios[ratio - 1];
+}
+
+// Returns the index (1..7) of the first fixed aspect ratio for which
+// MulTruncate(ysize) gives exactly xsize, or 0 if there is none.
+uint32_t FindAspectRatio(uint32_t xsize, uint32_t ysize) {
+  uint32_t r = 1;
+  // Advance until a ratio matches or the table is exhausted.
+  while (r < 8 && FixedAspectRatios(r).MulTruncate(ysize) != xsize) ++r;
+  return r < 8 ? r : 0;  // 0: must send xsize instead
+}
+
+}  // namespace
+
+size_t SizeHeader::xsize() const {
+  // Explicitly stored (as xsize / 8 - 1 for small images) if no ratio is set.
+  if (ratio_ == 0) return small_ ? ((xsize_div8_minus_1_ + 1) * 8) : xsize_;
+  // Otherwise derived from ysize via the fixed aspect ratio table.
+  const uint32_t height = static_cast<uint32_t>(ysize());
+  return FixedAspectRatios(ratio_).MulTruncate(height);
+}
+
+Status SizeHeader::Set(size_t xsize64, size_t ysize64) {
+  // Dimensions must fit in 32 bits and be nonzero.
+  if (xsize64 > 0xFFFFFFFFull || ysize64 > 0xFFFFFFFFull) {
+    return JXL_FAILURE("Image too large");
+  }
+  if (xsize64 == 0 || ysize64 == 0) return JXL_FAILURE("Empty image");
+  const uint32_t xsize32 = static_cast<uint32_t>(xsize64);
+  const uint32_t ysize32 = static_cast<uint32_t>(ysize64);
+
+  // "Small" images are stored as size / 8 - 1.
+  const bool fits = xsize64 <= 256 && ysize64 <= 256;
+  const bool aligned = (xsize64 % kBlockDim) == 0 && (ysize64 % kBlockDim) == 0;
+  small_ = fits && aligned;
+  if (small_) {
+    ysize_div8_minus_1_ = ysize32 / 8 - 1;
+  } else {
+    ysize_ = ysize32;
+  }
+
+  // xsize is only stored if no fixed aspect ratio reproduces it from ysize.
+  ratio_ = FindAspectRatio(xsize32, ysize32);
+  if (ratio_ == 0 && small_) xsize_div8_minus_1_ = xsize32 / 8 - 1;
+  if (ratio_ == 0 && !small_) xsize_ = xsize32;
+  JXL_ASSERT(xsize() == xsize64);
+  JXL_ASSERT(ysize() == ysize64);
+  return true;
+}
+
+Status PreviewHeader::Set(size_t xsize64, size_t ysize64) {
+  // Stores the preview size. Sizes above 32 bits are rejected up front (as in
+  // SizeHeader::Set): the casts below would otherwise silently truncate them.
+  const bool too_large = xsize64 > 0xFFFFFFFFull || ysize64 > 0xFFFFFFFFull;
+  if (too_large) return JXL_FAILURE("Preview too large");
+  if (xsize64 == 0 || ysize64 == 0) return JXL_FAILURE("Empty preview");
+  const uint32_t xsize32 = static_cast<uint32_t>(xsize64);
+  const uint32_t ysize32 = static_cast<uint32_t>(ysize64);
+  // If both sizes are multiples of kBlockDim, they are stored divided by 8.
+  div8_ = (xsize64 % kBlockDim) == 0 && (ysize64 % kBlockDim) == 0;
+  if (div8_) {
+    ysize_div8_ = ysize32 / 8;
+  } else {
+    ysize_ = ysize32;
+  }
+  // xsize is only stored if no fixed aspect ratio reproduces it from ysize.
+  ratio_ = FindAspectRatio(xsize32, ysize32);
+  if (ratio_ == 0 && div8_) xsize_div8_ = xsize32 / 8;
+  if (ratio_ == 0 && !div8_) xsize_ = xsize32;
+  JXL_ASSERT(xsize() == xsize64);
+  JXL_ASSERT(ysize() == ysize64);
+  return true;
+}
+
+size_t PreviewHeader::xsize() const {
+  // Explicitly stored (divided by 8 when div8_ is set) if no ratio is set.
+  if (ratio_ == 0) return div8_ ? (xsize_div8_ * 8) : xsize_;
+  // Otherwise derived from ysize via the fixed aspect ratio table.
+  const uint32_t height = static_cast<uint32_t>(ysize());
+  return FixedAspectRatios(ratio_).MulTruncate(height);
+}
+
+SizeHeader::SizeHeader() { Bundle::Init(this); }
+Status SizeHeader::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &small_));
+  // ysize: 5 bits (ysize / 8 - 1) if small_, otherwise a U32.
+  if (visitor->Conditional(small_)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(5, 0, &ysize_div8_minus_1_));
+  }
+  if (visitor->Conditional(!small_)) {
+    // (Could still be small, but non-multiple of 8.)
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(BitsOffset(9, 1), BitsOffset(13, 1),
+                                           BitsOffset(18, 1), BitsOffset(30, 1),
+                                           1, &ysize_));
+  }
+  // xsize is only visited if ratio_ == 0, with the same two forms as ysize.
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 0, &ratio_));
+  if (visitor->Conditional(ratio_ == 0 && small_)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(5, 0, &xsize_div8_minus_1_));
+  }
+  if (visitor->Conditional(ratio_ == 0 && !small_)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(BitsOffset(9, 1), BitsOffset(13, 1),
+                                           BitsOffset(18, 1), BitsOffset(30, 1),
+                                           1, &xsize_));
+  }
+  // NOTE(review): visit order presumably is the serialized order - confirm.
+  return true;
+}
+
+PreviewHeader::PreviewHeader() { Bundle::Init(this); }
+Status PreviewHeader::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &div8_));
+  // ysize: visited as ysize_div8_ if div8_, otherwise as ysize_.
+  if (visitor->Conditional(div8_)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(16), Val(32), BitsOffset(5, 1),
+                                           BitsOffset(9, 33), 1, &ysize_div8_));
+  }
+  if (visitor->Conditional(!div8_)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(BitsOffset(6, 1), BitsOffset(8, 65),
+                                           BitsOffset(10, 321),
+                                           BitsOffset(12, 1345), 1, &ysize_));
+  }
+  // xsize is only visited if ratio_ == 0, with the same two forms as ysize.
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 0, &ratio_));
+  if (visitor->Conditional(ratio_ == 0 && div8_)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(16), Val(32), BitsOffset(5, 1),
+                                           BitsOffset(9, 33), 1, &xsize_div8_));
+  }
+  if (visitor->Conditional(ratio_ == 0 && !div8_)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(BitsOffset(6, 1), BitsOffset(8, 65),
+                                           BitsOffset(10, 321),
+                                           BitsOffset(12, 1345), 1, &xsize_));
+  }
+  // NOTE(review): visit order presumably is the serialized order - confirm.
+  return true;
+}
+
+AnimationHeader::AnimationHeader() { Bundle::Init(this); }
+Status AnimationHeader::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(100), Val(1000), BitsOffset(10, 1),
+                                         BitsOffset(30, 1), 1, &tps_numerator));
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(1), Val(1001), BitsOffset(8, 1),
+                                         BitsOffset(10, 1), 1,
+                                         &tps_denominator));
+  // NOTE(review): "tps" presumably means ticks per second - confirm.
+  JXL_QUIET_RETURN_IF_ERROR(
+      visitor->U32(Val(0), Bits(3), Bits(16), Bits(32), 0, &num_loops));
+  // Visited in order: tps fraction, num_loops, then have_timecodes.
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_timecodes));
+  return true;
+}
+
+Status ReadSizeHeader(BitReader* JXL_RESTRICT reader,
+                      SizeHeader* JXL_RESTRICT size) {
+  return Bundle::Read(reader, size);  // Thin wrapper; forwards the Status.
+}
+
+Status WriteSizeHeader(const SizeHeader& size, BitWriter* JXL_RESTRICT writer,
+                       size_t layer, AuxOut* aux_out) {
+  // Abort if the SizeHeader::kMaxBits constant has become stale.
+  const size_t computed_max_bits = Bundle::MaxBits(size);
+  if (computed_max_bits != SizeHeader::kMaxBits) {
+    JXL_ABORT("Please update SizeHeader::kMaxBits from %zu to %zu\n",
+              SizeHeader::kMaxBits, computed_max_bits);
+  }
+
+  // Only the non-extension bits are bounded (extensions are unbounded).
+  // Bundle::Write repeats CanEncode, which is fast for the tiny SizeHeader.
+  size_t extension_bits, total_bits;
+  JXL_RETURN_IF_ERROR(Bundle::CanEncode(size, &extension_bits, &total_bits));
+  JXL_ASSERT(total_bits - extension_bits < SizeHeader::kMaxBits);
+
+  return Bundle::Write(size, writer, layer, aux_out);
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/headers.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/headers.h
new file mode 100644
index 0000000000..d33e2b5498
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/headers.h
@@ -0,0 +1,106 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_HEADERS_H_
+#define LIB_JXL_HEADERS_H_
+
+// Codestream headers, also stored in CodecInOut.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+// Reserved by ISO/IEC 10918-1. LF causes files opened in text mode to be
+// rejected because the marker changes to 0x0D instead. The 0xFF prefix also
+// ensures there were no 7-bit transmission limitations.
+static constexpr uint8_t kCodestreamMarker = 0x0A;
+
+// Compact representation of image dimensions (best case: 9 bits) so decoders
+// can preallocate early.
+class SizeHeader : public Fields {
+ public:
+  // All fields are valid after reading at most this many bits. WriteSizeHeader
+  // verifies this matches Bundle::MaxBits(SizeHeader).
+  static constexpr size_t kMaxBits = 78;
+
+  SizeHeader();
+  const char* Name() const override { return "SizeHeader"; }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  Status Set(size_t xsize, size_t ysize);  // Defined out of line.
+
+  size_t xsize() const;  // Defined out of line.
+  size_t ysize() const {  // Height; which field is used depends on small_.
+    return small_ ? ((ysize_div8_minus_1_ + 1) * 8) : ysize_;
+  }
+
+ private:
+  bool small_;  // xsize and ysize <= 256 and divisible by 8.
+  // Height in two alternative forms; ysize() picks one according to small_.
+  uint32_t ysize_div8_minus_1_;
+  uint32_t ysize_;
+  // Width-related fields; presumably read by the out-of-line xsize() - confirm.
+  uint32_t ratio_;
+  uint32_t xsize_div8_minus_1_;
+  uint32_t xsize_;
+};
+
+// (Similar to SizeHeader but different encoding because previews are smaller)
+class PreviewHeader : public Fields {
+ public:
+  PreviewHeader();
+  const char* Name() const override { return "PreviewHeader"; }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  Status Set(size_t xsize, size_t ysize);  // Defined out of line.
+  // ysize() is ysize_div8_ * 8 when div8_ is set, and ysize_ otherwise.
+  size_t xsize() const;  // Defined out of line.
+  size_t ysize() const { return div8_ ? (ysize_div8_ * 8) : ysize_; }
+
+ private:
+  bool div8_;  // xsize and ysize divisible by 8.
+  // Height in two alternative forms; ysize() picks one according to div8_.
+  uint32_t ysize_div8_;
+  uint32_t ysize_;
+  // Width-related fields; presumably read by the out-of-line xsize() - confirm.
+  uint32_t ratio_;
+  uint32_t xsize_div8_;
+  uint32_t xsize_;
+};
+
+struct AnimationHeader : public Fields {
+  AnimationHeader();
+  const char* Name() const override { return "AnimationHeader"; }
+  // Visits the four data members below, in declaration order.
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // Ticks per second (expressed as rational number to support NTSC)
+  uint32_t tps_numerator;
+  uint32_t tps_denominator;
+
+  uint32_t num_loops;  // 0 means to repeat infinitely.
+  // NOTE(review): presumably "frames carry timecodes" - confirm with the spec.
+  bool have_timecodes;
+};
+
+Status ReadSizeHeader(BitReader* JXL_RESTRICT reader,
+                      SizeHeader* JXL_RESTRICT size);
+
+Status WriteSizeHeader(const SizeHeader& size, BitWriter* JXL_RESTRICT writer,
+                       size_t layer, AuxOut* aux_out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_HEADERS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_table.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_table.cc
new file mode 100644
index 0000000000..9ae7865af6
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_table.cc
@@ -0,0 +1,161 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/huffman_table.h"
+
+#include <cstring> /* for memcpy */
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/dec_huffman.h"
+
+namespace jxl {
+
+/* Steps a bit-reversed counter: yields reverse(reverse(key, len) + 1, len),
+   where reverse(x, len) mirrors the len least significant bits of x. */
+static inline int GetNextKey(int key, int len) {
+  /* Locate the highest bit (within len bits) of key that is still clear. */
+  int bit = 1u << (len - 1);
+  for (; (key & bit) != 0; bit >>= 1) {
+  }
+  return bit + (key & (bit - 1));
+}
+
+/* Stores code in table[end - step], table[end - 2*step], ..., table[0]. */
+/* Expects end to be a positive integer multiple of step. */
+static inline void ReplicateValue(HuffmanCode* table, int step, int end,
+                                  HuffmanCode code) {
+  int pos = end;
+  do {
+    table[pos -= step] = code;
+  } while (pos > 0);
+}
+
+/* Computes the width in bits of the next 2nd level table. count holds, per
+   code length, how many symbols remain to be placed; len is the code length
+   of the symbol about to be processed. */
+static inline size_t NextTableBitSize(const uint16_t* const count, size_t len,
+                                      int root_bits) {
+  size_t bits = len;
+  /* Slots still unclaimed at the current candidate width. */
+  size_t free_slots = 1u << (bits - root_bits);
+  for (; bits < PREFIX_MAX_BITS && free_slots > count[bits]; ++bits) {
+    /* Leftover slots double when the table grows by one bit. */
+    free_slots = (free_slots - count[bits]) << 1;
+  }
+  return bits - root_bits;
+}
+
+uint32_t BuildHuffmanTable(HuffmanCode* root_table, int root_bits,
+                           const uint8_t* const code_lengths,
+                           size_t code_lengths_size, uint16_t* count) {
+  HuffmanCode code;   /* current table entry */
+  HuffmanCode* table; /* next available space in table */
+  size_t len;         /* current code length */
+  size_t symbol;      /* symbol index in original or sorted table */
+  int key;            /* reversed prefix code */
+  int step;           /* step size to replicate values in current table */
+  int low;            /* low bits for current root entry */
+  int mask;           /* mask for low bits */
+  size_t table_bits;  /* key length of current table */
+  int table_size;     /* size of current table */
+  int total_size;     /* sum of root table size and 2nd level table sizes */
+  /* offsets in sorted table for each length */
+  uint16_t offset[PREFIX_MAX_BITS + 1];
+  size_t max_length = 1; /* largest len with count[len] != 0 (at least 1) */
+  /* NOTE: count[] is modified: entries are decremented as codes are placed. */
+  if (code_lengths_size > 1u << PREFIX_MAX_BITS) return 0;
+
+  /* symbols sorted by code length */
+  std::vector<uint16_t> sorted_storage(code_lengths_size);
+  uint16_t* sorted = sorted_storage.data();
+
+  /* generate offsets into sorted symbol table by code length */
+  {
+    uint16_t sum = 0;
+    for (len = 1; len <= PREFIX_MAX_BITS; len++) {
+      offset[len] = sum;
+      if (count[len]) {
+        sum = static_cast<uint16_t>(sum + count[len]);
+        max_length = len;
+      }
+    }
+  }
+
+  /* sort symbols by length, by symbol order within each length */
+  for (symbol = 0; symbol < code_lengths_size; symbol++) {
+    if (code_lengths[symbol] != 0) {
+      sorted[offset[code_lengths[symbol]]++] = symbol;
+    }
+  }
+  /* each offset[len] has now been advanced past the symbols of length len */
+  table = root_table;
+  table_bits = root_bits;
+  table_size = 1u << table_bits;
+  total_size = table_size;
+
+  /* special case code with only one value (every root entry gets bits = 0) */
+  if (offset[PREFIX_MAX_BITS] == 1) {
+    code.bits = 0;
+    code.value = static_cast<uint16_t>(sorted[0]);
+    for (key = 0; key < total_size; ++key) {
+      table[key] = code;
+    }
+    return total_size;
+  }
+
+  /* fill in root table */
+  /* let's reduce the table size to a smaller size if possible, and */
+  /* create the repetitions by memcpy if possible in the coming loop */
+  if (table_bits > max_length) {
+    table_bits = max_length;
+    table_size = 1u << table_bits;
+  }
+  key = 0;
+  symbol = 0;
+  code.bits = 1;
+  step = 2;
+  do {
+    for (; count[code.bits] != 0; --count[code.bits]) {
+      code.value = static_cast<uint16_t>(sorted[symbol++]);
+      ReplicateValue(&table[key], step, table_size, code);
+      key = GetNextKey(key, code.bits);
+    }
+    step <<= 1;
+  } while (++code.bits <= table_bits);
+
+  /* if root_bits != table_bits we only created one fraction of the */
+  /* table, and we need to replicate it now. */
+  while (total_size != table_size) {
+    memcpy(&table[table_size], &table[0], table_size * sizeof(table[0]));
+    table_size <<= 1;
+  }
+
+  /* fill in 2nd level tables and add pointers to root table */
+  mask = total_size - 1;
+  low = -1;
+  for (len = root_bits + 1, step = 2; len <= max_length; ++len, step <<= 1) {
+    for (; count[len] != 0; --count[len]) {
+      if ((key & mask) != low) { /* new root prefix: open a new 2nd level table */
+        table += table_size;
+        table_bits = NextTableBitSize(count, len, root_bits);
+        table_size = 1u << table_bits;
+        total_size += table_size;
+        low = key & mask;
+        root_table[low].bits = static_cast<uint8_t>(table_bits + root_bits);
+        root_table[low].value =
+            static_cast<uint16_t>((table - root_table) - low);
+      }
+      code.bits = static_cast<uint8_t>(len - root_bits);
+      code.value = static_cast<uint16_t>(sorted[symbol++]);
+      ReplicateValue(&table[key >> root_bits], step, table_size, code);
+      key = GetNextKey(key, len);
+    }
+  }
+
+  return total_size;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_table.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_table.h
new file mode 100644
index 0000000000..11cdb2fc45
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_table.h
@@ -0,0 +1,28 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_HUFFMAN_TABLE_H_
+#define LIB_JXL_HUFFMAN_TABLE_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+namespace jxl {
+
+struct HuffmanCode {
+  uint8_t bits;   /* number of bits used for this symbol */
+  uint16_t value; /* symbol value or table offset */
+};
+
+/* Builds Huffman lookup table assuming code lengths are in symbol order. */
+/* Returns 0 in case of error (invalid tree or memory error), otherwise
+   populated size of table. */
+uint32_t BuildHuffmanTable(HuffmanCode* root_table, int root_bits,
+                           const uint8_t* code_lengths,
+                           size_t code_lengths_size, uint16_t* count);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_HUFFMAN_TABLE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_tree.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_tree.cc
new file mode 100644
index 0000000000..77107b08d2
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_tree.cc
@@ -0,0 +1,328 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/huffman_tree.h"
+
+#include <algorithm>
+#include <limits>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth,
+              uint8_t level) {
+  if (p.index_left < 0) {  // Leaf: store the depth reached for its symbol.
+    depth[p.index_right_or_value] = level;
+    return;
+  }
+  const uint8_t child_level = static_cast<uint8_t>(level + 1);  // One deeper.
+  SetDepth(pool[p.index_left], pool, depth, child_level);
+  SetDepth(pool[p.index_right_or_value], pool, depth, child_level);
+}
+
+// Ordering predicate for tree nodes: ascending by total_count.
+static JXL_INLINE bool Compare(const HuffmanTree& v0, const HuffmanTree& v1) {
+  return v1.total_count > v0.total_count;
+}
+
+// Builds a length-limited Huffman tree from the population counts in
+// data[0, length) and writes the code length of every symbol with a non-zero
+// count to depth. If no count is non-zero, depth is left untouched.
+//
+// The catch here is that the tree cannot be arbitrarily deep.
+// Brotli specifies a maximum depth of 15 bits for "code trees"
+// and 7 bits for "code length code trees."
+//
+// count_limit is the value that is to be faked as the minimum value
+// and this minimum value is raised until the tree matches the
+// maximum length requirement (tree_limit).
+//
+// This is not fast for very long data blocks, especially when population
+// counts are longer than 2**tree_limit, but that use is not planned.
+// See http://en.wikipedia.org/wiki/Huffman_coding
+void CreateHuffmanTree(const uint32_t* data, const size_t length,
+                       const int tree_limit, uint8_t* depth) {
+  // For block sizes below 64 kB, we never need to do a second iteration
+  // of this loop. Probably all of our block sizes will be smaller than
+  // that, so this loop is mostly of academic interest. If we actually
+  // would need this, we would be better off with the Katajainen algorithm.
+  for (uint32_t count_limit = 1;; count_limit *= 2) {
+    std::vector<HuffmanTree> tree;
+    tree.reserve(2 * length + 1);
+
+    for (size_t i = length; i != 0;) {
+      --i;
+      if (data[i]) {
+        const uint32_t count = std::max(data[i], count_limit - 1);
+        tree.emplace_back(count, -1, static_cast<int16_t>(i));
+      }
+    }
+
+    const size_t n = tree.size();
+    if (n <= 1) {
+      // No symbol: nothing to do. One symbol: fake depth, fixed on upper level.
+      if (n == 1) depth[tree[0].index_right_or_value] = 1;
+      break;  // Also keeps k = n - 1 below from underflowing when n == 0.
+    }
+
+    std::stable_sort(tree.begin(), tree.end(), Compare);
+
+    // The nodes are:
+    // [0, n): the sorted leaf nodes that we start with.
+    // [n]: we add a sentinel here.
+    // [n + 1, 2n): new parent nodes are added here, starting from
+    //              (n+1). These are naturally in ascending order.
+    // [2n]: we add a sentinel at the end as well.
+    // There will be (2n+1) elements at the end.
+    const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
+    tree.push_back(sentinel);
+    tree.push_back(sentinel);
+
+    size_t i = 0;      // Points to the next leaf node.
+    size_t j = n + 1;  // Points to the next non-leaf node.
+    for (size_t k = n - 1; k != 0; --k) {
+      size_t left, right;
+      if (tree[i].total_count <= tree[j].total_count) {
+        left = i;
+        ++i;
+      } else {
+        left = j;
+        ++j;
+      }
+      if (tree[i].total_count <= tree[j].total_count) {
+        right = i;
+        ++i;
+      } else {
+        right = j;
+        ++j;
+      }
+
+      // The sentinel node becomes the parent node.
+      size_t j_end = tree.size() - 1;
+      tree[j_end].total_count =
+          tree[left].total_count + tree[right].total_count;
+      tree[j_end].index_left = static_cast<int16_t>(left);
+      tree[j_end].index_right_or_value = static_cast<int16_t>(right);
+
+      // Add back the last sentinel node.
+      tree.push_back(sentinel);
+    }
+    JXL_DASSERT(tree.size() == 2 * n + 1);
+    SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
+
+    // We need to pack the Huffman tree in tree_limit bits.
+    // If this was not successful, add fake entities to the lowest values
+    // and retry.
+    if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
+      break;
+    }
+  }
+}
+
+// Reverses the bytes of v in the half-open index range [start, end) in place.
+// An empty or inverted range (start >= end) is a no-op; it is filtered out
+// first because std::reverse requires a valid range. This also covers
+// end == 0, for which computing "end - 1" would wrap around.
+void Reverse(uint8_t* v, size_t start, size_t end) {
+  if (start >= end) {
+    return;
+  }
+  std::reverse(v + start, v + end);
+}
+
+void WriteHuffmanTreeRepetitions(const uint8_t previous_value,
+                                 const uint8_t value, size_t repetitions,
+                                 size_t* tree_size, uint8_t* tree,
+                                 uint8_t* extra_bits_data) {
+  // Encodes `repetitions` copies of `value` into tree / extra_bits_data.
+  JXL_DASSERT(repetitions > 0);
+  // Appends one (symbol, extra bits) pair to the two parallel output arrays.
+  const auto emit = [&](uint8_t symbol, uint8_t extra) {
+    tree[*tree_size] = symbol;
+    extra_bits_data[*tree_size] = extra;
+    ++(*tree_size);
+  };
+  // A value differing from its predecessor is written literally once.
+  if (value != previous_value) {
+    emit(value, 0);
+    --repetitions;
+  }
+  // A run of exactly 7 gets one literal first, leaving 6 for the code below.
+  if (repetitions == 7) {
+    emit(value, 0);
+    --repetitions;
+  }
+  if (repetitions < 3) {
+    // Too short for a repeat code: write the remaining copies literally.
+    while (repetitions-- > 0) {
+      emit(value, 0);
+    }
+    return;
+  }
+  // Repeat code 16 carries 2 extra bits per entry. Entries are produced least
+  // significant first and flipped afterwards.
+  const size_t first = *tree_size;
+  for (repetitions -= 3;; --repetitions) {
+    emit(16, static_cast<uint8_t>(repetitions & 0x3));
+    repetitions >>= 2;
+    if (repetitions == 0) break;
+  }
+  Reverse(tree, first, *tree_size);
+  Reverse(extra_bits_data, first, *tree_size);
+}
+
+void WriteHuffmanTreeRepetitionsZeros(size_t repetitions, size_t* tree_size,
+                                      uint8_t* tree, uint8_t* extra_bits_data) {
+  // Appends one (symbol, extra bits) pair to the two parallel output arrays.
+  const auto emit = [&](uint8_t symbol, uint8_t extra) {
+    tree[*tree_size] = symbol;
+    extra_bits_data[*tree_size] = extra;
+    ++(*tree_size);
+  };
+  // A run of exactly 11 zeros gets one literal zero first, leaving 10.
+  if (repetitions == 11) {
+    emit(0, 0);
+    --repetitions;
+  }
+  if (repetitions < 3) {
+    // Too short for a repeat code: write the zeros literally.
+    while (repetitions-- > 0) {
+      emit(0, 0);
+    }
+    return;
+  }
+  // Repeat code 17 carries 3 extra bits per entry. Entries are produced least
+  // significant first and flipped afterwards.
+  const size_t first = *tree_size;
+  for (repetitions -= 3;; --repetitions) {
+    emit(17, static_cast<uint8_t>(repetitions & 0x7));
+    repetitions >>= 3;
+    if (repetitions == 0) break;
+  }
+  Reverse(tree, first, *tree_size);
+  Reverse(extra_bits_data, first, *tree_size);
+}
+
+static void DecideOverRleUse(const uint8_t* depth, const size_t length,
+                             bool* use_rle_for_non_zero,
+                             bool* use_rle_for_zero) {
+  // Long-run statistics; slot 0: zeros, slot 1: non-zeros. Counts start at 1.
+  size_t run_total[2] = {0, 0};
+  size_t run_count[2] = {1, 1};
+  size_t pos = 0;
+  while (pos < length) {
+    // Measure the run of identical depths that begins at pos.
+    const uint8_t value = depth[pos];
+    size_t run_end = pos + 1;
+    while (run_end < length && depth[run_end] == value) {
+      ++run_end;
+    }
+    const size_t reps = run_end - pos;
+    const size_t slot = (value == 0) ? 0 : 1;
+    // Zero runs are counted from length 3, non-zero runs from length 4.
+    if (reps >= (slot == 0 ? 3u : 4u)) {
+      run_total[slot] += reps;
+      ++run_count[slot];
+    }
+    pos = run_end;
+  }
+  *use_rle_for_non_zero = run_total[1] > run_count[1] * 2;
+  *use_rle_for_zero = run_total[0] > run_count[0] * 2;
+}
+
+void WriteHuffmanTree(const uint8_t* depth, size_t length, size_t* tree_size,
+                      uint8_t* tree, uint8_t* extra_bits_data) {
+  // Serializes the code lengths in depth[0, length) into the parallel arrays
+  // tree / extra_bits_data, advancing *tree_size by the entries written.
+
+  // Only the part of depth up to its last non-zero entry is encoded.
+  size_t used = length;
+  while (used > 0 && depth[used - 1] == 0) {
+    --used;
+  }
+
+  // Decide per class (zero / non-zero) whether run-length codes pay off.
+  // Note that the threshold tests the untrimmed length: inputs of at most
+  // 50 entries never use rle.
+  bool rle_non_zero = false;
+  bool rle_zero = false;
+  if (length > 50) {
+    DecideOverRleUse(depth, used, &rle_non_zero, &rle_zero);
+  }
+
+  // Emit the trimmed depths run by run. When rle is off for a class, every
+  // entry of that class forms a run of length one.
+  uint8_t last_non_zero = 8;  // Initial "previous" value for the first run.
+  size_t pos = 0;
+  while (pos < used) {
+    const uint8_t value = depth[pos];
+    const bool is_zero = (value == 0);
+    const bool merge_run = is_zero ? rle_zero : rle_non_zero;
+    size_t reps = 1;
+    if (merge_run) {
+      while (pos + reps < used && depth[pos + reps] == value) {
+        ++reps;
+      }
+    }
+    if (is_zero) {
+      WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
+    } else {
+      WriteHuffmanTreeRepetitions(last_non_zero, value, reps, tree_size, tree,
+                                  extra_bits_data);
+      last_non_zero = value;
+    }
+    pos += reps;
+  }
+}
+
+namespace {
+
+// Mirrors the num_bits least significant bits of bits, one nibble at a time.
+uint16_t ReverseBits(int num_bits, uint16_t bits) {
+  // kNibble[x] is the 4-bit value x with its bit order flipped.
+  static const size_t kNibble[16] = {0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
+                                     0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf};
+  size_t reversed = kNibble[bits & 0xf];
+  for (int done = 4; done < num_bits; done += 4) {
+    bits = static_cast<uint16_t>(bits >> 4);
+    reversed = (reversed << 4) | kNibble[bits & 0xf];
+  }
+  // Whole nibbles were reversed; drop the 0..3 surplus low bits.
+  return static_cast<uint16_t>(reversed >> (-num_bits & 0x3));
+}
+
+}  // namespace
+
+void ConvertBitDepthsToSymbols(const uint8_t* depth, size_t len,
+                               uint16_t* bits) {
+  // Valid code lengths are 1..15; a length of 0 marks an unused symbol and
+  // receives no code (its bits[] entry is left untouched).
+  const int kMaxBits = 16;  // Lengths 0..15 index the tables below.
+  // Histogram of code lengths, ignoring unused symbols.
+  uint16_t length_count[kMaxBits] = {0};
+  for (size_t s = 0; s < len; ++s) {
+    ++length_count[depth[s]];
+  }
+  length_count[0] = 0;
+
+  // First code of each length, derived from the previous length's first code
+  // and symbol count.
+  uint16_t first_code[kMaxBits];
+  first_code[0] = 0;
+  int running = 0;
+  for (size_t bit_len = 1; bit_len < kMaxBits; ++bit_len) {
+    running = (running + length_count[bit_len - 1]) << 1;
+    first_code[bit_len] = static_cast<uint16_t>(running);
+  }
+  // Hand out consecutive codes per length in symbol order, bit-reversed.
+  for (size_t s = 0; s < len; ++s) {
+    const uint8_t n = depth[s];
+    if (n != 0) bits[s] = ReverseBits(n, first_code[n]++);
+  }
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_tree.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_tree.h
new file mode 100644
index 0000000000..e4ccac49bc
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/huffman_tree.h
@@ -0,0 +1,52 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Library for creating Huffman codes from population counts.
+
+#ifndef LIB_JXL_HUFFMAN_TREE_H_
+#define LIB_JXL_HUFFMAN_TREE_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+namespace jxl {
+
+// A node of a Huffman tree; nodes refer to each other by index into a pool.
+struct HuffmanTree {
+  HuffmanTree(uint32_t count, int16_t left, int16_t right)
+      : total_count(count), index_left(left), index_right_or_value(right) {}
+  uint32_t total_count;  // Weight used to order nodes.
+  int16_t index_left;  // Pool index of the left child; negative marks a leaf.
+  int16_t index_right_or_value;  // Right child index, or symbol for a leaf.
+};
+
+void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth,
+              uint8_t level);
+
+// This function will create a Huffman tree.
+//
+// The (data,length) contains the population counts.
+// The tree_limit is the maximum bit depth of the Huffman codes.
+//
+// The depth contains the tree, i.e., how many bits are used for
+// the symbol.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+void CreateHuffmanTree(const uint32_t* data, const size_t length,
+                       const int tree_limit, uint8_t* depth);
+
+// Write a Huffman tree from bit depths into the bitstream representation
+// of a Huffman tree. The generated Huffman tree is to be compressed once
+// more using a Huffman tree
+void WriteHuffmanTree(const uint8_t* depth, size_t length, size_t* tree_size,
+                      uint8_t* tree, uint8_t* extra_bits_data);
+
+// Get the actual bit values for a tree of bit depths.
+void ConvertBitDepthsToSymbols(const uint8_t* depth, size_t len,
+                               uint16_t* bits);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_HUFFMAN_TREE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/iaca_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/iaca_test.cc
new file mode 100644
index 0000000000..9b2e8ea25c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/iaca_test.cc
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/iaca.h"
+
+#include "gtest/gtest.h"
+
+namespace jxl {
+namespace {
+
+TEST(IacaTest, MarkersDefaultToDisabledAndDoNotCrash) {
+  BeginIACA();  // No assertions: the test passes if neither call crashes.
+  EndIACA();
+}
+// Same smoke test for the scoped variant (object constructed and destroyed).
+TEST(IacaTest, ScopeDefaultToDisabledAndDoNotCrash) { ScopeIACA iaca; }
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec.cc
new file mode 100644
index 0000000000..619c81451e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec.cc
@@ -0,0 +1,404 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/icc_codec.h"
+
+#include <stdint.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/icc_codec_common.h"
+
+namespace jxl {
+namespace {
+
+uint64_t DecodeVarInt(const uint8_t* input, size_t inputSize, size_t* pos) {
+  uint64_t value = 0;
+  size_t n = 0;  // Index of the byte being read, relative to *pos.
+  for (; *pos + n < inputSize && n < 10; ++n) {
+    const uint8_t byte = input[*pos + n];
+    value |= uint64_t(byte & 127) << uint64_t(7 * n);  // 7 payload bits each.
+    if ((byte & 128) == 0) break;  // Continuation flag clear: last byte.
+  }
+  // TODO: Return a decoding error if n == 10.
+  *pos += n + 1;  // Also when input ran out or the 10-byte cap was hit.
+  return value;
+}
+
+// Shuffles or interleaves bytes, for example with width 2, turns "ABCDabcd"
+// into "AaBbCcDd". Transposes a matrix of ceil(size / width) columns and
+// width rows. There are size elements, size may be < width * height, if so the
+// last elements of the rightmost column are missing, the missing spots are
+// transposed along with the filled spots, and the result has the missing
+// elements at the end of the bottom row. The input is the input matrix in
+// scanline order but with missing elements skipped (which may occur in multiple
+// locations), the output is the result matrix in scanline order (with
+// no need to skip missing elements as they are past the end of the data).
+void Shuffle(uint8_t* data, size_t size, size_t width) {
+  const size_t rows_out = (size + width - 1) / width;  // rows in the output
+  PaddedBytes shuffled(size);
+  size_t start = 0;  // first input index of the current pass
+  size_t src = 0;    // input index
+  for (size_t dst = 0; dst < size; ++dst) {
+    shuffled[dst] = data[src];
+    src += rows_out;  // step to the next element of the same pass
+    if (src >= size) src = ++start;
+  }
+
+  // Write the permuted bytes back over the caller's buffer.
+  size_t k = size;
+  while (k-- > 0) data[k] = shuffled[k];
+}
+
+// TODO(eustas): should be 20, or even 18, once DecodeVarInt is improved;
+//               currently DecodeVarInt does not signal the errors, and marks
+//               11 bytes as used even if only 10 are used (and 9 is enough for
+//               63-bit values).
+constexpr const size_t kPreambleSize = 22;  // enough for reading 2 VarInts
+
+}  // namespace
+
+// Mimics the beginning of UnpredictICC for quick validity check.
+// At least kPreambleSize bytes of data should be valid at invocation time.
+Status CheckPreamble(const PaddedBytes& data, size_t enc_size,
+                     size_t output_limit) {
+  const uint8_t* enc = data.data();
+  size_t size = data.size();
+  size_t pos = 0;
+  uint64_t osize = DecodeVarInt(enc, size, &pos);  // Output size.
+  JXL_RETURN_IF_ERROR(CheckIs32Bit(osize));
+  if (pos >= size) return JXL_FAILURE("Out of bounds");
+  uint64_t csize = DecodeVarInt(enc, size, &pos);  // Commands size.
+  JXL_RETURN_IF_ERROR(CheckIs32Bit(csize));
+  JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, csize, size));
+  // We expect that UnpredictICC inflates input, not the other way round.
+  if (osize + 65536 < enc_size) return JXL_FAILURE("Malformed ICC");
+  if (output_limit && osize > output_limit) {  // output_limit == 0: no cap.
+    return JXL_FAILURE("Decoded ICC is too large");
+  }
+  return true;
+}
+
+// Decodes the result of PredictICC back to a valid ICC profile.
+Status UnpredictICC(const uint8_t* enc, size_t size, PaddedBytes* result) {
+  if (!result->empty()) return JXL_FAILURE("result must be empty initially");
+  size_t pos = 0;
+  // TODO(lode): technically speaking we need to check that the entire varint
+  // decoding never goes out of bounds, not just the first byte. This requires
+  // a DecodeVarInt function that returns an error code. It is safe to use
+  // DecodeVarInt with out of bounds values, it silently returns, but the
+  // specification requires an error. Idem for all DecodeVarInt below.
+  if (pos >= size) return JXL_FAILURE("Out of bounds");
+  uint64_t osize = DecodeVarInt(enc, size, &pos);  // Output size
+  JXL_RETURN_IF_ERROR(CheckIs32Bit(osize));
+  if (pos >= size) return JXL_FAILURE("Out of bounds");
+  uint64_t csize = DecodeVarInt(enc, size, &pos);  // Commands size
+  // Every command is translated to at least on byte.
+  JXL_RETURN_IF_ERROR(CheckIs32Bit(csize));
+  size_t cpos = pos;  // pos in commands stream
+  JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, csize, size));
+  size_t commands_end = cpos + csize;
+  pos = commands_end;  // pos in data stream
+
+  // Header
+  PaddedBytes header = ICCInitialHeaderPrediction();
+  EncodeUint32(0, osize, &header);
+  for (size_t i = 0; i <= kICCHeaderSize; i++) {
+    if (result->size() == osize) {
+      if (cpos != commands_end) return JXL_FAILURE("Not all commands used");
+      if (pos != size) return JXL_FAILURE("Not all data used");
+      return true;  // Valid end
+    }
+    if (i == kICCHeaderSize) break;  // Done
+    ICCPredictHeader(result->data(), result->size(), header.data(), i);
+    if (pos >= size) return JXL_FAILURE("Out of bounds");
+    result->push_back(enc[pos++] + header[i]);
+  }
+  if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+
+  // Tag list
+  uint64_t numtags = DecodeVarInt(enc, size, &cpos);
+
+  if (numtags != 0) {
+    numtags--;
+    JXL_RETURN_IF_ERROR(CheckIs32Bit(numtags));
+    AppendUint32(numtags, result);
+    uint64_t prevtagstart = kICCHeaderSize + numtags * 12;
+    uint64_t prevtagsize = 0;
+    for (;;) {
+      if (result->size() > osize) return JXL_FAILURE("Invalid result size");
+      if (cpos > commands_end) return JXL_FAILURE("Out of bounds");
+      if (cpos == commands_end) break;  // Valid end
+      uint8_t command = enc[cpos++];
+      uint8_t tagcode = command & 63;
+      Tag tag;
+      if (tagcode == 0) {
+        break;
+      } else if (tagcode == kCommandTagUnknown) {
+        JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, 4, size));
+        tag = DecodeKeyword(enc, size, pos);
+        pos += 4;
+      } else if (tagcode == kCommandTagTRC) {
+        tag = kRtrcTag;
+      } else if (tagcode == kCommandTagXYZ) {
+        tag = kRxyzTag;
+      } else {
+        if (tagcode - kCommandTagStringFirst >= kNumTagStrings) {
+          return JXL_FAILURE("Unknown tagcode");
+        }
+        tag = *kTagStrings[tagcode - kCommandTagStringFirst];
+      }
+      AppendKeyword(tag, result);
+
+      uint64_t tagstart;
+      uint64_t tagsize = prevtagsize;
+      if (tag == kRxyzTag || tag == kGxyzTag || tag == kBxyzTag ||
+          tag == kKxyzTag || tag == kWtptTag || tag == kBkptTag ||
+          tag == kLumiTag) {
+        tagsize = 20;
+      }
+
+      if (command & kFlagBitOffset) {
+        if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+        tagstart = DecodeVarInt(enc, size, &cpos);
+      } else {
+        JXL_RETURN_IF_ERROR(CheckIs32Bit(prevtagstart));
+        tagstart = prevtagstart + prevtagsize;
+      }
+      JXL_RETURN_IF_ERROR(CheckIs32Bit(tagstart));
+      AppendUint32(tagstart, result);
+      if (command & kFlagBitSize) {
+        if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+        tagsize = DecodeVarInt(enc, size, &cpos);
+      }
+      JXL_RETURN_IF_ERROR(CheckIs32Bit(tagsize));
+      AppendUint32(tagsize, result);
+      prevtagstart = tagstart;
+      prevtagsize = tagsize;
+
+      if (tagcode == kCommandTagTRC) {
+        AppendKeyword(kGtrcTag, result);
+        AppendUint32(tagstart, result);
+        AppendUint32(tagsize, result);
+        AppendKeyword(kBtrcTag, result);
+        AppendUint32(tagstart, result);
+        AppendUint32(tagsize, result);
+      }
+
+      if (tagcode == kCommandTagXYZ) {
+        JXL_RETURN_IF_ERROR(CheckIs32Bit(tagstart + tagsize * 2));
+        AppendKeyword(kGxyzTag, result);
+        AppendUint32(tagstart + tagsize, result);
+        AppendUint32(tagsize, result);
+        AppendKeyword(kBxyzTag, result);
+        AppendUint32(tagstart + tagsize * 2, result);
+        AppendUint32(tagsize, result);
+      }
+    }
+  }
+
+  // Main Content
+  for (;;) {
+    if (result->size() > osize) return JXL_FAILURE("Invalid result size");
+    if (cpos > commands_end) return JXL_FAILURE("Out of bounds");
+    if (cpos == commands_end) break;  // Valid end
+    uint8_t command = enc[cpos++];
+    if (command == kCommandInsert) {
+      if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+      uint64_t num = DecodeVarInt(enc, size, &cpos);
+      JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, num, size));
+      for (size_t i = 0; i < num; i++) {
+        result->push_back(enc[pos++]);
+      }
+    } else if (command == kCommandShuffle2 || command == kCommandShuffle4) {
+      if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+      uint64_t num = DecodeVarInt(enc, size, &cpos);
+      JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, num, size));
+      PaddedBytes shuffled(num);
+      for (size_t i = 0; i < num; i++) {
+        shuffled[i] = enc[pos + i];
+      }
+      if (command == kCommandShuffle2) {
+        Shuffle(shuffled.data(), num, 2);
+      } else if (command == kCommandShuffle4) {
+        Shuffle(shuffled.data(), num, 4);
+      }
+      for (size_t i = 0; i < num; i++) {
+        result->push_back(shuffled[i]);
+        pos++;
+      }
+    } else if (command == kCommandPredict) {
+      JXL_RETURN_IF_ERROR(CheckOutOfBounds(cpos, 2, commands_end));
+      uint8_t flags = enc[cpos++];
+
+      size_t width = (flags & 3) + 1;
+      if (width == 3) return JXL_FAILURE("Invalid width");
+
+      int order = (flags & 12) >> 2;
+      if (order == 3) return JXL_FAILURE("Invalid order");
+
+      uint64_t stride = width;
+      if (flags & 16) {
+        if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+        stride = DecodeVarInt(enc, size, &cpos);
+        if (stride < width) {
+          return JXL_FAILURE("Invalid stride");
+        }
+      }
+      // If stride * 4 >= result->size(), return failure. The check
+      // "size == 0 || ((size - 1) >> 2) < stride" corresponds to
+      // "stride * 4 >= size", but does not suffer from integer overflow.
+      // This check is more strict than necessary but follows the specification
+      // and the encoder should ensure this is followed.
+      if (result->empty() || ((result->size() - 1u) >> 2u) < stride) {
+        return JXL_FAILURE("Invalid stride");
+      }
+
+      if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+      uint64_t num = DecodeVarInt(enc, size, &cpos);  // in bytes
+      JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, num, size));
+
+      PaddedBytes shuffled(num);
+      for (size_t i = 0; i < num; i++) {
+        shuffled[i] = enc[pos + i];
+      }
+      if (width > 1) Shuffle(shuffled.data(), num, width);
+
+      size_t start = result->size();
+      for (size_t i = 0; i < num; i++) {
+        uint8_t predicted = LinearPredictICCValue(result->data(), start, i,
+                                                  stride, width, order);
+        result->push_back(predicted + shuffled[i]);
+      }
+      pos += num;
+    } else if (command == kCommandXYZ) {
+      AppendKeyword(kXyz_Tag, result);
+      for (int i = 0; i < 4; i++) result->push_back(0);
+      JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, 12, size));
+      for (size_t i = 0; i < 12; i++) {
+        result->push_back(enc[pos++]);
+      }
+    } else if (command >= kCommandTypeStartFirst &&
+               command < kCommandTypeStartFirst + kNumTypeStrings) {
+      AppendKeyword(*kTypeStrings[command - kCommandTypeStartFirst], result);
+      for (size_t i = 0; i < 4; i++) {
+        result->push_back(0);
+      }
+    } else {
+      return JXL_FAILURE("Unknown command");
+    }
+  }
+
+  if (pos != size) return JXL_FAILURE("Not all data used");
+  if (result->size() != osize) return JXL_FAILURE("Invalid result size");
+
+  return true;
+}
+
+Status ICCReader::Init(BitReader* reader, size_t output_limit) {
+  JXL_RETURN_IF_ERROR(CheckEOI(reader));
+  used_bits_base_ = reader->TotalBitsConsumed();  // Bit offset of the stream.
+  if (bits_to_skip_ == 0) {  // Nothing recorded yet: decode from the start.
+    enc_size_ = U64Coder::Read(reader);
+    if (enc_size_ > 268435456) {
+      // Avoid too large memory allocation for invalid file (limit is 2^28).
+      return JXL_FAILURE("Too large encoded profile");
+    }
+    JXL_RETURN_IF_ERROR(
+        DecodeHistograms(reader, kNumICCContexts, &code_, &context_map_));
+    ans_reader_ = ANSSymbolReader(&code_, reader);
+    i_ = 0;
+    decompressed_.resize(std::min<size_t>(i_ + 0x400, enc_size_));
+    for (; i_ < std::min<size_t>(2, enc_size_); i_++) {  // Missing history = 0.
+      decompressed_[i_] = ans_reader_.ReadHybridUint(
+          ICCANSContext(i_, i_ > 0 ? decompressed_[i_ - 1] : 0,
+                        i_ > 1 ? decompressed_[i_ - 2] : 0),
+          reader, context_map_);
+    }
+    if (enc_size_ > kPreambleSize) {  // Shorter streams skip the preamble check.
+      for (; i_ < kPreambleSize; i_++) {
+        decompressed_[i_] = ans_reader_.ReadHybridUint(
+            ICCANSContext(i_, decompressed_[i_ - 1], decompressed_[i_ - 2]),
+            reader, context_map_);
+      }
+      JXL_RETURN_IF_ERROR(CheckEOI(reader));
+      JXL_RETURN_IF_ERROR(
+          CheckPreamble(decompressed_, enc_size_, output_limit));
+    }
+    bits_to_skip_ = reader->TotalBitsConsumed() - used_bits_base_;
+  } else {  // Bits were already decoded by an earlier call: just skip them.
+    reader->SkipBits(bits_to_skip_);
+  }
+  return true;
+}
+
+Status ICCReader::Process(BitReader* reader, PaddedBytes* icc) {
+  ANSSymbolReader::Checkpoint checkpoint;
+  size_t saved_i = 0;
+  auto save = [&]() {  // Records the ANS state, consumed bit count and i_.
+    ans_reader_.Save(&checkpoint);
+    bits_to_skip_ = reader->TotalBitsConsumed() - used_bits_base_;
+    saved_i = i_;
+  };
+  save();  // Initial checkpoint, taken before this call decodes anything.
+  auto check_and_restore = [&]() {  // Error + rollback if reads ran past input.
+    Status status = CheckEOI(reader);
+    if (!status) {
+      // Not enough bytes: rewind to the state captured by the last save().
+      ans_reader_.Restore(checkpoint);
+      i_ = saved_i;
+      return status;
+    }
+    return Status(true);
+  };
+  for (; i_ < enc_size_; i_++) {
+    if (i_ % ANSSymbolReader::kMaxCheckpointInterval == 0 && i_ > 0) {
+      JXL_RETURN_IF_ERROR(check_and_restore());
+      save();
+      if ((i_ > 0) && (((i_ & 0xFFFF) == 0))) {  // Only at multiples of 0x10000.
+        float used_bytes =
+            (reader->TotalBitsConsumed() - used_bits_base_) / 8.0f;
+        if (i_ > used_bytes * 256) return JXL_FAILURE("Corrupted stream");
+      }
+      decompressed_.resize(std::min<size_t>(i_ + 0x400, enc_size_));
+    }
+    JXL_DASSERT(i_ >= 2);  // The context below reads the two previous bytes.
+    decompressed_[i_] = ans_reader_.ReadHybridUint(
+        ICCANSContext(i_, decompressed_[i_ - 1], decompressed_[i_ - 2]), reader,
+        context_map_);
+  }
+  JXL_RETURN_IF_ERROR(check_and_restore());
+  bits_to_skip_ = reader->TotalBitsConsumed() - used_bits_base_;
+  if (!ans_reader_.CheckANSFinalState()) {
+    return JXL_FAILURE("Corrupted ICC profile");
+  }
+
+  icc->clear();  // The result replaces, rather than extends, previous contents.
+  return UnpredictICC(decompressed_.data(), decompressed_.size(), icc);
+}
+
+Status ICCReader::CheckEOI(BitReader* reader) {
+  if (reader->AllReadsWithinBounds()) return true;  // Otherwise: need more data.
+  return JXL_STATUS(StatusCode::kNotEnoughBytes,
+                    "Not enough bytes for reading ICC profile");
+}
+
+Status ReadICC(BitReader* JXL_RESTRICT reader, PaddedBytes* JXL_RESTRICT icc,
+               size_t output_limit) {
+  ICCReader icc_reader;  // Temporary reader: Init and Process run back to back.
+  JXL_RETURN_IF_ERROR(icc_reader.Init(reader, output_limit));
+  JXL_RETURN_IF_ERROR(icc_reader.Process(reader, icc));
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec.h
new file mode 100644
index 0000000000..d55b316957
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec.h
@@ -0,0 +1,64 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ICC_CODEC_H_
+#define LIB_JXL_ICC_CODEC_H_
+
+// Compressed representation of ICC profiles.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_bit_writer.h"
+
+namespace jxl {
+
+// Should still be called if `icc.empty()` - if so, writes only 1 bit.
+Status WriteICC(const PaddedBytes& icc, BitWriter* JXL_RESTRICT writer,
+                size_t layer, AuxOut* JXL_RESTRICT aux_out);
+
+struct ICCReader {
+  Status Init(BitReader* reader, size_t output_limit);
+  Status Process(BitReader* reader, PaddedBytes* icc);
+  void Reset() {  // After this, the next Init() decodes from the start again.
+    bits_to_skip_ = 0;
+    decompressed_.clear();
+  }
+
+ private:
+  Status CheckEOI(BitReader* reader);  // kNotEnoughBytes if reads overran input.
+  size_t i_ = 0;                // Index of the next byte of decompressed_ to decode.
+  size_t bits_to_skip_ = 0;     // Bits consumed since used_bits_base_ at last save.
+  size_t used_bits_base_ = 0;   // reader->TotalBitsConsumed() at the last Init().
+  uint64_t enc_size_ = 0;       // Number of encoded bytes, read from the stream.
+  std::vector<uint8_t> context_map_;  // Filled by DecodeHistograms() in Init().
+  ANSCode code_;                      // Filled by DecodeHistograms() in Init().
+  ANSSymbolReader ans_reader_;
+  PaddedBytes decompressed_;  // Entropy-decoded bytes handed to UnpredictICC().
+};
+
+// `icc` may be empty afterwards - if so, call CreateProfile. Does not append,
+// clears any original data that was in icc.
+// If `output_limit` is not 0, then returns error if resulting profile would be
+// longer than `output_limit`
+Status ReadICC(BitReader* JXL_RESTRICT reader, PaddedBytes* JXL_RESTRICT icc,
+               size_t output_limit = 0);
+
+// Exposed only for testing
+Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result);
+
+// Exposed only for testing
+Status UnpredictICC(const uint8_t* enc, size_t size, PaddedBytes* result);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ICC_CODEC_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec_common.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec_common.cc
new file mode 100644
index 0000000000..1e118c5d5c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec_common.cc
@@ -0,0 +1,192 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/icc_codec_common.h"
+
+#include <stdint.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+namespace {
+static uint8_t ByteKind1(uint8_t b) {
+  // Maps a byte to one of eight coarse classes (0..7). The rules are tried in
+  // order and the first one that matches decides the class.
+  const bool is_letter = ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z');
+  const bool is_numeric = ('0' <= b && b <= '9') || b == '.' || b == ',';
+  if (is_letter) return 0;
+  if (is_numeric) return 1;
+  if (b <= 1) return b == 0 ? 2 : 3;
+  if (b < 16) return 4;
+  if (b > 240) return b == 255 ? 6 : 5;
+  return 7;
+}
+
+static uint8_t ByteKind2(uint8_t b) {
+  // Maps a byte to one of five coarse classes (0..4); first match wins.
+  const bool is_letter = ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z');
+  const bool is_numeric = ('0' <= b && b <= '9') || b == '.' || b == ',';
+  if (is_letter) return 0;
+  if (is_numeric) return 1;
+  if (b < 16) return 2;
+  return b > 240 ? 3 : 4;
+}
+
+template <typename T>
+T PredictValue(T p1, T p2, T p3, int order) {
+  // Order 0 repeats p1, orders 1 and 2 also use p2 (and p3); others give 0.
+  if (order == 2) return 3 * p1 - 3 * p2 + p3;
+  if (order == 1) return 2 * p1 - p2;
+  return order == 0 ? p1 : 0;
+}
+}  // namespace
+
+// Reads 4 bytes at `pos` via LoadBE32; yields 0 if they do not fit in `size`.
+uint32_t DecodeUint32(const uint8_t* data, size_t size, size_t pos) {
+  return (size < 4 || pos > size - 4) ? 0 : LoadBE32(data + pos);  // wrap-safe
+}
+// Writes `value` at `pos` via StoreBE32; does nothing if it does not fit.
+void EncodeUint32(size_t pos, uint32_t value, PaddedBytes* data) {
+  if (data->size() < 4 || pos > data->size() - 4) return;  // `pos + 4` may wrap
+  StoreBE32(value, data->data() + pos);
+}
+
+void AppendUint32(uint32_t value, PaddedBytes* data) {
+  data->resize(data->size() + 4);  // Make room for the four new bytes.
+  EncodeUint32(data->size() - 4, value, data);  // Fill the bytes just added.
+}
+
+typedef std::array<uint8_t, 4> Tag;
+// Returns the 4 bytes at `pos`, or four spaces if they do not fit in `size`.
+Tag DecodeKeyword(const uint8_t* data, size_t size, size_t pos) {
+  if (size < 4 || pos > size - 4) return {' ', ' ', ' ', ' '};  // wrap-safe
+  return {data[pos], data[pos + 1], data[pos + 2], data[pos + 3]};
+}
+// Stores `keyword` at `pos`; a keyword that does not fit in `size` is dropped.
+void EncodeKeyword(const Tag& keyword, uint8_t* data, size_t size, size_t pos) {
+  if (size < 4 || pos > size - 4) return;  // `pos + 3` could wrap around
+  for (size_t i = 0; i < 4; ++i) data[pos + i] = keyword[i];
+}
+
+void AppendKeyword(const Tag& keyword, PaddedBytes* data) {
+  JXL_ASSERT(keyword.size() == 4);  // Tag is a 4-element std::array.
+  data->append(keyword);  // Adds the keyword's bytes at the end of `data`.
+}
+
+// Fails if a + b exceeds size or if a + b wraps around; succeeds otherwise.
+Status CheckOutOfBounds(size_t a, size_t b, size_t size) {
+  size_t pos = a + b;  // May wrap; detected by the `pos < a` test below.
+  if (pos > size) return JXL_FAILURE("Out of bounds");
+  if (pos < a) return JXL_FAILURE("Out of bounds");  // overflow happened
+  return true;
+}
+
+Status CheckIs32Bit(uint64_t v) {
+  // Succeeds only when none of the upper 32 bits of `v` are set.
+  if ((v >> 32) != 0) return JXL_FAILURE("32-bit value expected");
+  return true;
+}
+
+PaddedBytes ICCInitialHeaderPrediction() {
+  // Returns a kICCHeaderSize-byte buffer that is zero everywhere except for
+  // the byte at offset 8, four keywords, and the bytes at offsets 68..79.
+  PaddedBytes header(kICCHeaderSize);
+  for (size_t i = 0; i < kICCHeaderSize; i++) header[i] = 0;
+  header[8] = 4;
+
+  uint8_t* const bytes = header.data();
+  const size_t num_bytes = header.size();
+  EncodeKeyword(kMntrTag, bytes, num_bytes, 12);
+  EncodeKeyword(kRgb_Tag, bytes, num_bytes, 16);
+  EncodeKeyword(kXyz_Tag, bytes, num_bytes, 20);
+  EncodeKeyword(kAcspTag, bytes, num_bytes, 36);
+
+  // Values for offsets 68..79; the zero entries are written explicitly too.
+  static const uint8_t kTail[12] = {
+      0, 0, 246, 214,  // offsets 68..71
+      0, 1, 0,   0,    // offsets 72..75
+      0, 0, 211, 45,   // offsets 76..79
+  };
+  for (size_t i = 0; i < 12; i++) header[68 + i] = kTail[i];
+
+  return header;
+}
+
+void ICCPredictHeader(const uint8_t* icc, size_t size, uint8_t* header,
+                      size_t pos) {
+  // Patches a few bytes of `header` based on bytes of `icc`. Acts only for
+  // pos == 8, 41 or 42, and only when `size` is at least `pos`.
+  if (size < pos) return;
+  switch (pos) {
+    case 8:
+      // header[80..83] take the values of icc[4..7].
+      for (size_t k = 0; k < 4; k++) header[80 + k] = icc[4 + k];
+      break;
+    case 41: {
+      // An 'A' at icc[40] suggests "PPL" next, an 'M' suggests "SFT".
+      const char* rest = icc[40] == 'A' ? "PPL" : nullptr;
+      if (icc[40] == 'M') rest = "SFT";
+      if (rest == nullptr) break;
+      for (size_t k = 0; k < 3; k++) header[41 + k] = rest[k];
+      break;
+    }
+    case 42:
+      // "SG" at icc[40..41] suggests "I " next, "SU" suggests "NW".
+      if (icc[40] != 'S') break;
+      if (icc[41] == 'G') {
+        header[42] = 'I';
+        header[43] = ' ';
+      } else if (icc[41] == 'U') {
+        header[42] = 'N';
+        header[43] = 'W';
+      }
+      break;
+  }
+}
+
+// Predicts one byte of a `width`-byte integer (width 1, 2, or otherwise read
+// as 4 bytes) by applying PredictValue of the given order (0-2) to the
+// integers found stride, 2 * stride and 3 * stride bytes earlier in `data`.
+// The byte being predicted sits at start + i; i modulo width selects which
+// byte of the integer is returned. start + i must be at least stride * 4.
+uint8_t LinearPredictICCValue(const uint8_t* data, size_t start, size_t i,
+                              size_t stride, size_t width, int order) {
+  size_t pos = start + i;
+  if (width == 1) {
+    uint8_t p1 = data[pos - stride];
+    uint8_t p2 = data[pos - stride * 2];
+    uint8_t p3 = data[pos - stride * 3];
+    return PredictValue(p1, p2, p3, order);
+  } else if (width == 2) {
+    size_t p = start + (i & ~1);  // First byte of the current 2-byte value.
+    uint16_t p1 = (data[p - stride * 1] << 8) + data[p - stride * 1 + 1];
+    uint16_t p2 = (data[p - stride * 2] << 8) + data[p - stride * 2 + 1];
+    uint16_t p3 = (data[p - stride * 3] << 8) + data[p - stride * 3 + 1];
+    uint16_t pred = PredictValue(p1, p2, p3, order);
+    return (i & 1) ? (pred & 255) : ((pred >> 8) & 255);  // Odd i: low byte.
+  } else {
+    size_t p = start + (i & ~3);  // First byte of the current 4-byte value.
+    uint32_t p1 = DecodeUint32(data, pos, p - stride);  // `pos` acts as the size.
+    uint32_t p2 = DecodeUint32(data, pos, p - stride * 2);
+    uint32_t p3 = DecodeUint32(data, pos, p - stride * 3);
+    uint32_t pred = PredictValue(p1, p2, p3, order);
+    unsigned shiftbytes = 3 - (i & 3);  // i & 3 == 0 selects the top byte.
+    return (pred >> (shiftbytes * 8)) & 255;
+  }
+}
+
+size_t ICCANSContext(size_t i, size_t b1, size_t b2) {
+  // Context 0 up to position 128; later 1 + ByteKind1(b1) + 8 * ByteKind2(b2).
+  return i <= 128 ? 0 : 1 + ByteKind1(b1) + ByteKind2(b2) * 8;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec_common.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec_common.h
new file mode 100644
index 0000000000..8ccc7e9091
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec_common.h
@@ -0,0 +1,106 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ICC_CODEC_COMMON_H_
+#define LIB_JXL_ICC_CODEC_COMMON_H_
+
+// Compressed representation of ICC profiles.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+static constexpr size_t kICCHeaderSize = 128;
+
+typedef std::array<uint8_t, 4> Tag;
+
+static const Tag kAcspTag = {'a', 'c', 's', 'p'};
+static const Tag kBkptTag = {'b', 'k', 'p', 't'};
+static const Tag kBtrcTag = {'b', 'T', 'R', 'C'};
+static const Tag kBxyzTag = {'b', 'X', 'Y', 'Z'};
+static const Tag kChadTag = {'c', 'h', 'a', 'd'};
+static const Tag kChrmTag = {'c', 'h', 'r', 'm'};
+static const Tag kCprtTag = {'c', 'p', 'r', 't'};
+static const Tag kCurvTag = {'c', 'u', 'r', 'v'};
+static const Tag kDescTag = {'d', 'e', 's', 'c'};
+static const Tag kDmddTag = {'d', 'm', 'd', 'd'};
+static const Tag kDmndTag = {'d', 'm', 'n', 'd'};
+static const Tag kGbd_Tag = {'g', 'b', 'd', ' '};
+static const Tag kGtrcTag = {'g', 'T', 'R', 'C'};
+static const Tag kGxyzTag = {'g', 'X', 'Y', 'Z'};
+static const Tag kKtrcTag = {'k', 'T', 'R', 'C'};
+static const Tag kKxyzTag = {'k', 'X', 'Y', 'Z'};
+static const Tag kLumiTag = {'l', 'u', 'm', 'i'};
+static const Tag kMab_Tag = {'m', 'A', 'B', ' '};
+static const Tag kMba_Tag = {'m', 'B', 'A', ' '};
+static const Tag kMlucTag = {'m', 'l', 'u', 'c'};
+static const Tag kMntrTag = {'m', 'n', 't', 'r'};
+static const Tag kParaTag = {'p', 'a', 'r', 'a'};
+static const Tag kRgb_Tag = {'R', 'G', 'B', ' '};
+static const Tag kRtrcTag = {'r', 'T', 'R', 'C'};
+static const Tag kRxyzTag = {'r', 'X', 'Y', 'Z'};
+static const Tag kSf32Tag = {'s', 'f', '3', '2'};
+static const Tag kTextTag = {'t', 'e', 'x', 't'};
+static const Tag kVcgtTag = {'v', 'c', 'g', 't'};
+static const Tag kWtptTag = {'w', 't', 'p', 't'};
+static const Tag kXyz_Tag = {'X', 'Y', 'Z', ' '};
+
+// Tag names focused on RGB and GRAY monitor profiles
+static constexpr size_t kNumTagStrings = 17;
+static constexpr const Tag* kTagStrings[kNumTagStrings] = {
+    &kCprtTag, &kWtptTag, &kBkptTag, &kRxyzTag, &kGxyzTag, &kBxyzTag,
+    &kKxyzTag, &kRtrcTag, &kGtrcTag, &kBtrcTag, &kKtrcTag, &kChadTag,
+    &kDescTag, &kChrmTag, &kDmndTag, &kDmddTag, &kLumiTag};
+
+static constexpr size_t kCommandTagUnknown = 1;
+static constexpr size_t kCommandTagTRC = 2;
+static constexpr size_t kCommandTagXYZ = 3;
+static constexpr size_t kCommandTagStringFirst = 4;
+
+// Tag types focused on RGB and GRAY monitor profiles
+static constexpr size_t kNumTypeStrings = 8;
+static constexpr const Tag* kTypeStrings[kNumTypeStrings] = {
+    &kXyz_Tag, &kDescTag, &kTextTag, &kMlucTag,
+    &kParaTag, &kCurvTag, &kSf32Tag, &kGbd_Tag};
+
+static constexpr size_t kCommandInsert = 1;    // Copy `num` data bytes as is.
+static constexpr size_t kCommandShuffle2 = 2;  // Copy after Shuffle(.., 2).
+static constexpr size_t kCommandShuffle4 = 3;  // Copy after Shuffle(.., 4).
+static constexpr size_t kCommandPredict = 4;   // Data bytes are added to a prediction.
+static constexpr size_t kCommandXYZ = 10;      // "XYZ ", 4 zero bytes, 12 data bytes.
+static constexpr size_t kCommandTypeStartFirst = 16;  // + index into kTypeStrings.
+
+static constexpr size_t kFlagBitOffset = 64;  // Tag command carries a tag start.
+static constexpr size_t kFlagBitSize = 128;   // Tag command carries a tag size.
+
+static constexpr size_t kNumICCContexts = 41;  // ICCANSContext() yields 0..40.
+
+uint32_t DecodeUint32(const uint8_t* data, size_t size, size_t pos);
+void EncodeUint32(size_t pos, uint32_t value, PaddedBytes* data);
+void AppendUint32(uint32_t value, PaddedBytes* data);
+Tag DecodeKeyword(const uint8_t* data, size_t size, size_t pos);
+void EncodeKeyword(const Tag& keyword, uint8_t* data, size_t size, size_t pos);
+void AppendKeyword(const Tag& keyword, PaddedBytes* data);
+
+// Checks if a + b > size, taking possible integer overflow into account.
+Status CheckOutOfBounds(size_t a, size_t b, size_t size);
+Status CheckIs32Bit(uint64_t v);
+
+PaddedBytes ICCInitialHeaderPrediction();
+void ICCPredictHeader(const uint8_t* icc, size_t size, uint8_t* header,
+                      size_t pos);
+uint8_t LinearPredictICCValue(const uint8_t* data, size_t start, size_t i,
+                              size_t stride, size_t width, int order);
+size_t ICCANSContext(size_t i, size_t b1, size_t b2);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ICC_CODEC_COMMON_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec_test.cc
new file mode 100644
index 0000000000..d365471afa
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/icc_codec_test.cc
@@ -0,0 +1,207 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/icc_codec.h"
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/enc_icc_codec.h"
+
+namespace jxl {
+namespace {
+
+void TestProfile(const PaddedBytes& icc) {  // Encode, decode, compare bytes.
+  BitWriter writer;
+  ASSERT_TRUE(WriteICC(icc, &writer, 0, nullptr));
+  writer.ZeroPadToByte();
+  PaddedBytes dec;
+  BitReader reader(writer.GetSpan());
+  ASSERT_TRUE(ReadICC(&reader, &dec));
+  ASSERT_TRUE(reader.Close());
+  EXPECT_EQ(icc.size(), dec.size());
+  if (icc.size() == dec.size()) {  // Indexing below needs equal sizes.
+    for (size_t i = 0; i < icc.size(); i++) {
+      EXPECT_EQ(icc[i], dec[i]);
+      if (icc[i] != dec[i]) break;  // One output is enough
+    }
+  }
+}
+
+void TestProfile(const std::string& icc) {  // String convenience overload.
+  PaddedBytes bytes(icc.size());
+  for (size_t i = 0; i < icc.size(); i++) {
+    bytes[i] = icc[i];  // Byte-for-byte copy of the string contents.
+  }
+  TestProfile(bytes);
+}
+
+// Valid profile from one of the images output by the decoder.
+static const unsigned char kTestProfile[] = {
+    0x00, 0x00, 0x03, 0x80, 0x6c, 0x63, 0x6d, 0x73, 0x04, 0x30, 0x00, 0x00,
+    0x6d, 0x6e, 0x74, 0x72, 0x52, 0x47, 0x42, 0x20, 0x58, 0x59, 0x5a, 0x20,
+    0x07, 0xe3, 0x00, 0x04, 0x00, 0x1d, 0x00, 0x0f, 0x00, 0x32, 0x00, 0x2e,
+    0x61, 0x63, 0x73, 0x70, 0x41, 0x50, 0x50, 0x4c, 0x00, 0x00, 0x00, 0x01,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0xf6, 0xd6,
+    0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d, 0x6c, 0x63, 0x6d, 0x73,
+    0x5f, 0x07, 0x0d, 0x3e, 0x4d, 0x32, 0xf2, 0x6e, 0x5d, 0x77, 0x26, 0xcc,
+    0x23, 0xb0, 0x6a, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d,
+    0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x42,
+    0x63, 0x70, 0x72, 0x74, 0x00, 0x00, 0x01, 0x64, 0x00, 0x00, 0x01, 0x00,
+    0x77, 0x74, 0x70, 0x74, 0x00, 0x00, 0x02, 0x64, 0x00, 0x00, 0x00, 0x14,
+    0x63, 0x68, 0x61, 0x64, 0x00, 0x00, 0x02, 0x78, 0x00, 0x00, 0x00, 0x2c,
+    0x72, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x02, 0xa4, 0x00, 0x00, 0x00, 0x14,
+    0x62, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x02, 0xb8, 0x00, 0x00, 0x00, 0x14,
+    0x67, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x02, 0xcc, 0x00, 0x00, 0x00, 0x14,
+    0x72, 0x54, 0x52, 0x43, 0x00, 0x00, 0x02, 0xe0, 0x00, 0x00, 0x00, 0x20,
+    0x67, 0x54, 0x52, 0x43, 0x00, 0x00, 0x02, 0xe0, 0x00, 0x00, 0x00, 0x20,
+    0x62, 0x54, 0x52, 0x43, 0x00, 0x00, 0x02, 0xe0, 0x00, 0x00, 0x00, 0x20,
+    0x63, 0x68, 0x72, 0x6d, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x24,
+    0x64, 0x6d, 0x6e, 0x64, 0x00, 0x00, 0x03, 0x24, 0x00, 0x00, 0x00, 0x28,
+    0x64, 0x6d, 0x64, 0x64, 0x00, 0x00, 0x03, 0x4c, 0x00, 0x00, 0x00, 0x32,
+    0x6d, 0x6c, 0x75, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+    0x00, 0x00, 0x00, 0x0c, 0x65, 0x6e, 0x55, 0x53, 0x00, 0x00, 0x00, 0x26,
+    0x00, 0x00, 0x00, 0x1c, 0x00, 0x52, 0x00, 0x47, 0x00, 0x42, 0x00, 0x5f,
+    0x00, 0x44, 0x00, 0x36, 0x00, 0x35, 0x00, 0x5f, 0x00, 0x53, 0x00, 0x52,
+    0x00, 0x47, 0x00, 0x5f, 0x00, 0x52, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x5f,
+    0x00, 0x37, 0x00, 0x30, 0x00, 0x39, 0x00, 0x00, 0x6d, 0x6c, 0x75, 0x63,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c,
+    0x65, 0x6e, 0x55, 0x53, 0x00, 0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x1c,
+    0x00, 0x43, 0x00, 0x6f, 0x00, 0x70, 0x00, 0x79, 0x00, 0x72, 0x00, 0x69,
+    0x00, 0x67, 0x00, 0x68, 0x00, 0x74, 0x00, 0x20, 0x00, 0x32, 0x00, 0x30,
+    0x00, 0x31, 0x00, 0x38, 0x00, 0x20, 0x00, 0x47, 0x00, 0x6f, 0x00, 0x6f,
+    0x00, 0x67, 0x00, 0x6c, 0x00, 0x65, 0x00, 0x20, 0x00, 0x4c, 0x00, 0x4c,
+    0x00, 0x43, 0x00, 0x2c, 0x00, 0x20, 0x00, 0x43, 0x00, 0x43, 0x00, 0x2d,
+    0x00, 0x42, 0x00, 0x59, 0x00, 0x2d, 0x00, 0x53, 0x00, 0x41, 0x00, 0x20,
+    0x00, 0x33, 0x00, 0x2e, 0x00, 0x30, 0x00, 0x20, 0x00, 0x55, 0x00, 0x6e,
+    0x00, 0x70, 0x00, 0x6f, 0x00, 0x72, 0x00, 0x74, 0x00, 0x65, 0x00, 0x64,
+    0x00, 0x20, 0x00, 0x6c, 0x00, 0x69, 0x00, 0x63, 0x00, 0x65, 0x00, 0x6e,
+    0x00, 0x73, 0x00, 0x65, 0x00, 0x28, 0x00, 0x68, 0x00, 0x74, 0x00, 0x74,
+    0x00, 0x70, 0x00, 0x73, 0x00, 0x3a, 0x00, 0x2f, 0x00, 0x2f, 0x00, 0x63,
+    0x00, 0x72, 0x00, 0x65, 0x00, 0x61, 0x00, 0x74, 0x00, 0x69, 0x00, 0x76,
+    0x00, 0x65, 0x00, 0x63, 0x00, 0x6f, 0x00, 0x6d, 0x00, 0x6d, 0x00, 0x6f,
+    0x00, 0x6e, 0x00, 0x73, 0x00, 0x2e, 0x00, 0x6f, 0x00, 0x72, 0x00, 0x67,
+    0x00, 0x2f, 0x00, 0x6c, 0x00, 0x69, 0x00, 0x63, 0x00, 0x65, 0x00, 0x6e,
+    0x00, 0x73, 0x00, 0x65, 0x00, 0x73, 0x00, 0x2f, 0x00, 0x62, 0x00, 0x79,
+    0x00, 0x2d, 0x00, 0x73, 0x00, 0x61, 0x00, 0x2f, 0x00, 0x33, 0x00, 0x2e,
+    0x00, 0x30, 0x00, 0x2f, 0x00, 0x6c, 0x00, 0x65, 0x00, 0x67, 0x00, 0x61,
+    0x00, 0x6c, 0x00, 0x63, 0x00, 0x6f, 0x00, 0x64, 0x00, 0x65, 0x00, 0x29,
+    0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf6, 0xd6,
+    0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d, 0x73, 0x66, 0x33, 0x32,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x0c, 0x42, 0x00, 0x00, 0x05, 0xde,
+    0xff, 0xff, 0xf3, 0x25, 0x00, 0x00, 0x07, 0x93, 0x00, 0x00, 0xfd, 0x90,
+    0xff, 0xff, 0xfb, 0xa1, 0xff, 0xff, 0xfd, 0xa2, 0x00, 0x00, 0x03, 0xdc,
+    0x00, 0x00, 0xc0, 0x6e, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x6f, 0xa0, 0x00, 0x00, 0x38, 0xf5, 0x00, 0x00, 0x03, 0x90,
+    0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x9f,
+    0x00, 0x00, 0x0f, 0x84, 0x00, 0x00, 0xb6, 0xc4, 0x58, 0x59, 0x5a, 0x20,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x62, 0x97, 0x00, 0x00, 0xb7, 0x87,
+    0x00, 0x00, 0x18, 0xd9, 0x70, 0x61, 0x72, 0x61, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x38, 0xe4, 0x00, 0x00, 0xe8, 0xf0,
+    0x00, 0x00, 0x17, 0x10, 0x00, 0x00, 0x38, 0xe4, 0x00, 0x00, 0x14, 0xbc,
+    0x63, 0x68, 0x72, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+    0x00, 0x00, 0xa3, 0xd7, 0x00, 0x00, 0x54, 0x7c, 0x00, 0x00, 0x4c, 0xcd,
+    0x00, 0x00, 0x99, 0x9a, 0x00, 0x00, 0x26, 0x67, 0x00, 0x00, 0x0f, 0x5c,
+    0x6d, 0x6c, 0x75, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+    0x00, 0x00, 0x00, 0x0c, 0x65, 0x6e, 0x55, 0x53, 0x00, 0x00, 0x00, 0x0c,
+    0x00, 0x00, 0x00, 0x1c, 0x00, 0x47, 0x00, 0x6f, 0x00, 0x6f, 0x00, 0x67,
+    0x00, 0x6c, 0x00, 0x65, 0x6d, 0x6c, 0x75, 0x63, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x65, 0x6e, 0x55, 0x53,
+    0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x49, 0x00, 0x6d,
+    0x00, 0x61, 0x00, 0x67, 0x00, 0x65, 0x00, 0x20, 0x00, 0x63, 0x00, 0x6f,
+    0x00, 0x64, 0x00, 0x65, 0x00, 0x63, 0x00, 0x00,
+};
+
+}  // namespace
+
+TEST(IccCodecTest, Icc) {
+  // Empty string cannot be tested, encoder checks against writing it.
+  TestProfile("a");
+  TestProfile("ab");
+  TestProfile("aaaa");
+
+  {
+    // Exactly the ICC header size, filled with zeros
+    PaddedBytes profile(128);
+    for (size_t i = 0; i < 128; i++) {
+      profile[i] = 0;
+    }
+    TestProfile(profile);
+  }
+
+  {  // The complete reference profile.
+    PaddedBytes profile;
+    profile.append(kTestProfile, kTestProfile + sizeof(kTestProfile));
+    TestProfile(profile);
+  }
+
+  // Test every prefix of the full profile with a length from 1 to 257 bytes
+  {
+    PaddedBytes profile;
+    for (size_t i = 0; i <= 256; i++) {
+      profile.push_back(kTestProfile[i]);
+      TestProfile(profile);
+    }
+  }
+}
+
+// kTestProfile after encoding with the ICC codec
+static const unsigned char kEncodedTestProfile[] = {
+    0x1f, 0x8b, 0x1,  0x13, 0x10, 0x0,  0x0,  0x0,  0x20, 0x4c, 0xcc, 0x3,
+    0xe7, 0xa0, 0xa5, 0xa2, 0x90, 0xa4, 0x27, 0xe8, 0x79, 0x1d, 0xe3, 0x26,
+    0x57, 0x54, 0xef, 0x0,  0xe8, 0x97, 0x2,  0xce, 0xa1, 0xd7, 0x85, 0x16,
+    0xb4, 0x29, 0x94, 0x58, 0xf2, 0x56, 0xc0, 0x76, 0xea, 0x23, 0xec, 0x7c,
+    0x73, 0x51, 0x41, 0x40, 0x23, 0x21, 0x95, 0x4,  0x75, 0x12, 0xc9, 0xcc,
+    0x16, 0xbd, 0xb6, 0x99, 0xad, 0xf8, 0x75, 0x35, 0xb6, 0x42, 0xae, 0xae,
+    0xae, 0x86, 0x56, 0xf8, 0xcc, 0x16, 0x30, 0xb3, 0x45, 0xad, 0xd,  0x40,
+    0xd6, 0xd1, 0xd6, 0x99, 0x40, 0xbe, 0xe2, 0xdc, 0x31, 0x7,  0xa6, 0xb9,
+    0x27, 0x92, 0x38, 0x0,  0x3,  0x5e, 0x2c, 0xbe, 0xe6, 0xfb, 0x19, 0xbf,
+    0xf3, 0x6d, 0xbc, 0x4d, 0x64, 0xe5, 0xba, 0x76, 0xde, 0x31, 0x65, 0x66,
+    0x14, 0xa6, 0x3a, 0xc5, 0x8f, 0xb1, 0xb4, 0xba, 0x1f, 0xb1, 0xb8, 0xd4,
+    0x75, 0xba, 0x18, 0x86, 0x95, 0x3c, 0x26, 0xf6, 0x25, 0x62, 0x53, 0xfd,
+    0x9c, 0x94, 0x76, 0xf6, 0x95, 0x2c, 0xb1, 0xfd, 0xdc, 0xc0, 0xe4, 0x3f,
+    0xb3, 0xff, 0x67, 0xde, 0xd5, 0x94, 0xcc, 0xb0, 0x83, 0x2f, 0x28, 0x93,
+    0x92, 0x3,  0xa1, 0x41, 0x64, 0x60, 0x62, 0x70, 0x80, 0x87, 0xaf, 0xe7,
+    0x60, 0x4a, 0x20, 0x23, 0xb3, 0x11, 0x7,  0x38, 0x38, 0xd4, 0xa,  0x66,
+    0xb5, 0x93, 0x41, 0x90, 0x19, 0x17, 0x18, 0x60, 0xa5, 0xb,  0x7a, 0x24,
+    0xaa, 0x20, 0x81, 0xac, 0xa9, 0xa1, 0x70, 0xa6, 0x12, 0x8a, 0x4a, 0xa3,
+    0xa0, 0xf9, 0x9a, 0x97, 0xe7, 0xa8, 0xac, 0x8,  0xa8, 0xc4, 0x2a, 0x86,
+    0xa7, 0x69, 0x1e, 0x67, 0xe6, 0xbe, 0xa4, 0xd3, 0xff, 0x91, 0x61, 0xf6,
+    0x8a, 0xe6, 0xb5, 0xb3, 0x61, 0x9f, 0x19, 0x17, 0x98, 0x27, 0x6b, 0xe9,
+    0x8,  0x98, 0xe1, 0x21, 0x4a, 0x9,  0xb5, 0xd7, 0xca, 0xfa, 0x94, 0xd0,
+    0x69, 0x1a, 0xeb, 0x52, 0x1,  0x4e, 0xf5, 0xf6, 0xdf, 0x7f, 0xe7, 0x29,
+    0x70, 0xee, 0x4,  0xda, 0x2f, 0xa4, 0xff, 0xfe, 0xbb, 0x6f, 0xa8, 0xff,
+    0xfe, 0xdb, 0xaf, 0x8,  0xf6, 0x72, 0xa1, 0x40, 0x5d, 0xf0, 0x2d, 0x8,
+    0x82, 0x5b, 0x87, 0xbd, 0x10, 0x8,  0xe9, 0x7,  0xee, 0x4b, 0x80, 0xda,
+    0x4a, 0x4,  0xc5, 0x5e, 0xa0, 0xb7, 0x1e, 0x60, 0xb0, 0x59, 0x76, 0x60,
+    0xb,  0x2e, 0x19, 0x8a, 0x2e, 0x1c, 0xe6, 0x6,  0x20, 0xb8, 0x64, 0x18,
+    0x2a, 0xcf, 0x51, 0x94, 0xd4, 0xee, 0xc3, 0xfe, 0x39, 0x74, 0xd4, 0x2b,
+    0x48, 0xc9, 0x83, 0x4c, 0x9b, 0xd0, 0x4c, 0x35, 0x10, 0xe3, 0x9,  0xf7,
+    0x72, 0xf0, 0x7a, 0xe,  0xbf, 0x7d, 0x36, 0x2e, 0x19, 0x7e, 0x3f, 0xc,
+    0xf7, 0x93, 0xe7, 0xf4, 0x1d, 0x32, 0xc6, 0xb0, 0x89, 0xad, 0xe0, 0x28,
+    0xc1, 0xa7, 0x59, 0xe3, 0x0,
+};
+
+// Tests that decoding the fixed bytes of kEncodedTestProfile gives kTestProfile.
+TEST(IccCodecTest, EncodedIccProfile) {
+  jxl::BitReader reader(jxl::Span<const uint8_t>(kEncodedTestProfile,
+                                                 sizeof(kEncodedTestProfile)));
+  jxl::PaddedBytes dec;
+  ASSERT_TRUE(ReadICC(&reader, &dec));
+  ASSERT_TRUE(reader.Close());
+  EXPECT_EQ(sizeof(kTestProfile), dec.size());
+  if (sizeof(kTestProfile) == dec.size()) {  // Indexing below needs equal sizes.
+    for (size_t i = 0; i < dec.size(); i++) {
+      EXPECT_EQ(kTestProfile[i], dec[i]);
+      if (kTestProfile[i] != dec[i]) break;  // One output is enough
+    }
+  }
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image.cc
new file mode 100644
index 0000000000..0d63d797e1
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image.cc
@@ -0,0 +1,313 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/image.h"
+
+#include <algorithm>  // swap
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/image.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/sanitizers.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+
+namespace HWY_NAMESPACE {
+size_t GetVectorSize() { return HWY_LANES(uint8_t); }  // uint8_t lane count.
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+namespace {
+
+HWY_EXPORT(GetVectorSize);  // Local function.
+// Returns the dispatched GetVectorSize() result; evaluated once, then cached.
+size_t VectorSize() {
+  static const size_t vector_bytes = HWY_DYNAMIC_DISPATCH(GetVectorSize)();
+  return vector_bytes;
+}
+
+// Returns the distance [bytes] between the starts of two consecutive rows.
+// The result is a multiple of the vector size and of the cache line size,
+// but never a multiple of CacheAligned::kAlias (explained below).
+size_t BytesPerRow(const size_t xsize, const size_t sizeof_t) {
+  const size_t vec_size = VectorSize();
+
+  // Extra room so that an unaligned access may start at the last valid value
+  // (msan may still complain unless the user calls
+  // InitializePaddingForUnalignedAccesses). Not needed in the scalar case
+  // (vec_size == 0) because no extra lanes will be loaded.
+  size_t slack = 0;
+  if (vec_size != 0) slack = vec_size - sizeof_t;
+  const size_t valid_bytes = xsize * sizeof_t + slack;
+
+  // Round up to vector and cache line size.
+  const size_t align = std::max(vec_size, CacheAligned::kAlignment);
+  const size_t rounded = RoundUpTo(valid_bytes, align);
+
+  // Before writes are committed to memory, CPUs guard against read-after-write
+  // hazards by comparing addresses, but only their lower 11 bits. A row size
+  // that is a multiple of 2 KiB would thus cause false dependencies between
+  // writes to consecutive rows, so such sizes are bumped by one `align`.
+  // Avoid2K prevents the same problem for the planes of an Image3.
+  const bool is_alias_multiple = (rounded % CacheAligned::kAlias == 0);
+  const size_t bytes_per_row = is_alias_multiple ? rounded + align : rounded;
+
+  JXL_ASSERT(bytes_per_row % align == 0);
+  return bytes_per_row;
+}
+
+}  // namespace
+
+PlaneBase::PlaneBase(const size_t xsize, const size_t ysize,
+                     const size_t sizeof_t)
+    : xsize_(static_cast<uint32_t>(xsize)),
+      ysize_(static_cast<uint32_t>(ysize)),
+      orig_xsize_(static_cast<uint32_t>(xsize)),
+      orig_ysize_(static_cast<uint32_t>(ysize)) {
+  // (CacheAligned cannot be profiled itself because profiler.h depends on it.)
+  PROFILER_FUNC;
+
+  // The members are 32-bit: abort if either dimension was truncated.
+  JXL_CHECK(xsize == xsize_);
+  JXL_CHECK(ysize == ysize_);
+  JXL_ASSERT(sizeof_t == 1 || sizeof_t == 2 || sizeof_t == 4 || sizeof_t == 8);
+
+  // Zero-sized planes (e.g. lazily-allocated images) own no memory, because
+  // even a "zero"-byte allocation carries padding/bookkeeping overhead.
+  bytes_per_row_ = 0;
+  if (xsize == 0 || ysize == 0) return;
+
+  bytes_per_row_ = BytesPerRow(xsize, sizeof_t);
+  bytes_ = AllocateArray(bytes_per_row_ * ysize);
+  JXL_CHECK(bytes_.get());
+  InitializePadding(sizeof_t, Padding::kRoundUp);
+}
+
+// Writes the msan sentinel into row padding; empty unless msan (or HWY_IDE).
+void PlaneBase::InitializePadding(const size_t sizeof_t, Padding padding) {
+#if defined(MEMORY_SANITIZER) || HWY_IDE
+  if (xsize_ == 0 || ysize_ == 0) return;  // Empty plane: nothing to fill.
+  const size_t vec_size = VectorSize();
+  if (vec_size == 0) return;  // Scalar mode: no padding needed
+
+  const size_t valid_size = xsize_ * sizeof_t;
+  const size_t initialize_size = padding == Padding::kRoundUp
+                                     ? RoundUpTo(valid_size, vec_size)
+                                     : valid_size + vec_size - sizeof_t;
+  if (valid_size == initialize_size) return;
+
+  for (size_t y = 0; y < ysize_; ++y) {
+    uint8_t* JXL_RESTRICT row = static_cast<uint8_t*>(VoidRow(y));
+#if defined(__clang__) && (__clang_major__ <= 6)
+    // clang-6 msan mishandles AVX2: also sentinel-fill the valid bytes, but
+    // only for kRoundUp (constructor; no pixels yet) so data is never lost.
+    uint8_t* first = padding == Padding::kRoundUp ? row : row + valid_size;
+    std::fill(first, row + initialize_size, msan::kSanitizerSentinelByte);
+#else
+    memset(row + valid_size, msan::kSanitizerSentinelByte,
+           initialize_size - valid_size);
+#endif  // clang6
+  }
+#endif  // MEMORY_SANITIZER
+}
+
+void PlaneBase::Swap(PlaneBase& other) {
+  std::swap(bytes_, other.bytes_);  // Pixel storage.
+  std::swap(bytes_per_row_, other.bytes_per_row_);
+  std::swap(orig_xsize_, other.orig_xsize_);  // Dimensions at construction.
+  std::swap(orig_ysize_, other.orig_ysize_);
+  std::swap(xsize_, other.xsize_);  // Currently valid dimensions.
+  std::swap(ysize_, other.ysize_);
+}
+
+// Copies ysize rows of xsize bytes (bytes_per_row apart) into a new ImageB.
+ImageB ImageFromPacked(const uint8_t* packed, const size_t xsize,
+                       const size_t ysize, const size_t bytes_per_row) {
+  JXL_ASSERT(bytes_per_row >= xsize);
+  ImageB image(xsize, ysize);
+  PROFILER_FUNC;
+  for (size_t y = 0; y < ysize; ++y) {
+    const uint8_t* JXL_RESTRICT src = packed + y * bytes_per_row;
+    memcpy(image.Row(y), src, xsize);
+  }
+  return image;
+}
+
+// Returns an xsize x ysize copy of `in`, padded by repeating the last column
+// and last row. (Using mirroring here gives slightly worse results.)
+ImageF PadImage(const ImageF& in, const size_t xsize, const size_t ysize) {
+  JXL_ASSERT(xsize >= in.xsize());
+  JXL_ASSERT(ysize >= in.ysize());
+  ImageF out(xsize, ysize);
+
+  // Rows present in the input: copy them, then repeat the last sample.
+  const int lastcol = in.xsize() - 1;
+  for (size_t y = 0; y < in.ysize(); ++y) {
+    float* JXL_RESTRICT row_out = out.Row(y);
+    memcpy(row_out, in.ConstRow(y), in.xsize() * sizeof(float));
+    const float lastval = row_out[lastcol];
+    for (size_t x = in.xsize(); x < xsize; ++x) {
+      row_out[x] = lastval;
+    }
+  }
+
+  // Rows below the input: duplicate the last, already padded, output row.
+  // TODO(janwas): no need to copy if we can 'extend' image: if rows are
+  // pointers to any memory? Or allocate larger image before IO?
+  const int lastrow = in.ysize() - 1;
+  for (size_t y = in.ysize(); y < ysize; ++y) {
+    memcpy(out.Row(y), out.ConstRow(lastrow), xsize * sizeof(float));
+  }
+  return out;
+}
+
+// Returns `in` enlarged by xborder columns / yborder rows of mirrored samples.
+Image3F PadImageMirror(const Image3F& in, const size_t xborder,
+                       const size_t yborder) {
+  const size_t xsize = in.xsize();
+  const size_t ysize = in.ysize();
+  Image3F out(xsize + 2 * xborder, ysize + 2 * yborder);
+  if (xborder > xsize || yborder > ysize) {
+    // A border exceeds the image: look up every sample through Mirror().
+    for (size_t c = 0; c < 3; c++) {
+      for (int32_t y = 0; y < static_cast<int32_t>(out.ysize()); y++) {
+        float* row_out = out.PlaneRow(c, y);
+        const float* row_in =
+            in.PlaneRow(c, Mirror(y - static_cast<int32_t>(yborder), ysize));
+        for (int32_t x = 0; x < static_cast<int32_t>(out.xsize()); x++) {
+          int32_t xin = Mirror(x - static_cast<int32_t>(xborder), xsize);
+          row_out[x] = row_in[xin];
+        }
+      }
+    }
+    return out;
+  }
+  CopyImageTo(in, Rect(xborder, yborder, xsize, ysize), &out);
+  const size_t row_bytes = out.xsize() * sizeof(float);
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t y = 0; y < ysize; y++) {  // Left and right borders.
+      const float* src = in.ConstPlaneRow(c, y);
+      float* dst = out.PlaneRow(c, y + yborder);
+      for (size_t x = 0; x < xborder; x++) {
+        dst[x] = src[xborder - x - 1];
+        dst[x + xsize + xborder] = src[xsize - 1 - x];
+      }
+    }
+    for (size_t y = 0; y < yborder; y++) {  // Top and bottom borders.
+      memcpy(out.PlaneRow(c, y), out.ConstPlaneRow(c, 2 * yborder - 1 - y),
+             row_bytes);
+      memcpy(out.PlaneRow(c, y + ysize + yborder),
+             out.ConstPlaneRow(c, ysize + yborder - 1 - y), row_bytes);
+    }
+  }
+  return out;
+}
+
+// Pads every plane of `in` with PadImage so that both dimensions become
+// multiples of N.
+Image3F PadImageToMultiple(const Image3F& in, const size_t N) {
+  PROFILER_FUNC;
+  // Smallest multiples of N that are >= the input dimensions.
+  const size_t padded_xsize = N * DivCeil(in.xsize(), N);
+  const size_t padded_ysize = N * DivCeil(in.ysize(), N);
+  ImageF plane0 = PadImage(in.Plane(0), padded_xsize, padded_ysize);
+  ImageF plane1 = PadImage(in.Plane(1), padded_xsize, padded_ysize);
+  ImageF plane2 = PadImage(in.Plane(2), padded_xsize, padded_ysize);
+  return Image3F(std::move(plane0), std::move(plane1), std::move(plane2));
+}
+
+// Grows *in to block-multiple dimensions, repeating the last column and row.
+void PadImageToBlockMultipleInPlace(Image3F* JXL_RESTRICT in) {
+  PROFILER_FUNC;
+  const size_t old_xsize = in->xsize();
+  const size_t old_ysize = in->ysize();
+  const size_t new_xsize = RoundUpToBlockDim(old_xsize);
+  const size_t new_ysize = RoundUpToBlockDim(old_ysize);
+  in->ShrinkTo(new_xsize, new_ysize);  // May grow, up to the allocated size.
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t y = 0; y < old_ysize; y++) {  // Extend rows to the right.
+      float* JXL_RESTRICT row = in->PlaneRow(c, y);
+      for (size_t x = old_xsize; x < new_xsize; x++) {
+        row[x] = row[old_xsize - 1];
+      }
+    }
+    const float* JXL_RESTRICT last_row = in->ConstPlaneRow(c, old_ysize - 1);
+    for (size_t y = old_ysize; y < new_ysize; y++) {  // Add rows below.
+      memcpy(in->PlaneRow(c, y), last_row, new_xsize * sizeof(float));
+    }
+  }
+}
+
+// Sums a(x, y) * b(x, y) over the extent of `a`; float products, double sum.
+float DotProduct(const ImageF& a, const ImageF& b) {
+  double sum = 0.0;
+  const size_t xsize = a.xsize();
+  for (size_t y = 0; y < a.ysize(); ++y) {
+    const float* JXL_RESTRICT row_a = a.ConstRow(y);
+    const float* JXL_RESTRICT row_b = b.ConstRow(y);
+    for (size_t x = 0; x < xsize; ++x) sum += row_a[x] * row_b[x];
+  }
+  return sum;
+}
+
+// Averages factor x factor blocks of `input` into `output` (partial at edges).
+static void DownsampleImage(const ImageF& input, size_t factor,
+                            ImageF* output) {
+  JXL_ASSERT(factor != 1);
+  output->ShrinkTo(DivCeil(input.xsize(), factor),
+                   DivCeil(input.ysize(), factor));
+  const size_t in_stride = input.PixelsPerRow();
+  for (size_t y = 0; y < output->ysize(); y++) {
+    float* row_out = output->Row(y);
+    const float* row_in = input.Row(factor * y);
+    // Input rows covered by this output row; fewer than factor at the bottom.
+    const size_t num_iy = std::min(factor, input.ysize() - factor * y);
+    for (size_t x = 0; x < output->xsize(); x++) {
+      const size_t num_ix = std::min(factor, input.xsize() - factor * x);
+      float sum = 0;
+      for (size_t iy = 0; iy < num_iy; iy++) {
+        for (size_t ix = 0; ix < num_ix; ix++) {
+          sum += row_in[iy * in_stride + x * factor + ix];
+        }
+      }
+      row_out[x] = sum / (num_iy * num_ix);
+    }
+  }
+}
+
+void DownsampleImage(ImageF* image, size_t factor) {
+  // Over-allocate by kBlockDim so later padding needs no reallocation.
+  ImageF result(DivCeil(image->xsize(), factor) + kBlockDim,
+                DivCeil(image->ysize(), factor) + kBlockDim);
+  DownsampleImage(*image, factor, &result);  // Shrinks result to final size.
+  *image = std::move(result);
+}
+
+void DownsampleImage(Image3F* opsin, size_t factor) {
+  JXL_ASSERT(factor != 1);
+  const size_t out_xsize = DivCeil(opsin->xsize(), factor);
+  const size_t out_ysize = DivCeil(opsin->ysize(), factor);
+  // Over-allocate by kBlockDim (no reallocation when padding), then shrink.
+  Image3F result(out_xsize + kBlockDim, out_ysize + kBlockDim);
+  result.ShrinkTo(out_xsize, out_ysize);
+  for (size_t c = 0; c < 3; c++) {
+    DownsampleImage(opsin->Plane(c), factor, &result.Plane(c));
+  }
+  *opsin = std::move(result);
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image.h
new file mode 100644
index 0000000000..9240e01593
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image.h
@@ -0,0 +1,437 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_IMAGE_H_
+#define LIB_JXL_IMAGE_H_
+
+// SIMD/multicore-friendly planar image representation with row accessors.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <utility>  // std::move
+
+#include "lib/jxl/base/cache_aligned.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Type-independent parts of Plane<> - reduces code duplication and facilitates
+// moving member function implementations to cc file.
+struct PlaneBase {
+  PlaneBase()  // Empty plane: zero dimensions, no storage.
+      : xsize_(0),
+        ysize_(0),
+        orig_xsize_(0),
+        orig_ysize_(0),
+        bytes_per_row_(0),
+        bytes_(nullptr) {}
+  PlaneBase(size_t xsize, size_t ysize, size_t sizeof_t);
+
+  // Copy construction/assignment is forbidden to avoid inadvertent copies,
+  // which can be very expensive. Use CopyImageTo() instead.
+  PlaneBase(const PlaneBase& other) = delete;
+  PlaneBase& operator=(const PlaneBase& other) = delete;
+
+  // Move constructor (required for returning Image from function)
+  PlaneBase(PlaneBase&& other) noexcept = default;
+
+  // Move assignment (required for std::vector)
+  PlaneBase& operator=(PlaneBase&& other) noexcept = default;
+
+  void Swap(PlaneBase& other);  // Exchanges dimensions and storage.
+
+  // Useful for pre-allocating image with some padding for alignment purposes
+  // and later reporting the actual valid dimensions. May also be used to
+  // un-shrink the image. xsize/ysize must be <= the original dimensions; this
+  // is enforced by the JXL_CHECKs below.
+  void ShrinkTo(const size_t xsize, const size_t ysize) {
+    JXL_CHECK(xsize <= orig_xsize_);
+    JXL_CHECK(ysize <= orig_ysize_);
+    xsize_ = static_cast<uint32_t>(xsize);
+    ysize_ = static_cast<uint32_t>(ysize);
+    // NOTE: we can't recompute bytes_per_row for more compact storage and
+    // better locality because that would invalidate the image contents.
+  }
+
+  // Current valid dimensions in pixels (may be smaller after ShrinkTo).
+  JXL_INLINE size_t xsize() const { return xsize_; }
+  JXL_INLINE size_t ysize() const { return ysize_; }
+
+  // NOTE: do not use this for copying rows - the valid xsize may be much less.
+  JXL_INLINE size_t bytes_per_row() const { return bytes_per_row_; }
+
+  // Raw access to byte contents, for interfacing with other libraries.
+  // Unsigned char instead of char to avoid surprises (sign extension).
+  JXL_INLINE uint8_t* bytes() {
+    void* p = bytes_.get();  // Start of row 0; null for an empty plane.
+    return static_cast<uint8_t * JXL_RESTRICT>(JXL_ASSUME_ALIGNED(p, 64));
+  }
+  JXL_INLINE const uint8_t* bytes() const {
+    const void* p = bytes_.get();  // Start of row 0; null for an empty plane.
+    return static_cast<const uint8_t * JXL_RESTRICT>(JXL_ASSUME_ALIGNED(p, 64));
+  }
+
+ protected:
+  // Returns pointer to the start of row y (range-checked in sanitizer builds).
+  JXL_INLINE void* VoidRow(const size_t y) const {
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+    if (y >= ysize_) {
+      JXL_ABORT("Row(%zu) in (%u x %u) image\n", y, xsize_, ysize_);
+    }
+#endif
+
+    void* row = bytes_.get() + y * bytes_per_row_;  // Rows are a fixed stride apart.
+    return JXL_ASSUME_ALIGNED(row, 64);
+  }
+
+  enum class Padding {
+    // Allow Load(d, row + x) for x = 0; x < xsize(); x += Lanes(d). Default.
+    kRoundUp,
+    // Allow LoadU(d, row + x) for x = xsize() - 1. This requires an extra
+    // vector to be initialized. If done by default, this would suppress
+    // legitimate msan warnings. We therefore require users to explicitly call
+    // InitializePadding before using unaligned loads (e.g. convolution).
+    kUnaligned
+  };
+
+  // Initializes the minimum bytes required to suppress msan warnings from
+  // legitimate (according to Padding mode) vector loads/stores on the right
+  // border, where some lanes are uninitialized and assumed to be unused.
+  void InitializePadding(size_t sizeof_t, Padding padding);
+
+  // (Members are non-const to enable assignment during move-assignment.)
+  uint32_t xsize_;  // In valid pixels, not including any padding.
+  uint32_t ysize_;
+  uint32_t orig_xsize_;  // Size given at construction; limit for ShrinkTo.
+  uint32_t orig_ysize_;
+  size_t bytes_per_row_;  // Includes padding.
+  CacheAlignedUniquePtr bytes_;  // Pixel storage; null for an empty plane.
+};
+
+// Single channel, aligned rows separated by padding. T must be POD.
+//
+// 'Single channel' (one 2D array per channel) simplifies vectorization
+// (repeating the same operation on multiple adjacent components) without the
+// complexity of a hybrid layout (8 R, 8 G, 8 B, ...). In particular, clients
+// can easily iterate over all components in a row and Image requires no
+// knowledge of the pixel format beyond the component type "T".
+//
+// 'Aligned' means each row is aligned to the L1 cache line size. This prevents
+// false sharing between two threads operating on adjacent rows.
+//
+// 'Padding' is still relevant because vectors could potentially be larger than
+// a cache line. By rounding up row sizes to the vector size, we allow
+// reading/writing ALIGNED vectors whose first lane is a valid sample. This
+// avoids needing a separate loop to handle remaining unaligned lanes.
+//
+// This image layout could also be achieved with a vector and a row accessor
+// function, but a class wrapper with support for "deleter" allows wrapping
+// existing memory allocated by clients without copying the pixels. It also
+// provides convenient accessors for xsize/ysize, which shortens function
+// argument lists. Supports move-construction so it can be stored in containers.
+template <typename ComponentType>
+class Plane : public PlaneBase {
+ public:
+  using T = ComponentType;
+  static constexpr size_t kNumPlanes = 1;  // A Plane is a single channel.
+
+  Plane() = default;  // Empty plane, see PlaneBase().
+  Plane(const size_t xsize, const size_t ysize)
+      : PlaneBase(xsize, ysize, sizeof(T)) {}
+
+  void InitializePaddingForUnalignedAccesses() {
+    InitializePadding(sizeof(T), Padding::kUnaligned);  // See Padding enum.
+  }
+
+  JXL_INLINE T* Row(const size_t y) { return static_cast<T*>(VoidRow(y)); }
+
+  // Const overload: returns pointer to const.
+  JXL_INLINE const T* Row(const size_t y) const {
+    return static_cast<const T*>(VoidRow(y));
+  }
+
+  // Documents that the access is const.
+  JXL_INLINE const T* ConstRow(const size_t y) const {
+    return static_cast<const T*>(VoidRow(y));
+  }
+
+  // Returns number of pixels (some of which are padding) per row. Useful for
+  // computing other rows via pointer arithmetic. WARNING: this must
+  // NOT be used to determine xsize.
+  JXL_INLINE intptr_t PixelsPerRow() const {
+    return static_cast<intptr_t>(bytes_per_row_ / sizeof(T));
+  }
+};
+
+using ImageSB = Plane<int8_t>;
+using ImageB = Plane<uint8_t>;
+using ImageS = Plane<int16_t>;  // signed integer or half-float
+using ImageU = Plane<uint16_t>;
+using ImageI = Plane<int32_t>;
+using ImageF = Plane<float>;
+using ImageD = Plane<double>;
+
+// True iff xsize() and ysize() match. Also works for Image3 and mixed types.
+template <class Image1, class Image2>
+bool SameSize(const Image1& image1, const Image2& image2) {
+  return image1.xsize() == image2.xsize() && image1.ysize() == image2.ysize();
+}
+
+template <typename T>
+class Image3;
+
+// Rectangular region in image(s). Factoring this out of Image instead of
+// shifting the pointer by x0/y0 allows this to apply to multiple images with
+// different resolutions (e.g. color transform and quantization field).
+// Can compare using SameSize(rect1, rect2).
+class Rect {
+ public:
+  // Most windows are xsize_max * ysize_max, except those on the borders where
+  // begin + size_max > end; those are clipped to [begin, end), possibly to 0.
+  constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize_max,
+                 size_t ysize_max, size_t xend, size_t yend)
+      : x0_(xbegin),
+        y0_(ybegin),
+        xsize_(ClampedSize(xbegin, xsize_max, xend)),
+        ysize_(ClampedSize(ybegin, ysize_max, yend)) {}
+
+  // Construct with origin and known size (typically from another Rect).
+  constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize, size_t ysize)
+      : x0_(xbegin), y0_(ybegin), xsize_(xsize), ysize_(ysize) {}
+
+  // Construct a rect that covers a whole image/plane/ImageBundle etc.
+  template <typename Image>
+  explicit Rect(const Image& image)
+      : Rect(0, 0, image.xsize(), image.ysize()) {}
+
+  Rect() : Rect(0, 0, 0, 0) {}  // Empty rect at the origin.
+
+  Rect(const Rect&) = default;
+  Rect& operator=(const Rect&) = default;
+
+  // Construct a subrect that resides in an image/plane/ImageBundle etc.
+  template <typename Image>
+  Rect Crop(const Image& image) const {
+    return Rect(x0_, y0_, xsize_, ysize_, image.xsize(), image.ysize());
+  }
+
+  // Returns a rect that only contains `num` lines with offset `y` from `y0()`.
+  Rect Lines(size_t y, size_t num) const {
+    JXL_DASSERT(y + num <= ysize_);
+    return Rect(x0_, y0_ + y, xsize_, num);
+  }
+
+  Rect Line(size_t y) const { return Lines(y, 1); }  // Single-row Lines().
+
+  JXL_MUST_USE_RESULT Rect Intersection(const Rect& other) const {
+    return Rect(std::max(x0_, other.x0_), std::max(y0_, other.y0_), xsize_,
+                ysize_, std::min(x0_ + xsize_, other.x0_ + other.xsize_),
+                std::min(y0_ + ysize_, other.y0_ + other.ysize_));  // 0-sized if disjoint.
+  }
+
+  JXL_MUST_USE_RESULT Rect Translate(int64_t x_offset, int64_t y_offset) const {
+    return Rect(x0_ + x_offset, y0_ + y_offset, xsize_, ysize_);  // Same size.
+  }
+
+  template <typename T>
+  T* Row(Plane<T>* image, size_t y) const {
+    return image->Row(y + y0_) + x0_;  // y is relative to the rect origin.
+  }
+
+  template <typename T>
+  const T* Row(const Plane<T>* image, size_t y) const {
+    return image->Row(y + y0_) + x0_;
+  }
+
+  template <typename T>
+  T* PlaneRow(Image3<T>* image, const size_t c, size_t y) const {
+    return image->PlaneRow(c, y + y0_) + x0_;
+  }
+
+  template <typename T>
+  const T* ConstRow(const Plane<T>& image, size_t y) const {
+    return image.ConstRow(y + y0_) + x0_;
+  }
+
+  template <typename T>
+  const T* ConstPlaneRow(const Image3<T>& image, size_t c, size_t y) const {
+    return image.ConstPlaneRow(c, y + y0_) + x0_;
+  }
+
+  bool IsInside(const Rect& other) const {  // True iff `other` contains this.
+    return x0_ >= other.x0() && x0_ + xsize_ <= other.x0() + other.xsize_ &&
+           y0_ >= other.y0() && y0_ + ysize_ <= other.y0() + other.ysize();
+  }
+
+  // Returns true if this Rect fully resides in the given image. ImageT could be
+  // Plane<T> or Image3<T>; however if ImageT is Rect, results are nonsensical.
+  template <class ImageT>
+  bool IsInside(const ImageT& image) const {
+    return (x0_ + xsize_ <= image.xsize()) && (y0_ + ysize_ <= image.ysize());
+  }
+
+  size_t x0() const { return x0_; }
+  size_t y0() const { return y0_; }
+  size_t xsize() const { return xsize_; }
+  size_t ysize() const { return ysize_; }
+
+ private:
+  // Returns size_max, or whatever is left in [begin, end).
+  static constexpr size_t ClampedSize(size_t begin, size_t size_max,
+                                      size_t end) {
+    return (begin + size_max <= end) ? size_max
+                                     : (end > begin ? end - begin : 0);
+  }
+
+  size_t x0_;  // Origin of the rect.
+  size_t y0_;
+
+  size_t xsize_;  // Extent of the rect.
+  size_t ysize_;
+};
+
+// Currently, we abuse Image to either refer to an image that owns its storage
+// or one that doesn't. In similar vein, we abuse Image* function parameters to
+// either mean "assign to me" or "fill the provided image with data".
+// Hopefully, the "assign to me" meaning will go away and most images in the
+// codebase will not be backed by own storage. When this happens we can redesign
+// Image to be a non-storage-holding view class and introduce BackedImage in
+// those places that actually need it.
+
+// NOTE: we can't use Image as a view because invariants are violated
+// (alignment and the presence of padding before/after each "row").
+
+// A bundle of 3 same-sized images. Typically constructed by moving from three
+// rvalue references to Image. To overwrite an existing Image3 using
+// single-channel producers, we also need access to Image*. Constructing
+// temporary non-owning Image pointing to one plane of an existing Image3 risks
+// dangling references, especially if the wrapper is moved. Therefore, we
+// store an array of Image (which are compact enough that size is not a concern)
+// and provide Plane+Row accessors.
+template <typename ComponentType>
+class Image3 {
+ public:
+  using T = ComponentType;
+  using PlaneT = jxl::Plane<T>;
+  static constexpr size_t kNumPlanes = 3;
+
+  Image3() : planes_{PlaneT(), PlaneT(), PlaneT()} {}  // Three empty planes.
+
+  Image3(const size_t xsize, const size_t ysize)
+      : planes_{PlaneT(xsize, ysize), PlaneT(xsize, ysize),
+                PlaneT(xsize, ysize)} {}
+
+  Image3(Image3&& other) noexcept {
+    for (size_t i = 0; i < kNumPlanes; i++) {
+      planes_[i] = std::move(other.planes_[i]);
+    }
+  }
+
+  Image3(PlaneT&& plane0, PlaneT&& plane1, PlaneT&& plane2) {
+    JXL_CHECK(SameSize(plane0, plane1));  // All planes must agree in size.
+    JXL_CHECK(SameSize(plane0, plane2));
+    planes_[0] = std::move(plane0);
+    planes_[1] = std::move(plane1);
+    planes_[2] = std::move(plane2);
+  }
+
+  // Copy construction/assignment is forbidden to avoid inadvertent copies,
+  // which can be very expensive. Use CopyImageTo instead.
+  Image3(const Image3& other) = delete;
+  Image3& operator=(const Image3& other) = delete;
+
+  Image3& operator=(Image3&& other) noexcept {
+    for (size_t i = 0; i < kNumPlanes; i++) {
+      planes_[i] = std::move(other.planes_[i]);
+    }
+    return *this;
+  }
+
+  // Returns row pointer; usage: PlaneRow(idx_plane, y)[x] = val.
+  JXL_INLINE T* PlaneRow(const size_t c, const size_t y) {
+    // Custom implementation instead of calling planes_[c].Row ensures only a
+    // single multiplication is needed for PlaneRow(0..2, y).
+    PlaneRowBoundsCheck(c, y);
+    const size_t row_offset = y * planes_[0].bytes_per_row();
+    void* row = planes_[c].bytes() + row_offset;
+    return static_cast<T * JXL_RESTRICT>(JXL_ASSUME_ALIGNED(row, 64));
+  }
+
+  // Returns const row pointer; usage: val = PlaneRow(idx_plane, y)[x].
+  JXL_INLINE const T* PlaneRow(const size_t c, const size_t y) const {
+    PlaneRowBoundsCheck(c, y);
+    const size_t row_offset = y * planes_[0].bytes_per_row();
+    const void* row = planes_[c].bytes() + row_offset;
+    return static_cast<const T * JXL_RESTRICT>(JXL_ASSUME_ALIGNED(row, 64));
+  }
+
+  // Returns const row pointer, even if called from a non-const Image3.
+  JXL_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) const {
+    PlaneRowBoundsCheck(c, y);  // NOTE(review): PlaneRow() checks again.
+    return PlaneRow(c, y);
+  }
+
+  JXL_INLINE const PlaneT& Plane(size_t idx) const { return planes_[idx]; }
+
+  JXL_INLINE PlaneT& Plane(size_t idx) { return planes_[idx]; }
+
+  void Swap(Image3& other) {
+    for (size_t c = 0; c < 3; ++c) {  // 3 == kNumPlanes.
+      other.planes_[c].Swap(planes_[c]);
+    }
+  }
+
+  // Useful for pre-allocating image with some padding for alignment purposes
+  // and later reporting the actual valid dimensions. May also be used to
+  // un-shrink the image. xsize/ysize must be <= the original dimensions
+  // (each plane's ShrinkTo checks this).
+  void ShrinkTo(const size_t xsize, const size_t ysize) {
+    for (PlaneT& plane : planes_) {
+      plane.ShrinkTo(xsize, ysize);
+    }
+  }
+
+  // Sizes of all three images are guaranteed to be equal.
+  JXL_INLINE size_t xsize() const { return planes_[0].xsize(); }
+  JXL_INLINE size_t ysize() const { return planes_[0].ysize(); }
+  // Returns offset [bytes] from one row to the next row of the same plane.
+  // WARNING: this must NOT be used to determine xsize, nor for copying rows -
+  // the valid xsize may be much less.
+  JXL_INLINE size_t bytes_per_row() const { return planes_[0].bytes_per_row(); }
+  // Returns number of pixels (some of which are padding) per row. Useful for
+  // computing other rows via pointer arithmetic. WARNING: this must NOT be used
+  // to determine xsize.
+  JXL_INLINE intptr_t PixelsPerRow() const { return planes_[0].PixelsPerRow(); }
+
+ private:
+  void PlaneRowBoundsCheck(const size_t c, const size_t y) const {
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+    if (c >= kNumPlanes || y >= ysize()) {  // Compiled in sanitizer builds only.
+      JXL_ABORT("PlaneRow(%zu, %zu) in (%zu x %zu) image\n", c, y, xsize(),
+                ysize());
+    }
+#endif
+  }
+
+ private:
+  PlaneT planes_[kNumPlanes];  // One Plane per channel.
+};
+
+using Image3B = Image3<uint8_t>;
+using Image3S = Image3<int16_t>;
+using Image3U = Image3<uint16_t>;
+using Image3I = Image3<int32_t>;
+using Image3F = Image3<float>;
+using Image3D = Image3<double>;
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_IMAGE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_bundle.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_bundle.cc
new file mode 100644
index 0000000000..0221903219
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_bundle.cc
@@ -0,0 +1,149 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/image_bundle.h"
+
+#include <limits>
+#include <utility>
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/luminance.h"
+
+namespace jxl {
+
+// Applies ShrinkTo(xsize, ysize) to the color image (if present) and to every
+// extra channel.
+void ImageBundle::ShrinkTo(size_t xsize, size_t ysize) {
+  if (HasColor()) color_.ShrinkTo(xsize, ysize);
+  for (ImageF& channel : extra_channels_) channel.ShrinkTo(xsize, ysize);
+}
+
+// Takes ownership of `color`. Called by all other SetFrom*.
+void ImageBundle::SetFromImage(Image3F&& color,
+                               const ColorEncoding& c_current) {
+  JXL_CHECK(color.xsize() != 0 && color.ysize() != 0);  // Reject empty images.
+  JXL_CHECK(metadata_->color_encoding.IsGray() == c_current.IsGray());
+  color_ = std::move(color);
+  c_current_ = c_current;  // Encoding that describes the pixels in color_.
+  VerifySizes();  // Extra channels, if any, must match xsize() x ysize().
+}
+
+void ImageBundle::VerifyMetadata() const {
+  JXL_CHECK(!c_current_.ICC().empty());  // An ICC profile must be present.
+  JXL_CHECK(metadata_->color_encoding.IsGray() == IsGray());
+
+  if (metadata_->HasAlpha() && alpha().xsize() == 0) {  // Declared but empty.
+    JXL_ABORT("MD alpha_bits %u IB alpha %zu x %zu\n",
+              metadata_->GetAlphaBits(), alpha().xsize(), alpha().ysize());
+  }
+  const uint32_t alpha_bits = metadata_->GetAlphaBits();
+  JXL_CHECK(alpha_bits <= 32);
+
+  // metadata_->num_extra_channels may temporarily differ from
+  // extra_channels_.size(), e.g. after SetAlpha. They are synced by the next
+  // call to VisitFields.
+}
+
+// Checks that every extra channel has the bundle's (nonzero) dimensions.
+void ImageBundle::VerifySizes() const {
+  const size_t xs = xsize();
+  const size_t ys = ysize();
+  if (!HasExtraChannels()) return;
+
+  JXL_CHECK(xs != 0 && ys != 0);
+  for (const ImageF& ec : extra_channels_) {
+    JXL_CHECK(ec.xsize() == xs);
+    JXL_CHECK(ec.ysize() == ys);
+  }
+}
+
+size_t ImageBundle::DetectRealBitdepth() const {
+  // Currently just reports the bit depth stored in the metadata.
+  // TODO(lode): let this function return lower bit depth if possible, e.g.
+  // return 8 bits in case the original image came from a 16-bit PNG that
+  // was in fact representable as 8-bit PNG. Ensure that the implementation
+  // returns 16 if e.g. two consecutive 16-bit values appeared in the original
+  // image (such as 32768 and 32769), take into account that e.g. the values
+  // 3-bit can represent is not a superset of the values 2-bit can represent,
+  // and there may be slight imprecisions in the floating point image.
+  return metadata_->bit_depth.bits_per_sample;
+}
+
+const ImageF& ImageBundle::alpha() const {
+  JXL_ASSERT(HasAlpha());
+  const ExtraChannelInfo* eci = metadata_->Find(ExtraChannel::kAlpha);
+  const size_t ec = eci - metadata_->extra_channel_info.data();  // Its index.
+  JXL_ASSERT(ec < extra_channels_.size());
+  return extra_channels_[ec];  // Image stored at the same index.
+}
+ImageF* ImageBundle::alpha() {
+  JXL_ASSERT(HasAlpha());
+  const ExtraChannelInfo* eci = metadata_->Find(ExtraChannel::kAlpha);
+  const size_t ec = eci - metadata_->extra_channel_info.data();  // Its index.
+  JXL_ASSERT(ec < extra_channels_.size());
+  return &extra_channels_[ec];  // Image stored at the same index.
+}
+
+const ImageF& ImageBundle::depth() const {
+  JXL_ASSERT(HasDepth());
+  const ExtraChannelInfo* eci = metadata_->Find(ExtraChannel::kDepth);
+  const size_t ec = eci - metadata_->extra_channel_info.data();  // Its index.
+  JXL_ASSERT(ec < extra_channels_.size());
+  return extra_channels_[ec];  // Image stored at the same index.
+}
+
+// Inserts `alpha` at the index of the kAlpha entry in the metadata.
+void ImageBundle::SetAlpha(ImageF&& alpha, bool alpha_is_premultiplied) {
+  const ExtraChannelInfo* eci = metadata_->Find(ExtraChannel::kAlpha);
+  // Must call SetAlphaBits first, otherwise we don't know which channel index
+  JXL_CHECK(eci != nullptr);
+  JXL_CHECK(alpha.xsize() != 0 && alpha.ysize() != 0);
+  JXL_CHECK(eci->alpha_associated == alpha_is_premultiplied);
+  const auto index = eci - metadata_->extra_channel_info.data();
+  extra_channels_.insert(extra_channels_.begin() + index, std::move(alpha));
+  // num_extra_channels is automatically set in visitor
+  VerifySizes();
+}
+// Runs ::jxl::PremultiplyAlpha on each row unless alpha is already associated.
+void ImageBundle::PremultiplyAlpha() {
+  if (!HasAlpha() || !HasColor()) return;
+  if (metadata_->Find(ExtraChannel::kAlpha)->alpha_associated) return;
+  JXL_CHECK(color_.ysize() == alpha()->ysize());
+  JXL_CHECK(color_.xsize() == alpha()->xsize());
+  ImageF* const alpha_plane = alpha();
+  const size_t width = color_.xsize();
+  for (size_t y = 0; y < color_.ysize(); y++) {
+    ::jxl::PremultiplyAlpha(color_.PlaneRow(0, y), color_.PlaneRow(1, y),
+                            color_.PlaneRow(2, y), alpha_plane->Row(y), width);
+  }
+}
+// Runs ::jxl::UnpremultiplyAlpha on each row if alpha is marked associated.
+void ImageBundle::UnpremultiplyAlpha() {
+  if (!HasAlpha() || !HasColor()) return;
+  if (!metadata_->Find(ExtraChannel::kAlpha)->alpha_associated) return;
+  JXL_CHECK(color_.ysize() == alpha()->ysize());
+  JXL_CHECK(color_.xsize() == alpha()->xsize());
+  ImageF* const alpha_plane = alpha();
+  const size_t width = color_.xsize();
+  for (size_t y = 0; y < color_.ysize(); y++) {
+    ::jxl::UnpremultiplyAlpha(color_.PlaneRow(0, y), color_.PlaneRow(1, y),
+                              color_.PlaneRow(2, y), alpha_plane->Row(y), width);
+  }
+}
+
+void ImageBundle::SetExtraChannels(std::vector<ImageF>&& extra_channels) {
+  for (const ImageF& plane : extra_channels) {  // Empty planes are rejected.
+    JXL_CHECK(plane.xsize() != 0 && plane.ysize() != 0);
+  }
+  extra_channels_ = std::move(extra_channels);  // Replaces all channels.
+  VerifySizes();  // Each channel must match xsize() x ysize().
+}
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_bundle.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_bundle.h
new file mode 100644
index 0000000000..83f5f7bd31
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_bundle.h
@@ -0,0 +1,263 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_IMAGE_BUNDLE_H_
+#define LIB_JXL_IMAGE_BUNDLE_H_
+
+// The main image or frame consists of a bundle of associated images.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_metadata.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+
+// A bundle of color/alpha/depth/plane images.
+class ImageBundle {
+ public:
+  // Uninitialized state for use as output parameter; metadata_ is null.
+  ImageBundle() : metadata_(nullptr) {}
+  // Caller is responsible for setting metadata before calling Set*.
+  explicit ImageBundle(const ImageMetadata* metadata) : metadata_(metadata) {}
+
+  // Move-only (allows storing in std::vector); use Copy() for a duplicate.
+  ImageBundle(ImageBundle&&) = default;
+  ImageBundle& operator=(ImageBundle&&) = default;
+
+  ImageBundle Copy() const {
+    ImageBundle copy(metadata_);
+    copy.color_ = CopyImage(color_);
+    copy.c_current_ = c_current_;
+    copy.extra_channels_.reserve(extra_channels_.size());
+    for (const ImageF& plane : extra_channels_) {
+      copy.extra_channels_.emplace_back(CopyImage(plane));
+    }
+    // Of the public fields only the JPEG-related ones are copied.
+    copy.jpeg_data =
+        jpeg_data ? make_unique<jpeg::JPEGData>(*jpeg_data) : nullptr;
+    copy.color_transform = color_transform;
+    copy.chroma_subsampling = chroma_subsampling;
+
+    return copy;
+  }
+
+  // -- SIZE
+
+  size_t xsize() const {
+    if (IsJPEG()) return jpeg_data->width;
+    if (color_.xsize() != 0) return color_.xsize();
+    return extra_channels_.empty() ? 0 : extra_channels_[0].xsize();
+  }
+  size_t ysize() const {
+    if (IsJPEG()) return jpeg_data->height;
+    if (color_.ysize() != 0) return color_.ysize();
+    return extra_channels_.empty() ? 0 : extra_channels_[0].ysize();
+  }
+  void ShrinkTo(size_t xsize, size_t ysize);
+
+  // Sizes taking orientation into account (values > 4 swap x and y).
+  size_t oriented_xsize() const {
+    if (static_cast<uint32_t>(metadata_->GetOrientation()) > 4) {
+      return ysize();
+    } else {
+      return xsize();
+    }
+  }
+  size_t oriented_ysize() const {
+    if (static_cast<uint32_t>(metadata_->GetOrientation()) > 4) {
+      return xsize();
+    } else {
+      return ysize();
+    }
+  }
+
+  // -- COLOR
+
+  // Whether color() is valid/usable. Returns true in most cases. Even images
+  // with spot colors (an example of !extra_channels().empty()) typically have
+  // a part that can be converted to RGB.
+  bool HasColor() const { return color_.xsize() != 0; }
+
+  // For resetting the size when switching from a reference to main frame.
+  void RemoveColor() { color_ = Image3F(); }
+
+  // Do not use if !HasColor().
+  const Image3F& color() const {
+    // If this fails, Set* was not called - perhaps because decoding failed?
+    JXL_DASSERT(HasColor());
+    return color_;
+  }
+
+  // Do not use if !HasColor().
+  Image3F* color() {
+    JXL_DASSERT(HasColor());
+    return &color_;
+  }
+
+  // If c_current.IsGray(), all planes must be identical. NOTE: c_current is
+  // independent of metadata()->color_encoding, which is the original, whereas
+  // a decoder might return pixels in a different c_current.
+  // This only sets the color channels, you must also make extra channels
+  // match the amount that is in the metadata.
+  void SetFromImage(Image3F&& color, const ColorEncoding& c_current);
+
+  // -- COLOR ENCODING
+
+  const ColorEncoding& c_current() const { return c_current_; }
+
+  // Returns whether the color image has identical planes. Once established by
+  // Set*, remains unchanged until the next Set*/TransformTo/OverrideProfile.
+  bool IsGray() const { return c_current_.IsGray(); }
+
+  bool IsSRGB() const { return c_current_.IsSRGB(); }
+  bool IsLinearSRGB() const {
+    return c_current_.white_point == WhitePoint::kD65 &&
+           c_current_.primaries == Primaries::kSRGB && c_current_.tf.IsLinear();
+  }
+
+  // Set the c_current profile without doing any transformation, e.g. if the
+  // transformation was already applied.
+  void OverrideProfile(const ColorEncoding& new_c_current) {
+    c_current_ = new_c_current;
+  }
+
+  // TODO(lode): TransformTo and CopyTo are implemented in enc_image_bundle.cc,
+  // move these functions out of this header file and class, to
+  // enc_image_bundle.h.
+
+  // Transforms color to c_desired and sets c_current to c_desired. Alpha and
+  // metadata remain unchanged.
+  Status TransformTo(const ColorEncoding& c_desired,
+                     ThreadPool* pool = nullptr);
+  // Copies this:rect, converts to c_desired, and allocates+fills out.
+  Status CopyTo(const Rect& rect, const ColorEncoding& c_desired, Image3B* out,
+                ThreadPool* pool = nullptr) const;
+  Status CopyTo(const Rect& rect, const ColorEncoding& c_desired, Image3F* out,
+                ThreadPool* pool = nullptr) const;
+  Status CopyToSRGB(const Rect& rect, Image3B* out,
+                    ThreadPool* pool = nullptr) const;
+
+  // Detect 'real' bit depth, which can be lower than nominal bit depth
+  // (this is common in PNG), returns 'real' bit depth
+  size_t DetectRealBitdepth() const;
+
+  // -- ALPHA
+
+  void SetAlpha(ImageF&& alpha, bool alpha_is_premultiplied);
+  bool HasAlpha() const {
+    return metadata_->Find(ExtraChannel::kAlpha) != nullptr;
+  }
+  bool AlphaIsPremultiplied() const {
+    const ExtraChannelInfo* eci = metadata_->Find(ExtraChannel::kAlpha);
+    return (eci == nullptr) ? false : eci->alpha_associated;
+  }
+  // Premultiply alpha (if it isn't already premultiplied)
+  void PremultiplyAlpha();
+  // Unpremultiply alpha (if it isn't already non-premultiplied)
+  void UnpremultiplyAlpha();
+  const ImageF& alpha() const;
+  ImageF* alpha();
+
+  // -- DEPTH
+  bool HasDepth() const {
+    return metadata_->Find(ExtraChannel::kDepth) != nullptr;
+  }
+  const ImageF& depth() const;
+
+  // -- EXTRA CHANNELS
+
+  // Extra channels of unknown interpretation (e.g. spot colors).
+  void SetExtraChannels(std::vector<ImageF>&& extra_channels);
+  void ClearExtraChannels() { extra_channels_.clear(); }
+  bool HasExtraChannels() const { return !extra_channels_.empty(); }
+  const std::vector<ImageF>& extra_channels() const { return extra_channels_; }
+  std::vector<ImageF>& extra_channels() { return extra_channels_; }
+
+  const ImageMetadata* metadata() const { return metadata_; }
+
+  void VerifyMetadata() const;
+
+  void SetDecodedBytes(size_t decoded_bytes) { decoded_bytes_ = decoded_bytes; }
+  size_t decoded_bytes() const { return decoded_bytes_; }
+
+  // -- JPEG transcoding:
+
+  // Returns true if image does or will represent quantized DCT-8 coefficients,
+  // stored in 8x8 pixel regions.
+  bool IsJPEG() const {
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+    return jpeg_data != nullptr;
+#else   // JPEGXL_ENABLE_TRANSCODE_JPEG
+    return false;
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+  }
+
+  std::unique_ptr<jpeg::JPEGData> jpeg_data;
+  // these fields are used to signal the input JPEG color space
+  // NOTE: JPEG doesn't actually provide a way to determine whether YCbCr was
+  // applied or not.
+  ColorTransform color_transform = ColorTransform::kNone;
+  YCbCrChromaSubsampling chroma_subsampling;
+
+  FrameOrigin origin{0, 0};
+  // Animation-related information. This assumes GIF- and APNG- like animation.
+  uint32_t duration = 0;
+  bool use_for_next_frame = false;
+  bool blend = false;
+  BlendMode blendmode = BlendMode::kBlend;
+  std::string name;
+
+ private:
+  // Called after any Set* to ensure their sizes are compatible.
+  void VerifySizes() const;
+
+  // Required for TransformTo so that an ImageBundle is self-sufficient. Always
+  // points to the same thing, but cannot be const-pointer because that prevents
+  // the compiler from generating a move ctor.
+  const ImageMetadata* metadata_;
+
+  // Initialized by Set*:
+  Image3F color_;  // Empty => extra_channels_ is not; planes equal if IsGray().
+  ColorEncoding c_current_;  // of color_
+
+  // Set by SetAlpha/SetExtraChannels; size = ImageMetadata.num_extra_channels
+  std::vector<ImageF> extra_channels_;
+
+  // How many bytes of the input were actually read.
+  size_t decoded_bytes_ = 0;
+};
+
+// Does color transformation from in.c_current() to c_desired if the color
+// encodings are different, or nothing if they are already the same.
+// If color transformation is done, stores the transformed values into store and
+// sets the out pointer to store, else leaves store untouched and sets the out
+// pointer to &in.
+// Returns false if color transform fails.
+Status TransformIfNeeded(const ImageBundle& in, const ColorEncoding& c_desired,
+                         ThreadPool* pool, ImageBundle* store,
+                         const ImageBundle** out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_IMAGE_BUNDLE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_bundle_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_bundle_test.cc
new file mode 100644
index 0000000000..6de2e49dbf
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_bundle_test.cc
@@ -0,0 +1,36 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/image_bundle.h"
+
+#include "gtest/gtest.h"
+#include "lib/jxl/aux_out.h"
+
+namespace jxl {
+namespace {
+
+TEST(ImageBundleTest, ExtraChannelName) {  // metadata write/read round trip
+  AuxOut aux_out;
+  BitWriter writer;
+  BitWriter::Allotment allotment(&writer, 99);
+  // Metadata with a single kBlack extra channel carrying a custom name.
+  ImageMetadata metadata;
+  ExtraChannelInfo eci;
+  eci.type = ExtraChannel::kBlack;
+  eci.name = "testK";
+  metadata.extra_channel_info.push_back(std::move(eci));
+  ASSERT_TRUE(WriteImageMetadata(metadata, &writer, /*layer=*/0, &aux_out));
+  writer.ZeroPadToByte();
+  ReclaimAndCharge(&writer, &allotment, /*layer=*/0, &aux_out);
+  BitReader reader(writer.GetSpan());
+  ImageMetadata metadata_out;
+  ASSERT_TRUE(ReadImageMetadata(&reader, &metadata_out));
+  EXPECT_TRUE(reader.Close());
+  const ExtraChannelInfo* black = metadata_out.Find(ExtraChannel::kBlack);
+  ASSERT_TRUE(black != nullptr);  // fail cleanly instead of a null deref
+  EXPECT_EQ("testK", black->name);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_metadata.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_metadata.cc
new file mode 100644
index 0000000000..2d9d62e268
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_metadata.cc
@@ -0,0 +1,414 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/image_metadata.h"
+
+#include <limits>
+#include <utility>
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+BitDepth::BitDepth() { Bundle::Init(this); }
+Status BitDepth::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &floating_point_sample));
+  // The same fields (bits_per_sample and exponent_bits_per_sample) are read
+  // in a different way depending on floating_point_sample's value. It's still
+  // default-initialized correctly so using visitor->Conditional is not
+  // required.
+  if (!floating_point_sample) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+        Val(8), Val(10), Val(12), BitsOffset(6, 1), 8, &bits_per_sample));
+    exponent_bits_per_sample = 0;
+  } else {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+        Val(32), Val(16), Val(24), BitsOffset(6, 1), 32, &bits_per_sample));
+    // The encoded value is exponent_bits_per_sample - 1, visited as a 4-bit
+    // field; only results in [2, 8] pass the range check below.
+    const uint32_t offset = 1;
+    exponent_bits_per_sample -= offset;
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->Bits(4, 8 - offset, &exponent_bits_per_sample));
+    exponent_bits_per_sample += offset;
+  }
+
+  // Range checks (float: exponent and mantissa widths; integer: <= 31 bits).
+  if (floating_point_sample) {
+    if (exponent_bits_per_sample < 2 || exponent_bits_per_sample > 8) {
+      return JXL_FAILURE("Invalid exponent_bits_per_sample: %u",
+                         exponent_bits_per_sample);
+    }
+    int mantissa_bits =  // the "- 1" is presumably the sign bit
+        static_cast<int>(bits_per_sample) - exponent_bits_per_sample - 1;
+    if (mantissa_bits < 2 || mantissa_bits > 23) {
+      return JXL_FAILURE("Invalid bits_per_sample: %u", bits_per_sample);
+    }
+  } else {
+    if (bits_per_sample > 31) {
+      return JXL_FAILURE("Invalid bits_per_sample: %u", bits_per_sample);
+    }
+  }
+  return true;
+}
+
+CustomTransformData::CustomTransformData() { Bundle::Init(this); }
+Status CustomTransformData::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  if (visitor->AllDefault(*this, &all_default)) {
+    // Overwrite all serialized fields, but not any nonserialized_*.
+    visitor->SetDefault(this);
+    return true;
+  }
+  if (visitor->Conditional(nonserialized_xyb_encoded)) {  // XYB-encoded only
+    JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&opsin_inverse_matrix));
+  }
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 0, &custom_weights_mask));
+  if (visitor->Conditional((custom_weights_mask & 0x1) != 0)) {
+    // Mask bit 0 => custom upsampling2_weights. 4 5x5 kernels, all obtainable
+    // by symmetry from one that is symmetric along its main diagonal. The
+    // numbers below are indices into the 15 weights of the top-left kernel:
+    //
+    // 0  1  2  3  4
+    // 1  5  6  7  8
+    // 2  6  9 10 11
+    // 3  7 10 12 13
+    // 4  8 11 13 14
+    float constexpr kWeights2[15] = {
+        -0.01716200f, -0.03452303f, -0.04022174f, -0.02921014f, -0.00624645f,
+        0.14111091f,  0.28896755f,  0.00278718f,  -0.01610267f, 0.56661550f,
+        0.03777607f,  -0.01986694f, -0.03144731f, -0.01185068f, -0.00213539f};
+    for (size_t i = 0; i < 15; i++) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(kWeights2[i], &upsampling2_weights[i]));
+    }
+  }
+  if (visitor->Conditional((custom_weights_mask & 0x2) != 0)) {
+    // Mask bit 1 => custom upsampling4_weights. 16 5x5 kernels, all obtainable
+    // by symmetry from three, two of which are symmetric along their main
+    // diagonals. Indices into the 55 weights of the top-left 4 kernels:
+    //
+    // 0  1  2  3  4   5  6  7  8  9
+    // 1 10 11 12 13  14 15 16 17 18
+    // 2 11 19 20 21  22 23 24 25 26
+    // 3 12 20 27 28  29 30 31 32 33
+    // 4 13 21 28 34  35 36 37 38 39
+    //
+    // 5 14 22 29 35  40 41 42 43 44
+    // 6 15 23 30 36  41 45 46 47 48
+    // 7 16 24 31 37  42 46 49 50 51
+    // 8 17 25 32 38  43 47 50 52 53
+    // 9 18 26 33 39  44 48 51 53 54
+    constexpr float kWeights4[55] = {
+        -0.02419067f, -0.03491987f, -0.03693351f, -0.03094285f, -0.00529785f,
+        -0.01663432f, -0.03556863f, -0.03888905f, -0.03516850f, -0.00989469f,
+        0.23651958f,  0.33392945f,  -0.01073543f, -0.01313181f, -0.03556694f,
+        0.13048175f,  0.40103025f,  0.03951150f,  -0.02077584f, 0.46914198f,
+        -0.00209270f, -0.01484589f, -0.04064806f, 0.18942530f,  0.56279892f,
+        0.06674400f,  -0.02335494f, -0.03551682f, -0.00754830f, -0.02267919f,
+        -0.02363578f, 0.00315804f,  -0.03399098f, -0.01359519f, -0.00091653f,
+        -0.00335467f, -0.01163294f, -0.01610294f, -0.00974088f, -0.00191622f,
+        -0.01095446f, -0.03198464f, -0.04455121f, -0.02799790f, -0.00645912f,
+        0.06390599f,  0.22963888f,  0.00630981f,  -0.01897349f, 0.67537268f,
+        0.08483369f,  -0.02534994f, -0.02205197f, -0.01667999f, -0.00384443f};
+    for (size_t i = 0; i < 55; i++) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(kWeights4[i], &upsampling4_weights[i]));
+    }
+  }
+  if (visitor->Conditional((custom_weights_mask & 0x4) != 0)) {
+    // Mask bit 2 => custom upsampling8_weights. 64 5x5 kernels, all obtainable
+    // by symmetry from 10, 4 of which are symmetric along their main diagonals.
+    // Hexadecimal indices into the 210 weights of the top-left 16 kernels:
+    //  0  1  2  3  4   5  6  7  8  9   a  b  c  d  e   f 10 11 12 13
+    //  1 14 15 16 17  18 19 1a 1b 1c  1d 1e 1f 20 21  22 23 24 25 26
+    //  2 15 27 28 29  2a 2b 2c 2d 2e  2f 30 31 32 33  34 35 36 37 38
+    //  3 16 28 39 3a  3b 3c 3d 3e 3f  40 41 42 43 44  45 46 47 48 49
+    //  4 17 29 3a 4a  4b 4c 4d 4e 4f  50 51 52 53 54  55 56 57 58 59
+
+    //  5 18 2a 3b 4b  5a 5b 5c 5d 5e  5f 60 61 62 63  64 65 66 67 68
+    //  6 19 2b 3c 4c  5b 69 6a 6b 6c  6d 6e 6f 70 71  72 73 74 75 76
+    //  7 1a 2c 3d 4d  5c 6a 77 78 79  7a 7b 7c 7d 7e  7f 80 81 82 83
+    //  8 1b 2d 3e 4e  5d 6b 78 84 85  86 87 88 89 8a  8b 8c 8d 8e 8f
+    //  9 1c 2e 3f 4f  5e 6c 79 85 90  91 92 93 94 95  96 97 98 99 9a
+
+    //  a 1d 2f 40 50  5f 6d 7a 86 91  9b 9c 9d 9e 9f  a0 a1 a2 a3 a4
+    //  b 1e 30 41 51  60 6e 7b 87 92  9c a5 a6 a7 a8  a9 aa ab ac ad
+    //  c 1f 31 42 52  61 6f 7c 88 93  9d a6 ae af b0  b1 b2 b3 b4 b5
+    //  d 20 32 43 53  62 70 7d 89 94  9e a7 af b6 b7  b8 b9 ba bb bc
+    //  e 21 33 44 54  63 71 7e 8a 95  9f a8 b0 b7 bd  be bf c0 c1 c2
+
+    //  f 22 34 45 55  64 72 7f 8b 96  a0 a9 b1 b8 be  c3 c4 c5 c6 c7
+    // 10 23 35 46 56  65 73 80 8c 97  a1 aa b2 b9 bf  c4 c8 c9 ca cb
+    // 11 24 36 47 57  66 74 81 8d 98  a2 ab b3 ba c0  c5 c9 cc cd ce
+    // 12 25 37 48 58  67 75 82 8e 99  a3 ac b4 bb c1  c6 ca cd cf d0
+    // 13 26 38 49 59  68 76 83 8f 9a  a4 ad b5 bc c2  c7 cb ce d0 d1
+    constexpr float kWeights8[210] = {
+        -0.02928613f, -0.03706353f, -0.03783812f, -0.03324558f, -0.00447632f,
+        -0.02519406f, -0.03752601f, -0.03901508f, -0.03663285f, -0.00646649f,
+        -0.02066407f, -0.03838633f, -0.04002101f, -0.03900035f, -0.00901973f,
+        -0.01626393f, -0.03954148f, -0.04046620f, -0.03979621f, -0.01224485f,
+        0.29895328f,  0.35757708f,  -0.02447552f, -0.01081748f, -0.04314594f,
+        0.23903219f,  0.41119301f,  -0.00573046f, -0.01450239f, -0.04246845f,
+        0.17567618f,  0.45220643f,  0.02287757f,  -0.01936783f, -0.03583255f,
+        0.11572472f,  0.47416733f,  0.06284440f,  -0.02685066f, 0.42720050f,
+        -0.02248939f, -0.01155273f, -0.04562755f, 0.28689496f,  0.49093869f,
+        -0.00007891f, -0.01545926f, -0.04562659f, 0.21238920f,  0.53980934f,
+        0.03369474f,  -0.02070211f, -0.03866988f, 0.14229550f,  0.56593398f,
+        0.08045181f,  -0.02888298f, -0.03680918f, -0.00542229f, -0.02920477f,
+        -0.02788574f, -0.02118180f, -0.03942402f, -0.00775547f, -0.02433614f,
+        -0.03193943f, -0.02030828f, -0.04044014f, -0.01074016f, -0.01930822f,
+        -0.03620399f, -0.01974125f, -0.03919545f, -0.01456093f, -0.00045072f,
+        -0.00360110f, -0.01020207f, -0.01231907f, -0.00638988f, -0.00071592f,
+        -0.00279122f, -0.00957115f, -0.01288327f, -0.00730937f, -0.00107783f,
+        -0.00210156f, -0.00890705f, -0.01317668f, -0.00813895f, -0.00153491f,
+        -0.02128481f, -0.04173044f, -0.04831487f, -0.03293190f, -0.00525260f,
+        -0.01720322f, -0.04052736f, -0.05045706f, -0.03607317f, -0.00738030f,
+        -0.01341764f, -0.03965629f, -0.05151616f, -0.03814886f, -0.01005819f,
+        0.18968273f,  0.33063684f,  -0.01300105f, -0.01372950f, -0.04017465f,
+        0.13727832f,  0.36402234f,  0.01027890f,  -0.01832107f, -0.03365072f,
+        0.08734506f,  0.38194295f,  0.04338228f,  -0.02525993f, 0.56408126f,
+        0.00458352f,  -0.01648227f, -0.04887868f, 0.24585519f,  0.62026135f,
+        0.04314807f,  -0.02213737f, -0.04158014f, 0.16637289f,  0.65027023f,
+        0.09621636f,  -0.03101388f, -0.04082742f, -0.00904519f, -0.02790922f,
+        -0.02117818f, 0.00798662f,  -0.03995711f, -0.01243427f, -0.02231705f,
+        -0.02946266f, 0.00992055f,  -0.03600283f, -0.01684920f, -0.00111684f,
+        -0.00411204f, -0.01297130f, -0.01723725f, -0.01022545f, -0.00165306f,
+        -0.00313110f, -0.01218016f, -0.01763266f, -0.01125620f, -0.00231663f,
+        -0.01374149f, -0.03797620f, -0.05142937f, -0.03117307f, -0.00581914f,
+        -0.01064003f, -0.03608089f, -0.05272168f, -0.03375670f, -0.00795586f,
+        0.09628104f,  0.27129991f,  -0.00353779f, -0.01734151f, -0.03153981f,
+        0.05686230f,  0.28500998f,  0.02230594f,  -0.02374955f, 0.68214326f,
+        0.05018048f,  -0.02320852f, -0.04383616f, 0.18459474f,  0.71517975f,
+        0.10805613f,  -0.03263677f, -0.03637639f, -0.01394373f, -0.02511203f,
+        -0.01728636f, 0.05407331f,  -0.02867568f, -0.01893131f, -0.00240854f,
+        -0.00446511f, -0.01636187f, -0.02377053f, -0.01522848f, -0.00333334f,
+        -0.00819975f, -0.02964169f, -0.04499287f, -0.02745350f, -0.00612408f,
+        0.02727416f,  0.19446600f,  0.00159832f,  -0.02232473f, 0.74982506f,
+        0.11452620f,  -0.03348048f, -0.01605681f, -0.02070339f, -0.00458223f};
+    for (size_t i = 0; i < 210; i++) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(kWeights8[i], &upsampling8_weights[i]));
+    }
+  }
+  return true;
+}
+
+ExtraChannelInfo::ExtraChannelInfo() { Bundle::Init(this); }
+Status ExtraChannelInfo::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  if (visitor->AllDefault(*this, &all_default)) {
+    // Overwrite all serialized fields, but not any nonserialized_*.
+    visitor->SetDefault(this);
+    return true;
+  }
+
+  // General (visited for every extra channel type)
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Enum(ExtraChannel::kAlpha, &type));
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&bit_depth));
+
+  JXL_QUIET_RETURN_IF_ERROR(
+      visitor->U32(Val(0), Val(3), Val(4), BitsOffset(3, 1), 0, &dim_shift));
+  if ((1U << dim_shift) > 8) {  // i.e. dim_shift must be at most 3
+    return JXL_FAILURE("dim_shift %u too large", dim_shift);
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(VisitNameString(visitor, &name));
+
+  // Conditional (fields that are only visited for one specific channel type)
+  if (visitor->Conditional(type == ExtraChannel::kAlpha)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &alpha_associated));
+  }
+  if (visitor->Conditional(type == ExtraChannel::kSpotColor)) {
+    for (float& c : spot_color) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0, &c));
+    }
+  }
+  if (visitor->Conditional(type == ExtraChannel::kCFA)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(1), Bits(2), BitsOffset(4, 3),
+                                           BitsOffset(8, 19), 1, &cfa_channel));
+  }
+  return true;
+}
+
+ImageMetadata::ImageMetadata() { Bundle::Init(this); }
+Status ImageMetadata::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  if (visitor->AllDefault(*this, &all_default)) {
+    // Overwrite all serialized fields, but not any nonserialized_*.
+    visitor->SetDefault(this);
+    return true;
+  }
+
+  // Bundle::AllDefault does not allow usage when reading (it may abort the
+  // program when a codestream has invalid values), but when reading we
+  // overwrite the extra_fields value, so do not need to call AllDefault.
+  bool tone_mapping_default =
+      visitor->IsReading() ? false : Bundle::AllDefault(tone_mapping);
+
+  bool extra_fields = (orientation != 1 || have_preview || have_animation ||
+                       have_intrinsic_size || !tone_mapping_default);
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &extra_fields));
+  if (visitor->Conditional(extra_fields)) {
+    orientation--;  // visited as orientation - 1 so that 1..8 fits in 3 bits
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 0, &orientation));
+    orientation++;
+    // (No need for bounds checking because we read exactly 3 bits)
+
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_intrinsic_size));
+    if (visitor->Conditional(have_intrinsic_size)) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&intrinsic_size));
+    }
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_preview));
+    if (visitor->Conditional(have_preview)) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&preview_size));
+    }
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_animation));
+    if (visitor->Conditional(have_animation)) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&animation));
+    }
+  } else {
+    orientation = 1;  // identity
+    have_intrinsic_size = false;
+    have_preview = false;
+    have_animation = false;
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&bit_depth));
+  JXL_QUIET_RETURN_IF_ERROR(
+      visitor->Bool(true, &modular_16_bit_buffer_sufficient));
+
+  num_extra_channels = extra_channel_info.size();  // sync count with vector
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(0), Val(1), BitsOffset(4, 2),
+                                         BitsOffset(12, 1), 0,
+                                         &num_extra_channels));
+
+  if (visitor->Conditional(num_extra_channels != 0)) {
+    if (visitor->IsReading()) {
+      extra_channel_info.resize(num_extra_channels);
+    }
+    for (ExtraChannelInfo& eci : extra_channel_info) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&eci));
+    }
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(true, &xyb_encoded));
+  JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&color_encoding));
+  if (visitor->Conditional(extra_fields)) {  // same flag as the block above
+    JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&tone_mapping));
+  }
+
+  // Basic-info parsing stops here when reading: the extensions are skipped.
+  if (visitor->IsReading() && nonserialized_only_parse_basic_info) {
+    return true;
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions));
+  // Extensions: in chronological order of being added to the format.
+  return visitor->EndExtensions();
+}
+
+OpsinInverseMatrix::OpsinInverseMatrix() { Bundle::Init(this); }
+Status OpsinInverseMatrix::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  if (visitor->AllDefault(*this, &all_default)) {
+    // Overwrite all serialized fields, but not any nonserialized_*.
+    visitor->SetDefault(this);
+    return true;
+  }
+  for (int i = 0; i < 9; ++i) {  // 9 inverse matrix entries
+    JXL_QUIET_RETURN_IF_ERROR(visitor->F16(
+        DefaultInverseOpsinAbsorbanceMatrix()[i], &inverse_matrix[i]));
+  }
+  for (int i = 0; i < 3; ++i) {  // 3 opsin biases
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->F16(kNegOpsinAbsorbanceBiasRGB[i], &opsin_biases[i]));
+  }
+  for (int i = 0; i < 4; ++i) {  // 4 quantization biases
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->F16(kDefaultQuantBias[i], &quant_biases[i]));
+  }
+  return true;
+}
+
+ToneMapping::ToneMapping() { Bundle::Init(this); }
+Status ToneMapping::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  if (visitor->AllDefault(*this, &all_default)) {
+    // Overwrite all serialized fields, but not any nonserialized_*.
+    visitor->SetDefault(this);
+    return true;
+  }
+  // Each field below is range-checked right after it has been visited.
+  JXL_QUIET_RETURN_IF_ERROR(
+      visitor->F16(kDefaultIntensityTarget, &intensity_target));
+  if (intensity_target <= 0.f) {  // must be strictly positive
+    return JXL_FAILURE("invalid intensity target");
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.0f, &min_nits));
+  if (min_nits < 0.f || min_nits > intensity_target) {  // need 0 <= min <= max
+    return JXL_FAILURE("invalid min %f vs max %f", min_nits, intensity_target);
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &relative_to_max_display));
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.0f, &linear_below));
+  if (linear_below < 0 || (relative_to_max_display && linear_below > 1.0f)) {
+    return JXL_FAILURE("invalid linear_below %f (%s)", linear_below,
+                       relative_to_max_display ? "relative" : "absolute");
+  }
+
+  return true;
+}
+
+Status ReadImageMetadata(BitReader* JXL_RESTRICT reader,
+                         ImageMetadata* JXL_RESTRICT metadata) {
+  return Bundle::Read(reader, metadata);  // plain forwarder, no extra checks
+}
+
+Status WriteImageMetadata(const ImageMetadata& metadata,
+                          BitWriter* JXL_RESTRICT writer, size_t layer,
+                          AuxOut* aux_out) {
+  return Bundle::Write(metadata, writer, layer, aux_out);  // plain forwarder
+}
+
+// Adds, updates or (when bits == 0) removes the alpha ExtraChannelInfo and
+// refreshes num_extra_channels accordingly. `alpha_is_premultiplied` is stored
+// as alpha_associated of the entry that is added or updated.
+void ImageMetadata::SetAlphaBits(uint32_t bits, bool alpha_is_premultiplied) {
+  ExtraChannelInfo* const existing = Find(ExtraChannel::kAlpha);
+  if (bits == 0) {
+    // Drop every alpha entry (in theory there can be several) so that a
+    // following HasAlpha() returns false; a no-op when there is none.
+    const auto new_end = std::remove_if(
+        extra_channel_info.begin(), extra_channel_info.end(),
+        [](const ExtraChannelInfo& eci) {
+          return eci.type == ExtraChannel::kAlpha;
+        });
+    extra_channel_info.erase(new_end, extra_channel_info.end());
+  } else if (existing != nullptr) {
+    // Only the first alpha entry is rewritten; any further ones are ignored.
+    auto& depth = existing->bit_depth;
+    depth.bits_per_sample = bits;
+    depth.floating_point_sample = false;
+    depth.exponent_bits_per_sample = 0;
+    existing->alpha_associated = alpha_is_premultiplied;
+  } else {
+    ExtraChannelInfo fresh;
+    fresh.type = ExtraChannel::kAlpha;
+    fresh.bit_depth.bits_per_sample = bits;
+    fresh.dim_shift = 0;
+    fresh.alpha_associated = alpha_is_premultiplied;
+    // Prepend rather than append so that alpha is listed before any other
+    // extra channels that already exist.
+    extra_channel_info.insert(extra_channel_info.begin(), fresh);
+  }
+  num_extra_channels = extra_channel_info.size();
+  // More than 12 alpha bits clears the flag; it is never set back here.
+  if (bits > 12) modular_16_bit_buffer_sufficient = false;
+}
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_metadata.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_metadata.h
new file mode 100644
index 0000000000..e5f7969215
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_metadata.h
@@ -0,0 +1,410 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Main codestream header bundles, the metadata that applies to all frames.
+
+#ifndef LIB_JXL_IMAGE_METADATA_H_
+#define LIB_JXL_IMAGE_METADATA_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+#include "lib/jxl/opsin_params.h"
+
+namespace jxl {
+
+// Orientation of the image, numbered as in EXIF. This field overrides any
+// orientation present in actual EXIF metadata. The value tells the decoder
+// which transformation to apply after decoding so that the image is displayed
+// with the correct orientation.
+enum class Orientation : uint32_t {
+  // Values 1..8 match the EXIF definitions.
+  kIdentity = 1,
+  kFlipHorizontal = 2,
+  kRotate180 = 3,
+  kFlipVertical = 4,
+  kTranspose = 5,
+  kRotate90 = 6,
+  kAntiTranspose = 7,
+  kRotate270 = 8,
+};
+// Don't need an EnumBits because Orientation is not read via Enum().
+
+// Type tag of an extra (non-color) channel. Values are consecutive from 0.
+enum class ExtraChannel : uint32_t {
+  // First two enumerators (most common) are cheaper to encode
+  kAlpha = 0,
+  kDepth = 1,
+
+  kSpotColor = 2,
+  kSelectionMask = 3,
+  kBlack = 4,  // for CMYK
+  kCFA = 5,    // Bayer channel
+  kThermal = 6,
+  kReserved0 = 7,
+  kReserved1 = 8,
+  kReserved2 = 9,
+  kReserved3 = 10,
+  kReserved4 = 11,
+  kReserved5 = 12,
+  kReserved6 = 13,
+  kReserved7 = 14,
+  kUnknown = 15,  // disambiguated via name string, raise warning if unsupported
+  kOptional = 16  // like kUnknown but can silently be ignored
+};
+// Returns the name of the enum type itself (not of an enumerator); the
+// argument is only used for overload selection.
+static inline const char* EnumName(ExtraChannel /*unused*/) {
+  return "ExtraChannel";
+}
+// Returns the OR of MakeBit() over the enumerators listed below; the argument
+// is only used for overload selection.
+// NOTE(review): kThermal and the kReserved* values are not part of the mask.
+// Presumably the mask lists the values accepted when the enum is read; if so,
+// confirm that leaving out kThermal is intentional.
+static inline constexpr uint64_t EnumBits(ExtraChannel /*unused*/) {
+  using EC = ExtraChannel;
+  return MakeBit(EC::kAlpha) | MakeBit(EC::kDepth) | MakeBit(EC::kSpotColor) |
+         MakeBit(EC::kSelectionMask) | MakeBit(EC::kBlack) | MakeBit(EC::kCFA) |
+         MakeBit(EC::kUnknown) | MakeBit(EC::kOptional);
+}
+
+// Sample format (integer or floating point, and bit counts) of the original
+// image. Used in ImageMetadata and ExtraChannelInfo.
+struct BitDepth : public Fields {
+  BitDepth();
+  const char* Name() const override { return "BitDepth"; }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // Whether the original (uncompressed) samples are floating point or
+  // unsigned integer.
+  bool floating_point_sample;
+
+  // Bit depth of the original (uncompressed) image samples. Must be in the
+  // range [1, 32].
+  uint32_t bits_per_sample;
+
+  // Floating point exponent bits of the original (uncompressed) image samples,
+  // only used if floating_point_sample is true.
+  // If used, the samples are floating point with:
+  // - 1 sign bit
+  // - exponent_bits_per_sample exponent bits
+  // - (bits_per_sample - exponent_bits_per_sample - 1) mantissa bits
+  // If used, exponent_bits_per_sample must be in the range
+  // [2, 8] and amount of mantissa bits must be in the range [2, 23].
+  // NOTE: exponent_bits_per_sample is 8 for single precision (binary32)
+  // floating point, 5 for half precision (binary16), 7 for fp24.
+  uint32_t exponent_bits_per_sample;
+};
+
+// Describes one extra channel: its type, sample format, downsampling and
+// type-specific attributes.
+struct ExtraChannelInfo : public Fields {
+  ExtraChannelInfo();
+  const char* Name() const override { return "ExtraChannelInfo"; }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // NOTE(review): presumably records whether all fields hold their default
+  // values; maintained outside this header - confirm in VisitFields.
+  mutable bool all_default;
+
+  ExtraChannel type;
+  BitDepth bit_depth;
+  uint32_t dim_shift;  // downsampled by 2^dim_shift on each axis
+
+  std::string name;  // UTF-8
+
+  // Conditional:
+  // NOTE(review): presumably each field below is only meaningful for the
+  // matching `type` (alpha / spot color / CFA) - confirm in VisitFields.
+  bool alpha_associated;  // i.e. premultiplied
+  float spot_color[4];    // spot color in linear RGBA
+  uint32_t cfa_channel;
+};
+
+// Header bundle holding the opsin inverse matrix together with its opsin and
+// quantization biases.
+struct OpsinInverseMatrix : public Fields {
+  OpsinInverseMatrix();
+  const char* Name() const override { return "OpsinInverseMatrix"; }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // NOTE(review): presumably records whether all fields hold their default
+  // values - confirm in VisitFields.
+  mutable bool all_default;
+
+  float inverse_matrix[9];  // presumably a 3x3 matrix - TODO confirm layout
+  float opsin_biases[3];
+  float quant_biases[4];
+};
+
+// Information useful for mapping HDR images to lower dynamic range displays.
+struct ToneMapping : public Fields {
+  ToneMapping();
+  const char* Name() const override { return "ToneMapping"; }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // NOTE(review): presumably records whether all fields hold their default
+  // values - confirm in VisitFields.
+  mutable bool all_default;
+
+  // Upper bound on the intensity level present in the image. For unsigned
+  // integer pixel encodings, this is the brightness of the largest
+  // representable value. The image does not necessarily contain a pixel
+  // actually this bright. An encoder is allowed to set 255 for SDR images
+  // without computing a histogram.
+  float intensity_target;  // [nits]
+
+  // Lower bound on the intensity level present in the image. This may be
+  // loose, i.e. lower than the actual darkest pixel. When tone mapping, a
+  // decoder will map [min_nits, intensity_target] to the display range.
+  float min_nits;
+
+  bool relative_to_max_display;  // see below
+  // The tone mapping will leave unchanged (linear mapping) any pixels whose
+  // brightness is strictly below this. The interpretation depends on
+  // relative_to_max_display. If true, this is a ratio [0, 1] of the maximum
+  // display brightness [nits], otherwise an absolute brightness [nits].
+  float linear_below;
+};
+
+// Contains weights to customize some transforms - in particular, XYB and
+// upsampling.
+struct CustomTransformData : public Fields {
+  CustomTransformData();
+  const char* Name() const override { return "CustomTransformData"; }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // Must be set before calling VisitFields. Must equal xyb_encoded of
+  // ImageMetadata, should be set by ImageMetadata during VisitFields.
+  bool nonserialized_xyb_encoded = false;
+
+  // NOTE(review): presumably records whether all fields hold their default
+  // values - confirm in VisitFields.
+  mutable bool all_default;
+
+  OpsinInverseMatrix opsin_inverse_matrix;
+
+  // NOTE(review): presumably a bit mask selecting which of the upsampling
+  // weight arrays below are customized - confirm in VisitFields.
+  uint32_t custom_weights_mask;
+  float upsampling2_weights[15];
+  float upsampling4_weights[55];
+  float upsampling8_weights[210];
+};
+
+// Properties of the original image bundle. This enables Encode(Decode()) to
+// re-create an equivalent image without user input.
+struct ImageMetadata : public Fields {
+  ImageMetadata();
+  const char* Name() const override { return "ImageMetadata"; }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  // Returns bit depth of the JPEG XL compressed alpha channel, or 0 if no alpha
+  // channel present. In the theoretical case that there are multiple alpha
+  // channels, returns the bit depth of the first.
+  uint32_t GetAlphaBits() const {
+    const ExtraChannelInfo* alpha = Find(ExtraChannel::kAlpha);
+    if (alpha == nullptr) return 0;
+    // An existing alpha channel must never report 0 bits, since 0 is the
+    // "no alpha" return value.
+    JXL_ASSERT(alpha->bit_depth.bits_per_sample != 0);
+    return alpha->bit_depth.bits_per_sample;
+  }
+
+  // Sets bit depth of alpha channel, adding extra channel if needed, or
+  // removing all alpha channels if bits is 0.
+  // Assumes integer alpha channel and not designed to support multiple
+  // alpha channels (it's possible to use those features by manipulating
+  // extra_channel_info directly).
+  //
+  // Callers must insert the actual channel image at the same index before any
+  // further modifications to extra_channel_info.
+  void SetAlphaBits(uint32_t bits, bool alpha_is_premultiplied = false);
+
+  // True if an alpha extra channel is present.
+  bool HasAlpha() const { return GetAlphaBits() != 0; }
+
+  // Sets the original bit depth fields to indicate unsigned integer of the
+  // given bit depth.
+  // TODO(lode): move function to BitDepth
+  void SetUintSamples(uint32_t bits) {
+    bit_depth.bits_per_sample = bits;
+    bit_depth.exponent_bits_per_sample = 0;
+    bit_depth.floating_point_sample = false;
+    // RCT / Squeeze may add one bit each, and this is about int16_t,
+    // so uint13 should still be OK but limiting it to 12 seems safer.
+    // TODO(jon): figure out a better way to set this header field.
+    // (in particular, if modular mode is not used it doesn't matter,
+    // and if transforms are restricted, up to 15-bit could be done)
+    if (bits > 12) modular_16_bit_buffer_sufficient = false;
+  }
+  // Sets the original bit depth fields to indicate single precision floating
+  // point.
+  // TODO(lode): move function to BitDepth
+  void SetFloat32Samples() {
+    bit_depth.bits_per_sample = 32;
+    bit_depth.exponent_bits_per_sample = 8;
+    bit_depth.floating_point_sample = true;
+    modular_16_bit_buffer_sufficient = false;
+  }
+
+  // Sets the original bit depth fields to indicate half precision floating
+  // point (16 bits, 5 exponent bits).
+  void SetFloat16Samples() {
+    bit_depth.bits_per_sample = 16;
+    bit_depth.exponent_bits_per_sample = 5;
+    bit_depth.floating_point_sample = true;
+    modular_16_bit_buffer_sufficient = false;
+  }
+
+  void SetIntensityTarget(float intensity_target) {
+    tone_mapping.intensity_target = intensity_target;
+  }
+  // Returns tone_mapping.intensity_target; asserts that it is nonzero.
+  float IntensityTarget() const {
+    JXL_ASSERT(tone_mapping.intensity_target != 0);
+    return tone_mapping.intensity_target;
+  }
+
+  // Returns first ExtraChannelInfo of the given type, or nullptr if none.
+  const ExtraChannelInfo* Find(ExtraChannel type) const {
+    for (const ExtraChannelInfo& eci : extra_channel_info) {
+      if (eci.type == type) return &eci;
+    }
+    return nullptr;
+  }
+
+  // Returns first ExtraChannelInfo of the given type, or nullptr if none.
+  // The pointer refers into extra_channel_info.
+  ExtraChannelInfo* Find(ExtraChannel type) {
+    for (ExtraChannelInfo& eci : extra_channel_info) {
+      if (eci.type == type) return &eci;
+    }
+    return nullptr;
+  }
+
+  // Returns the `orientation` field converted to the Orientation enum.
+  Orientation GetOrientation() const {
+    return static_cast<Orientation>(orientation);
+  }
+
+  bool ExtraFieldsDefault() const;
+
+  mutable bool all_default;
+
+  BitDepth bit_depth;
+  bool modular_16_bit_buffer_sufficient;  // otherwise 32 is.
+
+  // Whether the colors values of the pixels of frames are encoded in the
+  // codestream using the absolute XYB color space, or the using values that
+  // follow the color space defined by the ColorEncoding or ICC profile. This
+  // determines when or whether a CMS (Color Management System) is needed to get
+  // the pixels in a desired color space. In one case, the pixels have one known
+  // color space and a CMS is needed to convert them to the original image's
+  // color space, in the other case the pixels have the color space of the
+  // original image and a CMS is required if a different display space, or a
+  // single known consistent color space for multiple decoded images, is
+  // desired. In all cases, the color space of all frames from a single image is
+  // the same, both VarDCT and modular frames.
+  //
+  // If true: then frames can be decoded to XYB (which can also be converted to
+  // linear and non-linear sRGB with the built in conversion without CMS). The
+  // attached ColorEncoding or ICC profile has no effect on the meaning of the
+  // pixel's color values, but instead indicates what the color profile of the
+  // original image was, and what color profile one should convert to when
+  // decoding to integers to prevent clipping and precision loss. To do that
+  // conversion requires a CMS.
+  //
+  // If false: then the color values of decoded frames are in the space defined
+  // by the attached ColorEncoding or ICC profile. To instead get the pixels in
+  // a chosen known color space, such as sRGB, requires a CMS, since the
+  // attached ColorEncoding or ICC profile could be any arbitrary color space.
+  // This mode is typically used for lossless images encoded as integers.
+  // Frames can also use YCbCr encoding, some frames may and some may not, but
+  // this is not a different color space but a certain encoding of the RGB
+  // values.
+  //
+  // Note: if !xyb_encoded, but the attached color profile indicates XYB (which
+  // can happen either if it's a ColorEncoding with color_space_ ==
+  // ColorSpace::kXYB, or if it's an ICC Profile that has been crafted to
+  // represent XYB), then the frames still may not use ColorEncoding kXYB, they
+  // must still use kNone (or kYCbCr, which would mean applying the YCbCr
+  // transform to the 3-channel XYB data), since with !xyb_encoded, the 3
+  // channels are stored as-is, no matter what meaning the color profile assigns
+  // to them. To use ColorEncoding::kXYB, xyb_encoded must be true.
+  //
+  // This value is defined in image metadata because this is the global
+  // codestream header. This value does not affect the image itself, so is not
+  // image metadata per se, it only affects the encoding, and what color space
+  // the decoder can receive the pixels in without needing a CMS.
+  bool xyb_encoded;
+
+  ColorEncoding color_encoding;
+
+  // These values are initialized to defaults such that the 'extra_fields'
+  // condition in VisitFields uses correctly initialized values.
+  uint32_t orientation = 1;
+  bool have_preview = false;
+  bool have_animation = false;
+  bool have_intrinsic_size = false;
+
+  // If present, the stored image has the dimensions of the first SizeHeader,
+  // but decoders are advised to resample or display per `intrinsic_size`.
+  SizeHeader intrinsic_size;  // only if have_intrinsic_size
+
+  ToneMapping tone_mapping;
+
+  // When reading: deserialized. When writing: automatically set from vector.
+  uint32_t num_extra_channels;
+  std::vector<ExtraChannelInfo> extra_channel_info;
+
+  // Only present if m.have_preview.
+  PreviewHeader preview_size;
+  // Only present if m.have_animation.
+  AnimationHeader animation;
+
+  uint64_t extensions;
+
+  // Option to stop parsing after basic info, and treat as if the later
+  // fields do not participate. Use to parse only basic image information
+  // excluding the final larger or variable sized data.
+  bool nonserialized_only_parse_basic_info = false;
+};
+
+Status ReadImageMetadata(BitReader* JXL_RESTRICT reader,
+                         ImageMetadata* JXL_RESTRICT metadata);
+
+Status WriteImageMetadata(const ImageMetadata& metadata,
+                          BitWriter* JXL_RESTRICT writer, size_t layer,
+                          AuxOut* aux_out);
+
+// All metadata applicable to the entire codestream (dimensions, extra channels,
+// ...)
+struct CodecMetadata {
+  // TODO(lode): use the preview and animation fields too, in place of the
+  // nonserialized_ ones in ImageMetadata.
+  ImageMetadata m;
+  // The size of the codestream: this is the nominal size applicable to all
+  // frames, although some frames can have a different effective size through
+  // crop, dc_level or representing the preview.
+  SizeHeader size;
+  // Often default.
+  CustomTransformData transform_data;
+
+  size_t xsize() const { return size.xsize(); }
+  size_t ysize() const { return size.ysize(); }
+
+  // The oriented_* accessors return the dimension as seen after applying the
+  // orientation. Orientation values above 4 (kTranspose, kRotate90,
+  // kAntiTranspose, kRotate270) exchange width and height, unless
+  // `keep_orientation` is set, in which case the stored dimension is returned.
+  size_t oriented_xsize(bool keep_orientation) const {
+    const bool swap_axes =
+        static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation;
+    return swap_axes ? ysize() : xsize();
+  }
+  size_t oriented_preview_xsize(bool keep_orientation) const {
+    const bool swap_axes =
+        static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation;
+    return swap_axes ? m.preview_size.ysize() : m.preview_size.xsize();
+  }
+  size_t oriented_ysize(bool keep_orientation) const {
+    const bool swap_axes =
+        static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation;
+    return swap_axes ? xsize() : ysize();
+  }
+  size_t oriented_preview_ysize(bool keep_orientation) const {
+    const bool swap_axes =
+        static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation;
+    return swap_axes ? m.preview_size.xsize() : m.preview_size.ysize();
+  }
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_IMAGE_METADATA_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_ops.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_ops.h
new file mode 100644
index 0000000000..f3c2b5995e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_ops.h
@@ -0,0 +1,814 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_IMAGE_OPS_H_
+#define LIB_JXL_IMAGE_OPS_H_
+
+// Operations on images.
+
+#include <algorithm>
+#include <array>
+#include <limits>
+#include <vector>
+
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// Copies all pixels of `from` into `to`, row by row. Both planes must have the
+// same size (asserted).
+template <typename T>
+void CopyImageTo(const Plane<T>& from, Plane<T>* JXL_RESTRICT to) {
+  PROFILER_ZONE("CopyImage1");
+  JXL_ASSERT(SameSize(from, *to));
+  const size_t num_rows = from.ysize();
+  const size_t num_cols = from.xsize();
+  // Nothing to copy for an empty image.
+  if (num_rows == 0 || num_cols == 0) return;
+  const size_t row_bytes = num_cols * sizeof(T);
+  for (size_t y = 0; y < num_rows; ++y) {
+    const T* JXL_RESTRICT src = from.ConstRow(y);
+    T* JXL_RESTRICT dst = to->Row(y);
+    memcpy(dst, src, row_bytes);
+  }
+}
+
+// DEPRECATED - prefer to preallocate result.
+// Returns a newly allocated plane of the same size holding a copy of `from`.
+template <typename T>
+Plane<T> CopyImage(const Plane<T>& from) {
+  Plane<T> duplicate(from.xsize(), from.ysize());
+  CopyImageTo(from, &duplicate);
+  return duplicate;
+}
+
+// Copies `from:rect_from` to `to:rect_to`. Both rects must have the same size
+// and lie inside their respective planes (checked in debug builds).
+template <typename T>
+void CopyImageTo(const Rect& rect_from, const Plane<T>& from,
+                 const Rect& rect_to, Plane<T>* JXL_RESTRICT to) {
+  PROFILER_ZONE("CopyImageR");
+  JXL_DASSERT(SameSize(rect_from, rect_to));
+  JXL_DASSERT(rect_from.IsInside(from));
+  JXL_DASSERT(rect_to.IsInside(*to));
+  const size_t num_cols = rect_from.xsize();
+  if (num_cols == 0) return;
+  const size_t row_bytes = num_cols * sizeof(T);
+  const size_t num_rows = rect_from.ysize();
+  for (size_t y = 0; y < num_rows; ++y) {
+    const T* JXL_RESTRICT src = rect_from.ConstRow(from, y);
+    T* JXL_RESTRICT dst = rect_to.Row(to, y);
+    memcpy(dst, src, row_bytes);
+  }
+}
+
+// DEPRECATED - Returns a newly allocated plane, sized like "rect", holding the
+// "image" pixels that lie in "rect".
+template <typename T>
+Plane<T> CopyImage(const Rect& rect, const Plane<T>& image) {
+  Plane<T> cropped(rect.xsize(), rect.ysize());
+  CopyImageTo(rect, image, &cropped);
+  return cropped;
+}
+
+// Copies `from:rect_from` to `to:rect_to`, one plane at a time. The rects must
+// have the same size (asserted).
+template <typename T>
+void CopyImageTo(const Rect& rect_from, const Image3<T>& from,
+                 const Rect& rect_to, Image3<T>* JXL_RESTRICT to) {
+  PROFILER_ZONE("CopyImageR");
+  JXL_ASSERT(SameSize(rect_from, rect_to));
+  for (size_t plane = 0; plane != 3; ++plane) {
+    const Plane<T>& src = from.Plane(plane);
+    CopyImageTo(rect_from, src, rect_to, &to->Plane(plane));
+  }
+}
+
+// Converts the pixels of `from:rect_from` to type U and stores them in
+// `to:rect_to`, clamping each value to the range of U. The comparison is done
+// in M, the type of T() + U(). The rects must have the same size (asserted).
+template <typename T, typename U>
+void ConvertPlaneAndClamp(const Rect& rect_from, const Plane<T>& from,
+                          const Rect& rect_to, Plane<U>* JXL_RESTRICT to) {
+  PROFILER_ZONE("ConvertPlane");
+  JXL_ASSERT(SameSize(rect_from, rect_to));
+  using M = decltype(T() + U());
+  // lowest() rather than min(): for floating-point U, min() is the smallest
+  // positive normalized value, which would clamp every negative sample to
+  // (almost) zero. For integer U both are identical.
+  const M lower = std::numeric_limits<U>::lowest();
+  const M upper = std::numeric_limits<U>::max();
+  for (size_t y = 0; y < rect_to.ysize(); ++y) {
+    const T* JXL_RESTRICT row_from = rect_from.ConstRow(from, y);
+    U* JXL_RESTRICT row_to = rect_to.Row(to, y);
+    for (size_t x = 0; x < rect_to.xsize(); ++x) {
+      row_to[x] = std::min<M>(std::max<M>(row_from[x], lower), upper);
+    }
+  }
+}
+
+// Copies all of `from` to all of `to` by forwarding to the rect-based overload
+// with rects covering each image.
+template <typename T>
+void CopyImageTo(const T& from, T* JXL_RESTRICT to) {
+  const Rect whole_from(from);
+  const Rect whole_to(*to);
+  CopyImageTo(whole_from, from, whole_to, to);
+}
+
+// Copies `from:rect_from` to all of `to` by forwarding to the rect-based
+// overload with a destination rect covering `to`.
+template <typename T>
+void CopyImageTo(const Rect& rect_from, const T& from, T* JXL_RESTRICT to) {
+  const Rect whole_to(*to);
+  CopyImageTo(rect_from, from, whole_to, to);
+}
+
+// Copies all of `from` to `to:rect_to` by forwarding to the rect-based
+// overload with a source rect covering `from`.
+template <typename T>
+void CopyImageTo(const T& from, const Rect& rect_to, T* JXL_RESTRICT to) {
+  const Rect whole_from(from);
+  CopyImageTo(whole_from, from, rect_to, to);
+}
+
+// Copies `from:from_rect` to `to:to_rect`; also copies up to `padding` pixels
+// of border around `from:from_rect`, in all directions, whenever they are
+// inside the first image. `to_rect` must leave room for the left/top border
+// (checked in debug builds).
+template <typename T>
+void CopyImageToWithPadding(const Rect& from_rect, const T& from,
+                            size_t padding, const Rect& to_rect, T* to) {
+  // Border actually available on each side of from_rect within `from`.
+  const size_t pad_left = std::min(padding, from_rect.x0());
+  const size_t pad_right =
+      std::min(padding, from.xsize() - from_rect.x0() - from_rect.xsize());
+  const size_t pad_top = std::min(padding, from_rect.y0());
+  const size_t pad_bottom =
+      std::min(padding, from.ysize() - from_rect.y0() - from_rect.ysize());
+  JXL_DASSERT(to_rect.x0() >= pad_left);
+  JXL_DASSERT(to_rect.y0() >= pad_top);
+
+  // Grow both rects by the same amounts so that they stay the same size.
+  const Rect padded_from(from_rect.x0() - pad_left, from_rect.y0() - pad_top,
+                         from_rect.xsize() + pad_left + pad_right,
+                         from_rect.ysize() + pad_top + pad_bottom);
+  const Rect padded_to(to_rect.x0() - pad_left, to_rect.y0() - pad_top,
+                       to_rect.xsize() + pad_left + pad_right,
+                       to_rect.ysize() + pad_top + pad_bottom);
+  CopyImageTo(padded_from, from, padded_to, to);
+}
+
+// DEPRECATED - prefer to preallocate result.
+// Returns a newly allocated 3-plane image holding a copy of `from`.
+template <typename T>
+Image3<T> CopyImage(const Image3<T>& from) {
+  Image3<T> duplicate(from.xsize(), from.ysize());
+  CopyImageTo(from, &duplicate);
+  return duplicate;
+}
+
+// DEPRECATED - prefer to preallocate result.
+// Returns a newly allocated 3-plane image, sized like `rect`, holding the
+// pixels of `from` that lie in `rect`.
+template <typename T>
+Image3<T> CopyImage(const Rect& rect, const Image3<T>& from) {
+  Image3<T> to(rect.xsize(), rect.ysize());
+  for (size_t c = 0; c < 3; ++c) {
+    // CopyImageTo takes the destination by pointer; passing the plane itself
+    // (as before) does not match any overload once this is instantiated.
+    CopyImageTo(rect, from.Plane(c), &to.Plane(c));
+  }
+  return to;
+}
+
+// Sets "thickness" pixels on each border to "value". This is faster than
+// initializing the entire image and overwriting valid/interior pixels.
+// A thickness larger than the image is clamped to the image size, in which
+// case the affected rows/columns are filled completely.
+template <typename T>
+void SetBorder(const size_t thickness, const T value, Image3<T>* image) {
+  const size_t xsize = image->xsize();
+  const size_t ysize = image->ysize();
+  // Clamping avoids the unsigned underflow of `ysize - thickness` and keeps
+  // the left/right fills inside the row when thickness > xsize.
+  const size_t ythick = std::min(thickness, ysize);
+  const size_t xthick = std::min(thickness, xsize);
+  for (size_t c = 0; c < 3; ++c) {
+    // Top: fill entire row
+    for (size_t y = 0; y < ythick; ++y) {
+      T* JXL_RESTRICT row = image->PlaneRow(c, y);
+      std::fill(row, row + xsize, value);
+    }
+
+    // Bottom: fill entire row
+    for (size_t y = ysize - ythick; y < ysize; ++y) {
+      T* JXL_RESTRICT row = image->PlaneRow(c, y);
+      std::fill(row, row + xsize, value);
+    }
+
+    // Left/right: fill the 'columns' on either side. The loop is empty when
+    // all rows already belong to the top/bottom borders.
+    for (size_t y = ythick; y + ythick < ysize; ++y) {
+      T* JXL_RESTRICT row = image->PlaneRow(c, y);
+      std::fill(row, row + xthick, value);
+      std::fill(row + xsize - xthick, row + xsize, value);
+    }
+  }
+}
+
+// Writes the per-pixel difference image1 - image2 to `out`. The two inputs
+// must have the same size (checked); `out` is not checked here.
+template <class ImageIn, class ImageOut>
+void Subtract(const ImageIn& image1, const ImageIn& image2, ImageOut* out) {
+  using T = typename ImageIn::T;
+  const size_t width = image1.xsize();
+  const size_t height = image1.ysize();
+  JXL_CHECK(width == image2.xsize());
+  JXL_CHECK(height == image2.ysize());
+
+  for (size_t y = 0; y != height; ++y) {
+    const T* const JXL_RESTRICT minuend = image1.Row(y);
+    const T* const JXL_RESTRICT subtrahend = image2.Row(y);
+    T* const JXL_RESTRICT diff = out->Row(y);
+    for (size_t x = 0; x != width; ++x) {
+      diff[x] = minuend[x] - subtrahend[x];
+    }
+  }
+}
+
+// In-place: subtracts each pixel of `what` from the corresponding pixel of
+// `to`. Iterates over the size of `what`; sizes are not checked here.
+template <typename Tin, typename Tout>
+void SubtractFrom(const Plane<Tin>& what, Plane<Tout>* to) {
+  const size_t width = what.xsize();
+  const size_t height = what.ysize();
+  for (size_t y = 0; y != height; ++y) {
+    const Tin* JXL_RESTRICT src = what.ConstRow(y);
+    Tout* JXL_RESTRICT dst = to->Row(y);
+    for (size_t x = 0; x != width; ++x) {
+      dst[x] -= src[x];
+    }
+  }
+}
+
+// In-place: adds each pixel of `what` to the corresponding pixel of `to`.
+// Iterates over the size of `what`; sizes are not checked here.
+template <typename Tin, typename Tout>
+void AddTo(const Plane<Tin>& what, Plane<Tout>* to) {
+  const size_t width = what.xsize();
+  const size_t height = what.ysize();
+  for (size_t y = 0; y != height; ++y) {
+    const Tin* JXL_RESTRICT src = what.ConstRow(y);
+    Tout* JXL_RESTRICT dst = to->Row(y);
+    for (size_t x = 0; x != width; ++x) {
+      dst[x] += src[x];
+    }
+  }
+}
+
+// In-place: adds the pixels of `what:rectFrom` to those of `to:rectTo`. The
+// rects must have the same size (asserted).
+template <typename Tin, typename Tout>
+void AddTo(Rect rectFrom, const Plane<Tin>& what, Rect rectTo,
+           Plane<Tout>* to) {
+  JXL_ASSERT(SameSize(rectFrom, rectTo));
+  const size_t width = rectTo.xsize();
+  const size_t height = rectTo.ysize();
+  for (size_t y = 0; y != height; ++y) {
+    const Tin* JXL_RESTRICT src = rectFrom.ConstRow(what, y);
+    Tout* JXL_RESTRICT dst = rectTo.Row(to, y);
+    for (size_t x = 0; x != width; ++x) {
+      dst[x] += src[x];
+    }
+  }
+}
+
+// Returns the linear combination lambda1 * image1 + lambda2 * image2 of two
+// grayscale images as a new plane. The inputs must have the same size
+// (checked).
+template <typename T>
+Plane<T> LinComb(const T lambda1, const Plane<T>& image1, const T lambda2,
+                 const Plane<T>& image2) {
+  const size_t width = image1.xsize();
+  const size_t height = image1.ysize();
+  JXL_CHECK(width == image2.xsize());
+  JXL_CHECK(height == image2.ysize());
+  Plane<T> combined(width, height);
+  for (size_t y = 0; y != height; ++y) {
+    const T* const JXL_RESTRICT in1 = image1.Row(y);
+    const T* const JXL_RESTRICT in2 = image2.Row(y);
+    T* const JXL_RESTRICT dst = combined.Row(y);
+    for (size_t x = 0; x != width; ++x) {
+      dst[x] = lambda1 * in1[x] + lambda2 * in2[x];
+    }
+  }
+  return combined;
+}
+
+// Returns a new plane in which every pixel of `image` is multiplied by
+// `lambda`.
+template <typename T>
+Plane<T> ScaleImage(const T lambda, const Plane<T>& image) {
+  const size_t width = image.xsize();
+  const size_t height = image.ysize();
+  Plane<T> scaled(width, height);
+  for (size_t y = 0; y != height; ++y) {
+    const T* const JXL_RESTRICT src = image.Row(y);
+    T* const JXL_RESTRICT dst = scaled.Row(y);
+    for (size_t x = 0; x != width; ++x) {
+      dst[x] = lambda * src[x];
+    }
+  }
+  return scaled;
+}
+
+// Multiplies every pixel of `image` by `lambda`, in place.
+template <typename T>
+void ScaleImage(const T lambda, Plane<T>* image) {
+  const size_t width = image->xsize();
+  const size_t height = image->ysize();
+  for (size_t y = 0; y != height; ++y) {
+    T* const JXL_RESTRICT pixels = image->Row(y);
+    for (size_t x = 0; x != width; ++x) {
+      pixels[x] = lambda * pixels[x];
+    }
+  }
+}
+
+// Returns the pixel-by-pixel product of `a` and `b` as a new plane sized like
+// `a`. The size of `b` is not checked here.
+template <typename T>
+Plane<T> Product(const Plane<T>& a, const Plane<T>& b) {
+  const size_t width = a.xsize();
+  const size_t height = a.ysize();
+  Plane<T> product(width, height);
+  for (size_t y = 0; y != height; ++y) {
+    const T* const JXL_RESTRICT lhs = a.Row(y);
+    const T* const JXL_RESTRICT rhs = b.Row(y);
+    T* const JXL_RESTRICT dst = product.Row(y);
+    for (size_t x = 0; x != width; ++x) {
+      dst[x] = lhs[x] * rhs[x];
+    }
+  }
+  return product;
+}
+
+float DotProduct(const ImageF& a, const ImageF& b);
+
+// Sets every pixel of `image` to `value`.
+template <typename T>
+void FillImage(const T value, Plane<T>* image) {
+  const size_t width = image->xsize();
+  const size_t height = image->ysize();
+  for (size_t y = 0; y != height; ++y) {
+    T* const JXL_RESTRICT pixels = image->Row(y);
+    std::fill(pixels, pixels + width, value);
+  }
+}
+
+// Sets all bytes of every row of `image` to zero (via memset).
+template <typename T>
+void ZeroFillImage(Plane<T>* image) {
+  const size_t width = image->xsize();
+  if (width == 0) return;
+  const size_t row_bytes = width * sizeof(T);
+  const size_t height = image->ysize();
+  for (size_t y = 0; y != height; ++y) {
+    T* const JXL_RESTRICT pixels = image->Row(y);
+    memset(pixels, 0, row_bytes);
+  }
+}
+
+// Maps an out-of-bounds coordinate back into [0, xsize) by mirroring, and
+// returns an in-range coordinate unchanged.
+// The radius (distance outside the image) is assumed to be small compared to
+// the image size, otherwise this might not terminate.
+// The mirror is outside the last column (border pixel is also replicated).
+static inline int64_t Mirror(int64_t x, const int64_t xsize) {
+  JXL_DASSERT(xsize != 0);
+
+  // TODO(janwas): replace with branchless version
+  for (;;) {
+    if (x < 0) {
+      // Reflect around the left edge: -1 -> 0, -2 -> 1, ...
+      x = -x - 1;
+    } else if (x >= xsize) {
+      // Reflect around the right edge: xsize -> xsize - 1, ...
+      x = 2 * xsize - 1 - x;
+    } else {
+      return x;
+    }
+  }
+}
+
+// Wrap modes for ensuring X/Y coordinates are in the valid range [0, size):
+
+// Wrap mode that mirrors the coordinate via Mirror() (repeating the edge pixel
+// once). Useful for convolutions.
+struct WrapMirror {
+  JXL_INLINE int64_t operator()(const int64_t coord, const int64_t size) const {
+    const int64_t mirrored = Mirror(coord, size);
+    return mirrored;
+  }
+};
+
+// Returns the same coordinate: required for TFNode with Border(), or useful
+// when we know "coord" is already valid (e.g. interior of an image).
+// The size argument is ignored.
+struct WrapUnchanged {
+  JXL_INLINE int64_t operator()(const int64_t coord, int64_t /*size*/) const {
+    return coord;
+  }
+};
+
+// Similar to Wrap* but for row pointers (reduces Row() multiplications).
+
+// Row-pointer counterpart of WrapMirror: maps a row pointer that lies before
+// the first or after the last row back into the image by mirroring.
+class WrapRowMirror {
+ public:
+  // Remembers the pointers to row 0 and to row `ysize - 1` of `image`.
+  template <class ImageOrView>
+  WrapRowMirror(const ImageOrView& image, size_t ysize)
+      : first_row_(image.ConstRow(0)), last_row_(image.ConstRow(ysize - 1)) {}
+
+  // Returns `row` unchanged if it lies within [first row, last row], otherwise
+  // its mirror image. `stride` is presumably the distance between consecutive
+  // rows in floats - TODO confirm against callers.
+  const float* operator()(const float* const JXL_RESTRICT row,
+                          const int64_t stride) const {
+    if (row < first_row_) {
+      // Mirrored; one row before => row 0, two before = row 1, ...
+      const int64_t dist_before = first_row_ - row;
+      return first_row_ + dist_before - stride;
+    } else if (row > last_row_) {
+      // Mirrored; one row after => last row, two after = last - 1, ...
+      const int64_t dist_after = row - last_row_;
+      return last_row_ - dist_after + stride;
+    } else {
+      return row;
+    }
+  }
+
+ private:
+  const float* const JXL_RESTRICT first_row_;
+  const float* const JXL_RESTRICT last_row_;
+};
+
+// Row-pointer wrap mode that returns the row pointer unchanged; the stride
+// argument is ignored.
+struct WrapRowUnchanged {
+  JXL_INLINE const float* operator()(const float* const JXL_RESTRICT row,
+                                     int64_t /*stride*/) const {
+    return row;
+  }
+};
+
+// Sets "thickness" pixels on each border to "value". This is faster than
+// initializing the entire image and overwriting valid/interior pixels.
+// A thickness larger than the image is clamped to the image size, in which
+// case the affected rows/columns are filled completely.
+template <typename T>
+void SetBorder(const size_t thickness, const T value, Plane<T>* image) {
+  const size_t xsize = image->xsize();
+  const size_t ysize = image->ysize();
+  // Clamping avoids the unsigned underflow of `ysize - thickness` and keeps
+  // the left/right fills inside the row when thickness > xsize.
+  const size_t ythick = std::min(thickness, ysize);
+  const size_t xthick = std::min(thickness, xsize);
+
+  // Top: fill entire row
+  for (size_t y = 0; y < ythick; ++y) {
+    T* const JXL_RESTRICT row = image->Row(y);
+    std::fill(row, row + xsize, value);
+  }
+
+  // Bottom: fill entire row
+  for (size_t y = ysize - ythick; y < ysize; ++y) {
+    T* const JXL_RESTRICT row = image->Row(y);
+    std::fill(row, row + xsize, value);
+  }
+
+  // Left/right: fill the 'columns' on either side. The loop is empty when all
+  // rows already belong to the top/bottom borders.
+  for (size_t y = ythick; y + ythick < ysize; ++y) {
+    T* const JXL_RESTRICT row = image->Row(y);
+    std::fill(row, row + xthick, value);
+    std::fill(row + xsize - xthick, row + xsize, value);
+  }
+}
+
+// Computes the minimum and maximum pixel value of `image` and stores them in
+// `*min` and `*max`. For an image without pixels, `*min` is left at
+// numeric_limits<T>::max() and `*max` at numeric_limits<T>::lowest().
+template <typename T>
+void ImageMinMax(const Plane<T>& image, T* const JXL_RESTRICT min,
+                 T* const JXL_RESTRICT max) {
+  T smallest = std::numeric_limits<T>::max();
+  T largest = std::numeric_limits<T>::lowest();
+  const size_t width = image.xsize();
+  const size_t height = image.ysize();
+  for (size_t y = 0; y != height; ++y) {
+    const T* const JXL_RESTRICT pixels = image.Row(y);
+    for (size_t x = 0; x != width; ++x) {
+      smallest = std::min(smallest, pixels[x]);
+      largest = std::max(largest, pixels[x]);
+    }
+  }
+  *min = smallest;
+  *max = largest;
+}
+
+// Copies pixels, scaling their value relative to the "from" min/max by
+// "to_range". Example: U8 [0, 255] := [0.0, 1.0], to_range = 1.0 =>
+// outputs [0.0, 1.0].
+// If all pixels of "from" are equal (zero range), every output pixel is 0.
+// "from" and "to" must have the same size (asserted).
+template <typename FromType, typename ToType>
+void ImageConvert(const Plane<FromType>& from, const float to_range,
+                  Plane<ToType>* const JXL_RESTRICT to) {
+  JXL_ASSERT(SameSize(from, *to));
+  FromType min_from, max_from;
+  ImageMinMax(from, &min_from, &max_from);
+  const auto range_from = max_from - min_from;
+  // Guard against a constant image: dividing by a zero range would give an
+  // infinite scale and 0 * inf = NaN for every pixel.
+  const float scale = (range_from == 0) ? 0.0f : to_range / range_from;
+  for (size_t y = 0; y < from.ysize(); ++y) {
+    const FromType* const JXL_RESTRICT row_from = from.Row(y);
+    ToType* const JXL_RESTRICT row_to = to->Row(y);
+    for (size_t x = 0; x < from.xsize(); ++x) {
+      row_to[x] = static_cast<ToType>((row_from[x] - min_from) * scale);
+    }
+  }
+}
+
+// Returns a float copy of `from`. float/double input is copied as is; any
+// other input type is divided by numeric_limits<From>::max().
+template <typename From>
+Plane<float> ConvertToFloat(const Plane<From>& from) {
+  const bool already_float =
+      std::is_same<From, double>::value || std::is_same<From, float>::value;
+  float factor = 1.0f;
+  if (!already_float) {
+    factor = 1.0f / std::numeric_limits<From>::max();
+  }
+  const size_t width = from.xsize();
+  const size_t height = from.ysize();
+  Plane<float> result(width, height);
+  for (size_t y = 0; y != height; ++y) {
+    const From* const JXL_RESTRICT src = from.Row(y);
+    float* const JXL_RESTRICT dst = result.Row(y);
+    for (size_t x = 0; x != width; ++x) {
+      dst[x] = src[x] * factor;
+    }
+  }
+  return result;
+}
+
+template <typename T>
+Plane<T> ImageFromPacked(const std::vector<T>& packed, const size_t xsize,
+                         const size_t ysize) {
+  JXL_ASSERT(packed.size() >= xsize * ysize);  // ysize rows of xsize pixels
+  Plane<T> out(xsize, ysize);
+  for (size_t y = 0; y < ysize; ++y) {
+    // Unlike &packed[i], data() + i is also valid for an empty vector.
+    memcpy(out.Row(y), packed.data() + y * xsize, xsize * sizeof(T));
+  }
+  return out;
+}
+
+// Computes independent minimum and maximum values for each plane inside rect.
+template <typename T>
+void Image3MinMax(const Image3<T>& image, const Rect& rect,
+                  std::array<T, 3>* out_min, std::array<T, 3>* out_max) {
+  for (size_t c = 0; c < 3; ++c) {
+    T min = std::numeric_limits<T>::max();
+    T max = std::numeric_limits<T>::lowest();  // min() is > 0 for float types
+    for (size_t y = 0; y < rect.ysize(); ++y) {
+      const T* JXL_RESTRICT row = rect.ConstPlaneRow(image, c, y);
+      for (size_t x = 0; x < rect.xsize(); ++x) {
+        min = std::min(min, row[x]);
+        max = std::max(max, row[x]);
+      }
+    }
+    (*out_min)[c] = min;
+    (*out_max)[c] = max;
+  }
+}
+
+// Per-plane minimum/maximum, computed via the Rect overload with Rect(image).
+template <typename T>
+void Image3MinMax(const Image3<T>& image, std::array<T, 3>* out_min,
+                  std::array<T, 3>* out_max) {
+  Image3MinMax(image, Rect(image), out_min, out_max);
+}
+
+// Computes the maximum pixel value of each plane. Starts from lowest() rather
+// than min(), which is the smallest positive value for floating-point T.
+template <typename T>
+void Image3Max(const Image3<T>& image, std::array<T, 3>* out_max) {
+  for (size_t c = 0; c < 3; ++c) {
+    T max = std::numeric_limits<T>::lowest();
+    for (size_t y = 0; y < image.ysize(); ++y) {
+      const T* JXL_RESTRICT row = image.ConstPlaneRow(c, y);
+      for (size_t x = 0; x < image.xsize(); ++x) max = std::max(max, row[x]);
+    }
+    (*out_max)[c] = max;
+  }
+}
+
+// Returns the total of all pixel values of `image` that lie inside `rect`.
+template <typename T>
+T ImageSum(const Plane<T>& image, const Rect& rect) {
+  T total = 0;
+  for (size_t iy = 0; iy < rect.ysize(); ++iy) {
+    const T* JXL_RESTRICT cur = rect.ConstRow(image, iy);
+    for (const T* const end = cur + rect.xsize(); cur != end; ++cur) {
+      total += *cur;
+    }
+  }
+  return total;
+}
+
+template <typename T>
+T ImageSum(const Plane<T>& image) {
+  return ImageSum(image, Rect(image));  // delegates to the Rect overload
+}
+
+// Returns the sum of the pixels inside `rect`, separately for each plane.
+template <typename T>
+std::array<T, 3> Image3Sum(const Image3<T>& image, const Rect& rect) {
+  // `= 0` is not a valid std::array initializer; value-initialize instead.
+  std::array<T, 3> out_sum{};
+  for (size_t c = 0; c < 3; ++c) out_sum[c] = ImageSum(image.Plane(c), rect);
+  return out_sum;
+}
+
+template <typename T>
+std::array<T, 3> Image3Sum(const Image3<T>& image) {
+  return Image3Sum(image, Rect(image));  // delegates to the Rect overload
+}
+
+template <typename T>
+std::vector<T> PackedFromImage(const Plane<T>& image, const Rect& rect) {
+  const size_t xsize = rect.xsize();
+  std::vector<T> packed(xsize * rect.ysize());  // rows stored back to back
+  T* out = packed.data();  // unlike &packed[0], valid for an empty vector
+  for (size_t y = 0; y < rect.ysize(); ++y, out += xsize) {
+    memcpy(out, rect.ConstRow(image, y), xsize * sizeof(T));
+  }
+  return packed;
+}
+
+template <typename T>
+std::vector<T> PackedFromImage(const Plane<T>& image) {
+  return PackedFromImage(image, Rect(image));  // delegates to Rect overload
+}
+
+// Computes the median pixel value of `image` within `rect`.
+template <typename T>
+T ImageMedian(const Plane<T>& image, const Rect& rect) {
+  std::vector<T> pixels = PackedFromImage(image, rect);  // mutable copy
+  return Median(&pixels);  // Median() receives a pointer to that copy
+}
+
+template <typename T>
+T ImageMedian(const Plane<T>& image) {
+  return ImageMedian(image, Rect(image));  // delegates to the Rect overload
+}
+
+// Returns the median of each plane of `image`, restricted to `rect`.
+template <typename T>
+std::array<T, 3> Image3Median(const Image3<T>& image, const Rect& rect) {
+  // Braced initializers are evaluated left to right, i.e. plane 0, 1, 2.
+  return {{ImageMedian(image.Plane(0), rect),
+           ImageMedian(image.Plane(1), rect),
+           ImageMedian(image.Plane(2), rect)}};
+}
+
+template <typename T>
+std::array<T, 3> Image3Median(const Image3<T>& image) {
+  return Image3Median(image, Rect(image));  // delegates to the Rect overload
+}
+
+template <typename FromType, typename ToType>
+void Image3Convert(const Image3<FromType>& from, const float to_range,
+                   Image3<ToType>* const JXL_RESTRICT to) {
+  JXL_ASSERT(SameSize(from, *to));
+  std::array<FromType, 3> min_from, max_from;
+  Image3MinMax(from, &min_from, &max_from);
+  float scale = std::numeric_limits<float>::infinity();  // min over planes
+  for (size_t c = 0; c < 3; ++c) {
+    const auto span = max_from[c] - min_from[c];  // 0 for a flat plane
+    if (span != 0) scale = std::min<float>(scale, to_range / span);
+  }
+  if (scale == std::numeric_limits<float>::infinity()) scale = 0.0f;  // flat
+  for (size_t c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < from.ysize(); ++y) {
+      const FromType* JXL_RESTRICT in = from.ConstPlaneRow(c, y);
+      ToType* JXL_RESTRICT out = to->PlaneRow(c, y);
+      for (size_t x = 0; x < from.xsize(); ++x) {
+        out[x] = static_cast<ToType>(float((in[x] - min_from[c]) * scale));
+      }
+    }
+  }
+}
+
+template <typename From>
+Image3F ConvertToFloat(const Image3<From>& from) {
+  return Image3F(ConvertToFloat(from.Plane(0)), ConvertToFloat(from.Plane(1)),
+                 ConvertToFloat(from.Plane(2)));  // one conversion per plane
+}
+
+template <typename Tin, typename Tout>
+void Subtract(const Image3<Tin>& image1, const Image3<Tin>& image2,
+              Image3<Tout>* out) {
+  // out = image1 - image2, per pixel. Only the two inputs are size-checked.
+  const size_t xsize = image1.xsize();
+  const size_t ysize = image1.ysize();
+  JXL_CHECK(xsize == image2.xsize());
+  JXL_CHECK(ysize == image2.ysize());
+  for (size_t plane = 0; plane != 3; ++plane) {
+    for (size_t iy = 0; iy != ysize; ++iy) {
+      const Tin* const JXL_RESTRICT lhs = image1.ConstPlaneRow(plane, iy);
+      const Tin* const JXL_RESTRICT rhs = image2.ConstPlaneRow(plane, iy);
+      Tout* const JXL_RESTRICT diff = out->PlaneRow(plane, iy);
+      for (size_t ix = 0; ix != xsize; ++ix) {
+        diff[ix] = lhs[ix] - rhs[ix];
+      }
+    }
+  }
+}
+
+template <typename Tin, typename Tout>
+void SubtractFrom(const Image3<Tin>& what, Image3<Tout>* to) {
+  // In place: to -= what, over the area covered by `what`.
+  const size_t width = what.xsize();
+  for (size_t plane = 0; plane != 3; ++plane) {
+    for (size_t iy = 0; iy != what.ysize(); ++iy) {
+      const Tin* JXL_RESTRICT sub = what.ConstPlaneRow(plane, iy);
+      Tout* JXL_RESTRICT acc = to->PlaneRow(plane, iy);
+      for (size_t ix = 0; ix != width; ++ix) {
+        acc[ix] -= sub[ix];
+      }
+    }
+  }
+}
+
+template <typename Tin, typename Tout>
+void AddTo(const Image3<Tin>& what, Image3<Tout>* to) {
+  // In place: to += what, over the area covered by `what`.
+  const size_t width = what.xsize();
+  for (size_t plane = 0; plane != 3; ++plane) {
+    for (size_t iy = 0; iy != what.ysize(); ++iy) {
+      const Tin* JXL_RESTRICT addend = what.ConstPlaneRow(plane, iy);
+      Tout* JXL_RESTRICT acc = to->PlaneRow(plane, iy);
+      for (size_t ix = 0; ix != width; ++ix) {
+        acc[ix] += addend[ix];
+      }
+    }
+  }
+}
+
+// Adds `what`, which must be exactly rect-sized, into `to` at `rect`.
+template <typename Tin, typename Tout>
+void AddTo(const Rect& rect, const Image3<Tin>& what, Image3<Tout>* to) {
+  const size_t xsize = what.xsize();
+  const size_t ysize = what.ysize();
+  JXL_ASSERT(xsize == rect.xsize());
+  JXL_ASSERT(ysize == rect.ysize());
+  for (size_t plane = 0; plane != 3; ++plane) {
+    for (size_t iy = 0; iy != ysize; ++iy) {
+      const Tin* JXL_RESTRICT src = what.ConstPlaneRow(plane, iy);
+      Tout* JXL_RESTRICT dst = rect.PlaneRow(to, plane, iy);
+      for (size_t ix = 0; ix != xsize; ++ix) {
+        dst[ix] += src[ix];
+      }
+    }
+  }
+}
+
+template <typename T>
+Image3<T> ScaleImage(const T lambda, const Image3<T>& image) {
+  Image3<T> scaled(image.xsize(), image.ysize());  // result, same dimensions
+  for (size_t plane = 0; plane != 3; ++plane) {
+    for (size_t iy = 0; iy != image.ysize(); ++iy) {
+      const T* JXL_RESTRICT src = image.ConstPlaneRow(plane, iy);
+      T* JXL_RESTRICT dst = scaled.PlaneRow(plane, iy);
+      for (size_t ix = 0; ix != image.xsize(); ++ix) {
+        dst[ix] = lambda * src[ix];
+      }
+    }
+  }
+  return scaled;
+}
+
+// Multiplies every pixel of all three planes by lambda, in place.
+template <typename T>
+void ScaleImage(const T lambda, Image3<T>* image) {
+  for (size_t plane = 0; plane != 3; ++plane) {
+    for (size_t iy = 0; iy != image->ysize(); ++iy) {
+      T* const JXL_RESTRICT begin = image->PlaneRow(plane, iy);
+      for (T* px = begin; px != begin + image->xsize(); ++px) {
+        *px = lambda * *px;
+      }
+    }
+  }
+}
+
+// Sets every pixel of all three planes to "value".
+template <typename T>
+void FillImage(const T value, Image3<T>* image) {
+  const size_t width = image->xsize();
+  for (size_t plane = 0; plane != 3; ++plane) {
+    for (size_t iy = 0; iy != image->ysize(); ++iy) {
+      T* JXL_RESTRICT begin = image->PlaneRow(plane, iy);
+      // Assigns `value` to each of the `width` pixels of this row.
+      std::fill(begin, begin + width, value);
+    }
+  }
+}
+
+template <typename T>
+void FillPlane(const T value, Plane<T>* image) {
+  // Every pixel of every row becomes `value`.
+  const size_t width = image->xsize();
+  for (size_t iy = 0; iy != image->ysize(); ++iy) {
+    T* JXL_RESTRICT begin = image->Row(iy);
+    std::fill(begin, begin + width, value);
+  }
+}
+
+template <typename T>
+void FillImage(const T value, Image3<T>* image, Rect rect) {
+  // Only the pixels inside `rect` are overwritten, in all three planes.
+  const size_t width = rect.xsize();
+  for (size_t plane = 0; plane != 3; ++plane) {
+    for (size_t iy = 0; iy != rect.ysize(); ++iy) {
+      T* JXL_RESTRICT begin = rect.PlaneRow(image, plane, iy);
+      std::fill(begin, begin + width, value);
+    }
+  }
+}
+
+template <typename T>
+void FillPlane(const T value, Plane<T>* image, Rect rect) {
+  // Only the pixels inside `rect` are overwritten.
+  const size_t width = rect.xsize();
+  for (size_t iy = 0; iy != rect.ysize(); ++iy) {
+    T* JXL_RESTRICT begin = rect.Row(image, iy);
+    std::fill(begin, begin + width, value);
+  }
+}
+
+template <typename T>
+void ZeroFillImage(Image3<T>* image) {
+  const size_t row_bytes = image->xsize() * sizeof(T);  // per-row byte count
+  for (size_t plane = 0; plane != 3; ++plane) {
+    for (size_t iy = 0; iy != image->ysize(); ++iy) {
+      memset(image->PlaneRow(plane, iy), 0, row_bytes);
+    }
+  }
+}
+
+template <typename T>
+void ZeroFillPlane(Plane<T>* image, Rect rect) {
+  const size_t row_bytes = rect.xsize() * sizeof(T);  // per-row byte count
+  for (size_t iy = 0; iy != rect.ysize(); ++iy) {
+    memset(rect.Row(image, iy), 0, row_bytes);
+  }
+}
+
+// First, image is padded horizontally, with the rightmost value.
+// Next, image is padded vertically, by repeating the last line.
+ImageF PadImage(const ImageF& in, size_t xsize, size_t ysize);
+
+// Pad an image with xborder columns on each vertical side and yborder rows
+// above and below, mirroring the image.
+Image3F PadImageMirror(const Image3F& in, size_t xborder, size_t yborder);
+
+// First, image is padded horizontally, with the rightmost value.
+// Next, image is padded vertically, by repeating the last line.
+// Prefer PadImageToBlockMultipleInPlace if padding to kBlockDim.
+Image3F PadImageToMultiple(const Image3F& in, size_t N);
+
+// Same as above, but operates in-place. Assumes that the `in` image was
+// allocated large enough.
+void PadImageToBlockMultipleInPlace(Image3F* JXL_RESTRICT in);
+
+// Downsamples an image by a given factor.
+void DownsampleImage(Image3F* opsin, size_t factor);
+void DownsampleImage(ImageF* image, size_t factor);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_IMAGE_OPS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_ops_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_ops_test.cc
new file mode 100644
index 0000000000..84801feb5a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_ops_test.cc
@@ -0,0 +1,133 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/image_ops.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <random>
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_test_utils.h"
+
+namespace jxl {
+namespace {
+
+template <typename T>
+void TestPacked(const size_t xsize, const size_t ysize) {
+  Plane<T> image1(xsize, ysize);
+  RandomFillImage(&image1);
+  // Pack, unpack, and require that the round trip reproduces every pixel.
+  const auto image2 = ImageFromPacked(PackedFromImage(image1), xsize, ysize);
+  EXPECT_TRUE(SamePixels(image1, image2));
+}
+
+TEST(ImageTest, TestPacked) {
+  TestPacked<uint8_t>(1, 1);  // single pixel
+  TestPacked<uint8_t>(7, 1);  // single row (xsize = 7)
+  TestPacked<uint8_t>(1, 7);  // single column (ysize = 7)
+
+  TestPacked<int16_t>(1, 1);  // same three shapes for signed 16-bit
+  TestPacked<int16_t>(7, 1);
+  TestPacked<int16_t>(1, 7);
+
+  TestPacked<uint16_t>(1, 1);  // ... unsigned 16-bit
+  TestPacked<uint16_t>(7, 1);
+  TestPacked<uint16_t>(1, 7);
+
+  TestPacked<float>(1, 1);  // ... and float pixels
+  TestPacked<float>(7, 1);
+  TestPacked<float>(1, 7);
+}
+
+// For several sizes and offsets: the allocation is aligned and fully usable.
+TEST(ImageTest, TestAllocator) {
+  std::mt19937 rng(129);
+  const size_t k32 = 32;
+  const size_t kAlign = CacheAligned::kAlignment;
+  for (size_t size : {k32 * 1, k32 * 2, k32 * 3, k32 * 4, k32 * 5,
+                      CacheAligned::kAlias, 2 * CacheAligned::kAlias + 4}) {
+    for (size_t offset = 0; offset <= CacheAligned::kAlias; offset += kAlign) {
+      uint8_t* bytes =
+          static_cast<uint8_t*>(CacheAligned::Allocate(size, offset));
+      JXL_CHECK(reinterpret_cast<uintptr_t>(bytes) % kAlign == 0);
+      // Zero everything, then make the last byte the unique maximum. The
+      // random probe keeps the compiler from eliding the write.
+      memset(bytes, 0, size);
+      bytes[size - 1] = 1;  // greatest element
+      std::uniform_int_distribution<uint32_t> dist(0, size - 1);
+      uint32_t pos;  // random index, redrawn until it is not the last byte
+      do {
+        pos = dist(rng);
+      } while (pos == size - 1);
+      JXL_CHECK(bytes[pos] < bytes[size - 1]);
+
+      CacheAligned::Free(bytes);
+    }
+  }
+}
+
+template <typename T>
+void TestFillImpl(Image3<T>* img, const char* layout) {
+  FillImage(T(1), img);
+  for (size_t iy = 0; iy != img->ysize(); ++iy) {
+    for (size_t plane = 0; plane != 3; ++plane) {
+      T* JXL_RESTRICT px = img->PlaneRow(plane, iy);
+      for (size_t ix = 0; ix != img->xsize(); ++ix) {
+        if (px[ix] != T(1)) {
+          printf("Not 1 at c=%zu %zu, %zu (%zu x %zu) (%s)\n", plane, ix, iy,
+                 img->xsize(), img->ysize(), layout);
+          abort();
+        }
+        px[ix] = T(2);  // a second visit of this pixel would now abort
+      }
+    }
+  }
+
+  // Second pass: ZeroFillImage, checked with plane-major (c, then y) loops.
+  ZeroFillImage(img);
+  for (size_t plane = 0; plane != 3; ++plane) {
+    for (size_t iy = 0; iy != img->ysize(); ++iy) {
+      T* JXL_RESTRICT px = img->PlaneRow(plane, iy);
+      for (size_t ix = 0; ix != img->xsize(); ++ix) {
+        if (px[ix] != T(0)) {
+          printf("Not 0 at c=%zu %zu, %zu (%zu x %zu) (%s)\n", plane, ix, iy,
+                 img->xsize(), img->ysize(), layout);
+          abort();
+        }
+        px[ix] = T(3);  // a second visit of this pixel would now abort
+      }
+    }
+  }
+}
+
+template <typename T>
+void TestFillT() {
+  static constexpr uint32_t kSizes[] = {0, 1, 15, 16, 31, 32};
+  for (const uint32_t xsize : kSizes) {
+    for (const uint32_t ysize : kSizes) {
+      Image3<T> from_size(xsize, ysize);
+      TestFillImpl(&from_size, "size ctor");
+      Image3<T> from_planes(Plane<T>(xsize, ysize), Plane<T>(xsize, ysize),
+                            Plane<T>(xsize, ysize));
+      TestFillImpl(&from_planes, "planar");
+    }
+  }
+}
+
+// Ensure y/c/x and c/y/x loops visit pixels no more than once.
+TEST(ImageTest, TestFill) {
+  TestFillT<uint8_t>();  // each call covers sizes 0..32 in both dimensions
+  TestFillT<int16_t>();
+  TestFillT<float>();
+  TestFillT<double>();
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_test_utils.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_test_utils.h
new file mode 100644
index 0000000000..e484307c14
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/image_test_utils.h
@@ -0,0 +1,313 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_IMAGE_TEST_UTILS_H_
+#define LIB_JXL_IMAGE_TEST_UTILS_H_
+
+#include <stddef.h>
+
+#include <cmath>
+#include <limits>
+#include <random>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+template <typename T>
+void VerifyEqual(const Plane<T>& expected, const Plane<T>& actual) {
+  JXL_CHECK(SameSize(expected, actual));  // sizes are checked before pixels
+  for (size_t y = 0; y < expected.ysize(); ++y) {
+    const T* const JXL_RESTRICT row_expected = expected.Row(y);
+    const T* const JXL_RESTRICT row_actual = actual.Row(y);
+    for (size_t x = 0; x < expected.xsize(); ++x) {
+      ASSERT_EQ(row_expected[x], row_actual[x]) << x << " " << y;  // "x y"
+    }
+  }
+}
+
+template <typename T>
+void VerifyEqual(const Image3<T>& expected, const Image3<T>& actual) {
+  for (size_t c = 0; c < 3; ++c) {
+    VerifyEqual(expected.Plane(c), actual.Plane(c));  // plane by plane
+  }
+}
+
+// Returns whether image1 and image2 hold the same pixels inside `rect`.
+template <typename T>
+bool SamePixels(const Plane<T>& image1, const Plane<T>& image2,
+                const Rect rect) {
+  if (!rect.IsInside(image1) || !rect.IsInside(image2)) {
+    ADD_FAILURE() << "requested rectangle is not fully inside the image";
+    return false;
+  }
+  size_t mismatches = 0;
+  // x/y are absolute coordinates, so each range ends at origin + size.
+  for (size_t y = rect.y0(); y < rect.y0() + rect.ysize(); ++y) {
+    const T* const JXL_RESTRICT row1 = image1.Row(y);
+    const T* const JXL_RESTRICT row2 = image2.Row(y);
+    for (size_t x = rect.x0(); x < rect.x0() + rect.xsize(); ++x) {
+      if (row1[x] != row2[x]) {
+        ADD_FAILURE() << "pixel mismatch at " << x << ", " << y << ": "
+                      << double(row1[x]) << " != " << double(row2[x]);
+        if (++mismatches > 4) return false;  // report at most five pixels
+      }
+    }
+  }
+  return mismatches == 0;
+}
+
+template <typename T>
+bool SamePixels(const Plane<T>& image1, const Plane<T>& image2) {
+  JXL_CHECK(SameSize(image1, image2));  // comparing needs equal dimensions
+  return SamePixels(image1, image2, Rect(image1));  // Rect overload
+}
+
+// True if all three planes of both images hold identical pixels.
+template <typename T>
+bool SamePixels(const Image3<T>& image1, const Image3<T>& image2) {
+  JXL_CHECK(SameSize(image1, image2));
+  // Planes are compared in order 0, 1, 2; && stops at the first plane that
+  // differs, so the remaining planes are not examined.
+  return SamePixels(image1.Plane(0), image2.Plane(0)) &&
+         SamePixels(image1.Plane(1), image2.Plane(1)) &&
+         SamePixels(image1.Plane(2), image2.Plane(2));
+}
+
+// Use for floating-point images with fairly large numbers; a pixel is bad only
+// if it exceeds both the absolute (L1) and the relative error threshold.
+template <typename T>
+void VerifyRelativeError(const Plane<T>& expected, const Plane<T>& actual,
+                         const double threshold_l1,
+                         const double threshold_relative,
+                         const intptr_t border = 0, const size_t c = 0) {
+  JXL_CHECK(SameSize(expected, actual));
+  const intptr_t xsize = expected.xsize();
+  const intptr_t ysize = expected.ysize();
+
+  // Max over all bad pixels, to give a better idea whether there are
+  // systematic errors or just one outlier. Invalid if negative.
+  double max_l1 = -1;
+  double max_relative = -1;
+  bool any_bad = false;
+  for (intptr_t y = border; y < ysize - border; ++y) {
+    const T* const JXL_RESTRICT row_expected = expected.Row(y);
+    const T* const JXL_RESTRICT row_actual = actual.Row(y);
+    for (intptr_t x = border; x < xsize - border; ++x) {
+      const double l1 = std::abs(row_expected[x] - row_actual[x]);
+
+      // Cannot compute relative, only check/update L1.
+      if (std::abs(row_expected[x]) < 1E-10) {
+        if (l1 > threshold_l1) {
+          any_bad = true;
+          max_l1 = std::max(max_l1, l1);
+        }
+      } else {
+        const double relative = l1 / std::abs(double(row_expected[x]));
+        if (l1 > threshold_l1 && relative > threshold_relative) {
+          // Fails both tolerances => will exit below, update max_*.
+          any_bad = true;
+          max_l1 = std::max(max_l1, l1);
+          max_relative = std::max(max_relative, relative);
+        }
+      }
+    }
+  }
+  if (any_bad) {
+    // Never had a valid relative value, don't print it.
+    if (max_relative < 0) {
+      fprintf(stderr, "c=%zu: max +/- %E exceeds +/- %.2E\n", c, max_l1,
+              threshold_l1);
+    } else {
+      fprintf(stderr, "c=%zu: max +/- %E, x %E exceeds +/- %.2E, x %.2E\n", c,
+              max_l1, max_relative, threshold_l1, threshold_relative);
+    }
+    // Dump the expected image and actual image if the region is small enough.
+    const intptr_t kMaxTestDumpSize = 16;
+    if (xsize <= kMaxTestDumpSize + 2 * border &&
+        ysize <= kMaxTestDumpSize + 2 * border) {
+      fprintf(stderr, "Expected image:\n");
+      for (intptr_t y = border; y < ysize - border; ++y) {
+        const T* const JXL_RESTRICT row_expected = expected.Row(y);
+        for (intptr_t x = border; x < xsize - border; ++x) {
+          fprintf(stderr, "%10lf ", static_cast<double>(row_expected[x]));
+        }
+        fprintf(stderr, "\n");
+      }
+
+      fprintf(stderr, "Actual image:\n");
+      for (intptr_t y = border; y < ysize - border; ++y) {
+        const T* const JXL_RESTRICT row_expected = expected.Row(y);
+        const T* const JXL_RESTRICT row_actual = actual.Row(y);
+        for (intptr_t x = border; x < xsize - border; ++x) {
+          const double l1 = std::abs(row_expected[x] - row_actual[x]);
+
+          bool bad = l1 > threshold_l1;
+          if (!(std::abs(row_expected[x]) < 1E-10)) {  // as in detection loop
+            const double relative = l1 / std::abs(double(row_expected[x]));
+            bad &= relative > threshold_relative;
+          }
+          if (bad) {
+            fprintf(stderr, "%10lf ", static_cast<double>(row_actual[x]));
+          } else {
+            fprintf(stderr, "%10s ", "==");
+          }
+        }
+        fprintf(stderr, "\n");
+      }
+    }
+
+    // Find first failing x for further debugging.
+    for (intptr_t y = border; y < ysize - border; ++y) {
+      const T* const JXL_RESTRICT row_expected = expected.Row(y);
+      const T* const JXL_RESTRICT row_actual = actual.Row(y);
+
+      for (intptr_t x = border; x < xsize - border; ++x) {
+        const double l1 = std::abs(row_expected[x] - row_actual[x]);
+
+        bool bad = l1 > threshold_l1;
+        if (!(std::abs(row_expected[x]) < 1E-10)) {  // as in detection loop
+          const double relative = l1 / std::abs(double(row_expected[x]));
+          bad &= relative > threshold_relative;
+        }
+        if (bad) {
+          FAIL() << x << ", " << y << " (" << expected.xsize() << " x "
+                 << expected.ysize() << ") expected "
+                 << static_cast<double>(row_expected[x]) << " actual "
+                 << static_cast<double>(row_actual[x]);
+        }
+      }
+    }
+    return;  // if any_bad, we should have exited.
+  }
+}
+
+template <typename T>
+void VerifyRelativeError(const Image3<T>& expected, const Image3<T>& actual,
+                         const float threshold_l1,
+                         const float threshold_relative,
+                         const intptr_t border = 0) {
+  for (size_t c = 0; c < 3; ++c) {  // c is also passed on for the messages
+    VerifyRelativeError(expected.Plane(c), actual.Plane(c), threshold_l1,
+                        threshold_relative, border, c);
+  }
+}
+
+// Generator for independent, uniformly distributed integers [0, max].
+template <typename T, typename Random>
+class GeneratorRandom {
+ public:
+  GeneratorRandom(Random* rng, const T max) : rng_(*rng), dist_(0, max) {}
+  // Same, but draws from [min, max].
+  GeneratorRandom(Random* rng, const T min, const T max)
+      : rng_(*rng), dist_(min, max) {}
+  // The coordinates x, y and channel c are ignored; each call is a new draw.
+  T operator()(const size_t x, const size_t y, const int c) const {
+    return dist_(rng_);
+  }
+  // NOTE(review): dist_ yields int; T wider than int looks unsupported.
+ private:
+  Random& rng_;  // caller's engine, held by reference and advanced by draws
+  mutable std::uniform_int_distribution<> dist_;  // default IntType is int
+};
+
+template <typename Random>
+class GeneratorRandom<float, Random> {
+ public:
+  GeneratorRandom(Random* rng, const float max)
+      : rng_(*rng), dist_(0.0f, max) {}
+  // Same, but with an explicit lower bound instead of 0.
+  GeneratorRandom(Random* rng, const float min, const float max)
+      : rng_(*rng), dist_(min, max) {}
+  // The coordinates x, y and channel c are ignored; each call is a new draw.
+  float operator()(const size_t x, const size_t y, const int c) const {
+    return dist_(rng_);
+  }
+
+ private:
+  Random& rng_;  // caller's engine, held by reference and advanced by draws
+  mutable std::uniform_real_distribution<float> dist_;
+};
+
+template <typename Random>
+class GeneratorRandom<double, Random> {
+ public:
+  GeneratorRandom(Random* rng, const double max)
+      : rng_(*rng), dist_(0.0, max) {}
+  // Same, but with an explicit lower bound instead of 0.
+  GeneratorRandom(Random* rng, const double min, const double max)
+      : rng_(*rng), dist_(min, max) {}
+  // The coordinates x, y and channel c are ignored; each call is a new draw.
+  double operator()(const size_t x, const size_t y, const int c) const {
+    return dist_(rng_);
+  }
+
+ private:
+  Random& rng_;  // caller's engine, held by reference and advanced by draws
+  mutable std::uniform_real_distribution<> dist_;  // default type is double
+};
+
+// Fills `image` row by row with generator(x, y, 0).
+template <class Generator, class Image>
+void GenerateImage(const Generator& generator, Image* image) {
+  using Pixel = typename Image::T;
+  for (size_t iy = 0; iy != image->ysize(); ++iy) {
+    Pixel* const JXL_RESTRICT out = image->Row(iy);
+    for (size_t ix = 0; ix != image->xsize(); ++ix) {
+      out[ix] = generator(ix, iy, 0);
+    }
+  }
+}
+
+template <typename T>
+void RandomFillImage(Plane<T>* image,
+                     const T max = std::numeric_limits<T>::max()) {
+  // The seed is fixed (129), so the engine starts from the same state.
+  std::mt19937_64 rng(129);
+  GenerateImage(GeneratorRandom<T, std::mt19937_64>(&rng, max), image);
+}
+
+template <typename T>
+void RandomFillImage(Plane<T>* image, const T min, const T max,
+                     const int seed) {
+  std::mt19937_64 rng(seed);  // engine state is determined by `seed`
+  using Generator = GeneratorRandom<T, std::mt19937_64>;
+  GenerateImage(Generator(&rng, min, max), image);
+}
+
+// Fills all three planes, plane by plane, with generator(x, y, c).
+template <class Generator, typename T>
+void GenerateImage(const Generator& generator, Image3<T>* image) {
+  for (size_t plane = 0; plane != 3; ++plane) {
+    for (size_t iy = 0; iy != image->ysize(); ++iy) {
+      T* JXL_RESTRICT out = image->PlaneRow(plane, iy);
+      for (size_t ix = 0; ix != image->xsize(); ++ix) {
+        out[ix] = generator(ix, iy, plane);
+      }
+    }
+  }
+}
+
+template <typename T>
+void RandomFillImage(Image3<T>* image,
+                     const T max = std::numeric_limits<T>::max()) {
+  // The seed is fixed (129), so the engine starts from the same state.
+  std::mt19937_64 rng(129);
+  GenerateImage(GeneratorRandom<T, std::mt19937_64>(&rng, max), image);
+}
+
+template <typename T>
+void RandomFillImage(Image3<T>* image, const T min, const T max,
+                     const int seed) {
+  std::mt19937_64 rng(seed);  // engine state is determined by `seed`
+  using Generator = GeneratorRandom<T, std::mt19937_64>;
+  GenerateImage(Generator(&rng, min, max), image);
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_IMAGE_TEST_UTILS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data.cc
new file mode 100644
index 0000000000..f57f697139
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data.cc
@@ -0,0 +1,140 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/jpeg/dec_jpeg_data.h"
+
+#include <brotli/decode.h>
+
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+
+namespace jxl {
+namespace jpeg {
+Status DecodeJPEGData(Span<const uint8_t> encoded, JPEGData* jpeg_data) {
+  Status ret = true;
+  const uint8_t* in = encoded.data();
+  size_t available_in = encoded.size();
+  {
+    BitReader br(encoded);
+    BitReaderScopedCloser br_closer(&br, &ret);
+    JXL_RETURN_IF_ERROR(Bundle::Read(&br, jpeg_data));
+    JXL_RETURN_IF_ERROR(br.JumpToByteBoundary());
+    in += br.TotalBitsConsumed() / 8;  // skip the bytes the bundle occupied
+    available_in -= br.TotalBitsConsumed() / 8;
+  }
+  JXL_RETURN_IF_ERROR(ret);
+  // Everything after the bundle is fed to a streaming brotli decoder.
+  BrotliDecoderState* brotli_dec =
+      BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
+  if (!brotli_dec) return JXL_FAILURE("Failed to create brotli decoder");
+  struct BrotliDecDeleter {
+    BrotliDecoderState* brotli_dec;
+    ~BrotliDecDeleter() { BrotliDecoderDestroyInstance(brotli_dec); }
+  } brotli_dec_deleter{brotli_dec};
+
+  BrotliDecoderResult result =
+      BrotliDecoderResult::BROTLI_DECODER_RESULT_SUCCESS;
+  // Fills all of `data` with the next data.size() decompressed bytes.
+  auto br_read = [&](std::vector<uint8_t>& data) -> Status {
+    size_t available_out = data.size();
+    uint8_t* out = data.data();
+    while (available_out != 0) {
+      if (BrotliDecoderIsFinished(brotli_dec)) {
+        return JXL_FAILURE("Not enough decompressed output");
+      }
+      result = BrotliDecoderDecompressStream(brotli_dec, &available_in, &in,
+                                             &available_out, &out, nullptr);
+      if (result !=
+              BrotliDecoderResult::BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT &&
+          result != BrotliDecoderResult::BROTLI_DECODER_RESULT_SUCCESS) {
+        return JXL_FAILURE(
+            "Brotli decoding error: %s\n",
+            BrotliDecoderErrorString(BrotliDecoderGetErrorCode(brotli_dec)));
+      }
+    }
+    return true;
+  };
+  size_t num_icc = 0;
+  for (size_t i = 0; i < jpeg_data->app_data.size(); i++) {
+    auto& marker = jpeg_data->app_data[i];
+    if (jpeg_data->app_marker_type[i] != AppMarkerType::kUnknown) {
+      // Set the size of the marker (high byte in [1], low byte in [2]).
+      size_t size_minus_1 = marker.size() - 1;  // NOTE(review): size() >= 3?
+      marker[1] = size_minus_1 >> 8;
+      marker[2] = size_minus_1 & 0xFF;
+      if (jpeg_data->app_marker_type[i] == AppMarkerType::kICC) {
+        if (marker.size() < 17) {
+          return JXL_FAILURE("ICC markers must be at least 17 bytes");
+        }
+        marker[0] = 0xE2;
+        memcpy(&marker[3], kIccProfileTag, sizeof kIccProfileTag);
+        marker[15] = ++num_icc;  // 1-based running index of this ICC marker
+      }
+    } else {
+      JXL_RETURN_IF_ERROR(br_read(marker));
+      if (marker[1] * 256u + marker[2] + 1u != marker.size()) {
+        return JXL_FAILURE("Incorrect marker size");
+      }
+    }
+  }
+  for (size_t i = 0; i < jpeg_data->app_data.size(); i++) {
+    auto& marker = jpeg_data->app_data[i];
+    if (jpeg_data->app_marker_type[i] == AppMarkerType::kICC) {
+      marker[16] = num_icc;  // total number of ICC markers, now known
+    }
+    if (jpeg_data->app_marker_type[i] == AppMarkerType::kExif) {
+      marker[0] = 0xE1;
+      if (marker.size() < 3 + sizeof kExifTag) {
+        return JXL_FAILURE("Incorrect Exif marker size");
+      }
+      memcpy(&marker[3], kExifTag, sizeof kExifTag);
+    }
+    if (jpeg_data->app_marker_type[i] == AppMarkerType::kXMP) {
+      marker[0] = 0xE1;
+      if (marker.size() < 3 + sizeof kXMPTag) {
+        return JXL_FAILURE("Incorrect XMP marker size");
+      }
+      memcpy(&marker[3], kXMPTag, sizeof kXMPTag);
+    }
+  }
+  // TODO(eustas): actually inject ICC profile and check it fits perfectly.
+  for (size_t i = 0; i < jpeg_data->com_data.size(); i++) {
+    auto& marker = jpeg_data->com_data[i];
+    JXL_RETURN_IF_ERROR(br_read(marker));
+    if (marker[1] * 256u + marker[2] + 1u != marker.size()) {
+      return JXL_FAILURE("Incorrect marker size");
+    }
+  }
+  for (size_t i = 0; i < jpeg_data->inter_marker_data.size(); i++) {
+    JXL_RETURN_IF_ERROR(br_read(jpeg_data->inter_marker_data[i]));
+  }
+  JXL_RETURN_IF_ERROR(br_read(jpeg_data->tail_data));
+
+  // Check if there is more decompressed output (ask for one extra byte).
+  size_t available_out = 1;
+  uint64_t dummy;
+  uint8_t* next_out = reinterpret_cast<uint8_t*>(&dummy);
+  result = BrotliDecoderDecompressStream(brotli_dec, &available_in, &in,
+                                         &available_out, &next_out, nullptr);
+  if (available_out == 0 ||
+      result == BrotliDecoderResult::BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
+    return JXL_FAILURE("Excess data in compressed stream");
+  }
+  if (result == BrotliDecoderResult::BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
+    return JXL_FAILURE("Incomplete brotli-stream");
+  }
+  if (!BrotliDecoderIsFinished(brotli_dec) ||
+      result != BrotliDecoderResult::BROTLI_DECODER_RESULT_SUCCESS) {
+    return JXL_FAILURE("Corrupted brotli-stream");
+  }
+  if (available_in != 0) {
+    return JXL_FAILURE("Unused data after brotli stream");
+  }
+
+  return true;
+}
+}  // namespace jpeg
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data.h
new file mode 100644
index 0000000000..b9d50bf9f8
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data.h
@@ -0,0 +1,19 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_JPEG_DEC_JPEG_DATA_H_
+#define LIB_JXL_JPEG_DEC_JPEG_DATA_H_
+
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+Status DecodeJPEGData(Span<const uint8_t> encoded, JPEGData* jpeg_data);
+}  // namespace jpeg
+}  // namespace jxl
+
+#endif  // LIB_JXL_JPEG_DEC_JPEG_DATA_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data_writer.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data_writer.cc
new file mode 100644
index 0000000000..c321344ebf
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data_writer.cc
@@ -0,0 +1,983 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/jpeg/dec_jpeg_data_writer.h"
+
+#include <stdlib.h>
+#include <string.h> /* for memset, memcpy */
+
+#include <deque>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/jpeg/dec_jpeg_serialization_state.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+
+namespace {
+
+enum struct SerializationStatus {
+  NEEDS_MORE_INPUT,   // scan stopped before its last MCU row
+  NEEDS_MORE_OUTPUT,  // not returned by any code in this file
+  ERROR,              // section could not be encoded
+  DONE                // section fully written to the output queue
+};
+
+const int kJpegPrecision = 8;  // precision byte written into every SOF header
+
+// JpegBitWriter: size in bytes of each output chunk the writer allocates.
+const size_t kJpegBitWriterChunkSize = 16384;
+
+// DCTCodingState: capacity reserved for buffered refinement (correction) bits.
+const int kJPEGMaxCorrectionBits = 1u << 16;
+
+// Non-zero result <=> at least one of the eight bytes of |x| equals 0x00.
+static JXL_INLINE uint64_t HasZeroByte(uint64_t x) {
+  const uint64_t kOnes = 0x0101010101010101ULL;  // 0x01 in every byte
+  return ~x & (x - kOnes) & (kOnes << 7);
+}
+
+void JpegBitWriterInit(JpegBitWriter* bw,
+                       std::deque<OutputChunk>* output_queue) {
+  bw->healthy = true;
+  bw->put_bits = 64;  // all 64 bits of the accumulator are free
+  bw->put_buffer = 0;
+  bw->pos = 0;
+  bw->output = output_queue;  // finished chunks get appended here
+  bw->chunk = OutputChunk(kJpegBitWriterChunkSize);
+  bw->data = bw->chunk.buffer->data();
+}
+
+static JXL_NOINLINE void SwapBuffer(JpegBitWriter* bw) {
+  bw->chunk.len = bw->pos;  // seal the current chunk at the bytes written
+  bw->pos = 0;
+  bw->output->push_back(std::move(bw->chunk));
+  bw->chunk = OutputChunk(kJpegBitWriterChunkSize);
+  bw->data = bw->chunk.buffer->data();
+}
+
+static JXL_INLINE void Reserve(JpegBitWriter* bw, size_t n_bytes) {
+  // Roll over to a fresh chunk when |n_bytes| more would overrun this one.
+  const bool fits = (bw->pos + n_bytes) <= kJpegBitWriterChunkSize;
+  if (JXL_UNLIKELY(!fits)) SwapBuffer(bw);
+}
+
+/**
+ * Appends |byte| to the chunk, plus a stuffed 0x00 when the byte is 0xFF.
+ * Unchecked: the caller must have reserved room for the (up to) 2 bytes.
+ */
+static JXL_INLINE void EmitByte(JpegBitWriter* bw, int byte) {
+  bw->data[bw->pos] = byte;
+  ++bw->pos;
+  if (byte != 0xFF) return;
+  bw->data[bw->pos++] = 0;
+}
+
+static JXL_INLINE void DischargeBitBuffer(JpegBitWriter* bw) {
+  // Moves the 6 most significant bytes of put_buffer to the output chunk and
+  // frees the corresponding 48 bits of the accumulator.
+  //
+  // Inside entropy-coded data JPEG requires a 0x00 after every 0xFF byte, so
+  // up to 12 bytes may be produced; make room for all of them up front.
+  Reserve(bw, 12);
+  const auto acc = bw->put_buffer;
+  // A byte of |acc| is 0xFF exactly when the same byte of ~acc is zero. The
+  // low 16 bits are not emitted now, so they are forced to ones to keep them
+  // from triggering the slow path.
+  const bool needs_stuffing = HasZeroByte(~acc | 0xFFFF) != 0;
+  if (needs_stuffing) {
+    // Slow path: EmitByte adds the zero byte after each 0xFF it writes.
+    for (int shift = 56; shift >= 16; shift -= 8) {
+      EmitByte(bw, (acc >> shift) & 0xFF);
+    }
+  } else {
+    // Fast path: none of the 6 bytes is 0xFF, copy them without checking.
+    for (int i = 0; i < 6; ++i) {
+      bw->data[bw->pos + i] = (acc >> (56 - 8 * i)) & 0xFF;
+    }
+    bw->pos += 6;
+  }
+  // Move the 16 bits that were kept to the top of the accumulator and
+  // account for the 48 bits that became free.
+  bw->put_buffer <<= 48;
+  bw->put_bits += 48;
+}
+
+static JXL_INLINE void WriteBits(JpegBitWriter* bw, int nbits, uint64_t bits) {
+  // A zero length marks a Huffman symbol that has no code. Instead of
+  // failing here, the writer is flagged as unhealthy and the caller checks
+  // that flag once the scan has been written.
+  if (nbits == 0) {
+    bw->healthy = false;
+    return;
+  }
+  // Place |bits| directly below the bits already held in the accumulator.
+  const auto free_bits = (bw->put_bits -= nbits);
+  bw->put_buffer |= (bits << free_bits);
+  if (free_bits <= 16) DischargeBitBuffer(bw);
+}
+
+void EmitMarker(JpegBitWriter* bw, int marker) {
+  JXL_DASSERT(marker != 0xFF);
+  Reserve(bw, 2);  // room for the 0xFF prefix plus the marker code
+  bw->data[bw->pos++] = 0xFF;
+  bw->data[bw->pos++] = marker;
+}
+
+bool JumpToByteBoundary(JpegBitWriter* bw, const uint8_t** pad_bits,
+                        const uint8_t* pad_bits_end) {
+  // Completes the current byte with padding and empties the accumulator.
+  // Returns false (before writing anything) if recorded padding runs out.
+  const size_t n_pad = bw->put_bits & 7u;
+  uint8_t pad_pattern = 0;
+  if (*pad_bits != nullptr) {
+    // Recorded padding: each source byte supplies one bit (zero/non-zero).
+    const uint8_t* src = *pad_bits;
+    for (size_t i = 0; i < n_pad; ++i) {
+      if (src >= pad_bits_end) return false;
+      pad_pattern = static_cast<uint8_t>((pad_pattern << 1) | (*src++ != 0));
+    }
+    *pad_bits = src;
+  } else {
+    // No recorded padding: fill with one bits.
+    pad_pattern = (1u << n_pad) - 1;
+  }
+
+  Reserve(bw, 16);
+
+  // Emit every complete byte still held in the accumulator.
+  for (; bw->put_bits <= 56; bw->put_bits += 8) {
+    EmitByte(bw, (bw->put_buffer >> 56) & 0xFF);
+    bw->put_buffer <<= 8;
+  }
+  // Emit the partial byte, if any, with the padding in its free low bits.
+  if (bw->put_bits < 64) {
+    const int pad_mask = 0xFFu >> (64 - bw->put_bits);
+    const int last = ((bw->put_buffer >> 56) & ~pad_mask) | pad_pattern;
+    EmitByte(bw, last);
+  }
+  bw->put_bits = 64;
+  bw->put_buffer = 0;
+
+  return true;
+}
+
+void JpegBitWriterFinish(JpegBitWriter* bw) {
+  if (bw->pos == 0) return;  // nothing buffered, nothing to queue
+  bw->chunk.len = bw->pos;
+  bw->pos = 0;
+  bw->data = nullptr;
+  bw->output->push_back(std::move(bw->chunk));
+  bw->chunk = OutputChunk(nullptr, 0);
+}
+
+void DCTCodingStateInit(DCTCodingState* s) {
+  s->refinement_bits_.clear();
+  s->refinement_bits_.reserve(kJPEGMaxCorrectionBits);  // allocate up front
+  s->cur_ac_huff_ = nullptr;  // set when the next EOB run starts
+  s->eob_run_ = 0;
+}
+
+// Writes out the pending end-of-band run (if any) followed by all buffered
+// refinement bits, then leaves |s| empty.
+static JXL_INLINE void Flush(DCTCodingState* s, JpegBitWriter* bw) {
+  if (s->eob_run_ > 0) {
+    // The EOB symbol carries the bit length of the run in its high nibble;
+    // the bits of the run below the leading one follow as raw bits.
+    const int run_bits = FloorLog2Nonzero<uint32_t>(s->eob_run_);
+    const int eob_symbol = run_bits << 4u;
+    const HuffmanCodeTable* huff = s->cur_ac_huff_;
+    WriteBits(bw, huff->depth[eob_symbol], huff->code[eob_symbol]);
+    if (run_bits > 0) {
+      WriteBits(bw, run_bits, s->eob_run_ & ((1 << run_bits) - 1));
+    }
+    s->eob_run_ = 0;
+  }
+  for (const auto bit : s->refinement_bits_) WriteBits(bw, 1, bit);
+  s->refinement_bits_.clear();
+}
+
+// Extends the pending end-of-band run by one block and stashes that block's
+// refinement bits (|new_bits| may be null); flushes when either is full.
+static JXL_INLINE void BufferEndOfBand(DCTCodingState* s,
+                                       const HuffmanCodeTable* ac_huff,
+                                       const std::vector<int>* new_bits,
+                                       JpegBitWriter* bw) {
+  // The table in effect when a run starts is the one used to encode it.
+  if (s->eob_run_ == 0) s->cur_ac_huff_ = ac_huff;
+  s->eob_run_ += 1;
+  if (new_bits != nullptr) {
+    auto& pending = s->refinement_bits_;
+    pending.insert(pending.end(), new_bits->begin(), new_bits->end());
+  }
+  // Flush once the run reaches 0x7FFF or the bit buffer nears its capacity.
+  const bool run_full = (s->eob_run_ == 0x7FFF);
+  const bool bits_full = s->refinement_bits_.size() >
+                         kJPEGMaxCorrectionBits - kDCTBlockSize + 1;
+  if (run_full || bits_full) Flush(s, bw);
+}
+
+bool BuildHuffmanCodeTable(const JPEGHuffmanCode& huff,
+                           HuffmanCodeTable* table) {
+  // One code length per symbol, in canonical order (+1 for the sentinel).
+  uint32_t lengths[kJpegHuffmanAlphabetSize + 1];
+  int codes[kJpegHuffmanAlphabetSize];
+  // Expand the per-length histogram into one length entry per symbol.
+  int n = 0;
+  for (size_t len = 1; len <= kJpegHuffmanMaxBitLength; ++len) {
+    int remaining = huff.counts[len];
+    if (n + remaining > kJpegHuffmanAlphabetSize + 1) {
+      return false;
+    }
+    while (remaining--) lengths[n++] = len;
+  }
+
+  // An empty histogram leaves |table| untouched.
+  if (n == 0) return true;
+
+  // The final counted entry serves as the zero terminator and gets no code.
+  const int n_symbols = n - 1;
+  lengths[n_symbols] = 0;
+
+  // Hand out consecutive code words, widening by one bit for every step of
+  // the code length.
+  int next_code = 0;
+  int idx = 0;
+  for (uint32_t cur_len = lengths[0]; lengths[idx] != 0; ++cur_len) {
+    for (; lengths[idx] == cur_len; ++idx) {
+      codes[idx] = next_code++;
+    }
+    next_code <<= 1;
+  }
+  // Publish length and code under each symbol's value.
+  for (int k = 0; k < n_symbols; ++k) {
+    const int symbol = huff.values[k];
+    table->depth[symbol] = lengths[k];
+    table->code[symbol] = codes[k];
+  }
+  return true;
+}
+
+bool EncodeSOI(SerializationState* state) {
+  state->output_queue.emplace_back(OutputChunk({0xFF, 0xD8}));  // SOI
+  return true;
+}
+
+bool EncodeEOI(const JPEGData& jpg, SerializationState* state) {
+  state->output_queue.emplace_back(OutputChunk({0xFF, 0xD9}));  // EOI
+  state->output_queue.emplace_back(jpg.tail_data);  // bytes after EOI
+  return true;
+}
+
+bool EncodeSOF(const JPEGData& jpg, uint8_t marker, SerializationState* state) {
+  // Markers up to 0xC2 set the progressive flag; later ones leave it as is.
+  if (marker <= 0xC2) state->is_progressive = (marker == 0xC2);
+
+  const size_t n_comps = jpg.components.size();
+  // 8 fixed bytes (length, precision, height, width, count) + 3 per comp.
+  const size_t marker_len = 8 + 3 * n_comps;
+  state->output_queue.emplace_back(marker_len + 2);
+  uint8_t* out = state->output_queue.back().buffer->data();
+  *out++ = 0xFF;
+  *out++ = marker;
+  *out++ = marker_len >> 8u;
+  *out++ = marker_len & 0xFFu;
+  *out++ = kJpegPrecision;
+  *out++ = jpg.height >> 8u;
+  *out++ = jpg.height & 0xFFu;
+  *out++ = jpg.width >> 8u;
+  *out++ = jpg.width & 0xFFu;
+  *out++ = n_comps;
+  for (const JPEGComponent& comp : jpg.components) {
+    const size_t quant_idx = comp.quant_idx;
+    *out++ = comp.id;
+    *out++ = ((comp.h_samp_factor << 4u) | (comp.v_samp_factor));
+    if (quant_idx >= jpg.quant.size()) return false;
+    *out++ = jpg.quant[quant_idx].index;
+  }
+  return true;
+}
+
+bool EncodeSOS(const JPEGData& jpg, const JPEGScanInfo& scan_info,
+               SerializationState* state) {
+  const size_t n_scans = scan_info.num_components;
+  // Length (2) + component count (1) + 2 per component + Ss, Se, Ah/Al (3).
+  const size_t marker_len = 6 + 2 * n_scans;
+  state->output_queue.emplace_back(marker_len + 2);
+  uint8_t* out = state->output_queue.back().buffer->data();
+  *out++ = 0xFF;
+  *out++ = 0xDA;
+  *out++ = marker_len >> 8u;
+  *out++ = marker_len & 0xFFu;
+  *out++ = n_scans;
+  for (size_t i = 0; i < n_scans; ++i) {
+    const JPEGComponentScanInfo& si = scan_info.components[i];
+    if (si.comp_idx >= jpg.components.size()) return false;
+    *out++ = jpg.components[si.comp_idx].id;
+    *out++ = (si.dc_tbl_idx << 4u) + si.ac_tbl_idx;
+  }
+  *out++ = scan_info.Ss;
+  *out++ = scan_info.Se;
+  *out++ = ((scan_info.Ah << 4u) | (scan_info.Al));
+  return true;
+}
+
+bool EncodeDHT(const JPEGData& jpg, SerializationState* state) {
+  // Writes one DHT segment holding the tables from state->dht_index up to
+  // the first one flagged is_last, and rebuilds the encoder table of each.
+  const std::vector<JPEGHuffmanCode>& huffman_code = jpg.huffman_code;
+
+  // Pass 1, segment size: per table 16 length counts plus the sum of the
+  // counts (slot byte + symbols, as the counts include one sentinel).
+  size_t marker_len = 2;
+  for (size_t i = state->dht_index; i < huffman_code.size(); ++i) {
+    const JPEGHuffmanCode& huff = huffman_code[i];
+    marker_len += kJpegHuffmanMaxBitLength;
+    for (size_t j = 0; j < huff.counts.size(); ++j) {
+      marker_len += huff.counts[j];
+    }
+    if (huff.is_last) break;
+  }
+  state->output_queue.emplace_back(marker_len + 2);
+  uint8_t* data = state->output_queue.back().buffer->data();
+  size_t pos = 0;
+  data[pos++] = 0xFF;
+  data[pos++] = 0xC4;
+  data[pos++] = marker_len >> 8u;
+  data[pos++] = marker_len & 0xFFu;
+  while (true) {
+    const size_t huffman_code_index = state->dht_index++;
+    if (huffman_code_index >= huffman_code.size()) return false;
+    const JPEGHuffmanCode& huff = huffman_code[huffman_code_index];
+    // Bit 4 of the slot id selects the AC tables; the rest is the slot.
+    size_t index = huff.slot_id;
+    const bool is_ac = (index & 0x10) != 0;
+    if (is_ac) index -= 0x10;
+    auto& tables = is_ac ? state->ac_huff_table : state->dc_huff_table;
+    // Reject slots the encoder state holds no table for.
+    if (index >= tables.size()) return false;
+    // TODO(eustas): cache
+    // TODO(eustas): set up non-existing symbols
+    if (!BuildHuffmanCodeTable(huff, &tables[index])) return false;
+    size_t total_count = 0;
+    size_t max_length = 0;
+    for (size_t i = 0; i < huff.counts.size(); ++i) {
+      if (huff.counts[i] != 0) max_length = i;
+      total_count += huff.counts[i];
+    }
+    // The sentinel is not written. With no symbol at all there is none to
+    // drop: the decrement would wrap around and overrun the chunk.
+    if (total_count == 0) return false;
+    --total_count;
+    data[pos++] = huff.slot_id;
+    for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
+      data[pos++] = (i == max_length ? huff.counts[i] - 1 : huff.counts[i]);
+    }
+    for (size_t i = 0; i < total_count; ++i) {
+      data[pos++] = huff.values[i];
+    }
+    if (huff.is_last) break;
+  }
+  return true;
+}
+
+bool EncodeDQT(const JPEGData& jpg, SerializationState* state) {
+  // Segment size: 2 length bytes, then per table one precision/index byte
+  // and 64 entries of 1 or 2 bytes, up to the first table flagged is_last.
+  int marker_len = 2;
+  for (size_t i = state->dqt_index; i < jpg.quant.size(); ++i) {
+    const JPEGQuantTable& table = jpg.quant[i];
+    marker_len += 1 + (table.precision ? 2 : 1) * kDCTBlockSize;
+    if (table.is_last) break;
+  }
+  state->output_queue.emplace_back(marker_len + 2);
+  uint8_t* out = state->output_queue.back().buffer->data();
+  *out++ = 0xFF;
+  *out++ = 0xDB;
+  *out++ = marker_len >> 8u;
+  *out++ = marker_len & 0xFFu;
+  // Emit the tables counted above.
+  for (;;) {
+    const size_t idx = state->dqt_index++;
+    if (idx >= jpg.quant.size()) return false;  // corrupt input
+    const JPEGQuantTable& table = jpg.quant[idx];
+    const bool wide = table.precision != 0;
+    *out++ = (table.precision << 4u) + table.index;
+    // Output entry i is values[kJPEGNaturalOrder[i]]; 16-bit entries are
+    // written high byte first.
+    for (size_t i = 0; i < kDCTBlockSize; ++i) {
+      const int val = table.values[kJPEGNaturalOrder[i]];
+      if (wide) *out++ = val >> 8u;
+      *out++ = val & 0xFFu;
+    }
+    if (table.is_last) break;
+  }
+  return true;
+}
+
+bool EncodeDRI(const JPEGData& jpg, SerializationState* state) {
+  // From here on scans honour jpg.restart_interval.
+  state->seen_dri_marker = true;
+  const uint8_t interval_hi = static_cast<uint8_t>(jpg.restart_interval >> 8);
+  const uint8_t interval_lo =
+      static_cast<uint8_t>(jpg.restart_interval & 0xFF);
+  // Marker 0xFFDD, segment length 4, big-endian restart interval.
+  OutputChunk dri_marker = {0xFF, 0xDD, 0, 4, interval_hi, interval_lo};
+  state->output_queue.push_back(std::move(dri_marker));
+  return true;
+}
+
+bool EncodeRestart(uint8_t marker, SerializationState* state) {
+  state->output_queue.emplace_back(OutputChunk({0xFF, marker}));  // RSTn
+  return true;
+}
+
+bool EncodeAPP(const JPEGData& jpg, uint8_t marker, SerializationState* state) {
+  // TODO(eustas): check that marker corresponds to payload?
+  (void)marker;
+  // Consume the next stored APP payload; it follows a lone 0xFF prefix.
+  const size_t next = state->app_index++;
+  if (next >= jpg.app_data.size()) return false;
+  state->output_queue.emplace_back(OutputChunk({0xFF}));
+  state->output_queue.emplace_back(jpg.app_data[next]);
+  return true;
+}
+
+bool EncodeCOM(const JPEGData& jpg, SerializationState* state) {
+  const size_t next = state->com_index++;  // advanced even on failure
+  if (next >= jpg.com_data.size()) return false;
+  state->output_queue.emplace_back(OutputChunk({0xFF}));  // marker prefix
+  state->output_queue.emplace_back(jpg.com_data[next]);
+  return true;
+}
+
+bool EncodeInterMarkerData(const JPEGData& jpg, SerializationState* state) {
+  const size_t next = state->data_index++;  // advanced even on failure
+  if (next >= jpg.inter_marker_data.size()) return false;
+  state->output_queue.emplace_back(jpg.inter_marker_data[next]);  // verbatim
+  return true;
+}
+
+bool EncodeDCTBlockSequential(const coeff_t* coeffs,
+                              const HuffmanCodeTable& dc_huff,
+                              const HuffmanCodeTable& ac_huff,
+                              int num_zero_runs, coeff_t* last_dc_coeff,
+                              JpegBitWriter* bw) {
+  // DC: code the difference to the previous DC value of this component.
+  const coeff_t dc = coeffs[0];
+  coeff_t magnitude = dc - *last_dc_coeff;
+  *last_dc_coeff = dc;
+  coeff_t extra_bits = magnitude;
+  if (magnitude < 0) {
+    magnitude = -magnitude;
+    extra_bits--;
+  }
+  const int dc_nbits =
+      (magnitude == 0) ? 0 : (FloorLog2Nonzero<uint32_t>(magnitude) + 1);
+  WriteBits(bw, dc_huff.depth[dc_nbits], dc_huff.code[dc_nbits]);
+  if (dc_nbits >= 12) return false;
+  if (dc_nbits > 0) {
+    WriteBits(bw, dc_nbits, extra_bits & ((1u << dc_nbits) - 1));
+  }
+  // AC: one (zero run, size) symbol per non-zero value, in natural order.
+  int zero_run = 0;
+  for (int k = 1; k < 64; ++k) {
+    magnitude = coeffs[kJPEGNaturalOrder[k]];
+    if (magnitude == 0) {
+      ++zero_run;
+      continue;
+    }
+    extra_bits = magnitude;
+    if (magnitude < 0) {
+      magnitude = -magnitude;
+      extra_bits = ~magnitude;
+    }
+    // Runs longer than 15 are split off as 16-zero (0xF0) symbols.
+    for (; zero_run > 15; zero_run -= 16) {
+      WriteBits(bw, ac_huff.depth[0xf0], ac_huff.code[0xf0]);
+    }
+    const int ac_nbits = FloorLog2Nonzero<uint32_t>(magnitude) + 1;
+    if (ac_nbits >= 16) return false;
+    const int symbol = (zero_run << 4u) + ac_nbits;
+    WriteBits(bw, ac_huff.depth[symbol], ac_huff.code[symbol]);
+    WriteBits(bw, ac_nbits, extra_bits & ((1 << ac_nbits) - 1));
+    zero_run = 0;
+  }
+  // Extra 0xF0 symbols recorded for this block precede the end-of-block.
+  for (int i = 0; i < num_zero_runs; ++i) {
+    WriteBits(bw, ac_huff.depth[0xf0], ac_huff.code[0xf0]);
+    zero_run -= 16;
+  }
+  if (zero_run > 0) WriteBits(bw, ac_huff.depth[0], ac_huff.code[0]);
+  return true;
+}
+
+bool EncodeDCTBlockProgressive(const coeff_t* coeffs,
+                               const HuffmanCodeTable& dc_huff,
+                               const HuffmanCodeTable& ac_huff, int Ss, int Se,
+                               int Al, int num_zero_runs,
+                               DCTCodingState* coding_state,
+                               coeff_t* last_dc_coeff, JpegBitWriter* bw) {
+  // Encodes the first pass of spectral band [Ss, Se] of one block, with
+  // coefficients scaled down by Al bits. Returns false when a value needs
+  // more bits than a JPEG symbol can describe.
+  const bool eob_run_allowed = Ss > 0;
+  coeff_t temp2;
+  coeff_t temp;
+  if (Ss == 0) {
+    // DC: difference to the previous (shifted) DC value.
+    temp2 = coeffs[0] >> Al;
+    temp = temp2 - *last_dc_coeff;
+    *last_dc_coeff = temp2;
+    temp2 = temp;
+    if (temp < 0) {
+      temp = -temp;
+      temp2--;
+    }
+    int nbits = (temp == 0) ? 0 : (FloorLog2Nonzero<uint32_t>(temp) + 1);
+    // Reject magnitudes whose bit count would make the shift below invalid.
+    if (nbits >= 16) return false;
+    WriteBits(bw, dc_huff.depth[nbits], dc_huff.code[nbits]);
+    if (nbits > 0) WriteBits(bw, nbits, temp2 & ((1 << nbits) - 1));
+    ++Ss;
+  }
+  if (Ss > Se) return true;
+  int r = 0;
+  for (int k = Ss; k <= Se; ++k) {
+    temp = coeffs[kJPEGNaturalOrder[k]];
+    if (temp < 0) {
+      temp = -temp;
+      temp >>= Al;
+      temp2 = ~temp;
+    } else {
+      temp >>= Al;
+      temp2 = temp;
+    }
+    // Zero, or too small to show up at this precision: extend the run.
+    if (temp == 0) {
+      r++;
+      continue;
+    }
+    Flush(coding_state, bw);
+    while (r > 15) {
+      WriteBits(bw, ac_huff.depth[0xf0], ac_huff.code[0xf0]);
+      r -= 16;
+    }
+    int nbits = FloorLog2Nonzero<uint32_t>(temp) + 1;
+    // An AC symbol has a 4-bit size field, so sizes above 15 are invalid.
+    if (nbits >= 16) return false;
+    int symbol = (r << 4u) + nbits;
+    WriteBits(bw, ac_huff.depth[symbol], ac_huff.code[symbol]);
+    WriteBits(bw, nbits, temp2 & ((1 << nbits) - 1));
+    r = 0;
+  }
+  if (num_zero_runs > 0) {
+    Flush(coding_state, bw);
+    for (int i = 0; i < num_zero_runs; ++i) {
+      WriteBits(bw, ac_huff.depth[0xf0], ac_huff.code[0xf0]);
+      r -= 16;
+    }
+  }
+  if (r > 0) {
+    BufferEndOfBand(coding_state, &ac_huff, nullptr, bw);
+    if (!eob_run_allowed) Flush(coding_state, bw);
+  }
+  return true;
+}
+
+bool EncodeRefinementBits(const coeff_t* coeffs,
+                          const HuffmanCodeTable& ac_huff, int Ss, int Se,
+                          int Al, DCTCodingState* coding_state,
+                          JpegBitWriter* bw) {
+  const bool eob_run_allowed = Ss > 0;
+  if (Ss == 0) {
+    // Emit next bit of DC component.
+    WriteBits(bw, 1, (coeffs[0] >> Al) & 1);
+    ++Ss;
+  }
+  if (Ss > Se) return true;
+  // Magnitudes at the current precision, and the position of the last
+  // coefficient whose magnitude is exactly 1 at this precision.
+  int abs_values[kDCTBlockSize];
+  int last_new_nonzero = 0;
+  for (int k = Ss; k <= Se; k++) {
+    const coeff_t abs_val = std::abs(coeffs[kJPEGNaturalOrder[k]]);
+    abs_values[k] = abs_val >> Al;
+    if (abs_values[k] == 1) last_new_nonzero = k;
+  }
+  int zero_run = 0;
+  // Correction bits of larger coefficients passed since the last emitted
+  // symbol; they are written right after the next symbol.
+  std::vector<int> pending_bits;
+  pending_bits.reserve(kDCTBlockSize);
+  const auto write_pending_bits = [&pending_bits, bw]() {
+    for (int bit : pending_bits) WriteBits(bw, 1, bit);
+    pending_bits.clear();
+  };
+  for (int k = Ss; k <= Se; k++) {
+    if (abs_values[k] == 0) {
+      zero_run++;
+      continue;
+    }
+    // Long zero runs become 0xF0 symbols only up to the last magnitude-1
+    // coefficient; past it the run is left for the end-of-band handling.
+    while (zero_run > 15 && k <= last_new_nonzero) {
+      Flush(coding_state, bw);
+      WriteBits(bw, ac_huff.depth[0xf0], ac_huff.code[0xf0]);
+      zero_run -= 16;
+      write_pending_bits();
+    }
+    if (abs_values[k] > 1) {
+      // Magnitude above 1: only its lowest bit is recorded.
+      pending_bits.push_back(abs_values[k] & 1u);
+      continue;
+    }
+    // Magnitude 1: run/size symbol (size 1), sign bit, then pending bits.
+    Flush(coding_state, bw);
+    const int symbol = (zero_run << 4u) + 1;
+    const int sign_bit = (coeffs[kJPEGNaturalOrder[k]] < 0) ? 0 : 1;
+    WriteBits(bw, ac_huff.depth[symbol], ac_huff.code[symbol]);
+    WriteBits(bw, 1, sign_bit);
+    write_pending_bits();
+    zero_run = 0;
+  }
+  if (zero_run > 0 || !pending_bits.empty()) {
+    BufferEndOfBand(coding_state, &ac_huff, &pending_bits, bw);
+    if (!eob_run_allowed) Flush(coding_state, bw);
+  }
+  return true;
+}
+
+template <int kMode>  // 0: sequential, 1: progressive, 2: refinement
+SerializationStatus JXL_NOINLINE DoEncodeScan(const JPEGData& jpg,
+                                              SerializationState* state) {
+  const JPEGScanInfo& scan_info = jpg.scan_info[state->scan_index];
+  EncodeScanState& ss = state->scan_state;
+
+  const int restart_interval =
+      state->seen_dri_marker ? jpg.restart_interval : 0;  // 0: no restarts
+
+  const auto get_next_extra_zero_run_index = [&ss, &scan_info]() -> int {
+    if (ss.extra_zero_runs_pos < scan_info.extra_zero_runs.size()) {
+      return scan_info.extra_zero_runs[ss.extra_zero_runs_pos].block_idx;
+    } else {
+      return -1;  // no entries left
+    }
+  };
+
+  const auto get_next_reset_point = [&ss, &scan_info]() -> int {
+    if (ss.next_reset_point_pos < scan_info.reset_points.size()) {
+      return scan_info.reset_points[ss.next_reset_point_pos++];
+    } else {
+      return -1;  // no reset points left
+    }
+  };
+
+  if (ss.stage == EncodeScanState::HEAD) {  // scan not started yet
+    if (!EncodeSOS(jpg, scan_info, state)) return SerializationStatus::ERROR;
+    JpegBitWriterInit(&ss.bw, &state->output_queue);
+    DCTCodingStateInit(&ss.coding_state);
+    ss.restarts_to_go = restart_interval;
+    ss.next_restart_marker = 0;
+    ss.block_scan_index = 0;
+    ss.extra_zero_runs_pos = 0;
+    ss.next_extra_zero_run_index = get_next_extra_zero_run_index();
+    ss.next_reset_point_pos = 0;
+    ss.next_reset_point = get_next_reset_point();
+    ss.mcu_y = 0;
+    memset(ss.last_dc_coeff, 0, sizeof(ss.last_dc_coeff));
+    ss.stage = EncodeScanState::BODY;
+  }
+  JpegBitWriter* bw = &ss.bw;
+  DCTCodingState* coding_state = &ss.coding_state;
+
+  JXL_DASSERT(ss.stage == EncodeScanState::BODY);
+
+  // "Non-interleaved" means color data comes in separate scans, in other words
+  // each scan can contain only one color component.
+  const bool is_interleaved = (scan_info.num_components > 1);
+  int MCUs_per_row = 0;
+  int MCU_rows = 0;
+  jpg.CalculateMcuSize(scan_info, &MCUs_per_row, &MCU_rows);
+  const bool is_progressive = state->is_progressive;
+  const int Al = is_progressive ? scan_info.Al : 0;
+  const int Ss = is_progressive ? scan_info.Ss : 0;
+  const int Se = is_progressive ? scan_info.Se : 63;
+
+  // DC-only is defined by [0..0] spectral range.
+  const bool want_ac = ((Ss != 0) || (Se != 0));
+  // TODO: support streaming decoding again.
+  const bool complete_ac = true;
+  const bool has_ac = true;
+  if (want_ac && !has_ac) return SerializationStatus::NEEDS_MORE_INPUT;
+
+  // |has_ac| implies |complete_dc| but not vice versa; for the sake of
+  // simplicity we pretend they are equal, because they are separated by just a
+  // few bytes of input.
+  const bool complete_dc = has_ac;
+  const bool complete = want_ac ? complete_ac : complete_dc;
+  // When "incomplete" |ac_dc| tracks information about current ("incomplete")
+  // band parsing progress.
+
+  // FIXME: Is this always complete?
+  // const int last_mcu_y =
+  //     complete ? MCU_rows : parsing_state.internal->ac_dc.next_mcu_y *
+  //     v_group;
+  (void)complete;
+  const int last_mcu_y = complete ? MCU_rows : 0;
+
+  for (; ss.mcu_y < last_mcu_y; ++ss.mcu_y) {
+    for (int mcu_x = 0; mcu_x < MCUs_per_row; ++mcu_x) {
+      // Restart interval elapsed: flush, byte-align and emit the next RSTn.
+      if (restart_interval > 0 && ss.restarts_to_go == 0) {
+        Flush(coding_state, bw);
+        if (!JumpToByteBoundary(bw, &state->pad_bits, state->pad_bits_end)) {
+          return SerializationStatus::ERROR;
+        }
+        EmitMarker(bw, 0xD0 + ss.next_restart_marker);
+        ss.next_restart_marker += 1;
+        ss.next_restart_marker &= 0x7;
+        ss.restarts_to_go = restart_interval;
+        memset(ss.last_dc_coeff, 0, sizeof(ss.last_dc_coeff));
+      }
+      // Encode one MCU: every block of every component in the scan.
+      for (size_t i = 0; i < scan_info.num_components; ++i) {
+        const JPEGComponentScanInfo& si = scan_info.components[i];
+        const JPEGComponent& c = jpg.components[si.comp_idx];
+        const HuffmanCodeTable& dc_huff = state->dc_huff_table[si.dc_tbl_idx];
+        const HuffmanCodeTable& ac_huff = state->ac_huff_table[si.ac_tbl_idx];
+        int n_blocks_y = is_interleaved ? c.v_samp_factor : 1;
+        int n_blocks_x = is_interleaved ? c.h_samp_factor : 1;
+        for (int iy = 0; iy < n_blocks_y; ++iy) {
+          for (int ix = 0; ix < n_blocks_x; ++ix) {
+            int block_y = ss.mcu_y * n_blocks_y + iy;
+            int block_x = mcu_x * n_blocks_x + ix;
+            int block_idx = block_y * c.width_in_blocks + block_x;
+            if (ss.block_scan_index == ss.next_reset_point) {
+              Flush(coding_state, bw);
+              ss.next_reset_point = get_next_reset_point();
+            }
+            int num_zero_runs = 0;
+            if (ss.block_scan_index == ss.next_extra_zero_run_index) {
+              num_zero_runs = scan_info.extra_zero_runs[ss.extra_zero_runs_pos]
+                                  .num_extra_zero_runs;
+              ++ss.extra_zero_runs_pos;
+              ss.next_extra_zero_run_index = get_next_extra_zero_run_index();
+            }
+            const coeff_t* coeffs = &c.coeffs[block_idx << 6];
+            bool ok;
+            if (kMode == 0) {
+              ok = EncodeDCTBlockSequential(coeffs, dc_huff, ac_huff,
+                                            num_zero_runs,
+                                            ss.last_dc_coeff + si.comp_idx, bw);
+            } else if (kMode == 1) {
+              ok = EncodeDCTBlockProgressive(
+                  coeffs, dc_huff, ac_huff, Ss, Se, Al, num_zero_runs,
+                  coding_state, ss.last_dc_coeff + si.comp_idx, bw);
+            } else {
+              ok = EncodeRefinementBits(coeffs, ac_huff, Ss, Se, Al,
+                                        coding_state, bw);
+            }
+            if (!ok) return SerializationStatus::ERROR;
+            ++ss.block_scan_index;
+          }
+        }
+      }
+      --ss.restarts_to_go;  // one more MCU done
+    }
+  }
+  if (ss.mcu_y < MCU_rows) {  // unreachable while |complete| is always true
+    if (!bw->healthy) return SerializationStatus::ERROR;
+    return SerializationStatus::NEEDS_MORE_INPUT;
+  }
+  Flush(coding_state, bw);
+  if (!JumpToByteBoundary(bw, &state->pad_bits, state->pad_bits_end)) {
+    return SerializationStatus::ERROR;
+  }
+  JpegBitWriterFinish(bw);
+  ss.stage = EncodeScanState::HEAD;  // next call starts the next scan
+  state->scan_index++;
+  if (!bw->healthy) return SerializationStatus::ERROR;
+
+  return SerializationStatus::DONE;
+}
+
+static SerializationStatus JXL_INLINE EncodeScan(const JPEGData& jpg,
+                                                 SerializationState* state) {
+  // Picks the block encoder for the next scan. More SOS markers than stored
+  // scans is corrupt input and must not index past jpg.scan_info.
+  if (state->scan_index >= jpg.scan_info.size()) {
+    return SerializationStatus::ERROR;
+  }
+  const JPEGScanInfo& scan_info = jpg.scan_info[state->scan_index];
+  if (!state->is_progressive) return DoEncodeScan<0>(jpg, state);
+  // A progressive scan covering the full band at full precision is coded
+  // exactly like a sequential one.
+  const bool full_scan = scan_info.Ah == 0 && scan_info.Al == 0 &&
+                         scan_info.Ss == 0 && scan_info.Se == 63;
+  if (full_scan) return DoEncodeScan<0>(jpg, state);
+  // Ah == 0 marks the first pass over a band; otherwise refine it.
+  if (scan_info.Ah == 0) return DoEncodeScan<1>(jpg, state);
+  return DoEncodeScan<2>(jpg, state);
+}
+
+SerializationStatus SerializeSection(uint8_t marker, SerializationState* state,
+                                     const JPEGData& jpg) {
+  // Dispatches one marker_order entry to the encoder for that section.
+  // Plain encoders report success as a bool, which is mapped to DONE or
+  // ERROR by the helper below.
+  const auto to_status = [](bool result) {
+    return result ? SerializationStatus::DONE : SerializationStatus::ERROR;
+  };
+  // TODO(eustas): add and use marker enum
+
+  // Contiguous marker ranges are tested first; all single values go through
+  // the switch below.
+  // 0xD0..0xD7: restart markers.
+  const bool is_restart = (marker >= 0xD0 && marker <= 0xD7);
+  if (is_restart) {
+    return to_status(EncodeRestart(marker, state));
+  }
+  // 0xE0..0xEF: application segments.
+  const bool is_app = (marker >= 0xE0 && marker <= 0xEF);
+  if (is_app) {
+    return to_status(EncodeAPP(jpg, marker, state));
+  }
+
+  switch (marker) {
+    // Frame headers.
+    case 0xC0:
+    case 0xC1:
+    case 0xC2:
+    case 0xC9:
+    case 0xCA:
+      return to_status(EncodeSOF(jpg, marker, state));
+
+    // Huffman tables.
+    case 0xC4:
+      return to_status(EncodeDHT(jpg, state));
+
+    // End of image, followed by any tail data.
+    case 0xD9:
+      return to_status(EncodeEOI(jpg, state));
+
+    // Scan header plus its entropy-coded data; the scan encoder returns a
+    // SerializationStatus of its own.
+    case 0xDA:
+      return EncodeScan(jpg, state);
+
+    // Quantization tables.
+    case 0xDB:
+      return to_status(EncodeDQT(jpg, state));
+
+    // Restart interval definition.
+    case 0xDD:
+      return to_status(EncodeDRI(jpg, state));
+
+    // Comment segment.
+    case 0xFE:
+      return to_status(EncodeCOM(jpg, state));
+
+    // Stored inter-marker data; the value 0xFF is used as the entry that
+    // selects it.
+    case 0xFF:
+      return to_status(EncodeInterMarkerData(jpg, state));
+
+    // Every other value is rejected.
+    default:
+      return SerializationStatus::ERROR;
+  }
+}
+
+}  // namespace
+
+// TODO(veluca): add streaming support again.
+// Serializes |jpg| as a JPEG byte stream: SOI, then one section per entry of
+// jpg.marker_order. Bytes are handed to |out| as they are produced.
+Status WriteJpeg(const JPEGData& jpg, const JPEGOutput& out) {
+  SerializationState ss;
+  // Drains the output queue into |out|, which may accept a chunk piecewise.
+  const auto maybe_push_output = [&]() -> Status {
+    if (ss.stage == SerializationState::ERROR) return true;
+    while (!ss.output_queue.empty()) {
+      auto& chunk = ss.output_queue.front();
+      const size_t num_written = out(chunk.next, chunk.len);
+      if (num_written == 0 && chunk.len > 0) {
+        return StatusMessage(Status(StatusCode::kNotEnoughBytes),
+                             "Failed to write output");
+      }
+      if (num_written > chunk.len) return JXL_FAILURE("Bad output size");
+      // Advance past the consumed bytes so only the remainder is resent.
+      chunk.next += num_written;
+      chunk.len -= num_written;
+      if (chunk.len == 0) ss.output_queue.pop_front();
+    }
+    return true;
+  };
+
+  while (true) {
+    switch (ss.stage) {
+      case SerializationState::INIT: {
+        // Valid Brunsli requires, at least, 0xD9 marker.
+        // This might happen on corrupted stream, or on unconditioned JPEGData.
+        // TODO(eustas): check D9 in the only one and is the last one.
+        if (jpg.marker_order.empty()) {
+          ss.stage = SerializationState::ERROR;
+          break;
+        }
+
+        ss.dc_huff_table.resize(kMaxHuffmanTables);
+        ss.ac_huff_table.resize(kMaxHuffmanTables);
+        if (jpg.has_zero_padding_bit) {
+          ss.pad_bits = jpg.padding_bits.data();
+          ss.pad_bits_end = ss.pad_bits + jpg.padding_bits.size();
+        }
+
+        EncodeSOI(&ss);
+        JXL_QUIET_RETURN_IF_ERROR(maybe_push_output());
+        ss.stage = SerializationState::SERIALIZE_SECTION;
+        break;
+      }
+
+      case SerializationState::SERIALIZE_SECTION: {
+        if (ss.section_index >= jpg.marker_order.size()) {
+          ss.stage = SerializationState::DONE;
+          break;
+        }
+        uint8_t marker = jpg.marker_order[ss.section_index];
+        SerializationStatus status = SerializeSection(marker, &ss, jpg);
+        if (status == SerializationStatus::ERROR) {
+          JXL_WARNING("Failed to encode marker 0x%.2x", marker);
+          ss.stage = SerializationState::ERROR;
+          break;
+        }
+        JXL_QUIET_RETURN_IF_ERROR(maybe_push_output());
+        if (status == SerializationStatus::NEEDS_MORE_INPUT) {
+          return JXL_FAILURE("Incomplete serialization data");
+        } else if (status != SerializationStatus::DONE) {
+          JXL_DASSERT(false);
+          ss.stage = SerializationState::ERROR;
+          break;
+        }
+        ++ss.section_index;
+        break;
+      }
+
+      case SerializationState::DONE:
+        JXL_ASSERT(ss.output_queue.empty());
+        return true;
+
+      case SerializationState::ERROR:
+        return JXL_FAILURE("JPEG serialization error");
+    }
+  }
+}
+
+}  // namespace jpeg
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data_writer.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data_writer.h
new file mode 100644
index 0000000000..28f5141f41
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_data_writer.h
@@ -0,0 +1,30 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Functions for writing a JPEGData object into a jpeg byte stream.
+
+#ifndef LIB_JXL_JPEG_DEC_JPEG_DATA_WRITER_H_
+#define LIB_JXL_JPEG_DEC_JPEG_DATA_WRITER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <functional>
+
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+
+// Output sink callback. It is handed |len| bytes starting at |buf| (|buf| is
+// const, so the callback reads from it rather than filling it) and returns the
+// number of bytes written.
+using JPEGOutput = std::function<size_t(const uint8_t* buf, size_t len)>;
+
+// Writes |jpg| as a JPEG byte stream. Returns a failing Status on error.
+// NOTE(review): the produced bytes are presumably delivered through |out|;
+// confirm against the definition in the matching .cc file.
+Status WriteJpeg(const JPEGData& jpg, const JPEGOutput& out);
+
+}  // namespace jpeg
+}  // namespace jxl
+
+#endif  // LIB_JXL_JPEG_DEC_JPEG_DATA_WRITER_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_output_chunk.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_output_chunk.h
new file mode 100644
index 0000000000..e003c04952
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_output_chunk.h
@@ -0,0 +1,72 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_JPEG_DEC_JPEG_OUTPUT_CHUNK_H_
+#define LIB_JXL_JPEG_DEC_JPEG_OUTPUT_CHUNK_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <initializer_list>
+#include <memory>
+#include <vector>
+
+namespace jxl {
+namespace jpeg {
+
+/**
+ * One contiguous piece of serialized output.
+ *
+ * Producers create OutputChunks and append them to the back of the output
+ * queue. Once control flow leaves the producer code the chunk is considered
+ * final; |next| being a pointer-to-const underlines that.
+ *
+ * Consumers remove OutputChunks from the front of the output queue. A chunk
+ * may be consumed partially by updating |next| and |len|.
+ *
+ * Two flavours exist:
+ *  - owning: the bytes live in |buffer|; the producer fills them in after the
+ *    instance is created and may reduce |len| when not all of |buffer| is
+ *    used
+ *  - non-owning: the bytes are stored (owned) somewhere else
+ */
+struct OutputChunk {
+  // Non-owning view over any object exposing data() and size().
+  template <typename Bytes>
+  explicit OutputChunk(Bytes& bytes)
+      // Going through const void* copes with both the const qualifier and the
+      // element type of |bytes|.
+      : next(static_cast<const uint8_t*>(
+            static_cast<const void*>(bytes.data()))),
+        len(bytes.size()) {}
+
+  // Non-owning view over |size| bytes starting at |data|.
+  OutputChunk(const uint8_t* data, size_t size) : next(data), len(size) {}
+
+  // Owning chunk backed by a freshly allocated vector of |size| bytes.
+  explicit OutputChunk(size_t size = 0)
+      : len(size), buffer(new std::vector<uint8_t>(size)) {
+    // |next| is declared before |buffer|, so it is set here, after |buffer|
+    // exists.
+    next = buffer->data();
+  }
+
+  // Owning chunk holding a copy of |bytes|.
+  OutputChunk(std::initializer_list<uint8_t> bytes)
+      : len(bytes.size()), buffer(new std::vector<uint8_t>(bytes)) {
+    next = buffer->data();
+  }
+
+  // First byte that has not been consumed yet.
+  const uint8_t* next;
+  // Number of bytes available starting at |next|.
+  size_t len;
+  // Backing storage; set only by the owning constructors.
+  // TODO(veluca): consider removing the unique_ptr.
+  std::unique_ptr<std::vector<uint8_t>> buffer;
+};
+
+}  // namespace jpeg
+}  // namespace jxl
+
+#endif  // LIB_JXL_JPEG_DEC_JPEG_OUTPUT_CHUNK_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_serialization_state.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_serialization_state.h
new file mode 100644
index 0000000000..a25c335b59
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/dec_jpeg_serialization_state.h
@@ -0,0 +1,95 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_JPEG_DEC_JPEG_SERIALIZATION_STATE_H_
+#define LIB_JXL_JPEG_DEC_JPEG_SERIALIZATION_STATE_H_
+
+#include <deque>
+#include <vector>
+
+#include "lib/jxl/jpeg/dec_jpeg_output_chunk.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+
+// Two parallel 256-entry arrays describing a Huffman code.
+// NOTE(review): presumably depth[s] is the code length in bits and code[s] the
+// code word for symbol s; confirm against the code that fills the table.
+struct HuffmanCodeTable {
+  int depth[256];
+  int code[256];
+};
+
+// Handles the packing of bits into output bytes.
+// None of the scalar/pointer members has a default initializer, so whoever
+// creates the writer has to set them before use. |chunk| is default-constructed,
+// i.e. an owning OutputChunk of size 0.
+struct JpegBitWriter {
+  // NOTE(review): presumably false once an error has occurred; confirm.
+  bool healthy;
+  // Queue of output chunks (not owned by this struct).
+  std::deque<OutputChunk>* output;
+  OutputChunk chunk;
+  // NOTE(review): presumably the write cursor base into |chunk|'s buffer, with
+  // |pos| the offset of the next byte; confirm.
+  uint8_t* data;
+  size_t pos;
+  // NOTE(review): presumably the pending bits and how many of them are valid;
+  // confirm against the bit-writer implementation.
+  uint64_t put_buffer;
+  int put_bits;
+};
+
+// Holds data that is buffered between 8x8 blocks in progressive mode.
+// |eob_run_| and |cur_ac_huff_| have no default initializer; they must be set
+// before the state is used.
+struct DCTCodingState {
+  // The run length of end-of-band symbols in a progressive scan.
+  int eob_run_;
+  // The huffman table to be used when flushing the state (not owned).
+  const HuffmanCodeTable* cur_ac_huff_;
+  // The sequence of currently buffered refinement bits for a successive
+  // approximation scan (one where Ah > 0).
+  std::vector<int> refinement_bits_;
+};
+
+// Progress of the scan that is currently being encoded.
+// Only |stage| and |last_dc_coeff| carry default initializers; every other
+// scalar member is left uninitialized by default construction.
+struct EncodeScanState {
+  enum Stage { HEAD, BODY };
+
+  // A freshly constructed state starts in HEAD.
+  Stage stage = HEAD;
+
+  // NOTE(review): presumably the MCU row currently being processed; confirm.
+  int mcu_y;
+  JpegBitWriter bw;
+  // One entry per component, zero-initialized.
+  coeff_t last_dc_coeff[kMaxComponents] = {0};
+  // NOTE(review): the members below look like restart-interval bookkeeping and
+  // cursors into per-scan side data (extra zero runs, reset points); their
+  // exact meaning is defined by the scan encoder, which is not shown here.
+  int restarts_to_go;
+  int next_restart_marker;
+  int block_scan_index;
+  DCTCodingState coding_state;
+  size_t extra_zero_runs_pos;
+  int next_extra_zero_run_index;
+  size_t next_reset_point_pos;
+  int next_reset_point;
+};
+
+// State of the JPEG writer, which emits the stream one section at a time.
+struct SerializationState {
+  // Stages of the writer's state machine. DONE and ERROR are terminal: the
+  // writer returns success in DONE and a failure in ERROR.
+  enum Stage {
+    INIT,
+    SERIALIZE_SECTION,
+    DONE,
+    ERROR,
+  };
+
+  Stage stage = INIT;
+
+  // Chunks produced but not yet handed to the output; expected to be empty
+  // when the DONE stage is reached.
+  std::deque<OutputChunk> output_queue;
+
+  // Index into JPEGData::marker_order of the section to serialize next.
+  size_t section_index = 0;
+  // NOTE(review): presumably counters of how many sections of each kind have
+  // been emitted so far; confirm against SerializeSection.
+  int dht_index = 0;
+  int dqt_index = 0;
+  int app_index = 0;
+  int com_index = 0;
+  int data_index = 0;
+  int scan_index = 0;
+  // Resized to kMaxHuffmanTables entries by the writer before any section is
+  // serialized.
+  std::vector<HuffmanCodeTable> dc_huff_table;
+  std::vector<HuffmanCodeTable> ac_huff_table;
+  // Range [pad_bits, pad_bits_end) inside JPEGData::padding_bits; set only
+  // when JPEGData::has_zero_padding_bit is true, otherwise both stay null.
+  const uint8_t* pad_bits = nullptr;
+  const uint8_t* pad_bits_end = nullptr;
+  bool seen_dri_marker = false;
+  bool is_progressive = false;
+
+  EncodeScanState scan_state;
+};
+
+}  // namespace jpeg
+}  // namespace jxl
+
+#endif  // LIB_JXL_JPEG_DEC_JPEG_SERIALIZATION_STATE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data.cc
new file mode 100644
index 0000000000..079c6efcea
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data.cc
@@ -0,0 +1,370 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+
+#include <brotli/encode.h>
+#include <stdio.h>
+
+#include "lib/jxl/jpeg/enc_jpeg_data_reader.h"
+
+namespace jxl {
+namespace jpeg {
+
+namespace {
+
+// Sample bit depth reported in the image metadata by DecodeImageJPG.
+constexpr int BITS_IN_JSAMPLE = 8;
+// Read-only view over a run of bytes.
+using ByteSpan = Span<const uint8_t>;
+
+// TODO(eustas): move to jpeg_data, to use from codec_jpg as well.
+// Looks for a canonically chunked ICC profile among the APP2 (0xE2) entries of
+// |jpeg_data.app_data| and tags the matching entries of |app_marker_type| with
+// AppMarkerType::kICC.
+//
+// A chunk is accepted only when its 1-based chunk id continues the sequence
+// accepted so far and its declared chunk count equals that of the first
+// accepted chunk. Entries that are empty, or too short to hold the signature
+// plus the chunk id and chunk count bytes, are skipped instead of being read
+// past their end.
+//
+// Returns a failure when the number of accepted chunks differs from the
+// declared chunk count, true otherwise.
+Status DetectIccProfile(JPEGData& jpeg_data) {
+  JXL_DASSERT(jpeg_data.app_data.size() == jpeg_data.app_marker_type.size());
+  // Marker byte + 2 length bytes + signature + chunk id + chunk count.
+  constexpr size_t kMinIccMarkerSize = 3 + sizeof kIccProfileTag + 2;
+  size_t num_icc = 0;
+  size_t num_icc_jpeg = 0;
+  for (size_t i = 0; i < jpeg_data.app_data.size(); i++) {
+    const auto& app = jpeg_data.app_data[i];
+    if (app.empty() || app[0] != 0xE2) continue;
+    // At least APPn + size; otherwise it should be intermarker-data.
+    JXL_DASSERT(app.size() >= 3);
+    // The assertions are presumably debug-only, so every read below is also
+    // bounded by an explicit size check.
+    if (app.size() < kMinIccMarkerSize) continue;
+    size_t pos = 1;
+    size_t tag_length = (app[pos] << 8) + app[pos + 1];
+    pos += 2;
+    JXL_DASSERT(app.size() == tag_length + 1);
+    // Empty payload is 2 bytes for tag length itself + signature
+    if (tag_length < 2 + sizeof kIccProfileTag) continue;
+
+    if (memcmp(&app[pos], kIccProfileTag, sizeof kIccProfileTag) != 0) continue;
+    pos += sizeof kIccProfileTag;
+    uint8_t chunk_id = app[pos++];
+    uint8_t num_chunks = app[pos++];
+    if (chunk_id != num_icc + 1) continue;
+    // The first accepted chunk fixes the expected chunk count.
+    if (num_icc_jpeg == 0) num_icc_jpeg = num_chunks;
+    if (num_icc_jpeg != num_chunks) continue;
+    num_icc++;
+    jpeg_data.app_marker_type[i] = AppMarkerType::kICC;
+  }
+  if (num_icc != num_icc_jpeg) {
+    return JXL_FAILURE("Invalid ICC chunks");
+  }
+  return true;
+}
+
+// Extracts the payload of a stored marker into |payload|.
+//
+// |data| holds |size| bytes: one marker byte, a big-endian 16-bit length and
+// then the payload. Returns false, leaving |payload| untouched, when fewer
+// than 3 bytes are given or when the stored length differs from |size| - 1.
+bool GetMarkerPayload(const uint8_t* data, size_t size, ByteSpan* payload) {
+  // Marker byte plus the two length bytes.
+  constexpr size_t kHeaderSize = 3;
+  if (size < kHeaderSize) return false;
+
+  const size_t declared_size = (static_cast<size_t>(data[1]) << 8u) | data[2];
+  // Second byte of marker is not counted towards size.
+  if (declared_size + 1 != size) return false;
+
+  // Drop the marker byte and the "length" field; the rest is the payload.
+  ByteSpan whole(data, size);
+  whole.remove_prefix(kHeaderSize);
+  *payload = whole;
+  return true;
+}
+
+// Tags the first APP1 entry whose payload starts with the Exif tag as
+// AppMarkerType::kExif and the first one starting with the XMP tag as
+// AppMarkerType::kXMP. Entries whose payload cannot be extracted are skipped.
+// Always returns true.
+Status DetectBlobs(jpeg::JPEGData& jpeg_data) {
+  JXL_DASSERT(jpeg_data.app_data.size() == jpeg_data.app_marker_type.size());
+  // True when |payload| begins with the |tag_size| bytes at |tag|.
+  const auto starts_with = [](const ByteSpan& payload, const void* tag,
+                              size_t tag_size) {
+    return payload.size() >= tag_size &&
+           memcmp(payload.data(), tag, tag_size) == 0;
+  };
+
+  bool exif_seen = false;
+  bool xmp_seen = false;
+  for (size_t idx = 0; idx < jpeg_data.app_data.size(); ++idx) {
+    const auto& marker = jpeg_data.app_data[idx];
+    const bool is_app1 = !marker.empty() && marker[0] == kApp1;
+    if (!is_app1) continue;
+
+    ByteSpan payload;
+    // Something is wrong with this marker; does not care.
+    if (!GetMarkerPayload(marker.data(), marker.size(), &payload)) continue;
+
+    if (!exif_seen && starts_with(payload, kExifTag, sizeof kExifTag)) {
+      jpeg_data.app_marker_type[idx] = AppMarkerType::kExif;
+      exif_seen = true;
+    }
+    if (!xmp_seen && starts_with(payload, kXMPTag, sizeof kXMPTag)) {
+      jpeg_data.app_marker_type[idx] = AppMarkerType::kXMP;
+      xmp_seen = true;
+    }
+  }
+  return true;
+}
+
+// Reassembles a blob that is split over several APP entries of type
+// |marker_type| whose payload starts with |tag|, followed by a 1-based chunk
+// index byte and a total-chunk-count byte.
+//
+// |output| is cleared first and then receives the chunk bodies concatenated in
+// index order. Unless |allow_permutations| is set, the n-th matching chunk must
+// carry index n. Fails on a chunk too short for the two header bytes, a zero or
+// inconsistent chunk count, an out-of-range or duplicate index, or a missing
+// chunk. Entries of another type, entries whose payload cannot be extracted and
+// entries without |tag| are ignored; when nothing matches, |output| stays empty
+// and the call succeeds.
+Status ParseChunkedMarker(const jpeg::JPEGData& src, uint8_t marker_type,
+                          const ByteSpan& tag, PaddedBytes* output,
+                          bool allow_permutations = false) {
+  output->clear();
+
+  // 1-based slots; slot 0 exists only to keep the indexing simple.
+  std::vector<ByteSpan> parts;
+  std::vector<bool> seen;
+  size_t expected_number_of_parts = 0;
+  bool sized = false;
+  size_t matched = 0;
+  for (const auto& marker : src.app_data) {
+    const bool type_matches = !marker.empty() && marker[0] == marker_type;
+    if (!type_matches) continue;
+
+    ByteSpan payload;
+    // Something is wrong with this marker; does not care.
+    if (!GetMarkerPayload(marker.data(), marker.size(), &payload)) continue;
+
+    const bool has_tag = payload.size() >= tag.size() &&
+                         memcmp(payload.data(), tag.data(), tag.size()) == 0;
+    if (!has_tag) continue;
+
+    payload.remove_prefix(tag.size());
+    if (payload.size() < 2) {
+      return JXL_FAILURE("Chunk is too small.");
+    }
+    const uint8_t index = payload[0];
+    const uint8_t total = payload[1];
+    payload.remove_prefix(2);
+
+    ++matched;
+    if (!allow_permutations && index != matched) {
+      return JXL_FAILURE("Invalid chunk order.");
+    }
+
+    JXL_RETURN_IF_ERROR(total != 0);
+    if (sized) {
+      JXL_RETURN_IF_ERROR(expected_number_of_parts == total);
+    } else {
+      // The first matching chunk fixes how many parts are expected.
+      sized = true;
+      expected_number_of_parts = total;
+      parts.resize(total + 1);
+      seen.resize(total + 1);
+    }
+
+    const bool index_in_range = index >= 1 && index <= total;
+    if (!index_in_range) {
+      return JXL_FAILURE("Invalid chunk index.");
+    }
+    if (seen[index]) {
+      return JXL_FAILURE("Duplicate chunk.");
+    }
+    seen[index] = true;
+    parts[index] = payload;
+  }
+
+  for (size_t index = 1; index <= expected_number_of_parts; ++index) {
+    if (!seen[index]) {
+      return JXL_FAILURE("Missing chunk.");
+    }
+    output->append(parts[index]);
+  }
+
+  return true;
+}
+
+// Copies the Exif and XMP payloads found in the APP1 entries of |jpeg_data|
+// into |blobs|, without their identifying tags. A blob is stored only while
+// the corresponding field of |blobs| is still empty; later occurrences just
+// produce a warning. Entries whose payload cannot be extracted are skipped.
+// Always returns true.
+Status SetBlobsFromJpegData(const jpeg::JPEGData& jpeg_data, Blobs* blobs) {
+  for (const auto& marker : jpeg_data.app_data) {
+    const bool is_app1 = !marker.empty() && marker[0] == kApp1;
+    if (!is_app1) continue;
+
+    ByteSpan payload;
+    // Something is wrong with this marker; does not care.
+    if (!GetMarkerPayload(marker.data(), marker.size(), &payload)) continue;
+
+    const bool is_exif = payload.size() >= sizeof kExifTag &&
+                         memcmp(payload.data(), kExifTag, sizeof kExifTag) == 0;
+    if (is_exif) {
+      if (!blobs->exif.empty()) {
+        JXL_WARNING(
+            "ReJPEG: multiple Exif blobs, storing only first one in the JPEG "
+            "XL container\n");
+      } else {
+        const size_t body_size = payload.size() - sizeof kExifTag;
+        blobs->exif.resize(body_size);
+        memcpy(blobs->exif.data(), payload.data() + sizeof kExifTag,
+               body_size);
+      }
+    }
+
+    const bool is_xmp = payload.size() >= sizeof kXMPTag &&
+                        memcmp(payload.data(), kXMPTag, sizeof kXMPTag) == 0;
+    if (is_xmp) {
+      if (!blobs->xmp.empty()) {
+        JXL_WARNING(
+            "ReJPEG: multiple XMP blobs, storing only first one in the JPEG "
+            "XL container\n");
+      } else {
+        const size_t body_size = payload.size() - sizeof kXMPTag;
+        blobs->xmp.resize(body_size);
+        memcpy(blobs->xmp.data(), payload.data() + sizeof kXMPTag, body_size);
+      }
+    }
+  }
+  return true;
+}
+
+}  // namespace
+
+// Derives |color_encoding| from the ICC profile embedded in the APP2 entries
+// of |jpg|. When no profile is present, or the chunked profile is corrupted
+// (which only produces a warning), sRGB is used instead: gray for
+// single-component images, color otherwise. Returns the result of SetICC()
+// when a profile is available, true otherwise.
+Status SetColorEncodingFromJpegData(const jpeg::JPEGData& jpg,
+                                    ColorEncoding* color_encoding) {
+  PaddedBytes icc_profile;
+  if (!ParseChunkedMarker(jpg, kApp2, ByteSpan(kIccProfileTag), &icc_profile)) {
+    JXL_WARNING("ReJPEG: corrupted ICC profile\n");
+    icc_profile.clear();
+  }
+
+  if (!icc_profile.empty()) {
+    return color_encoding->SetICC(std::move(icc_profile));
+  }
+
+  // No usable embedded profile: fall back to sRGB.
+  const bool is_gray = jpg.components.size() == 1;
+  *color_encoding = ColorEncoding::SRGB(is_gray);
+  return true;
+}
+
+// Serializes |jpeg_data| into |bytes|.
+//
+// The output consists of the Bundle-encoded JPEGData fields, zero-padded to a
+// byte boundary, followed by a single Brotli stream (quality 11) holding, in
+// this order: the app markers still typed AppMarkerType::kUnknown, the COM
+// data, the inter-marker data and the tail data.
+//
+// Side effect: |jpeg_data.app_marker_type| is resized to match |app_data| and
+// ICC / Exif / XMP entries are tagged by DetectIccProfile and DetectBlobs.
+//
+// Returns a failure when ICC detection or Bundle writing fails, or when the
+// Brotli encoder cannot be created. Compression errors go through JXL_CHECK.
+Status EncodeJPEGData(JPEGData& jpeg_data, PaddedBytes* bytes) {
+  jpeg_data.app_marker_type.resize(jpeg_data.app_data.size(),
+                                   AppMarkerType::kUnknown);
+  JXL_RETURN_IF_ERROR(DetectIccProfile(jpeg_data));
+  JXL_RETURN_IF_ERROR(DetectBlobs(jpeg_data));
+  BitWriter writer;
+  JXL_RETURN_IF_ERROR(Bundle::Write(jpeg_data, &writer, 0, nullptr));
+  writer.ZeroPadToByte();
+  *bytes = std::move(writer).TakeBytes();
+  BrotliEncoderState* brotli_enc =
+      BrotliEncoderCreateInstance(nullptr, nullptr, nullptr);
+  // Creation can fail; every call below would dereference a null state.
+  if (brotli_enc == nullptr) {
+    return JXL_FAILURE("Failed to create Brotli encoder");
+  }
+  BrotliEncoderSetParameter(brotli_enc, BROTLI_PARAM_QUALITY, 11);
+  // Total number of bytes that will be fed to Brotli; used for the size hint
+  // and for sizing the output area.
+  size_t total_data = 0;
+  for (size_t i = 0; i < jpeg_data.app_data.size(); i++) {
+    if (jpeg_data.app_marker_type[i] != AppMarkerType::kUnknown) {
+      continue;
+    }
+    total_data += jpeg_data.app_data[i].size();
+  }
+  for (size_t i = 0; i < jpeg_data.com_data.size(); i++) {
+    total_data += jpeg_data.com_data[i].size();
+  }
+  for (size_t i = 0; i < jpeg_data.inter_marker_data.size(); i++) {
+    total_data += jpeg_data.inter_marker_data[i].size();
+  }
+  total_data += jpeg_data.tail_data.size();
+  size_t initial_size = bytes->size();
+  size_t brotli_capacity = BrotliEncoderMaxCompressedSize(total_data);
+  BrotliEncoderSetParameter(brotli_enc, BROTLI_PARAM_SIZE_HINT, total_data);
+  // Reserve the worst-case compressed size after the Bundle bytes; the buffer
+  // is trimmed to the real size at the end.
+  bytes->resize(bytes->size() + brotli_capacity);
+  size_t enc_size = 0;
+  // Feeds |data| to the encoder. |brotli_capacity| is the remaining output
+  // space and |enc_size| the total number of compressed bytes produced so far;
+  // both are updated by Brotli.
+  auto br_append = [&](const std::vector<uint8_t>& data, bool last) {
+    size_t available_in = data.size();
+    const uint8_t* in = data.data();
+    uint8_t* out = &(*bytes)[initial_size + enc_size];
+    do {
+      JXL_CHECK(BrotliEncoderCompressStream(
+          brotli_enc, last ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
+          &available_in, &in, &brotli_capacity, &out, &enc_size));
+    } while (BrotliEncoderHasMoreOutput(brotli_enc) || available_in > 0);
+  };
+
+  for (size_t i = 0; i < jpeg_data.app_data.size(); i++) {
+    if (jpeg_data.app_marker_type[i] != AppMarkerType::kUnknown) {
+      continue;
+    }
+    br_append(jpeg_data.app_data[i], /*last=*/false);
+  }
+  for (size_t i = 0; i < jpeg_data.com_data.size(); i++) {
+    br_append(jpeg_data.com_data[i], /*last=*/false);
+  }
+  for (size_t i = 0; i < jpeg_data.inter_marker_data.size(); i++) {
+    br_append(jpeg_data.inter_marker_data[i], /*last=*/false);
+  }
+  // The tail data is always appended last so that the stream gets finished.
+  br_append(jpeg_data.tail_data, /*last=*/true);
+  BrotliEncoderDestroyInstance(brotli_enc);
+  bytes->resize(initial_size + enc_size);
+  return true;
+}
+
+// Parses the JPEG codestream in |bytes| and stores it in |io| as a single
+// frame carrying the JPEG data (coefficients) rather than decoded pixels.
+//
+// Fills in the color encoding, the Exif/XMP blobs, the chroma subsampling and
+// the color transform (YCbCr unless the image is recognized as RGB), then sets
+// up the image at the JPEG dimensions. Fails when the stream cannot be read,
+// when the component count is neither 1 nor 3, or when the sampling factors
+// are rejected.
+Status DecodeImageJPG(const Span<const uint8_t> bytes, CodecInOut* io) {
+  io->frames.clear();
+  io->frames.reserve(1);
+  io->frames.emplace_back(&io->metadata.m);
+  io->Main().jpeg_data = make_unique<jpeg::JPEGData>();
+  jpeg::JPEGData* jpeg_data = io->Main().jpeg_data.get();
+  if (!jpeg::ReadJpeg(bytes.data(), bytes.size(), jpeg::JpegReadMode::kReadAll,
+                      jpeg_data)) {
+    return JXL_FAILURE("Error reading JPEG");
+  }
+  JXL_RETURN_IF_ERROR(
+      SetColorEncodingFromJpegData(*jpeg_data, &io->metadata.m.color_encoding));
+  JXL_RETURN_IF_ERROR(SetBlobsFromJpegData(*jpeg_data, &io->blobs));
+
+  const size_t nbcomp = jpeg_data->components.size();
+  if (nbcomp != 1 && nbcomp != 3) {
+    return JXL_FAILURE("Cannot recompress JPEGs with neither 1 nor 3 channels");
+  }
+
+  YCbCrChromaSubsampling cs;
+  {
+    // A single-component image reuses component 0 for all three slots.
+    uint8_t hsample[3], vsample[3];
+    for (size_t i = 0; i < 3; i++) {
+      const auto& component = jpeg_data->components[nbcomp == 1 ? 0 : i];
+      hsample[i] = component.h_samp_factor;
+      vsample[i] = component.v_samp_factor;
+    }
+    JXL_RETURN_IF_ERROR(cs.Set(hsample, vsample));
+  }
+
+  bool is_rgb = false;
+  const auto& markers = jpeg_data->marker_order;
+  const bool has_jfif =
+      std::find(markers.begin(), markers.end(), 0xE0) != markers.end();
+  // If there is a JFIF marker, this is YCbCr. Otherwise...
+  if (!has_jfif) {
+    // Try to find an 'Adobe' marker.
+    bool adobe_found = false;
+    // Position of the current APP marker within jpeg_data->app_data.
+    size_t app_markers = 0;
+    for (size_t i = 0; i < markers.size(); i++) {
+      const bool is_app_marker = (markers[i] & 0xF0) == 0xE0;
+      if (!is_app_marker) continue;
+      JXL_CHECK(app_markers < jpeg_data->app_data.size());
+      // APP14 marker
+      if (markers[i] == 0xEE) {
+        const auto& data = jpeg_data->app_data[app_markers];
+        if (data.size() == 15 && data[3] == 'A' && data[4] == 'd' &&
+            data[5] == 'o' && data[6] == 'b' && data[7] == 'e') {
+          // 'Adobe' marker: the last byte decides whether the data is RGB.
+          is_rgb = data[14] == 0;
+          adobe_found = true;
+          break;
+        }
+      }
+      app_markers++;
+    }
+
+    if (!adobe_found) {
+      // No 'Adobe' marker, guess from component IDs.
+      is_rgb = nbcomp == 3 && jpeg_data->components[0].id == 'R' &&
+               jpeg_data->components[1].id == 'G' &&
+               jpeg_data->components[2].id == 'B';
+    }
+  }
+
+  io->Main().chroma_subsampling = cs;
+  const bool use_ycbcr = !is_rgb || nbcomp == 1;
+  io->Main().color_transform =
+      use_ycbcr ? ColorTransform::kYCbCr : ColorTransform::kNone;
+
+  io->metadata.m.SetIntensityTarget(
+      io->target_nits != 0 ? io->target_nits : kDefaultIntensityTarget);
+  io->metadata.m.SetUintSamples(BITS_IN_JSAMPLE);
+  io->SetFromImage(Image3F(jpeg_data->width, jpeg_data->height),
+                   io->metadata.m.color_encoding);
+  SetIntensityTarget(io);
+  return true;
+}
+
+}  // namespace jpeg
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data.h
new file mode 100644
index 0000000000..b80ade776f
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data.h
@@ -0,0 +1,28 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_JPEG_ENC_JPEG_DATA_H_
+#define LIB_JXL_JPEG_ENC_JPEG_DATA_H_
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+// Serializes |jpeg_data| into |bytes|: the Bundle-encoded fields followed by a
+// Brotli stream with the remaining marker, comment, inter-marker and tail
+// data. Also (re)computes |jpeg_data.app_marker_type|, which is why
+// |jpeg_data| is taken by non-const reference.
+Status EncodeJPEGData(JPEGData& jpeg_data, PaddedBytes* bytes);
+
+// Sets |color_encoding| from the ICC profile embedded in |jpg|, falling back
+// to sRGB (gray for single-component images) when no usable profile is found.
+Status SetColorEncodingFromJpegData(const jpeg::JPEGData& jpg,
+                                    ColorEncoding* color_encoding);
+
+/**
+ * Decodes bytes containing JPEG codestream into a CodecInOut as coefficients
+ * only, for lossless JPEG transcoding.
+ */
+Status DecodeImageJPG(const Span<const uint8_t> bytes, CodecInOut* io);
+}  // namespace jpeg
+}  // namespace jxl
+
+#endif  // LIB_JXL_JPEG_ENC_JPEG_DATA_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data_reader.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data_reader.cc
new file mode 100644
index 0000000000..6e24557a27
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data_reader.cc
@@ -0,0 +1,1142 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/jpeg/enc_jpeg_data_reader.h"
+
+#include <inttypes.h>
+#include <string.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/jpeg/enc_jpeg_huffman_decode.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+// By default only print debug messages when JXL_DEBUG_ON_ERROR is enabled.
+// Define JXL_DEBUG_JPEG_DATA_READER before this point to override that.
+#ifndef JXL_DEBUG_JPEG_DATA_READER
+#define JXL_DEBUG_JPEG_DATA_READER JXL_DEBUG_ON_ERROR
+#endif  // JXL_DEBUG_JPEG_DATA_READER
+
+// printf-style debug message, forwarded to JXL_DEBUG under the
+// JXL_DEBUG_JPEG_DATA_READER switch.
+#define JXL_JPEG_DEBUG(format, ...) \
+  JXL_DEBUG(JXL_DEBUG_JPEG_DATA_READER, format, ##__VA_ARGS__)
+
+namespace jxl {
+namespace jpeg {
+
+namespace {
+// Largest horizontal/vertical sampling factor accepted in a SOF marker.
+static const int kBrunsliMaxSampling = 15;
+// Upper bound on the number of 8x8 blocks of a single component; larger
+// images are rejected with IMAGE_TOO_LARGE.
+static const size_t kBrunsliMaxNumBlocks = 1ull << 24;
+
+// Macros for commonly used error conditions.
+//
+// They expand to a bare `if` statement that records an error code in
+// |jpg->error| and makes the enclosing function return false, so they can only
+// be used inside functions returning bool that have the captured locals in
+// scope. Because the expansion is not wrapped in do { } while (0), do not use
+// them as the unbraced body of an if/else.
+
+// Fails with UNEXPECTED_EOF unless |n| more bytes are available.
+// Captures: |pos| (pointer to the read position), |len| and |jpg|.
+#define JXL_JPEG_VERIFY_LEN(n)                                               \
+  if (*pos + (n) > len) {                                                    \
+    JXL_JPEG_DEBUG("Unexpected end of input: pos=%zu need=%d len=%zu", *pos, \
+                   static_cast<int>(n), len);                                \
+    jpg->error = JPEGReadError::UNEXPECTED_EOF;                              \
+    return false;                                                            \
+  }
+
+// Fails with INVALID_<code> unless low <= var <= high. The spelling of |var|
+// appears in the debug message. Captures: |jpg|.
+#define JXL_JPEG_VERIFY_INPUT(var, low, high, code)                \
+  if ((var) < (low) || (var) > (high)) {                           \
+    JXL_JPEG_DEBUG("Invalid " #var ": %d", static_cast<int>(var)); \
+    jpg->error = JPEGReadError::INVALID_##code;                    \
+    return false;                                                  \
+  }
+
+// Fails with WRONG_MARKER_SIZE unless exactly |marker_len| bytes were consumed
+// since |start_pos|. Captures: |start_pos|, |marker_len|, |pos| (pointer) and
+// |jpg|.
+#define JXL_JPEG_VERIFY_MARKER_END()                                 \
+  if (start_pos + marker_len != *pos) {                              \
+    JXL_JPEG_DEBUG("Invalid marker length: declared=%zu actual=%zu", \
+                   marker_len, (*pos - start_pos));                  \
+    jpg->error = JPEGReadError::WRONG_MARKER_SIZE;                   \
+    return false;                                                    \
+  }
+
+// Fails with MARKER_BYTE_NOT_FOUND unless at least two bytes remain and the
+// byte at the current position is 0xff. Note that here |pos| is the position
+// itself, not a pointer to it. Captures: |pos|, |len|, |data| and |jpg|.
+#define JXL_JPEG_EXPECT_MARKER()                                      \
+  if (pos + 2 > len || data[pos] != 0xff) {                           \
+    JXL_JPEG_DEBUG(                                                   \
+        "Marker byte (0xff) expected, found: 0x%.2x pos=%zu len=%zu", \
+        (pos < len ? data[pos] : 0), pos, len);                       \
+    jpg->error = JPEGReadError::MARKER_BYTE_NOT_FOUND;                \
+    return false;                                                     \
+  }
+
+// Returns the byte at |data[*pos]| and advances |*pos| by one. Performs no
+// bounds checking; the caller has to verify the length beforehand.
+inline int ReadUint8(const uint8_t* data, size_t* pos) {
+  const size_t at = *pos;
+  *pos = at + 1;
+  return data[at];
+}
+
+// Returns the big-endian 16-bit value stored at |data[*pos]| and advances
+// |*pos| by two. Performs no bounds checking; the caller has to verify the
+// length beforehand.
+inline int ReadUint16(const uint8_t* data, size_t* pos) {
+  const uint8_t* bytes = data + *pos;
+  *pos += 2;
+  const int high = bytes[0];
+  const int low = bytes[1];
+  return (high << 8) | low;
+}
+
+// Reads the Start of Frame (SOF) marker segment and fills in *jpg with the
+// parsed data.
+//
+// |data|/|len| describe the whole input and |*pos| is the offset of the
+// segment's length field; on success |*pos| ends up just past the segment.
+// Sets jpg->width, jpg->height and jpg->components (id, sampling factors,
+// quantization table index, size in blocks). In JpegReadMode::kReadAll mode
+// the coefficient storage of every component is allocated as well.
+//
+// Returns false and sets jpg->error when a SOF was already seen, the input is
+// truncated, a field is out of range, a component id repeats, the sampling
+// ratios are not integral, the image has too many blocks or the declared
+// segment length does not match the bytes consumed.
+bool ProcessSOF(const uint8_t* data, const size_t len, JpegReadMode mode,
+                size_t* pos, JPEGData* jpg) {
+  // A non-zero width means an earlier SOF has already been processed.
+  if (jpg->width != 0) {
+    JXL_JPEG_DEBUG("Duplicate SOF marker.");
+    jpg->error = JPEGReadError::DUPLICATE_SOF;
+    return false;
+  }
+  const size_t start_pos = *pos;
+  // Fixed part: length (2) + precision (1) + height (2) + width (2) +
+  // component count (1).
+  JXL_JPEG_VERIFY_LEN(8);
+  size_t marker_len = ReadUint16(data, pos);
+  int precision = ReadUint8(data, pos);
+  int height = ReadUint16(data, pos);
+  int width = ReadUint16(data, pos);
+  int num_components = ReadUint8(data, pos);
+  // Only 8-bit precision is accepted.
+  JXL_JPEG_VERIFY_INPUT(precision, 8, 8, PRECISION);
+  JXL_JPEG_VERIFY_INPUT(height, 1, kMaxDimPixels, HEIGHT);
+  JXL_JPEG_VERIFY_INPUT(width, 1, kMaxDimPixels, WIDTH);
+  JXL_JPEG_VERIFY_INPUT(num_components, 1, kMaxComponents, NUMCOMP);
+  // Each component contributes 3 bytes: id, sampling factors, quant index.
+  JXL_JPEG_VERIFY_LEN(3 * num_components);
+  jpg->height = height;
+  jpg->width = width;
+  jpg->components.resize(num_components);
+
+  // Read sampling factors and quant table index for each component.
+  std::vector<bool> ids_seen(256, false);
+  int max_h_samp_factor = 1;
+  int max_v_samp_factor = 1;
+  for (size_t i = 0; i < jpg->components.size(); ++i) {
+    const int id = ReadUint8(data, pos);
+    if (ids_seen[id]) {  // (cf. section B.2.2, syntax of Ci)
+      JXL_JPEG_DEBUG("Duplicate ID %d in SOF.", id);
+      jpg->error = JPEGReadError::DUPLICATE_COMPONENT_ID;
+      return false;
+    }
+    ids_seen[id] = true;
+    jpg->components[i].id = id;
+    // High nibble: horizontal factor, low nibble: vertical factor.
+    int factor = ReadUint8(data, pos);
+    int h_samp_factor = factor >> 4;
+    int v_samp_factor = factor & 0xf;
+    JXL_JPEG_VERIFY_INPUT(h_samp_factor, 1, kBrunsliMaxSampling, SAMP_FACTOR);
+    JXL_JPEG_VERIFY_INPUT(v_samp_factor, 1, kBrunsliMaxSampling, SAMP_FACTOR);
+    jpg->components[i].h_samp_factor = h_samp_factor;
+    jpg->components[i].v_samp_factor = v_samp_factor;
+    jpg->components[i].quant_idx = ReadUint8(data, pos);
+    max_h_samp_factor = std::max(max_h_samp_factor, h_samp_factor);
+    max_v_samp_factor = std::max(max_v_samp_factor, v_samp_factor);
+  }
+
+  // We have checked above that none of the sampling factors are 0, so the max
+  // sampling factors can not be 0.
+  int MCU_rows = DivCeil(jpg->height, max_v_samp_factor * 8);
+  int MCU_cols = DivCeil(jpg->width, max_h_samp_factor * 8);
+  // Compute the block dimensions for each component.
+  for (size_t i = 0; i < jpg->components.size(); ++i) {
+    JPEGComponent* c = &jpg->components[i];
+    // The maximum factors must be integer multiples of this component's.
+    if (max_h_samp_factor % c->h_samp_factor != 0 ||
+        max_v_samp_factor % c->v_samp_factor != 0) {
+      JXL_JPEG_DEBUG("Non-integral subsampling ratios.");
+      jpg->error = JPEGReadError::INVALID_SAMPLING_FACTORS;
+      return false;
+    }
+    c->width_in_blocks = MCU_cols * c->h_samp_factor;
+    c->height_in_blocks = MCU_rows * c->v_samp_factor;
+    // Computed in 64 bits so that the product cannot overflow.
+    const uint64_t num_blocks =
+        static_cast<uint64_t>(c->width_in_blocks) * c->height_in_blocks;
+    if (num_blocks > kBrunsliMaxNumBlocks) {
+      JXL_JPEG_DEBUG("Image too large.");
+      jpg->error = JPEGReadError::IMAGE_TOO_LARGE;
+      return false;
+    }
+    if (mode == JpegReadMode::kReadAll) {
+      c->coeffs.resize(num_blocks * kDCTBlockSize);
+    }
+  }
+  JXL_JPEG_VERIFY_MARKER_END();
+  return true;
+}
+
+// Reads the Start of Scan (SOS) marker segment and appends the parsed scan
+// description to jpg->scan_info.
+//
+// |data|/|len| describe the whole input and |*pos| is the offset of the
+// segment's length field; on success |*pos| ends up just past the segment.
+//
+// Returns false and sets jpg->error when the input is truncated, a field is
+// out of range, a component id repeats or is unknown, a required Huffman table
+// has not been defined or the declared segment length does not match the bytes
+// consumed. Inconsistent successive-approximation parameters only produce a
+// warning.
+bool ProcessSOS(const uint8_t* data, const size_t len, size_t* pos,
+                JPEGData* jpg) {
+  const size_t start_pos = *pos;
+  // Length (2) + number of components in the scan (1).
+  JXL_JPEG_VERIFY_LEN(3);
+  size_t marker_len = ReadUint16(data, pos);
+  size_t comps_in_scan = ReadUint8(data, pos);
+  JXL_JPEG_VERIFY_INPUT(comps_in_scan, 1, jpg->components.size(),
+                        COMPS_IN_SCAN);
+
+  JPEGScanInfo scan_info;
+  scan_info.num_components = comps_in_scan;
+  // Each scan component contributes 2 bytes: id and table selectors.
+  JXL_JPEG_VERIFY_LEN(2 * comps_in_scan);
+  std::vector<bool> ids_seen(256, false);
+  for (size_t i = 0; i < comps_in_scan; ++i) {
+    uint32_t id = ReadUint8(data, pos);
+    if (ids_seen[id]) {  // (cf. section B.2.3, regarding CSj)
+      JXL_JPEG_DEBUG("Duplicate ID %d in SOS.", id);
+      jpg->error = JPEGReadError::DUPLICATE_COMPONENT_ID;
+      return false;
+    }
+    ids_seen[id] = true;
+    // Map the component id to its index in jpg->components.
+    bool found_index = false;
+    for (size_t j = 0; j < jpg->components.size(); ++j) {
+      if (jpg->components[j].id == id) {
+        scan_info.components[i].comp_idx = j;
+        found_index = true;
+      }
+    }
+    if (!found_index) {
+      JXL_JPEG_DEBUG("SOS marker: Could not find component with id %d", id);
+      jpg->error = JPEGReadError::COMPONENT_NOT_FOUND;
+      return false;
+    }
+    // High nibble: DC table index, low nibble: AC table index.
+    int c = ReadUint8(data, pos);
+    int dc_tbl_idx = c >> 4;
+    int ac_tbl_idx = c & 0xf;
+    JXL_JPEG_VERIFY_INPUT(dc_tbl_idx, 0, 3, HUFFMAN_INDEX);
+    JXL_JPEG_VERIFY_INPUT(ac_tbl_idx, 0, 3, HUFFMAN_INDEX);
+    scan_info.components[i].dc_tbl_idx = dc_tbl_idx;
+    scan_info.components[i].ac_tbl_idx = ac_tbl_idx;
+  }
+  // Ss (1) + Se (1) + Ah/Al (1).
+  JXL_JPEG_VERIFY_LEN(3);
+  scan_info.Ss = ReadUint8(data, pos);
+  scan_info.Se = ReadUint8(data, pos);
+  JXL_JPEG_VERIFY_INPUT(static_cast<int>(scan_info.Ss), 0, 63, START_OF_SCAN);
+  JXL_JPEG_VERIFY_INPUT(scan_info.Se, scan_info.Ss, 63, END_OF_SCAN);
+  // High nibble: Ah, low nibble: Al.
+  int c = ReadUint8(data, pos);
+  scan_info.Ah = c >> 4;
+  scan_info.Al = c & 0xf;
+  if (scan_info.Ah != 0 && scan_info.Al != scan_info.Ah - 1) {
+    // section G.1.1.1.2 : Successive approximation control only improves
+    // by one bit at a time. But it's not always respected, so we just issue
+    // a warning.
+    JXL_WARNING("Invalid progressive parameters: Al=%d Ah=%d", scan_info.Al,
+                scan_info.Ah);
+  }
+  // Check that all the Huffman tables needed for this scan are defined.
+  for (size_t i = 0; i < comps_in_scan; ++i) {
+    bool found_dc_table = false;
+    bool found_ac_table = false;
+    for (size_t j = 0; j < jpg->huffman_code.size(); ++j) {
+      // AC tables are stored with slot ids offset by 16.
+      uint32_t slot_id = jpg->huffman_code[j].slot_id;
+      if (slot_id == scan_info.components[i].dc_tbl_idx) {
+        found_dc_table = true;
+      } else if (slot_id == scan_info.components[i].ac_tbl_idx + 16) {
+        found_ac_table = true;
+      }
+    }
+    // A DC table is only needed when the scan covers coefficient 0.
+    if (scan_info.Ss == 0 && !found_dc_table) {
+      JXL_JPEG_DEBUG(
+          "SOS marker: Could not find DC Huffman table with index %d",
+          scan_info.components[i].dc_tbl_idx);
+      jpg->error = JPEGReadError::HUFFMAN_TABLE_NOT_FOUND;
+      return false;
+    }
+    // An AC table is only needed when the scan reaches past coefficient 0.
+    if (scan_info.Se > 0 && !found_ac_table) {
+      JXL_JPEG_DEBUG(
+          "SOS marker: Could not find AC Huffman table with index %d",
+          scan_info.components[i].ac_tbl_idx);
+      jpg->error = JPEGReadError::HUFFMAN_TABLE_NOT_FOUND;
+      return false;
+    }
+  }
+  // Note: the scan is recorded before the segment length is verified.
+  jpg->scan_info.push_back(scan_info);
+  JXL_JPEG_VERIFY_MARKER_END();
+  return true;
+}
+
+// Reads the Define Huffman Table (DHT) marker segment and fills in *jpg with
+// the parsed data. Builds the Huffman decoding table in either dc_huff_lut or
+// ac_huff_lut, depending on the type and slot_id of the Huffman code being
+// read.
+bool ProcessDHT(const uint8_t* data, const size_t len, JpegReadMode mode,
+                std::vector<HuffmanTableEntry>* dc_huff_lut,
+                std::vector<HuffmanTableEntry>* ac_huff_lut, size_t* pos,
+                JPEGData* jpg) {
+  const size_t start_pos = *pos;
+  JXL_JPEG_VERIFY_LEN(2);
+  size_t marker_len = ReadUint16(data, pos);
+  if (marker_len == 2) {
+    JXL_JPEG_DEBUG("DHT marker: no Huffman table found");
+    jpg->error = JPEGReadError::EMPTY_DHT;
+    return false;
+  }
+  while (*pos < start_pos + marker_len) {
+    JXL_JPEG_VERIFY_LEN(1 + kJpegHuffmanMaxBitLength);
+    JPEGHuffmanCode huff;
+    huff.slot_id = ReadUint8(data, pos);
+    int huffman_index = huff.slot_id;
+    int is_ac_table = (huff.slot_id & 0x10) != 0;
+    HuffmanTableEntry* huff_lut;
+    if (is_ac_table) {
+      huffman_index -= 0x10;
+      JXL_JPEG_VERIFY_INPUT(huffman_index, 0, 3, HUFFMAN_INDEX);
+      huff_lut = &(*ac_huff_lut)[huffman_index * kJpegHuffmanLutSize];
+    } else {
+      JXL_JPEG_VERIFY_INPUT(huffman_index, 0, 3, HUFFMAN_INDEX);
+      huff_lut = &(*dc_huff_lut)[huffman_index * kJpegHuffmanLutSize];
+    }
+    huff.counts[0] = 0;
+    int total_count = 0;
+    int space = 1 << kJpegHuffmanMaxBitLength;
+    int max_depth = 1;
+    for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
+      int count = ReadUint8(data, pos);
+      if (count != 0) {
+        max_depth = i;
+      }
+      huff.counts[i] = count;
+      total_count += count;
+      space -= count * (1 << (kJpegHuffmanMaxBitLength - i));
+    }
+    if (is_ac_table) {
+      JXL_JPEG_VERIFY_INPUT(total_count, 0, kJpegHuffmanAlphabetSize,
+                            HUFFMAN_CODE);
+    } else {
+      JXL_JPEG_VERIFY_INPUT(total_count, 0, kJpegDCAlphabetSize, HUFFMAN_CODE);
+    }
+    JXL_JPEG_VERIFY_LEN(total_count);
+    std::vector<bool> values_seen(256, false);
+    for (int i = 0; i < total_count; ++i) {
+      int value = ReadUint8(data, pos);
+      if (!is_ac_table) {
+        JXL_JPEG_VERIFY_INPUT(value, 0, kJpegDCAlphabetSize - 1, HUFFMAN_CODE);
+      }
+      if (values_seen[value]) {
+        JXL_JPEG_DEBUG("Duplicate Huffman code value %d", value);
+        jpg->error = JPEGReadError::INVALID_HUFFMAN_CODE;
+        return false;
+      }
+      values_seen[value] = true;
+      huff.values[i] = value;
+    }
+    // Add an invalid symbol that will have the all 1 code.
+    ++huff.counts[max_depth];
+    huff.values[total_count] = kJpegHuffmanAlphabetSize;
+    space -= (1 << (kJpegHuffmanMaxBitLength - max_depth));
+    if (space < 0) {
+      JXL_JPEG_DEBUG("Invalid Huffman code lengths.");
+      jpg->error = JPEGReadError::INVALID_HUFFMAN_CODE;
+      return false;
+    } else if (space > 0 && huff_lut[0].value != 0xffff) {
+      // Re-initialize the values to an invalid symbol so that we can recognize
+      // it when reading the bit stream using a Huffman code with space > 0.
+      for (int i = 0; i < kJpegHuffmanLutSize; ++i) {
+        huff_lut[i].bits = 0;
+        huff_lut[i].value = 0xffff;
+      }
+    }
+    huff.is_last = (*pos == start_pos + marker_len);
+    if (mode == JpegReadMode::kReadAll) {
+      BuildJpegHuffmanTable(&huff.counts[0], &huff.values[0], huff_lut);
+    }
+    jpg->huffman_code.push_back(huff);
+  }
+  JXL_JPEG_VERIFY_MARKER_END();
+  return true;
+}
+
+// Reads the Define Quantization Table (DQT) marker segment and appends every
+// table found in it to jpg->quant. Returns false (with jpg->error set) if not.
+bool ProcessDQT(const uint8_t* data, const size_t len, size_t* pos,
+                JPEGData* jpg) {
+  const size_t start_pos = *pos;
+  JXL_JPEG_VERIFY_LEN(2);
+  size_t marker_len = ReadUint16(data, pos);
+  if (marker_len == 2) {
+    JXL_JPEG_DEBUG("DQT marker: no quantization table found");
+    jpg->error = JPEGReadError::EMPTY_DQT;
+    return false;
+  }
+  while (*pos < start_pos + marker_len && jpg->quant.size() < kMaxQuantTables) {
+    JXL_JPEG_VERIFY_LEN(1);
+    int quant_table_index = ReadUint8(data, pos);
+    int quant_table_precision = quant_table_index >> 4;  // 0: 8-bit, 1: 16-bit
+    JXL_JPEG_VERIFY_INPUT(quant_table_precision, 0, 1, QUANT_TBL_PRECISION);
+    quant_table_index &= 0xf;  // the low nibble is the table identifier
+    JXL_JPEG_VERIFY_INPUT(quant_table_index, 0, 3, QUANT_TBL_INDEX);
+    JXL_JPEG_VERIFY_LEN((quant_table_precision + 1) * kDCTBlockSize);
+    JPEGQuantTable table;
+    table.index = quant_table_index;
+    table.precision = quant_table_precision;
+    for (size_t i = 0; i < kDCTBlockSize; ++i) {
+      int quant_val =
+          quant_table_precision ? ReadUint16(data, pos) : ReadUint8(data, pos);
+      JXL_JPEG_VERIFY_INPUT(quant_val, 1, 65535, QUANT_VAL);  // zero is invalid
+      table.values[kJPEGNaturalOrder[i]] = quant_val;
+    }
+    table.is_last = (*pos == start_pos + marker_len);  // last table in segment
+    jpg->quant.push_back(table);
+  }
+  JXL_JPEG_VERIFY_MARKER_END();
+  return true;
+}
+
+// Reads the DRI segment into jpg->restart_interval; rejects a repeated DRI.
+bool ProcessDRI(const uint8_t* data, const size_t len, size_t* pos,
+                bool* found_dri, JPEGData* jpg) {
+  if (*found_dri) {
+    JXL_JPEG_DEBUG("Duplicate DRI marker.");
+    jpg->error = JPEGReadError::DUPLICATE_DRI;
+    return false;
+  }
+  *found_dri = true;  // remembered by the caller across marker segments
+  // NOTE(review): start_pos / marker_len look unused here; presumably they are
+  // read by JXL_JPEG_VERIFY_MARKER_END (defined outside this view) - confirm.
+  const size_t start_pos = *pos;
+  JXL_JPEG_VERIFY_LEN(4);
+  size_t marker_len = ReadUint16(data, pos);
+  int restart_interval = ReadUint16(data, pos);
+  jpg->restart_interval = restart_interval;
+  JXL_JPEG_VERIFY_MARKER_END();
+  return true;
+}
+
+// Saves the APP segment (marker type byte included) to jpg->app_data.
+bool ProcessAPP(const uint8_t* data, const size_t len, size_t* pos,
+                JPEGData* jpg) {
+  JXL_JPEG_VERIFY_LEN(2);
+  size_t marker_len = ReadUint16(data, pos);
+  JXL_JPEG_VERIFY_INPUT(marker_len, 2, 65535, MARKER_LEN);
+  JXL_JPEG_VERIFY_LEN(marker_len - 2);
+  JXL_DASSERT(*pos >= 3);  // guards the "- 3" pointer arithmetic below
+  // Copy marker type byte, length field and payload: marker_len + 1 bytes.
+  const uint8_t* app_str_start = data + *pos - 3;
+  std::vector<uint8_t> app_str(app_str_start, app_str_start + marker_len + 1);
+  *pos += marker_len - 2;  // the two length bytes were already consumed
+  jpg->app_data.push_back(app_str);
+  return true;
+}
+
+// Saves the COM segment (marker type byte included) to jpg->com_data.
+bool ProcessCOM(const uint8_t* data, const size_t len, size_t* pos,
+                JPEGData* jpg) {
+  JXL_JPEG_VERIFY_LEN(2);
+  size_t marker_len = ReadUint16(data, pos);
+  JXL_JPEG_VERIFY_INPUT(marker_len, 2, 65535, MARKER_LEN);
+  JXL_JPEG_VERIFY_LEN(marker_len - 2);
+  const uint8_t* com_str_start = data + *pos - 3;  // at the marker type byte
+  std::vector<uint8_t> com_str(com_str_start, com_str_start + marker_len + 1);
+  *pos += marker_len - 2;  // the two length bytes were already consumed
+  jpg->com_data.push_back(com_str);
+  return true;
+}
+
+// Helper structure to read bits from the entropy coded data segment.
+struct BitReaderState {
+  BitReaderState(const uint8_t* data, const size_t len, size_t pos)
+      : data_(data), len_(len) {
+    Reset(pos);
+  }
+
+  void Reset(size_t pos) {
+    pos_ = pos;
+    val_ = 0;
+    bits_left_ = 0;
+    next_marker_pos_ = len_ - 2;  // provisional, until a marker is seen
+    FillBitWindow();
+  }
+
+  // Returns the next byte and skips the 0xff/0x00 escape sequences.
+  uint8_t GetNextByte() {
+    if (pos_ >= next_marker_pos_) {  // no data left before the marker
+      ++pos_;  // still advance, FinishStream() checks for overrun
+      return 0;
+    }
+    uint8_t c = data_[pos_++];
+    if (c == 0xff) {
+      uint8_t escape = data_[pos_];
+      if (escape == 0) {
+        ++pos_;
+      } else {
+        // 0xff was followed by a non-zero byte, which means that we found the
+        // start of the next marker segment.
+        next_marker_pos_ = pos_ - 1;
+      }
+    }
+    return c;
+  }
+
+  void FillBitWindow() {
+    if (bits_left_ <= 16) {       // refill only when running low
+      while (bits_left_ <= 56) {  // append whole bytes while they fit
+        val_ <<= 8;
+        val_ |= (uint64_t)GetNextByte();
+        bits_left_ += 8;
+      }
+    }
+  }
+
+  int ReadBits(int nbits) {
+    FillBitWindow();
+    uint64_t val = (val_ >> (bits_left_ - nbits)) & ((1ULL << nbits) - 1);
+    bits_left_ -= nbits;
+    return val;
+  }
+
+  // Sets *pos to the next stream position where parsing should continue.
+  // Enqueue the padding bits seen (0 or 1).
+  // Returns false if there is inconsistent or invalid padding or the stream
+  // ended too early.
+  bool FinishStream(JPEGData* jpg, size_t* pos) {
+    int npadbits = bits_left_ & 7;  // unread bits of the current byte
+    if (npadbits > 0) {
+      uint64_t padmask = (1ULL << npadbits) - 1;
+      uint64_t padbits = (val_ >> (bits_left_ - npadbits)) & padmask;
+      if (padbits != padmask) {  // at least one padding bit is zero
+        jpg->has_zero_padding_bit = true;
+      }
+      for (int i = npadbits - 1; i >= 0; --i) {
+        jpg->padding_bits.push_back((padbits >> i) & 1);
+      }
+    }
+    // Give back some bytes that we did not use.
+    int unused_bytes_left = bits_left_ >> 3;
+    while (unused_bytes_left-- > 0) {
+      --pos_;
+      // If we give back a 0 byte, we need to check if it was a 0xff/0x00 escape
+      // sequence, and if yes, we need to give back one more byte.
+      if (pos_ < next_marker_pos_ && data_[pos_] == 0 &&
+          data_[pos_ - 1] == 0xff) {
+        --pos_;
+      }
+    }
+    if (pos_ > next_marker_pos_) {
+      // Data ran out before the scan was complete.
+      JXL_JPEG_DEBUG("Unexpected end of scan.");
+      return false;
+    }
+    *pos = pos_;
+    return true;
+  }
+
+  const uint8_t* data_;
+  const size_t len_;
+  size_t pos_;              // next byte to read from data_
+  uint64_t val_;            // bit buffer, the newest byte is in the low bits
+  int bits_left_;           // number of not yet consumed bits in val_
+  size_t next_marker_pos_;  // reads at or beyond this position yield zeros
+};
+
+// Decodes and returns the next Huffman-coded symbol via a two-level lookup.
+int ReadSymbol(const HuffmanTableEntry* table, BitReaderState* br) {
+  int nbits;
+  br->FillBitWindow();
+  int val = (br->val_ >> (br->bits_left_ - 8)) & 0xff;  // peek at 8 bits
+  table += val;
+  nbits = table->bits - 8;
+  if (nbits > 0) {  // more than 8 bits needed: go to the 2nd level table
+    br->bits_left_ -= 8;
+    table += table->value;  // here value is the offset of the 2nd level table
+    val = (br->val_ >> (br->bits_left_ - nbits)) & ((1 << nbits) - 1);
+    table += val;
+  }
+  br->bits_left_ -= table->bits;  // consume the bits of the final entry
+  return table->value;
+}
+
+/**
+ * Returns the DC diff or AC value for extra bits value x and prefix code s.
+ *
+ * CCITT Rec. T.81 (1992 E)
+ * Table F.1 – Difference magnitude categories for DC coding
+ *  SSSS | DIFF values
+ * ------+--------------------------
+ *     0 | 0
+ *     1 | –1, 1
+ *     2 | –3, –2, 2, 3
+ *     3 | –7..–4, 4..7
+ * ......|..........................
+ *    11 | –2047..–1024, 1024..2047
+ *
+ * CCITT Rec. T.81 (1992 E)
+ * Table F.2 – Categories assigned to coefficient values
+ * [ Same as Table F.1, but does not include SSSS equal to 0 and 11]
+ *
+ *
+ * CCITT Rec. T.81 (1992 E)
+ * F.1.2.1.1 Structure of DC code table
+ * For each category,... additional bits... appended... to uniquely identify
+ * which difference... occurred... When DIFF is positive... SSSS... bits of DIFF
+ * are appended. When DIFF is negative... SSSS... bits of (DIFF – 1) are
+ * appended... Most significant bit... is 0 for negative differences and 1 for
+ * positive differences.
+ *
+ * In other words the upper half of extra bits range represents DIFF as is.
+ * The lower half represents the negative DIFFs with an offset.
+ */
+int HuffExtend(int x, int s) {
+  JXL_DASSERT(s >= 1);
+  const int upper_half_start = 1 << (s - 1);
+  if (x < upper_half_start) {
+    // Lower half of the range: a negative value stored with an offset.
+    return x - (1 << s) + 1;
+  }
+  JXL_DASSERT(x < (1 << s));
+  return x;  // upper half of the range: the value is stored as is
+}
+
+// Decodes one 8x8 block of DCT coefficients (band Ss..Se) from the bit stream.
+bool DecodeDCTBlock(const HuffmanTableEntry* dc_huff,
+                    const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al,
+                    int* eobrun, bool* reset_state, int* num_zero_runs,
+                    BitReaderState* br, JPEGData* jpg, coeff_t* last_dc_coeff,
+                    coeff_t* coeffs) {
+  // Nowadays multiplication is even faster than variable shift.
+  int Am = 1 << Al;
+  bool eobrun_allowed = Ss > 0;  // only when the band excludes the DC coeff
+  if (Ss == 0) {
+    int s = ReadSymbol(dc_huff, br);
+    if (s >= kJpegDCAlphabetSize) {
+      JXL_JPEG_DEBUG("Invalid Huffman symbol %d  for DC coefficient.", s);
+      jpg->error = JPEGReadError::INVALID_SYMBOL;
+      return false;
+    }
+    int diff = 0;
+    if (s > 0) {
+      int bits = br->ReadBits(s);
+      diff = HuffExtend(bits, s);
+    }
+    int coeff = diff + *last_dc_coeff;  // DC is coded as a difference
+    const int dc_coeff = coeff * Am;
+    coeffs[0] = dc_coeff;
+    // TODO(eustas): is there a more elegant / explicit way to check this?
+    if (dc_coeff != coeffs[0]) {  // the value did not fit into coeff_t
+      JXL_JPEG_DEBUG("Invalid DC coefficient %d", dc_coeff);
+      jpg->error = JPEGReadError::NON_REPRESENTABLE_DC_COEFF;
+      return false;
+    }
+    *last_dc_coeff = coeff;
+    ++Ss;  // the AC part below starts at the next coefficient
+  }
+  if (Ss > Se) {
+    return true;
+  }
+  if (*eobrun > 0) {  // inside an end-of-block run: nothing coded here
+    --(*eobrun);
+    return true;
+  }
+  *num_zero_runs = 0;
+  for (int k = Ss; k <= Se; k++) {
+    int sr = ReadSymbol(ac_huff, br);
+    if (sr >= kJpegHuffmanAlphabetSize) {
+      JXL_JPEG_DEBUG("Invalid Huffman symbol %d for AC coefficient %d", sr, k);
+      jpg->error = JPEGReadError::INVALID_SYMBOL;
+      return false;
+    }
+    int r = sr >> 4;  // high nibble: number of coefficients to skip
+    int s = sr & 15;  // low nibble: number of extra bits to read
+    if (s > 0) {
+      k += r;
+      if (k > Se) {
+        JXL_JPEG_DEBUG("Out-of-band coefficient %d band was %d-%d", k, Ss, Se);
+        jpg->error = JPEGReadError::OUT_OF_BAND_COEFF;
+        return false;
+      }
+      if (s + Al >= kJpegDCAlphabetSize) {
+        JXL_JPEG_DEBUG(
+            "Out of range AC coefficient value: s = %d Al = %d k = %d", s, Al,
+            k);
+        jpg->error = JPEGReadError::NON_REPRESENTABLE_AC_COEFF;
+        return false;
+      }
+      int bits = br->ReadBits(s);
+      int coeff = HuffExtend(bits, s);
+      coeffs[kJPEGNaturalOrder[k]] = coeff * Am;
+      *num_zero_runs = 0;
+    } else if (r == 15) {
+      k += 15;  // together with the loop's k++ this skips 16 coefficients
+      ++(*num_zero_runs);
+    } else {
+      if (eobrun_allowed && k == Ss && *eobrun == 0) {
+        // We have two end-of-block runs right after each other, so we signal
+        // the jpeg encoder to force a state reset at this point.
+        *reset_state = true;
+      }
+      *eobrun = 1 << r;
+      if (r > 0) {
+        if (!eobrun_allowed) {
+          JXL_JPEG_DEBUG("End-of-block run crossing DC coeff.");
+          jpg->error = JPEGReadError::EOB_RUN_TOO_LONG;
+          return false;
+        }
+        *eobrun += br->ReadBits(r);
+      }
+      break;
+    }
+  }
+  --(*eobrun);
+  return true;
+}
+
+bool RefineDCTBlock(const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al,
+                    int* eobrun, bool* reset_state, BitReaderState* br,
+                    JPEGData* jpg, coeff_t* coeffs) {
+  // Refinement pass: adds the bit selected by Al to one block's coefficients.
+  int Am = 1 << Al;
+  bool eobrun_allowed = Ss > 0;  // only when the band excludes the DC coeff
+  if (Ss == 0) {
+    int s = br->ReadBits(1);
+    coeff_t dc_coeff = coeffs[0];
+    dc_coeff |= s * Am;
+    coeffs[0] = dc_coeff;
+    ++Ss;  // the AC part below starts at the next coefficient
+  }
+  if (Ss > Se) {
+    return true;
+  }
+  int p1 = Am;   // value of a newly non-zero positive coefficient
+  int m1 = -Am;  // value of a newly non-zero negative coefficient
+  int k = Ss;
+  int r;
+  int s;
+  bool in_zero_run = false;
+  if (*eobrun <= 0) {
+    for (; k <= Se; k++) {
+      s = ReadSymbol(ac_huff, br);
+      if (s >= kJpegHuffmanAlphabetSize) {
+        JXL_JPEG_DEBUG("Invalid Huffman symbol %d for AC coefficient %d", s, k);
+        jpg->error = JPEGReadError::INVALID_SYMBOL;
+        return false;
+      }
+      r = s >> 4;  // high nibble: number of zero coefficients to skip
+      s &= 15;     // low nibble: must be 0 or 1 in a refinement scan
+      if (s) {
+        if (s != 1) {
+          JXL_JPEG_DEBUG("Invalid Huffman symbol %d for AC coefficient %d", s,
+                         k);
+          jpg->error = JPEGReadError::INVALID_SYMBOL;
+          return false;
+        }
+        s = br->ReadBits(1) ? p1 : m1;  // one bit gives the sign
+        in_zero_run = false;
+      } else {
+        if (r != 15) {
+          if (eobrun_allowed && k == Ss && *eobrun == 0) {
+            // We have two end-of-block runs right after each other, so we
+            // signal the jpeg encoder to force a state reset at this point.
+            *reset_state = true;
+          }
+          *eobrun = 1 << r;
+          if (r > 0) {
+            if (!eobrun_allowed) {
+              JXL_JPEG_DEBUG("End-of-block run crossing DC coeff.");
+              jpg->error = JPEGReadError::EOB_RUN_TOO_LONG;
+              return false;
+            }
+            *eobrun += br->ReadBits(r);
+          }
+          break;
+        }
+        in_zero_run = true;
+      }
+      do {  // skip r zero coefficients, refining non-zero ones on the way
+        coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]];
+        if (thiscoef != 0) {
+          if (br->ReadBits(1)) {  // correction bit for a non-zero coefficient
+            if ((thiscoef & p1) == 0) {
+              if (thiscoef >= 0) {
+                thiscoef += p1;
+              } else {
+                thiscoef += m1;
+              }
+            }
+          }
+          coeffs[kJPEGNaturalOrder[k]] = thiscoef;
+        } else {
+          if (--r < 0) {  // zero run used up: k is the target position
+            break;
+          }
+        }
+        k++;
+      } while (k <= Se);
+      if (s) {
+        if (k > Se) {
+          JXL_JPEG_DEBUG("Out-of-band coefficient %d band was %d-%d", k, Ss,
+                         Se);
+          jpg->error = JPEGReadError::OUT_OF_BAND_COEFF;
+          return false;
+        }
+        coeffs[kJPEGNaturalOrder[k]] = s;
+      }
+    }
+  }
+  if (in_zero_run) {
+    JXL_JPEG_DEBUG("Extra zero run before end-of-block.");
+    jpg->error = JPEGReadError::EXTRA_ZERO_RUN;
+    return false;
+  }
+  if (*eobrun > 0) {  // in an end-of-block run: only correction bits remain
+    for (; k <= Se; k++) {
+      coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]];
+      if (thiscoef != 0) {
+        if (br->ReadBits(1)) {
+          if ((thiscoef & p1) == 0) {
+            if (thiscoef >= 0) {
+              thiscoef += p1;
+            } else {
+              thiscoef += m1;
+            }
+          }
+        }
+        coeffs[kJPEGNaturalOrder[k]] = thiscoef;
+      }
+    }
+  }
+  --(*eobrun);
+  return true;
+}
+
+bool ProcessRestart(const uint8_t* data, const size_t len,
+                    int* next_restart_marker, BitReaderState* br,
+                    JPEGData* jpg) {
+  size_t pos = 0;  // set by FinishStream() to where the marker should start
+  if (!br->FinishStream(jpg, &pos)) {
+    jpg->error = JPEGReadError::INVALID_SCAN;
+    return false;
+  }
+  int expected_marker = 0xd0 + *next_restart_marker;  // 0xd0 .. 0xd7
+  JXL_JPEG_EXPECT_MARKER();
+  int marker = data[pos + 1];
+  if (marker != expected_marker) {
+    JXL_JPEG_DEBUG("Did not find expected restart marker %d actual %d",
+                   expected_marker, marker);
+    jpg->error = JPEGReadError::WRONG_RESTART_MARKER;
+    return false;
+  }
+  br->Reset(pos + 2);  // continue reading right after the 2-byte marker
+  *next_restart_marker += 1;
+  *next_restart_marker &= 0x7;  // restart markers cycle through 8 values
+  return true;
+}
+
+// Parses one scan: the SOS header, then the entropy-coded coefficient data.
+bool ProcessScan(const uint8_t* data, const size_t len,
+                 const std::vector<HuffmanTableEntry>& dc_huff_lut,
+                 const std::vector<HuffmanTableEntry>& ac_huff_lut,
+                 uint16_t scan_progression[kMaxComponents][kDCTBlockSize],
+                 bool is_progressive, size_t* pos, JPEGData* jpg) {
+  if (!ProcessSOS(data, len, pos, jpg)) {
+    return false;
+  }
+  JPEGScanInfo* scan_info = &jpg->scan_info.back();
+  bool is_interleaved = (scan_info->num_components > 1);
+  int max_h_samp_factor = 1;
+  int max_v_samp_factor = 1;
+  for (size_t i = 0; i < jpg->components.size(); ++i) {
+    max_h_samp_factor =
+        std::max(max_h_samp_factor, jpg->components[i].h_samp_factor);
+    max_v_samp_factor =
+        std::max(max_v_samp_factor, jpg->components[i].v_samp_factor);
+  }
+
+  int MCU_rows = DivCeil(jpg->height, max_v_samp_factor * 8);
+  int MCUs_per_row = DivCeil(jpg->width, max_h_samp_factor * 8);
+  if (!is_interleaved) {  // single component: one block per MCU
+    const JPEGComponent& c = jpg->components[scan_info->components[0].comp_idx];
+    MCUs_per_row = DivCeil(jpg->width * c.h_samp_factor, 8 * max_h_samp_factor);
+    MCU_rows = DivCeil(jpg->height * c.v_samp_factor, 8 * max_v_samp_factor);
+  }
+  coeff_t last_dc_coeff[kMaxComponents] = {0};
+  BitReaderState br(data, len, *pos);
+  int restarts_to_go = jpg->restart_interval;
+  int next_restart_marker = 0;
+  int eobrun = -1;           // no end-of-block run pending
+  int block_scan_index = 0;  // running count of blocks decoded in this scan
+  const int Al = is_progressive ? scan_info->Al : 0;
+  const int Ah = is_progressive ? scan_info->Ah : 0;
+  const int Ss = is_progressive ? scan_info->Ss : 0;
+  const int Se = is_progressive ? scan_info->Se : 63;
+  const uint16_t scan_bitmask = Ah == 0 ? (0xffff << Al) : (1u << Al);
+  const uint16_t refinement_bitmask = (1 << Al) - 1;
+  for (size_t i = 0; i < scan_info->num_components; ++i) {
+    int comp_idx = scan_info->components[i].comp_idx;
+    for (int k = Ss; k <= Se; ++k) {
+      if (scan_progression[comp_idx][k] & scan_bitmask) {
+        JXL_JPEG_DEBUG(
+            "Overlapping scans: component=%d k=%d prev_mask: %u cur_mask %u",
+            comp_idx, k, scan_progression[comp_idx][k], scan_bitmask);
+        jpg->error = JPEGReadError::OVERLAPPING_SCANS;
+        return false;
+      }
+      if (scan_progression[comp_idx][k] & refinement_bitmask) {
+        JXL_JPEG_DEBUG(
+            "Invalid scan order, a more refined scan was already done: "
+            "component=%d k=%d prev_mask=%u cur_mask=%u",
+            comp_idx, k, scan_progression[comp_idx][k], scan_bitmask);
+        jpg->error = JPEGReadError::INVALID_SCAN_ORDER;
+        return false;
+      }
+      scan_progression[comp_idx][k] |= scan_bitmask;
+    }
+  }
+  if (Al > 10) {
+    JXL_JPEG_DEBUG("Scan parameter Al=%d is not supported.", Al);
+    jpg->error = JPEGReadError::NON_REPRESENTABLE_AC_COEFF;
+    return false;
+  }
+  for (int mcu_y = 0; mcu_y < MCU_rows; ++mcu_y) {
+    for (int mcu_x = 0; mcu_x < MCUs_per_row; ++mcu_x) {
+      // Handle the restart intervals.
+      if (jpg->restart_interval > 0) {
+        if (restarts_to_go == 0) {
+          if (!ProcessRestart(data, len, &next_restart_marker, &br, jpg)) {
+            return false;
+          }
+          restarts_to_go = jpg->restart_interval;
+          memset(static_cast<void*>(last_dc_coeff), 0, sizeof(last_dc_coeff));
+          if (eobrun > 0) {
+            JXL_JPEG_DEBUG("End-of-block run too long.");
+            jpg->error = JPEGReadError::EOB_RUN_TOO_LONG;
+            return false;
+          }
+          eobrun = -1;  // fresh start
+        }
+        --restarts_to_go;
+      }
+      // Decode one MCU.
+      for (size_t i = 0; i < scan_info->num_components; ++i) {
+        JPEGComponentScanInfo* si = &scan_info->components[i];
+        JPEGComponent* c = &jpg->components[si->comp_idx];
+        const HuffmanTableEntry* dc_lut =
+            &dc_huff_lut[si->dc_tbl_idx * kJpegHuffmanLutSize];
+        const HuffmanTableEntry* ac_lut =
+            &ac_huff_lut[si->ac_tbl_idx * kJpegHuffmanLutSize];
+        int nblocks_y = is_interleaved ? c->v_samp_factor : 1;
+        int nblocks_x = is_interleaved ? c->h_samp_factor : 1;
+        for (int iy = 0; iy < nblocks_y; ++iy) {
+          for (int ix = 0; ix < nblocks_x; ++ix) {
+            int block_y = mcu_y * nblocks_y + iy;
+            int block_x = mcu_x * nblocks_x + ix;
+            int block_idx = block_y * c->width_in_blocks + block_x;
+            bool reset_state = false;
+            int num_zero_runs = 0;
+            coeff_t* coeffs = &c->coeffs[block_idx * kDCTBlockSize];
+            if (Ah == 0) {  // first pass over this band
+              if (!DecodeDCTBlock(dc_lut, ac_lut, Ss, Se, Al, &eobrun,
+                                  &reset_state, &num_zero_runs, &br, jpg,
+                                  &last_dc_coeff[si->comp_idx], coeffs)) {
+                return false;
+              }
+            } else {  // refinement pass over already decoded coefficients
+              if (!RefineDCTBlock(ac_lut, Ss, Se, Al, &eobrun, &reset_state,
+                                  &br, jpg, coeffs)) {
+                return false;
+              }
+            }
+            if (reset_state) {
+              scan_info->reset_points.emplace_back(block_scan_index);
+            }
+            if (num_zero_runs > 0) {
+              JPEGScanInfo::ExtraZeroRunInfo info;
+              info.block_idx = block_scan_index;
+              info.num_extra_zero_runs = num_zero_runs;
+              scan_info->extra_zero_runs.push_back(info);
+            }
+            ++block_scan_index;
+          }
+        }
+      }
+    }
+  }
+  if (eobrun > 0) {  // the run claims more blocks than the scan contains
+    JXL_JPEG_DEBUG("End-of-block run too long.");
+    jpg->error = JPEGReadError::EOB_RUN_TOO_LONG;
+    return false;
+  }
+  if (!br.FinishStream(jpg, pos)) {
+    jpg->error = JPEGReadError::INVALID_SCAN;
+    return false;
+  }
+  if (*pos > len) {
+    JXL_JPEG_DEBUG("Unexpected end of file during scan. pos=%zu len=%zu", *pos,
+                   len);
+    jpg->error = JPEGReadError::UNEXPECTED_EOF;
+    return false;
+  }
+  return true;
+}
+
+// Remaps the quant_idx field of every component from a quantization table
+// identifier to the position of that table in the jpg->quant array.
+bool FixupIndexes(JPEGData* jpg) {
+  const size_t num_tables = jpg->quant.size();
+  for (size_t comp = 0; comp < jpg->components.size(); ++comp) {
+    JPEGComponent& component = jpg->components[comp];
+    // Linear search for the first table carrying the requested identifier.
+    size_t table_pos = 0;
+    while (table_pos < num_tables &&
+           jpg->quant[table_pos].index != component.quant_idx) {
+      ++table_pos;
+    }
+    if (table_pos == num_tables) {
+      JXL_JPEG_DEBUG("Quantization table with index %u not found",
+                     component.quant_idx);
+      jpg->error = JPEGReadError::QUANT_TABLE_NOT_FOUND;
+      return false;
+    }
+    component.quant_idx = table_pos;
+  }
+  return true;
+}
+
+size_t FindNextMarker(const uint8_t* data, const size_t len, size_t pos) {
+  // Entry i tells whether the byte (0xc0 + i) is an accepted marker code.
+  static const uint8_t kIsValidMarker[] = {
+      1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
+      1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+  };
+  const size_t first = pos;  // the result is the distance advanced from here
+  while (pos + 1 < len) {
+    const uint8_t code = data[pos + 1];
+    if (data[pos] == 0xff && code >= 0xc0 && kIsValidMarker[code - 0xc0]) break;
+    ++pos;
+  }
+  return pos - first;
+}
+
+}  // namespace
+
+bool ReadJpeg(const uint8_t* data, const size_t len, JpegReadMode mode,
+              JPEGData* jpg) {
+  size_t pos = 0;
+  // Check SOI marker.
+  JXL_JPEG_EXPECT_MARKER();
+  int marker = data[pos + 1];
+  pos += 2;
+  if (marker != 0xd8) {
+    JXL_JPEG_DEBUG("Did not find expected SOI marker, actual=%d", marker);
+    jpg->error = JPEGReadError::SOI_NOT_FOUND;
+    return false;
+  }
+  int lut_size = kMaxHuffmanTables * kJpegHuffmanLutSize;
+  std::vector<HuffmanTableEntry> dc_huff_lut(lut_size);
+  std::vector<HuffmanTableEntry> ac_huff_lut(lut_size);
+  bool found_sof = false;
+  bool found_dri = false;
+  uint16_t scan_progression[kMaxComponents][kDCTBlockSize] = {{0}};
+
+  jpg->padding_bits.resize(0);
+  bool is_progressive = false;  // default
+  do {
+    // Read next marker.
+    size_t num_skipped = FindNextMarker(data, len, pos);
+    if (num_skipped > 0) {
+      // Add a fake marker to indicate arbitrary in-between-markers data.
+      jpg->marker_order.push_back(0xff);
+      jpg->inter_marker_data.emplace_back(data + pos, data + pos + num_skipped);
+      pos += num_skipped;
+    }
+    JXL_JPEG_EXPECT_MARKER();
+    marker = data[pos + 1];
+    pos += 2;
+    bool ok = true;
+    switch (marker) {
+      case 0xc0:
+      case 0xc1:
+      case 0xc2:  // SOF; 0xc2 selects progressive mode
+        is_progressive = (marker == 0xc2);
+        ok = ProcessSOF(data, len, mode, &pos, jpg);
+        found_sof = true;
+        break;
+      case 0xc4:  // DHT
+        ok = ProcessDHT(data, len, mode, &dc_huff_lut, &ac_huff_lut, &pos, jpg);
+        break;
+      case 0xd0:
+      case 0xd1:
+      case 0xd2:
+      case 0xd3:
+      case 0xd4:
+      case 0xd5:
+      case 0xd6:
+      case 0xd7:
+        // RST markers do not have any data.
+        break;
+      case 0xd9:
+        // Found end marker.
+        break;
+      case 0xda:  // SOS; the scan is decoded only in kReadAll mode
+        if (mode == JpegReadMode::kReadAll) {
+          ok = ProcessScan(data, len, dc_huff_lut, ac_huff_lut,
+                           scan_progression, is_progressive, &pos, jpg);
+        }
+        break;
+      case 0xdb:  // DQT
+        ok = ProcessDQT(data, len, &pos, jpg);
+        break;
+      case 0xdd:  // DRI
+        ok = ProcessDRI(data, len, &pos, &found_dri, jpg);
+        break;
+      case 0xe0:
+      case 0xe1:
+      case 0xe2:
+      case 0xe3:
+      case 0xe4:
+      case 0xe5:
+      case 0xe6:
+      case 0xe7:
+      case 0xe8:
+      case 0xe9:
+      case 0xea:
+      case 0xeb:
+      case 0xec:
+      case 0xed:
+      case 0xee:
+      case 0xef:  // APP segments; not stored in kReadTables mode
+        if (mode != JpegReadMode::kReadTables) {
+          ok = ProcessAPP(data, len, &pos, jpg);
+        }
+        break;
+      case 0xfe:  // COM; not stored in kReadTables mode
+        if (mode != JpegReadMode::kReadTables) {
+          ok = ProcessCOM(data, len, &pos, jpg);
+        }
+        break;
+      default:
+        JXL_JPEG_DEBUG("Unsupported marker: %d pos=%zu len=%zu", marker, pos,
+                       len);
+        jpg->error = JPEGReadError::UNSUPPORTED_MARKER;
+        ok = false;
+        break;
+    }
+    if (!ok) {
+      return false;
+    }
+    jpg->marker_order.push_back(marker);
+    if (mode == JpegReadMode::kReadHeader && found_sof) {  // header is done
+      break;
+    }
+  } while (marker != 0xd9);  // stop at the end marker
+
+  if (!found_sof) {
+    JXL_JPEG_DEBUG("Missing SOF marker.");
+    jpg->error = JPEGReadError::SOF_NOT_FOUND;
+    return false;
+  }
+
+  // Supplemental checks.
+  if (mode == JpegReadMode::kReadAll) {
+    if (pos < len) {  // keep the bytes that follow the end marker
+      jpg->tail_data = std::vector<uint8_t>(data + pos, data + len);
+    }
+    if (!FixupIndexes(jpg)) {
+      return false;
+    }
+    if (jpg->huffman_code.empty()) {
+      // Section B.2.4.2: "If a table has never been defined for a particular
+      // destination, then when this destination is specified in a scan header,
+      // the results are unpredictable."
+      JXL_JPEG_DEBUG("Need at least one Huffman code table.");
+      jpg->error = JPEGReadError::HUFFMAN_TABLE_ERROR;
+      return false;
+    }
+    if (jpg->huffman_code.size() >= kMaxDHTMarkers) {
+      JXL_JPEG_DEBUG("Too many Huffman tables.");
+      jpg->error = JPEGReadError::HUFFMAN_TABLE_ERROR;
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace jpeg
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data_reader.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data_reader.h
new file mode 100644
index 0000000000..3fad820e9d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_data_reader.h
@@ -0,0 +1,36 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Functions for reading a jpeg byte stream into a JPEGData object.
+
+#ifndef LIB_JXL_JPEG_ENC_JPEG_DATA_READER_H_
+#define LIB_JXL_JPEG_ENC_JPEG_DATA_READER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+
+enum class JpegReadMode {
+  kReadHeader,  // only basic headers; reading stops right after the SOF marker
+  kReadTables,  // headers and tables (quant, Huffman, ...); no APP/COM/scans
+  kReadAll,     // everything, including the entropy-coded scan data
+};
+
+// Parses the JPEG stream contained in data[0 ... len) and fills in *jpg with
+// the parsed information.
+// If mode is kReadHeader, it fills in only the image dimensions in *jpg.
+// Returns false if the data is not valid JPEG, or if it contains an unsupported
+// JPEG feature.
+bool ReadJpeg(const uint8_t* data, const size_t len, JpegReadMode mode,
+              JPEGData* jpg);
+
+}  // namespace jpeg
+}  // namespace jxl
+
+#endif  // LIB_JXL_JPEG_ENC_JPEG_DATA_READER_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc
new file mode 100644
index 0000000000..38282e640a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc
@@ -0,0 +1,103 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/jpeg/enc_jpeg_huffman_decode.h"
+
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+
+// Computes the index width (in bits) of the next second-level table. `count`
+// is the bit-length histogram of the symbols not yet placed and `len` is the
+// code length of the symbol about to be placed.
+static inline int NextTableBitSize(const int* count, int len) {
+  constexpr int kMaxLen = static_cast<int>(kJpegHuffmanMaxBitLength);
+  // Free slots in a sub-table just wide enough for codes of length `len`.
+  int free_slots = 1 << (len - kJpegHuffmanRootTableBits);
+  for (; len < kMaxLen; ++len, free_slots <<= 1) {
+    free_slots -= count[len];
+    if (free_slots <= 0) break;
+  }
+  return len - kJpegHuffmanRootTableBits;
+}
+
+void BuildJpegHuffmanTable(const uint32_t* count, const uint32_t* symbols,
+                           HuffmanTableEntry* lut) {
+  HuffmanTableEntry code;    // current table entry
+  HuffmanTableEntry* table;  // next available space in table
+  int len;                   // current code length
+  int idx;                   // symbol index
+  int key;                   // prefix code
+  int reps;                  // number of replicate key values in current table
+  int low;                   // low bits for current root entry
+  int table_bits;            // key length of current table
+  int table_size;            // size of current table
+  // Writes lut in place: a root table of 1 << kJpegHuffmanRootTableBits entries
+  // followed by the 2nd level tables. First, copy the bit length histogram.
+  int tmp_count[kJpegHuffmanMaxBitLength + 1] = {0};
+  int total_count = 0;
+  for (len = 1; len <= static_cast<int>(kJpegHuffmanMaxBitLength); ++len) {
+    tmp_count[len] = count[len];  // count[0] is never read
+    total_count += tmp_count[len];
+  }
+
+  table = lut;
+  table_bits = kJpegHuffmanRootTableBits;
+  table_size = 1 << table_bits;
+
+  // Special case code with only one value.
+  if (total_count == 1) {
+    code.bits = 0;  // the entry reports that no bits are consumed
+    code.value = symbols[0];
+    for (key = 0; key < table_size; ++key) {
+      table[key] = code;  // every root slot maps to the single symbol
+    }
+    return;
+  }
+
+  // Fill in root table.
+  key = 0;
+  idx = 0;
+  for (len = 1; len <= kJpegHuffmanRootTableBits; ++len) {
+    for (; tmp_count[len] > 0; --tmp_count[len]) {
+      code.bits = len;
+      code.value = symbols[idx++];
+      // A code of length len occupies this many consecutive root slots.
+      reps = 1 << (kJpegHuffmanRootTableBits - len);
+      while (reps--) {
+        table[key++] = code;
+      }
+    }
+  }
+
+  // Fill in 2nd level tables and add pointers to root table.
+  table += table_size;
+  table_size = 0;  // makes the first long code open a new sub-table
+  low = 0;
+  for (len = kJpegHuffmanRootTableBits + 1;
+       len <= static_cast<int>(kJpegHuffmanMaxBitLength); ++len) {
+    for (; tmp_count[len] > 0; --tmp_count[len]) {
+      // Start a new sub-table if the previous one is full.
+      if (low >= table_size) {
+        table += table_size;
+        table_bits = NextTableBitSize(tmp_count, len);
+        table_size = 1 << table_bits;
+        low = 0;
+        // The root slot at key becomes a link: total index bits and the
+        // distance from that slot to the start of the new sub-table.
+        lut[key].bits = table_bits + kJpegHuffmanRootTableBits;
+        lut[key].value = (table - lut) - key;
+        ++key;
+      }
+      code.bits = len - kJpegHuffmanRootTableBits;  // bits beyond the root index
+      code.value = symbols[idx++];
+      reps = 1 << (table_bits - code.bits);
+      while (reps--) {
+        table[low++] = code;
+      }
+    }
+  }
+}
+
+}  // namespace jpeg
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_huffman_decode.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_huffman_decode.h
new file mode 100644
index 0000000000..b8a60e4107
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/enc_jpeg_huffman_decode.h
@@ -0,0 +1,41 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Utility function for building a Huffman lookup table for the jpeg decoder.
+
+#ifndef LIB_JXL_JPEG_ENC_JPEG_HUFFMAN_DECODE_H_
+#define LIB_JXL_JPEG_ENC_JPEG_HUFFMAN_DECODE_H_
+
+#include <stdint.h>
+
+namespace jxl {
+namespace jpeg {
+
+constexpr int kJpegHuffmanRootTableBits = 8;  // index width of the root table
+// Maximum Huffman lookup table size.
+// According to zlib/examples/enough.c, 758 entries are always enough for
+// an alphabet of 257 symbols (256 + 1 special symbol for the all 1s code) and
+// max bit length 16 if the root table has 8 bits.
+constexpr int kJpegHuffmanLutSize = 758;
+
+struct HuffmanTableEntry {  // one slot of the lookup table built below
+  // Initialize the value to an invalid symbol (0xffff) so that we can recognize
+  // it when reading the bit stream using a Huffman code with space > 0.
+  HuffmanTableEntry() : bits(0), value(0xffff) {}
+
+  uint8_t bits;    // number of bits used for this symbol
+  uint16_t value;  // symbol value or table offset
+};
+
+// Builds a jpeg-style Huffman lookup table in lut from symbols, which are in
+// order of increasing bit length; counts[n] = number of symbols of bit length n,
+// n >= 1. NOTE(review): lut presumably needs kJpegHuffmanLutSize entries.
+void BuildJpegHuffmanTable(const uint32_t* counts, const uint32_t* symbols,
+                           HuffmanTableEntry* lut);
+
+}  // namespace jpeg
+}  // namespace jxl
+
+#endif  // LIB_JXL_JPEG_ENC_JPEG_HUFFMAN_DECODE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/jpeg_data.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/jpeg_data.cc
new file mode 100644
index 0000000000..42e5a4921c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/jpeg_data.cc
@@ -0,0 +1,448 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace jpeg {
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+
+namespace {
+enum JPEGComponentType : uint32_t {  // serialized in 2 bits by VisitFields
+  kGray = 0,    // a single component with id 1
+  kYCbCr = 1,   // three components with ids 1, 2, 3
+  kRGB = 2,     // three components with ids 'R', 'G', 'B'
+  kCustom = 3,  // anything else; the ids are serialized explicitly
+};
+
+struct JPEGInfo {  // tallies gathered by VisitMarker over the marker list
+  size_t num_app_markers = 0;  // markers 0xe0..0xef
+  size_t num_com_markers = 0;  // markers 0xfe
+  size_t num_scans = 0;        // markers 0xda
+  size_t num_intermarker = 0;  // fake 0xff markers (intermarker data)
+  bool has_dri = false;        // set once a 0xdd marker is seen
+};
+
+Status VisitMarker(uint8_t* marker, Visitor* visitor, JPEGInfo* info) {
+  // A marker is visited as a 6-bit offset from 0xc0. Once the (possibly
+  // updated) marker byte is known, tally the kinds that VisitFields needs
+  // counts for. The tested values are mutually exclusive.
+  uint32_t offset = *marker - 0xc0;
+  JXL_RETURN_IF_ERROR(visitor->Bits(6, 0x00, &offset));
+  *marker = offset + 0xc0;
+  const uint8_t m = *marker;
+  if ((m & 0xf0) == 0xe0) {  // APPn
+    ++info->num_app_markers;
+  } else if (m == 0xfe) {  // COM
+    ++info->num_com_markers;
+  } else if (m == 0xda) {  // SOS
+    ++info->num_scans;
+  } else if (m == 0xff) {
+    // We use a fake 0xff marker to signal intermarker data.
+    ++info->num_intermarker;
+  } else if (m == 0xdd) {  // DRI
+    info->has_dri = true;
+  }
+  return true;
+}
+
+}  // namespace
+
+Status JPEGData::VisitFields(Visitor* visitor) {  // one pass for read and write
+  bool is_gray = components.size() == 1;
+  JXL_RETURN_IF_ERROR(visitor->Bool(false, &is_gray));
+  if (visitor->IsReading()) {
+    components.resize(is_gray ? 1 : 3);
+  }
+  JPEGInfo info;
+  if (visitor->IsReading()) {
+    uint8_t marker = 0xc0;
+    do {
+      JXL_RETURN_IF_ERROR(VisitMarker(&marker, visitor, &info));
+      marker_order.push_back(marker);
+      if (marker_order.size() > 16384) {
+        return JXL_FAILURE("Too many markers: %zu\n", marker_order.size());
+      }
+    } while (marker != 0xd9);  // the EOI marker terminates the list
+  } else {
+    if (marker_order.size() > 16384) {
+      return JXL_FAILURE("Too many markers: %zu\n", marker_order.size());
+    }
+    for (size_t i = 0; i < marker_order.size(); i++) {
+      JXL_RETURN_IF_ERROR(VisitMarker(&marker_order[i], visitor, &info));
+    }
+    if (!marker_order.empty()) {
+      // Last marker should always be EOI marker.
+      JXL_CHECK(marker_order.back() == 0xd9);
+    }
+  }
+
+  // Size the per-marker containers from the tallies, then visit the APP and
+  // COM payload lengths.
+  if (visitor->IsReading()) {
+    app_data.resize(info.num_app_markers);
+    app_marker_type.resize(info.num_app_markers);
+    com_data.resize(info.num_com_markers);
+    scan_info.resize(info.num_scans);
+  }
+  JXL_ASSERT(app_data.size() == info.num_app_markers);
+  JXL_ASSERT(app_marker_type.size() == info.num_app_markers);
+  JXL_ASSERT(com_data.size() == info.num_com_markers);
+  JXL_ASSERT(scan_info.size() == info.num_scans);
+  for (size_t i = 0; i < app_data.size(); i++) {
+    auto& app = app_data[i];
+    // Encodes up to 8 different values.
+    JXL_RETURN_IF_ERROR(
+        visitor->U32(Val(0), Val(1), BitsOffset(1, 2), BitsOffset(2, 4), 0,
+                     reinterpret_cast<uint32_t*>(&app_marker_type[i])));
+    if (app_marker_type[i] != AppMarkerType::kUnknown &&
+        app_marker_type[i] != AppMarkerType::kICC &&
+        app_marker_type[i] != AppMarkerType::kExif &&
+        app_marker_type[i] != AppMarkerType::kXMP) {
+      return JXL_FAILURE("Unknown app marker type %u",
+                         static_cast<uint32_t>(app_marker_type[i]));
+    }
+    uint32_t len = app.size() - 1;  // coded as size - 1 in 16 bits
+    JXL_RETURN_IF_ERROR(visitor->Bits(16, 0, &len));
+    if (visitor->IsReading()) app.resize(len + 1);
+    if (app.size() < 3) {
+      return JXL_FAILURE("Invalid marker size: %zu\n", app.size());
+    }
+  }
+  for (auto& com : com_data) {
+    uint32_t len = com.size() - 1;  // coded as size - 1 in 16 bits
+    JXL_RETURN_IF_ERROR(visitor->Bits(16, 0, &len));
+    if (visitor->IsReading()) com.resize(len + 1);
+    if (com.size() < 3) {
+      return JXL_FAILURE("Invalid marker size: %zu\n", com.size());
+    }
+  }
+
+  uint32_t num_quant_tables = quant.size();
+  JXL_RETURN_IF_ERROR(
+      visitor->U32(Val(1), Val(2), Val(3), Val(4), 2, &num_quant_tables));
+  if (num_quant_tables == 4) {  // encodable by the U32 above, but rejected
+    return JXL_FAILURE("Invalid number of quant tables");
+  }
+  if (visitor->IsReading()) {
+    quant.resize(num_quant_tables);
+  }
+  for (size_t i = 0; i < num_quant_tables; i++) {
+    if (quant[i].precision > 1) {
+      return JXL_FAILURE(
+          "Quant tables with more than 16 bits are not supported");
+    }
+    JXL_RETURN_IF_ERROR(visitor->Bits(1, 0, &quant[i].precision));
+    JXL_RETURN_IF_ERROR(visitor->Bits(2, i, &quant[i].index));
+    JXL_RETURN_IF_ERROR(visitor->Bool(true, &quant[i].is_last));
+  }
+
+  JPEGComponentType component_type =
+      components.size() == 1 && components[0].id == 1
+          ? JPEGComponentType::kGray
+          : components.size() == 3 && components[0].id == 1 &&
+                    components[1].id == 2 && components[2].id == 3
+                ? JPEGComponentType::kYCbCr
+                : components.size() == 3 && components[0].id == 'R' &&
+                          components[1].id == 'G' && components[2].id == 'B'
+                      ? JPEGComponentType::kRGB
+                      : JPEGComponentType::kCustom;
+  JXL_RETURN_IF_ERROR(
+      visitor->Bits(2, JPEGComponentType::kYCbCr,
+                    reinterpret_cast<uint32_t*>(&component_type)));
+  uint32_t num_components;
+  if (component_type == JPEGComponentType::kGray) {
+    num_components = 1;
+  } else if (component_type != JPEGComponentType::kCustom) {
+    num_components = 3;
+  } else {
+    num_components = components.size();
+    JXL_RETURN_IF_ERROR(
+        visitor->U32(Val(1), Val(2), Val(3), Val(4), 3, &num_components));
+    if (num_components != 1 && num_components != 3) {
+      return JXL_FAILURE("Invalid number of components: %u", num_components);
+    }
+  }
+  if (visitor->IsReading()) {
+    components.resize(num_components);
+  }
+  if (component_type == JPEGComponentType::kCustom) {
+    for (size_t i = 0; i < components.size(); i++) {
+      JXL_RETURN_IF_ERROR(visitor->Bits(8, 0, &components[i].id));
+    }
+  } else if (component_type == JPEGComponentType::kGray) {
+    components[0].id = 1;
+  } else if (component_type == JPEGComponentType::kRGB) {
+    components[0].id = 'R';
+    components[1].id = 'G';
+    components[2].id = 'B';
+  } else {
+    components[0].id = 1;
+    components[1].id = 2;
+    components[2].id = 3;
+  }
+  size_t used_tables = 0;  // bitmask of the quant tables that are referenced
+  for (size_t i = 0; i < components.size(); i++) {
+    JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &components[i].quant_idx));
+    if (components[i].quant_idx >= quant.size()) {
+      return JXL_FAILURE("Invalid quant table for component %zu: %u\n", i,
+                         components[i].quant_idx);
+    }
+    used_tables |= 1U << components[i].quant_idx;
+  }
+  if (used_tables + 1 != 1U << quant.size()) {  // all low bits must be set
+    return JXL_FAILURE(
+        "Not all quant tables are used (%zu tables, %zx used table mask)",
+        quant.size(), used_tables);
+  }
+
+  uint32_t num_huff = huffman_code.size();
+  JXL_RETURN_IF_ERROR(visitor->U32(Val(4), BitsOffset(3, 2), BitsOffset(4, 10),
+                                   BitsOffset(6, 26), 4, &num_huff));
+  if (visitor->IsReading()) {
+    huffman_code.resize(num_huff);
+  }
+  for (JPEGHuffmanCode& hc : huffman_code) {
+    bool is_ac = hc.slot_id >> 4;  // AC codes have 0x10 added to slot_id
+    uint32_t id = hc.slot_id & 0xF;
+    JXL_RETURN_IF_ERROR(visitor->Bool(false, &is_ac));
+    JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &id));
+    hc.slot_id = (static_cast<uint32_t>(is_ac) << 4) | id;
+    JXL_RETURN_IF_ERROR(visitor->Bool(true, &hc.is_last));
+    size_t num_symbols = 0;
+    for (size_t i = 0; i <= 16; i++) {  // counts[0] is visited as well
+      JXL_RETURN_IF_ERROR(visitor->U32(Val(0), Val(1), BitsOffset(3, 2),
+                                       Bits(8), 0, &hc.counts[i]));
+      num_symbols += hc.counts[i];
+    }
+    if (num_symbols < 1) {
+      // Actually, at least 2 symbols are required, since one of them is EOI.
+      return JXL_FAILURE("Empty Huffman table");
+    }
+    if (num_symbols > hc.values.size()) {
+      return JXL_FAILURE("Huffman code too large (%zu)", num_symbols);
+    }
+    // Presence flags for 4 * 64 + 1 values.
+    uint64_t value_slots[5] = {};
+    for (size_t i = 0; i < num_symbols; i++) {
+      // Goes up to 256, included. Might have the same symbol appear twice...
+      JXL_RETURN_IF_ERROR(visitor->U32(Bits(2), BitsOffset(2, 4),
+                                       BitsOffset(4, 8), BitsOffset(8, 1), 0,
+                                       &hc.values[i]));
+      value_slots[hc.values[i] >> 6] |= (uint64_t)1 << (hc.values[i] & 0x3F);
+    }
+    if (hc.values[num_symbols - 1] != kJpegHuffmanAlphabetSize) {
+      return JXL_FAILURE("Missing EOI symbol");
+    }
+    // Last element, denoting EOI, have to be 1 after the loop.
+    JXL_ASSERT(value_slots[4] == 1);
+    size_t num_values = 1;  // starts at 1 to account for the EOI entry
+    for (size_t i = 0; i < 4; ++i) num_values += hwy::PopCount(value_slots[i]);
+    if (num_values != num_symbols) {
+      return JXL_FAILURE("Duplicate Huffman symbols");
+    }
+    if (!is_ac) {
+      bool only_dc = ((value_slots[0] >> kJpegDCAlphabetSize) | value_slots[1] |
+                      value_slots[2] | value_slots[3]) == 0;
+      if (!only_dc) return JXL_FAILURE("Huffman symbols out of DC range");
+    }
+  }
+
+  for (auto& scan : scan_info) {
+    JXL_RETURN_IF_ERROR(
+        visitor->U32(Val(1), Val(2), Val(3), Val(4), 1, &scan.num_components));
+    if (scan.num_components >= 4) {  // only 1..3 components are accepted
+      return JXL_FAILURE("Invalid number of components in SOS marker");
+    }
+    JXL_RETURN_IF_ERROR(visitor->Bits(6, 0, &scan.Ss));
+    JXL_RETURN_IF_ERROR(visitor->Bits(6, 63, &scan.Se));
+    JXL_RETURN_IF_ERROR(visitor->Bits(4, 0, &scan.Al));
+    JXL_RETURN_IF_ERROR(visitor->Bits(4, 0, &scan.Ah));
+    for (size_t i = 0; i < scan.num_components; i++) {
+      JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &scan.components[i].comp_idx));
+      if (scan.components[i].comp_idx >= components.size()) {
+        return JXL_FAILURE("Invalid component idx in SOS marker");
+      }
+      JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &scan.components[i].ac_tbl_idx));
+      JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &scan.components[i].dc_tbl_idx));
+    }
+    // TODO(veluca): actually set and use this value.
+    JXL_RETURN_IF_ERROR(visitor->U32(Val(0), Val(1), Val(2), BitsOffset(3, 3),
+                                     kMaxNumPasses - 1,
+                                     &scan.last_needed_pass));
+  }
+
+  // From here on, this is data that is not strictly necessary to get a valid
+  // JPEG, but necessary for bit-exact JPEG reconstruction.
+  if (info.has_dri) {
+    JXL_RETURN_IF_ERROR(visitor->Bits(16, 0, &restart_interval));
+  }
+
+  uint64_t padding_spot_limit = scan_info.size();  // bounds padding_bits below
+
+  for (auto& scan : scan_info) {
+    uint32_t num_reset_points = scan.reset_points.size();
+    JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(2, 1), BitsOffset(4, 4),
+                                     BitsOffset(16, 20), 0, &num_reset_points));
+    if (visitor->IsReading()) {
+      scan.reset_points.resize(num_reset_points);
+    }
+    int last_block_idx = -1;
+    for (auto& block_idx : scan.reset_points) {
+      block_idx -= last_block_idx + 1;  // visit as a delta to the previous index
+      JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(3, 1),
+                                       BitsOffset(5, 9), BitsOffset(28, 41), 0,
+                                       &block_idx));
+      block_idx += last_block_idx + 1;  // back to an absolute index
+      if (static_cast<int>(block_idx) < last_block_idx + 1) {
+        return JXL_FAILURE("Invalid block ID: %u, last block was %d", block_idx,
+                           last_block_idx);
+      }
+      // TODO(eustas): better upper boundary could be given at this point; also
+      //               it could be applied during reset_points reading.
+      if (block_idx > (1u << 30)) {
+        // At most 8K x 8K x num_channels blocks are expected. That is,
+        // typically, 1.5 * 2^27. 2^30 should be sufficient for any sane
+        // image.
+        return JXL_FAILURE("Invalid block ID: %u", block_idx);
+      }
+      last_block_idx = block_idx;
+    }
+
+    uint32_t num_extra_zero_runs = scan.extra_zero_runs.size();
+    JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(2, 1), BitsOffset(4, 4),
+                                     BitsOffset(16, 20), 0,
+                                     &num_extra_zero_runs));
+    if (visitor->IsReading()) {
+      scan.extra_zero_runs.resize(num_extra_zero_runs);
+    }
+    last_block_idx = -1;
+    for (size_t i = 0; i < scan.extra_zero_runs.size(); ++i) {
+      uint32_t& block_idx = scan.extra_zero_runs[i].block_idx;
+      JXL_RETURN_IF_ERROR(visitor->U32(
+          Val(1), BitsOffset(2, 2), BitsOffset(4, 5), BitsOffset(8, 20), 1,
+          &scan.extra_zero_runs[i].num_extra_zero_runs));
+      block_idx -= last_block_idx + 1;  // visit as a delta to the previous index
+      JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(3, 1),
+                                       BitsOffset(5, 9), BitsOffset(28, 41), 0,
+                                       &block_idx));
+      block_idx += last_block_idx + 1;  // back to an absolute index
+      if (static_cast<int>(block_idx) < last_block_idx + 1) {
+        return JXL_FAILURE("Invalid block ID: %u, last block was %d", block_idx,
+                           last_block_idx);
+      }
+      if (block_idx > (1u << 30)) {
+        // At most 8K x 8K x num_channels blocks are expected. That is,
+        // typically, 1.5 * 2^27. 2^30 should be sufficient for any sane
+        // image.
+        return JXL_FAILURE("Invalid block ID: %u", block_idx);
+      }
+      last_block_idx = block_idx;
+    }
+
+    if (restart_interval > 0) {
+      int MCUs_per_row = 0;
+      int MCU_rows = 0;
+      CalculateMcuSize(scan, &MCUs_per_row, &MCU_rows);
+      padding_spot_limit += DivCeil(MCU_rows * MCUs_per_row, restart_interval);
+    }
+  }
+  std::vector<uint32_t> inter_marker_data_sizes;
+  inter_marker_data_sizes.reserve(info.num_intermarker);
+  for (size_t i = 0; i < info.num_intermarker; ++i) {
+    uint32_t len = visitor->IsReading() ? 0 : inter_marker_data[i].size();
+    JXL_RETURN_IF_ERROR(visitor->Bits(16, 0, &len));
+    if (visitor->IsReading()) inter_marker_data_sizes.emplace_back(len);
+  }
+  uint32_t tail_data_len = tail_data.size();
+  JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(8, 1),
+                                   BitsOffset(16, 257), BitsOffset(22, 65793),
+                                   0, &tail_data_len));
+
+  JXL_RETURN_IF_ERROR(visitor->Bool(false, &has_zero_padding_bit));
+  if (has_zero_padding_bit) {
+    uint32_t nbit = padding_bits.size();
+    JXL_RETURN_IF_ERROR(visitor->Bits(24, 0, &nbit));
+    if (nbit > 7 * padding_spot_limit) {
+      return JXL_FAILURE("Number of padding bits does not correspond to image");
+    }
+    // TODO(eustas): check that that much bits of input are available.
+    if (visitor->IsReading()) {
+      padding_bits.resize(nbit);
+    }
+    // TODO(eustas): read in (8-64?) bit groups to reduce overhead.
+    for (uint8_t& bit : padding_bits) {
+      bool bbit = bit;
+      JXL_RETURN_IF_ERROR(visitor->Bool(false, &bbit));
+      bit = bbit;
+    }
+  }
+
+  // Apply postponed actions: the buffers whose sizes were visited above are
+  // only allocated here, after every field has been visited successfully.
+  if (visitor->IsReading()) {
+    tail_data.resize(tail_data_len);
+    JXL_ASSERT(inter_marker_data_sizes.size() == info.num_intermarker);
+    inter_marker_data.reserve(info.num_intermarker);
+    for (size_t i = 0; i < info.num_intermarker; ++i) {
+      inter_marker_data.emplace_back(inter_marker_data_sizes[i]);
+    }
+  }
+
+  return true;
+}
+
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+void JPEGData::CalculateMcuSize(const JPEGScanInfo& scan, int* MCUs_per_row,
+                                int* MCU_rows) const {
+  // Largest sampling factors over all components of the image (at least 1).
+  int h_max = 1;
+  int v_max = 1;
+  for (size_t i = 0; i < components.size(); ++i) {
+    if (components[i].h_samp_factor > h_max) h_max = components[i].h_samp_factor;
+    if (components[i].v_samp_factor > v_max) v_max = components[i].v_samp_factor;
+  }
+  // The numerators convert a block count into an MCU count. An interleaved
+  // scan (more than one component) uses 1, so an MCU spans max_*_samp_factor
+  // blocks; otherwise the first scan component's own sampling factor is used,
+  // so that an MCU is always a single block.
+  const JPEGComponent& first = components[scan.components[0].comp_idx];
+  const bool single = scan.num_components <= 1;
+  const int h_num = single ? first.h_samp_factor : 1;
+  const int v_num = single ? first.v_samp_factor : 1;
+  *MCUs_per_row = DivCeil(width * h_num, 8 * h_max);
+  *MCU_rows = DivCeil(height * v_num, 8 * v_max);
+}
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+
+Status SetJPEGDataFromICC(const PaddedBytes& icc, jpeg::JPEGData* jpeg_data) {
+  // Distributes icc over the kICC APP markers: each gets size() - 17 bytes at
+  // offset 17 (shorter markers are rejected, since that subtraction would wrap).
+  size_t icc_pos = 0;
+  for (size_t i = 0; i < jpeg_data->app_data.size(); i++) {
+    if (jpeg_data->app_marker_type[i] != jpeg::AppMarkerType::kICC) continue;
+    auto& app = jpeg_data->app_data[i];
+    if (app.size() < 17) return JXL_FAILURE("ICC APP marker is too short");
+    const size_t len = app.size() - 17;
+    if (len > icc.size() - icc_pos) {  // icc_pos <= icc.size(): cannot overflow
+      return JXL_FAILURE(
+          "ICC length is less than APP markers: requested %zu more bytes, "
+          "%zu available", len, icc.size() - icc_pos);
+    }
+    memcpy(app.data() + 17, icc.data() + icc_pos, len);
+    icc_pos += len;
+  }
+  const bool partially_used = icc_pos != 0 && icc_pos != icc.size();
+  if (partially_used) return JXL_FAILURE("ICC length is more than APP markers");
+  return true;
+}
+
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+}  // namespace jpeg
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/jpeg_data.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/jpeg_data.h
new file mode 100644
index 0000000000..6b7cb02aad
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jpeg/jpeg_data.h
@@ -0,0 +1,267 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Data structures that represent the non-pixel contents of a jpeg file.
+
+#ifndef LIB_JXL_JPEG_JPEG_DATA_H_
+#define LIB_JXL_JPEG_JPEG_DATA_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+#include <vector>
+
+#include "lib/jxl/common.h"  // JPEGXL_ENABLE_TRANSCODE_JPEG
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+namespace jpeg {
+
+constexpr int kMaxComponents = 4;
+constexpr int kMaxQuantTables = 4;
+constexpr int kMaxHuffmanTables = 4;
+constexpr size_t kJpegHuffmanMaxBitLength = 16;  // longest Huffman code length
+constexpr int kJpegHuffmanAlphabetSize = 256;  // also the final sentinel symbol
+constexpr int kJpegDCAlphabetSize = 12;  // DC Huffman symbols must be below this
+constexpr int kMaxDHTMarkers = 512;  // reader fails at this many Huffman codes
+constexpr int kMaxDimPixels = 65535;
+constexpr uint8_t kApp1 = 0xE1;
+constexpr uint8_t kApp2 = 0xE2;
+const uint8_t kIccProfileTag[12] = "ICC_PROFILE";  // 11 characters + NUL
+const uint8_t kExifTag[6] = "Exif\0";  // "Exif" followed by two NUL bytes
+const uint8_t kXMPTag[29] = "http://ns.adobe.com/xap/1.0/";  // 28 chars + NUL
+
+/* clang-format off */
+constexpr uint32_t kJPEGNaturalOrder[80] = {  // zigzag position -> row-major index
+  0,   1,  8, 16,  9,  2,  3, 10,
+  17, 24, 32, 25, 18, 11,  4,  5,
+  12, 19, 26, 33, 40, 48, 41, 34,
+  27, 20, 13,  6,  7, 14, 21, 28,
+  35, 42, 49, 56, 57, 50, 43, 36,
+  29, 22, 15, 23, 30, 37, 44, 51,
+  58, 59, 52, 45, 38, 31, 39, 46,
+  53, 60, 61, 54, 47, 55, 62, 63,
+  // extra entries for safety in decoder (positions 64..79 all map to 63)
+  63, 63, 63, 63, 63, 63, 63, 63,
+  63, 63, 63, 63, 63, 63, 63, 63
+};
+
+constexpr uint32_t kJPEGZigZagOrder[64] = {  // row-major index -> zigzag position
+  0,   1,  5,  6, 14, 15, 27, 28,
+  2,   4,  7, 13, 16, 26, 29, 42,
+  3,   8, 12, 17, 25, 30, 41, 43,
+  9,  11, 18, 24, 31, 40, 44, 53,
+  10, 19, 23, 32, 39, 45, 52, 54,
+  20, 22, 33, 38, 46, 51, 55, 60,
+  21, 34, 37, 47, 50, 56, 59, 61,
+  35, 36, 48, 49, 57, 58, 62, 63
+};
+/* clang-format on */
+
+enum struct JPEGReadError {  // failure reason stored in JPEGData::error
+  OK = 0,  // value JPEGData's constructor stores in its error member
+  SOI_NOT_FOUND,
+  SOF_NOT_FOUND,
+  UNEXPECTED_EOF,
+  MARKER_BYTE_NOT_FOUND,
+  UNSUPPORTED_MARKER,
+  WRONG_MARKER_SIZE,
+  INVALID_PRECISION,
+  INVALID_WIDTH,
+  INVALID_HEIGHT,
+  INVALID_NUMCOMP,
+  INVALID_SAMP_FACTOR,
+  INVALID_START_OF_SCAN,
+  INVALID_END_OF_SCAN,
+  INVALID_SCAN_BIT_POSITION,
+  INVALID_COMPS_IN_SCAN,
+  INVALID_HUFFMAN_INDEX,
+  INVALID_QUANT_TBL_INDEX,
+  INVALID_QUANT_VAL,
+  INVALID_MARKER_LEN,
+  INVALID_SAMPLING_FACTORS,
+  INVALID_HUFFMAN_CODE,
+  INVALID_SYMBOL,
+  NON_REPRESENTABLE_DC_COEFF,
+  NON_REPRESENTABLE_AC_COEFF,
+  INVALID_SCAN,
+  OVERLAPPING_SCANS,
+  INVALID_SCAN_ORDER,
+  EXTRA_ZERO_RUN,
+  DUPLICATE_DRI,
+  DUPLICATE_SOF,
+  WRONG_RESTART_MARKER,
+  DUPLICATE_COMPONENT_ID,
+  COMPONENT_NOT_FOUND,
+  HUFFMAN_TABLE_NOT_FOUND,
+  HUFFMAN_TABLE_ERROR,  // e.g. the reader's "Too many Huffman tables." case
+  QUANT_TABLE_NOT_FOUND,
+  EMPTY_DHT,
+  EMPTY_DQT,
+  OUT_OF_BAND_COEFF,
+  EOB_RUN_TOO_LONG,
+  IMAGE_TOO_LARGE,
+  INVALID_QUANT_TBL_PRECISION,
+};
+
+// Quantization values for an 8x8 pixel block.
+struct JPEGQuantTable {
+  std::array<int32_t, kDCTBlockSize> values;  // not visited by VisitFields
+  uint32_t precision = 0;  // visited as 1 bit; VisitFields rejects values > 1
+  // The index of this quantization table as it was parsed from the input JPEG.
+  // Each DQT marker segment contains an 'index' field, and we save this index
+  // here. Valid values are 0 to 3.
+  uint32_t index = 0;
+  // Set to true if this table is the last one within its marker segment.
+  bool is_last = true;
+};
+
+// Huffman code and decoding lookup table used for DC and AC coefficients.
+struct JPEGHuffmanCode {
+  // Bit length histogram.
+  std::array<uint32_t, kJpegHuffmanMaxBitLength + 1> counts = {};
+  // Symbol values sorted by increasing bit lengths. VisitFields requires the
+  std::array<uint32_t, kJpegHuffmanAlphabetSize + 1> values = {};
+  // last used value to be kJpegHuffmanAlphabetSize. slot_id below is the index
+  // of the Huffman code in the current set; AC codes have 0x10 added to it.
+  int slot_id = 0;
+  // Set to true if this Huffman code is the last one within its marker segment.
+  bool is_last = true;
+};
+
+// Huffman table indexes used for one component of one scan.
+struct JPEGComponentScanInfo {
+  uint32_t comp_idx;    // index into JPEGData::components; visited as 2 bits
+  uint32_t dc_tbl_idx;  // visited as 2 bits
+  uint32_t ac_tbl_idx;  // visited as 2 bits
+};
+
+// Contains information that is used in one scan.
+struct JPEGScanInfo {
+  // Parameters used for progressive scans (named the same way as in the spec):
+  //   Ss : Start of spectral band in zig-zag sequence.
+  //   Se : End of spectral band in zig-zag sequence.
+  //   Ah : Successive approximation bit position, high.
+  //   Al : Successive approximation bit position, low.
+  uint32_t Ss;  // visited as 6 bits; note: no default member initializer
+  uint32_t Se;  // visited as 6 bits; note: no default member initializer
+  uint32_t Ah;  // visited as 4 bits; note: no default member initializer
+  uint32_t Al;  // visited as 4 bits; note: no default member initializer
+  uint32_t num_components = 0;  // VisitFields rejects values >= 4
+  std::array<JPEGComponentScanInfo, 4> components;  // first num_components used
+  // Last codestream pass that is needed to write this scan.
+  uint32_t last_needed_pass = 0;
+
+  // Extra information required for bit-precise JPEG file reconstruction.
+
+  // Set of block indexes where the JPEG encoder has to flush the end-of-block
+  // runs and refinement bits.
+  std::vector<uint32_t> reset_points;  // VisitFields requires increasing order
+  // The number of extra zero runs (Huffman symbol 0xf0) before the end of
+  // block (if nonzero), indexed by block index.
+  // All of these symbols can be omitted without changing the pixel values, but
+  // some jpeg encoders put these at the end of blocks.
+  typedef struct {
+    uint32_t block_idx;
+    uint32_t num_extra_zero_runs;
+  } ExtraZeroRunInfo;
+  std::vector<ExtraZeroRunInfo> extra_zero_runs;  // increasing block_idx
+};
+
+typedef int16_t coeff_t;  // element type of JPEGComponent::coeffs
+
+// Represents one component of a jpeg file.
+struct JPEGComponent {
+  JPEGComponent()  // sampling factors default to 1, everything else to 0
+      : id(0),
+        h_samp_factor(1),
+        v_samp_factor(1),
+        quant_idx(0),
+        width_in_blocks(0),
+        height_in_blocks(0) {}
+
+  // One-byte id of the component.
+  uint32_t id;
+  // Horizontal and vertical sampling factors.
+  // In interleaved mode, each minimal coded unit (MCU) has
+  // h_samp_factor x v_samp_factor DCT blocks from this component.
+  int h_samp_factor;
+  int v_samp_factor;
+  // The index of the quantization table used for this component.
+  uint32_t quant_idx;  // VisitFields requires it to be < JPEGData::quant.size()
+  // The dimensions of the component measured in 8x8 blocks.
+  uint32_t width_in_blocks;
+  uint32_t height_in_blocks;
+  // The DCT coefficients of this component, laid out block-by-block, divided
+  // through the quantization matrix values.
+  std::vector<coeff_t> coeffs;
+};
+
+enum class AppMarkerType : uint32_t {  // VisitFields rejects any other value
+  kUnknown = 0,
+  kICC = 1,  // payload from byte 17 on is filled by SetJPEGDataFromICC
+  kExif = 2,
+  kXMP = 3,
+};
+
+// Represents a parsed jpeg file.
+struct JPEGData : public Fields {
+  JPEGData()
+      : width(0),
+        height(0),
+        restart_interval(0),
+        error(JPEGReadError::OK),
+        has_zero_padding_bit(false) {}
+
+  const char* Name() const override { return "JPEGData"; }
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+  // Doesn't serialize everything - skips brotli-encoded data and what is
+  // already encoded in the codestream.
+  Status VisitFields(Visitor* visitor) override;
+#else
+  Status VisitFields(Visitor* /* visitor */) override {
+    JXL_ABORT("JPEG transcoding support not enabled");
+  }
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+  // Computes the MCU grid of `scan` from width/height and sampling factors.
+  void CalculateMcuSize(const JPEGScanInfo& scan, int* MCUs_per_row,
+                        int* MCU_rows) const;
+  // NOTE(review): width and height are presumably in pixels - confirm.
+  int width;
+  int height;
+  uint32_t restart_interval;  // visited (16 bits) only if a 0xdd marker exists
+  std::vector<std::vector<uint8_t>> app_data;  // one per 0xe0..0xef marker
+  std::vector<AppMarkerType> app_marker_type;  // parallel to app_data
+  std::vector<std::vector<uint8_t>> com_data;  // one per 0xfe marker
+  std::vector<JPEGQuantTable> quant;
+  std::vector<JPEGHuffmanCode> huffman_code;
+  std::vector<JPEGComponent> components;
+  std::vector<JPEGScanInfo> scan_info;  // one per 0xda marker
+  std::vector<uint8_t> marker_order;  // marker bytes in order; ends with 0xd9
+  std::vector<std::vector<uint8_t>> inter_marker_data;  // one per fake 0xff
+  std::vector<uint8_t> tail_data;
+  JPEGReadError error;
+
+  // Extra information required for bit-precise JPEG file reconstruction.
+
+  bool has_zero_padding_bit;
+  std::vector<uint8_t> padding_bits;  // one element per bit
+};
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+// Copies icc into the kICC APP marker payloads (from byte 17) of jpeg_data.
+Status SetJPEGDataFromICC(const PaddedBytes& icc, jpeg::JPEGData* jpeg_data);
+#else  // stub used when JPEG transcoding support is compiled out
+static JXL_INLINE Status SetJPEGDataFromICC(const PaddedBytes& /* icc */,
+                                            jpeg::JPEGData* /* jpeg_data */) {
+  JXL_ABORT("JPEG transcoding support not enabled");
+}
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+}  // namespace jpeg
+}  // namespace jxl
+
+#endif  // LIB_JXL_JPEG_JPEG_DATA_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl.syms b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl.syms
new file mode 100644
index 0000000000..0f398d7151
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl.syms
@@ -0,0 +1,5 @@
+{
+  extern "C" {
+    jpegxl_*;
+  };
+};
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl.version b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl.version
new file mode 100644
index 0000000000..e0ed12be25
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl.version
@@ -0,0 +1,7 @@
+JXL_0 {
+  global:
+    Jxl*;
+
+  local:
+    *;
+};
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_decode.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_decode.cc
new file mode 100644
index 0000000000..78c7d8d8e8
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_decode.cc
@@ -0,0 +1,2217 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "jxl/decode.h"
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/dec_frame.h"
+#include "lib/jxl/dec_modular.h"
+#include "lib/jxl/dec_reconstruct.h"
+#include "lib/jxl/decode_to_jpeg.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/icc_codec.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/memory_manager_internal.h"
+#include "lib/jxl/toc.h"
+
+#ifndef JPEGXL_MAJOR_VERSION  // Fallback when no version is defined yet.
+#define JPEGXL_MAJOR_VERSION 0
+#define JPEGXL_MINOR_VERSION 5
+#define JPEGXL_PATCH_VERSION 0
+#endif  // JPEGXL_MAJOR_VERSION
+
+namespace {
+
+// Limits set by the fuzzer. A non-zero memory_limit_base_ makes operations
+// fail that would require allocating large objects; 0 disables the check.
+// Actual memory usage might be two orders of magnitude bigger.
+// TODO(eustas): this is a poor man's replacement for memory-manager approach;
+//               remove, once memory-manager actually works.
+size_t memory_limit_base_ = 0;
+size_t cpu_limit_base_ = 0;  // NOTE(review): usage not visible in this part
+size_t used_cpu_base_ = 0;   // NOTE(review): usage not visible in this part
+
+// True if xsize * ysize pixels stay within memory_limit_base_ (0 = no limit).
+bool CheckSizeLimit(size_t xsize, size_t ysize) {
+  const bool unlimited = (memory_limit_base_ == 0);
+  if (unlimited || xsize == 0 || ysize == 0) return true;
+  const size_t area = xsize * ysize;
+  const bool wrapped = (area / xsize != ysize);  // product overflowed size_t
+  return !wrapped && area <= memory_limit_base_;
+}
+
+// Returns true when a + b exceeds size, or when the addition wraps around.
+bool OutOfBounds(size_t a, size_t b, size_t size) {
+  const size_t end = a + b;
+  const bool wrapped = end < a;  // unsigned sum smaller than an operand
+  const bool beyond = end > size;
+  return wrapped || beyond;
+}
+
+// Returns true when a + b + c exceeds size, or when either addition wraps.
+bool OutOfBounds(size_t a, size_t b, size_t c, size_t size) {
+  const size_t ab = a + b;
+  if (ab < b) return true;  // first addition wrapped around
+  const size_t abc = ab + c;
+  if (abc < c) return true;  // second addition wrapped around
+  // No wrap-around: abc is the exact sum and can be compared directly.
+  return abc > size;
+}
+
+// Returns true when a + b + c does not fit in size_t.
+bool SumOverflows(size_t a, size_t b, size_t c) {
+  const size_t ab = a + b;
+  if (ab < b) return true;  // a + b wrapped around
+  const size_t abc = ab + c;
+  return abc < c;  // (a + b) + c wrapped around
+}
+
+JXL_INLINE size_t InitialBasicInfoSizeHint() {
+  // Container bytes that precede the codestream when the codestream box comes
+  // right after the signature and filetype boxes: 12 bytes signature box +
+  // 20 bytes filetype box + 16 bytes for the codestream box length, name and
+  // optional XLBox length.
+  constexpr size_t kContainerHeaderSize = 48;
+
+  // Upper bound for the basic info part of the codestream header, that is
+  // everything up to and including extra_channel_bits: roughly 2 bytes
+  // signature + 8 bytes SizeHeader + 31 bytes ColorEncoding + 4 bytes rest of
+  // ImageMetadata + 5 bytes part of ImageMetadata2.
+  // TODO(lode): recompute and update this value when alpha_bits is moved to
+  // extra channels info.
+  constexpr size_t kMaxCodestreamBasicInfoSize = 50;
+
+  return kContainerHeaderSize + kMaxCodestreamBasicInfoSize;
+}
+
+// Evaluates to JXL_DEC_ERROR after optionally debug-printing the message; with
+// JXL_CRASH_ON_ERROR it always prints and then aborts (similar to JXL_FAILURE).
+#ifdef JXL_CRASH_ON_ERROR
+#define JXL_API_ERROR(format, ...)                                           \
+  (::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__), \
+   ::jxl::Abort(), JXL_DEC_ERROR)
+#else  // JXL_CRASH_ON_ERROR
+#define JXL_API_ERROR(format, ...)                                             \
+  (((JXL_DEBUG_ON_ERROR) &&                                                    \
+    ::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__)), \
+   JXL_DEC_ERROR)
+#endif  // JXL_CRASH_ON_ERROR
+
+JxlDecoderStatus ConvertStatus(JxlDecoderStatus status) { return status; }
+// Maps jxl::Status to JXL_DEC_SUCCESS (ok) or JXL_DEC_ERROR (any failure).
+JxlDecoderStatus ConvertStatus(jxl::Status status) {
+  return status ? JXL_DEC_SUCCESS : JXL_DEC_ERROR;
+}
+
+// Detects a JPEG XL signature starting at buf[*pos], where len is the total
+// size of buf. Returns JXL_SIG_CODESTREAM or JXL_SIG_CONTAINER on a match and
+// advances *pos past the signature. Returns JXL_SIG_NOT_ENOUGH_BYTES when more
+// bytes are needed to decide and JXL_SIG_INVALID otherwise; *pos is left
+// untouched in both of those cases.
+JxlSignature ReadSignature(const uint8_t* buf, size_t len, size_t* pos) {
+  if (*pos >= len) return JXL_SIG_NOT_ENOUGH_BYTES;
+
+  const uint8_t* data = buf + *pos;
+  const size_t avail = len - *pos;  // at least 1 thanks to the check above
+
+  // Bare codestream: 0xff followed by the codestream marker (0x0a).
+  if (data[0] == 0xff) {
+    if (avail < 2) return JXL_SIG_NOT_ENOUGH_BYTES;
+    if (data[1] != jxl::kCodestreamMarker) return JXL_SIG_INVALID;
+    *pos += 2;
+    return JXL_SIG_CODESTREAM;
+  }
+
+  // Container: a 12-byte signature box that starts with a zero byte.
+  if (data[0] == 0) {
+    static const uint8_t kContainerSignature[12] = {
+        0, 0, 0, 0xC, 'J', 'X', 'L', ' ', 0xD, 0xA, 0x87, 0xA};
+    constexpr size_t kSignatureSize = sizeof(kContainerSignature);
+    if (avail < kSignatureSize) return JXL_SIG_NOT_ENOUGH_BYTES;
+    // Byte 0 was already matched by the enclosing condition.
+    for (size_t i = 1; i < kSignatureSize; ++i) {
+      if (data[i] != kContainerSignature[i]) return JXL_SIG_INVALID;
+    }
+    *pos += kSignatureSize;
+    return JXL_SIG_CONTAINER;
+  }
+
+  return JXL_SIG_INVALID;
+}
+
+}  // namespace
+
+uint32_t JxlDecoderVersion(void) {  // major * 1000000 + minor * 1000 + patch
+  return JPEGXL_PATCH_VERSION + 1000 * JPEGXL_MINOR_VERSION +
+         1000000 * JPEGXL_MAJOR_VERSION;
+}
+
+JxlSignature JxlSignatureCheck(const uint8_t* buf, size_t len) {
+  size_t offset = 0;  // start at the first byte; the advanced value is unused
+  return ReadSignature(buf, len, &offset);
+}
+
+namespace {
+
+// Returns the number of bits one sample of data_type occupies, 0 if invalid.
+size_t BitsPerChannel(JxlDataType data_type) {
+  switch (data_type) {
+    case JXL_TYPE_BOOLEAN:
+      return 1;
+    case JXL_TYPE_UINT8:
+      return 8;
+    case JXL_TYPE_UINT16:
+    case JXL_TYPE_FLOAT16:
+      return 16;
+    case JXL_TYPE_UINT32:
+    case JXL_TYPE_FLOAT:
+      return 32;
+      // Deliberately no default, so the compiler reports unhandled new types.
+  }
+  // Only reached for a value that is none of the enumerators above.
+  return 0;
+}
+
+enum class DecoderStage : uint32_t {
+  kInited,    // Decoder created, JxlDecoderProcessInput not called yet
+  kStarted,   // JxlDecoderProcessInput calls are in progress
+  kFinished,  // Everything done, nothing left to process
+  kError,     // An error occurred, the decoder object is no longer usable
+};
+
+enum class FrameStage : uint32_t {
+  kHeader,      // Must parse the frame header. dec->frame_start must already
+                // be set up correctly.
+  kTOC,         // Must parse the TOC
+  kFull,        // Must parse the full pixels
+  kFullOutput,  // Must output the full pixels
+};
+
+// Manages the sections for the FrameDecoder based on input bytes received.
+struct Sections {
+  // sections_begin = position in the frame where the sections begin, after
+  // the frame header and TOC, so sections_begin = sum of frame header size and
+  // TOC size.
+  Sections(jxl::FrameDecoder* frame_dec, size_t frame_size,
+           size_t sections_begin)
+      : frame_dec_(frame_dec),
+        frame_size_(frame_size),
+        sections_begin_(sections_begin) {}
+
+  Sections(const Sections&) = delete;  // owns raw BitReader pointers
+  Sections& operator=(const Sections&) = delete;
+  Sections(Sections&&) = delete;
+  Sections& operator=(Sections&&) = delete;
+
+  ~Sections() {
+    // Avoid memory leaks if the JXL decoder quits early and doesn't end up
+    // calling CloseInput().
+    CloseInput();
+  }
+
+  // frame_dec_ must have been Inited already, but not yet done ProcessSections.
+  JxlDecoderStatus Init() {
+    section_received.resize(frame_dec_->NumSections(), 0);
+
+    const auto& offsets = frame_dec_->SectionOffsets();
+    const auto& sizes = frame_dec_->SectionSizes();
+
+    // Reject sections that end beyond the frame or whose position overflows.
+    for (size_t i = 0; i < frame_dec_->NumSections(); i++) {
+      if (OutOfBounds(sections_begin_, offsets[i], sizes[i], frame_size_)) {
+        return JXL_API_ERROR("section out of bounds");
+      }
+    }
+
+    return JXL_DEC_SUCCESS;
+  }
+
+  // Sets the input data for the frame. The frame pointer must point to the
+  // beginning of the frame, size is the amount of bytes gotten so far and
+  // should increase with next calls until the full frame is loaded.
+  // TODO(lode): allow caller to provide only later chunks of memory when
+  // earlier sections are fully processed already.
+  void SetInput(const uint8_t* frame, size_t size) {
+    const auto& offsets = frame_dec_->SectionOffsets();
+    const auto& sizes = frame_dec_->SectionSizes();
+    // Register every section that now lies fully within the received bytes.
+    for (size_t i = 0; i < frame_dec_->NumSections(); i++) {
+      if (section_received[i]) continue;
+      if (!OutOfBounds(sections_begin_, offsets[i], sizes[i], size)) {
+        section_received[i] = 1;
+        section_info.emplace_back(jxl::FrameDecoder::SectionInfo{nullptr, i});
+        section_status.emplace_back();
+      }
+    }
+    // Reset all the bitreaders, because the address of the frame pointer may
+    // change, even if it always represents the same frame start.
+    for (size_t i = 0; i < section_info.size(); i++) {
+      size_t id = section_info[i].id;
+      JXL_ASSERT(section_info[i].br == nullptr);
+      section_info[i].br = new jxl::BitReader(jxl::Span<const uint8_t>(
+          frame + sections_begin_ + offsets[id], sizes[id]));
+    }
+  }
+  // Closes and deletes the open bit readers; errors if one read out of bounds.
+  JxlDecoderStatus CloseInput() {
+    bool out_of_bounds = false;
+    for (size_t i = 0; i < section_info.size(); i++) {
+      if (!section_info[i].br) continue;
+      if (!section_info[i].br->AllReadsWithinBounds()) {
+        // Mark out of bounds section, but keep closing and deleting the next
+        // ones as well.
+        out_of_bounds = true;
+      }
+      JXL_ASSERT(section_info[i].br->Close());
+      delete section_info[i].br;
+      section_info[i].br = nullptr;
+    }
+    if (out_of_bounds) {
+      // If any bit reader indicates out of bounds, it's an error, not just
+      // needing more input, since we ensure only bit readers containing
+      // a complete section are provided to the FrameDecoder.
+      return JXL_API_ERROR("frame out of bounds");
+    }
+    return JXL_DEC_SUCCESS;
+  }
+
+  // Not owned by this struct.
+  jxl::FrameDecoder* frame_dec_;
+
+  size_t frame_size_;      // upper bound that Init() checks sections against
+  size_t sections_begin_;  // offset of the first section within the frame
+  // One section_info/section_status entry per section registered by SetInput.
+  std::vector<jxl::FrameDecoder::SectionInfo> section_info;
+  std::vector<jxl::FrameDecoder::SectionStatus> section_status;
+  std::vector<char> section_received;  // 1 once section i is registered
+};
+
+/*
+Computes which frames are required to decode the frame at the given index and
+any frames after it, and returns their indices in no particular order (the
+frame at index itself is not included).
+
+references[i] is the bit mask of the storage slots frame i reads from and
+saved_as[i] the bit mask of the storage slots frame i is saved into, where bit
+s of a mask stands for storage slot s. The given index must be smaller than
+saved_as.size(), and references.size() must equal saved_as.size(). Any frames
+beyond saved_as and references are considered unknown future frames and must
+be treated as if something depends on them.
+*/
+std::vector<size_t> GetFrameDependencies(size_t index,
+                                         const std::vector<int>& saved_as,
+                                         const std::vector<int>& references) {
+  JXL_ASSERT(references.size() == saved_as.size());
+  JXL_ASSERT(index < references.size());
+
+  // Number of storage slots a frame can be saved into or read from.
+  constexpr size_t kNumStorage = 8;
+  const size_t num_frames = saved_as.size();
+  // Marker for "no frame has been stored in this slot so far".
+  const size_t none = references.size();
+
+  // stored[s][i] is the last frame saved into storage slot s before or at
+  // frame i, or `none` if the slot is still empty at that point.
+  std::array<std::vector<size_t>, kNumStorage> stored;
+  for (size_t s = 0; s < kNumStorage; ++s) {
+    const int slot_bit = 1 << s;
+    std::vector<size_t>& history = stored[s];
+    history.resize(num_frames);
+    size_t latest = none;
+    for (size_t i = 0; i < num_frames; ++i) {
+      if (saved_as[i] & slot_bit) latest = i;
+      history[i] = latest;
+    }
+  }
+
+  std::vector<size_t> result;
+  std::vector<char> visited(index + 1, 0);
+  std::vector<size_t> pending(1, index);
+  visited[index] = 1;
+
+  // Marks frame `ref` as required the first time it shows up and queues it,
+  // so that the frames it references are followed as well.
+  const auto require = [&](size_t ref) {
+    if (ref == none || visited[ref]) return;
+    visited[ref] = 1;
+    pending.push_back(ref);
+    result.push_back(ref);
+  };
+
+  // For frames after index, assume they can depend on any of the 8 storage
+  // slots: they have unknown references and more frames may follow the last
+  // known one. Hence every frame stored at this point is required.
+  // TODO(lode): take values of saved_as and references after index, and a
+  // input flag indicating if they are all frames of the image, to further
+  // optimize this.
+  for (size_t s = 0; s < kNumStorage; ++s) {
+    require(stored[s][index]);
+  }
+
+  // Follow the references of each required frame until none are pending.
+  while (!pending.empty()) {
+    const size_t current = pending.back();
+    pending.pop_back();
+    if (current == 0) continue;  // first frame cannot have references
+    for (size_t s = 0; s < kNumStorage; ++s) {
+      const bool reads_slot = (references[current] & (1 << s)) != 0;
+      // The slot content seen by `current` comes from earlier frames.
+      if (reads_slot) require(stored[s][current - 1]);
+    }
+  }
+
+  return result;
+}
+
+}  // namespace
+
+// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
+struct JxlDecoderStruct {
+  JxlDecoderStruct() = default;  // see JxlDecoderReset()
+
+  JxlMemoryManager memory_manager;
+  std::unique_ptr<jxl::ThreadPool> thread_pool;
+
+  DecoderStage stage;
+
+  // Status of progression, internal.
+  bool got_signature;
+  bool first_codestream_seen;
+  // Indicates we know that we've seen the last codestream, however this is not
+  // guaranteed to be true for the last box because a jxl file may have multiple
+  // "jxlp" boxes and it is possible (and permitted) that the last one is not a
+  // final box that uses size 0 to indicate the end.
+  bool last_codestream_seen;
+  bool got_basic_info;
+  size_t header_except_icc_bits = 0;  // To skip everything before ICC.
+  bool got_all_headers;               // Codestream metadata headers.
+  bool post_headers;                  // Already decoding pixels.
+  jxl::ICCReader icc_reader;
+
+  // This means either we actually got the preview image, or determined we
+  // cannot get it or there is none.
+  bool got_preview_image;
+
+  // Position of next_in in the original file including box format if present
+  // (as opposed to position in the codestream)
+  size_t file_pos;
+  size_t box_begin;
+  size_t box_end;
+  bool skip_box;
+  // Begin and end of the content of the current codestream box. This could be
+  // a partial codestream box.
+  // codestream_begin 0 is used to indicate the begin is not yet known.
+  // codestream_end 0 is used to indicate uncapped (until end of file, for the
+  // last box if this box doesn't indicate its actual size).
+  // Not used if the file is a direct codestream.
+  size_t codestream_begin;
+  size_t codestream_end;
+
+  // Settings
+  bool keep_orientation;
+
+  // Bitfield, for which informative events (JXL_DEC_BASIC_INFO, etc...) the
+  // decoder returns a status. By default, do not return for any of the events,
+  // only return when the decoder cannot continue because it needs more input or
+  // output data.
+  int events_wanted;
+  int orig_events_wanted;
+
+  // Fields for reading the basic info from the header.
+  size_t basic_info_size_hint;
+  bool have_container;
+
+  // Whether the preview out buffer was set. It is possible for the buffer to
+  // be nullptr and buffer_set to be true, indicating it was deliberately
+  // set to nullptr.
+  bool preview_out_buffer_set;
+  // Idem for the image buffer.
+  bool image_out_buffer_set;
+
+  // Owned by the caller, buffers for the preview and full resolution images
+  void* preview_out_buffer;
+  void* image_out_buffer;
+  JxlImageOutCallback image_out_callback;
+  void* image_out_opaque;
+
+  size_t preview_out_size;
+  size_t image_out_size;
+
+  // TODO(lode): merge these?
+  JxlPixelFormat preview_out_format;
+  JxlPixelFormat image_out_format;
+
+  jxl::CodecMetadata metadata;
+  std::unique_ptr<jxl::ImageBundle> ib;
+  // ColorEncoding to use for xyb encoded image with ICC profile.
+  jxl::ColorEncoding default_enc;
+
+  std::unique_ptr<jxl::PassesDecoderState> passes_state;
+  std::unique_ptr<jxl::FrameDecoder> frame_dec;
+  std::unique_ptr<Sections> sections;
+  // The FrameDecoder is initialized, and not yet finalized
+  bool frame_dec_in_progress;
+
+  // Header of the current frame. NOTE(review): the got_toc flag named in the
+  // next sentence is not a member of this struct. When got_toc is true, this is
+  // the frame header of the last frame of the current still series.
+  std::unique_ptr<jxl::FrameHeader> frame_header;
+
+  // Start of the current frame being processed, as offset from the beginning of
+  // the codestream.
+  size_t frame_start;
+  size_t frame_size;
+  FrameStage frame_stage;
+  // The currently processed frame is the last of the current composite still,
+  // and so must be returned as pixels
+  bool is_last_of_still;
+  // The currently processed frame is the last of the codestream
+  bool is_last_total;
+  // How many frames to skip.
+  size_t skip_frames;
+  // Skipping the current frame. May be false if skip_frames was just set to
+  // a positive value while already processing a current frame, then
+  // skipping_frame will be enabled only for the next frame.
+  bool skipping_frame;
+
+  // Amount of internal frames and external frames started. External frames are
+  // user-visible frames, internal frames includes all external frames and
+  // also invisible frames such as patches, blending-only and dc_level frames.
+  size_t internal_frames;
+  size_t external_frames;
+
+  // For each internal frame, which storage locations it references, and which
+  // storage locations it is stored in, using the bit mask as defined in
+  // FrameDecoder::References and FrameDecoder::SaveAs.
+  std::vector<int> frame_references;
+  std::vector<int> frame_saved_as;
+
+  // Translates external frame index to internal frame index. The external
+  // index is the index of user-visible frames. The internal index can be larger
+  // since non-visible frames (such as frames with patches, ...) are included.
+  std::vector<size_t> frame_external_to_internal;
+
+  // Whether the frame with internal index is required to decode the frame
+  // being skipped to or any frames after that. If no skipping is active,
+  // this vector is ignored. If the current internal frame index is beyond this
+  // vector, it must be treated as a required frame.
+  std::vector<char> frame_required;
+
+  // Codestream input data is stored here, when the decoder takes in and stores
+  // the user input bytes. If the decoder does not do that (e.g. in one-shot
+  // case), this field is unused.
+  // TODO(lode): avoid needing this field once the C++ decoder doesn't need
+  // all bytes at once, to save memory. Find alternative to std::vector doubling
+  // strategy to prevent some memory usage.
+  std::vector<uint8_t> codestream;
+
+  jxl::JxlToJpegDecoder jpeg_decoder;
+
+  // Position in the actual codestream, which codestream.begin() points to.
+  // Non-zero once earlier parts of the codestream vector have been erased.
+  size_t codestream_pos;
+
+  // Statistics which CodecInOut can keep
+  uint64_t dec_pixels;
+
+  const uint8_t* next_in;
+  size_t avail_in;
+};
+
+// TODO(zond): Make this depend on the data loaded into the decoder.
+JxlDecoderStatus JxlDecoderDefaultPixelFormat(const JxlDecoder* dec,
+                                              JxlPixelFormat* format) {
+  const bool ready = dec->got_basic_info;  // needs the basic info first
+  if (ready) *format = {4, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+  return ready ? JXL_DEC_SUCCESS : JXL_DEC_NEED_MORE_INPUT;
+}
+
+void JxlDecoderReset(JxlDecoder* dec) {
+  dec->thread_pool.reset();
+  dec->stage = DecoderStage::kInited;
+
+  // Progress of signature and header parsing.
+  dec->got_signature = false;
+  dec->first_codestream_seen = false;
+  dec->last_codestream_seen = false;
+  dec->got_basic_info = false;
+  dec->header_except_icc_bits = 0;
+  dec->got_all_headers = false;
+  dec->post_headers = false;
+  dec->icc_reader.Reset();
+  dec->got_preview_image = false;
+  dec->basic_info_size_hint = InitialBasicInfoSizeHint();
+  dec->have_container = false;
+
+  // Positions within the file, the current box and the codestream.
+  dec->file_pos = 0;
+  dec->box_begin = dec->box_end = 0;
+  dec->skip_box = false;
+  dec->codestream_pos = 0;
+  dec->codestream_begin = dec->codestream_end = 0;
+
+  // Settings, subscribed events, caller-owned buffers and input cursor.
+  dec->keep_orientation = false;
+  dec->events_wanted = dec->orig_events_wanted = 0;
+  dec->preview_out_buffer_set = dec->image_out_buffer_set = false;
+  dec->preview_out_buffer = dec->image_out_buffer = nullptr;
+  dec->image_out_callback = nullptr;
+  dec->image_out_opaque = nullptr;
+  dec->preview_out_size = dec->image_out_size = 0;
+  dec->dec_pixels = 0;
+  dec->next_in = nullptr;
+  dec->avail_in = 0;
+
+  // Objects used while decoding a frame.
+  dec->passes_state.reset();
+  dec->frame_dec.reset();
+  dec->sections.reset();
+  dec->frame_dec_in_progress = false;
+
+  // Image data and metadata; the frame header points at the fresh metadata.
+  dec->ib.reset();
+  dec->metadata = jxl::CodecMetadata();
+  dec->frame_header.reset(new jxl::FrameHeader(&dec->metadata));
+  dec->codestream.clear();
+
+  // Per-frame progress and the frame bookkeeping used for skipping.
+  dec->frame_stage = FrameStage::kHeader;
+  dec->frame_start = dec->frame_size = 0;
+  dec->is_last_of_still = dec->is_last_total = false;
+  dec->skip_frames = 0;
+  dec->skipping_frame = false;
+  dec->internal_frames = dec->external_frames = 0;
+  dec->frame_references.clear();
+  dec->frame_saved_as.clear();
+  dec->frame_external_to_internal.clear();
+  dec->frame_required.clear();
+}
+
+// Creates a decoder whose storage comes from memory_manager. Returns nullptr
+// if the memory manager cannot be initialized or the allocation fails.
+JxlDecoder* JxlDecoderCreate(const JxlMemoryManager* memory_manager) {
+  JxlMemoryManager manager;
+  if (!jxl::MemoryManagerInit(&manager, memory_manager)) {
+    return nullptr;
+  }
+  void* storage = jxl::MemoryManagerAlloc(&manager, sizeof(JxlDecoder));
+  if (storage == nullptr) return nullptr;
+
+  // Construct in place: the object lives in memory handed out by the manager.
+  JxlDecoder* decoder = new (storage) JxlDecoder();
+  decoder->memory_manager = manager;
+  JxlDecoderReset(decoder);  // brings the decoder into its initial state
+  return decoder;
+}
+
+void JxlDecoderDestroy(JxlDecoder* dec) {
+  if (!dec) return;  // Destroying a null decoder is a no-op.
+  // Copy the manager out first: dec's members are dead after the destructor.
+  JxlMemoryManager local_memory_manager = dec->memory_manager;
+  dec->~JxlDecoder();  // Explicit call: memory came from a custom allocator.
+  jxl::MemoryManagerFree(&local_memory_manager, dec);
+}
+
+void JxlDecoderRewind(JxlDecoder* dec) {
+  // Settings and the frame bookkeeping gathered so far survive the rewind;
+  // everything else goes back to its initial state.
+  const bool keep_orientation = dec->keep_orientation;
+  const int events_wanted = dec->orig_events_wanted;
+  std::vector<int> references = std::move(dec->frame_references);
+  std::vector<int> saved_as = std::move(dec->frame_saved_as);
+  std::vector<size_t> external_to_internal =
+      std::move(dec->frame_external_to_internal);
+  std::vector<char> required = std::move(dec->frame_required);
+
+  JxlDecoderReset(dec);
+
+  dec->keep_orientation = keep_orientation;
+  dec->events_wanted = events_wanted;
+  dec->orig_events_wanted = events_wanted;
+  dec->frame_references = std::move(references);
+  dec->frame_saved_as = std::move(saved_as);
+  dec->frame_external_to_internal = std::move(external_to_internal);
+  dec->frame_required = std::move(required);
+}
+
+void JxlDecoderSkipFrames(JxlDecoder* dec, size_t amount) {
+  // The amount is added to, not stored over, the pending skip count. Lowering
+  // the count is impossible because frames needed to decode earlier frames
+  // may already have been skipped. Setting a larger absolute count is
+  // impossible too: when called while frames are already being skipped, the
+  // user cannot know how many were skipped internally so far, so an absolute
+  // value cannot be defined.
+  dec->skip_frames += amount;
+  dec->frame_required.clear();
+
+  // Dependencies can only be computed for a target frame that is already
+  // known, that is, one that has been seen before a rewind.
+  const size_t target = dec->external_frames + dec->skip_frames;
+  if (target >= dec->frame_external_to_internal.size()) return;
+  const size_t internal_index = dec->frame_external_to_internal[target];
+  if (internal_index >= dec->frame_saved_as.size()) return;
+
+  const std::vector<size_t> required = GetFrameDependencies(
+      internal_index, dec->frame_saved_as, dec->frame_references);
+
+  // Flag each dependency; all other frames up to the target stay 0.
+  std::vector<char>& flags = dec->frame_required;
+  flags.resize(internal_index + 1, 0);
+  for (const size_t dependency : required) {
+    JXL_ASSERT(dependency < flags.size());
+    flags[dependency] = 1;
+  }
+}
+
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetParallelRunner(JxlDecoder* dec, JxlParallelRunner parallel_runner,
+                            void* parallel_runner_opaque) {
+  if (dec->thread_pool) return JXL_API_ERROR("parallel runner already set");
+  auto* pool = new jxl::ThreadPool(parallel_runner, parallel_runner_opaque);
+  dec->thread_pool.reset(pool);  // the decoder owns the pool from here on
+  return JXL_DEC_SUCCESS;
+}
+
+size_t JxlDecoderSizeHintBasicInfo(const JxlDecoder* dec) {
+  // Once the basic info is available no more bytes are needed for it.
+  return dec->got_basic_info ? 0 : dec->basic_info_size_hint;
+}
+
+JxlDecoderStatus JxlDecoderSubscribeEvents(JxlDecoder* dec, int events_wanted) {
+  // Subscriptions are only accepted before decoding has started.
+  const bool started = dec->stage != DecoderStage::kInited;
+  // The six lowest bits (mask 63) are not informative events, so they cannot
+  // be subscribed to.
+  const bool non_informative = (events_wanted & 63) != 0;
+  if (started || non_informative) return JXL_DEC_ERROR;
+  dec->events_wanted = events_wanted;
+  dec->orig_events_wanted = events_wanted;
+  return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderSetKeepOrientation(JxlDecoder* dec,
+                                              JXL_BOOL keep_orientation) {
+  const bool started = dec->stage != DecoderStage::kInited;
+  if (started)
+    return JXL_API_ERROR("Must set keep_orientation option before starting");
+  dec->keep_orientation = (keep_orientation != 0);  // JXL_BOOL -> bool
+  return JXL_DEC_SUCCESS;
+}
+
+namespace jxl {
+namespace {
+
+template <class T>
+bool CanRead(Span<const uint8_t> data, BitReader* reader, T* JXL_RESTRICT t) {
+  // Probe with a scratch reader: Bundle::CanRead advances the bit position.
+  BitReader probe(data);
+  probe.SkipBits(reader->TotalBitsConsumed());
+  const bool readable = Bundle::CanRead(&probe, t);
+  JXL_ASSERT(probe.Close());
+  return readable;
+}
+
+// Reads bundle t from reader, whose underlying bytes are data. Yields
+// JXL_DEC_NEED_MORE_INPUT when CanRead reports that the bundle is not fully
+// available, JXL_DEC_ERROR when reading fails and JXL_DEC_SUCCESS otherwise.
+template <class T>
+JxlDecoderStatus ReadBundle(Span<const uint8_t> data, BitReader* reader,
+                            T* JXL_RESTRICT t) {
+  // Check availability first so that reader is not advanced on short input.
+  const bool complete = CanRead(data, reader, t);
+  if (!complete) return JXL_DEC_NEED_MORE_INPUT;
+
+  if (!Bundle::Read(reader, t)) return JXL_DEC_ERROR;
+  return JXL_DEC_SUCCESS;
+}
+// Returns from the enclosing function unless expr converts to JXL_DEC_SUCCESS.
+#define JXL_API_RETURN_IF_ERROR(expr)               \
+  {                                                 \
+    JxlDecoderStatus status_ = ConvertStatus(expr); \
+    if (status_ != JXL_DEC_SUCCESS) return status_; \
+  }
+
+std::unique_ptr<BitReader, std::function<void(BitReader*)>> GetBitReader(
+    Span<const uint8_t> span) {
+  // Close() must not abort the program when reads went out of bounds;
+  // otherwise every return path, even the ones already reporting failure,
+  // would need to call AllReadsWithinBounds() by hand. An invalid JXL
+  // codestream should not cause the program to quit.
+  const auto close_and_delete = [](BitReader* br) {
+    (void)br->AllReadsWithinBounds();
+    (void)br->Close();
+    delete br;
+  };
+  return std::unique_ptr<BitReader, std::function<void(BitReader*)>>(
+      new BitReader(span), close_and_delete);
+}
+
+JxlDecoderStatus JxlDecoderReadBasicInfo(JxlDecoder* dec, const uint8_t* in,
+                                         size_t size) {
+  // The input must begin with the bare codestream signature, which is skipped.
+  size_t offset = 0;
+  switch (ReadSignature(in, size, &offset)) {
+    case JXL_SIG_CODESTREAM:
+      break;
+    case JXL_SIG_NOT_ENOUGH_BYTES:
+      return JXL_DEC_NEED_MORE_INPUT;
+    case JXL_SIG_CONTAINER:
+      // There is a container signature where we expect a codestream, container
+      // is handled at a higher level already.
+      return JXL_API_ERROR("invalid: nested container");
+    default:
+      return JXL_API_ERROR("invalid signature");
+  }
+
+  Span<const uint8_t> payload(in + offset, size - offset);
+  auto reader = GetBitReader(payload);
+  JXL_API_RETURN_IF_ERROR(
+      ReadBundle(payload, reader.get(), &dec->metadata.size));
+
+  // Parse only the basic-info part of the image metadata for now.
+  auto& image_metadata = dec->metadata.m;
+  image_metadata.nonserialized_only_parse_basic_info = true;
+  JXL_API_RETURN_IF_ERROR(ReadBundle(payload, reader.get(), &image_metadata));
+  image_metadata.nonserialized_only_parse_basic_info = false;
+  dec->got_basic_info = true;
+  dec->basic_info_size_hint = 0;
+  const bool fits = CheckSizeLimit(dec->metadata.size.xsize(),
+                                   dec->metadata.size.ysize());
+  if (!fits) return JXL_API_ERROR("image is too large");
+  return JXL_DEC_SUCCESS;
+}
+
+// Reads all codestream headers (not frame headers) and sets dec->frame_start.
+JxlDecoderStatus JxlDecoderReadAllHeaders(JxlDecoder* dec, const uint8_t* in,
+                                          size_t size) {
+  size_t pos = 0;
+
+  // Check and skip the codestream signature
+  JxlSignature signature = ReadSignature(in, size, &pos);
+  if (signature == JXL_SIG_CONTAINER) {
+    return JXL_API_ERROR("invalid: nested container");
+  }
+  if (signature != JXL_SIG_CODESTREAM) {
+    return JXL_API_ERROR("invalid signature");
+  }
+
+  Span<const uint8_t> span(in + pos, size - pos);
+  auto reader = GetBitReader(span);
+
+  if (dec->header_except_icc_bits != 0) {
+    // Headers were decoded by an earlier call: skip straight to the ICC data.
+    reader->SkipBits(dec->header_except_icc_bits);
+  } else {
+    SizeHeader dummy_size_header;
+    JXL_API_RETURN_IF_ERROR(ReadBundle(span, reader.get(), &dummy_size_header));
+
+    // We already decoded the metadata to dec->metadata.m, no reason to
+    // overwrite it, use a dummy metadata instead.
+    ImageMetadata dummy_metadata;
+    JXL_API_RETURN_IF_ERROR(ReadBundle(span, reader.get(), &dummy_metadata));
+
+    JXL_API_RETURN_IF_ERROR(
+        ReadBundle(span, reader.get(), &dec->metadata.transform_data));
+  }
+  // Remember where the headers end so that a later call can skip them.
+  dec->header_except_icc_bits = reader->TotalBitsConsumed();
+
+  if (dec->metadata.m.color_encoding.WantICC()) {
+    jxl::Status status = dec->icc_reader.Init(reader.get(), memory_limit_base_);
+    // Always check AllReadsWithinBounds, not all the C++ decoder implementation
+    // handles reader out of bounds correctly yet (e.g. context map). Not
+    // checking AllReadsWithinBounds can cause reader->Close() to trigger an
+    // assert, but we don't want library to quit program for invalid codestream.
+    if (!reader->AllReadsWithinBounds()) {
+      return JXL_DEC_NEED_MORE_INPUT;
+    }
+    if (!status) {
+      if (status.code() == StatusCode::kNotEnoughBytes) {
+        return JXL_DEC_NEED_MORE_INPUT;
+      }
+      // Other non-successful status is an error
+      return JXL_DEC_ERROR;
+    }
+    PaddedBytes icc;
+    status = dec->icc_reader.Process(reader.get(), &icc);
+    if (!status) {
+      if (status.code() == StatusCode::kNotEnoughBytes) {
+        return JXL_DEC_NEED_MORE_INPUT;
+      }
+      // Other non-successful status is an error
+      return JXL_DEC_ERROR;
+    }
+    if (!dec->metadata.m.color_encoding.SetICCRaw(std::move(icc))) {
+      return JXL_DEC_ERROR;
+    }
+  }
+
+  dec->got_all_headers = true;
+  JXL_API_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+  // The first frame starts at the byte boundary that follows the headers.
+  dec->frame_start = pos + reader->TotalBitsConsumed() / jxl::kBitsPerByte;
+
+  if (!dec->passes_state) {
+    dec->passes_state.reset(new jxl::PassesDecoderState());
+  }
+
+  dec->default_enc =
+      ColorEncoding::LinearSRGB(dec->metadata.m.color_encoding.IsGray());
+
+  JXL_API_RETURN_IF_ERROR(dec->passes_state->output_encoding_info.Set(
+      dec->metadata, dec->default_enc));
+
+  return JXL_DEC_SUCCESS;
+}
+
+// Returns the number of bytes per output row for `format`, rounded up to a
+// multiple of format.align if that is > 1. `frame`, when given, sets the width.
+static size_t GetStride(const JxlDecoder* dec, const JxlPixelFormat& format,
+                        const jxl::ImageBundle* frame = nullptr) {
+  const bool oriented = !dec->keep_orientation;
+  size_t width = (oriented && dec->metadata.m.orientation > 4)
+                     ? dec->metadata.ysize()
+                     : dec->metadata.xsize();
+  if (frame != nullptr) {
+    width = oriented ? frame->oriented_xsize() : frame->xsize();
+  }
+  const size_t row = width * (BitsPerChannel(format.data_type) *
+                              format.num_channels / jxl::kBitsPerByte);
+  if (format.align <= 1) return row;
+  return jxl::DivCeil(row, format.align) * format.align;
+}
+
+// Converts `frame` to the external pixel layout described by `format` via
+// jxl::ConvertToExternal, handing it the given output buffer and callback.
+static JxlDecoderStatus ConvertImageInternal(const JxlDecoder* dec,
+                                             const jxl::ImageBundle& frame,
+                                             const JxlPixelFormat& format,
+                                             void* out_image, size_t out_size,
+                                             JxlImageOutCallback out_callback,
+                                             void* out_opaque) {
+  // TODO(lode): handle mismatch of RGB/grayscale color profiles and pixel data
+  // color/grayscale format
+  const bool is_float = format.data_type == JXL_TYPE_FLOAT ||
+                        format.data_type == JXL_TYPE_FLOAT16;
+  const size_t row_stride = GetStride(dec, format, &frame);
+
+  // Undo the stored orientation unless the caller asked to keep it.
+  jxl::Orientation undo = jxl::Orientation::kIdentity;
+  if (!dec->keep_orientation) undo = dec->metadata.m.GetOrientation();
+
+  JXL_DASSERT(!dec->frame_dec || !dec->frame_dec->HasRGBBuffer());
+  if (!jxl::ConvertToExternal(frame, BitsPerChannel(format.data_type), is_float,
+                              format.num_channels, format.endianness,
+                              row_stride, dec->thread_pool.get(), out_image,
+                              out_size, out_callback, out_opaque, undo)) {
+    return JXL_DEC_ERROR;
+  }
+  return JXL_DEC_SUCCESS;
+}
+
+// Decodes the FrameHeader and the TOC located at in[pos, size) and stores the
+// byte size of the whole frame (header, TOC and groups) in *frame_size.
+// is_preview is copied into frame_header->nonserialized_is_preview before the
+// decode. If saved_as is not null it receives FrameDecoder::SavedAs() of the
+// parsed header. Returns JXL_DEC_NEED_MORE_INPUT when the bytes given so far
+// do not cover header and TOC, and an error for invalid or too large frames.
+// TODO(lode): merge this with FrameDecoder
+JxlDecoderStatus ParseFrameHeader(jxl::FrameHeader* frame_header,
+                                  const uint8_t* in, size_t size, size_t pos,
+                                  bool is_preview, size_t* frame_size,
+                                  int* saved_as) {
+  if (pos >= size) return JXL_DEC_NEED_MORE_INPUT;
+
+  Span<const uint8_t> bytes(in + pos, size - pos);
+  auto br = GetBitReader(bytes);
+
+  frame_header->nonserialized_is_preview = is_preview;
+  jxl::Status header_status = DecodeFrameHeader(br.get(), frame_header);
+  const jxl::FrameDimensions dims = frame_header->ToFrameDimensions();
+  // The size limit is enforced first, whatever the outcome of the decode was.
+  if (!CheckSizeLimit(dims.xsize_upsampled_padded,
+                      dims.ysize_upsampled_padded)) {
+    return JXL_API_ERROR("frame is too large");
+  }
+
+  // TODO(lode): prevent asking for way too much input bytes in case of
+  // invalid header that the decoder thinks is a very long user extension
+  // instead. Example: fields can currently print something like this:
+  // "../lib/jxl/fields.cc:416: Skipping 71467322-bit extension(s)"
+  // Maybe fields.cc should return error in the above case rather than
+  // print a message.
+  if (header_status.code() == StatusCode::kNotEnoughBytes) {
+    return JXL_DEC_NEED_MORE_INPUT;
+  }
+  if (!header_status) return JXL_API_ERROR("invalid frame header");
+
+  // Read TOC.
+  std::vector<uint64_t> offsets;
+  std::vector<uint32_t> sizes;
+  uint64_t groups_total_size;
+
+  const size_t num_toc_entries = NumTocEntries(
+      dims.num_groups, dims.num_dc_groups, frame_header->passes.num_passes,
+      /*has_ac_global=*/true);
+  jxl::Status toc_status = ReadGroupOffsets(
+      num_toc_entries, br.get(), &offsets, &sizes, &groups_total_size);
+
+  // TODO(lode): we're actually relying on AllReadsWithinBounds() here
+  // instead of on status.code(), change the internal TOC C++ code to
+  // correctly set the status.code() instead so we can rely on that one.
+  const bool toc_truncated =
+      !br->AllReadsWithinBounds() ||
+      toc_status.code() == StatusCode::kNotEnoughBytes;
+  if (toc_truncated) return JXL_DEC_NEED_MORE_INPUT;
+  if (!toc_status) return JXL_API_ERROR("invalid toc entries");
+
+  JXL_DASSERT((br->TotalBitsConsumed() % kBitsPerByte) == 0);
+  JXL_API_RETURN_IF_ERROR(br->JumpToByteBoundary());
+  const size_t header_bytes = br->TotalBitsConsumed() / kBitsPerByte;
+  *frame_size = header_bytes + groups_total_size;
+
+  if (saved_as != nullptr) {
+    *saved_as = FrameDecoder::SavedAs(*frame_header);
+  }
+  return JXL_DEC_SUCCESS;
+}
+
+// Decodes from the codestream bytes in[0, size): basic info, other headers,
+// preview and frames. Returns on each event, error or need for input/output.
+// TODO(eustas): no CodecInOut -> no image size reinforcement -> possible OOM.
+JxlDecoderStatus JxlDecoderProcessInternal(JxlDecoder* dec, const uint8_t* in,
+                                           size_t size) {
+  // If no parallel runner is set, use the default
+  // TODO(lode): move this initialization to an appropriate location once the
+  // runner is used to decode pixels.
+  if (!dec->thread_pool) {
+    dec->thread_pool.reset(new jxl::ThreadPool(nullptr, nullptr));
+  }
+
+  // No matter what events are wanted, the basic info is always required.
+  if (!dec->got_basic_info) {
+    JxlDecoderStatus status = JxlDecoderReadBasicInfo(dec, in, size);
+    if (status != JXL_DEC_SUCCESS) return status;
+  }
+
+  if (dec->events_wanted & JXL_DEC_BASIC_INFO) {
+    dec->events_wanted &= ~JXL_DEC_BASIC_INFO;  // report this event only once
+    return JXL_DEC_BASIC_INFO;
+  }
+
+  if (!dec->got_all_headers) {
+    JxlDecoderStatus status = JxlDecoderReadAllHeaders(dec, in, size);
+    if (status != JXL_DEC_SUCCESS) return status;
+  }
+
+  if (dec->events_wanted & JXL_DEC_EXTENSIONS) {
+    dec->events_wanted &= ~JXL_DEC_EXTENSIONS;
+    if (dec->metadata.m.extensions != 0) {
+      return JXL_DEC_EXTENSIONS;
+    }
+  }
+
+  if (dec->events_wanted & JXL_DEC_COLOR_ENCODING) {
+    dec->events_wanted &= ~JXL_DEC_COLOR_ENCODING;
+    return JXL_DEC_COLOR_ENCODING;
+  }
+
+  dec->post_headers = true;
+
+  // Decode to pixels, only if required for the events the user wants.
+  if (!dec->got_preview_image) {
+    // Parse the preview, or at least its TOC to be able to skip the frame, if
+    // any frame or image decoding is desired.
+    bool parse_preview =
+        (dec->events_wanted &
+         (JXL_DEC_PREVIEW_IMAGE | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+
+    if (!dec->metadata.m.have_preview) {
+      dec->got_preview_image = true;  // no preview at all, step is done
+    } else if (!parse_preview) {
+      dec->got_preview_image = true;  // no parsing needed, step is done
+    } else {
+      // Want to decode the preview, not just skip the frame
+      bool want_preview = (dec->events_wanted & JXL_DEC_PREVIEW_IMAGE);
+      size_t frame_size;
+      size_t pos = dec->frame_start;
+      dec->frame_header.reset(new FrameHeader(&dec->metadata));
+      JxlDecoderStatus status = ParseFrameHeader(dec->frame_header.get(), in,
+                                                 size, pos, true, &frame_size,
+                                                 /*saved_as=*/nullptr);
+      if (status != JXL_DEC_SUCCESS) return status;
+      if (OutOfBounds(pos, frame_size, size)) {
+        return JXL_DEC_NEED_MORE_INPUT;
+      }
+
+      if (want_preview && !dec->preview_out_buffer_set) {
+        return JXL_DEC_NEED_PREVIEW_OUT_BUFFER;
+      }
+
+      jxl::Span<const uint8_t> compressed(in + dec->frame_start,
+                                          size - dec->frame_start);
+      auto reader = GetBitReader(compressed);
+      jxl::DecompressParams dparams;
+      dparams.preview = want_preview ? jxl::Override::kOn : jxl::Override::kOff;
+      jxl::ImageBundle ib(&dec->metadata.m);
+      PassesDecoderState preview_dec_state;
+      JXL_API_RETURN_IF_ERROR(preview_dec_state.output_encoding_info.Set(
+          dec->metadata,
+          ColorEncoding::LinearSRGB(dec->metadata.m.color_encoding.IsGray())));
+      if (!DecodeFrame(dparams, &preview_dec_state, dec->thread_pool.get(),
+                       reader.get(), &ib, dec->metadata,
+                       /*constraints=*/nullptr,
+                       /*is_preview=*/true)) {
+        return JXL_API_ERROR("decoding preview failed");
+      }
+
+      // Set frame_start to the first non-preview frame.
+      dec->frame_start += DivCeil(reader->TotalBitsConsumed(), kBitsPerByte);
+      dec->got_preview_image = true;
+
+      if (want_preview) {
+        if (dec->preview_out_buffer) {
+          JxlDecoderStatus status = ConvertImageInternal(
+              dec, ib, dec->preview_out_format, dec->preview_out_buffer,
+              dec->preview_out_size, /*out_callback=*/nullptr,
+              /*out_opaque=*/nullptr);
+          if (status != JXL_DEC_SUCCESS) return status;
+        }
+        return JXL_DEC_PREVIEW_IMAGE;
+      }
+    }
+  }
+
+  // Handle frames: each iteration moves the current frame on from its saved
+  // dec->frame_stage (kHeader, kTOC, kFull, kFullOutput), returning early when
+  // input or an output buffer is missing or an event has to be reported.
+  for (;;) {
+    if (!(dec->events_wanted & (JXL_DEC_FULL_IMAGE | JXL_DEC_FRAME))) {
+      break;
+    }
+    if (dec->frame_stage == FrameStage::kHeader && dec->is_last_total) {
+      break;
+    }
+
+    if (dec->frame_stage == FrameStage::kHeader) {
+      size_t pos = dec->frame_start - dec->codestream_pos;
+      if (pos >= size) {
+        return JXL_DEC_NEED_MORE_INPUT;
+      }
+      dec->frame_header.reset(new FrameHeader(&dec->metadata));
+      int saved_as = 0;
+      JxlDecoderStatus status =
+          ParseFrameHeader(dec->frame_header.get(), in, size, pos,
+                           /*is_preview=*/false, &dec->frame_size, &saved_as);
+      if (status != JXL_DEC_SUCCESS) return status;
+
+      // is last in entire codestream
+      dec->is_last_total = dec->frame_header->is_last;
+      // is last of current still
+      dec->is_last_of_still =
+          dec->is_last_total || dec->frame_header->animation_frame.duration > 0;
+
+      const size_t internal_frame_index = dec->internal_frames;
+      const size_t external_frame_index = dec->external_frames;
+      if (dec->is_last_of_still) dec->external_frames++;
+      dec->internal_frames++;
+
+      dec->frame_stage = FrameStage::kTOC;
+
+      if (dec->skip_frames > 0) {
+        dec->skipping_frame = true;
+        if (dec->is_last_of_still) {
+          dec->skip_frames--;
+        }
+      } else {
+        dec->skipping_frame = false;
+      }
+
+      if (external_frame_index >= dec->frame_external_to_internal.size()) {
+        dec->frame_external_to_internal.push_back(internal_frame_index);
+        JXL_ASSERT(dec->frame_external_to_internal.size() ==
+                   external_frame_index + 1);
+      }
+
+      if (internal_frame_index >= dec->frame_saved_as.size()) {
+        dec->frame_saved_as.push_back(saved_as);
+        JXL_ASSERT(dec->frame_saved_as.size() == internal_frame_index + 1);
+
+        // New slots start at 0xff (= references everything): the real value is
+        // only known at FinalizeFrame and is filled in there, so until then the
+        // worst case, a frame depending on all storage slots, is assumed.
+        dec->frame_references.push_back(0xff);
+        JXL_ASSERT(dec->frame_references.size() == internal_frame_index + 1);
+      }
+
+      if (dec->skipping_frame) {
+        // Whether this frame could be referenced by any future frame: either
+        // because it's a frame saved for blending or patches, or because it's
+        // a DC frame.
+        bool referenceable =
+            dec->frame_header->CanBeReferenced() ||
+            dec->frame_header->frame_type == FrameType::kDCFrame;
+        if (internal_frame_index < dec->frame_required.size() &&
+            !dec->frame_required[internal_frame_index]) {
+          referenceable = false;
+        }
+        if (!referenceable) {
+          // Skip all decoding for this frame, since the user is skipping this
+          // frame and no future frames can reference it.
+          dec->frame_stage = FrameStage::kHeader;
+          dec->frame_start += dec->frame_size;
+          continue;
+        }
+      }
+
+      if ((dec->events_wanted & JXL_DEC_FRAME) && dec->is_last_of_still) {
+        // Only return this for the last of a series of stills: patches frames
+        // etc... before this one do not contain the correct information such
+        // as animation timing, ...
+        if (!dec->skipping_frame) {
+          return JXL_DEC_FRAME;
+        }
+      }
+    }
+
+    if (dec->frame_stage == FrameStage::kTOC) {
+      size_t pos = dec->frame_start - dec->codestream_pos;
+      if (pos >= size) {
+        return JXL_DEC_NEED_MORE_INPUT;
+      }
+      Span<const uint8_t> span(in + pos, size - pos);
+      auto reader = GetBitReader(span);
+
+      if (!dec->passes_state) {
+        dec->passes_state.reset(new jxl::PassesDecoderState());
+      }
+      if (!dec->ib) {
+        dec->ib.reset(new jxl::ImageBundle(&dec->metadata.m));
+      }
+
+      dec->frame_dec.reset(new FrameDecoder(
+          dec->passes_state.get(), dec->metadata, dec->thread_pool.get()));
+
+      // If JPEG reconstruction is wanted and possible, set the jpeg_data of
+      // the ImageBundle.
+      if (!dec->jpeg_decoder.SetImageBundleJpegData(dec->ib.get()))
+        return JXL_DEC_ERROR;
+
+      jxl::Status status = dec->frame_dec->InitFrame(
+          reader.get(), dec->ib.get(), /*is_preview=*/false,
+          /*allow_partial_frames=*/false, /*allow_partial_dc_global=*/false);
+      if (!status) JXL_API_RETURN_IF_ERROR(status);
+
+      size_t sections_begin =
+          DivCeil(reader->TotalBitsConsumed(), kBitsPerByte);
+
+      dec->sections.reset(
+          new Sections(dec->frame_dec.get(), dec->frame_size, sections_begin));
+      JXL_API_RETURN_IF_ERROR(dec->sections->Init());
+
+      // If pixels are not needed, skip decoding the frame (kFull/kFullOutput):
+      // frame_stage is left at kTOC, so none of those stages below run and the
+      // loop continues with the next frame.
+      if (dec->events_wanted & JXL_DEC_FULL_IMAGE) {
+        dec->frame_dec_in_progress = true;
+        dec->frame_stage = FrameStage::kFull;
+      }
+    }
+
+    bool return_full_image = false;
+
+    if (dec->frame_stage == FrameStage::kFull) {
+      if (dec->events_wanted & JXL_DEC_FULL_IMAGE) {
+        if (!dec->image_out_buffer_set && (!dec->jpeg_decoder.IsOutputSet() ||
+                                           dec->ib->jpeg_data == nullptr) &&
+            dec->is_last_of_still) {
+          // TODO(lode): remove the dec->is_last_of_still condition if the
+          // frame decoder needs the image buffer as working space for decoding
+          // non-visible or blending frames too
+          if (!dec->skipping_frame) {
+            return JXL_DEC_NEED_IMAGE_OUT_BUFFER;
+          }
+        }
+      }
+
+      if (dec->image_out_buffer_set && !!dec->image_out_buffer &&
+          dec->image_out_format.data_type == JXL_TYPE_UINT8 &&
+          dec->image_out_format.num_channels >= 3) {
+        bool is_rgba = dec->image_out_format.num_channels == 4;
+        dec->frame_dec->MaybeSetRGB8OutputBuffer(
+            reinterpret_cast<uint8_t*>(dec->image_out_buffer),
+            GetStride(dec, dec->image_out_format), is_rgba,
+            !dec->keep_orientation);
+      }
+
+      const bool little_endian =
+          dec->image_out_format.endianness == JXL_LITTLE_ENDIAN ||
+          (dec->image_out_format.endianness == JXL_NATIVE_ENDIAN &&
+           IsLittleEndian());
+      bool swap_endianness = little_endian != IsLittleEndian();
+
+      // TODO(lode): Support more formats than just native endian float32 for
+      // the low-memory callback path
+      if (dec->image_out_buffer_set && !!dec->image_out_callback &&
+          dec->image_out_format.data_type == JXL_TYPE_FLOAT &&
+          dec->image_out_format.num_channels >= 3 && !swap_endianness &&
+          dec->frame_dec_in_progress) {
+        bool is_rgba = dec->image_out_format.num_channels == 4;
+        dec->frame_dec->MaybeSetFloatCallback(
+            [dec](const float* pixels, size_t x, size_t y, size_t num_pixels) {
+              dec->image_out_callback(dec->image_out_opaque, x, y, num_pixels,
+                                      pixels);
+            },
+            is_rgba, !dec->keep_orientation);
+      }
+
+      size_t pos = dec->frame_start - dec->codestream_pos;
+      if (pos >= size) {
+        return JXL_DEC_NEED_MORE_INPUT;
+      }
+      dec->sections->SetInput(in + pos, size - pos);
+
+      if (cpu_limit_base_ != 0) {
+        FrameDimensions frame_dim = dec->frame_header->ToFrameDimensions();
+        // No overflow, checked in ParseHeader.
+        size_t num_pixels = frame_dim.xsize * frame_dim.ysize;
+        if (used_cpu_base_ + num_pixels < used_cpu_base_) {
+          return JXL_API_ERROR("used too much CPU");
+        }
+        used_cpu_base_ += num_pixels;
+        if (used_cpu_base_ > cpu_limit_base_) {
+          return JXL_API_ERROR("used too much CPU");
+        }
+      }
+
+      jxl::Status status =
+          dec->frame_dec->ProcessSections(dec->sections->section_info.data(),
+                                          dec->sections->section_info.size(),
+                                          dec->sections->section_status.data());
+      JXL_API_RETURN_IF_ERROR(dec->sections->CloseInput());
+      if (status.IsFatalError()) {
+        return JXL_API_ERROR("decoding frame failed");
+      }
+
+      // TODO(lode): allow next_in to move forward if sections from the
+      // beginning of the stream have been processed
+
+      if (status.code() == StatusCode::kNotEnoughBytes ||
+          dec->sections->section_info.size() < dec->frame_dec->NumSections()) {
+        // Not all sections have been processed yet
+        return JXL_DEC_NEED_MORE_INPUT;
+      }
+
+      size_t internal_index = dec->internal_frames - 1;  // index of this frame
+      JXL_ASSERT(dec->frame_references.size() > internal_index);
+      // Always fill this in, even if it was already written, it could be that
+      // this frame was skipped before and set to 255, while only now we know
+      // the true value.
+      dec->frame_references[internal_index] = dec->frame_dec->References();
+      if (!dec->frame_dec->FinalizeFrame()) {
+        return JXL_API_ERROR("decoding frame failed");
+      }
+      dec->frame_dec_in_progress = false;
+      dec->frame_stage = FrameStage::kFullOutput;
+    }
+
+    if (dec->frame_stage == FrameStage::kFullOutput) {
+      if (dec->is_last_of_still) {
+        if (dec->events_wanted & JXL_DEC_FULL_IMAGE) {
+          dec->events_wanted &= ~JXL_DEC_FULL_IMAGE;
+          return_full_image = true;
+        }
+
+        // Frame finished, restore the events_wanted with the per-frame events
+        // from orig_events_wanted, in case there is a next frame.
+        dec->events_wanted |=
+            (dec->orig_events_wanted & (JXL_DEC_FULL_IMAGE | JXL_DEC_FRAME));
+
+        // If no output buffer was set, we merely return the JXL_DEC_FULL_IMAGE
+        // status without outputting pixels.
+        if (dec->jpeg_decoder.IsOutputSet() && dec->ib->jpeg_data != nullptr) {
+          JxlDecoderStatus status =
+              dec->jpeg_decoder.WriteOutput(*dec->ib->jpeg_data);
+          if (status != JXL_DEC_SUCCESS) return status;
+        } else if (return_full_image && dec->image_out_buffer_set) {
+          if (!dec->frame_dec->HasRGBBuffer()) {
+            // Copy pixels if desired.
+            JxlDecoderStatus status = ConvertImageInternal(
+                dec, *dec->ib, dec->image_out_format, dec->image_out_buffer,
+                dec->image_out_size, dec->image_out_callback,
+                dec->image_out_opaque);
+            if (status != JXL_DEC_SUCCESS) return status;
+          }
+          dec->image_out_buffer_set = false;
+        }
+      }
+    }
+
+    // The pixels have been output or are not needed, do not keep them in
+    // memory here.
+    dec->ib.reset();
+    dec->frame_stage = FrameStage::kHeader;
+    dec->frame_start += dec->frame_size;
+    if (return_full_image && !dec->skipping_frame) {
+      return JXL_DEC_FULL_IMAGE;
+    }
+  }
+
+  dec->stage = DecoderStage::kFinished;
+  // Return success, this means there is nothing more to do.
+  return JXL_DEC_SUCCESS;
+}
+
+}  // namespace
+}  // namespace jxl
+
+JxlDecoderStatus JxlDecoderSetInput(JxlDecoder* dec, const uint8_t* data,
+                                    size_t size) {
+  // Refuse new input while previously set input has not been released.
+  if (dec->next_in != nullptr) return JXL_DEC_ERROR;
+  dec->avail_in = size;
+  dec->next_in = data;
+  return JXL_DEC_SUCCESS;
+}
+
+size_t JxlDecoderReleaseInput(JxlDecoder* dec) {
+  const size_t unprocessed = dec->avail_in;  // bytes still left in the input
+  dec->avail_in = 0;
+  dec->next_in = nullptr;
+  return unprocessed;
+}
+
+JxlDecoderStatus JxlDecoderSetJPEGBuffer(JxlDecoder* dec, uint8_t* data,
+                                         size_t size) {
+  return dec->jpeg_decoder.SetOutputBuffer(data, size);  // result returned as is
+}
+
+size_t JxlDecoderReleaseJPEGBuffer(JxlDecoder* dec) {
+  return dec->jpeg_decoder.ReleaseOutputBuffer();  // result returned as is
+}
+
+JxlDecoderStatus JxlDecoderProcessInput(JxlDecoder* dec) {
+  const uint8_t** next_in = &dec->next_in;
+  size_t* avail_in = &dec->avail_in;
+  if (dec->stage == DecoderStage::kInited) {
+    dec->stage = DecoderStage::kStarted;
+  }
+  if (dec->stage == DecoderStage::kError) {
+    return JXL_API_ERROR(
+        "Cannot keep using decoder after it encountered an error, use "
+        "JxlDecoderReset to reset it");
+  }
+  if (dec->stage == DecoderStage::kFinished) {
+    return JXL_API_ERROR(
+        "Cannot keep using decoder after it finished, use JxlDecoderReset to "
+        "reset it");
+  }
+
+  if (!dec->got_signature) {
+    JxlSignature sig = JxlSignatureCheck(*next_in, *avail_in);
+    if (sig == JXL_SIG_INVALID) return JXL_API_ERROR("invalid signature");
+    if (sig == JXL_SIG_NOT_ENOUGH_BYTES) return JXL_DEC_NEED_MORE_INPUT;
+
+    dec->got_signature = true;
+
+    if (sig == JXL_SIG_CONTAINER) {
+      dec->have_container = 1;
+    }
+  }
+
+  // Available codestream bytes, may differ from *avail_in if there is another
+  // box behind the current position, in the dec->have_container case.
+  size_t csize = *avail_in;
+
+  if (dec->have_container) {
+    /*
+    Process bytes as follows:
+    *) find the box(es) containing the codestream
+    *) support codestream split over multiple partial boxes
+    *) avoid copying bytes to the codestream vector if the decoding will be
+     one-shot, when the user already provided everything contiguously in
+     memory
+    *) copy to codestream vector, and update next_in so user can delete the data
+    on their side, once we know it's not oneshot. This relieves the user from
+    continuing to store the data.
+    *) also copy to codestream if one-shot but the codestream is split across
+    multiple boxes: this copying can be avoided in the future if the C++
+    decoder is updated for streaming, but for now it requires all consecutive
+    data at once.
+    */
+
+    if (dec->skip_box) {
+      // Amount of remaining bytes in the box that is being skipped.
+      size_t remaining = dec->box_end - dec->file_pos;
+      if (*avail_in < remaining) {
+        // Don't have the full box yet, skip all we have so far
+        dec->file_pos += *avail_in;
+        *next_in += *avail_in;
+        *avail_in -= *avail_in;
+        return JXL_DEC_NEED_MORE_INPUT;
+      } else {
+        // Full box available, skip all its remaining bytes
+        dec->file_pos += remaining;
+        *next_in += remaining;
+        *avail_in -= remaining;
+        dec->skip_box = false;
+      }
+    }
+
+    if (dec->first_codestream_seen && !dec->last_codestream_seen &&
+        dec->codestream_end != 0 && dec->file_pos < dec->codestream_end &&
+        dec->file_pos + *avail_in >= dec->codestream_end &&
+        !dec->codestream.empty()) {
+      // dec->file_pos in a codestream, not in surrounding box format bytes, but
+      // the end of the current codestream part is in the current input, and
+      // boxes that can contain a next part of the codestream could be present.
+      // Therefore, store the known codestream part, and ensure processing of
+      // boxes below will trigger. This is only done if
+      // !dec->codestream.empty(), that is, we're already streaming.
+
+      // Size of the codestream, excluding potential boxes that come after it.
+      csize = *avail_in;
+      if (dec->codestream_end && csize > dec->codestream_end - dec->file_pos) {
+        csize = dec->codestream_end - dec->file_pos;
+      }
+      dec->codestream.insert(dec->codestream.end(), *next_in, *next_in + csize);
+      dec->file_pos += csize;
+      *next_in += csize;
+      *avail_in -= csize;
+    }
+
+    if (dec->jpeg_decoder.IsParsingBox()) {
+      // We are inside a JPEG reconstruction box.
+      JxlDecoderStatus recon_result =
+          dec->jpeg_decoder.Process(next_in, avail_in);
+      if (recon_result == JXL_DEC_JPEG_RECONSTRUCTION) {
+        // If successful JPEG reconstruction, return the success if the user
+        // cares about it, otherwise continue.
+        if (dec->events_wanted & recon_result) {
+          dec->events_wanted &= ~recon_result;
+          return recon_result;
+        }
+      } else {
+        // If anything else, return the result.
+        return recon_result;
+      }
+    }
+
+    if (!dec->last_codestream_seen &&
+        (dec->codestream_begin == 0 ||
+         (dec->codestream_end != 0 && dec->file_pos >= dec->codestream_end))) {
+      size_t pos = 0;
+      // after this for loop, either we should be in a part of the data that is
+      // codestream (not boxes), or have returned that we need more input.
+      for (;;) {
+        const uint8_t* in = *next_in;
+        size_t size = *avail_in;
+        if (size == pos) {
+          // If the remaining size is 0, we are exactly after a full box. We
+          // can't know for sure if this is the last box or not since more bytes
+          // can follow, but do not return NEED_MORE_INPUT, instead break and
+          // let the codestream-handling code determine if we need more.
+          break;
+        }
+        if (OutOfBounds(pos, 8, size)) {
+          dec->basic_info_size_hint =
+              InitialBasicInfoSizeHint() + pos + 8 - dec->file_pos;
+          return JXL_DEC_NEED_MORE_INPUT;
+        }
+        size_t box_start = pos;
+        // Box size, including this header itself.
+        uint64_t box_size = LoadBE32(in + pos);
+        char type[5] = {0};
+        memcpy(type, in + pos + 4, 4);
+        pos += 8;
+        if (box_size == 1) {
+          if (OutOfBounds(pos, 8, size)) return JXL_DEC_NEED_MORE_INPUT;
+          box_size = LoadBE64(in + pos);
+          pos += 8;
+        }
+        size_t header_size = pos - box_start;
+        if (box_size > 0 && box_size < header_size) {
+          return JXL_API_ERROR("invalid box size");
+        }
+        if (SumOverflows(dec->file_pos, pos, box_size)) {
+          return JXL_API_ERROR("Box size overflow");
+        }
+        size_t contents_size =
+            (box_size == 0) ? 0 : (box_size - pos + box_start);
+
+        dec->box_begin = box_start;
+        dec->box_end = dec->file_pos + box_start + box_size;
+        if (strcmp(type, "jxlc") == 0 || strcmp(type, "jxlp") == 0) {
+          size_t codestream_size = contents_size;
+          // Whether this is the last codestream box, either when it is a jxlc
+          // box, or when it is a jxlp box that has the final bit set.
+          // The codestream is either contained within a single jxlc box, or
+          // within one or more jxlp boxes. The final jxlp box is marked as last
+          // by setting the high bit of its 4-byte box-index value.
+          bool last_codestream = false;
+          if (strcmp(type, "jxlp") == 0) {
+            if (OutOfBounds(pos, 4, size)) return JXL_DEC_NEED_MORE_INPUT;
+            if (box_size != 0 && contents_size < 4) {
+              return JXL_API_ERROR("jxlp box too small to contain index");
+            }
+            codestream_size -= 4;
+            size_t jxlp_index = LoadBE32(in + pos);
+            pos += 4;
+            // The high bit of jxlp_index indicates whether this is the last
+            // jxlp box.
+            if (jxlp_index & 0x80000000) last_codestream = true;
+          } else if (strcmp(type, "jxlc") == 0) {
+            last_codestream = true;
+          }
+          if (!last_codestream && box_size == 0) {
+            return JXL_API_ERROR(
+                "final box has unbounded size, but is a non-final codestream "
+                "box");
+          }
+          dec->first_codestream_seen = true;
+          if (last_codestream) dec->last_codestream_seen = true;
+          if (dec->codestream_begin != 0 && dec->codestream.empty()) {
+            // We've already seen a codestream part, so it's a stream spanning
+            // multiple boxes.
+            // We have no choice but to copy contents to the codestream
+            // vector to make it a contiguous stream for the C++ decoder.
+            // This appends the previous codestream box that we had seen to
+            // dec->codestream.
+            if (dec->codestream_begin < dec->file_pos) {
+              return JXL_API_ERROR("earlier codestream box out of range");
+            }
+            size_t begin = dec->codestream_begin - dec->file_pos;
+            size_t end = dec->codestream_end - dec->file_pos;
+            JXL_ASSERT(end <= *avail_in);
+            dec->codestream.insert(dec->codestream.end(), *next_in + begin,
+                                   *next_in + end);
+          }
+          dec->codestream_begin = dec->file_pos + pos;
+          dec->codestream_end =
+              (box_size == 0) ? 0 : (dec->codestream_begin + codestream_size);
+          size_t avail_codestream_size =
+              (box_size == 0)
+                  ? (size - pos)
+                  : std::min<size_t>(size - pos, box_size - pos + box_start);
+          // If already appending codestream, append what we have here too
+          if (!dec->codestream.empty()) {
+            size_t begin = pos;
+            size_t end =
+                std::min<size_t>(*avail_in, begin + avail_codestream_size);
+            dec->codestream.insert(dec->codestream.end(), *next_in + begin,
+                                   *next_in + end);
+            pos += (end - begin);
+            dec->file_pos += pos;
+            *next_in += pos;
+            *avail_in -= pos;
+            pos = 0;
+            // TODO(lode): check if this should break always instead, and
+            // process what we have of the codestream so far, to support
+            // progressive decoding, and get events such as basic info faster.
+            // The user could have given 1.5 boxes here, and the first one could
+            // contain useful parts of codestream that can already be processed.
+            // Similar to several other exact avail_size checks. This may not
+            // need to be changed here, but instead at the point in this for
+            // loop where it returns "NEED_MORE_INPUT", it could instead break
+            // and allow decoding what we have of the codestream so far.
+            if (*avail_in == 0) break;
+          } else {
+            // skip only the header, so next_in points to the start of this new
+            // codestream part, for the one-shot case where user data is not
+            // (yet) copied to dec->codestream.
+            dec->file_pos += pos;
+            *next_in += pos;
+            *avail_in -= pos;
+            pos = 0;
+            // Update pos to be after the box contents with codestream
+            if (avail_codestream_size == *avail_in) {
+              break;  // the rest is codestream, this loop is done
+            }
+            pos += avail_codestream_size;
+          }
+        } else if ((JPEGXL_ENABLE_TRANSCODE_JPEG) &&
+                   (dec->orig_events_wanted & JXL_DEC_JPEG_RECONSTRUCTION) &&
+                   strcmp(type, "jbrd") == 0) {
+          // This is a new JPEG reconstruction metadata box.
+          dec->jpeg_decoder.StartBox(box_size, contents_size);
+          dec->file_pos += pos;
+          *next_in += pos;
+          *avail_in -= pos;
+          pos = 0;
+          JxlDecoderStatus recon_result =
+              dec->jpeg_decoder.Process(next_in, avail_in);
+          if (recon_result == JXL_DEC_JPEG_RECONSTRUCTION) {
+            // If successful JPEG reconstruction, return the success if the user
+            // cares about it, otherwise continue.
+            if (dec->events_wanted & recon_result) {
+              dec->events_wanted &= ~recon_result;
+              return recon_result;
+            }
+          } else {
+            // If anything else, return the result.
+            return recon_result;
+          }
+        } else {
+          if (box_size == 0) {
+            // Final box with unknown size, but it's not a codestream box, so
+            // nothing more to do.
+            if (!dec->first_codestream_seen) {
+              return JXL_API_ERROR("didn't find any codestream box");
+            }
+            break;
+          }
+          if (OutOfBounds(pos, contents_size, size)) {
+            dec->skip_box = true;
+            dec->file_pos += pos;
+            *next_in += pos;
+            *avail_in -= pos;
+            // Indicate how many more bytes needed starting from *next_in.
+            dec->basic_info_size_hint = InitialBasicInfoSizeHint() + pos +
+                                        contents_size - dec->file_pos;
+            return JXL_DEC_NEED_MORE_INPUT;
+          }
+          pos += contents_size;
+          if (!(dec->codestream.empty() && dec->first_codestream_seen)) {
+            // Last box no longer needed since we have copied the codestream
+            // buffer, remove from input so user can release memory.
+            dec->file_pos += pos;
+            *next_in += pos;
+            *avail_in -= pos;
+            pos = 0;
+          }
+        }
+      }
+    }
+
+    // Size of the codestream, excluding potential boxes that come after it.
+    csize = *avail_in;
+    if (dec->codestream_end && csize > dec->codestream_end - dec->file_pos) {
+      csize = dec->codestream_end - dec->file_pos;
+    }
+  }
+
+  // Whether we are taking the input directly from the user (oneshot case,
+  // without copying bytes), or appending parts of input to dec->codestream
+  // (streaming)
+  bool detected_streaming = !dec->codestream.empty();
+  JxlDecoderStatus result;
+  JXL_DASSERT(csize <= *avail_in);
+
+  if (detected_streaming) {
+    dec->codestream.insert(dec->codestream.end(), *next_in, *next_in + csize);
+    dec->file_pos += csize;
+    *next_in += csize;
+    *avail_in -= csize;
+    result = jxl::JxlDecoderProcessInternal(dec, dec->codestream.data(),
+                                            dec->codestream.size());
+  } else {
+    // No data copied to codestream buffer yet, the user input may contain the
+    // full codestream.
+    result = jxl::JxlDecoderProcessInternal(dec, *next_in, csize);
+    // Copy the user's input bytes to the codestream once we are able to and
+    // it is needed. Before we got the basic info, we're still parsing the box
+    // format instead. If the result is not JXL_DEC_NEED_MORE_INPUT, then
+    // there is no reason yet to copy since the user may have a full buffer
+    // allowing one-shot. Once JXL_DEC_NEED_MORE_INPUT occurred at least once,
+    // start copying over the codestream bytes and allow user to free them
+    // instead. Next call, detected_streaming will be true.
+    if (dec->got_basic_info && result == JXL_DEC_NEED_MORE_INPUT) {
+      dec->codestream.insert(dec->codestream.end(), *next_in, *next_in + csize);
+      dec->file_pos += csize;
+      *next_in += csize;
+      *avail_in -= csize;
+    }
+  }
+
+  return result;
+}
+
+JxlDecoderStatus JxlDecoderGetBasicInfo(const JxlDecoder* dec,
+                                        JxlBasicInfo* info) {
+  // Fills `info` from the decoded image header. A null `info` only checks
+  // whether the basic info is available yet (NEED_MORE_INPUT if not).
+  if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT;
+  if (info == nullptr) return JXL_DEC_SUCCESS;
+
+  const jxl::ImageMetadata& meta = dec->metadata.m;
+  const auto& depth = meta.bit_depth;
+
+  info->have_container = dec->have_container;
+  info->uses_original_profile = !meta.xyb_encoded;
+  info->bits_per_sample = depth.bits_per_sample;
+  info->exponent_bits_per_sample = depth.exponent_bits_per_sample;
+  info->have_preview = meta.have_preview;
+  info->have_animation = meta.have_animation;
+
+  // Size and orientation. Unless the stored orientation is kept, the sizes
+  // are swapped for transposing orientations and identity is reported.
+  // TODO(janwas): intrinsic_size
+  info->xsize = dec->metadata.size.xsize();
+  info->ysize = dec->metadata.size.ysize();
+  info->orientation = static_cast<JxlOrientation>(meta.orientation);
+  if (!dec->keep_orientation) {
+    if (info->orientation >= JXL_ORIENT_TRANSPOSE) {
+      std::swap(info->xsize, info->ysize);
+    }
+    info->orientation = JXL_ORIENT_IDENTITY;
+  }
+
+  const auto& tone = meta.tone_mapping;
+  info->intensity_target = meta.IntensityTarget();
+  info->min_nits = tone.min_nits;
+  info->relative_to_max_display = tone.relative_to_max_display;
+  info->linear_below = tone.linear_below;
+
+  // Alpha properties are taken from the alpha extra channel, if present.
+  const jxl::ExtraChannelInfo* alpha = meta.Find(jxl::ExtraChannel::kAlpha);
+  const bool has_alpha = alpha != nullptr;
+  info->alpha_bits = has_alpha ? alpha->bit_depth.bits_per_sample : 0;
+  info->alpha_exponent_bits =
+      has_alpha ? alpha->bit_depth.exponent_bits_per_sample : 0;
+  info->alpha_premultiplied = has_alpha ? alpha->alpha_associated : 0;
+
+  const bool is_gray =
+      meta.color_encoding.GetColorSpace() == jxl::ColorSpace::kGray;
+  info->num_color_channels = is_gray ? 1 : 3;
+  info->num_extra_channels = meta.num_extra_channels;
+
+  if (info->have_preview) {
+    info->preview.xsize = meta.preview_size.xsize();
+    info->preview.ysize = meta.preview_size.ysize();
+  }
+
+  if (info->have_animation) {
+    const auto& anim = meta.animation;
+    info->animation.tps_numerator = anim.tps_numerator;
+    info->animation.tps_denominator = anim.tps_denominator;
+    info->animation.num_loops = anim.num_loops;
+    info->animation.have_timecodes = anim.have_timecodes;
+  }
+
+  return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderGetExtraChannelInfo(const JxlDecoder* dec,
+                                               size_t index,
+                                               JxlExtraChannelInfo* info) {
+  // Describes extra channel `index`; needs the basic info to be decoded.
+  if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT;
+
+  const auto& extra = dec->metadata.m.extra_channel_info;
+  if (index >= extra.size()) return JXL_DEC_ERROR;  // out of bounds
+  const jxl::ExtraChannelInfo& src = extra[index];
+
+  info->type = static_cast<JxlExtraChannelType>(src.type);
+  info->bits_per_sample = src.bit_depth.bits_per_sample;
+  // Report exponent bits only for floating point samples, 0 otherwise.
+  if (src.bit_depth.floating_point_sample) {
+    info->exponent_bits_per_sample = src.bit_depth.exponent_bits_per_sample;
+  } else {
+    info->exponent_bits_per_sample = 0;
+  }
+  info->dim_shift = src.dim_shift;
+  info->name_length = src.name.size();
+  info->alpha_associated = src.alpha_associated;
+  for (size_t i = 0; i < 4; ++i) {
+    info->spot_color[i] = src.spot_color[i];
+  }
+  info->cfa_channel = src.cfa_channel;
+
+  return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderGetExtraChannelName(const JxlDecoder* dec,
+                                               size_t index, char* name,
+                                               size_t size) {
+  // Copies the null-terminated name of extra channel `index` into `name`.
+  if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT;
+
+  const auto& extra = dec->metadata.m.extra_channel_info;
+  if (index >= extra.size()) return JXL_DEC_ERROR;  // out of bounds
+
+  // The output must hold the characters plus the null terminator.
+  const auto& channel_name = extra[index].name;
+  const size_t bytes_needed = channel_name.size() + 1;
+  if (bytes_needed > size) return JXL_DEC_ERROR;
+
+  memcpy(name, channel_name.c_str(), bytes_needed);
+
+  return JXL_DEC_SUCCESS;
+}
+
+namespace {
+
+// Selects the jxl::ColorEncoding for `target`: the decoder's output encoding
+// when pixel data of an XYB-encoded image is requested, otherwise the encoding
+// stored in the codestream metadata. Returns NEED_MORE_INPUT (leaving
+// *encoding untouched) until all headers are decoded. `format` is unused.
+JxlDecoderStatus GetColorEncodingForTarget(
+    const JxlDecoder* dec, const JxlPixelFormat* format,
+    JxlColorProfileTarget target, const jxl::ColorEncoding** encoding) {
+  if (!dec->got_all_headers) return JXL_DEC_NEED_MORE_INPUT;
+
+  const bool want_output_encoding =
+      target == JXL_COLOR_PROFILE_TARGET_DATA && dec->metadata.m.xyb_encoded;
+  *encoding = want_output_encoding
+                  ? &dec->passes_state->output_encoding_info.color_encoding
+                  : &dec->metadata.m.color_encoding;
+  return JXL_DEC_SUCCESS;
+}
+}  // namespace
+
+JxlDecoderStatus JxlDecoderGetColorAsEncodedProfile(
+    const JxlDecoder* dec, const JxlPixelFormat* format,
+    JxlColorProfileTarget target, JxlColorEncoding* color_encoding) {
+  // Exports the target's color encoding as fields; a null output only checks.
+  const jxl::ColorEncoding* jxl_color_encoding = nullptr;
+  JxlDecoderStatus status =
+      GetColorEncodingForTarget(dec, format, target, &jxl_color_encoding);
+  if (status != JXL_DEC_SUCCESS) return status;
+
+  // Indicate no encoded profile available when an ICC profile is wanted.
+  if (jxl_color_encoding->WantICC()) return JXL_DEC_ERROR;
+
+  if (color_encoding) {
+    ConvertInternalToExternalColorEncoding(*jxl_color_encoding, color_encoding);
+  }
+  return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderGetICCProfileSize(const JxlDecoder* dec,
+                                             const JxlPixelFormat* format,
+                                             JxlColorProfileTarget target,
+                                             size_t* size) {
+  // Reports the byte size of the ICC profile for `target`. A null `size`
+  // only checks whether a profile can be provided.
+  const jxl::ColorEncoding* jxl_color_encoding = nullptr;
+  const JxlDecoderStatus status =
+      GetColorEncodingForTarget(dec, format, target, &jxl_color_encoding);
+  if (status != JXL_DEC_SUCCESS) return status;
+
+  if (jxl_color_encoding->WantICC()) {
+    const jxl::ColorSpace space =
+        dec->metadata.m.color_encoding.GetColorSpace();
+    const bool no_icc_available = space == jxl::ColorSpace::kUnknown ||
+                                  space == jxl::ColorSpace::kXYB;
+    // TODO(lode): for the XYB case, do we want to craft an ICC profile that
+    // represents XYB as an RGB profile? It may be possible, but not with
+    // only 1D transfer functions.
+    if (no_icc_available) return JXL_DEC_ERROR;
+  }
+
+  if (size) {
+    *size = jxl_color_encoding->ICC().size();
+  }
+
+  return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderGetColorAsICCProfile(const JxlDecoder* dec,
+                                                const JxlPixelFormat* format,
+                                                JxlColorProfileTarget target,
+                                                uint8_t* icc_profile,
+                                                size_t size) {
+  // Copies the ICC profile for `target` into `icc_profile` (`size` bytes).
+  // The size query also covers the NEED_MORE_INPUT and unknown/xyb cases.
+  size_t required = 0;
+  JxlDecoderStatus status =
+      JxlDecoderGetICCProfileSize(dec, format, target, &required);
+  if (status != JXL_DEC_SUCCESS) return status;
+  if (required > size) return JXL_API_ERROR("ICC profile output too small");
+
+  const jxl::ColorEncoding* jxl_color_encoding = nullptr;
+  status = GetColorEncodingForTarget(dec, format, target, &jxl_color_encoding);
+  if (status != JXL_DEC_SUCCESS) return status;
+
+  const auto& icc = jxl_color_encoding->ICC();
+  memcpy(icc_profile, icc.data(), icc.size());
+  return JXL_DEC_SUCCESS;
+}
+
+namespace {
+
+// Validates `format` against the decoded image and stores the number of bits
+// per channel sample for its data type in *bits.
+JxlDecoderStatus PrepareSizeCheck(const JxlDecoder* dec,
+                                  const JxlPixelFormat* format, size_t* bits) {
+  // Don't know image dimensions yet, cannot check for valid size.
+  if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT;
+
+  const auto channels = format->num_channels;
+  if (channels > 4) {
+    return JXL_API_ERROR("More than 4 channels not supported");
+  }
+  if (channels < 3 && !dec->metadata.m.color_encoding.IsGray()) {
+    return JXL_API_ERROR("Grayscale output not possible for color image");
+  }
+
+  const auto type = format->data_type;
+  if (type == JXL_TYPE_BOOLEAN) {
+    return JXL_API_ERROR("Boolean data type not yet supported");
+  }
+  if (type == JXL_TYPE_UINT32) {
+    return JXL_API_ERROR("uint32 data type not yet supported");
+  }
+
+  // A result of zero bits is reported as an invalid data type.
+  *bits = BitsPerChannel(type);
+  if (*bits == 0) return JXL_API_ERROR("Invalid data type");
+
+  return JXL_DEC_SUCCESS;
+}
+}  // namespace
+
+JxlDecoderStatus JxlDecoderFlushImage(JxlDecoder* dec) {
+  // Flushes the frame that is currently being decoded and, unless the frame
+  // decoder has an RGB buffer, converts dec->ib into the image out buffer.
+  // Every unmet precondition below yields JXL_DEC_ERROR.
+  if (!dec->image_out_buffer) return JXL_DEC_ERROR;
+
+  if (!dec->sections || dec->sections->section_info.empty()) {
+    return JXL_DEC_ERROR;
+  }
+
+  if (!dec->frame_dec || !dec->frame_dec_in_progress) return JXL_DEC_ERROR;
+
+  // FrameDecoder::Flush currently requires DC to have been decoded already
+  // to work correctly.
+  if (!dec->frame_dec->HasDecodedDC()) return JXL_DEC_ERROR;
+
+  // Flushing does not yet work correctly if the frame uses modular encoding.
+  if (dec->frame_header->encoding != jxl::FrameEncoding::kVarDCT) {
+    return JXL_DEC_ERROR;
+  }
+
+  // Flushing does not yet work correctly if there are extra channels, which
+  // use modular
+  if (dec->metadata.m.num_extra_channels > 0) return JXL_DEC_ERROR;
+
+  if (!dec->frame_dec->Flush()) {
+    return JXL_DEC_ERROR;
+  }
+
+  // With an RGB buffer in the frame decoder no conversion is done here.
+  if (dec->frame_dec->HasRGBBuffer()) return JXL_DEC_SUCCESS;
+
+  // Temporarily shrink `dec->ib` to the actual size of the full image to call
+  // ConvertImageInternal, then restore its previous dimensions.
+  const size_t saved_xsize = dec->ib->xsize();
+  const size_t saved_ysize = dec->ib->ysize();
+  dec->ib->ShrinkTo(dec->metadata.size.xsize(), dec->metadata.size.ysize());
+  const JxlDecoderStatus status = jxl::ConvertImageInternal(
+      dec, *dec->ib, dec->image_out_format, dec->image_out_buffer,
+      dec->image_out_size,
+      /*out_callback=*/nullptr, /*out_opaque=*/nullptr);
+  dec->ib->ShrinkTo(saved_xsize, saved_ysize);
+  return status;
+}
+
+JXL_EXPORT JxlDecoderStatus JxlDecoderPreviewOutBufferSize(
+    const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size) {
+  size_t bits;
+  JxlDecoderStatus status = PrepareSizeCheck(dec, format, &bits);
+  if (status != JXL_DEC_SUCCESS) return status;
+
+  // Oriented preview dimensions, then bytes per row rounded up to `align`.
+  const bool keep = dec->keep_orientation;
+  const size_t width = dec->metadata.oriented_preview_xsize(keep);
+  const size_t height = dec->metadata.oriented_preview_ysize(keep);
+  const size_t align = format->align;
+  size_t row_bytes = jxl::DivCeil(width * format->num_channels * bits,
+                                  jxl::kBitsPerByte);
+  if (align > 1) row_bytes = jxl::DivCeil(row_bytes, align) * align;
+  *size = row_bytes * height;
+  return JXL_DEC_SUCCESS;
+}
+
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetPreviewOutBuffer(
+    JxlDecoder* dec, const JxlPixelFormat* format, void* buffer, size_t size) {
+  // Only accepted if the image has a preview and that event was requested.
+  const bool wanted = dec->got_basic_info && dec->metadata.m.have_preview &&
+                      (dec->orig_events_wanted & JXL_DEC_PREVIEW_IMAGE);
+  if (!wanted) {
+    return JXL_API_ERROR("No preview out buffer needed at this time");
+  }
+
+  // The size query also validates the format and that basic info exists.
+  size_t required;
+  const JxlDecoderStatus status =
+      JxlDecoderPreviewOutBufferSize(dec, format, &required);
+  if (status != JXL_DEC_SUCCESS) return status;
+  if (size < required) return JXL_DEC_ERROR;
+
+  dec->preview_out_format = *format;
+  dec->preview_out_buffer = buffer;
+  dec->preview_out_size = size;
+  dec->preview_out_buffer_set = true;
+
+  return JXL_DEC_SUCCESS;
+}
+
+JXL_EXPORT JxlDecoderStatus JxlDecoderDCOutBufferSize(
+    const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size) {
+  size_t bits;
+  JxlDecoderStatus status = PrepareSizeCheck(dec, format, &bits);
+  if (status != JXL_DEC_SUCCESS) return status;
+
+  // Oriented image dimensions divided by kBlockDim, each rounded up.
+  const bool keep = dec->keep_orientation;
+  const size_t width =
+      jxl::DivCeil(dec->metadata.oriented_xsize(keep), jxl::kBlockDim);
+  const size_t height =
+      jxl::DivCeil(dec->metadata.oriented_ysize(keep), jxl::kBlockDim);
+  const size_t align = format->align;
+  size_t row_bytes = jxl::DivCeil(width * format->num_channels * bits,
+                                  jxl::kBitsPerByte);
+  if (align > 1) row_bytes = jxl::DivCeil(row_bytes, align) * align;
+  *size = row_bytes * height;
+  return JXL_DEC_SUCCESS;
+}
+
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetDCOutBuffer(
+    JxlDecoder* dec, const JxlPixelFormat* format, void* buffer, size_t size) {
+  // Deprecated feature: the arguments are ignored and no buffer is stored.
+  return JXL_DEC_SUCCESS;
+}
+
+JXL_EXPORT JxlDecoderStatus JxlDecoderImageOutBufferSize(
+    const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size) {
+  size_t bits;
+  JxlDecoderStatus status = PrepareSizeCheck(dec, format, &bits);
+  if (status != JXL_DEC_SUCCESS) return status;
+
+  // Oriented image dimensions, then bytes per row rounded up to `align`.
+  const bool keep = dec->keep_orientation;
+  const auto width = dec->metadata.oriented_xsize(keep);
+  const auto height = dec->metadata.oriented_ysize(keep);
+  const size_t align = format->align;
+  size_t row_bytes = jxl::DivCeil(width * format->num_channels * bits,
+                                  jxl::kBitsPerByte);
+  if (align > 1) row_bytes = jxl::DivCeil(row_bytes, align) * align;
+  *size = row_bytes * height;
+  return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderSetImageOutBuffer(JxlDecoder* dec,
+                                             const JxlPixelFormat* format,
+                                             void* buffer, size_t size) {
+  // Registers `buffer` (`size` bytes) as the destination of the full image.
+  const bool wanted =
+      dec->got_basic_info && (dec->orig_events_wanted & JXL_DEC_FULL_IMAGE);
+  if (!wanted) {
+    return JXL_API_ERROR("No image out buffer needed at this time");
+  }
+  if (dec->image_out_buffer_set && dec->image_out_callback != nullptr) {
+    return JXL_API_ERROR(
+        "Cannot change from image out callback to image out buffer");
+  }
+
+  // The size query also validates the format and that basic info exists.
+  size_t required;
+  const JxlDecoderStatus status =
+      JxlDecoderImageOutBufferSize(dec, format, &required);
+  if (status != JXL_DEC_SUCCESS) return status;
+  if (size < required) return JXL_DEC_ERROR;
+  dec->image_out_format = *format;
+  dec->image_out_buffer = buffer;
+  dec->image_out_size = size;
+  dec->image_out_buffer_set = true;
+  return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderSetImageOutCallback(JxlDecoder* dec,
+                                               const JxlPixelFormat* format,
+                                               JxlImageOutCallback callback,
+                                               void* opaque) {
+  // Stores `callback` and `opaque` as the image output destination.
+  if (dec->image_out_buffer_set && dec->image_out_buffer != nullptr) {
+    return JXL_API_ERROR(
+        "Cannot change from image out buffer to image out callback");
+  }
+
+  // Only the validation result is needed here; the bit count is discarded.
+  size_t unused_bits;
+  const JxlDecoderStatus status = PrepareSizeCheck(dec, format, &unused_bits);
+  if (status != JXL_DEC_SUCCESS) return status;
+
+  dec->image_out_format = *format;
+  dec->image_out_callback = callback;
+  dec->image_out_opaque = opaque;
+  dec->image_out_buffer_set = true;
+  return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderGetFrameHeader(const JxlDecoder* dec,
+                                          JxlFrameHeader* header) {
+  if (!dec->frame_header || dec->frame_stage == FrameStage::kHeader) {
+    return JXL_API_ERROR("no frame header available");
+  }
+  // Fills `header` from the current frame header. duration and timecode are
+  // always written (0 when not applicable) so they are never indeterminate.
+  const auto& anim = dec->frame_header->animation_frame;
+  const bool animated = dec->metadata.m.have_animation;
+  const bool timecodes = animated && dec->metadata.m.animation.have_timecodes;
+  header->duration = animated ? anim.duration : 0;
+  header->timecode = timecodes ? anim.timecode : 0;
+  header->name_length = dec->frame_header->name.size();
+  header->is_last = dec->frame_header->is_last;
+
+  return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderGetFrameName(const JxlDecoder* dec, char* name,
+                                        size_t size) {
+  if (!dec->frame_header || dec->frame_stage == FrameStage::kHeader) {
+    return JXL_API_ERROR("no frame header available");
+  }
+  // The output must hold the name plus its null terminator.
+  const auto& frame_name = dec->frame_header->name;
+  if (size < frame_name.size() + 1) {
+    return JXL_API_ERROR("too small frame name output buffer");
+  }
+  memcpy(name, frame_name.c_str(), frame_name.size() + 1);
+  return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderSetPreferredColorProfile(
+    JxlDecoder* dec, const JxlColorEncoding* color_encoding) {
+  // Requires all headers to be decoded and dec->post_headers to be unset.
+  if (!dec->got_all_headers) {
+    return JXL_API_ERROR("color info not yet available");
+  }
+  if (dec->post_headers) {
+    return JXL_API_ERROR("too late to set the color encoding");
+  }
+  const auto space = color_encoding->color_space;
+  const bool wants_gray = space == JXL_COLOR_SPACE_GRAY;
+  if (dec->metadata.m.color_encoding.IsGray() != wants_gray) {
+    return JXL_API_ERROR("grayscale mismatch");
+  }
+  if (space == JXL_COLOR_SPACE_UNKNOWN || space == JXL_COLOR_SPACE_XYB) {
+    return JXL_API_ERROR("only RGB or grayscale output supported");
+  }
+  JXL_API_RETURN_IF_ERROR(ConvertExternalToInternalColorEncoding(
+      *color_encoding, &dec->default_enc));
+  JXL_API_RETURN_IF_ERROR(dec->passes_state->output_encoding_info.Set(
+      dec->metadata, dec->default_enc));
+  return JXL_DEC_SUCCESS;
+}
+
+// This function is "package-private". It is only used by fuzzer to avoid
+// running cases that are too memory / CPU hungry. Limitations are applied
+// at mid-level API. In the future high-level API would also include the
+// means of limiting / throttling memory / CPU usage.
+void SetDecoderMemoryLimitBase_(size_t memory_limit_base) {
+  // The CPU budget is 5 x the memory base: every frame is accounted as W x H
+  // CPU processing units, so it covers numerous small frames or few larger.
+  constexpr size_t kCpuUnitsPerMemoryUnit = 5;
+  memory_limit_base_ = memory_limit_base;
+  cpu_limit_base_ = kCpuUnitsPerMemoryUnit * memory_limit_base;
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_encode.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_encode.cc
new file mode 100644
index 0000000000..f4e94d1412
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_encode.cc
@@ -0,0 +1,471 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "jxl/encode.h"
+
+#include <algorithm>
+#include <cstring>
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_icc_codec.h"
+#include "lib/jxl/encode_internal.h"
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+
+#define JPEGXL_MAJOR_VERSION 0
+#define JPEGXL_MINOR_VERSION 5
+#define JPEGXL_PATCH_VERSION 0
+
+// Debug-printing failure macro similar to JXL_FAILURE, but for the status code
+// JXL_ENC_ERROR
+#ifdef JXL_CRASH_ON_ERROR
+#define JXL_API_ERROR(format, ...)                                           \
+  (::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__), \
+   ::jxl::Abort(), JXL_ENC_ERROR)
+#else  // JXL_CRASH_ON_ERROR
+#define JXL_API_ERROR(format, ...)                                             \
+  (((JXL_DEBUG_ON_ERROR) &&                                                    \
+    ::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__)), \
+   JXL_ENC_ERROR)
+#endif  // JXL_CRASH_ON_ERROR
+
+namespace jxl {}  // namespace jxl
+
+uint32_t JxlEncoderVersion(void) {
+  const uint32_t upper = JPEGXL_MAJOR_VERSION * 1000 + JPEGXL_MINOR_VERSION;
+  return upper * 1000 + JPEGXL_PATCH_VERSION;  // major*1e6+minor*1e3+patch
+}
+
+JxlEncoderStatus JxlEncoderStruct::RefillOutputByteQueue() {
+  jxl::MemoryManagerUniquePtr<jxl::JxlEncoderQueuedFrame> input_frame =
+      std::move(input_frame_queue[0]);
+  input_frame_queue.erase(input_frame_queue.begin());
+  // The frame dequeued above is encoded and appended to output_byte_queue.
+  // TODO(zond): If the frame queue is empty and the input_closed is true,
+  // then mark this frame as the last.
+
+  jxl::BitWriter writer;
+  // Before any frame was written: container header, jbrd box, file headers.
+  if (!wrote_bytes) {
+    if (use_container) {
+      output_byte_queue.insert(
+          output_byte_queue.end(), jxl::kContainerHeader,
+          jxl::kContainerHeader + sizeof(jxl::kContainerHeader));
+      if (store_jpeg_metadata && jpeg_metadata.size() > 0) {
+        jxl::AppendBoxHeader(jxl::MakeBoxType("jbrd"), jpeg_metadata.size(),
+                             false, &output_byte_queue);
+        output_byte_queue.insert(output_byte_queue.end(), jpeg_metadata.begin(),
+                                 jpeg_metadata.end());
+      }
+    }
+    if (!WriteHeaders(&metadata, &writer, nullptr)) {
+      return JXL_ENC_ERROR;
+    }
+    // Only send ICC (at least several hundred bytes) if fields aren't enough.
+    if (metadata.m.color_encoding.WantICC()) {
+      if (!jxl::WriteICC(metadata.m.color_encoding.ICC(), &writer,
+                         jxl::kLayerHeader, nullptr)) {
+        return JXL_ENC_ERROR;
+      }
+    }
+
+    // TODO(lode): preview should be added here if a preview image is added
+
+    // Each frame should start on byte boundaries.
+    writer.ZeroPadToByte();
+  }
+
+  // TODO(zond): Handle progressive mode like EncodeFile does it.
+  // TODO(zond): Handle animation like EncodeFile does it, by checking if
+  //             JxlEncoderCloseInput has been called and if the frame queue is
+  //             empty (to see if it's the last animation frame).
+
+  if (metadata.m.xyb_encoded) {
+    input_frame->option_values.cparams.color_transform =
+        jxl::ColorTransform::kXYB;
+  } else {
+    // TODO(zond): Figure out when to use kYCbCr instead.
+    input_frame->option_values.cparams.color_transform =
+        jxl::ColorTransform::kNone;
+  }
+
+  jxl::PassesEncoderState enc_state;
+  if (!jxl::EncodeFrame(input_frame->option_values.cparams, jxl::FrameInfo{},
+                        &metadata, input_frame->frame, &enc_state,
+                        thread_pool.get(), &writer,
+                        /*aux_out=*/nullptr)) {
+    return JXL_ENC_ERROR;
+  }
+
+  jxl::PaddedBytes bytes = std::move(writer).TakeBytes();
+  // The jxlc box gets an exact size only if this is known to be the last frame.
+  if (use_container && !wrote_bytes) {
+    if (input_closed && input_frame_queue.empty()) {
+      jxl::AppendBoxHeader(jxl::MakeBoxType("jxlc"), bytes.size(),
+                           /*unbounded=*/false, &output_byte_queue);
+    } else {
+      jxl::AppendBoxHeader(jxl::MakeBoxType("jxlc"), 0, /*unbounded=*/true,
+                           &output_byte_queue);
+    }
+  }
+
+  output_byte_queue.insert(output_byte_queue.end(), bytes.data(),
+                           bytes.data() + bytes.size());
+  wrote_bytes = true;
+  // Remember the compression parameters this frame was encoded with.
+  last_used_cparams = input_frame->option_values.cparams;
+
+  return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderSetColorEncoding(JxlEncoder* enc,
+                                            const JxlColorEncoding* color) {
+  // The color encoding can only be set while none has been set yet.
+  if (enc->color_encoding_set) return JXL_ENC_ERROR;
+
+  auto* target = &enc->metadata.m.color_encoding;
+  if (!jxl::ConvertExternalToInternalColorEncoding(*color, target)) {
+    return JXL_ENC_ERROR;
+  }
+
+  enc->color_encoding_set = true;
+  return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderSetICCProfile(JxlEncoder* enc,
+                                         const uint8_t* icc_profile,
+                                         size_t size) {
+  // The color encoding can only be set while none has been set yet.
+  if (enc->color_encoding_set) return JXL_ENC_ERROR;
+
+  // Hand a private copy of the profile bytes over to the metadata.
+  jxl::PaddedBytes profile_copy;
+  profile_copy.assign(icc_profile, icc_profile + size);
+  if (!enc->metadata.m.color_encoding.SetICCRaw(std::move(profile_copy))) {
+    return JXL_ENC_ERROR;
+  }
+  enc->color_encoding_set = true;
+  return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderSetBasicInfo(JxlEncoder* enc,
+                                        const JxlBasicInfo* info) {
+  // Applies image size, sample format and alpha settings from `info` to the
+  // encoder metadata. The fields are applied in order, so a failure can leave
+  // the earlier ones already updated.
+
+  if (!enc->metadata.size.Set(info->xsize, info->ysize)) {
+    return JXL_ENC_ERROR;
+  }
+
+  const auto bits = info->bits_per_sample;
+  if (info->exponent_bits_per_sample != 0) {
+    // Floating point samples: only 32 bits with 8 exponent bits supported.
+    if (info->exponent_bits_per_sample != 8 || bits != 32) {
+      return JXL_ENC_NOT_SUPPORTED;
+    }
+    enc->metadata.m.SetFloat32Samples();
+  } else if (bits == 8 || bits == 16 || bits == 32) {
+    // Unsigned integer samples.
+    enc->metadata.m.SetUintSamples(bits);
+  } else {
+    return JXL_ENC_ERROR;
+  }
+
+  // Alpha that declares exponent bits is not supported.
+  const auto alpha_bits = info->alpha_bits;
+  if (alpha_bits > 0 && info->alpha_exponent_bits > 0) {
+    return JXL_ENC_NOT_SUPPORTED;
+  }
+  // alpha_bits == 0 leaves the alpha settings untouched.
+  if (alpha_bits == 16 || alpha_bits == 32) {
+    // 32-bit alpha is configured as 16 bits, exactly like 16-bit alpha.
+    enc->metadata.m.SetAlphaBits(16);
+  } else if (alpha_bits == 8) {
+    enc->metadata.m.SetAlphaBits(alpha_bits);
+  } else if (alpha_bits != 0) {
+    return JXL_ENC_ERROR;
+  }
+
+  // The image is marked XYB encoded unless the original profile is used.
+  enc->metadata.m.xyb_encoded = !info->uses_original_profile;
+  enc->basic_info_set = true;
+
+  return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderOptions* JxlEncoderOptionsCreate(JxlEncoder* enc,
+                                           const JxlEncoderOptions* source) {
+  // Allocates options owned by `enc`, optionally copying those of `source`.
+  auto options =
+      jxl::MemoryManagerMakeUnique<JxlEncoderOptions>(&enc->memory_manager);
+  if (!options) return nullptr;
+
+  options->enc = enc;
+  // Start out lossy; a given `source` then replaces all values wholesale.
+  options->values.lossless = false;
+  if (source != nullptr) options->values = source->values;
+  JxlEncoderOptions* result = options.get();
+  enc->encoder_options.emplace_back(std::move(options));
+  return result;
+}
+
+JxlEncoderStatus JxlEncoderOptionsSetLossless(JxlEncoderOptions* options,
+                                              const JXL_BOOL lossless) {
+  options->values.lossless = lossless;
+  return JXL_ENC_SUCCESS;  // the flag is only stored; there is no failure path
+}
+
+JxlEncoderStatus JxlEncoderOptionsSetEffort(JxlEncoderOptions* options,
+                                            const int effort) {
+  // Efforts 3..9 map onto speed tier values 7..1; anything else is rejected.
+  const bool valid = effort >= 3 && effort <= 9;
+  if (!valid) return JXL_ENC_ERROR;
+  options->values.cparams.speed_tier = static_cast<jxl::SpeedTier>(10 - effort);
+  return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderOptionsSetDistance(JxlEncoderOptions* options,
+                                              float distance) {
+  // Accept only distances in [0, 15]. The range test is written positively
+  // so that NaN, which fails every comparison, is rejected as well.
+  if (!(distance >= 0 && distance <= 15)) return JXL_ENC_ERROR;
+  options->values.cparams.butteraugli_distance = distance;
+  return JXL_ENC_SUCCESS;
+}
+
+JxlEncoder* JxlEncoderCreate(const JxlMemoryManager* memory_manager) {
+  // Set up a local memory manager from the given one; give up if that fails.
+  JxlMemoryManager manager;
+  if (!jxl::MemoryManagerInit(&manager, memory_manager)) {
+    return nullptr;
+  }
+
+  void* storage = jxl::MemoryManagerAlloc(&manager, sizeof(JxlEncoder));
+  if (storage == nullptr) return nullptr;
+  // Construct the encoder in place in the memory obtained above.
+  JxlEncoder* enc = new (storage) JxlEncoder();
+  enc->memory_manager = manager;
+  return enc;
+}
+
+void JxlEncoderReset(JxlEncoder* enc) {
+  enc->thread_pool.reset();  // a parallel runner may be set again afterwards
+  enc->input_frame_queue.clear();
+  enc->encoder_options.clear();  // destroys every JxlEncoderOptions created
+  enc->output_byte_queue.clear();
+  enc->wrote_bytes = false;
+  enc->metadata = jxl::CodecMetadata();
+  enc->last_used_cparams = jxl::CompressParams();
+  enc->input_closed = false;  // NOTE(review): use_container is not reset
+  enc->basic_info_set = false;
+  enc->color_encoding_set = false;
+}
+
+void JxlEncoderDestroy(JxlEncoder* enc) {
+  if (enc) {
+    JxlMemoryManager mm = enc->memory_manager;  // copy: it lives inside *enc
+    enc->~JxlEncoder();  // explicit dtor since custom free function is used
+    jxl::MemoryManagerFree(&mm, enc);
+  }
+}
+
+JxlEncoderStatus JxlEncoderUseContainer(JxlEncoder* enc,
+                                        JXL_BOOL use_container) {
+  enc->use_container = (use_container != 0);  // any nonzero value enables it
+  return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderStoreJPEGMetadata(JxlEncoder* enc,
+                                             JXL_BOOL store_jpeg_metadata) {
+  enc->store_jpeg_metadata = (store_jpeg_metadata != 0);  // nonzero enables
+  return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderSetParallelRunner(JxlEncoder* enc,
+                                             JxlParallelRunner parallel_runner,
+                                             void* parallel_runner_opaque) {
+  if (enc->thread_pool != nullptr) {
+    return JXL_API_ERROR("parallel runner already set");
+  }
+  enc->thread_pool = jxl::MemoryManagerMakeUnique<jxl::ThreadPool>(
+      &enc->memory_manager, parallel_runner, parallel_runner_opaque);
+  // A null pool at this point means it could not be created.
+  return enc->thread_pool ? JXL_ENC_SUCCESS : JXL_ENC_ERROR;
+}
+
+JxlEncoderStatus JxlEncoderAddJPEGFrame(const JxlEncoderOptions* options,
+                                        const uint8_t* buffer, size_t size) {
+  if (options->enc->input_closed) {
+    return JXL_ENC_ERROR;
+  }
+  // Parse the JPEG stream; the steps below read io.Main().jpeg_data.
+  jxl::CodecInOut io;
+  if (!jxl::jpeg::DecodeImageJPG(jxl::Span<const uint8_t>(buffer, size), &io)) {
+    return JXL_ENC_ERROR;
+  }
+  // Without a caller-provided color encoding, derive one from the JPEG.
+  if (!options->enc->color_encoding_set) {
+    if (!SetColorEncodingFromJpegData(
+            *io.Main().jpeg_data, &options->enc->metadata.m.color_encoding)) {
+      return JXL_ENC_ERROR;
+    }
+  }
+  // Without caller-provided basic info, synthesize it from the JPEG size.
+  if (!options->enc->basic_info_set) {
+    // Value-initialize: every field not assigned below (including the
+    // exponent and alpha bit counts) is then zero instead of indeterminate
+    // when JxlEncoderSetBasicInfo reads the struct.
+    JxlBasicInfo basic_info = {};
+    basic_info.bits_per_sample = 8;
+    basic_info.xsize = io.Main().jpeg_data->width;
+    basic_info.ysize = io.Main().jpeg_data->height;
+    basic_info.uses_original_profile = true;
+    if (JxlEncoderSetBasicInfo(options->enc, &basic_info) != JXL_ENC_SUCCESS) {
+      return JXL_ENC_ERROR;
+    }
+  }
+
+  if (options->enc->metadata.m.xyb_encoded) {
+    // Can't XYB encode a lossless JPEG.
+    return JXL_ENC_ERROR;
+  }
+  // Optionally keep the serialized JPEG data; EncodeJPEGData gets a copy.
+  if (options->enc->store_jpeg_metadata) {
+    jxl::jpeg::JPEGData data_in = *io.Main().jpeg_data;
+    jxl::PaddedBytes jpeg_data;
+    if (!EncodeJPEGData(data_in, &jpeg_data)) {
+      return JXL_ENC_ERROR;
+    }
+    options->enc->jpeg_metadata = std::vector<uint8_t>(
+        jpeg_data.data(), jpeg_data.data() + jpeg_data.size());
+  }
+
+  auto queued_frame = jxl::MemoryManagerMakeUnique<jxl::JxlEncoderQueuedFrame>(
+      &options->enc->memory_manager,
+      // JxlEncoderQueuedFrame is a struct with no constructors, so we use the
+      // default move constructor there.
+      jxl::JxlEncoderQueuedFrame{options->values,
+                                 jxl::ImageBundle(&options->enc->metadata.m)});
+  if (!queued_frame) {
+    return JXL_ENC_ERROR;
+  }
+  queued_frame->frame.SetFromImage(std::move(*io.Main().color()),
+                                   io.Main().c_current());
+  queued_frame->frame.jpeg_data = std::move(io.Main().jpeg_data);
+  queued_frame->frame.color_transform = io.Main().color_transform;
+  queued_frame->frame.chroma_subsampling = io.Main().chroma_subsampling;
+
+  if (options->values.lossless) {
+    queued_frame->option_values.cparams.SetLossless();
+  }
+
+  options->enc->input_frame_queue.emplace_back(std::move(queued_frame));
+  return JXL_ENC_SUCCESS;
+}
+
+// Queues one frame of raw pixels for encoding with the given options.
+// Fails with JXL_ENC_ERROR when basic info or color encoding is not set yet,
+// when the input was already closed, for JXL_TYPE_FLOAT16 pixels, when the
+// queue entry cannot be allocated, or when BufferToImageBundle rejects the
+// buffer.
+JxlEncoderStatus JxlEncoderAddImageFrame(const JxlEncoderOptions* options,
+                                         const JxlPixelFormat* pixel_format,
+                                         const void* buffer, size_t size) {
+  JxlEncoder* const enc = options->enc;
+  // Both header parts must be in place before any pixels are accepted.
+  if (!(enc->basic_info_set && enc->color_encoding_set)) return JXL_ENC_ERROR;
+  if (enc->input_closed) return JXL_ENC_ERROR;
+  // float16 is currently only supported in the decoder
+  if (pixel_format->data_type == JXL_TYPE_FLOAT16) return JXL_ENC_ERROR;
+
+  auto frame_entry = jxl::MemoryManagerMakeUnique<jxl::JxlEncoderQueuedFrame>(
+      &enc->memory_manager,
+      // JxlEncoderQueuedFrame has no constructors; build it with aggregate
+      // initialization and let the implicit move constructor take over.
+      jxl::JxlEncoderQueuedFrame{options->values,
+                                 jxl::ImageBundle(&enc->metadata.m)});
+  if (!frame_entry) return JXL_ENC_ERROR;
+
+  // Pick the color space the pixels are interpreted in: for XYB-encoded
+  // output, float input is taken as linear sRGB and everything else as sRGB
+  // (gray when fewer than 3 channels); otherwise use the encoder's own
+  // color encoding.
+  const bool is_gray = pixel_format->num_channels < 3;
+  jxl::ColorEncoding input_encoding;
+  if (!enc->metadata.m.xyb_encoded) {
+    input_encoding = enc->metadata.m.color_encoding;
+  } else if (pixel_format->data_type == JXL_TYPE_FLOAT) {
+    input_encoding = jxl::ColorEncoding::LinearSRGB(is_gray);
+  } else {
+    input_encoding = jxl::ColorEncoding::SRGB(is_gray);
+  }
+
+  const bool converted = jxl::BufferToImageBundle(
+      *pixel_format, enc->metadata.xsize(), enc->metadata.ysize(), buffer, size,
+      enc->thread_pool.get(), input_encoding, &(frame_entry->frame));
+  if (!converted) return JXL_ENC_ERROR;
+
+  // The per-frame copy of the options is what gets switched to lossless.
+  if (options->values.lossless) {
+    frame_entry->option_values.cparams.SetLossless();
+  }
+
+  // Hand the finished entry over to the encoder's input queue.
+  enc->input_frame_queue.emplace_back(std::move(frame_entry));
+  return JXL_ENC_SUCCESS;
+}
+// Marks the input as closed; later Add*Frame calls then return an error.
+void JxlEncoderCloseInput(JxlEncoder* enc) { enc->input_closed = true; }
+
+JxlEncoderStatus JxlEncoderProcessOutput(JxlEncoder* enc, uint8_t** next_out,
+                                         size_t* avail_out) {
+  // Alternate between copying pending bytes into the caller's buffer and
+  // calling RefillOutputByteQueue() while frames are still queued, until the
+  // buffer is full or both queues are empty.
+  auto& pending = enc->output_byte_queue;
+  while (*avail_out > 0) {
+    if (pending.empty()) {
+      if (enc->input_frame_queue.empty()) break;
+      if (enc->RefillOutputByteQueue() != JXL_ENC_SUCCESS) {
+        return JXL_ENC_ERROR;
+      }
+      continue;
+    }
+    const size_t chunk = std::min(*avail_out, pending.size());
+    memcpy(static_cast<void*>(*next_out), pending.data(), chunk);
+    *next_out += chunk;
+    *avail_out -= chunk;
+    pending.erase(pending.begin(), pending.begin() + chunk);
+  }
+
+  const bool finished = pending.empty() && enc->input_frame_queue.empty();
+  return finished ? JXL_ENC_SUCCESS : JXL_ENC_NEED_MORE_OUTPUT;
+}
+
+JxlEncoderStatus JxlEncoderOptionsSetDecodingSpeed(JxlEncoderOptions* options,
+                                                   int tier) {
+  // Valid decoding speed tiers are 0 through 4 inclusive.
+  const bool valid = tier >= 0 && tier <= 4;
+  if (!valid) return JXL_ENC_ERROR;
+  options->values.cparams.decoding_speed_tier = tier;
+  return JXL_ENC_SUCCESS;
+}
+
+void JxlColorEncodingSetToSRGB(JxlColorEncoding* color_encoding,
+                               JXL_BOOL is_gray) {
+  const jxl::ColorEncoding& srgb = jxl::ColorEncoding::SRGB(is_gray);
+  ConvertInternalToExternalColorEncoding(srgb, color_encoding);
+}
+
+void JxlColorEncodingSetToLinearSRGB(JxlColorEncoding* color_encoding,
+                                     JXL_BOOL is_gray) {
+  const jxl::ColorEncoding& linear = jxl::ColorEncoding::LinearSRGB(is_gray);
+  ConvertInternalToExternalColorEncoding(linear, color_encoding);
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_inspection.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_inspection.h
new file mode 100644
index 0000000000..0b70a58523
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_inspection.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_JXL_INSPECTION_H_
+#define LIB_JXL_JXL_INSPECTION_H_
+
+#include <functional>
+
+#include "lib/jxl/image.h"
+
+namespace jxl {
+// Signature of the optional inspection callback: it receives a C string and
+// an Image3F holding intermediate data produced during image processing.
+//
+// Returns false if processing can be stopped at that point, true otherwise.
+// This is only advisory - it is always OK to just continue processing.
+using InspectorImage3F = std::function<bool(const char*, const Image3F&)>;
+}  // namespace jxl
+
+#endif  // LIB_JXL_JXL_INSPECTION_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_osx.syms b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_osx.syms
new file mode 100644
index 0000000000..96bc568025
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_osx.syms
@@ -0,0 +1 @@
+_Jxl*
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_test.cc
new file mode 100644
index 0000000000..139e7cffde
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/jxl_test.cc
@@ -0,0 +1,1628 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <array>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "lib/extras/codec.h"
+#include "lib/extras/codec_jpg.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/thread_pool_internal.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/dec_file.h"
+#include "lib/jxl/dec_params.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testdata.h"
+#include "tools/box/box.h"
+
+namespace jxl {
+namespace {
+using test::Roundtrip;
+
+#define JXL_TEST_NL 0  // Disabled in code
+
+void CreateImage1x1(CodecInOut* io) {
+  io->metadata.m.SetUintSamples(8);  // 8-bit unsigned integer samples
+  io->metadata.m.color_encoding = ColorEncoding::SRGB();
+  Image3F zero_pixel(1, 1);
+  ZeroFillImage(&zero_pixel);
+  io->SetFromImage(std::move(zero_pixel), io->metadata.m.color_encoding);
+}
+
+TEST(JxlTest, HeaderSize) {
+  ThreadPool* pool = nullptr;
+  CompressParams cparams;
+  cparams.butteraugli_distance = 1.5;
+  DecompressParams dparams;
+
+  CodecInOut io;
+  CreateImage1x1(&io);
+
+  {  // Without alpha, the header layer must fit in 34 bits.
+    AuxOut stats;
+    CodecInOut decoded;
+    Roundtrip(&io, cparams, dparams, pool, &decoded, &stats);
+    EXPECT_LE(stats.layers[kLayerHeader].total_bits, 34);
+  }
+
+  {  // With an 8-bit alpha channel the bound is 57 bits.
+    io.metadata.m.SetAlphaBits(8);
+    ImageF opaque(1, 1);
+    opaque.Row(0)[0] = 1;
+    io.Main().SetAlpha(std::move(opaque), /*alpha_is_premultiplied=*/false);
+    AuxOut stats;
+    CodecInOut decoded;
+    Roundtrip(&io, cparams, dparams, pool, &decoded, &stats);
+    EXPECT_LE(stats.layers[kLayerHeader].total_bits, 57);
+  }
+}
+
+TEST(JxlTest, RoundtripSinglePixel) {
+  // Smoke test: runs a 1x1 image through Roundtrip at distance 1.0.
+  ThreadPool* pool = nullptr;
+  CompressParams cparams;
+  cparams.butteraugli_distance = 1.0;
+  DecompressParams dparams;
+
+  CodecInOut source, decoded;
+  CreateImage1x1(&source);
+  Roundtrip(&source, cparams, dparams, pool, &decoded);
+}
+
+// Changing serialized signature causes Decode to fail.
+#ifndef JXL_CRASH_ON_ERROR
+TEST(JxlTest, RoundtripMarker) {
+  ThreadPool* pool = nullptr;
+  AuxOut* aux_out = nullptr;
+  CompressParams cparams;
+  cparams.butteraugli_distance = 1.0;
+  DecompressParams dparams;
+
+  CodecInOut io;
+  CreateImage1x1(&io);
+  PassesEncoderState enc_state;
+  // Invert all bits of byte 0, then of byte 1, of a freshly encoded stream.
+  for (size_t byte_index = 0; byte_index != 2; ++byte_index) {
+    PaddedBytes bitstream;
+    EXPECT_TRUE(
+        EncodeFile(cparams, &io, &enc_state, &bitstream, aux_out, pool));
+    bitstream[byte_index] ^= 0xFF;
+    CodecInOut decoded;
+    EXPECT_FALSE(DecodeFile(dparams, bitstream, &decoded, pool));
+  }
+}
+#endif
+
+TEST(JxlTest, RoundtripTinyFast) {
+  CompressParams cparams;
+  cparams.butteraugli_distance = 4.0f;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  DecompressParams dparams;
+  ThreadPool* pool = nullptr;
+
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut source;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &source, pool));
+  source.ShrinkTo(32, 32);
+
+  CodecInOut decoded;  // the size is only printed, not bounded
+  const size_t bytes = Roundtrip(&source, cparams, dparams, pool, &decoded);
+  printf("32x32 image size %zu bytes\n", bytes);
+}
+
+TEST(JxlTest, RoundtripSmallD1) {
+  ThreadPool* pool = nullptr;
+  CompressParams cparams;
+  cparams.butteraugli_distance = 1.0;
+  DecompressParams dparams;
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+
+  CodecInOut decoded;    // receives the output of both roundtrips
+  size_t baseline_size;  // compressed size of the first roundtrip
+
+  {  // First pass: target_nits left untouched.
+    CodecInOut source;
+    ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &source, pool));
+    source.ShrinkTo(source.xsize() / 8, source.ysize() / 8);
+
+    baseline_size = Roundtrip(&source, cparams, dparams, pool, &decoded);
+    EXPECT_LE(baseline_size, 1000);
+    EXPECT_LE(ButteraugliDistance(source, decoded, cparams.ba_params,
+                                  /*distmap=*/nullptr, pool),
+              1.5);
+  }
+
+  {
+    // And then, with a lower intensity target than the default, the bitrate
+    // should be smaller.
+    CodecInOut dim_source;
+    dim_source.target_nits = 100;
+    ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &dim_source, pool));
+    dim_source.ShrinkTo(dim_source.xsize() / 8, dim_source.ysize() / 8);
+    EXPECT_LT(Roundtrip(&dim_source, cparams, dparams, pool, &decoded),
+              baseline_size);
+    EXPECT_LE(ButteraugliDistance(dim_source, decoded, cparams.ba_params,
+                                  /*distmap=*/nullptr, pool),
+              1.5);
+    EXPECT_EQ(dim_source.metadata.m.IntensityTarget(),
+              decoded.metadata.m.IntensityTarget());
+  }
+}
+
+TEST(JxlTest, RoundtripOtherTransforms) {
+  ThreadPool* pool = nullptr;
+
+  CompressParams cparams;
+  cparams.butteraugli_distance = 5.0f;
+  cparams.color_transform = ColorTransform::kNone;
+  // Slow modes access linear image for adaptive quant search
+  cparams.speed_tier = SpeedTier::kKitten;
+  DecompressParams dparams;
+
+  const PaddedBytes png =
+      ReadTestData("wesaturate/64px/a2d1un_nkitzmiller_srgb8.png");
+  std::unique_ptr<CodecInOut> source = jxl::make_unique<CodecInOut>();
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), source.get(), pool));
+
+  // The same input goes through two identical roundtrips, each into its own
+  // freshly created output. Both must stay within the same size and distance
+  // bounds; the second run checks the consistency when performing another
+  // roundtrip.
+  constexpr int kRuns = 2;
+  for (int run = 0; run < kRuns; ++run) {
+    std::unique_ptr<CodecInOut> decoded = jxl::make_unique<CodecInOut>();
+    const size_t compressed_size =
+        Roundtrip(source.get(), cparams, dparams, pool, decoded.get());
+    EXPECT_LE(compressed_size, 23000);
+
+    EXPECT_LE(ButteraugliDistance(*source, *decoded, cparams.ba_params,
+                                  /*distmap=*/nullptr, pool),
+              6);
+  }
+}
+
+TEST(JxlTest, RoundtripResample2) {
+  ThreadPool* pool = nullptr;
+  CompressParams cparams;
+  cparams.resampling = 2;
+  DecompressParams dparams;
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut source;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &source, pool));
+  source.ShrinkTo(source.xsize(), source.ysize());  // current dimensions
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&source, cparams, dparams, pool, &decoded), 15777);
+  EXPECT_LE(ButteraugliDistance(source, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            12.5);
+}
+TEST(JxlTest, RoundtripResample2MT) {
+#if JXL_HIGH_PRECISION
+  const double max_distance = 4.5;
+#else
+  const double max_distance = 12.5;
+#endif
+  ThreadPoolInternal pool(4);
+  CompressParams cparams;
+  cparams.resampling = 2;
+  DecompressParams dparams;
+  const PaddedBytes png =
+      ReadTestData("imagecompression.info/flower_foveon.png");
+  CodecInOut source;  // needs multiple groups even after downsampling
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &source, &pool));
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&source, cparams, dparams, &pool, &decoded), 57000);
+  EXPECT_LE(ButteraugliDistance(source, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, &pool),
+            max_distance);
+}
+
+TEST(JxlTest, RoundtripResample4) {
+  ThreadPool* pool = nullptr;
+  CompressParams cparams;
+  cparams.resampling = 4;
+  DecompressParams dparams;
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut source;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &source, pool));
+  source.ShrinkTo(source.xsize(), source.ysize());  // current dimensions
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&source, cparams, dparams, pool, &decoded), 6000);
+  EXPECT_LE(ButteraugliDistance(source, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            28);
+}
+
+TEST(JxlTest, RoundtripResample8) {
+  ThreadPool* pool = nullptr;
+  CompressParams cparams;
+  cparams.resampling = 8;
+  DecompressParams dparams;
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut source;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &source, pool));
+  source.ShrinkTo(source.xsize(), source.ysize());  // current dimensions
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&source, cparams, dparams, pool, &decoded), 2100);
+  EXPECT_LE(ButteraugliDistance(source, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            80);
+}
+
+TEST(JxlTest, RoundtripUnalignedD2) {
+  ThreadPool* pool = nullptr;
+  CompressParams cparams;
+  cparams.butteraugli_distance = 2.0;
+  DecompressParams dparams;
+
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut source;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &source, pool));
+  // Each axis is divided by a different factor (12 and 7).
+  source.ShrinkTo(source.xsize() / 12, source.ysize() / 7);
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&source, cparams, dparams, pool, &decoded), 700);
+  EXPECT_LE(ButteraugliDistance(source, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            3.2);
+}
+
+#if JXL_TEST_NL
+
+TEST(JxlTest, RoundtripMultiGroupNL) {
+  ThreadPoolInternal pool(4);
+  CompressParams cparams;
+  cparams.fast_mode = true;
+  DecompressParams dparams;
+
+  const PaddedBytes png =
+      ReadTestData("imagecompression.info/flower_foveon.png");
+  CodecInOut source;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &source, &pool));
+  source.ShrinkTo(600, 1024);  // partial X, full Y group
+
+  cparams.butteraugli_distance = 1.0f;
+  CodecInOut decoded_d1;
+  Roundtrip(&source, cparams, dparams, &pool, &decoded_d1);
+  EXPECT_LE(ButteraugliDistance(source, decoded_d1, cparams.ba_params,
+                                /*distmap=*/nullptr, &pool),
+            0.9f);
+
+  cparams.butteraugli_distance = 2.0f;
+  CodecInOut decoded_d2;
+  EXPECT_LE(Roundtrip(&source, cparams, dparams, &pool, &decoded_d2), 80000);
+  EXPECT_LE(ButteraugliDistance(source, decoded_d2, cparams.ba_params,
+                                /*distmap=*/nullptr, &pool),
+            1.5f);
+}
+
+#endif
+
+TEST(JxlTest, RoundtripMultiGroup) {
+  ThreadPoolInternal pool(4);
+  CompressParams cparams;
+  cparams.speed_tier = SpeedTier::kKitten;
+  DecompressParams dparams;
+
+  const PaddedBytes png =
+      ReadTestData("imagecompression.info/flower_foveon.png");
+  CodecInOut source;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &source, &pool));
+  source.ShrinkTo(600, 1024);
+
+  cparams.butteraugli_distance = 1.0f;
+  CodecInOut decoded_d1;
+  EXPECT_LE(Roundtrip(&source, cparams, dparams, &pool, &decoded_d1), 40000);
+  EXPECT_LE(ButteraugliDistance(source, decoded_d1, cparams.ba_params,
+                                /*distmap=*/nullptr, &pool),
+            1.99f);
+
+  cparams.butteraugli_distance = 2.0f;
+  CodecInOut decoded_d2;
+  EXPECT_LE(Roundtrip(&source, cparams, dparams, &pool, &decoded_d2), 22100);
+  EXPECT_LE(ButteraugliDistance(source, decoded_d2, cparams.ba_params,
+                                /*distmap=*/nullptr, &pool),
+            3.0f);
+}
+
+TEST(JxlTest, RoundtripLargeFast) {
+  ThreadPoolInternal pool(8);
+  CompressParams cparams;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  DecompressParams dparams;
+
+  // The image is not shrunk here; only the compressed size is bounded.
+  const PaddedBytes png =
+      ReadTestData("imagecompression.info/flower_foveon.png");
+  CodecInOut source;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &source, &pool));
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&source, cparams, dparams, &pool, &decoded), 265000);
+}
+
+TEST(JxlTest, RoundtripDotsForceEpf) {
+  ThreadPoolInternal pool(8);
+  CompressParams cparams;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  cparams.dots = Override::kOn;
+  cparams.epf = 2;
+  DecompressParams dparams;
+
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/cvo9xd_keong_macan_srgb8.png");
+  CodecInOut source;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &source, &pool));
+
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&source, cparams, dparams, &pool, &decoded), 265000);
+}
+
+// Checks for differing size/distance in two consecutive runs of distance 2,
+// which involves additional processing including adaptive reconstruction.
+// Failing this may be a sign of race conditions or invalid memory accesses.
+TEST(JxlTest, RoundtripD2Consistent) {
+  ThreadPoolInternal pool(8);
+  CompressParams cparams;
+  cparams.butteraugli_distance = 2.0;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  DecompressParams dparams;
+
+  const PaddedBytes png =
+      ReadTestData("imagecompression.info/flower_foveon.png");
+  CodecInOut source;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &source, &pool));
+
+  // Try each xsize mod kBlockDim to verify right border handling.
+  for (size_t width = 48; width > 40; --width) {
+    source.ShrinkTo(width, 15);
+
+    CodecInOut first, second;
+    const size_t first_size =
+        Roundtrip(&source, cparams, dparams, &pool, &first);
+    const size_t second_size =
+        Roundtrip(&source, cparams, dparams, &pool, &second);
+
+    // Exact same compressed size.
+    EXPECT_EQ(first_size, second_size);
+
+    // Exact same distance.
+    const float first_dist = ButteraugliDistance(
+        source, first, cparams.ba_params, /*distmap=*/nullptr, &pool);
+    const float second_dist = ButteraugliDistance(
+        source, second, cparams.ba_params, /*distmap=*/nullptr, &pool);
+    EXPECT_EQ(first_dist, second_dist);
+  }
+}
+
+// Same as above, but for full image, testing multiple groups.
+TEST(JxlTest, RoundtripLargeConsistent) {
+  ThreadPoolInternal pool(8);
+  CompressParams cparams;
+  cparams.butteraugli_distance = 2.0;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  DecompressParams dparams;
+
+  const PaddedBytes png =
+      ReadTestData("imagecompression.info/flower_foveon.png");
+  CodecInOut source;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &source, &pool));
+
+  // Encode and decode the image twice, without shrinking it first.
+  CodecInOut first, second;
+  const size_t first_size =
+      Roundtrip(&source, cparams, dparams, &pool, &first);
+  const size_t second_size =
+      Roundtrip(&source, cparams, dparams, &pool, &second);
+
+  // Exact same compressed size.
+  EXPECT_EQ(first_size, second_size);
+
+  // Exact same distance.
+  const float first_dist = ButteraugliDistance(
+      source, first, cparams.ba_params, /*distmap=*/nullptr, &pool);
+  const float second_dist = ButteraugliDistance(
+      source, second, cparams.ba_params, /*distmap=*/nullptr, &pool);
+  EXPECT_EQ(first_dist, second_dist);
+}
+
+#if JXL_TEST_NL
+
+TEST(JxlTest, RoundtripSmallNL) {
+  ThreadPool* pool = nullptr;
+  CompressParams cparams;
+  cparams.butteraugli_distance = 1.0;
+  DecompressParams dparams;
+
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut source;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &source, pool));
+  source.ShrinkTo(source.xsize() / 8, source.ysize() / 8);
+
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&source, cparams, dparams, pool, &decoded), 1500);
+  EXPECT_LE(ButteraugliDistance(source, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            1.7);
+}
+
+#endif
+
+TEST(JxlTest, RoundtripNoGaborishNoAR) {
+  ThreadPool* pool = nullptr;
+  CompressParams cparams;
+  cparams.butteraugli_distance = 1.0;
+  cparams.gaborish = Override::kOff;
+  cparams.epf = 0;
+  DecompressParams dparams;
+
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut source;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &source, pool));
+
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&source, cparams, dparams, pool, &decoded), 40000);
+  EXPECT_LE(ButteraugliDistance(source, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            2.5);
+}
+
+TEST(JxlTest, RoundtripSmallNoGaborish) {
+  ThreadPool* pool = nullptr;
+  CompressParams cparams;
+  cparams.butteraugli_distance = 1.0;
+  cparams.gaborish = Override::kOff;
+  DecompressParams dparams;
+
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut source;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &source, pool));
+  source.ShrinkTo(source.xsize() / 8, source.ysize() / 8);
+
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&source, cparams, dparams, pool, &decoded), 900);
+  EXPECT_LE(ButteraugliDistance(source, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            1.7);
+}
+
+TEST(JxlTest, RoundtripSmallPatchesAlpha) {
+  ThreadPool* pool = nullptr;
+  CompressParams cparams;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  cparams.butteraugli_distance = 0.1f;
+  DecompressParams dparams;
+
+  Image3F lines_on_black(256, 256);
+  ZeroFillImage(&lines_on_black);
+  // This pattern should be picked up by the patch detection heuristics.
+  for (size_t y = 0; y < lines_on_black.ysize(); y++) {
+    if ((y / 32) % 4 != 0) continue;  // only every fourth 32-row band
+    float* JXL_RESTRICT plane1 = lines_on_black.PlaneRow(1, y);
+    for (size_t x = 0; x < lines_on_black.xsize(); x += 4) plane1[x] = 127.0f;
+  }
+
+  ImageF opaque(lines_on_black.xsize(), lines_on_black.ysize());
+  FillImage(1.0f, &opaque);
+
+  CodecInOut source;
+  source.metadata.m.color_encoding = ColorEncoding::LinearSRGB();
+  source.metadata.m.SetAlphaBits(8);
+  source.SetFromImage(std::move(lines_on_black), ColorEncoding::LinearSRGB());
+  source.Main().SetAlpha(std::move(opaque), /*alpha_is_premultiplied=*/false);
+
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&source, cparams, dparams, pool, &decoded), 2000);
+  EXPECT_LE(ButteraugliDistance(source, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            0.5f);
+}
+
+TEST(JxlTest, RoundtripSmallPatches) {
+  ThreadPool* pool = nullptr;
+  CompressParams cparams;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  cparams.butteraugli_distance = 0.1f;
+  DecompressParams dparams;
+
+  Image3F lines_on_black(256, 256);
+  ZeroFillImage(&lines_on_black);
+  // This pattern should be picked up by the patch detection heuristics.
+  for (size_t y = 0; y < lines_on_black.ysize(); y++) {
+    if ((y / 32) % 4 != 0) continue;  // only every fourth 32-row band
+    float* JXL_RESTRICT plane1 = lines_on_black.PlaneRow(1, y);
+    for (size_t x = 0; x < lines_on_black.xsize(); x += 4) plane1[x] = 127.0f;
+  }
+
+  // No alpha channel is attached in this test.
+  CodecInOut source;
+  source.metadata.m.color_encoding = ColorEncoding::LinearSRGB();
+  source.SetFromImage(std::move(lines_on_black), ColorEncoding::LinearSRGB());
+
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&source, cparams, dparams, pool, &decoded), 2000);
+  EXPECT_LE(ButteraugliDistance(source, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            0.5f);
+}
+
+// Test header encoding of original bits per sample
+TEST(JxlTest, RoundtripImageBundleOriginalBits) {
+  ThreadPool* pool = nullptr;
+
+  // Image does not matter, only io.metadata.m and io2.metadata.m are tested.
+  Image3F image(1, 1);
+  ZeroFillImage(&image);
+  CodecInOut io;
+  io.metadata.m.color_encoding = ColorEncoding::LinearSRGB();
+  io.SetFromImage(std::move(image), ColorEncoding::LinearSRGB());
+
+  CompressParams cparams;
+  DecompressParams dparams;
+
+  // Test unsigned integers from 1 to 32 bits
+  for (uint32_t bit_depth = 1; bit_depth <= 32; bit_depth++) {
+    if (bit_depth == 32) {
+      // TODO(lode): allow testing 32, however the code below ends up in
+      // enc_modular which does not support 32. We only want to test the header
+      // encoding though, so try without modular.
+      break;
+    }
+
+    io.metadata.m.SetUintSamples(bit_depth);
+    CodecInOut io2;
+    Roundtrip(&io, cparams, dparams, pool, &io2);
+
+    EXPECT_EQ(bit_depth, io2.metadata.m.bit_depth.bits_per_sample);
+    EXPECT_FALSE(io2.metadata.m.bit_depth.floating_point_sample);
+    EXPECT_EQ(0, io2.metadata.m.bit_depth.exponent_bits_per_sample);
+    EXPECT_EQ(0, io2.metadata.m.GetAlphaBits());
+  }
+
+  // Test various existing and non-existing floating point formats
+  for (uint32_t bit_depth = 8; bit_depth <= 32; bit_depth++) {
+    if (bit_depth != 32) {
+      // TODO: test other float types once they work. Skip (rather than leave
+      // the loop) so that the 32-bit case below is actually reached.
+      continue;
+    }
+    uint32_t exponent_bit_depth;
+    if (bit_depth < 10) {
+      exponent_bit_depth = 2;
+    } else if (bit_depth < 12) {
+      exponent_bit_depth = 3;
+    } else if (bit_depth < 16) {
+      exponent_bit_depth = 4;
+    } else if (bit_depth < 20) {
+      exponent_bit_depth = 5;
+    } else if (bit_depth < 24) {
+      exponent_bit_depth = 6;
+    } else if (bit_depth < 28) {
+      exponent_bit_depth = 7;
+    } else {
+      exponent_bit_depth = 8;
+    }
+
+    io.metadata.m.bit_depth.bits_per_sample = bit_depth;
+    io.metadata.m.bit_depth.floating_point_sample = true;
+    io.metadata.m.bit_depth.exponent_bits_per_sample = exponent_bit_depth;
+
+    CodecInOut io2;
+    Roundtrip(&io, cparams, dparams, pool, &io2);
+
+    EXPECT_EQ(bit_depth, io2.metadata.m.bit_depth.bits_per_sample);
+    EXPECT_TRUE(io2.metadata.m.bit_depth.floating_point_sample);
+    EXPECT_EQ(exponent_bit_depth,
+              io2.metadata.m.bit_depth.exponent_bits_per_sample);
+    EXPECT_EQ(0, io2.metadata.m.GetAlphaBits());
+  }
+}
+
+TEST(JxlTest, RoundtripGrayscale) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/cvo9xd_keong_macan_grayscale.png");
+  CodecInOut source;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &source, pool));
+  ASSERT_NE(source.xsize(), 0);
+  source.ShrinkTo(128, 128);
+  // Sanity-check the loaded input: gray, 8-bit integer samples, sRGB curve.
+  EXPECT_TRUE(source.Main().IsGray());
+  EXPECT_TRUE(source.metadata.m.color_encoding.tf.IsSRGB());
+  EXPECT_EQ(8, source.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_EQ(0, source.metadata.m.bit_depth.exponent_bits_per_sample);
+  EXPECT_FALSE(source.metadata.m.bit_depth.floating_point_sample);
+  PassesEncoderState enc_state;  // shared by both encodes below
+  AuxOut* aux_out = nullptr;
+
+  {  // Distance 1.0.
+    CompressParams cparams;
+    cparams.butteraugli_distance = 1.0;
+    DecompressParams dparams;
+
+    PaddedBytes bitstream;
+    EXPECT_TRUE(
+        EncodeFile(cparams, &source, &enc_state, &bitstream, aux_out, pool));
+    CodecInOut decoded;
+    EXPECT_TRUE(DecodeFile(dparams, bitstream, &decoded, pool));
+    EXPECT_TRUE(decoded.Main().IsGray());
+
+    EXPECT_LE(bitstream.size(), 7000);
+    EXPECT_LE(ButteraugliDistance(source, decoded, cparams.ba_params,
+                                  /*distmap=*/nullptr, pool),
+              1.7777777);
+  }
+
+  // Test with larger butteraugli distance and other settings enabled so
+  // different jxl codepaths trigger.
+  {
+    CompressParams cparams;
+    cparams.butteraugli_distance = 8.0;
+    DecompressParams dparams;
+
+    PaddedBytes bitstream;
+    EXPECT_TRUE(
+        EncodeFile(cparams, &source, &enc_state, &bitstream, aux_out, pool));
+    CodecInOut decoded;
+    EXPECT_TRUE(DecodeFile(dparams, bitstream, &decoded, pool));
+    EXPECT_TRUE(decoded.Main().IsGray());
+
+    EXPECT_LE(bitstream.size(), 1300);
+    EXPECT_LE(ButteraugliDistance(source, decoded, cparams.ba_params,
+                                  /*distmap=*/nullptr, pool),
+              9.0);
+  }
+}
+
+TEST(JxlTest, RoundtripAlpha) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/tmshre_riaphotographs_alpha.png");
+  CodecInOut rgba;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &rgba, pool));
+
+  // Require a non-empty image with alpha, then shrink it to 300x300.
+  ASSERT_NE(rgba.xsize(), 0);
+  ASSERT_TRUE(rgba.metadata.m.HasAlpha());
+  ASSERT_TRUE(rgba.Main().HasAlpha());
+  rgba.ShrinkTo(300, 300);
+
+  EXPECT_TRUE(rgba.metadata.m.color_encoding.tf.IsSRGB());
+  EXPECT_EQ(8, rgba.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_EQ(0, rgba.metadata.m.bit_depth.exponent_bits_per_sample);
+  EXPECT_FALSE(rgba.metadata.m.bit_depth.floating_point_sample);
+
+  DecompressParams dparams;
+  CompressParams cparams;
+  cparams.butteraugli_distance = 1.0;
+  AuxOut* aux = nullptr;
+  PassesEncoderState state;
+  PaddedBytes jxl;
+  EXPECT_TRUE(EncodeFile(cparams, &rgba, &state, &jxl, aux, pool));
+  CodecInOut decoded;
+  EXPECT_TRUE(DecodeFile(dparams, jxl, &decoded, pool));
+
+  EXPECT_LE(jxl.size(), 10077);
+  EXPECT_LE(ButteraugliDistance(rgba, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            1.4);
+}
+
+TEST(JxlTest, RoundtripAlphaPremultiplied) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/tmshre_riaphotographs_alpha.png");
+  // Load the same file twice: one copy gets premultiplied, one stays as is.
+  CodecInOut premul, straight;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &premul, pool));
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &straight, pool));
+
+  ASSERT_NE(premul.xsize(), 0);
+  ASSERT_TRUE(premul.metadata.m.HasAlpha());
+  ASSERT_TRUE(premul.Main().HasAlpha());
+  premul.ShrinkTo(300, 300);
+  straight.ShrinkTo(300, 300);
+
+  DecompressParams dparams;
+  CompressParams cparams;
+  cparams.butteraugli_distance = 1.0;
+  premul.PremultiplyAlpha();
+  EXPECT_TRUE(premul.Main().AlphaIsPremultiplied());
+  AuxOut* aux = nullptr;
+  PassesEncoderState state;
+  PaddedBytes jxl;
+  EXPECT_TRUE(EncodeFile(cparams, &premul, &state, &jxl, aux, pool));
+  CodecInOut decoded;
+  EXPECT_TRUE(DecodeFile(dparams, jxl, &decoded, pool));
+
+  EXPECT_LE(jxl.size(), 10000);
+  EXPECT_LE(ButteraugliDistance(premul, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            1.4);
+  // After un-premultiplying, compare against the untouched copy as well.
+  decoded.Main().UnpremultiplyAlpha();
+  EXPECT_LE(ButteraugliDistance(straight, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            1.8);
+}
+
+TEST(JxlTest, RoundtripAlphaResampling) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/tmshre_riaphotographs_alpha.png");
+  CodecInOut rgba;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &rgba, pool));
+
+  ASSERT_NE(rgba.xsize(), 0);
+  ASSERT_TRUE(rgba.metadata.m.HasAlpha());
+  ASSERT_TRUE(rgba.Main().HasAlpha());
+
+  // Set both resampling and ec_resampling to 2.
+  DecompressParams dparams;
+  CompressParams cparams;
+  cparams.butteraugli_distance = 1.0;
+  cparams.resampling = 2;
+  cparams.ec_resampling = 2;
+
+  AuxOut* aux = nullptr;
+  PassesEncoderState state;
+  PaddedBytes jxl;
+  EXPECT_TRUE(EncodeFile(cparams, &rgba, &state, &jxl, aux, pool));
+  CodecInOut decoded;
+  EXPECT_TRUE(DecodeFile(dparams, jxl, &decoded, pool));
+
+  EXPECT_LE(jxl.size(), 15000);
+  EXPECT_LE(ButteraugliDistance(rgba, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            6.0);
+}
+
+TEST(JxlTest, RoundtripAlphaResamplingOnlyAlpha) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/tmshre_riaphotographs_alpha.png");
+  CodecInOut rgba;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &rgba, pool));
+
+  ASSERT_NE(rgba.xsize(), 0);
+  ASSERT_TRUE(rgba.metadata.m.HasAlpha());
+  ASSERT_TRUE(rgba.Main().HasAlpha());
+
+  // Only ec_resampling is set here; cparams.resampling is left untouched.
+  DecompressParams dparams;
+  CompressParams cparams;
+  cparams.butteraugli_distance = 1.0;
+  cparams.ec_resampling = 2;
+
+  AuxOut* aux = nullptr;
+  PassesEncoderState state;
+  PaddedBytes jxl;
+  EXPECT_TRUE(EncodeFile(cparams, &rgba, &state, &jxl, aux, pool));
+  CodecInOut decoded;
+  EXPECT_TRUE(DecodeFile(dparams, jxl, &decoded, pool));
+
+  EXPECT_LE(jxl.size(), 31000);
+  EXPECT_LE(ButteraugliDistance(rgba, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            1.5);
+}
+
+TEST(JxlTest, RoundtripAlphaNonMultipleOf8) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/tmshre_riaphotographs_alpha.png");
+  CodecInOut rgba;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &rgba, pool));
+
+  ASSERT_NE(rgba.xsize(), 0);
+  ASSERT_TRUE(rgba.metadata.m.HasAlpha());
+  ASSERT_TRUE(rgba.Main().HasAlpha());
+  // 12x12: neither dimension is a multiple of 8.
+  rgba.ShrinkTo(12, 12);
+
+  EXPECT_TRUE(rgba.metadata.m.color_encoding.tf.IsSRGB());
+  EXPECT_EQ(8, rgba.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_EQ(0, rgba.metadata.m.bit_depth.exponent_bits_per_sample);
+  EXPECT_FALSE(rgba.metadata.m.bit_depth.floating_point_sample);
+
+  DecompressParams dparams;
+  CompressParams cparams;
+  cparams.butteraugli_distance = 1.0;
+  AuxOut* aux = nullptr;
+  PassesEncoderState state;
+  PaddedBytes jxl;
+  EXPECT_TRUE(EncodeFile(cparams, &rgba, &state, &jxl, aux, pool));
+  CodecInOut decoded;
+  EXPECT_TRUE(DecodeFile(dparams, jxl, &decoded, pool));
+
+  EXPECT_LE(jxl.size(), 200);
+  // TODO(robryk): Fix the following line in presence of different alpha_bits in
+  // the two contexts.
+  // EXPECT_TRUE(SamePixels(rgba.Main().alpha(), decoded.Main().alpha()));
+  // TODO(robryk): Fix the distance estimate used in the encoder.
+  EXPECT_LE(ButteraugliDistance(rgba, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            6.3);
+}
+
+TEST(JxlTest, RoundtripAlpha16) {
+  ThreadPoolInternal pool(4);
+  // The image is wider than 512 pixels to ensure multiple groups are tested.
+  const size_t xsize = 1200;
+  const size_t ysize = 160;
+  const float unit = 1.0f / 65535;
+  Image3F color(xsize, ysize);
+  ImageF alpha(xsize, ysize);
+  // Generate 16-bit pattern that uses various colors and alpha values.
+  for (size_t y = 0; y < ysize; y++) {
+    for (size_t x = 0; x < xsize; x++) {
+      color.PlaneRow(0, y)[x] = (y * 65535 / ysize) * unit;
+      color.PlaneRow(1, y)[x] = (x * 65535 / xsize) * unit;
+      color.PlaneRow(2, y)[x] = ((y + x) * 65535 / (xsize + ysize)) * unit;
+      alpha.Row(y)[x] = (x * 65535 / xsize) * unit;
+    }
+  }
+  CodecInOut image;
+  image.metadata.m.SetUintSamples(16);
+  image.metadata.m.SetAlphaBits(16);
+  image.metadata.m.color_encoding = ColorEncoding::SRGB(/*is_gray=*/false);
+  image.SetFromImage(std::move(color), image.metadata.m.color_encoding);
+  image.Main().SetAlpha(std::move(alpha), /*alpha_is_premultiplied=*/false);
+
+  ASSERT_NE(image.xsize(), 0);
+  ASSERT_TRUE(image.metadata.m.HasAlpha());
+  ASSERT_TRUE(image.Main().HasAlpha());
+
+  CompressParams cparams;
+  cparams.butteraugli_distance = 0.5;
+  // Keep the test from being too slow; this does not affect alpha.
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  DecompressParams dparams;
+
+  // NOTE(review): repeats the SetUintSamples(16) call made above; it looks
+  // redundant, but is kept so this rewrite stays behavior-identical.
+  image.metadata.m.SetUintSamples(16);
+  EXPECT_TRUE(image.metadata.m.color_encoding.tf.IsSRGB());
+  PassesEncoderState state;
+  AuxOut* aux = nullptr;
+  PaddedBytes jxl;
+  EXPECT_TRUE(EncodeFile(cparams, &image, &state, &jxl, aux, &pool));
+  CodecInOut decoded;
+  EXPECT_TRUE(DecodeFile(dparams, jxl, &decoded, &pool));
+
+  // The decoded alpha plane must match the input alpha plane (SamePixels).
+  EXPECT_TRUE(SamePixels(*image.Main().alpha(), *decoded.Main().alpha()));
+}
+
+namespace {
+CompressParams CParamsForLossless() {
+  CompressParams lossless;  // modular mode, no color transform, quality 100
+  lossless.options.predictor = {Predictor::Weighted};
+  lossless.quality_pair = {100, 100};
+  lossless.color_transform = jxl::ColorTransform::kNone;
+  lossless.modular_mode = true;
+  return lossless;
+}
+}  // namespace
+
+TEST(JxlTest, JXL_SLOW_TEST(RoundtripLossless8)) {
+  ThreadPoolInternal pool(8);
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/tmshre_riaphotographs_srgb8.png");
+  CodecInOut original;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &original, &pool));
+
+  DecompressParams dparams;
+  CompressParams cparams = CParamsForLossless();
+
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&original, cparams, dparams, &pool, &decoded), 3500000);
+  // Exact equality is intended. If this fails with a butteraugli distance
+  // that is very close to, but not exactly, 0.0, a floating point issue is
+  // the likely cause, on either side of the comparison: the values of
+  // `original` are generated by external_image.cc, and those of `decoded` by
+  // the jxl decoder. If the two use slightly different floating point
+  // operations (say, one casts int to float while the other divides the int
+  // by 255.0f and later multiplies it by 255 again), they will get slightly
+  // different values. To fix, ensure both sides use the following formula
+  // for converting integer range 0-255 to floating point range 0.0f-255.0f:
+  // static_cast<float>(i), without any further intermediate operations.
+  // Note that this precision issue is not a problem in practice if the
+  // values are equal when rounded to 8-bit int, but currently full exact
+  // precision is tested.
+  EXPECT_EQ(0.0, ButteraugliDistance(original, decoded, cparams.ba_params,
+                                     /*distmap=*/nullptr, &pool));
+}
+
+TEST(JxlTest, JXL_SLOW_TEST(RoundtripLosslessNoEncoderFastPathWP)) {
+  ThreadPoolInternal pool(8);
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/tmshre_riaphotographs_srgb8.png");
+  CodecInOut original;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &original, &pool));
+
+  // Lossless settings at the kFalcon speed tier, encoder fast path skipped.
+  CompressParams cparams = CParamsForLossless();
+  cparams.options.skip_encoder_fast_path = true;
+  cparams.speed_tier = SpeedTier::kFalcon;
+  DecompressParams dparams;
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&original, cparams, dparams, &pool, &decoded), 3500000);
+  EXPECT_EQ(0.0, ButteraugliDistance(original, decoded, cparams.ba_params,
+                                     /*distmap=*/nullptr, &pool));
+}
+
+TEST(JxlTest, JXL_SLOW_TEST(RoundtripLosslessNoEncoderFastPathGradient)) {
+  ThreadPoolInternal pool(8);
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/tmshre_riaphotographs_srgb8.png");
+  CodecInOut original;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &original, &pool));
+
+  // Lossless settings at kThunder with the Gradient predictor, no fast path.
+  CompressParams cparams = CParamsForLossless();
+  cparams.options.predictor = {Predictor::Gradient};
+  cparams.options.skip_encoder_fast_path = true;
+  cparams.speed_tier = SpeedTier::kThunder;
+  DecompressParams dparams;
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&original, cparams, dparams, &pool, &decoded), 3500000);
+  EXPECT_EQ(0.0, ButteraugliDistance(original, decoded, cparams.ba_params,
+                                     /*distmap=*/nullptr, &pool));
+}
+
+TEST(JxlTest, JXL_SLOW_TEST(RoundtripLosslessNoEncoderVeryFastPathGradient)) {
+  ThreadPoolInternal pool(8);
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/tmshre_riaphotographs_srgb8.png");
+  CodecInOut original;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &original, &pool));
+
+  CompressParams cparams = CParamsForLossless();
+  cparams.options.predictor = {Predictor::Gradient};
+  cparams.options.skip_encoder_fast_path = true;
+  cparams.speed_tier = SpeedTier::kLightning;
+  DecompressParams dparams;
+  // First with the encoder fast path skipped, then with it enabled.
+  CodecInOut skipped, enabled;
+  EXPECT_LE(Roundtrip(&original, cparams, dparams, &pool, &skipped), 3500000);
+  EXPECT_EQ(0.0, ButteraugliDistance(original, skipped, cparams.ba_params,
+                                     /*distmap=*/nullptr, &pool));
+  cparams.options.skip_encoder_fast_path = false;
+  EXPECT_LE(Roundtrip(&original, cparams, dparams, &pool, &enabled), 3500000);
+  EXPECT_EQ(0.0, ButteraugliDistance(original, enabled, cparams.ba_params,
+                                     /*distmap=*/nullptr, &pool));
+}
+
+TEST(JxlTest, JXL_SLOW_TEST(RoundtripLossless8Falcon)) {
+  ThreadPoolInternal pool(8);
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/tmshre_riaphotographs_srgb8.png");
+  CodecInOut original;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &original, &pool));
+
+  // Lossless settings, run at the kFalcon speed tier.
+  DecompressParams dparams;
+  CompressParams cparams = CParamsForLossless();
+  cparams.speed_tier = SpeedTier::kFalcon;
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&original, cparams, dparams, &pool, &decoded), 3500000);
+  EXPECT_EQ(0.0, ButteraugliDistance(original, decoded, cparams.ba_params,
+                                     /*distmap=*/nullptr, &pool));
+}
+
+TEST(JxlTest, RoundtripLossless8Alpha) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/tmshre_riaphotographs_alpha.png");
+  CodecInOut original;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &original, pool));
+  EXPECT_EQ(8, original.metadata.m.GetAlphaBits());
+  EXPECT_EQ(8, original.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_EQ(0, original.metadata.m.bit_depth.exponent_bits_per_sample);
+  EXPECT_FALSE(original.metadata.m.bit_depth.floating_point_sample);
+
+  DecompressParams dparams;
+  CompressParams cparams = CParamsForLossless();
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&original, cparams, dparams, pool, &decoded), 350000);
+  // If fails, see note about floating point in RoundtripLossless8.
+  EXPECT_EQ(0.0, ButteraugliDistance(original, decoded, cparams.ba_params,
+                                     /*distmap=*/nullptr, pool));
+  EXPECT_TRUE(SamePixels(*original.Main().alpha(), *decoded.Main().alpha()));
+  // The decoded metadata must report the same alpha and sample depths.
+  EXPECT_EQ(8, decoded.metadata.m.GetAlphaBits());
+  EXPECT_EQ(8, decoded.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_EQ(0, decoded.metadata.m.bit_depth.exponent_bits_per_sample);
+  EXPECT_FALSE(decoded.metadata.m.bit_depth.floating_point_sample);
+}
+
+TEST(JxlTest, RoundtripLossless16Alpha) {
+  ThreadPool* pool = nullptr;
+
+  const size_t xsize = 1200;
+  const size_t ysize = 160;
+  const float unit = 1.0f / 65535;
+  Image3F color(xsize, ysize);
+  ImageF alpha(xsize, ysize);
+  // Generate 16-bit pattern that uses various colors and alpha values.
+  for (size_t y = 0; y < ysize; y++) {
+    for (size_t x = 0; x < xsize; x++) {
+      color.PlaneRow(0, y)[x] = (y * 65535 / ysize) * unit;
+      color.PlaneRow(1, y)[x] = (x * 65535 / xsize) * unit;
+      color.PlaneRow(2, y)[x] = ((y + x) * 65535 / (xsize + ysize)) * unit;
+      alpha.Row(y)[x] = (x * 65535 / xsize) * unit;
+    }
+  }
+  CodecInOut original;
+  original.metadata.m.SetUintSamples(16);
+  original.metadata.m.SetAlphaBits(16);
+  original.metadata.m.color_encoding = ColorEncoding::SRGB(/*is_gray=*/false);
+  original.SetFromImage(std::move(color), original.metadata.m.color_encoding);
+  original.Main().SetAlpha(std::move(alpha), /*alpha_is_premultiplied=*/false);
+
+  EXPECT_EQ(16, original.metadata.m.GetAlphaBits());
+  EXPECT_EQ(16, original.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_EQ(0, original.metadata.m.bit_depth.exponent_bits_per_sample);
+  EXPECT_FALSE(original.metadata.m.bit_depth.floating_point_sample);
+
+  DecompressParams dparams;
+  CompressParams cparams = CParamsForLossless();
+
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&original, cparams, dparams, pool, &decoded), 7100);
+  // Exact equality is intended. If this fails with a butteraugli distance
+  // that is very close to, but not exactly, 0.0, a floating point issue is
+  // the likely cause, on either side of the comparison: the values of
+  // `original` (here generated by the loop above) or those of `decoded`
+  // (produced by the jxl decoder). If the two use slightly different floating
+  // point operations (say, one does "i / 257.0f" while the other does
+  // "i * (1.0f / 257)"), they will get slightly different values. To fix,
+  // ensure both sides use the following formula for converting integer range
+  // 0-65535 to Image3F floating point range 0.0f-255.0f: "i * (1.0f / 257)".
+  // Note that this precision issue is not a problem in practice if the
+  // values are equal when rounded to 16-bit int, but currently full exact
+  // precision is tested.
+  EXPECT_EQ(0.0, ButteraugliDistance(original, decoded, cparams.ba_params,
+                                     /*distmap=*/nullptr, pool));
+  EXPECT_TRUE(SamePixels(*original.Main().alpha(), *decoded.Main().alpha()));
+  EXPECT_EQ(16, decoded.metadata.m.GetAlphaBits());
+  EXPECT_EQ(16, decoded.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_EQ(0, decoded.metadata.m.bit_depth.exponent_bits_per_sample);
+  EXPECT_FALSE(decoded.metadata.m.bit_depth.floating_point_sample);
+}
+
+TEST(JxlTest, RoundtripLossless16AlphaNotMisdetectedAs8Bit) {
+  ThreadPool* pool = nullptr;
+
+  const size_t xsize = 128;
+  const size_t ysize = 128;
+  const float unit = 1.0f / 65535;
+  Image3F color(xsize, ysize);
+  ImageF alpha(xsize, ysize);
+  // All 16-bit values, both color and alpha, of this image are below 64.
+  // This allows testing if a code path wrongly concludes it's an 8-bit instead
+  // of 16-bit image (or even 6-bit).
+  for (size_t y = 0; y < ysize; y++) {
+    for (size_t x = 0; x < xsize; x++) {
+      color.PlaneRow(0, y)[x] = (y * 64 / ysize) * unit;
+      color.PlaneRow(1, y)[x] = (x * 64 / xsize) * unit;
+      color.PlaneRow(2, y)[x] = ((y + x) * 64 / (xsize + ysize)) * unit;
+      alpha.Row(y)[x] = (64 * x / xsize) * unit;
+    }
+  }
+  CodecInOut original;
+  original.metadata.m.SetUintSamples(16);
+  original.metadata.m.SetAlphaBits(16);
+  original.metadata.m.color_encoding = ColorEncoding::SRGB(/*is_gray=*/false);
+  original.SetFromImage(std::move(color), original.metadata.m.color_encoding);
+  original.Main().SetAlpha(std::move(alpha), /*alpha_is_premultiplied=*/false);
+
+  EXPECT_EQ(16, original.metadata.m.GetAlphaBits());
+  EXPECT_EQ(16, original.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_EQ(0, original.metadata.m.bit_depth.exponent_bits_per_sample);
+  EXPECT_FALSE(original.metadata.m.bit_depth.floating_point_sample);
+
+  DecompressParams dparams;
+  CompressParams cparams = CParamsForLossless();
+
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&original, cparams, dparams, pool, &decoded), 3100);
+  EXPECT_EQ(16, decoded.metadata.m.GetAlphaBits());
+  EXPECT_EQ(16, decoded.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_EQ(0, decoded.metadata.m.bit_depth.exponent_bits_per_sample);
+  EXPECT_FALSE(decoded.metadata.m.bit_depth.floating_point_sample);
+  // If fails, see note about floating point in RoundtripLossless8.
+  EXPECT_EQ(0.0, ButteraugliDistance(original, decoded, cparams.ba_params,
+                                     /*distmap=*/nullptr, pool));
+  EXPECT_TRUE(SamePixels(*original.Main().alpha(), *decoded.Main().alpha()));
+}
+
+TEST(JxlTest, RoundtripYCbCr420) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes png =
+      ReadTestData("imagecompression.info/flower_foveon.png");
+  CodecInOut rgb;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &rgb, pool));
+  const PaddedBytes y4m =
+      ReadTestData("imagecompression.info/flower_foveon.png.ffmpeg.y4m");
+  CodecInOut yuv;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(y4m), &yuv, pool));
+
+  DecompressParams dparams;
+  CompressParams cparams = CParamsForLossless();
+  cparams.speed_tier = SpeedTier::kThunder;
+
+  AuxOut* aux = nullptr;
+  PassesEncoderState state;
+  PaddedBytes jxl;
+  EXPECT_TRUE(EncodeFile(cparams, &yuv, &state, &jxl, aux, pool));
+  CodecInOut decoded;
+  EXPECT_TRUE(DecodeFile(dparams, jxl, &decoded, pool));
+
+  EXPECT_LE(jxl.size(), 1320000);
+
+  // The y4m (YCbCr 4:2:0) version is what gets encoded; the decoded result
+  // is then compared with the original PNG.
+  EXPECT_LE(ButteraugliDistance(rgb, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            2.5);
+}
+
+TEST(JxlTest, RoundtripDots) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/cvo9xd_keong_macan_srgb8.png");
+  CodecInOut original;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &original, pool));
+
+  ASSERT_NE(original.xsize(), 0);
+  EXPECT_TRUE(original.metadata.m.color_encoding.tf.IsSRGB());
+  EXPECT_EQ(8, original.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_EQ(0, original.metadata.m.bit_depth.exponent_bits_per_sample);
+  EXPECT_FALSE(original.metadata.m.bit_depth.floating_point_sample);
+
+  // Enable dots explicitly (Override::kOn).
+  DecompressParams dparams;
+  CompressParams cparams;
+  cparams.dots = Override::kOn;
+  cparams.butteraugli_distance = 0.04;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  AuxOut* aux = nullptr;
+  PassesEncoderState state;
+  PaddedBytes jxl;
+  EXPECT_TRUE(EncodeFile(cparams, &original, &state, &jxl, aux, pool));
+  CodecInOut decoded;
+  EXPECT_TRUE(DecodeFile(dparams, jxl, &decoded, pool));
+
+  EXPECT_LE(jxl.size(), 400000);
+  EXPECT_LE(ButteraugliDistance(original, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            2.2);
+}
+
+TEST(JxlTest, RoundtripNoise) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/cvo9xd_keong_macan_srgb8.png");
+  CodecInOut original;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &original, pool));
+
+  ASSERT_NE(original.xsize(), 0);
+  EXPECT_TRUE(original.metadata.m.color_encoding.tf.IsSRGB());
+  EXPECT_EQ(8, original.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_EQ(0, original.metadata.m.bit_depth.exponent_bits_per_sample);
+  EXPECT_FALSE(original.metadata.m.bit_depth.floating_point_sample);
+
+  // Enable noise explicitly (Override::kOn).
+  DecompressParams dparams;
+  CompressParams cparams;
+  cparams.noise = Override::kOn;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  AuxOut* aux = nullptr;
+  PassesEncoderState state;
+  PaddedBytes jxl;
+  EXPECT_TRUE(EncodeFile(cparams, &original, &state, &jxl, aux, pool));
+  CodecInOut decoded;
+  EXPECT_TRUE(DecodeFile(dparams, jxl, &decoded, pool));
+
+  EXPECT_LE(jxl.size(), 40000);
+  EXPECT_LE(ButteraugliDistance(original, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            2.2);
+}
+
+TEST(JxlTest, RoundtripLossless8Gray) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/cvo9xd_keong_macan_grayscale.png");
+  CodecInOut gray;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &gray, pool));
+
+  EXPECT_TRUE(gray.Main().IsGray());
+  EXPECT_EQ(8, gray.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_EQ(0, gray.metadata.m.bit_depth.exponent_bits_per_sample);
+  EXPECT_FALSE(gray.metadata.m.bit_depth.floating_point_sample);
+
+  DecompressParams dparams;
+  CompressParams cparams = CParamsForLossless();
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&gray, cparams, dparams, pool, &decoded), 130000);
+  // If fails, see note about floating point in RoundtripLossless8.
+  EXPECT_EQ(0.0, ButteraugliDistance(gray, decoded, cparams.ba_params,
+                                     /*distmap=*/nullptr, pool));
+  EXPECT_TRUE(decoded.Main().IsGray());
+  EXPECT_EQ(8, decoded.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_EQ(0, decoded.metadata.m.bit_depth.exponent_bits_per_sample);
+  EXPECT_FALSE(decoded.metadata.m.bit_depth.floating_point_sample);
+}
+
+#if JPEGXL_ENABLE_GIF
+
+TEST(JxlTest, RoundtripAnimation) {
+  // The bound is selected here rather than inside EXPECT_LE: preprocessor
+  // directives within a macro's argument list are undefined behavior.
+#if JXL_HIGH_PRECISION
+  const double kMaxDistance = 1.55;
+#else
+  const double kMaxDistance = 1.75;
+#endif
+  ThreadPool* pool = nullptr;
+  const PaddedBytes orig = ReadTestData("jxl/traffic_light.gif");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, pool));
+  ASSERT_EQ(4, io.frames.size());
+  CompressParams cparams;
+  DecompressParams dparams;
+  CodecInOut io2;
+  EXPECT_LE(Roundtrip(&io, cparams, dparams, pool, &io2), 3000);
+  EXPECT_EQ(io2.frames.size(), io.frames.size());
+  test::CoalesceGIFAnimationWithAlpha(&io);
+  EXPECT_LE(ButteraugliDistance(io, io2, cparams.ba_params,
+                                /*distmap=*/nullptr, pool), kMaxDistance);
+}
+
+TEST(JxlTest, RoundtripLosslessAnimation) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes gif = ReadTestData("jxl/traffic_light.gif");
+  CodecInOut animation;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(gif), &animation, pool));
+  ASSERT_EQ(4, animation.frames.size());
+
+  DecompressParams dparams;
+  CompressParams cparams = CParamsForLossless();
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&animation, cparams, dparams, pool, &decoded), 1200);
+  // Same frame count expected; the source is coalesced before comparing.
+  EXPECT_EQ(decoded.frames.size(), animation.frames.size());
+  test::CoalesceGIFAnimationWithAlpha(&animation);
+  EXPECT_LE(ButteraugliDistance(animation, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            5e-4);
+}
+
+#endif  // JPEGXL_ENABLE_GIF
+
+#if JPEGXL_ENABLE_JPEG
+
+namespace {
+
+// Test helper: decodes the codestream in `container` with keep_dct set and
+// re-encodes the result as a JPEG (quantized coefficients) into `output`.
+jxl::Status DecompressJxlToJPEGForTest(
+    const jpegxl::tools::JpegXlContainer& container, jxl::ThreadPool* pool,
+    jxl::PaddedBytes* output) {
+  output->clear();
+  jxl::Span<const uint8_t> compressed(container.codestream,
+                                      container.codestream_size);
+  JXL_RETURN_IF_ERROR(compressed.size() >= 2);
+
+  // Decode to DCT when possible and generate a JPG file.
+  jxl::DecompressParams dparams;
+  dparams.keep_dct = true;
+  jxl::CodecInOut decoded;
+  if (!jpegxl::tools::DecodeJpegXlToJpeg(dparams, container, &decoded, pool)) {
+    return JXL_FAILURE("Failed to decode JXL to JPEG");
+  }
+  decoded.jpeg_quality = 95;
+  if (!EncodeImageJPG(&decoded, jxl::JpegEncoder::kLibJpeg,
+                      decoded.jpeg_quality, jxl::YCbCrChromaSubsampling(),
+                      pool, output, jxl::DecodeTarget::kQuantizedCoeffs)) {
+    return JXL_FAILURE("Failed to generate JPEG");
+  }
+  return true;
+}
+
+}  // namespace
+
+// Encodes `jpeg_in` into a JPEG XL container that carries JPEG reconstruction
+// data, decodes it back to a JPEG, and expects a byte-identical result (at
+// most five mismatching bytes are reported). Returns the container size.
+size_t RoundtripJpeg(const PaddedBytes& jpeg_in, ThreadPool* pool) {
+  CodecInOut io;
+  io.dec_target = jxl::DecodeTarget::kQuantizedCoeffs;
+  EXPECT_TRUE(SetFromBytes(Span<const uint8_t>(jpeg_in), &io, pool));
+  CompressParams cparams;
+  cparams.color_transform = jxl::ColorTransform::kYCbCr;
+  PassesEncoderState passes_enc_state;
+  PaddedBytes compressed, codestream;
+  EXPECT_TRUE(EncodeFile(cparams, &io, &passes_enc_state, &codestream,
+                         /*aux_out=*/nullptr, pool));
+  jpegxl::tools::JpegXlContainer enc_container;
+  enc_container.codestream = codestream.data();
+  enc_container.codestream_size = codestream.size();
+  jpeg::JPEGData data_in = *io.Main().jpeg_data;
+  jxl::PaddedBytes jpeg_data;
+  EXPECT_TRUE(EncodeJPEGData(data_in, &jpeg_data));
+  enc_container.jpeg_reconstruction = jpeg_data.data();
+  enc_container.jpeg_reconstruction_size = jpeg_data.size();
+  EXPECT_TRUE(EncodeJpegXlContainerOneShot(enc_container, &compressed));
+
+  jpegxl::tools::JpegXlContainer container;
+  EXPECT_TRUE(DecodeJpegXlContainerOneShot(compressed.data(), compressed.size(),
+                                           &container));
+  PaddedBytes out;
+  EXPECT_TRUE(DecompressJxlToJPEGForTest(container, pool, &out));
+  EXPECT_EQ(out.size(), jpeg_in.size());
+  size_t failures = 0;
+  for (size_t i = 0; i < std::min(out.size(), jpeg_in.size()); i++) {
+    if (out[i] == jpeg_in[i]) continue;
+    // Stream the bytes as integers; uint8_t would print as raw characters.
+    EXPECT_EQ(out[i], jpeg_in[i])
+        << "byte mismatch " << i << " " << static_cast<int>(out[i])
+        << " != " << static_cast<int>(jpeg_in[i]);
+    if (++failures > 4) break;
+  }
+  return compressed.size();
+}
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression444)) {
+  ThreadPoolInternal pool(8);
+  // Input JPEG: 326'916 bytes; the recompressed size must be <= 256000.
+  const PaddedBytes jpeg =
+      ReadTestData("imagecompression.info/flower_foveon.png.im_q85_444.jpg");
+  EXPECT_LE(RoundtripJpeg(jpeg, &pool), 256000);
+}
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels)) {
+  ThreadPoolInternal pool(8);
+  const PaddedBytes jpeg =
+      ReadTestData("imagecompression.info/flower_foveon.png.im_q85_444.jpg");
+  // `coeffs` is loaded as quantized coefficients and is what gets encoded;
+  // `reference` is loaded with dec_target left untouched, for the comparison.
+  CodecInOut coeffs;
+  coeffs.dec_target = jxl::DecodeTarget::kQuantizedCoeffs;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(jpeg), &coeffs, &pool));
+  CodecInOut reference;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(jpeg), &reference, &pool));
+
+  DecompressParams dparams;
+  CompressParams cparams;
+  cparams.color_transform = jxl::ColorTransform::kYCbCr;
+
+  CodecInOut decoded;
+  Roundtrip(&coeffs, cparams, dparams, &pool, &decoded);
+
+  // TODO(eustas): investigate, why SJPEG and JpegRecompression pixels are
+  // different.
+  EXPECT_GE(1.8, ButteraugliDistance(reference, decoded, cparams.ba_params,
+                                     /*distmap=*/nullptr, &pool));
+}
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels420)) {
+  ThreadPoolInternal pool(8);
+  const PaddedBytes jpeg =
+      ReadTestData("imagecompression.info/flower_foveon.png.im_q85_420.jpg");
+  // `coeffs` is loaded as quantized coefficients and is what gets encoded;
+  // `reference` is loaded with dec_target left untouched, for the comparison.
+  CodecInOut coeffs;
+  coeffs.dec_target = jxl::DecodeTarget::kQuantizedCoeffs;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(jpeg), &coeffs, &pool));
+  CodecInOut reference;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(jpeg), &reference, &pool));
+
+  DecompressParams dparams;
+  CompressParams cparams;
+  cparams.color_transform = jxl::ColorTransform::kYCbCr;
+
+  CodecInOut decoded;
+  Roundtrip(&coeffs, cparams, dparams, &pool, &decoded);
+
+  EXPECT_GE(1.5, ButteraugliDistance(reference, decoded, cparams.ba_params,
+                                     /*distmap=*/nullptr, &pool));
+}
+
+TEST(JxlTest,
+     JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels420Mul16)) {
+  ThreadPoolInternal pool(8);
+  const PaddedBytes jpeg =
+      ReadTestData("imagecompression.info/flower_foveon_cropped.jpg");
+  // `coeffs` is loaded as quantized coefficients and is what gets encoded;
+  // `reference` is loaded with dec_target left untouched, for the comparison.
+  CodecInOut coeffs;
+  coeffs.dec_target = jxl::DecodeTarget::kQuantizedCoeffs;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(jpeg), &coeffs, &pool));
+  CodecInOut reference;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(jpeg), &reference, &pool));
+
+  DecompressParams dparams;
+  CompressParams cparams;
+  cparams.color_transform = jxl::ColorTransform::kYCbCr;
+
+  CodecInOut decoded;
+  Roundtrip(&coeffs, cparams, dparams, &pool, &decoded);
+
+  EXPECT_GE(1.5, ButteraugliDistance(reference, decoded, cparams.ba_params,
+                                     /*distmap=*/nullptr, &pool));
+}
+
+TEST(JxlTest,
+     JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels_asymmetric)) {
+  ThreadPoolInternal pool(8);
+  const PaddedBytes jpeg = ReadTestData(
+      "imagecompression.info/flower_foveon.png.im_q85_asymmetric.jpg");
+  // `coeffs` is loaded as quantized coefficients and is what gets encoded;
+  // `reference` is loaded with dec_target left untouched, for the comparison.
+  CodecInOut coeffs;
+  coeffs.dec_target = jxl::DecodeTarget::kQuantizedCoeffs;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(jpeg), &coeffs, &pool));
+  CodecInOut reference;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(jpeg), &reference, &pool));
+
+  DecompressParams dparams;
+  CompressParams cparams;
+  cparams.color_transform = jxl::ColorTransform::kYCbCr;
+
+  CodecInOut decoded;
+  Roundtrip(&coeffs, cparams, dparams, &pool, &decoded);
+
+  EXPECT_GE(1.5, ButteraugliDistance(reference, decoded, cparams.ba_params,
+                                     /*distmap=*/nullptr, &pool));
+}
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionGray)) {
+  ThreadPoolInternal pool(8);
+  // Input JPEG: 167'025 bytes; the recompressed size must be <= 140000.
+  const PaddedBytes jpeg =
+      ReadTestData("imagecompression.info/flower_foveon.png.im_q85_gray.jpg");
+  EXPECT_LE(RoundtripJpeg(jpeg, &pool), 140000);
+}
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression420)) {
+  // The input JPEG is 226'018 bytes; RoundtripJpeg must report <= 181050.
+  ThreadPoolInternal workers(8);
+  const PaddedBytes jpeg =
+      ReadTestData("imagecompression.info/flower_foveon.png.im_q85_420.jpg");
+  EXPECT_LE(RoundtripJpeg(jpeg, &workers), 181050);
+}
+
+TEST(JxlTest,
+     JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression_luma_subsample)) {
+  // The input JPEG is 216'069 bytes; RoundtripJpeg must report <= 181000.
+  ThreadPoolInternal workers(8);
+  const PaddedBytes jpeg = ReadTestData(
+      "imagecompression.info/flower_foveon.png.im_q85_luma_subsample.jpg");
+  EXPECT_LE(RoundtripJpeg(jpeg, &workers), 181000);
+}
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression444_12)) {
+  // 444 JPEG with an unusual sampling factor (1x2, 1x2, 1x2). The input is
+  // 329'942 bytes; RoundtripJpeg must report <= 256000.
+  ThreadPoolInternal workers(8);
+  const PaddedBytes jpeg = ReadTestData(
+      "imagecompression.info/flower_foveon.png.im_q85_444_1x2.jpg");
+  EXPECT_LE(RoundtripJpeg(jpeg, &workers), 256000);
+}
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression422)) {
+  // The input JPEG is 265'590 bytes; RoundtripJpeg must report <= 209000.
+  ThreadPoolInternal workers(8);
+  const PaddedBytes jpeg =
+      ReadTestData("imagecompression.info/flower_foveon.png.im_q85_422.jpg");
+  EXPECT_LE(RoundtripJpeg(jpeg, &workers), 209000);
+}
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression440)) {
+  // The input JPEG is 262'249 bytes; RoundtripJpeg must report <= 209000.
+  ThreadPoolInternal workers(8);
+  const PaddedBytes jpeg =
+      ReadTestData("imagecompression.info/flower_foveon.png.im_q85_440.jpg");
+  EXPECT_LE(RoundtripJpeg(jpeg, &workers), 209000);
+}
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression_asymmetric)) {
+  // One chroma channel is downsampled 2x vertically, the other 2x
+  // horizontally. The stated JPEG size is 262'249 bytes; RoundtripJpeg must
+  // report <= 209000.
+  ThreadPoolInternal workers(8);
+  const PaddedBytes jpeg = ReadTestData(
+      "imagecompression.info/flower_foveon.png.im_q85_asymmetric.jpg");
+  EXPECT_LE(RoundtripJpeg(jpeg, &workers), 209000);
+}
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression420Progr)) {
+  ThreadPoolInternal workers(8);
+  const PaddedBytes jpeg = ReadTestData(
+      "imagecompression.info/flower_foveon.png.im_q85_420_progr.jpg");
+  EXPECT_LE(RoundtripJpeg(jpeg, &workers), 181000);  // upper bound on result
+}
+
+#endif  // JPEGXL_ENABLE_JPEG
+
+TEST(JxlTest, RoundtripProgressive) {
+  ThreadPoolInternal workers(4);
+  const PaddedBytes png =
+      ReadTestData("imagecompression.info/flower_foveon.png");
+  CodecInOut source;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &source, &workers));
+  source.ShrinkTo(600, 1024);
+
+  // Turn on progressive_dc, responsive and progressive_mode at distance 1.0.
+  CompressParams cparams;
+  cparams.butteraugli_distance = 1.0f;
+  cparams.progressive_dc = true;
+  cparams.responsive = true;
+  cparams.progressive_mode = true;
+  DecompressParams dparams;
+  CodecInOut decoded;
+  EXPECT_LE(Roundtrip(&source, cparams, dparams, &workers, &decoded), 40000);
+  EXPECT_LE(ButteraugliDistance(source, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, &workers),
+            4.0f);
+}
+
+TEST(JxlTest, RoundtripAnimationPatches) {
+  ThreadPool* no_pool = nullptr;
+  const PaddedBytes gif = ReadTestData("jxl/animation_patches.gif");
+  CodecInOut anim;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(gif), &anim, no_pool));
+  ASSERT_EQ(2, anim.frames.size());
+
+  CompressParams cparams;
+  cparams.patches = Override::kOn;
+  DecompressParams dparams;
+  CodecInOut decoded;
+  // 40k with no patches, 27k with patch frames encoded multiple times.
+  EXPECT_LE(Roundtrip(&anim, cparams, dparams, no_pool, &decoded), 24000);
+
+  // The frame count must survive the roundtrip.
+  EXPECT_EQ(decoded.frames.size(), anim.frames.size());
+  EXPECT_LE(ButteraugliDistance(anim, decoded, cparams.ba_params,
+                                /*distmap=*/nullptr, no_pool),
+            2.0);  // >10 with broken patches
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/lehmer_code.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/lehmer_code.h
new file mode 100644
index 0000000000..dd1d21c6f7
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/lehmer_code.h
@@ -0,0 +1,102 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_LEHMER_CODE_H_
+#define LIB_JXL_LEHMER_CODE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Permutation <=> factorial base representation (Lehmer code).
+
+using LehmerT = uint32_t;
+
+// Returns t with every bit cleared except its least-significant set bit.
+// Yields 0 when t is 0.
+template <typename T>
+constexpr T ValueOfLowest1Bit(T t) { return t & (~t + 1); }
+
+// Writes the Lehmer (factorial-base) code of `permutation` into code[0..n).
+// `permutation` holds n unique indices in [0..n). Runs in N*logN time.
+// `temp` needs n + 1 elements; its prior contents are overwritten.
+template <typename PermutationT>
+void ComputeLehmerCode(const PermutationT* JXL_RESTRICT permutation,
+                       uint32_t* JXL_RESTRICT temp, const size_t n,
+                       LehmerT* JXL_RESTRICT code) {
+  // temp acts as a 1-based Fenwick tree counting the values seen so far.
+  const size_t tree_size = n + 1;
+  for (size_t k = 0; k < tree_size; ++k) temp[k] = 0;
+
+  for (size_t pos = 0; pos < n; ++pos) {
+    const PermutationT value = permutation[pos];
+
+    // Prefix-sum query over tree nodes covering slots 1 .. value + 1.
+    uint32_t seen = 0;
+    for (uint32_t node = value + 1; node != 0; node &= node - 1) {
+      seen += temp[node];
+    }
+    JXL_DASSERT(value >= seen);
+    code[pos] = value - seen;
+
+    // Point update: count `value` in every tree node that covers it.
+    for (uint32_t node = value + 1; node < tree_size;
+         node += ValueOfLowest1Bit(node)) {
+      temp[node] += 1;
+    }
+  }
+}
+
+// Inverse of ComputeLehmerCode: expands code[0..n) into permutation[0..n).
+// temp must have 1 << CeilLog2(n) elements; prior contents are overwritten.
+template <typename PermutationT>
+void DecodeLehmerCode(const LehmerT* JXL_RESTRICT code,
+                      uint32_t* JXL_RESTRICT temp, size_t n,
+                      PermutationT* JXL_RESTRICT permutation) {
+  JXL_DASSERT(n != 0);
+  const size_t levels = CeilLog2Nonzero(n);
+  const size_t capacity = 1ull << levels;
+
+  // Slot k starts out holding the lowest set bit of k + 1.
+  for (size_t k = 0; k < capacity; k++) {
+    const int32_t one_based = static_cast<int32_t>(k + 1);
+    temp[k] = static_cast<uint32_t>(ValueOfLowest1Bit(one_based));
+  }
+
+  for (size_t pos = 0; pos < n; pos++) {
+    JXL_DASSERT(code[pos] + pos < n);
+    uint32_t rank = code[pos] + 1;
+
+    // Walk the implicit order-statistics tree to the rank-th unused element.
+    size_t found = 0;
+    size_t step = capacity;
+    for (size_t level = 0; level <= levels; level++) {
+      const size_t cand = found + step;
+      JXL_DASSERT(cand >= 1);
+      step >>= 1;
+      const uint32_t count = temp[cand - 1];
+      if (count < rank) {
+        found = cand;
+        rank -= count;
+      }
+    }
+    permutation[pos] = found;
+
+    // Mark as used: decrement every slot on the update path.
+    for (size_t node = found + 1; node <= capacity;
+         node += ValueOfLowest1Bit(node)) {
+      temp[node - 1] -= 1;
+    }
+  }
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_LEHMER_CODE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/lehmer_code_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/lehmer_code_test.cc
new file mode 100644
index 0000000000..1ce5618ea1
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/lehmer_code_test.cc
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/lehmer_code.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <numeric>
+#include <random>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/thread_pool_internal.h"
+
+namespace jxl {
+namespace {
+
+template <typename PermutationT>
+struct WorkingSet {  // Scratch buffers for Roundtrip, sized once for max_n.
+  explicit WorkingSet(size_t max_n)
+      : padded_n(1ull << CeilLog2Nonzero(max_n + 1)),
+        permutation(max_n),
+        temp(padded_n),  // padded_n is declared first, so it is already set
+        lehmer(max_n),
+        decoded(max_n) {}
+
+  size_t padded_n;  // element count of temp
+  std::vector<PermutationT> permutation;  // permutation under test
+  std::vector<uint32_t> temp;  // scratch passed to both encode and decode
+  std::vector<LehmerT> lehmer;  // output of ComputeLehmerCode
+  std::vector<PermutationT> decoded;  // output of DecodeLehmerCode
+};
+
+// Encodes and decodes 100 random permutations of [0, n) using the buffers in
+// *ws and expects every decoded permutation to equal its source.
+template <typename PermutationT>
+void Roundtrip(size_t n, WorkingSet<PermutationT>* ws) {
+  JXL_ASSERT(n != 0);
+  const size_t decode_slots = 1ull << CeilLog2Nonzero(n);
+  std::mt19937 rng(n * 65537 + 13);  // seed depends only on n
+
+  // Ensure indices fit into PermutationT
+  EXPECT_LE(n, 1ULL << (sizeof(PermutationT) * 8));
+
+  const auto first = ws->permutation.begin();
+  const auto last = first + n;
+  std::iota(first, last, 0);
+  for (size_t round = 0; round < 100; ++round) {
+    std::shuffle(first, last, rng);
+    ComputeLehmerCode(ws->permutation.data(), ws->temp.data(), n,
+                      ws->lehmer.data());
+    // Clear the decode_slots entries of temp handed to the decoder.
+    memset(ws->temp.data(), 0, decode_slots * sizeof(uint32_t));
+    DecodeLehmerCode(ws->lehmer.data(), ws->temp.data(), n, ws->decoded.data());
+
+    for (size_t k = 0; k < n; ++k) {
+      EXPECT_EQ(ws->permutation[k], ws->decoded[k]);
+    }
+  }
+}
+
+// Runs Roundtrip for every n in [begin, end) through RunOnPool. The init
+// callback creates one WorkingSet per thread, each sized for end - 1, and
+// every n handled by a thread reuses that thread's set.
+template <typename PermutationT>
+void RoundtripSizeRange(ThreadPool* pool, uint32_t begin, uint32_t end) {
+  ASSERT_NE(0, begin);  // n = 0 not allowed.
+  std::vector<WorkingSet<PermutationT>> per_thread;
+
+  const auto allocate = [&per_thread, end](size_t num_threads) {
+    for (size_t t = 0; t < num_threads; t++) {
+      per_thread.emplace_back(end - 1);
+    }
+    return true;
+  };
+  const auto run_one = [&per_thread](int n, int thread) {
+    Roundtrip(n, &per_thread[thread]);
+  };
+  RunOnPool(pool, begin, end, allocate, run_one, "lehmer test");
+}
+
+TEST(LehmerCodeTest, TestRoundtrips) {
+  ThreadPoolInternal workers(8);
+  // Every permutation length from 1 through 1025, with 16-bit indices.
+  RoundtripSizeRange<uint16_t>(&workers, 1, 1026);
+
+  // Ensures PermutationT can fit > 16 bit values.
+  RoundtripSizeRange<uint32_t>(&workers, 65536, 65540);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/libjxl.pc.in b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/libjxl.pc.in
new file mode 100644
index 0000000000..5dca2ac168
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/libjxl.pc.in
@@ -0,0 +1,12 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+libdir=${exec_prefix}/@CMAKE_INSTALL_LIBDIR@
+includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
+
+Name: libjxl
+Description: Loads and saves JPEG XL files
+Version: @JPEGXL_LIBRARY_VERSION@
+Requires.private: @JPEGXL_LIBRARY_REQUIRES@
+Libs: -L${libdir} -ljxl
+Libs.private: -lm
+Cflags: -I${includedir}
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/linalg.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/linalg.cc
new file mode 100644
index 0000000000..61d66dd8db
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/linalg.cc
@@ -0,0 +1,235 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/linalg.h"
+
+#include <stdlib.h>
+
+#include <cmath>
+#include <deque>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+void AssertSymmetric(const ImageD& A) {
+#if JXL_ENABLE_ASSERT  // Otherwise this function does nothing.
+  JXL_ASSERT(A.xsize() == A.ysize());
+  for (size_t r = 0; r < A.xsize(); ++r) {
+    for (size_t c = r + 1; c < A.xsize(); ++c) {  // above-diagonal entries
+      JXL_ASSERT(std::abs(A.Row(r)[c] - A.Row(c)[r]) < 1e-15);
+    }
+  }
+#endif
+}
+
+// Sets *c and *s to the cosine and sine of the rotation angle used for the
+// 2x2 block with diagonal (a0, a1) and off-diagonal b; angle 0 if |b| < 1e-15.
+void Diagonalize2x2(const double a0, const double a1, const double b, double* c,
+                    double* s) {
+  double theta = 0.0;  // cos(0) == 1 and sin(0) == 0, i.e. no rotation
+  if (!(std::abs(b) < 1e-15)) {
+    const double half_phi = 0.5 * std::atan2(2 * b, a1 - a0);
+    theta = b > 0.0 ? half_phi : half_phi + Pi(1.0);
+  }
+  *c = std::cos(theta);
+  *s = std::sin(theta);
+}
+
+// Sets *c and *s so that c^2 + s^2 = 1 and s * x + c * y = 0.
+void GivensRotation(const double x, const double y, double* c, double* s) {
+  if (y == 0.0) {
+    *c = x < 0.0 ? -1.0 : 1.0;
+    *s = 0.0;
+    return;
+  }
+  const double inv_norm = 1.0 / hypot(x, y);
+  *c = x * inv_norm;
+  *s = -y * inv_norm;
+}
+
+// Rotates Row(i) and Row(j) of the square matrix U by (c, s). For every k:
+//   new_i[k] = old_i[k] * c - old_j[k] * s
+//   new_j[k] = old_i[k] * s + old_j[k] * c
+// Done in place: both old values are read before either is overwritten, so
+// no temporary buffers are allocated.
+void RotateMatrixCols(ImageD* const JXL_RESTRICT U, int i, int j, double c,
+                      double s) {
+  JXL_ASSERT(U->xsize() == U->ysize());
+  const size_t N = U->xsize();
+  double* const JXL_RESTRICT u_i = U->Row(i);
+  double* const JXL_RESTRICT u_j = U->Row(j);
+  for (size_t k = 0; k < N; ++k) {
+    const double old_i = u_i[k];
+    const double old_j = u_j[k];
+    u_i[k] = old_i * c - old_j * s;
+    u_j[k] = old_i * s + old_j * c;
+  }
+}
+// Writes to u[0..N) the unit-length multiple of x -/+ |x| * e0 (e0 being the
+// first basis vector): |x| is subtracted from x[0] when x[0] <= 0, added
+// otherwise.
+void HouseholderReflector(const size_t N, const double* x, double* u) {
+  const double x_norm = std::sqrt(DotProduct(N, x, x));
+  u[0] = x[0] <= 0.0 ? x[0] - x_norm : x[0] + x_norm;
+  for (size_t k = 1; k < N; ++k) u[k] = x[k];
+  // Scale to unit length.
+  const double inv_len = 1.0 / std::sqrt(DotProduct(N, u, u));
+  for (size_t k = 0; k < N; ++k) u[k] *= inv_len;
+}
+
+// Reduces the symmetric matrix A to tri-diagonal *T with one Householder
+// reflector per row that needs it; the reflectors are applied to *U last-first.
+void ConvertToTridiagonal(const ImageD& A, ImageD* const JXL_RESTRICT T,
+                          ImageD* const JXL_RESTRICT U) {
+  AssertSymmetric(A);
+  const size_t N = A.xsize();
+  *U = Identity<double>(A.xsize());
+  *T = CopyImage(A);
+  std::vector<ImageD> reflectors;
+  for (size_t k = 0; k + 2 < N; ++k) {
+    const double* const tail = &T->Row(k)[k + 2];
+    if (!(DotProduct(N - k - 2, tail, tail) > 1e-15)) continue;  // already ~0
+    ImageD u(N, 1);
+    ZeroFillImage(&u);
+    HouseholderReflector(N - k - 1, &T->Row(k)[k + 1], &u.Row(0)[k + 1]);
+    ImageD v = MatMul(*T, u);
+    const double scale = DotProduct(u, v);
+    v = LinComb(2.0, v, -2.0 * scale, u);
+    SubtractFrom(MatMul(u, Transpose(v)), T);
+    SubtractFrom(MatMul(v, Transpose(u)), T);
+    reflectors.emplace_back(std::move(u));
+  }
+  for (auto it = reflectors.rbegin(); it != reflectors.rend(); ++it) {
+    const ImageD v = MatMul(Transpose(*U), *it);
+    SubtractFrom(ScaleImage(2.0, MatMul(*it, Transpose(v))), U);
+  }
+}
+
+// Wilkinson shift a1 - b^2 / (d + sign(d) * hypot(d, b)), d = (a0 - a1) / 2.
+// Uses double-precision std::hypot; hypotf would round d and b to float.
+double WilkinsonShift(const double a0, const double a1, const double b) {
+  const double d = 0.5 * (a0 - a1);
+  if (d == 0.0) return a1 - std::abs(b);
+  const double sign_d = d > 0.0 ? 1.0 : -1.0;
+  return a1 - b * b / (d + sign_d * std::hypot(d, b));
+}
+
+void ImplicitQRStep(ImageD* const JXL_RESTRICT U, double* const JXL_RESTRICT a,
+                    double* const JXL_RESTRICT b, int m0, int m1) {  // one sweep
+  JXL_ASSERT(m1 - m0 > 2);  // ConvertToDiagonal handles the 2-entry case itself
+  double x = a[m0] - WilkinsonShift(a[m1 - 2], a[m1 - 1], b[m1 - 1]);
+  double y = b[m0 + 1];  // (x, y) is the pair the first rotation acts on
+  for (int k = m0; k < m1 - 1; ++k) {
+    double c, s;
+    GivensRotation(x, y, &c, &s);  // rotation for entries k and k + 1
+    const double w = c * x - s * y;
+    const double d = a[k] - a[k + 1];
+    const double z = (2 * c * b[k + 1] + d * s) * s;
+    a[k] -= z;  // the diagonal pair changes by -z / +z
+    a[k + 1] += z;
+    b[k + 1] = d * c * s + (c * c - s * s) * b[k + 1];
+    x = b[k + 1];  // becomes the first component of the next rotation input
+    if (k > m0) {
+      b[k] = w;  // b[m0] lies outside the block and is never written
+    }
+    if (k < m1 - 2) {
+      y = -s * b[k + 2];  // new off-band term; the next rotation acts on it
+      b[k + 2] *= c;
+    }
+    RotateMatrixCols(U, k, k + 1, c, s);  // accumulate the rotation into U
+  }
+}
+
+// Appends each run of >= 2 coupled entries in [istart, iend) to *intervals.
+void ScanInterval(const double* const JXL_RESTRICT a,
+                  const double* const JXL_RESTRICT b, int istart,
+                  const int iend, const double eps,
+                  std::deque<std::pair<int, int> >* intervals) {
+  for (int k = istart; k < iend; ++k) {
+    const bool split_here =
+        (k + 1 == iend) ||
+        std::abs(b[k + 1]) < eps * (std::abs(a[k]) + std::abs(a[k + 1]));
+    if (!split_here) continue;
+    if (k > istart) intervals->push_back(std::make_pair(istart, k + 1));
+    istart = k + 1;
+  }
+}
+
+// Diagonalizes the symmetric matrix A: reduces it to tri-diagonal form, then
+// runs the symmetric tri-diagonal QR algorithm with implicit Wilkinson shift.
+// On return *diag is an N x 1 image of the diagonal; *U holds the transform.
+void ConvertToDiagonal(const ImageD& A, ImageD* const JXL_RESTRICT diag,
+                       ImageD* const JXL_RESTRICT U) {
+  AssertSymmetric(A);
+  const size_t N = A.xsize();
+  ImageD T;
+  ConvertToTridiagonal(A, &T, U);
+  // Only the diagonal (a) and the off-diagonal (b) are tracked from here on.
+  std::vector<double> a(N);
+  std::vector<double> b(N);
+  for (size_t k = 0; k < N; ++k) {
+    a[k] = T.Row(k)[k];
+    if (k > 0) b[k] = T.Row(k)[k - 1];
+  }
+  const double kEpsilon = 1e-14;
+  std::deque<std::pair<int, int> > pending;
+  ScanInterval(&a[0], &b[0], 0, N, kEpsilon, &pending);
+  while (!pending.empty()) {
+    const std::pair<int, int> range = pending.front();
+    pending.pop_front();
+    const int lo = range.first;
+    const int hi = range.second;
+    if (hi != lo + 2) {
+      ImplicitQRStep(U, &a[0], &b[0], lo, hi);
+      ScanInterval(&a[0], &b[0], lo, hi, kEpsilon, &pending);
+      continue;
+    }
+    // Two-entry interval: diagonalize it directly with a single rotation.
+    double& a0 = a[lo];
+    double& a1 = a[lo + 1];
+    double& b1 = b[lo + 1];
+    double c, s;
+    Diagonalize2x2(a0, a1, b1, &c, &s);
+    const double d = a0 - a1;
+    const double z = (2 * c * b1 + d * s) * s;
+    a0 -= z;
+    a1 += z;
+    b1 = 0.0;
+    RotateMatrixCols(U, lo, lo + 1, c, s);
+  }
+  *diag = ImageD(N, 1);
+  for (size_t k = 0; k < N; ++k) diag->Row(0)[k] = a[k];
+}
+
+// Factors the square matrix A into *Q and *R using Householder reflectors;
+// the reflectors applied to *R are afterwards applied to *Q in reverse order.
+void ComputeQRFactorization(const ImageD& A, ImageD* const JXL_RESTRICT Q,
+                            ImageD* const JXL_RESTRICT R) {
+  JXL_ASSERT(A.xsize() == A.ysize());
+  const size_t N = A.xsize();
+  *Q = Identity<double>(N);
+  *R = CopyImage(A);
+  std::vector<ImageD> reflectors;
+  for (size_t k = 0; k + 1 < N; ++k) {
+    const double* const rest = &R->Row(k)[k + 1];
+    if (!(DotProduct(N - k - 1, rest, rest) > 1e-15)) continue;  // already ~0
+    ImageD u(N, 1);
+    FillImage(0.0, &u);
+    HouseholderReflector(N - k, &R->Row(k)[k], &u.Row(0)[k]);
+    const ImageD v = MatMul(Transpose(u), *R);
+    SubtractFrom(ScaleImage(2.0, MatMul(u, v)), R);
+    reflectors.emplace_back(std::move(u));
+  }
+  for (auto it = reflectors.rbegin(); it != reflectors.rend(); ++it) {
+    const ImageD v = MatMul(Transpose(*it), *Q);
+    SubtractFrom(ScaleImage(2.0, MatMul(*it, v)), Q);
+  }
+}
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/linalg.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/linalg.h
new file mode 100644
index 0000000000..7fbd943d90
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/linalg.h
@@ -0,0 +1,294 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_LINALG_H_
+#define LIB_JXL_LINALG_H_
+
+// Linear algebra.
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+using ImageD = Plane<double>;
+
+// Returns the sum over k in [0, N) of a[k] * b[k]; zero when N is 0.
+template <typename T>
+inline T DotProduct(const size_t N, const T* const JXL_RESTRICT a,
+                    const T* const JXL_RESTRICT b) {
+  T total = 0.0;
+  // Accumulated left to right in type T.
+  for (size_t k = 0; k != N; ++k) total += a[k] * b[k];
+  return total;
+}
+
+template <typename T>
+inline T L2NormSquared(const size_t N, const T* const JXL_RESTRICT a) {
+  return DotProduct(N, a, a);  // sum of a[k] * a[k] over k in [0, N)
+}
+
+// Returns the sum of the absolute values of a[0..N), accumulated in type T.
+// Zero when N is 0.
+template <typename T>
+inline T L1Norm(const size_t N, const T* const JXL_RESTRICT a) {
+  T total = 0;
+  for (const T* p = a; p != a + N; ++p) total += *p >= 0 ? *p : -*p;
+  return total;
+}
+
+// Dot product of two single-row images of equal width (both asserted).
+inline double DotProduct(const ImageD& a, const ImageD& b) {
+  JXL_ASSERT(a.ysize() == 1);
+  JXL_ASSERT(b.ysize() == 1);
+  JXL_ASSERT(a.xsize() == b.xsize());
+  const size_t width = a.xsize();
+  return DotProduct(width, a.Row(0), b.Row(0));
+}
+
+// Returns the transpose: out.Row(x)[y] == A.Row(y)[x].
+inline ImageD Transpose(const ImageD& A) {
+  const size_t xs = A.xsize(), ys = A.ysize();
+  ImageD out(ys, xs);
+  for (size_t x = 0; x < xs; ++x) {
+    double* const JXL_RESTRICT dst = out.Row(x);
+    for (size_t y = 0; y < ys; ++y) dst[y] = A.Row(y)[x];
+  }
+  return out;
+}
+
+// out.Row(y)[x] = sum over k of A.Row(k)[x] * B.Row(y)[k], summed in Tout.
+template <typename Tout, typename Tin1, typename Tin2>
+Plane<Tout> MatMul(const Plane<Tin1>& A, const Plane<Tin2>& B) {
+  JXL_ASSERT(A.ysize() == B.xsize());
+  Plane<Tout> out(A.xsize(), B.ysize());
+  for (size_t y = 0; y < B.ysize(); ++y) {
+    const Tin2* const JXL_RESTRICT row_b = B.Row(y);
+    Tout* const JXL_RESTRICT row_out = out.Row(y);
+    for (size_t x = 0; x < A.xsize(); ++x) {
+      Tout acc = 0.0;
+      for (size_t k = 0; k < B.xsize(); ++k) acc += A.Row(k)[x] * row_b[k];
+      row_out[x] = acc;
+    }
+  }
+  return out;
+}
+
+template <typename T1, typename T2>
+ImageD MatMul(const Plane<T1>& A, const Plane<T2>& B) {
+  return MatMul<double, T1, T2>(A, B);  // same product, double output plane
+}
+
+template <typename T1, typename T2>
+ImageI MatMulI(const Plane<T1>& A, const Plane<T2>& B) {
+  return MatMul<int, T1, T2>(A, B);  // same product, int output plane
+}
+
+// Computes the row-major product c = a * b, where a has ha rows and wa
+// columns, b has wa rows and wb columns, and c has ha rows and wb columns.
+// Each dot product is accumulated in double and then stored as T.
+template <typename T>
+void MatMul(const T* a, const T* b, int ha, int wa, int wb, T* c) {
+  std::vector<T> column(wa);  // contiguous copy of one column of b
+  for (int col = 0; col < wb; col++) {
+    for (int k = 0; k < wa; k++) column[k] = b[k * wb + col];
+    // Combine every row of a with the cached column.
+    for (int row = 0; row < ha; row++) {
+      const T* a_row = a + row * wa;
+      double acc = 0;
+      for (int k = 0; k < wa; k++) acc += a_row[k] * column[k];
+      c[row * wb + col] = acc;
+    }
+  }
+}
+
+// Element-wise c[i] = a[i] + b[i] * factor over all h * w entries; c may
+// alias a or b because entry i is read before it is written.
+template <typename T, typename F>
+void MatAdd(const T* a, const T* b, F factor, int h, int w, T* c) {
+  const int count = w * h;
+  for (int i = 0; i < count; i++) c[i] = a[i] + b[i] * factor;
+}
+
+// Returns an N x N plane with ones on the diagonal and zeros elsewhere.
+template <typename T>
+inline Plane<T> Identity(const size_t N) {
+  Plane<T> out(N, N);
+  for (size_t r = 0; r < N; ++r) {
+    T* JXL_RESTRICT row = out.Row(r);
+    for (size_t c = 0; c < N; ++c) row[c] = static_cast<T>(c == r ? 1.0 : 0);
+  }
+  return out;
+}
+
+// Expands the single-row image d into a square matrix with d on the diagonal.
+inline ImageD Diagonal(const ImageD& d) {
+  JXL_ASSERT(d.ysize() == 1);
+  const size_t n = d.xsize();
+  ImageD out(n, n);
+  for (size_t k = 0; k < n; ++k) {
+    std::fill(out.Row(k), out.Row(k) + n, 0.0);
+    out.Row(k)[k] = d.Row(0)[k];
+  }
+  return out;
+}
+
+// Computes c, s such that c^2 + s^2 = 1 and
+//   [c -s] [x] = [ * ]
+//   [s  c] [y]   [ 0 ]
+void GivensRotation(double x, double y, double* c, double* s);
+
+// U = U * Givens(i, j, c, s)
+void RotateMatrixCols(ImageD* JXL_RESTRICT U, int i, int j, double c, double s);
+
+// A is symmetric, U is orthogonal, T is tri-diagonal and
+// A = U * T * Transpose(U).
+void ConvertToTridiagonal(const ImageD& A, ImageD* JXL_RESTRICT T,
+                          ImageD* JXL_RESTRICT U);
+
+// A is symmetric, U is orthogonal, and A = U * Diagonal(diag) * Transpose(U).
+void ConvertToDiagonal(const ImageD& A, ImageD* JXL_RESTRICT diag,
+                       ImageD* JXL_RESTRICT U);
+
+// A is square matrix, Q is orthogonal, R is upper triangular and A = Q * R;
+void ComputeQRFactorization(const ImageD& A, ImageD* JXL_RESTRICT Q,
+                            ImageD* JXL_RESTRICT R);
+
+// Inverts the 3x3 matrix (9 values) in place; fails if |det| < 1e-10.
+template <typename T>
+Status Inv3x3Matrix(T* matrix) {
+  // Intermediate computation is done in double precision.
+  double temp[9];  // adjugate of the input, one 2x2 minor per entry
+  temp[0] = static_cast<double>(matrix[4]) * matrix[8] -
+            static_cast<double>(matrix[5]) * matrix[7];
+  temp[1] = static_cast<double>(matrix[2]) * matrix[7] -
+            static_cast<double>(matrix[1]) * matrix[8];
+  temp[2] = static_cast<double>(matrix[1]) * matrix[5] -
+            static_cast<double>(matrix[2]) * matrix[4];
+  temp[3] = static_cast<double>(matrix[5]) * matrix[6] -
+            static_cast<double>(matrix[3]) * matrix[8];
+  temp[4] = static_cast<double>(matrix[0]) * matrix[8] -
+            static_cast<double>(matrix[2]) * matrix[6];
+  temp[5] = static_cast<double>(matrix[2]) * matrix[3] -
+            static_cast<double>(matrix[0]) * matrix[5];
+  temp[6] = static_cast<double>(matrix[3]) * matrix[7] -
+            static_cast<double>(matrix[4]) * matrix[6];
+  temp[7] = static_cast<double>(matrix[1]) * matrix[6] -
+            static_cast<double>(matrix[0]) * matrix[7];
+  temp[8] = static_cast<double>(matrix[0]) * matrix[4] -
+            static_cast<double>(matrix[1]) * matrix[3];
+  double det = matrix[0] * temp[0] + matrix[1] * temp[3] + matrix[2] * temp[6];
+  if (std::abs(det) < 1e-10) {  // matrix has not been modified at this point
+    return JXL_FAILURE("Matrix determinant is too close to 0");
+  }
+  double idet = 1.0 / det;
+  for (int i = 0; i < 9; i++) {
+    matrix[i] = temp[i] * idet;  // inverse = adjugate / determinant
+  }
+  return true;
+}
+
+// Solves the linear system A * X = B with the conjugate gradient method.
+// a is n*n, symmetric and positive definite; b and x have n elements. x is
+// read as the starting guess and overwritten with the solution.
+template <typename T>
+void ConjugateGradient(const T* a, int n, const T* b, T* x) {
+  std::vector<T> r(n);
+  MatMul(a, x, n, n, 1, r.data());
+  MatAdd(b, r.data(), -1, n, 1, r.data());  // r = b - a * x
+  std::vector<T> p = r;  // p starts out equal to r
+  T rr;
+  MatMul(r.data(), r.data(), 1, n, 1, &rr);  // inner product
+
+  if (rr == 0) return;  // The initial values were already optimal
+  // At most n iterations; leaves early if alpha is 0 or rr2 drops below 1e-20.
+  for (int i = 0; i < n; i++) {
+    std::vector<T> ap(n);
+    MatMul(a, p.data(), n, n, 1, ap.data());  // ap = a * p
+    T alpha;
+    MatMul(r.data(), ap.data(), 1, n, 1, &alpha);  // alpha = r . ap for now
+    // Normally alpha couldn't be zero here but if numerical issues caused it,
+    // return assuming the solution is close.
+    if (alpha == 0) return;
+    alpha = rr / alpha;  // step length
+    MatAdd(x, p.data(), alpha, n, 1, x);  // x += alpha * p
+    MatAdd(r.data(), ap.data(), -alpha, n, 1, r.data());  // r -= alpha * ap
+
+    T rr2;
+    MatMul(r.data(), r.data(), 1, n, 1, &rr2);  // inner product
+    if (rr2 < 1e-20) break;
+
+    T beta = rr2 / rr;
+    MatAdd(r.data(), p.data(), beta, 1, n, p.data());  // p = r + beta * p
+    rr = rr2;
+  }
+}
+
+// Computes optimal coefficients r to approximate points p with a linear
+// combination of functions f, minimizing mean square error (finite element
+// method). f has h rows (functions) and w columns (points); p has w values.
+// r has h values: read as the initial guess, then overwritten with the result.
+template <typename T>
+void FEM(const T* f, int h, int w, const T* p, T* r) {
+  // Compute "Gramian" matrix G = F * F^T
+  // Speed up multiplication by using non-zero intervals in sparse F.
+  std::vector<int> start(h);
+  std::vector<int> end(h);
+  for (int y = 0; y < h; y++) {
+    start[y] = end[y] = 0;  // stays an empty interval if row y is all zeros
+    for (int x = 0; x < w; x++) {
+      if (f[y * w + x] != 0) {
+        start[y] = x;  // first non-zero column of row y
+        break;
+      }
+    }
+    for (int x = w - 1; x >= 0; x--) {
+      if (f[y * w + x] != 0) {
+        end[y] = x + 1;  // one past the last non-zero column of row y
+        break;
+      }
+    }
+  }
+
+  std::vector<T> g(h * h);
+  for (int y = 0; y < h; y++) {
+    for (int x = 0; x <= y; x++) {
+      T v = 0;
+      // Intersection of the two sparse intervals.
+      int s = std::max(start[x], start[y]);
+      int e = std::min(end[x], end[y]);
+      for (int z = s; z < e; z++) {
+        v += f[x * w + z] * f[y * w + z];
+      }
+      // Symmetric, so two values output at once
+      g[y * h + x] = v;
+      g[x * h + y] = v;
+    }
+  }
+
+  // B vector: for each row y of F, the dot product of that row with p.
+  std::vector<T> b(h, 0);
+  for (int y = 0; y < h; y++) {
+    T v = 0;
+    for (int x = 0; x < w; x++) {
+      v += f[y * w + x] * p[x];
+    }
+    b[y] = v;
+  }
+  // Solve G * r = b, starting from the values already stored in r.
+  ConjugateGradient(g.data(), h, b.data(), r);
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_LINALG_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/linalg_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/linalg_test.cc
new file mode 100644
index 0000000000..0842f61dad
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/linalg_test.cc
@@ -0,0 +1,149 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/linalg.h"
+
+#include <random>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/image_test_utils.h"
+
+namespace jxl {
+namespace {
+
+template <typename T, typename Random>
+Plane<T> RandomMatrix(const size_t xsize, const size_t ysize, Random& rng,
+                      const T vmin, const T vmax) {
+  Plane<T> A(xsize, ysize);
+  // NOTE(review): presumably gen yields values between vmin and vmax - confirm.
+  GeneratorRandom<T, Random> gen(&rng, vmin, vmax);
+  GenerateImage(gen, &A);  // fills A from gen
+  return A;
+}
+
+// Random N x N matrix made symmetric by copying Row(i)[j] to Row(j)[i], j < i.
+template <typename T, typename Random>
+Plane<T> RandomSymmetricMatrix(const size_t N, Random& rng, const T vmin,
+                               const T vmax) {
+  Plane<T> A = RandomMatrix<T>(N, N, rng, vmin, vmax);
+  for (size_t i = 1; i < N; ++i) {
+    const T* const src = A.Row(i);
+    for (size_t j = 0; j < i; ++j) A.Row(j)[i] = src[j];
+  }
+  return A;
+}
+void VerifyMatrixEqual(const ImageD& A, const ImageD& B, const double eps) {
+  ASSERT_EQ(A.xsize(), B.xsize());  // dimensions are checked before contents
+  ASSERT_EQ(A.ysize(), B.ysize());
+  for (size_t y = 0; y < A.ysize(); ++y) {
+    for (size_t x = 0; x < A.xsize(); ++x) {
+      ASSERT_NEAR(A.Row(y)[x], B.Row(y)[x], eps);  // element-wise, within eps
+    }
+  }
+}
+
+void VerifyOrthogonal(const ImageD& A, const double eps) {
+  VerifyMatrixEqual(Identity<double>(A.xsize()), MatMul(Transpose(A), A), eps);
+}  // i.e. checks that MatMul(Transpose(A), A) equals the identity within eps
+
+void VerifyTridiagonal(const ImageD& T, const double eps) {
+  ASSERT_EQ(T.xsize(), T.ysize());  // must be square
+  for (size_t i = 0; i < T.xsize(); ++i) {
+    for (size_t j = i + 2; j < T.xsize(); ++j) {  // two or more off the diagonal
+      ASSERT_NEAR(T.Row(i)[j], 0.0, eps);
+      ASSERT_NEAR(T.Row(j)[i], 0.0, eps);  // mirrored entry
+    }
+  }
+}
+
+void VerifyUpperTriangular(const ImageD& R, const double eps) {
+  ASSERT_EQ(R.xsize(), R.ysize());  // must be square
+  for (size_t i = 0; i < R.xsize(); ++i) {
+    for (size_t j = i + 1; j < R.xsize(); ++j) {
+      ASSERT_NEAR(R.Row(i)[j], 0.0, eps);  // Row(i)[j] with j > i must be ~0
+    }
+  }
+}
+
+TEST(LinAlgTest, ConvertToTridiagonal) {
+  {  // Identity input: both T and U must come back as the identity.
+    ImageD eye = Identity<double>(5);
+    ImageD T, U;
+    ConvertToTridiagonal(eye, &T, &U);
+    VerifyMatrixEqual(eye, T, 1e-15);
+    VerifyMatrixEqual(eye, U, 1e-15);
+  }
+  {  // Hand-built symmetric matrix; note this case calls ConvertToDiagonal.
+    ImageD A = Identity<double>(5);
+    A.Row(0)[1] = A.Row(1)[0] = 2.0;
+    A.Row(0)[4] = A.Row(4)[0] = 3.0;
+    A.Row(2)[3] = A.Row(3)[2] = 2.0;
+    A.Row(3)[4] = A.Row(4)[3] = 2.0;
+    ImageD U, d;
+    ConvertToDiagonal(A, &d, &U);
+    VerifyOrthogonal(U, 1e-12);
+    VerifyMatrixEqual(A, MatMul(U, MatMul(Diagonal(d), Transpose(U))), 1e-12);
+  }
+  std::mt19937_64 rng;  // default-constructed, hence default-seeded
+  for (int size = 2; size < 100; ++size) {
+    ImageD A = RandomSymmetricMatrix(size, rng, -1.0, 1.0);
+    ImageD T, U;
+    ConvertToTridiagonal(A, &T, &U);
+    VerifyOrthogonal(U, 1e-12);
+    VerifyTridiagonal(T, 1e-12);
+    VerifyMatrixEqual(A, MatMul(U, MatMul(T, Transpose(U))), 1e-12);
+  }
+}
+
+TEST(LinAlgTest, ConvertToDiagonal) {
+  {  // Identity input: U stays the identity and every diagonal entry is 1.
+    ImageD eye = Identity<double>(5);
+    ImageD U, d;
+    ConvertToDiagonal(eye, &d, &U);
+    VerifyMatrixEqual(eye, U, 1e-15);
+    for (int idx = 0; idx < 5; ++idx) {
+      ASSERT_NEAR(d.Row(0)[idx], 1.0, 1e-15);
+    }
+  }
+  {  // Hand-built symmetric matrix: A must equal U * Diagonal(d) * U^T.
+    ImageD A = Identity<double>(5);
+    A.Row(0)[1] = A.Row(1)[0] = 2.0;
+    A.Row(2)[3] = A.Row(3)[2] = 2.0;
+    A.Row(3)[4] = A.Row(4)[3] = 2.0;
+    ImageD U, d;
+    ConvertToDiagonal(A, &d, &U);
+    VerifyOrthogonal(U, 1e-12);
+    VerifyMatrixEqual(A, MatMul(U, MatMul(Diagonal(d), Transpose(U))), 1e-12);
+  }
+  std::mt19937_64 rng;  // default-constructed, hence default-seeded
+  for (int size = 2; size < 100; ++size) {
+    ImageD A = RandomSymmetricMatrix(size, rng, -1.0, 1.0);
+    ImageD U, d;
+    ConvertToDiagonal(A, &d, &U);
+    VerifyOrthogonal(U, 1e-12);
+    VerifyMatrixEqual(A, MatMul(U, MatMul(Diagonal(d), Transpose(U))), 1e-12);
+  }
+}
+
+TEST(LinAlgTest, ComputeQRFactorization) {
+  {  // Identity input: both factors must come back as the identity.
+    ImageD eye = Identity<double>(5);
+    ImageD Q, R;
+    ComputeQRFactorization(eye, &Q, &R);
+    VerifyMatrixEqual(eye, Q, 1e-15);
+    VerifyMatrixEqual(eye, R, 1e-15);
+  }
+  std::mt19937_64 rng;  // default-constructed, hence default-seeded
+  for (int size = 2; size < 100; ++size) {
+    ImageD A = RandomMatrix(size, size, rng, -1.0, 1.0);
+    ImageD Q, R;
+    ComputeQRFactorization(A, &Q, &R);
+    VerifyOrthogonal(Q, 1e-12);
+    VerifyUpperTriangular(R, 1e-12);
+    VerifyMatrixEqual(A, MatMul(Q, R), 1e-12);
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/loop_filter.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/loop_filter.cc
new file mode 100644
index 0000000000..afa36a44e6
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/loop_filter.cc
@@ -0,0 +1,87 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/loop_filter.h"
+
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+
+LoopFilter::LoopFilter() { Bundle::Init(this); }  // NOTE(review): presumably applies the field defaults; confirm.
+Status LoopFilter::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  // Visits the serialized fields in a fixed order. NOTE(review): the first
+  // argument of each Bool/Bits/F16 call looks like the field default; confirm.
+  if (visitor->AllDefault(*this, &all_default)) {
+    // Overwrite all serialized fields, but not any nonserialized_*.
+    visitor->SetDefault(this);
+    return true;
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(true, &gab));
+  if (visitor->Conditional(gab)) {  // Gaborish fields are visited only if gab.
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &gab_custom));
+    if (visitor->Conditional(gab_custom)) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(1.1 * 0.104699568f, &gab_x_weight1));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(1.1 * 0.055680538f, &gab_x_weight2));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(1.1 * 0.104699568f, &gab_y_weight1));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(1.1 * 0.055680538f, &gab_y_weight2));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(1.1 * 0.104699568f, &gab_b_weight1));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(1.1 * 0.055680538f, &gab_b_weight2));
+    }
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(2, 2, &epf_iters));
+  if (visitor->Conditional(epf_iters > 0)) {  // No EPF fields if iters == 0.
+    if (visitor->Conditional(!nonserialized_is_modular)) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &epf_sharp_custom));
+      if (visitor->Conditional(epf_sharp_custom)) {
+        for (size_t i = 0; i < kEpfSharpEntries; ++i) {
+          JXL_QUIET_RETURN_IF_ERROR(visitor->F16(
+              float(i) / float(kEpfSharpEntries - 1), &epf_sharp_lut[i]));
+        }
+      }
+    }
+
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &epf_weight_custom));
+    if (visitor->Conditional(epf_weight_custom)) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(40.0f, &epf_channel_scale[0]));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(5.0f, &epf_channel_scale[1]));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(3.5f, &epf_channel_scale[2]));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.45f, &epf_pass1_zeroflush));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.6f, &epf_pass2_zeroflush));
+    }
+
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &epf_sigma_custom));
+    if (visitor->Conditional(epf_sigma_custom)) {
+      if (visitor->Conditional(!nonserialized_is_modular)) {
+        JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.46f, &epf_quant_mul));
+      }
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.9f, &epf_pass0_sigma_scale));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(6.5f, &epf_pass2_sigma_scale));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(0.6666666666666666f, &epf_border_sad_mul));
+    }
+    if (visitor->Conditional(nonserialized_is_modular)) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(1.0f, &epf_sigma_for_modular));
+      if (epf_sigma_for_modular < 1e-8) {  // Checked right after the visit.
+        return JXL_FAILURE("EPF: sigma for modular is too small");
+      }
+    }
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions));
+  // Extensions: in chronological order of being added to the format.
+  return visitor->EndExtensions();
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/loop_filter.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/loop_filter.h
new file mode 100644
index 0000000000..ffa68b5120
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/loop_filter.h
@@ -0,0 +1,78 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_LOOP_FILTER_H_
+#define LIB_JXL_LOOP_FILTER_H_
+
+// Parameters for loop filter(s), stored in each frame.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+struct LoopFilter : public Fields {
+  LoopFilter();
+  const char* Name() const override { return "LoopFilter"; }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+  // Table lookup by epf_iters (four entries), plus 1 when gab is set.
+  size_t Padding() const {
+    static const size_t padding_per_epf_iter[4] = {0, 2, 3, 6};
+    return padding_per_epf_iter[epf_iters] + (gab ? 1 : 0);
+  }
+
+  mutable bool all_default;  // Handed to Visitor::AllDefault in VisitFields.
+
+  // --- Gaborish convolution
+  bool gab;  // When false, VisitFields skips every gab_* field below.
+
+  bool gab_custom;  // When false, the six weights below are not visited.
+  float gab_x_weight1;
+  float gab_x_weight2;
+  float gab_y_weight1;
+  float gab_y_weight2;
+  float gab_b_weight1;
+  float gab_b_weight2;
+
+  // --- Edge-preserving filter
+
+  // Number of EPF stages to apply. 0 means EPF disabled. 1 applies only the
+  // first stage, 2 applies both stages and 3 applies the first stage twice and
+  // the second stage once.
+  uint32_t epf_iters;
+
+  bool epf_sharp_custom;  // Only visited when !nonserialized_is_modular.
+  enum { kEpfSharpEntries = 8 };
+  float epf_sharp_lut[kEpfSharpEntries];
+
+  bool epf_weight_custom;      // Custom weight params
+  float epf_channel_scale[3];  // Relative weight of each channel
+  float epf_pass1_zeroflush;   // Minimum weight for first pass
+  float epf_pass2_zeroflush;   // Minimum weight for second pass
+
+  bool epf_sigma_custom;        // Custom sigma parameters
+  float epf_quant_mul;          // Sigma is ~ this * quant
+  float epf_pass0_sigma_scale;  // Multiplier for sigma in pass 0
+  float epf_pass2_sigma_scale;  // Multiplier for sigma in the second pass
+  float epf_border_sad_mul;     // (inverse) multiplier for sigma on borders
+
+  float epf_sigma_for_modular;  // Modular only; VisitFields rejects < 1e-8.
+
+  uint64_t extensions;  // Handed to Visitor::BeginExtensions.
+
+  bool nonserialized_is_modular = false;  // Not visited; gates fields above.
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_LOOP_FILTER_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/luminance.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/luminance.cc
new file mode 100644
index 0000000000..9eba4d4011
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/luminance.cc
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/luminance.h"
+
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+
+namespace jxl {
+
+void SetIntensityTarget(CodecInOut* io) {
+  auto& meta = io->metadata.m;
+  if (io->target_nits != 0) {  // An explicitly requested target always wins.
+    meta.SetIntensityTarget(io->target_nits);
+    return;
+  }
+  auto& tf = meta.color_encoding.tf;
+  if (tf.IsPQ()) {
+    // SMPTE ST 2084:2014 defines this as the peak luminance of PQ.
+    meta.SetIntensityTarget(10000);
+  } else if (tf.IsHLG()) {
+    // Rec. ITU-R BT.2100-2 nominal display peak luminance (reference value).
+    meta.SetIntensityTarget(1000);
+  } else {
+    meta.SetIntensityTarget(kDefaultIntensityTarget);  // SDR
+  }
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/luminance.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/luminance.h
new file mode 100644
index 0000000000..c6a9d9e184
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/luminance.h
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_LUMINANCE_H_
+#define LIB_JXL_LUMINANCE_H_
+
+namespace jxl {
+
+// SetIntensityTarget(io) sets the intensity target of io->metadata.m. A
+// non-zero io->target_nits is used as-is. Otherwise the value follows the
+// transfer function of the image: 10000 for PQ, 1000 for HLG, and
+// kDefaultIntensityTarget for everything else (SDR, or not known to be HDR).
+// (The forward declaration below is only needed for the signature.)
+class CodecInOut;
+void SetIntensityTarget(CodecInOut* io);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_LUMINANCE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/memory_manager_internal.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/memory_manager_internal.cc
new file mode 100644
index 0000000000..87727e75cd
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/memory_manager_internal.cc
@@ -0,0 +1,18 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/memory_manager_internal.h"
+
+#include <stdlib.h>
+
+namespace jxl {
+
+// Default callbacks: forward to malloc()/free(); `opaque` is not used.
+void* MemoryManagerDefaultAlloc(void* opaque, size_t size) {
+  return ::malloc(size);
+}
+void MemoryManagerDefaultFree(void* opaque, void* address) { ::free(address); }
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/memory_manager_internal.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/memory_manager_internal.h
new file mode 100644
index 0000000000..b4a78903fe
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/memory_manager_internal.h
@@ -0,0 +1,101 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MEMORY_MANAGER_INTERNAL_H_
+#define LIB_JXL_MEMORY_MANAGER_INTERNAL_H_
+
+// Helpers for allocating and freeing through a JxlMemoryManager.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>  // memcpy
+
+#include <atomic>
+#include <memory>
+
+#include "jxl/memory_manager.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Default alloc and free functions.
+void* MemoryManagerDefaultAlloc(void* opaque, size_t size);
+void MemoryManagerDefaultFree(void* opaque, void* address);
+
+// Initializes the memory manager instance with the passed one. The
+// MemoryManager passed in |memory_manager| may be NULL or contain NULL
+// functions which will be initialized with the default ones. If either alloc
+// or free are NULL, then both must be NULL, otherwise this function returns an
+// error.
+static JXL_INLINE Status MemoryManagerInit(
+    JxlMemoryManager* self, const JxlMemoryManager* memory_manager) {
+  if (memory_manager == nullptr) {  // Nothing supplied: start all-zero.
+    memset(self, 0, sizeof(*self));
+  } else {
+    *self = *memory_manager;
+  }
+  const bool has_alloc = self->alloc != nullptr;
+  const bool has_free = self->free != nullptr;
+  // A custom allocator is only accepted as a complete alloc/free pair.
+  if (has_alloc != has_free) return false;
+  if (!has_alloc) self->alloc = jxl::MemoryManagerDefaultAlloc;
+  if (!has_free) self->free = jxl::MemoryManagerDefaultFree;
+  return true;
+}
+
+static JXL_INLINE void* MemoryManagerAlloc(
+    const JxlMemoryManager* memory_manager, size_t size) {
+  return (*memory_manager->alloc)(memory_manager->opaque, size);  // user hook
+}
+
+static JXL_INLINE void MemoryManagerFree(const JxlMemoryManager* memory_manager,
+                                         void* address) {
+  (*memory_manager->free)(memory_manager->opaque, address);  // user hook
+}
+
+// Helper class to be used as a deleter in a unique_ptr<T> call.
+class MemoryManagerDeleteHelper {
+ public:
+  explicit MemoryManagerDeleteHelper(const JxlMemoryManager* memory_manager)
+      : memory_manager_(memory_manager) {}
+
+  // Runs the destructor of *address, then hands the storage to the `free`
+  // callback of the stored memory manager. A null pointer is ignored.
+  template <typename T>
+  void operator()(T* address) const {
+    if (address != nullptr) {
+      address->~T();
+      memory_manager_->free(memory_manager_->opaque, address);
+    }
+  }
+
+ private:
+  const JxlMemoryManager* memory_manager_;  // Stored as given; never freed here.
+};
+
+template <typename T>
+using MemoryManagerUniquePtr = std::unique_ptr<T, MemoryManagerDeleteHelper>;
+
+// Creates a new object T allocating it with the memory allocator into a
+// unique_ptr.
+template <typename T, typename... Args>
+JXL_INLINE MemoryManagerUniquePtr<T> MemoryManagerMakeUnique(
+    const JxlMemoryManager* memory_manager, Args&&... args) {
+  const MemoryManagerDeleteHelper deleter(memory_manager);
+  // Raw, uninitialized storage for one T from the manager's `alloc` callback.
+  T* const storage =
+      static_cast<T*>(memory_manager->alloc(memory_manager->opaque, sizeof(T)));
+  // Allocation failure: hand back an empty pointer without constructing T.
+  if (storage == nullptr) return MemoryManagerUniquePtr<T>(nullptr, deleter);
+  // Placement-new constructs T in the storage, forwarding the arguments.
+  T* const object = new (storage) T(std::forward<Args>(args)...);
+  return MemoryManagerUniquePtr<T>(object, deleter);
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MEMORY_MANAGER_INTERNAL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/context_predict.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/context_predict.h
new file mode 100644
index 0000000000..63c7f7bb65
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/context_predict.h
@@ -0,0 +1,653 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_CONTEXT_PREDICT_H_
+#define LIB_JXL_MODULAR_ENCODING_CONTEXT_PREDICT_H_
+
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+namespace weighted {
+constexpr static size_t kNumPredictors = 4;  // Size of State::prediction[].
+constexpr static int64_t kPredExtraBits = 3;  // Shift applied by State::AddBits.
+constexpr static int64_t kPredictionRound = ((1 << kPredExtraBits) >> 1) - 1;
+constexpr static size_t kNumProperties = 1;  // Properties State::Predict writes.
+
+struct Header : public Fields {
+  const char *Name() const override { return "WeightedPredictorHeader"; }
+  // TODO(janwas): move to cc file, avoid including fields.h.
+  Header() { Bundle::Init(this); }
+
+  Status VisitFields(Visitor *JXL_RESTRICT visitor) override {
+    if (visitor->AllDefault(*this, &all_default)) {
+      // Overwrite all serialized fields, but not any nonserialized_*.
+      visitor->SetDefault(this);
+      return true;
+    }
+    auto visit_p = [visitor](pixel_type val, pixel_type *p) {
+      uint32_t up = *p;  // Bits() is given a uint32_t*, so use a temporary.
+      JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(5, val, &up));
+      *p = up;
+      return Status(true);
+    };
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(16, &p1C));  // Same values as the
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(10, &p2C));  // PredictorMode(0) preset.
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(7, &p3Ca));
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(7, &p3Cb));
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(7, &p3Cc));
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(0, &p3Cd));
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(0, &p3Ce));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(4, 0xd, &w[0]));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(4, 0xc, &w[1]));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(4, 0xc, &w[2]));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(4, 0xc, &w[3]));
+    return true;
+  }
+
+  bool all_default;  // Handed to Visitor::AllDefault in VisitFields.
+  pixel_type p1C = 0, p2C = 0, p3Ca = 0, p3Cb = 0, p3Cc = 0, p3Cd = 0, p3Ce = 0;
+  uint32_t w[kNumPredictors] = {};  // maxweight per predictor in State::Predict.
+};
+
+struct State {
+  pixel_type_w prediction[kNumPredictors] = {};
+  pixel_type_w pred = 0;  // *before* removing the added bits.
+  std::vector<uint32_t> pred_errors[kNumPredictors];
+  std::vector<int32_t> error;
+  Header header;
+
+  // Allows to approximate division by a number from 1 to 64.
+  uint32_t divlookup[64];
+
+  constexpr static pixel_type_w AddBits(pixel_type_w x) {
+    return uint64_t(x) << kPredExtraBits;
+  }
+
+  State(Header header, size_t xsize, size_t ysize) : header(header) {
+    // Extra margin to avoid out-of-bounds writes.
+    // All have space for two rows of data.
+    for (size_t i = 0; i < 4; i++) {
+      pred_errors[i].resize((xsize + 2) * 2);
+    }
+    error.resize((xsize + 2) * 2);
+    // Initialize division lookup table.
+    for (int i = 0; i < 64; i++) {
+      divlookup[i] = (1 << 24) / (i + 1);
+    }
+  }
+
+  // Approximates 4+(maxweight<<24)/(x+1), avoiding division
+  JXL_INLINE uint32_t ErrorWeight(uint64_t x, uint32_t maxweight) const {
+    int shift = FloorLog2Nonzero(x + 1) - 5;
+    if (shift < 0) shift = 0;
+    return 4 + ((maxweight * divlookup[x >> shift]) >> shift);
+  }
+
+  // Approximates the weighted average of the input values with the given
+  // weights, avoiding division. Weights must sum to at least 16.
+  JXL_INLINE pixel_type_w
+  WeightedAverage(const pixel_type_w *JXL_RESTRICT p,
+                  std::array<uint32_t, kNumPredictors> w) const {
+    uint32_t weight_sum = 0;
+    for (size_t i = 0; i < kNumPredictors; i++) {
+      weight_sum += w[i];
+    }
+    JXL_DASSERT(weight_sum > 15);
+    uint32_t log_weight = FloorLog2Nonzero(weight_sum);  // at least 4.
+    weight_sum = 0;
+    for (size_t i = 0; i < kNumPredictors; i++) {
+      w[i] >>= log_weight - 4;
+      weight_sum += w[i];
+    }
+    // for rounding.
+    pixel_type_w sum = (weight_sum >> 1) - 1;
+    for (size_t i = 0; i < kNumPredictors; i++) {
+      sum += p[i] * w[i];
+    }
+    return (sum * divlookup[weight_sum - 1]) >> 24;
+  }
+
+  template <bool compute_properties>
+  JXL_INLINE pixel_type_w Predict(size_t x, size_t y, size_t xsize,
+                                  pixel_type_w N, pixel_type_w W,
+                                  pixel_type_w NE, pixel_type_w NW,
+                                  pixel_type_w NN, Properties *properties,
+                                  size_t offset) {
+    size_t cur_row = y & 1 ? 0 : (xsize + 2);
+    size_t prev_row = y & 1 ? (xsize + 2) : 0;
+    size_t pos_N = prev_row + x;
+    size_t pos_NE = x < xsize - 1 ? pos_N + 1 : pos_N;
+    size_t pos_NW = x > 0 ? pos_N - 1 : pos_N;
+    std::array<uint32_t, kNumPredictors> weights;
+    for (size_t i = 0; i < kNumPredictors; i++) {
+      // pred_errors[pos_N] also contains the error of pixel W.
+      // pred_errors[pos_NW] also contains the error of pixel WW.
+      weights[i] = pred_errors[i][pos_N] + pred_errors[i][pos_NE] +
+                   pred_errors[i][pos_NW];
+      weights[i] = ErrorWeight(weights[i], header.w[i]);
+    }
+
+    N = AddBits(N);
+    W = AddBits(W);
+    NE = AddBits(NE);
+    NW = AddBits(NW);
+    NN = AddBits(NN);
+
+    pixel_type_w teW = x == 0 ? 0 : error[cur_row + x - 1];
+    pixel_type_w teN = error[pos_N];
+    pixel_type_w teNW = error[pos_NW];
+    pixel_type_w sumWN = teN + teW;
+    pixel_type_w teNE = error[pos_NE];
+
+    if (compute_properties) {
+      pixel_type_w p = teW;
+      if (std::abs(teN) > std::abs(p)) p = teN;
+      if (std::abs(teNW) > std::abs(p)) p = teNW;
+      if (std::abs(teNE) > std::abs(p)) p = teNE;
+      (*properties)[offset++] = p;
+    }
+
+    prediction[0] = W + NE - N;
+    prediction[1] = N - (((sumWN + teNE) * header.p1C) >> 5);
+    prediction[2] = W - (((sumWN + teNW) * header.p2C) >> 5);
+    prediction[3] =
+        N - ((teNW * header.p3Ca + teN * header.p3Cb + teNE * header.p3Cc +
+              (NN - N) * header.p3Cd + (NW - W) * header.p3Ce) >>
+             5);
+
+    pred = WeightedAverage(prediction, weights);
+
+    // If all three have the same sign, skip clamping.
+    if (((teN ^ teW) | (teN ^ teNW)) > 0) {
+      return (pred + kPredictionRound) >> kPredExtraBits;
+    }
+
+    // Otherwise, clamp to min/max of neighbouring pixels (just W, NE, N).
+    pixel_type_w mx = std::max(W, std::max(NE, N));
+    pixel_type_w mn = std::min(W, std::min(NE, N));
+    pred = std::max(mn, std::min(mx, pred));
+    return (pred + kPredictionRound) >> kPredExtraBits;
+  }
+
+  JXL_INLINE void UpdateErrors(pixel_type_w val, size_t x, size_t y,
+                               size_t xsize) {
+    size_t cur_row = y & 1 ? 0 : (xsize + 2);
+    size_t prev_row = y & 1 ? (xsize + 2) : 0;
+    val = AddBits(val);
+    error[cur_row + x] = pred - val;
+    for (size_t i = 0; i < kNumPredictors; i++) {
+      pixel_type_w err =
+          (std::abs(prediction[i] - val) + kPredictionRound) >> kPredExtraBits;
+      // For predicting in the next row.
+      pred_errors[i][cur_row + x] = err;
+      // Add the error on this pixel to the error on the NE pixel. This has the
+      // effect of adding the error on this pixel to the E and EE pixels.
+      pred_errors[i][prev_row + x + 1] += err;
+    }
+  }
+};
+
+// Encoder helper function to set the parameters to some presets.
+inline void PredictorMode(int i, Header *header) {
+  Header &h = *header;
+  switch (i) {
+    case 0:
+      // ~ lossless16 predictor
+      h.p1C = 16;
+      h.p2C = 10;
+      h.p3Ca = 7;
+      h.p3Cb = 7;
+      h.p3Cc = 7;
+      h.p3Cd = 0;
+      h.p3Ce = 0;
+      h.w[0] = 13;
+      h.w[1] = 12;
+      h.w[2] = 12;
+      h.w[3] = 12;
+      break;
+    case 1:
+      // ~ default lossless8 predictor
+      h.p1C = 8;
+      h.p2C = 8;
+      h.p3Ca = 4;
+      h.p3Cb = 0;
+      h.p3Cc = 3;
+      h.p3Cd = 23;
+      h.p3Ce = 2;
+      h.w[0] = 13;
+      h.w[1] = 12;
+      h.w[2] = 12;
+      h.w[3] = 11;
+      break;
+    case 2:
+      // ~ west lossless8 predictor
+      h.p1C = 10;
+      h.p2C = 9;
+      h.p3Ca = 7;
+      h.p3Cb = 0;
+      h.p3Cc = 0;
+      h.p3Cd = 16;
+      h.p3Ce = 9;
+      h.w[0] = 13;
+      h.w[1] = 12;
+      h.w[2] = 13;
+      h.w[3] = 12;
+      break;
+    case 3:
+      // ~ north lossless8 predictor
+      h.p1C = 16;
+      h.p2C = 8;
+      h.p3Ca = 0;
+      h.p3Cb = 16;
+      h.p3Cc = 0;
+      h.p3Cd = 23;
+      h.p3Ce = 0;
+      h.w[0] = 13;
+      h.w[1] = 13;
+      h.w[2] = 12;
+      h.w[3] = 12;
+      break;
+    default:
+      // something else, because why not (4 and any other value land here)
+      h.p1C = 10;
+      h.p2C = 10;
+      h.p3Ca = 5;
+      h.p3Cb = 5;
+      h.p3Cc = 5;
+      h.p3Cd = 12;
+      h.p3Ce = 4;
+      h.w[0] = 13;
+      h.w[1] = 12;
+      h.w[2] = 12;
+      h.w[3] = 12;
+      break;
+  }
+}
+}  // namespace weighted
+
+// Stores a node and its two children at the same time. This significantly
+// reduces the number of branches needed during decoding.
+struct FlatDecisionNode {
+  // Property + splitval of the top node.
+  int32_t property0;  // -1 if leaf.
+  union {
+    PropertyVal splitval0;  // Inner node: threshold compared with property0.
+    Predictor predictor;    // Leaf: predictor returned by MATreeLookup.
+  };
+  uint32_t childID;  // childID is ctx id if leaf.
+  // Property+splitval of the two child nodes.
+  union {
+    PropertyVal splitvals[2];
+    int32_t multiplier;  // Leaf: multiplier returned by MATreeLookup.
+  };
+  union {
+    int32_t properties[2];
+    int64_t predictor_offset;  // Leaf: offset returned by MATreeLookup.
+  };
+};
+using FlatTree = std::vector<FlatDecisionNode>;
+
+class MATreeLookup {
+ public:
+  explicit MATreeLookup(const FlatTree &tree) : nodes_(tree) {}
+  struct LookupResult {
+    uint32_t context;
+    Predictor predictor;
+    int64_t offset;
+    int32_t multiplier;
+  };
+  // Walks the flattened tree from node 0 until a leaf (property0 < 0) is hit.
+  LookupResult Lookup(const Properties &properties) const {
+    for (uint32_t index = 0;;) {
+      const FlatDecisionNode &cur = nodes_[index];
+      if (cur.property0 < 0) {
+        return {cur.childID, cur.predictor, cur.predictor_offset,
+                cur.multiplier};
+      }
+      const uint32_t lo = properties[cur.properties[0]] <= cur.splitvals[0];
+      const uint32_t hi = 2 | (properties[cur.properties[1]] <= cur.splitvals[1]);
+      const bool top = properties[cur.property0] <= cur.splitval0;
+      index = cur.childID + (top ? hi : lo);  // lo and hi are both precomputed.
+    }
+  }
+
+ private:
+  const FlatTree &nodes_;
+};
+
+static constexpr size_t kExtraPropsPerChannel = 4;  // rp[0..3] per channel.
+static constexpr size_t kNumNonrefProperties =
+    kNumStaticProperties + 13 + weighted::kNumProperties;
+
+constexpr size_t kWPProp = kNumNonrefProperties - weighted::kNumProperties;
+constexpr size_t kGradientProp = 9;  // "W+N-NW" in PropertyName().
+
+// Clamps gradient to the min/max of n, w (and l, implicitly).
+static JXL_INLINE int32_t ClampedGradient(const int32_t n, const int32_t w,
+                                          const int32_t l) {
+  const int32_t lo = (w < n) ? w : n;
+  const int32_t hi = (n < w) ? w : n;
+  // n + w - l can leave the int32_t range even though any result that is
+  // actually returned lies in [lo, hi]. The sum is therefore formed with
+  // wrapping uint32_t arithmetic; whether it over- or underflowed is decided
+  // below by comparing l against lo and hi directly.
+  const uint32_t wrapped = static_cast<uint32_t>(n) +
+                           static_cast<uint32_t>(w) -
+                           static_cast<uint32_t>(l);
+  const int32_t gradient = static_cast<int32_t>(wrapped);
+  // Two independent selects rather than an if/else chain: both are always
+  // evaluated, which lets the compiler emit conditional moves (cmovl/cmovg
+  // on x86) instead of branches.
+  const int32_t upper_clamped = (l < lo) ? hi : gradient;
+  return (l > hi) ? lo : upper_clamped;
+}
+
+inline pixel_type_w Select(pixel_type_w a, pixel_type_w b, pixel_type_w c) {
+  const pixel_type_w estimate = a + b - c;
+  const pixel_type_w dist_a = std::abs(estimate - a);
+  const pixel_type_w dist_b = std::abs(estimate - b);
+  return dist_b <= dist_a ? b : a;  // Closer of a and b; a tie goes to b.
+}
+
+inline void PrecomputeReferences(const Channel &ch, size_t y,
+                                 const Image &image, uint32_t i,
+                                 Channel *references) {
+  ZeroFillImage(&references->plane);
+  const size_t max_props = references->w;
+  const intptr_t stride = references->plane.PixelsPerRow();
+  const auto &cur = image.channel[i];
+  uint32_t offset = 0;
+  // Scan earlier channels, nearest first, until the property slots are full.
+  for (int32_t j = static_cast<int32_t>(i) - 1; j >= 0 && offset < max_props;
+       j--) {
+    const auto &ref = image.channel[j];
+    // Only channels with the same dimensions and shifts as channel i count.
+    if (ref.w != cur.w || ref.h != cur.h) continue;
+    if (ref.hshift != cur.hshift || ref.vshift != cur.vshift) continue;
+    const pixel_type *JXL_RESTRICT row = ref.Row(y);
+    const pixel_type *JXL_RESTRICT row_above = ref.Row(y ? y - 1 : 0);
+    pixel_type *JXL_RESTRICT out = references->Row(0) + offset;
+    for (size_t x = 0; x < ch.w; x++, out += stride) {
+      const pixel_type_w value = row[x];
+      const pixel_type_w west = (x ? row[x - 1] : 0);
+      const pixel_type_w north = (y ? row_above[x] : west);
+      const pixel_type_w northwest = (x && y ? row_above[x - 1] : west);
+      const pixel_type_w residual =
+          value - ClampedGradient(west, north, northwest);
+      out[0] = std::abs(value);
+      out[1] = value;
+      out[2] = std::abs(residual);
+      out[3] = residual;
+    }
+    offset += kExtraPropsPerChannel;
+  }
+}
+
+struct PredictionResult {  // Value returned by the Predict* helpers.
+  int context = 0;  // Tree lookup context; stays 0 without kUseTree.
+  pixel_type_w guess = 0;  // Tree offset (if any) plus the predictor output.
+  Predictor predictor;  // Always assigned by detail::Predict.
+  int32_t multiplier;  // NOTE(review): only assigned in kUseTree mode.
+};
+
+inline std::string PropertyName(size_t i) {
+  static_assert(kNumNonrefProperties == 16, "Update this function");
+  // Human-readable name of property i.
+  // The fixed properties use short mnemonics in which N, W, NE, ... denote
+  // neighbours of the current sample (N = top, W = left); the table below
+  // lists them in property-index order.
+  static const char *const kNames[] = {
+      // Static properties.
+      "c",
+      "g",
+      // Location.
+      "y",
+      "x",
+      // Neighbour values.
+      "|N|",
+      "|W|",
+      "N",
+      "W",
+      // Local gradients.
+      "W-WW-NW+NWW",
+      "W+N-NW",
+      // FFV1 context properties.
+      "W-NW",
+      "NW-N",
+      "N-NE",
+      "N-NN",
+      "W-WW",
+      // Weighted predictor.
+      "WGH",
+  };
+  constexpr size_t kNumNames = sizeof(kNames) / sizeof(kNames[0]);
+  static_assert(kNumNames == 16, "one name per fixed property");
+  if (i < kNumNames) {
+    return kNames[i];
+  }
+  // Everything past the fixed properties is labelled by 15 - i, so the first
+  // such index (16) reads "ch[-1]", the next "ch[-2]", and so on.
+  return "ch[" + ToString(15 - (int)i) + "]";
+}
+
+inline void InitPropsRow(
+    Properties *p,
+    const std::array<pixel_type, kNumStaticProperties> &static_props,
+    const int y) {
+  Properties &props = *p;
+  // The static properties fill the leading slots; slot 2 is the row index.
+  for (size_t k = 0; k != kNumStaticProperties; ++k) props[k] = static_props[k];
+  props[2] = y;
+  props[kGradientProp] = 0;  // local gradient.
+}
+
+namespace detail {
+enum PredictorMode {  // Bit flags for the `mode` template argument of Predict.
+  kUseTree = 1,  // Take context, offset, multiplier and predictor from the tree.
+  kUseWP = 2,  // Call weighted::State::Predict.
+  kForceComputeProperties = 4,  // Fill the properties even without kUseTree.
+  kAllPredictions = 8,  // Also fill predictions[] for every predictor.
+};
+
+JXL_INLINE pixel_type_w PredictOne(Predictor p, pixel_type_w left,
+                                   pixel_type_w top, pixel_type_w toptop,
+                                   pixel_type_w topleft, pixel_type_w topright,
+                                   pixel_type_w leftleft,
+                                   pixel_type_w toprightright,
+                                   pixel_type_w wp_pred) {
+  // Mean of two samples; signed integer division truncates toward zero.
+  const auto mean = [](pixel_type_w a, pixel_type_w b) -> pixel_type_w {
+    return (a + b) / 2;
+  };
+  switch (p) {
+    // Predictors that pass one input through unchanged.
+    case Predictor::Left:
+      return left;
+    case Predictor::Top:
+      return top;
+    case Predictor::TopLeft:
+      return topleft;
+    case Predictor::TopRight:
+      return topright;
+    case Predictor::LeftLeft:
+      return leftleft;
+    case Predictor::Weighted:
+      return wp_pred;
+    case Predictor::Select:
+      return Select(left, top, topleft);
+    case Predictor::Gradient:
+      return static_cast<pixel_type_w>(ClampedGradient(left, top, topleft));
+    case Predictor::Average0: return mean(left, top);
+    case Predictor::Average1: return mean(left, topleft);
+    case Predictor::Average2: return mean(topleft, top);
+    case Predictor::Average3: return mean(top, topright);
+    case Predictor::Average4:  // Fixed weights summing to 16, +8 for rounding.
+      return (6 * top - 2 * toptop + 7 * left + leftleft + toprightright +
+              3 * topright + 8) /
+             16;
+    case Predictor::Zero:
+    default:
+      return 0;
+  }
+}
+
+template <int mode>  // Bitwise OR of detail::PredictorMode flags.
+inline PredictionResult Predict(
+    Properties *p, size_t w, const pixel_type *JXL_RESTRICT pp,
+    const intptr_t onerow, const size_t x, const size_t y, Predictor predictor,
+    const MATreeLookup *lookup, const Channel *references,
+    weighted::State *wp_state, pixel_type_w *predictions) {
+  // We start in position 3 because of 2 static properties + y.
+  size_t offset = 3;
+  constexpr bool compute_properties =
+      mode & kUseTree || mode & kForceComputeProperties;
+  pixel_type_w left = (x ? pp[-1] : (y ? pp[-onerow] : 0));  // Edge fallbacks.
+  pixel_type_w top = (y ? pp[-onerow] : left);
+  pixel_type_w topleft = (x && y ? pp[-1 - onerow] : left);
+  pixel_type_w topright = (x + 1 < w && y ? pp[1 - onerow] : top);
+  pixel_type_w leftleft = (x > 1 ? pp[-2] : left);
+  pixel_type_w toptop = (y > 1 ? pp[-onerow - onerow] : top);
+  pixel_type_w toprightright = (x + 2 < w && y ? pp[2 - onerow] : topright);
+
+  if (compute_properties) {  // `p` is only dereferenced inside such blocks.
+    // location
+    (*p)[offset++] = x;
+    // neighbors
+    (*p)[offset++] = std::abs(top);
+    (*p)[offset++] = std::abs(left);
+    (*p)[offset++] = top;
+    (*p)[offset++] = left;
+
+    // left minus the next slot, read before that slot is overwritten below.
+    (*p)[offset] = left - (*p)[offset + 1];
+    offset++;
+    // local gradient
+    (*p)[offset++] = left + top - topleft;
+
+    // FFV1 context properties
+    (*p)[offset++] = left - topleft;
+    (*p)[offset++] = topleft - top;
+    (*p)[offset++] = top - topright;
+    (*p)[offset++] = top - toptop;
+    (*p)[offset++] = left - leftleft;
+  }
+
+  pixel_type_w wp_pred = 0;
+  if (mode & kUseWP) {  // Note the argument order: N = top, W = left.
+    wp_pred = wp_state->Predict<compute_properties>(
+        x, y, w, top, left, topright, topleft, toptop, p, offset);
+  }
+  if (compute_properties) {
+    offset += weighted::kNumProperties;  // Skip the weighted-predictor slot.
+    // Extra properties.
+    const pixel_type *JXL_RESTRICT rp = references->Row(x);
+    for (size_t i = 0; i < references->w; i++) {
+      (*p)[offset++] = rp[i];
+    }
+  }
+  PredictionResult result;  // multiplier stays unassigned without kUseTree.
+  if (mode & kUseTree) {
+    MATreeLookup::LookupResult lr = lookup->Lookup(*p);
+    result.context = lr.context;
+    result.guess = lr.offset;
+    result.multiplier = lr.multiplier;
+    predictor = lr.predictor;  // The tree overrides the `predictor` argument.
+  }
+  if (mode & kAllPredictions) {
+    for (size_t i = 0; i < kNumModularPredictors; i++) {
+      predictions[i] = PredictOne((Predictor)i, left, top, toptop, topleft,
+                                  topright, leftleft, toprightright, wp_pred);
+    }
+  }
+  result.guess += PredictOne(predictor, left, top, toptop, topleft, topright,
+                             leftleft, toprightright, wp_pred);
+  result.predictor = predictor;
+
+  return result;
+}
+}  // namespace detail
+
+// Predicts the sample at (x, y) with the caller-chosen `predictor` only.
+// detail::Predict is instantiated with mode 0 (none of the kUseWP, kUseTree or
+// kAllPredictions bits set) and every optional pointer argument is null.
+inline PredictionResult PredictNoTreeNoWP(size_t w,
+                                          const pixel_type *JXL_RESTRICT pp,
+                                          const intptr_t onerow, const int x,
+                                          const int y, Predictor predictor) {
+  constexpr int kMode = 0;
+  PredictionResult prediction = detail::Predict<kMode>(
+      /*p=*/nullptr, w, pp, onerow, x, y, predictor, /*lookup=*/nullptr,
+      /*references=*/nullptr, /*wp_state=*/nullptr, /*predictions=*/nullptr);
+  return prediction;
+}
+
+// Predicts the sample at (x, y) with the caller-chosen `predictor`, with the
+// weighted predictor enabled (mode kUseWP): `wp_state` is forwarded so that
+// detail::Predict can query it for the weighted prediction. No tree lookup is
+// performed and no property vector is supplied.
+inline PredictionResult PredictNoTreeWP(size_t w,
+                                        const pixel_type *JXL_RESTRICT pp,
+                                        const intptr_t onerow, const int x,
+                                        const int y, Predictor predictor,
+                                        weighted::State *wp_state) {
+  constexpr int kMode = detail::kUseWP;
+  PredictionResult prediction = detail::Predict<kMode>(
+      /*p=*/nullptr, w, pp, onerow, x, y, predictor, /*lookup=*/nullptr,
+      /*references=*/nullptr, wp_state, /*predictions=*/nullptr);
+  return prediction;
+}
+
+// Predicts the sample at (x, y) by looking the property vector `*p` up in
+// `tree_lookup` (mode kUseTree); the looked-up leaf supplies the context,
+// offset, multiplier and predictor. The weighted predictor is not used, so no
+// wp_state is passed. Predictor::Zero is only the initial predictor value; the
+// tree lookup overrides it.
+inline PredictionResult PredictTreeNoWP(Properties *p, size_t w,
+                                        const pixel_type *JXL_RESTRICT pp,
+                                        const intptr_t onerow, const int x,
+                                        const int y,
+                                        const MATreeLookup &tree_lookup,
+                                        const Channel &references) {
+  constexpr int kMode = detail::kUseTree;
+  PredictionResult prediction = detail::Predict<kMode>(
+      p, w, pp, onerow, x, y, Predictor::Zero, &tree_lookup, &references,
+      /*wp_state=*/nullptr, /*predictions=*/nullptr);
+  return prediction;
+}
+
+// Same as the tree-driven prediction without WP, but with the weighted
+// predictor enabled as well (mode kUseTree | kUseWP): `wp_state` is queried
+// for the weighted prediction before the tree lookup selects the leaf.
+inline PredictionResult PredictTreeWP(Properties *p, size_t w,
+                                      const pixel_type *JXL_RESTRICT pp,
+                                      const intptr_t onerow, const int x,
+                                      const int y,
+                                      const MATreeLookup &tree_lookup,
+                                      const Channel &references,
+                                      weighted::State *wp_state) {
+  constexpr int kMode = detail::kUseTree | detail::kUseWP;
+  PredictionResult prediction = detail::Predict<kMode>(
+      p, w, pp, onerow, x, y, Predictor::Zero, &tree_lookup, &references,
+      wp_state, /*predictions=*/nullptr);
+  return prediction;
+}
+
+// Prediction entry point used while gathering tree-learning samples for a
+// single `predictor`: mode is kForceComputeProperties | kUseWP, there is no
+// tree lookup, and `*p` is handed to detail::Predict together with the
+// reference channel and the weighted-predictor state.
+inline PredictionResult PredictLearn(Properties *p, size_t w,
+                                     const pixel_type *JXL_RESTRICT pp,
+                                     const intptr_t onerow, const int x,
+                                     const int y, Predictor predictor,
+                                     const Channel &references,
+                                     weighted::State *wp_state) {
+  constexpr int kMode = detail::kForceComputeProperties | detail::kUseWP;
+  PredictionResult prediction = detail::Predict<kMode>(
+      p, w, pp, onerow, x, y, predictor, /*lookup=*/nullptr, &references,
+      wp_state, /*predictions=*/nullptr);
+  return prediction;
+}
+
+// Learning-time variant that evaluates every modular predictor at (x, y):
+// with kAllPredictions set, detail::Predict stores one prediction per
+// predictor index into `predictions` (kNumModularPredictors entries). The
+// PredictionResult returned by detail::Predict is intentionally discarded.
+inline void PredictLearnAll(Properties *p, size_t w,
+                            const pixel_type *JXL_RESTRICT pp,
+                            const intptr_t onerow, const int x, const int y,
+                            const Channel &references,
+                            weighted::State *wp_state,
+                            pixel_type_w *predictions) {
+  constexpr int kMode = detail::kForceComputeProperties | detail::kUseWP |
+                        detail::kAllPredictions;
+  detail::Predict<kMode>(p, w, pp, onerow, x, y, Predictor::Zero,
+                         /*lookup=*/nullptr, &references, wp_state,
+                         predictions);
+}
+
+// Evaluates every modular predictor at (x, y) without weighted-predictor
+// state, tree lookup or property vector (mode kAllPredictions only) and
+// stores the results in `predictions` (kNumModularPredictors entries). The
+// PredictionResult returned by detail::Predict is intentionally discarded.
+inline void PredictAllNoWP(size_t w, const pixel_type *JXL_RESTRICT pp,
+                           const intptr_t onerow, const int x, const int y,
+                           pixel_type_w *predictions) {
+  constexpr int kMode = detail::kAllPredictions;
+  detail::Predict<kMode>(/*p=*/nullptr, w, pp, onerow, x, y, Predictor::Zero,
+                         /*lookup=*/nullptr, /*references=*/nullptr,
+                         /*wp_state=*/nullptr, predictions);
+}
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_ENCODING_CONTEXT_PREDICT_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/dec_ma.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/dec_ma.cc
new file mode 100644
index 0000000000..5be9d756ed
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/dec_ma.cc
@@ -0,0 +1,106 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/encoding/dec_ma.h"
+
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/modular/encoding/ma_common.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+namespace {
+
+// Checks that every split in the subtree rooted at `root` is satisfiable.
+//
+// `prop_bounds[p]` is the inclusive [min, max] range property `p` can still
+// take when `root` is reached. A split on property `p` at `splitval` is
+// rejected when `splitval` is below the minimum, or at/above the maximum.
+// tree[n].lchild is then validated with the range narrowed to
+// [splitval + 1, max] and tree[n].rchild with [min, splitval].
+//
+// The walk is iterative, with an explicit stack and one shared bounds vector.
+// The tree is decoded from an untrusted bitstream and may be a very deep
+// chain; a recursive walk would need one call frame plus one full copy of the
+// bounds vector per level, which can exhaust the call stack and memory.
+//
+// Returns true when the subtree is valid, a JXL_FAILURE status otherwise.
+Status ValidateTree(
+    const Tree &tree,
+    const std::vector<std::pair<pixel_type, pixel_type>> &prop_bounds,
+    size_t root) {
+  // One entry per inner node on the path from `root` to the current node.
+  struct PendingSplit {
+    size_t node;                              // index of the inner node
+    std::pair<pixel_type, pixel_type> saved;  // its property's bounds on entry
+    bool right_visited;                       // true once rchild was entered
+  };
+  auto bounds = prop_bounds;
+  std::vector<PendingSplit> path;
+  size_t cur = root;
+  for (;;) {
+    if (tree[cur].property != -1) {
+      // Inner node: check the split, then descend into the left child.
+      const size_t p = tree[cur].property;
+      const int val = tree[cur].splitval;
+      if (bounds[p].first > val) return JXL_FAILURE("Invalid tree");
+      // Splitting at max value makes no sense: one child would see exactly
+      // the parent's range, the other an empty one (min > max).
+      if (bounds[p].second <= val) return JXL_FAILURE("Invalid tree");
+      path.push_back({cur, bounds[p], false});
+      // Cannot overflow: val < bounds[p].second was checked above.
+      bounds[p].first = val + 1;
+      cur = tree[cur].lchild;
+      continue;
+    }
+    // Leaf: unwind to the nearest ancestor whose right child is still
+    // pending, restoring the bounds of every split that is left behind.
+    bool has_pending_right = false;
+    while (!path.empty()) {
+      PendingSplit &top = path.back();
+      const size_t p = tree[top.node].property;
+      bounds[p] = top.saved;
+      if (!top.right_visited) {
+        top.right_visited = true;
+        bounds[p].second = tree[top.node].splitval;
+        cur = tree[top.node].rchild;
+        has_pending_right = true;
+        break;
+      }
+      path.pop_back();
+    }
+    if (!has_pending_right) return true;
+  }
+}
+
+// Decodes the nodes of a MA tree from `br` through `reader` into `*tree`
+// (cleared first), then validates the split values with ValidateTree().
+//
+// Each node starts with a property symbol minus one:
+//  * -1 marks a leaf, followed by predictor, signed offset, multiplier log
+//    and multiplier bits; the multiplier is (mul_bits + 1) << mul_log. Leaves
+//    receive consecutive ids (stored in the lchild slot) in decoding order.
+//  * 0..255 marks an inner node, followed by a signed split value. Its two
+//    children are assigned the two indices following all nodes that are
+//    still waiting to be decoded.
+//
+// Fails when the reader runs out of bounds, when the number of nodes exceeds
+// `tree_size_limit`, or when any decoded field is out of range.
+Status DecodeTree(BitReader *br, ANSSymbolReader *reader,
+                  const std::vector<uint8_t> &context_map, Tree *tree,
+                  size_t tree_size_limit) {
+  size_t leaf_id = 0;
+  // Number of nodes that were referenced as children but not decoded yet.
+  size_t to_decode = 1;
+  tree->clear();
+  while (to_decode > 0) {
+    JXL_RETURN_IF_ERROR(br->AllReadsWithinBounds());
+    if (tree->size() > tree_size_limit) {
+      return JXL_FAILURE("Tree is too large");
+    }
+    to_decode--;
+    int property =
+        reader->ReadHybridUint(kPropertyContext, br, context_map) - 1;
+    // 256 matches the size of the bounds table used for validation below.
+    if (property < -1 || property >= 256) {
+      return JXL_FAILURE("Invalid tree property value");
+    }
+    if (property == -1) {
+      size_t predictor =
+          reader->ReadHybridUint(kPredictorContext, br, context_map);
+      if (predictor >= kNumModularPredictors) {
+        return JXL_FAILURE("Invalid predictor");
+      }
+      int64_t predictor_offset =
+          UnpackSigned(reader->ReadHybridUint(kOffsetContext, br, context_map));
+      uint32_t mul_log =
+          reader->ReadHybridUint(kMultiplierLogContext, br, context_map);
+      if (mul_log >= 31) {
+        return JXL_FAILURE("Invalid multiplier logarithm");
+      }
+      uint32_t mul_bits =
+          reader->ReadHybridUint(kMultiplierBitsContext, br, context_map);
+      // Require mul_bits + 1 < 2^(31 - mul_log). The comparison is written
+      // without "mul_bits + 1" because that wraps to 0 for
+      // mul_bits == UINT32_MAX, which would pass the check and produce a
+      // multiplier of 0. The shift amount is in [1, 31], so the right-hand
+      // side cannot underflow.
+      if (mul_bits >= (1u << (31u - mul_log)) - 1u) {
+        return JXL_FAILURE("Invalid multiplier");
+      }
+      uint32_t multiplier = (mul_bits + 1U) << mul_log;
+      tree->emplace_back(-1, 0, leaf_id++, 0, static_cast<Predictor>(predictor),
+                         predictor_offset, multiplier);
+      continue;
+    }
+    int splitval =
+        UnpackSigned(reader->ReadHybridUint(kSplitValContext, br, context_map));
+    // tree->size() is still the index of the node being added; the next
+    // `to_decode` indices belong to nodes that are already pending.
+    tree->emplace_back(property, splitval, tree->size() + to_decode + 1,
+                       tree->size() + to_decode + 2, Predictor::Zero, 0, 1);
+    to_decode += 2;
+  }
+  // Every property starts with the full pixel_type range.
+  std::vector<std::pair<pixel_type, pixel_type>> prop_bounds;
+  prop_bounds.resize(256, {std::numeric_limits<pixel_type>::min(),
+                           std::numeric_limits<pixel_type>::max()});
+  return ValidateTree(*tree, prop_bounds, 0);
+}
+}  // namespace
+
+// Public entry point: reads the entropy-code histograms for the
+// kNumTreeContexts tree contexts from `br`, then decodes and validates the
+// tree itself into `*tree`. The node limit passed on is the smaller of
+// `tree_size_limit` and kMaxTreeSize. Fails if histogram or tree decoding
+// fails, if the property context is flagged in degenerate_symbols, or if the
+// ANS final-state check does not pass.
+Status DecodeTree(BitReader *br, Tree *tree, size_t tree_size_limit) {
+  std::vector<uint8_t> context_map;
+  ANSCode code;
+  JXL_RETURN_IF_ERROR(
+      DecodeHistograms(br, kNumTreeContexts, &code, &context_map));
+  // TODO(eustas): investigate more infinite tree cases.
+  const uint8_t property_cluster = context_map[kPropertyContext];
+  if (code.degenerate_symbols[property_cluster] > 0) {
+    return JXL_FAILURE("Infinite tree");
+  }
+  ANSSymbolReader reader(&code, br);
+  const size_t size_limit = std::min(tree_size_limit, kMaxTreeSize);
+  JXL_RETURN_IF_ERROR(DecodeTree(br, &reader, context_map, tree, size_limit));
+  if (reader.CheckANSFinalState()) {
+    return true;
+  }
+  return JXL_FAILURE("ANS decode final state failed");
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/dec_ma.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/dec_ma.h
new file mode 100644
index 0000000000..a910c4deb1
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/dec_ma.h
@@ -0,0 +1,66 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_DEC_MA_H_
+#define LIB_JXL_MODULAR_ENCODING_DEC_MA_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+// inner nodes
+// One node of a MA decision tree. The same struct is used for inner (split)
+// nodes and for leaves; `property == -1` marks a leaf.
+struct PropertyDecisionNode {
+  PropertyVal splitval;  // split threshold (inner nodes)
+  int16_t property;      // -1: leaf node, lchild points to leaf node
+  uint32_t lchild;       // index of the left child
+  uint32_t rchild;       // index of the right child
+  Predictor predictor;       // predictor used by a leaf
+  int64_t predictor_offset;  // offset added to the leaf's prediction
+  uint32_t multiplier;       // residual multiplier of the leaf
+
+  // Fully explicit constructor; arguments are stored as given.
+  PropertyDecisionNode(int p, int split_val, int lchild, int rchild,
+                       Predictor predictor, int64_t predictor_offset,
+                       uint32_t multiplier)
+      : splitval(split_val),
+        property(p),
+        lchild(lchild),
+        rchild(rchild),
+        predictor(predictor),
+        predictor_offset(predictor_offset),
+        multiplier(multiplier) {}
+
+  // Default node: a leaf using Predictor::Zero, offset 0 and multiplier 1.
+  PropertyDecisionNode()
+      : PropertyDecisionNode(/*p=*/-1, /*split_val=*/0, /*lchild=*/0,
+                             /*rchild=*/0, Predictor::Zero,
+                             /*predictor_offset=*/0, /*multiplier=*/1) {}
+
+  // Builds a leaf node for `predictor` with the given offset and multiplier.
+  static PropertyDecisionNode Leaf(Predictor predictor, int64_t offset = 0,
+                                   uint32_t multiplier = 1) {
+    return PropertyDecisionNode(/*p=*/-1, /*split_val=*/0, /*lchild=*/0,
+                                /*rchild=*/0, predictor, offset, multiplier);
+  }
+
+  // Builds an inner node splitting property `p` at `split_val`. When `rchild`
+  // is left at -1 the right child is taken to be `lchild + 1`.
+  static PropertyDecisionNode Split(int p, int split_val, int lchild,
+                                    int rchild = -1) {
+    const int right = (rchild == -1) ? lchild + 1 : rchild;
+    return PropertyDecisionNode(p, split_val, lchild, right, Predictor::Zero,
+                                /*predictor_offset=*/0, /*multiplier=*/1);
+  }
+};
+
+// A MA tree stored as a flat node array; nodes refer to their children by
+// index (lchild / rchild) into the same vector.
+using Tree = std::vector<PropertyDecisionNode>;
+
+// Decodes a MA tree from `br` into `*tree`. `tree_size_limit` is an upper
+// bound on the number of nodes the caller is willing to accept. Returns a
+// failure Status when the stream does not describe a valid tree.
+Status DecodeTree(BitReader *br, Tree *tree, size_t tree_size_limit);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_ENCODING_DEC_MA_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_encoding.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_encoding.cc
new file mode 100644
index 0000000000..f7bb372c74
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_encoding.cc
@@ -0,0 +1,549 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <cinttypes>
+#include <limits>
+#include <numeric>
+#include <queue>
+#include <random>
+#include <set>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "lib/jxl/base/os_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/encoding/enc_ma.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/encoding/ma_common.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/modular/transform/transform.h"
+#include "lib/jxl/toc.h"
+
+// JXL_ENABLE_DOT controls whether PrintTree() may shell out to Graphviz `dot`
+// through system(). It is disabled on iOS, which lacks C89 system().
+#if JXL_OS_IOS
+#define JXL_ENABLE_DOT 0
+#else
+#define JXL_ENABLE_DOT 1
+#endif
+
+namespace jxl {
+
+namespace {
+// Plot tree (if enabled) and predictor usage map.
+// Compile-time switch: when true, the encoder allocates a per-channel
+// predictor image, records the colour of the predictor chosen for each pixel,
+// dumps that image through aux_out and calls PrintTree() on the learned tree.
+constexpr bool kWantDebug = false;
+}  // namespace
+
+// Walks channel `chan` of `image` in raster order, computes the property
+// vector and the predictor output(s) for every pixel, and feeds a
+// pseudo-randomly chosen subset of the pixels to `tree_samples` as training
+// samples for MA-tree learning.
+//
+// The sampled fraction is min(1, options.nb_repeats); unless it is 0 (which
+// disables learning) it is raised so that roughly 1024 pixels are sampled
+// even for small channels. `*total_pixels` is incremented once per visited
+// pixel, whether or not it is sampled.
+void GatherTreeData(const Image &image, pixel_type chan, size_t group_id,
+                    const weighted::Header &wp_header,
+                    const ModularOptions &options, TreeSamples &tree_samples,
+                    size_t *total_pixels) {
+  const Channel &channel = image.channel[chan];
+
+  JXL_DEBUG_V(7, "Learning %zux%zu channel %d", channel.w, channel.h, chan);
+
+  // Properties that are constant for the whole channel: channel and group id.
+  std::array<pixel_type, kNumStaticProperties> static_props = {
+      chan, static_cast<int>(group_id)};
+  Properties properties(kNumNonrefProperties +
+                        kExtraPropsPerChannel * options.max_properties);
+
+  double pixel_fraction = std::min(1.0f, options.nb_repeats);
+  // a fraction of 0 is used to disable learning entirely.
+  if (pixel_fraction > 0) {
+    pixel_fraction = std::max(pixel_fraction,
+                              std::min(1.0, 1024.0 / (channel.w * channel.h)));
+  }
+  // A pixel is sampled when the upper 32 bits of the next random value do not
+  // exceed this threshold.
+  const uint64_t threshold =
+      (std::numeric_limits<uint64_t>::max() >> 32) * pixel_fraction;
+
+  // Xorshift128+ adapted from xorshift128+-inl.h; fixed seed, so the sample
+  // selection is deterministic.
+  uint64_t rng_state[2] = {0x94D049BB133111EBull, 0xBF58476D1CE4E5B9ull};
+  auto use_sample = [&]() {
+    uint64_t a = rng_state[0];
+    const uint64_t b = rng_state[1];
+    const uint64_t sum = a + b;  // b, c
+    rng_state[0] = b;
+    a ^= a << 23;
+    a ^= b ^ (a >> 18) ^ (b >> 5);
+    rng_state[1] = a;
+    return (sum >> 32) <= threshold;
+  };
+
+  const intptr_t onerow = channel.plane.PixelsPerRow();
+  Channel references(properties.size() - kNumNonrefProperties, channel.w);
+  weighted::State wp_state(wp_header, channel.w, channel.h);
+  tree_samples.PrepareForSamples(pixel_fraction * channel.h * channel.w + 64);
+
+  for (size_t y = 0; y < channel.h; y++) {
+    const pixel_type *JXL_RESTRICT row = channel.Row(y);
+    PrecomputeReferences(channel, y, image, chan, &references);
+    InitPropsRow(&properties, static_props, y);
+    // TODO(veluca): avoid computing WP if we don't use its property or
+    // predictions.
+    for (size_t x = 0; x < channel.w; x++) {
+      pixel_type_w pred[kNumModularPredictors];
+      if (tree_samples.NumPredictors() == 1) {
+        // Single candidate predictor: only its slot of `pred` is filled in.
+        const Predictor only = tree_samples.PredictorFromIndex(0);
+        const PredictionResult single =
+            PredictLearn(&properties, channel.w, row + x, onerow, x, y, only,
+                         references, &wp_state);
+        pred[static_cast<int>(only)] = single.guess;
+      } else {
+        PredictLearnAll(&properties, channel.w, row + x, onerow, x, y,
+                        references, &wp_state, pred);
+      }
+      ++*total_pixels;
+      if (use_sample()) {
+        tree_samples.AddSample(row[x], properties, pred);
+      }
+      wp_state.UpdateErrors(row[x], x, y, channel.w);
+    }
+  }
+}
+
+// Builds a MA tree from the samples collected in `tree_samples`.
+//
+// When there are no samples the result is a single leaf using the first
+// candidate predictor, offset 0 and multiplier 1. Otherwise the splitting
+// threshold handed to ComputeBestTree() is
+// options.splitting_heuristics_node_threshold scaled by
+// (0.9 * sampled fraction + 0.1), where the sampled fraction is
+// NumSamples() / total_pixels.
+//
+// An upper bound of 0 in `static_prop_range` means "unbounded" and is
+// replaced by the largest uint32_t value.
+Tree LearnTree(TreeSamples &&tree_samples, size_t total_pixels,
+               const ModularOptions &options,
+               const std::vector<ModularMultiplierInfo> &multiplier_info = {},
+               StaticPropRange static_prop_range = {}) {
+  for (size_t prop = 0; prop < kNumStaticProperties; ++prop) {
+    auto &upper = static_prop_range[prop][1];
+    if (upper == 0) upper = std::numeric_limits<uint32_t>::max();
+  }
+
+  if (!tree_samples.HasSamples()) {
+    // Nothing to learn from: fall back to a one-leaf tree.
+    return Tree(1, PropertyDecisionNode::Leaf(
+                       tree_samples.PredictorFromIndex(0), /*offset=*/0,
+                       /*multiplier=*/1));
+  }
+
+  const float pixel_fraction = tree_samples.NumSamples() * 1.0f / total_pixels;
+  const float required_cost = pixel_fraction * 0.9 + 0.1;
+  tree_samples.AllSamplesDone();
+
+  Tree learned;
+  ComputeBestTree(tree_samples,
+                  options.splitting_heuristics_node_threshold * required_cost,
+                  multiplier_info, static_prop_range,
+                  options.fast_decode_multiplier, &learned);
+  return learned;
+}
+
+// Compile-time switch for PrintTree(); while false, PrintTree() is a no-op.
+constexpr bool kPrintTree = false;
+
+// Debug helper: writes `tree` as a Graphviz graph to "<path>.dot" and, when
+// JXL_ENABLE_DOT is set, renders it to "<path>.svg" by running `dot` through
+// system() (asserting that the command succeeds).
+//
+// Leaves are labelled with predictor name, offset and multiplier; inner nodes
+// with "<property name>><split value>" and one edge per child.
+//
+// If the .dot file cannot be opened, a message is printed to stderr and the
+// function returns without producing any output.
+void PrintTree(const Tree &tree, const std::string &path) {
+  if (!kPrintTree) return;
+  const std::string dot_path = path + ".dot";
+  FILE *f = fopen(dot_path.c_str(), "w");
+  if (f == nullptr) {
+    // Debug output is best effort; never hand a null stream to fprintf.
+    fprintf(stderr, "PrintTree: cannot open %s for writing\n",
+            dot_path.c_str());
+    return;
+  }
+  fprintf(f, "graph{\n");
+  for (size_t cur = 0; cur < tree.size(); cur++) {
+    if (tree[cur].property < 0) {
+      fprintf(f, "n%05zu [label=\"%s%+" PRId64 " (x%u)\"];\n", cur,
+              PredictorName(tree[cur].predictor), tree[cur].predictor_offset,
+              tree[cur].multiplier);
+    } else {
+      fprintf(f, "n%05zu [label=\"%s>%d\"];\n", cur,
+              PropertyName(tree[cur].property).c_str(), tree[cur].splitval);
+      // lchild / rchild are uint32_t, so they need an unsigned conversion.
+      fprintf(f, "n%05zu -- n%05u;\n", cur, tree[cur].lchild);
+      fprintf(f, "n%05zu -- n%05u;\n", cur, tree[cur].rchild);
+    }
+  }
+  fprintf(f, "}\n");
+  fclose(f);
+#if JXL_ENABLE_DOT
+  JXL_ASSERT(
+      system(("dot " + path + ".dot -T svg -o " + path + ".svg").c_str()) == 0);
+#endif
+}
+
+// Tokenizes channel `chan` of `image`: for every pixel, in raster order, a
+// (context, packed residual) token is appended to `*tokens`.
+//
+// `global_tree` is first specialised for this channel/group with
+// FilterTree(). Depending on the shape of the resulting tree one of several
+// fast paths is taken (all of them are bypassed when
+// `skip_encoder_fast_path` is true, in which case the fully general
+// tree + weighted-predictor path at the end is used):
+//   1. tree reported as WP-only and convertible to a lookup table,
+//   2. single leaf, Gradient predictor, multiplier 1, offset 0,
+//   3. tree reported as gradient-only and convertible to a lookup table,
+//   4. single leaf, Zero predictor, multiplier 1, offset 0,
+//   5. single leaf, non-Weighted predictor, power-of-two multiplier, offset 0,
+//   6. general tree that does not need the weighted predictor.
+//
+// `wp_header` configures the weighted predictor state. `aux_out` is only
+// used for the debug predictor image (kWantDebug). The channel must be
+// non-empty. Always returns true.
+Status EncodeModularChannelMAANS(const Image &image, pixel_type chan,
+                                 const weighted::Header &wp_header,
+                                 const Tree &global_tree,
+                                 std::vector<Token> *tokens, AuxOut *aux_out,
+                                 size_t group_id, bool skip_encoder_fast_path) {
+  const Channel &channel = image.channel[chan];
+
+  JXL_ASSERT(channel.w != 0 && channel.h != 0);
+
+  // Only allocated when debugging; otherwise it stays default-constructed.
+  Image3F predictor_img;
+  if (kWantDebug) predictor_img = Image3F(channel.w, channel.h);
+
+  JXL_DEBUG_V(6,
+              "Encoding %zux%zu channel %d, "
+              "(shift=%i,%i)",
+              channel.w, channel.h, chan, channel.hshift, channel.vshift);
+
+  // Properties that are constant for the whole channel: channel and group id.
+  std::array<pixel_type, kNumStaticProperties> static_props = {chan,
+                                                               (int)group_id};
+  bool use_wp, is_wp_only;
+  bool is_gradient_only;
+  size_t num_props;
+  FlatTree tree = FilterTree(global_tree, static_props, &num_props, &use_wp,
+                             &is_wp_only, &is_gradient_only);
+  Properties properties(num_props);
+  MATreeLookup tree_lookup(tree);
+  JXL_DEBUG_V(3, "Encoding using a MA tree with %zu nodes", tree.size());
+
+  // Check if this tree is a WP-only tree with a small enough property value
+  // range.
+  // Initialized to avoid clang-tidy complaining.
+  uint16_t context_lookup[2 * kPropRangeFast] = {};
+  int8_t offsets[2 * kPropRangeFast] = {};
+  // The lookup-table fast paths are only kept if the conversion succeeds.
+  if (is_wp_only) {
+    is_wp_only = TreeToLookupTable(tree, context_lookup, offsets);
+  }
+  if (is_gradient_only) {
+    is_gradient_only = TreeToLookupTable(tree, context_lookup, offsets);
+  }
+
+  // Exactly one token is produced per pixel.
+  tokens->reserve(tokens->size() + channel.w * channel.h);
+  if (is_wp_only && !skip_encoder_fast_path) {
+    // Fast path 1: context and offset come from a table indexed by the
+    // (clamped) first property computed by the weighted predictor.
+    for (size_t c = 0; c < 3; c++) {
+      FillImage(static_cast<float>(PredictorColor(Predictor::Weighted)[c]),
+                &predictor_img.Plane(c));
+    }
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    weighted::State wp_state(wp_header, channel.w, channel.h);
+    // Note: intentionally shadows the outer `properties`; only one property
+    // slot is needed here.
+    Properties properties(1);
+    for (size_t y = 0; y < channel.h; y++) {
+      const pixel_type *JXL_RESTRICT r = channel.Row(y);
+      for (size_t x = 0; x < channel.w; x++) {
+        size_t offset = 0;
+        // Neighbours, with fallbacks at the image borders.
+        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
+        pixel_type_w top = (y ? *(r + x - onerow) : left);
+        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
+        pixel_type_w topright =
+            (x + 1 < channel.w && y ? *(r + x + 1 - onerow) : top);
+        pixel_type_w toptop = (y > 1 ? *(r + x - onerow - onerow) : top);
+        int32_t guess = wp_state.Predict</*compute_properties=*/true>(
+            x, y, channel.w, top, left, topright, topleft, toptop, &properties,
+            offset);
+        // Clamp the property to [-kPropRangeFast, kPropRangeFast - 1] and
+        // shift it into the table's index range.
+        uint32_t pos =
+            kPropRangeFast + std::min(std::max(-kPropRangeFast, properties[0]),
+                                      kPropRangeFast - 1);
+        uint32_t ctx_id = context_lookup[pos];
+        int32_t residual = r[x] - guess - offsets[pos];
+        tokens->emplace_back(ctx_id, PackSigned(residual));
+        wp_state.UpdateErrors(r[x], x, y, channel.w);
+      }
+    }
+  } else if (tree.size() == 1 && tree[0].predictor == Predictor::Gradient &&
+             tree[0].multiplier == 1 && tree[0].predictor_offset == 0 &&
+             !skip_encoder_fast_path) {
+    // Fast path 2: a single context with the plain clamped-gradient predictor.
+    for (size_t c = 0; c < 3; c++) {
+      FillImage(static_cast<float>(PredictorColor(Predictor::Gradient)[c]),
+                &predictor_img.Plane(c));
+    }
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    for (size_t y = 0; y < channel.h; y++) {
+      const pixel_type *JXL_RESTRICT r = channel.Row(y);
+      for (size_t x = 0; x < channel.w; x++) {
+        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
+        pixel_type_w top = (y ? *(r + x - onerow) : left);
+        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
+        int32_t guess = ClampedGradient(top, left, topleft);
+        int32_t residual = r[x] - guess;
+        tokens->emplace_back(tree[0].childID, PackSigned(residual));
+      }
+    }
+  } else if (is_gradient_only && !skip_encoder_fast_path) {
+    // Fast path 3: gradient predictor; context and offset come from a table
+    // indexed by the (clamped) unclamped gradient top + left - topleft.
+    for (size_t c = 0; c < 3; c++) {
+      FillImage(static_cast<float>(PredictorColor(Predictor::Gradient)[c]),
+                &predictor_img.Plane(c));
+    }
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    for (size_t y = 0; y < channel.h; y++) {
+      const pixel_type *JXL_RESTRICT r = channel.Row(y);
+      for (size_t x = 0; x < channel.w; x++) {
+        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
+        pixel_type_w top = (y ? *(r + x - onerow) : left);
+        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
+        int32_t guess = ClampedGradient(top, left, topleft);
+        uint32_t pos =
+            kPropRangeFast +
+            std::min<pixel_type_w>(
+                std::max<pixel_type_w>(-kPropRangeFast, top + left - topleft),
+                kPropRangeFast - 1);
+        uint32_t ctx_id = context_lookup[pos];
+        int32_t residual = r[x] - guess - offsets[pos];
+        tokens->emplace_back(ctx_id, PackSigned(residual));
+      }
+    }
+  } else if (tree.size() == 1 && tree[0].predictor == Predictor::Zero &&
+             tree[0].multiplier == 1 && tree[0].predictor_offset == 0 &&
+             !skip_encoder_fast_path) {
+    // Fast path 4: no prediction at all, the pixel value is the residual.
+    for (size_t c = 0; c < 3; c++) {
+      FillImage(static_cast<float>(PredictorColor(Predictor::Zero)[c]),
+                &predictor_img.Plane(c));
+    }
+    for (size_t y = 0; y < channel.h; y++) {
+      const pixel_type *JXL_RESTRICT p = channel.Row(y);
+      for (size_t x = 0; x < channel.w; x++) {
+        tokens->emplace_back(tree[0].childID, PackSigned(p[x]));
+      }
+    }
+  } else if (tree.size() == 1 && tree[0].predictor != Predictor::Weighted &&
+             (tree[0].multiplier & (tree[0].multiplier - 1)) == 0 &&
+             tree[0].predictor_offset == 0 && !skip_encoder_fast_path) {
+    // Fast path 5: single context, any non-weighted predictor; the division
+    // by the multiplier is done with a shift.
+    // multiplier is a power of 2.
+    for (size_t c = 0; c < 3; c++) {
+      FillImage(static_cast<float>(PredictorColor(tree[0].predictor)[c]),
+                &predictor_img.Plane(c));
+    }
+    uint32_t mul_shift = FloorLog2Nonzero((uint32_t)tree[0].multiplier);
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    for (size_t y = 0; y < channel.h; y++) {
+      const pixel_type *JXL_RESTRICT r = channel.Row(y);
+      for (size_t x = 0; x < channel.w; x++) {
+        PredictionResult pred = PredictNoTreeNoWP(channel.w, r + x, onerow, x,
+                                                  y, tree[0].predictor);
+        pixel_type_w residual = r[x] - pred.guess;
+        // The residual is expected to be an exact multiple of the multiplier.
+        JXL_DASSERT((residual >> mul_shift) * tree[0].multiplier == residual);
+        tokens->emplace_back(tree[0].childID,
+                             PackSigned(residual >> mul_shift));
+      }
+    }
+
+  } else if (!use_wp && !skip_encoder_fast_path) {
+    // Fast path 6: general tree lookup, but the weighted predictor state does
+    // not have to be maintained.
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    Channel references(properties.size() - kNumNonrefProperties, channel.w);
+    for (size_t y = 0; y < channel.h; y++) {
+      const pixel_type *JXL_RESTRICT p = channel.Row(y);
+      PrecomputeReferences(channel, y, image, chan, &references);
+      float *pred_img_row[3];
+      if (kWantDebug) {
+        for (size_t c = 0; c < 3; c++) {
+          pred_img_row[c] = predictor_img.PlaneRow(c, y);
+        }
+      }
+      InitPropsRow(&properties, static_props, y);
+      for (size_t x = 0; x < channel.w; x++) {
+        PredictionResult res =
+            PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y,
+                            tree_lookup, references);
+        if (kWantDebug) {
+          for (size_t i = 0; i < 3; i++) {
+            pred_img_row[i][x] = PredictorColor(res.predictor)[i];
+          }
+        }
+        pixel_type_w residual = p[x] - res.guess;
+        // The residual must be an exact multiple of the leaf's multiplier.
+        JXL_ASSERT(residual % res.multiplier == 0);
+        tokens->emplace_back(res.context,
+                             PackSigned(residual / res.multiplier));
+      }
+    }
+  } else {
+    // General path: tree lookup plus weighted predictor, whose error state is
+    // updated after every pixel.
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    Channel references(properties.size() - kNumNonrefProperties, channel.w);
+    weighted::State wp_state(wp_header, channel.w, channel.h);
+    for (size_t y = 0; y < channel.h; y++) {
+      const pixel_type *JXL_RESTRICT p = channel.Row(y);
+      PrecomputeReferences(channel, y, image, chan, &references);
+      float *pred_img_row[3];
+      if (kWantDebug) {
+        for (size_t c = 0; c < 3; c++) {
+          pred_img_row[c] = predictor_img.PlaneRow(c, y);
+        }
+      }
+      InitPropsRow(&properties, static_props, y);
+      for (size_t x = 0; x < channel.w; x++) {
+        PredictionResult res =
+            PredictTreeWP(&properties, channel.w, p + x, onerow, x, y,
+                          tree_lookup, references, &wp_state);
+        if (kWantDebug) {
+          for (size_t i = 0; i < 3; i++) {
+            pred_img_row[i][x] = PredictorColor(res.predictor)[i];
+          }
+        }
+        pixel_type_w residual = p[x] - res.guess;
+        // The residual must be an exact multiple of the leaf's multiplier.
+        JXL_ASSERT(residual % res.multiplier == 0);
+        tokens->emplace_back(res.context,
+                             PackSigned(residual / res.multiplier));
+        wp_state.UpdateErrors(p[x], x, y, channel.w);
+      }
+    }
+  }
+  // Debug only: dump the map of which predictor was used for each pixel.
+  if (kWantDebug && WantDebugOutput(aux_out)) {
+    aux_out->DumpImage(
+        ("pred_" + ToString(group_id) + "_" + ToString(chan)).c_str(),
+        predictor_img);
+  }
+  return true;
+}
+
+// Encodes `image` with the modular (MA tree + entropy coded residuals) scheme.
+//
+// The function operates in one of three modes, selected by the pointer
+// arguments:
+//  * Gather mode (`tree_samples` != nullptr, `tree` == nullptr): only collects
+//    tree-learning samples for every channel into `*tree_samples` (and counts
+//    pixels in `*total_pixels`), then returns without writing anything.
+//  * Standalone mode (`tree_samples` == nullptr, `tree` == nullptr): writes
+//    the group header, learns a tree from this image, writes the tree, and
+//    writes the histograms and tokens to `writer`.
+//  * Global-tree mode (`tree` != nullptr): marks the header as using a global
+//    tree, appends the tokens to `*tokens` and reports the widest encoded
+//    channel through `*width` (skipped when `width` is nullptr); nothing is
+//    written to `writer` here.
+//
+// `tree` and `tokens` must be both null or both non-null. `header` may be
+// nullptr, in which case a local header is used. `total_pixels` may be
+// nullptr. Channels with a zero dimension are skipped; encoding stops at the
+// first non-meta channel larger than options.max_chan_size.
+//
+// Returns a failure Status if the image is flagged as invalid or a nested
+// step fails; returns true otherwise (including for zero-channel images).
+Status ModularEncode(const Image &image, const ModularOptions &options,
+                     BitWriter *writer, AuxOut *aux_out, size_t layer,
+                     size_t group_id, TreeSamples *tree_samples,
+                     size_t *total_pixels, const Tree *tree,
+                     GroupHeader *header, std::vector<Token> *tokens,
+                     size_t *width) {
+  if (image.error) return JXL_FAILURE("Invalid image");
+  size_t nb_channels = image.channel.size();
+  JXL_DEBUG_V(2, "Encoding %zu-channel, %i-bit, %zux%zu image.", nb_channels,
+              image.bitdepth, image.w, image.h);
+
+  if (nb_channels < 1) {
+    return true;  // is there any use for a zero-channel image?
+  }
+
+  // encode transforms
+  GroupHeader header_storage;
+  if (header == nullptr) header = &header_storage;
+  Bundle::Init(header);
+  if (options.predictor == Predictor::Weighted) {
+    weighted::PredictorMode(options.wp_mode, &header->wp_header);
+  }
+  header->transforms = image.transform;
+  // This doesn't actually work
+  if (tree != nullptr) {
+    header->use_global_tree = true;
+  }
+  // The header is only written here in standalone mode.
+  if (tree_samples == nullptr && tree == nullptr) {
+    JXL_RETURN_IF_ERROR(Bundle::Write(*header, writer, layer, aux_out));
+  }
+
+  TreeSamples tree_samples_storage;
+  size_t total_pixels_storage = 0;
+  if (!total_pixels) total_pixels = &total_pixels_storage;
+  // If there's no tree, compute one (or gather data to).
+  if (tree == nullptr) {
+    bool gather_data = tree_samples != nullptr;
+    if (tree_samples == nullptr) {
+      // Standalone mode: configure and pre-quantize the local sample store.
+      JXL_RETURN_IF_ERROR(tree_samples_storage.SetPredictor(
+          options.predictor, options.wp_tree_mode));
+      JXL_RETURN_IF_ERROR(tree_samples_storage.SetProperties(
+          options.splitting_heuristics_properties, options.wp_tree_mode));
+      std::vector<pixel_type> pixel_samples;
+      std::vector<pixel_type> diff_samples;
+      std::vector<uint32_t> group_pixel_count;
+      std::vector<uint32_t> channel_pixel_count;
+      CollectPixelSamples(image, options, 0, group_pixel_count,
+                          channel_pixel_count, pixel_samples, diff_samples);
+      std::vector<ModularMultiplierInfo> dummy_multiplier_info;
+      // Value-initialized so the callee never sees indeterminate values.
+      StaticPropRange range = {};
+      tree_samples_storage.PreQuantizeProperties(
+          range, dummy_multiplier_info, group_pixel_count, channel_pixel_count,
+          pixel_samples, diff_samples, options.max_property_values);
+    }
+    for (size_t i = 0; i < nb_channels; i++) {
+      if (!image.channel[i].w || !image.channel[i].h) {
+        continue;  // skip empty channels
+      }
+      if (i >= image.nb_meta_channels &&
+          (image.channel[i].w > options.max_chan_size ||
+           image.channel[i].h > options.max_chan_size)) {
+        break;
+      }
+      GatherTreeData(image, i, group_id, header->wp_header, options,
+                     gather_data ? *tree_samples : tree_samples_storage,
+                     total_pixels);
+    }
+    // Gather mode ends here; the caller learns the tree later.
+    if (gather_data) return true;
+  }
+
+  JXL_ASSERT((tree == nullptr) == (tokens == nullptr));
+
+  Tree tree_storage;
+  std::vector<std::vector<Token>> tokens_storage(1);
+  // Compute tree.
+  if (tree == nullptr) {
+    EntropyEncodingData code;
+    std::vector<uint8_t> context_map;
+
+    std::vector<std::vector<Token>> tree_tokens(1);
+    tree_storage =
+        LearnTree(std::move(tree_samples_storage), *total_pixels, options);
+    tree = &tree_storage;
+    tokens = &tokens_storage[0];
+
+    // Tokenizing also yields the tree in the node order the decoder will
+    // see; that version replaces the learned tree.
+    Tree decoded_tree;
+    TokenizeTree(*tree, &tree_tokens[0], &decoded_tree);
+    JXL_ASSERT(tree->size() == decoded_tree.size());
+    tree_storage = std::move(decoded_tree);
+
+    if (kWantDebug && WantDebugOutput(aux_out)) {
+      PrintTree(*tree, aux_out->debug_prefix + "/tree_" + ToString(group_id));
+    }
+    // Write tree
+    BuildAndEncodeHistograms(HistogramParams(), kNumTreeContexts, tree_tokens,
+                             &code, &context_map, writer, kLayerModularTree,
+                             aux_out);
+    WriteTokens(tree_tokens[0], code, context_map, writer, kLayerModularTree,
+                aux_out);
+  }
+
+  // Tokenize every encodable channel and track the widest one.
+  size_t image_width = 0;
+  for (size_t i = 0; i < nb_channels; i++) {
+    if (!image.channel[i].w || !image.channel[i].h) {
+      continue;  // skip empty channels
+    }
+    if (i >= image.nb_meta_channels &&
+        (image.channel[i].w > options.max_chan_size ||
+         image.channel[i].h > options.max_chan_size)) {
+      break;
+    }
+    if (image.channel[i].w > image_width) image_width = image.channel[i].w;
+    if (options.zero_tokens) {
+      // Emit one all-zero token per pixel instead of real residuals.
+      tokens->resize(tokens->size() + image.channel[i].w * image.channel[i].h,
+                     {0, 0});
+    } else {
+      JXL_RETURN_IF_ERROR(EncodeModularChannelMAANS(
+          image, i, header->wp_header, *tree, tokens, aux_out, group_id,
+          options.skip_encoder_fast_path));
+    }
+  }
+
+  // Write data if not using a global tree/ANS stream.
+  if (!header->use_global_tree) {
+    EntropyEncodingData code;
+    std::vector<uint8_t> context_map;
+    HistogramParams histo_params;
+    histo_params.image_widths.push_back(image_width);
+    BuildAndEncodeHistograms(histo_params, (tree->size() + 1) / 2,
+                             tokens_storage, &code, &context_map, writer, layer,
+                             aux_out);
+    WriteTokens(tokens_storage[0], code, context_map, writer, layer, aux_out);
+  } else if (width != nullptr) {
+    // `width` is optional (the public wrapper defaults it to nullptr).
+    *width = image_width;
+  }
+  return true;
+}
+
+// Front end for the modular encoder: returns immediately for images with a
+// zero dimension, substitutes Predictor::Gradient when the options leave the
+// predictor unset (encoded as -1), runs the encoder and, if a writer was
+// supplied, logs how many bytes it produced. All remaining arguments are
+// forwarded unchanged.
+Status ModularGenericCompress(Image &image, const ModularOptions &opts,
+                              BitWriter *writer, AuxOut *aux_out, size_t layer,
+                              size_t group_id, TreeSamples *tree_samples,
+                              size_t *total_pixels, const Tree *tree,
+                              GroupHeader *header, std::vector<Token> *tokens,
+                              size_t *width) {
+  const bool is_empty = (image.w == 0) || (image.h == 0);
+  if (is_empty) return true;
+
+  // Work on a copy so the caller's options stay untouched.
+  ModularOptions options(opts);
+  const Predictor unset_predictor = static_cast<Predictor>(-1);
+  if (options.predictor == unset_predictor) {
+    options.predictor = Predictor::Gradient;
+  }
+
+  const size_t bits_before = (writer != nullptr) ? writer->BitsWritten() : 0;
+  JXL_RETURN_IF_ERROR(ModularEncode(image, options, writer, aux_out, layer,
+                                    group_id, tree_samples, total_pixels, tree,
+                                    header, tokens, width));
+  if (writer != nullptr) {
+    const size_t bits = writer->BitsWritten() - bits_before;
+    JXL_DEBUG_V(
+        4,
+        "Modular-encoded a %zux%zu bitdepth=%i nbchans=%zu image in %zu bytes",
+        image.w, image.h, image.bitdepth, image.channel.size(), bits / 8);
+    // Keeps `bits` used when the debug macro compiles to nothing.
+    (void)bits;
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_encoding.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_encoding.h
new file mode 100644
index 0000000000..9c083e9575
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_encoding.h
@@ -0,0 +1,49 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_ENC_ENCODING_H_
+#define LIB_JXL_MODULAR_ENCODING_ENC_ENCODING_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/encoding/enc_ma.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+// Declared here; the definition is not visible from this header.
+// NOTE(review): this declaration uses std::string but the header does not
+// include <string> directly - presumably it arrives transitively; confirm.
+void PrintTree(const Tree &tree, const std::string &path);
+// Produces a Tree from the gathered `tree_samples`; `multiplier_info` and
+// `static_prop_range` default to empty. Consumes `tree_samples` (rvalue
+// reference). Presumably this is the tree-learning entry point - confirm
+// against the definition.
+Tree LearnTree(TreeSamples &&tree_samples, size_t total_pixels,
+               const ModularOptions &options,
+               const std::vector<ModularMultiplierInfo> &multiplier_info = {},
+               StaticPropRange static_prop_range = {});
+
+// TODO(veluca): make cleaner interfaces.
+
+// Modular-encodes `image` using `opts`. Every parameter after `writer` is
+// optional and defaults to null/zero; the inline comments below group them by
+// purpose.
+// NOTE(review): the definition in enc_encoding.cc names the last parameter
+// `width`, not `widths`.
+Status ModularGenericCompress(
+    Image &image, const ModularOptions &opts, BitWriter *writer,
+    AuxOut *aux_out = nullptr, size_t layer = 0, size_t group_id = 0,
+    // For gathering data for producing a global tree.
+    TreeSamples *tree_samples = nullptr, size_t *total_pixels = nullptr,
+    // For encoding with global tree.
+    const Tree *tree = nullptr, GroupHeader *header = nullptr,
+    std::vector<Token> *tokens = nullptr, size_t *widths = nullptr);
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_ENCODING_ENC_ENCODING_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_ma.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_ma.cc
new file mode 100644
index 0000000000..0e2eaac71f
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_ma.cc
@@ -0,0 +1,1043 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/encoding/enc_ma.h"
+
+#include <algorithm>
+#include <limits>
+#include <numeric>
+#include <queue>
+#include <random>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "lib/jxl/modular/encoding/ma_common.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/modular/encoding/enc_ma.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#ifndef LIB_JXL_ENC_MODULAR_ENCODING_MA_
+#define LIB_JXL_ENC_MODULAR_ENCODING_MA_
+namespace {
+// Small deterministic pseudo-random generator with two 64-bit words of state.
+// Exposes operator(), min() and max() so it can be called like a standard
+// random bit generator.
+struct Rng {
+  uint64_t s[2];
+  // The seed only perturbs the second state word.
+  explicit Rng(size_t seed)
+      : s{0x94D049BB133111EBull, 0xBF58476D1CE4E5B9ull + seed} {}
+  // Xorshift128+ adapted from xorshift128+-inl.h
+  uint64_t operator()() {
+    const uint64_t carried = s[1];
+    uint64_t mixed = s[0];
+    // The output is the sum of both words *before* the state is advanced.
+    const uint64_t output = mixed + carried;
+    s[0] = carried;
+    mixed ^= mixed << 23;
+    mixed ^= carried ^ (mixed >> 18) ^ (carried >> 5);
+    s[1] = mixed;
+    return output;
+  }
+  static constexpr uint64_t max() { return ~0ULL; }
+  static constexpr uint64_t min() { return 0; }
+};
+}  // namespace
+#endif
+
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/options.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// Vector descriptors (float and int32 lanes) shared by the SIMD code below.
+const HWY_FULL(float) df;
+const HWY_FULL(int32_t) di;
+// Returns RoundUpTo(x, number of float lanes), i.e. `x` padded so that buffers
+// of that size can be processed in whole vectors.
+size_t Padded(size_t x) {
+  const size_t lanes = Lanes(df);
+  return RoundUpTo(x, lanes);
+}
+
+// Estimates the number of bits needed to entropy-code the histogram `counts`
+// once every nonzero count has been raised to a minimum value derived from the
+// histogram total and ANS_TAB_SIZE.
+//
+// counts:         input histogram, read in whole vectors of Lanes(df) entries.
+// rounded_counts: scratch buffer that receives the raised counts; it is
+//                 written in whole vectors as well.
+// num_symbols:    number of histogram entries. Both loops advance by Lanes(df)
+//                 and use full-vector loads/stores, so this is presumably
+//                 expected to be a multiple of Lanes(df) (the caller in this
+//                 file passes a value produced by Padded()).
+//
+// Returns the sum over all symbols with a nonzero count of
+// -counts[i] * log2(rounded_counts[i] / sum(rounded_counts)), with the
+// logarithm computed by FastLog2f.
+float EstimateBits(const int32_t *counts, int32_t *rounded_counts,
+                   size_t num_symbols) {
+  // Try to approximate the effect of rounding up nonzero probabilities.
+  int32_t total = std::accumulate(counts, counts + num_symbols, 0);
+  // Smallest count a used symbol may keep: total / ANS_TAB_SIZE, rounded up.
+  const auto min = Set(di, (total + ANS_TAB_SIZE - 1) >> ANS_LOG_TAB_SIZE);
+  const auto zero_i = Zero(di);
+  for (size_t i = 0; i < num_symbols; i += Lanes(df)) {
+    auto counts_v = LoadU(di, &counts[i]);
+    // Zero counts stay zero; nonzero counts below `min` are raised to `min`.
+    counts_v = IfThenElse(counts_v == zero_i, zero_i,
+                          IfThenElse(counts_v < min, min, counts_v));
+    StoreU(counts_v, di, &rounded_counts[i]);
+  }
+  // Compute entropy of the "rounded" probabilities.
+  const auto zero = Zero(df);
+  const size_t total_scalar =
+      std::accumulate(rounded_counts, rounded_counts + num_symbols, 0);
+  const auto inv_total = Set(df, 1.0f / total_scalar);
+  auto bits_lanes = Zero(df);
+  auto total_v = Set(di, total_scalar);
+  for (size_t i = 0; i < num_symbols; i += Lanes(df)) {
+    const auto counts_v = ConvertTo(df, LoadU(di, &counts[i]));
+    const auto round_counts_v = LoadU(di, &rounded_counts[i]);
+    const auto probs = ConvertTo(df, round_counts_v) * inv_total;
+    // A symbol holding the whole mass costs exactly 0 bits; this bypasses the
+    // approximate logarithm for that case. The select is done on the integer
+    // bit patterns and cast back to float below.
+    const auto nbps = IfThenElse(round_counts_v == total_v, BitCast(di, zero),
+                                 BitCast(di, FastLog2f(df, probs)));
+    // log2(prob) is <= 0, hence the subtraction; unused symbols contribute 0.
+    bits_lanes -=
+        IfThenElse(counts_v == zero, zero, counts_v * BitCast(df, nbps));
+  }
+  return GetLane(SumOfLanes(bits_lanes));
+}
+
+// Turns node `pos` of `tree` into a decision node on (`property`, `splitval`)
+// and appends two fresh leaves. The leaf stored at `lchild` is created first
+// and receives `rpred`/`roff`; the leaf stored at `rchild` is created second
+// and receives `lpred`/`loff`. Both leaves get multiplier 1.
+void MakeSplitNode(size_t pos, int property, int splitval, Predictor lpred,
+                   int64_t loff, Predictor rpred, int64_t roff, Tree *tree) {
+  // Note that the tree splits on *strictly greater*.
+  const size_t first_child = tree->size();
+  {
+    // Scoped so the reference is not used after the vector grows.
+    auto &parent = (*tree)[pos];
+    parent.lchild = first_child;
+    parent.rchild = first_child + 1;
+    parent.splitval = splitval;
+    parent.property = property;
+  }
+  auto append_leaf = [tree](Predictor predictor, int64_t offset) {
+    tree->emplace_back();
+    auto &leaf = tree->back();
+    leaf.property = -1;
+    leaf.predictor = predictor;
+    leaf.predictor_offset = offset;
+    leaf.multiplier = 1;
+  };
+  append_leaf(rpred, roff);
+  append_leaf(lpred, loff);
+}
+
+enum class IntersectionType { kNone, kPartial, kInside };
+// Classifies how the half-open box `needle` relates to `haystack` over all
+// static properties:
+//  - kNone:    the boxes are disjoint along at least one axis;
+//  - kInside:  `haystack` covers `needle` along every axis;
+//  - kPartial: they overlap but `needle` sticks out along some axis. In that
+//              case `partial_axis`/`partial_val` describe a cut along the
+//              last such axis (the haystack boundary inside needle, minus 1).
+// The two output references are only written for axes with partial overlap.
+IntersectionType BoxIntersects(StaticPropRange needle, StaticPropRange haystack,
+                               uint32_t &partial_axis, uint32_t &partial_val) {
+  bool found_partial = false;
+  for (size_t axis = 0; axis < kNumStaticProperties; axis++) {
+    const auto &hay = haystack[axis];
+    const auto &ndl = needle[axis];
+    // No overlap at all along this axis.
+    if (hay[0] >= ndl[1]) return IntersectionType::kNone;
+    if (hay[1] <= ndl[0]) return IntersectionType::kNone;
+    // Needle fully covered along this axis: nothing to record.
+    const bool covered = hay[0] <= ndl[0] && hay[1] >= ndl[1];
+    if (covered) continue;
+
+    found_partial = true;
+    partial_axis = axis;
+    const bool lower_edge_inside = hay[0] > ndl[0] && hay[0] < ndl[1];
+    if (lower_edge_inside) {
+      partial_val = hay[0] - 1;
+    } else {
+      JXL_DASSERT(hay[1] > ndl[0] && hay[1] < ndl[1]);
+      partial_val = hay[1] - 1;
+    }
+  }
+  if (found_partial) return IntersectionType::kPartial;
+  return IntersectionType::kInside;
+}
+
+// Partially reorders the samples in [begin, end) by the value of property
+// `prop`, quickselect-style: a pivot is picked pseudo-randomly, the range is
+// three-way partitioned around it, and the search continues only in the side
+// that contains `pos`. It stops once `pos` lies within (or at the edge of) the
+// run of samples equal to the pivot, or the range has at most one element.
+// The pivot sequence comes from Rng(0), so the reordering is deterministic.
+void SplitTreeSamples(TreeSamples &tree_samples, size_t begin, size_t pos,
+                      size_t end, size_t prop) {
+  // Three-way comparison of two samples: negative, zero or positive.
+  auto cmp = [&](size_t a, size_t b) {
+    return int32_t(tree_samples.Property(prop, a)) -
+           int32_t(tree_samples.Property(prop, b));
+  };
+  Rng rng(0);
+  while (end > begin + 1) {
+    {
+      JXL_ASSERT(end > begin);  // silence clang-tidy.
+      // Move a randomly chosen pivot to the front of the range.
+      size_t pivot = rng() % (end - begin) + begin;
+      tree_samples.Swap(begin, pivot);
+    }
+    // [pivot_begin, pivot_end) is the run of samples equal to the pivot.
+    size_t pivot_begin = begin;
+    size_t pivot_end = pivot_begin + 1;
+    for (size_t i = begin + 1; i < end; i++) {
+      JXL_DASSERT(i >= pivot_end);
+      JXL_DASSERT(pivot_end > pivot_begin);
+      int32_t cmp_result = cmp(i, pivot_begin);
+      if (cmp_result < 0) {  // i < pivot, move pivot forward and put i before
+                             // the pivot.
+        tree_samples.ThreeShuffle(pivot_begin, pivot_end, i);
+        pivot_begin++;
+        pivot_end++;
+      } else if (cmp_result == 0) {
+        // Equal to the pivot: extend the pivot run by one.
+        tree_samples.Swap(pivot_end, i);
+        pivot_end++;
+      }
+    }
+    // Debug-only verification of the partition.
+    JXL_DASSERT(pivot_begin >= begin);
+    JXL_DASSERT(pivot_end > pivot_begin);
+    JXL_DASSERT(pivot_end <= end);
+    for (size_t i = begin; i < pivot_begin; i++) {
+      JXL_DASSERT(cmp(i, pivot_begin) < 0);
+    }
+    for (size_t i = pivot_end; i < end; i++) {
+      JXL_DASSERT(cmp(i, pivot_begin) > 0);
+    }
+    for (size_t i = pivot_begin; i < pivot_end; i++) {
+      JXL_DASSERT(cmp(i, pivot_begin) == 0);
+    }
+    // We now have that [begin, pivot_begin) is < pivot, [pivot_begin,
+    // pivot_end) is = pivot, and [pivot_end, end) is > pivot.
+    // If pos falls in the first or the last interval, we continue in that
+    // interval; otherwise, we are done.
+    if (pivot_begin > pos) {
+      end = pivot_begin;
+    } else if (pivot_end < pos) {
+      begin = pivot_end;
+    } else {
+      break;
+    }
+  }
+}
+
+// Greedily grows `tree` (which must already contain its root leaf) by
+// repeatedly splitting leaves while a split is estimated to save more than
+// `threshold` bits.
+//
+// Nodes still to be examined are kept on an explicit LIFO stack. For each one:
+//  1. per-predictor token histograms and extra-bit totals are gathered for the
+//     node's samples [begin, end), and the cost of leaving the node unsplit
+//     (`base_bits`) is estimated with the node's current predictor;
+//  2. if a multiplier range from `mul_info` only partially overlaps the node's
+//     static-property box, a split along that boundary is forced; if the box
+//     lies inside a range, the node takes that range's multiplier;
+//  3. otherwise every (property, value, predictor) combination is costed and
+//     the best candidate is tracked per category (static with a zero-entropy
+//     side, static, non-static without WP, non-static introducing WP);
+//     `fast_decode_multiplier` sets how much more expensive a no-WP or static
+//     split may be and still be preferred;
+//  4. if the chosen split beats `base_bits` by more than `threshold`, the node
+//     is split, its samples are partitioned, and both children are pushed.
+//
+// tree_samples is reordered in place. `initial_static_prop_range` is the
+// static-property box covered by the root.
+void FindBestSplit(TreeSamples &tree_samples, float threshold,
+                   const std::vector<ModularMultiplierInfo> &mul_info,
+                   StaticPropRange initial_static_prop_range,
+                   float fast_decode_multiplier, Tree *tree) {
+  struct NodeInfo {
+    size_t pos;    // Index of the node in `tree`.
+    size_t begin;  // Samples [begin, end) belong to this node.
+    size_t end;
+    // Bitmask, indexed by property index, of the non-static properties already
+    // used on the path to this node.
+    uint64_t used_properties;
+    StaticPropRange static_prop_range;
+  };
+  std::vector<NodeInfo> nodes;
+  nodes.push_back(NodeInfo{0, 0, tree_samples.NumDistinctSamples(), 0,
+                           initial_static_prop_range});
+
+  size_t num_predictors = tree_samples.NumPredictors();
+  size_t num_properties = tree_samples.NumProperties();
+
+  // TODO(veluca): consider parallelizing the search (processing multiple nodes
+  // at a time).
+  while (!nodes.empty()) {
+    size_t pos = nodes.back().pos;
+    size_t begin = nodes.back().begin;
+    size_t end = nodes.back().end;
+    uint64_t used_properties = nodes.back().used_properties;
+    StaticPropRange static_prop_range = nodes.back().static_prop_range;
+    nodes.pop_back();
+    if (begin == end) continue;
+
+    struct SplitInfo {
+      size_t prop = 0;
+      uint32_t val = 0;
+      size_t pos = 0;
+      float lcost = std::numeric_limits<float>::max();
+      float rcost = std::numeric_limits<float>::max();
+      Predictor lpred = Predictor::Zero;
+      Predictor rpred = Predictor::Zero;
+      float Cost() { return lcost + rcost; }
+    };
+
+    SplitInfo best_split_static_constant;
+    SplitInfo best_split_static;
+    SplitInfo best_split_nonstatic;
+    SplitInfo best_split_nowp;
+
+    JXL_DASSERT(begin <= end);
+    JXL_DASSERT(end <= tree_samples.NumDistinctSamples());
+
+    // Compute the maximum token in the range.
+    size_t max_symbols = 0;
+    for (size_t pred = 0; pred < num_predictors; pred++) {
+      for (size_t i = begin; i < end; i++) {
+        uint32_t tok = tree_samples.Token(pred, i);
+        max_symbols = max_symbols > tok + 1 ? max_symbols : tok + 1;
+      }
+    }
+    // Pad so that EstimateBits can process the histograms in whole vectors.
+    max_symbols = Padded(max_symbols);
+    std::vector<int32_t> rounded_counts(max_symbols);
+    std::vector<int32_t> counts(max_symbols * num_predictors);
+    std::vector<uint32_t> tot_extra_bits(num_predictors);
+    for (size_t pred = 0; pred < num_predictors; pred++) {
+      for (size_t i = begin; i < end; i++) {
+        counts[pred * max_symbols + tree_samples.Token(pred, i)] +=
+            tree_samples.Count(i);
+        tot_extra_bits[pred] +=
+            tree_samples.NBits(pred, i) * tree_samples.Count(i);
+      }
+    }
+
+    // Cost of keeping this node as a leaf with its current predictor.
+    float base_bits;
+    {
+      size_t pred = tree_samples.PredictorIndex((*tree)[pos].predictor);
+      base_bits = EstimateBits(counts.data() + pred * max_symbols,
+                               rounded_counts.data(), max_symbols) +
+                  tot_extra_bits[pred];
+    }
+
+    SplitInfo *best = &best_split_nonstatic;
+
+    SplitInfo forced_split;
+    // The multiplier ranges cut halfway through the current ranges of static
+    // properties. We do this even if the current node is not a leaf, to
+    // minimize the number of nodes in the resulting tree.
+    for (size_t i = 0; i < mul_info.size(); i++) {
+      uint32_t axis, val;
+      IntersectionType t =
+          BoxIntersects(static_prop_range, mul_info[i].range, axis, val);
+      if (t == IntersectionType::kNone) continue;
+      if (t == IntersectionType::kInside) {
+        (*tree)[pos].multiplier = mul_info[i].multiplier;
+        break;
+      }
+      if (t == IntersectionType::kPartial) {
+        forced_split.val = tree_samples.QuantizeProperty(axis, val);
+        forced_split.prop = axis;
+        // Costs chosen so that Cost() + threshold < base_bits always holds,
+        // i.e. the forced split is always taken below.
+        forced_split.lcost = forced_split.rcost = base_bits / 2 - threshold;
+        forced_split.lpred = forced_split.rpred = (*tree)[pos].predictor;
+        best = &forced_split;
+        best->pos = begin;
+        JXL_ASSERT(best->prop == tree_samples.PropertyFromIndex(best->prop));
+        for (size_t x = begin; x < end; x++) {
+          if (tree_samples.Property(best->prop, x) <= best->val) {
+            best->pos++;
+          }
+        }
+        break;
+      }
+    }
+
+    if (best != &forced_split) {
+      std::vector<int> prop_value_used_count;
+      std::vector<int> count_increase;
+      std::vector<size_t> extra_bits_increase;
+      // For each property, compute which of its values are used, and what
+      // tokens correspond to those usages. Then, iterate through the values,
+      // and compute the entropy of each side of the split (of the form `prop >
+      // threshold`). Finally, find the split that minimizes the cost.
+      struct CostInfo {
+        float cost = std::numeric_limits<float>::max();
+        float extra_cost = 0;
+        float Cost() const { return cost + extra_cost; }
+        Predictor pred;  // will be uninitialized in some cases, but never used.
+      };
+      std::vector<CostInfo> costs_l;
+      std::vector<CostInfo> costs_r;
+
+      std::vector<int32_t> counts_above(max_symbols);
+      std::vector<int32_t> counts_below(max_symbols);
+
+      // The lower the threshold, the higher the expected noisiness of the
+      // estimate. Thus, discourage changing predictors.
+      float change_pred_penalty = 800.0f / (100.0f + threshold);
+      for (size_t prop = 0; prop < num_properties && base_bits > threshold;
+           prop++) {
+        costs_l.clear();
+        costs_r.clear();
+        size_t prop_size = tree_samples.NumPropertyValues(prop);
+        if (extra_bits_increase.size() < prop_size) {
+          count_increase.resize(prop_size * max_symbols);
+          extra_bits_increase.resize(prop_size);
+        }
+        // Clear prop_value_used_count (which cannot be cleared "on the go")
+        prop_value_used_count.clear();
+        prop_value_used_count.resize(prop_size);
+
+        size_t first_used = prop_size;
+        size_t last_used = 0;
+
+        // TODO(veluca): consider finding multiple splits along a single
+        // property at the same time, possibly with a bottom-up approach.
+        for (size_t i = begin; i < end; i++) {
+          size_t p = tree_samples.Property(prop, i);
+          prop_value_used_count[p]++;
+          last_used = std::max(last_used, p);
+          first_used = std::min(first_used, p);
+        }
+        costs_l.resize(last_used - first_used);
+        costs_r.resize(last_used - first_used);
+        // For all predictors, compute the right and left costs of each split.
+        for (size_t pred = 0; pred < num_predictors; pred++) {
+          // Compute cost and histogram increments for each property value.
+          for (size_t i = begin; i < end; i++) {
+            size_t p = tree_samples.Property(prop, i);
+            size_t cnt = tree_samples.Count(i);
+            size_t sym = tree_samples.Token(pred, i);
+            count_increase[p * max_symbols + sym] += cnt;
+            extra_bits_increase[p] += tree_samples.NBits(pred, i) * cnt;
+          }
+          memcpy(counts_above.data(), counts.data() + pred * max_symbols,
+                 max_symbols * sizeof counts_above[0]);
+          memset(counts_below.data(), 0, max_symbols * sizeof counts_below[0]);
+          size_t extra_bits_below = 0;
+          // Exclude last used: this ensures neither counts_above nor
+          // counts_below is empty.
+          for (size_t i = first_used; i < last_used; i++) {
+            if (!prop_value_used_count[i]) continue;
+            extra_bits_below += extra_bits_increase[i];
+            // The increase for this property value has been used, and will not
+            // be used again: clear it. Also below.
+            extra_bits_increase[i] = 0;
+            for (size_t sym = 0; sym < max_symbols; sym++) {
+              counts_above[sym] -= count_increase[i * max_symbols + sym];
+              counts_below[sym] += count_increase[i * max_symbols + sym];
+              count_increase[i * max_symbols + sym] = 0;
+            }
+            float rcost = EstimateBits(counts_above.data(),
+                                       rounded_counts.data(), max_symbols) +
+                          tot_extra_bits[pred] - extra_bits_below;
+            float lcost = EstimateBits(counts_below.data(),
+                                       rounded_counts.data(), max_symbols) +
+                          extra_bits_below;
+            JXL_DASSERT(extra_bits_below <= tot_extra_bits[pred]);
+            float penalty = 0;
+            // Never discourage moving away from the Weighted predictor.
+            if (tree_samples.PredictorFromIndex(pred) !=
+                    (*tree)[pos].predictor &&
+                (*tree)[pos].predictor != Predictor::Weighted) {
+              penalty = change_pred_penalty;
+            }
+            // If everything else is equal, disfavour Weighted (slower) and
+            // favour Zero (faster if it's the only predictor used in a
+            // group+channel combination)
+            if (tree_samples.PredictorFromIndex(pred) == Predictor::Weighted) {
+              penalty += 1e-8;
+            }
+            if (tree_samples.PredictorFromIndex(pred) == Predictor::Zero) {
+              penalty -= 1e-8;
+            }
+            if (rcost + penalty < costs_r[i - first_used].Cost()) {
+              costs_r[i - first_used].cost = rcost;
+              costs_r[i - first_used].extra_cost = penalty;
+              costs_r[i - first_used].pred =
+                  tree_samples.PredictorFromIndex(pred);
+            }
+            if (lcost + penalty < costs_l[i - first_used].Cost()) {
+              costs_l[i - first_used].cost = lcost;
+              costs_l[i - first_used].extra_cost = penalty;
+              costs_l[i - first_used].pred =
+                  tree_samples.PredictorFromIndex(pred);
+            }
+          }
+        }
+        // Iterate through the possible splits and find the one with minimum sum
+        // of costs of the two sides.
+        size_t split = begin;
+        for (size_t i = first_used; i < last_used; i++) {
+          if (!prop_value_used_count[i]) continue;
+          split += prop_value_used_count[i];
+          float rcost = costs_r[i - first_used].cost;
+          float lcost = costs_l[i - first_used].cost;
+          // WP was not used + we would use the WP property or predictor
+          // The mask test uses a 64-bit one so that property indices >= 32
+          // are handled on every platform (`unsigned long` may be 32 bits).
+          bool adds_wp =
+              (tree_samples.PropertyFromIndex(prop) == kWPProp &&
+               (used_properties & (uint64_t{1} << prop)) == 0) ||
+              ((costs_l[i - first_used].pred == Predictor::Weighted ||
+                costs_r[i - first_used].pred == Predictor::Weighted) &&
+               (*tree)[pos].predictor != Predictor::Weighted);
+          bool zero_entropy_side = rcost == 0 || lcost == 0;
+
+          SplitInfo &best =
+              prop < kNumStaticProperties
+                  ? (zero_entropy_side ? best_split_static_constant
+                                       : best_split_static)
+                  : (adds_wp ? best_split_nonstatic : best_split_nowp);
+          if (lcost + rcost < best.Cost()) {
+            best.prop = prop;
+            best.val = i;
+            best.pos = split;
+            best.lcost = lcost;
+            best.lpred = costs_l[i - first_used].pred;
+            best.rcost = rcost;
+            best.rpred = costs_r[i - first_used].pred;
+          }
+        }
+        // Clear extra_bits_increase and cost_increase for last_used.
+        extra_bits_increase[last_used] = 0;
+        for (size_t sym = 0; sym < max_symbols; sym++) {
+          count_increase[last_used * max_symbols + sym] = 0;
+        }
+      }
+
+      // Try to avoid introducing WP.
+      if (best_split_nowp.Cost() + threshold < base_bits &&
+          best_split_nowp.Cost() <= fast_decode_multiplier * best->Cost()) {
+        best = &best_split_nowp;
+      }
+      // Split along static props if possible and not significantly more
+      // expensive.
+      if (best_split_static.Cost() + threshold < base_bits &&
+          best_split_static.Cost() <= fast_decode_multiplier * best->Cost()) {
+        best = &best_split_static;
+      }
+      // Split along static props to create constant nodes if possible.
+      if (best_split_static_constant.Cost() + threshold < base_bits) {
+        best = &best_split_static_constant;
+      }
+    }
+
+    if (best->Cost() + threshold < base_bits) {
+      uint32_t p = tree_samples.PropertyFromIndex(best->prop);
+      pixel_type dequant =
+          tree_samples.UnquantizeProperty(best->prop, best->val);
+      // Split node and try to split children.
+      MakeSplitNode(pos, p, dequant, best->lpred, 0, best->rpred, 0, tree);
+      // "Sort" according to winning property
+      SplitTreeSamples(tree_samples, begin, best->pos, end, best->prop);
+      if (p >= kNumStaticProperties) {
+        // 64-bit shift: a plain `1 << prop` is an int shift, which is
+        // undefined for prop >= 32 and sign-extends into the high bits for
+        // prop == 31.
+        used_properties |= uint64_t{1} << best->prop;
+      }
+      // Samples [begin, best->pos) have property <= dequant: they go to
+      // rchild, whose static range is capped at dequant + 1.
+      auto new_sp_range = static_prop_range;
+      if (p < kNumStaticProperties) {
+        JXL_ASSERT(static_cast<uint32_t>(dequant + 1) <= new_sp_range[p][1]);
+        new_sp_range[p][1] = dequant + 1;
+        JXL_ASSERT(new_sp_range[p][0] < new_sp_range[p][1]);
+      }
+      nodes.push_back(NodeInfo{(*tree)[pos].rchild, begin, best->pos,
+                               used_properties, new_sp_range});
+      // Samples [best->pos, end) have property > dequant: they go to lchild,
+      // whose static range starts at dequant + 1.
+      new_sp_range = static_prop_range;
+      if (p < kNumStaticProperties) {
+        JXL_ASSERT(new_sp_range[p][0] <= static_cast<uint32_t>(dequant + 1));
+        new_sp_range[p][0] = dequant + 1;
+        JXL_ASSERT(new_sp_range[p][0] < new_sp_range[p][1]);
+      }
+      nodes.push_back(NodeInfo{(*tree)[pos].lchild, best->pos, end,
+                               used_properties, new_sp_range});
+    }
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(FindBestSplit);  // Local function.
+
+// Creates the root leaf of `tree` (using the first predictor known to
+// `tree_samples`, offset 0, multiplier 1) and then runs the best available
+// SIMD variant of FindBestSplit to grow the tree from it.
+void ComputeBestTree(TreeSamples &tree_samples, float threshold,
+                     const std::vector<ModularMultiplierInfo> &mul_info,
+                     StaticPropRange static_prop_range,
+                     float fast_decode_multiplier, Tree *tree) {
+  // TODO(veluca): take into account that different contexts can have different
+  // uint configs.
+  //
+  // Initialize tree.
+  tree->emplace_back();
+  auto &root = tree->back();
+  root.property = -1;
+  root.predictor = tree_samples.PredictorFromIndex(0);
+  root.predictor_offset = 0;
+  root.multiplier = 1;
+
+  // FindBestSplit tracks used properties in a 64-bit mask.
+  JXL_ASSERT(tree_samples.NumProperties() < 64);
+  JXL_ASSERT(tree_samples.NumDistinctSamples() <=
+             std::numeric_limits<uint32_t>::max());
+
+  HWY_DYNAMIC_DISPATCH(FindBestSplit)
+  (tree_samples, threshold, mul_info, static_prop_range, fast_decode_multiplier,
+   tree);
+}
+
+constexpr int TreeSamples::kPropertyRange;
+constexpr uint32_t TreeSamples::kDedupEntryUnused;
+
+// Chooses the list of predictors for which residuals will be collected and
+// sizes `residuals` to match.
+//
+//  - TreeMode::kWPOnly: only Predictor::Weighted, regardless of `predictor`.
+//  - Predictor::Variable: all kNumModularPredictors predictors, with Weighted
+//    moved to index 0 and Gradient to index 1.
+//  - Predictor::Best: Weighted and Gradient.
+//  - anything else: just `predictor`.
+//  - TreeMode::kNoWP additionally removes Weighted from the resulting list.
+//
+// Returns a failure if kNoWP is combined with Predictor::Weighted.
+Status TreeSamples::SetPredictor(Predictor predictor,
+                                 ModularOptions::TreeMode wp_tree_mode) {
+  if (wp_tree_mode == ModularOptions::TreeMode::kWPOnly) {
+    predictors = {Predictor::Weighted};
+    residuals.resize(1);
+    return true;
+  }
+  if (wp_tree_mode == ModularOptions::TreeMode::kNoWP &&
+      predictor == Predictor::Weighted) {
+    return JXL_FAILURE("Invalid predictor settings");
+  }
+  if (predictor == Predictor::Variable) {
+    // Start from an empty list, like every other branch does by assignment.
+    // Without this, a repeated call would append duplicates and the
+    // fixed-index swaps below would act on stale entries.
+    predictors.clear();
+    predictors.reserve(kNumModularPredictors);
+    for (size_t i = 0; i < kNumModularPredictors; i++) {
+      predictors.push_back(static_cast<Predictor>(i));
+    }
+    // predictors[i] == Predictor(i) at this point, so these swaps bring
+    // Weighted to index 0 and Gradient to index 1.
+    std::swap(predictors[0], predictors[static_cast<int>(Predictor::Weighted)]);
+    std::swap(predictors[1], predictors[static_cast<int>(Predictor::Gradient)]);
+  } else if (predictor == Predictor::Best) {
+    predictors = {Predictor::Weighted, Predictor::Gradient};
+  } else {
+    predictors = {predictor};
+  }
+  if (wp_tree_mode == ModularOptions::TreeMode::kNoWP) {
+    auto wp_it =
+        std::find(predictors.begin(), predictors.end(), Predictor::Weighted);
+    if (wp_it != predictors.end()) {
+      predictors.erase(wp_it);
+    }
+  }
+  residuals.resize(predictors.size());
+  return true;
+}
+
+// Chooses which properties are recorded for each sample and sizes `props`
+// accordingly. The requested list is used as-is unless the tree mode
+// overrides it: kWPOnly keeps only kWPProp, kGradientOnly keeps only
+// kGradientProp, and kNoWP drops the first occurrence of kWPProp.
+// Fails if no property is left.
+Status TreeSamples::SetProperties(const std::vector<uint32_t> &properties,
+                                  ModularOptions::TreeMode wp_tree_mode) {
+  props_to_use = properties;
+  switch (wp_tree_mode) {
+    case ModularOptions::TreeMode::kWPOnly:
+      props_to_use = {static_cast<uint32_t>(kWPProp)};
+      break;
+    case ModularOptions::TreeMode::kGradientOnly:
+      props_to_use = {static_cast<uint32_t>(kGradientProp)};
+      break;
+    case ModularOptions::TreeMode::kNoWP: {
+      auto wp_pos = std::find(props_to_use.begin(), props_to_use.end(), kWPProp);
+      if (wp_pos != props_to_use.end()) {
+        props_to_use.erase(wp_pos);
+      }
+      break;
+    }
+    default:
+      // Any other mode leaves the requested list untouched.
+      break;
+  }
+  if (props_to_use.empty()) {
+    return JXL_FAILURE("Invalid property set configuration");
+  }
+  props.resize(props_to_use.size());
+  return true;
+}
+
+// Resizes the deduplication table to `size` entries (a power of two, checked
+// in debug builds) and inserts every stored sample whose count has not
+// saturated. Does nothing if the table already has that size.
+// NOTE(review): resize() only initializes newly added slots; entries already
+// in the table stay at the positions chosen for the old size - confirm this
+// is intended.
+void TreeSamples::InitTable(size_t size) {
+  JXL_DASSERT((size & (size - 1)) == 0);
+  if (size == dedup_table_.size()) return;
+  dedup_table_.resize(size, kDedupEntryUnused);
+  constexpr uint16_t kSaturated = std::numeric_limits<uint16_t>::max();
+  const size_t num_distinct = NumDistinctSamples();
+  for (size_t idx = 0; idx < num_distinct; idx++) {
+    // Saturated samples are kept out of the table so they cannot be merged
+    // into any further.
+    if (sample_counts[idx] == kSaturated) continue;
+    AddToTable(idx);
+  }
+}
+
+// Tries to merge sample `a` into an identical sample already referenced by
+// one of its two candidate table slots (Hash1 first, then Hash2). On a match
+// the existing sample's count is incremented and true is returned; the slot
+// is released once that count reaches the uint16_t maximum. If neither slot
+// matches, `a` is offered to the table via AddToTable and false is returned.
+bool TreeSamples::AddToTableAndMerge(size_t a) {
+  const size_t candidate_slots[2] = {Hash1(a), Hash2(a)};
+  for (const size_t slot : candidate_slots) {
+    const auto existing = dedup_table_[slot];
+    if (existing == kDedupEntryUnused) continue;
+    if (!IsSameSample(a, existing)) continue;
+
+    JXL_DASSERT(sample_counts[a] == 1);
+    sample_counts[existing]++;
+    // Remove from hash table samples that are saturated.
+    if (sample_counts[existing] == std::numeric_limits<uint16_t>::max()) {
+      dedup_table_[slot] = kDedupEntryUnused;
+    }
+    return true;
+  }
+  AddToTable(a);
+  return false;
+}
+
+// Records sample `a` in the first free slot among its two candidates (Hash1,
+// then Hash2). If both are taken the sample is simply not recorded.
+void TreeSamples::AddToTable(size_t a) {
+  const size_t primary = Hash1(a);
+  const size_t secondary = Hash2(a);
+  if (dedup_table_[primary] == kDedupEntryUnused) {
+    dedup_table_[primary] = a;
+    return;
+  }
+  if (dedup_table_[secondary] == kDedupEntryUnused) {
+    dedup_table_[secondary] = a;
+  }
+}
+
+// Reserves room for `num_samples` additional samples in every residual and
+// property column, then sizes the deduplication table to a power of two based
+// on 1.5x the expected total number of samples (via CeilLog2Nonzero).
+void TreeSamples::PrepareForSamples(size_t num_samples) {
+  for (size_t i = 0; i < residuals.size(); i++) {
+    residuals[i].reserve(residuals[i].size() + num_samples);
+  }
+  for (size_t i = 0; i < props.size(); i++) {
+    props[i].reserve(props[i].size() + num_samples);
+  }
+  const size_t expected_total = num_samples + sample_counts.size();
+  const size_t table_size = 1LLU << CeilLog2Nonzero(expected_total * 3 / 2);
+  InitTable(table_size);
+}
+
+// First table position for sample `a`: a multiplicative hash that mixes the
+// residual tokens/bit counts first and the property values second, then keeps
+// bits 16 and up masked to the table size (which is a power of two).
+size_t TreeSamples::Hash1(size_t a) const {
+  constexpr uint64_t kMul = 0x1e35a7bd;
+  uint64_t acc = kMul;
+  for (size_t r = 0; r < residuals.size(); r++) {
+    const auto &entry = residuals[r][a];
+    acc = acc * kMul + entry.tok;
+    acc = acc * kMul + entry.nbits;
+  }
+  for (size_t p = 0; p < props.size(); p++) {
+    acc = acc * kMul + props[p][a];
+  }
+  const size_t mask = dedup_table_.size() - 1;
+  return (acc >> 16) & mask;
+}
+// Second table position for sample `a`: uses a different constant, combines
+// with XOR instead of addition, and visits the property values before the
+// residuals. The result is masked to the table size like Hash1.
+size_t TreeSamples::Hash2(size_t a) const {
+  constexpr uint64_t kMul = 0x1e35a7bd1e35a7bd;
+  uint64_t acc = kMul;
+  for (size_t p = 0; p < props.size(); p++) {
+    acc = (acc * kMul) ^ props[p][a];
+  }
+  for (size_t r = 0; r < residuals.size(); r++) {
+    const auto &entry = residuals[r][a];
+    acc = (acc * kMul) ^ entry.tok;
+    acc = (acc * kMul) ^ entry.nbits;
+  }
+  const size_t mask = dedup_table_.size() - 1;
+  return (acc >> 16) & mask;
+}
+
+// Returns true iff samples `a` and `b` have identical tokens and bit counts
+// for every predictor and identical values for every property.
+bool TreeSamples::IsSameSample(size_t a, size_t b) const {
+  for (size_t r = 0; r < residuals.size(); r++) {
+    const auto &column = residuals[r];
+    if (column[a].tok != column[b].tok) return false;
+    if (column[a].nbits != column[b].nbits) return false;
+  }
+  for (size_t p = 0; p < props.size(); p++) {
+    if (props[p][a] != props[p][b]) return false;
+  }
+  return true;
+}
+
+// Records one pixel as a sample: for every selected predictor the residual
+// (pixel minus that predictor's prediction) is tokenized with
+// HybridUintConfig(4, 1, 2), and every selected property is stored in
+// quantized form. If an identical sample is found through the deduplication
+// table, the freshly appended entries are removed again and only the existing
+// sample's count grows.
+void TreeSamples::AddSample(pixel_type_w pixel, const Properties &properties,
+                            const pixel_type_w *predictions) {
+  size_t column = 0;
+  for (const Predictor predictor : predictors) {
+    const pixel_type residual =
+        pixel - predictions[static_cast<int>(predictor)];
+    uint32_t tok, nbits, bits;
+    HybridUintConfig(4, 1, 2).Encode(PackSigned(residual), &tok, &nbits, &bits);
+    JXL_DASSERT(tok < 256);
+    JXL_DASSERT(nbits < 256);
+    residuals[column].emplace_back(
+        ResidualToken{static_cast<uint8_t>(tok), static_cast<uint8_t>(nbits)});
+    column++;
+  }
+  for (size_t p = 0; p < props_to_use.size(); p++) {
+    const auto quantized = QuantizeProperty(p, properties[props_to_use[p]]);
+    props[p].push_back(quantized);
+  }
+  sample_counts.push_back(1);
+  num_samples++;
+
+  const size_t new_index = sample_counts.size() - 1;
+  if (!AddToTableAndMerge(new_index)) return;
+
+  // Merged into an existing sample: undo the appends made above.
+  for (auto &res : residuals) res.pop_back();
+  for (auto &prop : props) prop.pop_back();
+  sample_counts.pop_back();
+}
+
+// Exchanges samples `a` and `b` across all residual columns, all property
+// columns and the count array. A no-op when both indices are equal.
+void TreeSamples::Swap(size_t a, size_t b) {
+  if (a != b) {
+    for (size_t r = 0; r < residuals.size(); r++) {
+      std::swap(residuals[r][a], residuals[r][b]);
+    }
+    for (size_t p = 0; p < props.size(); p++) {
+      std::swap(props[p][a], props[p][b]);
+    }
+    std::swap(sample_counts[a], sample_counts[b]);
+  }
+}
+
+// Rotates three samples in every column and in the count array: position `a`
+// receives the sample from `c`, `c` receives the one from `b`, and `b`
+// receives the one originally at `a`. When `b == c` this reduces to
+// Swap(a, b).
+void TreeSamples::ThreeShuffle(size_t a, size_t b, size_t c) {
+  if (b == c) {
+    Swap(a, b);
+    return;
+  }
+  for (auto &column : residuals) {
+    const auto saved = column[a];
+    column[a] = column[c];
+    column[c] = column[b];
+    column[b] = saved;
+  }
+  for (auto &column : props) {
+    const auto saved = column[a];
+    column[a] = column[c];
+    column[c] = column[b];
+    column[b] = saved;
+  }
+  const auto saved_count = sample_counts[a];
+  sample_counts[a] = sample_counts[c];
+  sample_counts[c] = sample_counts[b];
+  sample_counts[b] = saved_count;
+}
+
+namespace {
+// Picks bin indices of `histogram` at which the running total first exceeds
+// the next multiple of total / num_chunks. The last bin is never a candidate.
+// Returns the chosen indices in increasing order; empty for an empty
+// histogram.
+std::vector<int> QuantizeHistogram(const std::vector<uint32_t> &histogram,
+                                   size_t num_chunks) {
+  std::vector<int> cut_points;
+  if (histogram.empty()) return cut_points;
+  // TODO(veluca): selecting distinct quantiles is likely not the best
+  // way to go about this.
+  const size_t total = std::accumulate(histogram.begin(), histogram.end(), 0LU);
+  const size_t last_bin = histogram.size() - 1;
+  size_t running = 0;
+  size_t chunk = 0;
+  for (size_t bin = 0; bin < last_bin; bin++) {
+    running += histogram[bin];
+    if (running <= (chunk + 1) * total / num_chunks) continue;
+    cut_points.push_back(bin);
+    // Skip every quantile that this bin already covers.
+    while (running >= (chunk + 1) * total / num_chunks) {
+      chunk++;
+    }
+  }
+  return cut_points;
+}
+
+// Builds a histogram of `samples` clamped to [-512, 512], offset so that the
+// (clamped) minimum sample lands in bin 0, runs QuantizeHistogram on it and
+// shifts the resulting thresholds back by that minimum.
+// Returns an empty vector for empty input.
+std::vector<int> QuantizeSamples(const std::vector<int32_t> &samples,
+                                 size_t num_chunks) {
+  if (samples.empty()) return {};
+  constexpr int kRange = 512;
+  const int raw_min = *std::min_element(samples.begin(), samples.end());
+  const int lowest = std::min(std::max(raw_min, -kRange), kRange);
+  std::vector<uint32_t> histogram(2 * kRange + 1);
+  for (size_t i = 0; i < samples.size(); i++) {
+    const int clamped = std::min(std::max(samples[i], -kRange), kRange);
+    const uint32_t bin = clamped - lowest;
+    histogram[bin]++;
+  }
+  std::vector<int> thresholds = QuantizeHistogram(histogram, num_chunks);
+  for (size_t i = 0; i < thresholds.size(); i++) {
+    thresholds[i] += lowest;
+  }
+  return thresholds;
+}
+}  // namespace
+
+void TreeSamples::PreQuantizeProperties(  // Fills compact_properties (thresholds) and property_mapping (value -> bucket) for every entry of props_to_use.
+    const StaticPropRange &range,
+    const std::vector<ModularMultiplierInfo> &multiplier_info,
+    const std::vector<uint32_t> &group_pixel_count,
+    const std::vector<uint32_t> &channel_pixel_count,
+    std::vector<pixel_type> &pixel_samples,  // Overwritten with absolute values if an abs-pixel property is quantized.
+    std::vector<pixel_type> &diff_samples, size_t max_property_values) {  // diff_samples is likewise overwritten for abs-diff properties.
+  // If we have forced splits because of multipliers, choose channel and group
+  // thresholds accordingly.
+  std::vector<int32_t> group_multiplier_thresholds;
+  std::vector<int32_t> channel_multiplier_thresholds;
+  for (const auto &v : multiplier_info) {  // Each bound that differs from the overall range becomes a forced threshold.
+    if (v.range[0][0] != range[0][0]) {  // range[0] feeds the channel thresholds.
+      channel_multiplier_thresholds.push_back(v.range[0][0] - 1);
+    }
+    if (v.range[0][1] != range[0][1]) {
+      channel_multiplier_thresholds.push_back(v.range[0][1] - 1);
+    }
+    if (v.range[1][0] != range[1][0]) {  // range[1] feeds the group thresholds.
+      group_multiplier_thresholds.push_back(v.range[1][0] - 1);
+    }
+    if (v.range[1][1] != range[1][1]) {
+      group_multiplier_thresholds.push_back(v.range[1][1] - 1);
+    }
+  }
+  std::sort(channel_multiplier_thresholds.begin(),  // Sort, then drop duplicates (channel thresholds).
+            channel_multiplier_thresholds.end());
+  channel_multiplier_thresholds.resize(
+      std::unique(channel_multiplier_thresholds.begin(),
+                  channel_multiplier_thresholds.end()) -
+      channel_multiplier_thresholds.begin());
+  std::sort(group_multiplier_thresholds.begin(),  // Sort, then drop duplicates (group thresholds).
+            group_multiplier_thresholds.end());
+  group_multiplier_thresholds.resize(
+      std::unique(group_multiplier_thresholds.begin(),
+                  group_multiplier_thresholds.end()) -
+      group_multiplier_thresholds.begin());
+
+  compact_properties.resize(props_to_use.size());
+  auto quantize_channel = [&]() {  // Forced multiplier thresholds win; otherwise quantize the per-channel pixel counts.
+    if (!channel_multiplier_thresholds.empty()) {
+      return channel_multiplier_thresholds;
+    }
+    return QuantizeHistogram(channel_pixel_count, max_property_values);
+  };
+  auto quantize_group_id = [&]() {  // Same policy as quantize_channel, using the per-group pixel counts.
+    if (!group_multiplier_thresholds.empty()) {
+      return group_multiplier_thresholds;
+    }
+    return QuantizeHistogram(group_pixel_count, max_property_values);
+  };
+  auto quantize_coordinate = [&]() {  // max_property_values - 1 evenly spaced thresholds computed over a span of 256.
+    std::vector<int> quantized;
+    quantized.reserve(max_property_values - 1);
+    for (size_t i = 0; i + 1 < max_property_values; i++) {
+      quantized.push_back((i + 1) * 256 / max_property_values - 1);
+    }
+    return quantized;
+  };
+  std::vector<int> abs_pixel_thr;  // Cache for quantize_abs_pixel_property.
+  std::vector<int> pixel_thr;  // Cache for quantize_pixel_property.
+  auto quantize_pixel_property = [&]() {  // Computed on first use; an empty result is recomputed on the next call.
+    if (pixel_thr.empty()) {
+      pixel_thr = QuantizeSamples(pixel_samples, max_property_values);
+    }
+    return pixel_thr;
+  };
+  auto quantize_abs_pixel_property = [&]() {
+    if (abs_pixel_thr.empty()) {
+      quantize_pixel_property();  // Compute the non-abs thresholds.
+      for (auto &v : pixel_samples) v = std::abs(v);  // Destroys the signed samples, hence the call above.
+      abs_pixel_thr = QuantizeSamples(pixel_samples, max_property_values);
+    }
+    return abs_pixel_thr;
+  };
+  std::vector<int> abs_diff_thr;  // Cache for quantize_abs_diff_property.
+  std::vector<int> diff_thr;  // Cache for quantize_diff_property.
+  auto quantize_diff_property = [&]() {  // Same lazy scheme as quantize_pixel_property, on diff_samples.
+    if (diff_thr.empty()) {
+      diff_thr = QuantizeSamples(diff_samples, max_property_values);
+    }
+    return diff_thr;
+  };
+  auto quantize_abs_diff_property = [&]() {
+    if (abs_diff_thr.empty()) {
+      quantize_diff_property();  // Compute the non-abs thresholds.
+      for (auto &v : diff_samples) v = std::abs(v);  // Destroys the signed samples, hence the call above.
+      abs_diff_thr = QuantizeSamples(diff_samples, max_property_values);
+    }
+    return abs_diff_thr;
+  };
+  auto quantize_wp = [&]() {  // Fixed threshold tables; a finer table is used for larger max_property_values.
+    if (max_property_values < 32) {
+      return std::vector<int>{-127, -63, -31, -15, -7, -3, -1, 0,
+                              1,    3,   7,   15,  31, 63, 127};
+    }
+    if (max_property_values < 64) {
+      return std::vector<int>{-255, -191, -127, -95, -63, -47, -31, -23,
+                              -15,  -11,  -7,   -5,  -3,  -1,  0,   1,
+                              3,    5,    7,    11,  15,  23,  31,  47,
+                              63,   95,   127,  191, 255};
+    }
+    return std::vector<int>{
+        -255, -223, -191, -159, -127, -111, -95, -79, -63, -55, -47,
+        -39,  -31,  -27,  -23,  -19,  -15,  -13, -11, -9,  -7,  -6,
+        -5,   -4,   -3,   -2,   -1,   0,    1,   2,   3,   4,   5,
+        6,    7,    9,    11,   13,   15,   19,  23,  27,  31,  39,
+        47,   55,   63,   79,   95,   111,  127, 159, 191, 223, 255};
+  };
+
+  property_mapping.resize(props_to_use.size());
+  for (size_t i = 0; i < props_to_use.size(); i++) {  // Pick the quantizer by property id, then build the lookup table.
+    if (props_to_use[i] == 0) {
+      compact_properties[i] = quantize_channel();
+    } else if (props_to_use[i] == 1) {
+      compact_properties[i] = quantize_group_id();
+    } else if (props_to_use[i] == 2 || props_to_use[i] == 3) {
+      compact_properties[i] = quantize_coordinate();
+    } else if (props_to_use[i] == 6 || props_to_use[i] == 7 ||
+               props_to_use[i] == 8 ||
+               (props_to_use[i] >= kNumNonrefProperties &&
+                (props_to_use[i] - kNumNonrefProperties) % 4 == 1)) {
+      compact_properties[i] = quantize_pixel_property();
+    } else if (props_to_use[i] == 4 || props_to_use[i] == 5 ||
+               (props_to_use[i] >= kNumNonrefProperties &&
+                (props_to_use[i] - kNumNonrefProperties) % 4 == 0)) {
+      compact_properties[i] = quantize_abs_pixel_property();
+    } else if (props_to_use[i] >= kNumNonrefProperties &&
+               (props_to_use[i] - kNumNonrefProperties) % 4 == 2) {
+      compact_properties[i] = quantize_abs_diff_property();
+    } else if (props_to_use[i] == kWPProp) {
+      compact_properties[i] = quantize_wp();
+    } else {  // Everything else uses the signed-difference thresholds.
+      compact_properties[i] = quantize_diff_property();
+    }
+    property_mapping[i].resize(kPropertyRange * 2 + 1);  // One entry per value in [-kPropertyRange, kPropertyRange].
+    size_t mapped = 0;  // Number of thresholds strictly below the current value.
+    for (size_t j = 0; j < property_mapping[i].size(); j++) {
+      while (mapped < compact_properties[i].size() &&
+             static_cast<int>(j) - kPropertyRange >
+                 compact_properties[i][mapped]) {
+        mapped++;
+      }
+      // property_mapping[i] of a value V (= j - kPropertyRange) is `mapped` if
+      // compact_properties[i][mapped - 1] < V and
+      // V <= compact_properties[i][mapped] (or no threshold is left).
+      // This is because the decision node in the tree splits on (property) > T,
+      // hence everything that is not greater than a threshold should be
+      // clustered together.
+      property_mapping[i][j] = mapped;  // Stored as uint8_t; assumes at most 255 thresholds - TODO confirm.
+    }
+  }
+}
+
+void CollectPixelSamples(const Image &image, const ModularOptions &options,  // Updates the pixel counters and appends randomly chosen pixel / horizontal-difference samples from `image`.
+                         size_t group_id,
+                         std::vector<uint32_t> &group_pixel_count,
+                         std::vector<uint32_t> &channel_pixel_count,
+                         std::vector<pixel_type> &pixel_samples,
+                         std::vector<pixel_type> &diff_samples) {
+  if (options.nb_repeats == 0) return;  // Nothing is collected when nb_repeats is zero.
+  if (group_pixel_count.size() <= group_id) {  // Grow the counters so the indices used below exist.
+    group_pixel_count.resize(group_id + 1);
+  }
+  if (channel_pixel_count.size() < image.channel.size()) {
+    channel_pixel_count.resize(image.channel.size());
+  }
+  Rng rng(group_id);  // Generator seeded with the group id.
+  // Sample 10% of the final number of samples for property quantization.
+  float fraction = options.nb_repeats * 0.1;
+  std::geometric_distribution<uint32_t> dist(fraction);  // Gap lengths between sampled pixels; assumes fraction is in (0, 1] - TODO confirm.
+  size_t total_pixels = 0;
+  std::vector<size_t> channel_ids;  // Channels that take part in sampling.
+  for (size_t i = 0; i < image.channel.size(); i++) {
+    if (image.channel[i].w <= 1 || image.channel[i].h == 0) {
+      continue;  // skip empty or width-1 channels.
+    }
+    if (i >= image.nb_meta_channels &&
+        (image.channel[i].w > options.max_chan_size ||
+         image.channel[i].h > options.max_chan_size)) {
+      break;  // This channel and all later ones are left out.
+    }
+    channel_ids.push_back(i);
+    group_pixel_count[group_id] += image.channel[i].w * image.channel[i].h;
+    channel_pixel_count[i] += image.channel[i].w * image.channel[i].h;
+    total_pixels += image.channel[i].w * image.channel[i].h;
+  }
+  if (channel_ids.empty()) return;
+  pixel_samples.reserve(pixel_samples.size() + fraction * total_pixels);  // Expected number of new samples.
+  diff_samples.reserve(diff_samples.size() + fraction * total_pixels);
+  size_t i = 0;  // Index into channel_ids.
+  size_t y = 0;  // Current row in that channel.
+  size_t x = 0;  // Current column in that row.
+  auto advance = [&](size_t amount) {  // Moves (i, y, x) forward by `amount` pixels in row-major order across channel_ids.
+    x += amount;
+    // Detect row overflow (rare).
+    while (x >= image.channel[channel_ids[i]].w) {
+      x -= image.channel[channel_ids[i]].w;
+      y++;
+      // Detect end-of-channel (even rarer).
+      if (y == image.channel[channel_ids[i]].h) {
+        i++;
+        y = 0;
+        if (i >= channel_ids.size()) {
+          return;  // Ran past the last channel; the sampling loop below stops on i.
+        }
+      }
+    }
+  };
+  advance(dist(rng));  // Random offset of the first sample.
+  for (; i < channel_ids.size(); advance(dist(rng) + 1)) {  // + 1 so consecutive samples are distinct pixels.
+    const pixel_type *row = image.channel[channel_ids[i]].Row(y);
+    pixel_samples.push_back(row[x]);
+    size_t xp = x == 0 ? 1 : x - 1;  // Left neighbour, or the right one in column 0 (width is >= 2 here).
+    diff_samples.push_back(row[x] - row[xp]);
+  }
+}
+
+// TODO(veluca): very simple encoding scheme. This should be improved.
+void TokenizeTree(const Tree &tree, std::vector<Token> *tokens,  // Appends the tokens describing `tree` (visited breadth-first) to *tokens and rebuilds *decoder_tree in that order.
+                  Tree *decoder_tree) {
+  JXL_ASSERT(tree.size() <= kMaxTreeSize);
+  std::queue<int> q;  // Indices into `tree` still to visit.
+  q.push(0);  // Start from node 0.
+  size_t leaf_id = 0;  // Leaves are numbered in visiting order.
+  decoder_tree->clear();
+  while (!q.empty()) {
+    int cur = q.front();
+    q.pop();
+    JXL_ASSERT(tree[cur].property >= -1);
+    tokens->emplace_back(kPropertyContext, tree[cur].property + 1);  // + 1 makes the leaf marker (-1) encode as 0.
+    if (tree[cur].property == -1) {  // Leaf: emit predictor, offset and multiplier.
+      tokens->emplace_back(kPredictorContext,
+                           static_cast<int>(tree[cur].predictor));
+      tokens->emplace_back(kOffsetContext,
+                           PackSigned(tree[cur].predictor_offset));
+      uint32_t mul_log = Num0BitsBelowLS1Bit_Nonzero(tree[cur].multiplier);  // Presumably the count of trailing zero bits - confirm against the helper.
+      uint32_t mul_bits = (tree[cur].multiplier >> mul_log) - 1;  // Remaining factor after the shift, minus one.
+      tokens->emplace_back(kMultiplierLogContext, mul_log);
+      tokens->emplace_back(kMultiplierBitsContext, mul_bits);
+      JXL_ASSERT(tree[cur].predictor < Predictor::Best);
+      decoder_tree->emplace_back(-1, 0, leaf_id++, 0, tree[cur].predictor,
+                                 tree[cur].predictor_offset,
+                                 tree[cur].multiplier);
+      continue;
+    }
+    decoder_tree->emplace_back(tree[cur].property, tree[cur].splitval,  // Inner node: children land after every node still queued.
+                               decoder_tree->size() + q.size() + 1,
+                               decoder_tree->size() + q.size() + 2,
+                               Predictor::Zero, 0, 1);
+    q.push(tree[cur].lchild);
+    q.push(tree[cur].rchild);
+    tokens->emplace_back(kSplitValContext, PackSigned(tree[cur].splitval));
+  }
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_ma.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_ma.h
new file mode 100644
index 0000000000..d0a90cc952
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/enc_ma.h
@@ -0,0 +1,157 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_ENC_MA_H_
+#define LIB_JXL_MODULAR_ENCODING_ENC_MA_H_
+
+#include <numeric>
+
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/modular/encoding/dec_ma.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+// Struct to collect all the data needed to build a tree.
+struct TreeSamples {
+  bool HasSamples() const {  // True if the first residual list exists and is non-empty.
+    return !residuals.empty() && !residuals[0].empty();
+  }
+  size_t NumDistinctSamples() const { return sample_counts.size(); }  // One count entry per stored sample.
+  size_t NumSamples() const { return num_samples; }
+  // Set the predictor to use. Must be called before adding any samples.
+  Status SetPredictor(Predictor predictor,
+                      ModularOptions::TreeMode wp_tree_mode);
+  // Set the properties to use. Must be called before adding any samples.
+  Status SetProperties(const std::vector<uint32_t> &properties,
+                       ModularOptions::TreeMode wp_tree_mode);
+
+  size_t Token(size_t pred, size_t i) const { return residuals[pred][i].tok; }  // Residual token of sample i under predictor index pred.
+  size_t NBits(size_t pred, size_t i) const { return residuals[pred][i].nbits; }  // Extra-bit count of sample i under predictor index pred.
+  size_t Count(size_t i) const { return sample_counts[i]; }  // Number of occurrences of sample i.
+  size_t PredictorIndex(Predictor predictor) const {  // Position of `predictor` in `predictors` (linear search).
+    const auto predictor_elem =
+        std::find(predictors.begin(), predictors.end(), predictor);
+    JXL_DASSERT(predictor_elem != predictors.end());
+    return predictor_elem - predictors.begin();
+  }
+  size_t PropertyIndex(size_t property) const {  // Position of `property` in `props_to_use` (linear search).
+    const auto property_elem =
+        std::find(props_to_use.begin(), props_to_use.end(), property);
+    JXL_DASSERT(property_elem != props_to_use.end());
+    return property_elem - props_to_use.begin();
+  }
+  size_t NumPropertyValues(size_t property_index) const {  // N thresholds delimit N + 1 quantized values.
+    return compact_properties[property_index].size() + 1;
+  }
+  // Returns the *quantized* property value.
+  size_t Property(size_t property_index, size_t i) const {
+    return props[property_index][i];
+  }
+  int UnquantizeProperty(size_t property_index, uint32_t quant) const {  // Threshold stored for quantized value `quant`.
+    JXL_ASSERT(quant < compact_properties[property_index].size());
+    return compact_properties[property_index][quant];
+  }
+
+  Predictor PredictorFromIndex(size_t index) const {  // Inverse of PredictorIndex.
+    JXL_DASSERT(index < predictors.size());
+    return predictors[index];
+  }
+  size_t PropertyFromIndex(size_t index) const {  // Inverse of PropertyIndex.
+    JXL_DASSERT(index < props_to_use.size());
+    return props_to_use[index];
+  }
+  size_t NumPredictors() const { return predictors.size(); }
+  size_t NumProperties() const { return props_to_use.size(); }
+
+  // Preallocate data for a given number of samples. MUST be called before
+  // adding any sample.
+  void PrepareForSamples(size_t num_samples);
+  // Add a sample.
+  void AddSample(pixel_type_w pixel, const Properties &properties,
+                 const pixel_type_w *predictions);
+  // Pre-cluster property values.
+  void PreQuantizeProperties(
+      const StaticPropRange &range,
+      const std::vector<ModularMultiplierInfo> &multiplier_info,
+      const std::vector<uint32_t> &group_pixel_count,
+      const std::vector<uint32_t> &channel_pixel_count,
+      std::vector<pixel_type> &pixel_samples,
+      std::vector<pixel_type> &diff_samples, size_t max_property_values);
+
+  void AllSamplesDone() { dedup_table_ = std::vector<uint32_t>(); }  // Replaces the deduplication table with an empty vector.
+
+  uint32_t QuantizeProperty(uint32_t prop, pixel_type v) const {  // Quantized value of `v` for property index `prop`.
+    v = std::min(std::max(v, -kPropertyRange), kPropertyRange) + kPropertyRange;  // Clamp, then shift into [0, 2 * kPropertyRange].
+    return property_mapping[prop][v];
+  }
+
+  // Swaps samples in position a and b. Does nothing if a == b.
+  void Swap(size_t a, size_t b);
+
+  // Cycles samples: a -> b -> c -> a. We assume a <= b <= c, so that we can
+  // just call Swap(a, b) if b==c.
+  void ThreeShuffle(size_t a, size_t b, size_t c);
+
+ private:
+  // TODO(veluca): as the total number of properties and predictors are known
+  // before adding any samples, it might be better to interleave predictors,
+  // properties and counts in a single vector to improve locality.
+  // A first attempt at doing this actually results in much slower encoding,
+  // possibly because of the more complex addressing.
+  struct ResidualToken {
+    uint8_t tok;  // Token value, returned by Token().
+    uint8_t nbits;  // Number of extra bits, returned by NBits().
+  };
+  // Residual information: token and number of extra bits, per predictor.
+  std::vector<std::vector<ResidualToken>> residuals;
+  // Number of occurrences of each sample.
+  std::vector<uint16_t> sample_counts;
+  // Property values, quantized to at most 256 distinct values.
+  std::vector<std::vector<uint8_t>> props;
+  // Decompactification info for `props`.
+  std::vector<std::vector<int>> compact_properties;
+  // List of properties to use.
+  std::vector<uint32_t> props_to_use;
+  // List of predictors to use.
+  std::vector<Predictor> predictors;
+  // Mapping property value -> quantized property value.
+  static constexpr int kPropertyRange = 511;  // Values are clamped to [-511, 511] before lookup.
+  std::vector<std::vector<uint8_t>> property_mapping;
+  // Number of samples seen.
+  size_t num_samples = 0;
+  // Table for deduplication.
+  static constexpr uint32_t kDedupEntryUnused{static_cast<uint32_t>(-1)};  // All-ones value; by its name, marks an unused table entry.
+  std::vector<uint32_t> dedup_table_;
+
+  // Functions for sample deduplication.
+  bool IsSameSample(size_t a, size_t b) const;
+  size_t Hash1(size_t a) const;
+  size_t Hash2(size_t a) const;
+  void InitTable(size_t size);
+  // Returns true if `a` was already present in the table.
+  bool AddToTableAndMerge(size_t a);
+  void AddToTable(size_t a);
+};
+
+void TokenizeTree(const Tree &tree, std::vector<Token> *tokens,  // Appends the tokens describing `tree` to *tokens and rebuilds *decoder_tree (cleared first) in breadth-first order.
+                  Tree *decoder_tree);
+
+void CollectPixelSamples(const Image &image, const ModularOptions &options,  // Adds this image's pixel counts to the two counters and appends random pixel / horizontal-difference samples.
+                         size_t group_id,
+                         std::vector<uint32_t> &group_pixel_count,
+                         std::vector<uint32_t> &channel_pixel_count,
+                         std::vector<pixel_type> &pixel_samples,
+                         std::vector<pixel_type> &diff_samples);
+
+void ComputeBestTree(TreeSamples &tree_samples, float threshold,
+                     const std::vector<ModularMultiplierInfo> &mul_info,
+                     StaticPropRange static_prop_range,
+                     float fast_decode_multiplier, Tree *tree);
+
+}  // namespace jxl
+#endif  // LIB_JXL_MODULAR_ENCODING_ENC_MA_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/encoding.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/encoding.cc
new file mode 100644
index 0000000000..0b757113f8
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/encoding.cc
@@ -0,0 +1,530 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/encoding/encoding.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <queue>
+
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+// Removes all nodes that use a static property (i.e. channel or group ID) from
+// the tree and collapses each node on even levels with its two children to
+// produce a flatter tree. Also computes whether the resulting tree requires
+// using the weighted predictor.
+FlatTree FilterTree(const Tree &global_tree,  // Returns the flattened tree; *num_props, *use_wp, *wp_only and *gradient_only are outputs.
+                    std::array<pixel_type, kNumStaticProperties> &static_props,
+                    size_t *num_props, bool *use_wp, bool *wp_only,
+                    bool *gradient_only) {
+  *num_props = 0;  // Becomes (highest property index seen) + 1, rounded up at the end.
+  bool has_wp = false;  // A kWPProp split or a Weighted-predictor leaf was seen.
+  bool has_non_wp = false;  // Some other non-static property or predictor was seen.
+  *gradient_only = true;  // Cleared as soon as anything other than kGradientProp / Predictor::Gradient appears.
+  const auto mark_property = [&](int32_t p) {  // Updates the three flags for one split property.
+    if (p == kWPProp) {
+      has_wp = true;
+    } else if (p >= kNumStaticProperties) {
+      has_non_wp = true;
+    }
+    if (p >= kNumStaticProperties && p != kGradientProp) {
+      *gradient_only = false;
+    }
+  };
+  FlatTree output;
+  std::queue<size_t> nodes;  // Indices into global_tree still to visit.
+  nodes.push(0);
+  // Produces a trimmed and flattened tree by doing a BFS visit of the original
+  // tree, ignoring branches that are known to be false and proceeding two
+  // levels at a time to collapse nodes in a flatter tree; if an inner parent
+  // node has a leaf as a child, the leaf is duplicated and an implicit fake
+  // node is added. This allows to reduce the number of branches when traversing
+  // the resulting flat tree.
+  while (!nodes.empty()) {  // Each iteration appends exactly one entry to `output`.
+    size_t cur = nodes.front();
+    nodes.pop();
+    // Skip nodes that we can decide now, by jumping directly to their children.
+    while (global_tree[cur].property < kNumStaticProperties &&
+           global_tree[cur].property != -1) {
+      if (static_props[global_tree[cur].property] > global_tree[cur].splitval) {
+        cur = global_tree[cur].lchild;
+      } else {
+        cur = global_tree[cur].rchild;
+      }
+    }
+    FlatDecisionNode flat;
+    if (global_tree[cur].property == -1) {  // Leaf: copy its payload and go on.
+      flat.property0 = -1;
+      flat.childID = global_tree[cur].lchild;  // For a leaf, childID carries the source node's lchild value.
+      flat.predictor = global_tree[cur].predictor;
+      flat.predictor_offset = global_tree[cur].predictor_offset;
+      flat.multiplier = global_tree[cur].multiplier;
+      *gradient_only &= flat.predictor == Predictor::Gradient;
+      has_wp |= flat.predictor == Predictor::Weighted;
+      has_non_wp |= flat.predictor != Predictor::Weighted;
+      output.push_back(flat);
+      continue;
+    }
+    flat.childID = output.size() + nodes.size() + 1;  // Output index of the first of the four nodes queued below.
+
+    flat.property0 = global_tree[cur].property;
+    *num_props = std::max<size_t>(flat.property0 + 1, *num_props);
+    flat.splitval0 = global_tree[cur].splitval;
+
+    for (size_t i = 0; i < 2; i++) {  // i == 0: left child, i == 1: right child.
+      size_t cur_child =
+          i == 0 ? global_tree[cur].lchild : global_tree[cur].rchild;
+      // Skip nodes that we can decide now.
+      while (global_tree[cur_child].property < kNumStaticProperties &&
+             global_tree[cur_child].property != -1) {
+        if (static_props[global_tree[cur_child].property] >
+            global_tree[cur_child].splitval) {
+          cur_child = global_tree[cur_child].lchild;
+        } else {
+          cur_child = global_tree[cur_child].rchild;
+        }
+      }
+      // We ended up in a leaf, add a dummy decision and two copies of the leaf.
+      if (global_tree[cur_child].property == -1) {
+        flat.properties[i] = 0;
+        flat.splitvals[i] = 0;
+        nodes.push(cur_child);
+        nodes.push(cur_child);
+      } else {  // Real second-level split: queue its two children.
+        flat.properties[i] = global_tree[cur_child].property;
+        flat.splitvals[i] = global_tree[cur_child].splitval;
+        nodes.push(global_tree[cur_child].lchild);
+        nodes.push(global_tree[cur_child].rchild);
+        *num_props = std::max<size_t>(flat.properties[i] + 1, *num_props);
+      }
+    }
+
+    for (size_t j = 0; j < 2; j++) mark_property(flat.properties[j]);
+    mark_property(flat.property0);
+    output.push_back(flat);
+  }
+  if (*num_props > kNumNonrefProperties) {  // Round the extra properties up to a multiple of kExtraPropsPerChannel.
+    *num_props =
+        DivCeil(*num_props - kNumNonrefProperties, kExtraPropsPerChannel) *
+            kExtraPropsPerChannel +
+        kNumNonrefProperties;
+  } else {  // Never report fewer than kNumNonrefProperties.
+    *num_props = kNumNonrefProperties;
+  }
+  *use_wp = has_wp;
+  *wp_only = has_wp && !has_non_wp;
+
+  return output;
+}
+
+Status DecodeModularChannelMAANS(BitReader *br, ANSSymbolReader *reader,  // Decodes every pixel of image->channel[chan]; all paths visible here return true.
+                                 const std::vector<uint8_t> &context_map,
+                                 const Tree &global_tree,
+                                 const weighted::Header &wp_header,
+                                 pixel_type chan, size_t group_id,
+                                 Image *image) {
+  Channel &channel = image->channel[chan];
+
+  std::array<pixel_type, kNumStaticProperties> static_props = {chan,  // Static properties: channel index, then group id.
+                                                               (int)group_id};
+  // TODO(veluca): filter the tree according to static_props.
+
+  // zero pixel channel? could happen
+  if (channel.w == 0 || channel.h == 0) return true;
+
+  bool tree_has_wp_prop_or_pred = false;
+  bool is_wp_only = false;
+  bool is_gradient_only = false;
+  size_t num_props;
+  FlatTree tree =  // Tree specialised for this channel and group; also fills the flags above.
+      FilterTree(global_tree, static_props, &num_props,
+                 &tree_has_wp_prop_or_pred, &is_wp_only, &is_gradient_only);
+
+  // From here on, tree lookup returns a *clustered* context ID.
+  // This avoids an extra memory lookup after tree traversal.
+  for (size_t i = 0; i < tree.size(); i++) {
+    if (tree[i].property0 == -1) {  // Leaves only.
+      tree[i].childID = context_map[tree[i].childID];
+    }
+  }
+
+  JXL_DEBUG_V(3, "Decoded MA tree with %zu nodes", tree.size());
+
+  // MAANS decode
+  const auto make_pixel = [](uint64_t v, pixel_type multiplier,  // Pixel = UnpackSigned(v) * multiplier + offset.
+                             pixel_type_w offset) -> pixel_type {
+    JXL_DASSERT((v & 0xFFFFFFFF) == v);
+    pixel_type_w val = UnpackSigned(v);
+    // if it overflows, it overflows, and we have a problem anyway
+    return val * multiplier + offset;
+  };
+
+  if (tree.size() == 1) {  // Single leaf: one predictor and one context for the whole channel.
+    // special optimized case: no meta-adaptation, so no need
+    // to compute properties.
+    Predictor predictor = tree[0].predictor;
+    int64_t offset = tree[0].predictor_offset;
+    int32_t multiplier = tree[0].multiplier;
+    size_t ctx_id = tree[0].childID;
+    if (predictor == Predictor::Zero) {
+      uint32_t value;
+      if (reader->IsSingleValueAndAdvance(ctx_id, &value,
+                                          channel.w * channel.h)) {
+        // Special-case: histogram has a single symbol, with no extra bits, and
+        // we use ANS mode.
+        JXL_DEBUG_V(8, "Fastest track.");
+        pixel_type v = make_pixel(value, multiplier, offset);
+        for (size_t y = 0; y < channel.h; y++) {  // Every pixel gets the same value.
+          pixel_type *JXL_RESTRICT r = channel.Row(y);
+          std::fill(r, r + channel.w, v);
+        }
+
+      } else {
+        JXL_DEBUG_V(8, "Fast track.");
+        for (size_t y = 0; y < channel.h; y++) {
+          pixel_type *JXL_RESTRICT r = channel.Row(y);
+          for (size_t x = 0; x < channel.w; x++) {
+            uint32_t v = reader->ReadHybridUintClustered(ctx_id, br);
+            r[x] = make_pixel(v, multiplier, offset);
+          }
+        }
+      }
+    } else if (predictor == Predictor::Gradient && offset == 0 &&
+               multiplier == 1) {
+      JXL_DEBUG_V(8, "Gradient very fast track.");
+      const intptr_t onerow = channel.plane.PixelsPerRow();  // Distance between vertically adjacent pixels, in pixels.
+      for (size_t y = 0; y < channel.h; y++) {
+        pixel_type *JXL_RESTRICT r = channel.Row(y);
+        for (size_t x = 0; x < channel.w; x++) {
+          pixel_type left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);  // In column 0 use the pixel above, or 0 on the first row.
+          pixel_type top = (y ? *(r + x - onerow) : left);  // On the first row fall back to left.
+          pixel_type topleft = (x && y ? *(r + x - 1 - onerow) : left);  // On the border fall back to left.
+          pixel_type guess = ClampedGradient(top, left, topleft);
+          uint64_t v = reader->ReadHybridUintClustered(ctx_id, br);
+          r[x] = make_pixel(v, 1, guess);
+        }
+      }
+    } else if (predictor != Predictor::Weighted) {
+      // special optimized case: no wp
+      JXL_DEBUG_V(8, "Quite fast track.");
+      const intptr_t onerow = channel.plane.PixelsPerRow();
+      for (size_t y = 0; y < channel.h; y++) {
+        pixel_type *JXL_RESTRICT r = channel.Row(y);
+        for (size_t x = 0; x < channel.w; x++) {
+          PredictionResult pred =
+              PredictNoTreeNoWP(channel.w, r + x, onerow, x, y, predictor);
+          pixel_type_w g = pred.guess + offset;
+          uint64_t v = reader->ReadHybridUintClustered(ctx_id, br);
+          // NOTE: pred.multiplier is unset.
+          r[x] = make_pixel(v, multiplier, g);
+        }
+      }
+    } else {  // Single leaf using the weighted predictor.
+      JXL_DEBUG_V(8, "Somewhat fast track.");
+      const intptr_t onerow = channel.plane.PixelsPerRow();
+      weighted::State wp_state(wp_header, channel.w, channel.h);
+      for (size_t y = 0; y < channel.h; y++) {
+        pixel_type *JXL_RESTRICT r = channel.Row(y);
+        for (size_t x = 0; x < channel.w; x++) {
+          pixel_type_w g = PredictNoTreeWP(channel.w, r + x, onerow, x, y,
+                                           predictor, &wp_state)
+                               .guess +
+                           offset;
+          uint64_t v = reader->ReadHybridUintClustered(ctx_id, br);
+          r[x] = make_pixel(v, multiplier, g);
+          wp_state.UpdateErrors(r[x], x, y, channel.w);  // Feed the decoded pixel back into the predictor state.
+        }
+      }
+    }
+    return true;
+  }
+
+  // Check if this tree is a WP-only tree with a small enough property value
+  // range.
+  // Initialized to avoid clang-tidy complaining.
+  uint8_t context_lookup[2 * kPropRangeFast] = {};
+  int8_t multipliers[2 * kPropRangeFast] = {};
+  int8_t offsets[2 * kPropRangeFast] = {};
+  if (is_wp_only) {  // Keep the fast path only if the tree converts to a lookup table.
+    is_wp_only = TreeToLookupTable(tree, context_lookup, offsets, multipliers);
+  }
+  if (is_gradient_only) {  // Same check for the gradient-only fast path.
+    is_gradient_only =
+        TreeToLookupTable(tree, context_lookup, offsets, multipliers);
+  }
+
+  if (is_gradient_only) {
+    JXL_DEBUG_V(8, "Gradient fast track.");
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    for (size_t y = 0; y < channel.h; y++) {
+      pixel_type *JXL_RESTRICT r = channel.Row(y);
+      for (size_t x = 0; x < channel.w; x++) {
+        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
+        pixel_type_w top = (y ? *(r + x - onerow) : left);
+        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
+        int32_t guess = ClampedGradient(top, left, topleft);
+        uint32_t pos =  // Table index: top + left - topleft clamped to [-kPropRangeFast, kPropRangeFast - 1], then shifted.
+            kPropRangeFast +
+            std::min<pixel_type_w>(
+                std::max<pixel_type_w>(-kPropRangeFast, top + left - topleft),
+                kPropRangeFast - 1);
+        uint32_t ctx_id = context_lookup[pos];
+        uint64_t v = reader->ReadHybridUintClustered(ctx_id, br);
+        r[x] = make_pixel(v, multipliers[pos],
+                          static_cast<pixel_type_w>(offsets[pos]) + guess);
+      }
+    }
+  } else if (is_wp_only) {
+    JXL_DEBUG_V(8, "WP fast track.");
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    weighted::State wp_state(wp_header, channel.w, channel.h);
+    Properties properties(1);  // Only properties[0] is read below.
+    for (size_t y = 0; y < channel.h; y++) {
+      pixel_type *JXL_RESTRICT r = channel.Row(y);
+      for (size_t x = 0; x < channel.w; x++) {
+        size_t offset = 0;
+        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
+        pixel_type_w top = (y ? *(r + x - onerow) : left);
+        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
+        pixel_type_w topright =
+            (x + 1 < channel.w && y ? *(r + x + 1 - onerow) : top);
+        pixel_type_w toptop = (y > 1 ? *(r + x - onerow - onerow) : top);
+        int32_t guess = wp_state.Predict</*compute_properties=*/true>(
+            x, y, channel.w, top, left, topright, topleft, toptop, &properties,
+            offset);
+        uint32_t pos =  // Table index: properties[0] clamped to [-kPropRangeFast, kPropRangeFast - 1], then shifted.
+            kPropRangeFast + std::min(std::max(-kPropRangeFast, properties[0]),
+                                      kPropRangeFast - 1);
+        uint32_t ctx_id = context_lookup[pos];
+        uint64_t v = reader->ReadHybridUintClustered(ctx_id, br);
+        r[x] = make_pixel(v, multipliers[pos],
+                          static_cast<pixel_type_w>(offsets[pos]) + guess);
+        wp_state.UpdateErrors(r[x], x, y, channel.w);
+      }
+    }
+  } else if (!tree_has_wp_prop_or_pred) {
+    // special optimized case: the weighted predictor and its properties are not
+    // used, so no need to compute weights and properties.
+    JXL_DEBUG_V(8, "Slow track.");
+    MATreeLookup tree_lookup(tree);
+    Properties properties = Properties(num_props);
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    Channel references(properties.size() - kNumNonrefProperties, channel.w);  // Sized from the number of properties beyond kNumNonrefProperties and the channel width.
+    for (size_t y = 0; y < channel.h; y++) {
+      pixel_type *JXL_RESTRICT p = channel.Row(y);
+      PrecomputeReferences(channel, y, *image, chan, &references);
+      InitPropsRow(&properties, static_props, y);
+      for (size_t x = 0; x < channel.w; x++) {
+        PredictionResult res =
+            PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y,
+                            tree_lookup, references);
+        uint64_t v = reader->ReadHybridUintClustered(res.context, br);
+        p[x] = make_pixel(v, res.multiplier, res.guess);
+      }
+    }
+  } else {  // General case: tree lookup plus weighted-predictor state.
+    JXL_DEBUG_V(8, "Slowest track.");
+    MATreeLookup tree_lookup(tree);
+    Properties properties = Properties(num_props);
+    const intptr_t onerow = channel.plane.PixelsPerRow();
+    Channel references(properties.size() - kNumNonrefProperties, channel.w);
+    weighted::State wp_state(wp_header, channel.w, channel.h);
+    for (size_t y = 0; y < channel.h; y++) {
+      pixel_type *JXL_RESTRICT p = channel.Row(y);
+      InitPropsRow(&properties, static_props, y);
+      PrecomputeReferences(channel, y, *image, chan, &references);
+      for (size_t x = 0; x < channel.w; x++) {
+        PredictionResult res =
+            PredictTreeWP(&properties, channel.w, p + x, onerow, x, y,
+                          tree_lookup, references, &wp_state);
+        uint64_t v = reader->ReadHybridUintClustered(res.context, br);
+        p[x] = make_pixel(v, res.multiplier, res.guess);
+        wp_state.UpdateErrors(p[x], x, y, channel.w);
+      }
+    }
+  }
+  return true;
+}
+
+// Constructs a GroupHeader by passing it to Bundle::Init, which presumably
+// resets every field visited by VisitFields to its declared default --
+// confirm against Bundle::Init.
+GroupHeader::GroupHeader() { Bundle::Init(this); }
+
+// Checks that the channel layout produced by the transforms is consistent
+// with the group size given in `options`.
+//
+// Starting at the first non-meta channel whose width or height exceeds
+// options.group_dim, every non-empty channel is classified as a "DC" channel
+// (both shifts >= 3) or not. Its tile size -- the group dimension (scaled by
+// kBlockDim for DC channels) shifted right by the larger of the two shifts --
+// must be non-zero; otherwise a failure Status is returned.
+Status ValidateChannelDimensions(const Image &image,
+                                 const ModularOptions &options) {
+  const size_t num_channels = image.channel.size();
+
+  // Skip the leading non-meta channels that fit in a single group. This does
+  // not depend on the DC / non-DC pass, so it is computed only once.
+  size_t first_checked = image.nb_meta_channels;
+  while (first_checked < num_channels) {
+    const Channel &candidate = image.channel[first_checked];
+    if (candidate.w > options.group_dim || candidate.h > options.group_dim) {
+      break;
+    }
+    first_checked++;
+  }
+
+  const bool passes[] = {true, false};
+  for (const bool is_dc : passes) {
+    const size_t group_dim =
+        is_dc ? options.group_dim * kBlockDim : options.group_dim;
+    for (size_t c = first_checked; c < num_channels; c++) {
+      const Channel &ch = image.channel[c];
+      const bool is_empty = (ch.w == 0 || ch.h == 0);
+      const bool is_dc_channel = std::min(ch.hshift, ch.vshift) >= 3;
+      if (is_empty || is_dc_channel != is_dc) continue;
+      const size_t tile_dim = group_dim >> std::max(ch.hshift, ch.vshift);
+      if (tile_dim == 0) return JXL_FAILURE("Inconsistent transforms");
+    }
+  }
+  return true;
+}
+
+// Decodes one modular (sub)image from `br` into `image`.
+//
+// Steps, in order: read the GroupHeader, apply MetaApply of every transform
+// to `image`, validate the channel dimensions (only if no read went out of
+// bounds so far), obtain the tree / entropy code / context map (decoded
+// locally, or the global ones when header.use_global_tree is set), then
+// decode every eligible channel with DecodeModularChannelMAANS.
+//
+// Channels with zero width or height are skipped. Processing stops at the
+// first non-meta channel whose width or height exceeds
+// options->max_chan_size.
+//
+// global_tree / global_code / global_ctx_map are only used when the header
+// requests the global tree; in that case all three must be non-null and the
+// tree must be non-empty.
+//
+// Returns true on success (including when there is nothing to decode), a
+// failure Status on invalid input, and StatusCode::kNotEnoughBytes when the
+// input is truncated and `allow_truncated_group` is set; in that case the
+// channel being decoded and all following channels are zero-filled.
+Status ModularDecode(BitReader *br, Image &image, GroupHeader &header,
+                     size_t group_id, ModularOptions *options,
+                     const Tree *global_tree, const ANSCode *global_code,
+                     const std::vector<uint8_t> *global_ctx_map,
+                     bool allow_truncated_group) {
+  if (image.channel.empty()) return true;
+
+  // decode transforms
+  JXL_RETURN_IF_ERROR(Bundle::Read(br, &header));
+  JXL_DEBUG_V(3, "Image data underwent %zu transformations: ",
+              header.transforms.size());
+  image.transform = header.transforms;
+  for (Transform &transform : image.transform) {
+    JXL_RETURN_IF_ERROR(transform.MetaApply(image));
+  }
+  if (image.error) {
+    return JXL_FAILURE("Corrupt file. Aborting.");
+  }
+  if (br->AllReadsWithinBounds()) {
+    // Only check if the transforms list is complete.
+    JXL_RETURN_IF_ERROR(ValidateChannelDimensions(image, *options));
+  }
+
+  size_t nb_channels = image.channel.size();
+
+  // Count the channels that will be decoded and track the widest one; that
+  // width is handed to the ANSSymbolReader as its distance multiplier.
+  size_t num_chans = 0;
+  size_t distance_multiplier = 0;
+  for (size_t i = 0; i < nb_channels; i++) {
+    Channel &channel = image.channel[i];
+    if (!channel.w || !channel.h) {
+      continue;  // skip empty channels
+    }
+    if (i >= image.nb_meta_channels && (channel.w > options->max_chan_size ||
+                                        channel.h > options->max_chan_size)) {
+      break;
+    }
+    if (channel.w > distance_multiplier) {
+      distance_multiplier = channel.w;
+    }
+    num_chans++;
+  }
+  if (num_chans == 0) return true;
+
+  // Read tree.
+  // The pointers default to the local storage and are redirected to the
+  // global objects when the header asks for the global tree.
+  Tree tree_storage;
+  std::vector<uint8_t> context_map_storage;
+  ANSCode code_storage;
+  const Tree *tree = &tree_storage;
+  const ANSCode *code = &code_storage;
+  const std::vector<uint8_t> *context_map = &context_map_storage;
+  if (!header.use_global_tree) {
+    // Upper bound for the tree size: 1024 plus the total number of pixels of
+    // the channels that will be decoded, with explicit overflow checks.
+    size_t max_tree_size = 1024;
+    for (size_t i = 0; i < nb_channels; i++) {
+      Channel &channel = image.channel[i];
+      if (!channel.w || !channel.h) {
+        continue;  // skip empty channels
+      }
+      if (i >= image.nb_meta_channels && (channel.w > options->max_chan_size ||
+                                          channel.h > options->max_chan_size)) {
+        break;
+      }
+      size_t pixels = channel.w * channel.h;
+      if (pixels / channel.w != channel.h) {
+        return JXL_FAILURE("Tree size overflow");
+      }
+      max_tree_size += pixels;
+      if (max_tree_size < pixels) return JXL_FAILURE("Tree size overflow");
+    }
+
+    JXL_RETURN_IF_ERROR(DecodeTree(br, &tree_storage, max_tree_size));
+    JXL_RETURN_IF_ERROR(DecodeHistograms(br, (tree_storage.size() + 1) / 2,
+                                         &code_storage, &context_map_storage));
+  } else {
+    if (!global_tree || !global_code || !global_ctx_map ||
+        global_tree->empty()) {
+      return JXL_FAILURE("No global tree available but one was requested");
+    }
+    tree = global_tree;
+    code = global_code;
+    context_map = global_ctx_map;
+  }
+
+  // Read channels
+  ANSSymbolReader reader(code, br, distance_multiplier);
+  for (size_t i = 0; i < nb_channels; i++) {
+    Channel &channel = image.channel[i];
+    if (!channel.w || !channel.h) {
+      continue;  // skip empty channels
+    }
+    if (i >= image.nb_meta_channels && (channel.w > options->max_chan_size ||
+                                        channel.h > options->max_chan_size)) {
+      break;
+    }
+    JXL_RETURN_IF_ERROR(DecodeModularChannelMAANS(br, &reader, *context_map,
+                                                  *tree, header.wp_header, i,
+                                                  group_id, &image));
+    // Truncated group.
+    if (!br->AllReadsWithinBounds()) {
+      if (!allow_truncated_group) return JXL_FAILURE("Truncated input");
+      // Zero the channel that was being decoded and every channel after it,
+      // then report the truncation as a non-fatal "not enough bytes" status.
+      ZeroFillImage(&channel.plane);
+      while (++i < nb_channels) ZeroFillImage(&image.channel[i].plane);
+      return Status(StatusCode::kNotEnoughBytes);
+    }
+  }
+  if (!reader.CheckANSFinalState()) {
+    return JXL_FAILURE("ANS decode final state failed");
+  }
+  return true;
+}
+
+// Decodes a modular image with ModularDecode and then undoes its transforms.
+//
+// br:      bit reader positioned at the group header.
+// image:   in/out image; its channels receive the decoded samples.
+// header:  optional output for the decoded GroupHeader; when nullptr a local
+//          header is used instead.
+// undo_transforms: forwarded to Image::undo_transforms as its `keep`
+//          argument.
+// tree / code / ctx_map: optional global tree, entropy code and context map,
+//          forwarded to ModularDecode.
+// allow_truncated_group: when false, any decode error is returned
+//          immediately; when true, only fatal errors abort and a non-fatal
+//          status (e.g. not enough bytes) is returned after the transforms
+//          have been undone.
+//
+// Returns the status of ModularDecode, or a failure if undoing the
+// transforms flagged the image as erroneous.
+Status ModularGenericDecompress(BitReader *br, Image &image,
+                                GroupHeader *header, size_t group_id,
+                                ModularOptions *options, int undo_transforms,
+                                const Tree *tree, const ANSCode *code,
+                                const std::vector<uint8_t> *ctx_map,
+                                bool allow_truncated_group) {
+#ifdef JXL_ENABLE_ASSERT
+  // Remember the requested channel sizes so they can be verified after the
+  // transforms have been undone.
+  std::vector<std::pair<uint32_t, uint32_t>> req_sizes(image.channel.size());
+  for (size_t c = 0; c < req_sizes.size(); c++) {
+    req_sizes[c] = {image.channel[c].w, image.channel[c].h};
+  }
+#endif
+  GroupHeader local_header;
+  if (header == nullptr) header = &local_header;
+  // Sample the reader position *before* decoding, so that the debug message
+  // below reports the bytes consumed by this call rather than always zero.
+  const size_t bit_pos = br->TotalBitsConsumed();
+  auto dec_status = ModularDecode(br, image, *header, group_id, options, tree,
+                                  code, ctx_map, allow_truncated_group);
+  if (!allow_truncated_group) JXL_RETURN_IF_ERROR(dec_status);
+  if (dec_status.IsFatalError()) return dec_status;
+  image.undo_transforms(header->wp_header, undo_transforms);
+  if (image.error) return JXL_FAILURE("Corrupt file. Aborting.");
+  JXL_DEBUG_V(4, "Modular-decoded a %zux%zu nbchans=%zu image from %zu bytes",
+              image.w, image.h, image.channel.size(),
+              (br->TotalBitsConsumed() - bit_pos) / 8);
+  // bit_pos is only read by the debug macro above; the cast keeps builds
+  // where the macro expands to nothing free of unused-variable warnings.
+  (void)bit_pos;
+#ifdef JXL_ENABLE_ASSERT
+  // Check that after applying all transforms we are back to the requested image
+  // sizes, otherwise there's a programming error with the transformations.
+  if (undo_transforms == -1 || undo_transforms == 0) {
+    JXL_ASSERT(image.channel.size() == req_sizes.size());
+    for (size_t c = 0; c < req_sizes.size(); c++) {
+      JXL_ASSERT(req_sizes[c].first == image.channel[c].w);
+      JXL_ASSERT(req_sizes[c].second == image.channel[c].h);
+    }
+  }
+#endif
+  return dec_status;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/encoding.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/encoding.h
new file mode 100644
index 0000000000..8a208765f6
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/encoding.h
@@ -0,0 +1,140 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_ENCODING_H_
+#define LIB_JXL_MODULAR_ENCODING_ENCODING_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/encoding/dec_ma.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+// Valid range of properties for using lookup tables instead of trees.
+constexpr int32_t kPropRangeFast = 512;
+
+// Header of a modular-encoded group: whether the global tree is used, the
+// weighted-predictor header, and the list of transforms applied to the image.
+struct GroupHeader : public Fields {
+  GroupHeader();
+
+  const char *Name() const override { return "GroupHeader"; }
+
+  // Visits the fields in this order: use_global_tree (default false),
+  // wp_header, the number of transforms (default 0), then each transform.
+  Status VisitFields(Visitor *JXL_RESTRICT visitor) override {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &use_global_tree));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&wp_header));
+    uint32_t num_transforms = static_cast<uint32_t>(transforms.size());
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(0), Val(1), BitsOffset(4, 2),
+                                           BitsOffset(8, 18), 0,
+                                           &num_transforms));
+    // When reading, size the vector to the decoded count before visiting its
+    // elements. The loop below is bounded by num_transforms rather than by
+    // transforms.size(), since the visitor receives a pointer to
+    // num_transforms and may overwrite it.
+    if (visitor->IsReading()) transforms.resize(num_transforms);
+    for (size_t i = 0; i < num_transforms; i++) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&transforms[i]));
+    }
+    return true;
+  }
+
+  // True if the group uses the global tree instead of a locally coded one.
+  bool use_global_tree;
+  // Parameters of the weighted predictor.
+  weighted::Header wp_header;
+
+  // Transforms applied to the image, in the order they are visited.
+  std::vector<Transform> transforms;
+};
+
+FlatTree FilterTree(const Tree &global_tree,
+                    std::array<pixel_type, kNumStaticProperties> &static_props,
+                    size_t *num_props, bool *use_wp, bool *wp_only,
+                    bool *gradient_only);
+
+// Flattens `tree` into lookup tables indexed by a property value.
+//
+// The tables have 2 * kPropRangeFast entries; the entry for property value v
+// (with -kPropRangeFast <= v < kPropRangeFast) is stored at index
+// v + kPropRangeFast. For every leaf, the covered entries receive the leaf's
+// childID (context_lookup), predictor_offset (offsets) and, if `multipliers`
+// is non-null, multiplier.
+//
+// Returns false, possibly after partially filling the tables, when
+//  - a range produced by a split value lies outside the table range,
+//  - a leaf's predictor_offset or multiplier does not fit in int8_t, or
+//  - `multipliers` is nullptr and a leaf has a multiplier different from 1.
+//
+// NOTE(review): the function never looks at which property a split refers to
+// (beyond comparing against kNumStaticProperties), so the caller presumably
+// has to guarantee that all relevant decisions test the same property --
+// confirm against FilterTree and the call sites.
+template <typename T>
+bool TreeToLookupTable(const FlatTree &tree,
+                       T context_lookup[2 * kPropRangeFast],
+                       int8_t offsets[2 * kPropRangeFast],
+                       int8_t multipliers[2 * kPropRangeFast] = nullptr) {
+  struct TreeRange {
+    // Begin *excluded*, end *included*. This works best with > vs <= decision
+    // nodes.
+    int begin, end;
+    // Index in `tree` of the node responsible for this range.
+    size_t pos;
+  };
+  // Explicit stack of (range, node) pairs still to be processed; starts with
+  // the full table range assigned to the root node.
+  std::vector<TreeRange> ranges;
+  ranges.push_back(TreeRange{-kPropRangeFast - 1, kPropRangeFast - 1, 0});
+  while (!ranges.empty()) {
+    TreeRange cur = ranges.back();
+    ranges.pop_back();
+    if (cur.begin < -kPropRangeFast - 1 || cur.begin >= kPropRangeFast - 1 ||
+        cur.end > kPropRangeFast - 1) {
+      // Tree is outside the allowed range, exit.
+      return false;
+    }
+    auto &node = tree[cur.pos];
+    // Leaf.
+    if (node.property0 == -1) {
+      if (node.predictor_offset < std::numeric_limits<int8_t>::min() ||
+          node.predictor_offset > std::numeric_limits<int8_t>::max()) {
+        return false;
+      }
+      if (node.multiplier < std::numeric_limits<int8_t>::min() ||
+          node.multiplier > std::numeric_limits<int8_t>::max()) {
+        return false;
+      }
+      if (multipliers == nullptr && node.multiplier != 1) {
+        return false;
+      }
+      // Fill the entries for the values in (cur.begin, cur.end].
+      for (int i = cur.begin + 1; i < cur.end + 1; i++) {
+        context_lookup[i + kPropRangeFast] = node.childID;
+        if (multipliers) multipliers[i + kPropRangeFast] = node.multiplier;
+        offsets[i + kPropRangeFast] = node.predictor_offset;
+      }
+      continue;
+    }
+    // Inner node: the top split at splitval0 has up to two second-level
+    // splits (splitvals[0] on the > side, splitvals[1] on the <= side), which
+    // are only honoured when their property is not a static one. The children
+    // are stored consecutively starting at childID.
+    // > side of top node.
+    if (node.properties[0] >= kNumStaticProperties) {
+      ranges.push_back(TreeRange({node.splitvals[0], cur.end, node.childID}));
+      ranges.push_back(
+          TreeRange({node.splitval0, node.splitvals[0], node.childID + 1}));
+    } else {
+      ranges.push_back(TreeRange({node.splitval0, cur.end, node.childID}));
+    }
+    // <= side
+    if (node.properties[1] >= kNumStaticProperties) {
+      ranges.push_back(
+          TreeRange({node.splitvals[1], node.splitval0, node.childID + 2}));
+      ranges.push_back(
+          TreeRange({cur.begin, node.splitvals[1], node.childID + 3}));
+    } else {
+      ranges.push_back(
+          TreeRange({cur.begin, node.splitval0, node.childID + 2}));
+    }
+  }
+  return true;
+}
+// TODO(veluca): make cleaner interfaces.
+
+Status ValidateChannelDimensions(const Image &image,
+                                 const ModularOptions &options);
+
+// undo_transforms == N > 0: undo all transforms except the first N
+//                           (e.g. to represent YCbCr420 losslessly)
+// undo_transforms == 0: undo all transforms
+// undo_transforms == -1: undo all transforms but don't clamp to range
+// undo_transforms == -2: don't undo any transform
+Status ModularGenericDecompress(BitReader *br, Image &image,
+                                GroupHeader *header, size_t group_id,
+                                ModularOptions *options,
+                                int undo_transforms = -1,
+                                const Tree *tree = nullptr,
+                                const ANSCode *code = nullptr,
+                                const std::vector<uint8_t> *ctx_map = nullptr,
+                                bool allow_truncated_group = false);
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_ENCODING_ENCODING_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/ma_common.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/ma_common.h
new file mode 100644
index 0000000000..e5b6cf3335
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/encoding/ma_common.h
@@ -0,0 +1,28 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_MA_COMMON_H_
+#define LIB_JXL_MODULAR_ENCODING_MA_COMMON_H_
+
+#include <stddef.h>
+
+namespace jxl {
+
+// Context identifiers used when coding an MA tree, one per kind of value
+// (split value, property, predictor, offset, multiplier log / bits).
+// NOTE(review): presumably these select the entropy-coding context for each
+// tree field -- confirm against the tree encoder/decoder.
+enum MATreeContext : size_t {
+  kSplitValContext = 0,
+  kPropertyContext = 1,
+  kPredictorContext = 2,
+  kOffsetContext = 3,
+  kMultiplierLogContext = 4,
+  kMultiplierBitsContext = 5,
+
+  // Number of contexts above; must stay last.
+  kNumTreeContexts = 6,
+};
+
+static constexpr size_t kMaxTreeSize = 1 << 26;
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_ENCODING_MA_COMMON_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/modular_image.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/modular_image.cc
new file mode 100644
index 0000000000..6c26b96c0d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/modular_image.cc
@@ -0,0 +1,62 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/modular_image.h"
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+// Undoes the most recently applied transforms, last one first, until at most
+// `keep` of them remain. keep == -2 leaves the image untouched. If a
+// transform cannot be inverted, `error` is set and the function returns with
+// that transform still in the list. When keep == 0 and bitdepth < 32, every
+// sample is finally clamped to [0, 2^bitdepth - 1].
+void Image::undo_transforms(const weighted::Header &wp_header, int keep,
+                            jxl::ThreadPool *pool) {
+  if (keep == -2) return;
+
+  while (!transform.empty() && static_cast<int>(transform.size()) > keep) {
+    // Work on a copy: the inverse operates on this image while it runs.
+    Transform last = transform.back();
+    JXL_DEBUG_V(4, "Undoing transform %s", last.Name());
+    const Status inverted = last.Inverse(*this, wp_header, pool);
+    if (!inverted) {
+      JXL_NOTIFY_ERROR("Error while undoing transform %s.", last.Name());
+      error = true;
+      return;
+    }
+    JXL_DEBUG_V(8, "Undoing transform %s: done", last.Name());
+    transform.pop_back();
+  }
+
+  // Clamping only happens when all transforms were requested to be undone
+  // with clamping (keep == 0) and the bit depth leaves room in pixel_type.
+  if (keep != 0 || bitdepth >= 32) return;
+
+  // clamp the values to the valid range (lossy compression can produce values
+  // outside the range)
+  const pixel_type maxval = (1u << bitdepth) - 1;
+  for (Channel &ch : channel) {
+    for (size_t y = 0; y < ch.h; y++) {
+      pixel_type *JXL_RESTRICT row = ch.plane.Row(y);
+      for (size_t x = 0; x < ch.w; x++) {
+        row[x] = Clamp1(row[x], 0, maxval);
+      }
+    }
+  }
+}
+
+// Creates an image of iw x ih with bit depth `bd`, no meta channels, the
+// error flag cleared, and `nb_chans` channels of the full image size (none
+// if nb_chans <= 0).
+Image::Image(size_t iw, size_t ih, int bd, int nb_chans)
+    : w(iw), h(ih), bitdepth(bd), nb_meta_channels(0), error(false) {
+  for (int left = nb_chans; left > 0; --left) {
+    channel.emplace_back(iw, ih);
+  }
+}
+
+// Default constructor: an empty 0x0, 8-bit image without channels. Note that
+// the error flag starts out *set*, so a default-constructed Image counts as
+// erroneous until it is replaced (e.g. by move assignment).
+Image::Image() : w(0), h(0), bitdepth(8), nb_meta_channels(0), error(true) {}
+
+// Move assignment: takes over the channels and transforms of `other` and
+// copies its scalar state (dimensions, bit depth, meta-channel count, error
+// flag). The scalar members of `other` are left unchanged.
+Image &Image::operator=(Image &&other) noexcept {
+  // Containers are moved ...
+  channel = std::move(other.channel);
+  transform = std::move(other.transform);
+  // ... plain values are copied.
+  error = other.error;
+  nb_meta_channels = other.nb_meta_channels;
+  bitdepth = other.bitdepth;
+  h = other.h;
+  w = other.w;
+  return *this;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/modular_image.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/modular_image.h
new file mode 100644
index 0000000000..c418ba4fe2
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/modular_image.h
@@ -0,0 +1,107 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_MODULAR_IMAGE_H_
+#define LIB_JXL_MODULAR_MODULAR_IMAGE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+typedef int32_t pixel_type;  // can use int16_t if it's only for 8-bit images.
+                             // Need some wiggle room for YCoCg / Squeeze etc
+
+typedef int64_t pixel_type_w;
+
+namespace weighted {
+struct Header;
+}
+
+// One channel of a modular image: a plane of samples plus its logical size
+// and its subsampling shifts relative to the image. Move-only.
+class Channel {
+ public:
+  // Sample storage. Its allocated size can differ from (w, h); see shrink().
+  jxl::Plane<pixel_type> plane;
+  // Logical width and height of the channel.
+  size_t w, h;
+  int hshift, vshift;  // w ~= image.w >> hshift;  h ~= image.h >> vshift
+  // Allocates an iw x ih plane; the shifts default to 0 (no subsampling).
+  Channel(size_t iw, size_t ih, int hsh = 0, int vsh = 0)
+      : plane(iw, ih), w(iw), h(ih), hshift(hsh), vshift(vsh) {}
+
+  // Copying is disabled; channels can only be moved.
+  Channel(const Channel& other) = delete;
+  Channel& operator=(const Channel& other) = delete;
+
+  // Move assignment
+  Channel& operator=(Channel&& other) noexcept {
+    w = other.w;
+    h = other.h;
+    hshift = other.hshift;
+    vshift = other.vshift;
+    plane = std::move(other.plane);
+    return *this;
+  }
+
+  // Move constructor
+  Channel(Channel&& other) noexcept = default;
+
+  // Makes the plane match the logical size (w, h). If the sizes already
+  // agree nothing happens; otherwise the plane is replaced by a newly
+  // constructed one and the previous contents are not copied over.
+  void shrink() {
+    if (plane.xsize() == w && plane.ysize() == h) return;
+    jxl::Plane<pixel_type> resizedplane(w, h);
+    plane = std::move(resizedplane);
+  }
+  // Sets the logical size to nw x nh and then calls shrink().
+  void shrink(int nw, int nh) {
+    w = nw;
+    h = nh;
+    shrink();
+  }
+
+  // Row accessors; forward to the underlying plane.
+  JXL_INLINE pixel_type* Row(const size_t y) { return plane.Row(y); }
+  JXL_INLINE const pixel_type* Row(const size_t y) const {
+    return plane.Row(y);
+  }
+};
+
+class Transform;
+
+// A modular image: a list of channels plus the transforms that have been
+// applied to them. Move-only.
+class Image {
+ public:
+  // image data, transforms can dramatically change the number of channels and
+  // their semantics
+  std::vector<Channel> channel;
+  // transforms that have been applied (and that have to be undone)
+  std::vector<Transform> transform;
+
+  // image dimensions (channels may have different dimensions due to transforms)
+  size_t w, h;
+  // Bit depth of the samples; used as the clamping range by undo_transforms.
+  int bitdepth;
+  size_t nb_meta_channels;  // first few channels might contain palette(s)
+  bool error;               // true if a fatal error occurred, false otherwise
+
+  // Creates an iw x ih image with nb_chans full-size channels.
+  Image(size_t iw, size_t ih, int bitdepth, int nb_chans);
+  // Creates an empty image; see the definition for the initial member values.
+  Image();
+
+  // Copying is disabled; images can only be moved.
+  Image(const Image& other) = delete;
+  Image& operator=(const Image& other) = delete;
+
+  Image& operator=(Image&& other) noexcept;
+  Image(Image&& other) noexcept = default;
+
+  // undo all except the first 'keep' transforms
+  void undo_transforms(const weighted::Header& wp_header, int keep = 0,
+                       jxl::ThreadPool* pool = nullptr);
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_MODULAR_IMAGE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/options.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/options.h
new file mode 100644
index 0000000000..b25b17c6c8
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/options.h
@@ -0,0 +1,172 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_OPTIONS_H_
+#define LIB_JXL_MODULAR_OPTIONS_H_
+
+#include <stdint.h>
+
+#include <array>
+#include <vector>
+
+namespace jxl {
+
+using PropertyVal = int32_t;
+using Properties = std::vector<PropertyVal>;
+
+// Pixel predictors of the modular mode. The numeric values are explicit;
+// values 0..13 are regular predictors, 14 and 15 are encoder-only choices.
+// NOTE(review): the values presumably match the ones coded in the bitstream
+// and therefore must not be renumbered -- confirm.
+enum class Predictor : uint32_t {
+  Zero = 0,
+  Left = 1,
+  Top = 2,
+  Average0 = 3,
+  Select = 4,
+  Gradient = 5,
+  Weighted = 6,
+  TopRight = 7,
+  TopLeft = 8,
+  LeftLeft = 9,
+  Average1 = 10,
+  Average2 = 11,
+  Average3 = 12,
+  Average4 = 13,
+  // The following predictors are encoder-only.
+  Best = 14,  // Best of Gradient and Weighted
+  Variable =
+      15,  // Find the best decision tree for predictors/predictor per row
+};
+
+// Returns a short, human-readable name for predictor `p`. The encoder-only
+// predictors (Best, Variable) and any out-of-range value yield "INVALID".
+inline const char* PredictorName(Predictor p) {
+  // Short names indexed by the numeric value of the predictor (0..13).
+  static const char* const kShortNames[] = {
+      "Zero", "Left", "Top", "Avg0", "Sel",  "Grd",  "Wgh",
+      "TopR", "TopL", "LL",  "Avg1", "Avg2", "Avg3", "Avg4",
+  };
+  static_assert(static_cast<uint32_t>(Predictor::Zero) == 0 &&
+                    static_cast<uint32_t>(Predictor::Average4) == 13,
+                "kShortNames must follow the numeric order of Predictor");
+  const uint32_t index = static_cast<uint32_t>(p);
+  const uint32_t count = sizeof(kShortNames) / sizeof(kShortNames[0]);
+  return index < count ? kShortNames[index] : "INVALID";
+}
+
+// Returns the color, as three uint8_t components, associated with predictor
+// `p`. Predictors without a dedicated color map to white (255, 255, 255).
+inline std::array<uint8_t, 3> PredictorColor(Predictor p) {
+  using Rgb = std::array<uint8_t, 3>;
+  if (p == Predictor::Zero) return Rgb{{0, 0, 0}};
+  if (p == Predictor::Left) return Rgb{{255, 0, 0}};
+  if (p == Predictor::Top) return Rgb{{0, 255, 0}};
+  if (p == Predictor::Average0) return Rgb{{0, 0, 255}};
+  if (p == Predictor::Average4) return Rgb{{192, 128, 128}};
+  if (p == Predictor::Select) return Rgb{{255, 255, 0}};
+  if (p == Predictor::Gradient) return Rgb{{255, 0, 255}};
+  if (p == Predictor::Weighted) return Rgb{{0, 255, 255}};
+  // TODO: the remaining predictors have no dedicated color yet.
+  return Rgb{{255, 255, 255}};
+}
+
+constexpr size_t kNumModularPredictors = static_cast<size_t>(Predictor::Best);
+
+static constexpr ssize_t kNumStaticProperties = 2;  // channel, group_id.
+
+using StaticPropRange =
+    std::array<std::array<uint32_t, 2>, kNumStaticProperties>;
+
+// Associates a multiplier with a range of static property values.
+// NOTE(review): `range` holds one pair of uint32_t per static property
+// (channel, group_id); presumably the pair is a [begin, end) interval --
+// confirm against the users of this struct.
+struct ModularMultiplierInfo {
+  StaticPropRange range;
+  uint32_t multiplier;
+};
+
+// Options of the modular encoder and decoder. Every member has a default
+// value, so a default-constructed instance is usable as is.
+struct ModularOptions {
+  /// Used in both encode and decode:
+
+  // Stop encoding/decoding when reaching a (non-meta) channel that has a
+  // dimension bigger than max_chan_size.
+  size_t max_chan_size = 0xFFFFFF;
+
+  // Used during decoding for validation of the transforms (squeezing) scheme.
+  size_t group_dim = 0x1FFFFFFF;
+
+  /// Encode options:
+  // Fraction of pixels to look at to learn a MA tree
+  // Number of iterations to do to learn a MA tree
+  // (if zero there is no MA context model)
+  // NOTE(review): the two descriptions above disagree; the float type and
+  // the default of .5 suggest the "fraction of pixels" reading -- confirm.
+  float nb_repeats = .5f;
+
+  // Maximum number of (previous channel) properties to use in the MA trees
+  int max_properties = 0;  // no previous channels
+
+  // Alternative heuristic tweaks.
+  // Properties default to channel, group, weighted, gradient residual, W-NW,
+  // NW-N, N-NE, N-NN
+  std::vector<uint32_t> splitting_heuristics_properties = {0,  1,  15, 9,
+                                                           10, 11, 12, 13};
+  float splitting_heuristics_node_threshold = 96;
+  size_t max_property_values = 32;
+
+  // Predictor to use for each channel.
+  // The default of -1 is not a valid Predictor value; presumably it means
+  // "not set" -- confirm against the encoder.
+  Predictor predictor = static_cast<Predictor>(-1);
+
+  int wp_mode = 0;
+
+  float fast_decode_multiplier = 1.01f;
+
+  // Forces the encoder to produce a tree that is compatible with the WP-only
+  // decode path (or with the no-wp path, or the gradient-only path).
+  enum class TreeMode { kGradientOnly, kWPOnly, kNoWP, kDefault };
+  TreeMode wp_tree_mode = TreeMode::kDefault;
+
+  // Skip fast paths in the encoder.
+  bool skip_encoder_fast_path = false;
+
+  // Kind of tree to use.
+  // TODO(veluca): add tree kinds for JPEG recompression with CfL enabled,
+  // general AC metadata, different DC qualities, and others.
+  enum class TreeKind {
+    kLearn,
+    kJpegTranscodeACMeta,
+    kFalconACMeta,
+    kACMeta,
+    kWPFixedDC,
+    kGradientFixedDC,
+  };
+  TreeKind tree_kind = TreeKind::kLearn;
+
+  // Ignore the image and just pretend all tokens are zeroes
+  bool zero_tokens = false;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_OPTIONS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_palette.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_palette.cc
new file mode 100644
index 0000000000..cb012fff8a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_palette.cc
@@ -0,0 +1,447 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/enc_palette.h"
+
+#include <map>
+#include <set>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/enc_transform.h"
+#include "lib/jxl/modular/transform/palette.h"
+
+namespace jxl {
+
+namespace palette_internal {
+
+static constexpr bool kEncodeToHighQualityImplicitPalette = true;
+
+// Inclusive.
+static constexpr int kMinImplicitPaletteIndex = -(2 * 72 - 1);
+
+// Returns a weighted squared distance between color `a` (float components)
+// and color `b` (integer components). Both must have the same number of
+// components (asserted).
+//
+// The result is 4 * sum_c (w_c * (a[c] - b[c]))^2 plus the squared difference
+// of the weighted component sums of `a` and `b`. The base weight w_c is 3, 5
+// and 2 for components 0, 1 and any later one; it is increased for the first
+// three components when the component is bright relative to the average.
+float ColorDistance(const std::vector<float> &JXL_RESTRICT a,
+                    const std::vector<pixel_type> &JXL_RESTRICT b) {
+  JXL_ASSERT(a.size() == b.size());
+  float distance = 0;
+  // Scaled average of the first three components of both colors; stays 0 when
+  // there are fewer than three components.
+  float ave3 = 0;
+  if (a.size() >= 3) {
+    ave3 = (a[0] + b[0] + a[1] + b[1] + a[2] + b[2]) * (1.21f / 3.0f);
+  }
+  float sum_a = 0, sum_b = 0;
+  for (size_t c = 0; c < a.size(); ++c) {
+    const float difference =
+        static_cast<float>(a[c]) - static_cast<float>(b[c]);
+    float weight = c == 0 ? 3 : c == 1 ? 5 : 2;
+    // Components at or above the scaled average get a larger weight.
+    if (c < 3 && (a[c] + b[c] >= ave3)) {
+      const float add_w[3] = {
+          1.15,
+          1.15,
+          1.12,
+      };
+      weight += add_w[c];
+      // The extra weight of the third component is reduced again when it is
+      // only slightly above the threshold.
+      if (c == 2 && ((a[2] + b[2]) < 1.22 * ave3)) {
+        weight -= 0.5;
+      }
+    }
+    distance += difference * difference * weight * weight;
+    // Weights of the component sums; note that the last weight (1) differs
+    // from the per-component weight above (2).
+    const int sum_weight = c == 0 ? 3 : c == 1 ? 5 : 1;
+    sum_a += a[c] * sum_weight;
+    // b[c] * sum_weight is an integer product that is converted to float
+    // afterwards.
+    sum_b += b[c] * sum_weight;
+  }
+  distance *= 4;
+  float sum_difference = sum_a - sum_b;
+  distance += sum_difference * sum_difference;
+  return distance;
+}
+
+// Maps `color` to an index of the implicit palette that follows the
+// `palette_size` explicit entries.
+//
+// Every component is quantized to kLargeCube levels. With `high_quality` the
+// levels are combined in base kLargeCube and the result is additionally
+// shifted by kLargeCubeOffset. Otherwise each component is first lowered by
+// 1 << max(0, bit_depth - 3) (floored at 0), its level is capped at
+// kSmallCube - 1, and the levels are combined in base kSmallCube.
+static int QuantizeColorToImplicitPaletteIndex(
+    const std::vector<pixel_type> &color, const int palette_size,
+    const int bit_depth, bool high_quality) {
+  int index = 0;
+  int stride = 1;
+  for (const pixel_type component : color) {
+    int value = component;
+    if (!high_quality) {
+      value = std::max(0, value - (1 << (std::max(0, bit_depth - 3))));
+    }
+    // Rounded scaling of [0, 2^bit_depth - 1] to [0, kLargeCube - 1].
+    int quantized = ((kLargeCube - 1) * value + (1 << (bit_depth - 1))) /
+                    ((1 << bit_depth) - 1);
+    JXL_ASSERT((quantized % kLargeCube) == quantized);
+    if (high_quality) {
+      index += quantized * stride;
+      stride *= kLargeCube;
+    } else {
+      if (quantized > kSmallCube - 1) {
+        quantized = kSmallCube - 1;
+      }
+      index += quantized * stride;
+      stride *= kSmallCube;
+    }
+  }
+  if (high_quality) {
+    return index + palette_size + kLargeCubeOffset;
+  }
+  return index + palette_size;
+}
+
+}  // namespace palette_internal
+
+Status FwdPalette(Image &input, uint32_t begin_c, uint32_t end_c,
+                  uint32_t &nb_colors, bool ordered, bool lossy,
+                  Predictor &predictor, const weighted::Header &wp_header) {
+  JXL_QUIET_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, end_c));
+  JXL_ASSERT(begin_c >= input.nb_meta_channels);
+  uint32_t nb = end_c - begin_c + 1;
+
+  size_t w = input.channel[begin_c].w;
+  size_t h = input.channel[begin_c].h;
+
+  if (!lossy && nb == 1) {
+    // Channel palette special case
+    if (nb_colors == 0) return false;
+    std::vector<pixel_type> lookup;
+    pixel_type minval, maxval;
+    compute_minmax(input.channel[begin_c], &minval, &maxval);
+    size_t lookup_table_size =
+        static_cast<int64_t>(maxval) - static_cast<int64_t>(minval) + 1;
+    if (lookup_table_size > palette_internal::kMaxPaletteLookupTableSize) {
+      return false;  // too large lookup table
+    }
+    lookup.resize(lookup_table_size, 0);
+    pixel_type idx = 0;
+    for (size_t y = 0; y < h; y++) {
+      const pixel_type *p = input.channel[begin_c].Row(y);
+      for (size_t x = 0; x < w; x++) {
+        if (lookup[p[x] - minval] == 0) {
+          lookup[p[x] - minval] = 1;
+          idx++;
+          if (idx > (int)nb_colors) return false;
+        }
+      }
+    }
+    JXL_DEBUG_V(6, "Channel %i uses only %i colors.", begin_c, idx);
+    Channel pch(idx, 1);
+    pch.hshift = -1;
+    nb_colors = idx;
+    idx = 0;
+    pixel_type *JXL_RESTRICT p_palette = pch.Row(0);
+    for (size_t i = 0; i < lookup_table_size; i++) {
+      if (lookup[i]) {
+        p_palette[idx] = i + minval;
+        lookup[i] = idx;
+        idx++;
+      }
+    }
+    for (size_t y = 0; y < h; y++) {
+      pixel_type *p = input.channel[begin_c].Row(y);
+      for (size_t x = 0; x < w; x++) p[x] = lookup[p[x] - minval];
+    }
+    predictor = Predictor::Zero;
+    input.nb_meta_channels++;
+    input.channel.insert(input.channel.begin(), std::move(pch));
+    return true;
+  }
+
+  Image quantized_input;
+  if (lossy) {
+    quantized_input = Image(w, h, input.bitdepth, nb);
+    for (size_t c = 0; c < nb; c++) {
+      CopyImageTo(input.channel[begin_c + c].plane,
+                  &quantized_input.channel[c].plane);
+    }
+  }
+
+  JXL_DEBUG_V(
+      7, "Trying to represent channels %i-%i using at most a %i-color palette.",
+      begin_c, end_c, nb_colors);
+  int nb_deltas = 0;
+  bool delta_used = false;
+  std::set<std::vector<pixel_type>>
+      candidate_palette;  // ordered lexicographically
+  std::vector<std::vector<pixel_type>> candidate_palette_imageorder;
+  std::vector<pixel_type> color(nb);
+  std::vector<float> color_with_error(nb);
+  std::vector<const pixel_type *> p_in(nb);
+
+  if (lossy) {
+    // Count color frequency for colors that make a cross.
+    std::map<std::vector<pixel_type>, size_t> color_freq_map;
+    for (size_t y = 1; y + 1 < h; y++) {
+      for (uint32_t c = 0; c < nb; c++) {
+        p_in[c] = input.channel[begin_c + c].Row(y);
+      }
+      for (size_t x = 1; x + 1 < w; x++) {
+        for (uint32_t c = 0; c < nb; c++) {
+          color[c] = p_in[c][x];
+        }
+        int offsets[4][2] = {{1, 0}, {-1, 0}, {0, 1}, {0, -1}};
+        bool makes_cross = true;
+        for (int i = 0; i < 4 && makes_cross; ++i) {
+          int dx = offsets[i][0];
+          int dy = offsets[i][1];
+          for (uint32_t c = 0; c < nb && makes_cross; c++) {
+            if (input.channel[begin_c + c].Row(y + dy)[x + dx] != color[c]) {
+              makes_cross = false;
+            }
+          }
+        }
+        if (makes_cross) color_freq_map[color] += 1;
+      }
+    }
+    // Add colors satisfying frequency condition to the palette.
+    constexpr float kImageFraction = 0.01f;
+    size_t color_frequency_lower_bound = 5 + input.h * input.w * kImageFraction;
+    for (const auto &color_freq : color_freq_map) {
+      if (color_freq.second > color_frequency_lower_bound) {
+        candidate_palette.insert(color_freq.first);
+        candidate_palette_imageorder.push_back(color_freq.first);
+      }
+    }
+  }
+
+  for (size_t y = 0; y < h; y++) {
+    for (uint32_t c = 0; c < nb; c++) {
+      p_in[c] = input.channel[begin_c + c].Row(y);
+    }
+    for (size_t x = 0; x < w; x++) {
+      if (lossy && candidate_palette.size() >= nb_colors) break;
+      for (uint32_t c = 0; c < nb; c++) {
+        color[c] = p_in[c][x];
+      }
+      const bool new_color = candidate_palette.insert(color).second;
+      if (new_color) {
+        candidate_palette_imageorder.push_back(color);
+      }
+      if (candidate_palette.size() > nb_colors) {
+        return false;  // too many colors
+      }
+    }
+  }
+
+  nb_colors = candidate_palette.size();
+  JXL_DEBUG_V(6, "Channels %i-%i can be represented using a %i-color palette.",
+              begin_c, end_c, nb_colors);
+
+  Channel pch(nb_colors, nb);
+  pch.hshift = -1;
+  int x = 0;
+  pixel_type *JXL_RESTRICT p_palette = pch.Row(0);
+  intptr_t onerow = pch.plane.PixelsPerRow();
+  intptr_t onerow_image = input.channel[begin_c].plane.PixelsPerRow();
+  const int bit_depth = input.bitdepth;
+  if (ordered) {
+    JXL_DEBUG_V(7, "Palette of %i colors, using lexicographic order",
+                nb_colors);
+    for (auto pcol : candidate_palette) {
+      JXL_DEBUG_V(9, "  Color %i :  ", x);
+      for (size_t i = 0; i < nb; i++) {
+        p_palette[i * onerow + x] = pcol[i];
+      }
+      for (size_t i = 0; i < nb; i++) {
+        JXL_DEBUG_V(9, "%i ", pcol[i]);
+      }
+      x++;
+    }
+  } else {
+    JXL_DEBUG_V(7, "Palette of %i colors, using image order", nb_colors);
+    for (auto pcol : candidate_palette_imageorder) {
+      JXL_DEBUG_V(9, "  Color %i :  ", x);
+      for (size_t i = 0; i < nb; i++) p_palette[i * onerow + x] = pcol[i];
+      for (size_t i = 0; i < nb; i++) JXL_DEBUG_V(9, "%i ", pcol[i]);
+      x++;
+    }
+  }
+  std::vector<weighted::State> wp_states;
+  for (size_t c = 0; c < nb; c++) {
+    wp_states.emplace_back(wp_header, w, h);
+  }
+  std::vector<pixel_type *> p_quant(nb);
+  // Three rows of error for dithering: y to y + 2.
+  // Each row has two pixels of padding in the ends, which is
+  // beneficial for both precision and encoding speed.
+  std::vector<std::vector<float>> error_row[3];
+  if (lossy) {
+    for (int i = 0; i < 3; ++i) {
+      error_row[i].resize(nb);
+      for (size_t c = 0; c < nb; ++c) {
+        error_row[i][c].resize(w + 4);
+      }
+    }
+  }
+  for (size_t y = 0; y < h; y++) {
+    for (size_t c = 0; c < nb; c++) {
+      p_in[c] = input.channel[begin_c + c].Row(y);
+      if (lossy) p_quant[c] = quantized_input.channel[c].Row(y);
+    }
+    pixel_type *JXL_RESTRICT p = input.channel[begin_c].Row(y);
+    for (size_t x = 0; x < w; x++) {
+      int index;
+      if (!lossy) {
+        for (size_t c = 0; c < nb; c++) color[c] = p_in[c][x];
+        // Exact search.
+        for (index = 0; static_cast<uint32_t>(index) < nb_colors; index++) {
+          bool found = true;
+          for (size_t c = 0; c < nb; c++) {
+            if (color[c] != p_palette[c * onerow + index]) {
+              found = false;
+              break;
+            }
+          }
+          if (found) break;
+        }
+        if (index < nb_deltas) {
+          delta_used = true;
+        }
+      } else {
+        for (size_t c = 0; c < nb; c++) {
+          color_with_error[c] = p_in[c][x] + error_row[0][c][x + 2];
+          color[c] = Clamp1(lroundf(color_with_error[c]), 0l,
+                            (1l << input.bitdepth) - 1);
+        }
+        float best_distance = std::numeric_limits<float>::infinity();
+        int best_index = 0;
+        bool best_is_delta = false;
+        std::vector<pixel_type> best_val(nb, 0);
+        std::vector<pixel_type> quantized_val(nb);
+        std::vector<pixel_type> predictions(nb);
+        for (size_t c = 0; c < nb; ++c) {
+          predictions[c] = PredictNoTreeWP(w, p_quant[c] + x, onerow_image, x,
+                                           y, predictor, &wp_states[c])
+                               .guess;
+        }
+        const auto TryIndex = [&](const int index) {
+          for (size_t c = 0; c < nb; c++) {
+            quantized_val[c] = palette_internal::GetPaletteValue(
+                p_palette, index, /*c=*/c,
+                /*palette_size=*/nb_colors,
+                /*onerow=*/onerow, /*bit_depth=*/bit_depth);
+            if (index < nb_deltas) {
+              quantized_val[c] += predictions[c];
+            }
+          }
+          const float color_distance =
+              32 *
+              palette_internal::ColorDistance(color_with_error, quantized_val);
+          float index_penalty = 0;
+          if (index == -1) {
+            index_penalty = -124;
+          } else if (index < static_cast<int>(nb_colors)) {
+            index_penalty = 2 * std::abs(index);
+          } else if (index < static_cast<int>(nb_colors) +
+                                 palette_internal::kLargeCubeOffset) {
+            index_penalty = 70;
+          } else {
+            index_penalty = 256;
+          }
+          index_penalty *= 1LL << std::max(2 * (bit_depth - 8), 0);
+          const float distance = color_distance + index_penalty;
+          if (distance < best_distance) {
+            best_distance = distance;
+            best_index = index;
+            best_is_delta = index < nb_deltas;
+            best_val.swap(quantized_val);
+          }
+        };
+        for (index = palette_internal::kMinImplicitPaletteIndex;
+             index < static_cast<int32_t>(nb_colors); index++) {
+          TryIndex(index);
+        }
+        TryIndex(palette_internal::QuantizeColorToImplicitPaletteIndex(
+            color, nb_colors, bit_depth,
+            /*high_quality=*/false));
+        if (palette_internal::kEncodeToHighQualityImplicitPalette) {
+          TryIndex(palette_internal::QuantizeColorToImplicitPaletteIndex(
+              color, nb_colors, bit_depth,
+              /*high_quality=*/true));
+        }
+        index = best_index;
+        delta_used |= best_is_delta;
+        for (size_t c = 0; c < nb; ++c) {
+          wp_states[c].UpdateErrors(best_val[c], x, y, w);
+          p_quant[c][x] = best_val[c];
+        }
+        float len_error = 0;
+        for (size_t c = 0; c < nb; ++c) {
+          float local_error = color_with_error[c] - best_val[c];
+          len_error += local_error * local_error;
+        }
+        len_error = sqrt(len_error);
+        float modulate = 1.0;
+        int len_limit = 38 << std::max(0, bit_depth - 8);
+        if (len_error > len_limit) {
+          modulate *= len_limit / len_error;
+        }
+        for (size_t c = 0; c < nb; ++c) {
+          float local_error = (color_with_error[c] - best_val[c]);
+          float total_error = 0.65 * local_error;
+
+          // If the neighboring pixels have some error in the opposite
+          // direction of total_error, cancel some or all of it out before
+          // spreading among them.
+          constexpr int offsets[12][2] = {{1, 2}, {0, 3}, {0, 4}, {1, 1},
+                                          {1, 3}, {2, 2}, {1, 0}, {1, 4},
+                                          {2, 1}, {2, 3}, {2, 0}, {2, 4}};
+          float total_available = 0;
+          int n = 0;
+          for (int i = 0; i < 11; ++i) {
+            const int row = offsets[i][0];
+            const int col = offsets[i][1];
+            if (std::signbit(error_row[row][c][x + col]) !=
+                std::signbit(total_error)) {
+              total_available += error_row[row][c][x + col];
+              n++;
+            }
+          }
+          float weight =
+              std::abs(total_error) / (std::abs(total_available) + 1e-3);
+          weight = std::min(weight, 1.0f);
+          for (int i = 0; i < 11; ++i) {
+            const int row = offsets[i][0];
+            const int col = offsets[i][1];
+            if (std::signbit(error_row[row][c][x + col]) !=
+                std::signbit(total_error)) {
+              total_error += weight * error_row[row][c][x + col];
+              error_row[row][c][x + col] *= (1 - weight);
+            }
+          }
+          total_error *= modulate;
+          const float remaining_error = (1.0f / 14.) * total_error;
+          error_row[0][c][x + 3] += 2 * remaining_error;
+          error_row[0][c][x + 4] += remaining_error;
+          error_row[1][c][x + 0] += remaining_error;
+          for (int i = 0; i < 5; ++i) {
+            error_row[1][c][x + i] += remaining_error;
+            error_row[2][c][x + i] += remaining_error;
+          }
+        }
+      }
+      p[x] = index;
+    }
+    if (lossy) {
+      for (size_t c = 0; c < nb; ++c) {
+        error_row[0][c].swap(error_row[1][c]);
+        error_row[1][c].swap(error_row[2][c]);
+        std::fill(error_row[2][c].begin(), error_row[2][c].end(), 0.f);
+      }
+    }
+  }
+  if (!delta_used) {
+    predictor = Predictor::Zero;
+  }
+  input.nb_meta_channels++;
+  input.channel.erase(input.channel.begin() + begin_c + 1,
+                      input.channel.begin() + end_c + 1);
+  input.channel.insert(input.channel.begin(), std::move(pch));
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_palette.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_palette.h
new file mode 100644
index 0000000000..3a0dbd97dc
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_palette.h
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_PALETTE_H_
+#define LIB_JXL_MODULAR_TRANSFORM_ENC_PALETTE_H_
+
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+Status FwdPalette(Image &input, uint32_t begin_c, uint32_t end_c,  // Encoder-side palette transform over channels begin_c..end_c (inclusive).
+                  uint32_t &nb_colors, bool ordered, bool lossy,  // nb_colors: in = largest palette allowed, out = number of colors used.
+                  Predictor &predictor, const weighted::Header &wp_header);  // predictor may be reset to Predictor::Zero by the call.
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_ENC_PALETTE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_rct.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_rct.cc
new file mode 100644
index 0000000000..81ba7e6433
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_rct.cc
@@ -0,0 +1,71 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/enc_rct.h"
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"  // CheckEqualChannels
+
+namespace jxl {
+
+Status FwdRCT(Image& input, size_t begin_c, size_t rct_type) {  // Forward color transform, done in place on channels begin_c..begin_c+2.
+  JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, begin_c + 2));  // the three channels must agree in size and shifts
+  if (rct_type == 0) {  // noop: return false and leave the image untouched
+    return false;
+  }
+  // Permutation: 0=RGB, 1=GBR, 2=BRG, 3=RBG, 4=GRB, 5=BGR
+  int permutation = rct_type / 7;
+  // 0-5 values have the low bit corresponding to Third and the high bits
+  // corresponding to Second. 6 corresponds to YCoCg.
+  //
+  // Second: 0=nop, 1=SubtractFirst, 2=SubtractAvgFirstThird
+  //
+  // Third: 0=nop, 1=SubtractFirst
+  int custom = rct_type % 7;
+  size_t m = begin_c;
+  size_t w = input.channel[m + 0].w;
+  size_t h = input.channel[m + 0].h;
+  int second = (custom % 7) >> 1;  // custom is already in 0..6, so this % 7 changes nothing
+  int third = (custom % 7) & 1;
+  for (size_t y = 0; y < h; y++) {
+    const pixel_type* in0 = input.channel[m + (permutation % 3)].Row(y);  // inputs: the three channels read in permuted order
+    const pixel_type* in1 =
+        input.channel[m + ((permutation + 1 + permutation / 3) % 3)].Row(y);
+    const pixel_type* in2 =
+        input.channel[m + ((permutation + 2 - permutation / 3) % 3)].Row(y);
+    pixel_type* out0 = input.channel[m].Row(y);  // outputs overwrite the same three channels, so in*/out* rows can alias
+    pixel_type* out1 = input.channel[m + 1].Row(y);
+    pixel_type* out2 = input.channel[m + 2].Row(y);
+    for (size_t x = 0; x < w; x++) {
+      if (custom == 6) {  // YCoCg case; all three inputs are loaded before the first store
+        pixel_type R = in0[x];
+        pixel_type G = in1[x];
+        pixel_type B = in2[x];
+        out1[x] = R - B;  // first difference
+        pixel_type tmp = B + (out1[x] >> 1);  // B plus half of that difference
+        out2[x] = G - tmp;  // second difference
+        out0[x] = tmp + (out2[x] >> 1);  // tmp plus half of the second difference
+      } else {
+        pixel_type First = in0[x];  // copies taken before any store, see aliasing note above
+        pixel_type Second = in1[x];
+        pixel_type Third = in2[x];
+        if (second == 1) {
+          Second = Second - First;
+        } else if (second == 2) {
+          Second = Second - ((First + Third) >> 1);  // uses Third before it is modified below
+        }
+        if (third) Third = Third - First;
+        out0[x] = First;
+        out1[x] = Second;
+        out2[x] = Third;
+      }
+    }
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_rct.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_rct.h
new file mode 100644
index 0000000000..8a412393d4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_rct.h
@@ -0,0 +1,17 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_RCT_H_
+#define LIB_JXL_MODULAR_TRANSFORM_ENC_RCT_H_
+
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+Status FwdRCT(Image &input, size_t begin_c, size_t rct_type);  // In-place forward RCT on channels begin_c..begin_c+2; returns false when rct_type is 0.
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_ENC_RCT_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_squeeze.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_squeeze.cc
new file mode 100644
index 0000000000..7a3219e677
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_squeeze.cc
@@ -0,0 +1,140 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/enc_squeeze.h"
+
+#include <stdlib.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/squeeze.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+void FwdHSqueeze(Image &input, int c, int rc) {  // Replaces channel c by its half-width averages and inserts the residual channel at index rc.
+  const Channel &chin = input.channel[c];
+
+  JXL_DEBUG_V(4, "Doing horizontal squeeze of channel %i to new channel %i", c,
+              rc);
+
+  Channel chout((chin.w + 1) / 2, chin.h, chin.hshift + 1, chin.vshift);  // ceil(w / 2) averages per row
+  Channel chout_residual(chin.w - chout.w, chout.h, chin.hshift + 1,  // floor(w / 2) residuals per row
+                         chin.vshift);
+
+  for (size_t y = 0; y < chout.h; y++) {
+    const pixel_type *JXL_RESTRICT p_in = chin.Row(y);
+    pixel_type *JXL_RESTRICT p_out = chout.Row(y);
+    pixel_type *JXL_RESTRICT p_res = chout_residual.Row(y);
+    for (size_t x = 0; x < chout_residual.w; x++) {  // one iteration per complete (A, B) pixel pair
+      pixel_type A = p_in[x * 2];
+      pixel_type B = p_in[x * 2 + 1];
+      pixel_type avg = (A + B + (A > B)) >> 1;  // pair average; the (A > B) term adds 1 before the shift when A > B
+      p_out[x] = avg;
+
+      pixel_type diff = A - B;
+
+      pixel_type next_avg = avg;  // fallback when nothing follows this pair
+      if (x + 1 < chout_residual.w) {  // a complete pair follows: use its average
+        next_avg = (p_in[x * 2 + 2] + p_in[x * 2 + 3] +
+                    (p_in[x * 2 + 2] > p_in[x * 2 + 3])) >>
+                   1;  // which will be chout.value(y,x+1)
+      } else if (chin.w & 1)  // odd width: only the unpaired last pixel follows
+        next_avg = p_in[x * 2 + 2];
+      pixel_type left = (x > 0 ? p_in[x * 2 - 1] : avg);  // input pixel just before this pair (avg at the row start)
+      pixel_type tendency = SmoothTendency(left, avg, next_avg);
+
+      p_res[x] = diff - tendency;  // residual = pair difference minus the SmoothTendency value
+    }
+    if (chin.w & 1) {  // odd width: the last pixel has no partner and is copied unchanged
+      int x = chout.w - 1;
+      p_out[x] = p_in[x * 2];
+    }
+  }
+  input.channel[c] = std::move(chout);  // overwrites the slot chin refers to; chin is not read after this
+  input.channel.insert(input.channel.begin() + rc, std::move(chout_residual));
+}
+
+void FwdVSqueeze(Image &input, int c, int rc) {  // Replaces channel c by its half-height averages and inserts the residual channel at index rc.
+  const Channel &chin = input.channel[c];
+
+  JXL_DEBUG_V(4, "Doing vertical squeeze of channel %i to new channel %i", c,
+              rc);
+
+  Channel chout(chin.w, (chin.h + 1) / 2, chin.hshift, chin.vshift + 1);  // ceil(h / 2) rows of averages
+  Channel chout_residual(chin.w, chin.h - chout.h, chin.hshift,  // floor(h / 2) rows of residuals
+                         chin.vshift + 1);
+  intptr_t onerow_in = chin.plane.PixelsPerRow();  // added to a p_in index below to reach the same column in another row
+  for (size_t y = 0; y < chout_residual.h; y++) {  // one iteration per complete pair of input rows
+    const pixel_type *JXL_RESTRICT p_in = chin.Row(y * 2);
+    pixel_type *JXL_RESTRICT p_out = chout.Row(y);
+    pixel_type *JXL_RESTRICT p_res = chout_residual.Row(y);
+    for (size_t x = 0; x < chout.w; x++) {
+      pixel_type A = p_in[x];
+      pixel_type B = p_in[x + onerow_in];  // same column, next input row
+      pixel_type avg = (A + B + (A > B)) >> 1;  // pair average; the (A > B) term adds 1 before the shift when A > B
+      p_out[x] = avg;
+
+      pixel_type diff = A - B;
+
+      pixel_type next_avg = avg;  // fallback when nothing follows this row pair
+      if (y + 1 < chout_residual.h) {  // a complete row pair follows: use its average
+        next_avg = (p_in[x + 2 * onerow_in] + p_in[x + 3 * onerow_in] +
+                    (p_in[x + 2 * onerow_in] > p_in[x + 3 * onerow_in])) >>
+                   1;  // which will be chout.value(y+1,x)
+      } else if (chin.h & 1) {  // odd height: only the unpaired last row follows
+        next_avg = p_in[x + 2 * onerow_in];
+      }
+      pixel_type top =
+          (y > 0 ? p_in[static_cast<ssize_t>(x) - onerow_in] : avg);  // signed index: reads the row above (avg on the first row)
+      pixel_type tendency = SmoothTendency(top, avg, next_avg);
+
+      p_res[x] = diff - tendency;  // residual = pair difference minus the SmoothTendency value
+    }
+  }
+  if (chin.h & 1) {  // odd height: the last row has no partner and is copied unchanged
+    size_t y = chout.h - 1;
+    const pixel_type *p_in = chin.Row(y * 2);
+    pixel_type *p_out = chout.Row(y);
+    for (size_t x = 0; x < chout.w; x++) {
+      p_out[x] = p_in[x];
+    }
+  }
+  input.channel[c] = std::move(chout);  // overwrites the slot chin refers to; chin is not read after this
+  input.channel.insert(input.channel.begin() + rc, std::move(chout_residual));
+}
+
+Status FwdSqueeze(Image &input, std::vector<SqueezeParams> parameters,  // Applies each squeeze step in `parameters` to `input`, in order.
+                  ThreadPool *pool) {  // pool is not used in this function
+  if (parameters.empty()) {  // no explicit steps: fill the by-value copy with defaults
+    DefaultSqueezeParameters(&parameters, input);
+  }
+
+  for (size_t i = 0; i < parameters.size(); i++) {
+    JXL_RETURN_IF_ERROR(
+        CheckMetaSqueezeParams(parameters[i], input.channel.size()));  // checked against the current channel count
+    bool horizontal = parameters[i].horizontal;
+    bool in_place = parameters[i].in_place;
+    uint32_t beginc = parameters[i].begin_c;
+    uint32_t endc = parameters[i].begin_c + parameters[i].num_c - 1;  // last channel of the range, inclusive
+    uint32_t offset;  // index at which the first residual channel is inserted
+    if (in_place) {
+      offset = endc + 1;  // residuals go right after the squeezed range
+    } else {
+      offset = input.channel.size();  // residuals are appended after all existing channels
+    }
+    for (uint32_t c = beginc; c <= endc; c++) {
+      if (horizontal) {
+        FwdHSqueeze(input, c, offset + c - beginc);  // each call inserts one channel at the given index
+      } else {
+        FwdVSqueeze(input, c, offset + c - beginc);
+      }
+    }
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_squeeze.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_squeeze.h
new file mode 100644
index 0000000000..39b001017b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_squeeze.h
@@ -0,0 +1,20 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_SQUEEZE_H_
+#define LIB_JXL_MODULAR_TRANSFORM_ENC_SQUEEZE_H_
+
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+Status FwdSqueeze(Image &input, std::vector<SqueezeParams> parameters,  // Encoder-side squeeze; an empty `parameters` selects the default steps.
+                  ThreadPool *pool);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_ENC_SQUEEZE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_transform.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_transform.cc
new file mode 100644
index 0000000000..2d7c2949e3
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_transform.cc
@@ -0,0 +1,46 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/enc_transform.h"
+
+#include "lib/jxl/modular/transform/enc_palette.h"
+#include "lib/jxl/modular/transform/enc_rct.h"
+#include "lib/jxl/modular/transform/enc_squeeze.h"
+
+namespace jxl {
+
+Status TransformForward(Transform &t, Image &input,  // Dispatches on t.id to the matching Fwd* routine and returns its Status.
+                        const weighted::Header &wp_header, ThreadPool *pool) {
+  switch (t.id) {
+    case TransformId::kRCT:
+      return FwdRCT(input, t.begin_c, t.rct_type);
+    case TransformId::kSqueeze:
+      return FwdSqueeze(input, t.squeezes, pool);  // FwdSqueeze takes the vector by value, so t.squeezes is copied
+    case TransformId::kPalette:
+      return FwdPalette(input, t.begin_c, t.begin_c + t.num_c - 1, t.nb_colors,  // end channel is inclusive
+                        t.ordered_palette, t.lossy_palette, t.predictor,  // nb_colors and predictor are reference parameters
+                        wp_header);
+    default:
+      return JXL_FAILURE("Unknown transformation (ID=%u)",
+                         static_cast<unsigned int>(t.id));
+  }
+}
+
+void compute_minmax(const Channel &ch, pixel_type *min, pixel_type *max) {  // Finds the smallest and largest sample in ch.
+  pixel_type realmin = std::numeric_limits<pixel_type>::max();  // start inverted so any sample replaces it
+  pixel_type realmax = std::numeric_limits<pixel_type>::min();
+  for (size_t y = 0; y < ch.h; y++) {
+    const pixel_type *JXL_RESTRICT p = ch.Row(y);
+    for (size_t x = 0; x < ch.w; x++) {
+      if (p[x] < realmin) realmin = p[x];
+      if (p[x] > realmax) realmax = p[x];
+    }
+  }
+
+  if (min) *min = realmin;  // either output pointer may be null; with no samples the inverted start values are written
+  if (max) *max = realmax;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_transform.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_transform.h
new file mode 100644
index 0000000000..07659e1b0a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/enc_transform.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_TRANSFORM_H_
+#define LIB_JXL_MODULAR_TRANSFORM_ENC_TRANSFORM_H_
+
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+Status TransformForward(Transform &t, Image &input,  // Applies the encoder-side transform selected by t.id to input.
+                        const weighted::Header &wp_header, ThreadPool *pool);
+
+void compute_minmax(const Channel &ch, pixel_type *min, pixel_type *max);  // Writes the smallest/largest sample of ch; either pointer may be null.
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_ENC_TRANSFORM_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/jxl_transform.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/jxl_transform.cc
new file mode 100644
index 0000000000..e63013a38c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/jxl_transform.cc
@@ -0,0 +1,92 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/transform.h"
+
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/palette.h"
+#include "lib/jxl/modular/transform/rct.h"
+#include "lib/jxl/modular/transform/squeeze.h"
+
+namespace jxl {
+
+SqueezeParams::SqueezeParams() { Bundle::Init(this); }  // Default construction is delegated to Bundle::Init.
+Transform::Transform(TransformId id) {  // Initialises via Bundle::Init, then records the requested transform id.
+  Bundle::Init(this);
+  this->id = id;  // the parameter shadows the member, hence this->; assigned after Init so it is the final value
+}
+
+Status Transform::Inverse(Image &input, const weighted::Header &wp_header,  // Dispatches on id to the matching Inv* routine and returns its Status.
+                          ThreadPool *pool) {
+  switch (id) {
+    case TransformId::kRCT:
+      return InvRCT(input, begin_c, rct_type);
+    case TransformId::kSqueeze:
+      return InvSqueeze(input, squeezes, pool);
+    case TransformId::kPalette:
+      return InvPalette(input, begin_c, nb_colors, nb_deltas, predictor,
+                        wp_header, pool);
+    default:  // any other id is reported as a failure
+      return JXL_FAILURE("Unknown transformation (ID=%u)",
+                         static_cast<unsigned int>(id));
+  }
+}
+
+Status Transform::MetaApply(Image &input) {  // Dispatches on id to the matching check/Meta* routine, logging the parameters first.
+  switch (id) {
+    case TransformId::kRCT:
+      JXL_DEBUG_V(2, "Transform: kRCT, rct_type=%" PRIu32, rct_type);
+      return CheckEqualChannels(input, begin_c, begin_c + 2);  // RCT only validates its three channels here
+    case TransformId::kSqueeze:
+      JXL_DEBUG_V(2, "Transform: kSqueeze:");
+#if JXL_DEBUG_V_LEVEL >= 2
+      {
+        auto squeezes_copy = squeezes;  // copy, so default-filling for the log does not modify squeezes
+        if (squeezes_copy.empty()) {
+          DefaultSqueezeParameters(&squeezes_copy, input);
+        }
+        for (const auto &params : squeezes_copy) {
+          JXL_DEBUG_V(
+              2,
+              "  squeeze params: horizontal=%d, in_place=%d, begin_c=%" PRIu32
+              ", num_c=%" PRIu32,
+              params.horizontal, params.in_place, params.begin_c, params.num_c);
+        }
+      }
+#endif
+      return MetaSqueeze(input, &squeezes);  // receives the member itself, not the logging copy
+    case TransformId::kPalette:
+      JXL_DEBUG_V(2,
+                  "Transform: kPalette, begin_c=%" PRIu32 ", num_c=%" PRIu32
+                  ", nb_colors=%" PRIu32 ", nb_deltas=%" PRIu32,
+                  begin_c, num_c, nb_colors, nb_deltas);
+      return MetaPalette(input, begin_c, begin_c + num_c - 1, nb_colors,  // end channel is passed inclusive
+                         nb_deltas, lossy_palette);
+    default:  // any other id is reported as a failure
+      return JXL_FAILURE("Unknown transformation (ID=%u)",
+                         static_cast<unsigned int>(id));
+  }
+}
+
+Status CheckEqualChannels(const Image &image, uint32_t c1, uint32_t c2) {  // Checks that channels c1..c2 (inclusive) share w, h, hshift and vshift.
+  if (c1 > image.channel.size() || c2 >= image.channel.size() || c2 < c1) {  // c2 < size with c1 <= c2 already implies c1 < size
+    return JXL_FAILURE("Invalid channel range");
+  }
+  if (c1 < image.nb_meta_channels && c2 >= image.nb_meta_channels) {  // range straddles the meta / non-meta boundary
+    return JXL_FAILURE("Invalid: transforming mix of meta and nonmeta");
+  }
+  const auto &ch1 = image.channel[c1];  // reference channel that all others are compared against
+  for (size_t c = c1 + 1; c <= c2; c++) {
+    const auto &ch2 = image.channel[c];
+    if (ch1.w != ch2.w || ch1.h != ch2.h || ch1.hshift != ch2.hshift ||
+        ch1.vshift != ch2.vshift) {
+      return false;  // NOTE(review): plain false rather than JXL_FAILURE, so presumably no message is attached - confirm
+    }
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/palette.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/palette.h
new file mode 100644
index 0000000000..da5423afae
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/palette.h
@@ -0,0 +1,311 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_PALETTE_H_
+#define LIB_JXL_MODULAR_TRANSFORM_PALETTE_H_
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"  // CheckEqualChannels
+
+namespace jxl {
+
+namespace palette_internal {
+
+static constexpr int kMaxPaletteLookupTableSize = 1 << 16;
+
+static constexpr int kCubePow = 3;
+
+// 5x5x5 color cube for the larger cube.
+static constexpr int kLargeCube = 5;
+
+// Smaller interleaved color cube to fill the holes of the larger cube.
+static constexpr int kSmallCube = kLargeCube - 1;
+// kSmallCube ** kCubePow
+static constexpr int kLargeCubeOffset = kSmallCube * kSmallCube * kSmallCube;
+
+static constexpr pixel_type Scale(int value, int bit_depth, int denom) {
+  return (((pixel_type_w{1} << bit_depth) - 1) * value) / denom;  // value * (2^bit_depth - 1) / denom, in pixel_type_w
+}
+
+// The purpose of this function is solely to extend the interpretation of
+// palette indices to implicit values. If index < nb_deltas, indicating that the
+// result is a delta palette entry, it is the responsibility of the caller to
+// treat it as such.
+static pixel_type GetPaletteValue(const pixel_type *const palette, int index,
+                                  const size_t c, const int palette_size,
+                                  const int onerow, const int bit_depth) {
+  if (index < 0) {  // Negative index: implicit delta entry from the table below.
+    static constexpr std::array<std::array<pixel_type, 3>, 72> kDeltaPalette = {
+        {
+            {0, 0, 0},       {4, 4, 4},       {11, 0, 0},      {0, 0, -13},
+            {0, -12, 0},     {-10, -10, -10}, {-18, -18, -18}, {-27, -27, -27},
+            {-18, -18, 0},   {0, 0, -32},     {-32, 0, 0},     {-37, -37, -37},
+            {0, -32, -32},   {24, 24, 45},    {50, 50, 50},    {-45, -24, -24},
+            {-24, -45, -45}, {0, -24, -24},   {-34, -34, 0},   {-24, 0, -24},
+            {-45, -45, -24}, {64, 64, 64},    {-32, 0, -32},   {0, -32, 0},
+            {-32, 0, 32},    {-24, -45, -24}, {45, 24, 45},    {24, -24, -45},
+            {-45, -24, 24},  {80, 80, 80},    {64, 0, 0},      {0, 0, -64},
+            {0, -64, -64},   {-24, -24, 45},  {96, 96, 96},    {64, 64, 0},
+            {45, -24, -24},  {34, -34, 0},    {112, 112, 112}, {24, -45, -45},
+            {45, 45, -24},   {0, -32, 32},    {24, -24, 45},   {0, 96, 96},
+            {45, -24, 24},   {24, -45, -24},  {-24, -45, 24},  {0, -64, 0},
+            {96, 0, 0},      {128, 128, 128}, {64, 0, 64},     {144, 144, 144},
+            {96, 96, 0},     {-36, -36, 36},  {45, -24, -45},  {45, -45, -24},
+            {0, 0, -96},     {0, 128, 128},   {0, 96, 0},      {45, 24, -45},
+            {-128, 0, 0},    {24, -45, 24},   {-45, 24, -45},  {64, 0, -64},
+            {64, -64, -64},  {96, 0, 96},     {45, -45, 24},   {24, 45, -45},
+            {64, 64, -64},   {128, 128, 0},   {0, 0, -128},    {-24, 45, -45},
+        }};
+    if (c >= kDeltaPalette[0].size()) {  // The table only has 3 components.
+      return 0;
+    }
+    // Do not open the brackets, otherwise INT32_MIN negation could overflow.
+    index = -(index + 1);
+    index %= 1 + 2 * (kDeltaPalette.size() - 1);  // Wraps to 143 signed variants.
+    static constexpr int kMultiplier[] = {-1, 1};  // Even index negates the entry.
+    pixel_type result =
+        kDeltaPalette[((index + 1) >> 1)][c] * kMultiplier[index & 1];
+    if (bit_depth > 8) {  // Table values are multiplied by 2^(bit_depth - 8).
+      result *= static_cast<pixel_type>(1) << (bit_depth - 8);
+    }
+    return result;
+  } else if (palette_size <= index && index < palette_size + kLargeCubeOffset) {
+    if (c >= kCubePow) return 0;  // Implicit small cube: 3 components only.
+    index -= palette_size;
+    if (c > 0) {  // Select base-kSmallCube digit number c of the index.
+      int divisor = kSmallCube;
+      for (size_t i = 1; i < c; ++i) {
+        divisor *= kSmallCube;
+      }
+      index /= divisor;
+    }
+    return Scale(index % kSmallCube, bit_depth, kSmallCube) +
+           (1 << (std::max(0, bit_depth - 3)));
+  } else if (palette_size + kLargeCubeOffset <= index) {
+    if (c >= kCubePow) return 0;  // Implicit large cube: 3 components only.
+    index -= palette_size + kLargeCubeOffset;
+    // TODO(eustas): should we take care of ambiguity created by
+    //               index >= kLargeCube ** 3 ?
+    if (c > 0) {  // Select base-kLargeCube digit number c of the index.
+      int divisor = kLargeCube;
+      for (size_t i = 1; i < c; ++i) {
+        divisor *= kLargeCube;
+      }
+      index /= divisor;
+    }
+    return Scale(index % kLargeCube, bit_depth, kLargeCube - 1);
+  }
+
+  return palette[c * onerow + static_cast<size_t>(index)];  // Explicit entry: 0 <= index < palette_size.
+}
+
+}  // namespace palette_internal
+
+static Status InvPalette(Image &input, uint32_t begin_c, uint32_t nb_colors,
+                         uint32_t nb_deltas, Predictor predictor,
+                         const weighted::Header &wp_header, ThreadPool *pool) {  // Turns the index channel back into color channels.
+  if (input.nb_meta_channels < 1) {
+    return JXL_FAILURE("Error: Palette transform without palette.");
+  }
+  std::atomic<int> num_errors{0};
+  int nb = input.channel[0].h;  // Palette rows = number of output channels.
+  uint32_t c0 = begin_c + 1;  // Index channel; the palette itself is channel 0.
+  if (c0 >= input.channel.size()) {
+    return JXL_FAILURE("Channel is out of range.");
+  }
+  size_t w = input.channel[c0].w;
+  size_t h = input.channel[c0].h;
+  if (nb < 1) return JXL_FAILURE("Corrupted transforms");
+  for (int i = 1; i < nb; i++) {  // Insert nb - 1 extra channels after c0.
+    input.channel.insert(
+        input.channel.begin() + c0 + 1,
+        Channel(w, h, input.channel[c0].hshift, input.channel[c0].vshift));
+  }
+  const Channel &palette = input.channel[0];
+  const pixel_type *JXL_RESTRICT p_palette = input.channel[0].Row(0);
+  intptr_t onerow = input.channel[0].plane.PixelsPerRow();
+  intptr_t onerow_image = input.channel[c0].plane.PixelsPerRow();
+  const int bit_depth = input.bitdepth;
+
+  if (w == 0) {
+    // Nothing to do.
+    // Avoid touching "empty" channels with non-zero height.
+  } else if (nb_deltas == 0 && predictor == Predictor::Zero) {  // Plain lookup, one task per row.
+    if (nb == 1) {
+      RunOnPool(
+          pool, 0, h, ThreadPool::SkipInit(),
+          [&](const int task, const int thread) {
+            const size_t y = task;
+            pixel_type *p = input.channel[c0].Row(y);
+            for (size_t x = 0; x < w; x++) {
+              const int index = Clamp1(p[x], 0, (pixel_type)palette.w - 1);
+              p[x] = palette_internal::GetPaletteValue(
+                  p_palette, index, /*c=*/0,
+                  /*palette_size=*/palette.w,
+                  /*onerow=*/onerow, /*bit_depth=*/bit_depth);
+            }
+          },
+          "UndoChannelPalette");
+    } else {
+      RunOnPool(
+          pool, 0, h, ThreadPool::SkipInit(),
+          [&](const int task, const int thread) {
+            const size_t y = task;
+            std::vector<pixel_type *> p_out(nb);
+            const pixel_type *p_index = input.channel[c0].Row(y);
+            for (int c = 0; c < nb; c++)
+              p_out[c] = input.channel[c0 + c].Row(y);
+            for (size_t x = 0; x < w; x++) {
+              const int index = p_index[x];  // Read before p_out[0][x] (same row) is overwritten.
+              for (int c = 0; c < nb; c++) {
+                p_out[c][x] = palette_internal::GetPaletteValue(
+                    p_palette, index, /*c=*/c,
+                    /*palette_size=*/palette.w,
+                    /*onerow=*/onerow, /*bit_depth=*/bit_depth);
+              }
+            }
+          },
+          "UndoPalette");
+    }
+  } else {
+    // Parallelized per channel.
+    ImageI indices = CopyImage(input.channel[c0].plane);  // Keep the indices: channel c0 gets overwritten.
+    if (predictor == Predictor::Weighted) {
+      RunOnPool(
+          pool, 0, nb, ThreadPool::SkipInit(),
+          [&](size_t c, size_t _) {
+            Channel &channel = input.channel[c0 + c];
+            weighted::State wp_state(wp_header, channel.w, channel.h);
+            for (size_t y = 0; y < channel.h; y++) {
+              pixel_type *JXL_RESTRICT p = channel.Row(y);
+              const pixel_type *JXL_RESTRICT idx = indices.Row(y);
+              for (size_t x = 0; x < channel.w; x++) {
+                int index = idx[x];
+                pixel_type_w val = 0;
+                const pixel_type palette_entry =
+                    palette_internal::GetPaletteValue(
+                        p_palette, index, /*c=*/c,
+                        /*palette_size=*/palette.w, /*onerow=*/onerow,
+                        /*bit_depth=*/bit_depth);
+                if (index < static_cast<int32_t>(nb_deltas)) {  // Delta entry: add to the prediction.
+                  PredictionResult pred =
+                      PredictNoTreeWP(channel.w, p + x, onerow_image, x, y,
+                                      predictor, &wp_state);
+                  val = pred.guess + palette_entry;
+                } else {
+                  val = palette_entry;
+                }
+                p[x] = val;
+                wp_state.UpdateErrors(p[x], x, y, channel.w);
+              }
+            }
+          },
+          "UndoDeltaPaletteWP");
+    } else if (predictor == Predictor::Gradient) {
+      // Gradient is the most common predictor for now. This special case gives
+      // about 20% extra speed.
+      RunOnPool(
+          pool, 0, nb, ThreadPool::SkipInit(),
+          [&](size_t c, size_t _) {
+            Channel &channel = input.channel[c0 + c];
+            for (size_t y = 0; y < channel.h; y++) {
+              pixel_type *JXL_RESTRICT p = channel.Row(y);
+              const pixel_type *JXL_RESTRICT idx = indices.Row(y);
+              for (size_t x = 0; x < channel.w; x++) {
+                int index = idx[x];
+                pixel_type val = 0;
+                const pixel_type palette_entry =
+                    palette_internal::GetPaletteValue(
+                        p_palette, index, /*c=*/c,
+                        /*palette_size=*/palette.w,
+                        /*onerow=*/onerow, /*bit_depth=*/bit_depth);
+                if (index < static_cast<int32_t>(nb_deltas)) {  // Delta entry: add to the gradient prediction.
+                  pixel_type left =
+                      x ? p[x - 1] : (y ? *(p + x - onerow_image) : 0);
+                  pixel_type top = y ? *(p + x - onerow_image) : left;
+                  pixel_type topleft =
+                      x && y ? *(p + x - 1 - onerow_image) : left;
+                  val = PixelAdd(ClampedGradient(left, top, topleft),
+                                 palette_entry);
+                } else {
+                  val = palette_entry;
+                }
+                p[x] = val;
+              }
+            }
+          },
+          "UndoDeltaPaletteGradient");
+    } else {
+      RunOnPool(
+          pool, 0, nb, ThreadPool::SkipInit(),
+          [&](size_t c, size_t _) {
+            Channel &channel = input.channel[c0 + c];
+            for (size_t y = 0; y < channel.h; y++) {
+              pixel_type *JXL_RESTRICT p = channel.Row(y);
+              const pixel_type *JXL_RESTRICT idx = indices.Row(y);
+              for (size_t x = 0; x < channel.w; x++) {
+                int index = idx[x];
+                pixel_type_w val = 0;
+                const pixel_type palette_entry =
+                    palette_internal::GetPaletteValue(
+                        p_palette, index, /*c=*/c,
+                        /*palette_size=*/palette.w,
+                        /*onerow=*/onerow, /*bit_depth=*/bit_depth);
+                if (index < static_cast<int32_t>(nb_deltas)) {  // Delta entry: add to the prediction.
+                  PredictionResult pred = PredictNoTreeNoWP(
+                      channel.w, p + x, onerow_image, x, y, predictor);
+                  val = pred.guess + palette_entry;
+                } else {
+                  val = palette_entry;
+                }
+                p[x] = val;
+              }
+            }
+          },
+          "UndoDeltaPaletteNoWP");
+    }
+  }
+  if (c0 >= input.nb_meta_channels) {
+    // Palette was done on normal channels
+    input.nb_meta_channels--;
+  } else {
+    // Palette was done on metachannels
+    JXL_ASSERT(static_cast<int>(input.nb_meta_channels) >= 2 - nb);
+    input.nb_meta_channels -= 2 - nb;
+    JXL_ASSERT(begin_c + nb - 1 < input.nb_meta_channels);
+  }
+  input.channel.erase(input.channel.begin(), input.channel.begin() + 1);  // Drop the palette channel.
+  return num_errors.load(std::memory_order_relaxed) == 0;  // NOTE(review): num_errors is never modified in this function, so this is always true.
+}
+
+static Status MetaPalette(Image &input, uint32_t begin_c, uint32_t end_c,
+                          uint32_t nb_colors, uint32_t nb_deltas, bool lossy) {  // Channel-list bookkeeping only; `lossy` is unused here.
+  JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, end_c));
+
+  size_t nb = end_c - begin_c + 1;  // Number of channels replaced by the palette.
+  if (begin_c >= input.nb_meta_channels) {
+    // Palette was done on normal channels
+    input.nb_meta_channels++;
+  } else {
+    // Palette was done on metachannels
+    JXL_ASSERT(end_c < input.nb_meta_channels);
+    // we remove nb-1 metachannels and add one
+    input.nb_meta_channels += 2 - nb;  // NOTE(review): relies on unsigned wrap-around when nb > 2.
+  }
+  input.channel.erase(input.channel.begin() + begin_c + 1,
+                      input.channel.begin() + end_c + 1);  // Keep channel begin_c, drop the rest of the range.
+  Channel pch(nb_colors + nb_deltas, nb);  // Palette channel: one column per entry, one row per replaced channel.
+  pch.hshift = -1;
+  input.channel.insert(input.channel.begin(), std::move(pch));  // The palette becomes channel 0.
+  return true;
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_PALETTE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/rct.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/rct.h
new file mode 100644
index 0000000000..e6434de1d2
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/rct.h
@@ -0,0 +1,103 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_RCT_H_
+#define LIB_JXL_MODULAR_TRANSFORM_RCT_H_
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"  // CheckEqualChannels
+
+namespace jxl {
+
+template <int transform_type>
+void InvRCTRow(const pixel_type* in0, const pixel_type* in1,
+               const pixel_type* in2, pixel_type* out0, pixel_type* out1,
+               pixel_type* out2, size_t w) {  // Inverts the transform on one row of w pixels.
+  static_assert(transform_type >= 0 && transform_type < 7,
+                "Invalid transform type");
+  int second = transform_type >> 1;  // Selects how Second is restored (used for types 0..5).
+  int third = transform_type & 1;  // Non-zero: First is added back to Third.
+  for (size_t x = 0; x < w; x++) {
+    if (transform_type == 6) {  // YCoCg -> RGB.
+      pixel_type Y = in0[x];
+      pixel_type Co = in1[x];
+      pixel_type Cg = in2[x];
+      pixel_type tmp = PixelAdd(Y, -(Cg >> 1));
+      pixel_type G = PixelAdd(Cg, tmp);
+      pixel_type B = PixelAdd(tmp, -(Co >> 1));
+      pixel_type R = PixelAdd(B, Co);
+      out0[x] = R;
+      out1[x] = G;
+      out2[x] = B;
+    } else {
+      pixel_type First = in0[x];  // All three inputs are read before any output is written.
+      pixel_type Second = in1[x];
+      pixel_type Third = in2[x];
+      if (third) Third = PixelAdd(Third, First);
+      if (second == 1) {
+        Second = PixelAdd(Second, First);
+      } else if (second == 2) {  // Uses the already restored Third.
+        Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1));
+      }
+      out0[x] = First;
+      out1[x] = Second;
+      out2[x] = Third;
+    }
+  }
+}
+
+// Undoes the RCT on channels begin_c..begin_c+2 (rct_type = 7 * permutation +
+// transform). `inline` because this definition lives in a header.
+inline Status InvRCT(Image& input, size_t begin_c, size_t rct_type) {
+  JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, begin_c + 2));
+  size_t m = begin_c;
+  Channel& c0 = input.channel[m + 0];
+  const size_t w = c0.w, h = c0.h;
+  if (rct_type == 0) return true;  // noop
+  // Fail (do not abort) on bad types; test the size_t before narrowing to int.
+  if (rct_type >= 6 * 7) return JXL_FAILURE("Invalid RCT type");
+  // Permutation: 0=RGB, 1=GBR, 2=BRG, 3=RBG, 4=GRB, 5=BGR
+  int permutation = rct_type / 7;
+  // 0-5 values have the low bit corresponding to Third and the high bits
+  // corresponding to Second. 6 corresponds to YCoCg.
+  //
+  // Second: 0=nop, 1=SubtractFirst, 2=SubtractAvgFirstThird
+  //
+  // Third: 0=nop, 1=SubtractFirst
+  int custom = rct_type % 7;
+  // Special case: permute-only. Swap channels around.
+  if (custom == 0) {
+    Channel ch0 = std::move(input.channel[m]);
+    Channel ch1 = std::move(input.channel[m + 1]);
+    Channel ch2 = std::move(input.channel[m + 2]);
+    input.channel[m + (permutation % 3)] = std::move(ch0);
+    input.channel[m + ((permutation + 1 + permutation / 3) % 3)] =
+        std::move(ch1);
+    input.channel[m + ((permutation + 2 - permutation / 3) % 3)] =
+        std::move(ch2);
+    return true;
+  }
+  constexpr decltype(&InvRCTRow<0>) inv_rct_row[] = {
+      InvRCTRow<0>, InvRCTRow<1>, InvRCTRow<2>, InvRCTRow<3>,
+      InvRCTRow<4>, InvRCTRow<5>, InvRCTRow<6>};
+  for (size_t y = 0; y < h; y++) {
+    const pixel_type* in0 = input.channel[m].Row(y);
+    const pixel_type* in1 = input.channel[m + 1].Row(y);
+    const pixel_type* in2 = input.channel[m + 2].Row(y);
+    pixel_type* out0 = input.channel[m + (permutation % 3)].Row(y);
+    pixel_type* out1 =
+        input.channel[m + ((permutation + 1 + permutation / 3) % 3)].Row(y);
+    pixel_type* out2 =
+        input.channel[m + ((permutation + 2 - permutation / 3) % 3)].Row(y);
+    inv_rct_row[custom](in0, in1, in2, out0, out1, out2, w);
+  }
+  return true;
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_RCT_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/squeeze.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/squeeze.cc
new file mode 100644
index 0000000000..3edbfc9cd1
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/squeeze.cc
@@ -0,0 +1,329 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/squeeze.h"
+
+#include <stdlib.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+void InvHSqueeze(Image &input, uint32_t c, uint32_t rc, ThreadPool *pool) {  // Undoes one horizontal squeeze of channel c.
+  JXL_ASSERT(c < input.channel.size());
+  JXL_ASSERT(rc < input.channel.size());
+  const Channel &chin = input.channel[c];  // Averages.
+  const Channel &chin_residual = input.channel[rc];  // Residuals.
+  // These must be valid since we ran MetaApply already.
+  JXL_ASSERT(chin.w == DivCeil(chin.w + chin_residual.w, 2));
+  JXL_ASSERT(chin.h == chin_residual.h);
+
+  if (chin_residual.w == 0) {
+    // Short-circuit: output channel has same dimensions as input.
+    input.channel[c].hshift--;
+    return;
+  }
+
+  // Note: chin.w >= chin_residual.w and at most 1 different.
+  Channel chout(chin.w + chin_residual.w, chin.h, chin.hshift - 1, chin.vshift);
+  JXL_DEBUG_V(4,
+              "Undoing horizontal squeeze of channel %i using residuals in "
+              "channel %i (going from width %zu to %zu)",
+              c, rc, chin.w, chout.w);
+
+  if (chin_residual.h == 0) {
+    // Short-circuit: channel with no pixels.
+    input.channel[c] = std::move(chout);
+    return;
+  }
+
+  RunOnPool(
+      pool, 0, chin.h, ThreadPool::SkipInit(),
+      [&](const int task, const int thread) {  // One task per row.
+        const size_t y = task;
+        const pixel_type *JXL_RESTRICT p_residual = chin_residual.Row(y);
+        const pixel_type *JXL_RESTRICT p_avg = chin.Row(y);
+        pixel_type *JXL_RESTRICT p_out = chout.Row(y);
+
+        // special case for x=0 so we don't have to check x>0
+        pixel_type_w avg = p_avg[0];
+        pixel_type_w next_avg = (1 < chin.w ? p_avg[1] : avg);
+        pixel_type_w tendency = SmoothTendency(avg, avg, next_avg);
+        pixel_type_w diff = p_residual[0] + tendency;
+        pixel_type_w A =
+            ((avg * 2) + diff + (diff > 0 ? -(diff & 1) : (diff & 1))) >> 1;
+        pixel_type_w B = A - diff;
+        p_out[0] = A;
+        p_out[1] = B;  // Safe: chin_residual.w >= 1 here, so chout.w >= 2.
+
+        for (size_t x = 1; x < chin_residual.w; x++) {
+          pixel_type_w diff_minus_tendency = p_residual[x];
+          pixel_type_w avg = p_avg[x];
+          pixel_type_w next_avg = (x + 1 < chin.w ? p_avg[x + 1] : avg);
+          pixel_type_w left = p_out[(x << 1) - 1];  // Previously reconstructed pixel.
+          pixel_type_w tendency = SmoothTendency(left, avg, next_avg);
+          pixel_type_w diff = diff_minus_tendency + tendency;
+          pixel_type_w A =
+              ((avg * 2) + diff + (diff > 0 ? -(diff & 1) : (diff & 1))) >> 1;
+          p_out[x << 1] = A;
+          pixel_type_w B = A - diff;
+          p_out[(x << 1) + 1] = B;
+        }
+        if (chout.w & 1) p_out[chout.w - 1] = p_avg[chin.w - 1];  // Odd width: last pixel is copied from its average.
+      },
+      "InvHorizontalSqueeze");
+  input.channel[c] = std::move(chout);
+}
+
+void InvVSqueeze(Image &input, uint32_t c, uint32_t rc, ThreadPool *pool) {  // Undoes one vertical squeeze of channel c.
+  JXL_ASSERT(c < input.channel.size());
+  JXL_ASSERT(rc < input.channel.size());
+  const Channel &chin = input.channel[c];  // Averages.
+  const Channel &chin_residual = input.channel[rc];  // Residuals.
+  // These must be valid since we ran MetaApply already.
+  JXL_ASSERT(chin.h == DivCeil(chin.h + chin_residual.h, 2));
+  JXL_ASSERT(chin.w == chin_residual.w);
+
+  if (chin_residual.h == 0) {
+    // Short-circuit: output channel has same dimensions as input.
+    input.channel[c].vshift--;
+    return;
+  }
+
+  // Note: chin.h >= chin_residual.h and at most 1 different.
+  Channel chout(chin.w, chin.h + chin_residual.h, chin.hshift, chin.vshift - 1);
+  JXL_DEBUG_V(
+      4,
+      "Undoing vertical squeeze of channel %i using residuals in channel "
+      "%i (going from height %zu to %zu)",
+      c, rc, chin.h, chout.h);
+
+  if (chin_residual.w == 0) {
+    // Short-circuit: channel with no pixels.
+    input.channel[c] = std::move(chout);
+    return;
+  }
+
+  intptr_t onerow_in = chin.plane.PixelsPerRow();
+  intptr_t onerow_out = chout.plane.PixelsPerRow();
+  constexpr int kColsPerThread = 64;
+  RunOnPool(
+      pool, 0, DivCeil(chin.w, kColsPerThread), ThreadPool::SkipInit(),
+      [&](const int task, const int thread) {  // One task per strip of up to kColsPerThread columns.
+        const size_t x0 = task * kColsPerThread;
+        const size_t x1 = std::min((size_t)(task + 1) * kColsPerThread, chin.w);
+        // We only iterate up to std::min(chin_residual.h, chin.h) which is
+        // always chin_residual.h.
+        for (size_t y = 0; y < chin_residual.h; y++) {
+          const pixel_type *JXL_RESTRICT p_residual = chin_residual.Row(y);
+          const pixel_type *JXL_RESTRICT p_avg = chin.Row(y);
+          pixel_type *JXL_RESTRICT p_out = chout.Row(y << 1);
+          for (size_t x = x0; x < x1; x++) {
+            pixel_type_w diff_minus_tendency = p_residual[x];
+            pixel_type_w avg = p_avg[x];
+
+            pixel_type_w next_avg = avg;
+            if (y + 1 < chin.h) next_avg = p_avg[x + onerow_in];  // Average one row below.
+            pixel_type_w top =
+                (y > 0 ? p_out[static_cast<ssize_t>(x) - onerow_out] : avg);  // Output pixel one row above.
+            pixel_type_w tendency = SmoothTendency(top, avg, next_avg);
+            pixel_type_w diff = diff_minus_tendency + tendency;
+            pixel_type_w out =
+                ((avg * 2) + diff + (diff > 0 ? -(diff & 1) : (diff & 1))) >> 1;
+
+            p_out[x] = out;
+            // If the chin_residual.h == chin.h, the output has an even number
+            // of rows so the next line is fine. Otherwise, this loop won't
+            // write to the last output row which is handled separately.
+            p_out[x + onerow_out] = p_out[x] - diff;
+          }
+        }
+      },
+      "InvVertSqueeze");
+
+  if (chout.h & 1) {  // Odd height: the last row is copied from its averages.
+    size_t y = chin.h - 1;
+    const pixel_type *p_avg = chin.Row(y);
+    pixel_type *p_out = chout.Row(y << 1);
+    for (size_t x = 0; x < chin.w; x++) {
+      p_out[x] = p_avg[x];
+    }
+  }
+  input.channel[c] = std::move(chout);
+}
+
+// Fills *parameters with the default squeeze steps for image's non-meta channels.
+void DefaultSqueezeParameters(std::vector<SqueezeParams> *parameters,
+                              const Image &image) {
+  int nb_channels = image.channel.size() - image.nb_meta_channels;
+  parameters->clear();
+  if (nb_channels < 1) return;  // No non-meta channel: nothing to squeeze, and reading w/h below would be out of bounds.
+  size_t w = image.channel[image.nb_meta_channels].w;
+  size_t h = image.channel[image.nb_meta_channels].h;
+  JXL_DEBUG_V(7, "Default squeeze parameters for %zux%zu image: ", w, h);
+  // do horizontal first on wide images; vertical first on tall images
+  bool wide = (w > h);
+
+  if (nb_channels > 2 && image.channel[image.nb_meta_channels + 1].w == w &&
+      image.channel[image.nb_meta_channels + 1].h == h) {
+    // assume channels 1 and 2 are chroma, and can be squeezed first for 4:2:0
+    // previews
+    JXL_DEBUG_V(7, "(4:2:0 chroma), %zux%zu image", w, h);
+    SqueezeParams params;
+    // horizontal chroma squeeze
+    params.horizontal = true;
+    params.in_place = false;
+    params.begin_c = image.nb_meta_channels + 1;
+    params.num_c = 2;
+    parameters->push_back(params);
+    params.horizontal = false;
+    // vertical chroma squeeze
+    parameters->push_back(params);
+  }
+  SqueezeParams params;
+  params.begin_c = image.nb_meta_channels;
+  params.num_c = nb_channels;
+  params.in_place = true;
+
+  if (!wide) {
+    if (h > JXL_MAX_FIRST_PREVIEW_SIZE) {
+      params.horizontal = false;
+      parameters->push_back(params);
+      h = (h + 1) / 2;
+      JXL_DEBUG_V(7, "Vertical (%zux%zu), ", w, h);
+    }
+  }
+  while (w > JXL_MAX_FIRST_PREVIEW_SIZE || h > JXL_MAX_FIRST_PREVIEW_SIZE) {
+    if (w > JXL_MAX_FIRST_PREVIEW_SIZE) {
+      params.horizontal = true;
+      parameters->push_back(params);
+      w = (w + 1) / 2;
+      JXL_DEBUG_V(7, "Horizontal (%zux%zu), ", w, h);
+    }
+    if (h > JXL_MAX_FIRST_PREVIEW_SIZE) {
+      params.horizontal = false;
+      parameters->push_back(params);
+      h = (h + 1) / 2;
+      JXL_DEBUG_V(7, "Vertical (%zux%zu), ", w, h);
+    }
+  }
+  JXL_DEBUG_V(7, "that's it");
+}
+
+Status CheckMetaSqueezeParams(const SqueezeParams &parameter,
+                              int num_channels) {
+  const int first = parameter.begin_c;  // First channel of the range.
+  const int last = parameter.begin_c + parameter.num_c - 1;  // Last, inclusive.
+  const bool first_ok = first >= 0 && first < num_channels;
+  const bool last_ok = last >= first && last < num_channels;
+  if (first_ok && last_ok) return true;
+  return JXL_FAILURE("Invalid channel range");
+}
+
+Status MetaSqueeze(Image &image, std::vector<SqueezeParams> *parameters) {  // Applies the squeeze steps to the channel layout.
+  if (parameters->empty()) {
+    DefaultSqueezeParameters(parameters, image);  // Writes the defaults back into the caller's vector.
+  }
+
+  for (size_t i = 0; i < parameters->size(); i++) {
+    JXL_RETURN_IF_ERROR(
+        CheckMetaSqueezeParams((*parameters)[i], image.channel.size()));
+    bool horizontal = (*parameters)[i].horizontal;
+    bool in_place = (*parameters)[i].in_place;
+    uint32_t beginc = (*parameters)[i].begin_c;
+    uint32_t endc = (*parameters)[i].begin_c + (*parameters)[i].num_c - 1;
+
+    uint32_t offset;  // Index at which the first residual channel is inserted.
+    if (beginc < image.nb_meta_channels) {
+      if (endc >= image.nb_meta_channels) {
+        return JXL_FAILURE("Invalid squeeze: mix of meta and nonmeta channels");
+      }
+      if (!in_place)
+        return JXL_FAILURE(
+            "Invalid squeeze: meta channels require in-place residuals");
+      image.nb_meta_channels += (*parameters)[i].num_c;  // The inserted residuals are meta channels too.
+    }
+    if (in_place) {
+      offset = endc + 1;  // Residuals directly follow the squeezed range.
+    } else {
+      offset = image.channel.size();  // Residuals are appended at the end.
+    }
+    for (uint32_t c = beginc; c <= endc; c++) {
+      if (image.channel[c].hshift > 30 || image.channel[c].vshift > 30) {
+        return JXL_FAILURE("Too many squeezes: shift > 30");
+      }
+      size_t w = image.channel[c].w;
+      size_t h = image.channel[c].h;
+      if (horizontal) {
+        image.channel[c].w = (w + 1) / 2;
+        image.channel[c].hshift++;
+        w = w - (w + 1) / 2;  // Width left for the residual channel.
+      } else {
+        image.channel[c].h = (h + 1) / 2;
+        image.channel[c].vshift++;
+        h = h - (h + 1) / 2;  // Height left for the residual channel.
+      }
+      image.channel[c].shrink();
+      Channel dummy(w, h);  // Residual channel; takes the squeezed channel's shifts.
+      dummy.hshift = image.channel[c].hshift;
+      dummy.vshift = image.channel[c].vshift;
+
+      image.channel.insert(image.channel.begin() + offset + (c - beginc),
+                           std::move(dummy));
+    }
+  }
+  return true;
+}
+
+Status InvSqueeze(Image &input, std::vector<SqueezeParams> parameters,
+                  ThreadPool *pool) {  // Undoes the squeeze steps and removes the residual channels.
+  if (parameters.empty()) {
+    DefaultSqueezeParameters(&parameters, input);
+  }
+
+  for (int i = parameters.size() - 1; i >= 0; i--) {  // Steps are undone last to first.
+    JXL_RETURN_IF_ERROR(
+        CheckMetaSqueezeParams(parameters[i], input.channel.size()));
+    bool horizontal = parameters[i].horizontal;
+    bool in_place = parameters[i].in_place;
+    uint32_t beginc = parameters[i].begin_c;
+    uint32_t endc = parameters[i].begin_c + parameters[i].num_c - 1;
+    uint32_t offset;  // Index of the first residual channel of this step.
+    if (in_place) {
+      offset = endc + 1;
+    } else {
+      offset = input.channel.size() + beginc - endc - 1;  // The last num_c channels.
+    }
+    if (beginc < input.nb_meta_channels) {
+      // This is checked in MetaSqueeze.
+      JXL_ASSERT(input.nb_meta_channels > parameters[i].num_c);
+      input.nb_meta_channels -= parameters[i].num_c;
+    }
+
+    for (uint32_t c = beginc; c <= endc; c++) {
+      uint32_t rc = offset + c - beginc;  // Residual channel paired with channel c.
+      // MetaApply should imply that `rc` is within range, otherwise there's a
+      // programming bug.
+      JXL_ASSERT(rc < input.channel.size());
+      if ((input.channel[c].w < input.channel[rc].w) ||
+          (input.channel[c].h < input.channel[rc].h)) {
+        return JXL_FAILURE("Corrupted squeeze transform");
+      }
+      if (horizontal) {
+        InvHSqueeze(input, c, rc, pool);
+      } else {
+        InvVSqueeze(input, c, rc, pool);
+      }
+    }
+    input.channel.erase(input.channel.begin() + offset,
+                        input.channel.begin() + offset + (endc - beginc + 1));  // Residuals are no longer needed.
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/squeeze.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/squeeze.h
new file mode 100644
index 0000000000..a2d3afdc6e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/squeeze.h
@@ -0,0 +1,94 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_SQUEEZE_H_
+#define LIB_JXL_MODULAR_TRANSFORM_SQUEEZE_H_
+
+// Haar-like transform: halves the resolution in one direction
+// A B   -> (A+B)>>1              in one channel (average)  -> same range as
+// original channel
+//          A-B - tendency        in a new channel ('residual' needed to make
+//          the transform reversible)
+//                                        -> theoretically range could be 2.5
+//                                        times larger (2 times without the
+//                                        'tendency'), but there should be lots
+//                                        of zeroes
+// Repeated application (alternating horizontal and vertical squeezes) results
+// in downscaling
+//
+// The default coefficient ordering is low-frequency to high-frequency, as in
+// M. Antonini, M. Barlaud, P. Mathieu and I. Daubechies, "Image coding using
+// wavelet transform", IEEE Transactions on Image Processing, vol. 1, no. 2, pp.
+// 205-220, April 1992, doi: 10.1109/83.136597.
+
+#include <stdlib.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+#define JXL_MAX_FIRST_PREVIEW_SIZE 8
+
+namespace jxl {
+
+/*
+        int avg=(A+B)>>1;
+        int diff=(A-B);
+        int rA=(diff+(avg<<1)+(diff&1))>>1;
+        int rB=rA-diff;
+
+*/
+//         |A B|C D|E F|
+//           p   a   n             p=avg(A,B), a=avg(C,D), n=avg(E,F)
+//
+// Goal: estimate C-D (avoiding ringing artifacts)
+// (ensuring that in smooth areas, a zero residual corresponds to a smooth
+// gradient)
+
+// best estimate for C: (B + 2*a)/3
+// best estimate for D: (n + 3*a)/4
+// best estimate for C-D:  (4*B - 3*n - a)/12
+
+// avoid ringing by 1) only doing this if B <= a <= n  or  B >= a >= n
+// (otherwise, this is not a smooth area and we cannot really estimate C-D)
+//                  2) making sure that B <= C <= D <= n  or B >= C >= D >= n
+
+inline pixel_type_w SmoothTendency(pixel_type_w B, pixel_type_w a,
+                                   pixel_type_w n) {  // Estimate of C-D from previous pixel B, average a and next average n.
+  pixel_type_w diff = 0;  // Stays 0 when B, a, n are not monotonic.
+  if (B >= a && a >= n) {  // Non-increasing: the estimate is >= 0 and clamped from above.
+    diff = (4 * B - 3 * n - a + 6) / 12;
+    //      2C = a<<1 + diff - diff&1 <= 2B  so diff - diff&1 <= 2B - 2a
+    //      2D = a<<1 - diff - diff&1 >= 2n  so diff + diff&1 <= 2a - 2n
+    if (diff - (diff & 1) > 2 * (B - a)) diff = 2 * (B - a) + 1;
+    if (diff + (diff & 1) > 2 * (a - n)) diff = 2 * (a - n);
+  } else if (B <= a && a <= n) {  // Non-decreasing: the estimate is <= 0 and clamped from below.
+    diff = (4 * B - 3 * n - a - 6) / 12;
+    //      2C = a<<1 + diff + diff&1 >= 2B  so diff + diff&1 >= 2B - 2a
+    //      2D = a<<1 - diff + diff&1 <= 2n  so diff - diff&1 >= 2a - 2n
+    if (diff + (diff & 1) < 2 * (B - a)) diff = 2 * (B - a) - 1;
+    if (diff - (diff & 1) < 2 * (a - n)) diff = 2 * (a - n);
+  }
+  return diff;
+}
+
+void InvHSqueeze(Image &input, uint32_t c, uint32_t rc, ThreadPool *pool);
+// Both undo one squeeze of channel c with residual channel rc; uint32_t indices match the squeeze.cc definitions.
+void InvVSqueeze(Image &input, uint32_t c, uint32_t rc, ThreadPool *pool);
+
+void DefaultSqueezeParameters(std::vector<SqueezeParams> *parameters,
+                              const Image &image);
+
+Status CheckMetaSqueezeParams(const SqueezeParams &parameter, int num_channels);
+
+Status MetaSqueeze(Image &image, std::vector<SqueezeParams> *parameters);
+
+Status InvSqueeze(Image &input, std::vector<SqueezeParams> parameters,
+                  ThreadPool *pool);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_SQUEEZE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/transform.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/transform.cc
new file mode 100644
index 0000000000..e63013a38c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/transform.cc
@@ -0,0 +1,92 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/transform.h"
+
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/palette.h"
+#include "lib/jxl/modular/transform/rct.h"
+#include "lib/jxl/modular/transform/squeeze.h"
+
+namespace jxl {
+
+SqueezeParams::SqueezeParams() { Bundle::Init(this); }
+Transform::Transform(TransformId id) {
+  Bundle::Init(this);  // runs first, so the assignment below is what sticks
+  this->id = id;
+}
+
+Status Transform::Inverse(Image &input, const weighted::Header &wp_header,
+                          ThreadPool *pool) {
+  // Hand off to the inverse routine matching `id` and return its Status;
+  // wp_header is consumed by the palette inverse only.
+  if (id == TransformId::kRCT) return InvRCT(input, begin_c, rct_type);
+  if (id == TransformId::kSqueeze) return InvSqueeze(input, squeezes, pool);
+  if (id == TransformId::kPalette) {
+    return InvPalette(input, begin_c, nb_colors, nb_deltas, predictor,
+                      wp_header, pool);
+  }
+
+  // No other id has an inverse.
+  return JXL_FAILURE("Unknown transformation (ID=%u)",
+                     static_cast<unsigned int>(id));
+}
+
+Status Transform::MetaApply(Image &input) {
+  switch (id) {  // no fall-through: every case returns
+    case TransformId::kRCT:
+      JXL_DEBUG_V(2, "Transform: kRCT, rct_type=%" PRIu32, rct_type);
+      return CheckEqualChannels(input, begin_c, begin_c + 2);  // 3 channels
+    case TransformId::kSqueeze:
+      JXL_DEBUG_V(2, "Transform: kSqueeze:");
+#if JXL_DEBUG_V_LEVEL >= 2
+      {  // debug-only dump; it works on a copy, `squeezes` is not modified
+        auto squeezes_copy = squeezes;
+        if (squeezes_copy.empty()) {
+          DefaultSqueezeParameters(&squeezes_copy, input);
+        }
+        for (const auto &params : squeezes_copy) {
+          JXL_DEBUG_V(
+              2,
+              "  squeeze params: horizontal=%d, in_place=%d, begin_c=%" PRIu32
+              ", num_c=%" PRIu32,
+              params.horizontal, params.in_place, params.begin_c, params.num_c);
+        }
+      }
+#endif
+      return MetaSqueeze(input, &squeezes);
+    case TransformId::kPalette:
+      JXL_DEBUG_V(2,
+                  "Transform: kPalette, begin_c=%" PRIu32 ", num_c=%" PRIu32
+                  ", nb_colors=%" PRIu32 ", nb_deltas=%" PRIu32,
+                  begin_c, num_c, nb_colors, nb_deltas);
+      return MetaPalette(input, begin_c, begin_c + num_c - 1, nb_colors,
+                         nb_deltas, lossy_palette);
+    default:
+      return JXL_FAILURE("Unknown transformation (ID=%u)",
+                         static_cast<unsigned int>(id));
+  }
+}
+
+Status CheckEqualChannels(const Image &image, uint32_t c1, uint32_t c2) {
+  const size_t num_channels = image.channel.size();
+  if (c1 > num_channels || c2 >= num_channels || c2 < c1) {
+    return JXL_FAILURE("Invalid channel range");
+  }
+  if (c1 < image.nb_meta_channels && c2 >= image.nb_meta_channels) {
+    return JXL_FAILURE("Invalid: transforming mix of meta and nonmeta");
+  }
+  // Channels c1+1..c2 must match channel c1 in dimensions and shifts.
+  const auto &ref = image.channel[c1];
+  for (size_t idx = c1 + 1; idx <= c2; idx++) {
+    const auto &ch = image.channel[idx];
+    if (ref.w != ch.w || ref.h != ch.h) return false;
+    if (ref.hshift != ch.hshift || ref.vshift != ch.vshift) return false;
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/transform.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/transform.h
new file mode 100644
index 0000000000..0562d2fe3e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular/transform/transform.h
@@ -0,0 +1,148 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_TRANSFORM_H_
+#define LIB_JXL_MODULAR_TRANSFORM_TRANSFORM_H_
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+enum class TransformId : uint32_t {
+  // G, R-G, B-G and variants (including YCoCg).
+  kRCT = 0,
+
+  // Color palette. Visited as begin_c, num_c, nb_colors, nb_deltas, predictor.
+  kPalette = 1,
+
+  // Squeezing (Haar-style)
+  kSqueeze = 2,
+
+  // Invalid for now: Transform::VisitFields fails when it sees this id.
+  kInvalid = 3,
+};
+
+struct SqueezeParams : public Fields {
+  const char *Name() const override { return "SqueezeParams"; }
+  bool horizontal;   // visited 1st (Bool; presumably default false)
+  bool in_place;     // visited 2nd (Bool; presumably default false)
+  uint32_t begin_c;  // visited 3rd (U32; presumably default 0)
+  uint32_t num_c;    // visited 4th (U32; presumably default 2)
+  SqueezeParams();
+  Status VisitFields(Visitor *JXL_RESTRICT visitor) override {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &horizontal));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &in_place));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Bits(3), BitsOffset(6, 8),
+                                           BitsOffset(10, 72),
+                                           BitsOffset(13, 1096), 0, &begin_c));
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Val(1), Val(2), Val(3), BitsOffset(4, 4), 2, &num_c));
+    return true;  // reached only when all four visits succeeded
+  }
+};
+
+class Transform : public Fields {
+ public:
+  TransformId id;  // decides which of the fields below VisitFields touches
+  // for Palette and RCT.
+  uint32_t begin_c;
+  // for RCT. 42 possible values starting from 0.
+  uint32_t rct_type;
+  // Only for Palette and NearLossless.
+  uint32_t num_c;
+  // Only for Palette.
+  uint32_t nb_colors;
+  uint32_t nb_deltas;
+  // for Squeeze. Default squeeze if empty.
+  std::vector<SqueezeParams> squeezes;
+  // for NearLossless, not serialized.
+  int max_delta_error;
+  // Serialized for Palette.
+  Predictor predictor;
+  // for Palette, not serialized.
+  bool ordered_palette = true;
+  bool lossy_palette = false;
+
+  explicit Transform(TransformId id);
+  // default constructor for bundles.
+  Transform() : Transform(TransformId::kInvalid) {}
+  // Visits `id` first, then only the fields guarded by that id.
+  Status VisitFields(Visitor *JXL_RESTRICT visitor) override {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+        Val((uint32_t)TransformId::kRCT), Val((uint32_t)TransformId::kPalette),
+        Val((uint32_t)TransformId::kSqueeze),
+        Val((uint32_t)TransformId::kInvalid), (uint32_t)TransformId::kRCT,
+        reinterpret_cast<uint32_t *>(&id)));
+    if (id == TransformId::kInvalid) {
+      return JXL_FAILURE("Invalid transform ID");
+    }
+    if (visitor->Conditional(id == TransformId::kRCT ||
+                             id == TransformId::kPalette)) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Bits(3), BitsOffset(6, 8), BitsOffset(10, 72),
+                       BitsOffset(13, 1096), 0, &begin_c));
+    }
+    if (visitor->Conditional(id == TransformId::kRCT)) {
+      // 0-41, default YCoCg.
+      JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(6), Bits(2), BitsOffset(4, 2),
+                                             BitsOffset(6, 10), 6, &rct_type));
+      if (rct_type >= 42) {
+        return JXL_FAILURE("Invalid transform RCT type");
+      }
+    }
+    if (visitor->Conditional(id == TransformId::kPalette)) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Val(1), Val(3), Val(4), BitsOffset(13, 1), 3, &num_c));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+          BitsOffset(8, 0), BitsOffset(10, 256), BitsOffset(12, 1280),
+          BitsOffset(16, 5376), 256, &nb_colors));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Val(0), BitsOffset(8, 1), BitsOffset(10, 257),
+                       BitsOffset(16, 1281), 0, &nb_deltas));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->Bits(4, (uint32_t)Predictor::Zero,
+                        reinterpret_cast<uint32_t *>(&predictor)));
+      if (predictor >= Predictor::Best) {
+        return JXL_FAILURE("Invalid predictor");
+      }
+    }
+    // Squeeze: a count, then that many nested SqueezeParams.
+    if (visitor->Conditional(id == TransformId::kSqueeze)) {
+      uint32_t num_squeezes = static_cast<uint32_t>(squeezes.size());
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Val(0), BitsOffset(4, 1), BitsOffset(6, 9),
+                       BitsOffset(8, 41), 0, &num_squeezes));
+      if (visitor->IsReading()) squeezes.resize(num_squeezes);
+      for (size_t i = 0; i < num_squeezes; i++) {
+        JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&squeezes[i]));
+      }
+    }
+    return true;
+  }
+
+  const char *Name() const override { return "Transform"; }
+  // Both are defined out of line (transform.cc).
+  Status Inverse(Image &input, const weighted::Header &wp_header,
+                 ThreadPool *pool = nullptr);
+  Status MetaApply(Image &input);
+};
+
+Status CheckEqualChannels(const Image &image, uint32_t c1, uint32_t c2);
+
+static inline pixel_type PixelAdd(pixel_type a, pixel_type b) {
+  // The sum is formed on uint32_t operands and then cast back to pixel_type.
+  const uint32_t sum = static_cast<uint32_t>(a) + static_cast<uint32_t>(b);
+  return static_cast<pixel_type>(sum);
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_TRANSFORM_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular_test.cc
new file mode 100644
index 0000000000..a528998971
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/modular_test.cc
@@ -0,0 +1,171 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <array>
+#include <random>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "lib/extras/codec.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/thread_pool_internal.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/dec_file.h"
+#include "lib/jxl/dec_params.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/modular/encoding/enc_encoding.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testdata.h"
+
+namespace jxl {
+namespace {
+using test::Roundtrip;
+
+void TestLosslessGroups(size_t group_size_shift) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes orig =
+      ReadTestData("imagecompression.info/flower_foveon.png");
+  CompressParams cparams;
+  cparams.modular_mode = true;
+  cparams.modular_group_size_shift = group_size_shift;
+  cparams.color_transform = jxl::ColorTransform::kNone;
+  DecompressParams dparams;
+
+  CodecInOut io_out;
+  size_t compressed_size;
+  // Load the test image and shrink it to a quarter of its width and height.
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, pool));
+  io.ShrinkTo(io.xsize() / 4, io.ysize() / 4);
+  // Round-trip, then bound the size and require a Butteraugli distance <= 0.
+  compressed_size = Roundtrip(&io, cparams, dparams, pool, &io_out);
+  EXPECT_LE(compressed_size, 280000);
+  EXPECT_LE(ButteraugliDistance(io, io_out, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            0.0);
+}
+
+TEST(ModularTest, RoundtripLosslessGroups128) { TestLosslessGroups(0); }
+
+TEST(ModularTest, JXL_TSAN_SLOW_TEST(RoundtripLosslessGroups512)) {
+  TestLosslessGroups(2);
+}
+
+TEST(ModularTest, JXL_TSAN_SLOW_TEST(RoundtripLosslessGroups1024)) {
+  TestLosslessGroups(3);
+}
+
+TEST(ModularTest, RoundtripLossy) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes orig =
+      ReadTestData("wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CompressParams cparams;
+  cparams.modular_mode = true;
+  cparams.quality_pair = {80.0f, 80.0f};
+  DecompressParams dparams;
+
+  CodecInOut io_out;
+  size_t compressed_size;
+
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, pool));
+  // Round-trip, then bound the size and the Butteraugli distance (<= 3).
+  compressed_size = Roundtrip(&io, cparams, dparams, pool, &io_out);
+  EXPECT_LE(compressed_size, 40000);
+  cparams.ba_params.intensity_target = 80.0f;  // set after Roundtrip()
+  EXPECT_LE(ButteraugliDistance(io, io_out, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            3.0);
+}
+
+TEST(ModularTest, RoundtripLossy16) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes orig =
+      ReadTestData("raw.pixls/DJI-FC6310-16bit_709_v4_krita.png");
+  CompressParams cparams;
+  cparams.modular_mode = true;
+  cparams.quality_pair = {80.0f, 80.0f};
+  DecompressParams dparams;
+
+  CodecInOut io_out;
+  size_t compressed_size;
+
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, pool));
+  JXL_CHECK(io.TransformTo(ColorEncoding::SRGB(), pool));  // convert to sRGB
+  io.metadata.m.color_encoding = ColorEncoding::SRGB();
+  // Round-trip, then bound the size and the Butteraugli distance (<= 1.5).
+  compressed_size = Roundtrip(&io, cparams, dparams, pool, &io_out);
+  EXPECT_LE(compressed_size, 400);
+  cparams.ba_params.intensity_target = 80.0f;  // set after Roundtrip()
+  EXPECT_LE(ButteraugliDistance(io, io_out, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            1.5);
+}
+
+TEST(ModularTest, RoundtripExtraProperties) {
+  constexpr size_t kSize = 250;
+  Image image(kSize, kSize, /*bitdepth=*/8, 3);
+  ModularOptions options;
+  options.max_properties = 4;
+  options.predictor = Predictor::Zero;
+  std::mt19937 rng(0);  // fixed seed
+  std::uniform_int_distribution<> dist(0, 8);
+  for (size_t y = 0; y < kSize; y++) {
+    for (size_t x = 0; x < kSize; x++) {
+      image.channel[0].plane.Row(y)[x] = image.channel[2].plane.Row(y)[x] =
+          dist(rng);  // channels 0 and 2 get the same random value
+    }
+  }
+  ZeroFillImage(&image.channel[1].plane);  // channel 1 is all zeros
+  BitWriter writer;
+  ASSERT_TRUE(ModularGenericCompress(image, options, &writer));
+  writer.ZeroPadToByte();
+  Image decoded(kSize, kSize, /*bitdepth=*/8, image.channel.size());
+  for (size_t i = 0; i < image.channel.size(); i++) {
+    const Channel& ch = image.channel[i];  // copy geometry, not pixel data
+    decoded.channel[i] = Channel(ch.w, ch.h, ch.hshift, ch.vshift);
+  }
+  Status status = true;
+  {  // presumably the closer reports into `status` when this scope ends
+    BitReader reader(writer.GetSpan());
+    BitReaderScopedCloser closer(&reader, &status);
+    ASSERT_TRUE(ModularGenericDecompress(&reader, decoded, /*header=*/nullptr,
+                                         /*group_id=*/0, &options));
+  }
+  ASSERT_TRUE(status);
+  ASSERT_EQ(image.channel.size(), decoded.channel.size());
+  for (size_t c = 0; c < image.channel.size(); c++) {  // exact pixel match
+    for (size_t y = 0; y < image.channel[c].plane.ysize(); y++) {
+      for (size_t x = 0; x < image.channel[c].plane.xsize(); x++) {
+        EXPECT_EQ(image.channel[c].plane.Row(y)[x],
+                  decoded.channel[c].plane.Row(y)[x])
+            << "c = " << c << ", x = " << x << ",  y = " << y;
+      }
+    }
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/noise.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/noise.h
new file mode 100644
index 0000000000..329b325f1c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/noise.h
@@ -0,0 +1,60 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_NOISE_H_
+#define LIB_JXL_NOISE_H_
+
+// Noise parameters shared by encoder/decoder.
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <cmath>
+#include <utility>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+const float kNoisePrecision = 1 << 10;
+
+struct NoiseParams {
+  // LUT index is an intensity of pixel / mean intensity of patch
+  static constexpr size_t kNumNoisePoints = 8;
+  float lut[kNumNoisePoints];
+
+  // Zeroes the whole LUT.
+  void Clear() { std::fill(lut, lut + kNumNoisePoints, 0.0f); }
+
+  // Reports whether any LUT entry has magnitude strictly above 1e-3.
+  bool HasAny() const {
+    const float* const lut_end = lut + kNumNoisePoints;
+    return std::any_of(lut, lut_end,
+                       [](float v) { return std::abs(v) > 1e-3f; });
+  }
+};
+
+static inline std::pair<int, float> IndexAndFrac(float x) {
+  constexpr size_t kScaleNumerator = NoiseParams::kNumNoisePoints - 2;
+  // TODO: instead of 1, this should be a proper Y range.
+  constexpr float kScale = kScaleNumerator / 1;
+  float scaled_x = std::max(0.f, x * kScale);  // negative inputs clamp to 0
+  float floor_x;
+  float frac_x = std::modf(scaled_x, &floor_x);  // integer part -> floor_x
+  if (JXL_UNLIKELY(scaled_x >= kScaleNumerator)) {  // saturate at the top
+    floor_x = kScaleNumerator - 1;
+    frac_x = 1;
+  }
+  return std::make_pair(static_cast<size_t>(static_cast<int>(floor_x)), frac_x);
+}
+
+struct NoiseLevel {
+  float noise_level;
+  float intensity;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_NOISE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/noise_distributions.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/noise_distributions.h
new file mode 100644
index 0000000000..65a61cc6ef
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/noise_distributions.h
@@ -0,0 +1,138 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_NOISE_DISTRIBUTIONS_H_
+#define LIB_JXL_NOISE_DISTRIBUTIONS_H_
+
+// Noise distributions for testing partial_derivatives and robust_statistics.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <random>  // distributions
+#include <string>
+
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// Identity "noise": hands every sample back untouched.
+struct NoiseNone {
+  std::string Name() const { return "None"; }
+
+  // `rng` is accepted so the call shape matches the other noise functors,
+  // but it is never dereferenced.
+  template <class Random>
+  float operator()(const float in, Random* rng) const { return in; }
+};
+
+// Salt+pepper: each draw from the generator either forces the pixel to 0
+// (pepper), forces it to 1 (salt) or leaves it alone.
+class NoiseImpulse {
+ public:
+  explicit NoiseImpulse(const uint32_t threshold) : threshold_(threshold) {}
+  std::string Name() const { return "Impulse" + ToString(threshold_); }
+
+  // Sets pixels to 0 if rand <= threshold or 1 if rand > ~threshold; every
+  // other draw returns `in` unchanged.
+  template <class Random>
+  float operator()(const float in, Random* rng) const {
+    const uint32_t rand = (*rng)();
+    // Salt has to be tested before the pass-through case: for a small
+    // threshold every draw above ~threshold_ is also above threshold_.
+    if (rand > ~threshold_) return 1.0f;
+    if (rand > threshold_) return in;
+    return 0.0f;
+  }
+
+ private:
+  // Pepper covers draws in [0, threshold_], salt covers (~threshold_, max].
+  const uint32_t threshold_;
+};
+
+class NoiseUniform {
+ public:
+  NoiseUniform(const float min, const float max_exclusive)
+      : dist_(min, max_exclusive) {}
+  std::string Name() const { return "Uniform" + ToString(dist_.b()); }
+  // Returns `in` plus one sample drawn from dist_ using *rng.
+  template <class Random>
+  float operator()(const float in, Random* rng) const {
+    return in + dist_(*rng);
+  }
+
+ private:
+  mutable std::uniform_real_distribution<float> dist_;  // used in const op()
+};
+
+// Additive, zero-mean Gaussian.
+class NoiseGaussian {
+ public:
+  explicit NoiseGaussian(const float stddev) : dist_(0.0f, stddev) {}
+  std::string Name() const { return "Gaussian" + ToString(dist_.stddev()); }
+  // Returns `in` plus one sample drawn from dist_ using *rng.
+  template <class Random>
+  float operator()(const float in, Random* rng) const {
+    return in + dist_(*rng);
+  }
+
+ private:
+  mutable std::normal_distribution<float> dist_;  // used in const op()
+};
+
+// Integer noise is scaled by 1E-3.
+class NoisePoisson {
+ public:
+  explicit NoisePoisson(const double mean) : dist_(mean) {}
+  std::string Name() const { return "Poisson" + ToString(dist_.mean()); }
+  // Returns `in` plus one integer sample from dist_, multiplied by 1E-3.
+  template <class Random>
+  float operator()(const float in, Random* rng) const {
+    return in + dist_(*rng) * 1E-3f;
+  }
+
+ private:
+  mutable std::poisson_distribution<int> dist_;  // used in const op()
+};
+
+// Returns a new image whose pixel (x, y) is noise(in(x, y), rng). Pixels are
+// visited row by row, left to right, so noise sees them in that order.
+template <class NoiseType, class Random>
+ImageF AddNoise(const ImageF& in, const NoiseType& noise, Random* rng) {
+  const size_t width = in.xsize();
+  const size_t height = in.ysize();
+  ImageF noisy(width, height);
+
+  for (size_t row = 0; row < height; ++row) {
+    const float* JXL_RESTRICT src = in.ConstRow(row);
+    float* JXL_RESTRICT dst = noisy.Row(row);
+    for (size_t col = 0; col < width; ++col) dst[col] = noise(src[col], rng);
+  }
+  return noisy;
+}
+
+// Three-plane variant: pixel (x, y) of plane c becomes noise(in_c(x, y), rng).
+template <class NoiseType, class Random>
+Image3F AddNoise(const Image3F& in, const NoiseType& noise, Random* rng) {
+  const size_t width = in.xsize();
+  const size_t height = in.ysize();
+  Image3F noisy(width, height);
+  // noise_estimator_test requires this loop order.
+  for (size_t plane = 0; plane < 3; ++plane) {
+    for (size_t row = 0; row < height; ++row) {
+      const float* JXL_RESTRICT src = in.ConstPlaneRow(plane, row);
+      float* JXL_RESTRICT dst = noisy.PlaneRow(plane, row);
+      for (size_t col = 0; col < width; ++col) {
+        dst[col] = noise(src[col], rng);
+      }
+    }
+  }
+  return noisy;
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_NOISE_DISTRIBUTIONS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_image_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_image_test.cc
new file mode 100644
index 0000000000..d79c7cf479
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_image_test.cc
@@ -0,0 +1,127 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <hwy/tests/test_util-inl.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/linalg.h"
+#include "lib/jxl/opsin_params.h"
+
+namespace jxl {
+namespace {
+
+class OpsinImageTargetTest : public hwy::TestWithParamTarget {};
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(OpsinImageTargetTest);
+
+TEST_P(OpsinImageTargetTest, MaxCubeRootError) { TestCubeRoot(); }
+
+// Convert a single linear sRGB color to xyb, using the exact image conversion
+// procedure that jpeg xl uses.
+void LinearSrgbToOpsin(float rgb_r, float rgb_g, float rgb_b,
+                       float* JXL_RESTRICT xyb_x, float* JXL_RESTRICT xyb_y,
+                       float* JXL_RESTRICT xyb_b) {
+  Image3F linear(1, 1);
+  linear.PlaneRow(0, 0)[0] = rgb_r;
+  linear.PlaneRow(1, 0)[0] = rgb_g;
+  linear.PlaneRow(2, 0)[0] = rgb_b;
+  // Wrap the 1x1 image in a float32, linear-sRGB ImageBundle for ToXYB.
+  ImageMetadata metadata;
+  metadata.SetFloat32Samples();
+  metadata.color_encoding = ColorEncoding::LinearSRGB();
+  ImageBundle ib(&metadata);
+  ib.SetFromImage(std::move(linear), metadata.color_encoding);
+  Image3F opsin(1, 1);
+  (void)ToXYB(ib, /*pool=*/nullptr, &opsin);  // return value is discarded
+  // Planes 0, 1, 2 of the result are handed back as x, y, b.
+  *xyb_x = opsin.PlaneRow(0, 0)[0];
+  *xyb_y = opsin.PlaneRow(1, 0)[0];
+  *xyb_b = opsin.PlaneRow(2, 0)[0];
+}
+
+// Convert a single XYB color to linear sRGB, using the exact image conversion
+// procedure that jpeg xl uses.
+void OpsinToLinearSrgb(float xyb_x, float xyb_y, float xyb_b,
+                       float* JXL_RESTRICT rgb_r, float* JXL_RESTRICT rgb_g,
+                       float* JXL_RESTRICT rgb_b) {
+  Image3F opsin(1, 1);  // single pixel; planes 0, 1, 2 hold x, y, b
+  opsin.PlaneRow(0, 0)[0] = xyb_x;
+  opsin.PlaneRow(1, 0)[0] = xyb_y;
+  opsin.PlaneRow(2, 0)[0] = xyb_b;
+  Image3F linear(1, 1);
+  OpsinParams opsin_params;
+  opsin_params.Init(/*intensity_target=*/255.0f);
+  OpsinToLinear(opsin, Rect(opsin), nullptr, &linear, opsin_params);
+  *rgb_r = linear.PlaneRow(0, 0)[0];  // output planes: r, g, b
+  *rgb_g = linear.PlaneRow(1, 0)[0];
+  *rgb_b = linear.PlaneRow(2, 0)[0];
+}
+
+void OpsinRoundtripTestRGB(float r, float g, float b) {
+  float xyb_x, xyb_y, xyb_b;
+  LinearSrgbToOpsin(r, g, b, &xyb_x, &xyb_y, &xyb_b);  // forward
+  float r2, g2, b2;
+  OpsinToLinearSrgb(xyb_x, xyb_y, xyb_b, &r2, &g2, &b2);  // and back
+  EXPECT_NEAR(r, r2, 1e-3);  // each channel must return to within 1e-3
+  EXPECT_NEAR(g, g2, 1e-3);
+  EXPECT_NEAR(b, b2, 1e-3);
+}
+
+TEST(OpsinImageTest, VerifyOpsinAbsorbanceInverseMatrix) {
+  float matrix[9];  // writable copy
+  for (int i = 0; i < 9; i++) {
+    matrix[i] = GetOpsinAbsorbanceInverseMatrix()[i];
+  }
+  EXPECT_TRUE(Inv3x3Matrix(matrix));  // inverts the copy in place
+  for (int i = 0; i < 9; i++) {  // inverse of the inverse == forward matrix
+    EXPECT_NEAR(matrix[i], kOpsinAbsorbanceMatrix[i], 1e-6);
+  }
+}
+
+TEST(OpsinImageTest, OpsinRoundtrip) {
+  OpsinRoundtripTestRGB(0, 0, 0);  // grays: r == g == b
+  OpsinRoundtripTestRGB(1. / 255, 1. / 255, 1. / 255);
+  OpsinRoundtripTestRGB(128. / 255, 128. / 255, 128. / 255);
+  OpsinRoundtripTestRGB(1, 1, 1);
+  // Blue only.
+  OpsinRoundtripTestRGB(0, 0, 1. / 255);
+  OpsinRoundtripTestRGB(0, 0, 128. / 255);
+  OpsinRoundtripTestRGB(0, 0, 1);
+  // Green only.
+  OpsinRoundtripTestRGB(0, 1. / 255, 0);
+  OpsinRoundtripTestRGB(0, 128. / 255, 0);
+  OpsinRoundtripTestRGB(0, 1, 0);
+  // Red only.
+  OpsinRoundtripTestRGB(1. / 255, 0, 0);
+  OpsinRoundtripTestRGB(128. / 255, 0, 0);
+  OpsinRoundtripTestRGB(1, 0, 0);
+}
+
+TEST(OpsinImageTest, VerifyZero) {
+  // Test that black color (zero energy) is 0,0,0 in xyb.
+  float x, y, b;
+  LinearSrgbToOpsin(0, 0, 0, &x, &y, &b);
+  EXPECT_NEAR(0, x, 1e-9);  // x is held to a tighter bound than y and b
+  EXPECT_NEAR(0, y, 1e-7);
+  EXPECT_NEAR(0, b, 1e-7);
+}
+
+TEST(OpsinImageTest, VerifyGray) {
+  // Test that grayscale colors have a fixed y/b ratio and x==0.
+  for (size_t i = 1; i < 255; i++) {  // gray levels 1..254 out of 255
+    float x, y, b;
+    LinearSrgbToOpsin(i / 255., i / 255., i / 255., &x, &y, &b);
+    EXPECT_NEAR(0, x, 1e-6);
+    EXPECT_NEAR(kYToBRatio, b / y, 3e-5);  // the checked ratio is b / y
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_inverse_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_inverse_test.cc
new file mode 100644
index 0000000000..b7c1964259
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_inverse_test.cc
@@ -0,0 +1,55 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "gtest/gtest.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_test_utils.h"
+
+namespace jxl {
+namespace {
+
+TEST(OpsinInverseTest, LinearInverseInverts) {
+  Image3F linear(128, 128);
+  RandomFillImage(&linear, 1.0f);
+  // Forward direction: wrap the image as float32 linear sRGB and run ToXYB.
+  CodecInOut io;
+  io.metadata.m.SetFloat32Samples();
+  io.metadata.m.color_encoding = ColorEncoding::LinearSRGB();
+  io.SetFromImage(CopyImage(linear), io.metadata.m.color_encoding);
+  ThreadPool* null_pool = nullptr;
+  Image3F opsin(io.xsize(), io.ysize());
+  (void)ToXYB(io.Main(), null_pool, &opsin);
+  // Inverse direction, in place: `opsin` is compared against `linear` below.
+  OpsinParams opsin_params;
+  opsin_params.Init(/*intensity_target=*/255.0f);
+  OpsinToLinearInplace(&opsin, /*pool=*/nullptr, opsin_params);
+  // The round trip must reproduce the original within the given tolerances.
+  VerifyRelativeError(linear, opsin, 3E-3, 2E-4);
+}
+
+TEST(OpsinInverseTest, YcbCrInverts) {
+  Image3F rgb(128, 128);
+  RandomFillImage(&rgb, 1.0f);
+  // Note the output plane order passed below: 1, 0, 2.
+  ThreadPool* null_pool = nullptr;
+  Image3F ycbcr(rgb.xsize(), rgb.ysize());
+  RgbToYcbcr(rgb.Plane(0), rgb.Plane(1), rgb.Plane(2), &ycbcr.Plane(1),
+             &ycbcr.Plane(0), &ycbcr.Plane(2), null_pool);
+  // Convert back over the full image rectangle.
+  Image3F rgb2(rgb.xsize(), rgb.ysize());
+  YcbcrToRgb(ycbcr, &rgb2, Rect(rgb));
+  // The reconstruction must match the input within these tolerances.
+  VerifyRelativeError(rgb, rgb2, 4E-5, 4E-7);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_params.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_params.cc
new file mode 100644
index 0000000000..f80a18af8c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_params.cc
@@ -0,0 +1,44 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/opsin_params.h"
+
+#include <stdlib.h>
+
+#include "lib/jxl/linalg.h"
+
+namespace jxl {
+
+#define INVERSE_OPSIN_FROM_SPEC 1
+
+const float* GetOpsinAbsorbanceInverseMatrix() {
+#if INVERSE_OPSIN_FROM_SPEC
+  return DefaultInverseOpsinAbsorbanceMatrix();  // hard-coded table
+#else   // INVERSE_OPSIN_FROM_SPEC
+  // Compute the inverse opsin matrix from the forward matrix. Less precise
+  // than taking the values from the specification, but must be used if the
+  // forward transform is changed and the spec will require updating.
+  static const float* const kInverse = [] {  // initializer runs once
+    static float inverse[9];
+    for (int i = 0; i < 9; i++) {
+      inverse[i] = kOpsinAbsorbanceMatrix[i];
+    }
+    Inv3x3Matrix(inverse);  // in place; its return value is not checked
+    return inverse;
+  }();
+  return kInverse;
+#endif  // INVERSE_OPSIN_FROM_SPEC
+}
+
+void InitSIMDInverseMatrix(const float* JXL_RESTRICT inverse,
+                           float* JXL_RESTRICT simd_inverse,
+                           float intensity_target) {
+  for (size_t i = 0; i < 9; ++i) {  // 9 entries, each replicated 4 times
+    const float scaled = inverse[i] * (255.0f / intensity_target);
+    for (size_t k = 0; k < 4; ++k) simd_inverse[4 * i + k] = scaled;
+  }
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_params.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_params.h
new file mode 100644
index 0000000000..e8e2e4331e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/opsin_params.h
@@ -0,0 +1,74 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_OPSIN_PARAMS_H_
+#define LIB_JXL_OPSIN_PARAMS_H_
+
+// Constants that define the XYB color space.
+
+#include <stdlib.h>
+
+#include <cmath>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+// Parameters for opsin absorbance.
+static const float kM02 = 0.078f;
+static const float kM00 = 0.30f;
+static const float kM01 = 1.0f - kM02 - kM00;
+
+static const float kM12 = 0.078f;
+static const float kM10 = 0.23f;
+static const float kM11 = 1.0f - kM12 - kM10;
+
+static const float kM20 = 0.24342268924547819f;
+static const float kM21 = 0.20476744424496821f;
+static const float kM22 = 1.0f - kM20 - kM21;
+
+static const float kBScale = 1.0f;
+static const float kYToBRatio = 1.0f;  // works better with 0.50017729543783418
+static const float kBToYRatio = 1.0f / kYToBRatio;
+
+static const float kB0 = 0.0037930732552754493f;
+static const float kB1 = kB0;
+static const float kB2 = kB0;
+
+// Opsin absorbance matrix is now frozen.
+static const float kOpsinAbsorbanceMatrix[9] = {
+    kM00, kM01, kM02, kM10, kM11, kM12, kM20, kM21, kM22,
+};
+
+// Must be the inverse matrix of kOpsinAbsorbanceMatrix and match the spec.
+static inline const float* DefaultInverseOpsinAbsorbanceMatrix() {
+  static float kDefaultInverseOpsinAbsorbanceMatrix[9] = {
+      11.031566901960783f,  -9.866943921568629f, -0.16462299647058826f,
+      -3.254147380392157f,  4.418770392156863f,  -0.16462299647058826f,
+      -3.6588512862745097f, 2.7129230470588235f, 1.9459282392156863f};
+  return kDefaultInverseOpsinAbsorbanceMatrix;
+}
+
+// Returns 3x3 row-major matrix inverse of kOpsinAbsorbanceMatrix.
+// opsin_image_test verifies this is actually the inverse.
+const float* GetOpsinAbsorbanceInverseMatrix();
+
+void InitSIMDInverseMatrix(const float* JXL_RESTRICT inverse,
+                           float* JXL_RESTRICT simd_inverse,
+                           float intensity_target);
+
+static const float kOpsinAbsorbanceBias[3] = {
+    kB0,
+    kB1,
+    kB2,
+};
+
+static const float kNegOpsinAbsorbanceBiasRGB[4] = {
+    -kOpsinAbsorbanceBias[0], -kOpsinAbsorbanceBias[1],
+    -kOpsinAbsorbanceBias[2], 1.0f};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_OPSIN_PARAMS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/optimize.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/optimize.cc
new file mode 100644
index 0000000000..0816596365
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/optimize.cc
@@ -0,0 +1,163 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/optimize.h"
+
+#include <algorithm>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+namespace optimize {
+
+namespace {
+
+// Expects `simplex` sorted by objective ([0]); averages all but the last vertex.
+std::vector<double> Midpoint(const std::vector<std::vector<double>>& simplex) {
+  JXL_CHECK(!simplex.empty());
+  JXL_CHECK(simplex.size() == simplex[0].size());
+  const size_t num_coords = simplex.size() - 1;
+  std::vector<double> centroid(simplex.size(), 0);  // slot [0] stays 0
+  for (size_t c = 1; c <= num_coords; c++) {
+    for (size_t v = 0; v < num_coords; v++) {
+      centroid[c] += simplex[v][c];
+    }
+    centroid[c] /= num_coords;
+  }
+  return centroid;
+}
+
+// Returns a - b for slots [1:]; slot [0] (the objective value) is zeroed.
+std::vector<double> Subtract(const std::vector<double>& a,
+                             const std::vector<double>& b) {
+  JXL_CHECK(a.size() == b.size());
+  std::vector<double> diff(a.size());
+  diff[0] = 0;
+  // Skip slot [0] in both inputs and in the output.
+  std::transform(a.begin() + 1, a.end(), b.begin() + 1, diff.begin() + 1,
+                 std::minus<double>());
+  return diff;
+}
+
+// Returns a + b for slots [1:]; slot [0] (the objective value) is zeroed.
+std::vector<double> Add(const std::vector<double>& a,
+                        const std::vector<double>& b) {
+  JXL_CHECK(a.size() == b.size());
+  std::vector<double> sum(a.size());
+  sum[0] = 0;
+  // Skip slot [0] in both inputs and in the output.
+  std::transform(a.begin() + 1, a.end(), b.begin() + 1, sum.begin() + 1,
+                 std::plus<double>());
+  return sum;
+}
+
+// Returns the midpoint of a and b for slots [1:]; slot [0] is zeroed.
+std::vector<double> Average(const std::vector<double>& a,
+                            const std::vector<double>& b) {
+  JXL_CHECK(a.size() == b.size());
+  std::vector<double> mean(a.size());
+  mean[0] = 0;
+  // Skip slot [0] in both inputs and in the output.
+  std::transform(a.begin() + 1, a.end(), b.begin() + 1, mean.begin() + 1,
+                 [](double x, double y) { return 0.5 * (x + y); });
+  return mean;
+}
+
+// vec: slot [0] receives the objective value; slots [1:] hold the position
+//   at which the objective is evaluated.
+// fun: the objective function; it is called with a copy of vec[1:].
+void Eval(std::vector<double>* vec,
+          const std::function<double(const std::vector<double>&)>& fun) {
+  const std::vector<double> position(vec->begin() + 1, vec->end());
+  vec->front() = fun(position);
+}
+
+void Sort(std::vector<std::vector<double>>* simplex) {
+  std::sort(std::begin(*simplex), std::end(*simplex));  // lexicographic order
+}
+
+// One Nelder-Mead-like step: sorts, then replaces the worst (last) vertex.
+void Reflect(std::vector<std::vector<double>>* simplex,
+             const std::function<double(const std::vector<double>&)>& fun) {
+  Sort(simplex);
+  std::vector<double>& worst = simplex->back();
+  const std::vector<double> centroid = Midpoint(*simplex);
+  const std::vector<double> step = Subtract(centroid, worst);
+  // Mirror the worst vertex through the centroid of the other vertices.
+  std::vector<double> candidate = Add(centroid, step);
+  Eval(&candidate, fun);
+  const double best_value = simplex->front()[0];
+  const double second_worst_value = (*simplex)[simplex->size() - 2][0];
+  if (candidate[0] > second_worst_value) {
+    // Still the worst: move the worst vertex halfway towards the best instead.
+    candidate = Average(worst, simplex->front());
+    Eval(&candidate, fun);
+  } else if (candidate[0] < best_value) {
+    // New best: try one more step in the same direction and keep the better.
+    std::vector<double> extended = Add(candidate, step);
+    Eval(&extended, fun);
+    if (extended[0] < candidate[0]) {
+      candidate = extended;
+    }
+  }
+  // In every case the chosen point takes the place of the old worst vertex.
+  worst = candidate;
+}
+
+// Builds the dim + 1 start vertices around `init` (which holds <= dim values).
+std::vector<std::vector<double>> InitialSimplex(
+    int dim, double amount, const std::vector<double>& init,
+    const std::function<double(const std::vector<double>&)>& fun) {
+  std::vector<double> vertex(1 + dim, 0);
+  std::copy(init.begin(), init.end(), vertex.begin() + 1);
+  Eval(&vertex, fun);
+  std::vector<std::vector<double>> simplex{vertex};
+  for (int axis = 1; axis <= dim; axis++) {
+    vertex = simplex[0];  // offset from the best vertex found so far
+    vertex[axis] += amount;
+    Eval(&vertex, fun);
+    simplex.push_back(vertex);
+    Sort(&simplex);
+  }
+  return simplex;
+}
+
+// For comparing the same with the python tool
+/*void RunSimplexExternal(
+    int dim, double amount, int max_iterations,
+    const std::function<double((const vector<double>&))>& fun) {
+  vector<double> vars;
+  for (int i = 0; i < dim; i++) {
+    vars.push_back(atof(getenv(StrCat("VAR", i).c_str())));
+  }
+  double result = fun(vars);
+  std::cout << "Result=" << result;
+}*/
+
+}  // namespace
+
+// Runs max_iterations Reflect() steps from `init`; returns the best vertex.
+std::vector<double> RunSimplex(
+    int dim, double amount, int max_iterations, const std::vector<double>& init,
+    const std::function<double(const std::vector<double>&)>& fun) {
+  auto simplex = InitialSimplex(dim, amount, init, fun);
+  // Reflect() sorts on entry, so no extra Sort() is needed before each step.
+  for (int i = 0; i < max_iterations; i++) Reflect(&simplex, fun);
+  // The last Reflect() may have left a new best in the back slot; re-sort.
+  Sort(&simplex);
+  return simplex[0];
+}
+
+std::vector<double> RunSimplex(
+    int dim, double amount, int max_iterations,
+    const std::function<double(const std::vector<double>&)>& fun) {
+  const std::vector<double> origin(dim, 0.0);  // default guess: all zeros
+  return RunSimplex(dim, amount, max_iterations, origin, fun);
+}
+
+}  // namespace optimize
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/optimize.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/optimize.h
new file mode 100644
index 0000000000..0a60198214
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/optimize.h
@@ -0,0 +1,218 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Utility functions for optimizing multi-dimensional nonlinear functions.
+
+#ifndef LIB_JXL_OPTIMIZE_H_
+#define LIB_JXL_OPTIMIZE_H_
+
+#include <stdio.h>
+
+#include <cmath>
+#include <cstdio>
+#include <functional>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace optimize {
+
+// Fixed-size array of N numeric values of type T; math operations are provided
+// by the free operator+, operator- and operator* templates.
+template <typename T, size_t N>
+class Array {
+ public:
+  Array() = default;
+  explicit Array(T v) {
+    for (T& slot : v_) slot = v;  // every element starts out as v
+  }
+
+  size_t size() const { return N; }
+
+  T& operator[](size_t index) {
+    JXL_DASSERT(index < N);
+    return v_[index];
+  }
+  T operator[](size_t index) const {
+    JXL_DASSERT(index < N);
+    return v_[index];
+  }
+
+ private:
+  // The values used by this Array.
+  T v_[N];
+};
+
+// Element-wise sum of two arrays of the same type and length:
+// result[i] = x[i] + y[i].
+template <typename T, size_t N>
+Array<T, N> operator+(const Array<T, N>& x, const Array<T, N>& y) {
+  Array<T, N> sum;
+  for (size_t i = 0; i < N; ++i) sum[i] = x[i] + y[i];
+  return sum;
+}
+
+// Element-wise difference of two arrays of the same type and length:
+// result[i] = x[i] - y[i].
+template <typename T, size_t N>
+Array<T, N> operator-(const Array<T, N>& x, const Array<T, N>& y) {
+  Array<T, N> diff;
+  for (size_t i = 0; i < N; ++i) diff[i] = x[i] - y[i];
+  return diff;
+}
+
+// Scalar multiple of an array:
+// result[i] = v * x[i].
+template <typename T, size_t N>
+Array<T, N> operator*(T v, const Array<T, N>& x) {
+  Array<T, N> scaled;
+  for (size_t i = 0; i < N; ++i) scaled[i] = v * x[i];
+  return scaled;
+}
+
+// Dot product of two arrays of the same type and length:
+// sum over i of x[i] * y[i].
+template <typename T, size_t N>
+T operator*(const Array<T, N>& x, const Array<T, N>& y) {
+  T dot = 0.0;
+  for (size_t i = 0; i < N; ++i) dot += x[i] * y[i];
+  return dot;
+}
+
+// Runs Nelder-Mead like optimization. Runs for max_iterations times,
+// fun gets called with a vector of size dim as argument, and returns the score
+// based on those parameters (lower is better). Returns a vector of dim+1
+// dimensions, where the first value is the optimal value of the function and
+// the rest is the argmin value. Use init to pass an initial guess of where
+// the optimal value is.
+//
+// Usage example:
+//
+// RunSimplex(2, 0.1, 100, [](const std::vector<double>& v) {
+//   return (v[0] - 5) * (v[0] - 5) + (v[1] - 7) * (v[1] - 7);
+// });
+//
+// Returns approximately (0.0, 5, 7)
+std::vector<double> RunSimplex(
+    int dim, double amount, int max_iterations,
+    const std::function<double(const std::vector<double>&)>& fun);
+std::vector<double> RunSimplex(
+    int dim, double amount, int max_iterations, const std::vector<double>& init,
+    const std::function<double(const std::vector<double>&)>& fun);
+
+// Implementation of the Scaled Conjugate Gradient method described in the
+// following paper:
+//   Moller, M. "A Scaled Conjugate Gradient Algorithm for Fast Supervised
+//   Learning", Neural Networks, Vol. 6. pp. 525-533, 1993
+//   http://sci2s.ugr.es/keel/pdf/algorithm/articulo/moller1990.pdf
+//
+// The Function template parameter is a class that has the following method:
+//
+//   // Returns the value of the function at point w and sets *df to be the
+//   // negative gradient vector of the function at point w.
+//   double Compute(const optimize::Array<T, N>& w,
+//                  optimize::Array<T, N>* df) const;
+//
+// Returns a vector w such that |df(w)| <= grad_norm_threshold, or the last
+// accepted point if that is not reached within max_iters iterations.
+template <typename T, size_t N, typename Function>
+Array<T, N> OptimizeWithScaledConjugateGradientMethod(
+    const Function& f, const Array<T, N>& w0, const T grad_norm_threshold,
+    size_t max_iters) {
+  const size_t n = w0.size();
+  const T rsq_threshold = grad_norm_threshold * grad_norm_threshold;
+  const T sigma0 = static_cast<T>(0.0001);
+  const T l_min = static_cast<T>(1.0e-15);
+  const T l_max = static_cast<T>(1.0e15);
+
+  Array<T, N> w = w0;
+  Array<T, N> wp;
+  Array<T, N> r;
+  Array<T, N> rt;
+  Array<T, N> e;
+  Array<T, N> p;
+  // psq, m and t are set on the first pass (success starts true) and are
+  // reused unchanged after a rejected step; the rest are per-pass temporaries.
+  T psq, m, t;
+  T fp, D, d, a, b, s;
+
+  T fw = f.Compute(w, &r);
+  T rsq = r * r;
+  // Already within the threshold: return before the loop, where a zero
+  // gradient would make sigma0 / std::sqrt(psq) divide by zero.
+  if (rsq <= rsq_threshold) return w;
+  e = r;
+  p = r;
+  T l = static_cast<T>(1.0);
+  bool success = true;
+  size_t n_success = 0;
+  size_t k = 0;
+
+  while (k++ < max_iters) {
+    if (success) {
+      m = -(p * r);
+      if (m >= 0) {
+        p = r;
+        m = -(p * r);
+      }
+      psq = p * p;
+      s = sigma0 / std::sqrt(psq);
+      f.Compute(w + (s * p), &rt);
+      t = (p * (r - rt)) / s;
+    }
+
+    d = t + l * psq;
+    if (d <= 0) {
+      d = l * psq;
+      l = l - t / psq;
+    }
+
+    a = -m / d;
+    wp = w + a * p;
+    fp = f.Compute(wp, &rt);
+
+    D = 2.0 * (fp - fw) / (a * m);
+    if (D >= 0.0) {
+      success = true;
+      n_success++;
+      w = wp;
+    } else {
+      success = false;
+    }
+
+    if (success) {
+      e = r;
+      r = rt;
+      rsq = r * r;
+      fw = fp;
+      if (rsq <= rsq_threshold) {
+        break;
+      }
+    }
+
+    // The casts keep both std::min/std::max arguments of type T (e.g. float).
+    if (D < 0.25) {
+      l = std::min(static_cast<T>(4.0 * l), l_max);
+    } else if (D > 0.75) {
+      l = std::max(static_cast<T>(0.25 * l), l_min);
+    }
+
+    if ((n_success % n) == 0) {
+      p = r;
+      l = 1.0;
+    } else if (success) {
+      b = ((e - r) * r) / m;
+      p = b * p + r;
+    }
+  }
+
+  return w;
+}
+
+}  // namespace optimize
+}  // namespace jxl
+
+#endif  // LIB_JXL_OPTIMIZE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/optimize_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/optimize_test.cc
new file mode 100644
index 0000000000..5d5b5a8365
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/optimize_test.cc
@@ -0,0 +1,109 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/optimize.h"
+
+#include <stdio.h>
+
+#include "gtest/gtest.h"
+
+namespace jxl {
+namespace optimize {
+namespace {
+
+// The maximum number of iterations for the test.
+static const size_t kMaxTestIter = 100000;
+
+// F(w) = |w - w_min|^2, which is smallest (zero) at w == w_min.
+struct SimpleQuadraticFunction {
+  using ArrayType = Array<double, 2>;
+  explicit SimpleQuadraticFunction(const ArrayType& w0) : w_min(w0) {}
+  // Returns F(w) and stores the negative gradient -2 * (w - w_min) in *df.
+  double Compute(const ArrayType& w, ArrayType* df) const {
+    const ArrayType offset = w - w_min;
+    *df = -2.0 * offset;
+    return offset * offset;
+  }
+
+  ArrayType w_min;
+};
+
+// F(w | x,y) = \sum_i(y_i - (w[0] x_i ^ w[1] + w[2]))^2, i.e. w holds the
+// scale, the exponent and the offset, in that order.
+struct PowerFunction {
+  explicit PowerFunction(const std::vector<double>& x0,
+                         const std::vector<double>& y0)
+      : x(x0), y(y0) {}
+
+  using ArrayType = Array<double, 3>;
+  // Returns the loss at w and accumulates its negative gradient into *df.
+  double Compute(const ArrayType& w, ArrayType* df) const {
+    double loss = 0;
+    for (size_t c = 0; c < 3; ++c) (*df)[c] = 0;
+    for (size_t i = 0; i < y.size(); ++i) {
+      if (x[i] == 0) continue;  // skipped: log(x[i]) below would be -inf
+      const double power = pow(x[i], w[1]);
+      const double residual = y[i] - (w[0] * power + w[2]);
+      (*df)[0] += 2.0 * residual * power;
+      (*df)[1] += 2.0 * residual * w[0] * power * log(x[i]);
+      (*df)[2] += 2.0 * residual * 1;
+      loss += residual * residual;
+    }
+    return loss;
+  }
+
+  std::vector<double> x;
+  std::vector<double> y;
+};
+
+TEST(OptimizeTest, SimpleQuadraticFunction) {
+  using ArrayType = SimpleQuadraticFunction::ArrayType;
+  static const double kPrecision = 1e-8;
+  ArrayType target;  // location of the minimum the optimizer should find
+  target[0] = 1.0;
+  target[1] = 2.0;
+  const SimpleQuadraticFunction objective(target);
+  const ArrayType found = optimize::OptimizeWithScaledConjugateGradientMethod(
+      objective, ArrayType(0.), kPrecision, kMaxTestIter);
+  EXPECT_NEAR(found[0], 1.0, kPrecision);
+  EXPECT_NEAR(found[1], 2.0, kPrecision);
+}
+
+TEST(OptimizeTest, PowerFunction) {
+  // Samples of y = 2 * x^3 + 5 at x = 0, 1, ..., 9.
+  std::vector<double> xs;
+  std::vector<double> ys;
+  for (int i = 0; i < 10; ++i) {
+    xs.push_back(1. * i);
+    ys.push_back(2. * pow(xs.back(), 3) + 5.);
+  }
+  const PowerFunction objective(xs, ys);
+  static const double kPrecision = 0.01;
+  const PowerFunction::ArrayType fit =
+      optimize::OptimizeWithScaledConjugateGradientMethod(
+          objective, PowerFunction::ArrayType(0.), kPrecision, kMaxTestIter);
+  EXPECT_NEAR(fit[0], 2.0, kPrecision);
+  EXPECT_NEAR(fit[1], 3.0, kPrecision);
+  EXPECT_NEAR(fit[2], 5.0, kPrecision);
+}
+
+TEST(OptimizeTest, SimplexOptTest) {
+  // Paraboloid whose minimum value 2.0 is attained at (1.0, -1.5).
+  auto paraboloid = [](const std::vector<double>& v) -> double {
+    const double dx = v[0] - 1.0;
+    const double dy = v[1] + 1.5;
+    return 2.0 + dx * dx + dy * dy;
+  };
+  const std::vector<double> best = RunSimplex(2, 0.01, 100, paraboloid);
+  EXPECT_EQ(best.size(), 3);
+  static const double kPrecision = 0.01;
+  EXPECT_NEAR(best[0], 2.0, kPrecision);   // objective value
+  EXPECT_NEAR(best[1], 1.0, kPrecision);   // argmin, first coordinate
+  EXPECT_NEAR(best[2], -1.5, kPrecision);  // argmin, second coordinate
+}
+
+}  // namespace
+}  // namespace optimize
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/padded_bytes_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/padded_bytes_test.cc
new file mode 100644
index 0000000000..1f4786fbcf
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/padded_bytes_test.cc
@@ -0,0 +1,126 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/padded_bytes.h"
+
+#include <numeric>  // iota
+#include <vector>
+
+#include "gtest/gtest.h"
+
+namespace jxl {
+namespace {
+
+TEST(PaddedBytesTest, TestNonEmptyFirstByteZero) {
+  PaddedBytes bytes(1);
+  EXPECT_EQ(0, bytes[0]);
+  // The first byte is still zero after growing the size...
+  bytes.resize(20);
+  EXPECT_EQ(0, bytes[0]);
+  // ...and after growing the capacity.
+  bytes.reserve(200);
+  EXPECT_EQ(0, bytes[0]);
+}
+
+TEST(PaddedBytesTest, TestEmptyFirstByteZero) {
+  PaddedBytes bytes(0);
+  // Resizing an empty buffer must write a zero even with nothing to copy.
+  bytes.resize(20);
+  EXPECT_EQ(0, bytes[0]);
+}
+
+TEST(PaddedBytesTest, TestFillWithoutReserve) {
+  PaddedBytes bytes;
+  // Append one element at a time without reserving any capacity first.
+  for (size_t n = 0; n < 170; ++n) bytes.push_back(n);
+  EXPECT_EQ(170, bytes.size());
+  // The capacity has to cover at least what was appended.
+  EXPECT_GE(bytes.capacity(), 170);
+}
+
+TEST(PaddedBytesTest, TestFillWithExactReserve) {
+  PaddedBytes bytes;
+  bytes.reserve(170);
+  // Append exactly as many elements as were reserved.
+  for (size_t n = 0; n < 170; ++n) bytes.push_back(n);
+  EXPECT_EQ(170, bytes.size());
+  // Filling up to the reserved amount must not grow the capacity.
+  EXPECT_EQ(bytes.capacity(), 170);
+}
+
+TEST(PaddedBytesTest, TestFillWithMoreReserve) {
+  PaddedBytes bytes;
+  bytes.reserve(171);
+  // Append one element fewer than was reserved.
+  for (size_t n = 0; n < 170; ++n) bytes.push_back(n);
+  EXPECT_EQ(170, bytes.size());
+  // The unused reserved slot must still be counted in the capacity.
+  EXPECT_GT(bytes.capacity(), 170);
+}
+
+// assign() accepts a source range that lies inside the buffer's own data.
+TEST(PaddedBytesTest, TestAssignFromWithin) {
+  PaddedBytes bytes;
+  bytes.reserve(256);
+  for (size_t n = 0; n < 256; ++n) bytes.push_back(n);
+  // Keep only elements [64, 192) of the current contents.
+  bytes.assign(bytes.data() + 64, bytes.data() + 192);
+  EXPECT_EQ(128, bytes.size());
+  // Element k must now hold the value previously stored at index k + 64.
+  for (size_t k = 0; k < 128; ++k) {
+    EXPECT_EQ(k + 64, bytes[k]);
+  }
+}
+
+// assign() accepts a range that spans valid and previously-allocated data.
+TEST(PaddedBytesTest, TestAssignReclaim) {
+  PaddedBytes bytes;
+  bytes.reserve(256);
+  for (size_t n = 0; n < 256; ++n) bytes.push_back(n);
+  // All 256 reserved slots are now filled with the values 0..255.
+
+  const uint8_t* const before = bytes.data();
+  bytes.resize(200);
+  // Shrinking must keep the same allocation and capacity.
+  EXPECT_EQ(before, bytes.data());
+  EXPECT_EQ(256, bytes.capacity());
+
+  // Reclaim part of initial allocation: [200, 240) lies past the new size.
+  bytes.assign(bytes.data() + 100, bytes.data() + 240);
+  EXPECT_EQ(140, bytes.size());
+
+  // Element k must hold the value originally stored at index k + 100.
+  for (size_t k = 0; k < 140; ++k) {
+    EXPECT_EQ(k + 100, bytes[k]);
+  }
+}
+
+// assign() accepts smaller and larger ranges from outside the allocation.
+TEST(PaddedBytesTest, TestAssignOutside) {
+  PaddedBytes bytes;
+  bytes.resize(400);
+  std::iota(bytes.begin(), bytes.end(), 1);
+
+  std::vector<uint8_t> shorter(64);
+  std::iota(shorter.begin(), shorter.end(), 500);
+
+  bytes.assign(shorter.data(), shorter.data() + shorter.size());
+  EXPECT_EQ(64, bytes.size());
+  for (size_t k = 0; k < 64; ++k) {
+    EXPECT_EQ((k + 500) & 0xFF, bytes[k]);  // values wrap to 8 bits
+  }
+
+  std::vector<uint8_t> longer(1000);
+  std::iota(longer.begin(), longer.end(), 600);
+
+  bytes.assign(longer.data(), longer.data() + longer.size());
+  EXPECT_EQ(1000, bytes.size());
+  for (size_t k = 0; k < 1000; ++k) {
+    EXPECT_EQ((k + 600) & 0xFF, bytes[k]);  // values wrap to 8 bits
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/passes_state.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/passes_state.cc
new file mode 100644
index 0000000000..a0cc1983fe
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/passes_state.cc
@@ -0,0 +1,68 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/passes_state.h"
+
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/common.h"
+
+namespace jxl {
+
+// Stores the header, metadata pointer and dimensions of `frame_header` in
+// `shared` and allocates its per-block images; fails on an unusable DC frame.
+Status InitializePassesSharedState(const FrameHeader& frame_header,
+                                   PassesSharedState* JXL_RESTRICT shared,
+                                   bool encoder) {
+  JXL_ASSERT(frame_header.nonserialized_metadata != nullptr);
+  shared->frame_header = frame_header;
+  shared->metadata = frame_header.nonserialized_metadata;
+  shared->frame_dim = frame_header.ToFrameDimensions();
+  shared->image_features.patches.SetPassesSharedState(shared);
+
+  const FrameDimensions& frame_dim = shared->frame_dim;
+
+  shared->ac_strategy =
+      AcStrategyImage(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+  shared->raw_quant_field =
+      ImageI(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+  shared->epf_sharpness =
+      ImageB(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+  shared->cmap = ColorCorrelationMap(frame_dim.xsize, frame_dim.ysize);
+
+  // In the decoder, we allocate coeff orders afterwards, when we know how many
+  // we will actually need.
+  shared->coeff_order_size = kCoeffOrderMaxSize;
+  if (encoder &&
+      shared->coeff_orders.size() <
+          frame_header.passes.num_passes * kCoeffOrderMaxSize &&
+      frame_header.encoding == FrameEncoding::kVarDCT) {
+    shared->coeff_orders.resize(frame_header.passes.num_passes *
+                                kCoeffOrderMaxSize);
+  }
+
+  shared->quant_dc = ImageB(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+  if ((frame_header.flags & FrameHeader::kUseDcFrame) && !encoder) {
+    // dc_frames has 4 entries, so any level >= 4 would index out of bounds.
+    if (frame_header.dc_level >= 4) {
+      return JXL_FAILURE("Invalid DC level for kUseDcFrame: %u",
+                         frame_header.dc_level);
+    }
+    shared->dc = &shared->dc_frames[frame_header.dc_level];
+    if (shared->dc->xsize() == 0) {
+      return JXL_FAILURE(
+          "kUseDcFrame specified for dc_level %u, but no frame was decoded "
+          "with level %u",
+          frame_header.dc_level, frame_header.dc_level + 1);
+    }
+    ZeroFillImage(&shared->quant_dc);
+  }
+
+  shared->dc_storage = Image3F(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/passes_state.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/passes_state.h
new file mode 100644
index 0000000000..069d7acdf0
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/passes_state.h
@@ -0,0 +1,138 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_PASSES_STATE_H_
+#define LIB_JXL_PASSES_STATE_H_
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/noise.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+
+// Structures that hold the (en/de)coder state for a JPEG XL kVarDCT
+// (en/de)coder.
+
+namespace jxl {
+
+struct ImageFeatures {  // held by PassesSharedState as `image_features`
+  NoiseParams noise_params;
+  PatchDictionary patches;  // InitializePassesSharedState() hands it `shared`
+  Splines splines;
+};
+
+// State common to both encoder and decoder (see InitializePassesSharedState).
+// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
+struct PassesSharedState {
+  PassesSharedState() : frame_header(nullptr) {}
+
+  // Headers and metadata.
+  const CodecMetadata* metadata;  // = frame_header.nonserialized_metadata
+  FrameHeader frame_header;
+
+  FrameDimensions frame_dim;  // = frame_header.ToFrameDimensions()
+
+  // Control fields and parameters.
+  AcStrategyImage ac_strategy;
+
+  // Dequant matrices + quantizer.
+  DequantMatrices matrices;
+  Quantizer quantizer{&matrices};  // takes the address of `matrices` above
+  ImageI raw_quant_field;
+
+  // Per-block side information for EPF detail preservation.
+  ImageB epf_sharpness;
+
+  ColorCorrelationMap cmap;
+
+  ImageFeatures image_features;
+
+  // Memory area for storing coefficient orders.
+  // `coeff_order_size` is the size used by *one* set of coefficient orders (at
+  // most kCoeffOrderMaxSize). A set of coefficient orders is present for each
+  // pass.
+  size_t coeff_order_size = 0;
+  std::vector<coeff_order_t> coeff_orders;
+
+  // Decoder-side DC and quantized DC.
+  ImageB quant_dc;
+  Image3F dc_storage;
+  const Image3F* JXL_RESTRICT dc = &dc_storage;  // or an entry of dc_frames
+
+  BlockCtxMap block_ctx_map;
+
+  Image3F dc_frames[4];  // indexed by frame_header.dc_level when kUseDcFrame
+
+  struct {
+    ImageBundle storage;
+    // Can either point to `storage`, if this is a frame that is not stored in
+    // the CodecInOut, or can point to an existing ImageBundle.
+    // TODO(veluca): pointing to ImageBundles in CodecInOut is not possible for
+    // now, as they are stored in a vector and thus may be moved. Fix this.
+    ImageBundle* JXL_RESTRICT frame = &storage;
+    // ImageBundle doesn't yet have a simple way to state it is in XYB.
+    bool ib_is_in_xyb = false;
+  } reference_frames[4] = {};
+
+  // Number of pre-clustered set of histograms (with the same ctx map), per
+  // pass. Encoded as num_histograms_ - 1.
+  size_t num_histograms = 0;
+
+  bool IsGrayscale() const { return metadata->m.color_encoding.IsGray(); }
+
+  Rect GroupRect(size_t group_index) const {
+    const size_t gx = group_index % frame_dim.xsize_groups;  // group column
+    const size_t gy = group_index / frame_dim.xsize_groups;  // group row
+    const Rect rect(gx * frame_dim.group_dim, gy * frame_dim.group_dim,
+                    frame_dim.group_dim, frame_dim.group_dim, frame_dim.xsize,
+                    frame_dim.ysize);
+    return rect;
+  }
+
+  Rect PaddedGroupRect(size_t group_index) const {
+    const size_t gx = group_index % frame_dim.xsize_groups;  // group column
+    const size_t gy = group_index / frame_dim.xsize_groups;  // group row
+    const Rect rect(gx * frame_dim.group_dim, gy * frame_dim.group_dim,
+                    frame_dim.group_dim, frame_dim.group_dim,
+                    frame_dim.xsize_padded, frame_dim.ysize_padded);
+    return rect;
+  }
+
+  Rect BlockGroupRect(size_t group_index) const {
+    const size_t gx = group_index % frame_dim.xsize_groups;  // group column
+    const size_t gy = group_index / frame_dim.xsize_groups;  // group row
+    const Rect rect(gx * (frame_dim.group_dim >> 3),
+                    gy * (frame_dim.group_dim >> 3), frame_dim.group_dim >> 3,
+                    frame_dim.group_dim >> 3, frame_dim.xsize_blocks,
+                    frame_dim.ysize_blocks);
+    return rect;
+  }
+
+  Rect DCGroupRect(size_t group_index) const {
+    const size_t gx = group_index % frame_dim.xsize_dc_groups;  // DC column
+    const size_t gy = group_index / frame_dim.xsize_dc_groups;  // DC row
+    const Rect rect(gx * frame_dim.group_dim, gy * frame_dim.group_dim,
+                    frame_dim.group_dim, frame_dim.group_dim,
+                    frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+    return rect;
+  }
+};
+
+// Initializes the state information that is shared between encoder and decoder.
+Status InitializePassesSharedState(const FrameHeader& frame_header,
+                                   PassesSharedState* JXL_RESTRICT shared,
+                                   bool encoder = false);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_PASSES_STATE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/passes_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/passes_test.cc
new file mode 100644
index 0000000000..9ed3a6aba5
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/passes_test.cc
@@ -0,0 +1,389 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+
+#include <string>
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "lib/extras/codec.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/thread_pool_internal.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_file.h"
+#include "lib/jxl/dec_params.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testdata.h"
+
+namespace jxl {
+namespace {
+using test::Roundtrip;
+
+TEST(PassesTest, RoundtripSmallPasses) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut input;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &input, pool));
+  input.ShrinkTo(input.xsize() / 8, input.ysize() / 8);
+
+  CompressParams cparams;
+  cparams.progressive_mode = true;
+  cparams.butteraugli_distance = 1.0;
+  DecompressParams dparams;
+
+  CodecInOut output;
+  Roundtrip(&input, cparams, dparams, pool, &output);
+  const auto distance = ButteraugliDistance(input, output, cparams.ba_params,
+                                            /*distmap=*/nullptr, pool);
+  EXPECT_LE(distance, 1.5);
+}
+
+TEST(PassesTest, RoundtripUnalignedPasses) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes png =
+      ReadTestData("wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut input;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &input, pool));
+  input.ShrinkTo(input.xsize() / 12, input.ysize() / 7);
+
+  CompressParams cparams;
+  cparams.progressive_mode = true;
+  cparams.butteraugli_distance = 2.0;
+  DecompressParams dparams;
+
+  CodecInOut output;
+  Roundtrip(&input, cparams, dparams, pool, &output);
+  const auto distance = ButteraugliDistance(input, output, cparams.ba_params,
+                                            /*distmap=*/nullptr, pool);
+  EXPECT_LE(distance, 3.2);
+}
+
+TEST(PassesTest, RoundtripMultiGroupPasses) {
+  ThreadPoolInternal pool(4);
+  const PaddedBytes png =
+      ReadTestData("imagecompression.info/flower_foveon.png");
+  CodecInOut input;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &input, &pool));
+  input.ShrinkTo(600, 1024);  // partial X, full Y group
+
+  CompressParams cparams;
+  DecompressParams dparams;
+
+  // First round trip at distance 1.
+  cparams.progressive_mode = true;
+  cparams.butteraugli_distance = 1.0f;
+  CodecInOut near_output;
+  Roundtrip(&input, cparams, dparams, &pool, &near_output);
+  EXPECT_LE(ButteraugliDistance(input, near_output, cparams.ba_params,
+                                /*distmap=*/nullptr, &pool),
+            1.99f);
+  // Second round trip of the same input at distance 2.
+  cparams.butteraugli_distance = 2.0f;
+  CodecInOut far_output;
+  Roundtrip(&input, cparams, dparams, &pool, &far_output);
+  const auto far_distance = ButteraugliDistance(
+      input, far_output, cparams.ba_params, /*distmap=*/nullptr, &pool);
+  EXPECT_LE(far_distance, 3.0f);
+}
+
+TEST(PassesTest, RoundtripLargeFastPasses) {
+  ThreadPoolInternal pool(8);
+  const PaddedBytes png =
+      ReadTestData("imagecompression.info/flower_foveon.png");
+  CodecInOut input;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &input, &pool));
+
+  CompressParams cparams;
+  cparams.progressive_mode = true;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  DecompressParams dparams;
+
+  CodecInOut output;  // the result is not inspected by this test
+  Roundtrip(&input, cparams, dparams, &pool, &output);
+}
+
+// Runs the same distance-2 round trip twice and requires identical compressed
+// size and identical distance. Distance 2 involves additional processing,
+// including adaptive reconstruction; a mismatch may indicate a race or bad access.
+TEST(PassesTest, RoundtripProgressiveConsistent) {
+  ThreadPoolInternal pool(8);
+  const PaddedBytes png =
+      ReadTestData("imagecompression.info/flower_foveon.png");
+  CodecInOut input;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(png), &input, &pool));
+
+  CompressParams cparams;
+  cparams.progressive_mode = true;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  cparams.butteraugli_distance = 2.0;
+  DecompressParams dparams;
+
+  // Widths 48 down to 41 cover each xsize mod kBlockDim (right border cases).
+  for (size_t width = 48; width > 40; --width) {
+    input.ShrinkTo(width, 15);
+
+    CodecInOut first;
+    const size_t first_size = Roundtrip(&input, cparams, dparams, &pool, &first);
+
+    CodecInOut second;
+    const size_t second_size = Roundtrip(&input, cparams, dparams, &pool, &second);
+
+    // Both runs must produce exactly the same compressed size.
+    EXPECT_EQ(first_size, second_size);
+
+    // Both runs must produce exactly the same distance.
+    const float first_dist = ButteraugliDistance(
+        input, first, cparams.ba_params, /*distmap=*/nullptr, &pool);
+    const float second_dist = ButteraugliDistance(
+        input, second, cparams.ba_params, /*distmap=*/nullptr, &pool);
+    EXPECT_EQ(first_dist, second_dist);
+  }
+}
+
+TEST(PassesTest, AllDownsampleFeasible) {
+  ThreadPoolInternal pool(8);
+  const PaddedBytes orig =
+      ReadTestData("wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+
+  PaddedBytes compressed;
+  AuxOut aux;
+
+  CompressParams cparams;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  cparams.progressive_mode = true;
+  cparams.butteraugli_distance = 1.0;
+  PassesEncoderState enc_state;
+  ASSERT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, &aux, &pool));
+
+  EXPECT_LE(compressed.size(), 240000);
+  float target_butteraugli[9] = {};  // indexed by the downsampling factor
+  target_butteraugli[1] = 2.5f;
+  target_butteraugli[2] = 14.5f;
+  target_butteraugli[4] = 20.0f;
+  target_butteraugli[8] = 80.0f;
+
+  // The default progressive encoding scheme should make all these downsampling
+  // factors achievable.
+  // TODO(veluca): re-enable downsampling 16.
+  std::vector<size_t> downsamplings = {1, 2, 4, 8};  //, 16};
+
+  auto check = [&](uint32_t task, uint32_t /* thread */) -> void {
+    const size_t downsampling = downsamplings[task];  // one factor per task
+    DecompressParams dparams;
+    dparams.max_downsampling = downsampling;
+    CodecInOut output;
+    ASSERT_TRUE(DecodeFile(dparams, compressed, &output, nullptr));
+    EXPECT_EQ(output.xsize(), io.xsize()) << "downsampling = " << downsampling;
+    EXPECT_EQ(output.ysize(), io.ysize()) << "downsampling = " << downsampling;
+    EXPECT_LE(ButteraugliDistance(io, output, cparams.ba_params,
+                                  /*distmap=*/nullptr, nullptr),
+              target_butteraugli[downsampling])
+        << "downsampling: " << downsampling;
+  };
+  pool.Run(0, downsamplings.size(), ThreadPool::SkipInit(), check);
+}
+
+TEST(PassesTest, AllDownsampleFeasibleQProgressive) {
+  ThreadPoolInternal pool(8);
+  const PaddedBytes orig =
+      ReadTestData("wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+
+  PaddedBytes compressed;
+  AuxOut aux;
+  // Encode once with qprogressive_mode on; the bytes are decoded per factor.
+  CompressParams cparams;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  cparams.qprogressive_mode = true;
+  cparams.butteraugli_distance = 1.0;
+  PassesEncoderState enc_state;
+  ASSERT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, &aux, &pool));
+
+  EXPECT_LE(compressed.size(), 220000);
+  // Maximum accepted butteraugli distance, indexed by downsampling factor.
+  float target_butteraugli[9] = {};
+  target_butteraugli[1] = 3.0f;
+  target_butteraugli[2] = 6.0f;
+  target_butteraugli[4] = 10.0f;
+  target_butteraugli[8] = 80.0f;
+
+  // The qprogressive_mode encoding should also make all these downsampling
+  // factors achievable.
+  std::vector<size_t> downsamplings = {1, 2, 4, 8};
+  // Runs once per entry of `downsamplings`: decode, then check size/distance.
+  auto check = [&](uint32_t task, uint32_t /* thread */) -> void {
+    const size_t downsampling = downsamplings[task];
+    DecompressParams dparams;
+    dparams.max_downsampling = downsampling;
+    CodecInOut output;
+    ASSERT_TRUE(DecodeFile(dparams, compressed, &output, nullptr));
+    EXPECT_EQ(output.xsize(), io.xsize()) << "downsampling = " << downsampling;
+    EXPECT_EQ(output.ysize(), io.ysize()) << "downsampling = " << downsampling;
+    EXPECT_LE(ButteraugliDistance(io, output, cparams.ba_params,
+                                  /*distmap=*/nullptr, nullptr),
+              target_butteraugli[downsampling])
+        << "downsampling: " << downsampling;
+  };
+  pool.Run(0, downsamplings.size(), ThreadPool::SkipInit(), check);
+}
+
+TEST(PassesTest, ProgressiveDownsample2DegradesCorrectlyGrayscale) {
+  ThreadPoolInternal pool(8);
+  const PaddedBytes orig =
+      ReadTestData("wesaturate/500px/cvo9xd_keong_macan_grayscale.png");
+  CodecInOut io_orig;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io_orig, &pool));
+  Rect rect(0, 0, io_orig.xsize(), 128);  // presumably the top 128 rows
+  // 4242 px wide: need 2 DC groups for the DC frame to actually be progressive.
+  Image3F large(4242, rect.ysize());
+  ZeroFillImage(&large);
+  CopyImageTo(rect, *io_orig.Main().color(), rect, &large);
+  CodecInOut io;
+  io.metadata = io_orig.metadata;
+  io.SetFromImage(std::move(large), io_orig.Main().c_current());
+
+  PaddedBytes compressed;
+  AuxOut aux;
+
+  CompressParams cparams;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  cparams.progressive_dc = 1;
+  cparams.responsive = true;
+  cparams.qprogressive_mode = true;
+  cparams.butteraugli_distance = 1.0;
+  PassesEncoderState enc_state;
+  ASSERT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, &aux, &pool));
+
+  EXPECT_LE(compressed.size(), 10000);
+  // Reference decode: max_downsampling = 1.
+  DecompressParams dparams;
+  dparams.max_downsampling = 1;
+  CodecInOut output;
+  ASSERT_TRUE(DecodeFile(dparams, compressed, &output, nullptr));
+  // Same bytes decoded again, now allowing a downsampling factor of 2.
+  dparams.max_downsampling = 2;
+  CodecInOut output_d2;
+  ASSERT_TRUE(DecodeFile(dparams, compressed, &output_d2, nullptr));
+
+  // 0 if reading all the passes, ~15 if skipping the 8x pass.
+  float butteraugli_distance_down2_full =
+      ButteraugliDistance(output, output_d2, cparams.ba_params,
+                          /*distmap=*/nullptr, nullptr);
+  // The two decodes must differ, but only within the [1.0, 3.2] window.
+  EXPECT_LE(butteraugli_distance_down2_full, 3.2f);
+  EXPECT_GE(butteraugli_distance_down2_full, 1.0f);
+}
+
+TEST(PassesTest, ProgressiveDownsample2DegradesCorrectly) {
+  ThreadPoolInternal pool(8);
+  const PaddedBytes orig =
+      ReadTestData("imagecompression.info/flower_foveon.png");
+  CodecInOut io_orig;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io_orig, &pool));
+  Rect rect(0, 0, io_orig.xsize(), 128);  // presumably the top 128 rows
+  // 4242 px wide: need 2 DC groups for the DC frame to actually be progressive.
+  Image3F large(4242, rect.ysize());
+  ZeroFillImage(&large);
+  CopyImageTo(rect, *io_orig.Main().color(), rect, &large);
+  CodecInOut io;
+  io.SetFromImage(std::move(large), io_orig.Main().c_current());
+
+  PaddedBytes compressed;
+  AuxOut aux;
+
+  CompressParams cparams;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  cparams.progressive_dc = 1;
+  cparams.responsive = true;
+  cparams.qprogressive_mode = true;
+  cparams.butteraugli_distance = 1.0;
+  PassesEncoderState enc_state;
+  ASSERT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, &aux, &pool));
+
+  EXPECT_LE(compressed.size(), 220000);
+  // Reference decode: max_downsampling = 1.
+  DecompressParams dparams;
+  dparams.max_downsampling = 1;
+  CodecInOut output;
+  ASSERT_TRUE(DecodeFile(dparams, compressed, &output, nullptr));
+  // Same bytes decoded again, now allowing a downsampling factor of 2.
+  dparams.max_downsampling = 2;
+  CodecInOut output_d2;
+  ASSERT_TRUE(DecodeFile(dparams, compressed, &output_d2, nullptr));
+
+  // 0 if reading all the passes, ~15 if skipping the 8x pass.
+  float butteraugli_distance_down2_full =
+      ButteraugliDistance(output, output_d2, cparams.ba_params,
+                          /*distmap=*/nullptr, nullptr);
+  // The two decodes must differ, but only within the [1.0, 3.0] window.
+  EXPECT_LE(butteraugli_distance_down2_full, 3.0f);
+  EXPECT_GE(butteraugli_distance_down2_full, 1.0f);
+}
+
+TEST(PassesTest, NonProgressiveDCImage) {
+  ThreadPoolInternal pool(8);
+  const PaddedBytes orig =
+      ReadTestData("imagecompression.info/flower_foveon.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+
+  PaddedBytes compressed;
+  AuxOut aux;
+
+  CompressParams cparams;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  cparams.progressive_mode = false;
+  cparams.butteraugli_distance = 2.0;
+  PassesEncoderState enc_state;
+  ASSERT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, &aux, &pool));
+
+  // Even in non-progressive mode, it should be possible to return a DC-only
+  // image: decoding must succeed and keep the original dimensions.
+  DecompressParams dparams;
+  dparams.max_downsampling = 100;  // deliberately large factor
+  CodecInOut output;
+  ASSERT_TRUE(DecodeFile(dparams, compressed, &output, &pool));
+  EXPECT_EQ(output.xsize(), io.xsize());
+  EXPECT_EQ(output.ysize(), io.ysize());
+}
+
+TEST(PassesTest, RoundtripSmallNoGaborishPasses) {
+  ThreadPool* pool = nullptr;  // no thread pool is supplied
+  const PaddedBytes orig =
+      ReadTestData("wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, pool));
+  io.ShrinkTo(io.xsize() / 8, io.ysize() / 8);  // 1/8 of each dimension
+  // Progressive encode at distance 1.0 with gaborish explicitly switched off.
+  CompressParams cparams;
+  cparams.gaborish = Override::kOff;
+  cparams.butteraugli_distance = 1.0;
+  cparams.progressive_mode = true;
+  DecompressParams dparams;
+  // The decoded image must stay within distance 1.7 of the input.
+  CodecInOut io2;
+  Roundtrip(&io, cparams, dparams, pool, &io2);
+  EXPECT_LE(ButteraugliDistance(io, io2, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            1.7);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/patch_dictionary_internal.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/patch_dictionary_internal.h
new file mode 100644
index 0000000000..e4172f6db6
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/patch_dictionary_internal.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_PATCH_DICTIONARY_INTERNAL_H_
+#define LIB_JXL_PATCH_DICTIONARY_INTERNAL_H_
+
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/passes_state.h"  // for PassesSharedState
+
+namespace jxl {
+
+// Context numbers as specified in Section C.4.5, Listing C.2:
+enum Contexts {
+  kNumRefPatchContext = 0,
+  kReferenceFrameContext = 1,
+  kPatchSizeContext = 2,
+  kPatchReferencePositionContext = 3,
+  kPatchPositionContext = 4,
+  kPatchBlendModeContext = 5,
+  kPatchOffsetContext = 6,
+  kPatchCountContext = 7,
+  kPatchAlphaChannelContext = 8,
+  kPatchClampContext = 9,
+  kNumPatchDictionaryContexts  // == 10: number of contexts listed above
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_PATCH_DICTIONARY_INTERNAL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/patch_dictionary_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/patch_dictionary_test.cc
new file mode 100644
index 0000000000..3bcc1351e6
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/patch_dictionary_test.cc
@@ -0,0 +1,58 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "gtest/gtest.h"
+#include "lib/extras/codec.h"
+#include "lib/jxl/dec_params.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testdata.h"
+
+namespace jxl {
+namespace {
+
+using ::jxl::test::Roundtrip;
+
+TEST(PatchDictionaryTest, GrayscaleModular) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes orig = ReadTestData("jxl/grayscale_patches.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, pool));
+  // Modular mode, no color transform, patches forced on.
+  CompressParams cparams;
+  cparams.color_transform = jxl::ColorTransform::kNone;
+  cparams.modular_mode = true;
+  cparams.patches = jxl::Override::kOn;
+  DecompressParams dparams;
+
+  CodecInOut io2;
+  // Bound on the value returned by Roundtrip. Without patches: ~25k
+  EXPECT_LE(Roundtrip(&io, cparams, dparams, pool, &io2), 8000);
+  VerifyRelativeError(*io.Main().color(), *io2.Main().color(), 1e-7f, 0);
+}
+
+TEST(PatchDictionaryTest, GrayscaleVarDCT) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes orig = ReadTestData("jxl/grayscale_patches.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, pool));
+  // Default parameters (presumably VarDCT, per the test name), patches on.
+  CompressParams cparams;
+  cparams.patches = jxl::Override::kOn;
+  DecompressParams dparams;
+
+  CodecInOut io2;
+  // Bound on the value returned by Roundtrip. Without patches: ~47k
+  EXPECT_LE(Roundtrip(&io, cparams, dparams, pool, &io2), 14000);
+  // Bound on the butteraugli distance. Without patches: ~1.2
+  EXPECT_LE(ButteraugliDistance(io, io2, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            1.1);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/preview_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/preview_test.cc
new file mode 100644
index 0000000000..0a36be8c7b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/preview_test.cc
@@ -0,0 +1,83 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "lib/extras/codec.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/dec_file.h"
+#include "lib/jxl/dec_params.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testdata.h"
+
+namespace jxl {
+namespace {
+using test::Roundtrip;
+
+TEST(PreviewTest, RoundtripGivenPreview) {
+  ThreadPool* pool = nullptr;
+  const PaddedBytes orig =
+      ReadTestData("wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut io;
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, pool));
+  io.ShrinkTo(io.xsize() / 8, io.ysize() / 8);
+  // The preview starts as a copy of the main image and is shrunk to 15x27.
+  io.preview_frame = io.Main().Copy();
+  const size_t preview_xsize = 15;
+  const size_t preview_ysize = 27;
+  io.preview_frame.ShrinkTo(preview_xsize, preview_ysize);
+  io.metadata.m.have_preview = true;
+  ASSERT_TRUE(io.metadata.m.preview_size.Set(io.preview_frame.xsize(),
+                                             io.preview_frame.ysize()));
+
+  CompressParams cparams;
+  cparams.butteraugli_distance = 2.0;
+  cparams.speed_tier = SpeedTier::kSquirrel;
+  DecompressParams dparams;
+  // Case 1: preview decoding off; the decoded preview frame must be empty.
+  dparams.preview = Override::kOff;
+
+  CodecInOut io2;
+  Roundtrip(&io, cparams, dparams, pool, &io2);
+  EXPECT_LE(ButteraugliDistance(io, io2, cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            2.5);
+  EXPECT_EQ(0, io2.preview_frame.xsize());
+  // Case 2: preview decoding on; size and content of the preview roundtrip.
+  dparams.preview = Override::kOn;
+
+  CodecInOut io3;
+  Roundtrip(&io, cparams, dparams, pool, &io3);
+  EXPECT_EQ(preview_xsize, io3.metadata.m.preview_size.xsize());
+  EXPECT_EQ(preview_ysize, io3.metadata.m.preview_size.ysize());
+  EXPECT_EQ(preview_xsize, io3.preview_frame.xsize());
+  EXPECT_EQ(preview_ysize, io3.preview_frame.ysize());
+
+  EXPECT_LE(ButteraugliDistance(io.preview_frame, io3.preview_frame,
+                                cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            2.5);
+  EXPECT_LE(ButteraugliDistance(io.Main(), io3.Main(), cparams.ba_params,
+                                /*distmap=*/nullptr, pool),
+            2.5);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/progressive_split.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/progressive_split.cc
new file mode 100644
index 0000000000..d0a16b915a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/progressive_split.cc
@@ -0,0 +1,128 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/progressive_split.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <memory>
+
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+bool ProgressiveSplitter::SuperblockIsSalient(size_t row_start,
+                                              size_t col_start, size_t num_rows,
+                                              size_t num_cols) const {
+  // A missing or empty saliency map, or a zero threshold (which admits every
+  // block), classifies the superblock as salient right away.
+  const bool no_map = saliency_map_ == nullptr || saliency_map_->xsize() == 0;
+  if (no_map || saliency_threshold_ == 0.0) return true;
+
+  // Clamp the scanned window to the bounds of the map.
+  const size_t y_end = std::min(saliency_map_->ysize(), row_start + num_rows);
+  const size_t x_end = std::min(saliency_map_->xsize(), col_start + num_cols);
+
+  // The superblock is salient as soon as one covered block reaches the
+  // threshold, so scanning stops at the first hit.
+  bool salient = false;
+  for (size_t y = row_start; y < y_end && !salient; ++y) {
+    const float* JXL_RESTRICT row = saliency_map_->ConstRow(y);
+    for (size_t x = col_start; x < x_end && !salient; ++x) {
+      salient = row[x] >= saliency_threshold_;
+    }
+  }
+  // False here means that no covered block reached the threshold.
+  return salient;
+}
+
+// Splits the 3 * `size` coefficients of `block` across the per-pass buffers
+// in `output`, writing at `offset`, according to the pass list in mode_.
+// `acs`, `bx` and `by` give the covered-block extent used by the saliency test.
+template <typename T>
+void ProgressiveSplitter::SplitACCoefficients(
+    const T* JXL_RESTRICT block, size_t size, const AcStrategy& acs, size_t bx,
+    size_t by, size_t offset, T* JXL_RESTRICT output[kMaxNumPasses][3]) {
+  auto shift_right_round0 = [](T v, int shift) {  // `>>` rounding towards 0
+    T one_if_negative = static_cast<uint32_t>(v) >> 31;
+    T add = (one_if_negative << shift) - one_if_negative;
+    return (v + add) >> shift;
+  };
+  // Early quit for the simple case of only one pass.
+  if (mode_.num_passes == 1) {
+    for (size_t c = 0; c < 3; c++) {
+      memcpy(output[0][c] + offset, block + c * size, sizeof(T) * size);
+    }
+    return;
+  }
+  size_t ncoeffs_all_done_from_earlier_passes = 1;
+  bool previous_pass_salient_only = false;
+
+  int previous_pass_shift = 0;
+  for (size_t num_pass = 0; num_pass < mode_.num_passes; num_pass++) {  // pass
+    // Zero out output block.
+    for (size_t c = 0; c < 3; c++) {
+      memset(output[num_pass][c] + offset, 0, size * sizeof(T));
+    }
+    const bool current_pass_salient_only = mode_.passes[num_pass].salient_only;
+    const int pass_shift = mode_.passes[num_pass].shift;
+    size_t frame_ncoeffs = mode_.passes[num_pass].num_coefficients;
+    for (size_t c = 0; c < 3; c++) {  // color-channel
+      size_t xsize = acs.covered_blocks_x();
+      size_t ysize = acs.covered_blocks_y();
+      CoefficientLayout(&ysize, &xsize);
+      if (current_pass_salient_only || previous_pass_salient_only) {
+        // Current or previous pass is salient-only.
+        const bool superblock_is_salient =
+            SuperblockIsSalient(by, bx, ysize, xsize);
+        if (current_pass_salient_only != superblock_is_salient) {
+          // Current pass is salient-only, but block is not salient,
+          // OR last pass was salient-only, and block is salient
+          // (hence was already included in last pass).
+          continue;
+        }
+      }
+      for (size_t y = 0; y < ysize * frame_ncoeffs; y++) {    // superblk-y
+        for (size_t x = 0; x < xsize * frame_ncoeffs; x++) {  // superblk-x
+          size_t pos = y * xsize * kBlockDim + x;
+          if (x < xsize * ncoeffs_all_done_from_earlier_passes &&
+              y < ysize * ncoeffs_all_done_from_earlier_passes) {
+            // This coefficient was already included in an earlier pass,
+            // which included a genuinely smaller set of coefficients
+            // (= is not about saliency-splitting).
+            continue;
+          }
+          T v = block[c * size + pos];
+          // Previous pass discarded some bits: do not encode them again.
+          if (previous_pass_shift != 0) {
+            T previous_v = shift_right_round0(v, previous_pass_shift) *
+                           (1 << previous_pass_shift);
+            v -= previous_v;
+          }
+          output[num_pass][c][offset + pos] = shift_right_round0(v, pass_shift);
+        }  // superblk-x
+      }    // superblk-y
+    }      // color-channel
+    // After an unshifted pass that is not salient-only, every block holds all
+    // coefficients up to frame_ncoeffs, so later passes may skip that square.
+    if (!current_pass_salient_only && pass_shift == 0) {
+      ncoeffs_all_done_from_earlier_passes = frame_ncoeffs;
+    }
+    previous_pass_salient_only = current_pass_salient_only;
+    previous_pass_shift = pass_shift;
+  }  // num_pass
+}
+
+template void ProgressiveSplitter::SplitACCoefficients<int32_t>(
+    const int32_t* JXL_RESTRICT, size_t, const AcStrategy&, size_t, size_t,
+    size_t, int32_t* JXL_RESTRICT[kMaxNumPasses][3]);
+
+template void ProgressiveSplitter::SplitACCoefficients<int16_t>(
+    const int16_t* JXL_RESTRICT, size_t, const AcStrategy&, size_t, size_t,
+    size_t, int16_t* JXL_RESTRICT[kMaxNumPasses][3]);
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/progressive_split.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/progressive_split.h
new file mode 100644
index 0000000000..68ab7bc9dc
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/progressive_split.h
@@ -0,0 +1,149 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_PROGRESSIVE_SPLIT_H_
+#define LIB_JXL_PROGRESSIVE_SPLIT_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <limits>
+#include <memory>
+#include <vector>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/splines.h"
+
+// Functions to split DCT coefficients in multiple passes. All the passes of a
+// single frame are added together.
+
+namespace jxl {
+// Sentinel: the pass is not a valid stopping point for any downsampling factor.
+constexpr size_t kNoDownsamplingFactor = std::numeric_limits<size_t>::max();
+
+struct PassDefinition {
+  // Side of the square of coefficients kept in each 8x8 block. Must be greater
+  // than 1, and at most 8. Should be non-decreasing from one pass to the next
+  // (the ProgressiveMode constructor asserts the ordering).
+  size_t num_coefficients;
+
+  // Number of bits the encoded values are shifted right by, with rounding.
+  size_t shift;
+
+  // Whether or not we should include only salient blocks.
+  // TODO(veluca): ignored for now.
+  bool salient_only;
+
+  // If not kNoDownsamplingFactor: it is fine to stop decoding after this pass
+  // when the requested downsampling factor is at least this value.
+  // kNoDownsamplingFactor marks the pass as unsuitable for any downsampling.
+  size_t suitable_for_downsampling_of_at_least;
+};
+
+struct ProgressiveMode {  // default: one pass, all coefficients, unshifted
+  size_t num_passes = 1;
+  PassDefinition passes[kMaxNumPasses] = {PassDefinition{
+      /*num_coefficients=*/8, /*shift=*/0, /*salient_only=*/false,
+      /*suitable_for_downsampling_of_at_least=*/1}};
+
+  ProgressiveMode() = default;
+  // Copies `p`. Asserts each pass adds coefficients, lifts salient-only or
+  // lowers the shift, and that explicit downsampling factors never increase.
+  template <size_t nump>
+  explicit ProgressiveMode(const PassDefinition (&p)[nump]) {
+    static_assert(nump <= kMaxNumPasses, "too many passes");
+    num_passes = nump;
+    PassDefinition previous_pass{
+        /*num_coefficients=*/1, /*shift=*/0, /*salient_only=*/false,
+        /*suitable_for_downsampling_of_at_least=*/kNoDownsamplingFactor};
+    size_t last_downsampling_factor = kNoDownsamplingFactor;
+    for (size_t i = 0; i < nump; i++) {
+      JXL_ASSERT(p[i].num_coefficients > previous_pass.num_coefficients ||
+                 (p[i].num_coefficients == previous_pass.num_coefficients &&
+                  !p[i].salient_only && previous_pass.salient_only) ||
+                 (p[i].num_coefficients == previous_pass.num_coefficients &&
+                  p[i].shift < previous_pass.shift));
+      JXL_ASSERT(p[i].suitable_for_downsampling_of_at_least ==
+                     kNoDownsamplingFactor ||
+                 p[i].suitable_for_downsampling_of_at_least <=
+                     last_downsampling_factor);
+      if (p[i].suitable_for_downsampling_of_at_least != kNoDownsamplingFactor) {
+        last_downsampling_factor = p[i].suitable_for_downsampling_of_at_least;
+      }
+      previous_pass = passes[i] = p[i];
+    }
+  }
+};
+
+class ProgressiveSplitter {
+ public:
+  void SetProgressiveMode(ProgressiveMode mode) { mode_ = mode; }
+
+  void SetSaliencyMap(const ImageF* saliency_map) {
+    saliency_map_ = saliency_map;
+  }
+
+  void SetSaliencyThreshold(float threshold) {
+    saliency_threshold_ = threshold;
+  }
+
+  size_t GetNumPasses() const { return mode_.num_passes; }
+  // Fills `passes` (pass count, shifts, downsampling stops) from mode_.
+  void InitPasses(Passes* JXL_RESTRICT passes) const {
+    passes->num_passes = static_cast<uint32_t>(GetNumPasses());
+    passes->num_downsample = 0;
+    JXL_ASSERT(passes->num_passes != 0);
+    passes->shift[passes->num_passes - 1] = 0;  // last pass: shift forced to 0
+    if (passes->num_passes == 1) return;  // Done, arrays are empty
+    // All passes but the last: copy the shift and record downsampling stops.
+    for (uint32_t i = 0; i < mode_.num_passes - 1; ++i) {
+      const size_t min_downsampling_factor =
+          mode_.passes[i].suitable_for_downsampling_of_at_least;
+      passes->shift[i] = mode_.passes[i].shift;
+      if (1 < min_downsampling_factor &&
+          min_downsampling_factor != kNoDownsamplingFactor) {
+        passes->downsample[passes->num_downsample] = min_downsampling_factor;
+        passes->last_pass[passes->num_downsample] = i;
+        passes->num_downsample += 1;
+      }
+    }
+  }
+  // Declared here only; the definition is not part of this header.
+  template <typename T>
+  void SplitACCoefficients(const T* JXL_RESTRICT block, size_t size,
+                           const AcStrategy& acs, size_t bx, size_t by,
+                           size_t offset,
+                           T* JXL_RESTRICT output[kMaxNumPasses][3]);
+
+ private:
+  bool SuperblockIsSalient(size_t row_start, size_t col_start, size_t num_rows,
+                           size_t num_cols) const;
+  ProgressiveMode mode_;
+
+  // Not owned, must remain valid.
+  const ImageF* saliency_map_ = nullptr;
+  float saliency_threshold_ = 0.0;
+};
+// Only the int32_t and int16_t instantiations are declared for external use.
+extern template void ProgressiveSplitter::SplitACCoefficients<int32_t>(
+    const int32_t* JXL_RESTRICT, size_t, const AcStrategy&, size_t, size_t,
+    size_t, int32_t* JXL_RESTRICT[kMaxNumPasses][3]);
+
+extern template void ProgressiveSplitter::SplitACCoefficients<int16_t>(
+    const int16_t* JXL_RESTRICT, size_t, const AcStrategy&, size_t, size_t,
+    size_t, int16_t* JXL_RESTRICT[kMaxNumPasses][3]);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_PROGRESSIVE_SPLIT_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quant_weights.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quant_weights.cc
new file mode 100644
index 0000000000..f7adc0a838
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quant_weights.cc
@@ -0,0 +1,1184 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+#include "lib/jxl/quant_weights.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <utility>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/dec_modular.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// kQuantWeights[N * N * c + N * y + x] is the relative weight of the (x, y)
+// coefficient in component c. Higher weights correspond to finer quantization
+// intervals and more bits spent in encoding.
+
+namespace {
+
+static constexpr const float kAlmostZero = 1e-8f;  // smallest accepted value
+
+void GetQuantWeightsDCT2(const QuantEncoding::DCT2Weights& dct2weights,
+                         float* weights) {  // out: 3 channels x 64 weights
+  for (size_t c = 0; c < 3; c++) {
+    size_t start = c * 64;  // each channel is an 8x8 grid with row stride 8
+    weights[start] = 0xBAD;  // sentinel; presumably never read - TODO confirm
+    weights[start + 1] = weights[start + 8] = dct2weights[c][0];
+    weights[start + 9] = dct2weights[c][1];
+    for (size_t y = 0; y < 2; y++) {  // rows 0-1 x cols 2-3, rows 2-3 x cols 0-1
+      for (size_t x = 0; x < 2; x++) {
+        weights[start + y * 8 + x + 2] = dct2weights[c][2];
+        weights[start + (y + 2) * 8 + x] = dct2weights[c][2];
+      }
+    }
+    for (size_t y = 0; y < 2; y++) {  // rows 2-3 x cols 2-3
+      for (size_t x = 0; x < 2; x++) {
+        weights[start + (y + 2) * 8 + x + 2] = dct2weights[c][3];
+      }
+    }
+    for (size_t y = 0; y < 4; y++) {  // rows 0-3 x cols 4-7, rows 4-7 x cols 0-3
+      for (size_t x = 0; x < 4; x++) {
+        weights[start + y * 8 + x + 4] = dct2weights[c][4];
+        weights[start + (y + 4) * 8 + x] = dct2weights[c][4];
+      }
+    }
+    for (size_t y = 0; y < 4; y++) {  // rows 4-7 x cols 4-7
+      for (size_t x = 0; x < 4; x++) {
+        weights[start + (y + 4) * 8 + x + 4] = dct2weights[c][5];
+      }
+    }
+  }
+}
+
+void GetQuantWeightsIdentity(const QuantEncoding::IdWeights& idweights,
+                             float* weights) {
+  // Every channel gets 64 weights: idweights[c][0] everywhere, except that
+  // positions 1 and 8 take idweights[c][1] and position 9 takes idweights[c][2].
+  for (size_t c = 0; c < 3; ++c) {
+    float* channel_weights = weights + 64 * c;
+    std::fill(channel_weights, channel_weights + 64, idweights[c][0]);
+    channel_weights[1] = channel_weights[8] = idweights[c][1];
+    channel_weights[9] = idweights[c][2];
+  }
+}
+
+float Mult(float v) {
+  // Positive v maps to 1 + v; any other v maps to 1 / (1 - v).
+  return v > 0 ? 1 + v : 1 / (1 - v);
+}
+
+float Interpolate(float pos, float max, const float* array, size_t len) {
+  float scaled_pos = pos * (len - 1) / max;  // map [0, max] onto [0, len - 1]
+  size_t idx = scaled_pos;  // truncation: index of the lower neighbour
+  JXL_ASSERT(idx + 1 < len);  // array[idx + 1] below must stay in range
+  float a = array[idx];
+  float b = array[idx + 1];
+  return a * pow(b / a, scaled_pos - idx);  // geometric blend between a and b
+}
+
+// Computes quant weights for a COLS*ROWS-sized transform for all 3 channels,
+// using num_bands distance bands per channel. If print_mode is 1, prints the
+// resulting matrix to stderr; if print_mode is 2, prints the matrix in a
+// format suitable for a 3d plot with gnuplot.
+template <size_t print_mode = 0>
+Status GetQuantWeights(
+    size_t ROWS, size_t COLS,
+    const DctQuantWeightParams::DistanceBandsArray& distance_bands,
+    size_t num_bands, float* out) {
+  for (size_t c = 0; c < 3; c++) {
+    if (print_mode) {
+      fprintf(stderr, "Channel %zu\n", c);
+    }
+    float bands[DctQuantWeightParams::kMaxDistanceBands] = {
+        distance_bands[c][0]};
+    if (bands[0] < kAlmostZero) return JXL_FAILURE("Invalid distance bands");
+    for (size_t i = 1; i < num_bands; i++) {
+      bands[i] = bands[i - 1] * Mult(distance_bands[c][i]);  // cumulative
+      if (bands[i] < kAlmostZero) return JXL_FAILURE("Invalid distance bands");
+    }
+    for (size_t y = 0; y < ROWS; y++) {
+      for (size_t x = 0; x < COLS; x++) {
+        float dx = 1.0f * x / (COLS - 1);  // in [0, 1]; 0/0 if COLS == 1
+        float dy = 1.0f * y / (ROWS - 1);  // in [0, 1]; 0/0 if ROWS == 1
+        float distance = std::sqrt(dx * dx + dy * dy);  // at most sqrt(2)
+        float weight =
+            num_bands == 1
+                ? bands[0]
+                : Interpolate(distance, std::sqrt(2) + 1e-6f, bands, num_bands);
+        // (Presumably the 1e-6f keeps `distance` below Interpolate's max.)
+        if (print_mode == 1) {
+          fprintf(stderr, "%15.12f, ", weight);
+        }
+        if (print_mode == 2) {
+          fprintf(stderr, "%zu %zu %15.12f\n", x, y, weight);
+        }
+        out[c * COLS * ROWS + y * COLS + x] = weight;
+      }
+      if (print_mode) fprintf(stderr, "\n");
+      if (print_mode == 1) fprintf(stderr, "\n");
+    }
+    if (print_mode) fprintf(stderr, "\n");
+  }
+  return true;
+}
+
+Status DecodeDctParams(BitReader* br, DctQuantWeightParams* params) {
+  params->num_distance_bands =  // stored in the bitstream as (count - 1)
+      br->ReadFixedBits<DctQuantWeightParams::kLog2MaxDistanceBands>() + 1;
+  for (size_t c = 0; c < 3; c++) {  // one list of bands per channel
+    for (size_t i = 0; i < params->num_distance_bands; i++) {
+      JXL_RETURN_IF_ERROR(F16Coder::Read(br, &params->distance_bands[c][i]));
+    }
+    if (params->distance_bands[c][0] < kAlmostZero) {
+      return JXL_FAILURE("Distance band seed is too small");
+    }
+    params->distance_bands[c][0] *= 64.0f;  // only the first band is scaled
+  }
+  return true;
+}
+
+// Parses one quantization-table encoding from `br` into `encoding`.
+//
+// A mode tag is read first and selects which fields follow. Every mode except
+// Library, DCT and RAW fails with "Invalid mode" unless
+// required_size_x * required_size_y == 1. `idx` and `modular_frame_decoder`
+// are only forwarded to ModularFrameDecoder::DecodeQuantTable for RAW tables,
+// together with the required sizes multiplied by kBlockDim.
+// On success `encoding->mode` is set to the decoded mode.
+Status Decode(BitReader* br, QuantEncoding* encoding, size_t required_size_x,
+              size_t required_size_y, size_t idx,
+              ModularFrameDecoder* modular_frame_decoder) {
+  const bool is_single_block = (required_size_x * required_size_y == 1);
+  const int mode = br->ReadFixedBits<kLog2NumQuantModes>();
+  switch (mode) {
+    case QuantEncoding::kQuantModeLibrary: {
+      // Index of a predefined table; must be in range.
+      encoding->predefined = br->ReadFixedBits<kCeilLog2NumPredefinedTables>();
+      if (encoding->predefined >= kNumPredefinedTables) {
+        return JXL_FAILURE("Invalid predefined table");
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeID: {
+      if (!is_single_block) return JXL_FAILURE("Invalid mode");
+      for (size_t channel = 0; channel < 3; ++channel) {
+        for (size_t k = 0; k < 3; ++k) {
+          auto& weight = encoding->idweights[channel][k];
+          JXL_RETURN_IF_ERROR(F16Coder::Read(br, &weight));
+          if (std::abs(weight) < kAlmostZero) {
+            return JXL_FAILURE("ID Quantizer is too small");
+          }
+          weight *= 64;
+        }
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT2: {
+      if (!is_single_block) return JXL_FAILURE("Invalid mode");
+      for (size_t channel = 0; channel < 3; ++channel) {
+        for (size_t k = 0; k < 6; ++k) {
+          auto& weight = encoding->dct2weights[channel][k];
+          JXL_RETURN_IF_ERROR(F16Coder::Read(br, &weight));
+          if (std::abs(weight) < kAlmostZero) {
+            return JXL_FAILURE("Quantizer is too small");
+          }
+          weight *= 64;
+        }
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT4X8: {
+      if (!is_single_block) return JXL_FAILURE("Invalid mode");
+      // One multiplier per channel, followed by the shared DCT parameters.
+      for (size_t channel = 0; channel < 3; ++channel) {
+        auto& multiplier = encoding->dct4x8multipliers[channel];
+        JXL_RETURN_IF_ERROR(F16Coder::Read(br, &multiplier));
+        if (std::abs(multiplier) < kAlmostZero) {
+          return JXL_FAILURE("DCT4X8 multiplier is too small");
+        }
+      }
+      JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params));
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT4: {
+      if (!is_single_block) return JXL_FAILURE("Invalid mode");
+      // Two multipliers per channel, followed by the shared DCT parameters.
+      for (size_t channel = 0; channel < 3; ++channel) {
+        for (size_t k = 0; k < 2; ++k) {
+          auto& multiplier = encoding->dct4multipliers[channel][k];
+          JXL_RETURN_IF_ERROR(F16Coder::Read(br, &multiplier));
+          if (std::abs(multiplier) < kAlmostZero) {
+            return JXL_FAILURE("DCT4 multiplier is too small");
+          }
+        }
+      }
+      JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params));
+      break;
+    }
+    case QuantEncoding::kQuantModeAFV: {
+      if (!is_single_block) return JXL_FAILURE("Invalid mode");
+      for (size_t channel = 0; channel < 3; ++channel) {
+        auto& afv = encoding->afv_weights[channel];
+        for (size_t k = 0; k < 9; ++k) {
+          JXL_RETURN_IF_ERROR(F16Coder::Read(br, &afv[k]));
+        }
+        // Only the first six of the nine weights are rescaled.
+        for (size_t k = 0; k < 6; ++k) {
+          afv[k] *= 64;
+        }
+        // Both parameter sets are read on every channel iteration.
+        JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params));
+        JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params_afv_4x4));
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT: {
+      JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params));
+      break;
+    }
+    case QuantEncoding::kQuantModeRAW: {
+      // Set mode early, to avoid mem-leak.
+      encoding->mode = QuantEncoding::kQuantModeRAW;
+      const size_t size_x = required_size_x * kBlockDim;
+      const size_t size_y = required_size_y * kBlockDim;
+      JXL_RETURN_IF_ERROR(ModularFrameDecoder::DecodeQuantTable(
+          size_x, size_y, br, encoding, idx, modular_frame_decoder));
+      break;
+    }
+    default:
+      return JXL_FAILURE("Invalid quantization table encoding");
+  }
+  encoding->mode = QuantEncoding::Mode(mode);
+  return true;
+}
+
+// Computes the quantization tables for one table `kind` from `encoding`.
+//
+// A scratch buffer of per-channel weights is filled according to
+// `encoding.mode`, with num = 64 * required_size_x[kind] *
+// required_size_y[kind] weights per channel. Then, for each of the 3 * num
+// weights w, 1 / w is stored in `table` and w in `inv_table`, starting at index
+// *pos; *pos is advanced by one per stored weight. Finally the `inv_table`
+// entries of the lowest-frequency corner of every channel are set to 0.
+//
+// Returns a failure if any weight lies outside [kAlmostZero, 1 / kAlmostZero],
+// if a RAW table is missing or has the wrong size, if the AFV bands are too
+// small, or if GetQuantWeights fails. On a range failure *pos has already been
+// advanced past the entries written so far.
+//
+// The modes that only describe an 8x8 table (ID, DCT2, DCT4, DCT4X8, AFV)
+// assert num == kDCTBlockSize. `table_num` is not used by this function.
+//
+// TODO(veluca): SIMD-fy. With 256x256, this is actually slow.
+Status ComputeQuantTable(const QuantEncoding& encoding,
+                         float* JXL_RESTRICT table,
+                         float* JXL_RESTRICT inv_table, size_t table_num,
+                         DequantMatrices::QuantTable kind, size_t* pos) {
+  std::vector<float> weights(3 * kMaxQuantTableSize);
+
+  constexpr size_t N = kBlockDim;
+  size_t wrows = 8 * DequantMatrices::required_size_x[kind],
+         wcols = 8 * DequantMatrices::required_size_y[kind];
+  size_t num = wrows * wcols;
+
+  switch (encoding.mode) {
+    case QuantEncoding::kQuantModeLibrary: {
+      // Library and copy quant encoding should get replaced by the actual
+      // parameters by the caller.
+      JXL_ASSERT(false);
+      break;
+    }
+    case QuantEncoding::kQuantModeID: {
+      JXL_ASSERT(num == kDCTBlockSize);
+      GetQuantWeightsIdentity(encoding.idweights, weights.data());
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT2: {
+      JXL_ASSERT(num == kDCTBlockSize);
+      GetQuantWeightsDCT2(encoding.dct2weights, weights.data());
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT4: {
+      JXL_ASSERT(num == kDCTBlockSize);
+      float weights4x4[3 * 4 * 4];
+      // Always use 4x4 GetQuantWeights for DCT4 quantization tables.
+      JXL_RETURN_IF_ERROR(
+          GetQuantWeights(4, 4, encoding.dct_params.distance_bands,
+                          encoding.dct_params.num_distance_bands, weights4x4));
+      for (size_t c = 0; c < 3; c++) {
+        // Each 4x4 weight is replicated over a 2x2 cell of the 8x8 table.
+        for (size_t y = 0; y < kBlockDim; y++) {
+          for (size_t x = 0; x < kBlockDim; x++) {
+            weights[c * num + y * kBlockDim + x] =
+                weights4x4[c * 16 + (y / 2) * 4 + (x / 2)];
+          }
+        }
+        // Positions 1, N and N + 1 are additionally divided by the decoded
+        // per-channel multipliers.
+        weights[c * num + 1] /= encoding.dct4multipliers[c][0];
+        weights[c * num + N] /= encoding.dct4multipliers[c][0];
+        weights[c * num + N + 1] /= encoding.dct4multipliers[c][1];
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT4X8: {
+      JXL_ASSERT(num == kDCTBlockSize);
+      float weights4x8[3 * 4 * 8];
+      // Always use 4x8 GetQuantWeights for DCT4X8 quantization tables.
+      JXL_RETURN_IF_ERROR(
+          GetQuantWeights(4, 8, encoding.dct_params.distance_bands,
+                          encoding.dct_params.num_distance_bands, weights4x8));
+      for (size_t c = 0; c < 3; c++) {
+        // Each row of the 4x8 weights fills two consecutive rows of the table.
+        for (size_t y = 0; y < kBlockDim; y++) {
+          for (size_t x = 0; x < kBlockDim; x++) {
+            weights[c * num + y * kBlockDim + x] =
+                weights4x8[c * 32 + (y / 2) * 8 + x];
+          }
+        }
+        weights[c * num + N] /= encoding.dct4x8multipliers[c];
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT: {
+      JXL_RETURN_IF_ERROR(GetQuantWeights(
+          wrows, wcols, encoding.dct_params.distance_bands,
+          encoding.dct_params.num_distance_bands, weights.data()));
+      break;
+    }
+    case QuantEncoding::kQuantModeRAW: {
+      // The raw table must be present and hold exactly 3 * num entries.
+      if (!encoding.qraw.qtable || encoding.qraw.qtable->size() != 3 * num) {
+        return JXL_FAILURE("Invalid table encoding");
+      }
+      for (size_t i = 0; i < 3 * num; i++) {
+        weights[i] =
+            1.f / (encoding.qraw.qtable_den * (*encoding.qraw.qtable)[i]);
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeAFV: {
+      // The AFV layout below is written with fixed 8x8 offsets (c * 64 mixed
+      // with c * num), so it is only valid for a single-block table. Enforce
+      // this the same way as the other 8x8-only modes do.
+      JXL_ASSERT(num == kDCTBlockSize);
+      // 0xBAD marks the entries with x < 2 && y < 2, which the interpolation
+      // loop below skips.
+      constexpr float kFreqs[] = {
+          0xBAD,
+          0xBAD,
+          0.8517778890324296,
+          5.37778436506804,
+          0xBAD,
+          0xBAD,
+          4.734747904497923,
+          5.449245381693219,
+          1.6598270267479331,
+          4,
+          7.275749096817861,
+          10.423227632456525,
+          2.662932286148962,
+          7.630657783650829,
+          8.962388608184032,
+          12.97166202570235,
+      };
+
+      float weights4x8[3 * 4 * 8];
+      JXL_RETURN_IF_ERROR((
+          GetQuantWeights(4, 8, encoding.dct_params.distance_bands,
+                          encoding.dct_params.num_distance_bands, weights4x8)));
+      float weights4x4[3 * 4 * 4];
+      JXL_RETURN_IF_ERROR((GetQuantWeights(
+          4, 4, encoding.dct_params_afv_4x4.distance_bands,
+          encoding.dct_params_afv_4x4.num_distance_bands, weights4x4)));
+
+      // `lo` is the smallest and `hi + lo` (minus the epsilon) the largest
+      // value in kFreqs other than the 0xBAD placeholders.
+      constexpr float lo = 0.8517778890324296;
+      constexpr float hi = 12.97166202570235 - lo + 1e-6;
+      for (size_t c = 0; c < 3; c++) {
+        // Bands are built cumulatively from afv_weights[c][5..8]; each must
+        // stay at or above kAlmostZero.
+        float bands[4];
+        bands[0] = encoding.afv_weights[c][5];
+        if (bands[0] < kAlmostZero) return JXL_FAILURE("Invalid AFV bands");
+        for (size_t i = 1; i < 4; i++) {
+          bands[i] = bands[i - 1] * Mult(encoding.afv_weights[c][i + 5]);
+          if (bands[i] < kAlmostZero) return JXL_FAILURE("Invalid AFV bands");
+        }
+        size_t start = c * 64;
+        auto set_weight = [&start, &weights](size_t x, size_t y, float val) {
+          weights[start + y * 8 + x] = val;
+        };
+        weights[start] = 1;  // Not used, but causes MSAN error otherwise.
+        // Weights for (0, 1) and (1, 0).
+        set_weight(0, 1, encoding.afv_weights[c][0]);
+        set_weight(1, 0, encoding.afv_weights[c][1]);
+        // AFV special weights for 3-pixel corner.
+        set_weight(0, 2, encoding.afv_weights[c][2]);
+        set_weight(2, 0, encoding.afv_weights[c][3]);
+        set_weight(2, 2, encoding.afv_weights[c][4]);
+
+        // All other AFV weights.
+        for (size_t y = 0; y < 4; y++) {
+          for (size_t x = 0; x < 4; x++) {
+            if (x < 2 && y < 2) continue;
+            float val = Interpolate(kFreqs[y * 4 + x] - lo, hi, bands, 4);
+            set_weight(2 * x, 2 * y, val);
+          }
+        }
+
+        // Put 4x8 weights in odd rows, except (1, 0).
+        for (size_t y = 0; y < kBlockDim / 2; y++) {
+          for (size_t x = 0; x < kBlockDim; x++) {
+            if (x == 0 && y == 0) continue;
+            weights[c * num + (2 * y + 1) * kBlockDim + x] =
+                weights4x8[c * 32 + y * 8 + x];
+          }
+        }
+        // Put 4x4 weights in even rows / odd columns, except (0, 1).
+        for (size_t y = 0; y < kBlockDim / 2; y++) {
+          for (size_t x = 0; x < kBlockDim / 2; x++) {
+            if (x == 0 && y == 0) continue;
+            weights[c * num + (2 * y) * kBlockDim + 2 * x + 1] =
+                weights4x4[c * 16 + y * 4 + x];
+          }
+        }
+      }
+      break;
+    }
+  }
+  // Remember where this table starts so the low-frequency entries can be
+  // patched after the copy below.
+  size_t prev_pos = *pos;
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t i = 0; i < num; i++) {
+      float inv_val = weights[c * num + i];
+      if (inv_val > 1.0f / kAlmostZero || inv_val < kAlmostZero) {
+        return JXL_FAILURE("Invalid quantization table");
+      }
+      float val = 1.0f / inv_val;
+      table[*pos] = val;
+      inv_table[*pos] = inv_val;
+      (*pos)++;
+    }
+  }
+  // Ensure that the lowest frequencies have a 0 inverse table.
+  // This does not affect en/decoding, but allows AC strategy selection to be
+  // slightly simpler.
+  size_t xs = DequantMatrices::required_size_x[kind];
+  size_t ys = DequantMatrices::required_size_y[kind];
+  // NOTE(review): CoefficientLayout is defined elsewhere and may modify xs/ys.
+  CoefficientLayout(&ys, &xs);
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t y = 0; y < ys; y++) {
+      for (size_t x = 0; x < xs; x++) {
+        inv_table[prev_pos + c * ys * xs * kDCTBlockSize + y * kBlockDim * xs +
+                  x] = 0;
+      }
+    }
+  }
+  return true;
+}
+
+}  // namespace
+
+// Out-of-class definitions of the static constexpr arrays; needed before C++17.
+constexpr size_t DequantMatrices::required_size_[];
+constexpr size_t DequantMatrices::required_size_x[];
+constexpr size_t DequantMatrices::required_size_y[];
+constexpr DequantMatrices::QuantTable DequantMatrices::kQuantTable[];
+
+// Reads the quantization-table encodings from `br` and recomputes the tables.
+// A leading 1-bit flag says whether all tables keep their default encoding;
+// if it is clear, one encoding per table is parsed with jxl::Decode.
+// Returns the first parse error, otherwise the result of Compute().
+Status DequantMatrices::Decode(BitReader* br,
+                               ModularFrameDecoder* modular_frame_decoder) {
+  const bool all_default = br->ReadBits(1) != 0;
+  // Reset every slot to QuantEncoding::Library(0) before parsing.
+  encodings_.clear();
+  encodings_.resize(kNum, QuantEncoding::Library(0));
+  if (!all_default) {
+    const size_t table_count = static_cast<size_t>(kNum);
+    for (size_t table = 0; table < table_count; ++table) {
+      JXL_RETURN_IF_ERROR(jxl::Decode(
+          br, &encodings_[table], required_size_x[table],
+          required_size_y[table], table, modular_frame_decoder));
+    }
+  }
+  return DequantMatrices::Compute();
+}
+
+// Reads the per-channel DC quantization factors from `br`.
+// A leading 1-bit flag set to 1 leaves the current values untouched. Otherwise
+// three F16 values are read, each scaled by 1/128 and rejected if the result
+// is below kAlmostZero; the reciprocal is stored in inv_dc_quant_.
+Status DequantMatrices::DecodeDC(BitReader* br) {
+  const bool all_default = br->ReadBits(1);
+  if (all_default) return true;
+
+  for (size_t channel = 0; channel < 3; ++channel) {
+    auto& quant = dc_quant_[channel];
+    JXL_RETURN_IF_ERROR(F16Coder::Read(br, &quant));
+    quant *= 1.0f / 128.0f;
+    // Negative values and nearly zero are invalid values.
+    if (quant < kAlmostZero) {
+      return JXL_FAILURE("Invalid dc_quant: coefficient is too small.");
+    }
+    inv_dc_quant_[channel] = 1.0f / quant;
+  }
+  return true;
+}
+
+// Returns its argument as a float; arguments of other arithmetic types are
+// converted to float at the call site by the parameter type.
+constexpr float V(float v) {
+  return v;
+}
+
+namespace {
+struct DequantMatricesLibraryDef {
+  // DCT8
+  static constexpr const QuantEncodingInternal DCT() {
+    return QuantEncodingInternal::DCT(DctQuantWeightParams({{{
+                                                                 V(3150.0),
+                                                                 V(0.0),
+                                                                 V(-0.4),
+                                                                 V(-0.4),
+                                                                 V(-0.4),
+                                                                 V(-2.0),
+                                                             },
+                                                             {
+                                                                 V(560.0),
+                                                                 V(0.0),
+                                                                 V(-0.3),
+                                                                 V(-0.3),
+                                                                 V(-0.3),
+                                                                 V(-0.3),
+                                                             },
+                                                             {
+                                                                 V(512.0),
+                                                                 V(-2.0),
+                                                                 V(-1.0),
+                                                                 V(0.0),
+                                                                 V(-1.0),
+                                                                 V(-2.0),
+                                                             }}},
+                                                           6));
+  }
+
+  // Identity
+  static constexpr const QuantEncodingInternal IDENTITY() {
+    return QuantEncodingInternal::Identity({{{
+                                                 V(280.0),
+                                                 V(3160.0),
+                                                 V(3160.0),
+                                             },
+                                             {
+                                                 V(60.0),
+                                                 V(864.0),
+                                                 V(864.0),
+                                             },
+                                             {
+                                                 V(18.0),
+                                                 V(200.0),
+                                                 V(200.0),
+                                             }}});
+  }
+
+  // DCT2
+  static constexpr const QuantEncodingInternal DCT2X2() {
+    return QuantEncodingInternal::DCT2({{{
+                                             V(3840.0),
+                                             V(2560.0),
+                                             V(1280.0),
+                                             V(640.0),
+                                             V(480.0),
+                                             V(300.0),
+                                         },
+                                         {
+                                             V(960.0),
+                                             V(640.0),
+                                             V(320.0),
+                                             V(180.0),
+                                             V(140.0),
+                                             V(120.0),
+                                         },
+                                         {
+                                             V(640.0),
+                                             V(320.0),
+                                             V(128.0),
+                                             V(64.0),
+                                             V(32.0),
+                                             V(16.0),
+                                         }}});
+  }
+
+  // DCT4 (quant_kind 3)
+  static constexpr const QuantEncodingInternal DCT4X4() {
+    return QuantEncodingInternal::DCT4(DctQuantWeightParams({{{
+                                                                  V(2200.0),
+                                                                  V(0.0),
+                                                                  V(0.0),
+                                                                  V(0.0),
+                                                              },
+                                                              {
+                                                                  V(392.0),
+                                                                  V(0.0),
+                                                                  V(0.0),
+                                                                  V(0.0),
+                                                              },
+                                                              {
+                                                                  V(112.0),
+                                                                  V(-0.25),
+                                                                  V(-0.25),
+                                                                  V(-0.5),
+                                                              }}},
+                                                            4),
+                                       /* kMul */
+                                       {{{
+                                             V(1.0),
+                                             V(1.0),
+                                         },
+                                         {
+                                             V(1.0),
+                                             V(1.0),
+                                         },
+                                         {
+                                             V(1.0),
+                                             V(1.0),
+                                         }}});
+  }
+
+  // DCT16
+  static constexpr const QuantEncodingInternal DCT16X16() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{
+                                   V(8996.8725711814115328),
+                                   V(-1.3000777393353804),
+                                   V(-0.49424529824571225),
+                                   V(-0.439093774457103443),
+                                   V(-0.6350101832695744),
+                                   V(-0.90177264050827612),
+                                   V(-1.6162099239887414),
+                               },
+                               {
+                                   V(3191.48366296844234752),
+                                   V(-0.67424582104194355),
+                                   V(-0.80745813428471001),
+                                   V(-0.44925837484843441),
+                                   V(-0.35865440981033403),
+                                   V(-0.31322389111877305),
+                                   V(-0.37615025315725483),
+                               },
+                               {
+                                   V(1157.50408145487200256),
+                                   V(-2.0531423165804414),
+                                   V(-1.4),
+                                   V(-0.50687130033378396),
+                                   V(-0.42708730624733904),
+                                   V(-1.4856834539296244),
+                                   V(-4.9209142884401604),
+                               }}},
+                             7));
+  }
+
+  // DCT32
+  static constexpr const QuantEncodingInternal DCT32X32() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{
+                                   V(15718.40830982518931456),
+                                   V(-1.025),
+                                   V(-0.98),
+                                   V(-0.9012),
+                                   V(-0.4),
+                                   V(-0.48819395464),
+                                   V(-0.421064),
+                                   V(-0.27),
+                               },
+                               {
+                                   V(7305.7636810695983104),
+                                   V(-0.8041958212306401),
+                                   V(-0.7633036457487539),
+                                   V(-0.55660379990111464),
+                                   V(-0.49785304658857626),
+                                   V(-0.43699592683512467),
+                                   V(-0.40180866526242109),
+                                   V(-0.27321683125358037),
+                               },
+                               {
+                                   V(3803.53173721215041536),
+                                   V(-3.060733579805728),
+                                   V(-2.0413270132490346),
+                                   V(-2.0235650159727417),
+                                   V(-0.5495389509954993),
+                                   V(-0.4),
+                                   V(-0.4),
+                                   V(-0.3),
+                               }}},
+                             8));
+  }
+
+  // DCT16X8
+  static constexpr const QuantEncodingInternal DCT8X16() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{
+                                   V(7240.7734393502),
+                                   V(-0.7),
+                                   V(-0.7),
+                                   V(-0.2),
+                                   V(-0.2),
+                                   V(-0.2),
+                                   V(-0.5),
+                               },
+                               {
+                                   V(1448.15468787004),
+                                   V(-0.5),
+                                   V(-0.5),
+                                   V(-0.5),
+                                   V(-0.2),
+                                   V(-0.2),
+                                   V(-0.2),
+                               },
+                               {
+                                   V(506.854140754517),
+                                   V(-1.4),
+                                   V(-0.2),
+                                   V(-0.5),
+                                   V(-0.5),
+                                   V(-1.5),
+                                   V(-3.6),
+                               }}},
+                             7));
+  }
+
+  // DCT32X8
+  static constexpr const QuantEncodingInternal DCT8X32() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{
+                                   V(16283.2494710648897),
+                                   V(-1.7812845336559429),
+                                   V(-1.6309059012653515),
+                                   V(-1.0382179034313539),
+                                   V(-0.85),
+                                   V(-0.7),
+                                   V(-0.9),
+                                   V(-1.2360638576849587),
+                               },
+                               {
+                                   V(5089.15750884921511936),
+                                   V(-0.320049391452786891),
+                                   V(-0.35362849922161446),
+                                   V(-0.30340000000000003),
+                                   V(-0.61),
+                                   V(-0.5),
+                                   V(-0.5),
+                                   V(-0.6),
+                               },
+                               {
+                                   V(3397.77603275308720128),
+                                   V(-0.321327362693153371),
+                                   V(-0.34507619223117997),
+                                   V(-0.70340000000000003),
+                                   V(-0.9),
+                                   V(-1.0),
+                                   V(-1.0),
+                                   V(-1.1754605576265209),
+                               }}},
+                             8));
+  }
+
+  // DCT32X16
+  static constexpr const QuantEncodingInternal DCT16X32() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{
+                                   V(13844.97076442300573),
+                                   V(-0.97113799999999995),
+                                   V(-0.658),
+                                   V(-0.42026),
+                                   V(-0.22712),
+                                   V(-0.2206),
+                                   V(-0.226),
+                                   V(-0.6),
+                               },
+                               {
+                                   V(4798.964084220744293),
+                                   V(-0.61125308982767057),
+                                   V(-0.83770786552491361),
+                                   V(-0.79014862079498627),
+                                   V(-0.2692727459704829),
+                                   V(-0.38272769465388551),
+                                   V(-0.22924222653091453),
+                                   V(-0.20719098826199578),
+                               },
+                               {
+                                   V(1807.236946760964614),
+                                   V(-1.2),
+                                   V(-1.2),
+                                   V(-0.7),
+                                   V(-0.7),
+                                   V(-0.7),
+                                   V(-0.4),
+                                   V(-0.5),
+                               }}},
+                             8));
+  }
+
+  // DCT4X8 and 8x4
+  static constexpr const QuantEncodingInternal DCT4X8() {
+    return QuantEncodingInternal::DCT4X8(
+        DctQuantWeightParams({{
+                                 {
+                                     V(2198.050556016380522),
+                                     V(-0.96269623020744692),
+                                     V(-0.76194253026666783),
+                                     V(-0.6551140670773547),
+                                 },
+                                 {
+                                     V(764.3655248643528689),
+                                     V(-0.92630200888366945),
+                                     V(-0.9675229603596517),
+                                     V(-0.27845290869168118),
+                                 },
+                                 {
+                                     V(527.107573587542228),
+                                     V(-1.4594385811273854),
+                                     V(-1.450082094097871593),
+                                     V(-1.5843722511996204),
+                                 },
+                             }},
+                             4),
+        /* kMuls */
+        {{
+            V(1.0),
+            V(1.0),
+            V(1.0),
+        }});
+  }
+  // AFV
+  static const QuantEncodingInternal AFV0() {
+    return QuantEncodingInternal::AFV(DCT4X8().dct_params, DCT4X4().dct_params,
+                                      {{{
+                                            // 4x4/4x8 DC tendency.
+                                            V(3072.0),
+                                            V(3072.0),
+                                            // AFV corner.
+                                            V(256.0),
+                                            V(256.0),
+                                            V(256.0),
+                                            // AFV high freqs.
+                                            V(414.0),
+                                            V(0.0),
+                                            V(0.0),
+                                            V(0.0),
+                                        },
+                                        {
+                                            // 4x4/4x8 DC tendency.
+                                            V(1024.0),
+                                            V(1024.0),
+                                            // AFV corner.
+                                            V(50),
+                                            V(50),
+                                            V(50),
+                                            // AFV high freqs.
+                                            V(58.0),
+                                            V(0.0),
+                                            V(0.0),
+                                            V(0.0),
+                                        },
+                                        {
+                                            // 4x4/4x8 DC tendency.
+                                            V(384.0),
+                                            V(384.0),
+                                            // AFV corner.
+                                            V(12.0),
+                                            V(12.0),
+                                            V(12.0),
+                                            // AFV high freqs.
+                                            V(22.0),
+                                            V(-0.25),
+                                            V(-0.25),
+                                            V(-0.25),
+                                        }}});
+  }
+
+  // DCT64
+  static const QuantEncodingInternal DCT64X64() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{
+                                   V(0.9 * 26629.073922049845),
+                                   V(-1.025),
+                                   V(-0.78),
+                                   V(-0.65012),
+                                   V(-0.19041574084286472),
+                                   V(-0.20819395464),
+                                   V(-0.421064),
+                                   V(-0.32733845535848671),
+                               },
+                               {
+                                   V(0.9 * 9311.3238710010046),
+                                   V(-0.3041958212306401),
+                                   V(-0.3633036457487539),
+                                   V(-0.35660379990111464),
+                                   V(-0.3443074455424403),
+                                   V(-0.33699592683512467),
+                                   V(-0.30180866526242109),
+                                   V(-0.27321683125358037),
+                               },
+                               {
+                                   V(0.9 * 4992.2486445538634),
+                                   V(-1.2),
+                                   V(-1.2),
+                                   V(-0.8),
+                                   V(-0.7),
+                                   V(-0.7),
+                                   V(-0.4),
+                                   V(-0.5),
+                               }}},
+                             8));
+  }
+
+  // DCT32X64 (QuantTable enum in quant_weights.h also notes DCT64X32 here)
+  static const QuantEncodingInternal DCT32X64() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{
+                                   V(0.65 * 23629.073922049845),
+                                   V(-1.025),
+                                   V(-0.78),
+                                   V(-0.65012),
+                                   V(-0.19041574084286472),
+                                   V(-0.20819395464),
+                                   V(-0.421064),
+                                   V(-0.32733845535848671),
+                               },
+                               {
+                                   V(0.65 * 8611.3238710010046),
+                                   V(-0.3041958212306401),
+                                   V(-0.3633036457487539),
+                                   V(-0.35660379990111464),
+                                   V(-0.3443074455424403),
+                                   V(-0.33699592683512467),
+                                   V(-0.30180866526242109),
+                                   V(-0.27321683125358037),
+                               },
+                               {
+                                   V(0.65 * 4492.2486445538634),
+                                   V(-1.2),
+                                   V(-1.2),
+                                   V(-0.8),
+                                   V(-0.7),
+                                   V(-0.7),
+                                   V(-0.4),
+                                   V(-0.5),
+                               }}},
+                             8));  // num_dist_bands: 8 values per row
+  }
+  // DCT128X128: 3 rows (indexed by channel c) of 8 distance-band values.
+  static const QuantEncodingInternal DCT128X128() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{
+                                   V(1.8 * 26629.073922049845),
+                                   V(-1.025),
+                                   V(-0.78),
+                                   V(-0.65012),
+                                   V(-0.19041574084286472),
+                                   V(-0.20819395464),
+                                   V(-0.421064),
+                                   V(-0.32733845535848671),
+                               },
+                               {
+                                   V(1.8 * 9311.3238710010046),
+                                   V(-0.3041958212306401),
+                                   V(-0.3633036457487539),
+                                   V(-0.35660379990111464),
+                                   V(-0.3443074455424403),
+                                   V(-0.33699592683512467),
+                                   V(-0.30180866526242109),
+                                   V(-0.27321683125358037),
+                               },
+                               {
+                                   V(1.8 * 4992.2486445538634),
+                                   V(-1.2),
+                                   V(-1.2),
+                                   V(-0.8),
+                                   V(-0.7),
+                                   V(-0.7),
+                                   V(-0.4),
+                                   V(-0.5),
+                               }}},
+                             8));  // num_dist_bands: 8 values per row
+  }
+
+  // DCT64X128 (QuantTable enum in quant_weights.h also notes DCT128X64 here)
+  static const QuantEncodingInternal DCT64X128() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{
+                                   V(1.3 * 23629.073922049845),
+                                   V(-1.025),
+                                   V(-0.78),
+                                   V(-0.65012),
+                                   V(-0.19041574084286472),
+                                   V(-0.20819395464),
+                                   V(-0.421064),
+                                   V(-0.32733845535848671),
+                               },
+                               {
+                                   V(1.3 * 8611.3238710010046),
+                                   V(-0.3041958212306401),
+                                   V(-0.3633036457487539),
+                                   V(-0.35660379990111464),
+                                   V(-0.3443074455424403),
+                                   V(-0.33699592683512467),
+                                   V(-0.30180866526242109),
+                                   V(-0.27321683125358037),
+                               },
+                               {
+                                   V(1.3 * 4492.2486445538634),
+                                   V(-1.2),
+                                   V(-1.2),
+                                   V(-0.8),
+                                   V(-0.7),
+                                   V(-0.7),
+                                   V(-0.4),
+                                   V(-0.5),
+                               }}},
+                             8));  // num_dist_bands: 8 values per row
+  }
+  // DCT256X256: 3 rows (indexed by channel c) of 8 distance-band values.
+  static const QuantEncodingInternal DCT256X256() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{
+                                   V(3.6 * 26629.073922049845),
+                                   V(-1.025),
+                                   V(-0.78),
+                                   V(-0.65012),
+                                   V(-0.19041574084286472),
+                                   V(-0.20819395464),
+                                   V(-0.421064),
+                                   V(-0.32733845535848671),
+                               },
+                               {
+                                   V(3.6 * 9311.3238710010046),
+                                   V(-0.3041958212306401),
+                                   V(-0.3633036457487539),
+                                   V(-0.35660379990111464),
+                                   V(-0.3443074455424403),
+                                   V(-0.33699592683512467),
+                                   V(-0.30180866526242109),
+                                   V(-0.27321683125358037),
+                               },
+                               {
+                                   V(3.6 * 4992.2486445538634),
+                                   V(-1.2),
+                                   V(-1.2),
+                                   V(-0.8),
+                                   V(-0.7),
+                                   V(-0.7),
+                                   V(-0.4),
+                                   V(-0.5),
+                               }}},
+                             8));  // num_dist_bands: 8 values per row
+  }
+
+  // DCT128X256 (QuantTable enum in quant_weights.h also notes DCT256X128)
+  static const QuantEncodingInternal DCT128X256() {
+    return QuantEncodingInternal::DCT(
+        DctQuantWeightParams({{{
+                                   V(2.6 * 23629.073922049845),
+                                   V(-1.025),
+                                   V(-0.78),
+                                   V(-0.65012),
+                                   V(-0.19041574084286472),
+                                   V(-0.20819395464),
+                                   V(-0.421064),
+                                   V(-0.32733845535848671),
+                               },
+                               {
+                                   V(2.6 * 8611.3238710010046),
+                                   V(-0.3041958212306401),
+                                   V(-0.3633036457487539),
+                                   V(-0.35660379990111464),
+                                   V(-0.3443074455424403),
+                                   V(-0.33699592683512467),
+                                   V(-0.30180866526242109),
+                                   V(-0.27321683125358037),
+                               },
+                               {
+                                   V(2.6 * 4492.2486445538634),
+                                   V(-1.2),
+                                   V(-1.2),
+                                   V(-0.8),
+                                   V(-0.7),
+                                   V(-0.7),
+                                   V(-0.4),
+                                   V(-0.5),
+                               }}},
+                             8));  // num_dist_bands: 8 values per row
+  }
+};
+}  // namespace
+
+const DequantMatrices::DequantLibraryInternal DequantMatrices::LibraryInit() {
+  static_assert(kNum == 17,
+                "Update this function when adding new quantization kinds.");
+  static_assert(kNumPredefinedTables == 1,
+                "Update this function when adding new quantization matrices to "
+                "the library.");
+
+  // The returned array is positional: entry i must belong to QuantTable i.
+  static_assert(0 == DCT, "Update the DequantLibrary array below.");
+  static_assert(1 == IDENTITY, "Update the DequantLibrary array below.");
+  static_assert(2 == DCT2X2, "Update the DequantLibrary array below.");
+  static_assert(3 == DCT4X4, "Update the DequantLibrary array below.");
+  static_assert(4 == DCT16X16, "Update the DequantLibrary array below.");
+  static_assert(5 == DCT32X32, "Update the DequantLibrary array below.");
+  static_assert(6 == DCT8X16, "Update the DequantLibrary array below.");
+  static_assert(7 == DCT8X32, "Update the DequantLibrary array below.");
+  static_assert(8 == DCT16X32, "Update the DequantLibrary array below.");
+  static_assert(9 == DCT4X8, "Update the DequantLibrary array below.");
+  static_assert(10 == AFV0, "Update the DequantLibrary array below.");
+  static_assert(11 == DCT64X64, "Update the DequantLibrary array below.");
+  static_assert(12 == DCT32X64, "Update the DequantLibrary array below.");
+  static_assert(13 == DCT128X128, "Update the DequantLibrary array below.");
+  static_assert(14 == DCT64X128, "Update the DequantLibrary array below.");
+  static_assert(15 == DCT256X256, "Update the DequantLibrary array below.");
+  static_assert(16 == DCT128X256, "Update the DequantLibrary array below.");
+  return DequantMatrices::DequantLibraryInternal{
+      DequantMatricesLibraryDef::DCT(),
+      DequantMatricesLibraryDef::IDENTITY(),
+      DequantMatricesLibraryDef::DCT2X2(),
+      DequantMatricesLibraryDef::DCT4X4(),
+      DequantMatricesLibraryDef::DCT16X16(),
+      DequantMatricesLibraryDef::DCT32X32(),
+      DequantMatricesLibraryDef::DCT8X16(),
+      DequantMatricesLibraryDef::DCT8X32(),
+      DequantMatricesLibraryDef::DCT16X32(),
+      DequantMatricesLibraryDef::DCT4X8(),
+      DequantMatricesLibraryDef::AFV0(),
+      DequantMatricesLibraryDef::DCT64X64(),
+      DequantMatricesLibraryDef::DCT32X64(),
+      // Large (128+) tables reuse the 64x* rows with a bigger first factor.
+      DequantMatricesLibraryDef::DCT128X128(),
+      DequantMatricesLibraryDef::DCT64X128(),
+      DequantMatricesLibraryDef::DCT256X256(),
+      DequantMatricesLibraryDef::DCT128X256(),
+  };
+}
+
+const QuantEncoding* DequantMatrices::Library() {
+  // Function-local static: the table is built by LibraryInit() on first use.
+  // It stores QuantEncodingInternal objects but is handed out as
+  // QuantEncoding*. The header static_asserts that both types have the same
+  // size (the subclass adds no data members), and callers are expected to
+  // read only members inherited from QuantEncodingInternal, so the pointer
+  // is reinterpreted rather than converting every entry.
+  static const DequantLibraryInternal kLibraryEntries = LibraryInit();
+  const QuantEncodingInternal* first_entry = kLibraryEntries.data();
+  return reinterpret_cast<const QuantEncoding*>(first_entry);
+}
+
+Status DequantMatrices::Compute() {
+  // Tables for the all-library configuration. Built once, on the first call,
+  // and shared by every instance whose encodings are all library entries.
+  struct DefaultMatrices {
+    DefaultMatrices() {
+      const QuantEncoding* library = Library();
+      size_t offset = 0;
+      for (size_t kind = 0; kind != kNum; ++kind) {
+        JXL_CHECK(ComputeQuantTable(library[kind], table, inv_table, kind,
+                                    QuantTable(kind), &offset));
+      }
+      JXL_CHECK(offset == kTotalTableSize);
+    }
+    HWY_ALIGN_MAX float table[kTotalTableSize];
+    HWY_ALIGN_MAX float inv_table[kTotalTableSize];
+  };
+  // release() detaches the allocation from its owner, so it is never freed.
+  static const DefaultMatrices& defaults =
+      *hwy::MakeUniqueAligned<DefaultMatrices>().release();
+
+  JXL_ASSERT(encodings_.size() == kNum);
+  bool all_library = true;
+  for (const auto& encoding : encodings_) {
+    if (encoding.mode != QuantEncoding::kQuantModeLibrary) {
+      all_library = false;
+      break;
+    }
+  }
+  if (all_library) {
+    table_ = defaults.table;
+    inv_table_ = defaults.inv_table;
+    return true;
+  }
+
+  // Custom encodings present: build private tables (inverse after forward).
+  table_storage_ = hwy::AllocateAligned<float>(2 * kTotalTableSize);
+  float* const forward = table_storage_.get();
+  float* const inverse = forward + kTotalTableSize;
+  table_ = forward;
+  inv_table_ = inverse;
+  size_t pos = 0;
+  for (size_t kind = 0; kind != kNum; ++kind) {
+    if (encodings_[kind].mode != QuantEncoding::kQuantModeLibrary) {
+      JXL_RETURN_IF_ERROR(ComputeQuantTable(encodings_[kind], forward, inverse,
+                                            kind, QuantTable(kind), &pos));
+      continue;
+    }
+    // Library entry: copy its three channel blocks from the default tables.
+    const size_t count = required_size_[kind] * kDCTBlockSize * 3;
+    memcpy(forward + pos, defaults.table + pos, count * sizeof(float));
+    memcpy(inverse + pos, defaults.inv_table + pos, count * sizeof(float));
+    pos += count;
+  }
+  JXL_ASSERT(pos == kTotalTableSize);
+  return true;
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quant_weights.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quant_weights.h
new file mode 100644
index 0000000000..816362f81c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quant_weights.h
@@ -0,0 +1,469 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_QUANT_WEIGHTS_H_
+#define LIB_JXL_QUANT_WEIGHTS_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#include <array>
+#include <hwy/aligned_allocator.h>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/cache_aligned.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+template <typename T, size_t N>
+constexpr T ArraySum(T (&a)[N], size_t i = N - 1) {  // Sum of a[0]..a[i].
+  static_assert(N > 0, "Trying to compute the sum of an empty array");
+  return i != 0 ? a[i] + ArraySum(a, i - 1) : a[0];  // C++11: single return.
+}
+
+static constexpr size_t kMaxQuantTableSize = AcStrategy::kMaxCoeffArea;
+static constexpr size_t kNumPredefinedTables = 1;
+static constexpr size_t kCeilLog2NumPredefinedTables = 0;
+static constexpr size_t kLog2NumQuantModes = 3;
+
+struct DctQuantWeightParams {
+  static constexpr size_t kLog2MaxDistanceBands = 4;
+  static constexpr size_t kMaxDistanceBands = 1 + (1 << kLog2MaxDistanceBands);
+  typedef std::array<std::array<float, kMaxDistanceBands>, 3>
+      DistanceBandsArray;
+  // Count of meaningful leading entries in each distance_bands[c] row.
+  size_t num_distance_bands = 0;
+  DistanceBandsArray distance_bands = {};
+
+  constexpr DctQuantWeightParams() : num_distance_bands(0) {}
+  constexpr DctQuantWeightParams(const DistanceBandsArray& dist_bands,
+                                 size_t num_dist_bands)
+      : num_distance_bands(num_dist_bands), distance_bands(dist_bands) {}
+  // Copies 3 rows of num_dist_bands floats; the remaining entries stay zero.
+  template <size_t num_dist_bands>
+  explicit DctQuantWeightParams(const float dist_bands[3][num_dist_bands]) {
+    static_assert(num_dist_bands <= kMaxDistanceBands, "Too many bands");
+    num_distance_bands = num_dist_bands;
+    for (size_t c = 0; c < 3; c++) {
+      memcpy(distance_bands[c].data(), dist_bands[c],
+             sizeof(float) * num_dist_bands);
+    }
+  }
+};
+
+// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
+struct QuantEncodingInternal {  // Base class of QuantEncoding.
+  enum Mode {
+    kQuantModeLibrary,
+    kQuantModeID,
+    kQuantModeDCT2,
+    kQuantModeDCT4,
+    kQuantModeDCT4X8,
+    kQuantModeAFV,
+    kQuantModeDCT,
+    kQuantModeRAW,
+  };
+  // Tag<m> is an empty type used to pick the constructor overload for mode m.
+  template <Mode mode>
+  struct Tag {};
+
+  typedef std::array<std::array<float, 3>, 3> IdWeights;
+  typedef std::array<std::array<float, 6>, 3> DCT2Weights;
+  typedef std::array<std::array<float, 2>, 3> DCT4Multipliers;
+  typedef std::array<std::array<float, 9>, 3> AFVWeights;
+  typedef std::array<float, 3> DCT4x8Multipliers;
+  // Library entry; aborts unless predefined < kNumPredefinedTables.
+  static constexpr QuantEncodingInternal Library(uint8_t predefined) {
+    return ((predefined < kNumPredefinedTables) ||
+            JXL_ABORT("Assert predefined < kNumPredefinedTables")),
+           QuantEncodingInternal(Tag<kQuantModeLibrary>(), predefined);
+  }
+  constexpr QuantEncodingInternal(Tag<kQuantModeLibrary> /* tag */,
+                                  uint8_t predefined)
+      : mode(kQuantModeLibrary), predefined(predefined) {}
+
+  // Identity
+  // xybweights is an array of {xweights, yweights, bweights}.
+  static constexpr QuantEncodingInternal Identity(const IdWeights& xybweights) {
+    return QuantEncodingInternal(Tag<kQuantModeID>(), xybweights);
+  }
+  constexpr QuantEncodingInternal(Tag<kQuantModeID> /* tag */,
+                                  const IdWeights& xybweights)
+      : mode(kQuantModeID), idweights(xybweights) {}
+
+  // DCT2
+  static constexpr QuantEncodingInternal DCT2(const DCT2Weights& xybweights) {
+    return QuantEncodingInternal(Tag<kQuantModeDCT2>(), xybweights);
+  }
+  constexpr QuantEncodingInternal(Tag<kQuantModeDCT2> /* tag */,
+                                  const DCT2Weights& xybweights)
+      : mode(kQuantModeDCT2), dct2weights(xybweights) {}
+
+  // DCT4
+  static constexpr QuantEncodingInternal DCT4(
+      const DctQuantWeightParams& params, const DCT4Multipliers& xybmul) {
+    return QuantEncodingInternal(Tag<kQuantModeDCT4>(), params, xybmul);
+  }
+  constexpr QuantEncodingInternal(Tag<kQuantModeDCT4> /* tag */,
+                                  const DctQuantWeightParams& params,
+                                  const DCT4Multipliers& xybmul)
+      : mode(kQuantModeDCT4), dct_params(params), dct4multipliers(xybmul) {}
+
+  // DCT4x8
+  static constexpr QuantEncodingInternal DCT4X8(
+      const DctQuantWeightParams& params, const DCT4x8Multipliers& xybmul) {
+    return QuantEncodingInternal(Tag<kQuantModeDCT4X8>(), params, xybmul);
+  }
+  constexpr QuantEncodingInternal(Tag<kQuantModeDCT4X8> /* tag */,
+                                  const DctQuantWeightParams& params,
+                                  const DCT4x8Multipliers& xybmul)
+      : mode(kQuantModeDCT4X8), dct_params(params), dct4x8multipliers(xybmul) {}
+
+  // DCT
+  static constexpr QuantEncodingInternal DCT(
+      const DctQuantWeightParams& params) {
+    return QuantEncodingInternal(Tag<kQuantModeDCT>(), params);
+  }
+  constexpr QuantEncodingInternal(Tag<kQuantModeDCT> /* tag */,
+                                  const DctQuantWeightParams& params)
+      : mode(kQuantModeDCT), dct_params(params) {}
+
+  // AFV
+  static constexpr QuantEncodingInternal AFV(
+      const DctQuantWeightParams& params4x8,
+      const DctQuantWeightParams& params4x4, const AFVWeights& weights) {
+    return QuantEncodingInternal(Tag<kQuantModeAFV>(), params4x8, params4x4,
+                                 weights);
+  }
+  constexpr QuantEncodingInternal(Tag<kQuantModeAFV> /* tag */,
+                                  const DctQuantWeightParams& params4x8,
+                                  const DctQuantWeightParams& params4x4,
+                                  const AFVWeights& weights)
+      : mode(kQuantModeAFV),
+        dct_params(params4x8),
+        afv_weights(weights),
+        dct_params_afv_4x4(params4x4) {}
+
+  // This constructor is not constexpr so it can't be used in any of the
+  // constexpr cases above.
+  explicit QuantEncodingInternal(Mode mode) : mode(mode) {}
+
+  Mode mode;  // Selects which of the members below are meaningful.
+
+  // Weights for DCT4+ tables.
+  DctQuantWeightParams dct_params;
+
+  union {
+    // Weights for identity.
+    IdWeights idweights;
+
+    // Weights for DCT2.
+    DCT2Weights dct2weights;
+
+    // Extra multipliers for coefficients 01/10 and 11 for DCT4 and AFV.
+    DCT4Multipliers dct4multipliers;
+
+    // Weights for AFV. {0, 1} are used directly for coefficients (0, 1) and (1,
+    // 0);  {2, 3, 4} are used directly corner DC, (1,0) - (0,1) and (0, 1) +
+    // (1, 0) - (0, 0) inside the AFV block. Values from 5 to 8 are interpolated
+    // as in GetQuantWeights for DC and are used for other coefficients.
+    AFVWeights afv_weights = {};
+
+    // Extra multipliers for coefficients 01 or 10 for DCT4X8 and DCT8X4.
+    DCT4x8Multipliers dct4x8multipliers;
+
+    // Only used in kQuantModeRAW mode.
+    struct {
+      // explicit quantization table (like in JPEG)
+      std::vector<int>* qtable = nullptr;
+      float qtable_den = 1.f / (8 * 255);
+    } qraw;
+  };
+
+  // Weights for 4x4 sub-block in AFV.
+  DctQuantWeightParams dct_params_afv_4x4;
+
+  union {
+    // Which predefined table to use. Only used if mode is kQuantModeLibrary.
+    uint8_t predefined = 0;
+
+    // Index of an earlier quant table to copy from. NOTE(review): Mode above
+    // has no kQuantModeCopy enumerator, so this looks unused -- confirm.
+    uint8_t source;
+  };
+};
+
+class QuantEncoding final : public QuantEncodingInternal {  // Owns qtable.
+ public:
+  QuantEncoding(const QuantEncoding& other)
+      : QuantEncodingInternal(
+            static_cast<const QuantEncodingInternal&>(other)) {
+    if (mode == kQuantModeRAW && qraw.qtable) {
+      // Need to make a copy of the passed *qtable.
+      qraw.qtable = new std::vector<int>(*other.qraw.qtable);
+    }
+  }
+  QuantEncoding(QuantEncoding&& other) noexcept
+      : QuantEncodingInternal(
+            static_cast<const QuantEncodingInternal&>(other)) {
+    // Steal the qtable from the other object if any.
+    if (mode == kQuantModeRAW) {
+      other.qraw.qtable = nullptr;
+    }
+  }
+  QuantEncoding& operator=(const QuantEncoding& other) {
+    // Self-assignment must be a no-op: freeing our qtable first would leave
+    // the deep copy below reading from the vector that was just deleted.
+    if (this == &other) return *this;
+    if (mode == kQuantModeRAW) delete qraw.qtable;  // deleting null is fine
+    *static_cast<QuantEncodingInternal*>(this) =
+        QuantEncodingInternal(static_cast<const QuantEncodingInternal&>(other));
+    if (mode == kQuantModeRAW && qraw.qtable) {
+      qraw.qtable = new std::vector<int>(*other.qraw.qtable);  // deep copy
+    }
+    return *this;
+  }
+
+  ~QuantEncoding() {
+    if (mode == kQuantModeRAW && qraw.qtable) {
+      delete qraw.qtable;
+    }
+  }
+
+  // Wrappers of the QuantEncodingInternal:: static functions that return a
+  // QuantEncoding instead. This is using the explicit and private cast from
+  // QuantEncodingInternal to QuantEncoding, which would be inlined anyway.
+  // In general, you should use these wrappers. The only reason to directly
+  // create a QuantEncodingInternal instance is if you need a constexpr version
+  // of this class. Note that RAW() is not supported in that case since it uses
+  // a std::vector.
+  static QuantEncoding Library(uint8_t predefined) {
+    return QuantEncoding(QuantEncodingInternal::Library(predefined));
+  }
+  static QuantEncoding Identity(const IdWeights& xybweights) {
+    return QuantEncoding(QuantEncodingInternal::Identity(xybweights));
+  }
+  static QuantEncoding DCT2(const DCT2Weights& xybweights) {
+    return QuantEncoding(QuantEncodingInternal::DCT2(xybweights));
+  }
+  static QuantEncoding DCT4(const DctQuantWeightParams& params,
+                            const DCT4Multipliers& xybmul) {
+    return QuantEncoding(QuantEncodingInternal::DCT4(params, xybmul));
+  }
+  static QuantEncoding DCT4X8(const DctQuantWeightParams& params,
+                              const DCT4x8Multipliers& xybmul) {
+    return QuantEncoding(QuantEncodingInternal::DCT4X8(params, xybmul));
+  }
+  static QuantEncoding DCT(const DctQuantWeightParams& params) {
+    return QuantEncoding(QuantEncodingInternal::DCT(params));
+  }
+  static QuantEncoding AFV(const DctQuantWeightParams& params4x8,
+                           const DctQuantWeightParams& params4x4,
+                           const AFVWeights& weights) {
+    return QuantEncoding(
+        QuantEncodingInternal::AFV(params4x8, params4x4, weights));
+  }
+
+  // RAW, note that this one is not a constexpr one.
+  static QuantEncoding RAW(const std::vector<int>& qtable, int shift = 0) {
+    QuantEncoding encoding(kQuantModeRAW);
+    encoding.qraw.qtable = new std::vector<int>();
+    *encoding.qraw.qtable = qtable;
+    encoding.qraw.qtable_den = (1 << shift) * (1.f / (8 * 255));
+    return encoding;
+  }
+
+ private:
+  explicit QuantEncoding(const QuantEncodingInternal& other)
+      : QuantEncodingInternal(other) {}
+
+  explicit QuantEncoding(QuantEncodingInternal::Mode mode)
+      : QuantEncodingInternal(mode) {}
+};
+
+// A constexpr QuantEncodingInternal instance is often downcast to the
+// QuantEncoding subclass even if the instance wasn't an instance of the
+// subclass. This is safe because users will upcast to QuantEncodingInternal to
+// access any of its members.
+static_assert(sizeof(QuantEncoding) == sizeof(QuantEncodingInternal),
+              "Don't add any members to QuantEncoding");
+
+// Default inverse DC quant per channel; kept 2**N for possible simplicity.
+const float kInvDCQuant[3] = {
+    4096.0f,  // 2**12
+    512.0f,   // 2**9
+    256.0f,   // 2**8
+};
+// Elementwise reciprocals of kInvDCQuant.
+const float kDCQuant[3] = {
+    1.0f / kInvDCQuant[0],
+    1.0f / kInvDCQuant[1],
+    1.0f / kInvDCQuant[2],
+};
+
+class ModularFrameEncoder;
+class ModularFrameDecoder;
+
+class DequantMatrices {  // Quant tables per AC strategy and channel.
+ public:
+  enum QuantTable : size_t {
+    DCT = 0,
+    IDENTITY,
+    DCT2X2,
+    DCT4X4,
+    DCT16X16,
+    DCT32X32,
+    // DCT16X8
+    DCT8X16,
+    // DCT32X8
+    DCT8X32,
+    // DCT32X16
+    DCT16X32,
+    DCT4X8,
+    // DCT8X4
+    AFV0,
+    // AFV1
+    // AFV2
+    // AFV3
+    DCT64X64,
+    // DCT64X32,
+    DCT32X64,
+    DCT128X128,
+    // DCT128X64,
+    DCT64X128,
+    DCT256X256,
+    // DCT256X128,
+    DCT128X256,
+    kNum
+  };
+  // Maps each valid AC strategy index to the QuantTable whose weights it uses.
+  static constexpr QuantTable kQuantTable[] = {
+      QuantTable::DCT,        QuantTable::IDENTITY,   QuantTable::DCT2X2,
+      QuantTable::DCT4X4,     QuantTable::DCT16X16,   QuantTable::DCT32X32,
+      QuantTable::DCT8X16,    QuantTable::DCT8X16,    QuantTable::DCT8X32,
+      QuantTable::DCT8X32,    QuantTable::DCT16X32,   QuantTable::DCT16X32,
+      QuantTable::DCT4X8,     QuantTable::DCT4X8,     QuantTable::AFV0,
+      QuantTable::AFV0,       QuantTable::AFV0,       QuantTable::AFV0,
+      QuantTable::DCT64X64,   QuantTable::DCT32X64,   QuantTable::DCT32X64,
+      QuantTable::DCT128X128, QuantTable::DCT64X128,  QuantTable::DCT64X128,
+      QuantTable::DCT256X256, QuantTable::DCT128X256, QuantTable::DCT128X256,
+  };
+  static_assert(AcStrategy::kNumValidStrategies ==
+                    sizeof(kQuantTable) / sizeof *kQuantTable,
+                "Update this array when adding or removing AC strategies.");
+  // Starts with library encodings everywhere and precomputes table offsets.
+  DequantMatrices() {
+    encodings_.resize(size_t(QuantTable::kNum), QuantEncoding::Library(0));
+    size_t pos = 0;
+    size_t offsets[kNum * 3];
+    for (size_t i = 0; i < size_t(QuantTable::kNum); i++) {
+      encodings_[i] = QuantEncoding::Library(0);
+      size_t num = required_size_[i] * kDCTBlockSize;
+      for (size_t c = 0; c < 3; c++) {
+        offsets[3 * i + c] = pos + c * num;
+      }
+      pos += 3 * num;
+    }
+    for (size_t i = 0; i < AcStrategy::kNumValidStrategies; i++) {
+      for (size_t c = 0; c < 3; c++) {
+        table_offsets_[i * 3 + c] = offsets[kQuantTable[i] * 3 + c];
+      }
+    }
+    // Default quantization tables need to be valid.
+    JXL_CHECK(Compute());
+  }
+
+  static const QuantEncoding* Library();
+
+  typedef std::array<QuantEncodingInternal, kNumPredefinedTables * kNum>
+      DequantLibraryInternal;
+  // Builds and returns, by value, the kNumPredefinedTables * kNum library
+  // entries (the declaration is not constexpr). Use Library() to obtain a
+  // pointer to the shared copy in the .cc file.
+  static const DequantLibraryInternal LibraryInit();
+
+  JXL_INLINE size_t MatrixOffset(size_t quant_kind, size_t c) const {
+    JXL_DASSERT(quant_kind < AcStrategy::kNumValidStrategies);
+    return table_offsets_[quant_kind * 3 + c];
+  }
+
+  // Returns aligned memory.
+  JXL_INLINE const float* Matrix(size_t quant_kind, size_t c) const {
+    JXL_DASSERT(quant_kind < AcStrategy::kNumValidStrategies);
+    return &table_[MatrixOffset(quant_kind, c)];
+  }
+
+  JXL_INLINE const float* InvMatrix(size_t quant_kind, size_t c) const {
+    JXL_DASSERT(quant_kind < AcStrategy::kNumValidStrategies);
+    return &inv_table_[MatrixOffset(quant_kind, c)];
+  }
+
+  // DC quants are used in modular mode for XYB multipliers.
+  JXL_INLINE float DCQuant(size_t c) const { return dc_quant_[c]; }
+  JXL_INLINE const float* DCQuants() const { return dc_quant_; }
+
+  JXL_INLINE float InvDCQuant(size_t c) const { return inv_dc_quant_[c]; }
+
+  // For encoder. Only stores the encodings; tables are not recomputed here.
+  void SetEncodings(const std::vector<QuantEncoding>& encodings) {
+    encodings_ = encodings;
+  }
+
+  // For encoder. dc[] holds the inverse DC quant; its reciprocal is cached.
+  void SetDCQuant(const float dc[3]) {
+    for (size_t c = 0; c < 3; c++) {
+      dc_quant_[c] = 1.0f / dc[c];
+      inv_dc_quant_[c] = dc[c];
+    }
+  }
+
+  Status Decode(BitReader* br,
+                ModularFrameDecoder* modular_frame_decoder = nullptr);
+  Status DecodeDC(BitReader* br);
+
+  const std::vector<QuantEncoding>& encodings() const { return encodings_; }
+
+  static constexpr size_t required_size_x[] = {1, 1, 1, 1, 2,  4, 1,  1, 2,
+                                               1, 1, 8, 4, 16, 8, 32, 16};
+  static_assert(kNum == sizeof(required_size_x) / sizeof(*required_size_x),
+                "Update this array when adding or removing quant tables.");
+
+  static constexpr size_t required_size_y[] = {1, 1, 1, 1, 2,  4,  2,  4, 4,
+                                               1, 1, 8, 8, 16, 16, 32, 32};
+  static_assert(kNum == sizeof(required_size_y) / sizeof(*required_size_y),
+                "Update this array when adding or removing quant tables.");
+
+ private:
+  Status Compute();
+  // Entry i equals required_size_x[i] * required_size_y[i] (units of blocks).
+  static constexpr size_t required_size_[] = {
+      1, 1, 1, 1, 4, 16, 2, 4, 8, 1, 1, 64, 32, 256, 128, 1024, 512};
+  static_assert(kNum == sizeof(required_size_) / sizeof(*required_size_),
+                "Update this array when adding or removing quant tables.");
+  static constexpr size_t kTotalTableSize =
+      ArraySum(required_size_) * kDCTBlockSize * 3;
+
+  // kTotalTableSize entries followed by kTotalTableSize for inv_table
+  hwy::AlignedFreeUniquePtr<float[]> table_storage_;
+  const float* table_;
+  const float* inv_table_;
+  float dc_quant_[3] = {kDCQuant[0], kDCQuant[1], kDCQuant[2]};
+  float inv_dc_quant_[3] = {kInvDCQuant[0], kInvDCQuant[1], kInvDCQuant[2]};
+  size_t table_offsets_[AcStrategy::kNumValidStrategies * 3];
+  std::vector<QuantEncoding> encodings_;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_QUANT_WEIGHTS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quant_weights_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quant_weights_test.cc
new file mode 100644
index 0000000000..2392c74cc6
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quant_weights_test.cc
@@ -0,0 +1,240 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+#include "lib/jxl/quant_weights.h"
+
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <hwy/base.h>  // HWY_ALIGN_MAX
+#include <hwy/tests/test_util-inl.h>
+#include <numeric>
+#include <random>
+
+#include "lib/jxl/dct_for_test.h"
+#include "lib/jxl/dec_transforms_testonly.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_transforms.h"
+
+namespace jxl {
+namespace {
+
+// Generic comparison: values must match exactly (float is specialized below).
+template <typename T>
+void CheckSimilar(T a, T b) { EXPECT_EQ(a, b); }
+
+// Float comparison. The format keeps 10 bits of precision (minimum exponent
+// is -15), so the relative error should be below 2^-10.
+template <>
+void CheckSimilar(float a, float b) {
+  const float magnitude = std::max(std::abs(a), std::abs(b));
+  const float max_error = magnitude / 1024.0f;
+  EXPECT_LE(std::abs(a - b), max_error) << "a: " << a << " b: " << b;
+}
+
+// Values installed via DequantMatricesSetCustomDC must be reported back by
+// InvDCQuant(), up to the precision tolerated by CheckSimilar.
+TEST(QuantWeightsTest, DC) {
+  float dc_quant[3] = {1e+5, 1e+3, 1e+1};
+  DequantMatrices mat;
+  DequantMatricesSetCustomDC(&mat, dc_quant);
+  for (size_t c = 0; c != 3; ++c) CheckSimilar(mat.InvDCQuant(c), dc_quant[c]);
+}
+
+// Roundtrips `encodings` through DequantMatricesSetCustom and compares them.
+void RoundtripMatrices(const std::vector<QuantEncoding>& encodings) {
+  ASSERT_TRUE(encodings.size() == DequantMatrices::kNum);
+  DequantMatrices mat;
+  CodecMetadata metadata;
+  FrameHeader frame_header(&metadata);
+  ModularFrameEncoder encoder(frame_header, CompressParams{});
+  DequantMatricesSetCustom(&mat, encodings, &encoder);
+  const std::vector<QuantEncoding>& roundtripped = mat.encodings();
+  for (size_t idx = 0; idx != encodings.size(); ++idx) {
+    const QuantEncoding& want = encodings[idx];
+    const QuantEncoding& got = roundtripped[idx];
+    // Fields compared regardless of the mode.
+    EXPECT_EQ(want.mode, got.mode);
+    EXPECT_EQ(want.predefined, got.predefined);
+    EXPECT_EQ(want.source, got.source);
+
+    EXPECT_EQ(static_cast<uint64_t>(want.dct_params.num_distance_bands),
+              static_cast<uint64_t>(got.dct_params.num_distance_bands));
+    for (size_t c = 0; c != 3; ++c) {
+      for (size_t b = 0; b != DctQuantWeightParams::kMaxDistanceBands; ++b) {
+        CheckSimilar(want.dct_params.distance_bands[c][b],
+                     got.dct_params.distance_bands[c][b]);
+      }
+    }
+    if (want.mode == QuantEncoding::kQuantModeRAW) {
+      EXPECT_FALSE(!want.qraw.qtable);
+      EXPECT_FALSE(!got.qraw.qtable);
+      EXPECT_EQ(want.qraw.qtable->size(), got.qraw.qtable->size());
+      for (size_t k = 0; k != want.qraw.qtable->size(); ++k) {
+        EXPECT_EQ((*want.qraw.qtable)[k], (*got.qraw.qtable)[k]);
+      }
+      EXPECT_NEAR(want.qraw.qtable_den, got.qraw.qtable_den, 1e-7f);
+      continue;
+    }
+    // Modes other than kQuantModeRAW use one of the fields below, which all
+    // happen to be arrays of floats.
+    for (size_t c = 0; c != 3; ++c) {
+      for (size_t k = 0; k != 3; ++k) {
+        CheckSimilar(want.idweights[c][k], got.idweights[c][k]);
+      }
+      for (size_t k = 0; k != 6; ++k) {
+        CheckSimilar(want.dct2weights[c][k], got.dct2weights[c][k]);
+      }
+      for (size_t k = 0; k != 2; ++k) {
+        CheckSimilar(want.dct4multipliers[c][k], got.dct4multipliers[c][k]);
+      }
+      CheckSimilar(want.dct4x8multipliers[c], got.dct4x8multipliers[c]);
+      for (size_t k = 0; k != 9; ++k) {
+        CheckSimilar(want.afv_weights[c][k], got.afv_weights[c][k]);
+      }
+      for (size_t b = 0; b != DctQuantWeightParams::kMaxDistanceBands; ++b) {
+        CheckSimilar(want.dct_params_afv_4x4.distance_bands[c][b],
+                     got.dct_params_afv_4x4.distance_bands[c][b]);
+      }
+    }
+  }
+}
+
+TEST(QuantWeightsTest, AllDefault) {  // every table keeps Library(0)
+  const QuantEncoding fill = QuantEncoding::Library(0);
+  const std::vector<QuantEncoding> tables(DequantMatrices::kNum, fill);
+  RoundtripMatrices(tables);
+}
+
+void TestSingleQuantMatrix(DequantMatrices::QuantTable kind) {
+  const QuantEncoding fill = QuantEncoding::Library(0);
+  std::vector<QuantEncoding> tables(DequantMatrices::kNum, fill);
+  tables[kind] = DequantMatrices::Library()[kind];  // only `kind` differs
+  RoundtripMatrices(tables);
+}
+
+// One test per table: ensure each default quant table can be represented.
+TEST(QuantWeightsTest, DCT) { TestSingleQuantMatrix(DequantMatrices::DCT); }
+TEST(QuantWeightsTest, IDENTITY) {
+  TestSingleQuantMatrix(DequantMatrices::IDENTITY);
+}
+TEST(QuantWeightsTest, DCT2X2) {
+  TestSingleQuantMatrix(DequantMatrices::DCT2X2);
+}
+TEST(QuantWeightsTest, DCT4X4) {
+  TestSingleQuantMatrix(DequantMatrices::DCT4X4);
+}
+TEST(QuantWeightsTest, DCT16X16) {
+  TestSingleQuantMatrix(DequantMatrices::DCT16X16);
+}
+TEST(QuantWeightsTest, DCT32X32) {
+  TestSingleQuantMatrix(DequantMatrices::DCT32X32);
+}
+TEST(QuantWeightsTest, DCT8X16) {
+  TestSingleQuantMatrix(DequantMatrices::DCT8X16);
+}
+TEST(QuantWeightsTest, DCT8X32) {
+  TestSingleQuantMatrix(DequantMatrices::DCT8X32);
+}
+TEST(QuantWeightsTest, DCT16X32) {
+  TestSingleQuantMatrix(DequantMatrices::DCT16X32);
+}
+TEST(QuantWeightsTest, DCT4X8) {
+  TestSingleQuantMatrix(DequantMatrices::DCT4X8);
+}
+TEST(QuantWeightsTest, AFV0) { TestSingleQuantMatrix(DequantMatrices::AFV0); }
+TEST(QuantWeightsTest, RAW) {  // random RAW table for the DCT32X32 slot
+  std::mt19937 rng;
+  std::uniform_int_distribution<size_t> dist(1, 255);
+  std::vector<int> matrix(3 * 32 * 32);
+  for (int& entry : matrix) entry = dist(rng);
+  const auto slot = DequantMatrices::kQuantTable[AcStrategy::DCT32X32];
+  std::vector<QuantEncoding> tables(DequantMatrices::kNum,
+                                    QuantEncoding::Library(0));
+  tables[slot] = QuantEncoding::RAW(matrix, 2);
+  RoundtripMatrices(tables);
+}
+
+class QuantWeightsTargetTest : public hwy::TestWithParamTarget {};  // fixture for the TEST_P below
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(QuantWeightsTargetTest);
+
+TEST_P(QuantWeightsTargetTest, DCTUniform) {
+  // All tables and the DC use the same uniform weight 1 / kUniformQuant.
+  constexpr float kUniformQuant = 4;
+  constexpr float kInvUniformQuant = 1.0f / kUniformQuant;
+  float weights[3][2] = {
+      {kInvUniformQuant, 0}, {kInvUniformQuant, 0}, {kInvUniformQuant, 0}};
+  DctQuantWeightParams dct_params(weights);
+  std::vector<QuantEncoding> encodings(DequantMatrices::kNum,
+                                       QuantEncoding::DCT(dct_params));
+  DequantMatrices dequant_matrices;
+  CodecMetadata metadata;
+  FrameHeader frame_header(&metadata);
+  ModularFrameEncoder encoder(frame_header, CompressParams{});
+  DequantMatricesSetCustom(&dequant_matrices, encodings, &encoder);
+
+  const float dc_quant[3] = {kInvUniformQuant, kInvUniformQuant,
+                             kInvUniformQuant};
+  DequantMatricesSetCustomDC(&dequant_matrices, dc_quant);
+
+  HWY_ALIGN_MAX float scratch_space[16 * 16 * 2];
+
+  // DCT8
+  {
+    constexpr size_t kN = 8;
+    const AcStrategy::Type dct = AcStrategy::DCT;
+    HWY_ALIGN_MAX float pixels[kN * kN];
+    HWY_ALIGN_MAX float coeffs[kN * kN];
+    HWY_ALIGN_MAX double slow_coeffs[kN * kN];
+    std::iota(std::begin(pixels), std::end(pixels), 0);
+    TransformFromPixels(dct, pixels, kN, coeffs, scratch_space);
+    std::copy(std::begin(pixels), std::end(pixels), std::begin(slow_coeffs));
+    DCTSlow<kN>(slow_coeffs);
+    const float* quant = dequant_matrices.Matrix(dct, 0);
+    for (size_t i = 0; i != kN * kN; ++i) {
+      // DCTSlow doesn't multiply/divide by 1/N, so we do it manually.
+      slow_coeffs[i] = roundf(slow_coeffs[i] / kUniformQuant) * kUniformQuant;
+      coeffs[i] = roundf(coeffs[i] / quant[i]) * quant[i];
+    }
+    IDCTSlow<kN>(slow_coeffs);
+    TransformToPixels(dct, coeffs, pixels, kN, scratch_space);
+    for (size_t i = 0; i != kN * kN; ++i) {
+      EXPECT_NEAR(pixels[i], slow_coeffs[i], 1e-4);
+    }
+  }
+
+  // DCT16
+  {
+    constexpr size_t kN = 16;
+    const AcStrategy::Type dct = AcStrategy::DCT16X16;
+    HWY_ALIGN_MAX float pixels[kN * kN];
+    HWY_ALIGN_MAX float coeffs[kN * kN];
+    HWY_ALIGN_MAX double slow_coeffs[kN * kN];
+    std::iota(std::begin(pixels), std::end(pixels), 0);
+    TransformFromPixels(dct, pixels, kN, coeffs, scratch_space);
+    std::copy(std::begin(pixels), std::end(pixels), std::begin(slow_coeffs));
+    DCTSlow<kN>(slow_coeffs);
+    const float* quant = dequant_matrices.Matrix(dct, 0);
+    for (size_t i = 0; i != kN * kN; ++i) {
+      slow_coeffs[i] = roundf(slow_coeffs[i] / kUniformQuant) * kUniformQuant;
+      coeffs[i] = roundf(coeffs[i] / quant[i]) * quant[i];
+    }
+    IDCTSlow<kN>(slow_coeffs);
+    TransformToPixels(dct, coeffs, pixels, kN, scratch_space);
+    for (size_t i = 0; i != kN * kN; ++i) {
+      EXPECT_NEAR(pixels[i], slow_coeffs[i], 1e-4);
+    }
+  }
+
+  // All strategies must share the same scaling: entry 0 of the channel-0
+  // matrix has to equal kUniformQuant for every valid strategy.
+  for (size_t strategy = 0; strategy != AcStrategy::kNumValidStrategies; ++strategy) {
+    EXPECT_NEAR(dequant_matrices.Matrix(strategy, 0)[0], kUniformQuant, 1e-6);
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer-inl.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer-inl.h
new file mode 100644
index 0000000000..2627148dc2
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer-inl.h
@@ -0,0 +1,73 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JXL_QUANTIZER_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_QUANTIZER_INL_H_
+#undef LIB_JXL_QUANTIZER_INL_H_
+#else
+#define LIB_JXL_QUANTIZER_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::Vec;
+
+template <class DI>  // DI: integer descriptor of quant_i; the result is float
+HWY_INLINE HWY_MAYBE_UNUSED Vec<Rebind<float, DI>> AdjustQuantBias(
+    DI di, const size_t c, const Vec<DI> quant_i,
+    const float* HWY_RESTRICT biases) {
+  const Rebind<float, DI> df;
+  // Reads biases[c]; the high-precision path additionally reads biases[3].
+#if JXL_HIGH_PRECISION
+  const auto quant = ConvertTo(df, quant_i);
+
+  // Compare |quant|, keep sign bit for negating result.
+  const auto kSign = BitCast(df, Set(di, INT32_MIN));
+  const auto sign = And(quant, kSign);  // TODO(janwas): = abs ^ orig
+  const auto abs_quant = AndNot(kSign, quant);
+
+  // If |x| is 1, kZeroBias creates a different bias for each channel.
+  // We're implementing the following:
+  // if (quant == 0) return 0;
+  // if (quant == 1) return biases[c];
+  // if (quant == -1) return -biases[c];
+  // return quant - biases[3] / quant;
+
+  // Integer comparison is not helpful because Clang incurs bypass penalties
+  // from unnecessarily mixing integer and float.
+  const auto is_01 = abs_quant < Set(df, 1.125f);  // |quant| is 0 or 1
+  const auto not_0 = abs_quant > Zero(df);
+
+  // Bitwise logic is faster than quant * biases[c].
+  const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign));
+
+  // About 2E-5 worse than ReciprocalNR or division.
+  const auto bias =
+      NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant);
+
+  return IfThenElse(is_01, one_bias, bias);
+#else  // !JXL_HIGH_PRECISION: only |quant_i| == 1 is replaced; no biases[3] term
+  auto sign = IfThenElseZero(quant_i < Zero(di), Set(di, INT32_MIN));
+  return BitCast(df, IfThenElse(Abs(quant_i) == Set(di, 1),
+                                sign | BitCast(di, Set(df, biases[c])),
+                                BitCast(di, ConvertTo(df, quant_i))));
+#endif  // JXL_HIGH_PRECISION
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_QUANTIZER_INL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer.cc
new file mode 100644
index 0000000000..2a7480f175
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer.cc
@@ -0,0 +1,146 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/quantizer.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/robust_statistics.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/quant_weights.h"
+
+namespace jxl {
+
+static const int kDefaultQuant = 64;  // quant_dc used by the one-argument constructor
+
+constexpr int Quantizer::kQuantMax;  // out-of-class definition of the in-class constant
+
+// Default: quant_dc = kDefaultQuant, global_scale = denom / kDefaultQuant.
+Quantizer::Quantizer(const DequantMatrices* dequant)
+    : Quantizer(dequant, kDefaultQuant, kGlobalScaleDenom / kDefaultQuant) {}
+
+Quantizer::Quantizer(const DequantMatrices* dequant, int quant_dc,
+                     int global_scale)
+    : global_scale_(global_scale), quant_dc_(quant_dc), dequant_(dequant) {
+  JXL_ASSERT(dequant_ != nullptr);  // the matrices are dereferenced below
+  RecomputeFromGlobalScale();
+  inv_quant_dc_ = inv_global_scale_ / quant_dc_;
+  for (size_t c = 0; c < 3; ++c) zero_bias_[c] = kZeroBiasDefault[c];
+}
+
+// Derives global_scale_ and quant_dc_ from quant_dc and quant-field statistics.
+void Quantizer::ComputeGlobalScaleAndQuant(float quant_dc, float quant_median,
+                                           float quant_median_absd) {
+  // Target value for the median value in the quant field.
+  const float kQuantFieldTarget = 3.80987740592518214386f;
+  // We reduce the median of the quant field by the median absolute deviation:
+  // higher resolution on highly varying quant fields.
+  float scale = kGlobalScaleDenom * (quant_median - quant_median_absd) /
+                kQuantFieldTarget;
+  // Ensure that new_global_scale is positive and no more than 1<<15.
+  if (scale < 1) scale = 1;
+  if (scale > (1 << 15)) scale = 1 << 15;
+  int new_global_scale = static_cast<int>(scale);
+  // Ensure that quant_dc_ will always be at least
+  // kGlobalScaleDenom/kGlobalScaleNumerator.
+  const int scaled_quant_dc =
+      static_cast<int>(quant_dc * kGlobalScaleNumerator);
+  if (new_global_scale > scaled_quant_dc) {
+    new_global_scale = scaled_quant_dc;
+    if (new_global_scale <= 0) new_global_scale = 1;
+  }
+  global_scale_ = new_global_scale;
+  // Code below uses inv_global_scale_.
+  RecomputeFromGlobalScale();
+  // Keep quant_dc_ in [1, 1 << 16]: a tiny quant_dc would otherwise truncate
+  // to 0, which QuantizerParams cannot encode and makes inv_quant_dc_ infinite.
+  float fval = quant_dc * inv_global_scale_ + 0.5f;
+  fval = std::max<float>(1.0f, std::min<float>(1 << 16, fval));
+  quant_dc_ = static_cast<int>(fval);
+  // quant_dc_ was updated, recompute values.
+  RecomputeFromGlobalScale();
+}
+
+// Writes ClampVal(qf * inv_global_scale_ + 0.5) for every pixel of `rect`.
+void Quantizer::SetQuantFieldRect(const ImageF& qf, const Rect& rect,
+                                  ImageI* JXL_RESTRICT raw_quant_field) {
+  for (size_t y = 0; y != rect.ysize(); ++y) {
+    const float* JXL_RESTRICT src = rect.ConstRow(qf, y);
+    int32_t* JXL_RESTRICT dst = rect.Row(raw_quant_field, y);
+    for (size_t x = 0; x != rect.xsize(); ++x) {
+      dst[x] = ClampVal(src[x] * inv_global_scale_ + 0.5f);
+    }
+  }
+}
+
+// Chooses the global scale from the median statistics of `qf`, then fills
+// `raw_quant_field` with the rescaled, clamped values of the whole field.
+void Quantizer::SetQuantField(const float quant_dc, const ImageF& qf,
+                              ImageI* JXL_RESTRICT raw_quant_field) {
+  JXL_CHECK(SameSize(*raw_quant_field, qf));
+  const size_t xsize = qf.xsize();
+  std::vector<float> samples(xsize * qf.ysize());
+  for (size_t y = 0; y != qf.ysize(); ++y) {
+    const float* JXL_RESTRICT row = qf.Row(y);
+    std::copy(row, row + xsize, samples.data() + xsize * y);
+  }
+  const float median = Median(&samples);
+  const float median_absd = MedianAbsoluteDeviation(samples, median);
+  ComputeGlobalScaleAndQuant(quant_dc, median, median_absd);
+  SetQuantFieldRect(qf, Rect(qf), raw_quant_field);
+}
+
+// Uniform field: every entry of `raw_quant_field` gets the same clamped value.
+void Quantizer::SetQuant(float quant_dc, float quant_ac,
+                         ImageI* JXL_RESTRICT raw_quant_field) {
+  ComputeGlobalScaleAndQuant(quant_dc, quant_ac, 0);
+  FillImage(ClampVal(quant_ac * inv_global_scale_ + 0.5f), raw_quant_field);
+}
+
+Status QuantizerParams::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+      BitsOffset(11, 1), BitsOffset(11, 2049), BitsOffset(12, 4097),
+      BitsOffset(16, 8193), 1, &global_scale));  // presumably 1 is the default
+  JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(16), BitsOffset(5, 1),
+                                         BitsOffset(8, 1), BitsOffset(16, 1), 1,
+                                         &quant_dc));  // presumably 1 is the default
+  return true;
+}
+
+Status Quantizer::Encode(BitWriter* writer, size_t layer,
+                         AuxOut* aux_out) const {
+  QuantizerParams fields;  // carries only the two serialized values
+  fields.quant_dc = quant_dc_;
+  fields.global_scale = global_scale_;
+  return Bundle::Write(fields, writer, layer, aux_out);
+}
+
+Status Quantizer::Decode(BitReader* reader) {
+  QuantizerParams fields;
+  JXL_RETURN_IF_ERROR(Bundle::Read(reader, &fields));  // bail out on read error
+  quant_dc_ = static_cast<int>(fields.quant_dc);
+  global_scale_ = static_cast<int>(fields.global_scale);
+  RecomputeFromGlobalScale();  // refresh everything derived from the two
+  return true;
+}
+
+void Quantizer::DumpQuantizationMap(const ImageI& raw_quant_field) const {
+  // Debug dump to stdout: header first, then one text line per image row.
+  printf("Global scale: %d (%.7f)\nDC quant: %d\n", global_scale_,
+         global_scale_ * 1.0 / kGlobalScaleDenom, quant_dc_);
+  printf("AC quantization Map:\n");
+  for (size_t y = 0; y < raw_quant_field.ysize(); ++y) {
+    const auto* row = raw_quant_field.Row(y);
+    for (size_t x = 0; x < raw_quant_field.xsize(); ++x) printf(" %3d", row[x]);
+    printf("\n");
+  }
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer.h
new file mode 100644
index 0000000000..f2da45f1c4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer.h
@@ -0,0 +1,178 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_QUANTIZER_H_
+#define LIB_JXL_QUANTIZER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/linalg.h"
+#include "lib/jxl/quant_weights.h"
+
+// Quantizes DC and AC coefficients, with separate quantization tables according
+// to the quant_kind (which is currently computed from the AC strategy and the
+// block index inside that strategy).
+
+namespace jxl {
+
+static constexpr int kGlobalScaleDenom = 1 << 16;  // global_scale_ is a fraction of this
+static constexpr int kGlobalScaleNumerator = 4096;  // caps global_scale at quant_dc * this
+
+// zero-biases for quantizing channels X, Y, B
+static constexpr float kZeroBiasDefault[3] = {0.5f, 0.5f, 0.5f};
+
+// Quant bias adjustment: maps a quantized integer to a value that is closer
+// to the expected value of the original coefficient.
+// The residuals of AC coefficients that we quantize are not uniformly
+// distributed. Numerical experiments show that they have a distribution with
+// the "shape" of 1/(1+x^2) [up to some coefficients]. This means that the
+// expected value of a coefficient that gets quantized to x will not be x
+// itself, but (at least with reasonable approximation):
+// - 0 if x is 0
+// - x * biases[c] if x is 1 or -1
+// - x - biases[3]/x otherwise
+// This follows from computing the distribution of the quantization bias, which
+// can be approximated fairly well by <constant>/x when |x| is at least two.
+static constexpr float kBiasNumerator = 0.145f;
+
+static constexpr float kDefaultQuantBias[4] = {
+    1.0f - 0.05465007330715401f,
+    1.0f - 0.07005449891748593f,
+    1.0f - 0.049935103337343655f,
+    0.145f,  // biases[3]; same value as kBiasNumerator
+};
+
+class Quantizer {
+ public:
+  explicit Quantizer(const DequantMatrices* dequant);
+  Quantizer(const DequantMatrices* dequant, int quant_dc, int global_scale);
+
+  static constexpr int kQuantMax = 256;  // upper bound applied by ClampVal
+  // Clamps val to [1, kQuantMax] and converts it to int.
+  static JXL_INLINE int ClampVal(float val) {
+    return static_cast<int>(std::max(1.0f, std::min<float>(val, kQuantMax)));
+  }
+
+  // Recomputes all derived fields from global_scale_ and quant_dc_.
+  void RecomputeFromGlobalScale() {
+    global_scale_float_ = global_scale_ * (1.0 / kGlobalScaleDenom);
+    inv_global_scale_ = 1.0 * kGlobalScaleDenom / global_scale_;
+    inv_quant_dc_ = inv_global_scale_ / quant_dc_;
+    for (size_t c = 0; c < 3; c++) {
+      mul_dc_[c] = GetDcStep(c);
+      inv_mul_dc_[c] = GetInvDcStep(c);
+    }
+  }
+
+  // Returns scaling factor such that Scale() * (RawDC() or RawQuantField())
+  // pixels yields the same float values returned by GetQuantField.
+  JXL_INLINE float Scale() const { return global_scale_float_; }
+
+  // Reciprocal of Scale().
+  JXL_INLINE float InvGlobalScale() const { return inv_global_scale_; }
+
+  void SetQuantFieldRect(const ImageF& qf, const Rect& rect,
+                         ImageI* JXL_RESTRICT raw_quant_field);
+
+  void SetQuantField(float quant_dc, const ImageF& qf,
+                     ImageI* JXL_RESTRICT raw_quant_field);
+
+  void SetQuant(float quant_dc, float quant_ac,
+                ImageI* JXL_RESTRICT raw_quant_field);
+
+  // Returns the DC quantization base value, which is currently global (not
+  // adaptive). The actual scale factor used to dequantize pixels in channel c
+  // is: inv_quant_dc() * dequant_->DCQuant(c).
+  float inv_quant_dc() const { return inv_quant_dc_; }
+
+  // Dequantize by multiplying with this times dequant_matrix.
+  float inv_quant_ac(int32_t quant) const { return inv_global_scale_ / quant; }
+
+  Status Encode(BitWriter* writer, size_t layer, AuxOut* aux_out) const;
+
+  Status Decode(BitReader* reader);
+
+  void DumpQuantizationMap(const ImageI& raw_quant_field) const;
+
+  JXL_INLINE const float* DequantMatrix(size_t quant_kind, size_t c) const {
+    return dequant_->Matrix(quant_kind, c);
+  }
+
+  JXL_INLINE const float* InvDequantMatrix(size_t quant_kind, size_t c) const {
+    return dequant_->InvMatrix(quant_kind, c);
+  }
+
+  JXL_INLINE size_t DequantMatrixOffset(size_t quant_kind, size_t c) const {
+    return dequant_->MatrixOffset(quant_kind, c);
+  }
+
+  // Calculates DC quantization step.
+  JXL_INLINE float GetDcStep(size_t c) const {
+    return inv_quant_dc_ * dequant_->DCQuant(c);
+  }
+  JXL_INLINE float GetInvDcStep(size_t c) const {
+    return dequant_->InvDCQuant(c) * (global_scale_float_ * quant_dc_);
+  }
+
+  JXL_INLINE const float* MulDC() const { return mul_dc_; }
+  JXL_INLINE const float* InvMulDC() const { return inv_mul_dc_; }
+  // Sets all four lanes of mul_dc_ and inv_mul_dc_ to 1.
+  JXL_INLINE void ClearDCMul() {
+    std::fill(mul_dc_, mul_dc_ + 4, 1);
+    std::fill(inv_mul_dc_, inv_mul_dc_ + 4, 1);
+  }
+
+  void ComputeGlobalScaleAndQuant(float quant_dc, float quant_median,
+                                  float quant_median_absd);
+
+ private:
+  float mul_dc_[4];  // lanes 0..2: GetDcStep(c); lane 3 is only written by ClearDCMul
+  float inv_mul_dc_[4];  // lanes 0..2: GetInvDcStep(c); lane 3 as above
+
+  // These are serialized:
+  int global_scale_;
+  int quant_dc_;
+
+  // These are derived from global_scale_ (and quant_dc_ for inv_quant_dc_):
+  float inv_global_scale_;
+  float global_scale_float_;  // reciprocal of inv_global_scale_
+  float inv_quant_dc_;
+
+  float zero_bias_[3];
+  const DequantMatrices* dequant_;  // dereferenced by the accessors above
+};
+
+struct QuantizerParams : public Fields {
+  QuantizerParams() { Bundle::Init(this); }
+  const char* Name() const override { return "QuantizerParams"; }
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  uint32_t global_scale;  // visited first by VisitFields
+  uint32_t quant_dc;  // visited second by VisitFields
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_QUANTIZER_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer_test.cc
new file mode 100644
index 0000000000..052e138fe3
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/quantizer_test.cc
@@ -0,0 +1,82 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/quantizer.h"
+
+#include <random>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+
+namespace jxl {
+namespace {
+
+void TestEquivalence(int qxsize, int qysize, const Quantizer& quantizer1,
+                     const Quantizer& quantizer2) {  // qxsize/qysize are unused
+  ASSERT_NEAR(quantizer1.inv_quant_dc(), quantizer2.inv_quant_dc(), 1e-7);
+}
+
+TEST(QuantizerTest, QuantizerParams) {
+  for (uint32_t scale = 1; scale != 10000; ++scale) {  // each must encode
+    QuantizerParams params;
+    params.global_scale = scale;
+    size_t extension_bits = 0, total_bits = 0;
+    EXPECT_TRUE(Bundle::CanEncode(params, &extension_bits, &total_bits));
+    EXPECT_EQ(0, extension_bits);
+    EXPECT_GE(total_bits, 4);
+  }
+}
+
+TEST(QuantizerTest, BitStreamRoundtripSameQuant) {
+  constexpr int qxsize = 8, qysize = 8;
+  DequantMatrices dequant;
+  Quantizer encoded(&dequant);
+  ImageI raw_quant_field(qxsize, qysize);
+  encoded.SetQuant(0.17f, 0.17f, &raw_quant_field);
+  BitWriter writer;
+  EXPECT_TRUE(encoded.Encode(&writer, 0, nullptr));
+  writer.ZeroPadToByte();
+  const size_t bits_written = writer.BitsWritten();
+  // The constructor arguments are placeholders; Decode overwrites both.
+  Quantizer decoded(&dequant, qxsize, qysize);
+  BitReader reader(writer.GetSpan());
+  EXPECT_TRUE(decoded.Decode(&reader));
+  EXPECT_TRUE(reader.JumpToByteBoundary());
+  EXPECT_EQ(reader.TotalBitsConsumed(), bits_written);
+  EXPECT_TRUE(reader.Close());
+  TestEquivalence(qxsize, qysize, encoded, decoded);
+}
+
+TEST(QuantizerTest, BitStreamRoundtripRandomQuant) {
+  const int qxsize = 8;
+  const int qysize = 8;
+  DequantMatrices dequant;
+  Quantizer quantizer1(&dequant);
+  ImageI raw_quant_field(qxsize, qysize);
+  quantizer1.SetQuant(0.17f, 0.17f, &raw_quant_field);
+  // Replace the uniform field with a random one. RandomFillImage takes no
+  // generator argument, so no local RNG is needed here.
+  float quant_dc = 0.17f;
+  ImageF qf(qxsize, qysize);
+  RandomFillImage(&qf, 1.0f);
+  quantizer1.SetQuantField(quant_dc, qf, &raw_quant_field);
+  BitWriter writer;
+  EXPECT_TRUE(quantizer1.Encode(&writer, 0, nullptr));
+  writer.ZeroPadToByte();
+  const size_t bits_written = writer.BitsWritten();
+  Quantizer quantizer2(&dequant, qxsize, qysize);
+  BitReader reader(writer.GetSpan());
+  EXPECT_TRUE(quantizer2.Decode(&reader));
+  EXPECT_TRUE(reader.JumpToByteBoundary());
+  EXPECT_EQ(reader.TotalBitsConsumed(), bits_written);
+  EXPECT_TRUE(reader.Close());
+  TestEquivalence(qxsize, qysize, quantizer1, quantizer2);
+}
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/rational_polynomial-inl.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/rational_polynomial-inl.h
new file mode 100644
index 0000000000..87bddd1bb2
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/rational_polynomial-inl.h
@@ -0,0 +1,94 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Fast SIMD evaluation of rational polynomials for approximating functions.
+
+#if defined(LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_
+#undef LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_
+#else
+#define LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// Primary template: falls back to a true division.
+template <typename T, class V>
+struct FastDivision {
+  HWY_INLINE V operator()(const V n, const V d) const {
+    return n / d;
+  }
+};
+// Partial specialization for float vectors.
+template <class V>
+struct FastDivision<float, V> {
+  // One Newton-Raphson step: (rcp + rcp) - (x * rcp) * rcp.
+  static HWY_INLINE V ReciprocalNR(const V x) {
+    const auto approx = ApproximateReciprocal(x);
+    return NegMulAdd(x * approx, approx, approx + approx);
+  }
+
+  V operator()(const V n, const V d) const {
+#if 1  // Faster on SKX
+    return n / d;
+#else
+    return n * ReciprocalNR(d);
+#endif
+  }
+};
+
+// Approximates smooth functions via rational polynomials (i.e. dividing two
+// polynomials). Evaluates polynomials via Horner's scheme, which is faster than
+// Clenshaw recurrence for Chebyshev polynomials. LoadDup128 allows us to
+// specify constants (replicated 4x) independently of the lane count.
+template <size_t NP, size_t NQ, class D, class V, typename T>
+HWY_INLINE HWY_MAYBE_UNUSED V EvalRationalPolynomial(const D d, const V x,
+                                                     const T (&p)[NP],
+                                                     const T (&q)[NQ]) {
+  constexpr size_t kDegP = NP / 4 - 1;  // p holds NP / 4 coefficients, each 4x
+  constexpr size_t kDegQ = NQ / 4 - 1;  // q holds NQ / 4 coefficients, each 4x
+  auto yp = LoadDup128(d, &p[kDegP * 4]);  // start from the last coefficient
+  auto yq = LoadDup128(d, &q[kDegQ * 4]);
+  // We use pointer arithmetic to refer to &p[(kDegP - n) * 4] to avoid a
+  // compiler warning that the index is out of bounds since we are already
+  // checking that it is not out of bounds with (kDegP >= n) and the access
+  // will be optimized away. Similarly with q and kDegQ.
+  HWY_FENCE;
+  if (kDegP >= 1) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 1) * 4)));
+  if (kDegQ >= 1) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 1) * 4)));
+  HWY_FENCE;
+  if (kDegP >= 2) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 2) * 4)));
+  if (kDegQ >= 2) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 2) * 4)));
+  HWY_FENCE;
+  if (kDegP >= 3) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 3) * 4)));
+  if (kDegQ >= 3) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 3) * 4)));
+  HWY_FENCE;
+  if (kDegP >= 4) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 4) * 4)));
+  if (kDegQ >= 4) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 4) * 4)));
+  HWY_FENCE;
+  if (kDegP >= 5) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 5) * 4)));
+  if (kDegQ >= 5) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 5) * 4)));
+  HWY_FENCE;
+  if (kDegP >= 6) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 6) * 4)));
+  if (kDegQ >= 6) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 6) * 4)));
+  HWY_FENCE;
+  if (kDegP >= 7) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 7) * 4)));
+  if (kDegQ >= 7) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 7) * 4)));
+  // Only seven Horner steps exist above, so degrees up to 7 are handled.
+  return FastDivision<T, V>()(yp, yq);
+}
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+#endif  // LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/rational_polynomial_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/rational_polynomial_test.cc
new file mode 100644
index 0000000000..699afd076e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/rational_polynomial_test.cc
@@ -0,0 +1,239 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <cmath>
+#include <string>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/rational_polynomial_test.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+#include <hwy/tests/test_util-inl.h>
+
+#include "lib/jxl/base/descriptive_statistics.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/rational_polynomial-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+using T = float;  // required by EvalLog2
+using D = HWY_FULL(T);
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::ShiftLeft;
+using hwy::HWY_NAMESPACE::ShiftRight;
+
+// Plain evaluator: no range reduction, just the rational polynomial at x.
+struct EvalPoly {
+  template <size_t NP, size_t NQ>
+  T operator()(T x, const T (&p)[NP], const T (&q)[NQ]) const {
+    // D is this file's alias for HWY_FULL(T).
+    const D d;
+    // Broadcast x to all lanes, evaluate, and read back the first lane.
+    return GetLane(EvalRationalPolynomial(d, Set(d, x), p, q));
+  }
+};
+
+// Range reduction for log2: polynomial of the mantissa plus the exponent.
+struct EvalLog2 {
+  template <size_t NP, size_t NQ>
+  T operator()(T x, const T (&p)[NP], const T (&q)[NQ]) const {
+    const HWY_FULL(T) d;
+    auto vx = Set(d, x);
+    // Integer view of the float bits, used for the exponent arithmetic below.
+    const HWY_FULL(int32_t) di;
+    const auto x_bits = BitCast(di, vx);
+    // Cannot handle negative numbers / NaN.
+    JXL_DASSERT(AllTrue(Abs(x_bits) == x_bits));
+
+    // Range reduction to [-1/3, 1/3] - 3 integer, 2 float ops
+    const auto exp_bits = x_bits - Set(di, 0x3f2aaaab);  // = 2/3
+    // Shifted exponent = log2; also used to clear mantissa.
+    const auto exp_shifted = ShiftRight<23>(exp_bits);
+    const auto mantissa = BitCast(d, x_bits - ShiftLeft<23>(exp_shifted));
+    const auto exp_val = ConvertTo(d, exp_shifted);
+    vx = mantissa - Set(d, 1.0f);
+    // Result: polynomial of (mantissa - 1) plus the integer exponent.
+    const auto approx = EvalRationalPolynomial(d, vx, p, q) + exp_val;
+    return GetLane(approx);
+  }
+};
+
+// Functions to approximate:
+// Returns 0 below 0 and 255 from 255 up; else linear (small) or power curve.
+T LinearToSrgb8Direct(T val) {
+  if (val < 0.0) return 0.0;
+  if (val >= 255.0) return 255.0;
+  return (val <= 10.0 / 12.92) ? val * 12.92
+       : 255.0 * (std::pow(val / 255.0, 1.0 / 2.4) * 1.055 - 0.055);
+}
+
+T SimpleGamma(T v) {
+  // Two-knee compression of v followed by an offset power law. All constants
+  // are stored as T.
+  static const T kGamma = 0.387494322593;
+  static const T kKnee1 = 43.01745241042018;
+  static const T kSlope1 = 0.0383723643799;
+  static const T kKnee2 = 94.68634353321337;
+  static const T kSlope2 = 0.22885405968;
+  static const T kOffset = 0.156775786057;
+  static const T kScale = 8.898059160493739;
+  // Above the first knee, pull the value back by a fraction of the excess.
+  const T excess1 = v - kKnee1;
+  if (excess1 >= 0) v -= excess1 * kSlope1;
+  // Same again for the second knee, measured on the already-reduced value.
+  const T excess2 = v - kKnee2;
+  if (excess2 >= 0) v -= excess2 * kSlope2;
+  // Final curve: scale * (offset + v^gamma).
+  return kScale * (kOffset + pow(v, kGamma));
+}
+
+// Samples func_to_approx and eval(x, p, q) at about 10000 evenly spaced points
+// in [x0, x1], prints roughly 20 of them, and returns the largest |g - f|.
+template <size_t NP, size_t NQ, class Eval>
+T RunApproximation(T x0, T x1, const T (&p)[NP], const T (&q)[NQ],
+                   const Eval& eval, T func_to_approx(T)) {
+  const auto step = (x1 - x0) / 10000.0;     // sampling interval
+  const auto print_gap = (x1 - x0) / 20.0;   // spacing of printed rows
+  Stats err;
+  T last_printed = 0;
+  // NOLINTNEXTLINE(clang-analyzer-security.FloatLoopCounter)
+  for (T x = x0; x <= x1; x += step) {
+    const T f = func_to_approx(x);
+    const T g = eval(x, p, q);
+    const auto abs_err = fabs(g - f);
+    err.Notify(abs_err);
+    if (x == x0 || x - last_printed > print_gap) {
+      printf("x: %11.6f, f: %11.6f, g: %11.6f, e: %11.6f\n", x, f, g, abs_err);
+      last_printed = x;
+    }
+  }
+  printf("%s\n", err.ToString().c_str());
+  return err.Max();
+}
+
+void TestSimpleGamma() {
+  const T p[4 * (6 + 1)] = {
+      HWY_REP4(-5.0646949363741811E-05), HWY_REP4(6.7369380528439771E-05),
+      HWY_REP4(8.9376652530412794E-05),  HWY_REP4(2.1153513301520462E-06),
+      HWY_REP4(-6.9130322970386449E-08), HWY_REP4(3.9424752749293728E-10),
+      HWY_REP4(1.2360288207619576E-13)};
+  // Denominator coefficients; same 4 * (degree + 1) layout as p.
+  const T q[4 * (6 + 1)] = {
+      HWY_REP4(-6.6389733798591366E-06), HWY_REP4(1.3299859726565908E-05),
+      HWY_REP4(3.8538748358398873E-06),  HWY_REP4(-2.8707687262928236E-08),
+      HWY_REP4(-6.6897385800005434E-10), HWY_REP4(6.1428748869186003E-12),
+      HWY_REP4(-2.5475738169252870E-15)};
+  // Compare against SimpleGamma over [0.77, 274.58]; max error must be < 0.05.
+  const T err = RunApproximation(0.77, 274.579999999999984, p, q, EvalPoly(),
+                                 SimpleGamma);
+  EXPECT_LT(err, 0.05);
+}
+
+void TestLinearToSrgb8Direct() {
+  const T p[4 * (5 + 1)] = {
+      HWY_REP4(-9.5357499040105154E-05), HWY_REP4(4.6761186249798248E-04),
+      HWY_REP4(2.5708174333943594E-04),  HWY_REP4(1.5250087770436082E-05),
+      HWY_REP4(1.1946768008931187E-07),  HWY_REP4(5.9916446295972850E-11)};
+  // Denominator is one degree lower than the numerator (4 vs 5).
+  const T q[4 * (4 + 1)] = {
+      HWY_REP4(1.8932479758079768E-05), HWY_REP4(2.7312342474687321E-05),
+      HWY_REP4(4.3901204783327006E-06), HWY_REP4(1.0417787306920273E-07),
+      HWY_REP4(3.0084206762140419E-10)};
+  // Compare against LinearToSrgb8Direct over [0.77, 255]; max error < 0.05.
+  const T err =
+      RunApproximation(0.77, 255, p, q, EvalPoly(), LinearToSrgb8Direct);
+  EXPECT_LT(err, 0.05);
+}
+
+void TestExp() {  // degree-2 / degree-2 fit of exp(x) on [-1, 1]
+  const T p[4 * (2 + 1)] = {HWY_REP4(9.6266879665530902E-01),
+                            HWY_REP4(4.8961265681586763E-01),
+                            HWY_REP4(8.2619259189548433E-02)};
+  const T q[4 * (2 + 1)] = {HWY_REP4(9.6259895571622622E-01),
+                            HWY_REP4(-4.7272457588933831E-01),
+                            HWY_REP4(7.4802088567547664E-02)};
+  const T err =
+      RunApproximation(-1, 1, p, q, EvalPoly(), [](T x) { return T(exp(x)); });
+  EXPECT_LT(err, 1E-4);  // largest sampled absolute error
+}
+
+void TestNegExp() {
+  // 4,3 is the min required for monotonicity; max error in 0,10: 751 ppm
+  // no benefit for k>50.
+  const T p[4 * (4 + 1)] = {
+      HWY_REP4(5.9580258551150123E-02), HWY_REP4(-2.5073728806886408E-02),
+      HWY_REP4(4.1561830213689248E-03), HWY_REP4(-3.1815408488900372E-04),
+      HWY_REP4(9.3866690094906802E-06)};
+  const T q[4 * (3 + 1)] = {
+      HWY_REP4(5.9579108238812878E-02), HWY_REP4(3.4542074345478582E-02),
+      HWY_REP4(8.7263562483501714E-03), HWY_REP4(1.4095109143061216E-03)};
+  // Approximates exp(-x) on [0, 10]; the bound is looser unless T is 8 bytes.
+  const T err =
+      RunApproximation(0, 10, p, q, EvalPoly(), [](T x) { return T(exp(-x)); });
+  EXPECT_LT(err, sizeof(T) == 8 ? 2E-5 : 3E-5);
+}
+
+void TestSin() {
+  const T p[4 * (6 + 1)] = {
+      HWY_REP4(1.5518122109203780E-05),  HWY_REP4(2.3388958643675966E+00),
+      HWY_REP4(-8.6705520940849157E-01), HWY_REP4(-1.9702294764873535E-01),
+      HWY_REP4(1.2193404314472320E-01),  HWY_REP4(-1.7373966109788839E-02),
+      HWY_REP4(7.8829435883034796E-04)};
+  const T q[4 * (5 + 1)] = {
+      HWY_REP4(2.3394371422557279E+00), HWY_REP4(-8.7028221081288615E-01),
+      HWY_REP4(2.0052872219658430E-01), HWY_REP4(-3.2460335995264836E-02),
+      HWY_REP4(3.1546157932479282E-03), HWY_REP4(-1.6692542019380155E-04)};
+  // Approximates sin(x) on [0, Pi<T>(1) * 2]; looser bound unless T is 8 bytes.
+  const T err = RunApproximation(0, Pi<T>(1) * 2, p, q, EvalPoly(),
+                                 [](T x) { return T(sin(x)); });
+  EXPECT_LT(err, sizeof(T) == 8 ? 5E-4 : 7E-4);
+}
+
+void TestLog() {  // log2 on [1E-6, 1000] via the range-reducing EvalLog2
+  HWY_ALIGN const T p[4 * (2 + 1)] = {HWY_REP4(-1.8503833400518310E-06),
+                                      HWY_REP4(1.4287160470083755E+00),
+                                      HWY_REP4(7.4245873327820566E-01)};
+  HWY_ALIGN const T q[4 * (2 + 1)] = {HWY_REP4(9.9032814277590719E-01),
+                                      HWY_REP4(1.0096718572241148E+00),
+                                      HWY_REP4(1.7409343003366853E-01)};
+  const T err = RunApproximation(1E-6, 1000, p, q, EvalLog2(), std::log2);
+  printf("%E\n", err);  // reported only; this test asserts no threshold
+}
+
+HWY_NOINLINE void TestRationalPolynomial() {  // runs all six tests in order
+  TestSimpleGamma();
+  TestLinearToSrgb8Direct();
+  TestExp();
+  TestNegExp();
+  TestSin();
+  TestLog();
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+class RationalPolynomialTest : public hwy::TestWithParamTarget {};
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(RationalPolynomialTest);
+// Each Test* function is exported by name; TestRationalPolynomial is not.
+HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestSimpleGamma);
+HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestLinearToSrgb8Direct);
+HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestExp);
+HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestNegExp);
+HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestSin);
+HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestLog);
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/robust_statistics_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/robust_statistics_test.cc
new file mode 100644
index 0000000000..22ee56abdb
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/robust_statistics_test.cc
@@ -0,0 +1,150 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/robust_statistics.h"
+
+#include <stdio.h>
+
+#include <numeric>  // partial_sum
+#include <random>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/noise_distributions.h"
+
+namespace jxl {
+namespace {
+
+TEST(RobustStatisticsTest, TestMode) {
+  // Both mode estimators must land within ~1 of the smoothed histogram argmax
+  // or of the median, for Poisson noise with nine different means.
+  // kReps: enough to populate bins. We have to sort this many values.
+  constexpr size_t kReps = 15000;
+  constexpr size_t kBins = 101;
+  std::mt19937 rng(65537);
+  // Place Poisson mean at 1/10, 2/10 .. 9/10 of the bin range.
+  for (int frac = 1; frac < 10; ++frac) {
+    printf("===========================frac %d\n", frac);
+
+    NoisePoisson noise(frac * kBins / 10);
+    std::vector<float> values;
+    values.reserve(kReps);
+
+    uint32_t bins[kBins] = {0};
+
+    std::uniform_real_distribution<float> jitter(-1E-3f, 1E-3f);
+    for (size_t rep = 0; rep < kReps; ++rep) {
+      // Scale back to integer, add jitter to avoid too many repeated values.
+      const float poisson = noise(0.0f, &rng) * 1E3f + jitter(rng);
+
+      values.push_back(poisson);
+      // Guard both ends: a negative sample must not index before bins[0].
+      const int idx_bin = static_cast<int>(poisson);
+      if (idx_bin >= 0 && idx_bin < static_cast<ssize_t>(kBins)) {
+        bins[idx_bin] += 1;
+      }  // else skip instead of clamping to avoid bias
+    }
+
+    // // Print histogram
+    // for (const uint32_t b : bins) {
+    //   printf("%u\n", b);
+    // }
+
+    // (Smoothed) argmax and median for verification
+    float smoothed[kBins];
+    smoothed[0] = bins[0];
+    smoothed[kBins - 1] = bins[kBins - 1];
+    for (size_t i = 1; i < kBins - 1; ++i) {
+      smoothed[i] = (2 * bins[i] + bins[i - 1] + bins[i + 1]) * 0.25f;
+    }
+    const float argmax =
+        std::max_element(smoothed, smoothed + kBins) - smoothed;
+    const float median = Median(&values);
+
+    std::sort(values.begin(), values.end());
+    const float hsm = HalfSampleMode()(values.data(), values.size());
+    // The half-range mode is computed from the cumulative histogram.
+    uint32_t cdf[kBins];
+    std::partial_sum(bins, bins + kBins, cdf);
+    const int hrm = HalfRangeMode()(cdf, kBins);
+
+    const auto is_near = [](const float expected, const float actual) {
+      return std::abs(expected - actual) <= 1.0f + 1E-5f;
+    };
+    EXPECT_TRUE(is_near(hsm, argmax) || is_near(hsm, median));
+    EXPECT_TRUE(is_near(hrm, argmax) || is_near(hrm, median));
+    // Diagnostics: the estimates, then the five bins centered on the argmax.
+    printf("hsm %.1f hrm %d argmax %.1f median %f\n", hsm, hrm, argmax, median);
+    const int center = static_cast<int>(argmax);
+    printf("%u %u %u %u %u\n", bins[center - 2], bins[center - 1], bins[center],
+           bins[center + 1], bins[center + 2]);
+  }
+}
+
+// Cross-checks the fixed-size Median3/Median5 against the generic Median on
+// random inputs.
+TEST(RobustStatisticsTest, TestMedian) {
+  std::vector<float> v3(3);
+  std::vector<float> v5(5);
+
+  std::uniform_real_distribution<float> dist(-100.0f, 100.0f);
+  std::mt19937 rng(129);
+
+  // Builds without NDEBUG run far fewer repetitions.
+#ifdef NDEBUG
+  constexpr size_t kReps = 100000;
+#else
+  constexpr size_t kReps = 100;
+#endif
+  for (size_t rep = 0; rep < kReps; ++rep) {
+    // Fill v3 first, then v5: fixed consumption order of the random stream.
+    for (float& sample : v3) sample = dist(rng);
+    for (float& sample : v5) sample = dist(rng);
+
+    JXL_ASSERT(Median(&v3) == Median3(v3[0], v3[1], v3[2]));
+    JXL_ASSERT(Median(&v5) == Median5(v5[0], v5[1], v5[2], v5[3], v5[4]));
+  }
+}
+
+template <class Noise>
+void TestLine(const Noise& noise, float max_l1_limit, float mad_limit) {
+  Line perfect(0.6f, 2.0f);  // reference line the noisy samples come from
+  std::mt19937_64 rng(129);
+  std::uniform_real_distribution<float> x_dist(1E-6f, 10.0f);
+  // Randomly spaced, increasing X values (must be unique).
+  std::vector<Bivariate> points;
+  float x = -100.0f;
+  for (size_t remaining = 500; remaining != 0; --remaining) {
+    x += x_dist(rng);
+    const float y = noise(perfect(x), &rng);
+    points.emplace_back(x, y);
+  }
+
+  // Build the estimate from the samples and evaluate its quality.
+  Line est(points);
+  float max_l1;
+  float mad;
+  EvaluateQuality(est, points, &max_l1, &mad);
+  printf("x %f  slope=%.2f b=%.2f  max_l1 %f mad %f\n", x, est.slope(),
+         est.intercept(), max_l1, mad);
+
+  EXPECT_LE(max_l1, max_l1_limit);
+  EXPECT_LE(mad, mad_limit);
+}
+
+TEST(RobustStatisticsTest, CleanLine) {
+  // NoiseNone: both limits are close to zero.
+  TestLine(NoiseNone(), 1E-6, 1E-7);
+}
+TEST(RobustStatisticsTest, Uniform) {
+  // NoiseUniform built from (-100, 100); limits are max_l1 107 and mad 53.
+  TestLine(NoiseUniform(-100.0f, 100.0f), 107, 53);
+}
+TEST(RobustStatisticsTest, Gauss) {
+  // NoiseGaussian built from 10 (presumably the std deviation - TODO confirm).
+  TestLine(NoiseGaussian(10.0f), 37, 7);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/roundtrip_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/roundtrip_test.cc
new file mode 100644
index 0000000000..219619fd7f
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/roundtrip_test.cc
@@ -0,0 +1,573 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "gtest/gtest.h"
+#include "jxl/decode.h"
+#include "jxl/decode_cxx.h"
+#include "jxl/encode.h"
+#include "jxl/encode_cxx.h"
+#include "lib/extras/codec.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/encode_internal.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testdata.h"
+
+namespace {
+
+// Wraps a raw pixel buffer in a CodecInOut, deriving bit depth and the
+// gray/alpha flags from pixel_format. icc_profile can be empty to pick a
+// default color encoding from the data type, or filled in to force that ICC.
+jxl::CodecInOut ConvertTestImage(const std::vector<uint8_t>& buf,
+                                 const size_t xsize, const size_t ysize,
+                                 const JxlPixelFormat& pixel_format,
+                                 const jxl::PaddedBytes& icc_profile) {
+  jxl::CodecInOut io;
+  io.SetSize(xsize, ysize);
+  // 1 or 2 channels mean gray; 2 or 4 channels mean an alpha channel exists.
+  bool is_gray =
+      pixel_format.num_channels == 1 || pixel_format.num_channels == 2;
+  bool has_alpha =
+      pixel_format.num_channels == 2 || pixel_format.num_channels == 4;
+
+  io.metadata.m.color_encoding.SetColorSpace(is_gray ? jxl::ColorSpace::kGray
+                                                     : jxl::ColorSpace::kRGB);
+  if (has_alpha) {
+    // Note: alpha > 16 not yet supported by the C++ codec
+    switch (pixel_format.data_type) {
+      case JXL_TYPE_UINT8:
+        io.metadata.m.SetAlphaBits(8);
+        break;
+      case JXL_TYPE_UINT16:
+      case JXL_TYPE_UINT32:  // NOTE(review): not handled by the switch below
+      case JXL_TYPE_FLOAT:
+      case JXL_TYPE_FLOAT16:
+        io.metadata.m.SetAlphaBits(16);
+        break;
+      default:
+        EXPECT_TRUE(false) << "Roundtrip tests for data type "
+                           << pixel_format.data_type << " not yet implemented.";
+    }
+  }
+  size_t bitdepth = 0;  // stays 0 if data_type hits the default case
+  switch (pixel_format.data_type) {
+    case JXL_TYPE_FLOAT:
+      bitdepth = 32;
+      io.metadata.m.SetFloat32Samples();
+      break;
+    case JXL_TYPE_FLOAT16:
+      bitdepth = 16;
+      io.metadata.m.SetFloat16Samples();
+      break;
+    case JXL_TYPE_UINT8:
+      bitdepth = 8;
+      io.metadata.m.SetUintSamples(8);
+      break;
+    case JXL_TYPE_UINT16:
+      bitdepth = 16;
+      io.metadata.m.SetUintSamples(16);
+      break;
+    default:
+      EXPECT_TRUE(false) << "Roundtrip tests for data type "
+                         << pixel_format.data_type << " not yet implemented.";
+  }
+  jxl::ColorEncoding color_encoding;
+  if (!icc_profile.empty()) {
+    jxl::PaddedBytes icc_profile_copy(icc_profile);
+    EXPECT_TRUE(color_encoding.SetICC(std::move(icc_profile_copy)));
+  } else if (pixel_format.data_type == JXL_TYPE_FLOAT) {
+    color_encoding = jxl::ColorEncoding::LinearSRGB(is_gray);
+  } else {
+    color_encoding = jxl::ColorEncoding::SRGB(is_gray);
+  }
+  EXPECT_TRUE(
+      ConvertFromExternal(jxl::Span<const uint8_t>(buf.data(), buf.size()),
+                          xsize, ysize, color_encoding, has_alpha,
+                          /*alpha_is_premultiplied=*/false,
+                          /*bits_per_sample=*/bitdepth, pixel_format.endianness,
+                          /*flipped_y=*/false, /*pool=*/nullptr, &io.Main()));
+  return io;
+}
+
+template <typename T>
+T ConvertTestPixel(const float val);
+// float: the value is passed through unchanged.
+template <>
+float ConvertTestPixel<float>(const float val) {
+  return val;
+}
+// uint16_t: scaled by UINT16_MAX; the conversion drops the fractional part.
+template <>
+uint16_t ConvertTestPixel<uint16_t>(const float val) {
+  return static_cast<uint16_t>(val * UINT16_MAX);
+}
+// uint8_t: scaled by UINT8_MAX; the conversion drops the fractional part.
+template <>
+uint8_t ConvertTestPixel<uint8_t>(const float val) {
+  return static_cast<uint8_t>(val * UINT8_MAX);
+}
+
+// Returns a synthetic gradient test image as raw bytes in native byte order.
+// Samples are stored pixel by pixel, channel by channel. The value for a
+// channel depends on (channel index % 4):
+//   0: y / ysize, 1: x / xsize, 2: (x + y) / (xsize + ysize),
+//   3: (x * y) / (xsize * ysize); ConvertTestPixel<T> then maps it to T.
+template <typename T>
+std::vector<uint8_t> GetTestImage(const size_t xsize, const size_t ysize,
+                                  const JxlPixelFormat& pixel_format) {
+  const size_t channels = pixel_format.num_channels;
+  std::vector<T> pixels(xsize * ysize * channels);
+
+  // Samples are produced in storage order, so a running pointer suffices.
+  T* out = pixels.data();
+  for (size_t y = 0; y < ysize; y++) {
+    for (size_t x = 0; x < xsize; x++) {
+      // The four gradient values at this pixel, indexed by chan % 4.
+      const float gradients[4] = {
+          static_cast<float>(y) / static_cast<float>(ysize),
+          static_cast<float>(x) / static_cast<float>(xsize),
+          static_cast<float>(x + y) / static_cast<float>(xsize + ysize),
+          static_cast<float>(x * y) / static_cast<float>(xsize * ysize)};
+      for (size_t chan = 0; chan < channels; chan++) {
+        *out++ = ConvertTestPixel<T>(gradients[chan % 4]);
+      }
+    }
+  }
+
+  // Copy the typed samples out as plain bytes.
+  std::vector<uint8_t> bytes(pixels.size() * sizeof(T));
+  memcpy(bytes.data(), pixels.data(), bytes.size());
+  return bytes;
+}
+
+void EncodeWithEncoder(JxlEncoder* enc, std::vector<uint8_t>* compressed) {
+  // Runs the encoder until it stops asking for more output space.
+  compressed->resize(64);
+  uint8_t* next_out = compressed->data();
+  size_t avail_out = compressed->size();
+  JxlEncoderStatus process_result;
+  for (;;) {
+    process_result = JxlEncoderProcessOutput(enc, &next_out, &avail_out);
+    if (process_result != JXL_ENC_NEED_MORE_OUTPUT) break;
+    const size_t written = next_out - compressed->data();  // before realloc
+    compressed->resize(compressed->size() * 2);
+    next_out = compressed->data() + written;
+    avail_out = compressed->size() - written;
+  }
+  compressed->resize(next_out - compressed->data());  // trim unused tail
+  EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+}
+
+// Generates some pixels using the given dimensions and input_pixel_format,
+// compresses them, and verifies that the decoded version (requested in
+// output_pixel_format) is similar to the original pixels.
+template <typename T>
+void VerifyRoundtripCompression(const size_t xsize, const size_t ysize,
+                                const JxlPixelFormat& input_pixel_format,
+                                const JxlPixelFormat& output_pixel_format,
+                                const bool lossless, const bool use_container) {
+  const std::vector<uint8_t> original_bytes =
+      GetTestImage<T>(xsize, ysize, input_pixel_format);
+  jxl::CodecInOut original_io =
+      ConvertTestImage(original_bytes, xsize, ysize, input_pixel_format, {});
+  // Fatal check: every encoder call below would use the null handle.
+  JxlEncoder* enc = JxlEncoderCreate(nullptr);
+  ASSERT_NE(nullptr, enc);
+
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseContainer(enc, use_container));
+  JxlBasicInfo basic_info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &input_pixel_format);
+  basic_info.xsize = xsize;
+  basic_info.ysize = ysize;
+  basic_info.uses_original_profile = lossless;
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &basic_info));
+  JxlColorEncoding color_encoding;
+  if (input_pixel_format.data_type == JXL_TYPE_FLOAT) {
+    JxlColorEncodingSetToLinearSRGB(
+        &color_encoding,
+        /*is_gray=*/input_pixel_format.num_channels < 3);
+  } else {
+    JxlColorEncodingSetToSRGB(&color_encoding,
+                              /*is_gray=*/input_pixel_format.num_channels < 3);
+  }
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetColorEncoding(enc, &color_encoding));
+  JxlEncoderOptions* opts = JxlEncoderOptionsCreate(enc, nullptr);
+  JxlEncoderOptionsSetLossless(opts, lossless);
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderAddImageFrame(opts, &input_pixel_format,
+                                    (void*)original_bytes.data(),
+                                    original_bytes.size()));
+  JxlEncoderCloseInput(enc);
+
+  std::vector<uint8_t> compressed;
+  EncodeWithEncoder(enc, &compressed);
+  JxlEncoderDestroy(enc);
+  // Fatal check again: a null decoder cannot be used by the calls below.
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+  ASSERT_NE(nullptr, dec);
+
+  const uint8_t* next_in = compressed.data();
+  size_t avail_in = compressed.size();
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO |
+                                               JXL_DEC_COLOR_ENCODING |
+                                               JXL_DEC_FULL_IMAGE));
+
+  JxlDecoderSetInput(dec, next_in, avail_in);
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  size_t buffer_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderImageOutBufferSize(
+                                 dec, &output_pixel_format, &buffer_size));
+  if (&input_pixel_format == &output_pixel_format) {  // same object passed
+    EXPECT_EQ(buffer_size, original_bytes.size());
+  }
+
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  EXPECT_EQ(xsize, info.xsize);
+  EXPECT_EQ(ysize, info.ysize);
+
+  EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+
+  size_t icc_profile_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetICCProfileSize(dec, &output_pixel_format,
+                                        JXL_COLOR_PROFILE_TARGET_DATA,
+                                        &icc_profile_size));
+  jxl::PaddedBytes icc_profile(icc_profile_size);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetColorAsICCProfile(
+                dec, &output_pixel_format, JXL_COLOR_PROFILE_TARGET_DATA,
+                icc_profile.data(), icc_profile.size()));
+
+  std::vector<uint8_t> decoded_bytes(buffer_size);
+
+  EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+                                 dec, &output_pixel_format,
+                                 decoded_bytes.data(), decoded_bytes.size()));
+
+  EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+
+  JxlDecoderDestroy(dec);
+
+  jxl::CodecInOut decoded_io = ConvertTestImage(
+      decoded_bytes, xsize, ysize, output_pixel_format, icc_profile);
+  // Lossless must score a distance of 0; lossy may score up to 2.0.
+  jxl::ButteraugliParams ba;
+  float butteraugli_score = ButteraugliDistance(original_io, decoded_io, ba,
+                                                /*distmap=*/nullptr, nullptr);
+  if (lossless) {
+    EXPECT_LE(butteraugli_score, 0.0f);
+  } else {
+    EXPECT_LE(butteraugli_score, 2.0f);
+  }
+}
+
+}  // namespace
+
+TEST(RoundtripTest, FloatFrameRoundtripTest) {
+  // Every combination of container, lossless and 1..4 channels, except the
+  // one skipped below.
+  for (int use_container = 0; use_container < 2; use_container++) {
+    for (int lossless = 0; lossless < 2; lossless++) {
+      for (uint32_t num_channels = 1; num_channels < 5; num_channels++) {
+        // Lossless extra float channels are not supported (yet): skip the
+        // lossless runs with an even channel count.
+        if (lossless != 0 && num_channels % 2 == 0) continue;
+        const JxlPixelFormat pixel_format{num_channels, JXL_TYPE_FLOAT,
+                                          JXL_NATIVE_ENDIAN, 0};
+        VerifyRoundtripCompression<float>(63, 129, pixel_format, pixel_format,
+                                          lossless != 0, use_container != 0);
+      }
+    }
+  }
+}
+
+TEST(RoundtripTest, Uint16FrameRoundtripTest) {  // same format in and out
+  for (int use_container = 0; use_container < 2; use_container++) {
+    for (int lossless = 0; lossless < 2; lossless++) {
+      for (uint32_t num_channels = 1; num_channels < 5; num_channels++) {
+        const JxlPixelFormat pixel_format{num_channels, JXL_TYPE_UINT16,
+                                          JXL_NATIVE_ENDIAN, 0};
+        VerifyRoundtripCompression<uint16_t>(63, 129, pixel_format,
+                                             pixel_format, lossless != 0,
+                                             use_container != 0);
+      }
+    }
+  }
+}
+
+TEST(RoundtripTest, Uint8FrameRoundtripTest) {
+  // Container on/off x lossy/lossless x 1..4 channels; same format in and out.
+  for (int use_container = 0; use_container < 2; use_container++) {
+    for (int lossless = 0; lossless < 2; lossless++) {
+      for (uint32_t num_channels = 1; num_channels < 5; num_channels++) {
+        const JxlPixelFormat pixel_format{num_channels, JXL_TYPE_UINT8,
+                                          JXL_NATIVE_ENDIAN, 0};
+        VerifyRoundtripCompression<uint8_t>(63, 129, pixel_format, pixel_format,
+                                            lossless != 0, use_container != 0);
+      }
+    }
+  }
+}
+
+TEST(RoundtripTest, TestNonlinearSrgbAsXybEncoded) {  // uint8 in, float out
+  for (int use_container = 0; use_container < 2; use_container++) {
+    for (uint32_t num_channels = 1; num_channels < 5; num_channels++) {
+      const JxlPixelFormat pixel_format_in{num_channels, JXL_TYPE_UINT8,
+                                           JXL_NATIVE_ENDIAN, 0};
+      const JxlPixelFormat pixel_format_out{num_channels, JXL_TYPE_FLOAT,
+                                            JXL_NATIVE_ENDIAN, 0};
+      VerifyRoundtripCompression<uint8_t>(
+          63, 129, pixel_format_in, pixel_format_out, /*lossless=*/false,
+          use_container != 0);
+    }
+  }
+}
+
+TEST(RoundtripTest, ExtraBoxesTest) {
+  JxlPixelFormat pixel_format =
+      JxlPixelFormat{4, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0};
+  const size_t xsize = 61;
+  const size_t ysize = 71;
+
+  const std::vector<uint8_t> original_bytes =
+      GetTestImage<float>(xsize, ysize, pixel_format);
+  jxl::CodecInOut original_io =
+      ConvertTestImage(original_bytes, xsize, ysize, pixel_format, {});
+  // Fatal check: every encoder call below would use the null handle.
+  JxlEncoder* enc = JxlEncoderCreate(nullptr);
+  ASSERT_NE(nullptr, enc);
+
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseContainer(enc, true));
+  JxlBasicInfo basic_info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+  basic_info.xsize = xsize;
+  basic_info.ysize = ysize;
+  basic_info.uses_original_profile = false;
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &basic_info));
+  JxlColorEncoding color_encoding;
+  if (pixel_format.data_type == JXL_TYPE_FLOAT) {
+    JxlColorEncodingSetToLinearSRGB(&color_encoding,
+                                    /*is_gray=*/pixel_format.num_channels < 3);
+  } else {
+    JxlColorEncodingSetToSRGB(&color_encoding,
+                              /*is_gray=*/pixel_format.num_channels < 3);
+  }
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetColorEncoding(enc, &color_encoding));
+  JxlEncoderOptions* opts = JxlEncoderOptionsCreate(enc, nullptr);
+  JxlEncoderOptionsSetLossless(opts, false);
+  EXPECT_EQ(
+      JXL_ENC_SUCCESS,
+      JxlEncoderAddImageFrame(opts, &pixel_format, (void*)original_bytes.data(),
+                              original_bytes.size()));
+  JxlEncoderCloseInput(enc);
+
+  std::vector<uint8_t> compressed;
+  EncodeWithEncoder(enc, &compressed);
+  JxlEncoderDestroy(enc);
+  // Append a "crud" box holding 1023 zero bytes; decoding must still work.
+  std::vector<uint8_t> extra_data(1023);
+  jxl::AppendBoxHeader(jxl::MakeBoxType("crud"), extra_data.size(), false,
+                       &compressed);
+  compressed.insert(compressed.end(), extra_data.begin(), extra_data.end());
+
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+  ASSERT_NE(nullptr, dec);
+
+  const uint8_t* next_in = compressed.data();
+  size_t avail_in = compressed.size();
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO |
+                                               JXL_DEC_COLOR_ENCODING |
+                                               JXL_DEC_FULL_IMAGE));
+
+  JxlDecoderSetInput(dec, next_in, avail_in);
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  size_t buffer_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(dec, &pixel_format, &buffer_size));
+  EXPECT_EQ(buffer_size, original_bytes.size());
+
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  EXPECT_EQ(xsize, info.xsize);
+  EXPECT_EQ(ysize, info.ysize);
+
+  EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+
+  size_t icc_profile_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetICCProfileSize(dec, &pixel_format,
+                                        JXL_COLOR_PROFILE_TARGET_DATA,
+                                        &icc_profile_size));
+  jxl::PaddedBytes icc_profile(icc_profile_size);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetColorAsICCProfile(
+                dec, &pixel_format, JXL_COLOR_PROFILE_TARGET_DATA,
+                icc_profile.data(), icc_profile.size()));
+
+  std::vector<uint8_t> decoded_bytes(buffer_size);
+
+  EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(dec, &pixel_format,
+                                                         decoded_bytes.data(),
+                                                         decoded_bytes.size()));
+
+  EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+
+  JxlDecoderDestroy(dec);
+
+  jxl::CodecInOut decoded_io =
+      ConvertTestImage(decoded_bytes, xsize, ysize, pixel_format, icc_profile);
+
+  jxl::ButteraugliParams ba;
+  float butteraugli_score = ButteraugliDistance(original_io, decoded_io, ba,
+                                                /*distmap=*/nullptr, nullptr);
+  EXPECT_LE(butteraugli_score, 2.0f);
+}
+
+TEST(RoundtripTest, TestICCProfile) {
+  // This ICC profile is not a valid ICC profile, however neither the encoder
+  // nor the decoder parse this profile, and the bytes should be passed on
+  // correctly through the roundtrip.
+  jxl::PaddedBytes icc;
+  for (size_t i = 0; i < 200; i++) {
+    icc.push_back(i ^ 55);
+  }
+
+  JxlPixelFormat format =
+      JxlPixelFormat{3, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0};
+
+  size_t xsize = 25;
+  size_t ysize = 37;
+  const std::vector<uint8_t> original_bytes =
+      GetTestImage<uint8_t>(xsize, ysize, format);
+  // Fatal check: every encoder call below would use the null handle.
+  JxlEncoder* enc = JxlEncoderCreate(nullptr);
+  ASSERT_NE(nullptr, enc);
+
+  JxlBasicInfo basic_info;
+  jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &format);
+  basic_info.xsize = xsize;
+  basic_info.ysize = ysize;
+  basic_info.uses_original_profile = JXL_FALSE;
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &basic_info));
+
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderSetICCProfile(enc, icc.data(), icc.size()));
+  JxlEncoderOptions* opts = JxlEncoderOptionsCreate(enc, nullptr);
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderAddImageFrame(opts, &format, (void*)original_bytes.data(),
+                                    original_bytes.size()));
+  JxlEncoderCloseInput(enc);
+
+  std::vector<uint8_t> compressed;
+  EncodeWithEncoder(enc, &compressed);
+  JxlEncoderDestroy(enc);
+  // Fatal check again: a null decoder cannot be used by the calls below.
+  JxlDecoder* dec = JxlDecoderCreate(nullptr);
+  ASSERT_NE(nullptr, dec);
+
+  const uint8_t* next_in = compressed.data();
+  size_t avail_in = compressed.size();
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO |
+                                               JXL_DEC_COLOR_ENCODING |
+                                               JXL_DEC_FULL_IMAGE));
+
+  JxlDecoderSetInput(dec, next_in, avail_in);
+  EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+  size_t buffer_size;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+  EXPECT_EQ(buffer_size, original_bytes.size());
+
+  JxlBasicInfo info;
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+  EXPECT_EQ(xsize, info.xsize);
+  EXPECT_EQ(ysize, info.ysize);
+
+  EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+
+  size_t dec_icc_size;
+  EXPECT_EQ(
+      JXL_DEC_SUCCESS,
+      JxlDecoderGetICCProfileSize(
+          dec, &format, JXL_COLOR_PROFILE_TARGET_ORIGINAL, &dec_icc_size));
+  EXPECT_EQ(icc.size(), dec_icc_size);
+  jxl::PaddedBytes dec_icc(dec_icc_size);
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderGetColorAsICCProfile(dec, &format,
+                                           JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+                                           dec_icc.data(), dec_icc.size()));
+
+  std::vector<uint8_t> decoded_bytes(buffer_size);
+
+  EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSetImageOutBuffer(dec, &format, decoded_bytes.data(),
+                                        decoded_bytes.size()));
+
+  EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+
+  EXPECT_EQ(icc, dec_icc);
+
+  JxlDecoderDestroy(dec);
+}
+
+TEST(RoundtripTest, JXL_TRANSCODE_JPEG_TEST(TestJPEGReconstruction)) {
+  // Transcodes a JPEG file to JXL with JPEG metadata stored, then has the
+  // decoder reconstruct the JPEG and expects the original bytes back.
+  const jxl::PaddedBytes jpeg = jxl::ReadTestData(
+      "imagecompression.info/flower_foveon.png.im_q85_420.jpg");
+  jxl::CodecInOut jpeg_io;
+  ASSERT_TRUE(
+      SetFromBytes(jxl::Span<const uint8_t>(jpeg), &jpeg_io, /*pool=*/nullptr));
+
+  JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+  JxlEncoderOptions* enc_options = JxlEncoderOptionsCreate(enc.get(), nullptr);
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseContainer(enc.get(), JXL_TRUE));
+  EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderStoreJPEGMetadata(enc.get(), JXL_TRUE));
+  EXPECT_EQ(JXL_ENC_SUCCESS,
+            JxlEncoderAddJPEGFrame(enc_options, jpeg.data(), jpeg.size()));
+  JxlEncoderCloseInput(enc.get());
+  std::vector<uint8_t> codestream;
+  EncodeWithEncoder(enc.get(), &codestream);
+
+  JxlDecoderPtr dec = JxlDecoderMake(nullptr);
+  const int wanted_events = JXL_DEC_JPEG_RECONSTRUCTION | JXL_DEC_FULL_IMAGE;
+  EXPECT_EQ(JXL_DEC_SUCCESS,
+            JxlDecoderSubscribeEvents(dec.get(), wanted_events));
+  JxlDecoderSetInput(dec.get(), codestream.data(), codestream.size());
+  EXPECT_EQ(JXL_DEC_JPEG_RECONSTRUCTION, JxlDecoderProcessInput(dec.get()));
+
+  // Start with a 128-byte output buffer; each loop pass doubles it and hands
+  // the decoder the part after the bytes already written.
+  std::vector<uint8_t> jpeg_out(128);
+  EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetJPEGBuffer(dec.get(), jpeg_out.data(),
+                                                     jpeg_out.size()));
+  size_t written = 0;
+  JxlDecoderStatus status;
+  do {
+    written = jpeg_out.size() - JxlDecoderReleaseJPEGBuffer(dec.get());
+    jpeg_out.resize(jpeg_out.size() * 2);
+    EXPECT_EQ(JXL_DEC_SUCCESS,
+              JxlDecoderSetJPEGBuffer(dec.get(), jpeg_out.data() + written,
+                                      jpeg_out.size() - written));
+    status = JxlDecoderProcessInput(dec.get());
+  } while (status == JXL_DEC_JPEG_NEED_MORE_OUTPUT);
+  ASSERT_EQ(JXL_DEC_FULL_IMAGE, status);
+  written = jpeg_out.size() - JxlDecoderReleaseJPEGBuffer(dec.get());
+  ASSERT_EQ(written, jpeg.size());
+  EXPECT_EQ(0, memcmp(jpeg_out.data(), jpeg.data(), written));
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/sanitizers.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/sanitizers.h
new file mode 100644
index 0000000000..69cec8afac
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/sanitizers.h
@@ -0,0 +1,222 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_SANITIZERS_H_
+#define LIB_JXL_SANITIZERS_H_
+
+#include <stddef.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/image.h"
+
+#ifdef MEMORY_SANITIZER  // MEMORY_SANITIZER is defined: treat MSan as enabled.
+#define JXL_MEMORY_SANITIZER 1
+#elif defined(__has_feature)  // Otherwise ask the compiler, if it supports it.
+#if __has_feature(memory_sanitizer)
+#define JXL_MEMORY_SANITIZER 1
+#else  // !__has_feature(memory_sanitizer)
+#define JXL_MEMORY_SANITIZER 0
+#endif  // __has_feature(memory_sanitizer)
+#else  // Neither MEMORY_SANITIZER nor __has_feature is available.
+#define JXL_MEMORY_SANITIZER 0
+#endif  // MEMORY_SANITIZER / __has_feature
+
+#if JXL_MEMORY_SANITIZER
+#include <stdio.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+#include "sanitizer/msan_interface.h"
+#endif
+
+namespace jxl {
+namespace msan {
+
+#if JXL_MEMORY_SANITIZER
+
+// Chosen so that kSanitizerSentinel is four copies of kSanitizerSentinelByte.
+constexpr uint8_t kSanitizerSentinelByte = 0x48;
+constexpr float kSanitizerSentinel = 205089.125f;
+
+static JXL_INLINE JXL_MAYBE_UNUSED void PoisonMemory(const volatile void* m,
+                                                     size_t size) {
+  __msan_poison(m, size);  // Poisons the `size` bytes starting at `m`.
+}
+
+static JXL_INLINE JXL_MAYBE_UNUSED void UnpoisonMemory(const volatile void* m,
+                                                       size_t size) {
+  __msan_unpoison(m, size);  // Unpoisons the `size` bytes starting at `m`.
+}
+
+// Poisons every byte of a single plane, including padding.
+static JXL_INLINE JXL_MAYBE_UNUSED void PoisonImage(const PlaneBase& im) {
+  const size_t total_bytes = im.bytes_per_row() * im.ysize();
+  PoisonMemory(im.bytes(), total_bytes);
+}
+
+// Poisons each of the three planes of `im` in turn.
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED void PoisonImage(const Image3<T>& im) {
+  for (size_t c = 0; c < 3; ++c) PoisonImage(im.Plane(c));
+}
+
+// Prints to stderr the regions of `im` that MSan reports as uninitialized.
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED void PrintImageUninitialized(
+    const Plane<T>& im) {
+  fprintf(stderr, "Uninitialized regions for image of size %zux%zu:\n",
+          im.xsize(), im.ysize());
+
+  // A segment of uninitialized pixels in a row, in the format [first, second).
+  typedef std::pair<size_t, size_t> PixelSegment;
+
+  // Helper class to merge and print a list of rows of PixelSegment that may be
+  // the same over big ranges of rows. This compacts the output to ranges of
+  // rows like "[y0, y1): [x0, x1) [x2, x3)".
+  class RowsMerger {
+   public:
+    // Add a new row to the list of rows. If the row is the same as the previous
+    // one it will be merged showing a range of rows [y0, y1), but if the new
+    // row is different the current range of rows (if any) will be printed and a
+    // new one will be started.
+    void AddRow(size_t y, std::vector<PixelSegment>&& new_row) {
+      if (start_y_ != -1 && new_row != segments_) {
+        PrintRow(y);
+      }
+      if (new_row.empty()) {
+        // Skip ranges with no uninitialized pixels.
+        start_y_ = -1;
+        segments_.clear();
+        return;
+      }
+      if (start_y_ == -1) {
+        start_y_ = y;
+        segments_ = std::move(new_row);
+      }
+    }
+
+    // Print the contents of the range of rows [start_y_, end_y) if any.
+    void PrintRow(size_t end_y) {
+      if (start_y_ == -1) return;
+      if (segments_.empty()) {
+        start_y_ = -1;
+        return;
+      }
+      if (end_y - start_y_ > 1) {
+        fprintf(stderr, " y=[%zd, %zu):", start_y_, end_y);
+      } else {
+        fprintf(stderr, " y=[%zd]:", start_y_);
+      }
+      for (const auto& seg : segments_) {
+        // seg.first / seg.second are size_t, so they are printed with %zu.
+        if (seg.first + 1 == seg.second) {
+          fprintf(stderr, " [%zu]", seg.first);
+        } else {
+          fprintf(stderr, " [%zu, %zu)", seg.first, seg.second);
+        }
+      }
+      fprintf(stderr, "\n");
+      start_y_ = -1;
+    }
+
+   private:
+    std::vector<PixelSegment> segments_;
+    // Row number of the first row in the range of rows that have |segments| as
+    // the undefined segments.
+    ssize_t start_y_ = -1;
+  } rows_merger;
+
+  class SegmentsMerger {  // Merges consecutive x values into [first, second).
+   public:
+    void AddValue(size_t x) {
+      if (row.empty() || row.back().second != x) {
+        row.emplace_back(x, x + 1);
+      } else {
+        row.back().second = x + 1;
+      }
+    }
+
+    std::vector<PixelSegment> row;
+  };
+
+  for (size_t y = 0; y < im.ysize(); y++) {
+    auto* row = im.Row(y);
+    SegmentsMerger seg_merger;
+    size_t x = 0;
+    while (x < im.xsize()) {
+      intptr_t ret =
+          __msan_test_shadow(row + x, (im.xsize() - x) * sizeof(row[0]));
+      if (ret < 0) break;  // No poisoned byte in the rest of this row.
+      size_t next_x = x + ret / sizeof(row[0]);  // Pixel holding that byte.
+      seg_merger.AddValue(next_x);
+      x = next_x + 1;
+    }
+    rows_merger.AddRow(y, std::move(seg_merger.row));
+  }
+  rows_merger.PrintRow(im.ysize());
+}
+
+// Check that all the pixels in the rect `r` of the image are initialized (not
+// poisoned). If any of the values is poisoned it is logged and it will abort.
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED void CheckImageInitialized(
+    const Plane<T>& im, const Rect& r, const char* message) {
+  JXL_ASSERT(r.x0() <= im.xsize());
+  JXL_ASSERT(r.x0() + r.xsize() <= im.xsize());
+  JXL_ASSERT(r.y0() <= im.ysize());
+  JXL_ASSERT(r.y0() + r.ysize() <= im.ysize());
+  for (size_t y = r.y0(); y < r.y0() + r.ysize(); y++) {
+    const auto* row = im.Row(y);
+    intptr_t ret = __msan_test_shadow(row + r.x0(), sizeof(*row) * r.xsize());
+    if (ret != -1) {
+      JXL_DEBUG(1,
+                "Checking an image of %zu x %zu, rect x0=%zu, y0=%zu, "
+                "xsize=%zu, ysize=%zu",
+                im.xsize(), im.ysize(), r.x0(), r.y0(), r.xsize(), r.ysize());
+      size_t x = r.x0() + ret / sizeof(*row);  // ret is an offset from r.x0().
+      JXL_DEBUG(1, "CheckImageInitialized failed at x=%zu, y=%zu: %s", x, y,
+                message ? message : "");
+      PrintImageUninitialized(im);
+    }
+    // This will report an error if memory is not initialized.
+    __msan_check_mem_is_initialized(row + r.x0(), sizeof(*row) * r.xsize());
+  }
+}
+
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED void CheckImageInitialized(
+    const Image3<T>& im, const Rect& r, const char* message) {
+  const std::string base(message ? message : "");  // Null is treated as "".
+  for (size_t c = 0; c < 3; c++) {  // One check per plane.
+    const std::string tagged = base + " c=" + std::to_string(c);
+    CheckImageInitialized(im.Plane(c), r, tagged.c_str());
+  }
+}
+
+#define JXL_CHECK_IMAGE_INITIALIZED(im, r) \
+  ::jxl::msan::CheckImageInitialized(im, r, "im=" #im ", r=" #r);
+
+#else  // JXL_MEMORY_SANITIZER
+
+// In non-msan mode these functions don't use volatile since it is not needed
+// for the empty functions.
+
+static JXL_INLINE JXL_MAYBE_UNUSED void PoisonMemory(const void*, size_t) {}
+static JXL_INLINE JXL_MAYBE_UNUSED void UnpoisonMemory(const void*, size_t) {}
+// No-op PoisonImage overloads with the same parameter types as the MSan build.
+static JXL_INLINE JXL_MAYBE_UNUSED void PoisonImage(const PlaneBase&) {}
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED void PoisonImage(const Image3<T>&) {}
+
+#define JXL_CHECK_IMAGE_INITIALIZED(im, r)
+
+#endif
+
+}  // namespace msan
+}  // namespace jxl
+
+#endif  // LIB_JXL_SANITIZERS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/speed_tier_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/speed_tier_test.cc
new file mode 100644
index 0000000000..4e7c9f9fc4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/speed_tier_test.cc
@@ -0,0 +1,112 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "lib/extras/codec.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/thread_pool_internal.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/dec_file.h"
+#include "lib/jxl/dec_params.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testdata.h"
+
+namespace jxl {
+namespace {
+
+// One test configuration: the speed tier and whether the image is shrunk 8x.
+struct SpeedTierTestParams {
+  SpeedTier speed_tier;
+  bool shrink8;
+  explicit SpeedTierTestParams(SpeedTier tier, bool shrink = false)
+      : speed_tier(tier), shrink8(shrink) {}
+};
+
+std::ostream& operator<<(std::ostream& os, SpeedTierTestParams params) {
+  // Print shrink8 as true/false, then restore the stream's original flags.
+  const std::ios_base::fmtflags saved = os.setf(std::ios_base::boolalpha);
+  os << "SpeedTierTestParams{" << SpeedTierName(params.speed_tier)
+     << ", /*shrink8=*/" << params.shrink8 << "}";
+  os.flags(saved);
+  return os;
+}
+
+class SpeedTierTest : public testing::TestWithParam<SpeedTierTestParams> {};
+
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(
+    SpeedTierTestInstantiation, SpeedTierTest,
+    testing::Values(SpeedTierTestParams{SpeedTier::kCheetah,
+                                        /*shrink8=*/true},
+                    SpeedTierTestParams{SpeedTier::kCheetah,
+                                        /*shrink8=*/false},
+                    SpeedTierTestParams{SpeedTier::kThunder,
+                                        /*shrink8=*/true},
+                    SpeedTierTestParams{SpeedTier::kThunder,
+                                        /*shrink8=*/false},
+                    SpeedTierTestParams{SpeedTier::kLightning,
+                                        /*shrink8=*/true},
+                    SpeedTierTestParams{SpeedTier::kLightning,
+                                        /*shrink8=*/false},
+                    SpeedTierTestParams{SpeedTier::kFalcon,
+                                        /*shrink8=*/true},
+                    SpeedTierTestParams{SpeedTier::kFalcon,
+                                        /*shrink8=*/false},
+                    SpeedTierTestParams{SpeedTier::kHare,
+                                        /*shrink8=*/true},
+                    SpeedTierTestParams{SpeedTier::kHare,
+                                        /*shrink8=*/false},
+                    SpeedTierTestParams{SpeedTier::kWombat,
+                                        /*shrink8=*/true},
+                    SpeedTierTestParams{SpeedTier::kWombat,
+                                        /*shrink8=*/false},
+                    SpeedTierTestParams{SpeedTier::kSquirrel,
+                                        /*shrink8=*/true},
+                    SpeedTierTestParams{SpeedTier::kSquirrel,
+                                        /*shrink8=*/false},
+                    SpeedTierTestParams{SpeedTier::kKitten,
+                                        /*shrink8=*/true},
+                    SpeedTierTestParams{SpeedTier::kKitten,
+                                        /*shrink8=*/false},
+                    // kTortoise only runs on the shrunk image (shrink8=true).
+                    SpeedTierTestParams{SpeedTier::kTortoise,
+                                        /*shrink8=*/true}));
+
+TEST_P(SpeedTierTest, Roundtrip) {
+  // Encodes and decodes a test image at the parameterized speed tier and
+  // checks that the Butteraugli distance to the input stays within 2.8.
+  const PaddedBytes encoded_png =
+      ReadTestData("wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+  CodecInOut input;
+  ThreadPoolInternal pool(8);
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(encoded_png), &input, &pool));
+
+  const SpeedTierTestParams& params = GetParam();
+  if (params.shrink8) {
+    input.ShrinkTo(input.xsize() / 8, input.ysize() / 8);
+  }
+
+  CompressParams cparams;
+  cparams.speed_tier = params.speed_tier;
+  DecompressParams dparams;
+  CodecInOut output;
+  test::Roundtrip(&input, cparams, dparams, nullptr, &output);
+
+  // Can be 2.2 in non-hare mode.
+  const auto distance = ButteraugliDistance(
+      input, output, cparams.ba_params, /*distmap=*/nullptr, /*pool=*/nullptr);
+  EXPECT_LE(distance, 2.8);
+}
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/splines.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/splines.cc
new file mode 100644
index 0000000000..802fc5b029
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/splines.cc
@@ -0,0 +1,514 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/splines.h"
+
+#include <algorithm>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/opsin_params.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/splines.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/fast_math-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// Evaluates the cosine interpolation of the samples described by the 32 DCT
+// coefficients in `dct` at the (possibly fractional) position `t`.
+float ContinuousIDCT(const float dct[32], float t) {
+  // This is the DCT-3 of `dct`, rescaled by sqrt(32) so that an input of
+  // {x, 0, ..., 0} gives the constant x. dct[0] was already scaled in
+  // Dequantize(), so every coefficient is treated the same way here.
+  // Entry i of the table below is kPi / 32 * i.
+  constexpr float kMultipliers[32] = {
+      kPi / 32 * 0,  kPi / 32 * 1,  kPi / 32 * 2,  kPi / 32 * 3,  kPi / 32 * 4,
+      kPi / 32 * 5,  kPi / 32 * 6,  kPi / 32 * 7,  kPi / 32 * 8,  kPi / 32 * 9,
+      kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14,
+      kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19,
+      kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24,
+      kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29,
+      kPi / 32 * 30, kPi / 32 * 31,
+  };
+  HWY_CAPPED(float, 32) df;
+  const auto t_plus_half = Set(df, t + 0.5f);
+  const auto sqrt2 = Set(df, square_root<2>::value);
+  auto sum = Zero(df);
+  for (int i = 0; i < 32; i += Lanes(df)) {
+    const auto angle = LoadU(df, kMultipliers + i) * t_plus_half;
+    const auto term = LoadU(df, dct + i) * FastCosf(df, angle);
+    sum = MulAdd(sqrt2, term, sum);
+  }
+  return GetLane(SumOfLanes(sum));
+}
+
+// Splats a single Gaussian centered at `center` onto all three opsin planes.
+void DrawGaussian(Image3F* const opsin, const Rect& opsin_rect,
+                  const Rect& image_rect, const Spline::Point& center,
+                  const float intensity, const float color[3],
+                  const float sigma, std::vector<int32_t>& xs,
+                  std::vector<int32_t>& ys,  // xs/ys: scratch, grown as needed.
+                  std::vector<float>& local_intensity_storage) {
+  constexpr float kDistanceMultiplier = 4.605170185988091f;  // -2 * log(0.1)
+  // exp(-d^2 / (2 * sigma^2)) drops below 0.1 once d^2 exceeds this value.
+  const float maximum_distance = sigma * sigma * kDistanceMultiplier;  // NOTE(review): a squared distance, used as a distance below.
+  const auto xbegin_s =
+      std::max<ssize_t>(image_rect.x0(), center.x - maximum_distance + .5f);
+  const auto xend_s =
+      std::min<ssize_t>(center.x + maximum_distance + .5f,
+                        image_rect.x0() + image_rect.xsize() - 1);
+  const auto ybegin_s =
+      std::max<ssize_t>(image_rect.y0(), center.y - maximum_distance + .5f);
+  const auto yend_s =
+      std::min<ssize_t>(center.y + maximum_distance + .5f,
+                        image_rect.y0() + image_rect.ysize() - 1);
+  if ((xend_s) <= 0 || (xend_s < xbegin_s)) return;  // Empty/non-positive x.
+  const size_t xbegin = xbegin_s;
+  const size_t xend = xend_s;
+  if ((yend_s <= 0) || (yend_s < ybegin_s)) return;  // Empty/non-positive y.
+  const size_t ybegin = ybegin_s;
+  const size_t yend = yend_s;
+  const size_t opsin_stride = opsin->PixelsPerRow();
+  float* JXL_RESTRICT rows[3] = {
+      opsin_rect.PlaneRow(opsin, 0, ybegin - image_rect.y0()),
+      opsin_rect.PlaneRow(opsin, 1, ybegin - image_rect.y0()),
+      opsin_rect.PlaneRow(opsin, 2, ybegin - image_rect.y0()),
+  };
+  const size_t nx = xend + 1 - xbegin;  // xend and yend are inclusive.
+  const size_t ny = yend + 1 - ybegin;
+  HWY_FULL(float) df;
+  if (xs.size() < nx * ny) {
+    size_t sz = DivCeil(nx * ny, Lanes(df)) * Lanes(df);  // Whole vectors.
+    xs.resize(sz);
+    ys.resize(sz);
+    local_intensity_storage.resize(sz);
+  }
+  for (size_t y = ybegin; y <= yend; ++y) {  // Row-major pixel coordinates.
+    for (size_t x = xbegin; x <= xend; ++x) {
+      xs[(y - ybegin) * nx + (x - xbegin)] = x;
+      ys[(y - ybegin) * nx + (x - xbegin)] = y;
+    }
+  }
+  Rebind<int32_t, decltype(df)> di;
+  const auto inv_sigma = Set(df, 1.0f / sigma);
+  const auto half = Set(df, 0.5f);
+  const auto one_over_2s2 = Set(df, 0.353553391f);
+  const auto sigma_over_4_times_intensity = Set(df, .25f * sigma * intensity);
+  for (size_t i = 0; i < nx * ny; i += Lanes(df)) {  // Per-pixel intensity.
+    const auto x = ConvertTo(df, LoadU(di, &xs[i]));
+    const auto y = ConvertTo(df, LoadU(di, &ys[i]));
+    const auto dx = x - Set(df, center.x);
+    const auto dy = y - Set(df, center.y);
+    const auto sqd = MulAdd(dx, dx, dy * dy);
+    const auto distance = Sqrt(sqd);
+    const auto one_dimensional_factor =
+        FastErff(df, MulAdd(distance, half, one_over_2s2) * inv_sigma) -
+        FastErff(df, MulSub(distance, half, one_over_2s2) * inv_sigma);
+    const auto local_intensity = sigma_over_4_times_intensity *
+                                 one_dimensional_factor *
+                                 one_dimensional_factor;
+    StoreU(local_intensity, df, &local_intensity_storage[i]);
+  }
+  ssize_t off = -static_cast<ssize_t>(image_rect.x0());
+  for (size_t y = ybegin; y <= yend; ++y) {
+    HWY_CAPPED(float, 1) df;  // Capped at one lane; shadows the outer df.
+    for (size_t x = xbegin; x <= xend; ++x) {
+      const auto local_intensity = Load(
+          df, local_intensity_storage.data() + (y - ybegin) * nx + x - xbegin);
+      for (size_t c = 0; c < 3; ++c) {
+        const auto cm = Set(df, color[c]);
+        const auto in = LoadU(df, rows[c] + x + off);
+        StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x + off);
+      }
+    }
+    off += opsin_stride;  // Advance to the next row of opsin.
+  }
+}
+
+void DrawFromPoints(
+    Image3F* const opsin, const Rect& opsin_rect, const Rect& image_rect,
+    const Spline& spline, bool add,
+    const std::vector<std::pair<Spline::Point, float>>& points_to_draw,
+    float arc_length) {
+  // Renders `spline` as one Gaussian per entry of `points_to_draw`.
+  // `add` == false negates the intensities, i.e. subtracts the spline.
+  std::vector<int32_t> xs, ys;
+  std::vector<float> local_intensity_storage;
+  const float inv_arc_length = 1.0f / arc_length;
+  int index = 0;
+  for (const auto& entry : points_to_draw) {
+    const float progress_along_arc =
+        std::min(1.f, (index * kDesiredRenderingDistance) * inv_arc_length);
+    ++index;
+    const float t = (32 - 1) * progress_along_arc;
+    float color[3];
+    for (size_t c = 0; c < 3; ++c) {
+      color[c] = ContinuousIDCT(spline.color_dct[c], t);
+    }
+    const float sigma = ContinuousIDCT(spline.sigma_dct, t);
+    const float intensity = add ? entry.second : -entry.second;
+    DrawGaussian(opsin, opsin_rect, image_rect, entry.first, intensity, color,
+                 sigma, xs, ys, local_intensity_storage);
+  }
+}
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(DrawFromPoints);
+
+namespace {
+
+// Maximum number of spline control points per frame is
+//   std::min(kMaxNumControlPoints, xsize * ysize / 2)
+constexpr size_t kMaxNumControlPoints = 1u << 20u;
+constexpr size_t kMaxNumControlPointsPerPixelRatio = 2;
+
+// Quantization weight for channel 0..3 (X, Y, B, sigma). `i` is unused.
+float ColorQuantizationWeight(const int32_t adjustment, const int channel,
+                              const int i) {
+  static constexpr float kChannelWeight[] = {0.0042f, 0.075f, 0.07f, .3333f};
+  // A non-negative adjustment multiplies by 1 + adjustment / 8; a negative
+  // one divides by 1 + |adjustment| / 8.
+  const float scale = 1.f + .125f * (adjustment >= 0 ? adjustment : -adjustment);
+  const float multiplier = adjustment >= 0 ? scale : 1.f / scale;
+  return multiplier / kChannelWeight[channel];
+}
+
+Status DecodeAllStartingPoints(std::vector<Spline::Point>* const points,
+                               BitReader* const br, ANSSymbolReader* reader,
+                               const std::vector<uint8_t>& context_map,
+                               size_t num_splines) {
+  // Fills `*points` with `num_splines` decoded starting points. The first
+  // point is read as-is; each later one is a signed offset from the
+  // previous point.
+  points->clear();
+  points->reserve(num_splines);
+  int64_t prev_x = 0, prev_y = 0;
+  for (size_t index = 0; index < num_splines; ++index) {
+    int64_t x =
+        reader->ReadHybridUint(kStartingPositionContext, br, context_map);
+    int64_t y =
+        reader->ReadHybridUint(kStartingPositionContext, br, context_map);
+    x = (index == 0) ? x : UnpackSigned(x) + prev_x;
+    y = (index == 0) ? y : UnpackSigned(y) + prev_y;
+    points->emplace_back(static_cast<float>(x), static_cast<float>(y));
+    prev_x = x;
+    prev_y = y;
+  }
+  return true;
+}
+
+// Displacement between two spline points.
+struct Vector {
+  float x, y;
+  float SquaredNorm() const { return x * x + y * y; }
+  Vector operator-() const { return Vector{-x, -y}; }
+  Vector operator+(const Vector& rhs) const { return {x + rhs.x, y + rhs.y}; }
+};
+Vector operator*(const float k, const Vector& vec) {
+  return Vector{k * vec.x, k * vec.y};
+}
+
+// A point moved by a displacement is a point; point - point is a Vector.
+Spline::Point operator+(const Spline::Point& p, const Vector& vec) {
+  return Spline::Point{p.x + vec.x, p.y + vec.y};
+}
+Spline::Point operator-(const Spline::Point& p, const Vector& vec) {
+  return Spline::Point{p.x - vec.x, p.y - vec.y};
+}
+Vector operator-(const Spline::Point& a, const Spline::Point& b) {
+  return Vector{a.x - b.x, a.y - b.y};
+}
+
+std::vector<Spline::Point> DrawCentripetalCatmullRomSpline(
+    std::vector<Spline::Point> points) {
+  if (points.size() <= 1) return points;
+  // Each pair of consecutive control points is split into this many steps.
+  static constexpr int kNumPoints = 16;
+  std::vector<Spline::Point> curve;
+  curve.reserve((points.size() - 1) * kNumPoints + 1);
+  // Extend both ends by mirroring the neighbor of the first / last point.
+  points.insert(points.begin(), points[0] + (points[0] - points[1]));
+  points.push_back(points.back() + (points.back() - points[points.size() - 2]));
+  // `points` now has at least 4 entries; each window of 4 draws p[1]..p[2].
+  for (size_t first = 0; first + 3 < points.size(); ++first) {
+    const Spline::Point* const p = &points[first];
+    curve.push_back(p[1]);
+    // ts[k] accumulates the square roots of the distances between the p's.
+    float ts[4] = {0};
+    for (int k = 1; k < 4; ++k) {
+      ts[k] = std::sqrt(hypotf(p[k].x - p[k - 1].x, p[k].y - p[k - 1].y)) +
+              ts[k - 1];
+    }
+    for (int step = 1; step < kNumPoints; ++step) {
+      const float u =
+          ts[1] + (static_cast<float>(step) / kNumPoints) * (ts[2] - ts[1]);
+      Spline::Point a[3];
+      for (int k = 0; k < 3; ++k) {
+        a[k] = p[k] + ((u - ts[k]) / (ts[k + 1] - ts[k])) * (p[k + 1] - p[k]);
+      }
+      Spline::Point b[2];
+      for (int k = 0; k < 2; ++k) {
+        b[k] = a[k] + ((u - ts[k]) / (ts[k + 2] - ts[k])) * (a[k + 1] - a[k]);
+      }
+      curve.push_back(b[0] + ((u - ts[1]) / (ts[2] - ts[1])) * (b[1] - b[0]));
+    }
+  }
+  curve.push_back(points[points.size() - 2]);
+  return curve;
+}
+
+// Walks along the line segments defined by `points` in steps of
+// kDesiredRenderingDistance pixels and calls `functor(point, distance)` at
+// each stop; `distance` is the arc length since the previous stop (always
+// kDesiredRenderingDistance except possibly for the very last call).
+template <typename Points, typename Functor>
+void ForEachEquallySpacedPoint(const Points& points, const Functor& functor) {
+  JXL_ASSERT(!points.empty());
+  Spline::Point current = points.front();
+  functor(current, kDesiredRenderingDistance);  // First point is always sent.
+  auto next = points.begin();
+  while (next != points.end()) {
+    const Spline::Point* previous = &current;  // Start from the last stop.
+    float arclength_from_previous = 0.f;
+    for (;;) {
+      if (next == points.end()) {  // Out of points: send the last one, stop.
+        functor(*previous, arclength_from_previous);
+        return;
+      }
+      const float arclength_to_next =
+          std::sqrt((*next - *previous).SquaredNorm());
+      if (arclength_from_previous + arclength_to_next >=
+          kDesiredRenderingDistance) {
+        current =
+            *previous + ((kDesiredRenderingDistance - arclength_from_previous) /
+                         arclength_to_next) *
+                            (*next - *previous);
+        functor(current, kDesiredRenderingDistance);
+        break;  // Restart from `current`, heading for the same `next`.
+      }
+      arclength_from_previous += arclength_to_next;
+      previous = &*next;
+      ++next;
+    }
+  }
+}
+
+}  // namespace
+
+QuantizedSpline::QuantizedSpline(const Spline& original,
+                                 const int32_t quantization_adjustment,
+                                 float ytox, float ytob) {
+  JXL_ASSERT(!original.control_points.empty());
+  // Control points are rounded to integers and stored as deltas of deltas.
+  control_points_.reserve(original.control_points.size() - 1);
+  const auto to_int = [](float v) { return static_cast<int>(roundf(v)); };
+  const Spline::Point& starting_point = original.control_points.front();
+  int previous_x = to_int(starting_point.x),
+      previous_y = to_int(starting_point.y);
+  int previous_delta_x = 0, previous_delta_y = 0;
+  for (auto it = original.control_points.begin() + 1;
+       it != original.control_points.end(); ++it) {
+    const int new_x = to_int(it->x), new_y = to_int(it->y);
+    const int new_delta_x = new_x - previous_x;
+    const int new_delta_y = new_y - previous_y;
+    control_points_.emplace_back(new_delta_x - previous_delta_x,
+                                 new_delta_y - previous_delta_y);
+    previous_delta_x = new_delta_x;
+    previous_delta_y = new_delta_y;
+    previous_x = new_x;
+    previous_y = new_y;
+  }
+  // Y (c == 1) must be quantized first: the X and B residuals below subtract
+  // the already-quantized Y coefficients stored in color_dct_[1].
+  for (const int c : {1, 0, 2}) {
+    float factor = c == 0 ? ytox : c == 1 ? 0 : ytob;
+    for (int i = 0; i < 32; ++i) {
+      const float coefficient =
+          original.color_dct[c][i] -
+          factor * color_dct_[1][i] /
+              ColorQuantizationWeight(quantization_adjustment, 1, i);
+      color_dct_[c][i] = to_int(
+          coefficient * ColorQuantizationWeight(quantization_adjustment, c, i));
+    }
+  }
+  for (int i = 0; i < 32; ++i) {
+    const float weight = ColorQuantizationWeight(quantization_adjustment, 3, i);
+    sigma_dct_[i] = to_int(original.sigma_dct[i] * weight);
+  }
+}
+
+Spline QuantizedSpline::Dequantize(const Spline::Point& starting_point,
+                                   const int32_t quantization_adjustment,
+                                   float ytox, float ytob) const {
+  Spline spline;
+  // Rebuild the control points by accumulating the stored second-order
+  // deltas, starting from the rounded starting point.
+  spline.control_points.reserve(control_points_.size() + 1);
+  int x = static_cast<int>(roundf(starting_point.x));
+  int y = static_cast<int>(roundf(starting_point.y));
+  spline.control_points.push_back(
+      Spline::Point{static_cast<float>(x), static_cast<float>(y)});
+  int delta_x = 0;
+  int delta_y = 0;
+  for (const auto& second_delta : control_points_) {
+    delta_x += second_delta.first;
+    delta_y += second_delta.second;
+    x += delta_x;
+    y += delta_y;
+    spline.control_points.push_back(
+        Spline::Point{static_cast<float>(x), static_cast<float>(y)});
+  }
+
+  for (int i = 0; i < 32; ++i) {
+    // Coefficient 0 is additionally divided by sqrt(2).
+    const auto dc_scale = i == 0 ? 1.0f / square_root<2>::value : 1.0f;
+    for (int c = 0; c < 3; ++c) {
+      spline.color_dct[c][i] =
+          color_dct_[c][i] * dc_scale /
+          ColorQuantizationWeight(quantization_adjustment, c, i);
+    }
+    // X and B get the Y coefficient added back, scaled by ytox and ytob.
+    spline.color_dct[0][i] += ytox * spline.color_dct[1][i];
+    spline.color_dct[2][i] += ytob * spline.color_dct[1][i];
+    spline.sigma_dct[i] =
+        sigma_dct_[i] * dc_scale /
+        ColorQuantizationWeight(quantization_adjustment, 3, i);
+  }
+  return spline;
+}
+
+// Reads one quantized spline: the number of control points (added to the
+// running *total_num_control_points, failing once that total exceeds
+// max_control_points), the control-point pairs, then the 3x32 color and 32
+// sigma DCT coefficients, all through `decoder` / `br`.
+Status QuantizedSpline::Decode(const std::vector<uint8_t>& context_map,
+                               ANSSymbolReader* const decoder,
+                               BitReader* const br, size_t max_control_points,
+                               size_t* total_num_control_points) {
+  // Every signed field is a hybrid uint passed through UnpackSigned().
+  const auto read_signed = [&](SplineEntropyContexts context) {
+    return UnpackSigned(decoder->ReadHybridUint(context, br, context_map));
+  };
+
+  const size_t count =
+      decoder->ReadHybridUint(kNumControlPointsContext, br, context_map);
+  *total_num_control_points += count;
+  if (*total_num_control_points > max_control_points) {
+    return JXL_FAILURE("Too many control points: %zu",
+                       *total_num_control_points);
+  }
+  control_points_.resize(count);
+  for (auto& delta : control_points_) {
+    delta.first = read_signed(kControlPointsContext);
+    delta.second = read_signed(kControlPointsContext);
+  }
+
+  for (auto& channel : color_dct_) {
+    for (int& coefficient : channel) coefficient = read_signed(kDCTContext);
+  }
+  for (int& coefficient : sigma_dct_) coefficient = read_signed(kDCTContext);
+  return true;
+}
+
+// Parses the spline section from `br`: histograms, spline count, starting
+// points, quantization adjustment and then each quantized spline. Fails if a
+// count exceeds the limit derived from `num_pixels` or the ANS check fails.
+Status Splines::Decode(jxl::BitReader* br, size_t num_pixels) {
+  std::vector<uint8_t> context_map;
+  ANSCode code;
+  JXL_RETURN_IF_ERROR(
+      DecodeHistograms(br, kNumSplineContexts, &code, &context_map));
+  ANSSymbolReader decoder(&code, br);
+  // The stored value is the spline count minus one.
+  const size_t num_splines =
+      decoder.ReadHybridUint(kNumSplinesContext, br, context_map) + 1;
+  const size_t max_control_points = std::min(
+      kMaxNumControlPoints, num_pixels / kMaxNumControlPointsPerPixelRatio);
+  if (max_control_points < num_splines) {
+    return JXL_FAILURE("Too many splines: %zu", num_splines);
+  }
+  JXL_RETURN_IF_ERROR(DecodeAllStartingPoints(&starting_points_, br, &decoder,
+                                              context_map, num_splines));
+  quantization_adjustment_ = UnpackSigned(
+      decoder.ReadHybridUint(kQuantizationAdjustmentContext, br, context_map));
+  splines_.clear();
+  splines_.reserve(num_splines);
+  // The running control-point total starts at the number of splines.
+  size_t num_control_points = num_splines;
+  for (size_t remaining = num_splines; remaining != 0; --remaining) {
+    QuantizedSpline spline;
+    JXL_RETURN_IF_ERROR(spline.Decode(context_map, &decoder, br,
+                                      max_control_points, &num_control_points));
+    splines_.push_back(std::move(spline));
+  }
+  JXL_RETURN_IF_ERROR(decoder.CheckANSFinalState());
+  if (splines_.empty()) {
+    return JXL_FAILURE("Decoded splines but got none");
+  }
+  return true;
+}
+
+Status Splines::AddTo(Image3F* const opsin, const Rect& opsin_rect,
+                      const Rect& image_rect,
+                      const ColorCorrelationMap& cmap) const {
+  return Apply</*add=*/true>(opsin, opsin_rect, image_rect, cmap);  // Thin wrapper: Apply with add == true.
+}
+
+Status Splines::SubtractFrom(Image3F* const opsin,
+                             const ColorCorrelationMap& cmap) const {
+  return Apply</*add=*/false>(opsin, Rect(*opsin), Rect(*opsin), cmap);  // Apply with add == false; both rects are Rect(*opsin).
+}
+
+// For every spline: dequantizes it, rejects consecutive control points that
+// compare equal, gathers the points emitted by ForEachEquallySpacedPoint and
+// passes them, with `add`, to DrawFromPoints.
+template <bool add>
+Status Splines::Apply(Image3F* const opsin, const Rect& opsin_rect,
+                      const Rect& image_rect,
+                      const ColorCorrelationMap& cmap) const {
+  for (size_t i = 0; i < splines_.size(); ++i) {
+    const Spline spline =
+        splines_[i].Dequantize(starting_points_[i], quantization_adjustment_,
+                               cmap.YtoXRatio(0), cmap.YtoBRatio(0));
+    const std::vector<Spline::Point>& knots = spline.control_points;
+    if (std::adjacent_find(knots.begin(), knots.end()) != knots.end()) {
+      return JXL_FAILURE("identical successive control points in spline %zu",
+                         i);
+    }
+    std::vector<std::pair<Spline::Point, float>> samples;
+    ForEachEquallySpacedPoint(
+        DrawCentripetalCatmullRomSpline(spline.control_points),
+        [&samples](const Spline::Point& position, const float weight) {
+          samples.emplace_back(position, weight);
+        });
+    // (count - 2) full steps plus the value stored with the final sample.
+    const float arc_length =
+        (samples.size() - 2) * kDesiredRenderingDistance +
+        samples.back().second;
+    if (arc_length <= 0.f) continue;  // This spline wouldn't have any effect.
+    HWY_DYNAMIC_DISPATCH(DrawFromPoints)
+    (opsin, opsin_rect, image_rect, spline, add, samples, arc_length);
+  }
+  return true;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/splines.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/splines.h
new file mode 100644
index 0000000000..8ec10e928d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/splines.h
@@ -0,0 +1,125 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_SPLINES_H_
+#define LIB_JXL_SPLINES_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <cmath>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+static constexpr float kDesiredRenderingDistance = 1.f;  // Per-sample step used by Splines::Apply when computing a spline's arc length.
+
+enum SplineEntropyContexts : size_t {  // Entropy-coding context ids used when reading splines.
+  kQuantizationAdjustmentContext = 0,
+  kStartingPositionContext,
+  kNumSplinesContext,
+  kNumControlPointsContext,
+  kControlPointsContext,
+  kDCTContext,
+  kNumSplineContexts  // Number of contexts; passed to DecodeHistograms.
+};
+
+struct Spline {
+  struct Point {
+    Point() : x(0.0f), y(0.0f) {}
+    Point(float x, float y) : x(x), y(y) {}
+    float x, y;
+    bool operator==(const Point& other) const {  // Approximate: each coordinate must differ by less than 1e-3.
+      return std::fabs(x - other.x) < 1e-3f && std::fabs(y - other.y) < 1e-3f;
+    }
+  };
+  std::vector<Point> control_points;
+  // X, Y, B.
+  float color_dct[3][32];
+  // Splines are drawn by normalized Gaussian splatting. This controls the
+  // Gaussian's parameter along the spline.
+  float sigma_dct[32];
+};
+
+class QuantizedSplineEncoder;
+
+class QuantizedSpline {  // Integer-coefficient form of a Spline; the starting point is passed in separately.
+ public:
+  QuantizedSpline() = default;
+  explicit QuantizedSpline(const Spline& original,
+                           int32_t quantization_adjustment, float ytox,
+                           float ytob);  // Quantizes `original`.
+
+  Spline Dequantize(const Spline::Point& starting_point,
+                    int32_t quantization_adjustment, float ytox,
+                    float ytob) const;  // Rebuilds a Spline whose first control point is the rounded starting_point.
+
+  Status Decode(const std::vector<uint8_t>& context_map,
+                ANSSymbolReader* decoder, BitReader* br,
+                size_t max_control_points, size_t* total_num_control_points);  // Reads this spline; adds its control-point count to *total_num_control_points.
+
+ private:
+  friend class QuantizedSplineEncoder;
+
+  std::vector<std::pair<int64_t, int64_t>>
+      control_points_;  // Double delta-encoded.
+  int color_dct_[3][32] = {};  // Quantized coefficients, one row per channel.
+  int sigma_dct_[32] = {};  // Quantized sigma coefficients.
+};
+
+class Splines {  // A set of quantized splines with their starting points and shared quantization adjustment.
+ public:
+  Splines() = default;
+  explicit Splines(const int32_t quantization_adjustment,
+                   std::vector<QuantizedSpline> splines,
+                   std::vector<Spline::Point> starting_points)
+      : quantization_adjustment_(quantization_adjustment),
+        splines_(std::move(splines)),
+        starting_points_(std::move(starting_points)) {}  // Apply() indexes both vectors with the same index.
+
+  bool HasAny() const { return !splines_.empty(); }
+
+  Status Decode(BitReader* br, size_t num_pixels);  // Replaces the contents with splines read from br.
+
+  Status AddTo(Image3F* opsin, const Rect& opsin_rect, const Rect& image_rect,
+               const ColorCorrelationMap& cmap) const;  // Apply with add == true.
+  Status SubtractFrom(Image3F* opsin, const ColorCorrelationMap& cmap) const;  // Apply with add == false over Rect(*opsin).
+
+  const std::vector<QuantizedSpline>& QuantizedSplines() const {
+    return splines_;
+  }
+  const std::vector<Spline::Point>& StartingPoints() const {
+    return starting_points_;
+  }
+
+  int32_t GetQuantizationAdjustment() const { return quantization_adjustment_; }
+
+ private:
+  template <bool>
+  Status Apply(Image3F* opsin, const Rect& opsin_rect, const Rect& image_rect,
+               const ColorCorrelationMap& cmap) const;  // Shared implementation; the template argument is `add`.
+
+  // If positive, quantization weights are multiplied by 1 + this/8, which
+  // increases precision. If negative, they are divided by 1 - this/8. If 0,
+  // they are unchanged.
+  int32_t quantization_adjustment_ = 0;
+  std::vector<QuantizedSpline> splines_;
+  std::vector<Spline::Point> starting_points_;  // starting_points_[i] belongs to splines_[i].
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_SPLINES_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/splines_gbench.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/splines_gbench.cc
new file mode 100644
index 0000000000..490bdb00e1
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/splines_gbench.cc
@@ -0,0 +1,51 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "benchmark/benchmark.h"
+#include "lib/jxl/splines.h"
+
+namespace jxl {
+namespace {
+
+constexpr int kQuantizationAdjustment = 0;
+const ColorCorrelationMap* const cmap = new ColorCorrelationMap;  // Default-constructed; never deleted.
+const float kYToX = cmap->YtoXRatio(0);
+const float kYToB = cmap->YtoBRatio(0);
+
+// Benchmark: draws one six-control-point spline onto a 320x320 Image3F via
+// Splines::AddTo, state.range() times per benchmark iteration.
+void BM_Splines(benchmark::State& state) {
+  const size_t n = state.range();
+
+  const std::vector<Spline> spline_data = {
+      {/*control_points=*/{
+           {9, 54}, {118, 159}, {97, 3}, {10, 40}, {150, 25}, {120, 300}},
+       /*color_dct=*/
+       {{0.03125f, 0.00625f, 0.003125f}, {1.f, 0.321875f}, {1.f, 0.24375f}},
+       /*sigma_dct=*/{0.3125f, 0.f, 0.f, 0.0625f}}};
+  std::vector<QuantizedSpline> quantized;
+  std::vector<Spline::Point> starts;
+  for (const Spline& source : spline_data) {
+    quantized.emplace_back(source, kQuantizationAdjustment, kYToX, kYToB);
+    starts.push_back(source.control_points.front());
+  }
+  Splines splines(kQuantizationAdjustment, std::move(quantized),
+                  std::move(starts));
+
+  Image3F drawing_area(320, 320);
+  ZeroFillImage(&drawing_area);
+  for (auto _ : state) {
+    for (size_t pass = 0; pass != n; ++pass) {
+      JXL_CHECK(splines.AddTo(&drawing_area, Rect(drawing_area),
+                              Rect(drawing_area), *cmap));
+    }
+  }
+  state.SetItemsProcessed(n * state.iterations());
+}
+
+BENCHMARK(BM_Splines)->Range(1, 1 << 10);
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/splines_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/splines_test.cc
new file mode 100644
index 0000000000..8e6dfc5dd4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/splines_test.cc
@@ -0,0 +1,312 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/splines.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "lib/extras/codec.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/testdata.h"
+
+namespace jxl {
+
+std::ostream& operator<<(std::ostream& os, const Spline::Point& p) {  // Prints a point as "(x, y)".
+  return os << "(" << p.x << ", " << p.y << ")";
+}
+
+std::ostream& operator<<(std::ostream& os, const Spline& spline) {  // Prints only the number of control points.
+  return os << "(spline with " << spline.control_points.size()
+            << " control points)";
+}
+
+namespace {
+
+using ::testing::AllOf;
+using ::testing::Field;
+using ::testing::FloatNear;
+using ::testing::Pointwise;
+
+constexpr int kQuantizationAdjustment = 0;
+const ColorCorrelationMap* const cmap = new ColorCorrelationMap;  // Default-constructed; never deleted.
+const float kYToX = cmap->YtoXRatio(0);
+const float kYToB = cmap->YtoBRatio(0);
+
+constexpr float kTolerance = 0.003125;  // Absolute tolerance passed to FloatNear by the matchers in this file.
+
+// Dequantizes each spline with this file's fixed adjustment and Y ratios.
+std::vector<Spline> DequantizeSplines(const Splines& splines) {
+  const auto& quantized_splines = splines.QuantizedSplines();
+  const auto& starting_points = splines.StartingPoints();
+  JXL_ASSERT(quantized_splines.size() == starting_points.size());
+  std::vector<Spline> out;
+  auto it = starting_points.begin();
+  for (const QuantizedSpline& q : quantized_splines) {
+    out.push_back(q.Dequantize(*it++, kQuantizationAdjustment, kYToX, kYToB));
+  }
+  return out;
+}
+
+// Tuple matcher for Pointwise(): x and y each within kTolerance.
+MATCHER(ControlPointIs, "") {
+  const Spline::Point& want = std::get<1>(arg);
+  const auto both =
+      AllOf(Field(&Spline::Point::x, FloatNear(want.x, kTolerance)),
+            Field(&Spline::Point::y, FloatNear(want.y, kTolerance)));
+  return testing::ExplainMatchResult(both, std::get<0>(arg), result_listener);
+}
+
+// Tuple matcher for Pointwise(): the two splines' control points match
+// element by element under ControlPointIs().
+MATCHER(ControlPointsMatch, "") {
+  const std::vector<Spline::Point>& want = std::get<1>(arg).control_points;
+  return testing::ExplainMatchResult(
+      Field(&Spline::control_points, Pointwise(ControlPointIs(), want)),
+      std::get<0>(arg), result_listener);
+}
+
+// Tuple matcher for Pointwise(): control points match and every color_dct /
+// sigma_dct coefficient is within kTolerance; the first mismatch is reported.
+MATCHER(SplinesMatch, "") {
+  if (!testing::ExplainMatchResult(ControlPointsMatch(), arg,
+                                   result_listener)) {
+    return false;
+  }
+  const Spline& actual = std::get<0>(arg);
+  const Spline& expected = std::get<1>(arg);
+  const auto differs = [](float want, float got,
+                          testing::StringMatchResultListener* why) {
+    return !testing::ExplainMatchResult(FloatNear(want, kTolerance), got, why);
+  };
+  const size_t row_size = sizeof(expected.color_dct[0]) / sizeof(float);
+  for (int i = 0; i < 3; ++i) {
+    for (size_t j = 0; j != row_size; ++j) {
+      testing::StringMatchResultListener why;
+      if (differs(expected.color_dct[i][j], actual.color_dct[i][j], &why)) {
+        *result_listener << ", where color_dct[" << i << "][" << j
+                         << "] don't match, " << why.str();
+        return false;
+      }
+    }
+  }
+  const size_t sigma_size = sizeof(expected.sigma_dct) / sizeof(float);
+  for (size_t i = 0; i != sigma_size; ++i) {
+    testing::StringMatchResultListener why;
+    if (differs(expected.sigma_dct[i], actual.sigma_dct[i], &why)) {
+      *result_listener << ", where sigma_dct[" << i << "] don't match, "
+                       << why.str();
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace
+
+TEST(SplinesTest, Serialization) {  // Quantize -> encode -> decode -> dequantize round trip on three splines.
+  std::vector<Spline> spline_data = {
+      {/*control_points=*/{
+           {109, 54}, {218, 159}, {80, 3}, {110, 274}, {94, 185}, {17, 277}},
+       /*color_dct=*/
+       {{36.3, 39.7, 23.2, 67.5, 4.4,  71.5, 62.3, 32.3, 92.2, 10.1, 10.8,
+         9.2,  6.1,  10.5, 79.1, 7,    24.6, 90.8, 5.5,  84,   43.8, 49,
+         33.5, 78.9, 54.5, 77.9, 62.1, 51.4, 36.4, 14.3, 83.7, 35.4},
+        {9.4,  53.4, 9.5,  74.9, 72.7, 26.7, 7.9,  0.9, 84.9, 23.2, 26.5,
+         31.1, 91,   11.7, 74.1, 39.3, 23.7, 82.5, 4.8, 2.7,  61.2, 96.4,
+         13.7, 66.7, 62.9, 82.4, 5.9,  98.7, 21.5, 7.9, 51.7, 63.1},
+        {48,   39.3, 6.9,  26.3, 33.3, 6.2,  1.7,  98.9, 59.9, 59.6, 95,
+         61.3, 82.7, 53,   6.1,  30.4, 34.7, 96.9, 93.4, 17,   38.8, 80.8,
+         63,   18.6, 43.6, 32.3, 61,   20.2, 24.3, 28.3, 69.1, 62.4}},
+       /*sigma_dct=*/{32.7, 21.5, 44.4, 1.8,  45.8, 90.6, 29.3, 59.2,
+                      23.7, 85.2, 84.8, 27.2, 42.1, 84.1, 50.6, 17.6,
+                      93.7, 4.9,  2.6,  69.8, 94.9, 52,   24.3, 18.8,
+                      12.1, 95.7, 28.5, 81.4, 89.9, 31.4, 74.8, 52}},
+      {/*control_points=*/{{172, 309},
+                           {196, 277},
+                           {42, 238},
+                           {114, 350},
+                           {307, 290},
+                           {316, 269},
+                           {124, 66},
+                           {233, 267}},
+       /*color_dct=*/
+       {{15,   28.9, 22, 6.6,  41.8, 83,   8.6,  56.8, 68.9, 9.7,  5.4,
+         19.8, 70.8, 90, 52.5, 65.2, 7.8,  23.5, 26.4, 72.2, 64.7, 87.1,
+         1.3,  67.5, 46, 68.4, 65.4, 35.5, 29.1, 13,   41.6, 23.9},
+        {47.7, 79.4, 62.7, 29.1, 96.8, 18.5, 17.6, 15.2, 80.5, 56,  96.2,
+         59.9, 26.7, 96.1, 92.3, 42.1, 35.8, 54,   23.2, 55,   76,  35.8,
+         58.4, 88.7, 2.4,  78.1, 95.6, 27.5, 6.6,  78.5, 24.1, 69.8},
+        {43.8, 96.5, 0.9,  95.1, 49.1, 71.2, 25.1, 33.6, 75.2, 95,  82.1,
+         19.7, 10.5, 44.9, 50,   93.3, 83.5, 99.5, 64.6, 54,   3.5, 99.7,
+         45.3, 82.1, 22.4, 37.9, 60,   32.2, 12.6, 4.6,  65.5, 96.4}},
+       /*sigma_dct=*/{72.5, 2.6,  41.7, 2.2,  39.7, 79.1, 69.6, 19.9,
+                      92.3, 71.5, 41.9, 62.1, 30,   49.4, 70.3, 45.3,
+                      62.5, 47.2, 46.7, 41.2, 90.8, 46.8, 91.2, 55,
+                      8.1,  69.6, 25.4, 84.7, 61.7, 27.6, 3.7,  46.9}},
+      {/*control_points=*/{{100, 186},
+                           {257, 97},
+                           {170, 49},
+                           {25, 169},
+                           {309, 104},
+                           {232, 237},
+                           {385, 101},
+                           {122, 168},
+                           {26, 300},
+                           {390, 88}},
+       /*color_dct=*/
+       {{16.9, 64.8, 4.2,  10.6, 23.5, 17,   79.3, 5.7,  60.4, 16.6, 94.9,
+         63.7, 87.6, 10.5, 3.8,  61.1, 22.9, 81.9, 80.4, 40.5, 45.9, 25.4,
+         39.8, 30,   50.2, 90.4, 27.9, 93.7, 65.1, 48.2, 22.3, 43.9},
+        {24.9, 66,   3.5,  90.2, 97.1, 15.8, 35.6, 0.6,  68,   39.6, 24.4,
+         85.9, 57.7, 77.6, 47.5, 67.9, 4.3,  5.4,  91.2, 58.5, 0.1,  52.2,
+         3.5,  47.8, 63.2, 43.5, 85.8, 35.8, 50.2, 35.9, 19.2, 48.2},
+        {82.8, 44.9, 76.4, 39.5, 94.1, 14.3, 89.8, 10,   10.5, 74.5, 56.3,
+         65.8, 7.8,  23.3, 52.8, 99.3, 56.8, 46,   76.7, 13.5, 67,   22.4,
+         29.9, 43.3, 70.3, 26,   74.3, 53.9, 62,   19.1, 49.3, 46.7}},
+       /*sigma_dct=*/{83.5, 1.7,  25.1, 18.7, 46.5, 75.3, 28,   62.3,
+                      50.3, 23.3, 85.6, 96,   45.8, 33.1, 33.4, 52.9,
+                      26.3, 58.5, 19.6, 70,   92.6, 22.5, 57,   21.6,
+                      76.8, 87.5, 22.9, 66.3, 35.7, 35.6, 56.8, 67.2}},
+  };
+
+  std::vector<QuantizedSpline> quantized_splines;
+  std::vector<Spline::Point> starting_points;
+  for (const Spline& spline : spline_data) {
+    quantized_splines.emplace_back(spline, kQuantizationAdjustment, kYToX,
+                                   kYToB);
+    starting_points.push_back(spline.control_points.front());
+  }
+
+  Splines splines(kQuantizationAdjustment, std::move(quantized_splines),
+                  std::move(starting_points));
+  const std::vector<Spline> quantized_spline_data = DequantizeSplines(splines);
+  EXPECT_THAT(quantized_spline_data,
+              Pointwise(ControlPointsMatch(), spline_data));  // Only control points are compared against the unquantized input.
+
+  BitWriter writer;
+  EncodeSplines(splines, &writer, kLayerSplines, HistogramParams(), nullptr);
+  writer.ZeroPadToByte();
+  const size_t bits_written = writer.BitsWritten();
+
+  printf("Wrote %zu bits of splines.\n", bits_written);
+
+  BitReader reader(writer.GetSpan());
+  Splines decoded_splines;
+  ASSERT_TRUE(decoded_splines.Decode(&reader, /*num_pixels=*/1000));
+  ASSERT_TRUE(reader.JumpToByteBoundary());
+  EXPECT_EQ(reader.TotalBitsConsumed(), bits_written);  // The decoder must consume exactly what the encoder wrote.
+  ASSERT_TRUE(reader.Close());
+
+  const std::vector<Spline> decoded_spline_data =
+      DequantizeSplines(decoded_splines);
+  EXPECT_THAT(decoded_spline_data,
+              Pointwise(SplinesMatch(), quantized_spline_data));  // Decoded splines are compared with the dequantized originals.
+}
+
+#ifdef JXL_CRASH_ON_ERROR
+TEST(SplinesTest, DISABLED_TooManySplinesTest) {
+#else
+TEST(SplinesTest, TooManySplinesTest) {
+#endif
+  // 300 splines is more than Decode() accepts for 1000 pixels.
+  constexpr size_t kNumSplines = 300;
+  std::vector<QuantizedSpline> quantized;
+  std::vector<Spline::Point> starts;
+  for (size_t i = 0; i != kNumSplines; ++i) {
+    // Same control points every time, with x shifted by i.
+    const Spline shifted = {
+        /*control_points=*/{{1.f + i, 2}, {10.f + i, 25}, {30.f + i, 300}},
+        /*color_dct=*/
+        {{1.f, 0.2f, 0.1f}, {35.7f, 10.3f}, {35.7f, 7.8f}},
+        /*sigma_dct=*/{10.f, 0.f, 0.f, 2.f}};
+    quantized.emplace_back(shifted, kQuantizationAdjustment, kYToX, kYToB);
+    starts.push_back(shifted.control_points.front());
+  }
+  Splines splines(kQuantizationAdjustment, std::move(quantized),
+                  std::move(starts));
+
+  BitWriter writer;
+  EncodeSplines(splines, &writer, kLayerSplines,
+                HistogramParams(SpeedTier::kFalcon, 1), nullptr);
+  writer.ZeroPadToByte();
+
+  // Decoding what was just written must fail.
+  BitReader reader(writer.GetSpan());
+  Splines decoded_splines;
+  EXPECT_FALSE(decoded_splines.Decode(&reader, /*num_pixels=*/1000));
+  EXPECT_TRUE(reader.Close());
+}
+
+#ifdef JXL_CRASH_ON_ERROR
+TEST(SplinesTest, DISABLED_DuplicatePoints) {
+#else
+TEST(SplinesTest, DuplicatePoints) {
+#endif
+  // {97, 3} appears twice in a row; AddTo() is expected to fail on such a
+  // spline.
+  const std::vector<Spline::Point> control_points{
+      {9, 54},  {118, 159}, {97, 3},   {97, 3},
+      {10, 40}, {150, 25},  {120, 300}};
+  const Spline spline{control_points,
+                      /*color_dct=*/
+                      {{1.f, 0.2f, 0.1f}, {35.7f, 10.3f}, {35.7f, 7.8f}},
+                      /*sigma_dct=*/{10.f, 0.f, 0.f, 2.f}};
+
+  // A single quantized spline together with its starting point.
+  std::vector<QuantizedSpline> quantized;
+  quantized.emplace_back(spline, kQuantizationAdjustment, kYToX, kYToB);
+  std::vector<Spline::Point> starts{spline.control_points.front()};
+  Splines splines(kQuantizationAdjustment, std::move(quantized),
+                  std::move(starts));
+
+  // Drawing onto a zeroed image must report failure.
+  Image3F image(320, 320);
+  ZeroFillImage(&image);
+  EXPECT_FALSE(splines.AddTo(&image, Rect(image), Rect(image), *cmap));
+}
+
+TEST(SplinesTest, Drawing) {
+  // Reference image that the freshly drawn spline is compared against.
+  CodecInOut io_expected;
+  const PaddedBytes orig = ReadTestData("jxl/splines.png");
+  ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io_expected,
+                           /*pool=*/nullptr));
+
+  // The single spline to draw.
+  const std::vector<Spline::Point> control_points{
+      {9, 54}, {118, 159}, {97, 3}, {10, 40}, {150, 25}, {120, 300}};
+  const Spline spline{
+      control_points,
+      /*color_dct=*/
+      {{0.03125f, 0.00625f, 0.003125f}, {1.f, 0.321875f}, {1.f, 0.24375f}},
+      /*sigma_dct=*/{0.3125f, 0.f, 0.f, 0.0625f}};
+  std::vector<QuantizedSpline> quantized;
+  quantized.emplace_back(spline, kQuantizationAdjustment, kYToX, kYToB);
+  std::vector<Spline::Point> starts{spline.control_points.front()};
+  Splines splines(kQuantizationAdjustment, std::move(quantized),
+                  std::move(starts));
+
+  // Draw it onto a zeroed 320x320 image.
+  Image3F image(320, 320);
+  ZeroFillImage(&image);
+  ASSERT_TRUE(splines.AddTo(&image, Rect(image), Rect(image), *cmap));
+
+  // Convert out of opsin, tag the result as linear sRGB, then transform it to
+  // the reference image's current encoding.
+  OpsinParams opsin_params{};
+  opsin_params.Init(kDefaultIntensityTarget);
+  (void)OpsinToLinearInplace(&image, /*pool=*/nullptr, opsin_params);
+  CodecInOut io_actual;
+  io_actual.SetFromImage(CopyImage(image), ColorEncoding::LinearSRGB());
+  ASSERT_TRUE(io_actual.TransformTo(io_expected.Main().c_current()));
+
+  // Compare the rendering with the reference.
+  VerifyRelativeError(*io_expected.Main().color(), *io_actual.Main().color(),
+                      1e-2f, 1e-1f);
+}
+
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/test_utils.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/test_utils.h
new file mode 100644
index 0000000000..31abf4a96a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/test_utils.h
@@ -0,0 +1,388 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_TEST_UTILS_H_
+#define LIB_JXL_TEST_UTILS_H_
+
+// Macros and functions useful for tests.
+
+#include <random>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "jxl/codestream_header.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/common.h"  // JPEGXL_ENABLE_TRANSCODE_JPEG
+#include "lib/jxl/dec_file.h"
+#include "lib/jxl/dec_params.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+
+#ifdef JXL_DISABLE_SLOW_TESTS  // When defined, wrapped test names get the DISABLED_ prefix.
+#define JXL_SLOW_TEST(X) DISABLED_##X
+#else
+#define JXL_SLOW_TEST(X) X
+#endif  // JXL_DISABLE_SLOW_TESTS
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG  // When zero, wrapped test names get the DISABLED_ prefix.
+#define JXL_TRANSCODE_JPEG_TEST(X) X
+#else
+#define JXL_TRANSCODE_JPEG_TEST(X) DISABLED_##X
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+#ifdef THREAD_SANITIZER  // When defined, wrapped test names get the DISABLED_ prefix.
+#define JXL_TSAN_SLOW_TEST(X) DISABLED_##X
+#else
+#define JXL_TSAN_SLOW_TEST(X) X
+#endif  // THREAD_SANITIZER
+
+// googletest before 1.10 didn't define INSTANTIATE_TEST_SUITE_P() but instead
+// used INSTANTIATE_TEST_CASE_P, which is now deprecated.
+#ifdef INSTANTIATE_TEST_SUITE_P
+#define JXL_GTEST_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_SUITE_P
+#else
+#define JXL_GTEST_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_CASE_P
+#endif
+
+namespace jxl {
+namespace test {
+
+// Fills the sample-depth fields of `basic_info` from `pixel_format`; alpha
+// bits are set only for 2- or 4-channel formats. `static inline`, like the
+// other helpers here, so several translation units may include this header.
+static inline void JxlBasicInfoSetFromPixelFormat(
+    JxlBasicInfo* basic_info, const JxlPixelFormat* pixel_format) {
+  switch (pixel_format->data_type) {
+    case JXL_TYPE_FLOAT:
+      basic_info->bits_per_sample = 32;
+      basic_info->exponent_bits_per_sample = 8;
+      break;
+    case JXL_TYPE_FLOAT16:
+      basic_info->bits_per_sample = 16;
+      basic_info->exponent_bits_per_sample = 5;
+      break;
+    case JXL_TYPE_UINT8:
+      basic_info->bits_per_sample = 8;
+      basic_info->exponent_bits_per_sample = 0;
+      break;
+    case JXL_TYPE_UINT16:
+      basic_info->bits_per_sample = 16;
+      basic_info->exponent_bits_per_sample = 0;
+      break;
+    case JXL_TYPE_UINT32:
+      basic_info->bits_per_sample = 32;
+      basic_info->exponent_bits_per_sample = 0;
+      break;
+    case JXL_TYPE_BOOLEAN:
+      basic_info->bits_per_sample = 1;
+      basic_info->exponent_bits_per_sample = 0;
+      break;
+  }
+  // Alpha never gets exponent bits here; 32-bit samples get 16-bit alpha.
+  basic_info->alpha_exponent_bits = 0;
+  if (pixel_format->num_channels == 2 || pixel_format->num_channels == 4) {
+    basic_info->alpha_bits =
+        basic_info->bits_per_sample == 32 ? 16 : basic_info->bits_per_sample;
+  } else {
+    basic_info->alpha_bits = 0;
+  }
+}
+
+MATCHER_P(MatchesPrimariesAndTransferFunction, color_encoding, "") {  // True when arg has equal primaries and a transfer function for which IsSame() holds.
+  return arg.primaries == color_encoding.primaries &&
+         arg.tf.IsSame(color_encoding.tf);
+}
+
+MATCHER(MatchesPrimariesAndTransferFunction, "") {  // Tuple form: checks std::get<0>(arg) against std::get<1>(arg), for use with Pointwise().
+  return testing::ExplainMatchResult(
+      MatchesPrimariesAndTransferFunction(std::get<1>(arg)), std::get<0>(arg),
+      result_listener);
+}
+
+// Encodes `io` with `cparams`, decodes the result into `io2` with `dparams`
+// and EXPECTs that color encodings survive both steps. Returns compressed
+// size [bytes]. `static inline`, like the other helpers in this header, so
+// that the header can be included from more than one translation unit.
+static inline size_t Roundtrip(
+    const CodecInOut* io, const CompressParams& cparams,
+    const DecompressParams& dparams, ThreadPool* pool,
+    CodecInOut* JXL_RESTRICT io2, AuxOut* aux_out = nullptr) {
+  PaddedBytes compressed;
+  std::vector<ColorEncoding> original_metadata_encodings;
+  std::vector<ColorEncoding> original_current_encodings;
+  for (const ImageBundle& ib : io->frames) {
+    // Remember original encoding, will be returned by decoder.
+    original_metadata_encodings.push_back(ib.metadata()->color_encoding);
+    // c_current should not change during encoding.
+    original_current_encodings.push_back(ib.c_current());
+  }
+
+  std::unique_ptr<PassesEncoderState> enc_state =
+      jxl::make_unique<PassesEncoderState>();
+  EXPECT_TRUE(
+      EncodeFile(cparams, io, enc_state.get(), &compressed, aux_out, pool));
+
+  std::vector<ColorEncoding> metadata_encodings_1;
+  for (const ImageBundle& ib1 : io->frames) {
+    metadata_encodings_1.push_back(ib1.metadata()->color_encoding);
+  }
+  // Should still be in the same color space after encoding.
+  EXPECT_THAT(metadata_encodings_1,
+              testing::Pointwise(MatchesPrimariesAndTransferFunction(),
+                                 original_metadata_encodings));
+
+  EXPECT_TRUE(DecodeFile(dparams, compressed, io2, pool));
+  std::vector<ColorEncoding> metadata_encodings_2;
+  std::vector<ColorEncoding> current_encodings_2;
+  for (const ImageBundle& ib2 : io2->frames) {
+    metadata_encodings_2.push_back(ib2.metadata()->color_encoding);
+    current_encodings_2.push_back(ib2.c_current());
+  }
+
+  EXPECT_THAT(io2->frames, testing::SizeIs(io->frames.size()));
+  // We always produce the original color encoding if a color transform hook is
+  // set.
+  EXPECT_THAT(current_encodings_2,
+              testing::Pointwise(MatchesPrimariesAndTransferFunction(),
+                                 original_current_encodings));
+
+  // Decoder returns the originals passed to the encoder.
+  EXPECT_THAT(metadata_encodings_2,
+              testing::Pointwise(MatchesPrimariesAndTransferFunction(),
+                                 original_metadata_encodings));
+  return compressed.size();
+}
+
+// Replaces each frame after the first with the canvas so far overlaid by the
+// frame's pixels whose alpha is non-zero; the canvas advances only when the
+// frame has use_for_next_frame set. `static inline` for multi-TU inclusion.
+static inline void CoalesceGIFAnimationWithAlpha(CodecInOut* io) {
+  if (io->frames.empty()) return;  // No first frame to use as the canvas.
+  ImageBundle canvas = io->frames[0].Copy();
+  for (size_t i = 1; i < io->frames.size(); i++) {
+    const ImageBundle& frame = io->frames[i];
+    ImageBundle rendered = canvas.Copy();
+    const auto x0 = frame.origin.x0;
+    for (size_t y = 0; y < frame.ysize(); y++) {
+      const auto dst_y = frame.origin.y0 + y;  // Frame row offset by origin.y0.
+      float* row0 = rendered.color()->PlaneRow(0, dst_y) + x0;
+      float* row1 = rendered.color()->PlaneRow(1, dst_y) + x0;
+      float* row2 = rendered.color()->PlaneRow(2, dst_y) + x0;
+      float* rowa = rendered.alpha()->Row(dst_y) + x0;
+      const float* row0f = frame.color().PlaneRow(0, y);
+      const float* row1f = frame.color().PlaneRow(1, y);
+      const float* row2f = frame.color().PlaneRow(2, y);
+      const float* rowaf = frame.alpha().Row(y);
+      for (size_t x = 0; x < frame.xsize(); x++) {
+        if (rowaf[x] != 0) {
+          row0[x] = row0f[x];
+          row1[x] = row1f[x];
+          row2[x] = row2f[x];
+          rowa[x] = rowaf[x];
+        }
+      }
+    }
+    if (frame.use_for_next_frame) canvas = rendered.Copy();
+    io->frames[i] = std::move(rendered);
+  }
+}
+
+// A POD descriptor of a ColorEncoding. Only used in tests as the return value
+// of AllEncodings(); convert it with ColorEncodingFromDescriptor().
+struct ColorEncodingDescriptor {
+  ColorSpace color_space;
+  WhitePoint white_point;
+  Primaries primaries;
+  TransferFunction tf;
+  RenderingIntent rendering_intent;
+};
+
+static inline ColorEncoding ColorEncodingFromDescriptor(
+    const ColorEncodingDescriptor& desc) {
+  ColorEncoding c;  // Filled in field by field from desc, color space first.
+  c.SetColorSpace(desc.color_space);
+  c.white_point = desc.white_point;
+  c.primaries = desc.primaries;
+  c.tf.SetTransferFunction(desc.tf);
+  c.rendering_intent = desc.rendering_intent;
+  return c;
+}
+
+// Stream output for test diagnostics: prints "ColorEncoding/" followed by the
+// Description() of the encoding built from the descriptor.
+static inline ::std::ostream& operator<<(::std::ostream& os,
+                                         const ColorEncodingDescriptor& c) {
+  const ColorEncoding encoding = ColorEncodingFromDescriptor(c);
+  os << "ColorEncoding/" << Description(encoding);
+  return os;
+}
+
+// Returns ColorEncodingDescriptors, which are only used in tests. To obtain a
+// ColorEncoding object call ColorEncodingFromDescriptor and then call
+// ColorEncoding::CreateProfile() on that object to generate a profile.
+//
+// The scratch ColorEncoding `c` is updated level by level so that each inner
+// filter sees the outer choices already applied; the order of the assignments
+// to `c` below therefore matters.
+std::vector<ColorEncodingDescriptor> AllEncodings() {
+  std::vector<ColorEncodingDescriptor> all_encodings;
+  // Capacity hint only; the number of results is not limited to 300.
+  all_encodings.reserve(300);
+  ColorEncoding c;
+
+  for (ColorSpace cs : Values<ColorSpace>()) {
+    // kUnknown and kXYB color spaces are never emitted.
+    if (cs == ColorSpace::kUnknown || cs == ColorSpace::kXYB) continue;
+    c.SetColorSpace(cs);
+
+    for (WhitePoint wp : Values<WhitePoint>()) {
+      if (wp == WhitePoint::kCustom) continue;
+      // With an implicit white point, only the value `c` already holds is
+      // accepted.
+      if (c.ImplicitWhitePoint() && c.white_point != wp) continue;
+      c.white_point = wp;
+
+      for (Primaries primaries : Values<Primaries>()) {
+        if (primaries == Primaries::kCustom) continue;
+        // Descriptors are only pushed inside this loop, so a color space for
+        // which HasPrimaries() is false contributes no descriptors at all.
+        if (!c.HasPrimaries()) continue;
+        c.primaries = primaries;
+
+        for (TransferFunction tf : Values<TransferFunction>()) {
+          if (tf == TransferFunction::kUnknown) continue;
+          // NOTE(review): SetImplicit() is used as a predicate here despite
+          // its setter-like name -- confirm it has no side effects. When it
+          // returns true, gamma transfer functions and any tf other than the
+          // one `c` already holds are skipped.
+          if (c.tf.SetImplicit() &&
+              (c.tf.IsGamma() || c.tf.GetTransferFunction() != tf)) {
+            continue;
+          }
+          c.tf.SetTransferFunction(tf);
+
+          // Every rendering intent is emitted for each surviving combination.
+          for (RenderingIntent ri : Values<RenderingIntent>()) {
+            ColorEncodingDescriptor cdesc;
+            cdesc.color_space = cs;
+            cdesc.white_point = wp;
+            cdesc.primaries = primaries;
+            cdesc.tf = tf;
+            cdesc.rendering_intent = ri;
+            all_encodings.push_back(cdesc);
+          }
+        }
+      }
+    }
+  }
+
+  return all_encodings;
+}
+
+// Returns a test image with some autogenerated pixel content, using 16 bits per
+// channel, big endian order, 1 to 4 channels.
+// The seed parameter allows to create images with different pixel content.
+//
+// The result holds xsize * ysize * num_channels * 2 bytes in row-major order.
+// The content is a vertical gradient with a patterned circle and a rectangle
+// of random noise drawn on top; the circle takes precedence over the
+// rectangle where they overlap.
+std::vector<uint8_t> GetSomeTestImage(size_t xsize, size_t ysize,
+                                      size_t num_channels, uint16_t seed) {
+  // Cause more significant image difference for successive seeds.
+  std::mt19937 std_rng(seed);
+  std::uniform_int_distribution<uint16_t> std_distr(0, 65535);
+
+  // Returns random integer in interval [0, max_value - 1] (0 if max_value is
+  // 0).
+  auto rng = [&std_rng, &std_distr](size_t max_value) -> size_t {
+    return static_cast<size_t>(std_distr(std_rng) / 65536.0f * max_value);
+  };
+
+  // Dark background gradient color
+  uint16_t r0 = rng(32768);
+  uint16_t g0 = rng(32768);
+  uint16_t b0 = rng(32768);
+  uint16_t a0 = rng(32768);
+  uint16_t r1 = rng(32768);
+  uint16_t g1 = rng(32768);
+  uint16_t b1 = rng(32768);
+  uint16_t a1 = rng(32768);
+
+  // Circle with different color
+  size_t circle_x = rng(xsize);
+  size_t circle_y = rng(ysize);
+  size_t circle_r = rng(std::min(xsize, ysize));
+
+  // Rectangle with random noise
+  size_t rect_x0 = rng(xsize);
+  size_t rect_y0 = rng(ysize);
+  size_t rect_x1 = rng(xsize);
+  size_t rect_y1 = rng(ysize);
+  // Normalize so that (x0, y0) is the top-left corner. Each axis is ordered
+  // independently; swapping an x with a y coordinate would leave the x range
+  // inverted and corrupt the y range.
+  if (rect_x1 < rect_x0) std::swap(rect_x0, rect_x1);
+  if (rect_y1 < rect_y0) std::swap(rect_y0, rect_y1);
+
+  size_t num_pixels = xsize * ysize;
+  // 16 bits per channel, big endian, num_channels channels
+  std::vector<uint8_t> pixels(num_pixels * num_channels * 2);
+  // Create pixel content to test, actual content does not matter as long as it
+  // can be compared after roundtrip.
+  for (size_t y = 0; y < ysize; y++) {
+    for (size_t x = 0; x < xsize; x++) {
+      uint16_t r = r0 * (ysize - y - 1) / ysize + r1 * y / ysize;
+      uint16_t g = g0 * (ysize - y - 1) / ysize + g1 * y / ysize;
+      uint16_t b = b0 * (ysize - y - 1) / ysize + b1 * y / ysize;
+      uint16_t a = a0 * (ysize - y - 1) / ysize + a1 * y / ysize;
+      // put some shape in there for visual debugging
+      if ((x - circle_x) * (x - circle_x) + (y - circle_y) * (y - circle_y) <
+          circle_r * circle_r) {
+        r = (65535 - x * y) ^ seed;
+        g = (x << 8) + y + seed;
+        b = (y << 8) + x * seed;
+        a = 32768 + x * 256 - y;
+      } else if (x > rect_x0 && x < rect_x1 && y > rect_y0 && y < rect_y1) {
+        r = rng(65536);
+        g = rng(65536);
+        b = rng(65536);
+        a = rng(65536);
+      }
+      // Each sample is written high byte first (big endian).
+      size_t i = (y * xsize + x) * 2 * num_channels;
+      pixels[i + 0] = (r >> 8);
+      pixels[i + 1] = (r & 255);
+      if (num_channels >= 2) {
+        // This may store what is called 'g' in the alpha channel of a 2-channel
+        // image, but that's ok since the content is arbitrary
+        pixels[i + 2] = (g >> 8);
+        pixels[i + 3] = (g & 255);
+      }
+      if (num_channels >= 3) {
+        pixels[i + 4] = (b >> 8);
+        pixels[i + 5] = (b & 255);
+      }
+      if (num_channels >= 4) {
+        pixels[i + 6] = (a >> 8);
+        pixels[i + 7] = (a & 255);
+      }
+    }
+  }
+  return pixels;
+}
+
+// Wraps a buffer produced by `GetSomeTestImage` (16-bit big-endian samples,
+// `num_channels` channels, xsize x ysize pixels) in a CodecInOut.
+// 1 or 2 channels are treated as grayscale; 2 or 4 channels carry alpha.
+// The conversion result is checked with EXPECT_TRUE.
+jxl::CodecInOut SomeTestImageToCodecInOut(const std::vector<uint8_t>& buf,
+                                          size_t num_channels, size_t xsize,
+                                          size_t ysize) {
+  const bool is_gray = num_channels == 1 || num_channels == 2;
+  const bool has_alpha = num_channels == 2 || num_channels == 4;
+  const jxl::Span<const uint8_t> bytes(buf.data(), buf.size());
+
+  jxl::CodecInOut io;
+  io.SetSize(xsize, ysize);
+  io.metadata.m.SetAlphaBits(16);
+  io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(is_gray);
+  EXPECT_TRUE(ConvertFromExternal(
+      bytes, xsize, ysize, jxl::ColorEncoding::SRGB(is_gray), has_alpha,
+      /*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16, JXL_BIG_ENDIAN,
+      /*flipped_y=*/false, /*pool=*/nullptr,
+      /*ib=*/&io.Main()));
+  return io;
+}
+
+}  // namespace test
+
+// Byte-wise equality for jxl::PaddedBytes: true when both hold the same number
+// of bytes with identical content.
+// Declared inline because the definition lives in a header; a non-inline
+// definition would produce multiple-definition errors as soon as the header
+// is included from two translation units of the same binary.
+inline bool operator==(const jxl::PaddedBytes& a, const jxl::PaddedBytes& b) {
+  if (a.size() != b.size()) return false;
+  // Do not call memcmp for empty buffers: data() may be null in that case
+  // (not visible from here) and memcmp requires valid pointers even for a
+  // zero length.
+  if (a.size() == 0) return true;
+  return memcmp(a.data(), b.data(), a.size()) == 0;
+}
+
+// Inequality counterpart of operator== for jxl::PaddedBytes; together they
+// allow using EXPECT_EQ / EXPECT_NE on jxl::PaddedBytes.
+// Declared inline because the definition lives in a header; a non-inline
+// definition would produce multiple-definition errors as soon as the header
+// is included from two translation units of the same binary.
+inline bool operator!=(const jxl::PaddedBytes& a, const jxl::PaddedBytes& b) {
+  return !(a == b);
+}
+}  // namespace jxl
+
+#endif  // LIB_JXL_TEST_UTILS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/testdata.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/testdata.h
new file mode 100644
index 0000000000..28d1015d0b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/testdata.h
@@ -0,0 +1,60 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_TESTDATA_H_
+#define LIB_JXL_TESTDATA_H_
+
+#ifdef __EMSCRIPTEN__
+#include <emscripten.h>
+#endif
+
+#include <string>
+
+#include "lib/jxl/base/file_io.h"
+
+namespace jxl {
+
+// Loads the test file `filename`, resolved relative to the TEST_DATA_PATH
+// macro (a string literal joined with "/"), and returns its bytes.
+// The result of the read is passed to JXL_CHECK, so a file that cannot be
+// loaded does not return normally (presumably aborts -- see JXL_CHECK).
+static inline PaddedBytes ReadTestData(const std::string& filename) {
+  std::string full_path = std::string(TEST_DATA_PATH "/") + filename;
+  PaddedBytes data;
+  bool ok = ReadFile(full_path, &data);
+#ifdef __EMSCRIPTEN__
+  // Fallback in case FS is not supported in current JS engine.
+  if (!ok) {
+    // {size_t size, uint8_t* bytes} pair.
+    // Filled in by the JavaScript snippet below; stays {0, 0} when the
+    // snippet returns early.
+    uint32_t size_bytes[2] = {0, 0};
+    // $0 is the path (C string), $1 is the address of size_bytes. The
+    // snippet calls the JS function `readbuffer` (presumably provided by the
+    // JS shell -- confirm), copies the bytes into a freshly malloc'ed heap
+    // block and stores the size and the block address in size_bytes.
+    EM_ASM(
+        {
+          let buffer = null;
+          try {
+            buffer = readbuffer(UTF8ToString($0));
+          } catch {
+          }
+          if (!buffer) return;
+          let bytes = new Uint8Array(buffer);
+          let size = bytes.length;
+          let out = _malloc(size);
+          if (!out) return;
+          HEAP8.set(bytes, out);
+          HEAP32[$1 >> 2] = size;
+          HEAP32[($1 + 4) >> 2] = out;
+        },
+        full_path.c_str(), size_bytes);
+    size_t size = size_bytes[0];
+    // The address was stored as a 32-bit value; turn it back into a pointer.
+    uint8_t* bytes = reinterpret_cast<uint8_t*>(size_bytes[1]);
+    // Only a non-empty result counts as success; the heap block is released
+    // after its content has been appended to `data`.
+    if (size) {
+      data.append(bytes, bytes + size);
+      free(reinterpret_cast<void*>(bytes));
+      ok = true;
+    }
+  }
+#endif
+  JXL_CHECK(ok);
+  return data;
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_TESTDATA_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/tf_gbench.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/tf_gbench.cc
new file mode 100644
index 0000000000..9c010d460a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/tf_gbench.cc
@@ -0,0 +1,143 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "benchmark/benchmark.h"
+#include "lib/jxl/image_ops.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/tf_gbench.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+// Per-target benchmark bodies. This section sits between
+// HWY_BEFORE_NAMESPACE() and HWY_AFTER_NAMESPACE() and is re-included through
+// HWY_TARGET_INCLUDE / foreach_target.h.
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// Benchmark body for a SIMD function called as F(d, v). Expects a
+// benchmark::State named `state` in the enclosing scope. Per benchmark
+// iteration it evaluates F kNum (= 1 << 12) times on each of three
+// independent vectors that start at 1e-5, 1.1e-5 and 1.2e-5 and advance by
+// 1e-5 per step. The three running sums are handed to
+// benchmark::DoNotOptimize, and kNum * iterations * Lanes(d) * 3 is reported
+// via SetItemsProcessed.
+#define RUN_BENCHMARK(F)                                            \
+  constexpr size_t kNum = 1 << 12;                                  \
+  HWY_FULL(float) d;                                                \
+  /* Three parallel runs, as this will run on R, G and B. */        \
+  auto sum1 = Zero(d);                                              \
+  auto sum2 = Zero(d);                                              \
+  auto sum3 = Zero(d);                                              \
+  for (auto _ : state) {                                            \
+    auto x = Set(d, 1e-5);                                          \
+    auto v1 = Set(d, 1e-5);                                         \
+    auto v2 = Set(d, 1.1e-5);                                       \
+    auto v3 = Set(d, 1.2e-5);                                       \
+    for (size_t i = 0; i < kNum; i++) {                             \
+      sum1 += F(d, v1);                                             \
+      sum2 += F(d, v2);                                             \
+      sum3 += F(d, v3);                                             \
+      v1 += x;                                                      \
+      v2 += x;                                                      \
+      v3 += x;                                                      \
+    }                                                               \
+  }                                                                 \
+  /* floats per second */                                           \
+  state.SetItemsProcessed(kNum* state.iterations() * Lanes(d) * 3); \
+  benchmark::DoNotOptimize(sum1 + sum2 + sum3);
+
+// Scalar counterpart of RUN_BENCHMARK for a function called as F(v) on a
+// single float. Same start values, step and iteration count; reports
+// kNum * iterations * 3 processed items (no lane factor).
+#define RUN_BENCHMARK_SCALAR(F)                              \
+  constexpr size_t kNum = 1 << 12;                           \
+  /* Three parallel runs, as this will run on R, G and B. */ \
+  float sum1 = 0, sum2 = 0, sum3 = 0;                        \
+  for (auto _ : state) {                                     \
+    float x = 1e-5;                                          \
+    float v1 = 1e-5;                                         \
+    float v2 = 1.1e-5;                                       \
+    float v3 = 1.2e-5;                                       \
+    for (size_t i = 0; i < kNum; i++) {                      \
+      sum1 += F(v1);                                         \
+      sum2 += F(v2);                                         \
+      sum3 += F(v3);                                         \
+      v1 += x;                                               \
+      v2 += x;                                               \
+      v3 += x;                                               \
+    }                                                        \
+  }                                                          \
+  /* floats per second */                                    \
+  state.SetItemsProcessed(kNum* state.iterations() * 3);     \
+  benchmark::DoNotOptimize(sum1 + sum2 + sum3);
+
+// SIMD benchmark of FastLinearToSRGB.
+HWY_NOINLINE void BM_FastSRGB(benchmark::State& state) {
+  RUN_BENCHMARK(FastLinearToSRGB);
+}
+
+// SIMD benchmark of TF_SRGB().EncodedFromDisplay.
+HWY_NOINLINE void BM_TFSRGB(benchmark::State& state) {
+  RUN_BENCHMARK(TF_SRGB().EncodedFromDisplay);
+}
+
+// SIMD benchmark of TF_PQ().DisplayFromEncoded.
+HWY_NOINLINE void BM_PQDFE(benchmark::State& state) {
+  RUN_BENCHMARK(TF_PQ().DisplayFromEncoded);
+}
+
+// SIMD benchmark of TF_PQ().EncodedFromDisplay.
+HWY_NOINLINE void BM_PQEFD(benchmark::State& state) {
+  RUN_BENCHMARK(TF_PQ().EncodedFromDisplay);
+}
+
+// Single-argument (scalar) benchmark of TF_PQ().DisplayFromEncoded.
+HWY_NOINLINE void BM_PQSlowDFE(benchmark::State& state) {
+  RUN_BENCHMARK_SCALAR(TF_PQ().DisplayFromEncoded);
+}
+
+// Single-argument (scalar) benchmark of TF_PQ().EncodedFromDisplay.
+HWY_NOINLINE void BM_PQSlowEFD(benchmark::State& state) {
+  RUN_BENCHMARK_SCALAR(TF_PQ().EncodedFromDisplay);
+}
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+// Compiled only once (inside the HWY_ONCE guard): dispatch wrappers and
+// benchmark registration.
+namespace jxl {
+namespace {
+
+// Export the per-target benchmark bodies so that HWY_DYNAMIC_DISPATCH below
+// can refer to them.
+HWY_EXPORT(BM_FastSRGB);
+HWY_EXPORT(BM_TFSRGB);
+HWY_EXPORT(BM_PQDFE);
+HWY_EXPORT(BM_PQEFD);
+HWY_EXPORT(BM_PQSlowDFE);
+HWY_EXPORT(BM_PQSlowEFD);
+
+// Piecewise sRGB-style curve computed with powf: linear (12.92 * x) below
+// 0.0031308, otherwise 1.055 * x^(1/2.4) - 0.055. Benchmarked by BM_SRGB_pow.
+float SRGB_pow(float x) {
+  return x < 0.0031308f ? 12.92f * x : 1.055f * powf(x, 1.0f / 2.4f) - 0.055f;
+}
+
+// Thin wrappers that forward `state` to the exported per-target function of
+// the same name through HWY_DYNAMIC_DISPATCH.
+void BM_FastSRGB(benchmark::State& state) {
+  HWY_DYNAMIC_DISPATCH(BM_FastSRGB)(state);
+}
+void BM_TFSRGB(benchmark::State& state) {
+  HWY_DYNAMIC_DISPATCH(BM_TFSRGB)(state);
+}
+void BM_PQDFE(benchmark::State& state) {
+  HWY_DYNAMIC_DISPATCH(BM_PQDFE)(state);
+}
+void BM_PQEFD(benchmark::State& state) {
+  HWY_DYNAMIC_DISPATCH(BM_PQEFD)(state);
+}
+void BM_PQSlowDFE(benchmark::State& state) {
+  HWY_DYNAMIC_DISPATCH(BM_PQSlowDFE)(state);
+}
+void BM_PQSlowEFD(benchmark::State& state) {
+  HWY_DYNAMIC_DISPATCH(BM_PQSlowEFD)(state);
+}
+
+// Scalar benchmark of SRGB_pow; defined here directly, without dispatch.
+void BM_SRGB_pow(benchmark::State& state) { RUN_BENCHMARK_SCALAR(SRGB_pow); }
+
+// Register all benchmarks.
+BENCHMARK(BM_FastSRGB);
+BENCHMARK(BM_TFSRGB);
+BENCHMARK(BM_SRGB_pow);
+BENCHMARK(BM_PQDFE);
+BENCHMARK(BM_PQEFD);
+BENCHMARK(BM_PQSlowDFE);
+BENCHMARK(BM_PQSlowEFD);
+
+}  // namespace
+}  // namespace jxl
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc
new file mode 100644
index 0000000000..3a2193e42d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/toc.cc
@@ -0,0 +1,97 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/toc.h"
+
+#include <stdint.h>
+
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+// Upper bound, in bits, for a TOC with `num_sizes` entries: every entry at the
+// maximum encoded size of kTocDist plus a fixed framing overhead.
+size_t MaxBits(const size_t num_sizes) {
+  const size_t bits_per_entry = U32Coder::MaxEncodedBits(kTocDist);
+  // permutation bit (not its tokens!), padding before and after the entries.
+  const size_t framing_bits = 1 + kBitsPerByte + kBitsPerByte;
+  return framing_bits + bits_per_entry * num_sizes;
+}
+
+// Reads a table of contents with `toc_entries` entries from `reader`.
+//
+// On success `sizes` holds the entry sizes and `offsets` the start offset of
+// each entry (prefix sum of the sizes as stored, beginning at 0); when the
+// stream carries a permutation, both vectors are reordered by it afterwards.
+// `total_size`, if non-null, receives the sum of all sizes. Fails for more
+// than 65536 entries, for a reader that cannot hold the minimum number of
+// bits, for a permutation that does not decode, and on offset overflow.
+Status ReadGroupOffsets(size_t toc_entries, BitReader* JXL_RESTRICT reader,
+                        std::vector<uint64_t>* JXL_RESTRICT offsets,
+                        std::vector<uint32_t>* JXL_RESTRICT sizes,
+                        uint64_t* total_size) {
+  // Prevent out of memory if invalid JXL codestream causes a bogus amount
+  // of toc_entries such as 2720436919446 to be computed.
+  // TODO(lode): verify whether 65536 is a reasonable upper bound
+  if (toc_entries > 65536) {
+    return JXL_FAILURE("too many toc entries");
+  }
+
+  // Succeeds only if the unread part of the input can still hold
+  // `num_entries` entries at their cheapest encoding.
+  const auto has_enough_bits = [&](size_t num_entries) -> Status {
+    // U32Coder reads 2 bits to recognize variant and kTocDist cheapest variant
+    // is Bits(10), this way at least 12 bits are required per toc-entry.
+    const size_t needed_bits = num_entries * (2 + 10);
+    const size_t available_bits = reader->TotalBytes() * 8;
+    const size_t consumed_bits = reader->TotalBitsConsumed();
+    if (consumed_bits > available_bits ||
+        needed_bits > available_bits - consumed_bits) {
+      return JXL_STATUS(StatusCode::kNotEnoughBytes,
+                        "Not enough bytes for TOC");
+    }
+    return true;
+  };
+
+  JXL_DASSERT(offsets != nullptr && sizes != nullptr);
+  std::vector<coeff_order_t> permutation;
+  // The flag bit is always consumed, even for an empty TOC.
+  const bool has_permutation = reader->ReadFixedBits<1>() == 1;
+  if (has_permutation && toc_entries > 0) {
+    // Skip permutation description if the toc_entries is 0.
+    JXL_RETURN_IF_ERROR(has_enough_bits(toc_entries));
+    permutation.resize(toc_entries);
+    JXL_RETURN_IF_ERROR(
+        DecodePermutation(/*skip=*/0, toc_entries, permutation.data(), reader));
+  }
+
+  JXL_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+  JXL_RETURN_IF_ERROR(has_enough_bits(toc_entries));
+  sizes->clear();
+  sizes->reserve(toc_entries);
+  for (size_t entry = 0; entry < toc_entries; ++entry) {
+    sizes->push_back(U32Coder::Read(kTocDist, reader));
+  }
+  JXL_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+  JXL_RETURN_IF_ERROR(has_enough_bits(0));
+
+  // Prefix sum starting with 0 and ending with the offset of the last group
+  offsets->clear();
+  offsets->reserve(toc_entries);
+  uint64_t running_offset = 0;
+  for (const uint32_t group_size : *sizes) {
+    const uint64_t next_offset = running_offset + group_size;
+    // Unsigned wrap-around shows up as a sum smaller than its operand.
+    if (next_offset < running_offset) {
+      return JXL_FAILURE("group offset overflow");
+    }
+    offsets->push_back(running_offset);
+    running_offset = next_offset;
+  }
+  if (total_size != nullptr) {
+    *total_size = running_offset;
+  }
+
+  if (!permutation.empty()) {
+    // Gather offsets and sizes in permutation order, then install them.
+    std::vector<uint64_t> permuted_offsets;
+    std::vector<uint32_t> permuted_sizes;
+    permuted_offsets.reserve(toc_entries);
+    permuted_sizes.reserve(toc_entries);
+    for (size_t i = 0; i < permutation.size(); ++i) {
+      const coeff_order_t index = permutation[i];
+      permuted_offsets.push_back((*offsets)[index]);
+      permuted_sizes.push_back((*sizes)[index]);
+    }
+    offsets->swap(permuted_offsets);
+    sizes->swap(permuted_sizes);
+  }
+
+  return true;
+}
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/toc.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/toc.h
new file mode 100644
index 0000000000..ffebdf9115
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/toc.h
@@ -0,0 +1,50 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_TOC_H_
+#define LIB_JXL_TOC_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+// (2+bits) = 2,3,4 bytes so encoders can patch TOC after encoding.
+// 30 is sufficient for 4K channels of uncompressed 16-bit samples.
+// Four variants with 10, 14, 22 and 30 payload bits. Each offset equals the
+// previous offset plus the previous range size (1024 = 2^10,
+// 17408 = 1024 + 2^14, 4211712 = 17408 + 2^22), so the variants presumably
+// cover consecutive, non-overlapping value ranges -- confirm against U32Enc.
+constexpr U32Enc kTocDist(Bits(10), BitsOffset(14, 1024), BitsOffset(22, 17408),
+                          BitsOffset(30, 4211712));
+
+size_t MaxBits(const size_t num_sizes);
+
+// TODO(veluca): move these to FrameDimensions.
+// Index of the entry for AC group `group` in pass `pass`. AC entries are laid
+// out pass-major and start after 1 leading entry, `num_dc_groups` entries and
+// one further entry when `has_ac_global` is set.
+static JXL_INLINE size_t AcGroupIndex(size_t pass, size_t group,
+                                      size_t num_groups, size_t num_dc_groups,
+                                      bool has_ac_global) {
+  const size_t ac_global_entries = has_ac_global ? 1 : 0;
+  const size_t first_ac_group = 1 + num_dc_groups + ac_global_entries;
+  return first_ac_group + pass * num_groups + group;
+}
+
+// Number of TOC entries: exactly 1 when there is a single group and a single
+// pass, otherwise everything up to the first AC group plus one entry per
+// (pass, group) pair.
+static JXL_INLINE size_t NumTocEntries(size_t num_groups, size_t num_dc_groups,
+                                       size_t num_passes, bool has_ac_global) {
+  const bool single_entry = (num_groups == 1) && (num_passes == 1);
+  if (single_entry) return 1;
+  const size_t entries_before_ac = AcGroupIndex(
+      /*pass=*/0, /*group=*/0, num_groups, num_dc_groups, has_ac_global);
+  return entries_before_ac + num_groups * num_passes;
+}
+
+Status ReadGroupOffsets(size_t toc_entries, BitReader* JXL_RESTRICT reader,
+                        std::vector<uint64_t>* JXL_RESTRICT offsets,
+                        std::vector<uint32_t>* JXL_RESTRICT sizes,
+                        uint64_t* total_size);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_TOC_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/toc_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/toc_test.cc
new file mode 100644
index 0000000000..ef27320926
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/toc_test.cc
@@ -0,0 +1,93 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/toc.h"
+
+#include <random>
+
+#include "gtest/gtest.h"
+#include "lib/jxl/aux_out_fwd.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_toc.h"
+
+namespace jxl {
+namespace {
+
+// Writes a TOC for `num_entries` randomly sized, byte-aligned groups
+// (permuted when `permute` is set), reads it back and checks the decoded
+// offsets, sizes and total size. All randomness is drawn from `rng`.
+void Roundtrip(size_t num_entries, bool permute, std::mt19937* rng) {
+  // Start from the identity; shuffle only when a permutation is requested.
+  std::vector<coeff_order_t> permutation(num_entries);
+  for (size_t i = 0; i < num_entries; i++) {
+    permutation[i] = i;
+  }
+  std::vector<coeff_order_t> inv_permutation = permutation;
+  if (permute) {
+    std::shuffle(permutation.begin(), permutation.end(), *rng);
+    for (size_t i = 0; i < num_entries; i++) {
+      inv_permutation[permutation[i]] = i;
+    }
+  }
+
+  // One all-zero bitstream of random length per group, padded to a byte.
+  std::vector<BitWriter> group_codes(num_entries);
+  for (size_t g = 0; g < group_codes.size(); g++) {
+    BitWriter& group_writer = group_codes[g];
+    const size_t max_bits = (*rng)() & 0xFFF;
+    BitWriter::Allotment allotment(&group_writer, max_bits + kBitsPerByte);
+    size_t written = 0;
+    // Largest possible chunks first, then the remainder bit by bit.
+    while (written + BitWriter::kMaxBitsPerCall < max_bits) {
+      group_writer.Write(BitWriter::kMaxBitsPerCall, 0);
+      written += BitWriter::kMaxBitsPerCall;
+    }
+    while (written < max_bits) {
+      group_writer.Write(/*n_bits=*/1, 0);
+      written += 1;
+    }
+    group_writer.ZeroPadToByte();
+    AuxOut group_aux_out;
+    ReclaimAndCharge(&group_writer, &allotment, 0, &group_aux_out);
+  }
+
+  // Encode the TOC ...
+  BitWriter writer;
+  AuxOut aux_out;
+  ASSERT_TRUE(WriteGroupOffsets(group_codes, permute ? &permutation : nullptr,
+                                &writer, &aux_out));
+
+  // ... and decode it again.
+  BitReader reader(writer.GetSpan());
+  std::vector<uint64_t> group_offsets;
+  std::vector<uint32_t> group_sizes;
+  uint64_t total_size;
+  ASSERT_TRUE(ReadGroupOffsets(num_entries, &reader, &group_offsets,
+                               &group_sizes, &total_size));
+  ASSERT_EQ(num_entries, group_offsets.size());
+  ASSERT_EQ(num_entries, group_sizes.size());
+  EXPECT_TRUE(reader.Close());
+
+  // Group i is looked up at slot inv_permutation[i] of the decoded vectors.
+  uint64_t expected_offset = 0;
+  for (size_t i = 0; i < num_entries; ++i) {
+    const size_t slot = inv_permutation[i];
+    EXPECT_EQ(expected_offset, group_offsets[slot]);
+
+    const auto bits_written = group_codes[i].BitsWritten();
+    EXPECT_EQ(0, bits_written % kBitsPerByte);
+    expected_offset += bits_written / kBitsPerByte;
+
+    // Consecutive groups must be adjacent: offset + size == next offset.
+    if (i + 1 < num_entries) {
+      const size_t next_slot = inv_permutation[i + 1];
+      EXPECT_EQ(group_offsets[slot] + group_sizes[slot],
+                group_offsets[next_slot]);
+    }
+  }
+  EXPECT_EQ(expected_offset, total_size);
+}
+
+// Runs the TOC roundtrip for 0 to 9 entries, each first without and then with
+// a permutation, sharing one fixed-seed generator across all runs.
+TEST(TocTest, Test) {
+  std::mt19937 rng(12345);
+  const bool kPermuteModes[] = {false, true};
+  for (size_t num_entries = 0; num_entries < 10; ++num_entries) {
+    for (const bool permute : kPermuteModes) {
+      Roundtrip(num_entries, permute, &rng);
+    }
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/transfer_functions-inl.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/transfer_functions-inl.h
new file mode 100644
index 0000000000..43069ac0be
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/transfer_functions-inl.h
@@ -0,0 +1,397 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Transfer functions for color encodings.
+
+#if defined(LIB_JXL_TRANSFER_FUNCTIONS_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_TRANSFER_FUNCTIONS_INL_H_
+#undef LIB_JXL_TRANSFER_FUNCTIONS_INL_H_
+#else
+#define LIB_JXL_TRANSFER_FUNCTIONS_INL_H_
+#endif
+
+#include <algorithm>
+#include <cmath>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/rational_polynomial-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// Definitions for BT.2100-2 transfer functions (used inside/outside SIMD):
+// "display" is linear light (nits) normalized to [0, 1].
+// "encoded" is a nonlinear encoding (e.g. PQ) in [0, 1].
+// "scene" is a linear function of photon counts, normalized to [0, 1].
+
+// Despite the stated ranges, we need unbounded transfer functions: see
+// http://www.littlecms.com/CIC18_UnboundedCMM.pdf. Inputs can be negative or
+// above 1 due to chromatic adaptation. To avoid severe round-trip errors caused
+// by clamping, we mirror negative inputs via copysign (f(-x) = -f(x), see
+// https://developer.apple.com/documentation/coregraphics/cgcolorspace/1644735-extendedsrgb)
+// and extend the function domains above 1.
+
+// Hybrid Log-Gamma.
+//
+// Offers a scalar double-precision pair (DisplayFromEncoded /
+// EncodedFromDisplay) and a SIMD EncodedFromDisplay. OETF and InvOETF mirror
+// negative inputs: they transform the magnitude and reapply the input sign.
+class TF_HLG {
+ public:
+  // EOTF. e = encoded.
+  // Maps e from [0, 1] onto [kBeta, 1] before inverting the OETF.
+  JXL_INLINE double DisplayFromEncoded(const double e) const {
+    const double lifted = e * (1.0 - kBeta) + kBeta;
+    return OOTF(InvOETF(lifted));
+  }
+
+  // Inverse EOTF. d = display.
+  // Undoes the [kBeta, 1] lift applied by DisplayFromEncoded.
+  JXL_INLINE double EncodedFromDisplay(const double d) const {
+    const double lifted = OETF(InvOOTF(d));
+    const double e = (lifted - kBeta) * (1.0 / (1.0 - kBeta));
+    return e;
+  }
+
+  // SIMD version of EncodedFromDisplay; `d` is the vector descriptor and `x`
+  // the display values.
+  // Maximum error 5e-7.
+  template <class D, class V>
+  JXL_INLINE V EncodedFromDisplay(D d, V x) const {
+    const hwy::HWY_NAMESPACE::Rebind<uint32_t, D> du;
+    const V kSign = BitCast(d, Set(du, 0x80000000u));
+    const V original_sign = And(x, kSign);
+    x = AndNot(kSign, x);  // abs
+    // Square-root branch, selected for x <= 1/12.
+    const V below_div12 = Sqrt(Set(d, 3.0f) * x);
+    // Log branch: kA * ln(12 x - kB) + kC, with ln expressed as ln(2) * log2.
+    const V e =
+        MulAdd(Set(d, kA * 0.693147181f),
+               FastLog2f(d, MulAdd(Set(d, 12), x, Set(d, -kB))), Set(d, kC));
+    const V magnitude = IfThenElse(x <= Set(d, kDiv12), below_div12, e);
+    const V lifted = Or(AndNot(kSign, magnitude), original_sign);
+    // (lifted - kBeta) / (1 - kBeta), folded into one multiply-add.
+    const V kMul = Set(d, 1.0f / (1.0f - kBeta));
+    const V kAdd = Set(d, -kBeta / (1.0f - kBeta));
+    return MulAdd(kMul, lifted, kAdd);
+  }
+
+ private:
+  // OETF (defines the HLG approach). s = scene, returns encoded.
+  // std::copysign keeps the result in double precision; the float-only
+  // copysignf would round it to float first.
+  JXL_INLINE double OETF(double s) const {
+    if (s == 0.0) return 0.0;
+    const double original_sign = s;
+    s = std::abs(s);
+
+    if (s <= kDiv12) return std::copysign(std::sqrt(3.0 * s), original_sign);
+
+    const double e = kA * std::log(12 * s - kB) + kC;
+    JXL_ASSERT(e > 0.0);
+    return std::copysign(e, original_sign);
+  }
+
+  // e = encoded, returns scene.
+  // Inverse of OETF(); also evaluated in double precision throughout.
+  JXL_INLINE double InvOETF(double e) const {
+    if (e == 0.0) return 0.0;
+    const double original_sign = e;
+    e = std::abs(e);
+
+    if (e <= 0.5) return std::copysign(e * e * (1.0 / 3), original_sign);
+
+    const double s = (std::exp((e - kC) * kRA) + kB) * kDiv12;
+    JXL_ASSERT(s >= 0);
+    return std::copysign(s, original_sign);
+  }
+
+  // s = scene, returns display.
+  JXL_INLINE double OOTF(const double s) const {
+    // The actual (red channel) OOTF is RD = alpha * YS^(gamma-1) * RS, where
+    // YS = 0.2627 * RS + 0.6780 * GS + 0.0593 * BS. Let alpha = 1 so we return
+    // "display" (normalized [0, 1]) instead of nits. Our transfer function
+    // interface does not allow a dependency on YS. Fortunately, the system
+    // gamma at 334 nits is 1.0, so this reduces to RD = RS.
+    return s;
+  }
+
+  // d = display, returns scene.
+  JXL_INLINE double InvOOTF(const double d) const {
+    return d;  // see OOTF().
+  }
+
+  // Assume 1000:1 contrast @ 200 nits => gamma 0.9
+  static constexpr double kBeta = 0.04;  // = sqrt(3 * contrast^(1/gamma))
+
+  static constexpr double kA = 0.17883277;
+  static constexpr double kRA = 1.0 / kA;
+  static constexpr double kB = 1 - 4 * kA;
+  static constexpr double kC = 0.5599107295;
+  static constexpr double kDiv12 = 1.0 / 12;
+};
+
+// Transfer function with a linear segment (slope kMulLow) below kThresh and a
+// power-law segment (kMulHi * x^kPowHi + kSub) above it.
+class TF_709 {
+ public:
+  // Scalar version. d = display, returns encoded.
+  JXL_INLINE double EncodedFromDisplay(const double d) const {
+    return (d < kThresh) ? kMulLow * d
+                         : kMulHi * std::pow(d, kPowHi) + kSub;
+  }
+
+  // SIMD version; `d` is the vector descriptor and `x` the display values.
+  // Maximum error 1e-6.
+  template <class D, class V>
+  JXL_INLINE V EncodedFromDisplay(D d, V x) const {
+    const auto linear_segment = Set(d, kMulLow) * x;
+    const auto power_term = FastPowf(d, x, Set(d, kPowHi));
+    const auto power_segment =
+        MulAdd(Set(d, kMulHi), power_term, Set(d, kSub));
+    // Both segments are evaluated for every lane; the comparison selects.
+    return IfThenElse(x <= Set(d, kThresh), linear_segment, power_segment);
+  }
+
+ private:
+  static constexpr double kThresh = 0.018;
+  static constexpr double kMulLow = 4.5;
+  static constexpr double kMulHi = 1.099;
+  static constexpr double kPowHi = 0.45;
+  static constexpr double kSub = -0.099;
+};
+
+// Perceptual Quantization
+//
+// Offers scalar double-precision and SIMD versions of both directions. All
+// four mirror negative inputs: they transform the magnitude and reapply the
+// input sign.
+class TF_PQ {
+ public:
+  // EOTF (defines the PQ approach). e = encoded.
+  // std::copysign keeps the result in double precision; the float-only
+  // copysignf would round it to float first.
+  JXL_INLINE double DisplayFromEncoded(double e) const {
+    if (e == 0.0) return 0.0;
+    const double original_sign = e;
+    e = std::abs(e);
+
+    const double xp = std::pow(e, 1.0 / kM2);
+    // Clamp at zero so that the second pow never sees a negative base.
+    const double num = std::max(xp - kC1, 0.0);
+    const double den = kC2 - kC3 * xp;
+    JXL_DASSERT(den != 0.0);
+    const double d = std::pow(num / den, 1.0 / kM1);
+    JXL_DASSERT(d >= 0.0);  // Equal for e ~= 1E-9
+    return std::copysign(d, original_sign);
+  }
+
+  // SIMD version of DisplayFromEncoded; `d` is the vector descriptor and `x`
+  // the encoded values.
+  // Maximum error 3e-6
+  template <class D, class V>
+  JXL_INLINE V DisplayFromEncoded(D d, V x) const {
+    const hwy::HWY_NAMESPACE::Rebind<uint32_t, D> du;
+    const V kSign = BitCast(d, Set(du, 0x80000000u));
+    const V original_sign = And(x, kSign);
+    x = AndNot(kSign, x);  // abs
+    // 4-over-4-degree rational polynomial approximation on x+x*x. This improves
+    // the maximum error by about 5x over a rational polynomial for x.
+    auto xpxx = MulAdd(x, x, x);
+    HWY_ALIGN constexpr float p[(4 + 1) * 4] = {
+        HWY_REP4(2.62975656e-04f), HWY_REP4(-6.23553089e-03f),
+        HWY_REP4(7.38602301e-01f), HWY_REP4(2.64553172e+00f),
+        HWY_REP4(5.50034862e-01f),
+    };
+    HWY_ALIGN constexpr float q[(4 + 1) * 4] = {
+        HWY_REP4(4.21350107e+02f), HWY_REP4(-4.28736818e+02f),
+        HWY_REP4(1.74364667e+02f), HWY_REP4(-3.39078883e+01f),
+        HWY_REP4(2.67718770e+00f),
+    };
+    auto magnitude = EvalRationalPolynomial(d, xpxx, p, q);
+    return Or(AndNot(kSign, magnitude), original_sign);
+  }
+
+  // Inverse EOTF. d = display.
+  // Evaluated in double precision throughout, including the final sign
+  // transfer (std::copysign rather than the float-only copysignf).
+  JXL_INLINE double EncodedFromDisplay(double d) const {
+    if (d == 0.0) return 0.0;
+    const double original_sign = d;
+    d = std::abs(d);
+
+    const double xp = std::pow(d, kM1);
+    const double num = kC1 + xp * kC2;
+    const double den = 1.0 + xp * kC3;
+    const double e = std::pow(num / den, kM2);
+    JXL_DASSERT(e > 0.0);
+    return std::copysign(e, original_sign);
+  }
+
+  // SIMD version of EncodedFromDisplay; `d` is the vector descriptor and `x`
+  // the display values.
+  // Maximum error 7e-7.
+  template <class D, class V>
+  JXL_INLINE V EncodedFromDisplay(D d, V x) const {
+    const hwy::HWY_NAMESPACE::Rebind<uint32_t, D> du;
+    const V kSign = BitCast(d, Set(du, 0x80000000u));
+    const V original_sign = And(x, kSign);
+    x = AndNot(kSign, x);  // abs
+    // 4-over-4-degree rational polynomial approximation on x**0.25, with two
+    // different polynomials above and below 1e-4.
+    auto xto025 = Sqrt(Sqrt(x));
+    HWY_ALIGN constexpr float p[(4 + 1) * 4] = {
+        HWY_REP4(1.351392e-02f), HWY_REP4(-1.095778e+00f),
+        HWY_REP4(5.522776e+01f), HWY_REP4(1.492516e+02f),
+        HWY_REP4(4.838434e+01f),
+    };
+    HWY_ALIGN constexpr float q[(4 + 1) * 4] = {
+        HWY_REP4(1.012416e+00f), HWY_REP4(2.016708e+01f),
+        HWY_REP4(9.263710e+01f), HWY_REP4(1.120607e+02f),
+        HWY_REP4(2.590418e+01f),
+    };
+
+    HWY_ALIGN constexpr float plo[(4 + 1) * 4] = {
+        HWY_REP4(9.863406e-06f),  HWY_REP4(3.881234e-01f),
+        HWY_REP4(1.352821e+02f),  HWY_REP4(6.889862e+04f),
+        HWY_REP4(-2.864824e+05f),
+    };
+    HWY_ALIGN constexpr float qlo[(4 + 1) * 4] = {
+        HWY_REP4(3.371868e+01f),  HWY_REP4(1.477719e+03f),
+        HWY_REP4(1.608477e+04f),  HWY_REP4(-4.389884e+04f),
+        HWY_REP4(-2.072546e+05f),
+    };
+
+    // Both polynomials are evaluated for every lane; the comparison selects
+    // the low-range one (plo/qlo) for x < 1e-4.
+    auto magnitude = IfThenElse(x < Set(d, 1e-4f),
+                                EvalRationalPolynomial(d, xto025, plo, qlo),
+                                EvalRationalPolynomial(d, xto025, p, q));
+    return Or(AndNot(kSign, magnitude), original_sign);
+  }
+
+ private:
+  static constexpr double kM1 = 2610.0 / 16384;
+  static constexpr double kM2 = (2523.0 / 4096) * 128;
+  static constexpr double kC1 = 3424.0 / 4096;
+  static constexpr double kC2 = (2413.0 / 4096) * 32;
+  static constexpr double kC3 = (2392.0 / 4096) * 32;
+};
+
+// sRGB transfer functions; both directions act on |x| and restore the sign.
+class TF_SRGB {
+ public:
+  template <typename V>
+  JXL_INLINE V DisplayFromEncoded(V x) const {
+    const HWY_FULL(float) d;  // NOTE(review): presumably V must match this tag.
+    const HWY_FULL(uint32_t) du;
+    const V kSign = BitCast(d, Set(du, 0x80000000u));  // Sign-bit mask.
+    const V original_sign = And(x, kSign);
+    x = AndNot(kSign, x);  // abs
+
+    // TODO(janwas): range reduction
+    // Computed via af_cheb_rational (k=100); 5 values, each replicated 4x.
+    HWY_ALIGN constexpr float p[(4 + 1) * 4] = {
+        2.200248328e-04f, 2.200248328e-04f, 2.200248328e-04f, 2.200248328e-04f,
+        1.043637593e-02f, 1.043637593e-02f, 1.043637593e-02f, 1.043637593e-02f,
+        1.624820318e-01f, 1.624820318e-01f, 1.624820318e-01f, 1.624820318e-01f,
+        7.961564959e-01f, 7.961564959e-01f, 7.961564959e-01f, 7.961564959e-01f,
+        8.210152774e-01f, 8.210152774e-01f, 8.210152774e-01f, 8.210152774e-01f,
+    };
+    HWY_ALIGN constexpr float q[(4 + 1) * 4] = {
+        2.631846970e-01f,  2.631846970e-01f,  2.631846970e-01f,
+        2.631846970e-01f,  1.076976492e+00f,  1.076976492e+00f,
+        1.076976492e+00f,  1.076976492e+00f,  4.987528350e-01f,
+        4.987528350e-01f,  4.987528350e-01f,  4.987528350e-01f,
+        -5.512498495e-02f, -5.512498495e-02f, -5.512498495e-02f,
+        -5.512498495e-02f, 6.521209011e-03f,  6.521209011e-03f,
+        6.521209011e-03f,  6.521209011e-03f,
+    };
+    const V linear = x * Set(d, kLowDivInv);  // Linear segment: x / kLowDiv.
+    const V poly = EvalRationalPolynomial(d, x, p, q);
+    const V magnitude =  // poly above the threshold, linear at or below it.
+        IfThenElse(x > Set(d, kThreshSRGBToLinear), poly, linear);
+    return Or(AndNot(kSign, magnitude), original_sign);
+  }
+
+  // Error ~5e-07. Inverse direction; the polynomial is evaluated on sqrt(x).
+  template <class D, class V>
+  JXL_INLINE V EncodedFromDisplay(D d, V x) const {
+    const hwy::HWY_NAMESPACE::Rebind<uint32_t, D> du;
+    const V kSign = BitCast(d, Set(du, 0x80000000u));  // Sign-bit mask.
+    const V original_sign = And(x, kSign);
+    x = AndNot(kSign, x);  // abs
+
+    // Computed via af_cheb_rational (k=100); 5 values, each replicated 4x.
+    HWY_ALIGN constexpr float p[(4 + 1) * 4] = {
+        -5.135152395e-04f, -5.135152395e-04f, -5.135152395e-04f,
+        -5.135152395e-04f, 5.287254571e-03f,  5.287254571e-03f,
+        5.287254571e-03f,  5.287254571e-03f,  3.903842876e-01f,
+        3.903842876e-01f,  3.903842876e-01f,  3.903842876e-01f,
+        1.474205315e+00f,  1.474205315e+00f,  1.474205315e+00f,
+        1.474205315e+00f,  7.352629620e-01f,  7.352629620e-01f,
+        7.352629620e-01f,  7.352629620e-01f,
+    };
+    HWY_ALIGN constexpr float q[(4 + 1) * 4] = {
+        1.004519624e-02f, 1.004519624e-02f, 1.004519624e-02f, 1.004519624e-02f,
+        3.036675394e-01f, 3.036675394e-01f, 3.036675394e-01f, 3.036675394e-01f,
+        1.340816930e+00f, 1.340816930e+00f, 1.340816930e+00f, 1.340816930e+00f,
+        9.258482155e-01f, 9.258482155e-01f, 9.258482155e-01f, 9.258482155e-01f,
+        2.424867759e-02f, 2.424867759e-02f, 2.424867759e-02f, 2.424867759e-02f,
+    };
+    const V linear = x * Set(d, kLowDiv);  // Linear segment: x * kLowDiv.
+    const V poly = EvalRationalPolynomial(d, Sqrt(x), p, q);
+    const V magnitude =  // poly above the threshold, linear at or below it.
+        IfThenElse(x > Set(d, kThreshLinearToSRGB), poly, linear);
+    return Or(AndNot(kSign, magnitude), original_sign);
+  }
+
+ private:
+  static constexpr float kThreshSRGBToLinear = 0.04045f;  // On encoded values.
+  static constexpr float kThreshLinearToSRGB = 0.0031308f;  // On linear values.
+  static constexpr float kLowDiv = 12.92f;  // Linear-segment slope.
+  static constexpr float kLowDivInv = 1.0f / kLowDiv;  // Reciprocal slope.
+};
+
+// Linear to sRGB conversion with error of at most 1.2e-4.
+template <typename D, typename V>
+V FastLinearToSRGB(D d, V v) {
+  const hwy::HWY_NAMESPACE::Rebind<uint32_t, D> du;
+  const hwy::HWY_NAMESPACE::Rebind<int32_t, D> di;
+  // Convert to 0.25 - 0.5 range: sign/exponent bits of 0.25f, mantissa kept.
+  auto v025_05 =
+      BitCast(d, (BitCast(du, v) | Set(du, 0x3e800000)) & Set(du, 0x3effffff));
+  // third degree polynomial approximation between 0.25 and 0.5
+  // of 1.055/2^(7/2.4) * x^(1/2.4) * 0.5. A degree 4 polynomial only improves
+  // accuracy by about 3x.
+  auto d1 = MulAdd(v025_05, Set(d, 0.059914046f), Set(d, -0.108894556f));
+  auto d2 = MulAdd(d1, v025_05, Set(d, 0.107963754f));
+  auto pow = MulAdd(d2, v025_05, Set(d, 0.018092343f));
+  // Compute extra multiplier depending on exponent. Valid exponent range for
+  // [0.0031308f, 1.0) is 0...8 after subtracting 118.
+  // The next three constants contain a representation of the powers of
+  // 2**(1/2.4) = 2**(5/12) times two; in particular, bits from 26 to 31 are
+  // always the same and in k2to512powers_basebits, and the two arrays contain
+  // the next groups of 8 bits. This ends up being a 22-bit representation (with
+  // a mantissa of 13 bits). The choice of polynomial to approximate is such
+  // that the multiplication factor has the highest 5 bits constant, and that
+  // the factor for the lowest possible exponent is a power of two (thus making
+  // the additional bits 0, which is used to correctly merge back together the
+  // floats).
+  constexpr uint32_t k2to512powers_basebits = 0x40000000;
+  HWY_ALIGN constexpr uint8_t k2to512powers_25to18bits[16] = {
+      0x0,  0xa,  0x19, 0x26, 0x32, 0x41, 0x4d, 0x5c,
+      0x68, 0x75, 0x83, 0x8f, 0xa0, 0xaa, 0xb9, 0xc6,
+  };
+  HWY_ALIGN constexpr uint8_t k2to512powers_17to10bits[16] = {
+      0x0,  0xb7, 0x4,  0xd,  0xcb, 0xe7, 0x41, 0x68,
+      0x51, 0xd1, 0xeb, 0xf2, 0x0,  0xb7, 0x4,  0xd,
+  };
+  // Note that vld1q_s8_x2 on ARM seems to actually be slower.
+#if HWY_TARGET != HWY_SCALAR
+  using hwy::HWY_NAMESPACE::ShiftLeft;
+  using hwy::HWY_NAMESPACE::ShiftRight;
+  // Every lane of exp is now (if cast to byte) {0, 0, 0, <index for lookup>}.
+  auto exp = ShiftRight<23>(BitCast(di, v)) - Set(di, 118);
+  auto pow25to18bits = TableLookupBytes(
+      LoadDup128(di,
+                 reinterpret_cast<const int32_t*>(k2to512powers_25to18bits)),
+      exp);
+  auto pow17to10bits = TableLookupBytes(
+      LoadDup128(di,
+                 reinterpret_cast<const int32_t*>(k2to512powers_17to10bits)),
+      exp);
+  // Now, pow* contain {0, 0, 0, <part of float repr of multiplier>}. Here
+  // we take advantage of the fact that each table has its position 0 equal to
+  // 0.
+  // We can now just reassemble the float.
+  auto mul =
+      BitCast(d, ShiftLeft<18>(pow25to18bits) | ShiftLeft<10>(pow17to10bits) |
+                     Set(di, k2to512powers_basebits));
+#else
+  // Fallback for scalar; "& 0xf" keeps the index inside the 16-entry tables.
+  uint32_t exp = ((BitCast(di, v).raw >> 23) - 118) & 0xf;
+  auto mul = BitCast(d, Set(di, (k2to512powers_25to18bits[exp] << 18) |
+                                    (k2to512powers_17to10bits[exp] << 10) |
+                                    k2to512powers_basebits));
+#endif
+  return IfThenElse(v < Set(d, 0.0031308f), v * Set(d, 12.92f),
+                    MulAdd(pow, mul, Set(d, -0.055f)));
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_TRANSFER_FUNCTIONS_INL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/transpose-inl.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/transpose-inl.h
new file mode 100644
index 0000000000..d12b1295e8
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/transpose-inl.h
@@ -0,0 +1,201 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Block transpose for DCT/IDCT
+
+#if defined(LIB_JXL_TRANSPOSE_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_TRANSPOSE_INL_H_
+#undef LIB_JXL_TRANSPOSE_INL_H_
+#else
+#define LIB_JXL_TRANSPOSE_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+#include <type_traits>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dct_block-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+#ifndef JXL_INLINE_TRANSPOSE
+// Workaround for issue #42 - (excessive?) inlining causes invalid codegen.
+#if defined(__arm__)
+#define JXL_INLINE_TRANSPOSE HWY_NOINLINE
+#else
+#define JXL_INLINE_TRANSPOSE HWY_INLINE
+#endif
+#endif  // JXL_INLINE_TRANSPOSE
+
+// Calls f(args...) from a JXL_NOINLINE function so the call is not inlined.
+template <typename T, typename... Args>
+JXL_NOINLINE void NoInlineWrapper(const T& f, const Args&... args) {
+  f(args...);
+}
+
+template <bool enabled>  // Tag selecting a GenericTransposeBlock overload.
+struct TransposeSimdTag {};  // <true>: SIMD kernel; <false>: scalar loop.
+
+// TODO(veluca): it's not super useful to have this in the SIMD namespace.
+template <size_t ROWS_or_0, size_t COLS_or_0, class From, class To>
+JXL_INLINE_TRANSPOSE void GenericTransposeBlock(TransposeSimdTag<false>,
+                                                const From& from, const To& to,
+                                                size_t ROWSp, size_t COLSp) {
+  const size_t num_rows = ROWS_or_0 != 0 ? ROWS_or_0 : ROWSp;  // 0: dynamic
+  const size_t num_cols = COLS_or_0 != 0 ? COLS_or_0 : COLSp;
+  for (size_t row = 0; row != num_rows; ++row) {
+    for (size_t col = 0; col != num_cols; ++col) {
+      to.Write(from.Read(row, col), col, row);  // (row, col) -> (col, row)
+    }
+  }
+}
+
+// TODO(veluca): AVX3?
+#if HWY_CAP_GE256
+constexpr bool TransposeUseSimd(size_t ROWS, size_t COLS) {
+  return !(ROWS % 8 != 0 || COLS % 8 != 0);  // Both multiples of 8?
+}
+
+template <size_t ROWS_or_0, size_t COLS_or_0, class From, class To>
+JXL_INLINE_TRANSPOSE void GenericTransposeBlock(TransposeSimdTag<true>,
+                                                const From& from, const To& to,
+                                                size_t ROWSp, size_t COLSp) {
+  size_t ROWS = ROWS_or_0 == 0 ? ROWSp : ROWS_or_0;  // 0: use runtime size.
+  size_t COLS = COLS_or_0 == 0 ? COLSp : COLS_or_0;  // 0: use runtime size.
+  static_assert(MaxLanes(BlockDesc<8>()) == 8, "Invalid descriptor size");
+  static_assert(ROWS_or_0 % 8 == 0, "Invalid number of rows");
+  static_assert(COLS_or_0 % 8 == 0, "Invalid number of columns");
+  for (size_t n = 0; n < ROWS; n += 8) {    // Row origin of the tile.
+    for (size_t m = 0; m < COLS; m += 8) {  // Column origin; one 8x8 tile.
+      auto i0 = from.LoadPart(BlockDesc<8>(), n + 0, m + 0);
+      auto i1 = from.LoadPart(BlockDesc<8>(), n + 1, m + 0);
+      auto i2 = from.LoadPart(BlockDesc<8>(), n + 2, m + 0);
+      auto i3 = from.LoadPart(BlockDesc<8>(), n + 3, m + 0);
+      auto i4 = from.LoadPart(BlockDesc<8>(), n + 4, m + 0);
+      auto i5 = from.LoadPart(BlockDesc<8>(), n + 5, m + 0);
+      auto i6 = from.LoadPart(BlockDesc<8>(), n + 6, m + 0);
+      auto i7 = from.LoadPart(BlockDesc<8>(), n + 7, m + 0);
+      // Surprisingly, this straightforward implementation (24 cycles on port5)
+      // is faster than load128+insert and LoadDup128+ConcatUpperLower+blend.
+      const auto q0 = InterleaveLower(i0, i2);
+      const auto q1 = InterleaveLower(i1, i3);
+      const auto q2 = InterleaveUpper(i0, i2);
+      const auto q3 = InterleaveUpper(i1, i3);
+      const auto q4 = InterleaveLower(i4, i6);
+      const auto q5 = InterleaveLower(i5, i7);
+      const auto q6 = InterleaveUpper(i4, i6);
+      const auto q7 = InterleaveUpper(i5, i7);
+      // Second interleave stage, applied to the results of the first.
+      const auto r0 = InterleaveLower(q0, q1);
+      const auto r1 = InterleaveUpper(q0, q1);
+      const auto r2 = InterleaveLower(q2, q3);
+      const auto r3 = InterleaveUpper(q2, q3);
+      const auto r4 = InterleaveLower(q4, q5);
+      const auto r5 = InterleaveUpper(q4, q5);
+      const auto r6 = InterleaveLower(q6, q7);
+      const auto r7 = InterleaveUpper(q6, q7);
+      // Join lower (then upper) halves of r4..r7 and r0..r3 into output rows.
+      i0 = ConcatLowerLower(r4, r0);
+      i1 = ConcatLowerLower(r5, r1);
+      i2 = ConcatLowerLower(r6, r2);
+      i3 = ConcatLowerLower(r7, r3);
+      i4 = ConcatUpperUpper(r4, r0);
+      i5 = ConcatUpperUpper(r5, r1);
+      i6 = ConcatUpperUpper(r6, r2);
+      i7 = ConcatUpperUpper(r7, r3);
+      to.StorePart(BlockDesc<8>(), i0, m + 0, n + 0);  // Swapped (m, n) coords.
+      to.StorePart(BlockDesc<8>(), i1, m + 1, n + 0);
+      to.StorePart(BlockDesc<8>(), i2, m + 2, n + 0);
+      to.StorePart(BlockDesc<8>(), i3, m + 3, n + 0);
+      to.StorePart(BlockDesc<8>(), i4, m + 4, n + 0);
+      to.StorePart(BlockDesc<8>(), i5, m + 5, n + 0);
+      to.StorePart(BlockDesc<8>(), i6, m + 6, n + 0);
+      to.StorePart(BlockDesc<8>(), i7, m + 7, n + 0);
+    }
+  }
+}
+#elif HWY_TARGET != HWY_SCALAR
+constexpr bool TransposeUseSimd(size_t ROWS, size_t COLS) {
+  return !(ROWS % 4 != 0 || COLS % 4 != 0);  // Both multiples of 4?
+}
+
+template <size_t ROWS_or_0, size_t COLS_or_0, class From, class To>
+JXL_INLINE_TRANSPOSE void GenericTransposeBlock(TransposeSimdTag<true>,
+                                                const From& from, const To& to,
+                                                size_t ROWSp, size_t COLSp) {
+  size_t ROWS = ROWS_or_0 == 0 ? ROWSp : ROWS_or_0;  // 0: use runtime size.
+  size_t COLS = COLS_or_0 == 0 ? COLSp : COLS_or_0;  // 0: use runtime size.
+  static_assert(MaxLanes(BlockDesc<4>()) == 4, "Invalid descriptor size");
+  static_assert(ROWS_or_0 % 4 == 0, "Invalid number of rows");
+  static_assert(COLS_or_0 % 4 == 0, "Invalid number of columns");
+  for (size_t n = 0; n < ROWS; n += 4) {    // Row origin of the tile.
+    for (size_t m = 0; m < COLS; m += 4) {  // Column origin; one 4x4 tile.
+      const auto p0 = from.LoadPart(BlockDesc<4>(), n + 0, m + 0);
+      const auto p1 = from.LoadPart(BlockDesc<4>(), n + 1, m + 0);
+      const auto p2 = from.LoadPart(BlockDesc<4>(), n + 2, m + 0);
+      const auto p3 = from.LoadPart(BlockDesc<4>(), n + 3, m + 0);
+      // First interleave stage: rows 0 with 2, and 1 with 3.
+      const auto q0 = InterleaveLower(p0, p2);
+      const auto q1 = InterleaveLower(p1, p3);
+      const auto q2 = InterleaveUpper(p0, p2);
+      const auto q3 = InterleaveUpper(p1, p3);
+      // Second interleave stage, applied to the results of the first.
+      const auto r0 = InterleaveLower(q0, q1);
+      const auto r1 = InterleaveUpper(q0, q1);
+      const auto r2 = InterleaveLower(q2, q3);
+      const auto r3 = InterleaveUpper(q2, q3);
+      // Store with swapped (m, n) coordinates.
+      to.StorePart(BlockDesc<4>(), r0, m + 0, n + 0);
+      to.StorePart(BlockDesc<4>(), r1, m + 1, n + 0);
+      to.StorePart(BlockDesc<4>(), r2, m + 2, n + 0);
+      to.StorePart(BlockDesc<4>(), r3, m + 3, n + 0);
+    }
+  }
+}
+#else
+constexpr bool TransposeUseSimd(size_t, size_t) { return false; }  // Scalar.
+#endif
+
+template <size_t N, size_t M, typename = void>
+struct Transpose {
+  // Transposes the N x M block `from` into `to`; the two must not alias.
+  template <typename From, typename To>
+  static void Run(const From& from, const To& to) {
+    // Cheap sanity check only: catches identical start addresses, no more.
+    JXL_DASSERT(from.Address(0, 0) != to.Address(0, 0));
+    using Tag = TransposeSimdTag<TransposeUseSimd(N, M)>;
+    GenericTransposeBlock<N, M>(Tag(), from, to, N, M);
+  }
+};
+
+// Large blocks (N, M >= 8 and N * M >= 512): avoid inlining and unrolling by
+// calling the runtime-sized kernel through NoInlineWrapper.
+template <size_t N, size_t M>
+struct Transpose<
+    N, M, typename std::enable_if<(N >= 8 && M >= 8 && N * M >= 512)>::type> {
+  template <typename From, typename To>
+  static void Run(const From& from, const To& to) {
+    // Cheap sanity check only: catches identical start addresses, no more.
+    JXL_DASSERT(from.Address(0, 0) != to.Address(0, 0));
+    using Tag = TransposeSimdTag<TransposeUseSimd(N, M)>;
+    using Kernel = void (*)(Tag, const From&, const To&, size_t, size_t);
+    // The pointer type picks the GenericTransposeBlock overload taking Tag.
+    constexpr Kernel transpose = GenericTransposeBlock<0, 0, From, To>;
+    NoInlineWrapper(transpose, Tag(), from, to, N, M);
+  }
+};
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_TRANSPOSE_INL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/xorshift128plus-inl.h b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/xorshift128plus-inl.h
new file mode 100644
index 0000000000..6c1865181c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/xorshift128plus-inl.h
@@ -0,0 +1,88 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Fast but weak random generator.
+
+#if defined(LIB_JXL_XORSHIFT128PLUS_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_XORSHIFT128PLUS_INL_H_
+#undef LIB_JXL_XORSHIFT128PLUS_INL_H_
+#else
+#define LIB_JXL_XORSHIFT128PLUS_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::ShiftLeft;
+using hwy::HWY_NAMESPACE::ShiftRight;
+
+// Adapted from https://github.com/vpxyz/xorshift/blob/master/xorshift128plus/
+// (MIT-license)
+class Xorshift128Plus {
+ public:
+  // 8 independent generators (= single iteration for AVX-512)
+  enum { N = 8 };
+  // Seeds all N generators from a single 64-bit seed.
+  explicit HWY_MAYBE_UNUSED Xorshift128Plus(const uint64_t seed) {
+    // Init state using SplitMix64 generator: each word hashes the previous one.
+    s0_[0] = SplitMix64(seed + 0x9E3779B97F4A7C15ull);
+    s1_[0] = SplitMix64(s0_[0]);
+    for (size_t i = 1; i < N; ++i) {
+      s0_[i] = SplitMix64(s1_[i - 1]);
+      s1_[i] = SplitMix64(s0_[i]);
+    }
+  }
+  // Writes N new 64-bit values to random_bits and advances every generator.
+  HWY_INLINE HWY_MAYBE_UNUSED void Fill(uint64_t* HWY_RESTRICT random_bits) {
+#if HWY_CAP_INTEGER64
+    const HWY_FULL(uint64_t) d;  // NOTE(review): requires Lanes(d) <= N.
+    for (size_t i = 0; i < N; i += Lanes(d)) {
+      auto s1 = Load(d, s0_ + i);  // Names are swapped vs. members: s1 <- s0_.
+      const auto s0 = Load(d, s1_ + i);
+      const auto bits = s1 + s0;  // b, c
+      Store(s0, d, s0_ + i);
+      s1 ^= ShiftLeft<23>(s1);
+      Store(bits, d, random_bits + i);  // Aligned store into the output.
+      s1 ^= s0 ^ ShiftRight<18>(s1) ^ ShiftRight<5>(s0);
+      Store(s1, d, s1_ + i);
+    }
+#else
+    for (size_t i = 0; i < N; ++i) {  // Same update, one generator at a time.
+      auto s1 = s0_[i];
+      const auto s0 = s1_[i];
+      const auto bits = s1 + s0;  // b, c
+      s0_[i] = s0;
+      s1 ^= s1 << 23;
+      random_bits[i] = bits;
+      s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
+      s1_[i] = s1;
+    }
+#endif
+  }
+
+ private:
+  static uint64_t SplitMix64(uint64_t z) {  // 64-bit mixer used for seeding.
+    z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull;
+    z = (z ^ (z >> 27)) * 0x94D049BB133111EBull;
+    return z ^ (z >> 31);
+  }
+
+  HWY_ALIGN uint64_t s0_[N];  // First state word of each generator.
+  HWY_ALIGN uint64_t s1_[N];  // Second state word of each generator.
+};
+
+}  // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif  // LIB_JXL_XORSHIFT128PLUS_INL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/jxl/xorshift128plus_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/xorshift128plus_test.cc
new file mode 100644
index 0000000000..f86f921906
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/jxl/xorshift128plus_test.cc
@@ -0,0 +1,372 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/xorshift128plus_test.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+#include <hwy/tests/test_util-inl.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/thread_pool_internal.h"
+#include "lib/jxl/xorshift128plus-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::ShiftRight;
+
+// Define to nonzero in order to print the (new) golden outputs.
+#define PRINT_RESULTS 0
+
+const size_t kVectors = 64;
+
+#if PRINT_RESULTS
+
+// Prints one row of results in the initializer format used by kExpected.
+template <int kNumLanes>
+void Print(const uint64_t (&result)[kNumLanes]) {
+  printf("{ ");
+  for (int i = 0; i < kNumLanes; ++i) {
+    if (i != 0) printf(", ");
+    // %llX expects unsigned long long, which uint64_t need not be.
+    printf("0x%016llXull", static_cast<unsigned long long>(result[i]));
+  }
+  printf("},\n");
+}
+
+#else  // PRINT_RESULTS
+
+const uint64_t kExpected[kVectors][Xorshift128Plus::N] = {
+    {0x6E901576D477CBB1ull, 0xE9E53789195DA2A2ull, 0xB681F6DDA5E0AE99ull,
+     0x8EFD18CE21FD6896ull, 0xA898A80DF75CF532ull, 0x50CEB2C9E2DE7E32ull,
+     0x3CA7C2FEB25C0DD0ull, 0xA4D0866B80B4D836ull},
+    {0x8CD6A1E6233D3A26ull, 0x3D4603ADE98B112Dull, 0xDC427AF674019E36ull,
+     0xE28B4D230705AC53ull, 0x7297E9BBA88783DDull, 0x34D3D23CFCD9B41Aull,
+     0x5A223615ADBE96B8ull, 0xE5EB529027CFBD01ull},
+    {0xC1894CF00DFAC6A2ull, 0x18EDF8AE9085E404ull, 0x8E936625296B4CCDull,
+     0x31971EF3A14A899Bull, 0xBE87535FCE0BF26Aull, 0x576F7A752BC6649Full,
+     0xA44CBADCE0C6B937ull, 0x3DBA819BB17A353Aull},
+    {0x27CE38DFCC1C5EB6ull, 0x920BEB5606340256ull, 0x3986CBC40C9AFC2Cull,
+     0xE22BCB3EEB1E191Eull, 0x6E1FCDD3602A8FBAull, 0x052CB044E5415A29ull,
+     0x46266646EFB9ECD7ull, 0x8F44914618D29335ull},
+    {0xDD30AEDF72A362C5ull, 0xBC1D824E16BB98F4ull, 0x9EA6009C2AA3D2F1ull,
+     0xF65C0FBBE17AF081ull, 0x22424D06A8738991ull, 0x8A62763F2B7611D2ull,
+     0x2F3E89F722637939ull, 0x84D338BEF50AFD50ull},
+    {0x00F46494898E2B0Bull, 0x81239DC4FB8E8003ull, 0x414AD93EC5773FE7ull,
+     0x791473C450E4110Full, 0x87F127BF68C959ACull, 0x6429282D695EF67Bull,
+     0x661082E11546CBA8ull, 0x5815D53FA5436BFDull},
+    {0xB3DEADAB9BE6E0F9ull, 0xAA1B7B8F7CED0202ull, 0x4C5ED437699D279Eull,
+     0xA4471727F1CB39D3ull, 0xE439DA193F802F70ull, 0xF89401BB04FA6493ull,
+     0x3B08045A4FE898BAull, 0x32137BFE98227950ull},
+    {0xFBAE4A092897FEF3ull, 0x0639F6CE56E71C8Eull, 0xF0AD6465C07F0C1Eull,
+     0xFF8E28563361DCE5ull, 0xC2013DB7F86BC6B9ull, 0x8EFCC0503330102Full,
+     0x3F6B767EA5C4DA40ull, 0xB9864B950B2232E1ull},
+    {0x76EB58DE8E5EC22Aull, 0x9BBBF49A18B32F4Full, 0xC8405F02B2B2FAB9ull,
+     0xC3E122A5F146BC34ull, 0xC90BB046660F5765ull, 0xB933981310DBECCFull,
+     0x5A2A7BFC9126FD1Cull, 0x8BB388C94DF87901ull},
+    {0x753EB89AD63EF3C3ull, 0xF24AAF40C89D65ADull, 0x23F68931C1A6AA6Dull,
+     0xF47E79BF702C6DD0ull, 0xA3AD113244EE7EAEull, 0xD42CBEA28F793DC3ull,
+     0xD896FCF1820F497Cull, 0x042B86D2818948C1ull},
+    {0x8F2A4FC5A4265763ull, 0xEC499E6F95EAA10Cull, 0xE3786D4ECCD0DEB5ull,
+     0xC725C53D3AC4CC43ull, 0x065A4ACBBF83610Eull, 0x35C61C9FEF167129ull,
+     0x7B720AEAA7D70048ull, 0x14206B841377D039ull},
+    {0xAD27D78BF96055F6ull, 0x5F43B20FF47ADCD4ull, 0xE184C2401E2BF71Eull,
+     0x30B263D78990045Dull, 0xC22F00EBFF9BA201ull, 0xAE7F86522B53A562ull,
+     0x2853312BC039F0A4ull, 0x868D619E6549C3C8ull},
+    {0xFD5493D8AE9A8371ull, 0x773D5E224DF61B3Bull, 0x5377C54FBB1A8280ull,
+     0xCAD4DE3B8265CAFAull, 0xCDF3F19C91EBD5F6ull, 0xC8EA0F182D73BD78ull,
+     0x220502D593433FF1ull, 0xB81205E612DC31B1ull},
+    {0x8F32A39EAEDA4C70ull, 0x1D4B0914AA4DAC7Full, 0x56EF1570F3A8B405ull,
+     0x29812CB17404A592ull, 0x97A2AAF69CAE90F2ull, 0x12BF5E02778BBFE5ull,
+     0x9D4B55AD42A05FD2ull, 0x06C2BAB5E6086620ull},
+    {0x8DB4B9648302B253ull, 0xD756AD9E3AEA12C7ull, 0x68709B7F11D4B188ull,
+     0x7CC299DDCD707A4Bull, 0x97B860C370A7661Dull, 0xCECD314FC20E64F5ull,
+     0x55F412CDFB4C7EC3ull, 0x55EE97591193B525ull},
+    {0xCF70F3ACA96E6254ull, 0x022FEDECA2E09F46ull, 0x686823DB60AE1ECFull,
+     0xFD36190D3739830Eull, 0x74E1C09027F68120ull, 0xB5883A835C093842ull,
+     0x93E1EFB927E9E4E3ull, 0xB2721E249D7E5EBEull},
+    {0x69B6E21C44188CB8ull, 0x5D6CFB853655A7AAull, 0x3E001A0B425A66DCull,
+     0x8C57451103A5138Full, 0x7BF8B4BE18EAB402ull, 0x494102EB8761A365ull,
+     0xB33796A9F6A81F0Eull, 0x10005AB3BCCFD960ull},
+    {0xB2CF25740AE965DCull, 0x6F7C1DF7EF53D670ull, 0x648DD6087AC2251Eull,
+     0x040955D9851D487Dull, 0xBD550FC7E21A7F66ull, 0x57408F484DEB3AB5ull,
+     0x481E24C150B506C1ull, 0x72C0C3EAF91A40D6ull},
+    {0x1997A481858A5D39ull, 0x539718F4BEF50DC1ull, 0x2EC4DC4787E7E368ull,
+     0xFF1CE78879419845ull, 0xE219A93DD6F6DD30ull, 0x85328618D02FEC1Aull,
+     0xC86E02D969181B20ull, 0xEBEC8CD8BBA34E6Eull},
+    {0x28B55088A16CE947ull, 0xDD25AC11E6350195ull, 0xBD1F176694257B1Cull,
+     0x09459CCF9FCC9402ull, 0xF8047341E386C4E4ull, 0x7E8E9A9AD984C6C0ull,
+     0xA4661E95062AA092ull, 0x70A9947005ED1152ull},
+    {0x4C01CF75DBE98CCDull, 0x0BA076CDFC7373B9ull, 0x6C5E7A004B57FB59ull,
+     0x336B82297FD3BC56ull, 0x7990C0BE74E8D60Full, 0xF0275CC00EC5C8C8ull,
+     0x6CF29E682DFAD2E9ull, 0xFA4361524BD95D72ull},
+    {0x631D2A19FF62F018ull, 0x41C43863B985B3FAull, 0xE052B2267038EFD9ull,
+     0xE2A535FAC575F430ull, 0xE004EEA90B1FF5B8ull, 0x42DFE2CA692A1F26ull,
+     0x90FB0BFC9A189ECCull, 0x4484102BD3536BD0ull},
+    {0xD027134E9ACCA5A5ull, 0xBBAB4F966D476A9Bull, 0x713794A96E03D693ull,
+     0x9F6335E6B94CD44Aull, 0xC5090C80E7471617ull, 0x6D9C1B0C87B58E33ull,
+     0x1969CE82E31185A5ull, 0x2099B97E87754EBEull},
+    {0x60EBAF4ED934350Full, 0xC26FBF0BA5E6ECFFull, 0x9E54150F0312EC57ull,
+     0x0973B48364ED0041ull, 0x800A523241426CFCull, 0x03AB5EC055F75989ull,
+     0x8CF315935DEEB40Aull, 0x83D3FC0190BD1409ull},
+    {0x26D35394CF720A51ull, 0xCE9EAA15243CBAFEull, 0xE2B45FBAF21B29E0ull,
+     0xDB92E98EDE73F9E0ull, 0x79B16F5101C26387ull, 0x1AC15959DE88C86Full,
+     0x387633AEC6D6A580ull, 0xA6FC05807BFC5EB8ull},
+    {0x2D26C8E47C6BADA9ull, 0x820E6EC832D52D73ull, 0xB8432C3E0ED0EE5Bull,
+     0x0F84B3C4063AAA87ull, 0xF393E4366854F651ull, 0x749E1B4D2366A567ull,
+     0x805EACA43480D004ull, 0x244EBF3AA54400A5ull},
+    {0xBFDC3763AA79F75Aull, 0x9E3A74CC751F41DBull, 0xF401302A149DBC55ull,
+     0x6B25F7973D7BF7BCull, 0x13371D34FDBC3DAEull, 0xC5E1998C8F484DCDull,
+     0x7031B8AE5C364464ull, 0x3847F0C4F3DA2C25ull},
+    {0x24C6387D2C0F1225ull, 0x77CCE960255C67A4ull, 0x21A0947E497B10EBull,
+     0xBB5DB73A825A9D7Eull, 0x26294A41999E553Dull, 0x3953E0089F87D925ull,
+     0x3DAE6E5D4E5EAAFEull, 0x74B545460341A7AAull},
+    {0x710E5EB08A7DB820ull, 0x7E43C4E77CAEA025ull, 0xD4C91529C8B060C1ull,
+     0x09AE26D8A7B0CA29ull, 0xAB9F356BB360A772ull, 0xB68834A25F19F6E9ull,
+     0x79B8D9894C5734E2ull, 0xC6847E7C8FFD265Full},
+    {0x10C4BCB06A5111E6ull, 0x57CB50955B6A2516ull, 0xEF53C87798B6995Full,
+     0xAB38E15BBD8D0197ull, 0xA51C6106EFF73C93ull, 0x83D7F0E2270A7134ull,
+     0x0923FD330397FCE5ull, 0xF9DE54EDFE58FB45ull},
+    {0x07D44833ACCD1A94ull, 0xAAD3C9E945E2F9F3ull, 0xABF4C879B876AA37ull,
+     0xF29C69A21B301619ull, 0x2DDCE959111C788Bull, 0x7CEDB48F8AC1729Bull,
+     0x93F3BA9A02B659BEull, 0xF20A87FF17933CBEull},
+    {0x8E96EBE93180CFE6ull, 0x94CAA12873937079ull, 0x05F613D9380D4189ull,
+     0xBCAB40C1DC79F38Aull, 0x0AD8907B7C61D19Eull, 0x88534E189D103910ull,
+     0x2DB2FAABA160AB8Full, 0xA070E7506B06F15Cull},
+    {0x6FB1FCDAFFEF87A9ull, 0xE735CF25337A090Dull, 0x172C6EDCEFEF1825ull,
+     0x76957EA49EF0542Dull, 0x819BF4CD250F7C49ull, 0xD6FF23E4AD00C4D4ull,
+     0xE79673C1EC358FF0ull, 0xAC9C048144337938ull},
+    {0x4C5387FF258B3AF4ull, 0xEDB68FAEC2CB1AA3ull, 0x02A624E67B4E1DA4ull,
+     0x5C44797A38E08AF2ull, 0x36546A70E9411B4Bull, 0x47C17B24D2FD9675ull,
+     0x101957AAA020CA26ull, 0x47A1619D4779F122ull},
+    {0xF84B8BCDC92D9A3Cull, 0x951D7D2C74B3066Bull, 0x7AC287C06EDDD9B2ull,
+     0x4C38FC476608D38Full, 0x224D793B19CB4BCDull, 0x835A255899BF1A41ull,
+     0x4AD250E9F62DB4ABull, 0xD9B44F4B58781096ull},
+    {0xABBAF99A8EB5C6B8ull, 0xFB568E900D3A9F56ull, 0x11EDF63D23C5DF11ull,
+     0xA9C3011D3FA7C5A8ull, 0xAEDD3CF11AFFF725ull, 0xABCA472B5F1EDD6Bull,
+     0x0600B6BB5D879804ull, 0xDB4DE007F22191A0ull},
+    {0xD76CC9EFF0CE9392ull, 0xF5E0A772B59BA49Aull, 0x7D1AE1ED0C1261B5ull,
+     0x79224A33B5EA4F4Aull, 0x6DD825D80C40EA60ull, 0x47FC8E747E51C953ull,
+     0x695C05F72888BF98ull, 0x1A012428440B9015ull},
+    {0xD754DD61F9B772BFull, 0xC4A2FCF4C0F9D4EBull, 0x461167CDF67A24A2ull,
+     0x434748490EBCB9D4ull, 0x274DD9CDCA5781DEull, 0x36BAC63BA9A85209ull,
+     0x30324DAFDA36B70Full, 0x337570DB4FE6DAB3ull},
+    {0xF46CBDD57C551546ull, 0x8E02507E676DA3E3ull, 0xD826245A8C15406Dull,
+     0xDFB38A5B71113B72ull, 0x5EA38454C95B16B5ull, 0x28C054FB87ABF3E1ull,
+     0xAA2724C0BA1A8096ull, 0xECA83EC980304F2Full},
+    {0x6AA76EC294EB3303ull, 0x42D4CDB2A8032E3Bull, 0x7999EDF75DCD8735ull,
+     0xB422BFFE696CCDCCull, 0x8F721461FD7CCDFEull, 0x148E1A5814FDE253ull,
+     0x4DC941F4375EF8FFull, 0x27B2A9E0EB5B49CFull},
+    {0xCEA592EF9343EBE1ull, 0xF7D38B5FA7698903ull, 0x6CCBF352203FEAB6ull,
+     0x830F3095FCCDA9C5ull, 0xDBEEF4B81B81C8F4ull, 0x6D7EB9BCEECA5CF9ull,
+     0xC58ABB0FBE436C69ull, 0xE4B97E6DB2041A4Bull},
+    {0x7E40FC772978AF14ull, 0xCDDA4BBAE28354A1ull, 0xE4F993B832C32613ull,
+     0xD3608093C68A4B35ull, 0x9A3B60E01BEE3699ull, 0x03BEF248F3288713ull,
+     0x70B9294318F3E9B4ull, 0x8D2ABB913B8610DEull},
+    {0x37F209128E7D8B2Cull, 0x81D2AB375BD874BCull, 0xA716A1B7373F7408ull,
+     0x0CEE97BEC4706540ull, 0xA40C5FD9CDBC1512ull, 0x73CAF6C8918409E7ull,
+     0x45E11BCEDF0BBAA1ull, 0x612C612BFF6E6605ull},
+    {0xF8ECB14A12D0F649ull, 0xDA683CD7C01BA1ACull, 0xA2203F7510E124C1ull,
+     0x7F83E52E162F3C78ull, 0x77D2BB73456ACADBull, 0x37FC34FC840BBA6Full,
+     0x3076BC7D4C6EBC1Full, 0x4F514123632B5FA9ull},
+    {0x44D789DED935E884ull, 0xF8291591E09FEC9Full, 0xD9CED2CF32A2E4B7ull,
+     0x95F70E1EB604904Aull, 0xDE438FE43C14F6ABull, 0x4C8D23E4FAFCF8D8ull,
+     0xC716910A3067EB86ull, 0x3D6B7915315095D3ull},
+    {0x3170FDBADAB92095ull, 0x8F1963933FC5650Bull, 0x72F94F00ABECFEABull,
+     0x6E3AE826C6AAB4CEull, 0xA677A2BF31068258ull, 0x9660CDC4F363AF10ull,
+     0xD81A15A152379EF1ull, 0x5D7D285E1080A3F9ull},
+    {0xDAD5DDFF9A2249B3ull, 0x6F9721D926103FAEull, 0x1418CBB83FFA349Aull,
+     0xE71A30AD48C012B2ull, 0xBE76376C63751132ull, 0x3496467ACA713AE6ull,
+     0x8D7EC01369F991A3ull, 0xD8C73A88B96B154Eull},
+    {0x8B5D9C74AEB4833Aull, 0xF914FB3F867B912Full, 0xB894EA034936B1DCull,
+     0x8A16D21BE51C4F5Bull, 0x31FF048ED582D98Eull, 0xB95AB2F4DC65B820ull,
+     0x04082B9170561AF7ull, 0xA215610A5DC836FAull},
+    {0xB2ADE592C092FAACull, 0x7A1E683BCBF13294ull, 0xC7A4DBF86858C096ull,
+     0x3A49940F97BFF316ull, 0xCAE5C06B82C46703ull, 0xC7F413A0F951E2BDull,
+     0x6665E7BB10EB5916ull, 0x86F84A5A94EDE319ull},
+    {0x4EA199D8FAA79CA3ull, 0xDFA26E5BF1981704ull, 0x0F5E081D37FA4E01ull,
+     0x9CB632F89CD675CDull, 0x4A09DB89D48C0304ull, 0x88142742EA3C7672ull,
+     0xAC4F149E6D2E9BDBull, 0x6D9E1C23F8B1C6C6ull},
+    {0xD58BE47B92DEC0E9ull, 0x8E57573645E34328ull, 0x4CC094CCB5FB5126ull,
+     0x5F1D66AF6FB40E3Cull, 0x2BA15509132D3B00ull, 0x0D6545646120E567ull,
+     0x3CF680C45C223666ull, 0x96B28E32930179DAull},
+    {0x5900C45853AC7990ull, 0x61881E3E8B7FF169ull, 0x4DE5F835DF2230FFull,
+     0x4427A9E7932F73FFull, 0x9B641BAD379A8C8Dull, 0xDF271E5BF98F4E5Cull,
+     0xDFDA16DB830FF5EEull, 0x371C7E7CFB89C0E9ull},
+    {0x4410A8576247A250ull, 0x6AD2DA12B45AC0D9ull, 0x18DFC72AAC85EECCull,
+     0x06FC8BB2A0EF25C8ull, 0xEB287619C85E6118ull, 0x19553ECA67F25A2Cull,
+     0x3B9557F1DCEC5BAAull, 0x7BAD9E8B710D1079ull},
+    {0x34F365D66BD22B28ull, 0xE6E124B9F10F835Dull, 0x0573C38ABF2B24DCull,
+     0xD32E6AF10A0125AEull, 0x383590ACEA979519ull, 0x8376ED7A39E28205ull,
+     0xF0B7F184DCBDA435ull, 0x062A203390E31794ull},
+    {0xA2AFFD7E41918760ull, 0x7F90FC1BD0819C86ull, 0x5033C08E5A969533ull,
+     0x2707AF5C6D039590ull, 0x57BBD5980F17DF9Cull, 0xD3FE6E61D763268Aull,
+     0x9E0A0AE40F335A3Bull, 0x43CF4EB0A99613C5ull},
+    {0xD4D2A397CE1A7C2Eull, 0x3DF7CE7CC3212DADull, 0x0880F0D5D356C75Aull,
+     0xA8AFC44DD03B1346ull, 0x79263B46C13A29E0ull, 0x11071B3C0ED58E7Aull,
+     0xED46DC9F538406BFull, 0x2C94974F2B94843Dull},
+    {0xE246E13C39AB5D5Eull, 0xAC1018489D955B20ull, 0x8601B558771852B8ull,
+     0x110BD4C06DB40173ull, 0x738FC8A18CCA0EBBull, 0x6673E09BE0EA76E5ull,
+     0x024BC7A0C7527877ull, 0x45E6B4652E2EC34Eull},
+    {0xD1ED26A1A375CDC8ull, 0xAABC4E896A617CB8ull, 0x0A9C9E8E57D753C6ull,
+     0xA3774A75FEB4C30Eull, 0x30B816C01C93E49Eull, 0xF405BABC06D2408Cull,
+     0xCC0CE6B4CE788ABCull, 0x75E7922D0447956Cull},
+    {0xD07C1676A698BC95ull, 0x5F9AEA4840E2D860ull, 0xD5FC10D58BDF6F02ull,
+     0xF190A2AD4BC2EEA7ull, 0x0C24D11F51726931ull, 0xDB646899A16B6512ull,
+     0x7BC10670047B1DD8ull, 0x2413A5ABCD45F092ull},
+    {0x4E66892190CFD923ull, 0xF10162440365EC8Eull, 0x158ACA5A6A2280AEull,
+     0x0D60ED11C0224166ull, 0x7CD2E9A71B9D7488ull, 0x450D7289706AB2A3ull,
+     0x88FAE34EC9A0D7DCull, 0x96FF9103575A97DAull},
+    {0x77990FAC6046C446ull, 0xB174B5FB30C76676ull, 0xE352CE3EB56CF82Aull,
+     0xC6039B6873A9A082ull, 0xE3F80F3AE333148Aull, 0xB853BA24BA3539B9ull,
+     0xE8863E52ECCB0C74ull, 0x309B4CC1092CC245ull},
+    {0xBC2B70BEE8388D9Full, 0xE48D92AE22216DCEull, 0xF15F3BF3E2C15D8Full,
+     0x1DD964D4812D8B24ull, 0xD56AF02FB4665E4Cull, 0x98002200595BD9A3ull,
+     0x049246D50BB8FA12ull, 0x1B542DF485B579B9ull},
+    {0x2347409ADFA8E497ull, 0x36015C2211D62498ull, 0xE9F141F32EB82690ull,
+     0x1F839912D0449FB9ull, 0x4E4DCFFF2D02D97Cull, 0xF8A03AB4C0F625C9ull,
+     0x0605F575795DAC5Cull, 0x4746C9BEA0DDA6B1ull},
+    {0xCA5BB519ECE7481Bull, 0xFD496155E55CA945ull, 0xF753B9DBB1515F81ull,
+     0x50549E8BAC0F70E7ull, 0x8614FB0271E21C60ull, 0x60C72947EB0F0070ull,
+     0xA6511C10AEE742B6ull, 0x48FB48F2CACCB43Eull}};
+
+#endif  // PRINT_RESULTS
+
+// Golden test: with seed 12345, every Fill() must reproduce kExpected exactly.
+void TestGolden() {
+  HWY_ALIGN Xorshift128Plus rng(12345);
+  for (uint64_t vector = 0; vector < kVectors; ++vector) {
+    HWY_ALIGN uint64_t lanes[Xorshift128Plus::N];
+    rng.Fill(lanes);
+#if PRINT_RESULTS
+    Print(lanes);  // PRINT_RESULTS mode: no comparison is made.
+#else
+    for (size_t i = 0; i < Xorshift128Plus::N; ++i) {
+      ASSERT_EQ(kExpected[vector][i], lanes[i])
+          << "Where vector=" << vector << " i=" << i;
+    }
+#endif  // PRINT_RESULTS
+  }
+}
+
+// Distinct seeds must yield distinct first outputs (checked for 2^14 seeds).
+void TestSeedChanges() {
+  constexpr size_t kNumSeeds = 16384;  // Every 14-bit seed.
+  std::vector<uint64_t> first;
+  first.reserve(kNumSeeds);
+
+  HWY_ALIGN uint64_t batch[Xorshift128Plus::N];
+  for (size_t seed = 0; seed != kNumSeeds; ++seed) {
+    // A fresh generator per seed; only lane 0 of its first batch is kept.
+    HWY_ALIGN Xorshift128Plus rng(seed);
+    rng.Fill(batch);
+    first.push_back(batch[0]);
+  }
+  ASSERT_EQ(kNumSeeds, first.size());
+
+  // Sorting makes duplicates adjacent so that std::unique can drop them; the
+  // size is unchanged only if no two seeds produced the same value.
+  std::sort(first.begin(), first.end());
+  const auto unique_end = std::unique(first.begin(), first.end());
+  first.erase(unique_end, first.end());
+  EXPECT_EQ(kNumSeeds, first.size());
+}
+
+void TestFloat() {  // Random floats must lie in [0, 1] and average about 0.5.
+  ThreadPoolInternal pool(8);
+
+#ifdef JXL_DISABLE_SLOW_TESTS
+  const uint32_t kMaxSeed = 2048;
+#else   // JXL_DISABLE_SLOW_TESTS
+  const uint32_t kMaxSeed = 16384;  // All 14-bit seeds
+#endif  // JXL_DISABLE_SLOW_TESTS
+  pool.Run(0, kMaxSeed, ThreadPool::SkipInit(),
+           [](const int seed, const int /*thread*/) {
+             HWY_ALIGN Xorshift128Plus rng(seed);
+
+             const HWY_FULL(uint32_t) du;
+             const HWY_FULL(float) df;
+             HWY_ALIGN uint64_t batch[Xorshift128Plus::N];
+             HWY_ALIGN float lanes[MaxLanes(df)];
+             double sum = 0.0;
+             size_t count = 0;
+             const size_t kReps = 2000;
+             for (size_t reps = 0; reps < kReps; ++reps) {
+               rng.Fill(batch);  // N uint64 values, read below as 2*N uint32.
+               for (size_t i = 0; i < Xorshift128Plus::N * 2; i += Lanes(df)) {
+                 const auto bits =
+                     Load(du, reinterpret_cast<const uint32_t*>(batch) + i);
+                 // Bits of 1.0f (0x3F800000) | 23 random mantissa bits = [1, 2)
+                 const auto rand12 =
+                     BitCast(df, ShiftRight<9>(bits) | Set(du, 0x3F800000));
+                 const auto rand01 = rand12 - Set(df, 1.0f);
+                 Store(rand01, df, lanes);
+                 // NOTE(review): visits all MaxLanes(df) slots; assumes the
+                 // Store above filled every one of them - confirm.
+                 for (float lane : lanes) {
+                   sum += lane;
+                   count += 1;
+                   EXPECT_LE(lane, 1.0f);
+                   EXPECT_GE(lane, 0.0f);
+                 }
+               }
+             }
+             // Mean of all samples of this seed must be within 0.00702 of 0.5.
+             EXPECT_NEAR(0.5, sum / count, 0.00702);
+           });
+}
+
+// For each seed, at most one of the 10000 * N generated lanes may be zero.
+void TestNotZero() {
+#ifdef JXL_DISABLE_SLOW_TESTS
+  const uint32_t kMaxSeed = 500;
+#else   // JXL_DISABLE_SLOW_TESTS
+  const uint32_t kMaxSeed = 2000;
+#endif  // JXL_DISABLE_SLOW_TESTS
+
+  ThreadPoolInternal pool(8);
+  pool.Run(0, kMaxSeed, ThreadPool::SkipInit(),
+           [](const int task, const int /*thread*/) {
+             // The task index doubles as the generator's seed.
+             HWY_ALIGN Xorshift128Plus rng(task);
+             HWY_ALIGN uint64_t batch[Xorshift128Plus::N];
+             size_t num_zero = 0;
+             for (size_t fill = 0; fill != 10000; ++fill) {
+               rng.Fill(batch);
+               for (const uint64_t value : batch) {
+                 if (value == 0) ++num_zero;
+               }
+             }
+             EXPECT_LE(num_zero, 1);
+           });
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+class Xorshift128Test : public hwy::TestWithParamTarget {};  // Test fixture.
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(Xorshift128Test);
+
+HWY_EXPORT_AND_TEST_P(Xorshift128Test, TestNotZero);
+HWY_EXPORT_AND_TEST_P(Xorshift128Test, TestGolden);
+HWY_EXPORT_AND_TEST_P(Xorshift128Test, TestSeedChanges);
+HWY_EXPORT_AND_TEST_P(Xorshift128Test, TestFloat);
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/profiler/profiler.cc b/codec/L2/demos/jxlEnc/third_partys/lib/profiler/profiler.cc
new file mode 100644
index 0000000000..d21ee098f8
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/profiler/profiler.cc
@@ -0,0 +1,459 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/profiler.h"
+
+#if PROFILER_ENABLED
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>  // memcpy
+
+#include <algorithm>  // sort
+#include <atomic>
+#include <cinttypes>  // PRIu64
+#include <hwy/cache_control.h>
+#include <new>
+
+#include "lib/jxl/base/robust_statistics.h"  // HalfSampleMode
+
+// Optionally use SIMD in StreamCacheLine if available.
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/profiler/profiler.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace profiler {
+namespace HWY_NAMESPACE {
+
+// Overwrites `to` without loading it into cache (read-for-ownership).
+// Copies 64 bytes from/to naturally aligned addresses.
+void StreamCacheLine(const Packet* HWY_RESTRICT from, Packet* HWY_RESTRICT to) {
+#if HWY_TARGET == HWY_SCALAR
+  hwy::CopyBytes<64>(from, to);  // Scalar target: ordinary 64-byte copy.
+#else
+  const HWY_CAPPED(uint64_t, 2) d;
+  HWY_FENCE;
+  const uint64_t* HWY_RESTRICT from64 = reinterpret_cast<const uint64_t*>(from);
+  const auto v0 = Load(d, from64 + 0);  // Offsets step by two uint64 (16 B).
+  const auto v1 = Load(d, from64 + 2);
+  const auto v2 = Load(d, from64 + 4);
+  const auto v3 = Load(d, from64 + 6);
+  // Fences prevent the compiler from reordering loads/stores, which may
+  // interfere with write-combining.
+  HWY_FENCE;
+  uint64_t* HWY_RESTRICT to64 = reinterpret_cast<uint64_t*>(to);
+  Stream(v0, d, to64 + 0);  // All four loads precede the first store.
+  Stream(v1, d, to64 + 2);
+  Stream(v2, d, to64 + 4);
+  Stream(v3, d, to64 + 6);
+  HWY_FENCE;
+#endif  // HWY_TARGET == HWY_SCALAR
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace profiler
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace profiler {
+
+HWY_EXPORT(StreamCacheLine);
+
+namespace {
+
+// How many mebibytes to allocate (if PROFILER_ENABLED) per thread that
+// enters at least one zone. Once this buffer is full, the thread will analyze
+// packets (two per zone), which introduces observer overhead.
+#ifndef PROFILER_THREAD_STORAGE
+#define PROFILER_THREAD_STORAGE 32ULL
+#endif
+
+#define PROFILER_PRINT_OVERHEAD 0
+
+// Upper bounds for fixed-size data structures (guarded via HWY_ASSERT):
+constexpr size_t kMaxDepth = 64;   // Maximum nesting of zones.
+constexpr size_t kMaxZones = 256;  // Total number of zones.
+
+// Stack of active (entered but not exited) zones. POD, uninitialized.
+// Used to deduct child duration from the parent's self time.
+struct ActiveZone {
+  const char* name;          // Copied from the zone's entry packet.
+  uint64_t entry_timestamp;  // Timestamp of the zone's entry packet.
+  uint64_t child_total;      // Nested zones' durations plus child overhead.
+};
+
+// Totals for all Zones with the same name. POD, must be zero-initialized.
+struct ZoneTotals {
+  uint64_t total_duration;  // Sum of self durations (overheads deducted).
+  const char* name;         // Matched by address, or strcmp when merging.
+  uint64_t num_calls;       // Number of zone exits accumulated so far.
+};
+
+// Returns minuend - subtrahend, or zero when that difference would be negative
+// (prevents wraparound for the unsigned tick counts used in this file).
+template <typename T>
+inline T ClampedSubtract(const T minuend, const T subtrahend) {
+  const bool would_underflow = subtrahend > minuend;
+  return would_underflow ? 0 : minuend - subtrahend;
+}
+
+}  // namespace
+
+// Per-thread call graph (stack) and ZoneTotals for each zone.
+class Results {
+ public:
+  Results() {
+    // Zero-initialize all accumulators (avoids a check for num_zones_ == 0).
+    memset(zones_, 0, sizeof(zones_));
+  }
+
+  // Used for computing overhead when this thread encounters its first Zone.
+  // This has no observable effect apart from increasing "analyze_elapsed_".
+  uint64_t ZoneDuration(const Packet* packets) {
+    HWY_ASSERT(depth_ == 0);
+    HWY_ASSERT(num_zones_ == 0);
+    AnalyzePackets(packets, 2);  // One entry packet plus one exit packet.
+    const uint64_t duration = zones_[0].total_duration;
+    zones_[0].num_calls = 0;
+    zones_[0].total_duration = 0;
+    HWY_ASSERT(depth_ == 0);
+    num_zones_ = 0;
+    return duration;
+  }
+
+  void SetSelfOverhead(const uint64_t self_overhead) {
+    self_overhead_ = self_overhead;
+  }
+
+  void SetChildOverhead(const uint64_t child_overhead) {
+    child_overhead_ = child_overhead;
+  }
+
+  // Draw all required information from the packets, which can be discarded
+  // afterwards. Called whenever this thread's storage is full.
+  void AnalyzePackets(const Packet* HWY_RESTRICT packets,
+                      const size_t num_packets) {
+    // Ensures prior weakly-ordered streaming stores are globally visible.
+    hwy::StoreFence();
+
+    const uint64_t t0 = TicksBefore();
+
+    for (size_t i = 0; i < num_packets; ++i) {
+      const uint64_t timestamp = packets[i].timestamp;
+      // Entering a zone
+      if (packets[i].name != nullptr) {
+        HWY_ASSERT(depth_ < kMaxDepth);
+        zone_stack_[depth_].name = packets[i].name;
+        zone_stack_[depth_].entry_timestamp = timestamp;
+        zone_stack_[depth_].child_total = 0;
+        ++depth_;
+        continue;
+      }
+
+      HWY_ASSERT(depth_ != 0);  // Exit packet: a zone must still be open.
+      const ActiveZone& active = zone_stack_[depth_ - 1];
+      const uint64_t duration = timestamp - active.entry_timestamp;
+      const uint64_t self_duration = ClampedSubtract(
+          duration, self_overhead_ + child_overhead_ + active.child_total);
+
+      UpdateOrAdd(active.name, 1, self_duration);
+      --depth_;
+
+      // "Deduct" the nested time from its parent's self_duration.
+      if (depth_ != 0) {
+        zone_stack_[depth_ - 1].child_total += duration + child_overhead_;
+      }
+    }
+
+    const uint64_t t1 = TicksAfter();
+    analyze_elapsed_ += t1 - t0;
+  }
+
+  // Incorporates results from another thread. Call after all threads have
+  // exited any zones.
+  void Assimilate(const Results& other) {
+    const uint64_t t0 = TicksBefore();
+    HWY_ASSERT(depth_ == 0);
+    HWY_ASSERT(other.depth_ == 0);
+
+    for (size_t i = 0; i < other.num_zones_; ++i) {
+      const ZoneTotals& zone = other.zones_[i];
+      UpdateOrAdd(zone.name, zone.num_calls, zone.total_duration);
+    }
+    const uint64_t t1 = TicksAfter();
+    analyze_elapsed_ += t1 - t0 + other.analyze_elapsed_;
+  }
+
+  // Single-threaded.
+  void Print() {
+    const uint64_t t0 = TicksBefore();
+    MergeDuplicates();
+
+    // Sort by decreasing total (self) cost.
+    std::sort(zones_, zones_ + num_zones_,
+              [](const ZoneTotals& r1, const ZoneTotals& r2) {
+                return r1.total_duration > r2.total_duration;
+              });
+
+    uint64_t total_visible_duration = 0;
+    for (size_t i = 0; i < num_zones_; ++i) {
+      const ZoneTotals& r = zones_[i];
+      if (r.name[0] != '@') {  // Zones whose name starts with '@' are hidden.
+        total_visible_duration += r.total_duration;
+        printf("%-40s: %10" PRIu64 " x %15" PRIu64 "= %15" PRIu64 "\n", r.name,
+               r.num_calls, r.total_duration / r.num_calls, r.total_duration);
+      }
+    }
+
+    const uint64_t t1 = TicksAfter();
+    analyze_elapsed_ += t1 - t0;
+    printf("Total clocks during analysis: %" PRIu64 "\n", analyze_elapsed_);
+    printf("Total clocks measured: %" PRIu64 "\n", total_visible_duration);
+  }
+
+  // Single-threaded. Clears all results as if no zones had been recorded.
+  void Reset() {
+    analyze_elapsed_ = 0;
+    HWY_ASSERT(depth_ == 0);
+    num_zones_ = 0;
+    memset(zone_stack_, 0, sizeof(zone_stack_));
+    memset(zones_, 0, sizeof(zones_));
+  }
+
+ private:
+  // Updates ZoneTotals of the same name, or inserts a new one if this thread
+  // has not yet seen that name. Uses a self-organizing list data structure,
+  // which avoids dynamic memory allocations and is faster than unordered_map.
+  void UpdateOrAdd(const char* name, const uint64_t num_calls,
+                   const uint64_t duration) {
+    // Special case for first zone: (maybe) update, without swapping.
+    if (zones_[0].name == name) {  // Names are compared by address.
+      zones_[0].total_duration += duration;
+      zones_[0].num_calls += num_calls;
+      return;
+    }
+
+    // Look for a zone with the same name.
+    for (size_t i = 1; i < num_zones_; ++i) {
+      if (zones_[i].name == name) {
+        zones_[i].total_duration += duration;
+        zones_[i].num_calls += num_calls;
+        // Swap with predecessor (more conservative than move to front,
+        // but at least as successful).
+        std::swap(zones_[i - 1], zones_[i]);
+        return;
+      }
+    }
+
+    // Not found; create a new ZoneTotals.
+    HWY_ASSERT(num_zones_ < kMaxZones);
+    ZoneTotals* HWY_RESTRICT zone = zones_ + num_zones_;
+    zone->name = name;
+    zone->num_calls = num_calls;
+    zone->total_duration = duration;
+    ++num_zones_;
+  }
+
+  // Each instantiation of a function template seems to get its own copy of
+  // __func__ and GCC doesn't merge them. An N^2 search for duplicates is
+  // acceptable because we only expect a few dozen zones.
+  void MergeDuplicates() {
+    for (size_t i = 0; i < num_zones_; ++i) {
+      // Add any subsequent duplicates to num_calls and total_duration.
+      for (size_t j = i + 1; j < num_zones_;) {
+        if (!strcmp(zones_[i].name, zones_[j].name)) {
+          zones_[i].num_calls += zones_[j].num_calls;
+          zones_[i].total_duration += zones_[j].total_duration;
+          // Fill hole with last item; j is not advanced so that the moved
+          // item is examined next.
+          zones_[j] = zones_[--num_zones_];
+        } else {  // Name differed, try next ZoneTotals.
+          ++j;
+        }
+      }
+    }
+  }
+  uint64_t analyze_elapsed_ = 0;  // Ticks spent in analysis and printing.
+  uint64_t self_overhead_ = 0;
+  uint64_t child_overhead_ = 0;
+
+  size_t depth_ = 0;      // Number of active zones <= kMaxDepth.
+  size_t num_zones_ = 0;  // Number of unique zones <= kMaxZones.
+
+  // After other members to avoid large pointer offsets.
+  alignas(64) ActiveZone zone_stack_[kMaxDepth];  // Last = newest
+  alignas(64) ZoneTotals zones_[kMaxZones];       // Self-organizing list
+};
+
+ThreadSpecific::ThreadSpecific()
+    : max_packets_(PROFILER_THREAD_STORAGE << 16),  // MiB * 2^20 / 16 B/Packet
+      packets_(hwy::AllocateAligned<Packet>(max_packets_)),
+      num_packets_(0),
+      results_(hwy::MakeUniqueAligned<Results>()) {}
+
+ThreadSpecific::~ThreadSpecific() {}  // Defined here, where Results is complete.
+
+void ThreadSpecific::FlushBuffer() {  // Moves the full buffer_ into packets_.
+  if (num_packets_ + kBufferCapacity > max_packets_) {
+    results_->AnalyzePackets(packets_.get(), num_packets_);
+    num_packets_ = 0;  // Analyzed packets are discarded; storage is reused.
+  }
+  // This buffering halves observer overhead and decreases the overall
+  // runtime by about 3%.
+  HWY_DYNAMIC_DISPATCH(StreamCacheLine)
+  (buffer_, packets_.get() + num_packets_);
+  num_packets_ += kBufferCapacity;
+  buffer_size_ = 0;
+}
+
+void ThreadSpecific::AnalyzeRemainingPackets() {
+  Packet* const storage = packets_.get();
+
+  // The buffered packets would not fit => analyze and discard storage first.
+  const bool storage_full = num_packets_ + buffer_size_ > max_packets_;
+  if (storage_full) {
+    results_->AnalyzePackets(storage, num_packets_);
+    num_packets_ = 0;
+  }
+
+  // Append the partially filled buffer, then analyze everything that is left.
+  memcpy(storage + num_packets_, buffer_, buffer_size_ * sizeof(Packet));
+  results_->AnalyzePackets(storage, num_packets_ + buffer_size_);
+  buffer_size_ = num_packets_ = 0;
+}
+
+void ThreadSpecific::ComputeOverhead() {
+  // Delay after capturing timestamps before/after the actual zone runs. Even
+  // with frequency throttling disabled, this has a multimodal distribution,
+  // including 32, 34, 48, 52, 59, 62.
+  uint64_t self_overhead;
+  {
+    const size_t kNumSamples = 32;
+    uint32_t samples[kNumSamples];
+    for (size_t idx_sample = 0; idx_sample < kNumSamples; ++idx_sample) {
+      const size_t kNumDurations = 1024;
+      uint32_t durations[kNumDurations];
+
+      for (size_t idx_duration = 0; idx_duration < kNumDurations;
+           ++idx_duration) {
+        {  // Own scope: the zone is entered and exited right away.
+          PROFILER_ZONE("Dummy Zone (never shown)");
+        }
+        const uint64_t duration = results_->ZoneDuration(buffer_);
+        buffer_size_ = 0;
+        durations[idx_duration] = static_cast<uint32_t>(duration);
+        HWY_ASSERT(num_packets_ == 0);
+      }
+      jxl::CountingSort(durations, durations + kNumDurations);
+      samples[idx_sample] = jxl::HalfSampleMode()(durations, kNumDurations);
+    }
+    // Median of the sorted samples.
+    jxl::CountingSort(samples, samples + kNumSamples);
+    self_overhead = samples[kNumSamples / 2];
+#if PROFILER_PRINT_OVERHEAD
+    printf("Overhead: %" PRIu64 "\n", self_overhead);  // uint64_t, not size_t.
+#endif
+    results_->SetSelfOverhead(self_overhead);
+  }
+
+  // Delay before capturing start timestamp / after end timestamp.
+  const size_t kNumSamples = 32;
+  uint32_t samples[kNumSamples];
+  for (size_t idx_sample = 0; idx_sample < kNumSamples; ++idx_sample) {
+    const size_t kNumDurations = 16;
+    uint32_t durations[kNumDurations];
+    for (size_t idx_duration = 0; idx_duration < kNumDurations;
+         ++idx_duration) {
+      const size_t kReps = 10000;
+      // Analysis time should not be included => must fit within buffer.
+      HWY_ASSERT(kReps * 2 < max_packets_);
+      hwy::StoreFence();
+      const uint64_t t0 = TicksBefore();
+      for (size_t i = 0; i < kReps; ++i) {
+        PROFILER_ZONE("Dummy");
+      }
+      hwy::StoreFence();
+      const uint64_t t1 = TicksAfter();
+      HWY_ASSERT(num_packets_ + buffer_size_ == kReps * 2);
+      buffer_size_ = 0;  // Drop the dummy packets without analyzing them.
+      num_packets_ = 0;
+      const uint64_t avg_duration = (t1 - t0 + kReps / 2) / kReps;  // Rounded.
+      durations[idx_duration] =
+          static_cast<uint32_t>(ClampedSubtract(avg_duration, self_overhead));
+    }
+    jxl::CountingSort(durations, durations + kNumDurations);
+    samples[idx_sample] = jxl::HalfSampleMode()(durations, kNumDurations);
+  }
+  jxl::CountingSort(samples, samples + kNumSamples);
+  const uint64_t child_overhead = samples[9 * kNumSamples / 10];  // ~90th pct.
+#if PROFILER_PRINT_OVERHEAD
+  printf("Child overhead: %" PRIu64 "\n", child_overhead);  // uint64_t as well.
+#endif
+  results_->SetChildOverhead(child_overhead);
+}
+
+namespace {
+
+// List head of all ThreadSpecific; not in Zone to keep <atomic> out of header.
+std::atomic<ThreadSpecific*>& GetHead() {
+  static std::atomic<ThreadSpecific*> head_{nullptr};  // Owning
+  return head_;
+}
+
+}  // namespace
+
+// Thread-safe: creates this thread's ThreadSpecific and links it into the list.
+ThreadSpecific* Zone::InitThreadSpecific() {
+  ThreadSpecific* thread_specific =
+      hwy::MakeUniqueAligned<ThreadSpecific>().release();
+
+  // Insert at the front of the unordered list via compare-and-swap.
+  std::atomic<ThreadSpecific*>& head = GetHead();
+  ThreadSpecific* old_head = head.load(std::memory_order_relaxed);
+  thread_specific->SetNext(old_head);
+  while (!head.compare_exchange_weak(old_head, thread_specific,
+                                     std::memory_order_release,
+                                     std::memory_order_relaxed)) {
+    thread_specific->SetNext(old_head);  // The failed CAS refreshed old_head.
+    // TODO(janwas): pause
+  }
+
+  // ComputeOverhead also creates a Zone, so this needs to be set before that
+  // to prevent infinite recursion.
+  GetThreadSpecific() = thread_specific;
+
+  thread_specific->ComputeOverhead();
+  return thread_specific;
+}
+
+// Single-threaded.
+/*static*/ void Zone::PrintResults() {
+  ThreadSpecific* const head = GetHead().load(std::memory_order_relaxed);
+  if (head == nullptr) {
+    return;  // No thread has registered, hence nothing to report.
+  }
+
+  // The list head doubles as the accumulator for all other threads' results.
+  Results& combined = head->GetResults();
+  for (ThreadSpecific* node = head; node != nullptr; node = node->GetNext()) {
+    // Drain packets that are still buffered or stored but not yet analyzed.
+    node->AnalyzeRemainingPackets();
+    if (node == head) continue;
+
+    combined.Assimilate(node->GetResults());
+    node->GetResults().Reset();
+  }
+
+  combined.Print();
+  combined.Reset();
+}
+
+}  // namespace profiler
+
+#endif  // HWY_ONCE
+#endif  // PROFILER_ENABLED
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/profiler/profiler.h b/codec/L2/demos/jxlEnc/third_partys/lib/profiler/profiler.h
new file mode 100644
index 0000000000..c71f63cb3f
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/profiler/profiler.h
@@ -0,0 +1,165 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_PROFILER_PROFILER_H_
+#define LIB_PROFILER_PROFILER_H_
+
+// High precision, low overhead time measurements. Returns exact call counts and
+// total elapsed time for user-defined 'zones' (code regions, i.e. C++ scopes).
+//
+// Usage: instrument regions of interest: { PROFILER_ZONE("name"); /*code*/ } or
+// void FuncToMeasure() { PROFILER_FUNC; /*code*/ }.
+// After all threads have exited any zones, invoke PROFILER_PRINT_RESULTS() to
+// print call counts and average durations [CPU cycles] to stdout, sorted in
+// descending order of total duration.
+
+// If zero, this file has no effect and no measurements will be recorded.
+#ifndef PROFILER_ENABLED
+#define PROFILER_ENABLED 0
+#endif
+#if PROFILER_ENABLED
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <hwy/aligned_allocator.h>
+#include <hwy/base.h>
+
+#include "lib/profiler/tsc_timer.h"
+
+#if HWY_COMPILER_MSVC
+#define PROFILER_PUBLIC
+#else
+#define PROFILER_PUBLIC __attribute__((visibility("default")))
+#endif
+
+namespace profiler {
+
+// Represents zone entry/exit events. POD.
+#pragma pack(push, 1)
+struct Packet {
+  // Computing a hash or string table is likely too expensive, and offsets
+  // from other libraries' string literals can be too large to combine them and
+  // a full-resolution timestamp into 64 bits.
+  uint64_t timestamp;  // TicksBefore() on entry, TicksAfter() on exit.
+  const char* name;  // nullptr for exit packets
+#if UINTPTR_MAX <= 0xFFFFFFFFu
+  uint32_t padding;  // Keeps sizeof(Packet) == 16 when pointers are 4 bytes.
+#endif
+};
+#pragma pack(pop)
+static_assert(sizeof(Packet) == 16, "Wrong Packet size");
+
+class Results;  // pImpl
+
+// Per-thread packet storage, dynamically allocated and aligned.
+class ThreadSpecific {
+  static constexpr size_t kBufferCapacity = 64 / sizeof(Packet);
+
+ public:
+  PROFILER_PUBLIC explicit ThreadSpecific();
+  PROFILER_PUBLIC ~ThreadSpecific();
+
+  // Depends on Zone => defined out of line.
+  PROFILER_PUBLIC void ComputeOverhead();
+
+  HWY_INLINE void WriteEntry(const char* name) { Write(name, TicksBefore()); }
+  HWY_INLINE void WriteExit() { Write(nullptr, TicksAfter()); }
+
+  PROFILER_PUBLIC void AnalyzeRemainingPackets();
+
+  // Accessors instead of public member for well-defined data layout.
+  void SetNext(ThreadSpecific* next) { next_ = next; }
+  ThreadSpecific* GetNext() const { return next_; }
+
+  Results& GetResults() { return *results_; }
+
+ private:
+  PROFILER_PUBLIC void FlushBuffer();
+
+  // Write packet to buffer/storage, emptying them as needed.
+  void Write(const char* name, const uint64_t timestamp) {
+    if (buffer_size_ == kBufferCapacity) {  // Full
+      FlushBuffer();
+    }
+    buffer_[buffer_size_].name = name;
+    buffer_[buffer_size_].timestamp = timestamp;
+    ++buffer_size_;
+  }
+
+  // Write-combining buffer to avoid cache pollution. Must be the first
+  // non-static member to ensure cache-line alignment.
+  Packet buffer_[kBufferCapacity];
+  size_t buffer_size_ = 0;  // Number of valid packets in buffer_.
+
+  // Contiguous storage for zone enter/exit packets.
+  const size_t max_packets_;  // Capacity of packets_, in packets.
+  hwy::AlignedFreeUniquePtr<Packet[]> packets_;
+  size_t num_packets_;  // Number of valid packets in packets_.
+
+  // Linked list of all threads.
+  ThreadSpecific* next_ = nullptr;  // Owned, never released.
+
+  hwy::AlignedUniquePtr<Results> results_;
+};
+
+// RAII zone enter/exit recorder constructed by PROFILER_ZONE; also
+// responsible for initializing ThreadSpecific.
+class Zone {
+ public:
+  HWY_NOINLINE explicit Zone(const char* name) {
+    HWY_FENCE;
+    ThreadSpecific* HWY_RESTRICT thread_specific = GetThreadSpecific();
+    if (HWY_UNLIKELY(thread_specific == nullptr)) {  // First zone on thread.
+      thread_specific = InitThreadSpecific();
+    }
+
+    thread_specific->WriteEntry(name);
+  }
+
+  HWY_NOINLINE ~Zone() { GetThreadSpecific()->WriteExit(); }
+
+  // Call exactly once after all threads have exited all zones.
+  PROFILER_PUBLIC static void PrintResults();
+
+ private:
+  // Returns reference to the thread's ThreadSpecific pointer (initially null).
+  // Function-local static avoids needing a separate definition.
+  static ThreadSpecific*& GetThreadSpecific() {
+    static thread_local ThreadSpecific* thread_specific;
+    return thread_specific;
+  }
+
+  // Non time-critical: called only while the thread's pointer is still null.
+  PROFILER_PUBLIC ThreadSpecific* InitThreadSpecific();
+};
+
+// Creates a zone starting from here until the end of the current scope.
+// Timestamps will be recorded when entering and exiting the zone.
+// To ensure the name pointer remains valid, we require it to be a string
+// literal (by merging with ""). We also compare strings by address.
+#define PROFILER_ZONE(name)             \
+  HWY_FENCE;                            \
+  const ::profiler::Zone zone("" name); \
+  HWY_FENCE
+
+// Creates a zone for an entire function (when placed at its beginning).
+// Shorter/more convenient than PROFILER_ZONE; the zone is named __func__.
+#define PROFILER_FUNC                    \
+  HWY_FENCE;                             \
+  const ::profiler::Zone zone(__func__); \
+  HWY_FENCE
+
+#define PROFILER_PRINT_RESULTS ::profiler::Zone::PrintResults
+
+}  // namespace profiler
+
+#else  // !PROFILER_ENABLED: the macros below expand to nothing.
+#define PROFILER_ZONE(name)
+#define PROFILER_FUNC
+#define PROFILER_PRINT_RESULTS()
+#endif  // PROFILER_ENABLED
+
+#endif  // LIB_PROFILER_PROFILER_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/profiler/tsc_timer.h b/codec/L2/demos/jxlEnc/third_partys/lib/profiler/tsc_timer.h
new file mode 100644
index 0000000000..61ccd5af59
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/profiler/tsc_timer.h
@@ -0,0 +1,131 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_PROFILER_TSC_TIMER_H_
+#define LIB_PROFILER_TSC_TIMER_H_
+
+// High-resolution (~10 ns) timestamps, using fences to prevent reordering and
+// ensure exactly the desired regions are measured.
+
+#include <stdint.h>
+
+#include <ctime>
+#include <hwy/base.h>
+#include <hwy/cache_control.h>  // LoadFence
+
+namespace profiler {
+
+// TicksBefore/After return absolute timestamps and must be placed immediately
+// before and after the region to measure. The functions are distinct because
+// they use different fences.
+//
+// Background: RDTSC is not 'serializing'; earlier instructions may complete
+// after it, and/or later instructions may complete before it. 'Fences' ensure
+// regions' elapsed times are independent of such reordering. The only
+// documented unprivileged serializing instruction is CPUID, which acts as a
+// full fence (no reordering across it in either direction). Unfortunately
+// the latency of CPUID varies wildly (perhaps made worse by not initializing
+// its EAX input). Because it cannot reliably be deducted from the region's
+// elapsed time, it must not be included in the region to measure (i.e.
+// between the two RDTSC).
+//
+// The newer RDTSCP is sometimes described as serializing, but it actually
+// only serves as a half-fence with release semantics. Although all
+// instructions in the region will complete before the final timestamp is
+// captured, subsequent instructions may leak into the region and increase the
+// elapsed time. Inserting another fence after the final RDTSCP would prevent
+// such reordering without affecting the measured region.
+//
+// Fortunately, such a fence exists. The LFENCE instruction is only documented
+// to delay later loads until earlier loads are visible. However, Intel's
+// reference manual says it acts as a full fence (waiting until all earlier
+// instructions have completed, and delaying later instructions until it
+// completes). AMD assigns the same behavior to MFENCE.
+//
+// We need a fence before the initial RDTSC to prevent earlier instructions
+// from leaking into the region, and arguably another after RDTSC to avoid
+// region instructions from completing before the timestamp is recorded.
+// When surrounded by fences, the additional RDTSCP half-fence provides no
+// benefit, so the initial timestamp can be recorded via RDTSC, which has
+// lower overhead than RDTSCP because it does not read TSC_AUX. In summary,
+// we define Before = LFENCE/RDTSC/LFENCE; After = RDTSCP/LFENCE.
+//
+// Using Before+Before leads to higher variance and overhead than After+After.
+// However, After+After includes an LFENCE in the region measurements, which
+// adds a delay dependent on earlier loads. The combination of Before+After
+// is faster than Before+Before and more consistent than After+After because
+// the first LFENCE already delayed subsequent loads before the measured
+// region. This combination seems not to have been considered in prior work:
+// http://akaros.cs.berkeley.edu/lxr/akaros/kern/arch/x86/rdtsc_test.c
+//
+// Note: performance counters can measure 'exact' instructions-retired or
+// (unhalted) cycle counts. The RDPMC instruction is not serializing and also
+// requires fences. Unfortunately, it is not accessible on all OSes and we
+// prefer to avoid kernel-mode drivers. Performance counters are also affected
+// by several under/over-count errata, so we use the TSC instead.
+
+// Returns a 64-bit timestamp in units of 'ticks'; to convert to seconds,
+// divide by InvariantTicksPerSecond. Although 32-bit ticks are faster to read,
+// they overflow too quickly to measure long regions.
+static HWY_INLINE HWY_MAYBE_UNUSED uint64_t TicksBefore() {
+  uint64_t t;
+#if HWY_ARCH_PPC
+  asm volatile("mfspr %0, %1" : "=r"(t) : "i"(268));
+#elif HWY_ARCH_X86_64 && HWY_COMPILER_MSVC
+  hwy::LoadFence();
+  HWY_FENCE;
+  t = __rdtsc();
+  hwy::LoadFence();
+  HWY_FENCE;
+#elif HWY_ARCH_X86_64 && (HWY_COMPILER_CLANG || HWY_COMPILER_GCC)
+  asm volatile(
+      "lfence\n\t"
+      "rdtsc\n\t"
+      "shl $32, %%rdx\n\t"
+      "or %%rdx, %0\n\t"
+      "lfence"
+      : "=a"(t)
+      :
+      // "memory" avoids reordering. rdx = TSC >> 32.
+      // "cc" = flags modified by SHL.
+      : "rdx", "memory", "cc");
+#else
+  // Fall back to OS - unsure how to reliably query cntvct_el0 frequency.
+  timespec ts;
+  clock_gettime(CLOCK_MONOTONIC, &ts);
+  t = ts.tv_sec * 1000000000LL + ts.tv_nsec;  // Here the unit is nanoseconds.
+#endif
+  return t;
+}
+
+static HWY_INLINE HWY_MAYBE_UNUSED uint64_t TicksAfter() {
+  uint64_t t;
+#if HWY_ARCH_X86_64 && HWY_COMPILER_MSVC
+  HWY_FENCE;
+  unsigned aux;  // Output argument of __rdtscp; its value is not used.
+  t = __rdtscp(&aux);
+  hwy::LoadFence();
+  HWY_FENCE;
+#elif HWY_ARCH_X86_64 && (HWY_COMPILER_CLANG || HWY_COMPILER_GCC)
+  // Use inline asm because __rdtscp generates code to store TSC_AUX (ecx).
+  asm volatile(
+      "rdtscp\n\t"
+      "shl $32, %%rdx\n\t"
+      "or %%rdx, %0\n\t"
+      "lfence"
+      : "=a"(t)
+      :
+      // "memory" avoids reordering. rcx = TSC_AUX. rdx = TSC >> 32.
+      // "cc" = flags modified by SHL.
+      : "rcx", "rdx", "memory", "cc");
+#else
+  t = TicksBefore();  // No difference on other platforms (incl. HWY_ARCH_PPC).
+#endif
+  return t;
+}
+
+}  // namespace profiler
+
+#endif  // LIB_PROFILER_TSC_TIMER_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/threads/libjxl_threads.pc.in b/codec/L2/demos/jxlEnc/third_partys/lib/threads/libjxl_threads.pc.in
new file mode 100644
index 0000000000..8a3275cf1c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/threads/libjxl_threads.pc.in
@@ -0,0 +1,12 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+libdir=${exec_prefix}/@CMAKE_INSTALL_LIBDIR@
+includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
+
+Name: libjxl_threads
+Description: JPEG XL multi-thread runner using std::threads.
+Version: @JPEGXL_LIBRARY_VERSION@
+Requires.private: @JPEGXL_THREADS_LIBRARY_REQUIRES@
+Libs: -L${libdir} -ljxl_threads
+Libs.private: -lm
+Cflags: -I${includedir}
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/threads/resizable_parallel_runner.cc b/codec/L2/demos/jxlEnc/third_partys/lib/threads/resizable_parallel_runner.cc
new file mode 100644
index 0000000000..1208a3856e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/threads/resizable_parallel_runner.cc
@@ -0,0 +1,195 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "jxl/resizable_parallel_runner.h"
+
+#include <algorithm>
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+#include <thread>
+#include <vector>
+
+namespace jpegxl {
+namespace {
+
+// A thread pool that allows changing the number of threads it runs. It also
+// runs tasks on the calling thread, which can work better on schedulers for
+// heterogeneous architectures.
+struct ResizeableParallelRunner {
+  // Resizes the pool so that `num` threads in total (the caller of Run() plus
+  // `num - 1` workers) take part; 0 and 1 both mean "no worker threads".
+  void SetNumThreads(size_t num) {
+    if (num > 0) num -= 1;  // The thread calling Run() counts as one.
+    {
+      std::unique_lock<std::mutex> l(state_mutex_);
+      num_desired_workers_ = num;
+      workers_can_proceed_.notify_all();  // Surplus workers wake up and exit.
+    }
+    if (workers_.size() < num) {
+      for (size_t i = workers_.size(); i < num; i++) {
+        workers_.emplace_back([this, i]() { WorkerBody(i); });
+      }
+    }
+    if (workers_.size() > num) {
+      for (size_t i = num; i < workers_.size(); i++) {
+        workers_[i].join();
+      }
+      workers_.resize(num);
+    }
+  }
+
+  ~ResizeableParallelRunner() { SetNumThreads(0); }  // Joins all workers.
+
+  // Runs func(jxl_opaque, task, thread) for every task in [start, end) on the
+  // calling thread and the workers, returning once all tasks have finished.
+  // Returns -1 if start > end, 0 without calling init if the range is empty,
+  // and otherwise the result of init (tasks only run if that is 0).
+  JxlParallelRetCode Run(void* jxl_opaque, JxlParallelRunInit init,
+                         JxlParallelRunFunction func, uint32_t start,
+                         uint32_t end) {
+    // Same contract as ThreadParallelRunner::Runner. Without these guards,
+    // `end - start - 1` below wraps around and every worker is woken up.
+    if (start > end) return -1;
+    if (start == end) return 0;
+    if (end - start == 1) {  // Single task: not worth waking any worker.
+      JxlParallelRetCode ret = init(jxl_opaque, 1);
+      if (ret != 0) return ret;
+      func(jxl_opaque, start, 0);
+      return ret;
+    }
+
+    size_t num_workers = std::min<size_t>(workers_.size() + 1, end - start);
+    JxlParallelRetCode ret = init(jxl_opaque, num_workers);
+    if (ret != 0) return ret;
+
+    {
+      std::unique_lock<std::mutex> l(state_mutex_);
+      // Avoid waking up more workers than needed.
+      max_running_workers_ = end - start - 1;
+      next_task_ = start;
+      end_task_ = end;
+      func_ = func;
+      jxl_opaque_ = jxl_opaque;
+      work_available_ = true;
+      num_running_workers_++;  // Accounts for the calling thread.
+      workers_can_proceed_.notify_all();
+    }
+
+    DequeueTasks(0);  // The calling thread participates as thread 0.
+    std::unique_lock<std::mutex> l(state_mutex_);  // Wait for stragglers.
+    while (num_running_workers_ != 0) work_done_.wait(l);
+    return ret;
+  }
+
+ private:
+  // Body of worker thread `worker_id`: sleeps until there is work for it, takes
+  // part in the current Run(), and exits once the pool shrinks below its id.
+  void WorkerBody(size_t worker_id) {
+    while (true) {
+      {
+        std::unique_lock<std::mutex> l(state_mutex_);
+        // Worker pool was reduced, resize down.
+        if (worker_id >= num_desired_workers_) return;
+        // Nothing to do this time.
+        if (!work_available_ || worker_id >= max_running_workers_) {
+          workers_can_proceed_.wait(l);
+          continue;
+        }
+        num_running_workers_++;
+      }
+      DequeueTasks(worker_id + 1);  // Thread id 0 is the caller of Run().
+    }
+  }
+
+  // Claims tasks one at a time through the atomic next_task_ until the range is
+  // exhausted, then signs this thread off as a running worker.
+  void DequeueTasks(size_t thread_id) {
+    while (true) {
+      uint32_t task = next_task_++;
+      if (task >= end_task_) {
+        std::unique_lock<std::mutex> l(state_mutex_);
+        num_running_workers_--;
+        work_available_ = false;  // Late wakers must not join this Run().
+        if (num_running_workers_ == 0) work_done_.notify_all();
+        break;
+      }
+      func_(jxl_opaque_, task, thread_id);
+    }
+  }
+
+  // Signalled when a worker may have something to do, which can be one of:
+  // - quitting (when worker_id >= num_desired_workers_)
+  // - having work available for them (work_available_ is true and worker_id <
+  // max_running_workers_)
+  std::condition_variable workers_can_proceed_;
+
+  // Workers are done, and the main thread can proceed (num_running_workers_ ==
+  // 0)
+  std::condition_variable work_done_;
+
+  std::vector<std::thread> workers_;
+
+  // Protects all the remaining variables, except for func_, jxl_opaque_ and
+  // end_task_ (for which only the write by the main thread is protected, and
+  // subsequent uses by workers happen-after it) and next_task_ (which is
+  // atomic).
+  std::mutex state_mutex_;
+
+  // Range of tasks that still need to be done.
+  std::atomic<uint32_t> next_task_;
+  uint32_t end_task_;
+
+  // Function to run and its argument.
+  JxlParallelRunFunction func_;
+  void* jxl_opaque_;  // not owned
+
+  // Variables that control the workers:
+  // - work_available_ is set to true after a call to Run() and to false at the
+  // end of it.
+  // - num_desired_workers_ represents the number of workers that should be
+  // present.
+  // - max_running_workers_ represents the number of workers that should be
+  // executing tasks.
+  // - num_running_workers_ represents the number of workers that are executing
+  // tasks.
+  size_t num_desired_workers_ = 0;
+  size_t max_running_workers_ = 0;
+  size_t num_running_workers_ = 0;
+  bool work_available_ = false;
+};
+}  // namespace
+}  // namespace jpegxl
+
+extern "C" {
+// Runs func for every task in [start_range, end_range) on the given runner.
+JXL_THREADS_EXPORT JxlParallelRetCode JxlResizableParallelRunner(
+    void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+    JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) {
+  auto* pool = static_cast<jpegxl::ResizeableParallelRunner*>(runner_opaque);
+  return pool->Run(jpegxl_opaque, init, func, start_range, end_range);
+}
+// Allocates a runner with operator new; memory_manager is currently unused.
+JXL_THREADS_EXPORT void* JxlResizableParallelRunnerCreate(
+    const JxlMemoryManager* memory_manager) {
+  return new jpegxl::ResizeableParallelRunner();
+}
+// Sets the total thread count (the calling thread included) of the runner.
+JXL_THREADS_EXPORT void JxlResizableParallelRunnerSetThreads(
+    void* runner_opaque, size_t num_threads) {
+  auto* pool = static_cast<jpegxl::ResizeableParallelRunner*>(runner_opaque);
+  pool->SetNumThreads(num_threads);
+}
+// Joins the workers and frees a runner made by ...Create; nullptr is a no-op.
+JXL_THREADS_EXPORT void JxlResizableParallelRunnerDestroy(void* runner_opaque) {
+  delete static_cast<jpegxl::ResizeableParallelRunner*>(runner_opaque);
+}
+// ~One thread per 256x256 group, capped at the hardware thread count.
+JXL_THREADS_EXPORT uint32_t
+JxlResizableParallelRunnerSuggestThreads(uint64_t xsize, uint64_t ysize) {
+  const uint64_t groups = xsize * ysize / (256 * 256);
+  return std::min<uint64_t>(std::thread::hardware_concurrency(), groups);
+}
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner.cc b/codec/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner.cc
new file mode 100644
index 0000000000..b9cf4aa6cd
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner.cc
@@ -0,0 +1,101 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "jxl/thread_parallel_runner.h"
+
+#include <string.h>
+
+#include "lib/threads/thread_parallel_runner_internal.h"
+
+namespace {
+
+// The jpegxl_threads library defaults to a malloc/free based
+// JxlMemoryManager, same as the default JxlMemoryManager of the jpegxl
+// library itself.
+
+// malloc-backed default allocator; |opaque| is unused.
+void* ThreadMemoryManagerDefaultAlloc(void* opaque, size_t size) {
+  return malloc(size);
+}
+
+// free-backed default deallocator; |opaque| is unused.
+void ThreadMemoryManagerDefaultFree(void* opaque, void* address) {
+  free(address);
+}
+
+// Copies |memory_manager| into |self| (or zeroes |self| when it is NULL) and
+// installs the default alloc/free when both are unset. Returns false when
+// exactly one of alloc and free is NULL, since they must come as a pair.
+bool ThreadMemoryManagerInit(JxlMemoryManager* self,
+                             const JxlMemoryManager* memory_manager) {
+  if (memory_manager == nullptr) {
+    memset(self, 0, sizeof(*self));
+  } else {
+    *self = *memory_manager;
+  }
+  if ((self->alloc == nullptr) != (self->free == nullptr)) return false;
+  if (self->alloc == nullptr) {
+    self->alloc = ThreadMemoryManagerDefaultAlloc;
+    self->free = ThreadMemoryManagerDefaultFree;
+  }
+  return true;
+}
+
+// Allocates |size| bytes through |memory_manager|.
+void* ThreadMemoryManagerAlloc(const JxlMemoryManager* memory_manager,
+                               size_t size) {
+  return memory_manager->alloc(memory_manager->opaque, size);
+}
+
+// Releases |address| through |memory_manager|.
+void ThreadMemoryManagerFree(const JxlMemoryManager* memory_manager,
+                             void* address) {
+  memory_manager->free(memory_manager->opaque, address);
+}
+
+}  // namespace
+
+JxlParallelRetCode JxlThreadParallelRunner(  // C entry point of the runner.
+    void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+    JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) {
+  return jpegxl::ThreadParallelRunner::Runner(  // Forwards every argument.
+      runner_opaque, jpegxl_opaque, init, func, start_range, end_range);
+}
+
+/// Starts the given number of worker threads and blocks until they are ready.
+/// If "num_worker_threads" is zero, all tasks run on the main thread. Returns
+/// nullptr if |memory_manager| is inconsistent or the allocation fails.
+void* JxlThreadParallelRunnerCreate(const JxlMemoryManager* memory_manager,
+                                    size_t num_worker_threads) {
+  JxlMemoryManager manager;
+  if (!ThreadMemoryManagerInit(&manager, memory_manager)) return nullptr;
+
+  void* const storage =
+      ThreadMemoryManagerAlloc(&manager, sizeof(jpegxl::ThreadParallelRunner));
+  if (storage == nullptr) return nullptr;
+
+  // The storage comes from the (possibly custom) allocator, so construct the
+  // runner in place and remember the manager for the matching free.
+  auto* runner = new (storage) jpegxl::ThreadParallelRunner(num_worker_threads);
+  runner->memory_manager = manager;
+
+  return runner;
+}
+
+void JxlThreadParallelRunnerDestroy(void* runner_opaque) {
+  // Destroys and frees a runner made by JxlThreadParallelRunnerCreate.
+  auto* runner = static_cast<jpegxl::ThreadParallelRunner*>(runner_opaque);
+  if (!runner) return;  // Nothing to destroy.
+  // Copy the manager out first: it lives inside the object being destroyed.
+  const JxlMemoryManager memory_manager = runner->memory_manager;
+  runner->~ThreadParallelRunner();  // Explicit: a custom free is used below.
+  ThreadMemoryManagerFree(&memory_manager, runner);
+}
+
+// Returns a default for the num_worker_threads parameter of
+// JxlThreadParallelRunnerCreate: std::thread::hardware_concurrency().
+size_t JxlThreadParallelRunnerDefaultNumWorkerThreads() {
+  return std::thread::hardware_concurrency();
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner_internal.cc b/codec/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner_internal.cc
new file mode 100644
index 0000000000..5ceede42af
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner_internal.cc
@@ -0,0 +1,217 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/threads/thread_parallel_runner_internal.h"
+
+#include <algorithm>
+
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+#include "sanitizer/common_interface_defs.h"  // __sanitizer_print_stack_trace
+#endif                                        // defined(*_SANITIZER)
+
+#include "jxl/thread_parallel_runner.h"
+#include "lib/jxl/base/profiler.h"
+
+namespace {
+
+// Crashes the program, printing a stack trace first in sanitizer builds.
+bool Abort() {  // Never returns normally, despite the bool return type.
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+  // If compiled with any sanitizer print a stack trace. This call doesn't crash
+  // the program, instead the trap below will crash it also allowing gdb to
+  // break there.
+  __sanitizer_print_stack_trace();
+#endif  // defined(*_SANITIZER)
+
+#ifdef _MSC_VER
+  __debugbreak();  // Lets an attached debugger stop here first.
+  abort();
+#else
+  __builtin_trap();
+#endif
+}
+
+// Debug-only: if JXL_ENABLE_ASSERT is off, |condition| is not evaluated.
+#if JXL_ENABLE_ASSERT
+#define JXL_ASSERT(condition) \
+  do {                        \
+    if (!(condition)) {       \
+      Abort();                \
+    }                         \
+  } while (0)
+#else
+#define JXL_ASSERT(condition) \
+  do {                        \
+  } while (0)
+#endif
+}  // namespace
+
+namespace jpegxl {
+
+// static
+// Runs func(jpegxl_opaque, task, thread) for each task in [start_range,
+// end_range). Returns -1 for an inverted range or a re-entrant call, init's
+// result if it is non-zero, and 0 otherwise (an empty range runs nothing).
+JxlParallelRetCode ThreadParallelRunner::Runner(
+    void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+    JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) {
+  auto* self = static_cast<ThreadParallelRunner*>(runner_opaque);
+  if (start_range > end_range) return -1;
+  if (start_range == end_range) return 0;
+
+  int ret = init(jpegxl_opaque, std::max<size_t>(self->num_worker_threads_, 1));
+  if (ret != 0) return ret;
+
+  // Use a sequential run when num_worker_threads_ is zero since we have no
+  // worker threads.
+  if (self->num_worker_threads_ == 0) {
+    for (uint32_t task = start_range; task < end_range; ++task) {
+      func(jpegxl_opaque, task, /*thread=*/0);
+    }
+    return 0;
+  }
+
+  // Must not re-enter. Claim the runner with a compare-exchange so a rejected
+  // call leaves depth_ untouched instead of poisoning the call in progress.
+  int idle = 0;
+  if (!self->depth_.compare_exchange_strong(idle, 1)) return -1;
+
+  const WorkerCommand worker_command =
+      (static_cast<WorkerCommand>(start_range) << 32) + end_range;
+  // Ensure the inputs do not result in a reserved command.
+  JXL_ASSERT(worker_command != kWorkerWait);
+  JXL_ASSERT(worker_command != kWorkerOnce);
+  JXL_ASSERT(worker_command != kWorkerExit);
+
+  self->data_func_ = func;
+  self->jpegxl_opaque_ = jpegxl_opaque;
+  self->num_reserved_.store(0, std::memory_order_relaxed);
+
+  self->StartWorkers(worker_command);
+  self->WorkersReadyBarrier();
+
+  self->depth_.store(0);  // Release the runner for the next call.
+  return 0;
+}
+
+// static. Repeatedly claims a chunk of the task range and runs it.
+void ThreadParallelRunner::RunRange(ThreadParallelRunner* self,
+                                    const WorkerCommand command,
+                                    const int thread) {
+  const uint32_t begin = command >> 32;        // Upper 32 bits: first task.
+  const uint32_t end = command & 0xFFFFFFFF;   // Lower 32 bits: end (exclusive).
+  const uint32_t num_tasks = end - begin;
+  const uint32_t num_worker_threads = self->num_worker_threads_;
+
+  // OpenMP introduced several "schedule" strategies:
+  // "single" (static assignment of exactly one chunk per thread): slower.
+  // "dynamic" (allocates k tasks at a time): competitive for well-chosen k.
+  // "guided" (allocates k tasks, decreases k): computing k = remaining/n
+  //   is faster than halving k each iteration. We prefer this strategy
+  //   because it avoids user-specified parameters.
+
+  for (;;) {
+#if 0
+      // dynamic
+      const uint32_t my_size = std::max(num_tasks / (num_worker_threads * 4), 1);
+#else
+    // guided: chunk = a quarter of this thread's share of what is left, >= 1.
+    const uint32_t num_reserved =
+        self->num_reserved_.load(std::memory_order_relaxed);
+    const uint32_t num_remaining = num_tasks - num_reserved;
+    const uint32_t my_size =
+        std::max(num_remaining / (num_worker_threads * 4), 1u);
+#endif
+    const uint32_t my_begin = begin + self->num_reserved_.fetch_add(
+                                          my_size, std::memory_order_relaxed);
+    const uint32_t my_end = std::min(my_begin + my_size, begin + num_tasks);
+    // Another thread already reserved the last task.
+    if (my_begin >= my_end) {
+      break;
+    }
+    for (uint32_t task = my_begin; task < my_end; ++task) {
+      self->data_func_(self->jpegxl_opaque_, task, thread);
+    }
+  }
+}
+
+// static. Worker loop: report readiness, wait for a command, execute it.
+void ThreadParallelRunner::ThreadFunc(ThreadParallelRunner* self,
+                                      const int thread) {
+  // Until kWorkerExit command received:
+  for (;;) {
+    std::unique_lock<std::mutex> lock(self->mutex_);
+    // Notify main thread that this thread is ready.
+    if (++self->workers_ready_ == self->num_threads_) {
+      self->workers_ready_cv_.notify_one();  // Last one in wakes the barrier.
+    }
+  RESUME_WAIT:
+    // Wait for a command.
+    self->worker_start_cv_.wait(lock);
+    const WorkerCommand command = self->worker_start_command_;
+    switch (command) {
+      case kWorkerWait:    // spurious wakeup:
+        goto RESUME_WAIT;  // lock still held, avoid incrementing ready.
+      case kWorkerOnce:  // Sent by RunOnEachThread: one call per worker.
+        lock.unlock();  // Run the user function without holding the mutex.
+        self->data_func_(self->jpegxl_opaque_, thread, thread);
+        break;
+      case kWorkerExit:
+        return;  // exits thread
+      default:  // Any other value encodes a task range, see RunRange.
+        lock.unlock();
+        RunRange(self, command, thread);
+        break;
+    }
+  }
+}
+
+ThreadParallelRunner::ThreadParallelRunner(const int num_worker_threads)
+#if defined(__EMSCRIPTEN__)  // This branch never creates worker threads.
+    : num_worker_threads_(0), num_threads_(1) {
+  // TODO(eustas): find out if pthreads would work for us.
+  (void)num_worker_threads;
+#else
+    : num_worker_threads_(num_worker_threads),  // NOTE(review): a negative value wraps to a huge count - confirm callers never pass one.
+      num_threads_(std::max(num_worker_threads, 1)) {
+#endif
+  PROFILER_ZONE("ThreadParallelRunner ctor");
+
+  threads_.reserve(num_worker_threads_);
+
+  // Suppress "unused-private-field" warning.
+  (void)padding1;
+  (void)padding2;
+
+  // Safely handle spurious worker wakeups.
+  worker_start_command_ = kWorkerWait;  // Set before any worker is started.
+
+  for (uint32_t i = 0; i < num_worker_threads_; ++i) {
+    threads_.emplace_back(ThreadFunc, this, i);  // i is the worker's thread id.
+  }
+
+  if (num_worker_threads_ != 0) {
+    WorkersReadyBarrier();  // Block until every worker has reported ready.
+  }
+
+  // Warm up profiler on worker threads so its expensive initialization
+  // doesn't count towards other timer measurements.
+  RunOnEachThread(
+      [](const int task, const int thread) { PROFILER_ZONE("@InitWorkers"); });
+}
+
+ThreadParallelRunner::~ThreadParallelRunner() {  // Stops and joins all workers.
+  if (num_worker_threads_ != 0) {
+    StartWorkers(kWorkerExit);  // Each worker returns from ThreadFunc.
+  }
+
+  for (std::thread& thread : threads_) {
+    JXL_ASSERT(thread.joinable());
+    thread.join();
+  }
+}
+}  // namespace jpegxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner_internal.h b/codec/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner_internal.h
new file mode 100644
index 0000000000..372c6a8950
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner_internal.h
@@ -0,0 +1,172 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+
+// C++ implementation using std::thread of a ::JxlParallelRunner.
+
+// The main class in this module, ThreadParallelRunner, implements a static
+// method ThreadParallelRunner::Runner that can be passed as a
+// JxlParallelRunner when using the JPEG XL library. This uses std::thread
+// internally and related synchronization functions. The number of threads
+// created is fixed at construction time and the threads are re-used for every
+// ThreadParallelRunner::Runner call. Only one concurrent Runner() call per
+// instance is allowed at a time.
+//
+// This is a scalable, lower-overhead thread pool runner, especially suitable
+// for data-parallel computations in the fork-join model, where clients need to
+// know when all tasks have completed.
+//
+// This thread pool can efficiently load-balance millions of tasks using an
+// atomic counter, thus avoiding per-task virtual or system calls. With 48
+// hyperthreads and 1M tasks that add to an atomic counter, overall runtime is
+// 10-20x higher when using std::async, and ~200x for a queue-based thread
+// pool.
+//
+// Usage:
+//   ThreadParallelRunner runner;
+//   JxlDecode(
+//       ... , &ThreadParallelRunner::Runner, static_cast<void*>(&runner));
+
+#ifndef LIB_THREADS_THREAD_PARALLEL_RUNNER_INTERNAL_H_
+#define LIB_THREADS_THREAD_PARALLEL_RUNNER_INTERNAL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <atomic>
+#include <condition_variable>  //NOLINT
+#include <mutex>               //NOLINT
+#include <thread>              //NOLINT
+#include <vector>
+
+#include "jxl/memory_manager.h"
+#include "jxl/parallel_runner.h"
+
+namespace jpegxl {
+
+// Main helper class implementing the ::JxlParallelRunner interface.
+class ThreadParallelRunner {
+ public:
+  // ::JxlParallelRunner interface; runner_opaque must point to this class.
+  static JxlParallelRetCode Runner(void* runner_opaque, void* jpegxl_opaque,
+                                   JxlParallelRunInit init,
+                                   JxlParallelRunFunction func,
+                                   uint32_t start_range, uint32_t end_range);
+
+  // Starts the given number of worker threads and blocks until they are ready.
+  // "num_worker_threads" defaults to one per hyperthread. If zero, all tasks
+  // run on the main thread.
+  explicit ThreadParallelRunner(
+      int num_worker_threads = std::thread::hardware_concurrency());
+
+  // Waits for all threads to exit.
+  ~ThreadParallelRunner();
+
+  // Returns number of worker threads created (some may be sleeping and never
+  // wake up in time to participate in Run). Useful for characterizing
+  // performance; 0 means "run on main thread".
+  size_t NumWorkerThreads() const { return num_worker_threads_; }
+
+  // Returns maximum number of main/worker threads that may call Func. Useful
+  // for allocating per-thread storage.
+  size_t NumThreads() const { return num_threads_; }
+
+  // Runs func(thread, thread) on all thread(s) that may participate in Run: on
+  // the main thread with thread == 0 if NumWorkerThreads() == 0, otherwise
+  // concurrently on each worker thread, with thread in [0, NumThreads()).
+  template <class Func>
+  void RunOnEachThread(const Func& func) {
+    if (num_worker_threads_ == 0) {
+      const int thread = 0;
+      func(thread, thread);
+      return;
+    }
+
+    data_func_ = reinterpret_cast<JxlParallelRunFunction>(&CallClosure<Func>);
+    jpegxl_opaque_ = const_cast<void*>(static_cast<const void*>(&func));
+    StartWorkers(kWorkerOnce);
+    WorkersReadyBarrier();  // func stays alive until every worker is done.
+  }
+
+  JxlMemoryManager memory_manager;
+
+ private:
+  // After construction and between calls to Run, workers are "ready", i.e.
+  // waiting on worker_start_cv_. They are "started" by sending a "command"
+  // and notifying all worker_start_cv_ waiters. (That is why all workers
+  // must be ready/waiting - otherwise, the notification will not reach all of
+  // them and the main thread waits in vain for them to report readiness.)
+  using WorkerCommand = uint64_t;
+
+  // Special values; all others encode the begin/end parameters. Note that all
+  // these are no-op ranges (begin >= end) and therefore never used to encode
+  // ranges.
+  static constexpr WorkerCommand kWorkerWait = ~1ULL;
+  static constexpr WorkerCommand kWorkerOnce = ~2ULL;
+  static constexpr WorkerCommand kWorkerExit = ~3ULL;
+
+  // Calls f(task, thread). Used for type erasure of Func arguments. The
+  // signature must match JxlParallelRunFunction, hence a void* argument.
+  template <class Closure>
+  static void CallClosure(void* f, const uint32_t task, const size_t thread) {
+    (*reinterpret_cast<const Closure*>(f))(task, thread);
+  }
+
+  void WorkersReadyBarrier() {  // Blocks until all workers have reported ready.
+    std::unique_lock<std::mutex> lock(mutex_);
+    // Typically only a single iteration.
+    while (workers_ready_ != threads_.size()) {
+      workers_ready_cv_.wait(lock);
+    }
+    workers_ready_ = 0;  // Re-arm the counter for the next command.
+
+    // Safely handle spurious worker wakeups.
+    worker_start_command_ = kWorkerWait;
+  }
+
+  // Precondition: all workers are ready.
+  void StartWorkers(const WorkerCommand worker_command) {
+    mutex_.lock();
+    worker_start_command_ = worker_command;
+    // Workers will need this lock, so release it before they wake up.
+    mutex_.unlock();
+    worker_start_cv_.notify_all();
+  }
+
+  // Attempts to reserve and perform some work from the global range of tasks,
+  // which is encoded within "command". Returns after all tasks are reserved.
+  static void RunRange(ThreadParallelRunner* self, const WorkerCommand command,
+                       const int thread);
+
+  static void ThreadFunc(ThreadParallelRunner* self, int thread);
+
+  // Unmodified after ctor, but cannot be const because we call thread::join().
+  std::vector<std::thread> threads_;
+
+  const uint32_t num_worker_threads_;  // == threads_.size()
+  const uint32_t num_threads_;         // max(num_worker_threads_, 1), see ctor.
+
+  std::atomic<int> depth_{0};  // detects if Run is re-entered (not supported).
+
+  std::mutex mutex_;  // guards both cv and their variables.
+  std::condition_variable workers_ready_cv_;
+  uint32_t workers_ready_ = 0;  // Workers that reported ready since last reset.
+  std::condition_variable worker_start_cv_;
+  WorkerCommand worker_start_command_;  // Set to kWorkerWait in the ctor body.
+
+  // Written by main thread, read by workers (after mutex lock/unlock).
+  JxlParallelRunFunction data_func_;
+  void* jpegxl_opaque_;
+
+  // Updated by workers; padding avoids false sharing.
+  uint8_t padding1[64];
+  std::atomic<uint32_t> num_reserved_{0};
+  uint8_t padding2[64];
+};
+
+}  // namespace jpegxl
+
+#endif  // LIB_THREADS_THREAD_PARALLEL_RUNNER_INTERNAL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner_test.cc b/codec/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner_test.cc
new file mode 100644
index 0000000000..7ff260e2f1
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/lib/threads/thread_parallel_runner_test.cc
@@ -0,0 +1,115 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "gtest/gtest.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/thread_pool_internal.h"
+
+namespace jpegxl {
+namespace {
+
+int PopulationCount(uint64_t bits) {  // Number of 1-bits in |bits|.
+  int ones = 0;
+  // Examine the lowest bit, then shift it out, until no set bit is left.
+  for (; bits != 0; bits >>= 1) {
+    if (bits & 1) ++ones;
+  }
+  return ones;
+}
+
+// Checks that every task index handed to the callback lies in the requested
+// range and is visited, that a pool survives many consecutive Run calls and
+// its own destruction (joining its threads), and that num_threads=0 works.
+TEST(ThreadParallelRunnerTest, TestPool) {
+  for (int num_threads = 0; num_threads <= 18; ++num_threads) {
+    jxl::ThreadPoolInternal pool(num_threads);
+    for (int num_tasks = 0; num_tasks < 32; ++num_tasks) {
+      std::vector<int> visited(num_tasks);
+      for (int begin = 0; begin < 32; ++begin) {
+        const int end = begin + num_tasks;
+        visited.assign(visited.size(), 0);
+        const auto mark = [begin, end, &visited](const int task,
+                                                 const int thread) {
+          // The task index must lie inside [begin, end).
+          EXPECT_GE(task, begin);
+          EXPECT_LT(task, end);
+          // Leave a task-specific value behind to prove the task was run.
+          visited.at(task - begin) = 1000 + task;
+        };
+        pool.Run(begin, end, jxl::ThreadPool::SkipInit(), mark);
+        for (int task = begin; task < end; ++task) {
+          EXPECT_EQ(1000 + task, visited.at(task - begin));
+        }
+      }
+    }
+  }
+}
+
+// Verify "thread" parameter when processing few tasks.
+TEST(ThreadParallelRunnerTest, TestSmallAssignments) {
+  // WARNING: cumulative total threads must not exceed profiler.h kMaxThreads.
+  const int kMaxThreads = 8;
+  for (int num_threads = 1; num_threads <= kMaxThreads; ++num_threads) {
+    jxl::ThreadPoolInternal pool(num_threads);
+
+    // (Avoid mutex because it may perturb the worker thread scheduling)
+    std::atomic<uint64_t> seen_threads{0};  // Bit i set: thread i took part.
+    std::atomic<int> calls{0};
+
+    const auto record = [&calls, num_threads, &seen_threads](const int task,
+                                                             const int thread) {
+      calls.fetch_add(1, std::memory_order_relaxed);
+
+      EXPECT_LT(thread, num_threads);
+      // Atomically set this thread's bit.
+      seen_threads.fetch_or(1ULL << thread);
+    };
+
+    // One task per thread.
+    pool.Run(0, num_threads, jxl::ThreadPool::SkipInit(), record);
+
+    // Correct number of tasks.
+    EXPECT_EQ(num_threads, calls.load());
+
+    // Can't expect equality because other workers may have woken up too late.
+    const int participants = PopulationCount(seen_threads.load());
+    EXPECT_LE(participants, num_threads);
+  }
+}
+
+struct Counter {  // Per-thread accumulator used by TestCounter.
+  Counter() {
+    // Suppress "unused-field" warning.
+    (void)padding;
+  }
+  void Assimilate(const Counter& victim) { counter += victim.counter; }  // Adds victim's total to this one.
+  int counter = 0;
+  int padding[31];  // Pads the struct to 32 ints; presumably against false sharing - TODO confirm.
+};
+
+TEST(ThreadParallelRunnerTest, TestCounter) {
+  // Each thread adds the indices of the tasks it ran to its own counter; the
+  // per-thread counters must then add up to the sum of all task indices.
+  const int kNumThreads = 12;
+  jxl::ThreadPoolInternal pool(kNumThreads);
+  alignas(128) Counter counters[kNumThreads];
+
+  const int kNumTasks = kNumThreads * 19;
+  const auto accumulate = [&counters](const int task, const int thread) {
+    counters[thread].counter += task;
+  };
+  pool.Run(0, kNumTasks, jxl::ThreadPool::SkipInit(), accumulate);
+
+  // Merge every other thread's counter into the first one.
+  for (int i = 1; i < kNumThreads; ++i) {
+    counters[0].Assimilate(counters[i]);
+  }
+  // 0 + 1 + ... + (kNumTasks - 1).
+  const int expected = kNumTasks * (kNumTasks - 1) / 2;
+  EXPECT_EQ(expected, counters[0].counter);
+}
+
+}  // namespace
+}  // namespace jpegxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/LICENSE b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/LICENSE
new file mode 100644
index 0000000000..33b7cdd2db
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/constants.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/constants.h
new file mode 100644
index 0000000000..f6e44dc7b7
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/constants.h
@@ -0,0 +1,184 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/**
+ * @file
+ * Common constants used in decoder and encoder API.
+ */
+
+#ifndef BROTLI_COMMON_CONSTANTS_H_
+#define BROTLI_COMMON_CONSTANTS_H_
+
+#include "./platform.h"
+#include <brotli/types.h>
+
+/* Specification: 7.3. Encoding of the context map */
+#define BROTLI_CONTEXT_MAP_MAX_RLE 16
+
+/* Specification: 2. Compressed representation overview */
+#define BROTLI_MAX_NUMBER_OF_BLOCK_TYPES 256
+
+/* Specification: 3.3. Alphabet sizes: insert-and-copy length */
+#define BROTLI_NUM_LITERAL_SYMBOLS 256
+#define BROTLI_NUM_COMMAND_SYMBOLS 704
+#define BROTLI_NUM_BLOCK_LEN_SYMBOLS 26
+#define BROTLI_MAX_CONTEXT_MAP_SYMBOLS (BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + \
+                                        BROTLI_CONTEXT_MAP_MAX_RLE)
+#define BROTLI_MAX_BLOCK_TYPE_SYMBOLS (BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 2)
+
+/* Specification: 3.5. Complex prefix codes */
+#define BROTLI_REPEAT_PREVIOUS_CODE_LENGTH 16
+#define BROTLI_REPEAT_ZERO_CODE_LENGTH 17
+#define BROTLI_CODE_LENGTH_CODES (BROTLI_REPEAT_ZERO_CODE_LENGTH + 1)
+/* "code length of 8 is repeated" */
+#define BROTLI_INITIAL_REPEATED_CODE_LENGTH 8
+
+/* "Large Window Brotli" */
+
+/**
+ * The theoretical maximum number of distance bits specified for large window
+ * brotli, for 64-bit encoders and decoders. Even when in practice 32-bit
+ * encoders and decoders only support up to 30 max distance bits, the value is
+ * set to 62 because it affects the large window brotli file format.
+ * Specifically, it affects the encoding of simple huffman tree for distances,
+ * see Specification RFC 7932 chapter 3.4.
+ */
+#define BROTLI_LARGE_MAX_DISTANCE_BITS 62U
+#define BROTLI_LARGE_MIN_WBITS 10
+/**
+ * The maximum supported large brotli window bits by the encoder and decoder.
+ * Large window brotli allows up to 62 bits, however the current encoder and
+ * decoder, designed for 32-bit integers, only support up to 30 bits maximum.
+ */
+#define BROTLI_LARGE_MAX_WBITS 30
+
+/* Specification: 4. Encoding of distances */
+#define BROTLI_NUM_DISTANCE_SHORT_CODES 16
+/**
+ * Maximal number of "postfix" bits.
+ *
+ * Number of "postfix" bits is stored as 2 bits in meta-block header.
+ */
+#define BROTLI_MAX_NPOSTFIX 3
+#define BROTLI_MAX_NDIRECT 120
+#define BROTLI_MAX_DISTANCE_BITS 24U
+#define BROTLI_DISTANCE_ALPHABET_SIZE(NPOSTFIX, NDIRECT, MAXNBITS) ( \
+    BROTLI_NUM_DISTANCE_SHORT_CODES + (NDIRECT) +                    \
+    ((MAXNBITS) << ((NPOSTFIX) + 1)))
+/* BROTLI_NUM_DISTANCE_SYMBOLS == 16 + 120 + (62 << (3 + 1)) == 1128 */
+#define BROTLI_NUM_DISTANCE_SYMBOLS \
+    BROTLI_DISTANCE_ALPHABET_SIZE(  \
+        BROTLI_MAX_NPOSTFIX, BROTLI_MAX_NDIRECT, BROTLI_LARGE_MAX_DISTANCE_BITS)
+
+/* ((1 << 26) - 4) is the maximal distance that can be expressed in RFC 7932
+   brotli stream using NPOSTFIX = 0 and NDIRECT = 0. With other NPOSTFIX and
+   NDIRECT values distances up to ((1 << 29) + 88) could be expressed. */
+#define BROTLI_MAX_DISTANCE 0x3FFFFFC
+
+/* ((1 << 31) - 4) is the safe distance limit. Using this number as a limit
+   allows safe distance calculation without overflows, given the distance
+   alphabet size is limited to corresponding size
+   (see kLargeWindowDistanceCodeLimits). */
+#define BROTLI_MAX_ALLOWED_DISTANCE 0x7FFFFFFC
+
+/* 7.1. Context modes and context ID lookup for literals */
+/* "context IDs for literals are in the range of 0..63" */
+#define BROTLI_LITERAL_CONTEXT_BITS 6
+
+/* 7.2. Context ID for distances */
+#define BROTLI_DISTANCE_CONTEXT_BITS 2
+
+/* 9.1. Format of the Stream Header */
+/* Number of slack bytes for window size. Don't confuse
+   with BROTLI_NUM_DISTANCE_SHORT_CODES. */
+#define BROTLI_WINDOW_GAP 16
+#define BROTLI_MAX_BACKWARD_LIMIT(W) (((size_t)1 << (W)) - BROTLI_WINDOW_GAP)
+
+typedef struct BrotliDistanceCodeLimit {
+  uint32_t max_alphabet_size;  /* size of the limited distance alphabet */
+  uint32_t max_distance;       /* largest distance it can represent */
+} BrotliDistanceCodeLimit;
+
+/* This function calculates maximal size of distance alphabet, such that the
+   distances greater than the given values can not be represented.
+
+   These limits are designed to support fast and safe 32-bit decoders.
+   "32-bit" means that signed integer values up to ((1 << 31) - 1) could be
+   safely expressed.
+
+   Brotli distance alphabet symbols do not represent consecutive distance
+   ranges. Each distance alphabet symbol (excluding direct distances and short
+   codes) represents an interleaved (for NPOSTFIX > 0) range of distances.
+   A "group" of consecutive (1 << NPOSTFIX) symbols represent non-interleaved
+   range. Two consecutive groups require the same amount of "extra bits".
+
+   It is important that distance alphabet represents complete "groups".
+   To avoid complex logic on encoder side about interleaved ranges
+   it was decided to restrict both sides to complete distance code "groups".
+ */
+BROTLI_UNUSED_FUNCTION BrotliDistanceCodeLimit BrotliCalculateDistanceCodeLimit(
+    uint32_t max_distance, uint32_t npostfix, uint32_t ndirect) {
+  BrotliDistanceCodeLimit limit;
+  /* Postfix of the last distance code inside a group. */
+  uint32_t last_postfix;
+  /* Distance offset with postfix bits removed, plus the "head-start". */
+  uint32_t offset;
+  /* Number of extra distance bits. */
+  uint32_t nbits;
+  /* Scratch value for the log2 loop. */
+  uint32_t probe;
+  /* Index of a complete group of (1 << npostfix) distance codes. */
+  uint32_t group;
+  /* First and last distance offsets addressed by the permitted group. */
+  uint32_t first;
+  uint32_t last;
+
+  /* Not every file that includes "constants.h" calls this function; taking
+     its address keeps the compiler from warning about that. */
+  BROTLI_UNUSED(&BrotliCalculateDistanceCodeLimit);
+
+  if (max_distance <= ndirect) {
+    /* Exists only for the sake of completeness; never happens. */
+    limit.max_distance = max_distance;
+    limit.max_alphabet_size = max_distance + BROTLI_NUM_DISTANCE_SHORT_CODES;
+    return limit;
+  }
+
+  last_postfix = (1u << npostfix) - 1;
+  /* (max_distance + 1) is the first prohibited value; dropping the "directly"
+     encoded region and one more leaves (max_distance - ndirect). Then remove
+     the postfix bits and add the "head-start" of 4. */
+  offset = ((max_distance - ndirect) >> npostfix) + 4;
+  /* Count the significant bits of (offset / 2): a poor-man's log2floor that
+     avoids extra dependencies. */
+  nbits = 0;
+  for (probe = offset / 2; probe != 0; probe >>= 1) ++nbits;
+  /* One bit is covered with subrange addressing (the "half" bit below). */
+  --nbits;
+  /* "group" part of the distance code: bit count, then the subrange bit. */
+  group = ((nbits - 1) << 1) | ((offset >> nbits) & 1);
+  if (group == 0) {
+    /* Added for correctness; does not occur for limit > 128. */
+    limit.max_distance = ndirect;
+    limit.max_alphabet_size = ndirect + BROTLI_NUM_DISTANCE_SHORT_CODES;
+    return limit;
+  }
+  /* The computed group covers the prohibited value; step back to the last
+     permitted group and derive the bit count from it again. */
+  --group;
+  nbits = (group >> 1) + 1;
+  /* Region start (NB: nbits >= 1), moved to the subregion picked by the low
+     bit of "group". */
+  first = ((1u << (nbits + 1)) - 4) + ((group & 1) << nbits);
+  /* The last distance in the subrange has all extra bits set. */
+  last = first + ((1u << nbits) - 1);
+  limit.max_alphabet_size = ((group << npostfix) | last_postfix) + ndirect +
+      BROTLI_NUM_DISTANCE_SHORT_CODES + 1;
+  limit.max_distance = (last << npostfix) + last_postfix + ndirect + 1;
+  return limit;
+}
+
+#endif  /* BROTLI_COMMON_CONSTANTS_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/context.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/context.h
new file mode 100755
index 0000000000..24b3eb48f5
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/context.h
@@ -0,0 +1,261 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Lookup table to map the previous two bytes to a context id.
+
+  There are four different context modeling modes defined here:
+    CONTEXT_LSB6: context id is the least significant 6 bits of the last byte,
+    CONTEXT_MSB6: context id is the most significant 6 bits of the last byte,
+    CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text,
+    CONTEXT_SIGNED: second-order context model tuned for signed integers.
+
+  If |p1| and |p2| are the previous two bytes, and |mode| is current context
+  mode, we calculate the context as:
+
+    context = ContextLut(mode)[p1] | ContextLut(mode)[p2 + 256].
+
+  For CONTEXT_UTF8 mode, if the previous two bytes are ASCII characters
+  (i.e. < 128), this will be equivalent to
+
+    context = 4 * context1(p1) + context2(p2),
+
+  where context1 is based on the previous byte in the following way:
+
+    0  : non-ASCII control
+    1  : \t, \n, \r
+    2  : space
+    3  : other punctuation
+    4  : " '
+    5  : %
+    6  : ( < [ {
+    7  : ) > ] }
+    8  : , ; :
+    9  : .
+    10 : =
+    11 : number
+    12 : upper-case vowel
+    13 : upper-case consonant
+    14 : lower-case vowel
+    15 : lower-case consonant
+
+  and context2 is based on the second last byte:
+
+    0 : control, space
+    1 : punctuation
+    2 : upper-case letter, number
+    3 : lower-case letter
+
+  If the last byte is ASCII, and the second last byte is not (in a valid UTF8
+  stream it will be a continuation byte, value between 128 and 191), the
+  context is the same as if the second last byte was an ASCII control or space.
+
+  If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
+  be a continuation byte and the context id is 2 or 3 depending on the LSB of
+  the last byte and to a lesser extent on the second last byte if it is ASCII.
+
+  If the last byte is a UTF8 continuation byte, the second last byte can be:
+    - continuation byte: the next byte is probably ASCII or lead byte (assuming
+      4-byte UTF8 characters are rare) and the context id is 0 or 1.
+    - lead byte (192 - 223): next byte is ASCII or lead byte, context is 0 or 1
+    - lead byte (224 - 255): next byte is continuation byte, context is 2 or 3
+
+  The possible value combinations of the previous two bytes, the range of
+  context ids and the type of the next byte is summarized in the table below:
+
+  |--------\-----------------------------------------------------------------|
+  |         \                         Last byte                              |
+  | Second   \---------------------------------------------------------------|
+  | last byte \    ASCII            |   cont. byte        |   lead byte      |
+  |            \   (0-127)          |   (128-191)         |   (192-)         |
+  |=============|===================|=====================|==================|
+  |  ASCII      | next: ASCII/lead  |  not valid          |  next: cont.     |
+  |  (0-127)    | context: 4 - 63   |                     |  context: 2 - 3  |
+  |-------------|-------------------|---------------------|------------------|
+  |  cont. byte | next: ASCII/lead  |  next: ASCII/lead   |  next: cont.     |
+  |  (128-191)  | context: 4 - 63   |  context: 0 - 1     |  context: 2 - 3  |
+  |-------------|-------------------|---------------------|------------------|
+  |  lead byte  | not valid         |  next: ASCII/lead   |  not valid       |
+  |  (192-223)  |                   |  context: 0 - 1     |                  |
+  |-------------|-------------------|---------------------|------------------|
+  |  lead byte  | not valid         |  next: cont.        |  not valid       |
+  |  (224-)     |                   |  context: 2 - 3     |                  |
+  |-------------|-------------------|---------------------|------------------|
+*/
+
+#ifndef BROTLI_COMMON_CONTEXT_H_
+#define BROTLI_COMMON_CONTEXT_H_
+
+#include <brotli/types.h>
+
+typedef enum ContextType {
+  CONTEXT_LSB6 = 0,   /* least significant 6 bits of the last byte */
+  CONTEXT_MSB6 = 1,   /* most significant 6 bits of the last byte */
+  CONTEXT_UTF8 = 2,   /* second-order model tuned for UTF8-encoded text */
+  CONTEXT_SIGNED = 3  /* second-order model tuned for signed integers */
+} ContextType;
+
+/* Common context lookup table: 4 modes x (256 + 256) entries each. */
+static const uint8_t kContextLookup[2048] = {
+  /* CONTEXT_LSB6, last byte. */
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+
+  /* CONTEXT_LSB6, second last byte. */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+  /* CONTEXT_MSB6, last byte. */
+   0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
+   4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,
+   8,  8,  8,  8,  9,  9,  9,  9, 10, 10, 10, 10, 11, 11, 11, 11,
+  12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
+  16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
+  20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
+  24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
+  28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
+  32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
+  36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
+  40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
+  44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
+  48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
+  52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
+  56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
+  60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,
+
+  /* CONTEXT_MSB6, second last byte. */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+  /* CONTEXT_UTF8, last byte. */
+  /* ASCII range. */
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  4,  4,  0,  0,  4,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
+  44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
+  12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
+  52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
+  12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
+  60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12,  0,
+  /* UTF8 continuation byte range. */
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  /* UTF8 lead byte range. */
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+
+  /* CONTEXT_UTF8 second last byte. */
+  /* ASCII range. */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
+  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
+  1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
+  /* UTF8 continuation byte range. */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  /* UTF8 lead byte range. */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+
+  /* CONTEXT_SIGNED, last byte: the second-last-byte values below, << 3. */
+   0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+  48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56,
+
+  /* CONTEXT_SIGNED, second last byte. */
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+};
+
+typedef const uint8_t* ContextLut;
+
+/* MODE is a ContextType; yields the ContextLut at that mode's 512 entries. */
+#define BROTLI_CONTEXT_LUT(MODE) (kContextLookup + ((MODE) << 9))
+
+/* LUT is a ContextLut; P1 is the last byte, P2 the second last byte. */
+#define BROTLI_CONTEXT(P1, P2, LUT) ((LUT)[(P1)] | (LUT)[256 + (P2)])
+
+#endif  /* BROTLI_COMMON_CONTEXT_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/dictionary.bin b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/dictionary.bin
new file mode 100644
index 0000000000..a585c0e292
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/dictionary.bin
@@ -0,0 +1,432 @@
+timedownlifeleftbackcodedatashowonlysitecityopenjustlikefreeworktextyearoverbodyloveformbookplaylivelinehelphomesidemorewordlongthemviewfindpagedaysfullheadtermeachareafromtruemarkableuponhighdatelandnewsevennextcasebothpostusedmadehandherewhatnameLinkblogsizebaseheldmakemainuser') +holdendswithNewsreadweresigntakehavegameseencallpathwellplusmenufilmpartjointhislistgoodneedwayswestjobsmindalsologorichuseslastteamarmyfoodkingwilleastwardbestfirePageknowaway.pngmovethanloadgiveselfnotemuchfeedmanyrockicononcelookhidediedHomerulehostajaxinfoclublawslesshalfsomesuchzone100%onescareTimeracebluefourweekfacehopegavehardlostwhenparkkeptpassshiproomHTMLplanTypedonesavekeepflaglinksoldfivetookratetownjumpthusdarkcardfilefearstaykillthatfallautoever.comtalkshopvotedeepmoderestturnbornbandfellroseurl(skinrolecomeactsagesmeetgold.jpgitemvaryfeltthensenddropViewcopy1.0"</a>stopelseliestourpack.gifpastcss?graymean&gt;rideshotlatesaidroadvar feeljohnrickportfast'UA-dead</b>poorbilltypeU.S.woodmust2px;Inforankwidewantwalllead[0];paulwavesure$('#waitmassarmsgoesgainlangpaid!-- lockunitrootwalkfirmwifexml"songtest20pxkindrowstoolfontmailsafestarmapscorerainflowbabyspansays4px;6px;artsfootrealwikiheatsteptriporg/lakeweaktoldFormcastfansbankveryrunsjulytask1px;goalgrewslowedgeid="sets5px;.js?40pxif (soonseatnonetubezerosentreedfactintogiftharm18pxcamehillboldzoomvoideasyringfillpeakinitcost3px;jacktagsbitsrolleditknewnear<!--growJSONdutyNamesaleyou lotspainjazzcoldeyesfishwww.risktabsprev10pxrise25pxBlueding300,ballfordearnwildbox.fairlackverspairjunetechif(!pickevil$("#warmlorddoespull,000ideadrawhugespotfundburnhrefcellkeystickhourlossfuel12pxsuitdealRSS"agedgreyGET"easeaimsgirlaids8px;navygridtips#999warsladycars); }php?helltallwhomzh:�*/
+ 100hall.
+
+A7px;pushchat0px;crew*/</hash75pxflatrare && tellcampontolaidmissskiptentfinemalegetsplot400,
+
+coolfeet.php<br>ericmostguidbelldeschairmathatom/img&#82luckcent000;tinygonehtmlselldrugFREEnodenick?id=losenullvastwindRSS wearrelybeensamedukenasacapewishgulfT23:hitsslotgatekickblurthey15px''););">msiewinsbirdsortbetaseekT18:ordstreemall60pxfarm’sboys[0].');"POSTbearkids);}}marytend(UK)quadzh:�-siz----prop');liftT19:viceandydebt>RSSpoolneckblowT16:doorevalT17:letsfailoralpollnovacolsgene —softrometillross<h3>pourfadepink<tr>mini)|!(minezh:�barshear00);milk -->ironfreddiskwentsoilputs/js/holyT22:ISBNT20:adamsees<h2>json', 'contT21: RSSloopasiamoon</p>soulLINEfortcartT14:<h1>80px!--<9px;T04:mike:46ZniceinchYorkricezh:�'));puremageparatonebond:37Z_of_']);000,zh:�tankyardbowlbush:56ZJava30px
+|}
+%C3%:34ZjeffEXPIcashvisagolfsnowzh:�quer.csssickmeatmin.binddellhirepicsrent:36ZHTTP-201fotowolfEND xbox:54ZBODYdick;
+}
+exit:35Zvarsbeat'});diet999;anne}}</[i].Langkm²wiretoysaddssealalex;
+	}echonine.org005)tonyjewssandlegsroof000) 200winegeardogsbootgarycutstyletemption.xmlcockgang$('.50pxPh.Dmiscalanloandeskmileryanunixdisc);}
+dustclip).
+
+70px-200DVDs7]><tapedemoi++)wageeurophiloptsholeFAQsasin-26TlabspetsURL bulkcook;}
+HEAD[0])abbrjuan(198leshtwin</i>sonyguysfuckpipe|-
+!002)ndow[1];[];
+Log salt
+		bangtrimbath){
+00px
+});ko:�feesad>s:// [];tollplug(){
+{
+ .js'200pdualboat.JPG);
+}quot);
+
+');
+
+}201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037201320122011201020092008200720062005200420032002200120001999199819971996199519941993199219911990198919881987198619851984198319821981198019791978197719761975197419731972197119701969196819671966196519641963196219611960195919581957195619551954195319521951195010001024139400009999comomásesteestaperotodohacecadaañobiendíaasívidacasootroforosolootracualdijosidograntipotemadebealgoquéestonadatrespococasabajotodasinoaguapuesunosantediceluisellamayozonaamorpisoobraclicellodioshoracasiзанаомрарутанепоотизнодотожеонихНаеебымыВысовывоНообПолиниРФНеМытыОнимдаЗаДаНуОбтеИзейнуммТыужفيأنمامعكلأورديافىهولملكاولهبسالإنهيأيقدهلثمبهلوليبلايبكشيامأمنتبيلنحبهممشوشfirstvideolightworldmediawhitecloseblackrightsmallbooksplacemusicfieldorderpointvalueleveltableboardhousegroupworksyearsstatetodaywaterstartstyledeathpowerphonenighterrorinputabouttermstitletoolseventlocaltimeslargewordsgamesshortspacefocusclearmodelblockguideradiosharewomenagainmoneyimagenamesyounglineslatercolorgreenfront&amp;watchforcepricerulesbeginaftervisitissueareasbelowindextotalhourslabelprintpressbuiltlinksspeedstudytradefoundsenseundershownformsrangeaddedstillmovedtakenaboveflashfixedoftenotherviewschecklegalriveritemsquickshapehumanexistgoingmoviethirdbasicpeacestagewidthloginideaswrotepagesusersdrivestorebreaksouthvoicesitesmonthwherebuildwhichearthforumthreesportpartyClicklowerlivesclasslayerentrystoryusagesoundcourtyour birthpopuptypesapplyImagebeinguppernoteseveryshowsmeansextramatchtrackknownearlybegansuperpapernorthlearngivennamedendedTermspartsGroupbrandusingwomanfalsereadyaudiotakeswhile.com/livedcasesdailychildgreatjudgethoseunitsneverbroadcoastcoverapplefilescyclesceneplansclickwritequeenpieceemailframeolderphotolimitcachecivilscaleenterthemetheretouchboundroyalaskedwholesincestock 
namefaithheartemptyofferscopeownedmightalbumthinkbloodarraymajortrustcanonunioncountvalidstoneStyleLoginhappyoccurleft:freshquitefilmsgradeneedsurbanfightbasishoverauto;route.htmlmixedfinalYour slidetopicbrownalonedrawnsplitreachRightdatesmarchquotegoodsLinksdoubtasyncthumballowchiefyouthnovel10px;serveuntilhandsCheckSpacequeryjamesequaltwice0,000Startpanelsongsroundeightshiftworthpostsleadsweeksavoidthesemilesplanesmartalphaplantmarksratesplaysclaimsalestextsstarswrong</h3>thing.org/multiheardPowerstandtokensolid(thisbringshipsstafftriedcallsfullyfactsagentThis //-->adminegyptEvent15px;Emailtrue"crossspentblogsbox">notedleavechinasizesguest</h4>robotheavytrue,sevengrandcrimesignsawaredancephase><!--en_US&#39;200px_namelatinenjoyajax.ationsmithU.S. holdspeterindianav">chainscorecomesdoingpriorShare1990sromanlistsjapanfallstrialowneragree</h2>abusealertopera"-//WcardshillsteamsPhototruthclean.php?saintmetallouismeantproofbriefrow">genretrucklooksValueFrame.net/-->
+<try {
+var makescostsplainadultquesttrainlaborhelpscausemagicmotortheir250pxleaststepsCountcouldglasssidesfundshotelawardmouthmovesparisgivesdutchtexasfruitnull,||[];top">
+<!--POST"ocean<br/>floorspeakdepth sizebankscatchchart20px;aligndealswould50px;url="parksmouseMost ...</amongbrainbody none;basedcarrydraftreferpage_home.meterdelaydreamprovejoint</tr>drugs<!-- aprilidealallenexactforthcodeslogicView seemsblankports (200saved_linkgoalsgrantgreekhomesringsrated30px;whoseparse();" Blocklinuxjonespixel');">);if(-leftdavidhorseFocusraiseboxesTrackement</em>bar">.src=toweralt="cablehenry24px;setupitalysharpminortastewantsthis.resetwheelgirls/css/100%;clubsstuffbiblevotes 1000korea});
+bandsqueue= {};80px;cking{
+		aheadclockirishlike ratiostatsForm"yahoo)[0];Aboutfinds</h1>debugtasksURL =cells})();12px;primetellsturns0x600.jpg"spainbeachtaxesmicroangel--></giftssteve-linkbody.});
+	mount (199FAQ</rogerfrankClass28px;feeds<h1><scotttests22px;drink) || lewisshall#039; for lovedwaste00px;ja:�simon<fontreplymeetsuntercheaptightBrand) != dressclipsroomsonkeymobilmain.Name platefunnytreescom/"1.jpgwmodeparamSTARTleft idden, 201);
+}
+form.viruschairtransworstPagesitionpatch<!--
+o-cacfirmstours,000 asiani++){adobe')[0]id=10both;menu .2.mi.png"kevincoachChildbruce2.jpgURL)+.jpg|suitesliceharry120" sweettr>
+name=diegopage swiss-->
+
+#fff;">Log.com"treatsheet) && 14px;sleepntentfiledja:�id="cName"worseshots-box-delta
+&lt;bears:48Z<data-rural</a> spendbakershops= "";php">ction13px;brianhellosize=o=%2F joinmaybe<img img">, fjsimg" ")[0]MTopBType"newlyDanskczechtrailknows</h5>faq">zh-cn10);
+-1");type=bluestrulydavis.js';>
+<!steel you h2>
+form jesus100% menu.
+	
+walesrisksumentddingb-likteachgif" vegasdanskeestishqipsuomisobredesdeentretodospuedeañosestátienehastaotrospartedondenuevohacerformamismomejormundoaquídíassóloayudafechatodastantomenosdatosotrassitiomuchoahoralugarmayorestoshorastenerantesfotosestaspaísnuevasaludforosmedioquienmesespoderchileserávecesdecirjoséestarventagrupohechoellostengoamigocosasnivelgentemismaairesjuliotemashaciafavorjuniolibrepuntobuenoautorabrilbuenatextomarzosaberlistaluegocómoenerojuegoperúhaberestoynuncamujervalorfueralibrogustaigualvotoscasosguíapuedosomosavisousteddebennochebuscafaltaeurosseriedichocursoclavecasasleónplazolargoobrasvistaapoyojuntotratavistocrearcampohemoscincocargopisosordenhacenáreadiscopedrocercapuedapapelmenorútilclarojorgecalleponertardenadiemarcasigueellassiglocochemotosmadreclaserestoniñoquedapasarbancohijosviajepabloéstevienereinodejarfondocanalnorteletracausatomarmanoslunesautosvillavendopesartipostengamarcollevapadreunidovamoszonasambosbandamariaabusomuchasubirriojavivirgradochicaallíjovendichaestantalessalirsuelopesosfinesllamabuscoéstalleganegroplazahumorpagarjuntadobleislasbolsabañohablaluchaÁreadicenjugarnotasvalleallácargadolorabajoestégustomentemariofirmacostofichaplatahogarartesleyesaquelmuseobasespocosmitadcielochicomiedoganarsantoetapadebesplayaredessietecortecoreadudasdeseoviejodeseaaguas&quot;domaincommonstatuseventsmastersystemactionbannerremovescrollupdateglobalmediumfilternumberchangeresultpublicscreenchoosenormaltravelissuessourcetargetspringmodulemobileswitchphotosborderregionitselfsocialactivecolumnrecordfollowtitle>eitherlengthfamilyfriendlayoutauthorcreatereviewsummerserverplayedplayerexpandpolicyformatdoublepointsseriespersonlivingdesignmonthsforcesuniqueweightpeopleenergynaturesearchfigurehavingcustomoffsetletterwindowsubmitrendergroupsuploadhealthmethodvideosschoolfutureshadowdebatevaluesObjectothersrightsleaguechromesimplenoticesharedendingseasonreportonlinesquarebuttonimagesenablemovinglatestwinterFranceperiodstrongrepeatLondondetailform
eddemandsecurepassedtoggleplacesdevicestaticcitiesstreamyellowattackstreetflighthiddeninfo">openedusefulvalleycausesleadersecretseconddamagesportsexceptratingsignedthingseffectfieldsstatesofficevisualeditorvolumeReportmuseummoviesparentaccessmostlymother" id="marketgroundchancesurveybeforesymbolmomentspeechmotioninsidematterCenterobjectexistsmiddleEuropegrowthlegacymannerenoughcareeransweroriginportalclientselectrandomclosedtopicscomingfatheroptionsimplyraisedescapechosenchurchdefinereasoncorneroutputmemoryiframepolicemodelsNumberduringoffersstyleskilledlistedcalledsilvermargindeletebetterbrowselimitsGlobalsinglewidgetcenterbudgetnowrapcreditclaimsenginesafetychoicespirit-stylespreadmakingneededrussiapleaseextentScriptbrokenallowschargedividefactormember-basedtheoryconfigaroundworkedhelpedChurchimpactshouldalwayslogo" bottomlist">){var prefixorangeHeader.push(couplegardenbridgelaunchReviewtakingvisionlittledatingButtonbeautythemesforgotSearchanchoralmostloadedChangereturnstringreloadMobileincomesupplySourceordersviewed&nbsp;courseAbout island<html cookiename="amazonmodernadvicein</a>: The dialoghousesBEGIN MexicostartscentreheightaddingIslandassetsEmpireSchooleffortdirectnearlymanualSelect.
+
+Onejoinedmenu">PhilipawardshandleimportOfficeregardskillsnationSportsdegreeweekly (e.g.behinddoctorloggedunited</b></beginsplantsassistartistissued300px|canadaagencyschemeremainBrazilsamplelogo">beyond-scaleacceptservedmarineFootercamera</h1>
+_form"leavesstress" />
+.gif" onloadloaderOxfordsistersurvivlistenfemaleDesignsize="appealtext">levelsthankshigherforcedanimalanyoneAfricaagreedrecentPeople<br />wonderpricesturned|| {};main">inlinesundaywrap">failedcensusminutebeaconquotes150px|estateremoteemail"linkedright;signalformal1.htmlsignupprincefloat:.png" forum.AccesspaperssoundsextendHeightsliderUTF-8"&amp; Before. WithstudioownersmanageprofitjQueryannualparamsboughtfamousgooglelongeri++) {israelsayingdecidehome">headerensurebranchpiecesblock;statedtop"><racingresize--&gt;pacitysexualbureau.jpg" 10,000obtaintitlesamount, Inc.comedymenu" lyricstoday.indeedcounty_logo.FamilylookedMarketlse ifPlayerturkey);var forestgivingerrorsDomain}else{insertBlog</footerlogin.fasteragents<body 10px 0pragmafridayjuniordollarplacedcoversplugin5,000 page">boston.test(avatartested_countforumsschemaindex,filledsharesreaderalert(appearSubmitline">body">
+* TheThoughseeingjerseyNews</verifyexpertinjurywidth=CookieSTART across_imagethreadnativepocketbox">
+System DavidcancertablesprovedApril reallydriveritem">more">boardscolorscampusfirst || [];media.guitarfinishwidth:showedOther .php" assumelayerswilsonstoresreliefswedenCustomeasily your String
+
+Whiltaylorclear:resortfrenchthough") + "<body>buyingbrandsMembername">oppingsector5px;">vspacepostermajor coffeemartinmaturehappen</nav>kansaslink">Images=falsewhile hspace0&amp; 
+
+In  powerPolski-colorjordanBottomStart -count2.htmlnews">01.jpgOnline-rightmillerseniorISBN 00,000 guidesvalue)ectionrepair.xml"  rights.html-blockregExp:hoverwithinvirginphones</tr>using 
+	var >');
+	</td>
+</tr>
+bahasabrasilgalegomagyarpolskisrpskiردو中文简体繁體信息中国我们一个公司管理论坛可以服务时间个人产品自己企业查看工作联系没有网站所有评论中心文章用户首页作者技术问题相关下载搜索使用软件在线主题资料视频回复注册网络收藏内容推荐市场消息空间发布什么好友生活图片发展如果手机新闻最新方式北京提供关于更多这个系统知道游戏广告其他发表安全第一会员进行点击版权电子世界设计免费教育加入活动他们商品博客现在上海如何已经留言详细社区登录本站需要价格支持国际链接国家建设朋友阅读法律位置经济选择这样当前分类排行因为交易最后音乐不能通过行业科技可能设备合作大家社会研究专业全部项目这里还是开始情况电脑文件品牌帮助文化资源大学学习地址浏览投资工程要求怎么时候功能主要目前资讯城市方法电影招聘声明任何健康数据美国汽车介绍但是交流生产所以电话显示一些单位人员分析地图旅游工具学生系列网友帖子密码频道控制地区基本全国网上重要第二喜欢进入友情这些考试发现培训以上政府成为环境香港同时娱乐发送一定开发作品标准欢迎解决地方一下以及责任或者客户代表积分女人数码销售出现离线应用列表不同编辑统计查询不要有关机构很多播放组织政策直接能力来源時間看到热门关键专区非常英语百度希望美女比较知识规定建议部门意见精彩日本提高发言方面基金处理权限影片银行还有分享物品经营添加专家这种话题起来业务公告记录简介质量男人影响引用报告部分快速咨询时尚注意申请学校应该历史只是返回购买名称为了成功说明供应孩子专题程序一般會員只有其它保护而且今天窗口动态状态特别认为必须更新小说我們作为媒体包括那么一样国内是否根据电视学院具有过程由于人才出来不过正在明星故事关系标题商务输入一直基础教学了解建筑结果全球通知计划对于艺术相册发生真的建立等级类型经验实现制作来自标签以下原创无法其中個人一切指南关闭集团第三关注因此照片深圳商业广州日期高级最近综合表示专辑行为交通评价觉得精华家庭完成感觉安装得到邮件制度食品虽然转载报价记者方案行政人民用品东西提出酒店然后付款热点以前完全发帖设置领导工业医院看看经典原因平台各种增加材料新增之后职业效果今年论文我国告诉版主修改参与打印快乐机械观点存在精神获得利用继续你们这么模式语言能够雅虎操作风格一起科学体育短信条件治疗运动产业会议导航先生联盟可是問題结构作用调查資料自动负责农业访问实施接受讨论那个反馈加强女性范围服務休闲今日客服觀看参加的话一点保证图书有效测试移动才能决定股票不断需求不得办法之间采用营销投诉目标爱情摄影有些複製文学机会数字装修购物农村全面精品其实事情水平提示上市谢谢普通教师上传类别歌曲拥有创新配件只要时代資訊达到人生订阅老师展示心理贴子網站主題自然级别简单改革那些来说打开代码删除证券节目重点次數多少规划资金找到以后大全主页最佳回答天下保障现代检查投票小时沒有正常甚至代理目录公开复制金融幸福版本形成准备行情回到思想怎样协议认证最好产生按照服装广东动漫采购新手组图面板参考政治容易天地努力人们升级速度人物调整流行造成文字韩国贸易开展相關表现影视如此美容大小报道条款心情许多法规家居书店连接立即举报技巧奥运登入以来理论事件自由中华办公妈妈真正不错全文合同价值别人监督具体世纪团队创业承担增长有人保持商家维修台湾左右股份答案实际电信经理生命宣传任务正式特色下来协会只能当然重新內容指导运行日志賣家超过土地浙江支付推出站长杭州执行制造之一推广现场描述变化传统歌手保险课程医疗经过过去之前收入年度杂志美丽最高登陆未来加工免责教程版块身体重庆出售成本形式土豆出價东方邮箱南京求职取得职位相信页面分钟网页确定图例网址积极错误目的宝贝机关风险授权病毒宠物除了評論疾病及时求购站点儿童每天中央认识每个天津字体台灣维护本页个性官方常见相机战略应当律师方便校园股市房屋栏目员工导致突然道具本网结合档案劳动另外美元引起改变第四会计說明隐私宝宝规范消费共同忘记体系带来名字發表开放加盟受到二手大量成人数量共享区域女孩原则所在结束通信超级配置当时优秀性感房产遊戲出口提交就业保健程度参数事业整个山东情感特殊分類搜尋属于门户财务声音及其财经坚持干部成立利益考虑成都包装用戶比赛文明招商完整真是眼睛伙伴威望领域卫生优惠論壇公共良好充分符合附件特点不可英文资产根本明显密碼公众民族更加享受同学启动适合原来问答本文美食绿色稳定终于生物供求搜狐力量严重永远写真有限竞争对象费用不好绝对十分促进点评影音优势不少欣赏并且有点方向全新信用设施形象资格突破随着重大于是毕业智能化工完美商城统一出版打造產品概况用于保留因素中國存储贴图最愛长期口价理财基地安排武汉里面创建天空首先完善驱动下面不再诚信意义阳光英国漂亮军事玩家群众农民即可名稱家具动画想到注明小学性能考研硬件观看清楚搞笑首頁黄金适用江苏真实主管阶段註冊
翻译权利做好似乎通讯施工狀態也许环保培养概念大型机票理解匿名cuandoenviarmadridbuscariniciotiempoporquecuentaestadopuedenjuegoscontraestánnombretienenperfilmaneraamigosciudadcentroaunquepuedesdentroprimerpreciosegúnbuenosvolverpuntossemanahabíaagostonuevosunidoscarlosequiponiñosmuchosalgunacorreoimagenpartirarribamaríahombreempleoverdadcambiomuchasfueronpasadolíneaparecenuevascursosestabaquierolibroscuantoaccesomiguelvarioscuatrotienesgruposseráneuropamediosfrenteacercademásofertacochesmodeloitalialetrasalgúncompracualesexistecuerposiendoprensallegarviajesdineromurciapodrápuestodiariopuebloquieremanuelpropiocrisisciertoseguromuertefuentecerrargrandeefectopartesmedidapropiaofrecetierrae-mailvariasformasfuturoobjetoseguirriesgonormasmismosúnicocaminositiosrazóndebidopruebatoledoteníajesúsesperococinaorigentiendacientocádizhablarseríalatinafuerzaestiloguerraentraréxitolópezagendavídeoevitarpaginametrosjavierpadresfácilcabezaáreassalidaenvíojapónabusosbienestextosllevarpuedanfuertecomúnclaseshumanotenidobilbaounidadestáseditarcreadoдлячтокакилиэтовсеегопритакещеужеКакбезбылониВсеподЭтотомчемнетлетразонагдемнеДляПринаснихтемктогодвоттамСШАмаяЧтовасвамемуТакдванамэтиэтуВамтехпротутнаддняВоттринейВаснимсамтотрубОнимирнееОООлицэтаОнанемдоммойдвеоносудकेहैकीसेकाकोऔरपरनेएककिभीइसकरतोहोआपहीयहयातकथाjagranआजजोअबदोगईजागएहमइनवहयेथेथीघरजबदीकईजीवेनईनएहरउसमेकमवोलेसबमईदेओरआमबसभरबनचलमनआगसीलीعلىإلىهذاآخرعددالىهذهصورغيركانولابينعرضذلكهنايومقالعليانالكنحتىقبلوحةاخرفقطعبدركنإذاكمااحدإلافيهبعضكيفبحثومنوهوأناجدالهاسلمعندليسعبرصلىمنذبهاأنهمثلكنتالاحيثمصرشرححولوفياذالكلمرةانتالفأبوخاصأنتانهاليعضووقدابنخيربنتلكمشاءوهيابوقصصومارقمأحدنحنعدمرأياحةكتبدونيجبمنهتحتجهةسنةيتمكرةغزةنفسبيتللهلناتلكقلبلماعنهأولشيءنورأمافيكبكلذاترتببأنهمسانكبيعفقدحسنلهمشعرأهلشهرقطرطلبprofileservicedefaulthimselfdetailscontentsupportstartedmessagesuccessfashion<title>countryaccountcreatedstoriesresultsrunningprocesswritingobjectsvisiblewelcomearticleunknownnetworkcompanydynamicbrowserprivacyproblemServicerespectdisplayrequestreservewebsitehistoryfriendsop
tionsworkingversionmillionchannelwindow.addressvisitedweathercorrectproductedirectforwardyou canremovedsubjectcontrolarchivecurrentreadinglibrarylimitedmanagerfurthersummarymachineminutesprivatecontextprogramsocietynumberswrittenenabledtriggersourcesloadingelementpartnerfinallyperfectmeaningsystemskeepingculture&quot;,journalprojectsurfaces&quot;expiresreviewsbalanceEnglishContentthroughPlease opinioncontactaverageprimaryvillageSpanishgallerydeclinemeetingmissionpopularqualitymeasuregeneralspeciessessionsectionwriterscounterinitialreportsfiguresmembersholdingdisputeearlierexpressdigitalpictureAnothermarriedtrafficleadingchangedcentralvictoryimages/reasonsstudiesfeaturelistingmust beschoolsVersionusuallyepisodeplayinggrowingobviousoverlaypresentactions</ul>
+wrapperalreadycertainrealitystorageanotherdesktopofferedpatternunusualDigitalcapitalWebsitefailureconnectreducedAndroiddecadesregular &amp; animalsreleaseAutomatgettingmethodsnothingPopularcaptionletterscapturesciencelicensechangesEngland=1&amp;History = new CentralupdatedSpecialNetworkrequirecommentwarningCollegetoolbarremainsbecauseelectedDeutschfinanceworkersquicklybetweenexactlysettingdiseaseSocietyweaponsexhibit&lt;!--Controlclassescoveredoutlineattacksdevices(windowpurposetitle="Mobile killingshowingItaliandroppedheavilyeffects-1']);
+confirmCurrentadvancesharingopeningdrawingbillionorderedGermanyrelated</form>includewhetherdefinedSciencecatalogArticlebuttonslargestuniformjourneysidebarChicagoholidayGeneralpassage,&quot;animatefeelingarrivedpassingnaturalroughly.
+
+The but notdensityBritainChineselack oftributeIreland" data-factorsreceivethat isLibraryhusbandin factaffairsCharlesradicalbroughtfindinglanding:lang="return leadersplannedpremiumpackageAmericaEdition]&quot;Messageneed tovalue="complexlookingstationbelievesmaller-mobilerecordswant tokind ofFirefoxyou aresimilarstudiedmaximumheadingrapidlyclimatekingdomemergedamountsfoundedpioneerformuladynastyhow to SupportrevenueeconomyResultsbrothersoldierlargelycalling.&quot;AccountEdward segmentRobert effortsPacificlearnedup withheight:we haveAngelesnations_searchappliedacquiremassivegranted: falsetreatedbiggestbenefitdrivingStudiesminimumperhapsmorningsellingis usedreversevariant role="missingachievepromotestudentsomeoneextremerestorebottom:evolvedall thesitemapenglishway to  AugustsymbolsCompanymattersmusicalagainstserving})();
+paymenttroubleconceptcompareparentsplayersregionsmonitor ''The winningexploreadaptedGalleryproduceabilityenhancecareers). The collectSearch ancientexistedfooter handlerprintedconsoleEasternexportswindowsChannelillegalneutralsuggest_headersigning.html">settledwesterncausing-webkitclaimedJusticechaptervictimsThomas mozillapromisepartieseditionoutside:false,hundredOlympic_buttonauthorsreachedchronicdemandssecondsprotectadoptedprepareneithergreatlygreateroverallimprovecommandspecialsearch.worshipfundingthoughthighestinsteadutilityquarterCulturetestingclearlyexposedBrowserliberal} catchProjectexamplehide();FloridaanswersallowedEmperordefenseseriousfreedomSeveral-buttonFurtherout of != nulltrainedDenmarkvoid(0)/all.jspreventRequestStephen
+
+When observe</h2>
+Modern provide" alt="borders.
+
+For 
+
+Many artistspoweredperformfictiontype ofmedicalticketsopposedCouncilwitnessjusticeGeorge Belgium...</a>twitternotablywaitingwarfare Other rankingphrasesmentionsurvivescholar</p>
+ Countryignoredloss ofjust asGeorgiastrange<head><stopped1']);
+islandsnotableborder:list ofcarried100,000</h3>
+ severalbecomesselect wedding00.htmlmonarchoff theteacherhighly biologylife ofor evenrise of&raquo;plusonehunting(thoughDouglasjoiningcirclesFor theAncientVietnamvehiclesuch ascrystalvalue =Windowsenjoyeda smallassumed<a id="foreign All rihow theDisplayretiredhoweverhidden;battlesseekingcabinetwas notlook atconductget theJanuaryhappensturninga:hoverOnline French lackingtypicalextractenemieseven ifgeneratdecidedare not/searchbeliefs-image:locatedstatic.login">convertviolententeredfirst">circuitFinlandchemistshe was10px;">as suchdivided</span>will beline ofa greatmystery/index.fallingdue to railwaycollegemonsterdescentit withnuclearJewish protestBritishflowerspredictreformsbutton who waslectureinstantsuicidegenericperiodsmarketsSocial fishingcombinegraphicwinners<br /><by the NaturalPrivacycookiesoutcomeresolveSwedishbrieflyPersianso muchCenturydepictscolumnshousingscriptsnext tobearingmappingrevisedjQuery(-width:title">tooltipSectiondesignsTurkishyounger.match(})();
+
+burningoperatedegreessource=Richardcloselyplasticentries</tr>
+color:#ul id="possessrollingphysicsfailingexecutecontestlink toDefault<br />
+: true,chartertourismclassicproceedexplain</h1>
+online.?xml vehelpingdiamonduse theairlineend -->).attr(readershosting#ffffffrealizeVincentsignals src="/ProductdespitediversetellingPublic held inJoseph theatreaffects<style>a largedoesn'tlater, ElementfaviconcreatorHungaryAirportsee theso thatMichaelSystemsPrograms, and  width=e&quot;tradingleft">
+personsGolden Affairsgrammarformingdestroyidea ofcase ofoldest this is.src = cartoonregistrCommonsMuslimsWhat isin manymarkingrevealsIndeed,equally/show_aoutdoorescape(Austriageneticsystem,In the sittingHe alsoIslandsAcademy
+		<!--Daniel bindingblock">imposedutilizeAbraham(except{width:putting).html(|| [];
+DATA[ *kitchenmountedactual dialectmainly _blank'installexpertsif(typeIt also&copy; ">Termsborn inOptionseasterntalkingconcerngained ongoingjustifycriticsfactoryits ownassaultinvitedlastinghis ownhref="/" rel="developconcertdiagramdollarsclusterphp?id=alcohol);})();using a><span>vesselsrevivalAddressamateurandroidallegedillnesswalkingcentersqualifymatchesunifiedextinctDefensedied in
+	<!-- customslinkingLittle Book ofeveningmin.js?are thekontakttoday's.html" target=wearingAll Rig;
+})();raising Also, crucialabout">declare-->
+<scfirefoxas muchappliesindex, s, but type = 
+
+<!--towardsRecordsPrivateForeignPremierchoicesVirtualreturnsCommentPoweredinline;povertychamberLiving volumesAnthonylogin" RelatedEconomyreachescuttinggravitylife inChapter-shadowNotable</td>
+ returnstadiumwidgetsvaryingtravelsheld bywho arework infacultyangularwho hadairporttown of
+
+Some 'click'chargeskeywordit willcity of(this);Andrew unique checkedor more300px; return;rsion="pluginswithin herselfStationFederalventurepublishsent totensionactresscome tofingersDuke ofpeople,exploitwhat isharmonya major":"httpin his menu">
+monthlyofficercouncilgainingeven inSummarydate ofloyaltyfitnessand wasemperorsupremeSecond hearingRussianlongestAlbertalateralset of small">.appenddo withfederalbank ofbeneathDespiteCapitalgrounds), and percentit fromclosingcontainInsteadfifteenas well.yahoo.respondfighterobscurereflectorganic= Math.editingonline paddinga wholeonerroryear ofend of barrierwhen itheader home ofresumedrenamedstrong>heatingretainscloudfrway of March 1knowingin partBetweenlessonsclosestvirtuallinks">crossedEND -->famous awardedLicenseHealth fairly wealthyminimalAfricancompetelabel">singingfarmersBrasil)discussreplaceGregoryfont copursuedappearsmake uproundedboth ofblockedsaw theofficescoloursif(docuwhen heenforcepush(fuAugust UTF-8">Fantasyin mostinjuredUsuallyfarmingclosureobject defenceuse of Medical<body>
+evidentbe usedkeyCodesixteenIslamic#000000entire widely active (typeofone cancolor =speakerextendsPhysicsterrain<tbody>funeralviewingmiddle cricketprophetshifteddoctorsRussell targetcompactalgebrasocial-bulk ofman and</td>
+ he left).val()false);logicalbankinghome tonaming Arizonacredits);
+});
+founderin turnCollinsbefore But thechargedTitle">CaptainspelledgoddessTag -->Adding:but wasRecent patientback in=false&Lincolnwe knowCounterJudaismscript altered']);
+  has theunclearEvent',both innot all
+
+<!-- placinghard to centersort ofclientsstreetsBernardassertstend tofantasydown inharbourFreedomjewelry/about..searchlegendsis mademodern only ononly toimage" linear painterand notrarely acronymdelivershorter00&amp;as manywidth="/* <![Ctitle =of the lowest picked escapeduses ofpeoples PublicMatthewtacticsdamagedway forlaws ofeasy to windowstrong  simple}catch(seventhinfoboxwent topaintedcitizenI don'tretreat. Some ww.");
+bombingmailto:made in. Many carries||{};wiwork ofsynonymdefeatsfavoredopticalpageTraunless sendingleft"><comScorAll thejQuery.touristClassicfalse" Wilhelmsuburbsgenuinebishops.split(global followsbody ofnominalContactsecularleft tochiefly-hidden-banner</li>
+
+. When in bothdismissExplorealways via thespañolwelfareruling arrangecaptainhis sonrule ofhe tookitself,=0&amp;(calledsamplesto makecom/pagMartin Kennedyacceptsfull ofhandledBesides//--></able totargetsessencehim to its by common.mineralto takeways tos.org/ladvisedpenaltysimple:if theyLettersa shortHerbertstrikes groups.lengthflightsoverlapslowly lesser social </p>
+		it intoranked rate oful>
+  attemptpair ofmake itKontaktAntoniohaving ratings activestreamstrapped").css(hostilelead tolittle groups,Picture-->
+
+ rows=" objectinverse<footerCustomV><\/scrsolvingChamberslaverywoundedwhereas!= 'undfor allpartly -right:Arabianbacked centuryunit ofmobile-Europe,is homerisk ofdesiredClintoncost ofage of become none ofp&quot;Middle ead')[0Criticsstudios>&copy;group">assemblmaking pressedwidget.ps:" ? rebuiltby someFormer editorsdelayedCanonichad thepushingclass="but arepartialBabylonbottom carrierCommandits useAs withcoursesa thirddenotesalso inHouston20px;">accuseddouble goal ofFamous ).bind(priests Onlinein Julyst + "gconsultdecimalhelpfulrevivedis veryr'+'iptlosing femalesis alsostringsdays ofarrivalfuture <objectforcingString(" />
+		here isencoded.  The balloondone by/commonbgcolorlaw of Indianaavoidedbut the2px 3pxjquery.after apolicy.men andfooter-= true;for usescreen.Indian image =family,http:// &nbsp;driverseternalsame asnoticedviewers})();
+ is moreseasonsformer the newis justconsent Searchwas thewhy theshippedbr><br>width: height=made ofcuisineis thata very Admiral fixed;normal MissionPress, ontariocharsettry to invaded="true"spacingis mosta more totallyfall of});
+  immensetime inset outsatisfyto finddown tolot of Playersin Junequantumnot thetime todistantFinnishsrc = (single help ofGerman law andlabeledforestscookingspace">header-well asStanleybridges/globalCroatia About [0];
+  it, andgroupedbeing a){throwhe madelighterethicalFFFFFF"bottom"like a employslive inas seenprintermost ofub-linkrejectsand useimage">succeedfeedingNuclearinformato helpWomen'sNeitherMexicanprotein<table by manyhealthylawsuitdevised.push({sellerssimply Through.cookie Image(older">us.js"> Since universlarger open to!-- endlies in']);
+  marketwho is ("DOMComanagedone fortypeof Kingdomprofitsproposeto showcenter;made itdressedwere inmixtureprecisearisingsrc = 'make a securedBaptistvoting 
+		var March 2grew upClimate.removeskilledway the</head>face ofacting right">to workreduceshas haderectedshow();action=book ofan area== "htt<header
+<html>conformfacing cookie.rely onhosted .customhe wentbut forspread Family a meansout theforums.footage">MobilClements" id="as highintense--><!--female is seenimpliedset thea stateand hisfastestbesidesbutton_bounded"><img Infoboxevents,a youngand areNative cheaperTimeoutand hasengineswon the(mostlyright: find a -bottomPrince area ofmore ofsearch_nature,legallyperiod,land ofor withinducedprovingmissilelocallyAgainstthe wayk&quot;px;">
+pushed abandonnumeralCertainIn thismore inor somename isand, incrownedISBN 0-createsOctobermay notcenter late inDefenceenactedwish tobroadlycoolingonload=it. TherecoverMembersheight assumes<html>
+people.in one =windowfooter_a good reklamaothers,to this_cookiepanel">London,definescrushedbaptismcoastalstatus title" move tolost inbetter impliesrivalryservers SystemPerhapses and contendflowinglasted rise inGenesisview ofrising seem tobut in backinghe willgiven agiving cities.flow of Later all butHighwayonly bysign ofhe doesdiffersbattery&amp;lasinglesthreatsintegertake onrefusedcalled =US&ampSee thenativesby thissystem.head of:hover,lesbiansurnameand allcommon/header__paramsHarvard/pixel.removalso longrole ofjointlyskyscraUnicodebr />
+AtlantanucleusCounty,purely count">easily build aonclicka givenpointerh&quot;events else {
+ditionsnow the, with man whoorg/Webone andcavalryHe diedseattle00,000 {windowhave toif(windand itssolely m&quot;renewedDetroitamongsteither them inSenatorUs</a><King ofFrancis-produche usedart andhim andused byscoringat hometo haverelatesibilityfactionBuffalolink"><what hefree toCity ofcome insectorscountedone daynervoussquare };if(goin whatimg" alis onlysearch/tuesdaylooselySolomonsexual - <a hrmedium"DO NOT France,with a war andsecond take a >
+
+
+market.highwaydone inctivity"last">obligedrise to"undefimade to Early praisedin its for hisathleteJupiterYahoo! termed so manyreally s. The a woman?value=direct right" bicycleacing="day andstatingRather,higher Office are nowtimes, when a pay foron this-link">;borderaround annual the Newput the.com" takin toa brief(in thegroups.; widthenzymessimple in late{returntherapya pointbanninginks">
+();" rea place\u003Caabout atr>
+		ccount gives a<SCRIPTRailwaythemes/toolboxById("xhumans,watchesin some if (wicoming formats Under but hashanded made bythan infear ofdenoted/iframeleft involtagein eacha&quot;base ofIn manyundergoregimesaction </p>
+<ustomVa;&gt;</importsor thatmostly &amp;re size="</a></ha classpassiveHost = WhetherfertileVarious=[];(fucameras/></td>acts asIn some>
+
+<!organis <br />Beijingcatalàdeutscheuropeueuskaragaeilgesvenskaespañamensajeusuariotrabajoméxicopáginasiempresistemaoctubreduranteañadirempresamomentonuestroprimeratravésgraciasnuestraprocesoestadoscalidadpersonanúmeroacuerdomúsicamiembroofertasalgunospaísesejemploderechoademásprivadoagregarenlacesposiblehotelessevillaprimeroúltimoeventosarchivoculturamujeresentradaanuncioembargomercadograndesestudiomejoresfebrerodiseñoturismocódigoportadaespaciofamiliaantoniopermiteguardaralgunaspreciosalguiensentidovisitastítuloconocersegundoconsejofranciaminutossegundatenemosefectosmálagasesiónrevistagranadacompraringresogarcíaacciónecuadorquienesinclusodeberámateriahombresmuestrapodríamañanaúltimaestamosoficialtambienningúnsaludospodemosmejorarpositionbusinesshomepagesecuritylanguagestandardcampaignfeaturescategoryexternalchildrenreservedresearchexchangefavoritetemplatemilitaryindustryservicesmaterialproductsz-index:commentssoftwarecompletecalendarplatformarticlesrequiredmovementquestionbuildingpoliticspossiblereligionphysicalfeedbackregisterpicturesdisabledprotocolaudiencesettingsactivityelementslearninganythingabstractprogressoverviewmagazineeconomictrainingpressurevarious <strong>propertyshoppingtogetheradvancedbehaviordownloadfeaturedfootballselectedLanguagedistanceremembertrackingpasswordmodifiedstudentsdirectlyfightingnortherndatabasefestivalbreakinglocationinternetdropdownpracticeevidencefunctionmarriageresponseproblemsnegativeprogramsanalysisreleasedbanner">purchasepoliciesregionalcreativeargumentbookmarkreferrerchemicaldivisioncallbackseparateprojectsconflicthardwareinterestdeliverymountainobtained= false;for(var acceptedcapacitycomputeridentityaircraftemployedproposeddomesticincludesprovidedhospitalverticalcollapseapproachpartnerslogo"><adaughterauthor" culturalfamilies/images/assemblypowerfulteachingfinisheddistrictcriticalcgi-bin/purposesrequireselectionbecomingprovidesacademicexerciseactuallymedicineconstantaccidentMagazinedocumentstartingbottom">observed: 
&quot;extendedpreviousSoftwarecustomerdecisionstrengthdetailedslightlyplanningtextareacurrencyeveryonestraighttransferpositiveproducedheritageshippingabsolutereceivedrelevantbutton" violenceanywherebenefitslaunchedrecentlyalliancefollowedmultiplebulletinincludedoccurredinternal$(this).republic><tr><tdcongressrecordedultimatesolution<ul id="discoverHome</a>websitesnetworksalthoughentirelymemorialmessagescontinueactive">somewhatvictoriaWestern  title="LocationcontractvisitorsDownloadwithout right">
+measureswidth = variableinvolvedvirginianormallyhappenedaccountsstandingnationalRegisterpreparedcontrolsaccuratebirthdaystrategyofficialgraphicscriminalpossiblyconsumerPersonalspeakingvalidateachieved.jpg" />machines</h2>
+  keywordsfriendlybrotherscombinedoriginalcomposedexpectedadequatepakistanfollow" valuable</label>relativebringingincreasegovernorplugins/List of Header">" name=" (&quot;graduate</head>
+commercemalaysiadirectormaintain;height:schedulechangingback to catholicpatternscolor: #greatestsuppliesreliable</ul>
+		<select citizensclothingwatching<li id="specificcarryingsentence<center>contrastthinkingcatch(e)southernMichael merchantcarouselpadding:interior.split("lizationOctober ){returnimproved--&gt;
+
+coveragechairman.png" />subjectsRichard whateverprobablyrecoverybaseballjudgmentconnect..css" /> websitereporteddefault"/></a>
+electricscotlandcreationquantity. ISBN 0did not instance-search-" lang="speakersComputercontainsarchivesministerreactiondiscountItalianocriteriastrongly: 'http:'script'coveringofferingappearedBritish identifyFacebooknumerousvehiclesconcernsAmericanhandlingdiv id="William provider_contentaccuracysection andersonflexibleCategorylawrence<script>layout="approved maximumheader"></table>Serviceshamiltoncurrent canadianchannels/themes//articleoptionalportugalvalue=""intervalwirelessentitledagenciesSearch" measuredthousandspending&hellip;new Date" size="pageNamemiddle" " /></a>hidden">sequencepersonaloverflowopinionsillinoislinks">
+	<title>versionssaturdayterminalitempropengineersectionsdesignerproposal="false"Españolreleasessubmit" er&quot;additionsymptomsorientedresourceright"><pleasurestationshistory.leaving  border=contentscenter">.
+
+Some directedsuitablebulgaria.show();designedGeneral conceptsExampleswilliamsOriginal"><span>search">operatorrequestsa &quot;allowingDocumentrevision. 
+
+The yourselfContact michiganEnglish columbiapriorityprintingdrinkingfacilityreturnedContent officersRussian generate-8859-1"indicatefamiliar qualitymargin:0 contentviewportcontacts-title">portable.length eligibleinvolvesatlanticonload="default.suppliedpaymentsglossary
+
+After guidance</td><tdencodingmiddle">came to displaysscottishjonathanmajoritywidgets.clinicalthailandteachers<head>
+	affectedsupportspointer;toString</small>oklahomawill be investor0" alt="holidaysResourcelicensed (which . After considervisitingexplorerprimary search" android"quickly meetingsestimate;return ;color:# height=approval, &quot; checked.min.js"magnetic></a></hforecast. While thursdaydvertise&eacute;hasClassevaluateorderingexistingpatients Online coloradoOptions"campbell<!-- end</span><<br />
+_popups|sciences,&quot; quality Windows assignedheight: <b classle&quot; value=" Companyexamples<iframe believespresentsmarshallpart of properly).
+
+The taxonomymuch of </span>
+" data-srtuguêsscrollTo project<head>
+attorneyemphasissponsorsfancyboxworld's wildlifechecked=sessionsprogrammpx;font- Projectjournalsbelievedvacationthompsonlightingand the special border=0checking</tbody><button Completeclearfix
+<head>
+article <sectionfindingsrole in popular  Octoberwebsite exposureused to  changesoperatedclickingenteringcommandsinformed numbers  </div>creatingonSubmitmarylandcollegesanalyticlistingscontact.loggedInadvisorysiblingscontent"s&quot;)s. This packagescheckboxsuggestspregnanttomorrowspacing=icon.pngjapanesecodebasebutton">gamblingsuch as , while </span> missourisportingtop:1px .</span>tensionswidth="2lazyloadnovemberused in height="cript">
+&nbsp;</<tr><td height:2/productcountry include footer" &lt;!-- title"></jquery.</form>
+(简体)(繁體)hrvatskiitalianoromânătürkçeاردوtambiénnoticiasmensajespersonasderechosnacionalserviciocontactousuariosprogramagobiernoempresasanunciosvalenciacolombiadespuésdeportesproyectoproductopúbliconosotroshistoriapresentemillonesmediantepreguntaanteriorrecursosproblemasantiagonuestrosopiniónimprimirmientrasaméricavendedorsociedadrespectorealizarregistropalabrasinterésentoncesespecialmiembrosrealidadcórdobazaragozapáginassocialesbloqueargestiónalquilersistemascienciascompletoversióncompletaestudiospúblicaobjetivoalicantebuscadorcantidadentradasaccionesarchivossuperiormayoríaalemaniafunciónúltimoshaciendoaquellosediciónfernandoambientefacebooknuestrasclientesprocesosbastantepresentareportarcongresopublicarcomerciocontratojóvenesdistritotécnicaconjuntoenergíatrabajarasturiasrecienteutilizarboletínsalvadorcorrectatrabajosprimerosnegocioslibertaddetallespantallapróximoalmeríaanimalesquiénescorazónsecciónbuscandoopcionesexteriorconceptotodavíagaleríaescribirmedicinalicenciaconsultaaspectoscríticadólaresjusticiadeberánperíodonecesitamantenerpequeñorecibidatribunaltenerifecancióncanariasdescargadiversosmallorcarequieretécnicodeberíaviviendafinanzasadelantefuncionaconsejosdifícilciudadesantiguasavanzadatérminounidadessánchezcampañasoftonicrevistascontienesectoresmomentosfacultadcréditodiversassupuestofactoressegundospequeñaгодаеслиестьбылобытьэтомЕслитогоменявсехэтойдажебылигодуденьэтотбыласебяодинсебенадосайтфотонегосвоисвойигрытожевсемсвоюлишьэтихпокаднейдомамиралиботемухотядвухсетилюдиделомиретебясвоевидечегоэтимсчеттемыценысталведьтемеводытебевышенамитипатомуправлицаоднагодызнаюмогудругвсейидеткиноодноделаделесрокиюнявесьЕстьразанашиاللهالتيجميعخاصةالذيعليهجديدالآنالردتحكمصفحةكانتاللييكونشبكةفيهابناتحواءأكثرخلالالحبدليلدروساضغطتكونهناكساحةناديالطبعليكشكرايمكنمنهاشركةرئيسنشيطماذاالفنشبابتعبررحمةكافةيقولمركزكلمةأحمدقلبييعنيصورةطريقشاركجوالأخرىمعناابحثعروضبشكلمسجلبنانخالدكتابكليةبدونأيضايوجدفريقكتبتأفضلمطبخاكثرباركافضلاحلىنفسهأيامردودأنهاديناالانمعرضتعلمداخلممكن           
           	
+
+	����        ����                  ��      ��                resourcescountriesquestionsequipmentcommunityavailablehighlightDTD/xhtmlmarketingknowledgesomethingcontainerdirectionsubscribeadvertisecharacter" value="</select>Australia" class="situationauthorityfollowingprimarilyoperationchallengedevelopedanonymousfunction functionscompaniesstructureagreement" title="potentialeducationargumentssecondarycopyrightlanguagesexclusivecondition</form>
+statementattentionBiography} else {
+solutionswhen the Analyticstemplatesdangeroussatellitedocumentspublisherimportantprototypeinfluence&raquo;</effectivegenerallytransformbeautifultransportorganizedpublishedprominentuntil thethumbnailNational .focus();over the migrationannouncedfooter">
+exceptionless thanexpensiveformationframeworkterritoryndicationcurrentlyclassNamecriticismtraditionelsewhereAlexanderappointedmaterialsbroadcastmentionedaffiliate</option>treatmentdifferent/default.Presidentonclick="biographyotherwisepermanentFrançaisHollywoodexpansionstandards</style>
+reductionDecember preferredCambridgeopponentsBusiness confusion>
+<title>presentedexplaineddoes not worldwideinterfacepositionsnewspaper</table>
+mountainslike the essentialfinancialselectionaction="/abandonedEducationparseInt(stabilityunable to</title>
+relationsNote thatefficientperformedtwo yearsSince thethereforewrapper">alternateincreasedBattle ofperceivedtrying tonecessaryportrayedelectionsElizabeth</iframe>discoveryinsurances.length;legendaryGeographycandidatecorporatesometimesservices.inherited</strong>CommunityreligiouslocationsCommitteebuildingsthe worldno longerbeginningreferencecannot befrequencytypicallyinto the relative;recordingpresidentinitiallytechniquethe otherit can beexistenceunderlinethis timetelephoneitemscopepracticesadvantage);return For otherprovidingdemocracyboth the extensivesufferingsupportedcomputers functionpracticalsaid thatit may beEnglish</from the scheduleddownloads</label>
+suspectedmargin: 0spiritual</head>
+
+microsoftgraduallydiscussedhe becameexecutivejquery.jshouseholdconfirmedpurchasedliterallydestroyedup to thevariationremainingit is notcenturiesJapanese among thecompletedalgorithminterestsrebellionundefinedencourageresizableinvolvingsensitiveuniversalprovision(althoughfeaturingconducted), which continued-header">February numerous overflow:componentfragmentsexcellentcolspan="technicalnear the Advanced source ofexpressedHong Kong Facebookmultiple mechanismelevationoffensive</form>
+	sponsoreddocument.or &quot;there arethose whomovementsprocessesdifficultsubmittedrecommendconvincedpromoting" width=".replace(classicalcoalitionhis firstdecisionsassistantindicatedevolution-wrapper"enough toalong thedelivered-->
+<!--American protectedNovember </style><furnitureInternet  onblur="suspendedrecipientbased on Moreover,abolishedcollectedwere madeemotionalemergencynarrativeadvocatespx;bordercommitteddir="ltr"employeesresearch. selectedsuccessorcustomersdisplayedSeptemberaddClass(Facebook suggestedand lateroperatingelaborateSometimesInstitutecertainlyinstalledfollowersJerusalemthey havecomputinggeneratedprovincesguaranteearbitraryrecognizewanted topx;width:theory ofbehaviourWhile theestimatedbegan to it becamemagnitudemust havemore thanDirectoryextensionsecretarynaturallyoccurringvariablesgiven theplatform.</label><failed tocompoundskinds of societiesalongside --&gt;
+
+southwestthe rightradiationmay have unescape(spoken in" href="/programmeonly the come fromdirectoryburied ina similarthey were</font></Norwegianspecifiedproducingpassenger(new DatetemporaryfictionalAfter theequationsdownload.regularlydeveloperabove thelinked tophenomenaperiod oftooltip">substanceautomaticaspect ofAmong theconnectedestimatesAir Forcesystem ofobjectiveimmediatemaking itpaintingsconqueredare stillproceduregrowth ofheaded byEuropean divisionsmoleculesfranchiseintentionattractedchildhoodalso useddedicatedsingaporedegree offather ofconflicts</a></p>
+came fromwere usednote thatreceivingExecutiveeven moreaccess tocommanderPoliticalmusiciansdeliciousprisonersadvent ofUTF-8" /><![CDATA[">ContactSouthern bgcolor="series of. It was in Europepermittedvalidate.appearingofficialsseriously-languageinitiatedextendinglong-terminflationsuch thatgetCookiemarked by</button>implementbut it isincreasesdown the requiringdependent-->
+<!-- interviewWith the copies ofconsensuswas builtVenezuela(formerlythe statepersonnelstrategicfavour ofinventionWikipediacontinentvirtuallywhich wasprincipleComplete identicalshow thatprimitiveaway frommolecularpreciselydissolvedUnder theversion=">&nbsp;</It is the This is will haveorganismssome timeFriedrichwas firstthe only fact thatform id="precedingTechnicalphysicistoccurs innavigatorsection">span id="sought tobelow thesurviving}</style>his deathas in thecaused bypartiallyexisting using thewas givena list oflevels ofnotion ofOfficial dismissedscientistresemblesduplicateexplosiverecoveredall othergalleries{padding:people ofregion ofaddressesassociateimg alt="in modernshould bemethod ofreportingtimestampneeded tothe Greatregardingseemed toviewed asimpact onidea thatthe Worldheight ofexpandingThese arecurrent">carefullymaintainscharge ofClassicaladdressedpredictedownership<div id="right">
+residenceleave thecontent">are often  })();
+probably Professor-button" respondedsays thathad to beplaced inHungarianstatus ofserves asUniversalexecutionaggregatefor whichinfectionagreed tohowever, popular">placed onconstructelectoralsymbol ofincludingreturn toarchitectChristianprevious living ineasier toprofessor
+&lt;!-- effect ofanalyticswas takenwhere thetook overbelief inAfrikaansas far aspreventedwork witha special<fieldsetChristmasRetrieved
+
+In the back intonortheastmagazines><strong>committeegoverninggroups ofstored inestablisha generalits firsttheir ownpopulatedan objectCaribbeanallow thedistrictswisconsinlocation.; width: inhabitedSocialistJanuary 1</footer>similarlychoice ofthe same specific business The first.length; desire todeal withsince theuserAgentconceivedindex.phpas &quot;engage inrecently,few yearswere also
+<head>
+<edited byare knowncities inaccesskeycondemnedalso haveservices,family ofSchool ofconvertednature of languageministers</object>there is a popularsequencesadvocatedThey wereany otherlocation=enter themuch morereflectedwas namedoriginal a typicalwhen theyengineerscould notresidentswednesdaythe third productsJanuary 2what theya certainreactionsprocessorafter histhe last contained"></div>
+</a></td>depend onsearch">
+pieces ofcompetingReferencetennesseewhich has version=</span> <</header>gives thehistorianvalue="">padding:0view thattogether,the most was foundsubset ofattack onchildren,points ofpersonal position:allegedlyClevelandwas laterand afterare givenwas stillscrollingdesign ofmakes themuch lessAmericans.
+
+After , but theMuseum oflouisiana(from theminnesotaparticlesa processDominicanvolume ofreturningdefensive00px|righmade frommouseover" style="states of(which iscontinuesFranciscobuilding without awith somewho woulda form ofa part ofbefore itknown as  Serviceslocation and oftenmeasuringand it ispaperbackvalues of
+<title>= window.determineer&quot; played byand early</center>from thisthe threepower andof &quot;innerHTML<a href="y:inline;Church ofthe eventvery highofficial -height: content="/cgi-bin/to createafrikaansesperantofrançaislatviešulietuviųČeštinačeštinaไทย日本語简体字繁體字한국어为什么计算机笔记本討論區服务器互联网房地产俱乐部出版社排行榜部落格进一步支付宝验证码委员会数据库消费者办公室讨论区深圳市播放器北京市大学生越来越管理员信息网serviciosartículoargentinabarcelonacualquierpublicadoproductospolíticarespuestawikipediasiguientebúsquedacomunidadseguridadprincipalpreguntascontenidorespondervenezuelaproblemasdiciembrerelaciónnoviembresimilaresproyectosprogramasinstitutoactividadencuentraeconomíaimágenescontactardescargarnecesarioatenciónteléfonocomisióncancionescapacidadencontraranálisisfavoritostérminosprovinciaetiquetaselementosfuncionesresultadocarácterpropiedadprincipionecesidadmunicipalcreacióndescargaspresenciacomercialopinionesejercicioeditorialsalamancagonzálezdocumentopelícularecientesgeneralestarragonaprácticanovedadespropuestapacientestécnicasobjetivoscontactosमेंलिएहैंगयासाथएवंरहेकोईकुछरहाबादकहासभीहुएरहीमैंदिनबातdiplodocsसमयरूपनामपताफिरऔसततरहलोगहुआबारदेशहुईखेलयदिकामवेबतीनबीचमौतसाललेखजॉबमददतथानहीशहरअलगकभीनगरपासरातकिएउसेगयीहूँआगेटीमखोजकारअभीगयेतुमवोटदेंअगरऐसेमेललगाहालऊपरचारऐसादेरजिसदिलबंदबनाहूंलाखजीतबटनमिलइसेआनेनयाकुललॉगभागरेलजगहरामलगेपेजहाथइसीसहीकलाठीकहाँदूरतहतसातयादआयापाककौनशामदेखयहीरायखुदलगीcategoriesexperience</title>
+Copyright javascriptconditionseverything<p class="technologybackground<a class="management&copy; 201javaScriptcharactersbreadcrumbthemselveshorizontalgovernmentCaliforniaactivitiesdiscoveredNavigationtransitionconnectionnavigationappearance</title><mcheckbox" techniquesprotectionapparentlyas well asunt', 'UA-resolutionoperationstelevisiontranslatedWashingtonnavigator. = window.impression&lt;br&gt;literaturepopulationbgcolor="#especially content="productionnewsletterpropertiesdefinitionleadershipTechnologyParliamentcomparisonul class=".indexOf("conclusiondiscussioncomponentsbiologicalRevolution_containerunderstoodnoscript><permissioneach otheratmosphere onfocus="<form id="processingthis.valuegenerationConferencesubsequentwell-knownvariationsreputationphenomenondisciplinelogo.png" (document,boundariesexpressionsettlementBackgroundout of theenterprise("https:" unescape("password" democratic<a href="/wrapper">
+membershiplinguisticpx;paddingphilosophyassistanceuniversityfacilitiesrecognizedpreferenceif (typeofmaintainedvocabularyhypothesis.submit();&amp;nbsp;annotationbehind theFoundationpublisher"assumptionintroducedcorruptionscientistsexplicitlyinstead ofdimensions onClick="considereddepartmentoccupationsoon afterinvestmentpronouncedidentifiedexperimentManagementgeographic" height="link rel=".replace(/depressionconferencepunishmenteliminatedresistanceadaptationoppositionwell knownsupplementdeterminedh1 class="0px;marginmechanicalstatisticscelebratedGovernment
+
+During tdevelopersartificialequivalentoriginatedCommissionattachment<span id="there wereNederlandsbeyond theregisteredjournalistfrequentlyall of thelang="en" </style>
+absolute; supportingextremely mainstream</strong> popularityemployment</table>
+ colspan="</form>
+  conversionabout the </p></div>integrated" lang="enPortuguesesubstituteindividualimpossiblemultimediaalmost allpx solid #apart fromsubject toin Englishcriticizedexcept forguidelinesoriginallyremarkablethe secondh2 class="<a title="(includingparametersprohibited= "http://dictionaryperceptionrevolutionfoundationpx;height:successfulsupportersmillenniumhis fatherthe &quot;no-repeat;commercialindustrialencouragedamount of unofficialefficiencyReferencescoordinatedisclaimerexpeditiondevelopingcalculatedsimplifiedlegitimatesubstring(0" class="completelyillustratefive yearsinstrumentPublishing1" class="psychologyconfidencenumber of absence offocused onjoined thestructurespreviously></iframe>once againbut ratherimmigrantsof course,a group ofLiteratureUnlike the</a>&nbsp;
+function it was theConventionautomobileProtestantaggressiveafter the Similarly," /></div>collection
+functionvisibilitythe use ofvolunteersattractionunder the threatened*<![CDATA[importancein generalthe latter</form>
+</.indexOf('i = 0; i <differencedevoted totraditionssearch forultimatelytournamentattributesso-called }
+</style>evaluationemphasizedaccessible</section>successionalong withMeanwhile,industries</a><br />has becomeaspects ofTelevisionsufficientbasketballboth sidescontinuingan article<img alt="adventureshis mothermanchesterprinciplesparticularcommentaryeffects ofdecided to"><strong>publishersJournal ofdifficultyfacilitateacceptablestyle.css"	function innovation>Copyrightsituationswould havebusinessesDictionarystatementsoften usedpersistentin Januarycomprising</title>
+	diplomaticcontainingperformingextensionsmay not beconcept of onclick="It is alsofinancial making theLuxembourgadditionalare calledengaged in"script");but it waselectroniconsubmit="
+<!-- End electricalofficiallysuggestiontop of theunlike theAustralianOriginallyreferences
+</head>
+recognisedinitializelimited toAlexandriaretirementAdventuresfour years
+
+&lt;!-- increasingdecorationh3 class="origins ofobligationregulationclassified(function(advantagesbeing the historians<base hrefrepeatedlywilling tocomparabledesignatednominationfunctionalinside therevelationend of thes for the authorizedrefused totake placeautonomouscompromisepolitical restauranttwo of theFebruary 2quality ofswfobject.understandnearly allwritten byinterviews" width="1withdrawalfloat:leftis usuallycandidatesnewspapersmysteriousDepartmentbest knownparliamentsuppressedconvenientremembereddifferent systematichas led topropagandacontrolledinfluencesceremonialproclaimedProtectionli class="Scientificclass="no-trademarksmore than widespreadLiberationtook placeday of theas long asimprisonedAdditional
+<head>
+<mLaboratoryNovember 2exceptionsIndustrialvariety offloat: lefDuring theassessmenthave been deals withStatisticsoccurrence/ul></div>clearfix">the publicmany yearswhich wereover time,synonymouscontent">
+presumablyhis familyuserAgent.unexpectedincluding challengeda minorityundefined"belongs totaken fromin Octoberposition: said to bereligious Federation rowspan="only a fewmeant thatled to the-->
+<div <fieldset>Archbishop class="nobeing usedapproachesprivilegesnoscript>
+results inmay be theEaster eggmechanismsreasonablePopulationCollectionselected">noscript>/index.phparrival of-jssdk'));managed toincompletecasualtiescompletionChristiansSeptember arithmeticproceduresmight haveProductionit appearsPhilosophyfriendshipleading togiving thetoward theguaranteeddocumentedcolor:#000video gamecommissionreflectingchange theassociatedsans-serifonkeypress; padding:He was theunderlyingtypically , and the srcElementsuccessivesince the should be networkingaccountinguse of thelower thanshows that</span>
+		complaintscontinuousquantitiesastronomerhe did notdue to itsapplied toan averageefforts tothe futureattempt toTherefore,capabilityRepublicanwas formedElectronickilometerschallengespublishingthe formerindigenousdirectionssubsidiaryconspiracydetails ofand in theaffordablesubstancesreason forconventionitemtype="absolutelysupposedlyremained aattractivetravellingseparatelyfocuses onelementaryapplicablefound thatstylesheetmanuscriptstands for no-repeat(sometimesCommercialin Americaundertakenquarter ofan examplepersonallyindex.php?</button>
+percentagebest-knowncreating a" dir="ltrLieutenant
+<div id="they wouldability ofmade up ofnoted thatclear thatargue thatto anotherchildren'spurpose offormulatedbased uponthe regionsubject ofpassengerspossession.
+
+In the Before theafterwardscurrently across thescientificcommunity.capitalismin Germanyright-wingthe systemSociety ofpoliticiandirection:went on toremoval of New York apartmentsindicationduring theunless thehistoricalhad been adefinitiveingredientattendanceCenter forprominencereadyStatestrategiesbut in theas part ofconstituteclaim thatlaboratorycompatiblefailure of, such as began withusing the to providefeature offrom which/" class="geologicalseveral ofdeliberateimportant holds thating&quot; valign=topthe Germanoutside ofnegotiatedhis careerseparationid="searchwas calledthe fourthrecreationother thanpreventionwhile the education,connectingaccuratelywere builtwas killedagreementsmuch more Due to thewidth: 100some otherKingdom ofthe entirefamous forto connectobjectivesthe Frenchpeople andfeatured">is said tostructuralreferendummost oftena separate->
+<div id Official worldwide.aria-labelthe planetand it wasd" value="looking atbeneficialare in themonitoringreportedlythe modernworking onallowed towhere the innovative</a></div>soundtracksearchFormtend to beinput id="opening ofrestrictedadopted byaddressingtheologianmethods ofvariant ofChristian very largeautomotiveby far therange frompursuit offollow thebrought toin Englandagree thataccused ofcomes frompreventingdiv style=his or hertremendousfreedom ofconcerning0 1em 1em;Basketball/style.cssan earliereven after/" title=".com/indextaking thepittsburghcontent"><script>(fturned outhaving the</span>
+ occasionalbecause itstarted tophysically></div>
+  created byCurrently, bgcolor="tabindex="disastrousAnalytics also has a><div id="</style>
+<called forsinger and.src = "//violationsthis pointconstantlyis locatedrecordingsd from thenederlandsportuguêsעבריתفارسیdesarrollocomentarioeducaciónseptiembreregistradodirecciónubicaciónpublicidadrespuestasresultadosimportantereservadosartículosdiferentessiguientesrepúblicasituaciónministerioprivacidaddirectorioformaciónpoblaciónpresidentecontenidosaccesoriostechnoratipersonalescategoríaespecialesdisponibleactualidadreferenciavalladolidbibliotecarelacionescalendariopolíticasanterioresdocumentosnaturalezamaterialesdiferenciaeconómicatransporterodríguezparticiparencuentrandiscusiónestructurafundaciónfrecuentespermanentetotalmenteможнобудетможетвремятакжечтобыболееоченьэтогокогдапослевсегосайтечерезмогутсайтажизнимеждубудутПоискздесьвидеосвязинужносвоейлюдейпорномногодетейсвоихправатакойместоимеетжизньоднойлучшепередчастичастьработновыхправособойпотомменеечисленовыеуслугоколоназадтакоетогдапочтиПослетакиеновыйстоиттакихсразуСанктфорумКогдакнигислованашейнайтисвоимсвязьлюбойчастосредиКромеФорумрынкесталипоисктысячмесяццентртрудасамыхрынкаНовыйчасовместафильммартастранместетекстнашихминутимениимеютномергородсамомэтомуконцесвоемкакойАрхивمنتدىإرسالرسالةالعامكتبهابرامجاليومالصورجديدةالعضوإضافةالقسمالعابتحميلملفاتملتقىتعديلالشعرأخبارتطويرعليكمإرفاقطلباتاللغةترتيبالناسالشيخمنتديالعربالقصصافلامعليهاتحديثاللهمالعملمكتبةيمكنكالطفلفيديوإدارةتاريخالصحةتسجيلالوقتعندمامدينةتصميمأرشيفالذينعربيةبوابةألعابالسفرمشاكلتعالىالأولالسنةجامعةالصحفالدينكلماتالخاصالملفأعضاءكتابةالخيررسائلالقلبالأدبمقاطعمراسلمنطقةالكتبالرجلاشتركالقدميعطيكsByTagName(.jpg" alt="1px solid #.gif" alt="transparentinformationapplication" onclick="establishedadvertising.png" alt="environmentperformanceappropriate&amp;mdash;immediately</strong></rather thantemperaturedevelopmentcompetitionplaceholdervisibility:copyright">0" height="even thoughreplacementdestinationCorporation<ul class="AssociationindividualsperspectivesetTimeout(url(http://mathematicsmargin-top:eventually description) 
no-repeatcollections.JPG|thumb|participate/head><bodyfloat:left;<li class="hundreds of
+
+However, compositionclear:both;cooperationwithin the label for="border-top:New Zealandrecommendedphotographyinteresting&lt;sup&gt;controversyNetherlandsalternativemaxlength="switzerlandDevelopmentessentially
+
+Although </textarea>thunderbirdrepresented&amp;ndash;speculationcommunitieslegislationelectronics
+	<div id="illustratedengineeringterritoriesauthoritiesdistributed6" height="sans-serif;capable of disappearedinteractivelooking forit would beAfghanistanwas createdMath.floor(surroundingcan also beobservationmaintenanceencountered<h2 class="more recentit has beeninvasion of).getTime()fundamentalDespite the"><div id="inspirationexaminationpreparationexplanation<input id="</a></span>versions ofinstrumentsbefore the  = 'http://Descriptionrelatively .substring(each of theexperimentsinfluentialintegrationmany peopledue to the combinationdo not haveMiddle East<noscript><copyright" perhaps theinstitutionin Decemberarrangementmost famouspersonalitycreation oflimitationsexclusivelysovereignty-content">
+<td class="undergroundparallel todoctrine ofoccupied byterminologyRenaissancea number ofsupport forexplorationrecognitionpredecessor<img src="/<h1 class="publicationmay also bespecialized</fieldset>progressivemillions ofstates thatenforcementaround the one another.parentNodeagricultureAlternativeresearcherstowards theMost of themany other (especially<td width=";width:100%independent<h3 class=" onchange=").addClass(interactionOne of the daughter ofaccessoriesbranches of
+<div id="the largestdeclarationregulationsInformationtranslationdocumentaryin order to">
+<head>
+<" height="1across the orientation);</script>implementedcan be seenthere was ademonstratecontainer">connectionsthe Britishwas written!important;px; margin-followed byability to complicatedduring the immigrationalso called<h4 class="distinctionreplaced bygovernmentslocation ofin Novemberwhether the</p>
+</div>acquisitioncalled the persecutiondesignation{font-size:appeared ininvestigateexperiencedmost likelywidely useddiscussionspresence of (document.extensivelyIt has beenit does notcontrary toinhabitantsimprovementscholarshipconsumptioninstructionfor exampleone or morepx; paddingthe currenta series ofare usuallyrole in thepreviously derivativesevidence ofexperiencescolorschemestated thatcertificate</a></div>
+ selected="high schoolresponse tocomfortableadoption ofthree yearsthe countryin Februaryso that thepeople who provided by<param nameaffected byin terms ofappointmentISO-8859-1"was born inhistorical regarded asmeasurementis based on and other : function(significantcelebrationtransmitted/js/jquery.is known astheoretical tabindex="it could be<noscript>
+having been
+<head>
+< &quot;The compilationhe had beenproduced byphilosopherconstructedintended toamong othercompared toto say thatEngineeringa differentreferred todifferencesbelief thatphotographsidentifyingHistory of Republic ofnecessarilyprobabilitytechnicallyleaving thespectacularfraction ofelectricityhead of therestaurantspartnershipemphasis onmost recentshare with saying thatfilled withdesigned toit is often"></iframe>as follows:merged withthrough thecommercial pointed outopportunityview of therequirementdivision ofprogramminghe receivedsetInterval"></span></in New Yorkadditional compression
+
+<div id="incorporate;</script><attachEventbecame the " target="_carried outSome of thescience andthe time ofContainer">maintainingChristopherMuch of thewritings of" height="2size of theversion of mixture of between theExamples ofeducationalcompetitive onsubmit="director ofdistinctive/DTD XHTML relating totendency toprovince ofwhich woulddespite thescientific legislature.innerHTML allegationsAgriculturewas used inapproach tointelligentyears later,sans-serifdeterminingPerformanceappearances, which is foundationsabbreviatedhigher thans from the individual composed ofsupposed toclaims thatattributionfont-size:1elements ofHistorical his brotherat the timeanniversarygoverned byrelated to ultimately innovationsit is stillcan only bedefinitionstoGMTStringA number ofimg class="Eventually,was changedoccurred inneighboringdistinguishwhen he wasintroducingterrestrialMany of theargues thatan Americanconquest ofwidespread were killedscreen and In order toexpected todescendantsare locatedlegislativegenerations backgroundmost peopleyears afterthere is nothe highestfrequently they do notargued thatshowed thatpredominanttheologicalby the timeconsideringshort-lived</span></a>can be usedvery littleone of the had alreadyinterpretedcommunicatefeatures ofgovernment,</noscript>entered the" height="3Independentpopulationslarge-scale. Although used in thedestructionpossibilitystarting intwo or moreexpressionssubordinatelarger thanhistory and</option>
+Continentaleliminatingwill not bepractice ofin front ofsite of theensure thatto create amississippipotentiallyoutstandingbetter thanwhat is nowsituated inmeta name="TraditionalsuggestionsTranslationthe form ofatmosphericideologicalenterprisescalculatingeast of theremnants ofpluginspage/index.php?remained intransformedHe was alsowas alreadystatisticalin favor ofMinistry ofmovement offormulationis required<link rel="This is the <a href="/popularizedinvolved inare used toand severalmade by theseems to belikely thatPalestiniannamed afterit had beenmost commonto refer tobut this isconsecutivetemporarilyIn general,conventionstakes placesubdivisionterritorialoperationalpermanentlywas largelyoutbreak ofin the pastfollowing a xmlns:og="><a class="class="textConversion may be usedmanufactureafter beingclearfix">
+question ofwas electedto become abecause of some peopleinspired bysuccessful a time whenmore commonamongst thean officialwidth:100%;technology,was adoptedto keep thesettlementslive birthsindex.html"Connecticutassigned to&amp;times;account foralign=rightthe companyalways beenreturned toinvolvementBecause thethis period" name="q" confined toa result ofvalue="" />is actuallyEnvironment
+</head>
+Conversely,>
+<div id="0" width="1is probablyhave becomecontrollingthe problemcitizens ofpoliticiansreached theas early as:none; over<table cellvalidity ofdirectly toonmousedownwhere it iswhen it wasmembers of relation toaccommodatealong with In the latethe Englishdelicious">this is notthe presentif they areand finallya matter of
+	</div>
+
+</script>faster thanmajority ofafter whichcomparativeto maintainimprove theawarded theer" class="frameborderrestorationin the sameanalysis oftheir firstDuring the continentalsequence offunction(){font-size: work on the</script>
+<begins withjavascript:constituentwas foundedequilibriumassume thatis given byneeds to becoordinatesthe variousare part ofonly in thesections ofis a commontheories ofdiscoveriesassociationedge of thestrength ofposition inpresent-dayuniversallyto form thebut insteadcorporationattached tois commonlyreasons for &quot;the can be madewas able towhich meansbut did notonMouseOveras possibleoperated bycoming fromthe primaryaddition offor severaltransferreda period ofare able tohowever, itshould havemuch larger
+	</script>adopted theproperty ofdirected byeffectivelywas broughtchildren ofProgramminglonger thanmanuscriptswar againstby means ofand most ofsimilar to proprietaryoriginatingprestigiousgrammaticalexperience.to make theIt was alsois found incompetitorsin the U.S.replace thebrought thecalculationfall of thethe generalpracticallyin honor ofreleased inresidentialand some ofking of thereaction to1st Earl ofculture andprincipally</title>
+  they can beback to thesome of hisexposure toare similarform of theaddFavoritecitizenshippart in thepeople within practiceto continue&amp;minus;approved by the first allowed theand for thefunctioningplaying thesolution toheight="0" in his bookmore than afollows thecreated thepresence in&nbsp;</td>nationalistthe idea ofa characterwere forced class="btndays of thefeatured inshowing theinterest inin place ofturn of thethe head ofLord of thepoliticallyhas its ownEducationalapproval ofsome of theeach other,behavior ofand becauseand anotherappeared onrecorded inblack&quot;may includethe world'scan lead torefers to aborder="0" government winning theresulted in while the Washington,the subjectcity in the></div>
+		reflect theto completebecame moreradioactiverejected bywithout anyhis father,which couldcopy of theto indicatea politicalaccounts ofconstitutesworked wither</a></li>of his lifeaccompaniedclientWidthprevent theLegislativedifferentlytogether inhas severalfor anothertext of thefounded thee with the is used forchanged theusually theplace wherewhereas the> <a href=""><a href="themselves,although hethat can betraditionalrole of theas a resultremoveChilddesigned bywest of theSome peopleproduction,side of thenewslettersused by thedown to theaccepted bylive in theattempts tooutside thefrequenciesHowever, inprogrammersat least inapproximatealthough itwas part ofand variousGovernor ofthe articleturned into><a href="/the economyis the mostmost widelywould laterand perhapsrise to theoccurs whenunder whichconditions.the westerntheory thatis producedthe city ofin which heseen in thethe centralbuilding ofmany of hisarea of theis the onlymost of themany of thethe WesternThere is noextended toStatisticalcolspan=2 |short storypossible totopologicalcritical ofreported toa Christiandecision tois equal toproblems ofThis can bemerchandisefor most ofno evidenceeditions ofelements in&quot;. Thecom/images/which makesthe processremains theliterature,is a memberthe popularthe ancientproblems intime of thedefeated bybody of thea few yearsmuch of thethe work ofCalifornia,served as agovernment.concepts ofmovement in		<div id="it" value="language ofas they areproduced inis that theexplain thediv></div>
+However thelead to the	<a href="/was grantedpeople havecontinuallywas seen asand relatedthe role ofproposed byof the besteach other.Constantinepeople fromdialects ofto revisionwas renameda source ofthe initiallaunched inprovide theto the westwhere thereand similarbetween twois also theEnglish andconditions,that it wasentitled tothemselves.quantity ofransparencythe same asto join thecountry andthis is theThis led toa statementcontrast tolastIndexOfthrough hisis designedthe term isis providedprotect theng</a></li>The currentthe site ofsubstantialexperience,in the Westthey shouldslovenčinacomentariosuniversidadcondicionesactividadesexperienciatecnologíaproducciónpuntuaciónaplicacióncontraseñacategoríasregistrarseprofesionaltratamientoregístratesecretaríaprincipalesprotecciónimportantesimportanciaposibilidadinteresantecrecimientonecesidadessuscribirseasociacióndisponiblesevaluaciónestudiantesresponsableresoluciónguadalajararegistradosoportunidadcomercialesfotografíaautoridadesingenieríatelevisióncompetenciaoperacionesestablecidosimplementeactualmentenavegaciónconformidadline-height:font-family:" : "http://applicationslink" href="specifically//<![CDATA[
+Organizationdistribution0px; height:relationshipdevice-width<div class="<label for="registration</noscript>
+/index.html"window.open( !important;application/independence//www.googleorganizationautocompleterequirementsconservative<form name="intellectualmargin-left:18th centuryan importantinstitutionsabbreviation<img class="organisationcivilization19th centuryarchitectureincorporated20th century-container">most notably/></a></div>notification'undefined')Furthermore,believe thatinnerHTML = prior to thedramaticallyreferring tonegotiationsheadquartersSouth AfricaunsuccessfulPennsylvaniaAs a result,<html lang="&lt;/sup&gt;dealing withphiladelphiahistorically);</script>
+padding-top:experimentalgetAttributeinstructionstechnologiespart of the =function(){subscriptionl.dtd">
+<htgeographicalConstitution', function(supported byagriculturalconstructionpublicationsfont-size: 1a variety of<div style="Encyclopediaiframe src="demonstratedaccomplisheduniversitiesDemographics);</script><dedicated toknowledge ofsatisfactionparticularly</div></div>English (US)appendChild(transmissions. However, intelligence" tabindex="float:right;Commonwealthranging fromin which theat least onereproductionencyclopedia;font-size:1jurisdictionat that time"><a class="In addition,description+conversationcontact withis generallyr" content="representing&lt;math&gt;presentationoccasionally<img width="navigation">compensationchampionshipmedia="all" violation ofreference toreturn true;Strict//EN" transactionsinterventionverificationInformation difficultiesChampionshipcapabilities<![endif]-->}
+</script>
+Christianityfor example,Professionalrestrictionssuggest thatwas released(such as theremoveClass(unemploymentthe Americanstructure of/index.html published inspan class=""><a href="/introductionbelonging toclaimed thatconsequences<meta name="Guide to theoverwhelmingagainst the concentrated,
+.nontouch observations</a>
+</div>
+f (document.border: 1px {font-size:1treatment of0" height="1modificationIndependencedivided intogreater thanachievementsestablishingJavaScript" neverthelesssignificanceBroadcasting>&nbsp;</td>container">
+such as the influence ofa particularsrc='http://navigation" half of the substantial &nbsp;</div>advantage ofdiscovery offundamental metropolitanthe opposite" xml:lang="deliberatelyalign=centerevolution ofpreservationimprovementsbeginning inJesus ChristPublicationsdisagreementtext-align:r, function()similaritiesbody></html>is currentlyalphabeticalis sometimestype="image/many of the flow:hidden;available indescribe theexistence ofall over thethe Internet	<ul class="installationneighborhoodarmed forcesreducing thecontinues toNonetheless,temperatures
+		<a href="close to theexamples of is about the(see below)." id="searchprofessionalis availablethe official		</script>
+
+		<div id="accelerationthrough the Hall of Famedescriptionstranslationsinterference type='text/recent yearsin the worldvery popular{background:traditional some of the connected toexploitationemergence ofconstitutionA History ofsignificant manufacturedexpectations><noscript><can be foundbecause the has not beenneighbouringwithout the added to the	<li class="instrumentalSoviet Unionacknowledgedwhich can bename for theattention toattempts to developmentsIn fact, the<li class="aimplicationssuitable formuch of the colonizationpresidentialcancelBubble Informationmost of the is describedrest of the more or lessin SeptemberIntelligencesrc="http://px; height: available tomanufacturerhuman rightslink href="/availabilityproportionaloutside the astronomicalhuman beingsname of the are found inare based onsmaller thana person whoexpansion ofarguing thatnow known asIn the earlyintermediatederived fromScandinavian</a></div>
+consider thean estimatedthe National<div id="pagresulting incommissionedanalogous toare required/ul>
+</div>
+was based onand became a&nbsp;&nbsp;t" value="" was capturedno more thanrespectivelycontinue to >
+<head>
+<were createdmore generalinformation used for theindependent the Imperialcomponent ofto the northinclude the Constructionside of the would not befor instanceinvention ofmore complexcollectivelybackground: text-align: its originalinto accountthis processan extensivehowever, thethey are notrejected thecriticism ofduring whichprobably thethis article(function(){It should bean agreementaccidentallydiffers fromArchitecturebetter knownarrangementsinfluence onattended theidentical tosouth of thepass throughxml" title="weight:bold;creating thedisplay:nonereplaced the<img src="/ihttps://www.World War IItestimonialsfound in therequired to and that thebetween the was designedconsists of considerablypublished bythe languageConservationconsisted ofrefer to theback to the css" media="People from available onproved to besuggestions"was known asvarieties oflikely to becomprised ofsupport the hands of thecoupled withconnect and border:none;performancesbefore beinglater becamecalculationsoften calledresidents ofmeaning that><li class="evidence forexplanationsenvironments"></a></div>which allowsIntroductiondeveloped bya wide rangeon behalf ofvalign="top"principle ofat the time,</noscript>said to havein the firstwhile othershypotheticalphilosopherspower of thecontained inperformed byinability towere writtenspan style="input name="the questionintended forrejection ofimplies thatinvented thethe standardwas probablylink betweenprofessor ofinteractionschanging theIndian Ocean class="lastworking with'http://www.years beforeThis was therecreationalentering themeasurementsan extremelyvalue of thestart of the
+</script>
+
+an effort toincrease theto the southspacing="0">sufficientlythe Europeanconverted toclearTimeoutdid not haveconsequentlyfor the nextextension ofeconomic andalthough theare producedand with theinsufficientgiven by thestating thatexpenditures</span></a>
+thought thaton the basiscellpadding=image of thereturning toinformation,separated byassassinateds" content="authority ofnorthwestern</div>
+<div "></div>
+  consultationcommunity ofthe nationalit should beparticipants align="leftthe greatestselection ofsupernaturaldependent onis mentionedallowing thewas inventedaccompanyinghis personalavailable atstudy of theon the otherexecution ofHuman Rightsterms of theassociationsresearch andsucceeded bydefeated theand from thebut they arecommander ofstate of theyears of agethe study of<ul class="splace in thewhere he was<li class="fthere are nowhich becamehe publishedexpressed into which thecommissionerfont-weight:territory ofextensions">Roman Empireequal to theIn contrast,however, andis typicallyand his wife(also called><ul class="effectively evolved intoseem to havewhich is thethere was noan excellentall of thesedescribed byIn practice,broadcastingcharged withreflected insubjected tomilitary andto the pointeconomicallysetTargetingare actuallyvictory over();</script>continuouslyrequired forevolutionaryan effectivenorth of the, which was front of theor otherwisesome form ofhad not beengenerated byinformation.permitted toincludes thedevelopment,entered intothe previousconsistentlyare known asthe field ofthis type ofgiven to thethe title ofcontains theinstances ofin the northdue to theirare designedcorporationswas that theone of thesemore popularsucceeded insupport fromin differentdominated bydesigned forownership ofand possiblystandardizedresponseTextwas intendedreceived theassumed thatareas of theprimarily inthe basis ofin the senseaccounts fordestroyed byat least twowas declaredcould not beSecretary ofappear to bemargin-top:1/^\s+|\s+$/ge){throw e};the start oftwo separatelanguage andwho had beenoperation ofdeath of thereal numbers	<link rel="provided thethe story ofcompetitionsenglish (UK)english 
(US)МонголСрпскисрпскисрпскоلعربية正體中文简体中文繁体中文有限公司人民政府阿里巴巴社会主义操作系统政策法规informaciónherramientaselectrónicodescripciónclasificadosconocimientopublicaciónrelacionadasinformáticarelacionadosdepartamentotrabajadoresdirectamenteayuntamientomercadoLibrecontáctenoshabitacionescumplimientorestaurantesdisposiciónconsecuenciaelectrónicaaplicacionesdesconectadoinstalaciónrealizaciónutilizaciónenciclopediaenfermedadesinstrumentosexperienciasinstituciónparticularessubcategoriaтолькоРоссииработыбольшепростоможетедругихслучаесейчасвсегдаРоссияМоскведругиегородавопросданныхдолжныименноМосквырублейМосквастраныничегоработедолженуслугитеперьОднакопотомуработуапрелявообщеодногосвоегостатьидругойфорумехорошопротивссылкакаждыйвластигруппывместеработасказалпервыйделатьденьгипериодбизнесосновемоменткупитьдолжнарамкахначалоРаботаТолькосовсемвторойначаласписокслужбысистемпечатиновогопомощисайтовпочемупомощьдолжноссылкибыстроданныемногиепроектСейчасмоделитакогоонлайнгородеверсиястранефильмыуровняразныхискатьнеделюянваряменьшемногихданнойзначитнельзяфорумаТеперьмесяцазащитыЛучшиеनहींकरनेअपनेकियाकरेंअन्यक्यागाइडबारेकिसीदियापहलेसिंहभारतअपनीवालेसेवाकरतेमेरेहोनेसकतेबहुतसाइटहोगाजानेमिनटकरताकरनाउनकेयहाँसबसेभाषाआपकेलियेशुरूइसकेघंटेमेरीसकतामेरालेकरअधिकअपनासमाजमुझेकारणहोताकड़ीयहांहोटलशब्दलियाजीवनजाताकैसेआपकावालीदेनेपूरीपानीउसकेहोगीबैठकआपकीवर्षगांवआपकोजिलाजानासहमतहमेंउनकीयाहूदर्जसूचीपसंदसवालहोनाहोतीजैसेवापसजनतानेताजारीघायलजिलेनीचेजांचपत्रगूगलजातेबाहरआपनेवाहनइसकासुबहरहनेइससेसहितबड़ेघटनातलाशपांचश्रीबड़ीहोतेसाईटशायदसकतीजातीवालाहजारपटनारखनेसड़कमिलाउसकीकेवललगताखानाअर्थजहांदेखापहलीनियमबिनाबैंककहींकहनादेताहमलेकाफीजबकितुरतमांगवहींरोज़मिलीआरोपसेनायादवलेनेखाताकरीबउनकाजवाबपूराबड़ासौदाशेयरकियेकहांअकसरबनाएवहांस्थलमिलेलेखकविषयक्रंसमूहथानाتستطيعمشاركةبواسطةالصفحةمواضيعالخاصةالمزيدالعامةالكاتبالردودبرنامجالدولةالعالمالموقعالعربيالسريعالجوالالذهابالحياةالحقوقالكريمالعراقمحفوظةالثانيمشاهدةالمرأةالقرآنالشبابالحوارالجديدالأسرةالعلوممجموعةالرحمنالنقاطفلسطينالكويتالدنيابركاتهالرياضتحياتيبتوقيتالأولىالبريدالكلامالرابطالشخصيسيار
اتالثالثالصلاةالحديثالزوارالخليجالجميعالعامهالجمالالساعةمشاهدهالرئيسالدخولالفنيةالكتابالدوريالدروساستغرقتصاميمالبناتالعظيمentertainmentunderstanding = function().jpg" width="configuration.png" width="<body class="Math.random()contemporary United Statescircumstances.appendChild(organizations<span class=""><img src="/distinguishedthousands of communicationclear"></div>investigationfavicon.ico" margin-right:based on the Massachusettstable border=internationalalso known aspronunciationbackground:#fpadding-left:For example, miscellaneous&lt;/math&gt;psychologicalin particularearch" type="form method="as opposed toSupreme Courtoccasionally Additionally,North Americapx;backgroundopportunitiesEntertainment.toLowerCase(manufacturingprofessional combined withFor instance,consisting of" maxlength="return false;consciousnessMediterraneanextraordinaryassassinationsubsequently button type="the number ofthe original comprehensiverefers to the</ul>
+</div>
+philosophicallocation.hrefwas publishedSan Francisco(function(){
+<div id="mainsophisticatedmathematical /head>
+<bodysuggests thatdocumentationconcentrationrelationshipsmay have been(for example,This article in some casesparts of the definition ofGreat Britain cellpadding=equivalent toplaceholder="; font-size: justificationbelieved thatsuffered fromattempted to leader of thecript" src="/(function() {are available
+	<link rel=" src='http://interested inconventional " alt="" /></are generallyhas also beenmost popular correspondingcredited withtyle="border:</a></span></.gif" width="<iframe src="table class="inline-block;according to together withapproximatelyparliamentarymore and moredisplay:none;traditionallypredominantly&nbsp;|&nbsp;&nbsp;</span> cellspacing=<input name="or" content="controversialproperty="og:/x-shockwave-demonstrationsurrounded byNevertheless,was the firstconsiderable Although the collaborationshould not beproportion of<span style="known as the shortly afterfor instance,described as /head>
+<body starting withincreasingly the fact thatdiscussion ofmiddle of thean individualdifficult to point of viewhomosexualityacceptance of</span></div>manufacturersorigin of thecommonly usedimportance ofdenominationsbackground: #length of thedeterminationa significant" border="0">revolutionaryprinciples ofis consideredwas developedIndo-Europeanvulnerable toproponents ofare sometimescloser to theNew York City name="searchattributed tocourse of themathematicianby the end ofat the end of" border="0" technological.removeClass(branch of theevidence that![endif]-->
+Institute of into a singlerespectively.and thereforeproperties ofis located insome of whichThere is alsocontinued to appearance of &amp;ndash; describes theconsiderationauthor of theindependentlyequipped withdoes not have</a><a href="confused with<link href="/at the age ofappear in theThese includeregardless ofcould be used style=&quot;several timesrepresent thebody>
+</html>thought to bepopulation ofpossibilitiespercentage ofaccess to thean attempt toproduction ofjquery/jquerytwo differentbelong to theestablishmentreplacing thedescription" determine theavailable forAccording to wide range of	<div class="more commonlyorganisationsfunctionalitywas completed &amp;mdash; participationthe characteran additionalappears to befact that thean example ofsignificantlyonmouseover="because they async = true;problems withseems to havethe result of src="http://familiar withpossession offunction () {took place inand sometimessubstantially<span></span>is often usedin an attemptgreat deal ofEnvironmentalsuccessfully virtually all20th century,professionalsnecessary to determined bycompatibilitybecause it isDictionary ofmodificationsThe followingmay refer to:Consequently,Internationalalthough somethat would beworld's firstclassified asbottom of the(particularlyalign="left" most commonlybasis for thefoundation ofcontributionspopularity ofcenter of theto reduce thejurisdictionsapproximation onmouseout="New Testamentcollection of</span></a></in the Unitedfilm director-strict.dtd">has been usedreturn to thealthough thischange in theseveral otherbut there areunprecedentedis similar toespecially inweight: bold;is called thecomputationalindicate thatrestricted to	<meta name="are typicallyconflict withHowever, the An example ofcompared withquantities ofrather than aconstellationnecessary forreported thatspecificationpolitical and&nbsp;&nbsp;<references tothe same yearGovernment ofgeneration ofhave not beenseveral yearscommitment to		<ul class="visualization19th century,practitionersthat he wouldand continuedoccupation ofis defined ascentre of thethe amount of><div style="equivalent ofdifferentiatebrought aboutmargin-left: automaticallythought of asSome of these
+<div class="input class="replaced withis one of theeducation andinfluenced byreputation as
+<meta name="accommodation</div>
+</div>large part ofInstitute forthe so-called against the In this case,was appointedclaimed to beHowever, thisDepartment ofthe remainingeffect on theparticularly deal with the
+<div style="almost alwaysare currentlyexpression ofphilosophy offor more thancivilizationson the islandselectedIndexcan result in" value="" />the structure /></a></div>Many of thesecaused by theof the Unitedspan class="mcan be tracedis related tobecame one ofis frequentlyliving in thetheoreticallyFollowing theRevolutionarygovernment inis determinedthe politicalintroduced insufficient todescription">short storiesseparation ofas to whetherknown for itswas initiallydisplay:blockis an examplethe principalconsists of arecognized as/body></html>a substantialreconstructedhead of stateresistance toundergraduateThere are twogravitationalare describedintentionallyserved as theclass="headeropposition tofundamentallydominated theand the otheralliance withwas forced torespectively,and politicalin support ofpeople in the20th century.and publishedloadChartbeatto understandmember statesenvironmentalfirst half ofcountries andarchitecturalbe consideredcharacterizedclearIntervalauthoritativeFederation ofwas succeededand there area consequencethe Presidentalso includedfree softwaresuccession ofdeveloped thewas destroyedaway from the;
+</script>
+<although theyfollowed by amore powerfulresulted in aUniversity ofHowever, manythe presidentHowever, someis thought tountil the endwas announcedare importantalso includes><input type=the center of DO NOT ALTERused to referthemes/?sort=that had beenthe basis forhas developedin the summercomparativelydescribed thesuch as thosethe resultingis impossiblevarious otherSouth Africanhave the sameeffectivenessin which case; text-align:structure and; background:regarding thesupported theis also knownstyle="marginincluding thebahasa Melayunorsk bokmålnorsk nynorskslovenščinainternacionalcalificacióncomunicaciónconstrucción"><div class="disambiguationDomainName', 'administrationsimultaneouslytransportationInternational margin-bottom:responsibility<![endif]-->
+</><meta name="implementationinfrastructurerepresentationborder-bottom:</head>
+<body>=http%3A%2F%2F<form method="method="post" /favicon.ico" });
+</script>
+.setAttribute(Administration= new Array();<![endif]-->
+display:block;Unfortunately,">&nbsp;</div>/favicon.ico">='stylesheet' identification, for example,<li><a href="/an alternativeas a result ofpt"></script>
+type="submit" 
+(function() {recommendationform action="/transformationreconstruction.style.display According to hidden" name="along with thedocument.body.approximately Communicationspost" action="meaning &quot;--<![endif]-->Prime Ministercharacteristic</a> <a class=the history of onmouseover="the governmenthref="https://was originallywas introducedclassificationrepresentativeare considered<![endif]-->
+
+depends on theUniversity of in contrast to placeholder="in the case ofinternational constitutionalstyle="border-: function() {Because of the-strict.dtd">
+<table class="accompanied byaccount of the<script src="/nature of the the people in in addition tos); js.id = id" width="100%"regarding the Roman Catholican independentfollowing the .gif" width="1the following discriminationarchaeologicalprime minister.js"></script>combination of marginwidth="createElement(w.attachEvent(</a></td></tr>src="https://aIn particular, align="left" Czech RepublicUnited Kingdomcorrespondenceconcluded that.html" title="(function () {comes from theapplication of<span class="sbelieved to beement('script'</a>
+</li>
+<livery different><span class="option value="(also known as	<li><a href="><input name="separated fromreferred to as valign="top">founder of theattempting to carbon dioxide
+
+<div class="class="search-/body>
+</html>opportunity tocommunications</head>
+<body style="width:Tiếng Việtchanges in theborder-color:#0" border="0" </span></div><was discovered" type="text" );
+</script>
+
+Department of ecclesiasticalthere has beenresulting from</body></html>has never beenthe first timein response toautomatically </div>
+
+<div iwas consideredpercent of the" /></a></div>collection of descended fromsection of theaccept-charsetto be confusedmember of the padding-right:translation ofinterpretation href='http://whether or notThere are alsothere are manya small numberother parts ofimpossible to  class="buttonlocated in the. However, theand eventuallyAt the end of because of itsrepresents the<form action=" method="post"it is possiblemore likely toan increase inhave also beencorresponds toannounced thatalign="right">many countriesfor many yearsearliest knownbecause it waspt"></script> valign="top" inhabitants offollowing year
+<div class="million peoplecontroversial concerning theargue that thegovernment anda reference totransferred todescribing the style="color:although therebest known forsubmit" name="multiplicationmore than one recognition ofCouncil of theedition of the  <meta name="Entertainment away from the ;margin-right:at the time ofinvestigationsconnected withand many otheralthough it isbeginning with <span class="descendants of<span class="i align="right"</head>
+<body aspects of thehas since beenEuropean Unionreminiscent ofmore difficultVice Presidentcomposition ofpassed throughmore importantfont-size:11pxexplanation ofthe concept ofwritten in the	<span class="is one of the resemblance toon the groundswhich containsincluding the defined by thepublication ofmeans that theoutside of thesupport of the<input class="<span class="t(Math.random()most prominentdescription ofConstantinoplewere published<div class="seappears in the1" height="1" most importantwhich includeswhich had beendestruction ofthe population
+	<div class="possibility ofsometimes usedappear to havesuccess of theintended to bepresent in thestyle="clear:b
+</script>
+<was founded ininterview with_id" content="capital of the
+<link rel="srelease of thepoint out thatxMLHttpRequestand subsequentsecond largestvery importantspecificationssurface of theapplied to theforeign policy_setDomainNameestablished inis believed toIn addition tomeaning of theis named afterto protect theis representedDeclaration ofmore efficientClassificationother forms ofhe returned to<span class="cperformance of(function() {if and only ifregions of theleading to therelations withUnited Nationsstyle="height:other than theype" content="Association of
+</head>
+<bodylocated on theis referred to(including theconcentrationsthe individualamong the mostthan any other/>
+<link rel=" return false;the purpose ofthe ability to;color:#fff}
+.
+<span class="the subject ofdefinitions of>
+<link rel="claim that thehave developed<table width="celebration ofFollowing the to distinguish<span class="btakes place inunder the namenoted that the><![endif]-->
+style="margin-instead of theintroduced thethe process ofincreasing thedifferences inestimated thatespecially the/div><div id="was eventuallythroughout histhe differencesomething thatspan></span></significantly ></script>
+
+environmental to prevent thehave been usedespecially forunderstand theis essentiallywere the firstis the largesthave been made" src="http://interpreted assecond half ofcrolling="no" is composed ofII, Holy Romanis expected tohave their owndefined as thetraditionally have differentare often usedto ensure thatagreement withcontaining theare frequentlyinformation onexample is theresulting in a</a></li></ul> class="footerand especiallytype="button" </span></span>which included>
+<meta name="considered thecarried out byHowever, it isbecame part ofin relation topopular in thethe capital ofwas officiallywhich has beenthe History ofalternative todifferent fromto support thesuggested thatin the process  <div class="the foundationbecause of hisconcerned withthe universityopposed to thethe context of<span class="ptext" name="q"		<div class="the scientificrepresented bymathematicianselected by thethat have been><div class="cdiv id="headerin particular,converted into);
+</script>
+<philosophical srpskohrvatskitiếng ViệtРусскийрусскийinvestigaciónparticipaciónкоторыеобластикоторыйчеловексистемыНовостикоторыхобластьвременикотораясегодняскачатьновостиУкраинывопросыкоторойсделатьпомощьюсредствобразомстороныучастиетечениеГлавнаяисториисистемарешенияСкачатьпоэтомуследуетсказатьтоваровконечнорешениекотороеоргановкоторомРекламаالمنتدىمنتدياتالموضوعالبرامجالمواقعالرسائلمشاركاتالأعضاءالرياضةالتصميمالاعضاءالنتائجالألعابالتسجيلالأقسامالضغطاتالفيديوالترحيبالجديدةالتعليمالأخبارالافلامالأفلامالتاريخالتقنيةالالعابالخواطرالمجتمعالديكورالسياحةعبداللهالتربيةالروابطالأدبيةالاخبارالمتحدةالاغانيcursor:pointer;</title>
+<meta " href="http://"><span class="members of the window.locationvertical-align:/a> | <a href="<!doctype html>media="screen" <option value="favicon.ico" />
+		<div class="characteristics" method="get" /body>
+</html>
+shortcut icon" document.write(padding-bottom:representativessubmit" value="align="center" throughout the science fiction
+  <div class="submit" class="one of the most valign="top"><was established);
+</script>
+return false;">).style.displaybecause of the document.cookie<form action="/}body{margin:0;Encyclopedia ofversion of the .createElement(name" content="</div>
+</div>
+
+administrative </body>
+</html>history of the "><input type="portion of the as part of the &nbsp;<a href="other countries">
+<div class="</span></span><In other words,display: block;control of the introduction of/>
+<meta name="as well as the in recent years
+	<div class="</div>
+	</div>
+inspired by thethe end of the compatible withbecame known as style="margin:.js"></script>< International there have beenGerman language style="color:#Communist Partyconsistent withborder="0" cell marginheight="the majority of" align="centerrelated to the many different Orthodox Churchsimilar to the />
+<link rel="swas one of the until his death})();
+</script>other languagescompared to theportions of thethe Netherlandsthe most commonbackground:url(argued that thescrolling="no" included in theNorth American the name of theinterpretationsthe traditionaldevelopment of frequently useda collection ofvery similar tosurrounding theexample of thisalign="center">would have beenimage_caption =attached to thesuggesting thatin the form of involved in theis derived fromnamed after theIntroduction torestrictions on style="width: can be used to the creation ofmost important information andresulted in thecollapse of theThis means thatelements of thewas replaced byanalysis of theinspiration forregarded as themost successfulknown as &quot;a comprehensiveHistory of the were consideredreturned to theare referred toUnsourced image>
+	<div class="consists of thestopPropagationinterest in theavailability ofappears to haveelectromagneticenableServices(function of theIt is important</script></div>function(){var relative to theas a result of the position ofFor example, in method="post" was followed by&amp;mdash; thethe applicationjs"></script>
+ul></div></div>after the deathwith respect tostyle="padding:is particularlydisplay:inline; type="submit" is divided into中文 (简体)responsabilidadadministracióninternacionalescorrespondienteउपयोगपूर्वहमारेलोगोंचुनावलेकिनसरकारपुलिसखोजेंचाहिएभेजेंशामिलहमारीजागरणबनानेकुमारब्लॉगमालिकमहिलापृष्ठबढ़तेभाजपाक्लिकट्रेनखिलाफदौरानमामलेमतदानबाजारविकासक्योंचाहतेपहुँचबतायासंवाददेखनेपिछलेविशेषराज्यउत्तरमुंबईदोनोंउपकरणपढ़ेंस्थितफिल्ममुख्यअच्छाछूटतीसंगीतजाएगाविभागघण्टेदूसरेदिनोंहत्यासेक्सगांधीविश्वरातेंदैट्सनक्शासामनेअदालतबिजलीपुरूषहिंदीमित्रकवितारुपयेस्थानकरोड़मुक्तयोजनाकृपयापोस्टघरेलूकार्यविचारसूचनामूल्यदेखेंहमेशास्कूलमैंनेतैयारजिसकेrss+xml" title="-type" content="title" content="at the same time.js"></script>
+<" method="post" </span></a></li>vertical-align:t/jquery.min.js">.click(function( style="padding-})();
+</script>
+</span><a href="<a href="http://); return false;text-decoration: scrolling="no" border-collapse:associated with Bahasa IndonesiaEnglish language<text xml:space=.gif" border="0"</body>
+</html>
+overflow:hidden;img src="http://addEventListenerresponsible for s.js"></script>
+/favicon.ico" />operating system" style="width:1target="_blank">State Universitytext-align:left;
+document.write(, including the around the world);
+</script>
+<" style="height:;overflow:hiddenmore informationan internationala member of the one of the firstcan be found in </div>
+		</div>
+display: none;">" />
+<link rel="
+  (function() {the 15th century.preventDefault(large number of Byzantine Empire.jpg|thumb|left|vast majority ofmajority of the  align="center">University Pressdominated by theSecond World Wardistribution of style="position:the rest of the characterized by rel="nofollow">derives from therather than the a combination ofstyle="width:100English-speakingcomputer scienceborder="0" alt="the existence ofDemocratic Party" style="margin-For this reason,.js"></script>
+	sByTagName(s)[0]js"></script>
+<.js"></script>
+link rel="icon" ' alt='' class='formation of theversions of the </a></div></div>/page>
+  <page>
+<div class="contbecame the firstbahasa Indonesiaenglish (simple)ΕλληνικάхрватскикомпанииявляетсяДобавитьчеловекаразвитияИнтернетОтветитьнапримеринтернеткоторогостраницыкачествеусловияхпроблемыполучитьявляютсянаиболеекомпаниявниманиесредстваالمواضيعالرئيسيةالانتقالمشاركاتكالسياراتالمكتوبةالسعوديةاحصائياتالعالميةالصوتياتالانترنتالتصاميمالإسلاميالمشاركةالمرئياتrobots" content="<div id="footer">the United States<img src="http://.jpg|right|thumb|.js"></script>
+<location.protocolframeborder="0" s" />
+<meta name="</a></div></div><font-weight:bold;&quot; and &quot;depending on the margin:0;padding:" rel="nofollow" President of the twentieth centuryevision>
+  </pageInternet Explorera.async = true;
+information about<div id="header">" action="http://<a href="https://<div id="content"</div>
+</div>
+<derived from the <img src='http://according to the 
+</body>
+</html>
+style="font-size:script language="Arial, Helvetica,</a><span class="</script><script political partiestd></tr></table><href="http://www.interpretation ofrel="stylesheet" document.write('<charset="utf-8">
+beginning of the revealed that thetelevision series" rel="nofollow"> target="_blank">claiming that thehttp%3A%2F%2Fwww.manifestations ofPrime Minister ofinfluenced by theclass="clearfix">/div>
+</div>
+
+three-dimensionalChurch of Englandof North Carolinasquare kilometres.addEventListenerdistinct from thecommonly known asPhonetic Alphabetdeclared that thecontrolled by theBenjamin Franklinrole-playing gamethe University ofin Western Europepersonal computerProject Gutenbergregardless of thehas been proposedtogether with the></li><li class="in some countriesmin.js"></script>of the populationofficial language<img src="images/identified by thenatural resourcesclassification ofcan be consideredquantum mechanicsNevertheless, themillion years ago</body>
+</html>Ελληνικά
+take advantage ofand, according toattributed to theMicrosoft Windowsthe first centuryunder the controldiv class="headershortly after thenotable exceptiontens of thousandsseveral differentaround the world.reaching militaryisolated from theopposition to thethe Old TestamentAfrican Americansinserted into theseparate from themetropolitan areamakes it possibleacknowledged thatarguably the mosttype="text/css">
+the InternationalAccording to the pe="text/css" />
+coincide with thetwo-thirds of theDuring this time,during the periodannounced that hethe internationaland more recentlybelieved that theconsciousness andformerly known assurrounded by thefirst appeared inoccasionally usedposition:absolute;" target="_blank" position:relative;text-align:center;jax/libs/jquery/1.background-color:#type="application/anguage" content="<meta http-equiv="Privacy Policy</a>e("%3Cscript src='" target="_blank">On the other hand,.jpg|thumb|right|2</div><div class="<div style="float:nineteenth century</body>
+</html>
+<img src="http://s;text-align:centerfont-weight: bold; According to the difference between" frameborder="0" " style="position:link href="http://html4/loose.dtd">
+during this period</td></tr></table>closely related tofor the first time;font-weight:bold;input type="text" <span style="font-onreadystatechange	<div class="cleardocument.location. For example, the a wide variety of <!DOCTYPE html>
+<&nbsp;&nbsp;&nbsp;"><a href="http://style="float:left;concerned with the=http%3A%2F%2Fwww.in popular culturetype="text/css" />it is possible to Harvard Universitytylesheet" href="/the main characterOxford University  name="keywords" cstyle="text-align:the United Kingdomfederal government<div style="margin depending on the description of the<div class="header.min.js"></script>destruction of theslightly differentin accordance withtelecommunicationsindicates that theshortly thereafterespecially in the European countriesHowever, there aresrc="http://staticsuggested that the" src="http://www.a large number of Telecommunications" rel="nofollow" tHoly Roman Emperoralmost exclusively" border="0" alt="Secretary of Stateculminating in theCIA World Factbookthe most importantanniversary of thestyle="background-<li><em><a href="/the Atlantic Oceanstrictly speaking,shortly before thedifferent types ofthe Ottoman Empire><img src="http://An Introduction toconsequence of thedeparture from theConfederate Statesindigenous peoplesProceedings of theinformation on thetheories have beeninvolvement in thedivided into threeadjacent countriesis responsible fordissolution of thecollaboration withwidely regarded ashis contemporariesfounding member ofDominican Republicgenerally acceptedthe possibility ofare also availableunder constructionrestoration of thethe general publicis almost entirelypasses through thehas been suggestedcomputer and videoGermanic languages according to the different from theshortly afterwardshref="https://www.recent developmentBoard of Directors<div class="search| <a href="http://In particular, theMultiple footnotesor other substancethousands of yearstranslation of the</div>
+</div>
+
+<a href="index.phpwas established inmin.js"></script>
+participate in thea strong influencestyle="margin-top:represented by thegraduated from theTraditionally, theElement("script");However, since the/div>
+</div>
+<div left; margin-left:protection against0; vertical-align:Unfortunately, thetype="image/x-icon/div>
+<div class=" class="clearfix"><div class="footer		</div>
+		</div>
+the motion pictureБългарскибългарскиФедерациинесколькосообщениесообщенияпрограммыОтправитьбесплатноматериалыпозволяетпоследниеразличныхпродукциипрограммаполностьюнаходитсяизбранноенаселенияизменениякатегорииАлександрद्वारामैनुअलप्रदानभारतीयअनुदेशहिन्दीइंडियादिल्लीअधिकारवीडियोचिट्ठेसमाचारजंक्शनदुनियाप्रयोगअनुसारऑनलाइनपार्टीशर्तोंलोकसभाफ़्लैशशर्तेंप्रदेशप्लेयरकेंद्रस्थितिउत्पादउन्हेंचिट्ठायात्राज्यादापुरानेजोड़ेंअनुवादश्रेणीशिक्षासरकारीसंग्रहपरिणामब्रांडबच्चोंउपलब्धमंत्रीसंपर्कउम्मीदमाध्यमसहायताशब्दोंमीडियाआईपीएलमोबाइलसंख्याआपरेशनअनुबंधबाज़ारनवीनतमप्रमुखप्रश्नपरिवारनुकसानसमर्थनआयोजितसोमवारالمشاركاتالمنتدياتالكمبيوترالمشاهداتعددالزوارعددالردودالإسلاميةالفوتوشوبالمسابقاتالمعلوماتالمسلسلاتالجرافيكسالاسلاميةالاتصالاتkeywords" content="w3.org/1999/xhtml"><a target="_blank" text/html; charset=" target="_blank"><table cellpadding="autocomplete="off" text-align: center;to last version by background-color: #" href="http://www./div></div><div id=<a href="#" class=""><img src="http://cript" src="http://
+<script language="//EN" "http://www.wencodeURIComponent(" href="javascript:<div class="contentdocument.write('<scposition: absolute;script src="http:// style="margin-top:.min.js"></script>
+</div>
+<div class="w3.org/1999/xhtml" 
+
+</body>
+</html>distinction between/" target="_blank"><link href="http://encoding="utf-8"?>
+w.addEventListener?action="http://www.icon" href="http:// style="background:type="text/css" />
+meta property="og:t<input type="text"  style="text-align:the development of tylesheet" type="tehtml; charset=utf-8is considered to betable width="100%" In addition to the contributed to the differences betweendevelopment of the It is important to </script>
+
+<script  style="font-size:1></span><span id=gbLibrary of Congress<img src="http://imEnglish translationAcademy of Sciencesdiv style="display:construction of the.getElementById(id)in conjunction withElement('script'); <meta property="og:Български
+ type="text" name=">Privacy Policy</a>administered by theenableSingleRequeststyle=&quot;margin:</div></div></div><><img src="http://i style=&quot;float:referred to as the total population ofin Washington, D.C. style="background-among other things,organization of theparticipated in thethe introduction ofidentified with thefictional character Oxford University misunderstanding ofThere are, however,stylesheet" href="/Columbia Universityexpanded to includeusually referred toindicating that thehave suggested thataffiliated with thecorrelation betweennumber of different></td></tr></table>Republic of Ireland
+</script>
+<script under the influencecontribution to theOfficial website ofheadquarters of thecentered around theimplications of thehave been developedFederal Republic ofbecame increasinglycontinuation of theNote, however, thatsimilar to that of capabilities of theaccordance with theparticipants in thefurther developmentunder the directionis often consideredhis younger brother</td></tr></table><a http-equiv="X-UA-physical propertiesof British Columbiahas been criticized(with the exceptionquestions about thepassing through the0" cellpadding="0" thousands of peopleredirects here. Forhave children under%3E%3C/script%3E"));<a href="http://www.<li><a href="http://site_name" content="text-decoration:nonestyle="display: none<meta http-equiv="X-new Date().getTime() type="image/x-icon"</span><span class="language="javascriptwindow.location.href<a href="javascript:-->
+<script type="t<a href='http://www.hortcut icon" href="</div>
+<div class="<script src="http://" rel="stylesheet" t</div>
+<script type=/a> <a href="http:// allowTransparency="X-UA-Compatible" conrelationship between
+</script>
+<script </a></li></ul></div>associated with the programming language</a><a href="http://</a></li><li class="form action="http://<div style="display:type="text" name="q"<table width="100%" background-position:" border="0" width="rel="shortcut icon" h6><ul><li><a href="  <meta http-equiv="css" media="screen" responsible for the " type="application/" style="background-html; charset=utf-8" allowtransparency="stylesheet" type="te
+<meta http-equiv="></span><span class="0" cellspacing="0">;
+</script>
+<script sometimes called thedoes not necessarilyFor more informationat the beginning of <!DOCTYPE html><htmlparticularly in the type="hidden" name="javascript:void(0);"effectiveness of the autocomplete="off" generally considered><input type="text" "></script>
+<scriptthroughout the worldcommon misconceptionassociation with the</div>
+</div>
+<div cduring his lifetime,corresponding to thetype="image/x-icon" an increasing numberdiplomatic relationsare often consideredmeta charset="utf-8" <input type="text" examples include the"><img src="http://iparticipation in thethe establishment of
+</div>
+<div class="&amp;nbsp;&amp;nbsp;to determine whetherquite different frommarked the beginningdistance between thecontributions to theconflict between thewidely considered towas one of the firstwith varying degreeshave speculated that(document.getElementparticipating in theoriginally developedeta charset="utf-8"> type="text/css" />
+interchangeably withmore closely relatedsocial and politicalthat would otherwiseperpendicular to thestyle type="text/csstype="submit" name="families residing indeveloping countriescomputer programmingeconomic developmentdetermination of thefor more informationon several occasionsportuguês (Europeu)УкраїнськаукраїнськаРоссийскойматериаловинформацииуправлениянеобходимоинформацияИнформацияРеспубликиколичествоинформациютерриториидостаточноالمتواجدونالاشتراكاتالاقتراحاتhtml; charset=UTF-8" setTimeout(function()display:inline-block;<input type="submit" type = 'text/javascri<img src="http://www." "http://www.w3.org/shortcut icon" href="" autocomplete="off" </a></div><div class=</a></li>
+<li class="css" type="text/css" <form action="http://xt/css" href="http://link rel="alternate" 
+<script type="text/ onclick="javascript:(new Date).getTime()}height="1" width="1" People's Republic of  <a href="http://www.text-decoration:underthe beginning of the </div>
+</div>
+</div>
+establishment of the </div></div></div></d#viewport{min-height:
+<script src="http://option><option value=often referred to as /option>
+<option valu<!DOCTYPE html>
+<!--[International Airport>
+<a href="http://www</a><a href="http://wภาษาไทยქართული正體中文 (繁體)निर्देशडाउनलोडक्षेत्रजानकारीसंबंधितस्थापनास्वीकारसंस्करणसामग्रीचिट्ठोंविज्ञानअमेरिकाविभिन्नगाडियाँक्योंकिसुरक्षापहुँचतीप्रबंधनटिप्पणीक्रिकेटप्रारंभप्राप्तमालिकोंरफ़्तारनिर्माणलिमिटेडdescription" content="document.location.prot.getElementsByTagName(<!DOCTYPE html>
+<html <meta charset="utf-8">:url" content="http://.css" rel="stylesheet"style type="text/css">type="text/css" href="w3.org/1999/xhtml" xmltype="text/javascript" method="get" action="link rel="stylesheet"  = document.getElementtype="image/x-icon" />cellpadding="0" cellsp.css" type="text/css" </a></li><li><a href="" width="1" height="1""><a href="http://www.style="display:none;">alternate" type="appli-//W3C//DTD XHTML 1.0 ellspacing="0" cellpad type="hidden" value="/a>&nbsp;<span role="s
+<input type="hidden" language="JavaScript"  document.getElementsBg="0" cellspacing="0" ype="text/css" media="type='text/javascript'with the exception of ype="text/css" rel="st height="1" width="1" ='+encodeURIComponent(<link rel="alternate" 
+body, tr, input, textmeta name="robots" conmethod="post" action=">
+<a href="http://www.css" rel="stylesheet" </div></div><div classlanguage="javascript">aria-hidden="true">·<ript" type="text/javasl=0;})();
+(function(){background-image: url(/a></li><li><a href="h		<li><a href="http://ator" aria-hidden="tru> <a href="http://www.language="javascript" /option>
+<option value/div></div><div class=rator" aria-hidden="tre=(new Date).getTime()português (do Brasil)организациивозможностьобразованиярегистрациивозможностиобязательна<!DOCTYPE html PUBLIC "nt-Type" content="text/<meta http-equiv="Conteransitional//EN" "http:<html xmlns="http://www-//W3C//DTD XHTML 1.0 TDTD/xhtml1-transitional//www.w3.org/TR/xhtml1/pe = 'text/javascript';<meta name="descriptionparentNode.insertBefore<input type="hidden" najs" type="text/javascri(document).ready(functiscript type="text/javasimage" content="http://UA-Compatible" content=tml; charset=utf-8" />
+link rel="shortcut icon<link rel="stylesheet" </script>
+<script type== document.createElemen<a target="_blank" href= document.getElementsBinput type="text" name=a.type = 'text/javascrinput type="hidden" namehtml; charset=utf-8" />dtd">
+<html xmlns="http-//W3C//DTD HTML 4.01 TentsByTagName('script')input type="hidden" nam<script type="text/javas" style="display:none;">document.getElementById(=document.createElement(' type='text/javascript'input type="text" name="d.getElementsByTagName(snical" href="http://www.C//DTD HTML 4.01 Transit<style type="text/css">
+
+<style type="text/css">ional.dtd">
+<html xmlns=http-equiv="Content-Typeding="0" cellspacing="0"html; charset=utf-8" />
+ style="display:none;"><<li><a href="http://www. type='text/javascript'>деятельностисоответствиипроизводствабезопасностиपुस्तिकाकांग्रेसउन्होंनेविधानसभाफिक्सिंगसुरक्षितकॉपीराइटविज्ञापनकार्रवाईसक्रियता
\ No newline at end of file
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/dictionary.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/dictionary.c
new file mode 100644
index 0000000000..64822a381b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/dictionary.c
@@ -0,0 +1,5905 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "./dictionary.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#ifndef BROTLI_EXTERNAL_DICTIONARY_DATA
+static const uint8_t kBrotliDictionaryData[] =
+{
+116,105,109,101,100,111,119,110,108,105,102,101,108,101,102,116,98,97,99,107,99,
+111,100,101,100,97,116,97,115,104,111,119,111,110,108,121,115,105,116,101,99,105
+,116,121,111,112,101,110,106,117,115,116,108,105,107,101,102,114,101,101,119,111
+,114,107,116,101,120,116,121,101,97,114,111,118,101,114,98,111,100,121,108,111,
+118,101,102,111,114,109,98,111,111,107,112,108,97,121,108,105,118,101,108,105,
+110,101,104,101,108,112,104,111,109,101,115,105,100,101,109,111,114,101,119,111,
+114,100,108,111,110,103,116,104,101,109,118,105,101,119,102,105,110,100,112,97,
+103,101,100,97,121,115,102,117,108,108,104,101,97,100,116,101,114,109,101,97,99,
+104,97,114,101,97,102,114,111,109,116,114,117,101,109,97,114,107,97,98,108,101,
+117,112,111,110,104,105,103,104,100,97,116,101,108,97,110,100,110,101,119,115,
+101,118,101,110,110,101,120,116,99,97,115,101,98,111,116,104,112,111,115,116,117
+,115,101,100,109,97,100,101,104,97,110,100,104,101,114,101,119,104,97,116,110,97
+,109,101,76,105,110,107,98,108,111,103,115,105,122,101,98,97,115,101,104,101,108
+,100,109,97,107,101,109,97,105,110,117,115,101,114,39,41,32,43,104,111,108,100,
+101,110,100,115,119,105,116,104,78,101,119,115,114,101,97,100,119,101,114,101,
+115,105,103,110,116,97,107,101,104,97,118,101,103,97,109,101,115,101,101,110,99,
+97,108,108,112,97,116,104,119,101,108,108,112,108,117,115,109,101,110,117,102,
+105,108,109,112,97,114,116,106,111,105,110,116,104,105,115,108,105,115,116,103,
+111,111,100,110,101,101,100,119,97,121,115,119,101,115,116,106,111,98,115,109,
+105,110,100,97,108,115,111,108,111,103,111,114,105,99,104,117,115,101,115,108,97
+,115,116,116,101,97,109,97,114,109,121,102,111,111,100,107,105,110,103,119,105,
+108,108,101,97,115,116,119,97,114,100,98,101,115,116,102,105,114,101,80,97,103,
+101,107,110,111,119,97,119,97,121,46,112,110,103,109,111,118,101,116,104,97,110,
+108,111,97,100,103,105,118,101,115,101,108,102,110,111,116,101,109,117,99,104,
+102,101,101,100,109,97,110,121,114,111,99,107,105,99,111,110,111,110,99,101,108,
+111,111,107,104,105,100,101,100,105,101,100,72,111,109,101,114,117,108,101,104,
+111,115,116,97,106,97,120,105,110,102,111,99,108,117,98,108,97,119,115,108,101,
+115,115,104,97,108,102,115,111,109,101,115,117,99,104,122,111,110,101,49,48,48,
+37,111,110,101,115,99,97,114,101,84,105,109,101,114,97,99,101,98,108,117,101,102
+,111,117,114,119,101,101,107,102,97,99,101,104,111,112,101,103,97,118,101,104,97
+,114,100,108,111,115,116,119,104,101,110,112,97,114,107,107,101,112,116,112,97,
+115,115,115,104,105,112,114,111,111,109,72,84,77,76,112,108,97,110,84,121,112,
+101,100,111,110,101,115,97,118,101,107,101,101,112,102,108,97,103,108,105,110,
+107,115,111,108,100,102,105,118,101,116,111,111,107,114,97,116,101,116,111,119,
+110,106,117,109,112,116,104,117,115,100,97,114,107,99,97,114,100,102,105,108,101
+,102,101,97,114,115,116,97,121,107,105,108,108,116,104,97,116,102,97,108,108,97,
+117,116,111,101,118,101,114,46,99,111,109,116,97,108,107,115,104,111,112,118,111
+,116,101,100,101,101,112,109,111,100,101,114,101,115,116,116,117,114,110,98,111,
+114,110,98,97,110,100,102,101,108,108,114,111,115,101,117,114,108,40,115,107,105
+,110,114,111,108,101,99,111,109,101,97,99,116,115,97,103,101,115,109,101,101,116
+,103,111,108,100,46,106,112,103,105,116,101,109,118,97,114,121,102,101,108,116,
+116,104,101,110,115,101,110,100,100,114,111,112,86,105,101,119,99,111,112,121,49
+,46,48,34,60,47,97,62,115,116,111,112,101,108,115,101,108,105,101,115,116,111,
+117,114,112,97,99,107,46,103,105,102,112,97,115,116,99,115,115,63,103,114,97,121
+,109,101,97,110,38,103,116,59,114,105,100,101,115,104,111,116,108,97,116,101,115
+,97,105,100,114,111,97,100,118,97,114,32,102,101,101,108,106,111,104,110,114,105
+,99,107,112,111,114,116,102,97,115,116,39,85,65,45,100,101,97,100,60,47,98,62,
+112,111,111,114,98,105,108,108,116,121,112,101,85,46,83,46,119,111,111,100,109,
+117,115,116,50,112,120,59,73,110,102,111,114,97,110,107,119,105,100,101,119,97,
+110,116,119,97,108,108,108,101,97,100,91,48,93,59,112,97,117,108,119,97,118,101,
+115,117,114,101,36,40,39,35,119,97,105,116,109,97,115,115,97,114,109,115,103,111
+,101,115,103,97,105,110,108,97,110,103,112,97,105,100,33,45,45,32,108,111,99,107
+,117,110,105,116,114,111,111,116,119,97,108,107,102,105,114,109,119,105,102,101,
+120,109,108,34,115,111,110,103,116,101,115,116,50,48,112,120,107,105,110,100,114
+,111,119,115,116,111,111,108,102,111,110,116,109,97,105,108,115,97,102,101,115,
+116,97,114,109,97,112,115,99,111,114,101,114,97,105,110,102,108,111,119,98,97,98
+,121,115,112,97,110,115,97,121,115,52,112,120,59,54,112,120,59,97,114,116,115,
+102,111,111,116,114,101,97,108,119,105,107,105,104,101,97,116,115,116,101,112,
+116,114,105,112,111,114,103,47,108,97,107,101,119,101,97,107,116,111,108,100,70,
+111,114,109,99,97,115,116,102,97,110,115,98,97,110,107,118,101,114,121,114,117,
+110,115,106,117,108,121,116,97,115,107,49,112,120,59,103,111,97,108,103,114,101,
+119,115,108,111,119,101,100,103,101,105,100,61,34,115,101,116,115,53,112,120,59,
+46,106,115,63,52,48,112,120,105,102,32,40,115,111,111,110,115,101,97,116,110,111
+,110,101,116,117,98,101,122,101,114,111,115,101,110,116,114,101,101,100,102,97,
+99,116,105,110,116,111,103,105,102,116,104,97,114,109,49,56,112,120,99,97,109,
+101,104,105,108,108,98,111,108,100,122,111,111,109,118,111,105,100,101,97,115,
+121,114,105,110,103,102,105,108,108,112,101,97,107,105,110,105,116,99,111,115,
+116,51,112,120,59,106,97,99,107,116,97,103,115,98,105,116,115,114,111,108,108,
+101,100,105,116,107,110,101,119,110,101,97,114,60,33,45,45,103,114,111,119,74,83
+,79,78,100,117,116,121,78,97,109,101,115,97,108,101,121,111,117,32,108,111,116,
+115,112,97,105,110,106,97,122,122,99,111,108,100,101,121,101,115,102,105,115,104
+,119,119,119,46,114,105,115,107,116,97,98,115,112,114,101,118,49,48,112,120,114,
+105,115,101,50,53,112,120,66,108,117,101,100,105,110,103,51,48,48,44,98,97,108,
+108,102,111,114,100,101,97,114,110,119,105,108,100,98,111,120,46,102,97,105,114,
+108,97,99,107,118,101,114,115,112,97,105,114,106,117,110,101,116,101,99,104,105,
+102,40,33,112,105,99,107,101,118,105,108,36,40,34,35,119,97,114,109,108,111,114,
+100,100,111,101,115,112,117,108,108,44,48,48,48,105,100,101,97,100,114,97,119,
+104,117,103,101,115,112,111,116,102,117,110,100,98,117,114,110,104,114,101,102,
+99,101,108,108,107,101,121,115,116,105,99,107,104,111,117,114,108,111,115,115,
+102,117,101,108,49,50,112,120,115,117,105,116,100,101,97,108,82,83,83,34,97,103,
+101,100,103,114,101,121,71,69,84,34,101,97,115,101,97,105,109,115,103,105,114,
+108,97,105,100,115,56,112,120,59,110,97,118,121,103,114,105,100,116,105,112,115,
+35,57,57,57,119,97,114,115,108,97,100,121,99,97,114,115,41,59,32,125,112,104,112
+,63,104,101,108,108,116,97,108,108,119,104,111,109,122,104,58,229,42,47,13,10,32
+,49,48,48,104,97,108,108,46,10,10,65,55,112,120,59,112,117,115,104,99,104,97,116
+,48,112,120,59,99,114,101,119,42,47,60,47,104,97,115,104,55,53,112,120,102,108,
+97,116,114,97,114,101,32,38,38,32,116,101,108,108,99,97,109,112,111,110,116,111,
+108,97,105,100,109,105,115,115,115,107,105,112,116,101,110,116,102,105,110,101,
+109,97,108,101,103,101,116,115,112,108,111,116,52,48,48,44,13,10,13,10,99,111,
+111,108,102,101,101,116,46,112,104,112,60,98,114,62,101,114,105,99,109,111,115,
+116,103,117,105,100,98,101,108,108,100,101,115,99,104,97,105,114,109,97,116,104,
+97,116,111,109,47,105,109,103,38,35,56,50,108,117,99,107,99,101,110,116,48,48,48
+,59,116,105,110,121,103,111,110,101,104,116,109,108,115,101,108,108,100,114,117,
+103,70,82,69,69,110,111,100,101,110,105,99,107,63,105,100,61,108,111,115,101,110
+,117,108,108,118,97,115,116,119,105,110,100,82,83,83,32,119,101,97,114,114,101,
+108,121,98,101,101,110,115,97,109,101,100,117,107,101,110,97,115,97,99,97,112,
+101,119,105,115,104,103,117,108,102,84,50,51,58,104,105,116,115,115,108,111,116,
+103,97,116,101,107,105,99,107,98,108,117,114,116,104,101,121,49,53,112,120,39,39
+,41,59,41,59,34,62,109,115,105,101,119,105,110,115,98,105,114,100,115,111,114,
+116,98,101,116,97,115,101,101,107,84,49,56,58,111,114,100,115,116,114,101,101,
+109,97,108,108,54,48,112,120,102,97,114,109,226,128,153,115,98,111,121,115,91,48
+,93,46,39,41,59,34,80,79,83,84,98,101,97,114,107,105,100,115,41,59,125,125,109,
+97,114,121,116,101,110,100,40,85,75,41,113,117,97,100,122,104,58,230,45,115,105,
+122,45,45,45,45,112,114,111,112,39,41,59,13,108,105,102,116,84,49,57,58,118,105,
+99,101,97,110,100,121,100,101,98,116,62,82,83,83,112,111,111,108,110,101,99,107,
+98,108,111,119,84,49,54,58,100,111,111,114,101,118,97,108,84,49,55,58,108,101,
+116,115,102,97,105,108,111,114,97,108,112,111,108,108,110,111,118,97,99,111,108,
+115,103,101,110,101,32,226,128,148,115,111,102,116,114,111,109,101,116,105,108,
+108,114,111,115,115,60,104,51,62,112,111,117,114,102,97,100,101,112,105,110,107,
+60,116,114,62,109,105,110,105,41,124,33,40,109,105,110,101,122,104,58,232,98,97,
+114,115,104,101,97,114,48,48,41,59,109,105,108,107,32,45,45,62,105,114,111,110,
+102,114,101,100,100,105,115,107,119,101,110,116,115,111,105,108,112,117,116,115,
+47,106,115,47,104,111,108,121,84,50,50,58,73,83,66,78,84,50,48,58,97,100,97,109,
+115,101,101,115,60,104,50,62,106,115,111,110,39,44,32,39,99,111,110,116,84,50,49
+,58,32,82,83,83,108,111,111,112,97,115,105,97,109,111,111,110,60,47,112,62,115,
+111,117,108,76,73,78,69,102,111,114,116,99,97,114,116,84,49,52,58,60,104,49,62,
+56,48,112,120,33,45,45,60,57,112,120,59,84,48,52,58,109,105,107,101,58,52,54,90,
+110,105,99,101,105,110,99,104,89,111,114,107,114,105,99,101,122,104,58,228,39,41
+,41,59,112,117,114,101,109,97,103,101,112,97,114,97,116,111,110,101,98,111,110,
+100,58,51,55,90,95,111,102,95,39,93,41,59,48,48,48,44,122,104,58,231,116,97,110,
+107,121,97,114,100,98,111,119,108,98,117,115,104,58,53,54,90,74,97,118,97,51,48,
+112,120,10,124,125,10,37,67,51,37,58,51,52,90,106,101,102,102,69,88,80,73,99,97,
+115,104,118,105,115,97,103,111,108,102,115,110,111,119,122,104,58,233,113,117,
+101,114,46,99,115,115,115,105,99,107,109,101,97,116,109,105,110,46,98,105,110,
+100,100,101,108,108,104,105,114,101,112,105,99,115,114,101,110,116,58,51,54,90,
+72,84,84,80,45,50,48,49,102,111,116,111,119,111,108,102,69,78,68,32,120,98,111,
+120,58,53,52,90,66,79,68,89,100,105,99,107,59,10,125,10,101,120,105,116,58,51,53
+,90,118,97,114,115,98,101,97,116,39,125,41,59,100,105,101,116,57,57,57,59,97,110
+,110,101,125,125,60,47,91,105,93,46,76,97,110,103,107,109,194,178,119,105,114,
+101,116,111,121,115,97,100,100,115,115,101,97,108,97,108,101,120,59,10,9,125,101
+,99,104,111,110,105,110,101,46,111,114,103,48,48,53,41,116,111,110,121,106,101,
+119,115,115,97,110,100,108,101,103,115,114,111,111,102,48,48,48,41,32,50,48,48,
+119,105,110,101,103,101,97,114,100,111,103,115,98,111,111,116,103,97,114,121,99,
+117,116,115,116,121,108,101,116,101,109,112,116,105,111,110,46,120,109,108,99,
+111,99,107,103,97,110,103,36,40,39,46,53,48,112,120,80,104,46,68,109,105,115,99,
+97,108,97,110,108,111,97,110,100,101,115,107,109,105,108,101,114,121,97,110,117,
+110,105,120,100,105,115,99,41,59,125,10,100,117,115,116,99,108,105,112,41,46,10,
+10,55,48,112,120,45,50,48,48,68,86,68,115,55,93,62,60,116,97,112,101,100,101,109
+,111,105,43,43,41,119,97,103,101,101,117,114,111,112,104,105,108,111,112,116,115
+,104,111,108,101,70,65,81,115,97,115,105,110,45,50,54,84,108,97,98,115,112,101,
+116,115,85,82,76,32,98,117,108,107,99,111,111,107,59,125,13,10,72,69,65,68,91,48
+,93,41,97,98,98,114,106,117,97,110,40,49,57,56,108,101,115,104,116,119,105,110,
+60,47,105,62,115,111,110,121,103,117,121,115,102,117,99,107,112,105,112,101,124,
+45,10,33,48,48,50,41,110,100,111,119,91,49,93,59,91,93,59,10,76,111,103,32,115,
+97,108,116,13,10,9,9,98,97,110,103,116,114,105,109,98,97,116,104,41,123,13,10,48
+,48,112,120,10,125,41,59,107,111,58,236,102,101,101,115,97,100,62,13,115,58,47,
+47,32,91,93,59,116,111,108,108,112,108,117,103,40,41,123,10,123,13,10,32,46,106,
+115,39,50,48,48,112,100,117,97,108,98,111,97,116,46,74,80,71,41,59,10,125,113,
+117,111,116,41,59,10,10,39,41,59,10,13,10,125,13,50,48,49,52,50,48,49,53,50,48,
+49,54,50,48,49,55,50,48,49,56,50,48,49,57,50,48,50,48,50,48,50,49,50,48,50,50,50
+,48,50,51,50,48,50,52,50,48,50,53,50,48,50,54,50,48,50,55,50,48,50,56,50,48,50,
+57,50,48,51,48,50,48,51,49,50,48,51,50,50,48,51,51,50,48,51,52,50,48,51,53,50,48
+,51,54,50,48,51,55,50,48,49,51,50,48,49,50,50,48,49,49,50,48,49,48,50,48,48,57,
+50,48,48,56,50,48,48,55,50,48,48,54,50,48,48,53,50,48,48,52,50,48,48,51,50,48,48
+,50,50,48,48,49,50,48,48,48,49,57,57,57,49,57,57,56,49,57,57,55,49,57,57,54,49,
+57,57,53,49,57,57,52,49,57,57,51,49,57,57,50,49,57,57,49,49,57,57,48,49,57,56,57
+,49,57,56,56,49,57,56,55,49,57,56,54,49,57,56,53,49,57,56,52,49,57,56,51,49,57,
+56,50,49,57,56,49,49,57,56,48,49,57,55,57,49,57,55,56,49,57,55,55,49,57,55,54,49
+,57,55,53,49,57,55,52,49,57,55,51,49,57,55,50,49,57,55,49,49,57,55,48,49,57,54,
+57,49,57,54,56,49,57,54,55,49,57,54,54,49,57,54,53,49,57,54,52,49,57,54,51,49,57
+,54,50,49,57,54,49,49,57,54,48,49,57,53,57,49,57,53,56,49,57,53,55,49,57,53,54,
+49,57,53,53,49,57,53,52,49,57,53,51,49,57,53,50,49,57,53,49,49,57,53,48,49,48,48
+,48,49,48,50,52,49,51,57,52,48,48,48,48,57,57,57,57,99,111,109,111,109,195,161,
+115,101,115,116,101,101,115,116,97,112,101,114,111,116,111,100,111,104,97,99,101
+,99,97,100,97,97,195,177,111,98,105,101,110,100,195,173,97,97,115,195,173,118,
+105,100,97,99,97,115,111,111,116,114,111,102,111,114,111,115,111,108,111,111,116
+,114,97,99,117,97,108,100,105,106,111,115,105,100,111,103,114,97,110,116,105,112
+,111,116,101,109,97,100,101,98,101,97,108,103,111,113,117,195,169,101,115,116,
+111,110,97,100,97,116,114,101,115,112,111,99,111,99,97,115,97,98,97,106,111,116,
+111,100,97,115,105,110,111,97,103,117,97,112,117,101,115,117,110,111,115,97,110,
+116,101,100,105,99,101,108,117,105,115,101,108,108,97,109,97,121,111,122,111,110
+,97,97,109,111,114,112,105,115,111,111,98,114,97,99,108,105,99,101,108,108,111,
+100,105,111,115,104,111,114,97,99,97,115,105,208,183,208,176,208,189,208,176,208
+,190,208,188,209,128,208,176,209,128,209,131,209,130,208,176,208,189,208,181,208
+,191,208,190,208,190,209,130,208,184,208,183,208,189,208,190,208,180,208,190,209
+,130,208,190,208,182,208,181,208,190,208,189,208,184,209,133,208,157,208,176,208
+,181,208,181,208,177,209,139,208,188,209,139,208,146,209,139,209,129,208,190,208
+,178,209,139,208,178,208,190,208,157,208,190,208,190,208,177,208,159,208,190,208
+,187,208,184,208,189,208,184,208,160,208,164,208,157,208,181,208,156,209,139,209
+,130,209,139,208,158,208,189,208,184,208,188,208,180,208,176,208,151,208,176,208
+,148,208,176,208,157,209,131,208,158,208,177,209,130,208,181,208,152,208,183,208
+,181,208,185,208,189,209,131,208,188,208,188,208,162,209,139,209,131,208,182,217
+,129,217,138,216,163,217,134,217,133,216,167,217,133,216,185,217,131,217,132,216
+,163,217,136,216,177,216,175,217,138,216,167,217,129,217,137,217,135,217,136,217
+,132,217,133,217,132,217,131,216,167,217,136,217,132,217,135,216,168,216,179,216
+,167,217,132,216,165,217,134,217,135,217,138,216,163,217,138,217,130,216,175,217
+,135,217,132,216,171,217,133,216,168,217,135,217,132,217,136,217,132,217,138,216
+,168,217,132,216,167,217,138,216,168,217,131,216,180,217,138,216,167,217,133,216
+,163,217,133,217,134,216,170,216,168,217,138,217,132,217,134,216,173,216,168,217
+,135,217,133,217,133,216,180,217,136,216,180,102,105,114,115,116,118,105,100,101
+,111,108,105,103,104,116,119,111,114,108,100,109,101,100,105,97,119,104,105,116,
+101,99,108,111,115,101,98,108,97,99,107,114,105,103,104,116,115,109,97,108,108,
+98,111,111,107,115,112,108,97,99,101,109,117,115,105,99,102,105,101,108,100,111,
+114,100,101,114,112,111,105,110,116,118,97,108,117,101,108,101,118,101,108,116,
+97,98,108,101,98,111,97,114,100,104,111,117,115,101,103,114,111,117,112,119,111,
+114,107,115,121,101,97,114,115,115,116,97,116,101,116,111,100,97,121,119,97,116,
+101,114,115,116,97,114,116,115,116,121,108,101,100,101,97,116,104,112,111,119,
+101,114,112,104,111,110,101,110,105,103,104,116,101,114,114,111,114,105,110,112,
+117,116,97,98,111,117,116,116,101,114,109,115,116,105,116,108,101,116,111,111,
+108,115,101,118,101,110,116,108,111,99,97,108,116,105,109,101,115,108,97,114,103
+,101,119,111,114,100,115,103,97,109,101,115,115,104,111,114,116,115,112,97,99,
+101,102,111,99,117,115,99,108,101,97,114,109,111,100,101,108,98,108,111,99,107,
+103,117,105,100,101,114,97,100,105,111,115,104,97,114,101,119,111,109,101,110,97
+,103,97,105,110,109,111,110,101,121,105,109,97,103,101,110,97,109,101,115,121,
+111,117,110,103,108,105,110,101,115,108,97,116,101,114,99,111,108,111,114,103,
+114,101,101,110,102,114,111,110,116,38,97,109,112,59,119,97,116,99,104,102,111,
+114,99,101,112,114,105,99,101,114,117,108,101,115,98,101,103,105,110,97,102,116,
+101,114,118,105,115,105,116,105,115,115,117,101,97,114,101,97,115,98,101,108,111
+,119,105,110,100,101,120,116,111,116,97,108,104,111,117,114,115,108,97,98,101,
+108,112,114,105,110,116,112,114,101,115,115,98,117,105,108,116,108,105,110,107,
+115,115,112,101,101,100,115,116,117,100,121,116,114,97,100,101,102,111,117,110,
+100,115,101,110,115,101,117,110,100,101,114,115,104,111,119,110,102,111,114,109,
+115,114,97,110,103,101,97,100,100,101,100,115,116,105,108,108,109,111,118,101,
+100,116,97,107,101,110,97,98,111,118,101,102,108,97,115,104,102,105,120,101,100,
+111,102,116,101,110,111,116,104,101,114,118,105,101,119,115,99,104,101,99,107,
+108,101,103,97,108,114,105,118,101,114,105,116,101,109,115,113,117,105,99,107,
+115,104,97,112,101,104,117,109,97,110,101,120,105,115,116,103,111,105,110,103,
+109,111,118,105,101,116,104,105,114,100,98,97,115,105,99,112,101,97,99,101,115,
+116,97,103,101,119,105,100,116,104,108,111,103,105,110,105,100,101,97,115,119,
+114,111,116,101,112,97,103,101,115,117,115,101,114,115,100,114,105,118,101,115,
+116,111,114,101,98,114,101,97,107,115,111,117,116,104,118,111,105,99,101,115,105
+,116,101,115,109,111,110,116,104,119,104,101,114,101,98,117,105,108,100,119,104,
+105,99,104,101,97,114,116,104,102,111,114,117,109,116,104,114,101,101,115,112,
+111,114,116,112,97,114,116,121,67,108,105,99,107,108,111,119,101,114,108,105,118
+,101,115,99,108,97,115,115,108,97,121,101,114,101,110,116,114,121,115,116,111,
+114,121,117,115,97,103,101,115,111,117,110,100,99,111,117,114,116,121,111,117,
+114,32,98,105,114,116,104,112,111,112,117,112,116,121,112,101,115,97,112,112,108
+,121,73,109,97,103,101,98,101,105,110,103,117,112,112,101,114,110,111,116,101,
+115,101,118,101,114,121,115,104,111,119,115,109,101,97,110,115,101,120,116,114,
+97,109,97,116,99,104,116,114,97,99,107,107,110,111,119,110,101,97,114,108,121,98
+,101,103,97,110,115,117,112,101,114,112,97,112,101,114,110,111,114,116,104,108,
+101,97,114,110,103,105,118,101,110,110,97,109,101,100,101,110,100,101,100,84,101
+,114,109,115,112,97,114,116,115,71,114,111,117,112,98,114,97,110,100,117,115,105
+,110,103,119,111,109,97,110,102,97,108,115,101,114,101,97,100,121,97,117,100,105
+,111,116,97,107,101,115,119,104,105,108,101,46,99,111,109,47,108,105,118,101,100
+,99,97,115,101,115,100,97,105,108,121,99,104,105,108,100,103,114,101,97,116,106,
+117,100,103,101,116,104,111,115,101,117,110,105,116,115,110,101,118,101,114,98,
+114,111,97,100,99,111,97,115,116,99,111,118,101,114,97,112,112,108,101,102,105,
+108,101,115,99,121,99,108,101,115,99,101,110,101,112,108,97,110,115,99,108,105,
+99,107,119,114,105,116,101,113,117,101,101,110,112,105,101,99,101,101,109,97,105
+,108,102,114,97,109,101,111,108,100,101,114,112,104,111,116,111,108,105,109,105,
+116,99,97,99,104,101,99,105,118,105,108,115,99,97,108,101,101,110,116,101,114,
+116,104,101,109,101,116,104,101,114,101,116,111,117,99,104,98,111,117,110,100,
+114,111,121,97,108,97,115,107,101,100,119,104,111,108,101,115,105,110,99,101,115
+,116,111,99,107,32,110,97,109,101,102,97,105,116,104,104,101,97,114,116,101,109,
+112,116,121,111,102,102,101,114,115,99,111,112,101,111,119,110,101,100,109,105,
+103,104,116,97,108,98,117,109,116,104,105,110,107,98,108,111,111,100,97,114,114,
+97,121,109,97,106,111,114,116,114,117,115,116,99,97,110,111,110,117,110,105,111,
+110,99,111,117,110,116,118,97,108,105,100,115,116,111,110,101,83,116,121,108,101
+,76,111,103,105,110,104,97,112,112,121,111,99,99,117,114,108,101,102,116,58,102,
+114,101,115,104,113,117,105,116,101,102,105,108,109,115,103,114,97,100,101,110,
+101,101,100,115,117,114,98,97,110,102,105,103,104,116,98,97,115,105,115,104,111,
+118,101,114,97,117,116,111,59,114,111,117,116,101,46,104,116,109,108,109,105,120
+,101,100,102,105,110,97,108,89,111,117,114,32,115,108,105,100,101,116,111,112,
+105,99,98,114,111,119,110,97,108,111,110,101,100,114,97,119,110,115,112,108,105,
+116,114,101,97,99,104,82,105,103,104,116,100,97,116,101,115,109,97,114,99,104,
+113,117,111,116,101,103,111,111,100,115,76,105,110,107,115,100,111,117,98,116,97
+,115,121,110,99,116,104,117,109,98,97,108,108,111,119,99,104,105,101,102,121,111
+,117,116,104,110,111,118,101,108,49,48,112,120,59,115,101,114,118,101,117,110,
+116,105,108,104,97,110,100,115,67,104,101,99,107,83,112,97,99,101,113,117,101,
+114,121,106,97,109,101,115,101,113,117,97,108,116,119,105,99,101,48,44,48,48,48,
+83,116,97,114,116,112,97,110,101,108,115,111,110,103,115,114,111,117,110,100,101
+,105,103,104,116,115,104,105,102,116,119,111,114,116,104,112,111,115,116,115,108
+,101,97,100,115,119,101,101,107,115,97,118,111,105,100,116,104,101,115,101,109,
+105,108,101,115,112,108,97,110,101,115,109,97,114,116,97,108,112,104,97,112,108,
+97,110,116,109,97,114,107,115,114,97,116,101,115,112,108,97,121,115,99,108,97,
+105,109,115,97,108,101,115,116,101,120,116,115,115,116,97,114,115,119,114,111,
+110,103,60,47,104,51,62,116,104,105,110,103,46,111,114,103,47,109,117,108,116,
+105,104,101,97,114,100,80,111,119,101,114,115,116,97,110,100,116,111,107,101,110
+,115,111,108,105,100,40,116,104,105,115,98,114,105,110,103,115,104,105,112,115,
+115,116,97,102,102,116,114,105,101,100,99,97,108,108,115,102,117,108,108,121,102
+,97,99,116,115,97,103,101,110,116,84,104,105,115,32,47,47,45,45,62,97,100,109,
+105,110,101,103,121,112,116,69,118,101,110,116,49,53,112,120,59,69,109,97,105,
+108,116,114,117,101,34,99,114,111,115,115,115,112,101,110,116,98,108,111,103,115
+,98,111,120,34,62,110,111,116,101,100,108,101,97,118,101,99,104,105,110,97,115,
+105,122,101,115,103,117,101,115,116,60,47,104,52,62,114,111,98,111,116,104,101,
+97,118,121,116,114,117,101,44,115,101,118,101,110,103,114,97,110,100,99,114,105,
+109,101,115,105,103,110,115,97,119,97,114,101,100,97,110,99,101,112,104,97,115,
+101,62,60,33,45,45,101,110,95,85,83,38,35,51,57,59,50,48,48,112,120,95,110,97,
+109,101,108,97,116,105,110,101,110,106,111,121,97,106,97,120,46,97,116,105,111,
+110,115,109,105,116,104,85,46,83,46,32,104,111,108,100,115,112,101,116,101,114,
+105,110,100,105,97,110,97,118,34,62,99,104,97,105,110,115,99,111,114,101,99,111,
+109,101,115,100,111,105,110,103,112,114,105,111,114,83,104,97,114,101,49,57,57,
+48,115,114,111,109,97,110,108,105,115,116,115,106,97,112,97,110,102,97,108,108,
+115,116,114,105,97,108,111,119,110,101,114,97,103,114,101,101,60,47,104,50,62,97
+,98,117,115,101,97,108,101,114,116,111,112,101,114,97,34,45,47,47,87,99,97,114,
+100,115,104,105,108,108,115,116,101,97,109,115,80,104,111,116,111,116,114,117,
+116,104,99,108,101,97,110,46,112,104,112,63,115,97,105,110,116,109,101,116,97,
+108,108,111,117,105,115,109,101,97,110,116,112,114,111,111,102,98,114,105,101,
+102,114,111,119,34,62,103,101,110,114,101,116,114,117,99,107,108,111,111,107,115
+,86,97,108,117,101,70,114,97,109,101,46,110,101,116,47,45,45,62,10,60,116,114,
+121,32,123,10,118,97,114,32,109,97,107,101,115,99,111,115,116,115,112,108,97,105
+,110,97,100,117,108,116,113,117,101,115,116,116,114,97,105,110,108,97,98,111,114
+,104,101,108,112,115,99,97,117,115,101,109,97,103,105,99,109,111,116,111,114,116
+,104,101,105,114,50,53,48,112,120,108,101,97,115,116,115,116,101,112,115,67,111,
+117,110,116,99,111,117,108,100,103,108,97,115,115,115,105,100,101,115,102,117,
+110,100,115,104,111,116,101,108,97,119,97,114,100,109,111,117,116,104,109,111,
+118,101,115,112,97,114,105,115,103,105,118,101,115,100,117,116,99,104,116,101,
+120,97,115,102,114,117,105,116,110,117,108,108,44,124,124,91,93,59,116,111,112,
+34,62,10,60,33,45,45,80,79,83,84,34,111,99,101,97,110,60,98,114,47,62,102,108,
+111,111,114,115,112,101,97,107,100,101,112,116,104,32,115,105,122,101,98,97,110,
+107,115,99,97,116,99,104,99,104,97,114,116,50,48,112,120,59,97,108,105,103,110,
+100,101,97,108,115,119,111,117,108,100,53,48,112,120,59,117,114,108,61,34,112,97
+,114,107,115,109,111,117,115,101,77,111,115,116,32,46,46,46,60,47,97,109,111,110
+,103,98,114,97,105,110,98,111,100,121,32,110,111,110,101,59,98,97,115,101,100,99
+,97,114,114,121,100,114,97,102,116,114,101,102,101,114,112,97,103,101,95,104,111
+,109,101,46,109,101,116,101,114,100,101,108,97,121,100,114,101,97,109,112,114,
+111,118,101,106,111,105,110,116,60,47,116,114,62,100,114,117,103,115,60,33,45,45
+,32,97,112,114,105,108,105,100,101,97,108,97,108,108,101,110,101,120,97,99,116,
+102,111,114,116,104,99,111,100,101,115,108,111,103,105,99,86,105,101,119,32,115,
+101,101,109,115,98,108,97,110,107,112,111,114,116,115,32,40,50,48,48,115,97,118,
+101,100,95,108,105,110,107,103,111,97,108,115,103,114,97,110,116,103,114,101,101
+,107,104,111,109,101,115,114,105,110,103,115,114,97,116,101,100,51,48,112,120,59
+,119,104,111,115,101,112,97,114,115,101,40,41,59,34,32,66,108,111,99,107,108,105
+,110,117,120,106,111,110,101,115,112,105,120,101,108,39,41,59,34,62,41,59,105,
+102,40,45,108,101,102,116,100,97,118,105,100,104,111,114,115,101,70,111,99,117,
+115,114,97,105,115,101,98,111,120,101,115,84,114,97,99,107,101,109,101,110,116,
+60,47,101,109,62,98,97,114,34,62,46,115,114,99,61,116,111,119,101,114,97,108,116
+,61,34,99,97,98,108,101,104,101,110,114,121,50,52,112,120,59,115,101,116,117,112
+,105,116,97,108,121,115,104,97,114,112,109,105,110,111,114,116,97,115,116,101,
+119,97,110,116,115,116,104,105,115,46,114,101,115,101,116,119,104,101,101,108,
+103,105,114,108,115,47,99,115,115,47,49,48,48,37,59,99,108,117,98,115,115,116,
+117,102,102,98,105,98,108,101,118,111,116,101,115,32,49,48,48,48,107,111,114,101
+,97,125,41,59,13,10,98,97,110,100,115,113,117,101,117,101,61,32,123,125,59,56,48
+,112,120,59,99,107,105,110,103,123,13,10,9,9,97,104,101,97,100,99,108,111,99,107
+,105,114,105,115,104,108,105,107,101,32,114,97,116,105,111,115,116,97,116,115,70
+,111,114,109,34,121,97,104,111,111,41,91,48,93,59,65,98,111,117,116,102,105,110,
+100,115,60,47,104,49,62,100,101,98,117,103,116,97,115,107,115,85,82,76,32,61,99,
+101,108,108,115,125,41,40,41,59,49,50,112,120,59,112,114,105,109,101,116,101,108
+,108,115,116,117,114,110,115,48,120,54,48,48,46,106,112,103,34,115,112,97,105,
+110,98,101,97,99,104,116,97,120,101,115,109,105,99,114,111,97,110,103,101,108,45
+,45,62,60,47,103,105,102,116,115,115,116,101,118,101,45,108,105,110,107,98,111,
+100,121,46,125,41,59,10,9,109,111,117,110,116,32,40,49,57,57,70,65,81,60,47,114,
+111,103,101,114,102,114,97,110,107,67,108,97,115,115,50,56,112,120,59,102,101,
+101,100,115,60,104,49,62,60,115,99,111,116,116,116,101,115,116,115,50,50,112,120
+,59,100,114,105,110,107,41,32,124,124,32,108,101,119,105,115,115,104,97,108,108,
+35,48,51,57,59,32,102,111,114,32,108,111,118,101,100,119,97,115,116,101,48,48,
+112,120,59,106,97,58,227,130,115,105,109,111,110,60,102,111,110,116,114,101,112,
+108,121,109,101,101,116,115,117,110,116,101,114,99,104,101,97,112,116,105,103,
+104,116,66,114,97,110,100,41,32,33,61,32,100,114,101,115,115,99,108,105,112,115,
+114,111,111,109,115,111,110,107,101,121,109,111,98,105,108,109,97,105,110,46,78,
+97,109,101,32,112,108,97,116,101,102,117,110,110,121,116,114,101,101,115,99,111,
+109,47,34,49,46,106,112,103,119,109,111,100,101,112,97,114,97,109,83,84,65,82,84
+,108,101,102,116,32,105,100,100,101,110,44,32,50,48,49,41,59,10,125,10,102,111,
+114,109,46,118,105,114,117,115,99,104,97,105,114,116,114,97,110,115,119,111,114,
+115,116,80,97,103,101,115,105,116,105,111,110,112,97,116,99,104,60,33,45,45,10,
+111,45,99,97,99,102,105,114,109,115,116,111,117,114,115,44,48,48,48,32,97,115,
+105,97,110,105,43,43,41,123,97,100,111,98,101,39,41,91,48,93,105,100,61,49,48,98
+,111,116,104,59,109,101,110,117,32,46,50,46,109,105,46,112,110,103,34,107,101,
+118,105,110,99,111,97,99,104,67,104,105,108,100,98,114,117,99,101,50,46,106,112,
+103,85,82,76,41,43,46,106,112,103,124,115,117,105,116,101,115,108,105,99,101,104
+,97,114,114,121,49,50,48,34,32,115,119,101,101,116,116,114,62,13,10,110,97,109,
+101,61,100,105,101,103,111,112,97,103,101,32,115,119,105,115,115,45,45,62,10,10,
+35,102,102,102,59,34,62,76,111,103,46,99,111,109,34,116,114,101,97,116,115,104,
+101,101,116,41,32,38,38,32,49,52,112,120,59,115,108,101,101,112,110,116,101,110,
+116,102,105,108,101,100,106,97,58,227,131,105,100,61,34,99,78,97,109,101,34,119,
+111,114,115,101,115,104,111,116,115,45,98,111,120,45,100,101,108,116,97,10,38,
+108,116,59,98,101,97,114,115,58,52,56,90,60,100,97,116,97,45,114,117,114,97,108,
+60,47,97,62,32,115,112,101,110,100,98,97,107,101,114,115,104,111,112,115,61,32,
+34,34,59,112,104,112,34,62,99,116,105,111,110,49,51,112,120,59,98,114,105,97,110
+,104,101,108,108,111,115,105,122,101,61,111,61,37,50,70,32,106,111,105,110,109,
+97,121,98,101,60,105,109,103,32,105,109,103,34,62,44,32,102,106,115,105,109,103,
+34,32,34,41,91,48,93,77,84,111,112,66,84,121,112,101,34,110,101,119,108,121,68,
+97,110,115,107,99,122,101,99,104,116,114,97,105,108,107,110,111,119,115,60,47,
+104,53,62,102,97,113,34,62,122,104,45,99,110,49,48,41,59,10,45,49,34,41,59,116,
+121,112,101,61,98,108,117,101,115,116,114,117,108,121,100,97,118,105,115,46,106,
+115,39,59,62,13,10,60,33,115,116,101,101,108,32,121,111,117,32,104,50,62,13,10,
+102,111,114,109,32,106,101,115,117,115,49,48,48,37,32,109,101,110,117,46,13,10,9
+,13,10,119,97,108,101,115,114,105,115,107,115,117,109,101,110,116,100,100,105,
+110,103,98,45,108,105,107,116,101,97,99,104,103,105,102,34,32,118,101,103,97,115
+,100,97,110,115,107,101,101,115,116,105,115,104,113,105,112,115,117,111,109,105,
+115,111,98,114,101,100,101,115,100,101,101,110,116,114,101,116,111,100,111,115,
+112,117,101,100,101,97,195,177,111,115,101,115,116,195,161,116,105,101,110,101,
+104,97,115,116,97,111,116,114,111,115,112,97,114,116,101,100,111,110,100,101,110
+,117,101,118,111,104,97,99,101,114,102,111,114,109,97,109,105,115,109,111,109,
+101,106,111,114,109,117,110,100,111,97,113,117,195,173,100,195,173,97,115,115,
+195,179,108,111,97,121,117,100,97,102,101,99,104,97,116,111,100,97,115,116,97,
+110,116,111,109,101,110,111,115,100,97,116,111,115,111,116,114,97,115,115,105,
+116,105,111,109,117,99,104,111,97,104,111,114,97,108,117,103,97,114,109,97,121,
+111,114,101,115,116,111,115,104,111,114,97,115,116,101,110,101,114,97,110,116,
+101,115,102,111,116,111,115,101,115,116,97,115,112,97,195,173,115,110,117,101,
+118,97,115,97,108,117,100,102,111,114,111,115,109,101,100,105,111,113,117,105,
+101,110,109,101,115,101,115,112,111,100,101,114,99,104,105,108,101,115,101,114,
+195,161,118,101,99,101,115,100,101,99,105,114,106,111,115,195,169,101,115,116,97
+,114,118,101,110,116,97,103,114,117,112,111,104,101,99,104,111,101,108,108,111,
+115,116,101,110,103,111,97,109,105,103,111,99,111,115,97,115,110,105,118,101,108
+,103,101,110,116,101,109,105,115,109,97,97,105,114,101,115,106,117,108,105,111,
+116,101,109,97,115,104,97,99,105,97,102,97,118,111,114,106,117,110,105,111,108,
+105,98,114,101,112,117,110,116,111,98,117,101,110,111,97,117,116,111,114,97,98,
+114,105,108,98,117,101,110,97,116,101,120,116,111,109,97,114,122,111,115,97,98,
+101,114,108,105,115,116,97,108,117,101,103,111,99,195,179,109,111,101,110,101,
+114,111,106,117,101,103,111,112,101,114,195,186,104,97,98,101,114,101,115,116,
+111,121,110,117,110,99,97,109,117,106,101,114,118,97,108,111,114,102,117,101,114
+,97,108,105,98,114,111,103,117,115,116,97,105,103,117,97,108,118,111,116,111,115
+,99,97,115,111,115,103,117,195,173,97,112,117,101,100,111,115,111,109,111,115,97
+,118,105,115,111,117,115,116,101,100,100,101,98,101,110,110,111,99,104,101,98,
+117,115,99,97,102,97,108,116,97,101,117,114,111,115,115,101,114,105,101,100,105,
+99,104,111,99,117,114,115,111,99,108,97,118,101,99,97,115,97,115,108,101,195,179
+,110,112,108,97,122,111,108,97,114,103,111,111,98,114,97,115,118,105,115,116,97,
+97,112,111,121,111,106,117,110,116,111,116,114,97,116,97,118,105,115,116,111,99,
+114,101,97,114,99,97,109,112,111,104,101,109,111,115,99,105,110,99,111,99,97,114
+,103,111,112,105,115,111,115,111,114,100,101,110,104,97,99,101,110,195,161,114,
+101,97,100,105,115,99,111,112,101,100,114,111,99,101,114,99,97,112,117,101,100,
+97,112,97,112,101,108,109,101,110,111,114,195,186,116,105,108,99,108,97,114,111,
+106,111,114,103,101,99,97,108,108,101,112,111,110,101,114,116,97,114,100,101,110
+,97,100,105,101,109,97,114,99,97,115,105,103,117,101,101,108,108,97,115,115,105,
+103,108,111,99,111,99,104,101,109,111,116,111,115,109,97,100,114,101,99,108,97,
+115,101,114,101,115,116,111,110,105,195,177,111,113,117,101,100,97,112,97,115,97
+,114,98,97,110,99,111,104,105,106,111,115,118,105,97,106,101,112,97,98,108,111,
+195,169,115,116,101,118,105,101,110,101,114,101,105,110,111,100,101,106,97,114,
+102,111,110,100,111,99,97,110,97,108,110,111,114,116,101,108,101,116,114,97,99,
+97,117,115,97,116,111,109,97,114,109,97,110,111,115,108,117,110,101,115,97,117,
+116,111,115,118,105,108,108,97,118,101,110,100,111,112,101,115,97,114,116,105,
+112,111,115,116,101,110,103,97,109,97,114,99,111,108,108,101,118,97,112,97,100,
+114,101,117,110,105,100,111,118,97,109,111,115,122,111,110,97,115,97,109,98,111,
+115,98,97,110,100,97,109,97,114,105,97,97,98,117,115,111,109,117,99,104,97,115,
+117,98,105,114,114,105,111,106,97,118,105,118,105,114,103,114,97,100,111,99,104,
+105,99,97,97,108,108,195,173,106,111,118,101,110,100,105,99,104,97,101,115,116,
+97,110,116,97,108,101,115,115,97,108,105,114,115,117,101,108,111,112,101,115,111
+,115,102,105,110,101,115,108,108,97,109,97,98,117,115,99,111,195,169,115,116,97,
+108,108,101,103,97,110,101,103,114,111,112,108,97,122,97,104,117,109,111,114,112
+,97,103,97,114,106,117,110,116,97,100,111,98,108,101,105,115,108,97,115,98,111,
+108,115,97,98,97,195,177,111,104,97,98,108,97,108,117,99,104,97,195,129,114,101,
+97,100,105,99,101,110,106,117,103,97,114,110,111,116,97,115,118,97,108,108,101,
+97,108,108,195,161,99,97,114,103,97,100,111,108,111,114,97,98,97,106,111,101,115
+,116,195,169,103,117,115,116,111,109,101,110,116,101,109,97,114,105,111,102,105,
+114,109,97,99,111,115,116,111,102,105,99,104,97,112,108,97,116,97,104,111,103,97
+,114,97,114,116,101,115,108,101,121,101,115,97,113,117,101,108,109,117,115,101,
+111,98,97,115,101,115,112,111,99,111,115,109,105,116,97,100,99,105,101,108,111,
+99,104,105,99,111,109,105,101,100,111,103,97,110,97,114,115,97,110,116,111,101,
+116,97,112,97,100,101,98,101,115,112,108,97,121,97,114,101,100,101,115,115,105,
+101,116,101,99,111,114,116,101,99,111,114,101,97,100,117,100,97,115,100,101,115,
+101,111,118,105,101,106,111,100,101,115,101,97,97,103,117,97,115,38,113,117,111,
+116,59,100,111,109,97,105,110,99,111,109,109,111,110,115,116,97,116,117,115,101,
+118,101,110,116,115,109,97,115,116,101,114,115,121,115,116,101,109,97,99,116,105
+,111,110,98,97,110,110,101,114,114,101,109,111,118,101,115,99,114,111,108,108,
+117,112,100,97,116,101,103,108,111,98,97,108,109,101,100,105,117,109,102,105,108
+,116,101,114,110,117,109,98,101,114,99,104,97,110,103,101,114,101,115,117,108,
+116,112,117,98,108,105,99,115,99,114,101,101,110,99,104,111,111,115,101,110,111,
+114,109,97,108,116,114,97,118,101,108,105,115,115,117,101,115,115,111,117,114,99
+,101,116,97,114,103,101,116,115,112,114,105,110,103,109,111,100,117,108,101,109,
+111,98,105,108,101,115,119,105,116,99,104,112,104,111,116,111,115,98,111,114,100
+,101,114,114,101,103,105,111,110,105,116,115,101,108,102,115,111,99,105,97,108,
+97,99,116,105,118,101,99,111,108,117,109,110,114,101,99,111,114,100,102,111,108,
+108,111,119,116,105,116,108,101,62,101,105,116,104,101,114,108,101,110,103,116,
+104,102,97,109,105,108,121,102,114,105,101,110,100,108,97,121,111,117,116,97,117
+,116,104,111,114,99,114,101,97,116,101,114,101,118,105,101,119,115,117,109,109,
+101,114,115,101,114,118,101,114,112,108,97,121,101,100,112,108,97,121,101,114,
+101,120,112,97,110,100,112,111,108,105,99,121,102,111,114,109,97,116,100,111,117
+,98,108,101,112,111,105,110,116,115,115,101,114,105,101,115,112,101,114,115,111,
+110,108,105,118,105,110,103,100,101,115,105,103,110,109,111,110,116,104,115,102,
+111,114,99,101,115,117,110,105,113,117,101,119,101,105,103,104,116,112,101,111,
+112,108,101,101,110,101,114,103,121,110,97,116,117,114,101,115,101,97,114,99,104
+,102,105,103,117,114,101,104,97,118,105,110,103,99,117,115,116,111,109,111,102,
+102,115,101,116,108,101,116,116,101,114,119,105,110,100,111,119,115,117,98,109,
+105,116,114,101,110,100,101,114,103,114,111,117,112,115,117,112,108,111,97,100,
+104,101,97,108,116,104,109,101,116,104,111,100,118,105,100,101,111,115,115,99,
+104,111,111,108,102,117,116,117,114,101,115,104,97,100,111,119,100,101,98,97,116
+,101,118,97,108,117,101,115,79,98,106,101,99,116,111,116,104,101,114,115,114,105
+,103,104,116,115,108,101,97,103,117,101,99,104,114,111,109,101,115,105,109,112,
+108,101,110,111,116,105,99,101,115,104,97,114,101,100,101,110,100,105,110,103,
+115,101,97,115,111,110,114,101,112,111,114,116,111,110,108,105,110,101,115,113,
+117,97,114,101,98,117,116,116,111,110,105,109,97,103,101,115,101,110,97,98,108,
+101,109,111,118,105,110,103,108,97,116,101,115,116,119,105,110,116,101,114,70,
+114,97,110,99,101,112,101,114,105,111,100,115,116,114,111,110,103,114,101,112,
+101,97,116,76,111,110,100,111,110,100,101,116,97,105,108,102,111,114,109,101,100
+,100,101,109,97,110,100,115,101,99,117,114,101,112,97,115,115,101,100,116,111,
+103,103,108,101,112,108,97,99,101,115,100,101,118,105,99,101,115,116,97,116,105,
+99,99,105,116,105,101,115,115,116,114,101,97,109,121,101,108,108,111,119,97,116,
+116,97,99,107,115,116,114,101,101,116,102,108,105,103,104,116,104,105,100,100,
+101,110,105,110,102,111,34,62,111,112,101,110,101,100,117,115,101,102,117,108,
+118,97,108,108,101,121,99,97,117,115,101,115,108,101,97,100,101,114,115,101,99,
+114,101,116,115,101,99,111,110,100,100,97,109,97,103,101,115,112,111,114,116,115
+,101,120,99,101,112,116,114,97,116,105,110,103,115,105,103,110,101,100,116,104,
+105,110,103,115,101,102,102,101,99,116,102,105,101,108,100,115,115,116,97,116,
+101,115,111,102,102,105,99,101,118,105,115,117,97,108,101,100,105,116,111,114,
+118,111,108,117,109,101,82,101,112,111,114,116,109,117,115,101,117,109,109,111,
+118,105,101,115,112,97,114,101,110,116,97,99,99,101,115,115,109,111,115,116,108,
+121,109,111,116,104,101,114,34,32,105,100,61,34,109,97,114,107,101,116,103,114,
+111,117,110,100,99,104,97,110,99,101,115,117,114,118,101,121,98,101,102,111,114,
+101,115,121,109,98,111,108,109,111,109,101,110,116,115,112,101,101,99,104,109,
+111,116,105,111,110,105,110,115,105,100,101,109,97,116,116,101,114,67,101,110,
+116,101,114,111,98,106,101,99,116,101,120,105,115,116,115,109,105,100,100,108,
+101,69,117,114,111,112,101,103,114,111,119,116,104,108,101,103,97,99,121,109,97,
+110,110,101,114,101,110,111,117,103,104,99,97,114,101,101,114,97,110,115,119,101
+,114,111,114,105,103,105,110,112,111,114,116,97,108,99,108,105,101,110,116,115,
+101,108,101,99,116,114,97,110,100,111,109,99,108,111,115,101,100,116,111,112,105
+,99,115,99,111,109,105,110,103,102,97,116,104,101,114,111,112,116,105,111,110,
+115,105,109,112,108,121,114,97,105,115,101,100,101,115,99,97,112,101,99,104,111,
+115,101,110,99,104,117,114,99,104,100,101,102,105,110,101,114,101,97,115,111,110
+,99,111,114,110,101,114,111,117,116,112,117,116,109,101,109,111,114,121,105,102,
+114,97,109,101,112,111,108,105,99,101,109,111,100,101,108,115,78,117,109,98,101,
+114,100,117,114,105,110,103,111,102,102,101,114,115,115,116,121,108,101,115,107,
+105,108,108,101,100,108,105,115,116,101,100,99,97,108,108,101,100,115,105,108,
+118,101,114,109,97,114,103,105,110,100,101,108,101,116,101,98,101,116,116,101,
+114,98,114,111,119,115,101,108,105,109,105,116,115,71,108,111,98,97,108,115,105,
+110,103,108,101,119,105,100,103,101,116,99,101,110,116,101,114,98,117,100,103,
+101,116,110,111,119,114,97,112,99,114,101,100,105,116,99,108,97,105,109,115,101,
+110,103,105,110,101,115,97,102,101,116,121,99,104,111,105,99,101,115,112,105,114
+,105,116,45,115,116,121,108,101,115,112,114,101,97,100,109,97,107,105,110,103,
+110,101,101,100,101,100,114,117,115,115,105,97,112,108,101,97,115,101,101,120,
+116,101,110,116,83,99,114,105,112,116,98,114,111,107,101,110,97,108,108,111,119,
+115,99,104,97,114,103,101,100,105,118,105,100,101,102,97,99,116,111,114,109,101,
+109,98,101,114,45,98,97,115,101,100,116,104,101,111,114,121,99,111,110,102,105,
+103,97,114,111,117,110,100,119,111,114,107,101,100,104,101,108,112,101,100,67,
+104,117,114,99,104,105,109,112,97,99,116,115,104,111,117,108,100,97,108,119,97,
+121,115,108,111,103,111,34,32,98,111,116,116,111,109,108,105,115,116,34,62,41,
+123,118,97,114,32,112,114,101,102,105,120,111,114,97,110,103,101,72,101,97,100,
+101,114,46,112,117,115,104,40,99,111,117,112,108,101,103,97,114,100,101,110,98,
+114,105,100,103,101,108,97,117,110,99,104,82,101,118,105,101,119,116,97,107,105,
+110,103,118,105,115,105,111,110,108,105,116,116,108,101,100,97,116,105,110,103,
+66,117,116,116,111,110,98,101,97,117,116,121,116,104,101,109,101,115,102,111,114
+,103,111,116,83,101,97,114,99,104,97,110,99,104,111,114,97,108,109,111,115,116,
+108,111,97,100,101,100,67,104,97,110,103,101,114,101,116,117,114,110,115,116,114
+,105,110,103,114,101,108,111,97,100,77,111,98,105,108,101,105,110,99,111,109,101
+,115,117,112,112,108,121,83,111,117,114,99,101,111,114,100,101,114,115,118,105,
+101,119,101,100,38,110,98,115,112,59,99,111,117,114,115,101,65,98,111,117,116,32
+,105,115,108,97,110,100,60,104,116,109,108,32,99,111,111,107,105,101,110,97,109,
+101,61,34,97,109,97,122,111,110,109,111,100,101,114,110,97,100,118,105,99,101,
+105,110,60,47,97,62,58,32,84,104,101,32,100,105,97,108,111,103,104,111,117,115,
+101,115,66,69,71,73,78,32,77,101,120,105,99,111,115,116,97,114,116,115,99,101,
+110,116,114,101,104,101,105,103,104,116,97,100,100,105,110,103,73,115,108,97,110
+,100,97,115,115,101,116,115,69,109,112,105,114,101,83,99,104,111,111,108,101,102
+,102,111,114,116,100,105,114,101,99,116,110,101,97,114,108,121,109,97,110,117,97
+,108,83,101,108,101,99,116,46,10,10,79,110,101,106,111,105,110,101,100,109,101,
+110,117,34,62,80,104,105,108,105,112,97,119,97,114,100,115,104,97,110,100,108,
+101,105,109,112,111,114,116,79,102,102,105,99,101,114,101,103,97,114,100,115,107
+,105,108,108,115,110,97,116,105,111,110,83,112,111,114,116,115,100,101,103,114,
+101,101,119,101,101,107,108,121,32,40,101,46,103,46,98,101,104,105,110,100,100,
+111,99,116,111,114,108,111,103,103,101,100,117,110,105,116,101,100,60,47,98,62,
+60,47,98,101,103,105,110,115,112,108,97,110,116,115,97,115,115,105,115,116,97,
+114,116,105,115,116,105,115,115,117,101,100,51,48,48,112,120,124,99,97,110,97,
+100,97,97,103,101,110,99,121,115,99,104,101,109,101,114,101,109,97,105,110,66,
+114,97,122,105,108,115,97,109,112,108,101,108,111,103,111,34,62,98,101,121,111,
+110,100,45,115,99,97,108,101,97,99,99,101,112,116,115,101,114,118,101,100,109,97
+,114,105,110,101,70,111,111,116,101,114,99,97,109,101,114,97,60,47,104,49,62,10,
+95,102,111,114,109,34,108,101,97,118,101,115,115,116,114,101,115,115,34,32,47,62
+,13,10,46,103,105,102,34,32,111,110,108,111,97,100,108,111,97,100,101,114,79,120
+,102,111,114,100,115,105,115,116,101,114,115,117,114,118,105,118,108,105,115,116
+,101,110,102,101,109,97,108,101,68,101,115,105,103,110,115,105,122,101,61,34,97,
+112,112,101,97,108,116,101,120,116,34,62,108,101,118,101,108,115,116,104,97,110,
+107,115,104,105,103,104,101,114,102,111,114,99,101,100,97,110,105,109,97,108,97,
+110,121,111,110,101,65,102,114,105,99,97,97,103,114,101,101,100,114,101,99,101,
+110,116,80,101,111,112,108,101,60,98,114,32,47,62,119,111,110,100,101,114,112,
+114,105,99,101,115,116,117,114,110,101,100,124,124,32,123,125,59,109,97,105,110,
+34,62,105,110,108,105,110,101,115,117,110,100,97,121,119,114,97,112,34,62,102,97
+,105,108,101,100,99,101,110,115,117,115,109,105,110,117,116,101,98,101,97,99,111
+,110,113,117,111,116,101,115,49,53,48,112,120,124,101,115,116,97,116,101,114,101
+,109,111,116,101,101,109,97,105,108,34,108,105,110,107,101,100,114,105,103,104,
+116,59,115,105,103,110,97,108,102,111,114,109,97,108,49,46,104,116,109,108,115,
+105,103,110,117,112,112,114,105,110,99,101,102,108,111,97,116,58,46,112,110,103,
+34,32,102,111,114,117,109,46,65,99,99,101,115,115,112,97,112,101,114,115,115,111
+,117,110,100,115,101,120,116,101,110,100,72,101,105,103,104,116,115,108,105,100,
+101,114,85,84,70,45,56,34,38,97,109,112,59,32,66,101,102,111,114,101,46,32,87,
+105,116,104,115,116,117,100,105,111,111,119,110,101,114,115,109,97,110,97,103,
+101,112,114,111,102,105,116,106,81,117,101,114,121,97,110,110,117,97,108,112,97,
+114,97,109,115,98,111,117,103,104,116,102,97,109,111,117,115,103,111,111,103,108
+,101,108,111,110,103,101,114,105,43,43,41,32,123,105,115,114,97,101,108,115,97,
+121,105,110,103,100,101,99,105,100,101,104,111,109,101,34,62,104,101,97,100,101,
+114,101,110,115,117,114,101,98,114,97,110,99,104,112,105,101,99,101,115,98,108,
+111,99,107,59,115,116,97,116,101,100,116,111,112,34,62,60,114,97,99,105,110,103,
+114,101,115,105,122,101,45,45,38,103,116,59,112,97,99,105,116,121,115,101,120,
+117,97,108,98,117,114,101,97,117,46,106,112,103,34,32,49,48,44,48,48,48,111,98,
+116,97,105,110,116,105,116,108,101,115,97,109,111,117,110,116,44,32,73,110,99,46
+,99,111,109,101,100,121,109,101,110,117,34,32,108,121,114,105,99,115,116,111,100
+,97,121,46,105,110,100,101,101,100,99,111,117,110,116,121,95,108,111,103,111,46,
+70,97,109,105,108,121,108,111,111,107,101,100,77,97,114,107,101,116,108,115,101,
+32,105,102,80,108,97,121,101,114,116,117,114,107,101,121,41,59,118,97,114,32,102
+,111,114,101,115,116,103,105,118,105,110,103,101,114,114,111,114,115,68,111,109,
+97,105,110,125,101,108,115,101,123,105,110,115,101,114,116,66,108,111,103,60,47,
+102,111,111,116,101,114,108,111,103,105,110,46,102,97,115,116,101,114,97,103,101
+,110,116,115,60,98,111,100,121,32,49,48,112,120,32,48,112,114,97,103,109,97,102,
+114,105,100,97,121,106,117,110,105,111,114,100,111,108,108,97,114,112,108,97,99,
+101,100,99,111,118,101,114,115,112,108,117,103,105,110,53,44,48,48,48,32,112,97,
+103,101,34,62,98,111,115,116,111,110,46,116,101,115,116,40,97,118,97,116,97,114,
+116,101,115,116,101,100,95,99,111,117,110,116,102,111,114,117,109,115,115,99,104
+,101,109,97,105,110,100,101,120,44,102,105,108,108,101,100,115,104,97,114,101,
+115,114,101,97,100,101,114,97,108,101,114,116,40,97,112,112,101,97,114,83,117,98
+,109,105,116,108,105,110,101,34,62,98,111,100,121,34,62,10,42,32,84,104,101,84,
+104,111,117,103,104,115,101,101,105,110,103,106,101,114,115,101,121,78,101,119,
+115,60,47,118,101,114,105,102,121,101,120,112,101,114,116,105,110,106,117,114,
+121,119,105,100,116,104,61,67,111,111,107,105,101,83,84,65,82,84,32,97,99,114,
+111,115,115,95,105,109,97,103,101,116,104,114,101,97,100,110,97,116,105,118,101,
+112,111,99,107,101,116,98,111,120,34,62,10,83,121,115,116,101,109,32,68,97,118,
+105,100,99,97,110,99,101,114,116,97,98,108,101,115,112,114,111,118,101,100,65,
+112,114,105,108,32,114,101,97,108,108,121,100,114,105,118,101,114,105,116,101,
+109,34,62,109,111,114,101,34,62,98,111,97,114,100,115,99,111,108,111,114,115,99,
+97,109,112,117,115,102,105,114,115,116,32,124,124,32,91,93,59,109,101,100,105,97
+,46,103,117,105,116,97,114,102,105,110,105,115,104,119,105,100,116,104,58,115,
+104,111,119,101,100,79,116,104,101,114,32,46,112,104,112,34,32,97,115,115,117,
+109,101,108,97,121,101,114,115,119,105,108,115,111,110,115,116,111,114,101,115,
+114,101,108,105,101,102,115,119,101,100,101,110,67,117,115,116,111,109,101,97,
+115,105,108,121,32,121,111,117,114,32,83,116,114,105,110,103,10,10,87,104,105,
+108,116,97,121,108,111,114,99,108,101,97,114,58,114,101,115,111,114,116,102,114,
+101,110,99,104,116,104,111,117,103,104,34,41,32,43,32,34,60,98,111,100,121,62,98
+,117,121,105,110,103,98,114,97,110,100,115,77,101,109,98,101,114,110,97,109,101,
+34,62,111,112,112,105,110,103,115,101,99,116,111,114,53,112,120,59,34,62,118,115
+,112,97,99,101,112,111,115,116,101,114,109,97,106,111,114,32,99,111,102,102,101,
+101,109,97,114,116,105,110,109,97,116,117,114,101,104,97,112,112,101,110,60,47,
+110,97,118,62,107,97,110,115,97,115,108,105,110,107,34,62,73,109,97,103,101,115,
+61,102,97,108,115,101,119,104,105,108,101,32,104,115,112,97,99,101,48,38,97,109,
+112,59,32,10,10,73,110,32,32,112,111,119,101,114,80,111,108,115,107,105,45,99,
+111,108,111,114,106,111,114,100,97,110,66,111,116,116,111,109,83,116,97,114,116,
+32,45,99,111,117,110,116,50,46,104,116,109,108,110,101,119,115,34,62,48,49,46,
+106,112,103,79,110,108,105,110,101,45,114,105,103,104,116,109,105,108,108,101,
+114,115,101,110,105,111,114,73,83,66,78,32,48,48,44,48,48,48,32,103,117,105,100,
+101,115,118,97,108,117,101,41,101,99,116,105,111,110,114,101,112,97,105,114,46,
+120,109,108,34,32,32,114,105,103,104,116,115,46,104,116,109,108,45,98,108,111,99
+,107,114,101,103,69,120,112,58,104,111,118,101,114,119,105,116,104,105,110,118,
+105,114,103,105,110,112,104,111,110,101,115,60,47,116,114,62,13,117,115,105,110,
+103,32,10,9,118,97,114,32,62,39,41,59,10,9,60,47,116,100,62,10,60,47,116,114,62,
+10,98,97,104,97,115,97,98,114,97,115,105,108,103,97,108,101,103,111,109,97,103,
+121,97,114,112,111,108,115,107,105,115,114,112,115,107,105,216,177,216,175,217,
+136,228,184,173,230,150,135,231,174,128,228,189,147,231,185,129,233,171,148,228,
+191,161,230,129,175,228,184,173,229,155,189,230,136,145,228,187,172,228,184,128,
+228,184,170,229,133,172,229,143,184,231,174,161,231,144,134,232,174,186,229,157,
+155,229,143,175,228,187,165,230,156,141,229,138,161,230,151,182,233,151,180,228,
+184,170,228,186,186,228,186,167,229,147,129,232,135,170,229,183,177,228,188,129,
+228,184,154,230,159,165,231,156,139,229,183,165,228,189,156,232,129,148,231,179,
+187,230,178,161,230,156,137,231,189,145,231,171,153,230,137,128,230,156,137,232,
+175,132,232,174,186,228,184,173,229,191,131,230,150,135,231,171,160,231,148,168,
+230,136,183,233,166,150,233,161,181,228,189,156,232,128,133,230,138,128,230,156,
+175,233,151,174,233,162,152,231,155,184,229,133,179,228,184,139,232,189,189,230,
+144,156,231,180,162,228,189,191,231,148,168,232,189,175,228,187,182,229,156,168,
+231,186,191,228,184,187,233,162,152,232,181,132,230,150,153,232,167,134,233,162,
+145,229,155,158,229,164,141,230,179,168,229,134,140,231,189,145,231,187,156,230,
+148,182,232,151,143,229,134,133,229,174,185,230,142,168,232,141,144,229,184,130,
+229,156,186,230,182,136,230,129,175,231,169,186,233,151,180,229,143,145,229,184,
+131,228,187,128,228,185,136,229,165,189,229,143,139,231,148,159,230,180,187,229,
+155,190,231,137,135,229,143,145,229,177,149,229,166,130,230,158,156,230,137,139,
+230,156,186,230,150,176,233,151,187,230,156,128,230,150,176,230,150,185,229,188,
+143,229,140,151,228,186,172,230,143,144,228,190,155,229,133,179,228,186,142,230,
+155,180,229,164,154,232,191,153,228,184,170,231,179,187,231,187,159,231,159,165,
+233,129,147,230,184,184,230,136,143,229,185,191,229,145,138,229,133,182,228,187,
+150,229,143,145,232,161,168,229,174,137,229,133,168,231,172,172,228,184,128,228,
+188,154,229,145,152,232,191,155,232,161,140,231,130,185,229,135,187,231,137,136,
+230,157,131,231,148,181,229,173,144,228,184,150,231,149,140,232,174,190,232,174,
+161,229,133,141,232,180,185,230,149,153,232,130,178,229,138,160,229,133,165,230,
+180,187,229,138,168,228,187,150,228,187,172,229,149,134,229,147,129,229,141,154,
+229,174,162,231,142,176,229,156,168,228,184,138,230,181,183,229,166,130,228,189,
+149,229,183,178,231,187,143,231,149,153,232,168,128,232,175,166,231,187,134,231,
+164,190,229,140,186,231,153,187,229,189,149,230,156,172,231,171,153,233,156,128,
+232,166,129,228,187,183,230,160,188,230,148,175,230,140,129,229,155,189,233,153,
+133,233,147,190,230,142,165,229,155,189,229,174,182,229,187,186,232,174,190,230,
+156,139,229,143,139,233,152,133,232,175,187,230,179,149,229,190,139,228,189,141,
+231,189,174,231,187,143,230,181,142,233,128,137,230,139,169,232,191,153,230,160,
+183,229,189,147,229,137,141,229,136,134,231,177,187,230,142,146,232,161,140,229,
+155,160,228,184,186,228,186,164,230,152,147,230,156,128,229,144,142,233,159,179,
+228,185,144,228,184,141,232,131,189,233,128,154,232,191,135,232,161,140,228,184,
+154,231,167,145,230,138,128,229,143,175,232,131,189,232,174,190,229,164,135,229,
+144,136,228,189,156,229,164,167,229,174,182,231,164,190,228,188,154,231,160,148,
+231,169,182,228,184,147,228,184,154,229,133,168,233,131,168,233,161,185,231,155,
+174,232,191,153,233,135,140,232,191,152,230,152,175,229,188,128,229,167,139,230,
+131,133,229,134,181,231,148,181,232,132,145,230,150,135,228,187,182,229,147,129,
+231,137,140,229,184,174,229,138,169,230,150,135,229,140,150,232,181,132,230,186,
+144,229,164,167,229,173,166,229,173,166,228,185,160,229,156,176,229,157,128,230,
+181,143,232,167,136,230,138,149,232,181,132,229,183,165,231,168,139,232,166,129,
+230,177,130,230,128,142,228,185,136,230,151,182,229,128,153,229,138,159,232,131,
+189,228,184,187,232,166,129,231,155,174,229,137,141,232,181,132,232,174,175,229,
+159,142,229,184,130,230,150,185,230,179,149,231,148,181,229,189,177,230,139,155,
+232,129,152,229,163,176,230,152,142,228,187,187,228,189,149,229,129,165,229,186,
+183,230,149,176,230,141,174,231,190,142,229,155,189,230,177,189,232,189,166,228,
+187,139,231,187,141,228,189,134,230,152,175,228,186,164,230,181,129,231,148,159,
+228,186,167,230,137,128,228,187,165,231,148,181,232,175,157,230,152,190,231,164,
+186,228,184,128,228,186,155,229,141,149,228,189,141,228,186,186,229,145,152,229,
+136,134,230,158,144,229,156,176,229,155,190,230,151,133,230,184,184,229,183,165,
+229,133,183,229,173,166,231,148,159,231,179,187,229,136,151,231,189,145,229,143,
+139,229,184,150,229,173,144,229,175,134,231,160,129,233,162,145,233,129,147,230,
+142,167,229,136,182,229,156,176,229,140,186,229,159,186,230,156,172,229,133,168,
+229,155,189,231,189,145,228,184,138,233,135,141,232,166,129,231,172,172,228,186,
+140,229,150,156,230,172,162,232,191,155,229,133,165,229,143,139,230,131,133,232,
+191,153,228,186,155,232,128,131,232,175,149,229,143,145,231,142,176,229,159,185,
+232,174,173,228,187,165,228,184,138,230,148,191,229,186,156,230,136,144,228,184,
+186,231,142,175,229,162,131,233,166,153,230,184,175,229,144,140,230,151,182,229,
+168,177,228,185,144,229,143,145,233,128,129,228,184,128,229,174,154,229,188,128,
+229,143,145,228,189,156,229,147,129,230,160,135,229,135,134,230,172,162,232,191,
+142,232,167,163,229,134,179,229,156,176,230,150,185,228,184,128,228,184,139,228,
+187,165,229,143,138,232,180,163,228,187,187,230,136,150,232,128,133,229,174,162,
+230,136,183,228,187,163,232,161,168,231,167,175,229,136,134,229,165,179,228,186,
+186,230,149,176,231,160,129,233,148,128,229,148,174,229,135,186,231,142,176,231,
+166,187,231,186,191,229,186,148,231,148,168,229,136,151,232,161,168,228,184,141,
+229,144,140,231,188,150,232,190,145,231,187,159,232,174,161,230,159,165,232,175,
+162,228,184,141,232,166,129,230,156,137,229,133,179,230,156,186,230,158,132,229,
+190,136,229,164,154,230,146,173,230,148,190,231,187,132,231,187,135,230,148,191,
+231,173,150,231,155,180,230,142,165,232,131,189,229,138,155,230,157,165,230,186,
+144,230,153,130,233,150,147,231,156,139,229,136,176,231,131,173,233,151,168,229,
+133,179,233,148,174,228,184,147,229,140,186,233,157,158,229,184,184,232,139,177,
+232,175,173,231,153,190,229,186,166,229,184,140,230,156,155,231,190,142,229,165,
+179,230,175,148,232,190,131,231,159,165,232,175,134,232,167,132,229,174,154,229,
+187,186,232,174,174,233,131,168,233,151,168,230,132,143,232,167,129,231,178,190,
+229,189,169,230,151,165,230,156,172,230,143,144,233,171,152,229,143,145,232,168,
+128,230,150,185,233,157,162,229,159,186,233,135,145,229,164,132,231,144,134,230,
+157,131,233,153,144,229,189,177,231,137,135,233,147,182,232,161,140,232,191,152,
+230,156,137,229,136,134,228,186,171,231,137,169,229,147,129,231,187,143,232,144,
+165,230,183,187,229,138,160,228,184,147,229,174,182,232,191,153,231,167,141,232,
+175,157,233,162,152,232,181,183,230,157,165,228,184,154,229,138,161,229,133,172,
+229,145,138,232,174,176,229,189,149,231,174,128,228,187,139,232,180,168,233,135,
+143,231,148,183,228,186,186,229,189,177,229,147,141,229,188,149,231,148,168,230,
+138,165,229,145,138,233,131,168,229,136,134,229,191,171,233,128,159,229,146,168,
+232,175,162,230,151,182,229,176,154,230,179,168,230,132,143,231,148,179,232,175,
+183,229,173,166,230,160,161,229,186,148,232,175,165,229,142,134,229,143,178,229,
+143,170,230,152,175,232,191,148,229,155,158,232,180,173,228,185,176,229,144,141,
+231,167,176,228,184,186,228,186,134,230,136,144,229,138,159,232,175,180,230,152,
+142,228,190,155,229,186,148,229,173,169,229,173,144,228,184,147,233,162,152,231,
+168,139,229,186,143,228,184,128,232,136,172,230,156,131,229,147,161,229,143,170,
+230,156,137,229,133,182,229,174,131,228,191,157,230,138,164,232,128,140,228,184,
+148,228,187,138,229,164,169,231,170,151,229,143,163,229,138,168,230,128,129,231,
+138,182,230,128,129,231,137,185,229,136,171,232,174,164,228,184,186,229,191,133,
+233,161,187,230,155,180,230,150,176,229,176,143,232,175,180,230,136,145,229,128,
+145,228,189,156,228,184,186,229,170,146,228,189,147,229,140,133,230,139,172,233,
+130,163,228,185,136,228,184,128,230,160,183,229,155,189,229,134,133,230,152,175,
+229,144,166,230,160,185,230,141,174,231,148,181,232,167,134,229,173,166,233,153,
+162,229,133,183,230,156,137,232,191,135,231,168,139,231,148,177,228,186,142,228,
+186,186,230,137,141,229,135,186,230,157,165,228,184,141,232,191,135,230,173,163,
+229,156,168,230,152,142,230,152,159,230,149,133,228,186,139,229,133,179,231,179,
+187,230,160,135,233,162,152,229,149,134,229,138,161,232,190,147,229,133,165,228,
+184,128,231,155,180,229,159,186,231,161,128,230,149,153,229,173,166,228,186,134,
+232,167,163,229,187,186,231,173,145,231,187,147,230,158,156,229,133,168,231,144,
+131,233,128,154,231,159,165,232,174,161,229,136,146,229,175,185,228,186,142,232,
+137,186,230,156,175,231,155,184,229,134,140,229,143,145,231,148,159,231,156,159,
+231,154,132,229,187,186,231,171,139,231,173,137,231,186,167,231,177,187,229,158,
+139,231,187,143,233,170,140,229,174,158,231,142,176,229,136,182,228,189,156,230,
+157,165,232,135,170,230,160,135,231,173,190,228,187,165,228,184,139,229,142,159,
+229,136,155,230,151,160,230,179,149,229,133,182,228,184,173,229,128,139,228,186,
+186,228,184,128,229,136,135,230,140,135,229,141,151,229,133,179,233,151,173,233,
+155,134,229,155,162,231,172,172,228,184,137,229,133,179,230,179,168,229,155,160,
+230,173,164,231,133,167,231,137,135,230,183,177,229,156,179,229,149,134,228,184,
+154,229,185,191,229,183,158,230,151,165,230,156,159,233,171,152,231,186,167,230,
+156,128,232,191,145,231,187,188,229,144,136,232,161,168,231,164,186,228,184,147,
+232,190,145,232,161,140,228,184,186,228,186,164,233,128,154,232,175,132,228,187,
+183,232,167,137,229,190,151,231,178,190,229,141,142,229,174,182,229,186,173,229,
+174,140,230,136,144,230,132,159,232,167,137,229,174,137,232,163,133,229,190,151,
+229,136,176,233,130,174,228,187,182,229,136,182,229,186,166,233,163,159,229,147,
+129,232,153,189,231,132,182,232,189,172,232,189,189,230,138,165,228,187,183,232,
+174,176,232,128,133,230,150,185,230,161,136,232,161,140,230,148,191,228,186,186,
+230,176,145,231,148,168,229,147,129,228,184,156,232,165,191,230,143,144,229,135,
+186,233,133,146,229,186,151,231,132,182,229,144,142,228,187,152,230,172,190,231,
+131,173,231,130,185,228,187,165,229,137,141,229,174,140,229,133,168,229,143,145,
+229,184,150,232,174,190,231,189,174,233,162,134,229,175,188,229,183,165,228,184,
+154,229,140,187,233,153,162,231,156,139,231,156,139,231,187,143,229,133,184,229,
+142,159,229,155,160,229,185,179,229,143,176,229,144,132,231,167,141,229,162,158,
+229,138,160,230,157,144,230,150,153,230,150,176,229,162,158,228,185,139,229,144,
+142,232,129,140,228,184,154,230,149,136,230,158,156,228,187,138,229,185,180,232,
+174,186,230,150,135,230,136,145,229,155,189,229,145,138,232,175,137,231,137,136,
+228,184,187,228,191,174,230,148,185,229,143,130,228,184,142,230,137,147,229,141,
+176,229,191,171,228,185,144,230,156,186,230,162,176,232,167,130,231,130,185,229,
+173,152,229,156,168,231,178,190,231,165,158,232,142,183,229,190,151,229,136,169,
+231,148,168,231,187,167,231,187,173,228,189,160,228,187,172,232,191,153,228,185,
+136,230,168,161,229,188,143,232,175,173,232,168,128,232,131,189,229,164,159,233,
+155,133,232,153,142,230,147,141,228,189,156,233,163,142,230,160,188,228,184,128,
+232,181,183,231,167,145,229,173,166,228,189,147,232,130,178,231,159,173,228,191,
+161,230,157,161,228,187,182,230,178,187,231,150,151,232,191,144,229,138,168,228,
+186,167,228,184,154,228,188,154,232,174,174,229,175,188,232,136,170,229,133,136,
+231,148,159,232,129,148,231,155,159,229,143,175,230,152,175,229,149,143,233,161,
+140,231,187,147,230,158,132,228,189,156,231,148,168,232,176,131,230,159,165,232,
+179,135,230,150,153,232,135,170,229,138,168,232,180,159,232,180,163,229,134,156,
+228,184,154,232,174,191,233,151,174,229,174,158,230,150,189,230,142,165,229,143,
+151,232,174,168,232,174,186,233,130,163,228,184,170,229,143,141,233,166,136,229,
+138,160,229,188,186,229,165,179,230,128,167,232,140,131,229,155,180,230,156,141,
+229,139,153,228,188,145,233,151,178,228,187,138,230,151,165,229,174,162,230,156,
+141,232,167,128,231,156,139,229,143,130,229,138,160,231,154,132,232,175,157,228,
+184,128,231,130,185,228,191,157,232,175,129,229,155,190,228,185,166,230,156,137,
+230,149,136,230,181,139,232,175,149,231,167,187,229,138,168,230,137,141,232,131,
+189,229,134,179,229,174,154,232,130,161,231,165,168,228,184,141,230,150,173,233,
+156,128,230,177,130,228,184,141,229,190,151,229,138,158,230,179,149,228,185,139,
+233,151,180,233,135,135,231,148,168,232,144,165,233,148,128,230,138,149,232,175,
+137,231,155,174,230,160,135,231,136,177,230,131,133,230,145,132,229,189,177,230,
+156,137,228,186,155,232,164,135,232,163,189,230,150,135,229,173,166,230,156,186,
+228,188,154,230,149,176,229,173,151,232,163,133,228,191,174,232,180,173,231,137,
+169,229,134,156,230,157,145,229,133,168,233,157,162,231,178,190,229,147,129,229,
+133,182,229,174,158,228,186,139,230,131,133,230,176,180,229,185,179,230,143,144,
+231,164,186,228,184,138,229,184,130,232,176,162,232,176,162,230,153,174,233,128,
+154,230,149,153,229,184,136,228,184,138,228,188,160,231,177,187,229,136,171,230,
+173,140,230,155,178,230,139,165,230,156,137,229,136,155,230,150,176,233,133,141,
+228,187,182,229,143,170,232,166,129,230,151,182,228,187,163,232,179,135,232,168,
+138,232,190,190,229,136,176,228,186,186,231,148,159,232,174,162,233,152,133,232,
+128,129,229,184,136,229,177,149,231,164,186,229,191,131,231,144,134,232,180,180,
+229,173,144,231,182,178,231,171,153,228,184,187,233,161,140,232,135,170,231,132,
+182,231,186,167,229,136,171,231,174,128,229,141,149,230,148,185,233,157,169,233,
+130,163,228,186,155,230,157,165,232,175,180,230,137,147,229,188,128,228,187,163,
+231,160,129,229,136,160,233,153,164,232,175,129,229,136,184,232,138,130,231,155,
+174,233,135,141,231,130,185,230,172,161,230,149,184,229,164,154,229,176,145,232,
+167,132,229,136,146,232,181,132,233,135,145,230,137,190,229,136,176,228,187,165,
+229,144,142,229,164,167,229,133,168,228,184,187,233,161,181,230,156,128,228,189,
+179,229,155,158,231,173,148,229,164,169,228,184,139,228,191,157,233,154,156,231,
+142,176,228,187,163,230,163,128,230,159,165,230,138,149,231,165,168,229,176,143,
+230,151,182,230,178,146,230,156,137,230,173,163,229,184,184,231,148,154,232,135,
+179,228,187,163,231,144,134,231,155,174,229,189,149,229,133,172,229,188,128,229,
+164,141,229,136,182,233,135,145,232,158,141,229,185,184,231,166,143,231,137,136,
+230,156,172,229,189,162,230,136,144,229,135,134,229,164,135,232,161,140,230,131,
+133,229,155,158,229,136,176,230,128,157,230,131,179,230,128,142,230,160,183,229,
+141,143,232,174,174,232,174,164,232,175,129,230,156,128,229,165,189,228,186,167,
+231,148,159,230,140,137,231,133,167,230,156,141,232,163,133,229,185,191,228,184,
+156,229,138,168,230,188,171,233,135,135,232,180,173,230,150,176,230,137,139,231,
+187,132,229,155,190,233,157,162,230,157,191,229,143,130,232,128,131,230,148,191,
+230,178,187,229,174,185,230,152,147,229,164,169,229,156,176,229,138,170,229,138,
+155,228,186,186,228,187,172,229,141,135,231,186,167,233,128,159,229,186,166,228,
+186,186,231,137,169,232,176,131,230,149,180,230,181,129,232,161,140,233,128,160,
+230,136,144,230,150,135,229,173,151,233,159,169,229,155,189,232,180,184,230,152,
+147,229,188,128,229,177,149,231,155,184,233,151,156,232,161,168,231,142,176,229,
+189,177,232,167,134,229,166,130,230,173,164,231,190,142,229,174,185,229,164,167,
+229,176,143,230,138,165,233,129,147,230,157,161,230,172,190,229,191,131,230,131,
+133,232,174,184,229,164,154,230,179,149,232,167,132,229,174,182,229,177,133,228,
+185,166,229,186,151,232,191,158,230,142,165,231,171,139,229,141,179,228,184,190,
+230,138,165,230,138,128,229,183,167,229,165,165,232,191,144,231,153,187,229,133,
+165,228,187,165,230,157,165,231,144,134,232,174,186,228,186,139,228,187,182,232,
+135,170,231,148,177,228,184,173,229,141,142,229,138,158,229,133,172,229,166,136,
+229,166,136,231,156,159,230,173,163,228,184,141,233,148,153,229,133,168,230,150,
+135,229,144,136,229,144,140,228,187,183,229,128,188,229,136,171,228,186,186,231,
+155,145,231,157,163,229,133,183,228,189,147,228,184,150,231,186,170,229,155,162,
+233,152,159,229,136,155,228,184,154,230,137,191,230,139,133,229,162,158,233,149,
+191,230,156,137,228,186,186,228,191,157,230,140,129,229,149,134,229,174,182,231,
+187,180,228,191,174,229,143,176,230,185,190,229,183,166,229,143,179,232,130,161,
+228,187,189,231,173,148,230,161,136,229,174,158,233,153,133,231,148,181,228,191,
+161,231,187,143,231,144,134,231,148,159,229,145,189,229,174,163,228,188,160,228,
+187,187,229,138,161,230,173,163,229,188,143,231,137,185,232,137,178,228,184,139,
+230,157,165,229,141,143,228,188,154,229,143,170,232,131,189,229,189,147,231,132,
+182,233,135,141,230,150,176,229,133,167,229,174,185,230,140,135,229,175,188,232,
+191,144,232,161,140,230,151,165,229,191,151,232,179,163,229,174,182,232,182,133,
+232,191,135,229,156,159,229,156,176,230,181,153,230,177,159,230,148,175,228,187,
+152,230,142,168,229,135,186,231,171,153,233,149,191,230,157,173,229,183,158,230,
+137,167,232,161,140,229,136,182,233,128,160,228,185,139,228,184,128,230,142,168,
+229,185,191,231,142,176,229,156,186,230,143,143,232,191,176,229,143,152,229,140,
+150,228,188,160,231,187,159,230,173,140,230,137,139,228,191,157,233,153,169,232,
+175,190,231,168,139,229,140,187,231,150,151,231,187,143,232,191,135,232,191,135,
+229,142,187,228,185,139,229,137,141,230,148,182,229,133,165,229,185,180,229,186,
+166,230,157,130,229,191,151,231,190,142,228,184,189,230,156,128,233,171,152,231,
+153,187,233,153,134,230,156,170,230,157,165,229,138,160,229,183,165,229,133,141,
+232,180,163,230,149,153,231,168,139,231,137,136,229,157,151,232,186,171,228,189,
+147,233,135,141,229,186,134,229,135,186,229,148,174,230,136,144,230,156,172,229,
+189,162,229,188,143,229,156,159,232,177,134,229,135,186,229,131,185,228,184,156,
+230,150,185,233,130,174,231,174,177,229,141,151,228,186,172,230,177,130,232,129,
+140,229,143,150,229,190,151,232,129,140,228,189,141,231,155,184,228,191,161,233,
+161,181,233,157,162,229,136,134,233,146,159,231,189,145,233,161,181,231,161,174,
+229,174,154,229,155,190,228,190,139,231,189,145,229,157,128,231,167,175,230,158,
+129,233,148,153,232,175,175,231,155,174,231,154,132,229,174,157,232,180,157,230,
+156,186,229,133,179,233,163,142,233,153,169,230,142,136,230,157,131,231,151,133,
+230,175,146,229,174,160,231,137,169,233,153,164,228,186,134,232,169,149,232,171,
+150,231,150,190,231,151,133,229,143,138,230,151,182,230,177,130,232,180,173,231,
+171,153,231,130,185,229,132,191,231,171,165,230,175,143,229,164,169,228,184,173,
+229,164,174,232,174,164,232,175,134,230,175,143,228,184,170,229,164,169,230,180,
+165,229,173,151,228,189,147,229,143,176,231,129,163,231,187,180,230,138,164,230,
+156,172,233,161,181,228,184,170,230,128,167,229,174,152,230,150,185,229,184,184,
+232,167,129,231,155,184,230,156,186,230,136,152,231,149,165,229,186,148,229,189,
+147,229,190,139,229,184,136,230,150,185,228,190,191,230,160,161,229,155,173,232,
+130,161,229,184,130,230,136,191,229,177,139,230,160,143,231,155,174,229,145,152,
+229,183,165,229,175,188,232,135,180,231,170,129,231,132,182,233,129,147,229,133,
+183,230,156,172,231,189,145,231,187,147,229,144,136,230,161,163,230,161,136,229,
+138,179,229,138,168,229,143,166,229,164,150,231,190,142,229,133,131,229,188,149,
+232,181,183,230,148,185,229,143,152,231,172,172,229,155,155,228,188,154,232,174,
+161,232,170,170,230,152,142,233,154,144,231,167,129,229,174,157,229,174,157,232,
+167,132,232,140,131,230,182,136,232,180,185,229,133,177,229,144,140,229,191,152,
+232,174,176,228,189,147,231,179,187,229,184,166,230,157,165,229,144,141,229,173,
+151,231,153,188,232,161,168,229,188,128,230,148,190,229,138,160,231,155,159,229,
+143,151,229,136,176,228,186,140,230,137,139,229,164,167,233,135,143,230,136,144,
+228,186,186,230,149,176,233,135,143,229,133,177,228,186,171,229,140,186,229,159,
+159,229,165,179,229,173,169,229,142,159,229,136,153,230,137,128,229,156,168,231,
+187,147,230,157,159,233,128,154,228,191,161,232,182,133,231,186,167,233,133,141,
+231,189,174,229,189,147,230,151,182,228,188,152,231,167,128,230,128,167,230,132,
+159,230,136,191,228,186,167,233,129,138,230,136,178,229,135,186,229,143,163,230,
+143,144,228,186,164,229,176,177,228,184,154,228,191,157,229,129,165,231,168,139,
+229,186,166,229,143,130,230,149,176,228,186,139,228,184,154,230,149,180,228,184,
+170,229,177,177,228,184,156,230,131,133,230,132,159,231,137,185,230,174,138,229,
+136,134,233,161,158,230,144,156,229,176,139,229,177,158,228,186,142,233,151,168,
+230,136,183,232,180,162,229,138,161,229,163,176,233,159,179,229,143,138,229,133,
+182,232,180,162,231,187,143,229,157,154,230,140,129,229,185,178,233,131,168,230,
+136,144,231,171,139,229,136,169,231,155,138,232,128,131,232,153,145,230,136,144,
+233,131,189,229,140,133,232,163,133,231,148,168,230,136,182,230,175,148,232,181,
+155,230,150,135,230,152,142,230,139,155,229,149,134,229,174,140,230,149,180,231,
+156,159,230,152,175,231,156,188,231,157,155,228,188,153,228,188,180,229,168,129,
+230,156,155,233,162,134,229,159,159,229,141,171,231,148,159,228,188,152,230,131,
+160,232,171,150,229,163,135,229,133,172,229,133,177,232,137,175,229,165,189,229,
+133,133,229,136,134,231,172,166,229,144,136,233,153,132,228,187,182,231,137,185,
+231,130,185,228,184,141,229,143,175,232,139,177,230,150,135,232,181,132,228,186,
+167,230,160,185,230,156,172,230,152,142,230,152,190,229,175,134,231,162,188,229,
+133,172,228,188,151,230,176,145,230,151,143,230,155,180,229,138,160,228,186,171,
+229,143,151,229,144,140,229,173,166,229,144,175,229,138,168,233,128,130,229,144,
+136,229,142,159,230,157,165,233,151,174,231,173,148,230,156,172,230,150,135,231,
+190,142,233,163,159,231,187,191,232,137,178,231,168,179,229,174,154,231,187,136,
+228,186,142,231,148,159,231,137,169,228,190,155,230,177,130,230,144,156,231,139,
+144,229,138,155,233,135,143,228,184,165,233,135,141,230,176,184,232,191,156,229,
+134,153,231,156,159,230,156,137,233,153,144,231,171,158,228,186,137,229,175,185,
+232,177,161,232,180,185,231,148,168,228,184,141,229,165,189,231,187,157,229,175,
+185,229,141,129,229,136,134,228,191,131,232,191,155,231,130,185,232,175,132,229,
+189,177,233,159,179,228,188,152,229,138,191,228,184,141,229,176,145,230,172,163,
+232,181,143,229,185,182,228,184,148,230,156,137,231,130,185,230,150,185,229,144,
+145,229,133,168,230,150,176,228,191,161,231,148,168,232,174,190,230,150,189,229,
+189,162,232,177,161,232,181,132,230,160,188,231,170,129,231,160,180,233,154,143,
+231,157,128,233,135,141,229,164,167,228,186,142,230,152,175,230,175,149,228,184,
+154,230,153,186,232,131,189,229,140,150,229,183,165,229,174,140,231,190,142,229,
+149,134,229,159,142,231,187,159,228,184,128,229,135,186,231,137,136,230,137,147,
+233,128,160,231,148,162,229,147,129,230,166,130,229,134,181,231,148,168,228,186,
+142,228,191,157,231,149,153,229,155,160,231,180,160,228,184,173,229,156,139,229,
+173,152,229,130,168,232,180,180,229,155,190,230,156,128,230,132,155,233,149,191,
+230,156,159,229,143,163,228,187,183,231,144,134,232,180,162,229,159,186,229,156,
+176,229,174,137,230,142,146,230,173,166,230,177,137,233,135,140,233,157,162,229,
+136,155,229,187,186,229,164,169,231,169,186,233,166,150,229,133,136,229,174,140,
+229,150,132,233,169,177,229,138,168,228,184,139,233,157,162,228,184,141,229,134,
+141,232,175,154,228,191,161,230,132,143,228,185,137,233,152,179,229,133,137,232,
+139,177,229,155,189,230,188,130,228,186,174,229,134,155,228,186,139,231,142,169,
+229,174,182,231,190,164,228,188,151,229,134,156,230,176,145,229,141,179,229,143,
+175,229,144,141,231,168,177,229,174,182,229,133,183,229,138,168,231,148,187,230,
+131,179,229,136,176,230,179,168,230,152,142,229,176,143,229,173,166,230,128,167,
+232,131,189,232,128,131,231,160,148,231,161,172,228,187,182,232,167,130,231,156,
+139,230,184,133,230,165,154,230,144,158,231,172,145,233,166,150,233,160,129,233,
+187,132,233,135,145,233,128,130,231,148,168,230,177,159,232,139,143,231,156,159,
+229,174,158,228,184,187,231,174,161,233,152,182,230,174,181,232,168,187,229,134,
+138,231,191,187,232,175,145,230,157,131,229,136,169,229,129,154,229,165,189,228,
+188,188,228,185,142,233,128,154,232,174,175,230,150,189,229,183,165,231,139,128,
+230,133,139,228,185,159,232,174,184,231,142,175,228,191,157,229,159,185,229,133,
+187,230,166,130,229,191,181,229,164,167,229,158,139,230,156,186,231,165,168,231,
+144,134,232,167,163,229,140,191,229,144,141,99,117,97,110,100,111,101,110,118,
+105,97,114,109,97,100,114,105,100,98,117,115,99,97,114,105,110,105,99,105,111,
+116,105,101,109,112,111,112,111,114,113,117,101,99,117,101,110,116,97,101,115,
+116,97,100,111,112,117,101,100,101,110,106,117,101,103,111,115,99,111,110,116,
+114,97,101,115,116,195,161,110,110,111,109,98,114,101,116,105,101,110,101,110,
+112,101,114,102,105,108,109,97,110,101,114,97,97,109,105,103,111,115,99,105,117,
+100,97,100,99,101,110,116,114,111,97,117,110,113,117,101,112,117,101,100,101,115
+,100,101,110,116,114,111,112,114,105,109,101,114,112,114,101,99,105,111,115,101,
+103,195,186,110,98,117,101,110,111,115,118,111,108,118,101,114,112,117,110,116,
+111,115,115,101,109,97,110,97,104,97,98,195,173,97,97,103,111,115,116,111,110,
+117,101,118,111,115,117,110,105,100,111,115,99,97,114,108,111,115,101,113,117,
+105,112,111,110,105,195,177,111,115,109,117,99,104,111,115,97,108,103,117,110,97
+,99,111,114,114,101,111,105,109,97,103,101,110,112,97,114,116,105,114,97,114,114
+,105,98,97,109,97,114,195,173,97,104,111,109,98,114,101,101,109,112,108,101,111,
+118,101,114,100,97,100,99,97,109,98,105,111,109,117,99,104,97,115,102,117,101,
+114,111,110,112,97,115,97,100,111,108,195,173,110,101,97,112,97,114,101,99,101,
+110,117,101,118,97,115,99,117,114,115,111,115,101,115,116,97,98,97,113,117,105,
+101,114,111,108,105,98,114,111,115,99,117,97,110,116,111,97,99,99,101,115,111,
+109,105,103,117,101,108,118,97,114,105,111,115,99,117,97,116,114,111,116,105,101
+,110,101,115,103,114,117,112,111,115,115,101,114,195,161,110,101,117,114,111,112
+,97,109,101,100,105,111,115,102,114,101,110,116,101,97,99,101,114,99,97,100,101,
+109,195,161,115,111,102,101,114,116,97,99,111,99,104,101,115,109,111,100,101,108
+,111,105,116,97,108,105,97,108,101,116,114,97,115,97,108,103,195,186,110,99,111,
+109,112,114,97,99,117,97,108,101,115,101,120,105,115,116,101,99,117,101,114,112,
+111,115,105,101,110,100,111,112,114,101,110,115,97,108,108,101,103,97,114,118,
+105,97,106,101,115,100,105,110,101,114,111,109,117,114,99,105,97,112,111,100,114
+,195,161,112,117,101,115,116,111,100,105,97,114,105,111,112,117,101,98,108,111,
+113,117,105,101,114,101,109,97,110,117,101,108,112,114,111,112,105,111,99,114,
+105,115,105,115,99,105,101,114,116,111,115,101,103,117,114,111,109,117,101,114,
+116,101,102,117,101,110,116,101,99,101,114,114,97,114,103,114,97,110,100,101,101
+,102,101,99,116,111,112,97,114,116,101,115,109,101,100,105,100,97,112,114,111,
+112,105,97,111,102,114,101,99,101,116,105,101,114,114,97,101,45,109,97,105,108,
+118,97,114,105,97,115,102,111,114,109,97,115,102,117,116,117,114,111,111,98,106,
+101,116,111,115,101,103,117,105,114,114,105,101,115,103,111,110,111,114,109,97,
+115,109,105,115,109,111,115,195,186,110,105,99,111,99,97,109,105,110,111,115,105
+,116,105,111,115,114,97,122,195,179,110,100,101,98,105,100,111,112,114,117,101,
+98,97,116,111,108,101,100,111,116,101,110,195,173,97,106,101,115,195,186,115,101
+,115,112,101,114,111,99,111,99,105,110,97,111,114,105,103,101,110,116,105,101,
+110,100,97,99,105,101,110,116,111,99,195,161,100,105,122,104,97,98,108,97,114,
+115,101,114,195,173,97,108,97,116,105,110,97,102,117,101,114,122,97,101,115,116,
+105,108,111,103,117,101,114,114,97,101,110,116,114,97,114,195,169,120,105,116,
+111,108,195,179,112,101,122,97,103,101,110,100,97,118,195,173,100,101,111,101,
+118,105,116,97,114,112,97,103,105,110,97,109,101,116,114,111,115,106,97,118,105,
+101,114,112,97,100,114,101,115,102,195,161,99,105,108,99,97,98,101,122,97,195,
+161,114,101,97,115,115,97,108,105,100,97,101,110,118,195,173,111,106,97,112,195,
+179,110,97,98,117,115,111,115,98,105,101,110,101,115,116,101,120,116,111,115,108
+,108,101,118,97,114,112,117,101,100,97,110,102,117,101,114,116,101,99,111,109,
+195,186,110,99,108,97,115,101,115,104,117,109,97,110,111,116,101,110,105,100,111
+,98,105,108,98,97,111,117,110,105,100,97,100,101,115,116,195,161,115,101,100,105
+,116,97,114,99,114,101,97,100,111,208,180,208,187,209,143,209,135,209,130,208,
+190,208,186,208,176,208,186,208,184,208,187,208,184,209,141,209,130,208,190,208,
+178,209,129,208,181,208,181,208,179,208,190,208,191,209,128,208,184,209,130,208,
+176,208,186,208,181,209,137,208,181,209,131,208,182,208,181,208,154,208,176,208,
+186,208,177,208,181,208,183,208,177,209,139,208,187,208,190,208,189,208,184,208,
+146,209,129,208,181,208,191,208,190,208,180,208,173,209,130,208,190,209,130,208,
+190,208,188,209,135,208,181,208,188,208,189,208,181,209,130,208,187,208,181,209,
+130,209,128,208,176,208,183,208,190,208,189,208,176,208,179,208,180,208,181,208,
+188,208,189,208,181,208,148,208,187,209,143,208,159,209,128,208,184,208,189,208,
+176,209,129,208,189,208,184,209,133,209,130,208,181,208,188,208,186,209,130,208,
+190,208,179,208,190,208,180,208,178,208,190,209,130,209,130,208,176,208,188,208,
+161,208,168,208,144,208,188,208,176,209,143,208,167,209,130,208,190,208,178,208,
+176,209,129,208,178,208,176,208,188,208,181,208,188,209,131,208,162,208,176,208,
+186,208,180,208,178,208,176,208,189,208,176,208,188,209,141,209,130,208,184,209,
+141,209,130,209,131,208,146,208,176,208,188,209,130,208,181,209,133,208,191,209,
+128,208,190,209,130,209,131,209,130,208,189,208,176,208,180,208,180,208,189,209,
+143,208,146,208,190,209,130,209,130,209,128,208,184,208,189,208,181,208,185,208,
+146,208,176,209,129,208,189,208,184,208,188,209,129,208,176,208,188,209,130,208,
+190,209,130,209,128,209,131,208,177,208,158,208,189,208,184,208,188,208,184,209,
+128,208,189,208,181,208,181,208,158,208,158,208,158,208,187,208,184,209,134,209,
+141,209,130,208,176,208,158,208,189,208,176,208,189,208,181,208,188,208,180,208,
+190,208,188,208,188,208,190,208,185,208,180,208,178,208,181,208,190,208,189,208,
+190,209,129,209,131,208,180,224,164,149,224,165,135,224,164,185,224,165,136,224,
+164,149,224,165,128,224,164,184,224,165,135,224,164,149,224,164,190,224,164,149,
+224,165,139,224,164,148,224,164,176,224,164,170,224,164,176,224,164,168,224,165,
+135,224,164,143,224,164,149,224,164,149,224,164,191,224,164,173,224,165,128,224,
+164,135,224,164,184,224,164,149,224,164,176,224,164,164,224,165,139,224,164,185,
+224,165,139,224,164,134,224,164,170,224,164,185,224,165,128,224,164,175,224,164,
+185,224,164,175,224,164,190,224,164,164,224,164,149,224,164,165,224,164,190,106,
+97,103,114,97,110,224,164,134,224,164,156,224,164,156,224,165,139,224,164,133,
+224,164,172,224,164,166,224,165,139,224,164,151,224,164,136,224,164,156,224,164,
+190,224,164,151,224,164,143,224,164,185,224,164,174,224,164,135,224,164,168,224,
+164,181,224,164,185,224,164,175,224,165,135,224,164,165,224,165,135,224,164,165,
+224,165,128,224,164,152,224,164,176,224,164,156,224,164,172,224,164,166,224,165,
+128,224,164,149,224,164,136,224,164,156,224,165,128,224,164,181,224,165,135,224,
+164,168,224,164,136,224,164,168,224,164,143,224,164,185,224,164,176,224,164,137,
+224,164,184,224,164,174,224,165,135,224,164,149,224,164,174,224,164,181,224,165,
+139,224,164,178,224,165,135,224,164,184,224,164,172,224,164,174,224,164,136,224,
+164,166,224,165,135,224,164,147,224,164,176,224,164,134,224,164,174,224,164,172,
+224,164,184,224,164,173,224,164,176,224,164,172,224,164,168,224,164,154,224,164,
+178,224,164,174,224,164,168,224,164,134,224,164,151,224,164,184,224,165,128,224,
+164,178,224,165,128,216,185,217,132,217,137,216,165,217,132,217,137,217,135,216,
+176,216,167,216,162,216,174,216,177,216,185,216,175,216,175,216,167,217,132,217,
+137,217,135,216,176,217,135,216,181,217,136,216,177,216,186,217,138,216,177,217,
+131,216,167,217,134,217,136,217,132,216,167,216,168,217,138,217,134,216,185,216,
+177,216,182,216,176,217,132,217,131,217,135,217,134,216,167,217,138,217,136,217,
+133,217,130,216,167,217,132,216,185,217,132,217,138,216,167,217,134,216,167,217,
+132,217,131,217,134,216,173,216,170,217,137,217,130,216,168,217,132,217,136,216,
+173,216,169,216,167,216,174,216,177,217,129,217,130,216,183,216,185,216,168,216,
+175,216,177,217,131,217,134,216,165,216,176,216,167,217,131,217,133,216,167,216,
+167,216,173,216,175,216,165,217,132,216,167,217,129,217,138,217,135,216,168,216,
+185,216,182,217,131,217,138,217,129,216,168,216,173,216,171,217,136,217,133,217,
+134,217,136,217,135,217,136,216,163,217,134,216,167,216,172,216,175,216,167,217,
+132,217,135,216,167,216,179,217,132,217,133,216,185,217,134,216,175,217,132,217,
+138,216,179,216,185,216,168,216,177,216,181,217,132,217,137,217,133,217,134,216,
+176,216,168,217,135,216,167,216,163,217,134,217,135,217,133,216,171,217,132,217,
+131,217,134,216,170,216,167,217,132,216,167,216,173,217,138,216,171,217,133,216,
+181,216,177,216,180,216,177,216,173,216,173,217,136,217,132,217,136,217,129,217,
+138,216,167,216,176,216,167,217,132,217,131,217,132,217,133,216,177,216,169,216,
+167,217,134,216,170,216,167,217,132,217,129,216,163,216,168,217,136,216,174,216,
+167,216,181,216,163,217,134,216,170,216,167,217,134,217,135,216,167,217,132,217,
+138,216,185,216,182,217,136,217,136,217,130,216,175,216,167,216,168,217,134,216,
+174,217,138,216,177,216,168,217,134,216,170,217,132,217,131,217,133,216,180,216,
+167,216,161,217,136,217,135,217,138,216,167,216,168,217,136,217,130,216,181,216,
+181,217,136,217,133,216,167,216,177,217,130,217,133,216,163,216,173,216,175,217,
+134,216,173,217,134,216,185,216,175,217,133,216,177,216,163,217,138,216,167,216,
+173,216,169,217,131,216,170,216,168,216,175,217,136,217,134,217,138,216,172,216,
+168,217,133,217,134,217,135,216,170,216,173,216,170,216,172,217,135,216,169,216,
+179,217,134,216,169,217,138,216,170,217,133,217,131,216,177,216,169,216,186,216,
+178,216,169,217,134,217,129,216,179,216,168,217,138,216,170,217,132,217,132,217,
+135,217,132,217,134,216,167,216,170,217,132,217,131,217,130,217,132,216,168,217,
+132,217,133,216,167,216,185,217,134,217,135,216,163,217,136,217,132,216,180,217,
+138,216,161,217,134,217,136,216,177,216,163,217,133,216,167,217,129,217,138,217,
+131,216,168,217,131,217,132,216,176,216,167,216,170,216,177,216,170,216,168,216,
+168,216,163,217,134,217,135,217,133,216,179,216,167,217,134,217,131,216,168,217,
+138,216,185,217,129,217,130,216,175,216,173,216,179,217,134,217,132,217,135,217,
+133,216,180,216,185,216,177,216,163,217,135,217,132,216,180,217,135,216,177,217,
+130,216,183,216,177,216,183,217,132,216,168,112,114,111,102,105,108,101,115,101,
+114,118,105,99,101,100,101,102,97,117,108,116,104,105,109,115,101,108,102,100,
+101,116,97,105,108,115,99,111,110,116,101,110,116,115,117,112,112,111,114,116,
+115,116,97,114,116,101,100,109,101,115,115,97,103,101,115,117,99,99,101,115,115,
+102,97,115,104,105,111,110,60,116,105,116,108,101,62,99,111,117,110,116,114,121,
+97,99,99,111,117,110,116,99,114,101,97,116,101,100,115,116,111,114,105,101,115,
+114,101,115,117,108,116,115,114,117,110,110,105,110,103,112,114,111,99,101,115,
+115,119,114,105,116,105,110,103,111,98,106,101,99,116,115,118,105,115,105,98,108
+,101,119,101,108,99,111,109,101,97,114,116,105,99,108,101,117,110,107,110,111,
+119,110,110,101,116,119,111,114,107,99,111,109,112,97,110,121,100,121,110,97,109
+,105,99,98,114,111,119,115,101,114,112,114,105,118,97,99,121,112,114,111,98,108,
+101,109,83,101,114,118,105,99,101,114,101,115,112,101,99,116,100,105,115,112,108
+,97,121,114,101,113,117,101,115,116,114,101,115,101,114,118,101,119,101,98,115,
+105,116,101,104,105,115,116,111,114,121,102,114,105,101,110,100,115,111,112,116,
+105,111,110,115,119,111,114,107,105,110,103,118,101,114,115,105,111,110,109,105,
+108,108,105,111,110,99,104,97,110,110,101,108,119,105,110,100,111,119,46,97,100,
+100,114,101,115,115,118,105,115,105,116,101,100,119,101,97,116,104,101,114,99,
+111,114,114,101,99,116,112,114,111,100,117,99,116,101,100,105,114,101,99,116,102
+,111,114,119,97,114,100,121,111,117,32,99,97,110,114,101,109,111,118,101,100,115
+,117,98,106,101,99,116,99,111,110,116,114,111,108,97,114,99,104,105,118,101,99,
+117,114,114,101,110,116,114,101,97,100,105,110,103,108,105,98,114,97,114,121,108
+,105,109,105,116,101,100,109,97,110,97,103,101,114,102,117,114,116,104,101,114,
+115,117,109,109,97,114,121,109,97,99,104,105,110,101,109,105,110,117,116,101,115
+,112,114,105,118,97,116,101,99,111,110,116,101,120,116,112,114,111,103,114,97,
+109,115,111,99,105,101,116,121,110,117,109,98,101,114,115,119,114,105,116,116,
+101,110,101,110,97,98,108,101,100,116,114,105,103,103,101,114,115,111,117,114,99
+,101,115,108,111,97,100,105,110,103,101,108,101,109,101,110,116,112,97,114,116,
+110,101,114,102,105,110,97,108,108,121,112,101,114,102,101,99,116,109,101,97,110
+,105,110,103,115,121,115,116,101,109,115,107,101,101,112,105,110,103,99,117,108,
+116,117,114,101,38,113,117,111,116,59,44,106,111,117,114,110,97,108,112,114,111,
+106,101,99,116,115,117,114,102,97,99,101,115,38,113,117,111,116,59,101,120,112,
+105,114,101,115,114,101,118,105,101,119,115,98,97,108,97,110,99,101,69,110,103,
+108,105,115,104,67,111,110,116,101,110,116,116,104,114,111,117,103,104,80,108,
+101,97,115,101,32,111,112,105,110,105,111,110,99,111,110,116,97,99,116,97,118,
+101,114,97,103,101,112,114,105,109,97,114,121,118,105,108,108,97,103,101,83,112,
+97,110,105,115,104,103,97,108,108,101,114,121,100,101,99,108,105,110,101,109,101
+,101,116,105,110,103,109,105,115,115,105,111,110,112,111,112,117,108,97,114,113,
+117,97,108,105,116,121,109,101,97,115,117,114,101,103,101,110,101,114,97,108,115
+,112,101,99,105,101,115,115,101,115,115,105,111,110,115,101,99,116,105,111,110,
+119,114,105,116,101,114,115,99,111,117,110,116,101,114,105,110,105,116,105,97,
+108,114,101,112,111,114,116,115,102,105,103,117,114,101,115,109,101,109,98,101,
+114,115,104,111,108,100,105,110,103,100,105,115,112,117,116,101,101,97,114,108,
+105,101,114,101,120,112,114,101,115,115,100,105,103,105,116,97,108,112,105,99,
+116,117,114,101,65,110,111,116,104,101,114,109,97,114,114,105,101,100,116,114,97
+,102,102,105,99,108,101,97,100,105,110,103,99,104,97,110,103,101,100,99,101,110,
+116,114,97,108,118,105,99,116,111,114,121,105,109,97,103,101,115,47,114,101,97,
+115,111,110,115,115,116,117,100,105,101,115,102,101,97,116,117,114,101,108,105,
+115,116,105,110,103,109,117,115,116,32,98,101,115,99,104,111,111,108,115,86,101,
+114,115,105,111,110,117,115,117,97,108,108,121,101,112,105,115,111,100,101,112,
+108,97,121,105,110,103,103,114,111,119,105,110,103,111,98,118,105,111,117,115,
+111,118,101,114,108,97,121,112,114,101,115,101,110,116,97,99,116,105,111,110,115
+,60,47,117,108,62,13,10,119,114,97,112,112,101,114,97,108,114,101,97,100,121,99,
+101,114,116,97,105,110,114,101,97,108,105,116,121,115,116,111,114,97,103,101,97,
+110,111,116,104,101,114,100,101,115,107,116,111,112,111,102,102,101,114,101,100,
+112,97,116,116,101,114,110,117,110,117,115,117,97,108,68,105,103,105,116,97,108,
+99,97,112,105,116,97,108,87,101,98,115,105,116,101,102,97,105,108,117,114,101,99
+,111,110,110,101,99,116,114,101,100,117,99,101,100,65,110,100,114,111,105,100,
+100,101,99,97,100,101,115,114,101,103,117,108,97,114,32,38,97,109,112,59,32,97,
+110,105,109,97,108,115,114,101,108,101,97,115,101,65,117,116,111,109,97,116,103,
+101,116,116,105,110,103,109,101,116,104,111,100,115,110,111,116,104,105,110,103,
+80,111,112,117,108,97,114,99,97,112,116,105,111,110,108,101,116,116,101,114,115,
+99,97,112,116,117,114,101,115,99,105,101,110,99,101,108,105,99,101,110,115,101,
+99,104,97,110,103,101,115,69,110,103,108,97,110,100,61,49,38,97,109,112,59,72,
+105,115,116,111,114,121,32,61,32,110,101,119,32,67,101,110,116,114,97,108,117,
+112,100,97,116,101,100,83,112,101,99,105,97,108,78,101,116,119,111,114,107,114,
+101,113,117,105,114,101,99,111,109,109,101,110,116,119,97,114,110,105,110,103,67
+,111,108,108,101,103,101,116,111,111,108,98,97,114,114,101,109,97,105,110,115,98
+,101,99,97,117,115,101,101,108,101,99,116,101,100,68,101,117,116,115,99,104,102,
+105,110,97,110,99,101,119,111,114,107,101,114,115,113,117,105,99,107,108,121,98,
+101,116,119,101,101,110,101,120,97,99,116,108,121,115,101,116,116,105,110,103,
+100,105,115,101,97,115,101,83,111,99,105,101,116,121,119,101,97,112,111,110,115,
+101,120,104,105,98,105,116,38,108,116,59,33,45,45,67,111,110,116,114,111,108,99,
+108,97,115,115,101,115,99,111,118,101,114,101,100,111,117,116,108,105,110,101,97
+,116,116,97,99,107,115,100,101,118,105,99,101,115,40,119,105,110,100,111,119,112
+,117,114,112,111,115,101,116,105,116,108,101,61,34,77,111,98,105,108,101,32,107,
+105,108,108,105,110,103,115,104,111,119,105,110,103,73,116,97,108,105,97,110,100
+,114,111,112,112,101,100,104,101,97,118,105,108,121,101,102,102,101,99,116,115,
+45,49,39,93,41,59,10,99,111,110,102,105,114,109,67,117,114,114,101,110,116,97,
+100,118,97,110,99,101,115,104,97,114,105,110,103,111,112,101,110,105,110,103,100
+,114,97,119,105,110,103,98,105,108,108,105,111,110,111,114,100,101,114,101,100,
+71,101,114,109,97,110,121,114,101,108,97,116,101,100,60,47,102,111,114,109,62,
+105,110,99,108,117,100,101,119,104,101,116,104,101,114,100,101,102,105,110,101,
+100,83,99,105,101,110,99,101,99,97,116,97,108,111,103,65,114,116,105,99,108,101,
+98,117,116,116,111,110,115,108,97,114,103,101,115,116,117,110,105,102,111,114,
+109,106,111,117,114,110,101,121,115,105,100,101,98,97,114,67,104,105,99,97,103,
+111,104,111,108,105,100,97,121,71,101,110,101,114,97,108,112,97,115,115,97,103,
+101,44,38,113,117,111,116,59,97,110,105,109,97,116,101,102,101,101,108,105,110,
+103,97,114,114,105,118,101,100,112,97,115,115,105,110,103,110,97,116,117,114,97,
+108,114,111,117,103,104,108,121,46,10,10,84,104,101,32,98,117,116,32,110,111,116
+,100,101,110,115,105,116,121,66,114,105,116,97,105,110,67,104,105,110,101,115,
+101,108,97,99,107,32,111,102,116,114,105,98,117,116,101,73,114,101,108,97,110,
+100,34,32,100,97,116,97,45,102,97,99,116,111,114,115,114,101,99,101,105,118,101,
+116,104,97,116,32,105,115,76,105,98,114,97,114,121,104,117,115,98,97,110,100,105
+,110,32,102,97,99,116,97,102,102,97,105,114,115,67,104,97,114,108,101,115,114,97
+,100,105,99,97,108,98,114,111,117,103,104,116,102,105,110,100,105,110,103,108,97
+,110,100,105,110,103,58,108,97,110,103,61,34,114,101,116,117,114,110,32,108,101,
+97,100,101,114,115,112,108,97,110,110,101,100,112,114,101,109,105,117,109,112,97
+,99,107,97,103,101,65,109,101,114,105,99,97,69,100,105,116,105,111,110,93,38,113
+,117,111,116,59,77,101,115,115,97,103,101,110,101,101,100,32,116,111,118,97,108,
+117,101,61,34,99,111,109,112,108,101,120,108,111,111,107,105,110,103,115,116,97,
+116,105,111,110,98,101,108,105,101,118,101,115,109,97,108,108,101,114,45,109,111
+,98,105,108,101,114,101,99,111,114,100,115,119,97,110,116,32,116,111,107,105,110
+,100,32,111,102,70,105,114,101,102,111,120,121,111,117,32,97,114,101,115,105,109
+,105,108,97,114,115,116,117,100,105,101,100,109,97,120,105,109,117,109,104,101,
+97,100,105,110,103,114,97,112,105,100,108,121,99,108,105,109,97,116,101,107,105,
+110,103,100,111,109,101,109,101,114,103,101,100,97,109,111,117,110,116,115,102,
+111,117,110,100,101,100,112,105,111,110,101,101,114,102,111,114,109,117,108,97,
+100,121,110,97,115,116,121,104,111,119,32,116,111,32,83,117,112,112,111,114,116,
+114,101,118,101,110,117,101,101,99,111,110,111,109,121,82,101,115,117,108,116,
+115,98,114,111,116,104,101,114,115,111,108,100,105,101,114,108,97,114,103,101,
+108,121,99,97,108,108,105,110,103,46,38,113,117,111,116,59,65,99,99,111,117,110,
+116,69,100,119,97,114,100,32,115,101,103,109,101,110,116,82,111,98,101,114,116,
+32,101,102,102,111,114,116,115,80,97,99,105,102,105,99,108,101,97,114,110,101,
+100,117,112,32,119,105,116,104,104,101,105,103,104,116,58,119,101,32,104,97,118,
+101,65,110,103,101,108,101,115,110,97,116,105,111,110,115,95,115,101,97,114,99,
+104,97,112,112,108,105,101,100,97,99,113,117,105,114,101,109,97,115,115,105,118,
+101,103,114,97,110,116,101,100,58,32,102,97,108,115,101,116,114,101,97,116,101,
+100,98,105,103,103,101,115,116,98,101,110,101,102,105,116,100,114,105,118,105,
+110,103,83,116,117,100,105,101,115,109,105,110,105,109,117,109,112,101,114,104,
+97,112,115,109,111,114,110,105,110,103,115,101,108,108,105,110,103,105,115,32,
+117,115,101,100,114,101,118,101,114,115,101,118,97,114,105,97,110,116,32,114,111
+,108,101,61,34,109,105,115,115,105,110,103,97,99,104,105,101,118,101,112,114,111
+,109,111,116,101,115,116,117,100,101,110,116,115,111,109,101,111,110,101,101,120
+,116,114,101,109,101,114,101,115,116,111,114,101,98,111,116,116,111,109,58,101,
+118,111,108,118,101,100,97,108,108,32,116,104,101,115,105,116,101,109,97,112,101
+,110,103,108,105,115,104,119,97,121,32,116,111,32,32,65,117,103,117,115,116,115,
+121,109,98,111,108,115,67,111,109,112,97,110,121,109,97,116,116,101,114,115,109,
+117,115,105,99,97,108,97,103,97,105,110,115,116,115,101,114,118,105,110,103,125,
+41,40,41,59,13,10,112,97,121,109,101,110,116,116,114,111,117,98,108,101,99,111,
+110,99,101,112,116,99,111,109,112,97,114,101,112,97,114,101,110,116,115,112,108,
+97,121,101,114,115,114,101,103,105,111,110,115,109,111,110,105,116,111,114,32,39
+,39,84,104,101,32,119,105,110,110,105,110,103,101,120,112,108,111,114,101,97,100
+,97,112,116,101,100,71,97,108,108,101,114,121,112,114,111,100,117,99,101,97,98,
+105,108,105,116,121,101,110,104,97,110,99,101,99,97,114,101,101,114,115,41,46,32
+,84,104,101,32,99,111,108,108,101,99,116,83,101,97,114,99,104,32,97,110,99,105,
+101,110,116,101,120,105,115,116,101,100,102,111,111,116,101,114,32,104,97,110,
+100,108,101,114,112,114,105,110,116,101,100,99,111,110,115,111,108,101,69,97,115
+,116,101,114,110,101,120,112,111,114,116,115,119,105,110,100,111,119,115,67,104,
+97,110,110,101,108,105,108,108,101,103,97,108,110,101,117,116,114,97,108,115,117
+,103,103,101,115,116,95,104,101,97,100,101,114,115,105,103,110,105,110,103,46,
+104,116,109,108,34,62,115,101,116,116,108,101,100,119,101,115,116,101,114,110,99
+,97,117,115,105,110,103,45,119,101,98,107,105,116,99,108,97,105,109,101,100,74,
+117,115,116,105,99,101,99,104,97,112,116,101,114,118,105,99,116,105,109,115,84,
+104,111,109,97,115,32,109,111,122,105,108,108,97,112,114,111,109,105,115,101,112
+,97,114,116,105,101,115,101,100,105,116,105,111,110,111,117,116,115,105,100,101,
+58,102,97,108,115,101,44,104,117,110,100,114,101,100,79,108,121,109,112,105,99,
+95,98,117,116,116,111,110,97,117,116,104,111,114,115,114,101,97,99,104,101,100,
+99,104,114,111,110,105,99,100,101,109,97,110,100,115,115,101,99,111,110,100,115,
+112,114,111,116,101,99,116,97,100,111,112,116,101,100,112,114,101,112,97,114,101
+,110,101,105,116,104,101,114,103,114,101,97,116,108,121,103,114,101,97,116,101,
+114,111,118,101,114,97,108,108,105,109,112,114,111,118,101,99,111,109,109,97,110
+,100,115,112,101,99,105,97,108,115,101,97,114,99,104,46,119,111,114,115,104,105,
+112,102,117,110,100,105,110,103,116,104,111,117,103,104,116,104,105,103,104,101,
+115,116,105,110,115,116,101,97,100,117,116,105,108,105,116,121,113,117,97,114,
+116,101,114,67,117,108,116,117,114,101,116,101,115,116,105,110,103,99,108,101,97
+,114,108,121,101,120,112,111,115,101,100,66,114,111,119,115,101,114,108,105,98,
+101,114,97,108,125,32,99,97,116,99,104,80,114,111,106,101,99,116,101,120,97,109,
+112,108,101,104,105,100,101,40,41,59,70,108,111,114,105,100,97,97,110,115,119,
+101,114,115,97,108,108,111,119,101,100,69,109,112,101,114,111,114,100,101,102,
+101,110,115,101,115,101,114,105,111,117,115,102,114,101,101,100,111,109,83,101,
+118,101,114,97,108,45,98,117,116,116,111,110,70,117,114,116,104,101,114,111,117,
+116,32,111,102,32,33,61,32,110,117,108,108,116,114,97,105,110,101,100,68,101,110
+,109,97,114,107,118,111,105,100,40,48,41,47,97,108,108,46,106,115,112,114,101,
+118,101,110,116,82,101,113,117,101,115,116,83,116,101,112,104,101,110,10,10,87,
+104,101,110,32,111,98,115,101,114,118,101,60,47,104,50,62,13,10,77,111,100,101,
+114,110,32,112,114,111,118,105,100,101,34,32,97,108,116,61,34,98,111,114,100,101
+,114,115,46,10,10,70,111,114,32,10,10,77,97,110,121,32,97,114,116,105,115,116,
+115,112,111,119,101,114,101,100,112,101,114,102,111,114,109,102,105,99,116,105,
+111,110,116,121,112,101,32,111,102,109,101,100,105,99,97,108,116,105,99,107,101,
+116,115,111,112,112,111,115,101,100,67,111,117,110,99,105,108,119,105,116,110,
+101,115,115,106,117,115,116,105,99,101,71,101,111,114,103,101,32,66,101,108,103,
+105,117,109,46,46,46,60,47,97,62,116,119,105,116,116,101,114,110,111,116,97,98,
+108,121,119,97,105,116,105,110,103,119,97,114,102,97,114,101,32,79,116,104,101,
+114,32,114,97,110,107,105,110,103,112,104,114,97,115,101,115,109,101,110,116,105
+,111,110,115,117,114,118,105,118,101,115,99,104,111,108,97,114,60,47,112,62,13,
+10,32,67,111,117,110,116,114,121,105,103,110,111,114,101,100,108,111,115,115,32,
+111,102,106,117,115,116,32,97,115,71,101,111,114,103,105,97,115,116,114,97,110,
+103,101,60,104,101,97,100,62,60,115,116,111,112,112,101,100,49,39,93,41,59,13,10
+,105,115,108,97,110,100,115,110,111,116,97,98,108,101,98,111,114,100,101,114,58,
+108,105,115,116,32,111,102,99,97,114,114,105,101,100,49,48,48,44,48,48,48,60,47,
+104,51,62,10,32,115,101,118,101,114,97,108,98,101,99,111,109,101,115,115,101,108
+,101,99,116,32,119,101,100,100,105,110,103,48,48,46,104,116,109,108,109,111,110,
+97,114,99,104,111,102,102,32,116,104,101,116,101,97,99,104,101,114,104,105,103,
+104,108,121,32,98,105,111,108,111,103,121,108,105,102,101,32,111,102,111,114,32,
+101,118,101,110,114,105,115,101,32,111,102,38,114,97,113,117,111,59,112,108,117,
+115,111,110,101,104,117,110,116,105,110,103,40,116,104,111,117,103,104,68,111,
+117,103,108,97,115,106,111,105,110,105,110,103,99,105,114,99,108,101,115,70,111,
+114,32,116,104,101,65,110,99,105,101,110,116,86,105,101,116,110,97,109,118,101,
+104,105,99,108,101,115,117,99,104,32,97,115,99,114,121,115,116,97,108,118,97,108
+,117,101,32,61,87,105,110,100,111,119,115,101,110,106,111,121,101,100,97,32,115,
+109,97,108,108,97,115,115,117,109,101,100,60,97,32,105,100,61,34,102,111,114,101
+,105,103,110,32,65,108,108,32,114,105,104,111,119,32,116,104,101,68,105,115,112,
+108,97,121,114,101,116,105,114,101,100,104,111,119,101,118,101,114,104,105,100,
+100,101,110,59,98,97,116,116,108,101,115,115,101,101,107,105,110,103,99,97,98,
+105,110,101,116,119,97,115,32,110,111,116,108,111,111,107,32,97,116,99,111,110,
+100,117,99,116,103,101,116,32,116,104,101,74,97,110,117,97,114,121,104,97,112,
+112,101,110,115,116,117,114,110,105,110,103,97,58,104,111,118,101,114,79,110,108
+,105,110,101,32,70,114,101,110,99,104,32,108,97,99,107,105,110,103,116,121,112,
+105,99,97,108,101,120,116,114,97,99,116,101,110,101,109,105,101,115,101,118,101,
+110,32,105,102,103,101,110,101,114,97,116,100,101,99,105,100,101,100,97,114,101,
+32,110,111,116,47,115,101,97,114,99,104,98,101,108,105,101,102,115,45,105,109,97
+,103,101,58,108,111,99,97,116,101,100,115,116,97,116,105,99,46,108,111,103,105,
+110,34,62,99,111,110,118,101,114,116,118,105,111,108,101,110,116,101,110,116,101
+,114,101,100,102,105,114,115,116,34,62,99,105,114,99,117,105,116,70,105,110,108,
+97,110,100,99,104,101,109,105,115,116,115,104,101,32,119,97,115,49,48,112,120,59
+,34,62,97,115,32,115,117,99,104,100,105,118,105,100,101,100,60,47,115,112,97,110
+,62,119,105,108,108,32,98,101,108,105,110,101,32,111,102,97,32,103,114,101,97,
+116,109,121,115,116,101,114,121,47,105,110,100,101,120,46,102,97,108,108,105,110
+,103,100,117,101,32,116,111,32,114,97,105,108,119,97,121,99,111,108,108,101,103,
+101,109,111,110,115,116,101,114,100,101,115,99,101,110,116,105,116,32,119,105,
+116,104,110,117,99,108,101,97,114,74,101,119,105,115,104,32,112,114,111,116,101,
+115,116,66,114,105,116,105,115,104,102,108,111,119,101,114,115,112,114,101,100,
+105,99,116,114,101,102,111,114,109,115,98,117,116,116,111,110,32,119,104,111,32,
+119,97,115,108,101,99,116,117,114,101,105,110,115,116,97,110,116,115,117,105,99,
+105,100,101,103,101,110,101,114,105,99,112,101,114,105,111,100,115,109,97,114,
+107,101,116,115,83,111,99,105,97,108,32,102,105,115,104,105,110,103,99,111,109,
+98,105,110,101,103,114,97,112,104,105,99,119,105,110,110,101,114,115,60,98,114,
+32,47,62,60,98,121,32,116,104,101,32,78,97,116,117,114,97,108,80,114,105,118,97,
+99,121,99,111,111,107,105,101,115,111,117,116,99,111,109,101,114,101,115,111,108
+,118,101,83,119,101,100,105,115,104,98,114,105,101,102,108,121,80,101,114,115,
+105,97,110,115,111,32,109,117,99,104,67,101,110,116,117,114,121,100,101,112,105,
+99,116,115,99,111,108,117,109,110,115,104,111,117,115,105,110,103,115,99,114,105
+,112,116,115,110,101,120,116,32,116,111,98,101,97,114,105,110,103,109,97,112,112
+,105,110,103,114,101,118,105,115,101,100,106,81,117,101,114,121,40,45,119,105,
+100,116,104,58,116,105,116,108,101,34,62,116,111,111,108,116,105,112,83,101,99,
+116,105,111,110,100,101,115,105,103,110,115,84,117,114,107,105,115,104,121,111,
+117,110,103,101,114,46,109,97,116,99,104,40,125,41,40,41,59,10,10,98,117,114,110
+,105,110,103,111,112,101,114,97,116,101,100,101,103,114,101,101,115,115,111,117,
+114,99,101,61,82,105,99,104,97,114,100,99,108,111,115,101,108,121,112,108,97,115
+,116,105,99,101,110,116,114,105,101,115,60,47,116,114,62,13,10,99,111,108,111,
+114,58,35,117,108,32,105,100,61,34,112,111,115,115,101,115,115,114,111,108,108,
+105,110,103,112,104,121,115,105,99,115,102,97,105,108,105,110,103,101,120,101,99
+,117,116,101,99,111,110,116,101,115,116,108,105,110,107,32,116,111,68,101,102,97
+,117,108,116,60,98,114,32,47,62,10,58,32,116,114,117,101,44,99,104,97,114,116,
+101,114,116,111,117,114,105,115,109,99,108,97,115,115,105,99,112,114,111,99,101,
+101,100,101,120,112,108,97,105,110,60,47,104,49,62,13,10,111,110,108,105,110,101
+,46,63,120,109,108,32,118,101,104,101,108,112,105,110,103,100,105,97,109,111,110
+,100,117,115,101,32,116,104,101,97,105,114,108,105,110,101,101,110,100,32,45,45,
+62,41,46,97,116,116,114,40,114,101,97,100,101,114,115,104,111,115,116,105,110,
+103,35,102,102,102,102,102,102,114,101,97,108,105,122,101,86,105,110,99,101,110,
+116,115,105,103,110,97,108,115,32,115,114,99,61,34,47,80,114,111,100,117,99,116,
+100,101,115,112,105,116,101,100,105,118,101,114,115,101,116,101,108,108,105,110,
+103,80,117,98,108,105,99,32,104,101,108,100,32,105,110,74,111,115,101,112,104,32
+,116,104,101,97,116,114,101,97,102,102,101,99,116,115,60,115,116,121,108,101,62,
+97,32,108,97,114,103,101,100,111,101,115,110,39,116,108,97,116,101,114,44,32,69,
+108,101,109,101,110,116,102,97,118,105,99,111,110,99,114,101,97,116,111,114,72,
+117,110,103,97,114,121,65,105,114,112,111,114,116,115,101,101,32,116,104,101,115
+,111,32,116,104,97,116,77,105,99,104,97,101,108,83,121,115,116,101,109,115,80,
+114,111,103,114,97,109,115,44,32,97,110,100,32,32,119,105,100,116,104,61,101,38,
+113,117,111,116,59,116,114,97,100,105,110,103,108,101,102,116,34,62,10,112,101,
+114,115,111,110,115,71,111,108,100,101,110,32,65,102,102,97,105,114,115,103,114,
+97,109,109,97,114,102,111,114,109,105,110,103,100,101,115,116,114,111,121,105,
+100,101,97,32,111,102,99,97,115,101,32,111,102,111,108,100,101,115,116,32,116,
+104,105,115,32,105,115,46,115,114,99,32,61,32,99,97,114,116,111,111,110,114,101,
+103,105,115,116,114,67,111,109,109,111,110,115,77,117,115,108,105,109,115,87,104
+,97,116,32,105,115,105,110,32,109,97,110,121,109,97,114,107,105,110,103,114,101,
+118,101,97,108,115,73,110,100,101,101,100,44,101,113,117,97,108,108,121,47,115,
+104,111,119,95,97,111,117,116,100,111,111,114,101,115,99,97,112,101,40,65,117,
+115,116,114,105,97,103,101,110,101,116,105,99,115,121,115,116,101,109,44,73,110,
+32,116,104,101,32,115,105,116,116,105,110,103,72,101,32,97,108,115,111,73,115,
+108,97,110,100,115,65,99,97,100,101,109,121,10,9,9,60,33,45,45,68,97,110,105,101
+,108,32,98,105,110,100,105,110,103,98,108,111,99,107,34,62,105,109,112,111,115,
+101,100,117,116,105,108,105,122,101,65,98,114,97,104,97,109,40,101,120,99,101,
+112,116,123,119,105,100,116,104,58,112,117,116,116,105,110,103,41,46,104,116,109
+,108,40,124,124,32,91,93,59,10,68,65,84,65,91,32,42,107,105,116,99,104,101,110,
+109,111,117,110,116,101,100,97,99,116,117,97,108,32,100,105,97,108,101,99,116,
+109,97,105,110,108,121,32,95,98,108,97,110,107,39,105,110,115,116,97,108,108,101
+,120,112,101,114,116,115,105,102,40,116,121,112,101,73,116,32,97,108,115,111,38,
+99,111,112,121,59,32,34,62,84,101,114,109,115,98,111,114,110,32,105,110,79,112,
+116,105,111,110,115,101,97,115,116,101,114,110,116,97,108,107,105,110,103,99,111
+,110,99,101,114,110,103,97,105,110,101,100,32,111,110,103,111,105,110,103,106,
+117,115,116,105,102,121,99,114,105,116,105,99,115,102,97,99,116,111,114,121,105,
+116,115,32,111,119,110,97,115,115,97,117,108,116,105,110,118,105,116,101,100,108
+,97,115,116,105,110,103,104,105,115,32,111,119,110,104,114,101,102,61,34,47,34,
+32,114,101,108,61,34,100,101,118,101,108,111,112,99,111,110,99,101,114,116,100,
+105,97,103,114,97,109,100,111,108,108,97,114,115,99,108,117,115,116,101,114,112,
+104,112,63,105,100,61,97,108,99,111,104,111,108,41,59,125,41,40,41,59,117,115,
+105,110,103,32,97,62,60,115,112,97,110,62,118,101,115,115,101,108,115,114,101,
+118,105,118,97,108,65,100,100,114,101,115,115,97,109,97,116,101,117,114,97,110,
+100,114,111,105,100,97,108,108,101,103,101,100,105,108,108,110,101,115,115,119,
+97,108,107,105,110,103,99,101,110,116,101,114,115,113,117,97,108,105,102,121,109
+,97,116,99,104,101,115,117,110,105,102,105,101,100,101,120,116,105,110,99,116,68
+,101,102,101,110,115,101,100,105,101,100,32,105,110,10,9,60,33,45,45,32,99,117,
+115,116,111,109,115,108,105,110,107,105,110,103,76,105,116,116,108,101,32,66,111
+,111,107,32,111,102,101,118,101,110,105,110,103,109,105,110,46,106,115,63,97,114
+,101,32,116,104,101,107,111,110,116,97,107,116,116,111,100,97,121,39,115,46,104,
+116,109,108,34,32,116,97,114,103,101,116,61,119,101,97,114,105,110,103,65,108,
+108,32,82,105,103,59,10,125,41,40,41,59,114,97,105,115,105,110,103,32,65,108,115
+,111,44,32,99,114,117,99,105,97,108,97,98,111,117,116,34,62,100,101,99,108,97,
+114,101,45,45,62,10,60,115,99,102,105,114,101,102,111,120,97,115,32,109,117,99,
+104,97,112,112,108,105,101,115,105,110,100,101,120,44,32,115,44,32,98,117,116,32
+,116,121,112,101,32,61,32,10,13,10,60,33,45,45,116,111,119,97,114,100,115,82,101
+,99,111,114,100,115,80,114,105,118,97,116,101,70,111,114,101,105,103,110,80,114,
+101,109,105,101,114,99,104,111,105,99,101,115,86,105,114,116,117,97,108,114,101,
+116,117,114,110,115,67,111,109,109,101,110,116,80,111,119,101,114,101,100,105,
+110,108,105,110,101,59,112,111,118,101,114,116,121,99,104,97,109,98,101,114,76,
+105,118,105,110,103,32,118,111,108,117,109,101,115,65,110,116,104,111,110,121,
+108,111,103,105,110,34,32,82,101,108,97,116,101,100,69,99,111,110,111,109,121,
+114,101,97,99,104,101,115,99,117,116,116,105,110,103,103,114,97,118,105,116,121,
+108,105,102,101,32,105,110,67,104,97,112,116,101,114,45,115,104,97,100,111,119,
+78,111,116,97,98,108,101,60,47,116,100,62,13,10,32,114,101,116,117,114,110,115,
+116,97,100,105,117,109,119,105,100,103,101,116,115,118,97,114,121,105,110,103,
+116,114,97,118,101,108,115,104,101,108,100,32,98,121,119,104,111,32,97,114,101,
+119,111,114,107,32,105,110,102,97,99,117,108,116,121,97,110,103,117,108,97,114,
+119,104,111,32,104,97,100,97,105,114,112,111,114,116,116,111,119,110,32,111,102,
+10,10,83,111,109,101,32,39,99,108,105,99,107,39,99,104,97,114,103,101,115,107,
+101,121,119,111,114,100,105,116,32,119,105,108,108,99,105,116,121,32,111,102,40,
+116,104,105,115,41,59,65,110,100,114,101,119,32,117,110,105,113,117,101,32,99,
+104,101,99,107,101,100,111,114,32,109,111,114,101,51,48,48,112,120,59,32,114,101
+,116,117,114,110,59,114,115,105,111,110,61,34,112,108,117,103,105,110,115,119,
+105,116,104,105,110,32,104,101,114,115,101,108,102,83,116,97,116,105,111,110,70,
+101,100,101,114,97,108,118,101,110,116,117,114,101,112,117,98,108,105,115,104,
+115,101,110,116,32,116,111,116,101,110,115,105,111,110,97,99,116,114,101,115,115
+,99,111,109,101,32,116,111,102,105,110,103,101,114,115,68,117,107,101,32,111,102
+,112,101,111,112,108,101,44,101,120,112,108,111,105,116,119,104,97,116,32,105,
+115,104,97,114,109,111,110,121,97,32,109,97,106,111,114,34,58,34,104,116,116,112
+,105,110,32,104,105,115,32,109,101,110,117,34,62,10,109,111,110,116,104,108,121,
+111,102,102,105,99,101,114,99,111,117,110,99,105,108,103,97,105,110,105,110,103,
+101,118,101,110,32,105,110,83,117,109,109,97,114,121,100,97,116,101,32,111,102,
+108,111,121,97,108,116,121,102,105,116,110,101,115,115,97,110,100,32,119,97,115,
+101,109,112,101,114,111,114,115,117,112,114,101,109,101,83,101,99,111,110,100,32
+,104,101,97,114,105,110,103,82,117,115,115,105,97,110,108,111,110,103,101,115,
+116,65,108,98,101,114,116,97,108,97,116,101,114,97,108,115,101,116,32,111,102,32
+,115,109,97,108,108,34,62,46,97,112,112,101,110,100,100,111,32,119,105,116,104,
+102,101,100,101,114,97,108,98,97,110,107,32,111,102,98,101,110,101,97,116,104,68
+,101,115,112,105,116,101,67,97,112,105,116,97,108,103,114,111,117,110,100,115,41
+,44,32,97,110,100,32,112,101,114,99,101,110,116,105,116,32,102,114,111,109,99,
+108,111,115,105,110,103,99,111,110,116,97,105,110,73,110,115,116,101,97,100,102,
+105,102,116,101,101,110,97,115,32,119,101,108,108,46,121,97,104,111,111,46,114,
+101,115,112,111,110,100,102,105,103,104,116,101,114,111,98,115,99,117,114,101,
+114,101,102,108,101,99,116,111,114,103,97,110,105,99,61,32,77,97,116,104,46,101,
+100,105,116,105,110,103,111,110,108,105,110,101,32,112,97,100,100,105,110,103,97
+,32,119,104,111,108,101,111,110,101,114,114,111,114,121,101,97,114,32,111,102,
+101,110,100,32,111,102,32,98,97,114,114,105,101,114,119,104,101,110,32,105,116,
+104,101,97,100,101,114,32,104,111,109,101,32,111,102,114,101,115,117,109,101,100
+,114,101,110,97,109,101,100,115,116,114,111,110,103,62,104,101,97,116,105,110,
+103,114,101,116,97,105,110,115,99,108,111,117,100,102,114,119,97,121,32,111,102,
+32,77,97,114,99,104,32,49,107,110,111,119,105,110,103,105,110,32,112,97,114,116,
+66,101,116,119,101,101,110,108,101,115,115,111,110,115,99,108,111,115,101,115,
+116,118,105,114,116,117,97,108,108,105,110,107,115,34,62,99,114,111,115,115,101,
+100,69,78,68,32,45,45,62,102,97,109,111,117,115,32,97,119,97,114,100,101,100,76,
+105,99,101,110,115,101,72,101,97,108,116,104,32,102,97,105,114,108,121,32,119,
+101,97,108,116,104,121,109,105,110,105,109,97,108,65,102,114,105,99,97,110,99,
+111,109,112,101,116,101,108,97,98,101,108,34,62,115,105,110,103,105,110,103,102,
+97,114,109,101,114,115,66,114,97,115,105,108,41,100,105,115,99,117,115,115,114,
+101,112,108,97,99,101,71,114,101,103,111,114,121,102,111,110,116,32,99,111,112,
+117,114,115,117,101,100,97,112,112,101,97,114,115,109,97,107,101,32,117,112,114,
+111,117,110,100,101,100,98,111,116,104,32,111,102,98,108,111,99,107,101,100,115,
+97,119,32,116,104,101,111,102,102,105,99,101,115,99,111,108,111,117,114,115,105,
+102,40,100,111,99,117,119,104,101,110,32,104,101,101,110,102,111,114,99,101,112,
+117,115,104,40,102,117,65,117,103,117,115,116,32,85,84,70,45,56,34,62,70,97,110,
+116,97,115,121,105,110,32,109,111,115,116,105,110,106,117,114,101,100,85,115,117
+,97,108,108,121,102,97,114,109,105,110,103,99,108,111,115,117,114,101,111,98,106
+,101,99,116,32,100,101,102,101,110,99,101,117,115,101,32,111,102,32,77,101,100,
+105,99,97,108,60,98,111,100,121,62,10,101,118,105,100,101,110,116,98,101,32,117,
+115,101,100,107,101,121,67,111,100,101,115,105,120,116,101,101,110,73,115,108,97
+,109,105,99,35,48,48,48,48,48,48,101,110,116,105,114,101,32,119,105,100,101,108,
+121,32,97,99,116,105,118,101,32,40,116,121,112,101,111,102,111,110,101,32,99,97,
+110,99,111,108,111,114,32,61,115,112,101,97,107,101,114,101,120,116,101,110,100,
+115,80,104,121,115,105,99,115,116,101,114,114,97,105,110,60,116,98,111,100,121,
+62,102,117,110,101,114,97,108,118,105,101,119,105,110,103,109,105,100,100,108,
+101,32,99,114,105,99,107,101,116,112,114,111,112,104,101,116,115,104,105,102,116
+,101,100,100,111,99,116,111,114,115,82,117,115,115,101,108,108,32,116,97,114,103
+,101,116,99,111,109,112,97,99,116,97,108,103,101,98,114,97,115,111,99,105,97,108
+,45,98,117,108,107,32,111,102,109,97,110,32,97,110,100,60,47,116,100,62,10,32,
+104,101,32,108,101,102,116,41,46,118,97,108,40,41,102,97,108,115,101,41,59,108,
+111,103,105,99,97,108,98,97,110,107,105,110,103,104,111,109,101,32,116,111,110,
+97,109,105,110,103,32,65,114,105,122,111,110,97,99,114,101,100,105,116,115,41,59
+,10,125,41,59,10,102,111,117,110,100,101,114,105,110,32,116,117,114,110,67,111,
+108,108,105,110,115,98,101,102,111,114,101,32,66,117,116,32,116,104,101,99,104,
+97,114,103,101,100,84,105,116,108,101,34,62,67,97,112,116,97,105,110,115,112,101
+,108,108,101,100,103,111,100,100,101,115,115,84,97,103,32,45,45,62,65,100,100,
+105,110,103,58,98,117,116,32,119,97,115,82,101,99,101,110,116,32,112,97,116,105,
+101,110,116,98,97,99,107,32,105,110,61,102,97,108,115,101,38,76,105,110,99,111,
+108,110,119,101,32,107,110,111,119,67,111,117,110,116,101,114,74,117,100,97,105,
+115,109,115,99,114,105,112,116,32,97,108,116,101,114,101,100,39,93,41,59,10,32,
+32,104,97,115,32,116,104,101,117,110,99,108,101,97,114,69,118,101,110,116,39,44,
+98,111,116,104,32,105,110,110,111,116,32,97,108,108,10,10,60,33,45,45,32,112,108
+,97,99,105,110,103,104,97,114,100,32,116,111,32,99,101,110,116,101,114,115,111,
+114,116,32,111,102,99,108,105,101,110,116,115,115,116,114,101,101,116,115,66,101
+,114,110,97,114,100,97,115,115,101,114,116,115,116,101,110,100,32,116,111,102,97
+,110,116,97,115,121,100,111,119,110,32,105,110,104,97,114,98,111,117,114,70,114,
+101,101,100,111,109,106,101,119,101,108,114,121,47,97,98,111,117,116,46,46,115,
+101,97,114,99,104,108,101,103,101,110,100,115,105,115,32,109,97,100,101,109,111,
+100,101,114,110,32,111,110,108,121,32,111,110,111,110,108,121,32,116,111,105,109
+,97,103,101,34,32,108,105,110,101,97,114,32,112,97,105,110,116,101,114,97,110,
+100,32,110,111,116,114,97,114,101,108,121,32,97,99,114,111,110,121,109,100,101,
+108,105,118,101,114,115,104,111,114,116,101,114,48,48,38,97,109,112,59,97,115,32
+,109,97,110,121,119,105,100,116,104,61,34,47,42,32,60,33,91,67,116,105,116,108,
+101,32,61,111,102,32,116,104,101,32,108,111,119,101,115,116,32,112,105,99,107,
+101,100,32,101,115,99,97,112,101,100,117,115,101,115,32,111,102,112,101,111,112,
+108,101,115,32,80,117,98,108,105,99,77,97,116,116,104,101,119,116,97,99,116,105,
+99,115,100,97,109,97,103,101,100,119,97,121,32,102,111,114,108,97,119,115,32,111
+,102,101,97,115,121,32,116,111,32,119,105,110,100,111,119,115,116,114,111,110,
+103,32,32,115,105,109,112,108,101,125,99,97,116,99,104,40,115,101,118,101,110,
+116,104,105,110,102,111,98,111,120,119,101,110,116,32,116,111,112,97,105,110,116
+,101,100,99,105,116,105,122,101,110,73,32,100,111,110,39,116,114,101,116,114,101
+,97,116,46,32,83,111,109,101,32,119,119,46,34,41,59,10,98,111,109,98,105,110,103
+,109,97,105,108,116,111,58,109,97,100,101,32,105,110,46,32,77,97,110,121,32,99,
+97,114,114,105,101,115,124,124,123,125,59,119,105,119,111,114,107,32,111,102,115
+,121,110,111,110,121,109,100,101,102,101,97,116,115,102,97,118,111,114,101,100,
+111,112,116,105,99,97,108,112,97,103,101,84,114,97,117,110,108,101,115,115,32,
+115,101,110,100,105,110,103,108,101,102,116,34,62,60,99,111,109,83,99,111,114,65
+,108,108,32,116,104,101,106,81,117,101,114,121,46,116,111,117,114,105,115,116,67
+,108,97,115,115,105,99,102,97,108,115,101,34,32,87,105,108,104,101,108,109,115,
+117,98,117,114,98,115,103,101,110,117,105,110,101,98,105,115,104,111,112,115,46,
+115,112,108,105,116,40,103,108,111,98,97,108,32,102,111,108,108,111,119,115,98,
+111,100,121,32,111,102,110,111,109,105,110,97,108,67,111,110,116,97,99,116,115,
+101,99,117,108,97,114,108,101,102,116,32,116,111,99,104,105,101,102,108,121,45,
+104,105,100,100,101,110,45,98,97,110,110,101,114,60,47,108,105,62,10,10,46,32,87
+,104,101,110,32,105,110,32,98,111,116,104,100,105,115,109,105,115,115,69,120,112
+,108,111,114,101,97,108,119,97,121,115,32,118,105,97,32,116,104,101,115,112,97,
+195,177,111,108,119,101,108,102,97,114,101,114,117,108,105,110,103,32,97,114,114
+,97,110,103,101,99,97,112,116,97,105,110,104,105,115,32,115,111,110,114,117,108,
+101,32,111,102,104,101,32,116,111,111,107,105,116,115,101,108,102,44,61,48,38,97
+,109,112,59,40,99,97,108,108,101,100,115,97,109,112,108,101,115,116,111,32,109,
+97,107,101,99,111,109,47,112,97,103,77,97,114,116,105,110,32,75,101,110,110,101,
+100,121,97,99,99,101,112,116,115,102,117,108,108,32,111,102,104,97,110,100,108,
+101,100,66,101,115,105,100,101,115,47,47,45,45,62,60,47,97,98,108,101,32,116,111
+,116,97,114,103,101,116,115,101,115,115,101,110,99,101,104,105,109,32,116,111,32
+,105,116,115,32,98,121,32,99,111,109,109,111,110,46,109,105,110,101,114,97,108,
+116,111,32,116,97,107,101,119,97,121,115,32,116,111,115,46,111,114,103,47,108,97
+,100,118,105,115,101,100,112,101,110,97,108,116,121,115,105,109,112,108,101,58,
+105,102,32,116,104,101,121,76,101,116,116,101,114,115,97,32,115,104,111,114,116,
+72,101,114,98,101,114,116,115,116,114,105,107,101,115,32,103,114,111,117,112,115
+,46,108,101,110,103,116,104,102,108,105,103,104,116,115,111,118,101,114,108,97,
+112,115,108,111,119,108,121,32,108,101,115,115,101,114,32,115,111,99,105,97,108,
+32,60,47,112,62,10,9,9,105,116,32,105,110,116,111,114,97,110,107,101,100,32,114,
+97,116,101,32,111,102,117,108,62,13,10,32,32,97,116,116,101,109,112,116,112,97,
+105,114,32,111,102,109,97,107,101,32,105,116,75,111,110,116,97,107,116,65,110,
+116,111,110,105,111,104,97,118,105,110,103,32,114,97,116,105,110,103,115,32,97,
+99,116,105,118,101,115,116,114,101,97,109,115,116,114,97,112,112,101,100,34,41,
+46,99,115,115,40,104,111,115,116,105,108,101,108,101,97,100,32,116,111,108,105,
+116,116,108,101,32,103,114,111,117,112,115,44,80,105,99,116,117,114,101,45,45,62
+,13,10,13,10,32,114,111,119,115,61,34,32,111,98,106,101,99,116,105,110,118,101,
+114,115,101,60,102,111,111,116,101,114,67,117,115,116,111,109,86,62,60,92,47,115
+,99,114,115,111,108,118,105,110,103,67,104,97,109,98,101,114,115,108,97,118,101,
+114,121,119,111,117,110,100,101,100,119,104,101,114,101,97,115,33,61,32,39,117,
+110,100,102,111,114,32,97,108,108,112,97,114,116,108,121,32,45,114,105,103,104,
+116,58,65,114,97,98,105,97,110,98,97,99,107,101,100,32,99,101,110,116,117,114,
+121,117,110,105,116,32,111,102,109,111,98,105,108,101,45,69,117,114,111,112,101,
+44,105,115,32,104,111,109,101,114,105,115,107,32,111,102,100,101,115,105,114,101
+,100,67,108,105,110,116,111,110,99,111,115,116,32,111,102,97,103,101,32,111,102,
+32,98,101,99,111,109,101,32,110,111,110,101,32,111,102,112,38,113,117,111,116,59
+,77,105,100,100,108,101,32,101,97,100,39,41,91,48,67,114,105,116,105,99,115,115,
+116,117,100,105,111,115,62,38,99,111,112,121,59,103,114,111,117,112,34,62,97,115
+,115,101,109,98,108,109,97,107,105,110,103,32,112,114,101,115,115,101,100,119,
+105,100,103,101,116,46,112,115,58,34,32,63,32,114,101,98,117,105,108,116,98,121,
+32,115,111,109,101,70,111,114,109,101,114,32,101,100,105,116,111,114,115,100,101
+,108,97,121,101,100,67,97,110,111,110,105,99,104,97,100,32,116,104,101,112,117,
+115,104,105,110,103,99,108,97,115,115,61,34,98,117,116,32,97,114,101,112,97,114,
+116,105,97,108,66,97,98,121,108,111,110,98,111,116,116,111,109,32,99,97,114,114,
+105,101,114,67,111,109,109,97,110,100,105,116,115,32,117,115,101,65,115,32,119,
+105,116,104,99,111,117,114,115,101,115,97,32,116,104,105,114,100,100,101,110,111
+,116,101,115,97,108,115,111,32,105,110,72,111,117,115,116,111,110,50,48,112,120,
+59,34,62,97,99,99,117,115,101,100,100,111,117,98,108,101,32,103,111,97,108,32,
+111,102,70,97,109,111,117,115,32,41,46,98,105,110,100,40,112,114,105,101,115,116
+,115,32,79,110,108,105,110,101,105,110,32,74,117,108,121,115,116,32,43,32,34,103
+,99,111,110,115,117,108,116,100,101,99,105,109,97,108,104,101,108,112,102,117,
+108,114,101,118,105,118,101,100,105,115,32,118,101,114,121,114,39,43,39,105,112,
+116,108,111,115,105,110,103,32,102,101,109,97,108,101,115,105,115,32,97,108,115,
+111,115,116,114,105,110,103,115,100,97,121,115,32,111,102,97,114,114,105,118,97,
+108,102,117,116,117,114,101,32,60,111,98,106,101,99,116,102,111,114,99,105,110,
+103,83,116,114,105,110,103,40,34,32,47,62,10,9,9,104,101,114,101,32,105,115,101,
+110,99,111,100,101,100,46,32,32,84,104,101,32,98,97,108,108,111,111,110,100,111,
+110,101,32,98,121,47,99,111,109,109,111,110,98,103,99,111,108,111,114,108,97,119
+,32,111,102,32,73,110,100,105,97,110,97,97,118,111,105,100,101,100,98,117,116,32
+,116,104,101,50,112,120,32,51,112,120,106,113,117,101,114,121,46,97,102,116,101,
+114,32,97,112,111,108,105,99,121,46,109,101,110,32,97,110,100,102,111,111,116,
+101,114,45,61,32,116,114,117,101,59,102,111,114,32,117,115,101,115,99,114,101,
+101,110,46,73,110,100,105,97,110,32,105,109,97,103,101,32,61,102,97,109,105,108,
+121,44,104,116,116,112,58,47,47,32,38,110,98,115,112,59,100,114,105,118,101,114,
+115,101,116,101,114,110,97,108,115,97,109,101,32,97,115,110,111,116,105,99,101,
+100,118,105,101,119,101,114,115,125,41,40,41,59,10,32,105,115,32,109,111,114,101
+,115,101,97,115,111,110,115,102,111,114,109,101,114,32,116,104,101,32,110,101,
+119,105,115,32,106,117,115,116,99,111,110,115,101,110,116,32,83,101,97,114,99,
+104,119,97,115,32,116,104,101,119,104,121,32,116,104,101,115,104,105,112,112,101
+,100,98,114,62,60,98,114,62,119,105,100,116,104,58,32,104,101,105,103,104,116,61
+,109,97,100,101,32,111,102,99,117,105,115,105,110,101,105,115,32,116,104,97,116,
+97,32,118,101,114,121,32,65,100,109,105,114,97,108,32,102,105,120,101,100,59,110
+,111,114,109,97,108,32,77,105,115,115,105,111,110,80,114,101,115,115,44,32,111,
+110,116,97,114,105,111,99,104,97,114,115,101,116,116,114,121,32,116,111,32,105,
+110,118,97,100,101,100,61,34,116,114,117,101,34,115,112,97,99,105,110,103,105,
+115,32,109,111,115,116,97,32,109,111,114,101,32,116,111,116,97,108,108,121,102,
+97,108,108,32,111,102,125,41,59,13,10,32,32,105,109,109,101,110,115,101,116,105,
+109,101,32,105,110,115,101,116,32,111,117,116,115,97,116,105,115,102,121,116,111
+,32,102,105,110,100,100,111,119,110,32,116,111,108,111,116,32,111,102,32,80,108,
+97,121,101,114,115,105,110,32,74,117,110,101,113,117,97,110,116,117,109,110,111,
+116,32,116,104,101,116,105,109,101,32,116,111,100,105,115,116,97,110,116,70,105,
+110,110,105,115,104,115,114,99,32,61,32,40,115,105,110,103,108,101,32,104,101,
+108,112,32,111,102,71,101,114,109,97,110,32,108,97,119,32,97,110,100,108,97,98,
+101,108,101,100,102,111,114,101,115,116,115,99,111,111,107,105,110,103,115,112,
+97,99,101,34,62,104,101,97,100,101,114,45,119,101,108,108,32,97,115,83,116,97,
+110,108,101,121,98,114,105,100,103,101,115,47,103,108,111,98,97,108,67,114,111,
+97,116,105,97,32,65,98,111,117,116,32,91,48,93,59,10,32,32,105,116,44,32,97,110,
+100,103,114,111,117,112,101,100,98,101,105,110,103,32,97,41,123,116,104,114,111,
+119,104,101,32,109,97,100,101,108,105,103,104,116,101,114,101,116,104,105,99,97,
+108,70,70,70,70,70,70,34,98,111,116,116,111,109,34,108,105,107,101,32,97,32,101,
+109,112,108,111,121,115,108,105,118,101,32,105,110,97,115,32,115,101,101,110,112
+,114,105,110,116,101,114,109,111,115,116,32,111,102,117,98,45,108,105,110,107,
+114,101,106,101,99,116,115,97,110,100,32,117,115,101,105,109,97,103,101,34,62,
+115,117,99,99,101,101,100,102,101,101,100,105,110,103,78,117,99,108,101,97,114,
+105,110,102,111,114,109,97,116,111,32,104,101,108,112,87,111,109,101,110,39,115,
+78,101,105,116,104,101,114,77,101,120,105,99,97,110,112,114,111,116,101,105,110,
+60,116,97,98,108,101,32,98,121,32,109,97,110,121,104,101,97,108,116,104,121,108,
+97,119,115,117,105,116,100,101,118,105,115,101,100,46,112,117,115,104,40,123,115
+,101,108,108,101,114,115,115,105,109,112,108,121,32,84,104,114,111,117,103,104,
+46,99,111,111,107,105,101,32,73,109,97,103,101,40,111,108,100,101,114,34,62,117,
+115,46,106,115,34,62,32,83,105,110,99,101,32,117,110,105,118,101,114,115,108,97,
+114,103,101,114,32,111,112,101,110,32,116,111,33,45,45,32,101,110,100,108,105,
+101,115,32,105,110,39,93,41,59,13,10,32,32,109,97,114,107,101,116,119,104,111,32
+,105,115,32,40,34,68,79,77,67,111,109,97,110,97,103,101,100,111,110,101,32,102,
+111,114,116,121,112,101,111,102,32,75,105,110,103,100,111,109,112,114,111,102,
+105,116,115,112,114,111,112,111,115,101,116,111,32,115,104,111,119,99,101,110,
+116,101,114,59,109,97,100,101,32,105,116,100,114,101,115,115,101,100,119,101,114
+,101,32,105,110,109,105,120,116,117,114,101,112,114,101,99,105,115,101,97,114,
+105,115,105,110,103,115,114,99,32,61,32,39,109,97,107,101,32,97,32,115,101,99,
+117,114,101,100,66,97,112,116,105,115,116,118,111,116,105,110,103,32,10,9,9,118,
+97,114,32,77,97,114,99,104,32,50,103,114,101,119,32,117,112,67,108,105,109,97,
+116,101,46,114,101,109,111,118,101,115,107,105,108,108,101,100,119,97,121,32,116
+,104,101,60,47,104,101,97,100,62,102,97,99,101,32,111,102,97,99,116,105,110,103,
+32,114,105,103,104,116,34,62,116,111,32,119,111,114,107,114,101,100,117,99,101,
+115,104,97,115,32,104,97,100,101,114,101,99,116,101,100,115,104,111,119,40,41,59
+,97,99,116,105,111,110,61,98,111,111,107,32,111,102,97,110,32,97,114,101,97,61,
+61,32,34,104,116,116,60,104,101,97,100,101,114,10,60,104,116,109,108,62,99,111,
+110,102,111,114,109,102,97,99,105,110,103,32,99,111,111,107,105,101,46,114,101,
+108,121,32,111,110,104,111,115,116,101,100,32,46,99,117,115,116,111,109,104,101,
+32,119,101,110,116,98,117,116,32,102,111,114,115,112,114,101,97,100,32,70,97,109
+,105,108,121,32,97,32,109,101,97,110,115,111,117,116,32,116,104,101,102,111,114,
+117,109,115,46,102,111,111,116,97,103,101,34,62,77,111,98,105,108,67,108,101,109
+,101,110,116,115,34,32,105,100,61,34,97,115,32,104,105,103,104,105,110,116,101,
+110,115,101,45,45,62,60,33,45,45,102,101,109,97,108,101,32,105,115,32,115,101,
+101,110,105,109,112,108,105,101,100,115,101,116,32,116,104,101,97,32,115,116,97,
+116,101,97,110,100,32,104,105,115,102,97,115,116,101,115,116,98,101,115,105,100,
+101,115,98,117,116,116,111,110,95,98,111,117,110,100,101,100,34,62,60,105,109,
+103,32,73,110,102,111,98,111,120,101,118,101,110,116,115,44,97,32,121,111,117,
+110,103,97,110,100,32,97,114,101,78,97,116,105,118,101,32,99,104,101,97,112,101,
+114,84,105,109,101,111,117,116,97,110,100,32,104,97,115,101,110,103,105,110,101,
+115,119,111,110,32,116,104,101,40,109,111,115,116,108,121,114,105,103,104,116,58
+,32,102,105,110,100,32,97,32,45,98,111,116,116,111,109,80,114,105,110,99,101,32,
+97,114,101,97,32,111,102,109,111,114,101,32,111,102,115,101,97,114,99,104,95,110
+,97,116,117,114,101,44,108,101,103,97,108,108,121,112,101,114,105,111,100,44,108
+,97,110,100,32,111,102,111,114,32,119,105,116,104,105,110,100,117,99,101,100,112
+,114,111,118,105,110,103,109,105,115,115,105,108,101,108,111,99,97,108,108,121,
+65,103,97,105,110,115,116,116,104,101,32,119,97,121,107,38,113,117,111,116,59,
+112,120,59,34,62,13,10,112,117,115,104,101,100,32,97,98,97,110,100,111,110,110,
+117,109,101,114,97,108,67,101,114,116,97,105,110,73,110,32,116,104,105,115,109,
+111,114,101,32,105,110,111,114,32,115,111,109,101,110,97,109,101,32,105,115,97,
+110,100,44,32,105,110,99,114,111,119,110,101,100,73,83,66,78,32,48,45,99,114,101
+,97,116,101,115,79,99,116,111,98,101,114,109,97,121,32,110,111,116,99,101,110,
+116,101,114,32,108,97,116,101,32,105,110,68,101,102,101,110,99,101,101,110,97,99
+,116,101,100,119,105,115,104,32,116,111,98,114,111,97,100,108,121,99,111,111,108
+,105,110,103,111,110,108,111,97,100,61,105,116,46,32,84,104,101,114,101,99,111,
+118,101,114,77,101,109,98,101,114,115,104,101,105,103,104,116,32,97,115,115,117,
+109,101,115,60,104,116,109,108,62,10,112,101,111,112,108,101,46,105,110,32,111,
+110,101,32,61,119,105,110,100,111,119,102,111,111,116,101,114,95,97,32,103,111,
+111,100,32,114,101,107,108,97,109,97,111,116,104,101,114,115,44,116,111,32,116,
+104,105,115,95,99,111,111,107,105,101,112,97,110,101,108,34,62,76,111,110,100,
+111,110,44,100,101,102,105,110,101,115,99,114,117,115,104,101,100,98,97,112,116,
+105,115,109,99,111,97,115,116,97,108,115,116,97,116,117,115,32,116,105,116,108,
+101,34,32,109,111,118,101,32,116,111,108,111,115,116,32,105,110,98,101,116,116,
+101,114,32,105,109,112,108,105,101,115,114,105,118,97,108,114,121,115,101,114,
+118,101,114,115,32,83,121,115,116,101,109,80,101,114,104,97,112,115,101,115,32,
+97,110,100,32,99,111,110,116,101,110,100,102,108,111,119,105,110,103,108,97,115,
+116,101,100,32,114,105,115,101,32,105,110,71,101,110,101,115,105,115,118,105,101
+,119,32,111,102,114,105,115,105,110,103,32,115,101,101,109,32,116,111,98,117,116
+,32,105,110,32,98,97,99,107,105,110,103,104,101,32,119,105,108,108,103,105,118,
+101,110,32,97,103,105,118,105,110,103,32,99,105,116,105,101,115,46,102,108,111,
+119,32,111,102,32,76,97,116,101,114,32,97,108,108,32,98,117,116,72,105,103,104,
+119,97,121,111,110,108,121,32,98,121,115,105,103,110,32,111,102,104,101,32,100,
+111,101,115,100,105,102,102,101,114,115,98,97,116,116,101,114,121,38,97,109,112,
+59,108,97,115,105,110,103,108,101,115,116,104,114,101,97,116,115,105,110,116,101
+,103,101,114,116,97,107,101,32,111,110,114,101,102,117,115,101,100,99,97,108,108
+,101,100,32,61,85,83,38,97,109,112,83,101,101,32,116,104,101,110,97,116,105,118,
+101,115,98,121,32,116,104,105,115,115,121,115,116,101,109,46,104,101,97,100,32,
+111,102,58,104,111,118,101,114,44,108,101,115,98,105,97,110,115,117,114,110,97,
+109,101,97,110,100,32,97,108,108,99,111,109,109,111,110,47,104,101,97,100,101,
+114,95,95,112,97,114,97,109,115,72,97,114,118,97,114,100,47,112,105,120,101,108,
+46,114,101,109,111,118,97,108,115,111,32,108,111,110,103,114,111,108,101,32,111,
+102,106,111,105,110,116,108,121,115,107,121,115,99,114,97,85,110,105,99,111,100,
+101,98,114,32,47,62,13,10,65,116,108,97,110,116,97,110,117,99,108,101,117,115,67
+,111,117,110,116,121,44,112,117,114,101,108,121,32,99,111,117,110,116,34,62,101,
+97,115,105,108,121,32,98,117,105,108,100,32,97,111,110,99,108,105,99,107,97,32,
+103,105,118,101,110,112,111,105,110,116,101,114,104,38,113,117,111,116,59,101,
+118,101,110,116,115,32,101,108,115,101,32,123,10,100,105,116,105,111,110,115,110
+,111,119,32,116,104,101,44,32,119,105,116,104,32,109,97,110,32,119,104,111,111,
+114,103,47,87,101,98,111,110,101,32,97,110,100,99,97,118,97,108,114,121,72,101,
+32,100,105,101,100,115,101,97,116,116,108,101,48,48,44,48,48,48,32,123,119,105,
+110,100,111,119,104,97,118,101,32,116,111,105,102,40,119,105,110,100,97,110,100,
+32,105,116,115,115,111,108,101,108,121,32,109,38,113,117,111,116,59,114,101,110,
+101,119,101,100,68,101,116,114,111,105,116,97,109,111,110,103,115,116,101,105,
+116,104,101,114,32,116,104,101,109,32,105,110,83,101,110,97,116,111,114,85,115,
+60,47,97,62,60,75,105,110,103,32,111,102,70,114,97,110,99,105,115,45,112,114,111
+,100,117,99,104,101,32,117,115,101,100,97,114,116,32,97,110,100,104,105,109,32,
+97,110,100,117,115,101,100,32,98,121,115,99,111,114,105,110,103,97,116,32,104,
+111,109,101,116,111,32,104,97,118,101,114,101,108,97,116,101,115,105,98,105,108,
+105,116,121,102,97,99,116,105,111,110,66,117,102,102,97,108,111,108,105,110,107,
+34,62,60,119,104,97,116,32,104,101,102,114,101,101,32,116,111,67,105,116,121,32,
+111,102,99,111,109,101,32,105,110,115,101,99,116,111,114,115,99,111,117,110,116,
+101,100,111,110,101,32,100,97,121,110,101,114,118,111,117,115,115,113,117,97,114
+,101,32,125,59,105,102,40,103,111,105,110,32,119,104,97,116,105,109,103,34,32,97
+,108,105,115,32,111,110,108,121,115,101,97,114,99,104,47,116,117,101,115,100,97,
+121,108,111,111,115,101,108,121,83,111,108,111,109,111,110,115,101,120,117,97,
+108,32,45,32,60,97,32,104,114,109,101,100,105,117,109,34,68,79,32,78,79,84,32,70
+,114,97,110,99,101,44,119,105,116,104,32,97,32,119,97,114,32,97,110,100,115,101,
+99,111,110,100,32,116,97,107,101,32,97,32,62,13,10,13,10,13,10,109,97,114,107,
+101,116,46,104,105,103,104,119,97,121,100,111,110,101,32,105,110,99,116,105,118,
+105,116,121,34,108,97,115,116,34,62,111,98,108,105,103,101,100,114,105,115,101,
+32,116,111,34,117,110,100,101,102,105,109,97,100,101,32,116,111,32,69,97,114,108
+,121,32,112,114,97,105,115,101,100,105,110,32,105,116,115,32,102,111,114,32,104,
+105,115,97,116,104,108,101,116,101,74,117,112,105,116,101,114,89,97,104,111,111,
+33,32,116,101,114,109,101,100,32,115,111,32,109,97,110,121,114,101,97,108,108,
+121,32,115,46,32,84,104,101,32,97,32,119,111,109,97,110,63,118,97,108,117,101,61
+,100,105,114,101,99,116,32,114,105,103,104,116,34,32,98,105,99,121,99,108,101,97
+,99,105,110,103,61,34,100,97,121,32,97,110,100,115,116,97,116,105,110,103,82,97,
+116,104,101,114,44,104,105,103,104,101,114,32,79,102,102,105,99,101,32,97,114,
+101,32,110,111,119,116,105,109,101,115,44,32,119,104,101,110,32,97,32,112,97,121
+,32,102,111,114,111,110,32,116,104,105,115,45,108,105,110,107,34,62,59,98,111,
+114,100,101,114,97,114,111,117,110,100,32,97,110,110,117,97,108,32,116,104,101,
+32,78,101,119,112,117,116,32,116,104,101,46,99,111,109,34,32,116,97,107,105,110,
+32,116,111,97,32,98,114,105,101,102,40,105,110,32,116,104,101,103,114,111,117,
+112,115,46,59,32,119,105,100,116,104,101,110,122,121,109,101,115,115,105,109,112
+,108,101,32,105,110,32,108,97,116,101,123,114,101,116,117,114,110,116,104,101,
+114,97,112,121,97,32,112,111,105,110,116,98,97,110,110,105,110,103,105,110,107,
+115,34,62,10,40,41,59,34,32,114,101,97,32,112,108,97,99,101,92,117,48,48,51,67,
+97,97,98,111,117,116,32,97,116,114,62,13,10,9,9,99,99,111,117,110,116,32,103,105
+,118,101,115,32,97,60,83,67,82,73,80,84,82,97,105,108,119,97,121,116,104,101,109
+,101,115,47,116,111,111,108,98,111,120,66,121,73,100,40,34,120,104,117,109,97,
+110,115,44,119,97,116,99,104,101,115,105,110,32,115,111,109,101,32,105,102,32,40
+,119,105,99,111,109,105,110,103,32,102,111,114,109,97,116,115,32,85,110,100,101,
+114,32,98,117,116,32,104,97,115,104,97,110,100,101,100,32,109,97,100,101,32,98,
+121,116,104,97,110,32,105,110,102,101,97,114,32,111,102,100,101,110,111,116,101,
+100,47,105,102,114,97,109,101,108,101,102,116,32,105,110,118,111,108,116,97,103,
+101,105,110,32,101,97,99,104,97,38,113,117,111,116,59,98,97,115,101,32,111,102,
+73,110,32,109,97,110,121,117,110,100,101,114,103,111,114,101,103,105,109,101,115
+,97,99,116,105,111,110,32,60,47,112,62,13,10,60,117,115,116,111,109,86,97,59,38,
+103,116,59,60,47,105,109,112,111,114,116,115,111,114,32,116,104,97,116,109,111,
+115,116,108,121,32,38,97,109,112,59,114,101,32,115,105,122,101,61,34,60,47,97,62
+,60,47,104,97,32,99,108,97,115,115,112,97,115,115,105,118,101,72,111,115,116,32,
+61,32,87,104,101,116,104,101,114,102,101,114,116,105,108,101,86,97,114,105,111,
+117,115,61,91,93,59,40,102,117,99,97,109,101,114,97,115,47,62,60,47,116,100,62,
+97,99,116,115,32,97,115,73,110,32,115,111,109,101,62,13,10,13,10,60,33,111,114,
+103,97,110,105,115,32,60,98,114,32,47,62,66,101,105,106,105,110,103,99,97,116,97
+,108,195,160,100,101,117,116,115,99,104,101,117,114,111,112,101,117,101,117,115,
+107,97,114,97,103,97,101,105,108,103,101,115,118,101,110,115,107,97,101,115,112,
+97,195,177,97,109,101,110,115,97,106,101,117,115,117,97,114,105,111,116,114,97,
+98,97,106,111,109,195,169,120,105,99,111,112,195,161,103,105,110,97,115,105,101,
+109,112,114,101,115,105,115,116,101,109,97,111,99,116,117,98,114,101,100,117,114
+,97,110,116,101,97,195,177,97,100,105,114,101,109,112,114,101,115,97,109,111,109
+,101,110,116,111,110,117,101,115,116,114,111,112,114,105,109,101,114,97,116,114,
+97,118,195,169,115,103,114,97,99,105,97,115,110,117,101,115,116,114,97,112,114,
+111,99,101,115,111,101,115,116,97,100,111,115,99,97,108,105,100,97,100,112,101,
+114,115,111,110,97,110,195,186,109,101,114,111,97,99,117,101,114,100,111,109,195
+,186,115,105,99,97,109,105,101,109,98,114,111,111,102,101,114,116,97,115,97,108,
+103,117,110,111,115,112,97,195,173,115,101,115,101,106,101,109,112,108,111,100,
+101,114,101,99,104,111,97,100,101,109,195,161,115,112,114,105,118,97,100,111,97,
+103,114,101,103,97,114,101,110,108,97,99,101,115,112,111,115,105,98,108,101,104,
+111,116,101,108,101,115,115,101,118,105,108,108,97,112,114,105,109,101,114,111,
+195,186,108,116,105,109,111,101,118,101,110,116,111,115,97,114,99,104,105,118,
+111,99,117,108,116,117,114,97,109,117,106,101,114,101,115,101,110,116,114,97,100
+,97,97,110,117,110,99,105,111,101,109,98,97,114,103,111,109,101,114,99,97,100,
+111,103,114,97,110,100,101,115,101,115,116,117,100,105,111,109,101,106,111,114,
+101,115,102,101,98,114,101,114,111,100,105,115,101,195,177,111,116,117,114,105,
+115,109,111,99,195,179,100,105,103,111,112,111,114,116,97,100,97,101,115,112,97,
+99,105,111,102,97,109,105,108,105,97,97,110,116,111,110,105,111,112,101,114,109,
+105,116,101,103,117,97,114,100,97,114,97,108,103,117,110,97,115,112,114,101,99,
+105,111,115,97,108,103,117,105,101,110,115,101,110,116,105,100,111,118,105,115,
+105,116,97,115,116,195,173,116,117,108,111,99,111,110,111,99,101,114,115,101,103
+,117,110,100,111,99,111,110,115,101,106,111,102,114,97,110,99,105,97,109,105,110
+,117,116,111,115,115,101,103,117,110,100,97,116,101,110,101,109,111,115,101,102,
+101,99,116,111,115,109,195,161,108,97,103,97,115,101,115,105,195,179,110,114,101
+,118,105,115,116,97,103,114,97,110,97,100,97,99,111,109,112,114,97,114,105,110,
+103,114,101,115,111,103,97,114,99,195,173,97,97,99,99,105,195,179,110,101,99,117
+,97,100,111,114,113,117,105,101,110,101,115,105,110,99,108,117,115,111,100,101,
+98,101,114,195,161,109,97,116,101,114,105,97,104,111,109,98,114,101,115,109,117,
+101,115,116,114,97,112,111,100,114,195,173,97,109,97,195,177,97,110,97,195,186,
+108,116,105,109,97,101,115,116,97,109,111,115,111,102,105,99,105,97,108,116,97,
+109,98,105,101,110,110,105,110,103,195,186,110,115,97,108,117,100,111,115,112,
+111,100,101,109,111,115,109,101,106,111,114,97,114,112,111,115,105,116,105,111,
+110,98,117,115,105,110,101,115,115,104,111,109,101,112,97,103,101,115,101,99,117
+,114,105,116,121,108,97,110,103,117,97,103,101,115,116,97,110,100,97,114,100,99,
+97,109,112,97,105,103,110,102,101,97,116,117,114,101,115,99,97,116,101,103,111,
+114,121,101,120,116,101,114,110,97,108,99,104,105,108,100,114,101,110,114,101,
+115,101,114,118,101,100,114,101,115,101,97,114,99,104,101,120,99,104,97,110,103,
+101,102,97,118,111,114,105,116,101,116,101,109,112,108,97,116,101,109,105,108,
+105,116,97,114,121,105,110,100,117,115,116,114,121,115,101,114,118,105,99,101,
+115,109,97,116,101,114,105,97,108,112,114,111,100,117,99,116,115,122,45,105,110,
+100,101,120,58,99,111,109,109,101,110,116,115,115,111,102,116,119,97,114,101,99,
+111,109,112,108,101,116,101,99,97,108,101,110,100,97,114,112,108,97,116,102,111,
+114,109,97,114,116,105,99,108,101,115,114,101,113,117,105,114,101,100,109,111,
+118,101,109,101,110,116,113,117,101,115,116,105,111,110,98,117,105,108,100,105,
+110,103,112,111,108,105,116,105,99,115,112,111,115,115,105,98,108,101,114,101,
+108,105,103,105,111,110,112,104,121,115,105,99,97,108,102,101,101,100,98,97,99,
+107,114,101,103,105,115,116,101,114,112,105,99,116,117,114,101,115,100,105,115,
+97,98,108,101,100,112,114,111,116,111,99,111,108,97,117,100,105,101,110,99,101,
+115,101,116,116,105,110,103,115,97,99,116,105,118,105,116,121,101,108,101,109,
+101,110,116,115,108,101,97,114,110,105,110,103,97,110,121,116,104,105,110,103,97
+,98,115,116,114,97,99,116,112,114,111,103,114,101,115,115,111,118,101,114,118,
+105,101,119,109,97,103,97,122,105,110,101,101,99,111,110,111,109,105,99,116,114,
+97,105,110,105,110,103,112,114,101,115,115,117,114,101,118,97,114,105,111,117,
+115,32,60,115,116,114,111,110,103,62,112,114,111,112,101,114,116,121,115,104,111
+,112,112,105,110,103,116,111,103,101,116,104,101,114,97,100,118,97,110,99,101,
+100,98,101,104,97,118,105,111,114,100,111,119,110,108,111,97,100,102,101,97,116,
+117,114,101,100,102,111,111,116,98,97,108,108,115,101,108,101,99,116,101,100,76,
+97,110,103,117,97,103,101,100,105,115,116,97,110,99,101,114,101,109,101,109,98,
+101,114,116,114,97,99,107,105,110,103,112,97,115,115,119,111,114,100,109,111,100
+,105,102,105,101,100,115,116,117,100,101,110,116,115,100,105,114,101,99,116,108,
+121,102,105,103,104,116,105,110,103,110,111,114,116,104,101,114,110,100,97,116,
+97,98,97,115,101,102,101,115,116,105,118,97,108,98,114,101,97,107,105,110,103,
+108,111,99,97,116,105,111,110,105,110,116,101,114,110,101,116,100,114,111,112,
+100,111,119,110,112,114,97,99,116,105,99,101,101,118,105,100,101,110,99,101,102,
+117,110,99,116,105,111,110,109,97,114,114,105,97,103,101,114,101,115,112,111,110
+,115,101,112,114,111,98,108,101,109,115,110,101,103,97,116,105,118,101,112,114,
+111,103,114,97,109,115,97,110,97,108,121,115,105,115,114,101,108,101,97,115,101,
+100,98,97,110,110,101,114,34,62,112,117,114,99,104,97,115,101,112,111,108,105,99
+,105,101,115,114,101,103,105,111,110,97,108,99,114,101,97,116,105,118,101,97,114
+,103,117,109,101,110,116,98,111,111,107,109,97,114,107,114,101,102,101,114,114,
+101,114,99,104,101,109,105,99,97,108,100,105,118,105,115,105,111,110,99,97,108,
+108,98,97,99,107,115,101,112,97,114,97,116,101,112,114,111,106,101,99,116,115,99
+,111,110,102,108,105,99,116,104,97,114,100,119,97,114,101,105,110,116,101,114,
+101,115,116,100,101,108,105,118,101,114,121,109,111,117,110,116,97,105,110,111,
+98,116,97,105,110,101,100,61,32,102,97,108,115,101,59,102,111,114,40,118,97,114,
+32,97,99,99,101,112,116,101,100,99,97,112,97,99,105,116,121,99,111,109,112,117,
+116,101,114,105,100,101,110,116,105,116,121,97,105,114,99,114,97,102,116,101,109
+,112,108,111,121,101,100,112,114,111,112,111,115,101,100,100,111,109,101,115,116
+,105,99,105,110,99,108,117,100,101,115,112,114,111,118,105,100,101,100,104,111,
+115,112,105,116,97,108,118,101,114,116,105,99,97,108,99,111,108,108,97,112,115,
+101,97,112,112,114,111,97,99,104,112,97,114,116,110,101,114,115,108,111,103,111,
+34,62,60,97,100,97,117,103,104,116,101,114,97,117,116,104,111,114,34,32,99,117,
+108,116,117,114,97,108,102,97,109,105,108,105,101,115,47,105,109,97,103,101,115,
+47,97,115,115,101,109,98,108,121,112,111,119,101,114,102,117,108,116,101,97,99,
+104,105,110,103,102,105,110,105,115,104,101,100,100,105,115,116,114,105,99,116,
+99,114,105,116,105,99,97,108,99,103,105,45,98,105,110,47,112,117,114,112,111,115
+,101,115,114,101,113,117,105,114,101,115,101,108,101,99,116,105,111,110,98,101,
+99,111,109,105,110,103,112,114,111,118,105,100,101,115,97,99,97,100,101,109,105,
+99,101,120,101,114,99,105,115,101,97,99,116,117,97,108,108,121,109,101,100,105,
+99,105,110,101,99,111,110,115,116,97,110,116,97,99,99,105,100,101,110,116,77,97,
+103,97,122,105,110,101,100,111,99,117,109,101,110,116,115,116,97,114,116,105,110
+,103,98,111,116,116,111,109,34,62,111,98,115,101,114,118,101,100,58,32,38,113,
+117,111,116,59,101,120,116,101,110,100,101,100,112,114,101,118,105,111,117,115,
+83,111,102,116,119,97,114,101,99,117,115,116,111,109,101,114,100,101,99,105,115,
+105,111,110,115,116,114,101,110,103,116,104,100,101,116,97,105,108,101,100,115,
+108,105,103,104,116,108,121,112,108,97,110,110,105,110,103,116,101,120,116,97,
+114,101,97,99,117,114,114,101,110,99,121,101,118,101,114,121,111,110,101,115,116
+,114,97,105,103,104,116,116,114,97,110,115,102,101,114,112,111,115,105,116,105,
+118,101,112,114,111,100,117,99,101,100,104,101,114,105,116,97,103,101,115,104,
+105,112,112,105,110,103,97,98,115,111,108,117,116,101,114,101,99,101,105,118,101
+,100,114,101,108,101,118,97,110,116,98,117,116,116,111,110,34,32,118,105,111,108
+,101,110,99,101,97,110,121,119,104,101,114,101,98,101,110,101,102,105,116,115,
+108,97,117,110,99,104,101,100,114,101,99,101,110,116,108,121,97,108,108,105,97,
+110,99,101,102,111,108,108,111,119,101,100,109,117,108,116,105,112,108,101,98,
+117,108,108,101,116,105,110,105,110,99,108,117,100,101,100,111,99,99,117,114,114
+,101,100,105,110,116,101,114,110,97,108,36,40,116,104,105,115,41,46,114,101,112,
+117,98,108,105,99,62,60,116,114,62,60,116,100,99,111,110,103,114,101,115,115,114
+,101,99,111,114,100,101,100,117,108,116,105,109,97,116,101,115,111,108,117,116,
+105,111,110,60,117,108,32,105,100,61,34,100,105,115,99,111,118,101,114,72,111,
+109,101,60,47,97,62,119,101,98,115,105,116,101,115,110,101,116,119,111,114,107,
+115,97,108,116,104,111,117,103,104,101,110,116,105,114,101,108,121,109,101,109,
+111,114,105,97,108,109,101,115,115,97,103,101,115,99,111,110,116,105,110,117,101
+,97,99,116,105,118,101,34,62,115,111,109,101,119,104,97,116,118,105,99,116,111,
+114,105,97,87,101,115,116,101,114,110,32,32,116,105,116,108,101,61,34,76,111,99,
+97,116,105,111,110,99,111,110,116,114,97,99,116,118,105,115,105,116,111,114,115,
+68,111,119,110,108,111,97,100,119,105,116,104,111,117,116,32,114,105,103,104,116
+,34,62,10,109,101,97,115,117,114,101,115,119,105,100,116,104,32,61,32,118,97,114
+,105,97,98,108,101,105,110,118,111,108,118,101,100,118,105,114,103,105,110,105,
+97,110,111,114,109,97,108,108,121,104,97,112,112,101,110,101,100,97,99,99,111,
+117,110,116,115,115,116,97,110,100,105,110,103,110,97,116,105,111,110,97,108,82,
+101,103,105,115,116,101,114,112,114,101,112,97,114,101,100,99,111,110,116,114,
+111,108,115,97,99,99,117,114,97,116,101,98,105,114,116,104,100,97,121,115,116,
+114,97,116,101,103,121,111,102,102,105,99,105,97,108,103,114,97,112,104,105,99,
+115,99,114,105,109,105,110,97,108,112,111,115,115,105,98,108,121,99,111,110,115,
+117,109,101,114,80,101,114,115,111,110,97,108,115,112,101,97,107,105,110,103,118
+,97,108,105,100,97,116,101,97,99,104,105,101,118,101,100,46,106,112,103,34,32,47
+,62,109,97,99,104,105,110,101,115,60,47,104,50,62,10,32,32,107,101,121,119,111,
+114,100,115,102,114,105,101,110,100,108,121,98,114,111,116,104,101,114,115,99,
+111,109,98,105,110,101,100,111,114,105,103,105,110,97,108,99,111,109,112,111,115
+,101,100,101,120,112,101,99,116,101,100,97,100,101,113,117,97,116,101,112,97,107
+,105,115,116,97,110,102,111,108,108,111,119,34,32,118,97,108,117,97,98,108,101,
+60,47,108,97,98,101,108,62,114,101,108,97,116,105,118,101,98,114,105,110,103,105
+,110,103,105,110,99,114,101,97,115,101,103,111,118,101,114,110,111,114,112,108,
+117,103,105,110,115,47,76,105,115,116,32,111,102,32,72,101,97,100,101,114,34,62,
+34,32,110,97,109,101,61,34,32,40,38,113,117,111,116,59,103,114,97,100,117,97,116
+,101,60,47,104,101,97,100,62,10,99,111,109,109,101,114,99,101,109,97,108,97,121,
+115,105,97,100,105,114,101,99,116,111,114,109,97,105,110,116,97,105,110,59,104,
+101,105,103,104,116,58,115,99,104,101,100,117,108,101,99,104,97,110,103,105,110,
+103,98,97,99,107,32,116,111,32,99,97,116,104,111,108,105,99,112,97,116,116,101,
+114,110,115,99,111,108,111,114,58,32,35,103,114,101,97,116,101,115,116,115,117,
+112,112,108,105,101,115,114,101,108,105,97,98,108,101,60,47,117,108,62,10,9,9,60
+,115,101,108,101,99,116,32,99,105,116,105,122,101,110,115,99,108,111,116,104,105
+,110,103,119,97,116,99,104,105,110,103,60,108,105,32,105,100,61,34,115,112,101,
+99,105,102,105,99,99,97,114,114,121,105,110,103,115,101,110,116,101,110,99,101,
+60,99,101,110,116,101,114,62,99,111,110,116,114,97,115,116,116,104,105,110,107,
+105,110,103,99,97,116,99,104,40,101,41,115,111,117,116,104,101,114,110,77,105,99
+,104,97,101,108,32,109,101,114,99,104,97,110,116,99,97,114,111,117,115,101,108,
+112,97,100,100,105,110,103,58,105,110,116,101,114,105,111,114,46,115,112,108,105
+,116,40,34,108,105,122,97,116,105,111,110,79,99,116,111,98,101,114,32,41,123,114
+,101,116,117,114,110,105,109,112,114,111,118,101,100,45,45,38,103,116,59,10,10,
+99,111,118,101,114,97,103,101,99,104,97,105,114,109,97,110,46,112,110,103,34,32,
+47,62,115,117,98,106,101,99,116,115,82,105,99,104,97,114,100,32,119,104,97,116,
+101,118,101,114,112,114,111,98,97,98,108,121,114,101,99,111,118,101,114,121,98,
+97,115,101,98,97,108,108,106,117,100,103,109,101,110,116,99,111,110,110,101,99,
+116,46,46,99,115,115,34,32,47,62,32,119,101,98,115,105,116,101,114,101,112,111,
+114,116,101,100,100,101,102,97,117,108,116,34,47,62,60,47,97,62,13,10,101,108,
+101,99,116,114,105,99,115,99,111,116,108,97,110,100,99,114,101,97,116,105,111,
+110,113,117,97,110,116,105,116,121,46,32,73,83,66,78,32,48,100,105,100,32,110,
+111,116,32,105,110,115,116,97,110,99,101,45,115,101,97,114,99,104,45,34,32,108,
+97,110,103,61,34,115,112,101,97,107,101,114,115,67,111,109,112,117,116,101,114,
+99,111,110,116,97,105,110,115,97,114,99,104,105,118,101,115,109,105,110,105,115,
+116,101,114,114,101,97,99,116,105,111,110,100,105,115,99,111,117,110,116,73,116,
+97,108,105,97,110,111,99,114,105,116,101,114,105,97,115,116,114,111,110,103,108,
+121,58,32,39,104,116,116,112,58,39,115,99,114,105,112,116,39,99,111,118,101,114,
+105,110,103,111,102,102,101,114,105,110,103,97,112,112,101,97,114,101,100,66,114
+,105,116,105,115,104,32,105,100,101,110,116,105,102,121,70,97,99,101,98,111,111,
+107,110,117,109,101,114,111,117,115,118,101,104,105,99,108,101,115,99,111,110,99
+,101,114,110,115,65,109,101,114,105,99,97,110,104,97,110,100,108,105,110,103,100
+,105,118,32,105,100,61,34,87,105,108,108,105,97,109,32,112,114,111,118,105,100,
+101,114,95,99,111,110,116,101,110,116,97,99,99,117,114,97,99,121,115,101,99,116,
+105,111,110,32,97,110,100,101,114,115,111,110,102,108,101,120,105,98,108,101,67,
+97,116,101,103,111,114,121,108,97,119,114,101,110,99,101,60,115,99,114,105,112,
+116,62,108,97,121,111,117,116,61,34,97,112,112,114,111,118,101,100,32,109,97,120
+,105,109,117,109,104,101,97,100,101,114,34,62,60,47,116,97,98,108,101,62,83,101,
+114,118,105,99,101,115,104,97,109,105,108,116,111,110,99,117,114,114,101,110,116
+,32,99,97,110,97,100,105,97,110,99,104,97,110,110,101,108,115,47,116,104,101,109
+,101,115,47,47,97,114,116,105,99,108,101,111,112,116,105,111,110,97,108,112,111,
+114,116,117,103,97,108,118,97,108,117,101,61,34,34,105,110,116,101,114,118,97,
+108,119,105,114,101,108,101,115,115,101,110,116,105,116,108,101,100,97,103,101,
+110,99,105,101,115,83,101,97,114,99,104,34,32,109,101,97,115,117,114,101,100,116
+,104,111,117,115,97,110,100,115,112,101,110,100,105,110,103,38,104,101,108,108,
+105,112,59,110,101,119,32,68,97,116,101,34,32,115,105,122,101,61,34,112,97,103,
+101,78,97,109,101,109,105,100,100,108,101,34,32,34,32,47,62,60,47,97,62,104,105,
+100,100,101,110,34,62,115,101,113,117,101,110,99,101,112,101,114,115,111,110,97,
+108,111,118,101,114,102,108,111,119,111,112,105,110,105,111,110,115,105,108,108,
+105,110,111,105,115,108,105,110,107,115,34,62,10,9,60,116,105,116,108,101,62,118
+,101,114,115,105,111,110,115,115,97,116,117,114,100,97,121,116,101,114,109,105,
+110,97,108,105,116,101,109,112,114,111,112,101,110,103,105,110,101,101,114,115,
+101,99,116,105,111,110,115,100,101,115,105,103,110,101,114,112,114,111,112,111,
+115,97,108,61,34,102,97,108,115,101,34,69,115,112,97,195,177,111,108,114,101,108
+,101,97,115,101,115,115,117,98,109,105,116,34,32,101,114,38,113,117,111,116,59,
+97,100,100,105,116,105,111,110,115,121,109,112,116,111,109,115,111,114,105,101,
+110,116,101,100,114,101,115,111,117,114,99,101,114,105,103,104,116,34,62,60,112,
+108,101,97,115,117,114,101,115,116,97,116,105,111,110,115,104,105,115,116,111,
+114,121,46,108,101,97,118,105,110,103,32,32,98,111,114,100,101,114,61,99,111,110
+,116,101,110,116,115,99,101,110,116,101,114,34,62,46,10,10,83,111,109,101,32,100
+,105,114,101,99,116,101,100,115,117,105,116,97,98,108,101,98,117,108,103,97,114,
+105,97,46,115,104,111,119,40,41,59,100,101,115,105,103,110,101,100,71,101,110,
+101,114,97,108,32,99,111,110,99,101,112,116,115,69,120,97,109,112,108,101,115,
+119,105,108,108,105,97,109,115,79,114,105,103,105,110,97,108,34,62,60,115,112,97
+,110,62,115,101,97,114,99,104,34,62,111,112,101,114,97,116,111,114,114,101,113,
+117,101,115,116,115,97,32,38,113,117,111,116,59,97,108,108,111,119,105,110,103,
+68,111,99,117,109,101,110,116,114,101,118,105,115,105,111,110,46,32,10,10,84,104
+,101,32,121,111,117,114,115,101,108,102,67,111,110,116,97,99,116,32,109,105,99,
+104,105,103,97,110,69,110,103,108,105,115,104,32,99,111,108,117,109,98,105,97,
+112,114,105,111,114,105,116,121,112,114,105,110,116,105,110,103,100,114,105,110,
+107,105,110,103,102,97,99,105,108,105,116,121,114,101,116,117,114,110,101,100,67
+,111,110,116,101,110,116,32,111,102,102,105,99,101,114,115,82,117,115,115,105,97
+,110,32,103,101,110,101,114,97,116,101,45,56,56,53,57,45,49,34,105,110,100,105,
+99,97,116,101,102,97,109,105,108,105,97,114,32,113,117,97,108,105,116,121,109,97
+,114,103,105,110,58,48,32,99,111,110,116,101,110,116,118,105,101,119,112,111,114
+,116,99,111,110,116,97,99,116,115,45,116,105,116,108,101,34,62,112,111,114,116,
+97,98,108,101,46,108,101,110,103,116,104,32,101,108,105,103,105,98,108,101,105,
+110,118,111,108,118,101,115,97,116,108,97,110,116,105,99,111,110,108,111,97,100,
+61,34,100,101,102,97,117,108,116,46,115,117,112,112,108,105,101,100,112,97,121,
+109,101,110,116,115,103,108,111,115,115,97,114,121,10,10,65,102,116,101,114,32,
+103,117,105,100,97,110,99,101,60,47,116,100,62,60,116,100,101,110,99,111,100,105
+,110,103,109,105,100,100,108,101,34,62,99,97,109,101,32,116,111,32,100,105,115,
+112,108,97,121,115,115,99,111,116,116,105,115,104,106,111,110,97,116,104,97,110,
+109,97,106,111,114,105,116,121,119,105,100,103,101,116,115,46,99,108,105,110,105
+,99,97,108,116,104,97,105,108,97,110,100,116,101,97,99,104,101,114,115,60,104,
+101,97,100,62,10,9,97,102,102,101,99,116,101,100,115,117,112,112,111,114,116,115
+,112,111,105,110,116,101,114,59,116,111,83,116,114,105,110,103,60,47,115,109,97,
+108,108,62,111,107,108,97,104,111,109,97,119,105,108,108,32,98,101,32,105,110,
+118,101,115,116,111,114,48,34,32,97,108,116,61,34,104,111,108,105,100,97,121,115
+,82,101,115,111,117,114,99,101,108,105,99,101,110,115,101,100,32,40,119,104,105,
+99,104,32,46,32,65,102,116,101,114,32,99,111,110,115,105,100,101,114,118,105,115
+,105,116,105,110,103,101,120,112,108,111,114,101,114,112,114,105,109,97,114,121,
+32,115,101,97,114,99,104,34,32,97,110,100,114,111,105,100,34,113,117,105,99,107,
+108,121,32,109,101,101,116,105,110,103,115,101,115,116,105,109,97,116,101,59,114
+,101,116,117,114,110,32,59,99,111,108,111,114,58,35,32,104,101,105,103,104,116,
+61,97,112,112,114,111,118,97,108,44,32,38,113,117,111,116,59,32,99,104,101,99,
+107,101,100,46,109,105,110,46,106,115,34,109,97,103,110,101,116,105,99,62,60,47,
+97,62,60,47,104,102,111,114,101,99,97,115,116,46,32,87,104,105,108,101,32,116,
+104,117,114,115,100,97,121,100,118,101,114,116,105,115,101,38,101,97,99,117,116,
+101,59,104,97,115,67,108,97,115,115,101,118,97,108,117,97,116,101,111,114,100,
+101,114,105,110,103,101,120,105,115,116,105,110,103,112,97,116,105,101,110,116,
+115,32,79,110,108,105,110,101,32,99,111,108,111,114,97,100,111,79,112,116,105,
+111,110,115,34,99,97,109,112,98,101,108,108,60,33,45,45,32,101,110,100,60,47,115
+,112,97,110,62,60,60,98,114,32,47,62,13,10,95,112,111,112,117,112,115,124,115,99
+,105,101,110,99,101,115,44,38,113,117,111,116,59,32,113,117,97,108,105,116,121,
+32,87,105,110,100,111,119,115,32,97,115,115,105,103,110,101,100,104,101,105,103,
+104,116,58,32,60,98,32,99,108,97,115,115,108,101,38,113,117,111,116,59,32,118,97
+,108,117,101,61,34,32,67,111,109,112,97,110,121,101,120,97,109,112,108,101,115,
+60,105,102,114,97,109,101,32,98,101,108,105,101,118,101,115,112,114,101,115,101,
+110,116,115,109,97,114,115,104,97,108,108,112,97,114,116,32,111,102,32,112,114,
+111,112,101,114,108,121,41,46,10,10,84,104,101,32,116,97,120,111,110,111,109,121
+,109,117,99,104,32,111,102,32,60,47,115,112,97,110,62,10,34,32,100,97,116,97,45,
+115,114,116,117,103,117,195,170,115,115,99,114,111,108,108,84,111,32,112,114,111
+,106,101,99,116,60,104,101,97,100,62,13,10,97,116,116,111,114,110,101,121,101,
+109,112,104,97,115,105,115,115,112,111,110,115,111,114,115,102,97,110,99,121,98,
+111,120,119,111,114,108,100,39,115,32,119,105,108,100,108,105,102,101,99,104,101
+,99,107,101,100,61,115,101,115,115,105,111,110,115,112,114,111,103,114,97,109,
+109,112,120,59,102,111,110,116,45,32,80,114,111,106,101,99,116,106,111,117,114,
+110,97,108,115,98,101,108,105,101,118,101,100,118,97,99,97,116,105,111,110,116,
+104,111,109,112,115,111,110,108,105,103,104,116,105,110,103,97,110,100,32,116,
+104,101,32,115,112,101,99,105,97,108,32,98,111,114,100,101,114,61,48,99,104,101,
+99,107,105,110,103,60,47,116,98,111,100,121,62,60,98,117,116,116,111,110,32,67,
+111,109,112,108,101,116,101,99,108,101,97,114,102,105,120,10,60,104,101,97,100,
+62,10,97,114,116,105,99,108,101,32,60,115,101,99,116,105,111,110,102,105,110,100
+,105,110,103,115,114,111,108,101,32,105,110,32,112,111,112,117,108,97,114,32,32,
+79,99,116,111,98,101,114,119,101,98,115,105,116,101,32,101,120,112,111,115,117,
+114,101,117,115,101,100,32,116,111,32,32,99,104,97,110,103,101,115,111,112,101,
+114,97,116,101,100,99,108,105,99,107,105,110,103,101,110,116,101,114,105,110,103
+,99,111,109,109,97,110,100,115,105,110,102,111,114,109,101,100,32,110,117,109,98
+,101,114,115,32,32,60,47,100,105,118,62,99,114,101,97,116,105,110,103,111,110,83
+,117,98,109,105,116,109,97,114,121,108,97,110,100,99,111,108,108,101,103,101,115
+,97,110,97,108,121,116,105,99,108,105,115,116,105,110,103,115,99,111,110,116,97,
+99,116,46,108,111,103,103,101,100,73,110,97,100,118,105,115,111,114,121,115,105,
+98,108,105,110,103,115,99,111,110,116,101,110,116,34,115,38,113,117,111,116,59,
+41,115,46,32,84,104,105,115,32,112,97,99,107,97,103,101,115,99,104,101,99,107,98
+,111,120,115,117,103,103,101,115,116,115,112,114,101,103,110,97,110,116,116,111,
+109,111,114,114,111,119,115,112,97,99,105,110,103,61,105,99,111,110,46,112,110,
+103,106,97,112,97,110,101,115,101,99,111,100,101,98,97,115,101,98,117,116,116,
+111,110,34,62,103,97,109,98,108,105,110,103,115,117,99,104,32,97,115,32,44,32,
+119,104,105,108,101,32,60,47,115,112,97,110,62,32,109,105,115,115,111,117,114,
+105,115,112,111,114,116,105,110,103,116,111,112,58,49,112,120,32,46,60,47,115,
+112,97,110,62,116,101,110,115,105,111,110,115,119,105,100,116,104,61,34,50,108,
+97,122,121,108,111,97,100,110,111,118,101,109,98,101,114,117,115,101,100,32,105,
+110,32,104,101,105,103,104,116,61,34,99,114,105,112,116,34,62,10,38,110,98,115,
+112,59,60,47,60,116,114,62,60,116,100,32,104,101,105,103,104,116,58,50,47,112,
+114,111,100,117,99,116,99,111,117,110,116,114,121,32,105,110,99,108,117,100,101,
+32,102,111,111,116,101,114,34,32,38,108,116,59,33,45,45,32,116,105,116,108,101,
+34,62,60,47,106,113,117,101,114,121,46,60,47,102,111,114,109,62,10,40,231,174,
+128,228,189,147,41,40,231,185,129,233,171,148,41,104,114,118,97,116,115,107,105,
+105,116,97,108,105,97,110,111,114,111,109,195,162,110,196,131,116,195,188,114,
+107,195,167,101,216,167,216,177,216,175,217,136,116,97,109,98,105,195,169,110,
+110,111,116,105,99,105,97,115,109,101,110,115,97,106,101,115,112,101,114,115,111
+,110,97,115,100,101,114,101,99,104,111,115,110,97,99,105,111,110,97,108,115,101,
+114,118,105,99,105,111,99,111,110,116,97,99,116,111,117,115,117,97,114,105,111,
+115,112,114,111,103,114,97,109,97,103,111,98,105,101,114,110,111,101,109,112,114
+,101,115,97,115,97,110,117,110,99,105,111,115,118,97,108,101,110,99,105,97,99,
+111,108,111,109,98,105,97,100,101,115,112,117,195,169,115,100,101,112,111,114,
+116,101,115,112,114,111,121,101,99,116,111,112,114,111,100,117,99,116,111,112,
+195,186,98,108,105,99,111,110,111,115,111,116,114,111,115,104,105,115,116,111,
+114,105,97,112,114,101,115,101,110,116,101,109,105,108,108,111,110,101,115,109,
+101,100,105,97,110,116,101,112,114,101,103,117,110,116,97,97,110,116,101,114,105
+,111,114,114,101,99,117,114,115,111,115,112,114,111,98,108,101,109,97,115,97,110
+,116,105,97,103,111,110,117,101,115,116,114,111,115,111,112,105,110,105,195,179,
+110,105,109,112,114,105,109,105,114,109,105,101,110,116,114,97,115,97,109,195,
+169,114,105,99,97,118,101,110,100,101,100,111,114,115,111,99,105,101,100,97,100,
+114,101,115,112,101,99,116,111,114,101,97,108,105,122,97,114,114,101,103,105,115
+,116,114,111,112,97,108,97,98,114,97,115,105,110,116,101,114,195,169,115,101,110
+,116,111,110,99,101,115,101,115,112,101,99,105,97,108,109,105,101,109,98,114,111
+,115,114,101,97,108,105,100,97,100,99,195,179,114,100,111,98,97,122,97,114,97,
+103,111,122,97,112,195,161,103,105,110,97,115,115,111,99,105,97,108,101,115,98,
+108,111,113,117,101,97,114,103,101,115,116,105,195,179,110,97,108,113,117,105,
+108,101,114,115,105,115,116,101,109,97,115,99,105,101,110,99,105,97,115,99,111,
+109,112,108,101,116,111,118,101,114,115,105,195,179,110,99,111,109,112,108,101,
+116,97,101,115,116,117,100,105,111,115,112,195,186,98,108,105,99,97,111,98,106,
+101,116,105,118,111,97,108,105,99,97,110,116,101,98,117,115,99,97,100,111,114,99
+,97,110,116,105,100,97,100,101,110,116,114,97,100,97,115,97,99,99,105,111,110,
+101,115,97,114,99,104,105,118,111,115,115,117,112,101,114,105,111,114,109,97,121
+,111,114,195,173,97,97,108,101,109,97,110,105,97,102,117,110,99,105,195,179,110,
+195,186,108,116,105,109,111,115,104,97,99,105,101,110,100,111,97,113,117,101,108
+,108,111,115,101,100,105,99,105,195,179,110,102,101,114,110,97,110,100,111,97,
+109,98,105,101,110,116,101,102,97,99,101,98,111,111,107,110,117,101,115,116,114,
+97,115,99,108,105,101,110,116,101,115,112,114,111,99,101,115,111,115,98,97,115,
+116,97,110,116,101,112,114,101,115,101,110,116,97,114,101,112,111,114,116,97,114
+,99,111,110,103,114,101,115,111,112,117,98,108,105,99,97,114,99,111,109,101,114,
+99,105,111,99,111,110,116,114,97,116,111,106,195,179,118,101,110,101,115,100,105
+,115,116,114,105,116,111,116,195,169,99,110,105,99,97,99,111,110,106,117,110,116
+,111,101,110,101,114,103,195,173,97,116,114,97,98,97,106,97,114,97,115,116,117,
+114,105,97,115,114,101,99,105,101,110,116,101,117,116,105,108,105,122,97,114,98,
+111,108,101,116,195,173,110,115,97,108,118,97,100,111,114,99,111,114,114,101,99,
+116,97,116,114,97,98,97,106,111,115,112,114,105,109,101,114,111,115,110,101,103,
+111,99,105,111,115,108,105,98,101,114,116,97,100,100,101,116,97,108,108,101,115,
+112,97,110,116,97,108,108,97,112,114,195,179,120,105,109,111,97,108,109,101,114,
+195,173,97,97,110,105,109,97,108,101,115,113,117,105,195,169,110,101,115,99,111,
+114,97,122,195,179,110,115,101,99,99,105,195,179,110,98,117,115,99,97,110,100,
+111,111,112,99,105,111,110,101,115,101,120,116,101,114,105,111,114,99,111,110,99
+,101,112,116,111,116,111,100,97,118,195,173,97,103,97,108,101,114,195,173,97,101
+,115,99,114,105,98,105,114,109,101,100,105,99,105,110,97,108,105,99,101,110,99,
+105,97,99,111,110,115,117,108,116,97,97,115,112,101,99,116,111,115,99,114,195,
+173,116,105,99,97,100,195,179,108,97,114,101,115,106,117,115,116,105,99,105,97,
+100,101,98,101,114,195,161,110,112,101,114,195,173,111,100,111,110,101,99,101,
+115,105,116,97,109,97,110,116,101,110,101,114,112,101,113,117,101,195,177,111,
+114,101,99,105,98,105,100,97,116,114,105,98,117,110,97,108,116,101,110,101,114,
+105,102,101,99,97,110,99,105,195,179,110,99,97,110,97,114,105,97,115,100,101,115
+,99,97,114,103,97,100,105,118,101,114,115,111,115,109,97,108,108,111,114,99,97,
+114,101,113,117,105,101,114,101,116,195,169,99,110,105,99,111,100,101,98,101,114
+,195,173,97,118,105,118,105,101,110,100,97,102,105,110,97,110,122,97,115,97,100,
+101,108,97,110,116,101,102,117,110,99,105,111,110,97,99,111,110,115,101,106,111,
+115,100,105,102,195,173,99,105,108,99,105,117,100,97,100,101,115,97,110,116,105,
+103,117,97,115,97,118,97,110,122,97,100,97,116,195,169,114,109,105,110,111,117,
+110,105,100,97,100,101,115,115,195,161,110,99,104,101,122,99,97,109,112,97,195,
+177,97,115,111,102,116,111,110,105,99,114,101,118,105,115,116,97,115,99,111,110,
+116,105,101,110,101,115,101,99,116,111,114,101,115,109,111,109,101,110,116,111,
+115,102,97,99,117,108,116,97,100,99,114,195,169,100,105,116,111,100,105,118,101,
+114,115,97,115,115,117,112,117,101,115,116,111,102,97,99,116,111,114,101,115,115
+,101,103,117,110,100,111,115,112,101,113,117,101,195,177,97,208,179,208,190,208,
+180,208,176,208,181,209,129,208,187,208,184,208,181,209,129,209,130,209,140,208,
+177,209,139,208,187,208,190,208,177,209,139,209,130,209,140,209,141,209,130,208,
+190,208,188,208,149,209,129,208,187,208,184,209,130,208,190,208,179,208,190,208,
+188,208,181,208,189,209,143,208,178,209,129,208,181,209,133,209,141,209,130,208,
+190,208,185,208,180,208,176,208,182,208,181,208,177,209,139,208,187,208,184,208,
+179,208,190,208,180,209,131,208,180,208,181,208,189,209,140,209,141,209,130,208,
+190,209,130,208,177,209,139,208,187,208,176,209,129,208,181,208,177,209,143,208,
+190,208,180,208,184,208,189,209,129,208,181,208,177,208,181,208,189,208,176,208,
+180,208,190,209,129,208,176,208,185,209,130,209,132,208,190,209,130,208,190,208,
+189,208,181,208,179,208,190,209,129,208,178,208,190,208,184,209,129,208,178,208,
+190,208,185,208,184,208,179,209,128,209,139,209,130,208,190,208,182,208,181,208,
+178,209,129,208,181,208,188,209,129,208,178,208,190,209,142,208,187,208,184,209,
+136,209,140,209,141,209,130,208,184,209,133,208,191,208,190,208,186,208,176,208,
+180,208,189,208,181,208,185,208,180,208,190,208,188,208,176,208,188,208,184,209,
+128,208,176,208,187,208,184,208,177,208,190,209,130,208,181,208,188,209,131,209,
+133,208,190,209,130,209,143,208,180,208,178,209,131,209,133,209,129,208,181,209,
+130,208,184,208,187,209,142,208,180,208,184,208,180,208,181,208,187,208,190,208,
+188,208,184,209,128,208,181,209,130,208,181,208,177,209,143,209,129,208,178,208,
+190,208,181,208,178,208,184,208,180,208,181,209,135,208,181,208,179,208,190,209,
+141,209,130,208,184,208,188,209,129,209,135,208,181,209,130,209,130,208,181,208,
+188,209,139,209,134,208,181,208,189,209,139,209,129,209,130,208,176,208,187,208,
+178,208,181,208,180,209,140,209,130,208,181,208,188,208,181,208,178,208,190,208,
+180,209,139,209,130,208,181,208,177,208,181,208,178,209,139,209,136,208,181,208,
+189,208,176,208,188,208,184,209,130,208,184,208,191,208,176,209,130,208,190,208,
+188,209,131,208,191,209,128,208,176,208,178,208,187,208,184,209,134,208,176,208,
+190,208,180,208,189,208,176,208,179,208,190,208,180,209,139,208,183,208,189,208,
+176,209,142,208,188,208,190,208,179,209,131,208,180,209,128,209,131,208,179,208,
+178,209,129,208,181,208,185,208,184,208,180,208,181,209,130,208,186,208,184,208,
+189,208,190,208,190,208,180,208,189,208,190,208,180,208,181,208,187,208,176,208,
+180,208,181,208,187,208,181,209,129,209,128,208,190,208,186,208,184,209,142,208,
+189,209,143,208,178,208,181,209,129,209,140,208,149,209,129,209,130,209,140,209,
+128,208,176,208,183,208,176,208,189,208,176,209,136,208,184,216,167,217,132,217,
+132,217,135,216,167,217,132,216,170,217,138,216,172,217,133,217,138,216,185,216,
+174,216,167,216,181,216,169,216,167,217,132,216,176,217,138,216,185,217,132,217,
+138,217,135,216,172,216,175,217,138,216,175,216,167,217,132,216,162,217,134,216,
+167,217,132,216,177,216,175,216,170,216,173,217,131,217,133,216,181,217,129,216,
+173,216,169,217,131,216,167,217,134,216,170,216,167,217,132,217,132,217,138,217,
+138,217,131,217,136,217,134,216,180,216,168,217,131,216,169,217,129,217,138,217,
+135,216,167,216,168,217,134,216,167,216,170,216,173,217,136,216,167,216,161,216,
+163,217,131,216,171,216,177,216,174,217,132,216,167,217,132,216,167,217,132,216,
+173,216,168,216,175,217,132,217,138,217,132,216,175,216,177,217,136,216,179,216,
+167,216,182,216,186,216,183,216,170,217,131,217,136,217,134,217,135,217,134,216,
+167,217,131,216,179,216,167,216,173,216,169,217,134,216,167,216,175,217,138,216,
+167,217,132,216,183,216,168,216,185,217,132,217,138,217,131,216,180,217,131,216,
+177,216,167,217,138,217,133,217,131,217,134,217,133,217,134,217,135,216,167,216,
+180,216,177,217,131,216,169,216,177,216,166,217,138,216,179,217,134,216,180,217,
+138,216,183,217,133,216,167,216,176,216,167,216,167,217,132,217,129,217,134,216,
+180,216,168,216,167,216,168,216,170,216,185,216,168,216,177,216,177,216,173,217,
+133,216,169,217,131,216,167,217,129,216,169,217,138,217,130,217,136,217,132,217,
+133,216,177,217,131,216,178,217,131,217,132,217,133,216,169,216,163,216,173,217,
+133,216,175,217,130,217,132,216,168,217,138,217,138,216,185,217,134,217,138,216,
+181,217,136,216,177,216,169,216,183,216,177,217,138,217,130,216,180,216,167,216,
+177,217,131,216,172,217,136,216,167,217,132,216,163,216,174,216,177,217,137,217,
+133,216,185,217,134,216,167,216,167,216,168,216,173,216,171,216,185,216,177,217,
+136,216,182,216,168,216,180,217,131,217,132,217,133,216,179,216,172,217,132,216,
+168,217,134,216,167,217,134,216,174,216,167,217,132,216,175,217,131,216,170,216,
+167,216,168,217,131,217,132,217,138,216,169,216,168,216,175,217,136,217,134,216,
+163,217,138,216,182,216,167,217,138,217,136,216,172,216,175,217,129,216,177,217,
+138,217,130,217,131,216,170,216,168,216,170,216,163,217,129,216,182,217,132,217,
+133,216,183,216,168,216,174,216,167,217,131,216,171,216,177,216,168,216,167,216,
+177,217,131,216,167,217,129,216,182,217,132,216,167,216,173,217,132,217,137,217,
+134,217,129,216,179,217,135,216,163,217,138,216,167,217,133,216,177,216,175,217,
+136,216,175,216,163,217,134,217,135,216,167,216,175,217,138,217,134,216,167,216,
+167,217,132,216,167,217,134,217,133,216,185,216,177,216,182,216,170,216,185,217,
+132,217,133,216,175,216,167,216,174,217,132,217,133,217,133,217,131,217,134,0,0,
+0,0,0,0,0,0,1,0,1,0,1,0,1,0,2,0,2,0,2,0,2,0,4,0,4,0,4,0,4,0,0,1,2,3,4,5,6,7,7,6,
+5,4,3,2,1,0,8,9,10,11,12,13,14,15,15,14,13,12,11,10,9,8,16,17,18,19,20,21,22,23,
+23,22,21,20,19,18,17,16,24,25,26,27,28,29,30,31,31,30,29,28,27,26,25,24,255,255,
+255,255,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,2,0,0,0,2,0,0,0,1,0,0,0,1,0,0,0,
+3,0,0,0,255,255,0,1,0,0,0,1,0,0,255,255,0,1,0,0,0,8,0,8,0,8,0,8,0,0,0,1,0,2,0,3,
+0,4,0,5,0,6,0,7,114,101,115,111,117,114,99,101,115,99,111,117,110,116,114,105,
+101,115,113,117,101,115,116,105,111,110,115,101,113,117,105,112,109,101,110,116,
+99,111,109,109,117,110,105,116,121,97,118,97,105,108,97,98,108,101,104,105,103,
+104,108,105,103,104,116,68,84,68,47,120,104,116,109,108,109,97,114,107,101,116,
+105,110,103,107,110,111,119,108,101,100,103,101,115,111,109,101,116,104,105,110,
+103,99,111,110,116,97,105,110,101,114,100,105,114,101,99,116,105,111,110,115,117
+,98,115,99,114,105,98,101,97,100,118,101,114,116,105,115,101,99,104,97,114,97,99
+,116,101,114,34,32,118,97,108,117,101,61,34,60,47,115,101,108,101,99,116,62,65,
+117,115,116,114,97,108,105,97,34,32,99,108,97,115,115,61,34,115,105,116,117,97,
+116,105,111,110,97,117,116,104,111,114,105,116,121,102,111,108,108,111,119,105,
+110,103,112,114,105,109,97,114,105,108,121,111,112,101,114,97,116,105,111,110,99
+,104,97,108,108,101,110,103,101,100,101,118,101,108,111,112,101,100,97,110,111,
+110,121,109,111,117,115,102,117,110,99,116,105,111,110,32,102,117,110,99,116,105
+,111,110,115,99,111,109,112,97,110,105,101,115,115,116,114,117,99,116,117,114,
+101,97,103,114,101,101,109,101,110,116,34,32,116,105,116,108,101,61,34,112,111,
+116,101,110,116,105,97,108,101,100,117,99,97,116,105,111,110,97,114,103,117,109,
+101,110,116,115,115,101,99,111,110,100,97,114,121,99,111,112,121,114,105,103,104
+,116,108,97,110,103,117,97,103,101,115,101,120,99,108,117,115,105,118,101,99,111
+,110,100,105,116,105,111,110,60,47,102,111,114,109,62,13,10,115,116,97,116,101,
+109,101,110,116,97,116,116,101,110,116,105,111,110,66,105,111,103,114,97,112,104
+,121,125,32,101,108,115,101,32,123,10,115,111,108,117,116,105,111,110,115,119,
+104,101,110,32,116,104,101,32,65,110,97,108,121,116,105,99,115,116,101,109,112,
+108,97,116,101,115,100,97,110,103,101,114,111,117,115,115,97,116,101,108,108,105
+,116,101,100,111,99,117,109,101,110,116,115,112,117,98,108,105,115,104,101,114,
+105,109,112,111,114,116,97,110,116,112,114,111,116,111,116,121,112,101,105,110,
+102,108,117,101,110,99,101,38,114,97,113,117,111,59,60,47,101,102,102,101,99,116
+,105,118,101,103,101,110,101,114,97,108,108,121,116,114,97,110,115,102,111,114,
+109,98,101,97,117,116,105,102,117,108,116,114,97,110,115,112,111,114,116,111,114
+,103,97,110,105,122,101,100,112,117,98,108,105,115,104,101,100,112,114,111,109,
+105,110,101,110,116,117,110,116,105,108,32,116,104,101,116,104,117,109,98,110,97
+,105,108,78,97,116,105,111,110,97,108,32,46,102,111,99,117,115,40,41,59,111,118,
+101,114,32,116,104,101,32,109,105,103,114,97,116,105,111,110,97,110,110,111,117,
+110,99,101,100,102,111,111,116,101,114,34,62,10,101,120,99,101,112,116,105,111,
+110,108,101,115,115,32,116,104,97,110,101,120,112,101,110,115,105,118,101,102,
+111,114,109,97,116,105,111,110,102,114,97,109,101,119,111,114,107,116,101,114,
+114,105,116,111,114,121,110,100,105,99,97,116,105,111,110,99,117,114,114,101,110
+,116,108,121,99,108,97,115,115,78,97,109,101,99,114,105,116,105,99,105,115,109,
+116,114,97,100,105,116,105,111,110,101,108,115,101,119,104,101,114,101,65,108,
+101,120,97,110,100,101,114,97,112,112,111,105,110,116,101,100,109,97,116,101,114
+,105,97,108,115,98,114,111,97,100,99,97,115,116,109,101,110,116,105,111,110,101,
+100,97,102,102,105,108,105,97,116,101,60,47,111,112,116,105,111,110,62,116,114,
+101,97,116,109,101,110,116,100,105,102,102,101,114,101,110,116,47,100,101,102,97
+,117,108,116,46,80,114,101,115,105,100,101,110,116,111,110,99,108,105,99,107,61,
+34,98,105,111,103,114,97,112,104,121,111,116,104,101,114,119,105,115,101,112,101
+,114,109,97,110,101,110,116,70,114,97,110,195,167,97,105,115,72,111,108,108,121,
+119,111,111,100,101,120,112,97,110,115,105,111,110,115,116,97,110,100,97,114,100
+,115,60,47,115,116,121,108,101,62,10,114,101,100,117,99,116,105,111,110,68,101,
+99,101,109,98,101,114,32,112,114,101,102,101,114,114,101,100,67,97,109,98,114,
+105,100,103,101,111,112,112,111,110,101,110,116,115,66,117,115,105,110,101,115,
+115,32,99,111,110,102,117,115,105,111,110,62,10,60,116,105,116,108,101,62,112,
+114,101,115,101,110,116,101,100,101,120,112,108,97,105,110,101,100,100,111,101,
+115,32,110,111,116,32,119,111,114,108,100,119,105,100,101,105,110,116,101,114,
+102,97,99,101,112,111,115,105,116,105,111,110,115,110,101,119,115,112,97,112,101
+,114,60,47,116,97,98,108,101,62,10,109,111,117,110,116,97,105,110,115,108,105,
+107,101,32,116,104,101,32,101,115,115,101,110,116,105,97,108,102,105,110,97,110,
+99,105,97,108,115,101,108,101,99,116,105,111,110,97,99,116,105,111,110,61,34,47,
+97,98,97,110,100,111,110,101,100,69,100,117,99,97,116,105,111,110,112,97,114,115
+,101,73,110,116,40,115,116,97,98,105,108,105,116,121,117,110,97,98,108,101,32,
+116,111,60,47,116,105,116,108,101,62,10,114,101,108,97,116,105,111,110,115,78,
+111,116,101,32,116,104,97,116,101,102,102,105,99,105,101,110,116,112,101,114,102
+,111,114,109,101,100,116,119,111,32,121,101,97,114,115,83,105,110,99,101,32,116,
+104,101,116,104,101,114,101,102,111,114,101,119,114,97,112,112,101,114,34,62,97,
+108,116,101,114,110,97,116,101,105,110,99,114,101,97,115,101,100,66,97,116,116,
+108,101,32,111,102,112,101,114,99,101,105,118,101,100,116,114,121,105,110,103,32
+,116,111,110,101,99,101,115,115,97,114,121,112,111,114,116,114,97,121,101,100,
+101,108,101,99,116,105,111,110,115,69,108,105,122,97,98,101,116,104,60,47,105,
+102,114,97,109,101,62,100,105,115,99,111,118,101,114,121,105,110,115,117,114,97,
+110,99,101,115,46,108,101,110,103,116,104,59,108,101,103,101,110,100,97,114,121,
+71,101,111,103,114,97,112,104,121,99,97,110,100,105,100,97,116,101,99,111,114,
+112,111,114,97,116,101,115,111,109,101,116,105,109,101,115,115,101,114,118,105,
+99,101,115,46,105,110,104,101,114,105,116,101,100,60,47,115,116,114,111,110,103,
+62,67,111,109,109,117,110,105,116,121,114,101,108,105,103,105,111,117,115,108,
+111,99,97,116,105,111,110,115,67,111,109,109,105,116,116,101,101,98,117,105,108,
+100,105,110,103,115,116,104,101,32,119,111,114,108,100,110,111,32,108,111,110,
+103,101,114,98,101,103,105,110,110,105,110,103,114,101,102,101,114,101,110,99,
+101,99,97,110,110,111,116,32,98,101,102,114,101,113,117,101,110,99,121,116,121,
+112,105,99,97,108,108,121,105,110,116,111,32,116,104,101,32,114,101,108,97,116,
+105,118,101,59,114,101,99,111,114,100,105,110,103,112,114,101,115,105,100,101,
+110,116,105,110,105,116,105,97,108,108,121,116,101,99,104,110,105,113,117,101,
+116,104,101,32,111,116,104,101,114,105,116,32,99,97,110,32,98,101,101,120,105,
+115,116,101,110,99,101,117,110,100,101,114,108,105,110,101,116,104,105,115,32,
+116,105,109,101,116,101,108,101,112,104,111,110,101,105,116,101,109,115,99,111,
+112,101,112,114,97,99,116,105,99,101,115,97,100,118,97,110,116,97,103,101,41,59,
+114,101,116,117,114,110,32,70,111,114,32,111,116,104,101,114,112,114,111,118,105
+,100,105,110,103,100,101,109,111,99,114,97,99,121,98,111,116,104,32,116,104,101,
+32,101,120,116,101,110,115,105,118,101,115,117,102,102,101,114,105,110,103,115,
+117,112,112,111,114,116,101,100,99,111,109,112,117,116,101,114,115,32,102,117,
+110,99,116,105,111,110,112,114,97,99,116,105,99,97,108,115,97,105,100,32,116,104
+,97,116,105,116,32,109,97,121,32,98,101,69,110,103,108,105,115,104,60,47,102,114
+,111,109,32,116,104,101,32,115,99,104,101,100,117,108,101,100,100,111,119,110,
+108,111,97,100,115,60,47,108,97,98,101,108,62,10,115,117,115,112,101,99,116,101,
+100,109,97,114,103,105,110,58,32,48,115,112,105,114,105,116,117,97,108,60,47,104
+,101,97,100,62,10,10,109,105,99,114,111,115,111,102,116,103,114,97,100,117,97,
+108,108,121,100,105,115,99,117,115,115,101,100,104,101,32,98,101,99,97,109,101,
+101,120,101,99,117,116,105,118,101,106,113,117,101,114,121,46,106,115,104,111,
+117,115,101,104,111,108,100,99,111,110,102,105,114,109,101,100,112,117,114,99,
+104,97,115,101,100,108,105,116,101,114,97,108,108,121,100,101,115,116,114,111,
+121,101,100,117,112,32,116,111,32,116,104,101,118,97,114,105,97,116,105,111,110,
+114,101,109,97,105,110,105,110,103,105,116,32,105,115,32,110,111,116,99,101,110,
+116,117,114,105,101,115,74,97,112,97,110,101,115,101,32,97,109,111,110,103,32,
+116,104,101,99,111,109,112,108,101,116,101,100,97,108,103,111,114,105,116,104,
+109,105,110,116,101,114,101,115,116,115,114,101,98,101,108,108,105,111,110,117,
+110,100,101,102,105,110,101,100,101,110,99,111,117,114,97,103,101,114,101,115,
+105,122,97,98,108,101,105,110,118,111,108,118,105,110,103,115,101,110,115,105,
+116,105,118,101,117,110,105,118,101,114,115,97,108,112,114,111,118,105,115,105,
+111,110,40,97,108,116,104,111,117,103,104,102,101,97,116,117,114,105,110,103,99,
+111,110,100,117,99,116,101,100,41,44,32,119,104,105,99,104,32,99,111,110,116,105
+,110,117,101,100,45,104,101,97,100,101,114,34,62,70,101,98,114,117,97,114,121,32
+,110,117,109,101,114,111,117,115,32,111,118,101,114,102,108,111,119,58,99,111,
+109,112,111,110,101,110,116,102,114,97,103,109,101,110,116,115,101,120,99,101,
+108,108,101,110,116,99,111,108,115,112,97,110,61,34,116,101,99,104,110,105,99,97
+,108,110,101,97,114,32,116,104,101,32,65,100,118,97,110,99,101,100,32,115,111,
+117,114,99,101,32,111,102,101,120,112,114,101,115,115,101,100,72,111,110,103,32,
+75,111,110,103,32,70,97,99,101,98,111,111,107,109,117,108,116,105,112,108,101,32
+,109,101,99,104,97,110,105,115,109,101,108,101,118,97,116,105,111,110,111,102,
+102,101,110,115,105,118,101,60,47,102,111,114,109,62,10,9,115,112,111,110,115,
+111,114,101,100,100,111,99,117,109,101,110,116,46,111,114,32,38,113,117,111,116,
+59,116,104,101,114,101,32,97,114,101,116,104,111,115,101,32,119,104,111,109,111,
+118,101,109,101,110,116,115,112,114,111,99,101,115,115,101,115,100,105,102,102,
+105,99,117,108,116,115,117,98,109,105,116,116,101,100,114,101,99,111,109,109,101
+,110,100,99,111,110,118,105,110,99,101,100,112,114,111,109,111,116,105,110,103,
+34,32,119,105,100,116,104,61,34,46,114,101,112,108,97,99,101,40,99,108,97,115,
+115,105,99,97,108,99,111,97,108,105,116,105,111,110,104,105,115,32,102,105,114,
+115,116,100,101,99,105,115,105,111,110,115,97,115,115,105,115,116,97,110,116,105
+,110,100,105,99,97,116,101,100,101,118,111,108,117,116,105,111,110,45,119,114,97
+,112,112,101,114,34,101,110,111,117,103,104,32,116,111,97,108,111,110,103,32,116
+,104,101,100,101,108,105,118,101,114,101,100,45,45,62,13,10,60,33,45,45,65,109,
+101,114,105,99,97,110,32,112,114,111,116,101,99,116,101,100,78,111,118,101,109,
+98,101,114,32,60,47,115,116,121,108,101,62,60,102,117,114,110,105,116,117,114,
+101,73,110,116,101,114,110,101,116,32,32,111,110,98,108,117,114,61,34,115,117,
+115,112,101,110,100,101,100,114,101,99,105,112,105,101,110,116,98,97,115,101,100
+,32,111,110,32,77,111,114,101,111,118,101,114,44,97,98,111,108,105,115,104,101,
+100,99,111,108,108,101,99,116,101,100,119,101,114,101,32,109,97,100,101,101,109,
+111,116,105,111,110,97,108,101,109,101,114,103,101,110,99,121,110,97,114,114,97,
+116,105,118,101,97,100,118,111,99,97,116,101,115,112,120,59,98,111,114,100,101,
+114,99,111,109,109,105,116,116,101,100,100,105,114,61,34,108,116,114,34,101,109,
+112,108,111,121,101,101,115,114,101,115,101,97,114,99,104,46,32,115,101,108,101,
+99,116,101,100,115,117,99,99,101,115,115,111,114,99,117,115,116,111,109,101,114,
+115,100,105,115,112,108,97,121,101,100,83,101,112,116,101,109,98,101,114,97,100,
+100,67,108,97,115,115,40,70,97,99,101,98,111,111,107,32,115,117,103,103,101,115,
+116,101,100,97,110,100,32,108,97,116,101,114,111,112,101,114,97,116,105,110,103,
+101,108,97,98,111,114,97,116,101,83,111,109,101,116,105,109,101,115,73,110,115,
+116,105,116,117,116,101,99,101,114,116,97,105,110,108,121,105,110,115,116,97,108
+,108,101,100,102,111,108,108,111,119,101,114,115,74,101,114,117,115,97,108,101,
+109,116,104,101,121,32,104,97,118,101,99,111,109,112,117,116,105,110,103,103,101
+,110,101,114,97,116,101,100,112,114,111,118,105,110,99,101,115,103,117,97,114,97
+,110,116,101,101,97,114,98,105,116,114,97,114,121,114,101,99,111,103,110,105,122
+,101,119,97,110,116,101,100,32,116,111,112,120,59,119,105,100,116,104,58,116,104
+,101,111,114,121,32,111,102,98,101,104,97,118,105,111,117,114,87,104,105,108,101
+,32,116,104,101,101,115,116,105,109,97,116,101,100,98,101,103,97,110,32,116,111,
+32,105,116,32,98,101,99,97,109,101,109,97,103,110,105,116,117,100,101,109,117,
+115,116,32,104,97,118,101,109,111,114,101,32,116,104,97,110,68,105,114,101,99,
+116,111,114,121,101,120,116,101,110,115,105,111,110,115,101,99,114,101,116,97,
+114,121,110,97,116,117,114,97,108,108,121,111,99,99,117,114,114,105,110,103,118,
+97,114,105,97,98,108,101,115,103,105,118,101,110,32,116,104,101,112,108,97,116,
+102,111,114,109,46,60,47,108,97,98,101,108,62,60,102,97,105,108,101,100,32,116,
+111,99,111,109,112,111,117,110,100,115,107,105,110,100,115,32,111,102,32,115,111
+,99,105,101,116,105,101,115,97,108,111,110,103,115,105,100,101,32,45,45,38,103,
+116,59,10,10,115,111,117,116,104,119,101,115,116,116,104,101,32,114,105,103,104,
+116,114,97,100,105,97,116,105,111,110,109,97,121,32,104,97,118,101,32,117,110,
+101,115,99,97,112,101,40,115,112,111,107,101,110,32,105,110,34,32,104,114,101,
+102,61,34,47,112,114,111,103,114,97,109,109,101,111,110,108,121,32,116,104,101,
+32,99,111,109,101,32,102,114,111,109,100,105,114,101,99,116,111,114,121,98,117,
+114,105,101,100,32,105,110,97,32,115,105,109,105,108,97,114,116,104,101,121,32,
+119,101,114,101,60,47,102,111,110,116,62,60,47,78,111,114,119,101,103,105,97,110
+,115,112,101,99,105,102,105,101,100,112,114,111,100,117,99,105,110,103,112,97,
+115,115,101,110,103,101,114,40,110,101,119,32,68,97,116,101,116,101,109,112,111,
+114,97,114,121,102,105,99,116,105,111,110,97,108,65,102,116,101,114,32,116,104,
+101,101,113,117,97,116,105,111,110,115,100,111,119,110,108,111,97,100,46,114,101
+,103,117,108,97,114,108,121,100,101,118,101,108,111,112,101,114,97,98,111,118,
+101,32,116,104,101,108,105,110,107,101,100,32,116,111,112,104,101,110,111,109,
+101,110,97,112,101,114,105,111,100,32,111,102,116,111,111,108,116,105,112,34,62,
+115,117,98,115,116,97,110,99,101,97,117,116,111,109,97,116,105,99,97,115,112,101
+,99,116,32,111,102,65,109,111,110,103,32,116,104,101,99,111,110,110,101,99,116,
+101,100,101,115,116,105,109,97,116,101,115,65,105,114,32,70,111,114,99,101,115,
+121,115,116,101,109,32,111,102,111,98,106,101,99,116,105,118,101,105,109,109,101
+,100,105,97,116,101,109,97,107,105,110,103,32,105,116,112,97,105,110,116,105,110
+,103,115,99,111,110,113,117,101,114,101,100,97,114,101,32,115,116,105,108,108,
+112,114,111,99,101,100,117,114,101,103,114,111,119,116,104,32,111,102,104,101,97
+,100,101,100,32,98,121,69,117,114,111,112,101,97,110,32,100,105,118,105,115,105,
+111,110,115,109,111,108,101,99,117,108,101,115,102,114,97,110,99,104,105,115,101
+,105,110,116,101,110,116,105,111,110,97,116,116,114,97,99,116,101,100,99,104,105
+,108,100,104,111,111,100,97,108,115,111,32,117,115,101,100,100,101,100,105,99,97
+,116,101,100,115,105,110,103,97,112,111,114,101,100,101,103,114,101,101,32,111,
+102,102,97,116,104,101,114,32,111,102,99,111,110,102,108,105,99,116,115,60,47,97
+,62,60,47,112,62,10,99,97,109,101,32,102,114,111,109,119,101,114,101,32,117,115,
+101,100,110,111,116,101,32,116,104,97,116,114,101,99,101,105,118,105,110,103,69,
+120,101,99,117,116,105,118,101,101,118,101,110,32,109,111,114,101,97,99,99,101,
+115,115,32,116,111,99,111,109,109,97,110,100,101,114,80,111,108,105,116,105,99,
+97,108,109,117,115,105,99,105,97,110,115,100,101,108,105,99,105,111,117,115,112,
+114,105,115,111,110,101,114,115,97,100,118,101,110,116,32,111,102,85,84,70,45,56
+,34,32,47,62,60,33,91,67,68,65,84,65,91,34,62,67,111,110,116,97,99,116,83,111,
+117,116,104,101,114,110,32,98,103,99,111,108,111,114,61,34,115,101,114,105,101,
+115,32,111,102,46,32,73,116,32,119,97,115,32,105,110,32,69,117,114,111,112,101,
+112,101,114,109,105,116,116,101,100,118,97,108,105,100,97,116,101,46,97,112,112,
+101,97,114,105,110,103,111,102,102,105,99,105,97,108,115,115,101,114,105,111,117
+,115,108,121,45,108,97,110,103,117,97,103,101,105,110,105,116,105,97,116,101,100
+,101,120,116,101,110,100,105,110,103,108,111,110,103,45,116,101,114,109,105,110,
+102,108,97,116,105,111,110,115,117,99,104,32,116,104,97,116,103,101,116,67,111,
+111,107,105,101,109,97,114,107,101,100,32,98,121,60,47,98,117,116,116,111,110,62
+,105,109,112,108,101,109,101,110,116,98,117,116,32,105,116,32,105,115,105,110,99
+,114,101,97,115,101,115,100,111,119,110,32,116,104,101,32,114,101,113,117,105,
+114,105,110,103,100,101,112,101,110,100,101,110,116,45,45,62,10,60,33,45,45,32,
+105,110,116,101,114,118,105,101,119,87,105,116,104,32,116,104,101,32,99,111,112,
+105,101,115,32,111,102,99,111,110,115,101,110,115,117,115,119,97,115,32,98,117,
+105,108,116,86,101,110,101,122,117,101,108,97,40,102,111,114,109,101,114,108,121
+,116,104,101,32,115,116,97,116,101,112,101,114,115,111,110,110,101,108,115,116,
+114,97,116,101,103,105,99,102,97,118,111,117,114,32,111,102,105,110,118,101,110,
+116,105,111,110,87,105,107,105,112,101,100,105,97,99,111,110,116,105,110,101,110
+,116,118,105,114,116,117,97,108,108,121,119,104,105,99,104,32,119,97,115,112,114
+,105,110,99,105,112,108,101,67,111,109,112,108,101,116,101,32,105,100,101,110,
+116,105,99,97,108,115,104,111,119,32,116,104,97,116,112,114,105,109,105,116,105,
+118,101,97,119,97,121,32,102,114,111,109,109,111,108,101,99,117,108,97,114,112,
+114,101,99,105,115,101,108,121,100,105,115,115,111,108,118,101,100,85,110,100,
+101,114,32,116,104,101,118,101,114,115,105,111,110,61,34,62,38,110,98,115,112,59
+,60,47,73,116,32,105,115,32,116,104,101,32,84,104,105,115,32,105,115,32,119,105,
+108,108,32,104,97,118,101,111,114,103,97,110,105,115,109,115,115,111,109,101,32,
+116,105,109,101,70,114,105,101,100,114,105,99,104,119,97,115,32,102,105,114,115,
+116,116,104,101,32,111,110,108,121,32,102,97,99,116,32,116,104,97,116,102,111,
+114,109,32,105,100,61,34,112,114,101,99,101,100,105,110,103,84,101,99,104,110,
+105,99,97,108,112,104,121,115,105,99,105,115,116,111,99,99,117,114,115,32,105,
+110,110,97,118,105,103,97,116,111,114,115,101,99,116,105,111,110,34,62,115,112,
+97,110,32,105,100,61,34,115,111,117,103,104,116,32,116,111,98,101,108,111,119,32
+,116,104,101,115,117,114,118,105,118,105,110,103,125,60,47,115,116,121,108,101,
+62,104,105,115,32,100,101,97,116,104,97,115,32,105,110,32,116,104,101,99,97,117,
+115,101,100,32,98,121,112,97,114,116,105,97,108,108,121,101,120,105,115,116,105,
+110,103,32,117,115,105,110,103,32,116,104,101,119,97,115,32,103,105,118,101,110,
+97,32,108,105,115,116,32,111,102,108,101,118,101,108,115,32,111,102,110,111,116,
+105,111,110,32,111,102,79,102,102,105,99,105,97,108,32,100,105,115,109,105,115,
+115,101,100,115,99,105,101,110,116,105,115,116,114,101,115,101,109,98,108,101,
+115,100,117,112,108,105,99,97,116,101,101,120,112,108,111,115,105,118,101,114,
+101,99,111,118,101,114,101,100,97,108,108,32,111,116,104,101,114,103,97,108,108,
+101,114,105,101,115,123,112,97,100,100,105,110,103,58,112,101,111,112,108,101,32
+,111,102,114,101,103,105,111,110,32,111,102,97,100,100,114,101,115,115,101,115,
+97,115,115,111,99,105,97,116,101,105,109,103,32,97,108,116,61,34,105,110,32,109,
+111,100,101,114,110,115,104,111,117,108,100,32,98,101,109,101,116,104,111,100,32
+,111,102,114,101,112,111,114,116,105,110,103,116,105,109,101,115,116,97,109,112,
+110,101,101,100,101,100,32,116,111,116,104,101,32,71,114,101,97,116,114,101,103,
+97,114,100,105,110,103,115,101,101,109,101,100,32,116,111,118,105,101,119,101,
+100,32,97,115,105,109,112,97,99,116,32,111,110,105,100,101,97,32,116,104,97,116,
+116,104,101,32,87,111,114,108,100,104,101,105,103,104,116,32,111,102,101,120,112
+,97,110,100,105,110,103,84,104,101,115,101,32,97,114,101,99,117,114,114,101,110,
+116,34,62,99,97,114,101,102,117,108,108,121,109,97,105,110,116,97,105,110,115,99
+,104,97,114,103,101,32,111,102,67,108,97,115,115,105,99,97,108,97,100,100,114,
+101,115,115,101,100,112,114,101,100,105,99,116,101,100,111,119,110,101,114,115,
+104,105,112,60,100,105,118,32,105,100,61,34,114,105,103,104,116,34,62,13,10,114,
+101,115,105,100,101,110,99,101,108,101,97,118,101,32,116,104,101,99,111,110,116,
+101,110,116,34,62,97,114,101,32,111,102,116,101,110,32,32,125,41,40,41,59,13,10,
+112,114,111,98,97,98,108,121,32,80,114,111,102,101,115,115,111,114,45,98,117,116
+,116,111,110,34,32,114,101,115,112,111,110,100,101,100,115,97,121,115,32,116,104
+,97,116,104,97,100,32,116,111,32,98,101,112,108,97,99,101,100,32,105,110,72,117,
+110,103,97,114,105,97,110,115,116,97,116,117,115,32,111,102,115,101,114,118,101,
+115,32,97,115,85,110,105,118,101,114,115,97,108,101,120,101,99,117,116,105,111,
+110,97,103,103,114,101,103,97,116,101,102,111,114,32,119,104,105,99,104,105,110,
+102,101,99,116,105,111,110,97,103,114,101,101,100,32,116,111,104,111,119,101,118
+,101,114,44,32,112,111,112,117,108,97,114,34,62,112,108,97,99,101,100,32,111,110
+,99,111,110,115,116,114,117,99,116,101,108,101,99,116,111,114,97,108,115,121,109
+,98,111,108,32,111,102,105,110,99,108,117,100,105,110,103,114,101,116,117,114,
+110,32,116,111,97,114,99,104,105,116,101,99,116,67,104,114,105,115,116,105,97,
+110,112,114,101,118,105,111,117,115,32,108,105,118,105,110,103,32,105,110,101,97
+,115,105,101,114,32,116,111,112,114,111,102,101,115,115,111,114,10,38,108,116,59
+,33,45,45,32,101,102,102,101,99,116,32,111,102,97,110,97,108,121,116,105,99,115,
+119,97,115,32,116,97,107,101,110,119,104,101,114,101,32,116,104,101,116,111,111,
+107,32,111,118,101,114,98,101,108,105,101,102,32,105,110,65,102,114,105,107,97,
+97,110,115,97,115,32,102,97,114,32,97,115,112,114,101,118,101,110,116,101,100,
+119,111,114,107,32,119,105,116,104,97,32,115,112,101,99,105,97,108,60,102,105,
+101,108,100,115,101,116,67,104,114,105,115,116,109,97,115,82,101,116,114,105,101
+,118,101,100,10,10,73,110,32,116,104,101,32,98,97,99,107,32,105,110,116,111,110,
+111,114,116,104,101,97,115,116,109,97,103,97,122,105,110,101,115,62,60,115,116,
+114,111,110,103,62,99,111,109,109,105,116,116,101,101,103,111,118,101,114,110,
+105,110,103,103,114,111,117,112,115,32,111,102,115,116,111,114,101,100,32,105,
+110,101,115,116,97,98,108,105,115,104,97,32,103,101,110,101,114,97,108,105,116,
+115,32,102,105,114,115,116,116,104,101,105,114,32,111,119,110,112,111,112,117,
+108,97,116,101,100,97,110,32,111,98,106,101,99,116,67,97,114,105,98,98,101,97,
+110,97,108,108,111,119,32,116,104,101,100,105,115,116,114,105,99,116,115,119,105
+,115,99,111,110,115,105,110,108,111,99,97,116,105,111,110,46,59,32,119,105,100,
+116,104,58,32,105,110,104,97,98,105,116,101,100,83,111,99,105,97,108,105,115,116
+,74,97,110,117,97,114,121,32,49,60,47,102,111,111,116,101,114,62,115,105,109,105
+,108,97,114,108,121,99,104,111,105,99,101,32,111,102,116,104,101,32,115,97,109,
+101,32,115,112,101,99,105,102,105,99,32,98,117,115,105,110,101,115,115,32,84,104
+,101,32,102,105,114,115,116,46,108,101,110,103,116,104,59,32,100,101,115,105,114
+,101,32,116,111,100,101,97,108,32,119,105,116,104,115,105,110,99,101,32,116,104,
+101,117,115,101,114,65,103,101,110,116,99,111,110,99,101,105,118,101,100,105,110
+,100,101,120,46,112,104,112,97,115,32,38,113,117,111,116,59,101,110,103,97,103,
+101,32,105,110,114,101,99,101,110,116,108,121,44,102,101,119,32,121,101,97,114,
+115,119,101,114,101,32,97,108,115,111,10,60,104,101,97,100,62,10,60,101,100,105,
+116,101,100,32,98,121,97,114,101,32,107,110,111,119,110,99,105,116,105,101,115,
+32,105,110,97,99,99,101,115,115,107,101,121,99,111,110,100,101,109,110,101,100,
+97,108,115,111,32,104,97,118,101,115,101,114,118,105,99,101,115,44,102,97,109,
+105,108,121,32,111,102,83,99,104,111,111,108,32,111,102,99,111,110,118,101,114,
+116,101,100,110,97,116,117,114,101,32,111,102,32,108,97,110,103,117,97,103,101,
+109,105,110,105,115,116,101,114,115,60,47,111,98,106,101,99,116,62,116,104,101,
+114,101,32,105,115,32,97,32,112,111,112,117,108,97,114,115,101,113,117,101,110,
+99,101,115,97,100,118,111,99,97,116,101,100,84,104,101,121,32,119,101,114,101,97
+,110,121,32,111,116,104,101,114,108,111,99,97,116,105,111,110,61,101,110,116,101
+,114,32,116,104,101,109,117,99,104,32,109,111,114,101,114,101,102,108,101,99,116
+,101,100,119,97,115,32,110,97,109,101,100,111,114,105,103,105,110,97,108,32,97,
+32,116,121,112,105,99,97,108,119,104,101,110,32,116,104,101,121,101,110,103,105,
+110,101,101,114,115,99,111,117,108,100,32,110,111,116,114,101,115,105,100,101,
+110,116,115,119,101,100,110,101,115,100,97,121,116,104,101,32,116,104,105,114,
+100,32,112,114,111,100,117,99,116,115,74,97,110,117,97,114,121,32,50,119,104,97,
+116,32,116,104,101,121,97,32,99,101,114,116,97,105,110,114,101,97,99,116,105,111
+,110,115,112,114,111,99,101,115,115,111,114,97,102,116,101,114,32,104,105,115,
+116,104,101,32,108,97,115,116,32,99,111,110,116,97,105,110,101,100,34,62,60,47,
+100,105,118,62,10,60,47,97,62,60,47,116,100,62,100,101,112,101,110,100,32,111,
+110,115,101,97,114,99,104,34,62,10,112,105,101,99,101,115,32,111,102,99,111,109,
+112,101,116,105,110,103,82,101,102,101,114,101,110,99,101,116,101,110,110,101,
+115,115,101,101,119,104,105,99,104,32,104,97,115,32,118,101,114,115,105,111,110,
+61,60,47,115,112,97,110,62,32,60,60,47,104,101,97,100,101,114,62,103,105,118,101
+,115,32,116,104,101,104,105,115,116,111,114,105,97,110,118,97,108,117,101,61,34,
+34,62,112,97,100,100,105,110,103,58,48,118,105,101,119,32,116,104,97,116,116,111
+,103,101,116,104,101,114,44,116,104,101,32,109,111,115,116,32,119,97,115,32,102,
+111,117,110,100,115,117,98,115,101,116,32,111,102,97,116,116,97,99,107,32,111,
+110,99,104,105,108,100,114,101,110,44,112,111,105,110,116,115,32,111,102,112,101
+,114,115,111,110,97,108,32,112,111,115,105,116,105,111,110,58,97,108,108,101,103
+,101,100,108,121,67,108,101,118,101,108,97,110,100,119,97,115,32,108,97,116,101,
+114,97,110,100,32,97,102,116,101,114,97,114,101,32,103,105,118,101,110,119,97,
+115,32,115,116,105,108,108,115,99,114,111,108,108,105,110,103,100,101,115,105,
+103,110,32,111,102,109,97,107,101,115,32,116,104,101,109,117,99,104,32,108,101,
+115,115,65,109,101,114,105,99,97,110,115,46,10,10,65,102,116,101,114,32,44,32,98
+,117,116,32,116,104,101,77,117,115,101,117,109,32,111,102,108,111,117,105,115,
+105,97,110,97,40,102,114,111,109,32,116,104,101,109,105,110,110,101,115,111,116,
+97,112,97,114,116,105,99,108,101,115,97,32,112,114,111,99,101,115,115,68,111,109
+,105,110,105,99,97,110,118,111,108,117,109,101,32,111,102,114,101,116,117,114,
+110,105,110,103,100,101,102,101,110,115,105,118,101,48,48,112,120,124,114,105,
+103,104,109,97,100,101,32,102,114,111,109,109,111,117,115,101,111,118,101,114,34
+,32,115,116,121,108,101,61,34,115,116,97,116,101,115,32,111,102,40,119,104,105,
+99,104,32,105,115,99,111,110,116,105,110,117,101,115,70,114,97,110,99,105,115,99
+,111,98,117,105,108,100,105,110,103,32,119,105,116,104,111,117,116,32,97,119,105
+,116,104,32,115,111,109,101,119,104,111,32,119,111,117,108,100,97,32,102,111,114
+,109,32,111,102,97,32,112,97,114,116,32,111,102,98,101,102,111,114,101,32,105,
+116,107,110,111,119,110,32,97,115,32,32,83,101,114,118,105,99,101,115,108,111,99
+,97,116,105,111,110,32,97,110,100,32,111,102,116,101,110,109,101,97,115,117,114,
+105,110,103,97,110,100,32,105,116,32,105,115,112,97,112,101,114,98,97,99,107,118
+,97,108,117,101,115,32,111,102,13,10,60,116,105,116,108,101,62,61,32,119,105,110
+,100,111,119,46,100,101,116,101,114,109,105,110,101,101,114,38,113,117,111,116,
+59,32,112,108,97,121,101,100,32,98,121,97,110,100,32,101,97,114,108,121,60,47,99
+,101,110,116,101,114,62,102,114,111,109,32,116,104,105,115,116,104,101,32,116,
+104,114,101,101,112,111,119,101,114,32,97,110,100,111,102,32,38,113,117,111,116,
+59,105,110,110,101,114,72,84,77,76,60,97,32,104,114,101,102,61,34,121,58,105,110
+,108,105,110,101,59,67,104,117,114,99,104,32,111,102,116,104,101,32,101,118,101,
+110,116,118,101,114,121,32,104,105,103,104,111,102,102,105,99,105,97,108,32,45,
+104,101,105,103,104,116,58,32,99,111,110,116,101,110,116,61,34,47,99,103,105,45,
+98,105,110,47,116,111,32,99,114,101,97,116,101,97,102,114,105,107,97,97,110,115,
+101,115,112,101,114,97,110,116,111,102,114,97,110,195,167,97,105,115,108,97,116,
+118,105,101,197,161,117,108,105,101,116,117,118,105,197,179,196,140,101,197,161,
+116,105,110,97,196,141,101,197,161,116,105,110,97,224,185,132,224,184,151,224,
+184,162,230,151,165,230,156,172,232,170,158,231,174,128,228,189,147,229,173,151,
+231,185,129,233,171,148,229,173,151,237,149,156,234,181,173,236,150,180,228,184,
+186,228,187,128,228,185,136,232,174,161,231,174,151,230,156,186,231,172,148,232,
+174,176,230,156,172,232,168,142,232,171,150,229,141,128,230,156,141,229,138,161,
+229,153,168,228,186,146,232,129,148,231,189,145,230,136,191,229,156,176,228,186,
+167,228,191,177,228,185,144,233,131,168,229,135,186,231,137,136,231,164,190,230,
+142,146,232,161,140,230,166,156,233,131,168,232,144,189,230,160,188,232,191,155,
+228,184,128,230,173,165,230,148,175,228,187,152,229,174,157,233,170,140,232,175,
+129,231,160,129,229,167,148,229,145,152,228,188,154,230,149,176,230,141,174,229,
+186,147,230,182,136,232,180,185,232,128,133,229,138,158,229,133,172,229,174,164,
+232,174,168,232,174,186,229,140,186,230,183,177,229,156,179,229,184,130,230,146,
+173,230,148,190,229,153,168,229,140,151,228,186,172,229,184,130,229,164,167,229,
+173,166,231,148,159,232,182,138,230,157,165,232,182,138,231,174,161,231,144,134,
+229,145,152,228,191,161,230,129,175,231,189,145,115,101,114,118,105,99,105,111,
+115,97,114,116,195,173,99,117,108,111,97,114,103,101,110,116,105,110,97,98,97,
+114,99,101,108,111,110,97,99,117,97,108,113,117,105,101,114,112,117,98,108,105,
+99,97,100,111,112,114,111,100,117,99,116,111,115,112,111,108,195,173,116,105,99,
+97,114,101,115,112,117,101,115,116,97,119,105,107,105,112,101,100,105,97,115,105
+,103,117,105,101,110,116,101,98,195,186,115,113,117,101,100,97,99,111,109,117,
+110,105,100,97,100,115,101,103,117,114,105,100,97,100,112,114,105,110,99,105,112
+,97,108,112,114,101,103,117,110,116,97,115,99,111,110,116,101,110,105,100,111,
+114,101,115,112,111,110,100,101,114,118,101,110,101,122,117,101,108,97,112,114,
+111,98,108,101,109,97,115,100,105,99,105,101,109,98,114,101,114,101,108,97,99,
+105,195,179,110,110,111,118,105,101,109,98,114,101,115,105,109,105,108,97,114,
+101,115,112,114,111,121,101,99,116,111,115,112,114,111,103,114,97,109,97,115,105
+,110,115,116,105,116,117,116,111,97,99,116,105,118,105,100,97,100,101,110,99,117
+,101,110,116,114,97,101,99,111,110,111,109,195,173,97,105,109,195,161,103,101,
+110,101,115,99,111,110,116,97,99,116,97,114,100,101,115,99,97,114,103,97,114,110
+,101,99,101,115,97,114,105,111,97,116,101,110,99,105,195,179,110,116,101,108,195
+,169,102,111,110,111,99,111,109,105,115,105,195,179,110,99,97,110,99,105,111,110
+,101,115,99,97,112,97,99,105,100,97,100,101,110,99,111,110,116,114,97,114,97,110
+,195,161,108,105,115,105,115,102,97,118,111,114,105,116,111,115,116,195,169,114,
+109,105,110,111,115,112,114,111,118,105,110,99,105,97,101,116,105,113,117,101,
+116,97,115,101,108,101,109,101,110,116,111,115,102,117,110,99,105,111,110,101,
+115,114,101,115,117,108,116,97,100,111,99,97,114,195,161,99,116,101,114,112,114,
+111,112,105,101,100,97,100,112,114,105,110,99,105,112,105,111,110,101,99,101,115
+,105,100,97,100,109,117,110,105,99,105,112,97,108,99,114,101,97,99,105,195,179,
+110,100,101,115,99,97,114,103,97,115,112,114,101,115,101,110,99,105,97,99,111,
+109,101,114,99,105,97,108,111,112,105,110,105,111,110,101,115,101,106,101,114,99
+,105,99,105,111,101,100,105,116,111,114,105,97,108,115,97,108,97,109,97,110,99,
+97,103,111,110,122,195,161,108,101,122,100,111,99,117,109,101,110,116,111,112,
+101,108,195,173,99,117,108,97,114,101,99,105,101,110,116,101,115,103,101,110,101
+,114,97,108,101,115,116,97,114,114,97,103,111,110,97,112,114,195,161,99,116,105,
+99,97,110,111,118,101,100,97,100,101,115,112,114,111,112,117,101,115,116,97,112,
+97,99,105,101,110,116,101,115,116,195,169,99,110,105,99,97,115,111,98,106,101,
+116,105,118,111,115,99,111,110,116,97,99,116,111,115,224,164,174,224,165,135,224
+,164,130,224,164,178,224,164,191,224,164,143,224,164,185,224,165,136,224,164,130
+,224,164,151,224,164,175,224,164,190,224,164,184,224,164,190,224,164,165,224,164
+,143,224,164,181,224,164,130,224,164,176,224,164,185,224,165,135,224,164,149,224
+,165,139,224,164,136,224,164,149,224,165,129,224,164,155,224,164,176,224,164,185
+,224,164,190,224,164,172,224,164,190,224,164,166,224,164,149,224,164,185,224,164
+,190,224,164,184,224,164,173,224,165,128,224,164,185,224,165,129,224,164,143,224
+,164,176,224,164,185,224,165,128,224,164,174,224,165,136,224,164,130,224,164,166
+,224,164,191,224,164,168,224,164,172,224,164,190,224,164,164,100,105,112,108,111
+,100,111,99,115,224,164,184,224,164,174,224,164,175,224,164,176,224,165,130,224,
+164,170,224,164,168,224,164,190,224,164,174,224,164,170,224,164,164,224,164,190,
+224,164,171,224,164,191,224,164,176,224,164,148,224,164,184,224,164,164,224,164,
+164,224,164,176,224,164,185,224,164,178,224,165,139,224,164,151,224,164,185,224,
+165,129,224,164,134,224,164,172,224,164,190,224,164,176,224,164,166,224,165,135,
+224,164,182,224,164,185,224,165,129,224,164,136,224,164,150,224,165,135,224,164,
+178,224,164,175,224,164,166,224,164,191,224,164,149,224,164,190,224,164,174,224,
+164,181,224,165,135,224,164,172,224,164,164,224,165,128,224,164,168,224,164,172,
+224,165,128,224,164,154,224,164,174,224,165,140,224,164,164,224,164,184,224,164,
+190,224,164,178,224,164,178,224,165,135,224,164,150,224,164,156,224,165,137,224,
+164,172,224,164,174,224,164,166,224,164,166,224,164,164,224,164,165,224,164,190,
+224,164,168,224,164,185,224,165,128,224,164,182,224,164,185,224,164,176,224,164,
+133,224,164,178,224,164,151,224,164,149,224,164,173,224,165,128,224,164,168,224,
+164,151,224,164,176,224,164,170,224,164,190,224,164,184,224,164,176,224,164,190,
+224,164,164,224,164,149,224,164,191,224,164,143,224,164,137,224,164,184,224,165,
+135,224,164,151,224,164,175,224,165,128,224,164,185,224,165,130,224,164,129,224,
+164,134,224,164,151,224,165,135,224,164,159,224,165,128,224,164,174,224,164,150,
+224,165,139,224,164,156,224,164,149,224,164,190,224,164,176,224,164,133,224,164,
+173,224,165,128,224,164,151,224,164,175,224,165,135,224,164,164,224,165,129,224,
+164,174,224,164,181,224,165,139,224,164,159,224,164,166,224,165,135,224,164,130,
+224,164,133,224,164,151,224,164,176,224,164,144,224,164,184,224,165,135,224,164,
+174,224,165,135,224,164,178,224,164,178,224,164,151,224,164,190,224,164,185,224,
+164,190,224,164,178,224,164,138,224,164,170,224,164,176,224,164,154,224,164,190,
+224,164,176,224,164,144,224,164,184,224,164,190,224,164,166,224,165,135,224,164,
+176,224,164,156,224,164,191,224,164,184,224,164,166,224,164,191,224,164,178,224,
+164,172,224,164,130,224,164,166,224,164,172,224,164,168,224,164,190,224,164,185,
+224,165,130,224,164,130,224,164,178,224,164,190,224,164,150,224,164,156,224,165,
+128,224,164,164,224,164,172,224,164,159,224,164,168,224,164,174,224,164,191,224,
+164,178,224,164,135,224,164,184,224,165,135,224,164,134,224,164,168,224,165,135,
+224,164,168,224,164,175,224,164,190,224,164,149,224,165,129,224,164,178,224,164,
+178,224,165,137,224,164,151,224,164,173,224,164,190,224,164,151,224,164,176,224,
+165,135,224,164,178,224,164,156,224,164,151,224,164,185,224,164,176,224,164,190,
+224,164,174,224,164,178,224,164,151,224,165,135,224,164,170,224,165,135,224,164,
+156,224,164,185,224,164,190,224,164,165,224,164,135,224,164,184,224,165,128,224,
+164,184,224,164,185,224,165,128,224,164,149,224,164,178,224,164,190,224,164,160,
+224,165,128,224,164,149,224,164,185,224,164,190,224,164,129,224,164,166,224,165,
+130,224,164,176,224,164,164,224,164,185,224,164,164,224,164,184,224,164,190,224,
+164,164,224,164,175,224,164,190,224,164,166,224,164,134,224,164,175,224,164,190,
+224,164,170,224,164,190,224,164,149,224,164,149,224,165,140,224,164,168,224,164,
+182,224,164,190,224,164,174,224,164,166,224,165,135,224,164,150,224,164,175,224,
+164,185,224,165,128,224,164,176,224,164,190,224,164,175,224,164,150,224,165,129,
+224,164,166,224,164,178,224,164,151,224,165,128,99,97,116,101,103,111,114,105,
+101,115,101,120,112,101,114,105,101,110,99,101,60,47,116,105,116,108,101,62,13,
+10,67,111,112,121,114,105,103,104,116,32,106,97,118,97,115,99,114,105,112,116,99
+,111,110,100,105,116,105,111,110,115,101,118,101,114,121,116,104,105,110,103,60,
+112,32,99,108,97,115,115,61,34,116,101,99,104,110,111,108,111,103,121,98,97,99,
+107,103,114,111,117,110,100,60,97,32,99,108,97,115,115,61,34,109,97,110,97,103,
+101,109,101,110,116,38,99,111,112,121,59,32,50,48,49,106,97,118,97,83,99,114,105
+,112,116,99,104,97,114,97,99,116,101,114,115,98,114,101,97,100,99,114,117,109,98
+,116,104,101,109,115,101,108,118,101,115,104,111,114,105,122,111,110,116,97,108,
+103,111,118,101,114,110,109,101,110,116,67,97,108,105,102,111,114,110,105,97,97,
+99,116,105,118,105,116,105,101,115,100,105,115,99,111,118,101,114,101,100,78,97,
+118,105,103,97,116,105,111,110,116,114,97,110,115,105,116,105,111,110,99,111,110
+,110,101,99,116,105,111,110,110,97,118,105,103,97,116,105,111,110,97,112,112,101
+,97,114,97,110,99,101,60,47,116,105,116,108,101,62,60,109,99,104,101,99,107,98,
+111,120,34,32,116,101,99,104,110,105,113,117,101,115,112,114,111,116,101,99,116,
+105,111,110,97,112,112,97,114,101,110,116,108,121,97,115,32,119,101,108,108,32,
+97,115,117,110,116,39,44,32,39,85,65,45,114,101,115,111,108,117,116,105,111,110,
+111,112,101,114,97,116,105,111,110,115,116,101,108,101,118,105,115,105,111,110,
+116,114,97,110,115,108,97,116,101,100,87,97,115,104,105,110,103,116,111,110,110,
+97,118,105,103,97,116,111,114,46,32,61,32,119,105,110,100,111,119,46,105,109,112
+,114,101,115,115,105,111,110,38,108,116,59,98,114,38,103,116,59,108,105,116,101,
+114,97,116,117,114,101,112,111,112,117,108,97,116,105,111,110,98,103,99,111,108,
+111,114,61,34,35,101,115,112,101,99,105,97,108,108,121,32,99,111,110,116,101,110
+,116,61,34,112,114,111,100,117,99,116,105,111,110,110,101,119,115,108,101,116,
+116,101,114,112,114,111,112,101,114,116,105,101,115,100,101,102,105,110,105,116,
+105,111,110,108,101,97,100,101,114,115,104,105,112,84,101,99,104,110,111,108,111
+,103,121,80,97,114,108,105,97,109,101,110,116,99,111,109,112,97,114,105,115,111,
+110,117,108,32,99,108,97,115,115,61,34,46,105,110,100,101,120,79,102,40,34,99,
+111,110,99,108,117,115,105,111,110,100,105,115,99,117,115,115,105,111,110,99,111
+,109,112,111,110,101,110,116,115,98,105,111,108,111,103,105,99,97,108,82,101,118
+,111,108,117,116,105,111,110,95,99,111,110,116,97,105,110,101,114,117,110,100,
+101,114,115,116,111,111,100,110,111,115,99,114,105,112,116,62,60,112,101,114,109
+,105,115,115,105,111,110,101,97,99,104,32,111,116,104,101,114,97,116,109,111,115
+,112,104,101,114,101,32,111,110,102,111,99,117,115,61,34,60,102,111,114,109,32,
+105,100,61,34,112,114,111,99,101,115,115,105,110,103,116,104,105,115,46,118,97,
+108,117,101,103,101,110,101,114,97,116,105,111,110,67,111,110,102,101,114,101,
+110,99,101,115,117,98,115,101,113,117,101,110,116,119,101,108,108,45,107,110,111
+,119,110,118,97,114,105,97,116,105,111,110,115,114,101,112,117,116,97,116,105,
+111,110,112,104,101,110,111,109,101,110,111,110,100,105,115,99,105,112,108,105,
+110,101,108,111,103,111,46,112,110,103,34,32,40,100,111,99,117,109,101,110,116,
+44,98,111,117,110,100,97,114,105,101,115,101,120,112,114,101,115,115,105,111,110
+,115,101,116,116,108,101,109,101,110,116,66,97,99,107,103,114,111,117,110,100,
+111,117,116,32,111,102,32,116,104,101,101,110,116,101,114,112,114,105,115,101,40
+,34,104,116,116,112,115,58,34,32,117,110,101,115,99,97,112,101,40,34,112,97,115,
+115,119,111,114,100,34,32,100,101,109,111,99,114,97,116,105,99,60,97,32,104,114,
+101,102,61,34,47,119,114,97,112,112,101,114,34,62,10,109,101,109,98,101,114,115,
+104,105,112,108,105,110,103,117,105,115,116,105,99,112,120,59,112,97,100,100,105
+,110,103,112,104,105,108,111,115,111,112,104,121,97,115,115,105,115,116,97,110,
+99,101,117,110,105,118,101,114,115,105,116,121,102,97,99,105,108,105,116,105,101
+,115,114,101,99,111,103,110,105,122,101,100,112,114,101,102,101,114,101,110,99,
+101,105,102,32,40,116,121,112,101,111,102,109,97,105,110,116,97,105,110,101,100,
+118,111,99,97,98,117,108,97,114,121,104,121,112,111,116,104,101,115,105,115,46,
+115,117,98,109,105,116,40,41,59,38,97,109,112,59,110,98,115,112,59,97,110,110,
+111,116,97,116,105,111,110,98,101,104,105,110,100,32,116,104,101,70,111,117,110,
+100,97,116,105,111,110,112,117,98,108,105,115,104,101,114,34,97,115,115,117,109,
+112,116,105,111,110,105,110,116,114,111,100,117,99,101,100,99,111,114,114,117,
+112,116,105,111,110,115,99,105,101,110,116,105,115,116,115,101,120,112,108,105,
+99,105,116,108,121,105,110,115,116,101,97,100,32,111,102,100,105,109,101,110,115
+,105,111,110,115,32,111,110,67,108,105,99,107,61,34,99,111,110,115,105,100,101,
+114,101,100,100,101,112,97,114,116,109,101,110,116,111,99,99,117,112,97,116,105,
+111,110,115,111,111,110,32,97,102,116,101,114,105,110,118,101,115,116,109,101,
+110,116,112,114,111,110,111,117,110,99,101,100,105,100,101,110,116,105,102,105,
+101,100,101,120,112,101,114,105,109,101,110,116,77,97,110,97,103,101,109,101,110
+,116,103,101,111,103,114,97,112,104,105,99,34,32,104,101,105,103,104,116,61,34,
+108,105,110,107,32,114,101,108,61,34,46,114,101,112,108,97,99,101,40,47,100,101,
+112,114,101,115,115,105,111,110,99,111,110,102,101,114,101,110,99,101,112,117,
+110,105,115,104,109,101,110,116,101,108,105,109,105,110,97,116,101,100,114,101,
+115,105,115,116,97,110,99,101,97,100,97,112,116,97,116,105,111,110,111,112,112,
+111,115,105,116,105,111,110,119,101,108,108,32,107,110,111,119,110,115,117,112,
+112,108,101,109,101,110,116,100,101,116,101,114,109,105,110,101,100,104,49,32,99
+,108,97,115,115,61,34,48,112,120,59,109,97,114,103,105,110,109,101,99,104,97,110
+,105,99,97,108,115,116,97,116,105,115,116,105,99,115,99,101,108,101,98,114,97,
+116,101,100,71,111,118,101,114,110,109,101,110,116,10,10,68,117,114,105,110,103,
+32,116,100,101,118,101,108,111,112,101,114,115,97,114,116,105,102,105,99,105,97,
+108,101,113,117,105,118,97,108,101,110,116,111,114,105,103,105,110,97,116,101,
+100,67,111,109,109,105,115,115,105,111,110,97,116,116,97,99,104,109,101,110,116,
+60,115,112,97,110,32,105,100,61,34,116,104,101,114,101,32,119,101,114,101,78,101
+,100,101,114,108,97,110,100,115,98,101,121,111,110,100,32,116,104,101,114,101,
+103,105,115,116,101,114,101,100,106,111,117,114,110,97,108,105,115,116,102,114,
+101,113,117,101,110,116,108,121,97,108,108,32,111,102,32,116,104,101,108,97,110,
+103,61,34,101,110,34,32,60,47,115,116,121,108,101,62,13,10,97,98,115,111,108,117
+,116,101,59,32,115,117,112,112,111,114,116,105,110,103,101,120,116,114,101,109,
+101,108,121,32,109,97,105,110,115,116,114,101,97,109,60,47,115,116,114,111,110,
+103,62,32,112,111,112,117,108,97,114,105,116,121,101,109,112,108,111,121,109,101
+,110,116,60,47,116,97,98,108,101,62,13,10,32,99,111,108,115,112,97,110,61,34,60,
+47,102,111,114,109,62,10,32,32,99,111,110,118,101,114,115,105,111,110,97,98,111,
+117,116,32,116,104,101,32,60,47,112,62,60,47,100,105,118,62,105,110,116,101,103,
+114,97,116,101,100,34,32,108,97,110,103,61,34,101,110,80,111,114,116,117,103,117
+,101,115,101,115,117,98,115,116,105,116,117,116,101,105,110,100,105,118,105,100,
+117,97,108,105,109,112,111,115,115,105,98,108,101,109,117,108,116,105,109,101,
+100,105,97,97,108,109,111,115,116,32,97,108,108,112,120,32,115,111,108,105,100,
+32,35,97,112,97,114,116,32,102,114,111,109,115,117,98,106,101,99,116,32,116,111,
+105,110,32,69,110,103,108,105,115,104,99,114,105,116,105,99,105,122,101,100,101,
+120,99,101,112,116,32,102,111,114,103,117,105,100,101,108,105,110,101,115,111,
+114,105,103,105,110,97,108,108,121,114,101,109,97,114,107,97,98,108,101,116,104,
+101,32,115,101,99,111,110,100,104,50,32,99,108,97,115,115,61,34,60,97,32,116,105
+,116,108,101,61,34,40,105,110,99,108,117,100,105,110,103,112,97,114,97,109,101,
+116,101,114,115,112,114,111,104,105,98,105,116,101,100,61,32,34,104,116,116,112,
+58,47,47,100,105,99,116,105,111,110,97,114,121,112,101,114,99,101,112,116,105,
+111,110,114,101,118,111,108,117,116,105,111,110,102,111,117,110,100,97,116,105,
+111,110,112,120,59,104,101,105,103,104,116,58,115,117,99,99,101,115,115,102,117,
+108,115,117,112,112,111,114,116,101,114,115,109,105,108,108,101,110,110,105,117,
+109,104,105,115,32,102,97,116,104,101,114,116,104,101,32,38,113,117,111,116,59,
+110,111,45,114,101,112,101,97,116,59,99,111,109,109,101,114,99,105,97,108,105,
+110,100,117,115,116,114,105,97,108,101,110,99,111,117,114,97,103,101,100,97,109,
+111,117,110,116,32,111,102,32,117,110,111,102,102,105,99,105,97,108,101,102,102,
+105,99,105,101,110,99,121,82,101,102,101,114,101,110,99,101,115,99,111,111,114,
+100,105,110,97,116,101,100,105,115,99,108,97,105,109,101,114,101,120,112,101,100
+,105,116,105,111,110,100,101,118,101,108,111,112,105,110,103,99,97,108,99,117,
+108,97,116,101,100,115,105,109,112,108,105,102,105,101,100,108,101,103,105,116,
+105,109,97,116,101,115,117,98,115,116,114,105,110,103,40,48,34,32,99,108,97,115,
+115,61,34,99,111,109,112,108,101,116,101,108,121,105,108,108,117,115,116,114,97,
+116,101,102,105,118,101,32,121,101,97,114,115,105,110,115,116,114,117,109,101,
+110,116,80,117,98,108,105,115,104,105,110,103,49,34,32,99,108,97,115,115,61,34,
+112,115,121,99,104,111,108,111,103,121,99,111,110,102,105,100,101,110,99,101,110
+,117,109,98,101,114,32,111,102,32,97,98,115,101,110,99,101,32,111,102,102,111,99
+,117,115,101,100,32,111,110,106,111,105,110,101,100,32,116,104,101,115,116,114,
+117,99,116,117,114,101,115,112,114,101,118,105,111,117,115,108,121,62,60,47,105,
+102,114,97,109,101,62,111,110,99,101,32,97,103,97,105,110,98,117,116,32,114,97,
+116,104,101,114,105,109,109,105,103,114,97,110,116,115,111,102,32,99,111,117,114
+,115,101,44,97,32,103,114,111,117,112,32,111,102,76,105,116,101,114,97,116,117,
+114,101,85,110,108,105,107,101,32,116,104,101,60,47,97,62,38,110,98,115,112,59,
+10,102,117,110,99,116,105,111,110,32,105,116,32,119,97,115,32,116,104,101,67,111
+,110,118,101,110,116,105,111,110,97,117,116,111,109,111,98,105,108,101,80,114,
+111,116,101,115,116,97,110,116,97,103,103,114,101,115,115,105,118,101,97,102,116
+,101,114,32,116,104,101,32,83,105,109,105,108,97,114,108,121,44,34,32,47,62,60,
+47,100,105,118,62,99,111,108,108,101,99,116,105,111,110,13,10,102,117,110,99,116
+,105,111,110,118,105,115,105,98,105,108,105,116,121,116,104,101,32,117,115,101,
+32,111,102,118,111,108,117,110,116,101,101,114,115,97,116,116,114,97,99,116,105,
+111,110,117,110,100,101,114,32,116,104,101,32,116,104,114,101,97,116,101,110,101
+,100,42,60,33,91,67,68,65,84,65,91,105,109,112,111,114,116,97,110,99,101,105,110
+,32,103,101,110,101,114,97,108,116,104,101,32,108,97,116,116,101,114,60,47,102,
+111,114,109,62,10,60,47,46,105,110,100,101,120,79,102,40,39,105,32,61,32,48,59,
+32,105,32,60,100,105,102,102,101,114,101,110,99,101,100,101,118,111,116,101,100,
+32,116,111,116,114,97,100,105,116,105,111,110,115,115,101,97,114,99,104,32,102,
+111,114,117,108,116,105,109,97,116,101,108,121,116,111,117,114,110,97,109,101,
+110,116,97,116,116,114,105,98,117,116,101,115,115,111,45,99,97,108,108,101,100,
+32,125,10,60,47,115,116,121,108,101,62,101,118,97,108,117,97,116,105,111,110,101
+,109,112,104,97,115,105,122,101,100,97,99,99,101,115,115,105,98,108,101,60,47,
+115,101,99,116,105,111,110,62,115,117,99,99,101,115,115,105,111,110,97,108,111,
+110,103,32,119,105,116,104,77,101,97,110,119,104,105,108,101,44,105,110,100,117,
+115,116,114,105,101,115,60,47,97,62,60,98,114,32,47,62,104,97,115,32,98,101,99,
+111,109,101,97,115,112,101,99,116,115,32,111,102,84,101,108,101,118,105,115,105,
+111,110,115,117,102,102,105,99,105,101,110,116,98,97,115,107,101,116,98,97,108,
+108,98,111,116,104,32,115,105,100,101,115,99,111,110,116,105,110,117,105,110,103
+,97,110,32,97,114,116,105,99,108,101,60,105,109,103,32,97,108,116,61,34,97,100,
+118,101,110,116,117,114,101,115,104,105,115,32,109,111,116,104,101,114,109,97,
+110,99,104,101,115,116,101,114,112,114,105,110,99,105,112,108,101,115,112,97,114
+,116,105,99,117,108,97,114,99,111,109,109,101,110,116,97,114,121,101,102,102,101
+,99,116,115,32,111,102,100,101,99,105,100,101,100,32,116,111,34,62,60,115,116,
+114,111,110,103,62,112,117,98,108,105,115,104,101,114,115,74,111,117,114,110,97,
+108,32,111,102,100,105,102,102,105,99,117,108,116,121,102,97,99,105,108,105,116,
+97,116,101,97,99,99,101,112,116,97,98,108,101,115,116,121,108,101,46,99,115,115,
+34,9,102,117,110,99,116,105,111,110,32,105,110,110,111,118,97,116,105,111,110,62
+,67,111,112,121,114,105,103,104,116,115,105,116,117,97,116,105,111,110,115,119,
+111,117,108,100,32,104,97,118,101,98,117,115,105,110,101,115,115,101,115,68,105,
+99,116,105,111,110,97,114,121,115,116,97,116,101,109,101,110,116,115,111,102,116
+,101,110,32,117,115,101,100,112,101,114,115,105,115,116,101,110,116,105,110,32,
+74,97,110,117,97,114,121,99,111,109,112,114,105,115,105,110,103,60,47,116,105,
+116,108,101,62,10,9,100,105,112,108,111,109,97,116,105,99,99,111,110,116,97,105,
+110,105,110,103,112,101,114,102,111,114,109,105,110,103,101,120,116,101,110,115,
+105,111,110,115,109,97,121,32,110,111,116,32,98,101,99,111,110,99,101,112,116,32
+,111,102,32,111,110,99,108,105,99,107,61,34,73,116,32,105,115,32,97,108,115,111,
+102,105,110,97,110,99,105,97,108,32,109,97,107,105,110,103,32,116,104,101,76,117
+,120,101,109,98,111,117,114,103,97,100,100,105,116,105,111,110,97,108,97,114,101
+,32,99,97,108,108,101,100,101,110,103,97,103,101,100,32,105,110,34,115,99,114,
+105,112,116,34,41,59,98,117,116,32,105,116,32,119,97,115,101,108,101,99,116,114,
+111,110,105,99,111,110,115,117,98,109,105,116,61,34,10,60,33,45,45,32,69,110,100
+,32,101,108,101,99,116,114,105,99,97,108,111,102,102,105,99,105,97,108,108,121,
+115,117,103,103,101,115,116,105,111,110,116,111,112,32,111,102,32,116,104,101,
+117,110,108,105,107,101,32,116,104,101,65,117,115,116,114,97,108,105,97,110,79,
+114,105,103,105,110,97,108,108,121,114,101,102,101,114,101,110,99,101,115,10,60,
+47,104,101,97,100,62,13,10,114,101,99,111,103,110,105,115,101,100,105,110,105,
+116,105,97,108,105,122,101,108,105,109,105,116,101,100,32,116,111,65,108,101,120
+,97,110,100,114,105,97,114,101,116,105,114,101,109,101,110,116,65,100,118,101,
+110,116,117,114,101,115,102,111,117,114,32,121,101,97,114,115,10,10,38,108,116,
+59,33,45,45,32,105,110,99,114,101,97,115,105,110,103,100,101,99,111,114,97,116,
+105,111,110,104,51,32,99,108,97,115,115,61,34,111,114,105,103,105,110,115,32,111
+,102,111,98,108,105,103,97,116,105,111,110,114,101,103,117,108,97,116,105,111,
+110,99,108,97,115,115,105,102,105,101,100,40,102,117,110,99,116,105,111,110,40,
+97,100,118,97,110,116,97,103,101,115,98,101,105,110,103,32,116,104,101,32,104,
+105,115,116,111,114,105,97,110,115,60,98,97,115,101,32,104,114,101,102,114,101,
+112,101,97,116,101,100,108,121,119,105,108,108,105,110,103,32,116,111,99,111,109
+,112,97,114,97,98,108,101,100,101,115,105,103,110,97,116,101,100,110,111,109,105
+,110,97,116,105,111,110,102,117,110,99,116,105,111,110,97,108,105,110,115,105,
+100,101,32,116,104,101,114,101,118,101,108,97,116,105,111,110,101,110,100,32,111
+,102,32,116,104,101,115,32,102,111,114,32,116,104,101,32,97,117,116,104,111,114,
+105,122,101,100,114,101,102,117,115,101,100,32,116,111,116,97,107,101,32,112,108
+,97,99,101,97,117,116,111,110,111,109,111,117,115,99,111,109,112,114,111,109,105
+,115,101,112,111,108,105,116,105,99,97,108,32,114,101,115,116,97,117,114,97,110,
+116,116,119,111,32,111,102,32,116,104,101,70,101,98,114,117,97,114,121,32,50,113
+,117,97,108,105,116,121,32,111,102,115,119,102,111,98,106,101,99,116,46,117,110,
+100,101,114,115,116,97,110,100,110,101,97,114,108,121,32,97,108,108,119,114,105,
+116,116,101,110,32,98,121,105,110,116,101,114,118,105,101,119,115,34,32,119,105,
+100,116,104,61,34,49,119,105,116,104,100,114,97,119,97,108,102,108,111,97,116,58
+,108,101,102,116,105,115,32,117,115,117,97,108,108,121,99,97,110,100,105,100,97,
+116,101,115,110,101,119,115,112,97,112,101,114,115,109,121,115,116,101,114,105,
+111,117,115,68,101,112,97,114,116,109,101,110,116,98,101,115,116,32,107,110,111,
+119,110,112,97,114,108,105,97,109,101,110,116,115,117,112,112,114,101,115,115,
+101,100,99,111,110,118,101,110,105,101,110,116,114,101,109,101,109,98,101,114,
+101,100,100,105,102,102,101,114,101,110,116,32,115,121,115,116,101,109,97,116,
+105,99,104,97,115,32,108,101,100,32,116,111,112,114,111,112,97,103,97,110,100,97
+,99,111,110,116,114,111,108,108,101,100,105,110,102,108,117,101,110,99,101,115,
+99,101,114,101,109,111,110,105,97,108,112,114,111,99,108,97,105,109,101,100,80,
+114,111,116,101,99,116,105,111,110,108,105,32,99,108,97,115,115,61,34,83,99,105,
+101,110,116,105,102,105,99,99,108,97,115,115,61,34,110,111,45,116,114,97,100,101
+,109,97,114,107,115,109,111,114,101,32,116,104,97,110,32,119,105,100,101,115,112
+,114,101,97,100,76,105,98,101,114,97,116,105,111,110,116,111,111,107,32,112,108,
+97,99,101,100,97,121,32,111,102,32,116,104,101,97,115,32,108,111,110,103,32,97,
+115,105,109,112,114,105,115,111,110,101,100,65,100,100,105,116,105,111,110,97,
+108,10,60,104,101,97,100,62,10,60,109,76,97,98,111,114,97,116,111,114,121,78,111
+,118,101,109,98,101,114,32,50,101,120,99,101,112,116,105,111,110,115,73,110,100,
+117,115,116,114,105,97,108,118,97,114,105,101,116,121,32,111,102,102,108,111,97,
+116,58,32,108,101,102,68,117,114,105,110,103,32,116,104,101,97,115,115,101,115,
+115,109,101,110,116,104,97,118,101,32,98,101,101,110,32,100,101,97,108,115,32,
+119,105,116,104,83,116,97,116,105,115,116,105,99,115,111,99,99,117,114,114,101,
+110,99,101,47,117,108,62,60,47,100,105,118,62,99,108,101,97,114,102,105,120,34,
+62,116,104,101,32,112,117,98,108,105,99,109,97,110,121,32,121,101,97,114,115,119
+,104,105,99,104,32,119,101,114,101,111,118,101,114,32,116,105,109,101,44,115,121
+,110,111,110,121,109,111,117,115,99,111,110,116,101,110,116,34,62,10,112,114,101
+,115,117,109,97,98,108,121,104,105,115,32,102,97,109,105,108,121,117,115,101,114
+,65,103,101,110,116,46,117,110,101,120,112,101,99,116,101,100,105,110,99,108,117
+,100,105,110,103,32,99,104,97,108,108,101,110,103,101,100,97,32,109,105,110,111,
+114,105,116,121,117,110,100,101,102,105,110,101,100,34,98,101,108,111,110,103,
+115,32,116,111,116,97,107,101,110,32,102,114,111,109,105,110,32,79,99,116,111,98
+,101,114,112,111,115,105,116,105,111,110,58,32,115,97,105,100,32,116,111,32,98,
+101,114,101,108,105,103,105,111,117,115,32,70,101,100,101,114,97,116,105,111,110
+,32,114,111,119,115,112,97,110,61,34,111,110,108,121,32,97,32,102,101,119,109,
+101,97,110,116,32,116,104,97,116,108,101,100,32,116,111,32,116,104,101,45,45,62,
+13,10,60,100,105,118,32,60,102,105,101,108,100,115,101,116,62,65,114,99,104,98,
+105,115,104,111,112,32,99,108,97,115,115,61,34,110,111,98,101,105,110,103,32,117
+,115,101,100,97,112,112,114,111,97,99,104,101,115,112,114,105,118,105,108,101,
+103,101,115,110,111,115,99,114,105,112,116,62,10,114,101,115,117,108,116,115,32,
+105,110,109,97,121,32,98,101,32,116,104,101,69,97,115,116,101,114,32,101,103,103
+,109,101,99,104,97,110,105,115,109,115,114,101,97,115,111,110,97,98,108,101,80,
+111,112,117,108,97,116,105,111,110,67,111,108,108,101,99,116,105,111,110,115,101
+,108,101,99,116,101,100,34,62,110,111,115,99,114,105,112,116,62,13,47,105,110,
+100,101,120,46,112,104,112,97,114,114,105,118,97,108,32,111,102,45,106,115,115,
+100,107,39,41,41,59,109,97,110,97,103,101,100,32,116,111,105,110,99,111,109,112,
+108,101,116,101,99,97,115,117,97,108,116,105,101,115,99,111,109,112,108,101,116,
+105,111,110,67,104,114,105,115,116,105,97,110,115,83,101,112,116,101,109,98,101,
+114,32,97,114,105,116,104,109,101,116,105,99,112,114,111,99,101,100,117,114,101,
+115,109,105,103,104,116,32,104,97,118,101,80,114,111,100,117,99,116,105,111,110,
+105,116,32,97,112,112,101,97,114,115,80,104,105,108,111,115,111,112,104,121,102,
+114,105,101,110,100,115,104,105,112,108,101,97,100,105,110,103,32,116,111,103,
+105,118,105,110,103,32,116,104,101,116,111,119,97,114,100,32,116,104,101,103,117
+,97,114,97,110,116,101,101,100,100,111,99,117,109,101,110,116,101,100,99,111,108
+,111,114,58,35,48,48,48,118,105,100,101,111,32,103,97,109,101,99,111,109,109,105
+,115,115,105,111,110,114,101,102,108,101,99,116,105,110,103,99,104,97,110,103,
+101,32,116,104,101,97,115,115,111,99,105,97,116,101,100,115,97,110,115,45,115,
+101,114,105,102,111,110,107,101,121,112,114,101,115,115,59,32,112,97,100,100,105
+,110,103,58,72,101,32,119,97,115,32,116,104,101,117,110,100,101,114,108,121,105,
+110,103,116,121,112,105,99,97,108,108,121,32,44,32,97,110,100,32,116,104,101,32,
+115,114,99,69,108,101,109,101,110,116,115,117,99,99,101,115,115,105,118,101,115,
+105,110,99,101,32,116,104,101,32,115,104,111,117,108,100,32,98,101,32,110,101,
+116,119,111,114,107,105,110,103,97,99,99,111,117,110,116,105,110,103,117,115,101
+,32,111,102,32,116,104,101,108,111,119,101,114,32,116,104,97,110,115,104,111,119
+,115,32,116,104,97,116,60,47,115,112,97,110,62,10,9,9,99,111,109,112,108,97,105,
+110,116,115,99,111,110,116,105,110,117,111,117,115,113,117,97,110,116,105,116,
+105,101,115,97,115,116,114,111,110,111,109,101,114,104,101,32,100,105,100,32,110
+,111,116,100,117,101,32,116,111,32,105,116,115,97,112,112,108,105,101,100,32,116
+,111,97,110,32,97,118,101,114,97,103,101,101,102,102,111,114,116,115,32,116,111,
+116,104,101,32,102,117,116,117,114,101,97,116,116,101,109,112,116,32,116,111,84,
+104,101,114,101,102,111,114,101,44,99,97,112,97,98,105,108,105,116,121,82,101,
+112,117,98,108,105,99,97,110,119,97,115,32,102,111,114,109,101,100,69,108,101,99
+,116,114,111,110,105,99,107,105,108,111,109,101,116,101,114,115,99,104,97,108,
+108,101,110,103,101,115,112,117,98,108,105,115,104,105,110,103,116,104,101,32,
+102,111,114,109,101,114,105,110,100,105,103,101,110,111,117,115,100,105,114,101,
+99,116,105,111,110,115,115,117,98,115,105,100,105,97,114,121,99,111,110,115,112,
+105,114,97,99,121,100,101,116,97,105,108,115,32,111,102,97,110,100,32,105,110,32
+,116,104,101,97,102,102,111,114,100,97,98,108,101,115,117,98,115,116,97,110,99,
+101,115,114,101,97,115,111,110,32,102,111,114,99,111,110,118,101,110,116,105,111
+,110,105,116,101,109,116,121,112,101,61,34,97,98,115,111,108,117,116,101,108,121
+,115,117,112,112,111,115,101,100,108,121,114,101,109,97,105,110,101,100,32,97,97
+,116,116,114,97,99,116,105,118,101,116,114,97,118,101,108,108,105,110,103,115,
+101,112,97,114,97,116,101,108,121,102,111,99,117,115,101,115,32,111,110,101,108,
+101,109,101,110,116,97,114,121,97,112,112,108,105,99,97,98,108,101,102,111,117,
+110,100,32,116,104,97,116,115,116,121,108,101,115,104,101,101,116,109,97,110,117
+,115,99,114,105,112,116,115,116,97,110,100,115,32,102,111,114,32,110,111,45,114,
+101,112,101,97,116,40,115,111,109,101,116,105,109,101,115,67,111,109,109,101,114
+,99,105,97,108,105,110,32,65,109,101,114,105,99,97,117,110,100,101,114,116,97,
+107,101,110,113,117,97,114,116,101,114,32,111,102,97,110,32,101,120,97,109,112,
+108,101,112,101,114,115,111,110,97,108,108,121,105,110,100,101,120,46,112,104,
+112,63,60,47,98,117,116,116,111,110,62,10,112,101,114,99,101,110,116,97,103,101,
+98,101,115,116,45,107,110,111,119,110,99,114,101,97,116,105,110,103,32,97,34,32,
+100,105,114,61,34,108,116,114,76,105,101,117,116,101,110,97,110,116,10,60,100,
+105,118,32,105,100,61,34,116,104,101,121,32,119,111,117,108,100,97,98,105,108,
+105,116,121,32,111,102,109,97,100,101,32,117,112,32,111,102,110,111,116,101,100,
+32,116,104,97,116,99,108,101,97,114,32,116,104,97,116,97,114,103,117,101,32,116,
+104,97,116,116,111,32,97,110,111,116,104,101,114,99,104,105,108,100,114,101,110,
+39,115,112,117,114,112,111,115,101,32,111,102,102,111,114,109,117,108,97,116,101
+,100,98,97,115,101,100,32,117,112,111,110,116,104,101,32,114,101,103,105,111,110
+,115,117,98,106,101,99,116,32,111,102,112,97,115,115,101,110,103,101,114,115,112
+,111,115,115,101,115,115,105,111,110,46,10,10,73,110,32,116,104,101,32,66,101,
+102,111,114,101,32,116,104,101,97,102,116,101,114,119,97,114,100,115,99,117,114,
+114,101,110,116,108,121,32,97,99,114,111,115,115,32,116,104,101,115,99,105,101,
+110,116,105,102,105,99,99,111,109,109,117,110,105,116,121,46,99,97,112,105,116,
+97,108,105,115,109,105,110,32,71,101,114,109,97,110,121,114,105,103,104,116,45,
+119,105,110,103,116,104,101,32,115,121,115,116,101,109,83,111,99,105,101,116,121
+,32,111,102,112,111,108,105,116,105,99,105,97,110,100,105,114,101,99,116,105,111
+,110,58,119,101,110,116,32,111,110,32,116,111,114,101,109,111,118,97,108,32,111,
+102,32,78,101,119,32,89,111,114,107,32,97,112,97,114,116,109,101,110,116,115,105
+,110,100,105,99,97,116,105,111,110,100,117,114,105,110,103,32,116,104,101,117,
+110,108,101,115,115,32,116,104,101,104,105,115,116,111,114,105,99,97,108,104,97,
+100,32,98,101,101,110,32,97,100,101,102,105,110,105,116,105,118,101,105,110,103,
+114,101,100,105,101,110,116,97,116,116,101,110,100,97,110,99,101,67,101,110,116,
+101,114,32,102,111,114,112,114,111,109,105,110,101,110,99,101,114,101,97,100,121
+,83,116,97,116,101,115,116,114,97,116,101,103,105,101,115,98,117,116,32,105,110,
+32,116,104,101,97,115,32,112,97,114,116,32,111,102,99,111,110,115,116,105,116,
+117,116,101,99,108,97,105,109,32,116,104,97,116,108,97,98,111,114,97,116,111,114
+,121,99,111,109,112,97,116,105,98,108,101,102,97,105,108,117,114,101,32,111,102,
+44,32,115,117,99,104,32,97,115,32,98,101,103,97,110,32,119,105,116,104,117,115,
+105,110,103,32,116,104,101,32,116,111,32,112,114,111,118,105,100,101,102,101,97,
+116,117,114,101,32,111,102,102,114,111,109,32,119,104,105,99,104,47,34,32,99,108
+,97,115,115,61,34,103,101,111,108,111,103,105,99,97,108,115,101,118,101,114,97,
+108,32,111,102,100,101,108,105,98,101,114,97,116,101,105,109,112,111,114,116,97,
+110,116,32,104,111,108,100,115,32,116,104,97,116,105,110,103,38,113,117,111,116,
+59,32,118,97,108,105,103,110,61,116,111,112,116,104,101,32,71,101,114,109,97,110
+,111,117,116,115,105,100,101,32,111,102,110,101,103,111,116,105,97,116,101,100,
+104,105,115,32,99,97,114,101,101,114,115,101,112,97,114,97,116,105,111,110,105,
+100,61,34,115,101,97,114,99,104,119,97,115,32,99,97,108,108,101,100,116,104,101,
+32,102,111,117,114,116,104,114,101,99,114,101,97,116,105,111,110,111,116,104,101
+,114,32,116,104,97,110,112,114,101,118,101,110,116,105,111,110,119,104,105,108,
+101,32,116,104,101,32,101,100,117,99,97,116,105,111,110,44,99,111,110,110,101,99
+,116,105,110,103,97,99,99,117,114,97,116,101,108,121,119,101,114,101,32,98,117,
+105,108,116,119,97,115,32,107,105,108,108,101,100,97,103,114,101,101,109,101,110
+,116,115,109,117,99,104,32,109,111,114,101,32,68,117,101,32,116,111,32,116,104,
+101,119,105,100,116,104,58,32,49,48,48,115,111,109,101,32,111,116,104,101,114,75
+,105,110,103,100,111,109,32,111,102,116,104,101,32,101,110,116,105,114,101,102,
+97,109,111,117,115,32,102,111,114,116,111,32,99,111,110,110,101,99,116,111,98,
+106,101,99,116,105,118,101,115,116,104,101,32,70,114,101,110,99,104,112,101,111,
+112,108,101,32,97,110,100,102,101,97,116,117,114,101,100,34,62,105,115,32,115,97
+,105,100,32,116,111,115,116,114,117,99,116,117,114,97,108,114,101,102,101,114,
+101,110,100,117,109,109,111,115,116,32,111,102,116,101,110,97,32,115,101,112,97,
+114,97,116,101,45,62,10,60,100,105,118,32,105,100,32,79,102,102,105,99,105,97,
+108,32,119,111,114,108,100,119,105,100,101,46,97,114,105,97,45,108,97,98,101,108
+,116,104,101,32,112,108,97,110,101,116,97,110,100,32,105,116,32,119,97,115,100,
+34,32,118,97,108,117,101,61,34,108,111,111,107,105,110,103,32,97,116,98,101,110,
+101,102,105,99,105,97,108,97,114,101,32,105,110,32,116,104,101,109,111,110,105,
+116,111,114,105,110,103,114,101,112,111,114,116,101,100,108,121,116,104,101,32,
+109,111,100,101,114,110,119,111,114,107,105,110,103,32,111,110,97,108,108,111,
+119,101,100,32,116,111,119,104,101,114,101,32,116,104,101,32,105,110,110,111,118
+,97,116,105,118,101,60,47,97,62,60,47,100,105,118,62,115,111,117,110,100,116,114
+,97,99,107,115,101,97,114,99,104,70,111,114,109,116,101,110,100,32,116,111,32,98
+,101,105,110,112,117,116,32,105,100,61,34,111,112,101,110,105,110,103,32,111,102
+,114,101,115,116,114,105,99,116,101,100,97,100,111,112,116,101,100,32,98,121,97,
+100,100,114,101,115,115,105,110,103,116,104,101,111,108,111,103,105,97,110,109,
+101,116,104,111,100,115,32,111,102,118,97,114,105,97,110,116,32,111,102,67,104,
+114,105,115,116,105,97,110,32,118,101,114,121,32,108,97,114,103,101,97,117,116,
+111,109,111,116,105,118,101,98,121,32,102,97,114,32,116,104,101,114,97,110,103,
+101,32,102,114,111,109,112,117,114,115,117,105,116,32,111,102,102,111,108,108,
+111,119,32,116,104,101,98,114,111,117,103,104,116,32,116,111,105,110,32,69,110,
+103,108,97,110,100,97,103,114,101,101,32,116,104,97,116,97,99,99,117,115,101,100
+,32,111,102,99,111,109,101,115,32,102,114,111,109,112,114,101,118,101,110,116,
+105,110,103,100,105,118,32,115,116,121,108,101,61,104,105,115,32,111,114,32,104,
+101,114,116,114,101,109,101,110,100,111,117,115,102,114,101,101,100,111,109,32,
+111,102,99,111,110,99,101,114,110,105,110,103,48,32,49,101,109,32,49,101,109,59,
+66,97,115,107,101,116,98,97,108,108,47,115,116,121,108,101,46,99,115,115,97,110,
+32,101,97,114,108,105,101,114,101,118,101,110,32,97,102,116,101,114,47,34,32,116
+,105,116,108,101,61,34,46,99,111,109,47,105,110,100,101,120,116,97,107,105,110,
+103,32,116,104,101,112,105,116,116,115,98,117,114,103,104,99,111,110,116,101,110
+,116,34,62,13,60,115,99,114,105,112,116,62,40,102,116,117,114,110,101,100,32,111
+,117,116,104,97,118,105,110,103,32,116,104,101,60,47,115,112,97,110,62,13,10,32,
+111,99,99,97,115,105,111,110,97,108,98,101,99,97,117,115,101,32,105,116,115,116,
+97,114,116,101,100,32,116,111,112,104,121,115,105,99,97,108,108,121,62,60,47,100
+,105,118,62,10,32,32,99,114,101,97,116,101,100,32,98,121,67,117,114,114,101,110,
+116,108,121,44,32,98,103,99,111,108,111,114,61,34,116,97,98,105,110,100,101,120,
+61,34,100,105,115,97,115,116,114,111,117,115,65,110,97,108,121,116,105,99,115,32
+,97,108,115,111,32,104,97,115,32,97,62,60,100,105,118,32,105,100,61,34,60,47,115
+,116,121,108,101,62,10,60,99,97,108,108,101,100,32,102,111,114,115,105,110,103,
+101,114,32,97,110,100,46,115,114,99,32,61,32,34,47,47,118,105,111,108,97,116,105
+,111,110,115,116,104,105,115,32,112,111,105,110,116,99,111,110,115,116,97,110,
+116,108,121,105,115,32,108,111,99,97,116,101,100,114,101,99,111,114,100,105,110,
+103,115,100,32,102,114,111,109,32,116,104,101,110,101,100,101,114,108,97,110,100
+,115,112,111,114,116,117,103,117,195,170,115,215,162,215,145,215,168,215,153,215
+,170,217,129,216,167,216,177,216,179,219,140,100,101,115,97,114,114,111,108,108,
+111,99,111,109,101,110,116,97,114,105,111,101,100,117,99,97,99,105,195,179,110,
+115,101,112,116,105,101,109,98,114,101,114,101,103,105,115,116,114,97,100,111,
+100,105,114,101,99,99,105,195,179,110,117,98,105,99,97,99,105,195,179,110,112,
+117,98,108,105,99,105,100,97,100,114,101,115,112,117,101,115,116,97,115,114,101,
+115,117,108,116,97,100,111,115,105,109,112,111,114,116,97,110,116,101,114,101,
+115,101,114,118,97,100,111,115,97,114,116,195,173,99,117,108,111,115,100,105,102
+,101,114,101,110,116,101,115,115,105,103,117,105,101,110,116,101,115,114,101,112
+,195,186,98,108,105,99,97,115,105,116,117,97,99,105,195,179,110,109,105,110,105,
+115,116,101,114,105,111,112,114,105,118,97,99,105,100,97,100,100,105,114,101,99,
+116,111,114,105,111,102,111,114,109,97,99,105,195,179,110,112,111,98,108,97,99,
+105,195,179,110,112,114,101,115,105,100,101,110,116,101,99,111,110,116,101,110,
+105,100,111,115,97,99,99,101,115,111,114,105,111,115,116,101,99,104,110,111,114,
+97,116,105,112,101,114,115,111,110,97,108,101,115,99,97,116,101,103,111,114,195,
+173,97,101,115,112,101,99,105,97,108,101,115,100,105,115,112,111,110,105,98,108,
+101,97,99,116,117,97,108,105,100,97,100,114,101,102,101,114,101,110,99,105,97,
+118,97,108,108,97,100,111,108,105,100,98,105,98,108,105,111,116,101,99,97,114,
+101,108,97,99,105,111,110,101,115,99,97,108,101,110,100,97,114,105,111,112,111,
+108,195,173,116,105,99,97,115,97,110,116,101,114,105,111,114,101,115,100,111,99,
+117,109,101,110,116,111,115,110,97,116,117,114,97,108,101,122,97,109,97,116,101,
+114,105,97,108,101,115,100,105,102,101,114,101,110,99,105,97,101,99,111,110,195,
+179,109,105,99,97,116,114,97,110,115,112,111,114,116,101,114,111,100,114,195,173
+,103,117,101,122,112,97,114,116,105,99,105,112,97,114,101,110,99,117,101,110,116
+,114,97,110,100,105,115,99,117,115,105,195,179,110,101,115,116,114,117,99,116,
+117,114,97,102,117,110,100,97,99,105,195,179,110,102,114,101,99,117,101,110,116,
+101,115,112,101,114,109,97,110,101,110,116,101,116,111,116,97,108,109,101,110,
+116,101,208,188,208,190,208,182,208,189,208,190,208,177,209,131,208,180,208,181,
+209,130,208,188,208,190,208,182,208,181,209,130,208,178,209,128,208,181,208,188,
+209,143,209,130,208,176,208,186,208,182,208,181,209,135,209,130,208,190,208,177,
+209,139,208,177,208,190,208,187,208,181,208,181,208,190,209,135,208,181,208,189,
+209,140,209,141,209,130,208,190,208,179,208,190,208,186,208,190,208,179,208,180,
+208,176,208,191,208,190,209,129,208,187,208,181,208,178,209,129,208,181,208,179,
+208,190,209,129,208,176,208,185,209,130,208,181,209,135,208,181,209,128,208,181,
+208,183,208,188,208,190,208,179,209,131,209,130,209,129,208,176,208,185,209,130,
+208,176,208,182,208,184,208,183,208,189,208,184,208,188,208,181,208,182,208,180,
+209,131,208,177,209,131,208,180,209,131,209,130,208,159,208,190,208,184,209,129,
+208,186,208,183,208,180,208,181,209,129,209,140,208,178,208,184,208,180,208,181,
+208,190,209,129,208,178,209,143,208,183,208,184,208,189,209,131,208,182,208,189,
+208,190,209,129,208,178,208,190,208,181,208,185,208,187,209,142,208,180,208,181,
+208,185,208,191,208,190,209,128,208,189,208,190,208,188,208,189,208,190,208,179,
+208,190,208,180,208,181,209,130,208,181,208,185,209,129,208,178,208,190,208,184,
+209,133,208,191,209,128,208,176,208,178,208,176,209,130,208,176,208,186,208,190,
+208,185,208,188,208,181,209,129,209,130,208,190,208,184,208,188,208,181,208,181,
+209,130,208,182,208,184,208,183,208,189,209,140,208,190,208,180,208,189,208,190,
+208,185,208,187,209,131,209,135,209,136,208,181,208,191,208,181,209,128,208,181,
+208,180,209,135,208,176,209,129,209,130,208,184,209,135,208,176,209,129,209,130,
+209,140,209,128,208,176,208,177,208,190,209,130,208,189,208,190,208,178,209,139,
+209,133,208,191,209,128,208,176,208,178,208,190,209,129,208,190,208,177,208,190,
+208,185,208,191,208,190,209,130,208,190,208,188,208,188,208,181,208,189,208,181,
+208,181,209,135,208,184,209,129,208,187,208,181,208,189,208,190,208,178,209,139,
+208,181,209,131,209,129,208,187,209,131,208,179,208,190,208,186,208,190,208,187,
+208,190,208,189,208,176,208,183,208,176,208,180,209,130,208,176,208,186,208,190,
+208,181,209,130,208,190,208,179,208,180,208,176,208,191,208,190,209,135,209,130,
+208,184,208,159,208,190,209,129,208,187,208,181,209,130,208,176,208,186,208,184,
+208,181,208,189,208,190,208,178,209,139,208,185,209,129,209,130,208,190,208,184,
+209,130,209,130,208,176,208,186,208,184,209,133,209,129,209,128,208,176,208,183,
+209,131,208,161,208,176,208,189,208,186,209,130,209,132,208,190,209,128,209,131,
+208,188,208,154,208,190,208,179,208,180,208,176,208,186,208,189,208,184,208,179,
+208,184,209,129,208,187,208,190,208,178,208,176,208,189,208,176,209,136,208,181,
+208,185,208,189,208,176,208,185,209,130,208,184,209,129,208,178,208,190,208,184,
+208,188,209,129,208,178,209,143,208,183,209,140,208,187,209,142,208,177,208,190,
+208,185,209,135,208,176,209,129,209,130,208,190,209,129,209,128,208,181,208,180,
+208,184,208,154,209,128,208,190,208,188,208,181,208,164,208,190,209,128,209,131,
+208,188,209,128,209,139,208,189,208,186,208,181,209,129,209,130,208,176,208,187,
+208,184,208,191,208,190,208,184,209,129,208,186,209,130,209,139,209,129,209,143,
+209,135,208,188,208,181,209,129,209,143,209,134,209,134,208,181,208,189,209,130,
+209,128,209,130,209,128,209,131,208,180,208,176,209,129,208,176,208,188,209,139,
+209,133,209,128,209,139,208,189,208,186,208,176,208,157,208,190,208,178,209,139,
+208,185,209,135,208,176,209,129,208,190,208,178,208,188,208,181,209,129,209,130,
+208,176,209,132,208,184,208,187,209,140,208,188,208,188,208,176,209,128,209,130,
+208,176,209,129,209,130,209,128,208,176,208,189,208,188,208,181,209,129,209,130,
+208,181,209,130,208,181,208,186,209,129,209,130,208,189,208,176,209,136,208,184,
+209,133,208,188,208,184,208,189,209,131,209,130,208,184,208,188,208,181,208,189,
+208,184,208,184,208,188,208,181,209,142,209,130,208,189,208,190,208,188,208,181,
+209,128,208,179,208,190,209,128,208,190,208,180,209,129,208,176,208,188,208,190,
+208,188,209,141,209,130,208,190,208,188,209,131,208,186,208,190,208,189,209,134,
+208,181,209,129,208,178,208,190,208,181,208,188,208,186,208,176,208,186,208,190,
+208,185,208,144,209,128,209,133,208,184,208,178,217,133,217,134,216,170,216,175,
+217,137,216,165,216,177,216,179,216,167,217,132,216,177,216,179,216,167,217,132,
+216,169,216,167,217,132,216,185,216,167,217,133,217,131,216,170,216,168,217,135,
+216,167,216,168,216,177,216,167,217,133,216,172,216,167,217,132,217,138,217,136,
+217,133,216,167,217,132,216,181,217,136,216,177,216,172,216,175,217,138,216,175,
+216,169,216,167,217,132,216,185,216,182,217,136,216,165,216,182,216,167,217,129,
+216,169,216,167,217,132,217,130,216,179,217,133,216,167,217,132,216,185,216,167,
+216,168,216,170,216,173,217,133,217,138,217,132,217,133,217,132,217,129,216,167,
+216,170,217,133,217,132,216,170,217,130,217,137,216,170,216,185,216,175,217,138,
+217,132,216,167,217,132,216,180,216,185,216,177,216,163,216,174,216,168,216,167,
+216,177,216,170,216,183,217,136,217,138,216,177,216,185,217,132,217,138,217,131,
+217,133,216,165,216,177,217,129,216,167,217,130,216,183,217,132,216,168,216,167,
+216,170,216,167,217,132,217,132,216,186,216,169,216,170,216,177,216,170,217,138,
+216,168,216,167,217,132,217,134,216,167,216,179,216,167,217,132,216,180,217,138,
+216,174,217,133,217,134,216,170,216,175,217,138,216,167,217,132,216,185,216,177,
+216,168,216,167,217,132,217,130,216,181,216,181,216,167,217,129,217,132,216,167,
+217,133,216,185,217,132,217,138,217,135,216,167,216,170,216,173,216,175,217,138,
+216,171,216,167,217,132,217,132,217,135,217,133,216,167,217,132,216,185,217,133,
+217,132,217,133,217,131,216,170,216,168,216,169,217,138,217,133,217,131,217,134,
+217,131,216,167,217,132,216,183,217,129,217,132,217,129,217,138,216,175,217,138,
+217,136,216,165,216,175,216,167,216,177,216,169,216,170,216,167,216,177,217,138,
+216,174,216,167,217,132,216,181,216,173,216,169,216,170,216,179,216,172,217,138,
+217,132,216,167,217,132,217,136,217,130,216,170,216,185,217,134,216,175,217,133,
+216,167,217,133,216,175,217,138,217,134,216,169,216,170,216,181,217,133,217,138,
+217,133,216,163,216,177,216,180,217,138,217,129,216,167,217,132,216,176,217,138,
+217,134,216,185,216,177,216,168,217,138,216,169,216,168,217,136,216,167,216,168,
+216,169,216,163,217,132,216,185,216,167,216,168,216,167,217,132,216,179,217,129,
+216,177,217,133,216,180,216,167,217,131,217,132,216,170,216,185,216,167,217,132,
+217,137,216,167,217,132,216,163,217,136,217,132,216,167,217,132,216,179,217,134,
+216,169,216,172,216,167,217,133,216,185,216,169,216,167,217,132,216,181,216,173,
+217,129,216,167,217,132,216,175,217,138,217,134,217,131,217,132,217,133,216,167,
+216,170,216,167,217,132,216,174,216,167,216,181,216,167,217,132,217,133,217,132,
+217,129,216,163,216,185,216,182,216,167,216,161,217,131,216,170,216,167,216,168,
+216,169,216,167,217,132,216,174,217,138,216,177,216,177,216,179,216,167,216,166,
+217,132,216,167,217,132,217,130,217,132,216,168,216,167,217,132,216,163,216,175,
+216,168,217,133,217,130,216,167,216,183,216,185,217,133,216,177,216,167,216,179,
+217,132,217,133,217,134,216,183,217,130,216,169,216,167,217,132,217,131,216,170,
+216,168,216,167,217,132,216,177,216,172,217,132,216,167,216,180,216,170,216,177,
+217,131,216,167,217,132,217,130,216,175,217,133,217,138,216,185,216,183,217,138,
+217,131,115,66,121,84,97,103,78,97,109,101,40,46,106,112,103,34,32,97,108,116,61
+,34,49,112,120,32,115,111,108,105,100,32,35,46,103,105,102,34,32,97,108,116,61,
+34,116,114,97,110,115,112,97,114,101,110,116,105,110,102,111,114,109,97,116,105,
+111,110,97,112,112,108,105,99,97,116,105,111,110,34,32,111,110,99,108,105,99,107
+,61,34,101,115,116,97,98,108,105,115,104,101,100,97,100,118,101,114,116,105,115,
+105,110,103,46,112,110,103,34,32,97,108,116,61,34,101,110,118,105,114,111,110,
+109,101,110,116,112,101,114,102,111,114,109,97,110,99,101,97,112,112,114,111,112
+,114,105,97,116,101,38,97,109,112,59,109,100,97,115,104,59,105,109,109,101,100,
+105,97,116,101,108,121,60,47,115,116,114,111,110,103,62,60,47,114,97,116,104,101
+,114,32,116,104,97,110,116,101,109,112,101,114,97,116,117,114,101,100,101,118,
+101,108,111,112,109,101,110,116,99,111,109,112,101,116,105,116,105,111,110,112,
+108,97,99,101,104,111,108,100,101,114,118,105,115,105,98,105,108,105,116,121,58,
+99,111,112,121,114,105,103,104,116,34,62,48,34,32,104,101,105,103,104,116,61,34,
+101,118,101,110,32,116,104,111,117,103,104,114,101,112,108,97,99,101,109,101,110
+,116,100,101,115,116,105,110,97,116,105,111,110,67,111,114,112,111,114,97,116,
+105,111,110,60,117,108,32,99,108,97,115,115,61,34,65,115,115,111,99,105,97,116,
+105,111,110,105,110,100,105,118,105,100,117,97,108,115,112,101,114,115,112,101,
+99,116,105,118,101,115,101,116,84,105,109,101,111,117,116,40,117,114,108,40,104,
+116,116,112,58,47,47,109,97,116,104,101,109,97,116,105,99,115,109,97,114,103,105
+,110,45,116,111,112,58,101,118,101,110,116,117,97,108,108,121,32,100,101,115,99,
+114,105,112,116,105,111,110,41,32,110,111,45,114,101,112,101,97,116,99,111,108,
+108,101,99,116,105,111,110,115,46,74,80,71,124,116,104,117,109,98,124,112,97,114
+,116,105,99,105,112,97,116,101,47,104,101,97,100,62,60,98,111,100,121,102,108,
+111,97,116,58,108,101,102,116,59,60,108,105,32,99,108,97,115,115,61,34,104,117,
+110,100,114,101,100,115,32,111,102,10,10,72,111,119,101,118,101,114,44,32,99,111
+,109,112,111,115,105,116,105,111,110,99,108,101,97,114,58,98,111,116,104,59,99,
+111,111,112,101,114,97,116,105,111,110,119,105,116,104,105,110,32,116,104,101,32
+,108,97,98,101,108,32,102,111,114,61,34,98,111,114,100,101,114,45,116,111,112,58
+,78,101,119,32,90,101,97,108,97,110,100,114,101,99,111,109,109,101,110,100,101,
+100,112,104,111,116,111,103,114,97,112,104,121,105,110,116,101,114,101,115,116,
+105,110,103,38,108,116,59,115,117,112,38,103,116,59,99,111,110,116,114,111,118,
+101,114,115,121,78,101,116,104,101,114,108,97,110,100,115,97,108,116,101,114,110
+,97,116,105,118,101,109,97,120,108,101,110,103,116,104,61,34,115,119,105,116,122
+,101,114,108,97,110,100,68,101,118,101,108,111,112,109,101,110,116,101,115,115,
+101,110,116,105,97,108,108,121,10,10,65,108,116,104,111,117,103,104,32,60,47,116
+,101,120,116,97,114,101,97,62,116,104,117,110,100,101,114,98,105,114,100,114,101
+,112,114,101,115,101,110,116,101,100,38,97,109,112,59,110,100,97,115,104,59,115,
+112,101,99,117,108,97,116,105,111,110,99,111,109,109,117,110,105,116,105,101,115
+,108,101,103,105,115,108,97,116,105,111,110,101,108,101,99,116,114,111,110,105,
+99,115,10,9,60,100,105,118,32,105,100,61,34,105,108,108,117,115,116,114,97,116,
+101,100,101,110,103,105,110,101,101,114,105,110,103,116,101,114,114,105,116,111,
+114,105,101,115,97,117,116,104,111,114,105,116,105,101,115,100,105,115,116,114,
+105,98,117,116,101,100,54,34,32,104,101,105,103,104,116,61,34,115,97,110,115,45,
+115,101,114,105,102,59,99,97,112,97,98,108,101,32,111,102,32,100,105,115,97,112,
+112,101,97,114,101,100,105,110,116,101,114,97,99,116,105,118,101,108,111,111,107
+,105,110,103,32,102,111,114,105,116,32,119,111,117,108,100,32,98,101,65,102,103,
+104,97,110,105,115,116,97,110,119,97,115,32,99,114,101,97,116,101,100,77,97,116,
+104,46,102,108,111,111,114,40,115,117,114,114,111,117,110,100,105,110,103,99,97,
+110,32,97,108,115,111,32,98,101,111,98,115,101,114,118,97,116,105,111,110,109,97
+,105,110,116,101,110,97,110,99,101,101,110,99,111,117,110,116,101,114,101,100,60
+,104,50,32,99,108,97,115,115,61,34,109,111,114,101,32,114,101,99,101,110,116,105
+,116,32,104,97,115,32,98,101,101,110,105,110,118,97,115,105,111,110,32,111,102,
+41,46,103,101,116,84,105,109,101,40,41,102,117,110,100,97,109,101,110,116,97,108
+,68,101,115,112,105,116,101,32,116,104,101,34,62,60,100,105,118,32,105,100,61,34
+,105,110,115,112,105,114,97,116,105,111,110,101,120,97,109,105,110,97,116,105,
+111,110,112,114,101,112,97,114,97,116,105,111,110,101,120,112,108,97,110,97,116,
+105,111,110,60,105,110,112,117,116,32,105,100,61,34,60,47,97,62,60,47,115,112,97
+,110,62,118,101,114,115,105,111,110,115,32,111,102,105,110,115,116,114,117,109,
+101,110,116,115,98,101,102,111,114,101,32,116,104,101,32,32,61,32,39,104,116,116
+,112,58,47,47,68,101,115,99,114,105,112,116,105,111,110,114,101,108,97,116,105,
+118,101,108,121,32,46,115,117,98,115,116,114,105,110,103,40,101,97,99,104,32,111
+,102,32,116,104,101,101,120,112,101,114,105,109,101,110,116,115,105,110,102,108,
+117,101,110,116,105,97,108,105,110,116,101,103,114,97,116,105,111,110,109,97,110
+,121,32,112,101,111,112,108,101,100,117,101,32,116,111,32,116,104,101,32,99,111,
+109,98,105,110,97,116,105,111,110,100,111,32,110,111,116,32,104,97,118,101,77,
+105,100,100,108,101,32,69,97,115,116,60,110,111,115,99,114,105,112,116,62,60,99,
+111,112,121,114,105,103,104,116,34,32,112,101,114,104,97,112,115,32,116,104,101,
+105,110,115,116,105,116,117,116,105,111,110,105,110,32,68,101,99,101,109,98,101,
+114,97,114,114,97,110,103,101,109,101,110,116,109,111,115,116,32,102,97,109,111,
+117,115,112,101,114,115,111,110,97,108,105,116,121,99,114,101,97,116,105,111,110
+,32,111,102,108,105,109,105,116,97,116,105,111,110,115,101,120,99,108,117,115,
+105,118,101,108,121,115,111,118,101,114,101,105,103,110,116,121,45,99,111,110,
+116,101,110,116,34,62,10,60,116,100,32,99,108,97,115,115,61,34,117,110,100,101,
+114,103,114,111,117,110,100,112,97,114,97,108,108,101,108,32,116,111,100,111,99,
+116,114,105,110,101,32,111,102,111,99,99,117,112,105,101,100,32,98,121,116,101,
+114,109,105,110,111,108,111,103,121,82,101,110,97,105,115,115,97,110,99,101,97,
+32,110,117,109,98,101,114,32,111,102,115,117,112,112,111,114,116,32,102,111,114,
+101,120,112,108,111,114,97,116,105,111,110,114,101,99,111,103,110,105,116,105,
+111,110,112,114,101,100,101,99,101,115,115,111,114,60,105,109,103,32,115,114,99,
+61,34,47,60,104,49,32,99,108,97,115,115,61,34,112,117,98,108,105,99,97,116,105,
+111,110,109,97,121,32,97,108,115,111,32,98,101,115,112,101,99,105,97,108,105,122
+,101,100,60,47,102,105,101,108,100,115,101,116,62,112,114,111,103,114,101,115,
+115,105,118,101,109,105,108,108,105,111,110,115,32,111,102,115,116,97,116,101,
+115,32,116,104,97,116,101,110,102,111,114,99,101,109,101,110,116,97,114,111,117,
+110,100,32,116,104,101,32,111,110,101,32,97,110,111,116,104,101,114,46,112,97,
+114,101,110,116,78,111,100,101,97,103,114,105,99,117,108,116,117,114,101,65,108,
+116,101,114,110,97,116,105,118,101,114,101,115,101,97,114,99,104,101,114,115,116
+,111,119,97,114,100,115,32,116,104,101,77,111,115,116,32,111,102,32,116,104,101,
+109,97,110,121,32,111,116,104,101,114,32,40,101,115,112,101,99,105,97,108,108,
+121,60,116,100,32,119,105,100,116,104,61,34,59,119,105,100,116,104,58,49,48,48,
+37,105,110,100,101,112,101,110,100,101,110,116,60,104,51,32,99,108,97,115,115,61
+,34,32,111,110,99,104,97,110,103,101,61,34,41,46,97,100,100,67,108,97,115,115,40
+,105,110,116,101,114,97,99,116,105,111,110,79,110,101,32,111,102,32,116,104,101,
+32,100,97,117,103,104,116,101,114,32,111,102,97,99,99,101,115,115,111,114,105,
+101,115,98,114,97,110,99,104,101,115,32,111,102,13,10,60,100,105,118,32,105,100,
+61,34,116,104,101,32,108,97,114,103,101,115,116,100,101,99,108,97,114,97,116,105
+,111,110,114,101,103,117,108,97,116,105,111,110,115,73,110,102,111,114,109,97,
+116,105,111,110,116,114,97,110,115,108,97,116,105,111,110,100,111,99,117,109,101
+,110,116,97,114,121,105,110,32,111,114,100,101,114,32,116,111,34,62,10,60,104,
+101,97,100,62,10,60,34,32,104,101,105,103,104,116,61,34,49,97,99,114,111,115,115
+,32,116,104,101,32,111,114,105,101,110,116,97,116,105,111,110,41,59,60,47,115,99
+,114,105,112,116,62,105,109,112,108,101,109,101,110,116,101,100,99,97,110,32,98,
+101,32,115,101,101,110,116,104,101,114,101,32,119,97,115,32,97,100,101,109,111,
+110,115,116,114,97,116,101,99,111,110,116,97,105,110,101,114,34,62,99,111,110,
+110,101,99,116,105,111,110,115,116,104,101,32,66,114,105,116,105,115,104,119,97,
+115,32,119,114,105,116,116,101,110,33,105,109,112,111,114,116,97,110,116,59,112,
+120,59,32,109,97,114,103,105,110,45,102,111,108,108,111,119,101,100,32,98,121,97
+,98,105,108,105,116,121,32,116,111,32,99,111,109,112,108,105,99,97,116,101,100,
+100,117,114,105,110,103,32,116,104,101,32,105,109,109,105,103,114,97,116,105,111
+,110,97,108,115,111,32,99,97,108,108,101,100,60,104,52,32,99,108,97,115,115,61,
+34,100,105,115,116,105,110,99,116,105,111,110,114,101,112,108,97,99,101,100,32,
+98,121,103,111,118,101,114,110,109,101,110,116,115,108,111,99,97,116,105,111,110
+,32,111,102,105,110,32,78,111,118,101,109,98,101,114,119,104,101,116,104,101,114
+,32,116,104,101,60,47,112,62,10,60,47,100,105,118,62,97,99,113,117,105,115,105,
+116,105,111,110,99,97,108,108,101,100,32,116,104,101,32,112,101,114,115,101,99,
+117,116,105,111,110,100,101,115,105,103,110,97,116,105,111,110,123,102,111,110,
+116,45,115,105,122,101,58,97,112,112,101,97,114,101,100,32,105,110,105,110,118,
+101,115,116,105,103,97,116,101,101,120,112,101,114,105,101,110,99,101,100,109,
+111,115,116,32,108,105,107,101,108,121,119,105,100,101,108,121,32,117,115,101,
+100,100,105,115,99,117,115,115,105,111,110,115,112,114,101,115,101,110,99,101,32
+,111,102,32,40,100,111,99,117,109,101,110,116,46,101,120,116,101,110,115,105,118
+,101,108,121,73,116,32,104,97,115,32,98,101,101,110,105,116,32,100,111,101,115,
+32,110,111,116,99,111,110,116,114,97,114,121,32,116,111,105,110,104,97,98,105,
+116,97,110,116,115,105,109,112,114,111,118,101,109,101,110,116,115,99,104,111,
+108,97,114,115,104,105,112,99,111,110,115,117,109,112,116,105,111,110,105,110,
+115,116,114,117,99,116,105,111,110,102,111,114,32,101,120,97,109,112,108,101,111
+,110,101,32,111,114,32,109,111,114,101,112,120,59,32,112,97,100,100,105,110,103,
+116,104,101,32,99,117,114,114,101,110,116,97,32,115,101,114,105,101,115,32,111,
+102,97,114,101,32,117,115,117,97,108,108,121,114,111,108,101,32,105,110,32,116,
+104,101,112,114,101,118,105,111,117,115,108,121,32,100,101,114,105,118,97,116,
+105,118,101,115,101,118,105,100,101,110,99,101,32,111,102,101,120,112,101,114,
+105,101,110,99,101,115,99,111,108,111,114,115,99,104,101,109,101,115,116,97,116,
+101,100,32,116,104,97,116,99,101,114,116,105,102,105,99,97,116,101,60,47,97,62,
+60,47,100,105,118,62,10,32,115,101,108,101,99,116,101,100,61,34,104,105,103,104,
+32,115,99,104,111,111,108,114,101,115,112,111,110,115,101,32,116,111,99,111,109,
+102,111,114,116,97,98,108,101,97,100,111,112,116,105,111,110,32,111,102,116,104,
+114,101,101,32,121,101,97,114,115,116,104,101,32,99,111,117,110,116,114,121,105,
+110,32,70,101,98,114,117,97,114,121,115,111,32,116,104,97,116,32,116,104,101,112
+,101,111,112,108,101,32,119,104,111,32,112,114,111,118,105,100,101,100,32,98,121
+,60,112,97,114,97,109,32,110,97,109,101,97,102,102,101,99,116,101,100,32,98,121,
+105,110,32,116,101,114,109,115,32,111,102,97,112,112,111,105,110,116,109,101,110
+,116,73,83,79,45,56,56,53,57,45,49,34,119,97,115,32,98,111,114,110,32,105,110,
+104,105,115,116,111,114,105,99,97,108,32,114,101,103,97,114,100,101,100,32,97,
+115,109,101,97,115,117,114,101,109,101,110,116,105,115,32,98,97,115,101,100,32,
+111,110,32,97,110,100,32,111,116,104,101,114,32,58,32,102,117,110,99,116,105,111
+,110,40,115,105,103,110,105,102,105,99,97,110,116,99,101,108,101,98,114,97,116,
+105,111,110,116,114,97,110,115,109,105,116,116,101,100,47,106,115,47,106,113,117
+,101,114,121,46,105,115,32,107,110,111,119,110,32,97,115,116,104,101,111,114,101
+,116,105,99,97,108,32,116,97,98,105,110,100,101,120,61,34,105,116,32,99,111,117,
+108,100,32,98,101,60,110,111,115,99,114,105,112,116,62,10,104,97,118,105,110,103
+,32,98,101,101,110,13,10,60,104,101,97,100,62,13,10,60,32,38,113,117,111,116,59,
+84,104,101,32,99,111,109,112,105,108,97,116,105,111,110,104,101,32,104,97,100,32
+,98,101,101,110,112,114,111,100,117,99,101,100,32,98,121,112,104,105,108,111,115
+,111,112,104,101,114,99,111,110,115,116,114,117,99,116,101,100,105,110,116,101,
+110,100,101,100,32,116,111,97,109,111,110,103,32,111,116,104,101,114,99,111,109,
+112,97,114,101,100,32,116,111,116,111,32,115,97,121,32,116,104,97,116,69,110,103
+,105,110,101,101,114,105,110,103,97,32,100,105,102,102,101,114,101,110,116,114,
+101,102,101,114,114,101,100,32,116,111,100,105,102,102,101,114,101,110,99,101,
+115,98,101,108,105,101,102,32,116,104,97,116,112,104,111,116,111,103,114,97,112,
+104,115,105,100,101,110,116,105,102,121,105,110,103,72,105,115,116,111,114,121,
+32,111,102,32,82,101,112,117,98,108,105,99,32,111,102,110,101,99,101,115,115,97,
+114,105,108,121,112,114,111,98,97,98,105,108,105,116,121,116,101,99,104,110,105,
+99,97,108,108,121,108,101,97,118,105,110,103,32,116,104,101,115,112,101,99,116,
+97,99,117,108,97,114,102,114,97,99,116,105,111,110,32,111,102,101,108,101,99,116
+,114,105,99,105,116,121,104,101,97,100,32,111,102,32,116,104,101,114,101,115,116
+,97,117,114,97,110,116,115,112,97,114,116,110,101,114,115,104,105,112,101,109,
+112,104,97,115,105,115,32,111,110,109,111,115,116,32,114,101,99,101,110,116,115,
+104,97,114,101,32,119,105,116,104,32,115,97,121,105,110,103,32,116,104,97,116,
+102,105,108,108,101,100,32,119,105,116,104,100,101,115,105,103,110,101,100,32,
+116,111,105,116,32,105,115,32,111,102,116,101,110,34,62,60,47,105,102,114,97,109
+,101,62,97,115,32,102,111,108,108,111,119,115,58,109,101,114,103,101,100,32,119,
+105,116,104,116,104,114,111,117,103,104,32,116,104,101,99,111,109,109,101,114,99
+,105,97,108,32,112,111,105,110,116,101,100,32,111,117,116,111,112,112,111,114,
+116,117,110,105,116,121,118,105,101,119,32,111,102,32,116,104,101,114,101,113,
+117,105,114,101,109,101,110,116,100,105,118,105,115,105,111,110,32,111,102,112,
+114,111,103,114,97,109,109,105,110,103,104,101,32,114,101,99,101,105,118,101,100
+,115,101,116,73,110,116,101,114,118,97,108,34,62,60,47,115,112,97,110,62,60,47,
+105,110,32,78,101,119,32,89,111,114,107,97,100,100,105,116,105,111,110,97,108,32
+,99,111,109,112,114,101,115,115,105,111,110,10,10,60,100,105,118,32,105,100,61,
+34,105,110,99,111,114,112,111,114,97,116,101,59,60,47,115,99,114,105,112,116,62,
+60,97,116,116,97,99,104,69,118,101,110,116,98,101,99,97,109,101,32,116,104,101,
+32,34,32,116,97,114,103,101,116,61,34,95,99,97,114,114,105,101,100,32,111,117,
+116,83,111,109,101,32,111,102,32,116,104,101,115,99,105,101,110,99,101,32,97,110
+,100,116,104,101,32,116,105,109,101,32,111,102,67,111,110,116,97,105,110,101,114
+,34,62,109,97,105,110,116,97,105,110,105,110,103,67,104,114,105,115,116,111,112,
+104,101,114,77,117,99,104,32,111,102,32,116,104,101,119,114,105,116,105,110,103,
+115,32,111,102,34,32,104,101,105,103,104,116,61,34,50,115,105,122,101,32,111,102
+,32,116,104,101,118,101,114,115,105,111,110,32,111,102,32,109,105,120,116,117,
+114,101,32,111,102,32,98,101,116,119,101,101,110,32,116,104,101,69,120,97,109,
+112,108,101,115,32,111,102,101,100,117,99,97,116,105,111,110,97,108,99,111,109,
+112,101,116,105,116,105,118,101,32,111,110,115,117,98,109,105,116,61,34,100,105,
+114,101,99,116,111,114,32,111,102,100,105,115,116,105,110,99,116,105,118,101,47,
+68,84,68,32,88,72,84,77,76,32,114,101,108,97,116,105,110,103,32,116,111,116,101,
+110,100,101,110,99,121,32,116,111,112,114,111,118,105,110,99,101,32,111,102,119,
+104,105,99,104,32,119,111,117,108,100,100,101,115,112,105,116,101,32,116,104,101
+,115,99,105,101,110,116,105,102,105,99,32,108,101,103,105,115,108,97,116,117,114
+,101,46,105,110,110,101,114,72,84,77,76,32,97,108,108,101,103,97,116,105,111,110
+,115,65,103,114,105,99,117,108,116,117,114,101,119,97,115,32,117,115,101,100,32,
+105,110,97,112,112,114,111,97,99,104,32,116,111,105,110,116,101,108,108,105,103,
+101,110,116,121,101,97,114,115,32,108,97,116,101,114,44,115,97,110,115,45,115,
+101,114,105,102,100,101,116,101,114,109,105,110,105,110,103,80,101,114,102,111,
+114,109,97,110,99,101,97,112,112,101,97,114,97,110,99,101,115,44,32,119,104,105,
+99,104,32,105,115,32,102,111,117,110,100,97,116,105,111,110,115,97,98,98,114,101
+,118,105,97,116,101,100,104,105,103,104,101,114,32,116,104,97,110,115,32,102,114
+,111,109,32,116,104,101,32,105,110,100,105,118,105,100,117,97,108,32,99,111,109,
+112,111,115,101,100,32,111,102,115,117,112,112,111,115,101,100,32,116,111,99,108
+,97,105,109,115,32,116,104,97,116,97,116,116,114,105,98,117,116,105,111,110,102,
+111,110,116,45,115,105,122,101,58,49,101,108,101,109,101,110,116,115,32,111,102,
+72,105,115,116,111,114,105,99,97,108,32,104,105,115,32,98,114,111,116,104,101,
+114,97,116,32,116,104,101,32,116,105,109,101,97,110,110,105,118,101,114,115,97,
+114,121,103,111,118,101,114,110,101,100,32,98,121,114,101,108,97,116,101,100,32,
+116,111,32,117,108,116,105,109,97,116,101,108,121,32,105,110,110,111,118,97,116,
+105,111,110,115,105,116,32,105,115,32,115,116,105,108,108,99,97,110,32,111,110,
+108,121,32,98,101,100,101,102,105,110,105,116,105,111,110,115,116,111,71,77,84,
+83,116,114,105,110,103,65,32,110,117,109,98,101,114,32,111,102,105,109,103,32,99
+,108,97,115,115,61,34,69,118,101,110,116,117,97,108,108,121,44,119,97,115,32,99,
+104,97,110,103,101,100,111,99,99,117,114,114,101,100,32,105,110,110,101,105,103,
+104,98,111,114,105,110,103,100,105,115,116,105,110,103,117,105,115,104,119,104,
+101,110,32,104,101,32,119,97,115,105,110,116,114,111,100,117,99,105,110,103,116,
+101,114,114,101,115,116,114,105,97,108,77,97,110,121,32,111,102,32,116,104,101,
+97,114,103,117,101,115,32,116,104,97,116,97,110,32,65,109,101,114,105,99,97,110,
+99,111,110,113,117,101,115,116,32,111,102,119,105,100,101,115,112,114,101,97,100
+,32,119,101,114,101,32,107,105,108,108,101,100,115,99,114,101,101,110,32,97,110,
+100,32,73,110,32,111,114,100,101,114,32,116,111,101,120,112,101,99,116,101,100,
+32,116,111,100,101,115,99,101,110,100,97,110,116,115,97,114,101,32,108,111,99,97
+,116,101,100,108,101,103,105,115,108,97,116,105,118,101,103,101,110,101,114,97,
+116,105,111,110,115,32,98,97,99,107,103,114,111,117,110,100,109,111,115,116,32,
+112,101,111,112,108,101,121,101,97,114,115,32,97,102,116,101,114,116,104,101,114
+,101,32,105,115,32,110,111,116,104,101,32,104,105,103,104,101,115,116,102,114,
+101,113,117,101,110,116,108,121,32,116,104,101,121,32,100,111,32,110,111,116,97,
+114,103,117,101,100,32,116,104,97,116,115,104,111,119,101,100,32,116,104,97,116,
+112,114,101,100,111,109,105,110,97,110,116,116,104,101,111,108,111,103,105,99,97
+,108,98,121,32,116,104,101,32,116,105,109,101,99,111,110,115,105,100,101,114,105
+,110,103,115,104,111,114,116,45,108,105,118,101,100,60,47,115,112,97,110,62,60,
+47,97,62,99,97,110,32,98,101,32,117,115,101,100,118,101,114,121,32,108,105,116,
+116,108,101,111,110,101,32,111,102,32,116,104,101,32,104,97,100,32,97,108,114,
+101,97,100,121,105,110,116,101,114,112,114,101,116,101,100,99,111,109,109,117,
+110,105,99,97,116,101,102,101,97,116,117,114,101,115,32,111,102,103,111,118,101,
+114,110,109,101,110,116,44,60,47,110,111,115,99,114,105,112,116,62,101,110,116,
+101,114,101,100,32,116,104,101,34,32,104,101,105,103,104,116,61,34,51,73,110,100
+,101,112,101,110,100,101,110,116,112,111,112,117,108,97,116,105,111,110,115,108,
+97,114,103,101,45,115,99,97,108,101,46,32,65,108,116,104,111,117,103,104,32,117,
+115,101,100,32,105,110,32,116,104,101,100,101,115,116,114,117,99,116,105,111,110
+,112,111,115,115,105,98,105,108,105,116,121,115,116,97,114,116,105,110,103,32,
+105,110,116,119,111,32,111,114,32,109,111,114,101,101,120,112,114,101,115,115,
+105,111,110,115,115,117,98,111,114,100,105,110,97,116,101,108,97,114,103,101,114
+,32,116,104,97,110,104,105,115,116,111,114,121,32,97,110,100,60,47,111,112,116,
+105,111,110,62,13,10,67,111,110,116,105,110,101,110,116,97,108,101,108,105,109,
+105,110,97,116,105,110,103,119,105,108,108,32,110,111,116,32,98,101,112,114,97,
+99,116,105,99,101,32,111,102,105,110,32,102,114,111,110,116,32,111,102,115,105,
+116,101,32,111,102,32,116,104,101,101,110,115,117,114,101,32,116,104,97,116,116,
+111,32,99,114,101,97,116,101,32,97,109,105,115,115,105,115,115,105,112,112,105,
+112,111,116,101,110,116,105,97,108,108,121,111,117,116,115,116,97,110,100,105,
+110,103,98,101,116,116,101,114,32,116,104,97,110,119,104,97,116,32,105,115,32,
+110,111,119,115,105,116,117,97,116,101,100,32,105,110,109,101,116,97,32,110,97,
+109,101,61,34,84,114,97,100,105,116,105,111,110,97,108,115,117,103,103,101,115,
+116,105,111,110,115,84,114,97,110,115,108,97,116,105,111,110,116,104,101,32,102,
+111,114,109,32,111,102,97,116,109,111,115,112,104,101,114,105,99,105,100,101,111
+,108,111,103,105,99,97,108,101,110,116,101,114,112,114,105,115,101,115,99,97,108
+,99,117,108,97,116,105,110,103,101,97,115,116,32,111,102,32,116,104,101,114,101,
+109,110,97,110,116,115,32,111,102,112,108,117,103,105,110,115,112,97,103,101,47,
+105,110,100,101,120,46,112,104,112,63,114,101,109,97,105,110,101,100,32,105,110,
+116,114,97,110,115,102,111,114,109,101,100,72,101,32,119,97,115,32,97,108,115,
+111,119,97,115,32,97,108,114,101,97,100,121,115,116,97,116,105,115,116,105,99,97
+,108,105,110,32,102,97,118,111,114,32,111,102,77,105,110,105,115,116,114,121,32,
+111,102,109,111,118,101,109,101,110,116,32,111,102,102,111,114,109,117,108,97,
+116,105,111,110,105,115,32,114,101,113,117,105,114,101,100,60,108,105,110,107,32
+,114,101,108,61,34,84,104,105,115,32,105,115,32,116,104,101,32,60,97,32,104,114,
+101,102,61,34,47,112,111,112,117,108,97,114,105,122,101,100,105,110,118,111,108,
+118,101,100,32,105,110,97,114,101,32,117,115,101,100,32,116,111,97,110,100,32,
+115,101,118,101,114,97,108,109,97,100,101,32,98,121,32,116,104,101,115,101,101,
+109,115,32,116,111,32,98,101,108,105,107,101,108,121,32,116,104,97,116,80,97,108
+,101,115,116,105,110,105,97,110,110,97,109,101,100,32,97,102,116,101,114,105,116
+,32,104,97,100,32,98,101,101,110,109,111,115,116,32,99,111,109,109,111,110,116,
+111,32,114,101,102,101,114,32,116,111,98,117,116,32,116,104,105,115,32,105,115,
+99,111,110,115,101,99,117,116,105,118,101,116,101,109,112,111,114,97,114,105,108
+,121,73,110,32,103,101,110,101,114,97,108,44,99,111,110,118,101,110,116,105,111,
+110,115,116,97,107,101,115,32,112,108,97,99,101,115,117,98,100,105,118,105,115,
+105,111,110,116,101,114,114,105,116,111,114,105,97,108,111,112,101,114,97,116,
+105,111,110,97,108,112,101,114,109,97,110,101,110,116,108,121,119,97,115,32,108,
+97,114,103,101,108,121,111,117,116,98,114,101,97,107,32,111,102,105,110,32,116,
+104,101,32,112,97,115,116,102,111,108,108,111,119,105,110,103,32,97,32,120,109,
+108,110,115,58,111,103,61,34,62,60,97,32,99,108,97,115,115,61,34,99,108,97,115,
+115,61,34,116,101,120,116,67,111,110,118,101,114,115,105,111,110,32,109,97,121,
+32,98,101,32,117,115,101,100,109,97,110,117,102,97,99,116,117,114,101,97,102,116
+,101,114,32,98,101,105,110,103,99,108,101,97,114,102,105,120,34,62,10,113,117,
+101,115,116,105,111,110,32,111,102,119,97,115,32,101,108,101,99,116,101,100,116,
+111,32,98,101,99,111,109,101,32,97,98,101,99,97,117,115,101,32,111,102,32,115,
+111,109,101,32,112,101,111,112,108,101,105,110,115,112,105,114,101,100,32,98,121
+,115,117,99,99,101,115,115,102,117,108,32,97,32,116,105,109,101,32,119,104,101,
+110,109,111,114,101,32,99,111,109,109,111,110,97,109,111,110,103,115,116,32,116,
+104,101,97,110,32,111,102,102,105,99,105,97,108,119,105,100,116,104,58,49,48,48,
+37,59,116,101,99,104,110,111,108,111,103,121,44,119,97,115,32,97,100,111,112,116
+,101,100,116,111,32,107,101,101,112,32,116,104,101,115,101,116,116,108,101,109,
+101,110,116,115,108,105,118,101,32,98,105,114,116,104,115,105,110,100,101,120,46
+,104,116,109,108,34,67,111,110,110,101,99,116,105,99,117,116,97,115,115,105,103,
+110,101,100,32,116,111,38,97,109,112,59,116,105,109,101,115,59,97,99,99,111,117,
+110,116,32,102,111,114,97,108,105,103,110,61,114,105,103,104,116,116,104,101,32,
+99,111,109,112,97,110,121,97,108,119,97,121,115,32,98,101,101,110,114,101,116,
+117,114,110,101,100,32,116,111,105,110,118,111,108,118,101,109,101,110,116,66,
+101,99,97,117,115,101,32,116,104,101,116,104,105,115,32,112,101,114,105,111,100,
+34,32,110,97,109,101,61,34,113,34,32,99,111,110,102,105,110,101,100,32,116,111,
+97,32,114,101,115,117,108,116,32,111,102,118,97,108,117,101,61,34,34,32,47,62,
+105,115,32,97,99,116,117,97,108,108,121,69,110,118,105,114,111,110,109,101,110,
+116,13,10,60,47,104,101,97,100,62,13,10,67,111,110,118,101,114,115,101,108,121,
+44,62,10,60,100,105,118,32,105,100,61,34,48,34,32,119,105,100,116,104,61,34,49,
+105,115,32,112,114,111,98,97,98,108,121,104,97,118,101,32,98,101,99,111,109,101,
+99,111,110,116,114,111,108,108,105,110,103,116,104,101,32,112,114,111,98,108,101
+,109,99,105,116,105,122,101,110,115,32,111,102,112,111,108,105,116,105,99,105,97
+,110,115,114,101,97,99,104,101,100,32,116,104,101,97,115,32,101,97,114,108,121,
+32,97,115,58,110,111,110,101,59,32,111,118,101,114,60,116,97,98,108,101,32,99,
+101,108,108,118,97,108,105,100,105,116,121,32,111,102,100,105,114,101,99,116,108
+,121,32,116,111,111,110,109,111,117,115,101,100,111,119,110,119,104,101,114,101,
+32,105,116,32,105,115,119,104,101,110,32,105,116,32,119,97,115,109,101,109,98,
+101,114,115,32,111,102,32,114,101,108,97,116,105,111,110,32,116,111,97,99,99,111
+,109,109,111,100,97,116,101,97,108,111,110,103,32,119,105,116,104,32,73,110,32,
+116,104,101,32,108,97,116,101,116,104,101,32,69,110,103,108,105,115,104,100,101,
+108,105,99,105,111,117,115,34,62,116,104,105,115,32,105,115,32,110,111,116,116,
+104,101,32,112,114,101,115,101,110,116,105,102,32,116,104,101,121,32,97,114,101,
+97,110,100,32,102,105,110,97,108,108,121,97,32,109,97,116,116,101,114,32,111,102
+,13,10,9,60,47,100,105,118,62,13,10,13,10,60,47,115,99,114,105,112,116,62,102,97
+,115,116,101,114,32,116,104,97,110,109,97,106,111,114,105,116,121,32,111,102,97,
+102,116,101,114,32,119,104,105,99,104,99,111,109,112,97,114,97,116,105,118,101,
+116,111,32,109,97,105,110,116,97,105,110,105,109,112,114,111,118,101,32,116,104,
+101,97,119,97,114,100,101,100,32,116,104,101,101,114,34,32,99,108,97,115,115,61,
+34,102,114,97,109,101,98,111,114,100,101,114,114,101,115,116,111,114,97,116,105,
+111,110,105,110,32,116,104,101,32,115,97,109,101,97,110,97,108,121,115,105,115,
+32,111,102,116,104,101,105,114,32,102,105,114,115,116,68,117,114,105,110,103,32,
+116,104,101,32,99,111,110,116,105,110,101,110,116,97,108,115,101,113,117,101,110
+,99,101,32,111,102,102,117,110,99,116,105,111,110,40,41,123,102,111,110,116,45,
+115,105,122,101,58,32,119,111,114,107,32,111,110,32,116,104,101,60,47,115,99,114
+,105,112,116,62,10,60,98,101,103,105,110,115,32,119,105,116,104,106,97,118,97,
+115,99,114,105,112,116,58,99,111,110,115,116,105,116,117,101,110,116,119,97,115,
+32,102,111,117,110,100,101,100,101,113,117,105,108,105,98,114,105,117,109,97,115
+,115,117,109,101,32,116,104,97,116,105,115,32,103,105,118,101,110,32,98,121,110,
+101,101,100,115,32,116,111,32,98,101,99,111,111,114,100,105,110,97,116,101,115,
+116,104,101,32,118,97,114,105,111,117,115,97,114,101,32,112,97,114,116,32,111,
+102,111,110,108,121,32,105,110,32,116,104,101,115,101,99,116,105,111,110,115,32,
+111,102,105,115,32,97,32,99,111,109,109,111,110,116,104,101,111,114,105,101,115,
+32,111,102,100,105,115,99,111,118,101,114,105,101,115,97,115,115,111,99,105,97,
+116,105,111,110,101,100,103,101,32,111,102,32,116,104,101,115,116,114,101,110,
+103,116,104,32,111,102,112,111,115,105,116,105,111,110,32,105,110,112,114,101,
+115,101,110,116,45,100,97,121,117,110,105,118,101,114,115,97,108,108,121,116,111
+,32,102,111,114,109,32,116,104,101,98,117,116,32,105,110,115,116,101,97,100,99,
+111,114,112,111,114,97,116,105,111,110,97,116,116,97,99,104,101,100,32,116,111,
+105,115,32,99,111,109,109,111,110,108,121,114,101,97,115,111,110,115,32,102,111,
+114,32,38,113,117,111,116,59,116,104,101,32,99,97,110,32,98,101,32,109,97,100,
+101,119,97,115,32,97,98,108,101,32,116,111,119,104,105,99,104,32,109,101,97,110,
+115,98,117,116,32,100,105,100,32,110,111,116,111,110,77,111,117,115,101,79,118,
+101,114,97,115,32,112,111,115,115,105,98,108,101,111,112,101,114,97,116,101,100,
+32,98,121,99,111,109,105,110,103,32,102,114,111,109,116,104,101,32,112,114,105,
+109,97,114,121,97,100,100,105,116,105,111,110,32,111,102,102,111,114,32,115,101,
+118,101,114,97,108,116,114,97,110,115,102,101,114,114,101,100,97,32,112,101,114,
+105,111,100,32,111,102,97,114,101,32,97,98,108,101,32,116,111,104,111,119,101,
+118,101,114,44,32,105,116,115,104,111,117,108,100,32,104,97,118,101,109,117,99,
+104,32,108,97,114,103,101,114,10,9,60,47,115,99,114,105,112,116,62,97,100,111,
+112,116,101,100,32,116,104,101,112,114,111,112,101,114,116,121,32,111,102,100,
+105,114,101,99,116,101,100,32,98,121,101,102,102,101,99,116,105,118,101,108,121,
+119,97,115,32,98,114,111,117,103,104,116,99,104,105,108,100,114,101,110,32,111,
+102,80,114,111,103,114,97,109,109,105,110,103,108,111,110,103,101,114,32,116,104
+,97,110,109,97,110,117,115,99,114,105,112,116,115,119,97,114,32,97,103,97,105,
+110,115,116,98,121,32,109,101,97,110,115,32,111,102,97,110,100,32,109,111,115,
+116,32,111,102,115,105,109,105,108,97,114,32,116,111,32,112,114,111,112,114,105,
+101,116,97,114,121,111,114,105,103,105,110,97,116,105,110,103,112,114,101,115,
+116,105,103,105,111,117,115,103,114,97,109,109,97,116,105,99,97,108,101,120,112,
+101,114,105,101,110,99,101,46,116,111,32,109,97,107,101,32,116,104,101,73,116,32
+,119,97,115,32,97,108,115,111,105,115,32,102,111,117,110,100,32,105,110,99,111,
+109,112,101,116,105,116,111,114,115,105,110,32,116,104,101,32,85,46,83,46,114,
+101,112,108,97,99,101,32,116,104,101,98,114,111,117,103,104,116,32,116,104,101,
+99,97,108,99,117,108,97,116,105,111,110,102,97,108,108,32,111,102,32,116,104,101
+,116,104,101,32,103,101,110,101,114,97,108,112,114,97,99,116,105,99,97,108,108,
+121,105,110,32,104,111,110,111,114,32,111,102,114,101,108,101,97,115,101,100,32,
+105,110,114,101,115,105,100,101,110,116,105,97,108,97,110,100,32,115,111,109,101
+,32,111,102,107,105,110,103,32,111,102,32,116,104,101,114,101,97,99,116,105,111,
+110,32,116,111,49,115,116,32,69,97,114,108,32,111,102,99,117,108,116,117,114,101
+,32,97,110,100,112,114,105,110,99,105,112,97,108,108,121,60,47,116,105,116,108,
+101,62,10,32,32,116,104,101,121,32,99,97,110,32,98,101,98,97,99,107,32,116,111,
+32,116,104,101,115,111,109,101,32,111,102,32,104,105,115,101,120,112,111,115,117
+,114,101,32,116,111,97,114,101,32,115,105,109,105,108,97,114,102,111,114,109,32,
+111,102,32,116,104,101,97,100,100,70,97,118,111,114,105,116,101,99,105,116,105,
+122,101,110,115,104,105,112,112,97,114,116,32,105,110,32,116,104,101,112,101,111
+,112,108,101,32,119,105,116,104,105,110,32,112,114,97,99,116,105,99,101,116,111,
+32,99,111,110,116,105,110,117,101,38,97,109,112,59,109,105,110,117,115,59,97,112
+,112,114,111,118,101,100,32,98,121,32,116,104,101,32,102,105,114,115,116,32,97,
+108,108,111,119,101,100,32,116,104,101,97,110,100,32,102,111,114,32,116,104,101,
+102,117,110,99,116,105,111,110,105,110,103,112,108,97,121,105,110,103,32,116,104
+,101,115,111,108,117,116,105,111,110,32,116,111,104,101,105,103,104,116,61,34,48
+,34,32,105,110,32,104,105,115,32,98,111,111,107,109,111,114,101,32,116,104,97,
+110,32,97,102,111,108,108,111,119,115,32,116,104,101,99,114,101,97,116,101,100,
+32,116,104,101,112,114,101,115,101,110,99,101,32,105,110,38,110,98,115,112,59,60
+,47,116,100,62,110,97,116,105,111,110,97,108,105,115,116,116,104,101,32,105,100,
+101,97,32,111,102,97,32,99,104,97,114,97,99,116,101,114,119,101,114,101,32,102,
+111,114,99,101,100,32,99,108,97,115,115,61,34,98,116,110,100,97,121,115,32,111,
+102,32,116,104,101,102,101,97,116,117,114,101,100,32,105,110,115,104,111,119,105
+,110,103,32,116,104,101,105,110,116,101,114,101,115,116,32,105,110,105,110,32,
+112,108,97,99,101,32,111,102,116,117,114,110,32,111,102,32,116,104,101,116,104,
+101,32,104,101,97,100,32,111,102,76,111,114,100,32,111,102,32,116,104,101,112,
+111,108,105,116,105,99,97,108,108,121,104,97,115,32,105,116,115,32,111,119,110,
+69,100,117,99,97,116,105,111,110,97,108,97,112,112,114,111,118,97,108,32,111,102
+,115,111,109,101,32,111,102,32,116,104,101,101,97,99,104,32,111,116,104,101,114,
+44,98,101,104,97,118,105,111,114,32,111,102,97,110,100,32,98,101,99,97,117,115,
+101,97,110,100,32,97,110,111,116,104,101,114,97,112,112,101,97,114,101,100,32,
+111,110,114,101,99,111,114,100,101,100,32,105,110,98,108,97,99,107,38,113,117,
+111,116,59,109,97,121,32,105,110,99,108,117,100,101,116,104,101,32,119,111,114,
+108,100,39,115,99,97,110,32,108,101,97,100,32,116,111,114,101,102,101,114,115,32
+,116,111,32,97,98,111,114,100,101,114,61,34,48,34,32,103,111,118,101,114,110,109
+,101,110,116,32,119,105,110,110,105,110,103,32,116,104,101,114,101,115,117,108,
+116,101,100,32,105,110,32,119,104,105,108,101,32,116,104,101,32,87,97,115,104,
+105,110,103,116,111,110,44,116,104,101,32,115,117,98,106,101,99,116,99,105,116,
+121,32,105,110,32,116,104,101,62,60,47,100,105,118,62,13,10,9,9,114,101,102,108,
+101,99,116,32,116,104,101,116,111,32,99,111,109,112,108,101,116,101,98,101,99,97
+,109,101,32,109,111,114,101,114,97,100,105,111,97,99,116,105,118,101,114,101,106
+,101,99,116,101,100,32,98,121,119,105,116,104,111,117,116,32,97,110,121,104,105,
+115,32,102,97,116,104,101,114,44,119,104,105,99,104,32,99,111,117,108,100,99,111
+,112,121,32,111,102,32,116,104,101,116,111,32,105,110,100,105,99,97,116,101,97,
+32,112,111,108,105,116,105,99,97,108,97,99,99,111,117,110,116,115,32,111,102,99,
+111,110,115,116,105,116,117,116,101,115,119,111,114,107,101,100,32,119,105,116,
+104,101,114,60,47,97,62,60,47,108,105,62,111,102,32,104,105,115,32,108,105,102,
+101,97,99,99,111,109,112,97,110,105,101,100,99,108,105,101,110,116,87,105,100,
+116,104,112,114,101,118,101,110,116,32,116,104,101,76,101,103,105,115,108,97,116
+,105,118,101,100,105,102,102,101,114,101,110,116,108,121,116,111,103,101,116,104
+,101,114,32,105,110,104,97,115,32,115,101,118,101,114,97,108,102,111,114,32,97,
+110,111,116,104,101,114,116,101,120,116,32,111,102,32,116,104,101,102,111,117,
+110,100,101,100,32,116,104,101,101,32,119,105,116,104,32,116,104,101,32,105,115,
+32,117,115,101,100,32,102,111,114,99,104,97,110,103,101,100,32,116,104,101,117,
+115,117,97,108,108,121,32,116,104,101,112,108,97,99,101,32,119,104,101,114,101,
+119,104,101,114,101,97,115,32,116,104,101,62,32,60,97,32,104,114,101,102,61,34,
+34,62,60,97,32,104,114,101,102,61,34,116,104,101,109,115,101,108,118,101,115,44,
+97,108,116,104,111,117,103,104,32,104,101,116,104,97,116,32,99,97,110,32,98,101,
+116,114,97,100,105,116,105,111,110,97,108,114,111,108,101,32,111,102,32,116,104,
+101,97,115,32,97,32,114,101,115,117,108,116,114,101,109,111,118,101,67,104,105,
+108,100,100,101,115,105,103,110,101,100,32,98,121,119,101,115,116,32,111,102,32,
+116,104,101,83,111,109,101,32,112,101,111,112,108,101,112,114,111,100,117,99,116
+,105,111,110,44,115,105,100,101,32,111,102,32,116,104,101,110,101,119,115,108,
+101,116,116,101,114,115,117,115,101,100,32,98,121,32,116,104,101,100,111,119,110
+,32,116,111,32,116,104,101,97,99,99,101,112,116,101,100,32,98,121,108,105,118,
+101,32,105,110,32,116,104,101,97,116,116,101,109,112,116,115,32,116,111,111,117,
+116,115,105,100,101,32,116,104,101,102,114,101,113,117,101,110,99,105,101,115,72
+,111,119,101,118,101,114,44,32,105,110,112,114,111,103,114,97,109,109,101,114,
+115,97,116,32,108,101,97,115,116,32,105,110,97,112,112,114,111,120,105,109,97,
+116,101,97,108,116,104,111,117,103,104,32,105,116,119,97,115,32,112,97,114,116,
+32,111,102,97,110,100,32,118,97,114,105,111,117,115,71,111,118,101,114,110,111,
+114,32,111,102,116,104,101,32,97,114,116,105,99,108,101,116,117,114,110,101,100,
+32,105,110,116,111,62,60,97,32,104,114,101,102,61,34,47,116,104,101,32,101,99,
+111,110,111,109,121,105,115,32,116,104,101,32,109,111,115,116,109,111,115,116,32
+,119,105,100,101,108,121,119,111,117,108,100,32,108,97,116,101,114,97,110,100,32
+,112,101,114,104,97,112,115,114,105,115,101,32,116,111,32,116,104,101,111,99,99,
+117,114,115,32,119,104,101,110,117,110,100,101,114,32,119,104,105,99,104,99,111,
+110,100,105,116,105,111,110,115,46,116,104,101,32,119,101,115,116,101,114,110,
+116,104,101,111,114,121,32,116,104,97,116,105,115,32,112,114,111,100,117,99,101,
+100,116,104,101,32,99,105,116,121,32,111,102,105,110,32,119,104,105,99,104,32,
+104,101,115,101,101,110,32,105,110,32,116,104,101,116,104,101,32,99,101,110,116,
+114,97,108,98,117,105,108,100,105,110,103,32,111,102,109,97,110,121,32,111,102,
+32,104,105,115,97,114,101,97,32,111,102,32,116,104,101,105,115,32,116,104,101,32
+,111,110,108,121,109,111,115,116,32,111,102,32,116,104,101,109,97,110,121,32,111
+,102,32,116,104,101,116,104,101,32,87,101,115,116,101,114,110,84,104,101,114,101
+,32,105,115,32,110,111,101,120,116,101,110,100,101,100,32,116,111,83,116,97,116,
+105,115,116,105,99,97,108,99,111,108,115,112,97,110,61,50,32,124,115,104,111,114
+,116,32,115,116,111,114,121,112,111,115,115,105,98,108,101,32,116,111,116,111,
+112,111,108,111,103,105,99,97,108,99,114,105,116,105,99,97,108,32,111,102,114,
+101,112,111,114,116,101,100,32,116,111,97,32,67,104,114,105,115,116,105,97,110,
+100,101,99,105,115,105,111,110,32,116,111,105,115,32,101,113,117,97,108,32,116,
+111,112,114,111,98,108,101,109,115,32,111,102,84,104,105,115,32,99,97,110,32,98,
+101,109,101,114,99,104,97,110,100,105,115,101,102,111,114,32,109,111,115,116,32,
+111,102,110,111,32,101,118,105,100,101,110,99,101,101,100,105,116,105,111,110,
+115,32,111,102,101,108,101,109,101,110,116,115,32,105,110,38,113,117,111,116,59,
+46,32,84,104,101,99,111,109,47,105,109,97,103,101,115,47,119,104,105,99,104,32,
+109,97,107,101,115,116,104,101,32,112,114,111,99,101,115,115,114,101,109,97,105,
+110,115,32,116,104,101,108,105,116,101,114,97,116,117,114,101,44,105,115,32,97,
+32,109,101,109,98,101,114,116,104,101,32,112,111,112,117,108,97,114,116,104,101,
+32,97,110,99,105,101,110,116,112,114,111,98,108,101,109,115,32,105,110,116,105,
+109,101,32,111,102,32,116,104,101,100,101,102,101,97,116,101,100,32,98,121,98,
+111,100,121,32,111,102,32,116,104,101,97,32,102,101,119,32,121,101,97,114,115,
+109,117,99,104,32,111,102,32,116,104,101,116,104,101,32,119,111,114,107,32,111,
+102,67,97,108,105,102,111,114,110,105,97,44,115,101,114,118,101,100,32,97,115,32
+,97,103,111,118,101,114,110,109,101,110,116,46,99,111,110,99,101,112,116,115,32,
+111,102,109,111,118,101,109,101,110,116,32,105,110,9,9,60,100,105,118,32,105,100
+,61,34,105,116,34,32,118,97,108,117,101,61,34,108,97,110,103,117,97,103,101,32,
+111,102,97,115,32,116,104,101,121,32,97,114,101,112,114,111,100,117,99,101,100,
+32,105,110,105,115,32,116,104,97,116,32,116,104,101,101,120,112,108,97,105,110,
+32,116,104,101,100,105,118,62,60,47,100,105,118,62,10,72,111,119,101,118,101,114
+,32,116,104,101,108,101,97,100,32,116,111,32,116,104,101,9,60,97,32,104,114,101,
+102,61,34,47,119,97,115,32,103,114,97,110,116,101,100,112,101,111,112,108,101,32
+,104,97,118,101,99,111,110,116,105,110,117,97,108,108,121,119,97,115,32,115,101,
+101,110,32,97,115,97,110,100,32,114,101,108,97,116,101,100,116,104,101,32,114,
+111,108,101,32,111,102,112,114,111,112,111,115,101,100,32,98,121,111,102,32,116,
+104,101,32,98,101,115,116,101,97,99,104,32,111,116,104,101,114,46,67,111,110,115
+,116,97,110,116,105,110,101,112,101,111,112,108,101,32,102,114,111,109,100,105,
+97,108,101,99,116,115,32,111,102,116,111,32,114,101,118,105,115,105,111,110,119,
+97,115,32,114,101,110,97,109,101,100,97,32,115,111,117,114,99,101,32,111,102,116
+,104,101,32,105,110,105,116,105,97,108,108,97,117,110,99,104,101,100,32,105,110,
+112,114,111,118,105,100,101,32,116,104,101,116,111,32,116,104,101,32,119,101,115
+,116,119,104,101,114,101,32,116,104,101,114,101,97,110,100,32,115,105,109,105,
+108,97,114,98,101,116,119,101,101,110,32,116,119,111,105,115,32,97,108,115,111,
+32,116,104,101,69,110,103,108,105,115,104,32,97,110,100,99,111,110,100,105,116,
+105,111,110,115,44,116,104,97,116,32,105,116,32,119,97,115,101,110,116,105,116,
+108,101,100,32,116,111,116,104,101,109,115,101,108,118,101,115,46,113,117,97,110
+,116,105,116,121,32,111,102,114,97,110,115,112,97,114,101,110,99,121,116,104,101
+,32,115,97,109,101,32,97,115,116,111,32,106,111,105,110,32,116,104,101,99,111,
+117,110,116,114,121,32,97,110,100,116,104,105,115,32,105,115,32,116,104,101,84,
+104,105,115,32,108,101,100,32,116,111,97,32,115,116,97,116,101,109,101,110,116,
+99,111,110,116,114,97,115,116,32,116,111,108,97,115,116,73,110,100,101,120,79,
+102,116,104,114,111,117,103,104,32,104,105,115,105,115,32,100,101,115,105,103,
+110,101,100,116,104,101,32,116,101,114,109,32,105,115,105,115,32,112,114,111,118
+,105,100,101,100,112,114,111,116,101,99,116,32,116,104,101,110,103,60,47,97,62,
+60,47,108,105,62,84,104,101,32,99,117,114,114,101,110,116,116,104,101,32,115,105
+,116,101,32,111,102,115,117,98,115,116,97,110,116,105,97,108,101,120,112,101,114
+,105,101,110,99,101,44,105,110,32,116,104,101,32,87,101,115,116,116,104,101,121,
+32,115,104,111,117,108,100,115,108,111,118,101,110,196,141,105,110,97,99,111,109
+,101,110,116,97,114,105,111,115,117,110,105,118,101,114,115,105,100,97,100,99,
+111,110,100,105,99,105,111,110,101,115,97,99,116,105,118,105,100,97,100,101,115,
+101,120,112,101,114,105,101,110,99,105,97,116,101,99,110,111,108,111,103,195,173
+,97,112,114,111,100,117,99,99,105,195,179,110,112,117,110,116,117,97,99,105,195,
+179,110,97,112,108,105,99,97,99,105,195,179,110,99,111,110,116,114,97,115,101,
+195,177,97,99,97,116,101,103,111,114,195,173,97,115,114,101,103,105,115,116,114,
+97,114,115,101,112,114,111,102,101,115,105,111,110,97,108,116,114,97,116,97,109,
+105,101,110,116,111,114,101,103,195,173,115,116,114,97,116,101,115,101,99,114,
+101,116,97,114,195,173,97,112,114,105,110,99,105,112,97,108,101,115,112,114,111,
+116,101,99,99,105,195,179,110,105,109,112,111,114,116,97,110,116,101,115,105,109
+,112,111,114,116,97,110,99,105,97,112,111,115,105,98,105,108,105,100,97,100,105,
+110,116,101,114,101,115,97,110,116,101,99,114,101,99,105,109,105,101,110,116,111
+,110,101,99,101,115,105,100,97,100,101,115,115,117,115,99,114,105,98,105,114,115
+,101,97,115,111,99,105,97,99,105,195,179,110,100,105,115,112,111,110,105,98,108,
+101,115,101,118,97,108,117,97,99,105,195,179,110,101,115,116,117,100,105,97,110,
+116,101,115,114,101,115,112,111,110,115,97,98,108,101,114,101,115,111,108,117,99
+,105,195,179,110,103,117,97,100,97,108,97,106,97,114,97,114,101,103,105,115,116,
+114,97,100,111,115,111,112,111,114,116,117,110,105,100,97,100,99,111,109,101,114
+,99,105,97,108,101,115,102,111,116,111,103,114,97,102,195,173,97,97,117,116,111,
+114,105,100,97,100,101,115,105,110,103,101,110,105,101,114,195,173,97,116,101,
+108,101,118,105,115,105,195,179,110,99,111,109,112,101,116,101,110,99,105,97,111
+,112,101,114,97,99,105,111,110,101,115,101,115,116,97,98,108,101,99,105,100,111,
+115,105,109,112,108,101,109,101,110,116,101,97,99,116,117,97,108,109,101,110,116
+,101,110,97,118,101,103,97,99,105,195,179,110,99,111,110,102,111,114,109,105,100
+,97,100,108,105,110,101,45,104,101,105,103,104,116,58,102,111,110,116,45,102,97,
+109,105,108,121,58,34,32,58,32,34,104,116,116,112,58,47,47,97,112,112,108,105,99
+,97,116,105,111,110,115,108,105,110,107,34,32,104,114,101,102,61,34,115,112,101,
+99,105,102,105,99,97,108,108,121,47,47,60,33,91,67,68,65,84,65,91,10,79,114,103,
+97,110,105,122,97,116,105,111,110,100,105,115,116,114,105,98,117,116,105,111,110
+,48,112,120,59,32,104,101,105,103,104,116,58,114,101,108,97,116,105,111,110,115,
+104,105,112,100,101,118,105,99,101,45,119,105,100,116,104,60,100,105,118,32,99,
+108,97,115,115,61,34,60,108,97,98,101,108,32,102,111,114,61,34,114,101,103,105,
+115,116,114,97,116,105,111,110,60,47,110,111,115,99,114,105,112,116,62,10,47,105
+,110,100,101,120,46,104,116,109,108,34,119,105,110,100,111,119,46,111,112,101,
+110,40,32,33,105,109,112,111,114,116,97,110,116,59,97,112,112,108,105,99,97,116,
+105,111,110,47,105,110,100,101,112,101,110,100,101,110,99,101,47,47,119,119,119,
+46,103,111,111,103,108,101,111,114,103,97,110,105,122,97,116,105,111,110,97,117,
+116,111,99,111,109,112,108,101,116,101,114,101,113,117,105,114,101,109,101,110,
+116,115,99,111,110,115,101,114,118,97,116,105,118,101,60,102,111,114,109,32,110,
+97,109,101,61,34,105,110,116,101,108,108,101,99,116,117,97,108,109,97,114,103,
+105,110,45,108,101,102,116,58,49,56,116,104,32,99,101,110,116,117,114,121,97,110
+,32,105,109,112,111,114,116,97,110,116,105,110,115,116,105,116,117,116,105,111,
+110,115,97,98,98,114,101,118,105,97,116,105,111,110,60,105,109,103,32,99,108,97,
+115,115,61,34,111,114,103,97,110,105,115,97,116,105,111,110,99,105,118,105,108,
+105,122,97,116,105,111,110,49,57,116,104,32,99,101,110,116,117,114,121,97,114,99
+,104,105,116,101,99,116,117,114,101,105,110,99,111,114,112,111,114,97,116,101,
+100,50,48,116,104,32,99,101,110,116,117,114,121,45,99,111,110,116,97,105,110,101
+,114,34,62,109,111,115,116,32,110,111,116,97,98,108,121,47,62,60,47,97,62,60,47,
+100,105,118,62,110,111,116,105,102,105,99,97,116,105,111,110,39,117,110,100,101,
+102,105,110,101,100,39,41,70,117,114,116,104,101,114,109,111,114,101,44,98,101,
+108,105,101,118,101,32,116,104,97,116,105,110,110,101,114,72,84,77,76,32,61,32,
+112,114,105,111,114,32,116,111,32,116,104,101,100,114,97,109,97,116,105,99,97,
+108,108,121,114,101,102,101,114,114,105,110,103,32,116,111,110,101,103,111,116,
+105,97,116,105,111,110,115,104,101,97,100,113,117,97,114,116,101,114,115,83,111,
+117,116,104,32,65,102,114,105,99,97,117,110,115,117,99,99,101,115,115,102,117,
+108,80,101,110,110,115,121,108,118,97,110,105,97,65,115,32,97,32,114,101,115,117
+,108,116,44,60,104,116,109,108,32,108,97,110,103,61,34,38,108,116,59,47,115,117,
+112,38,103,116,59,100,101,97,108,105,110,103,32,119,105,116,104,112,104,105,108,
+97,100,101,108,112,104,105,97,104,105,115,116,111,114,105,99,97,108,108,121,41,
+59,60,47,115,99,114,105,112,116,62,10,112,97,100,100,105,110,103,45,116,111,112,
+58,101,120,112,101,114,105,109,101,110,116,97,108,103,101,116,65,116,116,114,105
+,98,117,116,101,105,110,115,116,114,117,99,116,105,111,110,115,116,101,99,104,
+110,111,108,111,103,105,101,115,112,97,114,116,32,111,102,32,116,104,101,32,61,
+102,117,110,99,116,105,111,110,40,41,123,115,117,98,115,99,114,105,112,116,105,
+111,110,108,46,100,116,100,34,62,13,10,60,104,116,103,101,111,103,114,97,112,104
+,105,99,97,108,67,111,110,115,116,105,116,117,116,105,111,110,39,44,32,102,117,
+110,99,116,105,111,110,40,115,117,112,112,111,114,116,101,100,32,98,121,97,103,
+114,105,99,117,108,116,117,114,97,108,99,111,110,115,116,114,117,99,116,105,111,
+110,112,117,98,108,105,99,97,116,105,111,110,115,102,111,110,116,45,115,105,122,
+101,58,32,49,97,32,118,97,114,105,101,116,121,32,111,102,60,100,105,118,32,115,
+116,121,108,101,61,34,69,110,99,121,99,108,111,112,101,100,105,97,105,102,114,97
+,109,101,32,115,114,99,61,34,100,101,109,111,110,115,116,114,97,116,101,100,97,
+99,99,111,109,112,108,105,115,104,101,100,117,110,105,118,101,114,115,105,116,
+105,101,115,68,101,109,111,103,114,97,112,104,105,99,115,41,59,60,47,115,99,114,
+105,112,116,62,60,100,101,100,105,99,97,116,101,100,32,116,111,107,110,111,119,
+108,101,100,103,101,32,111,102,115,97,116,105,115,102,97,99,116,105,111,110,112,
+97,114,116,105,99,117,108,97,114,108,121,60,47,100,105,118,62,60,47,100,105,118,
+62,69,110,103,108,105,115,104,32,40,85,83,41,97,112,112,101,110,100,67,104,105,
+108,100,40,116,114,97,110,115,109,105,115,115,105,111,110,115,46,32,72,111,119,
+101,118,101,114,44,32,105,110,116,101,108,108,105,103,101,110,99,101,34,32,116,
+97,98,105,110,100,101,120,61,34,102,108,111,97,116,58,114,105,103,104,116,59,67,
+111,109,109,111,110,119,101,97,108,116,104,114,97,110,103,105,110,103,32,102,114
+,111,109,105,110,32,119,104,105,99,104,32,116,104,101,97,116,32,108,101,97,115,
+116,32,111,110,101,114,101,112,114,111,100,117,99,116,105,111,110,101,110,99,121
+,99,108,111,112,101,100,105,97,59,102,111,110,116,45,115,105,122,101,58,49,106,
+117,114,105,115,100,105,99,116,105,111,110,97,116,32,116,104,97,116,32,116,105,
+109,101,34,62,60,97,32,99,108,97,115,115,61,34,73,110,32,97,100,100,105,116,105,
+111,110,44,100,101,115,99,114,105,112,116,105,111,110,43,99,111,110,118,101,114,
+115,97,116,105,111,110,99,111,110,116,97,99,116,32,119,105,116,104,105,115,32,
+103,101,110,101,114,97,108,108,121,114,34,32,99,111,110,116,101,110,116,61,34,
+114,101,112,114,101,115,101,110,116,105,110,103,38,108,116,59,109,97,116,104,38,
+103,116,59,112,114,101,115,101,110,116,97,116,105,111,110,111,99,99,97,115,105,
+111,110,97,108,108,121,60,105,109,103,32,119,105,100,116,104,61,34,110,97,118,
+105,103,97,116,105,111,110,34,62,99,111,109,112,101,110,115,97,116,105,111,110,
+99,104,97,109,112,105,111,110,115,104,105,112,109,101,100,105,97,61,34,97,108,
+108,34,32,118,105,111,108,97,116,105,111,110,32,111,102,114,101,102,101,114,101,
+110,99,101,32,116,111,114,101,116,117,114,110,32,116,114,117,101,59,83,116,114,
+105,99,116,47,47,69,78,34,32,116,114,97,110,115,97,99,116,105,111,110,115,105,
+110,116,101,114,118,101,110,116,105,111,110,118,101,114,105,102,105,99,97,116,
+105,111,110,73,110,102,111,114,109,97,116,105,111,110,32,100,105,102,102,105,99,
+117,108,116,105,101,115,67,104,97,109,112,105,111,110,115,104,105,112,99,97,112,
+97,98,105,108,105,116,105,101,115,60,33,91,101,110,100,105,102,93,45,45,62,125,
+10,60,47,115,99,114,105,112,116,62,10,67,104,114,105,115,116,105,97,110,105,116,
+121,102,111,114,32,101,120,97,109,112,108,101,44,80,114,111,102,101,115,115,105,
+111,110,97,108,114,101,115,116,114,105,99,116,105,111,110,115,115,117,103,103,
+101,115,116,32,116,104,97,116,119,97,115,32,114,101,108,101,97,115,101,100,40,
+115,117,99,104,32,97,115,32,116,104,101,114,101,109,111,118,101,67,108,97,115,
+115,40,117,110,101,109,112,108,111,121,109,101,110,116,116,104,101,32,65,109,101
+,114,105,99,97,110,115,116,114,117,99,116,117,114,101,32,111,102,47,105,110,100,
+101,120,46,104,116,109,108,32,112,117,98,108,105,115,104,101,100,32,105,110,115,
+112,97,110,32,99,108,97,115,115,61,34,34,62,60,97,32,104,114,101,102,61,34,47,
+105,110,116,114,111,100,117,99,116,105,111,110,98,101,108,111,110,103,105,110,
+103,32,116,111,99,108,97,105,109,101,100,32,116,104,97,116,99,111,110,115,101,
+113,117,101,110,99,101,115,60,109,101,116,97,32,110,97,109,101,61,34,71,117,105,
+100,101,32,116,111,32,116,104,101,111,118,101,114,119,104,101,108,109,105,110,
+103,97,103,97,105,110,115,116,32,116,104,101,32,99,111,110,99,101,110,116,114,97
+,116,101,100,44,10,46,110,111,110,116,111,117,99,104,32,111,98,115,101,114,118,
+97,116,105,111,110,115,60,47,97,62,10,60,47,100,105,118,62,10,102,32,40,100,111,
+99,117,109,101,110,116,46,98,111,114,100,101,114,58,32,49,112,120,32,123,102,111
+,110,116,45,115,105,122,101,58,49,116,114,101,97,116,109,101,110,116,32,111,102,
+48,34,32,104,101,105,103,104,116,61,34,49,109,111,100,105,102,105,99,97,116,105,
+111,110,73,110,100,101,112,101,110,100,101,110,99,101,100,105,118,105,100,101,
+100,32,105,110,116,111,103,114,101,97,116,101,114,32,116,104,97,110,97,99,104,
+105,101,118,101,109,101,110,116,115,101,115,116,97,98,108,105,115,104,105,110,
+103,74,97,118,97,83,99,114,105,112,116,34,32,110,101,118,101,114,116,104,101,108
+,101,115,115,115,105,103,110,105,102,105,99,97,110,99,101,66,114,111,97,100,99,
+97,115,116,105,110,103,62,38,110,98,115,112,59,60,47,116,100,62,99,111,110,116,
+97,105,110,101,114,34,62,10,115,117,99,104,32,97,115,32,116,104,101,32,105,110,
+102,108,117,101,110,99,101,32,111,102,97,32,112,97,114,116,105,99,117,108,97,114
+,115,114,99,61,39,104,116,116,112,58,47,47,110,97,118,105,103,97,116,105,111,110
+,34,32,104,97,108,102,32,111,102,32,116,104,101,32,115,117,98,115,116,97,110,116
+,105,97,108,32,38,110,98,115,112,59,60,47,100,105,118,62,97,100,118,97,110,116,
+97,103,101,32,111,102,100,105,115,99,111,118,101,114,121,32,111,102,102,117,110,
+100,97,109,101,110,116,97,108,32,109,101,116,114,111,112,111,108,105,116,97,110,
+116,104,101,32,111,112,112,111,115,105,116,101,34,32,120,109,108,58,108,97,110,
+103,61,34,100,101,108,105,98,101,114,97,116,101,108,121,97,108,105,103,110,61,99
+,101,110,116,101,114,101,118,111,108,117,116,105,111,110,32,111,102,112,114,101,
+115,101,114,118,97,116,105,111,110,105,109,112,114,111,118,101,109,101,110,116,
+115,98,101,103,105,110,110,105,110,103,32,105,110,74,101,115,117,115,32,67,104,
+114,105,115,116,80,117,98,108,105,99,97,116,105,111,110,115,100,105,115,97,103,
+114,101,101,109,101,110,116,116,101,120,116,45,97,108,105,103,110,58,114,44,32,
+102,117,110,99,116,105,111,110,40,41,115,105,109,105,108,97,114,105,116,105,101,
+115,98,111,100,121,62,60,47,104,116,109,108,62,105,115,32,99,117,114,114,101,110
+,116,108,121,97,108,112,104,97,98,101,116,105,99,97,108,105,115,32,115,111,109,
+101,116,105,109,101,115,116,121,112,101,61,34,105,109,97,103,101,47,109,97,110,
+121,32,111,102,32,116,104,101,32,102,108,111,119,58,104,105,100,100,101,110,59,
+97,118,97,105,108,97,98,108,101,32,105,110,100,101,115,99,114,105,98,101,32,116,
+104,101,101,120,105,115,116,101,110,99,101,32,111,102,97,108,108,32,111,118,101,
+114,32,116,104,101,116,104,101,32,73,110,116,101,114,110,101,116,9,60,117,108,32
+,99,108,97,115,115,61,34,105,110,115,116,97,108,108,97,116,105,111,110,110,101,
+105,103,104,98,111,114,104,111,111,100,97,114,109,101,100,32,102,111,114,99,101,
+115,114,101,100,117,99,105,110,103,32,116,104,101,99,111,110,116,105,110,117,101
+,115,32,116,111,78,111,110,101,116,104,101,108,101,115,115,44,116,101,109,112,
+101,114,97,116,117,114,101,115,10,9,9,60,97,32,104,114,101,102,61,34,99,108,111,
+115,101,32,116,111,32,116,104,101,101,120,97,109,112,108,101,115,32,111,102,32,
+105,115,32,97,98,111,117,116,32,116,104,101,40,115,101,101,32,98,101,108,111,119
+,41,46,34,32,105,100,61,34,115,101,97,114,99,104,112,114,111,102,101,115,115,105
+,111,110,97,108,105,115,32,97,118,97,105,108,97,98,108,101,116,104,101,32,111,
+102,102,105,99,105,97,108,9,9,60,47,115,99,114,105,112,116,62,10,10,9,9,60,100,
+105,118,32,105,100,61,34,97,99,99,101,108,101,114,97,116,105,111,110,116,104,114
+,111,117,103,104,32,116,104,101,32,72,97,108,108,32,111,102,32,70,97,109,101,100
+,101,115,99,114,105,112,116,105,111,110,115,116,114,97,110,115,108,97,116,105,
+111,110,115,105,110,116,101,114,102,101,114,101,110,99,101,32,116,121,112,101,61
+,39,116,101,120,116,47,114,101,99,101,110,116,32,121,101,97,114,115,105,110,32,
+116,104,101,32,119,111,114,108,100,118,101,114,121,32,112,111,112,117,108,97,114
+,123,98,97,99,107,103,114,111,117,110,100,58,116,114,97,100,105,116,105,111,110,
+97,108,32,115,111,109,101,32,111,102,32,116,104,101,32,99,111,110,110,101,99,116
+,101,100,32,116,111,101,120,112,108,111,105,116,97,116,105,111,110,101,109,101,
+114,103,101,110,99,101,32,111,102,99,111,110,115,116,105,116,117,116,105,111,110
+,65,32,72,105,115,116,111,114,121,32,111,102,115,105,103,110,105,102,105,99,97,
+110,116,32,109,97,110,117,102,97,99,116,117,114,101,100,101,120,112,101,99,116,
+97,116,105,111,110,115,62,60,110,111,115,99,114,105,112,116,62,60,99,97,110,32,
+98,101,32,102,111,117,110,100,98,101,99,97,117,115,101,32,116,104,101,32,104,97,
+115,32,110,111,116,32,98,101,101,110,110,101,105,103,104,98,111,117,114,105,110,
+103,119,105,116,104,111,117,116,32,116,104,101,32,97,100,100,101,100,32,116,111,
+32,116,104,101,9,60,108,105,32,99,108,97,115,115,61,34,105,110,115,116,114,117,
+109,101,110,116,97,108,83,111,118,105,101,116,32,85,110,105,111,110,97,99,107,
+110,111,119,108,101,100,103,101,100,119,104,105,99,104,32,99,97,110,32,98,101,
+110,97,109,101,32,102,111,114,32,116,104,101,97,116,116,101,110,116,105,111,110,
+32,116,111,97,116,116,101,109,112,116,115,32,116,111,32,100,101,118,101,108,111,
+112,109,101,110,116,115,73,110,32,102,97,99,116,44,32,116,104,101,60,108,105,32,
+99,108,97,115,115,61,34,97,105,109,112,108,105,99,97,116,105,111,110,115,115,117
+,105,116,97,98,108,101,32,102,111,114,109,117,99,104,32,111,102,32,116,104,101,
+32,99,111,108,111,110,105,122,97,116,105,111,110,112,114,101,115,105,100,101,110
+,116,105,97,108,99,97,110,99,101,108,66,117,98,98,108,101,32,73,110,102,111,114,
+109,97,116,105,111,110,109,111,115,116,32,111,102,32,116,104,101,32,105,115,32,
+100,101,115,99,114,105,98,101,100,114,101,115,116,32,111,102,32,116,104,101,32,
+109,111,114,101,32,111,114,32,108,101,115,115,105,110,32,83,101,112,116,101,109,
+98,101,114,73,110,116,101,108,108,105,103,101,110,99,101,115,114,99,61,34,104,
+116,116,112,58,47,47,112,120,59,32,104,101,105,103,104,116,58,32,97,118,97,105,
+108,97,98,108,101,32,116,111,109,97,110,117,102,97,99,116,117,114,101,114,104,
+117,109,97,110,32,114,105,103,104,116,115,108,105,110,107,32,104,114,101,102,61,
+34,47,97,118,97,105,108,97,98,105,108,105,116,121,112,114,111,112,111,114,116,
+105,111,110,97,108,111,117,116,115,105,100,101,32,116,104,101,32,97,115,116,114,
+111,110,111,109,105,99,97,108,104,117,109,97,110,32,98,101,105,110,103,115,110,
+97,109,101,32,111,102,32,116,104,101,32,97,114,101,32,102,111,117,110,100,32,105
+,110,97,114,101,32,98,97,115,101,100,32,111,110,115,109,97,108,108,101,114,32,
+116,104,97,110,97,32,112,101,114,115,111,110,32,119,104,111,101,120,112,97,110,
+115,105,111,110,32,111,102,97,114,103,117,105,110,103,32,116,104,97,116,110,111,
+119,32,107,110,111,119,110,32,97,115,73,110,32,116,104,101,32,101,97,114,108,121
+,105,110,116,101,114,109,101,100,105,97,116,101,100,101,114,105,118,101,100,32,
+102,114,111,109,83,99,97,110,100,105,110,97,118,105,97,110,60,47,97,62,60,47,100
+,105,118,62,13,10,99,111,110,115,105,100,101,114,32,116,104,101,97,110,32,101,
+115,116,105,109,97,116,101,100,116,104,101,32,78,97,116,105,111,110,97,108,60,
+100,105,118,32,105,100,61,34,112,97,103,114,101,115,117,108,116,105,110,103,32,
+105,110,99,111,109,109,105,115,115,105,111,110,101,100,97,110,97,108,111,103,111
+,117,115,32,116,111,97,114,101,32,114,101,113,117,105,114,101,100,47,117,108,62,
+10,60,47,100,105,118,62,10,119,97,115,32,98,97,115,101,100,32,111,110,97,110,100
+,32,98,101,99,97,109,101,32,97,38,110,98,115,112,59,38,110,98,115,112,59,116,34,
+32,118,97,108,117,101,61,34,34,32,119,97,115,32,99,97,112,116,117,114,101,100,
+110,111,32,109,111,114,101,32,116,104,97,110,114,101,115,112,101,99,116,105,118,
+101,108,121,99,111,110,116,105,110,117,101,32,116,111,32,62,13,10,60,104,101,97,
+100,62,13,10,60,119,101,114,101,32,99,114,101,97,116,101,100,109,111,114,101,32,
+103,101,110,101,114,97,108,105,110,102,111,114,109,97,116,105,111,110,32,117,115
+,101,100,32,102,111,114,32,116,104,101,105,110,100,101,112,101,110,100,101,110,
+116,32,116,104,101,32,73,109,112,101,114,105,97,108,99,111,109,112,111,110,101,
+110,116,32,111,102,116,111,32,116,104,101,32,110,111,114,116,104,105,110,99,108,
+117,100,101,32,116,104,101,32,67,111,110,115,116,114,117,99,116,105,111,110,115,
+105,100,101,32,111,102,32,116,104,101,32,119,111,117,108,100,32,110,111,116,32,
+98,101,102,111,114,32,105,110,115,116,97,110,99,101,105,110,118,101,110,116,105,
+111,110,32,111,102,109,111,114,101,32,99,111,109,112,108,101,120,99,111,108,108,
+101,99,116,105,118,101,108,121,98,97,99,107,103,114,111,117,110,100,58,32,116,
+101,120,116,45,97,108,105,103,110,58,32,105,116,115,32,111,114,105,103,105,110,
+97,108,105,110,116,111,32,97,99,99,111,117,110,116,116,104,105,115,32,112,114,
+111,99,101,115,115,97,110,32,101,120,116,101,110,115,105,118,101,104,111,119,101
+,118,101,114,44,32,116,104,101,116,104,101,121,32,97,114,101,32,110,111,116,114,
+101,106,101,99,116,101,100,32,116,104,101,99,114,105,116,105,99,105,115,109,32,
+111,102,100,117,114,105,110,103,32,119,104,105,99,104,112,114,111,98,97,98,108,
+121,32,116,104,101,116,104,105,115,32,97,114,116,105,99,108,101,40,102,117,110,
+99,116,105,111,110,40,41,123,73,116,32,115,104,111,117,108,100,32,98,101,97,110,
+32,97,103,114,101,101,109,101,110,116,97,99,99,105,100,101,110,116,97,108,108,
+121,100,105,102,102,101,114,115,32,102,114,111,109,65,114,99,104,105,116,101,99,
+116,117,114,101,98,101,116,116,101,114,32,107,110,111,119,110,97,114,114,97,110,
+103,101,109,101,110,116,115,105,110,102,108,117,101,110,99,101,32,111,110,97,116
+,116,101,110,100,101,100,32,116,104,101,105,100,101,110,116,105,99,97,108,32,116
+,111,115,111,117,116,104,32,111,102,32,116,104,101,112,97,115,115,32,116,104,114
+,111,117,103,104,120,109,108,34,32,116,105,116,108,101,61,34,119,101,105,103,104
+,116,58,98,111,108,100,59,99,114,101,97,116,105,110,103,32,116,104,101,100,105,
+115,112,108,97,121,58,110,111,110,101,114,101,112,108,97,99,101,100,32,116,104,
+101,60,105,109,103,32,115,114,99,61,34,47,105,104,116,116,112,115,58,47,47,119,
+119,119,46,87,111,114,108,100,32,87,97,114,32,73,73,116,101,115,116,105,109,111,
+110,105,97,108,115,102,111,117,110,100,32,105,110,32,116,104,101,114,101,113,117
+,105,114,101,100,32,116,111,32,97,110,100,32,116,104,97,116,32,116,104,101,98,
+101,116,119,101,101,110,32,116,104,101,32,119,97,115,32,100,101,115,105,103,110,
+101,100,99,111,110,115,105,115,116,115,32,111,102,32,99,111,110,115,105,100,101,
+114,97,98,108,121,112,117,98,108,105,115,104,101,100,32,98,121,116,104,101,32,
+108,97,110,103,117,97,103,101,67,111,110,115,101,114,118,97,116,105,111,110,99,
+111,110,115,105,115,116,101,100,32,111,102,114,101,102,101,114,32,116,111,32,116
+,104,101,98,97,99,107,32,116,111,32,116,104,101,32,99,115,115,34,32,109,101,100,
+105,97,61,34,80,101,111,112,108,101,32,102,114,111,109,32,97,118,97,105,108,97,
+98,108,101,32,111,110,112,114,111,118,101,100,32,116,111,32,98,101,115,117,103,
+103,101,115,116,105,111,110,115,34,119,97,115,32,107,110,111,119,110,32,97,115,
+118,97,114,105,101,116,105,101,115,32,111,102,108,105,107,101,108,121,32,116,111
+,32,98,101,99,111,109,112,114,105,115,101,100,32,111,102,115,117,112,112,111,114
+,116,32,116,104,101,32,104,97,110,100,115,32,111,102,32,116,104,101,99,111,117,
+112,108,101,100,32,119,105,116,104,99,111,110,110,101,99,116,32,97,110,100,32,98
+,111,114,100,101,114,58,110,111,110,101,59,112,101,114,102,111,114,109,97,110,99
+,101,115,98,101,102,111,114,101,32,98,101,105,110,103,108,97,116,101,114,32,98,
+101,99,97,109,101,99,97,108,99,117,108,97,116,105,111,110,115,111,102,116,101,
+110,32,99,97,108,108,101,100,114,101,115,105,100,101,110,116,115,32,111,102,109,
+101,97,110,105,110,103,32,116,104,97,116,62,60,108,105,32,99,108,97,115,115,61,
+34,101,118,105,100,101,110,99,101,32,102,111,114,101,120,112,108,97,110,97,116,
+105,111,110,115,101,110,118,105,114,111,110,109,101,110,116,115,34,62,60,47,97,
+62,60,47,100,105,118,62,119,104,105,99,104,32,97,108,108,111,119,115,73,110,116,
+114,111,100,117,99,116,105,111,110,100,101,118,101,108,111,112,101,100,32,98,121
+,97,32,119,105,100,101,32,114,97,110,103,101,111,110,32,98,101,104,97,108,102,32
+,111,102,118,97,108,105,103,110,61,34,116,111,112,34,112,114,105,110,99,105,112,
+108,101,32,111,102,97,116,32,116,104,101,32,116,105,109,101,44,60,47,110,111,115
+,99,114,105,112,116,62,13,115,97,105,100,32,116,111,32,104,97,118,101,105,110,32
+,116,104,101,32,102,105,114,115,116,119,104,105,108,101,32,111,116,104,101,114,
+115,104,121,112,111,116,104,101,116,105,99,97,108,112,104,105,108,111,115,111,
+112,104,101,114,115,112,111,119,101,114,32,111,102,32,116,104,101,99,111,110,116
+,97,105,110,101,100,32,105,110,112,101,114,102,111,114,109,101,100,32,98,121,105
+,110,97,98,105,108,105,116,121,32,116,111,119,101,114,101,32,119,114,105,116,116
+,101,110,115,112,97,110,32,115,116,121,108,101,61,34,105,110,112,117,116,32,110,
+97,109,101,61,34,116,104,101,32,113,117,101,115,116,105,111,110,105,110,116,101,
+110,100,101,100,32,102,111,114,114,101,106,101,99,116,105,111,110,32,111,102,105
+,109,112,108,105,101,115,32,116,104,97,116,105,110,118,101,110,116,101,100,32,
+116,104,101,116,104,101,32,115,116,97,110,100,97,114,100,119,97,115,32,112,114,
+111,98,97,98,108,121,108,105,110,107,32,98,101,116,119,101,101,110,112,114,111,
+102,101,115,115,111,114,32,111,102,105,110,116,101,114,97,99,116,105,111,110,115
+,99,104,97,110,103,105,110,103,32,116,104,101,73,110,100,105,97,110,32,79,99,101
+,97,110,32,99,108,97,115,115,61,34,108,97,115,116,119,111,114,107,105,110,103,32
+,119,105,116,104,39,104,116,116,112,58,47,47,119,119,119,46,121,101,97,114,115,
+32,98,101,102,111,114,101,84,104,105,115,32,119,97,115,32,116,104,101,114,101,99
+,114,101,97,116,105,111,110,97,108,101,110,116,101,114,105,110,103,32,116,104,
+101,109,101,97,115,117,114,101,109,101,110,116,115,97,110,32,101,120,116,114,101
+,109,101,108,121,118,97,108,117,101,32,111,102,32,116,104,101,115,116,97,114,116
+,32,111,102,32,116,104,101,10,60,47,115,99,114,105,112,116,62,10,10,97,110,32,
+101,102,102,111,114,116,32,116,111,105,110,99,114,101,97,115,101,32,116,104,101,
+116,111,32,116,104,101,32,115,111,117,116,104,115,112,97,99,105,110,103,61,34,48
+,34,62,115,117,102,102,105,99,105,101,110,116,108,121,116,104,101,32,69,117,114,
+111,112,101,97,110,99,111,110,118,101,114,116,101,100,32,116,111,99,108,101,97,
+114,84,105,109,101,111,117,116,100,105,100,32,110,111,116,32,104,97,118,101,99,
+111,110,115,101,113,117,101,110,116,108,121,102,111,114,32,116,104,101,32,110,
+101,120,116,101,120,116,101,110,115,105,111,110,32,111,102,101,99,111,110,111,
+109,105,99,32,97,110,100,97,108,116,104,111,117,103,104,32,116,104,101,97,114,
+101,32,112,114,111,100,117,99,101,100,97,110,100,32,119,105,116,104,32,116,104,
+101,105,110,115,117,102,102,105,99,105,101,110,116,103,105,118,101,110,32,98,121
+,32,116,104,101,115,116,97,116,105,110,103,32,116,104,97,116,101,120,112,101,110
+,100,105,116,117,114,101,115,60,47,115,112,97,110,62,60,47,97,62,10,116,104,111,
+117,103,104,116,32,116,104,97,116,111,110,32,116,104,101,32,98,97,115,105,115,99
+,101,108,108,112,97,100,100,105,110,103,61,105,109,97,103,101,32,111,102,32,116,
+104,101,114,101,116,117,114,110,105,110,103,32,116,111,105,110,102,111,114,109,
+97,116,105,111,110,44,115,101,112,97,114,97,116,101,100,32,98,121,97,115,115,97,
+115,115,105,110,97,116,101,100,115,34,32,99,111,110,116,101,110,116,61,34,97,117
+,116,104,111,114,105,116,121,32,111,102,110,111,114,116,104,119,101,115,116,101,
+114,110,60,47,100,105,118,62,10,60,100,105,118,32,34,62,60,47,100,105,118,62,13,
+10,32,32,99,111,110,115,117,108,116,97,116,105,111,110,99,111,109,109,117,110,
+105,116,121,32,111,102,116,104,101,32,110,97,116,105,111,110,97,108,105,116,32,
+115,104,111,117,108,100,32,98,101,112,97,114,116,105,99,105,112,97,110,116,115,
+32,97,108,105,103,110,61,34,108,101,102,116,116,104,101,32,103,114,101,97,116,
+101,115,116,115,101,108,101,99,116,105,111,110,32,111,102,115,117,112,101,114,
+110,97,116,117,114,97,108,100,101,112,101,110,100,101,110,116,32,111,110,105,115
+,32,109,101,110,116,105,111,110,101,100,97,108,108,111,119,105,110,103,32,116,
+104,101,119,97,115,32,105,110,118,101,110,116,101,100,97,99,99,111,109,112,97,
+110,121,105,110,103,104,105,115,32,112,101,114,115,111,110,97,108,97,118,97,105,
+108,97,98,108,101,32,97,116,115,116,117,100,121,32,111,102,32,116,104,101,111,
+110,32,116,104,101,32,111,116,104,101,114,101,120,101,99,117,116,105,111,110,32,
+111,102,72,117,109,97,110,32,82,105,103,104,116,115,116,101,114,109,115,32,111,
+102,32,116,104,101,97,115,115,111,99,105,97,116,105,111,110,115,114,101,115,101,
+97,114,99,104,32,97,110,100,115,117,99,99,101,101,100,101,100,32,98,121,100,101,
+102,101,97,116,101,100,32,116,104,101,97,110,100,32,102,114,111,109,32,116,104,
+101,98,117,116,32,116,104,101,121,32,97,114,101,99,111,109,109,97,110,100,101,
+114,32,111,102,115,116,97,116,101,32,111,102,32,116,104,101,121,101,97,114,115,
+32,111,102,32,97,103,101,116,104,101,32,115,116,117,100,121,32,111,102,60,117,
+108,32,99,108,97,115,115,61,34,115,112,108,97,99,101,32,105,110,32,116,104,101,
+119,104,101,114,101,32,104,101,32,119,97,115,60,108,105,32,99,108,97,115,115,61,
+34,102,116,104,101,114,101,32,97,114,101,32,110,111,119,104,105,99,104,32,98,101
+,99,97,109,101,104,101,32,112,117,98,108,105,115,104,101,100,101,120,112,114,101
+,115,115,101,100,32,105,110,116,111,32,119,104,105,99,104,32,116,104,101,99,111,
+109,109,105,115,115,105,111,110,101,114,102,111,110,116,45,119,101,105,103,104,
+116,58,116,101,114,114,105,116,111,114,121,32,111,102,101,120,116,101,110,115,
+105,111,110,115,34,62,82,111,109,97,110,32,69,109,112,105,114,101,101,113,117,97
+,108,32,116,111,32,116,104,101,73,110,32,99,111,110,116,114,97,115,116,44,104,
+111,119,101,118,101,114,44,32,97,110,100,105,115,32,116,121,112,105,99,97,108,
+108,121,97,110,100,32,104,105,115,32,119,105,102,101,40,97,108,115,111,32,99,97,
+108,108,101,100,62,60,117,108,32,99,108,97,115,115,61,34,101,102,102,101,99,116,
+105,118,101,108,121,32,101,118,111,108,118,101,100,32,105,110,116,111,115,101,
+101,109,32,116,111,32,104,97,118,101,119,104,105,99,104,32,105,115,32,116,104,
+101,116,104,101,114,101,32,119,97,115,32,110,111,97,110,32,101,120,99,101,108,
+108,101,110,116,97,108,108,32,111,102,32,116,104,101,115,101,100,101,115,99,114,
+105,98,101,100,32,98,121,73,110,32,112,114,97,99,116,105,99,101,44,98,114,111,97
+,100,99,97,115,116,105,110,103,99,104,97,114,103,101,100,32,119,105,116,104,114,
+101,102,108,101,99,116,101,100,32,105,110,115,117,98,106,101,99,116,101,100,32,
+116,111,109,105,108,105,116,97,114,121,32,97,110,100,116,111,32,116,104,101,32,
+112,111,105,110,116,101,99,111,110,111,109,105,99,97,108,108,121,115,101,116,84,
+97,114,103,101,116,105,110,103,97,114,101,32,97,99,116,117,97,108,108,121,118,
+105,99,116,111,114,121,32,111,118,101,114,40,41,59,60,47,115,99,114,105,112,116,
+62,99,111,110,116,105,110,117,111,117,115,108,121,114,101,113,117,105,114,101,
+100,32,102,111,114,101,118,111,108,117,116,105,111,110,97,114,121,97,110,32,101,
+102,102,101,99,116,105,118,101,110,111,114,116,104,32,111,102,32,116,104,101,44,
+32,119,104,105,99,104,32,119,97,115,32,102,114,111,110,116,32,111,102,32,116,104
+,101,111,114,32,111,116,104,101,114,119,105,115,101,115,111,109,101,32,102,111,
+114,109,32,111,102,104,97,100,32,110,111,116,32,98,101,101,110,103,101,110,101,
+114,97,116,101,100,32,98,121,105,110,102,111,114,109,97,116,105,111,110,46,112,
+101,114,109,105,116,116,101,100,32,116,111,105,110,99,108,117,100,101,115,32,116
+,104,101,100,101,118,101,108,111,112,109,101,110,116,44,101,110,116,101,114,101,
+100,32,105,110,116,111,116,104,101,32,112,114,101,118,105,111,117,115,99,111,110
+,115,105,115,116,101,110,116,108,121,97,114,101,32,107,110,111,119,110,32,97,115
+,116,104,101,32,102,105,101,108,100,32,111,102,116,104,105,115,32,116,121,112,
+101,32,111,102,103,105,118,101,110,32,116,111,32,116,104,101,116,104,101,32,116,
+105,116,108,101,32,111,102,99,111,110,116,97,105,110,115,32,116,104,101,105,110,
+115,116,97,110,99,101,115,32,111,102,105,110,32,116,104,101,32,110,111,114,116,
+104,100,117,101,32,116,111,32,116,104,101,105,114,97,114,101,32,100,101,115,105,
+103,110,101,100,99,111,114,112,111,114,97,116,105,111,110,115,119,97,115,32,116,
+104,97,116,32,116,104,101,111,110,101,32,111,102,32,116,104,101,115,101,109,111,
+114,101,32,112,111,112,117,108,97,114,115,117,99,99,101,101,100,101,100,32,105,
+110,115,117,112,112,111,114,116,32,102,114,111,109,105,110,32,100,105,102,102,
+101,114,101,110,116,100,111,109,105,110,97,116,101,100,32,98,121,100,101,115,105
+,103,110,101,100,32,102,111,114,111,119,110,101,114,115,104,105,112,32,111,102,
+97,110,100,32,112,111,115,115,105,98,108,121,115,116,97,110,100,97,114,100,105,
+122,101,100,114,101,115,112,111,110,115,101,84,101,120,116,119,97,115,32,105,110
+,116,101,110,100,101,100,114,101,99,101,105,118,101,100,32,116,104,101,97,115,
+115,117,109,101,100,32,116,104,97,116,97,114,101,97,115,32,111,102,32,116,104,
+101,112,114,105,109,97,114,105,108,121,32,105,110,116,104,101,32,98,97,115,105,
+115,32,111,102,105,110,32,116,104,101,32,115,101,110,115,101,97,99,99,111,117,
+110,116,115,32,102,111,114,100,101,115,116,114,111,121,101,100,32,98,121,97,116,
+32,108,101,97,115,116,32,116,119,111,119,97,115,32,100,101,99,108,97,114,101,100
+,99,111,117,108,100,32,110,111,116,32,98,101,83,101,99,114,101,116,97,114,121,32
+,111,102,97,112,112,101,97,114,32,116,111,32,98,101,109,97,114,103,105,110,45,
+116,111,112,58,49,47,94,92,115,43,124,92,115,43,36,47,103,101,41,123,116,104,114
+,111,119,32,101,125,59,116,104,101,32,115,116,97,114,116,32,111,102,116,119,111,
+32,115,101,112,97,114,97,116,101,108,97,110,103,117,97,103,101,32,97,110,100,119
+,104,111,32,104,97,100,32,98,101,101,110,111,112,101,114,97,116,105,111,110,32,
+111,102,100,101,97,116,104,32,111,102,32,116,104,101,114,101,97,108,32,110,117,
+109,98,101,114,115,9,60,108,105,110,107,32,114,101,108,61,34,112,114,111,118,105
+,100,101,100,32,116,104,101,116,104,101,32,115,116,111,114,121,32,111,102,99,111
+,109,112,101,116,105,116,105,111,110,115,101,110,103,108,105,115,104,32,40,85,75
+,41,101,110,103,108,105,115,104,32,40,85,83,41,208,156,208,190,208,189,208,179,
+208,190,208,187,208,161,209,128,208,191,209,129,208,186,208,184,209,129,209,128,
+208,191,209,129,208,186,208,184,209,129,209,128,208,191,209,129,208,186,208,190,
+217,132,216,185,216,177,216,168,217,138,216,169,230,173,163,233,171,148,228,184,
+173,230,150,135,231,174,128,228,189,147,228,184,173,230,150,135,231,185,129,228,
+189,147,228,184,173,230,150,135,230,156,137,233,153,144,229,133,172,229,143,184,
+228,186,186,230,176,145,230,148,191,229,186,156,233,152,191,233,135,140,229,183,
+180,229,183,180,231,164,190,228,188,154,228,184,187,228,185,137,230,147,141,228,
+189,156,231,179,187,231,187,159,230,148,191,231,173,150,230,179,149,232,167,132,
+105,110,102,111,114,109,97,99,105,195,179,110,104,101,114,114,97,109,105,101,110
+,116,97,115,101,108,101,99,116,114,195,179,110,105,99,111,100,101,115,99,114,105
+,112,99,105,195,179,110,99,108,97,115,105,102,105,99,97,100,111,115,99,111,110,
+111,99,105,109,105,101,110,116,111,112,117,98,108,105,99,97,99,105,195,179,110,
+114,101,108,97,99,105,111,110,97,100,97,115,105,110,102,111,114,109,195,161,116,
+105,99,97,114,101,108,97,99,105,111,110,97,100,111,115,100,101,112,97,114,116,97
+,109,101,110,116,111,116,114,97,98,97,106,97,100,111,114,101,115,100,105,114,101
+,99,116,97,109,101,110,116,101,97,121,117,110,116,97,109,105,101,110,116,111,109
+,101,114,99,97,100,111,76,105,98,114,101,99,111,110,116,195,161,99,116,101,110,
+111,115,104,97,98,105,116,97,99,105,111,110,101,115,99,117,109,112,108,105,109,
+105,101,110,116,111,114,101,115,116,97,117,114,97,110,116,101,115,100,105,115,
+112,111,115,105,99,105,195,179,110,99,111,110,115,101,99,117,101,110,99,105,97,
+101,108,101,99,116,114,195,179,110,105,99,97,97,112,108,105,99,97,99,105,111,110
+,101,115,100,101,115,99,111,110,101,99,116,97,100,111,105,110,115,116,97,108,97,
+99,105,195,179,110,114,101,97,108,105,122,97,99,105,195,179,110,117,116,105,108,
+105,122,97,99,105,195,179,110,101,110,99,105,99,108,111,112,101,100,105,97,101,
+110,102,101,114,109,101,100,97,100,101,115,105,110,115,116,114,117,109,101,110,
+116,111,115,101,120,112,101,114,105,101,110,99,105,97,115,105,110,115,116,105,
+116,117,99,105,195,179,110,112,97,114,116,105,99,117,108,97,114,101,115,115,117,
+98,99,97,116,101,103,111,114,105,97,209,130,208,190,208,187,209,140,208,186,208,
+190,208,160,208,190,209,129,209,129,208,184,208,184,209,128,208,176,208,177,208,
+190,209,130,209,139,208,177,208,190,208,187,209,140,209,136,208,181,208,191,209,
+128,208,190,209,129,209,130,208,190,208,188,208,190,208,182,208,181,209,130,208,
+181,208,180,209,128,209,131,208,179,208,184,209,133,209,129,208,187,209,131,209,
+135,208,176,208,181,209,129,208,181,208,185,209,135,208,176,209,129,208,178,209,
+129,208,181,208,179,208,180,208,176,208,160,208,190,209,129,209,129,208,184,209,
+143,208,156,208,190,209,129,208,186,208,178,208,181,208,180,209,128,209,131,208,
+179,208,184,208,181,208,179,208,190,209,128,208,190,208,180,208,176,208,178,208,
+190,208,191,209,128,208,190,209,129,208,180,208,176,208,189,208,189,209,139,209,
+133,208,180,208,190,208,187,208,182,208,189,209,139,208,184,208,188,208,181,208,
+189,208,189,208,190,208,156,208,190,209,129,208,186,208,178,209,139,209,128,209,
+131,208,177,208,187,208,181,208,185,208,156,208,190,209,129,208,186,208,178,208,
+176,209,129,209,130,209,128,208,176,208,189,209,139,208,189,208,184,209,135,208,
+181,208,179,208,190,209,128,208,176,208,177,208,190,209,130,208,181,208,180,208,
+190,208,187,208,182,208,181,208,189,209,131,209,129,208,187,209,131,208,179,208,
+184,209,130,208,181,208,191,208,181,209,128,209,140,208,158,208,180,208,189,208,
+176,208,186,208,190,208,191,208,190,209,130,208,190,208,188,209,131,209,128,208,
+176,208,177,208,190,209,130,209,131,208,176,208,191,209,128,208,181,208,187,209,
+143,208,178,208,190,208,190,208,177,209,137,208,181,208,190,208,180,208,189,208,
+190,208,179,208,190,209,129,208,178,208,190,208,181,208,179,208,190,209,129,209,
+130,208,176,209,130,209,140,208,184,208,180,209,128,209,131,208,179,208,190,208,
+185,209,132,208,190,209,128,209,131,208,188,208,181,209,133,208,190,209,128,208,
+190,209,136,208,190,208,191,209,128,208,190,209,130,208,184,208,178,209,129,209,
+129,209,139,208,187,208,186,208,176,208,186,208,176,208,182,208,180,209,139,208,
+185,208,178,208,187,208,176,209,129,209,130,208,184,208,179,209,128,209,131,208,
+191,208,191,209,139,208,178,208,188,208,181,209,129,209,130,208,181,209,128,208,
+176,208,177,208,190,209,130,208,176,209,129,208,186,208,176,208,183,208,176,208,
+187,208,191,208,181,209,128,208,178,209,139,208,185,208,180,208,181,208,187,208,
+176,209,130,209,140,208,180,208,181,208,189,209,140,208,179,208,184,208,191,208,
+181,209,128,208,184,208,190,208,180,208,177,208,184,208,183,208,189,208,181,209,
+129,208,190,209,129,208,189,208,190,208,178,208,181,208,188,208,190,208,188,208,
+181,208,189,209,130,208,186,209,131,208,191,208,184,209,130,209,140,208,180,208,
+190,208,187,208,182,208,189,208,176,209,128,208,176,208,188,208,186,208,176,209,
+133,208,189,208,176,209,135,208,176,208,187,208,190,208,160,208,176,208,177,208,
+190,209,130,208,176,208,162,208,190,208,187,209,140,208,186,208,190,209,129,208,
+190,208,178,209,129,208,181,208,188,208,178,209,130,208,190,209,128,208,190,208,
+185,208,189,208,176,209,135,208,176,208,187,208,176,209,129,208,191,208,184,209,
+129,208,190,208,186,209,129,208,187,209,131,208,182,208,177,209,139,209,129,208,
+184,209,129,209,130,208,181,208,188,208,191,208,181,209,135,208,176,209,130,208,
+184,208,189,208,190,208,178,208,190,208,179,208,190,208,191,208,190,208,188,208,
+190,209,137,208,184,209,129,208,176,208,185,209,130,208,190,208,178,208,191,208,
+190,209,135,208,181,208,188,209,131,208,191,208,190,208,188,208,190,209,137,209,
+140,208,180,208,190,208,187,208,182,208,189,208,190,209,129,209,129,209,139,208,
+187,208,186,208,184,208,177,209,139,209,129,209,130,209,128,208,190,208,180,208,
+176,208,189,208,189,209,139,208,181,208,188,208,189,208,190,208,179,208,184,208,
+181,208,191,209,128,208,190,208,181,208,186,209,130,208,161,208,181,208,185,209,
+135,208,176,209,129,208,188,208,190,208,180,208,181,208,187,208,184,209,130,208,
+176,208,186,208,190,208,179,208,190,208,190,208,189,208,187,208,176,208,185,208,
+189,208,179,208,190,209,128,208,190,208,180,208,181,208,178,208,181,209,128,209,
+129,208,184,209,143,209,129,209,130,209,128,208,176,208,189,208,181,209,132,208,
+184,208,187,209,140,208,188,209,139,209,131,209,128,208,190,208,178,208,189,209,
+143,209,128,208,176,208,183,208,189,209,139,209,133,208,184,209,129,208,186,208,
+176,209,130,209,140,208,189,208,181,208,180,208,181,208,187,209,142,209,143,208,
+189,208,178,208,176,209,128,209,143,208,188,208,181,208,189,209,140,209,136,208,
+181,208,188,208,189,208,190,208,179,208,184,209,133,208,180,208,176,208,189,208,
+189,208,190,208,185,208,183,208,189,208,176,209,135,208,184,209,130,208,189,208,
+181,208,187,209,140,208,183,209,143,209,132,208,190,209,128,209,131,208,188,208,
+176,208,162,208,181,208,191,208,181,209,128,209,140,208,188,208,181,209,129,209,
+143,209,134,208,176,208,183,208,176,209,137,208,184,209,130,209,139,208,155,209,
+131,209,135,209,136,208,184,208,181,224,164,168,224,164,185,224,165,128,224,164,
+130,224,164,149,224,164,176,224,164,168,224,165,135,224,164,133,224,164,170,224,
+164,168,224,165,135,224,164,149,224,164,191,224,164,175,224,164,190,224,164,149,
+224,164,176,224,165,135,224,164,130,224,164,133,224,164,168,224,165,141,224,164,
+175,224,164,149,224,165,141,224,164,175,224,164,190,224,164,151,224,164,190,224,
+164,135,224,164,161,224,164,172,224,164,190,224,164,176,224,165,135,224,164,149,
+224,164,191,224,164,184,224,165,128,224,164,166,224,164,191,224,164,175,224,164,
+190,224,164,170,224,164,185,224,164,178,224,165,135,224,164,184,224,164,191,224,
+164,130,224,164,185,224,164,173,224,164,190,224,164,176,224,164,164,224,164,133,
+224,164,170,224,164,168,224,165,128,224,164,181,224,164,190,224,164,178,224,165,
+135,224,164,184,224,165,135,224,164,181,224,164,190,224,164,149,224,164,176,224,
+164,164,224,165,135,224,164,174,224,165,135,224,164,176,224,165,135,224,164,185,
+224,165,139,224,164,168,224,165,135,224,164,184,224,164,149,224,164,164,224,165,
+135,224,164,172,224,164,185,224,165,129,224,164,164,224,164,184,224,164,190,224,
+164,135,224,164,159,224,164,185,224,165,139,224,164,151,224,164,190,224,164,156,
+224,164,190,224,164,168,224,165,135,224,164,174,224,164,191,224,164,168,224,164,
+159,224,164,149,224,164,176,224,164,164,224,164,190,224,164,149,224,164,176,224,
+164,168,224,164,190,224,164,137,224,164,168,224,164,149,224,165,135,224,164,175,
+224,164,185,224,164,190,224,164,129,224,164,184,224,164,172,224,164,184,224,165,
+135,224,164,173,224,164,190,224,164,183,224,164,190,224,164,134,224,164,170,224,
+164,149,224,165,135,224,164,178,224,164,191,224,164,175,224,165,135,224,164,182,
+224,165,129,224,164,176,224,165,130,224,164,135,224,164,184,224,164,149,224,165,
+135,224,164,152,224,164,130,224,164,159,224,165,135,224,164,174,224,165,135,224,
+164,176,224,165,128,224,164,184,224,164,149,224,164,164,224,164,190,224,164,174,
+224,165,135,224,164,176,224,164,190,224,164,178,224,165,135,224,164,149,224,164,
+176,224,164,133,224,164,167,224,164,191,224,164,149,224,164,133,224,164,170,224,
+164,168,224,164,190,224,164,184,224,164,174,224,164,190,224,164,156,224,164,174,
+224,165,129,224,164,157,224,165,135,224,164,149,224,164,190,224,164,176,224,164,
+163,224,164,185,224,165,139,224,164,164,224,164,190,224,164,149,224,164,161,224,
+164,188,224,165,128,224,164,175,224,164,185,224,164,190,224,164,130,224,164,185,
+224,165,139,224,164,159,224,164,178,224,164,182,224,164,172,224,165,141,224,164,
+166,224,164,178,224,164,191,224,164,175,224,164,190,224,164,156,224,165,128,224,
+164,181,224,164,168,224,164,156,224,164,190,224,164,164,224,164,190,224,164,149,
+224,165,136,224,164,184,224,165,135,224,164,134,224,164,170,224,164,149,224,164,
+190,224,164,181,224,164,190,224,164,178,224,165,128,224,164,166,224,165,135,224,
+164,168,224,165,135,224,164,170,224,165,130,224,164,176,224,165,128,224,164,170,
+224,164,190,224,164,168,224,165,128,224,164,137,224,164,184,224,164,149,224,165,
+135,224,164,185,224,165,139,224,164,151,224,165,128,224,164,172,224,165,136,224,
+164,160,224,164,149,224,164,134,224,164,170,224,164,149,224,165,128,224,164,181,
+224,164,176,224,165,141,224,164,183,224,164,151,224,164,190,224,164,130,224,164,
+181,224,164,134,224,164,170,224,164,149,224,165,139,224,164,156,224,164,191,224,
+164,178,224,164,190,224,164,156,224,164,190,224,164,168,224,164,190,224,164,184,
+224,164,185,224,164,174,224,164,164,224,164,185,224,164,174,224,165,135,224,164,
+130,224,164,137,224,164,168,224,164,149,224,165,128,224,164,175,224,164,190,224,
+164,185,224,165,130,224,164,166,224,164,176,224,165,141,224,164,156,224,164,184,
+224,165,130,224,164,154,224,165,128,224,164,170,224,164,184,224,164,130,224,164,
+166,224,164,184,224,164,181,224,164,190,224,164,178,224,164,185,224,165,139,224,
+164,168,224,164,190,224,164,185,224,165,139,224,164,164,224,165,128,224,164,156,
+224,165,136,224,164,184,224,165,135,224,164,181,224,164,190,224,164,170,224,164,
+184,224,164,156,224,164,168,224,164,164,224,164,190,224,164,168,224,165,135,224,
+164,164,224,164,190,224,164,156,224,164,190,224,164,176,224,165,128,224,164,152,
+224,164,190,224,164,175,224,164,178,224,164,156,224,164,191,224,164,178,224,165,
+135,224,164,168,224,165,128,224,164,154,224,165,135,224,164,156,224,164,190,224,
+164,130,224,164,154,224,164,170,224,164,164,224,165,141,224,164,176,224,164,151,
+224,165,130,224,164,151,224,164,178,224,164,156,224,164,190,224,164,164,224,165,
+135,224,164,172,224,164,190,224,164,185,224,164,176,224,164,134,224,164,170,224,
+164,168,224,165,135,224,164,181,224,164,190,224,164,185,224,164,168,224,164,135,
+224,164,184,224,164,149,224,164,190,224,164,184,224,165,129,224,164,172,224,164,
+185,224,164,176,224,164,185,224,164,168,224,165,135,224,164,135,224,164,184,224,
+164,184,224,165,135,224,164,184,224,164,185,224,164,191,224,164,164,224,164,172,
+224,164,161,224,164,188,224,165,135,224,164,152,224,164,159,224,164,168,224,164,
+190,224,164,164,224,164,178,224,164,190,224,164,182,224,164,170,224,164,190,224,
+164,130,224,164,154,224,164,182,224,165,141,224,164,176,224,165,128,224,164,172,
+224,164,161,224,164,188,224,165,128,224,164,185,224,165,139,224,164,164,224,165,
+135,224,164,184,224,164,190,224,164,136,224,164,159,224,164,182,224,164,190,224,
+164,175,224,164,166,224,164,184,224,164,149,224,164,164,224,165,128,224,164,156,
+224,164,190,224,164,164,224,165,128,224,164,181,224,164,190,224,164,178,224,164,
+190,224,164,185,224,164,156,224,164,190,224,164,176,224,164,170,224,164,159,224,
+164,168,224,164,190,224,164,176,224,164,150,224,164,168,224,165,135,224,164,184,
+224,164,161,224,164,188,224,164,149,224,164,174,224,164,191,224,164,178,224,164,
+190,224,164,137,224,164,184,224,164,149,224,165,128,224,164,149,224,165,135,224,
+164,181,224,164,178,224,164,178,224,164,151,224,164,164,224,164,190,224,164,150,
+224,164,190,224,164,168,224,164,190,224,164,133,224,164,176,224,165,141,224,164,
+165,224,164,156,224,164,185,224,164,190,224,164,130,224,164,166,224,165,135,224,
+164,150,224,164,190,224,164,170,224,164,185,224,164,178,224,165,128,224,164,168,
+224,164,191,224,164,175,224,164,174,224,164,172,224,164,191,224,164,168,224,164,
+190,224,164,172,224,165,136,224,164,130,224,164,149,224,164,149,224,164,185,224,
+165,128,224,164,130,224,164,149,224,164,185,224,164,168,224,164,190,224,164,166,
+224,165,135,224,164,164,224,164,190,224,164,185,224,164,174,224,164,178,224,165,
+135,224,164,149,224,164,190,224,164,171,224,165,128,224,164,156,224,164,172,224,
+164,149,224,164,191,224,164,164,224,165,129,224,164,176,224,164,164,224,164,174,
+224,164,190,224,164,130,224,164,151,224,164,181,224,164,185,224,165,128,224,164,
+130,224,164,176,224,165,139,224,164,156,224,164,188,224,164,174,224,164,191,224,
+164,178,224,165,128,224,164,134,224,164,176,224,165,139,224,164,170,224,164,184,
+224,165,135,224,164,168,224,164,190,224,164,175,224,164,190,224,164,166,224,164,
+181,224,164,178,224,165,135,224,164,168,224,165,135,224,164,150,224,164,190,224,
+164,164,224,164,190,224,164,149,224,164,176,224,165,128,224,164,172,224,164,137,
+224,164,168,224,164,149,224,164,190,224,164,156,224,164,181,224,164,190,224,164,
+172,224,164,170,224,165,130,224,164,176,224,164,190,224,164,172,224,164,161,224,
+164,188,224,164,190,224,164,184,224,165,140,224,164,166,224,164,190,224,164,182,
+224,165,135,224,164,175,224,164,176,224,164,149,224,164,191,224,164,175,224,165,
+135,224,164,149,224,164,185,224,164,190,224,164,130,224,164,133,224,164,149,224,
+164,184,224,164,176,224,164,172,224,164,168,224,164,190,224,164,143,224,164,181,
+224,164,185,224,164,190,224,164,130,224,164,184,224,165,141,224,164,165,224,164,
+178,224,164,174,224,164,191,224,164,178,224,165,135,224,164,178,224,165,135,224,
+164,150,224,164,149,224,164,181,224,164,191,224,164,183,224,164,175,224,164,149,
+224,165,141,224,164,176,224,164,130,224,164,184,224,164,174,224,165,130,224,164,
+185,224,164,165,224,164,190,224,164,168,224,164,190,216,170,216,179,216,170,216,
+183,217,138,216,185,217,133,216,180,216,167,216,177,217,131,216,169,216,168,217,
+136,216,167,216,179,216,183,216,169,216,167,217,132,216,181,217,129,216,173,216,
+169,217,133,217,136,216,167,216,182,217,138,216,185,216,167,217,132,216,174,216,
+167,216,181,216,169,216,167,217,132,217,133,216,178,217,138,216,175,216,167,217,
+132,216,185,216,167,217,133,216,169,216,167,217,132,217,131,216,167,216,170,216,
+168,216,167,217,132,216,177,216,175,217,136,216,175,216,168,216,177,217,134,216,
+167,217,133,216,172,216,167,217,132,216,175,217,136,217,132,216,169,216,167,217,
+132,216,185,216,167,217,132,217,133,216,167,217,132,217,133,217,136,217,130,216,
+185,216,167,217,132,216,185,216,177,216,168,217,138,216,167,217,132,216,179,216,
+177,217,138,216,185,216,167,217,132,216,172,217,136,216,167,217,132,216,167,217,
+132,216,176,217,135,216,167,216,168,216,167,217,132,216,173,217,138,216,167,216,
+169,216,167,217,132,216,173,217,130,217,136,217,130,216,167,217,132,217,131,216,
+177,217,138,217,133,216,167,217,132,216,185,216,177,216,167,217,130,217,133,216,
+173,217,129,217,136,216,184,216,169,216,167,217,132,216,171,216,167,217,134,217,
+138,217,133,216,180,216,167,217,135,216,175,216,169,216,167,217,132,217,133,216,
+177,216,163,216,169,216,167,217,132,217,130,216,177,216,162,217,134,216,167,217,
+132,216,180,216,168,216,167,216,168,216,167,217,132,216,173,217,136,216,167,216,
+177,216,167,217,132,216,172,216,175,217,138,216,175,216,167,217,132,216,163,216,
+179,216,177,216,169,216,167,217,132,216,185,217,132,217,136,217,133,217,133,216,
+172,217,133,217,136,216,185,216,169,216,167,217,132,216,177,216,173,217,133,217,
+134,216,167,217,132,217,134,217,130,216,167,216,183,217,129,217,132,216,179,216,
+183,217,138,217,134,216,167,217,132,217,131,217,136,217,138,216,170,216,167,217,
+132,216,175,217,134,217,138,216,167,216,168,216,177,217,131,216,167,216,170,217,
+135,216,167,217,132,216,177,217,138,216,167,216,182,216,170,216,173,217,138,216,
+167,216,170,217,138,216,168,216,170,217,136,217,130,217,138,216,170,216,167,217,
+132,216,163,217,136,217,132,217,137,216,167,217,132,216,168,216,177,217,138,216,
+175,216,167,217,132,217,131,217,132,216,167,217,133,216,167,217,132,216,177,216,
+167,216,168,216,183,216,167,217,132,216,180,216,174,216,181,217,138,216,179,217,
+138,216,167,216,177,216,167,216,170,216,167,217,132,216,171,216,167,217,132,216,
+171,216,167,217,132,216,181,217,132,216,167,216,169,216,167,217,132,216,173,216,
+175,217,138,216,171,216,167,217,132,216,178,217,136,216,167,216,177,216,167,217,
+132,216,174,217,132,217,138,216,172,216,167,217,132,216,172,217,133,217,138,216,
+185,216,167,217,132,216,185,216,167,217,133,217,135,216,167,217,132,216,172,217,
+133,216,167,217,132,216,167,217,132,216,179,216,167,216,185,216,169,217,133,216,
+180,216,167,217,135,216,175,217,135,216,167,217,132,216,177,216,166,217,138,216,
+179,216,167,217,132,216,175,216,174,217,136,217,132,216,167,217,132,217,129,217,
+134,217,138,216,169,216,167,217,132,217,131,216,170,216,167,216,168,216,167,217,
+132,216,175,217,136,216,177,217,138,216,167,217,132,216,175,216,177,217,136,216,
+179,216,167,216,179,216,170,216,186,216,177,217,130,216,170,216,181,216,167,217,
+133,217,138,217,133,216,167,217,132,216,168,217,134,216,167,216,170,216,167,217,
+132,216,185,216,184,217,138,217,133,101,110,116,101,114,116,97,105,110,109,101,
+110,116,117,110,100,101,114,115,116,97,110,100,105,110,103,32,61,32,102,117,110,
+99,116,105,111,110,40,41,46,106,112,103,34,32,119,105,100,116,104,61,34,99,111,
+110,102,105,103,117,114,97,116,105,111,110,46,112,110,103,34,32,119,105,100,116,
+104,61,34,60,98,111,100,121,32,99,108,97,115,115,61,34,77,97,116,104,46,114,97,
+110,100,111,109,40,41,99,111,110,116,101,109,112,111,114,97,114,121,32,85,110,
+105,116,101,100,32,83,116,97,116,101,115,99,105,114,99,117,109,115,116,97,110,99
+,101,115,46,97,112,112,101,110,100,67,104,105,108,100,40,111,114,103,97,110,105,
+122,97,116,105,111,110,115,60,115,112,97,110,32,99,108,97,115,115,61,34,34,62,60
+,105,109,103,32,115,114,99,61,34,47,100,105,115,116,105,110,103,117,105,115,104,
+101,100,116,104,111,117,115,97,110,100,115,32,111,102,32,99,111,109,109,117,110,
+105,99,97,116,105,111,110,99,108,101,97,114,34,62,60,47,100,105,118,62,105,110,
+118,101,115,116,105,103,97,116,105,111,110,102,97,118,105,99,111,110,46,105,99,
+111,34,32,109,97,114,103,105,110,45,114,105,103,104,116,58,98,97,115,101,100,32,
+111,110,32,116,104,101,32,77,97,115,115,97,99,104,117,115,101,116,116,115,116,97
+,98,108,101,32,98,111,114,100,101,114,61,105,110,116,101,114,110,97,116,105,111,
+110,97,108,97,108,115,111,32,107,110,111,119,110,32,97,115,112,114,111,110,117,
+110,99,105,97,116,105,111,110,98,97,99,107,103,114,111,117,110,100,58,35,102,112
+,97,100,100,105,110,103,45,108,101,102,116,58,70,111,114,32,101,120,97,109,112,
+108,101,44,32,109,105,115,99,101,108,108,97,110,101,111,117,115,38,108,116,59,47
+,109,97,116,104,38,103,116,59,112,115,121,99,104,111,108,111,103,105,99,97,108,
+105,110,32,112,97,114,116,105,99,117,108,97,114,101,97,114,99,104,34,32,116,121,
+112,101,61,34,102,111,114,109,32,109,101,116,104,111,100,61,34,97,115,32,111,112
+,112,111,115,101,100,32,116,111,83,117,112,114,101,109,101,32,67,111,117,114,116
+,111,99,99,97,115,105,111,110,97,108,108,121,32,65,100,100,105,116,105,111,110,
+97,108,108,121,44,78,111,114,116,104,32,65,109,101,114,105,99,97,112,120,59,98,
+97,99,107,103,114,111,117,110,100,111,112,112,111,114,116,117,110,105,116,105,
+101,115,69,110,116,101,114,116,97,105,110,109,101,110,116,46,116,111,76,111,119,
+101,114,67,97,115,101,40,109,97,110,117,102,97,99,116,117,114,105,110,103,112,
+114,111,102,101,115,115,105,111,110,97,108,32,99,111,109,98,105,110,101,100,32,
+119,105,116,104,70,111,114,32,105,110,115,116,97,110,99,101,44,99,111,110,115,
+105,115,116,105,110,103,32,111,102,34,32,109,97,120,108,101,110,103,116,104,61,
+34,114,101,116,117,114,110,32,102,97,108,115,101,59,99,111,110,115,99,105,111,
+117,115,110,101,115,115,77,101,100,105,116,101,114,114,97,110,101,97,110,101,120
+,116,114,97,111,114,100,105,110,97,114,121,97,115,115,97,115,115,105,110,97,116,
+105,111,110,115,117,98,115,101,113,117,101,110,116,108,121,32,98,117,116,116,111
+,110,32,116,121,112,101,61,34,116,104,101,32,110,117,109,98,101,114,32,111,102,
+116,104,101,32,111,114,105,103,105,110,97,108,32,99,111,109,112,114,101,104,101,
+110,115,105,118,101,114,101,102,101,114,115,32,116,111,32,116,104,101,60,47,117,
+108,62,10,60,47,100,105,118,62,10,112,104,105,108,111,115,111,112,104,105,99,97,
+108,108,111,99,97,116,105,111,110,46,104,114,101,102,119,97,115,32,112,117,98,
+108,105,115,104,101,100,83,97,110,32,70,114,97,110,99,105,115,99,111,40,102,117,
+110,99,116,105,111,110,40,41,123,10,60,100,105,118,32,105,100,61,34,109,97,105,
+110,115,111,112,104,105,115,116,105,99,97,116,101,100,109,97,116,104,101,109,97,
+116,105,99,97,108,32,47,104,101,97,100,62,13,10,60,98,111,100,121,115,117,103,
+103,101,115,116,115,32,116,104,97,116,100,111,99,117,109,101,110,116,97,116,105,
+111,110,99,111,110,99,101,110,116,114,97,116,105,111,110,114,101,108,97,116,105,
+111,110,115,104,105,112,115,109,97,121,32,104,97,118,101,32,98,101,101,110,40,
+102,111,114,32,101,120,97,109,112,108,101,44,84,104,105,115,32,97,114,116,105,99
+,108,101,32,105,110,32,115,111,109,101,32,99,97,115,101,115,112,97,114,116,115,
+32,111,102,32,116,104,101,32,100,101,102,105,110,105,116,105,111,110,32,111,102,
+71,114,101,97,116,32,66,114,105,116,97,105,110,32,99,101,108,108,112,97,100,100,
+105,110,103,61,101,113,117,105,118,97,108,101,110,116,32,116,111,112,108,97,99,
+101,104,111,108,100,101,114,61,34,59,32,102,111,110,116,45,115,105,122,101,58,32
+,106,117,115,116,105,102,105,99,97,116,105,111,110,98,101,108,105,101,118,101,
+100,32,116,104,97,116,115,117,102,102,101,114,101,100,32,102,114,111,109,97,116,
+116,101,109,112,116,101,100,32,116,111,32,108,101,97,100,101,114,32,111,102,32,
+116,104,101,99,114,105,112,116,34,32,115,114,99,61,34,47,40,102,117,110,99,116,
+105,111,110,40,41,32,123,97,114,101,32,97,118,97,105,108,97,98,108,101,10,9,60,
+108,105,110,107,32,114,101,108,61,34,32,115,114,99,61,39,104,116,116,112,58,47,
+47,105,110,116,101,114,101,115,116,101,100,32,105,110,99,111,110,118,101,110,116
+,105,111,110,97,108,32,34,32,97,108,116,61,34,34,32,47,62,60,47,97,114,101,32,
+103,101,110,101,114,97,108,108,121,104,97,115,32,97,108,115,111,32,98,101,101,
+110,109,111,115,116,32,112,111,112,117,108,97,114,32,99,111,114,114,101,115,112,
+111,110,100,105,110,103,99,114,101,100,105,116,101,100,32,119,105,116,104,116,
+121,108,101,61,34,98,111,114,100,101,114,58,60,47,97,62,60,47,115,112,97,110,62,
+60,47,46,103,105,102,34,32,119,105,100,116,104,61,34,60,105,102,114,97,109,101,
+32,115,114,99,61,34,116,97,98,108,101,32,99,108,97,115,115,61,34,105,110,108,105
+,110,101,45,98,108,111,99,107,59,97,99,99,111,114,100,105,110,103,32,116,111,32,
+116,111,103,101,116,104,101,114,32,119,105,116,104,97,112,112,114,111,120,105,
+109,97,116,101,108,121,112,97,114,108,105,97,109,101,110,116,97,114,121,109,111,
+114,101,32,97,110,100,32,109,111,114,101,100,105,115,112,108,97,121,58,110,111,
+110,101,59,116,114,97,100,105,116,105,111,110,97,108,108,121,112,114,101,100,111
+,109,105,110,97,110,116,108,121,38,110,98,115,112,59,124,38,110,98,115,112,59,38
+,110,98,115,112,59,60,47,115,112,97,110,62,32,99,101,108,108,115,112,97,99,105,
+110,103,61,60,105,110,112,117,116,32,110,97,109,101,61,34,111,114,34,32,99,111,
+110,116,101,110,116,61,34,99,111,110,116,114,111,118,101,114,115,105,97,108,112,
+114,111,112,101,114,116,121,61,34,111,103,58,47,120,45,115,104,111,99,107,119,97
+,118,101,45,100,101,109,111,110,115,116,114,97,116,105,111,110,115,117,114,114,
+111,117,110,100,101,100,32,98,121,78,101,118,101,114,116,104,101,108,101,115,115
+,44,119,97,115,32,116,104,101,32,102,105,114,115,116,99,111,110,115,105,100,101,
+114,97,98,108,101,32,65,108,116,104,111,117,103,104,32,116,104,101,32,99,111,108
+,108,97,98,111,114,97,116,105,111,110,115,104,111,117,108,100,32,110,111,116,32,
+98,101,112,114,111,112,111,114,116,105,111,110,32,111,102,60,115,112,97,110,32,
+115,116,121,108,101,61,34,107,110,111,119,110,32,97,115,32,116,104,101,32,115,
+104,111,114,116,108,121,32,97,102,116,101,114,102,111,114,32,105,110,115,116,97,
+110,99,101,44,100,101,115,99,114,105,98,101,100,32,97,115,32,47,104,101,97,100,
+62,10,60,98,111,100,121,32,115,116,97,114,116,105,110,103,32,119,105,116,104,105
+,110,99,114,101,97,115,105,110,103,108,121,32,116,104,101,32,102,97,99,116,32,
+116,104,97,116,100,105,115,99,117,115,115,105,111,110,32,111,102,109,105,100,100
+,108,101,32,111,102,32,116,104,101,97,110,32,105,110,100,105,118,105,100,117,97,
+108,100,105,102,102,105,99,117,108,116,32,116,111,32,112,111,105,110,116,32,111,
+102,32,118,105,101,119,104,111,109,111,115,101,120,117,97,108,105,116,121,97,99,
+99,101,112,116,97,110,99,101,32,111,102,60,47,115,112,97,110,62,60,47,100,105,
+118,62,109,97,110,117,102,97,99,116,117,114,101,114,115,111,114,105,103,105,110,
+32,111,102,32,116,104,101,99,111,109,109,111,110,108,121,32,117,115,101,100,105,
+109,112,111,114,116,97,110,99,101,32,111,102,100,101,110,111,109,105,110,97,116,
+105,111,110,115,98,97,99,107,103,114,111,117,110,100,58,32,35,108,101,110,103,
+116,104,32,111,102,32,116,104,101,100,101,116,101,114,109,105,110,97,116,105,111
+,110,97,32,115,105,103,110,105,102,105,99,97,110,116,34,32,98,111,114,100,101,
+114,61,34,48,34,62,114,101,118,111,108,117,116,105,111,110,97,114,121,112,114,
+105,110,99,105,112,108,101,115,32,111,102,105,115,32,99,111,110,115,105,100,101,
+114,101,100,119,97,115,32,100,101,118,101,108,111,112,101,100,73,110,100,111,45,
+69,117,114,111,112,101,97,110,118,117,108,110,101,114,97,98,108,101,32,116,111,
+112,114,111,112,111,110,101,110,116,115,32,111,102,97,114,101,32,115,111,109,101
+,116,105,109,101,115,99,108,111,115,101,114,32,116,111,32,116,104,101,78,101,119
+,32,89,111,114,107,32,67,105,116,121,32,110,97,109,101,61,34,115,101,97,114,99,
+104,97,116,116,114,105,98,117,116,101,100,32,116,111,99,111,117,114,115,101,32,
+111,102,32,116,104,101,109,97,116,104,101,109,97,116,105,99,105,97,110,98,121,32
+,116,104,101,32,101,110,100,32,111,102,97,116,32,116,104,101,32,101,110,100,32,
+111,102,34,32,98,111,114,100,101,114,61,34,48,34,32,116,101,99,104,110,111,108,
+111,103,105,99,97,108,46,114,101,109,111,118,101,67,108,97,115,115,40,98,114,97,
+110,99,104,32,111,102,32,116,104,101,101,118,105,100,101,110,99,101,32,116,104,
+97,116,33,91,101,110,100,105,102,93,45,45,62,13,10,73,110,115,116,105,116,117,
+116,101,32,111,102,32,105,110,116,111,32,97,32,115,105,110,103,108,101,114,101,
+115,112,101,99,116,105,118,101,108,121,46,97,110,100,32,116,104,101,114,101,102,
+111,114,101,112,114,111,112,101,114,116,105,101,115,32,111,102,105,115,32,108,
+111,99,97,116,101,100,32,105,110,115,111,109,101,32,111,102,32,119,104,105,99,
+104,84,104,101,114,101,32,105,115,32,97,108,115,111,99,111,110,116,105,110,117,
+101,100,32,116,111,32,97,112,112,101,97,114,97,110,99,101,32,111,102,32,38,97,
+109,112,59,110,100,97,115,104,59,32,100,101,115,99,114,105,98,101,115,32,116,104
+,101,99,111,110,115,105,100,101,114,97,116,105,111,110,97,117,116,104,111,114,32
+,111,102,32,116,104,101,105,110,100,101,112,101,110,100,101,110,116,108,121,101,
+113,117,105,112,112,101,100,32,119,105,116,104,100,111,101,115,32,110,111,116,32
+,104,97,118,101,60,47,97,62,60,97,32,104,114,101,102,61,34,99,111,110,102,117,
+115,101,100,32,119,105,116,104,60,108,105,110,107,32,104,114,101,102,61,34,47,97
+,116,32,116,104,101,32,97,103,101,32,111,102,97,112,112,101,97,114,32,105,110,32
+,116,104,101,84,104,101,115,101,32,105,110,99,108,117,100,101,114,101,103,97,114
+,100,108,101,115,115,32,111,102,99,111,117,108,100,32,98,101,32,117,115,101,100,
+32,115,116,121,108,101,61,38,113,117,111,116,59,115,101,118,101,114,97,108,32,
+116,105,109,101,115,114,101,112,114,101,115,101,110,116,32,116,104,101,98,111,
+100,121,62,10,60,47,104,116,109,108,62,116,104,111,117,103,104,116,32,116,111,32
+,98,101,112,111,112,117,108,97,116,105,111,110,32,111,102,112,111,115,115,105,98
+,105,108,105,116,105,101,115,112,101,114,99,101,110,116,97,103,101,32,111,102,97
+,99,99,101,115,115,32,116,111,32,116,104,101,97,110,32,97,116,116,101,109,112,
+116,32,116,111,112,114,111,100,117,99,116,105,111,110,32,111,102,106,113,117,101
+,114,121,47,106,113,117,101,114,121,116,119,111,32,100,105,102,102,101,114,101,
+110,116,98,101,108,111,110,103,32,116,111,32,116,104,101,101,115,116,97,98,108,
+105,115,104,109,101,110,116,114,101,112,108,97,99,105,110,103,32,116,104,101,100
+,101,115,99,114,105,112,116,105,111,110,34,32,100,101,116,101,114,109,105,110,
+101,32,116,104,101,97,118,97,105,108,97,98,108,101,32,102,111,114,65,99,99,111,
+114,100,105,110,103,32,116,111,32,119,105,100,101,32,114,97,110,103,101,32,111,
+102,9,60,100,105,118,32,99,108,97,115,115,61,34,109,111,114,101,32,99,111,109,
+109,111,110,108,121,111,114,103,97,110,105,115,97,116,105,111,110,115,102,117,
+110,99,116,105,111,110,97,108,105,116,121,119,97,115,32,99,111,109,112,108,101,
+116,101,100,32,38,97,109,112,59,109,100,97,115,104,59,32,112,97,114,116,105,99,
+105,112,97,116,105,111,110,116,104,101,32,99,104,97,114,97,99,116,101,114,97,110
+,32,97,100,100,105,116,105,111,110,97,108,97,112,112,101,97,114,115,32,116,111,
+32,98,101,102,97,99,116,32,116,104,97,116,32,116,104,101,97,110,32,101,120,97,
+109,112,108,101,32,111,102,115,105,103,110,105,102,105,99,97,110,116,108,121,111
+,110,109,111,117,115,101,111,118,101,114,61,34,98,101,99,97,117,115,101,32,116,
+104,101,121,32,97,115,121,110,99,32,61,32,116,114,117,101,59,112,114,111,98,108,
+101,109,115,32,119,105,116,104,115,101,101,109,115,32,116,111,32,104,97,118,101,
+116,104,101,32,114,101,115,117,108,116,32,111,102,32,115,114,99,61,34,104,116,
+116,112,58,47,47,102,97,109,105,108,105,97,114,32,119,105,116,104,112,111,115,
+115,101,115,115,105,111,110,32,111,102,102,117,110,99,116,105,111,110,32,40,41,
+32,123,116,111,111,107,32,112,108,97,99,101,32,105,110,97,110,100,32,115,111,109
+,101,116,105,109,101,115,115,117,98,115,116,97,110,116,105,97,108,108,121,60,115
+,112,97,110,62,60,47,115,112,97,110,62,105,115,32,111,102,116,101,110,32,117,115
+,101,100,105,110,32,97,110,32,97,116,116,101,109,112,116,103,114,101,97,116,32,
+100,101,97,108,32,111,102,69,110,118,105,114,111,110,109,101,110,116,97,108,115,
+117,99,99,101,115,115,102,117,108,108,121,32,118,105,114,116,117,97,108,108,121,
+32,97,108,108,50,48,116,104,32,99,101,110,116,117,114,121,44,112,114,111,102,101
+,115,115,105,111,110,97,108,115,110,101,99,101,115,115,97,114,121,32,116,111,32,
+100,101,116,101,114,109,105,110,101,100,32,98,121,99,111,109,112,97,116,105,98,
+105,108,105,116,121,98,101,99,97,117,115,101,32,105,116,32,105,115,68,105,99,116
+,105,111,110,97,114,121,32,111,102,109,111,100,105,102,105,99,97,116,105,111,110
+,115,84,104,101,32,102,111,108,108,111,119,105,110,103,109,97,121,32,114,101,102
+,101,114,32,116,111,58,67,111,110,115,101,113,117,101,110,116,108,121,44,73,110,
+116,101,114,110,97,116,105,111,110,97,108,97,108,116,104,111,117,103,104,32,115,
+111,109,101,116,104,97,116,32,119,111,117,108,100,32,98,101,119,111,114,108,100,
+39,115,32,102,105,114,115,116,99,108,97,115,115,105,102,105,101,100,32,97,115,98
+,111,116,116,111,109,32,111,102,32,116,104,101,40,112,97,114,116,105,99,117,108,
+97,114,108,121,97,108,105,103,110,61,34,108,101,102,116,34,32,109,111,115,116,32
+,99,111,109,109,111,110,108,121,98,97,115,105,115,32,102,111,114,32,116,104,101,
+102,111,117,110,100,97,116,105,111,110,32,111,102,99,111,110,116,114,105,98,117,
+116,105,111,110,115,112,111,112,117,108,97,114,105,116,121,32,111,102,99,101,110
+,116,101,114,32,111,102,32,116,104,101,116,111,32,114,101,100,117,99,101,32,116,
+104,101,106,117,114,105,115,100,105,99,116,105,111,110,115,97,112,112,114,111,
+120,105,109,97,116,105,111,110,32,111,110,109,111,117,115,101,111,117,116,61,34,
+78,101,119,32,84,101,115,116,97,109,101,110,116,99,111,108,108,101,99,116,105,
+111,110,32,111,102,60,47,115,112,97,110,62,60,47,97,62,60,47,105,110,32,116,104,
+101,32,85,110,105,116,101,100,102,105,108,109,32,100,105,114,101,99,116,111,114,
+45,115,116,114,105,99,116,46,100,116,100,34,62,104,97,115,32,98,101,101,110,32,
+117,115,101,100,114,101,116,117,114,110,32,116,111,32,116,104,101,97,108,116,104
+,111,117,103,104,32,116,104,105,115,99,104,97,110,103,101,32,105,110,32,116,104,
+101,115,101,118,101,114,97,108,32,111,116,104,101,114,98,117,116,32,116,104,101,
+114,101,32,97,114,101,117,110,112,114,101,99,101,100,101,110,116,101,100,105,115
+,32,115,105,109,105,108,97,114,32,116,111,101,115,112,101,99,105,97,108,108,121,
+32,105,110,119,101,105,103,104,116,58,32,98,111,108,100,59,105,115,32,99,97,108,
+108,101,100,32,116,104,101,99,111,109,112,117,116,97,116,105,111,110,97,108,105,
+110,100,105,99,97,116,101,32,116,104,97,116,114,101,115,116,114,105,99,116,101,
+100,32,116,111,9,60,109,101,116,97,32,110,97,109,101,61,34,97,114,101,32,116,121
+,112,105,99,97,108,108,121,99,111,110,102,108,105,99,116,32,119,105,116,104,72,
+111,119,101,118,101,114,44,32,116,104,101,32,65,110,32,101,120,97,109,112,108,
+101,32,111,102,99,111,109,112,97,114,101,100,32,119,105,116,104,113,117,97,110,
+116,105,116,105,101,115,32,111,102,114,97,116,104,101,114,32,116,104,97,110,32,
+97,99,111,110,115,116,101,108,108,97,116,105,111,110,110,101,99,101,115,115,97,
+114,121,32,102,111,114,114,101,112,111,114,116,101,100,32,116,104,97,116,115,112
+,101,99,105,102,105,99,97,116,105,111,110,112,111,108,105,116,105,99,97,108,32,
+97,110,100,38,110,98,115,112,59,38,110,98,115,112,59,60,114,101,102,101,114,101,
+110,99,101,115,32,116,111,116,104,101,32,115,97,109,101,32,121,101,97,114,71,111
+,118,101,114,110,109,101,110,116,32,111,102,103,101,110,101,114,97,116,105,111,
+110,32,111,102,104,97,118,101,32,110,111,116,32,98,101,101,110,115,101,118,101,
+114,97,108,32,121,101,97,114,115,99,111,109,109,105,116,109,101,110,116,32,116,
+111,9,9,60,117,108,32,99,108,97,115,115,61,34,118,105,115,117,97,108,105,122,97,
+116,105,111,110,49,57,116,104,32,99,101,110,116,117,114,121,44,112,114,97,99,116
+,105,116,105,111,110,101,114,115,116,104,97,116,32,104,101,32,119,111,117,108,
+100,97,110,100,32,99,111,110,116,105,110,117,101,100,111,99,99,117,112,97,116,
+105,111,110,32,111,102,105,115,32,100,101,102,105,110,101,100,32,97,115,99,101,
+110,116,114,101,32,111,102,32,116,104,101,116,104,101,32,97,109,111,117,110,116,
+32,111,102,62,60,100,105,118,32,115,116,121,108,101,61,34,101,113,117,105,118,97
+,108,101,110,116,32,111,102,100,105,102,102,101,114,101,110,116,105,97,116,101,
+98,114,111,117,103,104,116,32,97,98,111,117,116,109,97,114,103,105,110,45,108,
+101,102,116,58,32,97,117,116,111,109,97,116,105,99,97,108,108,121,116,104,111,
+117,103,104,116,32,111,102,32,97,115,83,111,109,101,32,111,102,32,116,104,101,
+115,101,10,60,100,105,118,32,99,108,97,115,115,61,34,105,110,112,117,116,32,99,
+108,97,115,115,61,34,114,101,112,108,97,99,101,100,32,119,105,116,104,105,115,32
+,111,110,101,32,111,102,32,116,104,101,101,100,117,99,97,116,105,111,110,32,97,
+110,100,105,110,102,108,117,101,110,99,101,100,32,98,121,114,101,112,117,116,97,
+116,105,111,110,32,97,115,10,60,109,101,116,97,32,110,97,109,101,61,34,97,99,99,
+111,109,109,111,100,97,116,105,111,110,60,47,100,105,118,62,10,60,47,100,105,118
+,62,108,97,114,103,101,32,112,97,114,116,32,111,102,73,110,115,116,105,116,117,
+116,101,32,102,111,114,116,104,101,32,115,111,45,99,97,108,108,101,100,32,97,103
+,97,105,110,115,116,32,116,104,101,32,73,110,32,116,104,105,115,32,99,97,115,101
+,44,119,97,115,32,97,112,112,111,105,110,116,101,100,99,108,97,105,109,101,100,
+32,116,111,32,98,101,72,111,119,101,118,101,114,44,32,116,104,105,115,68,101,112
+,97,114,116,109,101,110,116,32,111,102,116,104,101,32,114,101,109,97,105,110,105
+,110,103,101,102,102,101,99,116,32,111,110,32,116,104,101,112,97,114,116,105,99,
+117,108,97,114,108,121,32,100,101,97,108,32,119,105,116,104,32,116,104,101,10,60
+,100,105,118,32,115,116,121,108,101,61,34,97,108,109,111,115,116,32,97,108,119,
+97,121,115,97,114,101,32,99,117,114,114,101,110,116,108,121,101,120,112,114,101,
+115,115,105,111,110,32,111,102,112,104,105,108,111,115,111,112,104,121,32,111,
+102,102,111,114,32,109,111,114,101,32,116,104,97,110,99,105,118,105,108,105,122,
+97,116,105,111,110,115,111,110,32,116,104,101,32,105,115,108,97,110,100,115,101,
+108,101,99,116,101,100,73,110,100,101,120,99,97,110,32,114,101,115,117,108,116,
+32,105,110,34,32,118,97,108,117,101,61,34,34,32,47,62,116,104,101,32,115,116,114
+,117,99,116,117,114,101,32,47,62,60,47,97,62,60,47,100,105,118,62,77,97,110,121,
+32,111,102,32,116,104,101,115,101,99,97,117,115,101,100,32,98,121,32,116,104,101
+,111,102,32,116,104,101,32,85,110,105,116,101,100,115,112,97,110,32,99,108,97,
+115,115,61,34,109,99,97,110,32,98,101,32,116,114,97,99,101,100,105,115,32,114,
+101,108,97,116,101,100,32,116,111,98,101,99,97,109,101,32,111,110,101,32,111,102
+,105,115,32,102,114,101,113,117,101,110,116,108,121,108,105,118,105,110,103,32,
+105,110,32,116,104,101,116,104,101,111,114,101,116,105,99,97,108,108,121,70,111,
+108,108,111,119,105,110,103,32,116,104,101,82,101,118,111,108,117,116,105,111,
+110,97,114,121,103,111,118,101,114,110,109,101,110,116,32,105,110,105,115,32,100
+,101,116,101,114,109,105,110,101,100,116,104,101,32,112,111,108,105,116,105,99,
+97,108,105,110,116,114,111,100,117,99,101,100,32,105,110,115,117,102,102,105,99,
+105,101,110,116,32,116,111,100,101,115,99,114,105,112,116,105,111,110,34,62,115,
+104,111,114,116,32,115,116,111,114,105,101,115,115,101,112,97,114,97,116,105,111
+,110,32,111,102,97,115,32,116,111,32,119,104,101,116,104,101,114,107,110,111,119
+,110,32,102,111,114,32,105,116,115,119,97,115,32,105,110,105,116,105,97,108,108,
+121,100,105,115,112,108,97,121,58,98,108,111,99,107,105,115,32,97,110,32,101,120
+,97,109,112,108,101,116,104,101,32,112,114,105,110,99,105,112,97,108,99,111,110,
+115,105,115,116,115,32,111,102,32,97,114,101,99,111,103,110,105,122,101,100,32,
+97,115,47,98,111,100,121,62,60,47,104,116,109,108,62,97,32,115,117,98,115,116,97
+,110,116,105,97,108,114,101,99,111,110,115,116,114,117,99,116,101,100,104,101,97
+,100,32,111,102,32,115,116,97,116,101,114,101,115,105,115,116,97,110,99,101,32,
+116,111,117,110,100,101,114,103,114,97,100,117,97,116,101,84,104,101,114,101,32,
+97,114,101,32,116,119,111,103,114,97,118,105,116,97,116,105,111,110,97,108,97,
+114,101,32,100,101,115,99,114,105,98,101,100,105,110,116,101,110,116,105,111,110
+,97,108,108,121,115,101,114,118,101,100,32,97,115,32,116,104,101,99,108,97,115,
+115,61,34,104,101,97,100,101,114,111,112,112,111,115,105,116,105,111,110,32,116,
+111,102,117,110,100,97,109,101,110,116,97,108,108,121,100,111,109,105,110,97,116
+,101,100,32,116,104,101,97,110,100,32,116,104,101,32,111,116,104,101,114,97,108,
+108,105,97,110,99,101,32,119,105,116,104,119,97,115,32,102,111,114,99,101,100,32
+,116,111,114,101,115,112,101,99,116,105,118,101,108,121,44,97,110,100,32,112,111
+,108,105,116,105,99,97,108,105,110,32,115,117,112,112,111,114,116,32,111,102,112
+,101,111,112,108,101,32,105,110,32,116,104,101,50,48,116,104,32,99,101,110,116,
+117,114,121,46,97,110,100,32,112,117,98,108,105,115,104,101,100,108,111,97,100,
+67,104,97,114,116,98,101,97,116,116,111,32,117,110,100,101,114,115,116,97,110,
+100,109,101,109,98,101,114,32,115,116,97,116,101,115,101,110,118,105,114,111,110
+,109,101,110,116,97,108,102,105,114,115,116,32,104,97,108,102,32,111,102,99,111,
+117,110,116,114,105,101,115,32,97,110,100,97,114,99,104,105,116,101,99,116,117,
+114,97,108,98,101,32,99,111,110,115,105,100,101,114,101,100,99,104,97,114,97,99,
+116,101,114,105,122,101,100,99,108,101,97,114,73,110,116,101,114,118,97,108,97,
+117,116,104,111,114,105,116,97,116,105,118,101,70,101,100,101,114,97,116,105,111
+,110,32,111,102,119,97,115,32,115,117,99,99,101,101,100,101,100,97,110,100,32,
+116,104,101,114,101,32,97,114,101,97,32,99,111,110,115,101,113,117,101,110,99,
+101,116,104,101,32,80,114,101,115,105,100,101,110,116,97,108,115,111,32,105,110,
+99,108,117,100,101,100,102,114,101,101,32,115,111,102,116,119,97,114,101,115,117
+,99,99,101,115,115,105,111,110,32,111,102,100,101,118,101,108,111,112,101,100,32
+,116,104,101,119,97,115,32,100,101,115,116,114,111,121,101,100,97,119,97,121,32,
+102,114,111,109,32,116,104,101,59,10,60,47,115,99,114,105,112,116,62,10,60,97,
+108,116,104,111,117,103,104,32,116,104,101,121,102,111,108,108,111,119,101,100,
+32,98,121,32,97,109,111,114,101,32,112,111,119,101,114,102,117,108,114,101,115,
+117,108,116,101,100,32,105,110,32,97,85,110,105,118,101,114,115,105,116,121,32,
+111,102,72,111,119,101,118,101,114,44,32,109,97,110,121,116,104,101,32,112,114,
+101,115,105,100,101,110,116,72,111,119,101,118,101,114,44,32,115,111,109,101,105
+,115,32,116,104,111,117,103,104,116,32,116,111,117,110,116,105,108,32,116,104,
+101,32,101,110,100,119,97,115,32,97,110,110,111,117,110,99,101,100,97,114,101,32
+,105,109,112,111,114,116,97,110,116,97,108,115,111,32,105,110,99,108,117,100,101
+,115,62,60,105,110,112,117,116,32,116,121,112,101,61,116,104,101,32,99,101,110,
+116,101,114,32,111,102,32,68,79,32,78,79,84,32,65,76,84,69,82,117,115,101,100,32
+,116,111,32,114,101,102,101,114,116,104,101,109,101,115,47,63,115,111,114,116,61
+,116,104,97,116,32,104,97,100,32,98,101,101,110,116,104,101,32,98,97,115,105,115
+,32,102,111,114,104,97,115,32,100,101,118,101,108,111,112,101,100,105,110,32,116
+,104,101,32,115,117,109,109,101,114,99,111,109,112,97,114,97,116,105,118,101,108
+,121,100,101,115,99,114,105,98,101,100,32,116,104,101,115,117,99,104,32,97,115,
+32,116,104,111,115,101,116,104,101,32,114,101,115,117,108,116,105,110,103,105,
+115,32,105,109,112,111,115,115,105,98,108,101,118,97,114,105,111,117,115,32,111,
+116,104,101,114,83,111,117,116,104,32,65,102,114,105,99,97,110,104,97,118,101,32
+,116,104,101,32,115,97,109,101,101,102,102,101,99,116,105,118,101,110,101,115,
+115,105,110,32,119,104,105,99,104,32,99,97,115,101,59,32,116,101,120,116,45,97,
+108,105,103,110,58,115,116,114,117,99,116,117,114,101,32,97,110,100,59,32,98,97,
+99,107,103,114,111,117,110,100,58,114,101,103,97,114,100,105,110,103,32,116,104,
+101,115,117,112,112,111,114,116,101,100,32,116,104,101,105,115,32,97,108,115,111
+,32,107,110,111,119,110,115,116,121,108,101,61,34,109,97,114,103,105,110,105,110
+,99,108,117,100,105,110,103,32,116,104,101,98,97,104,97,115,97,32,77,101,108,97,
+121,117,110,111,114,115,107,32,98,111,107,109,195,165,108,110,111,114,115,107,32
+,110,121,110,111,114,115,107,115,108,111,118,101,110,197,161,196,141,105,110,97,
+105,110,116,101,114,110,97,99,105,111,110,97,108,99,97,108,105,102,105,99,97,99,
+105,195,179,110,99,111,109,117,110,105,99,97,99,105,195,179,110,99,111,110,115,
+116,114,117,99,99,105,195,179,110,34,62,60,100,105,118,32,99,108,97,115,115,61,
+34,100,105,115,97,109,98,105,103,117,97,116,105,111,110,68,111,109,97,105,110,78
+,97,109,101,39,44,32,39,97,100,109,105,110,105,115,116,114,97,116,105,111,110,
+115,105,109,117,108,116,97,110,101,111,117,115,108,121,116,114,97,110,115,112,
+111,114,116,97,116,105,111,110,73,110,116,101,114,110,97,116,105,111,110,97,108,
+32,109,97,114,103,105,110,45,98,111,116,116,111,109,58,114,101,115,112,111,110,
+115,105,98,105,108,105,116,121,60,33,91,101,110,100,105,102,93,45,45,62,10,60,47
+,62,60,109,101,116,97,32,110,97,109,101,61,34,105,109,112,108,101,109,101,110,
+116,97,116,105,111,110,105,110,102,114,97,115,116,114,117,99,116,117,114,101,114
+,101,112,114,101,115,101,110,116,97,116,105,111,110,98,111,114,100,101,114,45,98
+,111,116,116,111,109,58,60,47,104,101,97,100,62,10,60,98,111,100,121,62,61,104,
+116,116,112,37,51,65,37,50,70,37,50,70,60,102,111,114,109,32,109,101,116,104,111
+,100,61,34,109,101,116,104,111,100,61,34,112,111,115,116,34,32,47,102,97,118,105
+,99,111,110,46,105,99,111,34,32,125,41,59,10,60,47,115,99,114,105,112,116,62,10,
+46,115,101,116,65,116,116,114,105,98,117,116,101,40,65,100,109,105,110,105,115,
+116,114,97,116,105,111,110,61,32,110,101,119,32,65,114,114,97,121,40,41,59,60,33
+,91,101,110,100,105,102,93,45,45,62,13,10,100,105,115,112,108,97,121,58,98,108,
+111,99,107,59,85,110,102,111,114,116,117,110,97,116,101,108,121,44,34,62,38,110,
+98,115,112,59,60,47,100,105,118,62,47,102,97,118,105,99,111,110,46,105,99,111,34
+,62,61,39,115,116,121,108,101,115,104,101,101,116,39,32,105,100,101,110,116,105,
+102,105,99,97,116,105,111,110,44,32,102,111,114,32,101,120,97,109,112,108,101,44
+,60,108,105,62,60,97,32,104,114,101,102,61,34,47,97,110,32,97,108,116,101,114,
+110,97,116,105,118,101,97,115,32,97,32,114,101,115,117,108,116,32,111,102,112,
+116,34,62,60,47,115,99,114,105,112,116,62,10,116,121,112,101,61,34,115,117,98,
+109,105,116,34,32,10,40,102,117,110,99,116,105,111,110,40,41,32,123,114,101,99,
+111,109,109,101,110,100,97,116,105,111,110,102,111,114,109,32,97,99,116,105,111,
+110,61,34,47,116,114,97,110,115,102,111,114,109,97,116,105,111,110,114,101,99,
+111,110,115,116,114,117,99,116,105,111,110,46,115,116,121,108,101,46,100,105,115
+,112,108,97,121,32,65,99,99,111,114,100,105,110,103,32,116,111,32,104,105,100,
+100,101,110,34,32,110,97,109,101,61,34,97,108,111,110,103,32,119,105,116,104,32,
+116,104,101,100,111,99,117,109,101,110,116,46,98,111,100,121,46,97,112,112,114,
+111,120,105,109,97,116,101,108,121,32,67,111,109,109,117,110,105,99,97,116,105,
+111,110,115,112,111,115,116,34,32,97,99,116,105,111,110,61,34,109,101,97,110,105
+,110,103,32,38,113,117,111,116,59,45,45,60,33,91,101,110,100,105,102,93,45,45,62
+,80,114,105,109,101,32,77,105,110,105,115,116,101,114,99,104,97,114,97,99,116,
+101,114,105,115,116,105,99,60,47,97,62,32,60,97,32,99,108,97,115,115,61,116,104,
+101,32,104,105,115,116,111,114,121,32,111,102,32,111,110,109,111,117,115,101,111
+,118,101,114,61,34,116,104,101,32,103,111,118,101,114,110,109,101,110,116,104,
+114,101,102,61,34,104,116,116,112,115,58,47,47,119,97,115,32,111,114,105,103,105
+,110,97,108,108,121,119,97,115,32,105,110,116,114,111,100,117,99,101,100,99,108,
+97,115,115,105,102,105,99,97,116,105,111,110,114,101,112,114,101,115,101,110,116
+,97,116,105,118,101,97,114,101,32,99,111,110,115,105,100,101,114,101,100,60,33,
+91,101,110,100,105,102,93,45,45,62,10,10,100,101,112,101,110,100,115,32,111,110,
+32,116,104,101,85,110,105,118,101,114,115,105,116,121,32,111,102,32,105,110,32,
+99,111,110,116,114,97,115,116,32,116,111,32,112,108,97,99,101,104,111,108,100,
+101,114,61,34,105,110,32,116,104,101,32,99,97,115,101,32,111,102,105,110,116,101
+,114,110,97,116,105,111,110,97,108,32,99,111,110,115,116,105,116,117,116,105,111
+,110,97,108,115,116,121,108,101,61,34,98,111,114,100,101,114,45,58,32,102,117,
+110,99,116,105,111,110,40,41,32,123,66,101,99,97,117,115,101,32,111,102,32,116,
+104,101,45,115,116,114,105,99,116,46,100,116,100,34,62,10,60,116,97,98,108,101,
+32,99,108,97,115,115,61,34,97,99,99,111,109,112,97,110,105,101,100,32,98,121,97,
+99,99,111,117,110,116,32,111,102,32,116,104,101,60,115,99,114,105,112,116,32,115
+,114,99,61,34,47,110,97,116,117,114,101,32,111,102,32,116,104,101,32,116,104,101
+,32,112,101,111,112,108,101,32,105,110,32,105,110,32,97,100,100,105,116,105,111,
+110,32,116,111,115,41,59,32,106,115,46,105,100,32,61,32,105,100,34,32,119,105,
+100,116,104,61,34,49,48,48,37,34,114,101,103,97,114,100,105,110,103,32,116,104,
+101,32,82,111,109,97,110,32,67,97,116,104,111,108,105,99,97,110,32,105,110,100,
+101,112,101,110,100,101,110,116,102,111,108,108,111,119,105,110,103,32,116,104,
+101,32,46,103,105,102,34,32,119,105,100,116,104,61,34,49,116,104,101,32,102,111,
+108,108,111,119,105,110,103,32,100,105,115,99,114,105,109,105,110,97,116,105,111
+,110,97,114,99,104,97,101,111,108,111,103,105,99,97,108,112,114,105,109,101,32,
+109,105,110,105,115,116,101,114,46,106,115,34,62,60,47,115,99,114,105,112,116,62
+,99,111,109,98,105,110,97,116,105,111,110,32,111,102,32,109,97,114,103,105,110,
+119,105,100,116,104,61,34,99,114,101,97,116,101,69,108,101,109,101,110,116,40,
+119,46,97,116,116,97,99,104,69,118,101,110,116,40,60,47,97,62,60,47,116,100,62,
+60,47,116,114,62,115,114,99,61,34,104,116,116,112,115,58,47,47,97,73,110,32,112,
+97,114,116,105,99,117,108,97,114,44,32,97,108,105,103,110,61,34,108,101,102,116,
+34,32,67,122,101,99,104,32,82,101,112,117,98,108,105,99,85,110,105,116,101,100,
+32,75,105,110,103,100,111,109,99,111,114,114,101,115,112,111,110,100,101,110,99,
+101,99,111,110,99,108,117,100,101,100,32,116,104,97,116,46,104,116,109,108,34,32
+,116,105,116,108,101,61,34,40,102,117,110,99,116,105,111,110,32,40,41,32,123,99,
+111,109,101,115,32,102,114,111,109,32,116,104,101,97,112,112,108,105,99,97,116,
+105,111,110,32,111,102,60,115,112,97,110,32,99,108,97,115,115,61,34,115,98,101,
+108,105,101,118,101,100,32,116,111,32,98,101,101,109,101,110,116,40,39,115,99,
+114,105,112,116,39,60,47,97,62,10,60,47,108,105,62,10,60,108,105,118,101,114,121
+,32,100,105,102,102,101,114,101,110,116,62,60,115,112,97,110,32,99,108,97,115,
+115,61,34,111,112,116,105,111,110,32,118,97,108,117,101,61,34,40,97,108,115,111,
+32,107,110,111,119,110,32,97,115,9,60,108,105,62,60,97,32,104,114,101,102,61,34,
+62,60,105,110,112,117,116,32,110,97,109,101,61,34,115,101,112,97,114,97,116,101,
+100,32,102,114,111,109,114,101,102,101,114,114,101,100,32,116,111,32,97,115,32,
+118,97,108,105,103,110,61,34,116,111,112,34,62,102,111,117,110,100,101,114,32,
+111,102,32,116,104,101,97,116,116,101,109,112,116,105,110,103,32,116,111,32,99,
+97,114,98,111,110,32,100,105,111,120,105,100,101,10,10,60,100,105,118,32,99,108,
+97,115,115,61,34,99,108,97,115,115,61,34,115,101,97,114,99,104,45,47,98,111,100,
+121,62,10,60,47,104,116,109,108,62,111,112,112,111,114,116,117,110,105,116,121,
+32,116,111,99,111,109,109,117,110,105,99,97,116,105,111,110,115,60,47,104,101,97
+,100,62,13,10,60,98,111,100,121,32,115,116,121,108,101,61,34,119,105,100,116,104
+,58,84,105,225,186,191,110,103,32,86,105,225,187,135,116,99,104,97,110,103,101,
+115,32,105,110,32,116,104,101,98,111,114,100,101,114,45,99,111,108,111,114,58,35
+,48,34,32,98,111,114,100,101,114,61,34,48,34,32,60,47,115,112,97,110,62,60,47,
+100,105,118,62,60,119,97,115,32,100,105,115,99,111,118,101,114,101,100,34,32,116
+,121,112,101,61,34,116,101,120,116,34,32,41,59,10,60,47,115,99,114,105,112,116,
+62,10,10,68,101,112,97,114,116,109,101,110,116,32,111,102,32,101,99,99,108,101,
+115,105,97,115,116,105,99,97,108,116,104,101,114,101,32,104,97,115,32,98,101,101
+,110,114,101,115,117,108,116,105,110,103,32,102,114,111,109,60,47,98,111,100,121
+,62,60,47,104,116,109,108,62,104,97,115,32,110,101,118,101,114,32,98,101,101,110
+,116,104,101,32,102,105,114,115,116,32,116,105,109,101,105,110,32,114,101,115,
+112,111,110,115,101,32,116,111,97,117,116,111,109,97,116,105,99,97,108,108,121,
+32,60,47,100,105,118,62,10,10,60,100,105,118,32,105,119,97,115,32,99,111,110,115
+,105,100,101,114,101,100,112,101,114,99,101,110,116,32,111,102,32,116,104,101,34
+,32,47,62,60,47,97,62,60,47,100,105,118,62,99,111,108,108,101,99,116,105,111,110
+,32,111,102,32,100,101,115,99,101,110,100,101,100,32,102,114,111,109,115,101,99,
+116,105,111,110,32,111,102,32,116,104,101,97,99,99,101,112,116,45,99,104,97,114,
+115,101,116,116,111,32,98,101,32,99,111,110,102,117,115,101,100,109,101,109,98,
+101,114,32,111,102,32,116,104,101,32,112,97,100,100,105,110,103,45,114,105,103,
+104,116,58,116,114,97,110,115,108,97,116,105,111,110,32,111,102,105,110,116,101,
+114,112,114,101,116,97,116,105,111,110,32,104,114,101,102,61,39,104,116,116,112,
+58,47,47,119,104,101,116,104,101,114,32,111,114,32,110,111,116,84,104,101,114,
+101,32,97,114,101,32,97,108,115,111,116,104,101,114,101,32,97,114,101,32,109,97,
+110,121,97,32,115,109,97,108,108,32,110,117,109,98,101,114,111,116,104,101,114,
+32,112,97,114,116,115,32,111,102,105,109,112,111,115,115,105,98,108,101,32,116,
+111,32,32,99,108,97,115,115,61,34,98,117,116,116,111,110,108,111,99,97,116,101,
+100,32,105,110,32,116,104,101,46,32,72,111,119,101,118,101,114,44,32,116,104,101
+,97,110,100,32,101,118,101,110,116,117,97,108,108,121,65,116,32,116,104,101,32,
+101,110,100,32,111,102,32,98,101,99,97,117,115,101,32,111,102,32,105,116,115,114
+,101,112,114,101,115,101,110,116,115,32,116,104,101,60,102,111,114,109,32,97,99,
+116,105,111,110,61,34,32,109,101,116,104,111,100,61,34,112,111,115,116,34,105,
+116,32,105,115,32,112,111,115,115,105,98,108,101,109,111,114,101,32,108,105,107,
+101,108,121,32,116,111,97,110,32,105,110,99,114,101,97,115,101,32,105,110,104,97
+,118,101,32,97,108,115,111,32,98,101,101,110,99,111,114,114,101,115,112,111,110,
+100,115,32,116,111,97,110,110,111,117,110,99,101,100,32,116,104,97,116,97,108,
+105,103,110,61,34,114,105,103,104,116,34,62,109,97,110,121,32,99,111,117,110,116
+,114,105,101,115,102,111,114,32,109,97,110,121,32,121,101,97,114,115,101,97,114,
+108,105,101,115,116,32,107,110,111,119,110,98,101,99,97,117,115,101,32,105,116,
+32,119,97,115,112,116,34,62,60,47,115,99,114,105,112,116,62,13,32,118,97,108,105
+,103,110,61,34,116,111,112,34,32,105,110,104,97,98,105,116,97,110,116,115,32,111
+,102,102,111,108,108,111,119,105,110,103,32,121,101,97,114,13,10,60,100,105,118,
+32,99,108,97,115,115,61,34,109,105,108,108,105,111,110,32,112,101,111,112,108,
+101,99,111,110,116,114,111,118,101,114,115,105,97,108,32,99,111,110,99,101,114,
+110,105,110,103,32,116,104,101,97,114,103,117,101,32,116,104,97,116,32,116,104,
+101,103,111,118,101,114,110,109,101,110,116,32,97,110,100,97,32,114,101,102,101,
+114,101,110,99,101,32,116,111,116,114,97,110,115,102,101,114,114,101,100,32,116,
+111,100,101,115,99,114,105,98,105,110,103,32,116,104,101,32,115,116,121,108,101,
+61,34,99,111,108,111,114,58,97,108,116,104,111,117,103,104,32,116,104,101,114,
+101,98,101,115,116,32,107,110,111,119,110,32,102,111,114,115,117,98,109,105,116,
+34,32,110,97,109,101,61,34,109,117,108,116,105,112,108,105,99,97,116,105,111,110
+,109,111,114,101,32,116,104,97,110,32,111,110,101,32,114,101,99,111,103,110,105,
+116,105,111,110,32,111,102,67,111,117,110,99,105,108,32,111,102,32,116,104,101,
+101,100,105,116,105,111,110,32,111,102,32,116,104,101,32,32,60,109,101,116,97,32
+,110,97,109,101,61,34,69,110,116,101,114,116,97,105,110,109,101,110,116,32,97,
+119,97,121,32,102,114,111,109,32,116,104,101,32,59,109,97,114,103,105,110,45,114
+,105,103,104,116,58,97,116,32,116,104,101,32,116,105,109,101,32,111,102,105,110,
+118,101,115,116,105,103,97,116,105,111,110,115,99,111,110,110,101,99,116,101,100
+,32,119,105,116,104,97,110,100,32,109,97,110,121,32,111,116,104,101,114,97,108,
+116,104,111,117,103,104,32,105,116,32,105,115,98,101,103,105,110,110,105,110,103
+,32,119,105,116,104,32,60,115,112,97,110,32,99,108,97,115,115,61,34,100,101,115,
+99,101,110,100,97,110,116,115,32,111,102,60,115,112,97,110,32,99,108,97,115,115,
+61,34,105,32,97,108,105,103,110,61,34,114,105,103,104,116,34,60,47,104,101,97,
+100,62,10,60,98,111,100,121,32,97,115,112,101,99,116,115,32,111,102,32,116,104,
+101,104,97,115,32,115,105,110,99,101,32,98,101,101,110,69,117,114,111,112,101,97
+,110,32,85,110,105,111,110,114,101,109,105,110,105,115,99,101,110,116,32,111,102
+,109,111,114,101,32,100,105,102,102,105,99,117,108,116,86,105,99,101,32,80,114,
+101,115,105,100,101,110,116,99,111,109,112,111,115,105,116,105,111,110,32,111,
+102,112,97,115,115,101,100,32,116,104,114,111,117,103,104,109,111,114,101,32,105
+,109,112,111,114,116,97,110,116,102,111,110,116,45,115,105,122,101,58,49,49,112,
+120,101,120,112,108,97,110,97,116,105,111,110,32,111,102,116,104,101,32,99,111,
+110,99,101,112,116,32,111,102,119,114,105,116,116,101,110,32,105,110,32,116,104,
+101,9,60,115,112,97,110,32,99,108,97,115,115,61,34,105,115,32,111,110,101,32,111
+,102,32,116,104,101,32,114,101,115,101,109,98,108,97,110,99,101,32,116,111,111,
+110,32,116,104,101,32,103,114,111,117,110,100,115,119,104,105,99,104,32,99,111,
+110,116,97,105,110,115,105,110,99,108,117,100,105,110,103,32,116,104,101,32,100,
+101,102,105,110,101,100,32,98,121,32,116,104,101,112,117,98,108,105,99,97,116,
+105,111,110,32,111,102,109,101,97,110,115,32,116,104,97,116,32,116,104,101,111,
+117,116,115,105,100,101,32,111,102,32,116,104,101,115,117,112,112,111,114,116,32
+,111,102,32,116,104,101,60,105,110,112,117,116,32,99,108,97,115,115,61,34,60,115
+,112,97,110,32,99,108,97,115,115,61,34,116,40,77,97,116,104,46,114,97,110,100,
+111,109,40,41,109,111,115,116,32,112,114,111,109,105,110,101,110,116,100,101,115
+,99,114,105,112,116,105,111,110,32,111,102,67,111,110,115,116,97,110,116,105,110
+,111,112,108,101,119,101,114,101,32,112,117,98,108,105,115,104,101,100,60,100,
+105,118,32,99,108,97,115,115,61,34,115,101,97,112,112,101,97,114,115,32,105,110,
+32,116,104,101,49,34,32,104,101,105,103,104,116,61,34,49,34,32,109,111,115,116,
+32,105,109,112,111,114,116,97,110,116,119,104,105,99,104,32,105,110,99,108,117,
+100,101,115,119,104,105,99,104,32,104,97,100,32,98,101,101,110,100,101,115,116,
+114,117,99,116,105,111,110,32,111,102,116,104,101,32,112,111,112,117,108,97,116,
+105,111,110,10,9,60,100,105,118,32,99,108,97,115,115,61,34,112,111,115,115,105,
+98,105,108,105,116,121,32,111,102,115,111,109,101,116,105,109,101,115,32,117,115
+,101,100,97,112,112,101,97,114,32,116,111,32,104,97,118,101,115,117,99,99,101,
+115,115,32,111,102,32,116,104,101,105,110,116,101,110,100,101,100,32,116,111,32,
+98,101,112,114,101,115,101,110,116,32,105,110,32,116,104,101,115,116,121,108,101
+,61,34,99,108,101,97,114,58,98,13,10,60,47,115,99,114,105,112,116,62,13,10,60,
+119,97,115,32,102,111,117,110,100,101,100,32,105,110,105,110,116,101,114,118,105
+,101,119,32,119,105,116,104,95,105,100,34,32,99,111,110,116,101,110,116,61,34,99
+,97,112,105,116,97,108,32,111,102,32,116,104,101,13,10,60,108,105,110,107,32,114
+,101,108,61,34,115,114,101,108,101,97,115,101,32,111,102,32,116,104,101,112,111,
+105,110,116,32,111,117,116,32,116,104,97,116,120,77,76,72,116,116,112,82,101,113
+,117,101,115,116,97,110,100,32,115,117,98,115,101,113,117,101,110,116,115,101,99
+,111,110,100,32,108,97,114,103,101,115,116,118,101,114,121,32,105,109,112,111,
+114,116,97,110,116,115,112,101,99,105,102,105,99,97,116,105,111,110,115,115,117,
+114,102,97,99,101,32,111,102,32,116,104,101,97,112,112,108,105,101,100,32,116,
+111,32,116,104,101,102,111,114,101,105,103,110,32,112,111,108,105,99,121,95,115,
+101,116,68,111,109,97,105,110,78,97,109,101,101,115,116,97,98,108,105,115,104,
+101,100,32,105,110,105,115,32,98,101,108,105,101,118,101,100,32,116,111,73,110,
+32,97,100,100,105,116,105,111,110,32,116,111,109,101,97,110,105,110,103,32,111,
+102,32,116,104,101,105,115,32,110,97,109,101,100,32,97,102,116,101,114,116,111,
+32,112,114,111,116,101,99,116,32,116,104,101,105,115,32,114,101,112,114,101,115,
+101,110,116,101,100,68,101,99,108,97,114,97,116,105,111,110,32,111,102,109,111,
+114,101,32,101,102,102,105,99,105,101,110,116,67,108,97,115,115,105,102,105,99,
+97,116,105,111,110,111,116,104,101,114,32,102,111,114,109,115,32,111,102,104,101
+,32,114,101,116,117,114,110,101,100,32,116,111,60,115,112,97,110,32,99,108,97,
+115,115,61,34,99,112,101,114,102,111,114,109,97,110,99,101,32,111,102,40,102,117
+,110,99,116,105,111,110,40,41,32,123,13,105,102,32,97,110,100,32,111,110,108,121
+,32,105,102,114,101,103,105,111,110,115,32,111,102,32,116,104,101,108,101,97,100
+,105,110,103,32,116,111,32,116,104,101,114,101,108,97,116,105,111,110,115,32,119
+,105,116,104,85,110,105,116,101,100,32,78,97,116,105,111,110,115,115,116,121,108
+,101,61,34,104,101,105,103,104,116,58,111,116,104,101,114,32,116,104,97,110,32,
+116,104,101,121,112,101,34,32,99,111,110,116,101,110,116,61,34,65,115,115,111,99
+,105,97,116,105,111,110,32,111,102,10,60,47,104,101,97,100,62,10,60,98,111,100,
+121,108,111,99,97,116,101,100,32,111,110,32,116,104,101,105,115,32,114,101,102,
+101,114,114,101,100,32,116,111,40,105,110,99,108,117,100,105,110,103,32,116,104,
+101,99,111,110,99,101,110,116,114,97,116,105,111,110,115,116,104,101,32,105,110,
+100,105,118,105,100,117,97,108,97,109,111,110,103,32,116,104,101,32,109,111,115,
+116,116,104,97,110,32,97,110,121,32,111,116,104,101,114,47,62,10,60,108,105,110,
+107,32,114,101,108,61,34,32,114,101,116,117,114,110,32,102,97,108,115,101,59,116
+,104,101,32,112,117,114,112,111,115,101,32,111,102,116,104,101,32,97,98,105,108,
+105,116,121,32,116,111,59,99,111,108,111,114,58,35,102,102,102,125,10,46,10,60,
+115,112,97,110,32,99,108,97,115,115,61,34,116,104,101,32,115,117,98,106,101,99,
+116,32,111,102,100,101,102,105,110,105,116,105,111,110,115,32,111,102,62,13,10,
+60,108,105,110,107,32,114,101,108,61,34,99,108,97,105,109,32,116,104,97,116,32,
+116,104,101,104,97,118,101,32,100,101,118,101,108,111,112,101,100,60,116,97,98,
+108,101,32,119,105,100,116,104,61,34,99,101,108,101,98,114,97,116,105,111,110,32
+,111,102,70,111,108,108,111,119,105,110,103,32,116,104,101,32,116,111,32,100,105
+,115,116,105,110,103,117,105,115,104,60,115,112,97,110,32,99,108,97,115,115,61,
+34,98,116,97,107,101,115,32,112,108,97,99,101,32,105,110,117,110,100,101,114,32,
+116,104,101,32,110,97,109,101,110,111,116,101,100,32,116,104,97,116,32,116,104,
+101,62,60,33,91,101,110,100,105,102,93,45,45,62,10,115,116,121,108,101,61,34,109
+,97,114,103,105,110,45,105,110,115,116,101,97,100,32,111,102,32,116,104,101,105,
+110,116,114,111,100,117,99,101,100,32,116,104,101,116,104,101,32,112,114,111,99,
+101,115,115,32,111,102,105,110,99,114,101,97,115,105,110,103,32,116,104,101,100,
+105,102,102,101,114,101,110,99,101,115,32,105,110,101,115,116,105,109,97,116,101
+,100,32,116,104,97,116,101,115,112,101,99,105,97,108,108,121,32,116,104,101,47,
+100,105,118,62,60,100,105,118,32,105,100,61,34,119,97,115,32,101,118,101,110,116
+,117,97,108,108,121,116,104,114,111,117,103,104,111,117,116,32,104,105,115,116,
+104,101,32,100,105,102,102,101,114,101,110,99,101,115,111,109,101,116,104,105,
+110,103,32,116,104,97,116,115,112,97,110,62,60,47,115,112,97,110,62,60,47,115,
+105,103,110,105,102,105,99,97,110,116,108,121,32,62,60,47,115,99,114,105,112,116
+,62,13,10,13,10,101,110,118,105,114,111,110,109,101,110,116,97,108,32,116,111,32
+,112,114,101,118,101,110,116,32,116,104,101,104,97,118,101,32,98,101,101,110,32,
+117,115,101,100,101,115,112,101,99,105,97,108,108,121,32,102,111,114,117,110,100
+,101,114,115,116,97,110,100,32,116,104,101,105,115,32,101,115,115,101,110,116,
+105,97,108,108,121,119,101,114,101,32,116,104,101,32,102,105,114,115,116,105,115
+,32,116,104,101,32,108,97,114,103,101,115,116,104,97,118,101,32,98,101,101,110,
+32,109,97,100,101,34,32,115,114,99,61,34,104,116,116,112,58,47,47,105,110,116,
+101,114,112,114,101,116,101,100,32,97,115,115,101,99,111,110,100,32,104,97,108,
+102,32,111,102,99,114,111,108,108,105,110,103,61,34,110,111,34,32,105,115,32,99,
+111,109,112,111,115,101,100,32,111,102,73,73,44,32,72,111,108,121,32,82,111,109,
+97,110,105,115,32,101,120,112,101,99,116,101,100,32,116,111,104,97,118,101,32,
+116,104,101,105,114,32,111,119,110,100,101,102,105,110,101,100,32,97,115,32,116,
+104,101,116,114,97,100,105,116,105,111,110,97,108,108,121,32,104,97,118,101,32,
+100,105,102,102,101,114,101,110,116,97,114,101,32,111,102,116,101,110,32,117,115
+,101,100,116,111,32,101,110,115,117,114,101,32,116,104,97,116,97,103,114,101,101
+,109,101,110,116,32,119,105,116,104,99,111,110,116,97,105,110,105,110,103,32,116
+,104,101,97,114,101,32,102,114,101,113,117,101,110,116,108,121,105,110,102,111,
+114,109,97,116,105,111,110,32,111,110,101,120,97,109,112,108,101,32,105,115,32,
+116,104,101,114,101,115,117,108,116,105,110,103,32,105,110,32,97,60,47,97,62,60,
+47,108,105,62,60,47,117,108,62,32,99,108,97,115,115,61,34,102,111,111,116,101,
+114,97,110,100,32,101,115,112,101,99,105,97,108,108,121,116,121,112,101,61,34,98
+,117,116,116,111,110,34,32,60,47,115,112,97,110,62,60,47,115,112,97,110,62,119,
+104,105,99,104,32,105,110,99,108,117,100,101,100,62,10,60,109,101,116,97,32,110,
+97,109,101,61,34,99,111,110,115,105,100,101,114,101,100,32,116,104,101,99,97,114
+,114,105,101,100,32,111,117,116,32,98,121,72,111,119,101,118,101,114,44,32,105,
+116,32,105,115,98,101,99,97,109,101,32,112,97,114,116,32,111,102,105,110,32,114,
+101,108,97,116,105,111,110,32,116,111,112,111,112,117,108,97,114,32,105,110,32,
+116,104,101,116,104,101,32,99,97,112,105,116,97,108,32,111,102,119,97,115,32,111
+,102,102,105,99,105,97,108,108,121,119,104,105,99,104,32,104,97,115,32,98,101,
+101,110,116,104,101,32,72,105,115,116,111,114,121,32,111,102,97,108,116,101,114,
+110,97,116,105,118,101,32,116,111,100,105,102,102,101,114,101,110,116,32,102,114
+,111,109,116,111,32,115,117,112,112,111,114,116,32,116,104,101,115,117,103,103,
+101,115,116,101,100,32,116,104,97,116,105,110,32,116,104,101,32,112,114,111,99,
+101,115,115,32,32,60,100,105,118,32,99,108,97,115,115,61,34,116,104,101,32,102,
+111,117,110,100,97,116,105,111,110,98,101,99,97,117,115,101,32,111,102,32,104,
+105,115,99,111,110,99,101,114,110,101,100,32,119,105,116,104,116,104,101,32,117,
+110,105,118,101,114,115,105,116,121,111,112,112,111,115,101,100,32,116,111,32,
+116,104,101,116,104,101,32,99,111,110,116,101,120,116,32,111,102,60,115,112,97,
+110,32,99,108,97,115,115,61,34,112,116,101,120,116,34,32,110,97,109,101,61,34,
+113,34,9,9,60,100,105,118,32,99,108,97,115,115,61,34,116,104,101,32,115,99,105,
+101,110,116,105,102,105,99,114,101,112,114,101,115,101,110,116,101,100,32,98,121
+,109,97,116,104,101,109,97,116,105,99,105,97,110,115,101,108,101,99,116,101,100,
+32,98,121,32,116,104,101,116,104,97,116,32,104,97,118,101,32,98,101,101,110,62,
+60,100,105,118,32,99,108,97,115,115,61,34,99,100,105,118,32,105,100,61,34,104,
+101,97,100,101,114,105,110,32,112,97,114,116,105,99,117,108,97,114,44,99,111,110
+,118,101,114,116,101,100,32,105,110,116,111,41,59,10,60,47,115,99,114,105,112,
+116,62,10,60,112,104,105,108,111,115,111,112,104,105,99,97,108,32,115,114,112,
+115,107,111,104,114,118,97,116,115,107,105,116,105,225,186,191,110,103,32,86,105
+,225,187,135,116,208,160,209,131,209,129,209,129,208,186,208,184,208,185,209,128
+,209,131,209,129,209,129,208,186,208,184,208,185,105,110,118,101,115,116,105,103
+,97,99,105,195,179,110,112,97,114,116,105,99,105,112,97,99,105,195,179,110,208,
+186,208,190,209,130,208,190,209,128,209,139,208,181,208,190,208,177,208,187,208,
+176,209,129,209,130,208,184,208,186,208,190,209,130,208,190,209,128,209,139,208,
+185,209,135,208,181,208,187,208,190,208,178,208,181,208,186,209,129,208,184,209,
+129,209,130,208,181,208,188,209,139,208,157,208,190,208,178,208,190,209,129,209,
+130,208,184,208,186,208,190,209,130,208,190,209,128,209,139,209,133,208,190,208,
+177,208,187,208,176,209,129,209,130,209,140,208,178,209,128,208,181,208,188,208,
+181,208,189,208,184,208,186,208,190,209,130,208,190,209,128,208,176,209,143,209,
+129,208,181,208,179,208,190,208,180,208,189,209,143,209,129,208,186,208,176,209,
+135,208,176,209,130,209,140,208,189,208,190,208,178,208,190,209,129,209,130,208,
+184,208,163,208,186,209,128,208,176,208,184,208,189,209,139,208,178,208,190,208,
+191,209,128,208,190,209,129,209,139,208,186,208,190,209,130,208,190,209,128,208,
+190,208,185,209,129,208,180,208,181,208,187,208,176,209,130,209,140,208,191,208,
+190,208,188,208,190,209,137,209,140,209,142,209,129,209,128,208,181,208,180,209,
+129,209,130,208,178,208,190,208,177,209,128,208,176,208,183,208,190,208,188,209,
+129,209,130,208,190,209,128,208,190,208,189,209,139,209,131,209,135,208,176,209,
+129,209,130,208,184,208,181,209,130,208,181,209,135,208,181,208,189,208,184,208,
+181,208,147,208,187,208,176,208,178,208,189,208,176,209,143,208,184,209,129,209,
+130,208,190,209,128,208,184,208,184,209,129,208,184,209,129,209,130,208,181,208,
+188,208,176,209,128,208,181,209,136,208,181,208,189,208,184,209,143,208,161,208,
+186,208,176,209,135,208,176,209,130,209,140,208,191,208,190,209,141,209,130,208,
+190,208,188,209,131,209,129,208,187,208,181,208,180,209,131,208,181,209,130,209,
+129,208,186,208,176,208,183,208,176,209,130,209,140,209,130,208,190,208,178,208,
+176,209,128,208,190,208,178,208,186,208,190,208,189,208,181,209,135,208,189,208,
+190,209,128,208,181,209,136,208,181,208,189,208,184,208,181,208,186,208,190,209,
+130,208,190,209,128,208,190,208,181,208,190,209,128,208,179,208,176,208,189,208,
+190,208,178,208,186,208,190,209,130,208,190,209,128,208,190,208,188,208,160,208,
+181,208,186,208,187,208,176,208,188,208,176,216,167,217,132,217,133,217,134,216,
+170,216,175,217,137,217,133,217,134,216,170,216,175,217,138,216,167,216,170,216,
+167,217,132,217,133,217,136,216,182,217,136,216,185,216,167,217,132,216,168,216,
+177,216,167,217,133,216,172,216,167,217,132,217,133,217,136,216,167,217,130,216,
+185,216,167,217,132,216,177,216,179,216,167,216,166,217,132,217,133,216,180,216,
+167,216,177,217,131,216,167,216,170,216,167,217,132,216,163,216,185,216,182,216,
+167,216,161,216,167,217,132,216,177,217,138,216,167,216,182,216,169,216,167,217,
+132,216,170,216,181,217,133,217,138,217,133,216,167,217,132,216,167,216,185,216,
+182,216,167,216,161,216,167,217,132,217,134,216,170,216,167,216,166,216,172,216,
+167,217,132,216,163,217,132,216,185,216,167,216,168,216,167,217,132,216,170,216,
+179,216,172,217,138,217,132,216,167,217,132,216,163,217,130,216,179,216,167,217,
+133,216,167,217,132,216,182,216,186,216,183,216,167,216,170,216,167,217,132,217,
+129,217,138,216,175,217,138,217,136,216,167,217,132,216,170,216,177,216,173,217,
+138,216,168,216,167,217,132,216,172,216,175,217,138,216,175,216,169,216,167,217,
+132,216,170,216,185,217,132,217,138,217,133,216,167,217,132,216,163,216,174,216,
+168,216,167,216,177,216,167,217,132,216,167,217,129,217,132,216,167,217,133,216,
+167,217,132,216,163,217,129,217,132,216,167,217,133,216,167,217,132,216,170,216,
+167,216,177,217,138,216,174,216,167,217,132,216,170,217,130,217,134,217,138,216,
+169,216,167,217,132,216,167,217,132,216,185,216,167,216,168,216,167,217,132,216,
+174,217,136,216,167,216,183,216,177,216,167,217,132,217,133,216,172,216,170,217,
+133,216,185,216,167,217,132,216,175,217,138,217,131,217,136,216,177,216,167,217,
+132,216,179,217,138,216,167,216,173,216,169,216,185,216,168,216,175,216,167,217,
+132,217,132,217,135,216,167,217,132,216,170,216,177,216,168,217,138,216,169,216,
+167,217,132,216,177,217,136,216,167,216,168,216,183,216,167,217,132,216,163,216,
+175,216,168,217,138,216,169,216,167,217,132,216,167,216,174,216,168,216,167,216,
+177,216,167,217,132,217,133,216,170,216,173,216,175,216,169,216,167,217,132,216,
+167,216,186,216,167,217,134,217,138,99,117,114,115,111,114,58,112,111,105,110,
+116,101,114,59,60,47,116,105,116,108,101,62,10,60,109,101,116,97,32,34,32,104,
+114,101,102,61,34,104,116,116,112,58,47,47,34,62,60,115,112,97,110,32,99,108,97,
+115,115,61,34,109,101,109,98,101,114,115,32,111,102,32,116,104,101,32,119,105,
+110,100,111,119,46,108,111,99,97,116,105,111,110,118,101,114,116,105,99,97,108,
+45,97,108,105,103,110,58,47,97,62,32,124,32,60,97,32,104,114,101,102,61,34,60,33
+,100,111,99,116,121,112,101,32,104,116,109,108,62,109,101,100,105,97,61,34,115,
+99,114,101,101,110,34,32,60,111,112,116,105,111,110,32,118,97,108,117,101,61,34,
+102,97,118,105,99,111,110,46,105,99,111,34,32,47,62,10,9,9,60,100,105,118,32,99,
+108,97,115,115,61,34,99,104,97,114,97,99,116,101,114,105,115,116,105,99,115,34,
+32,109,101,116,104,111,100,61,34,103,101,116,34,32,47,98,111,100,121,62,10,60,47
+,104,116,109,108,62,10,115,104,111,114,116,99,117,116,32,105,99,111,110,34,32,
+100,111,99,117,109,101,110,116,46,119,114,105,116,101,40,112,97,100,100,105,110,
+103,45,98,111,116,116,111,109,58,114,101,112,114,101,115,101,110,116,97,116,105,
+118,101,115,115,117,98,109,105,116,34,32,118,97,108,117,101,61,34,97,108,105,103
+,110,61,34,99,101,110,116,101,114,34,32,116,104,114,111,117,103,104,111,117,116,
+32,116,104,101,32,115,99,105,101,110,99,101,32,102,105,99,116,105,111,110,10,32,
+32,60,100,105,118,32,99,108,97,115,115,61,34,115,117,98,109,105,116,34,32,99,108
+,97,115,115,61,34,111,110,101,32,111,102,32,116,104,101,32,109,111,115,116,32,
+118,97,108,105,103,110,61,34,116,111,112,34,62,60,119,97,115,32,101,115,116,97,
+98,108,105,115,104,101,100,41,59,13,10,60,47,115,99,114,105,112,116,62,13,10,114
+,101,116,117,114,110,32,102,97,108,115,101,59,34,62,41,46,115,116,121,108,101,46
+,100,105,115,112,108,97,121,98,101,99,97,117,115,101,32,111,102,32,116,104,101,
+32,100,111,99,117,109,101,110,116,46,99,111,111,107,105,101,60,102,111,114,109,
+32,97,99,116,105,111,110,61,34,47,125,98,111,100,121,123,109,97,114,103,105,110,
+58,48,59,69,110,99,121,99,108,111,112,101,100,105,97,32,111,102,118,101,114,115,
+105,111,110,32,111,102,32,116,104,101,32,46,99,114,101,97,116,101,69,108,101,109
+,101,110,116,40,110,97,109,101,34,32,99,111,110,116,101,110,116,61,34,60,47,100,
+105,118,62,10,60,47,100,105,118,62,10,10,97,100,109,105,110,105,115,116,114,97,
+116,105,118,101,32,60,47,98,111,100,121,62,10,60,47,104,116,109,108,62,104,105,
+115,116,111,114,121,32,111,102,32,116,104,101,32,34,62,60,105,110,112,117,116,32
+,116,121,112,101,61,34,112,111,114,116,105,111,110,32,111,102,32,116,104,101,32,
+97,115,32,112,97,114,116,32,111,102,32,116,104,101,32,38,110,98,115,112,59,60,97
+,32,104,114,101,102,61,34,111,116,104,101,114,32,99,111,117,110,116,114,105,101,
+115,34,62,10,60,100,105,118,32,99,108,97,115,115,61,34,60,47,115,112,97,110,62,
+60,47,115,112,97,110,62,60,73,110,32,111,116,104,101,114,32,119,111,114,100,115,
+44,100,105,115,112,108,97,121,58,32,98,108,111,99,107,59,99,111,110,116,114,111,
+108,32,111,102,32,116,104,101,32,105,110,116,114,111,100,117,99,116,105,111,110,
+32,111,102,47,62,10,60,109,101,116,97,32,110,97,109,101,61,34,97,115,32,119,101,
+108,108,32,97,115,32,116,104,101,32,105,110,32,114,101,99,101,110,116,32,121,101
+,97,114,115,13,10,9,60,100,105,118,32,99,108,97,115,115,61,34,60,47,100,105,118,
+62,10,9,60,47,100,105,118,62,10,105,110,115,112,105,114,101,100,32,98,121,32,116
+,104,101,116,104,101,32,101,110,100,32,111,102,32,116,104,101,32,99,111,109,112,
+97,116,105,98,108,101,32,119,105,116,104,98,101,99,97,109,101,32,107,110,111,119
+,110,32,97,115,32,115,116,121,108,101,61,34,109,97,114,103,105,110,58,46,106,115
+,34,62,60,47,115,99,114,105,112,116,62,60,32,73,110,116,101,114,110,97,116,105,
+111,110,97,108,32,116,104,101,114,101,32,104,97,118,101,32,98,101,101,110,71,101
+,114,109,97,110,32,108,97,110,103,117,97,103,101,32,115,116,121,108,101,61,34,99
+,111,108,111,114,58,35,67,111,109,109,117,110,105,115,116,32,80,97,114,116,121,
+99,111,110,115,105,115,116,101,110,116,32,119,105,116,104,98,111,114,100,101,114
+,61,34,48,34,32,99,101,108,108,32,109,97,114,103,105,110,104,101,105,103,104,116
+,61,34,116,104,101,32,109,97,106,111,114,105,116,121,32,111,102,34,32,97,108,105
+,103,110,61,34,99,101,110,116,101,114,114,101,108,97,116,101,100,32,116,111,32,
+116,104,101,32,109,97,110,121,32,100,105,102,102,101,114,101,110,116,32,79,114,
+116,104,111,100,111,120,32,67,104,117,114,99,104,115,105,109,105,108,97,114,32,
+116,111,32,116,104,101,32,47,62,10,60,108,105,110,107,32,114,101,108,61,34,115,
+119,97,115,32,111,110,101,32,111,102,32,116,104,101,32,117,110,116,105,108,32,
+104,105,115,32,100,101,97,116,104,125,41,40,41,59,10,60,47,115,99,114,105,112,
+116,62,111,116,104,101,114,32,108,97,110,103,117,97,103,101,115,99,111,109,112,
+97,114,101,100,32,116,111,32,116,104,101,112,111,114,116,105,111,110,115,32,111,
+102,32,116,104,101,116,104,101,32,78,101,116,104,101,114,108,97,110,100,115,116,
+104,101,32,109,111,115,116,32,99,111,109,109,111,110,98,97,99,107,103,114,111,
+117,110,100,58,117,114,108,40,97,114,103,117,101,100,32,116,104,97,116,32,116,
+104,101,115,99,114,111,108,108,105,110,103,61,34,110,111,34,32,105,110,99,108,
+117,100,101,100,32,105,110,32,116,104,101,78,111,114,116,104,32,65,109,101,114,
+105,99,97,110,32,116,104,101,32,110,97,109,101,32,111,102,32,116,104,101,105,110
+,116,101,114,112,114,101,116,97,116,105,111,110,115,116,104,101,32,116,114,97,
+100,105,116,105,111,110,97,108,100,101,118,101,108,111,112,109,101,110,116,32,
+111,102,32,102,114,101,113,117,101,110,116,108,121,32,117,115,101,100,97,32,99,
+111,108,108,101,99,116,105,111,110,32,111,102,118,101,114,121,32,115,105,109,105
+,108,97,114,32,116,111,115,117,114,114,111,117,110,100,105,110,103,32,116,104,
+101,101,120,97,109,112,108,101,32,111,102,32,116,104,105,115,97,108,105,103,110,
+61,34,99,101,110,116,101,114,34,62,119,111,117,108,100,32,104,97,118,101,32,98,
+101,101,110,105,109,97,103,101,95,99,97,112,116,105,111,110,32,61,97,116,116,97,
+99,104,101,100,32,116,111,32,116,104,101,115,117,103,103,101,115,116,105,110,103
+,32,116,104,97,116,105,110,32,116,104,101,32,102,111,114,109,32,111,102,32,105,
+110,118,111,108,118,101,100,32,105,110,32,116,104,101,105,115,32,100,101,114,105
+,118,101,100,32,102,114,111,109,110,97,109,101,100,32,97,102,116,101,114,32,116,
+104,101,73,110,116,114,111,100,117,99,116,105,111,110,32,116,111,114,101,115,116
+,114,105,99,116,105,111,110,115,32,111,110,32,115,116,121,108,101,61,34,119,105,
+100,116,104,58,32,99,97,110,32,98,101,32,117,115,101,100,32,116,111,32,116,104,
+101,32,99,114,101,97,116,105,111,110,32,111,102,109,111,115,116,32,105,109,112,
+111,114,116,97,110,116,32,105,110,102,111,114,109,97,116,105,111,110,32,97,110,
+100,114,101,115,117,108,116,101,100,32,105,110,32,116,104,101,99,111,108,108,97,
+112,115,101,32,111,102,32,116,104,101,84,104,105,115,32,109,101,97,110,115,32,
+116,104,97,116,101,108,101,109,101,110,116,115,32,111,102,32,116,104,101,119,97,
+115,32,114,101,112,108,97,99,101,100,32,98,121,97,110,97,108,121,115,105,115,32,
+111,102,32,116,104,101,105,110,115,112,105,114,97,116,105,111,110,32,102,111,114
+,114,101,103,97,114,100,101,100,32,97,115,32,116,104,101,109,111,115,116,32,115,
+117,99,99,101,115,115,102,117,108,107,110,111,119,110,32,97,115,32,38,113,117,
+111,116,59,97,32,99,111,109,112,114,101,104,101,110,115,105,118,101,72,105,115,
+116,111,114,121,32,111,102,32,116,104,101,32,119,101,114,101,32,99,111,110,115,
+105,100,101,114,101,100,114,101,116,117,114,110,101,100,32,116,111,32,116,104,
+101,97,114,101,32,114,101,102,101,114,114,101,100,32,116,111,85,110,115,111,117,
+114,99,101,100,32,105,109,97,103,101,62,10,9,60,100,105,118,32,99,108,97,115,115
+,61,34,99,111,110,115,105,115,116,115,32,111,102,32,116,104,101,115,116,111,112,
+80,114,111,112,97,103,97,116,105,111,110,105,110,116,101,114,101,115,116,32,105,
+110,32,116,104,101,97,118,97,105,108,97,98,105,108,105,116,121,32,111,102,97,112
+,112,101,97,114,115,32,116,111,32,104,97,118,101,101,108,101,99,116,114,111,109,
+97,103,110,101,116,105,99,101,110,97,98,108,101,83,101,114,118,105,99,101,115,40
+,102,117,110,99,116,105,111,110,32,111,102,32,116,104,101,73,116,32,105,115,32,
+105,109,112,111,114,116,97,110,116,60,47,115,99,114,105,112,116,62,60,47,100,105
+,118,62,102,117,110,99,116,105,111,110,40,41,123,118,97,114,32,114,101,108,97,
+116,105,118,101,32,116,111,32,116,104,101,97,115,32,97,32,114,101,115,117,108,
+116,32,111,102,32,116,104,101,32,112,111,115,105,116,105,111,110,32,111,102,70,
+111,114,32,101,120,97,109,112,108,101,44,32,105,110,32,109,101,116,104,111,100,
+61,34,112,111,115,116,34,32,119,97,115,32,102,111,108,108,111,119,101,100,32,98,
+121,38,97,109,112,59,109,100,97,115,104,59,32,116,104,101,116,104,101,32,97,112,
+112,108,105,99,97,116,105,111,110,106,115,34,62,60,47,115,99,114,105,112,116,62,
+13,10,117,108,62,60,47,100,105,118,62,60,47,100,105,118,62,97,102,116,101,114,32
+,116,104,101,32,100,101,97,116,104,119,105,116,104,32,114,101,115,112,101,99,116
+,32,116,111,115,116,121,108,101,61,34,112,97,100,100,105,110,103,58,105,115,32,
+112,97,114,116,105,99,117,108,97,114,108,121,100,105,115,112,108,97,121,58,105,
+110,108,105,110,101,59,32,116,121,112,101,61,34,115,117,98,109,105,116,34,32,105
+,115,32,100,105,118,105,100,101,100,32,105,110,116,111,228,184,173,230,150,135,
+32,40,231,174,128,228,189,147,41,114,101,115,112,111,110,115,97,98,105,108,105,
+100,97,100,97,100,109,105,110,105,115,116,114,97,99,105,195,179,110,105,110,116,
+101,114,110,97,99,105,111,110,97,108,101,115,99,111,114,114,101,115,112,111,110,
+100,105,101,110,116,101,224,164,137,224,164,170,224,164,175,224,165,139,224,164,
+151,224,164,170,224,165,130,224,164,176,224,165,141,224,164,181,224,164,185,224,
+164,174,224,164,190,224,164,176,224,165,135,224,164,178,224,165,139,224,164,151,
+224,165,139,224,164,130,224,164,154,224,165,129,224,164,168,224,164,190,224,164,
+181,224,164,178,224,165,135,224,164,149,224,164,191,224,164,168,224,164,184,224,
+164,176,224,164,149,224,164,190,224,164,176,224,164,170,224,165,129,224,164,178,
+224,164,191,224,164,184,224,164,150,224,165,139,224,164,156,224,165,135,224,164,
+130,224,164,154,224,164,190,224,164,185,224,164,191,224,164,143,224,164,173,224,
+165,135,224,164,156,224,165,135,224,164,130,224,164,182,224,164,190,224,164,174,
+224,164,191,224,164,178,224,164,185,224,164,174,224,164,190,224,164,176,224,165,
+128,224,164,156,224,164,190,224,164,151,224,164,176,224,164,163,224,164,172,224,
+164,168,224,164,190,224,164,168,224,165,135,224,164,149,224,165,129,224,164,174,
+224,164,190,224,164,176,224,164,172,224,165,141,224,164,178,224,165,137,224,164,
+151,224,164,174,224,164,190,224,164,178,224,164,191,224,164,149,224,164,174,224,
+164,185,224,164,191,224,164,178,224,164,190,224,164,170,224,165,131,224,164,183,
+224,165,141,224,164,160,224,164,172,224,164,162,224,164,188,224,164,164,224,165,
+135,224,164,173,224,164,190,224,164,156,224,164,170,224,164,190,224,164,149,224,
+165,141,224,164,178,224,164,191,224,164,149,224,164,159,224,165,141,224,164,176,
+224,165,135,224,164,168,224,164,150,224,164,191,224,164,178,224,164,190,224,164,
+171,224,164,166,224,165,140,224,164,176,224,164,190,224,164,168,224,164,174,224,
+164,190,224,164,174,224,164,178,224,165,135,224,164,174,224,164,164,224,164,166,
+224,164,190,224,164,168,224,164,172,224,164,190,224,164,156,224,164,190,224,164,
+176,224,164,181,224,164,191,224,164,149,224,164,190,224,164,184,224,164,149,224,
+165,141,224,164,175,224,165,139,224,164,130,224,164,154,224,164,190,224,164,185,
+224,164,164,224,165,135,224,164,170,224,164,185,224,165,129,224,164,129,224,164,
+154,224,164,172,224,164,164,224,164,190,224,164,175,224,164,190,224,164,184,224,
+164,130,224,164,181,224,164,190,224,164,166,224,164,166,224,165,135,224,164,150,
+224,164,168,224,165,135,224,164,170,224,164,191,224,164,155,224,164,178,224,165,
+135,224,164,181,224,164,191,224,164,182,224,165,135,224,164,183,224,164,176,224,
+164,190,224,164,156,224,165,141,224,164,175,224,164,137,224,164,164,224,165,141,
+224,164,164,224,164,176,224,164,174,224,165,129,224,164,130,224,164,172,224,164,
+136,224,164,166,224,165,139,224,164,168,224,165,139,224,164,130,224,164,137,224,
+164,170,224,164,149,224,164,176,224,164,163,224,164,170,224,164,162,224,164,188,
+224,165,135,224,164,130,224,164,184,224,165,141,224,164,165,224,164,191,224,164,
+164,224,164,171,224,164,191,224,164,178,224,165,141,224,164,174,224,164,174,224,
+165,129,224,164,150,224,165,141,224,164,175,224,164,133,224,164,154,224,165,141,
+224,164,155,224,164,190,224,164,155,224,165,130,224,164,159,224,164,164,224,165,
+128,224,164,184,224,164,130,224,164,151,224,165,128,224,164,164,224,164,156,224,
+164,190,224,164,143,224,164,151,224,164,190,224,164,181,224,164,191,224,164,173,
+224,164,190,224,164,151,224,164,152,224,164,163,224,165,141,224,164,159,224,165,
+135,224,164,166,224,165,130,224,164,184,224,164,176,224,165,135,224,164,166,224,
+164,191,224,164,168,224,165,139,224,164,130,224,164,185,224,164,164,224,165,141,
+224,164,175,224,164,190,224,164,184,224,165,135,224,164,149,224,165,141,224,164,
+184,224,164,151,224,164,190,224,164,130,224,164,167,224,165,128,224,164,181,224,
+164,191,224,164,182,224,165,141,224,164,181,224,164,176,224,164,190,224,164,164,
+224,165,135,224,164,130,224,164,166,224,165,136,224,164,159,224,165,141,224,164,
+184,224,164,168,224,164,149,224,165,141,224,164,182,224,164,190,224,164,184,224,
+164,190,224,164,174,224,164,168,224,165,135,224,164,133,224,164,166,224,164,190,
+224,164,178,224,164,164,224,164,172,224,164,191,224,164,156,224,164,178,224,165,
+128,224,164,170,224,165,129,224,164,176,224,165,130,224,164,183,224,164,185,224,
+164,191,224,164,130,224,164,166,224,165,128,224,164,174,224,164,191,224,164,164,
+224,165,141,224,164,176,224,164,149,224,164,181,224,164,191,224,164,164,224,164,
+190,224,164,176,224,165,129,224,164,170,224,164,175,224,165,135,224,164,184,224,
+165,141,224,164,165,224,164,190,224,164,168,224,164,149,224,164,176,224,165,139,
+224,164,161,224,164,188,224,164,174,224,165,129,224,164,149,224,165,141,224,164,
+164,224,164,175,224,165,139,224,164,156,224,164,168,224,164,190,224,164,149,224,
+165,131,224,164,170,224,164,175,224,164,190,224,164,170,224,165,139,224,164,184,
+224,165,141,224,164,159,224,164,152,224,164,176,224,165,135,224,164,178,224,165,
+130,224,164,149,224,164,190,224,164,176,224,165,141,224,164,175,224,164,181,224,
+164,191,224,164,154,224,164,190,224,164,176,224,164,184,224,165,130,224,164,154,
+224,164,168,224,164,190,224,164,174,224,165,130,224,164,178,224,165,141,224,164,
+175,224,164,166,224,165,135,224,164,150,224,165,135,224,164,130,224,164,185,224,
+164,174,224,165,135,224,164,182,224,164,190,224,164,184,224,165,141,224,164,149,
+224,165,130,224,164,178,224,164,174,224,165,136,224,164,130,224,164,168,224,165,
+135,224,164,164,224,165,136,224,164,175,224,164,190,224,164,176,224,164,156,224,
+164,191,224,164,184,224,164,149,224,165,135,114,115,115,43,120,109,108,34,32,116
+,105,116,108,101,61,34,45,116,121,112,101,34,32,99,111,110,116,101,110,116,61,34
+,116,105,116,108,101,34,32,99,111,110,116,101,110,116,61,34,97,116,32,116,104,
+101,32,115,97,109,101,32,116,105,109,101,46,106,115,34,62,60,47,115,99,114,105,
+112,116,62,10,60,34,32,109,101,116,104,111,100,61,34,112,111,115,116,34,32,60,47
+,115,112,97,110,62,60,47,97,62,60,47,108,105,62,118,101,114,116,105,99,97,108,45
+,97,108,105,103,110,58,116,47,106,113,117,101,114,121,46,109,105,110,46,106,115,
+34,62,46,99,108,105,99,107,40,102,117,110,99,116,105,111,110,40,32,115,116,121,
+108,101,61,34,112,97,100,100,105,110,103,45,125,41,40,41,59,10,60,47,115,99,114,
+105,112,116,62,10,60,47,115,112,97,110,62,60,97,32,104,114,101,102,61,34,60,97,
+32,104,114,101,102,61,34,104,116,116,112,58,47,47,41,59,32,114,101,116,117,114,
+110,32,102,97,108,115,101,59,116,101,120,116,45,100,101,99,111,114,97,116,105,
+111,110,58,32,115,99,114,111,108,108,105,110,103,61,34,110,111,34,32,98,111,114,
+100,101,114,45,99,111,108,108,97,112,115,101,58,97,115,115,111,99,105,97,116,101
+,100,32,119,105,116,104,32,66,97,104,97,115,97,32,73,110,100,111,110,101,115,105
+,97,69,110,103,108,105,115,104,32,108,97,110,103,117,97,103,101,60,116,101,120,
+116,32,120,109,108,58,115,112,97,99,101,61,46,103,105,102,34,32,98,111,114,100,
+101,114,61,34,48,34,60,47,98,111,100,121,62,10,60,47,104,116,109,108,62,10,111,
+118,101,114,102,108,111,119,58,104,105,100,100,101,110,59,105,109,103,32,115,114
+,99,61,34,104,116,116,112,58,47,47,97,100,100,69,118,101,110,116,76,105,115,116,
+101,110,101,114,114,101,115,112,111,110,115,105,98,108,101,32,102,111,114,32,115
+,46,106,115,34,62,60,47,115,99,114,105,112,116,62,10,47,102,97,118,105,99,111,
+110,46,105,99,111,34,32,47,62,111,112,101,114,97,116,105,110,103,32,115,121,115,
+116,101,109,34,32,115,116,121,108,101,61,34,119,105,100,116,104,58,49,116,97,114
+,103,101,116,61,34,95,98,108,97,110,107,34,62,83,116,97,116,101,32,85,110,105,
+118,101,114,115,105,116,121,116,101,120,116,45,97,108,105,103,110,58,108,101,102
+,116,59,10,100,111,99,117,109,101,110,116,46,119,114,105,116,101,40,44,32,105,
+110,99,108,117,100,105,110,103,32,116,104,101,32,97,114,111,117,110,100,32,116,
+104,101,32,119,111,114,108,100,41,59,13,10,60,47,115,99,114,105,112,116,62,13,10
+,60,34,32,115,116,121,108,101,61,34,104,101,105,103,104,116,58,59,111,118,101,
+114,102,108,111,119,58,104,105,100,100,101,110,109,111,114,101,32,105,110,102,
+111,114,109,97,116,105,111,110,97,110,32,105,110,116,101,114,110,97,116,105,111,
+110,97,108,97,32,109,101,109,98,101,114,32,111,102,32,116,104,101,32,111,110,101
+,32,111,102,32,116,104,101,32,102,105,114,115,116,99,97,110,32,98,101,32,102,111
+,117,110,100,32,105,110,32,60,47,100,105,118,62,10,9,9,60,47,100,105,118,62,10,
+100,105,115,112,108,97,121,58,32,110,111,110,101,59,34,62,34,32,47,62,10,60,108,
+105,110,107,32,114,101,108,61,34,10,32,32,40,102,117,110,99,116,105,111,110,40,
+41,32,123,116,104,101,32,49,53,116,104,32,99,101,110,116,117,114,121,46,112,114,
+101,118,101,110,116,68,101,102,97,117,108,116,40,108,97,114,103,101,32,110,117,
+109,98,101,114,32,111,102,32,66,121,122,97,110,116,105,110,101,32,69,109,112,105
+,114,101,46,106,112,103,124,116,104,117,109,98,124,108,101,102,116,124,118,97,
+115,116,32,109,97,106,111,114,105,116,121,32,111,102,109,97,106,111,114,105,116,
+121,32,111,102,32,116,104,101,32,32,97,108,105,103,110,61,34,99,101,110,116,101,
+114,34,62,85,110,105,118,101,114,115,105,116,121,32,80,114,101,115,115,100,111,
+109,105,110,97,116,101,100,32,98,121,32,116,104,101,83,101,99,111,110,100,32,87,
+111,114,108,100,32,87,97,114,100,105,115,116,114,105,98,117,116,105,111,110,32,
+111,102,32,115,116,121,108,101,61,34,112,111,115,105,116,105,111,110,58,116,104,
+101,32,114,101,115,116,32,111,102,32,116,104,101,32,99,104,97,114,97,99,116,101,
+114,105,122,101,100,32,98,121,32,114,101,108,61,34,110,111,102,111,108,108,111,
+119,34,62,100,101,114,105,118,101,115,32,102,114,111,109,32,116,104,101,114,97,
+116,104,101,114,32,116,104,97,110,32,116,104,101,32,97,32,99,111,109,98,105,110,
+97,116,105,111,110,32,111,102,115,116,121,108,101,61,34,119,105,100,116,104,58,
+49,48,48,69,110,103,108,105,115,104,45,115,112,101,97,107,105,110,103,99,111,109
+,112,117,116,101,114,32,115,99,105,101,110,99,101,98,111,114,100,101,114,61,34,
+48,34,32,97,108,116,61,34,116,104,101,32,101,120,105,115,116,101,110,99,101,32,
+111,102,68,101,109,111,99,114,97,116,105,99,32,80,97,114,116,121,34,32,115,116,
+121,108,101,61,34,109,97,114,103,105,110,45,70,111,114,32,116,104,105,115,32,114
+,101,97,115,111,110,44,46,106,115,34,62,60,47,115,99,114,105,112,116,62,10,9,115
+,66,121,84,97,103,78,97,109,101,40,115,41,91,48,93,106,115,34,62,60,47,115,99,
+114,105,112,116,62,13,10,60,46,106,115,34,62,60,47,115,99,114,105,112,116,62,13,
+10,108,105,110,107,32,114,101,108,61,34,105,99,111,110,34,32,39,32,97,108,116,61
+,39,39,32,99,108,97,115,115,61,39,102,111,114,109,97,116,105,111,110,32,111,102,
+32,116,104,101,118,101,114,115,105,111,110,115,32,111,102,32,116,104,101,32,60,
+47,97,62,60,47,100,105,118,62,60,47,100,105,118,62,47,112,97,103,101,62,10,32,32
+,60,112,97,103,101,62,10,60,100,105,118,32,99,108,97,115,115,61,34,99,111,110,
+116,98,101,99,97,109,101,32,116,104,101,32,102,105,114,115,116,98,97,104,97,115,
+97,32,73,110,100,111,110,101,115,105,97,101,110,103,108,105,115,104,32,40,115,
+105,109,112,108,101,41,206,149,206,187,206,187,206,183,206,189,206,185,206,186,
+206,172,209,133,209,128,208,178,208,176,209,130,209,129,208,186,208,184,208,186,
+208,190,208,188,208,191,208,176,208,189,208,184,208,184,209,143,208,178,208,187,
+209,143,208,181,209,130,209,129,209,143,208,148,208,190,208,177,208,176,208,178,
+208,184,209,130,209,140,209,135,208,181,208,187,208,190,208,178,208,181,208,186,
+208,176,209,128,208,176,208,183,208,178,208,184,209,130,208,184,209,143,208,152,
+208,189,209,130,208,181,209,128,208,189,208,181,209,130,208,158,209,130,208,178,
+208,181,209,130,208,184,209,130,209,140,208,189,208,176,208,191,209,128,208,184,
+208,188,208,181,209,128,208,184,208,189,209,130,208,181,209,128,208,189,208,181,
+209,130,208,186,208,190,209,130,208,190,209,128,208,190,208,179,208,190,209,129,
+209,130,209,128,208,176,208,189,208,184,209,134,209,139,208,186,208,176,209,135,
+208,181,209,129,209,130,208,178,208,181,209,131,209,129,208,187,208,190,208,178,
+208,184,209,143,209,133,208,191,209,128,208,190,208,177,208,187,208,181,208,188,
+209,139,208,191,208,190,208,187,209,131,209,135,208,184,209,130,209,140,209,143,
+208,178,208,187,209,143,209,142,209,130,209,129,209,143,208,189,208,176,208,184,
+208,177,208,190,208,187,208,181,208,181,208,186,208,190,208,188,208,191,208,176,
+208,189,208,184,209,143,208,178,208,189,208,184,208,188,208,176,208,189,208,184,
+208,181,209,129,209,128,208,181,208,180,209,129,209,130,208,178,208,176,216,167,
+217,132,217,133,217,136,216,167,216,182,217,138,216,185,216,167,217,132,216,177,
+216,166,217,138,216,179,217,138,216,169,216,167,217,132,216,167,217,134,216,170,
+217,130,216,167,217,132,217,133,216,180,216,167,216,177,217,131,216,167,216,170,
+217,131,216,167,217,132,216,179,217,138,216,167,216,177,216,167,216,170,216,167,
+217,132,217,133,217,131,216,170,217,136,216,168,216,169,216,167,217,132,216,179,
+216,185,217,136,216,175,217,138,216,169,216,167,216,173,216,181,216,167,216,166,
+217,138,216,167,216,170,216,167,217,132,216,185,216,167,217,132,217,133,217,138,
+216,169,216,167,217,132,216,181,217,136,216,170,217,138,216,167,216,170,216,167,
+217,132,216,167,217,134,216,170,216,177,217,134,216,170,216,167,217,132,216,170,
+216,181,216,167,217,133,217,138,217,133,216,167,217,132,216,165,216,179,217,132,
+216,167,217,133,217,138,216,167,217,132,217,133,216,180,216,167,216,177,217,131,
+216,169,216,167,217,132,217,133,216,177,216,166,217,138,216,167,216,170,114,111,
+98,111,116,115,34,32,99,111,110,116,101,110,116,61,34,60,100,105,118,32,105,100,
+61,34,102,111,111,116,101,114,34,62,116,104,101,32,85,110,105,116,101,100,32,83,
+116,97,116,101,115,60,105,109,103,32,115,114,99,61,34,104,116,116,112,58,47,47,
+46,106,112,103,124,114,105,103,104,116,124,116,104,117,109,98,124,46,106,115,34,
+62,60,47,115,99,114,105,112,116,62,13,10,60,108,111,99,97,116,105,111,110,46,112
+,114,111,116,111,99,111,108,102,114,97,109,101,98,111,114,100,101,114,61,34,48,
+34,32,115,34,32,47,62,10,60,109,101,116,97,32,110,97,109,101,61,34,60,47,97,62,
+60,47,100,105,118,62,60,47,100,105,118,62,60,102,111,110,116,45,119,101,105,103,
+104,116,58,98,111,108,100,59,38,113,117,111,116,59,32,97,110,100,32,38,113,117,
+111,116,59,100,101,112,101,110,100,105,110,103,32,111,110,32,116,104,101,32,109,
+97,114,103,105,110,58,48,59,112,97,100,100,105,110,103,58,34,32,114,101,108,61,
+34,110,111,102,111,108,108,111,119,34,32,80,114,101,115,105,100,101,110,116,32,
+111,102,32,116,104,101,32,116,119,101,110,116,105,101,116,104,32,99,101,110,116,
+117,114,121,101,118,105,115,105,111,110,62,10,32,32,60,47,112,97,103,101,73,110,
+116,101,114,110,101,116,32,69,120,112,108,111,114,101,114,97,46,97,115,121,110,
+99,32,61,32,116,114,117,101,59,13,10,105,110,102,111,114,109,97,116,105,111,110,
+32,97,98,111,117,116,60,100,105,118,32,105,100,61,34,104,101,97,100,101,114,34,
+62,34,32,97,99,116,105,111,110,61,34,104,116,116,112,58,47,47,60,97,32,104,114,
+101,102,61,34,104,116,116,112,115,58,47,47,60,100,105,118,32,105,100,61,34,99,
+111,110,116,101,110,116,34,60,47,100,105,118,62,13,10,60,47,100,105,118,62,13,10
+,60,100,101,114,105,118,101,100,32,102,114,111,109,32,116,104,101,32,60,105,109,
+103,32,115,114,99,61,39,104,116,116,112,58,47,47,97,99,99,111,114,100,105,110,
+103,32,116,111,32,116,104,101,32,10,60,47,98,111,100,121,62,10,60,47,104,116,109
+,108,62,10,115,116,121,108,101,61,34,102,111,110,116,45,115,105,122,101,58,115,
+99,114,105,112,116,32,108,97,110,103,117,97,103,101,61,34,65,114,105,97,108,44,
+32,72,101,108,118,101,116,105,99,97,44,60,47,97,62,60,115,112,97,110,32,99,108,
+97,115,115,61,34,60,47,115,99,114,105,112,116,62,60,115,99,114,105,112,116,32,
+112,111,108,105,116,105,99,97,108,32,112,97,114,116,105,101,115,116,100,62,60,47
+,116,114,62,60,47,116,97,98,108,101,62,60,104,114,101,102,61,34,104,116,116,112,
+58,47,47,119,119,119,46,105,110,116,101,114,112,114,101,116,97,116,105,111,110,
+32,111,102,114,101,108,61,34,115,116,121,108,101,115,104,101,101,116,34,32,100,
+111,99,117,109,101,110,116,46,119,114,105,116,101,40,39,60,99,104,97,114,115,101
+,116,61,34,117,116,102,45,56,34,62,10,98,101,103,105,110,110,105,110,103,32,111,
+102,32,116,104,101,32,114,101,118,101,97,108,101,100,32,116,104,97,116,32,116,
+104,101,116,101,108,101,118,105,115,105,111,110,32,115,101,114,105,101,115,34,32
+,114,101,108,61,34,110,111,102,111,108,108,111,119,34,62,32,116,97,114,103,101,
+116,61,34,95,98,108,97,110,107,34,62,99,108,97,105,109,105,110,103,32,116,104,97
+,116,32,116,104,101,104,116,116,112,37,51,65,37,50,70,37,50,70,119,119,119,46,
+109,97,110,105,102,101,115,116,97,116,105,111,110,115,32,111,102,80,114,105,109,
+101,32,77,105,110,105,115,116,101,114,32,111,102,105,110,102,108,117,101,110,99,
+101,100,32,98,121,32,116,104,101,99,108,97,115,115,61,34,99,108,101,97,114,102,
+105,120,34,62,47,100,105,118,62,13,10,60,47,100,105,118,62,13,10,13,10,116,104,
+114,101,101,45,100,105,109,101,110,115,105,111,110,97,108,67,104,117,114,99,104,
+32,111,102,32,69,110,103,108,97,110,100,111,102,32,78,111,114,116,104,32,67,97,
+114,111,108,105,110,97,115,113,117,97,114,101,32,107,105,108,111,109,101,116,114
+,101,115,46,97,100,100,69,118,101,110,116,76,105,115,116,101,110,101,114,100,105
+,115,116,105,110,99,116,32,102,114,111,109,32,116,104,101,99,111,109,109,111,110
+,108,121,32,107,110,111,119,110,32,97,115,80,104,111,110,101,116,105,99,32,65,
+108,112,104,97,98,101,116,100,101,99,108,97,114,101,100,32,116,104,97,116,32,116
+,104,101,99,111,110,116,114,111,108,108,101,100,32,98,121,32,116,104,101,66,101,
+110,106,97,109,105,110,32,70,114,97,110,107,108,105,110,114,111,108,101,45,112,
+108,97,121,105,110,103,32,103,97,109,101,116,104,101,32,85,110,105,118,101,114,
+115,105,116,121,32,111,102,105,110,32,87,101,115,116,101,114,110,32,69,117,114,
+111,112,101,112,101,114,115,111,110,97,108,32,99,111,109,112,117,116,101,114,80,
+114,111,106,101,99,116,32,71,117,116,101,110,98,101,114,103,114,101,103,97,114,
+100,108,101,115,115,32,111,102,32,116,104,101,104,97,115,32,98,101,101,110,32,
+112,114,111,112,111,115,101,100,116,111,103,101,116,104,101,114,32,119,105,116,
+104,32,116,104,101,62,60,47,108,105,62,60,108,105,32,99,108,97,115,115,61,34,105
+,110,32,115,111,109,101,32,99,111,117,110,116,114,105,101,115,109,105,110,46,106
+,115,34,62,60,47,115,99,114,105,112,116,62,111,102,32,116,104,101,32,112,111,112
+,117,108,97,116,105,111,110,111,102,102,105,99,105,97,108,32,108,97,110,103,117,
+97,103,101,60,105,109,103,32,115,114,99,61,34,105,109,97,103,101,115,47,105,100,
+101,110,116,105,102,105,101,100,32,98,121,32,116,104,101,110,97,116,117,114,97,
+108,32,114,101,115,111,117,114,99,101,115,99,108,97,115,115,105,102,105,99,97,
+116,105,111,110,32,111,102,99,97,110,32,98,101,32,99,111,110,115,105,100,101,114
+,101,100,113,117,97,110,116,117,109,32,109,101,99,104,97,110,105,99,115,78,101,
+118,101,114,116,104,101,108,101,115,115,44,32,116,104,101,109,105,108,108,105,
+111,110,32,121,101,97,114,115,32,97,103,111,60,47,98,111,100,121,62,13,10,60,47,
+104,116,109,108,62,13,206,149,206,187,206,187,206,183,206,189,206,185,206,186,
+206,172,10,116,97,107,101,32,97,100,118,97,110,116,97,103,101,32,111,102,97,110,
+100,44,32,97,99,99,111,114,100,105,110,103,32,116,111,97,116,116,114,105,98,117,
+116,101,100,32,116,111,32,116,104,101,77,105,99,114,111,115,111,102,116,32,87,
+105,110,100,111,119,115,116,104,101,32,102,105,114,115,116,32,99,101,110,116,117
+,114,121,117,110,100,101,114,32,116,104,101,32,99,111,110,116,114,111,108,100,
+105,118,32,99,108,97,115,115,61,34,104,101,97,100,101,114,115,104,111,114,116,
+108,121,32,97,102,116,101,114,32,116,104,101,110,111,116,97,98,108,101,32,101,
+120,99,101,112,116,105,111,110,116,101,110,115,32,111,102,32,116,104,111,117,115
+,97,110,100,115,115,101,118,101,114,97,108,32,100,105,102,102,101,114,101,110,
+116,97,114,111,117,110,100,32,116,104,101,32,119,111,114,108,100,46,114,101,97,
+99,104,105,110,103,32,109,105,108,105,116,97,114,121,105,115,111,108,97,116,101,
+100,32,102,114,111,109,32,116,104,101,111,112,112,111,115,105,116,105,111,110,32
+,116,111,32,116,104,101,116,104,101,32,79,108,100,32,84,101,115,116,97,109,101,
+110,116,65,102,114,105,99,97,110,32,65,109,101,114,105,99,97,110,115,105,110,115
+,101,114,116,101,100,32,105,110,116,111,32,116,104,101,115,101,112,97,114,97,116
+,101,32,102,114,111,109,32,116,104,101,109,101,116,114,111,112,111,108,105,116,
+97,110,32,97,114,101,97,109,97,107,101,115,32,105,116,32,112,111,115,115,105,98,
+108,101,97,99,107,110,111,119,108,101,100,103,101,100,32,116,104,97,116,97,114,
+103,117,97,98,108,121,32,116,104,101,32,109,111,115,116,116,121,112,101,61,34,
+116,101,120,116,47,99,115,115,34,62,10,116,104,101,32,73,110,116,101,114,110,97,
+116,105,111,110,97,108,65,99,99,111,114,100,105,110,103,32,116,111,32,116,104,
+101,32,112,101,61,34,116,101,120,116,47,99,115,115,34,32,47,62,10,99,111,105,110
+,99,105,100,101,32,119,105,116,104,32,116,104,101,116,119,111,45,116,104,105,114
+,100,115,32,111,102,32,116,104,101,68,117,114,105,110,103,32,116,104,105,115,32,
+116,105,109,101,44,100,117,114,105,110,103,32,116,104,101,32,112,101,114,105,111
+,100,97,110,110,111,117,110,99,101,100,32,116,104,97,116,32,104,101,116,104,101,
+32,105,110,116,101,114,110,97,116,105,111,110,97,108,97,110,100,32,109,111,114,
+101,32,114,101,99,101,110,116,108,121,98,101,108,105,101,118,101,100,32,116,104,
+97,116,32,116,104,101,99,111,110,115,99,105,111,117,115,110,101,115,115,32,97,
+110,100,102,111,114,109,101,114,108,121,32,107,110,111,119,110,32,97,115,115,117
+,114,114,111,117,110,100,101,100,32,98,121,32,116,104,101,102,105,114,115,116,32
+,97,112,112,101,97,114,101,100,32,105,110,111,99,99,97,115,105,111,110,97,108,
+108,121,32,117,115,101,100,112,111,115,105,116,105,111,110,58,97,98,115,111,108,
+117,116,101,59,34,32,116,97,114,103,101,116,61,34,95,98,108,97,110,107,34,32,112
+,111,115,105,116,105,111,110,58,114,101,108,97,116,105,118,101,59,116,101,120,
+116,45,97,108,105,103,110,58,99,101,110,116,101,114,59,106,97,120,47,108,105,98,
+115,47,106,113,117,101,114,121,47,49,46,98,97,99,107,103,114,111,117,110,100,45,
+99,111,108,111,114,58,35,116,121,112,101,61,34,97,112,112,108,105,99,97,116,105,
+111,110,47,97,110,103,117,97,103,101,34,32,99,111,110,116,101,110,116,61,34,60,
+109,101,116,97,32,104,116,116,112,45,101,113,117,105,118,61,34,80,114,105,118,97
+,99,121,32,80,111,108,105,99,121,60,47,97,62,101,40,34,37,51,67,115,99,114,105,
+112,116,32,115,114,99,61,39,34,32,116,97,114,103,101,116,61,34,95,98,108,97,110,
+107,34,62,79,110,32,116,104,101,32,111,116,104,101,114,32,104,97,110,100,44,46,
+106,112,103,124,116,104,117,109,98,124,114,105,103,104,116,124,50,60,47,100,105,
+118,62,60,100,105,118,32,99,108,97,115,115,61,34,60,100,105,118,32,115,116,121,
+108,101,61,34,102,108,111,97,116,58,110,105,110,101,116,101,101,110,116,104,32,
+99,101,110,116,117,114,121,60,47,98,111,100,121,62,13,10,60,47,104,116,109,108,
+62,13,10,60,105,109,103,32,115,114,99,61,34,104,116,116,112,58,47,47,115,59,116,
+101,120,116,45,97,108,105,103,110,58,99,101,110,116,101,114,102,111,110,116,45,
+119,101,105,103,104,116,58,32,98,111,108,100,59,32,65,99,99,111,114,100,105,110,
+103,32,116,111,32,116,104,101,32,100,105,102,102,101,114,101,110,99,101,32,98,
+101,116,119,101,101,110,34,32,102,114,97,109,101,98,111,114,100,101,114,61,34,48
+,34,32,34,32,115,116,121,108,101,61,34,112,111,115,105,116,105,111,110,58,108,
+105,110,107,32,104,114,101,102,61,34,104,116,116,112,58,47,47,104,116,109,108,52
+,47,108,111,111,115,101,46,100,116,100,34,62,10,100,117,114,105,110,103,32,116,
+104,105,115,32,112,101,114,105,111,100,60,47,116,100,62,60,47,116,114,62,60,47,
+116,97,98,108,101,62,99,108,111,115,101,108,121,32,114,101,108,97,116,101,100,32
+,116,111,102,111,114,32,116,104,101,32,102,105,114,115,116,32,116,105,109,101,59
+,102,111,110,116,45,119,101,105,103,104,116,58,98,111,108,100,59,105,110,112,117
+,116,32,116,121,112,101,61,34,116,101,120,116,34,32,60,115,112,97,110,32,115,116
+,121,108,101,61,34,102,111,110,116,45,111,110,114,101,97,100,121,115,116,97,116,
+101,99,104,97,110,103,101,9,60,100,105,118,32,99,108,97,115,115,61,34,99,108,101
+,97,114,100,111,99,117,109,101,110,116,46,108,111,99,97,116,105,111,110,46,32,70
+,111,114,32,101,120,97,109,112,108,101,44,32,116,104,101,32,97,32,119,105,100,
+101,32,118,97,114,105,101,116,121,32,111,102,32,60,33,68,79,67,84,89,80,69,32,
+104,116,109,108,62,13,10,60,38,110,98,115,112,59,38,110,98,115,112,59,38,110,98,
+115,112,59,34,62,60,97,32,104,114,101,102,61,34,104,116,116,112,58,47,47,115,116
+,121,108,101,61,34,102,108,111,97,116,58,108,101,102,116,59,99,111,110,99,101,
+114,110,101,100,32,119,105,116,104,32,116,104,101,61,104,116,116,112,37,51,65,37
+,50,70,37,50,70,119,119,119,46,105,110,32,112,111,112,117,108,97,114,32,99,117,
+108,116,117,114,101,116,121,112,101,61,34,116,101,120,116,47,99,115,115,34,32,47
+,62,105,116,32,105,115,32,112,111,115,115,105,98,108,101,32,116,111,32,72,97,114
+,118,97,114,100,32,85,110,105,118,101,114,115,105,116,121,116,121,108,101,115,
+104,101,101,116,34,32,104,114,101,102,61,34,47,116,104,101,32,109,97,105,110,32,
+99,104,97,114,97,99,116,101,114,79,120,102,111,114,100,32,85,110,105,118,101,114
+,115,105,116,121,32,32,110,97,109,101,61,34,107,101,121,119,111,114,100,115,34,
+32,99,115,116,121,108,101,61,34,116,101,120,116,45,97,108,105,103,110,58,116,104
+,101,32,85,110,105,116,101,100,32,75,105,110,103,100,111,109,102,101,100,101,114
+,97,108,32,103,111,118,101,114,110,109,101,110,116,60,100,105,118,32,115,116,121
+,108,101,61,34,109,97,114,103,105,110,32,100,101,112,101,110,100,105,110,103,32,
+111,110,32,116,104,101,32,100,101,115,99,114,105,112,116,105,111,110,32,111,102,
+32,116,104,101,60,100,105,118,32,99,108,97,115,115,61,34,104,101,97,100,101,114,
+46,109,105,110,46,106,115,34,62,60,47,115,99,114,105,112,116,62,100,101,115,116,
+114,117,99,116,105,111,110,32,111,102,32,116,104,101,115,108,105,103,104,116,108
+,121,32,100,105,102,102,101,114,101,110,116,105,110,32,97,99,99,111,114,100,97,
+110,99,101,32,119,105,116,104,116,101,108,101,99,111,109,109,117,110,105,99,97,
+116,105,111,110,115,105,110,100,105,99,97,116,101,115,32,116,104,97,116,32,116,
+104,101,115,104,111,114,116,108,121,32,116,104,101,114,101,97,102,116,101,114,
+101,115,112,101,99,105,97,108,108,121,32,105,110,32,116,104,101,32,69,117,114,
+111,112,101,97,110,32,99,111,117,110,116,114,105,101,115,72,111,119,101,118,101,
+114,44,32,116,104,101,114,101,32,97,114,101,115,114,99,61,34,104,116,116,112,58,
+47,47,115,116,97,116,105,99,115,117,103,103,101,115,116,101,100,32,116,104,97,
+116,32,116,104,101,34,32,115,114,99,61,34,104,116,116,112,58,47,47,119,119,119,
+46,97,32,108,97,114,103,101,32,110,117,109,98,101,114,32,111,102,32,84,101,108,
+101,99,111,109,109,117,110,105,99,97,116,105,111,110,115,34,32,114,101,108,61,34
+,110,111,102,111,108,108,111,119,34,32,116,72,111,108,121,32,82,111,109,97,110,
+32,69,109,112,101,114,111,114,97,108,109,111,115,116,32,101,120,99,108,117,115,
+105,118,101,108,121,34,32,98,111,114,100,101,114,61,34,48,34,32,97,108,116,61,34
+,83,101,99,114,101,116,97,114,121,32,111,102,32,83,116,97,116,101,99,117,108,109
+,105,110,97,116,105,110,103,32,105,110,32,116,104,101,67,73,65,32,87,111,114,108
+,100,32,70,97,99,116,98,111,111,107,116,104,101,32,109,111,115,116,32,105,109,
+112,111,114,116,97,110,116,97,110,110,105,118,101,114,115,97,114,121,32,111,102,
+32,116,104,101,115,116,121,108,101,61,34,98,97,99,107,103,114,111,117,110,100,45
+,60,108,105,62,60,101,109,62,60,97,32,104,114,101,102,61,34,47,116,104,101,32,65
+,116,108,97,110,116,105,99,32,79,99,101,97,110,115,116,114,105,99,116,108,121,32
+,115,112,101,97,107,105,110,103,44,115,104,111,114,116,108,121,32,98,101,102,111
+,114,101,32,116,104,101,100,105,102,102,101,114,101,110,116,32,116,121,112,101,
+115,32,111,102,116,104,101,32,79,116,116,111,109,97,110,32,69,109,112,105,114,
+101,62,60,105,109,103,32,115,114,99,61,34,104,116,116,112,58,47,47,65,110,32,73,
+110,116,114,111,100,117,99,116,105,111,110,32,116,111,99,111,110,115,101,113,117
+,101,110,99,101,32,111,102,32,116,104,101,100,101,112,97,114,116,117,114,101,32,
+102,114,111,109,32,116,104,101,67,111,110,102,101,100,101,114,97,116,101,32,83,
+116,97,116,101,115,105,110,100,105,103,101,110,111,117,115,32,112,101,111,112,
+108,101,115,80,114,111,99,101,101,100,105,110,103,115,32,111,102,32,116,104,101,
+105,110,102,111,114,109,97,116,105,111,110,32,111,110,32,116,104,101,116,104,101
+,111,114,105,101,115,32,104,97,118,101,32,98,101,101,110,105,110,118,111,108,118
+,101,109,101,110,116,32,105,110,32,116,104,101,100,105,118,105,100,101,100,32,
+105,110,116,111,32,116,104,114,101,101,97,100,106,97,99,101,110,116,32,99,111,
+117,110,116,114,105,101,115,105,115,32,114,101,115,112,111,110,115,105,98,108,
+101,32,102,111,114,100,105,115,115,111,108,117,116,105,111,110,32,111,102,32,116
+,104,101,99,111,108,108,97,98,111,114,97,116,105,111,110,32,119,105,116,104,119,
+105,100,101,108,121,32,114,101,103,97,114,100,101,100,32,97,115,104,105,115,32,
+99,111,110,116,101,109,112,111,114,97,114,105,101,115,102,111,117,110,100,105,
+110,103,32,109,101,109,98,101,114,32,111,102,68,111,109,105,110,105,99,97,110,32
+,82,101,112,117,98,108,105,99,103,101,110,101,114,97,108,108,121,32,97,99,99,101
+,112,116,101,100,116,104,101,32,112,111,115,115,105,98,105,108,105,116,121,32,
+111,102,97,114,101,32,97,108,115,111,32,97,118,97,105,108,97,98,108,101,117,110,
+100,101,114,32,99,111,110,115,116,114,117,99,116,105,111,110,114,101,115,116,111
+,114,97,116,105,111,110,32,111,102,32,116,104,101,116,104,101,32,103,101,110,101
+,114,97,108,32,112,117,98,108,105,99,105,115,32,97,108,109,111,115,116,32,101,
+110,116,105,114,101,108,121,112,97,115,115,101,115,32,116,104,114,111,117,103,
+104,32,116,104,101,104,97,115,32,98,101,101,110,32,115,117,103,103,101,115,116,
+101,100,99,111,109,112,117,116,101,114,32,97,110,100,32,118,105,100,101,111,71,
+101,114,109,97,110,105,99,32,108,97,110,103,117,97,103,101,115,32,97,99,99,111,
+114,100,105,110,103,32,116,111,32,116,104,101,32,100,105,102,102,101,114,101,110
+,116,32,102,114,111,109,32,116,104,101,115,104,111,114,116,108,121,32,97,102,116
+,101,114,119,97,114,100,115,104,114,101,102,61,34,104,116,116,112,115,58,47,47,
+119,119,119,46,114,101,99,101,110,116,32,100,101,118,101,108,111,112,109,101,110
+,116,66,111,97,114,100,32,111,102,32,68,105,114,101,99,116,111,114,115,60,100,
+105,118,32,99,108,97,115,115,61,34,115,101,97,114,99,104,124,32,60,97,32,104,114
+,101,102,61,34,104,116,116,112,58,47,47,73,110,32,112,97,114,116,105,99,117,108,
+97,114,44,32,116,104,101,77,117,108,116,105,112,108,101,32,102,111,111,116,110,
+111,116,101,115,111,114,32,111,116,104,101,114,32,115,117,98,115,116,97,110,99,
+101,116,104,111,117,115,97,110,100,115,32,111,102,32,121,101,97,114,115,116,114,
+97,110,115,108,97,116,105,111,110,32,111,102,32,116,104,101,60,47,100,105,118,62
+,13,10,60,47,100,105,118,62,13,10,13,10,60,97,32,104,114,101,102,61,34,105,110,
+100,101,120,46,112,104,112,119,97,115,32,101,115,116,97,98,108,105,115,104,101,
+100,32,105,110,109,105,110,46,106,115,34,62,60,47,115,99,114,105,112,116,62,10,
+112,97,114,116,105,99,105,112,97,116,101,32,105,110,32,116,104,101,97,32,115,116
+,114,111,110,103,32,105,110,102,108,117,101,110,99,101,115,116,121,108,101,61,34
+,109,97,114,103,105,110,45,116,111,112,58,114,101,112,114,101,115,101,110,116,
+101,100,32,98,121,32,116,104,101,103,114,97,100,117,97,116,101,100,32,102,114,
+111,109,32,116,104,101,84,114,97,100,105,116,105,111,110,97,108,108,121,44,32,
+116,104,101,69,108,101,109,101,110,116,40,34,115,99,114,105,112,116,34,41,59,72,
+111,119,101,118,101,114,44,32,115,105,110,99,101,32,116,104,101,47,100,105,118,
+62,10,60,47,100,105,118,62,10,60,100,105,118,32,108,101,102,116,59,32,109,97,114
+,103,105,110,45,108,101,102,116,58,112,114,111,116,101,99,116,105,111,110,32,97,
+103,97,105,110,115,116,48,59,32,118,101,114,116,105,99,97,108,45,97,108,105,103,
+110,58,85,110,102,111,114,116,117,110,97,116,101,108,121,44,32,116,104,101,116,
+121,112,101,61,34,105,109,97,103,101,47,120,45,105,99,111,110,47,100,105,118,62,
+10,60,100,105,118,32,99,108,97,115,115,61,34,32,99,108,97,115,115,61,34,99,108,
+101,97,114,102,105,120,34,62,60,100,105,118,32,99,108,97,115,115,61,34,102,111,
+111,116,101,114,9,9,60,47,100,105,118,62,10,9,9,60,47,100,105,118,62,10,116,104,
+101,32,109,111,116,105,111,110,32,112,105,99,116,117,114,101,208,145,209,138,208
+,187,208,179,208,176,209,128,209,129,208,186,208,184,208,177,209,138,208,187,208
+,179,208,176,209,128,209,129,208,186,208,184,208,164,208,181,208,180,208,181,209
+,128,208,176,209,134,208,184,208,184,208,189,208,181,209,129,208,186,208,190,208
+,187,209,140,208,186,208,190,209,129,208,190,208,190,208,177,209,137,208,181,208
+,189,208,184,208,181,209,129,208,190,208,190,208,177,209,137,208,181,208,189,208
+,184,209,143,208,191,209,128,208,190,208,179,209,128,208,176,208,188,208,188,209
+,139,208,158,209,130,208,191,209,128,208,176,208,178,208,184,209,130,209,140,208
+,177,208,181,209,129,208,191,208,187,208,176,209,130,208,189,208,190,208,188,208
+,176,209,130,208,181,209,128,208,184,208,176,208,187,209,139,208,191,208,190,208
+,183,208,178,208,190,208,187,209,143,208,181,209,130,208,191,208,190,209,129,208
+,187,208,181,208,180,208,189,208,184,208,181,209,128,208,176,208,183,208,187,208
+,184,209,135,208,189,209,139,209,133,208,191,209,128,208,190,208,180,209,131,208
+,186,209,134,208,184,208,184,208,191,209,128,208,190,208,179,209,128,208,176,208
+,188,208,188,208,176,208,191,208,190,208,187,208,189,208,190,209,129,209,130,209
+,140,209,142,208,189,208,176,209,133,208,190,208,180,208,184,209,130,209,129,209
+,143,208,184,208,183,208,177,209,128,208,176,208,189,208,189,208,190,208,181,208
+,189,208,176,209,129,208,181,208,187,208,181,208,189,208,184,209,143,208,184,208
+,183,208,188,208,181,208,189,208,181,208,189,208,184,209,143,208,186,208,176,209
+,130,208,181,208,179,208,190,209,128,208,184,208,184,208,144,208,187,208,181,208
+,186,209,129,208,176,208,189,208,180,209,128,224,164,166,224,165,141,224,164,181
+,224,164,190,224,164,176,224,164,190,224,164,174,224,165,136,224,164,168,224,165
+,129,224,164,133,224,164,178,224,164,170,224,165,141,224,164,176,224,164,166,224
+,164,190,224,164,168,224,164,173,224,164,190,224,164,176,224,164,164,224,165,128
+,224,164,175,224,164,133,224,164,168,224,165,129,224,164,166,224,165,135,224,164
+,182,224,164,185,224,164,191,224,164,168,224,165,141,224,164,166,224,165,128,224
+,164,135,224,164,130,224,164,161,224,164,191,224,164,175,224,164,190,224,164,166
+,224,164,191,224,164,178,224,165,141,224,164,178,224,165,128,224,164,133,224,164
+,167,224,164,191,224,164,149,224,164,190,224,164,176,224,164,181,224,165,128,224
+,164,161,224,164,191,224,164,175,224,165,139,224,164,154,224,164,191,224,164,159
+,224,165,141,224,164,160,224,165,135,224,164,184,224,164,174,224,164,190,224,164
+,154,224,164,190,224,164,176,224,164,156,224,164,130,224,164,149,224,165,141,224
+,164,182,224,164,168,224,164,166,224,165,129,224,164,168,224,164,191,224,164,175
+,224,164,190,224,164,170,224,165,141,224,164,176,224,164,175,224,165,139,224,164
+,151,224,164,133,224,164,168,224,165,129,224,164,184,224,164,190,224,164,176,224
+,164,145,224,164,168,224,164,178,224,164,190,224,164,135,224,164,168,224,164,170
+,224,164,190,224,164,176,224,165,141,224,164,159,224,165,128,224,164,182,224,164
+,176,224,165,141,224,164,164,224,165,139,224,164,130,224,164,178,224,165,139,224
+,164,149,224,164,184,224,164,173,224,164,190,224,164,171,224,164,188,224,165,141
+,224,164,178,224,165,136,224,164,182,224,164,182,224,164,176,224,165,141,224,164
+,164,224,165,135,224,164,130,224,164,170,224,165,141,224,164,176,224,164,166,224
+,165,135,224,164,182,224,164,170,224,165,141,224,164,178,224,165,135,224,164,175
+,224,164,176,224,164,149,224,165,135,224,164,130,224,164,166,224,165,141,224,164
+,176,224,164,184,224,165,141,224,164,165,224,164,191,224,164,164,224,164,191,224
+,164,137,224,164,164,224,165,141,224,164,170,224,164,190,224,164,166,224,164,137
+,224,164,168,224,165,141,224,164,185,224,165,135,224,164,130,224,164,154,224,164
+,191,224,164,159,224,165,141,224,164,160,224,164,190,224,164,175,224,164,190,224
+,164,164,224,165,141,224,164,176,224,164,190,224,164,156,224,165,141,224,164,175
+,224,164,190,224,164,166,224,164,190,224,164,170,224,165,129,224,164,176,224,164
+,190,224,164,168,224,165,135,224,164,156,224,165,139,224,164,161,224,164,188,224
+,165,135,224,164,130,224,164,133,224,164,168,224,165,129,224,164,181,224,164,190
+,224,164,166,224,164,182,224,165,141,224,164,176,224,165,135,224,164,163,224,165
+,128,224,164,182,224,164,191,224,164,149,224,165,141,224,164,183,224,164,190,224
+,164,184,224,164,176,224,164,149,224,164,190,224,164,176,224,165,128,224,164,184
+,224,164,130,224,164,151,224,165,141,224,164,176,224,164,185,224,164,170,224,164
+,176,224,164,191,224,164,163,224,164,190,224,164,174,224,164,172,224,165,141,224
+,164,176,224,164,190,224,164,130,224,164,161,224,164,172,224,164,154,224,165,141
+,224,164,154,224,165,139,224,164,130,224,164,137,224,164,170,224,164,178,224,164
+,172,224,165,141,224,164,167,224,164,174,224,164,130,224,164,164,224,165,141,224
+,164,176,224,165,128,224,164,184,224,164,130,224,164,170,224,164,176,224,165,141
+,224,164,149,224,164,137,224,164,174,224,165,141,224,164,174,224,165,128,224,164
+,166,224,164,174,224,164,190,224,164,167,224,165,141,224,164,175,224,164,174,224
+,164,184,224,164,185,224,164,190,224,164,175,224,164,164,224,164,190,224,164,182
+,224,164,172,224,165,141,224,164,166,224,165,139,224,164,130,224,164,174,224,165
+,128,224,164,161,224,164,191,224,164,175,224,164,190,224,164,134,224,164,136,224
+,164,170,224,165,128,224,164,143,224,164,178,224,164,174,224,165,139,224,164,172
+,224,164,190,224,164,135,224,164,178,224,164,184,224,164,130,224,164,150,224,165
+,141,224,164,175,224,164,190,224,164,134,224,164,170,224,164,176,224,165,135,224
+,164,182,224,164,168,224,164,133,224,164,168,224,165,129,224,164,172,224,164,130
+,224,164,167,224,164,172,224,164,190,224,164,156,224,164,188,224,164,190,224,164
+,176,224,164,168,224,164,181,224,165,128,224,164,168,224,164,164,224,164,174,224
+,164,170,224,165,141,224,164,176,224,164,174,224,165,129,224,164,150,224,164,170
+,224,165,141,224,164,176,224,164,182,224,165,141,224,164,168,224,164,170,224,164
+,176,224,164,191,224,164,181,224,164,190,224,164,176,224,164,168,224,165,129,224
+,164,149,224,164,184,224,164,190,224,164,168,224,164,184,224,164,174,224,164,176
+,224,165,141,224,164,165,224,164,168,224,164,134,224,164,175,224,165,139,224,164
+,156,224,164,191,224,164,164,224,164,184,224,165,139,224,164,174,224,164,181,224
+,164,190,224,164,176,216,167,217,132,217,133,216,180,216,167,216,177,217,131,216
+,167,216,170,216,167,217,132,217,133,217,134,216,170,216,175,217,138,216,167,216
+,170,216,167,217,132,217,131,217,133,216,168,217,138,217,136,216,170,216,177,216
+,167,217,132,217,133,216,180,216,167,217,135,216,175,216,167,216,170,216,185,216
+,175,216,175,216,167,217,132,216,178,217,136,216,167,216,177,216,185,216,175,216
+,175,216,167,217,132,216,177,216,175,217,136,216,175,216,167,217,132,216,165,216
+,179,217,132,216,167,217,133,217,138,216,169,216,167,217,132,217,129,217,136,216
+,170,217,136,216,180,217,136,216,168,216,167,217,132,217,133,216,179,216,167,216
+,168,217,130,216,167,216,170,216,167,217,132,217,133,216,185,217,132,217,136,217
+,133,216,167,216,170,216,167,217,132,217,133,216,179,217,132,216,179,217,132,216
+,167,216,170,216,167,217,132,216,172,216,177,216,167,217,129,217,138,217,131,216
+,179,216,167,217,132,216,167,216,179,217,132,216,167,217,133,217,138,216,169,216
+,167,217,132,216,167,216,170,216,181,216,167,217,132,216,167,216,170,107,101,121
+,119,111,114,100,115,34,32,99,111,110,116,101,110,116,61,34,119,51,46,111,114,
+103,47,49,57,57,57,47,120,104,116,109,108,34,62,60,97,32,116,97,114,103,101,116,
+61,34,95,98,108,97,110,107,34,32,116,101,120,116,47,104,116,109,108,59,32,99,104
+,97,114,115,101,116,61,34,32,116,97,114,103,101,116,61,34,95,98,108,97,110,107,
+34,62,60,116,97,98,108,101,32,99,101,108,108,112,97,100,100,105,110,103,61,34,97
+,117,116,111,99,111,109,112,108,101,116,101,61,34,111,102,102,34,32,116,101,120,
+116,45,97,108,105,103,110,58,32,99,101,110,116,101,114,59,116,111,32,108,97,115,
+116,32,118,101,114,115,105,111,110,32,98,121,32,98,97,99,107,103,114,111,117,110
+,100,45,99,111,108,111,114,58,32,35,34,32,104,114,101,102,61,34,104,116,116,112,
+58,47,47,119,119,119,46,47,100,105,118,62,60,47,100,105,118,62,60,100,105,118,32
+,105,100,61,60,97,32,104,114,101,102,61,34,35,34,32,99,108,97,115,115,61,34,34,
+62,60,105,109,103,32,115,114,99,61,34,104,116,116,112,58,47,47,99,114,105,112,
+116,34,32,115,114,99,61,34,104,116,116,112,58,47,47,10,60,115,99,114,105,112,116
+,32,108,97,110,103,117,97,103,101,61,34,47,47,69,78,34,32,34,104,116,116,112,58,
+47,47,119,119,119,46,119,101,110,99,111,100,101,85,82,73,67,111,109,112,111,110,
+101,110,116,40,34,32,104,114,101,102,61,34,106,97,118,97,115,99,114,105,112,116,
+58,60,100,105,118,32,99,108,97,115,115,61,34,99,111,110,116,101,110,116,100,111,
+99,117,109,101,110,116,46,119,114,105,116,101,40,39,60,115,99,112,111,115,105,
+116,105,111,110,58,32,97,98,115,111,108,117,116,101,59,115,99,114,105,112,116,32
+,115,114,99,61,34,104,116,116,112,58,47,47,32,115,116,121,108,101,61,34,109,97,
+114,103,105,110,45,116,111,112,58,46,109,105,110,46,106,115,34,62,60,47,115,99,
+114,105,112,116,62,10,60,47,100,105,118,62,10,60,100,105,118,32,99,108,97,115,
+115,61,34,119,51,46,111,114,103,47,49,57,57,57,47,120,104,116,109,108,34,32,10,
+13,10,60,47,98,111,100,121,62,13,10,60,47,104,116,109,108,62,100,105,115,116,105
+,110,99,116,105,111,110,32,98,101,116,119,101,101,110,47,34,32,116,97,114,103,
+101,116,61,34,95,98,108,97,110,107,34,62,60,108,105,110,107,32,104,114,101,102,
+61,34,104,116,116,112,58,47,47,101,110,99,111,100,105,110,103,61,34,117,116,102,
+45,56,34,63,62,10,119,46,97,100,100,69,118,101,110,116,76,105,115,116,101,110,
+101,114,63,97,99,116,105,111,110,61,34,104,116,116,112,58,47,47,119,119,119,46,
+105,99,111,110,34,32,104,114,101,102,61,34,104,116,116,112,58,47,47,32,115,116,
+121,108,101,61,34,98,97,99,107,103,114,111,117,110,100,58,116,121,112,101,61,34,
+116,101,120,116,47,99,115,115,34,32,47,62,10,109,101,116,97,32,112,114,111,112,
+101,114,116,121,61,34,111,103,58,116,60,105,110,112,117,116,32,116,121,112,101,
+61,34,116,101,120,116,34,32,32,115,116,121,108,101,61,34,116,101,120,116,45,97,
+108,105,103,110,58,116,104,101,32,100,101,118,101,108,111,112,109,101,110,116,32
+,111,102,32,116,121,108,101,115,104,101,101,116,34,32,116,121,112,101,61,34,116,
+101,104,116,109,108,59,32,99,104,97,114,115,101,116,61,117,116,102,45,56,105,115
+,32,99,111,110,115,105,100,101,114,101,100,32,116,111,32,98,101,116,97,98,108,
+101,32,119,105,100,116,104,61,34,49,48,48,37,34,32,73,110,32,97,100,100,105,116,
+105,111,110,32,116,111,32,116,104,101,32,99,111,110,116,114,105,98,117,116,101,
+100,32,116,111,32,116,104,101,32,100,105,102,102,101,114,101,110,99,101,115,32,
+98,101,116,119,101,101,110,100,101,118,101,108,111,112,109,101,110,116,32,111,
+102,32,116,104,101,32,73,116,32,105,115,32,105,109,112,111,114,116,97,110,116,32
+,116,111,32,60,47,115,99,114,105,112,116,62,10,10,60,115,99,114,105,112,116,32,
+32,115,116,121,108,101,61,34,102,111,110,116,45,115,105,122,101,58,49,62,60,47,
+115,112,97,110,62,60,115,112,97,110,32,105,100,61,103,98,76,105,98,114,97,114,
+121,32,111,102,32,67,111,110,103,114,101,115,115,60,105,109,103,32,115,114,99,61
+,34,104,116,116,112,58,47,47,105,109,69,110,103,108,105,115,104,32,116,114,97,
+110,115,108,97,116,105,111,110,65,99,97,100,101,109,121,32,111,102,32,83,99,105,
+101,110,99,101,115,100,105,118,32,115,116,121,108,101,61,34,100,105,115,112,108,
+97,121,58,99,111,110,115,116,114,117,99,116,105,111,110,32,111,102,32,116,104,
+101,46,103,101,116,69,108,101,109,101,110,116,66,121,73,100,40,105,100,41,105,
+110,32,99,111,110,106,117,110,99,116,105,111,110,32,119,105,116,104,69,108,101,
+109,101,110,116,40,39,115,99,114,105,112,116,39,41,59,32,60,109,101,116,97,32,
+112,114,111,112,101,114,116,121,61,34,111,103,58,208,145,209,138,208,187,208,179
+,208,176,209,128,209,129,208,186,208,184,10,32,116,121,112,101,61,34,116,101,120
+,116,34,32,110,97,109,101,61,34,62,80,114,105,118,97,99,121,32,80,111,108,105,99
+,121,60,47,97,62,97,100,109,105,110,105,115,116,101,114,101,100,32,98,121,32,116
+,104,101,101,110,97,98,108,101,83,105,110,103,108,101,82,101,113,117,101,115,116
+,115,116,121,108,101,61,38,113,117,111,116,59,109,97,114,103,105,110,58,60,47,
+100,105,118,62,60,47,100,105,118,62,60,47,100,105,118,62,60,62,60,105,109,103,32
+,115,114,99,61,34,104,116,116,112,58,47,47,105,32,115,116,121,108,101,61,38,113,
+117,111,116,59,102,108,111,97,116,58,114,101,102,101,114,114,101,100,32,116,111,
+32,97,115,32,116,104,101,32,116,111,116,97,108,32,112,111,112,117,108,97,116,105
+,111,110,32,111,102,105,110,32,87,97,115,104,105,110,103,116,111,110,44,32,68,46
+,67,46,32,115,116,121,108,101,61,34,98,97,99,107,103,114,111,117,110,100,45,97,
+109,111,110,103,32,111,116,104,101,114,32,116,104,105,110,103,115,44,111,114,103
+,97,110,105,122,97,116,105,111,110,32,111,102,32,116,104,101,112,97,114,116,105,
+99,105,112,97,116,101,100,32,105,110,32,116,104,101,116,104,101,32,105,110,116,
+114,111,100,117,99,116,105,111,110,32,111,102,105,100,101,110,116,105,102,105,
+101,100,32,119,105,116,104,32,116,104,101,102,105,99,116,105,111,110,97,108,32,
+99,104,97,114,97,99,116,101,114,32,79,120,102,111,114,100,32,85,110,105,118,101,
+114,115,105,116,121,32,109,105,115,117,110,100,101,114,115,116,97,110,100,105,
+110,103,32,111,102,84,104,101,114,101,32,97,114,101,44,32,104,111,119,101,118,
+101,114,44,115,116,121,108,101,115,104,101,101,116,34,32,104,114,101,102,61,34,
+47,67,111,108,117,109,98,105,97,32,85,110,105,118,101,114,115,105,116,121,101,
+120,112,97,110,100,101,100,32,116,111,32,105,110,99,108,117,100,101,117,115,117,
+97,108,108,121,32,114,101,102,101,114,114,101,100,32,116,111,105,110,100,105,99,
+97,116,105,110,103,32,116,104,97,116,32,116,104,101,104,97,118,101,32,115,117,
+103,103,101,115,116,101,100,32,116,104,97,116,97,102,102,105,108,105,97,116,101,
+100,32,119,105,116,104,32,116,104,101,99,111,114,114,101,108,97,116,105,111,110,
+32,98,101,116,119,101,101,110,110,117,109,98,101,114,32,111,102,32,100,105,102,
+102,101,114,101,110,116,62,60,47,116,100,62,60,47,116,114,62,60,47,116,97,98,108
+,101,62,82,101,112,117,98,108,105,99,32,111,102,32,73,114,101,108,97,110,100,10,
+60,47,115,99,114,105,112,116,62,10,60,115,99,114,105,112,116,32,117,110,100,101,
+114,32,116,104,101,32,105,110,102,108,117,101,110,99,101,99,111,110,116,114,105,
+98,117,116,105,111,110,32,116,111,32,116,104,101,79,102,102,105,99,105,97,108,32
+,119,101,98,115,105,116,101,32,111,102,104,101,97,100,113,117,97,114,116,101,114
+,115,32,111,102,32,116,104,101,99,101,110,116,101,114,101,100,32,97,114,111,117,
+110,100,32,116,104,101,105,109,112,108,105,99,97,116,105,111,110,115,32,111,102,
+32,116,104,101,104,97,118,101,32,98,101,101,110,32,100,101,118,101,108,111,112,
+101,100,70,101,100,101,114,97,108,32,82,101,112,117,98,108,105,99,32,111,102,98,
+101,99,97,109,101,32,105,110,99,114,101,97,115,105,110,103,108,121,99,111,110,
+116,105,110,117,97,116,105,111,110,32,111,102,32,116,104,101,78,111,116,101,44,
+32,104,111,119,101,118,101,114,44,32,116,104,97,116,115,105,109,105,108,97,114,
+32,116,111,32,116,104,97,116,32,111,102,32,99,97,112,97,98,105,108,105,116,105,
+101,115,32,111,102,32,116,104,101,97,99,99,111,114,100,97,110,99,101,32,119,105,
+116,104,32,116,104,101,112,97,114,116,105,99,105,112,97,110,116,115,32,105,110,
+32,116,104,101,102,117,114,116,104,101,114,32,100,101,118,101,108,111,112,109,
+101,110,116,117,110,100,101,114,32,116,104,101,32,100,105,114,101,99,116,105,111
+,110,105,115,32,111,102,116,101,110,32,99,111,110,115,105,100,101,114,101,100,
+104,105,115,32,121,111,117,110,103,101,114,32,98,114,111,116,104,101,114,60,47,
+116,100,62,60,47,116,114,62,60,47,116,97,98,108,101,62,60,97,32,104,116,116,112,
+45,101,113,117,105,118,61,34,88,45,85,65,45,112,104,121,115,105,99,97,108,32,112
+,114,111,112,101,114,116,105,101,115,111,102,32,66,114,105,116,105,115,104,32,67
+,111,108,117,109,98,105,97,104,97,115,32,98,101,101,110,32,99,114,105,116,105,99
+,105,122,101,100,40,119,105,116,104,32,116,104,101,32,101,120,99,101,112,116,105
+,111,110,113,117,101,115,116,105,111,110,115,32,97,98,111,117,116,32,116,104,101
+,112,97,115,115,105,110,103,32,116,104,114,111,117,103,104,32,116,104,101,48,34,
+32,99,101,108,108,112,97,100,100,105,110,103,61,34,48,34,32,116,104,111,117,115,
+97,110,100,115,32,111,102,32,112,101,111,112,108,101,114,101,100,105,114,101,99,
+116,115,32,104,101,114,101,46,32,70,111,114,104,97,118,101,32,99,104,105,108,100
+,114,101,110,32,117,110,100,101,114,37,51,69,37,51,67,47,115,99,114,105,112,116,
+37,51,69,34,41,41,59,60,97,32,104,114,101,102,61,34,104,116,116,112,58,47,47,119
+,119,119,46,60,108,105,62,60,97,32,104,114,101,102,61,34,104,116,116,112,58,47,
+47,115,105,116,101,95,110,97,109,101,34,32,99,111,110,116,101,110,116,61,34,116,
+101,120,116,45,100,101,99,111,114,97,116,105,111,110,58,110,111,110,101,115,116,
+121,108,101,61,34,100,105,115,112,108,97,121,58,32,110,111,110,101,60,109,101,
+116,97,32,104,116,116,112,45,101,113,117,105,118,61,34,88,45,110,101,119,32,68,
+97,116,101,40,41,46,103,101,116,84,105,109,101,40,41,32,116,121,112,101,61,34,
+105,109,97,103,101,47,120,45,105,99,111,110,34,60,47,115,112,97,110,62,60,115,
+112,97,110,32,99,108,97,115,115,61,34,108,97,110,103,117,97,103,101,61,34,106,97
+,118,97,115,99,114,105,112,116,119,105,110,100,111,119,46,108,111,99,97,116,105,
+111,110,46,104,114,101,102,60,97,32,104,114,101,102,61,34,106,97,118,97,115,99,
+114,105,112,116,58,45,45,62,13,10,60,115,99,114,105,112,116,32,116,121,112,101,
+61,34,116,60,97,32,104,114,101,102,61,39,104,116,116,112,58,47,47,119,119,119,46
+,104,111,114,116,99,117,116,32,105,99,111,110,34,32,104,114,101,102,61,34,60,47,
+100,105,118,62,13,10,60,100,105,118,32,99,108,97,115,115,61,34,60,115,99,114,105
+,112,116,32,115,114,99,61,34,104,116,116,112,58,47,47,34,32,114,101,108,61,34,
+115,116,121,108,101,115,104,101,101,116,34,32,116,60,47,100,105,118,62,10,60,115
+,99,114,105,112,116,32,116,121,112,101,61,47,97,62,32,60,97,32,104,114,101,102,
+61,34,104,116,116,112,58,47,47,32,97,108,108,111,119,84,114,97,110,115,112,97,
+114,101,110,99,121,61,34,88,45,85,65,45,67,111,109,112,97,116,105,98,108,101,34,
+32,99,111,110,114,101,108,97,116,105,111,110,115,104,105,112,32,98,101,116,119,
+101,101,110,10,60,47,115,99,114,105,112,116,62,13,10,60,115,99,114,105,112,116,
+32,60,47,97,62,60,47,108,105,62,60,47,117,108,62,60,47,100,105,118,62,97,115,115
+,111,99,105,97,116,101,100,32,119,105,116,104,32,116,104,101,32,112,114,111,103,
+114,97,109,109,105,110,103,32,108,97,110,103,117,97,103,101,60,47,97,62,60,97,32
+,104,114,101,102,61,34,104,116,116,112,58,47,47,60,47,97,62,60,47,108,105,62,60,
+108,105,32,99,108,97,115,115,61,34,102,111,114,109,32,97,99,116,105,111,110,61,
+34,104,116,116,112,58,47,47,60,100,105,118,32,115,116,121,108,101,61,34,100,105,
+115,112,108,97,121,58,116,121,112,101,61,34,116,101,120,116,34,32,110,97,109,101
+,61,34,113,34,60,116,97,98,108,101,32,119,105,100,116,104,61,34,49,48,48,37,34,
+32,98,97,99,107,103,114,111,117,110,100,45,112,111,115,105,116,105,111,110,58,34
+,32,98,111,114,100,101,114,61,34,48,34,32,119,105,100,116,104,61,34,114,101,108,
+61,34,115,104,111,114,116,99,117,116,32,105,99,111,110,34,32,104,54,62,60,117,
+108,62,60,108,105,62,60,97,32,104,114,101,102,61,34,32,32,60,109,101,116,97,32,
+104,116,116,112,45,101,113,117,105,118,61,34,99,115,115,34,32,109,101,100,105,97
+,61,34,115,99,114,101,101,110,34,32,114,101,115,112,111,110,115,105,98,108,101,
+32,102,111,114,32,116,104,101,32,34,32,116,121,112,101,61,34,97,112,112,108,105,
+99,97,116,105,111,110,47,34,32,115,116,121,108,101,61,34,98,97,99,107,103,114,
+111,117,110,100,45,104,116,109,108,59,32,99,104,97,114,115,101,116,61,117,116,
+102,45,56,34,32,97,108,108,111,119,116,114,97,110,115,112,97,114,101,110,99,121,
+61,34,115,116,121,108,101,115,104,101,101,116,34,32,116,121,112,101,61,34,116,
+101,13,10,60,109,101,116,97,32,104,116,116,112,45,101,113,117,105,118,61,34,62,
+60,47,115,112,97,110,62,60,115,112,97,110,32,99,108,97,115,115,61,34,48,34,32,99
+,101,108,108,115,112,97,99,105,110,103,61,34,48,34,62,59,10,60,47,115,99,114,105
+,112,116,62,10,60,115,99,114,105,112,116,32,115,111,109,101,116,105,109,101,115,
+32,99,97,108,108,101,100,32,116,104,101,100,111,101,115,32,110,111,116,32,110,
+101,99,101,115,115,97,114,105,108,121,70,111,114,32,109,111,114,101,32,105,110,
+102,111,114,109,97,116,105,111,110,97,116,32,116,104,101,32,98,101,103,105,110,
+110,105,110,103,32,111,102,32,60,33,68,79,67,84,89,80,69,32,104,116,109,108,62,
+60,104,116,109,108,112,97,114,116,105,99,117,108,97,114,108,121,32,105,110,32,
+116,104,101,32,116,121,112,101,61,34,104,105,100,100,101,110,34,32,110,97,109,
+101,61,34,106,97,118,97,115,99,114,105,112,116,58,118,111,105,100,40,48,41,59,34
+,101,102,102,101,99,116,105,118,101,110,101,115,115,32,111,102,32,116,104,101,32
+,97,117,116,111,99,111,109,112,108,101,116,101,61,34,111,102,102,34,32,103,101,
+110,101,114,97,108,108,121,32,99,111,110,115,105,100,101,114,101,100,62,60,105,
+110,112,117,116,32,116,121,112,101,61,34,116,101,120,116,34,32,34,62,60,47,115,
+99,114,105,112,116,62,13,10,60,115,99,114,105,112,116,116,104,114,111,117,103,
+104,111,117,116,32,116,104,101,32,119,111,114,108,100,99,111,109,109,111,110,32,
+109,105,115,99,111,110,99,101,112,116,105,111,110,97,115,115,111,99,105,97,116,
+105,111,110,32,119,105,116,104,32,116,104,101,60,47,100,105,118,62,10,60,47,100,
+105,118,62,10,60,100,105,118,32,99,100,117,114,105,110,103,32,104,105,115,32,108
+,105,102,101,116,105,109,101,44,99,111,114,114,101,115,112,111,110,100,105,110,
+103,32,116,111,32,116,104,101,116,121,112,101,61,34,105,109,97,103,101,47,120,45
+,105,99,111,110,34,32,97,110,32,105,110,99,114,101,97,115,105,110,103,32,110,117
+,109,98,101,114,100,105,112,108,111,109,97,116,105,99,32,114,101,108,97,116,105,
+111,110,115,97,114,101,32,111,102,116,101,110,32,99,111,110,115,105,100,101,114,
+101,100,109,101,116,97,32,99,104,97,114,115,101,116,61,34,117,116,102,45,56,34,
+32,60,105,110,112,117,116,32,116,121,112,101,61,34,116,101,120,116,34,32,101,120
+,97,109,112,108,101,115,32,105,110,99,108,117,100,101,32,116,104,101,34,62,60,
+105,109,103,32,115,114,99,61,34,104,116,116,112,58,47,47,105,112,97,114,116,105,
+99,105,112,97,116,105,111,110,32,105,110,32,116,104,101,116,104,101,32,101,115,
+116,97,98,108,105,115,104,109,101,110,116,32,111,102,10,60,47,100,105,118,62,10,
+60,100,105,118,32,99,108,97,115,115,61,34,38,97,109,112,59,110,98,115,112,59,38,
+97,109,112,59,110,98,115,112,59,116,111,32,100,101,116,101,114,109,105,110,101,
+32,119,104,101,116,104,101,114,113,117,105,116,101,32,100,105,102,102,101,114,
+101,110,116,32,102,114,111,109,109,97,114,107,101,100,32,116,104,101,32,98,101,
+103,105,110,110,105,110,103,100,105,115,116,97,110,99,101,32,98,101,116,119,101,
+101,110,32,116,104,101,99,111,110,116,114,105,98,117,116,105,111,110,115,32,116,
+111,32,116,104,101,99,111,110,102,108,105,99,116,32,98,101,116,119,101,101,110,
+32,116,104,101,119,105,100,101,108,121,32,99,111,110,115,105,100,101,114,101,100
+,32,116,111,119,97,115,32,111,110,101,32,111,102,32,116,104,101,32,102,105,114,
+115,116,119,105,116,104,32,118,97,114,121,105,110,103,32,100,101,103,114,101,101
+,115,104,97,118,101,32,115,112,101,99,117,108,97,116,101,100,32,116,104,97,116,
+40,100,111,99,117,109,101,110,116,46,103,101,116,69,108,101,109,101,110,116,112,
+97,114,116,105,99,105,112,97,116,105,110,103,32,105,110,32,116,104,101,111,114,
+105,103,105,110,97,108,108,121,32,100,101,118,101,108,111,112,101,100,101,116,97
+,32,99,104,97,114,115,101,116,61,34,117,116,102,45,56,34,62,32,116,121,112,101,
+61,34,116,101,120,116,47,99,115,115,34,32,47,62,10,105,110,116,101,114,99,104,97
+,110,103,101,97,98,108,121,32,119,105,116,104,109,111,114,101,32,99,108,111,115,
+101,108,121,32,114,101,108,97,116,101,100,115,111,99,105,97,108,32,97,110,100,32
+,112,111,108,105,116,105,99,97,108,116,104,97,116,32,119,111,117,108,100,32,111,
+116,104,101,114,119,105,115,101,112,101,114,112,101,110,100,105,99,117,108,97,
+114,32,116,111,32,116,104,101,115,116,121,108,101,32,116,121,112,101,61,34,116,
+101,120,116,47,99,115,115,116,121,112,101,61,34,115,117,98,109,105,116,34,32,110
+,97,109,101,61,34,102,97,109,105,108,105,101,115,32,114,101,115,105,100,105,110,
+103,32,105,110,100,101,118,101,108,111,112,105,110,103,32,99,111,117,110,116,114
+,105,101,115,99,111,109,112,117,116,101,114,32,112,114,111,103,114,97,109,109,
+105,110,103,101,99,111,110,111,109,105,99,32,100,101,118,101,108,111,112,109,101
+,110,116,100,101,116,101,114,109,105,110,97,116,105,111,110,32,111,102,32,116,
+104,101,102,111,114,32,109,111,114,101,32,105,110,102,111,114,109,97,116,105,111
+,110,111,110,32,115,101,118,101,114,97,108,32,111,99,99,97,115,105,111,110,115,
+112,111,114,116,117,103,117,195,170,115,32,40,69,117,114,111,112,101,117,41,208,
+163,208,186,209,128,208,176,209,151,208,189,209,129,209,140,208,186,208,176,209,
+131,208,186,209,128,208,176,209,151,208,189,209,129,209,140,208,186,208,176,208,
+160,208,190,209,129,209,129,208,184,208,185,209,129,208,186,208,190,208,185,208,
+188,208,176,209,130,208,181,209,128,208,184,208,176,208,187,208,190,208,178,208,
+184,208,189,209,132,208,190,209,128,208,188,208,176,209,134,208,184,208,184,209,
+131,208,191,209,128,208,176,208,178,208,187,208,181,208,189,208,184,209,143,208,
+189,208,181,208,190,208,177,209,133,208,190,208,180,208,184,208,188,208,190,208,
+184,208,189,209,132,208,190,209,128,208,188,208,176,209,134,208,184,209,143,208,
+152,208,189,209,132,208,190,209,128,208,188,208,176,209,134,208,184,209,143,208,
+160,208,181,209,129,208,191,209,131,208,177,208,187,208,184,208,186,208,184,208,
+186,208,190,208,187,208,184,209,135,208,181,209,129,209,130,208,178,208,190,208,
+184,208,189,209,132,208,190,209,128,208,188,208,176,209,134,208,184,209,142,209,
+130,208,181,209,128,209,128,208,184,209,130,208,190,209,128,208,184,208,184,208,
+180,208,190,209,129,209,130,208,176,209,130,208,190,209,135,208,189,208,190,216,
+167,217,132,217,133,216,170,217,136,216,167,216,172,216,175,217,136,217,134,216,
+167,217,132,216,167,216,180,216,170,216,177,216,167,217,131,216,167,216,170,216,
+167,217,132,216,167,217,130,216,170,216,177,216,167,216,173,216,167,216,170,104,
+116,109,108,59,32,99,104,97,114,115,101,116,61,85,84,70,45,56,34,32,115,101,116,
+84,105,109,101,111,117,116,40,102,117,110,99,116,105,111,110,40,41,100,105,115,
+112,108,97,121,58,105,110,108,105,110,101,45,98,108,111,99,107,59,60,105,110,112
+,117,116,32,116,121,112,101,61,34,115,117,98,109,105,116,34,32,116,121,112,101,
+32,61,32,39,116,101,120,116,47,106,97,118,97,115,99,114,105,60,105,109,103,32,
+115,114,99,61,34,104,116,116,112,58,47,47,119,119,119,46,34,32,34,104,116,116,
+112,58,47,47,119,119,119,46,119,51,46,111,114,103,47,115,104,111,114,116,99,117,
+116,32,105,99,111,110,34,32,104,114,101,102,61,34,34,32,97,117,116,111,99,111,
+109,112,108,101,116,101,61,34,111,102,102,34,32,60,47,97,62,60,47,100,105,118,62
+,60,100,105,118,32,99,108,97,115,115,61,60,47,97,62,60,47,108,105,62,10,60,108,
+105,32,99,108,97,115,115,61,34,99,115,115,34,32,116,121,112,101,61,34,116,101,
+120,116,47,99,115,115,34,32,60,102,111,114,109,32,97,99,116,105,111,110,61,34,
+104,116,116,112,58,47,47,120,116,47,99,115,115,34,32,104,114,101,102,61,34,104,
+116,116,112,58,47,47,108,105,110,107,32,114,101,108,61,34,97,108,116,101,114,110
+,97,116,101,34,32,13,10,60,115,99,114,105,112,116,32,116,121,112,101,61,34,116,
+101,120,116,47,32,111,110,99,108,105,99,107,61,34,106,97,118,97,115,99,114,105,
+112,116,58,40,110,101,119,32,68,97,116,101,41,46,103,101,116,84,105,109,101,40,
+41,125,104,101,105,103,104,116,61,34,49,34,32,119,105,100,116,104,61,34,49,34,32
+,80,101,111,112,108,101,39,115,32,82,101,112,117,98,108,105,99,32,111,102,32,32,
+60,97,32,104,114,101,102,61,34,104,116,116,112,58,47,47,119,119,119,46,116,101,
+120,116,45,100,101,99,111,114,97,116,105,111,110,58,117,110,100,101,114,116,104,
+101,32,98,101,103,105,110,110,105,110,103,32,111,102,32,116,104,101,32,60,47,100
+,105,118,62,10,60,47,100,105,118,62,10,60,47,100,105,118,62,10,101,115,116,97,98
+,108,105,115,104,109,101,110,116,32,111,102,32,116,104,101,32,60,47,100,105,118,
+62,60,47,100,105,118,62,60,47,100,105,118,62,60,47,100,35,118,105,101,119,112,
+111,114,116,123,109,105,110,45,104,101,105,103,104,116,58,10,60,115,99,114,105,
+112,116,32,115,114,99,61,34,104,116,116,112,58,47,47,111,112,116,105,111,110,62,
+60,111,112,116,105,111,110,32,118,97,108,117,101,61,111,102,116,101,110,32,114,
+101,102,101,114,114,101,100,32,116,111,32,97,115,32,47,111,112,116,105,111,110,
+62,10,60,111,112,116,105,111,110,32,118,97,108,117,60,33,68,79,67,84,89,80,69,32
+,104,116,109,108,62,10,60,33,45,45,91,73,110,116,101,114,110,97,116,105,111,110,
+97,108,32,65,105,114,112,111,114,116,62,10,60,97,32,104,114,101,102,61,34,104,
+116,116,112,58,47,47,119,119,119,60,47,97,62,60,97,32,104,114,101,102,61,34,104,
+116,116,112,58,47,47,119,224,184,160,224,184,178,224,184,169,224,184,178,224,185
+,132,224,184,151,224,184,162,225,131,165,225,131,144,225,131,160,225,131,151,225
+,131,163,225,131,154,225,131,152,230,173,163,233,171,148,228,184,173,230,150,135
+,32,40,231,185,129,233,171,148,41,224,164,168,224,164,191,224,164,176,224,165,
+141,224,164,166,224,165,135,224,164,182,224,164,161,224,164,190,224,164,137,224,
+164,168,224,164,178,224,165,139,224,164,161,224,164,149,224,165,141,224,164,183,
+224,165,135,224,164,164,224,165,141,224,164,176,224,164,156,224,164,190,224,164,
+168,224,164,149,224,164,190,224,164,176,224,165,128,224,164,184,224,164,130,224,
+164,172,224,164,130,224,164,167,224,164,191,224,164,164,224,164,184,224,165,141,
+224,164,165,224,164,190,224,164,170,224,164,168,224,164,190,224,164,184,224,165,
+141,224,164,181,224,165,128,224,164,149,224,164,190,224,164,176,224,164,184,224,
+164,130,224,164,184,224,165,141,224,164,149,224,164,176,224,164,163,224,164,184,
+224,164,190,224,164,174,224,164,151,224,165,141,224,164,176,224,165,128,224,164,
+154,224,164,191,224,164,159,224,165,141,224,164,160,224,165,139,224,164,130,224,
+164,181,224,164,191,224,164,156,224,165,141,224,164,158,224,164,190,224,164,168,
+224,164,133,224,164,174,224,165,135,224,164,176,224,164,191,224,164,149,224,164,
+190,224,164,181,224,164,191,224,164,173,224,164,191,224,164,168,224,165,141,224,
+164,168,224,164,151,224,164,190,224,164,161,224,164,191,224,164,175,224,164,190,
+224,164,129,224,164,149,224,165,141,224,164,175,224,165,139,224,164,130,224,164,
+149,224,164,191,224,164,184,224,165,129,224,164,176,224,164,149,224,165,141,224,
+164,183,224,164,190,224,164,170,224,164,185,224,165,129,224,164,129,224,164,154,
+224,164,164,224,165,128,224,164,170,224,165,141,224,164,176,224,164,172,224,164,
+130,224,164,167,224,164,168,224,164,159,224,164,191,224,164,170,224,165,141,224,
+164,170,224,164,163,224,165,128,224,164,149,224,165,141,224,164,176,224,164,191,
+224,164,149,224,165,135,224,164,159,224,164,170,224,165,141,224,164,176,224,164,
+190,224,164,176,224,164,130,224,164,173,224,164,170,224,165,141,224,164,176,224,
+164,190,224,164,170,224,165,141,224,164,164,224,164,174,224,164,190,224,164,178,
+224,164,191,224,164,149,224,165,139,224,164,130,224,164,176,224,164,171,224,164,
+188,224,165,141,224,164,164,224,164,190,224,164,176,224,164,168,224,164,191,224,
+164,176,224,165,141,224,164,174,224,164,190,224,164,163,224,164,178,224,164,191,
+224,164,174,224,164,191,224,164,159,224,165,135,224,164,161,100,101,115,99,114,
+105,112,116,105,111,110,34,32,99,111,110,116,101,110,116,61,34,100,111,99,117,
+109,101,110,116,46,108,111,99,97,116,105,111,110,46,112,114,111,116,46,103,101,
+116,69,108,101,109,101,110,116,115,66,121,84,97,103,78,97,109,101,40,60,33,68,79
+,67,84,89,80,69,32,104,116,109,108,62,10,60,104,116,109,108,32,60,109,101,116,97
+,32,99,104,97,114,115,101,116,61,34,117,116,102,45,56,34,62,58,117,114,108,34,32
+,99,111,110,116,101,110,116,61,34,104,116,116,112,58,47,47,46,99,115,115,34,32,
+114,101,108,61,34,115,116,121,108,101,115,104,101,101,116,34,115,116,121,108,101
+,32,116,121,112,101,61,34,116,101,120,116,47,99,115,115,34,62,116,121,112,101,61
+,34,116,101,120,116,47,99,115,115,34,32,104,114,101,102,61,34,119,51,46,111,114,
+103,47,49,57,57,57,47,120,104,116,109,108,34,32,120,109,108,116,121,112,101,61,
+34,116,101,120,116,47,106,97,118,97,115,99,114,105,112,116,34,32,109,101,116,104
+,111,100,61,34,103,101,116,34,32,97,99,116,105,111,110,61,34,108,105,110,107,32,
+114,101,108,61,34,115,116,121,108,101,115,104,101,101,116,34,32,32,61,32,100,111
+,99,117,109,101,110,116,46,103,101,116,69,108,101,109,101,110,116,116,121,112,
+101,61,34,105,109,97,103,101,47,120,45,105,99,111,110,34,32,47,62,99,101,108,108
+,112,97,100,100,105,110,103,61,34,48,34,32,99,101,108,108,115,112,46,99,115,115,
+34,32,116,121,112,101,61,34,116,101,120,116,47,99,115,115,34,32,60,47,97,62,60,
+47,108,105,62,60,108,105,62,60,97,32,104,114,101,102,61,34,34,32,119,105,100,116
+,104,61,34,49,34,32,104,101,105,103,104,116,61,34,49,34,34,62,60,97,32,104,114,
+101,102,61,34,104,116,116,112,58,47,47,119,119,119,46,115,116,121,108,101,61,34,
+100,105,115,112,108,97,121,58,110,111,110,101,59,34,62,97,108,116,101,114,110,97
+,116,101,34,32,116,121,112,101,61,34,97,112,112,108,105,45,47,47,87,51,67,47,47,
+68,84,68,32,88,72,84,77,76,32,49,46,48,32,101,108,108,115,112,97,99,105,110,103,
+61,34,48,34,32,99,101,108,108,112,97,100,32,116,121,112,101,61,34,104,105,100,
+100,101,110,34,32,118,97,108,117,101,61,34,47,97,62,38,110,98,115,112,59,60,115,
+112,97,110,32,114,111,108,101,61,34,115,10,60,105,110,112,117,116,32,116,121,112
+,101,61,34,104,105,100,100,101,110,34,32,108,97,110,103,117,97,103,101,61,34,74,
+97,118,97,83,99,114,105,112,116,34,32,32,100,111,99,117,109,101,110,116,46,103,
+101,116,69,108,101,109,101,110,116,115,66,103,61,34,48,34,32,99,101,108,108,115,
+112,97,99,105,110,103,61,34,48,34,32,121,112,101,61,34,116,101,120,116,47,99,115
+,115,34,32,109,101,100,105,97,61,34,116,121,112,101,61,39,116,101,120,116,47,106
+,97,118,97,115,99,114,105,112,116,39,119,105,116,104,32,116,104,101,32,101,120,
+99,101,112,116,105,111,110,32,111,102,32,121,112,101,61,34,116,101,120,116,47,99
+,115,115,34,32,114,101,108,61,34,115,116,32,104,101,105,103,104,116,61,34,49,34,
+32,119,105,100,116,104,61,34,49,34,32,61,39,43,101,110,99,111,100,101,85,82,73,
+67,111,109,112,111,110,101,110,116,40,60,108,105,110,107,32,114,101,108,61,34,97
+,108,116,101,114,110,97,116,101,34,32,10,98,111,100,121,44,32,116,114,44,32,105,
+110,112,117,116,44,32,116,101,120,116,109,101,116,97,32,110,97,109,101,61,34,114
+,111,98,111,116,115,34,32,99,111,110,109,101,116,104,111,100,61,34,112,111,115,
+116,34,32,97,99,116,105,111,110,61,34,62,10,60,97,32,104,114,101,102,61,34,104,
+116,116,112,58,47,47,119,119,119,46,99,115,115,34,32,114,101,108,61,34,115,116,
+121,108,101,115,104,101,101,116,34,32,60,47,100,105,118,62,60,47,100,105,118,62,
+60,100,105,118,32,99,108,97,115,115,108,97,110,103,117,97,103,101,61,34,106,97,
+118,97,115,99,114,105,112,116,34,62,97,114,105,97,45,104,105,100,100,101,110,61,
+34,116,114,117,101,34,62,194,183,60,114,105,112,116,34,32,116,121,112,101,61,34,
+116,101,120,116,47,106,97,118,97,115,108,61,48,59,125,41,40,41,59,10,40,102,117,
+110,99,116,105,111,110,40,41,123,98,97,99,107,103,114,111,117,110,100,45,105,109
+,97,103,101,58,32,117,114,108,40,47,97,62,60,47,108,105,62,60,108,105,62,60,97,
+32,104,114,101,102,61,34,104,9,9,60,108,105,62,60,97,32,104,114,101,102,61,34,
+104,116,116,112,58,47,47,97,116,111,114,34,32,97,114,105,97,45,104,105,100,100,
+101,110,61,34,116,114,117,62,32,60,97,32,104,114,101,102,61,34,104,116,116,112,
+58,47,47,119,119,119,46,108,97,110,103,117,97,103,101,61,34,106,97,118,97,115,99
+,114,105,112,116,34,32,47,111,112,116,105,111,110,62,10,60,111,112,116,105,111,
+110,32,118,97,108,117,101,47,100,105,118,62,60,47,100,105,118,62,60,100,105,118,
+32,99,108,97,115,115,61,114,97,116,111,114,34,32,97,114,105,97,45,104,105,100,
+100,101,110,61,34,116,114,101,61,40,110,101,119,32,68,97,116,101,41,46,103,101,
+116,84,105,109,101,40,41,112,111,114,116,117,103,117,195,170,115,32,40,100,111,
+32,66,114,97,115,105,108,41,208,190,209,128,208,179,208,176,208,189,208,184,208,
+183,208,176,209,134,208,184,208,184,208,178,208,190,208,183,208,188,208,190,208,
+182,208,189,208,190,209,129,209,130,209,140,208,190,208,177,209,128,208,176,208,
+183,208,190,208,178,208,176,208,189,208,184,209,143,209,128,208,181,208,179,208,
+184,209,129,209,130,209,128,208,176,209,134,208,184,208,184,208,178,208,190,208,
+183,208,188,208,190,208,182,208,189,208,190,209,129,209,130,208,184,208,190,208,
+177,209,143,208,183,208,176,209,130,208,181,208,187,209,140,208,189,208,176,60,
+33,68,79,67,84,89,80,69,32,104,116,109,108,32,80,85,66,76,73,67,32,34,110,116,45
+,84,121,112,101,34,32,99,111,110,116,101,110,116,61,34,116,101,120,116,47,60,109
+,101,116,97,32,104,116,116,112,45,101,113,117,105,118,61,34,67,111,110,116,101,
+114,97,110,115,105,116,105,111,110,97,108,47,47,69,78,34,32,34,104,116,116,112,
+58,60,104,116,109,108,32,120,109,108,110,115,61,34,104,116,116,112,58,47,47,119,
+119,119,45,47,47,87,51,67,47,47,68,84,68,32,88,72,84,77,76,32,49,46,48,32,84,68,
+84,68,47,120,104,116,109,108,49,45,116,114,97,110,115,105,116,105,111,110,97,108
+,47,47,119,119,119,46,119,51,46,111,114,103,47,84,82,47,120,104,116,109,108,49,
+47,112,101,32,61,32,39,116,101,120,116,47,106,97,118,97,115,99,114,105,112,116,
+39,59,60,109,101,116,97,32,110,97,109,101,61,34,100,101,115,99,114,105,112,116,
+105,111,110,112,97,114,101,110,116,78,111,100,101,46,105,110,115,101,114,116,66,
+101,102,111,114,101,60,105,110,112,117,116,32,116,121,112,101,61,34,104,105,100,
+100,101,110,34,32,110,97,106,115,34,32,116,121,112,101,61,34,116,101,120,116,47,
+106,97,118,97,115,99,114,105,40,100,111,99,117,109,101,110,116,41,46,114,101,97,
+100,121,40,102,117,110,99,116,105,115,99,114,105,112,116,32,116,121,112,101,61,
+34,116,101,120,116,47,106,97,118,97,115,105,109,97,103,101,34,32,99,111,110,116,
+101,110,116,61,34,104,116,116,112,58,47,47,85,65,45,67,111,109,112,97,116,105,98
+,108,101,34,32,99,111,110,116,101,110,116,61,116,109,108,59,32,99,104,97,114,115
+,101,116,61,117,116,102,45,56,34,32,47,62,10,108,105,110,107,32,114,101,108,61,
+34,115,104,111,114,116,99,117,116,32,105,99,111,110,60,108,105,110,107,32,114,
+101,108,61,34,115,116,121,108,101,115,104,101,101,116,34,32,60,47,115,99,114,105
+,112,116,62,10,60,115,99,114,105,112,116,32,116,121,112,101,61,61,32,100,111,99,
+117,109,101,110,116,46,99,114,101,97,116,101,69,108,101,109,101,110,60,97,32,116
+,97,114,103,101,116,61,34,95,98,108,97,110,107,34,32,104,114,101,102,61,32,100,
+111,99,117,109,101,110,116,46,103,101,116,69,108,101,109,101,110,116,115,66,105,
+110,112,117,116,32,116,121,112,101,61,34,116,101,120,116,34,32,110,97,109,101,61
+,97,46,116,121,112,101,32,61,32,39,116,101,120,116,47,106,97,118,97,115,99,114,
+105,110,112,117,116,32,116,121,112,101,61,34,104,105,100,100,101,110,34,32,110,
+97,109,101,104,116,109,108,59,32,99,104,97,114,115,101,116,61,117,116,102,45,56,
+34,32,47,62,100,116,100,34,62,10,60,104,116,109,108,32,120,109,108,110,115,61,34
+,104,116,116,112,45,47,47,87,51,67,47,47,68,84,68,32,72,84,77,76,32,52,46,48,49,
+32,84,101,110,116,115,66,121,84,97,103,78,97,109,101,40,39,115,99,114,105,112,
+116,39,41,105,110,112,117,116,32,116,121,112,101,61,34,104,105,100,100,101,110,
+34,32,110,97,109,60,115,99,114,105,112,116,32,116,121,112,101,61,34,116,101,120,
+116,47,106,97,118,97,115,34,32,115,116,121,108,101,61,34,100,105,115,112,108,97,
+121,58,110,111,110,101,59,34,62,100,111,99,117,109,101,110,116,46,103,101,116,69
+,108,101,109,101,110,116,66,121,73,100,40,61,100,111,99,117,109,101,110,116,46,
+99,114,101,97,116,101,69,108,101,109,101,110,116,40,39,32,116,121,112,101,61,39,
+116,101,120,116,47,106,97,118,97,115,99,114,105,112,116,39,105,110,112,117,116,
+32,116,121,112,101,61,34,116,101,120,116,34,32,110,97,109,101,61,34,100,46,103,
+101,116,69,108,101,109,101,110,116,115,66,121,84,97,103,78,97,109,101,40,115,110
+,105,99,97,108,34,32,104,114,101,102,61,34,104,116,116,112,58,47,47,119,119,119,
+46,67,47,47,68,84,68,32,72,84,77,76,32,52,46,48,49,32,84,114,97,110,115,105,116,
+60,115,116,121,108,101,32,116,121,112,101,61,34,116,101,120,116,47,99,115,115,34
+,62,10,10,60,115,116,121,108,101,32,116,121,112,101,61,34,116,101,120,116,47,99,
+115,115,34,62,105,111,110,97,108,46,100,116,100,34,62,10,60,104,116,109,108,32,
+120,109,108,110,115,61,104,116,116,112,45,101,113,117,105,118,61,34,67,111,110,
+116,101,110,116,45,84,121,112,101,100,105,110,103,61,34,48,34,32,99,101,108,108,
+115,112,97,99,105,110,103,61,34,48,34,104,116,109,108,59,32,99,104,97,114,115,
+101,116,61,117,116,102,45,56,34,32,47,62,10,32,115,116,121,108,101,61,34,100,105
+,115,112,108,97,121,58,110,111,110,101,59,34,62,60,60,108,105,62,60,97,32,104,
+114,101,102,61,34,104,116,116,112,58,47,47,119,119,119,46,32,116,121,112,101,61,
+39,116,101,120,116,47,106,97,118,97,115,99,114,105,112,116,39,62,208,180,208,181
+,209,143,209,130,208,181,208,187,209,140,208,189,208,190,209,129,209,130,208,184
+,209,129,208,190,208,190,209,130,208,178,208,181,209,130,209,129,209,130,208,178
+,208,184,208,184,208,191,209,128,208,190,208,184,208,183,208,178,208,190,208,180
+,209,129,209,130,208,178,208,176,208,177,208,181,208,183,208,190,208,191,208,176
+,209,129,208,189,208,190,209,129,209,130,208,184,224,164,170,224,165,129,224,164
+,184,224,165,141,224,164,164,224,164,191,224,164,149,224,164,190,224,164,149,224
+,164,190,224,164,130,224,164,151,224,165,141,224,164,176,224,165,135,224,164,184
+,224,164,137,224,164,168,224,165,141,224,164,185,224,165,139,224,164,130,224,164
+,168,224,165,135,224,164,181,224,164,191,224,164,167,224,164,190,224,164,168,224
+,164,184,224,164,173,224,164,190,224,164,171,224,164,191,224,164,149,224,165,141
+,224,164,184,224,164,191,224,164,130,224,164,151,224,164,184,224,165,129,224,164
+,176,224,164,149,224,165,141,224,164,183,224,164,191,224,164,164,224,164,149,224
+,165,137,224,164,170,224,165,128,224,164,176,224,164,190,224,164,135,224,164,159
+,224,164,181,224,164,191,224,164,156,224,165,141,224,164,158,224,164,190,224,164
+,170,224,164,168,224,164,149,224,164,190,224,164,176,224,165,141,224,164,176,224
+,164,181,224,164,190,224,164,136,224,164,184,224,164,149,224,165,141,224,164,176
+,224,164,191,224,164,175,224,164,164,224,164,190
+}
+;
+#endif  /* !BROTLI_EXTERNAL_DICTIONARY_DATA */
+
+static BrotliDictionary kBrotliDictionary = {
+  /* size_bits_by_length: index width per word length; 0 = no such words. */
+  {
+    0, 0, 0, 0, 10, 10, 11, 11,
+    10, 10, 10, 10, 10, 9, 9, 8,
+    7, 7, 8, 7, 7, 6, 6, 5,
+    5, 0, 0, 0, 0, 0, 0, 0
+  },
+
+  /* offsets_by_length: prefix sums of (i << bits[i]) over non-empty buckets. */
+  {
+    0, 0, 0, 0, 0, 4096, 9216, 21504,
+    35840, 44032, 53248, 63488, 74752, 87040, 93696, 100864,
+    104704, 106752, 108928, 113536, 115968, 118528, 119872, 121280,
+    122016, 122784, 122784, 122784, 122784, 122784, 122784, 122784
+  },
+
+  /* data_size == sizeof(kBrotliDictionaryData) */
+  122784,
+
+  /* data: stays NULL until BrotliSetDictionaryData() when built external. */
+#if defined(BROTLI_EXTERNAL_DICTIONARY_DATA)
+  NULL
+#else
+  kBrotliDictionaryData
+#endif
+};
+
+const BrotliDictionary* BrotliGetDictionary(void) {
+  return &kBrotliDictionary;  /* address of the file-scope instance, no copy */
+}
+
+void BrotliSetDictionaryData(const uint8_t* data) {
+  /* No-op for a NULL argument or once dictionary data is already present. */
+  if (!data || kBrotliDictionary.data) return;
+  kBrotliDictionary.data = data;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/dictionary.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/dictionary.h
new file mode 100644
index 0000000000..b1c6f7f580
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/dictionary.h
@@ -0,0 +1,64 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Collection of static dictionary words. */
+
+#ifndef BROTLI_COMMON_DICTIONARY_H_
+#define BROTLI_COMMON_DICTIONARY_H_
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct BrotliDictionary {
+  /**
+   * Number of bits used to encode the index of a dictionary word in a bucket.
+   *
+   * Specification: Appendix A. Static Dictionary Data
+   *
+   * Words in the dictionary are bucketed by length.
+   * @c 0 means that there are no words of the given length.
+   * The dictionary consists of words with lengths of [4..24] bytes.
+   * Values at indices [0..3] and [25..31] should not be addressed.
+   */
+  uint8_t size_bits_by_length[32];
+
+  /* assert(offset[i + 1] == offset[i] + (bits[i] ? (i << bits[i]) : 0)) */
+  uint32_t offsets_by_length[32];
+
+  /* assert(data_size == offsets_by_length[31]) */
+  size_t data_size;
+
+  /* The data array has no declared bound; its layout must agree with the
+     size_bits_by_length values. The size specified here matches the default
+     (RFC 7932) dictionary and is given by data_size. */
+  const uint8_t* data;
+} BrotliDictionary;
+
+BROTLI_COMMON_API const BrotliDictionary* BrotliGetDictionary(void);
+
+/**
+ * Sets dictionary data.
+ *
+ * When dictionary data is already set / present, this method is a no-op.
+ *
+ * Dictionary data MUST be provided before BrotliGetDictionary is invoked.
+ * This method is used ONLY in a multi-client environment (e.g. C + Java),
+ * to reduce storage by sharing a single dictionary between implementations.
+ */
+BROTLI_COMMON_API void BrotliSetDictionaryData(const uint8_t* data);
+
+#define BROTLI_MIN_DICTIONARY_WORD_LENGTH 4
+#define BROTLI_MAX_DICTIONARY_WORD_LENGTH 24
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_COMMON_DICTIONARY_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/platform.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/platform.h
new file mode 100755
index 0000000000..2633e04052
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/platform.h
@@ -0,0 +1,567 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Macros for compiler / platform specific features and build options.
+
+   Build options are:
+    * BROTLI_BUILD_32_BIT disables 64-bit optimizations
+    * BROTLI_BUILD_64_BIT forces to use 64-bit optimizations
+    * BROTLI_BUILD_BIG_ENDIAN forces to use big-endian optimizations
+    * BROTLI_BUILD_ENDIAN_NEUTRAL disables endian-aware optimizations
+    * BROTLI_BUILD_LITTLE_ENDIAN forces to use little-endian optimizations
+    * BROTLI_BUILD_PORTABLE disables dangerous optimizations, like unaligned
+      read and overlapping memcpy; this reduces decompression speed by 5%
+    * BROTLI_BUILD_NO_RBIT disables "rbit" optimization for ARM CPUs
+    * BROTLI_DEBUG dumps file name and line number when decoder detects stream
+      or memory error
+    * BROTLI_ENABLE_LOG enables asserts and dumps various state information
+*/
+
+#ifndef BROTLI_COMMON_PLATFORM_H_
+#define BROTLI_COMMON_PLATFORM_H_
+
+#include <string.h>  /* memcpy */
+#include <stdlib.h>  /* malloc, free */
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+#if defined(OS_LINUX) || defined(OS_CYGWIN) || defined(__EMSCRIPTEN__)
+#include <endian.h>
+#elif defined(OS_FREEBSD)
+#include <machine/endian.h>
+#elif defined(OS_MACOSX)
+#include <machine/endian.h>
+/* Let's try and follow the Linux convention */
+#define BROTLI_X_BYTE_ORDER BYTE_ORDER
+#define BROTLI_X_LITTLE_ENDIAN LITTLE_ENDIAN
+#define BROTLI_X_BIG_ENDIAN BIG_ENDIAN
+#endif
+
+#if defined(BROTLI_ENABLE_LOG) || defined(BROTLI_DEBUG)
+#include <assert.h>
+#include <stdio.h>
+#endif
+
+/* The following macros were borrowed from https://github.com/nemequ/hedley
+ * with permission of original author - Evan Nemerson <evan@nemerson.com> */
+
+/* >>> >>> >>> hedley macros */
+
+/* Branch-prediction hints: "BROTLI_PREDICT_TRUE" / "BROTLI_PREDICT_FALSE".
+
+To apply a compiler hint, wrap the branching condition in a macro, like this:
+
+  if (BROTLI_PREDICT_TRUE(zero == 0)) {
+    // main execution path
+  } else {
+    // compiler should place this code outside of main execution path
+  }
+
+OR:
+
+  if (BROTLI_PREDICT_FALSE(something_rare_or_unexpected_happens)) {
+    // compiler should place this code outside of main execution path
+  }
+
+With __builtin_expect both macros normalize the condition with !!(x), so a
+pointer or a wide integer is not narrowed to the builtin's "long" argument. */
+#if BROTLI_GNUC_HAS_BUILTIN(__builtin_expect, 3, 0, 0) || \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0) ||               \
+    BROTLI_SUNPRO_VERSION_CHECK(5, 15, 0) ||              \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) ||                  \
+    BROTLI_IBM_VERSION_CHECK(10, 1, 0) ||                 \
+    BROTLI_TI_VERSION_CHECK(7, 3, 0) ||                   \
+    BROTLI_TINYC_VERSION_CHECK(0, 9, 27)
+#define BROTLI_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+#define BROTLI_PREDICT_FALSE(x) (__builtin_expect(!!(x), 0))
+#else  /* no __builtin_expect: plain pass-through */
+#define BROTLI_PREDICT_FALSE(x) (x)
+#define BROTLI_PREDICT_TRUE(x) (x)
+#endif
+
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
+    !defined(__cplusplus)
+#define BROTLI_RESTRICT restrict
+#elif BROTLI_GNUC_VERSION_CHECK(3, 1, 0) ||                         \
+    BROTLI_MSVC_VERSION_CHECK(14, 0, 0) ||                          \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0) ||                         \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) ||                            \
+    BROTLI_IBM_VERSION_CHECK(10, 1, 0) ||                           \
+    BROTLI_PGI_VERSION_CHECK(17, 10, 0) ||                          \
+    BROTLI_TI_VERSION_CHECK(8, 0, 0) ||                             \
+    BROTLI_IAR_VERSION_CHECK(8, 0, 0) ||                            \
+    (BROTLI_SUNPRO_VERSION_CHECK(5, 14, 0) && defined(__cplusplus))
+#define BROTLI_RESTRICT __restrict
+#elif BROTLI_SUNPRO_VERSION_CHECK(5, 3, 0) && !defined(__cplusplus)
+#define BROTLI_RESTRICT _Restrict
+#else
+#define BROTLI_RESTRICT
+#endif
+
+#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \
+    (defined(__cplusplus) && (__cplusplus >= 199711L))
+#define BROTLI_MAYBE_INLINE inline
+#elif defined(__GNUC_STDC_INLINE__) || defined(__GNUC_GNU_INLINE__) || \
+    BROTLI_ARM_VERSION_CHECK(6, 2, 0)
+#define BROTLI_MAYBE_INLINE __inline__
+#elif BROTLI_MSVC_VERSION_CHECK(12, 0, 0) || \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) || BROTLI_TI_VERSION_CHECK(8, 0, 0)
+#define BROTLI_MAYBE_INLINE __inline
+#else
+#define BROTLI_MAYBE_INLINE
+#endif
+
+#if BROTLI_GNUC_HAS_ATTRIBUTE(always_inline, 4, 0, 0) ||                       \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0) ||                                    \
+    BROTLI_SUNPRO_VERSION_CHECK(5, 11, 0) ||                                   \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) ||                                       \
+    BROTLI_IBM_VERSION_CHECK(10, 1, 0) ||                                      \
+    BROTLI_TI_VERSION_CHECK(8, 0, 0) ||                                        \
+    (BROTLI_TI_VERSION_CHECK(7, 3, 0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__))
+#define BROTLI_INLINE BROTLI_MAYBE_INLINE __attribute__((__always_inline__))
+#elif BROTLI_MSVC_VERSION_CHECK(12, 0, 0)
+#define BROTLI_INLINE BROTLI_MAYBE_INLINE __forceinline
+#elif BROTLI_TI_VERSION_CHECK(7, 0, 0) && defined(__cplusplus)
+#define BROTLI_INLINE BROTLI_MAYBE_INLINE _Pragma("FUNC_ALWAYS_INLINE;")
+#elif BROTLI_IAR_VERSION_CHECK(8, 0, 0)
+#define BROTLI_INLINE BROTLI_MAYBE_INLINE _Pragma("inline=forced")
+#else
+#define BROTLI_INLINE BROTLI_MAYBE_INLINE
+#endif
+
+#if BROTLI_GNUC_HAS_ATTRIBUTE(noinline, 4, 0, 0) ||                            \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0) ||                                    \
+    BROTLI_SUNPRO_VERSION_CHECK(5, 11, 0) ||                                   \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) ||                                       \
+    BROTLI_IBM_VERSION_CHECK(10, 1, 0) ||                                      \
+    BROTLI_TI_VERSION_CHECK(8, 0, 0) ||                                        \
+    (BROTLI_TI_VERSION_CHECK(7, 3, 0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__))
+#define BROTLI_NOINLINE __attribute__((__noinline__))
+#elif BROTLI_MSVC_VERSION_CHECK(13, 10, 0)
+#define BROTLI_NOINLINE __declspec(noinline)
+#elif BROTLI_PGI_VERSION_CHECK(10, 2, 0)
+#define BROTLI_NOINLINE _Pragma("noinline")
+#elif BROTLI_TI_VERSION_CHECK(6, 0, 0) && defined(__cplusplus)
+#define BROTLI_NOINLINE _Pragma("FUNC_CANNOT_INLINE;")
+#elif BROTLI_IAR_VERSION_CHECK(8, 0, 0)
+#define BROTLI_NOINLINE _Pragma("inline=never")
+#else
+#define BROTLI_NOINLINE
+#endif
+
+/* BROTLI_INTERNAL could be defined to override visibility, e.g. for tests. */
+#if !defined(BROTLI_INTERNAL)
+#if defined(_WIN32) || defined(__CYGWIN__)
+#define BROTLI_INTERNAL
+#elif BROTLI_GNUC_VERSION_CHECK(3, 3, 0) ||                         \
+    BROTLI_TI_VERSION_CHECK(8, 0, 0) ||                             \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0) ||                         \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) ||                            \
+    BROTLI_IBM_VERSION_CHECK(13, 1, 0) ||                           \
+    BROTLI_SUNPRO_VERSION_CHECK(5, 11, 0) ||                        \
+    (BROTLI_TI_VERSION_CHECK(7, 3, 0) &&                            \
+     defined(__TI_GNU_ATTRIBUTE_SUPPORT__) && defined(__TI_EABI__))
+#define BROTLI_INTERNAL __attribute__ ((visibility ("hidden")))
+#else
+#define BROTLI_INTERNAL
+#endif
+#endif
+
+/* <<< <<< <<< end of hedley macros. */
+
+#if BROTLI_GNUC_HAS_ATTRIBUTE(unused, 2, 7, 0) || \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0)
+#define BROTLI_UNUSED_FUNCTION static BROTLI_INLINE __attribute__ ((unused))
+#else
+#define BROTLI_UNUSED_FUNCTION static BROTLI_INLINE
+#endif
+
+#if BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0)
+#define BROTLI_ALIGNED(N) __attribute__((aligned(N)))
+#else
+#define BROTLI_ALIGNED(N)
+#endif
+
+#if (defined(__ARM_ARCH) && (__ARM_ARCH == 7)) || \
+    (defined(M_ARM) && (M_ARM == 7))
+#define BROTLI_TARGET_ARMV7
+#endif  /* ARMv7 */
+
+#if (defined(__ARM_ARCH) && (__ARM_ARCH == 8)) || \
+    defined(__aarch64__) || defined(__ARM64_ARCH_8__)
+#define BROTLI_TARGET_ARMV8_ANY
+
+#if defined(__ARM_32BIT_STATE)
+#define BROTLI_TARGET_ARMV8_32
+#elif defined(__ARM_64BIT_STATE)
+#define BROTLI_TARGET_ARMV8_64
+#endif
+
+#endif  /* ARMv8 */
+
+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+#define BROTLI_TARGET_NEON
+#endif
+
+#if defined(__i386) || defined(_M_IX86)
+#define BROTLI_TARGET_X86
+#endif
+
+#if defined(__x86_64__) || defined(_M_X64)
+#define BROTLI_TARGET_X64
+#endif
+
+#if defined(__PPC64__)
+#define BROTLI_TARGET_POWERPC64
+#endif
+
+#if defined(__riscv) && defined(__riscv_xlen) && __riscv_xlen == 64
+#define BROTLI_TARGET_RISCV64
+#endif
+
+#if defined(BROTLI_BUILD_64_BIT)  /* explicit build options are checked first */
+#define BROTLI_64_BITS 1
+#elif defined(BROTLI_BUILD_32_BIT)
+#define BROTLI_64_BITS 0
+#elif defined(BROTLI_TARGET_X64) || defined(BROTLI_TARGET_ARMV8_64) || \
+    defined(BROTLI_TARGET_POWERPC64) || defined(BROTLI_TARGET_RISCV64)
+#define BROTLI_64_BITS 1
+#else  /* target not recognized above: use the 32-bit setting */
+#define BROTLI_64_BITS 0
+#endif
+
+#if (BROTLI_64_BITS)  /* brotli_reg_t width follows BROTLI_64_BITS */
+#define brotli_reg_t uint64_t
+#else  /* 32-bit setting */
+#define brotli_reg_t uint32_t
+#endif
+
+#if defined(BROTLI_BUILD_BIG_ENDIAN)
+#define BROTLI_BIG_ENDIAN 1
+#elif defined(BROTLI_BUILD_LITTLE_ENDIAN)
+#define BROTLI_LITTLE_ENDIAN 1
+#elif defined(BROTLI_BUILD_ENDIAN_NEUTRAL)
+/* Just break elif chain. */
+#elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#define BROTLI_LITTLE_ENDIAN 1
+#elif defined(_WIN32) || defined(BROTLI_TARGET_X64)
+/* Win32 & x64 can currently always be assumed to be little endian */
+#define BROTLI_LITTLE_ENDIAN 1
+#elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#define BROTLI_BIG_ENDIAN 1
+#elif defined(BROTLI_X_BYTE_ORDER)
+#if BROTLI_X_BYTE_ORDER == BROTLI_X_LITTLE_ENDIAN
+#define BROTLI_LITTLE_ENDIAN 1
+#elif BROTLI_X_BYTE_ORDER == BROTLI_X_BIG_ENDIAN
+#define BROTLI_BIG_ENDIAN 1
+#endif
+#endif  /* BROTLI_X_BYTE_ORDER */
+
+#if !defined(BROTLI_LITTLE_ENDIAN)
+#define BROTLI_LITTLE_ENDIAN 0
+#endif
+
+#if !defined(BROTLI_BIG_ENDIAN)
+#define BROTLI_BIG_ENDIAN 0
+#endif
+
+#if defined(BROTLI_X_BYTE_ORDER)
+#undef BROTLI_X_BYTE_ORDER
+#undef BROTLI_X_LITTLE_ENDIAN
+#undef BROTLI_X_BIG_ENDIAN
+#endif
+
+#if defined(BROTLI_BUILD_PORTABLE)
+#define BROTLI_ALIGNED_READ (!!1)
+#elif defined(BROTLI_TARGET_X86) || defined(BROTLI_TARGET_X64) || \
+    defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_ANY) || \
+    defined(BROTLI_TARGET_RISCV64)
+/* Allow unaligned read only for white-listed CPUs. */
+#define BROTLI_ALIGNED_READ (!!0)
+#else
+#define BROTLI_ALIGNED_READ (!!1)
+#endif
+
+#if BROTLI_ALIGNED_READ
+/* Portable unaligned memory access: read / write values via memcpy. */
+static BROTLI_INLINE uint16_t BrotliUnalignedRead16(const void* p) {
+  uint16_t value;  /* filled through memcpy instead of a typed load */
+  memcpy(&value, p, sizeof(value));
+  return value;
+}
+static BROTLI_INLINE uint32_t BrotliUnalignedRead32(const void* p) {
+  uint32_t value;  /* filled through memcpy instead of a typed load */
+  memcpy(&value, p, sizeof(value));
+  return value;
+}
+static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) {
+  uint64_t value;  /* filled through memcpy instead of a typed load */
+  memcpy(&value, p, sizeof(value));
+  return value;
+}
+static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) {
+  memcpy(p, &v, sizeof(uint64_t));  /* copies the 8 bytes of |v| to |p| */
+}
+#else  /* BROTLI_ALIGNED_READ */
+/* Unaligned memory access is allowed: just cast pointer to requested type. */
+#if BROTLI_SANITIZED
+/* Consider we have an unaligned load/store of 4 bytes from address 0x...05.
+   AddressSanitizer will treat it as a 3-byte access to the range 05:07 and
+   will miss a bug if 08 is the first unaddressable byte.
+   ThreadSanitizer will also treat this as a 3-byte access to 05:07 and will
+   miss a race between this access and some other accesses to 08.
+   MemorySanitizer will correctly propagate the shadow on unaligned stores
+   and correctly report bugs on unaligned loads, but it may not properly
+   update and report the origin of the uninitialized memory.
+   For all three tools, replacing an unaligned access with a tool-specific
+   callback solves the problem. */
+#if defined(__cplusplus)
+extern "C" {
+#endif  /* __cplusplus */
+  uint16_t __sanitizer_unaligned_load16(const void* p);
+  uint32_t __sanitizer_unaligned_load32(const void* p);
+  uint64_t __sanitizer_unaligned_load64(const void* p);
+  void __sanitizer_unaligned_store64(void* p, uint64_t v);
+#if defined(__cplusplus)
+}  /* extern "C" */
+#endif  /* __cplusplus */
+#define BrotliUnalignedRead16 __sanitizer_unaligned_load16
+#define BrotliUnalignedRead32 __sanitizer_unaligned_load32
+#define BrotliUnalignedRead64 __sanitizer_unaligned_load64
+#define BrotliUnalignedWrite64 __sanitizer_unaligned_store64
+#else  /* BROTLI_SANITIZED */
+static BROTLI_INLINE uint16_t BrotliUnalignedRead16(const void* p) {
+  return *(const uint16_t*)p;
+}
+static BROTLI_INLINE uint32_t BrotliUnalignedRead32(const void* p) {
+  return *(const uint32_t*)p;
+}
+#if (BROTLI_64_BITS)
+static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) {
+  return *(const uint64_t*)p;
+}
+static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) {
+  *(uint64_t*)p = v;
+}
+#else  /* BROTLI_64_BITS */
+/* Avoid emitting LDRD / STRD, which require properly aligned address. */
+/* If __attribute__(aligned) is available, use that. Otherwise, memcpy. */
+
+#if BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0)
+typedef BROTLI_ALIGNED(1) uint64_t brotli_unaligned_uint64_t;
+
+static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) {
+  return (uint64_t) ((brotli_unaligned_uint64_t*) p)[0];
+}
+static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) {
+  brotli_unaligned_uint64_t* dwords = (brotli_unaligned_uint64_t*) p;
+  dwords[0] = (brotli_unaligned_uint64_t) v;
+}
+#else /* BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0) */
+static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) {
+  uint64_t v;
+  memcpy(&v, p, sizeof(uint64_t));
+  return v;
+}
+
+static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) {
+  memcpy(p, &v, sizeof(uint64_t));
+}
+#endif  /* BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0) */
+#endif  /* BROTLI_64_BITS */
+#endif  /* BROTLI_SANITIZED */
+#endif  /* BROTLI_ALIGNED_READ */
+
+#if BROTLI_LITTLE_ENDIAN
+/* Straight endianness. Just read / write values. */
+#define BROTLI_UNALIGNED_LOAD16LE BrotliUnalignedRead16
+#define BROTLI_UNALIGNED_LOAD32LE BrotliUnalignedRead32
+#define BROTLI_UNALIGNED_LOAD64LE BrotliUnalignedRead64
+#define BROTLI_UNALIGNED_STORE64LE BrotliUnalignedWrite64
+#elif BROTLI_BIG_ENDIAN  /* BROTLI_LITTLE_ENDIAN */
+/* Explain compiler to byte-swap values. */
+#define BROTLI_BSWAP16_(V) ((uint16_t)( \
+  (((V) & 0xFFU) << 8) | \
+  (((V) >> 8) & 0xFFU)))
+static BROTLI_INLINE uint16_t BROTLI_UNALIGNED_LOAD16LE(const void* p) {
+  uint16_t value = BrotliUnalignedRead16(p);
+  return BROTLI_BSWAP16_(value);
+}
+#define BROTLI_BSWAP32_(V) ( \
+  (((V) & 0xFFU) << 24) | (((V) & 0xFF00U) << 8) | \
+  (((V) >> 8) & 0xFF00U) | (((V) >> 24) & 0xFFU))
+static BROTLI_INLINE uint32_t BROTLI_UNALIGNED_LOAD32LE(const void* p) {
+  uint32_t value = BrotliUnalignedRead32(p);
+  return BROTLI_BSWAP32_(value);
+}
+#define BROTLI_BSWAP64_(V) ( \
+  (((V) & 0xFFU) << 56) | (((V) & 0xFF00U) << 40) | \
+  (((V) & 0xFF0000U) << 24) | (((V) & 0xFF000000U) << 8) | \
+  (((V) >> 8) & 0xFF000000U) | (((V) >> 24) & 0xFF0000U) | \
+  (((V) >> 40) & 0xFF00U) | (((V) >> 56) & 0xFFU))
+static BROTLI_INLINE uint64_t BROTLI_UNALIGNED_LOAD64LE(const void* p) {
+  uint64_t value = BrotliUnalignedRead64(p);
+  return BROTLI_BSWAP64_(value);
+}
+static BROTLI_INLINE void BROTLI_UNALIGNED_STORE64LE(void* p, uint64_t v) {
+  uint64_t value = BROTLI_BSWAP64_(v);
+  BrotliUnalignedWrite64(p, value);
+}
+#else  /* BROTLI_LITTLE_ENDIAN */
+/* Read / store values byte-wise; hopefully compiler will understand. */
+static BROTLI_INLINE uint16_t BROTLI_UNALIGNED_LOAD16LE(const void* p) {
+  const uint8_t* bytes = (const uint8_t*)p;  /* bytes[0] is the low octet */
+  return (uint16_t)((bytes[1] << 8) | bytes[0]);
+}
+static BROTLI_INLINE uint32_t BROTLI_UNALIGNED_LOAD32LE(const void* p) {
+  const uint8_t* bytes = (const uint8_t*)p;
+  uint32_t result = 0;
+  int i;
+  /* bytes[0] is least significant: shift octets in from the top one down. */
+  for (i = 3; i >= 0; --i) result = (result << 8) | (uint32_t)bytes[i];
+  return result;
+}
+static BROTLI_INLINE uint64_t BROTLI_UNALIGNED_LOAD64LE(const void* p) {
+  /* Assemble a little-endian 64-bit value one octet at a time; only
+     single-byte reads are performed on |p|. */
+  const uint8_t* bytes = (const uint8_t*)p;
+  uint64_t result = 0;
+  int i;
+  for (i = 7; i >= 0; --i) {
+    /* Octets are shifted in from most to least significant. */
+    result = (result << 8) | (uint64_t)bytes[i];
+  }
+  return result;
+}
+static BROTLI_INLINE void BROTLI_UNALIGNED_STORE64LE(void* p, uint64_t v) {
+  /* Write |v| least-significant octet first; only single-byte stores
+     are performed on |p|. */
+  uint8_t* bytes = (uint8_t*)p;
+  int i;
+  for (i = 0; i < 8; ++i) {
+    /* Emit the current low octet, then expose the next one. */
+    bytes[i] = (uint8_t)v;
+    v >>= 8;
+  }
+}
+#endif  /* BROTLI_LITTLE_ENDIAN */
+
+/* BROTLI_IS_CONSTANT macros returns true for compile-time constants. */
+#if BROTLI_GNUC_HAS_BUILTIN(__builtin_constant_p, 3, 0, 1) || \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0)
+#define BROTLI_IS_CONSTANT(x) (!!__builtin_constant_p(x))
+#else
+#define BROTLI_IS_CONSTANT(x) (!!0)
+#endif
+
+#if defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_ANY)
+#define BROTLI_HAS_UBFX (!!1)
+#else
+#define BROTLI_HAS_UBFX (!!0)
+#endif
+
+#if defined(BROTLI_ENABLE_LOG)
+#define BROTLI_LOG(x) printf x
+#else
+#define BROTLI_LOG(x)
+#endif
+
+#if defined(BROTLI_DEBUG) || defined(BROTLI_ENABLE_LOG)
+#define BROTLI_DCHECK(x) assert(x)
+static BROTLI_INLINE void BrotliDump(const char* f, int l, const char* fn) {
+  fprintf(stderr, "%s:%d (%s)\n", f, l, fn);
+  fflush(stderr);
+}
+#define BROTLI_DUMP() BrotliDump(__FILE__, __LINE__, __FUNCTION__)
+#else
+#define BROTLI_DCHECK(x)
+#define BROTLI_DUMP() (void)(0)
+#endif
+
+/* TODO: add appropriate icc/sunpro/arm/ibm/ti checks. */
+#if (BROTLI_GNUC_VERSION_CHECK(3, 0, 0) || defined(__llvm__)) && \
+    !defined(BROTLI_BUILD_NO_RBIT)
+#if defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_ANY)
+/* TODO: detect ARMv6T2 and enable this code for it. */
+static BROTLI_INLINE brotli_reg_t BrotliRBit(brotli_reg_t input) {
+  brotli_reg_t output;
+  __asm__("rbit %0, %1\n" : "=r"(output) : "r"(input));
+  return output;
+}
+#define BROTLI_RBIT(x) BrotliRBit(x)
+#endif  /* armv7 / armv8 */
+#endif  /* gcc || clang */
+#if !defined(BROTLI_RBIT)
+static BROTLI_INLINE void BrotliRBit(void) { /* Should break build if used. */ }
+#endif  /* BROTLI_RBIT */
+
+#define BROTLI_REPEAT(N, X) { /* runs X ((N) & 7) times */ \
+  if (((N) & 1) != 0) {X;}          \
+  if (((N) & 2) != 0) {X; X;}       \
+  if (((N) & 4) != 0) {X; X; X; X;} \
+}
+
+#define BROTLI_UNUSED(X) (void)(X)
+
+#define BROTLI_MIN_MAX(T)                                                      \
+  static BROTLI_INLINE T brotli_min_ ## T (T a, T b) { return a < b ? a : b; } \
+  static BROTLI_INLINE T brotli_max_ ## T (T a, T b) { return a > b ? a : b; }
+BROTLI_MIN_MAX(double) BROTLI_MIN_MAX(float) BROTLI_MIN_MAX(int)
+BROTLI_MIN_MAX(size_t) BROTLI_MIN_MAX(uint32_t) BROTLI_MIN_MAX(uint8_t)
+#undef BROTLI_MIN_MAX
+#define BROTLI_MIN(T, A, B) (brotli_min_ ## T((A), (B)))
+#define BROTLI_MAX(T, A, B) (brotli_max_ ## T((A), (B)))
+
+#define BROTLI_SWAP(T, A, I, J) { /* exchanges (A)[I] and (A)[J] */ \
+  T brotli_swap_tmp_ = (A)[(I)];  \
+  (A)[(I)] = (A)[(J)];            \
+  (A)[(J)] = brotli_swap_tmp_;    \
+}
+
+/* Default brotli_alloc_func: forwards to malloc(), ignoring |opaque|. */
+static void* BrotliDefaultAllocFunc(void* opaque, size_t size) {
+  (void)(opaque);  /* unused */
+  return malloc(size);
+}
+
+/* Default brotli_free_func: forwards to free(), ignoring |opaque|. */
+static void BrotliDefaultFreeFunc(void* opaque, void* address) {
+  (void)(opaque);  /* unused */
+  free(address);
+}
+
+BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) {
+  BROTLI_UNUSED(&BrotliSuppressUnusedFunctions);  /* references itself too */
+  BROTLI_UNUSED(&BrotliUnalignedRead16);  /* each line only takes an address */
+  BROTLI_UNUSED(&BrotliUnalignedRead32);
+  BROTLI_UNUSED(&BrotliUnalignedRead64);
+  BROTLI_UNUSED(&BrotliUnalignedWrite64);
+  BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD16LE);
+  BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD32LE);
+  BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD64LE);
+  BROTLI_UNUSED(&BROTLI_UNALIGNED_STORE64LE);
+  BROTLI_UNUSED(&BrotliRBit);
+  BROTLI_UNUSED(&brotli_min_double);  /* helpers generated by BROTLI_MIN_MAX */
+  BROTLI_UNUSED(&brotli_max_double);
+  BROTLI_UNUSED(&brotli_min_float);
+  BROTLI_UNUSED(&brotli_max_float);
+  BROTLI_UNUSED(&brotli_min_int);
+  BROTLI_UNUSED(&brotli_max_int);
+  BROTLI_UNUSED(&brotli_min_size_t);
+  BROTLI_UNUSED(&brotli_max_size_t);
+  BROTLI_UNUSED(&brotli_min_uint32_t);
+  BROTLI_UNUSED(&brotli_max_uint32_t);
+  BROTLI_UNUSED(&brotli_min_uint8_t);
+  BROTLI_UNUSED(&brotli_max_uint8_t);
+  BROTLI_UNUSED(&BrotliDefaultAllocFunc);
+  BROTLI_UNUSED(&BrotliDefaultFreeFunc);
+#if defined(BROTLI_DEBUG) || defined(BROTLI_ENABLE_LOG)
+  BROTLI_UNUSED(&BrotliDump);  /* BrotliDump exists only in debug / log builds */
+#endif
+}
+
+#endif  /* BROTLI_COMMON_PLATFORM_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/transform.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/transform.c
new file mode 100755
index 0000000000..c44f671509
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/transform.c
@@ -0,0 +1,291 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "./transform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* RFC 7932 transforms string data */
+static const char kPrefixSuffix[217] =
+      "\1 \2, \10 of the \4 of \2s \1.\5 and \4 "
+/* 0x  _0 _2  __5        _E    _3  _6 _8     _E */
+      "in \1\"\4 to \2\">\1\n\2. \1]\5 for \3 a \6 "
+/* 2x     _3_ _5    _A_  _D_ _F  _2 _4     _A   _E */
+      "that \1\'\6 with \6 from \4 by \1(\6. T"
+/* 4x       _5_ _7      _E      _5    _A _C */
+      "he \4 on \4 as \4 is \4ing \2\n\t\1:\3ed "
+/* 6x     _3    _8    _D    _2    _7_ _ _A _C */
+      "\2=\"\4 at \3ly \1,\2=\'\5.com/\7. This \5"
+/* 8x  _0 _ _3    _8   _C _E _ _1     _7       _F */
+      " not \3er \3al \4ful \4ive \5less \4es"
+/* Ax       _5   _9   _D    _2    _7     _D */
+      "t \4ize \2\xc2\xa0\4ous \5 the \2e "; /* \0 - implicit trailing zero. */
+/* Cx    _2    _7___ ___ _A    _F     _5        _8 */
+
+static const uint16_t kPrefixSuffixMap[50] = {  /* id -> kPrefixSuffix offset */
+  0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25,
+  0x2A, 0x2D, 0x2F, 0x32, 0x34, 0x3A, 0x3E, 0x45, 0x47, 0x4E,
+  0x55, 0x5A, 0x5C, 0x63, 0x68, 0x6D, 0x72, 0x77, 0x7A, 0x7C,
+  0x80, 0x83, 0x88, 0x8C, 0x8E, 0x91, 0x97, 0x9F, 0xA5, 0xA9,
+  0xAD, 0xB2, 0xB7, 0xBD, 0xC2, 0xC7, 0xCA, 0xCF, 0xD5, 0xD8
+};  /* id 49 -> 0xD8 = 216, the last byte of kPrefixSuffix[217] */
+
+/* RFC 7932 transforms */
+static const uint8_t kTransformsData[] = {
+  49, BROTLI_TRANSFORM_IDENTITY, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 0,
+   0, BROTLI_TRANSFORM_IDENTITY, 0,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_1, 49,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
+  49, BROTLI_TRANSFORM_IDENTITY, 47,
+   0, BROTLI_TRANSFORM_IDENTITY, 49,
+   4, BROTLI_TRANSFORM_IDENTITY, 0,
+  49, BROTLI_TRANSFORM_IDENTITY, 3,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 6,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_2, 49,
+  49, BROTLI_TRANSFORM_OMIT_LAST_1, 49,
+   1, BROTLI_TRANSFORM_IDENTITY, 0,
+  49, BROTLI_TRANSFORM_IDENTITY, 1,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
+  49, BROTLI_TRANSFORM_IDENTITY, 7,
+  49, BROTLI_TRANSFORM_IDENTITY, 9,
+  48, BROTLI_TRANSFORM_IDENTITY, 0,
+  49, BROTLI_TRANSFORM_IDENTITY, 8,
+  49, BROTLI_TRANSFORM_IDENTITY, 5,
+  49, BROTLI_TRANSFORM_IDENTITY, 10,
+  49, BROTLI_TRANSFORM_IDENTITY, 11,
+  49, BROTLI_TRANSFORM_OMIT_LAST_3, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 13,
+  49, BROTLI_TRANSFORM_IDENTITY, 14,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_3, 49,
+  49, BROTLI_TRANSFORM_OMIT_LAST_2, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 15,
+  49, BROTLI_TRANSFORM_IDENTITY, 16,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 12,
+   5, BROTLI_TRANSFORM_IDENTITY, 49,
+   0, BROTLI_TRANSFORM_IDENTITY, 1,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_4, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 18,
+  49, BROTLI_TRANSFORM_IDENTITY, 17,
+  49, BROTLI_TRANSFORM_IDENTITY, 19,
+  49, BROTLI_TRANSFORM_IDENTITY, 20,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_5, 49,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_6, 49,
+  47, BROTLI_TRANSFORM_IDENTITY, 49,
+  49, BROTLI_TRANSFORM_OMIT_LAST_4, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 22,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 23,
+  49, BROTLI_TRANSFORM_IDENTITY, 24,
+  49, BROTLI_TRANSFORM_IDENTITY, 25,
+  49, BROTLI_TRANSFORM_OMIT_LAST_7, 49,
+  49, BROTLI_TRANSFORM_OMIT_LAST_1, 26,
+  49, BROTLI_TRANSFORM_IDENTITY, 27,
+  49, BROTLI_TRANSFORM_IDENTITY, 28,
+   0, BROTLI_TRANSFORM_IDENTITY, 12,
+  49, BROTLI_TRANSFORM_IDENTITY, 29,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_9, 49,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_7, 49,
+  49, BROTLI_TRANSFORM_OMIT_LAST_6, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 21,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
+  49, BROTLI_TRANSFORM_OMIT_LAST_8, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 31,
+  49, BROTLI_TRANSFORM_IDENTITY, 32,
+  47, BROTLI_TRANSFORM_IDENTITY, 3,
+  49, BROTLI_TRANSFORM_OMIT_LAST_5, 49,
+  49, BROTLI_TRANSFORM_OMIT_LAST_9, 49,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 8,
+   5, BROTLI_TRANSFORM_IDENTITY, 21,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 10,
+  49, BROTLI_TRANSFORM_IDENTITY, 30,
+   0, BROTLI_TRANSFORM_IDENTITY, 5,
+  35, BROTLI_TRANSFORM_IDENTITY, 49,
+  47, BROTLI_TRANSFORM_IDENTITY, 2,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 17,
+  49, BROTLI_TRANSFORM_IDENTITY, 36,
+  49, BROTLI_TRANSFORM_IDENTITY, 33,
+   5, BROTLI_TRANSFORM_IDENTITY, 0,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 21,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
+  49, BROTLI_TRANSFORM_IDENTITY, 37,
+   0, BROTLI_TRANSFORM_IDENTITY, 30,
+  49, BROTLI_TRANSFORM_IDENTITY, 38,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
+  49, BROTLI_TRANSFORM_IDENTITY, 39,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 34,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 8,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
+   0, BROTLI_TRANSFORM_IDENTITY, 21,
+  49, BROTLI_TRANSFORM_IDENTITY, 40,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
+  49, BROTLI_TRANSFORM_IDENTITY, 41,
+  49, BROTLI_TRANSFORM_IDENTITY, 42,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 17,
+  49, BROTLI_TRANSFORM_IDENTITY, 43,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 10,
+   0, BROTLI_TRANSFORM_IDENTITY, 34,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
+  49, BROTLI_TRANSFORM_IDENTITY, 44,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
+  45, BROTLI_TRANSFORM_IDENTITY, 49,
+   0, BROTLI_TRANSFORM_IDENTITY, 33,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
+  49, BROTLI_TRANSFORM_IDENTITY, 46,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 33,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 21,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
+};
+
+static BrotliTransforms kBrotliTransforms = {
+  sizeof(kPrefixSuffix),  /* prefix_suffix_size */
+  (const uint8_t*)kPrefixSuffix,  /* prefix_suffix */
+  kPrefixSuffixMap,  /* prefix_suffix_map */
+  sizeof(kTransformsData) / (3 * sizeof(kTransformsData[0])),  /* triplets */
+  kTransformsData,  /* transforms */
+  NULL,  /* params: no extra parameters */
+  {0, 12, 27, 23, 42, 63, 56, 48, 59, 64}  /* cutOffTransforms */
+};
+
+const BrotliTransforms* BrotliGetTransforms(void) {
+  return &kBrotliTransforms;  /* the single file-static table defined above */
+}
+
+static int ToUpperCase(uint8_t* p) {  /* returns bytes consumed: 1, 2 or 3 */
+  if (p[0] < 0xC0) {
+    if (p[0] >= 'a' && p[0] <= 'z') {
+      p[0] ^= 32;  /* flips bit 5: 'a'..'z' becomes 'A'..'Z' */
+    }
+    return 1;
+  }
+  /* An overly simplified uppercasing model for UTF-8 (no length check). */
+  if (p[0] < 0xE0) {
+    p[1] ^= 32;
+    return 2;
+  }
+  /* An arbitrary transform for three byte characters (any lead >= 0xE0). */
+  p[2] ^= 5;
+  return 3;
+}
+
+static int Shift(uint8_t* word, int word_len, uint16_t parameter) {
+  /* Limited sign extension: scalar < (1 << 24). */
+  uint32_t scalar =
+      (parameter & 0x7FFFu) + (0x1000000u - (parameter & 0x8000u));
+  if (word[0] < 0x80) {
+    /* 1-byte rune / 0sssssss / 7 bit scalar (ASCII). */
+    scalar += (uint32_t)word[0];
+    word[0] = (uint8_t)(scalar & 0x7Fu);
+    return 1;
+  } else if (word[0] < 0xC0) {
+    /* Continuation / 10AAAAAA: byte is left unchanged. */
+    return 1;
+  } else if (word[0] < 0xE0) {
+    /* 2-byte rune / 110sssss AAssssss / 11 bit scalar. */
+    if (word_len < 2) return 1;  /* truncated rune: left unchanged */
+    scalar += (uint32_t)((word[1] & 0x3Fu) | ((word[0] & 0x1Fu) << 6u));
+    word[0] = (uint8_t)(0xC0 | ((scalar >> 6u) & 0x1F));
+    word[1] = (uint8_t)((word[1] & 0xC0) | (scalar & 0x3F));
+    return 2;
+  } else if (word[0] < 0xF0) {
+    /* 3-byte rune / 1110ssss AAssssss BBssssss / 16 bit scalar. */
+    if (word_len < 3) return word_len;  /* truncated: consume the rest */
+    scalar += (uint32_t)((word[2] & 0x3Fu) | ((word[1] & 0x3Fu) << 6u) |
+        ((word[0] & 0x0Fu) << 12u));
+    word[0] = (uint8_t)(0xE0 | ((scalar >> 12u) & 0x0F));
+    word[1] = (uint8_t)((word[1] & 0xC0) | ((scalar >> 6u) & 0x3F));
+    word[2] = (uint8_t)((word[2] & 0xC0) | (scalar & 0x3F));
+    return 3;
+  } else if (word[0] < 0xF8) {
+    /* 4-byte rune / 11110sss AAssssss BBssssss CCssssss / 21 bit scalar. */
+    if (word_len < 4) return word_len;  /* truncated: consume the rest */
+    scalar += (uint32_t)((word[3] & 0x3Fu) | ((word[2] & 0x3Fu) << 6u) |
+        ((word[1] & 0x3Fu) << 12u) | ((word[0] & 0x07u) << 18u));
+    word[0] = (uint8_t)(0xF0 | ((scalar >> 18u) & 0x07));
+    word[1] = (uint8_t)((word[1] & 0xC0) | ((scalar >> 12u) & 0x3F));
+    word[2] = (uint8_t)((word[2] & 0xC0) | ((scalar >> 6u) & 0x3F));
+    word[3] = (uint8_t)((word[3] & 0xC0) | (scalar & 0x3F));
+    return 4;
+  }
+  return 1;  /* lead byte 0xF8..0xFF: left unchanged, skip one byte */
+}
+
+int BrotliTransformDictionaryWord(uint8_t* dst, const uint8_t* word, int len,
+    const BrotliTransforms* transforms, int transform_idx) {
+  int idx = 0;  /* bytes written to |dst| so far; also the return value */
+  const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, transform_idx);
+  uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, transform_idx);
+  const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, transform_idx);
+  {
+    int prefix_len = *prefix++;  /* first byte of the entry is its length */
+    while (prefix_len--) { dst[idx++] = *prefix++; }
+  }
+  {
+    const int t = type;
+    int i = 0;
+    if (t <= BROTLI_TRANSFORM_OMIT_LAST_9) {
+      len -= t;  /* drops the last t bytes; t == 0 is the identity transform */
+    } else if (t >= BROTLI_TRANSFORM_OMIT_FIRST_1
+        && t <= BROTLI_TRANSFORM_OMIT_FIRST_9) {
+      int skip = t - (BROTLI_TRANSFORM_OMIT_FIRST_1 - 1);  /* 1..9 */
+      word += skip;
+      len -= skip;
+    }
+    while (i < len) { dst[idx++] = word[i++]; }  /* no-op when len <= 0 */
+    if (t == BROTLI_TRANSFORM_UPPERCASE_FIRST) {
+      ToUpperCase(&dst[idx - len]);  /* &dst[idx - len] is the copied word */
+    } else if (t == BROTLI_TRANSFORM_UPPERCASE_ALL) {
+      uint8_t* uppercase = &dst[idx - len];
+      while (len > 0) {
+        int step = ToUpperCase(uppercase);  /* 1..3, not checked against len */
+        uppercase += step;
+        len -= step;
+      }
+    } else if (t == BROTLI_TRANSFORM_SHIFT_FIRST) {
+      /* 16-bit parameter: two bytes per transform in |params|, low byte first. */
+      uint16_t param = (uint16_t)(transforms->params[transform_idx * 2]
+          + (transforms->params[transform_idx * 2 + 1] << 8u));
+      Shift(&dst[idx - len], len, param);
+    } else if (t == BROTLI_TRANSFORM_SHIFT_ALL) {
+      uint16_t param = (uint16_t)(transforms->params[transform_idx * 2]
+          + (transforms->params[transform_idx * 2 + 1] << 8u));
+      uint8_t* shift = &dst[idx - len];
+      while (len > 0) {
+        int step = Shift(shift, len, param);  /* bytes consumed by one rune */
+        shift += step;
+        len -= step;
+      }
+    }
+  }
+  {
+    int suffix_len = *suffix++;  /* same length-prefixed layout as the prefix */
+    while (suffix_len--) { dst[idx++] = *suffix++; }
+    return idx;
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/transform.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/transform.h
new file mode 100755
index 0000000000..b6f86cc7d5
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/transform.h
@@ -0,0 +1,85 @@
+/* transforms is a part of ABI, but not API.
+
+   It means that there are some functions that are supposed to be in "common"
+   library, but header itself is not placed into include/brotli. This way,
+   aforementioned functions will be available only to brotli internals.
+ */
+
+#ifndef BROTLI_COMMON_TRANSFORM_H_
+#define BROTLI_COMMON_TRANSFORM_H_
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+enum BrotliWordTransformType {
+  BROTLI_TRANSFORM_IDENTITY = 0,
+  BROTLI_TRANSFORM_OMIT_LAST_1 = 1,
+  BROTLI_TRANSFORM_OMIT_LAST_2 = 2,
+  BROTLI_TRANSFORM_OMIT_LAST_3 = 3,
+  BROTLI_TRANSFORM_OMIT_LAST_4 = 4,
+  BROTLI_TRANSFORM_OMIT_LAST_5 = 5,
+  BROTLI_TRANSFORM_OMIT_LAST_6 = 6,
+  BROTLI_TRANSFORM_OMIT_LAST_7 = 7,
+  BROTLI_TRANSFORM_OMIT_LAST_8 = 8,
+  BROTLI_TRANSFORM_OMIT_LAST_9 = 9,
+  BROTLI_TRANSFORM_UPPERCASE_FIRST = 10,
+  BROTLI_TRANSFORM_UPPERCASE_ALL = 11,
+  BROTLI_TRANSFORM_OMIT_FIRST_1 = 12,
+  BROTLI_TRANSFORM_OMIT_FIRST_2 = 13,
+  BROTLI_TRANSFORM_OMIT_FIRST_3 = 14,
+  BROTLI_TRANSFORM_OMIT_FIRST_4 = 15,
+  BROTLI_TRANSFORM_OMIT_FIRST_5 = 16,
+  BROTLI_TRANSFORM_OMIT_FIRST_6 = 17,
+  BROTLI_TRANSFORM_OMIT_FIRST_7 = 18,
+  BROTLI_TRANSFORM_OMIT_FIRST_8 = 19,
+  BROTLI_TRANSFORM_OMIT_FIRST_9 = 20,
+  BROTLI_TRANSFORM_SHIFT_FIRST = 21,
+  BROTLI_TRANSFORM_SHIFT_ALL = 22,
+  BROTLI_NUM_TRANSFORM_TYPES  /* Counts transforms, not a transform itself. */
+};
+
+#define BROTLI_TRANSFORMS_MAX_CUT_OFF BROTLI_TRANSFORM_OMIT_LAST_9
+
+typedef struct BrotliTransforms {
+  uint16_t prefix_suffix_size;  /* byte size of |prefix_suffix| */
+  /* Last character must be null, so prefix_suffix_size must be at least 1. */
+  const uint8_t* prefix_suffix;
+  const uint16_t* prefix_suffix_map;  /* prefix/suffix id -> offset in above */
+  uint32_t num_transforms;  /* number of triplets in |transforms| */
+  /* Each entry is a [prefix_id, transform, suffix_id] triplet. */
+  const uint8_t* transforms;
+  /* Shift for BROTLI_TRANSFORM_SHIFT_FIRST and BROTLI_TRANSFORM_SHIFT_ALL,
+     must be NULL if and only if no such transforms are present. */
+  const uint8_t* params;
+  /* Indices of transforms like ["", BROTLI_TRANSFORM_OMIT_LAST_#, ""].
+     0-th element corresponds to ["", BROTLI_TRANSFORM_IDENTITY, ""].
+     -1, if cut-off transform does not exist. */
+  int16_t cutOffTransforms[BROTLI_TRANSFORMS_MAX_CUT_OFF + 1];
+} BrotliTransforms;
+
+/* T is BrotliTransforms*; result is uint8_t. */
+#define BROTLI_TRANSFORM_PREFIX_ID(T, I) ((T)->transforms[((I) * 3) + 0])
+#define BROTLI_TRANSFORM_TYPE(T, I)      ((T)->transforms[((I) * 3) + 1])
+#define BROTLI_TRANSFORM_SUFFIX_ID(T, I) ((T)->transforms[((I) * 3) + 2])
+
+/* T is BrotliTransforms*; result is const uint8_t*. */
+#define BROTLI_TRANSFORM_PREFIX(T, I) (&(T)->prefix_suffix[ \
+    (T)->prefix_suffix_map[BROTLI_TRANSFORM_PREFIX_ID(T, I)]])
+#define BROTLI_TRANSFORM_SUFFIX(T, I) (&(T)->prefix_suffix[ \
+    (T)->prefix_suffix_map[BROTLI_TRANSFORM_SUFFIX_ID(T, I)]])
+
+BROTLI_COMMON_API const BrotliTransforms* BrotliGetTransforms(void);
+
+BROTLI_COMMON_API int BrotliTransformDictionaryWord(
+    uint8_t* dst, const uint8_t* word, int len,
+    const BrotliTransforms* transforms, int transform_idx);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_COMMON_TRANSFORM_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/version.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/version.h
new file mode 100644
index 0000000000..0d0d0c7967
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/common/version.h
@@ -0,0 +1,26 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Version definition. */
+
+#ifndef BROTLI_COMMON_VERSION_H_
+#define BROTLI_COMMON_VERSION_H_
+
+/* This macro should only be used when library is compiled together with client.
+   If library is dynamically linked, use BrotliDecoderVersion and
+   BrotliEncoderVersion methods. */
+
+/* Semantic version, calculated as (MAJOR << 24) | (MINOR << 12) | PATCH */
+#define BROTLI_VERSION 0x1000007
+
+/* This macro is used by build system to produce Libtool-friendly soname. See
+   https://www.gnu.org/software/libtool/manual/html_node/Libtool-versioning.html
+ */
+
+/* ABI version, calculated as (CURRENT << 24) | (REVISION << 12) | AGE */
+#define BROTLI_ABI_VERSION 0x1007000
+
+#endif  /* BROTLI_COMMON_VERSION_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/bit_reader.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/bit_reader.c
new file mode 100644
index 0000000000..41cd0504f2
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/bit_reader.c
@@ -0,0 +1,65 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Bit reading helpers */
+
+#include "./bit_reader.h"
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+void BrotliInitBitReader(BrotliBitReader* const br) {
+  br->val_ = 0;
+  br->bit_pos_ = sizeof(br->val_) << 3;  /* full width: no bits available */
+}
+
+BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br) {
+  size_t aligned_read_mask = (sizeof(br->val_) >> 1) - 1;  /* half-register */
+  /* Fixing alignment after unaligned BrotliFillWindow would result in
+     accumulator overflow. If unalignment is caused by BrotliSafeReadBits, then
+     there is enough space in accumulator to fix alignment. */
+  if (!BROTLI_ALIGNED_READ) {
+    aligned_read_mask = 0;  /* disables the alignment loop below */
+  }
+  if (BrotliGetAvailableBits(br) == 0) {
+    if (!BrotliPullByte(br)) {
+      return BROTLI_FALSE;  /* accumulator empty and no input left */
+    }
+  }
+
+  while ((((size_t)br->next_in) & aligned_read_mask) != 0) {
+    if (!BrotliPullByte(br)) {
+      /* If we consumed all the input, we don't care about the alignment. */
+      return BROTLI_TRUE;
+    }
+  }
+  return BROTLI_TRUE;
+}
+
+BROTLI_BOOL BrotliSafeReadBits32Slow(BrotliBitReader* const br,
+    uint32_t n_bits, uint32_t* val) {
+  uint32_t low_val;
+  uint32_t high_val;
+  BrotliBitReaderState memento;  /* snapshot used to undo a partial read */
+  BROTLI_DCHECK(n_bits <= 32);
+  BROTLI_DCHECK(n_bits > 24);
+  BrotliBitReaderSaveState(br, &memento);
+  if (!BrotliSafeReadBits(br, 16, &low_val) ||
+      !BrotliSafeReadBits(br, n_bits - 16, &high_val)) {
+    BrotliBitReaderRestoreState(br, &memento);  /* leave |br| as it was */
+    return BROTLI_FALSE;
+  }
+  *val = low_val | (high_val << 16);  /* first 16 bits read are the low half */
+  return BROTLI_TRUE;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/bit_reader.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/bit_reader.h
new file mode 100644
index 0000000000..f94a717ea3
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/bit_reader.h
@@ -0,0 +1,356 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Bit reading helpers */
+
+#ifndef BROTLI_DEC_BIT_READER_H_
+#define BROTLI_DEC_BIT_READER_H_
+
+#include <string.h>  /* memcpy */
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_SHORT_FILL_BIT_WINDOW_READ (sizeof(brotli_reg_t) >> 1)
+
+static const uint32_t kBitMask[33] = {  0x00000000,
+    0x00000001, 0x00000003, 0x00000007, 0x0000000F,
+    0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
+    0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,
+    0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF,
+    0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF,
+    0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF,
+    0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF,
+    0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
+};  /* kBitMask[n] has the n lowest bits set, for n in 0..32 */
+
+static BROTLI_INLINE uint32_t BitMask(uint32_t n) {
+  if (BROTLI_IS_CONSTANT(n) || BROTLI_HAS_UBFX) {
+    /* Becomes a single "Unsigned Bit Field Extract" (UBFX) instruction on ARM.
+       NOTE(review): a shift count of 32 is undefined here - confirm n < 32. */
+    return ~((0xFFFFFFFFu) << n);
+  } else {
+    return kBitMask[n];  /* table lookup: n lowest bits set */
+  }
+}
+
+typedef struct {
+  brotli_reg_t val_;       /* pre-fetched bits */
+  uint32_t bit_pos_;       /* current bit-reading position in val_ */
+  const uint8_t* next_in;  /* the byte we're reading from */
+  size_t avail_in;         /* bytes still unread at next_in */
+} BrotliBitReader;
+
+typedef struct {
+  brotli_reg_t val_;
+  uint32_t bit_pos_;
+  const uint8_t* next_in;
+  size_t avail_in;
+} BrotliBitReaderState;  /* same four fields as BrotliBitReader */
+
+/* Initializes the BrotliBitReader fields. */
+BROTLI_INTERNAL void BrotliInitBitReader(BrotliBitReader* const br);
+
+/* Ensures that accumulator is not empty.
+   May consume up to sizeof(brotli_reg_t) - 1 bytes of input.
+   Returns BROTLI_FALSE if data is required but there is no input available.
+   For BROTLI_ALIGNED_READ this function also prepares bit reader for aligned
+   reading. */
+BROTLI_INTERNAL BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br);
+
+/* Fallback for BrotliSafeReadBits32. Extracted as noninlined method to unburden
+   the main code-path. Never called for RFC brotli streams, required only for
+   "large-window" mode and other extensions. */
+BROTLI_INTERNAL BROTLI_NOINLINE BROTLI_BOOL BrotliSafeReadBits32Slow(
+    BrotliBitReader* const br, uint32_t n_bits, uint32_t* val);
+
+static BROTLI_INLINE void BrotliBitReaderSaveState(
+    BrotliBitReader* const from, BrotliBitReaderState* to) {
+  to->val_ = from->val_;  /* field-by-field copy of the reader into |to| */
+  to->bit_pos_ = from->bit_pos_;
+  to->next_in = from->next_in;
+  to->avail_in = from->avail_in;
+}
+
+static BROTLI_INLINE void BrotliBitReaderRestoreState(
+    BrotliBitReader* const to, BrotliBitReaderState* from) {
+  to->val_ = from->val_;  /* inverse of BrotliBitReaderSaveState */
+  to->bit_pos_ = from->bit_pos_;
+  to->next_in = from->next_in;
+  to->avail_in = from->avail_in;
+}
+
+static BROTLI_INLINE uint32_t BrotliGetAvailableBits(
+    const BrotliBitReader* br) {  /* accumulator width minus bit_pos_ */
+  return (BROTLI_64_BITS ? 64 : 32) - br->bit_pos_;
+}
+
+/* Returns the number of unread bytes the bit reader still has buffered from
+   the BrotliInput, including whole bytes in br->val_. */
+static BROTLI_INLINE size_t BrotliGetRemainingBytes(BrotliBitReader* br) {
+  return br->avail_in + (BrotliGetAvailableBits(br) >> 3);
+}
+
+/* Checks whether there are at least |num| bytes left in the input ring-buffer
+   (excluding the bits remaining in br->val_). */
+static BROTLI_INLINE BROTLI_BOOL BrotliCheckInputAmount(
+    BrotliBitReader* const br, size_t num) {
+  return TO_BROTLI_BOOL(br->avail_in >= num);
+}
+
+/* Guarantees that there are at least |n_bits| + 1 bits in accumulator.
+   Precondition: accumulator contains at least 1 bit.
+   |n_bits| should be in the range [1..24] for regular build. For portable
+   non-64-bit little-endian build only 16 bits are safe to request. */
+static BROTLI_INLINE void BrotliFillBitWindow(
+    BrotliBitReader* const br, uint32_t n_bits) {  /* avail_in not checked */
+#if (BROTLI_64_BITS)
+  if (!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 8)) {
+    if (br->bit_pos_ >= 56) {
+      br->val_ >>= 56;
+      br->bit_pos_ ^= 56;  /* here same as -= 56 because of the if condition */
+      br->val_ |= BROTLI_UNALIGNED_LOAD64LE(br->next_in) << 8;
+      br->avail_in -= 7;  /* 56 new bits kept from the 8-byte load */
+      br->next_in += 7;
+    }
+  } else if (
+      !BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 16)) {
+    if (br->bit_pos_ >= 48) {
+      br->val_ >>= 48;
+      br->bit_pos_ ^= 48;  /* here same as -= 48 because of the if condition */
+      br->val_ |= BROTLI_UNALIGNED_LOAD64LE(br->next_in) << 16;
+      br->avail_in -= 6;  /* 48 new bits kept from the 8-byte load */
+      br->next_in += 6;
+    }
+  } else {
+    if (br->bit_pos_ >= 32) {
+      br->val_ >>= 32;
+      br->bit_pos_ ^= 32;  /* here same as -= 32 because of the if condition */
+      br->val_ |= ((uint64_t)BROTLI_UNALIGNED_LOAD32LE(br->next_in)) << 32;
+      br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ;
+      br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ;
+    }
+  }
+#else
+  if (!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 8)) {
+    if (br->bit_pos_ >= 24) {
+      br->val_ >>= 24;
+      br->bit_pos_ ^= 24;  /* here same as -= 24 because of the if condition */
+      br->val_ |= BROTLI_UNALIGNED_LOAD32LE(br->next_in) << 8;
+      br->avail_in -= 3;  /* 24 new bits kept from the 4-byte load */
+      br->next_in += 3;
+    }
+  } else {
+    if (br->bit_pos_ >= 16) {
+      br->val_ >>= 16;
+      br->bit_pos_ ^= 16;  /* here same as -= 16 because of the if condition */
+      br->val_ |= ((uint32_t)BROTLI_UNALIGNED_LOAD16LE(br->next_in)) << 16;
+      br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ;
+      br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ;
+    }
+  }
+#endif
+}
+
+/* Mostly like BrotliFillBitWindow, but guarantees only 16 bits and reads no
+   more than BROTLI_SHORT_FILL_BIT_WINDOW_READ bytes of input. */
+static BROTLI_INLINE void BrotliFillBitWindow16(BrotliBitReader* const br) {
+  BrotliFillBitWindow(br, 17);  /* 17 fails the <= 8 / <= 16 fast-path tests */
+}
+
+/* Tries to pull one byte of input to accumulator.
+   Returns BROTLI_FALSE if there is no input available. */
+static BROTLI_INLINE BROTLI_BOOL BrotliPullByte(BrotliBitReader* const br) {
+  if (br->avail_in == 0) {
+    return BROTLI_FALSE;
+  }
+  br->val_ >>= 8;  /* make room: the new byte enters at the top of val_ */
+#if (BROTLI_64_BITS)
+  br->val_ |= ((uint64_t)*br->next_in) << 56;
+#else
+  br->val_ |= ((uint32_t)*br->next_in) << 24;
+#endif
+  br->bit_pos_ -= 8;  /* eight more bits are now available */
+  --br->avail_in;
+  ++br->next_in;
+  return BROTLI_TRUE;
+}
+
+/* Returns currently available bits, shifted down so the next unread bit is
+   bit 0. The number of valid bits is given by BrotliGetAvailableBits. */
+static BROTLI_INLINE brotli_reg_t BrotliGetBitsUnmasked(
+    BrotliBitReader* const br) {
+  return br->val_ >> br->bit_pos_;
+}
+
+/* Like BrotliGetBits, but does not mask the result.
+   The result contains at least 16 valid bits. */
+static BROTLI_INLINE uint32_t BrotliGet16BitsUnmasked(
+    BrotliBitReader* const br) {
+  BrotliFillBitWindow(br, 16);  /* refills first; bit position is unchanged */
+  return (uint32_t)BrotliGetBitsUnmasked(br);
+}
+
+/* Returns the specified number of bits from |br| without advancing bit
+   position. Refills the accumulator via BrotliFillBitWindow first. */
+static BROTLI_INLINE uint32_t BrotliGetBits(
+    BrotliBitReader* const br, uint32_t n_bits) {
+  BrotliFillBitWindow(br, n_bits);
+  return (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
+}
+
+/* Tries to peek the specified amount of bits. Returns BROTLI_FALSE, if there
+   is not enough input; bytes pulled before the failure stay in |br|. */
+static BROTLI_INLINE BROTLI_BOOL BrotliSafeGetBits(
+    BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  while (BrotliGetAvailableBits(br) < n_bits) {
+    if (!BrotliPullByte(br)) {
+      return BROTLI_FALSE;  /* |*val| is not written on failure */
+    }
+  }
+  *val = (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
+  return BROTLI_TRUE;
+}
+
+/* Advances the bit pos by |n_bits|; performs no availability check. */
+static BROTLI_INLINE void BrotliDropBits(
+    BrotliBitReader* const br, uint32_t n_bits) {
+  br->bit_pos_ += n_bits;
+}
+
+static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) {
+  uint32_t unused_bytes = BrotliGetAvailableBits(br) >> 3;  /* whole bytes */
+  uint32_t unused_bits = unused_bytes << 3;
+  br->avail_in += unused_bytes;  /* hand those bytes back to the input */
+  br->next_in -= unused_bytes;
+  if (unused_bits == sizeof(br->val_) << 3) {
+    br->val_ = 0;  /* avoids shifting by the full width of val_ */
+  } else {
+    br->val_ <<= unused_bits;
+  }
+  br->bit_pos_ += unused_bits;  /* fewer than 8 bits remain available */
+}
+
+/* Reads the specified number of bits from |br| and advances the bit pos.
+   Precondition: accumulator MUST contain at least |n_bits|; no refill here. */
+static BROTLI_INLINE void BrotliTakeBits(
+  BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  *val = (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
+  BROTLI_LOG(("[BrotliTakeBits]  %d %d %d val: %6x\n",
+      (int)br->avail_in, (int)br->bit_pos_, (int)n_bits, (int)*val));
+  BrotliDropBits(br, n_bits);
+}
+
+/* Reads the specified number of bits from |br| and advances the bit pos.
+   Assumes that there is enough input to perform BrotliFillBitWindow.
+   Up to 24 bits are allowed to be requested from this method. */
+static BROTLI_INLINE uint32_t BrotliReadBits24(
+    BrotliBitReader* const br, uint32_t n_bits) {
+  BROTLI_DCHECK(n_bits <= 24);
+  if (BROTLI_64_BITS || (n_bits <= 16)) {
+    uint32_t val;
+    BrotliFillBitWindow(br, n_bits);
+    BrotliTakeBits(br, n_bits, &val);
+    return val;
+  } else {
+    uint32_t low_val;  /* non-64-bit build, 17..24 bits: read in two steps */
+    uint32_t high_val;
+    BrotliFillBitWindow(br, 16);
+    BrotliTakeBits(br, 16, &low_val);
+    BrotliFillBitWindow(br, 8);  /* at most 8 bits remain to be read */
+    BrotliTakeBits(br, n_bits - 16, &high_val);
+    return low_val | (high_val << 16);
+  }
+}
+
+/* Same as BrotliReadBits24, but allows reading up to 32 bits. */
+static BROTLI_INLINE uint32_t BrotliReadBits32(
+    BrotliBitReader* const br, uint32_t n_bits) {
+  BROTLI_DCHECK(n_bits <= 32);
+  if (BROTLI_64_BITS || (n_bits <= 16)) {
+    uint32_t val;
+    BrotliFillBitWindow(br, n_bits);
+    BrotliTakeBits(br, n_bits, &val);
+    return val;
+  } else {
+    uint32_t low_val;  /* non-64-bit build, 17..32 bits: read in two steps */
+    uint32_t high_val;
+    BrotliFillBitWindow(br, 16);
+    BrotliTakeBits(br, 16, &low_val);
+    BrotliFillBitWindow(br, 16);  /* at most 16 bits remain to be read */
+    BrotliTakeBits(br, n_bits - 16, &high_val);
+    return low_val | (high_val << 16);
+  }
+}
+
+/* Tries to read the specified amount of bits. Returns BROTLI_FALSE, if there
+   is not enough input. |n_bits| MUST be positive.
+   Up to 24 bits are allowed to be requested from this method. */
+static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits(
+    BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  BROTLI_DCHECK(n_bits <= 24);
+  while (BrotliGetAvailableBits(br) < n_bits) {
+    if (!BrotliPullByte(br)) {
+      return BROTLI_FALSE;  /* bytes already pulled stay in the accumulator */
+    }
+  }
+  BrotliTakeBits(br, n_bits, val);
+  return BROTLI_TRUE;
+}
+
+/* Same as BrotliSafeReadBits, but allows reading up to 32 bits. */
+static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits32(
+    BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  BROTLI_DCHECK(n_bits <= 32);
+  if (BROTLI_64_BITS || (n_bits <= 24)) {
+    while (BrotliGetAvailableBits(br) < n_bits) {
+      if (!BrotliPullByte(br)) {
+        return BROTLI_FALSE;
+      }
+    }
+    BrotliTakeBits(br, n_bits, val);
+    return BROTLI_TRUE;
+  } else {
+    return BrotliSafeReadBits32Slow(br, n_bits, val);  /* 25..32, non-64-bit */
+  }
+}
+
+/* Advances the bit reader position to the next byte boundary and verifies
+   that any skipped bits are set to zero. */
+static BROTLI_INLINE BROTLI_BOOL BrotliJumpToByteBoundary(BrotliBitReader* br) {
+  uint32_t pad_bits_count = BrotliGetAvailableBits(br) & 0x7;  /* 0..7 */
+  uint32_t pad_bits = 0;
+  if (pad_bits_count != 0) {
+    BrotliTakeBits(br, pad_bits_count, &pad_bits);
+  }
+  return TO_BROTLI_BOOL(pad_bits == 0);  /* true as well if already aligned */
+}
+
+/* Copies remaining input bytes stored in the bit reader to the output. Value
+   |num| may not be larger than BrotliGetRemainingBytes. The bit reader must be
+   warmed up again after this. */
+static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest,
+                                          BrotliBitReader* br, size_t num) {
+  while (BrotliGetAvailableBits(br) >= 8 && num > 0) {
+    *dest = (uint8_t)BrotliGetBitsUnmasked(br);  /* drain accumulator first */
+    BrotliDropBits(br, 8);
+    ++dest;
+    --num;
+  }
+  memcpy(dest, br->next_in, num);  /* the rest comes straight from the input */
+  br->avail_in -= num;
+  br->next_in += num;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_DEC_BIT_READER_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/decode.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/decode.c
new file mode 100644
index 0000000000..9c10f50fbf
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/decode.c
@@ -0,0 +1,2605 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include <brotli/decode.h>
+
+#include <stdlib.h>  /* free, malloc */
+#include <string.h>  /* memcpy, memset */
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include "../common/transform.h"
+#include "../common/version.h"
+#include "./bit_reader.h"
+#include "./huffman.h"
+#include "./prefix.h"
+#include "./state.h"
+
+#if defined(BROTLI_TARGET_NEON)
+#include <arm_neon.h>
+#endif
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_FAILURE(CODE) (BROTLI_DUMP(), CODE)  /* BROTLI_DUMP, then CODE. */
+/* Logging helpers: print a value / array element, tagged with __func__. */
+#define BROTLI_LOG_UINT(name)                                       \
+  BROTLI_LOG(("[%s] %s = %lu\n", __func__, #name, (unsigned long)(name)))
+#define BROTLI_LOG_ARRAY_INDEX(array_name, idx)                     \
+  BROTLI_LOG(("[%s] %s[%lu] = %lu\n", __func__, #array_name,        \
+         (unsigned long)(idx), (unsigned long)array_name[idx]))
+
+#define HUFFMAN_TABLE_BITS 8U  /* Index width of the first-level table. */
+#define HUFFMAN_TABLE_MASK 0xFF  /* Keeps the low HUFFMAN_TABLE_BITS bits. */
+
+/* We need the slack region for the following reasons:
+    - doing up to two 16-byte copies for fast backward copying
+    - inserting transformed dictionary word:
+        5 prefix + 24 base + 8 suffix */
+static const uint32_t kRingBufferWriteAheadSlack = 42;
+
+static const uint8_t kCodeLengthCodeOrder[BROTLI_CODE_LENGTH_CODES] = {
+  1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+};  /* The i-th value read is stored at index kCodeLengthCodeOrder[i]. */
+
+/* Static prefix code for the complex code length code lengths. */
+static const uint8_t kCodeLengthPrefixLength[16] = {
+  2, 2, 2, 3, 2, 2, 2, 4, 2, 2, 2, 3, 2, 2, 2, 4,
+};  /* Bits consumed by the code, indexed by the next 4 input bits. */
+
+static const uint8_t kCodeLengthPrefixValue[16] = {
+  0, 4, 3, 2, 0, 4, 3, 1, 0, 4, 3, 2, 0, 4, 3, 5,
+};  /* Value decoded for the same 4-bit index. */
+
+BROTLI_BOOL BrotliDecoderSetParameter(
+    BrotliDecoderState* state, BrotliDecoderParameter p, uint32_t value) {
+  /* Parameters are accepted only while the decoder is still uninitialized. */
+  const int enabled = (value != 0);
+  if (state->state != BROTLI_STATE_UNINITED) return BROTLI_FALSE;
+  if (p == BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION) {
+    /* The stored flag is the inverse of the "disable" request. */
+    state->canny_ringbuffer_allocation = enabled ? 0 : 1;
+  } else if (p == BROTLI_DECODER_PARAM_LARGE_WINDOW) {
+    state->large_window = TO_BROTLI_BOOL(enabled);
+  } else {
+    return BROTLI_FALSE;  /* Unknown parameter. */
+  }
+  return BROTLI_TRUE;
+}
+
+BrotliDecoderState* BrotliDecoderCreateInstance(
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+  /* Callbacks must come as a pair; with neither, malloc/free are used. */
+  const int use_default = !alloc_func && !free_func;
+  const int use_custom = alloc_func && free_func;
+  BrotliDecoderState* decoder = NULL;
+  if (use_default) {
+    decoder = (BrotliDecoderState*)malloc(sizeof(*decoder));
+  } else if (use_custom) {
+    decoder = (BrotliDecoderState*)alloc_func(opaque, sizeof(*decoder));
+  }
+  if (decoder == NULL) {  /* Mixed callbacks or a failed allocation. */
+    BROTLI_DUMP();
+    return NULL;
+  }
+  if (BrotliDecoderStateInit(decoder, alloc_func, free_func, opaque)) {
+    return decoder;
+  }
+  /* Initialization failed: return the memory to whoever provided it. */
+  BROTLI_DUMP();
+  if (use_default) free(decoder); else free_func(opaque, decoder);
+  return NULL;
+}
+
+/* Deinitializes and frees a BrotliDecoderState instance; NULL is ignored.
+   The deallocator is fetched first, as cleanup runs before |state| is freed. */
+void BrotliDecoderDestroyInstance(BrotliDecoderState* state) {
+  brotli_free_func release;
+  void* release_arg;
+  if (state == NULL) return;
+  release = state->free_func;
+  release_arg = state->memory_manager_opaque;
+  BrotliDecoderStateCleanup(state);
+  release(release_arg, state);
+}
+
+/* Stores |e| in the decoder state and translates it to the coarser
+   BrotliDecoderResult. SUCCESS, NEEDS_MORE_INPUT and NEEDS_MORE_OUTPUT map
+   to the results of the same name; any other code becomes
+   BROTLI_DECODER_RESULT_ERROR. */
+static BROTLI_NOINLINE BrotliDecoderResult SaveErrorCode(
+    BrotliDecoderState* s, BrotliDecoderErrorCode e) {
+  /* Start from the catch-all answer and refine it for the known codes. */
+  BrotliDecoderResult outcome = BROTLI_DECODER_RESULT_ERROR;
+  s->error_code = (int)e;
+  if (e == BROTLI_DECODER_SUCCESS) {
+    outcome = BROTLI_DECODER_RESULT_SUCCESS;
+  } else if (e == BROTLI_DECODER_NEEDS_MORE_INPUT) {
+    outcome = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
+  } else if (e == BROTLI_DECODER_NEEDS_MORE_OUTPUT) {
+    outcome = BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT;
+  }
+  return outcome;
+}
+
+/* Decodes WBITS (1 - 7 bits); the 0x11 "Large Window Brotli" marker is only
+   accepted if enabled. Precondition: the accumulator holds at least 8 bits. */
+static BrotliDecoderErrorCode DecodeWindowBits(BrotliDecoderState* s,
+                                               BrotliBitReader* br) {
+  uint32_t field;
+  const BROTLI_BOOL large_window_allowed = s->large_window;
+  s->large_window = BROTLI_FALSE;
+  BrotliTakeBits(br, 1, &field);
+  if (field == 0) {
+    s->window_bits = 16;
+    return BROTLI_DECODER_SUCCESS;
+  }
+  BrotliTakeBits(br, 3, &field);
+  if (field != 0) {
+    s->window_bits = 17 + field;
+    return BROTLI_DECODER_SUCCESS;
+  }
+  BrotliTakeBits(br, 3, &field);
+  if (field == 0) {
+    s->window_bits = 17;
+    return BROTLI_DECODER_SUCCESS;
+  }
+  if (field != 1) {
+    s->window_bits = 8 + field;
+    return BROTLI_DECODER_SUCCESS;
+  }
+  /* field == 1 is the large-window marker: one more bit follows. */
+  if (!large_window_allowed) {
+    return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS);
+  }
+  BrotliTakeBits(br, 1, &field);
+  if (field == 1) {
+    return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS);
+  }
+  s->large_window = BROTLI_TRUE;
+  return BROTLI_DECODER_SUCCESS;
+}
+
+static BROTLI_INLINE void memmove16(uint8_t* dst, uint8_t* src) {
+#if !defined(BROTLI_TARGET_NEON)
+  uint32_t staging[4];  /* 16-byte bounce buffer between the two copies. */
+  memcpy(staging, src, sizeof(staging));
+  memcpy(dst, staging, sizeof(staging));
+#else
+  vst1q_u8(dst, vld1q_u8(src));  /* One 16-byte load, then one store. */
+#endif
+}
+
+/* Resumable decoder of a number in the range [0..255]; reads 1 - 11 bits. */
+static BROTLI_NOINLINE BrotliDecoderErrorCode DecodeVarLenUint8(
+    BrotliDecoderState* s, BrotliBitReader* br, uint32_t* value) {
+  uint32_t bits;
+  switch (s->substate_decode_uint8) {
+    case BROTLI_STATE_DECODE_UINT8_NONE:  /* Stage 1: one flag bit. */
+      if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(br, 1, &bits))) {
+        return BROTLI_DECODER_NEEDS_MORE_INPUT;
+      }
+      if (bits == 0) {  /* A clear flag encodes the value 0. */
+        *value = 0;
+        return BROTLI_DECODER_SUCCESS;
+      }
+    /* Fall through. */
+
+    case BROTLI_STATE_DECODE_UINT8_SHORT:  /* Stage 2: 3-bit tail width. */
+      if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(br, 3, &bits))) {
+        s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_SHORT;
+        return BROTLI_DECODER_NEEDS_MORE_INPUT;
+      }
+      if (bits == 0) {  /* Width 0 encodes the value 1. */
+        *value = 1;
+        s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
+        return BROTLI_DECODER_SUCCESS;
+      }
+      /* Use output value as a temporary storage. It MUST be persisted. */
+      *value = bits;
+    /* Fall through. */
+
+    case BROTLI_STATE_DECODE_UINT8_LONG:  /* Stage 3: |*value| tail bits. */
+      if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(br, *value, &bits))) {
+        s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_LONG;
+        return BROTLI_DECODER_NEEDS_MORE_INPUT;
+      }
+      *value = (1U << *value) + bits;  /* Implicit top bit plus the tail. */
+      s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
+      return BROTLI_DECODER_SUCCESS;
+
+    default:
+      return
+          BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);
+  }
+}
+
+/* Resumable: decodes a metablock length and flags by reading 2 - 31 bits. */
+static BrotliDecoderErrorCode BROTLI_NOINLINE DecodeMetaBlockLength(
+    BrotliDecoderState* s, BrotliBitReader* br) {
+  uint32_t bits;  /* Most recently read field. */
+  int i;  /* Index of the size nibble / byte being read. */
+  for (;;) {  /* Re-dispatches on the substate after each |break|. */
+    switch (s->substate_metablock_header) {
+      case BROTLI_STATE_METABLOCK_HEADER_NONE:  /* 1 bit: last metablock? */
+        if (!BrotliSafeReadBits(br, 1, &bits)) {
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        s->is_last_metablock = bits ? 1 : 0;
+        s->meta_block_remaining_len = 0;
+        s->is_uncompressed = 0;
+        s->is_metadata = 0;
+        if (!s->is_last_metablock) {
+          s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NIBBLES;
+          break;
+        }
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_EMPTY;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER_EMPTY:  /* 1 bit, last block only. */
+        if (!BrotliSafeReadBits(br, 1, &bits)) {
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        if (bits) {  /* Set: header ends here, the length stays 0. */
+          s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+          return BROTLI_DECODER_SUCCESS;
+        }
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NIBBLES;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER_NIBBLES:  /* 2 bits: nibbles - 4. */
+        if (!BrotliSafeReadBits(br, 2, &bits)) {
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        s->size_nibbles = (uint8_t)(bits + 4);
+        s->loop_counter = 0;
+        if (bits == 3) {  /* The value 3 marks a metadata block instead. */
+          s->is_metadata = 1;
+          s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_RESERVED;
+          break;
+        }
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_SIZE;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER_SIZE:  /* Nibbles, lowest first. */
+        i = s->loop_counter;
+        for (; i < (int)s->size_nibbles; ++i) {
+          if (!BrotliSafeReadBits(br, 4, &bits)) {
+            s->loop_counter = i;  /* Resume from this nibble next time. */
+            return BROTLI_DECODER_NEEDS_MORE_INPUT;
+          }
+          if (i + 1 == s->size_nibbles && s->size_nibbles > 4 && bits == 0) {
+            return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_NIBBLE);
+          }
+          s->meta_block_remaining_len |= (int)(bits << (i * 4));
+        }
+        s->substate_metablock_header =
+            BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED:
+        if (!s->is_last_metablock) {  /* Only then is the flag bit read. */
+          if (!BrotliSafeReadBits(br, 1, &bits)) {
+            return BROTLI_DECODER_NEEDS_MORE_INPUT;
+          }
+          s->is_uncompressed = bits ? 1 : 0;
+        }
+        ++s->meta_block_remaining_len;  /* The stream stores length - 1. */
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+        return BROTLI_DECODER_SUCCESS;
+
+      case BROTLI_STATE_METABLOCK_HEADER_RESERVED:  /* 1 bit, must be 0. */
+        if (!BrotliSafeReadBits(br, 1, &bits)) {
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        if (bits != 0) {
+          return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_RESERVED);
+        }
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_BYTES;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER_BYTES:  /* 2 bits: size byte count. */
+        if (!BrotliSafeReadBits(br, 2, &bits)) {
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        if (bits == 0) {  /* No size bytes: the length stays 0. */
+          s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+          return BROTLI_DECODER_SUCCESS;
+        }
+        s->size_nibbles = (uint8_t)bits;  /* Field reused as a byte count. */
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_METADATA;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER_METADATA:  /* Bytes, lowest first. */
+        i = s->loop_counter;
+        for (; i < (int)s->size_nibbles; ++i) {
+          if (!BrotliSafeReadBits(br, 8, &bits)) {
+            s->loop_counter = i;  /* Resume from this byte next time. */
+            return BROTLI_DECODER_NEEDS_MORE_INPUT;
+          }
+          if (i + 1 == s->size_nibbles && s->size_nibbles > 1 && bits == 0) {
+            return BROTLI_FAILURE(
+                BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_META_NIBBLE);
+          }
+          s->meta_block_remaining_len |= (int)(bits << (i * 8));
+        }
+        ++s->meta_block_remaining_len;  /* The stream stores length - 1. */
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+        return BROTLI_DECODER_SUCCESS;
+
+      default:
+        return
+            BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);
+    }
+  }
+}
+
+/* Decodes one Huffman symbol from |bits| with a two-level table lookup.
+   This method doesn't read data from the bit reader, BUT drops the amount of
+   bits that correspond to the decoded symbol.
+   bits MUST contain at least 15 (BROTLI_HUFFMAN_MAX_CODE_LENGTH) valid bits. */
+static BROTLI_INLINE uint32_t DecodeSymbol(uint32_t bits,
+                                           const HuffmanCode* table,
+                                           BrotliBitReader* br) {
+  BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table);
+  BROTLI_HC_ADJUST_TABLE_INDEX(table, bits & HUFFMAN_TABLE_MASK);
+  if (BROTLI_HC_FAST_LOAD_BITS(table) > HUFFMAN_TABLE_BITS) {  /* 2nd level. */
+    uint32_t nbits = BROTLI_HC_FAST_LOAD_BITS(table) - HUFFMAN_TABLE_BITS;
+    BrotliDropBits(br, HUFFMAN_TABLE_BITS);
+    BROTLI_HC_ADJUST_TABLE_INDEX(table,
+        BROTLI_HC_FAST_LOAD_VALUE(table) +
+        ((bits >> HUFFMAN_TABLE_BITS) & BitMask(nbits)));
+  }
+  BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(table));
+  return BROTLI_HC_FAST_LOAD_VALUE(table);
+}
+
+/* Peeks 16 bits of input and decodes the next Huffman symbol from them. */
+static BROTLI_INLINE uint32_t ReadSymbol(const HuffmanCode* table,
+                                         BrotliBitReader* br) {
+  const uint32_t peeked = BrotliGet16BitsUnmasked(br);
+  return DecodeSymbol(peeked, table, br);  /* Drops 0 - 15 of the bits. */
+}
+
+/* Same as DecodeSymbol, but used when fewer than 15 bits of input are known
+   to be available; returns BROTLI_FALSE if the symbol needs more bits. */
+static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol(
+    const HuffmanCode* table, BrotliBitReader* br, uint32_t* result) {
+  uint32_t val;
+  uint32_t available_bits = BrotliGetAvailableBits(br);
+  BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table);
+  if (available_bits == 0) {
+    if (BROTLI_HC_FAST_LOAD_BITS(table) == 0) {  /* Zero-length code. */
+      *result = BROTLI_HC_FAST_LOAD_VALUE(table);
+      return BROTLI_TRUE;
+    }
+    return BROTLI_FALSE;  /* No valid bits at all. */
+  }
+  val = (uint32_t)BrotliGetBitsUnmasked(br);
+  BROTLI_HC_ADJUST_TABLE_INDEX(table, val & HUFFMAN_TABLE_MASK);
+  if (BROTLI_HC_FAST_LOAD_BITS(table) <= HUFFMAN_TABLE_BITS) {
+    if (BROTLI_HC_FAST_LOAD_BITS(table) <= available_bits) {
+      BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(table));
+      *result = BROTLI_HC_FAST_LOAD_VALUE(table);
+      return BROTLI_TRUE;
+    } else {
+      return BROTLI_FALSE;  /* Not enough bits for the first level. */
+    }
+  }
+  if (available_bits <= HUFFMAN_TABLE_BITS) {
+    return BROTLI_FALSE;  /* Not enough bits to move to the second level. */
+  }
+
+  /* Speculatively drop HUFFMAN_TABLE_BITS (locals only; |br| is untouched). */
+  val = (val & BitMask(BROTLI_HC_FAST_LOAD_BITS(table))) >> HUFFMAN_TABLE_BITS;
+  available_bits -= HUFFMAN_TABLE_BITS;
+  BROTLI_HC_ADJUST_TABLE_INDEX(table, BROTLI_HC_FAST_LOAD_VALUE(table) + val);
+  if (available_bits < BROTLI_HC_FAST_LOAD_BITS(table)) {
+    return BROTLI_FALSE;  /* Not enough bits for the second level. */
+  }
+
+  BrotliDropBits(br, HUFFMAN_TABLE_BITS + BROTLI_HC_FAST_LOAD_BITS(table));
+  *result = BROTLI_HC_FAST_LOAD_VALUE(table);
+  return BROTLI_TRUE;
+}
+
+static BROTLI_INLINE BROTLI_BOOL SafeReadSymbol(
+    const HuffmanCode* table, BrotliBitReader* br, uint32_t* result) {
+  uint32_t peeked;
+  if (!BROTLI_PREDICT_TRUE(BrotliSafeGetBits(br, 15, &peeked))) {
+    return SafeDecodeSymbol(table, br, result);  /* Short on bits. */
+  }
+  /* 15 bits are at hand, which is enough for the regular decoder. */
+  *result = DecodeSymbol(peeked, table, br);
+  return BROTLI_TRUE;
+}
+
+/* Peeks 8 bits and looks them up in the first-level Huffman table. */
+static BROTLI_INLINE void PreloadSymbol(int safe,
+                                        const HuffmanCode* table,
+                                        BrotliBitReader* br,
+                                        uint32_t* bits,
+                                        uint32_t* value) {
+  if (!safe) {  /* Safe mode never preloads; outputs stay untouched. */
+    BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table);
+    BROTLI_HC_ADJUST_TABLE_INDEX(table,
+                                 BrotliGetBits(br, HUFFMAN_TABLE_BITS));
+    *bits = BROTLI_HC_FAST_LOAD_BITS(table);
+    *value = BROTLI_HC_FAST_LOAD_VALUE(table);
+  }
+}
+
+/* Decodes the next Huffman code using data prepared by PreloadSymbol.
+   Reads 0 - 15 bits. Also peeks 8 following bits. */
+static BROTLI_INLINE uint32_t ReadPreloadedSymbol(const HuffmanCode* table,
+                                                  BrotliBitReader* br,
+                                                  uint32_t* bits,
+                                                  uint32_t* value) {
+  uint32_t result = *value;  /* Final as is for a first-level code. */
+  if (BROTLI_PREDICT_FALSE(*bits > HUFFMAN_TABLE_BITS)) {  /* 2nd level. */
+    uint32_t val = BrotliGet16BitsUnmasked(br);
+    const HuffmanCode* ext = table + (val & HUFFMAN_TABLE_MASK) + *value;
+    uint32_t mask = BitMask((*bits - HUFFMAN_TABLE_BITS));
+    BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(ext);
+    BrotliDropBits(br, HUFFMAN_TABLE_BITS);
+    BROTLI_HC_ADJUST_TABLE_INDEX(ext, (val >> HUFFMAN_TABLE_BITS) & mask);
+    BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(ext));
+    result = BROTLI_HC_FAST_LOAD_VALUE(ext);
+  } else {
+    BrotliDropBits(br, *bits);
+  }
+  PreloadSymbol(0, table, br, bits, value);  /* Prepare the next call. */
+  return result;
+}
+
+/* Returns the bit width of |x|: floor(log2(x)) + 1, or 0 when x == 0. */
+static BROTLI_INLINE uint32_t Log2Floor(uint32_t x) {
+  uint32_t width;
+  for (width = 0; x != 0; x >>= 1) {
+    ++width;
+  }
+  return width;
+}
+
+/* Reads (s->symbol + 1) symbols.
+   Totally 1..4 symbols are read, 1..11 bits each.
+   The list of symbols MUST NOT contain duplicates. */
+static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols(
+    uint32_t alphabet_size_max, uint32_t alphabet_size_limit,
+    BrotliDecoderState* s) {
+  /* max_bits == 1..11; symbol == 0..3; 1..44 bits will be read. */
+  BrotliBitReader* br = &s->br;
+  BrotliMetablockHeaderArena* h = &s->arena.header;
+  uint32_t max_bits = Log2Floor(alphabet_size_max - 1);  /* Bits per symbol. */
+  uint32_t i = h->sub_loop_counter;  /* Resume where the last call stopped. */
+  uint32_t num_symbols = h->symbol;
+  while (i <= num_symbols) {
+    uint32_t v;
+    if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(br, max_bits, &v))) {
+      h->sub_loop_counter = i;
+      h->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_READ;
+      return BROTLI_DECODER_NEEDS_MORE_INPUT;
+    }
+    if (v >= alphabet_size_limit) {  /* Symbol outside the alphabet. */
+      return
+          BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_ALPHABET);
+    }
+    h->symbols_lists_array[i] = (uint16_t)v;
+    BROTLI_LOG_UINT(h->symbols_lists_array[i]);
+    ++i;
+  }
+
+  for (i = 0; i < num_symbols; ++i) {  /* Pairwise duplicate check. */
+    uint32_t k = i + 1;
+    for (; k <= num_symbols; ++k) {
+      if (h->symbols_lists_array[i] == h->symbols_lists_array[k]) {
+        return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_SAME);
+      }
+    }
+  }
+
+  return BROTLI_DECODER_SUCCESS;
+}
+
+/* Accounts for one explicitly coded code length of the current symbol.
+   Any pending repeat run is reset and the symbol index moves on by one.
+   For a non-zero length the symbol is also appended to the chain of that
+   length, the length is remembered as the previous one, the remaining
+   Huffman space shrinks, and the code length histogram is updated. */
+static BROTLI_INLINE void ProcessSingleCodeLength(uint32_t code_len,
+    uint32_t* symbol, uint32_t* repeat, uint32_t* space,
+    uint32_t* prev_code_len, uint16_t* symbol_lists,
+    uint16_t* code_length_histo, int* next_symbol) {
+  const uint32_t current = *symbol;
+  *repeat = 0;
+  *symbol = current + 1;
+  if (code_len == 0) return;  /* Zero length: nothing else to record. */
+  /* From here on code_len == 1..15. */
+  symbol_lists[next_symbol[code_len]] = (uint16_t)current;
+  next_symbol[code_len] = (int)current;
+  *prev_code_len = code_len;
+  *space -= 32768U >> code_len;
+  ++code_length_histo[code_len];
+  BROTLI_LOG(("[ReadHuffmanCode] code_length[%d] = %d\n",
+      (int)current, (int)code_len));
+}
+
+/* Process repeated symbol code length.
+    A) Check if it is the extension of previous repeat sequence; if the decoded
+       value is not BROTLI_REPEAT_PREVIOUS_CODE_LENGTH, then it is a new
+       symbol-skip
+    B) Update repeat variable
+    C) Check if operation is feasible (fits alphabet)
+    D) For each symbol do the same operations as in ProcessSingleCodeLength
+
+   PRECONDITION: code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH or
+                 code_len == BROTLI_REPEAT_ZERO_CODE_LENGTH */
+static BROTLI_INLINE void ProcessRepeatedCodeLength(uint32_t code_len,
+    uint32_t repeat_delta, uint32_t alphabet_size, uint32_t* symbol,
+    uint32_t* repeat, uint32_t* space, uint32_t* prev_code_len,
+    uint32_t* repeat_code_len, uint16_t* symbol_lists,
+    uint16_t* code_length_histo, int* next_symbol) {
+  uint32_t old_repeat;
+  uint32_t extra_bits = 3;  /* for BROTLI_REPEAT_ZERO_CODE_LENGTH */
+  uint32_t new_len = 0;  /* for BROTLI_REPEAT_ZERO_CODE_LENGTH */
+  if (code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) {
+    new_len = *prev_code_len;
+    extra_bits = 2;
+  }
+  if (*repeat_code_len != new_len) {  /* A run of another length starts. */
+    *repeat = 0;
+    *repeat_code_len = new_len;
+  }
+  old_repeat = *repeat;
+  if (*repeat > 0) {  /* Extending a run: rescale the count so far. */
+    *repeat -= 2;
+    *repeat <<= extra_bits;
+  }
+  *repeat += repeat_delta + 3U;
+  repeat_delta = *repeat - old_repeat;  /* Symbols added by this code. */
+  if (*symbol + repeat_delta > alphabet_size) {
+    BROTLI_DUMP();
+    *symbol = alphabet_size;
+    *space = 0xFFFFF;  /* Non-zero, so ReadHuffmanCode's space check fails. */
+    return;
+  }
+  BROTLI_LOG(("[ReadHuffmanCode] code_length[%d..%d] = %d\n",
+      (int)*symbol, (int)(*symbol + repeat_delta - 1), (int)*repeat_code_len));
+  if (*repeat_code_len != 0) {
+    unsigned last = *symbol + repeat_delta;
+    int next = next_symbol[*repeat_code_len];
+    do {  /* Append each repeated symbol to the chain of this length. */
+      symbol_lists[next] = (uint16_t)*symbol;
+      next = (int)*symbol;
+    } while (++(*symbol) != last);
+    next_symbol[*repeat_code_len] = next;
+    *space -= repeat_delta << (15 - *repeat_code_len);
+    code_length_histo[*repeat_code_len] =
+        (uint16_t)(code_length_histo[*repeat_code_len] + repeat_delta);
+  } else {
+    *symbol += repeat_delta;  /* Zero lengths only advance the index. */
+  }
+}
+
+/* Fast path: decodes symbol code lengths while enough input is buffered. */
+static BrotliDecoderErrorCode ReadSymbolCodeLengths(
+    uint32_t alphabet_size, BrotliDecoderState* s) {
+  BrotliBitReader* br = &s->br;
+  BrotliMetablockHeaderArena* h = &s->arena.header;
+  uint32_t symbol = h->symbol;  /* Work on local copies of the arena state. */
+  uint32_t repeat = h->repeat;
+  uint32_t space = h->space;
+  uint32_t prev_code_len = h->prev_code_len;
+  uint32_t repeat_code_len = h->repeat_code_len;
+  uint16_t* symbol_lists = h->symbol_lists;
+  uint16_t* code_length_histo = h->code_length_histo;
+  int* next_symbol = h->next_symbol;
+  if (!BrotliWarmupBitReader(br)) {
+    return BROTLI_DECODER_NEEDS_MORE_INPUT;
+  }
+  while (symbol < alphabet_size && space > 0) {
+    const HuffmanCode* p = h->table;
+    uint32_t code_len;
+    BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(p);
+    if (!BrotliCheckInputAmount(br, BROTLI_SHORT_FILL_BIT_WINDOW_READ)) {
+      h->symbol = symbol;  /* Save progress so that decoding can resume. */
+      h->repeat = repeat;
+      h->prev_code_len = prev_code_len;
+      h->repeat_code_len = repeat_code_len;
+      h->space = space;
+      return BROTLI_DECODER_NEEDS_MORE_INPUT;
+    }
+    BrotliFillBitWindow16(br);
+    BROTLI_HC_ADJUST_TABLE_INDEX(p, BrotliGetBitsUnmasked(br) &
+        BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH));
+    BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(p));  /* Use 1..5 bits. */
+    code_len = BROTLI_HC_FAST_LOAD_VALUE(p);  /* code_len == 0..17 */
+    if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) {
+      ProcessSingleCodeLength(code_len, &symbol, &repeat, &space,
+          &prev_code_len, symbol_lists, code_length_histo, next_symbol);
+    } else {  /* code_len == 16..17, extra_bits == 2..3 */
+      uint32_t extra_bits =
+          (code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) ? 2 : 3;
+      uint32_t repeat_delta =
+          (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(extra_bits);
+      BrotliDropBits(br, extra_bits);
+      ProcessRepeatedCodeLength(code_len, repeat_delta, alphabet_size,
+          &symbol, &repeat, &space, &prev_code_len, &repeat_code_len,
+          symbol_lists, code_length_histo, next_symbol);
+    }
+  }
+  h->space = space;  /* The caller inspects h->space after success. */
+  return BROTLI_DECODER_SUCCESS;
+}
+
+static BrotliDecoderErrorCode SafeReadSymbolCodeLengths(
+    uint32_t alphabet_size, BrotliDecoderState* s) {
+  BrotliBitReader* br = &s->br;
+  BrotliMetablockHeaderArena* h = &s->arena.header;
+  BROTLI_BOOL get_byte = BROTLI_FALSE;  /* Set when one more byte is needed. */
+  while (h->symbol < alphabet_size && h->space > 0) {
+    const HuffmanCode* p = h->table;
+    uint32_t code_len;
+    uint32_t available_bits;
+    uint32_t bits = 0;
+    BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(p);
+    if (get_byte && !BrotliPullByte(br)) return BROTLI_DECODER_NEEDS_MORE_INPUT;
+    get_byte = BROTLI_FALSE;
+    available_bits = BrotliGetAvailableBits(br);
+    if (available_bits != 0) {
+      bits = (uint32_t)BrotliGetBitsUnmasked(br);
+    }
+    BROTLI_HC_ADJUST_TABLE_INDEX(p,
+        bits & BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH));
+    if (BROTLI_HC_FAST_LOAD_BITS(p) > available_bits) {  /* Code is cut off. */
+      get_byte = BROTLI_TRUE;
+      continue;
+    }
+    code_len = BROTLI_HC_FAST_LOAD_VALUE(p);  /* code_len == 0..17 */
+    if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) {
+      BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(p));
+      ProcessSingleCodeLength(code_len, &h->symbol, &h->repeat, &h->space,
+          &h->prev_code_len, h->symbol_lists, h->code_length_histo,
+          h->next_symbol);
+    } else {  /* code_len == 16..17, extra_bits == 2..3 */
+      uint32_t extra_bits = code_len - 14U;
+      uint32_t repeat_delta = (bits >> BROTLI_HC_FAST_LOAD_BITS(p)) &
+          BitMask(extra_bits);
+      if (available_bits < BROTLI_HC_FAST_LOAD_BITS(p) + extra_bits) {
+        get_byte = BROTLI_TRUE;  /* Nothing was dropped yet; retry later. */
+        continue;
+      }
+      BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(p) + extra_bits);
+      ProcessRepeatedCodeLength(code_len, repeat_delta, alphabet_size,
+          &h->symbol, &h->repeat, &h->space, &h->prev_code_len,
+          &h->repeat_code_len, h->symbol_lists, h->code_length_histo,
+          h->next_symbol);
+    }
+  }
+  return BROTLI_DECODER_SUCCESS;
+}
+
+/* Reads and decodes 15..18 codes using static prefix code.
+   Each code is 2..4 bits long. In total 30..72 bits are used. */
+static BrotliDecoderErrorCode ReadCodeLengthCodeLengths(BrotliDecoderState* s) {
+  BrotliBitReader* br = &s->br;
+  BrotliMetablockHeaderArena* h = &s->arena.header;
+  uint32_t num_codes = h->repeat;  /* h->repeat holds the code count here. */
+  unsigned space = h->space;
+  uint32_t i = h->sub_loop_counter;  /* Resume where the last call stopped. */
+  for (; i < BROTLI_CODE_LENGTH_CODES; ++i) {
+    const uint8_t code_len_idx = kCodeLengthCodeOrder[i];
+    uint32_t ix;
+    uint32_t v;
+    if (BROTLI_PREDICT_FALSE(!BrotliSafeGetBits(br, 4, &ix))) {
+      uint32_t available_bits = BrotliGetAvailableBits(br);
+      if (available_bits != 0) {
+        ix = BrotliGetBitsUnmasked(br) & 0xF;
+      } else {
+        ix = 0;
+      }
+      if (kCodeLengthPrefixLength[ix] > available_bits) {  /* Code cut off. */
+        h->sub_loop_counter = i;
+        h->repeat = num_codes;
+        h->space = space;
+        h->substate_huffman = BROTLI_STATE_HUFFMAN_COMPLEX;
+        return BROTLI_DECODER_NEEDS_MORE_INPUT;
+      }
+    }
+    v = kCodeLengthPrefixValue[ix];
+    BrotliDropBits(br, kCodeLengthPrefixLength[ix]);
+    h->code_length_code_lengths[code_len_idx] = (uint8_t)v;
+    BROTLI_LOG_ARRAY_INDEX(h->code_length_code_lengths, code_len_idx);
+    if (v != 0) {
+      space = space - (32U >> v);
+      ++num_codes;
+      ++h->code_length_histo[v];
+      if (space - 1U >= 32U) {
+        /* space is 0 or wrapped around. */
+        break;
+      }
+    }
+  }
+  if (!(num_codes == 1 || space == 0)) {  /* Neither one code nor full. */
+    return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_CL_SPACE);
+  }
+  return BROTLI_DECODER_SUCCESS;
+}
+
+/* Decodes the Huffman tables.
+   There are 2 scenarios:
+    A) Huffman code contains only few symbols (1..4). Those symbols are read
+       directly; their code lengths are defined by the number of symbols.
+       For this scenario 4 - 49 bits will be read.
+
+    B) 2-phase decoding:
+    B.1) Small Huffman table is decoded; it is specified with code lengths
+         encoded with predefined entropy code. 32 - 74 bits are used.
+    B.2) Decoded table is used to decode code lengths of symbols in resulting
+         Huffman table. In worst case 3520 bits are read. */
+static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size_max,
+                                              uint32_t alphabet_size_limit,
+                                              HuffmanCode* table,
+                                              uint32_t* opt_table_size,
+                                              BrotliDecoderState* s) {
+  BrotliBitReader* br = &s->br;
+  BrotliMetablockHeaderArena* h = &s->arena.header;
+  /* State machine. */
+  for (;;) {
+    switch (h->substate_huffman) {
+      case BROTLI_STATE_HUFFMAN_NONE:  /* 2 bits select the scenario. */
+        if (!BrotliSafeReadBits(br, 2, &h->sub_loop_counter)) {
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        BROTLI_LOG_UINT(h->sub_loop_counter);
+        /* The value is used as follows:
+           1 for simple code;
+           0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */
+        if (h->sub_loop_counter != 1) {
+          h->space = 32;
+          h->repeat = 0;  /* num_codes */
+          memset(&h->code_length_histo[0], 0, sizeof(h->code_length_histo[0]) *
+              (BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH + 1));
+          memset(&h->code_length_code_lengths[0], 0,
+              sizeof(h->code_length_code_lengths));
+          h->substate_huffman = BROTLI_STATE_HUFFMAN_COMPLEX;
+          continue;  /* Re-dispatch into the COMPLEX case. */
+        }
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_SIMPLE_SIZE:
+        /* Read symbols, codes & code lengths directly. */
+        if (!BrotliSafeReadBits(br, 2, &h->symbol)) {  /* num_symbols */
+          h->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_SIZE;
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        h->sub_loop_counter = 0;
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_SIMPLE_READ: {
+        BrotliDecoderErrorCode result =
+            ReadSimpleHuffmanSymbols(alphabet_size_max, alphabet_size_limit, s);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          return result;
+        }
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_SIMPLE_BUILD: {
+        uint32_t table_size;
+        if (h->symbol == 3) {  /* One more bit: h->symbol becomes 3 or 4. */
+          uint32_t bits;
+          if (!BrotliSafeReadBits(br, 1, &bits)) {
+            h->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_BUILD;
+            return BROTLI_DECODER_NEEDS_MORE_INPUT;
+          }
+          h->symbol += bits;
+        }
+        BROTLI_LOG_UINT(h->symbol);
+        table_size = BrotliBuildSimpleHuffmanTable(
+            table, HUFFMAN_TABLE_BITS, h->symbols_lists_array, h->symbol);
+        if (opt_table_size) {  /* The size output is optional. */
+          *opt_table_size = table_size;
+        }
+        h->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
+        return BROTLI_DECODER_SUCCESS;
+      }
+
+      /* Decode Huffman-coded code lengths. */
+      case BROTLI_STATE_HUFFMAN_COMPLEX: {
+        uint32_t i;
+        BrotliDecoderErrorCode result = ReadCodeLengthCodeLengths(s);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          return result;
+        }
+        BrotliBuildCodeLengthsHuffmanTable(h->table,
+                                           h->code_length_code_lengths,
+                                           h->code_length_histo);
+        memset(&h->code_length_histo[0], 0, sizeof(h->code_length_histo));
+        for (i = 0; i <= BROTLI_HUFFMAN_MAX_CODE_LENGTH; ++i) {
+          h->next_symbol[i] = (int)i - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);
+          h->symbol_lists[h->next_symbol[i]] = 0xFFFF;
+        }
+
+        h->symbol = 0;
+        h->prev_code_len = BROTLI_INITIAL_REPEATED_CODE_LENGTH;
+        h->repeat = 0;
+        h->repeat_code_len = 0;
+        h->space = 32768;
+        h->substate_huffman = BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS: {
+        uint32_t table_size;
+        BrotliDecoderErrorCode result = ReadSymbolCodeLengths(
+            alphabet_size_limit, s);
+        if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) {  /* Try bytewise. */
+          result = SafeReadSymbolCodeLengths(alphabet_size_limit, s);
+        }
+        if (result != BROTLI_DECODER_SUCCESS) {
+          return result;
+        }
+
+        if (h->space != 0) {  /* The code lengths must use up all space. */
+          BROTLI_LOG(("[ReadHuffmanCode] space = %d\n", (int)h->space));
+          return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_HUFFMAN_SPACE);
+        }
+        table_size = BrotliBuildHuffmanTable(
+            table, HUFFMAN_TABLE_BITS, h->symbol_lists, h->code_length_histo);
+        if (opt_table_size) {  /* The size output is optional. */
+          *opt_table_size = table_size;
+        }
+        h->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
+        return BROTLI_DECODER_SUCCESS;
+      }
+
+      default:
+        return
+            BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);
+    }
+  }
+}
+
+/* Decodes a block length: prefix symbol plus extra bits, 3..39 bits total. */
+static BROTLI_INLINE uint32_t ReadBlockLength(const HuffmanCode* table,
+                                              BrotliBitReader* br) {
+  const uint32_t symbol = ReadSymbol(table, br);
+  /* Each prefix entry supplies an extra-bit count (2..24) and a base. */
+  const uint32_t extra_width = kBlockLengthPrefixCode[symbol].nbits;
+  const uint32_t extra = BrotliReadBits24(br, extra_width);
+  return kBlockLengthPrefixCode[symbol].offset + extra;
+}
+
+/* Resumable block-length read. WARNING: if the substate is not
+   BROTLI_STATE_READ_BLOCK_LENGTH_NONE, ReadBlockLength can't continue it. */
+static BROTLI_INLINE BROTLI_BOOL SafeReadBlockLength(
+    BrotliDecoderState* s, uint32_t* result, const HuffmanCode* table,
+    BrotliBitReader* br) {
+  uint32_t index;
+  if (s->substate_read_block_length == BROTLI_STATE_READ_BLOCK_LENGTH_NONE) {
+    if (!SafeReadSymbol(table, br, &index)) {
+      return BROTLI_FALSE;  /* No symbol yet; substate stays NONE. */
+    }
+  } else {
+    index = s->block_length_index;  /* Symbol kept by a stalled call. */
+  }
+  {
+    uint32_t bits;
+    uint32_t nbits = kBlockLengthPrefixCode[index].nbits;  /* nbits == 2..24 */
+    if (!BrotliSafeReadBits(br, nbits, &bits)) {
+      s->block_length_index = index;  /* Keep the symbol for the retry. */
+      s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX;
+      return BROTLI_FALSE;
+    }
+    *result = kBlockLengthPrefixCode[index].offset + bits;
+    s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_NONE;
+    return BROTLI_TRUE;
+  }
+}
+
+/* Transform:
+    1) initialize list L with values 0, 1,... 255
+    2) For each input element X:
+    2.1) let Y = L[X]
+    2.2) remove X-th element from L
+    2.3) prepend Y to L
+    2.4) append Y to output
+
+   In most cases max(Y) <= 7, so most of L remains intact.
+   To reduce the cost of initialization, we reuse L, remember the upper bound
+   of Y values, and reinitialize only the first elements in L.
+
+   Most input values are 0 and 1. To reduce the number of branches, we replace
+   the inner for loop with do-while. */
+static BROTLI_NOINLINE void InverseMoveToFrontTransform(
+    uint8_t* v, uint32_t v_len, BrotliDecoderState* state) {
+  /* Reinitialize elements that could have been changed. */
+  uint32_t i = 1;
+  uint32_t upper_bound = state->mtf_upper_bound;
+  uint32_t* mtf = &state->mtf[1];  /* Make mtf[-1] addressable. */
+  uint8_t* mtf_u8 = (uint8_t*)mtf;  /* Byte view of the same list. */
+  /* Load endian-aware constant. */
+  const uint8_t b0123[4] = {0, 1, 2, 3};
+  uint32_t pattern;
+  memcpy(&pattern, &b0123, 4);
+
+  /* Initialize list using a pattern of 4 consecutive values. */
+  mtf[0] = pattern;
+  do {
+    pattern += 0x04040404;  /* Advance all 4 values by 4. */
+    mtf[i] = pattern;
+    i++;
+  } while (i <= upper_bound);
+
+  /* Transform the input. */
+  upper_bound = 0;
+  for (i = 0; i < v_len; ++i) {
+    int index = v[i];
+    uint8_t value = mtf_u8[index];
+    upper_bound |= v[i];  /* OR of all indices bounds the touched range. */
+    v[i] = value;
+    mtf_u8[-1] = value;  /* Staged before the list; shifted into [0] below. */
+    do {
+      index--;
+      mtf_u8[index + 1] = mtf_u8[index];
+    } while (index >= 0);  /* Last pass copies [-1] into [0]. */
+  }
+  /* Remember the number of elements to be reinitialized. */
+  state->mtf_upper_bound = upper_bound >> 2;  /* 4 list bytes per word. */
+}
+
+/* Decodes every Huffman table of |group| with ReadHuffmanCode; resumable. */
+static BrotliDecoderErrorCode HuffmanTreeGroupDecode(
+    HuffmanTreeGroup* group, BrotliDecoderState* s) {
+  BrotliMetablockHeaderArena* arena = &s->arena.header;
+  BrotliDecoderErrorCode status;
+  uint32_t built_size;
+  if (arena->substate_tree_group != BROTLI_STATE_TREE_GROUP_LOOP) {
+    arena->htree_index = 0;
+    arena->next = group->codes;
+    arena->substate_tree_group = BROTLI_STATE_TREE_GROUP_LOOP;
+  }
+  for (; arena->htree_index < group->num_htrees; ++arena->htree_index) {
+    status = ReadHuffmanCode(group->alphabet_size_max,
+        group->alphabet_size_limit, arena->next, &built_size, s);
+    if (status != BROTLI_DECODER_SUCCESS) return status;  /* Progress kept. */
+    group->htrees[arena->htree_index] = arena->next;
+    arena->next += built_size;
+  }
+  arena->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE;
+  return BROTLI_DECODER_SUCCESS;
+}
+
+/* Decodes a context map.
+   Decoding is done in 4 phases:
+    1) Read auxiliary information (6..16 bits) and allocate memory.
+       In case of trivial context map, decoding is finished at this phase.
+    2) Decode Huffman table using ReadHuffmanCode function.
+       This table will be used for reading context map items.
+    3) Read context map items; "0" values could be run-length encoded.
+    4) Optionally, apply InverseMoveToFront transform to the resulting map. */
+static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
+                                               uint32_t* num_htrees,
+                                               uint8_t** context_map_arg,
+                                               BrotliDecoderState* s) {
+  BrotliBitReader* br = &s->br;
+  BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS;
+  BrotliMetablockHeaderArena* h = &s->arena.header;
+
+  switch ((int)h->substate_context_map) {
+    case BROTLI_STATE_CONTEXT_MAP_NONE:
+      result = DecodeVarLenUint8(s, br, num_htrees);
+      if (result != BROTLI_DECODER_SUCCESS) {
+        return result;
+      }
+      (*num_htrees)++;  /* Tree count is the decoded value plus one. */
+      h->context_index = 0;
+      BROTLI_LOG_UINT(context_map_size);
+      BROTLI_LOG_UINT(*num_htrees);
+      *context_map_arg =
+          (uint8_t*)BROTLI_DECODER_ALLOC(s, (size_t)context_map_size);
+      if (*context_map_arg == 0) {
+        return BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_CONTEXT_MAP);
+      }
+      if (*num_htrees <= 1) {  /* Trivial map: every entry is tree 0. */
+        memset(*context_map_arg, 0, (size_t)context_map_size);
+        return BROTLI_DECODER_SUCCESS;
+      }
+      h->substate_context_map = BROTLI_STATE_CONTEXT_MAP_READ_PREFIX;
+    /* Fall through. */
+
+    case BROTLI_STATE_CONTEXT_MAP_READ_PREFIX: {
+      uint32_t bits;
+      /* In next stage ReadHuffmanCode uses at least 4 bits, so it is safe
+         to peek 4 bits ahead (1 flag bit + 4 = the 5 bits requested). */
+      if (!BrotliSafeGetBits(br, 5, &bits)) {
+        return BROTLI_DECODER_NEEDS_MORE_INPUT;
+      }
+      if ((bits & 1) != 0) { /* Use RLE for zeros. */
+        h->max_run_length_prefix = (bits >> 1) + 1;  /* 4-bit field + 1. */
+        BrotliDropBits(br, 5);
+      } else {
+        h->max_run_length_prefix = 0;
+        BrotliDropBits(br, 1);  /* Only the flag bit is consumed. */
+      }
+      BROTLI_LOG_UINT(h->max_run_length_prefix);
+      h->substate_context_map = BROTLI_STATE_CONTEXT_MAP_HUFFMAN;
+    }
+    /* Fall through. */
+
+    case BROTLI_STATE_CONTEXT_MAP_HUFFMAN: {
+      uint32_t alphabet_size = *num_htrees + h->max_run_length_prefix;
+      result = ReadHuffmanCode(alphabet_size, alphabet_size,
+                               h->context_map_table, NULL, s);
+      if (result != BROTLI_DECODER_SUCCESS) return result;
+      h->code = 0xFFFF;  /* Sentinel: no RLE code is pending. */
+      h->substate_context_map = BROTLI_STATE_CONTEXT_MAP_DECODE;
+    }
+    /* Fall through. */
+
+    case BROTLI_STATE_CONTEXT_MAP_DECODE: {
+      uint32_t context_index = h->context_index;
+      uint32_t max_run_length_prefix = h->max_run_length_prefix;
+      uint8_t* context_map = *context_map_arg;
+      uint32_t code = h->code;
+      BROTLI_BOOL skip_preamble = (code != 0xFFFF);  /* Resume inside RLE. */
+      while (context_index < context_map_size || skip_preamble) {
+        if (!skip_preamble) {
+          if (!SafeReadSymbol(h->context_map_table, br, &code)) {
+            h->code = 0xFFFF;
+            h->context_index = context_index;
+            return BROTLI_DECODER_NEEDS_MORE_INPUT;
+          }
+          BROTLI_LOG_UINT(code);
+
+          if (code == 0) {
+            context_map[context_index++] = 0;
+            continue;
+          }
+          if (code > max_run_length_prefix) {  /* Literal map value. */
+            context_map[context_index++] =
+                (uint8_t)(code - max_run_length_prefix);
+            continue;
+          }
+        } else {
+          skip_preamble = BROTLI_FALSE;
+        }
+        /* RLE sub-stage. */
+        {
+          uint32_t reps;
+          if (!BrotliSafeReadBits(br, code, &reps)) {
+            h->code = code;  /* Pending RLE code for the next call. */
+            h->context_index = context_index;
+            return BROTLI_DECODER_NEEDS_MORE_INPUT;
+          }
+          reps += 1U << code;  /* Run length = 2^code + extra bits. */
+          BROTLI_LOG_UINT(reps);
+          if (context_index + reps > context_map_size) {
+            return
+                BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_CONTEXT_MAP_REPEAT);
+          }
+          do {
+            context_map[context_index++] = 0;
+          } while (--reps);
+        }
+      }
+    }
+    /* Fall through. */
+
+    case BROTLI_STATE_CONTEXT_MAP_TRANSFORM: {
+      uint32_t bits;
+      if (!BrotliSafeReadBits(br, 1, &bits)) {
+        h->substate_context_map = BROTLI_STATE_CONTEXT_MAP_TRANSFORM;
+        return BROTLI_DECODER_NEEDS_MORE_INPUT;
+      }
+      if (bits != 0) {  /* Flag bit set: undo the move-to-front coding. */
+        InverseMoveToFrontTransform(*context_map_arg, context_map_size, s);
+      }
+      h->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
+      return BROTLI_DECODER_SUCCESS;
+    }
+
+    default:
+      return
+          BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);
+  }
+}
+
+/* Decodes the next block type and block length for |tree_type| and updates
+   the block type ring-buffer. Reads 3..54 bits. */
+static BROTLI_INLINE BROTLI_BOOL DecodeBlockTypeAndLength(
+    int safe, BrotliDecoderState* s, int tree_type) {
+  uint32_t max_block_type = s->num_block_types[tree_type];
+  const HuffmanCode* type_tree = &s->block_type_trees[
+      tree_type * BROTLI_HUFFMAN_MAX_SIZE_258];
+  const HuffmanCode* len_tree = &s->block_len_trees[
+      tree_type * BROTLI_HUFFMAN_MAX_SIZE_26];
+  BrotliBitReader* br = &s->br;
+  uint32_t* ringbuffer = &s->block_type_rb[tree_type * 2];  /* 2 per tree. */
+  uint32_t block_type;
+  if (max_block_type <= 1) {  /* At most one block type: nothing is read. */
+    return BROTLI_FALSE;
+  }
+
+  /* Read 0..15 + 3..39 bits. */
+  if (!safe) {
+    block_type = ReadSymbol(type_tree, br);
+    s->block_length[tree_type] = ReadBlockLength(len_tree, br);
+  } else {
+    BrotliBitReaderState memento;
+    BrotliBitReaderSaveState(br, &memento);
+    if (!SafeReadSymbol(type_tree, br, &block_type)) return BROTLI_FALSE;
+    if (!SafeReadBlockLength(s, &s->block_length[tree_type], len_tree, br)) {
+      /* Drop the partial length state: the whole read is redone later. */
+      s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_NONE;
+      BrotliBitReaderRestoreState(br, &memento);
+      return BROTLI_FALSE;
+    }
+  }
+  if (block_type == 1) {
+    block_type = ringbuffer[1] + 1;  /* Code 1: last type + 1. */
+  } else if (block_type == 0) {
+    block_type = ringbuffer[0];  /* Code 0: the type before the last one. */
+  } else {
+    block_type -= 2;  /* Codes >= 2: explicit type, biased by 2. */
+  }
+  if (block_type >= max_block_type) {
+    block_type -= max_block_type;  /* Wrap around. */
+  }
+  ringbuffer[0] = ringbuffer[1];
+  ringbuffer[1] = block_type;
+  return BROTLI_TRUE;
+}
+
+static BROTLI_INLINE void DetectTrivialLiteralBlockTypes(
+    BrotliDecoderState* s) {
+  size_t i;  /* Bitmap word index, then literal block type index. */
+  for (i = 0; i < 8; ++i) s->trivial_literal_contexts[i] = 0;  /* Clear. */
+  for (i = 0; i < s->num_block_types[0]; i++) {
+    size_t offset = i << BROTLI_LITERAL_CONTEXT_BITS;
+    size_t error = 0;  /* Stays 0 while every entry equals |sample|. */
+    size_t sample = s->context_map[offset];
+    size_t j;
+    for (j = 0; j < (1u << BROTLI_LITERAL_CONTEXT_BITS);) {  /* j++ below */
+      BROTLI_REPEAT(4, error |= s->context_map[offset + j++] ^ sample;)
+    }
+    if (error == 0) {  /* All map entries of this block type are equal. */
+      s->trivial_literal_contexts[i >> 5] |= 1u << (i & 31);
+    }
+  }
+}
+
+static BROTLI_INLINE void PrepareLiteralDecoding(BrotliDecoderState* s) {
+  /* Refreshes the cached literal decoding state (context map slice, trivial
+     flag, literal tree, context lookup) for the block type in rb slot 1. */
+  const uint32_t type = s->block_type_rb[1];
+  const size_t bitmap_word = s->trivial_literal_contexts[type >> 5];
+  const uint8_t mode = s->context_modes[type] & 3;
+  s->context_map_slice =
+      s->context_map + (type << BROTLI_LITERAL_CONTEXT_BITS);
+  s->trivial_literal_context = (bitmap_word >> (type & 31)) & 1;
+  s->literal_htree = s->literal_hgroup.htrees[s->context_map_slice[0]];
+  s->context_lookup = BROTLI_CONTEXT_LUT(mode);
+}
+
+/* Literal block switch: decodes the new block type and length, then
+   refreshes the literal context state. Reads 3..54 bits. */
+static BROTLI_INLINE BROTLI_BOOL DecodeLiteralBlockSwitchInternal(
+    int safe, BrotliDecoderState* s) {
+  const BROTLI_BOOL switched = DecodeBlockTypeAndLength(safe, s, 0);
+  if (switched) {
+    PrepareLiteralDecoding(s);  /* Tree type 0 holds the literal blocks. */
+  }
+  return switched ? BROTLI_TRUE : BROTLI_FALSE;
+}
+
+static void BROTLI_NOINLINE DecodeLiteralBlockSwitch(BrotliDecoderState* s) {
+  DecodeLiteralBlockSwitchInternal(0, s);  /* safe = 0; result is dropped. */
+}
+
+static BROTLI_BOOL BROTLI_NOINLINE SafeDecodeLiteralBlockSwitch(
+    BrotliDecoderState* s) {
+  return DecodeLiteralBlockSwitchInternal(1, s);  /* safe = 1: Safe* reads. */
+}
+
+/* Block switch for insert/copy length; on success selects the command
+   tree of the new block type. Reads 3..54 bits. */
+static BROTLI_INLINE BROTLI_BOOL DecodeCommandBlockSwitchInternal(
+    int safe, BrotliDecoderState* s) {
+  const BROTLI_BOOL switched = DecodeBlockTypeAndLength(safe, s, 1);
+  if (switched) {
+    s->htree_command = s->insert_copy_hgroup.htrees[s->block_type_rb[3]];
+  }
+  return switched ? BROTLI_TRUE : BROTLI_FALSE;
+}
+
+static void BROTLI_NOINLINE DecodeCommandBlockSwitch(BrotliDecoderState* s) {
+  DecodeCommandBlockSwitchInternal(0, s);  /* safe = 0; result is dropped. */
+}
+
+static BROTLI_BOOL BROTLI_NOINLINE SafeDecodeCommandBlockSwitch(
+    BrotliDecoderState* s) {
+  return DecodeCommandBlockSwitchInternal(1, s);  /* safe = 1: Safe* reads. */
+}
+
+/* Block switch for distance codes; on success re-targets the distance
+   context map slice and the distance tree index. Reads 3..54 bits. */
+static BROTLI_INLINE BROTLI_BOOL DecodeDistanceBlockSwitchInternal(
+    int safe, BrotliDecoderState* s) {
+  const BROTLI_BOOL switched = DecodeBlockTypeAndLength(safe, s, 2);
+  if (switched) {
+    s->dist_context_map_slice = s->dist_context_map +
+        (s->block_type_rb[5] << BROTLI_DISTANCE_CONTEXT_BITS);
+    s->dist_htree_index = s->dist_context_map_slice[s->distance_context];
+  }
+  return switched ? BROTLI_TRUE : BROTLI_FALSE;
+}
+
+static void BROTLI_NOINLINE DecodeDistanceBlockSwitch(BrotliDecoderState* s) {
+  DecodeDistanceBlockSwitchInternal(0, s);  /* safe = 0; result is dropped. */
+}
+
+static BROTLI_BOOL BROTLI_NOINLINE SafeDecodeDistanceBlockSwitch(
+    BrotliDecoderState* s) {
+  return DecodeDistanceBlockSwitchInternal(1, s);  /* safe = 1: Safe* reads. */
+}
+
+static size_t UnwrittenBytes(const BrotliDecoderState* s, BROTLI_BOOL wrap) {
+  size_t pos = wrap && s->pos > s->ringbuffer_size ?
+      (size_t)s->ringbuffer_size : (size_t)(s->pos);  /* Capped if |wrap|. */
+  size_t partial_pos_rb = (s->rb_roundtrips * (size_t)s->ringbuffer_size) + pos;
+  return partial_pos_rb - s->partial_pos_out;  /* Minus bytes already out. */
+}
+
+/* Dumps output.
+   Returns BROTLI_DECODER_NEEDS_MORE_OUTPUT only if there is more output to push
+   and either ring-buffer is as big as window size, or |force| is true. */
+static BrotliDecoderErrorCode BROTLI_NOINLINE WriteRingBuffer(
+    BrotliDecoderState* s, size_t* available_out, uint8_t** next_out,
+    size_t* total_out, BROTLI_BOOL force) {
+  uint8_t* start =
+      s->ringbuffer + (s->partial_pos_out & (size_t)s->ringbuffer_mask);
+  size_t to_write = UnwrittenBytes(s, BROTLI_TRUE);
+  size_t num_written = *available_out;
+  if (num_written > to_write) {  /* Clamp to what is pending. */
+    num_written = to_write;
+  }
+  if (s->meta_block_remaining_len < 0) {
+    return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_1);
+  }
+  if (next_out && !*next_out) {
+    *next_out = start;  /* No copy: point the caller into the ring-buffer. */
+  } else {
+    if (next_out) {  /* With next_out == NULL the bytes are only counted. */
+      memcpy(*next_out, start, num_written);
+      *next_out += num_written;
+    }
+  }
+  *available_out -= num_written;
+  BROTLI_LOG_UINT(to_write);
+  BROTLI_LOG_UINT(num_written);
+  s->partial_pos_out += num_written;
+  if (total_out) {  /* |total_out| is optional. */
+    *total_out = s->partial_pos_out;
+  }
+  if (num_written < to_write) {  /* Caller's buffer was too small. */
+    if (s->ringbuffer_size == (1 << s->window_bits) || force) {
+      return BROTLI_DECODER_NEEDS_MORE_OUTPUT;
+    } else {
+      return BROTLI_DECODER_SUCCESS;
+    }
+  }
+  /* Wrap ring buffer only if it has reached its maximal size. */
+  if (s->ringbuffer_size == (1 << s->window_bits) &&
+      s->pos >= s->ringbuffer_size) {
+    s->pos -= s->ringbuffer_size;
+    s->rb_roundtrips++;  /* One more full pass over the buffer. */
+    s->should_wrap_ringbuffer = (size_t)s->pos != 0 ? 1 : 0;
+  }
+  return BROTLI_DECODER_SUCCESS;
+}
+
+static void BROTLI_NOINLINE WrapRingBuffer(BrotliDecoderState* s) {
+  /* Copies s->pos bytes from just past the buffer end back to its start. */
+  if (!s->should_wrap_ringbuffer) return;
+  memcpy(s->ringbuffer, s->ringbuffer_end, (size_t)s->pos);
+  s->should_wrap_ringbuffer = 0;
+}
+
+/* Allocates ring-buffer.
+
+   s->new_ringbuffer_size MUST be updated by BrotliCalculateRingBufferSize
+   before this function is called.
+
+   Last two bytes of ring-buffer are initialized to 0, so context calculation
+   could be done uniformly for the first two and all other positions. */
+static BROTLI_BOOL BROTLI_NOINLINE BrotliEnsureRingBuffer(
+    BrotliDecoderState* s) {
+  uint8_t* old_ringbuffer = s->ringbuffer;
+  if (s->ringbuffer_size == s->new_ringbuffer_size) {
+    return BROTLI_TRUE;  /* Already at the requested size. */
+  }
+
+  s->ringbuffer = (uint8_t*)BROTLI_DECODER_ALLOC(s,
+      (size_t)(s->new_ringbuffer_size) + kRingBufferWriteAheadSlack);
+  if (s->ringbuffer == 0) {
+    /* Restore previous value. */
+    s->ringbuffer = old_ringbuffer;
+    return BROTLI_FALSE;
+  }
+  s->ringbuffer[s->new_ringbuffer_size - 2] = 0;
+  s->ringbuffer[s->new_ringbuffer_size - 1] = 0;
+
+  if (!!old_ringbuffer) {  /* Resizing: carry over the first s->pos bytes. */
+    memcpy(s->ringbuffer, old_ringbuffer, (size_t)s->pos);
+    BROTLI_DECODER_FREE(s, old_ringbuffer);
+  }
+
+  s->ringbuffer_size = s->new_ringbuffer_size;
+  s->ringbuffer_mask = s->new_ringbuffer_size - 1;
+  s->ringbuffer_end = s->ringbuffer + s->ringbuffer_size;
+
+  return BROTLI_TRUE;
+}
+
+static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput(
+    size_t* available_out, uint8_t** next_out, size_t* total_out,
+    BrotliDecoderState* s) {
+  /* TODO: avoid allocation for single uncompressed block. */
+  if (!BrotliEnsureRingBuffer(s)) {
+    return BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_1);
+  }
+
+  /* State machine: NONE copies input in, WRITE flushes the ring-buffer. */
+  for (;;) {
+    switch (s->substate_uncompressed) {
+      case BROTLI_STATE_UNCOMPRESSED_NONE: {
+        int nbytes = (int)BrotliGetRemainingBytes(&s->br);
+        if (nbytes > s->meta_block_remaining_len) {  /* Stay in the block. */
+          nbytes = s->meta_block_remaining_len;
+        }
+        if (s->pos + nbytes > s->ringbuffer_size) {  /* Stay in the buffer. */
+          nbytes = s->ringbuffer_size - s->pos;
+        }
+        /* Copy remaining bytes from s->br.buf_ to ring-buffer. */
+        BrotliCopyBytes(&s->ringbuffer[s->pos], &s->br, (size_t)nbytes);
+        s->pos += nbytes;
+        s->meta_block_remaining_len -= nbytes;
+        if (s->pos < 1 << s->window_bits) {  /* Window not filled yet. */
+          if (s->meta_block_remaining_len == 0) {
+            return BROTLI_DECODER_SUCCESS;
+          }
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_WRITE;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_UNCOMPRESSED_WRITE: {
+        BrotliDecoderErrorCode result;
+        result = WriteRingBuffer(
+            s, available_out, next_out, total_out, BROTLI_FALSE);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          return result;  /* Substate stays WRITE for the next call. */
+        }
+        if (s->ringbuffer_size == 1 << s->window_bits) {
+          s->max_distance = s->max_backward_distance;
+        }
+        s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_NONE;
+        break;  /* Loop back to copy more input. */
+      }
+    }
+  }
+  BROTLI_DCHECK(0);  /* Unreachable */
+}
+
+/* Calculates the smallest feasible ring buffer.
+   The result is stored in s->new_ringbuffer_size; nothing is allocated here.
+   If we know the data size is small, do not allocate more ring buffer
+   size than needed to reduce memory usage.
+
+   When this method is called, metablock size and flags MUST be decoded. */
+static void BROTLI_NOINLINE BrotliCalculateRingBufferSize(
+    BrotliDecoderState* s) {
+  int window_size = 1 << s->window_bits;
+  int new_ringbuffer_size = window_size;
+  /* We need at least 2 bytes of ring buffer size to get the last two
+     bytes for context from there */
+  int min_size = s->ringbuffer_size ? s->ringbuffer_size : 1024;
+  int output_size;
+
+  /* If maximum is already reached, no further extension is required. */
+  if (s->ringbuffer_size == window_size) {
+    return;
+  }
+
+  /* Metadata blocks do not touch the ring buffer. */
+  if (s->is_metadata) {
+    return;
+  }
+
+  if (!s->ringbuffer) {
+    output_size = 0;
+  } else {
+    output_size = s->pos;
+  }
+  output_size += s->meta_block_remaining_len;
+  min_size = min_size < output_size ? output_size : min_size;
+
+  if (!!s->canny_ringbuffer_allocation) {
+    /* Reduce ring buffer size to save memory when server is unscrupulous.
+       In worst case memory usage might be 1.5x bigger for a short period of
+       ring buffer reallocation. */
+    while ((new_ringbuffer_size >> 1) >= min_size) {  /* Halve while it fits. */
+      new_ringbuffer_size >>= 1;
+    }
+  }
+
+  s->new_ringbuffer_size = new_ringbuffer_size;
+}
+
+/* Reads one 2-bit context mode per literal block type (1..256 of them),
+   starting at s->loop_counter so that an interrupted read can continue. */
+static BrotliDecoderErrorCode ReadContextModes(BrotliDecoderState* s) {
+  int i;
+  for (i = s->loop_counter; i < (int)s->num_block_types[0]; ++i) {
+    uint32_t mode_bits;
+    const BROTLI_BOOL have_bits = BrotliSafeReadBits(&s->br, 2, &mode_bits);
+    if (!have_bits) {
+      /* Save progress so that the next call continues at this index. */
+      s->loop_counter = i;
+      return BROTLI_DECODER_NEEDS_MORE_INPUT;
+    }
+    s->context_modes[i] = (uint8_t)mode_bits;
+    BROTLI_LOG_ARRAY_INDEX(s->context_modes, i);
+  }
+  return BROTLI_DECODER_SUCCESS;
+}
+
+static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliDecoderState* s) {
+  int offset = s->distance_code - 3;
+  if (s->distance_code <= 3) {  /* Reuse a stored distance unchanged. */
+    /* Compensate double distance-ring-buffer roll for dictionary items. */
+    s->distance_context = 1 >> s->distance_code;  /* 1 only for code 0. */
+    s->distance_code = s->dist_rb[(s->dist_rb_idx - offset) & 3];
+    s->dist_rb_idx -= s->distance_context;
+  } else {  /* Stored distance plus a small delta. */
+    int index_delta = 3;
+    int delta;
+    int base = s->distance_code - 10;
+    if (s->distance_code < 10) {  /* Codes below 10 use index_delta 3. */
+      base = s->distance_code - 4;
+    } else {
+      index_delta = 2;
+    }
+    /* Unpack one of six 4-bit values: deltas -1, +1, -2, +2, -3, +3. */
+    delta = ((0x605142 >> (4 * base)) & 0xF) - 3;
+    s->distance_code = s->dist_rb[(s->dist_rb_idx + index_delta) & 0x3] + delta;
+    if (s->distance_code <= 0) {
+      /* A huge distance will cause a BROTLI_FAILURE() soon.
+         This is a little faster than failing here. */
+      s->distance_code = 0x7FFFFFFF;
+    }
+  }
+}
+
+static BROTLI_INLINE BROTLI_BOOL SafeReadBits(
+    BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  /* A zero-width field skips the bit reader and always decodes to 0. */
+  if (n_bits == 0) {
+    *val = 0;
+    return BROTLI_TRUE;
+  }
+  return BrotliSafeReadBits(br, n_bits, val);
+}
+
+static BROTLI_INLINE BROTLI_BOOL SafeReadBits32(
+    BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  /* A zero-width field skips the bit reader and always decodes to 0. */
+  if (n_bits == 0) {
+    *val = 0;
+    return BROTLI_TRUE;
+  }
+  return BrotliSafeReadBits32(br, n_bits, val);
+}
+
+/*
+   RFC 7932 Section 4 with "..." shortenings and "[]" emendations.
+
+   Each distance ... is represented with a pair <distance code, extra bits>...
+   The distance code is encoded using a prefix code... The number of extra bits
+   can be 0..24... Two additional parameters: NPOSTFIX (0..3), and ...
+   NDIRECT (0..120) ... are encoded in the meta-block header...
+
+   The first 16 distance symbols ... reference past distances... ring buffer ...
+   Next NDIRECT distance symbols ... represent distances from 1 to NDIRECT...
+   [For] distance symbols 16 + NDIRECT and greater ... the number of extra bits
+   ... is given by the following formula:
+
+   [ xcode = dcode - NDIRECT - 16 ]
+   ndistbits = 1 + ([ xcode ] >> (NPOSTFIX + 1))
+
+   ...
+*/
+
+/*
+   RFC 7932 Section 9.2 with "..." shortenings and "[]" emendations.
+
+   ... to get the actual value of the parameter NDIRECT, left-shift this
+   four-bit number by NPOSTFIX bits ...
+*/
+
+/* Remaining formulas from RFC 7932 Section 4 could be rewritten as follows:
+
+     alphabet_size = 16 + NDIRECT + (max_distbits << (NPOSTFIX + 1))
+
+     half = ((xcode >> NPOSTFIX) & 1) << ndistbits
+     postfix = xcode & ((1 << NPOSTFIX) - 1)
+     range_start = 2 * ((1 << ndistbits) - 1 - 1)
+     distance = ((range_start + half + extra) << NPOSTFIX)
+                + postfix + NDIRECT + 1
+
+   NB: ndistbits >= 1 -> range_start >= 0
+   NB: range_start has factor 2, as the range is covered by 2 "halves"
+   NB: extra -1 offset in range_start formula covers the absence of
+       ndistbits = 0 case
+   NB: when NPOSTFIX = 0, NDIRECT is not greater than 15
+
+   In other words, xcode has the following binary structure - XXXHPPP:
+    - XXX represent the number of extra distance bits
+    - H selects upper / lower range of distances
+    - PPP represent "postfix"
+
+  "Regular" distance encoding has NPOSTFIX = 0; omitting the postfix part
+  simplifies distance calculation.
+
+  Using NPOSTFIX > 0 allows cheaper encoding of regular structures, e.g. where
+  most distances have the same remainder of division by 2/4/8. For example,
+  the table of int32_t values that come from different sources; if it is likely
+  that 3 highest bytes of values from the same source are the same, then
+  copy distance often looks like 4x + y.
+
+  Distance calculation could be rewritten to:
+
+    ndistbits = NDISTBITS(NDIRECT, NPOSTFIX)[dcode]
+    distance = OFFSET(NDIRECT, NPOSTFIX)[dcode] + (extra << NPOSTFIX)
+
+  NDISTBITS and OFFSET could be pre-calculated, as NDIRECT and NPOSTFIX could
+  change only once per meta-block.
+*/
+
+/* Calculates distance lookup table (extra-bit counts and offsets per code).
+   NB: it is possible to have all 64 tables precalculated. */
+static void CalculateDistanceLut(BrotliDecoderState* s) {
+  BrotliMetablockBodyArena* b = &s->arena.body;
+  uint32_t npostfix = s->distance_postfix_bits;
+  uint32_t ndirect = s->num_direct_distance_codes;
+  uint32_t alphabet_size_limit = s->distance_hgroup.alphabet_size_limit;
+  uint32_t postfix = 1u << npostfix;  /* Number of codes in one group. */
+  uint32_t j;
+  uint32_t bits = 1;
+  uint32_t half = 0;
+
+  /* Skip short codes. */
+  uint32_t i = BROTLI_NUM_DISTANCE_SHORT_CODES;
+
+  /* Fill direct codes. */
+  for (j = 0; j < ndirect; ++j) {
+    b->dist_extra_bits[i] = 0;
+    b->dist_offset[i] = j + 1;  /* Direct codes map to distances 1.. */
+    ++i;
+  }
+
+  /* Fill regular distance codes. */
+  while (i < alphabet_size_limit) {
+    uint32_t base = ndirect + ((((2 + half) << bits) - 4) << npostfix) + 1;
+    /* Always fill the complete group. */
+    for (j = 0; j < postfix; ++j) {
+      b->dist_extra_bits[i] = (uint8_t)bits;
+      b->dist_offset[i] = base + j;
+      ++i;
+    }
+    bits = bits + half;  /* Extra bits grow after every second group. */
+    half = half ^ 1;  /* Alternate between the lower and upper half. */
+  }
+}
+
+/* Decodes one distance. Precondition: s->distance_code < 0. */
+static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal(
+    int safe, BrotliDecoderState* s, BrotliBitReader* br) {
+  BrotliMetablockBodyArena* b = &s->arena.body;
+  uint32_t code;
+  uint32_t bits;
+  BrotliBitReaderState memento;  /* Only used when |safe| is set. */
+  HuffmanCode* distance_tree = s->distance_hgroup.htrees[s->dist_htree_index];
+  if (!safe) {
+    code = ReadSymbol(distance_tree, br);
+  } else {
+    BrotliBitReaderSaveState(br, &memento);
+    if (!SafeReadSymbol(distance_tree, br, &code)) {
+      return BROTLI_FALSE;
+    }
+  }
+  --s->block_length[2];  /* One distance symbol taken from the block. */
+  /* Convert the distance code to the actual distance by possibly
+     looking up past distances from the s->dist_rb. */
+  s->distance_context = 0;
+  if ((code & ~0xFu) == 0) {  /* code < 16: resolved via s->dist_rb. */
+    s->distance_code = (int)code;
+    TakeDistanceFromRingBuffer(s);
+    return BROTLI_TRUE;
+  }
+  if (!safe) {
+    bits = BrotliReadBits32(br, b->dist_extra_bits[code]);
+  } else {
+    if (!SafeReadBits32(br, b->dist_extra_bits[code], &bits)) {
+      ++s->block_length[2];  /* Undo the decrement above. */
+      BrotliBitReaderRestoreState(br, &memento);
+      return BROTLI_FALSE;
+    }
+  }
+  s->distance_code =
+      (int)(b->dist_offset[code] + (bits << s->distance_postfix_bits));
+  return BROTLI_TRUE;
+}
+
+static BROTLI_INLINE void ReadDistance(
+    BrotliDecoderState* s, BrotliBitReader* br) {
+  ReadDistanceInternal(0, s, br);  /* safe = 0; result is dropped. */
+}
+
+static BROTLI_INLINE BROTLI_BOOL SafeReadDistance(
+    BrotliDecoderState* s, BrotliBitReader* br) {
+  return ReadDistanceInternal(1, s, br);  /* safe = 1: Safe* reads. */
+}
+
+static BROTLI_INLINE BROTLI_BOOL ReadCommandInternal(
+    int safe, BrotliDecoderState* s, BrotliBitReader* br, int* insert_length) {
+  uint32_t cmd_code;
+  uint32_t insert_len_extra = 0;
+  uint32_t copy_length;
+  CmdLutElement v;
+  BrotliBitReaderState memento;  /* Only used when |safe| is set. */
+  if (!safe) {
+    cmd_code = ReadSymbol(s->htree_command, br);
+  } else {
+    BrotliBitReaderSaveState(br, &memento);
+    if (!SafeReadSymbol(s->htree_command, br, &cmd_code)) {
+      return BROTLI_FALSE;
+    }
+  }
+  v = kCmdLut[cmd_code];  /* Offsets and extra-bit counts of the command. */
+  s->distance_code = v.distance_code;
+  s->distance_context = v.context;
+  s->dist_htree_index = s->dist_context_map_slice[s->distance_context];
+  *insert_length = v.insert_len_offset;  /* Base; extra bits added below. */
+  if (!safe) {
+    if (BROTLI_PREDICT_FALSE(v.insert_len_extra_bits != 0)) {
+      insert_len_extra = BrotliReadBits24(br, v.insert_len_extra_bits);
+    }
+    copy_length = BrotliReadBits24(br, v.copy_len_extra_bits);
+  } else {
+    if (!SafeReadBits(br, v.insert_len_extra_bits, &insert_len_extra) ||
+        !SafeReadBits(br, v.copy_len_extra_bits, &copy_length)) {
+      BrotliBitReaderRestoreState(br, &memento);  /* Un-read the command. */
+      return BROTLI_FALSE;
+    }
+  }
+  s->copy_length = (int)copy_length + v.copy_len_offset;
+  --s->block_length[1];  /* One command taken from the block. */
+  *insert_length += (int)insert_len_extra;
+  return BROTLI_TRUE;
+}
+
+static BROTLI_INLINE void ReadCommand(
+    BrotliDecoderState* s, BrotliBitReader* br, int* insert_length) {
+  ReadCommandInternal(0, s, br, insert_length);  /* safe = 0; result dropped. */
+}
+
+static BROTLI_INLINE BROTLI_BOOL SafeReadCommand(
+    BrotliDecoderState* s, BrotliBitReader* br, int* insert_length) {
+  return ReadCommandInternal(1, s, br, insert_length);  /* safe = 1. */
+}
+
+static BROTLI_INLINE BROTLI_BOOL CheckInputAmount(
+    int safe, BrotliBitReader* const br, size_t num) {
+  /* Safe mode skips the bulk check and always reports BROTLI_TRUE. */
+  if (!safe) {
+    return BrotliCheckInputAmount(br, num);
+  }
+  return BROTLI_TRUE;
+}
+
+#define BROTLI_SAFE(METHOD)                       \
+  {                                               \
+    if (safe) { /* Resumable variant. */          \
+      if (!Safe##METHOD) {                        \
+        result = BROTLI_DECODER_NEEDS_MORE_INPUT; \
+        goto saveStateAndReturn;                  \
+      }                                           \
+    } else { /* Unchecked variant. */             \
+      METHOD;                                     \
+    }                                             \
+  }
+
+static BROTLI_INLINE BrotliDecoderErrorCode ProcessCommandsInternal(
+    int safe, BrotliDecoderState* s) {  /* |safe| selects Safe* reads. */
+  int pos = s->pos;  /* Local copy; stored back at saveStateAndReturn. */
+  int i = s->loop_counter;  /* Insert, then copy, countdown; also stored. */
+  BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS;
+  BrotliBitReader* br = &s->br;
+  /* Runs the insert-and-copy command loop, resuming from s->state. */
+  if (!CheckInputAmount(safe, br, 28)) {
+    result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+    goto saveStateAndReturn;
+  }
+  if (!safe) {
+    BROTLI_UNUSED(BrotliWarmupBitReader(br));
+  }
+
+  /* Jump into state machine: resume at the label that matches s->state. */
+  if (s->state == BROTLI_STATE_COMMAND_BEGIN) {
+    goto CommandBegin;
+  } else if (s->state == BROTLI_STATE_COMMAND_INNER) {
+    goto CommandInner;
+  } else if (s->state == BROTLI_STATE_COMMAND_POST_DECODE_LITERALS) {
+    goto CommandPostDecodeLiterals;
+  } else if (s->state == BROTLI_STATE_COMMAND_POST_WRAP_COPY) {
+    goto CommandPostWrapCopy;
+  } else {
+    return BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);
+  }
+
+CommandBegin:
+  if (safe) {  /* So that a BROTLI_SAFE bail-out resumes at this label. */
+    s->state = BROTLI_STATE_COMMAND_BEGIN;
+  }
+  if (!CheckInputAmount(safe, br, 28)) {  /* 156 bits + 7 bytes */
+    s->state = BROTLI_STATE_COMMAND_BEGIN;
+    result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+    goto saveStateAndReturn;
+  }
+  if (BROTLI_PREDICT_FALSE(s->block_length[1] == 0)) {
+    BROTLI_SAFE(DecodeCommandBlockSwitch(s));
+    goto CommandBegin;
+  }
+  /* Read the insert/copy length in the command. */
+  BROTLI_SAFE(ReadCommand(s, br, &i));
+  BROTLI_LOG(("[ProcessCommandsInternal] pos = %d insert = %d copy = %d\n",
+              pos, i, s->copy_length));
+  if (i == 0) {
+    goto CommandPostDecodeLiterals;
+  }
+  s->meta_block_remaining_len -= i;
+
+CommandInner:
+  if (safe) {
+    s->state = BROTLI_STATE_COMMAND_INNER;
+  }
+  /* Read the literals in the command. */
+  if (s->trivial_literal_context) {
+    uint32_t bits;
+    uint32_t value;
+    PreloadSymbol(safe, s->literal_htree, br, &bits, &value);
+    do {
+      if (!CheckInputAmount(safe, br, 28)) {  /* 162 bits + 7 bytes */
+        s->state = BROTLI_STATE_COMMAND_INNER;
+        result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+        goto saveStateAndReturn;
+      }
+      if (BROTLI_PREDICT_FALSE(s->block_length[0] == 0)) {
+        BROTLI_SAFE(DecodeLiteralBlockSwitch(s));
+        PreloadSymbol(safe, s->literal_htree, br, &bits, &value);
+        if (!s->trivial_literal_context) goto CommandInner;
+      }
+      if (!safe) {
+        s->ringbuffer[pos] =
+            (uint8_t)ReadPreloadedSymbol(s->literal_htree, br, &bits, &value);
+      } else {
+        uint32_t literal;
+        if (!SafeReadSymbol(s->literal_htree, br, &literal)) {
+          result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+          goto saveStateAndReturn;
+        }
+        s->ringbuffer[pos] = (uint8_t)literal;
+      }
+      --s->block_length[0];
+      BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos);
+      ++pos;
+      if (BROTLI_PREDICT_FALSE(pos == s->ringbuffer_size)) {
+        s->state = BROTLI_STATE_COMMAND_INNER_WRITE;  /* Buffer is full. */
+        --i;  /* The loop's own decrement is bypassed by the goto. */
+        goto saveStateAndReturn;
+      }
+    } while (--i != 0);
+  } else {
+    uint8_t p1 = s->ringbuffer[(pos - 1) & s->ringbuffer_mask];
+    uint8_t p2 = s->ringbuffer[(pos - 2) & s->ringbuffer_mask];
+    do {
+      const HuffmanCode* hc;
+      uint8_t context;
+      if (!CheckInputAmount(safe, br, 28)) {  /* 162 bits + 7 bytes */
+        s->state = BROTLI_STATE_COMMAND_INNER;
+        result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+        goto saveStateAndReturn;
+      }
+      if (BROTLI_PREDICT_FALSE(s->block_length[0] == 0)) {
+        BROTLI_SAFE(DecodeLiteralBlockSwitch(s));
+        if (s->trivial_literal_context) goto CommandInner;
+      }
+      context = BROTLI_CONTEXT(p1, p2, s->context_lookup);
+      BROTLI_LOG_UINT(context);
+      hc = s->literal_hgroup.htrees[s->context_map_slice[context]];
+      p2 = p1;
+      if (!safe) {
+        p1 = (uint8_t)ReadSymbol(hc, br);
+      } else {
+        uint32_t literal;
+        if (!SafeReadSymbol(hc, br, &literal)) {
+          result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+          goto saveStateAndReturn;
+        }
+        p1 = (uint8_t)literal;
+      }
+      s->ringbuffer[pos] = p1;
+      --s->block_length[0];
+      BROTLI_LOG_UINT(s->context_map_slice[context]);
+      BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos & s->ringbuffer_mask);
+      ++pos;
+      if (BROTLI_PREDICT_FALSE(pos == s->ringbuffer_size)) {
+        s->state = BROTLI_STATE_COMMAND_INNER_WRITE;  /* Buffer is full. */
+        --i;  /* The loop's own decrement is bypassed by the goto. */
+        goto saveStateAndReturn;
+      }
+    } while (--i != 0);
+  }
+  BROTLI_LOG_UINT(s->meta_block_remaining_len);
+  if (BROTLI_PREDICT_FALSE(s->meta_block_remaining_len <= 0)) {
+    s->state = BROTLI_STATE_METABLOCK_DONE;
+    goto saveStateAndReturn;
+  }
+
+CommandPostDecodeLiterals:
+  if (safe) {
+    s->state = BROTLI_STATE_COMMAND_POST_DECODE_LITERALS;
+  }
+  if (s->distance_code >= 0) {
+    /* Implicit distance case. */
+    s->distance_context = s->distance_code ? 0 : 1;
+    --s->dist_rb_idx;
+    s->distance_code = s->dist_rb[s->dist_rb_idx & 3];
+  } else {
+    /* Read distance code in the command, unless it was implicitly zero. */
+    if (BROTLI_PREDICT_FALSE(s->block_length[2] == 0)) {
+      BROTLI_SAFE(DecodeDistanceBlockSwitch(s));
+    }
+    BROTLI_SAFE(ReadDistance(s, br));
+  }
+  BROTLI_LOG(("[ProcessCommandsInternal] pos = %d distance = %d\n",
+              pos, s->distance_code));
+  if (s->max_distance != s->max_backward_distance) {
+    s->max_distance =
+        (pos < s->max_backward_distance) ? pos : s->max_backward_distance;
+  }
+  i = s->copy_length;
+  /* Apply copy of LZ77 back-reference, or static dictionary reference if
+     the distance is larger than the max LZ77 distance */
+  if (s->distance_code > s->max_distance) {
+    /* The maximum allowed distance is BROTLI_MAX_ALLOWED_DISTANCE = 0x7FFFFFFC.
+       With this choice, no signed overflow can occur after decoding
+       a special distance code (e.g., after adding 3 to the last distance). */
+    if (s->distance_code > BROTLI_MAX_ALLOWED_DISTANCE) {
+      BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
+          "len: %d bytes left: %d\n",
+          pos, s->distance_code, i, s->meta_block_remaining_len));
+      return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_DISTANCE);
+    }
+    if (i >= BROTLI_MIN_DICTIONARY_WORD_LENGTH &&
+        i <= BROTLI_MAX_DICTIONARY_WORD_LENGTH) {
+      int address = s->distance_code - s->max_distance - 1;
+      const BrotliDictionary* words = s->dictionary;
+      const BrotliTransforms* transforms = s->transforms;
+      int offset = (int)s->dictionary->offsets_by_length[i];
+      uint32_t shift = s->dictionary->size_bits_by_length[i];
+
+      int mask = (int)BitMask(shift);
+      int word_idx = address & mask;
+      int transform_idx = address >> shift;
+      /* Compensate double distance-ring-buffer roll. */
+      s->dist_rb_idx += s->distance_context;
+      offset += word_idx * i;
+      if (BROTLI_PREDICT_FALSE(!words->data)) {
+        return BROTLI_FAILURE(BROTLI_DECODER_ERROR_DICTIONARY_NOT_SET);
+      }
+      if (transform_idx < (int)transforms->num_transforms) {
+        const uint8_t* word = &words->data[offset];
+        int len = i;
+        if (transform_idx == transforms->cutOffTransforms[0]) {
+          memcpy(&s->ringbuffer[pos], word, (size_t)len);
+          BROTLI_LOG(("[ProcessCommandsInternal] dictionary word: [%.*s]\n",
+                      len, word));
+        } else {
+          len = BrotliTransformDictionaryWord(&s->ringbuffer[pos], word, len,
+              transforms, transform_idx);
+          BROTLI_LOG(("[ProcessCommandsInternal] dictionary word: [%.*s],"
+                      " transform_idx = %d, transformed: [%.*s]\n",
+                      i, word, transform_idx, len, &s->ringbuffer[pos]));
+        }
+        pos += len;
+        s->meta_block_remaining_len -= len;
+        if (pos >= s->ringbuffer_size) {
+          s->state = BROTLI_STATE_COMMAND_POST_WRITE_1;
+          goto saveStateAndReturn;
+        }
+      } else {
+        BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
+            "len: %d bytes left: %d\n",
+            pos, s->distance_code, i, s->meta_block_remaining_len));
+        return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_TRANSFORM);
+      }
+    } else {
+      BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
+          "len: %d bytes left: %d\n",
+          pos, s->distance_code, i, s->meta_block_remaining_len));
+      return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_DICTIONARY);
+    }
+  } else {
+    int src_start = (pos - s->distance_code) & s->ringbuffer_mask;
+    uint8_t* copy_dst = &s->ringbuffer[pos];
+    uint8_t* copy_src = &s->ringbuffer[src_start];
+    int dst_end = pos + i;
+    int src_end = src_start + i;
+    /* Update the recent distances cache. */
+    s->dist_rb[s->dist_rb_idx & 3] = s->distance_code;
+    ++s->dist_rb_idx;
+    s->meta_block_remaining_len -= i;
+    /* There are 32+ bytes of slack in the ring-buffer allocation.
+       Also, we have 16 short codes, that make these 16 bytes irrelevant
+       in the ring-buffer. Let's copy over them as a first guess. */
+    memmove16(copy_dst, copy_src);
+    if (src_end > pos && dst_end > src_start) {
+      /* Regions intersect. */
+      goto CommandPostWrapCopy;
+    }
+    if (dst_end >= s->ringbuffer_size || src_end >= s->ringbuffer_size) {
+      /* At least one region wraps. */
+      goto CommandPostWrapCopy;
+    }
+    pos += i;
+    if (i > 16) {
+      if (i > 32) {
+        memcpy(copy_dst + 16, copy_src + 16, (size_t)(i - 16));
+      } else {
+        /* This branch covers about 45% cases.
+           Fixed size short copy allows more compiler optimizations. */
+        memmove16(copy_dst + 16, copy_src + 16);
+      }
+    }
+  }
+  BROTLI_LOG_UINT(s->meta_block_remaining_len);
+  if (s->meta_block_remaining_len <= 0) {
+    /* Next metablock, if any. */
+    s->state = BROTLI_STATE_METABLOCK_DONE;
+    goto saveStateAndReturn;
+  } else {
+    goto CommandBegin;
+  }
+CommandPostWrapCopy:
+  {
+    int wrap_guard = s->ringbuffer_size - pos;  /* Bytes until buffer end. */
+    while (--i >= 0) {
+      s->ringbuffer[pos] =
+          s->ringbuffer[(pos - s->distance_code) & s->ringbuffer_mask];
+      ++pos;
+      if (BROTLI_PREDICT_FALSE(--wrap_guard == 0)) {
+        s->state = BROTLI_STATE_COMMAND_POST_WRITE_2;
+        goto saveStateAndReturn;
+      }
+    }
+  }
+  if (s->meta_block_remaining_len <= 0) {
+    /* Next metablock, if any. */
+    s->state = BROTLI_STATE_METABLOCK_DONE;
+    goto saveStateAndReturn;
+  } else {
+    goto CommandBegin;
+  }
+
+saveStateAndReturn:  /* Persist the locals for the next invocation. */
+  s->pos = pos;
+  s->loop_counter = i;
+  return result;
+}
+
+#undef BROTLI_SAFE
+
+static BROTLI_NOINLINE BrotliDecoderErrorCode ProcessCommands(
+    BrotliDecoderState* s) {
+  return ProcessCommandsInternal(0, s);  /* safe = 0: non-Safe* read paths. */
+}
+
+static BROTLI_NOINLINE BrotliDecoderErrorCode SafeProcessCommands(
+    BrotliDecoderState* s) {
+  return ProcessCommandsInternal(1, s);  /* safe = 1: Safe* read paths. */
+}
+
+BrotliDecoderResult BrotliDecoderDecompress(
+    size_t encoded_size, const uint8_t* encoded_buffer, size_t* decoded_size,
+    uint8_t* decoded_buffer) {
+  /* One-shot decode: a temporary stream decoder gets the whole input in a
+     single call; any outcome other than full success is reported as error. */
+  BrotliDecoderState state;
+  BrotliDecoderResult status;
+  const uint8_t* in_cursor = encoded_buffer;
+  uint8_t* out_cursor = decoded_buffer;
+  size_t in_left = encoded_size;
+  size_t out_left = *decoded_size;
+  size_t produced = 0;
+  if (!BrotliDecoderStateInit(&state, 0, 0, 0)) {
+    return BROTLI_DECODER_RESULT_ERROR;
+  }
+  status = BrotliDecoderDecompressStream(
+      &state, &in_left, &in_cursor, &out_left, &out_cursor, &produced);
+  *decoded_size = produced;
+  BrotliDecoderStateCleanup(&state);
+  return (status == BROTLI_DECODER_RESULT_SUCCESS)
+      ? status : BROTLI_DECODER_RESULT_ERROR;
+}
+
+/* Invariant: input stream is never overconsumed:
+    - invalid input implies that the whole stream is invalid -> any amount of
+      input could be read and discarded
+    - when result is "needs more input", then at least one more byte is REQUIRED
+      to complete decoding; all input data MUST be consumed by decoder, so
+      client could swap the input buffer
+    - when result is "needs more output" decoder MUST ensure that it doesn't
+      hold more than 7 bits in bit reader; this saves client from swapping input
+      buffer ahead of time
+    - when result is "success" decoder MUST return all unused data back to input
+      buffer; this is possible because the invariant holds on entry */
+BrotliDecoderResult BrotliDecoderDecompressStream(
+    BrotliDecoderState* s, size_t* available_in, const uint8_t** next_in,
+    size_t* available_out, uint8_t** next_out, size_t* total_out) {
+  BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS;
+  BrotliBitReader* br = &s->br;
+  /* Ensure that |total_out| is set, even if no data will ever be pushed out. */
+  if (total_out) {
+    *total_out = s->partial_pos_out;
+  }
+  /* Do not try to process further in a case of unrecoverable error. */
+  if ((int)s->error_code < 0) {
+    return BROTLI_DECODER_RESULT_ERROR;
+  }
+  if (*available_out && (!next_out || !*next_out)) {
+    return SaveErrorCode(
+        s, BROTLI_FAILURE(BROTLI_DECODER_ERROR_INVALID_ARGUMENTS));
+  }
+  if (!*available_out) next_out = 0;  /* No output room: hand on null. */
+  if (s->buffer_length == 0) {  /* Just connect bit reader to input stream. */
+    br->avail_in = *available_in;
+    br->next_in = *next_in;
+  } else {
+    /* At least one byte of input is required. More than one byte of input may
+       be required to complete the transaction -> reading more data must be
+       done in a loop -> do it in a main loop. */
+    result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+    br->next_in = &s->buffer.u8[0];
+  }
+  /* State machine: runs until error, need for more input/output, or done. */
+  for (;;) {  /* Each pass: settle |result|, then dispatch on s->state. */
+    if (result != BROTLI_DECODER_SUCCESS) {
+      /* Error, needs more input/output. */
+      if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) {
+        if (s->ringbuffer != 0) {  /* Pro-actively push output. */
+          BrotliDecoderErrorCode intermediate_result = WriteRingBuffer(s,
+              available_out, next_out, total_out, BROTLI_TRUE);
+          /* WriteRingBuffer checks s->meta_block_remaining_len validity. */
+          if ((int)intermediate_result < 0) {
+            result = intermediate_result;
+            break;
+          }
+        }
+        if (s->buffer_length != 0) {  /* Used with internal buffer. */
+          if (br->avail_in == 0) {
+            /* Successfully finished read transaction.
+               Accumulator contains less than 8 bits, because internal buffer
+               is expanded byte-by-byte until it is enough to complete read. */
+            s->buffer_length = 0;
+            /* Switch to input stream and restart. */
+            result = BROTLI_DECODER_SUCCESS;
+            br->avail_in = *available_in;
+            br->next_in = *next_in;
+            continue;
+          } else if (*available_in != 0) {
+            /* Not enough data in buffer, but can take one more byte from
+               input stream. */
+            result = BROTLI_DECODER_SUCCESS;
+            s->buffer.u8[s->buffer_length] = **next_in;
+            s->buffer_length++;
+            br->avail_in = s->buffer_length;
+            (*next_in)++;
+            (*available_in)--;
+            /* Retry with more data in buffer. */
+            continue;
+          }
+          /* Can't finish reading and no more input. */
+          break;
+        } else {  /* Input stream doesn't contain enough input. */
+          /* Copy tail to internal buffer and return. */
+          *next_in = br->next_in;
+          *available_in = br->avail_in;
+          while (*available_in) {
+            s->buffer.u8[s->buffer_length] = **next_in;
+            s->buffer_length++;
+            (*next_in)++;
+            (*available_in)--;
+          }
+          break;
+        }
+        /* Unreachable. */
+      }
+
+      /* Fail or needs more output. */
+
+      if (s->buffer_length != 0) {
+        /* Just consumed the buffered input and produced some output. Otherwise
+           it would result in "needs more input". Reset internal buffer. */
+        s->buffer_length = 0;
+      } else {
+        /* Using input stream in last iteration. When decoder switches to input
+           stream it has less than 8 bits in accumulator, so it is safe to
+           return unused accumulator bits there. */
+        BrotliBitReaderUnload(br);
+        *available_in = br->avail_in;
+        *next_in = br->next_in;
+      }
+      break;
+    }
+    switch (s->state) {
+      case BROTLI_STATE_UNINITED:
+        /* Prepare to the first read. */
+        if (!BrotliWarmupBitReader(br)) {
+          result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+          break;
+        }
+        /* Decode window size. */
+        result = DecodeWindowBits(s, br);  /* Reads 1..8 bits. */
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        if (s->large_window) {
+          s->state = BROTLI_STATE_LARGE_WINDOW_BITS;
+          break;
+        }
+        s->state = BROTLI_STATE_INITIALIZE;
+        break;
+
+      case BROTLI_STATE_LARGE_WINDOW_BITS:
+        if (!BrotliSafeReadBits(br, 6, &s->window_bits)) {
+          result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+          break;
+        }
+        if (s->window_bits < BROTLI_LARGE_MIN_WBITS ||
+            s->window_bits > BROTLI_LARGE_MAX_WBITS) {
+          result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS);
+          break;
+        }
+        s->state = BROTLI_STATE_INITIALIZE;
+      /* Fall through. */
+
+      case BROTLI_STATE_INITIALIZE:
+        BROTLI_LOG_UINT(s->window_bits);
+        /* Maximum distance, see section 9.1. of the spec. */
+        s->max_backward_distance = (1 << s->window_bits) - BROTLI_WINDOW_GAP;
+
+        /* Allocate memory for both block_type_trees and block_len_trees. */
+        s->block_type_trees = (HuffmanCode*)BROTLI_DECODER_ALLOC(s,
+            sizeof(HuffmanCode) * 3 *
+                (BROTLI_HUFFMAN_MAX_SIZE_258 + BROTLI_HUFFMAN_MAX_SIZE_26));
+        if (s->block_type_trees == 0) {
+          result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_BLOCK_TYPE_TREES);
+          break;
+        }
+        s->block_len_trees =
+            s->block_type_trees + 3 * BROTLI_HUFFMAN_MAX_SIZE_258;
+
+        s->state = BROTLI_STATE_METABLOCK_BEGIN;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_BEGIN:
+        BrotliDecoderStateMetablockBegin(s);
+        BROTLI_LOG_UINT(s->pos);
+        s->state = BROTLI_STATE_METABLOCK_HEADER;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER:
+        result = DecodeMetaBlockLength(s, br);  /* Reads 2 - 31 bits. */
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        BROTLI_LOG_UINT(s->is_last_metablock);
+        BROTLI_LOG_UINT(s->meta_block_remaining_len);
+        BROTLI_LOG_UINT(s->is_metadata);
+        BROTLI_LOG_UINT(s->is_uncompressed);
+        if (s->is_metadata || s->is_uncompressed) {
+          if (!BrotliJumpToByteBoundary(br)) {
+            result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_PADDING_1);
+            break;
+          }
+        }
+        if (s->is_metadata) {
+          s->state = BROTLI_STATE_METADATA;
+          break;
+        }
+        if (s->meta_block_remaining_len == 0) {
+          s->state = BROTLI_STATE_METABLOCK_DONE;
+          break;
+        }
+        BrotliCalculateRingBufferSize(s);
+        if (s->is_uncompressed) {
+          s->state = BROTLI_STATE_UNCOMPRESSED;
+          break;
+        }
+        s->state = BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_HEADER;
+      /* Fall through. */
+
+      case BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_HEADER: {
+        BrotliMetablockHeaderArena* h = &s->arena.header;
+        s->loop_counter = 0;
+        /* Initialize compressed metablock header arena. */
+        h->sub_loop_counter = 0;
+        /* Make small negative indexes addressable. */
+        h->symbol_lists =
+            &h->symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1];
+        h->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
+        h->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE;
+        h->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
+        s->state = BROTLI_STATE_HUFFMAN_CODE_0;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_CODE_0:
+        if (s->loop_counter >= 3) {
+          s->state = BROTLI_STATE_METABLOCK_HEADER_2;
+          break;
+        }
+        /* Reads 1..11 bits. */
+        result = DecodeVarLenUint8(s, br, &s->num_block_types[s->loop_counter]);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        s->num_block_types[s->loop_counter]++;
+        BROTLI_LOG_UINT(s->num_block_types[s->loop_counter]);
+        if (s->num_block_types[s->loop_counter] < 2) {
+          s->loop_counter++;
+          break;
+        }
+        s->state = BROTLI_STATE_HUFFMAN_CODE_1;
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_CODE_1: {
+        uint32_t alphabet_size = s->num_block_types[s->loop_counter] + 2;
+        int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_258;
+        result = ReadHuffmanCode(alphabet_size, alphabet_size,
+            &s->block_type_trees[tree_offset], NULL, s);
+        if (result != BROTLI_DECODER_SUCCESS) break;
+        s->state = BROTLI_STATE_HUFFMAN_CODE_2;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_CODE_2: {
+        uint32_t alphabet_size = BROTLI_NUM_BLOCK_LEN_SYMBOLS;
+        int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_26;
+        result = ReadHuffmanCode(alphabet_size, alphabet_size,
+            &s->block_len_trees[tree_offset], NULL, s);
+        if (result != BROTLI_DECODER_SUCCESS) break;
+        s->state = BROTLI_STATE_HUFFMAN_CODE_3;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_CODE_3: {
+        int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_26;
+        if (!SafeReadBlockLength(s, &s->block_length[s->loop_counter],
+            &s->block_len_trees[tree_offset], br)) {
+          result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+          break;
+        }
+        BROTLI_LOG_UINT(s->block_length[s->loop_counter]);
+        s->loop_counter++;
+        s->state = BROTLI_STATE_HUFFMAN_CODE_0;
+        break;
+      }
+
+      case BROTLI_STATE_UNCOMPRESSED: {
+        result = CopyUncompressedBlockToOutput(
+            available_out, next_out, total_out, s);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        s->state = BROTLI_STATE_METABLOCK_DONE;
+        break;
+      }
+
+      case BROTLI_STATE_METADATA:
+        for (; s->meta_block_remaining_len > 0; --s->meta_block_remaining_len) {
+          uint32_t bits;
+          /* Read one byte and ignore it. */
+          if (!BrotliSafeReadBits(br, 8, &bits)) {
+            result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+            break;
+          }
+        }
+        if (result == BROTLI_DECODER_SUCCESS) {
+          s->state = BROTLI_STATE_METABLOCK_DONE;
+        }
+        break;
+
+      case BROTLI_STATE_METABLOCK_HEADER_2: {
+        uint32_t bits;
+        if (!BrotliSafeReadBits(br, 6, &bits)) {
+          result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+          break;
+        }
+        s->distance_postfix_bits = bits & BitMask(2);
+        bits >>= 2;
+        s->num_direct_distance_codes = bits << s->distance_postfix_bits;
+        BROTLI_LOG_UINT(s->num_direct_distance_codes);
+        BROTLI_LOG_UINT(s->distance_postfix_bits);
+        s->context_modes =
+            (uint8_t*)BROTLI_DECODER_ALLOC(s, (size_t)s->num_block_types[0]);
+        if (s->context_modes == 0) {
+          result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_CONTEXT_MODES);
+          break;
+        }
+        s->loop_counter = 0;
+        s->state = BROTLI_STATE_CONTEXT_MODES;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_CONTEXT_MODES:
+        result = ReadContextModes(s);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        s->state = BROTLI_STATE_CONTEXT_MAP_1;
+      /* Fall through. */
+
+      case BROTLI_STATE_CONTEXT_MAP_1:
+        result = DecodeContextMap(
+            s->num_block_types[0] << BROTLI_LITERAL_CONTEXT_BITS,
+            &s->num_literal_htrees, &s->context_map, s);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        DetectTrivialLiteralBlockTypes(s);
+        s->state = BROTLI_STATE_CONTEXT_MAP_2;
+      /* Fall through. */
+
+      case BROTLI_STATE_CONTEXT_MAP_2: {
+        uint32_t npostfix = s->distance_postfix_bits;
+        uint32_t ndirect = s->num_direct_distance_codes;
+        uint32_t distance_alphabet_size_max = BROTLI_DISTANCE_ALPHABET_SIZE(
+            npostfix, ndirect, BROTLI_MAX_DISTANCE_BITS);
+        uint32_t distance_alphabet_size_limit = distance_alphabet_size_max;
+        BROTLI_BOOL allocation_success = BROTLI_TRUE;
+        if (s->large_window) {
+          BrotliDistanceCodeLimit limit = BrotliCalculateDistanceCodeLimit(
+              BROTLI_MAX_ALLOWED_DISTANCE, npostfix, ndirect);
+          distance_alphabet_size_max = BROTLI_DISTANCE_ALPHABET_SIZE(
+              npostfix, ndirect, BROTLI_LARGE_MAX_DISTANCE_BITS);
+          distance_alphabet_size_limit = limit.max_alphabet_size;
+        }
+        result = DecodeContextMap(
+            s->num_block_types[2] << BROTLI_DISTANCE_CONTEXT_BITS,
+            &s->num_dist_htrees, &s->dist_context_map, s);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
+            s, &s->literal_hgroup, BROTLI_NUM_LITERAL_SYMBOLS,
+            BROTLI_NUM_LITERAL_SYMBOLS, s->num_literal_htrees);
+        allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
+            s, &s->insert_copy_hgroup, BROTLI_NUM_COMMAND_SYMBOLS,
+            BROTLI_NUM_COMMAND_SYMBOLS, s->num_block_types[1]);
+        allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
+            s, &s->distance_hgroup, distance_alphabet_size_max,
+            distance_alphabet_size_limit, s->num_dist_htrees);
+        if (!allocation_success) {
+          return SaveErrorCode(s,
+              BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS));
+        }
+        s->loop_counter = 0;
+        s->state = BROTLI_STATE_TREE_GROUP;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_TREE_GROUP: {
+        HuffmanTreeGroup* hgroup = NULL;
+        switch (s->loop_counter) {
+          case 0: hgroup = &s->literal_hgroup; break;
+          case 1: hgroup = &s->insert_copy_hgroup; break;
+          case 2: hgroup = &s->distance_hgroup; break;
+          default: return SaveErrorCode(s, BROTLI_FAILURE(
+              BROTLI_DECODER_ERROR_UNREACHABLE));
+        }
+        result = HuffmanTreeGroupDecode(hgroup, s);
+        if (result != BROTLI_DECODER_SUCCESS) break;
+        s->loop_counter++;
+        if (s->loop_counter < 3) {
+          break;
+        }
+        s->state = BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_BODY;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_BODY:
+        PrepareLiteralDecoding(s);
+        s->dist_context_map_slice = s->dist_context_map;
+        s->htree_command = s->insert_copy_hgroup.htrees[0];
+        if (!BrotliEnsureRingBuffer(s)) {
+          result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_2);
+          break;
+        }
+        CalculateDistanceLut(s);
+        s->state = BROTLI_STATE_COMMAND_BEGIN;
+      /* Fall through. */
+
+      case BROTLI_STATE_COMMAND_BEGIN:
+      /* Fall through. */
+      case BROTLI_STATE_COMMAND_INNER:
+      /* Fall through. */
+      case BROTLI_STATE_COMMAND_POST_DECODE_LITERALS:
+      /* Fall through. */
+      case BROTLI_STATE_COMMAND_POST_WRAP_COPY:
+        result = ProcessCommands(s);  /* First try with safe == 0. */
+        if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) {
+          result = SafeProcessCommands(s);  /* Redo with safe == 1. */
+        }
+        break;
+
+      case BROTLI_STATE_COMMAND_INNER_WRITE:
+      /* Fall through. */
+      case BROTLI_STATE_COMMAND_POST_WRITE_1:
+      /* Fall through. */
+      case BROTLI_STATE_COMMAND_POST_WRITE_2:
+        result = WriteRingBuffer(
+            s, available_out, next_out, total_out, BROTLI_FALSE);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        WrapRingBuffer(s);
+        if (s->ringbuffer_size == 1 << s->window_bits) {
+          s->max_distance = s->max_backward_distance;
+        }
+        if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_1) {
+          if (s->meta_block_remaining_len == 0) {
+            /* Next metablock, if any. */
+            s->state = BROTLI_STATE_METABLOCK_DONE;
+          } else {
+            s->state = BROTLI_STATE_COMMAND_BEGIN;
+          }
+          break;
+        } else if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_2) {
+          s->state = BROTLI_STATE_COMMAND_POST_WRAP_COPY;
+        } else {  /* BROTLI_STATE_COMMAND_INNER_WRITE */
+          if (s->loop_counter == 0) {
+            if (s->meta_block_remaining_len == 0) {
+              s->state = BROTLI_STATE_METABLOCK_DONE;
+            } else {
+              s->state = BROTLI_STATE_COMMAND_POST_DECODE_LITERALS;
+            }
+            break;
+          }
+          s->state = BROTLI_STATE_COMMAND_INNER;
+        }
+        break;
+
+      case BROTLI_STATE_METABLOCK_DONE:
+        if (s->meta_block_remaining_len < 0) {
+          result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_2);
+          break;
+        }
+        BrotliDecoderStateCleanupAfterMetablock(s);
+        if (!s->is_last_metablock) {
+          s->state = BROTLI_STATE_METABLOCK_BEGIN;
+          break;
+        }
+        if (!BrotliJumpToByteBoundary(br)) {
+          result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_PADDING_2);
+          break;
+        }
+        if (s->buffer_length == 0) {
+          BrotliBitReaderUnload(br);
+          *available_in = br->avail_in;
+          *next_in = br->next_in;
+        }
+        s->state = BROTLI_STATE_DONE;
+      /* Fall through. */
+
+      case BROTLI_STATE_DONE:
+        if (s->ringbuffer != 0) {
+          result = WriteRingBuffer(
+              s, available_out, next_out, total_out, BROTLI_TRUE);
+          if (result != BROTLI_DECODER_SUCCESS) {
+            break;
+          }
+        }
+        return SaveErrorCode(s, result);
+    }
+  }
+  return SaveErrorCode(s, result);
+}
+
+BROTLI_BOOL BrotliDecoderHasMoreOutput(const BrotliDecoderState* s) {
+  /* Nothing is pending after an unrecoverable error (leftover output is
+     considered nonsensical) or while there is no ring buffer at all. */
+  if ((int)s->error_code < 0 || s->ringbuffer == 0) {
+    return BROTLI_FALSE;
+  }
+  return TO_BROTLI_BOOL(UnwrittenBytes(s, BROTLI_FALSE) != 0);
+}
+
+const uint8_t* BrotliDecoderTakeOutput(BrotliDecoderState* s, size_t* size) {
+  /* |*size| is the byte cap on entry (0 selects 1 << 24) and the number of
+     bytes reported back on exit; it is zeroed whenever 0 is returned. */
+  uint8_t* chunk = 0;
+  const size_t limit = *size ? *size : 1u << 24;
+  size_t room = limit;
+  BrotliDecoderErrorCode rc;
+  if ((s->ringbuffer == 0) || ((int)s->error_code < 0)) {
+    *size = 0;
+    return 0;
+  }
+  WrapRingBuffer(s);
+  rc = WriteRingBuffer(s, &room, &chunk, 0, BROTLI_TRUE);
+  if (rc != BROTLI_DECODER_SUCCESS && rc != BROTLI_DECODER_NEEDS_MORE_OUTPUT) {
+    /* Broken stream: BrotliDecoderDecompressStream normally catches this
+       first, so this branch is only a safeguard. */
+    if ((int)rc < 0) SaveErrorCode(s, rc);
+    *size = 0;
+    return 0;
+  }
+  /* Reported size = cap minus what WriteRingBuffer left unused. */
+  *size = limit - room;
+  return chunk;
+}
+
+BROTLI_BOOL BrotliDecoderIsUsed(const BrotliDecoderState* s) {
+  if (s->state != BROTLI_STATE_UNINITED) return BROTLI_TRUE;  /* state moved */
+  return TO_BROTLI_BOOL(BrotliGetAvailableBits(&s->br) != 0);  /* bits held */
+}
+
+BROTLI_BOOL BrotliDecoderIsFinished(const BrotliDecoderState* s) {
+  if (s->state != BROTLI_STATE_DONE) return BROTLI_FALSE;  /* not at DONE */
+  return !BrotliDecoderHasMoreOutput(s);  /* finished once nothing is left */
+}
+
+BrotliDecoderErrorCode BrotliDecoderGetErrorCode(const BrotliDecoderState* s) {
+  return (BrotliDecoderErrorCode)s->error_code;  /* Stored code, API type. */
+}
+
+const char* BrotliDecoderErrorString(BrotliDecoderErrorCode c) {
+  switch (c) {  /* Cases are generated from BROTLI_DECODER_ERROR_CODES_LIST. */
+#define BROTLI_ERROR_CODE_CASE_(PREFIX, NAME, CODE) \
+    case BROTLI_DECODER ## PREFIX ## NAME: return #NAME;  /* stringized NAME */
+#define BROTLI_NOTHING_
+    BROTLI_DECODER_ERROR_CODES_LIST(BROTLI_ERROR_CODE_CASE_, BROTLI_NOTHING_)
+#undef BROTLI_ERROR_CODE_CASE_
+#undef BROTLI_NOTHING_
+    default: return "INVALID";  /* No generated case matched |c|. */
+  }
+}
+
+uint32_t BrotliDecoderVersion(void) {  /* (void): a true prototype in C. */
+  return BROTLI_VERSION;  /* Version constant this decoder was built with. */
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/huffman.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/huffman.c
new file mode 100644
index 0000000000..30c40d33f2
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/huffman.c
@@ -0,0 +1,339 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Utilities for building Huffman decoding tables. */
+
+#include "./huffman.h"
+
+#include <string.h>  /* memcpy, memset */
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_REVERSE_BITS_MAX 8  /* width of the bit field that is reversed */
+/* BASE is the bit offset of that field; kReverseBits[i] is i, 8 bits mirrored. */
+#if defined(BROTLI_RBIT)
+#define BROTLI_REVERSE_BITS_BASE \
+  ((sizeof(brotli_reg_t) << 3) - BROTLI_REVERSE_BITS_MAX)
+#else
+#define BROTLI_REVERSE_BITS_BASE 0
+static uint8_t kReverseBits[1 << BROTLI_REVERSE_BITS_MAX] = {
+  0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0,
+  0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
+  0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
+  0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
+  0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4,
+  0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
+  0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC,
+  0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
+  0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
+  0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
+  0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA,
+  0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
+  0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6,
+  0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
+  0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
+  0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
+  0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1,
+  0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
+  0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9,
+  0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
+  0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
+  0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
+  0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED,
+  0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
+  0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3,
+  0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
+  0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
+  0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
+  0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7,
+  0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
+  0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF,
+  0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
+};
+#endif  /* BROTLI_RBIT */
+/* Mask selecting the most significant bit of the field to be reversed. */
+#define BROTLI_REVERSE_BITS_LOWEST \
+  ((brotli_reg_t)1 << (BROTLI_REVERSE_BITS_MAX - 1 + BROTLI_REVERSE_BITS_BASE))
+
+/* Returns reverse(num >> BROTLI_REVERSE_BITS_BASE, BROTLI_REVERSE_BITS_MAX),
+   where reverse(value, len) is the bit-wise reversal of the len least
+   significant bits of value. */
+static BROTLI_INLINE brotli_reg_t BrotliReverseBits(brotli_reg_t num) {
+#if defined(BROTLI_RBIT)
+  return BROTLI_RBIT(num);  /* reversal macro supplied outside this file */
+#else
+  return kReverseBits[num];  /* table path: num must be below the table size */
+#endif
+}
+
+/* Writes |code| into table[end - step], table[end - 2 * step], ..., table[0].
+   Assumes that |end| is an integer multiple of |step|. */
+static BROTLI_INLINE void ReplicateValue(HuffmanCode* table,
+                                         int step, int end,
+                                         HuffmanCode code) {
+  int pos = end;
+  do {
+    table[pos -= step] = code;
+  } while (pos > 0);
+}
+
+/* Computes the width, in bits, of the next 2nd level table. |count| holds the
+   histogram of bit lengths for the symbols not yet placed and |len| is the
+   code length of the symbol about to be processed. */
+static BROTLI_INLINE int NextTableBitSize(const uint16_t* const count,
+                                          int len, int root_bits) {
+  /* Free slots in a sub-table that is exactly (len - root_bits) bits wide. */
+  int slots = 1 << (len - root_bits);
+  /* Widen (doubling the slots) until codes of the current length fill it. */
+  for (; len < BROTLI_HUFFMAN_MAX_CODE_LENGTH; ++len, slots <<= 1) {
+    slots -= count[len];
+    if (slots <= 0) break;
+  }
+  return len - root_bits;
+}
+
+void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table,
+                                        const uint8_t* const code_lengths,
+                                        uint16_t* count) {
+  HuffmanCode code;       /* current table entry */
+  int symbol;             /* symbol index in original or sorted table */
+  brotli_reg_t key;       /* prefix code */
+  brotli_reg_t key_step;  /* prefix code addend */
+  int step;               /* step size to replicate values in current table */
+  int table_size;         /* size of current table */
+  int sorted[BROTLI_CODE_LENGTH_CODES];  /* symbols sorted by code length */
+  /* offsets in sorted table for each length */
+  int offset[BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH + 1];
+  int bits;        /* code length currently being processed */
+  int bits_count;  /* codes of that length still to be placed */
+  BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH <=
+                BROTLI_REVERSE_BITS_MAX);
+  /* Generate offsets into sorted symbol table by code length: offset[bits]
+     becomes the index of the last sorted slot holding that length. */
+  symbol = -1;
+  bits = 1;
+  BROTLI_REPEAT(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH, {
+    symbol += count[bits];
+    offset[bits] = symbol;
+    bits++;
+  });
+  /* Symbols with code length 0 are placed after all other symbols. */
+  offset[0] = BROTLI_CODE_LENGTH_CODES - 1;
+  /* Sort symbols by length, by symbol order within each length. Walking the
+     symbols downwards with offset[]-- fills each run back to front. */
+  symbol = BROTLI_CODE_LENGTH_CODES;
+  do {  /* 6 symbols per pass; assumes the count is a multiple of 6 - confirm */
+    BROTLI_REPEAT(6, {
+      symbol--;
+      sorted[offset[code_lengths[symbol]]--] = symbol;
+    });
+  } while (symbol != 0);
+
+  table_size = 1 << BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH;
+  /* Special case: all symbols but one have 0 code length (offset[0] was
+     decremented once per such symbol); all entries get one 0-bit code. */
+  if (offset[0] == 0) {
+    code = ConstructHuffmanCode(0, (uint16_t)sorted[0]);
+    for (key = 0; key < (brotli_reg_t)table_size; ++key) {
+      table[key] = code;
+    }
+    return;
+  }
+  /* Fill in table, shortest codes first: |key| advances by |key_step| and is
+     bit-reversed to find the first slot; the code repeats every |step|. */
+  key = 0;
+  key_step = BROTLI_REVERSE_BITS_LOWEST;
+  symbol = 0;
+  bits = 1;
+  step = 2;
+  do {
+    for (bits_count = count[bits]; bits_count != 0; --bits_count) {
+      code = ConstructHuffmanCode((uint8_t)bits, (uint16_t)sorted[symbol++]);
+      ReplicateValue(&table[BrotliReverseBits(key)], step, table_size, code);
+      key += key_step;
+    }
+    step <<= 1;      /* one more bit: replicate half as often */
+    key_step >>= 1;  /* and advance the reversed key by half as much */
+  } while (++bits <= BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH);
+}
+
+uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
+                                 int root_bits,
+                                 const uint16_t* const symbol_lists,
+                                 uint16_t* count) {
+  HuffmanCode code;       /* current table entry */
+  HuffmanCode* table;     /* next available space in table */
+  int len;                /* current code length */
+  int symbol;             /* symbol index in original or sorted table */
+  brotli_reg_t key;       /* prefix code */
+  brotli_reg_t key_step;  /* prefix code addend */
+  brotli_reg_t sub_key;   /* 2nd level table prefix code */
+  brotli_reg_t sub_key_step;  /* 2nd level table prefix code addend */
+  int step;               /* step size to replicate values in current table */
+  int table_bits;         /* key length of current table */
+  int table_size;         /* size of current table */
+  int total_size;         /* sum of root table size and 2nd level table sizes */
+  int max_length = -1;    /* negative list index first, then max code length */
+  int bits;               /* code length being filled into the root table */
+  int bits_count;         /* codes of that length still to be placed */
+  /* Builds a root table of 1 << root_bits entries plus 2nd level tables. */
+  BROTLI_DCHECK(root_bits <= BROTLI_REVERSE_BITS_MAX);
+  BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH - root_bits <=
+                BROTLI_REVERSE_BITS_MAX);
+  /* Negative indices: presumably per-length list heads, 0xFFFF = empty. */
+  while (symbol_lists[max_length] == 0xFFFF) max_length--;
+  max_length += BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1;
+
+  table = root_table;
+  table_bits = root_bits;
+  table_size = 1 << table_bits;
+  total_size = table_size;
+
+  /* Fill in the root table. Reduce the table size if possible,
+     and create the repetitions by memcpy. */
+  if (table_bits > max_length) {
+    table_bits = max_length;
+    table_size = 1 << table_bits;
+  }
+  key = 0;
+  key_step = BROTLI_REVERSE_BITS_LOWEST;
+  bits = 1;
+  step = 2;
+  do {
+    symbol = bits - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);  /* list head index */
+    for (bits_count = count[bits]; bits_count != 0; --bits_count) {
+      symbol = symbol_lists[symbol];  /* follow the chain to the next symbol */
+      code = ConstructHuffmanCode((uint8_t)bits, (uint16_t)symbol);
+      ReplicateValue(&table[BrotliReverseBits(key)], step, table_size, code);
+      key += key_step;
+    }
+    step <<= 1;
+    key_step >>= 1;
+  } while (++bits <= table_bits);
+
+  /* If root_bits != table_bits then replicate to fill the remaining slots. */
+  while (total_size != table_size) {
+    memcpy(&table[table_size], &table[0],
+           (size_t)table_size * sizeof(table[0]));
+    table_size <<= 1;
+  }
+
+  /* Fill in 2nd level tables and add pointers to root table. */
+  key_step = BROTLI_REVERSE_BITS_LOWEST >> (root_bits - 1);
+  sub_key = (BROTLI_REVERSE_BITS_LOWEST << 1);
+  sub_key_step = BROTLI_REVERSE_BITS_LOWEST;
+  for (len = root_bits + 1, step = 2; len <= max_length; ++len) {
+    symbol = len - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);
+    for (; count[len] != 0; --count[len]) {  /* drains the caller's count[] */
+      if (sub_key == (BROTLI_REVERSE_BITS_LOWEST << 1U)) {  /* new sub-table */
+        table += table_size;
+        table_bits = NextTableBitSize(count, len, root_bits);
+        table_size = 1 << table_bits;
+        total_size += table_size;
+        sub_key = BrotliReverseBits(key);
+        key += key_step;
+        root_table[sub_key] = ConstructHuffmanCode(
+            (uint8_t)(table_bits + root_bits),
+            (uint16_t)(((size_t)(table - root_table)) - sub_key));
+        sub_key = 0;
+      }
+      symbol = symbol_lists[symbol];
+      code = ConstructHuffmanCode((uint8_t)(len - root_bits), (uint16_t)symbol);
+      ReplicateValue(
+          &table[BrotliReverseBits(sub_key)], step, table_size, code);
+      sub_key += sub_key_step;
+    }
+    step <<= 1;
+    sub_key_step >>= 1;
+  }
+  return (uint32_t)total_size;  /* root table plus all 2nd level tables */
+}
+
+uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
+                                       int root_bits,
+                                       uint16_t* val,
+                                       uint32_t num_symbols) {
+  /* Builds the lookup table of a "simple" prefix code with up to four
+     symbols taken from |val|. |num_symbols| values 0..3 select 1..4 symbols
+     (code lengths 0 / 1,1 / 1,2,2 / 2,2,2,2); 4 selects four symbols with
+     lengths 1,2,3,3. Cases 3 and 4 reorder |val| in place. The smallest
+     complete table is written first and then doubled until it holds
+     1 << |root_bits| entries; that size is returned. Any other
+     |num_symbols| writes nothing and just replicates table[0]. */
+  const uint32_t goal_size = 1U << root_bits;
+  uint32_t filled = 1;  /* number of leading entries that are populated */
+  if (num_symbols == 0) {
+    /* Single symbol: a zero-length code. */
+    table[0] = ConstructHuffmanCode(0, val[0]);
+  } else if (num_symbols == 1) {
+    /* Two 1-bit codes; slot 0 takes val[0] only if val[1] is larger. */
+    const int swapped = !(val[1] > val[0]);
+    table[0] = ConstructHuffmanCode(1, val[swapped]);
+    table[1] = ConstructHuffmanCode(1, val[!swapped]);
+    filled = 2;
+  } else if (num_symbols == 2) {
+    /* val[0] owns the 1-bit code; val[1] and val[2] split the odd slots. */
+    const int swapped = !(val[2] > val[1]);
+    table[0] = ConstructHuffmanCode(1, val[0]);
+    table[1] = ConstructHuffmanCode(2, val[1 + swapped]);
+    table[2] = ConstructHuffmanCode(1, val[0]);
+    table[3] = ConstructHuffmanCode(2, val[2 - swapped]);
+    filled = 4;
+  } else if (num_symbols == 3) {
+    /* Exchange sort leaves val[0..3] in ascending order. */
+    int lo;
+    int hi;
+    for (lo = 0; lo < 3; ++lo) {
+      for (hi = lo + 1; hi < 4; ++hi) {
+        if (val[hi] < val[lo]) {
+          const uint16_t smaller = val[hi];
+          val[hi] = val[lo];
+          val[lo] = smaller;
+        }
+      }
+    }
+    /* Sorted symbols land in slots 0, 2, 1, 3 (2-bit reversed indices). */
+    table[0] = ConstructHuffmanCode(2, val[0]);
+    table[1] = ConstructHuffmanCode(2, val[2]);
+    table[2] = ConstructHuffmanCode(2, val[1]);
+    table[3] = ConstructHuffmanCode(2, val[3]);
+    filled = 4;
+  } else if (num_symbols == 4) {
+    int slot;
+    /* Order the two 3-bit symbols so that val[2] <= val[3]. */
+    if (val[3] < val[2]) {
+      const uint16_t smaller = val[3];
+      val[3] = val[2];
+      val[2] = smaller;
+    }
+    /* Even slots carry the 1-bit code, slots 1 and 5 the 2-bit code. */
+    for (slot = 0; slot < 8; slot += 2) {
+      table[slot] = ConstructHuffmanCode(1, val[0]);
+    }
+    table[1] = ConstructHuffmanCode(2, val[1]);
+    table[5] = ConstructHuffmanCode(2, val[1]);
+    table[3] = ConstructHuffmanCode(3, val[2]);
+    table[7] = ConstructHuffmanCode(3, val[3]);
+    filled = 8;
+  }
+  /* Double the populated prefix until the table reaches goal_size. */
+  while (filled != goal_size) {
+    memcpy(&table[filled], &table[0],
+           (size_t)filled * sizeof(table[0]));
+    filled <<= 1;
+  }
+  return goal_size;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/huffman.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/huffman.h
new file mode 100644
index 0000000000..70e8469e3a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/huffman.h
@@ -0,0 +1,128 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Utilities for building Huffman decoding tables. */
+
+#ifndef BROTLI_DEC_HUFFMAN_H_
+#define BROTLI_DEC_HUFFMAN_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_HUFFMAN_MAX_CODE_LENGTH 15  /* code length bound for builders */
+
+/* Maximum possible Huffman table size for an alphabet size of (index * 32),
+   max code length 15 and root table bits 8. This table describes table sizes
+   for alphabets containing up to 1152 = 36 * 32 symbols (37 entries). */
+static const uint16_t kMaxHuffmanTableSize[] = {
+  256, 402, 436, 468, 500, 534, 566, 598, 630, 662, 694, 726, 758, 790, 822,
+  854, 886, 920, 952, 984, 1016, 1048, 1080, 1112, 1144, 1176, 1208, 1240, 1272,
+  1304, 1336, 1368, 1400, 1432, 1464, 1496, 1528};
+/* BROTLI_NUM_BLOCK_LEN_SYMBOLS == 26 */
+#define BROTLI_HUFFMAN_MAX_SIZE_26 396
+/* BROTLI_MAX_BLOCK_TYPE_SYMBOLS == 258 */
+#define BROTLI_HUFFMAN_MAX_SIZE_258 632
+/* BROTLI_MAX_CONTEXT_MAP_SYMBOLS == 272 */
+#define BROTLI_HUFFMAN_MAX_SIZE_272 646
+/* Longest code length used for the code-length alphabet itself. */
+#define BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH 5
+/* Fast-load HuffmanCode layout: 32-bit ARM with the aligned attribute only. */
+#if ((defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_32)) && \
+  BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0))
+#define BROTLI_HUFFMAN_CODE_FAST_LOAD
+#endif
+
+#if !defined(BROTLI_HUFFMAN_CODE_FAST_LOAD)
+/* Unpacked table entry; the fast-load build uses a packed uint32_t instead,
+ * so do not create it directly - use the ConstructHuffmanCode constructor! */
+typedef struct {
+  uint8_t bits;    /* number of bits used for this symbol */
+  uint16_t value;  /* symbol value or table offset */
+} HuffmanCode;
+
+static BROTLI_INLINE HuffmanCode ConstructHuffmanCode(const uint8_t bits,
+    const uint16_t value) {
+  HuffmanCode entry;  /* assembled member by member */
+  entry.value = value;
+  entry.bits = bits;
+  return entry;
+}
+
+/* Please use the following macros to optimize HuffmanCode accesses in hot
+ * paths.
+ *
+ * For example, assuming |table| contains a HuffmanCode pointer:
+ *
+ *   BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table);
+ *   BROTLI_HC_ADJUST_TABLE_INDEX(table, index_into_table);
+ *   *bits = BROTLI_HC_FAST_LOAD_BITS(table);
+ *   *value = BROTLI_HC_FAST_LOAD_VALUE(table);
+ *   BROTLI_HC_ADJUST_TABLE_INDEX(table, offset);
+ *   *bits2 = BROTLI_HC_FAST_LOAD_BITS(table);
+ *   *value2 = BROTLI_HC_FAST_LOAD_VALUE(table);
+ *
+ */
+
+#define BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(H)  /* expands to nothing here */
+#define BROTLI_HC_ADJUST_TABLE_INDEX(H, V) H += (V)
+/* Struct layout: fields are read straight through the pointer.
+   These must be given a HuffmanCode pointer! */
+#define BROTLI_HC_FAST_LOAD_BITS(H) (H->bits)
+#define BROTLI_HC_FAST_LOAD_VALUE(H) (H->value)
+
+#else /* BROTLI_HUFFMAN_CODE_FAST_LOAD */
+
+typedef BROTLI_ALIGNED(4) uint32_t HuffmanCode;
+/* Packs value into bits 16..31, bits into 0..7; casts avoid int-shift overflow. */
+static BROTLI_INLINE HuffmanCode ConstructHuffmanCode(const uint8_t bits,
+    const uint16_t value) {
+  return ((uint32_t)value << 16) | (uint32_t)bits;  /* widen before shifting */
+}
+
+#define BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(H) uint32_t __fastload_##H = (*H)
+#define BROTLI_HC_ADJUST_TABLE_INDEX(H, V) H += (V); __fastload_##H = (*H)
+/* MARK declares a local copy of *H and ADJUST refreshes it; ADJUST expands to
+   two statements. The loads below must be given a HuffmanCode pointer! */
+#define BROTLI_HC_FAST_LOAD_BITS(H) ((__fastload_##H) & 0xFF)
+#define BROTLI_HC_FAST_LOAD_VALUE(H) ((__fastload_##H) >> 16)
+#endif /* BROTLI_HUFFMAN_CODE_FAST_LOAD */
+
+/* Builds the code-length alphabet table; |count| is the length histogram. */
+BROTLI_INTERNAL void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* root_table,
+    const uint8_t* const code_lengths, uint16_t* count);
+
+/* Builds a two-level lookup table from per-length symbol lists and the length
+   histogram |count_arg| (modified). Returns size of resulting table. */
+BROTLI_INTERNAL uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
+    int root_bits, const uint16_t* const symbol_lists, uint16_t* count_arg);
+
+/* Builds a simple Huffman table; may reorder |symbols| in place. |num_symbols|
+   is to be interpreted as follows: 0 means 1 symbol, 1 means 2 symbols,
+   2 means 3 symbols, 3 means 4 symbols with lengths [2, 2, 2, 2],
+   4 means 4 symbols with lengths [1, 2, 3, 3]. */
+BROTLI_INTERNAL uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
+    int root_bits, uint16_t* symbols, uint32_t num_symbols);
+
+/* Contains a collection of Huffman trees with the same alphabet size. */
+/* alphabet_size_limit is needed due to simple codes, since
+   log2(alphabet_size_max) could be greater than log2(alphabet_size_limit). */
+typedef struct {
+  HuffmanCode** htrees;  /* presumably one pointer per tree - confirm */
+  HuffmanCode* codes;    /* presumably the shared entry storage - confirm */
+  uint16_t alphabet_size_max;
+  uint16_t alphabet_size_limit;
+  uint16_t num_htrees;   /* presumably the length of htrees - confirm */
+} HuffmanTreeGroup;
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_DEC_HUFFMAN_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/prefix.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/prefix.h
new file mode 100644
index 0000000000..3ea062d84a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/prefix.h
@@ -0,0 +1,750 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Lookup tables to map prefix codes to value ranges. This is used during
+   decoding of the block lengths, literal insertion lengths and copy lengths. */
+
+#ifndef BROTLI_DEC_PREFIX_H_
+#define BROTLI_DEC_PREFIX_H_
+
+#include "../common/constants.h"
+#include <brotli/types.h>
+
+/* Represents the range of values belonging to a prefix code:
+   [offset, offset + 2^nbits) */
+struct PrefixCodeRange {
+  uint16_t offset;  /* first value of the range */
+  uint8_t nbits;    /* log2 of the number of values in the range */
+};
+
+static const struct PrefixCodeRange
+    kBlockLengthPrefixCode[BROTLI_NUM_BLOCK_LEN_SYMBOLS] = {  /* {offset, nbits} */
+  {   1,  2}, {    5,  2}, {  9,   2}, {  13,  2},
+  {  17,  3}, {   25,  3}, {  33,  3}, {  41,  3},
+  {  49,  4}, {   65,  4}, {  81,  4}, {  97,  4},
+  { 113,  5}, {  145,  5}, { 177,  5}, { 209,  5},
+  { 241,  6}, {  305,  6}, { 369,  7}, { 497,  8},
+  { 753,  9}, { 1265, 10}, {2289, 11}, {4337, 12},
+  {8433, 13}, {16625, 24}  /* each offset = previous offset + 2^previous nbits */
+};
+
+typedef struct CmdLutElement {
+  uint8_t insert_len_extra_bits;
+  uint8_t copy_len_extra_bits;
+  int8_t distance_code;  /* 0 or -1 in the kCmdLut rows checked - confirm all */
+  uint8_t context;       /* 0..3 in the kCmdLut rows checked - confirm all */
+  uint16_t insert_len_offset;
+  uint16_t copy_len_offset;
+} CmdLutElement;
+
+static const CmdLutElement kCmdLut[BROTLI_NUM_COMMAND_SYMBOLS] = {
+  { 0x00, 0x00, 0, 0x00, 0x0000, 0x0002 },
+  { 0x00, 0x00, 0, 0x01, 0x0000, 0x0003 },
+  { 0x00, 0x00, 0, 0x02, 0x0000, 0x0004 },
+  { 0x00, 0x00, 0, 0x03, 0x0000, 0x0005 },
+  { 0x00, 0x00, 0, 0x03, 0x0000, 0x0006 },
+  { 0x00, 0x00, 0, 0x03, 0x0000, 0x0007 },
+  { 0x00, 0x00, 0, 0x03, 0x0000, 0x0008 },
+  { 0x00, 0x00, 0, 0x03, 0x0000, 0x0009 },
+  { 0x00, 0x00, 0, 0x00, 0x0001, 0x0002 },
+  { 0x00, 0x00, 0, 0x01, 0x0001, 0x0003 },
+  { 0x00, 0x00, 0, 0x02, 0x0001, 0x0004 },
+  { 0x00, 0x00, 0, 0x03, 0x0001, 0x0005 },
+  { 0x00, 0x00, 0, 0x03, 0x0001, 0x0006 },
+  { 0x00, 0x00, 0, 0x03, 0x0001, 0x0007 },
+  { 0x00, 0x00, 0, 0x03, 0x0001, 0x0008 },
+  { 0x00, 0x00, 0, 0x03, 0x0001, 0x0009 },
+  { 0x00, 0x00, 0, 0x00, 0x0002, 0x0002 },
+  { 0x00, 0x00, 0, 0x01, 0x0002, 0x0003 },
+  { 0x00, 0x00, 0, 0x02, 0x0002, 0x0004 },
+  { 0x00, 0x00, 0, 0x03, 0x0002, 0x0005 },
+  { 0x00, 0x00, 0, 0x03, 0x0002, 0x0006 },
+  { 0x00, 0x00, 0, 0x03, 0x0002, 0x0007 },
+  { 0x00, 0x00, 0, 0x03, 0x0002, 0x0008 },
+  { 0x00, 0x00, 0, 0x03, 0x0002, 0x0009 },
+  { 0x00, 0x00, 0, 0x00, 0x0003, 0x0002 },
+  { 0x00, 0x00, 0, 0x01, 0x0003, 0x0003 },
+  { 0x00, 0x00, 0, 0x02, 0x0003, 0x0004 },
+  { 0x00, 0x00, 0, 0x03, 0x0003, 0x0005 },
+  { 0x00, 0x00, 0, 0x03, 0x0003, 0x0006 },
+  { 0x00, 0x00, 0, 0x03, 0x0003, 0x0007 },
+  { 0x00, 0x00, 0, 0x03, 0x0003, 0x0008 },
+  { 0x00, 0x00, 0, 0x03, 0x0003, 0x0009 },
+  { 0x00, 0x00, 0, 0x00, 0x0004, 0x0002 },
+  { 0x00, 0x00, 0, 0x01, 0x0004, 0x0003 },
+  { 0x00, 0x00, 0, 0x02, 0x0004, 0x0004 },
+  { 0x00, 0x00, 0, 0x03, 0x0004, 0x0005 },
+  { 0x00, 0x00, 0, 0x03, 0x0004, 0x0006 },
+  { 0x00, 0x00, 0, 0x03, 0x0004, 0x0007 },
+  { 0x00, 0x00, 0, 0x03, 0x0004, 0x0008 },
+  { 0x00, 0x00, 0, 0x03, 0x0004, 0x0009 },
+  { 0x00, 0x00, 0, 0x00, 0x0005, 0x0002 },
+  { 0x00, 0x00, 0, 0x01, 0x0005, 0x0003 },
+  { 0x00, 0x00, 0, 0x02, 0x0005, 0x0004 },
+  { 0x00, 0x00, 0, 0x03, 0x0005, 0x0005 },
+  { 0x00, 0x00, 0, 0x03, 0x0005, 0x0006 },
+  { 0x00, 0x00, 0, 0x03, 0x0005, 0x0007 },
+  { 0x00, 0x00, 0, 0x03, 0x0005, 0x0008 },
+  { 0x00, 0x00, 0, 0x03, 0x0005, 0x0009 },
+  { 0x01, 0x00, 0, 0x00, 0x0006, 0x0002 },
+  { 0x01, 0x00, 0, 0x01, 0x0006, 0x0003 },
+  { 0x01, 0x00, 0, 0x02, 0x0006, 0x0004 },
+  { 0x01, 0x00, 0, 0x03, 0x0006, 0x0005 },
+  { 0x01, 0x00, 0, 0x03, 0x0006, 0x0006 },
+  { 0x01, 0x00, 0, 0x03, 0x0006, 0x0007 },
+  { 0x01, 0x00, 0, 0x03, 0x0006, 0x0008 },
+  { 0x01, 0x00, 0, 0x03, 0x0006, 0x0009 },
+  { 0x01, 0x00, 0, 0x00, 0x0008, 0x0002 },
+  { 0x01, 0x00, 0, 0x01, 0x0008, 0x0003 },
+  { 0x01, 0x00, 0, 0x02, 0x0008, 0x0004 },
+  { 0x01, 0x00, 0, 0x03, 0x0008, 0x0005 },
+  { 0x01, 0x00, 0, 0x03, 0x0008, 0x0006 },
+  { 0x01, 0x00, 0, 0x03, 0x0008, 0x0007 },
+  { 0x01, 0x00, 0, 0x03, 0x0008, 0x0008 },
+  { 0x01, 0x00, 0, 0x03, 0x0008, 0x0009 },
+  { 0x00, 0x01, 0, 0x03, 0x0000, 0x000a },
+  { 0x00, 0x01, 0, 0x03, 0x0000, 0x000c },
+  { 0x00, 0x02, 0, 0x03, 0x0000, 0x000e },
+  { 0x00, 0x02, 0, 0x03, 0x0000, 0x0012 },
+  { 0x00, 0x03, 0, 0x03, 0x0000, 0x0016 },
+  { 0x00, 0x03, 0, 0x03, 0x0000, 0x001e },
+  { 0x00, 0x04, 0, 0x03, 0x0000, 0x0026 },
+  { 0x00, 0x04, 0, 0x03, 0x0000, 0x0036 },
+  { 0x00, 0x01, 0, 0x03, 0x0001, 0x000a },
+  { 0x00, 0x01, 0, 0x03, 0x0001, 0x000c },
+  { 0x00, 0x02, 0, 0x03, 0x0001, 0x000e },
+  { 0x00, 0x02, 0, 0x03, 0x0001, 0x0012 },
+  { 0x00, 0x03, 0, 0x03, 0x0001, 0x0016 },
+  { 0x00, 0x03, 0, 0x03, 0x0001, 0x001e },
+  { 0x00, 0x04, 0, 0x03, 0x0001, 0x0026 },
+  { 0x00, 0x04, 0, 0x03, 0x0001, 0x0036 },
+  { 0x00, 0x01, 0, 0x03, 0x0002, 0x000a },
+  { 0x00, 0x01, 0, 0x03, 0x0002, 0x000c },
+  { 0x00, 0x02, 0, 0x03, 0x0002, 0x000e },
+  { 0x00, 0x02, 0, 0x03, 0x0002, 0x0012 },
+  { 0x00, 0x03, 0, 0x03, 0x0002, 0x0016 },
+  { 0x00, 0x03, 0, 0x03, 0x0002, 0x001e },
+  { 0x00, 0x04, 0, 0x03, 0x0002, 0x0026 },
+  { 0x00, 0x04, 0, 0x03, 0x0002, 0x0036 },
+  { 0x00, 0x01, 0, 0x03, 0x0003, 0x000a },
+  { 0x00, 0x01, 0, 0x03, 0x0003, 0x000c },
+  { 0x00, 0x02, 0, 0x03, 0x0003, 0x000e },
+  { 0x00, 0x02, 0, 0x03, 0x0003, 0x0012 },
+  { 0x00, 0x03, 0, 0x03, 0x0003, 0x0016 },
+  { 0x00, 0x03, 0, 0x03, 0x0003, 0x001e },
+  { 0x00, 0x04, 0, 0x03, 0x0003, 0x0026 },
+  { 0x00, 0x04, 0, 0x03, 0x0003, 0x0036 },
+  { 0x00, 0x01, 0, 0x03, 0x0004, 0x000a },
+  { 0x00, 0x01, 0, 0x03, 0x0004, 0x000c },
+  { 0x00, 0x02, 0, 0x03, 0x0004, 0x000e },
+  { 0x00, 0x02, 0, 0x03, 0x0004, 0x0012 },
+  { 0x00, 0x03, 0, 0x03, 0x0004, 0x0016 },
+  { 0x00, 0x03, 0, 0x03, 0x0004, 0x001e },
+  { 0x00, 0x04, 0, 0x03, 0x0004, 0x0026 },
+  { 0x00, 0x04, 0, 0x03, 0x0004, 0x0036 },
+  { 0x00, 0x01, 0, 0x03, 0x0005, 0x000a },
+  { 0x00, 0x01, 0, 0x03, 0x0005, 0x000c },
+  { 0x00, 0x02, 0, 0x03, 0x0005, 0x000e },
+  { 0x00, 0x02, 0, 0x03, 0x0005, 0x0012 },
+  { 0x00, 0x03, 0, 0x03, 0x0005, 0x0016 },
+  { 0x00, 0x03, 0, 0x03, 0x0005, 0x001e },
+  { 0x00, 0x04, 0, 0x03, 0x0005, 0x0026 },
+  { 0x00, 0x04, 0, 0x03, 0x0005, 0x0036 },
+  { 0x01, 0x01, 0, 0x03, 0x0006, 0x000a },
+  { 0x01, 0x01, 0, 0x03, 0x0006, 0x000c },
+  { 0x01, 0x02, 0, 0x03, 0x0006, 0x000e },
+  { 0x01, 0x02, 0, 0x03, 0x0006, 0x0012 },
+  { 0x01, 0x03, 0, 0x03, 0x0006, 0x0016 },
+  { 0x01, 0x03, 0, 0x03, 0x0006, 0x001e },
+  { 0x01, 0x04, 0, 0x03, 0x0006, 0x0026 },
+  { 0x01, 0x04, 0, 0x03, 0x0006, 0x0036 },
+  { 0x01, 0x01, 0, 0x03, 0x0008, 0x000a },
+  { 0x01, 0x01, 0, 0x03, 0x0008, 0x000c },
+  { 0x01, 0x02, 0, 0x03, 0x0008, 0x000e },
+  { 0x01, 0x02, 0, 0x03, 0x0008, 0x0012 },
+  { 0x01, 0x03, 0, 0x03, 0x0008, 0x0016 },
+  { 0x01, 0x03, 0, 0x03, 0x0008, 0x001e },
+  { 0x01, 0x04, 0, 0x03, 0x0008, 0x0026 },
+  { 0x01, 0x04, 0, 0x03, 0x0008, 0x0036 },
+  { 0x00, 0x00, -1, 0x00, 0x0000, 0x0002 },
+  { 0x00, 0x00, -1, 0x01, 0x0000, 0x0003 },
+  { 0x00, 0x00, -1, 0x02, 0x0000, 0x0004 },
+  { 0x00, 0x00, -1, 0x03, 0x0000, 0x0005 },
+  { 0x00, 0x00, -1, 0x03, 0x0000, 0x0006 },
+  { 0x00, 0x00, -1, 0x03, 0x0000, 0x0007 },
+  { 0x00, 0x00, -1, 0x03, 0x0000, 0x0008 },
+  { 0x00, 0x00, -1, 0x03, 0x0000, 0x0009 },
+  { 0x00, 0x00, -1, 0x00, 0x0001, 0x0002 },
+  { 0x00, 0x00, -1, 0x01, 0x0001, 0x0003 },
+  { 0x00, 0x00, -1, 0x02, 0x0001, 0x0004 },
+  { 0x00, 0x00, -1, 0x03, 0x0001, 0x0005 },
+  { 0x00, 0x00, -1, 0x03, 0x0001, 0x0006 },
+  { 0x00, 0x00, -1, 0x03, 0x0001, 0x0007 },
+  { 0x00, 0x00, -1, 0x03, 0x0001, 0x0008 },
+  { 0x00, 0x00, -1, 0x03, 0x0001, 0x0009 },
+  { 0x00, 0x00, -1, 0x00, 0x0002, 0x0002 },
+  { 0x00, 0x00, -1, 0x01, 0x0002, 0x0003 },
+  { 0x00, 0x00, -1, 0x02, 0x0002, 0x0004 },
+  { 0x00, 0x00, -1, 0x03, 0x0002, 0x0005 },
+  { 0x00, 0x00, -1, 0x03, 0x0002, 0x0006 },
+  { 0x00, 0x00, -1, 0x03, 0x0002, 0x0007 },
+  { 0x00, 0x00, -1, 0x03, 0x0002, 0x0008 },
+  { 0x00, 0x00, -1, 0x03, 0x0002, 0x0009 },
+  { 0x00, 0x00, -1, 0x00, 0x0003, 0x0002 },
+  { 0x00, 0x00, -1, 0x01, 0x0003, 0x0003 },
+  { 0x00, 0x00, -1, 0x02, 0x0003, 0x0004 },
+  { 0x00, 0x00, -1, 0x03, 0x0003, 0x0005 },
+  { 0x00, 0x00, -1, 0x03, 0x0003, 0x0006 },
+  { 0x00, 0x00, -1, 0x03, 0x0003, 0x0007 },
+  { 0x00, 0x00, -1, 0x03, 0x0003, 0x0008 },
+  { 0x00, 0x00, -1, 0x03, 0x0003, 0x0009 },
+  { 0x00, 0x00, -1, 0x00, 0x0004, 0x0002 },
+  { 0x00, 0x00, -1, 0x01, 0x0004, 0x0003 },
+  { 0x00, 0x00, -1, 0x02, 0x0004, 0x0004 },
+  { 0x00, 0x00, -1, 0x03, 0x0004, 0x0005 },
+  { 0x00, 0x00, -1, 0x03, 0x0004, 0x0006 },
+  { 0x00, 0x00, -1, 0x03, 0x0004, 0x0007 },
+  { 0x00, 0x00, -1, 0x03, 0x0004, 0x0008 },
+  { 0x00, 0x00, -1, 0x03, 0x0004, 0x0009 },
+  { 0x00, 0x00, -1, 0x00, 0x0005, 0x0002 },
+  { 0x00, 0x00, -1, 0x01, 0x0005, 0x0003 },
+  { 0x00, 0x00, -1, 0x02, 0x0005, 0x0004 },
+  { 0x00, 0x00, -1, 0x03, 0x0005, 0x0005 },
+  { 0x00, 0x00, -1, 0x03, 0x0005, 0x0006 },
+  { 0x00, 0x00, -1, 0x03, 0x0005, 0x0007 },
+  { 0x00, 0x00, -1, 0x03, 0x0005, 0x0008 },
+  { 0x00, 0x00, -1, 0x03, 0x0005, 0x0009 },
+  { 0x01, 0x00, -1, 0x00, 0x0006, 0x0002 },
+  { 0x01, 0x00, -1, 0x01, 0x0006, 0x0003 },
+  { 0x01, 0x00, -1, 0x02, 0x0006, 0x0004 },
+  { 0x01, 0x00, -1, 0x03, 0x0006, 0x0005 },
+  { 0x01, 0x00, -1, 0x03, 0x0006, 0x0006 },
+  { 0x01, 0x00, -1, 0x03, 0x0006, 0x0007 },
+  { 0x01, 0x00, -1, 0x03, 0x0006, 0x0008 },
+  { 0x01, 0x00, -1, 0x03, 0x0006, 0x0009 },
+  { 0x01, 0x00, -1, 0x00, 0x0008, 0x0002 },
+  { 0x01, 0x00, -1, 0x01, 0x0008, 0x0003 },
+  { 0x01, 0x00, -1, 0x02, 0x0008, 0x0004 },
+  { 0x01, 0x00, -1, 0x03, 0x0008, 0x0005 },
+  { 0x01, 0x00, -1, 0x03, 0x0008, 0x0006 },
+  { 0x01, 0x00, -1, 0x03, 0x0008, 0x0007 },
+  { 0x01, 0x00, -1, 0x03, 0x0008, 0x0008 },
+  { 0x01, 0x00, -1, 0x03, 0x0008, 0x0009 },
+  { 0x00, 0x01, -1, 0x03, 0x0000, 0x000a },
+  { 0x00, 0x01, -1, 0x03, 0x0000, 0x000c },
+  { 0x00, 0x02, -1, 0x03, 0x0000, 0x000e },
+  { 0x00, 0x02, -1, 0x03, 0x0000, 0x0012 },
+  { 0x00, 0x03, -1, 0x03, 0x0000, 0x0016 },
+  { 0x00, 0x03, -1, 0x03, 0x0000, 0x001e },
+  { 0x00, 0x04, -1, 0x03, 0x0000, 0x0026 },
+  { 0x00, 0x04, -1, 0x03, 0x0000, 0x0036 },
+  { 0x00, 0x01, -1, 0x03, 0x0001, 0x000a },
+  { 0x00, 0x01, -1, 0x03, 0x0001, 0x000c },
+  { 0x00, 0x02, -1, 0x03, 0x0001, 0x000e },
+  { 0x00, 0x02, -1, 0x03, 0x0001, 0x0012 },
+  { 0x00, 0x03, -1, 0x03, 0x0001, 0x0016 },
+  { 0x00, 0x03, -1, 0x03, 0x0001, 0x001e },
+  { 0x00, 0x04, -1, 0x03, 0x0001, 0x0026 },
+  { 0x00, 0x04, -1, 0x03, 0x0001, 0x0036 },
+  { 0x00, 0x01, -1, 0x03, 0x0002, 0x000a },
+  { 0x00, 0x01, -1, 0x03, 0x0002, 0x000c },
+  { 0x00, 0x02, -1, 0x03, 0x0002, 0x000e },
+  { 0x00, 0x02, -1, 0x03, 0x0002, 0x0012 },
+  { 0x00, 0x03, -1, 0x03, 0x0002, 0x0016 },
+  { 0x00, 0x03, -1, 0x03, 0x0002, 0x001e },
+  { 0x00, 0x04, -1, 0x03, 0x0002, 0x0026 },
+  { 0x00, 0x04, -1, 0x03, 0x0002, 0x0036 },
+  { 0x00, 0x01, -1, 0x03, 0x0003, 0x000a },
+  { 0x00, 0x01, -1, 0x03, 0x0003, 0x000c },
+  { 0x00, 0x02, -1, 0x03, 0x0003, 0x000e },
+  { 0x00, 0x02, -1, 0x03, 0x0003, 0x0012 },
+  { 0x00, 0x03, -1, 0x03, 0x0003, 0x0016 },
+  { 0x00, 0x03, -1, 0x03, 0x0003, 0x001e },
+  { 0x00, 0x04, -1, 0x03, 0x0003, 0x0026 },
+  { 0x00, 0x04, -1, 0x03, 0x0003, 0x0036 },
+  { 0x00, 0x01, -1, 0x03, 0x0004, 0x000a },
+  { 0x00, 0x01, -1, 0x03, 0x0004, 0x000c },
+  { 0x00, 0x02, -1, 0x03, 0x0004, 0x000e },
+  { 0x00, 0x02, -1, 0x03, 0x0004, 0x0012 },
+  { 0x00, 0x03, -1, 0x03, 0x0004, 0x0016 },
+  { 0x00, 0x03, -1, 0x03, 0x0004, 0x001e },
+  { 0x00, 0x04, -1, 0x03, 0x0004, 0x0026 },
+  { 0x00, 0x04, -1, 0x03, 0x0004, 0x0036 },
+  { 0x00, 0x01, -1, 0x03, 0x0005, 0x000a },
+  { 0x00, 0x01, -1, 0x03, 0x0005, 0x000c },
+  { 0x00, 0x02, -1, 0x03, 0x0005, 0x000e },
+  { 0x00, 0x02, -1, 0x03, 0x0005, 0x0012 },
+  { 0x00, 0x03, -1, 0x03, 0x0005, 0x0016 },
+  { 0x00, 0x03, -1, 0x03, 0x0005, 0x001e },
+  { 0x00, 0x04, -1, 0x03, 0x0005, 0x0026 },
+  { 0x00, 0x04, -1, 0x03, 0x0005, 0x0036 },
+  { 0x01, 0x01, -1, 0x03, 0x0006, 0x000a },
+  { 0x01, 0x01, -1, 0x03, 0x0006, 0x000c },
+  { 0x01, 0x02, -1, 0x03, 0x0006, 0x000e },
+  { 0x01, 0x02, -1, 0x03, 0x0006, 0x0012 },
+  { 0x01, 0x03, -1, 0x03, 0x0006, 0x0016 },
+  { 0x01, 0x03, -1, 0x03, 0x0006, 0x001e },
+  { 0x01, 0x04, -1, 0x03, 0x0006, 0x0026 },
+  { 0x01, 0x04, -1, 0x03, 0x0006, 0x0036 },
+  { 0x01, 0x01, -1, 0x03, 0x0008, 0x000a },
+  { 0x01, 0x01, -1, 0x03, 0x0008, 0x000c },
+  { 0x01, 0x02, -1, 0x03, 0x0008, 0x000e },
+  { 0x01, 0x02, -1, 0x03, 0x0008, 0x0012 },
+  { 0x01, 0x03, -1, 0x03, 0x0008, 0x0016 },
+  { 0x01, 0x03, -1, 0x03, 0x0008, 0x001e },
+  { 0x01, 0x04, -1, 0x03, 0x0008, 0x0026 },
+  { 0x01, 0x04, -1, 0x03, 0x0008, 0x0036 },
+  { 0x02, 0x00, -1, 0x00, 0x000a, 0x0002 },
+  { 0x02, 0x00, -1, 0x01, 0x000a, 0x0003 },
+  { 0x02, 0x00, -1, 0x02, 0x000a, 0x0004 },
+  { 0x02, 0x00, -1, 0x03, 0x000a, 0x0005 },
+  { 0x02, 0x00, -1, 0x03, 0x000a, 0x0006 },
+  { 0x02, 0x00, -1, 0x03, 0x000a, 0x0007 },
+  { 0x02, 0x00, -1, 0x03, 0x000a, 0x0008 },
+  { 0x02, 0x00, -1, 0x03, 0x000a, 0x0009 },
+  { 0x02, 0x00, -1, 0x00, 0x000e, 0x0002 },
+  { 0x02, 0x00, -1, 0x01, 0x000e, 0x0003 },
+  { 0x02, 0x00, -1, 0x02, 0x000e, 0x0004 },
+  { 0x02, 0x00, -1, 0x03, 0x000e, 0x0005 },
+  { 0x02, 0x00, -1, 0x03, 0x000e, 0x0006 },
+  { 0x02, 0x00, -1, 0x03, 0x000e, 0x0007 },
+  { 0x02, 0x00, -1, 0x03, 0x000e, 0x0008 },
+  { 0x02, 0x00, -1, 0x03, 0x000e, 0x0009 },
+  { 0x03, 0x00, -1, 0x00, 0x0012, 0x0002 },
+  { 0x03, 0x00, -1, 0x01, 0x0012, 0x0003 },
+  { 0x03, 0x00, -1, 0x02, 0x0012, 0x0004 },
+  { 0x03, 0x00, -1, 0x03, 0x0012, 0x0005 },
+  { 0x03, 0x00, -1, 0x03, 0x0012, 0x0006 },
+  { 0x03, 0x00, -1, 0x03, 0x0012, 0x0007 },
+  { 0x03, 0x00, -1, 0x03, 0x0012, 0x0008 },
+  { 0x03, 0x00, -1, 0x03, 0x0012, 0x0009 },
+  { 0x03, 0x00, -1, 0x00, 0x001a, 0x0002 },
+  { 0x03, 0x00, -1, 0x01, 0x001a, 0x0003 },
+  { 0x03, 0x00, -1, 0x02, 0x001a, 0x0004 },
+  { 0x03, 0x00, -1, 0x03, 0x001a, 0x0005 },
+  { 0x03, 0x00, -1, 0x03, 0x001a, 0x0006 },
+  { 0x03, 0x00, -1, 0x03, 0x001a, 0x0007 },
+  { 0x03, 0x00, -1, 0x03, 0x001a, 0x0008 },
+  { 0x03, 0x00, -1, 0x03, 0x001a, 0x0009 },
+  { 0x04, 0x00, -1, 0x00, 0x0022, 0x0002 },
+  { 0x04, 0x00, -1, 0x01, 0x0022, 0x0003 },
+  { 0x04, 0x00, -1, 0x02, 0x0022, 0x0004 },
+  { 0x04, 0x00, -1, 0x03, 0x0022, 0x0005 },
+  { 0x04, 0x00, -1, 0x03, 0x0022, 0x0006 },
+  { 0x04, 0x00, -1, 0x03, 0x0022, 0x0007 },
+  { 0x04, 0x00, -1, 0x03, 0x0022, 0x0008 },
+  { 0x04, 0x00, -1, 0x03, 0x0022, 0x0009 },
+  { 0x04, 0x00, -1, 0x00, 0x0032, 0x0002 },
+  { 0x04, 0x00, -1, 0x01, 0x0032, 0x0003 },
+  { 0x04, 0x00, -1, 0x02, 0x0032, 0x0004 },
+  { 0x04, 0x00, -1, 0x03, 0x0032, 0x0005 },
+  { 0x04, 0x00, -1, 0x03, 0x0032, 0x0006 },
+  { 0x04, 0x00, -1, 0x03, 0x0032, 0x0007 },
+  { 0x04, 0x00, -1, 0x03, 0x0032, 0x0008 },
+  { 0x04, 0x00, -1, 0x03, 0x0032, 0x0009 },
+  { 0x05, 0x00, -1, 0x00, 0x0042, 0x0002 },
+  { 0x05, 0x00, -1, 0x01, 0x0042, 0x0003 },
+  { 0x05, 0x00, -1, 0x02, 0x0042, 0x0004 },
+  { 0x05, 0x00, -1, 0x03, 0x0042, 0x0005 },
+  { 0x05, 0x00, -1, 0x03, 0x0042, 0x0006 },
+  { 0x05, 0x00, -1, 0x03, 0x0042, 0x0007 },
+  { 0x05, 0x00, -1, 0x03, 0x0042, 0x0008 },
+  { 0x05, 0x00, -1, 0x03, 0x0042, 0x0009 },
+  { 0x05, 0x00, -1, 0x00, 0x0062, 0x0002 },
+  { 0x05, 0x00, -1, 0x01, 0x0062, 0x0003 },
+  { 0x05, 0x00, -1, 0x02, 0x0062, 0x0004 },
+  { 0x05, 0x00, -1, 0x03, 0x0062, 0x0005 },
+  { 0x05, 0x00, -1, 0x03, 0x0062, 0x0006 },
+  { 0x05, 0x00, -1, 0x03, 0x0062, 0x0007 },
+  { 0x05, 0x00, -1, 0x03, 0x0062, 0x0008 },
+  { 0x05, 0x00, -1, 0x03, 0x0062, 0x0009 },
+  { 0x02, 0x01, -1, 0x03, 0x000a, 0x000a },
+  { 0x02, 0x01, -1, 0x03, 0x000a, 0x000c },
+  { 0x02, 0x02, -1, 0x03, 0x000a, 0x000e },
+  { 0x02, 0x02, -1, 0x03, 0x000a, 0x0012 },
+  { 0x02, 0x03, -1, 0x03, 0x000a, 0x0016 },
+  { 0x02, 0x03, -1, 0x03, 0x000a, 0x001e },
+  { 0x02, 0x04, -1, 0x03, 0x000a, 0x0026 },
+  { 0x02, 0x04, -1, 0x03, 0x000a, 0x0036 },
+  { 0x02, 0x01, -1, 0x03, 0x000e, 0x000a },
+  { 0x02, 0x01, -1, 0x03, 0x000e, 0x000c },
+  { 0x02, 0x02, -1, 0x03, 0x000e, 0x000e },
+  { 0x02, 0x02, -1, 0x03, 0x000e, 0x0012 },
+  { 0x02, 0x03, -1, 0x03, 0x000e, 0x0016 },
+  { 0x02, 0x03, -1, 0x03, 0x000e, 0x001e },
+  { 0x02, 0x04, -1, 0x03, 0x000e, 0x0026 },
+  { 0x02, 0x04, -1, 0x03, 0x000e, 0x0036 },
+  { 0x03, 0x01, -1, 0x03, 0x0012, 0x000a },
+  { 0x03, 0x01, -1, 0x03, 0x0012, 0x000c },
+  { 0x03, 0x02, -1, 0x03, 0x0012, 0x000e },
+  { 0x03, 0x02, -1, 0x03, 0x0012, 0x0012 },
+  { 0x03, 0x03, -1, 0x03, 0x0012, 0x0016 },
+  { 0x03, 0x03, -1, 0x03, 0x0012, 0x001e },
+  { 0x03, 0x04, -1, 0x03, 0x0012, 0x0026 },
+  { 0x03, 0x04, -1, 0x03, 0x0012, 0x0036 },
+  { 0x03, 0x01, -1, 0x03, 0x001a, 0x000a },
+  { 0x03, 0x01, -1, 0x03, 0x001a, 0x000c },
+  { 0x03, 0x02, -1, 0x03, 0x001a, 0x000e },
+  { 0x03, 0x02, -1, 0x03, 0x001a, 0x0012 },
+  { 0x03, 0x03, -1, 0x03, 0x001a, 0x0016 },
+  { 0x03, 0x03, -1, 0x03, 0x001a, 0x001e },
+  { 0x03, 0x04, -1, 0x03, 0x001a, 0x0026 },
+  { 0x03, 0x04, -1, 0x03, 0x001a, 0x0036 },
+  { 0x04, 0x01, -1, 0x03, 0x0022, 0x000a },
+  { 0x04, 0x01, -1, 0x03, 0x0022, 0x000c },
+  { 0x04, 0x02, -1, 0x03, 0x0022, 0x000e },
+  { 0x04, 0x02, -1, 0x03, 0x0022, 0x0012 },
+  { 0x04, 0x03, -1, 0x03, 0x0022, 0x0016 },
+  { 0x04, 0x03, -1, 0x03, 0x0022, 0x001e },
+  { 0x04, 0x04, -1, 0x03, 0x0022, 0x0026 },
+  { 0x04, 0x04, -1, 0x03, 0x0022, 0x0036 },
+  { 0x04, 0x01, -1, 0x03, 0x0032, 0x000a },
+  { 0x04, 0x01, -1, 0x03, 0x0032, 0x000c },
+  { 0x04, 0x02, -1, 0x03, 0x0032, 0x000e },
+  { 0x04, 0x02, -1, 0x03, 0x0032, 0x0012 },
+  { 0x04, 0x03, -1, 0x03, 0x0032, 0x0016 },
+  { 0x04, 0x03, -1, 0x03, 0x0032, 0x001e },
+  { 0x04, 0x04, -1, 0x03, 0x0032, 0x0026 },
+  { 0x04, 0x04, -1, 0x03, 0x0032, 0x0036 },
+  { 0x05, 0x01, -1, 0x03, 0x0042, 0x000a },
+  { 0x05, 0x01, -1, 0x03, 0x0042, 0x000c },
+  { 0x05, 0x02, -1, 0x03, 0x0042, 0x000e },
+  { 0x05, 0x02, -1, 0x03, 0x0042, 0x0012 },
+  { 0x05, 0x03, -1, 0x03, 0x0042, 0x0016 },
+  { 0x05, 0x03, -1, 0x03, 0x0042, 0x001e },
+  { 0x05, 0x04, -1, 0x03, 0x0042, 0x0026 },
+  { 0x05, 0x04, -1, 0x03, 0x0042, 0x0036 },
+  { 0x05, 0x01, -1, 0x03, 0x0062, 0x000a },
+  { 0x05, 0x01, -1, 0x03, 0x0062, 0x000c },
+  { 0x05, 0x02, -1, 0x03, 0x0062, 0x000e },
+  { 0x05, 0x02, -1, 0x03, 0x0062, 0x0012 },
+  { 0x05, 0x03, -1, 0x03, 0x0062, 0x0016 },
+  { 0x05, 0x03, -1, 0x03, 0x0062, 0x001e },
+  { 0x05, 0x04, -1, 0x03, 0x0062, 0x0026 },
+  { 0x05, 0x04, -1, 0x03, 0x0062, 0x0036 },
+  { 0x00, 0x05, -1, 0x03, 0x0000, 0x0046 },
+  { 0x00, 0x05, -1, 0x03, 0x0000, 0x0066 },
+  { 0x00, 0x06, -1, 0x03, 0x0000, 0x0086 },
+  { 0x00, 0x07, -1, 0x03, 0x0000, 0x00c6 },
+  { 0x00, 0x08, -1, 0x03, 0x0000, 0x0146 },
+  { 0x00, 0x09, -1, 0x03, 0x0000, 0x0246 },
+  { 0x00, 0x0a, -1, 0x03, 0x0000, 0x0446 },
+  { 0x00, 0x18, -1, 0x03, 0x0000, 0x0846 },
+  { 0x00, 0x05, -1, 0x03, 0x0001, 0x0046 },
+  { 0x00, 0x05, -1, 0x03, 0x0001, 0x0066 },
+  { 0x00, 0x06, -1, 0x03, 0x0001, 0x0086 },
+  { 0x00, 0x07, -1, 0x03, 0x0001, 0x00c6 },
+  { 0x00, 0x08, -1, 0x03, 0x0001, 0x0146 },
+  { 0x00, 0x09, -1, 0x03, 0x0001, 0x0246 },
+  { 0x00, 0x0a, -1, 0x03, 0x0001, 0x0446 },
+  { 0x00, 0x18, -1, 0x03, 0x0001, 0x0846 },
+  { 0x00, 0x05, -1, 0x03, 0x0002, 0x0046 },
+  { 0x00, 0x05, -1, 0x03, 0x0002, 0x0066 },
+  { 0x00, 0x06, -1, 0x03, 0x0002, 0x0086 },
+  { 0x00, 0x07, -1, 0x03, 0x0002, 0x00c6 },
+  { 0x00, 0x08, -1, 0x03, 0x0002, 0x0146 },
+  { 0x00, 0x09, -1, 0x03, 0x0002, 0x0246 },
+  { 0x00, 0x0a, -1, 0x03, 0x0002, 0x0446 },
+  { 0x00, 0x18, -1, 0x03, 0x0002, 0x0846 },
+  { 0x00, 0x05, -1, 0x03, 0x0003, 0x0046 },
+  { 0x00, 0x05, -1, 0x03, 0x0003, 0x0066 },
+  { 0x00, 0x06, -1, 0x03, 0x0003, 0x0086 },
+  { 0x00, 0x07, -1, 0x03, 0x0003, 0x00c6 },
+  { 0x00, 0x08, -1, 0x03, 0x0003, 0x0146 },
+  { 0x00, 0x09, -1, 0x03, 0x0003, 0x0246 },
+  { 0x00, 0x0a, -1, 0x03, 0x0003, 0x0446 },
+  { 0x00, 0x18, -1, 0x03, 0x0003, 0x0846 },
+  { 0x00, 0x05, -1, 0x03, 0x0004, 0x0046 },
+  { 0x00, 0x05, -1, 0x03, 0x0004, 0x0066 },
+  { 0x00, 0x06, -1, 0x03, 0x0004, 0x0086 },
+  { 0x00, 0x07, -1, 0x03, 0x0004, 0x00c6 },
+  { 0x00, 0x08, -1, 0x03, 0x0004, 0x0146 },
+  { 0x00, 0x09, -1, 0x03, 0x0004, 0x0246 },
+  { 0x00, 0x0a, -1, 0x03, 0x0004, 0x0446 },
+  { 0x00, 0x18, -1, 0x03, 0x0004, 0x0846 },
+  { 0x00, 0x05, -1, 0x03, 0x0005, 0x0046 },
+  { 0x00, 0x05, -1, 0x03, 0x0005, 0x0066 },
+  { 0x00, 0x06, -1, 0x03, 0x0005, 0x0086 },
+  { 0x00, 0x07, -1, 0x03, 0x0005, 0x00c6 },
+  { 0x00, 0x08, -1, 0x03, 0x0005, 0x0146 },
+  { 0x00, 0x09, -1, 0x03, 0x0005, 0x0246 },
+  { 0x00, 0x0a, -1, 0x03, 0x0005, 0x0446 },
+  { 0x00, 0x18, -1, 0x03, 0x0005, 0x0846 },
+  { 0x01, 0x05, -1, 0x03, 0x0006, 0x0046 },
+  { 0x01, 0x05, -1, 0x03, 0x0006, 0x0066 },
+  { 0x01, 0x06, -1, 0x03, 0x0006, 0x0086 },
+  { 0x01, 0x07, -1, 0x03, 0x0006, 0x00c6 },
+  { 0x01, 0x08, -1, 0x03, 0x0006, 0x0146 },
+  { 0x01, 0x09, -1, 0x03, 0x0006, 0x0246 },
+  { 0x01, 0x0a, -1, 0x03, 0x0006, 0x0446 },
+  { 0x01, 0x18, -1, 0x03, 0x0006, 0x0846 },
+  { 0x01, 0x05, -1, 0x03, 0x0008, 0x0046 },
+  { 0x01, 0x05, -1, 0x03, 0x0008, 0x0066 },
+  { 0x01, 0x06, -1, 0x03, 0x0008, 0x0086 },
+  { 0x01, 0x07, -1, 0x03, 0x0008, 0x00c6 },
+  { 0x01, 0x08, -1, 0x03, 0x0008, 0x0146 },
+  { 0x01, 0x09, -1, 0x03, 0x0008, 0x0246 },
+  { 0x01, 0x0a, -1, 0x03, 0x0008, 0x0446 },
+  { 0x01, 0x18, -1, 0x03, 0x0008, 0x0846 },
+  { 0x06, 0x00, -1, 0x00, 0x0082, 0x0002 },
+  { 0x06, 0x00, -1, 0x01, 0x0082, 0x0003 },
+  { 0x06, 0x00, -1, 0x02, 0x0082, 0x0004 },
+  { 0x06, 0x00, -1, 0x03, 0x0082, 0x0005 },
+  { 0x06, 0x00, -1, 0x03, 0x0082, 0x0006 },
+  { 0x06, 0x00, -1, 0x03, 0x0082, 0x0007 },
+  { 0x06, 0x00, -1, 0x03, 0x0082, 0x0008 },
+  { 0x06, 0x00, -1, 0x03, 0x0082, 0x0009 },
+  { 0x07, 0x00, -1, 0x00, 0x00c2, 0x0002 },
+  { 0x07, 0x00, -1, 0x01, 0x00c2, 0x0003 },
+  { 0x07, 0x00, -1, 0x02, 0x00c2, 0x0004 },
+  { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0005 },
+  { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0006 },
+  { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0007 },
+  { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0008 },
+  { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0009 },
+  { 0x08, 0x00, -1, 0x00, 0x0142, 0x0002 },
+  { 0x08, 0x00, -1, 0x01, 0x0142, 0x0003 },
+  { 0x08, 0x00, -1, 0x02, 0x0142, 0x0004 },
+  { 0x08, 0x00, -1, 0x03, 0x0142, 0x0005 },
+  { 0x08, 0x00, -1, 0x03, 0x0142, 0x0006 },
+  { 0x08, 0x00, -1, 0x03, 0x0142, 0x0007 },
+  { 0x08, 0x00, -1, 0x03, 0x0142, 0x0008 },
+  { 0x08, 0x00, -1, 0x03, 0x0142, 0x0009 },
+  { 0x09, 0x00, -1, 0x00, 0x0242, 0x0002 },
+  { 0x09, 0x00, -1, 0x01, 0x0242, 0x0003 },
+  { 0x09, 0x00, -1, 0x02, 0x0242, 0x0004 },
+  { 0x09, 0x00, -1, 0x03, 0x0242, 0x0005 },
+  { 0x09, 0x00, -1, 0x03, 0x0242, 0x0006 },
+  { 0x09, 0x00, -1, 0x03, 0x0242, 0x0007 },
+  { 0x09, 0x00, -1, 0x03, 0x0242, 0x0008 },
+  { 0x09, 0x00, -1, 0x03, 0x0242, 0x0009 },
+  { 0x0a, 0x00, -1, 0x00, 0x0442, 0x0002 },
+  { 0x0a, 0x00, -1, 0x01, 0x0442, 0x0003 },
+  { 0x0a, 0x00, -1, 0x02, 0x0442, 0x0004 },
+  { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0005 },
+  { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0006 },
+  { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0007 },
+  { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0008 },
+  { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0009 },
+  { 0x0c, 0x00, -1, 0x00, 0x0842, 0x0002 },
+  { 0x0c, 0x00, -1, 0x01, 0x0842, 0x0003 },
+  { 0x0c, 0x00, -1, 0x02, 0x0842, 0x0004 },
+  { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0005 },
+  { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0006 },
+  { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0007 },
+  { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0008 },
+  { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0009 },
+  { 0x0e, 0x00, -1, 0x00, 0x1842, 0x0002 },
+  { 0x0e, 0x00, -1, 0x01, 0x1842, 0x0003 },
+  { 0x0e, 0x00, -1, 0x02, 0x1842, 0x0004 },
+  { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0005 },
+  { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0006 },
+  { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0007 },
+  { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0008 },
+  { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0009 },
+  { 0x18, 0x00, -1, 0x00, 0x5842, 0x0002 },
+  { 0x18, 0x00, -1, 0x01, 0x5842, 0x0003 },
+  { 0x18, 0x00, -1, 0x02, 0x5842, 0x0004 },
+  { 0x18, 0x00, -1, 0x03, 0x5842, 0x0005 },
+  { 0x18, 0x00, -1, 0x03, 0x5842, 0x0006 },
+  { 0x18, 0x00, -1, 0x03, 0x5842, 0x0007 },
+  { 0x18, 0x00, -1, 0x03, 0x5842, 0x0008 },
+  { 0x18, 0x00, -1, 0x03, 0x5842, 0x0009 },
+  { 0x02, 0x05, -1, 0x03, 0x000a, 0x0046 },
+  { 0x02, 0x05, -1, 0x03, 0x000a, 0x0066 },
+  { 0x02, 0x06, -1, 0x03, 0x000a, 0x0086 },
+  { 0x02, 0x07, -1, 0x03, 0x000a, 0x00c6 },
+  { 0x02, 0x08, -1, 0x03, 0x000a, 0x0146 },
+  { 0x02, 0x09, -1, 0x03, 0x000a, 0x0246 },
+  { 0x02, 0x0a, -1, 0x03, 0x000a, 0x0446 },
+  { 0x02, 0x18, -1, 0x03, 0x000a, 0x0846 },
+  { 0x02, 0x05, -1, 0x03, 0x000e, 0x0046 },
+  { 0x02, 0x05, -1, 0x03, 0x000e, 0x0066 },
+  { 0x02, 0x06, -1, 0x03, 0x000e, 0x0086 },
+  { 0x02, 0x07, -1, 0x03, 0x000e, 0x00c6 },
+  { 0x02, 0x08, -1, 0x03, 0x000e, 0x0146 },
+  { 0x02, 0x09, -1, 0x03, 0x000e, 0x0246 },
+  { 0x02, 0x0a, -1, 0x03, 0x000e, 0x0446 },
+  { 0x02, 0x18, -1, 0x03, 0x000e, 0x0846 },
+  { 0x03, 0x05, -1, 0x03, 0x0012, 0x0046 },
+  { 0x03, 0x05, -1, 0x03, 0x0012, 0x0066 },
+  { 0x03, 0x06, -1, 0x03, 0x0012, 0x0086 },
+  { 0x03, 0x07, -1, 0x03, 0x0012, 0x00c6 },
+  { 0x03, 0x08, -1, 0x03, 0x0012, 0x0146 },
+  { 0x03, 0x09, -1, 0x03, 0x0012, 0x0246 },
+  { 0x03, 0x0a, -1, 0x03, 0x0012, 0x0446 },
+  { 0x03, 0x18, -1, 0x03, 0x0012, 0x0846 },
+  { 0x03, 0x05, -1, 0x03, 0x001a, 0x0046 },
+  { 0x03, 0x05, -1, 0x03, 0x001a, 0x0066 },
+  { 0x03, 0x06, -1, 0x03, 0x001a, 0x0086 },
+  { 0x03, 0x07, -1, 0x03, 0x001a, 0x00c6 },
+  { 0x03, 0x08, -1, 0x03, 0x001a, 0x0146 },
+  { 0x03, 0x09, -1, 0x03, 0x001a, 0x0246 },
+  { 0x03, 0x0a, -1, 0x03, 0x001a, 0x0446 },
+  { 0x03, 0x18, -1, 0x03, 0x001a, 0x0846 },
+  { 0x04, 0x05, -1, 0x03, 0x0022, 0x0046 },
+  { 0x04, 0x05, -1, 0x03, 0x0022, 0x0066 },
+  { 0x04, 0x06, -1, 0x03, 0x0022, 0x0086 },
+  { 0x04, 0x07, -1, 0x03, 0x0022, 0x00c6 },
+  { 0x04, 0x08, -1, 0x03, 0x0022, 0x0146 },
+  { 0x04, 0x09, -1, 0x03, 0x0022, 0x0246 },
+  { 0x04, 0x0a, -1, 0x03, 0x0022, 0x0446 },
+  { 0x04, 0x18, -1, 0x03, 0x0022, 0x0846 },
+  { 0x04, 0x05, -1, 0x03, 0x0032, 0x0046 },
+  { 0x04, 0x05, -1, 0x03, 0x0032, 0x0066 },
+  { 0x04, 0x06, -1, 0x03, 0x0032, 0x0086 },
+  { 0x04, 0x07, -1, 0x03, 0x0032, 0x00c6 },
+  { 0x04, 0x08, -1, 0x03, 0x0032, 0x0146 },
+  { 0x04, 0x09, -1, 0x03, 0x0032, 0x0246 },
+  { 0x04, 0x0a, -1, 0x03, 0x0032, 0x0446 },
+  { 0x04, 0x18, -1, 0x03, 0x0032, 0x0846 },
+  { 0x05, 0x05, -1, 0x03, 0x0042, 0x0046 },
+  { 0x05, 0x05, -1, 0x03, 0x0042, 0x0066 },
+  { 0x05, 0x06, -1, 0x03, 0x0042, 0x0086 },
+  { 0x05, 0x07, -1, 0x03, 0x0042, 0x00c6 },
+  { 0x05, 0x08, -1, 0x03, 0x0042, 0x0146 },
+  { 0x05, 0x09, -1, 0x03, 0x0042, 0x0246 },
+  { 0x05, 0x0a, -1, 0x03, 0x0042, 0x0446 },
+  { 0x05, 0x18, -1, 0x03, 0x0042, 0x0846 },
+  { 0x05, 0x05, -1, 0x03, 0x0062, 0x0046 },
+  { 0x05, 0x05, -1, 0x03, 0x0062, 0x0066 },
+  { 0x05, 0x06, -1, 0x03, 0x0062, 0x0086 },
+  { 0x05, 0x07, -1, 0x03, 0x0062, 0x00c6 },
+  { 0x05, 0x08, -1, 0x03, 0x0062, 0x0146 },
+  { 0x05, 0x09, -1, 0x03, 0x0062, 0x0246 },
+  { 0x05, 0x0a, -1, 0x03, 0x0062, 0x0446 },
+  { 0x05, 0x18, -1, 0x03, 0x0062, 0x0846 },
+  { 0x06, 0x01, -1, 0x03, 0x0082, 0x000a },
+  { 0x06, 0x01, -1, 0x03, 0x0082, 0x000c },
+  { 0x06, 0x02, -1, 0x03, 0x0082, 0x000e },
+  { 0x06, 0x02, -1, 0x03, 0x0082, 0x0012 },
+  { 0x06, 0x03, -1, 0x03, 0x0082, 0x0016 },
+  { 0x06, 0x03, -1, 0x03, 0x0082, 0x001e },
+  { 0x06, 0x04, -1, 0x03, 0x0082, 0x0026 },
+  { 0x06, 0x04, -1, 0x03, 0x0082, 0x0036 },
+  { 0x07, 0x01, -1, 0x03, 0x00c2, 0x000a },
+  { 0x07, 0x01, -1, 0x03, 0x00c2, 0x000c },
+  { 0x07, 0x02, -1, 0x03, 0x00c2, 0x000e },
+  { 0x07, 0x02, -1, 0x03, 0x00c2, 0x0012 },
+  { 0x07, 0x03, -1, 0x03, 0x00c2, 0x0016 },
+  { 0x07, 0x03, -1, 0x03, 0x00c2, 0x001e },
+  { 0x07, 0x04, -1, 0x03, 0x00c2, 0x0026 },
+  { 0x07, 0x04, -1, 0x03, 0x00c2, 0x0036 },
+  { 0x08, 0x01, -1, 0x03, 0x0142, 0x000a },
+  { 0x08, 0x01, -1, 0x03, 0x0142, 0x000c },
+  { 0x08, 0x02, -1, 0x03, 0x0142, 0x000e },
+  { 0x08, 0x02, -1, 0x03, 0x0142, 0x0012 },
+  { 0x08, 0x03, -1, 0x03, 0x0142, 0x0016 },
+  { 0x08, 0x03, -1, 0x03, 0x0142, 0x001e },
+  { 0x08, 0x04, -1, 0x03, 0x0142, 0x0026 },
+  { 0x08, 0x04, -1, 0x03, 0x0142, 0x0036 },
+  { 0x09, 0x01, -1, 0x03, 0x0242, 0x000a },
+  { 0x09, 0x01, -1, 0x03, 0x0242, 0x000c },
+  { 0x09, 0x02, -1, 0x03, 0x0242, 0x000e },
+  { 0x09, 0x02, -1, 0x03, 0x0242, 0x0012 },
+  { 0x09, 0x03, -1, 0x03, 0x0242, 0x0016 },
+  { 0x09, 0x03, -1, 0x03, 0x0242, 0x001e },
+  { 0x09, 0x04, -1, 0x03, 0x0242, 0x0026 },
+  { 0x09, 0x04, -1, 0x03, 0x0242, 0x0036 },
+  { 0x0a, 0x01, -1, 0x03, 0x0442, 0x000a },
+  { 0x0a, 0x01, -1, 0x03, 0x0442, 0x000c },
+  { 0x0a, 0x02, -1, 0x03, 0x0442, 0x000e },
+  { 0x0a, 0x02, -1, 0x03, 0x0442, 0x0012 },
+  { 0x0a, 0x03, -1, 0x03, 0x0442, 0x0016 },
+  { 0x0a, 0x03, -1, 0x03, 0x0442, 0x001e },
+  { 0x0a, 0x04, -1, 0x03, 0x0442, 0x0026 },
+  { 0x0a, 0x04, -1, 0x03, 0x0442, 0x0036 },
+  { 0x0c, 0x01, -1, 0x03, 0x0842, 0x000a },
+  { 0x0c, 0x01, -1, 0x03, 0x0842, 0x000c },
+  { 0x0c, 0x02, -1, 0x03, 0x0842, 0x000e },
+  { 0x0c, 0x02, -1, 0x03, 0x0842, 0x0012 },
+  { 0x0c, 0x03, -1, 0x03, 0x0842, 0x0016 },
+  { 0x0c, 0x03, -1, 0x03, 0x0842, 0x001e },
+  { 0x0c, 0x04, -1, 0x03, 0x0842, 0x0026 },
+  { 0x0c, 0x04, -1, 0x03, 0x0842, 0x0036 },
+  { 0x0e, 0x01, -1, 0x03, 0x1842, 0x000a },
+  { 0x0e, 0x01, -1, 0x03, 0x1842, 0x000c },
+  { 0x0e, 0x02, -1, 0x03, 0x1842, 0x000e },
+  { 0x0e, 0x02, -1, 0x03, 0x1842, 0x0012 },
+  { 0x0e, 0x03, -1, 0x03, 0x1842, 0x0016 },
+  { 0x0e, 0x03, -1, 0x03, 0x1842, 0x001e },
+  { 0x0e, 0x04, -1, 0x03, 0x1842, 0x0026 },
+  { 0x0e, 0x04, -1, 0x03, 0x1842, 0x0036 },
+  { 0x18, 0x01, -1, 0x03, 0x5842, 0x000a },
+  { 0x18, 0x01, -1, 0x03, 0x5842, 0x000c },
+  { 0x18, 0x02, -1, 0x03, 0x5842, 0x000e },
+  { 0x18, 0x02, -1, 0x03, 0x5842, 0x0012 },
+  { 0x18, 0x03, -1, 0x03, 0x5842, 0x0016 },
+  { 0x18, 0x03, -1, 0x03, 0x5842, 0x001e },
+  { 0x18, 0x04, -1, 0x03, 0x5842, 0x0026 },
+  { 0x18, 0x04, -1, 0x03, 0x5842, 0x0036 },
+  { 0x06, 0x05, -1, 0x03, 0x0082, 0x0046 },
+  { 0x06, 0x05, -1, 0x03, 0x0082, 0x0066 },
+  { 0x06, 0x06, -1, 0x03, 0x0082, 0x0086 },
+  { 0x06, 0x07, -1, 0x03, 0x0082, 0x00c6 },
+  { 0x06, 0x08, -1, 0x03, 0x0082, 0x0146 },
+  { 0x06, 0x09, -1, 0x03, 0x0082, 0x0246 },
+  { 0x06, 0x0a, -1, 0x03, 0x0082, 0x0446 },
+  { 0x06, 0x18, -1, 0x03, 0x0082, 0x0846 },
+  { 0x07, 0x05, -1, 0x03, 0x00c2, 0x0046 },
+  { 0x07, 0x05, -1, 0x03, 0x00c2, 0x0066 },
+  { 0x07, 0x06, -1, 0x03, 0x00c2, 0x0086 },
+  { 0x07, 0x07, -1, 0x03, 0x00c2, 0x00c6 },
+  { 0x07, 0x08, -1, 0x03, 0x00c2, 0x0146 },
+  { 0x07, 0x09, -1, 0x03, 0x00c2, 0x0246 },
+  { 0x07, 0x0a, -1, 0x03, 0x00c2, 0x0446 },
+  { 0x07, 0x18, -1, 0x03, 0x00c2, 0x0846 },
+  { 0x08, 0x05, -1, 0x03, 0x0142, 0x0046 },
+  { 0x08, 0x05, -1, 0x03, 0x0142, 0x0066 },
+  { 0x08, 0x06, -1, 0x03, 0x0142, 0x0086 },
+  { 0x08, 0x07, -1, 0x03, 0x0142, 0x00c6 },
+  { 0x08, 0x08, -1, 0x03, 0x0142, 0x0146 },
+  { 0x08, 0x09, -1, 0x03, 0x0142, 0x0246 },
+  { 0x08, 0x0a, -1, 0x03, 0x0142, 0x0446 },
+  { 0x08, 0x18, -1, 0x03, 0x0142, 0x0846 },
+  { 0x09, 0x05, -1, 0x03, 0x0242, 0x0046 },
+  { 0x09, 0x05, -1, 0x03, 0x0242, 0x0066 },
+  { 0x09, 0x06, -1, 0x03, 0x0242, 0x0086 },
+  { 0x09, 0x07, -1, 0x03, 0x0242, 0x00c6 },
+  { 0x09, 0x08, -1, 0x03, 0x0242, 0x0146 },
+  { 0x09, 0x09, -1, 0x03, 0x0242, 0x0246 },
+  { 0x09, 0x0a, -1, 0x03, 0x0242, 0x0446 },
+  { 0x09, 0x18, -1, 0x03, 0x0242, 0x0846 },
+  { 0x0a, 0x05, -1, 0x03, 0x0442, 0x0046 },
+  { 0x0a, 0x05, -1, 0x03, 0x0442, 0x0066 },
+  { 0x0a, 0x06, -1, 0x03, 0x0442, 0x0086 },
+  { 0x0a, 0x07, -1, 0x03, 0x0442, 0x00c6 },
+  { 0x0a, 0x08, -1, 0x03, 0x0442, 0x0146 },
+  { 0x0a, 0x09, -1, 0x03, 0x0442, 0x0246 },
+  { 0x0a, 0x0a, -1, 0x03, 0x0442, 0x0446 },
+  { 0x0a, 0x18, -1, 0x03, 0x0442, 0x0846 },
+  { 0x0c, 0x05, -1, 0x03, 0x0842, 0x0046 },
+  { 0x0c, 0x05, -1, 0x03, 0x0842, 0x0066 },
+  { 0x0c, 0x06, -1, 0x03, 0x0842, 0x0086 },
+  { 0x0c, 0x07, -1, 0x03, 0x0842, 0x00c6 },
+  { 0x0c, 0x08, -1, 0x03, 0x0842, 0x0146 },
+  { 0x0c, 0x09, -1, 0x03, 0x0842, 0x0246 },
+  { 0x0c, 0x0a, -1, 0x03, 0x0842, 0x0446 },
+  { 0x0c, 0x18, -1, 0x03, 0x0842, 0x0846 },
+  { 0x0e, 0x05, -1, 0x03, 0x1842, 0x0046 },
+  { 0x0e, 0x05, -1, 0x03, 0x1842, 0x0066 },
+  { 0x0e, 0x06, -1, 0x03, 0x1842, 0x0086 },
+  { 0x0e, 0x07, -1, 0x03, 0x1842, 0x00c6 },
+  { 0x0e, 0x08, -1, 0x03, 0x1842, 0x0146 },
+  { 0x0e, 0x09, -1, 0x03, 0x1842, 0x0246 },
+  { 0x0e, 0x0a, -1, 0x03, 0x1842, 0x0446 },
+  { 0x0e, 0x18, -1, 0x03, 0x1842, 0x0846 },
+  { 0x18, 0x05, -1, 0x03, 0x5842, 0x0046 },
+  { 0x18, 0x05, -1, 0x03, 0x5842, 0x0066 },
+  { 0x18, 0x06, -1, 0x03, 0x5842, 0x0086 },
+  { 0x18, 0x07, -1, 0x03, 0x5842, 0x00c6 },
+  { 0x18, 0x08, -1, 0x03, 0x5842, 0x0146 },
+  { 0x18, 0x09, -1, 0x03, 0x5842, 0x0246 },
+  { 0x18, 0x0a, -1, 0x03, 0x5842, 0x0446 },
+  { 0x18, 0x18, -1, 0x03, 0x5842, 0x0846 },
+};
+
+#endif  /* BROTLI_DEC_PREFIX_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/state.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/state.c
new file mode 100644
index 0000000000..6cf2476c7a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/state.c
@@ -0,0 +1,157 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "./state.h"
+
+#include <stdlib.h>  /* free, malloc */
+
+#include <brotli/types.h>
+#include "./huffman.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+  if (!alloc_func) {  /* No allocator supplied: use the default pair. */
+    s->alloc_func = BrotliDefaultAllocFunc;
+    s->free_func = BrotliDefaultFreeFunc;
+    s->memory_manager_opaque = 0;  /* Caller's opaque is ignored here. */
+  } else {  /* NOTE(review): free_func is stored without a NULL check. */
+    s->alloc_func = alloc_func;
+    s->free_func = free_func;
+    s->memory_manager_opaque = opaque;
+  }
+  /* Everything below resets decoder fields to fixed initial values. */
+  s->error_code = 0; /* BROTLI_DECODER_NO_ERROR */
+
+  BrotliInitBitReader(&s->br);
+  s->state = BROTLI_STATE_UNINITED;
+  s->large_window = 0;
+  s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+  s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_NONE;
+  s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
+  s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_NONE;
+
+  s->buffer_length = 0;
+  s->loop_counter = 0;
+  s->pos = 0;
+  s->rb_roundtrips = 0;
+  s->partial_pos_out = 0;
+
+  s->block_type_trees = NULL;
+  s->block_len_trees = NULL;
+  s->ringbuffer = NULL;  /* No ring buffer is allocated by this function. */
+  s->ringbuffer_size = 0;
+  s->new_ringbuffer_size = 0;
+  s->ringbuffer_mask = 0;
+
+  s->context_map = NULL;
+  s->context_modes = NULL;
+  s->dist_context_map = NULL;
+  s->context_map_slice = NULL;
+  s->dist_context_map_slice = NULL;
+
+  s->literal_hgroup.codes = NULL;
+  s->literal_hgroup.htrees = NULL;
+  s->insert_copy_hgroup.codes = NULL;
+  s->insert_copy_hgroup.htrees = NULL;
+  s->distance_hgroup.codes = NULL;
+  s->distance_hgroup.htrees = NULL;
+
+  s->is_last_metablock = 0;
+  s->is_uncompressed = 0;
+  s->is_metadata = 0;
+  s->should_wrap_ringbuffer = 0;
+  s->canny_ringbuffer_allocation = 1;  /* Unlike the flags above: starts 1. */
+
+  s->window_bits = 0;
+  s->max_distance = 0;
+  s->dist_rb[0] = 16;  /* dist_rb seeds; presumably the initial past */
+  s->dist_rb[1] = 15;  /* distances of the Brotli format (RFC 7932), */
+  s->dist_rb[2] = 11;  /* TODO confirm against the specification. */
+  s->dist_rb[3] = 4;
+  s->dist_rb_idx = 0;
+  s->block_type_trees = NULL;  /* Redundant: already cleared above. */
+  s->block_len_trees = NULL;   /* Redundant: already cleared above. */
+
+  s->mtf_upper_bound = 63;
+
+  s->dictionary = BrotliGetDictionary();
+  s->transforms = BrotliGetTransforms();
+
+  return BROTLI_TRUE;  /* Unconditional: this function has no failure path. */
+}
+
+void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s) {
+  s->meta_block_remaining_len = 0;  /* Start of the per-metablock reset. */
+  s->block_length[0] = 1U << 24;  /* All three slots start at 16777216. */
+  s->block_length[1] = 1U << 24;
+  s->block_length[2] = 1U << 24;
+  s->num_block_types[0] = 1;  /* All three slots start with one type. */
+  s->num_block_types[1] = 1;
+  s->num_block_types[2] = 1;
+  s->block_type_rb[0] = 1;  /* Three (1, 0) pairs; presumably one per slot. */
+  s->block_type_rb[1] = 0;
+  s->block_type_rb[2] = 1;
+  s->block_type_rb[3] = 0;
+  s->block_type_rb[4] = 1;
+  s->block_type_rb[5] = 0;
+  s->context_map = NULL;  /* Pointers below are cleared, not freed, here. */
+  s->context_modes = NULL;
+  s->dist_context_map = NULL;
+  s->context_map_slice = NULL;
+  s->literal_htree = NULL;
+  s->dist_context_map_slice = NULL;
+  s->dist_htree_index = 0;
+  s->context_lookup = NULL;
+  s->literal_hgroup.codes = NULL;  /* NOTE(review): freeing is done by */
+  s->literal_hgroup.htrees = NULL;  /* ...StateCleanupAfterMetablock; */
+  s->insert_copy_hgroup.codes = NULL;  /* presumably callers run it */
+  s->insert_copy_hgroup.htrees = NULL;  /* before this - TODO confirm. */
+  s->distance_hgroup.codes = NULL;
+  s->distance_hgroup.htrees = NULL;
+}
+
+void BrotliDecoderStateCleanupAfterMetablock(BrotliDecoderState* s) {
+  BROTLI_DECODER_FREE(s, s->context_modes);  /* Frees six buffers in all. */
+  BROTLI_DECODER_FREE(s, s->context_map);  /* presumably NULL-safe - confirm */
+  BROTLI_DECODER_FREE(s, s->dist_context_map);
+  BROTLI_DECODER_FREE(s, s->literal_hgroup.htrees);  /* Owns .codes too. */
+  BROTLI_DECODER_FREE(s, s->insert_copy_hgroup.htrees);  /* Owns .codes too. */
+  BROTLI_DECODER_FREE(s, s->distance_hgroup.htrees);  /* Owns .codes too. */
+}  /* htrees/codes share one block: see BrotliDecoderHuffmanTreeGroupInit. */
+
+void BrotliDecoderStateCleanup(BrotliDecoderState* s) {
+  BrotliDecoderStateCleanupAfterMetablock(s);  /* Per-metablock buffers. */
+  /* NOTE(review): block_len_trees is never freed here - confirm intent. */
+  BROTLI_DECODER_FREE(s, s->ringbuffer);
+  BROTLI_DECODER_FREE(s, s->block_type_trees);
+}
+
+BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s,
+    HuffmanTreeGroup* group, uint32_t alphabet_size_max,
+    uint32_t alphabet_size_limit, uint32_t ntrees) {
+  /* Pack two allocations into one: ntrees table pointers, then the codes. */
+  const size_t max_table_size =
+      kMaxHuffmanTableSize[(alphabet_size_limit + 31) >> 5];  /* ceil(x/32) */
+  const size_t code_size = sizeof(HuffmanCode) * ntrees * max_table_size;
+  const size_t htree_size = sizeof(HuffmanCode*) * ntrees;
+  /* Pointer alignment is, hopefully, wider than sizeof(HuffmanCode). */
+  HuffmanCode** p = (HuffmanCode**)BROTLI_DECODER_ALLOC(s,
+      code_size + htree_size);
+  group->alphabet_size_max = (uint16_t)alphabet_size_max;
+  group->alphabet_size_limit = (uint16_t)alphabet_size_limit;
+  group->num_htrees = (uint16_t)ntrees;
+  group->htrees = p;  /* NULL if the allocation failed. */
+  group->codes = p ? (HuffmanCode*)(&p[ntrees]) : NULL;  /* No NULL offset. */
+  return !!p;  /* Non-zero only when the allocation succeeded. */
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/state.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/state.h
new file mode 100644
index 0000000000..54dab698ba
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/dec/state.h
@@ -0,0 +1,365 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Brotli state for partial streaming decoding. */
+
+#ifndef BROTLI_DEC_STATE_H_
+#define BROTLI_DEC_STATE_H_
+
+#include "../common/constants.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include "../common/transform.h"
+#include <brotli/types.h>
+#include "./bit_reader.h"
+#include "./huffman.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Graphviz diagram that describes state transitions:
+
+digraph States {
+  graph [compound=true]
+  concentrate=true
+  node [shape="box"]
+
+  UNINITED -> {LARGE_WINDOW_BITS -> INITIALIZE}
+  subgraph cluster_metablock_workflow {
+    style="rounded"
+    label=< <B>METABLOCK CYCLE</B> >
+    METABLOCK_BEGIN -> METABLOCK_HEADER
+    METABLOCK_HEADER:sw -> METADATA
+    METABLOCK_HEADER:s -> UNCOMPRESSED
+    METABLOCK_HEADER:se -> METABLOCK_DONE:ne
+    METADATA:s -> METABLOCK_DONE:w
+    UNCOMPRESSED:s -> METABLOCK_DONE:n
+    METABLOCK_DONE:e -> METABLOCK_BEGIN:e [constraint="false"]
+  }
+  INITIALIZE -> METABLOCK_BEGIN
+  METABLOCK_DONE -> DONE
+
+  subgraph cluster_compressed_metablock {
+    style="rounded"
+    label=< <B>COMPRESSED METABLOCK</B> >
+
+    subgraph cluster_command {
+      style="rounded"
+      label=< <B>HOT LOOP</B> >
+
+      _METABLOCK_DONE_PORT_ [shape=point style=invis]
+
+      {
+        // Set different shape for nodes returning from "compressed metablock".
+        node [shape=invhouse]; CMD_INNER CMD_POST_DECODE_LITERALS;
+        CMD_POST_WRAP_COPY; CMD_INNER_WRITE; CMD_POST_WRITE_1;
+      }
+
+      CMD_BEGIN -> CMD_INNER -> CMD_POST_DECODE_LITERALS -> CMD_POST_WRAP_COPY
+
+      // IO ("write") nodes are not in the hot loop!
+      CMD_INNER_WRITE [style=dashed]
+      CMD_INNER -> CMD_INNER_WRITE
+      CMD_POST_WRITE_1 [style=dashed]
+      CMD_POST_DECODE_LITERALS -> CMD_POST_WRITE_1
+      CMD_POST_WRITE_2 [style=dashed]
+      CMD_POST_WRAP_COPY -> CMD_POST_WRITE_2
+
+      CMD_POST_WRITE_1 -> CMD_BEGIN:s [constraint="false"]
+      CMD_INNER_WRITE -> {CMD_INNER CMD_POST_DECODE_LITERALS}
+          [constraint="false"]
+      CMD_BEGIN:ne -> CMD_POST_DECODE_LITERALS [constraint="false"]
+      CMD_POST_WRAP_COPY -> CMD_BEGIN [constraint="false"]
+      CMD_POST_DECODE_LITERALS -> CMD_BEGIN:ne [constraint="false"]
+      CMD_POST_WRITE_2 -> CMD_POST_WRAP_COPY [constraint="false"]
+      {rank=same; CMD_BEGIN; CMD_INNER; CMD_POST_DECODE_LITERALS;
+          CMD_POST_WRAP_COPY}
+      {rank=same; CMD_INNER_WRITE; CMD_POST_WRITE_1; CMD_POST_WRITE_2}
+
+      {CMD_INNER CMD_POST_DECODE_LITERALS CMD_POST_WRAP_COPY} ->
+          _METABLOCK_DONE_PORT_ [style=invis]
+      {CMD_INNER_WRITE CMD_POST_WRITE_1} -> _METABLOCK_DONE_PORT_
+          [constraint="false" style=invis]
+    }
+
+    BEFORE_COMPRESSED_METABLOCK_HEADER:s -> HUFFMAN_CODE_0:n
+    HUFFMAN_CODE_0 -> HUFFMAN_CODE_1 -> HUFFMAN_CODE_2 -> HUFFMAN_CODE_3
+    HUFFMAN_CODE_0 -> METABLOCK_HEADER_2 -> CONTEXT_MODES -> CONTEXT_MAP_1
+    CONTEXT_MAP_1 -> CONTEXT_MAP_2 -> TREE_GROUP
+    TREE_GROUP -> BEFORE_COMPRESSED_METABLOCK_BODY:e
+    BEFORE_COMPRESSED_METABLOCK_BODY:s -> CMD_BEGIN:n
+
+    HUFFMAN_CODE_3:e -> HUFFMAN_CODE_0:ne [constraint="false"]
+    {rank=same; HUFFMAN_CODE_0; HUFFMAN_CODE_1; HUFFMAN_CODE_2; HUFFMAN_CODE_3}
+    {rank=same; METABLOCK_HEADER_2; CONTEXT_MODES; CONTEXT_MAP_1; CONTEXT_MAP_2;
+        TREE_GROUP}
+  }
+  METABLOCK_HEADER:e -> BEFORE_COMPRESSED_METABLOCK_HEADER:n
+
+  _METABLOCK_DONE_PORT_ -> METABLOCK_DONE:se
+      [constraint="false" ltail=cluster_command]
+
+  UNINITED [shape=Mdiamond];
+  DONE [shape=Msquare];
+}
+
+
+ */
+
+typedef enum {  /* Main decoder states; names mirror the diagram above. */
+  BROTLI_STATE_UNINITED,  /* First enumerator; set by BrotliDecoderStateInit. */
+  BROTLI_STATE_LARGE_WINDOW_BITS,
+  BROTLI_STATE_INITIALIZE,
+  BROTLI_STATE_METABLOCK_BEGIN,
+  BROTLI_STATE_METABLOCK_HEADER,
+  BROTLI_STATE_METABLOCK_HEADER_2,
+  BROTLI_STATE_CONTEXT_MODES,
+  BROTLI_STATE_COMMAND_BEGIN,  /* COMMAND_* appear as CMD_* in the diagram. */
+  BROTLI_STATE_COMMAND_INNER,
+  BROTLI_STATE_COMMAND_POST_DECODE_LITERALS,
+  BROTLI_STATE_COMMAND_POST_WRAP_COPY,
+  BROTLI_STATE_UNCOMPRESSED,
+  BROTLI_STATE_METADATA,
+  BROTLI_STATE_COMMAND_INNER_WRITE,  /* IO ("write") node in the diagram. */
+  BROTLI_STATE_METABLOCK_DONE,
+  BROTLI_STATE_COMMAND_POST_WRITE_1,  /* IO ("write") node in the diagram. */
+  BROTLI_STATE_COMMAND_POST_WRITE_2,  /* IO ("write") node in the diagram. */
+  BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_HEADER,
+  BROTLI_STATE_HUFFMAN_CODE_0,
+  BROTLI_STATE_HUFFMAN_CODE_1,
+  BROTLI_STATE_HUFFMAN_CODE_2,
+  BROTLI_STATE_HUFFMAN_CODE_3,
+  BROTLI_STATE_CONTEXT_MAP_1,
+  BROTLI_STATE_CONTEXT_MAP_2,
+  BROTLI_STATE_TREE_GROUP,
+  BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_BODY,
+  BROTLI_STATE_DONE  /* Has no outgoing edge in the diagram. */
+} BrotliRunningState;  /* Type of BrotliDecoderStateStruct.state. */
+
+typedef enum {  /* Enumerator order fixes the numeric values. */
+  BROTLI_STATE_METABLOCK_HEADER_NONE,  /* Set by BrotliDecoderStateInit. */
+  BROTLI_STATE_METABLOCK_HEADER_EMPTY,
+  BROTLI_STATE_METABLOCK_HEADER_NIBBLES,
+  BROTLI_STATE_METABLOCK_HEADER_SIZE,
+  BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED,
+  BROTLI_STATE_METABLOCK_HEADER_RESERVED,
+  BROTLI_STATE_METABLOCK_HEADER_BYTES,
+  BROTLI_STATE_METABLOCK_HEADER_METADATA
+} BrotliRunningMetablockHeaderState;  /* For substate_metablock_header. */
+
+typedef enum {  /* Two-valued sub-state. */
+  BROTLI_STATE_UNCOMPRESSED_NONE,  /* Set by BrotliDecoderStateInit. */
+  BROTLI_STATE_UNCOMPRESSED_WRITE
+} BrotliRunningUncompressedState;  /* For substate_uncompressed. */
+
+typedef enum {  /* Two-valued sub-state. */
+  BROTLI_STATE_TREE_GROUP_NONE,  /* First enumerator, so its value is 0. */
+  BROTLI_STATE_TREE_GROUP_LOOP
+} BrotliRunningTreeGroupState;  /* For the arena's substate_tree_group. */
+
+typedef enum {  /* Enumerator order fixes the numeric values. */
+  BROTLI_STATE_CONTEXT_MAP_NONE,  /* First enumerator, so its value is 0. */
+  BROTLI_STATE_CONTEXT_MAP_READ_PREFIX,
+  BROTLI_STATE_CONTEXT_MAP_HUFFMAN,
+  BROTLI_STATE_CONTEXT_MAP_DECODE,
+  BROTLI_STATE_CONTEXT_MAP_TRANSFORM
+} BrotliRunningContextMapState;  /* For the arena's substate_context_map. */
+
+typedef enum {  /* Enumerator order fixes the numeric values. */
+  BROTLI_STATE_HUFFMAN_NONE,  /* First enumerator, so its value is 0. */
+  BROTLI_STATE_HUFFMAN_SIMPLE_SIZE,
+  BROTLI_STATE_HUFFMAN_SIMPLE_READ,
+  BROTLI_STATE_HUFFMAN_SIMPLE_BUILD,
+  BROTLI_STATE_HUFFMAN_COMPLEX,
+  BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS
+} BrotliRunningHuffmanState;  /* For the arena's substate_huffman. */
+
+typedef enum {  /* Enumerator order fixes the numeric values. */
+  BROTLI_STATE_DECODE_UINT8_NONE,  /* Set by BrotliDecoderStateInit. */
+  BROTLI_STATE_DECODE_UINT8_SHORT,
+  BROTLI_STATE_DECODE_UINT8_LONG
+} BrotliRunningDecodeUint8State;  /* For substate_decode_uint8. */
+
+typedef enum {  /* Two-valued sub-state. */
+  BROTLI_STATE_READ_BLOCK_LENGTH_NONE,  /* Set by BrotliDecoderStateInit. */
+  BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX
+} BrotliRunningReadBlockLengthState;  /* For substate_read_block_length. */
+
+typedef struct BrotliMetablockHeaderArena {
+  BrotliRunningTreeGroupState substate_tree_group;  /* Sub-state trackers. */
+  BrotliRunningContextMapState substate_context_map;
+  BrotliRunningHuffmanState substate_huffman;
+
+  uint32_t sub_loop_counter;
+
+  uint32_t repeat_code_len;
+  uint32_t prev_code_len;
+
+  /* For ReadHuffmanCode. */
+  uint32_t symbol;
+  uint32_t repeat;
+  uint32_t space;
+
+  /* Huffman table for "histograms". */
+  HuffmanCode table[32];
+  /* List of heads of symbol chains. */
+  uint16_t* symbol_lists;
+  /* Storage for symbol_lists (presumably what it points into - confirm). */
+  uint16_t symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1 +
+                               BROTLI_NUM_COMMAND_SYMBOLS];
+  /* Tails of symbol chains. */
+  int next_symbol[32];
+  uint8_t code_length_code_lengths[BROTLI_CODE_LENGTH_CODES];
+  /* Population counts for the code lengths. */
+  uint16_t code_length_histo[16];
+
+  /* For HuffmanTreeGroupDecode. */
+  int htree_index;
+  HuffmanCode* next;
+
+  /* For DecodeContextMap. */
+  uint32_t context_index;
+  uint32_t max_run_length_prefix;
+  uint32_t code;
+  HuffmanCode context_map_table[BROTLI_HUFFMAN_MAX_SIZE_272];
+} BrotliMetablockHeaderArena;  /* Fields are grouped per helper, see "For". */
+
+typedef struct BrotliMetablockBodyArena {  /* Two parallel 544-entry tables. */
+  uint8_t dist_extra_bits[544];  /* presumably per distance symbol - confirm */
+  uint32_t dist_offset[544];  /* presumably per distance symbol - confirm */
+} BrotliMetablockBodyArena;
+
+struct BrotliDecoderStateStruct {
+  BrotliRunningState state;
+
+  /* This counter is reused for several disjoint loops. */
+  int loop_counter;
+
+  BrotliBitReader br;
+
+  brotli_alloc_func alloc_func;
+  brotli_free_func free_func;
+  void* memory_manager_opaque;  /* passed as first argument to both hooks */
+
+  /* Temporary storage for remaining input. The Brotli stream format is
+     designed so that 64 bits are enough to make progress in decoding. */
+  union {
+    uint64_t u64;
+    uint8_t u8[8];
+  } buffer;
+  uint32_t buffer_length;
+
+  int pos;
+  int max_backward_distance;
+  int max_distance;
+  int ringbuffer_size;
+  int ringbuffer_mask;
+  int dist_rb_idx;
+  int dist_rb[4];
+  int error_code;
+  uint8_t* ringbuffer;
+  uint8_t* ringbuffer_end;
+  HuffmanCode* htree_command;
+  const uint8_t* context_lookup;
+  uint8_t* context_map_slice;
+  uint8_t* dist_context_map_slice;
+
+  /* NOTE(review): the ring buffer of past copy distances (used by special
+     distance codes) is presumably dist_rb / dist_rb_idx above, not these. */
+  HuffmanTreeGroup literal_hgroup;
+  HuffmanTreeGroup insert_copy_hgroup;
+  HuffmanTreeGroup distance_hgroup;
+  HuffmanCode* block_type_trees;
+  HuffmanCode* block_len_trees;
+  /* This is true if the literal context map histogram type always matches the
+     block type. It is then not needed to keep the context (faster decoding). */
+  int trivial_literal_context;
+  /* Distance context is actual after command is decoded and before distance is
+     computed. After distance computation it is used as a temporary variable. */
+  int distance_context;
+  int meta_block_remaining_len;
+  uint32_t block_length_index;
+  uint32_t block_length[3];
+  uint32_t num_block_types[3];
+  uint32_t block_type_rb[6];
+  uint32_t distance_postfix_bits;
+  uint32_t num_direct_distance_codes;
+  uint32_t num_dist_htrees;
+  uint8_t* dist_context_map;
+  HuffmanCode* literal_htree;
+  uint8_t dist_htree_index;
+
+  int copy_length;
+  int distance_code;
+
+  /* For partial write operations. */
+  size_t rb_roundtrips;  /* how many times we went around the ring-buffer */
+  size_t partial_pos_out;  /* how much output to the user in total */
+
+  /* For InverseMoveToFrontTransform. */
+  uint32_t mtf_upper_bound;
+  uint32_t mtf[64 + 1];
+
+  /* Less used attributes are at the end of this struct. */
+
+  /* States inside function calls. */
+  BrotliRunningMetablockHeaderState substate_metablock_header;
+  BrotliRunningUncompressedState substate_uncompressed;
+  BrotliRunningDecodeUint8State substate_decode_uint8;
+  BrotliRunningReadBlockLengthState substate_read_block_length;
+
+  unsigned int is_last_metablock : 1;
+  unsigned int is_uncompressed : 1;
+  unsigned int is_metadata : 1;
+  unsigned int should_wrap_ringbuffer : 1;
+  unsigned int canny_ringbuffer_allocation : 1;
+  unsigned int large_window : 1;
+  unsigned int size_nibbles : 8;
+  uint32_t window_bits;
+
+  int new_ringbuffer_size;
+
+  uint32_t num_literal_htrees;
+  uint8_t* context_map;
+  uint8_t* context_modes;
+
+  const BrotliDictionary* dictionary;
+  const BrotliTransforms* transforms;
+
+  uint32_t trivial_literal_contexts[8];  /* 256 bits */
+
+  union {
+    BrotliMetablockHeaderArena header;
+    BrotliMetablockBodyArena body;
+  } arena;  /* header and body scratch share the same storage */
+};
+
+typedef struct BrotliDecoderStateStruct BrotliDecoderStateInternal;
+#define BrotliDecoderState BrotliDecoderStateInternal
+/* Internal decoder-state helpers; only their prototypes are declared here. */
+BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
+BROTLI_INTERNAL void BrotliDecoderStateCleanup(BrotliDecoderState* s);
+BROTLI_INTERNAL void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s);
+BROTLI_INTERNAL void BrotliDecoderStateCleanupAfterMetablock(
+    BrotliDecoderState* s);
+BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(
+    BrotliDecoderState* s, HuffmanTreeGroup* group, uint32_t alphabet_size_max,
+    uint32_t alphabet_size_limit, uint32_t ntrees);
+/* Forwards L to S's alloc_func; S may be any pointer-valued expression. */
+#define BROTLI_DECODER_ALLOC(S, L) (S)->alloc_func((S)->memory_manager_opaque, L)
+/* Passes X to S's free_func, then sets X to NULL (X is evaluated twice). */
+#define BROTLI_DECODER_FREE(S, X) {              \
+  (S)->free_func((S)->memory_manager_opaque, X); \
+  X = NULL;                                      \
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_DEC_STATE_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references.c
new file mode 100644
index 0000000000..a07a617a09
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references.c
@@ -0,0 +1,145 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function to find backward reference copies. */
+
+#include "./backward_references.h"
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./command.h"
+#include "./dictionary_hash.h"
+#include "./memory.h"
+#include "./quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Maps |distance| to a distance code. Within |max_distance|, exact hits on
+   dist_cache[0] / dist_cache[1] give 0 / 1, values within 3 of either give
+   the code stored in a packed nibble table, and hits on dist_cache[2] /
+   dist_cache[3] give 2 / 3. Anything else is the distance offset by
+   BROTLI_NUM_DISTANCE_SHORT_CODES - 1. */
+static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
+                                                size_t max_distance,
+                                                const int* dist_cache) {
+  const size_t plain_code = distance + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
+  size_t near_last;
+  size_t near_second;
+  if (distance > max_distance) return plain_code;
+  if (distance == (size_t)dist_cache[0]) return 0;
+  if (distance == (size_t)dist_cache[1]) return 1;
+  /* distance - cached + 3; unsigned wrap-around makes far values >= 7. */
+  near_last = distance + 3 - (size_t)dist_cache[0];
+  if (near_last < 7) return (0x9750468 >> (4 * near_last)) & 0xF;
+  near_second = distance + 3 - (size_t)dist_cache[1];
+  if (near_second < 7) return (0xFDB1ACE >> (4 * near_second)) & 0xF;
+  if (distance == (size_t)dist_cache[2]) return 2;
+  if (distance == (size_t)dist_cache[3]) return 3;
+  return plain_code;
+}
+
+#define EXPAND_CAT(a, b) CAT(a, b)
+#define CAT(a, b) a ## b
+#define FN(X) EXPAND_CAT(X, HASHER())
+#define EXPORT_FN(X) EXPAND_CAT(X, EXPAND_CAT(PREFIX(), HASHER()))
+/* FN(X) pastes the hasher id onto X; EXPORT_FN(X) pastes PREFIX, then id. */
+#define PREFIX() N
+/* backward_references_inc.h is included once per HASHER() value below. */
+#define HASHER() H2
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H3
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H4
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H5
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H6
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H40
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H41
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H42
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H54
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H35
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H55
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H65
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#undef PREFIX
+
+#undef EXPORT_FN
+#undef FN
+#undef CAT
+#undef EXPAND_CAT
+
+void BrotliCreateBackwardReferences(size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    ContextLut literal_context_lut, const BrotliEncoderParams* params,
+    Hasher* hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals) {
+  switch (params->hasher.type) {  /* dispatch on the configured hasher type */
+#define CASE_(N)                                                  \
+    case N:                                                       \
+      CreateBackwardReferencesNH ## N(num_bytes,                  \
+          position, ringbuffer, ringbuffer_mask,                  \
+          literal_context_lut, params, hasher, dist_cache,        \
+          last_insert_len, commands, num_commands, num_literals); \
+      return;
+    FOR_GENERIC_HASHERS(CASE_)  /* presumably one CASE_ per generic hasher */
+#undef CASE_
+    default:  /* no matching case: return without calling any variant */
+      break;
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references.h
new file mode 100644
index 0000000000..9589cc1541
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references.h
@@ -0,0 +1,39 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function to find backward reference copies. */
+
+#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
+#define BROTLI_ENC_BACKWARD_REFERENCES_H_
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./command.h"
+#include "./hash.h"
+#include "./quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* "commands" points to the next output command slot to write. "*num_commands"
+   is initially the total number of commands output by previous
+   CreateBackwardReferences calls, and must be incremented by the number
+   written by this call. */
+BROTLI_INTERNAL void BrotliCreateBackwardReferences(size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    ContextLut literal_context_lut, const BrotliEncoderParams* params,
+    Hasher* hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_BACKWARD_REFERENCES_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references_hq.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references_hq.c
new file mode 100644
index 0000000000..5651caeb7a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references_hq.c
@@ -0,0 +1,843 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function to find backward reference copies. */
+
+#include "./backward_references_hq.h"
+
+#include <string.h>  /* memcpy, memset */
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./command.h"
+#include "./fast_log.h"
+#include "./find_match_length.h"
+#include "./literal_cost.h"
+#include "./memory.h"
+#include "./params.h"
+#include "./prefix.h"
+#include "./quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* BrotliCalculateDistanceCodeLimit(BROTLI_MAX_ALLOWED_DISTANCE, 3, 120). */
+#define BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE 544
+
+static const float kInfinity = 1.7e38f;  /* ~= 2 ^ 127; initial node cost */
+/* For short code j: which cache slot to read, and the delta added to it. */
+static const uint32_t kDistanceCacheIndex[] = {
+  0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
+};
+static const int kDistanceCacheOffset[] = {
+  0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
+};
+
+void BrotliInitZopfliNodes(ZopfliNode* array, size_t length) {
+  ZopfliNode* slot = array;
+  ZopfliNode blank;  /* template copied into each of the |length| slots */
+  blank.u.cost = kInfinity;
+  blank.dcode_insert_length = 0;
+  blank.distance = 0;
+  blank.length = 1;
+  while (length-- != 0) *slot++ = blank;
+}
+
+static BROTLI_INLINE uint32_t ZopfliNodeCopyLength(const ZopfliNode* self) {
+  return self->length & 0x1FFFFFF;  /* low 25 bits of |length| */
+}
+
+static BROTLI_INLINE uint32_t ZopfliNodeLengthCode(const ZopfliNode* self) {
+  /* Bits above the low 25 hold (copy length + 9 - length code). */
+  return ZopfliNodeCopyLength(self) + 9u - (self->length >> 25);
+}
+
+static BROTLI_INLINE uint32_t ZopfliNodeCopyDistance(const ZopfliNode* self) {
+  return self->distance;  /* stored as-is */
+}
+
+static BROTLI_INLINE uint32_t ZopfliNodeDistanceCode(const ZopfliNode* self) {
+  /* Bits above the low 27: 0 means no short code, else short code + 1. */
+  const uint32_t tag = self->dcode_insert_length >> 27;
+  if (tag != 0) return tag - 1;
+  return ZopfliNodeCopyDistance(self) + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
+}
+
+static BROTLI_INLINE uint32_t ZopfliNodeCommandLength(const ZopfliNode* self) {
+  return (self->dcode_insert_length & 0x7FFFFFF) + ZopfliNodeCopyLength(self);
+}
+
+/* Histogram based cost model for zopflification. */
+typedef struct ZopfliCostModel {
+  /* The insert and copy length symbols. */
+  float cost_cmd_[BROTLI_NUM_COMMAND_SYMBOLS];
+  float* cost_dist_;  /* allocated by InitZopfliCostModel */
+  uint32_t distance_histogram_size;  /* entry count requested for cost_dist_ */
+  /* Cumulative costs of literals per position in the stream. */
+  float* literal_costs_;  /* num_bytes_ + 2 entries are requested */
+  float min_cost_cmd_;  /* lower bound over cost_cmd_, set by SetFrom* */
+  size_t num_bytes_;
+} ZopfliCostModel;
+
+static void InitZopfliCostModel(
+    MemoryManager* m, ZopfliCostModel* self, const BrotliDistanceParams* dist,
+    size_t num_bytes) {
+  self->literal_costs_ = BROTLI_ALLOC(m, float, num_bytes + 2);
+  self->cost_dist_ = BROTLI_ALLOC(m, float, dist->alphabet_size_limit);
+  self->distance_histogram_size = dist->alphabet_size_limit;
+  self->num_bytes_ = num_bytes;
+  if (BROTLI_IS_OOM(m)) { return; }  /* nothing else to do either way */
+}
+
+static void CleanupZopfliCostModel(MemoryManager* m, ZopfliCostModel* self) {
+  BROTLI_FREE(m, self->literal_costs_);  /* both set by InitZopfliCostModel */
+  BROTLI_FREE(m, self->cost_dist_);
+}
+
+/* Turns |histogram| into per-symbol bit costs written to |cost|: symbols that
+   occur get max(1, log2(total) - log2(count)); symbols that never occur get
+   a flat penalty derived from the total. */
+static void SetCost(const uint32_t* histogram, size_t histogram_size,
+                    BROTLI_BOOL literal_histogram, float* cost) {
+  size_t total = 0;
+  size_t absent = 0;
+  float log2_total;
+  float absent_cost;
+  size_t s;
+
+  /* One pass gathers both the total count and the number of empty slots. */
+  for (s = 0; s < histogram_size; ++s) {
+    total += histogram[s];
+    if (histogram[s] == 0) ++absent;
+  }
+  /* Literal histograms do not add the empty-slot count to the penalty. */
+  if (literal_histogram) absent = 0;
+  log2_total = (float)FastLog2(total);
+  absent_cost = (float)FastLog2(total + absent) + 2;
+
+  for (s = 0; s < histogram_size; ++s) {
+    if (histogram[s] != 0) {
+      /* Shannon bits for this symbol, never below one bit. */
+      cost[s] = log2_total - (float)FastLog2(histogram[s]);
+      if (cost[s] < 1) cost[s] = 1;
+    } else {
+      cost[s] = absent_cost;
+    }
+  }
+}
+
+static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
+                                           size_t position,
+                                           const uint8_t* ringbuffer,
+                                           size_t ringbuffer_mask,
+                                           const Command* commands,
+                                           size_t num_commands,
+                                           size_t last_insert_len) {
+  uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS];
+  uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS];
+  uint32_t histogram_dist[BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE];
+  float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS];
+  size_t pos = position - last_insert_len;
+  float min_cost_cmd = kInfinity;
+  size_t i;
+  float* cost_cmd = self->cost_cmd_;
+  /* NOTE(review): assumes distcode, distance_histogram_size fit in 544. */
+  memset(histogram_literal, 0, sizeof(histogram_literal));
+  memset(histogram_cmd, 0, sizeof(histogram_cmd));
+  memset(histogram_dist, 0, sizeof(histogram_dist));
+  /* Tally command, distance and literal symbols over |commands|. */
+  for (i = 0; i < num_commands; i++) {
+    size_t inslength = commands[i].insert_len_;
+    size_t copylength = CommandCopyLen(&commands[i]);
+    size_t distcode = commands[i].dist_prefix_ & 0x3FF;  /* low 10 bits */
+    size_t cmdcode = commands[i].cmd_prefix_;
+    size_t j;
+
+    histogram_cmd[cmdcode]++;
+    if (cmdcode >= 128) histogram_dist[distcode]++;
+
+    for (j = 0; j < inslength; j++) {
+      histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
+    }
+
+    pos += inslength + copylength;
+  }
+  /* Convert each histogram into per-symbol costs. */
+  SetCost(histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, BROTLI_TRUE,
+          cost_literal);
+  SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, BROTLI_FALSE,
+          cost_cmd);
+  SetCost(histogram_dist, self->distance_histogram_size, BROTLI_FALSE,
+          self->cost_dist_);
+
+  for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
+    min_cost_cmd = BROTLI_MIN(float, min_cost_cmd, cost_cmd[i]);
+  }
+  self->min_cost_cmd_ = min_cost_cmd;
+  /* Running sums of literal costs; carry keeps what float rounding drops. */
+  {
+    float* literal_costs = self->literal_costs_;
+    float literal_carry = 0.0;
+    size_t num_bytes = self->num_bytes_;
+    literal_costs[0] = 0.0;
+    for (i = 0; i < num_bytes; ++i) {
+      literal_carry +=
+          cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
+      literal_costs[i + 1] = literal_costs[i] + literal_carry;
+      literal_carry -= literal_costs[i + 1] - literal_costs[i];
+    }
+  }
+}
+
+static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self,
+                                               size_t position,
+                                               const uint8_t* ringbuffer,
+                                               size_t ringbuffer_mask) {
+  float* literal_costs = self->literal_costs_;
+  float literal_carry = 0.0;  /* part of the addend lost to float rounding */
+  float* cost_dist = self->cost_dist_;
+  float* cost_cmd = self->cost_cmd_;
+  size_t num_bytes = self->num_bytes_;
+  size_t i;
+  BrotliEstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask,
+                                    ringbuffer, &literal_costs[1]);
+  literal_costs[0] = 0.0;  /* running sums start at zero */
+  for (i = 0; i < num_bytes; ++i) {  /* per-byte costs -> running sums */
+    literal_carry += literal_costs[i + 1];
+    literal_costs[i + 1] = literal_costs[i] + literal_carry;
+    literal_carry -= literal_costs[i + 1] - literal_costs[i];
+  }
+  for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
+    cost_cmd[i] = (float)FastLog2(11 + (uint32_t)i);  /* data-independent */
+  }
+  for (i = 0; i < self->distance_histogram_size; ++i) {
+    cost_dist[i] = (float)FastLog2(20 + (uint32_t)i);  /* data-independent */
+  }
+  self->min_cost_cmd_ = (float)FastLog2(11);  /* same value as cost_cmd[0] */
+}
+
+static BROTLI_INLINE float ZopfliCostModelGetCommandCost(
+    const ZopfliCostModel* self, uint16_t cmdcode) {
+  return self->cost_cmd_[cmdcode];  /* cost of command symbol |cmdcode| */
+}
+
+static BROTLI_INLINE float ZopfliCostModelGetDistanceCost(
+    const ZopfliCostModel* self, size_t distcode) {
+  return self->cost_dist_[distcode];  /* cost of distance symbol |distcode| */
+}
+
+static BROTLI_INLINE float ZopfliCostModelGetLiteralCosts(
+    const ZopfliCostModel* self, size_t from, size_t to) {  /* sum difference */
+  return self->literal_costs_[to] - self->literal_costs_[from];
+}
+
+static BROTLI_INLINE float ZopfliCostModelGetMinCostCmd(
+    const ZopfliCostModel* self) {
+  return self->min_cost_cmd_;  /* as stored by the SetFrom* functions */
+}
+
+/* REQUIRES: len >= 2, start_pos <= pos */
+/* REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity */
+/* Maintains the "ZopfliNode array invariant"; writes nodes[pos + len]. */
+static BROTLI_INLINE void UpdateZopfliNode(ZopfliNode* nodes, size_t pos,
+    size_t start_pos, size_t len, size_t len_code, size_t dist,
+    size_t short_code, float cost) {
+  ZopfliNode* const dst = nodes + (pos + len);
+  dst->u.cost = cost;
+  dst->distance = (uint32_t)dist;
+  dst->length = (uint32_t)(len | ((len + 9u - len_code) << 25));
+  dst->dcode_insert_length =
+      (uint32_t)((short_code << 27) | (pos - start_pos));
+}
+
+typedef struct PosData {
+  size_t pos;
+  int distance_cache[4];
+  float costdiff;  /* cost minus literal cost of [0, pos); see EvaluateNode */
+  float cost;
+} PosData;
+
+/* Maintains the 8 smallest cost differences together with their positions. */
+typedef struct StartPosQueue {
+  PosData q_[8];
+  size_t idx_;  /* incremented once per push */
+} StartPosQueue;
+
+static BROTLI_INLINE void InitStartPosQueue(StartPosQueue* self) {
+  self->idx_ = 0;  /* nothing pushed yet, so the reported size is 0 */
+}
+
+static size_t StartPosQueueSize(const StartPosQueue* self) {
+  return BROTLI_MIN(size_t, self->idx_, 8);  /* capped at the 8 slots */
+}
+
+static void StartPosQueuePush(StartPosQueue* self, const PosData* posdata) {
+  PosData* const ring = self->q_;
+  size_t slot = ~(self->idx_++) & 7;
+  size_t remaining = StartPosQueueSize(self);
+  ring[slot] = *posdata;
+  /* One bubble pass puts the new entry in place: at most size - 1 compares of
+     neighbouring ring slots, swapping whenever the earlier one costs more. */
+  for (; remaining > 1; --remaining, ++slot) {
+    const size_t here = slot & 7;
+    const size_t there = (slot + 1) & 7;
+    if (ring[here].costdiff > ring[there].costdiff) {
+      BROTLI_SWAP(PosData, ring, here, there);
+    }
+  }
+}
+
+static const PosData* StartPosQueueAt(const StartPosQueue* self, size_t k) {
+  return self->q_ + ((k - self->idx_) & 7);
+}
+
+/* Returns the minimum possible copy length that can improve the cost of any */
+/* future position. */
+static size_t ComputeMinimumCopyLength(const float start_cost,
+                                       const ZopfliNode* nodes,
+                                       const size_t num_bytes,
+                                       const size_t pos) {
+  /* Lower bound on the cost of reaching any later position from |pos|. */
+  float floor_cost = start_cost;
+  size_t bucket_span = 4;
+  size_t bucket_edge = 10;
+  size_t len;
+  for (len = 2; pos + len <= num_bytes; ) {
+    /* Stop at the first position that is not already reached at or below
+       the bound. Every position before it was reached with no more cost
+       than anything starting here could achieve, so lengths up to that
+       point need not be examined. */
+    if (!(nodes[pos + len].u.cost <= floor_cost)) break;
+    ++len;
+    if (len != bucket_edge) continue;
+    /* Entering the next copy length code bucket costs one more extra bit. */
+    floor_cost += 1.0f;
+    bucket_edge += bucket_span;
+    bucket_span *= 2;
+  }
+  return len;
+}
+
+/* REQUIRES: nodes[pos].cost < kInfinity
+   REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
+static uint32_t ComputeDistanceShortcut(const size_t block_start,
+                                        const size_t pos,
+                                        const size_t max_backward_limit,
+                                        const size_t gap,
+                                        const ZopfliNode* nodes) {
+  const ZopfliNode* const node = &nodes[pos];
+  const size_t copy_len = ZopfliNodeCopyLength(node);
+  const size_t insert_len = node->dcode_insert_length & 0x7FFFFFF;
+  const size_t distance = ZopfliNodeCopyDistance(node);
+  if (pos == 0) return 0;
+  /* |block_start + pos| is where the command ends, so its copy begins at
+     |block_start + pos - copy_len|. A distance reaching further back than
+     that, or beyond |max_backward_limit| + |gap|, is a static dictionary
+     reference and leaves the last distances untouched; so does distance
+     code 0 (last distance). In those cases inherit the shortcut stored at
+     the start of this command. */
+  if (distance + copy_len > block_start + pos + gap ||
+      distance > max_backward_limit + gap ||
+      ZopfliNodeDistanceCode(node) == 0) {
+    return nodes[pos - copy_len - insert_len].u.shortcut;
+  }
+  return (uint32_t)pos;
+}
+
+/* Fills in dist_cache[0..3] with the last four distances (as defined by
+   Section 4. of the Spec) that would be used at (block_start + pos) if we
+   used the shortest path of commands from block_start, computed from
+   nodes[0..pos]. Slots the path cannot fill are taken, in order, from
+   starting_dist_cache (the last four distances at block_start).
+   REQUIRES: nodes[pos].cost < kInfinity
+   REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
+static void ComputeDistanceCache(const size_t pos,
+                                 const int* starting_dist_cache,
+                                 const ZopfliNode* nodes,
+                                 int* dist_cache) {
+  size_t cursor = nodes[pos].u.shortcut;
+  int filled = 0;
+  int borrowed = 0;
+  for (; filled < 4 && cursor > 0; ++filled) {
+    const ZopfliNode* const node = &nodes[cursor];
+    const size_t skip = ZopfliNodeCommandLength(node);
+    dist_cache[filled] = (int)(size_t)ZopfliNodeCopyDistance(node);
+    /* Because of prerequisite, cursor >= skip >= 2. */
+    cursor = nodes[cursor - skip].u.shortcut;
+  }
+  while (filled < 4) {
+    dist_cache[filled++] = starting_dist_cache[borrowed++];
+  }
+}
+
+/* Maintains "ZopfliNode array invariant" and pushes the node to the queue if
+   its cost does not exceed the literal-only cost of reaching |pos|. */
+static void EvaluateNode(
+    const size_t block_start, const size_t pos, const size_t max_backward_limit,
+    const size_t gap, const int* starting_dist_cache,
+    const ZopfliCostModel* model, StartPosQueue* queue, ZopfliNode* nodes) {
+  /* Save cost: the u.shortcut store below clobbers it (shared |u|; confirm). */
+  float node_cost = nodes[pos].u.cost;
+  nodes[pos].u.shortcut = ComputeDistanceShortcut(
+      block_start, pos, max_backward_limit, gap, nodes);
+  if (node_cost <= ZopfliCostModelGetLiteralCosts(model, 0, pos)) {
+    PosData posdata;
+    posdata.pos = pos;
+    posdata.cost = node_cost;
+    posdata.costdiff = node_cost -
+        ZopfliCostModelGetLiteralCosts(model, 0, pos);
+    ComputeDistanceCache(
+        pos, starting_dist_cache, nodes, posdata.distance_cache);
+    StartPosQueuePush(queue, &posdata);
+  }
+}
+
+/* Improves nodes after |pos|; returns the longest copy length recorded. */
+static size_t UpdateNodes(
+    const size_t num_bytes, const size_t block_start, const size_t pos,
+    const uint8_t* ringbuffer, const size_t ringbuffer_mask,
+    const BrotliEncoderParams* params, const size_t max_backward_limit,
+    const int* starting_dist_cache, const size_t num_matches,
+    const BackwardMatch* matches, const ZopfliCostModel* model,
+    StartPosQueue* queue, ZopfliNode* nodes) {
+  const size_t stream_offset = params->stream_offset;
+  const size_t cur_ix = block_start + pos;
+  const size_t cur_ix_masked = cur_ix & ringbuffer_mask;
+  const size_t max_distance = BROTLI_MIN(size_t, cur_ix, max_backward_limit);
+  const size_t dictionary_start = BROTLI_MIN(size_t,
+      cur_ix + stream_offset, max_backward_limit);
+  const size_t max_len = num_bytes - pos;
+  const size_t max_zopfli_len = MaxZopfliLen(params);
+  const size_t max_iters = MaxZopfliCandidates(params);
+  size_t min_len;
+  size_t result = 0;  /* longest copy length stored by this call */
+  size_t k;
+  size_t gap = 0;
+  /* Finalize nodes[pos] and offer it to the queue as a start position. */
+  EvaluateNode(block_start + stream_offset, pos, max_backward_limit, gap,
+      starting_dist_cache, model, queue, nodes);
+
+  {
+    const PosData* posdata = StartPosQueueAt(queue, 0);
+    float min_cost = (posdata->cost + ZopfliCostModelGetMinCostCmd(model) +
+        ZopfliCostModelGetLiteralCosts(model, posdata->pos, pos));
+    min_len = ComputeMinimumCopyLength(min_cost, nodes, num_bytes, pos);
+  }
+
+  /* Go over the command starting positions in order of increasing cost
+     difference. */
+  for (k = 0; k < max_iters && k < StartPosQueueSize(queue); ++k) {
+    const PosData* posdata = StartPosQueueAt(queue, k);
+    const size_t start = posdata->pos;
+    const uint16_t inscode = GetInsertLengthCode(pos - start);
+    const float start_costdiff = posdata->costdiff;
+    const float base_cost = start_costdiff + (float)GetInsertExtra(inscode) +
+        ZopfliCostModelGetLiteralCosts(model, 0, pos);
+
+    /* Look for last distance matches using the distance cache from this
+       starting position. */
+    size_t best_len = min_len - 1;
+    size_t j = 0;
+    for (; j < BROTLI_NUM_DISTANCE_SHORT_CODES && best_len < max_len; ++j) {
+      const size_t idx = kDistanceCacheIndex[j];
+      const size_t backward =
+          (size_t)(posdata->distance_cache[idx] + kDistanceCacheOffset[j]);
+      size_t prev_ix = cur_ix - backward;
+      size_t len = 0;
+      uint8_t continuation;
+      /* Bounds check first; only then peek at the byte after the best match. */
+      if (cur_ix_masked + best_len > ringbuffer_mask) break;
+      continuation = ringbuffer[cur_ix_masked + best_len];
+      if (BROTLI_PREDICT_FALSE(backward > dictionary_start + gap)) {
+        /* Word dictionary -> ignore. */
+        continue;
+      }
+      if (backward <= max_distance) {
+        /* Regular backward reference. */
+        if (prev_ix >= cur_ix) {
+          continue;
+        }
+
+        prev_ix &= ringbuffer_mask;
+        if (prev_ix + best_len > ringbuffer_mask ||
+            continuation != ringbuffer[prev_ix + best_len]) {
+          continue;
+        }
+        len = FindMatchLengthWithLimit(&ringbuffer[prev_ix],
+                                       &ringbuffer[cur_ix_masked],
+                                       max_len);
+      } else {
+        /* "Gray" area. It is addressable by decoder, but this encoder
+           instance does not have that data -> should not touch it. */
+        continue;
+      }
+      {
+        const float dist_cost = base_cost +
+            ZopfliCostModelGetDistanceCost(model, j);
+        size_t l;
+        for (l = best_len + 1; l <= len; ++l) {
+          const uint16_t copycode = GetCopyLengthCode(l);
+          const uint16_t cmdcode =
+              CombineLengthCodes(inscode, copycode, j == 0);
+          const float cost = (cmdcode < 128 ? base_cost : dist_cost) +
+              (float)GetCopyExtra(copycode) +
+              ZopfliCostModelGetCommandCost(model, cmdcode);
+          if (cost < nodes[pos + l].u.cost) {
+            UpdateZopfliNode(nodes, pos, start, l, l, backward, j + 1, cost);
+            result = BROTLI_MAX(size_t, result, l);
+          }
+          best_len = l;
+        }
+      }
+    }
+
+    /* At higher iterations look only for new last distance matches, since
+       looking only for new command start positions with the same distances
+       does not help much. */
+    if (k >= 2) continue;
+
+    {
+      /* Loop through all possible copy lengths at this position. */
+      size_t len = min_len;
+      for (j = 0; j < num_matches; ++j) {
+        BackwardMatch match = matches[j];
+        size_t dist = match.distance;
+        BROTLI_BOOL is_dictionary_match =
+            TO_BROTLI_BOOL(dist > dictionary_start + gap);
+        /* We already tried all possible last distance matches, so we can use
+           normal distance code here. */
+        size_t dist_code = dist + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
+        uint16_t dist_symbol;
+        uint32_t distextra;
+        uint32_t distnumextra;
+        float dist_cost;
+        size_t max_match_len;
+        PrefixEncodeCopyDistance(
+            dist_code, params->dist.num_direct_distance_codes,
+            params->dist.distance_postfix_bits, &dist_symbol, &distextra);
+        distnumextra = dist_symbol >> 10;
+        dist_cost = base_cost + (float)distnumextra +
+            ZopfliCostModelGetDistanceCost(model, dist_symbol & 0x3FF);
+
+        /* Try all copy lengths up until the maximum copy length corresponding
+           to this distance. If the distance refers to the static dictionary, or
+           the maximum length is long enough, try only one maximum length. */
+        max_match_len = BackwardMatchLength(&match);
+        if (len < max_match_len &&
+            (is_dictionary_match || max_match_len > max_zopfli_len)) {
+          len = max_match_len;
+        }
+        for (; len <= max_match_len; ++len) {
+          const size_t len_code =
+              is_dictionary_match ? BackwardMatchLengthCode(&match) : len;
+          const uint16_t copycode = GetCopyLengthCode(len_code);
+          const uint16_t cmdcode = CombineLengthCodes(inscode, copycode, 0);
+          const float cost = dist_cost + (float)GetCopyExtra(copycode) +
+              ZopfliCostModelGetCommandCost(model, cmdcode);
+          if (cost < nodes[pos + len].u.cost) {
+            UpdateZopfliNode(nodes, pos, start, len, len_code, dist, 0, cost);
+            result = BROTLI_MAX(size_t, result, len);
+          }
+        }
+      }
+    }
+  }
+  return result;
+}
+
+static size_t ComputeShortestPathFromNodes(size_t num_bytes,
+    ZopfliNode* nodes) {
+  size_t tail = num_bytes;
+  size_t count = 0;
+  /* Skip trailing nodes that still look like stubs (length 1, no insert). */
+  while (nodes[tail].length == 1 &&
+      (nodes[tail].dcode_insert_length & 0x7FFFFFF) == 0) --tail;
+  nodes[tail].u.next = BROTLI_UINT32_MAX;  /* marks the end of the path */
+  for (; tail != 0; ++count) {
+    const size_t step = ZopfliNodeCommandLength(&nodes[tail]);
+    tail -= step;
+    nodes[tail].u.next = (uint32_t)step;  /* forward link to next command */
+  }
+  return count;
+}
+
+/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
+void BrotliZopfliCreateCommands(const size_t num_bytes,
+    const size_t block_start, const ZopfliNode* nodes, int* dist_cache,
+    size_t* last_insert_len, const BrotliEncoderParams* params,
+    Command* commands, size_t* num_literals) {
+  const size_t stream_offset = params->stream_offset;
+  const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
+  size_t pos = 0;
+  uint32_t offset = nodes[0].u.next;  /* link to the first command's node */
+  size_t i;
+  size_t gap = 0;
+  for (i = 0; offset != BROTLI_UINT32_MAX; i++) {  /* follow u.next links */
+    const ZopfliNode* next = &nodes[pos + offset];
+    size_t copy_length = ZopfliNodeCopyLength(next);
+    size_t insert_length = next->dcode_insert_length & 0x7FFFFFF;
+    pos += insert_length;
+    offset = next->u.next;
+    if (i == 0) {  /* first command absorbs the pending insert length */
+      insert_length += *last_insert_len;
+      *last_insert_len = 0;
+    }
+    {
+      size_t distance = ZopfliNodeCopyDistance(next);
+      size_t len_code = ZopfliNodeLengthCode(next);
+      size_t dictionary_start = BROTLI_MIN(size_t,
+          block_start + pos + stream_offset, max_backward_limit);
+      BROTLI_BOOL is_dictionary =
+          TO_BROTLI_BOOL(distance > dictionary_start + gap);
+      size_t dist_code = ZopfliNodeDistanceCode(next);
+      InitCommand(&commands[i], &params->dist, insert_length,
+          copy_length, (int)len_code - (int)copy_length, dist_code);
+      /* Non-dictionary copies with a nonzero code push onto dist_cache. */
+      if (!is_dictionary && dist_code > 0) {
+        dist_cache[3] = dist_cache[2];
+        dist_cache[2] = dist_cache[1];
+        dist_cache[1] = dist_cache[0];
+        dist_cache[0] = (int)distance;
+      }
+    }
+
+    *num_literals += insert_length;
+    pos += copy_length;
+  }
+  *last_insert_len += num_bytes - pos;  /* bytes left after the last command */
+}
+
+static size_t ZopfliIterate(size_t num_bytes, size_t position,
+    const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    const BrotliEncoderParams* params, const size_t gap, const int* dist_cache,
+    const ZopfliCostModel* model, const uint32_t* num_matches,
+    const BackwardMatch* matches, ZopfliNode* nodes) {
+  const size_t stream_offset = params->stream_offset;
+  const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
+  const size_t max_zopfli_len = MaxZopfliLen(params);
+  StartPosQueue queue;
+  size_t cur_match_pos = 0;  /* Index into |matches| for position i. */
+  size_t i;
+  nodes[0].length = 0;
+  nodes[0].u.cost = 0;  /* The start node costs nothing to reach. */
+  InitStartPosQueue(&queue);
+  for (i = 0; i + 3 < num_bytes; i++) {
+    size_t skip = UpdateNodes(num_bytes, position, i, ringbuffer,
+        ringbuffer_mask, params, max_backward_limit, dist_cache,
+        num_matches[i], &matches[cur_match_pos], model, &queue, nodes);
+    if (skip < BROTLI_LONG_COPY_QUICK_STEP) skip = 0;  /* Below threshold. */
+    cur_match_pos += num_matches[i];  /* Step past the matches of i. */
+    if (num_matches[i] == 1 &&
+        BackwardMatchLength(&matches[cur_match_pos - 1]) > max_zopfli_len) {
+      skip = BROTLI_MAX(size_t,
+          BackwardMatchLength(&matches[cur_match_pos - 1]), skip);
+    }
+    if (skip > 1) {
+      skip--;
+      while (skip) {  /* Skipped positions get EvaluateNode only. */
+        i++;
+        if (i + 3 >= num_bytes) break;
+        EvaluateNode(position + stream_offset, i, max_backward_limit, gap,
+            dist_cache, model, &queue, nodes);
+        cur_match_pos += num_matches[i];
+        skip--;
+      }
+    }
+  }
+  return ComputeShortestPathFromNodes(num_bytes, nodes);
+}
+
+/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
+size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    ContextLut literal_context_lut, const BrotliEncoderParams* params,
+    const int* dist_cache, Hasher* hasher, ZopfliNode* nodes) {
+  const size_t stream_offset = params->stream_offset;
+  const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
+  const size_t max_zopfli_len = MaxZopfliLen(params);
+  ZopfliCostModel model;
+  StartPosQueue queue;
+  BackwardMatch matches[2 * (MAX_NUM_MATCHES_H10 + 64)];
+  const size_t store_end = num_bytes >= StoreLookaheadH10() ?
+      position + num_bytes - StoreLookaheadH10() + 1 : position;
+  size_t i;
+  size_t gap = 0;
+  size_t lz_matches_offset = 0;
+  BROTLI_UNUSED(literal_context_lut);
+  nodes[0].length = 0;
+  nodes[0].u.cost = 0;  /* The start node costs nothing to reach. */
+  InitZopfliCostModel(m, &model, &params->dist, num_bytes);
+  if (BROTLI_IS_OOM(m)) return 0;  /* No commands are reported on OOM. */
+  ZopfliCostModelSetFromLiteralCosts(
+      &model, position, ringbuffer, ringbuffer_mask);
+  InitStartPosQueue(&queue);
+  for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; i++) {
+    const size_t pos = position + i;  /* Position of byte i in the input. */
+    const size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
+    const size_t dictionary_start = BROTLI_MIN(size_t,
+        pos + stream_offset, max_backward_limit);
+    size_t skip;
+    size_t num_matches;
+    num_matches = FindAllMatchesH10(&hasher->privat._H10,
+        &params->dictionary,
+        ringbuffer, ringbuffer_mask, pos, num_bytes - i, max_distance,
+        dictionary_start + gap, params, &matches[lz_matches_offset]);
+    if (num_matches > 0 &&
+        BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) {
+      matches[0] = matches[num_matches - 1];
+      num_matches = 1;  /* Keep only the last match found. */
+    }
+    skip = UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
+        params, max_backward_limit, dist_cache, num_matches, matches, &model,
+        &queue, nodes);
+    if (skip < BROTLI_LONG_COPY_QUICK_STEP) skip = 0;  /* Below threshold. */
+    if (num_matches == 1 && BackwardMatchLength(&matches[0]) > max_zopfli_len) {
+      skip = BROTLI_MAX(size_t, BackwardMatchLength(&matches[0]), skip);
+    }
+    if (skip > 1) {
+      /* Add the tail of the copy to the hasher. */
+      StoreRangeH10(&hasher->privat._H10,
+          ringbuffer, ringbuffer_mask, pos + 1, BROTLI_MIN(
+          size_t, pos + skip, store_end));
+      skip--;
+      while (skip) {  /* Skipped positions get EvaluateNode only. */
+        i++;
+        if (i + HashTypeLengthH10() - 1 >= num_bytes) break;
+        EvaluateNode(position + stream_offset, i, max_backward_limit, gap,
+            dist_cache, &model, &queue, nodes);
+        skip--;
+      }
+    }
+  }
+  CleanupZopfliCostModel(m, &model);
+  return ComputeShortestPathFromNodes(num_bytes, nodes);
+}
+
+void BrotliCreateZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    ContextLut literal_context_lut, const BrotliEncoderParams* params,
+    Hasher* hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals) {
+  ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) return;
+  BrotliInitZopfliNodes(nodes, num_bytes + 1);  /* nodes[0..num_bytes] */
+  *num_commands += BrotliZopfliComputeShortestPath(m, num_bytes,
+      position, ringbuffer, ringbuffer_mask, literal_context_lut, params,
+      dist_cache, hasher, nodes);  /* Adds the path's command count. */
+  if (BROTLI_IS_OOM(m)) return;  /* NOTE(review): |nodes| not freed here. */
+  BrotliZopfliCreateCommands(num_bytes, position, nodes, dist_cache,
+      last_insert_len, params, commands, num_literals);  /* Emit the path. */
+  BROTLI_FREE(m, nodes);
+}
+
+void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    ContextLut literal_context_lut, const BrotliEncoderParams* params,
+    Hasher* hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals) {
+  const size_t stream_offset = params->stream_offset;
+  const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
+  uint32_t* num_matches = BROTLI_ALLOC(m, uint32_t, num_bytes);
+  size_t matches_size = 4 * num_bytes;  /* Initial capacity of |matches|. */
+  const size_t store_end = num_bytes >= StoreLookaheadH10() ?
+      position + num_bytes - StoreLookaheadH10() + 1 : position;
+  size_t cur_match_pos = 0;  /* Number of matches stored so far. */
+  size_t i;
+  size_t orig_num_literals;
+  size_t orig_last_insert_len;
+  int orig_dist_cache[4];
+  size_t orig_num_commands;
+  ZopfliCostModel model;
+  ZopfliNode* nodes;
+  BackwardMatch* matches = BROTLI_ALLOC(m, BackwardMatch, matches_size);
+  size_t gap = 0;
+  size_t shadow_matches = 0;
+  BROTLI_UNUSED(literal_context_lut);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(num_matches) ||
+      BROTLI_IS_NULL(matches)) {
+    return;
+  }
+  for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; ++i) {
+    const size_t pos = position + i;  /* Position of byte i in the input. */
+    size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
+    size_t dictionary_start = BROTLI_MIN(size_t,
+        pos + stream_offset, max_backward_limit);
+    size_t max_length = num_bytes - i;
+    size_t num_found_matches;
+    size_t cur_match_end;
+    size_t j;
+    /* Ensure that we have enough free slots. */
+    BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size,
+        cur_match_pos + MAX_NUM_MATCHES_H10 + shadow_matches);
+    if (BROTLI_IS_OOM(m)) return;
+    num_found_matches = FindAllMatchesH10(&hasher->privat._H10,
+        &params->dictionary,
+        ringbuffer, ringbuffer_mask, pos, max_length,
+        max_distance, dictionary_start + gap, params,
+        &matches[cur_match_pos + shadow_matches]);
+    cur_match_end = cur_match_pos + num_found_matches;
+    for (j = cur_match_pos; j + 1 < cur_match_end; ++j) {
+      BROTLI_DCHECK(BackwardMatchLength(&matches[j]) <=
+          BackwardMatchLength(&matches[j + 1]));
+    }
+    num_matches[i] = (uint32_t)num_found_matches;
+    if (num_found_matches > 0) {
+      const size_t match_len = BackwardMatchLength(&matches[cur_match_end - 1]);
+      if (match_len > MAX_ZOPFLI_LEN_QUALITY_11) {
+        const size_t skip = match_len - 1;
+        matches[cur_match_pos++] = matches[cur_match_end - 1];
+        num_matches[i] = 1;  /* Only the last match found is kept. */
+        /* Add the tail of the copy to the hasher. */
+        StoreRangeH10(&hasher->privat._H10,
+                      ringbuffer, ringbuffer_mask, pos + 1,
+                      BROTLI_MIN(size_t, pos + match_len, store_end));
+        memset(&num_matches[i + 1], 0, skip * sizeof(num_matches[0]));
+        i += skip;  /* Skipped positions keep zero matches. */
+      } else {
+        cur_match_pos = cur_match_end;
+      }
+    }
+  }
+  orig_num_literals = *num_literals;  /* Save state to restore per pass. */
+  orig_last_insert_len = *last_insert_len;
+  memcpy(orig_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
+  orig_num_commands = *num_commands;
+  nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) return;
+  InitZopfliCostModel(m, &model, &params->dist, num_bytes);
+  if (BROTLI_IS_OOM(m)) return;
+  for (i = 0; i < 2; i++) {  /* Two passes; the 2nd re-costs from the 1st. */
+    BrotliInitZopfliNodes(nodes, num_bytes + 1);
+    if (i == 0) {
+      ZopfliCostModelSetFromLiteralCosts(
+          &model, position, ringbuffer, ringbuffer_mask);
+    } else {
+      ZopfliCostModelSetFromCommands(&model, position, ringbuffer,
+          ringbuffer_mask, commands, *num_commands - orig_num_commands,
+          orig_last_insert_len);
+    }
+    *num_commands = orig_num_commands;  /* Reset outputs to saved values. */
+    *num_literals = orig_num_literals;
+    *last_insert_len = orig_last_insert_len;
+    memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
+    *num_commands += ZopfliIterate(num_bytes, position, ringbuffer,
+        ringbuffer_mask, params, gap, dist_cache, &model, num_matches, matches,
+        nodes);
+    BrotliZopfliCreateCommands(num_bytes, position, nodes, dist_cache,
+        last_insert_len, params, commands, num_literals);
+  }
+  CleanupZopfliCostModel(m, &model);
+  BROTLI_FREE(m, nodes);
+  BROTLI_FREE(m, matches);
+  BROTLI_FREE(m, num_matches);
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references_hq.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references_hq.h
new file mode 100644
index 0000000000..36b75f250d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references_hq.h
@@ -0,0 +1,95 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function to find backward reference copies. */
+
+#ifndef BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
+#define BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./command.h"
+#include "./hash.h"
+#include "./memory.h"
+#include "./quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+BROTLI_INTERNAL void BrotliCreateZopfliBackwardReferences(MemoryManager* m,
+    size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    ContextLut literal_context_lut, const BrotliEncoderParams* params,
+    Hasher* hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals);
+
+BROTLI_INTERNAL void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m,
+    size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    ContextLut literal_context_lut, const BrotliEncoderParams* params,
+    Hasher* hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals);
+
+typedef struct ZopfliNode {
+  /* Best length to get up to this byte (not including this byte itself);
+     the highest 7 bits are used to reconstruct the length code. */
+  uint32_t length;
+  /* Distance associated with the length. */
+  uint32_t distance;
+  /* Number of literal inserts before this copy; the highest 5 bits contain
+     distance short code + 1 (or zero if no short code). */
+  uint32_t dcode_insert_length;
+
+  /* This union holds information used by dynamic programming. During the
+     forward pass |cost| is used to store the goal function. When a node is
+     processed its |cost| is invalidated in favor of |shortcut|. On the path
+     back-tracing pass |next| is assigned the offset to the next path node. */
+  union {
+    /* Smallest cost to get to this byte from the beginning, as found so far. */
+    float cost;
+    /* Offset to the next node on the path. Equal to command_length() of the
+       next node on the path. For the last node it is BROTLI_UINT32_MAX. */
+    uint32_t next;
+    /* Node position that provides next distance for distance cache. */
+    uint32_t shortcut;
+  } u;
+} ZopfliNode;
+
+BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length);
+
+/* Computes the shortest path of commands from position to at most
+   position + num_bytes.
+
+   Returns the number of commands found. For each node on the path, u.next is
+   the length of the following command (copy length plus insert length).
+   Note that the sum of the lengths of all commands can be less than num_bytes.
+
+   On return, the nodes[0..num_bytes] array will have the following
+   "ZopfliNode array invariant":
+   For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
+     (1) nodes[i].copy_length() >= 2
+     (2) nodes[i].command_length() <= i and
+     (3) nodes[i - nodes[i].command_length()].cost < kInfinity */
+BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(
+    MemoryManager* m, size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    ContextLut literal_context_lut, const BrotliEncoderParams* params,
+    const int* dist_cache, Hasher* hasher, ZopfliNode* nodes);
+
+BROTLI_INTERNAL void BrotliZopfliCreateCommands(
+    const size_t num_bytes, const size_t block_start, const ZopfliNode* nodes,
+    int* dist_cache, size_t* last_insert_len, const BrotliEncoderParams* params,
+    Command* commands, size_t* num_literals);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references_inc.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references_inc.h
new file mode 100644
index 0000000000..766bf91ffd
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/backward_references_inc.h
@@ -0,0 +1,163 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: EXPORT_FN, FN */
+
+static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
+    size_t num_bytes, size_t position,
+    const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    ContextLut literal_context_lut, const BrotliEncoderParams* params,
+    Hasher* hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals) {
+  HASHER()* privat = &hasher->privat.FN(_);
+  /* Set maximum distance, see section 9.1. of the spec. */
+  const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
+  const size_t position_offset = params->stream_offset;
+
+  const Command* const orig_commands = commands;
+  size_t insert_length = *last_insert_len;  /* Start with pending inserts. */
+  const size_t pos_end = position + num_bytes;
+  const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
+      position + num_bytes - FN(StoreLookahead)() + 1 : position;
+
+  /* For speed up heuristics for random data. */
+  const size_t random_heuristics_window_size =
+      LiteralSpreeLengthForSparseSearch(params);
+  size_t apply_random_heuristics = position + random_heuristics_window_size;
+  const size_t gap = 0;
+
+  /* Minimum score to accept a backward reference. */
+  const score_t kMinScore = BROTLI_SCORE_BASE + 100;
+
+  BROTLI_UNUSED(literal_context_lut);
+
+  FN(PrepareDistanceCache)(privat, dist_cache);
+
+  while (position + FN(HashTypeLength)() < pos_end) {
+    size_t max_length = pos_end - position;
+    size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
+    size_t dictionary_start = BROTLI_MIN(size_t,
+        position + position_offset, max_backward_limit);
+    HasherSearchResult sr;
+    sr.len = 0;
+    sr.len_code_delta = 0;
+    sr.distance = 0;
+    sr.score = kMinScore;
+    FN(FindLongestMatch)(privat, &params->dictionary,
+        ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
+        max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
+    if (sr.score > kMinScore) {
+      /* Found a match. Let's look for something even better ahead. */
+      int delayed_backward_references_in_row = 0;
+      --max_length;
+      for (;; --max_length) {
+        const score_t cost_diff_lazy = 175;
+        HasherSearchResult sr2;
+        sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
+            BROTLI_MIN(size_t, sr.len - 1, max_length) : 0;
+        sr2.len_code_delta = 0;
+        sr2.distance = 0;
+        sr2.score = kMinScore;
+        max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
+        dictionary_start = BROTLI_MIN(size_t,
+            position + 1 + position_offset, max_backward_limit);
+        FN(FindLongestMatch)(privat,
+            &params->dictionary,
+            ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
+            max_distance, dictionary_start + gap, params->dist.max_distance,
+            &sr2);
+        if (sr2.score >= sr.score + cost_diff_lazy) {
+          /* Ok, let's just write one byte for now and start a match from the
+             next byte. */
+          ++position;
+          ++insert_length;
+          sr = sr2;
+          if (++delayed_backward_references_in_row < 4 &&
+              position + FN(HashTypeLength)() < pos_end) {
+            continue;
+          }
+        }
+        break;
+      }
+      apply_random_heuristics =
+          position + 2 * sr.len + random_heuristics_window_size;
+      dictionary_start = BROTLI_MIN(size_t,
+          position + position_offset, max_backward_limit);
+      {
+        /* The first 16 codes are special short-codes,
+           and the minimum offset is 1. */
+        size_t distance_code = ComputeDistanceCode(
+            sr.distance, dictionary_start + gap, dist_cache);
+        if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) {
+          dist_cache[3] = dist_cache[2];
+          dist_cache[2] = dist_cache[1];
+          dist_cache[1] = dist_cache[0];
+          dist_cache[0] = (int)sr.distance;
+          FN(PrepareDistanceCache)(privat, dist_cache);
+        }
+        InitCommand(commands++, &params->dist, insert_length,
+            sr.len, sr.len_code_delta, distance_code);
+      }
+      *num_literals += insert_length;
+      insert_length = 0;
+      /* Put the hash keys into the table, if there are enough bytes left.
+         Depending on the hasher implementation, it can push all positions
+         in the given range or only a subset of them.
+         Avoid hash poisoning with RLE data. */
+      {
+        size_t range_start = position + 2;
+        size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
+        if (sr.distance < (sr.len >> 2)) {
+          range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
+              range_start, position + sr.len - (sr.distance << 2)));
+        }
+        FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
+                       range_end);
+      }
+      position += sr.len;
+    } else {
+      ++insert_length;
+      ++position;
+      /* If we have not seen matches for a long time, we can skip some
+         match lookups. Unsuccessful match lookups are very expensive
+         and this kind of a heuristic speeds up compression quite
+         a lot. */
+      if (position > apply_random_heuristics) {
+        /* Going through uncompressible data, jump. */
+        if (position >
+            apply_random_heuristics + 4 * random_heuristics_window_size) {
+          /* It is quite a long time since we saw a copy, so we assume
+             that this data is not compressible, and store hashes less
+             often. Hashes of non compressible data are less likely to
+             turn out to be useful in the future, too, so we store less of
+             them so as not to flood out the hash table of good compressible
+             data. */
+          const size_t kMargin =
+              BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
+          size_t pos_jump =
+              BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
+          for (; position < pos_jump; position += 4) {
+            FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
+            insert_length += 4;
+          }
+        } else {
+          const size_t kMargin =
+              BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
+          size_t pos_jump =
+              BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
+          for (; position < pos_jump; position += 2) {
+            FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
+            insert_length += 2;
+          }
+        }
+      }
+    }
+  }
+  insert_length += pos_end - position;  /* Remaining bytes stay pending. */
+  *last_insert_len = insert_length;
+  *num_commands += (size_t)(commands - orig_commands);  /* Commands emitted. */
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/bit_cost.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/bit_cost.c
new file mode 100644
index 0000000000..1f3f7ad5c9
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/bit_cost.c
@@ -0,0 +1,35 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions to estimate the bit cost of Huffman trees. */
+
+#include "./bit_cost.h"
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./fast_log.h"
+#include "./histogram.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define FN(X) X ## Literal
+#include "./bit_cost_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Command
+#include "./bit_cost_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Distance
+#include "./bit_cost_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/bit_cost.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/bit_cost.h
new file mode 100644
index 0000000000..6586469e62
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/bit_cost.h
@@ -0,0 +1,63 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions to estimate the bit cost of Huffman trees. */
+
+#ifndef BROTLI_ENC_BIT_COST_H_
+#define BROTLI_ENC_BIT_COST_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./fast_log.h"
+#include "./histogram.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static BROTLI_INLINE double ShannonEntropy(
+    const uint32_t* population, size_t size, size_t* total) {
+  const uint32_t* const end = population + size;
+  size_t count_sum = 0;
+  double bits = 0;
+  /* Peel one element off an odd-sized input; the loop then takes pairs. */
+  if (size & 1) {
+    const size_t first = *population++;
+    count_sum += first;
+    bits -= (double)first * FastLog2(first);
+  }
+  for (; population < end; population += 2) {
+    const size_t a = population[0];
+    const size_t b = population[1];
+    count_sum += a + b;
+    bits -= (double)a * FastLog2(a);
+    bits -= (double)b * FastLog2(b);
+  }
+  if (count_sum) bits += (double)count_sum * FastLog2(count_sum);
+  *total = count_sum;  /* Also report the sum of all counts. */
+  return bits;
+}
+
+static BROTLI_INLINE double BitsEntropy(
+    const uint32_t* population, size_t size) {
+  /* ShannonEntropy() of the counts, floored at the sum of the counts. */
+  size_t total;
+  const double entropy = ShannonEntropy(population, size, &total);
+  const double floor_bits = (double)total;
+  /* At least one bit per literal is needed. */
+  if (entropy < floor_bits) return floor_bits;
+  return entropy;
+}
+
+BROTLI_INTERNAL double BrotliPopulationCostLiteral(const HistogramLiteral*);
+BROTLI_INTERNAL double BrotliPopulationCostCommand(const HistogramCommand*);
+BROTLI_INTERNAL double BrotliPopulationCostDistance(const HistogramDistance*);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_BIT_COST_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/bit_cost_inc.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/bit_cost_inc.h
new file mode 100644
index 0000000000..453c226042
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/bit_cost_inc.h
@@ -0,0 +1,127 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+#define HistogramType FN(Histogram)
+
+double FN(BrotliPopulationCost)(const HistogramType* histogram) {
+  static const double kOneSymbolHistogramCost = 12;
+  static const double kTwoSymbolHistogramCost = 20;
+  static const double kThreeSymbolHistogramCost = 28;
+  static const double kFourSymbolHistogramCost = 37;
+  const size_t data_size = FN(HistogramDataSize)();
+  int count = 0;  /* Number of non-zero symbols found (capped at 5). */
+  size_t s[5];  /* Indices of the first non-zero symbols. */
+  double bits = 0.0;
+  size_t i;
+  if (histogram->total_count_ == 0) {  /* Empty histogram. */
+    return kOneSymbolHistogramCost;
+  }
+  for (i = 0; i < data_size; ++i) {
+    if (histogram->data_[i] > 0) {
+      s[count] = i;
+      ++count;
+      if (count > 4) break;  /* 5 symbols found: use the general estimate. */
+    }
+  }
+  if (count == 1) {
+    return kOneSymbolHistogramCost;
+  }
+  if (count == 2) {
+    return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
+  }
+  if (count == 3) {
+    const uint32_t histo0 = histogram->data_[s[0]];
+    const uint32_t histo1 = histogram->data_[s[1]];
+    const uint32_t histo2 = histogram->data_[s[2]];
+    const uint32_t histomax =
+        BROTLI_MAX(uint32_t, histo0, BROTLI_MAX(uint32_t, histo1, histo2));
+    return (kThreeSymbolHistogramCost +
+            2 * (histo0 + histo1 + histo2) - histomax);
+  }
+  if (count == 4) {
+    uint32_t histo[4];
+    uint32_t h23;
+    uint32_t histomax;
+    for (i = 0; i < 4; ++i) {
+      histo[i] = histogram->data_[s[i]];
+    }
+    /* Sort histo[] in descending order. */
+    for (i = 0; i < 4; ++i) {
+      size_t j;
+      for (j = i + 1; j < 4; ++j) {
+        if (histo[j] > histo[i]) {
+          BROTLI_SWAP(uint32_t, histo, j, i);
+        }
+      }
+    }
+    h23 = histo[2] + histo[3];  /* Sum of the two smallest counts. */
+    histomax = BROTLI_MAX(uint32_t, h23, histo[0]);
+    return (kFourSymbolHistogramCost +
+            3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
+  }
+
+  {
+    /* In this loop we compute the entropy of the histogram and simultaneously
+       build a simplified histogram of the code length codes where we use the
+       zero repeat code 17, but we don't use the non-zero repeat code 16. */
+    size_t max_depth = 1;
+    uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
+    const double log2total = FastLog2(histogram->total_count_);
+    for (i = 0; i < data_size;) {
+      if (histogram->data_[i] > 0) {
+        /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
+                                    = log2(total_count) - log2(count(symbol)) */
+        double log2p = log2total - FastLog2(histogram->data_[i]);
+        /* Approximate the bit depth by round(-log2(P(symbol))) */
+        size_t depth = (size_t)(log2p + 0.5);
+        bits += histogram->data_[i] * log2p;
+        if (depth > 15) {  /* Clamp the depth estimate to 15. */
+          depth = 15;
+        }
+        if (depth > max_depth) {
+          max_depth = depth;
+        }
+        ++depth_histo[depth];
+        ++i;
+      } else {
+        /* Compute the run length of zeros and add the appropriate number of 0
+           and 17 code length codes to the code length code histogram. */
+        uint32_t reps = 1;
+        size_t k;
+        for (k = i + 1; k < data_size && histogram->data_[k] == 0; ++k) {
+          ++reps;
+        }
+        i += reps;  /* Continue after the zero run. */
+        if (i == data_size) {
+          /* Don't add any cost for the last zero run, since these are encoded
+             only implicitly. */
+          break;
+        }
+        if (reps < 3) {
+          depth_histo[0] += reps;
+        } else {
+          reps -= 2;
+          while (reps > 0) {
+            ++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
+            /* Add the 3 extra bits for the 17 code length code. */
+            bits += 3;
+            reps >>= 3;
+          }
+        }
+      }
+    }
+    /* Add the estimated encoding cost of the code length code histogram. */
+    bits += (double)(18 + 2 * max_depth);
+    /* Add the entropy of the code length code histogram. */
+    bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
+  }
+  return bits;
+}
+
+#undef HistogramType
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_encoder_inc.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_encoder_inc.h
new file mode 100644
index 0000000000..8cbd5eac67
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_encoder_inc.h
@@ -0,0 +1,34 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2014 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+#define HistogramType FN(Histogram)
+
+/* Builds a prefix code for each histogram (one per block type), writes every
+   code to the bit stream and keeps the depths / bits tables in |self|. */
+static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self,
+    const HistogramType* histograms, const size_t histograms_size,
+    const size_t alphabet_size, HuffmanTree* tree,
+    size_t* storage_ix, uint8_t* storage) {
+  const size_t code_length = self->histogram_length_;
+  const size_t table_size = histograms_size * code_length;
+  size_t type;
+  size_t base = 0;
+  self->depths_ = BROTLI_ALLOC(m, uint8_t, table_size);
+  self->bits_ = BROTLI_ALLOC(m, uint16_t, table_size);
+  if (BROTLI_IS_OOM(m)) return;
+
+  /* Code number |type| occupies [base, base + code_length) in both tables. */
+  for (type = 0; type < histograms_size; ++type, base += code_length) {
+    BuildAndStoreHuffmanTree(histograms[type].data_, code_length,
+        alphabet_size, tree, self->depths_ + base, self->bits_ + base,
+        storage_ix, storage);
+  }
+}
+
+#undef HistogramType
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_splitter.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_splitter.c
new file mode 100644
index 0000000000..deb7c2e151
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_splitter.c
@@ -0,0 +1,194 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Block split point selection utilities. */
+
+#include "./block_splitter.h"
+
+#include <string.h>  /* memcpy, memset */
+
+#include "../common/platform.h"
+#include "./bit_cost.h"
+#include "./cluster.h"
+#include "./command.h"
+#include "./fast_log.h"
+#include "./histogram.h"
+#include "./memory.h"
+#include "./quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static const size_t kMaxLiteralHistograms = 100;  /* cap, literal split */
+static const size_t kMaxCommandHistograms = 50;  /* cap, command + distance */
+static const double kLiteralBlockSwitchCost = 28.1;
+static const double kCommandBlockSwitchCost = 13.5;
+static const double kDistanceBlockSwitchCost = 14.6;
+static const size_t kLiteralStrideLength = 70;  /* sampling stride */
+static const size_t kCommandStrideLength = 40;  /* also used for distances */
+static const size_t kSymbolsPerLiteralHistogram = 544;
+static const size_t kSymbolsPerCommandHistogram = 530;
+static const size_t kSymbolsPerDistanceHistogram = 544;
+static const size_t kMinLengthForBlockSplitting = 128;  /* below: one block */
+static const size_t kIterMulForRefining = 2;  /* samples per length/stride */
+static const size_t kMinItersForRefining = 100;  /* added to sample count */
+
+static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
+  /* Sum of the insert lengths of cmds[0 .. num_commands). */
+  size_t literals = 0;
+  size_t left = num_commands;
+  while (left != 0) {
+    literals += cmds[--left].insert_len_;
+  }
+  return literals;
+}
+
+static void CopyLiteralsToByteArray(const Command* cmds,
+                                    const size_t num_commands,
+                                    const uint8_t* data,
+                                    const size_t offset,
+                                    const size_t mask,
+                                    uint8_t* literals) {
+  size_t written = 0;  /* literals gathered into |literals| so far */
+  size_t src = offset & mask;  /* read index into data, kept <= mask */
+  size_t n;
+  for (n = 0; n < num_commands; ++n) {
+    size_t todo = cmds[n].insert_len_;
+    if (src + todo > mask) {  /* wraps: copy up to index mask, resume at 0 */
+      const size_t tail = mask + 1 - src;
+      memcpy(&literals[written], &data[src], tail);
+      written += tail;
+      todo -= tail;
+      src = 0;
+    }
+    if (todo != 0) {
+      memcpy(&literals[written], &data[src], todo);
+      written += todo;
+    }
+    src = (src + todo + CommandCopyLen(&cmds[n])) & mask;
+  }
+}
+
+static BROTLI_INLINE uint32_t MyRand(uint32_t* seed) {
+  /* Callers should start from seed 7; the loop length is then (1 << 29). */
+  const uint32_t next = *seed * 16807U;
+  return (*seed = next);
+}
+
+static BROTLI_INLINE double BitCost(size_t count) {
+  return count != 0 ? FastLog2(count) : -2.0;  /* 0 maps to -2.0, no log2 */
+}
+
+#define HISTOGRAMS_PER_BATCH 64  /* blocks clustered together per batch */
+#define CLUSTERS_PER_BATCH 16  /* used to size the initial cluster arrays */
+
+#define FN(X) X ## Literal
+#define DataType uint8_t
+/* NOLINTNEXTLINE(build/include) */
+#include "./block_splitter_inc.h"
+#undef DataType
+#undef FN
+
+#define FN(X) X ## Command
+#define DataType uint16_t  /* stays defined for the Distance include below */
+/* NOLINTNEXTLINE(build/include) */
+#include "./block_splitter_inc.h"
+#undef FN  /* only FN is reset here; DataType is still uint16_t */
+
+#define FN(X) X ## Distance
+/* NOLINTNEXTLINE(build/include) */
+#include "./block_splitter_inc.h"
+#undef DataType
+#undef FN
+
+void BrotliInitBlockSplit(BlockSplit* self) {
+  /* Start empty: no block types, no blocks, no buffers allocated. */
+  self->types = NULL;
+  self->types_alloc_size = 0;
+  self->lengths = NULL;
+  self->lengths_alloc_size = 0;
+  self->num_blocks = self->num_types = 0;
+}
+
+void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
+  BROTLI_FREE(m, self->types);  /* both arrays are handed back through |m| */
+  BROTLI_FREE(m, self->lengths);
+}
+/* Computes the literal, command and distance block splits of |cmds|. */
+void BrotliSplitBlock(MemoryManager* m,
+                      const Command* cmds,
+                      const size_t num_commands,
+                      const uint8_t* data,
+                      const size_t pos,
+                      const size_t mask,
+                      const BrotliEncoderParams* params,
+                      BlockSplit* literal_split,
+                      BlockSplit* insert_and_copy_split,
+                      BlockSplit* dist_split) {
+  {  /* Part 1 of 3: literals -> literal_split. */
+    size_t literals_count = CountLiterals(cmds, num_commands);
+    uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literals)) return;
+    /* Create a continuous array of literals (data is read from pos & mask). */
+    CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
+    /* Create the block split on the array of literals.
+       Literal histograms have alphabet size 256. */
+    SplitByteVectorLiteral(
+        m, literals, literals_count,
+        kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
+        kLiteralStrideLength, kLiteralBlockSwitchCost, params,
+        literal_split);
+    if (BROTLI_IS_OOM(m)) return;
+    BROTLI_FREE(m, literals);
+  }
+
+  {  /* Part 2 of 3: command prefixes -> insert_and_copy_split. */
+    /* Collect the cmd_prefix_ of every command into one array. */
+    uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
+    size_t i;
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(insert_and_copy_codes)) return;
+    for (i = 0; i < num_commands; ++i) {
+      insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
+    }
+    /* Create the block split on the array of command prefixes. */
+    SplitByteVectorCommand(
+        m, insert_and_copy_codes, num_commands,
+        kSymbolsPerCommandHistogram, kMaxCommandHistograms,
+        kCommandStrideLength, kCommandBlockSwitchCost, params,
+        insert_and_copy_split);
+    if (BROTLI_IS_OOM(m)) return;
+    /* TODO: reuse for distances? */
+    BROTLI_FREE(m, insert_and_copy_codes);
+  }
+
+  {  /* Part 3 of 3: distance prefixes -> dist_split. */
+    /* Create a continuous array of distance prefixes (j counts them). */
+    uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
+    size_t j = 0;
+    size_t i;
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(distance_prefixes)) return;
+    for (i = 0; i < num_commands; ++i) {
+      const Command* cmd = &cmds[i];
+      if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {  /* else skipped */
+        distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF;  /* low 10 bits */
+      }
+    }
+    /* Create the block split on the array of distance prefixes. */
+    SplitByteVectorDistance(
+        m, distance_prefixes, j,
+        kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
+        kCommandStrideLength, kDistanceBlockSwitchCost, params,
+        dist_split);
+    if (BROTLI_IS_OOM(m)) return;
+    BROTLI_FREE(m, distance_prefixes);
+  }
+}
+
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_splitter.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_splitter.h
new file mode 100644
index 0000000000..a5e006c4b3
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_splitter.h
@@ -0,0 +1,51 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Block split point selection utilities. */
+
+#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
+#define BROTLI_ENC_BLOCK_SPLITTER_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./command.h"
+#include "./memory.h"
+#include "./quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct BlockSplit {
+  size_t num_types;  /* Amount of distinct types */
+  size_t num_blocks;  /* Amount of values in types and lengths */
+  uint8_t* types;  /* types[i]: type id of block i */
+  uint32_t* lengths;  /* lengths[i]: number of symbols in block i */
+
+  size_t types_alloc_size;  /* capacity of types (BROTLI_ENSURE_CAPACITY) */
+  size_t lengths_alloc_size;  /* capacity of lengths (same macro) */
+} BlockSplit;
+
+BROTLI_INTERNAL void BrotliInitBlockSplit(BlockSplit* self);
+BROTLI_INTERNAL void BrotliDestroyBlockSplit(MemoryManager* m,
+                                             BlockSplit* self);
+
+BROTLI_INTERNAL void BrotliSplitBlock(MemoryManager* m,
+                                      const Command* cmds,
+                                      const size_t num_commands,
+                                      const uint8_t* data,
+                                      const size_t offset,  /* "pos" in .c */
+                                      const size_t mask,
+                                      const BrotliEncoderParams* params,
+                                      BlockSplit* literal_split,  /* out */
+                                      BlockSplit* insert_and_copy_split,  /* out */
+                                      BlockSplit* dist_split);  /* out */
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_BLOCK_SPLITTER_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_splitter_inc.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_splitter_inc.h
new file mode 100644
index 0000000000..e612d6a370
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/block_splitter_inc.h
@@ -0,0 +1,440 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, DataType */
+
+#define HistogramType FN(Histogram)
+
+static void FN(InitialEntropyCodes)(const DataType* data, size_t length,
+                                    size_t stride,
+                                    size_t num_histograms,
+                                    HistogramType* histograms) {
+  const size_t segment = length / num_histograms;
+  const size_t last_start = length - stride - 1;
+  uint32_t rng = 7;
+  size_t h;
+  FN(ClearHistograms)(histograms, num_histograms);
+  /* Seed histogram h with |stride| symbols taken from the h-th of
+     num_histograms equal segments; every segment but the first is entered
+     at a pseudo-random offset. Starts too close to the end are clamped. */
+  for (h = 0; h < num_histograms; ++h) {
+    size_t start = length * h / num_histograms;
+    if (h > 0) start += MyRand(&rng) % segment;
+    if (start + stride >= length) start = last_start;
+    FN(HistogramAddVector)(&histograms[h], data + start, stride);
+  }
+}
+
+static void FN(RandomSample)(uint32_t* seed,
+                             const DataType* data,
+                             size_t length,
+                             size_t stride,
+                             HistogramType* sample) {
+  /* Adds |stride| symbols from a pseudo-random position (all, if too few). */
+  if (stride < length) {
+    const size_t start = MyRand(seed) % (length - stride + 1);
+    FN(HistogramAddVector)(sample, data + start, stride);
+    return;
+  }
+  FN(HistogramAddVector)(sample, data, length);
+}
+
+static void FN(RefineEntropyCodes)(const DataType* data, size_t length,
+                                   size_t stride,
+                                   size_t num_histograms,
+                                   HistogramType* histograms) {
+  /* Round-robin sampling; the count is rounded up to whole rounds. */
+  size_t left = kIterMulForRefining * length / stride + kMinItersForRefining;
+  size_t slot = 0;
+  uint32_t rng = 7;
+  left = ((left + num_histograms - 1) / num_histograms) * num_histograms;
+  for (; left != 0; --left, slot = (slot + 1) % num_histograms) {
+    HistogramType sample;
+    FN(HistogramClear)(&sample);
+    FN(RandomSample)(&rng, data, length, stride, &sample);
+    FN(HistogramAddHistogram)(&histograms[slot], &sample);
+  }
+}
+
+/* Assigns a block id from the range [0, num_histograms) to each data element
+   in data[0..length) and fills in block_id[0..length) with the assigned values.
+   Returns the number of blocks, i.e. one plus the number of block switches. */
+static size_t FN(FindBlocks)(const DataType* data, const size_t length,
+                             const double block_switch_bitcost,
+                             const size_t num_histograms,
+                             const HistogramType* histograms,
+                             double* insert_cost,  /* scratch, see memset */
+                             double* cost,  /* scratch, num_histograms */
+                             uint8_t* switch_signal,  /* scratch bitmap */
+                             uint8_t* block_id) {
+  const size_t data_size = FN(HistogramDataSize)();
+  const size_t bitmaplen = (num_histograms + 7) >> 3;  /* 1 bit / histogram */
+  size_t num_blocks = 1;
+  size_t i;
+  size_t j;
+  BROTLI_DCHECK(num_histograms <= 256);
+  if (num_histograms <= 1) {  /* trivial case: everything is block id 0 */
+    for (i = 0; i < length; ++i) {
+      block_id[i] = 0;
+    }
+    return 1;
+  }
+  memset(insert_cost, 0, sizeof(insert_cost[0]) * data_size * num_histograms);
+  for (i = 0; i < num_histograms; ++i) {  /* row 0 := log2 of the totals */
+    insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_);
+  }
+  for (i = data_size; i != 0;) {  /* downwards: row 0 is replaced last */
+    --i;
+    for (j = 0; j < num_histograms; ++j) {
+      insert_cost[i * num_histograms + j] =
+          insert_cost[j] - BitCost(histograms[j].data_[i]);
+    }
+  }
+  memset(cost, 0, sizeof(cost[0]) * num_histograms);
+  memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
+  /* After each iteration of this loop, cost[k] will contain the difference
+     between the minimum cost of arriving at the current byte position using
+     entropy code k, and the minimum cost of arriving at the current byte
+     position. This difference is capped at the block switch cost, and if it
+     reaches block switch cost, it means that when we trace back from the last
+     position, we need to switch here. */
+  for (i = 0; i < length; ++i) {
+    const size_t byte_ix = i;
+    size_t ix = byte_ix * bitmaplen;  /* first bitmap byte of this position */
+    size_t insert_cost_ix = data[byte_ix] * num_histograms;  /* symbol row */
+    double min_cost = 1e99;
+    double block_switch_cost = block_switch_bitcost;
+    size_t k;
+    for (k = 0; k < num_histograms; ++k) {
+      /* We are coding the symbol in data[byte_ix] with entropy code k. */
+      cost[k] += insert_cost[insert_cost_ix + k];
+      if (cost[k] < min_cost) {
+        min_cost = cost[k];
+        block_id[byte_ix] = (uint8_t)k;  /* cheapest code at this position */
+      }
+    }
+    /* More blocks for the beginning. */
+    if (byte_ix < 2000) {
+      block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000;
+    }
+    for (k = 0; k < num_histograms; ++k) {
+      cost[k] -= min_cost;
+      if (cost[k] >= block_switch_cost) {
+        const uint8_t mask = (uint8_t)(1u << (k & 7));
+        cost[k] = block_switch_cost;
+        BROTLI_DCHECK((k >> 3) < bitmaplen);
+        switch_signal[ix + (k >> 3)] |= mask;  /* mark code k at position i */
+      }
+    }
+  }
+  {  /* Trace back from the last position and switch at the marked places. */
+    size_t byte_ix = length - 1;
+    size_t ix = byte_ix * bitmaplen;
+    uint8_t cur_id = block_id[byte_ix];
+    while (byte_ix > 0) {
+      const uint8_t mask = (uint8_t)(1u << (cur_id & 7));
+      BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmaplen);
+      --byte_ix;
+      ix -= bitmaplen;
+      if (switch_signal[ix + (cur_id >> 3)] & mask) {
+        if (cur_id != block_id[byte_ix]) {
+          cur_id = block_id[byte_ix];
+          ++num_blocks;
+        }
+      }
+      block_id[byte_ix] = cur_id;  /* final id for this position */
+    }
+  }
+  return num_blocks;
+}
+
+static size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length,
+                                uint16_t* new_id, const size_t num_histograms) {
+  /* Renumbers the ids in block_ids[0..length) in order of first appearance
+     (0, 1, 2, ...) and returns how many distinct ids were found.
+     |new_id| is a scratch table with one entry per histogram. */
+  static const uint16_t kInvalidId = 256;
+  uint16_t used = 0;
+  size_t pos;
+  size_t id;
+  for (id = 0; id < num_histograms; ++id) new_id[id] = kInvalidId;
+  for (pos = 0; pos < length; ++pos) {
+    const uint8_t old_id = block_ids[pos];
+    BROTLI_DCHECK(old_id < num_histograms);
+    /* First sighting of old_id: hand out the next unused number. */
+    if (new_id[old_id] == kInvalidId) new_id[old_id] = used++;
+    block_ids[pos] = (uint8_t)new_id[old_id];
+    BROTLI_DCHECK(block_ids[pos] < num_histograms);
+  }
+  BROTLI_DCHECK(used <= num_histograms);
+  return used;
+}
+
+static void FN(BuildBlockHistograms)(const DataType* data, const size_t length,
+                                     const uint8_t* block_ids,
+                                     const size_t num_histograms,
+                                     HistogramType* histograms) {
+  size_t n;
+  FN(ClearHistograms)(histograms, num_histograms);
+  for (n = 0; n != length; ++n) {  /* data[n] goes to histogram block_ids[n] */
+    FN(HistogramAdd)(histograms + block_ids[n], data[n]);
+  }
+}
+/* Clusters the blocks given by block_ids; types and lengths go to |split|. */
+static void FN(ClusterBlocks)(MemoryManager* m,
+                              const DataType* data, const size_t length,
+                              const size_t num_blocks,
+                              uint8_t* block_ids,
+                              BlockSplit* split) {
+  uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks);
+  uint32_t* block_lengths = BROTLI_ALLOC(m, uint32_t, num_blocks);
+  const size_t expected_num_clusters = CLUSTERS_PER_BATCH *
+      (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH;
+  size_t all_histograms_size = 0;
+  size_t all_histograms_capacity = expected_num_clusters;  /* initial only */
+  HistogramType* all_histograms =
+      BROTLI_ALLOC(m, HistogramType, all_histograms_capacity);
+  size_t cluster_size_size = 0;
+  size_t cluster_size_capacity = expected_num_clusters;  /* initial only */
+  uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity);
+  size_t num_clusters = 0;
+  HistogramType* histograms = BROTLI_ALLOC(m, HistogramType,
+      BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH));
+  size_t max_num_pairs =
+      HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2;
+  size_t pairs_capacity = max_num_pairs + 1;
+  HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity);
+  size_t pos = 0;  /* read cursor into data */
+  uint32_t* clusters;
+  size_t num_final_clusters;
+  static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
+  uint32_t* new_index;
+  size_t i;
+  uint32_t sizes[HISTOGRAMS_PER_BATCH] = { 0 };
+  uint32_t new_clusters[HISTOGRAMS_PER_BATCH] = { 0 };
+  uint32_t symbols[HISTOGRAMS_PER_BATCH] = { 0 };
+  uint32_t remap[HISTOGRAMS_PER_BATCH] = { 0 };
+
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histogram_symbols) ||
+      BROTLI_IS_NULL(block_lengths) || BROTLI_IS_NULL(all_histograms) ||
+      BROTLI_IS_NULL(cluster_size) || BROTLI_IS_NULL(histograms) ||
+      BROTLI_IS_NULL(pairs)) {
+    return;
+  }
+
+  memset(block_lengths, 0, num_blocks * sizeof(uint32_t));
+
+  {  /* block_lengths[b] := length of the b-th run of equal block ids. */
+    size_t block_idx = 0;
+    for (i = 0; i < length; ++i) {
+      BROTLI_DCHECK(block_idx < num_blocks);
+      ++block_lengths[block_idx];
+      if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
+        ++block_idx;
+      }
+    }
+    BROTLI_DCHECK(block_idx == num_blocks);
+  }
+
+  for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) {  /* per batch */
+    const size_t num_to_combine =
+        BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH);
+    size_t num_new_clusters;
+    size_t j;
+    for (j = 0; j < num_to_combine; ++j) {  /* one histogram per block */
+      size_t k;
+      FN(HistogramClear)(&histograms[j]);
+      for (k = 0; k < block_lengths[i + j]; ++k) {
+        FN(HistogramAdd)(&histograms[j], data[pos++]);
+      }
+      histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]);
+      new_clusters[j] = (uint32_t)j;
+      symbols[j] = (uint32_t)j;
+      sizes[j] = 1;
+    }
+    num_new_clusters = FN(BrotliHistogramCombine)(
+        histograms, sizes, symbols, new_clusters, pairs, num_to_combine,
+        num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs);
+    BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms,
+        all_histograms_capacity, all_histograms_size + num_new_clusters);
+    BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size,
+        cluster_size_capacity, cluster_size_size + num_new_clusters);
+    if (BROTLI_IS_OOM(m)) return;
+    for (j = 0; j < num_new_clusters; ++j) {  /* append batch clusters */
+      all_histograms[all_histograms_size++] = histograms[new_clusters[j]];
+      cluster_size[cluster_size_size++] = sizes[new_clusters[j]];
+      remap[new_clusters[j]] = (uint32_t)j;
+    }
+    for (j = 0; j < num_to_combine; ++j) {  /* batch-local -> global index */
+      histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]];
+    }
+    num_clusters += num_new_clusters;
+    BROTLI_DCHECK(num_clusters == cluster_size_size);
+    BROTLI_DCHECK(num_clusters == all_histograms_size);
+  }
+  BROTLI_FREE(m, histograms);
+
+  max_num_pairs =
+      BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters);
+  if (pairs_capacity < max_num_pairs + 1) {  /* regrow the pair buffer */
+    BROTLI_FREE(m, pairs);
+    pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1);
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(pairs)) return;
+  }
+
+  clusters = BROTLI_ALLOC(m, uint32_t, num_clusters);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(clusters)) return;
+  for (i = 0; i < num_clusters; ++i) {  /* identity list 0..num_clusters-1 */
+    clusters[i] = (uint32_t)i;
+  }
+  num_final_clusters = FN(BrotliHistogramCombine)(
+      all_histograms, cluster_size, histogram_symbols, clusters, pairs,
+      num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES,
+      max_num_pairs);
+  BROTLI_FREE(m, pairs);
+  BROTLI_FREE(m, cluster_size);
+
+  new_index = BROTLI_ALLOC(m, uint32_t, num_clusters);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return;
+  for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex;
+  pos = 0;
+  {  /* Re-assign every block to the cheapest of the final clusters. */
+    uint32_t next_index = 0;
+    for (i = 0; i < num_blocks; ++i) {
+      HistogramType histo;
+      size_t j;
+      uint32_t best_out;
+      double best_bits;
+      FN(HistogramClear)(&histo);
+      for (j = 0; j < block_lengths[i]; ++j) {
+        FN(HistogramAdd)(&histo, data[pos++]);
+      }
+      best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1];
+      best_bits =
+          FN(BrotliHistogramBitCostDistance)(&histo, &all_histograms[best_out]);
+      for (j = 0; j < num_final_clusters; ++j) {
+        const double cur_bits = FN(BrotliHistogramBitCostDistance)(
+            &histo, &all_histograms[clusters[j]]);
+        if (cur_bits < best_bits) {  /* strictly better only */
+          best_bits = cur_bits;
+          best_out = clusters[j];
+        }
+      }
+      histogram_symbols[i] = best_out;
+      if (new_index[best_out] == kInvalidIndex) {  /* number by first use */
+        new_index[best_out] = next_index++;
+      }
+    }
+  }
+  BROTLI_FREE(m, clusters);
+  BROTLI_FREE(m, all_histograms);
+  BROTLI_ENSURE_CAPACITY(
+      m, uint8_t, split->types, split->types_alloc_size, num_blocks);
+  BROTLI_ENSURE_CAPACITY(
+      m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks);
+  if (BROTLI_IS_OOM(m)) return;
+  {  /* Merge neighbours with the same symbol; write types and lengths. */
+    uint32_t cur_length = 0;
+    size_t block_idx = 0;
+    uint8_t max_type = 0;
+    for (i = 0; i < num_blocks; ++i) {
+      cur_length += block_lengths[i];
+      if (i + 1 == num_blocks ||
+          histogram_symbols[i] != histogram_symbols[i + 1]) {
+        const uint8_t id = (uint8_t)new_index[histogram_symbols[i]];
+        split->types[block_idx] = id;
+        split->lengths[block_idx] = cur_length;
+        max_type = BROTLI_MAX(uint8_t, max_type, id);
+        cur_length = 0;
+        ++block_idx;
+      }
+    }
+    split->num_blocks = block_idx;
+    split->num_types = (size_t)max_type + 1;
+  }
+  BROTLI_FREE(m, new_index);
+  BROTLI_FREE(m, block_lengths);
+  BROTLI_FREE(m, histogram_symbols);
+}
+/* Computes a block split of data[0..length) and stores it in |split|. */
+static void FN(SplitByteVector)(MemoryManager* m,
+                                const DataType* data, const size_t length,
+                                const size_t literals_per_histogram,
+                                const size_t max_histograms,
+                                const size_t sampling_stride_length,
+                                const double block_switch_cost,
+                                const BrotliEncoderParams* params,
+                                BlockSplit* split) {
+  const size_t data_size = FN(HistogramDataSize)();
+  size_t num_histograms = length / literals_per_histogram + 1;
+  HistogramType* histograms;
+  if (num_histograms > max_histograms) {
+    num_histograms = max_histograms;
+  }
+  if (length == 0) {  /* no data: only num_types is set, no block is added */
+    split->num_types = 1;
+    return;
+  } else if (length < kMinLengthForBlockSplitting) {  /* one type-0 block */
+    BROTLI_ENSURE_CAPACITY(m, uint8_t,
+        split->types, split->types_alloc_size, split->num_blocks + 1);
+    BROTLI_ENSURE_CAPACITY(m, uint32_t,
+        split->lengths, split->lengths_alloc_size, split->num_blocks + 1);
+    if (BROTLI_IS_OOM(m)) return;
+    split->num_types = 1;
+    split->types[split->num_blocks] = 0;
+    split->lengths[split->num_blocks] = (uint32_t)length;
+    split->num_blocks++;
+    return;
+  }
+  histograms = BROTLI_ALLOC(m, HistogramType, num_histograms);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histograms)) return;
+  /* Find good entropy codes. */
+  FN(InitialEntropyCodes)(data, length,
+                          sampling_stride_length,
+                          num_histograms, histograms);
+  FN(RefineEntropyCodes)(data, length,
+                         sampling_stride_length,
+                         num_histograms, histograms);
+  {
+    /* Find a good path through literals with the good entropy codes. */
+    uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length);
+    size_t num_blocks = 0;
+    const size_t bitmaplen = (num_histograms + 7) >> 3;
+    double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms);
+    double* cost = BROTLI_ALLOC(m, double, num_histograms);
+    uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen);
+    uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms);
+    const size_t iters = params->quality < HQ_ZOPFLIFICATION_QUALITY ? 3 : 10;
+    size_t i;
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(block_ids) ||
+        BROTLI_IS_NULL(insert_cost) || BROTLI_IS_NULL(cost) ||
+        BROTLI_IS_NULL(switch_signal) || BROTLI_IS_NULL(new_id)) {
+      return;
+    }
+    for (i = 0; i < iters; ++i) {  /* assign ids, renumber, rebuild codes */
+      num_blocks = FN(FindBlocks)(data, length,
+                                  block_switch_cost,
+                                  num_histograms, histograms,
+                                  insert_cost, cost, switch_signal,
+                                  block_ids);
+      num_histograms = FN(RemapBlockIds)(block_ids, length,
+                                         new_id, num_histograms);
+      FN(BuildBlockHistograms)(data, length, block_ids,
+                               num_histograms, histograms);
+    }
+    BROTLI_FREE(m, insert_cost);
+    BROTLI_FREE(m, cost);
+    BROTLI_FREE(m, switch_signal);
+    BROTLI_FREE(m, new_id);
+    BROTLI_FREE(m, histograms);  /* released before clustering starts */
+    FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split);
+    if (BROTLI_IS_OOM(m)) return;
+    BROTLI_FREE(m, block_ids);
+  }
+}
+
+#undef HistogramType
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/brotli_bit_stream.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/brotli_bit_stream.c
new file mode 100644
index 0000000000..6391454f98
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/brotli_bit_stream.c
@@ -0,0 +1,1329 @@
+/* Copyright 2014 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Brotli bit stream functions to support the low level format. There are no
+   compression algorithms here, just the right ordering of bits to match the
+   specs. */
+
+#include "./brotli_bit_stream.h"
+
+#include <string.h>  /* memcpy, memset */
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./entropy_encode.h"
+#include "./entropy_encode_static.h"
+#include "./fast_log.h"
+#include "./histogram.h"
+#include "./memory.h"
+#include "./write_bits.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Node capacity derived from the largest (command) alphabet:
+   2 * BROTLI_NUM_COMMAND_SYMBOLS + 1.
+   NOTE(review): presumably sizes HuffmanTree scratch buffers used later in
+   this file; the use site is outside this excerpt - confirm. */
+#define MAX_HUFFMAN_TREE_SIZE (2 * BROTLI_NUM_COMMAND_SYMBOLS + 1)
+/* The maximum size of Huffman dictionary for distances assuming that
+   NPOSTFIX = 0 and NDIRECT = 0. */
+#define MAX_SIMPLE_DISTANCE_ALPHABET_SIZE \
+  BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_LARGE_MAX_DISTANCE_BITS)
+/* MAX_SIMPLE_DISTANCE_ALPHABET_SIZE == 140 */
+
+/* Represents the range of values belonging to a prefix code:
+   [offset, offset + 2^nbits) */
+typedef struct PrefixCodeRange {
+  /* Smallest value covered by this prefix code. */
+  uint32_t offset;
+  /* Number of extra bits that select a value inside the range. */
+  uint32_t nbits;
+} PrefixCodeRange;
+
+/* Block-length prefix codes as {offset, nbits} pairs, indexed by code.
+   Entries are sorted by offset and the ranges are contiguous: each offset
+   equals the previous offset plus 2^(previous nbits), starting at length 1.
+   BlockLengthPrefixCode() relies on this ordering for its linear scan. */
+static const PrefixCodeRange
+    kBlockLengthPrefixCode[BROTLI_NUM_BLOCK_LEN_SYMBOLS] = {
+  { 1, 2}, { 5, 2}, { 9, 2}, {13, 2}, {17, 3}, { 25, 3}, { 33, 3},
+  {41, 3}, {49, 4}, {65, 4}, {81, 4}, {97, 4}, {113, 5}, {145, 5},
+  {177, 5}, { 209,  5}, { 241,  6}, { 305,  6}, { 369,  7}, {  497,  8},
+  {753, 9}, {1265, 10}, {2289, 11}, {4337, 12}, {8433, 13}, {16625, 24}
+};
+
+/* Returns the index in kBlockLengthPrefixCode of the range that contains
+   |len|. A coarse starting index is chosen from fixed thresholds (753, 177,
+   41) and then advanced one entry at a time while the next entry's offset
+   is still <= |len|. */
+static BROTLI_INLINE uint32_t BlockLengthPrefixCode(uint32_t len) {
+  const uint32_t last_code = BROTLI_NUM_BLOCK_LEN_SYMBOLS - 1;
+  uint32_t code;
+  if (len >= 753) {
+    code = 20;
+  } else if (len >= 177) {
+    code = 14;
+  } else if (len >= 41) {
+    code = 7;
+  } else {
+    code = 0;
+  }
+  for (; code < last_code; ++code) {
+    if (len < kBlockLengthPrefixCode[code + 1].offset) break;
+  }
+  return code;
+}
+
+/* Decomposes the block length |len| into a prefix code and extra bits.
+   On return *code is the index into kBlockLengthPrefixCode, *n_extra is the
+   number of extra bits of that code and *extra is |len| minus the code's
+   offset. */
+static BROTLI_INLINE void GetBlockLengthPrefixCode(uint32_t len, size_t* code,
+    uint32_t* n_extra, uint32_t* extra) {
+  const uint32_t prefix = BlockLengthPrefixCode(len);
+  const PrefixCodeRange* range = &kBlockLengthPrefixCode[prefix];
+  *code = prefix;
+  *n_extra = range->nbits;
+  *extra = len - range->offset;
+}
+
+/* History of the two most recent block types; NextBlockTypeCode() reads and
+   updates it to derive the code of each new block type. */
+typedef struct BlockTypeCodeCalculator {
+  /* Block type passed to the most recent NextBlockTypeCode() call. */
+  size_t last_type;
+  /* Block type passed to the call before that. */
+  size_t second_last_type;
+} BlockTypeCodeCalculator;
+
+/* Resets |self| to the initial history: last type 1, second-to-last
+   type 0. */
+static void InitBlockTypeCodeCalculator(BlockTypeCodeCalculator* self) {
+  self->second_last_type = 0;
+  self->last_type = 1;
+}
+
+/* Returns the code for switching to block type |type| and records |type| in
+   the history:
+     1        if |type| is the previous type plus one,
+     0        if |type| equals the second-to-last type,
+     type + 2 otherwise.
+   The "plus one" test takes precedence when both conditions hold. */
+static BROTLI_INLINE size_t NextBlockTypeCode(
+    BlockTypeCodeCalculator* calculator, uint8_t type) {
+  size_t type_code;
+  if (type == calculator->last_type + 1) {
+    type_code = 1u;
+  } else if (type == calculator->second_last_type) {
+    type_code = 0u;
+  } else {
+    type_code = type + 2u;
+  }
+  calculator->second_last_type = calculator->last_type;
+  calculator->last_type = type;
+  return type_code;
+}
+
+/* Computes the fields that encode a meta-block length.
+   *bits        receives length - 1,
+   *numbits     receives the width of that field (4, 5 or 6 nibbles, i.e.
+                16, 20 or 24 bits),
+   *nibblesbits receives the 2-bit MNIBBLES selector (nibble count - 4).
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
+static void BrotliEncodeMlen(size_t length, uint64_t* bits,
+                             size_t* numbits, uint64_t* nibblesbits) {
+  size_t lg;
+  size_t mnibbles;
+  BROTLI_DCHECK(length > 0);
+  BROTLI_DCHECK(length <= (1 << 24));
+  /* Number of bits needed to represent length - 1 (at least one). */
+  if (length == 1) {
+    lg = 1;
+  } else {
+    lg = Log2FloorNonZero((uint32_t)(length - 1)) + 1;
+  }
+  BROTLI_DCHECK(lg <= 24);
+  /* Round up to whole nibbles, never using fewer than four of them. */
+  mnibbles = ((lg < 16) ? 16 : (lg + 3)) / 4;
+  *bits = length - 1;
+  *numbits = mnibbles * 4;
+  *nibblesbits = mnibbles - 4;
+}
+
+/* Writes the extra bits of |cmd| as a single field: the insert-length extra
+   value occupies the low bits and the copy-length extra value sits directly
+   above it. */
+static BROTLI_INLINE void StoreCommandExtra(
+    const Command* cmd, size_t* storage_ix, uint8_t* storage) {
+  const uint32_t copy_len_code = CommandCopyLenCode(cmd);
+  const uint16_t ins_code = GetInsertLengthCode(cmd->insert_len_);
+  const uint16_t copy_code = GetCopyLengthCode(copy_len_code);
+  const uint32_t ins_extra_bits = GetInsertExtra(ins_code);
+  const uint64_t ins_extra_val = cmd->insert_len_ - GetInsertBase(ins_code);
+  const uint64_t copy_extra_val = copy_len_code - GetCopyBase(copy_code);
+  const uint64_t combined = (copy_extra_val << ins_extra_bits) | ins_extra_val;
+  BrotliWriteBits(ins_extra_bits + GetCopyExtra(copy_code), combined,
+                  storage_ix, storage);
+}
+
+/* Data structure that stores almost everything that is needed to encode each
+   block switch command. */
+typedef struct BlockSplitCode {
+  /* Block-type history used to derive the type code of each switch. */
+  BlockTypeCodeCalculator type_code_calculator;
+  /* Prefix code for block type codes: code lengths and code bits, indexed
+     by the value returned from NextBlockTypeCode(). */
+  uint8_t type_depths[BROTLI_MAX_BLOCK_TYPE_SYMBOLS];
+  uint16_t type_bits[BROTLI_MAX_BLOCK_TYPE_SYMBOLS];
+  /* Prefix code for block length codes: code lengths and code bits, indexed
+     by the value returned from BlockLengthPrefixCode(). */
+  uint8_t length_depths[BROTLI_NUM_BLOCK_LEN_SYMBOLS];
+  uint16_t length_bits[BROTLI_NUM_BLOCK_LEN_SYMBOLS];
+} BlockSplitCode;
+
+/* Stores a number between 0 and 255 in variable-length form.
+   Zero is a single 0 bit. Any other value is a 1 bit, then
+   floor(log2(n)) in 3 bits, then n with its top bit removed in that many
+   bits. */
+static void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage) {
+  size_t nbits;
+  if (n == 0) {
+    BrotliWriteBits(1, 0, storage_ix, storage);
+    return;
+  }
+  nbits = Log2FloorNonZero(n);
+  BrotliWriteBits(1, 1, storage_ix, storage);
+  BrotliWriteBits(3, nbits, storage_ix, storage);
+  BrotliWriteBits(nbits, n - ((size_t)1 << nbits), storage_ix, storage);
+}
+
+/* Stores the header of a compressed meta-block of |length| bytes.
+   Bit order: ISLAST; a 0 bit right after it when this is the final block;
+   MNIBBLES; MLEN - 1; and, for non-final blocks only, ISUNCOMPRESSED = 0.
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
+static void StoreCompressedMetaBlockHeader(BROTLI_BOOL is_final_block,
+                                           size_t length,
+                                           size_t* storage_ix,
+                                           uint8_t* storage) {
+  uint64_t mlen_bits;
+  size_t mlen_nbits;
+  uint64_t mnibbles_bits;
+
+  /* Only fills the three locals; nothing is written to the stream yet. */
+  BrotliEncodeMlen(length, &mlen_bits, &mlen_nbits, &mnibbles_bits);
+
+  /* ISLAST, followed on the last meta-block by the "is empty" bit (0). */
+  BrotliWriteBits(1, (uint64_t)is_final_block, storage_ix, storage);
+  if (is_final_block) {
+    BrotliWriteBits(1, 0, storage_ix, storage);
+  }
+
+  /* MNIBBLES selector and MLEN - 1. */
+  BrotliWriteBits(2, mnibbles_bits, storage_ix, storage);
+  BrotliWriteBits(mlen_nbits, mlen_bits, storage_ix, storage);
+
+  if (is_final_block) {
+    return;
+  }
+  /* ISUNCOMPRESSED is only present on non-final meta-blocks. */
+  BrotliWriteBits(1, 0, storage_ix, storage);
+}
+
+/* Stores the header of an uncompressed meta-block of |length| bytes:
+   ISLAST = 0, MNIBBLES, MLEN - 1, ISUNCOMPRESSED = 1.
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
+static void BrotliStoreUncompressedMetaBlockHeader(size_t length,
+                                                   size_t* storage_ix,
+                                                   uint8_t* storage) {
+  uint64_t mlen_bits;
+  size_t mlen_nbits;
+  uint64_t mnibbles_bits;
+
+  /* Only fills the three locals; nothing is written to the stream yet. */
+  BrotliEncodeMlen(length, &mlen_bits, &mlen_nbits, &mnibbles_bits);
+
+  /* ISLAST: an uncompressed block cannot be the last one, so it is 0. */
+  BrotliWriteBits(1, 0, storage_ix, storage);
+  /* MNIBBLES selector and MLEN - 1. */
+  BrotliWriteBits(2, mnibbles_bits, storage_ix, storage);
+  BrotliWriteBits(mlen_nbits, mlen_bits, storage_ix, storage);
+  /* ISUNCOMPRESSED. */
+  BrotliWriteBits(1, 1, storage_ix, storage);
+}
+
+/* Stores the code lengths of the code-length alphabet itself.
+   num_codes: number of distinct code-length symbols in use; trailing zero
+     lengths are trimmed only when it is greater than 1.
+   code_length_bitdepth: code length (0..5) of every code-length symbol.
+   A 2-bit "skip" count (0, 2 or 3) is written first, then one static prefix
+   code per remaining length, visiting symbols in kStorageOrder. */
+static void BrotliStoreHuffmanTreeOfHuffmanTreeToBitMask(
+    const int num_codes, const uint8_t* code_length_bitdepth,
+    size_t* storage_ix, uint8_t* storage) {
+  static const uint8_t kStorageOrder[BROTLI_CODE_LENGTH_CODES] = {
+    1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15
+  };
+  /* Static prefix code used for the lengths 0..5:
+       Length   Code
+       ------   ----
+       0          00
+       1        1110
+       2         110
+       3          01
+       4          10
+       5        1111
+     The table below holds each code with its bits reversed, paired with
+     the code's width in kLengthCodeWidth. */
+  static const uint8_t kLengthCodeBits[6] = {
+     0, 7, 3, 2, 1, 15
+  };
+  static const uint8_t kLengthCodeWidth[6] = {
+    2, 4, 3, 2, 2, 4
+  };
+
+  size_t codes_to_store = BROTLI_CODE_LENGTH_CODES;
+  size_t skip_some;
+  size_t pos;
+
+  /* Throw away trailing zeros. */
+  if (num_codes > 1) {
+    while (codes_to_store > 0 &&
+           code_length_bitdepth[kStorageOrder[codes_to_store - 1]] == 0) {
+      --codes_to_store;
+    }
+  }
+
+  /* Leading zeros can be skipped in groups of two or three only. */
+  if (code_length_bitdepth[kStorageOrder[0]] != 0 ||
+      code_length_bitdepth[kStorageOrder[1]] != 0) {
+    skip_some = 0;
+  } else if (code_length_bitdepth[kStorageOrder[2]] != 0) {
+    skip_some = 2;
+  } else {
+    skip_some = 3;
+  }
+  BrotliWriteBits(2, skip_some, storage_ix, storage);
+
+  for (pos = skip_some; pos < codes_to_store; ++pos) {
+    const size_t len = code_length_bitdepth[kStorageOrder[pos]];
+    BrotliWriteBits(kLengthCodeWidth[len], kLengthCodeBits[len],
+                    storage_ix, storage);
+  }
+}
+
+/* Writes the |huffman_tree_size| code-length symbols in |huffman_tree|, each
+   encoded with the code given by |code_length_bitdepth| (lengths) and
+   |code_length_bitdepth_symbols| (bits). The two repeat symbols are followed
+   by their extra bits from |huffman_tree_extra_bits|: 2 bits for "repeat
+   previous code length", 3 bits for "repeat zero code length". */
+static void BrotliStoreHuffmanTreeToBitMask(
+    const size_t huffman_tree_size, const uint8_t* huffman_tree,
+    const uint8_t* huffman_tree_extra_bits, const uint8_t* code_length_bitdepth,
+    const uint16_t* code_length_bitdepth_symbols,
+    size_t* BROTLI_RESTRICT storage_ix, uint8_t* BROTLI_RESTRICT storage) {
+  size_t pos;
+  for (pos = 0; pos < huffman_tree_size; ++pos) {
+    const size_t symbol = huffman_tree[pos];
+    BrotliWriteBits(code_length_bitdepth[symbol],
+                    code_length_bitdepth_symbols[symbol],
+                    storage_ix, storage);
+    if (symbol == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) {
+      BrotliWriteBits(2, huffman_tree_extra_bits[pos], storage_ix, storage);
+    } else if (symbol == BROTLI_REPEAT_ZERO_CODE_LENGTH) {
+      BrotliWriteBits(3, huffman_tree_extra_bits[pos], storage_ix, storage);
+    }
+  }
+}
+
+/* Stores a "simple" prefix code description.
+   depths: code length of every symbol of the alphabet.
+   symbols: the symbols in use; reordered in place by ascending depth.
+   num_symbols: number of valid entries in |symbols|.
+   max_bits: width used to write each symbol value.
+   Writes the 2-bit marker 1, NSYM - 1 in 2 bits and then the symbols. With
+   two or three symbols exactly that many are written; in every other case
+   four symbols are written followed by a one-bit tree-select flag. */
+static void StoreSimpleHuffmanTree(const uint8_t* depths,
+                                   size_t symbols[4],
+                                   size_t num_symbols,
+                                   size_t max_bits,
+                                   size_t* storage_ix, uint8_t* storage) {
+  const int four_symbols = !(num_symbols == 2 || num_symbols == 3);
+  const size_t num_to_write = four_symbols ? 4 : num_symbols;
+  size_t lo;
+  size_t hi;
+  size_t k;
+
+  /* value of 1 indicates a simple Huffman code */
+  BrotliWriteBits(2, 1, storage_ix, storage);
+  BrotliWriteBits(2, num_symbols - 1, storage_ix, storage);  /* NSYM - 1 */
+
+  /* Order the symbols by increasing code length. */
+  for (lo = 0; lo < num_symbols; ++lo) {
+    for (hi = lo + 1; hi < num_symbols; ++hi) {
+      if (depths[symbols[hi]] < depths[symbols[lo]]) {
+        BROTLI_SWAP(size_t, symbols, hi, lo);
+      }
+    }
+  }
+
+  for (k = 0; k < num_to_write; ++k) {
+    BrotliWriteBits(max_bits, symbols[k], storage_ix, storage);
+  }
+  if (four_symbols) {
+    /* tree-select */
+    BrotliWriteBits(1, depths[symbols[0]] == 1 ? 1 : 0, storage_ix, storage);
+  }
+}
+
+/* Stores a "complex" prefix code description for the given code lengths.
+   depths = symbol depths (code length of every symbol)
+   num = alphabet size; must not exceed BROTLI_NUM_COMMAND_SYMBOLS
+   tree = scratch space handed to BrotliCreateHuffmanTree
+   storage_ix, storage = bit stream write position and buffer */
+void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
+                            HuffmanTree* tree,
+                            size_t* storage_ix, uint8_t* storage) {
+  /* Write the Huffman tree into the brotli-representation.
+     The command alphabet is the largest, so this allocation will fit all
+     alphabets. */
+  uint8_t huffman_tree[BROTLI_NUM_COMMAND_SYMBOLS];
+  uint8_t huffman_tree_extra_bits[BROTLI_NUM_COMMAND_SYMBOLS];
+  size_t huffman_tree_size = 0;
+  uint8_t code_length_bitdepth[BROTLI_CODE_LENGTH_CODES] = { 0 };
+  uint16_t code_length_bitdepth_symbols[BROTLI_CODE_LENGTH_CODES];
+  uint32_t huffman_tree_histogram[BROTLI_CODE_LENGTH_CODES] = { 0 };
+  size_t i;
+  int num_codes = 0;
+  size_t code = 0;
+
+  BROTLI_DCHECK(num <= BROTLI_NUM_COMMAND_SYMBOLS);
+
+  BrotliWriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree,
+                         huffman_tree_extra_bits);
+
+  /* Calculate the statistics of the Huffman tree in brotli-representation. */
+  for (i = 0; i < huffman_tree_size; ++i) {
+    ++huffman_tree_histogram[huffman_tree[i]];
+  }
+
+  /* Determine whether one or several distinct code-length symbols occur.
+     |num_codes| saturates at 2 and |code| keeps the first symbol seen. */
+  for (i = 0; i < BROTLI_CODE_LENGTH_CODES; ++i) {
+    if (huffman_tree_histogram[i]) {
+      if (num_codes == 0) {
+        code = i;
+        num_codes = 1;
+      } else if (num_codes == 1) {
+        num_codes = 2;
+        break;
+      }
+    }
+  }
+
+  /* Calculate another Huffman tree, limited to a depth of 5, that is used
+     to compress the earlier Huffman tree. */
+  BrotliCreateHuffmanTree(huffman_tree_histogram, BROTLI_CODE_LENGTH_CODES,
+                          5, tree, code_length_bitdepth);
+  BrotliConvertBitDepthsToSymbols(code_length_bitdepth,
+                                  BROTLI_CODE_LENGTH_CODES,
+                                  code_length_bitdepth_symbols);
+
+  /* Now, we have all the data, let's start storing it */
+  BrotliStoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth,
+                                               storage_ix, storage);
+
+  /* With a single code-length symbol in use, its depth is cleared only
+     after the code-length code has been stored above, so the loop below
+     writes that symbol with zero bits. */
+  if (num_codes == 1) {
+    code_length_bitdepth[code] = 0;
+  }
+
+  /* Store the real Huffman tree now. */
+  BrotliStoreHuffmanTreeToBitMask(huffman_tree_size,
+                                  huffman_tree,
+                                  huffman_tree_extra_bits,
+                                  code_length_bitdepth,
+                                  code_length_bitdepth_symbols,
+                                  storage_ix, storage);
+}
+
+/* Builds a prefix code for histogram[0:histogram_length], writes the code
+   lengths to depth[] and the code bits to bits[], and stores the code
+   description in the bit stream.
+   alphabet_size only determines how many bits a symbol value takes in the
+   one-symbol and simple-code descriptions.
+   With at most one used symbol only that symbol is stored and only its
+   depth/bits entries are written (both zero). Up to four used symbols give
+   a simple code; more give a complex code. */
+static void BuildAndStoreHuffmanTree(const uint32_t* histogram,
+                                     const size_t histogram_length,
+                                     const size_t alphabet_size,
+                                     HuffmanTree* tree,
+                                     uint8_t* depth,
+                                     uint16_t* bits,
+                                     size_t* storage_ix,
+                                     uint8_t* storage) {
+  size_t s4[4] = { 0 };
+  size_t count = 0;
+  size_t max_bits = 0;
+  size_t remaining;
+  size_t i;
+
+  /* Collect the first four used symbols; counting stops once it is known
+     that more than four are in use. */
+  for (i = 0; i < histogram_length; i++) {
+    if (histogram[i] == 0) continue;
+    if (count > 4) break;
+    if (count < 4) s4[count] = i;
+    count++;
+  }
+
+  /* Number of bits needed to represent alphabet_size - 1. */
+  for (remaining = alphabet_size - 1; remaining != 0; remaining >>= 1) {
+    ++max_bits;
+  }
+
+  if (count <= 1) {
+    BrotliWriteBits(4, 1, storage_ix, storage);
+    BrotliWriteBits(max_bits, s4[0], storage_ix, storage);
+    depth[s4[0]] = 0;
+    bits[s4[0]] = 0;
+    return;
+  }
+
+  memset(depth, 0, histogram_length * sizeof(depth[0]));
+  BrotliCreateHuffmanTree(histogram, histogram_length, 15, tree, depth);
+  BrotliConvertBitDepthsToSymbols(depth, histogram_length, bits);
+
+  if (count > 4) {
+    BrotliStoreHuffmanTree(depth, histogram_length, tree, storage_ix, storage);
+  } else {
+    StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage);
+  }
+}
+
+/* Ordering predicate passed to SortHuffmanTreeItems: true when |v0| has a
+   strictly smaller total count than |v1|. */
+static BROTLI_INLINE BROTLI_BOOL SortHuffmanTree(
+    const HuffmanTree* v0, const HuffmanTree* v1) {
+  return TO_BROTLI_BOOL(v1->total_count_ > v0->total_count_);
+}
+
+/* Builds a prefix code with code lengths of at most 14 bits for |histogram|,
+   writes the code lengths to depth[] and the code bits to bits[], and
+   stores the code description in the bit stream.
+   m: memory manager used for the temporary tree; on allocation failure the
+     function returns after clearing depth[] but before writing any bits.
+   histogram_total: sum of all histogram entries. The symbol scan stops once
+     this many occurrences have been seen and has no other bound, so the
+     value has to match the histogram exactly.
+   max_bits: width used to write a symbol value in the one-symbol and
+     simple-code descriptions.
+   With at most one used symbol only that symbol is stored and only its
+   depth/bits entries are written (both zero). Up to four used symbols give
+   a simple code; more give a complex code whose code lengths are
+   run-length coded with the static code-length tables. */
+void BrotliBuildAndStoreHuffmanTreeFast(MemoryManager* m,
+                                        const uint32_t* histogram,
+                                        const size_t histogram_total,
+                                        const size_t max_bits,
+                                        uint8_t* depth, uint16_t* bits,
+                                        size_t* storage_ix,
+                                        uint8_t* storage) {
+  size_t count = 0;
+  size_t symbols[4] = { 0 };
+  size_t length = 0;
+  size_t total = histogram_total;
+  /* Count the used symbols, remember the first four, and find |length|:
+     one past the last symbol needed to account for |histogram_total|. */
+  while (total != 0) {
+    if (histogram[length]) {
+      if (count < 4) {
+        symbols[count] = length;
+      }
+      ++count;
+      total -= histogram[length];
+    }
+    ++length;
+  }
+
+  if (count <= 1) {
+    BrotliWriteBits(4, 1, storage_ix, storage);
+    BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+    depth[symbols[0]] = 0;
+    bits[symbols[0]] = 0;
+    return;
+  }
+
+  memset(depth, 0, length * sizeof(depth[0]));
+  {
+    const size_t max_tree_size = 2 * length + 1;
+    HuffmanTree* tree = BROTLI_ALLOC(m, HuffmanTree, max_tree_size);
+    uint32_t count_limit;
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tree)) return;
+    /* Each pass raises every count below |count_limit| to |count_limit|;
+       the limit doubles until the resulting tree fits in 14 bits. */
+    for (count_limit = 1; ; count_limit *= 2) {
+      HuffmanTree* node = tree;
+      size_t l;
+      /* Create one leaf per used symbol, visiting symbols from high to
+         low. */
+      for (l = length; l != 0;) {
+        --l;
+        if (histogram[l]) {
+          if (BROTLI_PREDICT_TRUE(histogram[l] >= count_limit)) {
+            InitHuffmanTree(node, histogram[l], -1, (int16_t)l);
+          } else {
+            InitHuffmanTree(node, count_limit, -1, (int16_t)l);
+          }
+          ++node;
+        }
+      }
+      {
+        const int n = (int)(node - tree);
+        HuffmanTree sentinel;
+        int i = 0;      /* Points to the next leaf node. */
+        int j = n + 1;  /* Points to the next non-leaf node. */
+        int k;
+
+        SortHuffmanTreeItems(tree, (size_t)n, SortHuffmanTree);
+        /* The nodes are:
+           [0, n): the sorted leaf nodes that we start with.
+           [n]: we add a sentinel here.
+           [n + 1, 2n): new parent nodes are added here, starting from
+                        (n+1). These are naturally in ascending order.
+           [2n]: we add a sentinel at the end as well.
+           There will be (2n+1) elements at the end. */
+        InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1);
+        *node++ = sentinel;
+        *node++ = sentinel;
+
+        /* Merge the two cheapest remaining nodes n - 1 times. */
+        for (k = n - 1; k > 0; --k) {
+          int left, right;
+          if (tree[i].total_count_ <= tree[j].total_count_) {
+            left = i;
+            ++i;
+          } else {
+            left = j;
+            ++j;
+          }
+          if (tree[i].total_count_ <= tree[j].total_count_) {
+            right = i;
+            ++i;
+          } else {
+            right = j;
+            ++j;
+          }
+          /* The sentinel node becomes the parent node. */
+          node[-1].total_count_ =
+              tree[left].total_count_ + tree[right].total_count_;
+          node[-1].index_left_ = (int16_t)left;
+          node[-1].index_right_or_value_ = (int16_t)right;
+          /* Add back the last sentinel node. */
+          *node++ = sentinel;
+        }
+        /* We need to pack the Huffman tree in 14 bits. A true result means
+           the depths were assigned, so stop; otherwise go round again with
+           a doubled |count_limit|, which inflates the smallest counts. */
+        if (BrotliSetDepth(2 * n - 1, tree, depth, 14)) {
+          break;
+        }
+      }
+    }
+    BROTLI_FREE(m, tree);
+  }
+  BrotliConvertBitDepthsToSymbols(depth, length, bits);
+  if (count <= 4) {
+    /* Two to four used symbols: same layout as the simple code written by
+       BuildAndStoreHuffmanTree, so share the writer. */
+    StoreSimpleHuffmanTree(depth, symbols, count, max_bits,
+                           storage_ix, storage);
+  } else {
+    uint8_t previous_value = 8;
+    size_t i;
+    /* Complex Huffman Tree */
+    StoreStaticCodeLengthCode(storage_ix, storage);
+
+    /* Actual RLE coding. */
+    for (i = 0; i < length;) {
+      const uint8_t value = depth[i];
+      size_t reps = 1;
+      size_t k;
+      for (k = i + 1; k < length && depth[k] == value; ++k) {
+        ++reps;
+      }
+      i += reps;
+      if (value == 0) {
+        BrotliWriteBits(kZeroRepsDepth[reps], kZeroRepsBits[reps],
+                        storage_ix, storage);
+      } else {
+        /* A run whose value differs from the previous non-zero value
+           starts with one explicit code length. */
+        if (previous_value != value) {
+          BrotliWriteBits(kCodeLengthDepth[value], kCodeLengthBits[value],
+                          storage_ix, storage);
+          --reps;
+        }
+        /* Fewer than three remaining repeats are written out one by one;
+           longer runs use the repeat tables. */
+        if (reps < 3) {
+          while (reps != 0) {
+            reps--;
+            BrotliWriteBits(kCodeLengthDepth[value], kCodeLengthBits[value],
+                            storage_ix, storage);
+          }
+        } else {
+          reps -= 3;
+          BrotliWriteBits(kNonZeroRepsDepth[reps], kNonZeroRepsBits[reps],
+                          storage_ix, storage);
+        }
+        previous_value = value;
+      }
+    }
+  }
+}
+
+/* Returns the position of the first element of v[0..v_size) equal to
+   |value|, or |v_size| when there is none. */
+static size_t IndexOf(const uint8_t* v, size_t v_size, uint8_t value) {
+  size_t pos = 0;
+  while (pos < v_size && v[pos] != value) {
+    ++pos;
+  }
+  return pos;
+}
+
+/* Moves v[index] to position 0 and shifts v[0..index) up by one slot. */
+static void MoveToFront(uint8_t* v, size_t index) {
+  const uint8_t front = v[index];
+  /* Source and destination overlap, hence memmove rather than memcpy. */
+  memmove(v + 1, v, index);
+  v[0] = front;
+}
+
+/* Applies a move-to-front transform to v_in[0..v_size) and writes the
+   resulting ranks to v_out[0..v_size). The working list starts as
+   0, 1, ..., max(v_in). Every input value is expected to be below 256;
+   this is only checked by a debug assertion. */
+static void MoveToFrontTransform(const uint32_t* BROTLI_RESTRICT v_in,
+                                 const size_t v_size,
+                                 uint32_t* v_out) {
+  uint8_t mtf[256];
+  uint32_t max_value;
+  size_t mtf_size;
+  size_t i;
+  if (v_size == 0) {
+    return;
+  }
+  max_value = v_in[0];
+  for (i = 1; i < v_size; ++i) {
+    if (max_value < v_in[i]) {
+      max_value = v_in[i];
+    }
+  }
+  BROTLI_DCHECK(max_value < 256u);
+  for (i = 0; i <= max_value; ++i) {
+    mtf[i] = (uint8_t)i;
+  }
+  mtf_size = max_value + 1;
+  for (i = 0; i < v_size; ++i) {
+    const size_t rank = IndexOf(mtf, mtf_size, (uint8_t)v_in[i]);
+    BROTLI_DCHECK(rank < mtf_size);
+    v_out[i] = (uint32_t)rank;
+    MoveToFront(mtf, rank);
+  }
+}
+
+/* Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of
+   the run length plus extra bits (lower 9 bits is the prefix code and the rest
+   are the extra bits). Non-zero values in v[] are shifted by
+   *max_run_length_prefix. Will not create prefix codes bigger than the initial
+   value of *max_run_length_prefix. The prefix code of run length L is simply
+   Log2Floor(L) and the number of extra bits is the same as the prefix code.
+   The rewrite happens in place: on return v[0..*out_size) holds the coded
+   symbols and *max_run_length_prefix holds the largest prefix code that was
+   actually allowed (0 when v[] contains no zeros). */
+static void RunLengthCodeZeros(const size_t in_size,
+    uint32_t* BROTLI_RESTRICT v, size_t* BROTLI_RESTRICT out_size,
+    uint32_t* BROTLI_RESTRICT max_run_length_prefix) {
+  uint32_t max_reps = 0;
+  size_t i;
+  uint32_t max_prefix;
+  /* First pass: find the length of the longest run of zeros. */
+  for (i = 0; i < in_size;) {
+    uint32_t reps = 0;
+    for (; i < in_size && v[i] != 0; ++i) ;
+    for (; i < in_size && v[i] == 0; ++i) {
+      ++reps;
+    }
+    max_reps = BROTLI_MAX(uint32_t, reps, max_reps);
+  }
+  /* The longest run decides the largest prefix code needed, capped by the
+     caller's limit. */
+  max_prefix = max_reps > 0 ? Log2FloorNonZero(max_reps) : 0;
+  max_prefix = BROTLI_MIN(uint32_t, max_prefix, *max_run_length_prefix);
+  *max_run_length_prefix = max_prefix;
+  *out_size = 0;
+  /* Second pass: rewrite v[] in place; the write index never overtakes the
+     read index. */
+  for (i = 0; i < in_size;) {
+    BROTLI_DCHECK(*out_size <= i);
+    if (v[i] != 0) {
+      v[*out_size] = v[i] + *max_run_length_prefix;
+      ++i;
+      ++(*out_size);
+    } else {
+      uint32_t reps = 1;
+      size_t k;
+      for (k = i + 1; k < in_size && v[k] == 0; ++k) {
+        ++reps;
+      }
+      i += reps;
+      /* Runs too long for one symbol are split into maximal chunks of
+         (2 << max_prefix) - 1 zeros, followed by one symbol for the rest. */
+      while (reps != 0) {
+        if (reps < (2u << max_prefix)) {
+          uint32_t run_length_prefix = Log2FloorNonZero(reps);
+          const uint32_t extra_bits = reps - (1u << run_length_prefix);
+          v[*out_size] = run_length_prefix + (extra_bits << 9);
+          ++(*out_size);
+          break;
+        } else {
+          const uint32_t extra_bits = (1u << max_prefix) - 1u;
+          v[*out_size] = max_prefix + (extra_bits << 9);
+          reps -= (2u << max_prefix) - 1u;
+          ++(*out_size);
+        }
+      }
+    }
+  }
+}
+
+/* Number of low bits of a run-length coded word that hold the symbol; the
+   extra-bits value is stored above them. Must agree with the literal shift
+   of 9 used in RunLengthCodeZeros(). */
+#define SYMBOL_BITS 9
+
+/* Stores a context map of |context_map_size| entries that refers to
+   |num_clusters| clusters.
+   The cluster count is written first; with a single cluster nothing else
+   is stored. Otherwise the map is move-to-front transformed, its zero runs
+   are run-length coded, and the result is written with a prefix code built
+   for it, followed by a 1 bit that signals the move-to-front transform.
+   m: memory manager for the temporary symbol buffer; on allocation failure
+     the function returns right after the cluster count has been written.
+   tree: scratch space for building the prefix code. */
+static void EncodeContextMap(MemoryManager* m,
+                             const uint32_t* context_map,
+                             size_t context_map_size,
+                             size_t num_clusters,
+                             HuffmanTree* tree,
+                             size_t* storage_ix, uint8_t* storage) {
+  static const uint32_t kSymbolMask = (1u << SYMBOL_BITS) - 1u;
+  uint32_t histogram[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+  uint8_t depths[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+  uint16_t bits[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+  uint32_t max_run_length_prefix = 6;
+  size_t num_rle_symbols = 0;
+  uint32_t* rle_symbols;
+  size_t i;
+
+  StoreVarLenUint8(num_clusters - 1, storage_ix, storage);
+  if (num_clusters == 1) {
+    return;
+  }
+
+  rle_symbols = BROTLI_ALLOC(m, uint32_t, context_map_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(rle_symbols)) return;
+
+  /* Transform the map and gather symbol statistics. */
+  MoveToFrontTransform(context_map, context_map_size, rle_symbols);
+  RunLengthCodeZeros(context_map_size, rle_symbols,
+                     &num_rle_symbols, &max_run_length_prefix);
+  memset(histogram, 0, sizeof(histogram));
+  for (i = 0; i < num_rle_symbols; ++i) {
+    ++histogram[rle_symbols[i] & kSymbolMask];
+  }
+
+  /* Run-length flag, followed by the largest run-length prefix minus one
+     when run lengths are in use. */
+  {
+    const BROTLI_BOOL use_rle = TO_BROTLI_BOOL(max_run_length_prefix > 0);
+    BrotliWriteBits(1, (uint64_t)use_rle, storage_ix, storage);
+    if (use_rle) {
+      BrotliWriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
+    }
+  }
+
+  BuildAndStoreHuffmanTree(histogram, num_clusters + max_run_length_prefix,
+                           num_clusters + max_run_length_prefix,
+                           tree, depths, bits, storage_ix, storage);
+
+  for (i = 0; i < num_rle_symbols; ++i) {
+    const uint32_t word = rle_symbols[i];
+    const uint32_t symbol = word & kSymbolMask;
+    BrotliWriteBits(depths[symbol], bits[symbol], storage_ix, storage);
+    /* Only run-length symbols 1..max_run_length_prefix carry extra bits. */
+    if (symbol == 0 || symbol > max_run_length_prefix) {
+      continue;
+    }
+    BrotliWriteBits(symbol, word >> SYMBOL_BITS, storage_ix, storage);
+  }
+  BrotliWriteBits(1, 1, storage_ix, storage);  /* use move-to-front */
+  BROTLI_FREE(m, rle_symbols);
+}
+
+/* Stores one block switch command: the code of |block_type| followed by the
+   prefix code and extra bits of |block_len|. The type code is left out when
+   |is_first_block| is set, but the block-type history in |code| is updated
+   either way. */
+static BROTLI_INLINE void StoreBlockSwitch(BlockSplitCode* code,
+                                           const uint32_t block_len,
+                                           const uint8_t block_type,
+                                           BROTLI_BOOL is_first_block,
+                                           size_t* storage_ix,
+                                           uint8_t* storage) {
+  const size_t type_code =
+      NextBlockTypeCode(&code->type_code_calculator, block_type);
+  size_t len_code;
+  uint32_t num_extra_bits;
+  uint32_t extra_bits;
+  GetBlockLengthPrefixCode(block_len, &len_code, &num_extra_bits, &extra_bits);
+
+  if (!is_first_block) {
+    BrotliWriteBits(code->type_depths[type_code], code->type_bits[type_code],
+                    storage_ix, storage);
+  }
+  BrotliWriteBits(code->length_depths[len_code], code->length_bits[len_code],
+                  storage_ix, storage);
+  BrotliWriteBits(num_extra_bits, extra_bits, storage_ix, storage);
+}
+
+/* Builds the prefix codes for block types and block lengths of one block
+   split and stores them to the bit stream.
+   types, lengths: type and length of each of the |num_blocks| blocks.
+   num_types: number of distinct block types; it is always written. With a
+     single type nothing else is stored and |code| is not modified.
+   tree: scratch space for building the prefix codes.
+   code: receives the two prefix codes; its block-type history is also
+     advanced by the switch command stored for the first block. */
+static void BuildAndStoreBlockSplitCode(const uint8_t* types,
+                                        const uint32_t* lengths,
+                                        const size_t num_blocks,
+                                        const size_t num_types,
+                                        HuffmanTree* tree,
+                                        BlockSplitCode* code,
+                                        size_t* storage_ix,
+                                        uint8_t* storage) {
+  uint32_t type_histo[BROTLI_MAX_BLOCK_TYPE_SYMBOLS];
+  uint32_t length_histo[BROTLI_NUM_BLOCK_LEN_SYMBOLS];
+  BlockTypeCodeCalculator history;
+  size_t block;
+
+  memset(type_histo, 0, (num_types + 2) * sizeof(type_histo[0]));
+  memset(length_histo, 0, sizeof(length_histo));
+  InitBlockTypeCodeCalculator(&history);
+
+  /* Gather statistics. The first block's type code is never stored, so it
+     is left out of the histogram, but it still advances the history. */
+  for (block = 0; block < num_blocks; ++block) {
+    const size_t type_code = NextBlockTypeCode(&history, types[block]);
+    if (block != 0) {
+      ++type_histo[type_code];
+    }
+    ++length_histo[BlockLengthPrefixCode(lengths[block])];
+  }
+
+  StoreVarLenUint8(num_types - 1, storage_ix, storage);
+  if (num_types <= 1) {  /* TODO: could StoreBlockSwitch occur here? */
+    return;
+  }
+
+  BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, num_types + 2, tree,
+                           &code->type_depths[0], &code->type_bits[0],
+                           storage_ix, storage);
+  BuildAndStoreHuffmanTree(&length_histo[0], BROTLI_NUM_BLOCK_LEN_SYMBOLS,
+                           BROTLI_NUM_BLOCK_LEN_SYMBOLS,
+                           tree, &code->length_depths[0],
+                           &code->length_bits[0], storage_ix, storage);
+  StoreBlockSwitch(code, lengths[0], types[0], 1, storage_ix, storage);
+}
+
+/* Stores a context map in which every context of block type t maps to
+   histogram t.
+   num_types: number of block types; it is always written, and with a
+     single type nothing else is stored.
+   context_bits: the run-length symbol used is context_bits - 1, and each
+     type is followed by one maximal run of that symbol.
+   tree: scratch space for building the prefix code. */
+static void StoreTrivialContextMap(size_t num_types,
+                                   size_t context_bits,
+                                   HuffmanTree* tree,
+                                   size_t* storage_ix,
+                                   uint8_t* storage) {
+  uint32_t histogram[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+  uint8_t depths[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+  uint16_t bits[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+  size_t repeat_code;
+  size_t repeat_bits;
+  size_t alphabet_size;
+  size_t i;
+
+  StoreVarLenUint8(num_types - 1, storage_ix, storage);
+  if (num_types <= 1) {
+    return;
+  }
+
+  repeat_code = context_bits - 1u;
+  repeat_bits = (1u << repeat_code) - 1u;
+  alphabet_size = num_types + repeat_code;
+  memset(histogram, 0, alphabet_size * sizeof(histogram[0]));
+
+  /* Write RLEMAX. */
+  BrotliWriteBits(1, 1, storage_ix, storage);
+  BrotliWriteBits(4, repeat_code - 1, storage_ix, storage);
+
+  /* The repeat symbol is used once per type; symbol 0 and the symbols from
+     context_bits upwards are each used once. */
+  histogram[repeat_code] = (uint32_t)num_types;
+  histogram[0] = 1;
+  for (i = context_bits; i < alphabet_size; ++i) {
+    histogram[i] = 1;
+  }
+  BuildAndStoreHuffmanTree(histogram, alphabet_size, alphabet_size,
+                           tree, depths, bits, storage_ix, storage);
+
+  for (i = 0; i < num_types; ++i) {
+    size_t code;
+    if (i == 0) {
+      code = 0;
+    } else {
+      code = i + context_bits - 1;
+    }
+    BrotliWriteBits(depths[code], bits[code], storage_ix, storage);
+    BrotliWriteBits(
+        depths[repeat_code], bits[repeat_code], storage_ix, storage);
+    BrotliWriteBits(repeat_code, repeat_bits, storage_ix, storage);
+  }
+  /* Write IMTF (inverse-move-to-front) bit. */
+  BrotliWriteBits(1, 1, storage_ix, storage);
+}
+
+/* Manages the encoding of one block category (literal, command or distance). */
+typedef struct BlockEncoder {
+  /* Number of depths_/bits_ entries per block type; StoreSymbol() uses it
+     as the stride when selecting the code of the current block type. */
+  size_t histogram_length_;
+  size_t num_block_types_;
+  const uint8_t* block_types_;  /* Not owned. */
+  const uint32_t* block_lengths_;  /* Not owned. */
+  size_t num_blocks_;
+  /* Prefix codes and block-type history for block switch commands. */
+  BlockSplitCode block_split_code_;
+  /* Index of the block currently being encoded. */
+  size_t block_ix_;
+  /* Number of symbols still to be stored in the current block. */
+  size_t block_len_;
+  /* Offset into depths_/bits_ of the code of the current block type. */
+  size_t entropy_ix_;
+  /* Code lengths and code bits of all entropy codes. Initialized to null by
+     InitBlockEncoder() and released by CleanupBlockEncoder(); the allocation
+     happens outside this excerpt. */
+  uint8_t* depths_;
+  uint16_t* bits_;
+} BlockEncoder;
+
+/* Prepares |self| for encoding the given block split. The block arrays are
+   borrowed, not copied. The encoder starts at block 0 with entropy offset 0
+   and with no code tables allocated. */
+static void InitBlockEncoder(BlockEncoder* self, size_t histogram_length,
+    size_t num_block_types, const uint8_t* block_types,
+    const uint32_t* block_lengths, const size_t num_blocks) {
+  /* Description of the block split. */
+  self->histogram_length_ = histogram_length;
+  self->num_block_types_ = num_block_types;
+  self->block_types_ = block_types;
+  self->block_lengths_ = block_lengths;
+  self->num_blocks_ = num_blocks;
+  InitBlockTypeCodeCalculator(&self->block_split_code_.type_code_calculator);
+
+  /* Encoding position: start of the first block, if there is one. */
+  self->block_ix_ = 0;
+  if (num_blocks == 0) {
+    self->block_len_ = 0;
+  } else {
+    self->block_len_ = block_lengths[0];
+  }
+  self->entropy_ix_ = 0;
+
+  /* The code tables are attached later. */
+  self->depths_ = NULL;
+  self->bits_ = NULL;
+}
+
+static void CleanupBlockEncoder(MemoryManager* m, BlockEncoder* self) {
+  BROTLI_FREE(m, self->depths_);  /* Releases the code-length table via |m|. */
+  BROTLI_FREE(m, self->bits_);  /* Releases the code-bits table via |m|. */
+}
+
+/* Creates entropy codes of block lengths and block types, stores them to the
+   bit stream, and keeps them in self->block_split_code_ for later use. */
+static void BuildAndStoreBlockSwitchEntropyCodes(BlockEncoder* self,
+    HuffmanTree* tree, size_t* storage_ix, uint8_t* storage) {
+  BuildAndStoreBlockSplitCode(self->block_types_, self->block_lengths_,
+      self->num_blocks_, self->num_block_types_, tree, &self->block_split_code_,
+      storage_ix, storage);
+}
+
+/* Writes |symbol| with the prefix code of the block type currently in effect.
+   When the current block is exhausted, first advances to the next block and
+   stores the block switch (new type and length) ahead of the symbol. */
+static void StoreSymbol(BlockEncoder* self, size_t symbol, size_t* storage_ix,
+    uint8_t* storage) {
+  size_t idx;
+  if (self->block_len_ == 0) {
+    const size_t next = ++self->block_ix_;
+    const uint32_t next_len = self->block_lengths_[next];
+    const uint8_t next_type = self->block_types_[next];
+    self->entropy_ix_ = next_type * self->histogram_length_;
+    self->block_len_ = next_len;
+    StoreBlockSwitch(&self->block_split_code_, next_len, next_type, 0,
+        storage_ix, storage);
+  }
+  --self->block_len_;
+  idx = self->entropy_ix_ + symbol;
+  BrotliWriteBits(self->depths_[idx], self->bits_[idx], storage_ix, storage);
+}
+
+/* Writes |symbol| with the prefix code selected by the current block type and
+   |context| through |context_map|. When the current block is exhausted, first
+   advances to the next block and stores the block switch. */
+static void StoreSymbolWithContext(BlockEncoder* self, size_t symbol,
+    size_t context, const uint32_t* context_map, size_t* storage_ix,
+    uint8_t* storage, const size_t context_bits) {
+  size_t histo_ix;
+  size_t idx;
+  if (self->block_len_ == 0) {
+    const size_t next = ++self->block_ix_;
+    const uint32_t next_len = self->block_lengths_[next];
+    const uint8_t next_type = self->block_types_[next];
+    self->entropy_ix_ = (size_t)next_type << context_bits;
+    self->block_len_ = next_len;
+    StoreBlockSwitch(&self->block_split_code_, next_len, next_type, 0,
+        storage_ix, storage);
+  }
+  --self->block_len_;
+  histo_ix = context_map[self->entropy_ix_ + context];
+  idx = histo_ix * self->histogram_length_ + symbol;
+  BrotliWriteBits(self->depths_[idx], self->bits_[idx], storage_ix, storage);
+}
+
+#define FN(X) X ## Literal
+/* NOLINTNEXTLINE(build/include) */
+#include "./block_encoder_inc.h"
+#undef FN
+
+#define FN(X) X ## Command
+/* NOLINTNEXTLINE(build/include) */
+#include "./block_encoder_inc.h"
+#undef FN
+
+#define FN(X) X ## Distance
+/* NOLINTNEXTLINE(build/include) */
+#include "./block_encoder_inc.h"
+#undef FN
+
+static void JumpToByteBoundary(size_t* storage_ix, uint8_t* storage) {
+  *storage_ix = (*storage_ix + 7u) & ~(size_t)7;  /* Full-width mask. */
+  storage[*storage_ix >> 3] = 0;  /* Clear the next byte for BrotliWriteBits. */
+}
+
+void BrotliStoreMetaBlock(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last,
+    const BrotliEncoderParams* params, ContextType literal_context_mode,
+    const Command* commands, size_t n_commands, const MetaBlockSplit* mb,
+    size_t* storage_ix, uint8_t* storage) {
+  /* Emits one compressed meta-block: header, code tables, then commands. */
+  size_t pos = start_pos;
+  size_t i;
+  uint32_t num_distance_symbols = params->dist.alphabet_size_max;
+  uint32_t num_effective_distance_symbols = params->dist.alphabet_size_limit;
+  HuffmanTree* tree;
+  ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
+  BlockEncoder literal_enc;
+  BlockEncoder command_enc;
+  BlockEncoder distance_enc;
+  const BrotliDistanceParams* dist = &params->dist;
+  BROTLI_DCHECK(
+      num_effective_distance_symbols <= BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS);
+
+  StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
+  /* |tree| is passed to each code-building call below, then freed. */
+  tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tree)) return;
+  InitBlockEncoder(&literal_enc, BROTLI_NUM_LITERAL_SYMBOLS,
+      mb->literal_split.num_types, mb->literal_split.types,
+      mb->literal_split.lengths, mb->literal_split.num_blocks);
+  InitBlockEncoder(&command_enc, BROTLI_NUM_COMMAND_SYMBOLS,
+      mb->command_split.num_types, mb->command_split.types,
+      mb->command_split.lengths, mb->command_split.num_blocks);
+  InitBlockEncoder(&distance_enc, num_effective_distance_symbols,
+      mb->distance_split.num_types, mb->distance_split.types,
+      mb->distance_split.lengths, mb->distance_split.num_blocks);
+  /* Block type and block length codes for each of the three categories. */
+  BuildAndStoreBlockSwitchEntropyCodes(&literal_enc, tree, storage_ix, storage);
+  BuildAndStoreBlockSwitchEntropyCodes(&command_enc, tree, storage_ix, storage);
+  BuildAndStoreBlockSwitchEntropyCodes(
+      &distance_enc, tree, storage_ix, storage);
+  /* Distance parameters, then the context mode for every literal type. */
+  BrotliWriteBits(2, dist->distance_postfix_bits, storage_ix, storage);
+  BrotliWriteBits(
+      4, dist->num_direct_distance_codes >> dist->distance_postfix_bits,
+      storage_ix, storage);
+  for (i = 0; i < mb->literal_split.num_types; ++i) {
+    BrotliWriteBits(2, literal_context_mode, storage_ix, storage);
+  }
+  /* Literal context map; the trivial form is used when no map is given. */
+  if (mb->literal_context_map_size == 0) {
+    StoreTrivialContextMap(mb->literal_histograms_size,
+        BROTLI_LITERAL_CONTEXT_BITS, tree, storage_ix, storage);
+  } else {
+    EncodeContextMap(m,
+        mb->literal_context_map, mb->literal_context_map_size,
+        mb->literal_histograms_size, tree, storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+  }
+  /* Distance context map, stored the same way. */
+  if (mb->distance_context_map_size == 0) {
+    StoreTrivialContextMap(mb->distance_histograms_size,
+        BROTLI_DISTANCE_CONTEXT_BITS, tree, storage_ix, storage);
+  } else {
+    EncodeContextMap(m,
+        mb->distance_context_map, mb->distance_context_map_size,
+        mb->distance_histograms_size, tree, storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+  }
+  /* Prefix codes for the literal, command and distance histograms. */
+  BuildAndStoreEntropyCodesLiteral(m, &literal_enc, mb->literal_histograms,
+      mb->literal_histograms_size, BROTLI_NUM_LITERAL_SYMBOLS, tree,
+      storage_ix, storage);
+  if (BROTLI_IS_OOM(m)) return;
+  BuildAndStoreEntropyCodesCommand(m, &command_enc, mb->command_histograms,
+      mb->command_histograms_size, BROTLI_NUM_COMMAND_SYMBOLS, tree,
+      storage_ix, storage);
+  if (BROTLI_IS_OOM(m)) return;
+  BuildAndStoreEntropyCodesDistance(m, &distance_enc, mb->distance_histograms,
+      mb->distance_histograms_size, num_distance_symbols, tree,
+      storage_ix, storage);
+  if (BROTLI_IS_OOM(m)) return;
+  BROTLI_FREE(m, tree);
+  /* Per command: symbol, extra bits, inserted literals, then the distance. */
+  for (i = 0; i < n_commands; ++i) {
+    const Command cmd = commands[i];
+    size_t cmd_code = cmd.cmd_prefix_;
+    StoreSymbol(&command_enc, cmd_code, storage_ix, storage);
+    StoreCommandExtra(&cmd, storage_ix, storage);
+    if (mb->literal_context_map_size == 0) {
+      size_t j;
+      for (j = cmd.insert_len_; j != 0; --j) {
+        StoreSymbol(&literal_enc, input[pos & mask], storage_ix, storage);
+        ++pos;
+      }
+    } else {
+      size_t j;
+      for (j = cmd.insert_len_; j != 0; --j) {
+        size_t context =
+            BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut);
+        uint8_t literal = input[pos & mask];
+        StoreSymbolWithContext(&literal_enc, literal, context,
+            mb->literal_context_map, storage_ix, storage,
+            BROTLI_LITERAL_CONTEXT_BITS);
+        prev_byte2 = prev_byte;
+        prev_byte = literal;
+        ++pos;
+      }
+    }
+    pos += CommandCopyLen(&cmd);
+    if (CommandCopyLen(&cmd)) {  /* Re-read the last two bytes as context. */
+      prev_byte2 = input[(pos - 2) & mask];
+      prev_byte = input[(pos - 1) & mask];
+      if (cmd.cmd_prefix_ >= 128) {  /* Only these carry a distance symbol. */
+        size_t dist_code = cmd.dist_prefix_ & 0x3FF;  /* Low 10 bits. */
+        uint32_t distnumextra = cmd.dist_prefix_ >> 10;  /* Extra-bit count. */
+        uint64_t distextra = cmd.dist_extra_;
+        if (mb->distance_context_map_size == 0) {
+          StoreSymbol(&distance_enc, dist_code, storage_ix, storage);
+        } else {
+          size_t context = CommandDistanceContext(&cmd);
+          StoreSymbolWithContext(&distance_enc, dist_code, context,
+              mb->distance_context_map, storage_ix, storage,
+              BROTLI_DISTANCE_CONTEXT_BITS);
+        }
+        BrotliWriteBits(distnumextra, distextra, storage_ix, storage);
+      }
+    }
+  }
+  CleanupBlockEncoder(m, &distance_enc);
+  CleanupBlockEncoder(m, &command_enc);
+  CleanupBlockEncoder(m, &literal_enc);
+  if (is_last) {  /* Final meta-block: pad to a whole byte. */
+    JumpToByteBoundary(storage_ix, storage);
+  }
+}
+
+/* Tallies the symbols of |commands| into the three histograms: each command
+   prefix, each inserted literal (read from |input| at position & mask) and,
+   for copying commands with a prefix of 128 or more, the distance symbol. */
+static void BuildHistograms(const uint8_t* input, size_t start_pos,
+    size_t mask, const Command* commands, size_t n_commands,
+    HistogramLiteral* lit_histo, HistogramCommand* cmd_histo,
+    HistogramDistance* dist_histo) {
+  size_t cursor = start_pos;
+  size_t k;
+  for (k = 0; k < n_commands; ++k) {
+    const Command* cmd = &commands[k];
+    const size_t insert_end = cursor + cmd->insert_len_;
+    const size_t copy_len = CommandCopyLen(cmd);
+    HistogramAddCommand(cmd_histo, cmd->cmd_prefix_);
+    while (cursor != insert_end) {
+      HistogramAddLiteral(lit_histo, input[cursor & mask]);
+      ++cursor;
+    }
+    cursor += copy_len;
+    if (copy_len != 0 && cmd->cmd_prefix_ >= 128) {
+      HistogramAddDistance(dist_histo, cmd->dist_prefix_ & 0x3FF);
+    }
+  }
+}
+
+/* Writes every command in |commands| with the supplied prefix codes: the
+   command symbol and its extra bits, the inserted literals (read from |input|
+   at position & mask) and, for copying commands with a prefix of 128 or
+   more, the distance symbol followed by its extra bits. For each category,
+   *_depth holds the code length of a symbol and *_bits the code itself. */
+static void StoreDataWithHuffmanCodes(const uint8_t* input, size_t start_pos,
+    size_t mask, const Command* commands, size_t n_commands,
+    const uint8_t* lit_depth, const uint16_t* lit_bits,
+    const uint8_t* cmd_depth, const uint16_t* cmd_bits,
+    const uint8_t* dist_depth, const uint16_t* dist_bits,
+    size_t* storage_ix, uint8_t* storage) {
+  size_t cursor = start_pos;
+  size_t k;
+  for (k = 0; k < n_commands; ++k) {
+    const Command* cmd = &commands[k];
+    const size_t symbol = cmd->cmd_prefix_;
+    const size_t insert_end = cursor + cmd->insert_len_;
+    const size_t copy_len = CommandCopyLen(cmd);
+    /* Command symbol, then its extra bits. */
+    BrotliWriteBits(cmd_depth[symbol], cmd_bits[symbol], storage_ix, storage);
+    StoreCommandExtra(cmd, storage_ix, storage);
+    /* Literals inserted by this command. */
+    while (cursor != insert_end) {
+      const uint8_t byte = input[cursor & mask];
+      BrotliWriteBits(lit_depth[byte], lit_bits[byte], storage_ix, storage);
+      ++cursor;
+    }
+    cursor += copy_len;
+    if (copy_len != 0 && cmd->cmd_prefix_ >= 128) {
+      /* dist_prefix_ packs the symbol (low 10 bits) and extra-bit count. */
+      const size_t dist_symbol = cmd->dist_prefix_ & 0x3FF;
+      const uint32_t extra_count = cmd->dist_prefix_ >> 10;
+      const uint32_t extra_bits = cmd->dist_extra_;
+      BrotliWriteBits(dist_depth[dist_symbol], dist_bits[dist_symbol],
+                      storage_ix, storage);
+      BrotliWriteBits(extra_count, extra_bits, storage_ix, storage);
+    }
+  }
+}
+
+void BrotliStoreMetaBlockTrivial(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+    const Command* commands, size_t n_commands,
+    size_t* storage_ix, uint8_t* storage) {
+  HistogramLiteral lit_histo;
+  HistogramCommand cmd_histo;
+  HistogramDistance dist_histo;
+  uint8_t lit_depth[BROTLI_NUM_LITERAL_SYMBOLS];
+  uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS];
+  uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS];
+  uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS];
+  uint8_t dist_depth[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
+  uint16_t dist_bits[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
+  HuffmanTree* tree;
+  uint32_t num_distance_symbols = params->dist.alphabet_size_max;
+  /* One histogram and one prefix code per category; no block splitting. */
+  StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
+
+  HistogramClearLiteral(&lit_histo);
+  HistogramClearCommand(&cmd_histo);
+  HistogramClearDistance(&dist_histo);
+
+  BuildHistograms(input, start_pos, mask, commands, n_commands,
+                  &lit_histo, &cmd_histo, &dist_histo);
+  /* NOTE(review): 13 zero bits - presumably all-default header fields. */
+  BrotliWriteBits(13, 0, storage_ix, storage);
+  /* Build and store the three codes; |tree| is freed right after. */
+  tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tree)) return;
+  BuildAndStoreHuffmanTree(lit_histo.data_, BROTLI_NUM_LITERAL_SYMBOLS,
+                           BROTLI_NUM_LITERAL_SYMBOLS, tree,
+                           lit_depth, lit_bits,
+                           storage_ix, storage);
+  BuildAndStoreHuffmanTree(cmd_histo.data_, BROTLI_NUM_COMMAND_SYMBOLS,
+                           BROTLI_NUM_COMMAND_SYMBOLS, tree,
+                           cmd_depth, cmd_bits,
+                           storage_ix, storage);
+  BuildAndStoreHuffmanTree(dist_histo.data_, MAX_SIMPLE_DISTANCE_ALPHABET_SIZE,
+                           num_distance_symbols, tree,
+                           dist_depth, dist_bits,
+                           storage_ix, storage);
+  BROTLI_FREE(m, tree);
+  StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
+                            n_commands, lit_depth, lit_bits,
+                            cmd_depth, cmd_bits,
+                            dist_depth, dist_bits,
+                            storage_ix, storage);
+  if (is_last) {  /* Final meta-block: pad to a whole byte. */
+    JumpToByteBoundary(storage_ix, storage);
+  }
+}
+
+void BrotliStoreMetaBlockFast(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+    const Command* commands, size_t n_commands,
+    size_t* storage_ix, uint8_t* storage) {
+  uint32_t num_distance_symbols = params->dist.alphabet_size_max;
+  uint32_t distance_alphabet_bits =
+      Log2FloorNonZero(num_distance_symbols - 1) + 1;
+  /* NOTE(review): presumably bits needed to index any distance symbol. */
+  StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
+
+  BrotliWriteBits(13, 0, storage_ix, storage);
+  /* Few commands: static command/distance codes, only literals are fitted. */
+  if (n_commands <= 128) {
+    uint32_t histogram[BROTLI_NUM_LITERAL_SYMBOLS] = { 0 };
+    size_t pos = start_pos;
+    size_t num_literals = 0;
+    size_t i;
+    uint8_t lit_depth[BROTLI_NUM_LITERAL_SYMBOLS];
+    uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS];
+    for (i = 0; i < n_commands; ++i) {
+      const Command cmd = commands[i];
+      size_t j;
+      for (j = cmd.insert_len_; j != 0; --j) {
+        ++histogram[input[pos & mask]];
+        ++pos;
+      }
+      num_literals += cmd.insert_len_;
+      pos += CommandCopyLen(&cmd);
+    }
+    BrotliBuildAndStoreHuffmanTreeFast(m, histogram, num_literals,
+                                       /* max_bits = */ 8,
+                                       lit_depth, lit_bits,
+                                       storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+    StoreStaticCommandHuffmanTree(storage_ix, storage);
+    StoreStaticDistanceHuffmanTree(storage_ix, storage);
+    StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
+                              n_commands, lit_depth, lit_bits,
+                              kStaticCommandCodeDepth,
+                              kStaticCommandCodeBits,
+                              kStaticDistanceCodeDepth,
+                              kStaticDistanceCodeBits,
+                              storage_ix, storage);
+  } else {  /* Otherwise fit a code to each category's own histogram. */
+    HistogramLiteral lit_histo;
+    HistogramCommand cmd_histo;
+    HistogramDistance dist_histo;
+    uint8_t lit_depth[BROTLI_NUM_LITERAL_SYMBOLS];
+    uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS];
+    uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS];
+    uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS];
+    uint8_t dist_depth[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
+    uint16_t dist_bits[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
+    HistogramClearLiteral(&lit_histo);
+    HistogramClearCommand(&cmd_histo);
+    HistogramClearDistance(&dist_histo);
+    BuildHistograms(input, start_pos, mask, commands, n_commands,
+                    &lit_histo, &cmd_histo, &dist_histo);
+    BrotliBuildAndStoreHuffmanTreeFast(m, lit_histo.data_,
+                                       lit_histo.total_count_,
+                                       /* max_bits = */ 8,
+                                       lit_depth, lit_bits,
+                                       storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+    BrotliBuildAndStoreHuffmanTreeFast(m, cmd_histo.data_,
+                                       cmd_histo.total_count_,
+                                       /* max_bits = */ 10,
+                                       cmd_depth, cmd_bits,
+                                       storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+    BrotliBuildAndStoreHuffmanTreeFast(m, dist_histo.data_,
+                                       dist_histo.total_count_,
+                                       /* max_bits = */
+                                       distance_alphabet_bits,
+                                       dist_depth, dist_bits,
+                                       storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+    StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
+                              n_commands, lit_depth, lit_bits,
+                              cmd_depth, cmd_bits,
+                              dist_depth, dist_bits,
+                              storage_ix, storage);
+  }
+
+  if (is_last) {  /* Final meta-block: pad to a whole byte. */
+    JumpToByteBoundary(storage_ix, storage);
+  }
+}
+
+/* Stores |len| bytes of |input|, starting at offset (position & mask), as an
+   uncompressed meta-block: header, padding to a byte boundary, then the raw
+   bytes (copied in two pieces when the range runs past |mask|). */
+void BrotliStoreUncompressedMetaBlock(BROTLI_BOOL is_final_block,
+    const uint8_t* BROTLI_RESTRICT input, size_t position, size_t mask,
+    size_t len, size_t* BROTLI_RESTRICT storage_ix,
+    uint8_t* BROTLI_RESTRICT storage) {
+  const size_t ring_size = mask + 1;
+  size_t offset = position & mask;
+  BrotliStoreUncompressedMetaBlockHeader(len, storage_ix, storage);
+  JumpToByteBoundary(storage_ix, storage);
+
+  if (offset + len > ring_size) {
+    /* Copy the part that runs up to the end of the buffer first. */
+    const size_t tail = ring_size - offset;
+    memcpy(storage + (*storage_ix >> 3), input + offset, tail);
+    *storage_ix += tail * 8;
+    len -= tail;
+    offset = 0;
+  }
+  memcpy(storage + (*storage_ix >> 3), input + offset, len);
+  *storage_ix += len * 8;
+
+  /* BrotliWriteBits needs the bytes after the write position cleared. */
+  BrotliWriteBitsPrepareStorage(*storage_ix, storage);
+
+  /* The uncompressed block itself may not be the final block, so close the
+     stream with an empty last meta-block after it. */
+  if (is_final_block) {
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
+    JumpToByteBoundary(storage_ix, storage);
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/brotli_bit_stream.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/brotli_bit_stream.h
new file mode 100644
index 0000000000..2ed703bf79
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/brotli_bit_stream.h
@@ -0,0 +1,84 @@
+/* Copyright 2014 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions to convert brotli-related data structures into the
+   brotli bit stream. The functions here operate under
+   assumption that there is enough space in the storage, i.e., there are
+   no out-of-range checks anywhere.
+
+   These functions do bit addressing into a byte array. The byte array
+   is called "storage" and the index to the bit is called storage_ix
+   in function arguments. */
+
+#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
+#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
+
+#include "../common/context.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./command.h"
+#include "./entropy_encode.h"
+#include "./memory.h"
+#include "./metablock.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* All Store functions here will use a storage_ix, which is always the bit
+   position for the current storage. */
+
+BROTLI_INTERNAL void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
+    HuffmanTree* tree, size_t* storage_ix, uint8_t* storage);
+
+BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast(
+    MemoryManager* m, const uint32_t* histogram, const size_t histogram_total,
+    const size_t max_bits, uint8_t* depth, uint16_t* bits, size_t* storage_ix,
+    uint8_t* storage);
+
+/* REQUIRES: length > 0 */
+/* REQUIRES: length <= (1 << 24) */
+BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last,
+    const BrotliEncoderParams* params, ContextType literal_context_mode,
+    const Command* commands, size_t n_commands, const MetaBlockSplit* mb,
+    size_t* storage_ix, uint8_t* storage);
+
+/* Stores the meta-block without doing any block splitting, just collects
+   one histogram per block category and uses that for entropy coding.
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
+BROTLI_INTERNAL void BrotliStoreMetaBlockTrivial(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+    const Command* commands, size_t n_commands,
+    size_t* storage_ix, uint8_t* storage);
+
+/* Same as above, but uses static prefix codes for histograms with only a few
+   symbols, and uses static code length prefix codes for all other histograms.
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
+BROTLI_INTERNAL void BrotliStoreMetaBlockFast(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+    const Command* commands, size_t n_commands,
+    size_t* storage_ix, uint8_t* storage);
+
+/* This is for storing uncompressed blocks (simple raw storage of
+   bytes-as-bytes).
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
+BROTLI_INTERNAL void BrotliStoreUncompressedMetaBlock(
+    BROTLI_BOOL is_final_block, const uint8_t* BROTLI_RESTRICT input,
+    size_t position, size_t mask, size_t len,
+    size_t* BROTLI_RESTRICT storage_ix, uint8_t* BROTLI_RESTRICT storage);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_BROTLI_BIT_STREAM_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/cluster.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/cluster.c
new file mode 100644
index 0000000000..a20dfd385f
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/cluster.c
@@ -0,0 +1,56 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions for clustering similar histograms together. */
+
+#include "./cluster.h"
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./bit_cost.h"  /* BrotliPopulationCost */
+#include "./fast_log.h"
+#include "./histogram.h"
+#include "./memory.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static BROTLI_INLINE BROTLI_BOOL HistogramPairIsLess(
+    const HistogramPair* p1, const HistogramPair* p2) {
+  /* |p1| is "less" when its cost_diff is larger; ties: wider idx2 - idx1. */
+  return TO_BROTLI_BOOL((p1->cost_diff != p2->cost_diff)
+      ? (p1->cost_diff > p2->cost_diff)
+      : ((p1->idx2 - p1->idx1) > (p2->idx2 - p2->idx1)));
+}
+
+/* Returns entropy reduction of the context map when we combine two clusters. */
+static BROTLI_INLINE double ClusterCostDiff(size_t size_a, size_t size_b) {
+  size_t size_c = size_a + size_b;  /* Size of the combined cluster. */
+  return (double)size_a * FastLog2(size_a) +
+    (double)size_b * FastLog2(size_b) -
+    (double)size_c * FastLog2(size_c);  /* Combined term is subtracted. */
+}
+
+#define CODE(X) X
+
+#define FN(X) X ## Literal
+#include "./cluster_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Command
+#include "./cluster_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Distance
+#include "./cluster_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#undef CODE
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/cluster.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/cluster.h
new file mode 100644
index 0000000000..bb26124d24
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/cluster.h
@@ -0,0 +1,48 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions for clustering similar histograms together. */
+
+#ifndef BROTLI_ENC_CLUSTER_H_
+#define BROTLI_ENC_CLUSTER_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./histogram.h"
+#include "./memory.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct HistogramPair {
+  uint32_t idx1;  /* Lower histogram index of the pair. */
+  uint32_t idx2;  /* Higher histogram index of the pair. */
+  double cost_combo;  /* Bit cost of the two histograms combined. */
+  double cost_diff;  /* Cost change if the pair is combined. */
+} HistogramPair;
+
+#define CODE(X) /* Declaration */;
+
+#define FN(X) X ## Literal
+#include "./cluster_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Command
+#include "./cluster_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Distance
+#include "./cluster_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#undef CODE
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_CLUSTER_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/cluster_inc.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/cluster_inc.h
new file mode 100644
index 0000000000..3d4f40e601
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/cluster_inc.h
@@ -0,0 +1,320 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, CODE */
+
+#define HistogramType FN(Histogram)
+
+/* Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
+   it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue. */
+BROTLI_INTERNAL void FN(BrotliCompareAndPushToQueue)(
+    const HistogramType* out, const uint32_t* cluster_size, uint32_t idx1,
+    uint32_t idx2, size_t max_num_pairs, HistogramPair* pairs,
+    size_t* num_pairs) CODE({
+  BROTLI_BOOL is_good_pair = BROTLI_FALSE;
+  HistogramPair p;
+  p.idx1 = p.idx2 = 0;
+  p.cost_diff = p.cost_combo = 0;
+  if (idx1 == idx2) {  /* A histogram is never paired with itself. */
+    return;
+  }
+  if (idx2 < idx1) {  /* Store the pair with the smaller index first. */
+    uint32_t t = idx2;
+    idx2 = idx1;
+    idx1 = t;
+  }
+  p.idx1 = idx1;
+  p.idx2 = idx2;
+  p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
+  p.cost_diff -= out[idx1].bit_cost_;
+  p.cost_diff -= out[idx2].bit_cost_;
+  /* p.cost_combo is added to p.cost_diff below, once the pair is accepted. */
+  if (out[idx1].total_count_ == 0) {  /* out[idx1] is empty. */
+    p.cost_combo = out[idx2].bit_cost_;
+    is_good_pair = BROTLI_TRUE;
+  } else if (out[idx2].total_count_ == 0) {  /* out[idx2] is empty. */
+    p.cost_combo = out[idx1].bit_cost_;
+    is_good_pair = BROTLI_TRUE;
+  } else {
+    double threshold = *num_pairs == 0 ? 1e99 :
+        BROTLI_MAX(double, 0.0, pairs[0].cost_diff);
+    HistogramType combo = out[idx1];
+    double cost_combo;
+    FN(HistogramAddHistogram)(&combo, &out[idx2]);
+    cost_combo = FN(BrotliPopulationCost)(&combo);
+    if (cost_combo < threshold - p.cost_diff) {  /* cost_diff < threshold */
+      p.cost_combo = cost_combo;
+      is_good_pair = BROTLI_TRUE;
+    }
+  }
+  if (is_good_pair) {
+    p.cost_diff += p.cost_combo;
+    if (*num_pairs > 0 && HistogramPairIsLess(&pairs[0], &p)) {
+      /* Replace the top of the queue if needed. */
+      if (*num_pairs < max_num_pairs) {
+        pairs[*num_pairs] = pairs[0];  /* Old front moves to the back. */
+        ++(*num_pairs);
+      }
+      pairs[0] = p;  /* If the queue was full, the old front is dropped. */
+    } else if (*num_pairs < max_num_pairs) {
+      pairs[*num_pairs] = p;
+      ++(*num_pairs);
+    }
+  }
+})
+
+BROTLI_INTERNAL size_t FN(BrotliHistogramCombine)(HistogramType* out,
+                                                  uint32_t* cluster_size,
+                                                  uint32_t* symbols,
+                                                  uint32_t* clusters,
+                                                  HistogramPair* pairs,
+                                                  size_t num_clusters,
+                                                  size_t symbols_size,
+                                                  size_t max_clusters,
+                                                  size_t max_num_pairs) CODE({
+  double cost_diff_threshold = 0.0;
+  size_t min_cluster_size = 1;
+  size_t num_pairs = 0;
+  /* Greedily merges the histograms in clusters[]; returns how many remain. */
+  {
+    /* We maintain a vector of histogram pairs, with the property that the pair
+       with the maximum bit cost reduction is the first. */
+    size_t idx1;
+    for (idx1 = 0; idx1 < num_clusters; ++idx1) {
+      size_t idx2;
+      for (idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
+        FN(BrotliCompareAndPushToQueue)(out, cluster_size, clusters[idx1],
+            clusters[idx2], max_num_pairs, &pairs[0], &num_pairs);
+      }
+    }
+  }
+  /* Phase 1 merges while the cost drops; phase 2 enforces max_clusters. */
+  while (num_clusters > min_cluster_size) {
+    uint32_t best_idx1;
+    uint32_t best_idx2;
+    size_t i;
+    if (pairs[0].cost_diff >= cost_diff_threshold) {
+      cost_diff_threshold = 1e99;  /* Phase 2: accept any remaining merge... */
+      min_cluster_size = max_clusters;  /* ...until max_clusters are left. */
+      continue;
+    }
+    /* Take the best pair from the top of heap. */
+    best_idx1 = pairs[0].idx1;
+    best_idx2 = pairs[0].idx2;
+    FN(HistogramAddHistogram)(&out[best_idx1], &out[best_idx2]);
+    out[best_idx1].bit_cost_ = pairs[0].cost_combo;
+    cluster_size[best_idx1] += cluster_size[best_idx2];
+    for (i = 0; i < symbols_size; ++i) {  /* Relabel best_idx2 as best_idx1. */
+      if (symbols[i] == best_idx2) {
+        symbols[i] = best_idx1;
+      }
+    }
+    for (i = 0; i < num_clusters; ++i) {  /* Drop best_idx2 from clusters[]. */
+      if (clusters[i] == best_idx2) {
+        memmove(&clusters[i], &clusters[i + 1],
+                (num_clusters - i - 1) * sizeof(clusters[0]));
+        break;
+      }
+    }
+    --num_clusters;
+    {
+      /* Remove pairs intersecting the just combined best pair. */
+      size_t copy_to_idx = 0;
+      for (i = 0; i < num_pairs; ++i) {
+        HistogramPair* p = &pairs[i];
+        if (p->idx1 == best_idx1 || p->idx2 == best_idx1 ||
+            p->idx1 == best_idx2 || p->idx2 == best_idx2) {
+          /* Remove invalid pair from the queue. */
+          continue;
+        }
+        if (HistogramPairIsLess(&pairs[0], p)) {
+          /* Replace the top of the queue if needed. */
+          HistogramPair front = pairs[0];
+          pairs[0] = *p;
+          pairs[copy_to_idx] = front;
+        } else {
+          pairs[copy_to_idx] = *p;
+        }
+        ++copy_to_idx;
+      }
+      num_pairs = copy_to_idx;
+    }
+
+    /* Push new pairs formed with the combined histogram to the heap. */
+    for (i = 0; i < num_clusters; ++i) {
+      FN(BrotliCompareAndPushToQueue)(out, cluster_size, best_idx1, clusters[i],
+                                      max_num_pairs, &pairs[0], &num_pairs);
+    }
+  }
+  return num_clusters;
+})
+
+/* Returns the bit cost of adding `histogram` to `candidate`, relative to the
+   current cost of `candidate`; an empty `histogram` costs nothing. */
+BROTLI_INTERNAL double FN(BrotliHistogramBitCostDistance)(
+    const HistogramType* histogram, const HistogramType* candidate) CODE({
+  HistogramType merged;
+  if (histogram->total_count_ == 0) return 0.0;
+  /* Merge into a scratch copy so that neither input is modified. */
+  merged = *histogram;
+  FN(HistogramAddHistogram)(&merged, candidate);
+  return FN(BrotliPopulationCost)(&merged) - candidate->bit_cost_;
+})
+
+/* Maps each in[i] to the cheapest of the out[] histograms and rebuilds out[].
+   On entry, clusters[0..num_clusters) holds the unique values stored in
+   symbols[0..in_size); this is not guaranteed any more on return.
+   Note: out[]->bit_cost_ is assumed to be up-to-date on entry. */
+BROTLI_INTERNAL void FN(BrotliHistogramRemap)(const HistogramType* in,
+    size_t in_size, const uint32_t* clusters, size_t num_clusters,
+    HistogramType* out, uint32_t* symbols) CODE({
+  size_t k;
+  size_t c;
+  for (k = 0; k < in_size; ++k) {
+    /* Start from the previous input's symbol (or symbols[0] when k == 0). */
+    uint32_t winner = symbols[k == 0 ? 0 : k - 1];
+    double winner_bits =
+        FN(BrotliHistogramBitCostDistance)(&in[k], &out[winner]);
+    for (c = 0; c < num_clusters; ++c) {
+      const uint32_t challenger = clusters[c];
+      const double challenger_bits =
+          FN(BrotliHistogramBitCostDistance)(&in[k], &out[challenger]);
+      if (challenger_bits < winner_bits) {
+        winner_bits = challenger_bits;
+        winner = challenger;
+      }
+    }
+    symbols[k] = winner;
+  }
+
+  /* Rebuild every listed out[] histogram from the inputs assigned to it. */
+  for (c = 0; c < num_clusters; ++c) FN(HistogramClear)(&out[clusters[c]]);
+  for (k = 0; k < in_size; ++k) {
+    FN(HistogramAddHistogram)(&out[symbols[k]], &in[k]);
+  }
+})
+
+/* Renumbers the histograms referenced by symbols[0..length) so that the used
+   indexes become 0, 1, 2, ... in order of first appearance:
+     * on entry, symbols[] holds indexes into out[], N of them distinct
+       (possibly N < length);
+     * on return, symbols'[i] = f(symbols[i]) and
+       out'[symbols'[i]] = out[symbols[i]] for each 0 <= i < length, where f
+       is a bijection from the set of values in symbols[] onto [0..N).
+   Uses `m` for two temporary arrays; returns 0 early if an allocation
+   fails.
+   Returns N, the number of unique values in symbols[]. */
+BROTLI_INTERNAL size_t FN(BrotliHistogramReindex)(MemoryManager* m,
+    HistogramType* out, uint32_t* symbols, size_t length) CODE({
+  static const uint32_t kUnassigned = BROTLI_UINT32_MAX;
+  uint32_t* remap = BROTLI_ALLOC(m, uint32_t, length);
+  uint32_t num_unique = 0;
+  uint32_t num_moved = 0;
+  HistogramType* reordered;
+  size_t pos;
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(remap)) return 0;
+  for (pos = 0; pos < length; ++pos) remap[pos] = kUnassigned;
+  /* Number the distinct symbols in order of first appearance. */
+  for (pos = 0; pos < length; ++pos) {
+    const uint32_t old_symbol = symbols[pos];
+    if (remap[old_symbol] != kUnassigned) continue;
+    remap[old_symbol] = num_unique;
+    ++num_unique;
+  }
+  /* TODO: by using idea of "cycle-sort" we can avoid allocation of
+     the scratch array and reduce the number of copying by the factor of 2. */
+  reordered = BROTLI_ALLOC(m, HistogramType, num_unique);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(reordered)) return 0;
+  /* Gather the used histograms in their new order and rewrite symbols[]. */
+  for (pos = 0; pos < length; ++pos) {
+    const uint32_t old_symbol = symbols[pos];
+    const uint32_t new_symbol = remap[old_symbol];
+    if (new_symbol == num_moved) {
+      /* First occurrence of this symbol: its histogram is next in line. */
+      reordered[num_moved] = out[old_symbol];
+      ++num_moved;
+    }
+    symbols[pos] = new_symbol;
+  }
+  BROTLI_FREE(m, remap);
+  for (pos = 0; pos < num_moved; ++pos) out[pos] = reordered[pos];
+  BROTLI_FREE(m, reordered);
+  return num_moved;
+})
+
+BROTLI_INTERNAL void FN(BrotliClusterHistograms)(
+    MemoryManager* m, const HistogramType* in, const size_t in_size,
+    size_t max_histograms, HistogramType* out, size_t* out_size,
+    uint32_t* histogram_symbols) CODE({
+  uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, in_size);
+  uint32_t* clusters = BROTLI_ALLOC(m, uint32_t, in_size);
+  size_t num_clusters = 0;
+  const size_t max_input_histograms = 64;  /* Batch size of the first pass. */
+  size_t pairs_capacity = max_input_histograms * max_input_histograms / 2;
+  /* For the first pass of clustering, we allow all pairs. */
+  HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity + 1);
+  size_t i;
+  /* Clusters in[] into out[]; histogram_symbols[i] is the cluster of in[i]. */
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(cluster_size) ||
+      BROTLI_IS_NULL(clusters) || BROTLI_IS_NULL(pairs)) {
+    return;
+  }
+  /* Initially every input histogram is a cluster of its own. */
+  for (i = 0; i < in_size; ++i) {
+    cluster_size[i] = 1;
+  }
+
+  for (i = 0; i < in_size; ++i) {
+    out[i] = in[i];
+    out[i].bit_cost_ = FN(BrotliPopulationCost)(&in[i]);
+    histogram_symbols[i] = (uint32_t)i;
+  }
+  /* First pass: combine within batches of max_input_histograms inputs. */
+  for (i = 0; i < in_size; i += max_input_histograms) {
+    size_t num_to_combine =
+        BROTLI_MIN(size_t, in_size - i, max_input_histograms);
+    size_t num_new_clusters;
+    size_t j;
+    for (j = 0; j < num_to_combine; ++j) {
+      clusters[num_clusters + j] = (uint32_t)(i + j);
+    }
+    num_new_clusters =
+        FN(BrotliHistogramCombine)(out, cluster_size,
+                                   &histogram_symbols[i],
+                                   &clusters[num_clusters], pairs,
+                                   num_to_combine, num_to_combine,
+                                   max_histograms, pairs_capacity);
+    num_clusters += num_new_clusters;  /* Survivors stay packed in clusters[]. */
+  }
+
+  {
+    /* For the second pass, we limit the total number of histogram pairs.
+       After this limit is reached, we only keep searching for the best pair. */
+    size_t max_num_pairs = BROTLI_MIN(size_t,
+        64 * num_clusters, (num_clusters / 2) * num_clusters);
+    BROTLI_ENSURE_CAPACITY(
+        m, HistogramPair, pairs, pairs_capacity, max_num_pairs + 1);
+    if (BROTLI_IS_OOM(m)) return;
+
+    /* Collapse similar histograms. */
+    num_clusters = FN(BrotliHistogramCombine)(out, cluster_size,
+                                              histogram_symbols, clusters,
+                                              pairs, num_clusters, in_size,
+                                              max_histograms, max_num_pairs);
+  }
+  BROTLI_FREE(m, pairs);
+  BROTLI_FREE(m, cluster_size);
+  /* Find the optimal map from original histograms to the final ones. */
+  FN(BrotliHistogramRemap)(in, in_size, clusters, num_clusters,
+                           out, histogram_symbols);
+  BROTLI_FREE(m, clusters);
+  /* Convert the context map to a canonical form. */
+  *out_size = FN(BrotliHistogramReindex)(m, out, histogram_symbols, in_size);
+  if (BROTLI_IS_OOM(m)) return;
+})
+
+#undef HistogramType
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/command.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/command.h
new file mode 100644
index 0000000000..1aac85689b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/command.h
@@ -0,0 +1,190 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* This class models a sequence of literals and a backward reference copy. */
+
+#ifndef BROTLI_ENC_COMMAND_H_
+#define BROTLI_ENC_COMMAND_H_
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./fast_log.h"
+#include "./params.h"
+#include "./prefix.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Base value and number of extra bits of each insert / copy length code. */
+static const uint32_t kInsBase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26,
+    34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
+static const uint32_t kInsExtra[] = { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4,
+    4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24 };
+static const uint32_t kCopyBase[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18,
+    22, 30, 38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118 };
+static const uint32_t kCopyExtra[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3,
+    3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24 };
+/* Maps an insert length onto its insert length code, a value in [0..23]. */
+static BROTLI_INLINE uint16_t GetInsertLengthCode(size_t insertlen) {
+  if (insertlen >= 22594) return 23u;
+  if (insertlen >= 6210) return 22u;
+  if (insertlen >= 2114) return 21u;
+  if (insertlen >= 130) {
+    return (uint16_t)(Log2FloorNonZero(insertlen - 66) + 10);
+  }
+  if (insertlen >= 6) {
+    /* Two codes per power of two; the bit below the top one picks which. */
+    const size_t tail = insertlen - 2;
+    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
+    return (uint16_t)((nbits << 1) + (tail >> nbits) + 2);
+  }
+  return (uint16_t)insertlen;
+}
+
+/* Maps a copy length onto its copy length code.
+   NOTE(review): the last branch computes copylen - 2, so copylen >= 2. */
+static BROTLI_INLINE uint16_t GetCopyLengthCode(size_t copylen) {
+  if (copylen >= 2118) return 23u;
+  if (copylen >= 134) return (uint16_t)(Log2FloorNonZero(copylen - 70) + 12);
+  if (copylen >= 10) {
+    const size_t tail = copylen - 6;
+    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
+    return (uint16_t)((nbits << 1) + (tail >> nbits) + 4);
+  }
+  return (uint16_t)(copylen - 2);
+}
+
+/* Merges an insert and a copy length code into one command prefix symbol. */
+static BROTLI_INLINE uint16_t CombineLengthCodes(
+    uint16_t inscode, uint16_t copycode, BROTLI_BOOL use_last_distance) {
+  const uint16_t low_bits =
+      (uint16_t)((copycode & 0x7u) | ((inscode & 0x7u) << 3u));
+  uint32_t cell;
+  if (use_last_distance && inscode < 8u && copycode < 16u) {
+    /* Symbols 0..127 are the ones produced with use_last_distance set. */
+    if (copycode < 8u) return low_bits;
+    return (uint16_t)(low_bits | 64u);
+  }
+  /* Specification: 5 Encoding of ... (last table).
+     cell = 2 * index, where index in [0..8] names the cell of that table.
+     All values in the table are K * 64 with K = [2, 3, 6, 4, 5, 8, 7, 9, 10];
+     D = K - index - 1 = [1, 1, 3, 0, 0, 2, 0, 1, 2] needs 2 bits per cell.
+     The magic constant holds D, shifted 6 bits left to save a multiply. */
+  cell = 2u * ((copycode >> 3u) + 3u * (inscode >> 3u));
+  cell = (cell << 5u) + 0x40u + ((0x520D40u >> cell) & 0xC0u);
+  return (uint16_t)(cell | low_bits);
+}
+
+/* Writes to *code the command prefix for the given insert / copy lengths. */
+static BROTLI_INLINE void GetLengthCode(size_t insertlen, size_t copylen,
+                                        BROTLI_BOOL use_last_distance,
+                                        uint16_t* code) {
+  *code = CombineLengthCodes(GetInsertLengthCode(insertlen),
+                             GetCopyLengthCode(copylen), use_last_distance);
+}
+
+static BROTLI_INLINE uint32_t GetInsertBase(uint16_t inscode) {
+  return kInsBase[inscode];  /* unchecked lookup: inscode must be < 24 */
+}
+
+static BROTLI_INLINE uint32_t GetInsertExtra(uint16_t inscode) {
+  return kInsExtra[inscode];  /* unchecked lookup: inscode must be < 24 */
+}
+
+static BROTLI_INLINE uint32_t GetCopyBase(uint16_t copycode) {
+  return kCopyBase[copycode];  /* unchecked lookup: copycode must be < 24 */
+}
+
+static BROTLI_INLINE uint32_t GetCopyExtra(uint16_t copycode) {
+  return kCopyExtra[copycode];  /* unchecked lookup: copycode must be < 24 */
+}
+
+typedef struct Command {
+  uint32_t insert_len_;  /* Insert length, as passed to InitCommand(). */
+  /* Stores copy_len in low 25 bits and copy_code - copy_len in high 7 bits. */
+  uint32_t copy_len_;
+  /* Stores distance extra bits. */
+  uint32_t dist_extra_;
+  uint16_t cmd_prefix_;  /* Combined insert / copy length code. */
+  /* Stores distance code in low 10 bits
+     and number of extra bits in high 6 bits. */
+  uint16_t dist_prefix_;
+} Command;
+
+/* distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1. */
+static BROTLI_INLINE void InitCommand(Command* self,
+    const BrotliDistanceParams* dist, size_t insertlen,
+    size_t copylen, int copylen_code_delta, size_t distance_code) {
+  /* Go through int8_t / uint8_t so that the stored bits do not depend on how
+     the platform represents negative ints. */
+  const uint32_t packed_delta = (uint8_t)((int8_t)copylen_code_delta);
+  const size_t copylen_code = (size_t)((int)copylen + copylen_code_delta);
+  self->insert_len_ = (uint32_t)insertlen;
+  self->copy_len_ = (uint32_t)(copylen | (packed_delta << 25));
+  /* The distance prefix is computed first: the command prefix below depends
+     on whether its code part (low 10 bits) turned out to be 0. */
+  PrefixEncodeCopyDistance(
+      distance_code, dist->num_direct_distance_codes,
+      dist->distance_postfix_bits, &self->dist_prefix_, &self->dist_extra_);
+  GetLengthCode(insertlen, copylen_code,
+      TO_BROTLI_BOOL((self->dist_prefix_ & 0x3FF) == 0), &self->cmd_prefix_);
+}
+
+static BROTLI_INLINE void InitInsertCommand(Command* self, size_t insertlen) {
+  self->insert_len_ = (uint32_t)insertlen;  /* command with copy length 0 */
+  self->copy_len_ = 4 << 25;  /* copy_len 0; high bits hold a code delta of 4 */
+  self->dist_extra_ = 0;
+  self->dist_prefix_ = BROTLI_NUM_DISTANCE_SHORT_CODES;
+  GetLengthCode(insertlen, 4, BROTLI_FALSE, &self->cmd_prefix_);
+}
+
+/* Rebuilds the distance code from self->dist_prefix_ and self->dist_extra_,
+   using the postfix / direct-code parameters found in `dist`. */
+static BROTLI_INLINE uint32_t CommandRestoreDistanceCode(
+    const Command* self, const BrotliDistanceParams* dist) {
+  const uint32_t dcode = self->dist_prefix_ & 0x3FFu;
+  if (dcode >=
+      BROTLI_NUM_DISTANCE_SHORT_CODES + dist->num_direct_distance_codes) {
+    /* Position of the code among the codes that carry extra bits. */
+    const uint32_t rel = dcode - dist->num_direct_distance_codes -
+        BROTLI_NUM_DISTANCE_SHORT_CODES;
+    const uint32_t nbits = self->dist_prefix_ >> 10;
+    const uint32_t hcode = rel >> dist->distance_postfix_bits;
+    const uint32_t lcode = rel & ((1U << dist->distance_postfix_bits) - 1U);
+    const uint32_t offset = ((2U + (hcode & 1U)) << nbits) - 4U;
+    return ((offset + self->dist_extra_) << dist->distance_postfix_bits) +
+        lcode + dist->num_direct_distance_codes +
+        BROTLI_NUM_DISTANCE_SHORT_CODES;
+  }
+  return dcode;  /* Short and direct codes are stored verbatim. */
+}
+
+/* Returns a distance context in [0..3] derived from the command prefix. */
+static BROTLI_INLINE uint32_t CommandDistanceContext(const Command* self) {
+  const uint32_t row = self->cmd_prefix_ >> 6;
+  const uint32_t col = self->cmd_prefix_ & 7;
+  if (col > 2) return 3;
+  if (row == 0 || row == 2 || row == 4 || row == 7) return col;
+  return 3;
+}
+
+static BROTLI_INLINE uint32_t CommandCopyLen(const Command* self) {
+  return self->copy_len_ & 0x1FFFFFF;  /* low 25 bits hold the copy length */
+}
+
+static BROTLI_INLINE uint32_t CommandCopyLenCode(const Command* self) {
+  const uint32_t raw = self->copy_len_ >> 25;  /* signed 7-bit delta */
+  const int8_t delta = (int8_t)((uint8_t)(raw | ((raw & 0x40) << 1)));
+  return (uint32_t)((int32_t)CommandCopyLen(self) + delta);
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_COMMAND_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment.c
new file mode 100644
index 0000000000..9e50b2098a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment.c
@@ -0,0 +1,790 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function for fast encoding of an input fragment, independently from the input
+   history. This function uses one-pass processing: when we find a backward
+   match, we immediately emit the corresponding command and literal codes to
+   the bit stream.
+
+   Adapted from the CompressFragment() function in
+   https://github.com/google/snappy/blob/master/snappy.cc */
+
+#include "./compress_fragment.h"
+
+#include <string.h>  /* memcmp, memcpy, memset */
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./brotli_bit_stream.h"
+#include "./entropy_encode.h"
+#include "./fast_log.h"
+#include "./find_match_length.h"
+#include "./memory.h"
+#include "./write_bits.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define MAX_DISTANCE ((long)BROTLI_MAX_BACKWARD_LIMIT(18))
+
+/* kHashMul32 multiplier has these properties:
+   * The multiplier must be odd. Otherwise we may lose the highest bit.
+   * No long streaks of ones or zeros.
+   * There is no effort to ensure that it is a prime, the oddity is enough
+     for this use.
+   * The number has been tuned heuristically against compression benchmarks. */
+static const uint32_t kHashMul32 = 0x1E35A7BD;
+
+static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
+  const uint64_t key = BROTLI_UNALIGNED_LOAD64LE(p) << 24;  /* low 40 bits */
+  return (uint32_t)((key * kHashMul32) >> shift);
+}
+
+/* Hash() of an already loaded 64-bit word `v`, skipping `offset` low bytes. */
+static BROTLI_INLINE uint32_t HashBytesAtOffset(
+    uint64_t v, int offset, size_t shift) {
+  uint64_t key;
+  BROTLI_DCHECK(offset >= 0);
+  BROTLI_DCHECK(offset <= 3);
+  key = (v >> (8 * offset)) << 24;
+  return (uint32_t)((key * kHashMul32) >> shift);
+}
+
+static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
+  /* A match is 5 equal bytes: a 32-bit word plus the byte that follows. */
+  if (p1[4] != p2[4]) return BROTLI_FALSE;
+  return TO_BROTLI_BOOL(BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2));
+}
+
+/* Builds a literal prefix code into "depths" and "bits" based on the statistics
+   of the "input" string and stores it into the bit stream.
+   The code is built from the pre-LZ77 input, so it only approximates the
+   statistics of the literals that are actually emitted. Inputs shorter than
+   32 KiB are counted in full; longer ones are sampled, which is why every
+   literal is then given a non-zero depth.
+   `m` is the memory manager handed to BrotliBuildAndStoreHuffmanTreeFast().
+   Returns the estimated cost, in millibytes per character, of encoding the
+   input with the generated code; returns 0 if the memory manager reports
+   OOM. */
+static size_t BuildAndStoreLiteralPrefixCode(MemoryManager* m,
+                                             const uint8_t* input,
+                                             const size_t input_size,
+                                             uint8_t depths[256],
+                                             uint16_t bits[256],
+                                             size_t* storage_ix,
+                                             uint8_t* storage) {
+  uint32_t histogram[256] = { 0 };
+  /* Inputs of 32 KiB and more are sampled, taking every 29th byte only;
+     shorter inputs are counted in full. */
+  const size_t stride = (input_size < (1 << 15)) ? 1 : 29;
+  /* A sample can miss symbols that do occur. In that mode every symbol
+     gets one extra count, so that none ends up with a bit depth of 0. */
+  const uint32_t floor_count = (stride == 1) ? 0u : 1u;
+  size_t histogram_total;
+  size_t weighted_depth = 0;
+  size_t i;
+
+  /* Count the inspected bytes. */
+  for (i = 0; i < input_size; i += stride) {
+    ++histogram[input[i]];
+  }
+  /* Number of bytes counted above, i.e. ceil(input_size / stride). */
+  histogram_total = (input_size + stride - 1) / stride;
+
+  for (i = 0; i < 256; ++i) {
+    /* We weigh the first 11 samples with weight 3 to account for the
+       balancing effect of the LZ77 phase on the histogram (more frequent
+       symbols are more likely to be in backward references instead as
+       literals). */
+    const uint32_t capped = BROTLI_MIN(uint32_t, histogram[i], 11u);
+    const uint32_t adjust = floor_count + 2 * capped;
+    histogram[i] += adjust;
+    histogram_total += adjust;
+  }
+
+  /* Build the code (depth limited to 8 bits) and store it. */
+  BrotliBuildAndStoreHuffmanTreeFast(m, histogram, histogram_total,
+                                     /* max_bits = */ 8,
+                                     depths, bits, storage_ix, storage);
+  if (BROTLI_IS_OOM(m)) return 0;
+
+  /* Total code length in bits over the (adjusted) histogram. */
+  for (i = 0; i < 256; ++i) {
+    if (histogram[i]) weighted_depth += histogram[i] * depths[i];
+  }
+  /* Estimated encoding ratio, millibytes per symbol: bits per symbol times
+     1000 / 8 = 125. */
+  return (weighted_depth * 125) / histogram_total;
+}
+
+/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
+   "bits" based on "histogram" and stores it into the bit stream. */
+static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
+    uint8_t depth[128], uint16_t bits[128], size_t* storage_ix,
+    uint8_t* storage) {
+  /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
+  HuffmanTree tree[129];
+  uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
+  uint16_t cmd_bits[64];
+  /* Symbols [0..64) are the command half, [64..128) the distance half. */
+  BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
+  BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
+  /* We have to jump through a few hoops here in order to compute
+     the command bits because the symbols are in a different order than in
+     the full alphabet. This looks complicated, but having the symbols
+     in this order in the command bits saves a few branches in the Emit*
+     functions. */
+  memcpy(cmd_depth, depth, 24);
+  memcpy(cmd_depth + 24, depth + 40, 8);
+  memcpy(cmd_depth + 32, depth + 24, 8);
+  memcpy(cmd_depth + 40, depth + 48, 8);
+  memcpy(cmd_depth + 48, depth + 32, 8);
+  memcpy(cmd_depth + 56, depth + 56, 8);
+  BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
+  memcpy(bits, cmd_bits, 48);  /* Sizes are in bytes: 2 per uint16_t entry. */
+  memcpy(bits + 24, cmd_bits + 32, 16);
+  memcpy(bits + 32, cmd_bits + 48, 16);
+  memcpy(bits + 40, cmd_bits + 24, 16);
+  memcpy(bits + 48, cmd_bits + 40, 16);
+  memcpy(bits + 56, cmd_bits + 56, 16);
+  BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
+  {
+    /* Create the bit length array for the full command alphabet. */
+    size_t i;
+    memset(cmd_depth, 0, 64);  /* only 64 first values were used */
+    memcpy(cmd_depth, depth, 8);
+    memcpy(cmd_depth + 64, depth + 8, 8);
+    memcpy(cmd_depth + 128, depth + 16, 8);
+    memcpy(cmd_depth + 192, depth + 24, 8);
+    memcpy(cmd_depth + 384, depth + 32, 8);
+    for (i = 0; i < 8; ++i) {  /* These three groups are spread 8 apart. */
+      cmd_depth[128 + 8 * i] = depth[40 + i];
+      cmd_depth[256 + 8 * i] = depth[48 + i];
+      cmd_depth[448 + 8 * i] = depth[56 + i];
+    }
+    BrotliStoreHuffmanTree(
+        cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
+  }
+  BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
+}
+
+/* REQUIRES: insertlen < 6210 */
+static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
+                                        const uint8_t depth[128],
+                                        const uint16_t bits[128],
+                                        uint32_t histo[128],
+                                        size_t* storage_ix,
+                                        uint8_t* storage) {
+  /* Pick the prefix code and the extra bits first, then emit them once. */
+  size_t code;
+  uint32_t nbits = 0;  /* stays 0 when the code has no extra bits */
+  size_t extra = 0;
+  if (insertlen < 6) {
+    code = insertlen + 40;
+  } else if (insertlen < 130) {
+    const size_t tail = insertlen - 2;
+    nbits = Log2FloorNonZero(tail) - 1u;
+    code = (nbits << 1) + (tail >> nbits) + 42;
+    extra = tail - ((tail >> nbits) << nbits);
+  } else if (insertlen < 2114) {
+    const size_t tail = insertlen - 66;
+    nbits = Log2FloorNonZero(tail);
+    code = nbits + 50;
+    extra = tail - ((size_t)1 << nbits);
+  } else {
+    code = 61;
+    nbits = 12;
+    extra = insertlen - 2114;
+  }
+  BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+  if (nbits != 0) BrotliWriteBits(nbits, extra, storage_ix, storage);
+  ++histo[code];
+}
+
+static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
+                                            const uint8_t depth[128],
+                                            const uint16_t bits[128],
+                                            uint32_t histo[128],
+                                            size_t* storage_ix,
+                                            uint8_t* storage) {
+  /* Code 62 with 14 extra bits below 22594, code 63 with 24 extra bits above.
+     NOTE(review): insertlen - 6210 presumes insertlen >= 6210. */
+  const BROTLI_BOOL is_huge = TO_BROTLI_BOOL(insertlen >= 22594);
+  const size_t code = is_huge ? 63 : 62;
+  const size_t nbits = is_huge ? 24 : 14;
+  const size_t base = is_huge ? 22594 : 6210;
+  BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+  BrotliWriteBits(nbits, insertlen - base, storage_ix, storage);
+  ++histo[code];
+}
+
+static BROTLI_INLINE void EmitCopyLen(size_t copylen,
+                                      const uint8_t depth[128],
+                                      const uint16_t bits[128],
+                                      uint32_t histo[128],
+                                      size_t* storage_ix,
+                                      uint8_t* storage) {
+  /* Pick the prefix code and the extra bits first, then emit them once. */
+  size_t code;
+  uint32_t nbits = 0;  /* stays 0 when the code has no extra bits */
+  size_t extra = 0;
+  if (copylen < 10) {
+    code = copylen + 14;
+  } else if (copylen < 134) {
+    const size_t tail = copylen - 6;
+    nbits = Log2FloorNonZero(tail) - 1u;
+    code = (nbits << 1) + (tail >> nbits) + 20;
+    extra = tail - ((tail >> nbits) << nbits);
+  } else if (copylen < 2118) {
+    const size_t tail = copylen - 70;
+    nbits = Log2FloorNonZero(tail);
+    code = nbits + 28;
+    extra = tail - ((size_t)1 << nbits);
+  } else {
+    code = 39;
+    nbits = 24;
+    extra = copylen - 2118;
+  }
+  BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+  if (nbits != 0) BrotliWriteBits(nbits, extra, storage_ix, storage);
+  ++histo[code];
+}
+
+static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
+                                                  const uint8_t depth[128],
+                                                  const uint16_t bits[128],
+                                                  uint32_t histo[128],
+                                                  size_t* storage_ix,
+                                                  uint8_t* storage) {
+  /* Pick the prefix code and the extra bits first, then emit them once.
+     Copies of 72 bytes and more are followed by symbol 64 as well. */
+  size_t code;
+  uint32_t nbits = 0;  /* stays 0 when the code has no extra bits */
+  size_t extra = 0;
+  BROTLI_BOOL with_symbol_64 = BROTLI_TRUE;
+  if (copylen < 12) {
+    code = copylen - 4;
+    with_symbol_64 = BROTLI_FALSE;
+  } else if (copylen < 72) {
+    const size_t tail = copylen - 8;
+    nbits = Log2FloorNonZero(tail) - 1;
+    code = (nbits << 1) + (tail >> nbits) + 4;
+    extra = tail - ((tail >> nbits) << nbits);
+    with_symbol_64 = BROTLI_FALSE;
+  } else if (copylen < 136) {
+    const size_t tail = copylen - 8;
+    code = (tail >> 5) + 30;
+    nbits = 5;
+    extra = tail & 31;
+  } else if (copylen < 2120) {
+    const size_t tail = copylen - 72;
+    nbits = Log2FloorNonZero(tail);
+    code = nbits + 28;
+    extra = tail - ((size_t)1 << nbits);
+  } else {
+    code = 39;
+    nbits = 24;
+    extra = copylen - 2120;
+  }
+  BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+  if (nbits != 0) BrotliWriteBits(nbits, extra, storage_ix, storage);
+  if (with_symbol_64) BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
+  ++histo[code];
+  if (with_symbol_64) ++histo[64];
+}
+
+static BROTLI_INLINE void EmitDistance(size_t distance,
+                                       const uint8_t depth[128],
+                                       const uint16_t bits[128],
+                                       uint32_t histo[128],
+                                       size_t* storage_ix, uint8_t* storage) {
+  const size_t biased = distance + 3;
+  const uint32_t xbits = Log2FloorNonZero(biased) - 1u;
+  const size_t half = (biased >> xbits) & 1;
+  const size_t sym = 2 * (xbits - 1) + half + 80;
+  /* Prefix symbol first, then "xbits" extra bits holding the remainder. */
+  BrotliWriteBits(depth[sym], bits[sym], storage_ix, storage);
+  BrotliWriteBits(xbits, biased - ((2 + half) << xbits), storage_ix, storage);
+  ++histo[sym];
+}
+
+static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
+                                       const uint8_t depth[256],
+                                       const uint16_t bits[256],
+                                       size_t* storage_ix, uint8_t* storage) {
+  /* Write the prefix code of each of the "len" bytes, in input order. */
+  const uint8_t* const stop = input + len;
+  for (; input != stop; ++input) {
+    BrotliWriteBits(depth[*input], bits[*input], storage_ix, storage);
+  }
+}
+
+/* Writes a non-last meta-block header. REQUIRES: len <= 1 << 24. */
+static void BrotliStoreMetaBlockHeader(
+    size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
+    uint8_t* storage) {
+  /* "len - 1" is stored in 4, 5 or 6 nibbles depending on its magnitude. */
+  size_t nibbles;
+  if (len <= (1U << 16)) {
+    nibbles = 4;
+  } else {
+    nibbles = (len <= (1U << 20)) ? 5 : 6;
+  }
+  BrotliWriteBits(1, 0, storage_ix, storage);  /* ISLAST */
+  BrotliWriteBits(2, nibbles - 4, storage_ix, storage);  /* nibble count - 4 */
+  BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
+  /* ISUNCOMPRESSED */
+  BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
+}
+
+static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
+    uint8_t* array) {  /* overwrites n_bits bits of "array" at bit "pos" */
+  while (n_bits > 0) {  /* touches one byte of "array" per iteration */
+    size_t byte_pos = pos >> 3;  /* byte that holds bit "pos" */
+    size_t n_unchanged_bits = pos & 7;  /* bits of that byte below "pos" */
+    size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits);
+    size_t total_bits = n_unchanged_bits + n_changed_bits;
+    uint32_t mask =  /* ones at the bit positions this pass must preserve */
+        (~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);
+    uint32_t unchanged_bits = array[byte_pos] & mask;
+    uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
+    array[byte_pos] =
+        (uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
+    n_bits -= n_changed_bits;
+    bits >>= n_changed_bits;  /* drop the bits that were just stored */
+    pos += n_changed_bits;
+  }
+}
+
+static void RewindBitPosition(const size_t new_storage_ix,
+                              size_t* storage_ix, uint8_t* storage) {
+  /* Zero the bits at and above the new position within its byte. */
+  const size_t keep = (1u << (new_storage_ix & 7)) - 1;
+  storage[new_storage_ix >> 3] &= (uint8_t)keep;
+  *storage_ix = new_storage_ix;
+}
+
+static BROTLI_BOOL ShouldMergeBlock(
+    const uint8_t* data, size_t len, const uint8_t* depths) {
+  /* Samples every 43rd byte; true when coding the samples with "depths" costs
+     at most their entropy estimate + 0.5 bit per sample + 200 bits. */
+  static const size_t kSampleRate = 43;
+  size_t histo[256] = { 0 };
+  size_t pos, sym;
+  size_t total;
+  double r;
+  for (pos = 0; pos < len; pos += kSampleRate) ++histo[data[pos]];
+  total = (len + kSampleRate - 1) / kSampleRate;
+  r = (FastLog2(total) + 0.5) * (double)total + 200;
+  for (sym = 0; sym < 256; ++sym) {
+    r -= (double)histo[sym] * (depths[sym] + FastLog2(histo[sym]));
+  }
+  return TO_BROTLI_BOOL(r >= 0.0);
+}
+
+/* Acceptable loss for uncompressible speedup is 2% */
+#define MIN_RATIO 980
+
+static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
+    const uint8_t* metablock_start, const uint8_t* next_emit,
+    const size_t insertlen, const size_t literal_ratio) {
+  /* Only consider raw storage when the bytes already handled in this
+     meta-block are at most 1/50 of the pending literal run; then decide on
+     the literal ratio alone. */
+  const size_t done = (size_t)(next_emit - metablock_start);
+  if (done * 50 > insertlen) return BROTLI_FALSE;
+  return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
+}
+
+static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
+                                      const size_t storage_ix_start,
+                                      size_t* storage_ix, uint8_t* storage) {
+  const size_t len = (size_t)(end - begin);  /* bytes stored verbatim */
+  RewindBitPosition(storage_ix_start, storage_ix, storage);  /* drop output */
+  BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
+  *storage_ix = (*storage_ix + 7u) & ~(size_t)7;  /* byte-align, full width */
+  memcpy(&storage[*storage_ix >> 3], begin, len);
+  *storage_ix += len << 3;
+  storage[*storage_ix >> 3] = 0;  /* start the following byte from zero */
+}
+
+static const uint32_t kCmdHistoSeed[128] = {  /* read-only histogram seed */
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 0, 0, 0, 0,
+};
+
+static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
+    MemoryManager* m, const uint8_t* input, size_t input_size,
+    BROTLI_BOOL is_last, int* table, size_t table_bits, uint8_t cmd_depth[128],
+    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
+    size_t* storage_ix, uint8_t* storage) {
+  uint32_t cmd_histo[128];  /* command and distance symbol counts */
+  const uint8_t* ip_end;
+  /* Emits "input" as meta-blocks; the labels below mark the phases. */
+  /* "next_emit" is a pointer to the first byte that is not covered by a
+     previous copy. Bytes between "next_emit" and the start of the next copy or
+     the end of the input will be emitted as literal bytes. */
+  const uint8_t* next_emit = input;
+  /* Save the start of the first block for position and distance computations.
+  */
+  const uint8_t* base_ip = input;
+
+  static const size_t kFirstBlockSize = 3 << 15;
+  static const size_t kMergeBlockSize = 1 << 16;
+
+  const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
+  const size_t kMinMatchLen = 5;
+
+  const uint8_t* metablock_start = input;
+  size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
+  size_t total_block_size = block_size;
+  /* Save the bit position of the MLEN field of the meta-block header, so that
+     we can update it later if we decide to extend this meta-block. */
+  size_t mlen_storage_ix = *storage_ix + 3;  /* past ISLAST + 2 size bits */
+
+  uint8_t lit_depth[256];
+  uint16_t lit_bits[256];
+
+  size_t literal_ratio;
+
+  const uint8_t* ip;
+  int last_distance;
+
+  const size_t shift = 64u - table_bits;
+
+  BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
+  /* No block splits, no contexts. */
+  BrotliWriteBits(13, 0, storage_ix, storage);
+
+  literal_ratio = BuildAndStoreLiteralPrefixCode(
+      m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
+  if (BROTLI_IS_OOM(m)) return;
+
+  {
+    /* Store the pre-compressed command and distance prefix codes. */
+    size_t i;
+    for (i = 0; i + 7 < *cmd_code_numbits; i += 8) {
+      BrotliWriteBits(8, cmd_code[i >> 3], storage_ix, storage);
+    }
+  }
+  BrotliWriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
+                  storage_ix, storage);
+
+ emit_commands:
+  /* Initialize the command and distance histograms. We will gather
+     statistics of command and distance codes during the processing
+     of this block and use it to update the command and distance
+     prefix codes for the next block. */
+  memcpy(cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));
+
+  /* "ip" is the input pointer. */
+  ip = input;
+  last_distance = -1;  /* no usable last distance yet */
+  ip_end = input + block_size;
+
+  if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
+    /* For the last block, we need to keep a 16 bytes margin so that we can be
+       sure that all distances are at most window size - 16.
+       For all other blocks, we only need to keep a margin of 5 bytes so that
+       we don't go over the block size with a copy. */
+    const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
+                                        input_size - kInputMarginBytes);
+    const uint8_t* ip_limit = input + len_limit;
+
+    uint32_t next_hash;
+    for (next_hash = Hash(++ip, shift); ; ) {
+      /* Step 1: Scan forward in the input looking for a 5-byte-long match.
+         If we get close to exhausting the input then goto emit_remainder.
+
+         Heuristic match skipping: If 32 bytes are scanned with no matches
+         found, start looking only at every other byte. If 32 more bytes are
+         scanned, look at every third byte, etc.. When a match is found,
+         immediately go back to looking at every byte. This is a small loss
+         (~5% performance, ~0.1% density) for compressible data due to more
+         bookkeeping, but for non-compressible data (such as JPEG) it's a huge
+         win since the compressor quickly "realizes" the data is incompressible
+         and doesn't bother looking for matches everywhere.
+
+         The "skip" variable keeps track of how many bytes there are since the
+         last match; dividing it by 32 (i.e. right-shifting by five) gives the
+         number of bytes to move ahead for each iteration. */
+      uint32_t skip = 32;
+
+      const uint8_t* next_ip = ip;
+      const uint8_t* candidate;
+      BROTLI_DCHECK(next_emit < ip);
+trawl:
+      do {
+        uint32_t hash = next_hash;
+        uint32_t bytes_between_hash_lookups = skip++ >> 5;
+        BROTLI_DCHECK(hash == Hash(next_ip, shift));
+        ip = next_ip;
+        next_ip = ip + bytes_between_hash_lookups;
+        if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
+          goto emit_remainder;
+        }
+        next_hash = Hash(next_ip, shift);
+        candidate = ip - last_distance;  /* first try the last distance */
+        if (IsMatch(ip, candidate)) {
+          if (BROTLI_PREDICT_TRUE(candidate < ip)) {  /* false for -1 */
+            table[hash] = (int)(ip - base_ip);
+            break;
+          }
+        }
+        candidate = base_ip + table[hash];
+        BROTLI_DCHECK(candidate >= base_ip);
+        BROTLI_DCHECK(candidate < ip);
+
+        table[hash] = (int)(ip - base_ip);
+      } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));
+
+      /* Check copy distance. If candidate is not feasible, continue search.
+         Checking is done outside of hot loop to reduce overhead. */
+      if (ip - candidate > MAX_DISTANCE) goto trawl;
+
+      /* Step 2: Emit the found match together with the literal bytes from
+         "next_emit" to the bit stream, and then see if we can find a next match
+         immediately afterwards. Repeat until we find no match for the input
+         without emitting some literal bytes. */
+
+      {
+        /* We have a 5-byte match at ip, and we need to emit bytes in
+           [next_emit, ip). */
+        const uint8_t* base = ip;
+        size_t matched = 5 + FindMatchLengthWithLimit(
+            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
+        int distance = (int)(base - candidate);  /* > 0 */
+        size_t insert = (size_t)(base - next_emit);
+        ip += matched;
+        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
+        if (BROTLI_PREDICT_TRUE(insert < 6210)) {
+          EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
+                        storage_ix, storage);
+        } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
+                                             literal_ratio)) {
+          EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
+                                    storage_ix, storage);
+          input_size -= (size_t)(base - input);  /* resume at the match */
+          input = base;
+          next_emit = input;
+          goto next_block;
+        } else {
+          EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
+                            storage_ix, storage);
+        }
+        EmitLiterals(next_emit, insert, lit_depth, lit_bits,
+                     storage_ix, storage);
+        if (distance == last_distance) {
+          BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
+          ++cmd_histo[64];
+        } else {
+          EmitDistance((size_t)distance, cmd_depth, cmd_bits,
+                       cmd_histo, storage_ix, storage);
+          last_distance = distance;
+        }
+        EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
+                                storage_ix, storage);
+
+        next_emit = ip;
+        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
+          goto emit_remainder;
+        }
+        /* We could immediately start working at ip now, but to improve
+           compression we first update "table" with the hashes of some positions
+           within the last copy. */
+        {
+          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
+          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
+          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
+          table[prev_hash] = (int)(ip - base_ip - 3);
+          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
+          table[prev_hash] = (int)(ip - base_ip - 2);
+          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
+          table[prev_hash] = (int)(ip - base_ip - 1);
+
+          candidate = base_ip + table[cur_hash];
+          table[cur_hash] = (int)(ip - base_ip);
+        }
+      }
+
+      while (IsMatch(ip, candidate)) {
+        /* We have a 5-byte match at ip, and no need to emit any literal bytes
+           prior to ip. */
+        const uint8_t* base = ip;
+        size_t matched = 5 + FindMatchLengthWithLimit(
+            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
+        if (ip - candidate > MAX_DISTANCE) break;
+        ip += matched;
+        last_distance = (int)(base - candidate);  /* > 0 */
+        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
+        EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
+                    storage_ix, storage);
+        EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
+                     cmd_histo, storage_ix, storage);
+
+        next_emit = ip;
+        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
+          goto emit_remainder;
+        }
+        /* We could immediately start working at ip now, but to improve
+           compression we first update "table" with the hashes of some positions
+           within the last copy. */
+        {
+          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
+          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
+          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
+          table[prev_hash] = (int)(ip - base_ip - 3);
+          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
+          table[prev_hash] = (int)(ip - base_ip - 2);
+          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
+          table[prev_hash] = (int)(ip - base_ip - 1);
+
+          candidate = base_ip + table[cur_hash];
+          table[cur_hash] = (int)(ip - base_ip);
+        }
+      }
+
+      next_hash = Hash(++ip, shift);
+    }
+  }
+
+ emit_remainder:
+  BROTLI_DCHECK(next_emit <= ip_end);
+  input += block_size;
+  input_size -= block_size;
+  block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);
+
+  /* Decide if we want to continue this meta-block instead of emitting the
+     last insert-only command. */
+  if (input_size > 0 &&
+      total_block_size + block_size <= (1 << 20) &&
+      ShouldMergeBlock(input, block_size, lit_depth)) {
+    BROTLI_DCHECK(total_block_size > (1 << 16));
+    /* Update the size of the current meta-block and continue emitting commands.
+       We can do this because the current size and the new size both have 5
+       nibbles. */
+    total_block_size += block_size;
+    UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
+    goto emit_commands;
+  }
+
+  /* Emit the remaining bytes as literals. */
+  if (next_emit < ip_end) {
+    const size_t insert = (size_t)(ip_end - next_emit);
+    if (BROTLI_PREDICT_TRUE(insert < 6210)) {
+      EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
+                    storage_ix, storage);
+      EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
+    } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
+                                         literal_ratio)) {
+      EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
+                                storage_ix, storage);
+    } else {
+      EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
+                        storage_ix, storage);
+      EmitLiterals(next_emit, insert, lit_depth, lit_bits,
+                   storage_ix, storage);
+    }
+  }
+  next_emit = ip_end;
+
+next_block:
+  /* If we have more data, write a new meta-block header and prefix codes and
+     then continue emitting commands. */
+  if (input_size > 0) {
+    metablock_start = input;
+    block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
+    total_block_size = block_size;
+    /* Save the bit position of the MLEN field of the meta-block header, so that
+       we can update it later if we decide to extend this meta-block. */
+    mlen_storage_ix = *storage_ix + 3;
+    BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
+    /* No block splits, no contexts. */
+    BrotliWriteBits(13, 0, storage_ix, storage);
+    literal_ratio = BuildAndStoreLiteralPrefixCode(
+        m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
+                                   storage_ix, storage);
+    goto emit_commands;
+  }
+
+  if (!is_last) {
+    /* If this is not the last block, update the command and distance prefix
+       codes for the next block and store the compressed forms. */
+    cmd_code[0] = 0;  /* "cmd_code" is reused below as the output bit buffer */
+    *cmd_code_numbits = 0;
+    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
+                                   cmd_code_numbits, cmd_code);
+  }
+}
+
+#define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)
+/* For each B above: a BROTLI_NOINLINE wrapper passing B as "table_bits". */
+#define BAKE_METHOD_PARAM_(B) \
+static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B(             \
+    MemoryManager* m, const uint8_t* input, size_t input_size,               \
+    BROTLI_BOOL is_last, int* table, uint8_t cmd_depth[128],                 \
+    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,     \
+    size_t* storage_ix, uint8_t* storage) {                                  \
+  BrotliCompressFragmentFastImpl(m, input, input_size, is_last, table, B,    \
+      cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix, storage); \
+}
+FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
+#undef BAKE_METHOD_PARAM_
+
+void BrotliCompressFragmentFast(
+    MemoryManager* m, const uint8_t* input, size_t input_size,
+    BROTLI_BOOL is_last, int* table, size_t table_size, uint8_t cmd_depth[128],
+    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
+    size_t* storage_ix, uint8_t* storage) {
+  const size_t initial_storage_ix = *storage_ix;
+  const size_t table_bits = Log2FloorNonZero(table_size);
+  /* Empty input is only valid as the final fragment: emit the empty block. */
+  if (input_size == 0) {
+    BROTLI_DCHECK(is_last);
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
+    *storage_ix = (*storage_ix + 7u) & ~(size_t)7;  /* pad to a byte boundary */
+    return;
+  }
+  /* Run the implementation specialized for this table size. */
+  switch (table_bits) {
+#define CASE_(B)                                                     \
+    case B:                                                          \
+      BrotliCompressFragmentFastImpl ## B(                           \
+          m, input, input_size, is_last, table, cmd_depth, cmd_bits, \
+          cmd_code_numbits, cmd_code, storage_ix, storage);          \
+      break;
+    FOR_TABLE_BITS_(CASE_)
+#undef CASE_
+    default: BROTLI_DCHECK(0); break;
+  }
+
+  /* If output is larger than single uncompressed block, rewrite it. */
+  if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
+    EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
+                              storage_ix, storage);
+  }
+  /* Finish the stream with an empty last meta-block, padded to a byte. */
+  if (is_last) {
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
+    *storage_ix = (*storage_ix + 7u) & ~(size_t)7;  /* size_t-wide mask */
+  }
+}
+
+#undef FOR_TABLE_BITS_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment.h
new file mode 100644
index 0000000000..80007f5dca
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment.h
@@ -0,0 +1,61 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function for fast encoding of an input fragment, independently from the input
+   history. This function uses one-pass processing: when we find a backward
+   match, we immediately emit the corresponding command and literal codes to
+   the bit stream. */
+
+#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_
+#define BROTLI_ENC_COMPRESS_FRAGMENT_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./memory.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Compresses the "input" bytes into the "*storage" buffer as one or more
+   complete meta-blocks, and updates the "*storage_ix" bit position.
+
+   If "is_last" is 1, emits an additional empty last meta-block.
+
+   "cmd_depth" and "cmd_bits" contain the command and distance prefix codes
+   (see comment in encode.h) used for the encoding of this input fragment.
+   If "is_last" is 0, they are updated to reflect the statistics
+   of this input fragment, to be used for the encoding of the next fragment.
+
+   "*cmd_code_numbits" is the number of bits of the compressed representation
+   of the command and distance prefix codes, and "cmd_code" is an array of
+   at least "(*cmd_code_numbits + 7) >> 3" bytes that contains the compressed
+   command and distance prefix codes. If "is_last" is 0, these are also
+   updated to represent the updated "cmd_depth" and "cmd_bits".
+
+   REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
+   REQUIRES: "input_size" is at most the maximal meta-block size (1 << 24).
+   REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
+   REQUIRES: "table_size" is a power of two with odd exponent (9, 11, 13, 15)
+   OUTPUT: maximal copy distance <= |input_size|
+   OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
+BROTLI_INTERNAL void BrotliCompressFragmentFast(MemoryManager* m,
+                                                const uint8_t* input,
+                                                size_t input_size,
+                                                BROTLI_BOOL is_last,
+                                                int* table, size_t table_size,
+                                                uint8_t cmd_depth[128],
+                                                uint16_t cmd_bits[128],
+                                                size_t* cmd_code_numbits,
+                                                uint8_t* cmd_code,
+                                                size_t* storage_ix,
+                                                uint8_t* storage);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_COMPRESS_FRAGMENT_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment_two_pass.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment_two_pass.c
new file mode 100644
index 0000000000..f8a5606384
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment_two_pass.c
@@ -0,0 +1,645 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function for fast encoding of an input fragment, independently from the input
+   history. This function uses two-pass processing: in the first pass we save
+   the found backward matches and literal bytes into a buffer, and in the
+   second pass we emit them into the bit stream using prefix codes built based
+   on the actual command and literal byte histograms. */
+
+#include "./compress_fragment_two_pass.h"
+
+#include <string.h>  /* memcmp, memcpy, memset */
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./bit_cost.h"
+#include "./brotli_bit_stream.h"
+#include "./entropy_encode.h"
+#include "./fast_log.h"
+#include "./find_match_length.h"
+#include "./memory.h"
+#include "./write_bits.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
+
+/* kHashMul32 multiplier has these properties:
+   * The multiplier must be odd. Otherwise we may lose the highest bit.
+   * No long streaks of ones or zeros.
+   * No effort is made to ensure that it is prime; being odd is enough
+     for this use.
+   * The number has been tuned heuristically against compression benchmarks. */
+static const uint32_t kHashMul32 = 0x1E35A7BD;
+
+static BROTLI_INLINE uint32_t Hash(const uint8_t* p,
+    size_t shift, size_t length) {
+  /* Keep the first "length" of the 8 loaded bytes, then multiply and shift. */
+  const uint64_t window = BROTLI_UNALIGNED_LOAD64LE(p) << ((8 - length) * 8);
+  return (uint32_t)((window * kHashMul32) >> shift);
+}
+
+static BROTLI_INLINE uint32_t HashBytesAtOffset(uint64_t v, size_t offset,
+    size_t shift, size_t length) {
+  /* Multiply-shift hash of bytes [offset, offset + length) of "v". */
+  uint64_t window;
+  BROTLI_DCHECK(offset <= 8 - length);
+  window = (v >> (8 * offset)) << ((8 - length) * 8);
+  return (uint32_t)((window * kHashMul32) >> shift);
+}
+
+static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2,
+    size_t length) {
+  /* First 4 bytes must agree; unless length == 4, bytes 4 and 5 must too. */
+  if (BrotliUnalignedRead32(p1) != BrotliUnalignedRead32(p2)) {
+    return BROTLI_FALSE;
+  }
+  return TO_BROTLI_BOOL(length == 4 || (p1[4] == p2[4] && p1[5] == p2[5]));
+}
+
+/* Builds the command (entries 0..63) and distance (entries 64..127) prefix
+   codes into "depth" and "bits" from "histogram"; stores both in the stream. */
+static void BuildAndStoreCommandPrefixCode(
+    const uint32_t histogram[128],
+    uint8_t depth[128], uint16_t bits[128],
+    size_t* storage_ix, uint8_t* storage) {
+  /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
+  HuffmanTree tree[129];
+  uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
+  uint16_t cmd_bits[64];
+  BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
+  BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
+  /* We have to jump through a few hoops here in order to compute
+     the command bits because the symbols are in a different order than in
+     the full alphabet. This looks complicated, but having the symbols
+     in this order in the command bits saves a few branches in the Emit*
+     functions. */
+  memcpy(cmd_depth, depth + 24, 24);  /* permute depths into cmd_depth[0..63] */
+  memcpy(cmd_depth + 24, depth, 8);
+  memcpy(cmd_depth + 32, depth + 48, 8);
+  memcpy(cmd_depth + 40, depth + 8, 8);
+  memcpy(cmd_depth + 48, depth + 56, 8);
+  memcpy(cmd_depth + 56, depth + 16, 8);
+  BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
+  memcpy(bits, cmd_bits + 24, 16);  /* memcpy sizes are bytes: 16 = 8 codes */
+  memcpy(bits + 8, cmd_bits + 40, 16);
+  memcpy(bits + 16, cmd_bits + 56, 16);
+  memcpy(bits + 24, cmd_bits, 48);
+  memcpy(bits + 48, cmd_bits + 32, 16);
+  memcpy(bits + 56, cmd_bits + 48, 16);
+  BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
+  {
+    /* Create the bit length array for the full command alphabet. */
+    size_t i;
+    memset(cmd_depth, 0, 64);  /* only 64 first values were used */
+    memcpy(cmd_depth, depth + 24, 8);
+    memcpy(cmd_depth + 64, depth + 32, 8);
+    memcpy(cmd_depth + 128, depth + 40, 8);
+    memcpy(cmd_depth + 192, depth + 48, 8);
+    memcpy(cmd_depth + 384, depth + 56, 8);
+    for (i = 0; i < 8; ++i) {  /* these entries are spaced 8 apart */
+      cmd_depth[128 + 8 * i] = depth[i];
+      cmd_depth[256 + 8 * i] = depth[8 + i];
+      cmd_depth[448 + 8 * i] = depth[16 + i];
+    }
+    BrotliStoreHuffmanTree(
+        cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
+  }
+  BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
+}
+
+static BROTLI_INLINE void EmitInsertLen(
+    uint32_t insertlen, uint32_t** commands) {
+  /* Packs the insert-length symbol into the low 8 bits and its extra-bits
+     value into the bits above, then appends the word to "*commands". */
+  uint32_t word;
+  if (insertlen < 6) {
+    word = insertlen;
+  } else if (insertlen < 130) {
+    const uint32_t rest = insertlen - 2;
+    const uint32_t xbits = Log2FloorNonZero(rest) - 1u;
+    const uint32_t top = rest >> xbits;
+    const uint32_t sym = (xbits << 1) + top + 2;
+    const uint32_t extra = rest - (top << xbits);
+    word = sym | (extra << 8);
+  } else if (insertlen < 2114) {
+    const uint32_t rest = insertlen - 66;
+    const uint32_t xbits = Log2FloorNonZero(rest);
+    const uint32_t sym = xbits + 10;
+    const uint32_t extra = rest - (1u << xbits);
+    word = sym | (extra << 8);
+  } else if (insertlen < 6210) {
+    word = 21 | ((insertlen - 2114) << 8);
+  } else if (insertlen < 22594) {
+    word = 22 | ((insertlen - 6210) << 8);
+  } else {
+    word = 23 | ((insertlen - 22594) << 8);
+  }
+  *(*commands)++ = word;
+}
+
+static BROTLI_INLINE void EmitCopyLen(size_t copylen, uint32_t** commands) {
+  /* Appends one word to "*commands": the copy-length symbol in the low
+     8 bits and its extra-bits value in the bits above. */
+  size_t word;
+  if (copylen < 10) {
+    word = copylen + 38;
+  } else if (copylen < 134) {
+    const size_t rest = copylen - 6;
+    const size_t xbits = Log2FloorNonZero(rest) - 1;
+    const size_t top = rest >> xbits;
+    const size_t sym = (xbits << 1) + top + 44;
+    word = sym | ((rest - (top << xbits)) << 8);
+  } else if (copylen < 2118) {
+    const size_t rest = copylen - 70;
+    const size_t xbits = Log2FloorNonZero(rest);
+    const size_t sym = xbits + 52;
+    word = sym | ((rest - ((size_t)1 << xbits)) << 8);
+  } else {
+    word = 63 | ((copylen - 2118) << 8);
+  }
+  *(*commands)++ = (uint32_t)word;
+}
+
+static BROTLI_INLINE void EmitCopyLenLastDistance(
+    size_t copylen, uint32_t** commands) {
+  /* Appends the copy-length word for "copylen" to "*commands": the symbol is
+     in the low 8 bits and the extra-bits value above them. For lengths of 72
+     and more a second word with the value 64 is appended as well. */
+  size_t word;
+  if (copylen < 12) {
+    word = copylen + 20;
+  } else if (copylen < 72) {
+    const size_t rest = copylen - 8;
+    const size_t xbits = Log2FloorNonZero(rest) - 1;
+    const size_t top = rest >> xbits;
+    const size_t sym = (xbits << 1) + top + 28;
+    const size_t extra = rest - (top << xbits);
+    word = sym | (extra << 8);
+  } else if (copylen < 136) {
+    const size_t rest = copylen - 8;
+    const size_t sym = (rest >> 5) + 54;
+    const size_t extra = rest & 31;
+    word = sym | (extra << 8);
+  } else if (copylen < 2120) {
+    const size_t rest = copylen - 72;
+    const size_t xbits = Log2FloorNonZero(rest);
+    const size_t sym = xbits + 52;
+    const size_t extra = rest - ((size_t)1 << xbits);
+    word = sym | (extra << 8);
+  } else {
+    const size_t extra = copylen - 2120;
+    word = 63 | (extra << 8);
+  }
+  **commands = (uint32_t)word;
+  ++(*commands);
+  /* Trailing word 64 for the three long-length ranges. */
+  if (copylen >= 72) {
+    **commands = 64;
+    ++(*commands);
+  }
+}
+
+/* Appends the distance command word for |distance| to |*commands|. */
+static BROTLI_INLINE void EmitDistance(uint32_t distance, uint32_t** commands) {
+  const uint32_t shifted = distance + 3;
+  const uint32_t nbits = Log2FloorNonZero(shifted) - 1;
+  const uint32_t prefix = (shifted >> nbits) & 1;
+  /* |shifted| with its low |nbits| bits cleared; the rest is the extra. */
+  const uint32_t bucket_start = (2 + prefix) << nbits;
+  const uint32_t distcode = 80 + prefix + 2 * (nbits - 1);
+  *(*commands)++ = distcode | ((shifted - bucket_start) << 8);
+}
+
+/* REQUIRES: len <= 1 << 24. */
+static void BrotliStoreMetaBlockHeader(
+    size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
+    uint8_t* storage) {
+  /* Fewest nibbles (4, 5 or 6) that can hold len - 1. */
+  const size_t nibbles = (len <= (1U << 16)) ? 4
+                       : (len <= (1U << 20)) ? 5
+                       : 6;
+  /* ISLAST */
+  BrotliWriteBits(1, 0, storage_ix, storage);
+  /* Nibble count, stored as nibbles - 4 in two bits. */
+  BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
+  /* Length, stored as len - 1. */
+  BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
+  /* ISUNCOMPRESSED */
+  BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
+}
+
+static BROTLI_INLINE void CreateCommands(const uint8_t* input,
+    size_t block_size, size_t input_size, const uint8_t* base_ip, int* table,
+    size_t table_bits, size_t min_match,
+    uint8_t** literals, uint32_t** commands) {
+  /* Pass 1 for one block: fills *literals and *commands. "ip" scans input. */
+  const uint8_t* ip = input;
+  const size_t shift = 64u - table_bits;
+  const uint8_t* ip_end = input + block_size;
+  /* "next_emit" is a pointer to the first byte that is not covered by a
+     previous copy. Bytes between "next_emit" and the start of the next copy or
+     the end of the input will be emitted as literal bytes. */
+  const uint8_t* next_emit = input;
+
+  int last_distance = -1;  /* -1: no distance to reuse yet. */
+  const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
+
+  if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
+    /* Searching stops at "ip_limit", which lies at least min_match bytes
+       before the end of this block (so a match of min_match bytes always
+       fits) and at least kInputMarginBytes before the end of the remaining
+       input (keeps last-block distances below window size minus the gap). */
+    const size_t len_limit = BROTLI_MIN(size_t, block_size - min_match,
+                                        input_size - kInputMarginBytes);
+    const uint8_t* ip_limit = input + len_limit;
+
+    uint32_t next_hash;
+    for (next_hash = Hash(++ip, shift, min_match); ; ) {
+      /* Step 1: Scan forward in the input looking for a min_match-byte match.
+         If we get close to exhausting the input then goto emit_remainder.
+
+         Heuristic match skipping: If 32 bytes are scanned with no matches
+         found, start looking only at every other byte. If 32 more bytes are
+         scanned, look at every third byte, etc.. When a match is found,
+         immediately go back to looking at every byte. This is a small loss
+         (~5% performance, ~0.1% density) for compressible data due to more
+         bookkeeping, but for non-compressible data (such as JPEG) it's a huge
+         win since the compressor quickly "realizes" the data is incompressible
+         and doesn't bother looking for matches everywhere.
+
+         The "skip" variable keeps track of how many bytes there are since the
+         last match; dividing it by 32 (ie. right-shifting by five) gives the
+         number of bytes to move ahead for each iteration. */
+      uint32_t skip = 32;
+
+      const uint8_t* next_ip = ip;
+      const uint8_t* candidate;
+
+      BROTLI_DCHECK(next_emit < ip);
+trawl:
+      do {
+        uint32_t hash = next_hash;
+        uint32_t bytes_between_hash_lookups = skip++ >> 5;
+        ip = next_ip;
+        BROTLI_DCHECK(hash == Hash(ip, shift, min_match));
+        next_ip = ip + bytes_between_hash_lookups;
+        if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
+          goto emit_remainder;
+        }
+        next_hash = Hash(next_ip, shift, min_match);
+        candidate = ip - last_distance;  /* Try the previous distance first. */
+        if (IsMatch(ip, candidate, min_match)) {
+          if (BROTLI_PREDICT_TRUE(candidate < ip)) {
+            table[hash] = (int)(ip - base_ip);
+            break;
+          }
+        }
+        candidate = base_ip + table[hash];
+        BROTLI_DCHECK(candidate >= base_ip);
+        BROTLI_DCHECK(candidate < ip);
+
+        table[hash] = (int)(ip - base_ip);
+      } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate, min_match)));
+
+      /* Check copy distance. If candidate is not feasible, continue search.
+         Checking is done outside of hot loop to reduce overhead. */
+      if (ip - candidate > MAX_DISTANCE) goto trawl;
+
+      /* Step 2: Emit the found match together with the literal bytes from
+         "next_emit", and then see if we can find a next match immediately
+         afterwards. Repeat until we find no match for the input
+         without emitting some literal bytes. */
+
+      {
+        /* We have a min_match-byte match at ip, and we need to emit bytes in
+           [next_emit, ip). */
+        const uint8_t* base = ip;
+        size_t matched = min_match + FindMatchLengthWithLimit(
+            candidate + min_match, ip + min_match,
+            (size_t)(ip_end - ip) - min_match);
+        int distance = (int)(base - candidate);  /* > 0 */
+        int insert = (int)(base - next_emit);
+        ip += matched;
+        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
+        EmitInsertLen((uint32_t)insert, commands);
+        memcpy(*literals, next_emit, (size_t)insert);
+        *literals += insert;
+        if (distance == last_distance) {
+          **commands = 64;
+          ++(*commands);
+        } else {
+          EmitDistance((uint32_t)distance, commands);
+          last_distance = distance;
+        }
+        EmitCopyLenLastDistance(matched, commands);
+
+        next_emit = ip;
+        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
+          goto emit_remainder;
+        }
+        {
+          /* Before resuming at ip, seed "table" with the positions just
+             before it; each entry must pair the hash of the bytes at a
+             position with that same position (offset k <-> ip - 3 + k). */
+          uint64_t input_bytes;
+          uint32_t cur_hash;
+          uint32_t prev_hash;
+          if (min_match == 4) {
+            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
+            cur_hash = HashBytesAtOffset(input_bytes, 3, shift, min_match);
+            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 3);
+            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 2);
+            prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 1);
+          } else {
+            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5);
+            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 5);
+            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 4);
+            prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 3);
+            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2);
+            cur_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
+            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 2);
+            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 1);
+          }
+
+          candidate = base_ip + table[cur_hash];
+          table[cur_hash] = (int)(ip - base_ip);
+        }
+      }
+
+      while (ip - candidate <= MAX_DISTANCE &&
+          IsMatch(ip, candidate, min_match)) {
+        /* We have a min_match-byte match at ip, and no need to emit any
+           literal bytes prior to ip. */
+        const uint8_t* base = ip;
+        size_t matched = min_match + FindMatchLengthWithLimit(
+            candidate + min_match, ip + min_match,
+            (size_t)(ip_end - ip) - min_match);
+        ip += matched;
+        last_distance = (int)(base - candidate);  /* > 0 */
+        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
+        EmitCopyLen(matched, commands);
+        EmitDistance((uint32_t)last_distance, commands);
+
+        next_emit = ip;
+        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
+          goto emit_remainder;
+        }
+        {
+          /* We could immediately start working at ip now, but to improve
+             compression we first update "table" with the hashes of some
+             positions within the last copy. */
+          uint64_t input_bytes;
+          uint32_t cur_hash;
+          uint32_t prev_hash;
+          if (min_match == 4) {
+            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
+            cur_hash = HashBytesAtOffset(input_bytes, 3, shift, min_match);
+            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 3);
+            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 2);
+            prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 1);
+          } else {
+            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5);
+            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 5);
+            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 4);
+            prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 3);
+            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2);
+            cur_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
+            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 2);
+            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 1);
+          }
+
+          candidate = base_ip + table[cur_hash];
+          table[cur_hash] = (int)(ip - base_ip);
+        }
+      }
+
+      next_hash = Hash(++ip, shift, min_match);
+    }
+  }
+
+emit_remainder:
+  BROTLI_DCHECK(next_emit <= ip_end);
+  /* Emit the remaining bytes as literals. */
+  if (next_emit < ip_end) {
+    const uint32_t insert = (uint32_t)(ip_end - next_emit);
+    EmitInsertLen(insert, commands);
+    memcpy(*literals, next_emit, insert);
+    *literals += insert;
+  }
+}
+
+static void StoreCommands(MemoryManager* m,
+                          const uint8_t* literals, const size_t num_literals,
+                          const uint32_t* commands, const size_t num_commands,
+                          size_t* storage_ix, uint8_t* storage) {
+  static const uint32_t kNumExtraBits[128] = {  /* Indexed by command code. */
+    0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24,
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
+    9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
+    17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24,
+  };
+  static const uint32_t kInsertOffset[24] = {  /* Base length, codes 0..23. */
+    0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578,
+    1090, 2114, 6210, 22594,
+  };
+
+  uint8_t lit_depths[256];
+  uint16_t lit_bits[256];
+  uint32_t lit_histo[256] = { 0 };
+  uint8_t cmd_depths[128] = { 0 };
+  uint16_t cmd_bits[128] = { 0 };
+  uint32_t cmd_histo[128] = { 0 };
+  size_t i;  /* Below: store both prefix codes, then the coded commands. */
+  for (i = 0; i < num_literals; ++i) {  /* Histogram of literal bytes. */
+    ++lit_histo[literals[i]];
+  }
+  BrotliBuildAndStoreHuffmanTreeFast(m, lit_histo, num_literals,
+                                     /* max_bits = */ 8,
+                                     lit_depths, lit_bits,
+                                     storage_ix, storage);
+  if (BROTLI_IS_OOM(m)) return;
+
+  for (i = 0; i < num_commands; ++i) {  /* Histogram of command codes. */
+    const uint32_t code = commands[i] & 0xFF;  /* Low byte holds the code. */
+    BROTLI_DCHECK(code < 128);
+    ++cmd_histo[code];
+  }
+  cmd_histo[1] += 1;  /* NOTE(review): presumably avoids empty code groups. */
+  cmd_histo[2] += 1;
+  cmd_histo[64] += 1;
+  cmd_histo[84] += 1;
+  BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depths, cmd_bits,
+                                 storage_ix, storage);
+
+  for (i = 0; i < num_commands; ++i) {  /* Write every command in order. */
+    const uint32_t cmd = commands[i];
+    const uint32_t code = cmd & 0xFF;
+    const uint32_t extra = cmd >> 8;  /* Upper 24 bits: extra-bits value. */
+    BROTLI_DCHECK(code < 128);
+    BrotliWriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
+    BrotliWriteBits(kNumExtraBits[code], extra, storage_ix, storage);
+    if (code < 24) {  /* Insert code: its literals follow immediately. */
+      const uint32_t insert = kInsertOffset[code] + extra;
+      uint32_t j;
+      for (j = 0; j < insert; ++j) {
+        const uint8_t lit = *literals;
+        BrotliWriteBits(lit_depths[lit], lit_bits[lit], storage_ix, storage);
+        ++literals;  /* Literals are consumed in the order they were saved. */
+      }
+    }
+  }
+}
+
+/* Acceptable loss for uncompressible speedup is 2% */
+#define MIN_RATIO 0.98
+#define SAMPLE_RATE 43
+
+/* Decides whether the block is worth entropy-coding (vs. storing raw). */
+static BROTLI_BOOL ShouldCompress(
+    const uint8_t* input, size_t input_size, size_t num_literals) {
+  const double corpus_size = (double)input_size;
+  if (num_literals >= MIN_RATIO * corpus_size) {
+    uint32_t sampled_histo[256] = { 0 };
+    const double bit_budget = corpus_size * 8 * MIN_RATIO / SAMPLE_RATE;
+    size_t pos;
+    for (pos = 0; pos < input_size; pos += SAMPLE_RATE) {
+      ++sampled_histo[input[pos]];
+    }
+    return TO_BROTLI_BOOL(BitsEntropy(sampled_histo, 256) < bit_budget);
+  }
+  return BROTLI_TRUE;
+}
+
+static void RewindBitPosition(const size_t new_storage_ix,
+                              size_t* storage_ix, uint8_t* storage) {
+  /* Keep only the bits below the rewind point in the byte it falls into. */
+  uint8_t* const partial_byte = &storage[new_storage_ix >> 3];
+  *partial_byte &= (uint8_t)((1u << (new_storage_ix & 7)) - 1);
+  *storage_ix = new_storage_ix;
+}
+
+static void EmitUncompressedMetaBlock(const uint8_t* input, size_t input_size,
+                                      size_t* storage_ix, uint8_t* storage) {
+  BrotliStoreMetaBlockHeader(input_size, 1, storage_ix, storage);
+  *storage_ix = (*storage_ix + 7u) & ~(size_t)7;  /* Byte-align, full width. */
+  memcpy(&storage[*storage_ix >> 3], input, input_size);  /* Raw payload. */
+  *storage_ix += input_size << 3;
+  storage[*storage_ix >> 3] = 0;  /* Start the next byte zeroed. */
+}
+
+static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl(
+    MemoryManager* m, const uint8_t* input, size_t input_size,
+    BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
+    int* table, size_t table_bits, size_t min_match,
+    size_t* storage_ix, uint8_t* storage) {
+  /* Each loop iteration emits one meta-block. "base_ip" keeps the start of
+     the first block for position and distance computations. */
+  const uint8_t* base_ip = input;
+  BROTLI_UNUSED(is_last);  /* The public wrapper handles is_last. */
+
+  while (input_size > 0) {
+    size_t block_size =
+        BROTLI_MIN(size_t, input_size, kCompressFragmentTwoPassBlockSize);
+    uint32_t* commands = command_buf;  /* Buffers are reused by every block. */
+    uint8_t* literals = literal_buf;
+    size_t num_literals;
+    CreateCommands(input, block_size, input_size, base_ip, table,
+                   table_bits, min_match, &literals, &commands);
+    num_literals = (size_t)(literals - literal_buf);
+    if (ShouldCompress(input, block_size, num_literals)) {
+      const size_t num_commands = (size_t)(commands - command_buf);
+      BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
+      /* No block splits, no contexts. */
+      BrotliWriteBits(13, 0, storage_ix, storage);
+      StoreCommands(m, literal_buf, num_literals, command_buf, num_commands,
+                    storage_ix, storage);
+      if (BROTLI_IS_OOM(m)) return;
+    } else {
+      /* Since we did not find many backward references and the entropy of
+         the data is close to 8 bits, we can simply emit an uncompressed block.
+         This makes compression speed of uncompressible data about 3x faster. */
+      EmitUncompressedMetaBlock(input, block_size, storage_ix, storage);
+    }
+    input += block_size;  /* Advance to the next block of the fragment. */
+    input_size -= block_size;
+  }
+}
+
+#define FOR_TABLE_BITS_(X) \
+  X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17)  /* table_bits */
+
+#define BAKE_METHOD_PARAM_(B)                                                  \
+static BROTLI_NOINLINE void BrotliCompressFragmentTwoPassImpl ## B(            \
+    MemoryManager* m, const uint8_t* input, size_t input_size,                 \
+    BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,          \
+    int* table, size_t* storage_ix, uint8_t* storage) {                        \
+  size_t min_match = (B <= 15) ? 4 : 6;                                        \
+  BrotliCompressFragmentTwoPassImpl(m, input, input_size, is_last, command_buf,\
+      literal_buf, table, B, min_match, storage_ix, storage);                  \
+}
+FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)  /* One ...Impl<B> per table size. */
+#undef BAKE_METHOD_PARAM_
+
+void BrotliCompressFragmentTwoPass(
+    MemoryManager* m, const uint8_t* input, size_t input_size,
+    BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
+    int* table, size_t table_size, size_t* storage_ix, uint8_t* storage) {
+  const size_t initial_storage_ix = *storage_ix;  /* Kept for the rewind. */
+  const size_t table_bits = Log2FloorNonZero(table_size);
+  switch (table_bits) {  /* Dispatch to the variant baked for this size. */
+#define CASE_(B)                                      \
+    case B:                                           \
+      BrotliCompressFragmentTwoPassImpl ## B(         \
+          m, input, input_size, is_last, command_buf, \
+          literal_buf, table, storage_ix, storage);   \
+      break;
+    FOR_TABLE_BITS_(CASE_)
+#undef CASE_
+    default: BROTLI_DCHECK(0); break;  /* table_bits outside 8..17. */
+  }
+
+  /* If the output exceeds one uncompressed meta-block, rewind and redo it. */
+  if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
+    RewindBitPosition(initial_storage_ix, storage_ix, storage);
+    EmitUncompressedMetaBlock(input, input_size, storage_ix, storage);
+  }
+
+  if (is_last) {  /* Append the empty last meta-block, then byte-align. */
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
+    *storage_ix = (*storage_ix + 7u) & ~(size_t)7;  /* size_t-wide mask. */
+  }
+}
+
+#undef FOR_TABLE_BITS_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment_two_pass.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment_two_pass.h
new file mode 100644
index 0000000000..928677df42
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/compress_fragment_two_pass.h
@@ -0,0 +1,54 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function for fast encoding of an input fragment, independently from the input
+   history. This function uses two-pass processing: in the first pass we save
+   the found backward matches and literal bytes into a buffer, and in the
+   second pass we emit them into the bit stream using prefix codes built based
+   on the actual command and literal byte histograms. */
+
+#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
+#define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./memory.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
+
+/* Compresses "input" string to the "*storage" buffer as one or more complete
+   meta-blocks, and updates the "*storage_ix" bit position.
+
+   If "is_last" is 1, emits an additional empty last meta-block.
+
+   REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
+   REQUIRES: "input_size" does not exceed the maximal metablock size (1 << 24).
+   REQUIRES: "command_buf" and "literal_buf" point to arrays of at least
+              kCompressFragmentTwoPassBlockSize elements.
+   REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
+   REQUIRES: "table_size" is a power of two in the range [1 << 8, 1 << 17].
+   OUTPUT: maximal copy distance <= |input_size|
+   OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
+BROTLI_INTERNAL void BrotliCompressFragmentTwoPass(MemoryManager* m,
+                                                   const uint8_t* input,
+                                                   size_t input_size,
+                                                   BROTLI_BOOL is_last,
+                                                   uint32_t* command_buf,
+                                                   uint8_t* literal_buf,
+                                                   int* table,
+                                                   size_t table_size,
+                                                   size_t* storage_ix,
+                                                   uint8_t* storage);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/dictionary_hash.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/dictionary_hash.c
new file mode 100644
index 0000000000..16d853fe5a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/dictionary_hash.c
@@ -0,0 +1,1846 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Hash table on the 4-byte prefixes of static dictionary words. */
+
+#include "../common/platform.h"
+#include "./dictionary_hash.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+BROTLI_INTERNAL const uint16_t kStaticDictionaryHashWords[32768] = {
+1002,0,0,0,0,0,0,0,0,683,0,0,0,0,0,0,0,1265,0,0,0,0,0,1431,0,0,0,0,0,0,40,0,0,0,
+0,155,8,741,0,624,0,0,0,0,0,0,0,0,0,0,0,0,66,503,0,0,0,451,0,0,0,0,0,0,0,835,70,
+0,0,539,0,0,0,0,0,0,0,0,0,113,0,0,0,0,718,0,0,0,0,0,0,520,0,1070,0,0,0,0,0,1515,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,78,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,610,0,0,750,0,0,0,307,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,964,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,999,0,0,0,0,0,0,0,0,
+645,75,0,649,52,282,0,200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1621,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,211,225,0,0,687,718,0,0,110,0,58,0,0,0,0,0,0,345,0,0,301,0,0,
+0,203,0,0,1154,674,1949,0,0,0,0,0,0,0,0,0,259,0,0,0,0,0,0,0,1275,0,0,0,1231,254,
+0,0,0,0,0,0,0,277,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,248,0,0,800,0,0,0,29,
+116,100,490,0,0,0,0,0,1641,0,543,0,0,0,0,41,181,0,657,0,0,202,25,0,0,0,0,0,0,0,
+0,0,0,423,0,0,0,113,0,0,0,927,963,0,976,0,206,0,0,0,0,0,0,0,0,0,2002,0,0,0,0,0,
+0,0,0,0,0,0,696,0,1170,0,0,0,0,226,13,0,769,678,551,0,0,0,0,0,0,57,0,0,0,10,188,
+0,0,0,624,0,0,0,0,0,0,0,0,0,1941,130,0,0,0,0,378,269,0,0,528,0,1146,0,0,0,1105,
+0,1616,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,656,0,1940,0,0,0,0,0,173,0,0,0,0,0,0,0,0,0,
+0,0,457,342,810,0,0,0,0,620,0,0,0,0,0,0,0,967,95,447,406,0,0,0,477,0,1268,944,
+1941,0,0,0,629,0,0,0,0,0,375,0,0,0,1636,0,0,0,0,774,0,1,1034,0,0,0,0,0,824,0,0,
+0,0,0,118,0,0,560,296,0,0,0,0,0,0,0,0,1009,894,0,0,0,0,0,0,0,0,0,0,0,0,0,1474,
+366,0,0,0,0,0,0,0,0,0,79,1723,0,0,200,0,0,0,0,0,0,0,0,1759,372,0,16,0,943,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,258,0,0,900,1839,707,30,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,2004,0,0,10,115,0,50,0,0,0,0,0,0,0,0,0,0,520,1,0,738,98,482,0,0,0,0,
+0,0,0,0,0,0,701,2,0,0,0,0,0,0,0,0,557,0,0,0,0,0,0,0,0,0,347,0,0,0,0,572,0,0,0,0,
+0,0,0,0,0,832,0,0,797,809,0,0,0,0,0,0,0,0,0,0,0,528,0,0,0,861,0,0,294,0,0,0,109,
+0,0,0,0,0,0,0,0,1187,290,266,0,0,0,0,49,50,748,0,0,466,399,0,0,0,0,0,0,0,378,0,
+519,0,0,0,0,0,0,0,0,0,0,0,0,667,351,902,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,180,
+0,0,869,0,0,0,0,0,0,0,260,0,0,0,0,0,0,0,0,0,0,523,36,0,0,587,510,809,29,260,0,0,
+0,0,0,0,0,0,570,0,565,0,1464,0,0,0,0,0,0,10,0,0,787,399,380,200,0,0,0,0,516,0,
+844,887,0,0,0,0,0,0,0,44,0,0,0,305,1655,0,0,0,0,0,0,0,0,0,0,0,0,0,0,786,10,0,0,
+0,0,0,0,0,0,0,2031,0,0,0,0,0,684,0,0,0,0,0,1480,0,0,0,27,0,0,0,395,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,813,511,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,56,0,0,0,206,
+496,0,0,0,0,0,909,0,891,0,0,0,0,0,0,0,0,0,687,0,0,0,1342,0,0,0,0,0,0,0,0,0,0,
+160,41,0,0,0,0,0,0,0,0,0,0,0,1718,778,0,0,0,0,0,0,0,0,0,0,1610,0,0,0,0,0,115,0,
+0,0,0,314,294,0,0,0,983,178,193,0,0,0,0,0,0,0,0,0,174,0,0,0,0,0,0,0,0,0,0,848,
+1796,0,0,0,0,0,0,221,0,687,1660,0,0,0,0,262,0,0,179,0,0,0,0,0,66,0,773,0,352,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,35,0,152,0,0,1197,0,0,0,0,0,0,0,0,0,0,0,0,560,0,0,
+564,0,0,0,797,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,556,0,819,0,0,0,0,0,0,0,0,719,544,
+637,5,0,0,0,0,0,0,0,0,0,0,0,101,0,1441,0,0,0,893,0,0,0,0,0,0,0,0,0,238,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,1296,0,0,969,1729,314,60,0,0,0,0,0,1144,0,1147,0,0,0,0,0,
+0,0,0,0,0,437,1853,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,828,0,176,0,0,0,0,0,0,434,39,0,
+0,0,0,0,159,0,0,0,902,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,270,0,0,0,0,801,556,0,0,
+0,0,0,0,0,416,19,197,369,0,0,0,0,0,0,0,0,0,28,34,0,757,0,0,898,1553,0,721,0,0,0,
+0,1012,0,0,0,0,1102,0,898,183,0,0,0,0,0,0,0,136,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,247,277,0,0,0,435,0,0,0,0,0,1311,0,0,0,0,
+0,0,211,437,0,0,0,28,0,0,750,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2012,0,702,
+0,808,0,0,0,0,739,166,0,0,0,0,0,0,719,170,500,0,0,0,0,0,0,0,0,1500,327,0,0,450,
+0,0,0,1318,0,0,0,1602,0,0,331,754,0,0,0,0,0,1368,0,0,557,0,0,0,799,850,0,0,0,0,
+0,0,0,0,908,0,0,0,0,0,19,62,459,0,0,0,0,0,0,0,0,0,0,0,0,1802,0,0,0,0,0,0,0,0,0,
+1397,0,0,0,0,120,238,0,0,0,0,0,0,0,0,0,0,0,1324,0,0,0,0,0,0,0,0,602,201,0,0,164,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,615,0,0,0,0,0,0,0,0,0,0,0,0,0,1243,0,0,0,0,968,0,0,
+0,0,0,0,882,0,0,0,907,329,100,0,0,0,0,0,0,0,0,0,0,0,176,26,9,0,0,265,256,0,0,0,
+0,0,0,0,0,0,643,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,127,610,0,0,0,0,973,2001,0,
+0,0,0,0,0,522,0,0,0,0,0,0,0,0,0,0,0,553,0,0,0,0,0,0,1582,0,1578,0,0,0,0,0,0,0,0,
+0,0,0,795,0,0,0,432,0,0,0,0,0,0,84,126,0,0,0,0,790,0,377,64,0,1529,0,0,0,0,530,
+1857,539,1104,0,0,0,0,0,0,0,0,0,0,0,0,977,0,0,0,34,0,0,0,0,0,0,0,0,0,0,0,24,26,
+0,0,918,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,183,379,0,0,0,0,0,0,0,792,
+0,0,0,0,0,0,0,0,0,1920,0,0,0,0,0,0,0,0,0,771,0,0,0,1979,0,901,254,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,140,0,0,0,0,0,440,37,0,
+508,0,0,0,513,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,533,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,752,920,0,1048,0,153,0,
+0,391,0,0,1952,0,0,0,0,0,0,0,0,0,0,126,0,0,0,0,640,0,483,69,1616,0,0,0,0,0,734,
+0,0,0,0,0,0,480,0,495,0,472,0,0,0,0,0,0,0,0,874,229,0,0,0,0,948,0,0,0,0,0,0,0,0,
+1009,748,0,555,0,0,0,0,0,0,193,0,653,0,0,0,0,0,0,0,0,0,0,984,0,0,0,172,0,0,0,0,
+0,0,0,0,83,1568,0,0,384,0,0,0,0,0,0,0,164,880,0,0,0,0,0,0,0,0,0,0,0,367,121,0,0,
+828,0,0,0,0,0,0,0,1541,0,0,0,0,0,0,0,343,0,0,0,0,0,0,0,0,561,57,0,0,0,0,0,0,0,
+926,0,0,0,0,827,0,194,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,0,0,0,
+0,0,0,896,1249,0,0,0,0,0,1614,0,0,0,860,0,0,0,0,0,0,0,0,964,102,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,899,0,569,0,0,0,0,795,2045,0,0,0,
+0,0,0,104,52,0,0,0,0,0,604,0,0,0,0,779,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,0,0,0,0,
+494,0,677,0,0,0,0,0,0,0,508,0,0,0,0,0,0,0,0,0,1014,0,957,0,0,630,310,0,0,0,570,
+0,0,449,0,64,537,0,0,0,0,0,0,0,244,0,0,0,0,0,0,0,0,0,0,0,0,0,0,702,1650,49,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,338,0,0,0,0,1279,0,0,0,0,0,0,0,896,0,0,
+178,0,0,0,0,0,0,0,0,0,0,0,0,0,808,695,0,0,0,0,539,1117,0,0,0,0,0,0,0,0,257,0,
+1003,0,0,0,1,448,0,516,0,0,960,0,125,4,0,1268,30,748,0,0,852,0,0,0,6,0,0,848,
+236,1385,862,1811,0,0,0,0,698,803,0,0,0,0,0,0,0,610,992,0,0,878,0,1847,0,0,0,0,
+0,0,0,383,0,1404,0,0,0,0,986,0,347,0,0,0,0,0,0,0,0,0,0,0,592,572,0,1411,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,606,0,0,0,0,0,0,
+0,0,0,0,0,0,0,1829,0,0,0,0,0,0,0,0,0,0,0,0,700,748,0,0,0,0,0,0,365,0,0,127,0,0,
+83,198,0,0,0,0,0,0,864,55,0,0,0,0,726,1752,0,0,0,0,0,0,0,0,0,0,0,0,0,1066,0,764,
+0,0,0,0,683,0,550,309,0,0,874,1212,0,0,0,1364,0,986,381,723,0,0,0,1573,0,0,0,0,
+0,1025,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1559,0,0,0,0,493,133,0,0,0,0,148,
+119,0,0,0,0,0,0,537,14,541,0,635,126,0,0,0,495,0,0,0,0,861,998,1009,0,0,0,0,0,0,
+0,359,368,0,0,0,0,304,1577,0,0,0,0,0,1107,0,0,0,0,0,929,0,0,0,1142,0,0,0,0,289,
+175,0,432,0,219,0,0,0,0,0,785,0,0,595,0,0,0,0,0,0,0,0,0,0,0,0,0,80,0,0,0,0,0,0,
+931,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1323,0,0,0,0,290,0,559,1751,127,0,0,0,
+934,1167,0,963,0,260,0,0,0,573,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+580,1689,0,0,0,0,0,0,0,0,0,1164,0,0,982,1922,0,63,0,0,0,0,0,793,0,0,0,0,0,0,0,0,
+0,0,0,0,0,67,790,0,0,0,0,0,0,0,0,0,0,391,443,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,271,0,0,0,0,0,0,0,0,0,0,0,1140,0,0,0,0,340,300,0,897,0,0,0,0,0,0,
+0,0,0,0,890,0,0,0,0,818,321,53,0,0,0,0,0,0,0,0,0,468,0,243,0,870,0,0,0,1765,121,
+0,0,0,180,518,0,822,419,634,0,0,0,0,0,0,0,0,0,898,0,0,0,0,454,36,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,806,0,0,0,0,0,0,0,0,0,0,0,0,1326,0,104,0,0,0,0,0,0,0,
+0,0,260,0,0,0,0,0,0,0,0,0,0,0,0,542,45,0,0,263,1516,42,0,0,0,0,0,468,0,1005,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,288,87,0,0,0,0,0,0,0,0,502,988,133,0,0,0,0,0,0,
+141,0,0,872,1842,0,0,0,0,0,0,0,0,261,619,0,0,0,0,189,246,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,678,0,0,0,0,0,0,0,0,0,0,0,0,285,35,0,517,0,0,0,0,0,0,0,0,0,0,
+540,214,667,0,74,0,0,125,0,0,0,0,0,761,131,0,0,0,0,0,0,0,0,0,0,0,0,0,333,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1338,94,0,0,0,0,0,0,0,0,0,0,0,0,449,0,646,103,
+86,641,2028,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,869,87,277,117,39,0,0,0,0,0,0,0,0,938,
+297,0,0,0,0,558,464,0,0,0,0,0,0,0,0,0,0,731,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1608,0,
+0,0,0,0,0,0,1429,0,0,733,1010,0,0,338,1656,0,0,0,1038,979,2010,0,0,0,0,0,0,0,
+1005,0,0,121,0,0,0,219,20,0,0,0,0,0,0,872,1440,0,0,0,683,0,1070,0,0,522,0,0,0,0,
+439,669,0,0,0,0,0,0,0,0,1245,0,0,0,0,0,1218,0,0,547,233,0,0,0,0,0,0,0,0,0,482,0,
+0,0,0,0,0,0,886,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,795,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,371,0,0,0,0,0,0,0,0,0,0,0,0,0,622,0,625,0,0,0,339,29,0,0,338,0,0,0,
+0,130,0,0,0,0,0,0,0,0,0,307,0,0,0,0,0,0,0,0,0,0,2044,0,0,0,0,0,0,0,0,308,770,0,
+0,0,0,0,1266,0,0,0,0,0,0,0,0,0,400,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,690,739,0,0,
+0,0,0,0,0,990,0,0,0,1831,0,0,0,0,0,0,0,0,0,0,0,0,0,613,0,0,0,0,0,0,0,0,0,0,0,0,
+0,763,0,878,0,0,0,977,0,100,0,0,0,0,0,0,0,0,0,463,0,0,0,0,623,318,0,0,296,463,
+137,0,0,454,0,0,0,1527,58,0,0,0,0,0,0,0,18,48,0,0,0,0,0,729,0,0,0,442,0,0,0,0,
+40,449,0,853,0,0,0,0,0,0,227,0,0,0,0,0,0,1491,0,0,0,0,0,0,0,0,0,0,161,55,0,450,
+0,1174,62,0,207,0,0,0,0,0,0,0,0,869,0,0,0,0,80,213,0,0,0,0,0,0,0,0,0,0,354,820,
+0,0,747,0,0,0,954,0,0,1073,0,556,0,0,0,692,0,191,0,804,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,831,162,0,0,35,0,0,0,0,0,0,0,0,1235,0,0,0,0,0,1234,0,0,
+0,0,0,0,0,0,0,0,96,0,0,0,0,0,0,0,149,0,0,0,902,204,0,0,833,0,287,366,0,0,0,0,0,
+0,992,2020,0,0,0,0,0,0,0,0,0,0,0,356,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,784,0,0,567,
+630,0,0,0,539,0,0,27,0,0,0,0,0,0,0,0,0,0,755,0,0,0,0,0,0,0,0,0,0,0,0,814,0,0,0,
+0,0,0,0,0,0,0,0,0,0,987,0,0,255,761,194,0,1086,0,0,0,0,0,0,1016,0,0,1396,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,562,271,913,0,0,0,0,0,0,0,0,320,153,45,475,0,0,
+0,0,0,0,0,713,0,327,0,0,0,0,0,0,604,552,3,359,0,0,0,0,853,80,0,0,0,0,0,0,0,2016,
+6,887,0,0,0,0,975,0,961,0,0,0,0,0,916,1891,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,100,101,390,708,0,0,0,587,983,512,0,0,0,0,0,0,0,0,0,0,0,645,0,0,0,851,0,0,0,
+0,0,498,140,217,0,0,0,1448,0,0,0,0,0,0,0,0,0,905,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+643,105,0,792,0,0,0,0,0,0,0,0,0,0,0,0,56,0,0,0,0,0,0,0,0,0,0,535,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1748,0,0,0,0,0,754,0,0,0,0,0,0,0,0,0,0,0,0,91,0,0,1565,0,91,792,
+939,3,370,0,0,0,0,95,0,0,0,0,551,7,619,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1150,0,
+0,0,0,0,0,0,0,0,0,0,0,0,671,0,0,0,0,0,888,368,149,0,0,105,1134,0,983,0,0,458,31,
+0,643,0,0,0,312,0,740,0,0,0,1642,0,0,0,0,0,0,0,236,0,0,0,0,0,0,0,59,68,0,0,0,0,
+0,867,795,0,0,0,0,970,1977,0,0,0,0,0,0,0,1148,0,775,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,970,0,0,0,0,0,0,0,0,0,665,71,0,0,0,0,827,0,0,0,0,0,0,0,0,0,
+0,479,0,0,0,0,0,0,0,0,99,607,0,0,0,0,0,0,0,1960,0,0,0,793,0,0,871,41,0,0,241,94,
+0,0,0,0,209,0,0,1497,0,0,0,0,0,0,0,0,0,98,0,0,0,463,0,0,0,0,291,0,0,0,0,0,0,0,0,
+0,0,984,0,0,0,0,0,205,0,0,0,0,0,0,205,42,0,801,0,0,0,0,0,635,0,0,533,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,371,0,1282,0,0,0,825,0,0,0,0,0,0,0,0,0,357,879,467,0,317,0,0,
+0,0,0,0,0,924,0,0,0,0,849,1795,0,0,0,0,895,1799,43,0,0,0,0,0,0,0,0,0,0,1820,0,0,
+0,0,0,0,0,525,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,110,0,493,0,174,417,0,0,
+0,0,0,583,733,0,0,0,0,0,0,481,215,0,0,0,0,477,0,0,0,0,0,0,0,0,308,0,0,0,0,0,0,0,
+0,297,126,0,0,361,1551,0,0,0,0,0,0,871,1807,0,0,0,0,0,1307,0,685,0,0,0,0,0,0,0,
+797,0,858,0,565,0,0,0,0,0,0,0,0,0,0,0,0,434,252,826,0,0,0,0,0,0,791,0,0,0,0,509,
+231,178,601,0,0,0,0,0,0,0,0,43,1591,0,0,0,0,0,1683,0,0,0,0,45,0,0,0,0,0,0,0,0,0,
+0,1120,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,556,494,0,398,0,0,0,1030,0,0,0,0,0,0,
+168,0,0,0,0,0,0,0,0,0,0,973,0,642,0,0,0,0,0,0,0,0,0,1615,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,378,594,0,1093,0,679,112,0,0,0,0,1492,540,1374,714,
+1486,0,0,0,0,825,1511,0,0,0,0,0,0,0,0,0,0,0,0,0,952,0,0,736,143,0,700,0,1540,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1557,0,0,0,860,990,0,0,0,807,0,0,0,0,0,131,
+515,0,646,0,0,0,0,117,728,508,121,0,0,0,0,0,0,357,0,0,0,0,0,0,237,0,0,0,0,0,0,0,
+0,0,1784,0,0,0,0,0,0,0,0,0,0,0,713,348,1536,0,738,0,0,0,0,0,0,0,434,0,0,0,0,0,0,
+366,1877,39,0,0,0,0,0,0,580,0,0,0,0,0,0,0,0,0,0,0,0,0,0,873,0,0,0,0,171,0,625,
+550,107,343,943,0,0,0,0,0,0,0,768,0,0,0,0,0,0,0,799,0,0,0,894,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1673,0,0,0,0,0,0,0,0,0,0,0,1052,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+272,0,441,0,0,3,9,0,0,0,1182,0,1346,0,0,0,0,0,0,0,0,682,0,0,1004,24,0,0,968,0,0,
+0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,185,0,0,0,578,
+474,0,0,0,0,0,0,0,0,0,0,0,0,0,0,113,530,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,556,0,0,0,0,0,0,16,1317,0,0,97,0,0,0,703,0,0,0,0,0,0,0,0,892,0,0,0,1571,0,0,
+426,186,0,1101,0,0,0,0,0,0,0,0,937,585,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,644,291,
+0,0,0,0,749,0,162,0,0,381,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,762,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,628,21,0,0,0,0,0,0,0,0,919,0,0,0,0,0,0,0,0,0,
+633,0,0,0,0,332,0,0,0,0,0,0,0,0,0,1489,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,832,398,0,645,0,0,0,13,0,0,0,0,0,0,0,0,0,0,20,0,800,0,0,0,0,0,0,0,0,0,
+0,0,0,0,1993,0,0,0,0,769,0,0,0,665,0,0,0,0,0,0,0,0,0,0,1426,0,0,0,0,60,0,0,0,
+641,1874,0,644,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1757,0,0,0,0,0,937,0,1652,0,654,0,
+0,0,0,0,0,0,527,0,0,0,0,0,0,0,0,0,0,0,0,0,226,0,0,0,0,0,1486,0,0,0,0,0,0,0,0,0,
+0,0,325,0,0,0,0,0,0,0,1345,0,0,91,0,404,0,0,0,0,0,0,0,0,0,0,0,0,973,0,0,0,0,0,0,
+0,1176,0,549,0,0,0,0,0,0,0,0,0,0,976,0,0,0,0,0,21,0,0,0,0,0,51,0,0,0,0,314,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,198,6,0,1093,0,0,0,0,0,0,0,0,0,
+0,0,0,0,1776,0,0,0,0,0,1528,0,419,0,0,0,0,0,0,0,0,76,138,0,0,0,0,638,29,0,0,0,0,
+0,0,0,1418,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1710,0,0,0,0,0,
+0,0,0,0,0,0,0,532,23,0,0,0,0,0,0,0,862,0,0,946,592,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,70,0,0,0,0,0,0,0,0,0,812,0,0,0,76,0,0,988,0,442,0,0,0,896,0,0,0,0,0,0,
+483,0,0,0,0,1709,0,0,0,0,0,0,119,0,0,0,117,0,309,0,0,0,0,0,596,976,0,0,0,0,0,0,
+0,0,0,0,0,768,0,0,0,0,0,0,0,0,0,518,0,0,0,0,0,0,0,0,0,0,0,0,0,0,863,0,0,0,24,
+145,1020,0,0,1984,0,0,0,0,0,0,0,658,0,0,0,0,0,0,0,0,0,0,106,1827,0,1010,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,582,87,0,0,0,0,0,0,0,267,0,0,0,703,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,496,0,0,0,0,1121,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,249,561,0,0,0,0,0,
+0,0,760,0,0,154,0,0,0,255,0,419,323,0,0,0,0,0,368,0,0,0,0,0,0,0,0,0,0,522,0,0,0,
+0,0,0,0,551,562,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,92,0,0,0,0,
+0,0,0,284,525,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,958,0,0,594,0,0,0,0,0,0,6,479,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,61,0,0,0,0,0,0,0,820,1641,0,1556,0,0,0,0,0,0,0,302,0,0,
+0,0,0,148,0,0,676,0,0,0,0,0,0,1674,0,0,0,0,0,0,178,0,0,0,0,0,0,0,94,389,0,0,0,0,
+91,8,0,0,0,0,0,0,0,0,0,0,112,0,0,0,0,0,0,0,0,0,0,747,0,0,0,0,0,0,0,1746,0,0,0,0,
+0,24,0,1352,158,1530,0,0,718,130,280,1401,0,0,0,0,0,1946,8,0,0,0,0,1607,0,0,0,0,
+0,0,882,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,417,0,0,0,1597,633,433,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,234,0,0,0,0,0,0,0,0,680,1950,0,0,0,0,249,5,0,0,0,
+0,0,0,0,0,0,1216,0,1773,0,0,0,0,0,0,0,0,0,0,0,0,0,0,509,180,0,0,0,0,0,0,0,1002,
+0,0,0,0,0,0,0,0,0,0,0,0,0,931,0,0,0,0,0,0,0,0,747,943,0,1837,0,0,0,0,0,0,0,641,
+0,0,0,0,280,0,0,0,5,0,0,0,0,0,72,545,0,0,0,0,0,0,0,0,0,742,0,0,254,151,872,0,0,
+0,0,0,0,0,0,0,0,0,0,921,0,0,517,833,0,1680,0,0,436,251,584,0,0,0,0,0,0,0,0,0,0,
+0,24,500,0,0,0,0,0,0,0,0,195,1775,514,389,0,0,0,0,0,0,0,743,0,0,0,0,0,0,292,0,0,
+0,227,1283,774,1805,0,0,0,0,0,0,0,0,0,0,119,81,0,0,0,0,0,0,0,0,0,0,0,0,0,0,913,
+1910,0,0,0,1826,490,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1162,700,30,
+0,0,0,721,839,0,0,0,617,0,0,0,0,0,0,0,0,0,169,428,0,0,0,0,0,1648,637,1205,0,0,0,
+1596,0,0,4,266,0,0,0,0,0,0,0,0,0,0,0,862,0,0,0,0,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,
+0,279,157,391,604,0,0,713,945,877,973,0,0,0,0,0,0,0,0,0,0,0,0,0,0,859,567,628,
+1846,0,0,0,0,0,0,0,0,0,762,0,0,191,0,0,0,0,298,0,0,767,909,0,0,0,0,0,0,0,795,0,
+0,301,0,0,1970,0,0,0,0,0,0,0,0,0,1236,0,0,0,0,0,0,644,369,15,0,160,71,0,0,0,0,0,
+1447,0,0,0,0,0,0,0,0,735,1255,76,0,0,0,0,0,0,0,0,0,0,474,0,0,0,0,0,0,0,0,0,0,
+841,0,0,0,0,0,0,0,0,0,0,836,0,0,0,0,0,1622,0,0,735,0,0,0,0,1601,804,1390,394,0,
+0,0,0,0,0,96,0,289,0,0,35,688,0,0,0,667,0,513,0,0,0,0,0,0,0,2034,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,704,0,1524,0,1078,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,306,
+0,0,0,0,0,0,0,431,0,1196,0,0,54,0,15,1448,0,1418,0,0,0,0,0,0,0,0,0,907,0,0,0,0,
+0,0,194,1767,0,0,0,0,0,840,0,900,0,0,0,0,0,0,0,0,0,0,0,1436,0,0,0,0,642,1560,0,
+0,0,0,0,0,94,386,0,0,0,0,0,0,0,0,0,0,830,416,0,0,20,731,0,0,0,0,0,0,0,0,697,0,0,
+662,0,0,0,0,0,0,0,0,0,861,0,0,0,0,0,0,0,871,671,864,0,928,7,0,332,0,0,0,0,1055,
+0,0,0,0,0,0,986,0,0,0,0,0,44,76,0,0,0,0,0,0,0,0,0,0,300,0,0,0,0,0,0,0,175,518,
+831,1108,0,0,0,836,0,1852,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,843,1804,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,246,0,0,0,610,202,0,0,36,0,0,0,240,654,13,0,0,0,0,0,0,0,
+0,391,0,403,0,0,0,0,0,0,0,0,0,0,75,0,366,815,0,0,631,0,0,0,0,0,0,0,0,345,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,952,0,0,0,0,0,0,0,0,0,0,0,673,35,662,0,287,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,5,34,0,0,0,0,0,0,0,0,151,0,427,0,0,382,0,0,0,329,0,0,279,0,0,0,
+0,0,0,0,0,0,0,906,0,0,366,843,0,1443,0,1372,992,0,36,123,0,649,0,0,0,0,0,767,0,
+1018,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,995,0,0,0,0,0,0,0,72,368,0,0,1345,0,0,0,
+589,0,0,0,0,0,0,0,0,0,1988,0,0,220,541,0,0,0,686,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,32,196,0,0,0,0,0,0,0,0,0,0,0,0,0,381,0,0,0,0,0,0,0,0,0,1452,0,
+0,0,616,0,0,0,0,0,0,0,0,0,1229,0,0,0,0,0,0,0,0,0,0,667,120,0,0,0,0,0,0,0,1146,0,
+0,0,0,0,0,0,0,0,0,0,352,0,0,0,0,0,293,0,0,0,0,0,0,0,0,0,0,0,0,0,935,0,1050,0,
+147,88,0,0,923,0,0,0,0,0,934,0,0,0,0,0,0,0,0,114,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,341,222,0,0,0,0,0,0,0,0,0,0,293,0,0,0,0,0,0,0,0,0,0,0,0,
+637,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1159,0,0,0,847,0,0,0,0,0,0,683,0,867,944,0,0,
+0,0,0,1809,0,0,0,0,0,0,0,0,0,0,395,170,0,0,0,0,0,0,0,0,0,0,618,535,0,1625,0,0,0,
+0,0,0,0,0,23,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,778,0,0,0,0,0,46,0,2032,0,0,37,
+1458,0,938,363,34,0,0,0,0,0,0,0,0,0,0,0,0,0,0,314,0,0,0,0,0,0,889,0,0,0,0,0,0,0,
+0,0,0,0,462,0,0,0,0,525,0,0,23,0,0,0,0,0,0,0,0,0,0,0,676,0,0,0,0,0,0,0,0,0,0,0,
+0,498,725,0,0,0,0,7,0,0,0,0,773,0,0,0,164,0,0,0,0,0,0,0,0,936,583,659,1462,0,
+220,0,0,0,0,803,0,0,544,119,0,0,0,0,0,0,0,0,0,0,0,181,176,0,1192,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,1878,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26,0,0,0,0,0,0,
+944,0,0,0,0,0,0,0,273,0,0,0,0,0,855,0,0,0,0,5,127,0,0,0,0,0,0,0,0,752,230,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,162,0,654,48,156,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,240,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,197,
+0,0,0,0,0,0,0,963,0,0,0,0,0,0,0,0,0,0,858,0,0,0,0,0,0,0,0,0,0,676,1978,0,0,102,
+972,0,0,0,0,0,0,0,361,0,461,0,0,0,472,0,0,0,0,0,0,0,0,0,0,0,0,0,0,747,905,0,0,0,
+155,0,0,0,0,0,0,0,0,0,0,319,163,0,0,0,0,0,0,0,0,0,848,0,0,36,631,0,0,0,0,0,1769,
+0,0,0,0,0,144,0,0,0,0,0,0,0,0,0,0,369,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,555,247,0,0,
+996,0,0,189,0,0,0,0,0,0,0,0,0,0,280,0,0,0,0,0,0,0,0,0,0,0,526,746,0,0,345,0,0,0,
+1017,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,651,428,0,0,0,1162,230,327,546,792,0,0,0,
+1203,0,0,0,0,0,0,0,0,0,672,189,0,0,0,0,0,0,99,0,0,0,298,0,0,0,0,0,0,555,397,0,0,
+0,0,0,1157,0,0,0,0,0,0,0,0,0,0,398,1523,0,366,0,0,787,0,0,0,282,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,157,0,941,0,0,0,0,0,1336,0,0,116,0,0,0,0,0,0,787,0,0,0,0,0,0,0,0,0,
+0,170,160,0,1815,0,0,0,0,0,866,0,0,0,0,0,0,0,0,0,689,0,0,0,0,820,0,498,108,0,0,
+0,1119,0,0,0,244,609,1005,0,581,0,0,0,0,0,895,0,0,0,1898,0,0,0,0,0,926,0,0,0,0,
+0,0,0,0,0,0,0,0,0,538,496,294,301,0,0,0,18,0,0,757,0,0,0,0,0,1263,0,820,0,722,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2028,0,0,0,0,124,1875,0,0,0,881,0,0,0,1348,
+0,0,0,0,0,0,0,911,0,954,0,0,0,0,414,0,0,0,0,517,0,0,0,0,0,816,0,0,0,0,0,0,0,0,
+713,0,0,0,0,0,0,0,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,593,150,0,0,0,0,
+0,553,0,0,0,0,0,0,0,0,0,0,108,0,0,0,0,420,0,0,0,0,0,0,0,0,0,0,0,1777,0,0,55,493,
+0,0,81,0,321,980,0,0,0,0,0,0,0,0,0,0,0,0,0,0,362,112,0,74,0,0,0,0,0,0,0,625,0,0,
+0,0,0,0,377,16,0,0,61,281,0,0,0,0,0,0,0,0,0,0,0,0,0,0,224,1031,0,0,0,0,0,0,51,0,
+0,0,0,0,0,0,211,309,15,125,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,789,173,0,439,9,648,
+0,0,294,0,0,0,0,0,0,0,374,8,0,1099,0,0,0,0,0,0,0,575,0,0,0,518,0,0,0,702,0,0,0,
+0,0,0,87,0,0,0,438,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,464,122,0,0,0,1802,0,0,0,0,
+0,0,499,0,0,0,87,476,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,840,283,0,0,0,0,1620,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,609,1160,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,600,
+323,372,0,0,0,0,471,722,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,
+477,1304,0,1774,0,0,88,0,438,12,0,0,0,0,0,0,0,0,671,997,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,639,22,0,0,782,681,0,0,0,0,0,0,0,0,0,0,1013,664,0,942,0,1349,0,0,0,0,0,0,0,
+0,0,0,0,0,356,0,0,32,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,215,289,0,1975,
+109,450,0,0,0,0,0,0,0,0,0,0,705,0,0,664,0,0,0,0,0,0,0,1238,0,0,318,0,0,0,0,0,0,
+0,0,0,0,0,0,0,960,1872,0,0,0,0,0,0,0,0,0,0,0,0,0,0,103,0,0,0,0,0,0,0,0,0,239,
+777,0,26,0,0,0,0,0,0,0,0,0,0,0,0,375,414,0,17,0,0,0,1350,0,955,0,0,0,0,0,0,0,0,
+887,960,0,0,0,0,0,0,0,0,0,0,708,710,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,919,0,0,0,
+0,502,280,7,45,0,0,0,0,777,0,0,0,0,410,0,1110,0,0,0,0,0,0,414,341,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,787,0,0,0,436,0,0,0,0,0,0,0,1707,613,377,96,0,0,0,0,451,
+0,0,0,0,0,0,0,0,0,0,0,0,0,680,0,483,916,0,0,0,0,0,0,937,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,739,0,0,0,0,0,0,0,0,82,0,0,663,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,128,0,0,0,0,0,0,0,0,1087,0,0,0,0,0,0,0,503,0,0,0,0,0,0,9,113,104,324,0,460,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,935,702,434,485,1014,949,423,0,900,
+0,0,0,0,0,0,0,2018,574,0,0,0,0,0,0,0,0,0,0,0,0,1206,0,0,0,0,0,0,0,0,38,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1022,0,0,0,0,143,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,2029,0,0,0,0,0,0,0,0,0,0,0,0,523,0,0,0,0,0,0,625,0,0,425,37,0,0,0,1943,0,0,0,
+0,0,765,0,0,0,0,0,0,0,0,0,0,551,0,0,0,0,0,0,0,0,0,0,0,0,168,0,0,1010,0,0,1994,0,
+0,0,91,0,0,0,0,532,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1884,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,240,15,0,0,0,1227,0,1534,0,0,0,0,0,0,0,0,0,0,0,0,0,0,392,0,
+0,0,0,0,0,0,0,0,0,0,0,655,562,395,0,0,0,501,1019,0,0,0,0,509,267,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1099,0,0,0,0,0,0,948,0,0,0,0,0,0,0,
+462,114,0,0,258,404,0,1717,0,0,0,0,82,1061,0,724,0,0,0,0,0,1133,0,0,0,0,0,0,
+1021,841,0,1021,0,0,0,0,0,0,0,0,0,0,488,373,37,0,0,0,0,564,0,0,0,0,0,513,0,0,0,
+825,0,0,899,0,0,778,0,0,12,1417,0,1116,0,0,0,0,0,0,0,0,0,0,0,0,0,0,114,545,0,5,
+0,0,0,0,0,0,0,192,0,0,763,0,0,0,0,0,0,0,755,759,0,0,0,0,0,0,0,0,0,370,0,1237,0,
+0,0,0,0,0,298,87,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,0,0,
+0,0,0,0,814,991,0,757,57,0,0,0,0,0,0,0,0,0,540,0,0,0,0,608,0,0,0,0,0,0,0,0,1014,
+0,0,0,902,0,0,0,0,553,1668,0,0,0,0,0,0,0,0,0,559,60,0,0,0,0,0,511,0,0,675,0,0,
+156,0,0,0,0,0,0,709,0,698,0,0,0,1745,0,0,0,0,0,0,0,0,0,714,0,0,0,0,0,0,0,0,206,
+8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,776,0,0,0,0,0,0,0,0,0,1272,0,0,
+0,0,0,1059,0,0,0,0,0,0,406,0,0,0,0,0,0,0,0,0,0,947,0,0,0,0,0,0,168,0,0,0,0,0,0,
+870,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,554,0,0,0,0,784,908,0,0,0,0,0,0,
+0,396,358,0,0,0,0,0,0,0,0,2,228,0,0,0,0,0,0,0,0,0,0,0,845,14,0,716,1820,594,0,
+81,1428,0,161,0,782,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,64,0,0,0,0,0,998,0,
+0,0,0,0,0,0,0,0,0,0,0,1043,0,1496,0,0,0,0,0,0,0,0,781,0,0,0,0,0,0,0,817,1114,0,
+1814,958,0,0,0,0,812,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,139,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,236,643,0,0,0,0,0,0,0,0,0,1172,0,0,0,0,0,0,0,0,0,1338,0,0,0,
+0,0,0,0,0,0,0,0,54,0,0,0,256,0,0,351,0,955,1885,0,469,0,0,0,1270,0,744,0,313,0,
+0,0,0,0,0,0,0,402,969,0,0,0,0,0,0,50,0,0,0,0,572,0,0,0,0,847,0,0,0,0,0,0,0,248,
+43,0,369,0,0,0,0,0,0,0,0,0,0,0,0,0,766,0,363,0,0,0,0,0,0,0,0,0,0,0,678,0,0,409,
+258,82,249,0,0,0,0,0,0,0,0,0,0,0,0,32,393,0,788,0,0,0,1281,509,1968,0,0,0,0,39,
+291,0,0,0,589,0,0,54,1059,0,0,0,0,0,0,824,0,0,0,0,0,0,0,0,0,0,1005,0,1598,0,0,0,
+0,0,919,0,0,0,0,0,0,0,0,52,132,0,0,0,0,0,328,0,0,0,0,173,0,0,0,0,0,65,1411,0,0,
+0,0,0,0,0,0,0,0,442,0,842,0,0,0,0,0,0,0,0,0,534,0,0,0,0,0,0,0,0,0,0,0,0,0,845,
+210,0,0,0,0,0,0,0,0,892,0,0,223,0,0,0,0,529,0,0,0,807,0,137,218,0,1444,0,0,0,0,
+0,332,661,0,0,0,0,0,0,0,76,1517,0,0,0,0,0,0,0,0,0,0,0,418,0,0,0,0,0,0,0,0,481,
+379,0,0,0,0,0,149,18,0,0,0,0,0,0,0,0,742,304,142,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,799,925,195,51,0,0,0,0,688,0,0,0,0,697,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1169,751,0,0,0,452,929,0,221,0,1437,0,0,0,0,955,1251,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,192,0,132,0,0,0,0,0,865,0,0,0,0,0,0,0,767,
+672,42,0,0,0,1050,0,0,0,0,0,0,0,0,368,44,0,0,0,0,0,0,0,570,29,0,0,0,0,0,0,227,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,522,0,0,0,0,0,0,0,1529,0,0,0,0,0,0,739,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1667,0,0,0,0,0,0,132,511,0,138,208,1020,0,0,23,565,0,344,0,0,0,
+0,0,922,0,0,0,0,0,0,0,240,0,0,415,171,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,402,0,0,754,31,716,0,982,731,0,0,0,0,0,0,0,888,0,0,0,803,847,0,0,823,
+0,0,0,0,0,0,785,0,0,2,0,0,0,0,0,0,0,532,0,0,681,0,0,314,0,384,684,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,649,447,0,1818,1007,0,321,0,66,360,0,0,0,385,0,0,0,0,0,0,
+0,900,73,254,0,0,0,0,683,1959,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,86,0,0,725,0,0,0,0,0,196,0,0,0,0,0,831,0,0,0,0,723,0,0,0,0,0,994,627,0,0,
+0,0,0,0,0,0,0,0,764,66,0,0,0,0,205,36,0,0,0,0,0,0,0,950,0,0,0,887,111,0,0,831,
+388,165,0,0,0,0,0,155,0,0,0,0,0,0,0,0,0,0,0,0,0,0,780,755,0,0,0,0,898,146,0,0,0,
+0,0,0,0,45,7,0,0,0,0,0,0,0,0,607,0,0,0,0,0,0,65,0,0,0,0,0,0,0,0,0,88,0,0,0,0,0,
+621,600,0,367,0,0,0,0,0,0,0,561,0,559,0,585,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+287,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,672,157,0,0,0,0,714,0,0,0,
+0,0,456,0,925,0,0,0,0,0,0,0,0,19,0,0,0,0,1473,0,0,0,0,0,0,0,0,0,0,113,0,0,0,0,0,
+0,0,0,0,0,0,0,0,69,463,0,0,82,193,2,471,0,0,0,0,633,0,0,0,0,0,0,1148,129,1392,
+542,803,0,0,0,0,0,0,0,0,0,0,0,0,438,0,0,0,0,0,0,875,0,0,0,0,0,237,0,0,0,0,0,0,0,
+65,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,563,0,0,0,9,444,0,0,43,1260,0,0,0,0,0,0,
+971,0,0,699,0,0,0,0,0,1116,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,829,242,0,
+0,593,0,0,0,0,0,0,0,0,201,36,224,0,0,0,0,0,0,1430,0,1806,0,523,0,0,212,1889,0,0,
+0,827,0,0,0,0,0,2043,136,242,0,0,0,0,0,0,284,148,10,0,0,0,0,0,0,1249,0,0,0,807,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,94,0,0,0,494,0,0,0,0,0,0,0,0,1510,0,0,0,0,0,
+0,0,0,0,0,505,1306,0,0,764,268,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,384,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1703,0,0,0,0,159,964,583,0,0,0,
+0,0,0,515,0,0,854,0,0,0,0,0,0,0,0,0,0,0,0,1123,0,0,0,0,0,0,0,136,0,0,0,0,0,1782,
+0,0,44,1287,0,0,0,0,0,732,0,0,0,0,313,679,0,0,316,0,0,0,0,595,0,0,0,0,0,0,753,
+147,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,137,0,0,0,0,414,0,1762,0,0,0,0,0,0,0,0,
+0,0,0,599,0,0,0,0,0,0,0,0,0,1749,0,0,0,1627,0,488,0,0,0,0,0,83,0,0,0,0,676,0,0,
+1639,0,0,0,0,0,0,0,0,0,278,0,0,0,0,0,0,97,0,14,1085,0,0,0,0,0,0,781,388,0,849,
+59,229,0,0,0,0,0,1115,0,0,0,0,108,0,0,0,0,700,0,0,0,0,0,0,0,0,0,1414,0,0,0,0,0,
+0,0,0,0,0,0,0,0,660,737,1035,0,0,0,0,0,0,521,690,0,0,0,0,0,0,0,0,0,0,0,0,272,0,
+0,0,0,0,0,0,0,0,0,1744,0,0,0,0,0,0,128,733,0,0,277,0,0,0,0,0,0,0,0,0,4,0,0,0,0,
+0,0,0,0,0,0,0,0,0,936,1981,40,0,0,0,0,0,0,0,0,775,0,0,0,0,0,0,0,0,0,306,0,0,0,0,
+0,0,0,979,0,0,0,0,0,611,0,0,0,0,0,178,0,0,0,1969,0,0,0,0,0,0,0,664,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,390,0,0,0,1510,0,0,0,0,0,0,0,0,0,0,0,493,0,0,37,0,0,0,0,724,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,96,1537,0,0,168,473,0,0,0,105,0,0,0,0,
+627,438,0,0,0,0,0,0,0,0,0,0,11,1256,0,0,0,1626,0,779,0,0,0,0,25,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,308,0,0,0,0,0,741,0,671,0,0,0,0,649,150,0,0,99,521,0,0,3,339,0,0,0,
+543,0,0,0,0,0,0,0,0,0,1358,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,234,155,
+0,0,0,0,0,0,0,1628,0,766,0,0,0,0,0,0,0,0,0,0,0,0,0,829,0,0,0,1445,0,0,0,486,0,0,
+0,0,2,1635,0,0,0,0,558,0,0,0,0,0,0,0,0,0,0,1461,0,0,0,0,0,599,0,0,0,0,0,0,0,0,0,
+1376,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,93,0,0,0,0,0,0,447,0,0,66,1432,0,0,0,0,
+0,0,307,0,413,609,0,0,0,930,0,0,0,0,21,939,0,0,0,0,0,962,4,651,0,0,0,0,15,579,0,
+0,0,0,0,597,0,0,0,0,0,981,0,0,0,545,0,0,0,0,0,0,0,1558,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,800,17,0,0,17,0,907,0,0,0,110,0,0,0,53,458,0,1983,0,0,0,0,0,0,0,0,0,0,443,0,
+0,0,0,0,0,0,0,0,0,0,924,1844,0,1232,0,0,0,0,70,519,0,993,0,0,0,0,0,0,14,530,0,
+907,0,0,0,0,0,733,0,0,0,0,0,0,0,0,55,0,188,531,56,0,0,1693,0,0,0,0,0,0,0,0,441,
+0,192,928,0,0,0,0,0,241,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1525,0,259,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,512,185,0,464,1603,0,0,0,0,0,0,0,0,0,0,0,1113,
+284,720,0,0,722,0,0,0,0,0,13,0,0,0,0,0,0,0,4,289,43,0,0,0,0,0,0,1694,0,0,0,0,
+193,0,0,0,0,409,0,0,0,0,0,0,0,0,0,0,0,0,308,0,0,1863,0,0,0,0,0,0,0,0,0,790,0,0,
+745,1002,0,0,0,0,0,0,0,0,0,289,68,477,13,0,0,0,0,0,0,0,0,0,0,609,0,0,0,0,0,0,0,
+0,0,0,0,367,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,528,0,0,0,0,0,0,0,0,0,694,58,
+548,0,0,0,0,0,0,687,0,0,0,0,1749,0,0,0,0,0,0,0,0,1004,661,0,0,0,0,0,0,445,0,0,0,
+74,0,0,0,0,213,0,0,0,0,0,0,0,0,0,0,0,0,0,834,0,0,189,1672,0,0,0,0,0,0,0,1548,
+192,0,0,0,0,0,0,0,0,0,0,0,0,0,32,751,0,78,0,0,0,0,0,0,544,1602,105,473,0,0,0,0,
+0,0,156,1949,0,1779,0,0,0,0,0,0,0,0,0,0,0,763,0,0,0,0,0,0,0,0,29,0,0,0,0,0,0,0,
+0,0,0,883,0,0,0,0,0,0,0,488,0,617,0,0,50,0,694,1518,785,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,546,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,1016,0,0,0,577,0,0,0,0,0,0,
+184,935,114,720,0,0,100,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,95,14,0,969,0,0,0,0,0,0,0,
+727,0,1021,0,0,0,0,0,1190,0,0,0,0,0,0,0,0,0,0,0,0,0,153,0,0,0,0,0,0,0,0,0,798,0,
+587,0,0,695,42,0,1929,141,957,0,465,7,908,0,0,450,148,0,0,0,1166,0,0,0,0,0,0,0,
+0,0,0,0,0,253,0,1003,0,0,0,0,0,0,0,0,0,0,0,46,0,0,879,0,806,0,1868,0,0,0,0,0,
+1846,0,0,0,730,0,0,0,0,0,0,0,965,0,0,0,0,506,0,0,0,10,0,0,0,22,0,0,0,0,0,0,0,0,
+0,0,0,0,0,960,296,0,0,0,0,0,0,0,0,0,0,0,587,0,0,0,0,20,0,0,0,32,982,0,0,0,0,0,0,
+0,0,0,0,941,0,0,0,0,435,0,0,0,0,0,0,71,419,0,0,0,0,0,0,688,740,94,345,0,0,679,
+582,0,0,0,0,0,0,0,945,0,0,0,0,0,0,0,0,0,0,0,0,539,0,684,1993,0,0,0,659,0,583,0,
+803,0,704,0,0,0,0,0,198,181,347,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,481,405,203,0,0,99,826,0,0,0,0,0,0,0,492,0,408,0,0,0,0,0,0,0,0,0,0,4,0,0,
+0,0,665,349,137,0,0,0,0,612,1270,0,0,0,0,0,371,0,0,0,826,0,0,0,0,21,1535,858,
+374,0,0,0,0,0,0,311,0,0,0,991,1968,0,0,0,0,494,1647,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,769,0,0,0,0,0,642,0,0,157,123,0,0,0,1435,0,0,0,0,0,0,0,0,0,0,79,0,0,0,
+0,0,0,1425,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,106,393,486,1690,0,0,0,0,
+0,0,0,0,0,0,0,0,756,184,0,0,0,1382,0,0,0,175,0,1493,0,1007,0,0,0,0,0,0,0,0,0,0,
+0,219,0,0,0,0,515,99,0,851,0,0,0,0,0,1278,0,0,0,0,0,0,0,1000,982,0,762,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,910,1819,0,0,0,0,0,0,906,0,0,0,0,0,0,0,0,0,0,1730,0,0,
+0,0,0,0,0,0,0,0,0,1185,0,0,0,0,0,0,0,0,40,0,0,0,147,0,0,0,0,0,0,0,0,0,0,0,0,0,
+650,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,56,30,0,553,0,0,20,597,0,1614,0,0,0,0,0,327,
+49,0,0,0,0,0,0,0,78,0,0,786,134,0,0,0,12,496,0,0,0,0,0,0,0,0,0,0,42,204,0,614,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147,247,0,0,0,0,942,0,0,2023,0,0,0,0,
+0,0,67,285,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1309,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,41,532,0,0,0,0,0,0,0,
+1692,0,0,0,0,55,1704,0,0,0,0,988,0,0,0,223,0,0,0,0,0,0,0,57,1123,0,0,0,0,0,1764,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2015,0,0,0,1599,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,129,0,0,0,0,0,0,0,0,0,0,0,534,0,0,0,0,0,0,0,0,0,0,0,
+0,0,504,621,1248,321,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1397,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,441,75,0,0,0,0,0,0,0,0,0,0,841,0,0,0,0,0,693,0,650,314,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,913,0,0,0,0,0,0,0,0,0,0,0,0,0,0,880,0,475,0,
+0,1016,179,602,111,329,0,0,0,1864,0,0,0,0,846,1888,0,0,780,0,0,0,82,0,0,0,0,821,
+0,0,0,0,0,0,0,0,0,0,0,956,112,0,0,0,261,455,0,0,0,0,0,0,337,385,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,184,1865,0,0,721,16,0,486,0,0,0,265,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,621,0,0,0,0,0,0,0,0,234,0,0,815,0,0,743,
+1987,205,197,0,0,0,0,0,0,0,0,0,314,0,0,0,0,0,0,0,0,0,0,0,0,0,0,219,452,589,0,
+176,333,0,0,0,0,0,0,0,1110,47,0,0,0,0,0,0,0,0,0,0,0,864,0,0,300,0,1237,0,0,0,0,
+0,0,0,0,0,0,0,1685,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,135,395,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,631,0,0,0,0,0,0,835,0,0,0,606,459,0,979,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,612,0,0,0,0,0,0,0,0,158,372,0,854,0,0,0,0,0,
+0,0,1492,0,0,0,833,0,0,0,0,0,0,0,1739,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+195,0,0,0,0,0,0,0,0,730,1997,0,0,0,0,0,0,0,0,61,0,0,0,0,0,0,0,266,751,0,0,0,0,0,
+0,0,821,0,0,0,715,0,0,0,868,0,959,0,0,0,0,0,0,0,0,0,0,0,1053,0,0,0,950,0,1081,0,
+1595,0,0,0,0,59,0,0,0,0,0,0,0,0,0,0,47,684,0,0,0,0,0,0,1606,0,777,0,1020,0,0,0,
+1094,0,0,0,0,0,0,0,350,0,0,0,0,0,0,242,1812,0,0,0,967,0,0,0,473,286,0,0,0,0,0,0,
+798,629,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,513,337,306,0,0,0,0,0,0,0,0,0,
+146,0,0,1646,0,0,0,0,0,465,0,0,0,525,0,0,0,0,0,0,299,165,0,0,0,0,0,0,0,1064,0,0,
+0,0,0,596,0,0,0,0,0,0,0,0,0,0,0,0,0,0,238,1741,0,1233,451,1824,0,0,0,0,733,495,
+0,0,0,0,0,1204,0,0,0,559,341,0,224,21,0,0,0,0,0,0,0,0,97,1446,0,0,0,0,0,0,0,729,
+0,0,565,727,0,1948,0,0,0,519,0,0,0,0,0,0,0,0,0,1193,0,0,0,0,0,0,790,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,323,2,201,0,0,59,0,0,34,0,896,961,0,1285,0,0,46,0,479,0,0,
+0,0,549,0,663,0,0,0,0,0,783,65,682,0,0,0,0,0,11,0,0,0,0,0,522,0,0,0,52,0,0,0,0,
+0,383,0,0,0,0,0,0,0,0,127,0,0,0,0,0,397,194,0,0,635,0,0,0,0,0,0,0,0,0,0,975,0,0,
+0,0,0,0,0,0,0,0,116,0,51,0,0,858,0,1075,535,448,0,0,0,0,0,610,0,0,0,0,0,0,0,0,0,
+0,191,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,267,673,319,94,92,0,551,0,0,218,
+1406,69,256,0,0,952,1980,0,833,0,0,0,0,0,0,0,0,0,0,0,0,39,0,0,0,0,0,0,0,81,0,0,
+0,352,634,0,0,0,0,0,618,0,0,0,0,0,0,73,339,0,0,0,0,0,0,0,0,0,0,0,0,0,0,169,759,
+0,0,0,0,0,0,0,0,0,0,0,0,0,1075,0,0,0,0,0,0,482,649,0,0,0,0,0,0,0,0,386,336,0,0,
+0,1035,0,0,0,0,0,0,0,0,0,0,0,924,0,73,0,0,0,0,0,1971,0,0,0,0,0,0,0,0,0,1344,0,
+501,0,0,0,0,0,0,0,0,46,799,0,0,0,0,0,0,0,276,0,0,0,0,0,0,0,770,0,0,0,0,0,0,0,0,
+0,0,0,0,0,158,0,0,0,0,0,1432,0,0,0,0,0,0,0,0,0,0,25,0,0,2001,0,0,0,0,0,0,0,0,0,
+0,0,0,0,478,0,0,0,0,0,0,91,1461,211,602,0,0,0,0,0,0,0,0,0,1068,0,0,124,567,0,0,
+0,1006,0,0,0,0,0,0,0,0,0,735,812,0,0,323,0,0,0,304,0,0,0,0,0,0,0,0,0,148,0,0,0,
+0,0,0,0,0,0,523,0,0,144,730,0,0,981,0,0,111,0,0,132,0,0,0,0,0,0,890,0,0,0,0,0,
+444,0,1787,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,2041,932,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,937,0,995,0,0,255,0,0,138,863,965,0,0,631,0,0,0,0,1394,16,652,0,0,0,0,0,0,
+0,0,0,0,0,0,0,897,0,321,0,0,0,0,0,922,0,619,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,844,0,0,0,0,0,0,1659,0,1100,0,0,0,1173,0,1930,268,251,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,390,711,0,0,0,0,0,0,0,0,0,0,0,0,0,744,0,0,0,0,0,0,0,0,0,624,0,0,0,
+1998,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1125,0,0,0,594,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,268,0,0,0,0,0,0,0,563,0,0,0,0,0,0,0,0,2,39,0,0,0,1332,0,0,0,0,0,
+0,0,508,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,66,796,0,0,0,0,527,0,0,0,0,98,0,0,576,0,
+0,0,0,0,122,0,276,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,645,0,0,0,0,
+0,0,0,0,0,0,0,290,0,0,762,1292,0,0,0,1315,0,1955,0,0,0,0,0,0,0,0,0,0,210,131,0,
+0,0,0,797,0,38,0,11,488,0,936,0,441,0,0,0,0,0,595,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+991,0,0,0,0,0,0,0,0,0,0,0,653,0,523,0,0,0,903,0,0,0,0,0,0,0,0,0,0,0,0,80,0,0,0,
+0,0,0,0,0,0,432,0,0,314,0,0,0,0,232,1368,534,0,0,0,0,0,27,0,0,0,12,0,0,0,0,0,0,
+0,0,0,264,736,0,1657,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1117,0,127,0,0,0,1208,0,1294,
+0,0,0,0,364,0,0,0,0,0,125,1334,0,0,0,0,0,0,0,0,0,0,0,0,0,0,792,0,0,0,0,0,0,0,
+849,699,0,0,0,0,0,968,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1446,
+124,397,0,0,0,0,0,0,0,0,0,0,0,641,0,0,0,0,0,0,0,0,0,0,0,0,127,346,0,0,517,75,0,
+0,0,0,0,0,0,0,83,0,0,0,0,0,0,1031,0,0,0,0,0,0,0,1470,0,954,0,0,345,304,410,0,0,
+0,0,734,0,0,0,0,0,1822,0,0,0,1798,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,161,
+1865,69,0,0,0,0,0,0,922,0,0,0,0,0,0,0,0,0,0,0,541,0,627,0,0,0,0,0,0,0,0,0,166,0,
+0,0,0,0,0,0,0,0,849,0,0,0,0,0,0,0,717,0,0,0,0,0,0,0,0,0,0,0,0,0,0,600,0,0,0,0,0,
+0,654,0,0,188,273,0,0,0,543,0,410,87,0,0,941,0,0,186,250,0,1785,0,0,0,0,0,1339,
+462,961,0,780,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,529,0,0,0,0,0,0,474,1276,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,24,948,0,0,0,0,657,753,0,0,0,0,941,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,706,985,837,0,1861,0,0,0,0,0,0,0,0,0,0,0,0,0,0,292,933,0,0,0,0,0,
+0,0,0,0,767,0,0,0,0,0,0,0,641,0,0,0,1233,114,0,883,0,274,2008,0,1794,285,0,0,
+571,0,0,0,0,0,0,0,0,0,0,823,960,16,617,0,431,0,0,0,0,0,0,0,0,0,0,567,0,401,0,2,
+781,424,33,0,2006,0,0,274,0,0,1882,0,794,0,0,0,1848,0,0,0,0,0,0,448,47,0,0,0,
+1199,0,0,0,0,0,0,0,0,417,0,0,0,0,0,0,0,0,0,0,295,0,0,0,0,0,0,0,1019,0,0,0,0,0,0,
+0,0,0,0,0,0,0,620,0,0,0,0,464,0,0,0,0,208,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,442,0,930,0,0,0,0,0,516,68,0,0,0,0,0,1128,104,0,0,0,0,0,0,0,0,787,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,491,0,0,0,0,0,0,711,0,0,9,0,101,441,0,0,0,0,0,0,0,0,
+0,0,160,396,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,679,326,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,1128,0,0,0,0,0,737,0,1796,0,0,0,0,0,0,0,0,0,0,0,0,338,574,0,0,
+0,0,0,1096,491,405,0,0,0,0,0,1081,0,0,0,0,0,0,0,0,0,0,0,0,0,1676,0,1207,0,0,0,0,
+0,0,969,354,0,0,0,0,598,0,297,0,0,0,0,0,0,0,0,1772,751,0,37,0,0,1828,0,0,0,0,0,
+0,0,0,0,257,191,582,0,0,0,0,0,0,790,0,0,0,0,0,47,0,0,0,0,0,0,0,449,306,1011,0,0,
+0,0,0,299,0,0,0,0,0,0,837,0,0,0,0,0,0,10,329,0,0,0,0,0,1320,0,0,0,0,0,0,158,657,
+0,1191,0,0,0,0,0,0,7,0,974,1939,0,1665,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,288,
+66,0,0,0,0,494,175,0,1643,0,0,0,0,0,0,0,0,570,750,719,0,0,0,0,0,0,0,0,0,0,0,0,0,
+13,0,0,1247,0,0,221,356,0,0,0,0,0,0,0,0,0,0,694,1809,0,0,0,0,0,0,0,411,0,44,31,
+0,0,0,0,669,0,673,0,0,0,0,0,0,0,0,0,1303,704,299,0,0,0,275,0,0,216,1761,0,0,0,0,
+0,0,0,0,0,0,0,1319,0,0,428,0,0,0,0,0,0,0,0,0,0,514,0,0,0,0,0,0,49,55,102,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,364,0,0,0,0,379,0,921,971,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1258,0,0,0,1058,0,0,0,0,0,656,0,0,0,0,0,144,0,0,0,0,0,0,0,0,0,0,
+0,1373,10,605,0,0,0,0,0,0,0,838,0,1012,0,0,0,0,0,0,0,0,0,0,0,0,0,0,154,365,0,0,
+0,0,0,0,0,0,0,340,0,0,0,0,0,810,0,0,0,0,0,0,495,0,0,0,0,0,0,0,0,0,261,0,535,248,
+0,358,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,567,445,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,697,0,0,0,1336,0,0,0,0,0,0,0,0,917,174,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,972,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,351,0,0,0,0,0,0,0,0,0,0,
+0,0,0,286,0,0,56,438,0,0,0,0,0,1950,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,738,0,0,0,0,0,
+0,0,0,0,0,969,2047,0,0,0,0,0,0,0,818,0,0,0,0,0,0,0,866,0,0,0,0,0,0,0,1467,0,0,0,
+0,0,0,0,0,0,0,0,0,0,972,0,355,0,0,0,116,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,267,189,104,0,0,0,0,1613,0,0,0,0,0,0,0,116,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,886,0,86,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,45,0,0,863,0,0,0,0,0,
+0,0,1953,450,1773,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,381,0,0,0,0,0,0,0,
+0,0,0,0,0,1142,0,1189,0,0,0,663,0,0,0,0,0,0,0,846,0,0,528,0,393,378,0,0,0,0,0,0,
+325,899,680,1880,0,1770,0,0,0,0,0,648,0,0,0,0,0,0,185,167,0,2046,0,0,0,0,0,0,
+249,1645,0,152,0,0,0,1733,0,0,0,0,0,1006,0,0,0,0,0,420,0,0,0,832,0,0,0,0,0,351,
+0,0,0,0,6,40,0,0,60,0,0,0,0,1354,745,724,0,0,0,0,0,0,0,0,772,1951,275,108,639,0,
+0,0,0,0,0,0,0,0,500,1758,0,0,0,0,0,0,0,0,0,0,0,1886,711,205,0,0,965,865,0,0,0,
+534,0,0,0,0,691,0,0,0,237,443,0,878,0,0,0,0,0,1410,0,0,0,0,0,0,0,0,0,0,0,0,0,
+995,0,0,0,0,0,0,0,0,0,0,0,0,0,578,0,0,0,0,881,0,0,0,0,0,0,0,0,822,0,923,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,924,0,0,0,665,0,0,0,0,0,1901,0,0,0,0,0,950,498,93,
+0,0,0,1451,0,0,0,0,0,747,828,788,400,184,0,198,0,0,0,0,0,0,0,0,0,0,0,994,0,0,0,
+0,0,0,0,0,615,320,0,0,0,978,843,905,0,0,0,0,0,0,0,0,850,974,0,0,0,0,6,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,509,0,0,0,0,0,273,0,0,0,0,0,0,0,0,0,0,0,0,0,
+201,0,0,0,1041,0,0,0,1040,0,0,0,0,0,0,0,0,0,693,234,774,0,336,0,1399,22,0,805,
+802,777,167,789,0,0,1705,0,0,0,0,0,0,0,0,0,0,0,10,13,11,0,0,204,264,0,0,56,0,0,
+1917,0,470,0,0,0,0,0,0,0,0,0,0,0,1198,0,0,0,0,0,0,0,0,0,0,1015,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,715,0,0,1002,0,0,0,298,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,867,0,0,724,0,0,0,0,0,0,0,0,0,0,0,0,768,0,0,0,0,0,1066,0,0,0,0,67,0,174,948,
+0,0,0,0,0,0,0,0,0,0,0,0,0,764,0,0,0,0,75,137,0,756,0,0,0,0,0,0,1008,842,643,0,0,
+0,67,0,0,0,0,0,0,0,0,0,0,0,135,821,0,0,0,0,0,0,0,0,736,0,389,355,0,0,786,0,0,0,
+0,0,0,2044,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1030,0,0,0,1083,0,0,0,0,0,
+1226,0,0,0,0,356,319,8,389,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,474,0,0,0,427,
+0,413,0,730,0,0,0,0,0,373,0,0,0,0,0,0,0,0,0,799,0,0,0,1793,0,0,0,322,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,89,290,2,0,0,0,0,0,0,0,0,0,0,672,
+699,1860,0,0,0,737,0,0,0,1612,0,0,0,0,0,0,0,0,0,0,0,145,124,884,0,0,0,0,0,387,0,
+0,0,0,0,0,0,0,0,0,0,679,0,550,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1305,0,0,0,0,0,0,0,
+576,0,0,0,0,0,0,0,686,0,607,0,0,37,0,0,0,0,0,0,0,0,0,101,1726,0,0,0,0,0,958,0,0,
+0,903,0,0,0,0,147,0,0,0,0,0,0,0,0,0,0,0,367,0,0,0,0,690,0,705,273,0,0,887,0,0,0,
+0,0,0,0,0,0,0,0,90,0,0,0,0,0,0,0,908,0,0,0,0,0,0,0,1261,0,0,497,1235,0,429,0,0,
+0,0,904,0,12,125,0,0,0,841,0,0,0,0,0,860,946,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,768,0,770,160,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,271,0,0,0,0,0,0,0,719,0,699,581,0,0,0,0,0,0,0,0,0,0,862,304,0,631,0,0,0,0,880,
+1513,0,0,0,0,0,981,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,434,0,0,0,0,0,550,0,0,476,930,
+824,553,0,0,452,0,151,0,0,0,0,0,0,772,0,292,135,0,0,0,0,0,0,0,504,0,0,1089,0,0,
+0,0,0,0,0,0,0,0,0,783,0,0,0,0,0,0,206,393,0,0,0,0,0,0,0,0,232,912,0,0,0,0,0,977,
+0,0,716,98,0,0,0,0,0,733,0,0,0,0,0,0,0,0,19,0,0,0,0,668,0,360,0,0,0,0,0,0,656,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,726,0,0,0,0,0,0,0,0,0,0,0,0,72,0,0,1269,0,0,463,0,
+0,0,0,0,0,1454,0,1287,245,0,989,0,0,0,0,0,0,0,0,0,107,164,0,0,0,0,0,0,0,1061,0,
+0,0,0,2,484,0,0,0,0,0,0,0,1127,0,0,0,0,0,0,0,460,0,0,0,0,0,932,0,0,0,0,0,0,0,
+588,625,0,0,0,0,76,92,0,0,0,0,0,0,0,0,0,0,0,0,0,104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+763,0,622,0,0,0,253,0,546,0,0,110,0,256,916,0,0,35,212,0,0,746,0,0,0,150,0,0,
+1466,0,0,0,1299,0,0,0,0,0,0,0,0,0,1518,0,0,0,0,0,0,0,0,0,0,0,0,0,1229,0,0,0,816,
+0,0,0,0,0,0,159,0,0,0,0,0,734,869,126,1716,0,0,0,0,0,0,202,232,0,0,0,0,212,0,0,
+0,0,0,111,1003,0,0,0,0,0,0,0,0,0,0,0,1712,0,0,216,0,0,0,0,516,0,0,0,0,0,650,0,0,
+0,0,57,99,0,0,0,0,300,574,0,0,0,0,1023,0,0,302,0,1871,0,728,252,0,0,461,0,0,0,
+323,0,0,0,0,0,0,775,461,0,0,0,0,0,0,172,0,0,464,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,73,727,0,1023,0,0,0,0,0,0,0,0,0,0,577,0,0,0,0,0,0,0,0,1037,0,0,0,0,0,0,
+0,0,280,677,0,0,0,0,0,0,0,0,0,0,0,799,0,0,0,0,159,0,446,1730,0,0,0,0,0,0,0,0,0,
+395,0,0,0,0,145,0,0,0,0,0,0,0,20,0,0,426,608,0,0,0,0,0,977,0,250,0,0,0,0,0,100,
+0,0,0,0,1982,0,0,0,0,0,476,0,0,0,0,0,0,594,76,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,447,0,0,0,0,526,0,0,14,1124,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,188,0,0,0,0,0,0,0,0,362,301,0,0,0,1743,0,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,872,0,831,0,0,208,202,0,0,0,0,0,0,0,1954,0,
+0,0,0,516,872,0,0,313,224,0,0,24,0,11,546,0,0,0,1937,242,241,46,0,0,0,830,1273,
+0,0,0,0,0,0,0,825,327,1006,0,0,0,0,0,1580,516,366,0,0,0,0,0,1736,0,0,0,0,0,0,0,
+0,0,0,0,1935,0,826,0,0,0,0,139,331,0,0,0,0,0,0,0,0,0,0,0,288,0,916,0,0,0,0,0,
+1888,0,0,0,0,0,0,0,1471,0,1570,0,394,0,0,0,0,0,0,0,1931,0,1719,0,658,228,0,0,0,
+0,0,374,0,0,0,0,735,0,0,0,0,0,0,323,498,0,1063,0,0,0,0,155,0,0,0,0,0,0,0,0,906,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1139,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,108,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,616,
+902,0,0,0,0,0,692,0,0,0,0,0,0,823,0,0,0,305,0,0,0,0,0,0,0,681,0,0,0,0,0,214,
+1004,0,0,0,0,0,0,0,23,0,0,1703,0,0,0,0,0,0,0,0,0,1443,0,0,19,714,0,0,0,0,64,737,
+0,0,345,1758,0,0,579,47,0,0,539,139,0,0,0,0,388,0,0,0,0,253,0,0,0,0,0,0,252,0,
+745,0,0,0,0,0,0,0,0,0,0,0,504,107,0,871,0,0,0,229,0,0,0,0,0,903,0,0,71,0,0,549,
+6,47,0,0,0,0,0,0,0,0,0,980,865,705,0,0,0,161,0,0,0,0,143,1331,0,0,0,1388,33,724,
+0,0,0,19,0,0,0,395,0,0,0,0,0,846,210,0,0,0,122,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,695,937,497,0,0,0,0,0,718,0,0,0,0,0,0,0,1581,0,
+0,0,0,0,0,161,49,0,0,0,0,0,0,0,0,0,597,0,0,0,1094,0,0,0,811,908,0,0,0,0,0,0,0,0,
+0,0,1471,0,0,0,0,0,0,0,0,0,0,42,1935,0,0,0,2014,66,2007,0,0,586,0,0,0,0,0,0,0,0,
+0,28,1077,0,0,0,1221,0,0,62,0,0,0,0,0,0,0,0,0,0,1766,0,0,0,0,0,0,0,0,0,0,0,0,25,
+0,499,1388,0,0,97,10,0,0,0,0,0,481,0,0,0,0,0,0,0,0,0,0,37,134,155,486,0,1442,0,
+0,0,0,0,591,0,0,0,0,0,0,310,1173,0,0,0,0,409,1156,0,0,0,482,0,0,263,926,0,0,0,0,
+0,0,0,0,0,0,0,0,0,804,0,0,0,0,0,0,0,0,0,0,0,0,0,1265,0,415,0,348,0,0,0,1012,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,165,1803,0,0,0,0,0,0,0,408,
+0,0,0,0,0,0,257,1321,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1138,0,0,0,249,0,
+0,0,576,0,0,0,0,231,0,0,0,288,0,0,0,0,0,0,0,0,0,433,1487,569,1678,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,87,0,0,0,0,0,779,538,0,0,0,413,0,0,0,
+0,0,0,0,0,0,0,495,0,0,0,0,0,191,54,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,530,567,
+0,0,0,0,0,1484,0,0,0,0,0,0,815,609,0,0,0,0,0,484,0,0,0,0,0,0,0,0,0,0,900,0,0,0,
+0,1335,0,1724,0,0,0,0,0,0,0,0,0,0,0,640,0,0,0,0,0,0,0,0,0,0,0,1831,0,0,0,0,0,0,
+0,0,0,0,0,0,0,474,0,0,0,0,0,0,0,0,0,1103,0,1504,655,1034,0,0,0,0,0,305,0,0,0,0,
+0,0,0,0,0,1236,0,0,429,217,0,0,0,0,739,278,0,0,0,0,0,0,0,708,0,0,0,0,0,1840,233,
+0,0,0,0,0,0,0,0,2017,0,0,0,0,0,1488,0,0,0,1590,0,0,0,0,0,1800,28,0,0,0,0,0,0,0,
+0,0,45,0,36,0,22,1442,378,0,0,0,0,0,0,1507,0,0,0,0,0,0,0,0,0,0,39,0,0,1054,725,
+1955,0,2036,0,0,0,0,0,0,0,0,0,0,896,1871,0,0,0,0,0,0,0,0,0,0,805,0,0,0,0,2046,0,
+0,0,0,17,712,0,617,55,320,271,0,0,0,0,0,0,0,0,0,445,0,184,103,0,0,0,0,0,0,0,0,
+659,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,676,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+337,0,0,0,506,0,0,0,0,0,843,77,0,458,0,0,0,0,0,1420,382,109,142,330,0,0,0,0,0,0,
+0,0,0,0,0,0,87,0,0,0,492,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1239,0,0,0,0,0,0,
+211,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1049,0,321,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,1985,0,0,122,0,0,234,0,0,0,1098,0,0,0,0,0,0,549,253,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,522,131,0,0,149,0,0,0,0,0,0,0,0,0,0,0,0,0,0,507,0,0,0,0,811,630,0,0,0,343,
+0,0,0,0,0,448,591,455,0,1381,0,0,0,0,0,0,0,575,0,0,0,0,0,1175,0,0,0,0,0,0,0,0,0,
+653,0,0,0,1761,0,1198,0,0,0,0,297,1127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,678,0,0,
+164,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,35,0,45,0,0,0,0,0,121,0,0,0,0,0,0,
+0,0,125,0,0,0,1622,0,0,0,0,0,721,145,0,0,0,970,792,0,0,0,715,0,0,0,0,0,1999,0,0,
+74,531,0,0,65,0,0,0,105,220,0,0,0,0,0,0,0,960,0,0,0,0,0,0,428,19,0,0,401,96,0,0,
+0,0,0,1595,116,0,1021,0,0,0,0,0,750,1961,0,0,148,0,0,0,0,0,0,0,0,0,0,0,0,0,75,0,
+0,1383,0,0,0,0,0,0,0,0,0,0,0,0,0,0,779,0,0,0,0,0,0,0,0,598,0,424,0,0,0,0,0,0,0,
+1222,0,0,0,876,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,133,0,0,0,0,187,0,8,0,0,0,0,0,
+0,0,429,0,685,0,0,0,0,0,0,0,0,0,0,0,132,472,0,0,0,0,0,0,0,0,0,938,0,0,874,0,0,0,
+0,0,774,0,0,0,0,0,92,0,0,0,0,0,0,830,701,0,0,0,0,0,426,350,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,603,59,0,0,0,0,0,0,0,0,0,0,293,0,0,0,0,0,0,0,0,0,0,0,0,0,0,441,163,4,0,
+0,0,0,0,0,0,0,0,806,0,0,0,0,0,0,233,0,0,0,0,1994,0,1739,0,0,393,0,47,1038,0,0,0,
+309,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,363,0,0,0,175,0,0,0,0,0,0,0,666,
+0,0,1675,0,1600,0,0,0,808,0,0,0,0,0,0,0,0,0,0,0,280,54,0,0,0,0,0,0,0,0,421,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,249,0,0,103,254,0,262,1,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,805,0,0,0,0,0,0,0,0,0,1630,0,0,0,0,0,0,0,0,0,0,0,0,0,671,972,989,0,0,
+0,0,0,0,0,889,0,0,0,1382,0,0,0,0,0,0,0,775,0,0,0,0,0,0,0,0,0,0,388,202,0,0,0,0,
+16,560,0,0,0,841,0,0,566,0,0,0,938,0,0,0,0,0,0,0,0,0,0,912,0,0,0,1361,0,0,0,0,0,
+0,618,236,0,1854,0,0,318,190,0,1376,0,0,0,0,0,0,0,349,0,0,0,0,951,1972,0,0,0,0,
+0,0,344,0,0,0,0,0,0,0,0,850,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,910,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0,163,85,0,487,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,145,0,83,0,0,1013,0,0,0,1922,0,0,169,557,66,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,1193,82,0,352,454,57,0,0,1333,396,107,0,370,0,0,0,0,0,0,0,0,0,204,0,0,0,
+0,0,1706,0,0,0,0,0,0,0,0,0,0,0,0,394,1204,0,0,0,0,0,1007,0,0,0,1696,0,1519,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,981,0,0,0,0,1072,0,0,0,712,0,1629,0,0,0,0,0,0,0,728,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1271,0,0,0,1608,16,0,0,0,0,485,0,0,0,0,0,0,
+153,27,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1991,0,0,0,0,0,0,0,0,52,0,21,0,
+0,0,0,0,0,0,0,0,819,0,0,0,0,0,917,0,0,0,0,784,0,0,0,0,135,0,0,0,0,0,454,0,0,0,0,
+0,0,0,0,0,852,1719,0,0,0,0,0,852,0,0,0,0,0,952,0,0,0,0,568,0,0,0,0,0,448,0,0,0,
+67,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1826,657,0,729,666,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+669,0,0,0,0,0,0,0,402,0,0,152,0,0,0,0,912,0,0,0,0,0,0,51,320,0,445,0,0,0,0,308,
+0,0,0,0,0,386,0,0,239,0,0,130,83,0,143,0,348,0,0,0,0,0,0,0,958,0,0,0,0,0,210,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,430,0,0,0,0,0,0,0,0,0,0,0,0,7,213,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,801,0,0,0,0,0,0,0,0,0,936,0,108,0,0,
+0,0,0,0,0,0,0,885,587,219,398,364,0,1165,0,0,342,241,303,0,0,0,0,0,0,0,0,0,0,
+1454,0,0,0,0,0,0,0,0,0,0,254,562,0,786,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1294,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,493,216,0,0,0,0,219,341,0,0,0,0,0,
+0,0,0,0,0,130,1734,154,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,701,604,0,0,879,0,195,
+666,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1669,0,0,0,1791,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1228,0,0,0,0,0,623,0,0,0,0,0,0,0,798,0,0,0,0,0,0,0,0,0,0,0,0,84,
+122,0,0,0,837,0,0,0,0,0,0,1013,0,0,577,0,0,0,460,932,0,0,0,0,0,0,0,0,0,0,0,31,
+131,0,0,0,605,0,0,0,1246,0,0,0,0,68,278,165,307,781,0,0,0,0,0,0,33,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,1113,0,0,720,1953,203,0,0,0,0,0,0,0,425,326,0,0,0,0,0,
+0,0,0,0,0,241,1316,0,0,0,0,0,416,0,0,0,1300,0,847,0,0,662,358,0,0,0,0,839,1823,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,654,1522,0,0,0,0,0,0,163,0,0,0,0,0,314,978,0,0,0,
+601,0,0,0,0,0,946,434,0,0,0,402,411,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,1467,
+410,0,0,0,0,0,0,0,0,0,0,0,0,0,0,483,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,677,0,0,0,0,0,0,0,0,0,0,0,0,70,0,0,0,0,1405,0,0,0,0,0,0,108,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,777,0,0,0,0,0,747,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,505,0,326,0,0,164,628,654,0,0,0,
+37,32,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,668,152,0,0,0,0,0,0,0,0,0,0,0,581,
+0,0,0,0,44,126,89,0,0,0,0,0,0,0,0,1531,0,0,0,0,0,0,0,0,203,1167,0,0,0,0,0,0,0,0,
+531,1232,0,0,0,0,0,943,0,670,231,880,0,1617,0,0,0,1957,0,0,0,0,0,0,0,975,0,0,0,
+0,0,0,0,0,0,0,0,242,0,0,0,0,0,0,0,0,0,421,0,0,14,834,0,0,0,0,0,0,0,0,0,0,0,0,
+465,0,0,0,0,0,834,688,413,855,0,0,0,590,0,0,0,0,0,0,0,0,114,0,0,0,0,0,0,0,0,0,0,
+0,45,169,0,0,0,0,0,0,0,0,0,0,0,198,0,0,565,585,0,0,0,0,0,0,0,0,0,0,0,0,0,691,0,
+0,0,593,0,0,0,0,0,0,0,0,0,913,116,0,0,0,0,1360,0,0,0,802,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,673,308,0,709,1006,1895,0,228,0,0,0,1840,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,608,0,0,0,0,0,0,0,0,0,1573,0,2039,136,540,0,0,0,0,0,0,0,
+897,0,0,938,1878,0,0,0,0,0,0,0,0,0,1469,0,999,0,299,0,0,0,0,0,0,0,578,0,0,0,0,0,
+456,0,0,0,1679,163,693,0,0,0,0,0,0,48,755,0,0,0,0,0,0,0,0,0,0,0,0,338,0,0,0,0,
+1091,0,0,0,0,695,0,0,1464,0,0,0,0,0,975,0,0,335,0,0,1979,0,0,0,0,269,1566,630,
+396,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1815,634,0,0,0,966,0,0,0,0,0,0,0,9,
+412,0,958,0,0,579,382,0,212,0,0,0,0,965,681,0,0,16,0,0,0,0,0,0,0,0,0,0,0,0,655,
+0,0,0,0,67,0,0,0,0,0,0,751,0,0,0,0,423,231,0,0,1016,300,0,0,0,0,100,237,0,0,0,
+1370,0,0,0,1208,0,0,0,0,0,1219,129,0,0,0,0,0,0,0,0,0,0,0,0,0,0,199,0,0,427,0,0,
+0,0,949,665,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,712,0,0,0,0,0,1186,0,0,0,0,0,0,0,0,0,0,295,312,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+151,0,0,0,0,588,4,0,0,0,0,0,414,104,0,0,757,263,0,561,0,0,0,320,0,0,0,0,0,0,0,0,
+0,0,0,225,0,0,0,0,37,817,0,974,0,0,0,0,0,0,0,0,0,0,0,0,0,2026,131,235,16,0,590,
+1157,0,0,0,0,0,0,0,0,221,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,140,390,0,0,0,0,
+0,0,0,1144,0,0,0,464,0,0,0,0,0,0,0,0,0,0,0,0,204,407,303,1218,0,0,0,0,5,325,0,0,
+0,0,12,800,0,1783,0,0,0,0,0,0,0,0,0,0,504,621,0,0,0,0,0,0,0,0,0,920,0,376,0,0,0,
+0,0,218,580,0,768,454,0,0,0,0,0,0,0,0,0,0,0,0,676,0,0,0,0,0,0,164,0,0,0,0,0,0,0,
+0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,120,285,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,226,343,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,29,0,0,1812,0,0,8,0,0,0,21,1125,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,1327,0,0,0,0,575,1598,0,0,0,0,0,0,0,0,0,895,0,0,0,959,0,0,
+0,0,0,1759,173,0,0,0,0,266,261,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1427,0,0,300,1033,0,0,0,0,0,0,0,0,0,0,0,584,0,0,0,0,52,734,
+0,0,217,239,0,1129,0,0,0,0,0,0,0,0,732,20,0,0,0,0,0,0,0,0,0,0,0,418,0,0,0,613,0,
+0,0,0,0,0,0,0,0,632,0,0,85,984,0,0,0,0,909,694,7,1109,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,167,0,0,0,0,280,62,0,0,33,0,0,359,186,980,0,0,0,0,0,0,0,0,0,0,0,585,0,0,0,
+211,0,0,336,145,0,1130,0,873,0,0,840,263,0,0,0,0,0,0,0,0,0,916,0,0,0,0,0,0,0,0,
+0,0,155,0,0,0,461,97,0,0,0,0,0,1356,0,0,0,0,0,0,0,593,0,0,0,0,0,1392,0,0,0,0,
+126,0,0,0,0,1179,0,0,0,0,0,162,0,0,0,0,0,765,0,187,0,1286,0,0,0,0,0,0,0,0,0,635,
+0,0,23,215,0,0,0,1306,0,0,97,716,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,657,0,
+0,0,0,0,0,0,0,299,0,0,0,0,0,0,134,0,0,0,0,0,0,0,0,0,0,0,658,1082,0,0,0,0,0,2002,
+0,0,0,0,0,0,833,248,0,0,0,0,0,1654,0,0,531,0,0,0,0,0,0,634,0,0,0,0,0,0,0,0,0,
+853,573,249,0,0,0,0,0,0,0,0,527,0,0,0,0,1419,0,0,0,0,0,0,20,49,0,0,0,992,0,0,0,
+728,0,0,0,0,0,0,0,0,0,0,0,0,497,1579,0,0,0,0,62,268,0,0,0,0,0,0,0,1201,0,0,0,0,
+0,0,0,0,0,0,0,0,495,193,0,0,0,0,106,0,0,859,0,0,23,0,0,0,0,0,0,0,813,925,0,0,
+223,613,953,0,0,0,0,0,0,0,0,666,0,0,0,0,0,0,0,0,0,670,0,0,40,216,0,0,0,0,0,0,
+259,0,0,0,440,1114,0,0,0,0,0,0,0,0,74,475,0,0,188,139,0,797,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,1572,0,0,0,0,39,0,0,0,0,0,0,0,0,0,0,0,0,1594,0,0,0,0,0,0,0,290,0,232,
+0,0,887,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,521,14,0,0,0,0,0,741,0,0,0,992,0,
+0,0,0,0,0,0,0,111,0,0,425,0,0,0,0,0,789,0,0,0,1593,0,1768,0,0,233,0,0,0,0,943,0,
+0,0,0,0,0,0,955,225,245,0,0,0,0,0,0,241,0,0,0,0,1943,0,0,0,1284,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,709,0,0,0,0,0,0,554,0,0,0,0,0,0,0,0,1564,0,0,0,
+443,0,0,0,0,0,0,280,0,0,0,0,0,0,0,0,729,0,0,0,348,0,0,0,0,0,0,0,758,848,298,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,829,1422,189,121,0,0,632,812,0,0,556,0,0,0,0,0,436,172,
+530,844,232,984,0,0,0,0,0,0,0,0,0,0,147,0,0,0,0,0,0,0,0,537,0,0,0,0,0,859,0,0,
+842,0,0,0,0,0,0,0,0,0,0,1291,0,0,0,0,0,0,0,0,0,0,0,1482,612,392,0,0,0,262,31,0,
+0,0,0,0,0,0,0,0,0,753,549,0,0,0,0,0,0,696,0,0,0,0,0,0,0,834,0,0,0,0,0,771,0,0,0,
+0,0,0,0,0,0,0,0,0,0,921,0,0,0,674,0,0,0,0,0,0,0,0,0,0,308,444,0,0,0,0,0,0,805,
+180,0,0,278,271,0,0,214,505,0,1215,0,0,0,0,0,0,387,271,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,1645,42,92,0,459,0,0,330,1557,0,0,0,0,0,0,0,0,113,18,0,0,0,
+1742,0,0,0,965,0,0,0,0,0,0,0,0,0,0,0,0,0,182,0,0,65,0,0,0,0,0,0,0,0,0,0,0,0,973,
+0,0,0,0,0,328,0,0,588,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1786,
+0,0,962,1985,0,0,0,308,508,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,588,0,0,0,0,0,0,614,793,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,290,0,0,0,0,0,0,0,0,0,0,1136,0,0,0,0,0,0,0,0,0,0,796,719,0,0,
+326,210,0,0,0,701,758,472,0,0,0,1947,278,1079,0,0,0,0,0,0,497,41,0,0,634,46,961,
+0,810,524,0,0,33,0,0,0,0,0,0,0,0,0,0,0,0,532,0,997,0,0,0,0,0,0,0,0,0,0,0,1301,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1298,0,671,0,0,0,306,0,0,0,0,0,0,0,0,0,0,
+693,1823,0,0,0,759,0,0,0,0,0,1932,0,0,0,0,0,0,0,0,0,0,0,0,0,0,88,182,0,0,0,1964,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,521,0,0,0,0,0,0,424,857,0,0,0,0,671,328,0,
+529,0,0,0,0,0,716,0,1509,80,67,0,0,0,0,59,141,0,0,0,0,0,0,783,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1498,0,0,0,0,343,430,803,1183,677,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1357,53,0,0,0,0,590,0,0,0,0,0,0,0,0,0,0,
+0,0,0,329,0,0,0,0,0,0,0,469,0,0,0,0,0,0,0,0,0,0,460,0,0,1743,0,0,963,340,0,0,0,
+0,0,1603,0,0,250,0,0,0,0,0,646,218,0,1794,0,0,0,571,0,455,0,0,0,1012,0,0,0,0,0,
+0,0,0,0,0,0,0,597,161,0,349,0,524,0,0,0,0,0,0,0,0,0,0,0,0,322,432,0,0,0,0,0,0,
+325,223,0,0,0,0,0,566,0,0,0,1394,481,436,0,48,457,610,756,618,0,0,0,755,0,1217,
+0,0,0,0,0,197,0,0,0,0,0,0,0,0,0,0,0,0,0,0,544,492,107,414,0,0,0,0,0,0,0,0,0,0,0,
+1007,0,0,0,0,5,0,0,1580,0,0,0,0,0,0,0,0,0,0,0,0,0,673,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,1843,0,0,0,0,0,0,0,0,0,165,0,0,0,0,0,0,809,885,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,498,0,0,0,306,9,0,0,0,0,0,0,0,437,721,146,0,0,0,0,0,0,0,0,0,0,0,177,0,0,0,0,
+0,0,0,1377,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,200,0,959,0,0,0,1928,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1435,0,481,0,0,0,0,0,0,142,84,0,0,0,0,0,
+1015,0,0,0,315,0,0,0,0,0,0,759,0,0,0,0,0,0,0,0,712,0,0,0,1722,0,0,0,0,0,0,0,0,0,
+0,0,0,222,0,985,1414,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1273,
+538,706,0,0,0,0,0,0,0,0,115,0,0,0,0,0,0,0,0,0,0,1781,0,0,0,0,0,431,97,665,42,
+237,0,0,0,264,0,0,213,0,0,0,0,0,0,0,455,0,0,0,906,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+624,0,574,0,0,0,0,0,0,0,0,0,0,0,0,354,0,0,0,1558,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,
+235,723,1813,0,0,0,957,0,830,0,0,0,0,0,0,0,0,0,0,0,0,23,0,0,496,0,0,0,0,0,0,0,
+547,239,88,0,0,0,0,0,0,0,0,0,1310,0,0,0,0,0,0,0,0,80,1076,0,0,118,0,0,0,479,274,
+0,0,0,0,0,0,0,0,0,0,0,497,0,0,669,261,0,0,0,0,13,0,0,0,0,0,0,791,250,642,0,0,0,
+1429,939,949,0,0,0,0,0,0,0,0,0,0,0,0,0,818,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,982,330,0,0,0,0,545,0,0,0,0,0,0,947,0,1188,0,0,0,0,0,904,0,0,0,0,0,1372,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,693,377,0,0,0,0,0,0,0,0,0,0,0,0,0,0,695,0,0,
+713,386,0,0,0,0,128,1575,0,0,0,0,0,0,424,893,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,904,0,0,0,0,0,552,322,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28,1808,49,0,0,0,0,
+1832,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,421,0,0,442,415,0,0,289,
+0,0,0,0,0,206,110,0,0,0,0,0,205,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+19,1539,0,0,0,0,0,1340,0,1194,0,0,0,0,0,0,0,0,549,0,0,0,0,0,0,0,0,1720,0,0,0,0,
+0,0,0,0,0,319,0,0,0,0,112,1180,0,0,0,0,0,0,0,0,0,0,0,967,0,0,0,0,0,0,0,0,0,1940,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,735,0,0,0,0,0,0,0,0,0,897,132,0,0,0,0,0,0,0,
+0,0,0,38,838,0,0,0,379,218,8,660,1017,0,0,0,0,0,0,111,387,647,877,0,0,53,790,0,
+0,0,0,0,0,0,0,458,0,0,0,0,0,0,954,0,0,0,394,0,1367,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,882,0,0,0,0,0,0,0,1409,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,38,124,342,199,0,0,0,0,
+0,0,0,0,0,0,724,628,0,0,0,0,804,266,0,0,0,0,0,208,0,79,0,0,0,0,0,0,0,0,741,0,0,
+0,0,0,0,0,0,0,0,606,0,1494,821,1553,0,0,135,405,0,0,178,100,0,0,0,0,0,0,0,0,0,0,
+0,0,0,481,0,0,0,1378,0,0,0,0,0,0,0,0,0,0,0,0,0,791,33,1227,857,0,467,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,447,0,0,0,0,0,0,86,128,0,0,0,0,0,0,587,0,0,0,692,1018,0,
+195,0,0,0,0,0,0,0,1546,0,0,0,0,0,0,0,0,0,0,0,684,0,0,345,0,0,0,0,0,0,365,0,1683,
+0,0,472,0,433,0,0,0,0,0,0,0,28,0,0,0,997,0,705,3,0,0,0,0,0,0,0,0,0,229,0,0,0,0,
+102,0,0,0,0,866,1022,0,0,0,0,0,0,0,0,0,55,0,115,0,0,0,0,933,0,0,0,0,0,0,0,702,0,
+0,0,0,0,0,0,1728,26,484,0,0,0,185,618,417,0,803,0,0,0,0,0,0,0,0,0,0,0,1262,0,0,
+0,0,0,0,0,0,0,0,0,0,0,633,0,0,0,0,0,0,0,0,0,0,0,0,0,479,262,0,0,0,0,0,0,830,0,0,
+0,0,26,70,0,0,0,0,0,0,0,0,217,0,640,51,0,0,360,1586,0,0,0,0,0,652,0,0,0,0,0,766,
+0,0,0,0,298,737,0,0,0,0,0,0,0,0,0,0,655,222,906,0,0,1013,991,2009,0,0,0,0,503,0,
+0,0,216,154,0,0,0,716,0,844,0,0,0,0,621,252,0,0,0,0,748,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,103,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,576,0,0,0,648,0,0,0,331,0,0,0,
+0,0,0,0,0,0,0,0,0,632,0,0,0,518,107,0,0,0,0,0,0,0,0,851,0,0,0,0,504,0,0,0,0,0,0,
+0,0,0,0,0,0,7,883,0,0,0,0,0,0,0,922,0,0,0,0,0,0,0,0,91,993,0,0,0,0,0,0,200,131,
+10,0,0,0,0,0,0,0,0,0,0,0,0,0,365,1433,0,0,0,0,28,103,0,0,798,1013,0,0,0,0,0,0,0,
+0,39,1925,0,853,0,0,271,519,0,0,0,0,338,0,0,300,470,419,0,0,0,0,0,0,836,0,0,0,0,
+0,0,1937,0,0,0,0,0,393,0,0,357,0,0,0,0,0,703,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,387,0,0,0,0,0,0,75,708,453,1351,0,303,0,0,772,0,0,0,0,0,0,0,0,749,0,0,
+0,0,0,0,0,0,0,0,0,0,0,1065,0,0,717,226,0,0,0,0,0,890,431,626,0,0,0,0,706,0,0,0,
+51,698,0,0,0,0,0,0,0,0,0,0,0,828,0,0,17,0,0,0,0,1929,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,84,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27,871,498,0,101,1793,0,0,0,0,0,0,435,0,
+0,0,0,0,966,0,129,1644,0,0,0,0,0,0,0,0,0,0,0,0,0,997,502,0,0,0,0,0,0,0,0,0,0,0,
+0,823,0,1927,0,0,0,0,98,1756,0,0,0,0,0,0,0,0,0,0,0,0,8,0,160,1046,0,492,0,0,0,0,
+0,0,129,45,0,0,0,0,0,0,353,558,0,0,0,0,0,785,0,0,0,1145,189,0,0,0,26,353,0,0,0,
+0,0,2024,0,0,0,606,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,855,0,0,0,0,0,0,0,0,0,0,0,
+0,0,2011,0,0,5,4,0,0,461,764,0,0,0,1449,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1445,0,0,
+0,1168,0,0,0,233,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,0,0,216,0,0,0,286,0,0,0,
+3,0,0,0,723,536,0,0,0,0,0,285,0,0,0,560,0,0,0,0,0,690,0,0,0,0,0,1246,0,0,63,0,
+33,0,0,0,0,0,520,1862,0,0,0,0,0,0,0,0,0,0,0,0,630,0,0,0,0,554,0,0,0,0,0,1001,0,
+0,0,0,0,446,0,0,0,0,0,0,0,1313,0,0,837,636,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,278,
+0,0,0,0,0,0,0,0,868,0,0,0,0,1010,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1231,0,304,0,506,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26,0,93,1408,794,
+843,704,0,285,114,485,898,145,0,19,2035,0,0,0,1933,0,0,0,0,0,0,0,1728,0,0,0,0,0,
+0,0,0,746,0,0,0,0,0,0,0,995,1964,0,0,0,0,0,0,0,0,0,0,0,1550,0,874,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,1018,0,0,0,814,126,0,0,1264,0,0,814,955,0,0,0,0,0,0,
+0,981,0,0,0,0,0,0,0,0,915,56,0,0,100,0,0,0,0,0,0,0,0,0,638,0,0,0,0,738,0,0,0,0,
+0,0,0,0,0,758,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1112,0,0,214,0,0,0,133,0,196,
+168,0,0,0,0,0,1152,0,1245,0,0,538,169,871,1816,0,0,413,133,0,0,0,978,0,0,43,93,
+371,0,0,0,0,0,0,526,25,0,754,335,0,0,0,0,182,0,0,0,0,0,0,0,0,0,0,0,39,601,0,0,0,
+0,0,0,0,181,370,0,0,1652,358,0,0,0,0,0,0,0,0,0,176,286,0,788,0,0,0,0,0,1223,780,
+254,1003,896,0,0,0,1447,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,744,0,0,0,0,0,126,0,
+41,788,0,0,0,629,0,0,0,0,0,0,0,0,0,0,0,293,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,420,37,1900,0,0,0,0,542,1570,957,0,0,0,0,0,0,
+0,373,31,0,0,0,0,125,325,0,0,0,0,0,0,323,0,0,1547,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1216,0,0,0,0,0,0,198,1905,629,15,0,0,0,0,0,0,20,75,543,1353,0,0,0,533,0,0,6,0,0,
+0,0,0,0,538,0,0,0,0,0,0,0,0,0,0,0,338,0,0,0,0,11,0,0,0,284,659,0,989,0,0,0,0,0,
+0,0,0,0,848,0,0,507,0,0,0,0,0,0,0,0,188,991,884,0,0,0,0,60,959,0,0,0,0,0,1653,0,
+0,922,337,0,638,0,0,500,0,0,0,0,0,0,0,0,0,0,0,166,0,0,0,0,0,0,0,0,0,0,0,0,418,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,760,0,0,0,0,0,0,1277,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,770,0,0,0,0,0,0,0,243,89,0,0,0,0,0,0,0,0,0,1396,0,
+560,0,0,3,1658,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,586,0,0,1271,0,0,0,505,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,637,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1947,
+41,445,0,0,0,0,0,0,0,0,57,189,0,0,371,0,0,0,0,552,0,883,0,923,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,875,0,0,0,1788,49,0,0,0,0,0,
+0,0,0,0,0,0,661,0,0,1945,0,0,0,0,0,794,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,1135,0,0,0,745,0,0,0,0,0,0,0,84,0,0,0,0,0,0,0,410,0,976,0,0,0,0,0,703,0,0,
+0,0,0,0,187,322,0,0,0,227,0,0,0,0,560,0,31,1395,0,0,0,0,0,466,0,0,0,0,643,167,0,
+0,0,1428,0,412,0,0,0,0,0,0,0,0,0,1118,562,0,0,0,0,0,256,0,0,0,0,0,0,1771,0,0,0,
+0,0,1190,132,0,66,0,0,0,0,0,0,0,0,0,0,317,0,0,0,63,0,0,0,0,0,0,0,1475,0,0,0,0,0,
+0,0,288,0,0,0,0,608,0,0,0,0,0,0,0,0,1225,0,1189,0,0,0,0,0,0,0,1468,0,0,0,0,0,
+689,120,0,0,0,0,0,0,0,1,0,329,0,0,0,0,226,0,0,0,0,0,1855,0,0,461,0,0,0,0,1346,0,
+0,0,0,0,85,0,0,299,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1171,0,0,
+0,980,0,0,0,0,0,0,0,0,637,279,0,0,0,0,0,293,0,0,0,0,528,17,0,0,0,0,5,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,48,0,0,0,0,0,0,0,601,0,0,0,0,0,0,779,0,
+196,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1322,737,752,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,412,192,80,0,0,8,1470,0,0,0,0,0,0,0,0,0,873,0,0,0,0,0,835,0,0,0,0,256,
+38,986,0,0,0,0,0,0,0,0,0,91,257,278,911,0,0,0,0,0,0,0,0,749,151,0,0,0,0,0,0,0,0,
+0,0,0,0,989,0,0,990,0,0,90,194,0,0,0,0,0,425,0,0,0,0,0,774,0,0,0,0,0,0,0,0,0,0,
+646,827,752,0,0,0,662,0,22,21,0,0,0,0,0,0,95,239,0,0,0,431,0,0,0,0,0,874,0,0,
+265,65,0,0,0,1350,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1887,0,0,0,0,0,0,0,809,
+0,696,0,1074,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,630,0,0,802,0,0,0,56,776,0,
+970,0,0,797,0,0,0,0,0,400,0,0,1951,0,0,41,0,11,118,0,0,0,0,0,0,0,0,251,615,0,0,
+0,1044,0,0,0,0,0,0,0,0,0,0,0,225,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,370,0,0,0,0,
+104,48,209,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,930,0,0,0,0,
+0,0,0,0,0,0,0,1286,0,759,0,120,385,0,0,0,429,0,0,0,0,0,0,0,0,820,0,0,0,0,0,0,
+199,0,10,151,0,0,0,761,365,0,0,0,0,0,0,0,0,0,46,1086,0,0,0,0,11,1624,58,344,0,0,
+1008,1868,0,0,0,888,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,711,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,440,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,914,1913,0,958,0,885,0,0,0,0,0,0,0,0,0,0,0,
+0,0,847,276,0,302,65,0,0,0,510,0,1514,0,0,0,0,0,0,152,291,0,0,0,0,0,0,0,0,0,0,0,
+0,282,589,0,0,0,0,0,0,0,0,0,0,0,0,0,130,0,0,463,42,0,0,0,0,0,372,0,0,0,0,0,0,0,
+0,0,680,0,0,0,0,0,0,0,0,977,1997,0,0,0,810,0,0,0,0,0,0,0,0,0,1390,0,0,0,644,0,0,
+867,982,0,0,0,0,0,0,0,540,0,123,0,0,0,1978,0,0,0,0,789,623,0,1723,0,1220,0,0,0,
+0,0,0,0,480,0,0,0,0,0,0,0,0,0,0,0,888,0,0,0,0,0,0,0,0,0,0,0,0,299,1995,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,788,179,0,0,0,0,0,0,431,156,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1373,39,80,196,0,0,507,0,0,0,646,0,0,0,0,
+0,1214,0,0,0,0,926,0,0,0,1,114,0,0,0,0,0,446,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,490,0,0,0,491,0,1584,0,0,507,250,0,0,0,158,
+10,362,1,0,0,0,0,0,0,0,0,0,408,228,860,480,0,779,0,0,0,557,0,0,142,197,0,0,0,0,
+0,0,0,0,0,0,0,1490,11,378,316,1057,0,0,18,579,299,1546,0,177,0,0,0,0,0,0,0,0,0,
+411,0,0,0,0,727,439,0,0,0,0,0,1528,0,0,0,0,0,0,58,0,482,0,0,0,505,1952,0,0,0,0,
+0,0,0,0,0,0,0,242,0,0,0,0,0,0,0,953,0,0,0,0,802,0,0,0,0,0,0,0,0,0,0,290,0,0,791,
+52,0,0,0,0,0,0,0,0,0,0,0,112,0,0,0,0,0,1028,0,0,138,0,0,0,0,1811,0,0,0,0,0,0,
+934,1821,0,0,0,0,371,38,0,0,0,1296,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,723,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1330,0,0,0,0,0,0,0,1255,296,109,0,0,0,0,0,660,0,0,0,0,270,591,0,
+0,0,0,0,0,0,1090,81,0,0,0,0,391,0,0,0,0,249,322,0,0,0,0,0,0,0,1412,0,0,0,0,0,0,
+0,0,0,0,526,632,0,0,0,0,0,0,235,144,0,0,0,0,0,940,0,0,0,52,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,309,196,0,0,0,0,0,1912,0,1290,0,686,0,0,625,0,0,0,0,0,0,0,0,0,0,0,412,0,
+0,0,0,43,0,0,0,0,11,967,758,0,0,0,0,0,0,0,0,0,0,0,0,0,0,220,0,0,0,0,0,0,0,0,0,0,
+873,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,890,0,0,2,0,0,0,0,0,0,0,0,1774,
+393,263,0,0,0,0,0,0,818,456,0,0,251,178,393,97,0,0,0,0,0,674,168,0,0,0,0,0,0,0,
+159,1639,0,0,0,0,0,0,0,0,59,934,0,191,0,0,0,0,346,165,0,877,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,128,0,0,0,0,0,0,1297,0,0,0,0,0,0,164,0,0,0,15,132,241,1073,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,228,324,53,0,0,910,0,0,0,0,0,0,0,0,734,705,
+217,73,0,0,0,0,0,0,0,0,636,389,0,1409,0,0,0,0,0,893,0,0,0,0,21,0,0,0,0,0,0,0,0,
+0,0,0,0,0,721,0,0,0,959,0,0,0,0,1433,0,0,0,0,0,0,0,0,0,0,0,0,174,189,0,0,0,0,0,
+0,0,0,0,0,22,2,0,0,815,354,0,0,0,0,425,0,411,60,13,1611,0,0,0,0,0,0,0,0,0,0,0,0,
+0,1478,596,0,0,398,0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,192,1159,0,0,0,0,0,
+592,223,0,0,0,0,0,0,0,245,64,0,0,0,0,278,0,604,0,0,1502,265,0,0,0,0,0,0,0,310,
+1763,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,129,0,0,0,0,0,0,0,0,0,1356,0,0,0,0,0,0,0,
+0,505,0,0,0,0,0,0,0,1000,0,0,966,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,839,0,0,0,0,0,0,
+0,0,0,0,0,0,0,637,0,0,0,0,0,0,0,0,0,0,0,0,0,0,590,0,0,0,0,280,0,0,0,1386,0,0,0,
+281,0,1064,0,0,0,0,0,917,0,0,15,555,0,0,1014,1883,0,0,0,965,0,0,117,33,0,0,0,
+801,0,0,0,0,0,877,0,824,0,0,0,0,0,0,0,0,0,0,0,365,0,0,0,0,0,0,774,7,0,430,0,0,
+231,360,0,0,0,0,0,0,0,0,822,740,0,0,929,1485,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,852,0,0,0,0,17,0,0,0,0,0,0,1001,0,0,0,0,35,831,0,0,384,457,0,0,0,1351,0,27,
+0,0,984,0,264,552,0,401,0,0,0,710,0,1211,0,0,11,205,0,0,0,0,0,0,0,0,0,0,0,0,5,
+579,0,717,0,0,1011,0,0,0,0,0,0,0,0,0,0,0,0,0,0,805,0,0,0,0,0,0,0,0,0,0,0,489,0,
+0,0,1024,0,0,0,0,0,0,0,0,0,892,0,0,0,0,0,0,0,0,0,0,0,0,473,0,0,0,659,864,0,0,0,
+0,0,0,152,819,0,51,0,0,0,0,0,0,0,0,0,0,130,0,0,0,0,0,229,0,0,0,0,674,0,0,0,0,0,
+0,0,0,0,770,52,79,0,0,0,1666,0,409,0,0,0,0,0,0,0,195,0,688,0,0,0,0,0,0,0,0,0,0,
+0,889,174,160,0,0,0,0,0,0,0,0,0,0,0,0,0,872,0,918,569,268,0,0,0,1224,0,1361,0,0,
+0,0,0,0,0,0,0,374,0,0,0,0,0,731,0,0,0,0,190,0,0,0,0,0,0,0,202,506,444,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,835,0,17,1526,0,0,0,0,0,477,0,0,
+994,1374,76,0,0,0,0,0,0,0,355,287,0,1389,0,0,0,0,0,0,455,384,0,0,0,264,0,0,0,0,
+0,0,0,0,0,0,0,0,1001,0,0,0,0,0,0,0,0,0,0,0,0,28,0,0,0,851,175,359,0,0,0,0,0,0,0,
+0,287,740,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,857,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+819,1402,0,0,0,0,0,0,174,224,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1649,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,655,573,0,0,0,0,0,0,0,0,128,351,0,0,0,0,0,0,
+0,0,0,0,0,918,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,687,0,0,0,0,0,0,0,0,0,1525,
+0,0,0,1009,0,0,0,0,0,0,0,340,0,0,0,0,0,0,0,0,0,0,861,0,176,0,0,0,0,0,0,0,0,0,96,
+985,0,615,0,0,0,0,0,0,0,1919,0,0,0,0,0,1131,0,0,0,0,0,0,0,247,0,0,0,0,27,23,0,0,
+0,0,0,0,0,0,38,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1015,0,0,0,0,0,1088,0,0,
+0,0,0,1585,0,0,0,0,227,0,0,0,478,360,0,0,0,95,0,0,0,0,0,0,699,0,0,0,26,0,0,0,0,
+1119,0,0,0,739,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,741,67,0,0,0,0,0,0,464,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,42,0,96,0,0,0,26,342,0,0,0,0,0,0,203,0,0,449,0,
+0,0,0,0,0,0,0,0,0,256,311,0,0,0,0,0,0,758,0,0,0,0,0,0,0,0,827,0,0,0,0,581,64,0,
+1047,0,0,0,0,0,288,0,0,0,0,0,1375,0,0,0,0,0,0,0,0,0,0,0,1309,0,0,0,0,0,0,0,0,
+376,12,0,0,0,0,0,154,0,1520,0,1753,95,502,0,0,0,0,0,0,0,269,291,1197,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,1341,0,1017,0,0,0,0,0,0,0,
+0,857,1810,533,0,0,1453,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,836,211,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,19,0,156,0,0,0,0,1009,0,0,0,0,0,0,0,0,0,0,0,0,0,820,0,0,
+0,0,0,0,0,0,0,228,0,0,0,1131,0,1276,0,0,0,0,0,0,0,0,0,0,0,0,849,1792,0,0,389,
+291,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,525,0,0,
+0,453,0,0,0,0,666,0,0,0,422,0,355,0,0,0,0,165,0,260,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,865,0,0,0,0,0,0,0,1625,0,0,0,234,0,1383,0,0,0,0,0,0,0,0,306,0,0,0,802,1921,
+0,0,0,0,0,0,180,0,0,0,0,1312,814,0,0,0,0,0,0,0,0,0,0,707,0,0,0,1493,11,61,733,0,
+0,0,341,0,0,0,98,0,0,0,0,0,0,0,0,0,0,0,1014,0,0,0,0,0,0,0,142,102,0,0,30,0,0,
+823,0,1045,0,0,0,1930,0,1512,0,0,0,0,0,0,0,87,0,1243,245,0,0,0,0,0,0,0,48,68,0,
+0,0,0,0,0,0,0,126,77,625,938,0,0,351,0,0,0,174,1668,0,707,0,0,0,0,0,0,0,0,0,0,0,
+403,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,282,0,0,0,0,0,0,8,44,0,0,363,115,0,0,0,0,0,0,
+0,0,0,0,0,0,545,761,0,0,835,1254,0,0,0,0,930,1936,0,0,0,0,0,0,0,0,653,0,0,0,0,0,
+344,0,0,1483,673,185,0,0,460,93,753,478,0,0,0,0,0,1020,0,0,0,0,0,0,0,103,0,0,0,
+499,0,0,0,0,0,0,207,0,0,0,0,0,0,0,0,0,0,0,0,0,0,96,0,968,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,3,0,0,0,0,399,0,0,0,0,224,563,0,0,0,0,0,704,0,0,0,0,0,0,0,0,0,0,0,
+1559,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,861,0,0,0,0,946,333,746,0,0,0,0,0,
+0,0,910,0,0,0,0,0,0,0,0,0,0,0,0,0,652,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1393,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1514,0,0,0,0,201,0,510,717,0,0,528,0,0,0,0,
+20,0,0,0,1251,0,0,0,1163,0,0,0,307,0,0,0,0,0,1091,0,0,0,0,0,0,0,0,0,0,0,429,0,0,
+0,881,0,0,0,0,0,621,0,0,0,0,0,0,0,736,0,348,0,868,0,0,0,0,433,0,0,0,771,1495,0,
+0,0,0,215,0,0,0,0,0,124,0,0,0,0,0,0,0,0,0,0,0,55,0,0,0,0,0,0,0,112,62,0,856,270,
+0,572,0,0,0,0,939,0,0,0,0,0,0,0,352,0,0,0,0,0,0,0,0,0,647,0,0,0,0,10,0,0,0,0,0,
+0,0,220,0,0,0,0,0,0,0,0,0,0,0,0,0,464,0,0,109,0,0,0,1746,0,0,0,515,0,0,0,566,0,
+0,0,0,0,0,67,40,0,0,722,992,0,0,923,0,0,0,0,0,0,1145,0,0,0,0,0,0,0,0,0,0,0,568,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,247,0,0,0,0,645,0,0,328,0,0,0,0,0,0,0,0,0,0,0,0,
+1363,0,0,0,0,0,1280,0,0,0,0,0,0,0,0,0,0,7,28,360,162,0,0,0,0,0,0,0,0,0,0,0,764,
+0,0,833,862,0,856,0,0,0,0,0,0,736,92,0,0,948,1944,0,1479,63,590,0,0,0,1521,0,0,
+0,709,0,0,61,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,483,0,0,0,0,1213,
+0,0,0,0,29,1022,0,1712,0,466,0,0,0,0,0,0,0,0,0,0,0,0,0,731,0,0,0,0,0,0,171,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,241,0,0,0,0,0,0,0,0,0,0,0,964,2005,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1100,0,0,0,954,0,0,0,0,0,0,0,0,0,1958,0,0,34,549,994,0,0,449,
+137,850,0,0,670,146,0,0,0,0,518,159,0,0,0,0,0,0,0,0,151,0,0,1027,0,0,0,0,0,0,0,
+0,0,0,983,0,0,0,0,993,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,141,501,0,0,0,
+0,0,0,0,0,0,452,0,0,0,0,0,0,0,0,0,0,233,149,0,0,0,0,0,0,0,0,582,0,0,0,801,0,0,0,
+0,0,0,70,0,0,369,0,36,0,0,0,0,0,0,0,204,721,430,241,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1817,16,1078,1021,0,0,
+406,0,0,0,0,0,69,0,0,0,0,0,1830,0,0,0,824,0,0,0,0,0,0,0,0,0,826,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,816,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1000,717,1845,0,423,0,0,
+0,0,0,0,0,0,510,0,0,1048,0,0,0,618,0,0,0,520,0,0,0,0,990,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,321,0,0,0,0,0,0,0,1135,0,0,921,0,0,0,24,397,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,856,0,0,0,139,282,981,0,288,0,0,0,1890,651,56,0,0,0,0,0,0,0,
+0,261,0,0,0,0,0,0,0,0,0,0,0,617,1403,0,1205,0,0,563,0,0,0,0,0,0,0,0,333,0,0,0,0,
+0,369,0,0,0,0,0,0,0,0,0,622,0,0,0,1407,0,0,0,0,0,0,0,0,0,0,0,0,624,160,0,363,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,619,0,174,292,0,0,656,616,0,0,0,685,0,0,0,0,0,0,0,0,0,0,0,0,0,647,0,0,0,631,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1267,0,0,0,1797,0,0,0,1684,0,0,469,0,531,
+1230,73,0,0,0,0,0,0,0,0,0,268,0,0,0,0,0,102,558,109,65,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,595,0,0,0,0,0,374,1832,0,0,0,0,0,0,16,0,405,6,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,881,0,1495,0,0,0,0,0,0,0,0,0,142,0,0,0,0,0,0,0,0,0,0,21,466,23,
+257,0,0,0,0,0,0,77,404,0,0,0,0,0,0,712,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,860,
+1848,0,0,652,629,0,0,0,0,13,377,0,1842,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1501,0,
+0,0,1906,0,0,0,0,0,0,0,0,0,0,0,0,0,491,234,171,0,0,0,0,631,1186,0,0,0,0,0,0,0,0,
+0,0,0,0,931,0,170,0,0,0,0,0,0,0,0,0,0,1587,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+765,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,424,0,0,714,0,0,0,0,685,0,0,0,0,0,
+0,285,0,0,0,0,0,0,429,0,0,0,0,0,0,0,0,0,0,71,18,0,0,0,0,0,0,0,0,0,0,116,828,0,0,
+0,0,0,0,289,0,0,0,0,0,0,0,0,675,0,0,0,1424,0,0,0,0,0,647,0,0,0,1334,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,36,209,0,0,0,0,0,0,0,342,0,0,0,928,0,0,0,0,0,1838,118,856,654,
+318,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,915,895,454,0,0,513,1425,0,0,
+0,0,0,0,791,0,153,0,0,0,0,0,0,796,909,445,345,0,0,0,0,0,0,0,0,578,0,0,0,1387,0,
+0,0,555,0,0,0,0,0,0,766,0,0,0,0,0,0,0,0,0,0,541,0,0,0,0,0,0,0,0,0,0,0,0,0,880,0,
+0,0,0,0,1506,0,0,983,0,768,0,0,0,0,584,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,737,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,226,30,426,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+117,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,462,0,0,0,385,0,398,0,0,0,0,0,0,
+0,0,0,347,0,0,0,0,125,1259,644,136,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,469,0,0,0,0,0,
+1367,0,0,0,0,0,0,0,0,0,0,0,719,0,0,0,0,0,0,0,0,0,0,0,0,0,1423,0,0,0,0,0,0,0,0,0,
+749,0,0,0,0,546,645,0,0,0,0,0,0,277,0,0,1275,0,0,0,0,0,0,0,453,536,555,0,0,987,
+1107,0,0,90,0,0,0,0,0,0,0,0,860,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+257,0,1768,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1071,0,0,0,0,0,0,0,0,0,0,0,0,0,83,
+0,835,0,0,0,0,0,0,0,2006,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,696,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,95,1718,0,0,0,0,0,0,0,26,0,550,0,0,0,0,0,901,0,0,0,0,0,
+0,822,0,0,122,0,0,0,807,0,0,0,0,0,262,0,620,601,34,0,0,170,0,0,0,0,537,0,0,0,0,
+0,0,0,0,0,332,0,0,208,1909,182,261,0,0,0,1721,0,0,0,0,0,933,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,1609,0,895,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,812,0,0,942,1916,0,0,0,0,
+0,0,0,778,0,0,0,137,0,1314,0,0,0,0,0,0,0,1661,0,0,0,0,0,0,0,1591,0,0,0,0,0,0,
+820,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,185,89,0,1160,230,6,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,63,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1740,0,0,177,
+170,0,1961,0,0,0,0,0,0,0,0,0,0,0,0,91,0,17,44,0,0,0,0,0,0,0,0,0,270,0,296,0,0,0,
+0,0,0,0,1523,0,0,0,0,0,0,0,0,0,0,757,7,0,0,0,0,0,0,0,0,0,0,530,588,0,0,0,0,0,0,
+0,0,0,786,0,0,0,0,0,580,627,88,447,57,0,0,0,0,0,0,0,0,845,735,0,0,0,0,0,31,15,0,
+460,521,12,424,0,0,0,1302,0,0,0,0,0,0,0,595,0,0,0,13,548,97,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,1472,452,1767,0,0,0,0,0,0,0,0,0,0,115,0,0,0,0,0,0,1543,0,1111,0,0,0,0,
+1,0,359,488,0,267,0,0,0,1983,0,0,0,0,0,0,0,1155,0,1575,0,1438,31,0,0,377,101,0,
+0,0,0,0,0,0,0,0,0,0,0,0,476,0,0,0,0,0,0,0,0,2023,0,0,0,0,0,1836,0,0,0,0,35,843,
+0,0,0,0,0,0,0,554,0,0,0,536,625,207,0,1371,0,0,0,424,785,336,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,896,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27,750,0,0,0,0,238,0,0,
+0,0,0,383,0,0,0,0,0,0,0,0,603,725,11,0,0,0,0,0,0,0,0,0,476,0,0,0,0,0,1552,0,0,0,
+0,0,0,0,680,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,435,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1431,0,0,13,112,0,0,356,0,0,0,0,0,0,0,0,0,0,1963,0,0,0,1244,18,0,0,0,0,0,0,867,
+0,0,0,0,0,0,50,708,73,592,0,502,0,0,0,0,0,0,161,347,0,0,0,0,470,33,0,246,571,10,
+0,465,614,0,237,0,0,0,0,0,24,18,0,506,0,0,0,0,0,0,33,309,0,0,0,0,0,0,0,0,0,0,
+140,0,0,0,0,1056,0,0,0,1704,0,0,0,0,0,0,0,1036,0,0,0,0,0,0,0,0,0,1315,432,86,
+264,524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,107,0,0,0,0,0,123,927,0,0,957,1149,0,0,
+0,0,0,778,0,502,196,0,0,0,0,1312,0,0,0,0,0,0,0,855,0,0,0,0,0,0,0,0,0,0,45,1400,
+0,0,0,1003,0,0,0,0,0,1097,0,0,0,0,0,0,0,0,545,612,0,0,0,0,0,0,0,0,0,0,0,0,54,0,
+0,0,0,172,0,0,0,1029,0,0,0,0,0,0,0,0,0,568,0,0,0,732,617,0,0,974,94,989,733,0,0,
+0,0,0,0,1789,0,0,665,2015,0,0,0,0,0,0,806,287,0,0,0,0,0,1539,0,0,0,0,0,0,0,0,0,
+0,182,1563,0,0,0,0,0,0,0,0,0,484,0,0,0,0,0,1623,0,0,0,0,0,0,0,0,878,1833,0,1569,
+0,0,0,0,0,0,0,0,93,0,715,994,0,0,0,0,0,63,0,591,0,0,0,0,0,0,0,749,0,0,0,0,547,
+366,0,0,0,1747,0,0,0,0,0,0,0,89,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1463,0,772,
+893,0,0,0,48,0,0,941,0,0,690,1785,106,440,0,0,0,0,0,0,0,0,0,0,32,0,332,216,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,852,0,
+0,416,564,0,918,0,1764,0,0,3,0,0,274,0,0,0,0,501,0,0,0,0,0,0,0,851,743,0,49,0,
+879,0,0,47,0,0,0,0,0,0,865,0,1202,0,0,0,0,0,0,47,272,0,0,0,0,0,0,0,0,0,0,0,1455,
+0,0,0,0,891,1911,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,761,0,0,0,0,0,0,0,0,0,407,0,
+183,0,0,490,0,0,0,0,0,0,0,35,731,0,0,0,0,0,0,0,819,0,0,0,0,0,0,0,0,0,0,0,0,0,
+575,0,0,0,0,45,818,0,0,77,222,0,0,0,0,849,1880,0,0,0,633,0,1308,0,0,0,0,0,0,0,0,
+0,0,86,0,0,0,0,0,0,0,0,0,0,0,0,0,0,817,0,0,0,0,0,0,0,0,0,882,0,0,0,914,0,0,0,0,
+0,0,0,0,0,0,865,0,0,426,399,58,0,0,0,0,0,0,538,102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,876,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,139,566,0,63,12,0,0,0,
+0,0,0,0,0,0,0,0,0,0,3,114,0,0,0,0,0,0,0,0,576,0,0,0,0,0,0,0,0,933,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,692,0,0,0,0,0,0,0,0,0,0,0,0,752,0,0,0,0,
+0,0,0,0,375,0,1011,0,0,96,0,0,0,0,0,0,0,0,0,148,0,0,0,0,0,0,0,0,0,0,0,337,56,
+666,0,246,394,0,0,0,0,0,0,0,0,437,0,0,0,506,0,0,0,0,1003,0,1163,0,328,0,0,0,0,0,
+0,0,0,1000,0,0,0,0,0,744,101,0,0,0,0,0,726,0,0,176,0,146,9,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,839,0,0,0,0,0,0,223,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,246,1931,29,0,0,1771,0,0,0,0,0,846,6,157,0,0,0,0,0,0,0,0,0,875,0,0,477,
+773,177,639,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1747,0,0,0,0,158,873,0,659,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,391,0,0,0,0,0,0,0,0,0,0,0,0,668,883,0,78,628,0,0,0,
+0,0,0,0,0,0,0,0,0,1460,0,962,0,0,0,0,0,460,0,0,0,0,0,0,0,0,0,0,0,0,0,0,34,199,0,
+0,0,388,474,0,271,0,333,608,0,0,0,0,0,0,49,0,988,0,707,617,0,0,0,0,0,0,0,756,0,
+0,0,0,0,1583,0,0,0,0,0,0,0,0,0,0,285,0,0,0,0,0,0,0,0,0,0,0,0,0,0,344,0,0,0,0,0,
+0,0,0,515,1709,0,0,0,0,0,0,0,0,404,0,0,0,0,500,0,0,0,0,0,0,0,0,0,68,216,0,0,0,0,
+0,0,0,488,353,0,0,177,236,0,0,458,490,0,0,0,0,0,0,756,1504,0,757,0,1735,0,0,108,
+598,0,0,0,0};
+BROTLI_INTERNAL const uint8_t kStaticDictionaryHashLengths[32768] = {
+8,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,6,0,0,0,0,0,6,0,0,0,0,0,0,12,0,0,0,0,4,22,5,0,
+4,0,0,0,0,0,0,0,0,0,0,0,0,14,6,0,0,0,5,0,0,0,0,0,0,0,7,13,0,0,4,0,0,0,0,0,0,0,0,
+0,6,0,0,0,0,8,0,0,0,0,0,0,7,0,7,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,4,0,0,0,4,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,10,4,0,5,13,7,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,0,8,7,0,0,9,0,8,0,0,0,0,0,0,6,0,
+0,9,0,0,0,11,0,0,6,8,7,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,7,0,0,0,6,8,0,0,0,0,0,
+0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,9,0,0,0,8,4,13,7,0,0,0,0,0,
+7,0,5,0,0,0,0,8,5,0,5,0,0,8,7,0,0,0,0,0,0,0,0,0,0,9,0,0,0,8,0,0,0,10,4,0,5,0,4,
+0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,6,0,6,0,0,0,0,8,7,0,4,9,4,0,0,0,0,0,0,
+9,0,0,0,8,5,0,0,0,6,0,0,0,0,0,0,0,0,0,7,18,0,0,0,0,4,9,0,0,4,0,6,0,0,0,6,0,6,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,6,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,5,8,7,0,0,0,
+0,9,0,0,0,0,0,0,0,8,6,10,6,0,0,0,4,0,6,8,6,0,0,0,4,0,0,0,0,0,5,0,0,0,6,0,0,0,0,
+10,0,12,7,0,0,0,0,0,4,0,0,0,0,0,5,0,0,8,7,0,0,0,0,0,0,0,0,9,5,0,0,0,0,0,0,0,0,0,
+0,0,0,0,6,11,0,0,0,0,0,0,0,0,0,8,7,0,0,10,0,0,0,0,0,0,0,0,6,10,0,17,0,8,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,8,6,9,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+7,0,0,11,4,0,5,0,0,0,0,0,0,0,0,0,0,10,5,0,6,8,5,0,0,0,0,0,0,0,0,0,0,11,5,0,0,0,
+0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,9,0,0,0,0,5,0,0,0,0,0,0,0,0,0,6,0,0,8,7,0,0,0,0,0,
+0,0,0,0,0,0,5,0,0,0,6,0,0,10,0,0,0,20,0,0,0,0,0,0,0,0,6,9,5,0,0,0,0,10,4,8,0,0,
+4,13,0,0,0,0,0,0,0,9,0,9,0,0,0,0,0,0,0,0,0,0,0,0,4,8,6,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,12,0,0,4,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,12,5,0,0,10,4,10,7,13,
+0,0,0,0,0,0,0,0,6,0,6,0,6,0,0,0,0,0,0,19,0,0,4,12,6,9,0,0,0,0,4,0,4,11,0,0,0,0,
+0,0,0,12,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,4,0,0,0,0,0,0,0,0,0,6,0,0,0,0,
+0,5,0,0,0,0,0,6,0,0,0,6,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,8,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,17,0,0,0,9,6,0,0,0,0,0,4,0,4,0,0,0,0,0,0,0,0,0,4,0,0,0,
+6,0,0,0,0,0,0,0,0,0,0,13,6,0,0,0,0,0,0,0,0,0,0,0,6,8,0,0,0,0,0,0,0,0,0,0,6,0,0,
+0,0,0,5,0,0,0,0,14,4,0,0,0,4,12,5,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,8,6,0,
+0,0,0,0,0,12,0,9,6,0,0,0,0,13,0,0,5,0,0,0,0,0,4,0,6,0,7,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,13,0,9,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,5,0,0,0,6,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,6,0,6,0,0,0,0,0,0,0,0,8,7,8,4,0,0,0,0,0,0,0,0,0,0,0,7,0,7,0,0,0,4,0,
+0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,8,6,8,4,0,0,0,0,0,6,0,7,0,
+0,0,0,0,0,0,0,0,0,10,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,7,0,0,0,0,0,0,9,5,0,0,
+0,0,0,7,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,9,4,0,0,0,0,0,0,0,4,
+12,5,11,0,0,0,0,0,0,0,0,0,8,7,0,5,0,0,8,7,0,5,0,0,0,0,8,0,0,0,0,7,0,4,10,0,0,0,
+0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+13,5,0,0,0,4,0,0,0,0,0,6,0,0,0,0,0,0,14,5,0,0,0,7,0,0,10,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,6,0,4,0,5,0,0,0,0,8,5,0,0,0,0,0,0,9,5,9,0,0,0,0,0,0,0,0,6,9,0,
+0,4,0,0,0,7,0,0,0,6,0,0,10,4,0,0,0,0,0,6,0,0,10,0,0,0,8,5,0,0,0,0,0,0,0,0,10,0,
+0,0,0,0,18,4,12,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,6,0,0,0,0,8,7,0,0,0,
+0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,8,4,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,
+0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,8,0,0,0,0,0,0,6,0,0,0,4,10,5,0,0,0,0,0,0,0,0,0,0,
+0,4,8,7,0,0,8,6,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,6,0,
+0,0,0,8,6,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,6,0,7,0,0,0,0,0,0,
+0,0,0,0,0,6,0,0,0,7,0,0,0,0,0,0,8,7,0,0,0,0,8,0,12,6,0,6,0,0,0,0,9,7,11,7,0,0,0,
+0,0,0,0,0,0,0,0,0,11,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,8,7,0,0,10,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,6,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,
+0,0,0,6,0,0,0,7,0,4,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,14,0,0,0,0,0,8,4,0,4,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,20,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,12,5,0,7,0,5,0,0,10,0,0,7,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,6,0,4,9,7,0,0,0,
+0,0,7,0,0,0,0,0,0,10,0,9,0,9,0,0,0,0,0,0,0,0,4,9,0,0,0,0,6,0,0,0,0,0,0,0,0,11,4,
+0,6,0,0,0,0,0,0,8,0,8,0,0,0,0,0,0,0,0,0,0,4,0,0,0,5,0,0,0,0,0,0,0,0,13,6,0,0,11,
+0,0,0,0,0,0,0,9,7,0,0,0,0,0,0,0,0,0,0,0,6,18,0,0,4,0,0,0,0,0,0,0,6,0,0,0,0,0,0,
+0,5,0,0,0,0,0,0,0,0,9,7,0,0,0,0,0,0,0,6,0,0,0,0,9,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,11,7,0,0,0,0,0,6,0,0,0,7,0,0,0,0,0,0,0,0,11,
+4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,4,0,0,0,0,8,
+6,0,0,0,0,0,0,9,6,0,0,0,0,0,4,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0,0,0,
+0,6,0,6,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,6,0,6,0,0,10,6,0,0,0,7,0,0,8,0,8,7,0,
+0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,6,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,9,0,0,0,0,6,0,0,0,0,0,0,0,5,0,0,18,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,
+0,0,0,8,7,0,0,0,0,0,0,0,0,12,0,12,0,0,0,11,6,0,5,0,0,12,0,12,5,0,7,11,6,0,0,11,
+0,0,0,12,0,0,4,12,7,8,6,0,0,0,0,8,5,0,0,0,0,0,0,0,4,11,0,0,6,0,7,0,0,0,0,0,0,0,
+5,0,6,0,0,0,0,8,0,10,0,0,0,0,0,0,0,0,0,0,0,9,7,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,
+0,0,0,0,0,0,0,0,0,11,7,0,0,0,0,0,0,10,0,0,5,0,0,12,6,0,0,0,0,0,0,10,6,0,0,0,0,8,
+6,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,5,0,0,0,0,11,0,10,6,0,0,8,6,0,0,0,6,0,7,10,6,0,
+0,0,7,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,10,7,0,0,0,0,
+10,6,0,0,0,0,0,0,8,5,11,0,8,4,0,0,0,4,0,0,0,0,9,4,8,0,0,0,0,0,0,0,11,6,0,0,0,0,
+10,7,0,0,0,0,0,6,0,0,0,0,0,6,0,0,0,7,0,0,0,0,9,6,0,5,0,7,0,0,0,0,0,7,0,0,11,0,0,
+0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,
+0,0,0,0,13,0,8,6,13,0,0,0,11,7,0,7,0,6,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,9,6,0,0,0,0,0,0,0,0,0,6,0,0,9,6,0,6,0,0,0,0,0,5,0,0,0,0,0,0,0,0,
+0,0,0,0,0,5,9,0,0,0,0,0,0,0,0,0,0,4,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,9,7,0,7,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,
+5,11,5,0,0,0,0,0,0,0,0,0,4,0,7,0,6,0,0,0,6,20,0,0,0,10,7,0,5,14,4,0,0,0,0,0,0,0,
+0,0,6,0,0,0,0,8,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,
+0,0,6,0,4,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,9,7,0,0,11,6,15,0,0,0,0,0,
+10,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,7,0,0,0,0,0,0,0,0,9,7,13,0,0,0,0,0,
+0,7,0,0,8,6,0,0,0,0,0,0,0,0,9,4,0,0,0,0,8,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,5,0,0,0,0,0,0,0,0,0,0,0,0,8,5,0,4,0,0,0,0,0,0,0,0,0,0,12,6,8,0,12,0,0,7,0,0,0,
+0,0,5,10,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,
+14,0,0,0,0,0,0,0,0,0,0,0,0,5,0,5,8,7,10,7,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,6,18,6,
+14,7,0,0,0,0,0,0,0,0,11,6,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,6,0,0,0,0,0,0,0,6,0,0,11,7,0,0,10,7,0,0,0,6,8,6,0,0,0,0,0,0,0,6,0,0,
+19,0,0,0,9,5,0,0,0,0,0,0,11,7,0,0,0,7,0,6,0,0,11,0,0,0,0,4,8,0,0,0,0,0,0,0,0,6,
+0,0,0,0,0,6,0,0,8,4,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,7,
+0,7,0,0,0,7,15,0,0,5,0,0,0,0,10,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,7,0,0,
+0,0,0,0,0,0,9,6,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+11,7,0,0,0,0,0,0,0,6,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
+0,0,5,0,4,0,0,0,4,0,4,0,0,0,0,0,0,0,0,0,6,0,0,0,0,11,6,0,0,8,5,14,0,0,4,0,0,0,7,
+17,0,0,0,0,0,0,0,13,5,0,0,0,0,0,5,0,0,0,5,0,0,0,0,16,6,0,4,0,0,0,0,0,0,12,0,0,0,
+0,0,0,6,0,0,0,0,0,0,0,0,0,0,12,5,0,5,0,6,10,0,12,0,0,0,0,0,0,0,0,7,0,0,0,0,8,4,
+0,0,0,0,0,0,0,0,0,0,8,7,0,0,8,0,0,0,8,0,0,6,0,7,0,0,0,5,0,6,0,4,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,6,0,0,22,0,0,0,0,0,0,0,0,7,0,0,0,0,0,6,0,0,0,
+0,0,0,0,0,0,0,13,0,0,0,0,0,0,0,18,0,0,0,9,4,0,0,8,0,9,7,0,0,0,0,0,0,8,6,0,0,0,0,
+0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,9,7,0,0,0,6,0,0,14,0,0,0,0,
+0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,7,10,4,
+0,6,0,0,0,0,0,0,8,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,9,6,0,0,0,0,0,0,
+0,0,11,6,12,7,0,0,0,0,0,0,0,6,0,5,0,0,0,0,0,0,9,6,11,6,0,0,0,0,9,5,0,0,0,0,0,0,
+0,6,8,5,0,0,0,0,8,0,10,0,0,0,0,0,9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,
+5,10,7,0,0,0,5,8,7,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,4,8,7,0,0,0,6,0,0,
+0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,7,0,6,0,0,0,0,0,0,0,0,0,0,0,0,22,
+0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,5,0,0,0,0,0,0,0,
+0,0,0,0,0,17,0,0,6,0,6,12,4,19,6,0,0,0,0,16,0,0,0,0,7,15,7,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,4,10,4,0,0,8,7,0,7,0,0,9,
+4,0,6,0,0,0,4,0,5,0,0,0,7,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,7,10,0,0,0,0,0,11,7,0,0,
+0,0,12,6,0,0,0,0,0,0,0,6,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,
+0,0,0,0,0,0,0,0,0,10,4,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,8,7,0,0,
+0,0,0,0,0,6,0,0,0,4,0,0,11,4,0,0,12,7,0,0,0,0,9,0,0,6,0,0,0,0,0,0,0,0,0,5,0,0,0,
+4,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,6,0,0,0,0,0,0,9,4,0,6,0,0,0,0,0,4,
+0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,6,0,0,0,5,0,0,0,0,0,0,0,0,0,7,9,6,0,7,0,
+0,0,0,0,0,0,6,0,0,0,0,8,6,0,0,0,0,10,6,11,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,5,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,5,0,4,8,0,0,0,0,0,9,7,0,0,0,0,0,0,
+13,5,0,0,0,0,8,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,8,5,0,0,11,7,0,0,0,0,0,0,8,6,0,
+0,0,0,0,7,0,4,0,0,0,0,0,0,0,5,0,6,0,5,0,0,0,0,0,0,0,0,0,0,0,0,10,4,9,0,0,0,0,0,
+0,4,0,0,0,0,10,5,10,7,0,0,0,0,0,0,0,0,16,7,0,0,0,0,0,7,0,0,0,0,11,0,0,0,0,0,0,0,
+0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,5,0,4,0,0,0,7,0,0,0,0,0,0,13,0,0,
+0,0,0,0,0,0,0,0,7,0,4,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,13,7,0,7,0,4,16,0,0,0,0,6,8,7,9,7,0,0,0,0,8,6,0,0,0,0,0,0,0,0,0,0,0,0,
+0,6,0,0,8,5,0,4,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,5,11,7,0,0,11,
+0,0,0,0,0,9,5,0,4,0,0,0,0,9,7,8,6,0,0,0,0,0,0,10,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,
+0,7,0,0,0,0,0,0,0,0,0,0,0,4,10,6,0,7,0,0,0,0,0,0,0,5,0,0,0,0,0,0,10,7,10,0,0,0,
+0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,7,0,6,8,7,12,4,0,0,0,0,0,0,0,5,14,
+0,0,0,0,0,0,4,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,
+6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,4,0,0,20,4,0,0,0,7,0,6,0,0,0,0,0,0,0,0,8,0,
+0,6,15,0,0,6,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,12,0,0,0,9,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,5,0,0,0,0,0,0,8,6,0,0,18,0,0,0,10,0,0,0,0,0,0,0,0,6,0,0,0,6,0,0,9,6,0,
+6,0,0,0,0,0,0,0,0,9,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,7,0,0,0,0,9,0,9,0,0,4,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,9,5,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,10,0,0,0,0,7,0,0,0,0,0,0,0,0,0,7,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,8,0,8,0,0,0,16,0,0,0,0,0,0,0,
+0,0,0,6,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,8,0,0,0,11,0,0,0,0,0,0,0,0,0,0,
+6,0,0,0,0,11,0,0,0,9,7,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,7,0,7,0,6,
+0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,6,
+0,0,0,0,0,0,0,6,0,0,18,0,8,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,7,0,4,0,0,0,
+0,0,0,0,0,0,0,8,0,0,0,0,0,16,0,0,0,0,0,16,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,18,0,0,0,0,0,0,0,0,0,9,7,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,6,0,4,0,
+0,0,0,0,0,0,0,9,4,0,0,0,0,12,5,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,12,5,0,0,0,0,0,0,0,5,0,0,10,6,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,9,0,0,0,11,0,0,6,0,6,0,0,
+0,7,0,0,0,0,0,0,8,0,0,0,0,6,0,0,0,0,0,0,19,0,0,0,12,0,9,0,0,0,0,0,10,7,0,0,0,0,
+0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,16,7,12,
+0,0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,12,6,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,10,5,0,0,0,0,0,0,0,4,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,7,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,4,0,0,0,0,0,0,0,4,0,0,9,0,0,0,8,0,12,4,0,0,0,0,
+0,4,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,12,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,5,0,
+0,0,0,0,0,13,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17,0,0,0,0,0,0,0,8,6,0,6,0,0,0,0,0,0,
+0,4,0,0,0,0,0,6,0,0,9,0,0,0,0,0,0,6,0,0,0,0,0,0,11,0,0,0,0,0,0,0,10,6,0,0,0,0,8,
+6,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,7,0,0,0,0,0,7,0,6,
+10,7,0,0,10,5,11,6,0,0,0,0,0,7,16,0,0,0,0,6,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,5,0,0,0,7,9,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,0,0,
+0,0,0,0,0,8,7,0,0,0,0,11,6,0,0,0,0,0,0,0,0,0,6,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+8,7,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,12,7,0,7,0,0,0,
+0,0,0,0,6,0,0,0,0,9,0,0,0,23,0,0,0,0,0,10,5,0,0,0,0,0,0,0,0,0,4,0,0,11,7,10,0,0,
+0,0,0,0,0,0,0,0,0,0,6,0,0,8,7,0,7,0,0,8,7,8,0,0,0,0,0,0,0,0,0,0,0,14,5,0,0,0,0,
+0,0,0,0,18,6,8,7,0,0,0,0,0,0,0,4,0,0,0,0,0,0,11,0,0,0,9,7,12,6,0,0,0,0,0,0,0,0,
+0,0,12,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,0,0,7,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,6,8,7,0,0,0,6,10,0,0,0,9,0,0,0,0,0,0,0,0,0,8,6,0,0,0,0,0,6,
+10,7,0,0,0,7,0,0,8,4,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,19,0,0,0,0,0,0,
+0,0,0,8,7,8,6,0,0,11,7,10,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,4,8,7,0,0,0,0,0,0,0,0,
+0,5,0,0,13,0,0,0,0,5,0,0,9,7,0,0,0,0,0,0,0,4,0,0,11,0,0,7,0,0,0,0,0,0,0,0,0,6,0,
+0,0,0,0,0,12,7,19,0,8,6,0,0,0,0,0,6,0,0,0,0,0,0,0,0,10,6,8,0,0,0,0,0,0,0,0,0,0,
+6,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,7,0,0,12,0,0,0,0,6,9,6,
+14,0,0,0,0,0,0,6,0,5,0,0,8,7,0,0,0,6,0,4,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,4,0,6,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,5,0,
+7,0,0,10,0,9,7,0,6,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,12,6,0,0,0,0,0,5,0,6,0,0,0,0,
+0,0,0,0,0,0,0,6,0,0,0,0,9,7,0,0,0,0,0,0,11,6,0,0,0,0,0,0,0,0,0,0,11,7,0,0,13,7,
+0,0,0,0,0,0,0,0,12,0,0,4,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,6,11,5,0,5,13,0,8,0,
+0,0,0,6,0,0,0,0,0,0,11,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,11,5,
+9,6,0,0,0,4,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,6,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,10,0,0,0,8,5,0,0,9,0,0,0,8,7,9,0,0,0,0,0,0,0,0,7,0,6,0,0,0,0,0,0,0,0,0,
+0,11,0,13,6,0,0,9,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,
+0,0,0,0,0,5,21,6,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24,5,0,0,0,0,0,0,0,0,10,0,8,0,
+0,6,0,0,0,4,0,0,9,0,0,0,0,0,0,0,0,0,0,4,0,0,8,6,0,6,0,7,10,0,8,4,0,4,0,0,0,0,0,
+5,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,6,12,0,0,7,0,0,0,5,0,0,
+0,0,0,0,0,0,0,6,0,0,8,6,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+15,7,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,6,0,0,24,7,0,0,0,0,0,0,0,0,0,
+7,0,0,0,0,0,0,0,0,0,0,9,6,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,6,0,
+0,0,0,0,0,0,0,0,0,0,0,0,6,0,6,0,4,12,0,0,7,0,0,0,0,0,5,0,0,0,0,0,0,0,0,15,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,8,0,0,0,
+0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,0,9,0,9,6,
+0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,8,6,0,0,0,0,0,0,0,0,0,0,8,4,0,7,0,0,0,0,0,0,0,0,
+22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,4,0,7,0,0,21,7,0,7,9,6,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,8,0,0,6,
+0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,23,0,0,0,0,7,0,0,0,
+4,0,0,0,0,0,0,0,0,9,4,11,7,0,5,0,0,0,0,11,0,0,4,20,0,0,0,0,0,0,0,0,0,0,0,11,5,0,
+7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+21,0,0,0,0,0,0,7,0,0,0,0,0,0,0,5,0,0,0,0,0,6,0,0,0,0,11,6,0,0,0,0,0,0,0,0,9,6,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,5,0,4,9,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,
+0,0,0,10,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,8,7,0,0,11,7,0,0,0,0,0,0,0,4,
+0,4,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,6,0,0,0,5,0,0,0,0,0,0,0,0,0,0,8,7,0,
+0,0,0,0,0,0,0,0,6,0,0,21,6,0,0,0,0,0,6,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,14,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,0,8,0,0,7,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,
+0,0,0,8,7,0,0,11,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,5,0,0,0,7,13,7,10,4,0,
+0,0,6,0,0,0,0,0,0,0,0,0,5,10,0,0,0,0,0,0,5,0,0,0,7,0,0,0,0,0,0,8,4,0,0,0,0,0,6,
+0,0,0,0,0,0,0,0,0,0,12,7,0,6,0,0,10,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,6,0,
+0,0,0,0,7,0,0,8,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,10,5,0,6,0,0,0,0,0,4,0,0,0,0,
+0,0,0,0,0,4,0,0,0,0,9,0,11,4,0,0,0,6,0,0,0,5,12,7,0,5,0,0,0,0,0,4,0,0,0,7,0,0,0,
+0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,4,13,6,10,0,0,0,17,0,0,4,0,0,0,0,0,6,0,4,0,5,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,11,7,0,0,0,7,0,0,0,6,0,0,0,0,0,0,
+0,6,0,4,0,0,0,0,8,0,0,0,0,5,0,0,0,0,0,4,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,9,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,5,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,12,0,0,
+0,0,7,0,0,0,0,0,0,0,0,0,0,0,7,0,0,16,4,0,0,11,0,8,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+8,7,0,4,0,0,0,0,0,0,0,4,0,0,0,0,0,0,8,6,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,
+7,0,0,0,0,0,0,9,0,0,0,0,0,0,0,12,5,10,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,5,0,
+5,18,7,0,0,14,0,0,0,0,0,0,0,9,4,0,7,0,0,0,0,0,0,0,5,0,0,0,6,0,0,0,6,0,0,0,0,0,0,
+8,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,5,0,0,0,7,0,0,0,0,0,0,11,0,0,0,
+10,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,14,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+11,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,14,6,0,0,0,0,11,4,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,10,7,0,6,0,0,9,0,9,5,0,0,0,0,0,
+0,0,0,10,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,6,0,0,8,5,0,0,0,0,0,0,0,0,0,0,11,4,0,6,
+0,6,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,12,4,0,6,8,6,0,0,0,0,0,0,0,0,0,0,8,0,0,5,0,0,0,0,0,0,0,7,0,0,13,0,0,0,0,0,0,0,
+0,0,0,0,0,0,9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,12,7,0,6,0,0,0,
+0,0,0,0,0,0,0,0,0,13,4,0,7,0,0,0,7,0,7,0,0,0,0,0,0,0,0,10,4,0,0,0,0,0,0,0,0,0,0,
+9,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,10,6,21,5,0,0,0,0,8,0,0,0,0,4,0,
+7,0,0,0,0,0,0,11,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,4,0,0,0,0,0,0,
+0,7,9,6,11,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,7,10,0,0,0,0,0,0,6,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,19,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,18,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,7,0,0,0,0,0,0,9,4,10,4,0,7,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,9,7,9,7,10,4,0,7,0,0,0,0,0,0,0,6,12,0,
+0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,
+0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,8,0,
+0,0,0,0,0,5,0,0,8,7,0,0,0,7,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,
+0,0,0,0,4,0,0,8,0,0,6,0,0,0,7,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,6,0,0,0,6,0,6,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,7,9,7,0,0,0,4,8,0,0,0,0,6,11,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,10,0,0,0,0,0,0,0,13,4,0,0,
+12,6,0,6,0,0,0,0,8,7,0,7,0,0,0,0,0,6,0,0,0,0,0,0,12,6,0,4,0,0,0,0,0,0,0,0,0,0,9,
+7,22,0,0,0,0,4,0,0,0,0,0,6,0,0,0,4,0,0,9,0,0,6,0,0,24,7,0,7,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,10,6,0,5,0,0,0,0,0,0,0,7,0,0,8,0,0,0,0,0,0,0,10,5,0,0,0,0,0,0,0,0,0,7,0,
+7,0,0,0,0,0,0,13,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,0,
+0,0,0,0,0,7,12,0,9,4,0,0,0,0,0,0,0,0,0,4,0,0,0,0,8,0,0,0,0,0,0,0,0,4,0,0,0,7,0,
+0,0,0,8,7,0,0,0,0,0,0,0,0,0,4,18,0,0,0,0,0,10,0,0,5,0,0,11,0,0,0,0,0,0,5,0,6,0,
+0,0,6,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,6,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,
+4,0,0,0,0,0,0,10,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,
+0,0,0,5,8,0,0,0,0,0,0,0,8,6,0,0,0,0,0,0,0,0,20,7,0,0,0,0,0,0,0,0,0,0,0,4,9,0,12,
+6,8,0,14,7,0,5,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17,0,0,0,0,0,10,0,0,
+0,0,0,0,0,0,0,0,0,0,6,0,6,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,9,6,0,7,12,0,0,0,0,4,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,7,
+0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,4,0,0,9,0,
+12,6,0,5,0,0,0,6,0,4,0,6,0,0,0,0,0,0,0,0,10,7,0,0,0,0,0,0,8,0,0,0,0,4,0,0,0,0,
+10,0,0,0,0,0,0,0,8,6,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,5,0,0,0,0,0,0,0,0,0,0,0,
+6,0,0,12,6,20,5,0,0,0,0,0,0,0,0,0,0,0,0,9,5,0,5,0,0,0,6,13,7,0,0,0,0,15,6,0,0,0,
+6,0,0,13,7,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,5,0,7,0,0,0,0,0,4,0,0,0,0,0,0,0,0,
+10,6,0,0,0,0,0,6,0,0,0,0,9,0,0,0,0,0,19,6,0,0,0,0,0,0,0,0,0,0,13,0,11,0,0,0,0,0,
+0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,10,6,0,0,0,0,0,0,0,0,10,0,0,6,0,0,0,0,8,0,0,
+0,9,0,15,4,0,6,0,0,0,0,0,6,12,0,0,0,0,0,0,0,14,7,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,
+0,0,0,0,0,8,7,0,0,0,0,0,6,10,0,0,0,0,0,0,0,0,7,8,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,7,10,5,0,0,0,0,8,0,0,0,0,4,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,6,12,0,0,0,10,7,0,5,0,6,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,6,0,4,0,0,0,0,0,7,0,0,0,0,0,0,0,4,9,6,0,0,0,7,0,0,0,0,0,0,0,0,8,6,0,0,
+0,0,0,0,0,4,12,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,7,0,
+0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,12,6,0,6,9,4,0,0,8,4,0,6,
+0,0,0,0,0,4,0,0,0,0,0,0,0,6,0,0,9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,8,0,0,6,13,4,0,5,8,0,0,0,0,0,0,0,8,0,0,0,10,5,0,0,9,0,0,0,0,0,0,6,0,0,
+24,0,0,0,0,0,0,0,8,0,0,7,0,0,12,0,8,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,7,0,
+6,8,0,10,0,9,7,0,0,0,5,0,0,0,0,0,0,0,4,8,5,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0,0,4,0,0,0,0,0,6,0,0,0,0,0,5,0,0,0,0,8,0,0,
+0,0,0,8,6,0,0,0,0,0,0,0,0,0,0,8,6,0,0,0,0,10,4,0,0,0,0,0,0,0,6,0,0,0,4,20,0,0,7,
+10,6,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,7,0,0,0,0,9,6,0,0,0,0,0,0,0,4,
+12,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,9,4,0,5,0,0,
+0,0,0,0,0,6,0,6,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,6,9,0,0,0,0,7,0,0,0,0,0,6,0,5,0,0,0,0,0,0,0,0,9,0,0,0,
+0,6,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,17,7,0,0,13,6,14,6,0,0,0,0,
+8,0,0,0,0,0,0,7,12,7,8,7,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,4,0,0,0,0,0,4,0,
+0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,12,4,0,0,10,7,0,0,0,
+0,0,0,10,0,0,6,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,12,0,0,6,
+0,0,0,0,0,0,0,0,8,7,12,0,0,0,0,0,0,6,0,6,0,4,0,0,18,6,0,0,0,6,0,0,0,0,0,6,10,6,
+0,0,0,0,0,0,8,7,14,0,0,0,0,0,0,6,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,
+0,0,0,8,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,8,7,0,0,10,5,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,
+0,0,9,4,8,0,0,0,0,0,0,4,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,4,0,0,0,0,
+0,6,0,0,9,7,0,0,0,0,0,5,0,0,0,0,8,7,0,0,14,0,0,0,0,6,0,0,0,0,0,0,9,6,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,5,0,7,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,
+0,0,0,6,0,0,0,6,0,4,0,0,0,0,0,4,0,0,0,0,12,0,0,7,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,
+0,12,0,16,6,0,0,0,0,0,0,11,7,0,4,8,7,0,0,0,0,0,6,0,0,0,0,16,0,0,0,0,6,0,0,0,0,0,
+0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,4,10,7,0,0,0,0,0,0,12,7,0,0,0,0,0,0,0,0,0,0,
+0,0,10,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,13,4,0,0,10,0,0,0,0,0,0,0,0,0,19,0,0,0,
+0,0,0,0,0,0,0,0,0,0,8,6,22,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,
+5,0,0,0,0,0,5,0,0,0,0,0,5,0,0,0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+4,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,7,0,0,18,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,14,7,0,0,11,5,0,0,0,5,0,0,0,0,12,5,0,0,0,0,0,0,0,0,0,0,24,6,0,0,
+0,7,0,4,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,7,0,4,0,0,0,0,8,7,0,0,
+9,6,0,0,14,5,0,0,0,6,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,12,6,0,0,0,0,0,0,0,6,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,7,0,0,0,5,0,0,
+0,0,12,7,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,6,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,6,0,0,13,7,0,0,0,0,0,0,14,0,11,4,0,
+0,0,4,0,0,0,0,14,5,0,0,0,0,0,5,11,5,0,0,0,0,22,5,0,0,0,0,0,7,0,0,0,0,0,4,0,0,0,
+4,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,6,0,0,17,0,10,0,0,0,8,0,0,0,19,
+5,18,7,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,10,6,0,6,0,0,0,0,10,4,0,4,0,
+0,0,0,0,0,14,7,0,5,0,0,0,0,0,6,0,0,0,0,0,0,0,0,8,0,9,6,12,0,0,6,0,0,0,0,0,0,0,0,
+12,0,10,6,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,4,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,5,13,0,9,7,0,0,0,0,0,0,0,0,0,0,0,7,9,7,0,0,8,0,0,0,0,0,
+22,0,0,0,0,0,0,0,23,6,14,0,0,0,0,0,0,7,0,0,0,0,11,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,
+0,0,10,0,0,6,0,0,0,0,0,0,0,0,0,6,0,0,8,5,0,0,0,0,0,0,0,0,0,7,11,6,21,0,0,0,0,0,
+0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,
+0,0,0,0,0,0,0,4,9,7,0,0,0,0,0,0,12,0,0,0,0,7,0,0,0,0,0,0,0,0,10,4,0,0,0,0,0,0,9,
+0,0,0,20,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,11,7,0,0,0,0,0,0,0,6,15,0,0,
+0,0,0,0,0,0,0,0,0,0,0,12,4,0,5,0,0,0,0,0,0,11,7,17,6,0,0,0,0,0,0,15,6,0,7,0,0,0,
+0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,6,0,5,
+0,0,11,0,11,7,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,
+17,0,0,0,0,6,0,0,0,5,0,0,0,0,0,0,8,7,9,6,0,0,14,0,0,0,0,0,0,0,0,0,16,0,0,0,0,0,
+8,7,0,4,0,0,0,0,0,0,0,6,0,5,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,
+0,0,0,5,0,4,0,0,8,7,0,6,12,5,0,7,18,7,0,0,8,5,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,
+10,0,11,0,0,0,0,0,0,0,0,0,0,0,9,0,0,4,0,6,0,7,0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,0,0,
+7,0,0,0,0,8,0,0,0,15,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,10,6,0,0,0,0,0,0,0,0,0,
+0,0,6,0,0,0,0,23,0,0,0,10,7,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,5,0,0,0,0,0,0,8,6,0,0,
+0,0,0,0,12,7,9,7,0,0,10,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,9,0,8,7,0,0,0,
+6,0,6,0,4,0,5,0,0,0,0,0,5,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,7,10,5,0,0,11,6,0,0,0,0,0,0,0,6,0,6,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,4,9,7,0,
+0,0,0,11,7,0,0,0,0,0,5,0,0,0,7,0,0,0,0,23,6,11,4,0,0,0,0,0,0,9,0,0,0,10,6,0,0,0,
+0,9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,6,0,0,10,6,0,0,0,7,0,0,
+0,0,0,0,0,0,0,0,20,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,
+6,11,7,0,0,0,0,0,0,0,0,0,0,0,0,10,5,0,0,0,6,0,0,0,5,0,6,0,6,0,0,0,0,0,0,0,0,0,0,
+0,6,0,0,0,0,8,7,0,5,0,0,0,0,0,6,0,0,0,0,0,0,0,4,10,0,8,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,10,6,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,6,0,0,
+0,0,0,0,0,0,10,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,11,6,0,4,0,0,14,5,0,7,0,0,0,0,0,6,16,0,0,0,0,0,0,0,10,0,0,7,15,0,0,0,11,7,0,0,
+0,0,0,0,0,0,0,0,8,7,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,5,0,0,0,
+0,8,0,0,6,0,0,0,0,0,0,9,5,0,0,23,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,6,0,
+0,0,0,0,0,0,7,0,0,0,0,15,7,0,0,0,0,8,0,0,0,14,0,0,0,0,0,0,0,16,7,0,0,0,0,0,7,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,12,6,11,7,
+9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,
+7,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,12,0,10,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,8,0,0,5,8,7,10,6,0,0,0,7,0,0,0,0,12,6,
+0,0,9,0,0,0,12,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,4,10,0,0,0,10,5,0,0,0,0,0,0,9,6,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,6,0,0,9,5,0,4,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,9,0,0,5,0,0,8,7,8,
+6,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,6,10,0,9,4,0,0,0,0,0,0,0,6,
+11,0,0,0,0,0,0,0,0,0,0,0,8,0,0,6,0,6,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,8,7,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,
+0,0,0,10,0,0,0,8,7,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,
+0,0,8,4,0,5,0,0,0,0,0,0,0,7,0,0,0,6,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,22,0,0,0,0,0,0,0,8,5,0,0,0,
+0,0,0,0,7,0,0,0,6,0,0,0,6,0,6,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,5,0,6,0,7,0,0,0,0,
+20,0,0,0,0,0,0,0,0,0,0,7,9,0,0,0,0,0,0,6,0,6,0,7,0,0,0,7,0,0,0,0,0,0,0,4,0,0,0,
+0,0,0,14,7,0,0,0,5,0,0,22,4,10,0,0,0,0,0,0,4,8,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,11,5,13,0,0,0,0,0,0,0,0,0,8,0,0,7,0,0,0,0,0,4,0,0,0,4,0,0,0,0,0,0,10,7,0,
+0,0,0,0,0,0,6,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,7,0,7,14,6,0,0,0,0,9,5,
+0,0,0,0,0,6,0,0,0,5,10,0,8,6,0,0,0,0,0,0,0,0,9,7,0,0,0,0,0,0,0,6,0,0,8,4,0,6,0,
+0,0,5,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,7,
+14,0,0,5,0,0,18,0,8,4,0,6,0,0,20,0,13,0,0,0,0,7,0,4,0,0,0,0,0,4,8,4,0,0,0,0,0,6,
+0,0,0,0,0,4,0,0,0,4,0,0,0,0,0,4,0,0,0,0,0,0,0,0,14,0,0,0,0,0,9,7,0,0,9,0,0,0,0,
+0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,20,0,14,0,0,4,0,6,8,5,0,0,0,0,0,7,0,0,0,0,0,0,
+0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,10,4,12,7,0,6,0,0,9,7,10,5,
+0,0,8,6,0,4,0,0,0,0,0,0,0,0,0,0,0,0,17,0,0,0,0,0,0,0,18,0,0,0,14,7,0,0,0,0,0,4,
+0,0,0,0,0,0,17,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,4,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,
+0,0,0,0,0,8,6,0,0,0,0,0,0,0,0,8,5,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,5,0,7,0,0,0,0,0,
+7,0,0,0,0,0,0,0,0,0,7,0,6,0,0,0,0,0,0,0,0,8,5,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,5,0,
+0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,23,0,0,7,0,0,0,0,0,0,
+0,0,0,0,0,0,0,4,0,0,0,0,0,0,12,7,8,4,0,0,0,0,0,0,0,0,0,6,0,0,9,5,0,0,0,7,0,0,0,
+0,0,0,0,0,0,4,10,0,0,7,0,0,0,5,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,5,0,0,18,7,
+0,0,8,0,0,5,0,0,10,0,0,0,0,0,0,6,0,0,0,0,0,5,0,7,0,0,0,0,0,0,0,0,0,0,16,0,0,0,0,
+6,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,6,0,0,10,0,0,5,10,4,0,0,12,0,0,0,0,
+6,22,4,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,5,0,0,0,0,0,7,0,5,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,6,0,7,0,0,0,6,0,6,8,5,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,8,5,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,5,0,0,0,7,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,
+0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,16,6,0,0,0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,12,7,0,0,0,0,9,0,0,0,0,6,0,0,11,0,0,0,0,0,13,0,9,6,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,4,0,0,10,7,0,0,0,7,0,6,0,
+0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,11,0,15,0,22,7,0,4,0,6,0,0,0,0,0,7,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,4,0,7,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,
+18,0,0,0,0,0,0,0,0,0,14,0,0,4,0,0,0,0,8,7,9,0,0,0,0,0,9,0,0,0,14,0,0,0,0,0,0,0,
+0,0,11,7,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,7,0,0,0,6,0,6,0,0,0,0,8,0,0,0,0,
+0,11,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,9,4,0,0,0,0,0,4,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,8,7,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,
+0,0,0,0,0,0,8,6,0,0,9,5,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,6,0,0,0,0,0,0,0,6,0,5,0,
+0,10,6,9,0,0,0,0,6,0,0,0,0,0,6,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,
+11,7,12,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,4,0,5,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,
+0,0,0,0,6,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,5,0,0,10,6,
+0,0,0,4,0,7,13,0,0,4,0,0,11,4,0,6,0,0,0,0,0,6,8,7,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,5,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20,5,0,0,0,0,12,6,0,0,0,0,
+11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,11,5,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,
+7,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,4,0,0,0,6,17,0,9,0,10,6,0,6,12,0,0,4,0,0,0,
+0,0,0,0,0,0,0,8,5,12,7,0,4,0,0,0,0,0,0,0,0,0,0,11,0,9,0,10,6,11,5,0,7,0,0,8,0,0,
+7,0,4,0,0,0,7,0,0,0,0,0,0,8,6,0,0,0,6,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,6,0,
+0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,11,0,0,0,0,6,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,10,0,0,0,0,0,8,6,0,0,0,0,0,6,12,0,0,0,0,0,
+0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,6,0,0,16,0,11,5,0,0,0,0,0,
+0,0,0,0,0,10,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,9,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,6,0,7,0,0,0,0,0,0,0,0,0,0,0,0,8,4,0,0,0,0,0,6,10,
+7,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,6,0,0,0,0,0,0,9,5,0,0,0,0,8,0,9,0,0,
+0,0,0,0,0,0,7,10,0,13,0,0,6,0,0,0,0,0,0,0,0,0,6,9,4,0,0,0,0,0,0,10,0,0,0,0,0,10,
+0,0,0,0,0,0,0,10,6,11,0,0,0,0,0,9,0,0,0,0,0,0,4,0,0,0,0,0,0,10,5,0,0,0,0,0,6,0,
+0,0,0,0,0,18,4,0,7,0,0,0,0,0,0,24,0,8,6,0,7,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,
+0,8,5,0,0,0,0,10,7,0,6,0,0,0,0,0,0,0,0,8,5,10,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,
+6,0,0,8,7,0,0,0,0,0,0,0,0,0,0,12,6,0,0,0,0,0,0,0,4,0,5,15,0,0,0,0,7,0,7,0,0,0,0,
+0,0,0,0,0,6,10,5,0,0,0,6,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,7,0,0,12,0,0,0,0,0,0,0,0,
+0,0,5,0,0,0,0,0,0,14,4,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,11,0,10,4,9,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,4,0,0,0,0,0,7,0,0,0,
+0,0,0,0,0,0,0,0,7,13,7,0,0,0,0,0,0,0,5,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,7,0,0,
+0,0,0,0,0,0,0,5,0,0,0,0,0,6,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,8,0,10,6,0,4,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,6,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,6,0,
+0,0,0,0,0,0,0,10,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,9,7,0,0,0,0,0,6,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,10,6,0,0,0,0,0,0,0,6,0,0,0,
+0,0,0,0,5,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,6,0,0,0,5,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,6,11,0,0,0,0,6,0,0,0,0,0,0,0,6,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,
+6,0,0,0,0,0,0,0,6,10,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,
+0,0,0,0,0,0,0,6,0,6,0,0,0,5,0,0,0,0,0,0,0,5,0,0,10,0,11,5,0,0,0,0,0,0,14,7,9,7,
+0,6,0,0,0,0,0,4,0,0,0,0,0,0,11,7,0,6,0,0,0,0,0,0,9,7,0,4,0,0,0,7,0,0,0,0,0,5,0,
+0,0,0,0,5,0,0,0,7,0,0,0,0,0,5,0,0,0,0,17,5,0,0,8,0,0,0,0,6,9,4,0,0,0,0,0,0,0,0,
+8,7,11,7,9,0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,6,9,5,0,0,8,6,0,0,0,5,0,
+0,0,0,9,0,0,0,9,6,0,7,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,
+0,0,0,0,4,0,0,0,0,10,0,0,0,0,0,0,0,0,4,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,4,0,0,0,5,0,0,0,0,0,7,0,0,0,0,0,7,13,5,0,0,0,7,0,0,0,0,0,7,9,6,11,7,0,7,0,0,0,
+0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,8,5,0,0,0,5,9,4,0,0,0,0,0,0,0,0,8,4,0,0,0,0,
+24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,
+0,0,0,0,6,0,0,0,7,0,0,0,6,0,0,0,0,0,0,0,0,0,5,11,6,0,4,0,7,20,0,8,5,9,5,9,0,0,6,
+0,0,0,0,0,0,0,0,0,0,0,7,23,5,0,0,8,4,0,0,10,0,0,6,0,5,0,0,0,0,0,0,0,0,0,0,0,7,0,
+0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,9,0,0,0,
+10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,
+6,0,0,0,0,14,0,18,4,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,9,6,0,4,0,0,0,0,0,0,8,4,
+11,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,8,4,0,0,0,0,0,0,0,0,12,0,10,7,0,0,10,0,0,0,0,
+0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,6,0,0,0,0,8,
+6,10,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,4,0,6,0,4,0,0,0,0,0,5,0,0,
+0,0,0,0,0,0,0,7,0,0,0,7,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,17,7,11,0,0,0,0,0,0,0,0,0,0,4,12,6,0,0,0,5,0,0,0,6,0,0,0,0,0,0,0,0,0,0,
+0,5,12,7,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,6,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+7,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,6,0,6,0,0,20,0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,4,
+0,0,0,5,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,6,0,4,13,0,0,7,0,0,0,0,0,0,
+0,0,0,0,0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,6,0,0,12,6,0,7,0,0,0,0,10,0,23,6,0,0,
+0,4,0,0,0,0,0,6,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+10,0,9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,11,0,9,7,0,0,
+0,0,0,0,0,0,0,0,9,7,0,4,0,0,0,0,8,7,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
+0,0,0,0,0,6,0,0,10,7,10,5,0,0,8,0,8,0,0,0,0,0,0,4,0,5,10,0,0,0,0,0,0,0,9,0,0,6,
+0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,11,7,0,0,0,0,0,0,0,0,9,4,0,0,0,0,0,6,0,0,8,
+7,0,0,0,0,0,5,0,0,0,0,0,0,0,0,10,0,0,0,0,5,0,4,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,24,7,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,6,0,0,9,0,0,0,0,0,0,7,0,6,13,0,8,
+0,0,0,0,0,0,0,0,0,9,7,0,0,0,0,0,0,0,6,0,0,0,0,8,5,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,
+4,0,0,0,0,0,4,0,0,0,0,0,0,0,6,8,0,0,0,0,6,8,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,4,0,4,0,0,0,5,0,7,0,0,10,0,10,7,0,0,12,5,0,0,9,0,0,0,10,0,
+0,6,0,0,0,6,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,5,0,0,0,0,0,0,
+12,0,0,0,0,0,8,5,13,6,0,0,0,0,0,0,9,4,0,0,0,0,8,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,
+0,0,6,0,0,14,0,0,0,0,6,0,0,0,0,0,6,0,0,0,0,17,6,0,0,0,0,12,6,0,0,0,0,8,0,0,7,0,
+7,0,4,9,0,0,6,0,0,0,6,0,0,0,0,0,0,8,7,0,0,0,0,0,0,11,0,0,4,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,18,7,0,4,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,6,0,0,0,0,0,
+0,0,0,12,5,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,8,0,11,7,0,0,0,0,0,0,0,0,0,4,0,0,0,0,
+11,0,0,0,0,0,0,0,21,0,0,6,10,0,0,0,0,0,9,0,10,0,0,0,0,0,11,0,0,0,0,6,0,0,0,0,0,
+5,0,0,0,0,0,0,10,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,4,0,0,23,7,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,9,7,0,0,0,7,
+0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,6,0,0,
+11,6,0,0,0,0,0,0,0,6,0,0,0,0,10,7,0,0,9,4,0,0,11,0,8,5,0,0,0,7,8,5,22,0,0,0,9,6,
+0,0,0,0,0,0,0,6,10,4,0,0,0,0,0,7,9,4,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,6,0,4,0,
+0,0,0,11,6,0,0,0,0,0,0,0,0,0,0,0,7,0,6,0,0,0,0,0,7,0,0,0,0,0,0,0,6,0,6,0,4,0,0,
+0,0,0,0,0,7,0,7,0,4,13,0,0,0,0,0,8,0,0,0,0,7,0,0,0,0,0,0,11,6,0,7,0,0,0,0,9,0,0,
+0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,4,8,0,0,0,0,0,8,0,0,0,0,0,0,6,0,0,0,4,0,0,0,0,0,0,0,6,0,0,0,0,13,5,8,0,0,
+0,0,0,0,0,14,0,0,6,0,0,0,0,0,0,0,0,0,7,0,0,17,6,0,0,0,0,13,4,0,0,9,6,0,0,10,5,0,
+0,10,5,0,0,0,0,13,0,0,0,0,6,0,0,0,0,0,0,10,0,12,0,0,0,0,0,0,0,0,0,0,0,8,4,0,4,0,
+0,0,4,0,0,0,0,0,4,0,0,12,0,0,5,9,4,0,0,0,0,0,0,0,0,0,5,8,5,0,0,0,7,0,0,0,0,8,7,
+0,0,0,6,12,5,0,0,0,5,0,0,0,5,0,0,0,0,0,4,12,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,8,7,0,0,0,0,0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,
+0,9,6,0,0,0,0,0,0,0,0,0,4,0,0,0,6,0,0,0,4,11,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,
+0,0,0,0,12,7,0,0,0,7,10,7,0,0,11,0,0,0,0,0,0,0,0,0,11,7,0,0,0,6,0,0,11,0,0,0,0,
+0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,22,0,10,7,0,0,8,5,0,0,0,0,0,5,0,0,0,0,0,0,
+0,0,0,0,9,6,8,7,0,6,0,0,0,0,0,5,0,0,0,0,0,0,8,7,0,0,0,0,9,7,0,0,0,6,0,0,8,7,0,0,
+0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,4,0,5,0,0,0,4,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,6,0,0,0,0,0,0,0,4,0,0,0,0,0,0,9,
+6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,4,0,0,0,5,0,0,0,0,14,0,0,0,
+9,0,0,0,0,0,0,0,0,0,9,7,12,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,12,0,0,0,0,0,12,7,0,0,0,5,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,10,7,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,6,0,0,0,0,0,0,9,6,0,0,0,0,0,6,0,0,0,0,0,
+0,0,0,0,0,9,0,0,0,0,7,0,6,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,
+0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,6,0,7,12,6,0,0,0,0,0,5,0,0,0,0,0,0,0,0,
+0,7,0,0,8,6,0,0,0,0,10,7,0,0,0,0,0,0,0,6,0,0,0,0,0,6,12,0,0,0,0,0,0,0,0,6,0,0,0,
+0,0,6,0,0,0,6,0,0,0,0,0,6,16,0,0,0,0,0,0,0,0,0,9,0,17,0,14,7,8,0,0,0,0,0,0,6,0,
+0,0,0,0,0,0,0,0,0,11,0,0,6,8,7,0,6,0,0,0,0,0,0,0,0,0,0,12,6,0,0,0,0,0,0,0,0,0,0,
+9,0,0,0,0,7,0,0,0,0,11,5,0,4,9,6,8,0,0,0,0,0,0,0,0,0,10,0,11,7,0,0,0,0,0,0,0,0,
+9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,
+0,0,0,12,0,0,0,0,0,10,5,0,4,0,0,0,0,0,7,10,6,11,6,0,0,0,0,0,0,0,0,0,0,0,0,17,0,
+0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,6,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,8,0,0,4,0,0,0,6,0,0,0,
+0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,4,0,0,0,0,9,6,0,0,0,4,0,0,0,0,0,4,10,7,0,7,0,0,0,0,0,0,0,6,0,0,0,0,0,6,0,0,0,
+0,0,0,0,0,0,6,0,0,0,6,0,6,0,0,0,0,10,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,18,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,13,0,0,0,0,0,10,0,0,0,0,0,0,0,0,4,
+0,0,0,6,0,0,0,0,0,4,8,0,0,0,11,7,0,0,0,4,0,0,0,0,0,7,0,0,8,5,0,0,16,0,0,0,13,6,
+0,0,0,0,0,0,0,6,0,0,0,0,20,0,11,6,0,0,8,7,0,0,0,0,0,6,17,0,8,0,0,0,0,0,8,7,0,0,
+9,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,
+0,0,4,0,7,0,0,0,0,0,0,0,6,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,8,
+0,8,0,0,0,0,0,0,0,11,0,8,0,0,0,0,0,0,0,0,0,0,0,8,6,0,0,0,0,0,0,0,0,0,6,0,0,9,0,
+0,0,0,0,8,0,0,0,0,0,18,0,0,0,0,0,0,4,9,0,0,0,0,0,8,5,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,9,6,0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,8,7,0,0,0,0,0,0,0,0,
+0,4,0,0,0,0,0,0,14,0,0,0,0,7,0,6,0,0,8,0,20,7,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,8,0,0,0,14,0,0,0,0,0,0,0,8,0,0,7,0,6,0,0,0,7,0,0,0,0,0,0,0,0,
+0,0,0,4,12,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,10,6,0,
+5,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,
+0,0,0,5,8,4,0,0,0,0,0,0,0,4,0,0,0,7,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,12,7,0,
+0,0,0,13,6,0,0,0,7,0,0,8,0,0,0,8,0,0,0,0,0,0,0,0,0,0,5,0,0,0,7,0,0,0,0,0,0,11,5,
+0,6,0,0,8,5,0,7,0,0,0,0,0,0,0,7,0,0,0,0,8,6,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,4,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+14,0,10,7,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,19,0,0,4,0,0,0,7,
+0,0,11,5,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,16,0,10,5,18,0,0,7,9,6,0,5,0,0,0,0,0,
+0,0,0,0,5,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,5,0,0,0,7,0,6,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,6,0,0,0,4,0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,6,0,0,0,7,23,0,0,0,0,5,0,0,0,0,0,0,8,5,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,14,0,20,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,
+11,0,0,0,0,7,0,0,0,0,15,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,9,6,0,0,0,0,0,7,0,0,0,0,
+0,4,0,0,0,0,10,0,0,0,0,0,9,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,10,0,11,6,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,5,0,0,11,0,0,0,0,7,0,0,0,0,0,0,8,7,0,
+4,0,0,0,0,11,0,0,0,0,0,11,0,0,5,0,0,8,7,0,4,0,7,0,0,0,0,0,0,0,6,0,0,0,0,0,4,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,10,5,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,6,0,5,0,0,0,0,0,0,0,
+0,0,4,11,5,10,7,0,7,0,0,9,6,9,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,9,4,0,4,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,8,6,0,0,0,0,11,7,0,0,0,0,0,0,0,0,0,0,11,7,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,8,5,0,0,8,0,9,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,4,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,
+10,7,0,0,0,6,0,0,0,0,0,0,8,0,0,6,0,0,0,6,10,0,0,0,0,0,0,0,0,0,0,0,8,5,0,0,0,6,0,
+0,0,6,0,0,0,0,9,5,8,5,8,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,
+0,8,7,10,0,0,0,0,0,0,0,9,6,0,0,0,0,0,0,0,0,0,0,11,7,0,0,0,0,0,5,0,0,0,6,0,7,0,0,
+10,5,0,0,0,0,8,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,6,0,0,0,0,0,0,11,0,0,0,0,0,13,4,
+0,0,0,4,0,0,0,0,0,5,8,0,0,0,12,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,7,14,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,7,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,4,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,5,0,0,15,6,10,0,0,0,8,6,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,9,6,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,14,6,10,0,0,0,0,0,0,0,0,6,0,
+0,0,0,0,0,0,0,12,6,0,0,0,0,0,0,0,0,9,7,0,0,0,0,0,6,0,5,11,4,0,6,0,0,0,7,0,0,0,0,
+0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,5,0,0,8,5,0,0,0,0,0,0,0,0,0,0,
+0,0,10,0,0,0,0,0,9,6,9,4,0,0,0,4,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,8,5,0,
+0,0,0,0,0,0,0,0,0,0,4,0,0,11,5,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,5,0,0,0,0,0,0,
+0,0,0,7,12,0,0,0,0,6,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,
+4,9,6,0,4,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,6,0,
+7,8,6,0,0,0,0,0,0,0,4,0,0,9,6,0,0,0,0,0,0,0,0,0,6,0,5,0,4,0,0,0,0,0,0,0,5,0,0,0,
+0,0,5,0,0,0,7,12,7,0,0,0,0,0,0,18,4,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,6,0,0,0,
+0,12,0,0,7,0,0,0,0,0,7,0,0,13,0,0,6,0,0,0,0,8,7,9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,7,10,5,0,0,8,0,0,0,0,0,0,0,8,6,0,7,0,0,8,4,0,4,0,0,0,0,10,4,0,0,14,0,
+0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,17,0,0,0,0,0,0,6,0,0,0,0,8,6,0,0,10,5,0,0,0,0,8,
+6,0,0,0,6,0,0,0,7,0,0,0,0,0,6,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,12,0,0,0,0,6,
+8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,
+0,0,0,6,0,0,0,0,0,0,0,0,0,0,12,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,4,24,0,0,
+0,0,0,12,6,0,0,10,6,0,5,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,17,7,0,5,0,0,0,
+0,0,0,0,0,0,0,0,0,0,6,11,5,9,0,8,7,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,10,7,0,0,0,0,0,0,0,7,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,11,5,8,7,0,0,0,
+0,8,5,0,0,0,0,10,7,0,7,0,0,0,0,0,0,0,0,0,0,13,6,0,0,0,0,0,0,0,0,0,6,0,4,0,0,0,0,
+0,6,12,0,8,7,0,0,0,0,0,0,0,0,0,0,16,0,10,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,22,0,0,0,
+0,0,0,0,0,0,0,0,0,0,13,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,22,0,0,6,0,0,21,0,0,0,22,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+6,0,0,0,0,9,6,0,0,0,0,0,0,0,0,0,6,0,0,0,5,0,0,0,0,0,7,8,0,0,0,0,6,14,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,8,6,0,0,0,0,0,0,
+0,0,0,0,0,6,0,0,0,0,8,5,0,0,11,7,0,6,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,
+6,0,0,0,5,0,0,0,0,0,0,0,0,0,4,0,0,8,7,0,0,0,0,8,5,11,7,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,4,0,0,0,0,8,5,0,0,10,0,0,4,13,7,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,5,0,0,13,6,
+0,6,0,7,0,0,8,4,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,15,0,0,0,10,7,0,0,0,0,0,
+7,0,0,0,0,0,0,0,4,0,0,0,0,0,6,0,0,0,0,19,0,0,0,0,6,0,0,0,0,0,4,0,0,0,0,0,6,0,5,
+0,7,0,0,0,0,0,0,0,0,0,6,0,0,11,4,0,0,0,6,0,0,13,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,8,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,12,6,0,0,0,0,
+0,7,0,0,0,0,0,0,11,7,0,0,0,0,0,6,0,0,10,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,5,11,6,
+0,0,0,0,0,0,0,0,10,0,0,0,0,6,0,0,0,0,0,0,8,7,0,0,0,5,0,0,0,5,0,0,0,0,0,0,0,0,0,
+0,0,0,8,7,0,0,0,0,9,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,10,7,0,0,0,0,10,0,
+0,6,0,0,13,0,0,0,0,0,0,0,9,6,0,0,8,6,8,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,4,0,
+0,9,7,0,0,0,0,0,0,11,0,0,0,10,7,0,0,0,0,0,0,0,0,9,6,0,0,12,4,0,4,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,6,0,0,0,0,21,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,6,0,5,0,0,
+9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,4,0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,0,0,0,
+16,0,0,4,0,0,0,0,0,7,0,0,0,6,0,6,0,0,11,0,0,0,0,5,0,0,0,0,0,0,0,4,8,5,0,0,0,0,0,
+0,14,0,0,0,0,6,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,
+0,0,8,0,0,0,0,0,0,0,0,6,0,0,0,4,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,4,0,0,0,4,0,0,0,
+0,0,0,0,6,9,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,14,7,0,0,9,7,0,0,11,0,0,0,0,0,10,
+4,11,5,13,6,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,5,0,0,0,0,0,4,0,0,9,0,0,0,0,
+0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,6,12,5,0,0,0,6,14,0,0,0,0,0,0,0,0,0,0,4,9,4,
+0,0,0,0,0,5,0,0,0,0,0,0,0,4,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,5,0,0,
+0,0,0,0,0,0,0,0,8,6,0,0,0,0,0,0,11,6,0,0,13,7,0,0,13,6,0,7,0,0,0,0,0,0,8,6,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,10,6,0,4,0,0,12,6,0,0,0,0,0,0,0,0,10,6,
+0,0,0,6,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,6,0,
+0,0,0,0,7,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,8,6,0,
+0,0,7,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,
+0,0,0,5,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,
+0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,8,7,0,0,8,5,0,0,0,4,9,5,0,0,0,7,10,6,0,0,
+0,0,0,0,9,7,0,0,8,5,8,0,8,4,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,5,0,5,0,0,0,0,0,0,0,
+0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,6,0,0,0,4,0,0,0,0,0,0,0,0,0,
+0,11,7,0,0,0,7,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17,5,0,0,0,7,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,9,7,0,0,0,0,8,5,0,4,0,0,0,0,0,6,0,6,14,
+6,0,0,0,0,9,6,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,6,0,0,0,0,14,7,9,7,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,16,
+0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,14,0,
+0,6,0,0,8,6,0,0,0,0,0,6,0,0,12,0,0,0,0,0,8,5,0,7,11,0,0,5,0,4,0,0,0,6,0,0,0,0,0,
+0,0,0,0,0,0,0,9,6,0,4,0,6,0,0,0,0,0,0,0,0,0,0,0,0,11,6,0,0,0,0,0,0,10,5,0,0,0,0,
+0,4,0,0,0,7,11,6,0,4,8,5,9,5,0,0,0,5,0,7,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,8,5,14,7,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,9,6,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,9,0,0,0,12,5,0,0,0,0,0,0,0,4,10,5,0,0,0,0,0,0,0,0,0,0,0,6,0,
+0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,4,0,0,0,6,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,6,0,0,0,0,0,0,10,4,0,0,0,0,0,5,0,0,0,4,
+0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,8,0,10,7,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,10,7,0,0,0,0,0,0,0,0,15,0,0,0,
+0,0,0,0,0,0,0,7,0,0,0,0,0,7,10,7,9,7,0,0,0,7,0,0,8,0,0,0,0,0,0,0,9,0,0,0,8,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,8,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,7,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,15,7,12,6,0,0,0,7,0,5,0,0,0,0,0,0,0,0,0,0,0,0,18,0,0,5,0,0,0,0,
+0,0,0,6,9,5,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,9,7,0,0,14,0,0,0,11,7,0,0,0,0,0,
+0,0,0,0,0,0,4,0,0,11,7,0,0,0,0,8,0,0,0,0,0,0,6,8,7,0,0,0,7,10,4,0,0,0,0,0,0,0,0,
+0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,10,0,0,0,0,0,0,
+6,0,6,0,0,0,0,0,4,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,11,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,7,0,0,10,7,0,0,0,0,9,7,0,0,0,0,0,0,13,7,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,12,0,
+0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,9,6,0,0,11,0,0,
+0,0,0,14,4,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,23,7,0,0,
+0,0,0,6,0,7,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,5,0,0,0,0,20,
+7,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,8,0,0,0,0,0,0,0,0,0,11,5,0,0,0,0,0,0,0,0,0,0,10,4,0,0,0,5,8,5,10,4,0,0,0,0,0,
+0,13,6,9,7,0,0,10,7,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,6,0,0,0,7,0,6,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,7,10,7,0,0,
+0,0,0,0,0,0,0,0,12,4,0,0,0,0,8,7,0,0,0,0,0,7,0,6,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,
+0,0,0,0,6,0,6,9,6,0,0,12,5,0,0,8,6,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,6,0,0,0,0,
+0,0,0,0,0,0,0,0,0,5,8,7,9,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,11,
+4,0,0,0,0,0,0,8,0,0,0,10,7,0,4,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,6,0,0,8,0,
+0,0,0,0,0,5,0,6,0,0,10,0,14,0,0,0,0,0,0,0,23,0,0,0,12,0,10,5,0,0,0,0,0,0,0,0,0,
+5,0,0,0,0,8,0,0,0,0,6,8,0,0,0,0,0,0,0,0,0,22,0,8,0,0,0,0,6,0,0,0,0,0,0,0,5,0,0,
+0,0,0,0,0,6,18,4,0,0,0,7,10,6,0,6,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,
+0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,7,10,0,0,0,0,0,0,6,0,0,0,0,11,5,0,0,0,0,0,0,0,0,
+15,0,8,6,0,0,13,7,0,0,0,0,0,7,0,0,0,0,0,7,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,9,5,9,
+0,0,6,8,6,0,0,0,0,10,0,0,0,18,5,0,0,0,5,0,7,0,0,0,0,8,6,0,0,0,0,9,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,14,0,23,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,6,0,0,0,5,0,
+0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,8,5,0,0,0,0,0,0,0,0,9,0,0,0,0,4,0,0,0,0,0,0,0,0,
+0,0,0,0,20,5,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,9,5,0,0,0,0,0,0,8,4,24,0,0,0,0,0,0,
+0,0,0,0,0,0,0,9,7,0,0,0,0,10,5,0,0,8,5,0,0,0,0,0,0,0,0,12,7,0,6,0,0,10,6,0,0,0,
+0,14,0,0,4,9,5,0,0,0,0,0,0,9,0,0,0,0,0,0,6,0,0,0,0,0,4,0,0,8,0,0,0,0,0,11,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,8,5,11,7,0,4,0,0,10,0,0,0,0,
+0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,11,6,0,0,0,0,0,5,14,6,0,0,0,0,10,0,0,
+0,13,4,0,0,0,0,0,0,0,0,0,0,0,6,0,0,10,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,7,12,0,10,6,0,0,0,0,0,0,10,0,0,0,0,0,10,0,9,
+7,0,0,0,0,0,0,0,0,0,0,0,0,0,7,8,0,0,0,0,0,0,0,0,0,0,0,0,4,0,7,0,0,0,0,9,7,0,0,0,
+0,0,0,0,0,0,0,0,0,24,0,11,7,0,7,0,0,0,0,0,0,8,6,0,0,0,0,0,0,8,7,0,0,0,0,0,5,0,0,
+0,6,9,0,0,0,23,5,0,0,0,0,0,6,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,7,0,0,0,
+0,0,0,0,0,0,0,0,0,0,6,0,0,18,4,0,0,11,7,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,
+0,0,0,6,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,9,0,0,0,11,0,0,0,23,0,0,
+0,10,4,0,0,0,0,0,7,0,0,0,7,0,0,0,0,0,4,0,0,0,0,0,7,0,0,19,0,11,0,0,0,0,0,12,7,0,
+0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,5,0,0,0,0,0,5,0,0,0,0,0,5,0,0,0,0,0,0,0,6,0,0,
+9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,4,0,0,0,0,10,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,4,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,22,0,8,7,10,4,11,0,13,5,8,7,9,0,8,7,0,0,0,7,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,
+0,8,0,0,0,0,0,0,0,8,6,0,0,0,0,0,0,0,0,0,0,0,6,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,4,0,0,0,4,11,0,0,6,0,0,8,5,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,8,5,0,0,
+20,0,0,0,0,0,0,0,0,0,11,0,0,0,0,5,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,7,0,0,14,0,0,0,9,0,13,7,0,0,0,0,0,6,0,7,0,0,8,6,10,6,0,0,8,6,0,0,0,6,0,
+0,12,6,9,0,0,0,0,0,0,5,9,0,12,4,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,8,5,0,0,0,0,0,
+0,0,4,8,0,0,6,8,0,0,0,0,0,0,0,0,0,13,6,0,7,0,0,0,0,0,6,8,7,8,6,0,0,0,7,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,18,0,11,4,0,0,0,5,0,0,0,0,0,0,0,0,0,0,
+0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,14,
+6,0,0,0,0,12,7,8,0,0,0,0,0,0,0,8,7,0,0,0,0,10,4,0,0,0,0,0,0,10,0,0,6,0,0,0,0,0,
+0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,15,6,9,7,0,0,0,0,0,0,15,6,11,7,0,0,0,7,0,0,21,0,0,
+0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,17,6,0,0,10,5,0,5,0,0,0,0,0,0,0,0,0,7,
+0,0,10,0,0,0,0,0,0,0,0,4,11,5,0,0,0,0,16,7,0,0,0,0,0,6,0,0,8,7,0,4,0,0,10,0,0,0,
+0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,8,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,
+0,0,0,10,4,0,0,0,0,0,0,0,0,0,6,0,5,0,0,9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,
+0,7,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,6,10,7,0,0,0,0,0,0,0,0,8,4,0,0,10,0,0,0,0,4,0,6,0,6,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,7,17,0,0,0,0,0,
+0,0,0,0,0,0,10,0,0,7,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+6,0,0,0,5,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,5,0,4,0,0,0,0,0,6,0,0,0,0,0,0,10,5,0,0,
+0,5,0,0,0,0,9,0,19,7,0,0,0,0,0,7,0,0,0,0,10,6,0,0,0,6,0,5,0,0,0,0,0,0,0,0,0,6,8,
+0,0,0,0,0,11,0,0,0,0,0,0,6,0,0,0,0,0,7,9,0,15,0,0,0,0,0,0,0,0,0,0,4,0,0,0,5,0,0,
+0,0,0,0,0,6,0,0,0,0,0,0,0,4,0,0,0,0,9,0,0,0,0,0,0,0,0,6,0,7,0,0,0,0,0,0,0,6,0,0,
+0,0,0,6,10,0,0,0,0,0,0,0,23,0,14,0,0,0,0,7,0,0,0,0,0,7,0,0,9,0,0,0,0,7,0,0,0,0,
+0,6,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,4,0,0,0,
+0,0,0,0,0,9,5,0,0,0,0,0,4,0,0,0,0,9,5,0,0,0,0,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,0,0,0,10,0,0,0,0,0,0,5,0,4,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,6,11,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,14,7,0,0,12,7,0,0,0,
+0,0,0,0,0,0,4,0,0,0,0,0,6,0,0,0,0,8,6,10,0,0,0,0,0,0,0,0,0,10,7,8,5,0,0,0,0,0,0,
+0,0,8,4,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,5,0,0,9,5,0,0,0,0,0,5,0,0,0,0,0,4,0,0,0,
+0,0,0,0,0,0,0,12,4,11,0,0,0,9,0,11,7,0,0,0,0,0,0,10,6,0,0,0,6,0,0,0,0,15,5,0,0,
+11,6,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,4,0,4,0,6,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,8,0,0,0,19,7,0,4,0,0,9,0,0,0,0,0,10,0,
+0,6,0,0,13,0,12,6,0,0,0,0,0,0,0,0,10,7,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,13,7,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,6,0,6,0,4,9,0,0,0,10,0,0,0,0,0,0,0,
+0,5,0,0,0,0,0,0,10,0,23,6,0,0,0,6,8,0,0,0,0,0,0,0,0,0,17,7,0,0,0,0,11,6,22,5,0,
+0,9,6,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,5,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,4,11,0,9,4,0,0,
+0,7,0,7,0,0,0,0,0,0,12,4,0,0,0,0,0,0,0,0,0,0,0,0,11,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
+4,0,0,11,5,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,8,6,0,0,0,4,0,0,0,0,
+0,0,0,0,0,7,0,0,0,4,0,0,10,4,0,0,0,0,0,0,0,7,0,7,0,0,0,6,0,0,0,0,8,6,0,6,0,6,0,
+0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,6,0,0,0,0,0,0,9,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,6,22,6,12,0,0,6,0,0,0,6,0,0,0,0,0,7,0,0,0,0,11,0,0,0,
+9,7,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,6,0,0,0,6,0,6,0,0,8,7,0,0,0,4,9,7,19,0,0,0,0,0,0,0,0,0,9,6,10,6,0,6,0,0,0,
+4,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,6,16,7,10,6,0,0,23,6,11,7,0,4,0,0,0,0,0,0,0,0,0,
+5,0,0,0,0,10,7,0,0,0,0,0,7,0,0,0,0,0,0,15,0,10,0,0,0,14,6,0,0,0,0,0,0,0,0,0,0,0,
+5,0,0,0,0,0,0,0,5,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,5,0,0,11,5,0,0,0,0,0,0,0,0,0,0,
+0,4,0,0,0,0,0,6,0,0,10,0,0,0,0,7,0,0,0,0,0,0,10,6,0,0,0,0,8,4,0,0,0,7,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,7,12,5,0,0,0,0,
+0,6,0,0,0,0,9,6,0,0,0,0,0,0,0,6,9,0,0,0,0,6,0,0,0,0,8,7,0,0,0,0,0,0,0,6,0,0,0,0,
+0,0,0,0,0,0,10,5,0,0,0,0,0,0,8,6,0,0,0,0,0,6,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,8,5,0,0,0,0,0,7,0,7,0,4,0,0,10,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,5,0,0,0,0,13,
+7,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,7,0,0,13,0,0,0,0,0,0,0,0,7,10,5,0,0,0,0,0,0,9,7,0,0,8,6,9,
+5,0,0,0,0,0,6,12,0,0,0,0,0,0,0,18,6,0,0,0,0,0,0,0,0,19,7,0,4,0,0,0,0,9,5,0,5,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,7,0,0,0,0,0,0,14,0,0,0,23,7,8,7,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,6,22,0,0,7,0,0,0,0,0,0,0,0,9,7,8,4,0,
+0,0,0,0,0,0,0,8,5,0,6,0,0,0,0,0,6,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,
+8,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,12,5,0,0,0,0,0,0,0,0,0,0,8,6,0,0,11,7,0,0,0,
+0,12,0,8,6,19,7,0,0,0,0,0,0,0,0,0,0,0,0,0,6,11,0,0,6,0,7,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,11,7,0,0,0,0,0,4,10,0,0,0,0,0,0,0,8,7,0,0,0,0,14,0,8,0,0,6,10,0,0,
+0,0,0,0,0,12,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,6,0,0,0,0,
+0,0,0,0,13,0,0,0,0,0,0,0,11,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,
+0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,5,0,0,0,6,0,0,0,5,0,7,0,0,0,
+0,0,6,0,0,21,7,0,0,9,6,0,0,0,6,0,0,13,7,0,0,0,5,0,0,0,0,0,4,0,6,0,0,0,0,0,0,0,0,
+0,0,0,4,0,0,0,0,0,0,11,5,0,6,0,0,10,5,0,0,0,0,0,0,0,0,9,6,0,0,8,7,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,9,0,0,0,0,0,0,6,0,0,0,0,15,4,0,0,12,7,0,0,0,6,
+0,7,0,0,8,0,9,5,0,4,0,0,0,6,0,6,0,0,23,4,0,0,0,0,0,0,0,0,0,0,0,0,10,7,0,4,0,0,8,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,6,0,0,0,0,0,0,0,0,0,
+7,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,12,6,0,0,0,0,0,0,10,7,0,7,0,0,0,0,0,0,0,0,0,0,
+9,0,0,0,0,0,8,0,0,0,0,4,0,0,0,0,0,0,0,0,0,4,11,5,0,0,0,6,0,6,0,0,0,0,0,0,0,6,0,
+4,0,0,0,0,0,0,0,0,0,0,0,5,8,4,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,6,8,7,0,0,0,6,0,6,0,
+0,0,0,0,0,0,0,0,5,0,0,0,0,0,5,0,0,0,0,11,0,0,0,0,0,0,0,10,5,9,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,23,7,0,0,0,0,0,7,0,0,10,6,18,0,0,0,
+0,0,0,0,8,7,0,6,0,0,0,0,0,0,8,5,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,
+0,0,0,0,0,6,0,0,0,4,12,7,0,0,0,0,0,0,0,0,10,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,6,0,0,0,0,0,0,13,5,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,4,0,0,0,0,0,0,0,0,
+11,7,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,
+0,0,0,0,6,0,0,0,4,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,10,0,11,0,0,0,0,0,0,0,0,0,
+17,5,0,4,0,0,0,0,0,0,0,7,0,0,0,0,0,6,0,0,0,0,0,0,0,4,0,0,0,0,8,7,0,0,0,0,0,0,0,
+0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,6,0,0,0,0,0,6,0,0,0,0,
+10,0,0,0,8,6,0,0,0,7,0,0,0,0,0,0,8,0,0,0,14,0,0,0,0,7,0,0,0,4,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,9,4,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,
+10,0,0,0,16,5,0,0,0,0,0,0,8,0,0,4,0,0,0,0,0,0,0,0,0,0,9,6,0,0,0,0,0,0,10,0,0,0,
+0,0,0,0,0,5,0,0,0,0,12,5,0,7,0,0,0,0,0,6,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,7,0,
+0,0,0,0,0,0,0,12,6,0,0,0,0,0,7,0,6,0,6,12,6,0,0,0,0,0,0,0,4,8,7,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20,0,0,0,0,6,0,6,0,0,0,0,0,0,0,0,10,6,8,0,0,
+6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+16,0,8,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,23,5,0,0,0,7,0,6,0,
+0,0,0,0,0,0,0,0,0,0,0,10,6,0,0,9,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,14,0,0,0,0,7,0,0,0,4,17,5,0,0,0,0,11,0,9,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,6,0,0,0,5,0,7,0,0,0,0,0,0,0,0,8,0,0,0,
+12,6,0,0,0,0,0,0,13,0,0,0,0,7,9,0,0,0,0,0,0,0,0,0,0,5,0,0,0,7,10,7,12,0,0,0,9,0,
+0,0,14,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,15,6,0,0,23,0,0,7,0,6,0,0,0,7,0,6,
+0,0,0,0,0,0,0,6,0,6,9,0,0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,8,7,9,4,0,0,10,0,0,0,10,
+6,0,7,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,23,0,0,6,0,0,0,0,0,0,9,4,
+0,0,10,7,0,0,0,0,0,0,0,0,0,0,0,0,9,7,0,0,9,6,0,0,0,0,8,6,0,0,0,0,0,0,0,0,12,0,0,
+0,0,0,8,0,0,6,11,6,0,0,8,7,8,5,0,0,0,0,0,5,0,0,0,0,0,0,0,4,0,0,0,4,0,0,0,0,0,0,
+10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,
+7,0,0,0,0,9,6,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,8,0,0,0,0,6,12,5,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,10,0,10,
+7,0,0,8,0,0,0,0,4,0,0,0,6,0,0,0,6,0,0,0,6,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,5,0,
+0,0,4,0,0,0,0,0,4,0,0,0,0,0,0,0,6,0,6,0,5,0,0,0,0,8,0,0,0,10,7,0,0,0,0,10,0,0,0,
+0,0,13,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,19,7,0,4,12,0,8,0,0,0,0,6,0,0,0,0,
+0,0,0,6,0,0,0,0,0,0,0,0,0,4,0,0,0,0,18,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,
+0,14,0,0,4,0,0,0,6,0,0,0,6,0,0,0,7,0,0,0,0,0,0,10,4,0,0,9,7,0,0,11,0,0,0,0,0,0,
+7,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,4,0,0,12,0,0,0,
+0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,22,5,9,7,0,0,0,0,0,0,0,0,0,
+0,0,6,0,0,9,6,0,5,0,0,0,0,0,0,10,5,0,0,8,6,0,6,10,5,0,0,0,6,0,0,0,6,0,0,20,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,6,0,0,0,0,17,4,0,7,0,6,
+0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,
+0,0,0,0,0,0,0,0,0,0,0,8,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,7,0,0,0,0,0,0,0,
+0,0,7,0,0,8,6,12,0,0,7,18,7,0,0,8,4,0,0,0,0,9,6,0,0,0,0,0,0,0,0,13,0,0,6,0,0,0,
+0,0,0,0,0,0,0,10,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,7,0,0,
+0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,8,5,0,0,0,0,0,0,0,0,12,0,0,0,8,0,0,0,0,0,0,
+4,0,0,10,0,16,0,0,0,0,0,0,0,12,7,10,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,16,6,10,0,0,5,0,0,0,0,0,6,0,0,0,0,
+0,7,0,0,0,7,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,5,8,7,0,7,0,0,0,0,0,0,0,0,8,0,0,6,0,0,0,6,0,0,0,4,0,0,0,0,
+8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,7,0,0,8,0,0,0,
+9,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,7,13,5,0,5,0,0,0,7,8,4,0,0,0,0,0,0,0,
+0,12,0,0,0,0,0,0,0,0,0,0,0,8,6,0,6,0,0,11,0,0,0,0,0,0,0,0,6,0,0,0,0,0,4,0,0,0,0,
+0,0,0,0,0,6,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,10,7,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,11,6,0,0,10,6,0,0,
+0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,
+0,0,0,6,0,0,0,7,0,0,9,0,8,7,11,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,9,6,10,5,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,10,7,0,0,0,0,0,0,11,0,9,6,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,5,0,6,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,15,5,12,5,
+0,0,0,0,0,0,12,7,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,6,0,0,12,6,0,
+0,0,0,24,4,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,7,0,0,0,0,0,0,0,0,0,0,
+0,0,0,4,10,4,0,0,0,0,10,7,0,0,0,0,0,0,0,0,0,0,0,0,9,0,11,0,0,0,0,0,0,0,0,0,0,6,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,
+0,0,8,0,0,0,0,7,0,0,0,0,0,0,10,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,10,7,0,0,0,0,0,
+0,0,0,0,0,14,7,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,4,0,0,0,6,0,0,0,0,0,6,0,0,0,6,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,11,6,0,0,0,0,0,0,0,4,0,0,0,4,0,0,0,0,0,7,20,7,11,4,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,7,9,6,0,0,12,7,0,0,0,0,0,0,10,0,12,0,
+0,0,0,0,0,4,9,6,13,0,0,0,0,0,0,0,0,6,0,0,0,6,0,0,0,5,0,0,0,0,0,0,8,0,0,0,0,0,0,
+0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,6,0,0,11,0,9,0,0,0,0,4,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,8,5,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,19,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,
+0,4,0,5,0,0,0,0,0,0,0,0,0,4,0,0,0,0,9,7,8,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,
+0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,6,
+0,0,0,0,8,7,0,0,0,0,0,0,12,0,0,6,0,0,0,0,0,0,0,6,8,4,0,0,10,7,0,0,10,0,0,0,0,0,
+0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,7,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,4,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,7,0,0,0,0,0,0,0,5,
+0,4,0,0,0,0,0,6,0,0,0,0,0,0,8,0,0,6,0,0,0,6,0,0,0,0,0,7,0,5,8,4,0,0,9,0,0,0,0,4,
+0,0,0,0,0,0,0,0,0,5,0,0,15,6,8,6,0,0,0,6,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,6,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,9,6,0,0,0,0,0,0,0,7,0,0,0,4,0,
+6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,9,5,0,6,12,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,6,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,8,7,0,6,0,0,0,0,0,0,0,0,0,0,0,0,11,0,12,7,0,0,0,0,
+0,0,0,0,0,5,0,5,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,11,4,0,0,0,0,0,0,0,0,0,0,10,
+7,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,7,8,7,9,6,0,0,0,0,0,0,0,0,8,6,0,0,0,0,0,5,12,0,
+10,5,12,6,0,0,0,7,0,0,0,0,0,0,0,5,0,0,0,5,9,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,
+11,7,0,0,0,0,0,0,0,0,0,0,17,0,0,0,0,0,0,6,0,7,0,0,0,0,8,0,8,5,0,6,0,0,0,6,0,0,0,
+0,0,0,0,6,0,6,0,6,9,0,0,5,17,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,7,0,0,
+0,0,0,7,0,0,0,0,16,5,0,0,0,0,0,0,0,4,0,0,0,5,11,5,0,7,0,0,0,4,8,7,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,7,0,0,0,0,12,0,0,0,
+0,0,12,0,0,0,0,0,0,0,0,4,10,4,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,6,0,0,0,0,0,0,0,4,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,20,5,0,0,
+10,0,0,0,0,0,0,0,0,0,0,6,0,0,0,6,12,0,0,0,0,0,0,6,0,0,0,0,0,0,9,4,10,7,0,4,0,0,
+0,0,0,0,10,6,0,0,0,0,8,4,0,7,8,6,0,6,8,0,10,0,0,0,0,0,13,5,0,6,0,0,0,0,0,0,22,4,
+0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,6,0,0,0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,6,10,
+5,8,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,10,4,0,0,10,7,0,0,0,0,0,5,0,
+5,8,0,0,0,0,6,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,10,7,0,0,0,4,0,0,0,0,0,6,0,0,
+0,0,0,0,0,0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,7,0,0,0,6,0,0,0,0,0,0,0,0,0,
+4,0,0,0,4,10,0,0,6,13,7,8,0,0,0,0,0,0,7,0,0,12,7,0,0,0,0,0,0,10,5,0,0,0,0,0,6,0,
+0,0,0,0,0,0,0,0,0,13,7,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,6,0,0,0,0,0,0,0,0,8,6,0,6,
+0,0,0,0,0,0,0,0,12,0,8,4,0,0,0,0,0,4,0,4,0,0,0,0,0,0,0,5,0,0,0,0,12,5,0,0,0,7,0,
+0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,6,10,0,0,0,20,0,0,5,0,0,10,
+7,11,7,0,0,0,0,0,0,0,0,0,0,17,0,9,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,10,7,0,4,0,6,0,0,24,0,0,5,0,0,0,0,8,0,0,
+0,0,0,0,0,10,5,0,4,0,6,0,0,8,0,0,0,0,0,0,4,0,6,0,0,0,0,0,0,9,5,0,0,0,0,0,0,0,0,
+0,0,0,6,0,0,0,0,9,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,4,0,7,
+0,0,13,0,0,0,0,0,0,0,11,6,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,
+17,7,0,0,11,6,0,0,0,0,12,6,0,0,0,6,0,6,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,5,0,0,0,6,0,0,0,0,0,0,0,0,0,0,10,0,0,4,8,6,0,0,0,
+0,0,0,9,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,9,5,0,7,18,0,0,0,0,0,0,0,0,0,0,0,0,0,8,6,0,0,0,0,0,0,0,0,8,0,0,0,
+0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,
+0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,4,0,6,0,0,9,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0,0,0,
+0,0,0,8,7,10,0,8,5,0,0,0,0,0,0,0,0,9,0,0,0,10,0,0,0,0,6,0,7,0,4,0,0,0,0,0,0,0,0,
+8,0,0,0,0,0,8,4,0,0,0,0,0,5,0,0,10,0,12,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+4,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,6,11,0,0,
+7,0,0,0,0,0,6,10,5,0,0,0,0,0,0,0,0,0,5,0,0,9,5,12,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,6,0,0,0,0,13,6,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,
+0,0,0,8,4,0,6,12,0,0,0,0,0,0,0,0,0,0,0,0,6,0,6,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,11,4,0,0,0,6,14,0,11,0,9,6,0,0,0,0,0,0,22,0,12,0,8,6,0,0,0,0,0,0,0,6,0,
+0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,
+10,7,0,0,0,0,0,0,0,0,9,0,0,0,0,4,0,0,0,0,0,0,0,0,0,5,11,0,0,0,0,0,0,0,8,6,0,0,9,
+7,0,0,12,4,0,0,0,0,0,0,12,6,0,6,0,7,0,0,8,5,0,0,0,0};
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/dictionary_hash.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/dictionary_hash.h
new file mode 100644
index 0000000000..e553ea5d4e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/dictionary_hash.h
@@ -0,0 +1,25 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Hash table on the 4-byte prefixes of static dictionary words. */
+
+#ifndef BROTLI_ENC_DICTIONARY_HASH_H_
+#define BROTLI_ENC_DICTIONARY_HASH_H_
+
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+extern const uint16_t kStaticDictionaryHashWords[32768];  /* 1 << 15 entries */
+extern const uint8_t kStaticDictionaryHashLengths[32768];  /* same entry count */
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_DICTIONARY_HASH_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encode.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encode.c
new file mode 100644
index 0000000000..68548ef55a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encode.c
@@ -0,0 +1,1925 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Implementation of Brotli compressor. */
+
+#include <brotli/encode.h>
+
+#include <stdlib.h>  /* free, malloc */
+#include <string.h>  /* memcpy, memset */
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/platform.h"
+#include "../common/version.h"
+#include "./backward_references.h"
+#include "./backward_references_hq.h"
+#include "./bit_cost.h"
+#include "./brotli_bit_stream.h"
+#include "./compress_fragment.h"
+#include "./compress_fragment_two_pass.h"
+#include "./encoder_dict.h"
+#include "./entropy_encode.h"
+#include "./fast_log.h"
+#include "./hash.h"
+#include "./histogram.h"
+#include "./memory.h"
+#include "./metablock.h"
+#include "./prefix.h"
+#include "./quality.h"
+#include "./ringbuffer.h"
+#include "./utf8_util.h"
+#include "./write_bits.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));  /* copies sizeof(src) bytes, so |src| must be an array, not a pointer */
+
+typedef enum BrotliEncoderStreamState {  /* Type of the stream_state_ field. */
+  /* Default state. */
+  BROTLI_STREAM_PROCESSING = 0,
+  /* Intermediate state: once the next block has been emitted, byte-padding
+     must be performed before returning to BROTLI_STREAM_PROCESSING. */
+  BROTLI_STREAM_FLUSH_REQUESTED = 1,
+  /* The last metablock has been produced; no further input is accepted. */
+  BROTLI_STREAM_FINISHED = 2,
+  /* Flushing the compressed block and writing the metadata block header. */
+  BROTLI_STREAM_METADATA_HEAD = 3,
+  /* Writing the metadata block body. */
+  BROTLI_STREAM_METADATA_BODY = 4
+} BrotliEncoderStreamState;
+
+typedef enum BrotliEncoderFlintState {  /* Values held by the int8_t flint_. */
+  BROTLI_FLINT_NEEDS_2_BYTES = 2,  /* NOTE(review): presumably a count of bytes still needed -- confirm. */
+  BROTLI_FLINT_NEEDS_1_BYTE = 1,
+  BROTLI_FLINT_WAITING_FOR_PROCESSING = 0,
+  BROTLI_FLINT_WAITING_FOR_FLUSHING = -1,  /* Negative values: storage must be signed. */
+  BROTLI_FLINT_DONE = -2
+} BrotliEncoderFlintState;
+
+typedef struct BrotliEncoderStateStruct {  /* State of one encoder instance. */
+  BrotliEncoderParams params;  /* Written by BrotliEncoderSetParameter(). */
+
+  MemoryManager memory_manager_;  /* Passed to BROTLI_ALLOC / BROTLI_FREE. */
+
+  uint64_t input_pos_;  /* See UnprocessedInputSize(). */
+  RingBuffer ringbuffer_;
+  size_t cmd_alloc_size_;
+  Command* commands_;
+  size_t num_commands_;
+  size_t num_literals_;
+  size_t last_insert_len_;
+  uint64_t last_flush_pos_;
+  uint64_t last_processed_pos_;  /* input_pos_ - this = unprocessed size. */
+  int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES];
+  int saved_dist_cache_[4];
+  uint16_t last_bytes_;  /* NOTE(review): looks like EncodeWindowBits() output -- confirm. */
+  uint8_t last_bytes_bits_;  /* NOTE(review): presumably bit count of last_bytes_ -- confirm. */
+  /* "Flint" is a tiny uncompressed block emitted before the continuation
+     block to unwire literal context from previous data. Despite being int8_t,
+     field is actually BrotliEncoderFlintState enum. */
+  int8_t flint_;
+  uint8_t prev_byte_;
+  uint8_t prev_byte2_;
+  size_t storage_size_;  /* Set by GetBrotliStorage() after it allocates. */
+  uint8_t* storage_;     /* Reallocated by GetBrotliStorage() when too small. */
+
+  Hasher hasher_;
+
+  /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */
+  int small_table_[1 << 10];  /* 4KiB */
+  int* large_table_;          /* Allocated only when needed */
+  size_t large_table_size_;   /* Entry count, maintained by GetHashTable(). */
+  /* Command and distance prefix codes (each 64 symbols, stored back-to-back)
+     used for the next block in FAST_ONE_PASS_COMPRESSION_QUALITY. The command
+     prefix code is over a smaller alphabet with the following 64 symbols:
+        0 - 15: insert length code 0, copy length code 0 - 15, same distance
+       16 - 39: insert length code 0, copy length code 0 - 23
+       40 - 63: insert length code 0 - 23, copy length code 0
+     Note that symbols 16 and 40 represent the same code in the full alphabet,
+     but we do not use either of them in FAST_ONE_PASS_COMPRESSION_QUALITY. */
+  uint8_t cmd_depths_[128];
+  uint16_t cmd_bits_[128];
+  /* The compressed form of the command and distance prefix codes for the next
+     block in FAST_ONE_PASS_COMPRESSION_QUALITY. */
+  uint8_t cmd_code_[512];
+  size_t cmd_code_numbits_;
+  /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */
+  uint32_t* command_buf_;
+  uint8_t* literal_buf_;
+
+  uint8_t* next_out_;
+  size_t available_out_;
+  size_t total_out_;
+  /* Temporary buffer for padding flush bits or metadata block header / body. */
+  union {
+    uint64_t u64[2];
+    uint8_t u8[16];
+  } tiny_buf_;
+  uint32_t remaining_metadata_bytes_;
+  BrotliEncoderStreamState stream_state_;
+
+  BROTLI_BOOL is_last_block_emitted_;
+  BROTLI_BOOL is_initialized_;  /* When set, BrotliEncoderSetParameter() fails. */
+} BrotliEncoderStateStruct;
+
+static size_t InputBlockSize(BrotliEncoderState* s) {
+  return (size_t)1 << s->params.lgblock;  /* 2^lgblock, computed as size_t. */
+}
+
+static uint64_t UnprocessedInputSize(BrotliEncoderState* s) {
+  return s->input_pos_ - s->last_processed_pos_;  /* Input past the processed mark. */
+}
+
+/* Room left in the current input block; 0 when a whole block is pending. */
+static size_t RemainingInputBlockSize(BrotliEncoderState* s) {
+  const uint64_t pending = UnprocessedInputSize(s);
+  const size_t capacity = InputBlockSize(s);
+  return (pending >= capacity) ? 0 : capacity - (size_t)pending;
+}
+
+/* Stores one encoder parameter in |state->params|.
+
+   |p| selects the field and |value| is converted to that field's type.
+   Returns BROTLI_TRUE once the value is stored. Returns BROTLI_FALSE, leaving
+   the parameters untouched, when the encoder is already initialized, when |p|
+   is not one of the cases below, or when |value| fails the check made for
+   DISABLE_LITERAL_CONTEXT_MODELING (0 or 1) or STREAM_OFFSET (<= 1 << 30). */
+BROTLI_BOOL BrotliEncoderSetParameter(
+    BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value) {
+  BrotliEncoderParams* const params = &state->params;
+
+  /* Changing parameters on the fly is not implemented yet. */
+  if (state->is_initialized_) return BROTLI_FALSE;
+  /* TODO: Validate/clamp parameters here. */
+  switch (p) {
+    case BROTLI_PARAM_MODE:
+      params->mode = (BrotliEncoderMode)value;
+      break;
+    case BROTLI_PARAM_QUALITY:
+      params->quality = (int)value;
+      break;
+    case BROTLI_PARAM_LGWIN:
+      params->lgwin = (int)value;
+      break;
+    case BROTLI_PARAM_LGBLOCK:
+      params->lgblock = (int)value;
+      break;
+    case BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING:
+      if (value > 1) return BROTLI_FALSE;
+      params->disable_literal_context_modeling = TO_BROTLI_BOOL(!!value);
+      break;
+    case BROTLI_PARAM_SIZE_HINT:
+      params->size_hint = value;
+      break;
+    case BROTLI_PARAM_LARGE_WINDOW:
+      params->large_window = TO_BROTLI_BOOL(!!value);
+      break;
+    case BROTLI_PARAM_NPOSTFIX:
+      params->dist.distance_postfix_bits = value;
+      break;
+    case BROTLI_PARAM_NDIRECT:
+      params->dist.num_direct_distance_codes = value;
+      break;
+    case BROTLI_PARAM_STREAM_OFFSET:
+      if (value > (1u << 30)) return BROTLI_FALSE;
+      params->stream_offset = value;
+      break;
+    default: return BROTLI_FALSE;
+  }
+  return BROTLI_TRUE;
+}
+
+/* Maps a 64-bit input position onto a 32-bit ring-buffer position while
+   keeping the "not-a-first-lap" property: positions below 3GiB map to
+   themselves, later ones alternate between [1GiB, 2GiB) and [2GiB, 3GiB). */
+static uint32_t WrapPosition(uint64_t position) {
+  const uint64_t gb = position >> 30;  /* Index of the 1GiB chunk. */
+  const uint32_t offset = (uint32_t)position & ((1u << 30) - 1);
+  uint32_t lap;
+  if (gb <= 2) return (uint32_t)position;
+  lap = (uint32_t)((gb - 1) & 1) + 1;  /* 1 for odd |gb|, 2 for even |gb|. */
+  return offset | (lap << 30);
+}
+
+static uint8_t* GetBrotliStorage(BrotliEncoderState* s, size_t size) {
+  MemoryManager* m = &s->memory_manager_;
+  /* The current buffer is reused whenever it is already large enough. */
+  if (size <= s->storage_size_) return s->storage_;
+  BROTLI_FREE(m, s->storage_);
+  s->storage_ = BROTLI_ALLOC(m, uint8_t, size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->storage_)) return NULL;
+  s->storage_size_ = size;
+  return s->storage_;
+}
+
+static size_t HashTableSize(size_t max_table_size, size_t input_size) {
+  /* Doubles from 256 until either |max_table_size| or |input_size| is hit. */
+  const size_t cap = max_table_size < input_size ? max_table_size : input_size;
+  size_t size;
+  for (size = 256; size < cap; size <<= 1) {}
+  return size;
+}
+
+/* Returns a zeroed hash table sized to the input and stores its entry count
+   in |*table_size|: filling costs O(table size) and a short input would not
+   use many entries anyway. Uses |s->small_table_| when it is big enough, else
+   a heap table kept in |s| for reuse; returns NULL if allocating it fails. */
+static int* GetHashTable(BrotliEncoderState* s, int quality,
+                         size_t input_size, size_t* table_size) {
+  MemoryManager* m = &s->memory_manager_;
+  const size_t max_table_size = MaxHashTableSize(quality);
+  size_t htsize = HashTableSize(max_table_size, input_size);
+  int* table;
+  BROTLI_DCHECK(max_table_size >= 256);
+  /* Only odd shifts are supported by fast-one-pass. */
+  if (quality == FAST_ONE_PASS_COMPRESSION_QUALITY &&
+      (htsize & 0xAAAAA) == 0) {
+    htsize <<= 1;
+  }
+  if (htsize <= sizeof(s->small_table_) / sizeof(s->small_table_[0])) {
+    table = s->small_table_;
+  } else {
+    if (htsize > s->large_table_size_) {
+      /* Publish the size only after success: no stale size after OOM. */
+      s->large_table_size_ = 0;
+      BROTLI_FREE(m, s->large_table_);
+      s->large_table_ = BROTLI_ALLOC(m, int, htsize);
+      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->large_table_)) return NULL;
+      s->large_table_size_ = htsize;
+    }
+    table = s->large_table_;
+  }
+
+  *table_size = htsize;
+  memset(table, 0, htsize * sizeof(*table));
+  return table;
+}
+
+/* Encodes window size |lgwin| as a bit pattern in |*last_bytes| and stores the
+   pattern's bit count in |*last_bytes_bits|; |large_window| picks 14 bits. */
+static void EncodeWindowBits(int lgwin, BROTLI_BOOL large_window,
+    uint16_t* last_bytes, uint8_t* last_bytes_bits) {
+  if (large_window) {
+    *last_bytes_bits = 14;
+    *last_bytes = (uint16_t)(((lgwin & 0x3F) << 8) | 0x11);
+  } else if (lgwin == 16) {
+    *last_bytes_bits = 1;
+    *last_bytes = 0;
+  } else if (lgwin == 17) {
+    *last_bytes_bits = 7;
+    *last_bytes = 1;
+  } else if (lgwin > 17) {
+    *last_bytes_bits = 4;
+    *last_bytes = (uint16_t)(((lgwin - 17) << 1) | 0x01);
+  } else {
+    *last_bytes_bits = 7;
+    *last_bytes = (uint16_t)(((lgwin - 8) << 4) | 0x01);
+  }
+}
+
+/* Loads the built-in command and distance prefix codes used for the first
+   block: depths and bits are copied in full, while |cmd_code| receives only
+   sizeof(kDefaultCommandCode) bytes and |*cmd_code_numbits| their bit length. */
+static void InitCommandPrefixCodes(uint8_t cmd_depths[128],
+                                   uint16_t cmd_bits[128],
+                                   uint8_t cmd_code[512],
+                                   size_t* cmd_code_numbits) {
+  static const uint8_t kDefaultCommandDepths[128] = {
+    0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
+    0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7,
+    7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6,
+    7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,
+    4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  };
+  static const uint16_t kDefaultCommandBits[128] = {
+    0,   0,   8,   9,   3,  35,   7,   71,
+    39, 103,  23,  47, 175, 111, 239,   31,
+    0,   0,   0,   4,  12,   2,  10,    6,
+    13,  29,  11,  43,  27,  59,  87,   55,
+    15,  79, 319, 831, 191, 703, 447,  959,
+    0,  14,   1,  25,   5,  21,  19,   51,
+    119, 159,  95, 223, 479, 991,  63,  575,
+    127, 639, 383, 895, 255, 767, 511, 1023,
+    14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12,
+    2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255,
+    767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095,
+  };
+  static const uint8_t kDefaultCommandCode[] = {
+    0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,
+    0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,
+    0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa,
+    0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94,
+    0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00,
+  };
+  static const size_t kDefaultCommandCodeNumBits = 448;
+  memcpy(cmd_depths, kDefaultCommandDepths, sizeof(kDefaultCommandDepths));
+  memcpy(cmd_bits, kDefaultCommandBits, sizeof(kDefaultCommandBits));
+  /* Pre-compressed form of the same command and distance prefix codes. */
+  memcpy(cmd_code, kDefaultCommandCode, sizeof(kDefaultCommandCode));
+  *cmd_code_numbits = kDefaultCommandCodeNumBits;
+}
+
+/* Chooses between 1, 2 or 3 literal contexts by measuring how well the
+   UTF8 prefix class of the previous byte predicts the prefix class of the
+   next byte. The predictive power is measured as Shannon entropy. Shannon
+   entropy is used instead of 'BitsEntropy' because the prefix is encoded
+   together with the remaining 6 bits of the following byte, whereas
+   BitsEntropy would assume the symbol is stored alone using Huffman coding.
+
+   |bigram_histo| holds 9 counters, read here as a 3x3 table with index
+   3 * row + column. |*num_literal_contexts| is always written;
+   |*literal_context_map| is written only when 2 or 3 contexts are chosen. */
+static void ChooseContextMap(int quality,
+                             uint32_t* bigram_histo,
+                             size_t* num_literal_contexts,
+                             const uint32_t** literal_context_map) {
+  static const uint32_t kStaticContextMapContinuation[64] = {
+    1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  };
+  static const uint32_t kStaticContextMapSimpleUTF8[64] = {
+    0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  };
+
+  uint32_t column_histo[3] = { 0 };   /* Bigram table summed over rows. */
+  uint32_t folded_histo[6] = { 0 };   /* Bigram table folded modulo 6. */
+  double bits_one_ctx;
+  double bits_two_ctx;
+  double bits_three_ctx = 0;
+  double per_symbol;
+  size_t total;
+  size_t unused;
+  size_t row;
+  size_t col;
+
+  for (row = 0; row < 3; ++row) {
+    for (col = 0; col < 3; ++col) {
+      const size_t cell = 3 * row + col;
+      column_histo[col] += bigram_histo[cell];
+      folded_histo[cell % 6] += bigram_histo[cell];
+    }
+  }
+
+  bits_one_ctx = ShannonEntropy(column_histo, 3, &unused);
+  bits_two_ctx = (ShannonEntropy(folded_histo, 3, &unused) +
+                  ShannonEntropy(folded_histo + 3, 3, &unused));
+  for (row = 0; row < 3; ++row) {
+    bits_three_ctx += ShannonEntropy(bigram_histo + 3 * row, 3, &unused);
+  }
+
+  /* Normalize all three costs to bits per sampled symbol. */
+  total = column_histo[0] + column_histo[1] + column_histo[2];
+  BROTLI_DCHECK(total != 0);
+  per_symbol = 1.0 / (double)total;
+  bits_one_ctx *= per_symbol;
+  bits_two_ctx *= per_symbol;
+  bits_three_ctx *= per_symbol;
+
+  if (quality < MIN_QUALITY_FOR_HQ_CONTEXT_MODELING) {
+    /* 3 context models is a bit slower, don't use it at lower qualities. */
+    bits_three_ctx = bits_one_ctx * 10;
+  }
+
+  /* If expected savings by symbol are less than 0.2 bits, skip the
+     context modeling -- in exchange for faster decoding speed. */
+  if (bits_one_ctx - bits_two_ctx < 0.2 &&
+      bits_one_ctx - bits_three_ctx < 0.2) {
+    *num_literal_contexts = 1;
+  } else if (bits_two_ctx - bits_three_ctx < 0.02) {
+    /* The third context barely helps: two contexts are enough. */
+    *num_literal_contexts = 2;
+    *literal_context_map = kStaticContextMapSimpleUTF8;
+  } else {
+    *num_literal_contexts = 3;
+    *literal_context_map = kStaticContextMapContinuation;
+  }
+}
+
+/* Decides whether the more elaborate static context map with 13 context
+   values should be used. The decision is based on how much the entropy of
+   histograms over the 5 most significant bits of literals drops once the
+   literals are split by context.
+   Returns BROTLI_TRUE and fills |*num_literal_contexts| and
+   |*literal_context_map| when the complex map is selected; otherwise
+   returns BROTLI_FALSE and leaves both outputs untouched. */
+static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,
+    size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
+    size_t* num_literal_contexts, const uint32_t** literal_context_map) {
+  static const uint32_t kStaticContextMapComplexUTF8[64] = {
+    11, 11, 12, 12, /* 0 special */
+    0, 0, 0, 0, /* 4 lf */
+    1, 1, 9, 9, /* 8 space */
+    2, 2, 2, 2, /* !, first after space/lf and after something else. */
+    1, 1, 1, 1, /* " */
+    8, 3, 3, 3, /* % */
+    1, 1, 1, 1, /* ({[ */
+    2, 2, 2, 2, /* }]) */
+    8, 4, 4, 4, /* :; */
+    8, 7, 4, 4, /* . */
+    8, 0, 0, 0, /* > */
+    3, 3, 3, 3, /* [0..9] */
+    5, 5, 10, 5, /* [A-Z] */
+    5, 5, 10, 5,
+    6, 6, 6, 6, /* [a-z] */
+    6, 6, 6, 6,
+  };
+  BROTLI_UNUSED(quality);
+  /* Try the more complex static context map only for long data. */
+  if (size_hint < (1 << 20)) {
+    return BROTLI_FALSE;
+  }
+  {
+    /* To keep the entropy computation fast and the buffers small enough for
+       the stack, only the 5 most significant bits of each literal are
+       counted: one histogram without context plus one per context value. */
+    uint32_t plain_histo[32] = { 0 };
+    uint32_t per_context_histo[13][32] = { { 0 } };
+    uint32_t total = 0;
+    double bits_plain;
+    double bits_with_context = 0;
+    double per_symbol;
+    size_t unused;
+    size_t ctx;
+    size_t stride_start;
+    const size_t end_pos = start_pos + length;
+    ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8);
+
+    /* To make the analysis of the data faster we only examine 64 byte long
+       strides at every 4kB intervals. */
+    for (stride_start = start_pos; stride_start + 64 <= end_pos;
+         stride_start += 4096) {
+      const size_t stride_end = stride_start + 64;
+      /* The first two bytes of a stride only seed the context. */
+      uint8_t prev2 = input[stride_start & mask];
+      uint8_t prev1 = input[(stride_start + 1) & mask];
+      size_t pos = stride_start + 2;
+      while (pos < stride_end) {
+        const uint8_t literal = input[pos & mask];
+        const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[
+            BROTLI_CONTEXT(prev1, prev2, utf8_lut)];
+        const size_t bucket = literal >> 3;
+        ++total;
+        ++plain_histo[bucket];
+        ++per_context_histo[context][bucket];
+        prev2 = prev1;
+        prev1 = literal;
+        ++pos;
+      }
+    }
+
+    bits_plain = ShannonEntropy(plain_histo, 32, &unused);
+    for (ctx = 0; ctx < 13; ++ctx) {
+      bits_with_context +=
+          ShannonEntropy(&per_context_histo[ctx][0], 32, &unused);
+    }
+    /* Normalize to bits per sampled literal. */
+    per_symbol = 1.0 / (double)total;
+    bits_plain *= per_symbol;
+    bits_with_context *= per_symbol;
+
+    /* The triggering heuristics below were tuned by compressing the individual
+       files of the silesia corpus. If we skip this kind of context modeling
+       for not very well compressible input (i.e. entropy using context modeling
+       is 60% of maximal entropy) or if expected savings by symbol are less
+       than 0.2 bits, then in every case when it triggers, the final compression
+       ratio is improved. Note however that this heuristics might be too strict
+       for some cases and could be tuned further. */
+    if (bits_with_context > 3.0 || bits_plain - bits_with_context < 0.2) {
+      return BROTLI_FALSE;
+    }
+    *num_literal_contexts = 13;
+    *literal_context_map = kStaticContextMapComplexUTF8;
+    return BROTLI_TRUE;
+  }
+}
+
+/* Selects the literal context map for a meta-block. Nothing is changed for
+   low qualities or for inputs shorter than 64 bytes. Otherwise the complex
+   static map is tried first; if it is rejected, a bigram histogram of UTF8
+   byte-prefix classes is gathered and handed to ChooseContextMap(). */
+static void DecideOverLiteralContextModeling(const uint8_t* input,
+    size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
+    size_t* num_literal_contexts, const uint32_t** literal_context_map) {
+  if (quality < MIN_QUALITY_FOR_CONTEXT_MODELING || length < 64) {
+    return;
+  }
+  if (ShouldUseComplexStaticContextMap(
+      input, start_pos, length, mask, quality, size_hint,
+      num_literal_contexts, literal_context_map)) {
+    /* Context map was already set, nothing else to do. */
+    return;
+  }
+  {
+    /* Maps the two top bits of a byte to its prefix class. */
+    static const int kPrefixClass[4] = { 0, 0, 1, 2 };
+    uint32_t bigram_prefix_histo[9] = { 0 };
+    const size_t end_pos = start_pos + length;
+    size_t stride_start;
+    /* To make the analysis of UTF8 data faster we only examine 64 byte long
+       strides at every 4kB intervals. */
+    for (stride_start = start_pos; stride_start + 64 <= end_pos;
+         stride_start += 4096) {
+      const size_t stride_end = stride_start + 64;
+      /* Row offset (3 * class of the previous byte) into the 3x3 table. */
+      int row = kPrefixClass[input[stride_start & mask] >> 6] * 3;
+      size_t pos;
+      for (pos = stride_start + 1; pos < stride_end; ++pos) {
+        const int cls = kPrefixClass[input[pos & mask] >> 6];
+        ++bigram_prefix_histo[row + cls];
+        row = cls * 3;
+      }
+    }
+    ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
+                     literal_context_map);
+  }
+}
+
+/* Heuristic that tells whether a block is worth compressing at all.
+   Returns BROTLI_FALSE for blocks of at most 2 bytes, and for blocks that
+   have few commands, consist almost entirely of literals, and whose sampled
+   literal histogram has an entropy estimate above the threshold.
+   Returns BROTLI_TRUE otherwise. */
+static BROTLI_BOOL ShouldCompress(
+    const uint8_t* data, const size_t mask, const uint64_t last_flush_pos,
+    const size_t bytes, const size_t num_literals, const size_t num_commands) {
+  /* TODO: find more precise minimal block overhead. */
+  if (bytes <= 2) return BROTLI_FALSE;
+  if (num_commands >= (bytes >> 8) + 2) return BROTLI_TRUE;
+  if (!(num_literals > 0.99 * (double)bytes)) return BROTLI_TRUE;
+  {
+    static const uint32_t kSampleRate = 13;
+    static const double kMinEntropy = 7.92;
+    uint32_t literal_histo[256] = { 0 };
+    const double bit_cost_threshold =
+        (double)bytes * kMinEntropy / kSampleRate;
+    const size_t num_samples = (bytes + kSampleRate - 1) / kSampleRate;
+    uint32_t pos = (uint32_t)last_flush_pos;
+    size_t i;
+    /* Look at every kSampleRate-th byte only. */
+    for (i = 0; i < num_samples; ++i, pos += kSampleRate) {
+      ++literal_histo[data[pos & mask]];
+    }
+    if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
+      return BROTLI_FALSE;
+    }
+  }
+  return BROTLI_TRUE;
+}
+
+/* Chooses the literal context mode for a metablock.
+   CONTEXT_UTF8 is the default; CONTEXT_SIGNED is chosen only at the highest
+   qualities, and only when the data is not mostly UTF8. */
+static ContextType ChooseContextMode(const BrotliEncoderParams* params,
+    const uint8_t* data, const size_t pos, const size_t mask,
+    const size_t length) {
+  ContextType mode = CONTEXT_UTF8;
+  /* The UTF8 scan is only run for the highest qualities. */
+  if (params->quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
+    if (!BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio)) {
+      mode = CONTEXT_SIGNED;
+    }
+  }
+  return mode;
+}
+
+/* Serializes one meta-block covering |bytes| bytes of |data| that start at
+   |last_flush_pos| (wrapped internally) into |storage|.
+   |*storage_ix| is the current write position in bits and is advanced.
+   The storing routine is picked from |params->quality|: the fast routine,
+   the trivial routine, or a full block split followed by
+   BrotliStoreMetaBlock(). The block is stored uncompressed instead when
+   ShouldCompress() declines, or when the compressed form ends up more than
+   4 bytes longer than the input; in both cases |dist_cache| is restored
+   from |saved_dist_cache|.
+   The function returns early, without further cleanup, whenever the memory
+   manager |m| reports an out-of-memory condition. */
+static void WriteMetaBlockInternal(MemoryManager* m,
+                                   const uint8_t* data,
+                                   const size_t mask,
+                                   const uint64_t last_flush_pos,
+                                   const size_t bytes,
+                                   const BROTLI_BOOL is_last,
+                                   ContextType literal_context_mode,
+                                   const BrotliEncoderParams* params,
+                                   const uint8_t prev_byte,
+                                   const uint8_t prev_byte2,
+                                   const size_t num_literals,
+                                   const size_t num_commands,
+                                   Command* commands,
+                                   const int* saved_dist_cache,
+                                   int* dist_cache,
+                                   size_t* storage_ix,
+                                   uint8_t* storage) {
+  const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos);
+  /* Snapshot of the header bits already present in |storage|; used to roll
+     the output back if the compressed form turns out too large. */
+  uint16_t last_bytes;
+  uint8_t last_bytes_bits;
+  ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
+  /* Private copy of the parameters handed to the block-building routines. */
+  BrotliEncoderParams block_params = *params;
+
+  if (bytes == 0) {
+    /* Write the ISLAST and ISEMPTY bits. */
+    BrotliWriteBits(2, 3, storage_ix, storage);
+    /* Round the bit position up to the next byte boundary. */
+    *storage_ix = (*storage_ix + 7u) & ~7u;
+    return;
+  }
+
+  if (!ShouldCompress(data, mask, last_flush_pos, bytes,
+                      num_literals, num_commands)) {
+    /* Restore the distance cache, as its last update by
+       CreateBackwardReferences is now unused. */
+    memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
+    BrotliStoreUncompressedMetaBlock(is_last, data,
+                                     wrapped_last_flush_pos, mask, bytes,
+                                     storage_ix, storage);
+    return;
+  }
+
+  /* At most 14 bits may be pending, so they fit in the two saved bytes. */
+  BROTLI_DCHECK(*storage_ix <= 14);
+  last_bytes = (uint16_t)((storage[1] << 8) | storage[0]);
+  last_bytes_bits = (uint8_t)(*storage_ix);
+  if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
+    BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos,
+                             bytes, mask, is_last, params,
+                             commands, num_commands,
+                             storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+  } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
+    BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos,
+                                bytes, mask, is_last, params,
+                                commands, num_commands,
+                                storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+  } else {
+    MetaBlockSplit mb;
+    InitMetaBlockSplit(&mb);
+    if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
+      /* Greedy block building with an optional static literal context map. */
+      size_t num_literal_contexts = 1;
+      const uint32_t* literal_context_map = NULL;
+      if (!params->disable_literal_context_modeling) {
+        DecideOverLiteralContextModeling(
+            data, wrapped_last_flush_pos, bytes, mask, params->quality,
+            params->size_hint, &num_literal_contexts,
+            &literal_context_map);
+      }
+      BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,
+          prev_byte, prev_byte2, literal_context_lut, num_literal_contexts,
+          literal_context_map, commands, num_commands, &mb);
+      if (BROTLI_IS_OOM(m)) return;
+    } else {
+      BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, &block_params,
+                           prev_byte, prev_byte2,
+                           commands, num_commands,
+                           literal_context_mode,
+                           &mb);
+      if (BROTLI_IS_OOM(m)) return;
+    }
+    if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) {
+      /* The number of distance symbols effectively used for distance
+         histograms. It might be less than distance alphabet size
+         for "Large Window Brotli" (32-bit). */
+      BrotliOptimizeHistograms(block_params.dist.alphabet_size_limit, &mb);
+    }
+    BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask,
+                         prev_byte, prev_byte2,
+                         is_last,
+                         &block_params,
+                         literal_context_mode,
+                         commands, num_commands,
+                         &mb,
+                         storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+    DestroyMetaBlockSplit(m, &mb);
+  }
+  /* The compressed output is more than 4 bytes longer than the input:
+     discard it and store the block uncompressed instead. */
+  if (bytes + 4 < (*storage_ix >> 3)) {
+    /* Restore the distance cache and last byte. */
+    memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
+    storage[0] = (uint8_t)last_bytes;
+    storage[1] = (uint8_t)(last_bytes >> 8);
+    *storage_ix = last_bytes_bits;
+    BrotliStoreUncompressedMetaBlock(is_last, data,
+                                     wrapped_last_flush_pos, mask,
+                                     bytes, storage_ix, storage);
+  }
+}
+
+/* Picks the distance postfix bits (NPOSTFIX) and the number of direct
+   distance codes (NDIRECT) and installs them via BrotliInitDistanceParams().
+   Below MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS both values are zero.
+   Otherwise font mode uses the fixed pair (1, 12) and every other mode takes
+   the values requested in |params->dist|. A requested pair that is out of
+   range, or whose NDIRECT is not a 4-bit value shifted left by NPOSTFIX,
+   is replaced by (0, 0). */
+static void ChooseDistanceParams(BrotliEncoderParams* params) {
+  uint32_t distance_postfix_bits = 0;
+  uint32_t num_direct_distance_codes = 0;
+
+  if (params->quality >= MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS) {
+    if (params->mode == BROTLI_MODE_FONT) {
+      distance_postfix_bits = 1;
+      num_direct_distance_codes = 12;
+    } else {
+      distance_postfix_bits = params->dist.distance_postfix_bits;
+      num_direct_distance_codes = params->dist.num_direct_distance_codes;
+    }
+    /* The range checks must come first: the requested postfix bits are used
+       as a shift count below, and shifting a 32-bit value by 32 or more is
+       undefined behavior. Short-circuit evaluation guarantees the shifts
+       are only evaluated once the count is known to be small. */
+    if (distance_postfix_bits > BROTLI_MAX_NPOSTFIX ||
+        num_direct_distance_codes > BROTLI_MAX_NDIRECT ||
+        (((num_direct_distance_codes >> distance_postfix_bits) & 0x0F)
+            << distance_postfix_bits) != num_direct_distance_codes) {
+      distance_postfix_bits = 0;
+      num_direct_distance_codes = 0;
+    }
+  }
+
+  BrotliInitDistanceParams(
+      params, distance_postfix_bits, num_direct_distance_codes);
+}
+
+/* Performs the deferred, one-time setup of the encoder state: sanitizes the
+   parameters, derives the block and distance parameters, sets up the ring
+   buffer and prepares the stream header bits.
+   Returns BROTLI_FALSE if the memory manager is in the out-of-memory state,
+   BROTLI_TRUE otherwise (including when the state was already set up). */
+static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
+  int lgwin;
+  size_t i;
+
+  if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE;
+  if (s->is_initialized_) return BROTLI_TRUE;
+
+  s->last_bytes_ = 0;
+  s->last_bytes_bits_ = 0;
+  s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
+  s->flint_ = BROTLI_FLINT_DONE;
+
+  SanitizeParams(&s->params);
+  s->params.lgblock = ComputeLgBlock(&s->params);
+  ChooseDistanceParams(&s->params);
+
+  if (s->params.stream_offset != 0) {
+    s->flint_ = BROTLI_FLINT_NEEDS_2_BYTES;
+    /* Poison the distance cache. -16 +- 3 is still less than zero (invalid). */
+    for (i = 0; i < 4; ++i) {
+      s->dist_cache_[i] = -16;
+    }
+    memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
+  }
+
+  RingBufferSetup(&s->params, &s->ringbuffer_);
+
+  /* Initialize last byte with stream header. The two fast qualities
+     announce a window of at least 18 bits. */
+  lgwin = s->params.lgwin;
+  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
+      s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
+    lgwin = BROTLI_MAX(int, lgwin, 18);
+  }
+  if (s->params.stream_offset != 0) {
+    /* Bigger values have the same effect, but could cause overflows. */
+    s->params.stream_offset = BROTLI_MIN(size_t,
+        s->params.stream_offset, BROTLI_MAX_BACKWARD_LIMIT(lgwin));
+  } else {
+    EncodeWindowBits(lgwin, s->params.large_window,
+                     &s->last_bytes_, &s->last_bytes_bits_);
+  }
+
+  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
+    InitCommandPrefixCodes(s->cmd_depths_, s->cmd_bits_,
+                           s->cmd_code_, &s->cmd_code_numbits_);
+  }
+
+  s->is_initialized_ = BROTLI_TRUE;
+  return BROTLI_TRUE;
+}
+
+/* Resets |params| to the encoder defaults. */
+static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
+  /* User-tunable knobs. */
+  params->mode = BROTLI_DEFAULT_MODE;
+  params->quality = BROTLI_DEFAULT_QUALITY;
+  params->lgwin = BROTLI_DEFAULT_WINDOW;
+  params->lgblock = 0;
+  params->large_window = BROTLI_FALSE;
+  params->disable_literal_context_modeling = BROTLI_FALSE;
+  params->size_hint = 0;
+  params->stream_offset = 0;
+
+  BrotliInitEncoderDictionary(&params->dictionary);
+
+  /* Distance coding: no postfix bits, no direct codes, full alphabet. */
+  params->dist.num_direct_distance_codes = 0;
+  params->dist.distance_postfix_bits = 0;
+  params->dist.max_distance = BROTLI_MAX_DISTANCE;
+  params->dist.alphabet_size_max =
+      BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS);
+  params->dist.alphabet_size_limit = params->dist.alphabet_size_max;
+}
+
+/* Puts a freshly allocated encoder state into its initial condition:
+   default parameters, zeroed counters and positions, no owned buffers and
+   the initial distance cache. The memory manager is not touched here. */
+static void BrotliEncoderInitState(BrotliEncoderState* s) {
+  static const int kInitialDistances[4] = { 4, 11, 15, 16 };
+  size_t i;
+
+  BrotliEncoderInitParams(&s->params);
+
+  /* Stream positions and per-block counters. */
+  s->input_pos_ = 0;
+  s->last_flush_pos_ = 0;
+  s->last_processed_pos_ = 0;
+  s->last_insert_len_ = 0;
+  s->num_commands_ = 0;
+  s->num_literals_ = 0;
+  s->prev_byte_ = 0;
+  s->prev_byte2_ = 0;
+
+  /* Buffers are allocated lazily; start with none. */
+  s->storage_ = 0;
+  s->storage_size_ = 0;
+  HasherInit(&s->hasher_);
+  s->large_table_ = NULL;
+  s->large_table_size_ = 0;
+  s->command_buf_ = NULL;
+  s->literal_buf_ = NULL;
+  s->cmd_code_numbits_ = 0;
+
+  /* Output stream bookkeeping. */
+  s->next_out_ = NULL;
+  s->available_out_ = 0;
+  s->total_out_ = 0;
+  s->stream_state_ = BROTLI_STREAM_PROCESSING;
+  s->is_last_block_emitted_ = BROTLI_FALSE;
+  s->is_initialized_ = BROTLI_FALSE;
+
+  RingBufferInit(&s->ringbuffer_);
+
+  s->commands_ = 0;
+  s->cmd_alloc_size_ = 0;
+
+  /* Initialize distance cache. */
+  for (i = 0; i < 4; ++i) {
+    s->dist_cache_[i] = kInitialDistances[i];
+  }
+  /* Save the state of the distance cache in case we need to restore it for
+     emitting an uncompressed block. */
+  memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
+}
+
+/* Allocates and initializes an encoder instance.
+   |alloc_func| and |free_func| must either both be provided or both be
+   null; with both null the state is obtained from malloc().
+   Returns 0 when only one of the two callbacks is given or when the
+   allocation fails. */
+BrotliEncoderState* BrotliEncoderCreateInstance(
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+  BrotliEncoderState* state;
+  if (alloc_func && free_func) {
+    state = (BrotliEncoderState*)alloc_func(opaque, sizeof(*state));
+  } else if (!alloc_func && !free_func) {
+    state = (BrotliEncoderState*)malloc(sizeof(*state));
+  } else {
+    /* Mismatched allocator pair. */
+    state = 0;
+  }
+  if (!state) {
+    /* BROTLI_DUMP(); */
+    return 0;
+  }
+  BrotliInitMemoryManager(
+      &state->memory_manager_, alloc_func, free_func, opaque);
+  BrotliEncoderInitState(state);
+  return state;
+}
+
+/* Releases every buffer owned by the encoder state (but not the state
+   itself). If the memory manager is in the out-of-memory state, it is wiped
+   as a whole instead of freeing the buffers one by one. */
+static void BrotliEncoderCleanupState(BrotliEncoderState* s) {
+  MemoryManager* m = &s->memory_manager_;
+  if (!BROTLI_IS_OOM(m)) {
+    BROTLI_FREE(m, s->storage_);
+    BROTLI_FREE(m, s->commands_);
+    RingBufferFree(m, &s->ringbuffer_);
+    DestroyHasher(m, &s->hasher_);
+    BROTLI_FREE(m, s->large_table_);
+    BROTLI_FREE(m, s->command_buf_);
+    BROTLI_FREE(m, s->literal_buf_);
+  } else {
+    BrotliWipeOutMemoryManager(m);
+  }
+}
+
+/* Deinitializes and frees BrotliEncoderState instance.
+   A null |state| is accepted and ignored. */
+void BrotliEncoderDestroyInstance(BrotliEncoderState* state) {
+  if (state) {
+    /* Fetch the deallocator before the cleanup touches the manager. */
+    MemoryManager* m = &state->memory_manager_;
+    brotli_free_func free_func = m->free_func;
+    void* opaque = m->opaque;
+    BrotliEncoderCleanupState(state);
+    free_func(opaque, state);
+  }
+}
+
+/*
+   Appends |input_size| bytes from |input_buffer| to the internal ring buffer
+   of the compressor and advances the input position. No processing of the
+   data occurs at this time, so this function can be called multiple times
+   before calling WriteBrotliData() to process the accumulated input. At most
+   input_block_size() bytes of input data can be copied to the ring buffer,
+   otherwise the next WriteBrotliData() will fail.
+   If the ring-buffer write runs out of memory, the function returns without
+   advancing the input position.
+ */
+static void CopyInputToRingBuffer(BrotliEncoderState* s,
+                                  const size_t input_size,
+                                  const uint8_t* input_buffer) {
+  MemoryManager* m = &s->memory_manager_;
+  RingBuffer* rb = &s->ringbuffer_;
+
+  RingBufferWrite(m, input_buffer, input_size, rb);
+  if (BROTLI_IS_OOM(m)) return;
+  s->input_pos_ += input_size;
+
+  /* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
+     hashing not depend on uninitialized data. This makes compression
+     deterministic and it prevents uninitialized memory warnings in Valgrind.
+     Even without erasing, the output would be valid (but nondeterministic).
+
+     Background: the compressor stores short (at most 8 bytes) substrings of
+     the input already read in a hash table, and detects repetitions by
+     looking such substrings up. When a candidate is found, it is verified
+     against the ring buffer, so a hash collision (or even a corrupt hash
+     table) can only hurt the compression ratio, never the validity of the
+     output.
+
+     The hash table is populated from the ring buffer as new input bytes
+     arrive. At the last few written positions there are not enough bytes to
+     form a full-length substring, and for performance reasons hashing reads
+     with a LOAD64, which can reach 7 bytes beyond the bytes written so far.
+     Zeroing those 7 bytes gives such substrings a well-defined tail.
+
+     This is needed only during the first round of ring-buffer writes
+     (pos_ <= mask_), when the write position may even fall into the "tail"
+     past the mask. Once the buffer has gone around at least once, that
+     region is already initialized, because the tail then holds a copy of the
+     beginning; clearing at that point would damage live data. */
+  if (rb->pos_ <= rb->mask_) {
+    memset(rb->buffer_ + rb->pos_, 0, 7);
+  }
+}
+
+/* Marks all input as processed by moving the last-processed position up to
+   the input position.
+   Returns true if position wrapping occurs, i.e. the wrapped new position
+   is smaller than the wrapped previous one. */
+static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) {
+  const uint32_t wrapped_before = WrapPosition(s->last_processed_pos_);
+  const uint32_t wrapped_after = WrapPosition(s->input_pos_);
+  s->last_processed_pos_ = s->input_pos_;
+  return TO_BROTLI_BOOL(wrapped_before > wrapped_after);
+}
+
+/* Tries to lengthen the copy of the most recent command with bytes of the
+   new input. This is attempted only when the command's distance code is a
+   short code or maps to the distance held in dist_cache_[0].
+   While the next unprocessed byte equals the byte |dist_cache_[0]| positions
+   back, the copy length grows by one, |*bytes| shrinks by one and
+   |*wrapped_last_processed_pos| advances by one. The command prefix is then
+   recomputed from the resulting lengths. */
+static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
+                              uint32_t* wrapped_last_processed_pos) {
+  Command* const cmd = &s->commands_[s->num_commands_ - 1];
+  const uint8_t* const ring = s->ringbuffer_.buffer_;
+  const uint32_t mask = s->ringbuffer_.mask_;
+  const uint64_t window_limit =
+      (((uint64_t)1) << s->params.lgwin) - BROTLI_WINDOW_GAP;
+  const uint64_t copy_len = cmd->copy_len_ & 0x1FFFFFF;
+  /* Position at which the last copy started. */
+  const uint64_t copy_start = s->last_processed_pos_ - copy_len;
+  const uint64_t max_distance =
+      copy_start >= window_limit ? window_limit : copy_start;
+  const uint64_t cmd_dist = (uint64_t)s->dist_cache_[0];
+  const uint32_t distance_code =
+      CommandRestoreDistanceCode(cmd, &s->params.dist);
+
+  if (distance_code >= BROTLI_NUM_DISTANCE_SHORT_CODES &&
+      distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) != cmd_dist) {
+    return;
+  }
+
+  if (cmd_dist <= max_distance) {
+    while (*bytes != 0) {
+      const uint32_t pos = *wrapped_last_processed_pos;
+      if (ring[pos & mask] != ring[(pos - cmd_dist) & mask]) break;
+      cmd->copy_len_++;
+      (*bytes)--;
+      (*wrapped_last_processed_pos)++;
+    }
+  }
+  /* The copy length is at most the metablock size, and thus expressible. */
+  GetLengthCode(cmd->insert_len_,
+                (size_t)((int)(cmd->copy_len_ & 0x1FFFFFF) +
+                         (int)(cmd->copy_len_ >> 25)),
+                TO_BROTLI_BOOL((cmd->dist_prefix_ & 0x3FF) == 0),
+                &cmd->cmd_prefix_);
+}
+
+/*
+   Processes the accumulated input data and sets |*out_size| to the length of
+   the new output meta-block, or to zero if no new output meta-block has been
+   created (in this case the processed input data is buffered internally).
+   If |*out_size| is positive, |*output| points to the start of the output
+   data. If |is_last| or |force_flush| is BROTLI_TRUE, an output meta-block is
+   always created. However, until |is_last| is BROTLI_TRUE encoder may retain up
+   to 7 bits of the last byte of output. To force encoder to dump the remaining
+   bits use WriteMetadata() to append an empty meta-data block.
+   Returns BROTLI_FALSE if the size of the input data is larger than
+   input_block_size().
+ */
+static BROTLI_BOOL EncodeData(
+    BrotliEncoderState* s, const BROTLI_BOOL is_last,
+    const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) {
+  const uint64_t delta = UnprocessedInputSize(s);
+  uint32_t bytes = (uint32_t)delta;
+  uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
+  uint8_t* data;
+  uint32_t mask;
+  MemoryManager* m = &s->memory_manager_;
+  ContextType literal_context_mode;
+  ContextLut literal_context_lut;
+
+  data = s->ringbuffer_.buffer_;
+  mask = s->ringbuffer_.mask_;
+
+  /* Adding more blocks after "last" block is forbidden. */
+  if (s->is_last_block_emitted_) return BROTLI_FALSE;
+  if (is_last) s->is_last_block_emitted_ = BROTLI_TRUE;
+
+  if (delta > InputBlockSize(s)) {
+    return BROTLI_FALSE;
+  }
+  if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY &&
+      !s->command_buf_) {
+    s->command_buf_ =
+        BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
+    s->literal_buf_ =
+        BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
+        BROTLI_IS_NULL(s->literal_buf_)) {
+      return BROTLI_FALSE;
+    }
+  }
+
+  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
+      s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
+    uint8_t* storage;
+    size_t storage_ix = s->last_bytes_bits_;
+    size_t table_size;
+    int* table;
+
+    if (delta == 0 && !is_last) {
+      /* We have no new input data and we don't have to finish the stream, so
+         nothing to do. */
+      *out_size = 0;
+      return BROTLI_TRUE;
+    }
+    storage = GetBrotliStorage(s, 2 * bytes + 503);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    storage[0] = (uint8_t)s->last_bytes_;
+    storage[1] = (uint8_t)(s->last_bytes_ >> 8);
+    table = GetHashTable(s, s->params.quality, bytes, &table_size);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
+      BrotliCompressFragmentFast(
+          m, &data[wrapped_last_processed_pos & mask],
+          bytes, is_last,
+          table, table_size,
+          s->cmd_depths_, s->cmd_bits_,
+          &s->cmd_code_numbits_, s->cmd_code_,
+          &storage_ix, storage);
+      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    } else {
+      BrotliCompressFragmentTwoPass(
+          m, &data[wrapped_last_processed_pos & mask],
+          bytes, is_last,
+          s->command_buf_, s->literal_buf_,
+          table, table_size,
+          &storage_ix, storage);
+      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    }
+    s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
+    s->last_bytes_bits_ = storage_ix & 7u;
+    UpdateLastProcessedPos(s);
+    *output = &storage[0];
+    *out_size = storage_ix >> 3;
+    return BROTLI_TRUE;
+  }
+
+  {
+    /* Theoretical max number of commands is 1 per 2 bytes. */
+    size_t newsize = s->num_commands_ + bytes / 2 + 1;
+    if (newsize > s->cmd_alloc_size_) {
+      Command* new_commands;
+      /* Reserve a bit more memory to allow merging with a next block
+         without reallocation: that would impact speed. */
+      newsize += (bytes / 4) + 16;
+      s->cmd_alloc_size_ = newsize;
+      new_commands = BROTLI_ALLOC(m, Command, newsize);
+      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_commands)) return BROTLI_FALSE;
+      if (s->commands_) {
+        memcpy(new_commands, s->commands_, sizeof(Command) * s->num_commands_);
+        BROTLI_FREE(m, s->commands_);
+      }
+      s->commands_ = new_commands;
+    }
+  }
+
+  InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params,
+      wrapped_last_processed_pos, bytes, is_last);
+
+  literal_context_mode = ChooseContextMode(
+      &s->params, data, WrapPosition(s->last_flush_pos_),
+      mask, (size_t)(s->input_pos_ - s->last_flush_pos_));
+  literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
+
+  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+
+  if (s->num_commands_ && s->last_insert_len_ == 0) {
+    ExtendLastCommand(s, &bytes, &wrapped_last_processed_pos);
+  }
+
+  if (s->params.quality == ZOPFLIFICATION_QUALITY) {
+    BROTLI_DCHECK(s->params.hasher.type == 10);
+    BrotliCreateZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
+        data, mask, literal_context_lut, &s->params,
+        &s->hasher_, s->dist_cache_,
+        &s->last_insert_len_, &s->commands_[s->num_commands_],
+        &s->num_commands_, &s->num_literals_);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+  } else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) {
+    BROTLI_DCHECK(s->params.hasher.type == 10);
+    BrotliCreateHqZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
+        data, mask, literal_context_lut, &s->params,
+        &s->hasher_, s->dist_cache_,
+        &s->last_insert_len_, &s->commands_[s->num_commands_],
+        &s->num_commands_, &s->num_literals_);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+  } else {
+    BrotliCreateBackwardReferences(bytes, wrapped_last_processed_pos,
+        data, mask, literal_context_lut, &s->params,
+        &s->hasher_, s->dist_cache_,
+        &s->last_insert_len_, &s->commands_[s->num_commands_],
+        &s->num_commands_, &s->num_literals_);
+  }
+
+  {
+    const size_t max_length = MaxMetablockSize(&s->params);
+    const size_t max_literals = max_length / 8;
+    const size_t max_commands = max_length / 8;
+    const size_t processed_bytes = (size_t)(s->input_pos_ - s->last_flush_pos_);
+    /* If maximal possible additional block doesn't fit metablock, flush now. */
+    /* TODO: Postpone decision until next block arrives? */
+    const BROTLI_BOOL next_input_fits_metablock = TO_BROTLI_BOOL(
+        processed_bytes + InputBlockSize(s) <= max_length);
+    /* If block splitting is not used, then flush as soon as there is some
+       amount of commands / literals produced. */
+    const BROTLI_BOOL should_flush = TO_BROTLI_BOOL(
+        s->params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT &&
+        s->num_literals_ + s->num_commands_ >= MAX_NUM_DELAYED_SYMBOLS);
+    if (!is_last && !force_flush && !should_flush &&
+        next_input_fits_metablock &&
+        s->num_literals_ < max_literals &&
+        s->num_commands_ < max_commands) {
+      /* Merge with next input block. Everything will happen later. */
+      if (UpdateLastProcessedPos(s)) {
+        HasherReset(&s->hasher_);
+      }
+      *out_size = 0;
+      return BROTLI_TRUE;
+    }
+  }
+
+  /* Create the last insert-only command. */
+  if (s->last_insert_len_ > 0) {
+    InitInsertCommand(&s->commands_[s->num_commands_++], s->last_insert_len_);
+    s->num_literals_ += s->last_insert_len_;
+    s->last_insert_len_ = 0;
+  }
+
+  if (!is_last && s->input_pos_ == s->last_flush_pos_) {
+    /* We have no new input data and we don't have to finish the stream, so
+       nothing to do. */
+    *out_size = 0;
+    return BROTLI_TRUE;
+  }
+  BROTLI_DCHECK(s->input_pos_ >= s->last_flush_pos_);
+  BROTLI_DCHECK(s->input_pos_ > s->last_flush_pos_ || is_last);
+  BROTLI_DCHECK(s->input_pos_ - s->last_flush_pos_ <= 1u << 24);
+  {
+    const uint32_t metablock_size =
+        (uint32_t)(s->input_pos_ - s->last_flush_pos_);
+    uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 503);
+    size_t storage_ix = s->last_bytes_bits_;
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    storage[0] = (uint8_t)s->last_bytes_;
+    storage[1] = (uint8_t)(s->last_bytes_ >> 8);
+    WriteMetaBlockInternal(
+        m, data, mask, s->last_flush_pos_, metablock_size, is_last,
+        literal_context_mode, &s->params, s->prev_byte_, s->prev_byte2_,
+        s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_,
+        s->dist_cache_, &storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
+    s->last_bytes_bits_ = storage_ix & 7u;
+    s->last_flush_pos_ = s->input_pos_;
+    if (UpdateLastProcessedPos(s)) {
+      HasherReset(&s->hasher_);
+    }
+    if (s->last_flush_pos_ > 0) {
+      s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask];
+    }
+    if (s->last_flush_pos_ > 1) {
+      s->prev_byte2_ = data[(uint32_t)(s->last_flush_pos_ - 2) & mask];
+    }
+    s->num_commands_ = 0;
+    s->num_literals_ = 0;
+    /* Save the state of the distance cache in case we need to restore it for
+       emitting an uncompressed block. */
+    memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
+    *output = &storage[0];
+    *out_size = storage_ix >> 3;
+    return BROTLI_TRUE;
+  }
+}
+
+/* Dumps remaining output bits and metadata header to |header|.
+   The pending (not yet byte-aligned) bits kept in |s| are moved to the start
+   of |header| and the pending-bit state of |s| is cleared; the header of a
+   metadata meta-block carrying |block_size| payload bytes is appended.
+   Returns number of produced bytes (bit count rounded up to whole bytes).
+   REQUIRED: |header| should be 8-byte aligned and at least 16 bytes long.
+   REQUIRED: |block_size| <= (1 << 24). */
+static size_t WriteMetadataHeader(
+    BrotliEncoderState* s, const size_t block_size, uint8_t* header) {
+  size_t storage_ix;
+  storage_ix = s->last_bytes_bits_;
+  header[0] = (uint8_t)s->last_bytes_;
+  header[1] = (uint8_t)(s->last_bytes_ >> 8);
+  s->last_bytes_ = 0;
+  s->last_bytes_bits_ = 0;
+
+  BrotliWriteBits(1, 0, &storage_ix, header);  /* is_last = 0 */
+  BrotliWriteBits(2, 3, &storage_ix, header);  /* data_nibbles = 11 */
+  BrotliWriteBits(1, 0, &storage_ix, header);  /* reserved = 0 */
+  if (block_size == 0) {
+    /* Empty metadata block: zero length bytes, no length field. */
+    BrotliWriteBits(2, 0, &storage_ix, header);
+  } else {
+    /* The length field stores (block_size - 1). A length-byte count of zero
+       means "empty block", so a non-empty block needs at least one length
+       byte even when (block_size - 1) == 0; otherwise a 1-byte block would
+       be announced as empty and its payload would corrupt the stream. */
+    uint32_t nbits = (block_size == 1) ? 1 :
+        (Log2FloorNonZero((uint32_t)block_size - 1) + 1);
+    uint32_t nbytes = (nbits + 7) / 8;
+    BrotliWriteBits(2, nbytes, &storage_ix, header);
+    BrotliWriteBits(8 * nbytes, block_size - 1, &storage_ix, header);
+  }
+  return (storage_ix + 7u) >> 3;
+}
+
+/* One-shot compression of |input_buffer| with quality fixed to 10, done
+   without a BrotliEncoderState: the hasher, command buffer and output storage
+   are all local to this call.
+   On entry |*encoded_size| is the capacity of |encoded_buffer|.
+   The input is processed meta-block by meta-block; each finished meta-block
+   is copied to |encoded_buffer| only if the running total still fits.
+   Returns BROTLI_TRUE if every meta-block fitted. Returns BROTLI_FALSE when a
+   meta-block did not fit (processing stops there and |*encoded_size| receives
+   the running total including that meta-block), or on OOM (in which case
+   |*encoded_size| is not written). */
+static BROTLI_BOOL BrotliCompressBufferQuality10(
+    int lgwin, size_t input_size, const uint8_t* input_buffer,
+    size_t* encoded_size, uint8_t* encoded_buffer) {
+  MemoryManager memory_manager;
+  MemoryManager* m = &memory_manager;
+
+  /* NOTE(review): the input is indexed directly rather than through a ring
+     buffer, so this mask presumably never wraps - see the DCHECK below. */
+  const size_t mask = BROTLI_SIZE_MAX >> 1;
+  int dist_cache[4] = { 4, 11, 15, 16 };
+  int saved_dist_cache[4] = { 4, 11, 15, 16 };
+  BROTLI_BOOL ok = BROTLI_TRUE;
+  const size_t max_out_size = *encoded_size;
+  size_t total_out_size = 0;
+  /* Bits of the last, partially filled output byte carried between
+     meta-blocks, and how many of them are valid. */
+  uint16_t last_bytes;
+  uint8_t last_bytes_bits;
+
+  const size_t hasher_eff_size = BROTLI_MIN(size_t,
+      input_size, BROTLI_MAX_BACKWARD_LIMIT(lgwin) + BROTLI_WINDOW_GAP);
+
+  BrotliEncoderParams params;
+
+  const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1);
+  size_t max_block_size;
+  const size_t max_metablock_size = (size_t)1 << lgmetablock;
+  const size_t max_literals_per_metablock = max_metablock_size / 8;
+  const size_t max_commands_per_metablock = max_metablock_size / 8;
+  size_t metablock_start = 0;
+  uint8_t prev_byte = 0;
+  uint8_t prev_byte2 = 0;
+
+  Hasher hasher;
+  HasherInit(&hasher);
+
+  BrotliEncoderInitParams(&params);
+  params.quality = 10;
+  params.lgwin = lgwin;
+  if (lgwin > BROTLI_MAX_WINDOW_BITS) {
+    params.large_window = BROTLI_TRUE;
+  }
+  SanitizeParams(&params);
+  params.lgblock = ComputeLgBlock(&params);
+  ChooseDistanceParams(&params);
+  max_block_size = (size_t)1 << params.lgblock;
+
+  BrotliInitMemoryManager(m, 0, 0, 0);
+
+  BROTLI_DCHECK(input_size <= mask + 1);
+  /* The stream starts with the window-bits header; it is kept as pending bits
+     and emitted together with the first meta-block. */
+  EncodeWindowBits(lgwin, params.large_window, &last_bytes, &last_bytes_bits);
+  InitOrStitchToPreviousBlock(m, &hasher, input_buffer, mask, &params,
+      0, hasher_eff_size, BROTLI_TRUE);
+  if (BROTLI_IS_OOM(m)) goto oom;
+
+  /* One iteration per emitted meta-block. */
+  while (ok && metablock_start < input_size) {
+    const size_t metablock_end =
+        BROTLI_MIN(size_t, input_size, metablock_start + max_metablock_size);
+    const size_t expected_num_commands =
+        (metablock_end - metablock_start) / 12 + 16;
+    Command* commands = 0;
+    size_t num_commands = 0;
+    size_t last_insert_len = 0;
+    size_t num_literals = 0;
+    size_t metablock_size = 0;
+    size_t cmd_alloc_size = 0;
+    BROTLI_BOOL is_last;
+    uint8_t* storage;
+    size_t storage_ix;
+
+    ContextType literal_context_mode = ChooseContextMode(&params,
+        input_buffer, metablock_start, mask, metablock_end - metablock_start);
+    ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
+
+    /* Accumulate commands block by block until the meta-block range is
+       exhausted or the literal / command budget is exceeded. */
+    size_t block_start;
+    for (block_start = metablock_start; block_start < metablock_end; ) {
+      size_t block_size =
+          BROTLI_MIN(size_t, metablock_end - block_start, max_block_size);
+      ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, block_size + 1);
+      size_t path_size;
+      size_t new_cmd_alloc_size;
+      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) goto oom;
+      BrotliInitZopfliNodes(nodes, block_size + 1);
+      StitchToPreviousBlockH10(&hasher.privat._H10, block_size, block_start,
+                               input_buffer, mask);
+      path_size = BrotliZopfliComputeShortestPath(m, block_size, block_start,
+          input_buffer, mask, literal_context_lut, &params, dist_cache, &hasher,
+          nodes);
+      if (BROTLI_IS_OOM(m)) goto oom;
+      /* We allocate a command buffer in the first iteration of this loop that
+         will be likely big enough for the whole metablock, so that for most
+         inputs we will not have to reallocate in later iterations. We do the
+         allocation here and not before the loop, because if the input is small,
+         this will be allocated after the Zopfli cost model is freed, so this
+         will not increase peak memory usage.
+         TODO: If the first allocation is too small, increase command
+         buffer size exponentially. */
+      new_cmd_alloc_size = BROTLI_MAX(size_t, expected_num_commands,
+                                      num_commands + path_size + 1);
+      if (cmd_alloc_size != new_cmd_alloc_size) {
+        Command* new_commands = BROTLI_ALLOC(m, Command, new_cmd_alloc_size);
+        if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_commands)) goto oom;
+        cmd_alloc_size = new_cmd_alloc_size;
+        if (commands) {
+          memcpy(new_commands, commands, sizeof(Command) * num_commands);
+          BROTLI_FREE(m, commands);
+        }
+        commands = new_commands;
+      }
+      BrotliZopfliCreateCommands(block_size, block_start, &nodes[0], dist_cache,
+          &last_insert_len, &params, &commands[num_commands], &num_literals);
+      num_commands += path_size;
+      block_start += block_size;
+      metablock_size += block_size;
+      BROTLI_FREE(m, nodes);
+      if (num_literals > max_literals_per_metablock ||
+          num_commands > max_commands_per_metablock) {
+        break;
+      }
+    }
+
+    /* Trailing literals that were not attached to a copy become a final
+       insert-only command (the "+ 1" slot reserved in the allocation above). */
+    if (last_insert_len > 0) {
+      InitInsertCommand(&commands[num_commands++], last_insert_len);
+      num_literals += last_insert_len;
+    }
+
+    is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size);
+    storage = NULL;
+    storage_ix = last_bytes_bits;
+
+    /* In every branch below the first two storage bytes are seeded with the
+       pending bits carried over from the previous meta-block. */
+    if (metablock_size == 0) {
+      /* Write the ISLAST and ISEMPTY bits. */
+      storage = BROTLI_ALLOC(m, uint8_t, 16);
+      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
+      storage[0] = (uint8_t)last_bytes;
+      storage[1] = (uint8_t)(last_bytes >> 8);
+      BrotliWriteBits(2, 3, &storage_ix, storage);
+      storage_ix = (storage_ix + 7u) & ~7u;
+    } else if (!ShouldCompress(input_buffer, mask, metablock_start,
+                               metablock_size, num_literals, num_commands)) {
+      /* Restore the distance cache, as its last update by
+         CreateBackwardReferences is now unused. */
+      memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
+      storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16);
+      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
+      storage[0] = (uint8_t)last_bytes;
+      storage[1] = (uint8_t)(last_bytes >> 8);
+      BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
+                                       metablock_start, mask, metablock_size,
+                                       &storage_ix, storage);
+    } else {
+      MetaBlockSplit mb;
+      BrotliEncoderParams block_params = params;
+      InitMetaBlockSplit(&mb);
+      BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask,
+                           &block_params,
+                           prev_byte, prev_byte2,
+                           commands, num_commands,
+                           literal_context_mode,
+                           &mb);
+      if (BROTLI_IS_OOM(m)) goto oom;
+      {
+        /* The number of distance symbols effectively used for distance
+           histograms. It might be less than distance alphabet size
+           for "Large Window Brotli" (32-bit). */
+        BrotliOptimizeHistograms(block_params.dist.alphabet_size_limit, &mb);
+      }
+      storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 503);
+      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
+      storage[0] = (uint8_t)last_bytes;
+      storage[1] = (uint8_t)(last_bytes >> 8);
+      BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size,
+                           mask, prev_byte, prev_byte2,
+                           is_last,
+                           &block_params,
+                           literal_context_mode,
+                           commands, num_commands,
+                           &mb,
+                           &storage_ix, storage);
+      if (BROTLI_IS_OOM(m)) goto oom;
+      /* The compressed form came out larger than the raw data plus a small
+         margin: rewind and store the meta-block uncompressed instead. */
+      if (metablock_size + 4 < (storage_ix >> 3)) {
+        /* Restore the distance cache and last byte. */
+        memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
+        storage[0] = (uint8_t)last_bytes;
+        storage[1] = (uint8_t)(last_bytes >> 8);
+        storage_ix = last_bytes_bits;
+        BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
+                                         metablock_start, mask,
+                                         metablock_size, &storage_ix, storage);
+      }
+      DestroyMetaBlockSplit(m, &mb);
+    }
+    /* Carry the trailing partial byte over to the next meta-block; only the
+       whole bytes (storage_ix >> 3) are emitted below. */
+    last_bytes = (uint16_t)(storage[storage_ix >> 3]);
+    last_bytes_bits = storage_ix & 7u;
+    metablock_start += metablock_size;
+    if (metablock_start < input_size) {
+      prev_byte = input_buffer[metablock_start - 1];
+      prev_byte2 = input_buffer[metablock_start - 2];
+    }
+    /* Save the state of the distance cache in case we need to restore it for
+       emitting an uncompressed block. */
+    memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
+
+    {
+      const size_t out_size = storage_ix >> 3;
+      total_out_size += out_size;
+      /* Copy out only while the caller's buffer can hold the running total;
+         otherwise flag failure, which also terminates the outer loop. */
+      if (total_out_size <= max_out_size) {
+        memcpy(encoded_buffer, storage, out_size);
+        encoded_buffer += out_size;
+      } else {
+        ok = BROTLI_FALSE;
+      }
+    }
+    BROTLI_FREE(m, storage);
+    BROTLI_FREE(m, commands);
+  }
+
+  *encoded_size = total_out_size;
+  DestroyHasher(m, &hasher);
+  return ok;
+
+oom:
+  /* Individual buffers are not freed on this path; the whole memory manager
+     is wiped instead. */
+  BrotliWipeOutMemoryManager(m);
+  return BROTLI_FALSE;
+}
+
+/* Upper bound on the size of the stream produced for |input_size| bytes of
+   input. Returns 2 for empty input and 0 when the bound does not fit in
+   size_t. */
+size_t BrotliEncoderMaxCompressedSize(size_t input_size) {
+  size_t block_count;
+  size_t framing;
+  size_t bound;
+  if (input_size == 0) return 2;
+  /* [window bits / empty metadata] + N * [uncompressed] + [last empty] */
+  block_count = input_size >> 14;
+  framing = 2 + (4 * block_count) + 3 + 1;
+  bound = input_size + framing;
+  /* A wrapped sum is smaller than its operand: report "no bound". */
+  if (bound < input_size) return 0;
+  return bound;
+}
+
+/* Wraps |input| into a brotli stream made only of uncompressed meta-blocks,
+   using the minimal window size.
+   |output| should point at region with at least BrotliEncoderMaxCompressedSize
+   addressable bytes.
+   Returns the length of the produced stream; empty input yields the single
+   byte 6. */
+static size_t MakeUncompressedStream(
+    const uint8_t* input, size_t input_size, uint8_t* output) {
+  const uint32_t max_chunk = 1u << 24;
+  size_t remaining = input_size;
+  size_t out_pos = 0;
+  size_t in_pos = 0;
+  if (input_size == 0) {
+    output[0] = 6;
+    return 1;
+  }
+  output[out_pos++] = 0x21;  /* window bits = 10, is_last = false */
+  output[out_pos++] = 0x03;  /* empty metadata, padding */
+  for (; remaining > 0; remaining -= 0) {
+    uint32_t chunk_size =
+        (remaining > max_chunk) ? max_chunk : (uint32_t)remaining;
+    uint32_t nibbles;
+    uint32_t header;
+    /* Extra length nibbles beyond the base 16 bits of (chunk_size - 1). */
+    if (chunk_size > (1u << 20)) {
+      nibbles = 2;
+    } else if (chunk_size > (1u << 16)) {
+      nibbles = 1;
+    } else {
+      nibbles = 0;
+    }
+    header = ((chunk_size - 1) << 3) | (nibbles << 1) |
+        (1u << (19 + 4 * nibbles));
+    /* The header takes 3 bytes, or 4 when two extra nibbles are used. */
+    output[out_pos++] = (uint8_t)header;
+    output[out_pos++] = (uint8_t)(header >> 8);
+    output[out_pos++] = (uint8_t)(header >> 16);
+    if (nibbles == 2) output[out_pos++] = (uint8_t)(header >> 24);
+    memcpy(&output[out_pos], &input[in_pos], chunk_size);
+    out_pos += chunk_size;
+    in_pos += chunk_size;
+    remaining -= chunk_size;
+  }
+  output[out_pos++] = 3;
+  return out_pos;
+}
+
+/* One-shot compression of |input_buffer| into |encoded_buffer|.
+   On entry |*encoded_size| is the capacity of |encoded_buffer|; it must be
+   non-zero, otherwise BROTLI_FALSE is returned immediately.
+   Empty input produces the single byte 6.
+   Quality 10 takes the dedicated BrotliCompressBufferQuality10 path (with
+   lgwin clamped to [16, BROTLI_LARGE_MAX_WINDOW_BITS]); every other quality
+   runs a temporary streaming encoder with BROTLI_OPERATION_FINISH.
+   If compression fails, or its output is larger than
+   BrotliEncoderMaxCompressedSize(input_size), the input is stored as an
+   uncompressed stream instead, provided the output capacity is at least that
+   bound; otherwise BROTLI_FALSE is returned with |*encoded_size| set to 0.
+   On success |*encoded_size| holds the number of bytes written. */
+BROTLI_BOOL BrotliEncoderCompress(
+    int quality, int lgwin, BrotliEncoderMode mode, size_t input_size,
+    const uint8_t* input_buffer, size_t* encoded_size,
+    uint8_t* encoded_buffer) {
+  BrotliEncoderState* s;
+  /* Remember the original capacity and buffer starts for the fallback. */
+  size_t out_size = *encoded_size;
+  const uint8_t* input_start = input_buffer;
+  uint8_t* output_start = encoded_buffer;
+  size_t max_out_size = BrotliEncoderMaxCompressedSize(input_size);
+  if (out_size == 0) {
+    /* Output buffer needs at least one byte. */
+    return BROTLI_FALSE;
+  }
+  if (input_size == 0) {
+    /* Handle the special case of empty input. */
+    *encoded_size = 1;
+    *encoded_buffer = 6;
+    return BROTLI_TRUE;
+  }
+  if (quality == 10) {
+    /* TODO: Implement this direct path for all quality levels. */
+    const int lg_win = BROTLI_MIN(int, BROTLI_LARGE_MAX_WINDOW_BITS,
+                                       BROTLI_MAX(int, 16, lgwin));
+    int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer,
+                                           encoded_size, encoded_buffer);
+    if (!ok || (max_out_size && *encoded_size > max_out_size)) {
+      goto fallback;
+    }
+    return BROTLI_TRUE;
+  }
+
+  s = BrotliEncoderCreateInstance(0, 0, 0);
+  if (!s) {
+    return BROTLI_FALSE;
+  } else {
+    size_t available_in = input_size;
+    const uint8_t* next_in = input_buffer;
+    size_t available_out = *encoded_size;
+    uint8_t* next_out = encoded_buffer;
+    size_t total_out = 0;
+    BROTLI_BOOL result = BROTLI_FALSE;
+    /* The size-hint parameter is 32-bit. Saturate rather than truncate, so
+       that an input of 4 GiB or more is not advertised as a tiny one. */
+    const uint32_t size_hint = (input_size > BROTLI_UINT32_MAX) ?
+        BROTLI_UINT32_MAX : (uint32_t)input_size;
+    BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);
+    BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
+    BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode);
+    BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, size_hint);
+    if (lgwin > BROTLI_MAX_WINDOW_BITS) {
+      BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, BROTLI_TRUE);
+    }
+    result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,
+        &available_in, &next_in, &available_out, &next_out, &total_out);
+    /* A stream that did not reach the finished state (e.g. the output
+       buffer was too small) counts as a failure. */
+    if (!BrotliEncoderIsFinished(s)) result = 0;
+    *encoded_size = total_out;
+    BrotliEncoderDestroyInstance(s);
+    if (!result || (max_out_size && *encoded_size > max_out_size)) {
+      goto fallback;
+    }
+    return BROTLI_TRUE;
+  }
+fallback:
+  *encoded_size = 0;
+  /* max_out_size == 0 means the bound overflowed: nothing safe can be done. */
+  if (!max_out_size) return BROTLI_FALSE;
+  if (out_size >= max_out_size) {
+    *encoded_size =
+        MakeUncompressedStream(input_start, input_size, output_start);
+    return BROTLI_TRUE;
+  }
+  return BROTLI_FALSE;
+}
+
+/* Appends an empty metadata block header to the pending bits of |s| and emits
+   the result as whole bytes, leaving the encoder with no pending bits.
+   The bytes are appended after any output already waiting in the encoder's
+   storage, or placed in the tiny buffer when there is no storage yet. */
+static void InjectBytePaddingBlock(BrotliEncoderState* s) {
+  /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */
+  const uint32_t padding_header = 0x6u;
+  size_t bit_count = s->last_bytes_bits_;
+  uint32_t bits = s->last_bytes_;
+  uint8_t* out;
+
+  s->last_bytes_ = 0;
+  s->last_bytes_bits_ = 0;
+  bits |= padding_header << bit_count;
+  bit_count += 6;
+
+  /* If we have already created storage, then append to it.
+     Storage is valid until next block is being compressed. */
+  if (!s->next_out_) {
+    s->next_out_ = s->tiny_buf_.u8;
+    out = s->next_out_;
+  } else {
+    out = s->next_out_ + s->available_out_;
+  }
+
+  /* Emit only as many bytes as the bit count actually covers. */
+  out[0] = (uint8_t)bits;
+  if (bit_count > 8) out[1] = (uint8_t)(bits >> 8);
+  if (bit_count > 16) out[2] = (uint8_t)(bits >> 16);
+  s->available_out_ += (bit_count + 7) >> 3;
+}
+
+/* Makes one step of progress on the output side: either injects the padding
+   block when a flush is requested and bits are pending, or moves buffered
+   bytes from the encoder to the client's output buffer.
+   Returns false if nothing is done. */
+static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s,
+    size_t* available_out, uint8_t** next_out, size_t* total_out) {
+  size_t chunk;
+
+  if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
+      s->last_bytes_bits_ != 0) {
+    InjectBytePaddingBlock(s);
+    return BROTLI_TRUE;
+  }
+
+  /* Nothing buffered, or the client has no room left: no progress. */
+  if (s->available_out_ == 0 || *available_out == 0) return BROTLI_FALSE;
+
+  chunk = BROTLI_MIN(size_t, s->available_out_, *available_out);
+  memcpy(*next_out, s->next_out_, chunk);
+  s->next_out_ += chunk;
+  s->available_out_ -= chunk;
+  s->total_out_ += chunk;
+  *next_out += chunk;
+  *available_out -= chunk;
+  if (total_out) *total_out = s->total_out_;
+  return BROTLI_TRUE;
+}
+
+/* Ends a requested flush once all buffered output has been handed out: the
+   stream goes back to the processing state and the output pointer is
+   cleared. Does nothing in any other situation. */
+static void CheckFlushComplete(BrotliEncoderState* s) {
+  if (s->stream_state_ != BROTLI_STREAM_FLUSH_REQUESTED) return;
+  if (s->available_out_ != 0) return;
+  s->stream_state_ = BROTLI_STREAM_PROCESSING;
+  s->next_out_ = 0;
+}
+
+/* Streaming driver for the two fast quality levels
+   (FAST_ONE_PASS_COMPRESSION_QUALITY and FAST_TWO_PASS_COMPRESSION_QUALITY).
+   Input is compressed straight from |*next_in|, in blocks of at most
+   (1 << lgwin) bytes. A block is written directly into the client's output
+   buffer when that buffer has room for the worst case
+   (2 * block_size + 503 bytes); otherwise it is written into encoder-owned
+   storage and handed out by InjectFlushOrPushOutput.
+   Returns BROTLI_FALSE if the quality is not one of the two fast levels or on
+   allocation failure; BROTLI_TRUE otherwise. */
+static BROTLI_BOOL BrotliEncoderCompressStreamFast(
+    BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
+    const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
+    size_t* total_out) {
+  const size_t block_size_limit = (size_t)1 << s->params.lgwin;
+  const size_t buf_size = BROTLI_MIN(size_t, kCompressFragmentTwoPassBlockSize,
+      BROTLI_MIN(size_t, *available_in, block_size_limit));
+  /* tmp_* buffers are owned by this call and freed before returning;
+     command_buf / literal_buf point either at them or at the buffers cached
+     in |s|. */
+  uint32_t* tmp_command_buf = NULL;
+  uint32_t* command_buf = NULL;
+  uint8_t* tmp_literal_buf = NULL;
+  uint8_t* literal_buf = NULL;
+  MemoryManager* m = &s->memory_manager_;
+  if (s->params.quality != FAST_ONE_PASS_COMPRESSION_QUALITY &&
+      s->params.quality != FAST_TWO_PASS_COMPRESSION_QUALITY) {
+    return BROTLI_FALSE;
+  }
+  if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
+    /* Full-size work buffers are cached in the encoder state for reuse;
+       smaller ones are allocated just for this call. */
+    if (!s->command_buf_ && buf_size == kCompressFragmentTwoPassBlockSize) {
+      s->command_buf_ =
+          BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
+      s->literal_buf_ =
+          BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
+      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
+          BROTLI_IS_NULL(s->literal_buf_)) {
+        return BROTLI_FALSE;
+      }
+    }
+    if (s->command_buf_) {
+      command_buf = s->command_buf_;
+      literal_buf = s->literal_buf_;
+    } else {
+      tmp_command_buf = BROTLI_ALLOC(m, uint32_t, buf_size);
+      tmp_literal_buf = BROTLI_ALLOC(m, uint8_t, buf_size);
+      if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tmp_command_buf) ||
+          BROTLI_IS_NULL(tmp_literal_buf)) {
+        return BROTLI_FALSE;
+      }
+      command_buf = tmp_command_buf;
+      literal_buf = tmp_literal_buf;
+    }
+  }
+
+  while (BROTLI_TRUE) {
+    /* Drain pending output / inject flush padding before doing anything
+       else. */
+    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
+      continue;
+    }
+
+    /* Compress block only when internal output buffer is empty, stream is not
+       finished, there is no pending flush request, and there is either
+       additional input or pending operation. */
+    if (s->available_out_ == 0 &&
+        s->stream_state_ == BROTLI_STREAM_PROCESSING &&
+        (*available_in != 0 || op != BROTLI_OPERATION_PROCESS)) {
+      size_t block_size = BROTLI_MIN(size_t, block_size_limit, *available_in);
+      /* The finish / flush operation applies only to the block that consumes
+         all of the remaining input. */
+      BROTLI_BOOL is_last =
+          (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH);
+      BROTLI_BOOL force_flush =
+          (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH);
+      size_t max_out_size = 2 * block_size + 503;
+      BROTLI_BOOL inplace = BROTLI_TRUE;
+      uint8_t* storage = NULL;
+      size_t storage_ix = s->last_bytes_bits_;
+      size_t table_size;
+      int* table;
+
+      /* Flush with no input: nothing to compress, only request the flush. */
+      if (force_flush && block_size == 0) {
+        s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
+        continue;
+      }
+      if (max_out_size <= *available_out) {
+        storage = *next_out;
+      } else {
+        inplace = BROTLI_FALSE;
+        storage = GetBrotliStorage(s, max_out_size);
+        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+      }
+      /* Seed the output with the pending bits left by the previous block. */
+      storage[0] = (uint8_t)s->last_bytes_;
+      storage[1] = (uint8_t)(s->last_bytes_ >> 8);
+      table = GetHashTable(s, s->params.quality, block_size, &table_size);
+      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+
+      if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
+        BrotliCompressFragmentFast(m, *next_in, block_size, is_last, table,
+            table_size, s->cmd_depths_, s->cmd_bits_, &s->cmd_code_numbits_,
+            s->cmd_code_, &storage_ix, storage);
+        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+      } else {
+        BrotliCompressFragmentTwoPass(m, *next_in, block_size, is_last,
+            command_buf, literal_buf, table, table_size,
+            &storage_ix, storage);
+        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+      }
+      *next_in += block_size;
+      *available_in -= block_size;
+      if (inplace) {
+        /* Output already sits in the client's buffer: just account for it. */
+        size_t out_bytes = storage_ix >> 3;
+        BROTLI_DCHECK(out_bytes <= *available_out);
+        BROTLI_DCHECK((storage_ix & 7) == 0 || out_bytes < *available_out);
+        *next_out += out_bytes;
+        *available_out -= out_bytes;
+        s->total_out_ += out_bytes;
+        if (total_out) *total_out = s->total_out_;
+      } else {
+        /* Output sits in encoder storage; the next loop iteration pushes it
+           to the client. */
+        size_t out_bytes = storage_ix >> 3;
+        s->next_out_ = storage;
+        s->available_out_ = out_bytes;
+      }
+      /* Keep the trailing partial byte as pending bits for the next block. */
+      s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
+      s->last_bytes_bits_ = storage_ix & 7u;
+
+      if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
+      if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
+      continue;
+    }
+    break;
+  }
+  BROTLI_FREE(m, tmp_command_buf);
+  BROTLI_FREE(m, tmp_literal_buf);
+  CheckFlushComplete(s);
+  return BROTLI_TRUE;
+}
+
+/* Handles BROTLI_OPERATION_EMIT_METADATA: wraps the |*available_in| bytes at
+   |*next_in| into a metadata block.
+   Steps, repeated until no more progress is possible:
+     1. push out buffered output / inject flush padding;
+     2. flush any input data that was accepted but not yet encoded;
+     3. emit the metadata header (state METADATA_HEAD -> METADATA_BODY);
+     4. copy the metadata payload to the output.
+   The workflow may span several calls; it ends (state back to PROCESSING)
+   once the whole payload has been consumed.
+   Returns BROTLI_FALSE if the payload is larger than (1 << 24) bytes, if the
+   stream is in a state that does not allow metadata, or if encoding the
+   pending data fails. */
+static BROTLI_BOOL ProcessMetadata(
+    BrotliEncoderState* s, size_t* available_in, const uint8_t** next_in,
+    size_t* available_out, uint8_t** next_out, size_t* total_out) {
+  if (*available_in > (1u << 24)) return BROTLI_FALSE;
+  /* Switch to metadata block workflow, if required. */
+  if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
+    s->remaining_metadata_bytes_ = (uint32_t)*available_in;
+    s->stream_state_ = BROTLI_STREAM_METADATA_HEAD;
+  }
+  if (s->stream_state_ != BROTLI_STREAM_METADATA_HEAD &&
+      s->stream_state_ != BROTLI_STREAM_METADATA_BODY) {
+    return BROTLI_FALSE;
+  }
+
+  while (BROTLI_TRUE) {
+    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
+      continue;
+    }
+    /* Buffered output remains but the client has no room: stop for now. */
+    if (s->available_out_ != 0) break;
+
+    /* Data accepted earlier must be emitted before the metadata block. */
+    if (s->input_pos_ != s->last_flush_pos_) {
+      BROTLI_BOOL result = EncodeData(s, BROTLI_FALSE, BROTLI_TRUE,
+          &s->available_out_, &s->next_out_);
+      if (!result) return BROTLI_FALSE;
+      continue;
+    }
+
+    if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD) {
+      s->next_out_ = s->tiny_buf_.u8;
+      s->available_out_ =
+          WriteMetadataHeader(s, s->remaining_metadata_bytes_, s->next_out_);
+      s->stream_state_ = BROTLI_STREAM_METADATA_BODY;
+      continue;
+    } else {
+      /* Exit workflow only when there is no more input and no more output.
+         Otherwise client may continue producing empty metadata blocks. */
+      if (s->remaining_metadata_bytes_ == 0) {
+        s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
+        s->stream_state_ = BROTLI_STREAM_PROCESSING;
+        break;
+      }
+      if (*available_out) {
+        /* Directly copy input to output. */
+        uint32_t copy = (uint32_t)BROTLI_MIN(
+            size_t, s->remaining_metadata_bytes_, *available_out);
+        memcpy(*next_out, *next_in, copy);
+        *next_in += copy;
+        *available_in -= copy;
+        s->remaining_metadata_bytes_ -= copy;
+        *next_out += copy;
+        *available_out -= copy;
+        /* These bytes bypass InjectFlushOrPushOutput, so they have to be
+           added to the produced-bytes counter here; otherwise the reported
+           total would depend on which copy path happened to be taken. */
+        s->total_out_ += copy;
+        if (total_out) *total_out = s->total_out_;
+      } else {
+        /* This guarantees progress in "TakeOutput" workflow. */
+        uint32_t copy = BROTLI_MIN(uint32_t, s->remaining_metadata_bytes_, 16);
+        s->next_out_ = s->tiny_buf_.u8;
+        memcpy(s->next_out_, *next_in, copy);
+        *next_in += copy;
+        *available_in -= copy;
+        s->remaining_metadata_bytes_ -= copy;
+        s->available_out_ = copy;
+      }
+      continue;
+    }
+  }
+
+  return BROTLI_TRUE;
+}
+
+/* Fills in the size hint when the client did not provide one: the hint
+   becomes the unprocessed input plus |available_in|, capped at (1 << 30).
+   A hint that is already non-zero is left untouched. */
+static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) {
+  const uint32_t cap = 1u << 30;
+  uint64_t pending;
+  uint64_t upcoming;
+
+  if (s->params.size_hint != 0) return;
+
+  pending = UnprocessedInputSize(s);
+  upcoming = available_in;
+  /* Each term is checked on its own before the sum is trusted. */
+  if (pending < cap && upcoming < cap && (pending + upcoming) < cap) {
+    s->params.size_hint = (uint32_t)(pending + upcoming);
+  } else {
+    s->params.size_hint = cap;
+  }
+}
+
+/* Streaming entry point: consumes input from |*next_in| / |*available_in| and
+   produces output into |*next_out| / |*available_out| as directed by |op|.
+   BROTLI_OPERATION_EMIT_METADATA is delegated to ProcessMetadata, and the two
+   fast quality levels to BrotliEncoderCompressStreamFast; everything else is
+   handled by the loop below, which copies input into the ring buffer, pushes
+   out buffered output and encodes data via EncodeData.
+   Returns BROTLI_FALSE when:
+     - EnsureInitialized fails;
+     - a metadata block is unfinished and the call does not continue it with
+       BROTLI_OPERATION_EMIT_METADATA and exactly the remaining byte count;
+     - a non-metadata operation is issued while in a metadata state;
+     - input is offered while the stream is not in the processing state;
+     - EncodeData fails.
+   |total_out| may be NULL; it is updated when output is pushed to the
+   client. */
+BROTLI_BOOL BrotliEncoderCompressStream(
+    BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
+    const uint8_t** next_in, size_t* available_out,uint8_t** next_out,
+    size_t* total_out) {
+  if (!EnsureInitialized(s)) return BROTLI_FALSE;
+
+  /* Unfinished metadata block; check requirements. */
+  if (s->remaining_metadata_bytes_ != BROTLI_UINT32_MAX) {
+    if (*available_in != s->remaining_metadata_bytes_) return BROTLI_FALSE;
+    if (op != BROTLI_OPERATION_EMIT_METADATA) return BROTLI_FALSE;
+  }
+
+  if (op == BROTLI_OPERATION_EMIT_METADATA) {
+    UpdateSizeHint(s, 0);  /* First data metablock might be emitted here. */
+    return ProcessMetadata(
+        s, available_in, next_in, available_out, next_out, total_out);
+  }
+
+  if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD ||
+      s->stream_state_ == BROTLI_STREAM_METADATA_BODY) {
+    return BROTLI_FALSE;
+  }
+
+  /* New input is accepted only in the processing state (not while a flush is
+     pending or after the stream has finished). */
+  if (s->stream_state_ != BROTLI_STREAM_PROCESSING && *available_in != 0) {
+    return BROTLI_FALSE;
+  }
+  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
+      s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
+    return BrotliEncoderCompressStreamFast(s, op, available_in, next_in,
+        available_out, next_out, total_out);
+  }
+  /* Each iteration performs at most one action (copy input, push output, or
+     encode) and then restarts; the loop ends when none applies. */
+  while (BROTLI_TRUE) {
+    size_t remaining_block_size = RemainingInputBlockSize(s);
+    /* Shorten input to flint size. */
+    if (s->flint_ >= 0 && remaining_block_size > (size_t)s->flint_) {
+      remaining_block_size = (size_t)s->flint_;
+    }
+
+    /* Step 1: move as much input as the current block allows into the ring
+       buffer. */
+    if (remaining_block_size != 0 && *available_in != 0) {
+      size_t copy_input_size =
+          BROTLI_MIN(size_t, remaining_block_size, *available_in);
+      CopyInputToRingBuffer(s, copy_input_size, *next_in);
+      *next_in += copy_input_size;
+      *available_in -= copy_input_size;
+      if (s->flint_ > 0) s->flint_ = (int8_t)(s->flint_ - (int)copy_input_size);
+      continue;
+    }
+
+    /* Step 2: hand buffered output to the client / inject flush padding. */
+    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
+      /* Exit the "emit flint" workflow. */
+      if (s->flint_ == BROTLI_FLINT_WAITING_FOR_FLUSHING) {
+        CheckFlushComplete(s);
+        if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
+          s->flint_ = BROTLI_FLINT_DONE;
+        }
+      }
+      continue;
+    }
+
+    /* Compress data only when internal output buffer is empty, stream is not
+       finished and there is no pending flush request. */
+    if (s->available_out_ == 0 &&
+        s->stream_state_ == BROTLI_STREAM_PROCESSING) {
+      /* Encode when the input block is full, or when the client asked for
+         something other than plain processing (flush / finish). */
+      if (remaining_block_size == 0 || op != BROTLI_OPERATION_PROCESS) {
+        BROTLI_BOOL is_last = TO_BROTLI_BOOL(
+            (*available_in == 0) && op == BROTLI_OPERATION_FINISH);
+        BROTLI_BOOL force_flush = TO_BROTLI_BOOL(
+            (*available_in == 0) && op == BROTLI_OPERATION_FLUSH);
+        BROTLI_BOOL result;
+        /* Force emitting (uncompressed) piece containing flint. */
+        if (!is_last && s->flint_ == 0) {
+          s->flint_ = BROTLI_FLINT_WAITING_FOR_FLUSHING;
+          force_flush = BROTLI_TRUE;
+        }
+        UpdateSizeHint(s, *available_in);
+        result = EncodeData(s, is_last, force_flush,
+            &s->available_out_, &s->next_out_);
+        if (!result) return BROTLI_FALSE;
+        if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
+        if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
+        continue;
+      }
+    }
+    break;
+  }
+  CheckFlushComplete(s);
+  return BROTLI_TRUE;
+}
+
+/* Reports whether the stream has reached the finished state and all of its
+   output has been taken by the client. */
+BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* s) {
+  if (s->stream_state_ != BROTLI_STREAM_FINISHED) return BROTLI_FALSE;
+  return TO_BROTLI_BOOL(!BrotliEncoderHasMoreOutput(s));
+}
+
+/* Reports whether the encoder still holds output bytes that the client has
+   not consumed. */
+BROTLI_BOOL BrotliEncoderHasMoreOutput(BrotliEncoderState* s) {
+  const size_t buffered = s->available_out_;
+  return TO_BROTLI_BOOL(buffered != 0);
+}
+
+/* Hands out a pointer to the encoder's buffered output and marks the bytes
+   as consumed.
+   On entry a non-zero |*size| limits how many bytes are taken; zero means
+   "everything available". On return |*size| holds the number of bytes taken.
+   Returns the start of the taken bytes, or 0 when nothing was available. */
+const uint8_t* BrotliEncoderTakeOutput(BrotliEncoderState* s, size_t* size) {
+  /* Capture the start before the encoder's pointer is advanced (and possibly
+     cleared by CheckFlushComplete). */
+  uint8_t* const chunk = s->next_out_;
+  const size_t limit = *size;
+  size_t taken = s->available_out_;
+
+  if (limit != 0 && limit < taken) taken = limit;
+  if (taken == 0) {
+    *size = 0;
+    return 0;
+  }
+
+  s->next_out_ += taken;
+  s->available_out_ -= taken;
+  s->total_out_ += taken;
+  CheckFlushComplete(s);
+  *size = taken;
+  return chunk;
+}
+
+/* Returns the value of the BROTLI_VERSION macro the encoder was built
+   with. */
+uint32_t BrotliEncoderVersion(void) {
+  const uint32_t version = BROTLI_VERSION;
+  return version;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encoder_dict.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encoder_dict.c
new file mode 100755
index 0000000000..c9e963b89d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encoder_dict.c
@@ -0,0 +1,33 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "./encoder_dict.h"
+
+#include "../common/dictionary.h"
+#include "../common/transform.h"
+#include "./dictionary_hash.h"
+#include "./hash.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict) {
+  /* Values obtained through the common dictionary / transform accessors. */
+  dict->words = BrotliGetDictionary();
+  dict->num_transforms = (uint32_t)BrotliGetTransforms()->num_transforms;
+  /* Remaining members are wired to the k* constants, one per field. */
+  dict->cutoffTransforms = kCutoffTransforms;
+  dict->cutoffTransformsCount = kCutoffTransformsCount;
+  dict->dict_words = kStaticDictionaryWords;
+  dict->buckets = kStaticDictionaryBuckets;
+  dict->hash_table_lengths = kStaticDictionaryHashLengths;
+  dict->hash_table_words = kStaticDictionaryHashWords;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encoder_dict.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encoder_dict.h
new file mode 100755
index 0000000000..a1c329fbf4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/encoder_dict.h
@@ -0,0 +1,43 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#ifndef BROTLI_ENC_ENCODER_DICT_H_
+#define BROTLI_ENC_ENCODER_DICT_H_
+
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./static_dict_lut.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Dictionary data (words and transforms) for one possible context. */
+typedef struct BrotliEncoderDictionary {
+  const BrotliDictionary* words;  /* dictionary words */
+  uint32_t num_transforms;  /* count of transforms */
+
+  /* Transform cut-off data, used by the fast encoder. */
+  uint32_t cutoffTransformsCount;
+  uint64_t cutoffTransforms;
+
+  /* Hash tables from dictionary_hash.h, used by the fast encoder. */
+  const uint16_t* hash_table_words;
+  const uint8_t* hash_table_lengths;
+
+  /* Lookup tables from static_dict_lut.h, used by the slow encoder. */
+  const uint16_t* buckets;
+  const DictWord* dict_words;
+} BrotliEncoderDictionary;
+
+BROTLI_INTERNAL void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_ENCODER_DICT_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/entropy_encode.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/entropy_encode.c
new file mode 100644
index 0000000000..97f9dfb82a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/entropy_encode.c
@@ -0,0 +1,501 @@
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Entropy encoding (Huffman) utilities. */
+
+#include "./entropy_encode.h"
+
+#include <string.h>  /* memset */
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+BROTLI_BOOL BrotliSetDepth(
+    int p0, HuffmanTree* pool, uint8_t* depth, int max_depth) {
+  int stack[16];  /* stack[level]: pending right child at that level, or -1 */
+  int level = 0;
+  int p = p0;  /* node currently visited; traversal starts at p0 */
+  BROTLI_DCHECK(max_depth <= 15);  /* keeps |level| within stack[16] */
+  stack[0] = -1;
+  while (BROTLI_TRUE) {
+    if (pool[p].index_left_ >= 0) {  /* internal node: descend to the left */
+      level++;
+      if (level > max_depth) return BROTLI_FALSE;  /* tree is too deep */
+      stack[level] = pool[p].index_right_or_value_;  /* visit right later */
+      p = pool[p].index_left_;
+      continue;
+    } else {  /* leaf: index_right_or_value_ indexes depth[] */
+      depth[pool[p].index_right_or_value_] = (uint8_t)level;
+    }
+    while (level >= 0 && stack[level] == -1) level--;  /* unwind done levels */
+    if (level < 0) return BROTLI_TRUE;  /* nothing pending: all leaves done */
+    p = stack[level];
+    stack[level] = -1;  /* mark this level's right child as taken */
+  }
+}
+
+/* Comparator for sorting the root nodes: the lower total_count_ goes first;
+   on equal counts the node with the larger index_right_or_value_ goes first. */
+static BROTLI_INLINE BROTLI_BOOL SortHuffmanTree(
+    const HuffmanTree* v0, const HuffmanTree* v1) {
+  const int16_t a = v0->index_right_or_value_, b = v1->index_right_or_value_;
+  if (v0->total_count_ == v1->total_count_) return TO_BROTLI_BOOL(a > b);
+  return TO_BROTLI_BOOL(v0->total_count_ < v1->total_count_);
+}
+
+/* Builds a depth-limited Huffman tree and writes the code depths to |depth|.
+
+   The catch here is that the tree cannot be arbitrarily deep.
+   Brotli specifies a maximum depth of 15 bits for "code trees"
+   and 7 bits for "code length code trees."
+
+   count_limit is a floor applied to every nonzero population count; it
+   starts at 1 and is doubled on each retry until the tree fits within
+   tree_limit bits.
+
+   This algorithm is not of excellent performance for very long data blocks,
+   especially when population counts are larger than 2**tree_limit, but
+   we are not planning to use this with extremely long blocks.
+
+   See http://en.wikipedia.org/wiki/Huffman_coding */
+void BrotliCreateHuffmanTree(const uint32_t* data,
+                             const size_t length,
+                             const int tree_limit,
+                             HuffmanTree* tree,
+                             uint8_t* depth) {
+  uint32_t count_limit;
+  HuffmanTree sentinel;
+  InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1);
+  /* For block sizes below 64 kB, we never need to do a second iteration
+     of this loop. Probably all of our block sizes will be smaller than
+     that, so this loop is mostly of academic interest. If we actually
+     would need this, we would be better off with the Katajainen algorithm. */
+  for (count_limit = 1; ; count_limit *= 2) {
+    size_t n = 0;  /* number of leaves, i.e. symbols with a nonzero count */
+    size_t i;
+    size_t j;
+    size_t k;
+    for (i = length; i != 0;) {  /* walk symbols from last to first */
+      --i;
+      if (data[i]) {
+        const uint32_t count = BROTLI_MAX(uint32_t, data[i], count_limit);
+        InitHuffmanTree(&tree[n++], count, -1, (int16_t)i);
+      }
+    }
+
+    if (n == 1) {
+      depth[tree[0].index_right_or_value_] = 1;  /* Only one element. */
+      break;
+    }
+
+    SortHuffmanTreeItems(tree, n, SortHuffmanTree);
+
+    /* The nodes are:
+       [0, n): the sorted leaf nodes that we start with.
+       [n]: we add a sentinel here.
+       [n + 1, 2n): new parent nodes are added here, starting from
+                    (n+1). These are naturally in ascending order.
+       [2n]: we add a sentinel at the end as well.
+       There will be (2n+1) elements at the end. */
+    tree[n] = sentinel;
+    tree[n + 1] = sentinel;
+
+    i = 0;      /* Points to the next leaf node. */
+    j = n + 1;  /* Points to the next non-leaf node. */
+    for (k = n - 1; k != 0; --k) {  /* n - 1 merges, one parent each */
+      size_t left, right;
+      if (tree[i].total_count_ <= tree[j].total_count_) {
+        left = i;  /* next leaf is not heavier than next parent */
+        ++i;
+      } else {
+        left = j;
+        ++j;
+      }
+      if (tree[i].total_count_ <= tree[j].total_count_) {
+        right = i;
+        ++i;
+      } else {
+        right = j;
+        ++j;
+      }
+
+      {
+        /* The sentinel node becomes the parent node. */
+        size_t j_end = 2 * n - k;
+        tree[j_end].total_count_ =
+            tree[left].total_count_ + tree[right].total_count_;
+        tree[j_end].index_left_ = (int16_t)left;
+        tree[j_end].index_right_or_value_ = (int16_t)right;
+
+        /* Add back the last sentinel node. */
+        tree[j_end + 1] = sentinel;
+      }
+    }
+    if (BrotliSetDepth((int)(2 * n - 1), &tree[0], depth, tree_limit)) {
+      /* The tree rooted at 2n - 1 fits in tree_limit bits, so we are done.
+         Otherwise the loop retries with a doubled count_limit. */
+      break;
+    }
+  }
+}
+
+static void Reverse(uint8_t* v, size_t start, size_t end) {
+  /* Reverses v[start, end) in place; |end| is exclusive. */
+  size_t lo = start;
+  size_t hi = end - 1;
+  for (; lo < hi; ++lo, --hi) {
+    const uint8_t held = v[lo];
+    v[lo] = v[hi];
+    v[hi] = held;
+  }
+}
+
+static void BrotliWriteHuffmanTreeRepetitions(
+    const uint8_t previous_value,
+    const uint8_t value,
+    size_t repetitions,
+    size_t* tree_size,
+    uint8_t* tree,
+    uint8_t* extra_bits_data) {
+  BROTLI_DCHECK(repetitions > 0);
+  if (previous_value != value) {  /* value changed: write it once literally */
+    tree[*tree_size] = value;
+    extra_bits_data[*tree_size] = 0;
+    ++(*tree_size);
+    --repetitions;
+  }
+  if (repetitions == 7) {  /* 7 = one literal + a single repeat code for 6 */
+    tree[*tree_size] = value;
+    extra_bits_data[*tree_size] = 0;
+    ++(*tree_size);
+    --repetitions;
+  }
+  if (repetitions < 3) {  /* too short for a repeat code: write literals */
+    size_t i;
+    for (i = 0; i < repetitions; ++i) {
+      tree[*tree_size] = value;
+      extra_bits_data[*tree_size] = 0;
+      ++(*tree_size);
+    }
+  } else {
+    size_t start = *tree_size;  /* first slot of the repeat-code sequence */
+    repetitions -= 3;
+    while (BROTLI_TRUE) {  /* emit 2 extra bits per code, low bits first */
+      tree[*tree_size] = BROTLI_REPEAT_PREVIOUS_CODE_LENGTH;
+      extra_bits_data[*tree_size] = repetitions & 0x3;
+      ++(*tree_size);
+      repetitions >>= 2;
+      if (repetitions == 0) {
+        break;
+      }
+      --repetitions;
+    }
+    Reverse(tree, start, *tree_size);  /* flip the sequence just written */
+    Reverse(extra_bits_data, start, *tree_size);
+  }
+}
+
+static void BrotliWriteHuffmanTreeRepetitionsZeros(
+    size_t repetitions,
+    size_t* tree_size,
+    uint8_t* tree,
+    uint8_t* extra_bits_data) {
+  if (repetitions == 11) {  /* 11 = one literal 0 + a single repeat code for 10 */
+    tree[*tree_size] = 0;
+    extra_bits_data[*tree_size] = 0;
+    ++(*tree_size);
+    --repetitions;
+  }
+  if (repetitions < 3) {  /* too short for a repeat code: write literal zeros */
+    size_t i;
+    for (i = 0; i < repetitions; ++i) {
+      tree[*tree_size] = 0;
+      extra_bits_data[*tree_size] = 0;
+      ++(*tree_size);
+    }
+  } else {
+    size_t start = *tree_size;  /* first slot of the repeat-code sequence */
+    repetitions -= 3;
+    while (BROTLI_TRUE) {  /* emit 3 extra bits per code, low bits first */
+      tree[*tree_size] = BROTLI_REPEAT_ZERO_CODE_LENGTH;
+      extra_bits_data[*tree_size] = repetitions & 0x7;
+      ++(*tree_size);
+      repetitions >>= 3;
+      if (repetitions == 0) {
+        break;
+      }
+      --repetitions;
+    }
+    Reverse(tree, start, *tree_size);  /* flip the sequence just written */
+    Reverse(extra_bits_data, start, *tree_size);
+  }
+}
+
+void BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
+                                       uint8_t* good_for_rle) {
+  size_t nonzero_count = 0;
+  size_t stride;
+  size_t limit;
+  size_t sum;
+  const size_t streak_limit = 1240;
+  /* Let's make the Huffman code more compatible with RLE encoding. */
+  size_t i;
+  for (i = 0; i < length; i++) {
+    if (counts[i]) {
+      ++nonzero_count;
+    }
+  }
+  if (nonzero_count < 16) {  /* few used symbols: leave counts untouched */
+    return;
+  }
+  while (length != 0 && counts[length - 1] == 0) {
+    --length;
+  }
+  if (length == 0) {
+    return;  /* All zeros. */
+  }
+  /* Now counts[0..length - 1] does not have trailing zeros. */
+  {
+    size_t nonzeros = 0;
+    uint32_t smallest_nonzero = 1 << 30;
+    for (i = 0; i < length; ++i) {
+      if (counts[i] != 0) {
+        ++nonzeros;
+        if (smallest_nonzero > counts[i]) {
+          smallest_nonzero = counts[i];
+        }
+      }
+    }
+    if (nonzeros < 5) {
+      /* Small histogram will model it well. */
+      return;
+    }
+    if (smallest_nonzero < 4) {
+      size_t zeros = length - nonzeros;
+      if (zeros < 6) {  /* fill isolated single zeros between nonzeros */
+        for (i = 1; i < length - 1; ++i) {
+          if (counts[i - 1] != 0 && counts[i] == 0 && counts[i + 1] != 0) {
+            counts[i] = 1;
+          }
+        }
+      }
+    }
+    if (nonzeros < 28) {
+      return;
+    }
+  }
+  /* 2) Let's mark all population counts that already can be encoded
+     with an RLE code. */
+  memset(good_for_rle, 0, length);
+  {
+    /* Let's not spoil any of the existing good RLE codes.
+       Mark any run of 0's of length 5 or more as good_for_rle.
+       Mark any run of equal non-0's of length 7 or more as good_for_rle. */
+    uint32_t symbol = counts[0];
+    size_t step = 0;
+    for (i = 0; i <= length; ++i) {  /* i == length flushes the last run */
+      if (i == length || counts[i] != symbol) {
+        if ((symbol == 0 && step >= 5) ||
+            (symbol != 0 && step >= 7)) {
+          size_t k;
+          for (k = 0; k < step; ++k) {
+            good_for_rle[i - k - 1] = 1;
+          }
+        }
+        step = 1;
+        if (i != length) {
+          symbol = counts[i];
+        }
+      } else {
+        ++step;
+      }
+    }
+  }
+  /* 3) Let's replace those population counts that lead to more RLE codes.
+     Math here is in 24.8 fixed point representation. */
+  stride = 0;
+  limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
+  sum = 0;
+  for (i = 0; i <= length; ++i) {  /* i == length flushes the last stride */
+    if (i == length || good_for_rle[i] ||
+        (i != 0 && good_for_rle[i - 1]) ||
+        (256 * counts[i] - limit + streak_limit) >= 2 * streak_limit) {
+      if (stride >= 4 || (stride >= 3 && sum == 0)) {
+        size_t k;
+        /* The stride must end, collapse what we have, if we have enough (4). */
+        size_t count = (sum + stride / 2) / stride;  /* rounded mean */
+        if (count == 0) {
+          count = 1;
+        }
+        if (sum == 0) {
+          /* Don't make an all zeros stride to be upgraded to ones. */
+          count = 0;
+        }
+        for (k = 0; k < stride; ++k) {
+          /* We don't want to change value at counts[i],
+             that is already belonging to the next stride. Thus - 1. */
+          counts[i - k - 1] = (uint32_t)count;
+        }
+      }
+      stride = 0;
+      sum = 0;
+      if (i < length - 2) {
+        /* All interesting strides have a count of at least 4, */
+        /* at least when non-zeros. */
+        limit = 256 * (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 420;
+      } else if (i < length) {
+        limit = 256 * counts[i];
+      } else {
+        limit = 0;
+      }
+    }
+    ++stride;
+    if (i != length) {
+      sum += counts[i];
+      if (stride >= 4) {
+        limit = (256 * sum + stride / 2) / stride;  /* running mean, 24.8 */
+      }
+      if (stride == 4) {
+        limit += 120;
+      }
+    }
+  }
+}
+
+static void DecideOverRleUse(const uint8_t* depth, const size_t length,
+                             BROTLI_BOOL* use_rle_for_non_zero,
+                             BROTLI_BOOL* use_rle_for_zero) {
+  /* Per class: summed length of long runs, and a run counter starting at 1. */
+  size_t zero_run_total = 0;
+  size_t zero_run_count = 1;
+  size_t nonzero_run_total = 0;
+  size_t nonzero_run_count = 1;
+  size_t pos = 0;
+  while (pos < length) {
+    const uint8_t symbol = depth[pos];
+    size_t run_len = 1;
+    while (pos + run_len < length && depth[pos + run_len] == symbol) ++run_len;
+    if (symbol == 0) {
+      if (run_len >= 3) {
+        zero_run_total += run_len;
+        ++zero_run_count;
+      }
+    } else if (run_len >= 4) {
+      nonzero_run_total += run_len;
+      ++nonzero_run_count;
+    }
+    pos += run_len;
+  }
+  /* Enable RLE for a class when its total exceeds twice its run counter. */
+  *use_rle_for_non_zero =
+      TO_BROTLI_BOOL(nonzero_run_total > nonzero_run_count * 2);
+  *use_rle_for_zero = TO_BROTLI_BOOL(zero_run_total > zero_run_count * 2);
+}
+
+void BrotliWriteHuffmanTree(const uint8_t* depth,
+                            size_t length,
+                            size_t* tree_size,
+                            uint8_t* tree,
+                            uint8_t* extra_bits_data) {
+  /* Serializes |depth| into code-length symbols: each symbol is appended
+     to |tree| together with its entry in |extra_bits_data|, and
+     |*tree_size| is advanced by the helpers that do the writing. */
+  uint8_t last_nonzero = BROTLI_INITIAL_REPEATED_CODE_LENGTH;
+  BROTLI_BOOL rle_non_zero = BROTLI_FALSE;
+  BROTLI_BOOL rle_zero = BROTLI_FALSE;
+  size_t trimmed = length;
+  size_t pos;
+
+  /* Zero depths at the tail are not written at all. */
+  while (trimmed != 0 && depth[trimmed - 1] == 0) {
+    --trimmed;
+  }
+
+  /* Run-length coding is only evaluated when the untrimmed length exceeds
+     50; for shorter inputs every depth is written individually. */
+  if (length > 50) {
+    DecideOverRleUse(depth, trimmed, &rle_non_zero, &rle_zero);
+  }
+
+  /* Walk the remaining depths one run at a time. */
+  pos = 0;
+  while (pos < trimmed) {
+    const uint8_t value = depth[pos];
+    const BROTLI_BOOL merge_run = (value == 0) ? rle_zero : rle_non_zero;
+    size_t run = 1;
+    if (merge_run) {
+      /* Extend the run over all directly following equal depths. */
+      while (pos + run < trimmed && depth[pos + run] == value) {
+        ++run;
+      }
+    }
+    if (value != 0) {
+      BrotliWriteHuffmanTreeRepetitions(last_nonzero, value, run,
+                                        tree_size, tree, extra_bits_data);
+      /* Only non-zero depths update the "previous value" reference. */
+      last_nonzero = value;
+    } else {
+      /* Zero depths go through the dedicated zero-run writer. */
+      BrotliWriteHuffmanTreeRepetitionsZeros(
+          run, tree_size, tree, extra_bits_data);
+    }
+    /* Continue right after the run that was just written. */
+    pos += run;
+  }
+}
+
+static uint16_t BrotliReverseBits(size_t num_bits, uint16_t bits) {
+  /* Nibble-wise reversal: kNibbleRev[x] is x with its 4 bits mirrored. */
+  static const size_t kNibbleRev[16] = {
+    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
+    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF
+  };
+  size_t reversed = kNibbleRev[bits & 0x0F];
+  size_t consumed = 4;
+  while (consumed < num_bits) {
+    bits = (uint16_t)(bits >> 4);
+    reversed = (reversed << 4) | kNibbleRev[bits & 0x0F];
+    consumed += 4;
+  }
+  return (uint16_t)(reversed >> ((0 - num_bits) & 0x03));  /* drop padding */
+}
+
+/* 0..15 are values for bits */
+#define MAX_HUFFMAN_BITS 16
+
+void BrotliConvertBitDepthsToSymbols(const uint8_t* depth,
+                                     size_t len,
+                                     uint16_t* bits) {
+  /* Depth 0 means the symbol does not exist; Brotli depths are in [1..15].
+     Codes ascend within each depth and are stored bit-reversed. */
+  uint16_t per_depth[MAX_HUFFMAN_BITS] = { 0 };
+  uint16_t code_cursor[MAX_HUFFMAN_BITS];
+  size_t d, sym;
+  int running = 0;
+  for (sym = 0; sym < len; ++sym) {
+    per_depth[depth[sym]]++;
+  }
+  per_depth[0] = 0;
+  code_cursor[0] = 0;
+  for (d = 1; d < MAX_HUFFMAN_BITS; ++d) {
+    running = (running + per_depth[d - 1]) << 1;
+    code_cursor[d] = (uint16_t)running;
+  }
+  for (sym = 0; sym < len; ++sym) {
+    const uint8_t bit_len = depth[sym];
+    if (bit_len == 0) continue;
+    bits[sym] = BrotliReverseBits(bit_len, code_cursor[bit_len]++);
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/entropy_encode.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/entropy_encode.h
new file mode 100644
index 0000000000..f23d9c379d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/entropy_encode.h
@@ -0,0 +1,122 @@
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Entropy encoding (Huffman) utilities. */
+
+#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
+#define BROTLI_ENC_ENTROPY_ENCODE_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* A node of a Huffman tree. */
+typedef struct HuffmanTree {
+  uint32_t total_count_;  /* population count of the leaf or subtree */
+  int16_t index_left_;  /* left child index; negative marks a leaf */
+  int16_t index_right_or_value_;  /* right child index, or the leaf's value */
+} HuffmanTree;
+
+static BROTLI_INLINE void InitHuffmanTree(HuffmanTree* self, uint32_t count,
+    int16_t left, int16_t right) {
+  self->total_count_ = count;  /* plain field copies; nothing is validated */
+  self->index_left_ = left;
+  self->index_right_or_value_ = right;  /* child index or leaf value */
+}
+
+/* Returns 1 if assignment of depths succeeded, otherwise 0. */
+BROTLI_INTERNAL BROTLI_BOOL BrotliSetDepth(
+    int p, HuffmanTree* pool, uint8_t* depth, int max_depth);
+
+/* This function will create a Huffman tree.
+
+   The (data,length) contains the population counts.
+   The tree_limit is the maximum bit depth of the Huffman codes.
+
+   The depth contains the tree, i.e., how many bits are used for
+   the symbol.
+
+   The actual Huffman tree is constructed in the tree[] array, which has to
+   be at least 2 * length + 1 long.
+
+   See http://en.wikipedia.org/wiki/Huffman_coding */
+BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t* data,
+                                             const size_t length,
+                                             const int tree_limit,
+                                             HuffmanTree* tree,
+                                             uint8_t* depth);
+
+/* Change the population counts in a way that the consequent
+   Huffman tree compression, especially its RLE-part will be more
+   likely to compress this data more efficiently.
+
+   length contains the size of the histogram.
+   counts contains the population counts.
+   good_for_rle is a buffer of at least length size */
+BROTLI_INTERNAL void BrotliOptimizeHuffmanCountsForRle(
+    size_t length, uint32_t* counts, uint8_t* good_for_rle);
+
+/* Write a Huffman tree from bit depths into the bit-stream representation
+   of a Huffman tree. The generated Huffman tree is to be compressed once
+   more using a Huffman tree */
+BROTLI_INTERNAL void BrotliWriteHuffmanTree(const uint8_t* depth,
+                                            size_t num,
+                                            size_t* tree_size,
+                                            uint8_t* tree,
+                                            uint8_t* extra_bits_data);
+
+/* Get the actual bit values for a tree of bit depths. */
+BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t* depth,
+                                                     size_t len,
+                                                     uint16_t* bits);
+
+/* Sorts items[0, n) in place: insertion sort below 13 items, else Shell sort. */
+typedef BROTLI_BOOL (*HuffmanTreeComparator)(
+    const HuffmanTree*, const HuffmanTree*);
+static BROTLI_INLINE void SortHuffmanTreeItems(HuffmanTree* items,
+    const size_t n, HuffmanTreeComparator comparator) {
+  static const size_t gaps[] = {132, 57, 23, 10, 4, 1};
+  if (n < 13) {
+    /* Insertion sort. */
+    size_t i;
+    for (i = 1; i < n; ++i) {
+      HuffmanTree tmp = items[i];
+      size_t k = i;  /* slot where tmp will finally be stored */
+      size_t j = i - 1;
+      while (comparator(&tmp, &items[j])) {  /* tmp sorts before items[j] */
+        items[k] = items[j];
+        k = j;
+        if (!j--) break;  /* stop at index 0 before j (unsigned) wraps */
+      }
+      items[k] = tmp;
+    }
+    return;
+  } else {
+    /* Shell sort. */
+    int g = n < 57 ? 2 : 0;  /* below 57 items start at gap 23, not 132 */
+    for (; g < 6; ++g) {
+      size_t gap = gaps[g];
+      size_t i;
+      for (i = gap; i < n; ++i) {
+        size_t j = i;
+        HuffmanTree tmp = items[i];
+        for (; j >= gap && comparator(&tmp, &items[j - gap]); j -= gap) {
+          items[j] = items[j - gap];
+        }
+        items[j] = tmp;
+      }
+    }
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_ENTROPY_ENCODE_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/entropy_encode_static.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/entropy_encode_static.h
new file mode 100644
index 0000000000..62b99a954c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/entropy_encode_static.h
@@ -0,0 +1,539 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Static entropy codes used for faster meta-block encoding. */
+
+#ifndef BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
+#define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./write_bits.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static const uint8_t kCodeLengthDepth[18] = {
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 0, 4, 4,
+};  /* depth 4 everywhere except indices 13-14 (5) and index 15 (0) */
+
+static const uint8_t kStaticCommandCodeDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+};  /* 448 entries of depth 9 followed by 256 entries of depth 11 */
+
+static const uint8_t kStaticDistanceCodeDepth[64] = {
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+};  /* every one of the 64 entries has depth 6 */
+
+static const uint32_t kCodeLengthBits[18] = {
+  0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 15, 31, 0, 11, 7,
+};  /* NOTE(review): presumably the codes paired with kCodeLengthDepth */
+
+static BROTLI_INLINE void StoreStaticCodeLengthCode(
+    size_t* storage_ix, uint8_t* storage) {
+  const uint64_t pattern = BROTLI_MAKE_UINT64_T(0x0000FFu, 0x55555554u);
+  BrotliWriteBits(40, pattern, storage_ix, storage);  /* fixed 40-bit value */
+}
+
+static const uint64_t kZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
+  0x00000000, 0x00000000, 0x00000000, 0x00000007, 0x00000017, 0x00000027,
+  0x00000037, 0x00000047, 0x00000057, 0x00000067, 0x00000077, 0x00000770,
+  0x00000b87, 0x00001387, 0x00001b87, 0x00002387, 0x00002b87, 0x00003387,
+  0x00003b87, 0x00000397, 0x00000b97, 0x00001397, 0x00001b97, 0x00002397,
+  0x00002b97, 0x00003397, 0x00003b97, 0x000003a7, 0x00000ba7, 0x000013a7,
+  0x00001ba7, 0x000023a7, 0x00002ba7, 0x000033a7, 0x00003ba7, 0x000003b7,
+  0x00000bb7, 0x000013b7, 0x00001bb7, 0x000023b7, 0x00002bb7, 0x000033b7,
+  0x00003bb7, 0x000003c7, 0x00000bc7, 0x000013c7, 0x00001bc7, 0x000023c7,
+  0x00002bc7, 0x000033c7, 0x00003bc7, 0x000003d7, 0x00000bd7, 0x000013d7,
+  0x00001bd7, 0x000023d7, 0x00002bd7, 0x000033d7, 0x00003bd7, 0x000003e7,
+  0x00000be7, 0x000013e7, 0x00001be7, 0x000023e7, 0x00002be7, 0x000033e7,
+  0x00003be7, 0x000003f7, 0x00000bf7, 0x000013f7, 0x00001bf7, 0x000023f7,
+  0x00002bf7, 0x000033f7, 0x00003bf7, 0x0001c387, 0x0005c387, 0x0009c387,
+  0x000dc387, 0x0011c387, 0x0015c387, 0x0019c387, 0x001dc387, 0x0001cb87,
+  0x0005cb87, 0x0009cb87, 0x000dcb87, 0x0011cb87, 0x0015cb87, 0x0019cb87,
+  0x001dcb87, 0x0001d387, 0x0005d387, 0x0009d387, 0x000dd387, 0x0011d387,
+  0x0015d387, 0x0019d387, 0x001dd387, 0x0001db87, 0x0005db87, 0x0009db87,
+  0x000ddb87, 0x0011db87, 0x0015db87, 0x0019db87, 0x001ddb87, 0x0001e387,
+  0x0005e387, 0x0009e387, 0x000de387, 0x0011e387, 0x0015e387, 0x0019e387,
+  0x001de387, 0x0001eb87, 0x0005eb87, 0x0009eb87, 0x000deb87, 0x0011eb87,
+  0x0015eb87, 0x0019eb87, 0x001deb87, 0x0001f387, 0x0005f387, 0x0009f387,
+  0x000df387, 0x0011f387, 0x0015f387, 0x0019f387, 0x001df387, 0x0001fb87,
+  0x0005fb87, 0x0009fb87, 0x000dfb87, 0x0011fb87, 0x0015fb87, 0x0019fb87,
+  0x001dfb87, 0x0001c397, 0x0005c397, 0x0009c397, 0x000dc397, 0x0011c397,
+  0x0015c397, 0x0019c397, 0x001dc397, 0x0001cb97, 0x0005cb97, 0x0009cb97,
+  0x000dcb97, 0x0011cb97, 0x0015cb97, 0x0019cb97, 0x001dcb97, 0x0001d397,
+  0x0005d397, 0x0009d397, 0x000dd397, 0x0011d397, 0x0015d397, 0x0019d397,
+  0x001dd397, 0x0001db97, 0x0005db97, 0x0009db97, 0x000ddb97, 0x0011db97,
+  0x0015db97, 0x0019db97, 0x001ddb97, 0x0001e397, 0x0005e397, 0x0009e397,
+  0x000de397, 0x0011e397, 0x0015e397, 0x0019e397, 0x001de397, 0x0001eb97,
+  0x0005eb97, 0x0009eb97, 0x000deb97, 0x0011eb97, 0x0015eb97, 0x0019eb97,
+  0x001deb97, 0x0001f397, 0x0005f397, 0x0009f397, 0x000df397, 0x0011f397,
+  0x0015f397, 0x0019f397, 0x001df397, 0x0001fb97, 0x0005fb97, 0x0009fb97,
+  0x000dfb97, 0x0011fb97, 0x0015fb97, 0x0019fb97, 0x001dfb97, 0x0001c3a7,
+  0x0005c3a7, 0x0009c3a7, 0x000dc3a7, 0x0011c3a7, 0x0015c3a7, 0x0019c3a7,
+  0x001dc3a7, 0x0001cba7, 0x0005cba7, 0x0009cba7, 0x000dcba7, 0x0011cba7,
+  0x0015cba7, 0x0019cba7, 0x001dcba7, 0x0001d3a7, 0x0005d3a7, 0x0009d3a7,
+  0x000dd3a7, 0x0011d3a7, 0x0015d3a7, 0x0019d3a7, 0x001dd3a7, 0x0001dba7,
+  0x0005dba7, 0x0009dba7, 0x000ddba7, 0x0011dba7, 0x0015dba7, 0x0019dba7,
+  0x001ddba7, 0x0001e3a7, 0x0005e3a7, 0x0009e3a7, 0x000de3a7, 0x0011e3a7,
+  0x0015e3a7, 0x0019e3a7, 0x001de3a7, 0x0001eba7, 0x0005eba7, 0x0009eba7,
+  0x000deba7, 0x0011eba7, 0x0015eba7, 0x0019eba7, 0x001deba7, 0x0001f3a7,
+  0x0005f3a7, 0x0009f3a7, 0x000df3a7, 0x0011f3a7, 0x0015f3a7, 0x0019f3a7,
+  0x001df3a7, 0x0001fba7, 0x0005fba7, 0x0009fba7, 0x000dfba7, 0x0011fba7,
+  0x0015fba7, 0x0019fba7, 0x001dfba7, 0x0001c3b7, 0x0005c3b7, 0x0009c3b7,
+  0x000dc3b7, 0x0011c3b7, 0x0015c3b7, 0x0019c3b7, 0x001dc3b7, 0x0001cbb7,
+  0x0005cbb7, 0x0009cbb7, 0x000dcbb7, 0x0011cbb7, 0x0015cbb7, 0x0019cbb7,
+  0x001dcbb7, 0x0001d3b7, 0x0005d3b7, 0x0009d3b7, 0x000dd3b7, 0x0011d3b7,
+  0x0015d3b7, 0x0019d3b7, 0x001dd3b7, 0x0001dbb7, 0x0005dbb7, 0x0009dbb7,
+  0x000ddbb7, 0x0011dbb7, 0x0015dbb7, 0x0019dbb7, 0x001ddbb7, 0x0001e3b7,
+  0x0005e3b7, 0x0009e3b7, 0x000de3b7, 0x0011e3b7, 0x0015e3b7, 0x0019e3b7,
+  0x001de3b7, 0x0001ebb7, 0x0005ebb7, 0x0009ebb7, 0x000debb7, 0x0011ebb7,
+  0x0015ebb7, 0x0019ebb7, 0x001debb7, 0x0001f3b7, 0x0005f3b7, 0x0009f3b7,
+  0x000df3b7, 0x0011f3b7, 0x0015f3b7, 0x0019f3b7, 0x001df3b7, 0x0001fbb7,
+  0x0005fbb7, 0x0009fbb7, 0x000dfbb7, 0x0011fbb7, 0x0015fbb7, 0x0019fbb7,
+  0x001dfbb7, 0x0001c3c7, 0x0005c3c7, 0x0009c3c7, 0x000dc3c7, 0x0011c3c7,
+  0x0015c3c7, 0x0019c3c7, 0x001dc3c7, 0x0001cbc7, 0x0005cbc7, 0x0009cbc7,
+  0x000dcbc7, 0x0011cbc7, 0x0015cbc7, 0x0019cbc7, 0x001dcbc7, 0x0001d3c7,
+  0x0005d3c7, 0x0009d3c7, 0x000dd3c7, 0x0011d3c7, 0x0015d3c7, 0x0019d3c7,
+  0x001dd3c7, 0x0001dbc7, 0x0005dbc7, 0x0009dbc7, 0x000ddbc7, 0x0011dbc7,
+  0x0015dbc7, 0x0019dbc7, 0x001ddbc7, 0x0001e3c7, 0x0005e3c7, 0x0009e3c7,
+  0x000de3c7, 0x0011e3c7, 0x0015e3c7, 0x0019e3c7, 0x001de3c7, 0x0001ebc7,
+  0x0005ebc7, 0x0009ebc7, 0x000debc7, 0x0011ebc7, 0x0015ebc7, 0x0019ebc7,
+  0x001debc7, 0x0001f3c7, 0x0005f3c7, 0x0009f3c7, 0x000df3c7, 0x0011f3c7,
+  0x0015f3c7, 0x0019f3c7, 0x001df3c7, 0x0001fbc7, 0x0005fbc7, 0x0009fbc7,
+  0x000dfbc7, 0x0011fbc7, 0x0015fbc7, 0x0019fbc7, 0x001dfbc7, 0x0001c3d7,
+  0x0005c3d7, 0x0009c3d7, 0x000dc3d7, 0x0011c3d7, 0x0015c3d7, 0x0019c3d7,
+  0x001dc3d7, 0x0001cbd7, 0x0005cbd7, 0x0009cbd7, 0x000dcbd7, 0x0011cbd7,
+  0x0015cbd7, 0x0019cbd7, 0x001dcbd7, 0x0001d3d7, 0x0005d3d7, 0x0009d3d7,
+  0x000dd3d7, 0x0011d3d7, 0x0015d3d7, 0x0019d3d7, 0x001dd3d7, 0x0001dbd7,
+  0x0005dbd7, 0x0009dbd7, 0x000ddbd7, 0x0011dbd7, 0x0015dbd7, 0x0019dbd7,
+  0x001ddbd7, 0x0001e3d7, 0x0005e3d7, 0x0009e3d7, 0x000de3d7, 0x0011e3d7,
+  0x0015e3d7, 0x0019e3d7, 0x001de3d7, 0x0001ebd7, 0x0005ebd7, 0x0009ebd7,
+  0x000debd7, 0x0011ebd7, 0x0015ebd7, 0x0019ebd7, 0x001debd7, 0x0001f3d7,
+  0x0005f3d7, 0x0009f3d7, 0x000df3d7, 0x0011f3d7, 0x0015f3d7, 0x0019f3d7,
+  0x001df3d7, 0x0001fbd7, 0x0005fbd7, 0x0009fbd7, 0x000dfbd7, 0x0011fbd7,
+  0x0015fbd7, 0x0019fbd7, 0x001dfbd7, 0x0001c3e7, 0x0005c3e7, 0x0009c3e7,
+  0x000dc3e7, 0x0011c3e7, 0x0015c3e7, 0x0019c3e7, 0x001dc3e7, 0x0001cbe7,
+  0x0005cbe7, 0x0009cbe7, 0x000dcbe7, 0x0011cbe7, 0x0015cbe7, 0x0019cbe7,
+  0x001dcbe7, 0x0001d3e7, 0x0005d3e7, 0x0009d3e7, 0x000dd3e7, 0x0011d3e7,
+  0x0015d3e7, 0x0019d3e7, 0x001dd3e7, 0x0001dbe7, 0x0005dbe7, 0x0009dbe7,
+  0x000ddbe7, 0x0011dbe7, 0x0015dbe7, 0x0019dbe7, 0x001ddbe7, 0x0001e3e7,
+  0x0005e3e7, 0x0009e3e7, 0x000de3e7, 0x0011e3e7, 0x0015e3e7, 0x0019e3e7,
+  0x001de3e7, 0x0001ebe7, 0x0005ebe7, 0x0009ebe7, 0x000debe7, 0x0011ebe7,
+  0x0015ebe7, 0x0019ebe7, 0x001debe7, 0x0001f3e7, 0x0005f3e7, 0x0009f3e7,
+  0x000df3e7, 0x0011f3e7, 0x0015f3e7, 0x0019f3e7, 0x001df3e7, 0x0001fbe7,
+  0x0005fbe7, 0x0009fbe7, 0x000dfbe7, 0x0011fbe7, 0x0015fbe7, 0x0019fbe7,
+  0x001dfbe7, 0x0001c3f7, 0x0005c3f7, 0x0009c3f7, 0x000dc3f7, 0x0011c3f7,
+  0x0015c3f7, 0x0019c3f7, 0x001dc3f7, 0x0001cbf7, 0x0005cbf7, 0x0009cbf7,
+  0x000dcbf7, 0x0011cbf7, 0x0015cbf7, 0x0019cbf7, 0x001dcbf7, 0x0001d3f7,
+  0x0005d3f7, 0x0009d3f7, 0x000dd3f7, 0x0011d3f7, 0x0015d3f7, 0x0019d3f7,
+  0x001dd3f7, 0x0001dbf7, 0x0005dbf7, 0x0009dbf7, 0x000ddbf7, 0x0011dbf7,
+  0x0015dbf7, 0x0019dbf7, 0x001ddbf7, 0x0001e3f7, 0x0005e3f7, 0x0009e3f7,
+  0x000de3f7, 0x0011e3f7, 0x0015e3f7, 0x0019e3f7, 0x001de3f7, 0x0001ebf7,
+  0x0005ebf7, 0x0009ebf7, 0x000debf7, 0x0011ebf7, 0x0015ebf7, 0x0019ebf7,
+  0x001debf7, 0x0001f3f7, 0x0005f3f7, 0x0009f3f7, 0x000df3f7, 0x0011f3f7,
+  0x0015f3f7, 0x0019f3f7, 0x001df3f7, 0x0001fbf7, 0x0005fbf7, 0x0009fbf7,
+  0x000dfbf7, 0x0011fbf7, 0x0015fbf7, 0x0019fbf7, 0x001dfbf7, 0x00e1c387,
+  0x02e1c387, 0x04e1c387, 0x06e1c387, 0x08e1c387, 0x0ae1c387, 0x0ce1c387,
+  0x0ee1c387, 0x00e5c387, 0x02e5c387, 0x04e5c387, 0x06e5c387, 0x08e5c387,
+  0x0ae5c387, 0x0ce5c387, 0x0ee5c387, 0x00e9c387, 0x02e9c387, 0x04e9c387,
+  0x06e9c387, 0x08e9c387, 0x0ae9c387, 0x0ce9c387, 0x0ee9c387, 0x00edc387,
+  0x02edc387, 0x04edc387, 0x06edc387, 0x08edc387, 0x0aedc387, 0x0cedc387,
+  0x0eedc387, 0x00f1c387, 0x02f1c387, 0x04f1c387, 0x06f1c387, 0x08f1c387,
+  0x0af1c387, 0x0cf1c387, 0x0ef1c387, 0x00f5c387, 0x02f5c387, 0x04f5c387,
+  0x06f5c387, 0x08f5c387, 0x0af5c387, 0x0cf5c387, 0x0ef5c387, 0x00f9c387,
+  0x02f9c387, 0x04f9c387, 0x06f9c387, 0x08f9c387, 0x0af9c387, 0x0cf9c387,
+  0x0ef9c387, 0x00fdc387, 0x02fdc387, 0x04fdc387, 0x06fdc387, 0x08fdc387,
+  0x0afdc387, 0x0cfdc387, 0x0efdc387, 0x00e1cb87, 0x02e1cb87, 0x04e1cb87,
+  0x06e1cb87, 0x08e1cb87, 0x0ae1cb87, 0x0ce1cb87, 0x0ee1cb87, 0x00e5cb87,
+  0x02e5cb87, 0x04e5cb87, 0x06e5cb87, 0x08e5cb87, 0x0ae5cb87, 0x0ce5cb87,
+  0x0ee5cb87, 0x00e9cb87, 0x02e9cb87, 0x04e9cb87, 0x06e9cb87, 0x08e9cb87,
+  0x0ae9cb87, 0x0ce9cb87, 0x0ee9cb87, 0x00edcb87, 0x02edcb87, 0x04edcb87,
+  0x06edcb87, 0x08edcb87, 0x0aedcb87, 0x0cedcb87, 0x0eedcb87, 0x00f1cb87,
+  0x02f1cb87, 0x04f1cb87, 0x06f1cb87, 0x08f1cb87, 0x0af1cb87, 0x0cf1cb87,
+  0x0ef1cb87, 0x00f5cb87, 0x02f5cb87, 0x04f5cb87, 0x06f5cb87, 0x08f5cb87,
+  0x0af5cb87, 0x0cf5cb87, 0x0ef5cb87, 0x00f9cb87, 0x02f9cb87, 0x04f9cb87,
+  0x06f9cb87, 0x08f9cb87,
+};
+
+static const uint32_t kZeroRepsDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
+   0,  4,  8,  7,  7,  7,  7,  7,  7,  7,  7, 11, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+};
+
+static const uint64_t kNonZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
+  0x0000000b, 0x0000001b, 0x0000002b, 0x0000003b, 0x000002cb, 0x000006cb,
+  0x00000acb, 0x00000ecb, 0x000002db, 0x000006db, 0x00000adb, 0x00000edb,
+  0x000002eb, 0x000006eb, 0x00000aeb, 0x00000eeb, 0x000002fb, 0x000006fb,
+  0x00000afb, 0x00000efb, 0x0000b2cb, 0x0001b2cb, 0x0002b2cb, 0x0003b2cb,
+  0x0000b6cb, 0x0001b6cb, 0x0002b6cb, 0x0003b6cb, 0x0000bacb, 0x0001bacb,
+  0x0002bacb, 0x0003bacb, 0x0000becb, 0x0001becb, 0x0002becb, 0x0003becb,
+  0x0000b2db, 0x0001b2db, 0x0002b2db, 0x0003b2db, 0x0000b6db, 0x0001b6db,
+  0x0002b6db, 0x0003b6db, 0x0000badb, 0x0001badb, 0x0002badb, 0x0003badb,
+  0x0000bedb, 0x0001bedb, 0x0002bedb, 0x0003bedb, 0x0000b2eb, 0x0001b2eb,
+  0x0002b2eb, 0x0003b2eb, 0x0000b6eb, 0x0001b6eb, 0x0002b6eb, 0x0003b6eb,
+  0x0000baeb, 0x0001baeb, 0x0002baeb, 0x0003baeb, 0x0000beeb, 0x0001beeb,
+  0x0002beeb, 0x0003beeb, 0x0000b2fb, 0x0001b2fb, 0x0002b2fb, 0x0003b2fb,
+  0x0000b6fb, 0x0001b6fb, 0x0002b6fb, 0x0003b6fb, 0x0000bafb, 0x0001bafb,
+  0x0002bafb, 0x0003bafb, 0x0000befb, 0x0001befb, 0x0002befb, 0x0003befb,
+  0x002cb2cb, 0x006cb2cb, 0x00acb2cb, 0x00ecb2cb, 0x002db2cb, 0x006db2cb,
+  0x00adb2cb, 0x00edb2cb, 0x002eb2cb, 0x006eb2cb, 0x00aeb2cb, 0x00eeb2cb,
+  0x002fb2cb, 0x006fb2cb, 0x00afb2cb, 0x00efb2cb, 0x002cb6cb, 0x006cb6cb,
+  0x00acb6cb, 0x00ecb6cb, 0x002db6cb, 0x006db6cb, 0x00adb6cb, 0x00edb6cb,
+  0x002eb6cb, 0x006eb6cb, 0x00aeb6cb, 0x00eeb6cb, 0x002fb6cb, 0x006fb6cb,
+  0x00afb6cb, 0x00efb6cb, 0x002cbacb, 0x006cbacb, 0x00acbacb, 0x00ecbacb,
+  0x002dbacb, 0x006dbacb, 0x00adbacb, 0x00edbacb, 0x002ebacb, 0x006ebacb,
+  0x00aebacb, 0x00eebacb, 0x002fbacb, 0x006fbacb, 0x00afbacb, 0x00efbacb,
+  0x002cbecb, 0x006cbecb, 0x00acbecb, 0x00ecbecb, 0x002dbecb, 0x006dbecb,
+  0x00adbecb, 0x00edbecb, 0x002ebecb, 0x006ebecb, 0x00aebecb, 0x00eebecb,
+  0x002fbecb, 0x006fbecb, 0x00afbecb, 0x00efbecb, 0x002cb2db, 0x006cb2db,
+  0x00acb2db, 0x00ecb2db, 0x002db2db, 0x006db2db, 0x00adb2db, 0x00edb2db,
+  0x002eb2db, 0x006eb2db, 0x00aeb2db, 0x00eeb2db, 0x002fb2db, 0x006fb2db,
+  0x00afb2db, 0x00efb2db, 0x002cb6db, 0x006cb6db, 0x00acb6db, 0x00ecb6db,
+  0x002db6db, 0x006db6db, 0x00adb6db, 0x00edb6db, 0x002eb6db, 0x006eb6db,
+  0x00aeb6db, 0x00eeb6db, 0x002fb6db, 0x006fb6db, 0x00afb6db, 0x00efb6db,
+  0x002cbadb, 0x006cbadb, 0x00acbadb, 0x00ecbadb, 0x002dbadb, 0x006dbadb,
+  0x00adbadb, 0x00edbadb, 0x002ebadb, 0x006ebadb, 0x00aebadb, 0x00eebadb,
+  0x002fbadb, 0x006fbadb, 0x00afbadb, 0x00efbadb, 0x002cbedb, 0x006cbedb,
+  0x00acbedb, 0x00ecbedb, 0x002dbedb, 0x006dbedb, 0x00adbedb, 0x00edbedb,
+  0x002ebedb, 0x006ebedb, 0x00aebedb, 0x00eebedb, 0x002fbedb, 0x006fbedb,
+  0x00afbedb, 0x00efbedb, 0x002cb2eb, 0x006cb2eb, 0x00acb2eb, 0x00ecb2eb,
+  0x002db2eb, 0x006db2eb, 0x00adb2eb, 0x00edb2eb, 0x002eb2eb, 0x006eb2eb,
+  0x00aeb2eb, 0x00eeb2eb, 0x002fb2eb, 0x006fb2eb, 0x00afb2eb, 0x00efb2eb,
+  0x002cb6eb, 0x006cb6eb, 0x00acb6eb, 0x00ecb6eb, 0x002db6eb, 0x006db6eb,
+  0x00adb6eb, 0x00edb6eb, 0x002eb6eb, 0x006eb6eb, 0x00aeb6eb, 0x00eeb6eb,
+  0x002fb6eb, 0x006fb6eb, 0x00afb6eb, 0x00efb6eb, 0x002cbaeb, 0x006cbaeb,
+  0x00acbaeb, 0x00ecbaeb, 0x002dbaeb, 0x006dbaeb, 0x00adbaeb, 0x00edbaeb,
+  0x002ebaeb, 0x006ebaeb, 0x00aebaeb, 0x00eebaeb, 0x002fbaeb, 0x006fbaeb,
+  0x00afbaeb, 0x00efbaeb, 0x002cbeeb, 0x006cbeeb, 0x00acbeeb, 0x00ecbeeb,
+  0x002dbeeb, 0x006dbeeb, 0x00adbeeb, 0x00edbeeb, 0x002ebeeb, 0x006ebeeb,
+  0x00aebeeb, 0x00eebeeb, 0x002fbeeb, 0x006fbeeb, 0x00afbeeb, 0x00efbeeb,
+  0x002cb2fb, 0x006cb2fb, 0x00acb2fb, 0x00ecb2fb, 0x002db2fb, 0x006db2fb,
+  0x00adb2fb, 0x00edb2fb, 0x002eb2fb, 0x006eb2fb, 0x00aeb2fb, 0x00eeb2fb,
+  0x002fb2fb, 0x006fb2fb, 0x00afb2fb, 0x00efb2fb, 0x002cb6fb, 0x006cb6fb,
+  0x00acb6fb, 0x00ecb6fb, 0x002db6fb, 0x006db6fb, 0x00adb6fb, 0x00edb6fb,
+  0x002eb6fb, 0x006eb6fb, 0x00aeb6fb, 0x00eeb6fb, 0x002fb6fb, 0x006fb6fb,
+  0x00afb6fb, 0x00efb6fb, 0x002cbafb, 0x006cbafb, 0x00acbafb, 0x00ecbafb,
+  0x002dbafb, 0x006dbafb, 0x00adbafb, 0x00edbafb, 0x002ebafb, 0x006ebafb,
+  0x00aebafb, 0x00eebafb, 0x002fbafb, 0x006fbafb, 0x00afbafb, 0x00efbafb,
+  0x002cbefb, 0x006cbefb, 0x00acbefb, 0x00ecbefb, 0x002dbefb, 0x006dbefb,
+  0x00adbefb, 0x00edbefb, 0x002ebefb, 0x006ebefb, 0x00aebefb, 0x00eebefb,
+  0x002fbefb, 0x006fbefb, 0x00afbefb, 0x00efbefb, 0x0b2cb2cb, 0x1b2cb2cb,
+  0x2b2cb2cb, 0x3b2cb2cb, 0x0b6cb2cb, 0x1b6cb2cb, 0x2b6cb2cb, 0x3b6cb2cb,
+  0x0bacb2cb, 0x1bacb2cb, 0x2bacb2cb, 0x3bacb2cb, 0x0becb2cb, 0x1becb2cb,
+  0x2becb2cb, 0x3becb2cb, 0x0b2db2cb, 0x1b2db2cb, 0x2b2db2cb, 0x3b2db2cb,
+  0x0b6db2cb, 0x1b6db2cb, 0x2b6db2cb, 0x3b6db2cb, 0x0badb2cb, 0x1badb2cb,
+  0x2badb2cb, 0x3badb2cb, 0x0bedb2cb, 0x1bedb2cb, 0x2bedb2cb, 0x3bedb2cb,
+  0x0b2eb2cb, 0x1b2eb2cb, 0x2b2eb2cb, 0x3b2eb2cb, 0x0b6eb2cb, 0x1b6eb2cb,
+  0x2b6eb2cb, 0x3b6eb2cb, 0x0baeb2cb, 0x1baeb2cb, 0x2baeb2cb, 0x3baeb2cb,
+  0x0beeb2cb, 0x1beeb2cb, 0x2beeb2cb, 0x3beeb2cb, 0x0b2fb2cb, 0x1b2fb2cb,
+  0x2b2fb2cb, 0x3b2fb2cb, 0x0b6fb2cb, 0x1b6fb2cb, 0x2b6fb2cb, 0x3b6fb2cb,
+  0x0bafb2cb, 0x1bafb2cb, 0x2bafb2cb, 0x3bafb2cb, 0x0befb2cb, 0x1befb2cb,
+  0x2befb2cb, 0x3befb2cb, 0x0b2cb6cb, 0x1b2cb6cb, 0x2b2cb6cb, 0x3b2cb6cb,
+  0x0b6cb6cb, 0x1b6cb6cb, 0x2b6cb6cb, 0x3b6cb6cb, 0x0bacb6cb, 0x1bacb6cb,
+  0x2bacb6cb, 0x3bacb6cb, 0x0becb6cb, 0x1becb6cb, 0x2becb6cb, 0x3becb6cb,
+  0x0b2db6cb, 0x1b2db6cb, 0x2b2db6cb, 0x3b2db6cb, 0x0b6db6cb, 0x1b6db6cb,
+  0x2b6db6cb, 0x3b6db6cb, 0x0badb6cb, 0x1badb6cb, 0x2badb6cb, 0x3badb6cb,
+  0x0bedb6cb, 0x1bedb6cb, 0x2bedb6cb, 0x3bedb6cb, 0x0b2eb6cb, 0x1b2eb6cb,
+  0x2b2eb6cb, 0x3b2eb6cb, 0x0b6eb6cb, 0x1b6eb6cb, 0x2b6eb6cb, 0x3b6eb6cb,
+  0x0baeb6cb, 0x1baeb6cb, 0x2baeb6cb, 0x3baeb6cb, 0x0beeb6cb, 0x1beeb6cb,
+  0x2beeb6cb, 0x3beeb6cb, 0x0b2fb6cb, 0x1b2fb6cb, 0x2b2fb6cb, 0x3b2fb6cb,
+  0x0b6fb6cb, 0x1b6fb6cb, 0x2b6fb6cb, 0x3b6fb6cb, 0x0bafb6cb, 0x1bafb6cb,
+  0x2bafb6cb, 0x3bafb6cb, 0x0befb6cb, 0x1befb6cb, 0x2befb6cb, 0x3befb6cb,
+  0x0b2cbacb, 0x1b2cbacb, 0x2b2cbacb, 0x3b2cbacb, 0x0b6cbacb, 0x1b6cbacb,
+  0x2b6cbacb, 0x3b6cbacb, 0x0bacbacb, 0x1bacbacb, 0x2bacbacb, 0x3bacbacb,
+  0x0becbacb, 0x1becbacb, 0x2becbacb, 0x3becbacb, 0x0b2dbacb, 0x1b2dbacb,
+  0x2b2dbacb, 0x3b2dbacb, 0x0b6dbacb, 0x1b6dbacb, 0x2b6dbacb, 0x3b6dbacb,
+  0x0badbacb, 0x1badbacb, 0x2badbacb, 0x3badbacb, 0x0bedbacb, 0x1bedbacb,
+  0x2bedbacb, 0x3bedbacb, 0x0b2ebacb, 0x1b2ebacb, 0x2b2ebacb, 0x3b2ebacb,
+  0x0b6ebacb, 0x1b6ebacb, 0x2b6ebacb, 0x3b6ebacb, 0x0baebacb, 0x1baebacb,
+  0x2baebacb, 0x3baebacb, 0x0beebacb, 0x1beebacb, 0x2beebacb, 0x3beebacb,
+  0x0b2fbacb, 0x1b2fbacb, 0x2b2fbacb, 0x3b2fbacb, 0x0b6fbacb, 0x1b6fbacb,
+  0x2b6fbacb, 0x3b6fbacb, 0x0bafbacb, 0x1bafbacb, 0x2bafbacb, 0x3bafbacb,
+  0x0befbacb, 0x1befbacb, 0x2befbacb, 0x3befbacb, 0x0b2cbecb, 0x1b2cbecb,
+  0x2b2cbecb, 0x3b2cbecb, 0x0b6cbecb, 0x1b6cbecb, 0x2b6cbecb, 0x3b6cbecb,
+  0x0bacbecb, 0x1bacbecb, 0x2bacbecb, 0x3bacbecb, 0x0becbecb, 0x1becbecb,
+  0x2becbecb, 0x3becbecb, 0x0b2dbecb, 0x1b2dbecb, 0x2b2dbecb, 0x3b2dbecb,
+  0x0b6dbecb, 0x1b6dbecb, 0x2b6dbecb, 0x3b6dbecb, 0x0badbecb, 0x1badbecb,
+  0x2badbecb, 0x3badbecb, 0x0bedbecb, 0x1bedbecb, 0x2bedbecb, 0x3bedbecb,
+  0x0b2ebecb, 0x1b2ebecb, 0x2b2ebecb, 0x3b2ebecb, 0x0b6ebecb, 0x1b6ebecb,
+  0x2b6ebecb, 0x3b6ebecb, 0x0baebecb, 0x1baebecb, 0x2baebecb, 0x3baebecb,
+  0x0beebecb, 0x1beebecb, 0x2beebecb, 0x3beebecb, 0x0b2fbecb, 0x1b2fbecb,
+  0x2b2fbecb, 0x3b2fbecb, 0x0b6fbecb, 0x1b6fbecb, 0x2b6fbecb, 0x3b6fbecb,
+  0x0bafbecb, 0x1bafbecb, 0x2bafbecb, 0x3bafbecb, 0x0befbecb, 0x1befbecb,
+  0x2befbecb, 0x3befbecb, 0x0b2cb2db, 0x1b2cb2db, 0x2b2cb2db, 0x3b2cb2db,
+  0x0b6cb2db, 0x1b6cb2db, 0x2b6cb2db, 0x3b6cb2db, 0x0bacb2db, 0x1bacb2db,
+  0x2bacb2db, 0x3bacb2db, 0x0becb2db, 0x1becb2db, 0x2becb2db, 0x3becb2db,
+  0x0b2db2db, 0x1b2db2db, 0x2b2db2db, 0x3b2db2db, 0x0b6db2db, 0x1b6db2db,
+  0x2b6db2db, 0x3b6db2db, 0x0badb2db, 0x1badb2db, 0x2badb2db, 0x3badb2db,
+  0x0bedb2db, 0x1bedb2db, 0x2bedb2db, 0x3bedb2db, 0x0b2eb2db, 0x1b2eb2db,
+  0x2b2eb2db, 0x3b2eb2db, 0x0b6eb2db, 0x1b6eb2db, 0x2b6eb2db, 0x3b6eb2db,
+  0x0baeb2db, 0x1baeb2db, 0x2baeb2db, 0x3baeb2db, 0x0beeb2db, 0x1beeb2db,
+  0x2beeb2db, 0x3beeb2db, 0x0b2fb2db, 0x1b2fb2db, 0x2b2fb2db, 0x3b2fb2db,
+  0x0b6fb2db, 0x1b6fb2db, 0x2b6fb2db, 0x3b6fb2db, 0x0bafb2db, 0x1bafb2db,
+  0x2bafb2db, 0x3bafb2db, 0x0befb2db, 0x1befb2db, 0x2befb2db, 0x3befb2db,
+  0x0b2cb6db, 0x1b2cb6db, 0x2b2cb6db, 0x3b2cb6db, 0x0b6cb6db, 0x1b6cb6db,
+  0x2b6cb6db, 0x3b6cb6db, 0x0bacb6db, 0x1bacb6db, 0x2bacb6db, 0x3bacb6db,
+  0x0becb6db, 0x1becb6db, 0x2becb6db, 0x3becb6db, 0x0b2db6db, 0x1b2db6db,
+  0x2b2db6db, 0x3b2db6db, 0x0b6db6db, 0x1b6db6db, 0x2b6db6db, 0x3b6db6db,
+  0x0badb6db, 0x1badb6db, 0x2badb6db, 0x3badb6db, 0x0bedb6db, 0x1bedb6db,
+  0x2bedb6db, 0x3bedb6db, 0x0b2eb6db, 0x1b2eb6db, 0x2b2eb6db, 0x3b2eb6db,
+  0x0b6eb6db, 0x1b6eb6db, 0x2b6eb6db, 0x3b6eb6db, 0x0baeb6db, 0x1baeb6db,
+  0x2baeb6db, 0x3baeb6db,
+};
+
+static const uint32_t kNonZeroRepsDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
+   6,  6,  6,  6, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+  18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+  18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+  18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+  18, 18, 18, 18, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+};
+
+static const uint16_t kStaticCommandCodeBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
+    0,  256,  128,  384,   64,  320,  192,  448,
+   32,  288,  160,  416,   96,  352,  224,  480,
+   16,  272,  144,  400,   80,  336,  208,  464,
+   48,  304,  176,  432,  112,  368,  240,  496,
+    8,  264,  136,  392,   72,  328,  200,  456,
+   40,  296,  168,  424,  104,  360,  232,  488,
+   24,  280,  152,  408,   88,  344,  216,  472,
+   56,  312,  184,  440,  120,  376,  248,  504,
+    4,  260,  132,  388,   68,  324,  196,  452,
+   36,  292,  164,  420,  100,  356,  228,  484,
+   20,  276,  148,  404,   84,  340,  212,  468,
+   52,  308,  180,  436,  116,  372,  244,  500,
+   12,  268,  140,  396,   76,  332,  204,  460,
+   44,  300,  172,  428,  108,  364,  236,  492,
+   28,  284,  156,  412,   92,  348,  220,  476,
+   60,  316,  188,  444,  124,  380,  252,  508,
+    2,  258,  130,  386,   66,  322,  194,  450,
+   34,  290,  162,  418,   98,  354,  226,  482,
+   18,  274,  146,  402,   82,  338,  210,  466,
+   50,  306,  178,  434,  114,  370,  242,  498,
+   10,  266,  138,  394,   74,  330,  202,  458,
+   42,  298,  170,  426,  106,  362,  234,  490,
+   26,  282,  154,  410,   90,  346,  218,  474,
+   58,  314,  186,  442,  122,  378,  250,  506,
+    6,  262,  134,  390,   70,  326,  198,  454,
+   38,  294,  166,  422,  102,  358,  230,  486,
+   22,  278,  150,  406,   86,  342,  214,  470,
+   54,  310,  182,  438,  118,  374,  246,  502,
+   14,  270,  142,  398,   78,  334,  206,  462,
+   46,  302,  174,  430,  110,  366,  238,  494,
+   30,  286,  158,  414,   94,  350,  222,  478,
+   62,  318,  190,  446,  126,  382,  254,  510,
+    1,  257,  129,  385,   65,  321,  193,  449,
+   33,  289,  161,  417,   97,  353,  225,  481,
+   17,  273,  145,  401,   81,  337,  209,  465,
+   49,  305,  177,  433,  113,  369,  241,  497,
+    9,  265,  137,  393,   73,  329,  201,  457,
+   41,  297,  169,  425,  105,  361,  233,  489,
+   25,  281,  153,  409,   89,  345,  217,  473,
+   57,  313,  185,  441,  121,  377,  249,  505,
+    5,  261,  133,  389,   69,  325,  197,  453,
+   37,  293,  165,  421,  101,  357,  229,  485,
+   21,  277,  149,  405,   85,  341,  213,  469,
+   53,  309,  181,  437,  117,  373,  245,  501,
+   13,  269,  141,  397,   77,  333,  205,  461,
+   45,  301,  173,  429,  109,  365,  237,  493,
+   29,  285,  157,  413,   93,  349,  221,  477,
+   61,  317,  189,  445,  125,  381,  253,  509,
+    3,  259,  131,  387,   67,  323,  195,  451,
+   35,  291,  163,  419,   99,  355,  227,  483,
+   19,  275,  147,  403,   83,  339,  211,  467,
+   51,  307,  179,  435,  115,  371,  243,  499,
+   11,  267,  139,  395,   75,  331,  203,  459,
+   43,  299,  171,  427,  107,  363,  235,  491,
+   27,  283,  155,  411,   91,  347,  219,  475,
+   59,  315,  187,  443,  123,  379,  251,  507,
+    7, 1031,  519, 1543,  263, 1287,  775, 1799,
+  135, 1159,  647, 1671,  391, 1415,  903, 1927,
+   71, 1095,  583, 1607,  327, 1351,  839, 1863,
+  199, 1223,  711, 1735,  455, 1479,  967, 1991,
+   39, 1063,  551, 1575,  295, 1319,  807, 1831,
+  167, 1191,  679, 1703,  423, 1447,  935, 1959,
+  103, 1127,  615, 1639,  359, 1383,  871, 1895,
+  231, 1255,  743, 1767,  487, 1511,  999, 2023,
+   23, 1047,  535, 1559,  279, 1303,  791, 1815,
+  151, 1175,  663, 1687,  407, 1431,  919, 1943,
+   87, 1111,  599, 1623,  343, 1367,  855, 1879,
+  215, 1239,  727, 1751,  471, 1495,  983, 2007,
+   55, 1079,  567, 1591,  311, 1335,  823, 1847,
+  183, 1207,  695, 1719,  439, 1463,  951, 1975,
+  119, 1143,  631, 1655,  375, 1399,  887, 1911,
+  247, 1271,  759, 1783,  503, 1527, 1015, 2039,
+   15, 1039,  527, 1551,  271, 1295,  783, 1807,
+  143, 1167,  655, 1679,  399, 1423,  911, 1935,
+   79, 1103,  591, 1615,  335, 1359,  847, 1871,
+  207, 1231,  719, 1743,  463, 1487,  975, 1999,
+   47, 1071,  559, 1583,  303, 1327,  815, 1839,
+  175, 1199,  687, 1711,  431, 1455,  943, 1967,
+  111, 1135,  623, 1647,  367, 1391,  879, 1903,
+  239, 1263,  751, 1775,  495, 1519, 1007, 2031,
+   31, 1055,  543, 1567,  287, 1311,  799, 1823,
+  159, 1183,  671, 1695,  415, 1439,  927, 1951,
+   95, 1119,  607, 1631,  351, 1375,  863, 1887,
+  223, 1247,  735, 1759,  479, 1503,  991, 2015,
+   63, 1087,  575, 1599,  319, 1343,  831, 1855,
+  191, 1215,  703, 1727,  447, 1471,  959, 1983,
+  127, 1151,  639, 1663,  383, 1407,  895, 1919,
+  255, 1279,  767, 1791,  511, 1535, 1023, 2047,
+};
+
+static BROTLI_INLINE void StoreStaticCommandHuffmanTree(
+    size_t* storage_ix, uint8_t* storage) {
+  const uint64_t head = BROTLI_MAKE_UINT64_T(0x926244U, 0x16307003U);
+  BrotliWriteBits(56, head, storage_ix, storage);  /* fixed 56-bit prefix */
+  BrotliWriteBits(3, 0x00000000U, storage_ix, storage);  /* then 3 zero bits */
+}
+
+static const uint16_t kStaticDistanceCodeBits[64] = {
+   0, 32, 16, 48,  8, 40, 24, 56,  4, 36, 20, 52, 12, 44, 28, 60,
+   2, 34, 18, 50, 10, 42, 26, 58,  6, 38, 22, 54, 14, 46, 30, 62,
+   1, 33, 17, 49,  9, 41, 25, 57,  5, 37, 21, 53, 13, 45, 29, 61,
+   3, 35, 19, 51, 11, 43, 27, 59,  7, 39, 23, 55, 15, 47, 31, 63,
+};  /* Entry i is i with its low 6 bits reversed (e.g. 1 -> 32, 2 -> 16). */
+
+static BROTLI_INLINE void StoreStaticDistanceHuffmanTree(
+    size_t* storage_ix, uint8_t* storage) {
+  BrotliWriteBits(28, 0x0369DC03u, storage_ix, storage);  /* 28 fixed bits */
+}  /* NOTE(review): presumably the pre-serialized static distance tree. */
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/fast_log.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/fast_log.h
new file mode 100644
index 0000000000..cade1235ad
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/fast_log.h
@@ -0,0 +1,147 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Utilities for fast computation of logarithms. */
+
+#ifndef BROTLI_ENC_FAST_LOG_H_
+#define BROTLI_ENC_FAST_LOG_H_
+
+#include <math.h>
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static BROTLI_INLINE uint32_t Log2FloorNonZero(size_t n) {
+  /* Floor of log2(n) for n != 0. TODO: generalize and move to platform.h */
+#if BROTLI_GNUC_HAS_BUILTIN(__builtin_clz, 3, 4, 0) || \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0)
+  return 31u - (uint32_t)__builtin_clz((uint32_t)n);  /* low 32 bits only */
+#else
+  uint32_t shifts = 0;
+  for (n >>= 1; n != 0; n >>= 1) ++shifts;
+  return shifts;
+#endif
+}
+
+/* A lookup table for small values of log2(int) to be used in entropy
+   computation.
+
+   ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]]) */
+static const float kLog2Table[] = {
+  0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
+  1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
+  2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
+  3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
+  3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
+  3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
+  4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
+  4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
+  4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
+  4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
+  4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
+  5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
+  5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
+  5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
+  5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
+  5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
+  5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
+  5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
+  5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
+  5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
+  5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
+  5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
+  6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
+  6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
+  6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
+  6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
+  6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
+  6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
+  6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
+  6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
+  6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
+  6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
+  6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
+  6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
+  6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
+  6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
+  6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
+  6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
+  6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
+  6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
+  6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
+  6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
+  6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
+  7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
+  7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
+  7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
+  7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
+  7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
+  7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
+  7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
+  7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
+  7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
+  7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
+  7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
+  7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
+  7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
+  7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
+  7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
+  7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
+  7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
+  7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
+  7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
+  7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
+  7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
+  7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
+  7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
+  7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
+  7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
+  7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
+  7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
+  7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
+  7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
+  7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
+  7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
+  7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
+  7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
+  7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
+  7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
+  7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
+  7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
+  7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
+  7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
+  7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
+  7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
+  7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
+  7.9943534368588578f
+};
+
+#define LOG_2_INV 1.4426950408889634
+
+/* log2(v) as a double. Values below the size of kLog2Table are looked up, so
+   FastLog2(0) == 0; larger values go through the C math library. */
+static BROTLI_INLINE double FastLog2(size_t v) {
+  const size_t table_size = sizeof(kLog2Table) / sizeof(kLog2Table[0]);
+  if (v < table_size) return kLog2Table[v];
+#if (defined(_MSC_VER) && _MSC_VER <= 1700) || \
+    (defined(__ANDROID_API__) && __ANDROID_API__ < 18)
+  /* log2() is missing on Visual Studio 2012 and on Android API levels < 18,
+   * so scale the natural logarithm by 1 / ln(2) instead. */
+  return log((double)v) * LOG_2_INV;
+#else
+  return log2((double)v);
+#endif
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_FAST_LOG_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/find_match_length.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/find_match_length.h
new file mode 100644
index 0000000000..bc428cffda
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/find_match_length.h
@@ -0,0 +1,80 @@
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function to find maximal matching prefixes of strings. */
+
+#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
+#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Separate implementation for little-endian 64-bit targets, for speed. */
+#if defined(__GNUC__) && defined(_LP64) && defined(BROTLI_LITTLE_ENDIAN)
+
+static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
+                                                     const uint8_t* s2,
+                                                     size_t limit) {
+  size_t matched = 0;  /* count of leading bytes found equal so far */
+  size_t limit2 = (limit >> 3) + 1;  /* 8-byte steps; + 1 for pre-decrement */
+  while (BROTLI_PREDICT_TRUE(--limit2)) {
+    if (BROTLI_PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64LE(s2) ==
+                      BROTLI_UNALIGNED_LOAD64LE(s1 + matched))) {
+      s2 += 8;
+      matched += 8;
+    } else {
+      uint64_t x = BROTLI_UNALIGNED_LOAD64LE(s2) ^
+          BROTLI_UNALIGNED_LOAD64LE(s1 + matched);
+      size_t matching_bits = (size_t)__builtin_ctzll(x);  /* equal low bits */
+      matched += matching_bits >> 3;  /* whole equal bytes before the diff */
+      return matched;
+    }
+  }
+  limit = (limit & 7) + 1;  /* leftover bytes; + 1 for pre-decrement */
+  while (--limit) {
+    if (BROTLI_PREDICT_TRUE(s1[matched] == *s2)) {
+      ++s2;
+      ++matched;
+    } else {
+      return matched;
+    }
+  }
+  return matched;
+}
+#else
+static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
+                                                     const uint8_t* s2,
+                                                     size_t limit) {
+  size_t matched = 0;
+  const uint8_t* s2_limit = s2 + limit;
+  const uint8_t* s2_ptr = s2;
+  /* Returns the length (at most |limit|) of the common prefix of |s1| and
+     |s2|. Compare 32 bits at a time while at least 4 bytes remain, then
+     finish byte by byte. The remaining byte count is tested instead of
+     "s2_limit - 4", which would point before |s2| when limit < 4. */
+  while ((size_t)(s2_limit - s2_ptr) >= 4 &&
+         BrotliUnalignedRead32(s2_ptr) ==
+         BrotliUnalignedRead32(s1 + matched)) {
+    s2_ptr += 4;
+    matched += 4;
+  }
+  while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
+    ++s2_ptr;
+    ++matched;
+  }
+  return matched;
+}
+#endif
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_FIND_MATCH_LENGTH_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash.h
new file mode 100644
index 0000000000..6362f69b9f
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash.h
@@ -0,0 +1,488 @@
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data. */
+
+#ifndef BROTLI_ENC_HASH_H_
+#define BROTLI_ENC_HASH_H_
+
+#include <string.h>  /* memcmp, memset */
+
+#include "../common/constants.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./encoder_dict.h"
+#include "./fast_log.h"
+#include "./find_match_length.h"
+#include "./memory.h"
+#include "./quality.h"
+#include "./static_dict.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct {
+  /* Dynamically allocated area; first member for quickest access. */
+  void* extra;
+
+  size_t dict_num_lookups;  /* static dictionary probes attempted */
+  size_t dict_num_matches;  /* probes that produced an accepted match */
+
+  BrotliHasherParams params;
+
+  /* False if hasher needs to be "prepared" before use. */
+  BROTLI_BOOL is_prepared_;
+} HasherCommon;
+
+#define score_t size_t
+
+static const uint32_t kCutoffTransformsCount = 10;
+/*   0,  12,   27,    23,    42,    63,    56,    48,    59,    64 */
+/* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
+static const uint64_t kCutoffTransforms =
+    BROTLI_MAKE_UINT64_T(0x071B520A, 0xDA2D3200);
+
+typedef struct HasherSearchResult {
+  size_t len;
+  size_t distance;
+  score_t score;
+  int len_code_delta; /* == len_code - len */
+} HasherSearchResult;
+
+/* kHashMul32 multiplier has these properties:
+   * The multiplier must be odd. Otherwise we may lose the highest bit.
+   * No long streaks of ones or zeros.
+   * There is no effort to ensure that it is a prime; being odd is enough
+     for this use.
+   * The number has been tuned heuristically against compression benchmarks. */
+static const uint32_t kHashMul32 = 0x1E35A7BD;
+static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD);
+static const uint64_t kHashMul64Long =
+    BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
+
+static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
+  /* Multiplicative hash of the 32-bit little-endian word at |data|. Only the
+     top 14 bits are returned: the multiply mixes the high bits best. */
+  const uint32_t product = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
+  return product >> (32 - 14);
+}
+
+static BROTLI_INLINE void PrepareDistanceCache(
+    int* BROTLI_RESTRICT distance_cache, const int num_distances) {
+  /* Slots 4..9 hold distance_cache[0] -1, +1, -2, +2, -3, +3 and slots
+     10..15 hold the same offsets applied to distance_cache[1]. The second
+     group is filled only when more than 10 distances are requested, and
+     nothing is written when at most 4 are requested. */
+  int group;
+  int groups = 0;
+  if (num_distances > 4) {
+    groups = (num_distances > 10) ? 2 : 1;
+  }
+  for (group = 0; group < groups; ++group) {
+    const int base = distance_cache[group];
+    int* slot = &distance_cache[4 + 6 * group];
+    int step;
+    for (step = 1; step <= 3; ++step) {
+      *slot++ = base - step;
+      *slot++ = base + step;
+    }
+  }
+}
+
+#define BROTLI_LITERAL_BYTE_SCORE 135
+#define BROTLI_DISTANCE_BIT_PENALTY 30
+/* Score must be positive after applying maximal penalty. */
+#define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))
+
+/* Scores a backward reference that copies |copy_length| bytes from
+   |backward_reference_offset| bytes back; a higher score is better.
+
+   Usually, we always choose the longest backward reference. This function
+   allows for the exception of that rule.
+
+   A backward reference that is further away will usually be coded with
+   more bits. This is approximated by charging a fixed penalty per unit of
+   Log2FloorNonZero(distance), while every copied byte earns a fixed gain.
+   The result is used to sometimes discard a longer backward reference when
+   it is not much longer and encoding it costs more than the saved literals.
+
+   backward_reference_offset MUST be positive. */
+static BROTLI_INLINE score_t BackwardReferenceScore(
+    size_t copy_length, size_t backward_reference_offset) {
+  const score_t literal_gain =
+      BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length;
+  const score_t distance_cost =
+      BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);
+  return BROTLI_SCORE_BASE + literal_gain - distance_cost;
+}
+
+static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(
+    size_t copy_length) {
+  const score_t base_with_bonus = BROTLI_SCORE_BASE + 15;  /* flat bonus */
+  return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length + base_with_bonus;
+}
+
+static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance(
+    size_t distance_short_code) {
+  return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE);
+}
+
+static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
+    const BrotliEncoderDictionary* dictionary, size_t len, size_t word_idx,
+    const uint8_t* data, size_t max_length, size_t max_backward,
+    size_t max_distance, HasherSearchResult* out) {
+  /* Tries dictionary word |word_idx| of length |len| as a match for |data|.
+     On success stores the match in |out| and returns BROTLI_TRUE; |out| is
+     left untouched when BROTLI_FALSE is returned. */
+  const size_t word_offset =
+      dictionary->words->offsets_by_length[len] + len * word_idx;
+  size_t match_len;
+  size_t omitted;  /* trailing bytes of the word that did not match */
+  size_t transform;
+  size_t distance;
+  score_t candidate_score;
+
+  if (len > max_length) return BROTLI_FALSE;
+  match_len = FindMatchLengthWithLimit(
+      data, &dictionary->words->data[word_offset], len);
+  /* Reject empty matches and matches that omit at least
+     cutoffTransformsCount bytes of the word. */
+  if (match_len == 0) return BROTLI_FALSE;
+  if (match_len + dictionary->cutoffTransformsCount <= len) return BROTLI_FALSE;
+  omitted = len - match_len;
+  transform = (omitted << 2) +
+      (size_t)((dictionary->cutoffTransforms >> (omitted * 6)) & 0x3F);
+  distance = max_backward + 1 + word_idx +
+      (transform << dictionary->words->size_bits_by_length[len]);
+  if (distance > max_distance) return BROTLI_FALSE;
+
+  candidate_score = BackwardReferenceScore(match_len, distance);
+  if (candidate_score < out->score) return BROTLI_FALSE;
+
+  out->len = match_len;
+  out->len_code_delta = (int)len - (int)match_len;
+  out->distance = distance;
+  out->score = candidate_score;
+  return BROTLI_TRUE;
+}
+
+static BROTLI_INLINE void SearchInStaticDictionary(
+    const BrotliEncoderDictionary* dictionary,
+    HasherCommon* common, const uint8_t* data, size_t max_length,
+    size_t max_backward, size_t max_distance,
+    HasherSearchResult* out, BROTLI_BOOL shallow) {
+  /* Probes one (|shallow|) or two adjacent static dictionary hash slots for
+     |data| and lets TestStaticDictionaryItem update |out|. The search is
+     skipped once fewer than 1 in 128 lookups have produced a match. */
+  const size_t num_probes = shallow ? 1u : 2u;
+  size_t probe;
+  size_t slot;
+  if (common->dict_num_matches < (common->dict_num_lookups >> 7)) return;
+  slot = Hash14(data) << 1;
+  for (probe = 0; probe < num_probes; ++probe) {
+    const size_t item_len = dictionary->hash_table_lengths[slot + probe];
+    common->dict_num_lookups++;
+    if (item_len == 0) continue;
+    if (TestStaticDictionaryItem(
+            dictionary, item_len, dictionary->hash_table_words[slot + probe],
+            data, max_length, max_backward, max_distance, out)) {
+      common->dict_num_matches++;
+    }
+  }
+}
+
+typedef struct BackwardMatch {
+  uint32_t distance;
+  uint32_t length_and_code;
+} BackwardMatch;
+
+static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,
+    size_t dist, size_t len) {
+  self->distance = (uint32_t)dist;
+  self->length_and_code = (uint32_t)(len << 5);
+}
+
+static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,
+    size_t dist, size_t len, size_t len_code) {
+  const size_t code = (len == len_code) ? 0 : len_code;  /* 0 = same as len */
+  self->distance = (uint32_t)dist;
+  self->length_and_code = (uint32_t)((len << 5) | code);
+}
+
+static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {
+  return self->length_and_code >> 5;
+}
+
+static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
+  const size_t stored = self->length_and_code & 31;  /* 0: code == length */
+  return (stored != 0) ? stored : BackwardMatchLength(self);
+}
+
+#define EXPAND_CAT(a, b) CAT(a, b)
+#define CAT(a, b) a ## b
+#define FN(X) EXPAND_CAT(X, HASHER())
+
+#define HASHER() H10
+#define BUCKET_BITS 17
+#define MAX_TREE_SEARCH_DEPTH 64
+#define MAX_TREE_COMP_LENGTH 128
+#include "./hash_to_binary_tree_inc.h"  /* NOLINT(build/include) */
+#undef MAX_TREE_SEARCH_DEPTH
+#undef MAX_TREE_COMP_LENGTH
+#undef BUCKET_BITS
+#undef HASHER
+/* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */
+#define MAX_NUM_MATCHES_H10 128
+
+/* For BUCKET_SWEEP_BITS == 0, enabling the dictionary lookup makes compression
+   a little faster (0.5% - 1%) and it compresses 0.15% better on small text
+   and HTML inputs. */
+
+#define HASHER() H2
+#define BUCKET_BITS 16
+#define BUCKET_SWEEP_BITS 0
+#define HASH_LEN 5
+#define USE_DICTIONARY 1
+#include "./hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
+#undef BUCKET_SWEEP_BITS
+#undef USE_DICTIONARY
+#undef HASHER
+
+#define HASHER() H3
+#define BUCKET_SWEEP_BITS 1
+#define USE_DICTIONARY 0
+#include "./hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
+#undef USE_DICTIONARY
+#undef BUCKET_SWEEP_BITS
+#undef BUCKET_BITS
+#undef HASHER
+
+#define HASHER() H4
+#define BUCKET_BITS 17
+#define BUCKET_SWEEP_BITS 2
+#define USE_DICTIONARY 1
+#include "./hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
+#undef USE_DICTIONARY
+#undef HASH_LEN
+#undef BUCKET_SWEEP_BITS
+#undef BUCKET_BITS
+#undef HASHER
+
+#define HASHER() H5
+#include "./hash_longest_match_inc.h"  /* NOLINT(build/include) */
+#undef HASHER
+
+#define HASHER() H6
+#include "./hash_longest_match64_inc.h"  /* NOLINT(build/include) */
+#undef HASHER
+
+#define BUCKET_BITS 15
+
+#define NUM_LAST_DISTANCES_TO_CHECK 4
+#define NUM_BANKS 1
+#define BANK_BITS 16
+#define HASHER() H40
+#include "./hash_forgetful_chain_inc.h"  /* NOLINT(build/include) */
+#undef HASHER
+#undef NUM_LAST_DISTANCES_TO_CHECK
+
+#define NUM_LAST_DISTANCES_TO_CHECK 10
+#define HASHER() H41
+#include "./hash_forgetful_chain_inc.h"  /* NOLINT(build/include) */
+#undef HASHER
+#undef NUM_LAST_DISTANCES_TO_CHECK
+#undef NUM_BANKS
+#undef BANK_BITS
+
+#define NUM_LAST_DISTANCES_TO_CHECK 16
+#define NUM_BANKS 512
+#define BANK_BITS 9
+#define HASHER() H42
+#include "./hash_forgetful_chain_inc.h"  /* NOLINT(build/include) */
+#undef HASHER
+#undef NUM_LAST_DISTANCES_TO_CHECK
+#undef NUM_BANKS
+#undef BANK_BITS
+
+#undef BUCKET_BITS
+
+#define HASHER() H54
+#define BUCKET_BITS 20
+#define BUCKET_SWEEP_BITS 2
+#define HASH_LEN 7
+#define USE_DICTIONARY 0
+#include "./hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
+#undef USE_DICTIONARY
+#undef HASH_LEN
+#undef BUCKET_SWEEP_BITS
+#undef BUCKET_BITS
+#undef HASHER
+
+/* fast large window hashers */
+
+#define HASHER() HROLLING_FAST
+#define CHUNKLEN 32
+#define JUMP 4
+#define NUMBUCKETS 16777216
+#define MASK ((NUMBUCKETS * 64) - 1)
+#include "./hash_rolling_inc.h"  /* NOLINT(build/include) */
+#undef JUMP
+#undef HASHER
+
+
+#define HASHER() HROLLING
+#define JUMP 1
+#include "./hash_rolling_inc.h"  /* NOLINT(build/include) */
+#undef MASK
+#undef NUMBUCKETS
+#undef JUMP
+#undef CHUNKLEN
+#undef HASHER
+
+#define HASHER() H35
+#define HASHER_A H3
+#define HASHER_B HROLLING_FAST
+#include "./hash_composite_inc.h"  /* NOLINT(build/include) */
+#undef HASHER_A
+#undef HASHER_B
+#undef HASHER
+
+#define HASHER() H55
+#define HASHER_A H54
+#define HASHER_B HROLLING_FAST
+#include "./hash_composite_inc.h"  /* NOLINT(build/include) */
+#undef HASHER_A
+#undef HASHER_B
+#undef HASHER
+
+#define HASHER() H65
+#define HASHER_A H6
+#define HASHER_B HROLLING
+#include "./hash_composite_inc.h"  /* NOLINT(build/include) */
+#undef HASHER_A
+#undef HASHER_B
+#undef HASHER
+
+#undef FN
+#undef CAT
+#undef EXPAND_CAT
+
+#define FOR_SIMPLE_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)
+#define FOR_COMPOSITE_HASHERS(H) H(35) H(55) H(65)
+#define FOR_GENERIC_HASHERS(H) FOR_SIMPLE_HASHERS(H) FOR_COMPOSITE_HASHERS(H)
+#define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
+
+typedef struct {
+  HasherCommon common;
+
+  union {
+#define MEMBER_(N) \
+    H ## N _H ## N;
+    FOR_ALL_HASHERS(MEMBER_)
+#undef MEMBER_
+  } privat;
+} Hasher;
+
+/* MUST be invoked before any other method. */
+static BROTLI_INLINE void HasherInit(Hasher* hasher) {
+  hasher->common.extra = NULL;
+}
+
+static BROTLI_INLINE void DestroyHasher(MemoryManager* m, Hasher* hasher) {
+  if (hasher->common.extra == NULL) return;
+  BROTLI_FREE(m, hasher->common.extra);
+}
+
+static BROTLI_INLINE void HasherReset(Hasher* hasher) {
+  hasher->common.is_prepared_ = BROTLI_FALSE;
+}
+
+static BROTLI_INLINE size_t HasherSize(const BrotliEncoderParams* params,
+    BROTLI_BOOL one_shot, const size_t input_size) {
+  switch (params->hasher.type) {  /* one case per FOR_ALL_HASHERS entry */
+#define SIZE_(N)                                                      \
+    case N:                                                           \
+      return HashMemAllocInBytesH ## N(params, one_shot, input_size);
+    FOR_ALL_HASHERS(SIZE_)
+#undef SIZE_
+    default:
+      break;
+  }
+  return 0;  /* Hasher type not listed in FOR_ALL_HASHERS. */
+}
+
+static BROTLI_INLINE void HasherSetup(MemoryManager* m, Hasher* hasher,
+    BrotliEncoderParams* params, const uint8_t* data, size_t position,
+    size_t input_size, BROTLI_BOOL is_last) {
+  BROTLI_BOOL one_shot = (position == 0 && is_last);
+  if (hasher->common.extra == NULL) {  /* no memory yet: choose and allocate */
+    size_t alloc_size;
+    ChooseHasher(params, &params->hasher);
+    alloc_size = HasherSize(params, one_shot, input_size);
+    hasher->common.extra = BROTLI_ALLOC(m, uint8_t, alloc_size);
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(hasher->common.extra)) return;
+    hasher->common.params = params->hasher;
+    switch (hasher->common.params.type) {
+#define INITIALIZE_(N)                        \
+      case N:                                 \
+        InitializeH ## N(&hasher->common,     \
+            &hasher->privat._H ## N, params); \
+        break;
+      FOR_ALL_HASHERS(INITIALIZE_);
+#undef INITIALIZE_
+      default:
+        break;
+    }
+    HasherReset(hasher);  /* clears is_prepared_, forcing the step below */
+  }
+
+  if (!hasher->common.is_prepared_) {
+    switch (hasher->common.params.type) {
+#define PREPARE_(N)                      \
+      case N:                            \
+        PrepareH ## N(                   \
+            &hasher->privat._H ## N,     \
+            one_shot, input_size, data); \
+        break;
+      FOR_ALL_HASHERS(PREPARE_)
+#undef PREPARE_
+      default: break;
+    }
+    if (position == 0) {  /* position 0: restart dictionary statistics */
+      hasher->common.dict_num_lookups = 0;
+      hasher->common.dict_num_matches = 0;
+    }
+    hasher->common.is_prepared_ = BROTLI_TRUE;
+  }
+}
+
+static BROTLI_INLINE void InitOrStitchToPreviousBlock(
+    MemoryManager* m, Hasher* hasher, const uint8_t* data, size_t mask,
+    BrotliEncoderParams* params, size_t position, size_t input_size,
+    BROTLI_BOOL is_last) {
+  HasherSetup(m, hasher, params, data, position, input_size, is_last);
+  if (BROTLI_IS_OOM(m)) return;  /* allocation in HasherSetup failed */
+  switch (hasher->common.params.type) {  /* dispatch to the chosen hasher */
+#define INIT_(N)                             \
+    case N:                                  \
+      StitchToPreviousBlockH ## N(           \
+          &hasher->privat._H ## N,           \
+          input_size, position, data, mask); \
+    break;
+    FOR_ALL_HASHERS(INIT_)
+#undef INIT_
+    default: break;
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_HASH_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_composite_inc.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_composite_inc.h
new file mode 100755
index 0000000000..cba156c0e2
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_composite_inc.h
@@ -0,0 +1,125 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2018 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, HASHER_A, HASHER_B */
+
+/* Composite hasher: This hasher allows to combine two other hashers, HASHER_A
+   and HASHER_B. */
+
+#define HashComposite HASHER()
+
+#define FN_A(X) EXPAND_CAT(X, HASHER_A)
+#define FN_B(X) EXPAND_CAT(X, HASHER_B)
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) {
+  const size_t len_a = FN_A(HashTypeLength)();
+  const size_t len_b = FN_B(HashTypeLength)();
+  return (len_b < len_a) ? len_a : len_b;  /* larger of the two hashers */
+}
+
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
+  const size_t ahead_a = FN_A(StoreLookahead)();
+  const size_t ahead_b = FN_B(StoreLookahead)();
+  return (ahead_b < ahead_a) ? ahead_a : ahead_b;  /* larger of the two */
+}
+
+typedef struct HashComposite {
+  HASHER_A ha;
+  HASHER_B hb;
+  HasherCommon hb_common;
+
+  /* Shortcuts. */
+  void* extra;
+  HasherCommon* common;
+
+  BROTLI_BOOL fresh;
+  const BrotliEncoderParams* params;
+} HashComposite;
+
+static void FN(Initialize)(HasherCommon* common,
+    HashComposite* BROTLI_RESTRICT self, const BrotliEncoderParams* params) {
+  self->common = common;
+  self->extra = common->extra;
+
+  self->hb_common = *self->common;
+  self->fresh = BROTLI_TRUE;
+  self->params = params;
+  /* TODO: Initialization of the two hashers is deferred to Prepare (and
+     |params| is remembered here) because the one_shot and input_size values
+     needed to compute their memory sizes are not available here. Instead,
+     provide those params to every hasher's FN(Initialize). */
+}
+
+static void FN(Prepare)(
+    HashComposite* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
+  if (self->fresh) {  /* first call: initialize both component hashers */
+    self->fresh = BROTLI_FALSE;
+    self->hb_common.extra = (uint8_t*)self->extra +
+        FN_A(HashMemAllocInBytes)(self->params, one_shot, input_size);
+
+    FN_A(Initialize)(self->common, &self->ha, self->params);
+    FN_B(Initialize)(&self->hb_common, &self->hb, self->params);
+  }
+  FN_A(Prepare)(&self->ha, one_shot, input_size, data);
+  FN_B(Prepare)(&self->hb, one_shot, input_size, data);
+}
+
+static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size) {
+  return FN_A(HashMemAllocInBytes)(params, one_shot, input_size) +
+      FN_B(HashMemAllocInBytes)(params, one_shot, input_size);
+}
+
+static BROTLI_INLINE void FN(Store)(HashComposite* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
+  FN_A(Store)(&self->ha, data, mask, ix);
+  FN_B(Store)(&self->hb, data, mask, ix);
+}
+
+static BROTLI_INLINE void FN(StoreRange)(
+    HashComposite* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
+    const size_t mask, const size_t ix_start,
+    const size_t ix_end) {
+  FN_A(StoreRange)(&self->ha, data, mask, ix_start, ix_end);
+  FN_B(StoreRange)(&self->hb, data, mask, ix_start, ix_end);
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(
+    HashComposite* BROTLI_RESTRICT self,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ring_buffer_mask) {
+  FN_A(StitchToPreviousBlock)(&self->ha, num_bytes, position,
+      ringbuffer, ring_buffer_mask);
+  FN_B(StitchToPreviousBlock)(&self->hb, num_bytes, position,
+      ringbuffer, ring_buffer_mask);
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(
+    HashComposite* BROTLI_RESTRICT self, int* BROTLI_RESTRICT distance_cache) {
+  FN_A(PrepareDistanceCache)(&self->ha, distance_cache);
+  FN_B(PrepareDistanceCache)(&self->hb, distance_cache);
+}
+
+static BROTLI_INLINE void FN(FindLongestMatch)(
+    HashComposite* BROTLI_RESTRICT self,
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
+    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
+    const size_t max_length, const size_t max_backward,
+    const size_t dictionary_distance, const size_t max_distance,
+    HasherSearchResult* BROTLI_RESTRICT out) {
+  FN_A(FindLongestMatch)(&self->ha, dictionary, data, ring_buffer_mask,
+      distance_cache, cur_ix, max_length, max_backward, dictionary_distance,
+      max_distance, out);
+  FN_B(FindLongestMatch)(&self->hb, dictionary, data, ring_buffer_mask,
+      distance_cache, cur_ix, max_length, max_backward, dictionary_distance,
+      max_distance, out);
+}
+
+#undef HashComposite
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_forgetful_chain_inc.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_forgetful_chain_inc.h
new file mode 100644
index 0000000000..bfae6ba6a2
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_forgetful_chain_inc.h
@@ -0,0 +1,293 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, BUCKET_BITS, NUM_BANKS, BANK_BITS,
+                        NUM_LAST_DISTANCES_TO_CHECK */
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data.
+
+   Hashes are stored in chains which are bucketed to groups. Group of chains
+   share a storage "bank". When more than "bank size" chain nodes are added,
+   oldest nodes are replaced; this way several chains may share a tail. */
+
+#define HashForgetfulChain HASHER()
+
+#define BANK_SIZE (1 << BANK_BITS)
+
+/* Number of hash buckets. */
+#define BUCKET_SIZE (1 << BUCKET_BITS)
+
+#define CAPPED_CHAINS 0
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
+
+/* HashBytes is the function that chooses the bucket to place the address in.*/
+static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data) {
+  /* Multiplicative hash of the 32-bit little-endian word at |data|; the
+     BUCKET_BITS highest bits, which are mixed best, select the bucket. */
+  const uint32_t product = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
+  return product >> (32 - BUCKET_BITS);
+}
+
+typedef struct FN(Slot) {
+  uint16_t delta;
+  uint16_t next;
+} FN(Slot);
+
+typedef struct FN(Bank) {
+  FN(Slot) slots[BANK_SIZE];
+} FN(Bank);
+
+typedef struct HashForgetfulChain {
+  uint16_t free_slot_idx[NUM_BANKS];  /* Up to 1KiB. Move to dynamic? */
+  size_t max_hops;
+
+  /* Shortcuts. */
+  void* extra;
+  HasherCommon* common;
+
+  /* --- Dynamic size members --- */
+
+  /* uint32_t addr[BUCKET_SIZE]; */
+
+  /* uint16_t head[BUCKET_SIZE]; */
+
+  /* Truncated hash used for quick rejection of "distance cache" candidates. */
+  /* uint8_t tiny_hash[65536];*/
+
+  /* FN(Bank) banks[NUM_BANKS]; */
+} HashForgetfulChain;
+
+static uint32_t* FN(Addr)(void* extra) {
+  return (uint32_t*)extra;
+}
+
+static uint16_t* FN(Head)(void* extra) {
+  return (uint16_t*)(&FN(Addr)(extra)[BUCKET_SIZE]);
+}
+
+static uint8_t* FN(TinyHash)(void* extra) {
+  return (uint8_t*)(&FN(Head)(extra)[BUCKET_SIZE]);
+}
+
+static FN(Bank)* FN(Banks)(void* extra) {
+  return (FN(Bank)*)(&FN(TinyHash)(extra)[65536]);
+}
+
+static void FN(Initialize)(
+    HasherCommon* common, HashForgetfulChain* BROTLI_RESTRICT self,
+    const BrotliEncoderParams* params) {
+  const unsigned int hop_base = (params->quality > 6) ? 7u : 8u;
+  self->extra = common->extra;
+  self->common = common;
+  self->max_hops = hop_base << (params->quality - 4);  /* needs quality >= 4 */
+}
+
+static void FN(Prepare)(
+    HashForgetfulChain* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
+  uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra);
+  uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra);
+  uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra);
+  /* Partial preparation is 100 times slower (per bucket). */
+  size_t partial_prepare_threshold = BUCKET_SIZE >> 6;
+  if (one_shot && input_size <= partial_prepare_threshold) {
+    size_t i;
+    for (i = 0; i < input_size; ++i) {
+      size_t bucket = FN(HashBytes)(&data[i]);
+      /* NOTE(review): no InitEmpty in this file; head is 0 in the else path. */
+      addr[bucket] = 0xCCCCCCCC;
+      head[bucket] = 0xCCCC;
+    }
+  } else {
+    /* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, the
+       position processed by the hasher never reaches 3GB + 64M; this makes
+       all new chains terminate after the first node. */
+    memset(addr, 0xCC, sizeof(uint32_t) * BUCKET_SIZE);
+    memset(head, 0, sizeof(uint16_t) * BUCKET_SIZE);
+  }
+  memset(tiny_hash, 0, sizeof(uint8_t) * 65536);
+  memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));
+}
+
+static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size) {
+  BROTLI_UNUSED(params);
+  BROTLI_UNUSED(one_shot);
+  BROTLI_UNUSED(input_size);
+  return sizeof(uint32_t) * BUCKET_SIZE + sizeof(uint16_t) * BUCKET_SIZE +
+         sizeof(uint8_t) * 65536 + sizeof(FN(Bank)) * NUM_BANKS;
+}
+
+/* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
+   node to corresponding chain; also update tiny_hash for current position. */
+static BROTLI_INLINE void FN(Store)(HashForgetfulChain* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
+  uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra);
+  uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra);
+  uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra);
+  FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra);
+  const size_t key = FN(HashBytes)(&data[ix & mask]);
+  const size_t bank = key & (NUM_BANKS - 1);  /* low key bits pick the bank */
+  const size_t idx = self->free_slot_idx[bank]++ & (BANK_SIZE - 1);
+  size_t delta = ix - addr[key];  /* distance to the previous chain head */
+  tiny_hash[(uint16_t)ix] = (uint8_t)key;
+  if (delta > 0xFFFF) delta = CAPPED_CHAINS ? 0 : 0xFFFF;
+  banks[bank].slots[idx].delta = (uint16_t)delta;
+  banks[bank].slots[idx].next = head[key];  /* link to the old chain head */
+  addr[key] = (uint32_t)ix;
+  head[key] = (uint16_t)idx;
+}
+
+static BROTLI_INLINE void FN(StoreRange)(
+    HashForgetfulChain* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
+    const size_t ix_start, const size_t ix_end) {
+  size_t ix = ix_start;  /* stores every position in [ix_start, ix_end) */
+  while (ix < ix_end) {
+    FN(Store)(self, data, mask, ix++);
+  }
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(
+    HashForgetfulChain* BROTLI_RESTRICT self,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ring_buffer_mask) {
+  size_t back;
+  if (num_bytes < FN(HashTypeLength)() - 1 || position < 3) return;
+  /* Hash the last three positions of the previous write. They could not be
+     stored earlier because each hash also covers bytes of the current
+     block. */
+  for (back = 3; back > 0; --back) {
+    FN(Store)(self, ringbuffer, ring_buffer_mask, position - back);
+  }
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(
+    HashForgetfulChain* BROTLI_RESTRICT self,
+    int* BROTLI_RESTRICT distance_cache) {
+  BROTLI_UNUSED(self);
+  PrepareDistanceCache(distance_cache, NUM_LAST_DISTANCES_TO_CHECK);
+}
+
+/* Find a longest backward match of &data[cur_ix] up to the length of
+   max_length and stores the position cur_ix in the hash table.
+
+   REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
+             values; if this method is invoked repeatedly with the same distance
+             cache values, it is enough to invoke FN(PrepareDistanceCache) once.
+
+   Does not look for matches longer than max_length.
+   Does not look for matches further away than max_backward.
+   Writes the best match into |out|.
+   |out|->score is updated only if a better match is found. */
+static BROTLI_INLINE void FN(FindLongestMatch)(
+    HashForgetfulChain* BROTLI_RESTRICT self,
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
+    const int* BROTLI_RESTRICT distance_cache,
+    const size_t cur_ix, const size_t max_length, const size_t max_backward,
+    const size_t dictionary_distance, const size_t max_distance,
+    HasherSearchResult* BROTLI_RESTRICT out) {
+  uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra);
+  uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra);
+  uint8_t* BROTLI_RESTRICT tiny_hashes = FN(TinyHash)(self->extra);
+  FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra);
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  /* Don't accept a short copy from far away. */
+  score_t min_score = out->score;
+  score_t best_score = out->score;
+  size_t best_len = out->len;
+  size_t i;
+  const size_t key = FN(HashBytes)(&data[cur_ix_masked]);
+  const uint8_t tiny_hash = (uint8_t)(key);
+  out->len = 0;
+  out->len_code_delta = 0;
+  /* Try last distance first. */
+  for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {
+    const size_t backward = (size_t)distance_cache[i];
+    size_t prev_ix = (cur_ix - backward);
+    /* For distance code 0 we want to consider 2-byte matches. */
+    if (i > 0 && tiny_hashes[(uint16_t)prev_ix] != tiny_hash) continue;
+    if (prev_ix >= cur_ix || backward > max_backward) {
+      continue;  /* distance is zero, wrapped, or too far back */
+    }
+    prev_ix &= ring_buffer_mask;
+    {
+      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                  &data[cur_ix_masked],
+                                                  max_length);
+      if (len >= 2) {
+        score_t score = BackwardReferenceScoreUsingLastDistance(len);
+        if (best_score < score) {
+          if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
+          if (best_score < score) {
+            best_score = score;
+            best_len = len;
+            out->len = best_len;
+            out->distance = backward;
+            out->score = best_score;
+          }
+        }
+      }
+    }
+  }
+  {
+    const size_t bank = key & (NUM_BANKS - 1);
+    size_t backward = 0;
+    size_t hops = self->max_hops;
+    size_t delta = cur_ix - addr[key];
+    size_t slot = head[key];
+    while (hops--) {  /* walk the chain for |key|, at most max_hops nodes */
+      size_t prev_ix;
+      size_t last = slot;
+      backward += delta;
+      if (backward > max_backward || (CAPPED_CHAINS && !delta)) break;
+      prev_ix = (cur_ix - backward) & ring_buffer_mask;
+      slot = banks[bank].slots[last].next;
+      delta = banks[bank].slots[last].delta;
+      if (cur_ix_masked + best_len > ring_buffer_mask ||
+          prev_ix + best_len > ring_buffer_mask ||
+          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+        continue;  /* this candidate cannot be longer than best_len */
+      }
+      {
+        const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                    &data[cur_ix_masked],
+                                                    max_length);
+        if (len >= 4) {
+          /* Shorter matches are skipped here: this saves a few unnecessary
+             binary logarithms in the backward reference score, since we
+             are not interested in such short matches. */
+          score_t score = BackwardReferenceScore(len, backward);
+          if (best_score < score) {
+            best_score = score;
+            best_len = len;
+            out->len = best_len;
+            out->distance = backward;
+            out->score = best_score;
+          }
+        }
+      }
+    }
+    FN(Store)(self, data, ring_buffer_mask, cur_ix);  /* record cur_ix */
+  }
+  if (out->score == min_score) {  /* nothing better found: try dictionary */
+    SearchInStaticDictionary(dictionary,
+        self->common, &data[cur_ix_masked], max_length, dictionary_distance,
+        max_distance, out, BROTLI_FALSE);
+  }
+}
+
+#undef BANK_SIZE
+#undef BUCKET_SIZE
+#undef CAPPED_CHAINS
+
+#undef HashForgetfulChain
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_longest_match64_inc.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_longest_match64_inc.h
new file mode 100644
index 0000000000..bdee7e41b5
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_longest_match64_inc.h
@@ -0,0 +1,267 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data.
+
+   This is a hash map of fixed size (bucket_size_) to a ring buffer of
+   fixed size (block_size_). The ring buffer contains the last block_size_
+   index positions of the given hash key in the compressed data. */
+
+#define HashLongestMatch HASHER()
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }  /* FN(HashBytes) loads 8 bytes. */
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }  /* Same value as HashTypeLength. */
+
+/* HashBytes chooses the bucket for the |mask|-selected bits of 8 bytes at data. */
+static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data,
+                                            const uint64_t mask,
+                                            const int shift) {
+  const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(data) & mask) * kHashMul64Long;
+  /* The higher bits contain more mixture from the multiplication,
+     so we take our results from there. */
+  return (uint32_t)(h >> shift);  /* Keeps the top (64 - shift) bits. */
+}
+
+typedef struct HashLongestMatch {
+  /* Number of hash buckets. */
+  size_t bucket_size_;
+  /* Only block_size_ newest backward references are kept,
+     and the older are forgotten. */
+  size_t block_size_;
+  /* Right-shift applied to the 64-bit hash value to get the bucket index. */
+  int hash_shift_;
+  /* Mask selecting the low 8 * hash_len bits of the 64-bit input load. */
+  uint64_t hash_mask_;
+  /* Mask for accessing entries in a block (in a ring-buffer manner). */
+  uint32_t block_mask_;
+
+  int block_bits_;  /* log2(block_size_). */
+  int num_last_distances_to_check_;  /* Distance-cache entries tried first. */
+
+  /* Shortcuts. */
+  HasherCommon* common_;
+
+  /* --- Dynamic size members --- */
+
+  /* Per-bucket store counter; its low bits select the slot within the block. */
+  uint16_t* num_;  /* uint16_t[bucket_size]; */
+
+  /* Buckets containing block_size_ of backward references. */
+  uint32_t* buckets_;  /* uint32_t[bucket_size * block_size]; */
+} HashLongestMatch;
+
+static void FN(Initialize)(
+    HasherCommon* common, HashLongestMatch* BROTLI_RESTRICT self,
+    const BrotliEncoderParams* params) {
+  self->common_ = common;
+  /* All settings are taken from common->params; |params| is not read. */
+  BROTLI_UNUSED(params);
+  self->hash_shift_ = 64 - common->params.bucket_bits;
+  self->hash_mask_ = (~((uint64_t)0U)) >> (64 - 8 * common->params.hash_len);
+  self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
+  self->block_bits_ = common->params.block_bits;
+  self->block_size_ = (size_t)1 << common->params.block_bits;
+  self->block_mask_ = (uint32_t)(self->block_size_ - 1);
+  self->num_last_distances_to_check_ =
+      common->params.num_last_distances_to_check;
+  self->num_ = (uint16_t*)common->extra;  /* Counters start at common->extra. */
+  self->buckets_ = (uint32_t*)&self->num_[self->bucket_size_];  /* After num_. */
+}
+
+static void FN(Prepare)(
+    HashLongestMatch* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
+  uint16_t* BROTLI_RESTRICT num = self->num_;
+  /* Partial preparation is 100 times slower (per socket). */
+  size_t partial_prepare_threshold = self->bucket_size_ >> 6;
+  if (one_shot && input_size <= partial_prepare_threshold) {
+    size_t i;
+    for (i = 0; i < input_size; ++i) {  /* Small input: reset touched keys. */
+      const uint32_t key = FN(HashBytes)(&data[i], self->hash_mask_,
+                                         self->hash_shift_);
+      num[key] = 0;
+    }
+  } else {
+    memset(num, 0, self->bucket_size_ * sizeof(num[0]));  /* Reset them all. */
+  }
+}
+
+static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size) {
+  size_t bucket_size = (size_t)1 << params->hasher.bucket_bits;
+  size_t block_size = (size_t)1 << params->hasher.block_bits;
+  BROTLI_UNUSED(one_shot);
+  BROTLI_UNUSED(input_size);
+  return sizeof(uint16_t) * bucket_size +  /* num_ counters */
+         sizeof(uint32_t) * bucket_size * block_size;  /* buckets_ entries */
+}
+
+/* Hash the hash_mask_-selected bits of the 8 bytes at &data[ix & mask] and
+   store the value of ix in the next ring slot of the resulting bucket. */
+static BROTLI_INLINE void FN(Store)(
+    HashLongestMatch* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
+    const size_t mask, const size_t ix) {
+  uint16_t* BROTLI_RESTRICT num = self->num_;
+  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
+  const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_mask_,
+                                     self->hash_shift_);
+  const size_t minor_ix = num[key] & self->block_mask_;
+  const size_t offset = minor_ix + (key << self->block_bits_);
+  buckets[offset] = (uint32_t)ix;
+  ++num[key];  /* Advance the bucket's ring cursor. */
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
+    const size_t ix_start, const size_t ix_end) {
+  size_t i;
+  for (i = ix_start; i < ix_end; ++i) {  /* Positions [ix_start, ix_end). */
+    FN(Store)(self, data, mask, i);
+  }
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(
+    HashLongestMatch* BROTLI_RESTRICT self,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ringbuffer_mask) {
+  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
+    /* Store the hashes for the three positions just before |position|.
+       These could not be calculated before, since they require knowledge
+       of both the previous and the current block. */
+    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
+    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
+    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
+  }
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(
+    HashLongestMatch* BROTLI_RESTRICT self,
+    int* BROTLI_RESTRICT distance_cache) {  /* Forwarded to the shared helper. */
+  PrepareDistanceCache(distance_cache, self->num_last_distances_to_check_);
+}
+
+/* Find a longest backward match of &data[cur_ix & ring_buffer_mask] up to the
+   length of max_length and stores the position cur_ix in the hash table.
+
+   REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
+             values; if this method is invoked repeatedly with the same distance
+             cache values, it is enough to invoke FN(PrepareDistanceCache) once.
+
+   Does not look for matches longer than max_length.
+   Does not look for matches further away than max_backward.
+   Writes the best match into |out|; |out|->len and |out|->len_code_delta are
+   zeroed on entry. |out|->score is updated only if a better match is found. */
+static BROTLI_INLINE void FN(FindLongestMatch)(
+    HashLongestMatch* BROTLI_RESTRICT self,
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
+    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
+    const size_t max_length, const size_t max_backward,
+    const size_t dictionary_distance, const size_t max_distance,
+    HasherSearchResult* BROTLI_RESTRICT out) {
+  uint16_t* BROTLI_RESTRICT num = self->num_;
+  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  /* Don't accept a short copy from far away. */
+  score_t min_score = out->score;
+  score_t best_score = out->score;
+  size_t best_len = out->len;
+  size_t i;
+  out->len = 0;
+  out->len_code_delta = 0;
+  /* Try last distance first. */
+  for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
+    const size_t backward = (size_t)distance_cache[i];
+    size_t prev_ix = (size_t)(cur_ix - backward);
+    if (prev_ix >= cur_ix) {  /* backward == 0, or the subtraction wrapped. */
+      continue;
+    }
+    if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
+      continue;
+    }
+    prev_ix &= ring_buffer_mask;
+    /* Quick reject: the byte at offset best_len must be in range and match. */
+    if (cur_ix_masked + best_len > ring_buffer_mask ||
+        prev_ix + best_len > ring_buffer_mask ||
+        data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+      continue;
+    }
+    {
+      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                  &data[cur_ix_masked],
+                                                  max_length);
+      if (len >= 3 || (len == 2 && i < 2)) {
+        /* Length-2 matches are only accepted for the first two cache entries
+           (i < 2); shorter matches are skipped before any score is computed,
+           since we are not interested in such short matches. */
+        score_t score = BackwardReferenceScoreUsingLastDistance(len);
+        if (best_score < score) {
+          if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
+          if (best_score < score) {
+            best_score = score;
+            best_len = len;
+            out->len = best_len;
+            out->distance = backward;
+            out->score = best_score;
+          }
+        }
+      }
+    }
+  }
+  {
+    const uint32_t key = FN(HashBytes)(
+        &data[cur_ix_masked], self->hash_mask_, self->hash_shift_);
+    uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
+    const size_t down =
+        (num[key] > self->block_size_) ?
+        (num[key] - self->block_size_) : 0u;
+    for (i = num[key]; i > down;) {  /* Most recently stored entry first. */
+      size_t prev_ix = bucket[--i & self->block_mask_];
+      const size_t backward = cur_ix - prev_ix;
+      if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
+        break;
+      }
+      prev_ix &= ring_buffer_mask;
+      if (cur_ix_masked + best_len > ring_buffer_mask ||
+          prev_ix + best_len > ring_buffer_mask ||
+          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+        continue;
+      }
+      {
+        const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                    &data[cur_ix_masked],
+                                                    max_length);
+        if (len >= 4) {
+          /* Matches shorter than 4 bytes are skipped before scoring; this
+             saves a few unnecessary binary logarithms in backward reference
+             score, since we are not interested in such short matches. */
+          score_t score = BackwardReferenceScore(len, backward);
+          if (best_score < score) {
+            best_score = score;
+            best_len = len;
+            out->len = best_len;
+            out->distance = backward;
+            out->score = best_score;
+          }
+        }
+      }
+    }
+    bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix;  /* Record cur_ix. */
+    ++num[key];
+  }
+  if (min_score == out->score) {  /* No better match was found above. */
+    SearchInStaticDictionary(dictionary,
+        self->common_, &data[cur_ix_masked], max_length, dictionary_distance,
+        max_distance, out, BROTLI_FALSE);
+  }
+}
+
+#undef HashLongestMatch
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_longest_match_inc.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_longest_match_inc.h
new file mode 100644
index 0000000000..27f4463d7f
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_longest_match_inc.h
@@ -0,0 +1,262 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data.
+
+   This is a hash map of fixed size (bucket_size_) to a ring buffer of
+   fixed size (block_size_). The ring buffer contains the last block_size_
+   index positions of the given hash key in the compressed data. */
+
+#define HashLongestMatch HASHER()
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }  /* FN(HashBytes) loads 4 bytes. */
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }  /* Same value as HashTypeLength. */
+
+/* HashBytes is the function that chooses the bucket to place the address in. */
+static uint32_t FN(HashBytes)(
+    const uint8_t* BROTLI_RESTRICT data, const int shift) {
+  uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;  /* 4 input bytes. */
+  /* The higher bits contain more mixture from the multiplication,
+     so we take our results from there. */
+  return (uint32_t)(h >> shift);  /* Keeps the top (32 - shift) bits. */
+}
+
+typedef struct HashLongestMatch {
+  /* Number of hash buckets. */
+  size_t bucket_size_;
+  /* Only block_size_ newest backward references are kept,
+     and the older are forgotten. */
+  size_t block_size_;
+  /* Right-shift applied to the 32-bit hash value to get the bucket index. */
+  int hash_shift_;
+  /* Mask for accessing entries in a block (in a ring-buffer manner). */
+  uint32_t block_mask_;
+
+  int block_bits_;  /* log2(block_size_). */
+  int num_last_distances_to_check_;  /* Distance-cache entries tried first. */
+
+  /* Shortcuts. */
+  HasherCommon* common_;
+
+  /* --- Dynamic size members --- */
+
+  /* Per-bucket store counter; its low bits select the slot within the block. */
+  uint16_t* num_;  /* uint16_t[bucket_size]; */
+
+  /* Buckets containing block_size_ of backward references. */
+  uint32_t* buckets_;  /* uint32_t[bucket_size * block_size]; */
+} HashLongestMatch;
+
+static BROTLI_INLINE uint16_t* FN(Num)(void* extra) {
+  return (uint16_t*)extra;  /* Reinterprets |extra| as a uint16_t array. */
+}
+
+static void FN(Initialize)(
+    HasherCommon* common, HashLongestMatch* BROTLI_RESTRICT self,
+    const BrotliEncoderParams* params) {
+  self->common_ = common;
+  /* All settings are taken from common->params; |params| is not read. */
+  BROTLI_UNUSED(params);
+  self->hash_shift_ = 32 - common->params.bucket_bits;
+  self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
+  self->block_size_ = (size_t)1 << common->params.block_bits;
+  self->block_mask_ = (uint32_t)(self->block_size_ - 1);
+  self->num_ = (uint16_t*)common->extra;  /* Counters start at common->extra. */
+  self->buckets_ = (uint32_t*)(&self->num_[self->bucket_size_]);  /* After num_. */
+  self->block_bits_ = common->params.block_bits;
+  self->num_last_distances_to_check_ =
+      common->params.num_last_distances_to_check;
+}
+
+static void FN(Prepare)(
+    HashLongestMatch* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
+  uint16_t* BROTLI_RESTRICT num = self->num_;
+  /* Partial preparation is 100 times slower (per socket). */
+  size_t partial_prepare_threshold = self->bucket_size_ >> 6;
+  if (one_shot && input_size <= partial_prepare_threshold) {
+    size_t i;
+    for (i = 0; i < input_size; ++i) {  /* Small input: reset touched keys. */
+      const uint32_t key = FN(HashBytes)(&data[i], self->hash_shift_);
+      num[key] = 0;
+    }
+  } else {
+    memset(num, 0, self->bucket_size_ * sizeof(num[0]));  /* Reset them all. */
+  }
+}
+
+static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size) {
+  size_t bucket_size = (size_t)1 << params->hasher.bucket_bits;
+  size_t block_size = (size_t)1 << params->hasher.block_bits;
+  BROTLI_UNUSED(one_shot);
+  BROTLI_UNUSED(input_size);
+  return sizeof(uint16_t) * bucket_size +  /* num_ counters */
+         sizeof(uint32_t) * bucket_size * block_size;  /* buckets_ entries */
+}
+
+/* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and store
+   the value of ix in the next ring slot of the bucket that hash selects. */
+static BROTLI_INLINE void FN(Store)(
+    HashLongestMatch* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
+    const size_t mask, const size_t ix) {
+  const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_shift_);
+  const size_t minor_ix = self->num_[key] & self->block_mask_;
+  const size_t offset = minor_ix + (key << self->block_bits_);
+  self->buckets_[offset] = (uint32_t)ix;
+  ++self->num_[key];  /* Advance the bucket's ring cursor. */
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
+    const size_t ix_start, const size_t ix_end) {
+  size_t i;
+  for (i = ix_start; i < ix_end; ++i) {  /* Positions [ix_start, ix_end). */
+    FN(Store)(self, data, mask, i);
+  }
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(
+    HashLongestMatch* BROTLI_RESTRICT self,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ringbuffer_mask) {
+  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
+    /* Store the hashes for the three positions just before |position|.
+       These could not be calculated before, since they require knowledge
+       of both the previous and the current block. */
+    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
+    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
+    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
+  }
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(
+    HashLongestMatch* BROTLI_RESTRICT self,
+    int* BROTLI_RESTRICT distance_cache) {  /* Forwarded to the shared helper. */
+  PrepareDistanceCache(distance_cache, self->num_last_distances_to_check_);
+}
+
+/* Find a longest backward match of &data[cur_ix & ring_buffer_mask] up to the
+   length of max_length and stores the position cur_ix in the hash table.
+
+   REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
+             values; if this method is invoked repeatedly with the same distance
+             cache values, it is enough to invoke FN(PrepareDistanceCache) once.
+
+   Does not look for matches longer than max_length.
+   Does not look for matches further away than max_backward.
+   Writes the best match into |out|; |out|->len and |out|->len_code_delta are
+   zeroed on entry. |out|->score is updated only if a better match is found. */
+static BROTLI_INLINE void FN(FindLongestMatch)(
+    HashLongestMatch* BROTLI_RESTRICT self,
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
+    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
+    const size_t max_length, const size_t max_backward,
+    const size_t dictionary_distance, const size_t max_distance,
+    HasherSearchResult* BROTLI_RESTRICT out) {
+  uint16_t* BROTLI_RESTRICT num = self->num_;
+  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  /* Don't accept a short copy from far away. */
+  score_t min_score = out->score;
+  score_t best_score = out->score;
+  size_t best_len = out->len;
+  size_t i;
+  out->len = 0;
+  out->len_code_delta = 0;
+  /* Try last distance first. */
+  for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
+    const size_t backward = (size_t)distance_cache[i];
+    size_t prev_ix = (size_t)(cur_ix - backward);
+    if (prev_ix >= cur_ix) {  /* backward == 0, or the subtraction wrapped. */
+      continue;
+    }
+    if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
+      continue;
+    }
+    prev_ix &= ring_buffer_mask;
+    /* Quick reject: the byte at offset best_len must be in range and match. */
+    if (cur_ix_masked + best_len > ring_buffer_mask ||
+        prev_ix + best_len > ring_buffer_mask ||
+        data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+      continue;
+    }
+    {
+      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                  &data[cur_ix_masked],
+                                                  max_length);
+      if (len >= 3 || (len == 2 && i < 2)) {
+        /* Length-2 matches are only accepted for the first two cache entries
+           (i < 2); shorter matches are skipped before any score is computed,
+           since we are not interested in such short matches. */
+        score_t score = BackwardReferenceScoreUsingLastDistance(len);
+        if (best_score < score) {
+          if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
+          if (best_score < score) {
+            best_score = score;
+            best_len = len;
+            out->len = best_len;
+            out->distance = backward;
+            out->score = best_score;
+          }
+        }
+      }
+    }
+  }
+  {
+    const uint32_t key =
+        FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
+    uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
+    const size_t down =
+        (num[key] > self->block_size_) ? (num[key] - self->block_size_) : 0u;
+    for (i = num[key]; i > down;) {  /* Most recently stored entry first. */
+      size_t prev_ix = bucket[--i & self->block_mask_];
+      const size_t backward = cur_ix - prev_ix;
+      if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
+        break;
+      }
+      prev_ix &= ring_buffer_mask;
+      if (cur_ix_masked + best_len > ring_buffer_mask ||
+          prev_ix + best_len > ring_buffer_mask ||
+          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+        continue;
+      }
+      {
+        const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                    &data[cur_ix_masked],
+                                                    max_length);
+        if (len >= 4) {
+          /* Matches shorter than 4 bytes are skipped before scoring; this
+             saves a few unnecessary binary logarithms in backward reference
+             score, since we are not interested in such short matches. */
+          score_t score = BackwardReferenceScore(len, backward);
+          if (best_score < score) {
+            best_score = score;
+            best_len = len;
+            out->len = best_len;
+            out->distance = backward;
+            out->score = best_score;
+          }
+        }
+      }
+    }
+    bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix;  /* Record cur_ix. */
+    ++num[key];
+  }
+  if (min_score == out->score) {  /* No better match was found above. */
+    SearchInStaticDictionary(dictionary,
+        self->common_, &data[cur_ix_masked], max_length, dictionary_distance,
+        max_distance, out, BROTLI_FALSE);
+  }
+}
+
+#undef HashLongestMatch
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_longest_match_quickly_inc.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_longest_match_quickly_inc.h
new file mode 100644
index 0000000000..e5ba840ab9
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_longest_match_quickly_inc.h
@@ -0,0 +1,266 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, BUCKET_BITS, BUCKET_SWEEP_BITS, HASH_LEN,
+                        USE_DICTIONARY
+ */
+
+#define HashLongestMatchQuickly HASHER()
+
+#define BUCKET_SIZE (1 << BUCKET_BITS)  /* Entries in buckets_. */
+#define BUCKET_MASK (BUCKET_SIZE - 1)  /* Wraps an index into buckets_. */
+#define BUCKET_SWEEP (1 << BUCKET_SWEEP_BITS)  /* Slots probed per key. */
+#define BUCKET_SWEEP_MASK ((BUCKET_SWEEP - 1) << 3)  /* ix bits picking a slot. */
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }  /* FN(HashBytes) loads 8 bytes. */
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }  /* Same value as HashTypeLength. */
+
+/* HashBytes is the function that chooses the bucket to place
+   the address in. The HashLongestMatch and HashLongestMatchQuickly
+   classes have separate, different implementations of hashing. */
+static uint32_t FN(HashBytes)(const uint8_t* data) {
+  const uint64_t h = ((BROTLI_UNALIGNED_LOAD64LE(data) << (64 - 8 * HASH_LEN)) *
+                      kHashMul64);  /* The shift keeps only 8 * HASH_LEN bits. */
+  /* The higher bits contain more mixture from the multiplication,
+     so we take our results from there. */
+  return (uint32_t)(h >> (64 - BUCKET_BITS));  /* Top BUCKET_BITS bits. */
+}
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data.
+
+   This is a hash map of fixed size (BUCKET_SIZE), one position per entry. */
+typedef struct HashLongestMatchQuickly {
+  /* Shortcuts. */
+  HasherCommon* common;
+
+  /* --- Dynamic size members --- */
+
+  uint32_t* buckets_;  /* uint32_t[BUCKET_SIZE]; */
+} HashLongestMatchQuickly;
+
+static void FN(Initialize)(
+    HasherCommon* common, HashLongestMatchQuickly* BROTLI_RESTRICT self,
+    const BrotliEncoderParams* params) {
+  self->common = common;
+  /* The bucket table is placed at common->extra; |params| is not read. */
+  BROTLI_UNUSED(params);
+  self->buckets_ = (uint32_t*)common->extra;
+}
+
+static void FN(Prepare)(
+    HashLongestMatchQuickly* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
+  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
+  /* Partial preparation is 100 times slower (per socket). */
+  size_t partial_prepare_threshold = BUCKET_SIZE >> 5;
+  if (one_shot && input_size <= partial_prepare_threshold) {
+    size_t i;
+    for (i = 0; i < input_size; ++i) {  /* Small input: reset touched keys. */
+      const uint32_t key = FN(HashBytes)(&data[i]);
+      if (BUCKET_SWEEP == 1) {
+        buckets[key] = 0;
+      } else {
+        uint32_t j;
+        for (j = 0; j < BUCKET_SWEEP; ++j) {
+          buckets[(key + (j << 3)) & BUCKET_MASK] = 0;  /* Slots are 8 apart. */
+        }
+      }
+    }
+  } else {
+    /* It is not strictly necessary to fill this buffer here, but
+       not filling will make the results of the compression stochastic
+       (but correct). This is because random data would cause the
+       system to find accidentally good backward references here and there. */
+    memset(buckets, 0, sizeof(uint32_t) * BUCKET_SIZE);
+  }
+}
+
+static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size) {
+  BROTLI_UNUSED(params);
+  BROTLI_UNUSED(one_shot);
+  BROTLI_UNUSED(input_size);
+  return sizeof(uint32_t) * BUCKET_SIZE;  /* Size of the buckets_ table. */
+}
+
+/* Hash HASH_LEN bytes at &data[ix & mask] (8 bytes are loaded).
+   Store ix in the bucket for that hash or, when BUCKET_SWEEP != 1, in the
+   slot of its sweep range selected by ix & BUCKET_SWEEP_MASK. */
+static BROTLI_INLINE void FN(Store)(
+    HashLongestMatchQuickly* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
+  const uint32_t key = FN(HashBytes)(&data[ix & mask]);
+  if (BUCKET_SWEEP == 1) {
+    self->buckets_[key] = (uint32_t)ix;
+  } else {
+    /* Wiggle the value with the bucket sweep range. */
+    const uint32_t off = ix & BUCKET_SWEEP_MASK;
+    self->buckets_[(key + off) & BUCKET_MASK] = (uint32_t)ix;
+  }
+}
+
+static BROTLI_INLINE void FN(StoreRange)(
+    HashLongestMatchQuickly* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
+    const size_t ix_start, const size_t ix_end) {
+  size_t i;
+  for (i = ix_start; i < ix_end; ++i) {  /* Positions [ix_start, ix_end). */
+    FN(Store)(self, data, mask, i);
+  }
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(
+    HashLongestMatchQuickly* BROTLI_RESTRICT self,
+    size_t num_bytes, size_t position,
+    const uint8_t* ringbuffer, size_t ringbuffer_mask) {
+  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
+    /* Store the hashes for the three positions just before |position|.
+       These could not be calculated before, since they require knowledge
+       of both the previous and the current block. */
+    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
+    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
+    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
+  }
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(
+    HashLongestMatchQuickly* BROTLI_RESTRICT self,
+    int* BROTLI_RESTRICT distance_cache) {
+  BROTLI_UNUSED(self);  /* This hasher does no distance cache preparation. */
+  BROTLI_UNUSED(distance_cache);
+}
+
+/* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
+   up to the length of max_length and stores the position cur_ix in the
+   hash table.
+   Only distance_cache[0] and the key's BUCKET_SWEEP slots are examined.
+   Does not look for matches longer than max_length.
+   Does not look for matches further away than max_backward.
+   Writes the best match into |out|.
+   |out|->score is updated only if a better match is found. */
+static BROTLI_INLINE void FN(FindLongestMatch)(
+    HashLongestMatchQuickly* BROTLI_RESTRICT self,
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data,
+    const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
+    const size_t cur_ix, const size_t max_length, const size_t max_backward,
+    const size_t dictionary_distance, const size_t max_distance,
+    HasherSearchResult* BROTLI_RESTRICT out) {
+  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
+  const size_t best_len_in = out->len;
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  int compare_char = data[cur_ix_masked + best_len_in];
+  size_t key = FN(HashBytes)(&data[cur_ix_masked]);
+  size_t key_out;  /* Only assigned and read when BUCKET_SWEEP != 1. */
+  score_t min_score = out->score;
+  score_t best_score = out->score;
+  size_t best_len = best_len_in;
+  size_t cached_backward = (size_t)distance_cache[0];
+  size_t prev_ix = cur_ix - cached_backward;
+  out->len_code_delta = 0;
+  if (prev_ix < cur_ix) {  /* Try distance_cache[0] first. */
+    prev_ix &= (uint32_t)ring_buffer_mask;
+    if (compare_char == data[prev_ix + best_len]) {
+      const size_t len = FindMatchLengthWithLimit(
+          &data[prev_ix], &data[cur_ix_masked], max_length);
+      if (len >= 4) {
+        const score_t score = BackwardReferenceScoreUsingLastDistance(len);
+        if (best_score < score) {
+          out->len = len;
+          out->distance = cached_backward;
+          out->score = score;
+          if (BUCKET_SWEEP == 1) {
+            buckets[key] = (uint32_t)cur_ix;  /* Record cur_ix, then stop. */
+            return;
+          } else {
+            best_len = len;
+            best_score = score;
+            compare_char = data[cur_ix_masked + len];
+          }
+        }
+      }
+    }
+  }
+  if (BUCKET_SWEEP == 1) {
+    size_t backward;
+    size_t len;
+    /* Only one to look for, don't bother to prepare for a loop. */
+    prev_ix = buckets[key];
+    buckets[key] = (uint32_t)cur_ix;
+    backward = cur_ix - prev_ix;
+    prev_ix &= (uint32_t)ring_buffer_mask;
+    if (compare_char != data[prev_ix + best_len_in]) {
+      return;
+    }
+    if (BROTLI_PREDICT_FALSE(backward == 0 || backward > max_backward)) {
+      return;
+    }
+    len = FindMatchLengthWithLimit(&data[prev_ix],
+                                   &data[cur_ix_masked],
+                                   max_length);
+    if (len >= 4) {
+      const score_t score = BackwardReferenceScore(len, backward);
+      if (best_score < score) {
+        out->len = len;
+        out->distance = backward;
+        out->score = score;
+        return;
+      }
+    }
+  } else {
+    size_t keys[BUCKET_SWEEP];
+    size_t i;
+    for (i = 0; i < BUCKET_SWEEP; ++i) {
+      keys[i] = (key + (i << 3)) & BUCKET_MASK;
+    }
+    key_out = keys[(cur_ix & BUCKET_SWEEP_MASK) >> 3];  /* Slot FN(Store) uses. */
+    for (i = 0; i < BUCKET_SWEEP; ++i) {
+      size_t len;
+      size_t backward;
+      prev_ix = buckets[keys[i]];
+      backward = cur_ix - prev_ix;
+      prev_ix &= (uint32_t)ring_buffer_mask;
+      if (compare_char != data[prev_ix + best_len]) {
+        continue;
+      }
+      if (BROTLI_PREDICT_FALSE(backward == 0 || backward > max_backward)) {
+        continue;
+      }
+      len = FindMatchLengthWithLimit(&data[prev_ix],
+                                     &data[cur_ix_masked],
+                                     max_length);
+      if (len >= 4) {
+        const score_t score = BackwardReferenceScore(len, backward);
+        if (best_score < score) {
+          best_len = len;
+          out->len = len;
+          compare_char = data[cur_ix_masked + len];
+          best_score = score;
+          out->score = score;
+          out->distance = backward;
+        }
+      }
+    }
+  }
+  if (USE_DICTIONARY && min_score == out->score) {  /* No better match above. */
+    SearchInStaticDictionary(dictionary,
+        self->common, &data[cur_ix_masked], max_length, dictionary_distance,
+        max_distance, out, BROTLI_TRUE);
+  }
+  if (BUCKET_SWEEP != 1) {
+    buckets[key_out] = (uint32_t)cur_ix;  /* Record cur_ix after the search. */
+  }
+}
+
+#undef BUCKET_SWEEP_MASK
+#undef BUCKET_SWEEP
+#undef BUCKET_MASK
+#undef BUCKET_SIZE
+
+#undef HashLongestMatchQuickly
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_rolling_inc.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_rolling_inc.h
new file mode 100755
index 0000000000..586ae73859
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_rolling_inc.h
@@ -0,0 +1,212 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2018 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */
+/* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */
+/* JUMP = skip bytes for speedup */
+
+/* Rolling hash for long distance long string matches. Stores one position
+   per bucket, bucket key is computed over a long region. */
+
+#define HashRolling HASHER()
+
+static const uint32_t FN(kRollingHashMul32) = 69069;  /* hash multiplier */
+static const uint32_t FN(kInvalidPos) = 0xffffffff;  /* empty table slot */
+
+/* This hasher needs a longer lookahead (CHUNKLEN), but returning a higher
+   value here would hurt compression by the main hasher when both are combined
+   in a composite hasher. FindLongestMatch checks the lookahead itself. */
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
+
+/* Computes a code from a single byte. A lookup table of 256 values could be
+   used, but simply adding 1 works about as good. */
+static uint32_t FN(HashByte)(uint8_t byte) {
+  return 1u + (uint32_t)byte;  /* maps 0..255 onto 1..256 */
+}
+
+static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add,
+                                               uint32_t factor) {
+  return (uint32_t)(FN(HashByte)(add) + state * factor);  /* wraps mod 2^32 */
+}
+
+static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add,
+                                        uint8_t rem, uint32_t factor,
+                                        uint32_t factor_remove) {
+  const uint32_t outgoing = factor_remove * FN(HashByte)(rem);  /* leaves */
+  return (uint32_t)(state * factor + FN(HashByte)(add) - outgoing);
+}
+
+typedef struct HashRolling {
+  uint32_t state;  /* current rolling hash value */
+  uint32_t* table;  /* NUMBUCKETS slots: a stored position or kInvalidPos */
+  size_t next_ix;  /* position where FindLongestMatch resumes hashing */
+
+  uint32_t chunk_len;  /* NOTE(review): not referenced by this header */
+  uint32_t factor;  /* multiplier per step; Initialize sets kRollingHashMul32 */
+  uint32_t factor_remove;  /* factor multiplied once per hashed byte of a chunk */
+} HashRolling;
+
+static void FN(Initialize)(
+    HasherCommon* common, HashRolling* BROTLI_RESTRICT self,
+    const BrotliEncoderParams* params) {
+  size_t i;
+  self->state = 0;
+  self->next_ix = 0;
+
+  self->factor = FN(kRollingHashMul32);
+
+  /* Compute the factor of the oldest byte to remove: factor**steps modulo
+     2^32 (the multiplications rely on 32-bit unsigned wrap-around) */
+  self->factor_remove = 1;
+  for (i = 0; i < CHUNKLEN; i += JUMP) {  /* one step per hashed byte */
+    self->factor_remove *= self->factor;
+  }
+
+  self->table = (uint32_t*)common->extra;  /* table lives in common->extra */
+  for (i = 0; i < NUMBUCKETS; i++) {
+    self->table[i] = FN(kInvalidPos);  /* every bucket starts out empty */
+  }
+
+  BROTLI_UNUSED(params);
+}
+
+static void FN(Prepare)(HashRolling* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
+  uint32_t hash = 0;
+  size_t offset;
+  BROTLI_UNUSED(one_shot);
+  /* Fewer than CHUNKLEN bytes available: leave the state untouched. */
+  if (input_size < CHUNKLEN) return;
+  for (offset = 0; offset < CHUNKLEN; offset += JUMP) {
+    hash = FN(HashRollingFunctionInitial)(hash, data[offset], self->factor);
+  }
+  self->state = hash;  /* hash of every JUMP-th byte of the first chunk */
+}
+
+static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size) {
+  BROTLI_UNUSED(params);  /* the table size depends only on NUMBUCKETS */
+  BROTLI_UNUSED(one_shot);
+  BROTLI_UNUSED(input_size);
+  return NUMBUCKETS * sizeof(uint32_t);  /* one uint32_t slot per bucket */
+}
+
+static BROTLI_INLINE void FN(Store)(HashRolling* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
+  BROTLI_UNUSED(ix);  /* no-op: the table is filled by FindLongestMatch */
+  BROTLI_UNUSED(mask);
+  BROTLI_UNUSED(data);
+  BROTLI_UNUSED(self);
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HashRolling* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
+    const size_t ix_start, const size_t ix_end) {
+  BROTLI_UNUSED(ix_end);  /* no-op: the table is filled by FindLongestMatch */
+  BROTLI_UNUSED(ix_start);
+  BROTLI_UNUSED(mask);
+  BROTLI_UNUSED(data);
+  BROTLI_UNUSED(self);
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(
+    HashRolling* BROTLI_RESTRICT self,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ring_buffer_mask) {
+  /* Re-initializes the hash from scratch, starting at the first JUMP-aligned
+     position at or after `position`. */
+  const size_t misalign = position & (JUMP - 1);
+  size_t usable = num_bytes;
+  size_t start_masked;
+  size_t room;
+  if (misalign != 0) {
+    const size_t skip = JUMP - misalign;
+    usable = (skip > usable) ? 0 : (usable - skip);
+    position += skip;
+  }
+  start_masked = position & ring_buffer_mask;
+  room = ring_buffer_mask - start_masked;
+  /* Wrapping around the ring buffer is not handled: limit the size. */
+  if (usable > room) usable = room;
+
+  FN(Prepare)(self, BROTLI_FALSE, usable, ringbuffer + start_masked);
+  self->next_ix = position;
+  BROTLI_UNUSED(num_bytes);
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(
+    HashRolling* BROTLI_RESTRICT self,
+    int* BROTLI_RESTRICT distance_cache) {
+  BROTLI_UNUSED(distance_cache);  /* no-op: neither argument is touched */
+  BROTLI_UNUSED(self);
+}
+
+static BROTLI_INLINE void FN(FindLongestMatch)(
+    HashRolling* BROTLI_RESTRICT self,
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
+    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
+    const size_t max_length, const size_t max_backward,
+    const size_t dictionary_distance, const size_t max_distance,
+    HasherSearchResult* BROTLI_RESTRICT out) {
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  size_t pos;
+
+  if ((cur_ix & (JUMP - 1)) != 0) return;  /* only JUMP-aligned positions */
+
+  /* Not enough lookahead */
+  if (max_length < CHUNKLEN) return;
+
+  for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) {  /* roll to cur_ix */
+    uint32_t code = self->state & MASK;  /* hash code before rolling forward */
+
+    uint8_t rem = data[pos & ring_buffer_mask];  /* byte leaving the chunk */
+    uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask];  /* byte entering */
+    size_t found_ix = FN(kInvalidPos);
+
+    self->state = FN(HashRollingFunction)(
+        self->state, add, rem, self->factor, self->factor_remove);
+
+    if (code < NUMBUCKETS) {  /* codes at or above NUMBUCKETS are not stored */
+      found_ix = self->table[code];
+      self->table[code] = (uint32_t)pos;  /* bucket keeps the newest position */
+      if (pos == cur_ix && found_ix != FN(kInvalidPos)) {  /* match only here */
+        /* The cast to 32-bit makes backward distances up to 4GB work even
+           if cur_ix is above 4GB, despite using 32-bit values in the table. */
+        size_t backward = (uint32_t)(cur_ix - found_ix);
+        if (backward <= max_backward) {
+          const size_t found_ix_masked = found_ix & ring_buffer_mask;
+          const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked],
+                                                      &data[cur_ix_masked],
+                                                      max_length);
+          if (len >= 4 && len > out->len) {  /* must beat the length in out */
+            score_t score = BackwardReferenceScore(len, backward);
+            if (score > out->score) {  /* and also the score already in out */
+              out->len = len;
+              out->distance = backward;
+              out->score = score;
+              out->len_code_delta = 0;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  self->next_ix = cur_ix + JUMP;  /* the next call resumes after cur_ix */
+
+  /* NOTE: this hasher does not search in the dictionary. It is used as
+     backup-hasher, the main hasher already searches in it. */
+  BROTLI_UNUSED(dictionary);
+  BROTLI_UNUSED(distance_cache);
+  BROTLI_UNUSED(dictionary_distance);
+  BROTLI_UNUSED(max_distance);
+}
+
+#undef HashRolling
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_to_binary_tree_inc.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_to_binary_tree_inc.h
new file mode 100644
index 0000000000..9880e0aef6
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/hash_to_binary_tree_inc.h
@@ -0,0 +1,329 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, BUCKET_BITS, MAX_TREE_COMP_LENGTH,
+                        MAX_TREE_SEARCH_DEPTH */
+
+/* A (forgetful) hash table where each hash bucket contains a binary tree of
+   sequences whose first 4 bytes share the same hash code.
+   Each sequence is MAX_TREE_COMP_LENGTH long and is identified by its starting
+   position in the input data. The binary tree is sorted by the lexicographic
+   order of the sequences, and it is also a max-heap with respect to the
+   starting positions. */
+
+#define HashToBinaryTree HASHER()
+
+#define BUCKET_SIZE (1 << BUCKET_BITS)
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
+  return MAX_TREE_COMP_LENGTH;  /* the max_length that Store passes along */
+}
+
+static uint32_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data) {
+  /* Multiply the 32-bit load of `data` by kHashMul32. The higher bits contain
+     more mixture from the multiplication, so the top BUCKET_BITS are kept. */
+  const uint32_t mixed = kHashMul32 * BROTLI_UNALIGNED_LOAD32LE(data);
+  return mixed >> (32 - BUCKET_BITS);
+}
+
+typedef struct HashToBinaryTree {
+  /* The window size minus 1 */
+  size_t window_mask_;
+
+  /* Hash table that maps the 4-byte hashes of the sequence to the last
+     position where this hash was found, which is the root of the binary
+     tree of sequences that share this hash bucket. */
+  uint32_t* buckets_;  /* uint32_t[BUCKET_SIZE]; */
+
+  /* A position used to mark a non-existent sequence, i.e. a tree is empty if
+     its root is at invalid_pos_ and a node is a leaf if both its children
+     are at invalid_pos_. */
+  uint32_t invalid_pos_;
+
+  /* --- Dynamic size members --- */
+
+  /* The union of the binary trees of each hash bucket. The root of the tree
+     corresponding to a hash is a sequence starting at buckets_[hash]. With
+     p = pos & window_mask_, the left and right children of a sequence
+     starting at pos are forest_[2 * p] and forest_[2 * p + 1]. */
+  uint32_t* forest_;  /* uint32_t[2 * num_nodes] */
+} HashToBinaryTree;
+
+static void FN(Initialize)(
+    HasherCommon* common, HashToBinaryTree* BROTLI_RESTRICT self,
+    const BrotliEncoderParams* params) {
+  const size_t window_mask = (1u << params->lgwin) - 1u;
+  self->window_mask_ = window_mask;
+  self->invalid_pos_ = (uint32_t)(0 - window_mask);  /* truncated to 32 bits */
+  self->buckets_ = (uint32_t*)common->extra;
+  self->forest_ = &self->buckets_[BUCKET_SIZE];  /* forest follows buckets */
+}
+
+static void FN(Prepare)
+    (HashToBinaryTree* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
+  const uint32_t empty_root = self->invalid_pos_;
+  uint32_t* BROTLI_RESTRICT slot = self->buckets_;
+  uint32_t* const end = slot + BUCKET_SIZE;
+  BROTLI_UNUSED(one_shot);
+  BROTLI_UNUSED(input_size);
+  BROTLI_UNUSED(data);
+  while (slot != end) {
+    *slot++ = empty_root;  /* every bucket starts with an empty tree */
+  }
+}
+
+static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size) {
+  const size_t window_size = (size_t)1 << params->lgwin;
+  /* A one-shot input shorter than the window needs only one node per byte. */
+  const size_t num_nodes =
+      (one_shot && input_size < window_size) ? input_size : window_size;
+  return sizeof(uint32_t) * BUCKET_SIZE + 2 * sizeof(uint32_t) * num_nodes;
+}
+
+static BROTLI_INLINE size_t FN(LeftChildIndex)(
+    HashToBinaryTree* BROTLI_RESTRICT self,
+    const size_t pos) {
+  return (pos & self->window_mask_) * 2;  /* even slot of the node's pair */
+}
+
+static BROTLI_INLINE size_t FN(RightChildIndex)(
+    HashToBinaryTree* BROTLI_RESTRICT self,
+    const size_t pos) {
+  return 1 + (pos & self->window_mask_) * 2;  /* odd slot of the node's pair */
+}
+
+/* Stores the hash of the next 4 bytes and in a single tree-traversal, the
+   hash bucket's binary tree is searched for matches and is re-rooted at the
+   current position.
+
+   If less than MAX_TREE_COMP_LENGTH data is available, the hash bucket of the
+   current position is searched for matches, but the state of the hash table
+   is not changed, since we can not know the final sorting order of the
+   current (incomplete) sequence.
+
+   This function must be called with increasing cur_ix positions. */
+static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
+    HashToBinaryTree* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
+    const size_t cur_ix, const size_t ring_buffer_mask, const size_t max_length,
+    const size_t max_backward, size_t* const BROTLI_RESTRICT best_len,
+    BackwardMatch* BROTLI_RESTRICT matches) {
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  const size_t max_comp_len =
+      BROTLI_MIN(size_t, max_length, MAX_TREE_COMP_LENGTH);
+  const BROTLI_BOOL should_reroot_tree =
+      TO_BROTLI_BOOL(max_length >= MAX_TREE_COMP_LENGTH);
+  const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
+  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
+  uint32_t* BROTLI_RESTRICT forest = self->forest_;
+  size_t prev_ix = buckets[key];  /* current root of this bucket's tree */
+  /* The forest index of the rightmost node of the left subtree of the new
+     root, updated as we traverse and re-root the tree of the hash bucket. */
+  size_t node_left = FN(LeftChildIndex)(self, cur_ix);
+  /* The forest index of the leftmost node of the right subtree of the new
+     root, updated as we traverse and re-root the tree of the hash bucket. */
+  size_t node_right = FN(RightChildIndex)(self, cur_ix);
+  /* The match length of the rightmost node of the left subtree of the new
+     root, updated as we traverse and re-root the tree of the hash bucket. */
+  size_t best_len_left = 0;
+  /* The match length of the leftmost node of the right subtree of the new
+     root, updated as we traverse and re-root the tree of the hash bucket. */
+  size_t best_len_right = 0;
+  size_t depth_remaining;
+  if (should_reroot_tree) {
+    buckets[key] = (uint32_t)cur_ix;  /* cur_ix becomes the new root */
+  }
+  for (depth_remaining = MAX_TREE_SEARCH_DEPTH; ; --depth_remaining) {
+    const size_t backward = cur_ix - prev_ix;
+    const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
+    if (backward == 0 || backward > max_backward || depth_remaining == 0) {
+      if (should_reroot_tree) {  /* out of range or depth: cut both sides */
+        forest[node_left] = self->invalid_pos_;
+        forest[node_right] = self->invalid_pos_;
+      }
+      break;
+    }
+    {
+      const size_t cur_len = BROTLI_MIN(size_t, best_len_left, best_len_right);
+      size_t len;
+      BROTLI_DCHECK(cur_len <= MAX_TREE_COMP_LENGTH);
+      len = cur_len +  /* the first cur_len bytes are not compared again */
+          FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],
+                                   &data[prev_ix_masked + cur_len],
+                                   max_length - cur_len);
+      BROTLI_DCHECK(
+          0 == memcmp(&data[cur_ix_masked], &data[prev_ix_masked], len));
+      if (matches && len > *best_len) {  /* matches == NULL: store only */
+        *best_len = len;
+        InitBackwardMatch(matches++, backward, len);
+      }
+      if (len >= max_comp_len) {  /* full-length match: adopt its children */
+        if (should_reroot_tree) {
+          forest[node_left] = forest[FN(LeftChildIndex)(self, prev_ix)];
+          forest[node_right] = forest[FN(RightChildIndex)(self, prev_ix)];
+        }
+        break;
+      }
+      if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {
+        best_len_left = len;  /* prev sorts before cur: goes to the left */
+        if (should_reroot_tree) {
+          forest[node_left] = (uint32_t)prev_ix;
+        }
+        node_left = FN(RightChildIndex)(self, prev_ix);
+        prev_ix = forest[node_left];  /* descend into prev's right child */
+      } else {
+        best_len_right = len;  /* prev sorts after cur: goes to the right */
+        if (should_reroot_tree) {
+          forest[node_right] = (uint32_t)prev_ix;
+        }
+        node_right = FN(LeftChildIndex)(self, prev_ix);
+        prev_ix = forest[node_right];  /* descend into prev's left child */
+      }
+    }
+  }
+  return matches;  /* one past the last match written (NULL if none asked) */
+}
+
+/* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
+   length of max_length and stores the position cur_ix in the hash table.
+
+   Sets *num_matches to the number of matches found, and stores the found
+   matches in matches[0] to matches[*num_matches - 1]. The matches will be
+   sorted by strictly increasing length and (non-strictly) increasing
+   distance. */
+static BROTLI_INLINE size_t FN(FindAllMatches)(
+    HashToBinaryTree* BROTLI_RESTRICT self,
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data,
+    const size_t ring_buffer_mask, const size_t cur_ix,
+    const size_t max_length, const size_t max_backward,
+    const size_t dictionary_distance, const BrotliEncoderParams* params,
+    BackwardMatch* matches) {
+  BackwardMatch* const orig_matches = matches;
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  size_t best_len = 1;  /* only strictly longer matches are recorded */
+  const size_t short_match_max_backward =
+      params->quality != HQ_ZOPFLIFICATION_QUALITY ? 16 : 64;
+  size_t stop = cur_ix - short_match_max_backward;
+  uint32_t dict_matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
+  size_t i;
+  if (cur_ix < short_match_max_backward) { stop = 0; }  /* undo the wrap */
+  for (i = cur_ix - 1; i > stop && best_len <= 2; --i) {  /* nearby scan */
+    size_t prev_ix = i;
+    const size_t backward = cur_ix - prev_ix;
+    if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
+      break;
+    }
+    prev_ix &= ring_buffer_mask;
+    if (data[cur_ix_masked] != data[prev_ix] ||
+        data[cur_ix_masked + 1] != data[prev_ix + 1]) {
+      continue;  /* the first two bytes must agree */
+    }
+    {
+      const size_t len =
+          FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
+                                   max_length);
+      if (len > best_len) {
+        best_len = len;
+        InitBackwardMatch(matches++, backward, len);
+      }
+    }
+  }
+  if (best_len < max_length) {  /* tree search only if a longer match can exist */
+    matches = FN(StoreAndFindMatches)(self, data, cur_ix,
+        ring_buffer_mask, max_length, max_backward, &best_len, matches);
+  }
+  for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) {
+    dict_matches[i] = kInvalidMatch;  /* one slot per possible match length */
+  }
+  {
+    size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1);
+    if (BrotliFindAllStaticDictionaryMatches(dictionary,
+        &data[cur_ix_masked], minlen, max_length, &dict_matches[0])) {
+      size_t maxlen = BROTLI_MIN(
+          size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length);
+      size_t l;
+      for (l = minlen; l <= maxlen; ++l) {
+        uint32_t dict_id = dict_matches[l];
+        if (dict_id < kInvalidMatch) {  /* slot was filled for this length */
+          size_t distance = dictionary_distance + (dict_id >> 5) + 1;
+          if (distance <= params->dist.max_distance) {
+            InitDictionaryBackwardMatch(matches++, distance, l, dict_id & 31);
+          }
+        }
+      }
+    }
+  }
+  return (size_t)(matches - orig_matches);  /* number of matches written */
+}
+
+/* Stores the hash of the next 4 bytes and re-roots the binary tree at the
+   current sequence, without returning any matches.
+   REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */
+static BROTLI_INLINE void FN(Store)(HashToBinaryTree* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data,
+    const size_t mask, const size_t ix) {
+  /* Maximum distance is window size - 16, see section 9.1. of the spec. */
+  const size_t window_size = self->window_mask_ + 1;
+  FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH,
+      window_size - BROTLI_WINDOW_GAP, NULL, NULL);
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* BROTLI_RESTRICT self,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
+    const size_t ix_start, const size_t ix_end) {
+  /* The last 63 positions (or the whole range, if shorter) are all stored. */
+  const size_t dense_start =
+      (ix_start + 63 <= ix_end) ? (ix_end - 63) : ix_start;
+  size_t pos;
+  if (ix_start + 512 <= dense_start) {
+    /* Long range: store every 8th position ahead of the dense tail. */
+    for (pos = ix_start; pos < dense_start; pos += 8) {
+      FN(Store)(self, data, mask, pos);
+    }
+  }
+  for (pos = dense_start; pos < ix_end; ++pos) {
+    FN(Store)(self, data, mask, pos);
+  }
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(
+    HashToBinaryTree* BROTLI_RESTRICT self,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ringbuffer_mask) {
+  if (num_bytes >= FN(HashTypeLength)() - 1 &&  /* i.e. at least 3 new bytes */
+      position >= MAX_TREE_COMP_LENGTH) {
+    /* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher.
+       These could not be calculated before, since they require knowledge
+       of both the previous and the current block. */
+    const size_t i_start = position - MAX_TREE_COMP_LENGTH + 1;
+    const size_t i_end = BROTLI_MIN(size_t, position, i_start + num_bytes);
+    size_t i;
+    for (i = i_start; i < i_end; ++i) {
+      /* Maximum distance is window size - 16, see section 9.1. of the spec.
+         Furthermore, we have to make sure that we don't look further back
+         from the start of the next block than the window size, otherwise we
+         could access already overwritten areas of the ring-buffer. */
+      const size_t max_backward =
+          self->window_mask_ - BROTLI_MAX(size_t,
+                                          BROTLI_WINDOW_GAP - 1,
+                                          position - i);
+      /* We know that i + MAX_TREE_COMP_LENGTH <= position + num_bytes, i.e. the
+         end of the current block and that we have at least
+         MAX_TREE_COMP_LENGTH tail in the ring-buffer. */
+      FN(StoreAndFindMatches)(self, ringbuffer, i, ringbuffer_mask,
+          MAX_TREE_COMP_LENGTH, max_backward, NULL, NULL);  /* store only */
+    }
+  }
+}
+
+#undef BUCKET_SIZE
+
+#undef HashToBinaryTree
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/histogram.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/histogram.c
new file mode 100644
index 0000000000..6da2ff6bb4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/histogram.c
@@ -0,0 +1,100 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Build per-context histograms of literals, commands and distance codes. */
+
+#include "./histogram.h"
+
+#include "../common/context.h"
+#include "./block_splitter.h"
+#include "./command.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct BlockSplitIterator {
+  const BlockSplit* split_;  /* Not owned. */
+  size_t idx_;  /* index of the current block in split_ */
+  size_t type_;  /* type of the current block (0 until the first advance) */
+  size_t length_;  /* symbols left in the current block */
+} BlockSplitIterator;
+
+static void InitBlockSplitIterator(BlockSplitIterator* self,
+    const BlockSplit* split) {
+  self->length_ = !split->lengths ? 0 : split->lengths[0];  /* first block */
+  self->type_ = 0;
+  self->idx_ = 0;
+  self->split_ = split;
+}
+
+static void BlockSplitIteratorNext(BlockSplitIterator* self) {
+  if (self->length_ == 0) {  /* current block used up: move to the next */
+    const size_t next = ++self->idx_;
+    self->type_ = self->split_->types[next];
+    self->length_ = self->split_->lengths[next];
+  }
+  self->length_ -= 1;  /* consume one symbol of the current block */
+}
+
+void BrotliBuildHistogramsWithContext(
+    const Command* cmds, const size_t num_commands,
+    const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
+    const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t start_pos,
+    size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
+    const ContextType* context_modes, HistogramLiteral* literal_histograms,
+    HistogramCommand* insert_and_copy_histograms,
+    HistogramDistance* copy_dist_histograms) {
+  size_t pos = start_pos;  /* read position in ringbuffer, masked on use */
+  BlockSplitIterator literal_it;
+  BlockSplitIterator insert_and_copy_it;
+  BlockSplitIterator dist_it;
+  size_t i;
+
+  InitBlockSplitIterator(&literal_it, literal_split);
+  InitBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split);
+  InitBlockSplitIterator(&dist_it, dist_split);
+  for (i = 0; i < num_commands; ++i) {
+    const Command* cmd = &cmds[i];
+    size_t j;
+    BlockSplitIteratorNext(&insert_and_copy_it);
+    HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_],
+        cmd->cmd_prefix_);
+    /* TODO: unwrap iterator blocks. */
+    for (j = cmd->insert_len_; j != 0; --j) {  /* one pass per literal */
+      size_t context;
+      BlockSplitIteratorNext(&literal_it);
+      context = literal_it.type_;
+      if (context_modes) {  /* NULL: histograms are indexed by block type */
+        ContextLut lut = BROTLI_CONTEXT_LUT(context_modes[context]);
+        context = (context << BROTLI_LITERAL_CONTEXT_BITS) +
+            BROTLI_CONTEXT(prev_byte, prev_byte2, lut);
+      }
+      HistogramAddLiteral(&literal_histograms[context],
+          ringbuffer[pos & mask]);
+      prev_byte2 = prev_byte;
+      prev_byte = ringbuffer[pos & mask];
+      ++pos;
+    }
+    pos += CommandCopyLen(cmd);  /* skip the bytes produced by the copy */
+    if (CommandCopyLen(cmd)) {
+      prev_byte2 = ringbuffer[(pos - 2) & mask];  /* last two copied bytes */
+      prev_byte = ringbuffer[(pos - 1) & mask];
+      if (cmd->cmd_prefix_ >= 128) {  /* NOTE(review): confirm why 128 */
+        size_t context;
+        BlockSplitIteratorNext(&dist_it);
+        context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) +
+            CommandDistanceContext(cmd);
+        HistogramAddDistance(&copy_dist_histograms[context],
+            cmd->dist_prefix_ & 0x3FF);  /* low 10 bits of dist_prefix_ */
+      }
+    }
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/histogram.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/histogram.h
new file mode 100644
index 0000000000..42af3c3f9d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/histogram.h
@@ -0,0 +1,63 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Models the histograms of literals, commands and distance codes. */
+
+#ifndef BROTLI_ENC_HISTOGRAM_H_
+#define BROTLI_ENC_HISTOGRAM_H_
+
+#include <string.h>  /* memset */
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./block_splitter.h"
+#include "./command.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* The distance symbols effectively used by "Large Window Brotli" (32-bit). */
+#define BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS 544
+
+#define FN(X) X ## Literal
+#define DATA_SIZE BROTLI_NUM_LITERAL_SYMBOLS
+#define DataType uint8_t
+#include "./histogram_inc.h"  /* NOLINT(build/include) */
+#undef DataType
+#undef DATA_SIZE
+#undef FN  /* end of the HistogramLiteral instantiation */
+
+#define FN(X) X ## Command
+#define DataType uint16_t
+#define DATA_SIZE BROTLI_NUM_COMMAND_SYMBOLS
+#include "./histogram_inc.h"  /* NOLINT(build/include) */
+#undef DATA_SIZE  /* DataType is not undefined here: reused for Distance */
+#undef FN  /* end of the HistogramCommand instantiation */
+
+#define FN(X) X ## Distance
+#define DATA_SIZE BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS
+#include "./histogram_inc.h"  /* NOLINT(build/include) */
+#undef DataType  /* still uint16_t from the Command instantiation above */
+#undef DATA_SIZE
+#undef FN  /* end of the HistogramDistance instantiation */
+
+BROTLI_INTERNAL void BrotliBuildHistogramsWithContext(
+    const Command* cmds, const size_t num_commands,
+    const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
+    const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t pos,
+    size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
+    const ContextType* context_modes, HistogramLiteral* literal_histograms,
+    HistogramCommand* insert_and_copy_histograms,
+    HistogramDistance* copy_dist_histograms);  /* `pos` is the start position */
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_HISTOGRAM_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/histogram_inc.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/histogram_inc.h
new file mode 100644
index 0000000000..50eaf7468d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/histogram_inc.h
@@ -0,0 +1,51 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, DATA_SIZE, DataType */
+
+/* A simple container for histograms of data in blocks. */
+
+typedef struct FN(Histogram) {
+  uint32_t data_[DATA_SIZE];  /* occurrence count per symbol value */
+  size_t total_count_;  /* number of symbols added to data_ */
+  double bit_cost_;  /* HistogramClear resets this to HUGE_VAL */
+} FN(Histogram);
+
+static BROTLI_INLINE void FN(HistogramClear)(FN(Histogram)* self) {
+  self->bit_cost_ = HUGE_VAL;  /* no cost has been computed yet */
+  self->total_count_ = 0;
+  memset(self->data_, 0, sizeof(self->data_));
+}
+
+static BROTLI_INLINE void FN(ClearHistograms)(
+    FN(Histogram)* array, size_t length) {
+  size_t idx;  /* resets each of the `length` histograms in turn */
+  for (idx = 0; idx < length; ++idx) FN(HistogramClear)(&array[idx]);
+}
+
+static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {
+  self->data_[val] += 1;  /* val is used as an index and is not range-checked */
+  self->total_count_ += 1;
+}
+
+static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self,
+    const DataType* p, size_t n) {
+  size_t idx;  /* counts each of the n symbols p[0] .. p[n - 1] once */
+  self->total_count_ += n;
+  for (idx = 0; idx < n; ++idx) ++self->data_[p[idx]];
+}
+
+static BROTLI_INLINE void FN(HistogramAddHistogram)(FN(Histogram)* self,
+    const FN(Histogram)* v) {
+  size_t sym;  /* adds v to self bucket by bucket; bit_cost_ is untouched */
+  for (sym = 0; sym < DATA_SIZE; ++sym) {
+    self->data_[sym] += v->data_[sym];
+  }
+  self->total_count_ += v->total_count_;
+}
+
+static BROTLI_INLINE size_t FN(HistogramDataSize)(void) { return DATA_SIZE; }  /* length of data_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/literal_cost.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/literal_cost.c
new file mode 100644
index 0000000000..c231100e34
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/literal_cost.c
@@ -0,0 +1,175 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Literal cost model to allow backward reference replacement to be efficient.
+*/
+
+#include "./literal_cost.h"
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./fast_log.h"
+#include "./utf8_util.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
+  /* ASCII byte: the next byte is a 'Byte 1' again. */
+  if (c < 128) return 0;
+
+  /* Byte of 192 or above: the next byte is 'Byte 2', limited by clamp. */
+  if (c >= 192) return BROTLI_MIN(size_t, 1, clamp);
+
+  /* c is in [128, 192): the previous byte tells whether the sequence ends
+     here. Below 0xE0 a two or three byte coding has been completed. */
+  if (last < 0xE0) return 0;
+
+  /* Otherwise the next byte is 'Byte 3', limited by clamp. */
+  return BROTLI_MIN(size_t, 2, clamp);
+}
+
+static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
+                                        const uint8_t* data) {
+  /* counts[k]: how many bytes are followed by UTF-8 position k. */
+  size_t counts[3] = { 0 };
+  size_t prev = 0;
+  size_t offset = 0;
+  while (offset < len) {
+    const size_t cur = data[(pos + offset) & mask];
+    counts[UTF8Position(prev, cur, 2)] += 1;
+    prev = cur;
+    ++offset;
+  }
+  /* Too few multi-byte positions: fall back to single-byte modeling. */
+  if (counts[1] + counts[2] < 25) return 0;
+  /* The level stays at 1 whether or not 'Byte 3' positions are frequent
+     (counts[2] >= 500): per the original note the value should be 2, but 1
+     compresses better. */
+  return 1;
+}
+
+static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
+                                            const uint8_t* data, float* cost) {
+  /* Per-byte cost estimate using one histogram per UTF-8 position. max_utf8:
+     0 = single byte modeling, 1 = 2-byte UTF-8, 2 = 3-byte UTF-8 modeling. */
+  const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
+  size_t histogram[3][256] = { { 0 } };  /* [utf8 position][byte value] */
+  size_t window_half = 495;
+  size_t in_window = BROTLI_MIN(size_t, window_half, len);
+  size_t in_window_utf8[3] = { 0 };  /* bytes in window per utf8 position */
+
+  size_t i;
+  {  /* Bootstrap histograms with the first in_window bytes. */
+    size_t last_c = 0;
+    size_t utf8_pos = 0;
+    for (i = 0; i < in_window; ++i) {
+      size_t c = data[(pos + i) & mask];
+      ++histogram[utf8_pos][c];
+      ++in_window_utf8[utf8_pos];
+      utf8_pos = UTF8Position(last_c, c, max_utf8);
+      last_c = c;
+    }
+  }
+
+  /* Compute bit costs with sliding window; cost[i] is written for each i. */
+  for (i = 0; i < len; ++i) {
+    if (i >= window_half) {
+      /* Remove a byte in the past: the one at offset i - window_half. */
+      size_t c =
+          i < window_half + 1 ? 0 : data[(pos + i - window_half - 1) & mask];
+      size_t last_c =
+          i < window_half + 2 ? 0 : data[(pos + i - window_half - 2) & mask];
+      size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
+      --histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
+      --in_window_utf8[utf8_pos2];
+    }
+    if (i + window_half < len) {
+      /* Add a byte in the future: the one at offset i + window_half. */
+      size_t c = data[(pos + i + window_half - 1) & mask];
+      size_t last_c = data[(pos + i + window_half - 2) & mask];
+      size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
+      ++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
+      ++in_window_utf8[utf8_pos2];
+    }
+    {
+      size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
+      size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
+      size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
+      size_t masked_pos = (pos + i) & mask;
+      size_t histo = histogram[utf8_pos][data[masked_pos]];
+      double lit_cost;
+      if (histo == 0) {
+        histo = 1;  /* a zero count is treated as one before taking the log */
+      }
+      lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
+      lit_cost += 0.02905;
+      if (lit_cost < 1.0) {  /* costs under one bit move halfway towards one */
+        lit_cost *= 0.5;
+        lit_cost += 0.5;
+      }
+      /* Make the first bytes more expensive -- seems to help, not sure why.
+         Perhaps because the entropy source is changing its properties
+         rapidly in the beginning of the file, perhaps because the beginning
+         of the data is a statistical "anomaly". */
+      if (i < 2000) {
+        lit_cost += 0.7 - ((double)(2000 - i) / 2000.0 * 0.35);
+      }
+      cost[i] = (float)lit_cost;
+    }
+  }
+}
+
+void BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
+                                       const uint8_t* data, float* cost) {
+  /* Mostly-UTF-8 input goes to the UTF-8 position aware estimator. */
+  if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
+    EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
+  } else {
+    /* Plain byte-frequency model over a window that slides with the
+       position and reaches roughly kHalf bytes to either side of it. */
+    const size_t kHalf = 2000;
+    size_t freq[256] = { 0 };
+    size_t window_size = BROTLI_MIN(size_t, kHalf, len);
+    size_t k;
+
+    /* Seed the histogram with the first window_size bytes. */
+    for (k = 0; k < window_size; ++k) {
+      freq[data[(pos + k) & mask]] += 1;
+    }
+
+    for (k = 0; k < len; ++k) {
+      size_t seen;
+      double bits;
+      /* Retire the byte that fell out of the back of the window. */
+      if (k >= kHalf) {
+        freq[data[(pos + k - kHalf) & mask]] -= 1;
+        window_size -= 1;
+      }
+      /* Admit the byte that entered at the front of the window. */
+      if (k + kHalf < len) {
+        freq[data[(pos + k + kHalf) & mask]] += 1;
+        window_size += 1;
+      }
+      /* A count of zero is replaced by one before taking the logarithm. */
+      seen = freq[data[(pos + k) & mask]];
+      if (seen == 0) seen = 1;
+      bits = FastLog2(window_size) - FastLog2(seen);
+      bits += 0.029;
+      /* Estimates under one bit are moved halfway towards one bit. */
+      if (bits < 1.0) {
+        bits *= 0.5;
+        bits += 0.5;
+      }
+      cost[k] = (float)bits;
+    }
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/literal_cost.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/literal_cost.h
new file mode 100644
index 0000000000..8f53f39d3f
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/literal_cost.h
@@ -0,0 +1,30 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Literal cost model to allow backward reference replacement to be efficient.
+*/
+
+#ifndef BROTLI_ENC_LITERAL_COST_H_
+#define BROTLI_ENC_LITERAL_COST_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Estimates how many bits each literal in the interval [pos, pos + len) of the
+   ring-buffer (data, mask) will take once entropy coded, and writes these
+   estimates to the cost[0..len) array. */
+BROTLI_INTERNAL void BrotliEstimateBitCostsForLiterals(
+    size_t pos, size_t len, size_t mask, const uint8_t* data, float* cost);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_LITERAL_COST_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/memory.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/memory.c
new file mode 100644
index 0000000000..f6ed7e3cb7
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/memory.c
@@ -0,0 +1,170 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Memory manager of the encoder: allocation and release wrappers that can
+   keep track of live pointers so they can all be freed in one sweep. */
+
+#include "./memory.h"
+
+#include <stdlib.h>  /* exit, free, malloc */
+#include <string.h>  /* memcpy */
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define MAX_PERM_ALLOCATED 128  /* capacity of the "perm allocated" segment */
+#define MAX_NEW_ALLOCATED 64  /* capacity of the "new allocated" segment */
+#define MAX_NEW_FREED 64  /* capacity of the "new freed" segment */
+
+#define PERM_ALLOCATED_OFFSET 0  /* start index of each segment in pointers[] */
+#define NEW_ALLOCATED_OFFSET MAX_PERM_ALLOCATED
+#define NEW_FREED_OFFSET (MAX_PERM_ALLOCATED + MAX_NEW_ALLOCATED)
+
+void BrotliInitMemoryManager(
+    MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
+    void* opaque) {
+  /* The caller's free_func and opaque are honoured only together with a
+     caller-supplied alloc_func; otherwise the default allocator pair is
+     installed with a zero opaque value. */
+  const BROTLI_BOOL custom = alloc_func ? BROTLI_TRUE : BROTLI_FALSE;
+  m->alloc_func = custom ? alloc_func : BrotliDefaultAllocFunc;
+  m->free_func = custom ? free_func : BrotliDefaultFreeFunc;
+  m->opaque = custom ? opaque : 0;
+
+#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
+  /* Pointer tracking starts out empty and the OOM flag cleared. */
+  m->new_freed = 0;
+  m->new_allocated = 0;
+  m->perm_allocated = 0;
+  m->is_oom = BROTLI_FALSE;
+#endif  /* BROTLI_ENCODER_EXIT_ON_OOM */
+}
+
+#if defined(BROTLI_ENCODER_EXIT_ON_OOM)
+
+void* BrotliAllocate(MemoryManager* m, size_t n) {
+  void* const block = m->alloc_func(m->opaque, n);
+  if (block == NULL) exit(EXIT_FAILURE);  /* this build exits on failure */
+  return block;
+}
+
+void BrotliFree(MemoryManager* m, void* p) {
+  m->free_func(m->opaque, p);  /* forwarded as is: no tracking, no NULL check */
+}
+
+void BrotliWipeOutMemoryManager(MemoryManager* m) {
+  BROTLI_UNUSED(m);  /* this build records no pointers, so nothing to free */
+}
+
+#else  /* BROTLI_ENCODER_EXIT_ON_OOM */
+
+static void SortPointers(void** items, const size_t n) {
+  /* Shell sort: gapped insertion sort over a fixed, decreasing gap list. */
+  static const size_t kGaps[] = {23, 10, 4, 1};
+  size_t pass, head;
+  for (pass = 0; pass < sizeof(kGaps) / sizeof(kGaps[0]); ++pass) {
+    const size_t step = kGaps[pass];
+    for (head = step; head < n; ++head) {
+      void* const moving = items[head];
+      size_t slot = head;
+      while (slot >= step && moving < items[slot - step]) {
+        items[slot] = items[slot - step];
+        slot -= step;
+      }
+      items[slot] = moving;
+    }
+  }
+}
+
+static size_t Annihilate(void** a, size_t a_len, void** b, size_t b_len) {
+  /* Merge-walk two sorted lists, dropping entries that occur in both. */
+  size_t a_in = 0, a_out = 0;
+  size_t b_in = 0, b_out = 0;
+  size_t matched = 0;
+  while (a_in < a_len && b_in < b_len) {
+    if (a[a_in] < b[b_in]) {
+      a[a_out++] = a[a_in++];
+    } else if (b[b_in] < a[a_in]) {
+      b[b_out++] = b[b_in++];
+    } else {
+      /* Same pointer on both sides: drop it from each list. */
+      ++a_in;
+      ++b_in;
+      ++matched;
+    }
+  }
+  for (; a_in < a_len; ++a_in) a[a_out++] = a[a_in];
+  for (; b_in < b_len; ++b_in) b[b_out++] = b[b_in];
+  return matched;
+}
+
+static void CollectGarbagePointers(MemoryManager* m) {
+  size_t annihilated;  /* number of pointers cancelled by the latest pass */
+  SortPointers(m->pointers + NEW_ALLOCATED_OFFSET, m->new_allocated);
+  SortPointers(m->pointers + NEW_FREED_OFFSET, m->new_freed);
+  annihilated = Annihilate(  /* cancel pointers both allocated and freed */
+      m->pointers + NEW_ALLOCATED_OFFSET, m->new_allocated,
+      m->pointers + NEW_FREED_OFFSET, m->new_freed);
+  m->new_allocated -= annihilated;
+  m->new_freed -= annihilated;
+  /* Frees that are still pending are matched against the perm list. */
+  if (m->new_freed != 0) {
+    annihilated = Annihilate(
+        m->pointers + PERM_ALLOCATED_OFFSET, m->perm_allocated,
+        m->pointers + NEW_FREED_OFFSET, m->new_freed);
+    m->perm_allocated -= annihilated;
+    m->new_freed -= annihilated;
+    BROTLI_DCHECK(m->new_freed == 0);
+  }
+  /* Surviving new allocations are appended to the perm list and re-sorted. */
+  if (m->new_allocated != 0) {
+    BROTLI_DCHECK(m->perm_allocated + m->new_allocated <= MAX_PERM_ALLOCATED);
+    memcpy(m->pointers + PERM_ALLOCATED_OFFSET + m->perm_allocated,
+           m->pointers + NEW_ALLOCATED_OFFSET,
+           sizeof(void*) * m->new_allocated);
+    m->perm_allocated += m->new_allocated;
+    m->new_allocated = 0;
+    SortPointers(m->pointers + PERM_ALLOCATED_OFFSET, m->perm_allocated);
+  }
+}
+
+void* BrotliAllocate(MemoryManager* m, size_t n) {
+  void* const block = m->alloc_func(m->opaque, n);
+  if (block == NULL) {
+    m->is_oom = BROTLI_TRUE;  /* callers observe this through BROTLI_IS_OOM */
+  } else {
+    if (m->new_allocated == MAX_NEW_ALLOCATED) CollectGarbagePointers(m);
+    m->pointers[NEW_ALLOCATED_OFFSET + (m->new_allocated++)] = block;
+  }
+  return block;
+}
+
+void BrotliFree(MemoryManager* m, void* p) {
+  if (!p) return;  /* a NULL pointer is neither freed nor recorded */
+  m->free_func(m->opaque, p);
+  if (m->new_freed == MAX_NEW_FREED) CollectGarbagePointers(m);
+  m->pointers[NEW_FREED_OFFSET + (m->new_freed++)] = p;  /* record the free */
+}
+
+void BrotliWipeOutMemoryManager(MemoryManager* m) {
+  size_t k = 0;
+  CollectGarbagePointers(m);
+  /* After collection every pointer not yet freed sits in the perm list. */
+  while (k < m->perm_allocated) {
+    m->free_func(m->opaque, m->pointers[PERM_ALLOCATED_OFFSET + k++]);
+  }
+  m->perm_allocated = 0;
+}
+
+#endif  /* BROTLI_ENCODER_EXIT_ON_OOM */
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/memory.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/memory.h
new file mode 100644
index 0000000000..832e7b2b6e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/memory.h
@@ -0,0 +1,114 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Macros for memory management. */
+
+#ifndef BROTLI_ENC_MEMORY_H_
+#define BROTLI_ENC_MEMORY_H_
+
+#include <string.h>  /* memcpy */
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#if !defined(BROTLI_ENCODER_CLEANUP_ON_OOM) && \
+    !defined(BROTLI_ENCODER_EXIT_ON_OOM)
+#define BROTLI_ENCODER_EXIT_ON_OOM
+#endif
+
+typedef struct MemoryManager {
+  brotli_alloc_func alloc_func;
+  brotli_free_func free_func;
+  void* opaque;  /* passed as first argument to alloc_func and free_func */
+#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
+  BROTLI_BOOL is_oom;  /* read by BROTLI_IS_OOM */
+  size_t perm_allocated;  /* entries in use in the "perm allocated" segment */
+  size_t new_allocated;  /* entries in use in the "new allocated" segment */
+  size_t new_freed;  /* entries in use in the "new freed" segment */
+  void* pointers[256];  /* storage for the three segments (see memory.c) */
+#endif  /* BROTLI_ENCODER_EXIT_ON_OOM */
+} MemoryManager;
+
+BROTLI_INTERNAL void BrotliInitMemoryManager(
+    MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
+    void* opaque);
+
+BROTLI_INTERNAL void* BrotliAllocate(MemoryManager* m, size_t n);
+#define BROTLI_ALLOC(M, T, N)  /* yields NULL, no allocation, unless N > 0 */ \
+  ((N) > 0 ? ((T*)BrotliAllocate((M), (N) * sizeof(T))) : NULL)
+
+BROTLI_INTERNAL void BrotliFree(MemoryManager* m, void* p);
+#define BROTLI_FREE(M, P) {  /* frees P, then assigns NULL to the lvalue P */ \
+  BrotliFree((M), (P));     \
+  P = NULL;                 \
+}
+
+#if defined(BROTLI_ENCODER_EXIT_ON_OOM)
+#define BROTLI_IS_OOM(M) (!!0)  /* constant false in the exit-on-OOM build */
+#else  /* BROTLI_ENCODER_EXIT_ON_OOM */
+#define BROTLI_IS_OOM(M) (!!(M)->is_oom)  /* reads the manager's is_oom flag */
+#endif  /* BROTLI_ENCODER_EXIT_ON_OOM */
+
+/*
+BROTLI_IS_NULL is a fake check, BROTLI_IS_OOM does the heavy lifting.
+Its only purpose is to explain the state of things to static analyzers.
+NB: use ONLY together with BROTLI_IS_OOM
+    AND ONLY for allocations in the current scope.
+ */
+#if defined(__clang_analyzer__) && !defined(BROTLI_ENCODER_EXIT_ON_OOM)
+#define BROTLI_IS_NULL(A) ((A) == NULL)  /* NULL, not nullptr: valid in C too */
+#else  /* defined(__clang_analyzer__) */
+#define BROTLI_IS_NULL(A) (!!0)
+#endif  /* defined(__clang_analyzer__) */
+
+BROTLI_INTERNAL void BrotliWipeOutMemoryManager(MemoryManager* m);
+
+/* Grows array A to hold at least R elements: the capacity is doubled until it
+   reaches R (or set to R when C is 0), C elements are copied, old A is freed.
+M: MemoryManager
+T: data type
+A: array (lvalue, reassigned to the new storage)
+C: capacity (lvalue, updated to the new capacity)
+R: requested size
+*/
+#define BROTLI_ENSURE_CAPACITY(M, T, A, C, R) {                    \
+  if (C < (R)) {                                                   \
+    size_t _new_size = (C == 0) ? (R) : C;                         \
+    T* new_array;                                                  \
+    while (_new_size < (R)) _new_size *= 2;                        \
+    new_array = BROTLI_ALLOC((M), T, _new_size);                   \
+    if (!BROTLI_IS_OOM(M) && !BROTLI_IS_NULL(new_array) && C != 0) \
+      memcpy(new_array, A, C * sizeof(T));                         \
+    BROTLI_FREE((M), A);                                           \
+    A = new_array;                                                 \
+    C = _new_size;                                                 \
+  }                                                                \
+}
+
+/* Appends V to array A, growing the capacity when needed: S is incremented
+   first, capacity is ensured for the new S, then V is stored at A[S - 1].
+M: MemoryManager
+T: data type
+A: array
+C: array capacity
+S: array size
+V: value to append
+*/
+#define BROTLI_ENSURE_CAPACITY_APPEND(M, T, A, C, S, V) { \
+  (S)++;                                                  \
+  BROTLI_ENSURE_CAPACITY(M, T, A, C, S);                  \
+  A[(S) - 1] = (V);                                       \
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_MEMORY_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/metablock.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/metablock.c
new file mode 100644
index 0000000000..5aa4d4f17c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/metablock.c
@@ -0,0 +1,663 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Algorithms for distributing the literals and commands of a metablock between
+   block types and contexts. */
+
+#include "./metablock.h"
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./bit_cost.h"
+#include "./block_splitter.h"
+#include "./cluster.h"
+#include "./entropy_encode.h"
+#include "./histogram.h"
+#include "./memory.h"
+#include "./quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+void BrotliInitDistanceParams(BrotliEncoderParams* params,
+    uint32_t npostfix, uint32_t ndirect) {
+  BrotliDistanceParams* const dist = &params->dist;
+  uint32_t size_max;
+  uint32_t size_limit;
+  uint32_t farthest;
+  if (params->large_window) {
+    /* Large window: limit and maximal distance come from the code-limit
+       calculation; the alphabet bound uses the large distance bit count. */
+    BrotliDistanceCodeLimit limit = BrotliCalculateDistanceCodeLimit(
+        BROTLI_MAX_ALLOWED_DISTANCE, npostfix, ndirect);
+    size_max = BROTLI_DISTANCE_ALPHABET_SIZE(
+        npostfix, ndirect, BROTLI_LARGE_MAX_DISTANCE_BITS);
+    size_limit = limit.max_alphabet_size;
+    farthest = limit.max_distance;
+  } else {
+    /* Regular window: the limit is the full alphabet size. */
+    size_max = BROTLI_DISTANCE_ALPHABET_SIZE(
+        npostfix, ndirect, BROTLI_MAX_DISTANCE_BITS);
+    size_limit = size_max;
+    farthest = ndirect + (1U << (BROTLI_MAX_DISTANCE_BITS + npostfix + 2)) -
+        (1U << (npostfix + 2));
+  }
+  dist->distance_postfix_bits = npostfix;
+  dist->num_direct_distance_codes = ndirect;
+  dist->alphabet_size_max = size_max;
+  dist->alphabet_size_limit = size_limit;
+  dist->max_distance = farthest;
+}
+
+static void RecomputeDistancePrefixes(Command* cmds,
+                                      size_t num_commands,
+                                      const BrotliDistanceParams* orig_params,
+                                      const BrotliDistanceParams* new_params) {
+  size_t k;
+
+  /* Re-encoding is needed only if at least one distance parameter differs. */
+  if (orig_params->distance_postfix_bits != new_params->distance_postfix_bits ||
+      orig_params->num_direct_distance_codes !=
+      new_params->num_direct_distance_codes) {
+    for (k = 0; k < num_commands; ++k) {
+      Command* const cmd = cmds + k;
+      /* Only commands with a copy part and cmd_prefix_ >= 128 qualify. */
+      if (!CommandCopyLen(cmd) || cmd->cmd_prefix_ < 128) continue;
+      PrefixEncodeCopyDistance(
+          CommandRestoreDistanceCode(cmd, orig_params),
+          new_params->num_direct_distance_codes,
+          new_params->distance_postfix_bits,
+          &cmd->dist_prefix_,
+          &cmd->dist_extra_);
+    }
+  }
+}
+
+static BROTLI_BOOL ComputeDistanceCost(const Command* cmds,
+                                       size_t num_commands,
+                                       const BrotliDistanceParams* orig_params,
+                                       const BrotliDistanceParams* new_params,
+                                       double* cost) {
+  /* When both parameter sets agree, the stored prefixes can be reused. */
+  const BROTLI_BOOL reuse_prefix =
+      (orig_params->distance_postfix_bits ==
+           new_params->distance_postfix_bits &&
+       orig_params->num_direct_distance_codes ==
+           new_params->num_direct_distance_codes)
+          ? BROTLI_TRUE : BROTLI_FALSE;
+  HistogramDistance histo;
+  double extra_bits = 0.0;
+  size_t k;
+
+  HistogramClearDistance(&histo);
+
+  for (k = 0; k < num_commands; ++k) {
+    const Command* cmd = cmds + k;
+    uint16_t prefix;
+    uint32_t extra;  /* written by PrefixEncodeCopyDistance, not read here */
+    if (!CommandCopyLen(cmd) || cmd->cmd_prefix_ < 128) continue;
+    if (reuse_prefix) {
+      prefix = cmd->dist_prefix_;
+    } else {
+      const uint32_t distance = CommandRestoreDistanceCode(cmd, orig_params);
+      /* The candidate parameters cannot represent this distance. */
+      if (distance > new_params->max_distance) return BROTLI_FALSE;
+      PrefixEncodeCopyDistance(distance,
+                               new_params->num_direct_distance_codes,
+                               new_params->distance_postfix_bits,
+                               &prefix,
+                               &extra);
+    }
+    /* Low 10 bits: histogram symbol; upper bits: added to the bit total. */
+    HistogramAddDistance(&histo, prefix & 0x3FF);
+    extra_bits += prefix >> 10;
+  }
+
+  *cost = BrotliPopulationCostDistance(&histo) + extra_bits;
+  return BROTLI_TRUE;
+}
+
+void BrotliBuildMetaBlock(MemoryManager* m,
+                          const uint8_t* ringbuffer,
+                          const size_t pos,
+                          const size_t mask,
+                          BrotliEncoderParams* params,  /* dist is updated */
+                          uint8_t prev_byte,
+                          uint8_t prev_byte2,
+                          Command* cmds,  /* distance prefixes may change */
+                          size_t num_commands,
+                          ContextType literal_context_mode,
+                          MetaBlockSplit* mb) {  /* receives the results */
+  /* Histogram ids need to fit in one byte. */
+  static const size_t kMaxNumberOfHistograms = 256;
+  HistogramDistance* distance_histograms;
+  HistogramLiteral* literal_histograms;
+  ContextType* literal_context_modes = NULL;
+  size_t literal_histograms_size;
+  size_t distance_histograms_size;
+  size_t i;
+  size_t literal_context_multiplier = 1;
+  uint32_t npostfix;
+  uint32_t ndirect_msb = 0;
+  BROTLI_BOOL check_orig = BROTLI_TRUE;
+  double best_dist_cost = 1e99;
+  BrotliEncoderParams orig_params = *params;
+  BrotliEncoderParams new_params = *params;
+  /* Try (npostfix, ndirect) pairs and keep the cheapest distance coding. */
+  for (npostfix = 0; npostfix <= BROTLI_MAX_NPOSTFIX; npostfix++) {
+    for (; ndirect_msb < 16; ndirect_msb++) {
+      uint32_t ndirect = ndirect_msb << npostfix;
+      BROTLI_BOOL skip;
+      double dist_cost;
+      BrotliInitDistanceParams(&new_params, npostfix, ndirect);
+      if (npostfix == orig_params.dist.distance_postfix_bits &&
+          ndirect == orig_params.dist.num_direct_distance_codes) {
+        check_orig = BROTLI_FALSE;
+      }
+      skip = !ComputeDistanceCost(
+          cmds, num_commands,
+          &orig_params.dist, &new_params.dist, &dist_cost);
+      if (skip || (dist_cost > best_dist_cost)) {  /* unusable or costlier */
+        break;
+      }
+      best_dist_cost = dist_cost;
+      params->dist = new_params.dist;
+    }
+    if (ndirect_msb > 0) ndirect_msb--;
+    ndirect_msb /= 2;
+  }
+  if (check_orig) {  /* the original pair was not among the candidates */
+    double dist_cost;
+    ComputeDistanceCost(cmds, num_commands,
+                        &orig_params.dist, &orig_params.dist, &dist_cost);
+    if (dist_cost < best_dist_cost) {
+      /* NB: currently unused; uncomment when more param tuning is added. */
+      /* best_dist_cost = dist_cost; */
+      params->dist = orig_params.dist;
+    }
+  }
+  RecomputeDistancePrefixes(cmds, num_commands,
+                            &orig_params.dist, &params->dist);
+
+  BrotliSplitBlock(m, cmds, num_commands,
+                   ringbuffer, pos, mask, params,
+                   &mb->literal_split,
+                   &mb->command_split,
+                   &mb->distance_split);
+  if (BROTLI_IS_OOM(m)) return;
+
+  if (!params->disable_literal_context_modeling) {
+    literal_context_multiplier = 1 << BROTLI_LITERAL_CONTEXT_BITS;
+    literal_context_modes =
+        BROTLI_ALLOC(m, ContextType, mb->literal_split.num_types);
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literal_context_modes)) return;
+    for (i = 0; i < mb->literal_split.num_types; ++i) {
+      literal_context_modes[i] = literal_context_mode;
+    }
+  }
+
+  literal_histograms_size =
+      mb->literal_split.num_types * literal_context_multiplier;
+  literal_histograms =
+      BROTLI_ALLOC(m, HistogramLiteral, literal_histograms_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literal_histograms)) return;
+  ClearHistogramsLiteral(literal_histograms, literal_histograms_size);
+
+  distance_histograms_size =
+      mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
+  distance_histograms =
+      BROTLI_ALLOC(m, HistogramDistance, distance_histograms_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(distance_histograms)) return;
+  ClearHistogramsDistance(distance_histograms, distance_histograms_size);
+
+  BROTLI_DCHECK(mb->command_histograms == 0);
+  mb->command_histograms_size = mb->command_split.num_types;
+  mb->command_histograms =
+      BROTLI_ALLOC(m, HistogramCommand, mb->command_histograms_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->command_histograms)) return;
+  ClearHistogramsCommand(mb->command_histograms, mb->command_histograms_size);
+
+  BrotliBuildHistogramsWithContext(cmds, num_commands,
+      &mb->literal_split, &mb->command_split, &mb->distance_split,
+      ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes,
+      literal_histograms, mb->command_histograms, distance_histograms);
+  BROTLI_FREE(m, literal_context_modes);
+
+  BROTLI_DCHECK(mb->literal_context_map == 0);
+  mb->literal_context_map_size =
+      mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
+  mb->literal_context_map =
+      BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->literal_context_map)) return;
+
+  BROTLI_DCHECK(mb->literal_histograms == 0);
+  mb->literal_histograms_size = mb->literal_context_map_size;
+  mb->literal_histograms =
+      BROTLI_ALLOC(m, HistogramLiteral, mb->literal_histograms_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->literal_histograms)) return;
+
+  BrotliClusterHistogramsLiteral(m, literal_histograms, literal_histograms_size,
+      kMaxNumberOfHistograms, mb->literal_histograms,
+      &mb->literal_histograms_size, mb->literal_context_map);
+  if (BROTLI_IS_OOM(m)) return;
+  BROTLI_FREE(m, literal_histograms);
+
+  if (params->disable_literal_context_modeling) {
+    /* Distribute assignment to all contexts; i runs downwards so that entry i
+       is read before the expanded range of a lower type overwrites it. */
+    for (i = mb->literal_split.num_types; i != 0;) {
+      size_t j = 0;
+      i--;
+      for (; j < (1 << BROTLI_LITERAL_CONTEXT_BITS); j++) {
+        mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
+            mb->literal_context_map[i];
+      }
+    }
+  }
+
+  BROTLI_DCHECK(mb->distance_context_map == 0);
+  mb->distance_context_map_size =
+      mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
+  mb->distance_context_map =
+      BROTLI_ALLOC(m, uint32_t, mb->distance_context_map_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->distance_context_map)) return;
+
+  BROTLI_DCHECK(mb->distance_histograms == 0);
+  mb->distance_histograms_size = mb->distance_context_map_size;
+  mb->distance_histograms =
+      BROTLI_ALLOC(m, HistogramDistance, mb->distance_histograms_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->distance_histograms)) return;
+
+  BrotliClusterHistogramsDistance(m, distance_histograms,
+                                  mb->distance_context_map_size,
+                                  kMaxNumberOfHistograms,
+                                  mb->distance_histograms,
+                                  &mb->distance_histograms_size,
+                                  mb->distance_context_map);
+  if (BROTLI_IS_OOM(m)) return;
+  BROTLI_FREE(m, distance_histograms);
+}
+
+#define FN(X) X ## Literal
+#include "./metablock_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Command
+#include "./metablock_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Distance
+#include "./metablock_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define BROTLI_MAX_STATIC_CONTEXTS 13
+
+/* Greedy block splitter for one block category (literal, command or distance).
+   Gathers histograms for all context buckets (stored as HistogramLiteral). */
+typedef struct ContextBlockSplitter {
+  /* Alphabet size of particular block category. */
+  size_t alphabet_size_;
+  size_t num_contexts_;  /* number of histograms kept per block type */
+  size_t max_block_types_;  /* BROTLI_MAX_NUMBER_OF_BLOCK_TYPES / num_contexts_ */
+  /* We collect at least this many symbols for each block. */
+  size_t min_block_size_;
+  /* We merge histograms A and B if
+       entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
+     where A is the current histogram and B is the histogram of the last or the
+     second last block type. */
+  double split_threshold_;
+
+  size_t num_blocks_;  /* starts at 0; incremented as blocks are finished */
+  BlockSplit* split_;  /* not owned */
+  HistogramLiteral* histograms_;  /* not owned */
+  size_t* histograms_size_;  /* not owned */
+
+  /* The number of symbols that we want to collect before deciding on whether
+     or not to merge the block with a previous one or emit a new block. */
+  size_t target_block_size_;
+  /* The number of symbols in the current histogram. */
+  size_t block_size_;
+  /* Offset of the current histogram. */
+  size_t curr_histogram_ix_;
+  /* Offset of the histograms of the previous two block types. */
+  size_t last_histogram_ix_[2];
+  /* Entropy of the previous two block types. */
+  double last_entropy_[2 * BROTLI_MAX_STATIC_CONTEXTS];
+  /* The number of times we merged the current block with the last one. */
+  size_t merge_last_count_;
+} ContextBlockSplitter;
+
+/* Prepares `self` and sizes the `split` arrays and the histogram storage. */
+static void InitContextBlockSplitter(
+    MemoryManager* m, ContextBlockSplitter* self, size_t alphabet_size,
+    size_t num_contexts, size_t min_block_size, double split_threshold,
+    size_t num_symbols, BlockSplit* split, HistogramLiteral** histograms,
+    size_t* histograms_size) {
+  size_t max_num_blocks = num_symbols / min_block_size + 1;
+  size_t max_num_types;
+  BROTLI_DCHECK(num_contexts <= BROTLI_MAX_STATIC_CONTEXTS);
+
+  self->alphabet_size_ = alphabet_size;
+  self->num_contexts_ = num_contexts;
+  self->max_block_types_ = BROTLI_MAX_NUMBER_OF_BLOCK_TYPES / num_contexts;
+  self->min_block_size_ = min_block_size;
+  self->split_threshold_ = split_threshold;
+  self->num_blocks_ = 0;
+  self->split_ = split;
+  self->histograms_size_ = histograms_size;
+  self->target_block_size_ = min_block_size;
+  self->block_size_ = 0;
+  self->curr_histogram_ix_ = 0;
+  self->merge_last_count_ = 0;
+
+  /* We have to allocate one more histogram than the maximum number of block
+     types for the current histogram when the meta-block is too big. */
+  max_num_types =
+      BROTLI_MIN(size_t, max_num_blocks, self->max_block_types_ + 1);
+  BROTLI_ENSURE_CAPACITY(m, uint8_t,
+      split->types, split->types_alloc_size, max_num_blocks);
+  BROTLI_ENSURE_CAPACITY(m, uint32_t,
+      split->lengths, split->lengths_alloc_size, max_num_blocks);
+  if (BROTLI_IS_OOM(m)) return;  /* one check covers both array growths */
+  split->num_blocks = max_num_blocks;
+  BROTLI_DCHECK(*histograms == 0);
+  *histograms_size = max_num_types * num_contexts;
+  *histograms = BROTLI_ALLOC(m, HistogramLiteral, *histograms_size);
+  self->histograms_ = *histograms;
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(*histograms)) return;
+  /* Clear only current histogram. */
+  ClearHistogramsLiteral(&self->histograms_[0], num_contexts);
+  self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
+}
+
+/* Does one of three things:
+     (1) emits the current block with a new block type;
+     (2) emits the current block with the type of the second last block;
+     (3) merges the current block with the last block. */
+static void ContextBlockSplitterFinishBlock(
+    ContextBlockSplitter* self, MemoryManager* m, BROTLI_BOOL is_final) {
+  BlockSplit* split = self->split_;
+  const size_t num_contexts = self->num_contexts_;
+  double* last_entropy = self->last_entropy_;
+  HistogramLiteral* histograms = self->histograms_;
+
+  if (self->block_size_ < self->min_block_size_) {
+    self->block_size_ = self->min_block_size_;
+  }
+  if (self->num_blocks_ == 0) {
+    size_t i;
+    /* Create first block. */
+    split->lengths[0] = (uint32_t)self->block_size_;
+    split->types[0] = 0;
+
+    for (i = 0; i < num_contexts; ++i) {
+      last_entropy[i] =
+          BitsEntropy(histograms[i].data_, self->alphabet_size_);
+      last_entropy[num_contexts + i] = last_entropy[i];
+    }
+    ++self->num_blocks_;
+    ++split->num_types;
+    self->curr_histogram_ix_ += num_contexts;
+    if (self->curr_histogram_ix_ < *self->histograms_size_) {
+      ClearHistogramsLiteral(
+          &self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
+    }
+    self->block_size_ = 0;
+  } else if (self->block_size_ > 0) {
+    /* Try merging the set of histograms for the current block type with the
+       respective set of histograms for the last and second last block types.
+       Decide over the split based on the total reduction of entropy across
+       all contexts. */
+    double entropy[BROTLI_MAX_STATIC_CONTEXTS];
+    HistogramLiteral* combined_histo =
+        BROTLI_ALLOC(m, HistogramLiteral, 2 * num_contexts);
+    double combined_entropy[2 * BROTLI_MAX_STATIC_CONTEXTS];
+    double diff[2] = { 0.0 };
+    size_t i;
+    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(combined_histo)) return;
+    for (i = 0; i < num_contexts; ++i) {
+      size_t curr_histo_ix = self->curr_histogram_ix_ + i;
+      size_t j;
+      entropy[i] = BitsEntropy(histograms[curr_histo_ix].data_,
+                               self->alphabet_size_);
+      for (j = 0; j < 2; ++j) {
+        size_t jx = j * num_contexts + i;
+        size_t last_histogram_ix = self->last_histogram_ix_[j] + i;
+        combined_histo[jx] = histograms[curr_histo_ix];
+        HistogramAddHistogramLiteral(&combined_histo[jx],
+            &histograms[last_histogram_ix]);
+        combined_entropy[jx] = BitsEntropy(
+            &combined_histo[jx].data_[0], self->alphabet_size_);
+        diff[j] += combined_entropy[jx] - entropy[i] - last_entropy[jx];
+      }
+    }
+
+    if (split->num_types < self->max_block_types_ &&
+        diff[0] > self->split_threshold_ &&
+        diff[1] > self->split_threshold_) {
+      /* Create new block. */
+      split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
+      split->types[self->num_blocks_] = (uint8_t)split->num_types;
+      self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
+      self->last_histogram_ix_[0] = split->num_types * num_contexts;
+      for (i = 0; i < num_contexts; ++i) {
+        last_entropy[num_contexts + i] = last_entropy[i];
+        last_entropy[i] = entropy[i];
+      }
+      ++self->num_blocks_;
+      ++split->num_types;
+      self->curr_histogram_ix_ += num_contexts;
+      if (self->curr_histogram_ix_ < *self->histograms_size_) {
+        ClearHistogramsLiteral(
+            &self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
+      }
+      self->block_size_ = 0;
+      self->merge_last_count_ = 0;
+      self->target_block_size_ = self->min_block_size_;
+    } else if (diff[1] < diff[0] - 20.0) {
+      /* Combine this block with second last block. */
+      split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
+      split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
+      BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
+      for (i = 0; i < num_contexts; ++i) {
+        histograms[self->last_histogram_ix_[0] + i] =
+            combined_histo[num_contexts + i];
+        last_entropy[num_contexts + i] = last_entropy[i];
+        last_entropy[i] = combined_entropy[num_contexts + i];
+        HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
+      }
+      ++self->num_blocks_;
+      self->block_size_ = 0;
+      self->merge_last_count_ = 0;
+      self->target_block_size_ = self->min_block_size_;
+    } else {
+      /* Combine this block with last block. */
+      split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
+      for (i = 0; i < num_contexts; ++i) {
+        histograms[self->last_histogram_ix_[0] + i] = combined_histo[i];
+        last_entropy[i] = combined_entropy[i];
+        if (split->num_types == 1) {
+          last_entropy[num_contexts + i] = last_entropy[i];
+        }
+        HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
+      }
+      self->block_size_ = 0;
+      if (++self->merge_last_count_ > 1) {
+        self->target_block_size_ += self->min_block_size_;
+      }
+    }
+    BROTLI_FREE(m, combined_histo);
+  }
+  if (is_final) {
+    *self->histograms_size_ = split->num_types * num_contexts;
+    split->num_blocks = self->num_blocks_;
+  }
+}
+
+/* Records |symbol| in the current block's histogram for |context|. When the
+   block reaches the target size, FinishBlock decides how to emit it. */
+static void ContextBlockSplitterAddSymbol(
+    ContextBlockSplitter* self, MemoryManager* m,
+    size_t symbol, size_t context) {
+  HistogramLiteral* histo =
+      &self->histograms_[self->curr_histogram_ix_ + context];
+  HistogramAddLiteral(histo, symbol);
+  /* Target not reached yet: keep collecting. */
+  if (++self->block_size_ != self->target_block_size_) return;
+  ContextBlockSplitterFinishBlock(self, m, /* is_final = */ BROTLI_FALSE);
+  if (BROTLI_IS_OOM(m)) return;
+}
+
+static void MapStaticContexts(MemoryManager* m,
+                              size_t num_contexts,
+                              const uint32_t* static_context_map,
+                              MetaBlockSplit* mb) {
+  const size_t num_types = mb->literal_split.num_types;
+  size_t type, ctx;
+  BROTLI_DCHECK(mb->literal_context_map == 0);
+  mb->literal_context_map_size = num_types << BROTLI_LITERAL_CONTEXT_BITS;
+  mb->literal_context_map =
+      BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(mb->literal_context_map)) return;
+  /* Entry (type, ctx) is type * num_contexts + static_context_map[ctx]. */
+  for (type = 0; type < num_types; ++type) {
+    uint32_t* row =
+        &mb->literal_context_map[type << BROTLI_LITERAL_CONTEXT_BITS];
+    const uint32_t base = (uint32_t)(type * num_contexts);
+    for (ctx = 0; ctx < (1u << BROTLI_LITERAL_CONTEXT_BITS); ++ctx) {
+      row[ctx] = base + static_context_map[ctx];
+    }
+  }
+}
+/* Feeds all commands and their literals to the greedy splitters of |mb|. */
+static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal(
+    MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
+    uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
+    const size_t num_contexts, const uint32_t* static_context_map,
+    const Command* commands, size_t n_commands, MetaBlockSplit* mb) {
+  union {
+    BlockSplitterLiteral plain;
+    ContextBlockSplitter ctx;
+  } lit_blocks;
+  BlockSplitterCommand cmd_blocks;
+  BlockSplitterDistance dist_blocks;
+  size_t num_literals = 0;
+  size_t i;
+  for (i = 0; i < n_commands; ++i) {
+    num_literals += commands[i].insert_len_;
+  }
+  /* One context: plain literal splitter; otherwise the context-aware one. */
+  if (num_contexts == 1) {
+    InitBlockSplitterLiteral(m, &lit_blocks.plain, 256, 512, 400.0,
+        num_literals, &mb->literal_split, &mb->literal_histograms,
+        &mb->literal_histograms_size);
+  } else {
+    InitContextBlockSplitter(m, &lit_blocks.ctx, 256, num_contexts, 512, 400.0,
+        num_literals, &mb->literal_split, &mb->literal_histograms,
+        &mb->literal_histograms_size);
+  }
+  if (BROTLI_IS_OOM(m)) return;
+  InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024,
+      500.0, n_commands, &mb->command_split, &mb->command_histograms,
+      &mb->command_histograms_size);
+  if (BROTLI_IS_OOM(m)) return;
+  InitBlockSplitterDistance(m, &dist_blocks, 64, 512, 100.0, n_commands,
+      &mb->distance_split, &mb->distance_histograms,
+      &mb->distance_histograms_size);
+  if (BROTLI_IS_OOM(m)) return;
+  /* Replay the commands, tracking |pos| and the two previous bytes. */
+  for (i = 0; i < n_commands; ++i) {
+    const Command cmd = commands[i];
+    size_t j;
+    BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_);
+    for (j = cmd.insert_len_; j != 0; --j) {
+      uint8_t literal = ringbuffer[pos & mask];
+      if (num_contexts == 1) {
+        BlockSplitterAddSymbolLiteral(&lit_blocks.plain, literal);
+      } else {
+        size_t context =
+            BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut);
+        ContextBlockSplitterAddSymbol(&lit_blocks.ctx, m, literal,
+                                      static_context_map[context]);
+        if (BROTLI_IS_OOM(m)) return;
+      }
+      prev_byte2 = prev_byte;
+      prev_byte = literal;
+      ++pos;
+    }
+    pos += CommandCopyLen(&cmd);
+    if (CommandCopyLen(&cmd)) {
+      prev_byte2 = ringbuffer[(pos - 2) & mask];
+      prev_byte = ringbuffer[(pos - 1) & mask];
+      if (cmd.cmd_prefix_ >= 128) {
+        BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_ & 0x3FF);
+      }
+    }
+  }
+  /* Flush the pending blocks; is_final also stores the final counts. */
+  if (num_contexts == 1) {
+    BlockSplitterFinishBlockLiteral(
+        &lit_blocks.plain, /* is_final = */ BROTLI_TRUE);
+  } else {
+    ContextBlockSplitterFinishBlock(
+        &lit_blocks.ctx, m, /* is_final = */ BROTLI_TRUE);
+    if (BROTLI_IS_OOM(m)) return;
+  }
+  BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ BROTLI_TRUE);
+  BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ BROTLI_TRUE);
+  /* The literal context map is only built when there are several contexts. */
+  if (num_contexts > 1) {
+    MapStaticContexts(m, num_contexts, static_context_map, mb);
+  }
+}
+
+/* Public entry point of the greedy meta-block builder. Forwards to the inline
+   worker; the single-context call passes the literal constants 1 and NULL in
+   place of |num_contexts| and |static_context_map|. */
+void BrotliBuildMetaBlockGreedy(MemoryManager* m,
+                                const uint8_t* ringbuffer,
+                                size_t pos, size_t mask,
+                                uint8_t prev_byte, uint8_t prev_byte2,
+                                ContextLut literal_context_lut,
+                                size_t num_contexts,
+                                const uint32_t* static_context_map,
+                                const Command* commands, size_t n_commands,
+                                MetaBlockSplit* mb) {
+  if (num_contexts != 1) {
+    BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
+        prev_byte2, literal_context_lut, num_contexts, static_context_map,
+        commands, n_commands, mb);
+    return;
+  }
+  BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
+      prev_byte2, literal_context_lut, 1, NULL, commands, n_commands, mb);
+}
+
+void BrotliOptimizeHistograms(uint32_t num_distance_codes,
+                              MetaBlockSplit* mb) {
+  uint8_t good_for_rle[BROTLI_NUM_COMMAND_SYMBOLS];  /* work buffer */
+  size_t lit, cmd, dist;
+  for (lit = 0; lit < mb->literal_histograms_size; ++lit) {
+    HistogramLiteral* histo = &mb->literal_histograms[lit];
+    BrotliOptimizeHuffmanCountsForRle(256, histo->data_, good_for_rle);
+  }
+  for (cmd = 0; cmd < mb->command_histograms_size; ++cmd) {
+    HistogramCommand* histo = &mb->command_histograms[cmd];
+    BrotliOptimizeHuffmanCountsForRle(BROTLI_NUM_COMMAND_SYMBOLS,
+                                      histo->data_, good_for_rle);
+  }
+  for (dist = 0; dist < mb->distance_histograms_size; ++dist) {
+    HistogramDistance* histo = &mb->distance_histograms[dist];
+    BrotliOptimizeHuffmanCountsForRle(num_distance_codes, histo->data_,
+                                      good_for_rle);
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/metablock.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/metablock.h
new file mode 100644
index 0000000000..334a79a443
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/metablock.h
@@ -0,0 +1,105 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Algorithms for distributing the literals and commands of a metablock between
+   block types and contexts. */
+
+#ifndef BROTLI_ENC_METABLOCK_H_
+#define BROTLI_ENC_METABLOCK_H_
+
+#include "../common/context.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./block_splitter.h"
+#include "./command.h"
+#include "./histogram.h"
+#include "./memory.h"
+#include "./quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+/* Per-meta-block result: block splits, context maps and histograms. */
+typedef struct MetaBlockSplit {
+  BlockSplit literal_split;
+  BlockSplit command_split;
+  BlockSplit distance_split;
+  uint32_t* literal_context_map;  /* literal_context_map_size entries */
+  size_t literal_context_map_size;
+  uint32_t* distance_context_map;  /* presumably sized by the field below */
+  size_t distance_context_map_size;
+  HistogramLiteral* literal_histograms;  /* literal_histograms_size entries */
+  size_t literal_histograms_size;
+  HistogramCommand* command_histograms;  /* command_histograms_size entries */
+  size_t command_histograms_size;
+  HistogramDistance* distance_histograms;  /* distance_histograms_size entries */
+  size_t distance_histograms_size;
+} MetaBlockSplit;
+
+static BROTLI_INLINE void InitMetaBlockSplit(MetaBlockSplit* mb) {
+  /* Reset the three block splits. */
+  BrotliInitBlockSplit(&mb->literal_split);
+  BrotliInitBlockSplit(&mb->command_split);
+  BrotliInitBlockSplit(&mb->distance_split);
+  /* No context maps or histograms are attached yet. */
+  mb->literal_context_map = mb->distance_context_map = 0;
+  mb->literal_context_map_size = mb->distance_context_map_size = 0;
+  mb->literal_histograms = 0;
+  mb->command_histograms = 0;
+  mb->distance_histograms = 0;
+  mb->literal_histograms_size = 0;
+  mb->command_histograms_size = 0;
+  mb->distance_histograms_size = 0;
+}
+/* Frees the block splits, context maps and histograms of |mb| through |m|. */
+static BROTLI_INLINE void DestroyMetaBlockSplit(
+    MemoryManager* m, MetaBlockSplit* mb) {
+  BrotliDestroyBlockSplit(m, &mb->literal_split);
+  BrotliDestroyBlockSplit(m, &mb->command_split);
+  BrotliDestroyBlockSplit(m, &mb->distance_split);
+  BROTLI_FREE(m, mb->literal_context_map);  /* context maps */
+  BROTLI_FREE(m, mb->distance_context_map);
+  BROTLI_FREE(m, mb->literal_histograms);  /* histogram arrays */
+  BROTLI_FREE(m, mb->command_histograms);
+  BROTLI_FREE(m, mb->distance_histograms);
+}
+
+/* Uses the slow shortest-path block splitter and does context clustering.
+   The distance parameters are dynamically selected based on the commands
+   which get recomputed under the new distance parameters. The new distance
+   parameters are stored into *params. */
+BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
+                                          const uint8_t* ringbuffer,
+                                          const size_t pos,
+                                          const size_t mask,
+                                          BrotliEncoderParams* params,
+                                          uint8_t prev_byte,
+                                          uint8_t prev_byte2,
+                                          Command* cmds,
+                                          size_t num_commands,
+                                          ContextType literal_context_mode,
+                                          MetaBlockSplit* mb);
+
+/* Uses a fast greedy block splitter that tries to merge current block with the
+   last or the second last block and uses a static context clustering which
+   is the same for all block types. */
+BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(
+    MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
+    uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
+    size_t num_contexts, const uint32_t* static_context_map,
+    const Command* commands, size_t n_commands, MetaBlockSplit* mb);
+/* Runs BrotliOptimizeHuffmanCountsForRle over every histogram of |mb|. */
+BROTLI_INTERNAL void BrotliOptimizeHistograms(uint32_t num_distance_codes,
+                                              MetaBlockSplit* mb);
+/* NOTE(review): presumably derives params->dist from its inputs -- confirm. */
+BROTLI_INTERNAL void BrotliInitDistanceParams(BrotliEncoderParams* params,
+    uint32_t npostfix, uint32_t ndirect);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_METABLOCK_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/metablock_inc.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/metablock_inc.h
new file mode 100644
index 0000000000..ed507ef5ef
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/metablock_inc.h
@@ -0,0 +1,183 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+#define HistogramType FN(Histogram)
+
+/* Greedy block splitter for one block category (literal, command or distance).
+*/
+typedef struct FN(BlockSplitter) {
+  /* Alphabet size of particular block category. */
+  size_t alphabet_size_;
+  /* We collect at least this many symbols for each block. */
+  size_t min_block_size_;
+  /* We merge histograms A and B if
+       entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
+     where A is the current histogram and B is the histogram of the last or the
+     second last block type. */
+  double split_threshold_;
+
+  size_t num_blocks_;  /* blocks emitted so far */
+  BlockSplit* split_;  /* not owned */
+  HistogramType* histograms_;  /* not owned */
+  size_t* histograms_size_;  /* not owned */
+
+  /* The number of symbols that we want to collect before deciding on whether
+     or not to merge the block with a previous one or emit a new block. */
+  size_t target_block_size_;
+  /* The number of symbols in the current histogram. */
+  size_t block_size_;
+  /* Offset of the current histogram. */
+  size_t curr_histogram_ix_;
+  /* Offsets of the last ([0]) and second last ([1]) block type histograms. */
+  size_t last_histogram_ix_[2];
+  /* Entropy of the last ([0]) and second last ([1]) block types. */
+  double last_entropy_[2];
+  /* The number of times we merged the current block with the last one. */
+  size_t merge_last_count_;
+} FN(BlockSplitter);
+
+static void FN(InitBlockSplitter)(
+    MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size,
+    size_t min_block_size, double split_threshold, size_t num_symbols,
+    BlockSplit* split, HistogramType** histograms, size_t* histograms_size) {
+  const size_t block_cap = num_symbols / min_block_size + 1;
+  /* One histogram beyond the block type limit is reserved: it serves as the
+     current histogram when the meta-block is too big. */
+  const size_t type_cap =
+      BROTLI_MIN(size_t, block_cap, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1);
+  /* Fixed configuration. */
+  self->alphabet_size_ = alphabet_size;
+  self->min_block_size_ = min_block_size;
+  self->split_threshold_ = split_threshold;
+  self->split_ = split;
+  self->histograms_size_ = histograms_size;
+  /* Running state: nothing collected, first target is one minimal block. */
+  self->target_block_size_ = min_block_size;
+  self->num_blocks_ = self->block_size_ = 0;
+  self->curr_histogram_ix_ = self->merge_last_count_ = 0;
+  BROTLI_ENSURE_CAPACITY(m, uint8_t,
+      split->types, split->types_alloc_size, block_cap);
+  BROTLI_ENSURE_CAPACITY(m, uint32_t,
+      split->lengths, split->lengths_alloc_size, block_cap);
+  if (BROTLI_IS_OOM(m)) return;
+  split->num_blocks = block_cap;
+  BROTLI_DCHECK(*histograms == 0);
+  *histograms_size = type_cap;
+  self->histograms_ = *histograms =
+      BROTLI_ALLOC(m, HistogramType, type_cap);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(*histograms)) return;
+  /* Only histogram 0 (the current one) needs to start out empty. */
+  FN(HistogramClear)(&self->histograms_[0]);
+  self->last_histogram_ix_[1] = self->last_histogram_ix_[0] = 0;
+}
+
+/* Closes the block collected so far. Does either of three things:
+     (1) emits the current block with a new block type;
+     (2) emits the current block with the type of the second last block;
+     (3) merges the current block with the last block. */
+static void FN(BlockSplitterFinishBlock)(
+    FN(BlockSplitter)* self, BROTLI_BOOL is_final) {
+  BlockSplit* split = self->split_;
+  double* last_entropy = self->last_entropy_;
+  HistogramType* histograms = self->histograms_;
+  self->block_size_ =
+      BROTLI_MAX(size_t, self->block_size_, self->min_block_size_);
+  if (self->num_blocks_ == 0) {
+    /* Create first block: it always gets type 0. */
+    split->lengths[0] = (uint32_t)self->block_size_;
+    split->types[0] = 0;
+    last_entropy[0] =
+        BitsEntropy(histograms[0].data_, self->alphabet_size_);
+    last_entropy[1] = last_entropy[0];
+    ++self->num_blocks_;
+    ++split->num_types;
+    ++self->curr_histogram_ix_;
+    if (self->curr_histogram_ix_ < *self->histograms_size_)
+      FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
+    self->block_size_ = 0;
+  } else if (self->block_size_ > 0) {
+    double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_,
+                                 self->alphabet_size_);
+    HistogramType combined_histo[2];
+    double combined_entropy[2];
+    double diff[2];  /* [0]: vs last type, [1]: vs second last */
+    size_t j;
+    for (j = 0; j < 2; ++j) {
+      size_t last_histogram_ix = self->last_histogram_ix_[j];
+      combined_histo[j] = histograms[self->curr_histogram_ix_];
+      FN(HistogramAddHistogram)(&combined_histo[j],
+          &histograms[last_histogram_ix]);
+      combined_entropy[j] = BitsEntropy(
+          &combined_histo[j].data_[0], self->alphabet_size_);
+      diff[j] = combined_entropy[j] - entropy - last_entropy[j];
+    }
+    /* diff[j]: entropy of the merge minus the two separate entropies. */
+    if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES &&
+        diff[0] > self->split_threshold_ &&
+        diff[1] > self->split_threshold_) {
+      /* Create new block with a fresh block type. */
+      split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
+      split->types[self->num_blocks_] = (uint8_t)split->num_types;
+      self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
+      self->last_histogram_ix_[0] = (uint8_t)split->num_types;
+      last_entropy[1] = last_entropy[0];
+      last_entropy[0] = entropy;
+      ++self->num_blocks_;
+      ++split->num_types;
+      ++self->curr_histogram_ix_;
+      if (self->curr_histogram_ix_ < *self->histograms_size_)
+        FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
+      self->block_size_ = 0;
+      self->merge_last_count_ = 0;
+      self->target_block_size_ = self->min_block_size_;
+    } else if (diff[1] < diff[0] - 20.0) {
+      /* Combine this block with second last block (cheaper by > 20.0). */
+      split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
+      split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
+      BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
+      histograms[self->last_histogram_ix_[0]] = combined_histo[1];
+      last_entropy[1] = last_entropy[0];
+      last_entropy[0] = combined_entropy[1];
+      ++self->num_blocks_;
+      self->block_size_ = 0;
+      FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
+      self->merge_last_count_ = 0;
+      self->target_block_size_ = self->min_block_size_;
+    } else {
+      /* Combine this block with last block; repeated merges grow the target. */
+      split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
+      histograms[self->last_histogram_ix_[0]] = combined_histo[0];
+      last_entropy[0] = combined_entropy[0];
+      if (split->num_types == 1) {
+        last_entropy[1] = last_entropy[0];
+      }
+      self->block_size_ = 0;
+      FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
+      if (++self->merge_last_count_ > 1) {
+        self->target_block_size_ += self->min_block_size_;
+      }
+    }
+  }
+  if (is_final) {  /* Publish the final histogram and block counts. */
+    *self->histograms_size_ = split->num_types;
+    split->num_blocks = self->num_blocks_;
+  }
+}
+
+/* Counts |symbol| in the current histogram; once target_block_size_ symbols
+   have been collected, lets FinishBlock decide how to emit or merge them. */
+static void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) {
+  HistogramType* current = &self->histograms_[self->curr_histogram_ix_];
+  FN(HistogramAdd)(current, symbol);
+  if (++self->block_size_ == self->target_block_size_) {
+    FN(BlockSplitterFinishBlock)(self, /* is_final = */ BROTLI_FALSE);
+  }
+}
+
+#undef HistogramType
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/params.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/params.h
new file mode 100755
index 0000000000..54a7f00736
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/params.h
@@ -0,0 +1,46 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Parameters for the Brotli encoder with chosen quality levels. */
+
+#ifndef BROTLI_ENC_PARAMS_H_
+#define BROTLI_ENC_PARAMS_H_
+
+#include <brotli/encode.h>
+#include "./encoder_dict.h"
+/* Hasher selection and tuning; filled in by ChooseHasher() in quality.h. */
+typedef struct BrotliHasherParams {
+  int type;  /* hasher id, e.g. 5, 6, 10, 40..42, 54 or the quality itself */
+  int bucket_bits;  /* ChooseHasher() sets it on the type 5 / 6 paths only */
+  int block_bits;  /* ChooseHasher() sets it on the type 5 / 6 paths only */
+  int hash_len;  /* ChooseHasher() sets it on the type 6 path only */
+  int num_last_distances_to_check;  /* set on the type 5 / 6 paths only */
+} BrotliHasherParams;
+/* Distance coding parameters (stored as BrotliEncoderParams.dist). */
+typedef struct BrotliDistanceParams {
+  uint32_t distance_postfix_bits;  /* presumably NPOSTFIX -- TODO confirm */
+  uint32_t num_direct_distance_codes;  /* presumably NDIRECT -- TODO confirm */
+  uint32_t alphabet_size_max;
+  uint32_t alphabet_size_limit;
+  size_t max_distance;
+} BrotliDistanceParams;
+
+/* Encoding parameters; sanitized and interpreted by helpers in quality.h. */
+typedef struct BrotliEncoderParams {
+  BrotliEncoderMode mode;
+  int quality;  /* clamped to BROTLI_MIN_QUALITY..BROTLI_MAX_QUALITY */
+  int lgwin;  /* window bits; clamped by SanitizeParams() */
+  int lgblock;  /* input block bits; see ComputeLgBlock() */
+  size_t stream_offset;
+  size_t size_hint;  /* compared against 1 << 20 by ChooseHasher() */
+  BROTLI_BOOL disable_literal_context_modeling;
+  BROTLI_BOOL large_window;  /* cleared by SanitizeParams() at low quality */
+  BrotliHasherParams hasher;
+  BrotliDistanceParams dist;
+  BrotliEncoderDictionary dictionary;
+} BrotliEncoderParams;
+
+#endif  /* BROTLI_ENC_PARAMS_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/prefix.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/prefix.h
new file mode 100644
index 0000000000..fd359a478d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/prefix.h
@@ -0,0 +1,53 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions for encoding of integers into prefix codes the amount of extra
+   bits, and the actual values of the extra bits. */
+
+#ifndef BROTLI_ENC_PREFIX_H_
+#define BROTLI_ENC_PREFIX_H_
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./fast_log.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* |distance_code| is an intermediate code: either one of the special codes or
+   the actual distance increased by BROTLI_NUM_DISTANCE_SHORT_CODES - 1. */
+static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
+                                                   size_t num_direct_codes,
+                                                   size_t postfix_bits,
+                                                   uint16_t* code,
+                                                   uint32_t* extra_bits) {
+  const size_t first_long_code =
+      BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes;
+  size_t dist, bucket, postfix, prefix, offset, nbits;
+  if (distance_code < first_long_code) {
+    *code = (uint16_t)distance_code;
+    *extra_bits = 0;
+    return;
+  }
+  dist = ((size_t)1 << (postfix_bits + 2u)) +
+      (distance_code - first_long_code);
+  bucket = Log2FloorNonZero(dist) - 1;
+  postfix = dist & ((1u << postfix_bits) - 1);
+  prefix = (dist >> bucket) & 1;
+  offset = (2 + prefix) << bucket;
+  nbits = bucket - postfix_bits;
+  *code = (uint16_t)((nbits << 10) | (first_long_code +
+      ((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
+  *extra_bits = (uint32_t)((dist - offset) >> postfix_bits);
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_PREFIX_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/quality.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/quality.h
new file mode 100644
index 0000000000..5f4d034503
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/quality.h
@@ -0,0 +1,165 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Constants and formulas that affect speed-ratio trade-offs and thus define
+   quality levels. */
+
+#ifndef BROTLI_ENC_QUALITY_H_
+#define BROTLI_ENC_QUALITY_H_
+
+#include "../common/platform.h"
+#include <brotli/encode.h>
+#include "./params.h"
+
+#define FAST_ONE_PASS_COMPRESSION_QUALITY 0
+#define FAST_TWO_PASS_COMPRESSION_QUALITY 1
+#define ZOPFLIFICATION_QUALITY 10
+#define HQ_ZOPFLIFICATION_QUALITY 11
+
+#define MAX_QUALITY_FOR_STATIC_ENTROPY_CODES 2
+#define MIN_QUALITY_FOR_BLOCK_SPLIT 4
+#define MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS 4
+#define MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS 4
+#define MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH 5
+#define MIN_QUALITY_FOR_CONTEXT_MODELING 5
+#define MIN_QUALITY_FOR_HQ_CONTEXT_MODELING 7
+#define MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING 10
+
+/* For quality below MIN_QUALITY_FOR_BLOCK_SPLIT there is no block splitting,
+   so we buffer at most this much literals and commands. */
+#define MAX_NUM_DELAYED_SYMBOLS 0x2FFF
+
+/* Hash-table size used by the one-pass / two-pass qualities (0 and 1). */
+static BROTLI_INLINE size_t MaxHashTableSize(int quality) {
+  return quality != FAST_ONE_PASS_COMPRESSION_QUALITY ? 1 << 17 : 1 << 15;
+}
+
+/* The maximum length for which the zopflification uses distinct distances. */
+#define MAX_ZOPFLI_LEN_QUALITY_10 150
+#define MAX_ZOPFLI_LEN_QUALITY_11 325
+
+/* Do not thoroughly search when a long copy is found. */
+#define BROTLI_LONG_COPY_QUICK_STEP 16384
+
+static BROTLI_INLINE size_t MaxZopfliLen(const BrotliEncoderParams* params) {
+  /* Only qualities above 10 get the longer limit. */
+  if (params->quality > 10) return MAX_ZOPFLI_LEN_QUALITY_11;
+  return MAX_ZOPFLI_LEN_QUALITY_10;
+}
+
+/* How many of the best candidates are evaluated to expand a Zopfli chain. */
+static BROTLI_INLINE size_t MaxZopfliCandidates(
+  const BrotliEncoderParams* params) {
+  return params->quality > 10 ? 5 : 1;
+}
+
+static BROTLI_INLINE void SanitizeParams(BrotliEncoderParams* params) {
+  const int quality = BROTLI_MAX(int, BROTLI_MIN_QUALITY, params->quality);
+  int max_lgwin;
+  params->quality = BROTLI_MIN(int, BROTLI_MAX_QUALITY, quality);  /* clamp */
+  if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES)
+    params->large_window = BROTLI_FALSE;
+  max_lgwin = params->large_window ? BROTLI_LARGE_MAX_WINDOW_BITS
+                                   : BROTLI_MAX_WINDOW_BITS;
+  if (params->lgwin < BROTLI_MIN_WINDOW_BITS) {
+    params->lgwin = BROTLI_MIN_WINDOW_BITS;
+  } else if (params->lgwin > max_lgwin) {
+    params->lgwin = max_lgwin;
+  }
+}
+
+/* Returns the lg_block to use for the given quality, lgwin and lgblock. */
+static BROTLI_INLINE int ComputeLgBlock(const BrotliEncoderParams* params) {
+  const int quality = params->quality;
+  int lgblock = params->lgblock;
+  if (quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
+      quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
+    return params->lgwin;
+  }
+  if (quality < MIN_QUALITY_FOR_BLOCK_SPLIT) return 14;
+  if (lgblock != 0) {
+    lgblock = BROTLI_MAX(int, BROTLI_MIN_INPUT_BLOCK_BITS, lgblock);
+    return BROTLI_MIN(int, BROTLI_MAX_INPUT_BLOCK_BITS, lgblock);
+  }
+  /* Unset (0): default to 16, or min(18, lgwin) for quality >= 9. */
+  if (quality >= 9 && params->lgwin > 16) {
+    return BROTLI_MIN(int, 18, params->lgwin);
+  }
+  return 16;
+}
+
+/* Returns log2 of the main ring buffer area size: one bit more than the
+   larger of lgwin and lgblock. The extra bit lets a newly added block fit
+   completely while lgwin bits of history stay available, and keeps the copy
+   tail length smaller than the ring-buffer size. */
+static BROTLI_INLINE int ComputeRbBits(const BrotliEncoderParams* params) {
+  const int widest = BROTLI_MAX(int, params->lgwin, params->lgblock);
+  return widest + 1;
+}
+
+static BROTLI_INLINE size_t MaxMetablockSize(
+    const BrotliEncoderParams* params) {
+  int bits = ComputeRbBits(params);  /* capped on the next line */
+  if (bits > BROTLI_MAX_INPUT_BLOCK_BITS) bits = BROTLI_MAX_INPUT_BLOCK_BITS;
+  return (size_t)1 << bits;
+}
+
+/* While searching for backward references without seeing matches for a long
+   time, some match lookups can be skipped: failed lookups are very expensive,
+   so this heuristic speeds up compression a lot. First 8-byte strides are
+   taken with every second byte put to the hasher; after 4x more literals the
+   stride is 16 bytes with every 4-th byte. Applied to qualities 2 to 9 only. */
+static BROTLI_INLINE size_t LiteralSpreeLengthForSparseSearch(
+    const BrotliEncoderParams* params) {
+  if (params->quality >= 9) return 512;
+  return 64;
+}
+
+static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params,
+                                       BrotliHasherParams* hparams) {
+  const int quality = params->quality;
+  const int big_input = params->size_hint >= (1 << 20);
+  /* Base selection by quality, size hint and window size. Only the type 5
+     and type 6 paths fill in the tuning fields; the other paths set just
+     hparams->type. */
+  if (quality > 9) {
+    hparams->type = 10;
+  } else if (quality == 4 && big_input) {
+    hparams->type = 54;
+  } else if (quality < 5) {
+    hparams->type = quality;
+  } else if (params->lgwin <= 16) {
+    hparams->type = quality < 7 ? 40 : quality < 9 ? 41 : 42;
+  } else {
+    /* Qualities 5..9 with lgwin > 16: type 6 for large hinted inputs with
+       lgwin >= 19, type 5 otherwise; only type 6 gets an explicit hash_len. */
+    const int use_h6 = big_input && params->lgwin >= 19;
+    hparams->type = use_h6 ? 6 : 5;
+    hparams->block_bits = quality - 1;
+    hparams->bucket_bits = (use_h6 || quality >= 7) ? 15 : 14;
+    if (use_h6) hparams->hash_len = 5;
+    hparams->num_last_distances_to_check =
+        quality < 7 ? 4 : quality < 9 ? 10 : 16;
+  }
+
+  if (params->lgwin > 24) {
+    /* Different hashers for large window brotli: not for qualities <= 2,
+       these are too fast for large window. Not for qualities >= 10: their
+       hasher already works well with large window. So the changes are:
+       H3 --> H35: for quality 3.
+       H54 --> H55: for quality 4 with size hint > 1MB
+       H6 --> H65: for qualities 5, 6, 7, 8, 9. */
+    switch (hparams->type) {
+      case 3: hparams->type = 35; break;
+      case 54: hparams->type = 55; break;
+      case 6: hparams->type = 65; break;
+      default: break;
+    }
+  }
+}
+
+#endif  /* BROTLI_ENC_QUALITY_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/ringbuffer.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/ringbuffer.h
new file mode 100644
index 0000000000..8dce148039
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/ringbuffer.h
@@ -0,0 +1,167 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Sliding window over the input data. */
+
+#ifndef BROTLI_ENC_RINGBUFFER_H_
+#define BROTLI_ENC_RINGBUFFER_H_
+
+#include <string.h>  /* memcpy */
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./memory.h"
+#include "./quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
+   data in a circular manner: writing a byte writes it to:
+     `position() % (1 << window_bits)'.
+   For convenience, the RingBuffer array contains another copy of the
+   first `1 << tail_bits' bytes:
+     buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits),
+   and another copy of the last two bytes:
+     buffer_[-1] == buffer_[(1 << window_bits) - 1] and
+     buffer_[-2] == buffer_[(1 << window_bits) - 2]. */
+typedef struct RingBuffer {
+  /* Geometry; assigned by RingBufferSetup() through const-discarding casts. */
+  const uint32_t size_;        /* 1 << window_bits */
+  const uint32_t mask_;        /* size_ - 1 */
+  const uint32_t tail_size_;   /* 1 << tail_bits */
+  const uint32_t total_size_;  /* size_ + tail_size_ */
+
+  uint32_t cur_size_;  /* data bytes currently allocated; 0 before any */
+  /* Position to write in the ring buffer; bit 31 flags "not the first lap". */
+  uint32_t pos_;
+  /* The actual ring buffer containing the copy of the last two bytes, the data,
+     and the copy of the beginning as a tail. */
+  uint8_t* data_;
+  /* The start of the ring-buffer; set to data_ + 2 on (re)allocation. */
+  uint8_t* buffer_;
+} RingBuffer;
+
+static BROTLI_INLINE void RingBufferInit(RingBuffer* rb) {
+  /* Empty state: no storage allocated and nothing written yet. The const
+     geometry fields are not touched here; RingBufferSetup() assigns them. */
+  rb->buffer_ = rb->data_ = 0;
+  rb->pos_ = rb->cur_size_ = 0;
+}
+
+static BROTLI_INLINE void RingBufferSetup(
+    const BrotliEncoderParams* params, RingBuffer* rb) {
+  const uint32_t window_size = 1u << ComputeRbBits(params);
+  const uint32_t tail_size = 1u << params->lgblock;
+  *(uint32_t*)&rb->size_ = window_size;  /* const field: cast to write */
+  *(uint32_t*)&rb->mask_ = window_size - 1;
+  *(uint32_t*)&rb->tail_size_ = tail_size;
+  *(uint32_t*)&rb->total_size_ = window_size + tail_size;
+}
+
+static BROTLI_INLINE void RingBufferFree(MemoryManager* m, RingBuffer* rb) {
+  BROTLI_FREE(m, rb->data_);  /* rb->buffer_ (= data_ + 2) is not reset here */
+}
+
+/* Allocates or re-allocates data_ so that buffer_ can hold |buflen| bytes,
+   with slack around it: 2 bytes in front and 7 bytes behind. Both slack
+   regions are zero-filled; contents of a previous allocation are copied
+   over. If the allocation fails, |rb| is returned untouched. */
+static BROTLI_INLINE void RingBufferInitBuffer(
+    MemoryManager* m, const uint32_t buflen, RingBuffer* rb) {
+  static const size_t kSlackForEightByteHashingEverywhere = 7;
+  uint8_t* fresh = BROTLI_ALLOC(
+      m, uint8_t, 2 + buflen + kSlackForEightByteHashingEverywhere);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(fresh)) return;
+  if (rb->data_ != 0) {
+    /* Carry over the old block in full: front slack, data and back slack. */
+    memcpy(fresh, rb->data_,
+           2 + rb->cur_size_ + kSlackForEightByteHashingEverywhere);
+    BROTLI_FREE(m, rb->data_);
+  }
+  rb->data_ = fresh;
+  rb->buffer_ = fresh + 2;
+  rb->cur_size_ = buflen;
+  memset(fresh, 0, 2);  /* buffer_[-2] and buffer_[-1] */
+  memset(rb->buffer_ + buflen, 0, kSlackForEightByteHashingEverywhere);
+}
+
+static BROTLI_INLINE void RingBufferWriteTail(
+    const uint8_t* bytes, size_t n, RingBuffer* rb) {
+  /* Mirrors into the tail copy whatever part of this write lands in the first
+     tail_size_ bytes of the window; nothing to do if it starts past them. */
+  const size_t start = rb->pos_ & rb->mask_;
+  size_t room;
+  if (BROTLI_PREDICT_TRUE(start >= rb->tail_size_)) return;
+  room = rb->tail_size_ - start;
+  memcpy(rb->buffer_ + rb->size_ + start, bytes, BROTLI_MIN(size_t, n, room));
+}
+
+/* Push bytes into the ring buffer: copies |n| bytes from |bytes| to the
+   current write position, allocating storage on demand; returns early if the
+   allocation runs out of memory. */
+static BROTLI_INLINE void RingBufferWrite(
+    MemoryManager* m, const uint8_t* bytes, size_t n, RingBuffer* rb) {
+  const uint32_t kLapFlag = 1u << 31;
+  const uint32_t kPosMask = kLapFlag - 1;
+  size_t start;
+  uint32_t lap;
+  if (rb->pos_ == 0 && n < rb->tail_size_) {
+    /* First write, shorter than the tail size (which is also the input block
+       size): allocate just |n| bytes and no tail. A longer first write is
+       likely to be followed by other blocks, which would force a
+       reallocation to the full size anyway. */
+    rb->pos_ = (uint32_t)n;
+    RingBufferInitBuffer(m, rb->pos_, rb);
+    if (BROTLI_IS_OOM(m)) return;
+    memcpy(rb->buffer_, bytes, n);
+    return;
+  }
+  if (rb->cur_size_ < rb->total_size_) {
+    /* Lazily allocate the full buffer. */
+    RingBufferInitBuffer(m, rb->total_size_, rb);
+    if (BROTLI_IS_OOM(m)) return;
+    /* Zero the last two window bytes, so that copying them to buffer_[-2]
+       and buffer_[-1] further down needs no special casing. */
+    rb->buffer_[rb->size_ - 1] = 0;
+    rb->buffer_[rb->size_ - 2] = 0;
+    /* Initialize tail; might be touched by "best_len++" optimization when
+       ring buffer is "full". */
+    rb->buffer_[rb->size_] = 241;
+  }
+
+  start = rb->pos_ & rb->mask_;
+  /* NOTE(review): the split below covers a single wrap-around only, so the
+     write length is presumably limited by the callers -- confirm. */
+  RingBufferWriteTail(bytes, n, rb);
+  if (BROTLI_PREDICT_FALSE(start + n > rb->size_)) {
+    /* Two writes: first up to the end of the buffer, tail included... */
+    const size_t head_len = rb->size_ - start;
+    memcpy(&rb->buffer_[start], bytes,
+           BROTLI_MIN(size_t, n, rb->total_size_ - start));
+    /* ...then the remainder at the beginning of the buffer. */
+    memcpy(&rb->buffer_[0], bytes + head_len, n - head_len);
+  } else {
+    /* A single write fits. */
+    memcpy(&rb->buffer_[start], bytes, n);
+  }
+
+  /* Mirror the last two window bytes in front of the buffer, then advance
+     pos_ within its low 31 bits. Bit 31 is the not-a-first-lap flag: it is
+     preserved when already set, and a carry out of the low bits sets it. */
+  lap = rb->pos_ & kLapFlag;
+  rb->buffer_[-2] = rb->buffer_[rb->size_ - 2];
+  rb->buffer_[-1] = rb->buffer_[rb->size_ - 1];
+  rb->pos_ = (rb->pos_ & kPosMask) + (uint32_t)(n & kPosMask);
+  rb->pos_ |= lap;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_RINGBUFFER_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/static_dict.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/static_dict.c
new file mode 100644
index 0000000000..7299ab7203
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/static_dict.c
@@ -0,0 +1,486 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "./static_dict.h"
+
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include "../common/transform.h"
+#include "./encoder_dict.h"
+#include "./find_match_length.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static BROTLI_INLINE uint32_t Hash(const uint8_t* data) {
+  /* Keeps the top kDictNumBits bits of the 32-bit product: the higher bits
+     contain more mixture from the multiplication. */
+  const uint32_t product = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32;
+  return product >> (32 - kDictNumBits);
+}
+
+static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
+                                   uint32_t* matches) {
+  const uint32_t packed = (uint32_t)((distance << 5) + len_code);
+  if (packed < matches[len]) matches[len] = packed;  /* keep the minimum */
+}
+
+static BROTLI_INLINE size_t DictMatchLength(const BrotliDictionary* dictionary,
+                                            const uint8_t* data, size_t id,
+                                            size_t len, size_t maxlen) {
+  /* Runs FindMatchLengthWithLimit on |data| against dictionary word |id| of
+     length |len|, with the limit min(len, maxlen). */
+  const uint8_t* word =
+      dictionary->data + dictionary->offsets_by_length[len] + len * id;
+  return FindMatchLengthWithLimit(word, data, BROTLI_MIN(size_t, len, maxlen));
+}
+
+static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
+    DictWord w, const uint8_t* data, size_t max_length) {
+  /* Tells whether |data| starts with dictionary word |w| after w.transform is
+     applied: 0 keeps the word as is, 10 upper-cases its first letter, any
+     other value upper-cases all letters. Needs w.len <= max_length. */
+  const uint8_t* dict;
+  size_t i;
+  if (w.len > max_length) return BROTLI_FALSE;
+  dict = &dictionary->data[dictionary->offsets_by_length[w.len] +
+                           (size_t)w.len * (size_t)w.idx];
+  if (w.transform == 0) {
+    /* Match against base dictionary word. */
+    return
+        TO_BROTLI_BOOL(FindMatchLengthWithLimit(dict, data, w.len) == w.len);
+  }
+  if (w.transform == 10) {
+    /* Uppercase-first: the first dictionary byte must be an ASCII lowercase
+       letter whose bit-5-flipped (upper-case) form equals data[0], and the
+       remaining w.len - 1 bytes must match as they are. */
+    if (dict[0] < 'a' || dict[0] > 'z') return BROTLI_FALSE;
+    if ((dict[0] ^ 32) != data[0]) return BROTLI_FALSE;
+    return TO_BROTLI_BOOL(
+        FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
+        w.len - 1u);
+  }
+  /* Uppercase-all: ASCII lowercase letters must appear upper-cased in |data|,
+     every other byte must match as it is. */
+  for (i = 0; i < w.len; ++i) {
+    const uint8_t expected =
+        (dict[i] >= 'a' && dict[i] <= 'z') ? (uint8_t)(dict[i] ^ 32) : dict[i];
+    if (expected != data[i]) return BROTLI_FALSE;
+  }
+  return BROTLI_TRUE;
+}
+
+BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(  /* contract: static_dict.h */
+    const BrotliEncoderDictionary* dictionary, const uint8_t* data,
+    size_t min_length, size_t max_length, uint32_t* matches) {
+  BROTLI_BOOL has_found_match = BROTLI_FALSE;
+  {  /* Words that match directly at data[0] (no prefix). */
+    size_t offset = dictionary->buckets[Hash(data)];  /* 0: empty bucket */
+    BROTLI_BOOL end = !offset;
+    while (!end) {
+      DictWord w = dictionary->dict_words[offset++];
+      const size_t l = w.len & 0x1F;  /* low 5 bits: word length */
+      const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
+      const size_t id = w.idx;
+      end = !!(w.len & 0x80);  /* bit 7 marks the bucket's last entry */
+      w.len = (uint8_t)l;  /* IsMatch() reads w.len as the bare length */
+      if (w.transform == 0) {
+        const size_t matchlen =
+            DictMatchLength(dictionary->words, data, id, l, max_length);
+        const uint8_t* s;
+        size_t minlen;
+        size_t maxlen;
+        size_t len;
+        /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
+        if (matchlen == l) {
+          AddMatch(id, l, l, matches);
+          has_found_match = BROTLI_TRUE;
+        }
+        /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
+                      "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
+        if (matchlen >= l - 1) {
+          AddMatch(id + 12 * n, l - 1, l, matches);
+          if (l + 2 < max_length &&
+              data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
+              data[l + 2] == ' ') {
+            AddMatch(id + 49 * n, l + 3, l, matches);
+          }
+          has_found_match = BROTLI_TRUE;
+        }
+        /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
+        minlen = min_length;
+        if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
+        maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
+        for (len = minlen; len <= maxlen; ++len) {
+          size_t cut = l - len;  /* bytes omitted from the word's end */
+          size_t transform_id = (cut << 2) +
+              (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
+          AddMatch(id + transform_id * n, len, l, matches);
+          has_found_match = BROTLI_TRUE;
+        }
+        if (matchlen < l || l + 6 >= max_length) {
+          continue;  /* the suffix tests below read up to s[6] */
+        }
+        s = &data[l];
+        /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
+        if (s[0] == ' ') {
+          AddMatch(id + n, l + 1, l, matches);
+          if (s[1] == 'a') {
+            if (s[2] == ' ') {
+              AddMatch(id + 28 * n, l + 3, l, matches);
+            } else if (s[2] == 's') {
+              if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
+            } else if (s[2] == 't') {
+              if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
+            } else if (s[2] == 'n') {
+              if (s[3] == 'd' && s[4] == ' ') {
+                AddMatch(id + 10 * n, l + 5, l, matches);
+              }
+            }
+          } else if (s[1] == 'b') {
+            if (s[2] == 'y' && s[3] == ' ') {
+              AddMatch(id + 38 * n, l + 4, l, matches);
+            }
+          } else if (s[1] == 'i') {
+            if (s[2] == 'n') {
+              if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
+            } else if (s[2] == 's') {
+              if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
+            }
+          } else if (s[1] == 'f') {
+            if (s[2] == 'o') {
+              if (s[3] == 'r' && s[4] == ' ') {
+                AddMatch(id + 25 * n, l + 5, l, matches);
+              }
+            } else if (s[2] == 'r') {
+              if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
+                AddMatch(id + 37 * n, l + 6, l, matches);
+              }
+            }
+          } else if (s[1] == 'o') {
+            if (s[2] == 'f') {
+              if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
+            } else if (s[2] == 'n') {
+              if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
+            }
+          } else if (s[1] == 'n') {
+            if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
+              AddMatch(id + 80 * n, l + 5, l, matches);
+            }
+          } else if (s[1] == 't') {
+            if (s[2] == 'h') {
+              if (s[3] == 'e') {
+                if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
+              } else if (s[3] == 'a') {
+                if (s[4] == 't' && s[5] == ' ') {
+                  AddMatch(id + 29 * n, l + 6, l, matches);
+                }
+              }
+            } else if (s[2] == 'o') {
+              if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
+            }
+          } else if (s[1] == 'w') {
+            if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
+              AddMatch(id + 35 * n, l + 6, l, matches);
+            }
+          }
+        } else if (s[0] == '"') {
+          AddMatch(id + 19 * n, l + 1, l, matches);
+          if (s[1] == '>') {
+            AddMatch(id + 21 * n, l + 2, l, matches);
+          }
+        } else if (s[0] == '.') {
+          AddMatch(id + 20 * n, l + 1, l, matches);
+          if (s[1] == ' ') {
+            AddMatch(id + 31 * n, l + 2, l, matches);
+            if (s[2] == 'T' && s[3] == 'h') {
+              if (s[4] == 'e') {
+                if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
+              } else if (s[4] == 'i') {
+                if (s[5] == 's' && s[6] == ' ') {
+                  AddMatch(id + 75 * n, l + 7, l, matches);
+                }
+              }
+            }
+          }
+        } else if (s[0] == ',') {
+          AddMatch(id + 76 * n, l + 1, l, matches);
+          if (s[1] == ' ') {
+            AddMatch(id + 14 * n, l + 2, l, matches);
+          }
+        } else if (s[0] == '\n') {
+          AddMatch(id + 22 * n, l + 1, l, matches);
+          if (s[1] == '\t') {
+            AddMatch(id + 50 * n, l + 2, l, matches);
+          }
+        } else if (s[0] == ']') {
+          AddMatch(id + 24 * n, l + 1, l, matches);
+        } else if (s[0] == '\'') {
+          AddMatch(id + 36 * n, l + 1, l, matches);
+        } else if (s[0] == ':') {
+          AddMatch(id + 51 * n, l + 1, l, matches);
+        } else if (s[0] == '(') {
+          AddMatch(id + 57 * n, l + 1, l, matches);
+        } else if (s[0] == '=') {
+          if (s[1] == '"') {
+            AddMatch(id + 70 * n, l + 2, l, matches);
+          } else if (s[1] == '\'') {
+            AddMatch(id + 86 * n, l + 2, l, matches);
+          }
+        } else if (s[0] == 'a') {
+          if (s[1] == 'l' && s[2] == ' ') {
+            AddMatch(id + 84 * n, l + 3, l, matches);
+          }
+        } else if (s[0] == 'e') {
+          if (s[1] == 'd') {
+            if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
+          } else if (s[1] == 'r') {
+            if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
+          } else if (s[1] == 's') {
+            if (s[2] == 't' && s[3] == ' ') {
+              AddMatch(id + 95 * n, l + 4, l, matches);
+            }
+          }
+        } else if (s[0] == 'f') {
+          if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
+            AddMatch(id + 90 * n, l + 4, l, matches);
+          }
+        } else if (s[0] == 'i') {
+          if (s[1] == 'v') {
+            if (s[2] == 'e' && s[3] == ' ') {
+              AddMatch(id + 92 * n, l + 4, l, matches);
+            }
+          } else if (s[1] == 'z') {
+            if (s[2] == 'e' && s[3] == ' ') {
+              AddMatch(id + 100 * n, l + 4, l, matches);
+            }
+          }
+        } else if (s[0] == 'l') {
+          if (s[1] == 'e') {
+            if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
+              AddMatch(id + 93 * n, l + 5, l, matches);
+            }
+          } else if (s[1] == 'y') {
+            if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
+          }
+        } else if (s[0] == 'o') {
+          if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
+            AddMatch(id + 106 * n, l + 4, l, matches);
+          }
+        }
+      } else {
+        /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
+               is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
+           transform. */
+        const BROTLI_BOOL is_all_caps =
+            TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
+        const uint8_t* s;
+        if (!IsMatch(dictionary->words, w, data, max_length)) {
+          continue;
+        }
+        /* Transform "" + kUppercase{First,All} + "" */
+        AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);
+        has_found_match = BROTLI_TRUE;
+        if (l + 1 >= max_length) {
+          continue;  /* the suffix tests below read s[0] and s[1] */
+        }
+        /* Transforms "" + kUppercase{First,All} + <suffix> */
+        s = &data[l];
+        if (s[0] == ' ') {
+          AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);
+        } else if (s[0] == '"') {
+          AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);
+          if (s[1] == '>') {
+            AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);
+          }
+        } else if (s[0] == '.') {
+          AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);
+          if (s[1] == ' ') {
+            AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);
+          }
+        } else if (s[0] == ',') {
+          AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);
+          if (s[1] == ' ') {
+            AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);
+          }
+        } else if (s[0] == '\'') {
+          AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);
+        } else if (s[0] == '(') {
+          AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);
+        } else if (s[0] == '=') {
+          if (s[1] == '"') {
+            AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);
+          } else if (s[1] == '\'') {
+            AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);
+          }
+        }
+      }
+    }
+  }
+  /* Transforms with prefixes " " and "." */
+  if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
+    BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
+    size_t offset = dictionary->buckets[Hash(&data[1])];
+    BROTLI_BOOL end = !offset;
+    while (!end) {
+      DictWord w = dictionary->dict_words[offset++];
+      const size_t l = w.len & 0x1F;
+      const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
+      const size_t id = w.idx;
+      end = !!(w.len & 0x80);
+      w.len = (uint8_t)l;
+      if (w.transform == 0) {
+        const uint8_t* s;
+        if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
+          continue;
+        }
+        /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
+                      "." + BROTLI_TRANSFORM_IDENTITY + "" */
+        AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
+        has_found_match = BROTLI_TRUE;
+        if (l + 2 >= max_length) {
+          continue;  /* the suffix tests below read s[0] and s[1] */
+        }
+        /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
+                      "." + BROTLI_TRANSFORM_IDENTITY + <suffix>
+        */
+        s = &data[l + 1];
+        if (s[0] == ' ') {
+          AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
+        } else if (s[0] == '(') {
+          AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
+        } else if (is_space) {
+          if (s[0] == ',') {
+            AddMatch(id + 103 * n, l + 2, l, matches);
+            if (s[1] == ' ') {
+              AddMatch(id + 33 * n, l + 3, l, matches);
+            }
+          } else if (s[0] == '.') {
+            AddMatch(id + 71 * n, l + 2, l, matches);
+            if (s[1] == ' ') {
+              AddMatch(id + 52 * n, l + 3, l, matches);
+            }
+          } else if (s[0] == '=') {
+            if (s[1] == '"') {
+              AddMatch(id + 81 * n, l + 3, l, matches);
+            } else if (s[1] == '\'') {
+              AddMatch(id + 98 * n, l + 3, l, matches);
+            }
+          }
+        }
+      } else if (is_space) {
+        /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
+               is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
+           transform. */
+        const BROTLI_BOOL is_all_caps =
+            TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
+        const uint8_t* s;
+        if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
+          continue;
+        }
+        /* Transforms " " + kUppercase{First,All} + "" */
+        AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);
+        has_found_match = BROTLI_TRUE;
+        if (l + 2 >= max_length) {
+          continue;  /* the suffix tests below read s[0] and s[1] */
+        }
+        /* Transforms " " + kUppercase{First,All} + <suffix> */
+        s = &data[l + 1];
+        if (s[0] == ' ') {
+          AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);
+        } else if (s[0] == ',') {
+          if (!is_all_caps) {
+            AddMatch(id + 109 * n, l + 2, l, matches);
+          }
+          if (s[1] == ' ') {
+            AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);
+          }
+        } else if (s[0] == '.') {
+          AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);
+          if (s[1] == ' ') {
+            AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);
+          }
+        } else if (s[0] == '=') {
+          if (s[1] == '"') {
+            AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);
+          } else if (s[1] == '\'') {
+            AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);
+          }
+        }
+      }
+    }
+  }
+  if (max_length >= 6) {
+    /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
+    if ((data[1] == ' ' &&
+         (data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
+        (data[0] == 0xC2 && data[1] == 0xA0)) {
+      size_t offset = dictionary->buckets[Hash(&data[2])];
+      BROTLI_BOOL end = !offset;
+      while (!end) {
+        DictWord w = dictionary->dict_words[offset++];
+        const size_t l = w.len & 0x1F;
+        const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
+        const size_t id = w.idx;
+        end = !!(w.len & 0x80);
+        w.len = (uint8_t)l;
+        if (w.transform == 0 &&
+            IsMatch(dictionary->words, w, &data[2], max_length - 2)) {
+          if (data[0] == 0xC2) {
+            AddMatch(id + 102 * n, l + 2, l, matches);
+            has_found_match = BROTLI_TRUE;
+          } else if (l + 2 < max_length && data[l + 2] == ' ') {
+            size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
+            AddMatch(id + t * n, l + 3, l, matches);
+            has_found_match = BROTLI_TRUE;
+          }
+        }
+      }
+    }
+  }
+  if (max_length >= 9) {
+    /* Transforms with prefixes " the " and ".com/" */
+    if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
+         data[3] == 'e' && data[4] == ' ') ||
+        (data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
+         data[3] == 'm' && data[4] == '/')) {
+      size_t offset = dictionary->buckets[Hash(&data[5])];
+      BROTLI_BOOL end = !offset;
+      while (!end) {
+        DictWord w = dictionary->dict_words[offset++];
+        const size_t l = w.len & 0x1F;
+        const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
+        const size_t id = w.idx;
+        end = !!(w.len & 0x80);
+        w.len = (uint8_t)l;
+        if (w.transform == 0 &&
+            IsMatch(dictionary->words, w, &data[5], max_length - 5)) {
+          AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
+          has_found_match = BROTLI_TRUE;
+          if (l + 5 < max_length) {
+            const uint8_t* s = &data[l + 5];
+            if (data[0] == ' ') {  /* " of " / " of the " follow " the " only */
+              if (l + 8 < max_length &&
+                  s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
+                AddMatch(id + 62 * n, l + 9, l, matches);
+                if (l + 12 < max_length &&
+                    s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
+                  AddMatch(id + 73 * n, l + 13, l, matches);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return has_found_match;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/static_dict.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/static_dict.h
new file mode 100644
index 0000000000..6b5d4eb0c9
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/static_dict.h
@@ -0,0 +1,40 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Class to model the static dictionary. */
+
+#ifndef BROTLI_ENC_STATIC_DICT_H_
+#define BROTLI_ENC_STATIC_DICT_H_
+
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./encoder_dict.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37
+static const uint32_t kInvalidMatch = 0xFFFFFFF;  /* 7 hex digits: 2^28 - 1 */
+
+/* Matches data against static dictionary words, and for each length l,
+   for which a match is found, updates matches[l] to be the minimum possible
+     (distance << 5) + len_code.
+   Returns BROTLI_TRUE if matches have been found, otherwise BROTLI_FALSE.
+   Prerequisites:
+     matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long
+     all elements are initialized to kInvalidMatch */
+BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* data, size_t min_length, size_t max_length,
+    uint32_t* matches);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_STATIC_DICT_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/static_dict_lut.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/static_dict_lut.h
new file mode 100644
index 0000000000..e299cda6d8
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/static_dict_lut.h
@@ -0,0 +1,5864 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Lookup table for static dictionary and transforms. */
+
+#ifndef BROTLI_ENC_STATIC_DICT_LUT_H_
+#define BROTLI_ENC_STATIC_DICT_LUT_H_
+
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct DictWord {
+  /* Highest bit (presumably of len) is used to indicate end of bucket. */
+  uint8_t len;
+  uint8_t transform;  /* NOTE(review): presumably a transform id; confirm. */
+  uint16_t idx;       /* NOTE(review): presumably a word index; confirm. */
+} DictWord;
+
+static const int kDictNumBits = 15;  /* 1 << 15 == 32768 buckets below */
+static const uint32_t kDictHashMul32 = 0x1E35A7BD;  /* hash multiplier? confirm */
+
+static const uint16_t kStaticDictionaryBuckets[32768] = {
+1,0,0,0,0,0,0,0,0,3,6,0,0,0,0,0,20,0,0,0,21,0,22,0,0,0,0,0,0,0,0,23,0,0,25,0,29,
+0,53,0,0,0,0,0,0,55,0,0,0,0,0,0,61,76,0,0,0,94,0,0,0,0,0,0,96,0,97,0,98,0,0,0,0,
+0,0,0,99,101,106,108,0,0,0,0,0,110,0,111,112,0,113,118,124,0,0,0,0,0,125,128,0,0
+,0,0,129,0,0,131,0,0,0,0,0,0,132,0,0,135,0,0,0,137,0,0,0,0,0,138,139,0,0,0,0,0,0
+,0,142,143,144,0,0,0,0,0,145,0,0,0,146,149,151,152,0,0,153,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,154,0,0,0,0,0,0,155,0,0,0,0,160,182,0,0,0,0,0,0,183,0,0,0,188,189,0,0,
+192,0,0,0,0,0,0,194,0,0,0,0,0,0,0,0,197,202,209,0,0,210,0,224,0,0,0,225,0,0,0,0,
+0,0,0,0,0,0,231,0,0,0,232,0,240,0,0,242,0,0,0,0,0,0,0,0,0,0,0,244,0,0,0,246,0,0,
+249,251,253,0,0,0,0,0,258,0,0,261,263,0,0,0,267,0,0,268,0,269,0,0,0,0,0,0,0,0,0,
+271,0,0,0,0,0,0,272,0,273,0,277,0,278,286,0,0,0,0,287,0,289,290,291,0,0,0,295,0,
+0,296,297,0,0,0,0,0,0,0,0,0,0,298,0,0,0,299,0,0,305,0,324,0,0,0,0,0,327,0,328,
+329,0,0,0,0,336,0,0,340,0,341,342,343,0,0,346,0,348,0,0,0,0,0,0,349,351,0,0,355,
+0,363,0,364,0,368,369,0,370,0,0,0,0,0,0,0,372,0,0,0,0,0,0,0,0,0,0,0,373,0,375,0,
+0,0,0,376,377,0,0,394,395,396,0,0,398,0,0,0,0,400,0,0,408,0,0,0,0,420,0,0,0,0,0,
+0,421,0,0,422,423,0,0,429,435,436,442,0,0,443,0,444,445,453,456,0,457,0,0,0,0,0,
+458,0,0,0,459,0,0,0,460,0,462,463,465,0,0,0,0,0,0,466,469,0,0,0,0,0,0,470,0,0,0,
+474,0,476,0,0,0,0,483,0,485,0,0,0,486,0,0,488,491,492,0,0,497,499,500,0,501,0,0,
+0,505,0,0,506,0,0,0,507,0,0,0,509,0,0,0,0,511,512,519,0,0,0,0,0,0,529,530,0,0,0,
+534,0,0,0,0,543,0,0,0,0,0,0,0,0,0,553,0,0,0,0,557,560,0,0,0,0,0,0,561,0,564,0,0,
+0,0,0,0,565,566,0,575,0,619,0,620,0,0,623,624,0,0,0,625,0,0,626,627,0,0,628,0,0,
+0,0,630,0,631,0,0,0,0,0,0,0,0,0,641,0,0,0,0,643,656,668,0,0,0,673,0,0,0,674,0,0,
+0,0,0,0,0,0,682,0,687,0,690,0,693,699,700,0,0,0,0,0,0,704,705,0,0,0,0,707,710,0,
+711,0,0,0,0,726,0,0,729,0,0,0,730,731,0,0,0,0,0,752,0,0,0,762,0,763,0,0,767,0,0,
+0,770,774,0,0,775,0,0,0,0,0,0,0,0,0,0,776,0,0,0,777,783,0,0,0,785,788,0,0,0,0,
+790,0,0,0,793,0,0,0,0,794,0,0,804,819,821,0,827,0,0,0,834,0,0,835,0,0,0,841,0,
+844,0,850,851,859,0,860,0,0,0,0,0,0,0,874,0,876,0,877,890,0,0,0,0,0,0,0,0,893,
+894,898,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,899,0,0,0,900,904,906,0,0,0,907,0,908,909,
+0,910,0,0,0,0,911,0,0,0,0,0,916,0,0,0,922,925,0,930,0,934,0,0,0,0,0,943,0,0,944,
+0,953,954,0,0,0,0,0,0,955,0,962,963,0,0,976,0,0,977,978,979,980,0,981,0,0,0,0,
+984,0,0,985,0,0,987,989,991,0,0,0,0,0,0,0,0,0,992,0,0,0,993,0,0,0,0,0,0,996,0,0,
+0,1000,0,0,0,0,0,1002,0,0,0,0,1005,1007,0,0,0,1009,0,0,0,1010,0,0,0,0,0,0,1011,0
+,1012,0,0,0,0,1014,1016,0,0,0,1020,0,1021,0,0,0,0,1022,0,0,0,1024,0,0,0,0,0,0,
+1025,0,0,1026,1027,0,0,0,0,0,1031,0,1033,0,0,0,0,1034,0,0,0,1037,1040,0,0,0,1042
+,1043,0,0,1053,0,1054,0,0,1057,0,0,0,1058,0,0,1060,0,0,0,0,0,0,0,1061,0,0,1062,0
+,0,0,0,1063,0,0,0,0,1064,0,0,0,0,0,1065,0,0,0,0,1066,1067,0,0,0,1069,1070,1072,0
+,0,0,0,0,0,1073,0,1075,0,0,0,0,0,0,1080,1084,0,0,0,0,1088,0,0,0,0,0,0,1094,0,
+1095,0,1107,0,0,0,1112,1114,0,1119,0,1122,0,0,1126,0,1129,0,1130,0,0,0,0,0,1132,
+0,0,0,0,0,0,1144,0,0,1145,1146,0,1148,1149,0,0,1150,1151,0,0,0,0,1152,0,1153,0,0
+,0,0,0,1154,0,1163,0,0,0,1164,0,0,0,0,0,1165,0,1167,0,1170,0,0,0,0,0,1171,1172,0
+,0,0,0,0,0,0,0,1173,1175,1177,0,1186,0,0,0,0,0,0,0,0,0,0,1195,0,0,1221,0,0,1224,
+0,0,1227,0,0,0,0,0,1228,1229,0,0,1230,0,0,0,0,0,0,0,0,0,1231,0,0,0,1233,0,0,1243
+,1244,1246,1248,0,0,0,0,1254,1255,1258,1259,0,0,0,1260,0,0,1261,0,0,0,1262,1264,
+0,0,1265,0,0,0,0,0,0,0,0,0,0,0,0,1266,0,1267,0,0,0,0,1273,1274,1276,1289,0,0,
+1291,1292,1293,0,0,1294,1295,1296,0,0,0,0,1302,0,1304,0,0,0,0,0,0,0,0,0,1311,
+1312,0,1314,0,1316,1320,1321,0,0,0,0,0,0,0,1322,1323,1324,0,1335,0,1336,0,0,0,0,
+1341,1342,0,1346,0,1357,0,0,0,1358,1360,0,0,0,0,0,0,1361,0,0,0,1362,1365,0,1366,
+0,0,0,0,0,0,0,1379,0,0,0,0,0,0,0,0,0,0,0,0,1386,0,1388,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,1395,0,0,0,0,1403,0,1405,0,0,1407,0,0,0,0,0,1408,1409,0,1410,0,0,0,1412,1413,
+1416,0,0,1429,1451,0,0,1454,0,0,0,0,0,0,0,1455,0,0,0,0,0,0,0,1456,0,0,0,0,1459,
+1460,1461,1475,0,0,0,0,0,0,1477,0,1480,0,1481,0,0,1486,0,0,1495,0,0,0,1496,0,0,
+1498,1499,1501,1520,1521,0,0,0,1526,0,0,0,0,1528,1529,0,1533,1536,0,0,0,1537,
+1538,1549,0,1550,1558,1559,1572,0,1573,0,0,0,0,0,0,0,0,0,1575,0,0,0,0,0,1579,0,
+1599,0,1603,0,1604,0,1605,0,0,0,0,0,1608,1610,0,0,0,0,1611,0,1615,0,1616,1618,0,
+1619,0,0,1622,0,0,0,0,1634,0,0,0,1635,0,0,0,1641,0,0,0,0,0,0,0,0,0,1643,0,0,0,
+1650,0,0,1652,0,0,0,0,0,1653,0,0,0,1654,0,0,0,0,1655,0,1662,0,0,1663,1664,0,0,
+1668,0,0,1669,1670,0,1672,1673,0,0,0,0,0,1674,0,0,0,1675,1676,1680,0,1682,0,0,
+1687,0,0,0,0,0,1704,0,0,1705,0,0,1721,0,0,0,0,1734,1735,0,0,0,0,1737,0,0,0,0,
+1739,0,0,1740,0,0,0,0,0,0,0,0,0,0,1741,1743,0,0,0,0,1745,0,0,0,1749,0,0,0,1751,0
+,0,0,0,0,0,1760,0,0,0,0,1765,0,0,0,0,0,1784,0,1785,1787,0,0,0,0,1788,1789,0,0,0,
+0,1790,1791,1793,0,1798,1799,0,0,0,0,1801,0,1803,1805,0,0,0,1806,1811,0,1812,
+1814,0,1821,0,0,0,0,0,1822,1833,0,0,0,0,0,0,1848,0,0,0,0,0,0,1857,0,0,0,1859,0,0
+,0,0,1861,0,0,0,0,0,0,0,1866,0,1921,1925,0,0,0,1929,1930,0,0,0,0,0,0,0,0,0,1931,
+0,0,0,0,1932,0,0,0,1934,0,0,0,0,0,0,0,0,1946,0,0,1948,0,0,0,0,1950,0,1957,0,1958
+,0,0,0,0,0,1965,1967,0,0,0,0,1968,0,1969,0,1971,1972,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,1973,0,0,0,0,1975,0,0,0,0,1976,1979,0,1982,0,0,0,0,1984,1988,0,0,0,0,1990,
+2004,2008,0,0,0,2012,2013,0,0,0,0,0,0,0,0,0,0,2015,0,2016,2017,0,0,0,0,2021,0,0,
+2025,0,0,0,0,0,2029,2036,2040,0,2042,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2043,0,0,0,0,0,
+2045,0,0,0,0,0,0,0,2046,2047,0,2048,2049,0,2059,0,0,2063,0,2064,2065,0,0,2066,0,
+0,0,0,0,0,2069,0,0,0,0,2070,0,2071,0,2072,0,0,0,0,2080,2082,2083,0,0,0,0,0,2085,
+0,2086,2088,2089,2105,0,0,0,0,2107,0,0,2116,2117,0,2120,0,0,2122,0,0,0,0,0,2123,
+0,0,2125,2127,2128,0,0,0,2130,0,0,0,2137,2139,2140,2141,0,0,0,0,0,0,0,0,0,2144,
+2145,0,0,2146,2149,0,0,0,0,2150,0,0,2151,2158,0,2159,0,2160,0,0,0,0,0,0,2161,
+2162,0,0,2194,2202,0,0,0,0,0,0,2205,2217,0,2220,0,2221,0,2222,2224,0,0,0,0,2237,
+0,0,0,0,0,2238,0,2239,2241,0,0,2242,0,0,0,0,0,2243,0,0,0,0,0,0,2252,0,0,2253,0,0
+,0,2257,2258,0,0,0,2260,0,0,0,0,0,0,0,2262,0,2264,0,0,0,0,0,2269,2270,0,0,0,0,0,
+0,0,0,0,2271,0,2273,0,0,0,0,2277,0,0,0,0,2278,0,0,0,0,2279,0,2280,0,2283,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2287,0,0,0,0,0,0,0,2289,2290,0,0,0,0,2291,0,2292,0,
+0,0,2293,2295,2296,0,0,0,0,0,0,0,2298,0,0,0,0,0,2303,0,2305,0,0,2306,0,2307,0,0,
+0,0,0,0,0,0,0,0,0,0,2313,2314,2315,2316,0,0,2318,0,2319,0,2322,0,0,2323,0,2324,0
+,2326,0,0,0,0,0,0,0,2335,0,2336,2338,2339,0,2340,0,0,0,2355,0,2375,0,2382,2386,0
+,2387,0,0,2394,0,0,0,0,2395,0,2397,0,0,0,0,0,2398,0,0,0,0,0,0,0,2399,2402,2404,
+2408,2411,0,0,0,2413,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2415,0,0,2416,2417,2419,0,2420,
+0,0,0,0,0,2425,0,0,0,2426,0,0,0,0,0,0,0,0,0,0,0,0,2427,2428,0,2429,0,0,2430,2434
+,0,2436,0,0,0,0,0,0,2441,2442,0,2445,0,0,2446,2457,0,2459,0,0,2462,0,2464,0,2477
+,0,2478,2486,0,0,0,2491,0,0,2493,0,0,2494,0,2495,0,2513,2523,0,0,0,0,2524,0,0,0,
+0,0,0,2528,2529,2530,0,0,2531,0,2533,0,0,2534,2535,0,2536,2537,0,2538,0,2539,
+2540,0,0,0,2545,2546,0,0,0,0,0,0,0,2548,0,0,2549,0,2550,2555,0,0,0,0,0,2557,0,
+2560,0,0,0,0,0,0,0,0,0,0,0,2561,0,2576,0,0,0,0,0,0,0,0,0,2577,2578,0,0,0,2579,0,
+0,0,0,0,0,0,2580,0,0,0,0,2581,0,0,0,0,2583,0,2584,0,2588,2590,0,0,0,2591,0,0,0,0
+,2593,2594,0,2595,0,2601,2602,0,0,2603,0,2605,0,0,0,2606,2607,2611,0,2615,0,0,0,
+2617,0,0,0,0,0,0,0,0,0,0,0,0,0,2619,0,0,2620,0,0,0,2621,0,2623,0,2625,0,0,2628,
+2629,0,0,2635,2636,2637,0,0,2639,0,0,0,2642,0,0,0,0,2643,0,2644,0,2649,0,0,0,0,0
+,0,2655,2656,0,0,2657,0,0,0,0,0,2658,0,0,0,0,0,2659,0,0,0,0,2664,2685,0,2687,0,
+2688,0,0,2689,0,0,2694,0,2695,0,0,2698,0,2701,2706,0,0,0,2707,0,2709,2710,2711,0
+,0,0,2720,2730,2735,0,0,0,0,2738,2740,0,0,0,0,2747,0,0,0,0,0,0,2748,0,0,2749,0,0
+,0,0,0,2750,0,0,2752,2754,0,0,0,0,0,2758,0,0,0,0,2762,0,0,0,0,2763,0,0,0,0,0,0,0
+,2764,2767,0,0,0,0,2768,0,0,2770,0,0,0,0,0,0,0,2771,0,0,0,0,0,0,0,0,0,2772,0,0,0
+,0,0,2773,2776,0,0,2783,0,0,2784,0,2789,0,2790,0,0,0,2792,0,0,0,0,0,0,0,0,0,0,
+2793,2795,0,0,0,0,0,0,2796,0,0,0,0,0,0,2797,2799,0,0,0,0,2803,0,0,0,0,2806,0,
+2807,2808,2817,2819,0,0,0,0,0,2821,0,0,0,0,2822,2823,0,0,0,0,0,0,0,2824,0,0,2828
+,0,2834,0,0,0,0,0,0,2836,0,2838,0,0,2839,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2841,
+0,0,0,2842,0,0,0,0,0,2843,2844,0,0,0,0,2846,0,0,2847,0,2849,0,2853,0,0,0,0,0,
+2857,0,0,0,0,2858,0,2859,0,0,2860,0,2862,2868,0,0,0,0,2875,0,2876,0,0,2877,2878,
+2884,2889,2890,0,0,2891,0,0,2892,0,0,0,2906,2912,0,2913,0,0,0,0,0,0,0,0,2916,0,
+2934,0,0,0,0,0,2935,0,0,0,0,2939,0,2940,0,0,0,0,0,0,0,2941,0,0,0,2946,0,2949,0,0
+,2950,2954,2955,0,0,0,2959,2961,0,0,2962,0,2963,0,0,0,0,0,0,2964,2965,2966,2967,
+0,0,0,0,0,0,0,2969,0,0,0,0,0,2970,2975,0,2982,2983,2984,0,0,0,0,0,2989,0,0,2990,
+0,0,0,0,0,0,0,2991,0,0,0,0,0,0,0,0,2998,0,3000,3001,0,0,3002,0,0,0,3003,0,0,3012
+,0,0,3022,0,0,3024,0,0,3025,3027,0,0,0,3030,0,0,0,0,3034,3035,0,0,3036,0,3039,0,
+3049,0,0,3050,0,0,0,0,0,0,3051,0,3053,0,0,0,0,3057,0,3058,0,0,0,0,0,0,0,0,3063,0
+,0,3073,3074,3078,3079,0,3080,3086,0,0,0,0,0,0,0,0,3087,0,3092,0,3095,0,3099,0,0
+,0,3100,0,3101,3102,0,3122,0,0,0,3124,0,3125,0,0,0,0,0,0,3132,3134,0,0,3136,0,0,
+0,0,0,0,0,3147,0,0,3149,0,0,0,0,0,3150,3151,3152,0,0,0,0,3158,0,0,3160,0,0,3161,
+0,0,3162,0,3163,3166,3168,0,0,3169,3170,0,0,3171,0,0,0,0,0,0,0,3182,0,3184,0,0,
+3188,0,0,3194,0,0,0,0,0,0,3204,0,0,0,0,3209,0,0,0,0,0,0,0,0,0,0,0,3216,3217,0,0,
+0,0,0,0,0,3219,0,0,3220,3222,0,3223,0,0,0,0,3224,0,3225,3226,0,3228,3233,0,3239,
+3241,3242,0,0,3251,3252,3253,3255,0,0,0,0,0,0,0,0,3260,0,0,3261,0,0,0,3267,0,0,0
+,0,0,0,0,0,3271,0,0,0,3278,0,3282,0,0,0,3284,0,0,0,3285,3286,0,0,0,0,0,0,0,3287,
+3292,0,0,0,0,3294,3296,0,0,3299,3300,3301,0,3302,0,0,0,0,0,3304,3306,0,0,0,0,0,0
+,3308,0,0,0,0,0,0,0,0,0,3311,0,0,0,0,0,0,0,0,3312,3314,3315,0,3318,0,0,0,0,0,0,0
+,0,3319,0,0,0,0,0,3321,0,0,0,0,0,0,0,0,0,3322,0,0,3324,3325,0,0,3326,0,0,3328,
+3329,3331,0,0,3335,0,0,3337,0,3338,0,0,0,0,3343,3347,0,0,0,3348,0,0,3351,0,0,0,0
+,0,0,3354,0,0,0,0,0,0,0,0,0,0,3355,0,0,3365,3366,3367,0,0,0,0,0,0,3368,3369,0,
+3370,0,0,3373,0,0,3376,0,0,3377,0,3379,3387,0,0,0,0,0,3390,0,0,0,0,0,0,0,3402,0,
+3403,3436,3437,3439,0,0,3441,0,0,0,3442,0,0,3449,0,0,0,3450,0,0,0,0,0,0,0,3451,0
+,0,3452,0,3453,3456,0,3457,0,0,3458,0,3459,0,0,0,0,0,0,0,0,0,3460,0,0,3469,3470,
+0,0,3475,0,0,0,3480,3487,3489,0,3490,0,0,3491,3499,0,3500,0,0,3501,0,0,0,3502,0,
+3514,0,0,0,3516,3517,0,0,0,3518,0,0,0,0,3520,3521,3522,0,0,3526,3530,0,0,0,0,
+3531,0,0,0,0,3536,0,0,0,0,0,0,0,3539,3541,0,0,3542,3544,0,3547,3548,0,0,3550,0,
+3553,0,0,0,0,0,0,0,3554,0,3555,0,3558,0,3559,0,0,0,0,0,0,0,0,3563,0,3581,0,0,0,
+3599,0,0,0,3600,0,3601,0,3602,3603,0,0,3606,3608,0,3610,3611,0,0,0,0,0,0,0,0,0,
+3612,3616,3619,0,0,0,0,0,0,0,0,0,0,0,0,0,3624,3628,0,3629,3634,3635,0,0,0,0,0,0,
+3636,0,3637,0,0,3638,3651,0,0,0,0,0,0,3652,3653,0,0,0,0,3656,3657,0,0,0,0,0,3658
+,0,0,0,0,3659,0,3661,3663,3664,0,3665,0,3692,0,0,0,3694,3696,0,0,0,0,0,0,0,0,0,0
+,0,0,3698,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3700,0,0,3701,0,0,0,3708,3709,0,0,0,3711
+,3712,0,0,0,0,0,3723,0,3724,3725,0,0,3726,0,0,0,0,0,0,3728,3729,0,3734,3735,3737
+,0,0,0,3743,0,3745,0,0,3746,0,0,3747,3748,0,3757,0,3759,3766,3767,0,3768,0,0,0,0
+,3769,0,0,3771,0,3774,0,0,0,0,0,0,3775,0,0,0,0,0,0,3776,0,3777,3786,0,3788,3789,
+0,0,0,0,0,0,0,0,0,3791,0,3811,0,0,0,0,0,3814,3815,3816,3820,0,0,0,0,0,0,0,3821,0
+,0,3825,0,0,0,0,3835,0,0,3848,3849,0,0,0,0,3850,3851,3853,0,0,0,0,3859,0,3860,
+3862,0,0,0,0,0,3863,0,0,0,0,0,0,0,0,3873,0,3874,0,3875,3886,0,3887,0,0,0,0,3892,
+3913,0,3914,0,0,0,3925,3931,0,0,0,0,3934,3941,3942,0,0,0,0,3943,0,0,0,3944,0,0,0
+,0,0,3945,0,3947,0,0,0,3956,3957,0,0,0,0,0,0,0,0,0,3958,0,3959,3965,0,0,0,0,3966
+,0,0,0,3967,0,0,0,3968,3974,0,0,0,0,0,3975,3977,3978,0,0,0,0,3980,0,3985,0,0,0,0
+,0,0,0,0,3986,4011,0,0,4017,0,0,0,0,0,0,0,0,0,0,0,4018,0,0,0,0,4019,0,4023,0,0,0
+,4027,4028,0,0,0,0,0,0,0,0,4031,4034,0,0,4035,4037,4039,4040,0,0,0,0,0,4059,0,
+4060,4061,0,4062,4063,4066,0,0,4072,0,0,0,0,0,0,0,0,0,0,0,0,0,4088,0,0,0,0,0,
+4091,0,0,0,0,4094,4095,0,0,4096,0,0,0,0,0,4098,4099,0,0,0,4101,0,4104,0,0,0,4105
+,4108,0,4113,0,0,4115,4116,0,4126,0,0,4127,0,0,0,0,0,0,0,4128,4132,4133,0,4134,0
+,0,0,4137,0,0,4141,0,0,0,0,4144,4146,4147,0,0,0,0,4148,0,0,4311,0,0,0,4314,4329,
+0,4331,4332,0,4333,0,4334,0,0,0,4335,0,4336,0,0,0,4337,0,0,0,4342,4345,4346,4350
+,0,4351,4352,0,4354,4355,0,0,4364,0,0,0,0,4369,0,0,0,4373,0,4374,0,0,0,0,4377,0,
+0,0,0,4378,0,0,0,4380,0,0,0,4381,4382,0,0,0,0,0,0,0,4384,0,0,0,0,4385,0,0,0,4386
+,0,0,0,4391,4398,0,0,0,0,4407,4409,0,0,0,0,4410,0,0,4411,0,4414,4415,4418,0,4427
+,4428,4430,0,4431,0,4448,0,0,0,0,0,4449,0,0,0,4451,4452,0,4453,4454,0,4456,0,0,0
+,0,0,0,0,4459,0,4463,0,0,0,0,0,4466,0,4467,0,4469,0,0,0,0,0,0,0,0,0,0,0,0,0,4470
+,4471,0,4473,0,0,4475,0,0,0,0,4477,4478,0,0,0,4479,4481,0,4482,0,4484,0,0,0,0,0,
+0,0,4486,0,0,4488,0,0,4497,0,4508,0,0,4510,4511,0,4520,4523,0,4524,0,4525,0,4527
+,0,0,4528,0,0,0,0,4530,0,4531,0,0,4532,0,0,0,4533,0,0,0,0,0,4535,0,0,0,4536,0,0,
+0,0,0,4541,4543,4544,4545,4547,0,4548,0,0,0,0,4550,4551,0,4553,0,0,0,0,4562,0,0,
+4571,0,0,0,4574,0,0,0,4575,0,4576,0,4577,0,0,0,4581,0,0,0,0,0,4582,0,0,4586,0,0,
+0,4588,0,0,4597,0,4598,0,0,0,0,4616,4617,0,4618,0,0,0,0,4619,0,4620,0,0,4621,0,
+4624,0,0,0,0,0,4625,0,0,0,0,4657,0,4659,0,4667,0,0,0,4668,4670,0,4672,0,0,0,0,0,
+4673,4676,0,0,0,0,4687,0,0,0,0,4697,0,0,0,0,4699,0,4701,0,0,0,0,4702,0,0,4706,0,
+0,4713,0,0,0,4714,4715,4716,0,0,0,0,0,0,0,0,0,0,0,0,4717,0,0,4720,0,4721,4729,
+4735,0,0,0,4737,0,0,0,4739,0,0,0,4740,0,0,0,4741,0,0,0,0,0,4742,0,4745,4746,4747
+,0,0,0,0,0,0,0,0,4748,0,0,0,4749,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4751,
+4786,0,4787,0,4788,4796,0,0,4797,4798,0,4799,4806,4807,0,0,0,0,4809,4810,0,0,0,0
+,0,0,4811,0,0,0,0,0,4812,0,4813,0,0,4815,0,4821,4822,0,0,0,0,4823,0,0,0,0,0,0,0,
+0,0,0,4824,0,0,0,0,4826,0,0,0,4828,0,4829,0,0,0,4843,0,0,4847,0,4853,4855,4858,0
+,0,0,0,0,4859,0,4864,0,0,4879,0,0,0,0,4880,0,0,0,0,4881,0,4882,0,0,0,0,0,0,0,0,0
+,4883,0,0,0,0,4884,0,0,0,0,0,4886,4887,4888,4894,4896,0,4902,0,0,4905,0,0,4915,0
+,0,0,0,0,0,0,4916,4917,4919,4921,0,0,0,0,0,4926,0,0,0,0,4927,0,0,0,0,0,0,0,0,
+4929,0,4930,4931,0,4938,0,4952,0,4953,4957,4960,4964,0,0,0,0,0,0,0,5019,5020,
+5022,0,0,0,0,0,5023,0,0,0,5024,0,0,0,5025,0,0,0,0,5028,0,0,0,0,5029,5030,5031,0,
+5033,0,0,0,0,0,0,0,0,0,5034,5035,0,5036,0,0,5037,0,0,0,0,5038,0,0,5039,0,0,0,
+5041,5042,0,0,0,0,5044,5049,5054,0,5055,0,5057,0,0,0,5060,0,0,0,0,0,5063,0,5064,
+5065,0,5067,0,0,0,5068,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5076,0,0,0,0,0,0,
+0,5077,0,0,5078,5080,0,0,5083,0,0,0,0,0,0,0,0,5085,0,0,0,0,0,0,5098,5099,5101,
+5105,5107,0,5108,0,5109,0,0,0,0,0,0,0,5110,0,0,0,0,0,5117,5118,0,5121,0,5122,0,0
+,5130,0,0,0,5137,0,0,0,5148,0,0,0,0,0,0,0,5151,5154,0,0,0,5155,0,0,5156,5159,
+5161,0,0,0,0,5162,0,0,0,0,5163,5164,0,5166,0,0,0,0,0,0,0,0,0,0,5167,0,0,0,5172,0
+,0,0,0,0,0,5178,5179,0,0,5190,0,0,5191,5192,5194,0,0,5198,5201,0,0,0,0,0,5203,0,
+5206,5209,0,0,0,0,0,0,5213,0,5214,5216,0,0,0,0,0,5217,0,0,0,0,0,0,0,0,5218,5219,
+0,5231,0,0,5244,5249,0,5254,0,5255,0,0,5257,0,0,0,0,0,5258,0,5260,5270,0,5277,0,
+0,0,0,0,0,5280,5281,5282,5283,0,0,0,0,0,5284,0,5285,0,0,0,0,0,5287,5288,0,0,0,0,
+0,0,0,0,0,0,5289,5291,0,0,5294,0,0,5295,0,0,0,0,0,0,0,5304,0,0,5306,5307,5308,0,
+5309,0,0,5310,0,0,0,0,5311,5312,0,5313,0,0,0,0,0,5316,0,0,0,5317,0,0,0,0,0,0,0,0
+,0,5325,0,0,0,0,0,0,5326,0,5327,5329,0,5332,0,0,0,0,5338,0,0,0,0,0,0,0,0,5340,0,
+0,5341,0,0,0,5342,0,5343,5344,0,0,5345,0,0,0,0,0,0,5347,5348,0,0,0,0,0,0,0,0,0,
+5349,0,5350,0,5354,0,0,0,0,5358,0,0,5359,0,0,5361,0,0,5365,0,5367,0,5373,0,0,0,
+5379,0,0,0,5380,0,0,0,5382,0,5384,0,0,0,0,0,0,5385,0,0,0,0,5387,0,0,0,0,0,0,5388
+,5390,5393,0,0,0,0,0,0,0,0,0,0,0,5396,0,0,0,0,5397,5402,0,0,0,0,0,5403,0,0,0,
+5404,5405,0,0,0,0,0,0,0,0,0,0,0,0,5406,0,0,0,0,5410,0,0,5411,0,5415,0,0,0,0,5416
+,5434,0,0,0,0,0,0,0,0,0,0,0,5438,0,5440,0,0,0,0,0,0,5441,5442,0,0,0,5443,5444,
+5447,0,0,5448,5449,5451,0,0,0,5456,5457,0,0,0,5459,0,0,0,5461,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,5464,0,5466,0,0,5467,0,5470,0,0,5473,0,0,5474,0,0,5476,0,0,0,0,0,0,0,0
+,0,0,0,5477,0,0,0,0,0,0,0,5484,0,0,5485,5486,0,0,0,0,0,5488,0,0,0,0,0,0,0,5489,0
+,0,0,0,0,5507,0,0,0,5510,0,5511,0,0,5512,0,0,0,5513,0,5515,0,0,5516,5517,0,5518,
+0,0,5522,0,0,0,0,0,5534,5535,0,0,5536,0,5538,0,0,5543,0,5544,0,0,5545,0,5547,0,
+5557,0,0,5558,0,5560,5567,0,0,0,0,5568,0,0,0,5571,5573,0,5574,0,5575,0,0,0,0,
+5577,0,0,5598,0,0,0,0,0,0,0,0,0,5600,5609,0,0,0,0,5610,0,0,5612,0,5624,0,5625,0,
+0,0,5629,0,5641,0,5642,5643,0,0,0,0,0,0,5651,0,0,0,5652,5653,0,5661,5662,5678,0,
+5679,0,0,0,0,5685,5686,0,0,0,0,0,5690,5692,0,5703,0,0,0,0,0,5706,0,0,0,0,5707,0,
+0,0,0,0,0,5708,0,0,5709,0,5710,0,0,0,5712,0,5733,0,5734,5735,0,0,5744,5751,0,0,0
+,0,0,0,0,0,0,0,0,0,5752,0,5754,0,0,0,0,0,0,5757,5758,0,5760,5761,0,0,0,0,5763,
+5764,5765,0,5766,0,5767,5768,0,5770,0,0,0,0,5776,5780,0,0,0,0,5782,0,0,0,0,5784,
+0,0,5788,0,0,0,0,0,0,0,0,0,0,0,5797,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5799,0,0,5801,
+0,0,0,5811,0,0,0,0,0,0,5816,0,0,5827,0,0,0,0,0,0,0,0,5830,5831,0,0,5832,0,0,5833
+,0,5835,5844,5845,0,5846,0,0,0,0,0,5850,0,0,0,0,0,5852,0,5855,5857,0,0,5859,0,
+5861,0,0,5863,0,5865,0,0,0,5873,5875,0,0,0,5877,0,5879,0,0,0,5888,0,0,5889,5891,
+0,5894,0,0,0,0,0,0,5895,0,5897,0,0,0,0,0,0,5907,0,5911,0,0,5912,0,5913,5922,5924
+,0,5927,5928,0,0,0,0,5929,5930,0,5933,0,0,0,0,5949,0,0,5951,0,0,0,0,0,0,0,0,5953
+,0,0,5954,0,5959,5960,5961,0,5964,0,0,0,5976,5978,5987,5990,0,0,0,0,0,5991,0,
+5992,0,0,0,5994,5995,0,0,5996,0,0,6001,6003,0,0,0,0,6007,0,0,0,0,0,6008,0,0,6009
+,0,6010,0,0,0,6011,6015,0,6017,0,6019,0,6023,0,0,0,0,0,0,0,6025,0,0,0,0,0,0,0,0,
+0,0,6026,0,6030,0,0,6032,0,0,0,6033,6038,6040,0,0,0,6041,6045,0,0,6046,0,0,6053,
+0,0,6054,0,6055,0,0,0,0,0,0,6057,0,6063,0,0,0,6064,0,6066,6071,6072,0,0,0,0,0,0,
+6075,6076,0,0,6077,0,0,0,0,0,0,0,0,0,6078,6079,0,0,0,0,0,0,0,0,6080,0,6083,0,0,0
+,0,0,6084,0,0,6088,0,6089,0,0,6093,6105,0,0,6107,0,6110,0,0,0,6111,6125,6126,0,0
+,0,6129,0,0,0,0,6130,0,0,0,6131,6134,0,0,0,0,0,0,6142,0,0,0,0,0,6144,0,0,6146,
+6151,6153,0,6156,0,6163,0,6180,6181,0,0,0,0,0,6182,0,0,0,0,6184,6195,0,0,6206,0,
+6208,0,0,6212,6213,6214,0,6215,0,0,0,6228,0,0,0,6234,0,0,0,0,0,0,6235,6240,0,
+6242,6243,6244,0,6250,6255,0,0,0,0,0,6257,0,0,0,6258,6278,0,6284,0,0,0,6285,0,0,
+0,0,0,0,0,0,6286,0,0,0,6320,0,0,6322,6332,0,0,0,0,0,0,0,0,6334,0,0,0,0,0,0,0,
+6335,0,0,6337,0,6338,0,6339,6340,0,0,6356,6357,6369,0,0,0,6370,6371,6372,0,6373,
+0,0,0,0,0,6376,0,0,0,0,0,6382,6383,6384,0,0,0,0,6386,0,6389,6397,6400,6411,0,
+6414,0,0,0,0,0,0,0,6415,6416,0,0,0,0,0,0,6417,0,0,0,0,6418,0,0,0,0,0,0,0,6420,0,
+6421,6423,6425,0,6429,6430,0,6433,6438,0,0,0,0,0,0,0,0,0,0,6439,6440,0,0,6441,0,
+0,6444,0,0,0,0,6446,0,0,0,0,6447,6448,0,0,6450,0,0,0,6454,0,0,6455,0,6461,0,0,0,
+0,0,0,6462,0,0,6463,0,6464,0,6465,6467,0,0,0,6468,0,6479,6480,0,0,0,0,0,0,0,6481
+,0,0,6485,6487,0,0,0,0,0,0,6493,0,0,0,0,0,0,0,0,6494,6495,6496,0,0,0,0,0,6498,0,
+0,0,6507,6508,0,0,0,0,0,0,0,0,0,0,6511,6512,0,0,0,0,6513,0,0,0,6514,0,0,0,0,0,
+6516,0,0,6517,6518,0,0,0,6519,6520,6521,0,6523,0,0,0,0,6524,6528,0,6530,0,0,6532
+,0,6578,0,0,0,6583,0,6584,0,0,0,6587,0,0,0,6590,0,6591,0,0,0,0,0,6592,0,0,0,0,
+6593,6594,0,0,0,0,0,6599,6600,0,0,6601,6602,6604,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+6608,0,0,0,0,0,0,0,0,6610,6611,0,6615,0,6616,6618,6620,0,6637,0,0,0,0,6639,0,0,0
+,0,6641,0,6642,0,0,0,6647,0,6660,6663,0,6664,0,6666,6669,0,6675,6676,6677,0,0,0,
+0,0,0,0,0,0,6678,0,0,0,6679,0,6680,0,0,0,0,0,0,0,6693,0,0,0,0,0,0,0,0,0,6704,
+6705,6706,0,0,6711,6713,0,0,0,0,0,6716,0,0,0,6717,0,6719,6724,0,0,0,0,0,0,0,0,
+6725,6726,0,0,0,0,0,6728,6729,6735,0,6737,6742,0,0,6743,6750,0,6751,0,0,6752,
+6753,0,0,0,0,0,0,6754,0,0,0,0,0,6756,0,0,0,0,0,0,6763,0,0,6764,6765,0,0,0,6770,0
+,0,0,6776,6780,0,6781,0,0,0,6783,0,6784,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+6785,0,0,0,6792,0,0,0,6793,0,0,6802,0,0,0,0,0,6803,0,0,0,6804,0,0,0,6812,0,0,
+6823,0,6824,6839,0,0,0,0,6852,0,0,6854,0,6856,6857,0,0,0,0,0,0,0,0,0,6867,0,6868
+,6870,6872,0,0,0,6873,6874,0,0,0,0,0,6875,0,0,6877,0,0,0,0,0,0,0,6878,0,0,0,6879
+,0,6880,0,0,0,0,0,0,0,0,0,0,6887,0,6888,6891,6893,0,6895,0,0,0,0,0,0,0,0,6899,0,
+0,0,0,6901,0,0,0,0,6910,0,6911,0,0,6912,0,0,6913,6914,0,0,0,6915,0,0,0,6916,6919
+,0,0,0,0,0,0,6924,0,6925,0,0,0,6926,6927,6928,0,6929,0,6930,0,0,6931,6935,0,6936
+,0,0,0,0,6939,6940,6941,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6942,6948,6949,0,0,0,0,0,0
+,0,6952,6954,6963,6965,6966,0,0,6967,6968,0,0,0,0,0,0,0,0,0,6969,0,0,6970,6979,0
+,0,6980,0,0,6983,0,0,0,0,0,6984,0,0,0,0,0,0,0,6988,6990,6992,0,0,0,0,0,0,0,6995,
+0,0,0,7012,0,0,0,0,0,0,0,0,0,7019,0,0,0,0,0,0,0,0,7021,0,0,7022,7023,7028,0,7030
+,7033,0,0,0,0,0,0,7038,0,0,0,0,0,0,0,0,0,0,7039,0,0,0,0,0,7046,0,7047,0,0,0,0,0,
+0,0,0,0,0,0,7048,7052,0,0,0,0,0,7054,0,7060,0,0,0,0,7061,0,7065,0,0,0,0,7067,
+7069,0,7070,7071,7072,0,0,7078,0,7080,7081,0,7083,0,0,0,7084,7087,7088,0,0,7090,
+0,7093,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7107,0,0,7108,0,0,0,0,0,0,0,0,7110,0,7114,0
+,0,0,0,0,0,0,7115,0,7116,0,0,0,0,0,7117,0,0,7118,0,0,7124,0,7125,0,0,7126,0,0,0,
+0,7128,0,0,0,0,0,7129,0,7130,0,7132,7133,0,0,7134,0,0,7139,0,7148,7150,0,0,0,0,
+7152,0,0,0,7153,7156,7157,0,0,0,0,0,7158,0,0,0,0,0,0,0,0,0,0,7163,7165,7169,0,
+7171,0,0,0,0,0,0,0,0,0,7172,0,7173,7181,0,0,0,0,0,7182,7185,0,0,0,0,7187,0,7201,
+7204,0,0,0,0,0,7206,7207,0,0,0,0,7211,7216,0,7218,0,0,0,0,7226,7228,7230,7232,
+7233,7235,7237,0,0,0,0,7238,7241,0,7242,0,0,7247,0,0,0,7266,0,0,0,0,0,0,0,7289,0
+,0,7290,7291,0,0,7292,0,7297,0,0,0,0,0,0,0,0,0,0,7300,0,7301,0,0,0,0,0,0,0,0,0,0
+,0,0,7302,0,0,0,0,7305,0,0,0,0,7307,0,7308,0,7310,0,7335,0,0,0,0,0,0,0,7337,0,
+7343,7347,0,0,0,0,0,7348,0,7349,7350,7352,7354,0,0,0,0,7357,0,7358,7366,0,7367,
+7368,0,0,7373,0,0,0,7374,0,0,0,0,0,0,0,7376,0,0,0,7377,0,0,0,0,0,7378,0,7379,
+7380,0,0,0,0,0,7383,0,0,7386,0,0,0,0,7398,0,0,0,7399,7400,0,7401,0,0,0,0,0,0,0,
+7402,0,0,0,0,0,7405,0,0,0,0,0,7406,0,0,0,0,0,0,0,0,7421,7427,7429,0,0,0,7435,0,0
+,7436,0,0,0,7437,0,0,0,0,0,0,7438,7443,0,7446,0,7448,0,0,0,0,0,0,0,0,0,0,7456,0,
+0,0,0,0,7457,0,0,7461,0,0,0,0,0,7462,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7463,7466,7472,
+0,7476,0,0,7490,0,7491,0,0,7493,0,0,0,7498,7499,0,0,7508,0,0,0,0,0,7512,0,0,0,
+7513,7514,7516,0,0,0,0,7518,0,0,7519,7521,7522,0,0,0,7526,0,0,7529,0,0,7531,0,
+7536,0,7538,0,7539,0,0,7541,7542,7546,0,0,0,0,0,7547,0,7548,0,0,0,0,0,7550,0,0,
+7552,7553,0,0,0,0,0,0,0,0,0,0,7554,7563,0,7573,0,0,0,0,0,0,7574,7576,0,7578,7581
+,7583,0,0,0,7584,0,7587,0,0,0,0,0,7589,0,0,0,7594,0,0,7595,0,0,7600,7602,7610,0,
+0,0,0,0,7612,0,7613,7614,0,0,7615,0,0,7616,0,7620,0,7621,7622,0,7623,0,0,0,0,
+7626,0,0,0,0,7627,7629,7631,0,0,7633,0,0,0,0,0,7639,0,7640,7642,0,0,7643,0,0,0,0
+,7644,0,0,0,0,0,0,0,7645,0,0,0,0,0,7661,7662,7663,7665,0,7666,0,7667,0,7684,7688
+,7690,0,7691,0,0,0,0,0,0,7692,0,0,7700,0,7707,0,7708,0,7709,0,7721,0,0,0,7722,0,
+7724,0,0,0,0,0,0,7729,7731,0,7732,0,7733,7735,0,0,0,0,0,0,0,7739,0,0,7741,7745,0
+,7748,0,0,0,7751,0,0,0,7752,0,0,0,0,0,0,0,7753,0,0,7756,0,7757,0,7759,0,7760,0,0
+,0,0,7761,7768,0,0,7769,0,0,7770,0,0,7771,0,0,7772,0,0,7773,0,0,0,0,0,7778,7783,
+0,0,0,0,0,7784,7785,0,7790,0,0,0,0,7792,0,7798,0,0,0,0,0,7799,0,7810,0,0,7813,0,
+7814,0,7816,0,7818,7824,7825,7826,0,7828,7830,0,0,0,7840,0,7842,0,7843,0,0,0,0,
+7844,0,0,0,0,0,0,0,7846,0,0,0,0,0,7856,7857,7858,7862,0,7865,0,0,7866,0,0,7913,0
+,0,0,0,7914,0,0,7915,7917,7918,7919,0,7920,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7921,
+7922,0,7924,0,0,7925,0,0,7927,0,7930,7935,0,0,7937,0,0,0,0,0,0,7939,0,7940,0,0,0
+,0,0,7941,0,0,0,0,7945,0,0,0,0,7949,0,0,0,0,0,0,0,0,7950,0,7953,0,0,0,0,0,0,0,
+7968,0,0,0,0,7969,7972,7992,0,7993,0,0,0,0,0,0,0,0,0,0,0,7994,0,0,0,0,8007,8008,
+0,0,0,0,0,0,0,0,0,0,0,0,8010,0,0,0,8012,0,0,0,0,0,0,0,0,8018,0,8028,8029,0,0,
+8030,0,0,8032,8033,0,0,8034,8036,0,0,0,0,0,0,0,0,0,0,8037,0,0,0,8043,8052,8059,
+8060,0,0,8061,0,0,0,8062,0,8063,0,8064,0,8066,8068,0,0,0,8080,8081,0,8089,0,0,0,
+0,0,8092,0,0,0,0,0,0,8093,8110,0,0,0,0,0,0,0,8111,0,0,0,0,0,8112,8115,0,8117,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8120,8121,8122,8128,8129,8130,8131,0,0,8139,0,0,
+8144,0,0,0,0,8145,8146,8153,0,0,0,0,0,0,0,0,8154,0,8157,8160,8162,0,8164,8165,0,
+0,0,0,8166,8167,0,0,8179,0,0,0,8185,0,0,0,8186,0,0,8187,0,0,0,8188,0,0,0,0,0,
+8204,0,0,0,0,8210,0,0,0,0,0,8213,0,8214,0,0,8215,0,0,0,0,0,0,8218,0,0,0,0,0,0,0,
+0,0,8219,0,8221,0,0,8222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8225,0,0,0,8233,0,0,
+8242,0,0,0,0,0,0,0,0,0,0,0,8247,0,8248,8252,0,8256,8257,0,0,8261,0,8264,8265,0,0
+,0,0,8267,0,0,0,8269,0,0,0,0,0,0,0,0,0,8270,0,0,0,8278,0,8279,8283,0,0,8285,8286
+,8289,8292,0,0,0,0,8293,8295,8299,8300,8301,0,0,0,0,0,0,8304,8307,0,0,0,0,0,0,0,
+8321,0,0,0,8322,8323,8325,8326,8327,0,0,8332,8338,0,0,8340,0,0,0,0,0,8350,0,0,
+8351,0,8354,8355,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8360,8372,0,0,0,0,0,0,0,0,8377,0,0,
+0,0,8380,0,0,0,8383,0,8384,0,0,0,0,8386,8392,0,0,8394,0,0,0,0,0,0,0,8396,8397,0,
+8398,0,8399,0,0,0,0,0,8400,0,8401,8410,8411,0,8412,8413,8422,0,0,0,0,8423,0,0,0,
+0,8424,0,0,8425,0,0,0,0,0,0,0,8441,8442,0,0,0,0,0,0,8443,0,0,8444,0,8447,0,0,0,0
+,8451,0,8458,0,8462,0,0,8468,0,8469,0,0,0,8470,0,8473,8479,8480,0,0,0,0,8481,
+8483,0,0,0,0,0,0,0,0,0,8484,0,0,8490,0,0,0,0,0,0,8491,8493,8494,0,8528,0,0,0,0,0
+,0,0,8530,0,0,0,0,0,0,0,0,8534,8538,8540,0,0,8541,0,0,8545,0,8557,0,0,8569,8570,
+0,0,8571,8574,8575,8579,0,8583,0,0,0,0,8591,0,0,0,0,0,0,0,0,8606,0,8607,0,0,0,0,
+0,0,0,0,0,8608,0,0,8609,0,0,0,8610,0,0,0,8611,0,0,8613,8617,8621,0,0,8622,0,8623
+,0,8624,8625,0,0,0,0,0,0,0,0,0,8637,8638,8639,8650,0,0,0,0,8652,8654,8655,0,0,0,
+0,0,0,0,0,0,0,8656,0,0,0,0,0,8657,0,0,0,0,0,0,0,0,0,8658,0,0,8659,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,8660,0,0,0,0,0,0,8661,8663,8664,0,0,0,0,8665,0,8669,0,
+0,0,0,0,0,0,8671,8674,0,8684,0,8686,0,0,0,8689,0,0,0,8690,0,8706,0,0,0,0,0,0,0,0
+,0,0,0,8710,0,8711,8713,8714,8724,8727,8728,8733,8736,0,8737,8739,0,0,0,0,8742,
+8743,8745,8754,0,0,0,0,8756,0,0,0,0,0,0,8757,8760,0,0,0,0,0,8762,8763,8764,0,
+8766,8769,8770,8773,0,8774,0,8779,0,0,0,0,8780,0,0,8781,0,0,8783,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8784,0,0,0,0,0,0,0,0,8785,0,0,0,0,8786,0,0,0,0,8788
+,8790,0,0,0,8803,0,8813,8814,0,0,0,0,0,8815,8816,0,0,0,0,8818,0,0,0,0,8822,8828,
+8829,0,8831,0,0,0,0,8833,0,0,0,8834,0,0,0,8835,0,8836,0,0,0,8837,0,0,0,0,0,0,
+8838,8839,0,0,0,0,0,0,0,0,0,0,0,8840,0,0,0,8841,0,8842,0,0,0,8846,0,0,0,0,0,0,0,
+8847,0,8848,0,0,8864,0,0,8866,0,0,8870,8872,0,0,8873,8874,0,0,0,0,0,0,8875,0,
+8876,0,0,0,0,8896,8900,0,0,0,0,8901,0,0,0,0,0,8904,0,8907,0,0,0,0,8911,8912,8913
+,0,0,0,8914,0,8915,0,0,0,0,0,0,0,0,0,0,0,0,8916,0,0,0,8929,0,0,0,0,0,0,0,0,0,0,
+8930,0,8932,0,8943,0,0,0,8945,8947,0,0,0,0,8949,0,8950,0,8954,8957,0,0,8970,0,0,
+0,0,8971,0,8996,0,0,0,0,8997,9000,0,0,0,0,9001,9002,0,9004,9009,9024,0,0,0,0,0,0
+,0,0,0,0,0,0,9027,9082,0,0,9083,9089,0,0,0,0,0,0,9090,0,0,0,9092,0,0,9093,0,9095
+,0,0,9096,9097,9101,9102,0,0,0,0,0,0,0,0,9112,0,0,0,0,0,0,9114,0,0,9120,0,9121,
+9122,0,0,0,9123,9124,0,0,9125,0,0,9126,0,9127,0,0,9129,9131,0,0,0,9132,0,0,9136,
+0,9144,0,0,9148,0,0,0,0,0,0,9149,0,9152,9163,0,0,9165,0,0,0,0,0,0,0,0,0,0,0,0,0,
+9166,0,9169,0,0,0,0,0,0,0,9170,0,0,0,0,9172,0,9174,9175,9176,0,9177,0,0,0,0,0,0,
+0,0,9186,0,9187,0,0,0,9188,9189,0,0,9190,0,0,0,0,9191,0,0,0,9193,0,0,0,0,9197,
+9198,0,0,0,9208,9211,0,0,0,0,9216,9217,0,9220,0,0,0,0,9221,9222,9223,0,9224,9225
+,0,0,9227,0,9228,9229,0,0,9230,0,9232,0,9233,0,0,0,0,0,9234,9235,0,0,9237,0,0,0,
+0,0,0,0,0,9238,9240,0,0,9241,0,0,0,0,9244,0,0,0,0,9247,0,0,0,0,0,0,0,0,0,0,9248,
+0,0,0,9249,0,0,0,0,0,9250,0,0,0,0,9251,0,0,9252,9255,0,0,0,9256,0,0,0,0,0,0,0,
+9257,0,0,9258,0,0,0,0,0,0,9259,0,0,0,0,0,9262,9263,0,0,9265,9266,0,0,0,0,0,0,0,0
+,9268,9271,0,0,0,0,0,0,0,0,0,9273,0,0,0,9276,9277,9279,0,0,0,0,0,0,0,9280,0,0,
+9293,0,0,0,0,0,9297,9301,0,0,0,0,0,0,0,0,0,0,0,9308,9309,9313,9321,9322,0,9326,
+9327,0,0,9477,0,9479,0,0,0,0,9482,0,0,0,9483,0,9484,0,0,0,0,0,0,0,0,0,9485,0,0,
+9486,0,0,0,9489,0,0,0,0,9490,9491,0,0,0,0,9493,0,9495,9496,0,0,0,0,0,0,0,0,9500,
+0,9502,0,0,0,0,0,9504,9507,0,9509,0,9511,0,0,9513,0,0,0,0,0,0,0,0,9515,0,0,0,0,0
+,0,9516,9517,0,0,0,0,9532,0,0,9533,0,0,9538,0,9539,9540,0,0,0,0,9541,0,0,0,9542,
+0,0,0,0,0,0,0,0,9544,9545,0,9546,0,0,0,0,0,0,9547,9548,0,0,0,9550,0,9557,0,9558,
+0,9561,0,9563,9570,0,9572,9574,9575,0,0,0,9577,9592,0,0,9596,0,0,0,9598,0,9600,0
+,9601,0,0,0,0,0,0,9608,0,9638,9639,0,0,0,0,0,0,0,9641,0,0,9643,9644,9645,9646,0,
+0,0,9648,0,0,0,0,0,0,0,9650,9654,0,0,0,0,0,0,0,0,9655,0,0,0,0,0,9656,0,9657,0,0,
+0,0,9658,0,0,9659,0,0,9664,0,0,9665,0,9667,9669,0,0,0,0,0,0,0,0,0,0,0,0,9671,0,
+9673,9681,0,0,0,0,9682,9683,9684,0,0,0,0,9686,9698,0,0,9700,9701,9702,0,9703,
+9717,0,0,0,0,9718,0,9726,0,0,0,0,9727,0,0,0,9728,0,9742,0,9744,0,0,0,9750,0,9754
+,9755,0,0,0,0,0,9756,0,9757,9768,0,9769,0,0,0,9770,9771,0,9773,0,9774,0,9775,0,0
+,0,9776,9777,9784,0,0,0,9786,0,9789,0,0,0,0,9793,9794,0,0,0,9808,0,0,0,0,0,9811,
+0,0,0,0,0,0,0,0,0,0,0,0,9812,0,9820,0,9823,0,9828,0,0,0,0,9830,0,0,9833,9836,0,0
+,0,9840,0,0,0,9841,0,0,9842,0,9845,0,0,0,9847,9848,0,0,9855,0,0,0,0,0,0,9856,
+9863,9865,0,0,0,0,0,0,0,0,9866,9867,9868,9873,9875,0,0,0,0,0,0,9880,0,9886,0,0,0
+,9887,0,0,9891,0,0,0,0,0,0,0,9906,9907,9908,0,0,0,9909,0,0,0,0,0,0,9910,0,0,0,0,
+9913,0,0,0,0,9914,0,0,0,0,0,9922,0,0,0,0,9923,9925,0,0,0,0,0,0,9930,0,0,0,9931,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9932,0,9939,0,0,9940,9962,9966,0,9969,9970,0,0,9974
+,0,9979,9981,9982,0,0,0,9985,0,0,0,0,0,0,9987,0,0,0,0,0,0,0,9988,9993,0,0,9994,0
+,0,0,9997,0,10004,0,0,0,0,0,10007,10019,10020,10022,0,0,0,10031,0,0,0,0,0,10032,
+0,0,10034,0,10036,0,0,0,0,10038,0,10039,10040,10041,10042,0,0,0,0,0,10043,0,0,0,
+0,0,10045,10054,0,0,0,0,10055,0,0,10057,10058,0,0,0,0,0,0,10059,0,0,0,0,0,0,0,
+10060,0,0,0,0,0,0,0,10063,0,10066,0,0,0,10070,0,10072,0,0,10076,10077,0,0,10084,
+0,10087,10090,10091,0,0,0,10094,10097,0,0,0,0,0,0,10098,0,0,0,0,0,0,10103,0,
+10104,0,10108,0,0,0,0,0,0,0,0,10120,0,0,0,10122,0,0,10125,0,0,0,0,10127,10128,0,
+0,10134,0,10135,10136,0,10137,0,0,10147,0,10149,10150,0,0,10156,0,10158,10159,
+10160,10168,0,0,10171,0,10173,0,0,0,10176,0,0,0,0,10177,0,0,0,0,10178,0,0,0,0,
+10194,0,10202,0,0,10203,10204,0,10205,10206,0,10207,0,0,0,0,10209,0,0,0,0,0,0,0,
+10213,0,0,0,0,0,0,10217,0,10229,0,10230,10231,0,0,10232,0,0,10237,10238,10244,0,
+0,0,0,0,10250,0,10252,0,0,0,0,0,0,10255,0,0,10257,0,0,0,0,0,0,10258,0,10259,0,0,
+0,0,0,0,0,0,10260,0,0,0,0,0,0,0,10284,10288,10289,0,0,0,10290,0,10296,0,0,0,0,0,
+10297,0,0,0,0,0,0,10298,0,0,0,0,10299,10303,0,0,0,0,0,10306,0,0,0,10307,0,10308,
+0,0,0,0,10311,0,0,0,0,0,0,0,10315,10317,0,0,0,10318,10319,0,10321,0,10326,0,
+10328,0,0,0,0,10329,0,0,10331,0,10332,0,0,0,0,0,0,10334,0,0,10335,10338,0,0,0,0,
+0,10339,10349,0,0,0,0,0,0,10351,0,10353,0,0,0,0,0,0,10362,0,10368,0,10369,0,0,0,
+10372,10373,0,0,0,0,0,10374,0,0,0,10375,0,10376,0,0,10386,10388,10390,0,0,0,0,0,
+0,0,10391,0,0,10392,10394,0,0,10396,0,10397,0,10403,0,0,0,0,0,0,0,0,10404,0,
+10405,10410,0,0,10411,0,10412,0,0,0,0,0,0,0,10421,10422,10423,0,0,0,0,0,0,0,0,0,
+10425,0,0,10427,0,0,10430,0,0,0,0,0,10432,0,10433,10434,0,0,0,0,10436,10437,0,
+10438,0,10439,0,10444,10446,0,0,0,0,0,10448,0,0,0,0,0,10449,0,0,0,0,0,0,0,10451,
+0,10453,0,0,0,10454,10457,0,0,10459,0,10469,0,0,0,0,0,10472,10481,0,0,0,0,0,
+10482,10483,0,10492,0,0,0,0,0,0,0,0,0,0,10499,0,0,0,10502,0,0,10510,0,10521,
+10524,0,0,10525,10526,10528,0,0,0,0,0,0,0,0,10530,0,0,0,0,10533,0,10534,0,0,0,0,
+0,0,0,0,0,0,10535,10536,0,0,10544,0,10553,10556,0,10557,10559,0,0,0,0,0,10562,
+10563,10564,0,10565,0,0,0,10566,0,10567,0,0,0,0,10575,0,0,10576,0,10578,0,0,0,0,
+0,0,0,0,0,0,10585,10586,10587,10589,0,10590,0,0,10594,0,0,0,0,0,10598,0,0,10601,
+0,0,0,10602,0,10603,0,10604,0,10605,0,0,10607,0,10626,0,10627,0,0,0,0,0,10629,
+10630,10631,0,0,0,10646,0,0,0,10647,0,10650,0,10651,0,0,0,10652,10653,10655,0,
+10658,0,0,10659,0,10667,0,0,0,0,10669,0,0,0,0,0,0,0,0,0,10670,0,0,0,10671,0,0,0,
+0,10672,10673,0,10674,0,0,0,10676,0,0,0,0,0,0,10678,0,10682,0,0,10692,0,10697,0,
+0,0,0,10698,0,0,0,10700,0,0,0,0,0,10703,0,10704,0,0,0,0,0,0,0,10705,0,10715,
+10718,10720,0,0,10722,0,0,0,0,0,0,0,0,10723,0,0,0,0,10726,0,0,0,0,0,10727,10730,
+10743,0,0,0,0,0,0,10744,0,0,10745,0,0,0,0,0,0,10748,0,0,0,0,10750,0,0,10752,
+10753,0,0,0,10756,0,0,0,0,0,0,10758,0,0,0,10759,0,10769,0,0,10772,0,0,0,0,0,0,
+10773,0,0,0,10777,0,0,10779,0,0,0,0,0,0,0,0,10780,10784,0,0,0,10789,0,0,0,10791,
+0,0,0,0,0,0,0,0,0,10795,0,0,10796,0,10808,0,10809,0,0,0,10810,0,0,0,10812,0,0,
+10814,0,0,0,0,0,0,0,0,0,10815,0,0,0,0,10816,10817,0,0,0,0,10819,0,10820,0,0,0,0,
+10821,10822,10823,0,10826,10849,0,0,0,0,10850,0,0,10852,0,10853,0,0,10856,0,0,
+10857,10858,10859,10860,0,0,0,0,0,0,10863,0,10866,10867,10872,10890,0,0,10891,
+10892,0,0,0,0,0,10893,0,0,0,10896,10899,0,0,10900,10902,0,0,0,0,0,10903,0,0,0,0,
+0,0,0,0,0,0,0,0,10905,0,10906,0,0,0,0,10908,10911,0,10912,0,0,10916,0,0,0,0,0,
+10917,0,10918,0,0,0,10923,0,0,0,0,0,10924,0,0,10928,10929,0,0,10930,0,0,0,10932,
+0,0,0,0,10939,0,0,10945,0,0,0,10947,0,0,10948,0,0,0,0,0,0,0,0,0,0,0,0,10958,0,
+10960,10962,0,0,10964,0,0,0,10966,0,0,0,0,0,0,0,0,0,0,10967,0,0,0,10968,0,0,0,
+10973,0,0,0,0,0,10975,0,0,0,10976,10978,0,0,10982,10984,10987,0,0,10988,0,10989,
+0,0,10991,0,0,0,0,10992,0,0,0,10993,0,10995,0,0,0,10996,10997,0,0,0,10998,0,
+10999,0,11001,0,0,0,0,0,0,11010,11012,0,11013,11016,11017,0,0,11019,11020,11021,
+0,0,0,0,0,0,0,0,0,0,0,0,11022,0,0,11023,11029,0,0,0,0,11031,0,0,0,11034,0,0,0,0,
+11055,0,0,0,0,0,11056,11060,0,0,0,0,0,0,11061,0,0,11064,11065,0,11066,0,11069,0,
+11085,0,0,0,0,0,11086,0,0,0,11088,0,0,0,11094,0,0,0,11095,11096,0,0,0,0,0,0,
+11097,11098,0,0,0,0,0,0,11099,0,0,11102,11108,0,0,0,11109,0,11114,11119,0,11131,
+0,0,0,11142,0,0,11143,0,11146,0,11147,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11148,0,
+11149,11152,11153,11154,0,11156,0,11157,0,0,0,11158,0,0,11159,11160,0,0,0,0,0,0,
+0,0,0,0,0,0,11163,0,0,11164,11166,0,0,0,11172,11174,0,0,0,11176,0,0,0,0,0,11182,
+11183,0,0,0,11184,11187,0,0,11188,11189,0,0,0,0,0,0,11194,0,0,0,0,0,0,0,11200,
+11202,0,0,0,0,0,0,11203,0,11204,0,0,0,0,0,11205,0,0,0,11206,0,11207,0,0,11209,0,
+11211,0,11214,0,0,11231,0,0,0,11293,11295,0,0,11296,11297,11302,0,0,0,11307,0,0,
+0,0,11309,11310,0,11311,0,0,0,11313,0,11314,0,0,0,0,11334,0,11338,0,0,0,11339,0,
+0,0,0,0,11340,0,11341,11342,0,11344,0,11345,0,0,0,11348,11349,0,0,11350,0,0,0,
+11355,0,0,0,0,0,0,11356,0,11357,11370,0,0,11371,0,11374,11376,0,0,0,11377,0,0,
+11378,11383,0,11386,11399,0,11400,11406,0,0,0,11408,0,0,11409,11412,0,0,0,0,
+11417,0,0,0,11418,0,11421,0,11426,11429,0,0,0,0,0,11430,0,11437,0,11438,0,0,0,0,
+0,11440,11453,0,0,0,0,0,0,11454,0,0,0,0,11455,0,0,11456,11460,11461,11463,0,
+11469,0,11473,0,0,0,0,11474,0,0,0,11475,0,11476,11477,11480,0,0,0,0,11481,0,0,
+11484,0,0,11487,0,0,0,0,0,0,0,0,0,0,11497,0,0,11502,0,11509,0,0,11510,11511,
+11513,0,0,0,0,0,0,0,0,0,0,11515,0,0,0,0,11516,0,11520,11521,0,0,0,0,0,0,0,0,0,0,
+0,11529,11530,11531,11534,0,0,11543,0,0,0,0,0,11547,0,11548,0,0,0,0,0,11552,
+11556,0,11557,0,0,11559,0,11560,0,0,0,0,0,0,11561,0,0,11563,11564,0,11565,0,0,0,
+0,11567,0,0,0,11569,0,11574,0,11575,0,0,0,11577,0,11578,0,0,0,11580,11581,0,0,0,
+11582,11584,0,0,0,0,0,0,0,11587,0,11588,11591,0,11595,0,0,0,0,0,0,0,0,11596,0,
+11597,0,0,0,0,11598,11601,0,0,0,11602,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11603,
+11604,0,11606,0,0,11608,0,0,0,0,11610,0,0,11611,0,0,0,0,11613,0,11622,0,0,0,
+11623,0,0,0,0,11625,0,0,11626,11627,11628,11630,0,0,0,0,0,0,11639,0,0,11646,0,
+11648,11649,0,11650,0,0,0,0,0,0,0,0,0,11651,0,0,11652,11653,11656,0,0,11677,
+11679,0,0,0,0,11680,0,0,11681,0,11685,0,0,0,0,0,0,0,0,11688,0,0,0,11716,0,11719,
+0,0,0,0,0,11721,0,0,11724,11743,0,0,0,0,0,0,0,0,11745,11748,11750,0,0,0,0,0,
+11751,0,0,0,11752,11754,0,11755,0,0,0,0,0,0,0,11759,0,0,0,0,0,0,11760,0,0,0,
+11761,0,0,0,0,0,0,11766,11767,0,11772,11773,0,11774,0,0,11775,0,11777,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,11778,11780,0,0,0,0,0,0,0,11783,0,11784,0,0,0,11785,
+0,0,0,11786,0,0,0,0,11788,0,0,11789,11791,11792,0,0,0,0,11795,11834,11835,11836,
+0,0,11837,0,0,0,11838,0,0,11846,11851,0,11852,0,11869,0,0,0,11871,0,0,0,11872,
+11874,0,0,0,0,0,0,11875,0,11876,11877,0,0,0,0,0,0,0,0,0,0,11883,0,0,0,0,0,0,0,
+11884,0,11885,0,11886,0,0,11887,0,11894,11895,11897,11909,11910,0,11912,11918,0,
+0,11920,0,11922,11924,11927,11928,0,0,0,0,11929,0,11934,0,0,0,0,0,11941,11943,
+11944,0,11945,0,0,0,0,11948,11949,0,0,0,0,11953,0,11954,0,11955,0,11956,0,0,0,0,
+0,11957,0,0,11959,0,0,0,0,0,0,0,0,11961,0,0,0,0,0,11978,0,0,0,11979,11980,11986,
+11987,0,11992,0,0,0,0,0,11993,0,0,0,11994,0,11999,12004,12005,12006,0,0,0,0,0,
+12011,0,0,12012,12014,0,0,12015,0,0,12019,12028,0,0,12029,0,0,12032,12033,0,0,0,
+0,12034,0,12041,12043,0,0,12044,0,0,0,0,0,0,0,12046,0,0,0,0,0,0,0,12054,12055,0,
+12056,0,0,0,12060,12064,0,0,0,0,0,12065,12067,12068,0,0,0,0,0,0,0,0,12074,0,0,0,
+12075,12076,0,0,0,12079,0,12081,12086,12087,0,0,12088,0,0,0,0,12089,0,12092,0,0,
+0,0,12097,0,0,0,0,0,0,0,0,12098,0,0,0,0,0,0,0,0,0,0,0,0,0,12102,12103,12104,
+12111,0,0,12114,12116,0,0,0,12118,0,0,0,12119,12120,12128,0,0,0,0,12130,0,0,0,0,
+0,0,12131,0,0,0,12132,12134,0,0,0,0,12137,0,12139,0,12141,0,0,12142,0,0,0,12144,
+0,0,0,0,0,12145,0,12148,0,12153,0,0,0,0,12154,12171,12173,0,0,0,12175,0,0,0,0,
+12178,0,0,0,0,0,0,0,12183,0,0,0,0,0,0,0,0,12184,0,0,0,12186,0,0,0,0,0,12187,
+12188,0,0,12189,0,12196,0,12197,0,0,12198,0,12201,0,0,0,0,12203,0,12209,0,0,0,0,
+12210,12211,12212,12213,0,12217,12218,0,0,0,0,0,0,0,0,0,12222,0,0,0,0,0,0,0,
+12223,0,0,12229,0,0,0,0,12233,0,0,0,0,12234,0,0,12236,12242,0,0,0,12243,0,0,0,
+12244,12253,0,12254,12256,0,12257,0,0,12275,0,0,0,0,0,12277,0,0,0,0,0,12278,0,
+12289,0,0,12290,0,12292,12293,0,0,12294,0,12295,0,0,12296,0,12297,0,12298,0,0,0,
+0,12301,0,0,0,0,0,0,0,0,0,0,0,0,0,12309,0,12338,12340,0,0,0,0,12341,0,0,0,0,0,0,
+0,0,12342,12343,0,12344,0,0,0,0,0,0,0,0,0,12345,0,0,0,0,0,0,0,0,12346,0,0,0,0,
+12348,0,0,0,0,0,0,0,0,0,0,0,0,12350,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12351,0,12355,
+12356,12357,0,0,12367,12370,12371,0,0,0,0,0,12372,12376,0,0,0,0,0,0,0,0,12379,0,
+12382,0,12383,0,0,12384,0,0,0,0,12393,0,0,12394,0,0,0,0,12398,12403,0,0,12404,0,
+0,0,0,0,0,0,0,0,0,0,0,0,12410,0,0,0,12411,0,0,0,12412,0,0,0,0,12420,0,12421,0,0,
+0,0,0,12423,0,12425,12429,0,0,0,12431,12432,0,0,0,0,0,0,0,0,0,0,0,0,12434,0,0,0,
+0,0,12435,12436,0,0,0,0,0,0,0,0,12437,0,0,0,0,0,12438,0,0,0,0,0,0,0,0,12445,0,0,
+0,12450,12451,0,0,0,0,0,0,0,0,12452,12475,0,0,12493,12494,0,0,0,12495,0,0,0,0,
+12496,12502,12509,0,0,0,0,12510,0,12512,12513,0,0,0,0,12514,0,0,0,12515,0,12520,
+0,0,0,12524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12527,0,0,0,12528,0,0,0,12529,0,0,0,
+0,0,12530,0,12535,0,0,12536,0,12538,0,0,0,0,0,0,0,0,0,0,0,0,12540,0,12548,0,0,0,
+0,0,12550,0,0,0,12551,12552,0,0,0,12554,0,0,0,0,0,0,0,0,12555,0,0,12562,0,12565,
+0,12566,0,0,0,0,0,0,0,0,0,0,0,0,12569,0,0,0,12571,12574,0,0,0,0,0,0,0,12577,0,0,
+0,0,0,0,0,12578,12579,12603,0,12608,0,0,12611,0,12612,0,12615,0,12625,0,0,0,0,
+12627,12646,0,12648,0,0,12657,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12670,0,0,12671,0,
+12673,12677,0,0,0,0,0,0,0,0,0,0,0,12679,0,12681,0,12682,12693,0,12694,0,12697,0,
+12701,0,0,0,12703,12704,0,0,0,0,12707,12737,0,0,12739,0,0,12740,0,0,12742,12743,
+0,0,0,0,0,0,0,0,0,12745,0,12746,12747,0,12748,0,0,12759,12767,0,0,0,0,12773,0,
+12774,12778,0,0,0,0,0,0,0,12779,0,0,0,0,0,12780,12793,0,12824,0,12825,0,12836,0,
+0,0,0,12839,0,12842,0,0,0,0,0,0,0,0,0,0,0,0,12843,12845,0,12846,0,0,0,0,12847,0,
+0,12850,12852,12853,0,0,0,12854,0,0,0,12855,0,12856,0,12858,0,0,12859,0,12862,0,
+12863,0,0,12866,0,12869,12872,12873,0,0,0,0,0,0,0,0,0,12875,0,12877,0,0,12878,0,
+0,0,0,0,0,0,0,0,12884,12885,12888,0,12889,0,0,0,0,12893,0,0,0,12895,12896,12898,
+0,0,0,0,0,0,0,12902,0,12909,12910,0,12926,0,12928,0,0,0,12929,0,12930,0,0,0,0,
+12931,0,12932,12933,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12934,0,12942,0,0,0,0,12944,
+0,0,0,0,0,0,0,0,12946,0,0,12948,0,0,12949,0,0,0,0,12950,0,0,0,0,12951,0,12952,0,
+12953,0,0,0,12954,12958,12959,0,0,0,0,0,12960,12964,0,0,0,0,0,12966,0,0,0,0,0,0,
+0,0,12970,0,12971,0,0,0,0,0,0,12972,0,0,12982,0,0,0,12984,12985,0,12986,12996,
+12997,13001,13002,0,0,0,0,13004,0,0,13005,0,0,13007,13009,0,13017,0,0,0,13020,0,
+13021,0,0,0,0,0,0,0,0,0,0,13022,0,0,0,0,0,0,0,0,13024,13027,0,0,0,0,0,13028,0,0,
+13029,0,0,0,0,0,0,0,13032,0,13037,0,0,0,0,0,0,13040,0,0,13041,0,0,0,13043,13044,
+13046,0,0,0,0,13047,0,0,0,0,0,0,0,13049,13054,0,13056,0,0,13060,13061,0,0,0,0,0,
+13067,0,0,13068,0,13071,0,0,0,0,0,13077,13078,0,0,0,0,0,13079,13080,13081,0,
+13082,0,0,0,13085,0,0,0,0,0,0,0,13086,0,13087,13088,0,0,0,0,0,13094,0,13099,0,
+13100,0,0,0,13101,0,13125,13126,13128,13129,0,0,13130,0,13131,0,0,0,0,0,0,13134,
+0,0,0,0,0,0,0,0,0,0,0,13150,0,13168,0,0,0,0,0,0,0,0,0,13169,0,0,13170,0,0,0,0,
+13174,0,0,0,13176,0,0,0,0,0,13177,0,13178,13183,13187,0,0,0,13189,0,0,13190,0,0,
+13191,0,0,13206,0,0,0,13207,0,0,0,0,0,0,0,0,0,0,13212,0,0,13219,13232,0,0,0,
+13241,0,13249,13253,0,0,0,0,0,13255,13259,0,13260,13261,0,13262,0,13272,0,0,0,0,
+13276,0,0,0,0,13277,13299,0,0,13301,13302,0,0,13303,0,0,13305,0,13310,0,0,0,
+13311,0,0,0,0,13325,0,13328,0,0,0,13329,0,0,0,0,0,0,13330,0,0,13331,0,13335,0,0,
+13342,0,0,0,0,0,13343,0,13354,0,13362,0,13366,13367,13369,0,0,13371,13372,0,
+13373,13374,0,13376,0,13380,13381,13386,0,13387,13388,0,13389,13391,13395,0,0,0,
+0,0,13401,13409,0,13410,0,0,0,0,13420,0,0,0,0,0,13422,0,0,0,0,13423,0,0,0,0,
+13425,0,0,0,0,0,13427,0,0,0,13428,0,0,13430,13438,0,13439,0,13445,0,13448,13449,
+0,0,0,0,0,0,13451,0,13457,0,0,0,0,13458,13459,0,13460,0,0,0,0,13464,13465,13466,
+13470,0,13471,13472,13474,13475,0,13476,0,0,13478,13479,0,13481,0,0,0,0,13487,0,
+13490,0,13493,0,0,13494,0,0,13495,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13496,13497,0,
+13500,0,0,13516,13522,0,0,13525,13528,0,0,0,13530,13535,0,13537,13539,0,13540,0,
+13543,0,13544,0,0,0,0,0,0,13545,0,0,0,0,0,0,13547,0,0,0,13549,13555,0,0,0,13556,
+13557,0,0,0,0,0,0,0,13558,0,13563,0,0,0,0,13564,0,0,0,0,0,0,0,0,13566,0,0,0,0,0,
+0,13569,0,0,13571,0,0,0,0,13573,0,0,0,0,0,0,13578,0,0,0,0,0,0,0,0,0,0,13581,0,
+13586,0,13595,0,13600,0,0,0,0,0,0,0,0,13601,13603,0,13604,13605,13606,13607,0,0,
+13617,13618,0,0,0,0,0,0,0,13623,0,13625,13627,0,0,0,0,0,0,0,0,13629,0,0,0,13634,
+0,0,0,13638,0,0,0,0,0,0,0,0,13654,0,0,0,0,0,0,0,0,0,0,13656,0,13659,0,0,13660,0,
+0,13662,0,0,0,13663,0,13664,0,0,0,0,0,13668,0,13669,13671,0,0,13672,0,0,0,0,0,0,
+13675,13685,0,13686,0,0,0,13687,0,0,0,13692,13694,13697,0,0,0,13702,0,0,0,0,0,
+13705,0,0,0,0,13707,0,0,0,13714,0,0,0,0,0,0,0,0,0,13715,0,13716,13717,0,0,13719,
+13724,13730,13731,0,0,0,0,0,0,0,0,13732,0,0,0,0,0,0,0,13734,0,13736,0,0,13737,
+13738,13747,0,13751,0,0,13752,0,0,0,13753,0,13757,0,0,13762,13763,0,13764,13765,
+0,13766,0,0,13767,0,0,0,13768,0,0,0,0,0,0,0,13769,0,0,13772,0,13775,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,13776,13778,13787,0,0,0,13797,0,13798,0,13801,0,13804,
+13806,0,0,0,0,13816,13817,0,0,0,0,0,0,0,0,0,0,0,0,0,13834,0,13836,0,0,13838,0,0,
+13839,0,13840,0,0,0,0,13842,0,0,0,0,0,0,13843,0,0,0,0,0,0,0,0,0,13845,0,0,0,0,0,
+13858,0,0,13860,0,0,13861,0,0,13862,13863,0,13868,0,13869,13870,0,0,0,0,0,0,0,0,
+0,0,13872,0,0,0,0,13873,13878,0,0,0,0,0,0,0,0,0,0,13886,0,13888,13889,13890,0,0,
+13891,13894,0,13897,13899,13900,13904,0,0,13906,0,0,0,13909,0,0,0,13910,0,0,0,
+13911,0,0,0,0,0,13912,13917,0,0,0,0,13918,0,13919,0,0,13920,0,0,0,13921,0,0,
+13922,0,0,0,0,0,0,0,13924,0,13927,0,0,0,0,0,13932,0,13933,0,13934,0,0,13935,0,
+13944,0,0,0,13954,0,0,13955,0,0,0,0,13956,0,13957,0,13967,13969,0,0,0,0,0,0,0,0,
+0,0,0,0,13970,13990,0,13991,13994,0,13995,0,0,0,0,13996,0,0,13999,0,0,0,14018,0,
+14019,0,14021,0,0,0,0,0,0,14041,0,0,0,0,0,0,0,0,14043,0,0,0,0,14046,0,0,0,14048,
+14049,0,0,0,0,0,0,0,0,0,0,14051,0,0,14052,14056,0,14063,0,14064,14066,0,0,14067,
+0,0,0,0,0,0,0,0,0,14068,0,0,0,14072,0,14074,14075,0,14076,14079,14085,14086,
+14087,14093,0,0,0,0,14095,0,0,0,0,0,0,14096,14097,0,0,0,0,0,0,0,14098,0,14102,0,
+0,0,0,0,14103,0,0,0,14104,0,0,14105,0,0,0,14107,14108,0,0,14109,0,0,0,0,0,0,0,0,
+14117,0,0,0,0,14118,0,0,0,0,14119,0,0,14120,0,0,14121,0,14122,14127,0,14128,
+14136,0,0,14138,0,14140,0,0,0,14141,14142,0,0,0,0,14146,0,0,14149,0,14151,0,0,0,
+14152,0,0,14153,0,0,0,0,0,0,0,0,0,14154,0,14156,14157,0,0,14159,0,14161,0,0,0,0,
+14162,0,0,0,0,0,0,14163,0,0,14173,0,0,0,0,0,0,14174,0,0,14176,0,0,14178,0,0,
+14179,14181,0,0,14182,14185,14187,0,14190,0,0,14197,0,0,0,0,0,0,0,0,0,0,0,0,
+14198,0,0,0,0,0,0,14199,14200,0,0,0,14204,0,0,14208,0,0,0,0,0,0,0,0,0,0,0,14231,
+0,0,0,0,0,0,0,0,0,14234,0,0,14235,0,0,0,14240,14241,0,0,0,14246,0,0,0,14247,0,
+14250,0,0,14251,0,0,14254,0,0,14256,0,0,0,14260,0,14261,0,0,0,0,14262,14267,
+14269,0,0,14277,0,0,14278,0,14279,14282,0,0,0,14283,0,0,0,14284,14285,0,0,0,0,
+14286,0,0,0,14288,0,0,0,14289,0,14290,0,14293,14301,14302,14304,14305,0,14307,0,
+14308,14309,0,0,0,0,0,0,0,0,0,0,0,14311,14312,0,0,14317,0,0,0,0,0,0,0,14318,0,0,
+0,0,14320,0,0,0,0,14321,14322,0,0,0,0,0,14326,14329,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+14330,14331,0,0,0,0,14332,0,0,0,14333,0,0,14337,14340,0,14341,0,0,14342,0,14345,
+14346,0,0,14347,0,14362,0,0,0,0,0,14364,14365,14371,0,14373,0,0,14374,0,14379,0,
+14400,0,0,0,0,0,14401,0,0,14405,0,14406,0,14408,14409,0,0,0,14417,0,0,14424,0,0,
+0,0,0,0,0,0,0,14430,0,0,0,14431,0,0,14435,0,14440,0,0,0,0,0,0,14442,0,0,14443,0,
+0,0,0,0,14446,0,0,0,0,0,0,0,14454,0,14457,0,14460,0,0,14466,0,0,0,0,0,14467,0,0,
+0,0,0,0,14469,0,14477,0,0,0,0,0,0,14478,14482,0,0,0,14483,0,0,0,14485,14486,0,0,
+0,14487,14488,14489,14492,14493,14494,14495,14496,14497,0,14499,0,14501,0,0,0,0,
+0,0,0,0,0,0,14502,0,14507,14512,14513,14514,0,0,0,0,0,0,0,0,0,0,0,14515,14526,
+14530,0,14537,0,14544,0,14547,0,0,14548,14550,14551,0,0,14552,0,0,0,14553,0,
+14554,0,0,0,0,14556,14564,0,0,14565,14566,0,0,0,0,0,0,14568,0,0,14569,0,0,0,
+14571,14576,0,0,14577,14578,14579,0,0,14580,0,0,0,0,14582,0,0,0,0,0,0,0,0,0,0,0,
+0,14583,0,0,0,0,0,14587,0,14588,0,0,14600,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,14601,0,0,14604,14605,14611,0,14613,0,0,0,0,14615,0,0,0,0,0,0,14627,0,14628,0,
+0,0,0,14631,0,14633,14634,0,0,0,0,14635,0,0,0,0,0,0,0,0,14636,0,0,14639,14642,0,
+0,0,0,14644,0,0,0,0,14645,14646,0,14653,0,0,14654,0,14658,0,14661,0,0,0,14665,0,
+0,0,14668,0,0,0,0,0,0,0,0,0,14669,0,0,14670,0,0,0,14680,0,0,14681,0,0,0,0,0,
+14682,14683,0,0,0,0,14686,0,0,0,0,14687,14697,0,0,0,0,14699,14705,14711,0,0,0,0,
+0,0,0,0,0,0,14712,0,0,0,14713,0,0,0,0,14719,0,14720,14721,14726,0,0,0,14728,
+14729,0,0,0,0,14731,0,0,0,0,0,0,0,14733,14736,14737,0,0,14740,14742,0,0,0,14744,
+14753,0,0,0,0,14755,14758,14760,0,0,0,0,0,14761,14762,14765,14771,0,14772,0,
+14773,14774,0,0,14775,0,0,14776,0,0,0,0,14777,0,14779,0,0,14782,0,0,14785,14786,
+14788,0,0,0,0,0,14795,0,0,0,0,0,0,14798,0,14803,14804,14806,0,0,0,14809,0,0,0,0,
+0,0,14810,0,0,0,0,14811,0,14812,0,0,0,0,0,14815,0,0,0,0,0,0,0,0,14816,0,14818,0,
+0,0,0,0,0,14819,0,14820,0,14823,0,0,0,14824,0,0,14826,14827,0,0,0,0,0,0,0,0,0,0,
+0,0,14830,0,0,0,0,0,14833,0,14845,0,0,0,0,0,14846,0,0,14847,14871,0,14873,0,
+14876,0,14877,14878,14880,0,0,0,0,0,14881,0,14882,14894,0,0,0,0,14895,0,14907,0,
+14908,0,0,0,0,0,0,0,14911,0,0,0,0,14920,0,0,14931,0,14932,14934,14935,0,0,14936,
+0,14945,0,0,0,0,0,0,0,14947,0,0,14948,14949,14951,0,0,14952,0,0,0,14964,14973,0,
+0,14990,0,0,0,0,14995,0,0,14998,15001,0,0,15002,15020,0,0,0,0,0,0,15021,0,15022,
+0,0,0,0,15023,0,0,15025,15029,15033,0,0,0,15034,0,0,0,15035,0,0,0,0,0,15043,
+15044,0,0,0,15045,15046,15048,15050,0,15065,0,0,0,0,15066,0,0,15075,15082,15084,
+0,0,15085,15086,0,0,0,0,0,0,0,0,15088,0,0,0,15089,0,0,0,0,15094,0,15096,0,15097,
+0,15100,0,0,15102,0,0,0,0,0,0,0,0,15105,0,0,15106,0,15109,15113,0,0,0,15115,0,
+15118,0,0,0,0,0,0,15119,0,0,15120,0,0,0,0,0,15123,15129,0,0,0,15130,0,15131,0,0,
+15134,0,15135,0,0,0,15137,15138,0,0,0,0,0,0,15139,0,0,0,0,0,15140,0,0,15154,
+15162,0,15169,15170,0,15175,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15177,0,15178,15179,0,
+0,0,0,0,15183,0,0,0,0,0,0,0,0,0,0,0,0,15185,15187,0,15194,15195,15196,0,0,0,0,0,
+0,0,15204,0,0,0,0,15206,0,0,0,0,0,15207,0,0,0,0,0,0,0,0,0,15213,0,15214,0,0,0,0,
+0,0,0,15232,0,0,0,0,15234,0,15238,15240,0,15248,0,0,0,0,15250,15251,0,0,0,0,0,0,
+0,15252,0,0,0,15255,15262,15266,0,0,0,15267,0,0,0,15277,15279,0,0,0,15280,15281,
+15282,0,0,0,0,0,15285,0,0,0,0,15289,0,0,15291,0,0,0,0,0,0,0,15296,15297,0,0,
+15304,0,0,0,0,15306,0,0,0,0,0,0,15307,15308,0,15309,0,0,15311,0,0,15312,15313,0,
+0,0,0,0,0,0,0,0,0,0,0,15314,15317,0,0,0,15318,15319,0,0,0,0,15320,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,15321,0,0,0,0,0,15324,0,15325,15326,0,15330,0,0,0,0,15334,0,
+15335,0,15341,0,0,15342,0,0,15343,15344,0,0,0,0,15345,0,0,0,0,15347,0,0,15348,
+15349,15350,0,15356,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15357,0,15358,0,0,0,0,0,0,0,
+15359,15360,15364,0,15380,0,0,0,0,0,15392,0,0,15393,0,15395,0,0,0,0,0,0,0,0,
+15396,0,0,15397,15398,0,0,0,0,0,0,0,0,0,15399,0,15400,0,0,0,15402,0,15405,15410,
+0,0,0,0,15411,0,0,0,15412,0,15416,0,0,0,0,0,0,0,15428,0,15435,0,0,15438,0,0,0,0,
+15439,0,0,0,15440,0,0,0,15441,15449,15451,0,0,0,0,0,0,0,15452,0,0,15455,0,0,0,
+15456,0,0,15458,0,15460,15461,0,0,0,0,0,15462,15464,0,15465,0,0,15466,0,0,15467,
+0,0,0,0,0,15468,0,0,0,0,15481,0,0,15484,0,15485,15486,0,0,0,15487,0,0,0,0,0,
+15488,0,15492,15498,0,0,0,15499,0,0,0,15500,0,15501,0,0,15512,0,15522,0,0,0,
+15524,0,15525,15526,0,0,15527,0,0,15545,15546,0,15548,15552,0,15553,0,0,0,15554,
+0,15555,0,15557,15565,15573,15577,15578,0,15582,0,15583,0,0,0,0,0,0,0,0,0,0,0,0,
+0,15586,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15588,0,0,0,0,0,15589,0,0,0,0,0,0,0,15593,
+15594,0,0,0,0,15595,0,0,0,0,0,0,15596,0,0,0,15597,0,0,0,0,15600,0,0,15601,0,0,0,
+0,15602,15603,0,0,0,0,0,0,15604,0,15609,0,0,15612,0,0,15613,0,0,15615,15617,
+15618,0,0,15620,0,15636,15637,0,0,15649,0,0,0,0,0,0,0,15650,0,0,15651,0,0,0,
+15656,0,15658,0,0,0,15664,0,0,15665,0,0,15668,0,0,0,0,0,15669,0,0,15674,0,0,
+15675,0,0,0,0,15676,0,0,0,0,0,0,0,0,0,0,0,15677,0,0,0,0,15678,0,0,0,0,0,15679,0,
+0,15681,0,15686,0,0,0,0,15687,0,15688,0,0,15690,0,0,0,15697,0,15699,15700,0,0,0,
+0,0,0,0,0,0,15701,0,15702,15703,0,15704,0,15705,0,15707,0,15709,0,15712,15716,0,
+15717,0,15718,15720,0,0,0,0,0,15724,0,0,0,15725,0,15726,0,0,0,15740,0,15745,
+15746,0,0,15747,0,15748,0,0,0,0,0,15749,0,0,0,15752,0,15753,0,0,0,0,0,0,15759,0,
+0,0,15765,0,0,0,0,0,0,0,0,0,15767,0,0,0,15771,0,0,15784,0,0,0,0,15785,15790,
+15791,0,0,15792,0,0,0,15807,0,15811,0,0,0,0,0,0,0,0,0,0,0,0,15818,0,0,0,15819,0,
+0,0,0,15821,0,0,0,0,0,15822,15824,0,0,15827,0,0,15829,15831,0,15832,0,0,15833,0,
+15835,15838,15839,15843,0,0,0,0,0,0,0,0,0,0,0,15844,0,0,0,0,15845,15851,15856,0,
+0,0,0,0,0,0,15858,15860,0,15861,0,0,0,15864,0,0,0,0,15865,0,0,0,0,0,0,15866,0,
+15872,0,0,15876,0,0,0,0,15877,15878,15883,15885,0,0,15888,0,0,0,0,0,15889,15890,
+0,0,0,0,0,0,0,0,15892,0,0,0,0,0,0,0,15893,0,0,15894,0,0,0,15895,0,15896,15897,0,
+15898,15901,15902,0,15911,15915,0,15916,0,15924,15935,0,15937,0,0,0,0,0,15950,0,
+0,0,0,0,0,0,15958,0,0,0,15961,0,0,15966,0,15967,0,0,15977,0,0,15978,0,0,15981,
+15982,15983,0,0,0,0,0,0,0,15986,0,0,0,15990,0,15991,15995,15998,0,15999,0,16000,
+0,0,0,0,16008,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16009,16011,0,16013,0,0,0,0,
+0,0,0,0,16014,0,0,16015,16023,16024,16025,0,0,16026,0,16030,0,16032,0,16033,0,0,
+0,0,0,0,16035,16036,16037,0,0,0,0,0,16039,0,0,0,0,16041,0,0,0,0,0,16043,16044,0,
+0,16047,0,0,0,16048,0,0,16049,16050,16052,0,0,0,0,0,16055,0,0,0,0,0,0,0,0,16056,
+0,0,0,0,0,0,0,16058,16060,16061,0,0,16063,0,0,16064,0,0,0,16067,16068,0,0,16069,
+16078,0,0,0,16079,0,0,0,16080,0,16081,0,0,0,16088,0,0,0,0,0,0,0,0,0,0,0,16089,
+16093,0,16097,0,16103,0,16104,16105,0,0,16256,0,0,16259,0,0,0,0,0,0,0,16260,
+16261,0,0,16262,0,0,16263,0,16268,0,0,0,0,0,0,0,16269,0,0,16270,16273,0,16274,0,
+0,0,0,16275,16276,16277,16280,0,0,0,16281,16284,0,0,0,16286,0,16289,0,0,0,0,0,0,
+0,0,0,16290,0,0,0,0,16291,0,0,0,0,0,0,0,16292,0,0,0,0,0,0,0,0,16293,16295,16297,
+0,16302,0,16304,0,16305,0,16306,0,0,0,0,0,0,0,0,0,0,0,0,16307,16308,16312,0,0,0,
+0,0,0,16313,16315,0,16318,0,0,0,16321,0,0,0,0,0,0,0,16326,16333,16336,0,0,0,0,
+16337,16340,0,0,0,0,0,16345,0,0,16346,0,0,0,0,0,0,0,0,0,16347,0,0,16348,0,0,0,0,
+16349,0,0,0,16350,0,16357,0,0,0,0,16359,16360,0,0,0,0,16362,16363,16364,16365,0,
+0,16366,0,0,0,0,16367,16368,0,16369,16374,0,0,0,0,0,0,0,16376,0,0,0,0,16378,
+16379,0,16380,0,0,0,16381,16383,0,0,0,0,0,16390,0,0,0,16399,0,16402,16404,16406,
+16407,0,0,0,16409,16411,0,0,0,0,16412,0,16413,16415,16423,0,0,0,0,0,16424,0,0,0,
+16428,16434,16435,16449,0,16450,16451,0,0,0,16453,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+16454,0,0,16456,16458,0,0,16459,0,0,16460,0,0,0,0,16462,0,16463,0,0,16466,0,0,0,
+0,0,16479,0,0,16480,0,16481,16484,0,0,0,0,0,0,0,0,0,0,16485,0,0,0,0,0,0,16489,0,
+0,0,0,0,16491,0,0,16498,0,0,16503,0,16505,0,0,0,0,0,0,0,0,16506,0,0,0,16508,
+16509,0,0,0,0,0,0,0,0,16511,16513,0,0,0,16516,0,16517,0,16519,0,16529,0,0,16531,
+0,0,0,0,0,0,16534,0,0,16541,16542,0,0,0,0,0,0,0,0,0,16543,16547,16548,0,0,0,
+16551,0,16552,0,0,0,16553,0,0,16558,0,0,16562,16565,0,0,0,16570,0,0,0,16573,
+16585,0,0,0,16586,16587,16595,0,16596,0,16598,0,0,0,16600,0,0,0,0,0,0,0,0,0,0,0,
+0,0,16601,0,0,0,0,16603,0,0,0,0,0,0,0,16604,16612,0,0,0,0,16613,0,16618,0,0,0,
+16640,0,0,16641,0,0,0,0,0,0,16645,0,0,0,0,16646,0,0,0,0,0,0,16651,0,0,0,0,16653,
+16654,0,0,0,16655,0,0,16656,16667,0,0,0,0,16671,0,16672,0,0,0,16673,0,0,0,0,0,
+16676,0,16686,0,0,0,0,16689,0,16690,0,16692,0,16693,0,16694,0,16696,0,0,0,16705,
+0,0,0,0,0,0,16707,0,0,0,16709,0,0,0,0,16711,0,16712,16713,0,0,0,16715,0,0,0,0,
+16716,0,0,0,0,0,0,0,0,0,16718,16724,0,0,16726,16727,0,0,0,0,0,0,0,16728,0,16729,
+0,0,16730,0,0,0,0,0,16731,0,0,0,16732,0,0,0,0,16734,16738,0,0,0,0,0,0,0,0,16743,
+0,0,16745,0,0,0,0,0,16749,0,16752,0,0,0,0,16756,0,0,16758,0,16759,0,0,0,0,0,
+16760,0,0,0,0,0,0,0,16762,0,16769,0,16770,0,16772,0,0,0,16777,16780,0,0,0,0,0,0,
+16781,0,0,16782,0,16784,0,0,16785,16787,16792,0,0,16794,0,0,0,16798,0,0,16809,0,
+0,16814,16816,16817,0,16819,0,0,0,0,0,0,0,0,0,0,16820,0,0,16836,16839,0,0,16841,
+16851,16857,0,0,16858,16859,0,0,16860,0,0,0,0,0,0,0,0,16862,0,16863,0,0,0,0,0,0,
+0,16864,0,0,0,0,0,0,0,16876,0,16881,16882,0,16885,16886,0,16887,0,0,0,16889,
+16891,0,0,0,0,0,16894,16895,0,0,0,0,0,0,0,0,0,0,0,16897,0,16898,0,0,0,0,0,16913,
+0,0,16924,16925,16926,0,0,16927,0,0,0,16937,16938,0,0,0,16940,16941,0,0,0,16942,
+16945,0,16946,16949,16950,0,0,0,16952,16955,0,0,0,16965,0,16969,0,0,16975,0,0,
+16976,0,0,0,0,16978,0,0,16981,0,16983,16989,0,0,0,0,16990,0,0,16991,0,0,0,16993,
+0,16994,16996,17000,0,0,0,0,0,17002,17004,0,17006,0,0,17007,0,0,0,0,17008,17013,
+17014,0,0,0,0,0,0,0,0,0,17021,0,17031,0,0,0,0,0,17033,17036,0,17038,0,0,17039,0,
+17045,0,0,17046,17047,0,0,0,0,17048,0,17049,17050,0,17051,17053,0,17054,0,17055,
+0,0,0,0,0,17063,0,0,17064,0,0,0,0,0,0,0,17065,0,0,17068,0,0,0,0,0,17072,0,0,0,0,
+0,0,17073,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17074,0,17080,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,17081,17083,17084,0,0,0,17085,0,0,0,0,17092,0,0,0,0,0,0,0,
+0,0,17093,0,17095,17102,0,0,0,0,0,0,17103,0,0,17105,0,17107,0,0,0,0,17114,0,0,0,
+0,0,17115,17125,17127,0,0,17128,0,0,0,17129,17130,0,17131,0,0,0,0,0,17132,17135,
+17145,0,0,0,0,0,0,0,0,17146,0,17147,0,17148,0,0,0,0,0,0,17149,17150,0,17151,
+17153,0,17155,0,0,0,0,17163,17171,0,17174,0,0,0,0,17179,0,0,17182,17185,0,0,0,0,
+0,17186,0,0,17188,0,0,0,0,0,0,0,17189,17191,0,17194,0,0,0,0,0,0,0,0,0,17195,
+17196,17203,17204,0,0,17205,17217,0,0,0,0,0,17218,0,0,0,0,17219,0,17220,0,17221,
+0,0,17230,0,0,0,0,0,17236,0,17238,17239,0,0,0,17241,17244,0,0,17245,0,17248,0,0,
+17251,0,17252,0,0,17264,0,17266,0,0,0,17268,0,0,0,0,17271,17272,0,17273,0,17295,
+0,17302,0,17305,0,0,0,17306,0,0,0,0,0,0,0,17308,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+17309,0,17310,17313,0,0,0,0,17314,17315,0,17317,0,0,0,0,17318,0,0,0,0,0,0,0,
+17320,0,0,0,0,0,0,17334,0,17344,17348,0,0,0,17350,17351,0,0,17353,0,0,17354,0,0,
+0,0,0,0,0,0,0,17355,0,0,0,0,0,0,17356,17357,0,0,17359,0,0,0,17371,0,17372,0,0,0,
+17393,0,0,0,0,17394,0,0,0,0,0,17395,0,0,17399,0,0,0,17401,17417,0,17418,0,17419,
+0,0,0,0,0,17422,17423,0,0,0,0,0,17424,0,0,0,0,0,17428,17429,17433,0,0,0,17437,0,
+0,17441,0,0,17442,0,0,17453,0,0,0,0,0,0,0,0,17454,17456,17462,0,0,17466,0,0,
+17468,0,0,17469,0,0,0,0,17470,0,17475,0,0,0,0,0,17479,0,0,0,17483,17484,0,17485,
+0,17486,0,17491,17492,0,0,17493,0,17494,17495,0,0,0,17496,0,0,0,17497,0,0,0,
+17502,0,0,0,0,0,17503,0,17505,0,17507,0,0,0,17512,17513,17514,0,0,17515,0,0,0,
+17519,0,0,0,17522,0,0,17523,0,0,0,0,0,0,0,0,0,17527,0,0,0,17528,0,0,0,17534,0,0,
+0,0,17536,0,0,0,17539,0,17540,17543,17549,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17556,
+0,0,17558,0,17559,0,0,17560,0,0,0,17563,0,0,0,0,0,0,17564,0,0,17565,17566,0,
+17567,0,0,0,0,0,0,17569,17570,0,17575,0,0,0,0,0,0,0,0,0,0,0,17581,0,0,0,17582,
+17583,0,17586,0,0,17587,0,0,0,0,0,0,0,17588,0,0,0,0,17596,17597,0,0,17598,17600,
+0,0,0,0,0,0,17601,0,0,0,17604,0,0,17605,0,0,17607,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,17612,0,0,17618,0,17621,17622,0,0,0,0,17623,0,0,17624,0,0,17630,0,0,
+17631,17633,17634,0,0,0,0,0,0,0,17635,0,0,17636,0,0,17637,0,17638,0,17640,0,0,0,
+0,0,0,0,0,0,0,17641,0,0,0,0,0,0,0,0,0,0,17643,0,0,0,0,17645,0,0,0,0,0,0,0,0,
+17646,17662,0,0,0,0,0,0,0,0,0,17663,17664,0,17665,17666,0,0,0,17669,17671,17673,
+0,17679,0,0,0,0,0,0,0,17684,0,0,0,17686,0,17714,0,0,17720,17722,17726,0,0,17728,
+0,0,17729,0,0,0,17732,0,17733,0,17734,0,0,0,17735,0,0,0,0,17737,0,0,0,0,17739,0,
+0,0,17741,17742,0,0,0,0,17743,17744,17745,0,0,0,17749,0,17750,17751,17752,17754,
+17761,17762,0,17763,0,17766,0,17772,0,0,0,0,0,17775,0,0,0,0,0,0,0,17776,0,0,
+17777,0,0,17778,17779,0,17782,17783,0,0,0,0,0,0,0,0,0,0,17784,0,0,0,0,0,0,0,
+17821,0,0,0,17822,0,0,0,17823,17825,0,0,0,0,0,17826,17831,17832,17833,0,0,17845,
+0,0,0,17846,0,0,0,17848,17850,17854,0,17855,0,0,17859,0,0,0,0,0,0,17860,17861,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17870,17871,0,0,0,0,0,0,17872,0,0,0,17879,0,
+0,0,17881,17883,0,17884,0,17885,0,0,17886,0,0,17887,17891,17953,0,0,0,0,17954,0,
+0,17955,0,17968,0,0,17972,0,0,0,0,0,17974,0,0,0,0,17976,17978,0,0,17983,0,0,0,0,
+18003,0,0,0,0,0,18007,0,0,0,0,0,18009,0,0,0,0,0,0,0,18010,0,0,0,0,0,0,18012,0,0,
+18014,0,0,0,18015,0,0,0,18016,0,18017,0,0,0,18030,0,0,0,0,0,0,0,18031,0,0,18036,
+18037,18038,0,0,18049,18056,0,18057,18058,0,18059,0,0,0,0,0,0,0,0,18062,0,0,0,0,
+18064,0,0,0,0,0,0,0,0,18067,0,0,0,18068,0,0,18075,0,0,18078,18093,18094,0,0,0,0,
+0,0,0,0,18097,0,0,0,0,0,18098,18100,0,0,0,18108,0,18111,0,0,18112,0,18113,0,0,
+18115,18116,0,18118,0,0,0,0,18121,0,0,0,0,18123,0,0,0,0,0,0,0,0,0,18124,0,0,0,0,
+18125,18126,0,18127,0,0,18128,18135,0,0,0,0,0,0,0,0,0,18150,0,0,0,0,0,18151,
+18152,0,0,18156,18164,0,18166,18171,0,0,0,0,0,0,0,0,0,18172,18183,0,18184,0,0,0,
+0,18185,0,18187,0,0,0,0,0,18188,0,0,0,0,0,0,0,0,18189,0,0,18190,0,0,18191,18192,
+0,0,18194,18195,18196,0,0,0,18197,0,18203,0,18204,0,0,0,0,18205,0,0,0,18207,
+18208,0,0,18214,0,0,0,18215,18216,0,0,0,18220,0,0,18222,0,0,0,0,0,18223,0,18225,
+18231,0,18234,0,18235,0,0,0,0,18240,0,0,18241,18242,0,0,0,0,0,18243,18251,0,
+18253,0,18254,0,0,0,18266,0,0,0,0,0,0,18269,18270,18271,18273,18281,0,0,0,0,0,0,
+0,0,0,0,0,0,18282,0,18283,0,18284,0,0,0,0,0,0,18285,0,18287,18289,0,0,18290,0,0,
+0,0,18308,0,0,0,18310,0,0,0,0,0,0,0,0,0,0,0,0,18311,0,18312,18313,0,18315,0,0,
+18316,18320,0,18331,0,18332,0,18336,0,0,0,0,18337,0,18340,0,0,0,0,0,0,0,0,0,
+18341,0,18344,18345,0,18346,0,0,0,0,0,18348,0,18351,0,0,18356,0,0,0,0,0,0,18357,
+0,0,0,0,0,18367,0,0,0,18368,0,18369,0,18370,18371,0,0,0,18437,18444,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,18445,18450,0,0,0,0,18451,0,18452,0,0,0,18453,0,0,0,0,0,18455,0,
+0,0,18456,0,18457,0,18460,0,0,18461,0,0,0,0,0,0,0,0,18466,0,0,18467,0,0,0,0,
+18473,0,0,0,18476,0,18477,0,0,0,18478,18479,18480,0,0,0,18485,0,0,0,18486,0,0,0,
+0,0,0,18488,18490,0,0,0,0,0,0,18491,0,0,0,0,0,18495,0,0,18496,0,0,0,0,0,0,18505,
+0,18521,0,18522,18523,0,0,0,18525,18526,0,0,0,0,0,18527,0,0,0,0,18532,18533,0,
+18534,0,0,0,0,0,0,18535,18537,0,18538,0,0,0,0,0,0,18540,18541,18542,18543,0,
+18546,0,0,0,0,18553,18556,0,0,18558,0,0,18569,18571,0,0,0,18572,0,18574,0,0,0,0,
+18586,0,0,0,0,0,18588,0,0,18589,0,0,0,0,0,0,18590,0,18592,0,0,0,0,18594,0,0,0,
+18596,0,0,18597,18598,0,0,18601,0,0,0,0,18602,0,0,0,18603,18604,0,18605,0,0,0,0,
+18608,0,0,18611,0,0,0,0,0,0,0,0,0,18612,0,18616,0,0,18617,18619,0,0,0,18628,0,0,
+0,18629,0,0,18630,0,0,0,0,0,0,0,18631,0,18632,0,0,18635,18637,0,0,0,0,0,0,18641,
+18643,18648,0,18652,0,0,18653,0,18655,18656,0,0,0,18657,0,0,18666,18674,0,0,0,0,
+18677,18684,18685,0,0,18686,0,0,18690,0,0,0,0,0,0,0,18695,18696,0,0,0,0,0,0,0,0,
+0,0,18697,0,0,18700,0,0,0,0,0,0,18702,0,18708,0,0,18709,0,18710,0,0,18711,0,
+18714,0,0,18718,0,0,0,0,0,0,18719,0,0,18722,0,18726,0,0,0,0,0,0,0,0,0,0,0,0,0,
+18731,0,0,0,0,0,18739,18741,0,0,18742,0,18743,18744,18746,18748,0,18752,18753,0,
+0,18754,18763,0,18765,0,0,0,18766,0,0,0,18769,0,0,0,0,0,18773,18778,18779,18781,
+0,0,18784,18787,0,18788,0,18793,0,0,0,0,0,0,18795,0,0,18800,0,0,0,0,0,18801,
+18804,0,0,0,0,0,0,0,18806,0,0,0,18811,18815,18816,0,0,0,0,18825,0,0,18827,18829,
+0,0,18830,0,0,0,0,18831,0,0,18832,0,0,0,0,18833,0,18840,0,18841,0,18842,0,0,0,0,
+18843,0,18844,0,0,0,0,0,0,18845,18846,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+18848,0,0,0,18853,18860,0,0,18862,18866,0,0,18867,18869,0,0,18874,18881,18891,0,
+0,0,0,0,0,0,0,0,0,18892,0,0,0,0,0,0,0,0,18895,0,18896,0,0,0,18900,0,0,0,18901,0,
+18902,18915,18916,0,0,0,0,0,0,0,0,18919,0,0,0,0,0,18920,0,0,0,18921,18929,0,0,0,
+0,18930,0,0,0,0,0,0,18932,0,0,0,0,18934,18942,0,0,0,18951,18957,0,0,0,0,18958,0,
+0,0,0,18959,18960,0,0,18961,0,0,18962,0,0,0,0,18963,18964,0,0,0,18965,0,18967,0,
+0,0,0,0,0,0,0,0,18968,0,18969,0,18970,18973,18976,0,0,0,0,0,0,18977,0,0,0,18981,
+0,0,0,18990,0,18998,0,0,0,0,0,18999,19003,0,0,19005,0,0,0,19006,0,0,0,0,0,0,
+19008,19011,0,0,19018,0,0,19019,0,19024,0,19031,19032,0,19039,0,19041,19050,0,0,
+0,19051,19055,19056,0,19059,19063,19064,0,0,19088,0,0,0,19093,19094,0,0,0,0,
+19095,0,19096,0,0,0,19097,0,0,19098,0,19099,19100,0,0,19103,0,0,0,0,0,0,0,19111,
+0,0,0,0,0,0,19112,0,0,0,19116,19117,0,19121,19122,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,19123,19124,0,0,0,0,0,0,0,19125,19126,0,19128,0,0,0,0,0,0,0,0,0,0,
+19129,19130,19131,19132,0,0,19146,0,0,19147,19156,19158,0,0,0,0,0,0,0,0,19182,
+19185,0,0,19187,0,0,0,19193,0,0,0,0,0,19194,0,19197,0,0,0,0,19198,0,0,0,0,0,0,0,
+0,0,0,19202,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19203,0,19205,19210,
+0,0,0,19213,0,19218,0,0,0,19223,19229,0,0,19230,0,0,19231,19232,19233,19239,0,0,
+0,0,0,19240,0,19248,19249,0,0,0,0,19254,0,19256,19258,19259,0,0,19261,0,19266,0,
+0,0,19272,0,19278,19281,19282,0,0,0,0,0,0,0,0,0,0,0,0,19283,0,0,19284,0,0,19285,
+19287,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19288,19291,0,19292,0,0,0,0,19297,0,19298,0,0,
+0,0,19302,19303,0,0,0,0,19304,19305,0,0,0,0,19314,0,0,19315,0,0,19321,0,0,0,0,0,
+0,0,19322,0,19333,0,19334,19335,0,19336,19337,0,0,0,0,0,0,0,0,0,0,0,19346,0,0,
+19353,0,19354,19362,0,19366,19367,0,0,19369,0,19375,0,19377,19380,19388,0,0,0,0,
+0,19389,19390,0,0,0,0,19392,0,0,0,0,0,19402,0,0,0,0,0,0,0,0,19412,0,0,19413,
+19422,0,19424,0,0,0,19425,0,0,0,19428,0,0,0,0,19431,0,0,0,0,0,19432,0,0,0,0,0,
+19448,19459,0,0,19461,0,19462,19463,0,19467,19474,19482,0,0,0,0,19494,0,0,0,0,
+19501,0,0,0,0,0,0,0,0,0,0,19502,19504,0,0,0,0,0,0,0,19505,0,0,0,0,19506,19507,0,
+0,0,19508,0,0,19511,0,0,19514,0,19515,0,19516,0,19518,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,19530,0,19537,19538,0,19543,19546,0,19547,19551,0,0,0,0,0,0,19552,
+19553,0,0,0,0,0,0,0,0,0,0,0,0,19555,0,0,19556,0,0,0,0,0,0,0,0,0,0,0,0,19560,
+19561,0,0,19562,0,0,0,0,0,0,19565,19567,0,19568,0,0,0,19569,19570,0,19578,0,0,0,
+0,19580,0,0,0,0,19581,19584,0,0,0,0,0,0,0,19585,19586,0,0,0,19587,19588,0,19589,
+0,0,0,0,0,0,19592,19593,19599,0,19600,0,0,19604,0,0,19605,0,19606,19608,19610,0,
+19613,19614,0,0,0,0,0,0,19616,19617,0,0,19618,0,0,19619,0,0,0,19620,19621,19631,
+0,0,19632,19634,19636,0,19643,0,0,19644,19658,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,19659,0,0,0,0,0,0,0,0,0,0,0,19675,19677,0,0,0,0,19679,0,19683,0,19684,0,0,
+0,0,0,0,19687,0,0,0,0,0,0,0,0,19688,19689,19692,0,0,0,0,0,0,0,19695,19697,0,0,0,
+0,0,19698,19699,0,0,19700,0,19702,0,0,19703,0,0,0,0,0,0,19704,19708,0,19710,0,
+19713,0,0,0,19715,0,0,0,0,19718,0,0,0,0,0,0,0,19720,0,19722,0,0,19725,0,0,0,0,0,
+0,0,0,0,0,0,0,0,19730,0,0,0,0,0,19731,0,19734,19735,19739,0,0,19740,0,19741,0,0,
+0,19746,0,0,19747,0,19771,0,0,0,0,0,0,0,0,19772,19775,0,0,0,0,0,0,19778,0,0,0,0,
+0,19779,0,0,19780,19790,0,19791,0,0,19792,0,0,0,19793,0,0,19796,19797,0,0,0,
+19799,0,0,0,19801,0,0,0,0,19803,0,19804,0,19805,0,0,19807,0,0,0,19808,0,0,0,0,0,
+0,19809,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19816,0,19821,0,19822,19830,19831,0,0,
+0,19833,0,0,0,0,0,0,0,0,0,0,19838,0,0,0,0,19839,0,0,19843,0,0,0,0,19845,0,0,0,0,
+19847,0,0,19848,0,19849,0,0,0,0,0,0,0,19851,0,0,0,19854,0,0,0,0,0,0,0,0,0,19864,
+0,19865,0,19866,0,0,0,0,0,0,0,19868,0,0,19870,0,0,19871,0,0,19872,19873,19875,0,
+19880,19882,19884,0,0,19885,19886,19888,0,0,0,0,0,0,0,0,0,0,0,0,19890,19892,
+19893,0,0,19894,0,0,0,19895,0,19896,19902,0,0,19903,0,0,19905,0,0,0,19906,0,
+19908,0,19909,19911,0,0,0,19913,19920,0,19938,19939,19940,0,0,0,0,0,0,0,19942,0,
+19943,0,19945,0,0,0,19951,19952,19954,19960,0,19965,0,19971,0,0,0,0,0,19975,0,
+19976,0,19990,0,0,19991,0,19993,0,19995,0,0,0,19998,19999,20001,0,20003,20005,0,
+20011,20012,0,0,0,0,0,0,20014,0,20020,0,0,0,0,20021,0,0,0,0,0,20023,20024,0,0,0,
+0,0,20025,0,0,20027,0,0,20029,0,0,20032,0,0,0,0,20044,20045,0,20048,20049,0,0,
+20050,0,20052,0,0,20054,20057,0,0,0,0,0,0,0,0,0,20059,0,0,20061,0,20062,0,20064,
+0,0,20066,0,0,20067,0,0,0,0,20069,0,0,0,0,0,0,20070,20071,0,0,0,0,0,0,0,0,0,0,0,
+20072,0,0,20073,20074,0,0,0,0,0,20075,0,20078,0,0,0,0,20080,0,20081,0,0,0,0,0,0,
+20095,0,20098,0,0,0,0,0,0,0,20107,0,0,0,0,0,0,0,0,20112,0,0,0,20113,20114,0,0,0,
+20115,20123,20124,0,0,0,20131,20133,20134,0,0,0,0,20136,0,0,20137,20138,20150,0,
+20152,0,0,0,20153,0,0,20154,0,0,0,20158,0,20163,0,0,20164,0,0,0,0,0,0,0,20166,0,
+20168,0,20170,0,20175,0,0,20178,0,0,0,0,20223,0,0,0,0,20224,0,20226,0,0,20230,0,
+20231,0,0,0,0,20232,0,0,20233,20234,0,20244,0,20247,0,0,0,0,0,0,20249,0,0,0,
+20250,0,0,0,0,20251,0,20253,0,20254,0,0,0,0,20256,0,0,20264,0,0,0,0,20266,0,0,0,
+20278,0,0,20279,20282,0,0,0,0,0,20283,0,20284,0,20285,0,20287,20290,0,0,0,0,
+20292,0,0,0,0,20293,20297,0,0,0,0,0,0,20299,0,20300,20303,0,0,0,0,0,0,20307,0,0,
+20308,0,20309,0,20310,0,0,0,0,0,0,20312,0,0,0,20314,0,0,0,0,20315,20316,0,20322,
+0,0,0,0,0,0,20339,0,0,0,20342,0,0,0,0,20352,0,0,0,0,0,0,0,0,0,0,20362,0,0,20365,
+0,20375,20377,0,0,0,0,0,0,0,0,0,0,0,20378,20379,0,20380,0,0,20381,0,20382,0,
+20383,0,20388,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20390,20392,20393,0,0,20395,0,0,0,0,0,
+20396,0,0,0,0,0,0,0,0,20398,20415,0,0,0,20417,0,0,20420,0,0,20426,20428,0,20431,
+0,0,20432,0,20433,20434,20435,0,0,0,0,20440,0,0,0,0,0,20442,0,20443,0,20446,0,0,
+0,0,20448,0,20451,0,0,0,0,0,0,0,0,0,20452,20453,0,0,20454,0,0,0,0,0,0,20457,0,
+20458,0,0,0,20465,0,0,0,0,0,20469,0,0,0,20473,0,20476,0,0,0,0,0,0,0,0,20477,0,0,
+20485,0,0,20486,0,0,20487,0,20496,0,20497,0,0,20498,0,0,0,0,0,0,0,0,0,0,20499,
+20500,0,20501,0,0,0,0,0,20520,20527,0,20529,0,0,0,0,20539,0,0,20540,0,0,0,20543,
+0,0,0,20546,0,0,0,0,0,20548,0,0,20563,0,0,20564,0,20566,0,0,0,0,0,20589,0,0,0,0,
+20590,0,0,20593,20594,0,0,0,0,20595,0,20597,20598,0,0,0,20618,20620,0,0,0,0,
+20621,0,0,0,0,20627,0,0,0,0,0,20628,0,0,0,20629,0,20630,0,0,20639,0,0,0,0,0,
+20707,0,0,20709,0,0,0,20713,20714,0,0,0,0,0,20724,20725,0,0,0,0,20726,20728,
+20729,0,20733,0,20734,0,20735,20736,0,20737,0,0,20744,0,20745,0,20748,0,0,20749,
+0,0,0,0,0,0,0,0,20750,0,0,0,0,20754,0,0,0,20761,0,0,20763,0,0,0,0,0,0,0,20766,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,20767,0,0,0,0,20768,0,20769,20777,0,0,0,0,0,0,20785,0,
+0,0,20786,20795,20801,0,20802,0,20807,0,0,20808,0,0,20810,0,0,20811,0,20812,0,0,
+0,0,0,20813,0,0,20818,20820,20821,0,0,0,20822,0,20823,0,0,0,20826,0,0,0,0,0,0,0,
+20829,20830,20831,0,20832,20836,0,0,20839,0,0,20840,20842,0,20843,0,20844,0,
+20854,0,0,0,20855,0,0,0,0,20856,0,0,0,20869,0,0,20871,0,0,0,0,0,0,0,20873,0,0,0,
+0,0,20876,0,0,0,0,0,20880,0,0,20882,0,0,0,0,20883,20884,0,0,20890,0,0,0,0,0,0,0,
+0,0,20891,0,0,0,0,0,20905,0,20906,20910,0,0,20912,20915,0,0,0,0,0,20916,0,20917,
+0,20919,20920,20922,0,20927,0,20928,20929,20930,0,0,20935,0,0,20939,0,0,20941,0,
+0,0,20943,0,0,0,20946,20947,0,0,0,0,0,20950,0,20954,0,0,20955,20964,0,0,20967,0,
+0,0,0,0,20973,20975,0,0,0,20984,0,20987,20988,0,0,0,0,0,20989,0,0,0,20995,0,
+20998,0,20999,0,0,0,0,21000,21001,0,0,0,0,21008,0,21010,0,21016,0,0,0,21017,
+21018,0,0,0,0,0,21021,21026,21027,21028,0,0,21029,0,0,0,0,0,21030,0,0,0,0,0,0,0,
+0,0,0,0,0,0,21031,21032,0,0,0,0,0,21037,0,0,21038,0,0,0,0,0,0,0,0,0,21039,0,
+21041,0,21046,21047,0,0,0,21049,21053,0,0,21057,21064,21065,0,0,21066,21067,0,0,
+0,21069,0,0,0,21071,21072,0,0,21073,0,21074,0,0,21078,0,0,0,0,21079,0,0,21080,
+21081,0,0,21086,21087,0,21089,0,0,0,0,0,0,0,21091,0,21093,0,21094,0,0,0,0,0,0,0,
+0,21095,0,0,0,0,0,21096,0,21098,0,0,0,0,0,0,0,21099,0,0,21100,21101,21102,0,0,0,
+0,0,21103,0,21104,0,0,0,0,0,21105,21108,21109,0,0,21112,21113,0,0,0,0,0,0,21115,
+21122,21123,0,0,0,0,0,21125,0,0,0,0,0,0,0,0,21129,21131,0,0,21134,0,0,0,21137,
+21142,0,21143,0,0,21144,0,21145,21146,0,21152,21154,21155,21156,0,0,0,21160,0,0,
+0,0,0,0,21161,0,21164,0,21166,0,0,0,0,21170,0,0,0,0,21171,0,0,21172,0,21174,0,
+21175,0,0,0,0,0,21176,21179,21188,0,0,0,21189,0,0,21190,0,0,0,21192,0,0,21193,0,
+0,0,21198,0,21212,0,0,21213,0,0,0,0,0,0,21215,21216,0,0,21223,21225,0,21226,0,0,
+0,0,21227,21228,0,0,21229,0,0,0,0,21230,21236,0,0,0,0,0,0,0,0,0,0,0,0,0,21237,0,
+0,21238,21239,0,0,0,0,21256,0,0,0,0,0,21257,0,0,0,0,0,0,0,21259,0,0,0,21263,0,
+21272,0,21274,0,21282,0,0,0,0,0,0,0,0,21283,0,0,0,0,0,0,0,0,21294,0,0,21297,0,0,
+0,0,21298,0,0,0,21299,0,21300,21302,0,21316,0,21318,21322,21323,0,21324,0,21326,
+0,0,0,21327,21328,0,0,0,21352,0,0,21354,21361,0,0,0,0,0,0,0,0,0,0,0,0,0,21362,0,
+0,0,21363,0,0,0,0,0,0,0,0,0,21366,0,0,21367,21372,21374,0,0,0,21375,21377,0,
+21378,0,0,0,21380,0,0,0,0,0,0,0,0,0,0,21381,0,0,0,0,0,0,21382,0,21383,0,0,21384,
+0,0,21385,0,0,0,0,21389,21390,0,0,0,0,0,0,0,0,0,0,0,0,0,21397,21398,0,0,0,0,0,0,
+0,0,0,0,21399,0,21400,0,0,0,0,21402,0,0,0,21403,21404,0,21405,21406,0,0,0,21407,
+0,0,0,0,0,0,0,0,0,0,0,0,21408,0,0,0,0,21409,0,21421,0,21422,0,0,0,21425,21428,0,
+0,0,0,21429,0,0,0,0,0,21433,0,0,0,0,0,0,0,0,0,0,21434,0,21443,0,21444,21449,0,
+21452,0,21453,21454,0,0,0,21457,0,0,21458,0,0,0,21460,21461,0,0,21464,0,0,0,
+21473,21478,0,0,21479,0,0,21481,21483,0,0,0,0,0,0,0,0,21484,0,0,21485,21486,0,0,
+21488,0,0,0,0,0,0,21523,0,0,21525,0,0,0,0,0,0,0,21526,0,0,0,0,0,0,21529,21530,0,
+0,21531,0,0,21533,0,0,21539,21564,0,21567,0,0,0,0,0,0,0,0,21575,0,0,0,0,21577,0,
+0,0,0,0,21591,0,0,21604,0,0,0,0,0,0,0,0,0,21605,0,21606,0,0,21617,21618,21619,
+21620,0,0,0,0,0,0,0,0,0,0,0,0,0,21623,0,0,0,0,21631,0,21635,0,0,0,0,21639,21646,
+21653,21662,0,0,21663,21664,0,21666,0,0,21667,0,21670,21672,21673,0,21674,21683,
+0,0,0,0,0,21684,0,21694,0,0,0,0,21695,21700,0,21703,0,21704,0,0,21709,0,0,0,
+21710,0,0,0,0,0,0,0,0,21711,0,0,0,21712,0,21717,0,21730,0,0,0,21731,21733,0,0,0,
+0,21737,21741,21742,0,21747,0,0,0,21749,0,0,0,0,0,0,0,0,0,0,0,0,0,21750,0,0,0,0,
+0,21752,0,0,0,0,21753,0,0,0,0,0,0,21755,21756,0,21757,0,0,0,0,0,0,21760,0,0,
+21763,0,0,0,0,0,0,0,0,0,21764,0,0,21766,0,0,21767,0,0,0,0,0,0,0,0,0,21773,0,
+21774,0,0,21775,0,0,0,0,21776,0,0,21777,0,0,0,0,0,0,0,0,0,21780,21787,21788,
+21791,0,0,0,21797,0,0,0,0,0,21805,0,0,0,0,21806,0,21807,21809,0,21810,21811,0,
+21817,21819,21820,0,21823,0,21824,0,0,21825,0,0,21826,21832,0,0,0,0,0,21833,
+21848,21849,0,0,21867,21870,21871,21873,0,0,0,21874,0,0,0,0,0,0,0,0,0,21875,0,
+21878,0,0,0,21879,0,21881,21886,0,0,0,0,21887,0,0,21888,21894,21895,21897,0,
+21901,0,21904,0,0,21906,0,0,0,21909,21910,21911,0,0,21912,0,0,21913,21914,21915,
+0,21919,0,0,0,0,0,0,0,21921,0,0,21922,21933,21939,0,0,0,0,0,0,0,0,0,0,0,21944,0,
+0,0,0,0,21945,0,21947,0,0,0,0,0,0,0,0,0,0,21949,0,0,0,21950,0,0,0,0,0,0,0,0,0,0,
+0,0,0,21951,0,21952,0,0,0,0,0,0,0,0,0,21954,21957,0,0,0,0,21958,0,21959,0,0,0,0,
+0,0,21962,21963,0,0,0,0,0,0,0,0,21964,21965,0,0,21969,21970,0,0,0,21974,0,0,
+21980,21981,0,21982,0,0,0,0,0,21985,0,21988,0,21992,0,21999,0,0,0,0,0,0,22001,0,
+22002,0,0,0,0,0,0,22003,0,0,0,0,0,22004,0,0,0,22008,0,22009,22015,0,0,22016,0,0,
+0,22017,22019,0,0,0,0,0,0,0,0,0,22020,0,0,0,0,0,0,0,0,0,0,22021,22037,0,22039,0,
+0,0,22040,0,0,0,22048,22049,0,0,22053,22055,22056,22059,0,0,22060,22061,0,0,
+22064,0,0,0,0,22066,0,0,0,0,0,0,0,22073,0,0,0,22074,22075,0,0,0,0,0,0,0,22076,0,
+0,0,0,22077,22084,22099,0,0,0,0,0,0,0,22104,0,0,22107,0,22108,0,22109,0,22110,0,
+0,0,0,0,0,0,22111,22119,0,22120,22122,0,0,0,0,22125,0,0,0,22128,22129,0,0,0,0,0,
+0,22141,0,0,0,22142,0,0,22144,22146,0,22148,22149,22151,22154,0,0,0,22162,0,0,0,
+0,22164,22177,0,0,0,0,22179,0,22182,22183,0,0,22184,22188,0,0,0,0,0,0,0,0,22190,
+0,22194,22201,0,0,22208,0,22209,0,22212,0,0,22215,0,22223,22231,0,0,22232,0,
+22234,0,0,22235,22236,0,22237,0,22240,0,0,0,0,0,22241,0,0,0,22242,22246,22247,0,
+0,0,22259,22268,0,22269,0,0,0,0,0,0,0,22270,0,0,0,0,22271,0,22272,0,22277,0,0,0,
+0,0,22278,22280,22283,22286,0,0,22287,22289,0,0,22290,0,22293,0,0,0,0,0,0,0,0,0,
+0,22295,0,22301,22302,0,0,0,22305,0,22308,0,0,0,0,0,0,0,0,0,0,22315,0,0,0,22317,
+0,22334,0,0,0,22335,0,0,0,0,0,22336,0,22338,22344,0,22347,22349,0,22350,0,0,0,0,
+0,0,0,22357,0,0,0,0,0,22358,0,0,0,0,0,0,0,0,0,0,22359,22360,0,0,0,0,0,0,0,0,
+22361,22366,0,0,22369,0,22370,22373,0,0,0,0,0,22375,0,22377,0,0,0,0,0,22378,0,0,
+0,0,22381,0,0,0,0,22382,0,22383,0,0,0,0,0,0,0,0,0,22391,0,0,22392,22395,22396,
+22402,0,0,0,0,0,0,0,0,0,0,0,0,0,22405,0,0,22406,0,0,22408,0,0,22409,22410,0,0,0,
+0,0,0,22424,0,0,0,0,22426,0,0,0,22427,0,22428,0,22432,0,22435,22442,22443,0,0,0,
+0,22444,0,0,0,0,0,22446,0,22454,0,22455,0,0,0,22465,0,22470,0,22471,0,0,0,0,
+22472,22473,0,22487,0,0,0,22488,0,0,0,0,22489,0,0,22499,0,0,0,0,0,0,22514,0,0,
+22515,0,0,0,0,0,0,0,22516,0,0,0,22517,22520,0,0,0,22534,0,0,22535,0,0,22536,0,
+22540,22553,0,22555,0,0,0,0,22561,0,0,22562,0,0,0,0,0,0,0,0,0,0,0,22566,0,0,0,0,
+22567,22568,0,0,22575,0,22579,0,22582,22583,22585,0,0,0,0,0,22586,0,0,22587,0,0,
+22590,0,0,0,0,0,22591,0,22592,0,0,0,0,0,22593,0,22602,0,0,22604,0,0,22609,0,0,
+22618,0,0,0,0,0,0,22619,0,22624,22625,0,0,22638,0,0,0,0,0,22639,0,0,22640,0,0,0,
+0,0,0,0,22644,0,22645,22647,0,0,0,0,22652,22653,0,0,0,22654,0,22655,0,0,0,22656,
+0,0,0,0,0,0,0,0,0,0,22673,22675,22676,0,0,22678,22679,0,22691,0,0,0,0,0,0,0,
+22693,0,0,22696,0,22699,22707,22708,0,0,0,0,0,0,0,0,22718,0,22719,0,0,0,0,22723,
+0,0,0,22724,22725,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22726,22728,0,0,0,0,0,0,0,0,22729,
+0,0,22731,0,0,0,0,22732,22735,22736,0,0,0,0,22739,0,22749,0,0,22751,0,0,0,0,0,0,
+0,0,0,0,0,22758,0,0,0,0,0,22760,0,0,0,0,0,22764,22765,22766,0,22768,0,0,0,0,0,
+22769,22770,0,0,0,0,0,0,22771,0,0,22772,22775,0,22776,22777,22780,0,0,22782,
+22784,0,22787,0,22789,22796,0,0,0,0,0,22798,0,0,0,0,0,0,22802,0,22803,22804,0,0,
+0,0,0,0,0,0,0,0,22805,0,0,22810,22811,22814,22816,0,22825,22826,0,22831,22833,0,
+0,0,0,0,0,0,0,0,22834,0,22836,22838,0,22839,0,0,0,0,0,22840,0,22847,0,0,0,0,0,
+22856,22857,0,22858,22859,0,0,22862,0,0,22864,0,0,0,0,22865,0,0,0,0,0,0,0,0,0,0,
+0,22866,0,22867,22868,0,0,0,0,22869,0,22871,0,22872,0,22873,22881,22882,22884,
+22885,0,0,0,0,0,0,0,22886,22887,0,22894,0,22895,0,0,0,22900,0,22901,0,0,0,0,
+22904,0,0,0,0,22905,22907,0,0,0,22915,22917,0,0,22918,0,0,0,22920,0,0,0,22929,
+22930,0,0,0,22941,22942,0,0,0,22943,0,0,0,22944,0,0,0,0,0,0,0,22946,0,22947,0,0,
+22954,0,22956,0,0,22962,0,0,0,0,0,0,0,22963,0,0,22964,0,0,0,0,0,0,0,22965,0,
+22968,0,0,0,22969,0,0,0,0,0,22970,0,22971,0,0,0,0,0,22978,0,0,22979,0,22987,0,0,
+22989,0,0,0,0,0,0,22990,0,23005,0,0,0,0,0,0,0,23006,23007,23008,0,0,23023,23024,
+23029,0,0,0,0,23030,0,0,0,0,0,23032,0,0,0,0,0,23035,0,0,0,0,23038,0,0,0,23048,0,
+23049,23052,23053,23060,23061,0,23063,0,0,0,0,23067,23068,0,0,0,23069,23073,0,0,
+0,23127,0,23128,0,0,0,0,0,23129,0,23138,23141,0,23149,0,0,23150,0,0,0,23152,0,0,
+0,0,0,0,0,0,23154,0,0,0,0,23157,23159,23160,0,0,0,0,0,0,0,0,0,0,0,0,23180,0,0,0,
+0,23181,0,0,23188,0,23189,0,0,0,0,0,0,0,0,0,0,0,0,23195,0,0,23196,23199,0,0,0,0,
+0,0,0,0,0,23202,0,23204,0,23207,0,23209,23210,0,0,0,0,0,0,23227,23229,0,0,23230,
+23234,23238,0,0,0,23245,23246,23248,0,0,0,0,23249,23254,0,0,0,23265,0,0,0,0,0,0,
+0,23268,0,23276,0,0,0,0,23277,0,23297,0,23298,0,0,0,0,23299,0,23302,0,0,23303,
+23312,0,0,23314,0,23320,0,0,0,0,23324,0,23325,0,23328,0,23334,0,0,0,23337,0,0,0,
+0,23343,23344,23346,0,23348,0,0,0,0,0,0,0,0,23353,0,0,0,0,23355,0,23356,23358,0,
+0,0,23359,23360,0,23361,0,23367,0,23369,0,0,23373,0,23378,23379,0,23382,23383,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,23387,0,0,0,0,0,0,23388,23390,0,0,23393,23398,0,0,0,
+23399,0,0,0,23400,0,0,0,0,23401,0,0,0,23415,0,0,0,0,0,0,0,0,23416,0,23422,0,
+23443,23444,0,0,0,0,23448,0,23454,0,0,0,0,0,0,23456,0,0,23458,23464,0,0,0,0,0,0,
+23465,0,0,0,23470,23471,0,0,23472,0,0,0,23473,23496,0,0,0,0,0,0,0,0,23497,0,
+23499,0,0,23502,0,0,23503,0,0,23513,0,0,23515,0,0,0,23517,0,0,0,0,23518,23519,
+23521,23524,0,23525,23528,23539,0,0,0,0,0,23541,0,0,23544,0,0,23556,0,0,23557,0,
+0,0,0,0,0,0,0,0,0,0,0,0,23559,0,23560,0,0,23561,0,0,23566,0,0,0,0,0,23568,23569,
+23570,0,0,0,0,23571,0,23574,0,0,0,0,0,0,0,0,0,0,0,23575,0,23579,0,0,23581,0,0,0,
+0,0,0,23587,0,0,0,0,0,0,0,23596,23598,0,0,0,0,23602,23606,0,0,23607,0,23608,0,0,
+0,23614,23616,0,0,0,0,0,23618,0,0,23619,0,0,0,0,23621,23626,0,23627,0,0,0,0,0,0,
+0,23629,0,23630,0,0,0,0,23634,0,23636,0,0,0,0,0,0,23638,0,0,0,0,23640,23667,0,
+23669,0,0,0,23681,0,0,0,0,0,0,0,23682,0,23683,0,0,0,0,0,23684,0,0,0,23685,23689,
+0,23693,23694,23700,0,23702,0,23709,0,0,0,0,0,0,0,23712,0,0,0,0,0,23714,0,0,
+23715,0,0,0,0,23718,0,0,23720,0,0,0,0,23722,0,0,0,23726,23729,0,23741,23746,0,
+23748,0,0,0,0,23749,0,0,0,0,0,23750,0,0,0,0,23751,0,23753,0,0,0,0,23757,23765,0,
+0,0,23770,0,0,0,0,0,0,0,23771,0,23772,23781,0,0,23796,0,0,0,0,23798,0,23799,0,0,
+0,23802,0,0,23806,0,23807,0,0,23808,0,23809,0,23819,0,0,0,23821,0,23827,0,0,0,
+23829,0,0,0,0,0,0,0,23830,0,0,0,0,0,0,23832,23833,23834,23835,0,0,0,0,23837,
+23838,0,0,0,0,0,23846,0,0,0,0,0,0,23847,0,0,0,0,0,23879,23881,0,0,23882,23883,
+23895,0,23899,0,0,0,0,23901,0,0,0,0,0,0,23902,0,0,0,0,0,23903,23905,0,23906,0,
+23907,23918,23919,23920,0,23922,0,23924,0,23927,0,23934,0,23937,23941,0,23942,
+23946,0,0,0,0,0,23955,23956,23958,0,0,0,0,0,0,23959,0,23962,23965,0,23966,0,0,0,
+0,23967,23968,0,0,23973,0,0,23974,0,0,0,0,23975,0,23976,0,0,0,0,0,0,0,0,0,0,0,0,
+0,23977,0,0,0,0,0,0,0,0,23980,0,0,23984,0,23985,0,0,23987,0,0,23988,23990,23991,
+0,0,0,0,0,0,23992,0,0,0,0,0,0,0,0,23994,0,0,0,23998,0,0,0,0,0,0,0,0,0,23999,0,0,
+24003,0,24004,0,24006,0,0,0,24007,0,0,24008,0,0,0,0,0,0,0,24009,0,0,24010,0,0,
+24011,0,0,24013,24014,0,0,24015,24016,24027,0,24028,24029,0,24030,0,0,0,0,0,
+24033,24034,0,24035,0,0,24036,0,0,24044,0,24048,24049,24063,24067,0,24068,24070,
+0,0,24071,24078,24087,0,24090,0,0,0,24095,0,24098,24101,24104,24106,0,24107,0,0,
+0,24108,0,0,0,0,24110,24111,0,24113,0,0,24115,24120,0,0,0,0,0,0,24124,0,24125,0,
+24126,0,24127,0,0,0,0,0,24135,0,0,24136,0,24137,24142,0,0,0,24146,0,0,24147,
+24149,24154,0,24163,0,0,0,24165,24166,24167,0,0,0,0,0,0,0,0,0,0,24169,24170,
+24175,0,0,0,24178,0,0,24179,0,0,24181,0,24184,24197,0,24201,24204,0,0,0,0,0,0,
+24206,24212,24220,0,0,0,24224,0,0,0,0,0,0,0,0,24226,0,24234,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,24235,0,24236,0,0,0,0,0,24239,24240,24241,0,0,24248,0,0,24249,0,
+24251,0,0,0,0,0,0,24253,0,24268,0,0,0,24269,0,24271,24272,0,0,0,0,24273,0,0,
+24274,0,0,24279,0,0,0,0,0,0,0,24280,0,24293,24294,0,0,0,0,0,0,24296,0,0,24323,0,
+0,0,24329,24330,24331,24339,0,24351,0,0,24369,24370,0,0,0,24371,0,0,0,0,24372,
+24373,24374,0,0,0,0,0,24378,0,0,0,0,24379,0,24381,0,24383,24389,0,24390,0,0,
+24394,24395,24400,0,0,0,24401,24402,0,24406,0,0,0,24411,0,0,0,24415,0,24416,0,0,
+0,0,0,24417,0,24419,0,24422,0,24423,24428,0,24435,0,0,0,24439,0,0,0,24440,24442,
+24446,0,0,0,24447,24448,24449,24452,0,0,0,0,24453,24457,0,0,24458,24459,24460,0,
+24465,0,0,0,0,0,0,0,24470,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24471,0,24473,
+24474,24475,24476,0,24478,0,0,0,0,24480,0,0,0,0,0,0,0,0,0,0,24481,0,0,0,0,0,0,0,
+0,0,0,24482,24485,0,0,0,0,24486,0,0,0,24488,0,0,0,24494,0,0,0,0,24497,0,0,24498,
+0,0,0,24499,24506,0,0,0,24507,0,0,24511,0,0,24513,24514,0,0,0,0,0,24517,0,24518,
+0,24520,0,24521,24524,24525,0,0,0,0,0,24527,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24528,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24537,24539,0,24540,0,0,0,24548,0,0,0,0,0,24549,
+24550,0,0,0,24553,24554,0,24555,0,24556,0,24558,0,0,0,0,0,24560,0,0,0,24561,0,0,
+0,0,0,24562,0,0,0,0,0,0,0,0,0,0,0,0,0,24567,0,0,0,0,0,24569,0,0,0,24574,0,24575,
+0,0,0,0,0,0,0,0,0,0,0,24577,24581,0,24584,0,0,0,0,0,24585,0,0,0,0,0,24586,0,0,
+24587,0,24588,0,0,0,0,0,0,0,0,0,0,24590,24591,0,0,0,0,24592,0,0,0,0,0,0,0,24594,
+0,0,0,0,0,0,0,24596,24597,0,0,0,0,24602,24603,0,0,0,0,24604,0,0,24605,0,24610,0,
+0,24611,0,0,0,0,24612,24615,24616,24624,0,0,0,24627,0,24638,24639,0,0,0,0,24640,
+0,0,0,24655,24656,24657,0,0,0,0,0,0,0,0,24662,0,24663,24664,0,0,0,0,0,24665,0,0,
+0,0,24667,0,0,0,0,0,0,24668,24669,0,24670,24674,0,0,0,24675,0,24678,0,0,24679,0,
+0,0,24681,0,24683,0,0,0,0,24684,0,24685,0,0,24686,0,0,24688,24689,0,0,0,0,24690,
+24691,0,0,0,0,0,0,0,24697,0,24698,0,0,0,0,0,0,0,0,24709,0,0,0,0,0,24710,0,24712,
+0,0,0,0,0,0,24713,24714,0,24715,0,24716,24718,0,24719,0,0,0,0,24720,0,0,24725,0,
+0,24738,0,24749,24750,0,0,0,24752,0,0,0,24753,0,0,0,24758,0,0,0,0,0,24762,0,
+24763,0,0,0,0,0,0,0,24764,0,0,0,0,0,24765,24767,24768,0,24772,0,0,0,0,24773,0,0,
+0,0,24777,0,0,0,0,0,24785,0,24786,24788,0,0,0,24789,0,0,0,0,24794,24798,0,24799,
+24800,0,0,0,24803,0,24804,24806,0,24807,0,0,0,24810,0,0,0,0,0,0,24827,24828,0,
+24835,0,0,0,0,0,0,24836,0,0,0,0,0,24839,0,24843,24844,0,0,0,0,0,0,0,0,0,0,24847,
+0,0,24848,0,0,0,0,0,0,24849,0,24850,24851,0,0,0,24852,0,24853,0,0,0,0,0,0,0,0,0,
+24854,0,24855,0,0,24868,0,0,0,24883,0,0,0,24884,0,24895,24897,0,0,0,0,0,24899,0,
+0,0,0,0,24900,0,24913,0,0,0,0,0,0,24914,0,0,24917,24930,24931,0,0,0,24932,0,0,
+24939,0,0,24942,0,0,0,0,0,0,0,0,0,24945,24950,0,24951,0,0,24953,0,0,0,24954,0,
+24959,0,0,0,24961,0,0,24962,0,24964,24968,24970,24972,0,0,0,0,0,24976,0,0,0,
+24977,0,24982,0,0,24983,0,0,24984,0,0,0,24993,0,0,0,24994,0,0,25001,0,0,0,25003,
+0,0,25018,0,0,25023,0,0,0,25034,0,0,25035,25036,0,25037,0,0,0,0,0,0,0,25039,0,0,
+0,0,0,25040,0,0,0,0,0,0,0,25042,0,0,25043,25045,0,0,0,0,0,0,25049,0,0,25051,0,
+25052,25053,0,0,25054,0,0,0,25055,0,0,0,0,25057,25059,0,0,25060,25064,0,25065,
+25069,25070,0,0,0,0,25072,0,25073,0,25090,0,0,25092,25093,25101,0,0,0,0,0,0,
+25105,25108,0,0,25113,0,0,25115,25116,0,0,0,0,0,0,25117,0,0,0,25120,25121,0,0,0,
+0,0,0,0,25125,0,0,0,25126,0,25130,25134,0,25139,0,25143,0,0,0,25151,0,25161,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25163,0,0,0,0,0,0,0,25174,0,25175,0,25207,0,0,
+0,25209,0,0,0,0,25213,0,25219,0,25223,0,25225,0,0,0,25227,0,0,0,25228,0,0,0,
+25229,0,0,0,0,0,0,0,25231,25233,0,0,0,0,25237,25239,0,0,0,25243,0,0,0,25252,0,
+25257,25258,0,0,0,0,25260,25265,0,25268,0,0,25273,25324,0,25325,0,25326,0,0,0,0,
+0,0,0,0,25327,0,0,0,0,0,25328,0,0,0,0,0,0,25332,0,0,0,25333,0,0,0,25336,25337,
+25338,0,0,25343,0,25350,0,0,0,0,0,0,0,25352,0,25354,0,25375,0,25379,0,0,0,0,
+25384,0,0,0,0,0,0,0,0,0,25386,0,25388,0,25390,0,0,25399,0,0,25401,0,0,0,25402,0,
+0,0,25407,0,0,0,0,0,0,0,0,0,0,0,25413,25415,0,0,25417,0,0,0,0,0,0,0,25419,0,0,0,
+25421,0,0,0,25424,0,0,0,0,25433,0,0,0,0,0,0,0,0,0,25435,0,0,0,0,0,0,25436,0,0,0,
+25437,0,0,25440,0,0,0,0,0,0,25442,0,0,25443,0,25446,0,0,25449,0,0,0,25450,0,0,0,
+0,25452,0,25453,25454,25455,0,0,0,25456,0,25457,0,0,0,25459,0,25461,0,25468,0,0,
+0,0,0,0,0,0,25469,0,0,0,0,0,25471,0,0,0,0,0,25474,0,0,0,0,0,0,0,0,25475,0,0,0,0,
+25477,0,0,0,0,25483,0,0,0,0,0,25484,0,0,0,0,0,0,0,0,0,0,0,0,25485,0,25497,0,0,
+25498,0,25504,0,25510,0,25512,0,0,25513,25514,0,0,0,0,0,0,25517,25518,25519,0,
+25520,0,0,0,0,0,0,0,25521,0,25522,25527,25534,0,25536,0,25537,0,0,25548,25550,0,
+0,25551,0,25552,0,0,0,0,0,25554,0,25555,0,25556,25557,25568,0,0,0,25570,25571,0,
+0,0,0,0,0,25574,0,0,0,0,25579,0,0,0,25581,0,0,0,25582,0,0,0,0,0,0,0,0,0,25588,0,
+0,0,0,25589,0,0,0,0,25590,0,25591,25592,25593,0,25594,0,0,0,25596,0,25597,25615,
+0,0,0,0,0,25618,0,0,0,0,25619,25623,0,0,25629,0,0,25631,0,0,0,25635,25636,0,0,
+25649,0,0,0,0,25654,0,0,0,25661,25663,0,0,25671,0,0,25678,25698,0,25699,25702,
+25703,0,0,0,0,0,0,0,0,25704,0,0,0,0,0,25706,0,0,25710,0,25711,0,25712,0,25715,
+25716,25717,0,0,25718,25728,25732,0,0,0,25734,0,0,0,0,0,0,0,0,0,25737,0,0,25739,
+0,0,0,25740,0,25741,25745,0,25746,0,25748,25772,25778,0,0,0,0,0,25780,0,0,0,0,
+25781,0,25782,25784,25785,0,0,0,25789,0,0,0,0,0,0,25797,25801,0,0,0,25808,25809,
+0,0,25811,25814,25815,0,0,25817,0,0,0,0,0,0,0,0,25820,0,0,0,0,25832,25833,0,0,0,
+25846,0,0,0,25847,25848,0,0,0,0,0,0,0,0,0,25849,25850,0,0,25851,0,0,25852,0,
+25862,0,0,0,25863,25865,0,0,0,0,0,0,0,25867,25868,0,25869,25874,0,25875,0,25876,
+25877,0,0,0,0,25878,25902,0,0,0,0,0,0,0,25903,25904,25905,0,0,0,25908,25909,0,0,
+0,0,25910,0,0,0,0,0,0,0,25912,0,25913,0,0,0,0,0,0,0,0,25914,0,0,25916,0,0,0,0,0,
+25917,25927,0,0,0,0,25928,0,0,25930,0,0,0,25933,0,0,25938,25942,0,0,0,0,0,0,0,
+25945,0,25950,0,25956,0,0,25961,25962,0,0,25963,0,25964,25965,25966,0,0,0,0,0,
+25967,0,0,0,0,25968,0,0,0,25969,25971,0,0,0,0,0,25973,25975,0,0,0,0,0,0,0,25978,
+0,25981,0,0,0,25982,0,0,0,25984,0,0,0,0,0,0,0,25993,0,0,0,0,0,0,0,0,0,0,0,0,0,
+26002,0,0,0,26005,0,0,0,26006,26007,0,0,26014,26015,26016,0,0,0,0,0,0,26017,
+26018,26020,0,26022,26023,0,0,0,26024,26028,0,26029,26033,26034,26044,0,0,0,0,0,
+26046,0,0,26047,0,0,26049,0,26050,0,26051,0,0,0,0,0,26053,0,0,0,0,26054,26059,0,
+0,0,0,0,0,26060,0,26066,0,0,0,0,0,0,0,0,0,0,0,0,26067,0,26069,0,0,26071,0,0,0,
+26073,0,26074,26077,0,0,0,0,26078,0,0,0,26079,0,26090,0,0,26094,0,0,0,0,0,0,0,0,
+26095,0,0,0,0,0,0,0,0,0,0,0,26096,26101,0,26107,26122,0,26124,0,0,26125,0,0,0,0,
+0,0,26136,26141,26155,0,0,0,0,0,0,0,0,0,26164,26166,0,0,0,26167,0,26170,26171,0,
+0,26172,0,0,26174,0,0,0,0,0,0,0,0,0,0,0,0,0,26175,0,0,0,26176,26177,0,26321,
+26322,0,26323,0,0,26324,0,0,0,0,0,0,0,26325,0,26331,0,0,0,0,0,0,26335,0,0,0,
+26350,0,0,0,26379,0,0,26382,26383,26385,0,0,26392,26406,0,0,0,0,26411,0,0,0,0,0,
+26412,0,0,26420,0,0,26423,0,26424,26426,26432,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+26435,0,26436,0,0,0,0,0,26441,0,26444,0,0,0,26446,0,0,0,0,26447,0,0,0,0,26449,0,
+26450,26452,0,26453,26454,0,0,0,26455,0,0,0,26456,0,0,26458,0,0,26460,0,26463,0,
+0,0,0,0,0,0,0,26464,26470,0,0,0,0,0,0,0,0,0,26473,0,0,26474,0,0,0,0,0,0,0,26475,
+0,0,0,0,0,0,0,26477,0,26485,0,0,26486,0,26487,0,0,26488,26493,26494,0,0,26495,0,
+26497,26504,26506,0,0,0,0,0,26507,0,0,0,0,0,26509,0,0,26510,0,0,0,0,0,0,0,0,0,0,
+0,0,0,26512,0,26513,26515,0,0,0,26518,0,0,0,26519,0,26524,26526,0,0,0,26527,0,
+26532,0,26533,26537,26558,0,0,0,26559,0,0,0,26571,0,0,26573,0,26588,0,26593,0,0,
+0,0,0,0,26603,0,26604,0,0,0,0,0,0,0,0,0,0,26606,0,0,0,0,0,0,0,26607,26609,26611,
+26614,0,0,0,26616,26620,0,26621,0,0,0,0,0,26627,0,26629,0,0,26630,0,0,26632,
+26643,0,0,0,26644,0,0,0,0,0,0,0,0,0,26646,26647,0,0,0,26650,0,0,26656,0,0,0,0,
+26663,26670,26671,0,0,0,26685,26686,26687,0,26689,0,0,0,0,26744,0,26745,0,26747,
+26748,0,26749,26750,26751,0,0,0,0,26752,26755,0,0,0,26756,26769,0,0,0,26774,0,0,
+0,0,0,26775,0,26777,26778,0,26786,0,0,0,26787,0,0,0,0,0,0,0,0,0,0,0,0,0,26788,0,
+0,26789,0,0,0,0,0,26791,0,26792,26793,0,0,0,26794,0,26797,26798,0,0,0,26800,0,0,
+26803,0,26804,0,0,0,0,0,0,0,0,0,26805,0,0,26808,0,0,26809,0,0,0,0,0,0,0,26812,0,
+26825,0,0,0,0,0,0,0,26826,0,0,26827,26829,26834,0,0,0,0,26835,0,0,26849,0,26851,
+0,0,0,0,0,0,0,0,0,26852,0,26853,26857,0,26858,0,26859,0,0,0,0,0,0,0,26876,0,
+26878,26882,26883,0,0,0,0,26890,26894,0,0,0,0,26895,26896,0,0,0,0,0,26900,0,0,0,
+0,0,0,0,26911,26913,26914,26915,26916,26919,0,0,0,26921,26922,0,0,26925,0,0,0,
+26928,0,0,26929,26930,0,0,0,26931,0,26932,0,0,0,0,0,26933,0,0,0,0,0,0,26937,0,0,
+26943,0,0,26944,0,0,0,26946,0,0,0,0,0,0,0,26956,0,26958,0,0,26963,0,0,0,0,0,0,0,
+26965,0,26969,26970,26972,0,0,0,0,0,26973,0,26974,0,26978,0,26980,0,0,0,0,0,0,
+26982,0,26986,26987,0,26990,0,0,0,0,27003,27006,0,0,27007,27010,27012,27013,0,0,
+0,0,0,0,0,0,27014,27015,27018,0,27019,0,0,0,0,0,27025,0,0,0,27026,0,0,0,0,27029,
+27030,27031,27034,0,0,27036,27037,0,0,0,27038,27042,0,0,0,27044,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,27045,0,0,0,0,0,0,0,27046,0,0,0,0,0,0,0,27047,27049,0,27050,0,0,0,
+27051,27052,0,27055,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27056,27058,27059,0,
+27061,0,27064,0,0,0,0,0,27069,0,0,27070,0,0,0,0,0,0,0,27072,0,0,0,0,0,0,0,0,
+27076,0,0,0,0,0,27078,0,27079,0,0,0,27081,0,0,0,0,0,0,27082,0,27083,27086,0,0,0,
+0,27087,0,0,0,0,0,27088,27090,0,27094,0,0,27095,0,27099,27102,0,0,0,27103,0,0,0,
+0,27105,0,0,0,27106,0,0,0,0,0,0,27107,0,0,0,0,27108,27117,0,0,0,0,27118,0,0,
+27124,0,27126,0,0,27130,27131,0,0,0,0,0,0,27147,0,0,0,0,27148,27149,0,0,0,0,
+27150,27151,0,27152,0,27159,0,0,0,27164,0,0,0,0,0,0,0,27175,0,27189,0,0,27191,0,
+27193,0,27195,0,27198,0,0,0,0,0,27200,0,0,0,0,27202,0,0,0,0,27203,0,0,27204,0,0,
+27206,0,27207,0,0,0,0,27209,0,0,0,27213,0,0,27216,27219,27220,27222,27223,0,
+27224,0,27225,27226,0,0,27233,0,0,0,0,27235,0,27237,0,27238,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,27239,0,27242,27243,0,27250,0,0,0,27251,0,27253,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,27254,27255,27258,0,0,0,27259,0,0,0,0,0,0,27267,0,27276,27278,
+0,0,0,0,0,0,0,0,0,27296,27297,27301,0,0,0,0,0,0,27302,0,0,0,0,0,0,27312,27313,0,
+0,0,0,0,27318,0,27320,0,27329,0,27330,27331,0,27332,0,0,0,0,27340,0,0,0,27348,0,
+0,0,0,0,0,27350,0,27351,0,0,0,0,27355,0,0,27358,27359,27361,0,0,0,27365,0,27367,
+0,27376,27378,0,0,27379,0,0,0,0,0,0,27396,0,27397,27404,0,0,0,0,0,27408,0,0,0,0,
+27453,0,0,0,27456,0,0,0,27458,0,0,0,0,0,0,0,27459,0,0,0,27460,0,0,27461,0,27465,
+27467,0,0,27469,0,27470,0,27471,0,27477,27482,0,0,0,0,0,0,27484,0,0,0,0,0,0,
+27485,0,0,0,0,0,27493,0,27494,27502,0,0,0,0,0,0,0,0,0,0,0,0,27511,27532,0,0,0,
+27533,27545,0,0,0,27546,0,0,0,0,0,0,0,0,0,0,27547,0,0,27549,27550,0,27551,0,0,0,
+0,0,0,0,27555,0,0,27571,0,27573,27574,27575,27577,0,27578,0,0,27579,27585,0,0,0,
+0,0,27586,0,0,27588,27589,0,0,0,0,27596,0,0,27600,0,0,0,0,0,0,0,0,0,0,0,27608,0,
+0,0,0,0,0,0,0,0,0,0,27610,0,0,0,27618,0,0,27620,0,0,0,27631,0,0,27632,27634,0,
+27636,27638,0,0,0,27643,0,27644,27649,0,0,0,0,0,0,0,0,0,0,0,0,0,27651,27660,0,
+27661,0,0,0,0,0,0,0,27662,0,0,27664,0,27665,0,0,0,27669,0,27671,0,0,0,27673,
+27674,0,0,0,27682,0,0,0,27711,0,27712,27713,27719,27720,0,0,27728,0,27729,0,0,0,
+0,0,0,0,0,0,27731,0,0,27732,0,27733,0,27738,0,0,0,27742,0,0,0,27743,27744,0,0,0,
+0,0,0,27745,27746,0,0,0,27747,27748,27751,27752,0,0,0,27768,27770,0,0,0,27774,
+27775,0,27776,27777,0,0,27781,0,27784,0,27786,0,0,27791,0,27792,27793,27804,0,
+27812,27813,0,0,0,0,0,0,0,0,27814,0,27825,0,27827,0,0,0,0,27828,27861,27862,0,0,
+0,27864,0,0,0,27865,27884,0,27889,0,0,0,0,0,27890,0,27891,0,0,0,27892,0,0,0,0,0,
+27897,27898,0,0,27899,0,0,0,27901,27905,0,0,27920,0,0,27921,0,27922,0,0,0,27931,
+27934,0,0,0,0,0,0,0,0,0,0,27941,0,27942,0,27945,0,27947,27954,0,0,0,0,27960,
+27963,0,0,0,0,0,0,0,0,27964,27965,0,0,0,27967,0,27969,27975,0,27976,27977,0,
+27981,0,27983,28051,28052,0,0,0,0,0,28056,0,0,0,0,0,0,28058,28059,0,0,28061,0,0,
+0,0,0,0,0,28063,0,0,0,0,0,0,28066,0,0,0,0,0,0,28069,28070,28072,0,28073,0,0,
+28074,0,0,0,0,28075,0,0,0,0,0,0,0,28078,0,0,0,0,28085,0,0,0,0,28086,0,0,0,0,0,0,
+28088,0,0,0,0,0,0,0,0,28090,0,28097,28114,28115,0,0,0,0,0,0,0,28116,0,0,0,0,0,
+28118,0,28129,0,28131,0,0,28135,0,0,0,28140,28141,0,0,0,28146,0,0,0,0,28152,0,0,
+0,0,28155,28157,28161,0,0,0,0,28166,0,28167,0,0,0,0,0,0,0,0,0,0,0,28172,0,0,0,0,
+0,0,28173,0,0,28175,0,0,0,0,0,0,0,0,0,28178,28188,0,28190,0,0,0,0,0,28191,0,
+28193,28206,0,0,28207,28209,0,28211,0,28213,0,0,0,28215,28216,28217,0,28222,0,
+28223,28225,0,0,0,28226,0,28227,28229,28232,0,0,0,0,0,0,0,0,0,28235,0,28241,0,0,
+28242,0,0,0,0,28243,0,0,0,28245,0,0,0,28248,28250,0,28251,28252,0,0,0,0,0,0,
+28253,0,0,28254,28255,0,0,28256,0,0,28258,0,0,0,0,0,28259,0,0,28260,0,0,28261,0,
+0,0,0,28262,28263,0,0,28264,0,0,0,28266,0,28268,28269,0,28270,28272,28274,0,
+28277,28278,0,0,0,28279,0,28280,28281,28283,0,28292,0,28294,0,28297,0,0,0,0,
+28299,0,0,0,0,0,28300,0,0,0,0,0,0,0,28301,0,0,0,0,0,0,0,0,0,0,0,0,0,28302,28303,
+0,0,0,0,28304,0,0,28305,0,28312,0,28313,28314,0,0,0,0,0,0,28315,0,0,0,28320,
+28321,0,0,28328,0,0,0,28329,28338,0,28339,0,0,28344,0,0,0,0,0,0,0,0,28347,0,0,0,
+0,0,0,0,0,28348,0,0,0,0,0,28411,0,28412,28413,0,28416,0,0,0,28420,0,0,0,0,0,
+28421,0,0,0,0,28423,0,0,0,28424,0,0,28428,0,0,0,0,0,28429,0,0,0,28431,28434,0,
+28458,0,0,0,0,0,0,0,0,0,0,0,28464,0,0,0,0,28465,0,28467,0,0,0,0,0,0,28471,0,0,0,
+0,28474,0,28480,0,28481,0,0,28485,0,0,0,0,28486,28488,0,0,28489,0,0,0,0,28492,0,
+0,0,28495,0,28497,0,28499,0,0,0,0,28500,0,0,28502,28503,0,0,0,28508,0,0,0,28510,
+0,0,28512,28513,28514,28521,0,28526,0,28527,28528,0,0,0,0,28529,0,0,28532,0,0,
+28537,28538,0,0,0,28539,0,28548,0,28553,28554,0,0,0,0,0,0,0,0,0,0,0,0,28560,
+28563,0,0,28564,0,0,0,0,28565,0,0,0,0,0,0,0,28566,28568,0,0,0,0,0,0,28569,0,0,0,
+28570,0,28572,28573,0,0,0,0,28575,0,0,0,0,28576,28581,28588,0,0,28589,0,0,0,
+28590,28595,0,28598,0,0,28601,0,0,28605,0,0,0,0,28614,28615,28619,0,0,0,0,0,0,
+28620,0,28626,0,0,28628,0,28631,0,28632,0,0,0,0,0,0,28635,0,0,0,28637,28638,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28639,0,28643,0,0,28652,0,0,0,28662,0,
+28670,28671,0,0,0,0,0,0,0,0,0,28672,28673,28675,28676,0,0,0,0,0,0,0,28691,0,0,0,
+28695,0,0,0,28696,0,28697,28698,0,28705,0,28707,28708,28710,0,0,0,0,0,0,0,28711,
+28728,0,0,0,28736,0,0,0,28737,0,0,0,0,0,0,0,0,0,28738,0,28739,0,28741,0,0,28742,
+0,0,0,0,0,0,0,0,0,0,0,28745,0,0,0,0,0,0,28749,28750,28752,28754,28756,0,28757,0,
+0,0,0,28759,28760,0,0,0,0,0,0,28762,0,0,0,28764,0,0,0,0,0,0,28766,0,28767,28768,
+0,0,0,0,28769,28770,0,0,0,0,0,0,0,0,0,0,0,0,0,28771,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,28772,0,28773,0,28782,0,0,0,0,0,0,28784,0,28785,0,28786,0,0,0,28787,0,0,0,
+28797,0,0,0,0,0,0,28799,0,0,28801,0,0,0,0,28802,0,28805,0,0,28806,0,0,28807,0,0,
+0,0,0,0,0,28808,0,0,0,0,0,28810,28812,0,0,28816,28819,0,0,28821,0,28826,0,0,0,
+28842,28852,0,0,28853,0,28854,28855,0,0,0,28857,0,0,0,28858,0,28867,28868,28869,
+0,0,0,28874,28880,28882,28890,28892,0,0,0,0,0,0,0,28895,0,0,0,28898,28899,0,0,0,
+28900,0,0,28904,0,28906,0,0,0,0,28907,0,0,0,0,0,0,28908,0,0,0,28910,0,28914,0,0,
+0,0,0,0,0,28915,28916,28919,0,0,28920,0,28921,0,0,0,0,0,0,0,0,28924,0,0,0,0,
+28926,28929,0,0,0,28930,0,28936,0,28939,0,0,0,0,28942,0,0,0,0,0,0,28956,0,0,0,
+28966,0,0,0,0,28967,0,0,0,0,0,0,0,0,0,28968,0,28971,0,28975,28976,0,28982,28983,
+0,0,28984,28989,28996,28997,28998,0,0,0,0,0,0,28999,0,0,0,0,0,29000,0,29001,0,0,
+0,29009,0,0,29011,0,0,29021,0,0,0,0,29024,0,29025,0,0,0,0,0,29026,0,0,0,29036,0,
+0,0,29037,0,0,0,0,29038,0,29045,0,29047,0,0,0,0,0,0,0,0,0,29051,0,0,0,29054,
+29056,29062,0,29070,29082,0,0,0,29083,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29084,0,0,
+0,0,29085,29088,0,0,0,0,0,0,0,29090,29097,0,0,0,29103,0,0,0,0,0,0,0,0,29105,0,0,
+0,0,0,29107,0,29109,0,0,0,29115,0,0,29120,0,0,29138,29140,0,0,0,0,0,0,0,0,0,
+29152,0,29160,29174,0,29176,0,0,29180,0,29181,0,0,0,0,0,0,0,0,29228,0,0,29229,0,
+0,29230,0,0,0,0,0,0,0,0,0,0,29234,0,0,0,29241,0,29245,0,29248,0,29250,29256,
+29280,0,29282,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29285,0,0,29286,29291,29292,0,0,0,0,
+29294,0,29295,0,0,0,0,0,29296,29297,29298,29300,0,29302,0,0,29304,29307,0,29312,
+0,0,0,29322,0,0,29323,0,0,29324,29326,29328,0,29335,0,0,0,0,0,0,0,29338,29339,0,
+0,0,0,0,29341,29343,0,0,0,0,29344,0,0,0,0,0,29345,0,0,0,0,29346,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,29347,29348,29349,0,0,29354,0,0,29355,0,0,0,0,0,0,0,0,29357,0,0,
+0,0,29364,0,29365,0,0,0,0,0,0,0,29366,0,0,29368,0,0,0,0,0,0,0,0,29378,0,29381,0,
+0,0,0,0,0,0,0,29386,0,0,0,0,0,0,29389,0,0,0,29390,0,0,29391,29397,0,29398,29412,
+29414,29418,29419,0,0,0,0,0,0,0,29420,0,0,0,0,0,0,0,29423,0,0,0,29435,0,0,0,
+29437,0,0,29439,0,29441,0,0,0,0,29443,0,29446,29450,29452,0,0,0,0,0,29456,0,0,0,
+0,0,29461,0,0,0,29464,0,0,0,0,0,0,0,0,29468,0,29473,0,0,0,29486,0,0,0,29490,0,0,
+0,29491,29492,0,0,29497,0,0,0,29498,0,29499,0,29502,29505,0,29509,0,0,0,29510,0,
+0,0,29512,0,0,0,29516,0,0,0,0,0,0,0,0,29518,0,29519,0,0,0,0,0,29520,29521,29529,
+0,0,0,0,0,0,0,0,29530,0,0,29531,29538,0,29540,0,0,0,29542,0,29543,29544,29547,0,
+0,29548,0,0,0,29549,0,0,0,29550,0,0,29552,0,0,0,0,29558,29561,0,29562,29564,0,0,
+29565,0,0,29566,0,0,0,0,0,0,0,0,0,0,29578,29584,29586,29591,0,0,0,0,29593,29594,
+0,0,29597,0,0,29613,0,29614,0,29615,0,0,0,0,29616,29617,0,0,29625,0,0,0,29632,0,
+0,0,0,0,0,0,29633,0,0,0,0,0,29634,29635,29637,0,29638,0,29641,29643,0,0,0,0,0,0,
+29644,0,29645,0,29649,0,0,0,29650,0,29653,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29656,
+29659,0,0,29660,0,0,0,29661,0,0,0,0,0,29664,0,0,0,29671,29673,0,0,0,0,0,0,0,
+29675,0,29677,29679,0,0,29684,0,0,0,0,0,29685,0,0,0,29687,0,0,0,29688,0,29689,
+29690,29700,0,29701,0,0,0,29702,0,29706,0,0,0,0,0,0,0,29720,0,29721,0,29727,0,
+29733,29734,0,29750,29761,0,29763,0,0,0,0,0,29764,0,0,29765,0,0,0,29771,0,0,0,0,
+0,0,0,0,0,0,0,0,29772,0,0,0,29773,29774,29775,0,0,0,0,0,0,0,0,0,0,0,29822,0,0,0,
+29824,0,29825,0,0,0,0,0,29827,0,0,0,0,0,0,0,0,29829,0,29832,29834,0,0,29835,0,0,
+29837,29838,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29843,0,0,0,0,29844,29845,0,0,0,
+0,0,0,0,0,0,29849,0,0,29869,29872,29890,29905,0,0,0,0,0,29907,29921,0,29922,0,0,
+29923,29926,29944,29946,0,0,0,0,0,0,0,29947,29948,0,0,0,29951,0,0,0,0,0,29953,0,
+0,29956,0,29957,0,0,29962,0,0,0,0,29971,0,0,0,29972,0,0,0,0,0,29978,0,29979,
+29992,30007,30008,30010,0,0,0,30013,0,0,0,0,30014,30016,0,0,0,0,0,0,0,0,0,0,0,
+30017,0,0,0,0,0,30023,30031,0,0,30033,0,0,0,0,0,0,0,0,0,0,30034,0,30038,0,30039,
+0,30040,0,0,0,0,0,0,30067,30068,0,0,0,30069,0,30072,0,0,0,30073,0,0,0,0,30075,0,
+0,0,0,0,0,30079,0,0,30080,0,0,0,0,0,30082,0,0,0,0,0,0,0,0,0,0,0,30084,30090,0,0,
+30091,0,0,0,0,30098,30118,0,30119,0,30121,30130,0,0,0,0,0,0,0,0,0,0,0,0,0,30131,
+30132,30133,0,0,0,0,0,0,30135,0,0,0,0,0,0,0,0,0,0,0,30136,0,0,30137,30138,0,0,0,
+30139,30146,0,0,0,0,0,30147,0,0,30148,30151,0,0,0,30168,0,30172,30173,0,0,0,0,0,
+0,0,0,30180,30181,0,30192,0,0,0,0,0,0,0,30194,30196,0,0,30199,0,0,30202,0,0,0,0,
+30203,0,0,0,0,0,0,0,0,0,0,30213,0,0,0,30216,0,0,30217,0,0,0,30218,0,0,0,0,30219,
+0,30220,0,30222,30227,0,0,0,0,0,30231,0,0,30233,30235,0,0,0,0,30238,0,30240,
+30243,30245,0,30250,30252,0,0,0,30269,0,0,30271,30272,0,0,0,30278,30280,0,0,
+30282,0,30284,0,30294,0,0,0,0,30295,30296,0,0,0,0,0,30298,30299,30302,30304,
+30306,0,0,0,0,0,0,30316,30317,0,0,0,30318,0,0,0,30319,0,30320,30322,30326,0,0,0,
+0,0,30327,0,30332,30348,30349,0,0,30356,0,0,0,0,0,0,0,0,30357,0,30358,0,30359,
+30360,0,0,30365,30366,30378,0,0,0,0,30379,0,0,30381,0,30385,0,30388,30397,0,0,0,
+30401,0,0,0,0,30403,0,0,0,0,0,30404,0,0,30405,0,30406,30408,0,30409,0,30410,0,0,
+0,30417,0,0,30418,30419,0,30420,0,30424,0,0,0,30427,30430,30432,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,30433,0,0,0,0,0,0,0,30436,0,30437,30438,0,30441,30442,0,0,
+0,30445,0,0,0,0,30452,30456,30457,0,0,0,30458,0,30464,0,0,0,0,0,0,30467,0,30469,
+0,0,0,0,0,30477,0,0,30484,0,0,0,0,0,30485,0,0,0,0,0,30486,30487,30497,30498,0,0,
+0,0,0,0,0,0,0,0,30505,0,30508,0,0,0,30509,30510,0,30514,30516,0,0,0,0,0,0,0,0,0,
+0,0,30523,0,30524,0,30525,0,0,0,0,30537,0,0,30538,0,0,0,0,0,30553,0,0,30555,
+30556,30558,30559,30560,0,0,30561,0,30562,0,0,0,0,0,0,0,0,30563,30570,30571,0,
+30586,30587,0,0,30590,0,0,30594,0,0,0,0,30611,30612,30623,30634,0,0,30636,30640,
+30655,30656,0,30657,0,0,30658,30669,0,30670,0,30676,30678,0,0,0,0,0,0,0,30679,0,
+0,0,0,0,0,0,0,0,0,0,30695,0,0,30698,0,0,0,0,30700,0,0,0,0,30701,0,30702,30703,0,
+0,0,0,30707,0,0,0,30709,0,0,30710,30719,30729,0,0,0,0,0,0,0,0,0,30731,0,0,30733,
+0,0,0,30734,0,0,0,0,0,30736,30737,0,0,0,30740,0,0,0,30743,0,30746,0,30747,30748,
+0,0,30751,30752,30753,0,0,0,30754,0,0,30760,0,0,0,0,0,0,0,30763,0,30764,0,0,
+30766,0,30769,30770,30771,30774,30777,0,0,30779,30780,30781,0,0,0,0,30790,0,0,0,
+30792,0,0,0,0,30810,0,0,0,0,0,0,0,30812,30819,0,0,30823,30824,0,30825,0,30827,0,
+0,0,0,0,0,30828,0,0,30830,0,0,0,30834,0,30835,0,30837,30838,0,30845,0,0,0,0,0,
+30846,30847,0,0,30849,0,30851,0,0,0,0,0,30852,30858,0,0,30859,0,30865,0,0,30866,
+0,0,30868,0,0,30869,0,0,0,30881,30883,0,0,0,0,0,30889,0,30891,0,0,0,0,30894,0,
+30895,0,30897,0,30898,0,0,0,30904,30906,0,30909,0,0,0,0,0,0,30910,0,0,0,30915,
+30933,30942,0,0,0,0,30943,0,0,30945,0,0,0,0,0,0,30946,0,0,30947,0,0,30955,30956,
+0,0,30960,0,0,30961,30962,30966,0,0,30969,30974,0,0,0,30976,0,0,30977,0,30978,
+30982,0,0,0,0,0,0,0,30994,30995,30998,0,31000,0,0,31001,0,0,31003,31005,0,0,
+31006,31011,0,0,31014,0,31016,0,0,0,0,31018,0,0,31020,31023,31024,31025,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,31027,31028,31029,0,0,0,0,0,0,31032,0,0,0,0,0,0,0,0,0,0,0,
+31036,31037,31038,0,0,0,31041,31043,31045,0,31047,0,0,0,31048,0,31049,0,0,0,
+31053,31054,31055,0,0,31063,0,0,0,0,0,31066,0,31068,31071,0,0,0,31072,31073,0,0,
+0,0,31075,0,0,31076,0,0,0,31077,31079,0,31080,0,0,0,0,0,0,0,0,0,0,31087,0,31142,
+0,31144,0,0,31145,31146,31147,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31149,0,31151,31152,0,
+0,0,0,0,0,0,31162,31171,31174,31175,0,0,0,31176,0,0,0,0,0,0,0,31179,0,0,0,31186,
+0,0,0,31192,31195,0,0,31196,0,0,0,0,0,0,0,0,31198,0,0,0,0,0,31199,0,0,0,31205,0,
+0,0,0,31211,31215,0,0,0,0,31231,0,31232,0,0,0,0,0,0,0,0,0,0,31233,31236,31253,0,
+31254,0,0,0,0,0,0,31255,0,0,31257,0,0,0,0,0,0,0,0,0,31258,31259,0,0,31260,0,
+31261,0,0,0,0,0,31262,31263,0,0,31264,0,31266,0,31267,0,0,0,0,0,31281,0,31282,0,
+31284,0,0,31285,31287,31288,0,0,31290,0,0,0,31292,31295,0,31299,0,31300,0,0,0,0,
+0,31302,0,0,0,0,31303,0,0,0,0,0,0,31304,0,0,0,0,0,31305,31308,31309,31315,0,
+31317,0,0,0,0,0,31323,0,31324,0,0,0,0,0,31325,31327,0,0,31331,0,0,0,0,0,31333,0,
+0,0,0,0,31336,0,0,31337,0,0,0,0,0,0,31338,0,0,0,0,0,0,0,0,0,0,0,0,31339,0,0,0,0,
+0,0,0,31342,0,0,0,0,31345,0,0,0,0,0,0,0,0,31347,0,0,0,0,0,0,31348,0,0,31350,
+31351,0,31352,0,0,31354,0,0,0,0,31355,0,0,31356,0,0,0,0,0,0,0,0,0,0,31363,0,
+31372,0,0,31373,0,0,0,0,0,0,0,0,0,31376,0,31388,0,31389,0,31392,0,31401,0,31405,
+31407,31408,0,31409,0,0,0,0,0,0,31413,31415,0,0,0,31416,31418,0,0,0,0,0,0,31422,
+31423,0,0,31424,0,31425,31432,0,0,0,0,0,0,0,0,0,31433,0,0,0,0,0,0,0,0,31434,0,0,
+0,0,0,0,31435,0,0,0,0,31438,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31442,0,31444,0,
+31448,0,0,31451,0,0,0,0,31452,0,31461,31465,0,0,31466,0,0,31467,0,0,31468,0,0,0,
+31469,31473,0,31476,0,0,0,0,31489,31490,0,0,0,0,0,0,0,31492,31493,31494,0,0,0,0,
+31501,31504,31505,0,0,0,0,0,0,0,0,0,31509,0,0,0,0,31510,0,0,31511,0,0,31513,0,0,
+0,0,0,0,0,0,0,31514,0,31522,31536,31539,31540,0,31541,0,0,0,0,0,0,31546,31553,
+31559,0,0,0,31560,31561,31562,0,0,31564,31567,0,31569,0,0,0,31570,0,0,0,0,31571,
+0,0,0,0,0,0,31572,31574,31580,31581,0,0,31582,31584,31585,31586,31595,0,31596,0,
+0,0,0,31597,0,31599,0,31600,31601,0,0,31603,31604,0,0,31608,31610,0,0,0,31611,0,
+31615,0,0,0,0,31616,0,0,0,0,0,0,31617,0,0,0,0,0,31618,0,0,0,0,0,0,31621,0,0,0,0,
+0,0,0,0,0,31622,31625,0,0,0,0,31627,0,31641,0,0,31642,0,0,31643,0,0,0,0,0,0,0,0,
+0,31644,0,31646,0,0,0,0,31648,0,0,0,31652,0,0,0,31657,0,0,31676,0,0,0,0,0,0,0,
+31689,31691,31692,0,31694,0,0,0,31696,0,31702,0,31703,0};
+
+static const DictWord kStaticDictionaryWords[31705] = {
+{0,0,0},{8,0,1002},{136,0,1015},{4,0,683},{4,10,325},{138,10,125},{7,11,572},{9,
+11,592},{11,11,680},{11,11,842},{11,11,924},{12,11,356},{12,11,550},{13,11,317},
+{13,11,370},{13,11,469},{13,11,471},{14,11,397},{18,11,69},{146,11,145},{134,0,
+1265},{136,11,534},{134,0,1431},{11,0,138},{140,0,40},{4,0,155},{7,0,1689},{4,10
+,718},{135,10,1216},{4,0,245},{5,0,151},{5,0,741},{6,0,1147},{7,0,498},{7,0,870}
+,{7,0,1542},{12,0,213},{14,0,36},{14,0,391},{17,0,111},{18,0,6},{18,0,46},{18,0,
+151},{19,0,36},{20,0,32},{20,0,56},{20,0,69},{20,0,102},{21,0,4},{22,0,8},{22,0,
+10},{22,0,14},{150,0,31},{4,0,624},{135,0,1752},{5,10,124},{5,10,144},{6,10,548}
+,{7,10,15},{7,10,153},{137,10,629},{6,0,503},{9,0,586},{13,0,468},{14,0,66},{16,
+0,58},{7,10,1531},{8,10,416},{9,10,275},{10,10,100},{11,10,658},{11,10,979},{12,
+10,86},{14,10,207},{15,10,20},{143,10,25},{5,0,603},{7,0,1212},{9,0,565},{14,0,
+301},{5,10,915},{6,10,1783},{7,10,211},{7,10,1353},{9,10,83},{10,10,376},{10,10,
+431},{11,10,543},{12,10,664},{13,10,280},{13,10,428},{14,10,128},{17,10,52},{145
+,10,81},{4,0,492},{133,0,451},{135,0,835},{141,0,70},{132,0,539},{7,11,748},{139
+,11,700},{7,11,1517},{11,11,597},{14,11,76},{14,11,335},{148,11,33},{6,0,113},{
+135,0,436},{4,10,338},{133,10,400},{136,0,718},{133,11,127},{133,11,418},{6,0,
+1505},{7,0,520},{6,11,198},{11,10,892},{140,11,83},{4,10,221},{5,10,659},{5,10,
+989},{7,10,697},{7,10,1211},{138,10,284},{135,0,1070},{5,11,276},{6,11,55},{135,
+11,1369},{134,0,1515},{6,11,1752},{136,11,726},{138,10,507},{15,0,78},{4,10,188}
+,{135,10,805},{5,10,884},{139,10,991},{133,11,764},{134,10,1653},{6,11,309},{7,
+11,331},{138,11,550},{135,11,1861},{132,11,348},{135,11,986},{135,11,1573},{12,0
+,610},{13,0,431},{144,0,59},{9,11,799},{140,10,166},{134,0,1530},{132,0,750},{
+132,0,307},{133,0,964},{6,11,194},{7,11,133},{10,11,493},{10,11,570},{139,11,664
+},{5,11,24},{5,11,569},{6,11,3},{6,11,119},{6,11,143},{6,11,440},{7,11,295},{7,
+11,599},{7,11,1686},{7,11,1854},{8,11,424},{9,11,43},{9,11,584},{9,11,760},{10,
+11,148},{10,11,328},{11,11,159},{11,11,253},{11,11,506},{12,11,487},{12,11,531},
+{144,11,33},{136,10,760},{5,11,14},{5,11,892},{6,11,283},{7,11,234},{136,11,537}
+,{135,11,1251},{4,11,126},{8,11,635},{147,11,34},{4,11,316},{135,11,1561},{6,0,
+999},{6,0,1310},{137,11,861},{4,11,64},{5,11,352},{5,11,720},{6,11,368},{139,11,
+359},{4,0,75},{5,0,180},{6,0,500},{7,0,58},{7,0,710},{10,0,645},{136,10,770},{
+133,0,649},{6,0,276},{7,0,282},{7,0,879},{7,0,924},{8,0,459},{9,0,599},{9,0,754}
+,{11,0,574},{12,0,128},{12,0,494},{13,0,52},{13,0,301},{15,0,30},{143,0,132},{
+132,0,200},{4,10,89},{5,10,489},{6,10,315},{7,10,553},{7,10,1745},{138,10,243},{
+135,11,1050},{7,0,1621},{6,10,1658},{9,10,3},{10,10,154},{11,10,641},{13,10,85},
+{13,10,201},{141,10,346},{6,11,175},{137,11,289},{5,11,432},{133,11,913},{6,0,
+225},{137,0,211},{7,0,718},{8,0,687},{139,0,374},{4,10,166},{133,10,505},{9,0,
+110},{134,10,1670},{8,0,58},{9,0,724},{11,0,809},{13,0,113},{145,0,72},{6,0,345}
+,{7,0,1247},{144,11,82},{5,11,931},{134,11,1698},{8,0,767},{8,0,803},{9,0,301},{
+137,0,903},{139,0,203},{134,0,1154},{7,0,1949},{136,0,674},{134,0,259},{135,0,
+1275},{5,11,774},{6,11,1637},{6,11,1686},{134,11,1751},{134,0,1231},{7,10,445},{
+8,10,307},{8,10,704},{10,10,41},{10,10,439},{11,10,237},{11,10,622},{140,10,201}
+,{136,0,254},{6,11,260},{135,11,1484},{139,0,277},{135,10,1977},{4,10,189},{5,10
+,713},{6,11,573},{136,10,57},{138,10,371},{132,10,552},{134,11,344},{133,0,248},
+{9,0,800},{10,0,693},{11,0,482},{11,0,734},{11,0,789},{134,11,240},{4,0,116},{5,
+0,95},{5,0,445},{7,0,1688},{8,0,29},{9,0,272},{11,0,509},{11,0,915},{4,11,292},{
+4,11,736},{5,11,871},{6,11,171},{6,11,1689},{7,11,1324},{7,11,1944},{9,11,415},{
+9,11,580},{14,11,230},{146,11,68},{7,0,490},{13,0,100},{143,0,75},{135,0,1641},{
+133,0,543},{7,11,209},{8,11,661},{10,11,42},{11,11,58},{12,11,58},{12,11,118},{
+141,11,32},{5,0,181},{8,0,41},{6,11,63},{135,11,920},{133,0,657},{133,11,793},{
+138,0,709},{7,0,25},{8,0,202},{138,0,536},{5,11,665},{135,10,1788},{145,10,49},{
+9,0,423},{140,0,89},{5,11,67},{6,11,62},{6,11,374},{135,11,1391},{8,0,113},{9,0,
+877},{10,0,554},{11,0,83},{12,0,136},{19,0,109},{9,11,790},{140,11,47},{138,10,
+661},{4,0,963},{10,0,927},{14,0,442},{135,10,1945},{133,0,976},{132,0,206},{4,11
+,391},{135,11,1169},{134,0,2002},{6,0,696},{134,0,1008},{134,0,1170},{132,11,271
+},{7,0,13},{8,0,226},{10,0,537},{11,0,570},{11,0,605},{11,0,799},{11,0,804},{12,
+0,85},{12,0,516},{12,0,623},{13,0,112},{13,0,361},{14,0,77},{14,0,78},{17,0,28},
+{19,0,110},{140,11,314},{132,0,769},{134,0,1544},{4,0,551},{137,0,678},{5,10,84}
+,{134,10,163},{9,0,57},{9,0,459},{10,0,425},{11,0,119},{12,0,184},{12,0,371},{13
+,0,358},{145,0,51},{5,0,188},{5,0,814},{8,0,10},{9,0,421},{9,0,729},{10,0,609},{
+11,0,689},{4,11,253},{5,10,410},{5,11,544},{7,11,300},{137,11,340},{134,0,624},{
+138,11,321},{135,0,1941},{18,0,130},{5,10,322},{8,10,186},{9,10,262},{10,10,187}
+,{142,10,208},{5,11,53},{5,11,541},{6,11,94},{6,11,499},{7,11,230},{139,11,321},
+{133,10,227},{4,0,378},{4,11,920},{5,11,25},{5,11,790},{6,11,457},{135,11,853},{
+137,0,269},{132,0,528},{134,0,1146},{7,10,1395},{8,10,486},{9,10,236},{9,10,878}
+,{10,10,218},{11,10,95},{19,10,17},{147,10,31},{7,10,2043},{8,10,672},{141,10,
+448},{134,0,1105},{134,0,1616},{134,11,1765},{140,11,163},{5,10,412},{133,11,822
+},{132,11,634},{6,0,656},{134,11,1730},{134,0,1940},{5,0,104},{6,0,173},{135,0,
+1631},{136,10,562},{6,11,36},{7,11,658},{8,11,454},{147,11,86},{5,0,457},{134,10
+,1771},{7,0,810},{8,0,138},{8,0,342},{9,0,84},{10,0,193},{11,0,883},{140,0,359},
+{9,0,620},{135,10,1190},{137,10,132},{7,11,975},{137,11,789},{6,0,95},{6,0,1934}
+,{136,0,967},{141,11,335},{6,0,406},{10,0,409},{10,0,447},{11,0,44},{140,0,100},
+{4,10,317},{135,10,1279},{132,0,477},{134,0,1268},{6,0,1941},{8,0,944},{5,10,63}
+,{133,10,509},{132,0,629},{132,11,104},{4,0,246},{133,0,375},{6,0,1636},{132,10,
+288},{135,11,1614},{9,0,49},{10,0,774},{8,10,89},{8,10,620},{11,10,628},{12,10,
+322},{143,10,124},{4,0,282},{7,0,1034},{11,0,398},{11,0,634},{12,0,1},{12,0,79},
+{12,0,544},{14,0,237},{17,0,10},{146,0,20},{132,0,824},{7,11,45},{9,11,542},{9,
+11,566},{138,11,728},{5,0,118},{5,0,499},{6,0,476},{6,0,665},{6,0,1176},{6,0,
+1196},{7,0,600},{7,0,888},{135,0,1096},{7,0,296},{7,0,596},{8,0,560},{8,0,586},{
+9,0,612},{11,0,304},{12,0,46},{13,0,89},{14,0,112},{145,0,122},{5,0,894},{6,0,
+1772},{9,0,1009},{138,10,120},{5,11,533},{7,11,755},{138,11,780},{151,10,1},{6,0
+,1474},{7,11,87},{142,11,288},{139,0,366},{137,10,461},{7,11,988},{7,11,1939},{9
+,11,64},{9,11,502},{12,11,7},{12,11,34},{13,11,12},{13,11,234},{147,11,77},{7,0,
+1599},{7,0,1723},{8,0,79},{8,0,106},{8,0,190},{8,0,302},{8,0,383},{8,0,713},{9,0
+,119},{9,0,233},{9,0,419},{9,0,471},{10,0,181},{10,0,406},{11,0,57},{11,0,85},{
+11,0,120},{11,0,177},{11,0,296},{11,0,382},{11,0,454},{11,0,758},{11,0,999},{12,
+0,27},{12,0,98},{12,0,131},{12,0,245},{12,0,312},{12,0,446},{12,0,454},{13,0,25}
+,{13,0,98},{13,0,426},{13,0,508},{14,0,70},{14,0,163},{14,0,272},{14,0,277},{14,
+0,370},{15,0,95},{15,0,138},{15,0,167},{17,0,38},{148,0,96},{135,10,1346},{10,0,
+200},{19,0,2},{151,0,22},{135,11,141},{134,10,85},{134,0,1759},{138,0,372},{145,
+0,16},{8,0,943},{132,11,619},{139,11,88},{5,11,246},{8,11,189},{9,11,355},{9,11,
+512},{10,11,124},{10,11,453},{11,11,143},{11,11,416},{11,11,859},{141,11,341},{5
+,0,258},{134,0,719},{6,0,1798},{6,0,1839},{8,0,900},{10,0,874},{10,0,886},{12,0,
+698},{12,0,732},{12,0,770},{16,0,106},{18,0,163},{18,0,170},{18,0,171},{152,0,20
+},{9,0,707},{11,0,326},{11,0,339},{12,0,423},{12,0,502},{20,0,62},{9,11,707},{11
+,11,326},{11,11,339},{12,11,423},{12,11,502},{148,11,62},{5,0,30},{7,0,495},{8,0
+,134},{9,0,788},{140,0,438},{133,11,678},{5,10,279},{6,10,235},{7,10,468},{8,10,
+446},{9,10,637},{10,10,717},{11,10,738},{140,10,514},{5,11,35},{6,11,287},{7,11,
+862},{7,11,1886},{138,11,179},{7,0,1948},{7,0,2004},{132,11,517},{5,10,17},{6,10
+,371},{137,10,528},{4,0,115},{5,0,669},{6,0,407},{8,0,311},{11,0,10},{141,0,5},{
+137,0,381},{5,0,50},{6,0,439},{7,0,780},{135,0,1040},{136,11,667},{11,11,403},{
+146,11,83},{5,0,1},{6,0,81},{138,0,520},{134,0,738},{5,0,482},{8,0,98},{9,0,172}
+,{10,0,360},{10,0,700},{10,0,822},{11,0,302},{11,0,778},{12,0,50},{12,0,127},{12
+,0,396},{13,0,62},{13,0,328},{14,0,122},{147,0,72},{9,11,157},{10,11,131},{140,
+11,72},{135,11,714},{135,11,539},{5,0,2},{6,0,512},{7,0,797},{7,0,1494},{8,0,253
+},{8,0,589},{9,0,77},{10,0,1},{10,0,129},{10,0,225},{11,0,118},{11,0,226},{11,0,
+251},{11,0,430},{11,0,701},{11,0,974},{11,0,982},{12,0,64},{12,0,260},{12,0,488}
+,{140,0,690},{5,11,394},{7,11,367},{7,11,487},{7,11,857},{7,11,1713},{8,11,246},
+{9,11,537},{10,11,165},{12,11,219},{140,11,561},{136,0,557},{5,10,779},{5,10,807
+},{6,10,1655},{134,10,1676},{4,10,196},{5,10,558},{133,10,949},{11,11,827},{12,
+11,56},{14,11,34},{143,11,148},{137,0,347},{133,0,572},{134,0,832},{4,0,12},{7,0
+,504},{7,0,522},{7,0,809},{8,0,797},{141,0,88},{4,10,752},{133,11,449},{7,11,86}
+,{8,11,103},{145,11,69},{7,11,2028},{138,11,641},{5,0,528},{6,11,1},{142,11,2},{
+134,0,861},{10,0,294},{4,10,227},{5,10,159},{5,10,409},{7,10,80},{10,10,479},{12
+,10,418},{14,10,50},{14,10,249},{142,10,295},{7,10,1470},{8,10,66},{8,10,137},{8
+,10,761},{9,10,638},{11,10,80},{11,10,212},{11,10,368},{11,10,418},{12,10,8},{13
+,10,15},{16,10,61},{17,10,59},{19,10,28},{148,10,84},{20,0,109},{135,11,1148},{6
+,11,277},{7,11,1274},{7,11,1386},{7,11,1392},{12,11,129},{146,11,87},{6,11,187},
+{7,11,39},{7,11,1203},{8,11,380},{8,11,542},{14,11,117},{149,11,28},{134,0,1187}
+,{5,0,266},{9,0,290},{9,0,364},{10,0,293},{11,0,606},{142,0,45},{6,11,297},{7,11
+,793},{139,11,938},{4,0,50},{6,0,594},{9,0,121},{10,0,49},{10,0,412},{139,0,834}
+,{136,0,748},{7,11,464},{8,11,438},{11,11,105},{11,11,363},{12,11,231},{14,11,
+386},{15,11,102},{148,11,75},{132,0,466},{13,0,399},{14,0,337},{6,10,38},{7,10,
+1220},{8,10,185},{8,10,256},{9,10,22},{9,10,331},{10,10,738},{11,10,205},{11,10,
+540},{11,10,746},{13,10,465},{142,10,194},{9,0,378},{141,0,162},{137,0,519},{4,
+10,159},{6,10,115},{7,10,252},{7,10,257},{7,10,1928},{8,10,69},{9,10,384},{10,10
+,91},{10,10,615},{12,10,375},{14,10,235},{18,10,117},{147,10,123},{5,11,604},{5,
+10,911},{136,10,278},{132,0,667},{8,0,351},{9,0,322},{4,10,151},{135,10,1567},{
+134,0,902},{133,10,990},{12,0,180},{5,10,194},{7,10,1662},{137,10,90},{4,0,869},
+{134,0,1996},{134,0,813},{133,10,425},{137,11,761},{132,0,260},{133,10,971},{5,
+11,20},{6,11,298},{7,11,659},{7,11,1366},{137,11,219},{4,0,39},{5,0,36},{7,0,
+1843},{8,0,407},{11,0,144},{140,0,523},{4,0,510},{10,0,587},{139,10,752},{7,0,29
+},{7,0,66},{7,0,1980},{10,0,487},{138,0,809},{13,0,260},{14,0,82},{18,0,63},{137
+,10,662},{5,10,72},{6,10,264},{7,10,21},{7,10,46},{7,10,2013},{8,10,215},{8,10,
+513},{10,10,266},{139,10,22},{134,0,570},{6,0,565},{7,0,1667},{4,11,439},{10,10,
+95},{11,10,603},{12,11,242},{13,10,443},{14,10,160},{143,10,4},{134,0,1464},{134
+,10,431},{9,0,372},{15,0,2},{19,0,10},{19,0,18},{5,10,874},{6,10,1677},{143,10,0
+},{132,0,787},{6,0,380},{12,0,399},{21,0,19},{7,10,939},{7,10,1172},{7,10,1671},
+{9,10,540},{10,10,696},{11,10,265},{11,10,732},{11,10,928},{11,10,937},{141,10,
+438},{137,0,200},{132,11,233},{132,0,516},{134,11,577},{132,0,844},{11,0,887},{
+14,0,365},{142,0,375},{132,11,482},{8,0,821},{140,0,44},{7,0,1655},{136,0,305},{
+5,10,682},{135,10,1887},{135,11,346},{132,10,696},{4,0,10},{7,0,917},{139,0,786}
+,{5,11,795},{6,11,1741},{8,11,417},{137,11,782},{4,0,1016},{134,0,2031},{5,0,684
+},{4,10,726},{133,10,630},{6,0,1021},{134,0,1480},{8,10,802},{136,10,838},{134,0
+,27},{134,0,395},{135,11,622},{7,11,625},{135,11,1750},{4,11,203},{135,11,1936},
+{6,10,118},{7,10,215},{7,10,1521},{140,10,11},{132,0,813},{136,0,511},{7,10,615}
+,{138,10,251},{135,10,1044},{145,0,56},{133,10,225},{6,0,342},{6,0,496},{8,0,275
+},{137,0,206},{4,0,909},{133,0,940},{132,0,891},{7,11,311},{9,11,308},{140,11,
+255},{4,10,370},{5,10,756},{135,10,1326},{4,0,687},{134,0,1596},{134,0,1342},{6,
+10,1662},{7,10,48},{8,10,771},{10,10,116},{13,10,104},{14,10,105},{14,10,184},{
+15,10,168},{19,10,92},{148,10,68},{138,10,209},{4,11,400},{5,11,267},{135,11,232
+},{151,11,12},{6,0,41},{141,0,160},{141,11,314},{134,0,1718},{136,0,778},{142,11
+,261},{134,0,1610},{133,0,115},{132,0,294},{14,0,314},{132,10,120},{132,0,983},{
+5,0,193},{140,0,178},{138,10,429},{5,10,820},{135,10,931},{6,0,994},{6,0,1051},{
+6,0,1439},{7,0,174},{133,11,732},{4,11,100},{7,11,679},{8,11,313},{138,10,199},{
+6,10,151},{6,10,1675},{7,10,383},{151,10,10},{6,0,1796},{8,0,848},{8,0,867},{8,0
+,907},{10,0,855},{140,0,703},{140,0,221},{4,0,122},{5,0,796},{5,0,952},{6,0,1660
+},{6,0,1671},{8,0,567},{9,0,687},{9,0,742},{10,0,686},{11,0,682},{11,0,909},{140
+,0,281},{5,11,362},{5,11,443},{6,11,318},{7,11,1019},{139,11,623},{5,11,463},{
+136,11,296},{11,0,583},{13,0,262},{6,10,1624},{12,10,422},{142,10,360},{5,0,179}
+,{7,0,1095},{135,0,1213},{4,10,43},{4,11,454},{5,10,344},{133,10,357},{4,0,66},{
+7,0,722},{135,0,904},{134,0,773},{7,0,352},{133,10,888},{5,11,48},{5,11,404},{6,
+11,557},{7,11,458},{8,11,597},{10,11,455},{10,11,606},{11,11,49},{11,11,548},{12
+,11,476},{13,11,18},{141,11,450},{134,11,418},{132,10,711},{5,11,442},{135,11,
+1984},{141,0,35},{137,0,152},{134,0,1197},{135,11,1093},{137,11,203},{137,10,440
+},{10,0,592},{10,0,753},{12,0,317},{12,0,355},{12,0,465},{12,0,469},{12,0,560},{
+12,0,578},{141,0,243},{133,0,564},{134,0,797},{5,10,958},{133,10,987},{5,11,55},
+{7,11,376},{140,11,161},{133,11,450},{134,0,556},{134,0,819},{11,10,276},{142,10
+,293},{7,0,544},{138,0,61},{8,0,719},{4,10,65},{5,10,479},{5,10,1004},{7,10,1913
+},{8,10,317},{9,10,302},{10,10,612},{141,10,22},{4,0,5},{5,0,498},{8,0,637},{9,0
+,521},{4,11,213},{4,10,261},{7,11,223},{7,10,510},{136,11,80},{5,0,927},{7,0,101
+},{4,10,291},{7,11,381},{7,11,806},{7,11,820},{8,11,354},{8,11,437},{8,11,787},{
+9,10,515},{9,11,657},{10,11,58},{10,11,339},{10,11,749},{11,11,914},{12,10,152},
+{12,11,162},{12,10,443},{13,11,75},{13,10,392},{14,11,106},{14,11,198},{14,11,
+320},{14,10,357},{14,11,413},{146,11,43},{6,0,1153},{7,0,1441},{136,11,747},{4,0
+,893},{5,0,780},{133,0,893},{138,11,654},{133,11,692},{133,0,238},{134,11,191},{
+4,10,130},{135,10,843},{6,0,1296},{5,10,42},{5,10,879},{7,10,245},{7,10,324},{7,
+10,1532},{11,10,463},{11,10,472},{13,10,363},{144,10,52},{134,0,1729},{6,0,1999}
+,{136,0,969},{4,10,134},{133,10,372},{4,0,60},{7,0,941},{7,0,1800},{8,0,314},{9,
+0,700},{139,0,487},{134,0,1144},{6,11,162},{7,11,1960},{136,11,831},{132,11,706}
+,{135,0,1147},{138,11,426},{138,11,89},{7,0,1853},{138,0,437},{136,0,419},{135,
+10,1634},{133,0,828},{5,0,806},{7,0,176},{7,0,178},{7,0,1240},{7,0,1976},{132,10
+,644},{135,11,1877},{5,11,420},{135,11,1449},{4,0,51},{5,0,39},{6,0,4},{7,0,591}
+,{7,0,849},{7,0,951},{7,0,1613},{7,0,1760},{7,0,1988},{9,0,434},{10,0,754},{11,0
+,25},{139,0,37},{10,11,57},{138,11,277},{135,10,540},{132,11,204},{135,0,159},{
+139,11,231},{133,0,902},{7,0,928},{7,11,366},{9,11,287},{12,11,199},{12,11,556},
+{140,11,577},{6,10,623},{136,10,789},{4,10,908},{5,10,359},{5,10,508},{6,10,1723
+},{7,10,343},{7,10,1996},{135,10,2026},{134,0,270},{4,10,341},{135,10,480},{5,11
+,356},{135,11,224},{11,11,588},{11,11,864},{11,11,968},{143,11,160},{132,0,556},
+{137,0,801},{132,0,416},{142,0,372},{5,0,152},{5,0,197},{7,0,340},{7,0,867},{10,
+0,548},{10,0,581},{11,0,6},{12,0,3},{12,0,19},{14,0,110},{142,0,289},{139,0,369}
+,{7,11,630},{9,11,567},{11,11,150},{11,11,444},{141,11,119},{134,11,539},{7,10,
+1995},{8,10,299},{11,10,890},{140,10,674},{7,0,34},{7,0,190},{8,0,28},{8,0,141},
+{8,0,444},{8,0,811},{9,0,468},{11,0,334},{12,0,24},{12,0,386},{140,0,576},{133,0
+,757},{7,0,1553},{136,0,898},{133,0,721},{136,0,1012},{4,0,789},{5,0,647},{135,0
+,1102},{132,0,898},{10,0,183},{4,10,238},{5,10,503},{6,10,179},{7,10,2003},{8,10
+,381},{8,10,473},{9,10,149},{10,10,788},{15,10,45},{15,10,86},{20,10,110},{150,
+10,57},{9,0,136},{19,0,107},{4,10,121},{5,10,156},{5,10,349},{10,10,605},{142,10
+,342},{4,11,235},{135,11,255},{4,11,194},{5,11,584},{6,11,384},{7,11,583},{10,11
+,761},{11,11,760},{139,11,851},{6,10,80},{6,10,1694},{7,10,173},{7,10,1974},{9,
+10,547},{10,10,730},{14,10,18},{150,10,39},{4,10,923},{134,10,1711},{5,0,277},{
+141,0,247},{132,0,435},{133,11,562},{134,0,1311},{5,11,191},{137,11,271},{132,10
+,595},{7,11,1537},{14,11,96},{143,11,73},{5,0,437},{7,0,502},{7,0,519},{7,0,1122
+},{7,0,1751},{14,0,211},{6,10,459},{7,10,1753},{7,10,1805},{8,10,658},{9,10,1},{
+11,10,959},{141,10,446},{6,0,814},{4,11,470},{5,11,473},{6,11,153},{7,11,1503},{
+7,11,1923},{10,11,701},{11,11,132},{11,11,168},{11,11,227},{11,11,320},{11,11,
+436},{11,11,525},{11,11,855},{12,11,41},{12,11,286},{13,11,103},{13,11,284},{14,
+11,255},{14,11,262},{15,11,117},{143,11,127},{5,0,265},{6,0,212},{135,0,28},{138
+,0,750},{133,11,327},{6,11,552},{7,11,1754},{137,11,604},{134,0,2012},{132,0,702
+},{5,11,80},{6,11,405},{7,11,403},{7,11,1502},{7,11,1626},{8,11,456},{9,11,487},
+{9,11,853},{9,11,889},{10,11,309},{11,11,721},{11,11,994},{12,11,430},{141,11,
+165},{5,0,808},{135,0,2045},{5,0,166},{8,0,739},{140,0,511},{134,10,490},{4,11,
+453},{5,11,887},{6,11,535},{8,11,6},{136,11,543},{4,0,119},{5,0,170},{5,0,447},{
+7,0,1708},{7,0,1889},{9,0,357},{9,0,719},{12,0,486},{140,0,596},{137,0,500},{7,
+10,250},{136,10,507},{132,10,158},{6,0,809},{134,0,1500},{9,0,327},{11,0,350},{
+11,0,831},{13,0,352},{4,10,140},{7,10,362},{8,10,209},{9,10,10},{9,10,503},{9,10
+,614},{10,10,689},{11,10,327},{11,10,725},{12,10,252},{12,10,583},{13,10,192},{
+14,10,269},{14,10,356},{148,10,50},{135,11,741},{4,0,450},{7,0,1158},{19,10,1},{
+19,10,26},{150,10,9},{6,0,597},{135,0,1318},{134,0,1602},{6,10,228},{7,10,1341},
+{9,10,408},{138,10,343},{7,0,1375},{7,0,1466},{138,0,331},{132,0,754},{132,10,
+557},{5,11,101},{6,11,88},{6,11,543},{7,11,1677},{9,11,100},{10,11,677},{14,11,
+169},{14,11,302},{14,11,313},{15,11,48},{143,11,84},{134,0,1368},{4,11,310},{9,
+11,795},{10,11,733},{11,11,451},{12,11,249},{14,11,115},{14,11,286},{143,11,100}
+,{132,10,548},{10,0,557},{7,10,197},{8,10,142},{8,10,325},{9,10,150},{9,10,596},
+{10,10,353},{11,10,74},{11,10,315},{12,10,662},{12,10,681},{14,10,423},{143,10,
+141},{133,11,587},{5,0,850},{136,0,799},{10,0,908},{12,0,701},{12,0,757},{142,0,
+466},{4,0,62},{5,0,275},{18,0,19},{6,10,399},{6,10,579},{7,10,692},{7,10,846},{7
+,10,1015},{7,10,1799},{8,10,403},{9,10,394},{10,10,133},{12,10,4},{12,10,297},{
+12,10,452},{16,10,81},{18,10,25},{21,10,14},{22,10,12},{151,10,18},{12,0,459},{7
+,10,1546},{11,10,299},{142,10,407},{132,10,177},{132,11,498},{7,11,217},{8,11,
+140},{138,11,610},{5,10,411},{135,10,653},{134,0,1802},{7,10,439},{10,10,727},{
+11,10,260},{139,10,684},{133,11,905},{11,11,580},{142,11,201},{134,0,1397},{5,10
+,208},{7,10,753},{135,10,1528},{7,0,238},{7,0,2033},{8,0,120},{8,0,188},{8,0,659
+},{9,0,598},{10,0,466},{12,0,342},{12,0,588},{13,0,503},{14,0,246},{143,0,92},{
+135,11,1041},{4,11,456},{7,11,105},{7,11,358},{7,11,1637},{8,11,643},{139,11,483
+},{6,0,1318},{134,0,1324},{4,0,201},{7,0,1744},{8,0,602},{11,0,247},{11,0,826},{
+17,0,65},{133,10,242},{8,0,164},{146,0,62},{133,10,953},{139,10,802},{133,0,615}
+,{7,11,1566},{8,11,269},{9,11,212},{9,11,718},{14,11,15},{14,11,132},{142,11,227
+},{133,10,290},{132,10,380},{5,10,52},{7,10,277},{9,10,368},{139,10,791},{135,0,
+1243},{133,11,539},{11,11,919},{141,11,409},{136,0,968},{133,11,470},{134,0,882}
+,{132,0,907},{5,0,100},{10,0,329},{12,0,416},{149,0,29},{10,10,138},{139,10,476}
+,{5,10,725},{5,10,727},{6,11,91},{7,11,435},{135,10,1811},{4,11,16},{5,11,316},{
+5,11,842},{6,11,370},{6,11,1778},{8,11,166},{11,11,812},{12,11,206},{12,11,351},
+{14,11,418},{16,11,15},{16,11,34},{18,11,3},{19,11,3},{19,11,7},{20,11,4},{149,
+11,21},{132,0,176},{5,0,636},{5,0,998},{7,0,9},{7,0,1508},{8,0,26},{9,0,317},{9,
+0,358},{10,0,210},{10,0,292},{10,0,533},{11,0,555},{12,0,526},{12,0,607},{13,0,
+263},{13,0,459},{142,0,271},{6,0,256},{8,0,265},{4,10,38},{7,10,307},{7,10,999},
+{7,10,1481},{7,10,1732},{7,10,1738},{9,10,414},{11,10,316},{12,10,52},{13,10,420
+},{147,10,100},{135,10,1296},{4,11,611},{133,11,606},{4,0,643},{142,11,21},{133,
+11,715},{133,10,723},{6,0,610},{135,11,597},{10,0,127},{141,0,27},{6,0,1995},{6,
+0,2001},{8,0,119},{136,0,973},{4,11,149},{138,11,368},{12,0,522},{4,11,154},{5,
+10,109},{6,10,1784},{7,11,1134},{7,10,1895},{8,11,105},{12,10,296},{140,10,302},
+{4,11,31},{6,11,429},{7,11,962},{9,11,458},{139,11,691},{10,0,553},{11,0,876},{
+13,0,193},{13,0,423},{14,0,166},{19,0,84},{4,11,312},{5,10,216},{7,10,1879},{9,
+10,141},{9,10,270},{9,10,679},{10,10,159},{11,10,197},{12,10,538},{12,10,559},{
+14,10,144},{14,10,167},{143,10,67},{134,0,1582},{7,0,1578},{135,11,1578},{137,10
+,81},{132,11,236},{134,10,391},{134,0,795},{7,10,322},{136,10,249},{5,11,836},{5
+,11,857},{6,11,1680},{7,11,59},{147,11,53},{135,0,432},{10,11,68},{139,11,494},{
+4,11,81},{139,11,867},{7,0,126},{136,0,84},{142,11,280},{5,11,282},{8,11,650},{9
+,11,295},{9,11,907},{138,11,443},{136,0,790},{5,10,632},{138,10,526},{6,0,64},{
+12,0,377},{13,0,309},{14,0,141},{14,0,429},{14,11,141},{142,11,429},{134,0,1529}
+,{6,0,321},{7,0,1857},{9,0,530},{19,0,99},{7,10,948},{7,10,1042},{8,10,235},{8,
+10,461},{9,10,453},{10,10,354},{145,10,77},{7,0,1104},{11,0,269},{11,0,539},{11,
+0,627},{11,0,706},{11,0,975},{12,0,248},{12,0,434},{12,0,600},{12,0,622},{13,0,
+297},{13,0,485},{14,0,69},{14,0,409},{143,0,108},{4,10,362},{7,10,52},{7,10,303}
+,{10,11,70},{12,11,26},{14,11,17},{14,11,178},{15,11,34},{149,11,12},{11,0,977},
+{141,0,507},{9,0,34},{139,0,484},{5,10,196},{6,10,486},{7,10,212},{8,10,309},{
+136,10,346},{6,0,1700},{7,0,26},{7,0,293},{7,0,382},{7,0,1026},{7,0,1087},{7,0,
+2027},{8,0,24},{8,0,114},{8,0,252},{8,0,727},{8,0,729},{9,0,30},{9,0,199},{9,0,
+231},{9,0,251},{9,0,334},{9,0,361},{9,0,712},{10,0,55},{10,0,60},{10,0,232},{10,
+0,332},{10,0,384},{10,0,396},{10,0,504},{10,0,542},{10,0,652},{11,0,20},{11,0,48
+},{11,0,207},{11,0,291},{11,0,298},{11,0,342},{11,0,365},{11,0,394},{11,0,620},{
+11,0,705},{11,0,1017},{12,0,123},{12,0,340},{12,0,406},{12,0,643},{13,0,61},{13,
+0,269},{13,0,311},{13,0,319},{13,0,486},{14,0,234},{15,0,62},{15,0,85},{16,0,71}
+,{18,0,119},{20,0,105},{135,10,1912},{4,11,71},{5,11,376},{7,11,119},{138,11,665
+},{10,0,918},{10,0,926},{4,10,686},{136,11,55},{138,10,625},{136,10,706},{132,11
+,479},{4,10,30},{133,10,43},{6,0,379},{7,0,270},{8,0,176},{8,0,183},{9,0,432},{9
+,0,661},{12,0,247},{12,0,617},{18,0,125},{7,11,607},{8,11,99},{152,11,4},{5,0,
+792},{133,0,900},{4,11,612},{133,11,561},{4,11,41},{4,10,220},{5,11,74},{7,10,
+1535},{7,11,1627},{11,11,871},{140,11,619},{135,0,1920},{7,11,94},{11,11,329},{
+11,11,965},{12,11,241},{14,11,354},{15,11,22},{148,11,63},{9,11,209},{137,11,300
+},{134,0,771},{135,0,1979},{4,0,901},{133,0,776},{142,0,254},{133,11,98},{9,11,
+16},{141,11,386},{133,11,984},{4,11,182},{6,11,205},{135,11,220},{7,10,1725},{7,
+10,1774},{138,10,393},{5,10,263},{134,10,414},{4,11,42},{9,11,205},{9,11,786},{
+138,11,659},{14,0,140},{148,0,41},{8,0,440},{10,0,359},{6,10,178},{6,11,289},{6,
+10,1750},{7,11,1670},{9,10,690},{10,10,155},{10,10,373},{11,10,698},{12,11,57},{
+13,10,155},{20,10,93},{151,11,4},{4,0,37},{5,0,334},{7,0,1253},{151,11,25},{4,0,
+508},{4,11,635},{5,10,97},{137,10,393},{139,11,533},{4,0,640},{133,0,513},{134,
+10,1639},{132,11,371},{4,11,272},{7,11,836},{7,11,1651},{145,11,89},{5,11,825},{
+6,11,444},{6,11,1640},{136,11,308},{4,10,191},{7,10,934},{8,10,647},{145,10,97},
+{12,0,246},{15,0,162},{19,0,64},{20,0,8},{20,0,95},{22,0,24},{152,0,17},{4,0,533
+},{5,10,165},{9,10,346},{138,10,655},{5,11,737},{139,10,885},{133,10,877},{8,10,
+128},{139,10,179},{137,11,307},{140,0,752},{133,0,920},{135,0,1048},{5,0,153},{6
+,0,580},{6,10,1663},{7,10,132},{7,10,1154},{7,10,1415},{7,10,1507},{12,10,493},{
+15,10,105},{151,10,15},{5,10,459},{7,10,1073},{8,10,241},{136,10,334},{138,0,391
+},{135,0,1952},{133,11,525},{8,11,641},{11,11,388},{140,11,580},{142,0,126},{134
+,0,640},{132,0,483},{7,0,1616},{9,0,69},{6,10,324},{6,10,520},{7,10,338},{7,10,
+1729},{8,10,228},{139,10,750},{5,11,493},{134,11,528},{135,0,734},{4,11,174},{
+135,11,911},{138,0,480},{9,0,495},{146,0,104},{135,10,705},{9,0,472},{4,10,73},{
+6,10,612},{7,10,927},{7,10,1330},{7,10,1822},{8,10,217},{9,10,765},{9,10,766},{
+10,10,408},{11,10,51},{11,10,793},{12,10,266},{15,10,158},{20,10,89},{150,10,32}
+,{7,11,548},{137,11,58},{4,11,32},{5,11,215},{6,11,269},{7,11,1782},{7,11,1892},
+{10,11,16},{11,11,822},{11,11,954},{141,11,481},{132,0,874},{9,0,229},{5,10,389}
+,{136,10,636},{7,11,1749},{136,11,477},{134,0,948},{5,11,308},{135,11,1088},{4,0
+,748},{139,0,1009},{136,10,21},{6,0,555},{135,0,485},{5,11,126},{8,11,297},{9,11
+,366},{9,11,445},{12,11,53},{12,11,374},{141,11,492},{7,11,1551},{139,11,361},{
+136,0,193},{136,0,472},{8,0,653},{13,0,93},{147,0,14},{132,0,984},{132,11,175},{
+5,0,172},{6,0,1971},{132,11,685},{149,11,8},{133,11,797},{13,0,83},{5,10,189},{7
+,10,442},{7,10,443},{8,10,281},{12,10,174},{141,10,261},{134,0,1568},{133,11,565
+},{139,0,384},{133,0,260},{7,0,758},{7,0,880},{7,0,1359},{9,0,164},{9,0,167},{10
+,0,156},{10,0,588},{12,0,101},{14,0,48},{15,0,70},{6,10,2},{7,10,1262},{7,10,
+1737},{8,10,22},{8,10,270},{8,10,612},{9,10,312},{9,10,436},{10,10,311},{10,10,
+623},{11,10,72},{11,10,330},{11,10,455},{12,10,321},{12,10,504},{12,10,530},{12,
+10,543},{13,10,17},{13,10,156},{13,10,334},{17,10,60},{148,10,64},{4,11,252},{7,
+11,1068},{10,11,434},{11,11,228},{11,11,426},{13,11,231},{18,11,106},{148,11,87}
+,{7,10,354},{10,10,410},{139,10,815},{6,0,367},{7,10,670},{7,10,1327},{8,10,411}
+,{8,10,435},{9,10,653},{9,10,740},{10,10,385},{11,10,222},{11,10,324},{11,10,829
+},{140,10,611},{7,0,1174},{6,10,166},{135,10,374},{146,0,121},{132,0,828},{5,11,
+231},{138,11,509},{7,11,601},{9,11,277},{9,11,674},{10,11,178},{10,11,257},{10,
+11,418},{11,11,531},{11,11,544},{11,11,585},{12,11,113},{12,11,475},{13,11,99},{
+142,11,428},{134,0,1541},{135,11,1779},{5,0,343},{134,10,398},{135,10,50},{135,
+11,1683},{4,0,440},{7,0,57},{8,0,167},{8,0,375},{9,0,82},{9,0,561},{9,0,744},{10
+,0,620},{137,11,744},{134,0,926},{6,10,517},{7,10,1159},{10,10,621},{139,10,192}
+,{137,0,827},{8,0,194},{136,0,756},{10,10,223},{139,10,645},{7,10,64},{136,10,
+245},{4,11,399},{5,11,119},{5,11,494},{7,11,751},{137,11,556},{132,0,808},{135,0
+,22},{7,10,1763},{140,10,310},{5,0,639},{7,0,1249},{11,0,896},{134,11,584},{134,
+0,1614},{135,0,860},{135,11,1121},{5,10,129},{6,10,61},{135,10,947},{4,0,102},{7
+,0,815},{7,0,1699},{139,0,964},{13,10,505},{141,10,506},{139,10,1000},{132,11,
+679},{132,0,899},{132,0,569},{5,11,694},{137,11,714},{136,0,795},{6,0,2045},{139
+,11,7},{6,0,52},{9,0,104},{9,0,559},{12,0,308},{147,0,87},{4,0,301},{132,0,604},
+{133,10,637},{136,0,779},{5,11,143},{5,11,769},{6,11,1760},{7,11,682},{7,11,1992
+},{136,11,736},{137,10,590},{147,0,32},{137,11,527},{5,10,280},{135,10,1226},{
+134,0,494},{6,0,677},{6,0,682},{134,0,1044},{133,10,281},{135,10,1064},{7,0,508}
+,{133,11,860},{6,11,422},{7,11,0},{7,11,1544},{9,11,577},{11,11,990},{12,11,141}
+,{12,11,453},{13,11,47},{141,11,266},{134,0,1014},{5,11,515},{137,11,131},{134,0
+,957},{132,11,646},{6,0,310},{7,0,1849},{8,0,72},{8,0,272},{8,0,431},{9,0,12},{9
+,0,376},{10,0,563},{10,0,630},{10,0,796},{10,0,810},{11,0,367},{11,0,599},{11,0,
+686},{140,0,672},{7,0,570},{4,11,396},{7,10,120},{7,11,728},{8,10,489},{9,11,117
+},{9,10,319},{10,10,820},{11,10,1004},{12,10,379},{12,10,679},{13,10,117},{13,11
+,202},{13,10,412},{14,10,25},{15,10,52},{15,10,161},{16,10,47},{20,11,51},{149,
+10,2},{6,11,121},{6,11,124},{6,11,357},{7,11,1138},{7,11,1295},{8,11,162},{139,
+11,655},{8,0,449},{4,10,937},{5,10,801},{136,11,449},{139,11,958},{6,0,181},{7,0
+,537},{8,0,64},{9,0,127},{10,0,496},{12,0,510},{141,0,384},{138,11,253},{4,0,244
+},{135,0,233},{133,11,237},{132,10,365},{6,0,1650},{10,0,702},{139,0,245},{5,10,
+7},{139,10,774},{13,0,463},{20,0,49},{13,11,463},{148,11,49},{4,10,734},{5,10,
+662},{134,10,430},{4,10,746},{135,10,1090},{5,10,360},{136,10,237},{137,0,338},{
+143,11,10},{7,11,571},{138,11,366},{134,0,1279},{9,11,513},{10,11,22},{10,11,39}
+,{12,11,122},{140,11,187},{133,0,896},{146,0,178},{134,0,695},{137,0,808},{134,
+11,587},{7,11,107},{7,11,838},{8,11,550},{138,11,401},{7,0,1117},{136,0,539},{4,
+10,277},{5,10,608},{6,10,493},{7,10,457},{140,10,384},{133,11,768},{12,0,257},{7
+,10,27},{135,10,316},{140,0,1003},{4,0,207},{5,0,586},{5,0,676},{6,0,448},{8,0,
+244},{11,0,1},{13,0,3},{16,0,54},{17,0,4},{18,0,13},{133,10,552},{4,10,401},{137
+,10,264},{5,0,516},{7,0,1883},{135,11,1883},{12,0,960},{132,11,894},{5,0,4},{5,0
+,810},{6,0,13},{6,0,538},{6,0,1690},{6,0,1726},{7,0,499},{7,0,1819},{8,0,148},{8
+,0,696},{8,0,791},{12,0,125},{143,0,9},{135,0,1268},{11,0,30},{14,0,315},{9,10,
+543},{10,10,524},{12,10,524},{16,10,18},{20,10,26},{148,10,65},{6,0,748},{4,10,
+205},{5,10,623},{7,10,104},{136,10,519},{11,0,542},{139,0,852},{140,0,6},{132,0,
+848},{7,0,1385},{11,0,582},{11,0,650},{11,0,901},{11,0,949},{12,0,232},{12,0,236
+},{13,0,413},{13,0,501},{18,0,116},{7,10,579},{9,10,41},{9,10,244},{9,10,669},{
+10,10,5},{11,10,861},{11,10,951},{139,10,980},{4,0,945},{6,0,1811},{6,0,1845},{6
+,0,1853},{6,0,1858},{8,0,862},{12,0,782},{12,0,788},{18,0,160},{148,0,117},{132,
+10,717},{4,0,925},{5,0,803},{8,0,698},{138,0,828},{134,0,1416},{132,0,610},{139,
+0,992},{6,0,878},{134,0,1477},{135,0,1847},{138,11,531},{137,11,539},{134,11,272
+},{133,0,383},{134,0,1404},{132,10,489},{4,11,9},{5,11,128},{7,11,368},{11,11,
+480},{148,11,3},{136,0,986},{9,0,660},{138,0,347},{135,10,892},{136,11,682},{7,0
+,572},{9,0,592},{11,0,680},{12,0,356},{140,0,550},{7,0,1411},{138,11,527},{4,11,
+2},{7,11,545},{135,11,894},{137,10,473},{11,0,64},{7,11,481},{7,10,819},{9,10,26
+},{9,10,392},{9,11,792},{10,10,152},{10,10,226},{12,10,276},{12,10,426},{12,10,
+589},{13,10,460},{15,10,97},{19,10,48},{148,10,104},{135,10,51},{136,11,445},{
+136,11,646},{135,0,606},{132,10,674},{6,0,1829},{134,0,1830},{132,10,770},{5,10,
+79},{7,10,1027},{7,10,1477},{139,10,52},{5,11,530},{142,11,113},{134,10,1666},{7
+,0,748},{139,0,700},{134,10,195},{133,10,789},{9,0,87},{10,0,365},{4,10,251},{4,
+10,688},{7,10,513},{135,10,1284},{136,11,111},{133,0,127},{6,0,198},{140,0,83},{
+133,11,556},{133,10,889},{4,10,160},{5,10,330},{7,10,1434},{136,10,174},{5,0,276
+},{6,0,55},{7,0,1369},{138,0,864},{8,11,16},{140,11,568},{6,0,1752},{136,0,726},
+{135,0,1066},{133,0,764},{6,11,186},{137,11,426},{11,0,683},{139,11,683},{6,0,
+309},{7,0,331},{138,0,550},{133,10,374},{6,0,1212},{6,0,1852},{7,0,1062},{8,0,
+874},{8,0,882},{138,0,936},{132,11,585},{134,0,1364},{7,0,986},{133,10,731},{6,0
+,723},{6,0,1408},{138,0,381},{135,0,1573},{134,0,1025},{4,10,626},{5,10,642},{6,
+10,425},{10,10,202},{139,10,141},{4,11,93},{5,11,252},{6,11,229},{7,11,291},{9,
+11,550},{139,11,644},{137,11,749},{137,11,162},{132,11,381},{135,0,1559},{6,0,
+194},{7,0,133},{10,0,493},{10,0,570},{139,0,664},{5,0,24},{5,0,569},{6,0,3},{6,0
+,119},{6,0,143},{6,0,440},{7,0,295},{7,0,599},{7,0,1686},{7,0,1854},{8,0,424},{9
+,0,43},{9,0,584},{9,0,760},{10,0,148},{10,0,328},{11,0,159},{11,0,253},{11,0,506
+},{12,0,487},{140,0,531},{6,0,661},{134,0,1517},{136,10,835},{151,10,17},{5,0,14
+},{5,0,892},{6,0,283},{7,0,234},{136,0,537},{139,0,541},{4,0,126},{8,0,635},{147
+,0,34},{4,0,316},{4,0,495},{135,0,1561},{4,11,187},{5,11,184},{5,11,690},{7,11,
+1869},{138,11,756},{139,11,783},{4,0,998},{137,0,861},{136,0,1009},{139,11,292},
+{5,11,21},{6,11,77},{6,11,157},{7,11,974},{7,11,1301},{7,11,1339},{7,11,1490},{7
+,11,1873},{137,11,628},{7,11,1283},{9,11,227},{9,11,499},{10,11,341},{11,11,325}
+,{11,11,408},{14,11,180},{15,11,144},{18,11,47},{147,11,49},{4,0,64},{5,0,352},{
+5,0,720},{6,0,368},{139,0,359},{5,10,384},{8,10,455},{140,10,48},{5,10,264},{134
+,10,184},{7,0,1577},{10,0,304},{10,0,549},{12,0,365},{13,0,220},{13,0,240},{142,
+0,33},{134,0,1107},{134,0,929},{135,0,1142},{6,0,175},{137,0,289},{5,0,432},{133
+,0,913},{6,0,279},{7,0,219},{5,10,633},{135,10,1323},{7,0,785},{7,10,359},{8,10,
+243},{140,10,175},{139,0,595},{132,10,105},{8,11,398},{9,11,681},{139,11,632},{
+140,0,80},{5,0,931},{134,0,1698},{142,11,241},{134,11,20},{134,0,1323},{11,0,526
+},{11,0,939},{141,0,290},{5,0,774},{6,0,780},{6,0,1637},{6,0,1686},{6,0,1751},{8
+,0,559},{141,0,109},{141,0,127},{7,0,1167},{11,0,934},{13,0,391},{17,0,76},{135,
+11,709},{135,0,963},{6,0,260},{135,0,1484},{134,0,573},{4,10,758},{139,11,941},{
+135,10,1649},{145,11,36},{4,0,292},{137,0,580},{4,0,736},{5,0,871},{6,0,1689},{
+135,0,1944},{7,11,945},{11,11,713},{139,11,744},{134,0,1164},{135,11,937},{6,0,
+1922},{9,0,982},{15,0,173},{15,0,178},{15,0,200},{18,0,189},{18,0,207},{21,0,47}
+,{135,11,1652},{7,0,1695},{139,10,128},{6,0,63},{135,0,920},{133,0,793},{143,11,
+134},{133,10,918},{5,0,67},{6,0,62},{6,0,374},{135,0,1391},{9,0,790},{12,0,47},{
+4,11,579},{5,11,226},{5,11,323},{135,11,960},{10,11,784},{141,11,191},{4,0,391},
+{135,0,1169},{137,0,443},{13,11,232},{146,11,35},{132,10,340},{132,0,271},{137,
+11,313},{5,11,973},{137,11,659},{134,0,1140},{6,11,135},{135,11,1176},{4,0,253},
+{5,0,544},{7,0,300},{137,0,340},{7,0,897},{5,10,985},{7,10,509},{145,10,96},{138
+,11,735},{135,10,1919},{138,0,890},{5,0,818},{134,0,1122},{5,0,53},{5,0,541},{6,
+0,94},{6,0,499},{7,0,230},{139,0,321},{4,0,920},{5,0,25},{5,0,790},{6,0,457},{7,
+0,853},{8,0,788},{142,11,31},{132,10,247},{135,11,314},{132,0,468},{7,0,243},{6,
+10,337},{7,10,494},{8,10,27},{8,10,599},{138,10,153},{4,10,184},{5,10,390},{7,10
+,618},{7,10,1456},{139,10,710},{134,0,870},{134,0,1238},{134,0,1765},{10,0,853},
+{10,0,943},{14,0,437},{14,0,439},{14,0,443},{14,0,446},{14,0,452},{14,0,469},{14
+,0,471},{14,0,473},{16,0,93},{16,0,102},{16,0,110},{148,0,121},{4,0,605},{7,0,
+518},{7,0,1282},{7,0,1918},{10,0,180},{139,0,218},{133,0,822},{4,0,634},{11,0,
+916},{142,0,419},{6,11,281},{7,11,6},{8,11,282},{8,11,480},{8,11,499},{9,11,198}
+,{10,11,143},{10,11,169},{10,11,211},{10,11,417},{10,11,574},{11,11,147},{11,11,
+395},{12,11,75},{12,11,407},{12,11,608},{13,11,500},{142,11,251},{134,0,898},{6,
+0,36},{7,0,658},{8,0,454},{150,11,48},{133,11,674},{135,11,1776},{4,11,419},{10,
+10,227},{11,10,497},{11,10,709},{140,10,415},{6,10,360},{7,10,1664},{136,10,478}
+,{137,0,806},{12,11,508},{14,11,102},{14,11,226},{144,11,57},{135,11,1123},{4,11
+,138},{7,11,1012},{7,11,1280},{137,11,76},{5,11,29},{140,11,638},{136,10,699},{
+134,0,1326},{132,0,104},{135,11,735},{132,10,739},{134,0,1331},{7,0,260},{135,11
+,260},{135,11,1063},{7,0,45},{9,0,542},{9,0,566},{10,0,728},{137,10,869},{4,10,
+67},{5,10,422},{7,10,1037},{7,10,1289},{7,10,1555},{9,10,741},{145,10,108},{139,
+0,263},{134,0,1516},{14,0,146},{15,0,42},{16,0,23},{17,0,86},{146,0,17},{138,0,
+468},{136,0,1005},{4,11,17},{5,11,23},{7,11,995},{11,11,383},{11,11,437},{12,11,
+460},{140,11,532},{7,0,87},{142,0,288},{138,10,96},{135,11,626},{144,10,26},{7,0
+,988},{7,0,1939},{9,0,64},{9,0,502},{12,0,22},{12,0,34},{13,0,12},{13,0,234},{
+147,0,77},{13,0,133},{8,10,203},{11,10,823},{11,10,846},{12,10,482},{13,10,277},
+{13,10,302},{13,10,464},{14,10,205},{142,10,221},{4,10,449},{133,10,718},{135,0,
+141},{6,0,1842},{136,0,872},{8,11,70},{12,11,171},{141,11,272},{4,10,355},{6,10,
+311},{9,10,256},{138,10,404},{132,0,619},{137,0,261},{10,11,233},{10,10,758},{
+139,11,76},{5,0,246},{8,0,189},{9,0,355},{9,0,512},{10,0,124},{10,0,453},{11,0,
+143},{11,0,416},{11,0,859},{141,0,341},{134,11,442},{133,10,827},{5,10,64},{140,
+10,581},{4,10,442},{7,10,1047},{7,10,1352},{135,10,1643},{134,11,1709},{5,0,678}
+,{6,0,305},{7,0,775},{7,0,1065},{133,10,977},{11,11,69},{12,11,105},{12,11,117},
+{13,11,213},{14,11,13},{14,11,62},{14,11,177},{14,11,421},{15,11,19},{146,11,141
+},{137,11,309},{5,0,35},{7,0,862},{7,0,1886},{138,0,179},{136,0,285},{132,0,517}
+,{7,11,976},{9,11,146},{10,11,206},{10,11,596},{13,11,218},{142,11,153},{132,10,
+254},{6,0,214},{12,0,540},{4,10,275},{7,10,1219},{140,10,376},{8,0,667},{11,0,
+403},{146,0,83},{12,0,74},{10,11,648},{11,11,671},{143,11,46},{135,0,125},{134,
+10,1753},{133,0,761},{6,0,912},{4,11,518},{6,10,369},{6,10,502},{7,10,1036},{7,
+11,1136},{8,10,348},{9,10,452},{10,10,26},{11,10,224},{11,10,387},{11,10,772},{
+12,10,95},{12,10,629},{13,10,195},{13,10,207},{13,10,241},{14,10,260},{14,10,270
+},{143,10,140},{10,0,131},{140,0,72},{132,10,269},{5,10,480},{7,10,532},{7,10,
+1197},{7,10,1358},{8,10,291},{11,10,349},{142,10,396},{8,11,689},{137,11,863},{8
+,0,333},{138,0,182},{4,11,18},{7,11,145},{7,11,444},{7,11,1278},{8,11,49},{8,11,
+400},{9,11,71},{9,11,250},{10,11,459},{12,11,160},{144,11,24},{14,11,35},{142,11
+,191},{135,11,1864},{135,0,1338},{148,10,15},{14,0,94},{15,0,65},{16,0,4},{16,0,
+77},{16,0,80},{145,0,5},{12,11,82},{143,11,36},{133,11,1010},{133,0,449},{133,0,
+646},{7,0,86},{8,0,103},{135,10,657},{7,0,2028},{138,0,641},{136,10,533},{134,0,
+1},{139,11,970},{5,11,87},{7,11,313},{7,11,1103},{10,11,112},{10,11,582},{11,11,
+389},{11,11,813},{12,11,385},{13,11,286},{14,11,124},{146,11,108},{6,0,869},{132
+,11,267},{6,0,277},{7,0,1274},{7,0,1386},{146,0,87},{6,0,187},{7,0,39},{7,0,1203
+},{8,0,380},{14,0,117},{149,0,28},{4,10,211},{4,10,332},{5,10,335},{6,10,238},{7
+,10,269},{7,10,811},{7,10,1797},{8,10,836},{9,10,507},{141,10,242},{4,0,785},{5,
+0,368},{6,0,297},{7,0,793},{139,0,938},{7,0,464},{8,0,558},{11,0,105},{12,0,231}
+,{14,0,386},{15,0,102},{148,0,75},{133,10,1009},{8,0,877},{140,0,731},{139,11,
+289},{10,11,249},{139,11,209},{132,11,561},{134,0,1608},{132,11,760},{134,0,1429
+},{9,11,154},{140,11,485},{5,10,228},{6,10,203},{7,10,156},{8,10,347},{137,10,
+265},{7,0,1010},{11,0,733},{11,0,759},{13,0,34},{14,0,427},{146,0,45},{7,10,1131
+},{135,10,1468},{136,11,255},{7,0,1656},{9,0,369},{10,0,338},{10,0,490},{11,0,
+154},{11,0,545},{11,0,775},{13,0,77},{141,0,274},{133,11,621},{134,0,1038},{4,11
+,368},{135,11,641},{6,0,2010},{8,0,979},{8,0,985},{10,0,951},{138,0,1011},{134,0
+,1005},{19,0,121},{5,10,291},{5,10,318},{7,10,765},{9,10,389},{140,10,548},{5,0,
+20},{6,0,298},{7,0,659},{137,0,219},{7,0,1440},{11,0,854},{11,0,872},{11,0,921},
+{12,0,551},{13,0,472},{142,0,367},{5,0,490},{6,0,615},{6,0,620},{135,0,683},{6,0
+,1070},{134,0,1597},{139,0,522},{132,0,439},{136,0,669},{6,0,766},{6,0,1143},{6,
+0,1245},{10,10,525},{139,10,82},{9,11,92},{147,11,91},{6,0,668},{134,0,1218},{6,
+11,525},{9,11,876},{140,11,284},{132,0,233},{136,0,547},{132,10,422},{5,10,355},
+{145,10,0},{6,11,300},{135,11,1515},{4,0,482},{137,10,905},{4,0,886},{7,0,346},{
+133,11,594},{133,10,865},{5,10,914},{134,10,1625},{135,0,334},{5,0,795},{6,0,
+1741},{133,10,234},{135,10,1383},{6,11,1641},{136,11,820},{135,0,371},{7,11,1313
+},{138,11,660},{135,10,1312},{135,0,622},{7,0,625},{135,0,1750},{135,0,339},{4,0
+,203},{135,0,1936},{15,0,29},{16,0,38},{15,11,29},{144,11,38},{5,0,338},{135,0,
+1256},{135,10,1493},{10,0,130},{6,10,421},{7,10,61},{7,10,1540},{138,10,501},{6,
+11,389},{7,11,149},{9,11,142},{138,11,94},{137,10,341},{11,0,678},{12,0,307},{
+142,10,98},{6,11,8},{7,11,1881},{136,11,91},{135,0,2044},{6,0,770},{6,0,802},{6,
+0,812},{7,0,311},{9,0,308},{12,0,255},{6,10,102},{7,10,72},{15,10,142},{147,10,
+67},{151,10,30},{135,10,823},{135,0,1266},{135,11,1746},{135,10,1870},{4,0,400},
+{5,0,267},{135,0,232},{7,11,24},{11,11,542},{139,11,852},{135,11,1739},{4,11,503
+},{135,11,1661},{5,11,130},{7,11,1314},{9,11,610},{10,11,718},{11,11,601},{11,11
+,819},{11,11,946},{140,11,536},{10,11,149},{11,11,280},{142,11,336},{7,0,739},{
+11,0,690},{7,11,1946},{8,10,48},{8,10,88},{8,10,582},{8,10,681},{9,10,373},{9,10
+,864},{11,10,157},{11,10,843},{148,10,27},{134,0,990},{4,10,88},{5,10,137},{5,10
+,174},{5,10,777},{6,10,1664},{6,10,1725},{7,10,77},{7,10,426},{7,10,1317},{7,10,
+1355},{8,10,126},{8,10,563},{9,10,523},{9,10,750},{10,10,310},{10,10,836},{11,10
+,42},{11,10,318},{11,10,731},{12,10,68},{12,10,92},{12,10,507},{12,10,692},{13,
+10,81},{13,10,238},{13,10,374},{14,10,436},{18,10,138},{19,10,78},{19,10,111},{
+20,10,55},{20,10,77},{148,10,92},{141,10,418},{7,0,1831},{132,10,938},{6,0,776},
+{134,0,915},{138,10,351},{5,11,348},{6,11,522},{6,10,1668},{7,10,1499},{8,10,117
+},{9,10,314},{138,10,174},{135,10,707},{132,0,613},{133,10,403},{132,11,392},{5,
+11,433},{9,11,633},{139,11,629},{133,0,763},{132,0,878},{132,0,977},{132,0,100},
+{6,0,463},{4,10,44},{5,10,311},{7,10,639},{7,10,762},{7,10,1827},{9,10,8},{9,10,
+462},{148,10,83},{134,11,234},{4,10,346},{7,10,115},{9,10,180},{9,10,456},{138,
+10,363},{5,0,362},{5,0,443},{6,0,318},{7,0,1019},{139,0,623},{5,0,463},{8,0,296}
+,{7,11,140},{7,11,1950},{8,11,680},{11,11,817},{147,11,88},{7,11,1222},{138,11,
+386},{142,0,137},{132,0,454},{7,0,1914},{6,11,5},{7,10,1051},{9,10,545},{11,11,
+249},{12,11,313},{16,11,66},{145,11,26},{135,0,1527},{145,0,58},{148,11,59},{5,0
+,48},{5,0,404},{6,0,557},{7,0,458},{8,0,597},{10,0,455},{10,0,606},{11,0,49},{11
+,0,548},{12,0,476},{13,0,18},{141,0,450},{5,11,963},{134,11,1773},{133,0,729},{
+138,11,586},{5,0,442},{135,0,1984},{134,0,449},{144,0,40},{4,0,853},{7,11,180},{
+8,11,509},{136,11,792},{6,10,185},{7,10,1899},{9,10,875},{139,10,673},{134,11,
+524},{12,0,227},{4,10,327},{5,10,478},{7,10,1332},{136,10,753},{6,0,1491},{5,10,
+1020},{133,10,1022},{4,10,103},{133,10,401},{132,11,931},{4,10,499},{135,10,1421
+},{5,0,55},{7,0,376},{140,0,161},{133,0,450},{6,0,1174},{134,0,1562},{10,0,62},{
+13,0,400},{135,11,1837},{140,0,207},{135,0,869},{4,11,773},{5,11,618},{137,11,
+756},{132,10,96},{4,0,213},{7,0,223},{8,0,80},{135,10,968},{4,11,90},{5,11,337},
+{5,11,545},{7,11,754},{9,11,186},{10,11,72},{10,11,782},{11,11,513},{11,11,577},
+{11,11,610},{11,11,889},{11,11,961},{12,11,354},{12,11,362},{12,11,461},{12,11,
+595},{13,11,79},{143,11,121},{7,0,381},{7,0,806},{7,0,820},{8,0,354},{8,0,437},{
+8,0,787},{9,0,657},{10,0,58},{10,0,339},{10,0,749},{11,0,914},{12,0,162},{13,0,
+75},{14,0,106},{14,0,198},{14,0,320},{14,0,413},{146,0,43},{136,0,747},{136,0,
+954},{134,0,1073},{135,0,556},{7,11,151},{9,11,329},{139,11,254},{5,0,692},{134,
+0,1395},{6,10,563},{137,10,224},{134,0,191},{132,0,804},{9,11,187},{10,11,36},{
+17,11,44},{146,11,64},{7,11,165},{7,11,919},{136,11,517},{4,11,506},{5,11,295},{
+7,11,1680},{15,11,14},{144,11,5},{4,0,706},{6,0,162},{7,0,1960},{136,0,831},{135
+,11,1376},{7,11,987},{9,11,688},{10,11,522},{11,11,788},{140,11,566},{150,0,35},
+{138,0,426},{135,0,1235},{135,11,1741},{7,11,389},{7,11,700},{7,11,940},{8,11,
+514},{9,11,116},{9,11,535},{10,11,118},{11,11,107},{11,11,148},{11,11,922},{12,
+11,254},{12,11,421},{142,11,238},{134,0,1234},{132,11,743},{4,10,910},{5,10,832}
+,{135,11,1335},{141,0,96},{135,11,185},{146,0,149},{4,0,204},{137,0,902},{4,11,
+784},{133,11,745},{136,0,833},{136,0,949},{7,0,366},{9,0,287},{12,0,199},{12,0,
+556},{12,0,577},{5,11,81},{7,11,146},{7,11,1342},{7,11,1446},{8,11,53},{8,11,561
+},{8,11,694},{8,11,754},{9,11,97},{9,11,115},{9,11,894},{10,11,462},{10,11,813},
+{11,11,230},{11,11,657},{11,11,699},{11,11,748},{12,11,119},{12,11,200},{12,11,
+283},{14,11,273},{145,11,15},{5,11,408},{137,11,747},{9,11,498},{140,11,181},{6,
+0,2020},{136,0,992},{5,0,356},{135,0,224},{134,0,784},{7,0,630},{9,0,567},{11,0,
+150},{11,0,444},{13,0,119},{8,10,528},{137,10,348},{134,0,539},{4,10,20},{133,10
+,616},{142,0,27},{7,11,30},{8,11,86},{8,11,315},{8,11,700},{9,11,576},{9,11,858}
+,{11,11,310},{11,11,888},{11,11,904},{12,11,361},{141,11,248},{138,11,839},{134,
+0,755},{134,0,1063},{7,10,1091},{135,10,1765},{134,11,428},{7,11,524},{8,11,169}
+,{8,11,234},{9,11,480},{138,11,646},{139,0,814},{7,11,1462},{139,11,659},{4,10,
+26},{5,10,429},{6,10,245},{7,10,704},{7,10,1379},{135,10,1474},{7,11,1205},{138,
+11,637},{139,11,803},{132,10,621},{136,0,987},{4,11,266},{8,11,4},{9,11,39},{10,
+11,166},{11,11,918},{12,11,635},{20,11,10},{22,11,27},{150,11,43},{4,0,235},{135
+,0,255},{4,0,194},{5,0,584},{6,0,384},{7,0,583},{10,0,761},{11,0,760},{139,0,851
+},{133,10,542},{134,0,1086},{133,10,868},{8,0,1016},{136,0,1018},{7,0,1396},{7,
+11,1396},{136,10,433},{135,10,1495},{138,10,215},{141,10,124},{7,11,157},{8,11,
+279},{9,11,759},{16,11,31},{16,11,39},{16,11,75},{18,11,24},{20,11,42},{152,11,1
+},{5,0,562},{134,11,604},{134,0,913},{5,0,191},{137,0,271},{4,0,470},{6,0,153},{
+7,0,1503},{7,0,1923},{10,0,701},{11,0,132},{11,0,227},{11,0,320},{11,0,436},{11,
+0,525},{11,0,855},{11,0,873},{12,0,41},{12,0,286},{13,0,103},{13,0,284},{14,0,
+255},{14,0,262},{15,0,117},{143,0,127},{7,0,475},{12,0,45},{147,10,112},{132,11,
+567},{137,11,859},{6,0,713},{6,0,969},{6,0,1290},{134,0,1551},{133,0,327},{6,0,
+552},{6,0,1292},{7,0,1754},{137,0,604},{4,0,223},{6,0,359},{11,0,3},{13,0,108},{
+14,0,89},{16,0,22},{5,11,762},{7,11,1880},{9,11,680},{139,11,798},{5,0,80},{6,0,
+405},{7,0,403},{7,0,1502},{8,0,456},{9,0,487},{9,0,853},{9,0,889},{10,0,309},{11
+,0,721},{11,0,994},{12,0,430},{141,0,165},{133,11,298},{132,10,647},{134,0,2016}
+,{18,10,10},{146,11,10},{4,0,453},{5,0,887},{6,0,535},{8,0,6},{8,0,543},{136,0,
+826},{136,0,975},{10,0,961},{138,0,962},{138,10,220},{6,0,1891},{6,0,1893},{9,0,
+916},{9,0,965},{9,0,972},{12,0,801},{12,0,859},{12,0,883},{15,0,226},{149,0,51},
+{132,10,109},{135,11,267},{7,11,92},{7,11,182},{8,11,453},{9,11,204},{11,11,950}
+,{12,11,94},{12,11,644},{16,11,20},{16,11,70},{16,11,90},{147,11,55},{134,10,
+1746},{6,11,71},{7,11,845},{7,11,1308},{8,11,160},{137,11,318},{5,0,101},{6,0,88
+},{7,0,263},{7,0,628},{7,0,1677},{8,0,349},{9,0,100},{10,0,677},{14,0,169},{14,0
+,302},{14,0,313},{15,0,48},{15,0,84},{7,11,237},{8,11,664},{9,11,42},{9,11,266},
+{9,11,380},{9,11,645},{10,11,177},{138,11,276},{138,11,69},{4,0,310},{7,0,708},{
+7,0,996},{9,0,795},{10,0,390},{10,0,733},{11,0,451},{12,0,249},{14,0,115},{14,0,
+286},{143,0,100},{5,0,587},{4,10,40},{10,10,67},{11,10,117},{11,10,768},{139,10,
+935},{6,0,1942},{7,0,512},{136,0,983},{7,10,992},{8,10,301},{9,10,722},{12,10,63
+},{13,10,29},{14,10,161},{143,10,18},{136,11,76},{139,10,923},{134,0,645},{134,0
+,851},{4,0,498},{132,11,293},{7,0,217},{8,0,140},{10,0,610},{14,11,352},{17,11,
+53},{18,11,146},{18,11,152},{19,11,11},{150,11,54},{134,0,1448},{138,11,841},{
+133,0,905},{4,11,605},{7,11,518},{7,11,1282},{7,11,1918},{10,11,180},{139,11,218
+},{139,11,917},{135,10,825},{140,10,328},{4,0,456},{7,0,105},{7,0,358},{7,0,1637
+},{8,0,643},{139,0,483},{134,0,792},{6,11,96},{135,11,1426},{137,11,691},{4,11,
+651},{133,11,289},{7,11,688},{8,11,35},{9,11,511},{10,11,767},{147,11,118},{150,
+0,56},{5,0,243},{5,0,535},{6,10,204},{10,10,320},{10,10,583},{13,10,502},{14,10,
+72},{14,10,274},{14,10,312},{14,10,344},{15,10,159},{16,10,62},{16,10,69},{17,10
+,30},{18,10,42},{18,10,53},{18,10,84},{18,10,140},{19,10,68},{19,10,85},{20,10,5
+},{20,10,45},{20,10,101},{22,10,7},{150,10,20},{4,10,558},{6,10,390},{7,10,162},
+{7,10,689},{9,10,360},{138,10,653},{146,11,23},{135,0,1748},{5,10,856},{6,10,
+1672},{6,10,1757},{134,10,1781},{5,0,539},{5,0,754},{6,0,876},{132,11,704},{135,
+11,1078},{5,10,92},{10,10,736},{140,10,102},{17,0,91},{5,10,590},{137,10,213},{
+134,0,1565},{6,0,91},{135,0,435},{4,0,939},{140,0,792},{134,0,1399},{4,0,16},{5,
+0,316},{5,0,842},{6,0,370},{6,0,1778},{8,0,166},{11,0,812},{12,0,206},{12,0,351}
+,{14,0,418},{16,0,15},{16,0,34},{18,0,3},{19,0,3},{19,0,7},{20,0,4},{21,0,21},{4
+,11,720},{133,11,306},{144,0,95},{133,11,431},{132,11,234},{135,0,551},{4,0,999}
+,{6,0,1966},{134,0,2042},{7,0,619},{10,0,547},{11,0,122},{12,0,601},{15,0,7},{
+148,0,20},{5,11,464},{6,11,236},{7,11,276},{7,11,696},{7,11,914},{7,11,1108},{7,
+11,1448},{9,11,15},{9,11,564},{10,11,14},{12,11,565},{13,11,449},{14,11,53},{15,
+11,13},{16,11,64},{145,11,41},{6,0,884},{6,0,1019},{134,0,1150},{6,11,1767},{12,
+11,194},{145,11,107},{136,10,503},{133,11,840},{7,0,671},{134,10,466},{132,0,888
+},{4,0,149},{138,0,368},{4,0,154},{7,0,1134},{136,0,105},{135,0,983},{9,11,642},
+{11,11,236},{142,11,193},{4,0,31},{6,0,429},{7,0,962},{9,0,458},{139,0,691},{6,0
+,643},{134,0,1102},{132,0,312},{4,11,68},{5,11,634},{6,11,386},{7,11,794},{8,11,
+273},{9,11,563},{10,11,105},{10,11,171},{11,11,94},{139,11,354},{133,0,740},{135
+,0,1642},{4,11,95},{7,11,416},{8,11,211},{139,11,830},{132,0,236},{138,10,241},{
+7,11,731},{13,11,20},{143,11,11},{5,0,836},{5,0,857},{6,0,1680},{135,0,59},{10,0
+,68},{11,0,494},{152,11,6},{4,0,81},{139,0,867},{135,0,795},{133,11,689},{4,0,
+1001},{5,0,282},{6,0,1932},{6,0,1977},{6,0,1987},{6,0,1992},{8,0,650},{8,0,919},
+{8,0,920},{8,0,923},{8,0,926},{8,0,927},{8,0,931},{8,0,939},{8,0,947},{8,0,956},
+{8,0,997},{9,0,907},{10,0,950},{10,0,953},{10,0,954},{10,0,956},{10,0,958},{10,0
+,959},{10,0,964},{10,0,970},{10,0,972},{10,0,973},{10,0,975},{10,0,976},{10,0,
+980},{10,0,981},{10,0,984},{10,0,988},{10,0,990},{10,0,995},{10,0,999},{10,0,
+1002},{10,0,1003},{10,0,1005},{10,0,1006},{10,0,1008},{10,0,1009},{10,0,1012},{
+10,0,1014},{10,0,1015},{10,0,1019},{10,0,1020},{10,0,1022},{12,0,959},{12,0,961}
+,{12,0,962},{12,0,963},{12,0,964},{12,0,965},{12,0,967},{12,0,968},{12,0,969},{
+12,0,970},{12,0,971},{12,0,972},{12,0,973},{12,0,974},{12,0,975},{12,0,976},{12,
+0,977},{12,0,979},{12,0,981},{12,0,982},{12,0,983},{12,0,984},{12,0,985},{12,0,
+986},{12,0,987},{12,0,989},{12,0,990},{12,0,992},{12,0,993},{12,0,995},{12,0,998
+},{12,0,999},{12,0,1000},{12,0,1001},{12,0,1002},{12,0,1004},{12,0,1005},{12,0,
+1006},{12,0,1007},{12,0,1008},{12,0,1009},{12,0,1010},{12,0,1011},{12,0,1012},{
+12,0,1014},{12,0,1015},{12,0,1016},{12,0,1017},{12,0,1018},{12,0,1019},{12,0,
+1022},{12,0,1023},{14,0,475},{14,0,477},{14,0,478},{14,0,479},{14,0,480},{14,0,
+482},{14,0,483},{14,0,484},{14,0,485},{14,0,486},{14,0,487},{14,0,488},{14,0,489
+},{14,0,490},{14,0,491},{14,0,492},{14,0,493},{14,0,494},{14,0,495},{14,0,496},{
+14,0,497},{14,0,498},{14,0,499},{14,0,500},{14,0,501},{14,0,502},{14,0,503},{14,
+0,504},{14,0,506},{14,0,507},{14,0,508},{14,0,509},{14,0,510},{14,0,511},{16,0,
+113},{16,0,114},{16,0,115},{16,0,117},{16,0,118},{16,0,119},{16,0,121},{16,0,122
+},{16,0,123},{16,0,124},{16,0,125},{16,0,126},{16,0,127},{18,0,242},{18,0,243},{
+18,0,244},{18,0,245},{18,0,248},{18,0,249},{18,0,250},{18,0,251},{18,0,252},{18,
+0,253},{18,0,254},{18,0,255},{20,0,125},{20,0,126},{148,0,127},{7,11,1717},{7,11
+,1769},{138,11,546},{7,11,1127},{7,11,1572},{10,11,297},{10,11,422},{11,11,764},
+{11,11,810},{12,11,264},{13,11,102},{13,11,300},{13,11,484},{14,11,147},{14,11,
+229},{17,11,71},{18,11,118},{147,11,120},{6,0,1148},{134,0,1586},{132,0,775},{
+135,10,954},{133,11,864},{133,11,928},{138,11,189},{135,10,1958},{6,10,549},{8,
+10,34},{8,10,283},{9,10,165},{138,10,475},{5,10,652},{5,10,701},{135,10,449},{
+135,11,695},{4,10,655},{7,10,850},{17,10,75},{146,10,137},{140,11,682},{133,11,
+523},{8,0,970},{136,10,670},{136,11,555},{7,11,76},{8,11,44},{9,11,884},{10,11,
+580},{11,11,399},{11,11,894},{15,11,122},{18,11,144},{147,11,61},{6,10,159},{6,
+10,364},{7,10,516},{7,10,1439},{137,10,518},{4,0,71},{5,0,376},{7,0,119},{138,0,
+665},{141,10,151},{11,0,827},{14,0,34},{143,0,148},{133,11,518},{4,0,479},{135,
+11,1787},{135,11,1852},{135,10,993},{7,0,607},{136,0,99},{134,0,1960},{132,0,793
+},{4,0,41},{5,0,74},{7,0,1627},{11,0,871},{140,0,619},{7,0,94},{11,0,329},{11,0,
+965},{12,0,241},{14,0,354},{15,0,22},{148,0,63},{7,10,501},{9,10,111},{10,10,141
+},{11,10,332},{13,10,43},{13,10,429},{14,10,130},{14,10,415},{145,10,102},{9,0,
+209},{137,0,300},{134,0,1497},{138,11,255},{4,11,934},{5,11,138},{136,11,610},{
+133,0,98},{6,0,1316},{10,11,804},{138,11,832},{8,11,96},{9,11,36},{10,11,607},{
+11,11,423},{11,11,442},{12,11,309},{14,11,199},{15,11,90},{145,11,110},{132,0,
+463},{5,10,149},{136,10,233},{133,10,935},{4,11,652},{8,11,320},{9,11,13},{9,11,
+398},{9,11,727},{10,11,75},{10,11,184},{10,11,230},{10,11,564},{10,11,569},{11,
+11,973},{12,11,70},{12,11,189},{13,11,57},{13,11,257},{22,11,6},{150,11,16},{142
+,0,291},{12,10,582},{146,10,131},{136,10,801},{133,0,984},{145,11,116},{4,11,692
+},{133,11,321},{4,0,182},{6,0,205},{135,0,220},{4,0,42},{9,0,205},{9,0,786},{138
+,0,659},{6,0,801},{11,11,130},{140,11,609},{132,0,635},{5,11,345},{135,11,1016},
+{139,0,533},{132,0,371},{4,0,272},{135,0,836},{6,0,1282},{135,11,1100},{5,0,825}
+,{134,0,1640},{135,11,1325},{133,11,673},{4,11,287},{133,11,1018},{135,0,357},{6
+,0,467},{137,0,879},{7,0,317},{135,0,569},{6,0,924},{134,0,1588},{5,11,34},{5,10
+,406},{10,11,724},{12,11,444},{13,11,354},{18,11,32},{23,11,24},{23,11,31},{152,
+11,5},{6,0,1795},{6,0,1835},{6,0,1836},{6,0,1856},{8,0,844},{8,0,849},{8,0,854},
+{8,0,870},{8,0,887},{10,0,852},{138,0,942},{6,10,69},{135,10,117},{137,0,307},{4
+,0,944},{6,0,1799},{6,0,1825},{10,0,848},{10,0,875},{10,0,895},{10,0,899},{10,0,
+902},{140,0,773},{11,0,43},{13,0,72},{141,0,142},{135,10,1830},{134,11,382},{4,
+10,432},{135,10,824},{132,11,329},{7,0,1820},{139,11,124},{133,10,826},{133,0,
+525},{132,11,906},{7,11,1940},{136,11,366},{138,11,10},{4,11,123},{4,11,649},{5,
+11,605},{7,11,1509},{136,11,36},{6,0,110},{135,0,1681},{133,0,493},{133,11,767},
+{4,0,174},{135,0,911},{138,11,786},{8,0,417},{137,0,782},{133,10,1000},{7,0,733}
+,{137,0,583},{4,10,297},{6,10,529},{7,10,152},{7,10,713},{7,10,1845},{8,10,710},
+{8,10,717},{12,10,639},{140,10,685},{4,0,32},{5,0,215},{6,0,269},{7,0,1782},{7,0
+,1892},{10,0,16},{11,0,822},{11,0,954},{141,0,481},{4,11,273},{5,11,658},{133,11
+,995},{136,0,477},{134,11,72},{135,11,1345},{5,0,308},{7,0,1088},{4,10,520},{135
+,10,575},{133,11,589},{5,0,126},{8,0,297},{9,0,366},{140,0,374},{7,0,1551},{139,
+0,361},{5,11,117},{6,11,514},{6,11,541},{7,11,1164},{7,11,1436},{8,11,220},{8,11
+,648},{10,11,688},{139,11,560},{133,11,686},{4,0,946},{6,0,1807},{8,0,871},{10,0
+,854},{10,0,870},{10,0,888},{10,0,897},{10,0,920},{12,0,722},{12,0,761},{12,0,
+763},{12,0,764},{14,0,454},{14,0,465},{16,0,107},{18,0,167},{18,0,168},{146,0,
+172},{132,0,175},{135,0,1307},{132,0,685},{135,11,1834},{133,0,797},{6,0,745},{6
+,0,858},{134,0,963},{133,0,565},{5,10,397},{6,10,154},{7,11,196},{7,10,676},{8,
+10,443},{8,10,609},{9,10,24},{9,10,325},{10,10,35},{10,11,765},{11,11,347},{11,
+10,535},{11,11,552},{11,11,576},{11,10,672},{11,11,790},{11,10,1018},{12,11,263}
+,{12,10,637},{13,11,246},{13,11,270},{13,11,395},{14,11,74},{14,11,176},{14,11,
+190},{14,11,398},{14,11,412},{15,11,32},{15,11,63},{16,10,30},{16,11,88},{147,11
+,105},{13,11,84},{141,11,122},{4,0,252},{7,0,1068},{10,0,434},{11,0,228},{11,0,
+426},{13,0,231},{18,0,106},{148,0,87},{137,0,826},{4,11,589},{139,11,282},{5,11,
+381},{135,11,1792},{132,0,791},{5,0,231},{10,0,509},{133,10,981},{7,0,601},{9,0,
+277},{9,0,674},{10,0,178},{10,0,418},{10,0,571},{11,0,531},{12,0,113},{12,0,475}
+,{13,0,99},{142,0,428},{4,10,56},{7,11,616},{7,10,1791},{8,10,607},{8,10,651},{
+10,11,413},{11,10,465},{11,10,835},{12,10,337},{141,10,480},{7,0,1591},{144,0,43
+},{9,10,158},{138,10,411},{135,0,1683},{8,0,289},{11,0,45},{12,0,278},{140,0,537
+},{6,11,120},{7,11,1188},{7,11,1710},{8,11,286},{9,11,667},{11,11,592},{139,11,
+730},{136,10,617},{135,0,1120},{135,11,1146},{139,10,563},{4,11,352},{4,10,369},
+{135,11,687},{143,11,38},{4,0,399},{5,0,119},{5,0,494},{7,0,751},{9,0,556},{14,
+11,179},{15,11,151},{150,11,11},{4,11,192},{5,11,49},{6,11,200},{6,11,293},{6,11
+,1696},{135,11,488},{4,0,398},{133,0,660},{7,0,1030},{134,10,622},{135,11,595},{
+141,0,168},{132,11,147},{7,0,973},{10,10,624},{142,10,279},{132,10,363},{132,0,
+642},{133,11,934},{134,0,1615},{7,11,505},{135,11,523},{7,0,594},{7,0,851},{7,0,
+1858},{9,0,411},{9,0,574},{9,0,666},{9,0,737},{10,0,346},{10,0,712},{11,0,246},{
+11,0,432},{11,0,517},{11,0,647},{11,0,679},{11,0,727},{12,0,304},{12,0,305},{12,
+0,323},{12,0,483},{12,0,572},{12,0,593},{12,0,602},{13,0,95},{13,0,101},{13,0,
+171},{13,0,315},{13,0,378},{13,0,425},{13,0,475},{14,0,63},{14,0,380},{14,0,384}
+,{15,0,133},{18,0,112},{148,0,72},{135,0,1093},{132,0,679},{8,0,913},{10,0,903},
+{10,0,915},{12,0,648},{12,0,649},{14,0,455},{16,0,112},{138,11,438},{137,0,203},
+{134,10,292},{134,0,1492},{7,0,1374},{8,0,540},{5,10,177},{6,10,616},{7,10,827},
+{9,10,525},{138,10,656},{135,0,1486},{9,0,714},{138,10,31},{136,0,825},{134,0,
+1511},{132,11,637},{134,0,952},{4,10,161},{133,10,631},{5,0,143},{5,0,769},{6,0,
+1760},{7,0,682},{7,0,1992},{136,0,736},{132,0,700},{134,0,1540},{132,11,777},{9,
+11,867},{138,11,837},{7,0,1557},{135,10,1684},{133,0,860},{6,0,422},{7,0,0},{7,0
+,1544},{9,0,605},{11,0,990},{12,0,235},{12,0,453},{13,0,47},{13,0,266},{9,10,469
+},{9,10,709},{12,10,512},{14,10,65},{145,10,12},{11,0,807},{10,10,229},{11,10,73
+},{139,10,376},{6,11,170},{7,11,1080},{8,11,395},{8,11,487},{11,11,125},{141,11,
+147},{5,0,515},{137,0,131},{7,0,1605},{11,0,962},{146,0,139},{132,0,646},{4,0,
+396},{7,0,728},{9,0,117},{13,0,202},{148,0,51},{6,0,121},{6,0,124},{6,0,357},{7,
+0,1138},{7,0,1295},{8,0,162},{8,0,508},{11,0,655},{4,11,535},{6,10,558},{7,10,
+651},{8,11,618},{9,10,0},{10,10,34},{139,10,1008},{135,11,1245},{138,0,357},{150
+,11,23},{133,0,237},{135,0,1784},{7,10,1832},{138,10,374},{132,0,713},{132,11,46
+},{6,0,1536},{10,0,348},{5,11,811},{6,11,1679},{6,11,1714},{135,11,2032},{11,11,
+182},{142,11,195},{6,0,523},{7,0,738},{7,10,771},{7,10,1731},{9,10,405},{138,10,
+421},{7,11,1458},{9,11,407},{139,11,15},{6,11,34},{7,11,69},{7,11,640},{7,11,
+1089},{8,11,708},{8,11,721},{9,11,363},{9,11,643},{10,11,628},{148,11,98},{133,0
+,434},{135,0,1877},{7,0,571},{138,0,366},{5,10,881},{133,10,885},{9,0,513},{10,0
+,25},{10,0,39},{12,0,122},{140,0,187},{132,0,580},{5,10,142},{134,10,546},{132,
+11,462},{137,0,873},{5,10,466},{11,10,571},{12,10,198},{13,10,283},{14,10,186},{
+15,10,21},{143,10,103},{7,0,171},{4,10,185},{5,10,257},{5,10,839},{5,10,936},{9,
+10,399},{10,10,258},{10,10,395},{10,10,734},{11,10,1014},{12,10,23},{13,10,350},
+{14,10,150},{147,10,6},{134,0,625},{7,0,107},{7,0,838},{8,0,550},{138,0,401},{5,
+11,73},{6,11,23},{134,11,338},{4,0,943},{6,0,1850},{12,0,713},{142,0,434},{11,0,
+588},{11,0,864},{11,0,936},{11,0,968},{12,0,73},{12,0,343},{12,0,394},{13,0,275}
+,{14,0,257},{15,0,160},{7,10,404},{7,10,1377},{7,10,1430},{7,10,2017},{8,10,149}
+,{8,10,239},{8,10,512},{8,10,793},{8,10,818},{9,10,474},{9,10,595},{10,10,122},{
+10,10,565},{10,10,649},{10,10,783},{11,10,239},{11,10,295},{11,10,447},{11,10,
+528},{11,10,639},{11,10,800},{12,10,25},{12,10,157},{12,10,316},{12,10,390},{12,
+10,391},{12,10,395},{12,10,478},{12,10,503},{12,10,592},{12,10,680},{13,10,50},{
+13,10,53},{13,10,132},{13,10,198},{13,10,322},{13,10,415},{13,10,511},{14,10,71}
+,{14,10,395},{15,10,71},{15,10,136},{17,10,123},{18,10,93},{147,10,58},{133,0,
+768},{11,0,103},{142,0,0},{136,10,712},{132,0,799},{132,0,894},{7,11,725},{8,11,
+498},{139,11,268},{135,11,1798},{135,11,773},{141,11,360},{4,10,377},{152,10,13}
+,{135,0,1673},{132,11,583},{134,0,1052},{133,11,220},{140,11,69},{132,11,544},{4
+,10,180},{135,10,1906},{134,0,272},{4,0,441},{134,0,1421},{4,0,9},{5,0,128},{7,0
+,368},{11,0,480},{148,0,3},{5,11,176},{6,11,437},{6,11,564},{11,11,181},{141,11,
+183},{132,10,491},{7,0,1182},{141,11,67},{6,0,1346},{4,10,171},{138,10,234},{4,
+10,586},{7,10,1186},{138,10,631},{136,0,682},{134,0,1004},{15,0,24},{143,11,24},
+{134,0,968},{4,0,2},{6,0,742},{6,0,793},{7,0,545},{7,0,894},{9,10,931},{10,10,
+334},{148,10,71},{136,11,600},{133,10,765},{9,0,769},{140,0,185},{4,11,790},{5,
+11,273},{134,11,394},{7,0,474},{137,0,578},{4,11,135},{6,11,127},{7,11,1185},{7,
+11,1511},{8,11,613},{11,11,5},{12,11,133},{12,11,495},{12,11,586},{14,11,385},{
+15,11,118},{17,11,20},{146,11,98},{133,10,424},{5,0,530},{142,0,113},{6,11,230},
+{7,11,961},{7,11,1085},{136,11,462},{7,11,1954},{137,11,636},{136,10,714},{149,
+11,6},{135,10,685},{9,10,420},{10,10,269},{10,10,285},{10,10,576},{11,10,397},{
+13,10,175},{145,10,90},{132,10,429},{5,0,556},{5,11,162},{136,11,68},{132,11,654
+},{4,11,156},{7,11,998},{7,11,1045},{7,11,1860},{9,11,48},{9,11,692},{11,11,419}
+,{139,11,602},{6,0,1317},{8,0,16},{9,0,825},{12,0,568},{7,11,1276},{8,11,474},{
+137,11,652},{18,0,97},{7,10,18},{7,10,699},{7,10,1966},{8,10,752},{9,10,273},{9,
+10,412},{9,10,703},{10,10,71},{10,10,427},{138,10,508},{10,0,703},{7,11,1454},{
+138,11,703},{4,10,53},{5,10,186},{135,10,752},{134,0,892},{134,0,1571},{8,10,575
+},{10,10,289},{139,10,319},{6,0,186},{137,0,426},{134,0,1101},{132,10,675},{132,
+0,585},{6,0,1870},{137,0,937},{152,11,10},{9,11,197},{10,11,300},{12,11,473},{13
+,11,90},{141,11,405},{4,0,93},{5,0,252},{6,0,229},{7,0,291},{9,0,550},{139,0,644
+},{137,0,749},{9,0,162},{6,10,209},{8,10,468},{9,10,210},{11,10,36},{12,10,28},{
+12,10,630},{13,10,21},{13,10,349},{14,10,7},{145,10,13},{132,0,381},{132,11,606}
+,{4,10,342},{135,10,1179},{7,11,1587},{7,11,1707},{10,11,528},{139,11,504},{12,
+11,39},{13,11,265},{141,11,439},{4,10,928},{133,10,910},{7,10,1838},{7,11,1978},
+{136,11,676},{6,0,762},{6,0,796},{134,0,956},{4,10,318},{4,10,496},{7,10,856},{
+139,10,654},{137,11,242},{4,11,361},{133,11,315},{132,11,461},{132,11,472},{132,
+0,857},{5,0,21},{6,0,77},{6,0,157},{7,0,974},{7,0,1301},{7,0,1339},{7,0,1490},{7
+,0,1873},{9,0,628},{7,10,915},{8,10,247},{147,10,0},{4,10,202},{5,10,382},{6,10,
+454},{7,10,936},{7,10,1803},{8,10,758},{9,10,375},{9,10,895},{10,10,743},{10,10,
+792},{11,10,978},{11,10,1012},{142,10,109},{7,11,617},{10,11,498},{11,11,501},{
+12,11,16},{140,11,150},{7,10,1150},{7,10,1425},{7,10,1453},{10,11,747},{140,10,
+513},{133,11,155},{11,0,919},{141,0,409},{138,10,791},{10,0,633},{139,11,729},{7
+,11,163},{8,11,319},{9,11,402},{10,11,24},{10,11,681},{11,11,200},{11,11,567},{
+12,11,253},{12,11,410},{142,11,219},{5,11,475},{7,11,1780},{9,11,230},{11,11,297
+},{11,11,558},{14,11,322},{147,11,76},{7,0,332},{6,10,445},{137,10,909},{135,11,
+1956},{136,11,274},{134,10,578},{135,0,1489},{135,11,1848},{5,11,944},{134,11,
+1769},{132,11,144},{136,10,766},{4,0,832},{135,10,541},{8,0,398},{9,0,681},{139,
+0,632},{136,0,645},{9,0,791},{10,0,93},{16,0,13},{17,0,23},{18,0,135},{19,0,12},
+{20,0,1},{20,0,12},{148,0,14},{6,11,247},{137,11,555},{134,0,20},{132,0,800},{
+135,0,1841},{139,10,983},{137,10,768},{132,10,584},{141,11,51},{6,0,1993},{4,11,
+620},{138,11,280},{136,0,769},{11,0,290},{11,0,665},{7,11,1810},{11,11,866},{12,
+11,103},{13,11,495},{17,11,67},{147,11,74},{134,0,1426},{139,0,60},{4,10,326},{
+135,10,1770},{7,0,1874},{9,0,641},{132,10,226},{6,0,644},{5,10,426},{8,10,30},{9
+,10,2},{11,10,549},{147,10,122},{5,11,428},{138,11,442},{135,11,1871},{135,0,
+1757},{147,10,117},{135,0,937},{135,0,1652},{6,0,654},{134,0,1476},{133,11,99},{
+135,0,527},{132,10,345},{4,10,385},{4,11,397},{7,10,265},{135,10,587},{4,0,579},
+{5,0,226},{5,0,323},{135,0,960},{134,0,1486},{8,11,502},{144,11,9},{4,10,347},{5
+,10,423},{5,10,996},{135,10,1329},{7,11,727},{146,11,73},{4,11,485},{7,11,353},{
+7,10,1259},{7,11,1523},{9,10,125},{139,10,65},{6,0,325},{5,10,136},{6,11,366},{7
+,11,1384},{7,11,1601},{136,10,644},{138,11,160},{6,0,1345},{137,11,282},{18,0,91
+},{147,0,70},{136,0,404},{4,11,157},{133,11,471},{133,0,973},{6,0,135},{135,0,
+1176},{8,11,116},{11,11,551},{142,11,159},{4,0,549},{4,10,433},{133,10,719},{136
+,0,976},{5,11,160},{7,11,363},{7,11,589},{10,11,170},{141,11,55},{144,0,21},{144
+,0,51},{135,0,314},{135,10,1363},{4,11,108},{7,11,405},{10,11,491},{139,11,498},
+{146,0,4},{4,10,555},{8,10,536},{10,10,288},{139,10,1005},{135,11,1005},{6,0,281
+},{7,0,6},{8,0,282},{8,0,480},{8,0,499},{9,0,198},{10,0,143},{10,0,169},{10,0,
+211},{10,0,417},{10,0,574},{11,0,147},{11,0,395},{12,0,75},{12,0,407},{12,0,608}
+,{13,0,500},{142,0,251},{6,0,1093},{6,0,1405},{9,10,370},{138,10,90},{4,11,926},
+{133,11,983},{135,0,1776},{134,0,1528},{132,0,419},{132,11,538},{6,11,294},{7,11
+,1267},{136,11,624},{135,11,1772},{138,11,301},{4,10,257},{135,10,2031},{4,0,138
+},{7,0,1012},{7,0,1280},{9,0,76},{135,10,1768},{132,11,757},{5,0,29},{140,0,638}
+,{7,11,655},{135,11,1844},{7,0,1418},{6,11,257},{135,11,1522},{8,11,469},{138,11
+,47},{142,11,278},{6,10,83},{6,10,1733},{135,10,1389},{11,11,204},{11,11,243},{
+140,11,293},{135,11,1875},{6,0,1710},{135,0,2038},{137,11,299},{4,0,17},{5,0,23}
+,{7,0,995},{11,0,383},{11,0,437},{12,0,460},{140,0,532},{133,0,862},{137,10,696}
+,{6,0,592},{138,0,946},{138,11,599},{7,10,1718},{9,10,95},{9,10,274},{10,10,279}
+,{10,10,317},{10,10,420},{11,10,303},{11,10,808},{12,10,134},{12,10,367},{13,10,
+149},{13,10,347},{14,10,349},{14,10,406},{18,10,22},{18,10,89},{18,10,122},{147,
+10,47},{8,0,70},{12,0,171},{141,0,272},{133,10,26},{132,10,550},{137,0,812},{10,
+0,233},{139,0,76},{134,0,988},{134,0,442},{136,10,822},{7,0,896},{4,10,902},{5,
+10,809},{134,10,122},{5,11,150},{7,11,106},{8,11,603},{9,11,593},{9,11,634},{10,
+11,44},{10,11,173},{11,11,462},{11,11,515},{13,11,216},{13,11,288},{142,11,400},
+{136,0,483},{135,10,262},{6,0,1709},{133,10,620},{4,10,34},{5,10,574},{7,10,279}
+,{7,10,1624},{136,10,601},{137,10,170},{147,0,119},{12,11,108},{141,11,291},{11,
+0,69},{12,0,105},{12,0,117},{13,0,213},{14,0,13},{14,0,62},{14,0,177},{14,0,421}
+,{15,0,19},{146,0,141},{137,0,309},{11,11,278},{142,11,73},{7,0,608},{7,0,976},{
+9,0,146},{10,0,206},{10,0,596},{13,0,218},{142,0,153},{133,10,332},{6,10,261},{8
+,10,182},{139,10,943},{4,11,493},{144,11,55},{134,10,1721},{132,0,768},{4,10,933
+},{133,10,880},{7,11,555},{7,11,1316},{7,11,1412},{7,11,1839},{9,11,192},{9,11,
+589},{11,11,241},{11,11,676},{11,11,811},{11,11,891},{12,11,140},{12,11,346},{12
+,11,479},{13,11,30},{13,11,49},{13,11,381},{14,11,188},{15,11,150},{16,11,76},{
+18,11,30},{148,11,52},{4,0,518},{135,0,1136},{6,11,568},{7,11,112},{7,11,1804},{
+8,11,362},{8,11,410},{8,11,830},{9,11,514},{11,11,649},{142,11,157},{135,11,673}
+,{8,0,689},{137,0,863},{4,0,18},{7,0,145},{7,0,444},{7,0,1278},{8,0,49},{8,0,400
+},{9,0,71},{9,0,250},{10,0,459},{12,0,160},{16,0,24},{132,11,625},{140,0,1020},{
+4,0,997},{6,0,1946},{6,0,1984},{134,0,1998},{6,11,16},{6,11,158},{7,11,43},{7,11
+,129},{7,11,181},{8,11,276},{8,11,377},{10,11,523},{11,11,816},{12,11,455},{13,
+11,303},{142,11,135},{133,10,812},{134,0,658},{4,11,1},{7,11,1143},{7,11,1463},{
+8,11,61},{9,11,207},{9,11,390},{9,11,467},{139,11,836},{150,11,26},{140,0,106},{
+6,0,1827},{10,0,931},{18,0,166},{20,0,114},{4,10,137},{7,10,1178},{7,11,1319},{
+135,10,1520},{133,0,1010},{4,11,723},{5,11,895},{7,11,1031},{8,11,199},{8,11,340
+},{9,11,153},{9,11,215},{10,11,21},{10,11,59},{10,11,80},{10,11,224},{11,11,229}
+,{11,11,652},{12,11,192},{13,11,146},{142,11,91},{132,11,295},{6,11,619},{7,11,
+898},{7,11,1092},{8,11,485},{18,11,28},{147,11,116},{137,11,51},{6,10,1661},{7,
+10,1975},{7,10,2009},{135,10,2011},{5,11,309},{140,11,211},{5,0,87},{7,0,313},{7
+,0,1103},{10,0,208},{10,0,582},{11,0,389},{11,0,813},{12,0,385},{13,0,286},{14,0
+,124},{146,0,108},{5,11,125},{8,11,77},{138,11,15},{132,0,267},{133,0,703},{137,
+11,155},{133,11,439},{11,11,164},{140,11,76},{9,0,496},{5,10,89},{7,10,1915},{9,
+10,185},{9,10,235},{10,10,64},{10,10,270},{10,10,403},{10,10,469},{10,10,529},{
+10,10,590},{11,10,140},{11,10,860},{13,10,1},{13,10,422},{14,10,341},{14,10,364}
+,{17,10,93},{18,10,113},{19,10,97},{147,10,113},{133,10,695},{135,0,1121},{5,10,
+6},{6,10,183},{7,10,680},{7,10,978},{7,10,1013},{7,10,1055},{12,10,230},{13,10,
+172},{146,10,29},{4,11,8},{7,11,1152},{7,11,1153},{7,11,1715},{9,11,374},{10,11,
+478},{139,11,648},{135,11,1099},{6,10,29},{139,10,63},{4,0,561},{10,0,249},{139,
+0,209},{132,0,760},{7,11,799},{138,11,511},{136,11,87},{9,0,154},{140,0,485},{
+136,0,255},{132,0,323},{140,0,419},{132,10,311},{134,10,1740},{4,0,368},{135,0,
+641},{7,10,170},{8,10,90},{8,10,177},{8,10,415},{11,10,714},{142,10,281},{4,11,
+69},{5,11,122},{9,11,656},{138,11,464},{5,11,849},{134,11,1633},{8,0,522},{142,0
+,328},{11,10,91},{13,10,129},{15,10,101},{145,10,125},{7,0,562},{8,0,551},{4,10,
+494},{6,10,74},{7,10,44},{11,11,499},{12,10,17},{15,10,5},{148,10,11},{4,10,276}
+,{133,10,296},{9,0,92},{147,0,91},{4,10,7},{5,10,90},{5,10,158},{6,10,542},{7,10
+,221},{7,10,1574},{9,10,490},{10,10,540},{11,10,443},{139,10,757},{6,0,525},{6,0
+,1976},{8,0,806},{9,0,876},{140,0,284},{5,11,859},{7,10,588},{7,11,1160},{8,11,
+107},{9,10,175},{9,11,291},{9,11,439},{10,10,530},{10,11,663},{11,11,609},{140,
+11,197},{7,11,168},{13,11,196},{141,11,237},{139,0,958},{133,0,594},{135,10,580}
+,{7,10,88},{136,10,627},{6,0,479},{6,0,562},{7,0,1060},{13,0,6},{5,10,872},{6,10
+,57},{7,10,471},{9,10,447},{137,10,454},{136,11,413},{145,11,19},{4,11,117},{6,
+11,372},{7,11,1905},{142,11,323},{4,11,722},{139,11,471},{17,0,61},{5,10,31},{
+134,10,614},{8,10,330},{140,10,477},{7,10,1200},{138,10,460},{6,10,424},{135,10,
+1866},{6,0,1641},{136,0,820},{6,0,1556},{134,0,1618},{9,11,5},{12,11,216},{12,11
+,294},{12,11,298},{12,11,400},{12,11,518},{13,11,229},{143,11,139},{15,11,155},{
+144,11,79},{4,0,302},{135,0,1766},{5,10,13},{134,10,142},{6,0,148},{7,0,1313},{7
+,10,116},{8,10,322},{8,10,755},{9,10,548},{10,10,714},{11,10,884},{141,10,324},{
+137,0,676},{9,11,88},{139,11,270},{5,11,12},{7,11,375},{137,11,438},{134,0,1674}
+,{7,10,1472},{135,10,1554},{11,0,178},{7,10,1071},{7,10,1541},{7,10,1767},{7,10,
+1806},{11,10,162},{11,10,242},{12,10,605},{15,10,26},{144,10,44},{6,0,389},{7,0,
+149},{9,0,142},{138,0,94},{140,11,71},{145,10,115},{6,0,8},{7,0,1881},{8,0,91},{
+11,11,966},{12,11,287},{13,11,342},{13,11,402},{15,11,110},{143,11,163},{4,11,
+258},{136,11,639},{6,11,22},{7,11,903},{138,11,577},{133,11,681},{135,10,1111},{
+135,11,1286},{9,0,112},{8,10,1},{138,10,326},{5,10,488},{6,10,527},{7,10,489},{7
+,10,1636},{8,10,121},{8,10,144},{8,10,359},{9,10,193},{9,10,241},{9,10,336},{9,
+10,882},{11,10,266},{11,10,372},{11,10,944},{12,10,401},{140,10,641},{4,11,664},
+{133,11,804},{6,0,747},{134,0,1015},{135,0,1746},{9,10,31},{10,10,244},{10,10,
+699},{12,10,149},{141,10,497},{133,10,377},{135,0,24},{6,0,1352},{5,11,32},{145,
+10,101},{7,0,1530},{10,0,158},{13,0,13},{13,0,137},{13,0,258},{14,0,111},{14,0,
+225},{14,0,253},{14,0,304},{14,0,339},{14,0,417},{146,0,33},{4,0,503},{135,0,
+1661},{5,0,130},{6,0,845},{7,0,1314},{9,0,610},{10,0,718},{11,0,601},{11,0,819},
+{11,0,946},{140,0,536},{10,0,149},{11,0,280},{142,0,336},{134,0,1401},{135,0,
+1946},{8,0,663},{144,0,8},{134,0,1607},{135,10,2023},{4,11,289},{7,11,629},{7,11
+,1698},{7,11,1711},{140,11,215},{6,11,450},{136,11,109},{10,0,882},{10,0,883},{
+10,0,914},{138,0,928},{133,10,843},{136,11,705},{132,10,554},{133,10,536},{5,0,
+417},{9,10,79},{11,10,625},{145,10,7},{7,11,1238},{142,11,37},{4,0,392},{135,0,
+1597},{5,0,433},{9,0,633},{11,0,629},{132,10,424},{7,10,336},{136,10,785},{134,
+11,355},{6,0,234},{7,0,769},{9,0,18},{138,0,358},{4,10,896},{134,10,1777},{138,
+11,323},{7,0,140},{7,0,1950},{8,0,680},{11,0,817},{147,0,88},{7,0,1222},{138,0,
+386},{139,11,908},{11,0,249},{12,0,313},{16,0,66},{145,0,26},{134,0,5},{7,10,750
+},{9,10,223},{11,10,27},{11,10,466},{12,10,624},{14,10,265},{146,10,61},{134,11,
+26},{134,0,1216},{5,0,963},{134,0,1773},{4,11,414},{5,11,467},{9,11,654},{10,11,
+451},{12,11,59},{141,11,375},{135,11,17},{4,10,603},{133,10,661},{4,10,11},{6,10
+,128},{7,10,231},{7,10,1533},{138,10,725},{135,11,955},{7,0,180},{8,0,509},{136,
+0,792},{132,10,476},{132,0,1002},{133,11,538},{135,10,1807},{132,0,931},{7,0,943
+},{11,0,614},{140,0,747},{135,0,1837},{9,10,20},{10,10,324},{10,10,807},{139,10,
+488},{134,0,641},{6,11,280},{10,11,502},{11,11,344},{140,11,38},{5,11,45},{7,11,
+1161},{11,11,448},{11,11,880},{13,11,139},{13,11,407},{15,11,16},{17,11,95},{18,
+11,66},{18,11,88},{18,11,123},{149,11,7},{9,0,280},{138,0,134},{22,0,22},{23,0,5
+},{151,0,29},{136,11,777},{4,0,90},{5,0,545},{7,0,754},{9,0,186},{10,0,72},{10,0
+,782},{11,0,577},{11,0,610},{11,0,960},{12,0,354},{12,0,362},{12,0,595},{4,11,
+410},{135,11,521},{135,11,1778},{5,10,112},{6,10,103},{134,10,150},{138,10,356},
+{132,0,742},{7,0,151},{9,0,329},{139,0,254},{8,0,853},{8,0,881},{8,0,911},{8,0,
+912},{10,0,872},{12,0,741},{12,0,742},{152,0,18},{4,11,573},{136,11,655},{6,0,
+921},{134,0,934},{9,0,187},{10,0,36},{11,0,1016},{17,0,44},{146,0,64},{7,0,833},
+{136,0,517},{4,0,506},{5,0,295},{135,0,1680},{4,10,708},{8,10,15},{9,10,50},{9,
+10,386},{11,10,18},{11,10,529},{140,10,228},{7,0,251},{7,0,1701},{8,0,436},{4,10
+,563},{7,10,592},{7,10,637},{7,10,770},{8,10,463},{9,10,60},{9,10,335},{9,10,904
+},{10,10,73},{11,10,434},{12,10,585},{13,10,331},{18,10,110},{148,10,60},{132,10
+,502},{136,0,584},{6,10,347},{138,10,161},{7,0,987},{9,0,688},{10,0,522},{11,0,
+788},{12,0,137},{12,0,566},{14,0,9},{14,0,24},{14,0,64},{7,11,899},{142,11,325},
+{4,0,214},{5,0,500},{5,10,102},{6,10,284},{7,10,1079},{7,10,1423},{7,10,1702},{8
+,10,470},{9,10,554},{9,10,723},{139,10,333},{7,10,246},{135,10,840},{6,10,10},{8
+,10,571},{9,10,739},{143,10,91},{133,10,626},{146,0,195},{134,0,1775},{7,0,389},
+{7,0,700},{7,0,940},{8,0,514},{9,0,116},{9,0,535},{10,0,118},{11,0,107},{11,0,
+148},{11,0,922},{12,0,254},{12,0,421},{142,0,238},{5,10,18},{6,10,526},{13,10,24
+},{13,10,110},{19,10,5},{147,10,44},{132,0,743},{11,0,292},{4,10,309},{5,10,462}
+,{7,10,970},{135,10,1097},{22,10,30},{150,10,33},{139,11,338},{135,11,1598},{7,0
+,1283},{9,0,227},{11,0,325},{11,0,408},{14,0,180},{146,0,47},{4,0,953},{6,0,1805
+},{6,0,1814},{6,0,1862},{140,0,774},{6,11,611},{135,11,1733},{135,11,1464},{5,0,
+81},{7,0,146},{7,0,1342},{8,0,53},{8,0,561},{8,0,694},{8,0,754},{9,0,115},{9,0,
+179},{9,0,894},{10,0,462},{10,0,813},{11,0,230},{11,0,657},{11,0,699},{11,0,748}
+,{12,0,119},{12,0,200},{12,0,283},{142,0,273},{5,0,408},{6,0,789},{6,0,877},{6,0
+,1253},{6,0,1413},{137,0,747},{134,10,1704},{135,11,663},{6,0,1910},{6,0,1915},{
+6,0,1923},{9,0,913},{9,0,928},{9,0,950},{9,0,954},{9,0,978},{9,0,993},{12,0,812}
+,{12,0,819},{12,0,831},{12,0,833},{12,0,838},{12,0,909},{12,0,928},{12,0,931},{
+12,0,950},{15,0,186},{15,0,187},{15,0,195},{15,0,196},{15,0,209},{15,0,215},{15,
+0,236},{15,0,241},{15,0,249},{15,0,253},{18,0,180},{18,0,221},{18,0,224},{18,0,
+227},{18,0,229},{149,0,60},{7,0,1826},{135,0,1938},{11,0,490},{18,0,143},{5,10,
+86},{7,10,743},{9,10,85},{10,10,281},{10,10,432},{12,10,251},{13,10,118},{142,10
+,378},{5,10,524},{133,10,744},{141,11,442},{10,10,107},{140,10,436},{135,11,503}
+,{134,0,1162},{132,10,927},{7,0,30},{8,0,86},{8,0,315},{8,0,700},{9,0,576},{9,0,
+858},{10,0,414},{11,0,310},{11,0,888},{11,0,904},{12,0,361},{13,0,248},{13,0,371
+},{14,0,142},{12,10,670},{146,10,94},{134,0,721},{4,11,113},{5,11,163},{5,11,735
+},{7,11,1009},{7,10,1149},{9,11,9},{9,10,156},{9,11,771},{12,11,90},{13,11,138},
+{13,11,410},{143,11,128},{138,0,839},{133,10,778},{137,0,617},{133,10,502},{8,10
+,196},{10,10,283},{139,10,406},{6,0,428},{7,0,524},{8,0,169},{8,0,234},{9,0,480}
+,{138,0,646},{133,10,855},{134,0,1648},{7,0,1205},{138,0,637},{7,0,1596},{4,11,
+935},{133,11,823},{5,11,269},{7,11,434},{7,11,891},{8,11,339},{9,11,702},{11,11,
+594},{11,11,718},{145,11,100},{7,11,878},{9,11,485},{141,11,264},{4,0,266},{8,0,
+4},{9,0,39},{10,0,166},{11,0,918},{12,0,635},{20,0,10},{22,0,27},{22,0,43},{22,0
+,52},{134,11,1713},{7,10,1400},{9,10,446},{138,10,45},{135,11,900},{132,0,862},{
+134,0,1554},{135,11,1033},{19,0,16},{147,11,16},{135,11,1208},{7,0,157},{136,0,
+279},{6,0,604},{136,0,391},{13,10,455},{15,10,99},{15,10,129},{144,10,68},{135,
+10,172},{7,0,945},{11,0,713},{139,0,744},{4,0,973},{10,0,877},{10,0,937},{10,0,
+938},{140,0,711},{139,0,1022},{132,10,568},{142,11,143},{4,0,567},{9,0,859},{132
+,10,732},{7,0,1846},{136,0,628},{136,10,733},{133,0,762},{4,10,428},{135,10,1789
+},{10,0,784},{13,0,191},{7,10,2015},{140,10,665},{133,0,298},{7,0,633},{7,0,905}
+,{7,0,909},{7,0,1538},{9,0,767},{140,0,636},{138,10,806},{132,0,795},{139,0,301}
+,{135,0,1970},{5,11,625},{135,11,1617},{135,11,275},{7,11,37},{8,11,425},{8,11,
+693},{9,11,720},{10,11,380},{10,11,638},{11,11,273},{11,11,307},{11,11,473},{12,
+11,61},{143,11,43},{135,11,198},{134,0,1236},{7,0,369},{12,0,644},{12,0,645},{
+144,0,90},{19,0,15},{149,0,27},{6,0,71},{7,0,845},{8,0,160},{9,0,318},{6,10,1623
+},{134,10,1681},{134,0,1447},{134,0,1255},{138,0,735},{8,0,76},{132,11,168},{6,
+10,1748},{8,10,715},{9,10,802},{10,10,46},{10,10,819},{13,10,308},{14,10,351},{
+14,10,363},{146,10,67},{135,11,91},{6,0,474},{4,10,63},{133,10,347},{133,10,749}
+,{138,0,841},{133,10,366},{6,0,836},{132,11,225},{135,0,1622},{135,10,89},{140,0
+,735},{134,0,1601},{138,11,145},{6,0,1390},{137,0,804},{142,0,394},{6,11,15},{7,
+11,70},{10,11,240},{147,11,93},{6,0,96},{135,0,1426},{4,0,651},{133,0,289},{7,11
+,956},{7,10,977},{7,11,1157},{7,11,1506},{7,11,1606},{7,11,1615},{7,11,1619},{7,
+11,1736},{7,11,1775},{8,11,590},{9,11,324},{9,11,736},{9,11,774},{9,11,776},{9,
+11,784},{10,11,567},{10,11,708},{11,11,518},{11,11,613},{11,11,695},{11,11,716},
+{11,11,739},{11,11,770},{11,11,771},{11,11,848},{11,11,857},{11,11,931},{11,11,
+947},{12,11,326},{12,11,387},{12,11,484},{12,11,528},{12,11,552},{12,11,613},{13
+,11,189},{13,11,256},{13,11,340},{13,11,432},{13,11,436},{13,11,440},{13,11,454}
+,{14,11,174},{14,11,220},{14,11,284},{14,11,390},{145,11,121},{7,0,688},{8,0,35}
+,{9,0,511},{10,0,767},{147,0,118},{134,0,667},{4,0,513},{5,10,824},{133,10,941},
+{7,10,440},{8,10,230},{139,10,106},{134,0,2034},{135,11,1399},{143,11,66},{135,
+11,1529},{4,11,145},{6,11,176},{7,11,395},{9,11,562},{144,11,28},{132,11,501},{
+132,0,704},{134,0,1524},{7,0,1078},{134,11,464},{6,11,509},{10,11,82},{20,11,91}
+,{151,11,13},{4,0,720},{133,0,306},{133,0,431},{7,0,1196},{4,10,914},{5,10,800},
+{133,10,852},{135,11,1189},{10,0,54},{141,10,115},{7,10,564},{142,10,168},{5,0,
+464},{6,0,236},{7,0,696},{7,0,914},{7,0,1108},{7,0,1448},{9,0,15},{9,0,564},{10,
+0,14},{12,0,565},{13,0,449},{14,0,53},{15,0,13},{16,0,64},{17,0,41},{4,10,918},{
+133,10,876},{6,0,1418},{134,10,1764},{4,10,92},{133,10,274},{134,0,907},{4,11,
+114},{8,10,501},{9,11,492},{13,11,462},{142,11,215},{4,11,77},{5,11,361},{6,11,
+139},{6,11,401},{6,11,404},{7,11,413},{7,11,715},{7,11,1716},{11,11,279},{12,11,
+179},{12,11,258},{13,11,244},{142,11,358},{6,0,1767},{12,0,194},{145,0,107},{134
+,11,1717},{5,10,743},{142,11,329},{4,10,49},{7,10,280},{135,10,1633},{5,0,840},{
+7,11,1061},{8,11,82},{11,11,250},{12,11,420},{141,11,184},{135,11,724},{134,0,
+900},{136,10,47},{134,0,1436},{144,11,0},{6,0,675},{7,0,1008},{7,0,1560},{9,0,
+642},{11,0,236},{14,0,193},{5,10,272},{5,10,908},{5,10,942},{8,10,197},{9,10,47}
+,{11,10,538},{139,10,742},{4,0,68},{5,0,628},{5,0,634},{6,0,386},{7,0,794},{8,0,
+273},{9,0,563},{10,0,105},{10,0,171},{11,0,94},{139,0,354},{135,10,1911},{137,10
+,891},{4,0,95},{6,0,1297},{6,0,1604},{7,0,416},{139,0,830},{6,11,513},{135,11,
+1052},{7,0,731},{13,0,20},{143,0,11},{137,11,899},{10,0,850},{140,0,697},{4,0,
+662},{7,11,1417},{12,11,382},{17,11,48},{152,11,12},{133,0,736},{132,0,861},{4,
+10,407},{132,10,560},{141,10,490},{6,11,545},{7,11,565},{7,11,1669},{10,11,114},
+{11,11,642},{140,11,618},{6,0,871},{134,0,1000},{5,0,864},{10,0,648},{11,0,671},
+{15,0,46},{133,11,5},{133,0,928},{11,0,90},{13,0,7},{4,10,475},{11,10,35},{13,10
+,71},{13,10,177},{142,10,422},{136,0,332},{135,11,192},{134,0,1055},{136,11,763}
+,{11,0,986},{140,0,682},{7,0,76},{8,0,44},{9,0,884},{10,0,580},{11,0,399},{11,0,
+894},{143,0,122},{135,11,1237},{135,10,636},{11,0,300},{6,10,222},{7,10,1620},{8
+,10,409},{137,10,693},{4,11,87},{5,11,250},{10,11,601},{13,11,298},{13,11,353},{
+141,11,376},{5,0,518},{10,0,340},{11,0,175},{149,0,16},{140,0,771},{6,0,1108},{
+137,0,831},{132,0,836},{135,0,1852},{4,0,957},{6,0,1804},{8,0,842},{8,0,843},{8,
+0,851},{8,0,855},{140,0,767},{135,11,814},{4,11,57},{7,11,1195},{7,11,1438},{7,
+11,1548},{7,11,1835},{7,11,1904},{9,11,757},{10,11,604},{139,11,519},{133,10,882
+},{138,0,246},{4,0,934},{5,0,202},{8,0,610},{7,11,1897},{12,11,290},{13,11,80},{
+13,11,437},{145,11,74},{8,0,96},{9,0,36},{10,0,607},{10,0,804},{10,0,832},{11,0,
+423},{11,0,442},{12,0,309},{14,0,199},{15,0,90},{145,0,110},{132,10,426},{7,0,
+654},{8,0,240},{6,10,58},{7,10,745},{7,10,1969},{8,10,675},{9,10,479},{9,10,731}
+,{10,10,330},{10,10,593},{10,10,817},{11,10,32},{11,10,133},{11,10,221},{145,10,
+68},{9,0,13},{9,0,398},{9,0,727},{10,0,75},{10,0,184},{10,0,230},{10,0,564},{10,
+0,569},{11,0,973},{12,0,70},{12,0,189},{13,0,57},{141,0,257},{4,11,209},{135,11,
+902},{7,0,391},{137,10,538},{134,0,403},{6,11,303},{7,11,335},{7,11,1437},{7,11,
+1668},{8,11,553},{8,11,652},{8,11,656},{9,11,558},{11,11,743},{149,11,18},{132,
+11,559},{11,0,75},{142,0,267},{6,0,815},{141,11,2},{141,0,366},{137,0,631},{133,
+11,1017},{5,0,345},{135,0,1016},{133,11,709},{134,11,1745},{133,10,566},{7,0,952
+},{6,10,48},{9,10,139},{10,10,399},{11,10,469},{12,10,634},{141,10,223},{133,0,
+673},{9,0,850},{7,11,8},{136,11,206},{6,0,662},{149,0,35},{4,0,287},{133,0,1018}
+,{6,10,114},{7,10,1224},{7,10,1556},{136,10,3},{8,10,576},{137,10,267},{4,0,884}
+,{5,0,34},{10,0,724},{12,0,444},{13,0,354},{18,0,32},{23,0,24},{23,0,31},{152,0,
+5},{133,10,933},{132,11,776},{138,0,151},{136,0,427},{134,0,382},{132,0,329},{9,
+0,846},{10,0,827},{138,11,33},{9,0,279},{10,0,407},{14,0,84},{22,0,18},{135,11,
+1297},{136,11,406},{132,0,906},{136,0,366},{134,0,843},{134,0,1443},{135,0,1372}
+,{138,0,992},{4,0,123},{5,0,605},{7,0,1509},{136,0,36},{132,0,649},{8,11,175},{
+10,11,168},{138,11,573},{133,0,767},{134,0,1018},{135,11,1305},{12,10,30},{13,10
+,148},{14,10,87},{14,10,182},{16,10,42},{148,10,70},{134,11,607},{4,0,273},{5,0,
+658},{133,0,995},{6,0,72},{139,11,174},{10,0,483},{12,0,368},{7,10,56},{7,10,
+1989},{8,10,337},{8,10,738},{9,10,600},{13,10,447},{142,10,92},{5,11,784},{138,
+10,666},{135,0,1345},{139,11,882},{134,0,1293},{133,0,589},{134,0,1988},{5,0,117
+},{6,0,514},{6,0,541},{7,0,1164},{7,0,1436},{8,0,220},{8,0,648},{10,0,688},{139,
+0,560},{136,0,379},{5,0,686},{7,10,866},{135,10,1163},{132,10,328},{9,11,14},{9,
+11,441},{10,11,306},{139,11,9},{4,10,101},{135,10,1171},{5,10,833},{136,10,744},
+{5,11,161},{7,11,839},{135,11,887},{7,0,196},{10,0,765},{11,0,347},{11,0,552},{
+11,0,790},{12,0,263},{13,0,246},{13,0,270},{13,0,395},{14,0,176},{14,0,190},{14,
+0,398},{14,0,412},{15,0,32},{15,0,63},{16,0,88},{147,0,105},{6,10,9},{6,10,397},
+{7,10,53},{7,10,1742},{10,10,632},{11,10,828},{140,10,146},{5,0,381},{135,0,1792
+},{134,0,1452},{135,11,429},{8,0,367},{10,0,760},{14,0,79},{20,0,17},{152,0,0},{
+7,0,616},{138,0,413},{11,10,417},{12,10,223},{140,10,265},{7,11,1611},{13,11,14}
+,{15,11,44},{19,11,13},{148,11,76},{135,0,1229},{6,0,120},{7,0,1188},{7,0,1710},
+{8,0,286},{9,0,667},{11,0,592},{139,0,730},{135,11,1814},{135,0,1146},{4,10,186}
+,{5,10,157},{8,10,168},{138,10,6},{4,0,352},{135,0,687},{4,0,192},{5,0,49},{6,0,
+200},{6,0,293},{6,0,1696},{135,0,1151},{133,10,875},{5,10,773},{5,10,991},{6,10,
+1635},{134,10,1788},{7,10,111},{136,10,581},{6,0,935},{134,0,1151},{134,0,1050},
+{132,0,650},{132,0,147},{11,0,194},{12,0,62},{12,0,88},{11,11,194},{12,11,62},{
+140,11,88},{6,0,339},{135,0,923},{134,10,1747},{7,11,643},{136,11,236},{133,0,
+934},{7,10,1364},{7,10,1907},{141,10,158},{132,10,659},{4,10,404},{135,10,675},{
+7,11,581},{9,11,644},{137,11,699},{13,0,211},{14,0,133},{14,0,204},{15,0,64},{15
+,0,69},{15,0,114},{16,0,10},{19,0,23},{19,0,35},{19,0,39},{19,0,51},{19,0,71},{
+19,0,75},{152,0,15},{133,10,391},{5,11,54},{135,11,1513},{7,0,222},{8,0,341},{5,
+10,540},{134,10,1697},{134,10,78},{132,11,744},{136,0,293},{137,11,701},{7,11,
+930},{10,11,402},{10,11,476},{13,11,452},{18,11,55},{147,11,104},{132,0,637},{
+133,10,460},{8,11,50},{137,11,624},{132,11,572},{134,0,1159},{4,10,199},{139,10,
+34},{134,0,847},{134,10,388},{6,11,43},{7,11,38},{8,11,248},{9,11,504},{138,11,
+513},{9,0,683},{4,10,511},{6,10,608},{9,10,333},{10,10,602},{11,10,441},{11,10,
+723},{11,10,976},{140,10,357},{9,0,867},{138,0,837},{6,0,944},{135,11,326},{135,
+0,1809},{5,10,938},{7,11,783},{136,10,707},{133,11,766},{133,11,363},{6,0,170},{
+7,0,1080},{8,0,395},{8,0,487},{141,0,147},{6,11,258},{140,11,409},{4,0,535},{8,0
+,618},{5,11,249},{148,11,82},{6,0,1379},{149,11,15},{135,0,1625},{150,0,23},{5,
+11,393},{6,11,378},{7,11,1981},{9,11,32},{9,11,591},{10,11,685},{10,11,741},{142
+,11,382},{133,11,788},{7,11,1968},{10,11,19},{139,11,911},{7,11,1401},{135,11,
+1476},{4,11,61},{5,11,58},{5,11,171},{5,11,635},{5,11,683},{5,11,700},{6,11,291}
+,{6,11,566},{7,11,1650},{11,11,523},{12,11,273},{12,11,303},{15,11,39},{143,11,
+111},{6,10,469},{7,10,1709},{138,10,515},{4,0,778},{134,11,589},{132,0,46},{5,0,
+811},{6,0,1679},{6,0,1714},{135,0,2032},{7,0,1458},{9,0,407},{11,0,15},{12,0,651
+},{149,0,37},{7,0,938},{132,10,500},{6,0,34},{7,0,69},{7,0,1089},{7,0,1281},{8,0
+,708},{8,0,721},{9,0,363},{148,0,98},{10,11,231},{147,11,124},{7,11,726},{152,11
+,9},{5,10,68},{134,10,383},{136,11,583},{4,11,917},{133,11,1005},{11,10,216},{
+139,10,340},{135,11,1675},{8,0,441},{10,0,314},{143,0,3},{132,11,919},{4,10,337}
+,{6,10,353},{7,10,1934},{8,10,488},{137,10,429},{7,0,889},{7,10,1795},{8,10,259}
+,{9,10,135},{9,10,177},{9,10,860},{10,10,825},{11,10,115},{11,10,370},{11,10,405
+},{11,10,604},{12,10,10},{12,10,667},{12,10,669},{13,10,76},{14,10,310},{15,10,
+76},{15,10,147},{148,10,23},{4,10,15},{4,11,255},{5,10,22},{5,11,302},{6,11,132}
+,{6,10,244},{7,10,40},{7,11,128},{7,10,200},{7,11,283},{7,10,906},{7,10,1199},{7
+,11,1299},{9,10,616},{10,11,52},{10,11,514},{10,10,716},{11,10,635},{11,10,801},
+{11,11,925},{12,10,458},{13,11,92},{142,11,309},{132,0,462},{137,11,173},{135,10
+,1735},{8,0,525},{5,10,598},{7,10,791},{8,10,108},{137,10,123},{5,0,73},{6,0,23}
+,{134,0,338},{132,0,676},{132,10,683},{7,0,725},{8,0,498},{139,0,268},{12,0,21},
+{151,0,7},{135,0,773},{4,10,155},{135,10,1689},{4,0,164},{5,0,730},{5,10,151},{5
+,10,741},{6,11,210},{7,10,498},{7,10,870},{7,10,1542},{12,10,213},{14,10,36},{14
+,10,391},{17,10,111},{18,10,6},{18,10,46},{18,10,151},{19,10,36},{20,10,32},{20,
+10,56},{20,10,69},{20,10,102},{21,10,4},{22,10,8},{22,10,10},{22,10,14},{150,10,
+31},{4,10,624},{135,10,1752},{4,0,583},{9,0,936},{15,0,214},{18,0,199},{24,0,26}
+,{134,11,588},{7,0,1462},{11,0,659},{4,11,284},{134,11,223},{133,0,220},{139,0,
+803},{132,0,544},{4,10,492},{133,10,451},{16,0,98},{148,0,119},{4,11,218},{7,11,
+526},{143,11,137},{135,10,835},{4,11,270},{5,11,192},{6,11,332},{7,11,1322},{13,
+11,9},{13,10,70},{14,11,104},{142,11,311},{132,10,539},{140,11,661},{5,0,176},{6
+,0,437},{6,0,564},{11,0,181},{141,0,183},{135,0,1192},{6,10,113},{135,10,436},{
+136,10,718},{135,10,520},{135,0,1878},{140,11,196},{7,11,379},{8,11,481},{137,11
+,377},{5,11,1003},{6,11,149},{137,11,746},{8,11,262},{9,11,627},{10,11,18},{11,
+11,214},{11,11,404},{11,11,457},{11,11,780},{11,11,849},{11,11,913},{13,11,330},
+{13,11,401},{142,11,200},{149,0,26},{136,11,304},{132,11,142},{135,0,944},{4,0,
+790},{5,0,273},{134,0,394},{134,0,855},{4,0,135},{6,0,127},{7,0,1185},{7,0,1511}
+,{8,0,613},{11,0,5},{12,0,336},{12,0,495},{12,0,586},{12,0,660},{12,0,668},{14,0
+,385},{15,0,118},{17,0,20},{146,0,98},{6,0,230},{9,0,752},{18,0,109},{12,10,610}
+,{13,10,431},{144,10,59},{7,0,1954},{135,11,925},{4,11,471},{5,11,51},{6,11,602}
+,{8,11,484},{10,11,195},{140,11,159},{132,10,307},{136,11,688},{132,11,697},{7,
+11,812},{7,11,1261},{7,11,1360},{9,11,632},{140,11,352},{5,0,162},{8,0,68},{133,
+10,964},{4,0,654},{136,11,212},{4,0,156},{7,0,998},{7,0,1045},{7,0,1860},{9,0,48
+},{9,0,692},{11,0,419},{139,0,602},{133,11,221},{4,11,373},{5,11,283},{6,11,480}
+,{135,11,609},{142,11,216},{132,0,240},{6,11,192},{9,11,793},{145,11,55},{4,10,
+75},{5,10,180},{6,10,500},{7,10,58},{7,10,710},{138,10,645},{4,11,132},{5,11,69}
+,{5,10,649},{135,11,1242},{6,10,276},{7,10,282},{7,10,879},{7,10,924},{8,10,459}
+,{9,10,599},{9,10,754},{11,10,574},{12,10,128},{12,10,494},{13,10,52},{13,10,301
+},{15,10,30},{143,10,132},{132,10,200},{4,11,111},{135,11,302},{9,0,197},{10,0,
+300},{12,0,473},{13,0,90},{141,0,405},{132,11,767},{6,11,42},{7,11,1416},{7,11,
+1590},{7,11,2005},{8,11,131},{8,11,466},{9,11,672},{13,11,252},{148,11,103},{8,0
+,958},{8,0,999},{10,0,963},{138,0,1001},{135,10,1621},{135,0,858},{4,0,606},{137
+,11,444},{6,11,44},{136,11,368},{139,11,172},{4,11,570},{133,11,120},{139,11,624
+},{7,0,1978},{8,0,676},{6,10,225},{137,10,211},{7,0,972},{11,0,102},{136,10,687}
+,{6,11,227},{135,11,1589},{8,10,58},{9,10,724},{11,10,809},{13,10,113},{145,10,
+72},{4,0,361},{133,0,315},{132,0,461},{6,10,345},{135,10,1247},{132,0,472},{8,10
+,767},{8,10,803},{9,10,301},{137,10,903},{135,11,1333},{135,11,477},{7,10,1949},
+{136,10,674},{6,0,905},{138,0,747},{133,0,155},{134,10,259},{7,0,163},{8,0,319},
+{9,0,402},{10,0,24},{10,0,681},{11,0,200},{12,0,253},{12,0,410},{142,0,219},{5,0
+,475},{7,0,1780},{9,0,230},{11,0,297},{11,0,558},{14,0,322},{19,0,76},{6,11,1667
+},{7,11,2036},{138,11,600},{136,10,254},{6,0,848},{135,0,1956},{6,11,511},{140,
+11,132},{5,11,568},{6,11,138},{135,11,1293},{6,0,631},{137,0,838},{149,0,36},{4,
+11,565},{8,11,23},{136,11,827},{5,0,944},{134,0,1769},{4,0,144},{6,0,842},{6,0,
+1400},{4,11,922},{133,11,1023},{133,10,248},{9,10,800},{10,10,693},{11,10,482},{
+11,10,734},{139,10,789},{7,11,1002},{139,11,145},{4,10,116},{5,10,95},{5,10,445}
+,{7,10,1688},{8,10,29},{9,10,272},{11,10,509},{139,10,915},{14,0,369},{146,0,72}
+,{135,10,1641},{132,11,740},{133,10,543},{140,11,116},{6,0,247},{9,0,555},{5,10,
+181},{136,10,41},{133,10,657},{136,0,996},{138,10,709},{7,0,189},{8,10,202},{138
+,10,536},{136,11,402},{4,11,716},{141,11,31},{10,0,280},{138,0,797},{9,10,423},{
+140,10,89},{8,10,113},{9,10,877},{10,10,554},{11,10,83},{12,10,136},{147,10,109}
+,{133,10,976},{7,0,746},{132,10,206},{136,0,526},{139,0,345},{136,0,1017},{8,11,
+152},{9,11,53},{9,11,268},{9,11,901},{10,11,518},{10,11,829},{11,11,188},{13,11,
+74},{14,11,46},{15,11,17},{15,11,33},{17,11,40},{18,11,36},{19,11,20},{22,11,1},
+{152,11,2},{133,11,736},{136,11,532},{5,0,428},{138,0,651},{135,11,681},{135,0,
+1162},{7,0,327},{13,0,230},{17,0,113},{8,10,226},{10,10,537},{11,10,570},{11,10,
+605},{11,10,799},{11,10,804},{12,10,85},{12,10,516},{12,10,623},{12,11,677},{13,
+10,361},{14,10,77},{14,10,78},{147,10,110},{4,0,792},{7,0,1717},{10,0,546},{132,
+10,769},{4,11,684},{136,11,384},{132,10,551},{134,0,1203},{9,10,57},{9,10,459},{
+10,10,425},{11,10,119},{12,10,184},{12,10,371},{13,10,358},{145,10,51},{5,0,672}
+,{5,10,814},{8,10,10},{9,10,421},{9,10,729},{10,10,609},{139,10,689},{138,0,189}
+,{134,10,624},{7,11,110},{7,11,188},{8,11,290},{8,11,591},{9,11,382},{9,11,649},
+{11,11,71},{11,11,155},{11,11,313},{12,11,5},{13,11,325},{142,11,287},{133,0,99}
+,{6,0,1053},{135,0,298},{7,11,360},{7,11,425},{9,11,66},{9,11,278},{138,11,644},
+{4,0,397},{136,0,555},{137,10,269},{132,10,528},{4,11,900},{133,11,861},{6,0,
+1157},{5,11,254},{7,11,985},{136,11,73},{7,11,1959},{136,11,683},{12,0,398},{20,
+0,39},{21,0,11},{150,0,41},{4,0,485},{7,0,353},{135,0,1523},{6,0,366},{7,0,1384}
+,{135,0,1601},{138,0,787},{137,0,282},{5,10,104},{6,10,173},{135,10,1631},{139,
+11,146},{4,0,157},{133,0,471},{134,0,941},{132,11,725},{7,0,1336},{8,10,138},{8,
+10,342},{9,10,84},{10,10,193},{11,10,883},{140,10,359},{134,11,196},{136,0,116},
+{133,11,831},{134,0,787},{134,10,95},{6,10,406},{10,10,409},{10,10,447},{11,10,
+44},{140,10,100},{5,0,160},{7,0,363},{7,0,589},{10,0,170},{141,0,55},{134,0,1815
+},{132,0,866},{6,0,889},{6,0,1067},{6,0,1183},{4,11,321},{134,11,569},{5,11,848}
+,{134,11,66},{4,11,36},{6,10,1636},{7,11,1387},{10,11,205},{11,11,755},{141,11,
+271},{132,0,689},{9,0,820},{4,10,282},{7,10,1034},{11,10,398},{11,10,634},{12,10
+,1},{12,10,79},{12,10,544},{14,10,237},{17,10,10},{146,10,20},{4,0,108},{7,0,804
+},{139,0,498},{132,11,887},{6,0,1119},{135,11,620},{6,11,165},{138,11,388},{5,0,
+244},{5,10,499},{6,10,476},{7,10,600},{7,10,888},{135,10,1096},{140,0,609},{135,
+0,1005},{4,0,412},{133,0,581},{4,11,719},{135,11,155},{7,10,296},{7,10,596},{8,
+10,560},{8,10,586},{9,10,612},{11,10,304},{12,10,46},{13,10,89},{14,10,112},{145
+,10,122},{4,0,895},{133,0,772},{142,11,307},{135,0,1898},{4,0,926},{133,0,983},{
+4,11,353},{6,11,146},{6,11,1789},{7,11,288},{7,11,990},{7,11,1348},{9,11,665},{9
+,11,898},{11,11,893},{142,11,212},{132,0,538},{133,11,532},{6,0,294},{7,0,1267},
+{8,0,624},{141,0,496},{7,0,1325},{4,11,45},{135,11,1257},{138,0,301},{9,0,298},{
+12,0,291},{13,0,276},{14,0,6},{17,0,18},{21,0,32},{7,10,1599},{7,10,1723},{8,10,
+79},{8,10,106},{8,10,190},{8,10,302},{8,10,383},{8,10,713},{9,10,119},{9,10,233}
+,{9,10,419},{9,10,471},{10,10,181},{10,10,406},{11,10,57},{11,10,85},{11,10,120}
+,{11,10,177},{11,10,296},{11,10,382},{11,10,454},{11,10,758},{11,10,999},{12,10,
+27},{12,10,131},{12,10,245},{12,10,312},{12,10,446},{12,10,454},{13,10,98},{13,
+10,426},{13,10,508},{14,10,163},{14,10,272},{14,10,277},{14,10,370},{15,10,95},{
+15,10,138},{15,10,167},{17,10,38},{148,10,96},{132,0,757},{134,0,1263},{4,0,820}
+,{134,10,1759},{133,0,722},{136,11,816},{138,10,372},{145,10,16},{134,0,1039},{4
+,0,991},{134,0,2028},{133,10,258},{7,0,1875},{139,0,124},{6,11,559},{6,11,1691},
+{135,11,586},{5,0,324},{7,0,881},{8,10,134},{9,10,788},{140,10,438},{7,11,1823},
+{139,11,693},{6,0,1348},{134,0,1545},{134,0,911},{132,0,954},{8,0,329},{8,0,414}
+,{7,10,1948},{135,10,2004},{5,0,517},{6,10,439},{7,10,780},{135,10,1040},{132,0,
+816},{5,10,1},{6,10,81},{138,10,520},{9,0,713},{10,0,222},{5,10,482},{8,10,98},{
+10,10,700},{10,10,822},{11,10,302},{11,10,778},{12,10,50},{12,10,127},{12,10,396
+},{13,10,62},{13,10,328},{14,10,122},{147,10,72},{137,0,33},{5,10,2},{7,10,1494}
+,{136,10,589},{6,10,512},{7,10,797},{8,10,253},{9,10,77},{10,10,1},{10,11,108},{
+10,10,129},{10,10,225},{11,11,116},{11,10,118},{11,10,226},{11,10,251},{11,10,
+430},{11,10,701},{11,10,974},{11,10,982},{12,10,64},{12,10,260},{12,10,488},{140
+,10,690},{134,11,456},{133,11,925},{5,0,150},{7,0,106},{7,0,774},{8,0,603},{9,0,
+593},{9,0,634},{10,0,44},{10,0,173},{11,0,462},{11,0,515},{13,0,216},{13,0,288},
+{142,0,400},{137,10,347},{5,0,748},{134,0,553},{12,0,108},{141,0,291},{7,0,420},
+{4,10,12},{7,10,522},{7,10,809},{8,10,797},{141,10,88},{6,11,193},{7,11,240},{7,
+11,1682},{10,11,51},{10,11,640},{11,11,410},{13,11,82},{14,11,247},{14,11,331},{
+142,11,377},{133,10,528},{135,0,1777},{4,0,493},{144,0,55},{136,11,633},{139,0,
+81},{6,0,980},{136,0,321},{148,10,109},{5,10,266},{9,10,290},{9,10,364},{10,10,
+293},{11,10,606},{142,10,45},{6,0,568},{7,0,112},{7,0,1804},{8,0,362},{8,0,410},
+{8,0,830},{9,0,514},{11,0,649},{142,0,157},{4,0,74},{6,0,510},{6,10,594},{9,10,
+121},{10,10,49},{10,10,412},{139,10,834},{134,0,838},{136,10,748},{132,10,466},{
+132,0,625},{135,11,1443},{4,11,237},{135,11,514},{9,10,378},{141,10,162},{6,0,16
+},{6,0,158},{7,0,43},{7,0,129},{7,0,181},{8,0,276},{8,0,377},{10,0,523},{11,0,
+816},{12,0,455},{13,0,303},{142,0,135},{135,0,281},{4,0,1},{7,0,1143},{7,0,1463}
+,{8,0,61},{9,0,207},{9,0,390},{9,0,467},{139,0,836},{6,11,392},{7,11,65},{135,11
+,2019},{132,10,667},{4,0,723},{5,0,895},{7,0,1031},{8,0,199},{8,0,340},{9,0,153}
+,{9,0,215},{10,0,21},{10,0,59},{10,0,80},{10,0,224},{10,0,838},{11,0,229},{11,0,
+652},{12,0,192},{13,0,146},{142,0,91},{132,0,295},{137,0,51},{9,11,222},{10,11,
+43},{139,11,900},{5,0,309},{140,0,211},{5,0,125},{8,0,77},{138,0,15},{136,11,604
+},{138,0,789},{5,0,173},{4,10,39},{7,10,1843},{8,10,407},{11,10,144},{140,10,523
+},{138,11,265},{133,0,439},{132,10,510},{7,0,648},{7,0,874},{11,0,164},{12,0,76}
+,{18,0,9},{7,10,1980},{10,10,487},{138,10,809},{12,0,111},{14,0,294},{19,0,45},{
+13,10,260},{146,10,63},{133,11,549},{134,10,570},{4,0,8},{7,0,1152},{7,0,1153},{
+7,0,1715},{9,0,374},{10,0,478},{139,0,648},{135,0,1099},{5,0,575},{6,0,354},{135
+,0,701},{7,11,36},{8,11,201},{136,11,605},{4,10,787},{136,11,156},{6,0,518},{149
+,11,13},{140,11,224},{134,0,702},{132,10,516},{5,11,724},{10,11,305},{11,11,151}
+,{12,11,33},{12,11,121},{12,11,381},{17,11,3},{17,11,27},{17,11,78},{18,11,18},{
+19,11,54},{149,11,5},{8,0,87},{4,11,523},{5,11,638},{11,10,887},{14,10,365},{142
+,10,375},{138,0,438},{136,10,821},{135,11,1908},{6,11,242},{7,11,227},{7,11,1581
+},{8,11,104},{9,11,113},{9,11,220},{9,11,427},{10,11,74},{10,11,239},{11,11,579}
+,{11,11,1023},{13,11,4},{13,11,204},{13,11,316},{18,11,95},{148,11,86},{4,0,69},
+{5,0,122},{5,0,849},{6,0,1633},{9,0,656},{138,0,464},{7,0,1802},{4,10,10},{139,
+10,786},{135,11,861},{139,0,499},{7,0,476},{7,0,1592},{138,0,87},{133,10,684},{4
+,0,840},{134,10,27},{142,0,283},{6,0,1620},{7,11,1328},{136,11,494},{5,0,859},{7
+,0,1160},{8,0,107},{9,0,291},{9,0,439},{10,0,663},{11,0,609},{140,0,197},{7,11,
+1306},{8,11,505},{9,11,482},{10,11,126},{11,11,225},{12,11,347},{12,11,449},{13,
+11,19},{142,11,218},{5,11,268},{10,11,764},{12,11,120},{13,11,39},{145,11,127},{
+145,10,56},{7,11,1672},{10,11,472},{11,11,189},{143,11,51},{6,10,342},{6,10,496}
+,{8,10,275},{137,10,206},{133,0,600},{4,0,117},{6,0,372},{7,0,1905},{142,0,323},
+{4,10,909},{5,10,940},{135,11,1471},{132,10,891},{4,0,722},{139,0,471},{4,11,384
+},{135,11,1022},{132,10,687},{9,0,5},{12,0,216},{12,0,294},{12,0,298},{12,0,400}
+,{12,0,518},{13,0,229},{143,0,139},{135,11,1703},{7,11,1602},{10,11,698},{12,11,
+212},{141,11,307},{6,10,41},{141,10,160},{135,11,1077},{9,11,159},{11,11,28},{
+140,11,603},{4,0,514},{7,0,1304},{138,0,477},{134,0,1774},{9,0,88},{139,0,270},{
+5,0,12},{7,0,375},{9,0,438},{134,10,1718},{132,11,515},{136,10,778},{8,11,632},{
+8,11,697},{137,11,854},{6,0,362},{6,0,997},{146,0,51},{7,0,816},{7,0,1241},{9,0,
+283},{9,0,520},{10,0,213},{10,0,307},{10,0,463},{10,0,671},{10,0,746},{11,0,401}
+,{11,0,794},{12,0,517},{18,0,107},{147,0,115},{133,10,115},{150,11,28},{4,11,136
+},{133,11,551},{142,10,314},{132,0,258},{6,0,22},{7,0,903},{7,0,1963},{8,0,639},
+{138,0,577},{5,0,681},{8,0,782},{13,0,130},{17,0,84},{5,10,193},{140,10,178},{9,
+11,17},{138,11,291},{7,11,1287},{9,11,44},{10,11,552},{10,11,642},{11,11,839},{
+12,11,274},{12,11,275},{12,11,372},{13,11,91},{142,11,125},{135,10,174},{4,0,664
+},{5,0,804},{139,0,1013},{134,0,942},{6,0,1349},{6,0,1353},{6,0,1450},{7,11,1518
+},{139,11,694},{11,0,356},{4,10,122},{5,10,796},{5,10,952},{6,10,1660},{6,10,
+1671},{8,10,567},{9,10,687},{9,10,742},{10,10,686},{11,10,682},{140,10,281},{5,0
+,32},{6,11,147},{7,11,886},{9,11,753},{138,11,268},{5,10,179},{7,10,1095},{135,
+10,1213},{4,10,66},{7,10,722},{135,10,904},{135,10,352},{9,11,245},{138,11,137},
+{4,0,289},{7,0,629},{7,0,1698},{7,0,1711},{12,0,215},{133,11,414},{6,0,1975},{
+135,11,1762},{6,0,450},{136,0,109},{141,10,35},{134,11,599},{136,0,705},{133,0,
+664},{134,11,1749},{11,11,402},{12,11,109},{12,11,431},{13,11,179},{13,11,206},{
+14,11,175},{14,11,217},{16,11,3},{148,11,53},{135,0,1238},{134,11,1627},{132,11,
+488},{13,0,318},{10,10,592},{10,10,753},{12,10,317},{12,10,355},{12,10,465},{12,
+10,469},{12,10,560},{140,10,578},{133,10,564},{132,11,83},{140,11,676},{6,0,1872
+},{6,0,1906},{6,0,1907},{9,0,934},{9,0,956},{9,0,960},{9,0,996},{12,0,794},{12,0
+,876},{12,0,880},{12,0,918},{15,0,230},{18,0,234},{18,0,238},{21,0,38},{149,0,62
+},{134,10,556},{134,11,278},{137,0,103},{7,10,544},{8,10,719},{138,10,61},{4,10,
+5},{5,10,498},{8,10,637},{137,10,521},{7,0,777},{12,0,229},{12,0,239},{15,0,12},
+{12,11,229},{12,11,239},{143,11,12},{6,0,26},{7,11,388},{7,11,644},{139,11,781},
+{7,11,229},{8,11,59},{9,11,190},{9,11,257},{10,11,378},{140,11,191},{133,10,927}
+,{135,10,1441},{4,10,893},{5,10,780},{133,10,893},{4,0,414},{5,0,467},{9,0,654},
+{10,0,451},{12,0,59},{141,0,375},{142,0,173},{135,0,17},{7,0,1350},{133,10,238},
+{135,0,955},{4,0,960},{10,0,887},{12,0,753},{18,0,161},{18,0,162},{152,0,19},{
+136,11,344},{6,10,1729},{137,11,288},{132,11,660},{4,0,217},{5,0,710},{7,0,760},
+{7,0,1926},{9,0,428},{9,0,708},{10,0,254},{10,0,296},{10,0,720},{11,0,109},{11,0
+,255},{12,0,165},{12,0,315},{13,0,107},{13,0,203},{14,0,54},{14,0,99},{14,0,114}
+,{14,0,388},{16,0,85},{17,0,9},{17,0,33},{20,0,25},{20,0,28},{20,0,29},{21,0,9},
+{21,0,10},{21,0,34},{22,0,17},{4,10,60},{7,10,1800},{8,10,314},{9,10,700},{139,
+10,487},{7,11,1035},{138,11,737},{7,11,690},{9,11,217},{9,11,587},{140,11,521},{
+6,0,919},{7,11,706},{7,11,1058},{138,11,538},{7,10,1853},{138,10,437},{136,10,
+419},{6,0,280},{10,0,502},{11,0,344},{140,0,38},{5,0,45},{7,0,1161},{11,0,448},{
+11,0,880},{13,0,139},{13,0,407},{15,0,16},{17,0,95},{18,0,66},{18,0,88},{18,0,
+123},{149,0,7},{11,11,92},{11,11,196},{11,11,409},{11,11,450},{11,11,666},{11,11
+,777},{12,11,262},{13,11,385},{13,11,393},{15,11,115},{16,11,45},{145,11,82},{
+136,0,777},{134,11,1744},{4,0,410},{7,0,521},{133,10,828},{134,0,673},{7,0,1110}
+,{7,0,1778},{7,10,176},{135,10,178},{5,10,806},{7,11,268},{7,10,1976},{136,11,
+569},{4,11,733},{9,11,194},{10,11,92},{11,11,198},{12,11,84},{12,11,87},{13,11,
+128},{144,11,74},{5,0,341},{7,0,1129},{11,0,414},{4,10,51},{6,10,4},{7,10,591},{
+7,10,849},{7,10,951},{7,10,1613},{7,10,1760},{7,10,1988},{9,10,434},{10,10,754},
+{11,10,25},{139,10,37},{133,10,902},{135,10,928},{135,0,787},{132,0,436},{134,10
+,270},{7,0,1587},{135,0,1707},{6,0,377},{7,0,1025},{9,0,613},{145,0,104},{7,11,
+982},{7,11,1361},{10,11,32},{143,11,56},{139,0,96},{132,0,451},{132,10,416},{142
+,10,372},{5,10,152},{5,10,197},{7,11,306},{7,10,340},{7,10,867},{10,10,548},{10,
+10,581},{11,10,6},{12,10,3},{12,10,19},{14,10,110},{142,10,289},{134,0,680},{134
+,11,609},{7,0,483},{7,10,190},{8,10,28},{8,10,141},{8,10,444},{8,10,811},{9,10,
+468},{11,10,334},{12,10,24},{12,10,386},{140,10,576},{10,0,916},{133,10,757},{5,
+10,721},{135,10,1553},{133,11,178},{134,0,937},{132,10,898},{133,0,739},{147,0,
+82},{135,0,663},{146,0,128},{5,10,277},{141,10,247},{134,0,1087},{132,10,435},{6
+,11,381},{7,11,645},{7,11,694},{136,11,546},{7,0,503},{135,0,1885},{6,0,1965},{8
+,0,925},{138,0,955},{4,0,113},{5,0,163},{5,0,735},{7,0,1009},{9,0,9},{9,0,771},{
+12,0,90},{13,0,138},{13,0,410},{143,0,128},{4,0,324},{138,0,104},{7,0,460},{5,10
+,265},{134,10,212},{133,11,105},{7,11,261},{7,11,1107},{7,11,1115},{7,11,1354},{
+7,11,1588},{7,11,1705},{7,11,1902},{9,11,465},{10,11,248},{10,11,349},{10,11,647
+},{11,11,527},{11,11,660},{11,11,669},{12,11,529},{141,11,305},{5,11,438},{9,11,
+694},{12,11,627},{141,11,210},{152,11,11},{4,0,935},{133,0,823},{132,10,702},{5,
+0,269},{7,0,434},{7,0,891},{8,0,339},{9,0,702},{11,0,594},{11,0,718},{17,0,100},
+{5,10,808},{135,10,2045},{7,0,1014},{9,0,485},{141,0,264},{134,0,1713},{7,0,1810
+},{11,0,866},{12,0,103},{13,0,495},{140,11,233},{4,0,423},{10,0,949},{138,0,1013
+},{135,0,900},{8,11,25},{138,11,826},{5,10,166},{8,10,739},{140,10,511},{134,0,
+2018},{7,11,1270},{139,11,612},{4,10,119},{5,10,170},{5,10,447},{7,10,1708},{7,
+10,1889},{9,10,357},{9,10,719},{12,10,486},{140,10,596},{12,0,574},{140,11,574},
+{132,11,308},{6,0,964},{6,0,1206},{134,0,1302},{4,10,450},{135,10,1158},{135,11,
+150},{136,11,649},{14,0,213},{148,0,38},{9,11,45},{9,11,311},{141,11,42},{134,11
+,521},{7,10,1375},{7,10,1466},{138,10,331},{132,10,754},{5,11,339},{7,11,1442},{
+14,11,3},{15,11,41},{147,11,66},{136,11,378},{134,0,1022},{5,10,850},{136,10,799
+},{142,0,143},{135,0,2029},{134,11,1628},{8,0,523},{150,0,34},{5,0,625},{135,0,
+1617},{7,0,275},{7,10,238},{7,10,2033},{8,10,120},{8,10,188},{8,10,659},{9,10,
+598},{10,10,466},{12,10,342},{12,10,588},{13,10,503},{14,10,246},{143,10,92},{7,
+0,37},{8,0,425},{8,0,693},{9,0,720},{10,0,380},{10,0,638},{11,0,273},{11,0,473},
+{12,0,61},{143,0,43},{135,11,829},{135,0,1943},{132,0,765},{5,11,486},{135,11,
+1349},{7,11,1635},{8,11,17},{10,11,217},{138,11,295},{4,10,201},{7,10,1744},{8,
+10,602},{11,10,247},{11,10,826},{145,10,65},{138,11,558},{11,0,551},{142,0,159},
+{8,10,164},{146,10,62},{139,11,176},{132,0,168},{136,0,1010},{134,0,1994},{135,0
+,91},{138,0,532},{135,10,1243},{135,0,1884},{132,10,907},{5,10,100},{10,10,329},
+{12,10,416},{149,10,29},{134,11,447},{132,10,176},{5,10,636},{5,10,998},{7,10,9}
+,{7,10,1508},{8,10,26},{9,10,317},{9,10,358},{10,10,210},{10,10,292},{10,10,533}
+,{11,10,555},{12,10,526},{12,10,607},{13,10,263},{13,10,459},{142,10,271},{4,11,
+609},{135,11,756},{6,0,15},{7,0,70},{10,0,240},{147,0,93},{4,11,930},{133,11,947
+},{134,0,1227},{134,0,1534},{133,11,939},{133,11,962},{5,11,651},{8,11,170},{9,
+11,61},{9,11,63},{10,11,23},{10,11,37},{10,11,834},{11,11,4},{11,11,187},{11,11,
+281},{11,11,503},{11,11,677},{12,11,96},{12,11,130},{12,11,244},{14,11,5},{14,11
+,40},{14,11,162},{14,11,202},{146,11,133},{4,11,406},{5,11,579},{12,11,492},{150
+,11,15},{139,0,392},{6,10,610},{10,10,127},{141,10,27},{7,0,655},{7,0,1844},{136
+,10,119},{4,0,145},{6,0,176},{7,0,395},{137,0,562},{132,0,501},{140,11,145},{136
+,0,1019},{134,0,509},{139,0,267},{6,11,17},{7,11,16},{7,11,1001},{7,11,1982},{9,
+11,886},{10,11,489},{10,11,800},{11,11,782},{12,11,320},{13,11,467},{14,11,145},
+{14,11,387},{143,11,119},{145,11,17},{6,0,1099},{133,11,458},{7,11,1983},{8,11,0
+},{8,11,171},{9,11,120},{9,11,732},{10,11,473},{11,11,656},{11,11,998},{18,11,0}
+,{18,11,2},{147,11,21},{12,11,427},{146,11,38},{10,0,948},{138,0,968},{7,10,126}
+,{136,10,84},{136,10,790},{4,0,114},{9,0,492},{13,0,462},{142,0,215},{6,10,64},{
+12,10,377},{141,10,309},{4,0,77},{5,0,361},{6,0,139},{6,0,401},{6,0,404},{7,0,
+413},{7,0,715},{7,0,1716},{11,0,279},{12,0,179},{12,0,258},{13,0,244},{142,0,358
+},{134,0,1717},{7,0,772},{7,0,1061},{7,0,1647},{8,0,82},{11,0,250},{11,0,607},{
+12,0,311},{12,0,420},{13,0,184},{13,0,367},{7,10,1104},{11,10,269},{11,10,539},{
+11,10,627},{11,10,706},{11,10,975},{12,10,248},{12,10,434},{12,10,600},{12,10,
+622},{13,10,297},{13,10,485},{14,10,69},{14,10,409},{143,10,108},{135,0,724},{4,
+11,512},{4,11,519},{133,11,342},{134,0,1133},{145,11,29},{11,10,977},{141,10,507
+},{6,0,841},{6,0,1042},{6,0,1194},{10,0,993},{140,0,1021},{6,11,31},{7,11,491},{
+7,11,530},{8,11,592},{9,10,34},{11,11,53},{11,10,484},{11,11,779},{12,11,167},{
+12,11,411},{14,11,14},{14,11,136},{15,11,72},{16,11,17},{144,11,72},{4,0,1021},{
+6,0,2037},{133,11,907},{7,0,373},{8,0,335},{8,0,596},{9,0,488},{6,10,1700},{7,10
+,293},{7,10,382},{7,10,1026},{7,10,1087},{7,10,2027},{8,10,252},{8,10,727},{8,10
+,729},{9,10,30},{9,10,199},{9,10,231},{9,10,251},{9,10,334},{9,10,361},{9,10,712
+},{10,10,55},{10,10,60},{10,10,232},{10,10,332},{10,10,384},{10,10,396},{10,10,
+504},{10,10,542},{10,10,652},{11,10,20},{11,10,48},{11,10,207},{11,10,291},{11,
+10,298},{11,10,342},{11,10,365},{11,10,394},{11,10,620},{11,10,705},{11,10,1017}
+,{12,10,123},{12,10,340},{12,10,406},{12,10,643},{13,10,61},{13,10,269},{13,10,
+311},{13,10,319},{13,10,486},{14,10,234},{15,10,62},{15,10,85},{16,10,71},{18,10
+,119},{148,10,105},{150,0,37},{4,11,208},{5,11,106},{6,11,531},{8,11,408},{9,11,
+188},{138,11,572},{132,0,564},{6,0,513},{135,0,1052},{132,0,825},{9,0,899},{140,
+11,441},{134,0,778},{133,11,379},{7,0,1417},{12,0,382},{17,0,48},{152,0,12},{132
+,11,241},{7,0,1116},{6,10,379},{7,10,270},{8,10,176},{8,10,183},{9,10,432},{9,10
+,661},{12,10,247},{12,10,617},{146,10,125},{5,10,792},{133,10,900},{6,0,545},{7,
+0,565},{7,0,1669},{10,0,114},{11,0,642},{140,0,618},{133,0,5},{138,11,7},{132,11
+,259},{135,0,192},{134,0,701},{136,0,763},{135,10,1979},{4,10,901},{133,10,776},
+{10,0,755},{147,0,29},{133,0,759},{4,11,173},{5,11,312},{5,11,512},{135,11,1285}
+,{7,11,1603},{7,11,1691},{9,11,464},{11,11,195},{12,11,279},{12,11,448},{14,11,
+11},{147,11,102},{7,0,370},{7,0,1007},{7,0,1177},{135,0,1565},{135,0,1237},{4,0,
+87},{5,0,250},{141,0,298},{4,11,452},{5,11,583},{5,11,817},{6,11,433},{7,11,593}
+,{7,11,720},{7,11,1378},{8,11,161},{9,11,284},{10,11,313},{139,11,886},{4,11,547
+},{135,11,1409},{136,11,722},{4,10,37},{5,10,334},{135,10,1253},{132,10,508},{12
+,0,107},{146,0,31},{8,11,420},{139,11,193},{135,0,814},{135,11,409},{140,0,991},
+{4,0,57},{7,0,1195},{7,0,1438},{7,0,1548},{7,0,1835},{7,0,1904},{9,0,757},{10,0,
+604},{139,0,519},{132,0,540},{138,11,308},{132,10,533},{136,0,608},{144,11,65},{
+4,0,1014},{134,0,2029},{4,0,209},{7,0,902},{5,11,1002},{136,11,745},{134,0,2030}
+,{6,0,303},{7,0,335},{7,0,1437},{7,0,1668},{8,0,553},{8,0,652},{8,0,656},{9,0,
+558},{11,0,743},{149,0,18},{5,11,575},{6,11,354},{135,11,701},{4,11,239},{6,11,
+477},{7,11,1607},{11,11,68},{139,11,617},{132,0,559},{8,0,527},{18,0,60},{147,0,
+24},{133,10,920},{138,0,511},{133,0,1017},{133,0,675},{138,10,391},{11,0,156},{
+135,10,1952},{138,11,369},{132,11,367},{133,0,709},{6,0,698},{134,0,887},{142,10
+,126},{134,0,1745},{132,10,483},{13,11,299},{142,11,75},{133,0,714},{7,0,8},{136
+,0,206},{138,10,480},{4,11,694},{9,10,495},{146,10,104},{7,11,1248},{11,11,621},
+{139,11,702},{140,11,687},{132,0,776},{139,10,1009},{135,0,1272},{134,0,1059},{8
+,10,653},{13,10,93},{147,10,14},{135,11,213},{136,0,406},{133,10,172},{132,0,947
+},{8,0,175},{10,0,168},{138,0,573},{132,0,870},{6,0,1567},{151,11,28},{134,11,
+472},{5,10,260},{136,11,132},{4,11,751},{11,11,390},{140,11,32},{4,11,409},{133,
+11,78},{12,0,554},{6,11,473},{145,11,105},{133,0,784},{8,0,908},{136,11,306},{
+139,0,882},{6,0,358},{7,0,1393},{8,0,396},{10,0,263},{14,0,154},{16,0,48},{17,0,
+8},{7,11,1759},{8,11,396},{10,11,263},{14,11,154},{16,11,48},{145,11,8},{13,11,
+163},{13,11,180},{18,11,78},{148,11,35},{14,0,32},{18,0,85},{20,0,2},{152,0,16},
+{7,0,228},{10,0,770},{8,10,167},{8,10,375},{9,10,82},{9,10,561},{138,10,620},{
+132,0,845},{9,0,14},{9,0,441},{10,0,306},{139,0,9},{11,0,966},{12,0,287},{13,0,
+342},{13,0,402},{15,0,110},{15,0,163},{8,10,194},{136,10,756},{134,0,1578},{4,0,
+967},{6,0,1820},{6,0,1847},{140,0,716},{136,0,594},{7,0,1428},{7,0,1640},{7,0,
+1867},{9,0,169},{9,0,182},{9,0,367},{9,0,478},{9,0,506},{9,0,551},{9,0,557},{9,0
+,648},{9,0,697},{9,0,705},{9,0,725},{9,0,787},{9,0,794},{10,0,198},{10,0,214},{
+10,0,267},{10,0,275},{10,0,456},{10,0,551},{10,0,561},{10,0,613},{10,0,627},{10,
+0,668},{10,0,675},{10,0,691},{10,0,695},{10,0,707},{10,0,715},{11,0,183},{11,0,
+201},{11,0,244},{11,0,262},{11,0,352},{11,0,439},{11,0,493},{11,0,572},{11,0,591
+},{11,0,608},{11,0,611},{11,0,646},{11,0,674},{11,0,711},{11,0,751},{11,0,761},{
+11,0,776},{11,0,785},{11,0,850},{11,0,853},{11,0,862},{11,0,865},{11,0,868},{11,
+0,875},{11,0,898},{11,0,902},{11,0,903},{11,0,910},{11,0,932},{11,0,942},{11,0,
+957},{11,0,967},{11,0,972},{12,0,148},{12,0,195},{12,0,220},{12,0,237},{12,0,318
+},{12,0,339},{12,0,393},{12,0,445},{12,0,450},{12,0,474},{12,0,505},{12,0,509},{
+12,0,533},{12,0,591},{12,0,594},{12,0,597},{12,0,621},{12,0,633},{12,0,642},{13,
+0,59},{13,0,60},{13,0,145},{13,0,239},{13,0,250},{13,0,329},{13,0,344},{13,0,365
+},{13,0,372},{13,0,387},{13,0,403},{13,0,414},{13,0,456},{13,0,470},{13,0,478},{
+13,0,483},{13,0,489},{14,0,55},{14,0,57},{14,0,81},{14,0,90},{14,0,148},{14,0,
+239},{14,0,266},{14,0,321},{14,0,326},{14,0,327},{14,0,330},{14,0,347},{14,0,355
+},{14,0,401},{14,0,404},{14,0,411},{14,0,414},{14,0,416},{14,0,420},{15,0,61},{
+15,0,74},{15,0,87},{15,0,88},{15,0,94},{15,0,96},{15,0,116},{15,0,149},{15,0,154
+},{16,0,50},{16,0,63},{16,0,73},{17,0,2},{17,0,66},{17,0,92},{17,0,103},{17,0,
+112},{17,0,120},{18,0,50},{18,0,54},{18,0,82},{18,0,86},{18,0,90},{18,0,111},{18
+,0,115},{18,0,156},{19,0,40},{19,0,79},{20,0,78},{21,0,22},{135,11,883},{5,0,161
+},{135,0,839},{4,0,782},{13,11,293},{142,11,56},{133,11,617},{139,11,50},{135,10
+,22},{145,0,64},{5,10,639},{7,10,1249},{139,10,896},{138,0,998},{135,11,2042},{4
+,11,546},{142,11,233},{6,0,1043},{134,0,1574},{134,0,1496},{4,10,102},{7,10,815}
+,{7,10,1699},{139,10,964},{12,0,781},{142,0,461},{4,11,313},{133,11,577},{6,0,
+639},{6,0,1114},{137,0,817},{8,11,184},{141,11,433},{7,0,1814},{135,11,935},{10,
+0,997},{140,0,958},{4,0,812},{137,11,625},{132,10,899},{136,10,795},{5,11,886},{
+6,11,46},{6,11,1790},{7,11,14},{7,11,732},{7,11,1654},{8,11,95},{8,11,327},{8,11
+,616},{10,11,598},{10,11,769},{11,11,134},{11,11,747},{12,11,378},{142,11,97},{
+136,0,139},{6,10,52},{9,10,104},{9,10,559},{12,10,308},{147,10,87},{133,11,1021}
+,{132,10,604},{132,10,301},{136,10,779},{7,0,643},{136,0,236},{132,11,153},{134,
+0,1172},{147,10,32},{133,11,798},{6,0,1338},{132,11,587},{6,11,598},{7,11,42},{8
+,11,695},{10,11,212},{11,11,158},{14,11,196},{145,11,85},{135,10,508},{5,11,957}
+,{5,11,1008},{135,11,249},{4,11,129},{135,11,465},{5,0,54},{7,11,470},{7,11,1057
+},{7,11,1201},{9,11,755},{11,11,906},{140,11,527},{7,11,908},{146,11,7},{5,11,
+148},{136,11,450},{144,11,1},{4,0,256},{135,0,1488},{9,0,351},{6,10,310},{7,10,
+1849},{8,10,72},{8,10,272},{8,10,431},{9,10,12},{10,10,563},{10,10,630},{10,10,
+796},{10,10,810},{11,10,367},{11,10,599},{11,10,686},{140,10,672},{6,0,1885},{6,
+0,1898},{6,0,1899},{140,0,955},{4,0,714},{133,0,469},{6,0,1270},{134,0,1456},{
+132,0,744},{6,0,313},{7,10,537},{8,10,64},{9,10,127},{10,10,496},{12,10,510},{
+141,10,384},{4,11,217},{4,10,244},{5,11,710},{7,10,233},{7,11,1926},{9,11,428},{
+9,11,708},{10,11,254},{10,11,296},{10,11,720},{11,11,109},{11,11,255},{12,11,165
+},{12,11,315},{13,11,107},{13,11,203},{14,11,54},{14,11,99},{14,11,114},{14,11,
+388},{16,11,85},{17,11,9},{17,11,33},{20,11,25},{20,11,28},{20,11,29},{21,11,9},
+{21,11,10},{21,11,34},{150,11,17},{138,0,402},{7,0,969},{146,0,55},{8,0,50},{137
+,0,624},{134,0,1355},{132,0,572},{134,10,1650},{10,10,702},{139,10,245},{10,0,
+847},{142,0,445},{6,0,43},{7,0,38},{8,0,248},{138,0,513},{133,0,369},{137,10,338
+},{133,0,766},{133,0,363},{133,10,896},{8,11,392},{11,11,54},{13,11,173},{13,11,
+294},{148,11,7},{134,0,678},{7,11,1230},{136,11,531},{6,0,258},{140,0,409},{5,0,
+249},{148,0,82},{7,10,1117},{136,10,539},{5,0,393},{6,0,378},{7,0,1981},{9,0,32}
+,{9,0,591},{10,0,685},{10,0,741},{142,0,382},{133,0,788},{134,0,1281},{134,0,
+1295},{7,0,1968},{141,0,509},{4,0,61},{5,0,58},{5,0,171},{5,0,683},{6,0,291},{6,
+0,566},{7,0,1650},{11,0,523},{12,0,273},{12,0,303},{15,0,39},{143,0,111},{6,0,
+706},{134,0,1283},{134,0,589},{135,11,1433},{133,11,435},{7,0,1059},{13,0,54},{5
+,10,4},{5,10,810},{6,10,13},{6,10,538},{6,10,1690},{6,10,1726},{7,10,1819},{8,10
+,148},{8,10,696},{8,10,791},{12,10,125},{143,10,9},{135,10,1268},{5,11,85},{6,11
+,419},{7,11,134},{7,11,305},{7,11,361},{7,11,1337},{8,11,71},{140,11,519},{137,0
+,824},{140,11,688},{5,11,691},{7,11,345},{7,10,1385},{9,11,94},{11,10,582},{11,
+10,650},{11,10,901},{11,10,949},{12,11,169},{12,10,232},{12,10,236},{13,10,413},
+{13,10,501},{146,10,116},{4,0,917},{133,0,1005},{7,0,1598},{5,11,183},{6,11,582}
+,{9,11,344},{10,11,679},{140,11,435},{4,10,925},{5,10,803},{8,10,698},{138,10,
+828},{132,0,919},{135,11,511},{139,10,992},{4,0,255},{5,0,302},{6,0,132},{7,0,
+128},{7,0,283},{7,0,1299},{10,0,52},{10,0,514},{11,0,925},{13,0,92},{142,0,309},
+{134,0,1369},{135,10,1847},{134,0,328},{7,11,1993},{136,11,684},{133,10,383},{
+137,0,173},{134,11,583},{134,0,1411},{19,0,65},{5,11,704},{8,11,357},{10,11,745}
+,{14,11,426},{17,11,94},{147,11,57},{9,10,660},{138,10,347},{4,11,179},{5,11,198
+},{133,11,697},{7,11,347},{7,11,971},{8,11,181},{138,11,711},{141,0,442},{11,0,
+842},{11,0,924},{13,0,317},{13,0,370},{13,0,469},{13,0,471},{14,0,397},{18,0,69}
+,{18,0,145},{7,10,572},{9,10,592},{11,10,680},{12,10,356},{140,10,550},{14,11,19
+},{14,11,28},{144,11,29},{136,0,534},{4,11,243},{5,11,203},{7,11,19},{7,11,71},{
+7,11,113},{10,11,405},{11,11,357},{142,11,240},{6,0,210},{10,0,845},{138,0,862},
+{7,11,1351},{9,11,581},{10,11,639},{11,11,453},{140,11,584},{7,11,1450},{139,11,
+99},{10,0,892},{12,0,719},{144,0,105},{4,0,284},{6,0,223},{134,11,492},{5,11,134
+},{6,11,408},{6,11,495},{135,11,1593},{136,0,529},{137,0,807},{4,0,218},{7,0,526
+},{143,0,137},{6,0,1444},{142,11,4},{132,11,665},{4,0,270},{5,0,192},{6,0,332},{
+7,0,1322},{4,11,248},{7,11,137},{137,11,349},{140,0,661},{7,0,1517},{11,0,597},{
+14,0,76},{14,0,335},{20,0,33},{7,10,748},{139,10,700},{5,11,371},{135,11,563},{
+146,11,57},{133,10,127},{133,0,418},{4,11,374},{7,11,547},{7,11,1700},{7,11,1833
+},{139,11,858},{6,10,198},{140,10,83},{7,11,1812},{13,11,259},{13,11,356},{14,11
+,242},{147,11,114},{7,0,379},{8,0,481},{9,0,377},{5,10,276},{6,10,55},{135,10,
+1369},{138,11,286},{5,0,1003},{6,0,149},{6,10,1752},{136,10,726},{8,0,262},{9,0,
+627},{10,0,18},{11,0,214},{11,0,404},{11,0,457},{11,0,780},{11,0,913},{13,0,401}
+,{14,0,200},{6,11,1647},{7,11,1552},{7,11,2010},{9,11,494},{137,11,509},{135,0,
+742},{136,0,304},{132,0,142},{133,10,764},{6,10,309},{7,10,331},{138,10,550},{
+135,10,1062},{6,11,123},{7,11,214},{7,10,986},{9,11,728},{10,11,157},{11,11,346}
+,{11,11,662},{143,11,106},{135,10,1573},{7,0,925},{137,0,799},{4,0,471},{5,0,51}
+,{6,0,602},{8,0,484},{138,0,195},{136,0,688},{132,0,697},{6,0,1169},{6,0,1241},{
+6,10,194},{7,10,133},{10,10,493},{10,10,570},{139,10,664},{140,0,751},{7,0,929},
+{10,0,452},{11,0,878},{16,0,33},{5,10,24},{5,10,569},{6,10,3},{6,10,119},{6,10,
+143},{6,10,440},{7,10,599},{7,10,1686},{7,10,1854},{8,10,424},{9,10,43},{9,10,
+584},{9,10,760},{10,10,328},{11,10,159},{11,10,253},{12,10,487},{140,10,531},{4,
+11,707},{13,11,106},{18,11,49},{147,11,41},{5,0,221},{5,11,588},{134,11,393},{
+134,0,1437},{6,11,211},{7,11,1690},{11,11,486},{140,11,369},{5,10,14},{5,10,892}
+,{6,10,283},{7,10,234},{136,10,537},{4,0,988},{136,0,955},{135,0,1251},{4,10,126
+},{8,10,635},{147,10,34},{4,10,316},{135,10,1561},{137,10,861},{4,10,64},{5,10,
+352},{5,10,720},{6,10,368},{139,10,359},{134,0,192},{4,0,132},{5,0,69},{135,0,
+1242},{7,10,1577},{10,10,304},{10,10,549},{12,10,365},{13,10,220},{13,10,240},{
+142,10,33},{4,0,111},{7,0,865},{134,11,219},{5,11,582},{6,11,1646},{7,11,99},{7,
+11,1962},{7,11,1986},{8,11,515},{8,11,773},{9,11,23},{9,11,491},{12,11,620},{14,
+11,52},{145,11,50},{132,0,767},{7,11,568},{148,11,21},{6,0,42},{7,0,1416},{7,0,
+2005},{8,0,131},{8,0,466},{9,0,672},{13,0,252},{20,0,103},{133,11,851},{135,0,
+1050},{6,10,175},{137,10,289},{5,10,432},{133,10,913},{6,0,44},{136,0,368},{135,
+11,784},{132,0,570},{133,0,120},{139,10,595},{140,0,29},{6,0,227},{135,0,1589},{
+4,11,98},{7,11,1365},{9,11,422},{9,11,670},{10,11,775},{11,11,210},{13,11,26},{
+13,11,457},{141,11,476},{140,10,80},{5,10,931},{134,10,1698},{133,0,522},{134,0,
+1120},{135,0,1529},{12,0,739},{14,0,448},{142,0,467},{11,10,526},{11,10,939},{
+141,10,290},{5,10,774},{6,10,1637},{6,10,1686},{134,10,1751},{6,0,1667},{135,0,
+2036},{7,10,1167},{11,10,934},{13,10,391},{145,10,76},{137,11,147},{6,10,260},{7
+,10,1484},{11,11,821},{12,11,110},{12,11,153},{18,11,41},{150,11,19},{6,0,511},{
+12,0,132},{134,10,573},{5,0,568},{6,0,138},{135,0,1293},{132,0,1020},{8,0,258},{
+9,0,208},{137,0,359},{4,0,565},{8,0,23},{136,0,827},{134,0,344},{4,0,922},{5,0,
+1023},{13,11,477},{14,11,120},{148,11,61},{134,0,240},{5,11,209},{6,11,30},{11,
+11,56},{139,11,305},{6,0,171},{7,0,1002},{7,0,1324},{9,0,415},{14,0,230},{18,0,
+68},{4,10,292},{4,10,736},{5,10,871},{6,10,1689},{7,10,1944},{137,10,580},{9,11,
+635},{139,11,559},{4,11,150},{5,11,303},{134,11,327},{6,10,63},{135,10,920},{133
+,10,793},{8,11,192},{10,11,78},{10,11,555},{11,11,308},{13,11,359},{147,11,95},{
+135,11,786},{135,11,1712},{136,0,402},{6,0,754},{6,11,1638},{7,11,79},{7,11,496}
+,{9,11,138},{10,11,336},{11,11,12},{12,11,412},{12,11,440},{142,11,305},{4,0,716
+},{141,0,31},{133,0,982},{8,0,691},{8,0,731},{5,10,67},{6,10,62},{6,10,374},{135
+,10,1391},{9,10,790},{140,10,47},{139,11,556},{151,11,1},{7,11,204},{7,11,415},{
+8,11,42},{10,11,85},{11,11,33},{11,11,564},{12,11,571},{149,11,1},{8,0,888},{7,
+11,610},{135,11,1501},{4,10,391},{135,10,1169},{5,0,847},{9,0,840},{138,0,803},{
+137,0,823},{134,0,785},{8,0,152},{9,0,53},{9,0,268},{9,0,901},{10,0,518},{10,0,
+829},{11,0,188},{13,0,74},{14,0,46},{15,0,17},{15,0,33},{17,0,40},{18,0,36},{19,
+0,20},{22,0,1},{152,0,2},{4,11,3},{5,11,247},{5,11,644},{7,11,744},{7,11,1207},{
+7,11,1225},{7,11,1909},{146,11,147},{136,0,532},{135,0,681},{132,10,271},{140,0,
+314},{140,0,677},{4,0,684},{136,0,384},{5,11,285},{9,11,67},{13,11,473},{143,11,
+82},{4,10,253},{5,10,544},{7,10,300},{137,10,340},{7,0,110},{7,0,447},{8,0,290},
+{8,0,591},{9,0,382},{9,0,649},{11,0,71},{11,0,155},{11,0,313},{12,0,5},{13,0,325
+},{142,0,287},{134,0,1818},{136,0,1007},{138,0,321},{7,0,360},{7,0,425},{9,0,66}
+,{9,0,278},{138,0,644},{133,10,818},{5,0,385},{5,10,541},{6,10,94},{6,10,499},{7
+,10,230},{139,10,321},{4,10,920},{5,10,25},{5,10,790},{6,10,457},{7,10,853},{136
+,10,788},{4,0,900},{133,0,861},{5,0,254},{7,0,985},{136,0,73},{7,0,1959},{136,0,
+683},{134,10,1765},{133,10,822},{132,10,634},{4,11,29},{6,11,532},{7,11,1628},{7
+,11,1648},{9,11,303},{9,11,350},{10,11,433},{11,11,97},{11,11,557},{11,11,745},{
+12,11,289},{12,11,335},{12,11,348},{12,11,606},{13,11,116},{13,11,233},{13,11,
+466},{14,11,181},{14,11,209},{14,11,232},{14,11,236},{14,11,300},{16,11,41},{148
+,11,97},{19,0,86},{6,10,36},{7,10,658},{136,10,454},{135,11,1692},{132,0,725},{5
+,11,501},{7,11,1704},{9,11,553},{11,11,520},{12,11,557},{141,11,249},{134,0,196}
+,{133,0,831},{136,0,723},{7,0,1897},{13,0,80},{13,0,437},{145,0,74},{4,0,992},{6
+,0,627},{136,0,994},{135,11,1294},{132,10,104},{5,0,848},{6,0,66},{136,0,764},{4
+,0,36},{7,0,1387},{10,0,205},{139,0,755},{6,0,1046},{134,0,1485},{134,0,950},{
+132,0,887},{14,0,450},{148,0,111},{7,0,620},{7,0,831},{9,10,542},{9,10,566},{138
+,10,728},{6,0,165},{138,0,388},{139,10,263},{4,0,719},{135,0,155},{138,10,468},{
+6,11,453},{144,11,36},{134,11,129},{5,0,533},{7,0,755},{138,0,780},{134,0,1465},
+{4,0,353},{6,0,146},{6,0,1789},{7,0,427},{7,0,990},{7,0,1348},{9,0,665},{9,0,898
+},{11,0,893},{142,0,212},{7,10,87},{142,10,288},{4,0,45},{135,0,1257},{12,0,7},{
+7,10,988},{7,10,1939},{9,10,64},{9,10,502},{12,10,34},{13,10,12},{13,10,234},{
+147,10,77},{4,0,607},{5,11,60},{6,11,504},{7,11,614},{7,11,1155},{140,11,0},{135
+,10,141},{8,11,198},{11,11,29},{140,11,534},{140,0,65},{136,0,816},{132,10,619},
+{139,0,88},{5,10,246},{8,10,189},{9,10,355},{9,10,512},{10,10,124},{10,10,453},{
+11,10,143},{11,10,416},{11,10,859},{141,10,341},{4,11,379},{135,11,1397},{4,0,
+600},{137,0,621},{133,0,367},{134,0,561},{6,0,559},{134,0,1691},{6,0,585},{134,
+11,585},{135,11,1228},{4,11,118},{5,10,678},{6,11,274},{6,11,361},{7,11,75},{141
+,11,441},{135,11,1818},{137,11,841},{5,0,573},{6,0,287},{7,10,862},{7,10,1886},{
+138,10,179},{132,10,517},{140,11,693},{5,11,314},{6,11,221},{7,11,419},{10,11,
+650},{11,11,396},{12,11,156},{13,11,369},{14,11,333},{145,11,47},{140,10,540},{
+136,10,667},{11,10,403},{146,10,83},{6,0,672},{133,10,761},{9,0,157},{10,10,131}
+,{140,10,72},{7,0,714},{134,11,460},{134,0,456},{133,0,925},{5,11,682},{135,11,
+1887},{136,11,510},{136,11,475},{133,11,1016},{9,0,19},{7,11,602},{8,11,179},{10
+,11,781},{140,11,126},{6,11,329},{138,11,111},{6,0,822},{134,0,1473},{144,11,86}
+,{11,0,113},{139,11,113},{5,11,821},{134,11,1687},{133,10,449},{7,0,463},{17,0,
+69},{136,10,103},{7,10,2028},{138,10,641},{6,0,193},{7,0,240},{7,0,1682},{10,0,
+51},{10,0,640},{11,0,410},{13,0,82},{14,0,247},{14,0,331},{142,0,377},{6,0,471},
+{11,0,411},{142,0,2},{5,11,71},{7,11,1407},{9,11,388},{9,11,704},{10,11,261},{10
+,11,619},{11,11,547},{11,11,619},{143,11,157},{136,0,633},{135,0,1148},{6,0,554}
+,{7,0,1392},{12,0,129},{7,10,1274},{7,10,1386},{7,11,2008},{9,11,337},{10,11,517
+},{146,10,87},{7,0,803},{8,0,542},{6,10,187},{7,10,1203},{8,10,380},{14,10,117},
+{149,10,28},{6,10,297},{7,10,793},{139,10,938},{8,0,438},{11,0,363},{7,10,464},{
+11,10,105},{12,10,231},{14,10,386},{15,10,102},{148,10,75},{5,11,16},{6,11,86},{
+6,11,603},{7,11,292},{7,11,561},{8,11,257},{8,11,382},{9,11,721},{9,11,778},{11,
+11,581},{140,11,466},{6,0,717},{4,11,486},{133,11,491},{132,0,875},{132,11,72},{
+6,11,265},{135,11,847},{4,0,237},{135,0,514},{6,0,392},{7,0,65},{135,0,2019},{
+140,11,261},{135,11,922},{137,11,404},{12,0,563},{14,0,101},{18,0,129},{7,10,
+1010},{11,10,733},{11,10,759},{13,10,34},{146,10,45},{7,10,1656},{9,10,369},{10,
+10,338},{10,10,490},{11,10,154},{11,10,545},{11,10,775},{13,10,77},{141,10,274},
+{4,0,444},{10,0,146},{140,0,9},{139,11,163},{7,0,1260},{135,0,1790},{9,0,222},{
+10,0,43},{139,0,900},{137,11,234},{138,0,971},{137,0,761},{134,0,699},{136,11,
+434},{6,0,1116},{7,0,1366},{5,10,20},{6,11,197},{6,10,298},{7,10,659},{8,11,205}
+,{137,10,219},{132,11,490},{11,11,820},{150,11,51},{7,10,1440},{11,10,854},{11,
+10,872},{11,10,921},{12,10,551},{13,10,472},{142,10,367},{140,11,13},{132,0,829}
+,{12,0,242},{132,10,439},{136,10,669},{6,0,593},{6,11,452},{7,11,312},{138,11,
+219},{4,11,333},{9,11,176},{12,11,353},{141,11,187},{7,0,36},{8,0,201},{136,0,
+605},{140,0,224},{132,10,233},{134,0,1430},{134,0,1806},{4,0,523},{133,0,638},{6
+,0,1889},{9,0,958},{9,0,971},{9,0,976},{12,0,796},{12,0,799},{12,0,808},{12,0,
+835},{12,0,836},{12,0,914},{12,0,946},{15,0,216},{15,0,232},{18,0,183},{18,0,187
+},{18,0,194},{18,0,212},{18,0,232},{149,0,49},{132,10,482},{6,0,827},{134,0,1434
+},{135,10,346},{134,0,2043},{6,0,242},{7,0,227},{7,0,1581},{8,0,104},{9,0,113},{
+9,0,220},{9,0,427},{10,0,136},{10,0,239},{11,0,579},{11,0,1023},{13,0,4},{13,0,
+204},{13,0,316},{148,0,86},{134,11,1685},{7,0,148},{8,0,284},{141,0,63},{142,0,
+10},{135,11,584},{134,0,1249},{7,0,861},{135,10,334},{5,10,795},{6,10,1741},{137
+,11,70},{132,0,807},{7,11,135},{8,11,7},{8,11,62},{9,11,243},{10,11,658},{10,11,
+697},{11,11,456},{139,11,756},{9,11,395},{138,11,79},{137,11,108},{147,0,94},{
+136,0,494},{135,11,631},{135,10,622},{7,0,1510},{135,10,1750},{4,10,203},{135,10
+,1936},{7,11,406},{7,11,459},{8,11,606},{139,11,726},{7,0,1306},{8,0,505},{9,0,
+482},{10,0,126},{11,0,225},{12,0,347},{12,0,449},{13,0,19},{14,0,218},{142,0,435
+},{5,0,268},{10,0,764},{12,0,120},{13,0,39},{145,0,127},{142,11,68},{11,10,678},
+{140,10,307},{12,11,268},{12,11,640},{142,11,119},{135,10,2044},{133,11,612},{4,
+11,372},{7,11,482},{8,11,158},{9,11,602},{9,11,615},{10,11,245},{10,11,678},{10,
+11,744},{11,11,248},{139,11,806},{7,10,311},{9,10,308},{140,10,255},{4,0,384},{
+135,0,1022},{5,11,854},{135,11,1991},{135,10,1266},{4,10,400},{5,10,267},{135,10
+,232},{135,0,1703},{9,0,159},{11,0,661},{140,0,603},{4,0,964},{14,0,438},{14,0,
+444},{14,0,456},{22,0,60},{22,0,63},{9,11,106},{9,11,163},{9,11,296},{10,11,167}
+,{10,11,172},{10,11,777},{139,11,16},{136,0,583},{132,0,515},{8,0,632},{8,0,697}
+,{137,0,854},{5,11,195},{135,11,1685},{6,0,1123},{134,0,1365},{134,11,328},{7,11
+,1997},{8,11,730},{139,11,1006},{4,0,136},{133,0,551},{134,0,1782},{7,0,1287},{9
+,0,44},{10,0,552},{10,0,642},{11,0,839},{12,0,274},{12,0,275},{12,0,372},{13,0,
+91},{142,0,125},{5,11,751},{11,11,797},{140,11,203},{133,0,732},{7,0,679},{8,0,
+313},{4,10,100},{135,11,821},{10,0,361},{142,0,316},{134,0,595},{6,0,147},{7,0,
+886},{9,0,753},{138,0,268},{5,10,362},{5,10,443},{6,10,318},{7,10,1019},{139,10,
+623},{5,10,463},{136,10,296},{4,10,454},{5,11,950},{5,11,994},{134,11,351},{138,
+0,137},{5,10,48},{5,10,404},{6,10,557},{7,10,458},{8,10,597},{10,10,455},{10,10,
+606},{11,10,49},{11,10,548},{12,10,476},{13,10,18},{141,10,450},{133,0,414},{135
+,0,1762},{5,11,421},{135,11,47},{5,10,442},{135,10,1984},{134,0,599},{134,0,1749
+},{134,0,1627},{4,0,488},{132,11,350},{137,11,751},{132,0,83},{140,0,676},{133,
+11,967},{7,0,1639},{5,10,55},{140,10,161},{4,11,473},{7,11,623},{8,11,808},{9,11
+,871},{9,11,893},{11,11,38},{11,11,431},{12,11,112},{12,11,217},{12,11,243},{12,
+11,562},{12,11,683},{13,11,141},{13,11,197},{13,11,227},{13,11,406},{13,11,487},
+{14,11,156},{14,11,203},{14,11,224},{14,11,256},{18,11,58},{150,11,0},{133,10,
+450},{7,11,736},{139,11,264},{134,0,278},{4,11,222},{7,11,286},{136,11,629},{135
+,10,869},{140,0,97},{144,0,14},{134,0,1085},{4,10,213},{7,10,223},{136,10,80},{7
+,0,388},{7,0,644},{139,0,781},{132,0,849},{7,0,229},{8,0,59},{9,0,190},{10,0,378
+},{140,0,191},{7,10,381},{7,10,806},{7,10,820},{8,10,354},{8,10,437},{8,10,787},
+{9,10,657},{10,10,58},{10,10,339},{10,10,749},{11,10,914},{12,10,162},{13,10,75}
+,{14,10,106},{14,10,198},{14,10,320},{14,10,413},{146,10,43},{141,11,306},{136,
+10,747},{134,0,1115},{16,0,94},{16,0,108},{136,11,146},{6,0,700},{6,0,817},{134,
+0,1002},{133,10,692},{4,11,465},{135,11,1663},{134,10,191},{6,0,1414},{135,11,
+913},{132,0,660},{7,0,1035},{138,0,737},{6,10,162},{7,10,1960},{136,10,831},{132
+,10,706},{7,0,690},{9,0,217},{9,0,587},{140,0,521},{138,10,426},{135,10,1235},{6
+,11,82},{7,11,138},{7,11,517},{9,11,673},{139,11,238},{138,0,272},{5,11,495},{7,
+11,834},{9,11,733},{139,11,378},{134,0,1744},{132,0,1011},{7,11,828},{142,11,116
+},{4,0,733},{9,0,194},{10,0,92},{11,0,198},{12,0,84},{13,0,128},{133,11,559},{10
+,0,57},{10,0,277},{6,11,21},{6,11,1737},{7,11,1444},{136,11,224},{4,10,204},{137
+,10,902},{136,10,833},{11,0,348},{12,0,99},{18,0,1},{18,0,11},{19,0,4},{7,10,366
+},{9,10,287},{12,10,199},{12,10,556},{140,10,577},{6,0,1981},{136,0,936},{21,0,
+33},{150,0,40},{5,11,519},{138,11,204},{5,10,356},{135,10,224},{134,0,775},{135,
+0,306},{7,10,630},{9,10,567},{11,10,150},{11,10,444},{141,10,119},{5,0,979},{134
+,10,539},{133,0,611},{4,11,402},{135,11,1679},{5,0,178},{7,11,2},{8,11,323},{136
+,11,479},{5,11,59},{135,11,672},{4,0,1010},{6,0,1969},{138,11,237},{133,11,412},
+{146,11,34},{7,11,1740},{146,11,48},{134,0,664},{139,10,814},{4,11,85},{135,11,
+549},{133,11,94},{133,11,457},{132,0,390},{134,0,1510},{4,10,235},{135,10,255},{
+4,10,194},{5,10,584},{6,11,11},{6,10,384},{7,11,187},{7,10,583},{10,10,761},{11,
+10,760},{139,10,851},{4,11,522},{139,11,802},{135,0,493},{10,11,776},{13,11,345}
+,{142,11,425},{146,0,37},{4,11,52},{135,11,661},{134,0,724},{134,0,829},{133,11,
+520},{133,10,562},{4,11,281},{5,11,38},{7,11,194},{7,11,668},{7,11,1893},{137,11
+,397},{5,10,191},{137,10,271},{7,0,1537},{14,0,96},{143,0,73},{5,0,473},{11,0,
+168},{4,10,470},{6,10,153},{7,10,1503},{7,10,1923},{10,10,701},{11,10,132},{11,
+10,227},{11,10,320},{11,10,436},{11,10,525},{11,10,855},{12,10,41},{12,10,286},{
+13,10,103},{13,10,284},{14,10,255},{14,10,262},{15,10,117},{143,10,127},{133,0,
+105},{5,0,438},{9,0,694},{12,0,627},{141,0,210},{133,10,327},{6,10,552},{7,10,
+1754},{137,10,604},{134,0,1256},{152,0,11},{5,11,448},{11,11,98},{139,11,524},{7
+,0,1626},{5,10,80},{6,10,405},{7,10,403},{7,10,1502},{8,10,456},{9,10,487},{9,10
+,853},{9,10,889},{10,10,309},{11,10,721},{11,10,994},{12,10,430},{13,10,165},{14
+,11,16},{146,11,44},{132,0,779},{8,0,25},{138,0,826},{4,10,453},{5,10,887},{6,10
+,535},{8,10,6},{8,10,543},{136,10,826},{137,11,461},{140,11,632},{132,0,308},{
+135,0,741},{132,0,671},{7,0,150},{8,0,649},{136,0,1020},{9,0,99},{6,11,336},{8,
+11,552},{9,11,285},{10,11,99},{139,11,568},{134,0,521},{5,0,339},{14,0,3},{15,0,
+41},{15,0,166},{147,0,66},{6,11,423},{7,11,665},{7,11,1210},{9,11,218},{141,11,
+222},{6,0,543},{5,10,101},{5,11,256},{6,10,88},{7,10,1677},{9,10,100},{10,10,677
+},{14,10,169},{14,10,302},{14,10,313},{15,10,48},{143,10,84},{4,10,310},{7,10,
+708},{7,10,996},{9,10,795},{10,10,390},{10,10,733},{11,10,451},{12,10,249},{14,
+10,115},{14,10,286},{143,10,100},{133,10,587},{13,11,417},{14,11,129},{143,11,15
+},{134,0,1358},{136,11,554},{132,10,498},{7,10,217},{8,10,140},{138,10,610},{135
+,11,989},{135,11,634},{6,0,155},{140,0,234},{135,11,462},{132,11,618},{134,0,
+1628},{132,0,766},{4,11,339},{5,10,905},{135,11,259},{135,0,829},{4,11,759},{141
+,11,169},{7,0,1445},{4,10,456},{7,10,358},{7,10,1637},{8,10,643},{139,10,483},{5
+,0,486},{135,0,1349},{5,11,688},{135,11,712},{7,0,1635},{8,0,17},{10,0,217},{10,
+0,295},{12,0,2},{140,11,2},{138,0,558},{150,10,56},{4,11,278},{5,11,465},{135,11
+,1367},{136,11,482},{133,10,535},{6,0,1362},{6,0,1461},{10,11,274},{10,11,625},{
+139,11,530},{5,0,599},{5,11,336},{6,11,341},{6,11,478},{6,11,1763},{136,11,386},
+{7,10,1748},{137,11,151},{134,0,1376},{133,10,539},{135,11,73},{135,11,1971},{
+139,11,283},{9,0,93},{139,0,474},{6,10,91},{135,10,435},{6,0,447},{5,11,396},{
+134,11,501},{4,10,16},{5,10,316},{5,10,842},{6,10,370},{6,10,1778},{8,10,166},{
+11,10,812},{12,10,206},{12,10,351},{14,10,418},{16,10,15},{16,10,34},{18,10,3},{
+19,10,3},{19,10,7},{20,10,4},{149,10,21},{7,0,577},{7,0,1432},{9,0,475},{9,0,505
+},{9,0,526},{9,0,609},{9,0,689},{9,0,726},{9,0,735},{9,0,738},{10,0,556},{10,0,
+674},{10,0,684},{11,0,89},{11,0,202},{11,0,272},{11,0,380},{11,0,415},{11,0,505}
+,{11,0,537},{11,0,550},{11,0,562},{11,0,640},{11,0,667},{11,0,688},{11,0,847},{
+11,0,927},{11,0,930},{11,0,940},{12,0,144},{12,0,325},{12,0,329},{12,0,389},{12,
+0,403},{12,0,451},{12,0,515},{12,0,604},{12,0,616},{12,0,626},{13,0,66},{13,0,
+131},{13,0,167},{13,0,236},{13,0,368},{13,0,411},{13,0,434},{13,0,453},{13,0,461
+},{13,0,474},{14,0,59},{14,0,60},{14,0,139},{14,0,152},{14,0,276},{14,0,353},{14
+,0,402},{15,0,28},{15,0,81},{15,0,123},{15,0,152},{18,0,136},{148,0,88},{4,11,
+929},{133,11,799},{136,11,46},{142,0,307},{4,0,609},{7,0,756},{9,0,544},{11,0,
+413},{144,0,25},{10,0,687},{7,10,619},{10,10,547},{11,10,122},{140,10,601},{4,0,
+930},{133,0,947},{133,0,939},{142,0,21},{4,11,892},{133,11,770},{133,0,962},{5,0
+,651},{8,0,170},{9,0,61},{9,0,63},{10,0,23},{10,0,37},{10,0,834},{11,0,4},{11,0,
+187},{11,0,281},{11,0,503},{11,0,677},{12,0,96},{12,0,130},{12,0,244},{14,0,5},{
+14,0,40},{14,0,162},{14,0,202},{146,0,133},{4,0,406},{5,0,579},{12,0,492},{150,0
+,15},{135,11,158},{135,0,597},{132,0,981},{132,10,888},{4,10,149},{138,10,368},{
+132,0,545},{4,10,154},{7,10,1134},{136,10,105},{135,11,2001},{134,0,1558},{4,10,
+31},{6,10,429},{7,10,962},{9,10,458},{139,10,691},{132,10,312},{135,10,1642},{6,
+0,17},{6,0,1304},{7,0,16},{7,0,1001},{9,0,886},{10,0,489},{10,0,800},{11,0,782},
+{12,0,320},{13,0,467},{14,0,145},{14,0,387},{143,0,119},{135,0,1982},{17,0,17},{
+7,11,1461},{140,11,91},{4,10,236},{132,11,602},{138,0,907},{136,0,110},{7,0,272}
+,{19,0,53},{5,10,836},{5,10,857},{134,10,1680},{5,0,458},{7,11,1218},{136,11,303
+},{7,0,1983},{8,0,0},{8,0,171},{9,0,120},{9,0,732},{10,0,473},{11,0,656},{11,0,
+998},{18,0,0},{18,0,2},{19,0,21},{10,10,68},{139,10,494},{137,11,662},{4,11,13},
+{5,11,567},{7,11,1498},{9,11,124},{11,11,521},{140,11,405},{4,10,81},{139,10,867
+},{135,11,1006},{7,11,800},{7,11,1783},{138,11,12},{9,0,295},{10,0,443},{5,10,
+282},{8,10,650},{137,10,907},{132,11,735},{4,11,170},{4,10,775},{135,11,323},{6,
+0,1844},{10,0,924},{11,11,844},{12,11,104},{140,11,625},{5,11,304},{7,11,1403},{
+140,11,498},{134,0,1232},{4,0,519},{10,0,70},{12,0,26},{14,0,17},{14,0,178},{15,
+0,34},{149,0,12},{132,0,993},{4,11,148},{133,11,742},{6,0,31},{7,0,491},{7,0,530
+},{8,0,592},{11,0,53},{11,0,779},{12,0,167},{12,0,411},{14,0,14},{14,0,136},{15,
+0,72},{16,0,17},{144,0,72},{133,0,907},{134,0,733},{133,11,111},{4,10,71},{5,10,
+376},{7,10,119},{138,10,665},{136,0,55},{8,0,430},{136,11,430},{4,0,208},{5,0,
+106},{6,0,531},{8,0,408},{9,0,188},{138,0,572},{12,0,56},{11,10,827},{14,10,34},
+{143,10,148},{134,0,1693},{133,11,444},{132,10,479},{140,0,441},{9,0,449},{10,0,
+192},{138,0,740},{134,0,928},{4,0,241},{7,10,607},{136,10,99},{8,11,123},{15,11,
+6},{144,11,7},{6,11,285},{8,11,654},{11,11,749},{12,11,190},{12,11,327},{13,11,
+120},{13,11,121},{13,11,327},{15,11,47},{146,11,40},{4,10,41},{5,10,74},{7,10,
+1627},{11,10,871},{140,10,619},{7,0,1525},{11,10,329},{11,10,965},{12,10,241},{
+14,10,354},{15,10,22},{148,10,63},{132,0,259},{135,11,183},{9,10,209},{137,10,
+300},{5,11,937},{135,11,100},{133,10,98},{4,0,173},{5,0,312},{5,0,512},{135,0,
+1285},{141,0,185},{7,0,1603},{7,0,1691},{9,0,464},{11,0,195},{12,0,279},{12,0,
+448},{14,0,11},{147,0,102},{135,0,1113},{133,10,984},{4,0,452},{5,0,583},{135,0,
+720},{4,0,547},{5,0,817},{6,0,433},{7,0,593},{7,0,1378},{8,0,161},{9,0,284},{10,
+0,313},{139,0,886},{8,0,722},{4,10,182},{6,10,205},{135,10,220},{150,0,13},{4,10
+,42},{9,10,205},{9,10,786},{138,10,659},{6,0,289},{7,0,1670},{12,0,57},{151,0,4}
+,{132,10,635},{14,0,43},{146,0,21},{139,10,533},{135,0,1694},{8,0,420},{139,0,
+193},{135,0,409},{132,10,371},{4,10,272},{135,10,836},{5,10,825},{134,10,1640},{
+5,11,251},{5,11,956},{8,11,268},{9,11,214},{146,11,142},{138,0,308},{6,0,1863},{
+141,11,37},{137,10,879},{7,10,317},{135,10,569},{132,11,294},{134,0,790},{5,0,
+1002},{136,0,745},{5,11,346},{5,11,711},{136,11,390},{135,0,289},{5,0,504},{11,0
+,68},{137,10,307},{4,0,239},{6,0,477},{7,0,1607},{139,0,617},{149,0,13},{133,0,
+609},{133,11,624},{5,11,783},{7,11,1998},{135,11,2047},{133,10,525},{132,0,367},
+{132,11,594},{6,0,528},{133,10,493},{4,10,174},{135,10,911},{8,10,417},{137,10,
+782},{132,0,694},{7,0,548},{137,0,58},{4,10,32},{5,10,215},{6,10,269},{7,10,1782
+},{7,10,1892},{10,10,16},{11,10,822},{11,10,954},{141,10,481},{140,0,687},{7,0,
+1749},{136,10,477},{132,11,569},{133,10,308},{135,10,1088},{4,0,661},{138,0,1004
+},{5,11,37},{6,11,39},{6,11,451},{7,11,218},{7,11,667},{7,11,1166},{7,11,1687},{
+8,11,662},{144,11,2},{9,0,445},{12,0,53},{13,0,492},{5,10,126},{8,10,297},{9,10,
+366},{140,10,374},{7,10,1551},{139,10,361},{148,0,74},{134,11,508},{135,0,213},{
+132,10,175},{132,10,685},{6,0,760},{6,0,834},{134,0,1248},{7,11,453},{7,11,635},
+{7,11,796},{8,11,331},{9,11,328},{9,11,330},{9,11,865},{10,11,119},{10,11,235},{
+11,11,111},{11,11,129},{11,11,240},{12,11,31},{12,11,66},{12,11,222},{12,11,269}
+,{12,11,599},{12,11,689},{13,11,186},{13,11,364},{142,11,345},{7,0,1672},{139,0,
+189},{133,10,797},{133,10,565},{6,0,1548},{6,11,98},{7,11,585},{135,11,702},{9,0
+,968},{15,0,192},{149,0,56},{4,10,252},{6,11,37},{7,11,299},{7,10,1068},{7,11,
+1666},{8,11,195},{8,11,316},{9,11,178},{9,11,276},{9,11,339},{9,11,536},{10,11,
+102},{10,11,362},{10,10,434},{10,11,785},{11,11,55},{11,11,149},{11,10,228},{11,
+10,426},{11,11,773},{13,10,231},{13,11,416},{13,11,419},{14,11,38},{14,11,41},{
+14,11,210},{18,10,106},{148,10,87},{4,0,751},{11,0,390},{140,0,32},{4,0,409},{
+133,0,78},{11,11,458},{12,11,15},{140,11,432},{7,0,1602},{10,0,257},{10,0,698},{
+11,0,544},{11,0,585},{12,0,212},{13,0,307},{5,10,231},{7,10,601},{9,10,277},{9,
+10,674},{10,10,178},{10,10,418},{10,10,509},{11,10,531},{12,10,113},{12,10,475},
+{13,10,99},{142,10,428},{6,0,473},{145,0,105},{6,0,1949},{15,0,156},{133,11,645}
+,{7,10,1591},{144,10,43},{135,0,1779},{135,10,1683},{4,11,290},{135,11,1356},{
+134,0,763},{6,11,70},{7,11,1292},{10,11,762},{139,11,288},{142,0,29},{140,11,428
+},{7,0,883},{7,11,131},{7,11,422},{8,11,210},{140,11,573},{134,0,488},{4,10,399}
+,{5,10,119},{5,10,494},{7,10,751},{137,10,556},{133,0,617},{132,11,936},{139,0,
+50},{7,0,1518},{139,0,694},{137,0,785},{4,0,546},{135,0,2042},{7,11,716},{13,11,
+97},{141,11,251},{132,11,653},{145,0,22},{134,0,1016},{4,0,313},{133,0,577},{136
+,11,657},{8,0,184},{141,0,433},{135,0,935},{6,0,720},{9,0,114},{146,11,80},{12,0
+,186},{12,0,292},{14,0,100},{18,0,70},{7,10,594},{7,10,851},{7,10,1858},{9,10,
+411},{9,10,574},{9,10,666},{9,10,737},{10,10,346},{10,10,712},{11,10,246},{11,10
+,432},{11,10,517},{11,10,647},{11,10,679},{11,10,727},{12,10,304},{12,10,305},{
+12,10,323},{12,10,483},{12,10,572},{12,10,593},{12,10,602},{13,10,95},{13,10,101
+},{13,10,171},{13,10,315},{13,10,378},{13,10,425},{13,10,475},{14,10,63},{14,10,
+380},{14,10,384},{15,10,133},{18,10,112},{148,10,72},{135,10,1093},{135,11,1836}
+,{132,10,679},{137,10,203},{11,0,402},{12,0,109},{12,0,431},{13,0,179},{13,0,206
+},{14,0,217},{16,0,3},{148,0,53},{7,11,1368},{8,11,232},{8,11,361},{10,11,682},{
+138,11,742},{137,10,714},{5,0,886},{6,0,46},{6,0,1790},{7,0,14},{7,0,732},{7,0,
+1654},{8,0,95},{8,0,327},{8,0,616},{9,0,892},{10,0,598},{10,0,769},{11,0,134},{
+11,0,747},{12,0,378},{14,0,97},{137,11,534},{4,0,969},{136,10,825},{137,11,27},{
+6,0,727},{142,11,12},{133,0,1021},{134,0,1190},{134,11,1657},{5,10,143},{5,10,
+769},{6,10,1760},{7,10,682},{7,10,1992},{136,10,736},{132,0,153},{135,11,127},{
+133,0,798},{132,0,587},{6,0,598},{7,0,42},{8,0,695},{10,0,212},{11,0,158},{14,0,
+196},{145,0,85},{133,10,860},{6,0,1929},{134,0,1933},{5,0,957},{5,0,1008},{9,0,
+577},{12,0,141},{6,10,422},{7,10,0},{7,10,1544},{8,11,364},{11,10,990},{12,10,
+453},{13,10,47},{141,10,266},{134,0,1319},{4,0,129},{135,0,465},{7,0,470},{7,0,
+1057},{7,0,1201},{9,0,755},{11,0,906},{140,0,527},{7,0,908},{146,0,7},{5,0,148},
+{136,0,450},{5,10,515},{137,10,131},{7,10,1605},{11,10,962},{146,10,139},{132,10
+,646},{134,0,1166},{4,10,396},{7,10,728},{9,10,117},{13,10,202},{148,10,51},{6,
+10,121},{6,10,124},{6,10,357},{7,10,1138},{7,10,1295},{8,10,162},{139,10,655},{
+14,0,374},{142,11,374},{138,0,253},{139,0,1003},{5,11,909},{9,11,849},{138,11,
+805},{133,10,237},{7,11,525},{7,11,1579},{8,11,497},{136,11,573},{137,0,46},{132
+,0,879},{134,0,806},{135,0,1868},{6,0,1837},{134,0,1846},{6,0,730},{134,0,881},{
+7,0,965},{7,0,1460},{7,0,1604},{7,11,193},{7,11,397},{7,11,1105},{8,11,124},{8,
+11,619},{9,11,305},{10,11,264},{11,11,40},{12,11,349},{13,11,134},{13,11,295},{
+14,11,155},{15,11,120},{146,11,105},{136,0,506},{143,0,10},{4,11,262},{7,11,342}
+,{7,10,571},{7,10,1877},{10,10,366},{141,11,23},{133,11,641},{10,0,22},{9,10,513
+},{10,10,39},{12,10,122},{140,10,187},{135,11,1431},{150,11,49},{4,11,99},{6,11,
+250},{6,11,346},{8,11,127},{138,11,81},{6,0,2014},{8,0,928},{10,0,960},{10,0,979
+},{140,0,996},{134,0,296},{132,11,915},{5,11,75},{9,11,517},{10,11,470},{12,11,
+155},{141,11,224},{137,10,873},{4,0,854},{140,11,18},{134,0,587},{7,10,107},{7,
+10,838},{8,10,550},{138,10,401},{11,0,636},{15,0,145},{17,0,34},{19,0,50},{23,0,
+20},{11,10,588},{11,10,864},{11,10,968},{143,10,160},{135,11,216},{7,0,982},{10,
+0,32},{143,0,56},{133,10,768},{133,11,954},{6,11,304},{7,11,1114},{8,11,418},{10
+,11,345},{11,11,341},{11,11,675},{141,11,40},{9,11,410},{139,11,425},{136,0,941}
+,{5,0,435},{132,10,894},{5,0,85},{6,0,419},{7,0,134},{7,0,305},{7,0,361},{7,0,
+1337},{8,0,71},{140,0,519},{140,0,688},{135,0,740},{5,0,691},{7,0,345},{9,0,94},
+{140,0,169},{5,0,183},{6,0,582},{10,0,679},{140,0,435},{134,11,14},{6,0,945},{
+135,0,511},{134,11,1708},{5,11,113},{6,11,243},{7,11,1865},{11,11,161},{16,11,37
+},{145,11,99},{132,11,274},{137,0,539},{7,0,1993},{8,0,684},{134,10,272},{6,0,
+659},{134,0,982},{4,10,9},{5,10,128},{7,10,368},{11,10,480},{148,10,3},{134,0,
+583},{132,0,803},{133,0,704},{4,0,179},{5,0,198},{133,0,697},{7,0,347},{7,0,971}
+,{8,0,181},{10,0,711},{135,11,166},{136,10,682},{4,10,2},{7,10,545},{7,10,894},{
+136,11,521},{135,0,481},{132,0,243},{5,0,203},{7,0,19},{7,0,71},{7,0,113},{10,0,
+405},{11,0,357},{142,0,240},{5,11,725},{5,11,727},{135,11,1811},{6,0,826},{137,
+11,304},{7,0,1450},{139,0,99},{133,11,654},{134,0,492},{5,0,134},{6,0,408},{6,0,
+495},{7,0,1593},{6,11,273},{10,11,188},{13,11,377},{146,11,77},{9,10,769},{140,
+10,185},{135,11,410},{142,0,4},{4,0,665},{134,11,1785},{4,0,248},{7,0,137},{137,
+0,349},{5,10,530},{142,10,113},{7,0,1270},{139,0,612},{132,11,780},{5,0,371},{
+135,0,563},{135,0,826},{6,0,1535},{23,0,21},{151,0,23},{4,0,374},{7,0,547},{7,0,
+1700},{7,0,1833},{139,0,858},{133,10,556},{7,11,612},{8,11,545},{8,11,568},{8,11
+,642},{9,11,717},{10,11,541},{10,11,763},{11,11,449},{12,11,489},{13,11,153},{13
+,11,296},{14,11,138},{14,11,392},{15,11,50},{16,11,6},{16,11,12},{148,11,9},{9,0
+,311},{141,0,42},{8,10,16},{140,10,568},{6,0,1968},{6,0,2027},{138,0,991},{6,0,
+1647},{7,0,1552},{7,0,2010},{9,0,494},{137,0,509},{133,11,948},{6,10,186},{137,
+10,426},{134,0,769},{134,0,642},{132,10,585},{6,0,123},{7,0,214},{9,0,728},{10,0
+,157},{11,0,346},{11,0,662},{143,0,106},{142,11,381},{135,0,1435},{4,11,532},{5,
+11,706},{135,11,662},{5,11,837},{134,11,1651},{4,10,93},{5,10,252},{6,10,229},{7
+,10,291},{9,10,550},{139,10,644},{148,0,79},{137,10,749},{134,0,1425},{137,10,
+162},{4,11,362},{7,11,52},{7,11,303},{140,11,166},{132,10,381},{4,11,330},{7,11,
+933},{7,11,2012},{136,11,292},{135,11,767},{4,0,707},{5,0,588},{6,0,393},{13,0,
+106},{18,0,49},{147,0,41},{6,0,211},{7,0,1690},{11,0,486},{140,0,369},{137,11,
+883},{4,11,703},{135,11,207},{4,0,187},{5,0,184},{5,0,690},{7,0,1869},{10,0,756}
+,{139,0,783},{132,11,571},{134,0,1382},{5,0,175},{6,10,77},{6,10,157},{7,10,974}
+,{7,10,1301},{7,10,1339},{7,10,1490},{7,10,1873},{137,10,628},{134,0,1493},{5,11
+,873},{133,11,960},{134,0,1007},{12,11,93},{12,11,501},{13,11,362},{14,11,151},{
+15,11,40},{15,11,59},{16,11,46},{17,11,25},{18,11,14},{18,11,134},{19,11,25},{19
+,11,69},{20,11,16},{20,11,19},{20,11,66},{21,11,23},{21,11,25},{150,11,42},{11,
+10,919},{141,10,409},{134,0,219},{5,0,582},{6,0,1646},{7,0,99},{7,0,1962},{7,0,
+1986},{8,0,515},{8,0,773},{9,0,23},{9,0,491},{12,0,620},{142,0,93},{133,0,851},{
+5,11,33},{134,11,470},{135,11,1291},{134,0,1278},{135,11,1882},{135,10,1489},{
+132,0,1000},{138,0,982},{8,0,762},{8,0,812},{137,0,910},{6,11,47},{7,11,90},{7,
+11,664},{7,11,830},{7,11,1380},{7,11,2025},{8,11,448},{136,11,828},{4,0,98},{4,0
+,940},{6,0,1819},{6,0,1834},{6,0,1841},{7,0,1365},{8,0,859},{8,0,897},{8,0,918},
+{9,0,422},{9,0,670},{10,0,775},{10,0,894},{10,0,909},{10,0,910},{10,0,935},{11,0
+,210},{12,0,750},{12,0,755},{13,0,26},{13,0,457},{13,0,476},{16,0,100},{16,0,109
+},{18,0,173},{18,0,175},{8,10,398},{9,10,681},{139,10,632},{9,11,417},{137,11,
+493},{136,10,645},{138,0,906},{134,0,1730},{134,10,20},{133,11,1019},{134,0,1185
+},{10,0,40},{136,10,769},{9,0,147},{134,11,208},{140,0,650},{5,0,209},{6,0,30},{
+11,0,56},{139,0,305},{132,0,553},{138,11,344},{6,11,68},{7,11,398},{7,11,448},{7
+,11,1629},{7,11,1813},{8,11,387},{8,11,442},{9,11,710},{10,11,282},{138,11,722},
+{5,0,597},{14,0,20},{142,11,20},{135,0,1614},{135,10,1757},{4,0,150},{5,0,303},{
+6,0,327},{135,10,937},{16,0,49},{7,10,1652},{144,11,49},{8,0,192},{10,0,78},{141
+,0,359},{135,0,786},{143,0,134},{6,0,1638},{7,0,79},{7,0,496},{9,0,138},{10,0,
+336},{11,0,12},{12,0,412},{12,0,440},{142,0,305},{136,11,491},{4,10,579},{5,10,
+226},{5,10,323},{135,10,960},{7,0,204},{7,0,415},{8,0,42},{10,0,85},{139,0,564},
+{132,0,614},{4,11,403},{5,11,441},{7,11,450},{11,11,101},{12,11,193},{141,11,430
+},{135,11,1927},{135,11,1330},{4,0,3},{5,0,247},{5,0,644},{7,0,744},{7,0,1207},{
+7,0,1225},{7,0,1909},{146,0,147},{136,0,942},{4,0,1019},{134,0,2023},{5,11,679},
+{133,10,973},{5,0,285},{9,0,67},{13,0,473},{143,0,82},{7,11,328},{137,11,326},{
+151,0,8},{6,10,135},{135,10,1176},{135,11,1128},{134,0,1309},{135,11,1796},{135,
+10,314},{4,11,574},{7,11,350},{7,11,1024},{8,11,338},{9,11,677},{10,11,808},{139
+,11,508},{7,11,818},{17,11,14},{17,11,45},{18,11,75},{148,11,18},{146,10,4},{135
+,11,1081},{4,0,29},{6,0,532},{7,0,1628},{7,0,1648},{9,0,350},{10,0,433},{11,0,97
+},{11,0,557},{11,0,745},{12,0,289},{12,0,335},{12,0,348},{12,0,606},{13,0,116},{
+13,0,233},{13,0,466},{14,0,181},{14,0,209},{14,0,232},{14,0,236},{14,0,300},{16,
+0,41},{148,0,97},{7,0,318},{6,10,281},{8,10,282},{8,10,480},{8,10,499},{9,10,198
+},{10,10,143},{10,10,169},{10,10,211},{10,10,417},{10,10,574},{11,10,147},{11,10
+,395},{12,10,75},{12,10,407},{12,10,608},{13,10,500},{142,10,251},{135,11,1676},
+{135,11,2037},{135,0,1692},{5,0,501},{7,0,1704},{9,0,553},{11,0,520},{12,0,557},
+{141,0,249},{6,0,1527},{14,0,324},{15,0,55},{15,0,80},{14,11,324},{15,11,55},{
+143,11,80},{135,10,1776},{8,0,988},{137,11,297},{132,10,419},{142,0,223},{139,11
+,234},{7,0,1123},{12,0,508},{14,0,102},{14,0,226},{144,0,57},{4,10,138},{7,10,
+1012},{7,10,1280},{137,10,76},{7,0,1764},{5,10,29},{140,10,638},{134,0,2015},{
+134,0,1599},{138,11,56},{6,11,306},{7,11,1140},{7,11,1340},{8,11,133},{138,11,
+449},{139,11,1011},{6,10,1710},{135,10,2038},{7,11,1763},{140,11,310},{6,0,129},
+{4,10,17},{5,10,23},{7,10,995},{11,10,383},{11,10,437},{12,10,460},{140,10,532},
+{5,11,329},{136,11,260},{133,10,862},{132,0,534},{6,0,811},{135,0,626},{132,11,
+657},{4,0,25},{5,0,60},{6,0,504},{7,0,614},{7,0,1155},{12,0,0},{152,11,7},{7,0,
+1248},{11,0,621},{139,0,702},{137,0,321},{8,10,70},{12,10,171},{141,10,272},{10,
+10,233},{139,10,76},{4,0,379},{7,0,1397},{134,10,442},{5,11,66},{7,11,1896},{136
+,11,288},{134,11,1643},{134,10,1709},{4,11,21},{5,11,91},{5,11,570},{5,11,648},{
+5,11,750},{5,11,781},{6,11,54},{6,11,112},{6,11,402},{6,11,1732},{7,11,315},{7,
+11,749},{7,11,1347},{7,11,1900},{9,11,78},{9,11,508},{10,11,611},{11,11,510},{11
+,11,728},{13,11,36},{14,11,39},{16,11,83},{17,11,124},{148,11,30},{4,0,118},{6,0
+,274},{6,0,361},{7,0,75},{141,0,441},{10,11,322},{10,11,719},{139,11,407},{147,
+10,119},{12,11,549},{14,11,67},{147,11,60},{11,10,69},{12,10,105},{12,10,117},{
+13,10,213},{14,10,13},{14,10,62},{14,10,177},{14,10,421},{15,10,19},{146,10,141}
+,{9,0,841},{137,10,309},{7,10,608},{7,10,976},{8,11,125},{8,11,369},{8,11,524},{
+9,10,146},{10,10,206},{10,11,486},{10,10,596},{11,11,13},{11,11,381},{11,11,736}
+,{11,11,766},{11,11,845},{13,11,114},{13,10,218},{13,11,292},{14,11,47},{142,10,
+153},{12,0,693},{135,11,759},{5,0,314},{6,0,221},{7,0,419},{10,0,650},{11,0,396}
+,{12,0,156},{13,0,369},{14,0,333},{145,0,47},{6,11,1684},{6,11,1731},{7,11,356},
+{7,11,1932},{8,11,54},{8,11,221},{9,11,225},{9,11,356},{10,11,77},{10,11,446},{
+10,11,731},{12,11,404},{141,11,491},{132,11,375},{4,10,518},{135,10,1136},{4,0,
+913},{4,11,411},{11,11,643},{140,11,115},{4,11,80},{133,11,44},{8,10,689},{137,
+10,863},{138,0,880},{4,10,18},{7,10,145},{7,10,444},{7,10,1278},{8,10,49},{8,10,
+400},{9,10,71},{9,10,250},{10,10,459},{12,10,160},{144,10,24},{136,0,475},{5,0,
+1016},{5,11,299},{135,11,1083},{7,0,602},{8,0,179},{10,0,781},{140,0,126},{6,0,
+329},{138,0,111},{135,0,1864},{4,11,219},{7,11,1761},{137,11,86},{6,0,1888},{6,0
+,1892},{6,0,1901},{6,0,1904},{9,0,953},{9,0,985},{9,0,991},{9,0,1001},{12,0,818}
+,{12,0,846},{12,0,847},{12,0,861},{12,0,862},{12,0,873},{12,0,875},{12,0,877},{
+12,0,879},{12,0,881},{12,0,884},{12,0,903},{12,0,915},{12,0,926},{12,0,939},{15,
+0,182},{15,0,219},{15,0,255},{18,0,191},{18,0,209},{18,0,211},{149,0,41},{5,11,
+328},{135,11,918},{137,0,780},{12,0,82},{143,0,36},{133,10,1010},{5,0,821},{134,
+0,1687},{133,11,514},{132,0,956},{134,0,1180},{10,0,112},{5,10,87},{7,10,313},{7
+,10,1103},{10,10,582},{11,10,389},{11,10,813},{12,10,385},{13,10,286},{14,10,124
+},{146,10,108},{5,0,71},{7,0,1407},{9,0,704},{10,0,261},{10,0,619},{11,0,547},{
+11,0,619},{143,0,157},{4,0,531},{5,0,455},{5,11,301},{6,11,571},{14,11,49},{146,
+11,102},{132,10,267},{6,0,385},{7,0,2008},{9,0,337},{138,0,517},{133,11,726},{
+133,11,364},{4,11,76},{7,11,1550},{9,11,306},{9,11,430},{9,11,663},{10,11,683},{
+11,11,427},{11,11,753},{12,11,334},{12,11,442},{14,11,258},{14,11,366},{143,11,
+131},{6,0,1865},{6,0,1879},{6,0,1881},{6,0,1894},{6,0,1908},{9,0,915},{9,0,926},
+{9,0,940},{9,0,943},{9,0,966},{9,0,980},{9,0,989},{9,0,1005},{9,0,1010},{12,0,
+813},{12,0,817},{12,0,840},{12,0,843},{12,0,855},{12,0,864},{12,0,871},{12,0,872
+},{12,0,899},{12,0,905},{12,0,924},{15,0,171},{15,0,181},{15,0,224},{15,0,235},{
+15,0,251},{146,0,184},{137,11,52},{5,0,16},{6,0,86},{6,0,603},{7,0,292},{7,0,561
+},{8,0,257},{8,0,382},{9,0,721},{9,0,778},{11,0,581},{140,0,466},{4,0,486},{5,0,
+491},{135,10,1121},{4,0,72},{6,0,265},{135,0,1300},{135,11,1183},{10,10,249},{
+139,10,209},{132,10,561},{137,11,519},{4,11,656},{4,10,760},{135,11,779},{9,10,
+154},{140,10,485},{135,11,1793},{135,11,144},{136,10,255},{133,0,621},{4,10,368}
+,{135,10,641},{135,11,1373},{7,11,554},{7,11,605},{141,11,10},{137,0,234},{5,0,
+815},{6,0,1688},{134,0,1755},{5,11,838},{5,11,841},{134,11,1649},{7,0,1987},{7,0
+,2040},{136,0,743},{133,11,1012},{6,0,197},{136,0,205},{6,0,314},{134,11,314},{
+144,11,53},{6,11,251},{7,11,365},{7,11,1357},{7,11,1497},{8,11,154},{141,11,281}
+,{133,11,340},{6,0,452},{7,0,312},{138,0,219},{138,0,589},{4,0,333},{9,0,176},{
+12,0,353},{141,0,187},{9,10,92},{147,10,91},{134,0,1110},{11,0,47},{139,11,495},
+{6,10,525},{8,10,806},{9,10,876},{140,10,284},{8,11,261},{9,11,144},{9,11,466},{
+10,11,370},{12,11,470},{13,11,144},{142,11,348},{137,11,897},{8,0,863},{8,0,864}
+,{8,0,868},{8,0,884},{10,0,866},{10,0,868},{10,0,873},{10,0,911},{10,0,912},{10,
+0,944},{12,0,727},{6,11,248},{9,11,546},{10,11,535},{11,11,681},{141,11,135},{6,
+0,300},{135,0,1515},{134,0,1237},{139,10,958},{133,10,594},{140,11,250},{134,0,
+1685},{134,11,567},{7,0,135},{8,0,7},{8,0,62},{9,0,243},{10,0,658},{10,0,697},{
+11,0,456},{139,0,756},{9,0,395},{138,0,79},{6,10,1641},{136,10,820},{4,10,302},{
+135,10,1766},{134,11,174},{135,10,1313},{135,0,631},{134,10,1674},{134,11,395},{
+138,0,835},{7,0,406},{7,0,459},{8,0,606},{139,0,726},{134,11,617},{134,0,979},{6
+,10,389},{7,10,149},{9,10,142},{138,10,94},{5,11,878},{133,11,972},{6,10,8},{7,
+10,1881},{8,10,91},{136,11,511},{133,0,612},{132,11,351},{4,0,372},{7,0,482},{8,
+0,158},{9,0,602},{9,0,615},{10,0,245},{10,0,678},{10,0,744},{11,0,248},{139,0,
+806},{5,0,854},{135,0,1991},{132,11,286},{135,11,344},{7,11,438},{7,11,627},{7,
+11,1516},{8,11,40},{9,11,56},{9,11,294},{10,11,30},{10,11,259},{11,11,969},{146,
+11,148},{135,0,1492},{5,11,259},{7,11,414},{7,11,854},{142,11,107},{135,10,1746}
+,{6,0,833},{134,0,998},{135,10,24},{6,0,750},{135,0,1739},{4,10,503},{135,10,
+1661},{5,10,130},{7,10,1314},{9,10,610},{10,10,718},{11,10,601},{11,10,819},{11,
+10,946},{140,10,536},{10,10,149},{11,10,280},{142,10,336},{132,11,738},{135,10,
+1946},{5,0,195},{135,0,1685},{7,0,1997},{8,0,730},{139,0,1006},{151,11,17},{133,
+11,866},{14,0,463},{14,0,470},{150,0,61},{5,0,751},{8,0,266},{11,0,578},{4,10,
+392},{135,10,1597},{5,10,433},{9,10,633},{139,10,629},{135,0,821},{6,0,715},{134
+,0,1325},{133,11,116},{6,0,868},{132,11,457},{134,0,959},{6,10,234},{138,11,199}
+,{7,0,1053},{7,10,1950},{8,10,680},{11,10,817},{147,10,88},{7,10,1222},{138,10,
+386},{5,0,950},{5,0,994},{6,0,351},{134,0,1124},{134,0,1081},{7,0,1595},{6,10,5}
+,{11,10,249},{12,10,313},{16,10,66},{145,10,26},{148,0,59},{5,11,527},{6,11,189}
+,{135,11,859},{5,10,963},{6,10,1773},{11,11,104},{11,11,554},{15,11,60},{143,11,
+125},{135,0,47},{137,0,684},{134,11,116},{134,0,1606},{134,0,777},{7,0,1020},{8,
+10,509},{136,10,792},{135,0,1094},{132,0,350},{133,11,487},{4,11,86},{5,11,667},
+{5,11,753},{6,11,316},{6,11,455},{135,11,946},{7,0,1812},{13,0,259},{13,0,356},{
+14,0,242},{147,0,114},{132,10,931},{133,0,967},{4,0,473},{7,0,623},{8,0,808},{9,
+0,871},{9,0,893},{11,0,38},{11,0,431},{12,0,112},{12,0,217},{12,0,243},{12,0,562
+},{12,0,663},{12,0,683},{13,0,141},{13,0,197},{13,0,227},{13,0,406},{13,0,487},{
+14,0,156},{14,0,203},{14,0,224},{14,0,256},{18,0,58},{150,0,0},{138,0,286},{7,10
+,943},{139,10,614},{135,10,1837},{150,11,45},{132,0,798},{4,0,222},{7,0,286},{
+136,0,629},{4,11,79},{7,11,1773},{10,11,450},{11,11,589},{13,11,332},{13,11,493}
+,{14,11,183},{14,11,334},{14,11,362},{14,11,368},{14,11,376},{14,11,379},{19,11,
+90},{19,11,103},{19,11,127},{148,11,90},{5,0,337},{11,0,513},{11,0,889},{11,0,
+961},{12,0,461},{13,0,79},{15,0,121},{4,10,90},{5,10,545},{7,10,754},{9,10,186},
+{10,10,72},{10,10,782},{11,10,577},{11,10,610},{12,10,354},{12,10,362},{140,10,
+595},{141,0,306},{136,0,146},{7,0,1646},{9,10,329},{11,10,254},{141,11,124},{4,0
+,465},{135,0,1663},{132,0,525},{133,11,663},{10,0,299},{18,0,74},{9,10,187},{11,
+10,1016},{145,10,44},{7,0,165},{7,0,919},{4,10,506},{136,10,517},{5,10,295},{135
+,10,1680},{133,11,846},{134,0,1064},{5,11,378},{7,11,1402},{7,11,1414},{8,11,465
+},{9,11,286},{10,11,185},{10,11,562},{10,11,635},{11,11,31},{11,11,393},{12,11,
+456},{13,11,312},{18,11,65},{18,11,96},{147,11,89},{132,0,596},{7,10,987},{9,10,
+688},{10,10,522},{11,10,788},{140,10,566},{6,0,82},{7,0,138},{7,0,517},{7,0,1741
+},{11,0,238},{4,11,648},{134,10,1775},{7,0,1233},{7,10,700},{7,10,940},{8,10,514
+},{9,10,116},{9,10,535},{10,10,118},{11,10,107},{11,10,148},{11,10,922},{12,10,
+254},{12,10,421},{142,10,238},{4,0,962},{6,0,1824},{8,0,894},{12,0,708},{12,0,
+725},{14,0,451},{20,0,94},{22,0,59},{150,0,62},{5,11,945},{6,11,1656},{6,11,1787
+},{7,11,167},{8,11,824},{9,11,391},{10,11,375},{139,11,185},{5,0,495},{7,0,834},
+{9,0,733},{139,0,378},{4,10,743},{135,11,1273},{6,0,1204},{7,11,1645},{8,11,352}
+,{137,11,249},{139,10,292},{133,0,559},{132,11,152},{9,0,499},{10,0,341},{15,0,
+144},{19,0,49},{7,10,1283},{9,10,227},{11,10,325},{11,10,408},{14,10,180},{146,
+10,47},{6,0,21},{6,0,1737},{7,0,1444},{136,0,224},{133,11,1006},{7,0,1446},{9,0,
+97},{17,0,15},{5,10,81},{7,10,146},{7,10,1342},{8,10,53},{8,10,561},{8,10,694},{
+8,10,754},{9,10,115},{9,10,894},{10,10,462},{10,10,813},{11,10,230},{11,10,657},
+{11,10,699},{11,10,748},{12,10,119},{12,10,200},{12,10,283},{142,10,273},{5,10,
+408},{137,10,747},{135,11,431},{135,11,832},{6,0,729},{134,0,953},{4,0,727},{8,0
+,565},{5,11,351},{7,11,264},{136,11,565},{134,0,1948},{5,0,519},{5,11,40},{7,11,
+598},{7,11,1638},{8,11,78},{9,11,166},{9,11,640},{9,11,685},{9,11,773},{11,11,
+215},{13,11,65},{14,11,172},{14,11,317},{145,11,6},{8,11,60},{9,11,343},{139,11,
+769},{137,11,455},{134,0,1193},{140,0,790},{7,11,1951},{8,11,765},{8,11,772},{
+140,11,671},{7,11,108},{8,11,219},{8,11,388},{9,11,639},{9,11,775},{11,11,275},{
+140,11,464},{132,11,468},{7,10,30},{8,10,86},{8,10,315},{8,10,700},{9,10,576},{9
+,10,858},{11,10,310},{11,10,888},{11,10,904},{12,10,361},{141,10,248},{5,11,15},
+{6,11,56},{7,11,1758},{8,11,500},{9,11,730},{11,11,331},{13,11,150},{142,11,282}
+,{4,0,402},{7,0,2},{8,0,323},{136,0,479},{138,10,839},{11,0,580},{142,0,201},{5,
+0,59},{135,0,672},{137,10,617},{146,0,34},{134,11,1886},{4,0,961},{136,0,896},{6
+,0,1285},{5,11,205},{6,11,438},{137,11,711},{134,10,428},{7,10,524},{8,10,169},{
+8,10,234},{9,10,480},{138,10,646},{148,0,46},{141,0,479},{133,11,534},{6,0,2019}
+,{134,10,1648},{4,0,85},{7,0,549},{7,10,1205},{138,10,637},{4,0,663},{5,0,94},{7
+,11,235},{7,11,1475},{15,11,68},{146,11,120},{6,11,443},{9,11,237},{9,11,571},{9
+,11,695},{10,11,139},{11,11,715},{12,11,417},{141,11,421},{132,0,783},{4,0,682},
+{8,0,65},{9,10,39},{10,10,166},{11,10,918},{12,10,635},{20,10,10},{22,10,27},{22
+,10,43},{150,10,52},{6,0,11},{135,0,187},{132,0,522},{4,0,52},{135,0,661},{4,0,
+383},{133,0,520},{135,11,546},{11,0,343},{142,0,127},{4,11,578},{7,10,157},{7,11
+,624},{7,11,916},{8,10,279},{10,11,256},{11,11,87},{139,11,703},{134,10,604},{4,
+0,281},{5,0,38},{7,0,194},{7,0,668},{7,0,1893},{137,0,397},{7,10,945},{11,10,713
+},{139,10,744},{139,10,1022},{9,0,635},{139,0,559},{5,11,923},{7,11,490},{12,11,
+553},{13,11,100},{14,11,118},{143,11,75},{132,0,975},{132,10,567},{137,10,859},{
+7,10,1846},{7,11,1846},{8,10,628},{136,11,628},{148,0,116},{138,11,750},{14,0,51
+},{14,11,51},{15,11,7},{148,11,20},{132,0,858},{134,0,1075},{4,11,924},{133,10,
+762},{136,0,535},{133,0,448},{10,10,784},{141,10,191},{133,10,298},{7,0,610},{
+135,0,1501},{7,10,633},{7,10,905},{7,10,909},{7,10,1538},{9,10,767},{140,10,636}
+,{4,11,265},{7,11,807},{135,11,950},{5,11,93},{12,11,267},{144,11,26},{136,0,191
+},{139,10,301},{135,10,1970},{135,0,267},{4,0,319},{5,0,699},{138,0,673},{6,0,
+336},{7,0,92},{7,0,182},{8,0,453},{8,0,552},{9,0,204},{9,0,285},{10,0,99},{11,0,
+568},{11,0,950},{12,0,94},{16,0,20},{16,0,70},{19,0,55},{12,10,644},{144,10,90},
+{6,0,551},{7,0,1308},{7,10,845},{7,11,994},{8,10,160},{137,10,318},{19,11,1},{19
+,11,26},{150,11,9},{7,0,1406},{9,0,218},{141,0,222},{5,0,256},{138,0,69},{5,11,
+233},{5,11,320},{6,11,140},{7,11,330},{136,11,295},{6,0,1980},{136,0,952},{4,0,
+833},{137,11,678},{133,11,978},{4,11,905},{6,11,1701},{137,11,843},{138,10,735},
+{136,10,76},{17,0,39},{148,0,36},{18,0,81},{146,11,81},{14,0,352},{17,0,53},{18,
+0,146},{18,0,152},{19,0,11},{150,0,54},{135,0,634},{138,10,841},{132,0,618},{4,0
+,339},{7,0,259},{17,0,73},{4,11,275},{140,11,376},{132,11,509},{7,11,273},{139,
+11,377},{4,0,759},{13,0,169},{137,10,804},{6,10,96},{135,10,1426},{4,10,651},{
+133,10,289},{7,0,1075},{8,10,35},{9,10,511},{10,10,767},{147,10,118},{6,0,649},{
+6,0,670},{136,0,482},{5,0,336},{6,0,341},{6,0,478},{6,0,1763},{136,0,386},{5,11,
+802},{7,11,2021},{8,11,805},{14,11,94},{15,11,65},{16,11,4},{16,11,77},{16,11,80
+},{145,11,5},{6,0,1035},{5,11,167},{5,11,899},{6,11,410},{137,11,777},{134,11,
+1705},{5,0,924},{133,0,969},{132,10,704},{135,0,73},{135,11,10},{135,10,1078},{5
+,11,11},{6,11,117},{6,11,485},{7,11,1133},{9,11,582},{9,11,594},{11,11,21},{11,
+11,818},{12,11,535},{141,11,86},{135,0,1971},{4,11,264},{7,11,1067},{8,11,204},{
+8,11,385},{139,11,953},{6,0,1458},{135,0,1344},{5,0,396},{134,0,501},{4,10,720},
+{133,10,306},{4,0,929},{5,0,799},{8,0,46},{8,0,740},{133,10,431},{7,11,646},{7,
+11,1730},{11,11,446},{141,11,178},{7,0,276},{5,10,464},{6,10,236},{7,10,696},{7,
+10,914},{7,10,1108},{7,10,1448},{9,10,15},{9,10,564},{10,10,14},{12,10,565},{13,
+10,449},{14,10,53},{15,10,13},{16,10,64},{145,10,41},{4,0,892},{133,0,770},{6,10
+,1767},{12,10,194},{145,10,107},{135,0,158},{5,10,840},{138,11,608},{134,0,1432}
+,{138,11,250},{8,11,794},{9,11,400},{10,11,298},{142,11,228},{151,0,25},{7,11,
+1131},{135,11,1468},{135,0,2001},{9,10,642},{11,10,236},{142,10,193},{4,10,68},{
+5,10,634},{6,10,386},{7,10,794},{8,10,273},{9,10,563},{10,10,105},{10,10,171},{
+11,10,94},{139,10,354},{136,11,724},{132,0,478},{11,11,512},{13,11,205},{19,11,
+30},{22,11,36},{151,11,19},{7,0,1461},{140,0,91},{6,11,190},{7,11,768},{135,11,
+1170},{4,0,602},{8,0,211},{4,10,95},{7,10,416},{139,10,830},{7,10,731},{13,10,20
+},{143,10,11},{6,0,1068},{135,0,1872},{4,0,13},{5,0,567},{7,0,1498},{9,0,124},{
+11,0,521},{12,0,405},{135,11,1023},{135,0,1006},{132,0,735},{138,0,812},{4,0,170
+},{135,0,323},{6,11,137},{9,11,75},{9,11,253},{10,11,194},{138,11,444},{5,0,304}
+,{7,0,1403},{5,10,864},{10,10,648},{11,10,671},{143,10,46},{135,11,1180},{133,10
+,928},{4,0,148},{133,0,742},{11,10,986},{140,10,682},{133,0,523},{135,11,1743},{
+7,0,730},{18,0,144},{19,0,61},{8,10,44},{9,10,884},{10,10,580},{11,10,399},{11,
+10,894},{143,10,122},{5,11,760},{7,11,542},{8,11,135},{136,11,496},{136,0,981},{
+133,0,111},{10,0,132},{11,0,191},{11,0,358},{139,0,460},{7,11,319},{7,11,355},{7
+,11,763},{10,11,389},{145,11,43},{134,0,890},{134,0,1420},{136,11,557},{133,10,
+518},{133,0,444},{135,0,1787},{135,10,1852},{8,0,123},{15,0,6},{144,0,7},{6,0,
+2041},{10,11,38},{139,11,784},{136,0,932},{5,0,937},{135,0,100},{6,0,995},{4,11,
+58},{5,11,286},{6,11,319},{7,11,402},{7,11,1254},{7,11,1903},{8,11,356},{140,11,
+408},{4,11,389},{9,11,181},{9,11,255},{10,11,8},{10,11,29},{10,11,816},{11,11,
+311},{11,11,561},{12,11,67},{141,11,181},{138,0,255},{5,0,138},{4,10,934},{136,
+10,610},{4,0,965},{10,0,863},{138,0,898},{10,10,804},{138,10,832},{12,0,631},{8,
+10,96},{9,10,36},{10,10,607},{11,10,423},{11,10,442},{12,10,309},{14,10,199},{15
+,10,90},{145,10,110},{134,0,1394},{4,0,652},{8,0,320},{22,0,6},{22,0,16},{9,10,
+13},{9,10,398},{9,10,727},{10,10,75},{10,10,184},{10,10,230},{10,10,564},{10,10,
+569},{11,10,973},{12,10,70},{12,10,189},{13,10,57},{141,10,257},{6,0,897},{134,0
+,1333},{4,0,692},{133,0,321},{133,11,373},{135,0,922},{5,0,619},{133,0,698},{137
+,10,631},{5,10,345},{135,10,1016},{9,0,957},{9,0,1018},{12,0,828},{12,0,844},{12
+,0,897},{12,0,901},{12,0,943},{15,0,180},{18,0,197},{18,0,200},{18,0,213},{18,0,
+214},{146,0,226},{5,0,917},{134,0,1659},{135,0,1100},{134,0,1173},{134,0,1930},{
+5,0,251},{5,0,956},{8,0,268},{9,0,214},{146,0,142},{133,10,673},{137,10,850},{4,
+10,287},{133,10,1018},{132,11,672},{5,0,346},{5,0,711},{8,0,390},{11,11,752},{
+139,11,885},{5,10,34},{10,10,724},{12,10,444},{13,10,354},{18,10,32},{23,10,24},
+{23,10,31},{152,10,5},{4,11,710},{134,11,606},{134,0,744},{134,10,382},{133,11,
+145},{4,10,329},{7,11,884},{140,11,124},{4,11,467},{5,11,405},{134,11,544},{9,10
+,846},{138,10,827},{133,0,624},{9,11,372},{15,11,2},{19,11,10},{147,11,18},{4,11
+,387},{135,11,1288},{5,0,783},{7,0,1998},{135,0,2047},{132,10,906},{136,10,366},
+{135,11,550},{4,10,123},{4,10,649},{5,10,605},{7,10,1509},{136,10,36},{134,0,
+1125},{132,0,594},{133,10,767},{135,11,1227},{136,11,467},{4,11,576},{135,11,
+1263},{4,0,268},{7,0,1534},{135,11,1534},{4,10,273},{5,10,658},{5,11,919},{5,10,
+995},{134,11,1673},{133,0,563},{134,10,72},{135,10,1345},{4,11,82},{5,11,333},{5
+,11,904},{6,11,207},{7,11,325},{7,11,1726},{8,11,101},{10,11,778},{139,11,220},{
+5,0,37},{6,0,39},{6,0,451},{7,0,218},{7,0,667},{7,0,1166},{7,0,1687},{8,0,662},{
+16,0,2},{133,10,589},{134,0,1332},{133,11,903},{134,0,508},{5,10,117},{6,10,514}
+,{6,10,541},{7,10,1164},{7,10,1436},{8,10,220},{8,10,648},{10,10,688},{11,10,560
+},{140,11,147},{6,11,555},{135,11,485},{133,10,686},{7,0,453},{7,0,635},{7,0,796
+},{8,0,331},{9,0,330},{9,0,865},{10,0,119},{10,0,235},{11,0,111},{11,0,129},{11,
+0,240},{12,0,31},{12,0,66},{12,0,222},{12,0,269},{12,0,599},{12,0,684},{12,0,689
+},{12,0,691},{142,0,345},{135,0,1834},{4,11,705},{7,11,615},{138,11,251},{136,11
+,345},{137,0,527},{6,0,98},{7,0,702},{135,0,991},{11,0,576},{14,0,74},{7,10,196}
+,{10,10,765},{11,10,347},{11,10,552},{11,10,790},{12,10,263},{13,10,246},{13,10,
+270},{13,10,395},{14,10,176},{14,10,190},{14,10,398},{14,10,412},{15,10,32},{15,
+10,63},{16,10,88},{147,10,105},{134,11,90},{13,0,84},{141,0,122},{6,0,37},{7,0,
+299},{7,0,1666},{8,0,195},{8,0,316},{9,0,178},{9,0,276},{9,0,339},{9,0,536},{10,
+0,102},{10,0,362},{10,0,785},{11,0,55},{11,0,149},{11,0,773},{13,0,416},{13,0,
+419},{14,0,38},{14,0,41},{142,0,210},{5,10,381},{135,10,1792},{7,11,813},{12,11,
+497},{141,11,56},{7,10,616},{138,10,413},{133,0,645},{6,11,125},{135,11,1277},{
+132,0,290},{6,0,70},{7,0,1292},{10,0,762},{139,0,288},{6,10,120},{7,10,1188},{7,
+10,1710},{8,10,286},{9,10,667},{11,10,592},{139,10,730},{135,11,1784},{7,0,1315}
+,{135,11,1315},{134,0,1955},{135,10,1146},{7,0,131},{7,0,422},{8,0,210},{140,0,
+573},{4,10,352},{135,10,687},{139,0,797},{143,0,38},{14,0,179},{15,0,151},{150,0
+,11},{7,0,488},{4,10,192},{5,10,49},{6,10,200},{6,10,293},{134,10,1696},{132,0,
+936},{135,11,703},{6,11,160},{7,11,1106},{9,11,770},{10,11,618},{11,11,112},{140
+,11,413},{5,0,453},{134,0,441},{135,0,595},{132,10,650},{132,10,147},{6,0,991},{
+6,0,1182},{12,11,271},{145,11,109},{133,10,934},{140,11,221},{132,0,653},{7,0,
+505},{135,0,523},{134,0,903},{135,11,479},{7,11,304},{9,11,646},{9,11,862},{10,
+11,262},{11,11,696},{12,11,208},{15,11,79},{147,11,108},{146,0,80},{135,11,981},
+{142,0,432},{132,0,314},{137,11,152},{7,0,1368},{8,0,232},{8,0,361},{10,0,682},{
+138,0,742},{135,11,1586},{9,0,534},{4,11,434},{11,11,663},{12,11,210},{13,11,166
+},{13,11,310},{14,11,373},{147,11,43},{7,11,1091},{135,11,1765},{6,11,550},{135,
+11,652},{137,0,27},{142,0,12},{4,10,637},{5,11,553},{7,11,766},{138,11,824},{7,
+11,737},{8,11,298},{136,11,452},{7,0,736},{139,0,264},{134,0,1657},{133,11,292},
+{138,11,135},{6,0,844},{134,0,1117},{135,0,127},{9,10,867},{138,10,837},{6,0,
+1184},{134,0,1208},{134,0,1294},{136,0,364},{6,0,1415},{7,0,1334},{11,0,125},{6,
+10,170},{7,11,393},{8,10,395},{8,10,487},{10,11,603},{11,11,206},{141,10,147},{
+137,11,748},{4,11,912},{137,11,232},{4,10,535},{136,10,618},{137,0,792},{7,11,
+1973},{136,11,716},{135,11,98},{5,0,909},{9,0,849},{138,0,805},{4,0,630},{132,0,
+699},{5,11,733},{14,11,103},{150,10,23},{12,11,158},{18,11,8},{19,11,62},{20,11,
+6},{22,11,4},{23,11,2},{151,11,9},{132,0,968},{132,10,778},{132,10,46},{5,10,811
+},{6,10,1679},{6,10,1714},{135,10,2032},{6,0,1446},{7,10,1458},{9,10,407},{139,
+10,15},{7,0,206},{7,0,397},{7,0,621},{7,0,640},{8,0,124},{8,0,619},{9,0,305},{9,
+0,643},{10,0,264},{10,0,628},{11,0,40},{12,0,349},{13,0,134},{13,0,295},{14,0,
+155},{15,0,120},{18,0,105},{6,10,34},{7,10,1089},{8,10,708},{8,10,721},{9,10,363
+},{148,10,98},{4,0,262},{5,0,641},{135,0,342},{137,11,72},{4,0,99},{6,0,250},{6,
+0,346},{8,0,127},{138,0,81},{132,0,915},{5,0,75},{9,0,517},{10,0,470},{12,0,155}
+,{141,0,224},{132,10,462},{11,11,600},{11,11,670},{141,11,245},{142,0,83},{5,10,
+73},{6,10,23},{134,10,338},{6,0,1031},{139,11,923},{7,11,164},{7,11,1571},{9,11,
+107},{140,11,225},{134,0,1470},{133,0,954},{6,0,304},{8,0,418},{10,0,345},{11,0,
+341},{139,0,675},{9,0,410},{139,0,425},{4,11,27},{5,11,484},{5,11,510},{6,11,434
+},{7,11,1000},{7,11,1098},{8,11,2},{136,11,200},{134,0,734},{140,11,257},{7,10,
+725},{8,10,498},{139,10,268},{134,0,1822},{135,0,1798},{135,10,773},{132,11,460}
+,{4,11,932},{133,11,891},{134,0,14},{132,10,583},{7,10,1462},{8,11,625},{139,10,
+659},{5,0,113},{6,0,243},{6,0,1708},{7,0,1865},{11,0,161},{16,0,37},{17,0,99},{
+133,10,220},{134,11,76},{5,11,461},{135,11,1925},{140,0,69},{8,11,92},{137,11,
+221},{139,10,803},{132,10,544},{4,0,274},{134,0,922},{132,0,541},{5,0,627},{6,10
+,437},{6,10,564},{11,10,181},{141,10,183},{135,10,1192},{7,0,166},{132,11,763},{
+133,11,253},{134,0,849},{9,11,73},{10,11,110},{14,11,185},{145,11,119},{5,11,212
+},{12,11,35},{141,11,382},{133,0,717},{137,0,304},{136,0,600},{133,0,654},{6,0,
+273},{10,0,188},{13,0,377},{146,0,77},{4,10,790},{5,10,273},{134,10,394},{132,0,
+543},{135,0,410},{11,0,98},{11,0,524},{141,0,87},{132,0,941},{135,11,1175},{4,0,
+250},{7,0,1612},{11,0,186},{12,0,133},{6,10,127},{7,10,1511},{8,10,613},{12,10,
+495},{12,10,586},{12,10,660},{12,10,668},{14,10,385},{15,10,118},{17,10,20},{146
+,10,98},{6,0,1785},{133,11,816},{134,0,1339},{7,0,961},{7,0,1085},{7,0,1727},{8,
+0,462},{6,10,230},{135,11,1727},{9,0,636},{135,10,1954},{132,0,780},{5,11,869},{
+5,11,968},{6,11,1626},{8,11,734},{136,11,784},{4,11,542},{6,11,1716},{6,11,1727}
+,{7,11,1082},{7,11,1545},{8,11,56},{8,11,118},{8,11,412},{8,11,564},{9,11,888},{
+9,11,908},{10,11,50},{10,11,423},{11,11,685},{11,11,697},{11,11,933},{12,11,299}
+,{13,11,126},{13,11,136},{13,11,170},{141,11,190},{134,11,226},{4,11,232},{9,11,
+202},{10,11,474},{140,11,433},{137,11,500},{5,0,529},{136,10,68},{132,10,654},{4
+,10,156},{7,10,998},{7,10,1045},{7,10,1860},{9,10,48},{9,10,692},{11,10,419},{
+139,10,602},{7,0,1276},{8,0,474},{9,0,652},{6,11,108},{7,11,1003},{7,11,1181},{
+136,11,343},{7,11,1264},{7,11,1678},{11,11,945},{12,11,341},{12,11,471},{140,11,
+569},{134,11,1712},{5,0,948},{12,0,468},{19,0,96},{148,0,24},{4,11,133},{7,11,
+711},{7,11,1298},{7,11,1585},{135,11,1929},{6,0,753},{140,0,657},{139,0,941},{6,
+11,99},{7,11,1808},{145,11,57},{6,11,574},{7,11,428},{7,11,1250},{10,11,669},{11
+,11,485},{11,11,840},{12,11,300},{142,11,250},{4,0,532},{5,0,706},{135,0,662},{5
+,0,837},{6,0,1651},{139,0,985},{7,0,1861},{9,10,197},{10,10,300},{12,10,473},{13
+,10,90},{141,10,405},{137,11,252},{6,11,323},{135,11,1564},{4,0,330},{4,0,863},{
+7,0,933},{7,0,2012},{8,0,292},{7,11,461},{8,11,775},{138,11,435},{132,10,606},{4
+,11,655},{7,11,850},{17,11,75},{146,11,137},{135,0,767},{7,10,1978},{136,10,676}
+,{132,0,641},{135,11,1559},{134,0,1233},{137,0,242},{17,0,114},{4,10,361},{133,
+10,315},{137,0,883},{132,10,461},{138,0,274},{134,0,2008},{134,0,1794},{4,0,703}
+,{135,0,207},{12,0,285},{132,10,472},{132,0,571},{5,0,873},{5,0,960},{8,0,823},{
+9,0,881},{136,11,577},{7,0,617},{10,0,498},{11,0,501},{12,0,16},{140,0,150},{138
+,10,747},{132,0,431},{133,10,155},{11,0,283},{11,0,567},{7,10,163},{8,10,319},{9
+,10,402},{10,10,24},{10,10,681},{11,10,200},{12,10,253},{12,10,410},{142,10,219}
+,{4,11,413},{5,11,677},{8,11,432},{140,11,280},{9,0,401},{5,10,475},{7,10,1780},
+{11,10,297},{11,10,558},{14,10,322},{147,10,76},{6,0,781},{9,0,134},{10,0,2},{10
+,0,27},{10,0,333},{11,0,722},{143,0,1},{5,0,33},{6,0,470},{139,0,424},{135,0,
+2006},{12,0,783},{135,10,1956},{136,0,274},{135,0,1882},{132,0,794},{135,0,1848}
+,{5,10,944},{134,10,1769},{6,0,47},{7,0,90},{7,0,664},{7,0,830},{7,0,1380},{7,0,
+2025},{8,0,448},{136,0,828},{132,10,144},{134,0,1199},{4,11,395},{139,11,762},{
+135,11,1504},{9,0,417},{137,0,493},{9,11,174},{10,11,164},{11,11,440},{11,11,841
+},{143,11,98},{134,11,426},{139,11,1002},{134,0,295},{134,0,816},{6,10,247},{137
+,10,555},{133,0,1019},{4,0,620},{5,11,476},{10,10,280},{138,10,797},{139,0,464},
+{5,11,76},{6,11,458},{6,11,497},{7,11,764},{7,11,868},{9,11,658},{10,11,594},{11
+,11,173},{11,11,566},{12,11,20},{12,11,338},{141,11,200},{134,0,208},{4,11,526},
+{7,11,1029},{135,11,1054},{132,11,636},{6,11,233},{7,11,660},{7,11,1124},{17,11,
+31},{19,11,22},{151,11,14},{10,0,442},{133,10,428},{10,0,930},{140,0,778},{6,0,
+68},{7,0,448},{7,0,1629},{7,0,1769},{7,0,1813},{8,0,442},{8,0,516},{9,0,710},{10
+,0,282},{10,0,722},{7,10,1717},{138,10,546},{134,0,1128},{11,0,844},{12,0,104},{
+140,0,625},{4,11,432},{135,11,824},{138,10,189},{133,0,787},{133,10,99},{4,11,
+279},{7,11,301},{137,11,362},{8,0,491},{4,10,397},{136,10,555},{4,11,178},{133,
+11,399},{134,0,711},{144,0,9},{4,0,403},{5,0,441},{7,0,450},{10,0,840},{11,0,101
+},{12,0,193},{141,0,430},{135,11,1246},{12,10,398},{20,10,39},{21,10,11},{150,10
+,41},{4,10,485},{7,10,353},{135,10,1523},{6,10,366},{7,10,1384},{7,10,1601},{135
+,11,1912},{7,0,396},{10,0,160},{135,11,396},{137,10,282},{134,11,1692},{4,10,157
+},{5,10,471},{6,11,202},{10,11,448},{11,11,208},{12,11,360},{17,11,117},{17,11,
+118},{18,11,27},{148,11,67},{133,0,679},{137,0,326},{136,10,116},{7,11,872},{10,
+11,516},{139,11,167},{132,11,224},{5,11,546},{7,11,35},{8,11,11},{8,11,12},{9,11
+,315},{9,11,533},{10,11,802},{11,11,166},{12,11,525},{142,11,243},{7,0,1128},{
+135,11,1920},{5,11,241},{8,11,242},{9,11,451},{10,11,667},{11,11,598},{140,11,
+429},{6,0,737},{5,10,160},{7,10,363},{7,10,589},{10,10,170},{141,10,55},{135,0,
+1796},{142,11,254},{4,0,574},{7,0,350},{7,0,1024},{8,0,338},{9,0,677},{138,0,808
+},{134,0,1096},{137,11,516},{7,0,405},{10,0,491},{4,10,108},{4,11,366},{139,10,
+498},{11,11,337},{142,11,303},{134,11,1736},{7,0,1081},{140,11,364},{7,10,1005},
+{140,10,609},{7,0,1676},{4,10,895},{133,10,772},{135,0,2037},{6,0,1207},{11,11,
+916},{142,11,419},{14,11,140},{148,11,41},{6,11,331},{136,11,623},{9,0,944},{9,0
+,969},{9,0,1022},{12,0,913},{12,0,936},{15,0,177},{15,0,193},{4,10,926},{133,10,
+983},{5,0,354},{135,11,506},{8,0,598},{9,0,664},{138,0,441},{4,11,640},{133,11,
+513},{137,0,297},{132,10,538},{6,10,294},{7,10,1267},{136,10,624},{7,0,1772},{7,
+11,1888},{8,11,289},{11,11,45},{12,11,278},{140,11,537},{135,10,1325},{138,0,751
+},{141,0,37},{134,0,1828},{132,10,757},{132,11,394},{6,0,257},{135,0,1522},{4,0,
+582},{9,0,191},{135,11,1931},{7,11,574},{7,11,1719},{137,11,145},{132,11,658},{
+10,0,790},{132,11,369},{9,11,781},{10,11,144},{11,11,385},{13,11,161},{13,11,228
+},{13,11,268},{148,11,107},{8,0,469},{10,0,47},{136,11,374},{6,0,306},{7,0,1140}
+,{7,0,1340},{8,0,133},{138,0,449},{139,0,1011},{7,10,1875},{139,10,124},{4,11,
+344},{6,11,498},{139,11,323},{137,0,299},{132,0,837},{133,11,906},{5,0,329},{8,0
+,260},{138,0,10},{134,0,1320},{4,0,657},{146,0,158},{135,0,1191},{152,0,7},{6,0,
+1939},{8,0,974},{138,0,996},{135,0,1665},{11,11,126},{139,11,287},{143,0,8},{14,
+11,149},{14,11,399},{143,11,57},{5,0,66},{7,0,1896},{136,0,288},{7,0,175},{10,0,
+494},{5,10,150},{8,10,603},{9,10,593},{9,10,634},{10,10,173},{11,10,462},{11,10,
+515},{13,10,216},{13,10,288},{142,10,400},{134,0,1643},{136,11,21},{4,0,21},{5,0
+,91},{5,0,648},{5,0,750},{5,0,781},{6,0,54},{6,0,112},{6,0,402},{6,0,1732},{7,0,
+315},{7,0,749},{7,0,1427},{7,0,1900},{9,0,78},{9,0,508},{10,0,611},{10,0,811},{
+11,0,510},{11,0,728},{13,0,36},{14,0,39},{16,0,83},{17,0,124},{148,0,30},{4,0,
+668},{136,0,570},{10,0,322},{10,0,719},{139,0,407},{135,11,1381},{136,11,193},{
+12,10,108},{141,10,291},{132,11,616},{136,11,692},{8,0,125},{8,0,369},{8,0,524},
+{10,0,486},{11,0,13},{11,0,381},{11,0,736},{11,0,766},{11,0,845},{13,0,114},{13,
+0,292},{142,0,47},{134,0,1247},{6,0,1684},{6,0,1731},{7,0,356},{8,0,54},{8,0,221
+},{9,0,225},{9,0,356},{10,0,77},{10,0,446},{10,0,731},{12,0,404},{141,0,491},{
+135,10,1777},{4,11,305},{4,10,493},{144,10,55},{4,0,951},{6,0,1809},{6,0,1849},{
+8,0,846},{8,0,866},{8,0,899},{10,0,896},{12,0,694},{142,0,468},{5,11,214},{7,11,
+603},{8,11,611},{9,11,686},{10,11,88},{11,11,459},{11,11,496},{12,11,463},{12,11
+,590},{13,11,0},{142,11,214},{132,0,411},{4,0,80},{133,0,44},{140,11,74},{143,0,
+31},{7,0,669},{6,10,568},{7,10,1804},{8,10,362},{8,10,410},{8,10,830},{9,10,514}
+,{11,10,649},{142,10,157},{7,0,673},{134,11,1703},{132,10,625},{134,0,1303},{5,0
+,299},{135,0,1083},{138,0,704},{6,0,275},{7,0,408},{6,10,158},{7,10,129},{7,10,
+181},{8,10,276},{8,10,377},{10,10,523},{11,10,816},{12,10,455},{13,10,303},{142,
+10,135},{4,0,219},{7,0,367},{7,0,1713},{7,0,1761},{9,0,86},{9,0,537},{10,0,165},
+{12,0,219},{140,0,561},{8,0,216},{4,10,1},{4,11,737},{6,11,317},{7,10,1143},{7,
+10,1463},{9,10,207},{9,10,390},{9,10,467},{10,11,98},{11,11,294},{11,10,836},{12
+,11,60},{12,11,437},{13,11,64},{13,11,380},{142,11,430},{6,11,1758},{8,11,520},{
+9,11,345},{9,11,403},{142,11,350},{5,11,47},{10,11,242},{138,11,579},{5,11,139},
+{7,11,1168},{138,11,539},{135,0,1319},{4,10,295},{4,10,723},{5,10,895},{7,10,
+1031},{8,10,199},{8,10,340},{9,10,153},{9,10,215},{10,10,21},{10,10,59},{10,10,
+80},{10,10,224},{10,10,838},{11,10,229},{11,10,652},{12,10,192},{13,10,146},{142
+,10,91},{140,0,428},{137,10,51},{133,0,514},{5,10,309},{140,10,211},{6,0,1010},{
+5,10,125},{8,10,77},{138,10,15},{4,0,55},{5,0,301},{6,0,571},{142,0,49},{146,0,
+102},{136,11,370},{4,11,107},{7,11,613},{8,11,358},{8,11,439},{8,11,504},{9,11,
+501},{10,11,383},{139,11,477},{132,11,229},{133,0,364},{133,10,439},{4,11,903},{
+135,11,1816},{11,0,379},{140,10,76},{4,0,76},{4,0,971},{7,0,1550},{9,0,306},{9,0
+,430},{9,0,663},{10,0,683},{10,0,921},{11,0,427},{11,0,753},{12,0,334},{12,0,442
+},{14,0,258},{14,0,366},{143,0,131},{137,0,52},{4,11,47},{6,11,373},{7,11,452},{
+7,11,543},{7,11,1714},{7,11,1856},{9,11,6},{11,11,257},{139,11,391},{4,10,8},{7,
+10,1152},{7,10,1153},{7,10,1715},{9,10,374},{10,10,478},{139,10,648},{4,11,785},
+{133,11,368},{135,10,1099},{135,11,860},{5,11,980},{134,11,1754},{134,0,1258},{6
+,0,1058},{6,0,1359},{7,11,536},{7,11,1331},{136,11,143},{4,0,656},{135,0,779},{
+136,10,87},{5,11,19},{6,11,533},{146,11,126},{7,0,144},{138,10,438},{5,11,395},{
+5,11,951},{134,11,1776},{135,0,1373},{7,0,554},{7,0,605},{141,0,10},{4,10,69},{5
+,10,122},{9,10,656},{138,10,464},{5,10,849},{134,10,1633},{5,0,838},{5,0,841},{
+134,0,1649},{133,0,1012},{139,10,499},{7,10,476},{7,10,1592},{138,10,87},{6,0,
+251},{7,0,365},{7,0,1357},{7,0,1497},{8,0,154},{141,0,281},{132,11,441},{132,11,
+695},{7,11,497},{9,11,387},{147,11,81},{133,0,340},{14,10,283},{142,11,283},{134
+,0,810},{135,11,1894},{139,0,495},{5,11,284},{6,11,49},{6,11,350},{7,11,1},{7,11
+,377},{7,11,1693},{8,11,18},{8,11,678},{9,11,161},{9,11,585},{9,11,671},{9,11,
+839},{11,11,912},{141,11,427},{5,10,859},{7,10,1160},{8,10,107},{9,10,291},{9,10
+,439},{10,10,663},{11,10,609},{140,10,197},{8,0,261},{9,0,144},{9,0,466},{10,0,
+370},{12,0,470},{13,0,144},{142,0,348},{137,0,897},{6,0,248},{9,0,546},{10,0,535
+},{11,0,681},{141,0,135},{4,0,358},{135,0,1496},{134,0,567},{136,0,445},{4,10,
+117},{6,10,372},{7,10,1905},{142,10,323},{4,10,722},{139,10,471},{6,0,697},{134,
+0,996},{7,11,2007},{9,11,101},{9,11,450},{10,11,66},{10,11,842},{11,11,536},{140
+,11,587},{132,0,577},{134,0,1336},{9,10,5},{12,10,216},{12,10,294},{12,10,298},{
+12,10,400},{12,10,518},{13,10,229},{143,10,139},{6,0,174},{138,0,917},{134,10,
+1774},{5,10,12},{7,10,375},{9,10,88},{9,10,438},{11,11,62},{139,10,270},{134,11,
+1766},{6,11,0},{7,11,84},{7,10,816},{7,10,1241},{9,10,283},{9,10,520},{10,10,213
+},{10,10,307},{10,10,463},{10,10,671},{10,10,746},{11,10,401},{11,10,794},{11,11
+,895},{12,10,517},{17,11,11},{18,10,107},{147,10,115},{5,0,878},{133,0,972},{6,
+11,1665},{7,11,256},{7,11,1388},{138,11,499},{4,10,258},{136,10,639},{4,11,22},{
+5,11,10},{6,10,22},{7,11,848},{7,10,903},{7,10,1963},{8,11,97},{138,10,577},{5,
+10,681},{136,10,782},{133,11,481},{132,0,351},{4,10,664},{5,10,804},{139,10,1013
+},{6,11,134},{7,11,437},{7,11,959},{9,11,37},{14,11,285},{14,11,371},{144,11,60}
+,{7,11,486},{8,11,155},{11,11,93},{140,11,164},{132,0,286},{7,0,438},{7,0,627},{
+7,0,1516},{8,0,40},{9,0,56},{9,0,294},{10,0,30},{11,0,969},{11,0,995},{146,0,148
+},{5,11,591},{135,11,337},{134,0,1950},{133,10,32},{138,11,500},{5,11,380},{5,11
+,650},{136,11,310},{4,11,364},{7,11,1156},{7,11,1187},{137,11,409},{4,0,738},{
+134,11,482},{4,11,781},{6,11,487},{7,11,926},{8,11,263},{139,11,500},{135,11,418
+},{6,0,2047},{10,0,969},{4,10,289},{7,10,629},{7,10,1698},{7,10,1711},{140,10,
+215},{6,10,450},{136,10,109},{134,0,818},{136,10,705},{133,0,866},{4,11,94},{135
+,11,1265},{132,11,417},{134,0,1467},{135,10,1238},{4,0,972},{6,0,1851},{134,0,
+1857},{134,0,355},{133,0,116},{132,0,457},{135,11,1411},{4,11,408},{4,11,741},{
+135,11,500},{134,10,26},{142,11,137},{5,0,527},{6,0,189},{7,0,859},{136,0,267},{
+11,0,104},{11,0,554},{15,0,60},{143,0,125},{134,0,1613},{4,10,414},{5,10,467},{9
+,10,654},{10,10,451},{12,10,59},{141,10,375},{135,10,17},{134,0,116},{135,11,541
+},{135,10,955},{6,11,73},{135,11,177},{133,11,576},{134,0,886},{133,0,487},{4,0,
+86},{5,0,667},{5,0,753},{6,0,316},{6,0,455},{135,0,946},{142,11,231},{150,0,45},
+{134,0,863},{134,0,1953},{6,10,280},{10,10,502},{11,10,344},{140,10,38},{4,0,79}
+,{7,0,1773},{10,0,450},{11,0,589},{13,0,332},{13,0,493},{14,0,183},{14,0,334},{
+14,0,362},{14,0,368},{14,0,376},{14,0,379},{19,0,90},{19,0,103},{19,0,127},{148,
+0,90},{5,10,45},{7,10,1161},{11,10,448},{11,10,880},{13,10,139},{13,10,407},{15,
+10,16},{17,10,95},{18,10,66},{18,10,88},{18,10,123},{149,10,7},{136,10,777},{4,
+10,410},{135,10,521},{135,10,1778},{135,11,538},{142,0,381},{133,11,413},{134,0,
+1142},{6,0,1189},{136,11,495},{5,0,663},{6,0,1962},{134,0,2003},{7,11,54},{8,11,
+312},{10,11,191},{10,11,614},{140,11,567},{132,10,436},{133,0,846},{10,0,528},{
+11,0,504},{7,10,1587},{135,10,1707},{5,0,378},{8,0,465},{9,0,286},{10,0,185},{10
+,0,562},{10,0,635},{11,0,31},{11,0,393},{13,0,312},{18,0,65},{18,0,96},{147,0,89
+},{7,0,899},{14,0,325},{6,11,468},{7,11,567},{7,11,1478},{8,11,530},{142,11,290}
+,{7,0,1880},{9,0,680},{139,0,798},{134,0,1770},{132,0,648},{150,11,35},{5,0,945}
+,{6,0,1656},{6,0,1787},{7,0,167},{8,0,824},{9,0,391},{10,0,375},{139,0,185},{6,
+11,484},{135,11,822},{134,0,2046},{7,0,1645},{8,0,352},{137,0,249},{132,0,152},{
+6,0,611},{135,0,1733},{6,11,1724},{135,11,2022},{133,0,1006},{141,11,96},{5,0,
+420},{135,0,1449},{146,11,149},{135,0,832},{135,10,663},{133,0,351},{5,0,40},{7,
+0,598},{7,0,1638},{8,0,78},{9,0,166},{9,0,640},{9,0,685},{9,0,773},{11,0,215},{
+13,0,65},{14,0,172},{14,0,317},{145,0,6},{8,0,60},{9,0,343},{139,0,769},{134,0,
+1354},{132,0,724},{137,0,745},{132,11,474},{7,0,1951},{8,0,765},{8,0,772},{140,0
+,671},{7,0,108},{8,0,219},{8,0,388},{9,0,775},{11,0,275},{140,0,464},{137,0,639}
+,{135,10,503},{133,11,366},{5,0,15},{6,0,56},{7,0,1758},{8,0,500},{9,0,730},{11,
+0,331},{13,0,150},{14,0,282},{5,11,305},{9,11,560},{141,11,208},{4,10,113},{5,10
+,163},{5,10,735},{7,10,1009},{9,10,9},{9,10,771},{12,10,90},{13,10,138},{13,10,
+410},{143,10,128},{4,10,324},{138,10,104},{135,11,466},{142,11,27},{134,0,1886},
+{5,0,205},{6,0,438},{9,0,711},{4,11,480},{6,11,167},{6,11,302},{6,11,1642},{7,11
+,130},{7,11,656},{7,11,837},{7,11,1547},{7,11,1657},{8,11,429},{9,11,228},{10,11
+,643},{13,11,289},{13,11,343},{147,11,101},{134,0,865},{6,0,2025},{136,0,965},{7
+,11,278},{10,11,739},{11,11,708},{141,11,348},{133,0,534},{135,11,1922},{137,0,
+691},{4,10,935},{133,10,823},{6,0,443},{9,0,237},{9,0,571},{9,0,695},{10,0,139},
+{11,0,715},{12,0,417},{141,0,421},{5,10,269},{7,10,434},{7,10,891},{8,10,339},{9
+,10,702},{11,10,594},{11,10,718},{145,10,100},{6,0,1555},{7,0,878},{9,10,485},{
+141,10,264},{134,10,1713},{7,10,1810},{11,10,866},{12,10,103},{141,10,495},{135,
+10,900},{6,0,1410},{9,11,316},{139,11,256},{4,0,995},{135,0,1033},{132,0,578},{
+10,0,881},{12,0,740},{12,0,743},{140,0,759},{132,0,822},{133,0,923},{142,10,143}
+,{135,11,1696},{6,11,363},{7,11,1955},{136,11,725},{132,0,924},{133,0,665},{135,
+10,2029},{135,0,1901},{4,0,265},{6,0,1092},{6,0,1417},{7,0,807},{135,0,950},{5,0
+,93},{12,0,267},{141,0,498},{135,0,1451},{5,11,813},{135,11,2046},{5,10,625},{
+135,10,1617},{135,0,747},{6,0,788},{137,0,828},{7,0,184},{11,0,307},{11,0,400},{
+15,0,130},{5,11,712},{7,11,1855},{8,10,425},{8,10,693},{9,10,720},{10,10,380},{
+10,10,638},{11,11,17},{11,10,473},{12,10,61},{13,11,321},{144,11,67},{135,0,198}
+,{6,11,320},{7,11,781},{7,11,1921},{9,11,55},{10,11,186},{10,11,273},{10,11,664}
+,{10,11,801},{11,11,996},{11,11,997},{13,11,157},{142,11,170},{136,11,271},{135,
+0,994},{7,11,103},{7,11,863},{11,11,184},{14,11,299},{145,11,62},{11,10,551},{
+142,10,159},{5,0,233},{5,0,320},{6,0,140},{8,0,295},{8,0,615},{136,11,615},{133,
+0,978},{4,0,905},{6,0,1701},{137,0,843},{132,10,168},{4,0,974},{8,0,850},{12,0,
+709},{12,0,768},{140,0,786},{135,10,91},{152,0,6},{138,10,532},{135,10,1884},{
+132,0,509},{6,0,1307},{135,0,273},{5,11,77},{7,11,1455},{10,11,843},{19,11,73},{
+150,11,5},{132,11,458},{135,11,1420},{6,11,109},{138,11,382},{6,0,201},{6,11,330
+},{7,10,70},{7,11,1084},{10,10,240},{11,11,142},{147,10,93},{7,0,1041},{140,11,
+328},{133,11,354},{134,0,1040},{133,0,693},{134,0,774},{139,0,234},{132,0,336},{
+7,0,1399},{139,10,392},{20,0,22},{148,11,22},{5,0,802},{7,0,2021},{136,0,805},{5
+,0,167},{5,0,899},{6,0,410},{137,0,777},{137,0,789},{134,0,1705},{7,10,655},{135
+,10,1844},{4,10,145},{6,10,176},{7,10,395},{137,10,562},{132,10,501},{135,0,10},
+{5,0,11},{6,0,117},{6,0,485},{7,0,1133},{9,0,582},{9,0,594},{10,0,82},{11,0,21},
+{11,0,818},{12,0,535},{13,0,86},{20,0,91},{23,0,13},{134,10,509},{4,0,264},{7,0,
+1067},{8,0,204},{8,0,385},{139,0,953},{139,11,737},{138,0,56},{134,0,1917},{133,
+0,470},{10,11,657},{14,11,297},{142,11,361},{135,11,412},{7,0,1198},{7,11,1198},
+{8,11,556},{14,11,123},{14,11,192},{143,11,27},{7,11,1985},{14,11,146},{15,11,42
+},{16,11,23},{17,11,86},{146,11,17},{11,0,1015},{136,11,122},{4,10,114},{9,10,
+492},{13,10,462},{142,10,215},{4,10,77},{5,10,361},{6,10,139},{6,10,401},{6,10,
+404},{7,10,413},{7,10,715},{7,10,1716},{11,10,279},{12,10,179},{12,10,258},{13,
+10,244},{142,10,358},{134,10,1717},{7,10,1061},{8,10,82},{11,10,250},{12,10,420}
+,{141,10,184},{133,0,715},{135,10,724},{9,0,919},{9,0,922},{9,0,927},{9,0,933},{
+9,0,962},{9,0,1000},{9,0,1002},{9,0,1021},{12,0,890},{12,0,907},{12,0,930},{15,0
+,207},{15,0,228},{15,0,238},{149,0,61},{8,0,794},{9,0,400},{10,0,298},{142,0,228
+},{5,11,430},{5,11,932},{6,11,131},{7,11,417},{9,11,522},{11,11,314},{141,11,390
+},{132,0,867},{8,0,724},{132,11,507},{137,11,261},{4,11,343},{133,11,511},{6,0,
+190},{7,0,768},{135,0,1170},{6,10,513},{135,10,1052},{7,11,455},{138,11,591},{
+134,0,1066},{137,10,899},{14,0,67},{147,0,60},{4,0,948},{18,0,174},{146,0,176},{
+135,0,1023},{7,10,1417},{12,10,382},{17,10,48},{152,10,12},{134,11,575},{132,0,
+764},{6,10,545},{7,10,565},{7,10,1669},{10,10,114},{11,10,642},{140,10,618},{6,0
+,137},{9,0,75},{9,0,253},{10,0,194},{138,0,444},{4,0,756},{133,10,5},{8,0,1008},
+{135,10,192},{132,0,842},{11,0,643},{12,0,115},{136,10,763},{139,0,67},{133,10,
+759},{4,0,821},{5,0,760},{7,0,542},{8,0,135},{8,0,496},{135,11,580},{7,10,370},{
+7,10,1007},{7,10,1177},{135,10,1565},{135,10,1237},{140,0,736},{7,0,319},{7,0,
+355},{7,0,763},{10,0,389},{145,0,43},{8,11,333},{138,11,182},{4,10,87},{5,10,250
+},{141,10,298},{138,0,786},{134,0,2044},{8,11,330},{140,11,477},{135,11,1338},{
+132,11,125},{134,0,1030},{134,0,1083},{132,11,721},{135,10,814},{7,11,776},{8,11
+,145},{147,11,56},{134,0,1226},{4,10,57},{7,10,1195},{7,10,1438},{7,10,1548},{7,
+10,1835},{7,10,1904},{9,10,757},{10,10,604},{139,10,519},{7,11,792},{8,11,147},{
+10,11,821},{139,11,1021},{137,11,797},{4,0,58},{5,0,286},{6,0,319},{7,0,402},{7,
+0,1254},{7,0,1903},{8,0,356},{140,0,408},{4,0,389},{4,0,815},{9,0,181},{9,0,255}
+,{10,0,8},{10,0,29},{10,0,816},{11,0,311},{11,0,561},{12,0,67},{141,0,181},{7,11
+,1472},{135,11,1554},{7,11,1071},{7,11,1541},{7,11,1767},{7,11,1806},{7,11,1999}
+,{9,11,248},{10,11,400},{11,11,162},{11,11,178},{11,11,242},{12,11,605},{15,11,
+26},{144,11,44},{5,11,168},{5,11,930},{8,11,74},{9,11,623},{12,11,500},{12,11,
+579},{13,11,41},{143,11,93},{6,11,220},{7,11,1101},{141,11,105},{5,0,474},{7,0,
+507},{4,10,209},{7,11,507},{135,10,902},{132,0,427},{6,0,413},{7,10,335},{7,10,
+1437},{7,10,1668},{8,10,553},{8,10,652},{8,10,656},{9,10,558},{11,10,743},{149,
+10,18},{132,0,730},{6,11,19},{7,11,1413},{139,11,428},{133,0,373},{132,10,559},{
+7,11,96},{8,11,401},{137,11,896},{7,0,799},{7,0,1972},{5,10,1017},{138,10,511},{
+135,0,1793},{7,11,1961},{7,11,1965},{8,11,702},{136,11,750},{8,11,150},{8,11,737
+},{140,11,366},{132,0,322},{133,10,709},{8,11,800},{9,11,148},{9,11,872},{9,11,
+890},{11,11,309},{11,11,1001},{13,11,267},{141,11,323},{134,10,1745},{7,0,290},{
+136,10,206},{7,0,1651},{145,0,89},{139,0,2},{132,0,672},{6,0,1860},{8,0,905},{10
+,0,844},{10,0,846},{10,0,858},{12,0,699},{12,0,746},{140,0,772},{135,11,424},{
+133,11,547},{133,0,737},{5,11,490},{6,11,615},{6,11,620},{135,11,683},{6,0,746},
+{134,0,1612},{132,10,776},{9,11,385},{149,11,17},{133,0,145},{135,10,1272},{7,0,
+884},{140,0,124},{4,0,387},{135,0,1288},{5,11,133},{136,10,406},{136,11,187},{6,
+0,679},{8,11,8},{138,11,0},{135,0,550},{135,11,798},{136,11,685},{7,11,1086},{
+145,11,46},{8,10,175},{10,10,168},{138,10,573},{135,0,1305},{4,0,576},{135,0,
+1263},{6,0,686},{134,0,1563},{134,0,607},{5,0,919},{134,0,1673},{148,0,37},{8,11
+,774},{10,11,670},{140,11,51},{133,10,784},{139,10,882},{4,0,82},{5,0,333},{5,0,
+904},{6,0,207},{7,0,325},{7,0,1726},{8,0,101},{10,0,778},{139,0,220},{135,11,371
+},{132,0,958},{133,0,903},{4,11,127},{5,11,350},{6,11,356},{8,11,426},{9,11,572}
+,{10,11,247},{139,11,312},{140,0,147},{6,11,59},{7,11,885},{9,11,603},{141,11,
+397},{10,0,367},{9,10,14},{9,10,441},{139,10,9},{11,10,966},{12,10,287},{13,10,
+342},{13,10,402},{15,10,110},{143,10,163},{134,0,690},{132,0,705},{9,0,651},{11,
+0,971},{13,0,273},{7,10,1428},{7,10,1640},{7,10,1867},{9,10,169},{9,10,182},{9,
+10,367},{9,10,478},{9,10,506},{9,10,551},{9,10,557},{9,10,648},{9,10,697},{9,10,
+705},{9,10,725},{9,10,787},{9,10,794},{10,10,198},{10,10,214},{10,10,267},{10,10
+,275},{10,10,456},{10,10,551},{10,10,561},{10,10,613},{10,10,627},{10,10,668},{
+10,10,675},{10,10,691},{10,10,695},{10,10,707},{10,10,715},{11,10,183},{11,10,
+201},{11,10,262},{11,10,352},{11,10,439},{11,10,493},{11,10,572},{11,10,591},{11
+,10,608},{11,10,611},{11,10,646},{11,10,674},{11,10,711},{11,10,751},{11,10,761}
+,{11,10,776},{11,10,785},{11,10,850},{11,10,853},{11,10,862},{11,10,865},{11,10,
+868},{11,10,875},{11,10,898},{11,10,902},{11,10,903},{11,10,910},{11,10,932},{11
+,10,942},{11,10,957},{11,10,967},{11,10,972},{12,10,148},{12,10,195},{12,10,220}
+,{12,10,237},{12,10,318},{12,10,339},{12,10,393},{12,10,445},{12,10,450},{12,10,
+474},{12,10,505},{12,10,509},{12,10,533},{12,10,591},{12,10,594},{12,10,597},{12
+,10,621},{12,10,633},{12,10,642},{13,10,59},{13,10,60},{13,10,145},{13,10,239},{
+13,10,250},{13,10,329},{13,10,344},{13,10,365},{13,10,372},{13,10,387},{13,10,
+403},{13,10,414},{13,10,456},{13,10,470},{13,10,478},{13,10,483},{13,10,489},{14
+,10,55},{14,10,57},{14,10,81},{14,10,90},{14,10,148},{14,10,239},{14,10,266},{14
+,10,321},{14,10,326},{14,10,327},{14,10,330},{14,10,347},{14,10,355},{14,10,401}
+,{14,10,404},{14,10,411},{14,10,414},{14,10,416},{14,10,420},{15,10,61},{15,10,
+74},{15,10,87},{15,10,88},{15,10,94},{15,10,96},{15,10,116},{15,10,149},{15,10,
+154},{16,10,50},{16,10,63},{16,10,73},{17,10,2},{17,10,66},{17,10,92},{17,10,103
+},{17,10,112},{17,10,120},{18,10,50},{18,10,54},{18,10,82},{18,10,86},{18,10,90}
+,{18,10,111},{18,10,115},{18,10,156},{19,10,40},{19,10,79},{20,10,78},{149,10,22
+},{7,0,887},{5,10,161},{135,10,839},{142,11,98},{134,0,90},{138,11,356},{135,11,
+441},{6,11,111},{7,11,4},{8,11,163},{8,11,776},{138,11,566},{134,0,908},{134,0,
+1261},{7,0,813},{12,0,497},{141,0,56},{134,0,1235},{135,0,429},{135,11,1994},{
+138,0,904},{6,0,125},{7,0,1277},{137,0,772},{151,0,12},{4,0,841},{5,0,386},{133,
+11,386},{5,11,297},{135,11,1038},{6,0,860},{6,0,1069},{135,11,309},{136,0,946},{
+135,10,1814},{141,11,418},{136,11,363},{10,0,768},{139,0,787},{22,11,30},{150,11
+,33},{6,0,160},{7,0,1106},{9,0,770},{11,0,112},{140,0,413},{11,11,216},{139,11,
+340},{136,10,139},{135,11,1390},{135,11,808},{132,11,280},{12,0,271},{17,0,109},
+{7,10,643},{136,10,236},{140,11,54},{4,11,421},{133,11,548},{11,0,719},{12,0,36}
+,{141,0,337},{7,0,581},{9,0,644},{137,0,699},{11,11,511},{13,11,394},{14,11,298}
+,{14,11,318},{146,11,103},{7,0,304},{9,0,646},{9,0,862},{11,0,696},{12,0,208},{
+15,0,79},{147,0,108},{4,0,631},{7,0,1126},{135,0,1536},{135,11,1527},{8,0,880},{
+10,0,869},{138,0,913},{7,0,1513},{5,10,54},{6,11,254},{9,11,109},{138,11,103},{
+135,0,981},{133,11,729},{132,10,744},{132,0,434},{134,0,550},{7,0,930},{10,0,476
+},{13,0,452},{19,0,104},{6,11,1630},{10,10,402},{146,10,55},{5,0,553},{138,0,824
+},{136,0,452},{8,0,151},{137,10,624},{132,10,572},{132,0,772},{133,11,671},{133,
+0,292},{138,0,135},{132,11,889},{140,11,207},{9,0,504},{6,10,43},{7,10,38},{8,10
+,248},{138,10,513},{6,0,1089},{135,11,1910},{4,11,627},{133,11,775},{135,0,783},
+{133,10,766},{133,10,363},{7,0,387},{135,11,387},{7,0,393},{10,0,603},{11,0,206}
+,{7,11,202},{11,11,362},{11,11,948},{140,11,388},{6,11,507},{7,11,451},{8,11,389
+},{12,11,490},{13,11,16},{13,11,215},{13,11,351},{18,11,132},{147,11,125},{4,0,
+912},{9,0,232},{135,11,841},{6,10,258},{140,10,409},{5,10,249},{148,10,82},{136,
+11,566},{6,0,977},{135,11,1214},{7,0,1973},{136,0,716},{135,0,98},{133,0,733},{5
+,11,912},{134,11,1695},{5,10,393},{6,10,378},{7,10,1981},{9,10,32},{9,10,591},{
+10,10,685},{10,10,741},{142,10,382},{133,10,788},{10,0,19},{11,0,911},{7,10,1968
+},{141,10,509},{5,0,668},{5,11,236},{6,11,572},{8,11,492},{11,11,618},{144,11,56
+},{135,11,1789},{4,0,360},{5,0,635},{5,0,700},{5,10,58},{5,10,171},{5,10,683},{6
+,10,291},{6,10,566},{7,10,1650},{11,10,523},{12,10,273},{12,10,303},{15,10,39},{
+143,10,111},{133,0,901},{134,10,589},{5,11,190},{136,11,318},{140,0,656},{7,0,
+726},{152,0,9},{4,10,917},{133,10,1005},{135,10,1598},{134,11,491},{4,10,919},{
+133,11,434},{137,0,72},{6,0,1269},{6,0,1566},{134,0,1621},{9,0,463},{10,0,595},{
+4,10,255},{5,10,302},{6,10,132},{7,10,128},{7,10,283},{7,10,1299},{10,10,52},{10
+,10,514},{11,10,925},{13,10,92},{142,10,309},{135,0,1454},{134,0,1287},{11,0,600
+},{13,0,245},{137,10,173},{136,0,989},{7,0,164},{7,0,1571},{9,0,107},{140,0,225}
+,{6,0,1061},{141,10,442},{4,0,27},{5,0,484},{5,0,510},{6,0,434},{7,0,1000},{7,0,
+1098},{136,0,2},{7,11,85},{7,11,247},{8,11,585},{10,11,163},{138,11,316},{11,11,
+103},{142,11,0},{134,0,1127},{4,0,460},{134,0,852},{134,10,210},{4,0,932},{133,0
+,891},{6,0,588},{147,11,83},{8,0,625},{4,10,284},{134,10,223},{134,0,76},{8,0,92
+},{137,0,221},{4,11,124},{10,11,457},{11,11,121},{11,11,169},{11,11,422},{11,11,
+870},{12,11,214},{13,11,389},{14,11,187},{143,11,77},{9,11,618},{138,11,482},{4,
+10,218},{7,10,526},{143,10,137},{13,0,9},{14,0,104},{14,0,311},{4,10,270},{5,10,
+192},{6,10,332},{135,10,1322},{140,10,661},{135,11,1193},{6,11,107},{7,11,638},{
+7,11,1632},{137,11,396},{132,0,763},{4,0,622},{5,11,370},{134,11,1756},{133,0,
+253},{135,0,546},{9,0,73},{10,0,110},{14,0,185},{17,0,119},{133,11,204},{7,0,624
+},{7,0,916},{10,0,256},{139,0,87},{7,10,379},{8,10,481},{137,10,377},{5,0,212},{
+12,0,35},{13,0,382},{5,11,970},{134,11,1706},{9,0,746},{5,10,1003},{134,10,149},
+{10,0,150},{11,0,849},{13,0,330},{8,10,262},{9,10,627},{11,10,214},{11,10,404},{
+11,10,457},{11,10,780},{11,10,913},{13,10,401},{142,10,200},{134,0,1466},{135,11
+,3},{6,0,1299},{4,11,35},{5,11,121},{5,11,483},{5,11,685},{6,11,489},{7,11,1204}
+,{136,11,394},{135,10,742},{4,10,142},{136,10,304},{4,11,921},{133,11,1007},{134
+,0,1518},{6,0,1229},{135,0,1175},{133,0,816},{12,0,159},{4,10,471},{4,11,712},{5
+,10,51},{6,10,602},{7,10,925},{8,10,484},{138,10,195},{134,11,1629},{5,0,869},{5
+,0,968},{6,0,1626},{8,0,734},{136,0,784},{4,0,542},{6,0,1716},{6,0,1727},{7,0,
+1082},{7,0,1545},{8,0,56},{8,0,118},{8,0,412},{8,0,564},{9,0,888},{9,0,908},{10,
+0,50},{10,0,423},{11,0,685},{11,0,697},{11,0,933},{12,0,299},{13,0,126},{13,0,
+136},{13,0,170},{13,0,190},{136,10,688},{132,10,697},{4,0,232},{9,0,202},{10,0,
+474},{140,0,433},{136,0,212},{6,0,108},{7,0,1003},{7,0,1181},{8,0,111},{136,0,
+343},{5,10,221},{135,11,1255},{133,11,485},{134,0,1712},{142,0,216},{5,0,643},{6
+,0,516},{4,11,285},{5,11,317},{6,11,301},{7,11,7},{8,11,153},{10,11,766},{11,11,
+468},{12,11,467},{141,11,143},{4,0,133},{7,0,711},{7,0,1298},{135,0,1585},{134,0
+,650},{135,11,512},{6,0,99},{7,0,1808},{145,0,57},{6,0,246},{6,0,574},{7,0,428},
+{9,0,793},{10,0,669},{11,0,485},{11,0,840},{12,0,300},{14,0,250},{145,0,55},{4,
+10,132},{5,10,69},{135,10,1242},{136,0,1023},{7,0,302},{132,10,111},{135,0,1871}
+,{132,0,728},{9,0,252},{132,10,767},{6,0,461},{7,0,1590},{7,10,1416},{7,10,2005}
+,{8,10,131},{8,10,466},{9,10,672},{13,10,252},{148,10,103},{6,0,323},{135,0,1564
+},{7,0,461},{136,0,775},{6,10,44},{136,10,368},{139,0,172},{132,0,464},{4,10,570
+},{133,10,120},{137,11,269},{6,10,227},{135,10,1589},{6,11,1719},{6,11,1735},{7,
+11,2016},{7,11,2020},{8,11,837},{137,11,852},{7,0,727},{146,0,73},{132,0,1023},{
+135,11,852},{135,10,1529},{136,0,577},{138,11,568},{134,0,1037},{8,11,67},{138,
+11,419},{4,0,413},{5,0,677},{8,0,432},{140,0,280},{10,0,600},{6,10,1667},{7,11,
+967},{7,10,2036},{141,11,11},{6,10,511},{140,10,132},{6,0,799},{5,10,568},{6,10,
+138},{135,10,1293},{8,0,159},{4,10,565},{136,10,827},{7,0,646},{7,0,1730},{11,0,
+446},{141,0,178},{4,10,922},{133,10,1023},{135,11,11},{132,0,395},{11,0,145},{
+135,10,1002},{9,0,174},{10,0,164},{11,0,440},{11,0,514},{11,0,841},{15,0,98},{
+149,0,20},{134,0,426},{10,0,608},{139,0,1002},{7,11,320},{8,11,51},{12,11,481},{
+12,11,570},{148,11,106},{9,0,977},{9,0,983},{132,11,445},{138,0,250},{139,0,100}
+,{6,0,1982},{136,10,402},{133,11,239},{4,10,716},{141,10,31},{5,0,476},{7,11,83}
+,{7,11,1990},{8,11,130},{139,11,720},{8,10,691},{136,10,731},{5,11,123},{6,11,
+530},{7,11,348},{135,11,1419},{5,0,76},{6,0,458},{6,0,497},{7,0,868},{9,0,658},{
+10,0,594},{11,0,173},{11,0,566},{12,0,20},{12,0,338},{141,0,200},{9,11,139},{10,
+11,399},{11,11,469},{12,11,634},{141,11,223},{9,10,840},{138,10,803},{133,10,847
+},{11,11,223},{140,11,168},{132,11,210},{8,0,447},{9,10,53},{9,10,268},{9,10,901
+},{10,10,518},{10,10,829},{11,10,188},{13,10,74},{14,10,46},{15,10,17},{15,10,33
+},{17,10,40},{18,10,36},{19,10,20},{22,10,1},{152,10,2},{4,0,526},{7,0,1029},{
+135,0,1054},{19,11,59},{150,11,2},{4,0,636},{6,0,1875},{6,0,1920},{9,0,999},{12,
+0,807},{12,0,825},{15,0,179},{15,0,190},{18,0,182},{136,10,532},{6,0,1699},{7,0,
+660},{7,0,1124},{17,0,31},{19,0,22},{151,0,14},{135,10,681},{132,11,430},{140,10
+,677},{4,10,684},{136,10,384},{132,11,756},{133,11,213},{7,0,188},{7,10,110},{8,
+10,290},{8,10,591},{9,10,382},{9,10,649},{11,10,71},{11,10,155},{11,10,313},{12,
+10,5},{13,10,325},{142,10,287},{7,10,360},{7,10,425},{9,10,66},{9,10,278},{138,
+10,644},{142,11,164},{4,0,279},{7,0,301},{137,0,362},{134,11,586},{135,0,1743},{
+4,0,178},{133,0,399},{4,10,900},{133,10,861},{5,10,254},{7,10,985},{136,10,73},{
+133,11,108},{7,10,1959},{136,10,683},{133,11,219},{4,11,193},{5,11,916},{7,11,
+364},{10,11,398},{10,11,726},{11,11,317},{11,11,626},{12,11,142},{12,11,288},{12
+,11,678},{13,11,313},{15,11,113},{18,11,114},{21,11,30},{150,11,53},{6,11,241},{
+7,11,907},{8,11,832},{9,11,342},{10,11,729},{11,11,284},{11,11,445},{11,11,651},
+{11,11,863},{13,11,398},{146,11,99},{132,0,872},{134,0,831},{134,0,1692},{6,0,
+202},{6,0,1006},{9,0,832},{10,0,636},{11,0,208},{12,0,360},{17,0,118},{18,0,27},
+{20,0,67},{137,11,734},{132,10,725},{7,11,993},{138,11,666},{134,0,1954},{134,10
+,196},{7,0,872},{10,0,516},{139,0,167},{133,10,831},{4,11,562},{9,11,254},{139,
+11,879},{137,0,313},{4,0,224},{132,11,786},{11,0,24},{12,0,170},{136,10,723},{5,
+0,546},{7,0,35},{8,0,11},{8,0,12},{9,0,315},{9,0,533},{10,0,802},{11,0,166},{12,
+0,525},{142,0,243},{7,0,1937},{13,10,80},{13,10,437},{145,10,74},{5,0,241},{8,0,
+242},{9,0,451},{10,0,667},{11,0,598},{140,0,429},{150,0,46},{6,0,1273},{137,0,
+830},{5,10,848},{6,10,66},{136,10,764},{6,0,825},{134,0,993},{4,0,1006},{10,0,
+327},{13,0,271},{4,10,36},{7,10,1387},{139,10,755},{134,0,1023},{135,0,1580},{4,
+0,366},{137,0,516},{132,10,887},{6,0,1736},{135,0,1891},{6,11,216},{7,11,901},{7
+,11,1343},{136,11,493},{6,10,165},{138,10,388},{7,11,341},{139,11,219},{4,10,719
+},{135,10,155},{134,0,1935},{132,0,826},{6,0,331},{6,0,1605},{8,0,623},{11,0,139
+},{139,0,171},{135,11,1734},{10,11,115},{11,11,420},{12,11,154},{13,11,404},{14,
+11,346},{15,11,54},{143,11,112},{7,0,288},{4,10,353},{6,10,146},{6,10,1789},{7,
+10,990},{7,10,1348},{9,10,665},{9,10,898},{11,10,893},{142,10,212},{6,0,916},{
+134,0,1592},{7,0,1888},{4,10,45},{135,10,1257},{5,11,1011},{136,11,701},{139,11,
+596},{4,11,54},{5,11,666},{7,11,1039},{7,11,1130},{9,11,195},{138,11,302},{134,0
+,1471},{134,0,1570},{132,0,394},{140,10,65},{136,10,816},{135,0,1931},{7,0,574},
+{135,0,1719},{134,11,467},{132,0,658},{9,0,781},{10,0,144},{11,0,385},{13,0,161}
+,{13,0,228},{13,0,268},{20,0,107},{134,11,1669},{136,0,374},{135,0,735},{4,0,344
+},{6,0,498},{139,0,323},{7,0,586},{7,0,1063},{6,10,559},{134,10,1691},{137,0,155
+},{133,0,906},{7,11,122},{9,11,259},{10,11,84},{11,11,470},{12,11,541},{141,11,
+379},{134,0,1139},{10,0,108},{139,0,116},{134,10,456},{133,10,925},{5,11,82},{5,
+11,131},{7,11,1755},{8,11,31},{9,11,168},{9,11,764},{139,11,869},{134,11,605},{5
+,11,278},{137,11,68},{4,11,163},{5,11,201},{5,11,307},{5,11,310},{6,11,335},{7,
+11,284},{136,11,165},{135,11,1660},{6,11,33},{135,11,1244},{4,0,616},{136,11,483
+},{8,0,857},{8,0,902},{8,0,910},{10,0,879},{12,0,726},{4,11,199},{139,11,34},{
+136,0,692},{6,10,193},{7,10,240},{7,10,1682},{10,10,51},{10,10,640},{11,10,410},
+{13,10,82},{14,10,247},{14,10,331},{142,10,377},{6,0,823},{134,0,983},{139,10,
+411},{132,0,305},{136,10,633},{138,11,203},{134,0,681},{6,11,326},{7,11,677},{
+137,11,425},{5,0,214},{7,0,603},{8,0,611},{9,0,686},{10,0,88},{11,0,459},{11,0,
+496},{12,0,463},{12,0,590},{141,0,0},{136,0,1004},{142,0,23},{134,0,1703},{147,
+11,8},{145,11,56},{135,0,1443},{4,10,237},{135,10,514},{6,0,714},{145,0,19},{5,
+11,358},{7,11,473},{7,11,1184},{10,11,662},{13,11,212},{13,11,304},{13,11,333},{
+145,11,98},{4,0,737},{10,0,98},{11,0,294},{12,0,60},{12,0,437},{13,0,64},{13,0,
+380},{142,0,430},{6,10,392},{7,10,65},{135,10,2019},{6,0,1758},{8,0,520},{9,0,
+345},{9,0,403},{142,0,350},{5,0,47},{10,0,242},{138,0,579},{5,0,139},{7,0,1168},
+{138,0,539},{134,0,1459},{13,0,388},{141,11,388},{134,0,253},{7,10,1260},{135,10
+,1790},{10,0,252},{9,10,222},{139,10,900},{140,0,745},{133,11,946},{4,0,107},{7,
+0,613},{8,0,439},{8,0,504},{9,0,501},{10,0,383},{139,0,477},{135,11,1485},{132,0
+,871},{7,11,411},{7,11,590},{8,11,631},{9,11,323},{10,11,355},{11,11,491},{12,11
+,143},{12,11,402},{13,11,73},{14,11,408},{15,11,107},{146,11,71},{132,0,229},{
+132,0,903},{140,0,71},{133,0,549},{4,0,47},{6,0,373},{7,0,452},{7,0,543},{7,0,
+1828},{7,0,1856},{9,0,6},{11,0,257},{139,0,391},{7,11,1467},{8,11,328},{10,11,
+544},{11,11,955},{13,11,320},{145,11,83},{5,0,980},{134,0,1754},{136,0,865},{5,0
+,705},{137,0,606},{7,0,161},{8,10,201},{136,10,605},{143,11,35},{5,11,835},{6,11
+,483},{140,10,224},{7,0,536},{7,0,1331},{136,0,143},{134,0,1388},{5,0,724},{10,0
+,305},{11,0,151},{12,0,33},{12,0,121},{12,0,381},{17,0,3},{17,0,27},{17,0,78},{
+18,0,18},{19,0,54},{149,0,5},{4,10,523},{133,10,638},{5,0,19},{134,0,533},{5,0,
+395},{5,0,951},{134,0,1776},{135,0,1908},{132,0,846},{10,0,74},{11,0,663},{12,0,
+210},{13,0,166},{13,0,310},{14,0,373},{18,0,95},{19,0,43},{6,10,242},{7,10,227},
+{7,10,1581},{8,10,104},{9,10,113},{9,10,220},{9,10,427},{10,10,239},{11,10,579},
+{11,10,1023},{13,10,4},{13,10,204},{13,10,316},{148,10,86},{9,11,716},{11,11,108
+},{13,11,123},{14,11,252},{19,11,38},{21,11,3},{151,11,11},{8,0,372},{9,0,122},{
+138,0,175},{132,11,677},{7,11,1374},{136,11,540},{135,10,861},{132,0,695},{7,0,
+497},{9,0,387},{147,0,81},{136,0,937},{134,0,718},{7,0,1328},{136,10,494},{132,
+11,331},{6,0,1581},{133,11,747},{5,0,284},{6,0,49},{6,0,350},{7,0,1},{7,0,377},{
+7,0,1693},{8,0,18},{8,0,678},{9,0,161},{9,0,585},{9,0,671},{9,0,839},{11,0,912},
+{141,0,427},{7,10,1306},{8,10,505},{9,10,482},{10,10,126},{11,10,225},{12,10,347
+},{12,10,449},{13,10,19},{14,10,218},{142,10,435},{10,10,764},{12,10,120},{13,10
+,39},{145,10,127},{4,0,597},{133,10,268},{134,0,1094},{4,0,1008},{134,0,1973},{
+132,0,811},{139,0,908},{135,0,1471},{133,11,326},{4,10,384},{135,10,1022},{7,0,
+1935},{8,0,324},{12,0,42},{4,11,691},{7,11,1935},{8,11,324},{9,11,35},{10,11,680
+},{11,11,364},{12,11,42},{13,11,357},{146,11,16},{135,0,2014},{7,0,2007},{9,0,
+101},{9,0,450},{10,0,66},{10,0,842},{11,0,536},{12,0,587},{6,11,32},{7,11,385},{
+7,11,757},{7,11,1916},{8,11,37},{8,11,94},{8,11,711},{9,11,541},{10,11,162},{10,
+11,795},{11,11,989},{11,11,1010},{12,11,14},{142,11,308},{139,0,586},{135,10,
+1703},{7,0,1077},{11,0,28},{9,10,159},{140,10,603},{6,0,1221},{136,10,583},{6,11
+,152},{6,11,349},{6,11,1682},{7,11,1252},{8,11,112},{9,11,435},{9,11,668},{10,11
+,290},{10,11,319},{10,11,815},{11,11,180},{11,11,837},{12,11,240},{13,11,152},{
+13,11,219},{142,11,158},{139,0,62},{132,10,515},{8,10,632},{8,10,697},{137,10,
+854},{134,0,1766},{132,11,581},{6,11,126},{7,11,573},{8,11,397},{142,11,44},{150
+,0,28},{11,0,670},{22,0,25},{4,10,136},{133,10,551},{6,0,1665},{7,0,256},{7,0,
+1388},{138,0,499},{4,0,22},{5,0,10},{7,0,1576},{136,0,97},{134,10,1782},{5,0,481
+},{7,10,1287},{9,10,44},{10,10,552},{10,10,642},{11,10,839},{12,10,274},{12,10,
+275},{12,10,372},{13,10,91},{142,10,125},{133,11,926},{7,11,1232},{137,11,531},{
+6,0,134},{7,0,437},{7,0,1824},{9,0,37},{14,0,285},{142,0,371},{7,0,486},{8,0,155
+},{11,0,93},{140,0,164},{6,0,1391},{134,0,1442},{133,11,670},{133,0,591},{6,10,
+147},{7,10,886},{7,11,1957},{9,10,753},{138,10,268},{5,0,380},{5,0,650},{7,0,
+1173},{136,0,310},{4,0,364},{7,0,1156},{7,0,1187},{137,0,409},{135,11,1621},{134
+,0,482},{133,11,506},{4,0,781},{6,0,487},{7,0,926},{8,0,263},{139,0,500},{138,10
+,137},{135,11,242},{139,11,96},{133,10,414},{135,10,1762},{134,0,804},{5,11,834}
+,{7,11,1202},{8,11,14},{9,11,481},{137,11,880},{134,10,599},{4,0,94},{135,0,1265
+},{4,0,415},{132,0,417},{5,0,348},{6,0,522},{6,10,1749},{7,11,1526},{138,11,465}
+,{134,10,1627},{132,0,1012},{132,10,488},{4,11,357},{6,11,172},{7,11,143},{137,
+11,413},{4,10,83},{4,11,590},{146,11,76},{140,10,676},{7,11,287},{8,11,355},{9,
+11,293},{137,11,743},{134,10,278},{6,0,1803},{18,0,165},{24,0,21},{5,11,169},{7,
+11,333},{136,11,45},{12,10,97},{140,11,97},{4,0,408},{4,0,741},{135,0,500},{132,
+11,198},{7,10,388},{7,10,644},{139,10,781},{4,11,24},{5,11,140},{5,11,185},{7,11
+,1500},{11,11,565},{139,11,838},{6,0,1321},{9,0,257},{7,10,229},{8,10,59},{9,10,
+190},{10,10,378},{140,10,191},{4,11,334},{133,11,593},{135,11,1885},{134,0,1138}
+,{4,0,249},{6,0,73},{135,0,177},{133,0,576},{142,0,231},{137,0,288},{132,10,660}
+,{7,10,1035},{138,10,737},{135,0,1487},{6,0,989},{9,0,433},{7,10,690},{9,10,587}
+,{140,10,521},{7,0,1264},{7,0,1678},{11,0,945},{12,0,341},{12,0,471},{140,0,569}
+,{132,11,709},{133,11,897},{5,11,224},{13,11,174},{146,11,52},{135,11,1840},{134
+,10,1744},{12,0,87},{16,0,74},{4,10,733},{9,10,194},{10,10,92},{11,10,198},{12,
+10,84},{141,10,128},{140,0,779},{135,0,538},{4,11,608},{133,11,497},{133,0,413},
+{7,11,1375},{7,11,1466},{138,11,331},{136,0,495},{6,11,540},{136,11,136},{7,0,54
+},{8,0,312},{10,0,191},{10,0,614},{140,0,567},{6,0,468},{7,0,567},{7,0,1478},{8,
+0,530},{14,0,290},{133,11,999},{4,11,299},{7,10,306},{135,11,1004},{142,11,296},
+{134,0,1484},{133,10,979},{6,0,609},{9,0,815},{12,11,137},{14,11,9},{14,11,24},{
+142,11,64},{133,11,456},{6,0,484},{135,0,822},{133,10,178},{136,11,180},{132,11,
+755},{137,0,900},{135,0,1335},{6,0,1724},{135,0,2022},{135,11,1139},{5,0,640},{
+132,10,390},{6,0,1831},{138,11,633},{135,11,566},{4,11,890},{5,11,805},{5,11,819
+},{5,11,961},{6,11,396},{6,11,1631},{6,11,1678},{7,11,1967},{7,11,2041},{9,11,
+630},{11,11,8},{11,11,1019},{12,11,176},{13,11,225},{14,11,292},{149,11,24},{132
+,0,474},{134,0,1103},{135,0,1504},{134,0,1576},{6,0,961},{6,0,1034},{140,0,655},
+{11,11,514},{149,11,20},{5,0,305},{135,11,1815},{7,11,1505},{10,11,190},{10,11,
+634},{11,11,792},{12,11,358},{140,11,447},{5,11,0},{6,11,536},{7,11,604},{13,11,
+445},{145,11,126},{7,0,1236},{133,10,105},{4,0,480},{6,0,217},{6,0,302},{6,0,
+1642},{7,0,130},{7,0,837},{7,0,1321},{7,0,1547},{7,0,1657},{8,0,429},{9,0,228},{
+13,0,289},{13,0,343},{19,0,101},{6,11,232},{6,11,412},{7,11,1074},{8,11,9},{8,11
+,157},{8,11,786},{9,11,196},{9,11,352},{9,11,457},{10,11,337},{11,11,232},{11,11
+,877},{12,11,480},{140,11,546},{5,10,438},{7,11,958},{9,10,694},{12,10,627},{13,
+11,38},{141,10,210},{4,11,382},{136,11,579},{7,0,278},{10,0,739},{11,0,708},{141
+,0,348},{4,11,212},{135,11,1206},{135,11,1898},{6,0,708},{6,0,1344},{152,10,11},
+{137,11,768},{134,0,1840},{140,0,233},{8,10,25},{138,10,826},{6,0,2017},{133,11,
+655},{6,0,1488},{139,11,290},{132,10,308},{134,0,1590},{134,0,1800},{134,0,1259}
+,{16,0,28},{6,11,231},{7,11,95},{136,11,423},{133,11,300},{135,10,150},{136,10,
+649},{7,11,1874},{137,11,641},{6,11,237},{7,11,611},{8,11,100},{9,11,416},{11,11
+,335},{12,11,173},{146,11,101},{137,0,45},{134,10,521},{17,0,36},{14,11,26},{146
+,11,150},{7,0,1442},{14,0,22},{5,10,339},{15,10,41},{15,10,166},{147,10,66},{8,0
+,378},{6,11,581},{135,11,1119},{134,0,1507},{147,11,117},{139,0,39},{134,0,1054}
+,{6,0,363},{7,0,1955},{136,0,725},{134,0,2036},{133,11,199},{6,0,1871},{9,0,935}
+,{9,0,961},{9,0,1004},{9,0,1016},{12,0,805},{12,0,852},{12,0,853},{12,0,869},{12
+,0,882},{12,0,896},{12,0,906},{12,0,917},{12,0,940},{15,0,170},{15,0,176},{15,0,
+188},{15,0,201},{15,0,205},{15,0,212},{15,0,234},{15,0,244},{18,0,181},{18,0,193
+},{18,0,196},{18,0,201},{18,0,202},{18,0,210},{18,0,217},{18,0,235},{18,0,236},{
+18,0,237},{21,0,54},{21,0,55},{21,0,58},{21,0,59},{152,0,22},{134,10,1628},{137,
+0,805},{5,0,813},{135,0,2046},{142,11,42},{5,0,712},{6,0,1240},{11,0,17},{13,0,
+321},{144,0,67},{132,0,617},{135,10,829},{6,0,320},{7,0,781},{7,0,1921},{9,0,55}
+,{10,0,186},{10,0,273},{10,0,664},{10,0,801},{11,0,996},{11,0,997},{13,0,157},{
+142,0,170},{136,0,271},{5,10,486},{135,10,1349},{18,11,91},{147,11,70},{10,0,445
+},{7,10,1635},{8,10,17},{138,10,295},{136,11,404},{7,0,103},{7,0,863},{11,0,184}
+,{145,0,62},{138,10,558},{137,0,659},{6,11,312},{6,11,1715},{10,11,584},{11,11,
+546},{11,11,692},{12,11,259},{12,11,295},{13,11,46},{141,11,154},{134,0,676},{
+132,11,588},{4,11,231},{5,11,61},{6,11,104},{7,11,729},{7,11,964},{7,11,1658},{
+140,11,414},{6,11,263},{138,11,757},{11,0,337},{142,0,303},{135,11,1363},{132,11
+,320},{140,0,506},{134,10,447},{5,0,77},{7,0,1455},{10,0,843},{147,0,73},{7,10,
+577},{7,10,1432},{9,10,475},{9,10,505},{9,10,526},{9,10,609},{9,10,689},{9,10,
+726},{9,10,735},{9,10,738},{10,10,556},{10,10,674},{10,10,684},{11,10,89},{11,10
+,202},{11,10,272},{11,10,380},{11,10,415},{11,10,505},{11,10,537},{11,10,550},{
+11,10,562},{11,10,640},{11,10,667},{11,10,688},{11,10,847},{11,10,927},{11,10,
+930},{11,10,940},{12,10,144},{12,10,325},{12,10,329},{12,10,389},{12,10,403},{12
+,10,451},{12,10,515},{12,10,604},{12,10,616},{12,10,626},{13,10,66},{13,10,131},
+{13,10,167},{13,10,236},{13,10,368},{13,10,411},{13,10,434},{13,10,453},{13,10,
+461},{13,10,474},{14,10,59},{14,10,60},{14,10,139},{14,10,152},{14,10,276},{14,
+10,353},{14,10,402},{15,10,28},{15,10,81},{15,10,123},{15,10,152},{18,10,136},{
+148,10,88},{132,0,458},{135,0,1420},{6,0,109},{10,0,382},{4,11,405},{4,10,609},{
+7,10,756},{7,11,817},{9,10,544},{11,10,413},{14,11,58},{14,10,307},{16,10,25},{
+17,11,37},{146,11,124},{6,0,330},{7,0,1084},{11,0,142},{133,11,974},{4,10,930},{
+133,10,947},{5,10,939},{142,11,394},{16,0,91},{145,0,87},{5,11,235},{5,10,962},{
+7,11,1239},{11,11,131},{140,11,370},{11,0,492},{5,10,651},{8,10,170},{9,10,61},{
+9,10,63},{10,10,23},{10,10,37},{10,10,834},{11,10,4},{11,10,281},{11,10,503},{11
+,10,677},{12,10,96},{12,10,130},{12,10,244},{14,10,5},{14,10,40},{14,10,162},{14
+,10,202},{146,10,133},{4,10,406},{5,10,579},{12,10,492},{150,10,15},{9,11,137},{
+138,11,221},{134,0,1239},{11,0,211},{140,0,145},{7,11,390},{138,11,140},{135,11,
+1418},{135,11,1144},{134,0,1049},{7,0,321},{6,10,17},{7,10,1001},{7,10,1982},{9,
+10,886},{10,10,489},{10,10,800},{11,10,782},{12,10,320},{13,10,467},{14,10,145},
+{14,10,387},{143,10,119},{145,10,17},{5,11,407},{11,11,489},{19,11,37},{20,11,73
+},{150,11,38},{133,10,458},{135,0,1985},{7,10,1983},{8,10,0},{8,10,171},{9,10,
+120},{9,10,732},{10,10,473},{11,10,656},{11,10,998},{18,10,0},{18,10,2},{147,10,
+21},{5,11,325},{7,11,1483},{8,11,5},{8,11,227},{9,11,105},{10,11,585},{140,11,
+614},{136,0,122},{132,0,234},{135,11,1196},{6,0,976},{6,0,1098},{134,0,1441},{7,
+0,253},{136,0,549},{6,11,621},{13,11,504},{144,11,19},{132,10,519},{5,0,430},{5,
+0,932},{6,0,131},{7,0,417},{9,0,522},{11,0,314},{141,0,390},{14,0,149},{14,0,399
+},{143,0,57},{5,10,907},{6,10,31},{6,11,218},{7,10,491},{7,10,530},{8,10,592},{
+11,10,53},{11,10,779},{12,10,167},{12,10,411},{14,10,14},{14,10,136},{15,10,72},
+{16,10,17},{144,10,72},{140,11,330},{7,11,454},{7,11,782},{136,11,768},{132,0,
+507},{10,11,676},{140,11,462},{6,0,630},{9,0,811},{4,10,208},{5,10,106},{6,10,
+531},{8,10,408},{9,10,188},{138,10,572},{4,0,343},{5,0,511},{134,10,1693},{134,
+11,164},{132,0,448},{7,0,455},{138,0,591},{135,0,1381},{12,10,441},{150,11,50},{
+9,10,449},{10,10,192},{138,10,740},{6,0,575},{132,10,241},{134,0,1175},{134,0,
+653},{134,0,1761},{134,0,1198},{132,10,259},{6,11,343},{7,11,195},{9,11,226},{10
+,11,197},{10,11,575},{11,11,502},{139,11,899},{7,0,1127},{7,0,1572},{10,0,297},{
+10,0,422},{11,0,764},{11,0,810},{12,0,264},{13,0,102},{13,0,300},{13,0,484},{14,
+0,147},{14,0,229},{17,0,71},{18,0,118},{147,0,120},{135,11,666},{132,0,678},{4,
+10,173},{5,10,312},{5,10,512},{135,10,1285},{7,10,1603},{7,10,1691},{9,10,464},{
+11,10,195},{12,10,279},{12,10,448},{14,10,11},{147,10,102},{16,0,99},{146,0,164}
+,{7,11,1125},{9,11,143},{11,11,61},{14,11,405},{150,11,21},{137,11,260},{4,10,
+452},{5,10,583},{5,10,817},{6,10,433},{7,10,593},{7,10,720},{7,10,1378},{8,10,
+161},{9,10,284},{10,10,313},{139,10,886},{132,10,547},{136,10,722},{14,0,35},{
+142,0,191},{141,0,45},{138,0,121},{132,0,125},{134,0,1622},{133,11,959},{8,10,
+420},{139,10,193},{132,0,721},{135,10,409},{136,0,145},{7,0,792},{8,0,147},{10,0
+,821},{11,0,970},{11,0,1021},{136,11,173},{134,11,266},{132,0,715},{7,0,1999},{
+138,10,308},{133,0,531},{5,0,168},{5,0,930},{8,0,74},{9,0,623},{12,0,500},{140,0
+,579},{144,0,65},{138,11,246},{6,0,220},{7,0,1101},{13,0,105},{142,11,314},{5,10
+,1002},{136,10,745},{134,0,960},{20,0,0},{148,11,0},{4,0,1005},{4,10,239},{6,10,
+477},{7,10,1607},{11,10,68},{139,10,617},{6,0,19},{7,0,1413},{139,0,428},{149,10
+,13},{7,0,96},{8,0,401},{8,0,703},{9,0,896},{136,11,300},{134,0,1595},{145,0,116
+},{136,0,1021},{7,0,1961},{7,0,1965},{7,0,2030},{8,0,150},{8,0,702},{8,0,737},{8
+,0,750},{140,0,366},{11,11,75},{142,11,267},{132,10,367},{8,0,800},{9,0,148},{9,
+0,872},{9,0,890},{11,0,309},{11,0,1001},{13,0,267},{13,0,323},{5,11,427},{5,11,
+734},{7,11,478},{136,11,52},{7,11,239},{11,11,217},{142,11,165},{132,11,323},{
+140,11,419},{13,0,299},{142,0,75},{6,11,87},{6,11,1734},{7,11,20},{7,11,1056},{8
+,11,732},{9,11,406},{9,11,911},{138,11,694},{134,0,1383},{132,10,694},{133,11,
+613},{137,0,779},{4,0,598},{140,10,687},{6,0,970},{135,0,424},{133,0,547},{7,11,
+32},{7,11,984},{8,11,85},{8,11,709},{9,11,579},{9,11,847},{9,11,856},{10,11,799}
+,{11,11,258},{11,11,1007},{12,11,331},{12,11,615},{13,11,188},{13,11,435},{14,11
+,8},{15,11,165},{16,11,27},{148,11,40},{6,0,1222},{134,0,1385},{132,0,876},{138,
+11,151},{135,10,213},{4,11,167},{135,11,82},{133,0,133},{6,11,24},{7,11,74},{7,
+11,678},{137,11,258},{5,11,62},{6,11,534},{7,11,684},{7,11,1043},{7,11,1072},{8,
+11,280},{8,11,541},{8,11,686},{10,11,519},{11,11,252},{140,11,282},{136,0,187},{
+8,0,8},{10,0,0},{10,0,818},{139,0,988},{132,11,359},{11,0,429},{15,0,51},{135,10
+,1672},{136,0,685},{5,11,211},{7,11,88},{136,11,627},{134,0,472},{136,0,132},{6,
+11,145},{141,11,336},{4,10,751},{11,10,390},{140,10,32},{6,0,938},{6,0,1060},{4,
+11,263},{4,10,409},{133,10,78},{137,0,874},{8,0,774},{10,0,670},{12,0,51},{4,11,
+916},{6,10,473},{7,10,1602},{10,10,698},{12,10,212},{13,10,307},{145,10,105},{
+146,0,92},{143,10,156},{132,0,830},{137,0,701},{4,11,599},{6,11,1634},{7,11,5},{
+7,11,55},{7,11,67},{7,11,97},{7,11,691},{7,11,979},{7,11,1697},{8,11,207},{8,11,
+214},{8,11,231},{8,11,294},{8,11,336},{8,11,428},{8,11,451},{8,11,460},{8,11,471
+},{8,11,622},{8,11,626},{8,11,679},{8,11,759},{8,11,829},{9,11,11},{9,11,246},{9
+,11,484},{9,11,573},{9,11,706},{9,11,762},{9,11,798},{9,11,855},{9,11,870},{9,11
+,912},{10,11,303},{10,11,335},{10,11,424},{10,11,461},{10,11,543},{10,11,759},{
+10,11,814},{11,11,59},{11,11,199},{11,11,235},{11,11,475},{11,11,590},{11,11,929
+},{11,11,963},{12,11,114},{12,11,182},{12,11,226},{12,11,332},{12,11,439},{12,11
+,575},{12,11,598},{13,11,8},{13,11,125},{13,11,194},{13,11,287},{14,11,197},{14,
+11,383},{15,11,53},{17,11,63},{19,11,46},{19,11,98},{19,11,106},{148,11,85},{4,0
+,127},{5,0,350},{6,0,356},{8,0,426},{9,0,572},{10,0,247},{139,0,312},{134,0,1215
+},{6,0,59},{9,0,603},{13,0,397},{7,11,1853},{138,11,437},{134,0,1762},{147,11,
+126},{135,10,883},{13,0,293},{142,0,56},{133,10,617},{139,10,50},{5,11,187},{7,
+10,1518},{139,10,694},{135,0,441},{6,0,111},{7,0,4},{8,0,163},{8,0,776},{138,0,
+566},{132,0,806},{4,11,215},{9,11,38},{10,11,3},{11,11,23},{11,11,127},{139,11,
+796},{14,0,233},{4,10,546},{135,10,2042},{135,0,1994},{134,0,1739},{135,11,1530}
+,{136,0,393},{5,0,297},{7,0,1038},{14,0,359},{19,0,52},{148,0,47},{135,0,309},{4
+,10,313},{133,10,577},{8,10,184},{141,10,433},{135,10,935},{12,10,186},{12,10,
+292},{14,10,100},{146,10,70},{136,0,363},{14,0,175},{11,10,402},{12,10,109},{12,
+10,431},{13,10,179},{13,10,206},{14,10,217},{16,10,3},{148,10,53},{5,10,886},{6,
+10,46},{6,10,1790},{7,10,14},{7,10,732},{7,10,1654},{8,10,95},{8,10,327},{8,10,
+616},{9,10,892},{10,10,598},{10,10,769},{11,10,134},{11,10,747},{12,10,378},{142
+,10,97},{136,0,666},{135,0,1675},{6,0,655},{134,0,1600},{135,0,808},{133,10,1021
+},{4,11,28},{5,11,440},{7,11,248},{11,11,833},{140,11,344},{134,11,1654},{132,0,
+280},{140,0,54},{4,0,421},{133,0,548},{132,10,153},{6,11,339},{135,11,923},{133,
+11,853},{133,10,798},{132,10,587},{6,11,249},{7,11,1234},{139,11,573},{6,10,598}
+,{7,10,42},{8,10,695},{10,10,212},{11,10,158},{14,10,196},{145,10,85},{7,0,249},
+{5,10,957},{133,10,1008},{4,10,129},{135,10,465},{6,0,254},{7,0,842},{7,0,1659},
+{9,0,109},{10,0,103},{7,10,908},{7,10,1201},{9,10,755},{11,10,906},{12,10,527},{
+146,10,7},{5,0,262},{136,10,450},{144,0,1},{10,11,201},{142,11,319},{7,11,49},{7
+,11,392},{8,11,20},{8,11,172},{8,11,690},{9,11,383},{9,11,845},{10,11,48},{11,11
+,293},{11,11,832},{11,11,920},{141,11,221},{5,11,858},{133,11,992},{134,0,805},{
+139,10,1003},{6,0,1630},{134,11,307},{7,11,1512},{135,11,1794},{6,11,268},{137,
+11,62},{135,10,1868},{133,0,671},{4,0,989},{8,0,972},{136,0,998},{132,11,423},{
+132,0,889},{135,0,1382},{135,0,1910},{7,10,965},{7,10,1460},{135,10,1604},{4,0,
+627},{5,0,775},{138,11,106},{134,11,348},{7,0,202},{11,0,362},{11,0,948},{140,0,
+388},{138,11,771},{6,11,613},{136,11,223},{6,0,560},{7,0,451},{8,0,389},{12,0,
+490},{13,0,16},{13,0,215},{13,0,351},{18,0,132},{147,0,125},{135,0,841},{136,0,
+566},{136,0,938},{132,11,670},{5,0,912},{6,0,1695},{140,11,55},{9,11,40},{139,11
+,136},{7,0,1361},{7,10,982},{10,10,32},{143,10,56},{11,11,259},{140,11,270},{5,0
+,236},{6,0,572},{8,0,492},{11,0,618},{144,0,56},{8,11,572},{9,11,310},{9,11,682}
+,{137,11,698},{134,0,1854},{5,0,190},{136,0,318},{133,10,435},{135,0,1376},{4,11
+,296},{6,11,352},{7,11,401},{7,11,1410},{7,11,1594},{7,11,1674},{8,11,63},{8,11,
+660},{137,11,74},{7,0,349},{5,10,85},{6,10,419},{7,10,305},{7,10,361},{7,10,1337
+},{8,10,71},{140,10,519},{4,11,139},{4,11,388},{140,11,188},{6,0,1972},{6,0,2013
+},{8,0,951},{10,0,947},{10,0,974},{10,0,1018},{142,0,476},{140,10,688},{135,10,
+740},{5,10,691},{7,10,345},{9,10,94},{140,10,169},{9,0,344},{5,10,183},{6,10,582
+},{10,10,679},{140,10,435},{135,10,511},{132,0,850},{8,11,441},{10,11,314},{143,
+11,3},{7,10,1993},{136,10,684},{4,11,747},{6,11,290},{6,10,583},{7,11,649},{7,11
+,1479},{135,11,1583},{133,11,232},{133,10,704},{134,0,910},{4,10,179},{5,10,198}
+,{133,10,697},{7,10,347},{7,10,971},{8,10,181},{138,10,711},{136,11,525},{14,0,
+19},{14,0,28},{144,0,29},{7,0,85},{7,0,247},{8,0,585},{138,0,163},{4,0,487},{7,
+11,472},{7,11,1801},{10,11,748},{141,11,458},{4,10,243},{5,10,203},{7,10,19},{7,
+10,71},{7,10,113},{10,10,405},{11,10,357},{142,10,240},{7,10,1450},{139,10,99},{
+132,11,425},{138,0,145},{147,0,83},{6,10,492},{137,11,247},{4,0,1013},{134,0,
+2033},{5,10,134},{6,10,408},{6,10,495},{135,10,1593},{135,0,1922},{134,11,1768},
+{4,0,124},{10,0,457},{11,0,121},{11,0,169},{11,0,870},{11,0,874},{12,0,214},{14,
+0,187},{143,0,77},{5,0,557},{135,0,1457},{139,0,66},{5,11,943},{6,11,1779},{142,
+10,4},{4,10,248},{4,10,665},{7,10,137},{137,10,349},{7,0,1193},{5,11,245},{6,11,
+576},{7,11,582},{136,11,225},{144,0,82},{7,10,1270},{139,10,612},{5,0,454},{10,0
+,352},{138,11,352},{18,0,57},{5,10,371},{135,10,563},{135,0,1333},{6,0,107},{7,0
+,638},{7,0,1632},{9,0,396},{134,11,610},{5,0,370},{134,0,1756},{4,10,374},{7,10,
+547},{7,10,1700},{7,10,1833},{139,10,858},{133,0,204},{6,0,1305},{9,10,311},{141
+,10,42},{5,0,970},{134,0,1706},{6,10,1647},{7,10,1552},{7,10,2010},{9,10,494},{
+137,10,509},{13,11,455},{15,11,99},{15,11,129},{144,11,68},{135,0,3},{4,0,35},{5
+,0,121},{5,0,483},{5,0,685},{6,0,489},{6,0,782},{6,0,1032},{7,0,1204},{136,0,394
+},{4,0,921},{133,0,1007},{8,11,360},{138,11,63},{135,0,1696},{134,0,1519},{132,
+11,443},{135,11,944},{6,10,123},{7,10,214},{9,10,728},{10,10,157},{11,10,346},{
+11,10,662},{143,10,106},{137,0,981},{135,10,1435},{134,0,1072},{132,0,712},{134,
+0,1629},{134,0,728},{4,11,298},{137,11,483},{6,0,1177},{6,0,1271},{5,11,164},{7,
+11,121},{142,11,189},{7,0,1608},{4,10,707},{5,10,588},{6,10,393},{13,10,106},{18
+,10,49},{147,10,41},{23,0,16},{151,11,16},{6,10,211},{7,10,1690},{11,10,486},{
+140,10,369},{133,0,485},{19,11,15},{149,11,27},{4,11,172},{9,11,611},{10,11,436}
+,{12,11,673},{141,11,255},{5,11,844},{10,11,484},{11,11,754},{12,11,457},{14,11,
+171},{14,11,389},{146,11,153},{4,0,285},{5,0,27},{5,0,317},{6,0,301},{7,0,7},{8,
+0,153},{10,0,766},{11,0,468},{12,0,467},{141,0,143},{134,0,1462},{9,11,263},{10,
+11,147},{138,11,492},{133,11,537},{6,0,1945},{6,0,1986},{6,0,1991},{134,0,2038},
+{134,10,219},{137,11,842},{14,0,52},{17,0,50},{5,10,582},{6,10,1646},{7,10,99},{
+7,10,1962},{7,10,1986},{8,10,515},{8,10,773},{9,10,23},{9,10,491},{12,10,620},{
+142,10,93},{138,11,97},{20,0,21},{20,0,44},{133,10,851},{136,0,819},{139,0,917},
+{5,11,230},{5,11,392},{6,11,420},{8,10,762},{8,10,812},{9,11,568},{9,10,910},{
+140,11,612},{135,0,784},{15,0,135},{143,11,135},{10,0,454},{140,0,324},{4,11,0},
+{5,11,41},{7,11,1459},{7,11,1469},{7,11,1618},{7,11,1859},{9,11,549},{139,11,905
+},{4,10,98},{7,10,1365},{9,10,422},{9,10,670},{10,10,775},{11,10,210},{13,10,26}
+,{13,10,457},{141,10,476},{6,0,1719},{6,0,1735},{7,0,2016},{7,0,2020},{8,0,837},
+{137,0,852},{133,11,696},{135,0,852},{132,0,952},{134,10,1730},{132,11,771},{138
+,0,568},{137,0,448},{139,0,146},{8,0,67},{138,0,419},{133,11,921},{137,10,147},{
+134,0,1826},{10,0,657},{14,0,297},{142,0,361},{6,0,666},{6,0,767},{134,0,1542},{
+139,0,729},{6,11,180},{7,11,1137},{8,11,751},{139,11,805},{4,11,183},{7,11,271},
+{11,11,824},{11,11,952},{13,11,278},{13,11,339},{13,11,482},{14,11,424},{148,11,
+99},{4,0,669},{5,11,477},{5,11,596},{6,11,505},{7,11,1221},{11,11,907},{12,11,
+209},{141,11,214},{135,11,1215},{5,0,402},{6,10,30},{11,10,56},{139,10,305},{7,
+11,564},{142,11,168},{139,0,152},{7,0,912},{135,10,1614},{4,10,150},{5,10,303},{
+134,10,327},{7,0,320},{8,0,51},{9,0,868},{10,0,833},{12,0,481},{12,0,570},{148,0
+,106},{132,0,445},{7,11,274},{11,11,263},{11,11,479},{11,11,507},{140,11,277},{
+10,0,555},{11,0,308},{19,0,95},{6,11,1645},{8,10,192},{10,10,78},{141,10,359},{
+135,10,786},{6,11,92},{6,11,188},{7,11,1269},{7,11,1524},{7,11,1876},{10,11,228}
+,{139,11,1020},{4,11,459},{133,11,966},{11,0,386},{6,10,1638},{7,10,79},{7,10,
+496},{9,10,138},{10,10,336},{12,10,412},{12,10,440},{142,10,305},{133,0,239},{7,
+0,83},{7,0,1990},{8,0,130},{139,0,720},{138,11,709},{4,0,143},{5,0,550},{133,0,
+752},{5,0,123},{6,0,530},{7,0,348},{135,0,1419},{135,0,2024},{6,11,18},{7,11,179
+},{7,11,721},{7,11,932},{8,11,548},{8,11,757},{9,11,54},{9,11,65},{9,11,532},{9,
+11,844},{10,11,113},{10,11,117},{10,11,236},{10,11,315},{10,11,430},{10,11,798},
+{11,11,153},{11,11,351},{11,11,375},{12,11,78},{12,11,151},{12,11,392},{14,11,
+248},{143,11,23},{7,10,204},{7,10,415},{8,10,42},{10,10,85},{139,10,564},{134,0,
+958},{133,11,965},{132,0,210},{135,11,1429},{138,11,480},{134,11,182},{139,11,
+345},{10,11,65},{10,11,488},{138,11,497},{4,10,3},{5,10,247},{5,10,644},{7,10,
+744},{7,10,1207},{7,10,1225},{7,10,1909},{146,10,147},{132,0,430},{5,10,285},{9,
+10,67},{13,10,473},{143,10,82},{144,11,16},{7,11,1162},{9,11,588},{10,11,260},{
+151,10,8},{133,0,213},{138,0,7},{135,0,801},{134,11,1786},{135,11,308},{6,0,936}
+,{134,0,1289},{133,0,108},{132,0,885},{133,0,219},{139,0,587},{4,0,193},{5,0,916
+},{6,0,1041},{7,0,364},{10,0,398},{10,0,726},{11,0,317},{11,0,626},{12,0,142},{
+12,0,288},{12,0,678},{13,0,313},{15,0,113},{146,0,114},{135,0,1165},{6,0,241},{9
+,0,342},{10,0,729},{11,0,284},{11,0,445},{11,0,651},{11,0,863},{13,0,398},{146,0
+,99},{7,0,907},{136,0,832},{9,0,303},{4,10,29},{6,10,532},{7,10,1628},{7,10,1648
+},{9,10,350},{10,10,433},{11,10,97},{11,10,557},{11,10,745},{12,10,289},{12,10,
+335},{12,10,348},{12,10,606},{13,10,116},{13,10,233},{13,10,466},{14,10,181},{14
+,10,209},{14,10,232},{14,10,236},{14,10,300},{16,10,41},{148,10,97},{7,11,423},{
+7,10,1692},{136,11,588},{6,0,931},{134,0,1454},{5,10,501},{7,10,1704},{9,10,553}
+,{11,10,520},{12,10,557},{141,10,249},{136,11,287},{4,0,562},{9,0,254},{139,0,
+879},{132,0,786},{14,11,32},{18,11,85},{20,11,2},{152,11,16},{135,0,1294},{7,11,
+723},{135,11,1135},{6,0,216},{7,0,901},{7,0,1343},{8,0,493},{134,11,403},{7,11,
+719},{8,11,809},{136,11,834},{5,11,210},{6,11,213},{7,11,60},{10,11,364},{139,11
+,135},{7,0,341},{11,0,219},{5,11,607},{8,11,326},{136,11,490},{4,11,701},{5,11,
+472},{5,11,639},{7,11,1249},{9,11,758},{139,11,896},{135,11,380},{135,11,1947},{
+139,0,130},{135,0,1734},{10,0,115},{11,0,420},{12,0,154},{13,0,404},{14,0,346},{
+143,0,54},{134,10,129},{4,11,386},{7,11,41},{8,11,405},{9,11,497},{11,11,110},{
+11,11,360},{15,11,37},{144,11,84},{141,11,282},{5,11,46},{7,11,1452},{7,11,1480}
+,{8,11,634},{140,11,472},{4,11,524},{136,11,810},{10,11,238},{141,11,33},{133,0,
+604},{5,0,1011},{136,0,701},{8,0,856},{8,0,858},{8,0,879},{12,0,702},{142,0,447}
+,{4,0,54},{5,0,666},{7,0,1039},{7,0,1130},{9,0,195},{138,0,302},{4,10,25},{5,10,
+60},{6,10,504},{7,10,614},{7,10,1155},{140,10,0},{7,10,1248},{11,10,621},{139,10
+,702},{133,11,997},{137,10,321},{134,0,1669},{134,0,1791},{4,10,379},{135,10,
+1397},{138,11,372},{5,11,782},{5,11,829},{134,11,1738},{135,0,1228},{4,10,118},{
+6,10,274},{6,10,361},{7,10,75},{141,10,441},{132,0,623},{9,11,279},{10,11,407},{
+14,11,84},{150,11,18},{137,10,841},{135,0,798},{140,10,693},{5,10,314},{6,10,221
+},{7,10,419},{10,10,650},{11,10,396},{12,10,156},{13,10,369},{14,10,333},{145,10
+,47},{135,11,1372},{7,0,122},{9,0,259},{10,0,84},{11,0,470},{12,0,541},{141,0,
+379},{134,0,837},{8,0,1013},{4,11,78},{5,11,96},{5,11,182},{7,11,1724},{7,11,
+1825},{10,11,394},{10,11,471},{11,11,532},{14,11,340},{145,11,88},{134,0,577},{
+135,11,1964},{132,10,913},{134,0,460},{8,0,891},{10,0,901},{10,0,919},{10,0,932}
+,{12,0,715},{12,0,728},{12,0,777},{14,0,457},{144,0,103},{5,0,82},{5,0,131},{7,0
+,1755},{8,0,31},{9,0,168},{9,0,764},{139,0,869},{136,10,475},{6,0,605},{5,10,
+1016},{9,11,601},{9,11,619},{10,11,505},{10,11,732},{11,11,355},{140,11,139},{7,
+10,602},{8,10,179},{10,10,781},{140,10,126},{134,0,1246},{6,10,329},{138,10,111}
+,{6,11,215},{7,11,1028},{7,11,1473},{7,11,1721},{9,11,424},{138,11,779},{5,0,278
+},{137,0,68},{6,0,932},{6,0,1084},{144,0,86},{4,0,163},{5,0,201},{5,0,307},{5,0,
+310},{6,0,335},{7,0,284},{7,0,1660},{136,0,165},{136,0,781},{134,0,707},{6,0,33}
+,{135,0,1244},{5,10,821},{6,11,67},{6,10,1687},{7,11,258},{7,11,1630},{9,11,354}
+,{9,11,675},{10,11,830},{14,11,80},{145,11,80},{6,11,141},{7,11,225},{9,11,59},{
+9,11,607},{10,11,312},{11,11,687},{12,11,555},{13,11,373},{13,11,494},{148,11,58
+},{134,0,1113},{9,0,388},{5,10,71},{7,10,1407},{9,10,704},{10,10,261},{10,10,619
+},{11,10,547},{11,10,619},{143,10,157},{7,0,1953},{136,0,720},{138,0,203},{7,10,
+2008},{9,10,337},{138,10,517},{6,0,326},{7,0,677},{137,0,425},{139,11,81},{7,0,
+1316},{7,0,1412},{7,0,1839},{9,0,589},{11,0,241},{11,0,676},{11,0,811},{11,0,891
+},{12,0,140},{12,0,346},{12,0,479},{13,0,140},{13,0,381},{14,0,188},{18,0,30},{
+148,0,108},{5,0,416},{6,10,86},{6,10,603},{7,10,292},{7,10,561},{8,10,257},{8,10
+,382},{9,10,721},{9,10,778},{11,10,581},{140,10,466},{4,10,486},{133,10,491},{
+134,0,1300},{132,10,72},{7,0,847},{6,10,265},{7,11,430},{139,11,46},{5,11,602},{
+6,11,106},{7,11,1786},{7,11,1821},{7,11,2018},{9,11,418},{137,11,763},{5,0,358},
+{7,0,535},{7,0,1184},{10,0,662},{13,0,212},{13,0,304},{13,0,333},{145,0,98},{5,
+11,65},{6,11,416},{7,11,1720},{7,11,1924},{8,11,677},{10,11,109},{11,11,14},{11,
+11,70},{11,11,569},{11,11,735},{15,11,153},{148,11,80},{6,0,1823},{8,0,839},{8,0
+,852},{8,0,903},{10,0,940},{12,0,707},{140,0,775},{135,11,1229},{6,0,1522},{140,
+0,654},{136,11,595},{139,0,163},{141,0,314},{132,0,978},{4,0,601},{6,0,2035},{
+137,10,234},{5,10,815},{6,10,1688},{134,10,1755},{133,0,946},{136,0,434},{6,10,
+197},{136,10,205},{7,0,411},{7,0,590},{8,0,631},{9,0,323},{10,0,355},{11,0,491},
+{12,0,143},{12,0,402},{13,0,73},{14,0,408},{15,0,107},{146,0,71},{7,0,1467},{8,0
+,328},{10,0,544},{11,0,955},{12,0,13},{13,0,320},{145,0,83},{142,0,410},{11,0,
+511},{13,0,394},{14,0,298},{14,0,318},{146,0,103},{6,10,452},{7,10,312},{138,10,
+219},{138,10,589},{4,10,333},{9,10,176},{12,10,353},{141,10,187},{135,11,329},{
+132,11,469},{5,0,835},{134,0,483},{134,11,1743},{5,11,929},{6,11,340},{8,11,376}
+,{136,11,807},{134,10,1685},{132,0,677},{5,11,218},{7,11,1610},{138,11,83},{5,11
+,571},{135,11,1842},{132,11,455},{137,0,70},{135,0,1405},{7,10,135},{8,10,7},{8,
+10,62},{9,10,243},{10,10,658},{10,10,697},{11,10,456},{139,10,756},{9,10,395},{
+138,10,79},{137,0,108},{6,11,161},{7,11,372},{137,11,597},{132,11,349},{132,0,
+777},{132,0,331},{135,10,631},{133,0,747},{6,11,432},{6,11,608},{139,11,322},{
+138,10,835},{5,11,468},{7,11,1809},{10,11,325},{11,11,856},{12,11,345},{143,11,
+104},{133,11,223},{7,10,406},{7,10,459},{8,10,606},{139,10,726},{132,11,566},{
+142,0,68},{4,11,59},{135,11,1394},{6,11,436},{139,11,481},{4,11,48},{5,11,271},{
+135,11,953},{139,11,170},{5,11,610},{136,11,457},{133,11,755},{135,11,1217},{133
+,10,612},{132,11,197},{132,0,505},{4,10,372},{7,10,482},{8,10,158},{9,10,602},{9
+,10,615},{10,10,245},{10,10,678},{10,10,744},{11,10,248},{139,10,806},{133,0,326
+},{5,10,854},{135,10,1991},{4,0,691},{146,0,16},{6,0,628},{9,0,35},{10,0,680},{
+10,0,793},{11,0,364},{13,0,357},{143,0,164},{138,0,654},{6,0,32},{7,0,385},{7,0,
+757},{7,0,1916},{8,0,37},{8,0,94},{8,0,711},{9,0,541},{10,0,162},{10,0,795},{11,
+0,989},{11,0,1010},{12,0,14},{142,0,308},{133,11,217},{6,0,152},{6,0,349},{6,0,
+1682},{7,0,1252},{8,0,112},{9,0,435},{9,0,668},{10,0,290},{10,0,319},{10,0,815},
+{11,0,180},{11,0,837},{12,0,240},{13,0,152},{13,0,219},{142,0,158},{4,0,581},{
+134,0,726},{5,10,195},{135,10,1685},{6,0,126},{7,0,573},{8,0,397},{142,0,44},{
+138,0,89},{7,10,1997},{8,10,730},{139,10,1006},{134,0,1531},{134,0,1167},{5,0,
+926},{12,0,203},{133,10,751},{4,11,165},{7,11,1398},{135,11,1829},{7,0,1232},{
+137,0,531},{135,10,821},{134,0,943},{133,0,670},{4,0,880},{139,0,231},{134,0,
+1617},{135,0,1957},{5,11,9},{7,11,297},{7,11,966},{140,11,306},{6,0,975},{134,0,
+985},{5,10,950},{5,10,994},{134,10,351},{12,11,21},{151,11,7},{5,11,146},{6,11,
+411},{138,11,721},{7,0,242},{135,0,1942},{6,11,177},{135,11,467},{5,0,421},{7,10
+,47},{137,10,684},{5,0,834},{7,0,1202},{8,0,14},{9,0,481},{137,0,880},{138,0,465
+},{6,0,688},{9,0,834},{132,10,350},{132,0,855},{4,0,357},{6,0,172},{7,0,143},{
+137,0,413},{133,11,200},{132,0,590},{7,10,1812},{13,10,259},{13,10,356},{14,10,
+242},{147,10,114},{133,10,967},{11,0,114},{4,10,473},{7,10,623},{8,10,808},{9,10
+,871},{9,10,893},{11,10,431},{12,10,112},{12,10,217},{12,10,243},{12,10,562},{12
+,10,663},{12,10,683},{13,10,141},{13,10,197},{13,10,227},{13,10,406},{13,10,487}
+,{14,10,156},{14,10,203},{14,10,224},{14,10,256},{18,10,58},{150,10,0},{138,10,
+286},{4,10,222},{7,10,286},{136,10,629},{5,0,169},{7,0,333},{136,0,45},{134,11,
+481},{132,0,198},{4,0,24},{5,0,140},{5,0,185},{7,0,1500},{11,0,565},{11,0,838},{
+4,11,84},{7,11,1482},{10,11,76},{138,11,142},{133,0,585},{141,10,306},{133,11,
+1015},{4,11,315},{5,11,507},{135,11,1370},{136,10,146},{6,0,691},{134,0,1503},{4
+,0,334},{133,0,593},{4,10,465},{135,10,1663},{142,11,173},{135,0,913},{12,0,116}
+,{134,11,1722},{134,0,1360},{132,0,802},{8,11,222},{8,11,476},{9,11,238},{11,11,
+516},{11,11,575},{15,11,109},{146,11,100},{6,0,308},{9,0,673},{7,10,138},{7,10,
+517},{139,10,238},{132,0,709},{6,0,1876},{6,0,1895},{9,0,994},{9,0,1006},{12,0,
+829},{12,0,888},{12,0,891},{146,0,185},{148,10,94},{4,0,228},{133,0,897},{7,0,
+1840},{5,10,495},{7,10,834},{9,10,733},{139,10,378},{133,10,559},{6,10,21},{6,10
+,1737},{7,10,1444},{136,10,224},{4,0,608},{133,0,497},{6,11,40},{135,11,1781},{
+134,0,1573},{135,0,2039},{6,0,540},{136,0,136},{4,0,897},{5,0,786},{133,10,519},
+{6,0,1878},{6,0,1884},{9,0,938},{9,0,948},{9,0,955},{9,0,973},{9,0,1012},{12,0,
+895},{12,0,927},{143,0,254},{134,0,1469},{133,0,999},{4,0,299},{135,0,1004},{4,0
+,745},{133,0,578},{136,11,574},{133,0,456},{134,0,1457},{7,0,1679},{132,10,402},
+{7,0,693},{8,0,180},{12,0,163},{8,10,323},{136,10,479},{11,10,580},{142,10,201},
+{5,10,59},{135,10,672},{132,11,354},{146,10,34},{4,0,755},{135,11,1558},{7,0,
+1740},{146,0,48},{4,10,85},{135,10,549},{139,0,338},{133,10,94},{134,0,1091},{
+135,11,469},{12,0,695},{12,0,704},{20,0,113},{5,11,830},{14,11,338},{148,11,81},
+{135,0,1464},{6,10,11},{135,10,187},{135,0,975},{13,0,335},{132,10,522},{134,0,
+1979},{5,11,496},{135,11,203},{4,10,52},{135,10,661},{7,0,1566},{8,0,269},{9,0,
+212},{9,0,718},{14,0,15},{14,0,132},{142,0,227},{4,0,890},{5,0,805},{5,0,819},{5
+,0,961},{6,0,396},{6,0,1631},{6,0,1678},{7,0,1967},{7,0,2041},{9,0,630},{11,0,8}
+,{11,0,1019},{12,0,176},{13,0,225},{14,0,292},{21,0,24},{4,10,383},{133,10,520},
+{134,11,547},{135,11,1748},{5,11,88},{137,11,239},{146,11,128},{7,11,650},{135,
+11,1310},{4,10,281},{5,10,38},{7,10,194},{7,10,668},{7,10,1893},{137,10,397},{
+135,0,1815},{9,10,635},{139,10,559},{7,0,1505},{10,0,190},{10,0,634},{11,0,792},
+{12,0,358},{140,0,447},{5,0,0},{6,0,536},{7,0,604},{13,0,445},{145,0,126},{7,11,
+1076},{9,11,80},{11,11,78},{11,11,421},{11,11,534},{140,11,545},{8,0,966},{10,0,
+1023},{14,11,369},{146,11,72},{135,11,1641},{6,0,232},{6,0,412},{7,0,1074},{8,0,
+9},{8,0,157},{8,0,786},{9,0,196},{9,0,352},{9,0,457},{10,0,337},{11,0,232},{11,0
+,877},{12,0,480},{140,0,546},{135,0,958},{4,0,382},{136,0,579},{4,0,212},{135,0,
+1206},{4,11,497},{5,11,657},{135,11,1584},{132,0,681},{8,0,971},{138,0,965},{5,
+10,448},{136,10,535},{14,0,16},{146,0,44},{11,0,584},{11,0,616},{14,0,275},{11,
+11,584},{11,11,616},{142,11,275},{136,11,13},{7,10,610},{135,10,1501},{7,11,642}
+,{8,11,250},{11,11,123},{11,11,137},{13,11,48},{142,11,95},{133,0,655},{17,0,67}
+,{147,0,74},{134,0,751},{134,0,1967},{6,0,231},{136,0,423},{5,0,300},{138,0,1016
+},{4,10,319},{5,10,699},{138,10,673},{6,0,237},{7,0,611},{8,0,100},{9,0,416},{11
+,0,335},{12,0,173},{18,0,101},{6,10,336},{8,10,552},{9,10,285},{10,10,99},{139,
+10,568},{134,0,1370},{7,10,1406},{9,10,218},{141,10,222},{133,10,256},{135,0,
+1208},{14,11,213},{148,11,38},{6,0,1219},{135,11,1642},{13,0,417},{14,0,129},{
+143,0,15},{10,11,545},{140,11,301},{17,10,39},{148,10,36},{133,0,199},{4,11,904}
+,{133,11,794},{12,0,427},{146,0,38},{134,0,949},{8,0,665},{135,10,634},{132,10,
+618},{135,10,259},{132,10,339},{133,11,761},{141,10,169},{132,10,759},{5,0,688},
+{7,0,539},{135,0,712},{7,11,386},{138,11,713},{134,0,1186},{6,11,7},{6,11,35},{7
+,11,147},{7,11,1069},{7,11,1568},{7,11,1575},{7,11,1917},{8,11,43},{8,11,208},{9
+,11,128},{9,11,866},{10,11,20},{11,11,981},{147,11,33},{7,11,893},{8,10,482},{
+141,11,424},{6,0,312},{6,0,1715},{10,0,584},{11,0,546},{11,0,692},{12,0,259},{12
+,0,295},{13,0,46},{141,0,154},{5,10,336},{6,10,341},{6,10,478},{6,10,1763},{136,
+10,386},{137,0,151},{132,0,588},{152,0,4},{6,11,322},{9,11,552},{11,11,274},{13,
+11,209},{13,11,499},{14,11,85},{15,11,126},{145,11,70},{135,10,73},{4,0,231},{5,
+0,61},{6,0,104},{7,0,729},{7,0,964},{7,0,1658},{140,0,414},{6,0,263},{138,0,757}
+,{135,10,1971},{4,0,612},{133,0,561},{132,0,320},{135,10,1344},{8,11,83},{8,11,
+817},{9,11,28},{9,11,29},{9,11,885},{10,11,387},{11,11,633},{11,11,740},{13,11,
+235},{13,11,254},{15,11,143},{143,11,146},{5,10,396},{134,10,501},{140,11,49},{
+132,0,225},{4,10,929},{5,10,799},{8,10,46},{136,10,740},{4,0,405},{7,0,817},{14,
+0,58},{17,0,37},{146,0,124},{133,0,974},{4,11,412},{133,11,581},{4,10,892},{133,
+10,770},{4,0,996},{134,0,2026},{4,0,527},{5,0,235},{7,0,1239},{11,0,131},{140,0,
+370},{9,0,16},{13,0,386},{135,11,421},{7,0,956},{7,0,1157},{7,0,1506},{7,0,1606}
+,{7,0,1615},{7,0,1619},{7,0,1736},{7,0,1775},{8,0,590},{9,0,324},{9,0,736},{9,0,
+774},{9,0,776},{9,0,784},{10,0,567},{10,0,708},{11,0,518},{11,0,613},{11,0,695},
+{11,0,716},{11,0,739},{11,0,770},{11,0,771},{11,0,848},{11,0,857},{11,0,931},{11
+,0,947},{12,0,326},{12,0,387},{12,0,484},{12,0,528},{12,0,552},{12,0,613},{13,0,
+189},{13,0,256},{13,0,340},{13,0,432},{13,0,436},{13,0,440},{13,0,454},{14,0,174
+},{14,0,220},{14,0,284},{14,0,390},{145,0,121},{135,10,158},{9,0,137},{138,0,221
+},{4,11,110},{10,11,415},{10,11,597},{142,11,206},{141,11,496},{135,11,205},{151
+,10,25},{135,11,778},{7,11,1656},{7,10,2001},{9,11,369},{10,11,338},{10,11,490},
+{11,11,154},{11,11,545},{11,11,775},{13,11,77},{141,11,274},{4,11,444},{10,11,
+146},{140,11,9},{7,0,390},{138,0,140},{135,0,1144},{134,0,464},{7,10,1461},{140,
+10,91},{132,10,602},{4,11,283},{135,11,1194},{5,0,407},{11,0,204},{11,0,243},{11
+,0,489},{12,0,293},{19,0,37},{20,0,73},{150,0,38},{7,0,1218},{136,0,303},{5,0,
+325},{8,0,5},{8,0,227},{9,0,105},{10,0,585},{12,0,614},{4,10,13},{5,10,567},{7,
+10,1498},{9,10,124},{11,10,521},{140,10,405},{135,10,1006},{7,0,800},{10,0,12},{
+134,11,1720},{135,0,1783},{132,10,735},{138,10,812},{4,10,170},{135,10,323},{6,0
+,621},{13,0,504},{144,0,89},{5,10,304},{135,10,1403},{137,11,216},{6,0,920},{6,0
+,1104},{9,11,183},{139,11,286},{4,0,376},{133,10,742},{134,0,218},{8,0,641},{11,
+0,388},{140,0,580},{7,0,454},{7,0,782},{8,0,768},{140,0,686},{137,11,33},{133,10
+,111},{144,0,0},{10,0,676},{140,0,462},{6,0,164},{136,11,735},{133,10,444},{150,
+0,50},{7,11,1862},{12,11,491},{12,11,520},{13,11,383},{14,11,244},{146,11,12},{5
+,11,132},{9,11,486},{9,11,715},{10,11,458},{11,11,373},{11,11,668},{11,11,795},{
+11,11,897},{12,11,272},{12,11,424},{12,11,539},{12,11,558},{14,11,245},{14,11,
+263},{14,11,264},{14,11,393},{142,11,403},{8,10,123},{15,10,6},{144,10,7},{6,0,
+285},{8,0,654},{11,0,749},{12,0,190},{12,0,327},{13,0,120},{13,0,121},{13,0,327}
+,{15,0,47},{146,0,40},{5,11,8},{6,11,89},{6,11,400},{7,11,1569},{7,11,1623},{7,
+11,1850},{8,11,218},{8,11,422},{9,11,570},{138,11,626},{6,11,387},{7,11,882},{
+141,11,111},{6,0,343},{7,0,195},{9,0,226},{10,0,197},{10,0,575},{11,0,502},{11,0
+,899},{6,11,224},{7,11,877},{137,11,647},{5,10,937},{135,10,100},{135,11,790},{
+150,0,29},{147,0,8},{134,0,1812},{149,0,8},{135,11,394},{7,0,1125},{9,0,143},{11
+,0,61},{14,0,405},{150,0,21},{10,11,755},{147,11,29},{9,11,378},{141,11,162},{
+135,10,922},{5,10,619},{133,10,698},{134,0,1327},{6,0,1598},{137,0,575},{9,11,
+569},{12,11,12},{12,11,81},{12,11,319},{13,11,69},{14,11,259},{16,11,87},{17,11,
+1},{17,11,21},{17,11,24},{18,11,15},{18,11,56},{18,11,59},{18,11,127},{18,11,154
+},{19,11,19},{148,11,31},{6,0,895},{135,11,1231},{5,0,959},{7,11,124},{136,11,38
+},{5,11,261},{7,11,78},{7,11,199},{8,11,815},{9,11,126},{138,11,342},{5,10,917},
+{134,10,1659},{7,0,1759},{5,11,595},{135,11,1863},{136,0,173},{134,0,266},{142,0
+,261},{132,11,628},{5,10,251},{5,10,956},{8,10,268},{9,10,214},{146,10,142},{7,
+11,266},{136,11,804},{135,11,208},{6,11,79},{7,11,1021},{135,11,1519},{11,11,704
+},{141,11,396},{5,10,346},{5,10,711},{136,10,390},{136,11,741},{134,11,376},{134
+,0,1427},{6,0,1033},{6,0,1217},{136,0,300},{133,10,624},{6,11,100},{7,11,244},{7
+,11,632},{7,11,1609},{8,11,178},{8,11,638},{141,11,58},{6,0,584},{5,10,783},{7,
+10,1998},{135,10,2047},{5,0,427},{5,0,734},{7,0,478},{136,0,52},{7,0,239},{11,0,
+217},{142,0,165},{134,0,1129},{6,0,168},{6,0,1734},{7,0,20},{7,0,1056},{8,0,732}
+,{9,0,406},{9,0,911},{138,0,694},{132,10,594},{133,11,791},{7,11,686},{8,11,33},
+{8,11,238},{10,11,616},{11,11,467},{11,11,881},{13,11,217},{13,11,253},{142,11,
+268},{137,11,476},{134,0,418},{133,0,613},{132,0,632},{132,11,447},{7,0,32},{7,0
+,984},{8,0,85},{8,0,709},{9,0,579},{9,0,847},{9,0,856},{10,0,799},{11,0,258},{11
+,0,1007},{12,0,331},{12,0,615},{13,0,188},{13,0,435},{14,0,8},{15,0,165},{16,0,
+27},{20,0,40},{144,11,35},{4,11,128},{5,11,415},{6,11,462},{7,11,294},{7,11,578}
+,{10,11,710},{139,11,86},{5,0,694},{136,0,909},{7,0,1109},{11,0,7},{5,10,37},{6,
+10,39},{6,10,451},{7,10,218},{7,10,1166},{7,10,1687},{8,10,662},{144,10,2},{136,
+11,587},{6,11,427},{7,11,1018},{138,11,692},{4,11,195},{6,10,508},{135,11,802},{
+4,0,167},{135,0,82},{5,0,62},{6,0,24},{6,0,534},{7,0,74},{7,0,678},{7,0,684},{7,
+0,1043},{7,0,1072},{8,0,280},{8,0,541},{8,0,686},{9,0,258},{10,0,519},{11,0,252}
+,{140,0,282},{138,0,33},{4,0,359},{133,11,738},{7,0,980},{9,0,328},{13,0,186},{
+13,0,364},{7,10,635},{7,10,796},{8,10,331},{9,10,330},{9,10,865},{10,10,119},{10
+,10,235},{11,10,111},{11,10,129},{11,10,240},{12,10,31},{12,10,66},{12,10,222},{
+12,10,269},{12,10,599},{12,10,684},{12,10,689},{12,10,691},{142,10,345},{137,10,
+527},{6,0,596},{7,0,585},{135,10,702},{134,11,1683},{133,0,211},{6,0,145},{141,0
+,336},{134,0,1130},{7,0,873},{6,10,37},{7,10,1666},{8,10,195},{8,10,316},{9,10,
+178},{9,10,276},{9,10,339},{9,10,536},{10,10,102},{10,10,362},{10,10,785},{11,10
+,55},{11,10,149},{11,10,773},{13,10,416},{13,10,419},{14,10,38},{14,10,41},{142,
+10,210},{8,0,840},{136,0,841},{132,0,263},{5,11,3},{8,11,578},{9,11,118},{10,11,
+705},{12,11,383},{141,11,279},{132,0,916},{133,11,229},{133,10,645},{15,0,155},{
+16,0,79},{8,11,102},{10,11,578},{10,11,672},{12,11,496},{13,11,408},{14,11,121},
+{145,11,106},{4,0,599},{5,0,592},{6,0,1634},{7,0,5},{7,0,55},{7,0,67},{7,0,97},{
+7,0,691},{7,0,979},{7,0,1600},{7,0,1697},{8,0,207},{8,0,214},{8,0,231},{8,0,294}
+,{8,0,336},{8,0,428},{8,0,471},{8,0,622},{8,0,626},{8,0,679},{8,0,759},{8,0,829}
+,{9,0,11},{9,0,246},{9,0,484},{9,0,573},{9,0,706},{9,0,762},{9,0,798},{9,0,855},
+{9,0,870},{9,0,912},{10,0,303},{10,0,335},{10,0,424},{10,0,461},{10,0,543},{10,0
+,759},{10,0,814},{11,0,59},{11,0,199},{11,0,235},{11,0,590},{11,0,631},{11,0,929
+},{11,0,963},{11,0,987},{12,0,114},{12,0,182},{12,0,226},{12,0,332},{12,0,439},{
+12,0,575},{12,0,598},{12,0,675},{13,0,8},{13,0,125},{13,0,194},{13,0,287},{14,0,
+197},{14,0,383},{15,0,53},{17,0,63},{19,0,46},{19,0,98},{19,0,106},{148,0,85},{7
+,0,1356},{132,10,290},{6,10,70},{7,10,1292},{10,10,762},{139,10,288},{150,11,55}
+,{4,0,593},{8,11,115},{8,11,350},{9,11,489},{10,11,128},{11,11,306},{12,11,373},
+{14,11,30},{17,11,79},{147,11,80},{135,11,1235},{134,0,1392},{4,11,230},{133,11,
+702},{147,0,126},{7,10,131},{7,10,422},{8,10,210},{140,10,573},{134,0,1179},{139
+,11,435},{139,10,797},{134,11,1728},{4,0,162},{18,11,26},{19,11,42},{20,11,43},{
+21,11,0},{23,11,27},{152,11,14},{132,10,936},{6,0,765},{5,10,453},{134,10,441},{
+133,0,187},{135,0,1286},{6,0,635},{6,0,904},{6,0,1210},{134,0,1489},{4,0,215},{8
+,0,890},{9,0,38},{10,0,923},{11,0,23},{11,0,127},{139,0,796},{6,0,1165},{134,0,
+1306},{7,0,716},{13,0,97},{141,0,251},{132,10,653},{136,0,657},{146,10,80},{5,11
+,622},{7,11,1032},{11,11,26},{11,11,213},{11,11,707},{12,11,380},{13,11,226},{
+141,11,355},{6,0,299},{5,11,70},{6,11,334},{9,11,171},{11,11,637},{12,11,202},{
+14,11,222},{145,11,42},{142,0,134},{4,11,23},{5,11,313},{5,11,1014},{6,11,50},{6
+,11,51},{7,11,142},{7,11,384},{9,11,783},{139,11,741},{4,11,141},{7,11,559},{8,
+11,640},{9,11,460},{12,11,183},{141,11,488},{136,11,614},{7,10,1368},{8,10,232},
+{8,10,361},{10,10,682},{138,10,742},{137,10,534},{6,0,1082},{140,0,658},{137,10,
+27},{135,0,2002},{142,10,12},{4,0,28},{5,0,440},{7,0,248},{11,0,833},{140,0,344}
+,{7,10,736},{139,10,264},{134,10,1657},{134,0,1654},{138,0,531},{5,11,222},{9,11
+,140},{138,11,534},{6,0,634},{6,0,798},{134,0,840},{138,11,503},{135,10,127},{
+133,0,853},{5,11,154},{7,11,1491},{10,11,379},{138,11,485},{6,0,249},{7,0,1234},
+{139,0,573},{133,11,716},{7,11,1570},{140,11,542},{136,10,364},{138,0,527},{4,11
+,91},{5,11,388},{5,11,845},{6,11,206},{6,11,252},{6,11,365},{7,11,136},{7,11,531
+},{8,11,264},{136,11,621},{134,0,1419},{135,11,1441},{7,0,49},{7,0,392},{8,0,20}
+,{8,0,172},{8,0,690},{9,0,383},{9,0,845},{10,0,48},{11,0,293},{11,0,832},{11,0,
+920},{11,0,984},{141,0,221},{5,0,858},{133,0,992},{5,0,728},{137,10,792},{5,10,
+909},{9,10,849},{138,10,805},{7,0,525},{7,0,1579},{8,0,497},{136,0,573},{6,0,268
+},{137,0,62},{135,11,576},{134,0,1201},{5,11,771},{5,11,863},{5,11,898},{6,11,
+1632},{6,11,1644},{134,11,1780},{133,11,331},{7,0,193},{7,0,1105},{10,0,495},{7,
+10,397},{8,10,124},{8,10,619},{9,10,305},{11,10,40},{12,10,349},{13,10,134},{13,
+10,295},{14,10,155},{15,10,120},{146,10,105},{138,0,106},{6,0,859},{5,11,107},{7
+,11,201},{136,11,518},{6,11,446},{135,11,1817},{13,0,23},{4,10,262},{135,10,342}
+,{133,10,641},{137,11,851},{6,0,925},{137,0,813},{132,11,504},{6,0,613},{136,0,
+223},{4,10,99},{6,10,250},{6,10,346},{8,10,127},{138,10,81},{136,0,953},{132,10,
+915},{139,11,892},{5,10,75},{9,10,517},{10,10,470},{12,10,155},{141,10,224},{4,0
+,666},{7,0,1017},{7,11,996},{138,11,390},{5,11,883},{133,11,975},{14,10,83},{142
+,11,83},{4,0,670},{5,11,922},{134,11,1707},{135,0,216},{9,0,40},{11,0,136},{135,
+11,787},{5,10,954},{5,11,993},{7,11,515},{137,11,91},{139,0,259},{7,0,1114},{9,0
+,310},{9,0,682},{10,0,440},{13,0,40},{6,10,304},{8,10,418},{11,10,341},{139,10,
+675},{14,0,296},{9,10,410},{139,10,425},{10,11,377},{12,11,363},{13,11,68},{13,
+11,94},{14,11,108},{142,11,306},{7,0,1401},{135,0,1476},{4,0,296},{6,0,475},{7,0
+,401},{7,0,1410},{7,0,1594},{7,0,1674},{8,0,63},{8,0,660},{137,0,74},{4,0,139},{
+4,0,388},{140,0,188},{132,0,797},{132,11,766},{5,11,103},{7,11,921},{8,11,580},{
+8,11,593},{8,11,630},{138,11,28},{4,11,911},{5,11,867},{133,11,1013},{134,10,14}
+,{134,0,1572},{134,10,1708},{21,0,39},{5,10,113},{6,10,243},{7,10,1865},{11,10,
+161},{16,10,37},{145,10,99},{7,11,1563},{141,11,182},{5,11,135},{6,11,519},{7,11
+,1722},{10,11,271},{11,11,261},{145,11,54},{132,10,274},{134,0,1594},{4,11,300},
+{5,11,436},{135,11,484},{4,0,747},{6,0,290},{7,0,649},{7,0,1479},{135,0,1583},{
+133,11,535},{147,11,82},{133,0,232},{137,0,887},{135,10,166},{136,0,521},{4,0,14
+},{7,0,472},{7,0,1801},{10,0,748},{141,0,458},{134,0,741},{134,0,992},{16,0,111}
+,{137,10,304},{4,0,425},{5,11,387},{7,11,557},{12,11,547},{142,11,86},{135,11,
+1747},{5,10,654},{135,11,1489},{7,0,789},{4,11,6},{5,11,708},{136,11,75},{6,10,
+273},{10,10,188},{13,10,377},{146,10,77},{6,0,1593},{4,11,303},{7,11,619},{10,11
+,547},{10,11,687},{11,11,122},{140,11,601},{134,0,1768},{135,10,410},{138,11,772
+},{11,0,233},{139,10,524},{5,0,943},{134,0,1779},{134,10,1785},{136,11,529},{132
+,0,955},{5,0,245},{6,0,576},{7,0,582},{136,0,225},{132,10,780},{142,0,241},{134,
+0,1943},{4,11,106},{7,11,310},{7,11,1785},{10,11,690},{139,11,717},{134,0,1284},
+{5,11,890},{133,11,988},{6,11,626},{142,11,431},{10,11,706},{145,11,32},{137,11,
+332},{132,11,698},{135,0,709},{5,10,948},{138,11,17},{136,0,554},{134,0,1564},{
+139,10,941},{132,0,443},{134,0,909},{134,11,84},{142,0,280},{4,10,532},{5,10,706
+},{135,10,662},{132,0,729},{5,10,837},{6,10,1651},{139,10,985},{135,10,1861},{4,
+0,348},{152,11,3},{5,11,986},{6,11,130},{7,11,1582},{8,11,458},{10,11,101},{10,
+11,318},{138,11,823},{134,0,758},{4,0,298},{137,0,848},{4,10,330},{7,10,933},{7,
+10,2012},{136,10,292},{7,11,1644},{137,11,129},{6,0,1422},{9,0,829},{135,10,767}
+,{5,0,164},{7,0,121},{142,0,189},{7,0,812},{7,0,1261},{7,0,1360},{9,0,632},{140,
+0,352},{135,11,1788},{139,0,556},{135,11,997},{145,10,114},{4,0,172},{9,0,611},{
+10,0,436},{12,0,673},{13,0,255},{137,10,883},{11,0,530},{138,10,274},{133,0,844}
+,{134,0,984},{13,0,232},{18,0,35},{4,10,703},{135,10,207},{132,10,571},{9,0,263}
+,{10,0,147},{138,0,492},{7,11,1756},{137,11,98},{5,10,873},{5,10,960},{8,10,823}
+,{137,10,881},{133,0,537},{132,0,859},{7,11,1046},{139,11,160},{137,0,842},{139,
+10,283},{5,10,33},{6,10,470},{139,10,424},{6,11,45},{7,11,433},{8,11,129},{9,11,
+21},{10,11,392},{11,11,79},{12,11,499},{13,11,199},{141,11,451},{135,0,1291},{
+135,10,1882},{7,11,558},{136,11,353},{134,0,1482},{5,0,230},{5,0,392},{6,0,420},
+{9,0,568},{140,0,612},{6,0,262},{7,10,90},{7,10,664},{7,10,830},{7,10,1380},{7,
+10,2025},{8,11,81},{8,10,448},{8,10,828},{9,11,189},{9,11,201},{11,11,478},{11,
+11,712},{141,11,338},{142,0,31},{5,11,353},{151,11,26},{132,0,753},{4,0,0},{5,0,
+41},{7,0,1459},{7,0,1469},{7,0,1859},{9,0,549},{139,0,905},{9,10,417},{137,10,
+493},{135,11,1113},{133,0,696},{141,11,448},{134,10,295},{132,0,834},{4,0,771},{
+5,10,1019},{6,11,25},{7,11,855},{7,11,1258},{144,11,32},{134,0,1076},{133,0,921}
+,{133,0,674},{4,11,4},{7,11,1118},{7,11,1320},{7,11,1706},{8,11,277},{9,11,622},
+{10,11,9},{11,11,724},{12,11,350},{12,11,397},{13,11,28},{13,11,159},{15,11,89},
+{18,11,5},{19,11,9},{20,11,34},{150,11,47},{134,10,208},{6,0,444},{136,0,308},{6
+,0,180},{7,0,1137},{8,0,751},{139,0,805},{4,0,183},{7,0,271},{11,0,824},{11,0,
+952},{13,0,278},{13,0,339},{13,0,482},{14,0,424},{148,0,99},{7,11,317},{135,11,
+569},{4,0,19},{5,0,477},{5,0,596},{6,0,505},{7,0,1221},{11,0,907},{12,0,209},{
+141,0,214},{135,0,1215},{6,0,271},{7,0,398},{8,0,387},{10,0,344},{7,10,448},{7,
+10,1629},{7,10,1813},{8,10,442},{9,10,710},{10,10,282},{138,10,722},{11,10,844},
+{12,10,104},{140,10,625},{134,11,255},{133,10,787},{134,0,1645},{11,11,956},{151
+,11,3},{6,0,92},{6,0,188},{7,0,209},{7,0,1269},{7,0,1524},{7,0,1876},{8,0,661},{
+10,0,42},{10,0,228},{11,0,58},{11,0,1020},{12,0,58},{12,0,118},{141,0,32},{4,0,
+459},{133,0,966},{4,11,536},{7,11,1141},{10,11,723},{139,11,371},{140,0,330},{
+134,0,1557},{7,11,285},{135,11,876},{136,10,491},{135,11,560},{6,0,18},{7,0,179}
+,{7,0,932},{8,0,548},{8,0,757},{9,0,54},{9,0,65},{9,0,532},{9,0,844},{10,0,113},
+{10,0,117},{10,0,315},{10,0,560},{10,0,622},{10,0,798},{11,0,153},{11,0,351},{11
+,0,375},{12,0,78},{12,0,151},{12,0,392},{12,0,666},{14,0,248},{143,0,23},{6,0,
+1742},{132,11,690},{4,10,403},{5,10,441},{7,10,450},{10,10,840},{11,10,101},{12,
+10,193},{141,10,430},{133,0,965},{134,0,182},{10,0,65},{10,0,488},{138,0,497},{
+135,11,1346},{6,0,973},{6,0,1158},{10,11,200},{19,11,2},{151,11,22},{4,11,190},{
+133,11,554},{133,10,679},{7,0,328},{137,10,326},{133,11,1001},{9,0,588},{138,0,
+260},{133,11,446},{135,10,1128},{135,10,1796},{147,11,119},{134,0,1786},{6,0,
+1328},{6,0,1985},{8,0,962},{138,0,1017},{135,0,308},{11,0,508},{4,10,574},{7,10,
+350},{7,10,1024},{8,10,338},{9,10,677},{138,10,808},{138,11,752},{135,10,1081},{
+137,11,96},{7,10,1676},{135,10,2037},{136,0,588},{132,11,304},{133,0,614},{140,0
+,793},{136,0,287},{137,10,297},{141,10,37},{6,11,53},{6,11,199},{7,11,1408},{8,
+11,32},{8,11,93},{9,11,437},{10,11,397},{10,11,629},{11,11,593},{11,11,763},{13,
+11,326},{145,11,35},{134,11,105},{9,11,320},{10,11,506},{138,11,794},{5,11,114},
+{5,11,255},{141,11,285},{140,0,290},{7,11,2035},{8,11,19},{9,11,89},{138,11,831}
+,{134,0,1136},{7,0,719},{8,0,796},{8,0,809},{8,0,834},{6,10,306},{7,10,1140},{7,
+10,1340},{8,10,133},{138,10,449},{139,10,1011},{5,0,210},{6,0,213},{7,0,60},{10,
+0,364},{139,0,135},{5,0,607},{8,0,326},{136,0,490},{138,11,176},{132,0,701},{5,0
+,472},{7,0,380},{137,0,758},{135,0,1947},{6,0,1079},{138,0,278},{138,11,391},{5,
+10,329},{8,10,260},{139,11,156},{4,0,386},{7,0,41},{8,0,405},{8,0,728},{9,0,497}
+,{11,0,110},{11,0,360},{15,0,37},{144,0,84},{5,0,46},{7,0,1452},{7,0,1480},{8,0,
+634},{140,0,472},{136,0,961},{4,0,524},{136,0,810},{10,0,238},{141,0,33},{132,10
+,657},{152,10,7},{133,0,532},{5,0,997},{135,10,1665},{7,11,594},{7,11,851},{7,11
+,1858},{9,11,411},{9,11,574},{9,11,666},{9,11,737},{10,11,346},{10,11,712},{11,
+11,246},{11,11,432},{11,11,517},{11,11,647},{11,11,679},{11,11,727},{12,11,304},
+{12,11,305},{12,11,323},{12,11,483},{12,11,572},{12,11,593},{12,11,602},{13,11,
+95},{13,11,101},{13,11,171},{13,11,315},{13,11,378},{13,11,425},{13,11,475},{14,
+11,63},{14,11,380},{14,11,384},{15,11,133},{18,11,112},{148,11,72},{5,11,955},{
+136,11,814},{134,0,1301},{5,10,66},{7,10,1896},{136,10,288},{133,11,56},{134,10,
+1643},{6,0,1298},{148,11,100},{5,0,782},{5,0,829},{6,0,671},{6,0,1156},{6,0,1738
+},{137,11,621},{4,0,306},{5,0,570},{7,0,1347},{5,10,91},{5,10,648},{5,10,750},{5
+,10,781},{6,10,54},{6,10,112},{6,10,402},{6,10,1732},{7,10,315},{7,10,749},{7,10
+,1900},{9,10,78},{9,10,508},{10,10,611},{10,10,811},{11,10,510},{11,10,728},{13,
+10,36},{14,10,39},{16,10,83},{17,10,124},{148,10,30},{8,10,570},{9,11,477},{141,
+11,78},{4,11,639},{10,11,4},{10,10,322},{10,10,719},{11,10,407},{11,11,638},{12,
+11,177},{148,11,57},{7,0,1823},{139,0,693},{7,0,759},{5,11,758},{8,10,125},{8,10
+,369},{8,10,524},{10,10,486},{11,10,13},{11,10,381},{11,10,736},{11,10,766},{11,
+10,845},{13,10,114},{13,10,292},{142,10,47},{7,0,1932},{6,10,1684},{6,10,1731},{
+7,10,356},{8,10,54},{8,10,221},{9,10,225},{9,10,356},{10,10,77},{10,10,446},{10,
+10,731},{12,10,404},{141,10,491},{135,11,552},{135,11,1112},{4,0,78},{5,0,96},{5
+,0,182},{6,0,1257},{7,0,1724},{7,0,1825},{10,0,394},{10,0,471},{11,0,532},{14,0,
+340},{145,0,88},{139,11,328},{135,0,1964},{132,10,411},{4,10,80},{5,10,44},{137,
+11,133},{5,11,110},{6,11,169},{6,11,1702},{7,11,400},{8,11,538},{9,11,184},{9,11
+,524},{140,11,218},{4,0,521},{5,10,299},{7,10,1083},{140,11,554},{6,11,133},{9,
+11,353},{12,11,628},{146,11,79},{6,0,215},{7,0,584},{7,0,1028},{7,0,1473},{7,0,
+1721},{9,0,424},{138,0,779},{7,0,857},{7,0,1209},{7,10,1713},{9,10,537},{10,10,
+165},{12,10,219},{140,10,561},{4,10,219},{6,11,93},{7,11,1422},{7,10,1761},{7,11
+,1851},{8,11,673},{9,10,86},{9,11,529},{140,11,43},{137,11,371},{136,0,671},{5,0
+,328},{135,0,918},{132,0,529},{9,11,25},{10,11,467},{138,11,559},{4,11,335},{135
+,11,942},{134,0,716},{134,0,1509},{6,0,67},{7,0,258},{7,0,1630},{9,0,354},{9,0,
+675},{10,0,830},{14,0,80},{17,0,80},{140,10,428},{134,0,1112},{6,0,141},{7,0,225
+},{9,0,59},{9,0,607},{10,0,312},{11,0,687},{12,0,555},{13,0,373},{13,0,494},{148
+,0,58},{133,10,514},{8,11,39},{10,11,773},{11,11,84},{12,11,205},{142,11,1},{8,0
+,783},{5,11,601},{133,11,870},{136,11,594},{4,10,55},{5,10,301},{6,10,571},{14,
+10,49},{146,10,102},{132,11,181},{134,11,1652},{133,10,364},{4,11,97},{5,11,147}
+,{6,11,286},{7,11,1362},{141,11,176},{4,10,76},{7,10,1550},{9,10,306},{9,10,430}
+,{9,10,663},{10,10,683},{11,10,427},{11,10,753},{12,10,334},{12,10,442},{14,10,
+258},{14,10,366},{143,10,131},{137,10,52},{6,0,955},{134,0,1498},{6,11,375},{7,
+11,169},{7,11,254},{136,11,780},{7,0,430},{11,0,46},{14,0,343},{142,11,343},{135
+,0,1183},{5,0,602},{7,0,2018},{9,0,418},{9,0,803},{135,11,1447},{8,0,677},{135,
+11,1044},{139,11,285},{4,10,656},{135,10,779},{135,10,144},{5,11,629},{135,11,
+1549},{135,10,1373},{138,11,209},{7,10,554},{7,10,605},{141,10,10},{5,10,838},{5
+,10,841},{134,10,1649},{133,10,1012},{6,0,1357},{134,0,1380},{144,0,53},{6,0,590
+},{7,10,365},{7,10,1357},{7,10,1497},{8,10,154},{141,10,281},{133,10,340},{132,
+11,420},{135,0,329},{147,11,32},{4,0,469},{10,11,429},{139,10,495},{8,10,261},{9
+,10,144},{9,10,466},{10,10,370},{12,10,470},{13,10,144},{142,10,348},{142,0,460}
+,{4,11,325},{9,10,897},{138,11,125},{6,0,1743},{6,10,248},{9,10,546},{10,10,535}
+,{11,10,681},{141,10,135},{4,0,990},{5,0,929},{6,0,340},{8,0,376},{8,0,807},{8,0
+,963},{8,0,980},{138,0,1007},{134,0,1603},{140,0,250},{4,11,714},{133,11,469},{
+134,10,567},{136,10,445},{5,0,218},{7,0,1610},{8,0,646},{10,0,83},{11,11,138},{
+140,11,40},{7,0,1512},{135,0,1794},{135,11,1216},{11,0,0},{16,0,78},{132,11,718}
+,{133,0,571},{132,0,455},{134,0,1012},{5,11,124},{5,11,144},{6,11,548},{7,11,15}
+,{7,11,153},{137,11,629},{142,11,10},{6,11,75},{7,11,1531},{8,11,416},{9,11,240}
+,{9,11,275},{10,11,100},{11,11,658},{11,11,979},{12,11,86},{13,11,468},{14,11,66
+},{14,11,207},{15,11,20},{15,11,25},{144,11,58},{132,10,577},{5,11,141},{5,11,
+915},{6,11,1783},{7,11,211},{7,11,698},{7,11,1353},{9,11,83},{9,11,281},{10,11,
+376},{10,11,431},{11,11,543},{12,11,664},{13,11,280},{13,11,428},{14,11,61},{14,
+11,128},{17,11,52},{145,11,81},{6,0,161},{7,0,372},{137,0,597},{132,0,349},{10,
+11,702},{139,11,245},{134,0,524},{134,10,174},{6,0,432},{9,0,751},{139,0,322},{
+147,11,94},{4,11,338},{133,11,400},{5,0,468},{10,0,325},{11,0,856},{12,0,345},{
+143,0,104},{133,0,223},{132,0,566},{4,11,221},{5,11,659},{5,11,989},{7,11,697},{
+7,11,1211},{138,11,284},{135,11,1070},{4,0,59},{135,0,1394},{6,0,436},{11,0,481}
+,{5,10,878},{133,10,972},{4,0,48},{5,0,271},{135,0,953},{5,0,610},{136,0,457},{4
+,0,773},{5,0,618},{137,0,756},{133,0,755},{135,0,1217},{138,11,507},{132,10,351}
+,{132,0,197},{143,11,78},{4,11,188},{7,11,805},{11,11,276},{142,11,293},{5,11,
+884},{139,11,991},{132,10,286},{10,0,259},{10,0,428},{7,10,438},{7,10,627},{7,10
+,1516},{8,10,40},{9,10,56},{9,10,294},{11,10,969},{11,10,995},{146,10,148},{4,0,
+356},{5,0,217},{5,0,492},{5,0,656},{8,0,544},{136,11,544},{5,0,259},{6,0,1230},{
+7,0,414},{7,0,854},{142,0,107},{132,0,1007},{15,0,14},{144,0,5},{6,0,1580},{132,
+10,738},{132,11,596},{132,0,673},{133,10,866},{6,0,1843},{135,11,1847},{4,0,165}
+,{7,0,1398},{135,0,1829},{135,11,1634},{147,11,65},{6,0,885},{6,0,1009},{137,0,
+809},{133,10,116},{132,10,457},{136,11,770},{9,0,498},{12,0,181},{10,11,361},{
+142,11,316},{134,11,595},{5,0,9},{7,0,297},{7,0,966},{140,0,306},{4,11,89},{5,11
+,489},{6,11,315},{7,11,553},{7,11,1745},{138,11,243},{134,0,1487},{132,0,437},{5
+,0,146},{6,0,411},{138,0,721},{5,10,527},{6,10,189},{135,10,859},{11,10,104},{11
+,10,554},{15,10,60},{143,10,125},{6,11,1658},{9,11,3},{10,11,154},{11,11,641},{
+13,11,85},{13,11,201},{141,11,346},{6,0,177},{135,0,467},{134,0,1377},{134,10,
+116},{136,11,645},{4,11,166},{5,11,505},{6,11,1670},{137,11,110},{133,10,487},{4
+,10,86},{5,10,667},{5,10,753},{6,10,316},{6,10,455},{135,10,946},{133,0,200},{
+132,0,959},{6,0,1928},{134,0,1957},{139,11,203},{150,10,45},{4,10,79},{7,10,1773
+},{10,10,450},{11,10,589},{13,10,332},{13,10,493},{14,10,183},{14,10,334},{14,10
+,362},{14,10,368},{14,10,376},{14,10,379},{19,10,90},{19,10,103},{19,10,127},{
+148,10,90},{6,0,1435},{135,11,1275},{134,0,481},{7,11,445},{8,11,307},{8,11,704}
+,{10,11,41},{10,11,439},{11,11,237},{11,11,622},{140,11,201},{135,11,869},{4,0,
+84},{7,0,1482},{10,0,76},{138,0,142},{11,11,277},{144,11,14},{135,11,1977},{4,11
+,189},{5,11,713},{136,11,57},{133,0,1015},{138,11,371},{4,0,315},{5,0,507},{135,
+0,1370},{4,11,552},{142,10,381},{9,0,759},{16,0,31},{16,0,39},{16,0,75},{18,0,24
+},{20,0,42},{152,0,1},{134,0,712},{134,0,1722},{133,10,663},{133,10,846},{8,0,
+222},{8,0,476},{9,0,238},{11,0,516},{11,0,575},{15,0,109},{146,0,100},{7,0,1402}
+,{7,0,1414},{12,0,456},{5,10,378},{8,10,465},{9,10,286},{10,10,185},{10,10,562},
+{10,10,635},{11,10,31},{11,10,393},{13,10,312},{18,10,65},{18,10,96},{147,10,89}
+,{4,0,986},{6,0,1958},{6,0,2032},{8,0,934},{138,0,985},{7,10,1880},{9,10,680},{
+139,10,798},{134,10,1770},{145,11,49},{132,11,614},{132,10,648},{5,10,945},{6,10
+,1656},{6,10,1787},{7,10,167},{8,10,824},{9,10,391},{10,10,375},{139,10,185},{
+138,11,661},{7,0,1273},{135,11,1945},{7,0,706},{7,0,1058},{138,0,538},{7,10,1645
+},{8,10,352},{137,10,249},{132,10,152},{11,0,92},{11,0,196},{11,0,409},{11,0,450
+},{11,0,666},{11,0,777},{12,0,262},{13,0,385},{13,0,393},{15,0,115},{16,0,45},{
+145,0,82},{133,10,1006},{6,0,40},{135,0,1781},{9,11,614},{139,11,327},{5,10,420}
+,{135,10,1449},{135,0,431},{10,0,97},{135,10,832},{6,0,423},{7,0,665},{135,0,
+1210},{7,0,237},{8,0,664},{9,0,42},{9,0,266},{9,0,380},{9,0,645},{10,0,177},{138
+,0,276},{7,0,264},{133,10,351},{8,0,213},{5,10,40},{7,10,598},{7,10,1638},{9,10,
+166},{9,10,640},{9,10,685},{9,10,773},{11,10,215},{13,10,65},{14,10,172},{14,10,
+317},{145,10,6},{5,11,84},{134,11,163},{8,10,60},{9,10,343},{139,10,769},{137,0,
+455},{133,11,410},{8,0,906},{12,0,700},{12,0,706},{140,0,729},{21,11,33},{150,11
+,40},{7,10,1951},{8,10,765},{8,10,772},{140,10,671},{7,10,108},{8,10,219},{8,10,
+388},{9,10,639},{9,10,775},{11,10,275},{140,10,464},{5,11,322},{7,11,1941},{8,11
+,186},{9,11,262},{10,11,187},{14,11,208},{146,11,130},{139,0,624},{8,0,574},{5,
+11,227},{140,11,29},{7,11,1546},{11,11,299},{142,11,407},{5,10,15},{6,10,56},{7,
+10,1758},{8,10,500},{9,10,730},{11,10,331},{13,10,150},{142,10,282},{7,11,1395},
+{8,11,486},{9,11,236},{9,11,878},{10,11,218},{11,11,95},{19,11,17},{147,11,31},{
+135,11,2043},{4,0,354},{146,11,4},{140,11,80},{135,0,1558},{134,10,1886},{5,10,
+205},{6,10,438},{137,10,711},{133,11,522},{133,10,534},{7,0,235},{7,0,1475},{15,
+0,68},{146,0,120},{137,10,691},{4,0,942},{6,0,1813},{8,0,917},{10,0,884},{12,0,
+696},{12,0,717},{12,0,723},{12,0,738},{12,0,749},{12,0,780},{16,0,97},{146,0,169
+},{6,10,443},{8,11,562},{9,10,237},{9,10,571},{9,10,695},{10,10,139},{11,10,715}
+,{12,10,417},{141,10,421},{135,0,957},{133,0,830},{134,11,1771},{146,0,23},{5,0,
+496},{6,0,694},{7,0,203},{7,11,1190},{137,11,620},{137,11,132},{6,0,547},{134,0,
+1549},{8,11,258},{9,11,208},{137,11,359},{4,0,864},{5,0,88},{137,0,239},{135,11,
+493},{4,11,317},{135,11,1279},{132,11,477},{4,10,578},{5,11,63},{133,11,509},{7,
+0,650},{135,0,1310},{7,0,1076},{9,0,80},{11,0,78},{11,0,421},{11,0,534},{140,0,
+545},{132,11,288},{12,0,553},{14,0,118},{133,10,923},{7,0,274},{11,0,479},{139,0
+,507},{8,11,89},{8,11,620},{9,11,49},{10,11,774},{11,11,628},{12,11,322},{143,11
+,124},{4,0,497},{135,0,1584},{7,0,261},{7,0,1115},{7,0,1354},{7,0,1404},{7,0,
+1588},{7,0,1705},{7,0,1902},{9,0,465},{10,0,248},{10,0,349},{10,0,647},{11,0,527
+},{11,0,660},{11,0,669},{12,0,529},{13,0,305},{132,10,924},{133,10,665},{136,0,
+13},{6,0,791},{138,11,120},{7,0,642},{8,0,250},{11,0,123},{11,0,137},{13,0,48},{
+142,0,95},{4,10,265},{7,10,807},{135,10,950},{5,10,93},{140,10,267},{135,0,1429}
+,{4,0,949},{10,0,885},{10,0,891},{10,0,900},{10,0,939},{12,0,760},{142,0,449},{
+139,11,366},{132,0,818},{134,11,85},{135,10,994},{7,0,330},{5,10,233},{5,10,320}
+,{6,10,140},{136,10,295},{4,0,1004},{8,0,982},{136,0,993},{133,10,978},{4,10,905
+},{6,10,1701},{137,10,843},{10,0,545},{140,0,301},{6,0,947},{134,0,1062},{134,0,
+1188},{4,0,904},{5,0,794},{152,10,6},{134,0,1372},{135,11,608},{5,11,279},{6,11,
+235},{7,11,468},{8,11,446},{9,11,637},{10,11,717},{11,11,738},{140,11,514},{132,
+10,509},{5,11,17},{6,11,371},{137,11,528},{132,0,693},{4,11,115},{5,11,669},{6,
+11,407},{8,11,311},{11,11,10},{141,11,5},{11,0,377},{7,10,273},{137,11,381},{135
+,0,695},{7,0,386},{138,0,713},{135,10,1041},{134,0,1291},{6,0,7},{6,0,35},{7,0,
+147},{7,0,1069},{7,0,1568},{7,0,1575},{7,0,1917},{8,0,43},{8,0,208},{9,0,128},{9
+,0,866},{10,0,20},{11,0,981},{147,0,33},{7,0,893},{141,0,424},{139,10,234},{150,
+11,56},{5,11,779},{5,11,807},{6,11,1655},{134,11,1676},{5,10,802},{7,10,2021},{
+136,10,805},{4,11,196},{5,10,167},{5,11,558},{5,10,899},{5,11,949},{6,10,410},{
+137,10,777},{137,10,789},{134,10,1705},{8,0,904},{140,0,787},{6,0,322},{9,0,552}
+,{11,0,274},{13,0,209},{13,0,499},{14,0,85},{15,0,126},{145,0,70},{135,10,10},{5
+,10,11},{6,10,117},{6,10,485},{7,10,1133},{9,10,582},{9,10,594},{11,10,21},{11,
+10,818},{12,10,535},{141,10,86},{4,10,264},{7,10,1067},{8,10,204},{8,10,385},{
+139,10,953},{132,11,752},{138,10,56},{133,10,470},{6,0,1808},{8,0,83},{8,0,742},
+{8,0,817},{9,0,28},{9,0,29},{9,0,885},{10,0,387},{11,0,633},{11,0,740},{13,0,235
+},{13,0,254},{15,0,143},{143,0,146},{140,0,49},{134,0,1832},{4,11,227},{5,11,159
+},{5,11,409},{7,11,80},{10,11,294},{10,11,479},{12,11,418},{14,11,50},{14,11,249
+},{142,11,295},{7,11,1470},{8,11,66},{8,11,137},{8,11,761},{9,11,638},{11,11,80}
+,{11,11,212},{11,11,368},{11,11,418},{12,11,8},{13,11,15},{16,11,61},{17,11,59},
+{19,11,28},{148,11,84},{139,10,1015},{138,11,468},{135,0,421},{6,0,415},{7,0,
+1049},{137,0,442},{6,11,38},{7,11,1220},{8,11,185},{8,11,256},{9,11,22},{9,11,
+331},{10,11,738},{11,11,205},{11,11,540},{11,11,746},{13,11,399},{13,11,465},{14
+,11,88},{142,11,194},{139,0,289},{133,10,715},{4,0,110},{10,0,415},{10,0,597},{
+142,0,206},{4,11,159},{6,11,115},{7,11,252},{7,11,257},{7,11,1928},{8,11,69},{9,
+11,384},{10,11,91},{10,11,615},{12,11,375},{14,11,235},{18,11,117},{147,11,123},
+{5,11,911},{136,11,278},{7,0,205},{7,0,2000},{8,10,794},{9,10,400},{10,10,298},{
+142,10,228},{135,11,1774},{4,11,151},{7,11,1567},{8,11,351},{137,11,322},{136,10
+,724},{133,11,990},{7,0,1539},{11,0,512},{13,0,205},{19,0,30},{22,0,36},{23,0,19
+},{135,11,1539},{5,11,194},{7,11,1662},{9,11,90},{140,11,180},{6,10,190},{7,10,
+768},{135,10,1170},{134,0,1340},{4,0,283},{135,0,1194},{133,11,425},{133,11,971}
+,{12,0,549},{14,10,67},{147,10,60},{135,10,1023},{134,0,1720},{138,11,587},{5,11
+,72},{6,11,264},{7,11,21},{7,11,46},{7,11,2013},{8,11,215},{8,11,513},{10,11,266
+},{139,11,22},{5,0,319},{135,0,534},{6,10,137},{9,10,75},{9,10,253},{10,10,194},
+{138,10,444},{7,0,1180},{20,0,112},{6,11,239},{7,11,118},{10,11,95},{11,11,603},
+{13,11,443},{14,11,160},{143,11,4},{134,11,431},{5,11,874},{6,11,1677},{11,10,
+643},{12,10,115},{143,11,0},{134,0,967},{6,11,65},{7,11,939},{7,11,1172},{7,11,
+1671},{9,11,540},{10,11,696},{11,11,265},{11,11,732},{11,11,928},{11,11,937},{12
+,11,399},{13,11,438},{149,11,19},{137,11,200},{135,0,1940},{5,10,760},{7,10,542}
+,{8,10,135},{136,10,496},{140,11,44},{7,11,1655},{136,11,305},{7,10,319},{7,10,
+355},{7,10,763},{10,10,389},{145,10,43},{136,0,735},{138,10,786},{137,11,19},{
+132,11,696},{5,0,132},{9,0,486},{9,0,715},{10,0,458},{11,0,373},{11,0,668},{11,0
+,795},{11,0,897},{12,0,272},{12,0,424},{12,0,539},{12,0,558},{14,0,245},{14,0,
+263},{14,0,264},{14,0,393},{142,0,403},{10,0,38},{139,0,784},{132,0,838},{4,11,
+302},{135,11,1766},{133,0,379},{5,0,8},{6,0,89},{6,0,400},{7,0,1569},{7,0,1623},
+{7,0,1850},{8,0,218},{8,0,422},{9,0,570},{10,0,626},{4,11,726},{133,11,630},{4,0
+,1017},{138,0,660},{6,0,387},{7,0,882},{141,0,111},{6,0,224},{7,0,877},{137,0,
+647},{4,10,58},{5,10,286},{6,10,319},{7,10,402},{7,10,1254},{7,10,1903},{8,10,
+356},{140,10,408},{135,0,790},{9,0,510},{10,0,53},{4,10,389},{9,10,181},{10,10,
+29},{10,10,816},{11,10,311},{11,10,561},{12,10,67},{141,10,181},{142,0,458},{6,
+11,118},{7,11,215},{7,11,1521},{140,11,11},{134,0,954},{135,0,394},{134,0,1367},
+{5,11,225},{133,10,373},{132,0,882},{7,0,1409},{135,10,1972},{135,10,1793},{4,11
+,370},{5,11,756},{135,11,1326},{150,11,13},{7,11,354},{10,11,410},{139,11,815},{
+6,11,1662},{7,11,48},{8,11,771},{10,11,116},{13,11,104},{14,11,105},{14,11,184},
+{15,11,168},{19,11,92},{148,11,68},{7,0,124},{136,0,38},{5,0,261},{7,0,78},{7,0,
+199},{8,0,815},{9,0,126},{10,0,342},{140,0,647},{4,0,628},{140,0,724},{7,0,266},
+{8,0,804},{7,10,1651},{145,10,89},{135,0,208},{134,0,1178},{6,0,79},{135,0,1519}
+,{132,10,672},{133,10,737},{136,0,741},{132,11,120},{4,0,710},{6,0,376},{134,0,
+606},{134,0,1347},{134,0,1494},{6,0,850},{6,0,1553},{137,0,821},{5,10,145},{134,
+11,593},{7,0,1311},{140,0,135},{4,0,467},{5,0,405},{134,0,544},{5,11,820},{135,
+11,931},{6,0,100},{7,0,244},{7,0,632},{7,0,1609},{8,0,178},{8,0,638},{141,0,58},
+{4,10,387},{135,10,1288},{6,11,151},{6,11,1675},{7,11,383},{151,11,10},{132,0,
+481},{135,10,550},{134,0,1378},{6,11,1624},{11,11,11},{12,11,422},{13,11,262},{
+142,11,360},{133,0,791},{4,11,43},{5,11,344},{133,11,357},{7,0,1227},{140,0,978}
+,{7,0,686},{8,0,33},{8,0,238},{10,0,616},{11,0,467},{11,0,881},{13,0,217},{13,0,
+253},{142,0,268},{137,0,857},{8,0,467},{8,0,1006},{7,11,148},{8,11,284},{141,11,
+63},{4,10,576},{135,10,1263},{133,11,888},{5,10,919},{134,10,1673},{20,10,37},{
+148,11,37},{132,0,447},{132,11,711},{4,0,128},{5,0,415},{6,0,462},{7,0,294},{7,0
+,578},{10,0,710},{139,0,86},{4,10,82},{5,10,333},{5,10,904},{6,10,207},{7,10,325
+},{7,10,1726},{8,10,101},{10,10,778},{139,10,220},{136,0,587},{137,11,440},{133,
+10,903},{6,0,427},{7,0,1018},{138,0,692},{4,0,195},{135,0,802},{140,10,147},{134
+,0,1546},{134,0,684},{132,10,705},{136,0,345},{11,11,678},{140,11,307},{133,0,
+365},{134,0,1683},{4,11,65},{5,11,479},{5,11,1004},{7,11,1913},{8,11,317},{9,11,
+302},{10,11,612},{141,11,22},{138,0,472},{4,11,261},{135,11,510},{134,10,90},{
+142,0,433},{151,0,28},{4,11,291},{7,11,101},{9,11,515},{12,11,152},{12,11,443},{
+13,11,392},{142,11,357},{140,0,997},{5,0,3},{8,0,578},{9,0,118},{10,0,705},{141,
+0,279},{135,11,1266},{7,10,813},{12,10,497},{141,10,56},{133,0,229},{6,10,125},{
+135,10,1277},{8,0,102},{10,0,578},{10,0,672},{12,0,496},{13,0,408},{14,0,121},{
+17,0,106},{151,10,12},{6,0,866},{134,0,1080},{136,0,1022},{4,11,130},{135,11,843
+},{5,11,42},{5,11,879},{7,11,245},{7,11,324},{7,11,1532},{11,11,463},{11,11,472}
+,{13,11,363},{144,11,52},{150,0,55},{8,0,115},{8,0,350},{9,0,489},{10,0,128},{11
+,0,306},{12,0,373},{14,0,30},{17,0,79},{19,0,80},{4,11,134},{133,11,372},{134,0,
+657},{134,0,933},{135,11,1147},{4,0,230},{133,0,702},{134,0,1728},{4,0,484},{18,
+0,26},{19,0,42},{20,0,43},{21,0,0},{23,0,27},{152,0,14},{7,0,185},{135,0,703},{6
+,0,417},{10,0,618},{7,10,1106},{9,10,770},{11,10,112},{140,10,413},{134,0,803},{
+132,11,644},{134,0,1262},{7,11,540},{12,10,271},{145,10,109},{135,11,123},{132,0
+,633},{134,11,623},{4,11,908},{5,11,359},{5,11,508},{6,11,1723},{7,11,343},{7,11
+,1996},{135,11,2026},{135,0,479},{10,0,262},{7,10,304},{9,10,646},{9,10,862},{11
+,10,696},{12,10,208},{15,10,79},{147,10,108},{4,11,341},{135,11,480},{134,0,830}
+,{5,0,70},{5,0,622},{6,0,334},{7,0,1032},{9,0,171},{11,0,26},{11,0,213},{11,0,
+637},{11,0,707},{12,0,202},{12,0,380},{13,0,226},{13,0,355},{14,0,222},{145,0,42
+},{135,10,981},{143,0,217},{137,11,114},{4,0,23},{4,0,141},{5,0,313},{5,0,1014},
+{6,0,50},{6,0,51},{7,0,142},{7,0,384},{7,0,559},{8,0,640},{9,0,460},{9,0,783},{
+11,0,741},{12,0,183},{141,0,488},{141,0,360},{7,0,1586},{7,11,1995},{8,11,299},{
+11,11,890},{140,11,674},{132,10,434},{7,0,652},{134,10,550},{7,0,766},{5,10,553}
+,{138,10,824},{7,0,737},{8,0,298},{136,10,452},{4,11,238},{5,11,503},{6,11,179},
+{7,11,2003},{8,11,381},{8,11,473},{9,11,149},{10,11,183},{15,11,45},{143,11,86},
+{133,10,292},{5,0,222},{9,0,655},{138,0,534},{138,10,135},{4,11,121},{5,11,156},
+{5,11,349},{9,11,136},{10,11,605},{14,11,342},{147,11,107},{137,0,906},{6,0,1013
+},{134,0,1250},{6,0,1956},{6,0,2009},{8,0,991},{144,0,120},{135,11,1192},{138,0,
+503},{5,0,154},{7,0,1491},{10,0,379},{138,0,485},{6,0,1867},{6,0,1914},{6,0,1925
+},{9,0,917},{9,0,925},{9,0,932},{9,0,951},{9,0,1007},{9,0,1013},{12,0,806},{12,0
+,810},{12,0,814},{12,0,816},{12,0,824},{12,0,832},{12,0,837},{12,0,863},{12,0,
+868},{12,0,870},{12,0,889},{12,0,892},{12,0,900},{12,0,902},{12,0,908},{12,0,933
+},{12,0,942},{12,0,949},{12,0,954},{15,0,175},{15,0,203},{15,0,213},{15,0,218},{
+15,0,225},{15,0,231},{15,0,239},{15,0,248},{15,0,252},{18,0,190},{18,0,204},{18,
+0,215},{18,0,216},{18,0,222},{18,0,225},{18,0,230},{18,0,239},{18,0,241},{21,0,
+42},{21,0,43},{21,0,44},{21,0,45},{21,0,46},{21,0,53},{24,0,27},{152,0,31},{133,
+0,716},{135,0,844},{4,0,91},{5,0,388},{5,0,845},{6,0,206},{6,0,252},{6,0,365},{7
+,0,136},{7,0,531},{136,0,621},{7,10,393},{10,10,603},{139,10,206},{6,11,80},{6,
+11,1694},{7,11,173},{7,11,1974},{9,11,547},{10,11,730},{14,11,18},{150,11,39},{
+137,0,748},{4,11,923},{134,11,1711},{4,10,912},{137,10,232},{7,10,98},{7,10,1973
+},{136,10,716},{14,0,103},{133,10,733},{132,11,595},{12,0,158},{18,0,8},{19,0,62
+},{20,0,6},{22,0,4},{23,0,2},{23,0,9},{5,11,240},{6,11,459},{7,11,12},{7,11,114}
+,{7,11,502},{7,11,1751},{7,11,1753},{7,11,1805},{8,11,658},{9,11,1},{11,11,959},
+{13,11,446},{142,11,211},{135,0,576},{5,0,771},{5,0,863},{5,0,898},{6,0,648},{6,
+0,1632},{6,0,1644},{134,0,1780},{133,0,331},{7,11,633},{7,11,905},{7,11,909},{7,
+11,1538},{9,11,767},{140,11,636},{140,0,632},{5,0,107},{7,0,201},{136,0,518},{6,
+0,446},{7,0,1817},{134,11,490},{9,0,851},{141,0,510},{7,11,250},{8,11,506},{136,
+11,507},{4,0,504},{137,10,72},{132,11,158},{4,11,140},{7,11,362},{8,11,209},{9,
+11,10},{9,11,160},{9,11,503},{10,11,689},{11,11,350},{11,11,553},{11,11,725},{12
+,11,252},{12,11,583},{13,11,192},{13,11,352},{14,11,269},{14,11,356},{148,11,50}
+,{6,11,597},{135,11,1318},{135,10,1454},{5,0,883},{5,0,975},{8,0,392},{148,0,7},
+{6,11,228},{7,11,1341},{9,11,408},{138,11,343},{11,11,348},{11,10,600},{12,11,99
+},{13,10,245},{18,11,1},{18,11,11},{147,11,4},{134,11,296},{5,0,922},{134,0,1707
+},{132,11,557},{4,11,548},{7,10,164},{7,10,1571},{9,10,107},{140,10,225},{7,11,
+197},{8,11,142},{8,11,325},{9,11,150},{9,11,596},{10,11,350},{10,11,353},{11,11,
+74},{11,11,315},{14,11,423},{143,11,141},{5,0,993},{7,0,515},{137,0,91},{4,0,131
+},{8,0,200},{5,10,484},{5,10,510},{6,10,434},{7,10,1000},{7,10,1098},{136,10,2},
+{152,0,10},{4,11,62},{5,11,83},{6,11,399},{6,11,579},{7,11,692},{7,11,846},{7,11
+,1015},{7,11,1799},{8,11,403},{9,11,394},{10,11,133},{12,11,4},{12,11,297},{12,
+11,452},{16,11,81},{18,11,19},{18,11,25},{21,11,14},{22,11,12},{151,11,18},{140,
+11,459},{132,11,177},{7,0,1433},{9,0,365},{137,11,365},{132,10,460},{5,0,103},{6
+,0,2004},{7,0,921},{8,0,580},{8,0,593},{8,0,630},{10,0,28},{5,11,411},{135,11,
+653},{4,10,932},{133,10,891},{4,0,911},{5,0,867},{5,0,1013},{7,0,2034},{8,0,798}
+,{136,0,813},{7,11,439},{10,11,727},{11,11,260},{139,11,684},{136,10,625},{5,11,
+208},{7,11,753},{135,11,1528},{5,0,461},{7,0,1925},{12,0,39},{13,0,265},{13,0,
+439},{134,10,76},{6,0,853},{8,10,92},{137,10,221},{5,0,135},{6,0,519},{7,0,1722}
+,{10,0,271},{11,0,261},{145,0,54},{139,11,814},{14,0,338},{148,0,81},{4,0,300},{
+133,0,436},{5,0,419},{5,0,687},{7,0,864},{9,0,470},{135,11,864},{9,0,836},{133,
+11,242},{134,0,1937},{4,10,763},{133,11,953},{132,10,622},{132,0,393},{133,10,
+253},{8,0,357},{10,0,745},{14,0,426},{17,0,94},{19,0,57},{135,10,546},{5,11,615}
+,{146,11,37},{9,10,73},{10,10,110},{14,10,185},{145,10,119},{11,0,703},{7,10,624
+},{7,10,916},{10,10,256},{139,10,87},{133,11,290},{5,10,212},{12,10,35},{141,10,
+382},{132,11,380},{5,11,52},{7,11,277},{9,11,368},{139,11,791},{133,0,387},{10,
+11,138},{139,11,476},{4,0,6},{5,0,708},{136,0,75},{7,0,1351},{9,0,581},{10,0,639
+},{11,0,453},{140,0,584},{132,0,303},{138,0,772},{135,10,1175},{4,0,749},{5,10,
+816},{6,11,256},{7,11,307},{7,11,999},{7,11,1481},{7,11,1732},{7,11,1738},{8,11,
+265},{9,11,414},{11,11,316},{12,11,52},{13,11,420},{147,11,100},{135,11,1296},{6
+,0,1065},{5,10,869},{5,10,968},{6,10,1626},{8,10,734},{136,10,784},{4,10,542},{6
+,10,1716},{6,10,1727},{7,10,1082},{7,10,1545},{8,10,56},{8,10,118},{8,10,412},{8
+,10,564},{9,10,888},{9,10,908},{10,10,50},{10,10,423},{11,10,685},{11,10,697},{
+11,10,933},{12,10,299},{13,10,126},{13,10,136},{13,10,170},{141,10,190},{134,0,
+226},{4,0,106},{7,0,310},{11,0,717},{133,11,723},{5,0,890},{5,0,988},{4,10,232},
+{9,10,202},{10,10,474},{140,10,433},{6,0,626},{142,0,431},{10,0,706},{150,0,44},
+{13,0,51},{6,10,108},{7,10,1003},{7,10,1181},{8,10,111},{136,10,343},{132,0,698}
+,{5,11,109},{6,11,1784},{7,11,1895},{12,11,296},{140,11,302},{134,0,828},{134,10
+,1712},{138,0,17},{7,0,1929},{4,10,133},{5,11,216},{7,10,711},{7,10,1298},{7,10,
+1585},{7,11,1879},{9,11,141},{9,11,270},{9,11,679},{10,11,159},{10,11,553},{11,
+11,197},{11,11,438},{12,11,538},{12,11,559},{13,11,193},{13,11,423},{14,11,144},
+{14,11,166},{14,11,167},{15,11,67},{147,11,84},{141,11,127},{7,11,1872},{137,11,
+81},{6,10,99},{7,10,1808},{145,10,57},{134,11,391},{5,0,689},{6,0,84},{7,0,1250}
+,{6,10,574},{7,10,428},{10,10,669},{11,10,485},{11,10,840},{12,10,300},{142,10,
+250},{7,11,322},{136,11,249},{7,11,432},{135,11,1649},{135,10,1871},{137,10,252}
+,{6,11,155},{140,11,234},{7,0,871},{19,0,27},{147,11,27},{140,0,498},{5,0,986},{
+6,0,130},{138,0,823},{6,0,1793},{7,0,1582},{8,0,458},{10,0,101},{10,0,318},{10,0
+,945},{12,0,734},{16,0,104},{18,0,177},{6,10,323},{135,10,1564},{5,11,632},{138,
+11,526},{10,0,435},{7,10,461},{136,10,775},{6,11,144},{7,11,948},{7,11,1042},{7,
+11,1857},{8,11,235},{8,11,461},{9,11,453},{9,11,530},{10,11,354},{17,11,77},{19,
+11,99},{148,11,79},{138,0,966},{7,0,1644},{137,0,129},{135,0,997},{136,0,502},{5
+,11,196},{6,11,486},{7,11,212},{8,11,309},{136,11,346},{7,10,727},{146,10,73},{
+132,0,823},{132,11,686},{135,0,1927},{4,0,762},{7,0,1756},{137,0,98},{136,10,577
+},{24,0,8},{4,11,30},{5,11,43},{152,11,8},{7,0,1046},{139,0,160},{7,0,492},{4,10
+,413},{5,10,677},{7,11,492},{8,10,432},{140,10,280},{6,0,45},{7,0,433},{8,0,129}
+,{9,0,21},{10,0,392},{11,0,79},{12,0,499},{13,0,199},{141,0,451},{7,0,558},{136,
+0,353},{4,11,220},{7,11,1535},{9,11,93},{139,11,474},{7,10,646},{7,10,1730},{11,
+10,446},{141,10,178},{133,0,785},{134,0,1145},{8,0,81},{9,0,189},{9,0,201},{11,0
+,478},{11,0,712},{141,0,338},{5,0,353},{151,0,26},{11,0,762},{132,10,395},{134,0
+,2024},{4,0,611},{133,0,606},{9,10,174},{10,10,164},{11,10,440},{11,10,841},{143
+,10,98},{134,10,426},{10,10,608},{139,10,1002},{138,10,250},{6,0,25},{7,0,855},{
+7,0,1258},{144,0,32},{7,11,1725},{138,11,393},{5,11,263},{134,11,414},{6,0,2011}
+,{133,10,476},{4,0,4},{7,0,1118},{7,0,1320},{7,0,1706},{8,0,277},{9,0,622},{10,0
+,9},{11,0,724},{12,0,350},{12,0,397},{13,0,28},{13,0,159},{15,0,89},{18,0,5},{19
+,0,9},{20,0,34},{22,0,47},{6,11,178},{6,11,1750},{8,11,251},{9,11,690},{10,11,
+155},{10,11,196},{10,11,373},{11,11,698},{13,11,155},{148,11,93},{5,11,97},{137,
+11,393},{7,0,764},{11,0,461},{12,0,172},{5,10,76},{6,10,458},{6,10,497},{7,10,
+868},{9,10,658},{10,10,594},{11,10,566},{12,10,338},{141,10,200},{134,0,1449},{
+138,11,40},{134,11,1639},{134,0,1445},{6,0,1168},{4,10,526},{7,10,1029},{135,10,
+1054},{4,11,191},{7,11,934},{8,11,647},{145,11,97},{132,10,636},{6,0,233},{7,10,
+660},{7,10,1124},{17,10,31},{19,10,22},{151,10,14},{6,10,1699},{136,11,110},{12,
+11,246},{15,11,162},{19,11,64},{20,11,8},{20,11,95},{22,11,24},{152,11,17},{5,11
+,165},{9,11,346},{138,11,655},{5,11,319},{135,11,534},{134,0,255},{9,0,216},{8,
+11,128},{139,11,179},{9,0,183},{139,0,286},{11,0,956},{151,0,3},{4,0,536},{7,0,
+1141},{10,0,723},{139,0,371},{4,10,279},{7,10,301},{137,10,362},{7,0,285},{5,11,
+57},{6,11,101},{6,11,1663},{7,11,132},{7,11,1048},{7,11,1154},{7,11,1415},{7,11,
+1507},{12,11,493},{15,11,105},{151,11,15},{5,11,459},{7,11,1073},{7,10,1743},{8,
+11,241},{136,11,334},{4,10,178},{133,10,399},{135,0,560},{132,0,690},{135,0,1246
+},{18,0,157},{147,0,63},{10,0,599},{11,0,33},{12,0,571},{149,0,1},{6,11,324},{6,
+11,520},{7,11,338},{7,11,1616},{7,11,1729},{8,11,228},{9,11,69},{139,11,750},{7,
+0,1862},{12,0,491},{12,0,520},{13,0,383},{142,0,244},{135,11,734},{134,10,1692},
+{10,0,448},{11,0,630},{17,0,117},{6,10,202},{7,11,705},{12,10,360},{17,10,118},{
+18,10,27},{148,10,67},{4,11,73},{6,11,612},{7,11,927},{7,11,1822},{8,11,217},{9,
+11,472},{9,11,765},{9,11,766},{10,11,408},{11,11,51},{11,11,793},{12,11,266},{15
+,11,158},{20,11,89},{150,11,32},{4,0,190},{133,0,554},{133,0,1001},{5,11,389},{8
+,11,636},{137,11,229},{5,0,446},{7,10,872},{10,10,516},{139,10,167},{137,10,313}
+,{132,10,224},{134,0,1313},{5,10,546},{7,10,35},{8,10,11},{8,10,12},{9,10,315},{
+9,10,533},{10,10,802},{11,10,166},{12,10,525},{142,10,243},{6,0,636},{137,0,837}
+,{5,10,241},{8,10,242},{9,10,451},{10,10,667},{11,10,598},{140,10,429},{22,10,46
+},{150,11,46},{136,11,472},{11,0,278},{142,0,73},{141,11,185},{132,0,868},{134,0
+,972},{4,10,366},{137,10,516},{138,0,1010},{5,11,189},{6,10,1736},{7,11,442},{7,
+11,443},{8,11,281},{12,11,174},{13,11,83},{141,11,261},{139,11,384},{6,11,2},{7,
+11,191},{7,11,446},{7,11,758},{7,11,1262},{7,11,1737},{8,11,22},{8,11,270},{8,11
+,612},{9,11,4},{9,11,167},{9,11,312},{9,11,436},{10,11,156},{10,11,216},{10,11,
+311},{10,11,623},{11,11,72},{11,11,330},{11,11,455},{12,11,101},{12,11,321},{12,
+11,504},{12,11,530},{12,11,543},{13,11,17},{13,11,156},{13,11,334},{14,11,48},{
+15,11,70},{17,11,60},{148,11,64},{6,10,331},{136,10,623},{135,0,1231},{132,0,304
+},{6,11,60},{7,11,670},{7,11,1327},{8,11,411},{8,11,435},{9,11,653},{9,11,740},{
+10,11,385},{11,11,222},{11,11,324},{11,11,829},{140,11,611},{7,0,506},{6,11,166}
+,{7,11,374},{135,11,1174},{14,11,43},{146,11,21},{135,11,1694},{135,10,1888},{5,
+11,206},{134,11,398},{135,11,50},{150,0,26},{6,0,53},{6,0,199},{7,0,1408},{8,0,
+32},{8,0,93},{10,0,397},{10,0,629},{11,0,593},{11,0,763},{13,0,326},{145,0,35},{
+134,0,105},{132,10,394},{4,0,843},{138,0,794},{11,0,704},{141,0,396},{5,0,114},{
+5,0,255},{141,0,285},{6,0,619},{7,0,898},{7,0,1092},{8,0,485},{18,0,28},{19,0,
+116},{135,10,1931},{9,0,145},{7,10,574},{135,10,1719},{7,0,2035},{8,0,19},{9,0,
+89},{138,0,831},{132,10,658},{6,11,517},{7,11,1159},{10,11,621},{139,11,192},{7,
+0,1933},{7,11,1933},{9,10,781},{10,10,144},{11,10,385},{13,10,161},{13,10,228},{
+13,10,268},{148,10,107},{136,10,374},{10,11,223},{139,11,645},{135,0,1728},{7,11
+,64},{7,11,289},{136,11,245},{4,10,344},{6,10,498},{139,10,323},{136,0,746},{135
+,10,1063},{137,10,155},{4,0,987},{6,0,1964},{6,0,1974},{6,0,1990},{136,0,995},{
+133,11,609},{133,10,906},{134,0,1550},{134,0,874},{5,11,129},{6,11,61},{135,11,
+947},{4,0,1018},{6,0,1938},{6,0,2021},{134,0,2039},{132,0,814},{11,0,126},{139,0
+,287},{134,0,1264},{5,0,955},{136,0,814},{141,11,506},{132,11,314},{6,0,981},{
+139,11,1000},{5,0,56},{8,0,892},{8,0,915},{140,0,776},{148,0,100},{10,0,4},{10,0
+,13},{11,0,638},{148,0,57},{148,11,74},{5,0,738},{132,10,616},{133,11,637},{136,
+10,692},{133,0,758},{132,10,305},{137,11,590},{5,11,280},{135,11,1226},{134,11,
+494},{135,0,1112},{133,11,281},{13,0,44},{14,0,214},{5,10,214},{7,10,603},{8,10,
+611},{9,10,686},{10,10,88},{11,10,459},{11,10,496},{12,10,463},{140,10,590},{139
+,0,328},{135,11,1064},{137,0,133},{7,0,168},{13,0,196},{141,0,237},{134,10,1703}
+,{134,0,1152},{135,0,1245},{5,0,110},{6,0,169},{6,0,1702},{7,0,400},{8,0,538},{9
+,0,184},{9,0,524},{140,0,218},{6,0,1816},{10,0,871},{12,0,769},{140,0,785},{132,
+11,630},{7,11,33},{7,11,120},{8,11,489},{9,11,319},{10,11,820},{11,11,1004},{12,
+11,379},{13,11,117},{13,11,412},{14,11,25},{15,11,52},{15,11,161},{16,11,47},{
+149,11,2},{6,0,133},{8,0,413},{9,0,353},{139,0,993},{145,10,19},{4,11,937},{133,
+11,801},{134,0,978},{6,0,93},{6,0,1508},{7,0,1422},{7,0,1851},{8,0,673},{9,0,529
+},{140,0,43},{6,0,317},{10,0,512},{4,10,737},{11,10,294},{12,10,60},{12,10,437},
+{13,10,64},{13,10,380},{142,10,430},{9,0,371},{7,11,1591},{144,11,43},{6,10,1758
+},{8,10,520},{9,10,345},{9,10,403},{142,10,350},{5,0,526},{10,10,242},{138,10,
+579},{9,0,25},{10,0,467},{138,0,559},{5,10,139},{7,10,1168},{138,10,539},{4,0,
+335},{135,0,942},{140,0,754},{132,11,365},{11,0,182},{142,0,195},{142,11,29},{5,
+11,7},{139,11,774},{4,11,746},{135,11,1090},{8,0,39},{10,0,773},{11,0,84},{12,0,
+205},{142,0,1},{5,0,601},{5,0,870},{5,11,360},{136,11,237},{132,0,181},{136,0,
+370},{134,0,1652},{8,0,358},{4,10,107},{7,10,613},{8,10,439},{8,10,504},{9,10,
+501},{10,10,383},{139,10,477},{132,10,229},{137,11,785},{4,0,97},{5,0,147},{6,0,
+286},{7,0,1362},{141,0,176},{6,0,537},{7,0,788},{7,0,1816},{132,10,903},{140,10,
+71},{6,0,743},{134,0,1223},{6,0,375},{7,0,169},{7,0,254},{8,0,780},{135,11,1493}
+,{7,0,1714},{4,10,47},{6,10,373},{7,10,452},{7,10,543},{7,10,1856},{9,10,6},{11,
+10,257},{139,10,391},{6,0,896},{136,0,1003},{135,0,1447},{137,11,341},{5,10,980}
+,{134,10,1754},{145,11,22},{4,11,277},{5,11,608},{6,11,493},{7,11,457},{140,11,
+384},{7,10,536},{7,10,1331},{136,10,143},{140,0,744},{7,11,27},{135,11,316},{18,
+0,126},{5,10,19},{134,10,533},{4,0,788},{11,0,41},{5,11,552},{5,11,586},{5,11,
+676},{6,11,448},{8,11,244},{11,11,1},{11,11,41},{13,11,3},{16,11,54},{17,11,4},{
+146,11,13},{4,0,985},{6,0,1801},{4,11,401},{137,11,264},{5,10,395},{5,10,951},{
+134,10,1776},{5,0,629},{135,0,1549},{11,10,663},{12,10,210},{13,10,166},{13,10,
+310},{14,10,373},{147,10,43},{9,11,543},{10,11,524},{11,11,30},{12,11,524},{14,
+11,315},{16,11,18},{20,11,26},{148,11,65},{4,11,205},{5,11,623},{7,11,104},{136,
+11,519},{5,0,293},{134,0,601},{7,11,579},{9,11,41},{9,11,244},{9,11,669},{10,11,
+5},{11,11,861},{11,11,951},{139,11,980},{132,11,717},{132,10,695},{7,10,497},{9,
+10,387},{147,10,81},{132,0,420},{142,0,37},{6,0,1134},{6,0,1900},{12,0,830},{12,
+0,878},{12,0,894},{15,0,221},{143,0,245},{132,11,489},{7,0,1570},{140,0,542},{8,
+0,933},{136,0,957},{6,0,1371},{7,0,31},{8,0,373},{5,10,284},{6,10,49},{6,10,350}
+,{7,10,377},{7,10,1693},{8,10,678},{9,10,161},{9,10,585},{9,10,671},{9,10,839},{
+11,10,912},{141,10,427},{135,11,892},{4,0,325},{138,0,125},{139,11,47},{132,10,
+597},{138,0,323},{6,0,1547},{7,11,1605},{9,11,473},{11,11,962},{146,11,139},{139
+,10,908},{7,11,819},{9,11,26},{9,11,392},{10,11,152},{10,11,226},{11,11,19},{12,
+11,276},{12,11,426},{12,11,589},{13,11,460},{15,11,97},{19,11,48},{148,11,104},{
+135,11,51},{4,0,718},{135,0,1216},{6,0,1896},{6,0,1905},{6,0,1912},{9,0,947},{9,
+0,974},{12,0,809},{12,0,850},{12,0,858},{12,0,874},{12,0,887},{12,0,904},{12,0,
+929},{12,0,948},{12,0,952},{15,0,198},{15,0,206},{15,0,220},{15,0,227},{15,0,247
+},{18,0,188},{21,0,48},{21,0,50},{24,0,25},{24,0,29},{7,11,761},{7,11,1051},{137
+,11,545},{5,0,124},{5,0,144},{6,0,548},{7,0,15},{7,0,153},{137,0,629},{135,11,
+606},{135,10,2014},{7,10,2007},{9,11,46},{9,10,101},{9,10,450},{10,10,66},{10,10
+,842},{11,10,536},{140,10,587},{6,0,75},{7,0,1531},{8,0,416},{9,0,240},{9,0,275}
+,{10,0,100},{11,0,658},{11,0,979},{12,0,86},{14,0,207},{15,0,20},{143,0,25},{5,0
+,141},{5,0,915},{6,0,1783},{7,0,211},{7,0,698},{7,0,1353},{9,0,83},{9,0,281},{10
+,0,376},{10,0,431},{11,0,543},{12,0,664},{13,0,280},{13,0,428},{14,0,61},{14,0,
+128},{17,0,52},{145,0,81},{132,11,674},{135,0,533},{149,0,6},{132,11,770},{133,0
+,538},{5,11,79},{7,11,1027},{7,11,1477},{139,11,52},{139,10,62},{4,0,338},{133,0
+,400},{5,11,789},{134,11,195},{4,11,251},{4,11,688},{7,11,513},{7,11,1284},{9,11
+,87},{138,11,365},{134,10,1766},{6,0,0},{7,0,84},{11,0,895},{145,0,11},{139,0,
+892},{4,0,221},{5,0,659},{7,0,697},{7,0,1211},{138,0,284},{133,0,989},{133,11,
+889},{4,11,160},{5,11,330},{7,11,1434},{136,11,174},{6,10,1665},{7,10,256},{7,10
+,1388},{10,10,499},{139,10,670},{7,0,848},{4,10,22},{5,10,10},{136,10,97},{138,0
+,507},{133,10,481},{4,0,188},{135,0,805},{5,0,884},{6,0,732},{139,0,991},{135,11
+,968},{11,11,636},{15,11,145},{17,11,34},{19,11,50},{151,11,20},{7,0,959},{16,0,
+60},{6,10,134},{7,10,437},{9,10,37},{14,10,285},{142,10,371},{7,10,486},{8,10,
+155},{11,10,93},{140,10,164},{134,0,1653},{7,0,337},{133,10,591},{6,0,1989},{8,0
+,922},{8,0,978},{133,11,374},{132,0,638},{138,0,500},{133,11,731},{5,10,380},{5,
+10,650},{136,10,310},{138,11,381},{4,10,364},{7,10,1156},{7,10,1187},{137,10,409
+},{137,11,224},{140,0,166},{134,10,482},{4,11,626},{5,11,642},{6,11,425},{10,11,
+202},{139,11,141},{4,10,781},{6,10,487},{7,10,926},{8,10,263},{139,10,500},{135,
+0,418},{4,10,94},{135,10,1265},{136,0,760},{132,10,417},{136,11,835},{5,10,348},
+{134,10,522},{6,0,1277},{134,0,1538},{139,11,541},{135,11,1597},{5,11,384},{8,11
+,455},{140,11,48},{136,0,770},{5,11,264},{134,11,184},{4,0,89},{5,0,489},{6,0,
+315},{7,0,553},{7,0,1745},{138,0,243},{4,10,408},{4,10,741},{135,10,500},{134,0,
+1396},{133,0,560},{6,0,1658},{9,0,3},{10,0,154},{11,0,641},{13,0,85},{13,0,201},
+{141,0,346},{135,11,1595},{5,11,633},{6,11,28},{7,11,219},{135,11,1323},{9,11,
+769},{140,11,185},{135,11,785},{7,11,359},{8,11,243},{140,11,175},{138,0,586},{7
+,0,1271},{134,10,73},{132,11,105},{4,0,166},{5,0,505},{134,0,1670},{133,10,576},
+{4,11,324},{138,11,104},{142,10,231},{6,0,637},{7,10,1264},{7,10,1678},{11,10,
+945},{12,10,341},{12,10,471},{12,10,569},{23,11,21},{151,11,23},{8,11,559},{141,
+11,109},{134,0,1947},{7,0,445},{8,0,307},{8,0,704},{10,0,41},{10,0,439},{11,0,
+237},{11,0,622},{140,0,201},{135,11,963},{135,0,1977},{4,0,189},{5,0,713},{136,0
+,57},{138,0,371},{135,10,538},{132,0,552},{6,0,883},{133,10,413},{6,0,923},{132,
+11,758},{138,11,215},{136,10,495},{7,10,54},{8,10,312},{10,10,191},{10,10,614},{
+140,10,567},{7,11,351},{139,11,128},{7,0,875},{6,10,468},{7,10,1478},{8,10,530},
+{142,10,290},{135,0,1788},{17,0,49},{133,11,918},{12,11,398},{20,11,39},{21,11,
+11},{150,11,41},{10,0,661},{6,10,484},{135,10,822},{135,0,1945},{134,0,794},{137
+,10,900},{135,10,1335},{6,10,1724},{135,10,2022},{132,11,340},{134,0,1135},{4,0,
+784},{133,0,745},{5,0,84},{134,0,163},{133,0,410},{4,0,976},{5,11,985},{7,11,509
+},{7,11,529},{145,11,96},{132,10,474},{134,0,703},{135,11,1919},{5,0,322},{8,0,
+186},{9,0,262},{10,0,187},{142,0,208},{135,10,1504},{133,0,227},{9,0,560},{13,0,
+208},{133,10,305},{132,11,247},{7,0,1395},{8,0,486},{9,0,236},{9,0,878},{10,0,
+218},{11,0,95},{19,0,17},{147,0,31},{7,0,2043},{8,0,672},{141,0,448},{4,11,184},
+{5,11,390},{6,11,337},{7,11,23},{7,11,494},{7,11,618},{7,11,1456},{8,11,27},{8,
+11,599},{10,11,153},{139,11,710},{135,0,466},{135,10,1236},{6,0,167},{7,0,186},{
+7,0,656},{10,0,643},{4,10,480},{6,10,302},{6,10,1642},{7,10,837},{7,10,1547},{7,
+10,1657},{8,10,429},{9,10,228},{13,10,289},{13,10,343},{147,10,101},{134,0,1428}
+,{134,0,1440},{5,0,412},{7,10,278},{10,10,739},{11,10,708},{141,10,348},{134,0,
+1118},{136,0,562},{148,11,46},{9,0,316},{139,0,256},{134,0,1771},{135,0,1190},{
+137,0,132},{10,11,227},{11,11,497},{11,11,709},{140,11,415},{143,0,66},{6,11,360
+},{7,11,1664},{136,11,478},{144,10,28},{4,0,317},{135,0,1279},{5,0,63},{133,0,
+509},{136,11,699},{145,10,36},{134,0,1475},{11,11,343},{142,11,127},{132,11,739}
+,{132,0,288},{135,11,1757},{8,0,89},{8,0,620},{9,0,608},{11,0,628},{12,0,322},{
+143,0,124},{134,0,1225},{7,0,1189},{4,11,67},{5,11,422},{6,10,363},{7,11,1037},{
+7,11,1289},{7,11,1555},{7,10,1955},{8,10,725},{9,11,741},{145,11,108},{134,0,
+1468},{6,0,689},{134,0,1451},{138,0,120},{151,0,1},{137,10,805},{142,0,329},{5,
+10,813},{135,10,2046},{135,0,226},{138,11,96},{7,0,1855},{5,10,712},{11,10,17},{
+13,10,321},{144,10,67},{9,0,461},{6,10,320},{7,10,781},{7,10,1921},{9,10,55},{10
+,10,186},{10,10,273},{10,10,664},{10,10,801},{11,10,996},{11,10,997},{13,10,157}
+,{142,10,170},{8,11,203},{8,10,271},{11,11,823},{11,11,846},{12,11,482},{13,11,
+133},{13,11,277},{13,11,302},{13,11,464},{14,11,205},{142,11,221},{135,0,1346},{
+4,11,449},{133,11,718},{134,0,85},{14,0,299},{7,10,103},{7,10,863},{11,10,184},{
+145,10,62},{4,11,355},{6,11,311},{9,11,256},{138,11,404},{137,10,659},{138,11,
+758},{133,11,827},{5,11,64},{140,11,581},{134,0,1171},{4,11,442},{7,11,1047},{7,
+11,1352},{135,11,1643},{132,0,980},{5,11,977},{6,11,288},{7,11,528},{135,11,1065
+},{5,0,279},{6,0,235},{7,0,468},{8,0,446},{9,0,637},{10,0,717},{11,0,738},{140,0
+,514},{132,0,293},{11,10,337},{142,10,303},{136,11,285},{5,0,17},{6,0,371},{9,0,
+528},{12,0,364},{132,11,254},{5,10,77},{7,10,1455},{10,10,843},{147,10,73},{150,
+0,5},{132,10,458},{6,11,12},{7,11,1219},{145,11,73},{135,10,1420},{6,10,109},{
+138,10,382},{135,11,125},{6,10,330},{7,10,1084},{139,10,142},{6,11,369},{6,11,
+502},{7,11,1036},{8,11,348},{9,11,452},{10,11,26},{11,11,224},{11,11,387},{11,11
+,772},{12,11,95},{12,11,629},{13,11,195},{13,11,207},{13,11,241},{14,11,260},{14
+,11,270},{143,11,140},{132,11,269},{5,11,480},{7,11,532},{7,11,1197},{7,11,1358}
+,{8,11,291},{11,11,349},{142,11,396},{150,0,48},{10,0,601},{13,0,353},{141,0,376
+},{5,0,779},{5,0,807},{6,0,1655},{134,0,1676},{142,11,223},{4,0,196},{5,0,558},{
+133,0,949},{148,11,15},{135,11,1764},{134,0,1322},{132,0,752},{139,0,737},{135,
+11,657},{136,11,533},{135,0,412},{4,0,227},{5,0,159},{5,0,409},{7,0,80},{8,0,556
+},{10,0,479},{12,0,418},{14,0,50},{14,0,123},{14,0,192},{14,0,249},{14,0,295},{
+143,0,27},{7,0,1470},{8,0,66},{8,0,137},{8,0,761},{9,0,638},{11,0,80},{11,0,212}
+,{11,0,368},{11,0,418},{12,0,8},{13,0,15},{16,0,61},{17,0,59},{19,0,28},{148,0,
+84},{135,10,1985},{4,11,211},{4,11,332},{5,11,335},{6,11,238},{7,11,269},{7,11,
+811},{7,11,1797},{8,10,122},{8,11,836},{9,11,507},{141,11,242},{6,0,683},{134,0,
+1252},{4,0,873},{132,10,234},{134,0,835},{6,0,38},{7,0,1220},{8,0,185},{8,0,256}
+,{9,0,22},{9,0,331},{10,0,738},{11,0,205},{11,0,540},{11,0,746},{13,0,465},{14,0
+,88},{142,0,194},{138,0,986},{5,11,1009},{12,11,582},{146,11,131},{4,0,159},{6,0
+,115},{7,0,252},{7,0,257},{7,0,1928},{8,0,69},{9,0,384},{10,0,91},{10,0,615},{12
+,0,375},{14,0,235},{18,0,117},{147,0,123},{133,0,911},{136,0,278},{5,10,430},{5,
+10,932},{6,10,131},{7,10,417},{9,10,522},{11,10,314},{141,10,390},{14,10,149},{
+14,10,399},{143,10,57},{4,0,151},{7,0,1567},{136,0,749},{5,11,228},{6,11,203},{7
+,11,156},{8,11,347},{137,11,265},{132,10,507},{10,0,989},{140,0,956},{133,0,990}
+,{5,0,194},{6,0,927},{7,0,1662},{9,0,90},{140,0,564},{4,10,343},{133,10,511},{
+133,0,425},{7,10,455},{138,10,591},{4,0,774},{7,11,476},{7,11,1592},{138,11,87},
+{5,0,971},{135,10,1381},{5,11,318},{147,11,121},{5,11,291},{7,11,765},{9,11,389}
+,{140,11,548},{134,10,575},{4,0,827},{12,0,646},{12,0,705},{12,0,712},{140,0,714
+},{139,0,752},{137,0,662},{5,0,72},{6,0,264},{7,0,21},{7,0,46},{7,0,2013},{8,0,
+215},{8,0,513},{10,0,266},{139,0,22},{139,11,522},{6,0,239},{7,0,118},{10,0,95},
+{11,0,603},{13,0,443},{14,0,160},{143,0,4},{6,0,431},{134,0,669},{7,10,1127},{7,
+10,1572},{10,10,297},{10,10,422},{11,10,764},{11,10,810},{12,10,264},{13,10,102}
+,{13,10,300},{13,10,484},{14,10,147},{14,10,229},{17,10,71},{18,10,118},{147,10,
+120},{5,0,874},{6,0,1677},{15,0,0},{10,11,525},{139,11,82},{6,0,65},{7,0,939},{7
+,0,1172},{7,0,1671},{9,0,540},{10,0,696},{11,0,265},{11,0,732},{11,0,928},{11,0,
+937},{141,0,438},{134,0,1350},{136,11,547},{132,11,422},{5,11,355},{145,11,0},{
+137,11,905},{5,0,682},{135,0,1887},{132,0,809},{4,0,696},{133,11,865},{6,0,1074}
+,{6,0,1472},{14,10,35},{142,10,191},{5,11,914},{134,11,1625},{133,11,234},{135,
+11,1383},{137,11,780},{132,10,125},{4,0,726},{133,0,630},{8,0,802},{136,0,838},{
+132,10,721},{6,0,1337},{7,0,776},{19,0,56},{136,10,145},{132,0,970},{7,10,792},{
+8,10,147},{10,10,821},{139,10,1021},{139,10,970},{8,0,940},{137,0,797},{135,11,
+1312},{9,0,248},{10,0,400},{7,11,816},{7,11,1241},{7,10,1999},{9,11,283},{9,11,
+520},{10,11,213},{10,11,307},{10,11,463},{10,11,671},{10,11,746},{11,11,401},{11
+,11,794},{12,11,517},{18,11,107},{147,11,115},{6,0,1951},{134,0,2040},{135,11,
+339},{13,0,41},{15,0,93},{5,10,168},{5,10,930},{8,10,74},{9,10,623},{12,10,500},
+{140,10,579},{6,0,118},{7,0,215},{7,0,1521},{140,0,11},{6,10,220},{7,10,1101},{
+141,10,105},{6,11,421},{7,11,61},{7,11,1540},{10,11,11},{138,11,501},{7,0,615},{
+138,0,251},{140,11,631},{135,0,1044},{6,10,19},{7,10,1413},{139,10,428},{133,0,
+225},{7,10,96},{8,10,401},{8,10,703},{137,10,896},{145,10,116},{6,11,102},{7,11,
+72},{15,11,142},{147,11,67},{7,10,1961},{7,10,1965},{8,10,702},{136,10,750},{7,
+10,2030},{8,10,150},{8,10,737},{12,10,366},{151,11,30},{4,0,370},{5,0,756},{7,0,
+1326},{135,11,823},{8,10,800},{9,10,148},{9,10,872},{9,10,890},{11,10,309},{11,
+10,1001},{13,10,267},{141,10,323},{6,0,1662},{7,0,48},{8,0,771},{10,0,116},{13,0
+,104},{14,0,105},{14,0,184},{15,0,168},{19,0,92},{148,0,68},{10,0,209},{135,11,
+1870},{7,11,68},{8,11,48},{8,11,88},{8,11,582},{8,11,681},{9,11,373},{9,11,864},
+{11,11,157},{11,11,336},{11,11,843},{148,11,27},{134,0,930},{4,11,88},{5,11,137}
+,{5,11,174},{5,11,777},{6,11,1664},{6,11,1725},{7,11,77},{7,11,426},{7,11,1317},
+{7,11,1355},{8,11,126},{8,11,563},{9,11,523},{9,11,750},{10,11,310},{10,11,836},
+{11,11,42},{11,11,318},{11,11,731},{12,11,68},{12,11,92},{12,11,507},{12,11,692}
+,{13,11,81},{13,11,238},{13,11,374},{18,11,138},{19,11,78},{19,11,111},{20,11,55
+},{20,11,77},{148,11,92},{4,11,938},{135,11,1831},{5,10,547},{7,10,424},{8,11,
+617},{138,11,351},{6,0,1286},{6,11,1668},{7,11,1499},{8,11,117},{9,11,314},{138,
+11,174},{6,0,759},{6,0,894},{7,11,707},{139,11,563},{4,0,120},{135,0,1894},{9,0,
+385},{149,0,17},{138,0,429},{133,11,403},{5,0,820},{135,0,931},{10,0,199},{133,
+10,133},{6,0,151},{6,0,1675},{7,0,383},{151,0,10},{6,0,761},{136,10,187},{8,0,
+365},{10,10,0},{10,10,818},{139,10,988},{4,11,44},{5,11,311},{6,11,156},{7,11,
+639},{7,11,762},{7,11,1827},{9,11,8},{9,11,462},{148,11,83},{4,11,346},{7,11,115
+},{9,11,180},{9,11,456},{138,11,363},{136,10,685},{7,0,1086},{145,0,46},{6,0,
+1624},{11,0,11},{12,0,422},{13,0,444},{142,0,360},{6,0,1020},{6,0,1260},{134,0,
+1589},{4,0,43},{5,0,344},{5,0,357},{14,0,472},{150,0,58},{6,0,1864},{6,0,1866},{
+6,0,1868},{6,0,1869},{6,0,1874},{6,0,1877},{6,0,1903},{6,0,1911},{9,0,920},{9,0,
+921},{9,0,924},{9,0,946},{9,0,959},{9,0,963},{9,0,970},{9,0,997},{9,0,1008},{9,0
+,1017},{12,0,795},{12,0,797},{12,0,798},{12,0,800},{12,0,803},{12,0,811},{12,0,
+820},{12,0,821},{12,0,839},{12,0,841},{12,0,848},{12,0,911},{12,0,921},{12,0,922
+},{12,0,925},{12,0,937},{12,0,944},{12,0,945},{12,0,953},{15,0,184},{15,0,191},{
+15,0,199},{15,0,237},{15,0,240},{15,0,243},{15,0,246},{18,0,203},{21,0,40},{21,0
+,52},{21,0,57},{24,0,23},{24,0,28},{152,0,30},{134,0,725},{145,11,58},{133,0,888
+},{137,10,874},{4,0,711},{8,10,774},{10,10,670},{140,10,51},{144,11,40},{6,11,
+185},{7,11,1899},{139,11,673},{137,10,701},{137,0,440},{4,11,327},{5,11,478},{7,
+11,1332},{8,11,753},{140,11,227},{4,10,127},{5,10,350},{6,10,356},{8,10,426},{9,
+10,572},{10,10,247},{139,10,312},{5,11,1020},{133,11,1022},{4,11,103},{133,11,
+401},{6,0,1913},{6,0,1926},{6,0,1959},{9,0,914},{9,0,939},{9,0,952},{9,0,979},{9
+,0,990},{9,0,998},{9,0,1003},{9,0,1023},{12,0,827},{12,0,834},{12,0,845},{12,0,
+912},{12,0,935},{12,0,951},{15,0,172},{15,0,174},{18,0,198},{149,0,63},{5,0,958}
+,{5,0,987},{4,11,499},{135,11,1421},{7,0,885},{6,10,59},{6,10,1762},{9,10,603},{
+141,10,397},{10,11,62},{141,11,164},{4,0,847},{135,0,326},{11,0,276},{142,0,293}
+,{4,0,65},{5,0,479},{5,0,1004},{7,0,1913},{8,0,317},{9,0,302},{10,0,612},{13,0,
+22},{132,11,96},{4,0,261},{135,0,510},{135,0,1514},{6,10,111},{7,10,4},{8,10,163
+},{8,10,776},{138,10,566},{4,0,291},{9,0,515},{12,0,152},{12,0,443},{13,0,392},{
+142,0,357},{7,11,399},{135,11,1492},{4,0,589},{139,0,282},{6,11,563},{135,10,
+1994},{5,10,297},{135,10,1038},{4,0,130},{7,0,843},{135,0,1562},{5,0,42},{5,0,
+879},{7,0,245},{7,0,324},{7,0,1532},{11,0,463},{11,0,472},{13,0,363},{144,0,52},
+{4,0,134},{133,0,372},{133,0,680},{136,10,363},{6,0,1997},{8,0,935},{136,0,977},
+{4,0,810},{135,0,1634},{135,10,1675},{7,0,1390},{4,11,910},{133,11,832},{7,10,
+808},{8,11,266},{139,11,578},{132,0,644},{4,0,982},{138,0,867},{132,10,280},{135
+,0,540},{140,10,54},{135,0,123},{134,0,1978},{4,10,421},{133,10,548},{6,0,623},{
+136,0,789},{4,0,908},{5,0,359},{5,0,508},{6,0,1723},{7,0,343},{7,0,1996},{135,0,
+2026},{134,0,1220},{4,0,341},{135,0,480},{6,10,254},{9,10,109},{138,10,103},{134
+,0,888},{8,11,528},{137,11,348},{7,0,1995},{8,0,299},{11,0,890},{12,0,674},{4,11
+,20},{133,11,616},{135,11,1094},{134,10,1630},{4,0,238},{5,0,503},{6,0,179},{7,0
+,2003},{8,0,381},{8,0,473},{9,0,149},{10,0,788},{15,0,45},{15,0,86},{20,0,110},{
+150,0,57},{133,10,671},{4,11,26},{5,11,429},{6,11,245},{7,11,704},{7,11,1379},{
+135,11,1474},{4,0,121},{5,0,156},{5,0,349},{9,0,431},{10,0,605},{142,0,342},{7,
+11,943},{139,11,614},{132,10,889},{132,11,621},{7,10,1382},{7,11,1382},{135,10,
+1910},{132,10,627},{133,10,775},{133,11,542},{133,11,868},{136,11,433},{6,0,1373
+},{7,0,1011},{11,10,362},{11,10,948},{140,10,388},{6,0,80},{7,0,173},{9,0,547},{
+10,0,730},{14,0,18},{22,0,39},{135,11,1495},{6,0,1694},{135,0,1974},{140,0,196},
+{4,0,923},{6,0,507},{6,0,1711},{7,10,451},{8,10,389},{12,10,490},{13,10,16},{13,
+10,215},{13,10,351},{18,10,132},{147,10,125},{6,0,646},{134,0,1047},{135,10,841}
+,{136,10,566},{6,0,1611},{135,0,1214},{139,0,926},{132,11,525},{132,0,595},{5,0,
+240},{6,0,459},{7,0,12},{7,0,114},{7,0,949},{7,0,1753},{7,0,1805},{8,0,658},{9,0
+,1},{11,0,959},{141,0,446},{5,10,912},{134,10,1695},{132,0,446},{7,11,62},{12,11
+,45},{147,11,112},{5,10,236},{6,10,572},{8,10,492},{11,10,618},{144,10,56},{5,10
+,190},{136,10,318},{135,10,1376},{4,11,223},{6,11,359},{11,11,3},{13,11,108},{14
+,11,89},{144,11,22},{132,11,647},{134,0,490},{134,0,491},{134,0,1584},{135,11,
+685},{138,11,220},{7,0,250},{136,0,507},{132,0,158},{4,0,140},{7,0,362},{8,0,209
+},{9,0,10},{9,0,160},{9,0,503},{9,0,614},{10,0,689},{11,0,327},{11,0,553},{11,0,
+725},{11,0,767},{12,0,252},{12,0,583},{13,0,192},{14,0,269},{14,0,356},{148,0,50
+},{19,0,1},{19,0,26},{150,0,9},{132,11,109},{6,0,228},{7,0,1341},{9,0,408},{138,
+0,343},{4,0,373},{5,0,283},{6,0,480},{7,0,609},{10,0,860},{138,0,878},{6,0,779},
+{134,0,1209},{4,0,557},{7,11,263},{7,11,628},{136,11,349},{132,0,548},{7,0,197},
+{8,0,142},{8,0,325},{9,0,150},{9,0,596},{10,0,350},{10,0,353},{11,0,74},{11,0,
+315},{12,0,662},{12,0,681},{14,0,423},{143,0,141},{4,11,40},{10,11,67},{11,11,
+117},{11,11,768},{139,11,935},{7,11,992},{8,11,301},{9,11,722},{12,11,63},{13,11
+,29},{14,11,161},{143,11,18},{6,0,1490},{138,11,532},{5,0,580},{7,0,378},{7,0,
+674},{7,0,1424},{15,0,83},{16,0,11},{15,11,83},{144,11,11},{6,0,1057},{6,0,1335}
+,{10,0,316},{7,10,85},{7,10,247},{8,10,585},{138,10,163},{4,0,169},{5,0,83},{6,0
+,399},{6,0,579},{6,0,1513},{7,0,692},{7,0,846},{7,0,1015},{7,0,1799},{8,0,403},{
+9,0,394},{10,0,133},{12,0,4},{12,0,297},{12,0,452},{16,0,81},{18,0,25},{21,0,14}
+,{22,0,12},{151,0,18},{134,0,1106},{7,0,1546},{11,0,299},{142,0,407},{134,0,1192
+},{132,0,177},{5,0,411},{135,0,653},{7,0,439},{10,0,727},{11,0,260},{139,0,684},
+{138,10,145},{147,10,83},{5,0,208},{7,0,753},{135,0,1528},{137,11,617},{135,10,
+1922},{135,11,825},{11,0,422},{13,0,389},{4,10,124},{10,10,457},{11,10,121},{11,
+10,169},{11,10,870},{12,10,214},{14,10,187},{143,10,77},{11,0,615},{15,0,58},{11
+,11,615},{143,11,58},{9,0,618},{138,0,482},{6,0,1952},{6,0,1970},{142,0,505},{7,
+10,1193},{135,11,1838},{133,0,242},{135,10,1333},{6,10,107},{7,10,638},{7,10,
+1632},{137,10,396},{133,0,953},{5,10,370},{134,10,1756},{5,11,28},{6,11,204},{10
+,11,320},{10,11,583},{13,11,502},{14,11,72},{14,11,274},{14,11,312},{14,11,344},
+{15,11,159},{16,11,62},{16,11,69},{17,11,30},{18,11,42},{18,11,53},{18,11,84},{
+18,11,140},{19,11,68},{19,11,85},{20,11,5},{20,11,45},{20,11,101},{22,11,7},{150
+,11,20},{4,11,558},{6,11,390},{7,11,162},{7,11,689},{9,11,360},{138,11,653},{11,
+0,802},{141,0,67},{133,10,204},{133,0,290},{5,10,970},{134,10,1706},{132,0,380},
+{5,0,52},{7,0,277},{9,0,368},{139,0,791},{5,11,856},{6,11,1672},{6,11,1757},{6,
+11,1781},{7,11,1150},{7,11,1425},{7,11,1453},{140,11,513},{5,11,92},{7,10,3},{10
+,11,736},{140,11,102},{4,0,112},{5,0,653},{5,10,483},{5,10,685},{6,10,489},{7,10
+,1204},{136,10,394},{132,10,921},{6,0,1028},{133,10,1007},{5,11,590},{9,11,213},
+{145,11,91},{135,10,1696},{10,0,138},{139,0,476},{5,0,725},{5,0,727},{135,0,1811
+},{4,0,979},{6,0,1821},{6,0,1838},{8,0,876},{8,0,883},{8,0,889},{8,0,893},{8,0,
+895},{10,0,934},{12,0,720},{14,0,459},{148,0,123},{135,11,551},{4,0,38},{6,0,435
+},{7,0,307},{7,0,999},{7,0,1481},{7,0,1732},{7,0,1738},{8,0,371},{9,0,414},{11,0
+,316},{12,0,52},{13,0,420},{147,0,100},{135,0,1296},{132,10,712},{134,10,1629},{
+133,0,723},{134,0,651},{136,11,191},{9,11,791},{10,11,93},{11,11,301},{16,11,13}
+,{17,11,23},{18,11,135},{19,11,12},{20,11,1},{20,11,12},{148,11,14},{136,11,503}
+,{6,11,466},{135,11,671},{6,0,1200},{134,0,1330},{135,0,1255},{134,0,986},{5,0,
+109},{6,0,1784},{7,0,1895},{12,0,296},{140,0,302},{135,11,983},{133,10,485},{134
+,0,660},{134,0,800},{5,0,216},{5,0,294},{6,0,591},{7,0,1879},{9,0,141},{9,0,270}
+,{9,0,679},{10,0,159},{11,0,197},{11,0,438},{12,0,538},{12,0,559},{14,0,144},{14
+,0,167},{15,0,67},{4,10,285},{5,10,317},{6,10,301},{7,10,7},{8,10,153},{10,10,
+766},{11,10,468},{12,10,467},{141,10,143},{136,0,945},{134,0,1090},{137,0,81},{
+12,11,468},{19,11,96},{148,11,24},{134,0,391},{138,11,241},{7,0,322},{136,0,249}
+,{134,0,1412},{135,11,795},{5,0,632},{138,0,526},{136,10,819},{6,0,144},{7,0,948
+},{7,0,1042},{8,0,235},{8,0,461},{9,0,453},{9,0,796},{10,0,354},{17,0,77},{135,
+11,954},{139,10,917},{6,0,940},{134,0,1228},{4,0,362},{7,0,52},{135,0,303},{6,11
+,549},{8,11,34},{8,11,283},{9,11,165},{138,11,475},{7,11,370},{7,11,1007},{7,11,
+1177},{135,11,1565},{5,11,652},{5,11,701},{135,11,449},{5,0,196},{6,0,486},{7,0,
+212},{8,0,309},{136,0,346},{6,10,1719},{6,10,1735},{7,10,2016},{7,10,2020},{8,10
+,837},{137,10,852},{6,11,159},{6,11,364},{7,11,516},{7,11,1439},{137,11,518},{
+135,0,1912},{135,0,1290},{132,0,686},{141,11,151},{138,0,625},{136,0,706},{138,
+10,568},{139,0,412},{4,0,30},{133,0,43},{8,10,67},{138,10,419},{7,0,967},{141,0,
+11},{12,0,758},{14,0,441},{142,0,462},{10,10,657},{14,10,297},{142,10,361},{139,
+10,729},{4,0,220},{135,0,1535},{7,11,501},{9,11,111},{10,11,141},{11,11,332},{13
+,11,43},{13,11,429},{14,11,130},{14,11,415},{145,11,102},{4,0,950},{6,0,1859},{7
+,0,11},{8,0,873},{12,0,710},{12,0,718},{12,0,748},{12,0,765},{148,0,124},{5,11,
+149},{5,11,935},{136,11,233},{142,11,291},{134,0,1579},{7,0,890},{8,10,51},{9,10
+,868},{10,10,833},{12,10,481},{12,10,570},{148,10,106},{141,0,2},{132,10,445},{
+136,11,801},{135,0,1774},{7,0,1725},{138,0,393},{5,0,263},{134,0,414},{132,11,
+322},{133,10,239},{7,0,456},{7,10,1990},{8,10,130},{139,10,720},{137,0,818},{5,
+10,123},{6,10,530},{7,10,348},{135,10,1419},{135,10,2024},{6,0,178},{6,0,1750},{
+8,0,251},{9,0,690},{10,0,155},{10,0,196},{10,0,373},{11,0,698},{13,0,155},{148,0
+,93},{5,0,97},{137,0,393},{134,0,674},{11,0,223},{140,0,168},{132,10,210},{139,
+11,464},{6,0,1639},{146,0,159},{139,11,2},{7,0,934},{8,0,647},{17,0,97},{19,0,59
+},{150,0,2},{132,0,191},{5,0,165},{9,0,346},{10,0,655},{11,0,885},{4,10,430},{
+135,11,357},{133,0,877},{5,10,213},{133,11,406},{8,0,128},{139,0,179},{6,11,69},
+{135,11,117},{135,0,1297},{11,11,43},{13,11,72},{141,11,142},{135,11,1830},{142,
+0,164},{5,0,57},{6,0,101},{6,0,586},{6,0,1663},{7,0,132},{7,0,1154},{7,0,1415},{
+7,0,1507},{12,0,493},{15,0,105},{151,0,15},{5,0,459},{7,0,1073},{8,0,241},{136,0
+,334},{133,11,826},{133,10,108},{5,10,219},{10,11,132},{11,11,191},{11,11,358},{
+139,11,460},{6,0,324},{6,0,520},{7,0,338},{7,0,1729},{8,0,228},{139,0,750},{21,0
+,30},{22,0,53},{4,10,193},{5,10,916},{7,10,364},{10,10,398},{10,10,726},{11,10,
+317},{11,10,626},{12,10,142},{12,10,288},{12,10,678},{13,10,313},{15,10,113},{
+146,10,114},{6,11,110},{135,11,1681},{135,0,910},{6,10,241},{7,10,907},{8,10,832
+},{9,10,342},{10,10,729},{11,10,284},{11,10,445},{11,10,651},{11,10,863},{13,10,
+398},{146,10,99},{7,0,705},{9,0,734},{5,11,1000},{7,11,733},{137,11,583},{4,0,73
+},{6,0,612},{7,0,927},{7,0,1822},{8,0,217},{9,0,765},{9,0,766},{10,0,408},{11,0,
+51},{11,0,793},{12,0,266},{15,0,158},{20,0,89},{150,0,32},{7,0,1330},{4,11,297},
+{6,11,529},{7,11,152},{7,11,713},{7,11,1845},{8,11,710},{8,11,717},{140,11,639},
+{5,0,389},{136,0,636},{134,0,1409},{4,10,562},{9,10,254},{139,10,879},{134,0,893
+},{132,10,786},{4,11,520},{135,11,575},{136,0,21},{140,0,721},{136,0,959},{7,11,
+1428},{7,11,1640},{9,11,169},{9,11,182},{9,11,367},{9,11,478},{9,11,506},{9,11,
+551},{9,11,648},{9,11,651},{9,11,697},{9,11,705},{9,11,725},{9,11,787},{9,11,794
+},{10,11,198},{10,11,214},{10,11,267},{10,11,275},{10,11,456},{10,11,551},{10,11
+,561},{10,11,613},{10,11,627},{10,11,668},{10,11,675},{10,11,691},{10,11,695},{
+10,11,707},{10,11,715},{11,11,183},{11,11,201},{11,11,244},{11,11,262},{11,11,
+352},{11,11,439},{11,11,493},{11,11,572},{11,11,591},{11,11,608},{11,11,611},{11
+,11,646},{11,11,674},{11,11,711},{11,11,751},{11,11,761},{11,11,776},{11,11,785}
+,{11,11,850},{11,11,853},{11,11,862},{11,11,865},{11,11,868},{11,11,898},{11,11,
+902},{11,11,903},{11,11,910},{11,11,932},{11,11,942},{11,11,957},{11,11,967},{11
+,11,972},{12,11,148},{12,11,195},{12,11,220},{12,11,237},{12,11,318},{12,11,339}
+,{12,11,393},{12,11,445},{12,11,450},{12,11,474},{12,11,509},{12,11,533},{12,11,
+591},{12,11,594},{12,11,597},{12,11,621},{12,11,633},{12,11,642},{13,11,59},{13,
+11,60},{13,11,145},{13,11,239},{13,11,250},{13,11,273},{13,11,329},{13,11,344},{
+13,11,365},{13,11,372},{13,11,387},{13,11,403},{13,11,414},{13,11,456},{13,11,
+478},{13,11,483},{13,11,489},{14,11,55},{14,11,57},{14,11,81},{14,11,90},{14,11,
+148},{14,11,239},{14,11,266},{14,11,321},{14,11,326},{14,11,327},{14,11,330},{14
+,11,347},{14,11,355},{14,11,401},{14,11,411},{14,11,414},{14,11,416},{14,11,420}
+,{15,11,61},{15,11,74},{15,11,87},{15,11,88},{15,11,94},{15,11,96},{15,11,116},{
+15,11,149},{15,11,154},{16,11,50},{16,11,63},{16,11,73},{17,11,2},{17,11,66},{17
+,11,92},{17,11,103},{17,11,112},{18,11,50},{18,11,54},{18,11,82},{18,11,86},{18,
+11,90},{18,11,111},{18,11,115},{18,11,156},{19,11,40},{19,11,79},{20,11,78},{149
+,11,22},{137,11,170},{134,0,1433},{135,11,1307},{139,11,411},{5,0,189},{7,0,442}
+,{7,0,443},{8,0,281},{12,0,174},{141,0,261},{6,10,216},{7,10,901},{7,10,1343},{
+136,10,493},{5,11,397},{6,11,154},{7,10,341},{7,11,676},{8,11,443},{8,11,609},{9
+,11,24},{9,11,325},{10,11,35},{11,10,219},{11,11,535},{11,11,672},{11,11,1018},{
+12,11,637},{144,11,30},{6,0,2},{7,0,191},{7,0,446},{7,0,1262},{7,0,1737},{8,0,22
+},{8,0,270},{8,0,612},{9,0,4},{9,0,312},{9,0,436},{9,0,626},{10,0,216},{10,0,311
+},{10,0,521},{10,0,623},{11,0,72},{11,0,330},{11,0,455},{12,0,321},{12,0,504},{
+12,0,530},{12,0,543},{13,0,17},{13,0,156},{13,0,334},{14,0,131},{17,0,60},{148,0
+,64},{7,0,354},{10,0,410},{139,0,815},{139,10,130},{7,10,1734},{137,11,631},{12,
+0,425},{15,0,112},{10,10,115},{11,10,420},{13,10,404},{14,10,346},{143,10,54},{6
+,0,60},{6,0,166},{7,0,374},{7,0,670},{7,0,1327},{8,0,411},{8,0,435},{9,0,653},{9
+,0,740},{10,0,385},{11,0,222},{11,0,324},{11,0,829},{140,0,611},{7,0,1611},{13,0
+,14},{15,0,44},{19,0,13},{148,0,76},{133,11,981},{4,11,56},{7,11,1791},{8,11,607
+},{8,11,651},{11,11,465},{11,11,835},{12,11,337},{141,11,480},{6,0,1478},{5,10,
+1011},{136,10,701},{139,0,596},{5,0,206},{134,0,398},{4,10,54},{5,10,666},{7,10,
+1039},{7,10,1130},{9,10,195},{138,10,302},{7,0,50},{9,11,158},{138,11,411},{135,
+11,1120},{6,0,517},{7,0,1159},{10,0,621},{11,0,192},{134,10,1669},{4,0,592},{6,0
+,600},{135,0,1653},{10,0,223},{139,0,645},{136,11,139},{7,0,64},{136,0,245},{142
+,0,278},{6,11,622},{135,11,1030},{136,0,604},{134,0,1502},{138,0,265},{141,11,
+168},{7,0,1763},{140,0,310},{7,10,798},{139,11,719},{7,11,160},{10,11,624},{142,
+11,279},{132,11,363},{7,10,122},{9,10,259},{10,10,84},{11,10,470},{12,10,541},{
+141,10,379},{5,0,129},{6,0,61},{135,0,947},{134,0,1356},{135,11,1191},{13,0,505}
+,{141,0,506},{11,0,1000},{5,10,82},{5,10,131},{7,10,1755},{8,10,31},{9,10,168},{
+9,10,764},{139,10,869},{134,0,966},{134,10,605},{134,11,292},{5,11,177},{6,11,
+616},{7,11,827},{9,11,525},{138,11,656},{135,11,1486},{138,11,31},{5,10,278},{
+137,10,68},{4,10,163},{5,10,201},{5,10,307},{5,10,310},{6,10,335},{7,10,284},{
+136,10,165},{6,0,839},{135,10,1660},{136,10,781},{6,10,33},{135,10,1244},{133,0,
+637},{4,11,161},{133,11,631},{137,0,590},{7,10,1953},{136,10,720},{5,0,280},{7,0
+,1226},{138,10,203},{134,0,1386},{5,0,281},{6,0,1026},{6,10,326},{7,10,677},{137
+,10,425},{7,11,1557},{135,11,1684},{135,0,1064},{9,11,469},{9,11,709},{12,11,512
+},{14,11,65},{145,11,12},{134,0,917},{10,11,229},{11,11,73},{11,11,376},{139,11,
+433},{7,0,555},{9,0,192},{13,0,30},{13,0,49},{15,0,150},{16,0,76},{20,0,52},{7,
+10,1316},{7,10,1412},{7,10,1839},{9,10,589},{11,10,241},{11,10,676},{11,10,811},
+{11,10,891},{12,10,140},{12,10,346},{12,10,479},{13,10,381},{14,10,188},{146,10,
+30},{149,0,15},{6,0,1882},{6,0,1883},{6,0,1897},{9,0,945},{9,0,1014},{9,0,1020},
+{12,0,823},{12,0,842},{12,0,866},{12,0,934},{15,0,242},{146,0,208},{6,0,965},{
+134,0,1499},{7,0,33},{7,0,120},{8,0,489},{9,0,319},{10,0,820},{11,0,1004},{12,0,
+379},{12,0,679},{13,0,117},{13,0,412},{14,0,25},{15,0,52},{15,0,161},{16,0,47},{
+149,0,2},{6,11,558},{7,11,651},{8,11,421},{9,11,0},{138,11,34},{4,0,937},{5,0,
+801},{7,0,473},{5,10,358},{7,10,1184},{10,10,662},{13,10,212},{13,10,304},{13,10
+,333},{145,10,98},{132,0,877},{6,0,693},{134,0,824},{132,0,365},{7,11,1832},{138
+,11,374},{5,0,7},{139,0,774},{4,0,734},{5,0,662},{134,0,430},{4,0,746},{135,0,
+1090},{5,0,360},{8,0,237},{10,0,231},{147,0,124},{138,11,348},{6,11,6},{7,11,81}
+,{7,11,771},{7,11,1731},{9,11,405},{138,11,421},{6,0,740},{137,0,822},{133,10,
+946},{7,0,1485},{136,0,929},{7,10,411},{8,10,631},{9,10,323},{10,10,355},{11,10,
+491},{12,10,143},{12,10,402},{13,10,73},{14,10,408},{15,10,107},{146,10,71},{135
+,10,590},{5,11,881},{133,11,885},{150,11,25},{4,0,852},{5,11,142},{134,11,546},{
+7,10,1467},{8,10,328},{10,10,544},{11,10,955},{13,10,320},{145,10,83},{9,0,17},{
+10,0,291},{11,10,511},{13,10,394},{14,10,298},{14,10,318},{146,10,103},{5,11,466
+},{11,11,571},{12,11,198},{13,11,283},{14,11,186},{15,11,21},{143,11,103},{134,0
+,1001},{4,11,185},{5,11,257},{5,11,839},{5,11,936},{7,11,171},{9,11,399},{10,11,
+258},{10,11,395},{10,11,734},{11,11,1014},{12,11,23},{13,11,350},{14,11,150},{
+147,11,6},{143,0,35},{132,0,831},{5,10,835},{134,10,483},{4,0,277},{5,0,608},{6,
+0,493},{7,0,457},{12,0,384},{7,11,404},{7,11,1377},{7,11,1430},{7,11,2017},{8,11
+,149},{8,11,239},{8,11,512},{8,11,793},{8,11,818},{9,11,474},{9,11,595},{10,11,
+122},{10,11,565},{10,11,649},{10,11,783},{11,11,239},{11,11,295},{11,11,447},{11
+,11,528},{11,11,639},{11,11,800},{11,11,936},{12,11,25},{12,11,73},{12,11,77},{
+12,11,157},{12,11,316},{12,11,390},{12,11,391},{12,11,394},{12,11,395},{12,11,
+478},{12,11,503},{12,11,592},{12,11,680},{13,11,50},{13,11,53},{13,11,132},{13,
+11,198},{13,11,275},{13,11,322},{13,11,415},{14,11,71},{14,11,257},{14,11,395},{
+15,11,71},{15,11,136},{17,11,123},{18,11,93},{147,11,58},{134,0,1351},{7,0,27},{
+135,0,316},{136,11,712},{136,0,984},{133,0,552},{137,0,264},{132,0,401},{6,0,710
+},{6,0,1111},{134,0,1343},{134,0,1211},{9,0,543},{10,0,524},{11,0,108},{11,0,653
+},{12,0,524},{13,0,123},{14,0,252},{16,0,18},{19,0,38},{20,0,26},{20,0,65},{21,0
+,3},{151,0,11},{4,0,205},{5,0,623},{7,0,104},{8,0,519},{137,0,716},{132,10,677},
+{4,11,377},{152,11,13},{135,11,1673},{7,0,579},{9,0,41},{9,0,244},{9,0,669},{10,
+0,5},{11,0,861},{11,0,951},{139,0,980},{132,0,717},{136,0,1011},{132,0,805},{4,
+11,180},{135,11,1906},{132,10,777},{132,10,331},{132,0,489},{6,0,1024},{4,11,491
+},{133,10,747},{135,11,1182},{4,11,171},{138,11,234},{4,11,586},{7,11,1186},{138
+,11,631},{135,0,892},{135,11,336},{9,11,931},{10,11,334},{148,11,71},{137,0,473}
+,{6,0,864},{12,0,659},{139,11,926},{7,0,819},{9,0,26},{9,0,392},{10,0,152},{10,0
+,226},{11,0,19},{12,0,276},{12,0,426},{12,0,589},{13,0,460},{15,0,97},{19,0,48},
+{148,0,104},{135,0,51},{133,10,326},{4,10,691},{146,10,16},{9,0,130},{11,0,765},
+{10,10,680},{10,10,793},{141,10,357},{133,11,765},{8,0,229},{6,10,32},{7,10,385}
+,{7,10,757},{7,10,1916},{8,10,94},{8,10,711},{9,10,541},{10,10,162},{10,10,795},
+{11,10,989},{11,10,1010},{12,10,14},{142,10,308},{7,11,474},{137,11,578},{132,0,
+674},{132,0,770},{5,0,79},{7,0,1027},{7,0,1477},{139,0,52},{133,11,424},{134,0,
+1666},{6,0,409},{6,10,349},{6,10,1682},{7,10,1252},{8,10,112},{8,11,714},{9,10,
+435},{9,10,668},{10,10,290},{10,10,319},{10,10,815},{11,10,180},{11,10,837},{12,
+10,240},{13,10,152},{13,10,219},{142,10,158},{5,0,789},{134,0,195},{4,0,251},{4,
+0,688},{7,0,513},{135,0,1284},{132,10,581},{9,11,420},{10,11,269},{10,11,285},{
+10,11,576},{11,11,397},{13,11,175},{145,11,90},{6,10,126},{7,10,573},{8,10,397},
+{142,10,44},{132,11,429},{133,0,889},{4,0,160},{5,0,330},{7,0,1434},{136,0,174},
+{7,11,18},{7,11,699},{7,11,1966},{8,11,752},{9,11,273},{9,11,412},{9,11,703},{10
+,11,71},{10,11,427},{10,11,508},{146,11,97},{6,0,872},{134,0,899},{133,10,926},{
+134,0,1126},{134,0,918},{4,11,53},{5,11,186},{135,11,752},{7,0,268},{136,0,569},
+{134,0,1224},{6,0,1361},{7,10,1232},{137,10,531},{8,11,575},{10,11,289},{139,11,
+319},{133,10,670},{132,11,675},{133,0,374},{135,10,1957},{133,0,731},{11,0,190},
+{15,0,49},{11,11,190},{143,11,49},{4,0,626},{5,0,506},{5,0,642},{6,0,425},{10,0,
+202},{139,0,141},{137,0,444},{7,10,242},{135,10,1942},{6,11,209},{8,11,468},{9,
+11,210},{11,11,36},{12,11,28},{12,11,630},{13,11,21},{13,11,349},{14,11,7},{145,
+11,13},{4,11,342},{135,11,1179},{5,10,834},{7,10,1202},{8,10,14},{9,10,481},{137
+,10,880},{4,11,928},{133,11,910},{4,11,318},{4,11,496},{7,11,856},{139,11,654},{
+136,0,835},{7,0,1526},{138,10,465},{151,0,17},{135,0,477},{4,10,357},{6,10,172},
+{7,10,143},{137,10,413},{6,0,1374},{138,0,994},{18,0,76},{132,10,590},{7,0,287},
+{8,0,355},{9,0,293},{137,0,743},{134,0,1389},{7,11,915},{8,11,247},{147,11,0},{4
+,11,202},{5,11,382},{6,11,454},{7,11,936},{7,11,1803},{8,11,758},{9,11,375},{9,
+11,895},{10,11,743},{10,11,792},{11,11,978},{11,11,1012},{142,11,109},{5,0,384},
+{8,0,455},{140,0,48},{132,11,390},{5,10,169},{7,10,333},{136,10,45},{5,0,264},{
+134,0,184},{138,11,791},{133,11,717},{132,10,198},{6,11,445},{7,11,332},{137,11,
+909},{136,0,1001},{4,10,24},{5,10,140},{5,10,185},{7,10,1500},{11,10,565},{139,
+10,838},{134,11,578},{5,0,633},{6,0,28},{135,0,1323},{132,0,851},{136,11,267},{7
+,0,359},{8,0,243},{140,0,175},{4,10,334},{133,10,593},{141,11,87},{136,11,766},{
+10,0,287},{12,0,138},{10,11,287},{140,11,138},{4,0,105},{132,0,740},{140,10,116}
+,{134,0,857},{135,11,1841},{6,0,1402},{137,0,819},{132,11,584},{132,10,709},{133
+,10,897},{5,0,224},{13,0,174},{146,0,52},{135,10,1840},{4,10,608},{133,10,497},{
+139,11,60},{4,0,758},{135,0,1649},{4,11,226},{4,11,326},{135,11,1770},{5,11,426}
+,{8,11,30},{9,11,2},{11,11,549},{147,11,122},{135,10,2039},{6,10,540},{136,10,
+136},{4,0,573},{8,0,655},{4,10,897},{133,10,786},{7,0,351},{139,0,128},{133,10,
+999},{4,10,299},{135,10,1004},{133,0,918},{132,11,345},{4,11,385},{7,11,265},{
+135,11,587},{133,10,456},{136,10,180},{6,0,687},{134,0,1537},{4,11,347},{5,11,
+423},{5,11,996},{135,11,1329},{132,10,755},{7,11,1259},{9,11,125},{11,11,65},{
+140,11,285},{5,11,136},{6,11,136},{136,11,644},{134,0,1525},{4,0,1009},{135,0,
+1139},{139,10,338},{132,0,340},{135,10,1464},{8,0,847},{10,0,861},{10,0,876},{10
+,0,889},{10,0,922},{10,0,929},{10,0,933},{12,0,784},{140,0,791},{139,0,176},{9,
+11,134},{10,11,2},{10,11,27},{10,11,333},{11,11,722},{143,11,1},{4,11,433},{133,
+11,719},{5,0,985},{7,0,509},{7,0,529},{145,0,96},{132,0,615},{4,10,890},{5,10,
+805},{5,10,819},{5,10,961},{6,10,396},{6,10,1631},{6,10,1678},{7,10,1967},{7,10,
+2041},{9,10,630},{11,10,8},{11,10,1019},{12,10,176},{13,10,225},{14,10,292},{149
+,10,24},{135,0,1919},{134,0,1131},{144,11,21},{144,11,51},{135,10,1815},{4,0,247
+},{7,10,1505},{10,10,190},{10,10,634},{11,10,792},{12,10,358},{140,10,447},{5,10
+,0},{6,10,536},{7,10,604},{13,10,445},{145,10,126},{4,0,184},{5,0,390},{6,0,337}
+,{7,0,23},{7,0,494},{7,0,618},{7,0,1456},{8,0,27},{8,0,599},{10,0,153},{139,0,
+710},{6,10,232},{6,10,412},{7,10,1074},{8,10,9},{8,10,157},{8,10,786},{9,10,196}
+,{9,10,352},{9,10,457},{10,10,337},{11,10,232},{11,10,877},{12,10,480},{140,10,
+546},{13,0,38},{135,10,958},{4,10,382},{136,10,579},{4,10,212},{135,10,1206},{4,
+11,555},{8,11,536},{138,11,288},{11,11,139},{139,11,171},{9,11,370},{138,11,90},
+{132,0,1015},{134,0,1088},{5,10,655},{135,11,977},{134,0,1585},{17,10,67},{147,
+10,74},{10,0,227},{11,0,497},{11,0,709},{140,0,415},{6,0,360},{7,0,1664},{136,0,
+478},{7,0,95},{6,10,231},{136,10,423},{140,11,65},{4,11,257},{135,11,2031},{135,
+11,1768},{133,10,300},{139,11,211},{136,0,699},{6,10,237},{7,10,611},{8,10,100},
+{9,10,416},{11,10,335},{12,10,173},{146,10,101},{14,0,26},{146,0,150},{6,0,581},
+{135,0,1119},{135,10,1208},{132,0,739},{6,11,83},{6,11,1733},{135,11,1389},{137,
+0,869},{4,0,67},{5,0,422},{7,0,1037},{7,0,1289},{7,0,1555},{9,0,741},{145,0,108}
+,{133,10,199},{12,10,427},{146,10,38},{136,0,464},{142,0,42},{10,0,96},{8,11,501
+},{137,11,696},{134,11,592},{4,0,512},{4,0,966},{5,0,342},{6,0,1855},{8,0,869},{
+8,0,875},{8,0,901},{144,0,26},{8,0,203},{11,0,823},{11,0,846},{12,0,482},{13,0,
+277},{13,0,302},{13,0,464},{14,0,205},{142,0,221},{4,0,449},{133,0,718},{7,11,
+1718},{9,11,95},{9,11,274},{10,11,279},{10,11,317},{10,11,420},{11,11,303},{11,
+11,808},{12,11,134},{12,11,367},{13,11,149},{13,11,347},{14,11,349},{14,11,406},
+{18,11,22},{18,11,89},{18,11,122},{147,11,47},{133,11,26},{4,0,355},{6,0,311},{9
+,0,256},{138,0,404},{132,11,550},{10,0,758},{6,10,312},{6,10,1715},{10,10,584},{
+11,10,546},{11,10,692},{12,10,259},{12,10,295},{13,10,46},{141,10,154},{136,11,
+822},{5,0,827},{4,11,902},{5,11,809},{6,11,122},{135,11,896},{5,0,64},{140,0,581
+},{4,0,442},{6,0,739},{7,0,1047},{7,0,1352},{7,0,1643},{7,11,1911},{9,11,449},{
+10,11,192},{138,11,740},{135,11,262},{132,10,588},{133,11,620},{5,0,977},{6,0,
+288},{7,0,528},{4,11,34},{5,11,574},{7,11,279},{7,11,1624},{136,11,601},{6,0,
+1375},{4,10,231},{5,10,61},{6,10,104},{7,10,729},{7,10,964},{7,10,1658},{140,10,
+414},{6,10,263},{138,10,757},{132,10,320},{4,0,254},{7,0,1309},{5,11,332},{135,
+11,1309},{6,11,261},{8,11,182},{139,11,943},{132,10,225},{6,0,12},{135,0,1219},{
+4,0,275},{12,0,376},{6,11,1721},{141,11,490},{4,11,933},{133,11,880},{6,0,951},{
+6,0,1109},{6,0,1181},{7,0,154},{4,10,405},{7,10,817},{14,10,58},{17,10,37},{146,
+10,124},{6,0,1520},{133,10,974},{134,0,1753},{6,0,369},{6,0,502},{7,0,1036},{8,0
+,348},{9,0,452},{10,0,26},{11,0,224},{11,0,387},{11,0,772},{12,0,95},{12,0,629},
+{13,0,195},{13,0,207},{13,0,241},{14,0,260},{14,0,270},{143,0,140},{132,0,269},{
+5,0,480},{7,0,532},{7,0,1197},{7,0,1358},{8,0,291},{11,0,349},{142,0,396},{5,10,
+235},{7,10,1239},{11,10,131},{140,10,370},{7,10,956},{7,10,1157},{7,10,1506},{7,
+10,1606},{7,10,1615},{7,10,1619},{7,10,1736},{7,10,1775},{8,10,590},{9,10,324},{
+9,10,736},{9,10,774},{9,10,776},{9,10,784},{10,10,567},{10,10,708},{11,10,518},{
+11,10,613},{11,10,695},{11,10,716},{11,10,739},{11,10,770},{11,10,771},{11,10,
+848},{11,10,857},{11,10,931},{11,10,947},{12,10,326},{12,10,387},{12,10,484},{12
+,10,528},{12,10,552},{12,10,613},{13,10,189},{13,10,256},{13,10,340},{13,10,432}
+,{13,10,436},{13,10,440},{13,10,454},{14,10,174},{14,10,220},{14,10,284},{14,10,
+390},{145,10,121},{8,11,598},{9,11,664},{138,11,441},{9,10,137},{138,10,221},{
+133,11,812},{148,0,15},{134,0,1341},{6,0,1017},{4,11,137},{7,11,1178},{135,11,
+1520},{7,10,390},{138,10,140},{7,11,1260},{135,11,1790},{137,11,191},{135,10,
+1144},{6,0,1810},{7,0,657},{8,0,886},{10,0,857},{14,0,440},{144,0,96},{8,0,533},
+{6,11,1661},{7,11,1975},{7,11,2009},{135,11,2011},{6,0,1453},{134,10,464},{132,
+11,715},{5,10,407},{11,10,204},{11,10,243},{11,10,489},{12,10,293},{19,10,37},{
+20,10,73},{150,10,38},{133,11,703},{4,0,211},{7,0,1483},{5,10,325},{8,10,5},{8,
+10,227},{9,10,105},{10,10,585},{140,10,614},{4,0,332},{5,0,335},{6,0,238},{7,0,
+269},{7,0,811},{7,0,1797},{8,0,836},{9,0,507},{141,0,242},{5,11,89},{7,11,1915},
+{9,11,185},{9,11,235},{9,11,496},{10,11,64},{10,11,270},{10,11,403},{10,11,469},
+{10,11,529},{10,11,590},{11,11,140},{11,11,860},{13,11,1},{13,11,422},{14,11,341
+},{14,11,364},{17,11,93},{18,11,113},{19,11,97},{147,11,113},{133,11,695},{16,0,
+19},{5,11,6},{6,11,183},{6,10,621},{7,11,680},{7,11,978},{7,11,1013},{7,11,1055}
+,{12,11,230},{13,11,172},{13,10,504},{146,11,29},{136,0,156},{133,0,1009},{6,11,
+29},{139,11,63},{134,0,820},{134,10,218},{7,10,454},{7,10,782},{8,10,768},{140,
+10,686},{5,0,228},{6,0,203},{7,0,156},{8,0,347},{9,0,265},{18,0,39},{20,0,54},{
+21,0,31},{22,0,3},{23,0,0},{15,11,8},{18,11,39},{20,11,54},{21,11,31},{22,11,3},
+{151,11,0},{7,0,1131},{135,0,1468},{144,10,0},{134,0,1276},{10,10,676},{140,10,
+462},{132,11,311},{134,11,1740},{7,11,170},{8,11,90},{8,11,177},{8,11,415},{11,
+11,714},{142,11,281},{134,10,164},{6,0,1792},{138,0,849},{150,10,50},{5,0,291},{
+5,0,318},{7,0,765},{9,0,389},{12,0,548},{8,11,522},{142,11,328},{11,11,91},{13,
+11,129},{15,11,101},{145,11,125},{4,11,494},{6,11,74},{7,11,44},{7,11,407},{8,11
+,551},{12,11,17},{15,11,5},{148,11,11},{4,11,276},{133,11,296},{6,10,343},{7,10,
+195},{7,11,1777},{9,10,226},{10,10,197},{10,10,575},{11,10,502},{139,10,899},{10
+,0,525},{139,0,82},{14,0,453},{4,11,7},{5,11,90},{5,11,158},{6,11,542},{7,11,221
+},{7,11,1574},{9,11,490},{10,11,540},{11,11,443},{139,11,757},{135,0,666},{22,10
+,29},{150,11,29},{4,0,422},{147,10,8},{5,0,355},{145,0,0},{6,0,1873},{9,0,918},{
+7,11,588},{9,11,175},{138,11,530},{143,11,31},{11,0,165},{7,10,1125},{9,10,143},
+{14,10,405},{150,10,21},{9,0,260},{137,0,905},{5,11,872},{6,11,57},{6,11,479},{6
+,11,562},{7,11,471},{7,11,1060},{9,11,447},{9,11,454},{141,11,6},{138,11,704},{
+133,0,865},{5,0,914},{134,0,1625},{133,0,234},{7,0,1383},{5,11,31},{6,11,614},{
+145,11,61},{7,11,1200},{138,11,460},{6,11,424},{135,11,1866},{136,0,306},{5,10,
+959},{12,11,30},{13,11,148},{14,11,87},{14,11,182},{16,11,42},{18,11,92},{148,11
+,70},{6,0,1919},{6,0,1921},{9,0,923},{9,0,930},{9,0,941},{9,0,949},{9,0,987},{9,
+0,988},{9,0,992},{12,0,802},{12,0,815},{12,0,856},{12,0,885},{12,0,893},{12,0,
+898},{12,0,919},{12,0,920},{12,0,941},{12,0,947},{15,0,183},{15,0,185},{15,0,189
+},{15,0,197},{15,0,202},{15,0,233},{18,0,218},{18,0,219},{18,0,233},{143,11,156}
+,{135,10,1759},{136,10,173},{13,0,163},{13,0,180},{18,0,78},{20,0,35},{5,11,13},
+{134,11,142},{134,10,266},{6,11,97},{7,11,116},{8,11,322},{8,11,755},{9,11,548},
+{10,11,714},{11,11,884},{141,11,324},{135,0,1312},{9,0,814},{137,11,676},{133,0,
+707},{135,0,1493},{6,0,421},{7,0,61},{7,0,1540},{10,0,11},{138,0,501},{12,0,733}
+,{12,0,766},{7,11,866},{135,11,1163},{137,0,341},{142,0,98},{145,11,115},{135,11
+,1111},{136,10,300},{136,0,1014},{8,11,1},{9,11,112},{138,11,326},{132,11,730},{
+5,11,488},{6,11,527},{7,11,489},{7,11,1636},{8,11,121},{8,11,144},{8,11,359},{9,
+11,193},{9,11,241},{9,11,336},{9,11,882},{11,11,266},{11,11,372},{11,11,944},{12
+,11,401},{140,11,641},{6,0,971},{134,0,1121},{6,0,102},{7,0,72},{15,0,142},{147,
+0,67},{151,0,30},{135,0,823},{134,0,1045},{5,10,427},{5,10,734},{7,10,478},{136,
+10,52},{7,0,1930},{11,10,217},{142,10,165},{6,0,1512},{135,0,1870},{9,11,31},{10
+,11,244},{10,11,699},{12,11,149},{141,11,497},{133,11,377},{145,11,101},{10,11,
+158},{13,11,13},{13,11,137},{13,11,258},{14,11,111},{14,11,225},{14,11,253},{14,
+11,304},{14,11,339},{14,11,417},{146,11,33},{6,0,87},{6,10,1734},{7,10,20},{7,10
+,1056},{8,10,732},{9,10,406},{9,10,911},{138,10,694},{134,0,1243},{137,0,245},{7
+,0,68},{8,0,48},{8,0,88},{8,0,582},{8,0,681},{9,0,373},{9,0,864},{11,0,157},{11,
+0,336},{11,0,843},{148,0,27},{8,11,663},{144,11,8},{133,10,613},{4,0,88},{5,0,
+137},{5,0,174},{5,0,777},{6,0,1664},{6,0,1725},{7,0,77},{7,0,426},{7,0,1317},{7,
+0,1355},{8,0,126},{8,0,563},{9,0,523},{9,0,750},{10,0,310},{10,0,836},{11,0,42},
+{11,0,318},{11,0,731},{12,0,68},{12,0,92},{12,0,507},{12,0,692},{13,0,81},{13,0,
+238},{13,0,374},{14,0,436},{18,0,138},{19,0,78},{19,0,111},{20,0,55},{20,0,77},{
+148,0,92},{141,0,418},{4,0,938},{137,0,625},{138,0,351},{5,11,843},{7,10,32},{7,
+10,984},{8,10,85},{8,10,709},{9,10,579},{9,10,847},{9,10,856},{10,10,799},{11,10
+,258},{11,10,1007},{12,10,331},{12,10,615},{13,10,188},{13,10,435},{14,10,8},{15
+,10,165},{16,10,27},{148,10,40},{6,0,1668},{7,0,1499},{8,0,117},{9,0,314},{138,0
+,174},{135,0,707},{132,11,554},{133,11,536},{5,0,403},{5,11,207},{9,11,79},{11,
+11,625},{145,11,7},{132,11,424},{136,11,785},{4,10,167},{135,10,82},{9,0,7},{23,
+0,6},{9,11,7},{151,11,6},{6,0,282},{5,10,62},{6,10,534},{7,10,74},{7,10,678},{7,
+10,684},{7,10,1043},{7,10,1072},{8,10,280},{8,10,541},{8,10,686},{9,10,258},{10,
+10,519},{11,10,252},{140,10,282},{138,10,33},{132,10,359},{4,0,44},{5,0,311},{6,
+0,156},{7,0,639},{7,0,762},{7,0,1827},{9,0,8},{9,0,462},{148,0,83},{7,11,769},{9
+,11,18},{138,11,358},{4,0,346},{7,0,115},{9,0,180},{9,0,456},{10,0,363},{4,11,
+896},{134,11,1777},{133,10,211},{7,0,761},{7,0,1051},{137,0,545},{6,10,145},{141
+,10,336},{7,11,750},{9,11,223},{11,11,27},{11,11,466},{12,11,624},{14,11,265},{
+146,11,61},{6,0,752},{6,0,768},{6,0,1195},{6,0,1254},{6,0,1619},{137,0,835},{6,0
+,1936},{8,0,930},{136,0,960},{132,10,263},{132,11,249},{12,0,653},{132,10,916},{
+4,11,603},{133,11,661},{8,0,344},{4,11,11},{6,11,128},{7,11,231},{7,11,1533},{
+138,11,725},{134,0,1483},{134,0,875},{6,0,185},{7,0,1899},{9,0,875},{139,0,673},
+{15,10,155},{144,10,79},{7,0,93},{7,0,210},{7,0,1223},{8,0,451},{8,0,460},{11,0,
+353},{11,0,475},{4,10,599},{6,10,1634},{7,10,67},{7,10,691},{7,10,979},{7,10,
+1697},{8,10,207},{8,10,214},{8,10,231},{8,10,294},{8,10,336},{8,10,428},{8,10,
+471},{8,10,622},{8,10,626},{8,10,679},{8,10,759},{8,10,829},{9,10,11},{9,10,246}
+,{9,10,484},{9,10,573},{9,10,706},{9,10,762},{9,10,798},{9,10,855},{9,10,870},{9
+,10,912},{10,10,303},{10,10,335},{10,10,424},{10,10,461},{10,10,543},{10,10,759}
+,{10,10,814},{11,10,59},{11,10,235},{11,10,590},{11,10,929},{11,10,963},{11,10,
+987},{12,10,114},{12,10,182},{12,10,226},{12,10,332},{12,10,439},{12,10,575},{12
+,10,598},{12,10,675},{13,10,8},{13,10,125},{13,10,194},{13,10,287},{14,10,197},{
+14,10,383},{15,10,53},{17,10,63},{19,10,46},{19,10,98},{19,10,106},{148,10,85},{
+132,11,476},{4,0,327},{5,0,478},{7,0,1332},{136,0,753},{5,0,1020},{133,0,1022},{
+135,11,1807},{4,0,103},{133,0,401},{4,0,499},{135,0,1421},{10,0,207},{13,0,164},
+{147,10,126},{9,11,20},{10,11,324},{139,11,488},{132,0,96},{9,11,280},{138,11,
+134},{135,0,968},{133,10,187},{135,10,1286},{5,11,112},{6,11,103},{134,11,150},{
+8,0,914},{10,0,3},{4,10,215},{9,10,38},{11,10,23},{11,10,127},{139,10,796},{135,
+0,399},{6,0,563},{137,0,224},{6,0,704},{134,0,1214},{4,11,708},{8,11,15},{9,11,
+50},{9,11,386},{11,11,18},{11,11,529},{140,11,228},{4,11,563},{7,11,109},{7,11,
+592},{7,11,637},{7,11,770},{7,11,1701},{8,11,436},{8,11,463},{9,11,60},{9,11,335
+},{9,11,904},{10,11,73},{11,11,434},{12,11,585},{13,11,331},{18,11,110},{148,11,
+60},{134,0,1559},{132,11,502},{6,11,347},{138,11,161},{4,11,33},{5,11,102},{5,11
+,500},{6,11,284},{7,11,1079},{7,11,1423},{7,11,1702},{8,11,470},{9,11,554},{9,11
+,723},{139,11,333},{7,11,246},{135,11,840},{6,11,10},{8,11,571},{9,11,739},{143,
+11,91},{8,0,861},{10,0,905},{12,0,730},{12,0,789},{133,11,626},{134,0,946},{5,0,
+746},{12,0,333},{14,0,332},{12,11,333},{142,11,332},{5,11,18},{6,11,526},{13,11,
+24},{13,11,110},{19,11,5},{147,11,44},{4,0,910},{5,0,832},{135,10,2002},{10,11,
+768},{139,11,787},{4,11,309},{5,11,462},{7,11,970},{135,11,1097},{4,10,28},{5,10
+,440},{7,10,248},{11,10,833},{140,10,344},{134,10,1654},{6,0,632},{6,0,652},{6,0
+,1272},{6,0,1384},{134,0,1560},{134,11,1704},{6,0,1393},{133,10,853},{6,10,249},
+{7,10,1234},{139,10,573},{5,11,86},{7,11,743},{9,11,85},{10,11,281},{10,11,432},
+{11,11,490},{12,11,251},{13,11,118},{14,11,378},{146,11,143},{5,11,524},{133,11,
+744},{134,0,1514},{10,0,201},{142,0,319},{7,0,717},{10,0,510},{7,10,392},{8,10,
+20},{8,10,172},{8,10,690},{9,10,383},{9,10,845},{11,10,293},{11,10,832},{11,10,
+920},{11,10,984},{141,10,221},{134,0,1381},{5,10,858},{133,10,992},{8,0,528},{
+137,0,348},{10,11,107},{140,11,436},{4,0,20},{133,0,616},{134,0,1251},{132,11,
+927},{10,11,123},{12,11,670},{13,11,371},{14,11,142},{146,11,94},{134,0,1163},{7
+,11,1149},{137,11,156},{134,0,307},{133,11,778},{7,0,1091},{135,0,1765},{5,11,
+502},{6,10,268},{137,10,62},{8,11,196},{10,11,283},{139,11,406},{4,0,26},{5,0,
+429},{6,0,245},{7,0,704},{7,0,1379},{135,0,1474},{133,11,855},{132,0,881},{4,0,
+621},{135,11,1596},{7,11,1400},{9,11,446},{138,11,45},{6,0,736},{138,10,106},{
+133,0,542},{134,0,348},{133,0,868},{136,0,433},{135,0,1495},{138,0,771},{6,10,
+613},{136,10,223},{138,0,215},{141,0,124},{136,11,391},{135,11,172},{132,10,670}
+,{140,0,55},{9,10,40},{139,10,136},{7,0,62},{147,0,112},{132,0,856},{132,11,568}
+,{12,0,270},{139,10,259},{8,0,572},{137,0,698},{4,11,732},{9,10,310},{137,10,682
+},{142,10,296},{134,0,939},{136,11,733},{135,11,1435},{7,10,1401},{135,10,1476},
+{6,0,352},{4,10,296},{7,10,401},{7,10,1410},{7,10,1594},{7,10,1674},{8,10,63},{8
+,10,660},{137,10,74},{4,11,428},{133,11,668},{4,10,139},{4,10,388},{140,10,188},
+{7,11,2015},{140,11,665},{132,0,647},{146,0,10},{138,0,220},{142,0,464},{132,0,
+109},{134,0,1746},{6,0,515},{4,10,747},{6,11,1623},{6,11,1681},{7,10,649},{7,10,
+1479},{135,10,1583},{133,10,232},{135,0,566},{137,10,887},{4,0,40},{10,0,67},{11
+,0,117},{11,0,768},{139,0,935},{132,0,801},{7,0,992},{8,0,301},{9,0,722},{12,0,
+63},{13,0,29},{14,0,161},{143,0,18},{139,0,923},{6,11,1748},{8,11,715},{9,11,802
+},{10,11,46},{10,11,819},{13,11,308},{14,11,351},{14,11,363},{146,11,67},{137,11
+,745},{7,0,1145},{4,10,14},{7,10,1801},{10,10,748},{141,10,458},{4,11,63},{5,11,
+347},{134,11,474},{135,0,568},{4,10,425},{7,11,577},{7,11,1432},{9,11,475},{9,11
+,505},{9,11,526},{9,11,609},{9,11,689},{9,11,726},{9,11,735},{9,11,738},{10,11,
+556},{10,11,674},{10,11,684},{11,11,89},{11,11,202},{11,11,272},{11,11,380},{11,
+11,415},{11,11,505},{11,11,537},{11,11,550},{11,11,562},{11,11,640},{11,11,667},
+{11,11,688},{11,11,847},{11,11,927},{11,11,930},{11,11,940},{12,11,144},{12,11,
+325},{12,11,329},{12,11,389},{12,11,403},{12,11,451},{12,11,515},{12,11,604},{12
+,11,616},{12,11,626},{13,11,66},{13,11,131},{13,11,167},{13,11,236},{13,11,368},
+{13,11,411},{13,11,434},{13,11,453},{13,11,461},{13,11,474},{14,11,59},{14,11,60
+},{14,11,139},{14,11,152},{14,11,276},{14,11,353},{14,11,402},{15,11,28},{15,11,
+81},{15,11,123},{15,11,152},{18,11,136},{148,11,88},{137,0,247},{135,11,1622},{9
+,11,544},{11,11,413},{144,11,25},{4,0,645},{7,0,825},{6,10,1768},{135,11,89},{
+140,0,328},{5,10,943},{134,10,1779},{134,0,1363},{5,10,245},{6,10,576},{7,10,582
+},{136,10,225},{134,0,1280},{5,11,824},{133,11,941},{7,11,440},{8,11,230},{139,
+11,106},{5,0,28},{6,0,204},{10,0,320},{10,0,583},{13,0,502},{14,0,72},{14,0,274}
+,{14,0,312},{14,0,344},{15,0,159},{16,0,62},{16,0,69},{17,0,30},{18,0,42},{18,0,
+53},{18,0,84},{18,0,140},{19,0,68},{19,0,85},{20,0,5},{20,0,45},{20,0,101},{22,0
+,7},{150,0,20},{4,0,558},{6,0,390},{7,0,162},{7,0,689},{9,0,360},{138,0,653},{
+134,0,764},{6,0,862},{137,0,833},{5,0,856},{6,0,1672},{6,0,1757},{134,0,1781},{5
+,0,92},{10,0,736},{140,0,102},{6,0,1927},{6,0,1944},{8,0,924},{8,0,948},{10,0,
+967},{138,0,978},{134,0,1479},{5,0,590},{8,0,360},{9,0,213},{138,0,63},{134,0,
+1521},{6,0,709},{134,0,891},{132,10,443},{13,0,477},{14,0,120},{148,0,61},{4,11,
+914},{5,11,800},{133,11,852},{10,11,54},{141,11,115},{4,11,918},{133,11,876},{
+139,11,152},{4,11,92},{133,11,274},{135,11,1901},{9,11,800},{10,11,693},{11,11,
+482},{11,11,734},{139,11,789},{9,0,483},{132,10,298},{6,0,1213},{141,11,498},{
+135,11,1451},{133,11,743},{4,0,1022},{10,0,1000},{12,0,957},{12,0,980},{12,0,
+1013},{14,0,481},{144,0,116},{8,0,503},{17,0,29},{4,11,49},{7,11,280},{135,11,
+1633},{135,0,1712},{134,0,466},{136,11,47},{5,10,164},{7,10,121},{142,10,189},{7
+,10,812},{7,10,1261},{7,10,1360},{9,10,632},{140,10,352},{139,10,556},{132,0,731
+},{5,11,272},{5,11,908},{5,11,942},{7,11,1008},{7,11,1560},{8,11,197},{9,11,47},
+{11,11,538},{139,11,742},{4,10,172},{9,10,611},{10,10,436},{12,10,673},{141,10,
+255},{133,10,844},{10,0,484},{11,0,754},{12,0,457},{14,0,171},{14,0,389},{146,0,
+153},{9,10,263},{10,10,147},{138,10,492},{137,11,891},{138,0,241},{133,10,537},{
+6,0,2005},{136,0,964},{137,10,842},{151,11,8},{4,11,407},{132,11,560},{135,11,
+1884},{6,0,1100},{134,0,1242},{135,0,954},{5,10,230},{5,10,392},{6,10,420},{9,10
+,568},{140,10,612},{4,11,475},{11,11,35},{11,11,90},{13,11,7},{13,11,71},{13,11,
+177},{142,11,422},{136,11,332},{135,0,1958},{6,0,549},{8,0,34},{8,0,283},{9,0,
+165},{138,0,475},{10,0,952},{12,0,966},{140,0,994},{5,0,652},{5,0,701},{135,0,
+449},{4,0,655},{7,0,850},{17,0,75},{146,0,137},{4,0,146},{7,0,1618},{8,0,670},{5
+,10,41},{7,10,1459},{7,10,1469},{7,10,1859},{9,10,549},{139,10,905},{133,10,696}
+,{6,0,159},{6,0,364},{7,0,516},{137,0,518},{135,0,1439},{6,11,222},{7,11,636},{7
+,11,1620},{8,11,409},{9,11,693},{139,11,77},{13,0,151},{141,11,45},{6,0,1027},{4
+,11,336},{132,10,771},{139,11,392},{10,11,121},{11,11,175},{149,11,16},{8,0,950}
+,{138,0,983},{133,10,921},{135,0,993},{6,10,180},{7,10,1137},{8,10,751},{139,10,
+805},{7,0,501},{9,0,111},{10,0,141},{11,0,332},{13,0,43},{13,0,429},{14,0,130},{
+14,0,415},{145,0,102},{4,10,183},{5,11,882},{7,10,271},{11,10,824},{11,10,952},{
+13,10,278},{13,10,339},{13,10,482},{14,10,424},{148,10,99},{4,10,19},{5,10,477},
+{5,10,596},{6,10,505},{7,10,1221},{11,10,907},{12,10,209},{141,10,214},{135,10,
+1215},{133,0,452},{132,11,426},{5,0,149},{136,0,233},{133,0,935},{6,11,58},{7,11
+,654},{7,11,745},{7,11,1969},{8,11,240},{8,11,675},{9,11,479},{9,11,731},{10,11,
+330},{10,11,593},{10,11,817},{11,11,32},{11,11,133},{11,11,221},{145,11,68},{12,
+0,582},{18,0,131},{7,11,102},{137,11,538},{136,0,801},{134,10,1645},{132,0,70},{
+6,10,92},{6,10,188},{7,10,1269},{7,10,1524},{7,10,1876},{10,10,228},{139,10,1020
+},{4,10,459},{133,10,966},{138,0,369},{16,0,36},{140,10,330},{141,11,366},{7,0,
+721},{10,0,236},{12,0,204},{6,10,18},{7,10,932},{8,10,757},{9,10,54},{9,10,65},{
+9,10,844},{10,10,113},{10,10,315},{10,10,798},{11,10,153},{12,10,151},{12,10,392
+},{12,10,666},{142,10,248},{7,0,241},{10,0,430},{8,10,548},{9,10,532},{10,10,117
+},{11,10,351},{11,10,375},{143,10,23},{134,10,1742},{133,10,965},{133,11,566},{6
+,11,48},{135,11,63},{134,10,182},{10,10,65},{10,10,488},{138,10,497},{6,11,114},
+{7,11,1224},{7,11,1556},{136,11,3},{134,0,1817},{8,11,576},{137,11,267},{6,0,
+1078},{144,0,16},{9,10,588},{138,10,260},{138,0,1021},{5,0,406},{134,0,2022},{
+133,11,933},{6,0,69},{135,0,117},{7,0,1830},{136,11,427},{4,0,432},{135,0,824},{
+134,10,1786},{133,0,826},{139,11,67},{133,11,759},{135,10,308},{137,0,816},{133,
+0,1000},{4,0,297},{6,0,529},{7,0,152},{7,0,713},{7,0,1845},{8,0,710},{8,0,717},{
+12,0,639},{140,0,685},{7,0,423},{136,10,588},{136,10,287},{136,0,510},{134,0,
+1048},{6,0,618},{7,11,56},{7,11,1989},{8,11,337},{8,11,738},{9,11,600},{10,11,
+483},{12,11,37},{13,11,447},{142,11,92},{4,0,520},{135,0,575},{8,0,990},{138,0,
+977},{135,11,774},{9,11,347},{11,11,24},{140,11,170},{136,11,379},{140,10,290},{
+132,11,328},{4,0,321},{134,0,569},{4,11,101},{135,11,1171},{7,0,723},{7,0,1135},
+{5,11,833},{136,11,744},{7,10,719},{8,10,809},{136,10,834},{8,0,921},{136,10,796
+},{5,10,210},{6,10,213},{7,10,60},{10,10,364},{139,10,135},{5,0,397},{6,0,154},{
+7,0,676},{8,0,443},{8,0,609},{9,0,24},{9,0,325},{10,0,35},{11,0,535},{11,0,672},
+{11,0,1018},{12,0,637},{16,0,30},{5,10,607},{8,10,326},{136,10,490},{4,10,701},{
+5,10,472},{6,11,9},{6,11,397},{7,11,53},{7,11,1742},{9,10,758},{10,11,632},{11,
+11,828},{140,11,146},{135,10,380},{135,10,1947},{148,11,109},{10,10,278},{138,11
+,278},{134,0,856},{7,0,139},{4,10,386},{8,10,405},{8,10,728},{9,10,497},{11,10,
+110},{11,10,360},{15,10,37},{144,10,84},{141,0,282},{133,0,981},{5,0,288},{7,10,
+1452},{7,10,1480},{8,10,634},{140,10,472},{7,0,1890},{8,11,367},{10,11,760},{14,
+11,79},{20,11,17},{152,11,0},{4,10,524},{136,10,810},{4,0,56},{7,0,1791},{8,0,
+607},{8,0,651},{11,0,465},{11,0,835},{12,0,337},{141,0,480},{10,10,238},{141,10,
+33},{11,11,417},{12,11,223},{140,11,265},{9,0,158},{10,0,411},{140,0,261},{133,
+10,532},{133,10,997},{12,11,186},{12,11,292},{14,11,100},{146,11,70},{6,0,1403},
+{136,0,617},{134,0,1205},{139,0,563},{4,0,242},{134,0,333},{4,11,186},{5,11,157}
+,{8,11,168},{138,11,6},{132,0,369},{133,11,875},{5,10,782},{5,10,829},{134,10,
+1738},{134,0,622},{135,11,1272},{6,0,1407},{7,11,111},{136,11,581},{7,10,1823},{
+139,10,693},{7,0,160},{10,0,624},{142,0,279},{132,0,363},{10,11,589},{12,11,111}
+,{13,11,260},{14,11,82},{18,11,63},{147,11,45},{7,11,1364},{7,11,1907},{141,11,
+158},{4,11,404},{4,11,659},{135,11,675},{13,11,211},{14,11,133},{14,11,204},{15,
+11,64},{15,11,69},{15,11,114},{16,11,10},{19,11,23},{19,11,35},{19,11,39},{19,11
+,51},{19,11,71},{19,11,75},{152,11,15},{4,10,78},{5,10,96},{5,10,182},{7,10,1724
+},{7,10,1825},{10,10,394},{10,10,471},{11,10,532},{14,10,340},{145,10,88},{135,
+10,1964},{133,11,391},{11,11,887},{14,11,365},{142,11,375},{5,11,540},{6,11,1697
+},{7,11,222},{136,11,341},{134,11,78},{9,0,601},{9,0,619},{10,0,505},{10,0,732},
+{11,0,355},{140,0,139},{134,0,292},{139,0,174},{5,0,177},{6,0,616},{7,0,827},{9,
+0,525},{138,0,656},{10,0,31},{6,10,215},{7,10,1028},{7,10,1473},{7,10,1721},{9,
+10,424},{138,10,779},{135,10,584},{136,11,293},{134,0,685},{135,11,1868},{133,11
+,460},{7,0,647},{6,10,67},{7,10,1630},{9,10,354},{9,10,675},{10,10,830},{14,10,
+80},{145,10,80},{4,0,161},{133,0,631},{6,10,141},{7,10,225},{9,10,59},{9,10,607}
+,{10,10,312},{11,10,687},{12,10,555},{13,10,373},{13,10,494},{148,10,58},{7,11,
+965},{7,11,1460},{135,11,1604},{136,10,783},{134,11,388},{6,0,722},{6,0,1267},{4
+,11,511},{9,11,333},{9,11,379},{10,11,602},{11,11,441},{11,11,723},{11,11,976},{
+140,11,357},{134,0,1797},{135,0,1684},{9,0,469},{9,0,709},{12,0,512},{14,0,65},{
+17,0,12},{5,11,938},{136,11,707},{7,0,1230},{136,0,531},{10,0,229},{11,0,73},{11
+,0,376},{139,0,433},{12,0,268},{12,0,640},{142,0,119},{7,10,430},{139,10,46},{6,
+0,558},{7,0,651},{8,0,421},{9,0,0},{10,0,34},{139,0,1008},{6,0,106},{7,0,1786},{
+7,0,1821},{9,0,102},{9,0,763},{5,10,602},{7,10,2018},{137,10,418},{5,0,65},{6,0,
+416},{7,0,1720},{7,0,1924},{10,0,109},{11,0,14},{11,0,70},{11,0,569},{11,0,735},
+{15,0,153},{20,0,80},{136,10,677},{135,11,1625},{137,11,772},{136,0,595},{6,11,
+469},{7,11,1709},{138,11,515},{7,0,1832},{138,0,374},{9,0,106},{9,0,163},{9,0,
+296},{10,0,167},{10,0,172},{10,0,777},{139,0,16},{6,0,6},{7,0,81},{7,0,771},{7,0
+,1731},{9,0,405},{138,0,421},{4,11,500},{135,11,938},{5,11,68},{134,11,383},{5,0
+,881},{133,0,885},{6,0,854},{6,0,1132},{6,0,1495},{6,0,1526},{6,0,1533},{134,0,
+1577},{4,11,337},{6,11,353},{7,11,1934},{8,11,488},{137,11,429},{7,11,236},{7,11
+,1795},{8,11,259},{9,11,135},{9,11,177},{10,11,825},{11,11,115},{11,11,370},{11,
+11,405},{11,11,604},{12,11,10},{12,11,667},{12,11,669},{13,11,76},{14,11,310},{
+15,11,76},{15,11,147},{148,11,23},{5,0,142},{134,0,546},{4,11,15},{5,11,22},{6,
+11,244},{7,11,40},{7,11,200},{7,11,906},{7,11,1199},{9,11,616},{10,11,716},{11,
+11,635},{11,11,801},{140,11,458},{5,0,466},{11,0,571},{12,0,198},{13,0,283},{14,
+0,186},{15,0,21},{15,0,103},{135,10,329},{4,0,185},{5,0,257},{5,0,839},{5,0,936}
+,{9,0,399},{10,0,258},{10,0,395},{10,0,734},{11,0,1014},{12,0,23},{13,0,350},{14
+,0,150},{19,0,6},{135,11,1735},{12,11,36},{141,11,337},{5,11,598},{7,11,791},{8,
+11,108},{137,11,123},{132,10,469},{7,0,404},{7,0,1377},{7,0,1430},{7,0,2017},{8,
+0,149},{8,0,239},{8,0,512},{8,0,793},{8,0,818},{9,0,474},{9,0,595},{10,0,122},{
+10,0,565},{10,0,649},{10,0,783},{11,0,239},{11,0,295},{11,0,447},{11,0,528},{11,
+0,639},{11,0,800},{12,0,25},{12,0,77},{12,0,157},{12,0,256},{12,0,316},{12,0,390
+},{12,0,391},{12,0,395},{12,0,478},{12,0,503},{12,0,592},{12,0,680},{13,0,50},{
+13,0,53},{13,0,132},{13,0,198},{13,0,322},{13,0,415},{13,0,511},{14,0,71},{14,0,
+395},{15,0,71},{15,0,136},{17,0,123},{18,0,93},{147,0,58},{136,0,712},{134,10,
+1743},{5,10,929},{6,10,340},{8,10,376},{136,10,807},{6,0,1848},{8,0,860},{10,0,
+856},{10,0,859},{10,0,925},{10,0,941},{140,0,762},{6,0,629},{6,0,906},{9,0,810},
+{140,0,652},{5,10,218},{7,10,1610},{138,10,83},{7,10,1512},{135,10,1794},{4,0,
+377},{24,0,13},{4,11,155},{7,11,1689},{11,10,0},{144,10,78},{4,11,164},{5,11,151
+},{5,11,730},{5,11,741},{7,11,498},{7,11,870},{7,11,1542},{12,11,213},{14,11,36}
+,{14,11,391},{17,11,111},{18,11,6},{18,11,46},{18,11,151},{19,11,36},{20,11,32},
+{20,11,56},{20,11,69},{20,11,102},{21,11,4},{22,11,8},{22,11,10},{22,11,14},{150
+,11,31},{7,0,1842},{133,10,571},{4,10,455},{4,11,624},{135,11,1752},{134,0,1501}
+,{4,11,492},{5,11,451},{6,10,161},{7,10,372},{137,10,597},{132,10,349},{4,0,180}
+,{135,0,1906},{135,11,835},{141,11,70},{132,0,491},{137,10,751},{6,10,432},{139,
+10,322},{4,0,171},{138,0,234},{6,11,113},{135,11,436},{4,0,586},{7,0,1186},{138,
+0,631},{5,10,468},{10,10,325},{11,10,856},{12,10,345},{143,10,104},{5,10,223},{
+10,11,592},{10,11,753},{12,11,317},{12,11,355},{12,11,465},{12,11,469},{12,11,
+560},{12,11,578},{141,11,243},{132,10,566},{135,11,520},{4,10,59},{135,10,1394},
+{6,10,436},{139,10,481},{9,0,931},{10,0,334},{20,0,71},{4,10,48},{5,10,271},{7,
+10,953},{135,11,1878},{11,0,170},{5,10,610},{136,10,457},{133,10,755},{6,0,1587}
+,{135,10,1217},{4,10,197},{149,11,26},{133,11,585},{137,11,521},{133,0,765},{133
+,10,217},{139,11,586},{133,0,424},{9,11,752},{12,11,610},{13,11,431},{16,11,59},
+{146,11,109},{136,0,714},{7,0,685},{132,11,307},{9,0,420},{10,0,269},{10,0,285},
+{10,0,576},{11,0,397},{13,0,175},{145,0,90},{132,0,429},{133,11,964},{9,11,463},
+{138,11,595},{7,0,18},{7,0,699},{7,0,1966},{8,0,752},{9,0,273},{9,0,412},{9,0,
+703},{10,0,71},{10,0,427},{138,0,508},{4,10,165},{7,10,1398},{135,10,1829},{4,0,
+53},{5,0,186},{7,0,752},{7,0,828},{142,0,116},{8,0,575},{10,0,289},{139,0,319},{
+132,0,675},{134,0,1424},{4,11,75},{5,11,180},{6,11,500},{7,11,58},{7,11,710},{
+138,11,645},{133,11,649},{6,11,276},{7,11,282},{7,11,879},{7,11,924},{8,11,459},
+{9,11,599},{9,11,754},{11,11,574},{12,11,128},{12,11,494},{13,11,52},{13,11,301}
+,{15,11,30},{143,11,132},{6,0,647},{134,0,1095},{5,10,9},{7,10,297},{7,10,966},{
+140,10,306},{132,11,200},{134,0,1334},{5,10,146},{6,10,411},{138,10,721},{6,0,
+209},{6,0,1141},{6,0,1288},{8,0,468},{9,0,210},{11,0,36},{12,0,28},{12,0,630},{
+13,0,21},{13,0,349},{14,0,7},{145,0,13},{6,10,177},{135,10,467},{4,0,342},{135,0
+,1179},{10,11,454},{140,11,324},{4,0,928},{133,0,910},{7,0,1838},{6,11,225},{137
+,11,211},{16,0,101},{20,0,115},{20,0,118},{148,0,122},{4,0,496},{135,0,856},{4,0
+,318},{11,0,654},{7,11,718},{139,11,102},{8,11,58},{9,11,724},{11,11,809},{13,11
+,113},{145,11,72},{5,10,200},{6,11,345},{135,11,1247},{8,11,767},{8,11,803},{9,
+11,301},{137,11,903},{7,0,915},{8,0,247},{19,0,0},{7,11,1949},{136,11,674},{4,0,
+202},{5,0,382},{6,0,454},{7,0,936},{7,0,1803},{8,0,758},{9,0,375},{9,0,895},{10,
+0,743},{10,0,792},{11,0,978},{11,0,1012},{142,0,109},{7,0,1150},{7,0,1425},{7,0,
+1453},{140,0,513},{134,11,259},{138,0,791},{11,0,821},{12,0,110},{12,0,153},{18,
+0,41},{150,0,19},{134,10,481},{132,0,796},{6,0,445},{9,0,909},{136,11,254},{10,0
+,776},{13,0,345},{142,0,425},{4,10,84},{7,10,1482},{10,10,76},{138,10,142},{135,
+11,742},{6,0,578},{133,10,1015},{6,0,1387},{4,10,315},{5,10,507},{135,10,1370},{
+4,0,438},{133,0,555},{136,0,766},{133,11,248},{134,10,1722},{4,11,116},{5,11,95}
+,{5,11,445},{7,11,1688},{8,11,29},{9,11,272},{11,11,509},{139,11,915},{135,0,541
+},{133,11,543},{8,10,222},{8,10,476},{9,10,238},{11,10,516},{11,10,575},{15,10,
+109},{146,10,100},{6,0,880},{134,0,1191},{5,11,181},{136,11,41},{134,0,1506},{
+132,11,681},{7,11,25},{8,11,202},{138,11,536},{139,0,983},{137,0,768},{132,0,584
+},{9,11,423},{140,11,89},{8,11,113},{9,11,877},{10,11,554},{11,11,83},{12,11,136
+},{147,11,109},{7,10,706},{7,10,1058},{138,10,538},{133,11,976},{4,11,206},{135,
+11,746},{136,11,526},{140,0,737},{11,10,92},{11,10,196},{11,10,409},{11,10,450},
+{11,10,666},{11,10,777},{12,10,262},{13,10,385},{13,10,393},{15,10,115},{16,10,
+45},{145,10,82},{4,0,226},{4,0,326},{7,0,1770},{4,11,319},{5,11,699},{138,11,673
+},{6,10,40},{135,10,1781},{5,0,426},{8,0,30},{9,0,2},{11,0,549},{147,0,122},{6,0
+,1161},{134,0,1329},{138,10,97},{6,10,423},{7,10,665},{135,10,1210},{7,11,13},{8
+,11,226},{10,11,537},{11,11,570},{11,11,605},{11,11,799},{11,11,804},{12,11,85},
+{12,11,516},{12,11,623},{13,11,112},{13,11,361},{14,11,77},{14,11,78},{17,11,28}
+,{147,11,110},{132,11,769},{132,11,551},{132,11,728},{147,0,117},{9,11,57},{9,11
+,459},{10,11,425},{11,11,119},{12,11,184},{12,11,371},{13,11,358},{145,11,51},{5
+,11,188},{5,11,814},{8,11,10},{9,11,421},{9,11,729},{10,11,609},{139,11,689},{
+134,11,624},{135,11,298},{135,0,462},{4,0,345},{139,10,624},{136,10,574},{4,0,
+385},{7,0,265},{135,0,587},{6,0,808},{132,11,528},{133,0,398},{132,10,354},{4,0,
+347},{5,0,423},{5,0,996},{135,0,1329},{135,10,1558},{7,0,1259},{9,0,125},{139,0,
+65},{5,0,136},{6,0,136},{136,0,644},{5,11,104},{6,11,173},{135,11,1631},{135,0,
+469},{133,10,830},{4,0,278},{5,0,465},{135,0,1367},{7,11,810},{8,11,138},{8,11,
+342},{9,11,84},{10,11,193},{11,11,883},{140,11,359},{5,10,496},{135,10,203},{4,0
+,433},{133,0,719},{6,11,95},{134,10,547},{5,10,88},{137,10,239},{6,11,406},{10,
+11,409},{10,11,447},{11,11,44},{140,11,100},{134,0,1423},{7,10,650},{135,10,1310
+},{134,0,749},{135,11,1243},{135,0,1363},{6,0,381},{7,0,645},{7,0,694},{8,0,546}
+,{7,10,1076},{9,10,80},{11,10,78},{11,10,421},{11,10,534},{140,10,545},{134,11,
+1636},{135,11,1344},{12,0,277},{7,10,274},{11,10,479},{139,10,507},{6,0,705},{6,
+0,783},{6,0,1275},{6,0,1481},{4,11,282},{7,11,1034},{11,11,398},{11,11,634},{12,
+11,1},{12,11,79},{12,11,544},{14,11,237},{17,11,10},{146,11,20},{134,0,453},{4,0
+,555},{8,0,536},{10,0,288},{11,0,1005},{4,10,497},{135,10,1584},{5,11,118},{5,11
+,499},{6,11,476},{7,11,600},{7,11,888},{135,11,1096},{138,0,987},{7,0,1107},{7,
+10,261},{7,10,1115},{7,10,1354},{7,10,1588},{7,10,1705},{7,10,1902},{9,10,465},{
+10,10,248},{10,10,349},{10,10,647},{11,10,527},{11,10,660},{11,10,669},{12,10,
+529},{141,10,305},{7,11,296},{7,11,596},{8,11,560},{8,11,586},{9,11,612},{11,11,
+100},{11,11,304},{12,11,46},{13,11,89},{14,11,112},{145,11,122},{9,0,370},{138,0
+,90},{136,10,13},{132,0,860},{7,10,642},{8,10,250},{11,10,123},{11,10,137},{13,
+10,48},{142,10,95},{135,10,1429},{137,11,321},{132,0,257},{135,0,2031},{7,0,1768
+},{7,11,1599},{7,11,1723},{8,11,79},{8,11,106},{8,11,190},{8,11,302},{8,11,383},
+{9,11,119},{9,11,233},{9,11,298},{9,11,419},{9,11,471},{10,11,181},{10,11,406},{
+11,11,57},{11,11,85},{11,11,120},{11,11,177},{11,11,296},{11,11,382},{11,11,454}
+,{11,11,758},{11,11,999},{12,11,27},{12,11,98},{12,11,131},{12,11,245},{12,11,
+312},{12,11,446},{12,11,454},{13,11,25},{13,11,98},{13,11,426},{13,11,508},{14,
+11,6},{14,11,163},{14,11,272},{14,11,277},{14,11,370},{15,11,95},{15,11,138},{15
+,11,167},{17,11,18},{17,11,38},{20,11,96},{149,11,32},{5,11,722},{134,11,1759},{
+145,11,16},{6,0,1071},{134,0,1561},{10,10,545},{140,10,301},{6,0,83},{6,0,1733},
+{135,0,1389},{4,0,835},{135,0,1818},{133,11,258},{4,10,904},{133,10,794},{134,0,
+2006},{5,11,30},{7,11,495},{8,11,134},{9,11,788},{140,11,438},{135,11,2004},{137
+,0,696},{5,11,50},{6,11,439},{7,11,780},{135,11,1040},{7,11,772},{7,11,1104},{7,
+11,1647},{11,11,269},{11,11,539},{11,11,607},{11,11,627},{11,11,706},{11,11,975}
+,{12,11,248},{12,11,311},{12,11,434},{12,11,600},{12,11,622},{13,11,297},{13,11,
+367},{13,11,485},{14,11,69},{14,11,409},{143,11,108},{5,11,1},{6,11,81},{138,11,
+520},{7,0,1718},{9,0,95},{9,0,274},{10,0,279},{10,0,317},{10,0,420},{11,0,303},{
+11,0,808},{12,0,134},{12,0,367},{13,0,149},{13,0,347},{14,0,349},{14,0,406},{18,
+0,22},{18,0,89},{18,0,122},{147,0,47},{5,11,482},{8,11,98},{9,11,172},{10,11,222
+},{10,11,700},{10,11,822},{11,11,302},{11,11,778},{12,11,50},{12,11,127},{12,11,
+396},{13,11,62},{13,11,328},{14,11,122},{147,11,72},{7,10,386},{138,10,713},{6,
+10,7},{6,10,35},{7,10,147},{7,10,1069},{7,10,1568},{7,10,1575},{7,10,1917},{8,10
+,43},{8,10,208},{9,10,128},{9,10,866},{10,10,20},{11,10,981},{147,10,33},{133,0,
+26},{132,0,550},{5,11,2},{7,11,1494},{136,11,589},{6,11,512},{7,11,797},{8,11,
+253},{9,11,77},{10,11,1},{10,11,129},{10,11,225},{11,11,118},{11,11,226},{11,11,
+251},{11,11,430},{11,11,701},{11,11,974},{11,11,982},{12,11,64},{12,11,260},{12,
+11,488},{140,11,690},{7,10,893},{141,10,424},{134,0,901},{136,0,822},{4,0,902},{
+5,0,809},{134,0,122},{6,0,807},{134,0,1366},{7,0,262},{5,11,748},{134,11,553},{
+133,0,620},{4,0,34},{5,0,574},{7,0,279},{7,0,1624},{136,0,601},{9,0,170},{6,10,
+322},{9,10,552},{11,10,274},{13,10,209},{13,10,499},{14,10,85},{15,10,126},{145,
+10,70},{132,0,537},{4,11,12},{7,11,420},{7,11,522},{7,11,809},{8,11,797},{141,11
+,88},{133,0,332},{8,10,83},{8,10,742},{8,10,817},{9,10,28},{9,10,29},{9,10,885},
+{10,10,387},{11,10,633},{11,10,740},{13,10,235},{13,10,254},{15,10,143},{143,10,
+146},{6,0,1909},{9,0,964},{12,0,822},{12,0,854},{12,0,865},{12,0,910},{12,0,938}
+,{15,0,169},{15,0,208},{15,0,211},{18,0,205},{18,0,206},{18,0,220},{18,0,223},{
+152,0,24},{140,10,49},{5,11,528},{135,11,1580},{6,0,261},{8,0,182},{139,0,943},{
+134,0,1721},{4,0,933},{133,0,880},{136,11,321},{5,11,266},{9,11,290},{9,11,364},
+{10,11,293},{11,11,606},{142,11,45},{6,0,1609},{4,11,50},{6,11,510},{6,11,594},{
+9,11,121},{10,11,49},{10,11,412},{139,11,834},{7,0,895},{136,11,748},{132,11,466
+},{4,10,110},{10,10,415},{10,10,597},{142,10,206},{133,0,812},{135,11,281},{6,0,
+1890},{6,0,1902},{6,0,1916},{9,0,929},{9,0,942},{9,0,975},{9,0,984},{9,0,986},{9
+,0,1011},{9,0,1019},{12,0,804},{12,0,851},{12,0,867},{12,0,916},{12,0,923},{15,0
+,194},{15,0,204},{15,0,210},{15,0,222},{15,0,223},{15,0,229},{15,0,250},{18,0,
+179},{18,0,186},{18,0,192},{7,10,205},{135,10,2000},{132,11,667},{135,0,778},{4,
+0,137},{7,0,1178},{135,0,1520},{134,0,1314},{4,11,242},{134,11,333},{6,0,1661},{
+7,0,1975},{7,0,2009},{135,0,2011},{134,0,1591},{4,10,283},{135,10,1194},{11,0,
+820},{150,0,51},{4,11,39},{5,11,36},{7,11,1843},{8,11,407},{11,11,144},{140,11,
+523},{134,10,1720},{4,11,510},{7,11,29},{7,11,66},{7,11,1980},{10,11,487},{10,11
+,809},{146,11,9},{5,0,89},{7,0,1915},{9,0,185},{9,0,235},{10,0,64},{10,0,270},{
+10,0,403},{10,0,469},{10,0,529},{10,0,590},{11,0,140},{11,0,860},{13,0,1},{13,0,
+422},{14,0,341},{14,0,364},{17,0,93},{18,0,113},{19,0,97},{147,0,113},{133,0,695
+},{6,0,987},{134,0,1160},{5,0,6},{6,0,183},{7,0,680},{7,0,978},{7,0,1013},{7,0,
+1055},{12,0,230},{13,0,172},{146,0,29},{134,11,570},{132,11,787},{134,11,518},{6
+,0,29},{139,0,63},{132,11,516},{136,11,821},{132,0,311},{134,0,1740},{7,0,170},{
+8,0,90},{8,0,177},{8,0,415},{11,0,714},{14,0,281},{136,10,735},{134,0,1961},{135
+,11,1405},{4,11,10},{7,11,917},{139,11,786},{5,10,132},{9,10,486},{9,10,715},{10
+,10,458},{11,10,373},{11,10,668},{11,10,795},{11,10,897},{12,10,272},{12,10,424}
+,{12,10,539},{12,10,558},{14,10,245},{14,10,263},{14,10,264},{14,10,393},{142,10
+,403},{11,0,91},{13,0,129},{15,0,101},{145,0,125},{135,0,1132},{4,0,494},{6,0,74
+},{7,0,44},{7,0,407},{12,0,17},{15,0,5},{148,0,11},{133,10,379},{5,0,270},{5,11,
+684},{6,10,89},{6,10,400},{7,10,1569},{7,10,1623},{7,10,1850},{8,10,218},{8,10,
+422},{9,10,570},{138,10,626},{4,0,276},{133,0,296},{6,0,1523},{134,11,27},{6,10,
+387},{7,10,882},{141,10,111},{6,10,224},{7,10,877},{137,10,647},{135,10,790},{4,
+0,7},{5,0,90},{5,0,158},{6,0,542},{7,0,221},{7,0,1574},{9,0,490},{10,0,540},{11,
+0,443},{139,0,757},{7,0,588},{9,0,175},{138,0,530},{135,10,394},{142,11,23},{134
+,0,786},{135,0,580},{7,0,88},{136,0,627},{5,0,872},{6,0,57},{7,0,471},{9,0,447},
+{137,0,454},{6,11,342},{6,11,496},{8,11,275},{137,11,206},{4,11,909},{133,11,940
+},{6,0,735},{132,11,891},{8,0,845},{8,0,916},{135,10,1409},{5,0,31},{134,0,614},
+{11,0,458},{12,0,15},{140,0,432},{8,0,330},{140,0,477},{4,0,530},{5,0,521},{7,0,
+1200},{10,0,460},{132,11,687},{6,0,424},{135,0,1866},{9,0,569},{12,0,12},{12,0,
+81},{12,0,319},{13,0,69},{14,0,259},{16,0,87},{17,0,1},{17,0,21},{17,0,24},{18,0
+,15},{18,0,56},{18,0,59},{18,0,127},{18,0,154},{19,0,19},{148,0,31},{7,0,1302},{
+136,10,38},{134,11,253},{5,10,261},{7,10,78},{7,10,199},{8,10,815},{9,10,126},{
+138,10,342},{5,0,595},{135,0,1863},{6,11,41},{141,11,160},{5,0,13},{134,0,142},{
+6,0,97},{7,0,116},{8,0,322},{8,0,755},{9,0,548},{10,0,714},{11,0,884},{13,0,324}
+,{7,11,1304},{138,11,477},{132,10,628},{134,11,1718},{7,10,266},{136,10,804},{
+135,10,208},{7,0,1021},{6,10,79},{135,10,1519},{7,0,1472},{135,0,1554},{6,11,362
+},{146,11,51},{7,0,1071},{7,0,1541},{7,0,1767},{7,0,1806},{11,0,162},{11,0,242},
+{11,0,452},{12,0,605},{15,0,26},{144,0,44},{136,10,741},{133,11,115},{145,0,115}
+,{134,10,376},{6,0,1406},{134,0,1543},{5,11,193},{12,11,178},{13,11,130},{145,11
+,84},{135,0,1111},{8,0,1},{9,0,650},{10,0,326},{5,11,705},{137,11,606},{5,0,488}
+,{6,0,527},{7,0,489},{7,0,1636},{8,0,121},{8,0,144},{8,0,359},{9,0,193},{9,0,241
+},{9,0,336},{9,0,882},{11,0,266},{11,0,372},{11,0,944},{12,0,401},{140,0,641},{
+135,11,174},{6,0,267},{7,10,244},{7,10,632},{7,10,1609},{8,10,178},{8,10,638},{
+141,10,58},{134,0,1983},{134,0,1155},{134,0,1575},{134,0,1438},{9,0,31},{10,0,
+244},{10,0,699},{12,0,149},{141,0,497},{133,0,377},{4,11,122},{5,11,796},{5,11,
+952},{6,11,1660},{6,11,1671},{8,11,567},{9,11,687},{9,11,742},{10,11,686},{11,11
+,356},{11,11,682},{140,11,281},{145,0,101},{11,11,0},{144,11,78},{5,11,179},{5,
+10,791},{7,11,1095},{135,11,1213},{8,11,372},{9,11,122},{138,11,175},{7,10,686},
+{8,10,33},{8,10,238},{10,10,616},{11,10,467},{11,10,881},{13,10,217},{13,10,253}
+,{142,10,268},{9,0,476},{4,11,66},{7,11,722},{135,11,904},{7,11,352},{137,11,684
+},{135,0,2023},{135,0,1836},{132,10,447},{5,0,843},{144,0,35},{137,11,779},{141,
+11,35},{4,10,128},{5,10,415},{6,10,462},{7,10,294},{7,10,578},{10,10,710},{139,
+10,86},{132,0,554},{133,0,536},{136,10,587},{5,0,207},{9,0,79},{11,0,625},{145,0
+,7},{7,0,1371},{6,10,427},{138,10,692},{4,0,424},{4,10,195},{135,10,802},{8,0,
+785},{133,11,564},{135,0,336},{4,0,896},{6,0,1777},{134,11,556},{137,11,103},{
+134,10,1683},{7,11,544},{8,11,719},{138,11,61},{138,10,472},{4,11,5},{5,11,498},
+{136,11,637},{7,0,750},{9,0,223},{11,0,27},{11,0,466},{12,0,624},{14,0,265},{146
+,0,61},{12,0,238},{18,0,155},{12,11,238},{146,11,155},{151,10,28},{133,11,927},{
+12,0,383},{5,10,3},{8,10,578},{9,10,118},{10,10,705},{141,10,279},{4,11,893},{5,
+11,780},{133,11,893},{4,0,603},{133,0,661},{4,0,11},{6,0,128},{7,0,231},{7,0,
+1533},{10,0,725},{5,10,229},{5,11,238},{135,11,1350},{8,10,102},{10,10,578},{10,
+10,672},{12,10,496},{13,10,408},{14,10,121},{145,10,106},{132,0,476},{134,0,1552
+},{134,11,1729},{8,10,115},{8,10,350},{9,10,489},{10,10,128},{11,10,306},{12,10,
+373},{14,10,30},{17,10,79},{19,10,80},{150,10,55},{135,0,1807},{4,0,680},{4,11,
+60},{7,11,760},{7,11,1800},{8,11,314},{9,11,700},{139,11,487},{4,10,230},{5,10,
+702},{148,11,94},{132,11,228},{139,0,435},{9,0,20},{10,0,324},{10,0,807},{139,0,
+488},{6,10,1728},{136,11,419},{4,10,484},{18,10,26},{19,10,42},{20,10,43},{21,10
+,0},{23,10,27},{152,10,14},{135,0,1431},{133,11,828},{5,0,112},{6,0,103},{6,0,
+150},{7,0,1303},{9,0,292},{10,0,481},{20,0,13},{7,11,176},{7,11,178},{7,11,1110}
+,{10,11,481},{148,11,13},{138,0,356},{4,11,51},{5,11,39},{6,11,4},{7,11,591},{7,
+11,849},{7,11,951},{7,11,1129},{7,11,1613},{7,11,1760},{7,11,1988},{9,11,434},{
+10,11,754},{11,11,25},{11,11,37},{139,11,414},{6,0,1963},{134,0,2000},{132,10,
+633},{6,0,1244},{133,11,902},{135,11,928},{140,0,18},{138,0,204},{135,11,1173},{
+134,0,867},{4,0,708},{8,0,15},{9,0,50},{9,0,386},{11,0,18},{11,0,529},{140,0,228
+},{134,11,270},{4,0,563},{7,0,109},{7,0,592},{7,0,637},{7,0,770},{8,0,463},{9,0,
+60},{9,0,335},{9,0,904},{10,0,73},{11,0,434},{12,0,585},{13,0,331},{18,0,110},{
+148,0,60},{132,0,502},{14,11,359},{19,11,52},{148,11,47},{6,11,377},{7,11,1025},
+{9,11,613},{145,11,104},{6,0,347},{10,0,161},{5,10,70},{5,10,622},{6,10,334},{7,
+10,1032},{9,10,171},{11,10,26},{11,10,213},{11,10,637},{11,10,707},{12,10,202},{
+12,10,380},{13,10,226},{13,10,355},{14,10,222},{145,10,42},{132,11,416},{4,0,33}
+,{5,0,102},{6,0,284},{7,0,1079},{7,0,1423},{7,0,1702},{8,0,470},{9,0,554},{9,0,
+723},{11,0,333},{142,11,372},{5,11,152},{5,11,197},{7,11,340},{7,11,867},{10,11,
+548},{10,11,581},{11,11,6},{12,11,3},{12,11,19},{14,11,110},{142,11,289},{7,0,
+246},{135,0,840},{6,0,10},{8,0,571},{9,0,739},{143,0,91},{6,0,465},{7,0,1465},{4
+,10,23},{4,10,141},{5,10,313},{5,10,1014},{6,10,50},{7,10,142},{7,10,559},{8,10,
+640},{9,10,460},{9,10,783},{11,10,741},{12,10,183},{141,10,488},{133,0,626},{136
+,0,614},{138,0,237},{7,11,34},{7,11,190},{8,11,28},{8,11,141},{8,11,444},{8,11,
+811},{9,11,468},{11,11,334},{12,11,24},{12,11,386},{140,11,576},{133,11,757},{5,
+0,18},{6,0,526},{13,0,24},{13,0,110},{19,0,5},{147,0,44},{6,0,506},{134,11,506},
+{135,11,1553},{4,0,309},{5,0,462},{7,0,970},{7,0,1097},{22,0,30},{22,0,33},{7,11
+,1385},{11,11,582},{11,11,650},{11,11,901},{11,11,949},{12,11,232},{12,11,236},{
+13,11,413},{13,11,501},{146,11,116},{9,0,140},{5,10,222},{138,10,534},{6,0,1056}
+,{137,10,906},{134,0,1704},{138,10,503},{134,0,1036},{5,10,154},{7,10,1491},{10,
+10,379},{138,10,485},{4,11,383},{133,10,716},{134,0,1315},{5,0,86},{7,0,743},{9,
+0,85},{10,0,281},{10,0,432},{11,0,825},{12,0,251},{13,0,118},{142,0,378},{8,0,
+264},{4,10,91},{5,10,388},{5,10,845},{6,10,206},{6,10,252},{6,10,365},{7,10,136}
+,{7,10,531},{136,10,621},{5,0,524},{133,0,744},{5,11,277},{141,11,247},{132,11,
+435},{10,0,107},{140,0,436},{132,0,927},{10,0,123},{12,0,670},{146,0,94},{7,0,
+1149},{9,0,156},{138,0,957},{5,11,265},{6,11,212},{135,11,28},{133,0,778},{133,0
+,502},{8,0,196},{10,0,283},{139,0,406},{135,10,576},{136,11,535},{134,0,1312},{5
+,10,771},{5,10,863},{5,10,898},{6,10,1632},{6,10,1644},{134,10,1780},{5,0,855},{
+5,10,331},{135,11,1487},{132,11,702},{5,11,808},{135,11,2045},{7,0,1400},{9,0,
+446},{138,0,45},{140,10,632},{132,0,1003},{5,11,166},{8,11,739},{140,11,511},{5,
+10,107},{7,10,201},{136,10,518},{6,10,446},{135,10,1817},{134,0,1532},{134,0,
+1097},{4,11,119},{5,11,170},{5,11,447},{7,11,1708},{7,11,1889},{9,11,357},{9,11,
+719},{12,11,486},{140,11,596},{9,10,851},{141,10,510},{7,0,612},{8,0,545},{8,0,
+568},{8,0,642},{9,0,717},{10,0,541},{10,0,763},{11,0,449},{12,0,489},{13,0,153},
+{13,0,296},{14,0,138},{14,0,392},{15,0,50},{16,0,6},{16,0,12},{20,0,9},{132,10,
+504},{4,11,450},{135,11,1158},{11,0,54},{13,0,173},{13,0,294},{5,10,883},{5,10,
+975},{8,10,392},{148,10,7},{13,0,455},{15,0,99},{15,0,129},{144,0,68},{135,0,172
+},{132,11,754},{5,10,922},{134,10,1707},{134,0,1029},{17,11,39},{148,11,36},{4,0
+,568},{5,10,993},{7,10,515},{137,10,91},{132,0,732},{10,0,617},{138,11,617},{134
+,0,974},{7,0,989},{10,0,377},{12,0,363},{13,0,68},{13,0,94},{14,0,108},{142,0,
+306},{136,0,733},{132,0,428},{7,0,1789},{135,11,1062},{7,0,2015},{140,0,665},{
+135,10,1433},{5,0,287},{7,10,921},{8,10,580},{8,10,593},{8,10,630},{138,10,28},{
+138,0,806},{4,10,911},{5,10,867},{5,10,1013},{7,10,2034},{8,10,798},{136,10,813}
+,{134,0,1539},{8,11,523},{150,11,34},{135,11,740},{7,11,238},{7,11,2033},{8,11,
+120},{8,11,188},{8,11,659},{9,11,598},{10,11,466},{12,11,342},{12,11,588},{13,11
+,503},{14,11,246},{143,11,92},{7,0,1563},{141,0,182},{5,10,135},{6,10,519},{7,10
+,1722},{10,10,271},{11,10,261},{145,10,54},{14,10,338},{148,10,81},{7,0,484},{4,
+10,300},{133,10,436},{145,11,114},{6,0,1623},{134,0,1681},{133,11,640},{4,11,201
+},{7,11,1744},{8,11,602},{11,11,247},{11,11,826},{145,11,65},{8,11,164},{146,11,
+62},{6,0,1833},{6,0,1861},{136,0,878},{134,0,1569},{8,10,357},{10,10,745},{14,10
+,426},{17,10,94},{147,10,57},{12,0,93},{12,0,501},{13,0,362},{14,0,151},{15,0,40
+},{15,0,59},{16,0,46},{17,0,25},{18,0,14},{18,0,134},{19,0,25},{19,0,69},{20,0,
+16},{20,0,19},{20,0,66},{21,0,23},{21,0,25},{150,0,42},{6,0,1748},{8,0,715},{9,0
+,802},{10,0,46},{10,0,819},{13,0,308},{14,0,351},{14,0,363},{146,0,67},{132,0,
+994},{4,0,63},{133,0,347},{132,0,591},{133,0,749},{7,11,1577},{10,11,304},{10,11
+,549},{11,11,424},{12,11,365},{13,11,220},{13,11,240},{142,11,33},{133,0,366},{7
+,0,557},{12,0,547},{14,0,86},{133,10,387},{135,0,1747},{132,11,907},{5,11,100},{
+10,11,329},{12,11,416},{149,11,29},{4,10,6},{5,10,708},{136,10,75},{7,10,1351},{
+9,10,581},{10,10,639},{11,10,453},{140,10,584},{7,0,89},{132,10,303},{138,10,772
+},{132,11,176},{5,11,636},{5,11,998},{8,11,26},{137,11,358},{7,11,9},{7,11,1508}
+,{9,11,317},{10,11,210},{10,11,292},{10,11,533},{11,11,555},{12,11,526},{12,11,
+607},{13,11,263},{13,11,459},{142,11,271},{134,0,1463},{6,0,772},{6,0,1137},{139
+,11,595},{7,0,977},{139,11,66},{138,0,893},{20,0,48},{148,11,48},{5,0,824},{133,
+0,941},{134,11,295},{7,0,1543},{7,0,1785},{10,0,690},{4,10,106},{139,10,717},{7,
+0,440},{8,0,230},{139,0,106},{5,10,890},{133,10,988},{6,10,626},{142,10,431},{10
+,11,127},{141,11,27},{17,0,32},{10,10,706},{150,10,44},{132,0,216},{137,0,332},{
+4,10,698},{136,11,119},{139,11,267},{138,10,17},{11,11,526},{11,11,939},{141,11,
+290},{7,11,1167},{11,11,934},{13,11,391},{145,11,76},{139,11,39},{134,10,84},{4,
+0,914},{5,0,800},{133,0,852},{10,0,416},{141,0,115},{7,0,564},{142,0,168},{4,0,
+918},{133,0,876},{134,0,1764},{152,0,3},{4,0,92},{5,0,274},{7,11,126},{136,11,84
+},{140,10,498},{136,11,790},{8,0,501},{5,10,986},{6,10,130},{7,10,1582},{8,10,
+458},{10,10,101},{10,10,318},{138,10,823},{6,11,64},{12,11,377},{141,11,309},{5,
+0,743},{138,0,851},{4,0,49},{7,0,280},{135,0,1633},{134,0,879},{136,0,47},{7,10,
+1644},{137,10,129},{132,0,865},{134,0,1202},{9,11,34},{139,11,484},{135,10,997},
+{5,0,272},{5,0,908},{5,0,942},{8,0,197},{9,0,47},{11,0,538},{139,0,742},{6,11,
+1700},{7,11,26},{7,11,293},{7,11,382},{7,11,1026},{7,11,1087},{7,11,2027},{8,11,
+24},{8,11,114},{8,11,252},{8,11,727},{8,11,729},{9,11,30},{9,11,199},{9,11,231},
+{9,11,251},{9,11,334},{9,11,361},{9,11,488},{9,11,712},{10,11,55},{10,11,60},{10
+,11,232},{10,11,332},{10,11,384},{10,11,396},{10,11,504},{10,11,542},{10,11,652}
+,{11,11,20},{11,11,48},{11,11,207},{11,11,291},{11,11,298},{11,11,342},{11,11,
+365},{11,11,394},{11,11,620},{11,11,705},{11,11,1017},{12,11,123},{12,11,340},{
+12,11,406},{12,11,643},{13,11,61},{13,11,269},{13,11,311},{13,11,319},{13,11,486
+},{14,11,234},{15,11,62},{15,11,85},{16,11,71},{18,11,119},{148,11,105},{6,0,
+1455},{150,11,37},{135,10,1927},{135,0,1911},{137,0,891},{7,10,1756},{137,10,98}
+,{7,10,1046},{139,10,160},{132,0,761},{6,11,379},{7,11,270},{7,11,1116},{8,11,
+176},{8,11,183},{9,11,432},{9,11,661},{12,11,247},{12,11,617},{146,11,125},{6,10
+,45},{7,10,433},{8,10,129},{9,10,21},{10,10,392},{11,10,79},{12,10,499},{13,10,
+199},{141,10,451},{4,0,407},{5,11,792},{133,11,900},{132,0,560},{135,0,183},{13,
+0,490},{7,10,558},{136,10,353},{4,0,475},{6,0,731},{11,0,35},{13,0,71},{13,0,177
+},{14,0,422},{133,10,785},{8,10,81},{9,10,189},{9,10,201},{11,10,478},{11,10,712
+},{141,10,338},{4,0,418},{4,0,819},{133,10,353},{151,10,26},{4,11,901},{133,11,
+776},{132,0,575},{7,0,818},{16,0,92},{17,0,14},{17,0,45},{18,0,75},{148,0,18},{6
+,0,222},{7,0,636},{7,0,1620},{8,0,409},{9,0,693},{139,0,77},{6,10,25},{7,10,855}
+,{7,10,1258},{144,10,32},{6,0,1880},{6,0,1887},{6,0,1918},{6,0,1924},{9,0,967},{
+9,0,995},{9,0,1015},{12,0,826},{12,0,849},{12,0,857},{12,0,860},{12,0,886},{12,0
+,932},{18,0,228},{18,0,231},{146,0,240},{134,0,633},{134,0,1308},{4,11,37},{5,11
+,334},{135,11,1253},{10,0,86},{4,10,4},{7,10,1118},{7,10,1320},{7,10,1706},{8,10
+,277},{9,10,622},{11,10,724},{12,10,350},{12,10,397},{13,10,28},{13,10,159},{15,
+10,89},{18,10,5},{19,10,9},{20,10,34},{150,10,47},{132,11,508},{137,11,448},{12,
+11,107},{146,11,31},{132,0,817},{134,0,663},{133,0,882},{134,0,914},{132,11,540}
+,{132,11,533},{136,11,608},{8,0,885},{138,0,865},{132,0,426},{6,0,58},{7,0,745},
+{7,0,1969},{8,0,399},{8,0,675},{9,0,479},{9,0,731},{10,0,330},{10,0,593},{10,0,
+817},{11,0,32},{11,0,133},{11,0,221},{145,0,68},{134,10,255},{7,0,102},{137,0,
+538},{137,10,216},{7,11,253},{136,11,549},{135,11,912},{9,10,183},{139,10,286},{
+11,10,956},{151,10,3},{8,11,527},{18,11,60},{147,11,24},{4,10,536},{7,10,1141},{
+10,10,723},{139,10,371},{133,11,920},{7,0,876},{135,10,285},{135,10,560},{132,10
+,690},{142,11,126},{11,10,33},{12,10,571},{149,10,1},{133,0,566},{9,0,139},{10,0
+,399},{11,0,469},{12,0,634},{13,0,223},{132,11,483},{6,0,48},{135,0,63},{18,0,12
+},{7,10,1862},{12,10,491},{12,10,520},{13,10,383},{142,10,244},{135,11,1665},{
+132,11,448},{9,11,495},{146,11,104},{6,0,114},{7,0,1224},{7,0,1556},{136,0,3},{4
+,10,190},{133,10,554},{8,0,576},{9,0,267},{133,10,1001},{133,10,446},{133,0,933}
+,{139,11,1009},{8,11,653},{13,11,93},{147,11,14},{6,0,692},{6,0,821},{134,0,1077
+},{5,11,172},{135,11,801},{138,0,752},{4,0,375},{134,0,638},{134,0,1011},{140,11
+,540},{9,0,96},{133,11,260},{139,11,587},{135,10,1231},{12,0,30},{13,0,148},{14,
+0,87},{14,0,182},{16,0,42},{20,0,70},{132,10,304},{6,0,1398},{7,0,56},{7,0,1989}
+,{8,0,337},{8,0,738},{9,0,600},{12,0,37},{13,0,447},{142,0,92},{138,0,666},{5,0,
+394},{7,0,487},{136,0,246},{9,0,437},{6,10,53},{6,10,199},{7,10,1408},{8,10,32},
+{8,10,93},{10,10,397},{10,10,629},{11,10,593},{11,10,763},{13,10,326},{145,10,35
+},{134,10,105},{9,0,320},{10,0,506},{138,10,794},{7,11,57},{8,11,167},{8,11,375}
+,{9,11,82},{9,11,561},{10,11,620},{10,11,770},{11,10,704},{141,10,396},{6,0,1003
+},{5,10,114},{5,10,255},{141,10,285},{7,0,866},{135,0,1163},{133,11,531},{132,0,
+328},{7,10,2035},{8,10,19},{9,10,89},{138,10,831},{8,11,194},{136,11,756},{136,0
+,1000},{5,11,453},{134,11,441},{4,0,101},{5,0,833},{7,0,1171},{136,0,744},{133,0
+,726},{136,10,746},{138,0,176},{6,0,9},{6,0,397},{7,0,53},{7,0,1742},{10,0,632},
+{11,0,828},{140,0,146},{135,11,22},{145,11,64},{132,0,839},{11,0,417},{12,0,223}
+,{140,0,265},{4,11,102},{7,11,815},{7,11,1699},{139,11,964},{5,10,955},{136,10,
+814},{6,0,1931},{6,0,2007},{18,0,246},{146,0,247},{8,0,198},{11,0,29},{140,0,534
+},{135,0,1771},{6,0,846},{7,11,1010},{11,11,733},{11,11,759},{12,11,563},{13,11,
+34},{14,11,101},{18,11,45},{146,11,129},{4,0,186},{5,0,157},{8,0,168},{138,0,6},
+{132,11,899},{133,10,56},{148,10,100},{133,0,875},{5,0,773},{5,0,991},{6,0,1635}
+,{134,0,1788},{6,0,1274},{9,0,477},{141,0,78},{4,0,639},{7,0,111},{8,0,581},{12,
+0,177},{6,11,52},{9,11,104},{9,11,559},{10,10,4},{10,10,13},{11,10,638},{12,11,
+308},{19,11,87},{148,10,57},{132,11,604},{4,11,301},{133,10,738},{133,10,758},{
+134,0,1747},{7,11,1440},{11,11,854},{11,11,872},{11,11,921},{12,11,551},{13,11,
+472},{142,11,367},{7,0,1364},{7,0,1907},{141,0,158},{134,0,873},{4,0,404},{4,0,
+659},{7,0,552},{135,0,675},{135,10,1112},{139,10,328},{7,11,508},{137,10,133},{
+133,0,391},{5,10,110},{6,10,169},{6,10,1702},{7,10,400},{8,10,538},{9,10,184},{9
+,10,524},{140,10,218},{6,11,310},{7,11,1849},{8,11,72},{8,11,272},{8,11,431},{9,
+11,12},{9,11,351},{10,11,563},{10,11,630},{10,11,810},{11,11,367},{11,11,599},{
+11,11,686},{140,11,672},{5,0,540},{6,0,1697},{136,0,668},{132,0,883},{134,0,78},
+{12,0,628},{18,0,79},{6,10,133},{9,10,353},{139,10,993},{6,11,181},{7,11,537},{8
+,11,64},{9,11,127},{10,11,496},{12,11,510},{141,11,384},{6,10,93},{7,10,1422},{7
+,10,1851},{8,10,673},{9,10,529},{140,10,43},{137,10,371},{134,0,1460},{134,0,962
+},{4,11,244},{135,11,233},{9,10,25},{10,10,467},{138,10,559},{4,10,335},{135,10,
+942},{133,0,460},{135,11,334},{134,11,1650},{4,0,199},{139,0,34},{5,10,601},{8,
+10,39},{10,10,773},{11,10,84},{12,10,205},{142,10,1},{133,10,870},{134,0,388},{
+14,0,474},{148,0,120},{133,11,369},{139,0,271},{4,0,511},{9,0,333},{9,0,379},{10
+,0,602},{11,0,441},{11,0,723},{11,0,976},{12,0,357},{132,10,181},{134,0,608},{
+134,10,1652},{22,0,49},{137,11,338},{140,0,988},{134,0,617},{5,0,938},{136,0,707
+},{132,10,97},{5,10,147},{6,10,286},{7,10,1362},{141,10,176},{6,0,756},{134,0,
+1149},{133,11,896},{6,10,375},{7,10,169},{7,10,254},{136,10,780},{134,0,1583},{
+135,10,1447},{139,0,285},{7,11,1117},{8,11,393},{136,11,539},{135,0,344},{6,0,
+469},{7,0,1709},{138,0,515},{5,10,629},{135,10,1549},{5,11,4},{5,11,810},{6,11,
+13},{6,11,538},{6,11,1690},{6,11,1726},{7,11,499},{7,11,1819},{8,11,148},{8,11,
+696},{8,11,791},{12,11,125},{13,11,54},{143,11,9},{135,11,1268},{137,0,404},{132
+,0,500},{5,0,68},{134,0,383},{11,0,216},{139,0,340},{4,11,925},{5,11,803},{8,11,
+698},{138,11,828},{4,0,337},{6,0,353},{7,0,1934},{8,0,488},{137,0,429},{7,0,236}
+,{7,0,1795},{8,0,259},{9,0,135},{9,0,177},{9,0,860},{10,0,825},{11,0,115},{11,0,
+370},{11,0,405},{11,0,604},{12,0,10},{12,0,667},{12,0,669},{13,0,76},{14,0,310},
+{15,0,76},{15,0,147},{148,0,23},{4,0,15},{4,0,490},{5,0,22},{6,0,244},{7,0,40},{
+7,0,200},{7,0,906},{7,0,1199},{9,0,616},{10,0,716},{11,0,635},{11,0,801},{140,0,
+458},{12,0,756},{132,10,420},{134,0,1504},{6,0,757},{133,11,383},{6,0,1266},{135
+,0,1735},{5,0,598},{7,0,791},{8,0,108},{9,0,123},{7,10,1570},{140,10,542},{142,
+11,410},{9,11,660},{138,11,347}
+};
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_STATIC_DICT_LUT_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/utf8_util.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/utf8_util.c
new file mode 100644
index 0000000000..04a7805161
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/utf8_util.c
@@ -0,0 +1,85 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Heuristics for deciding about the UTF8-ness of strings. */
+
+#include "./utf8_util.h"
+
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static size_t BrotliParseAsUTF8(  /* Decodes one symbol; returns bytes used. */
+    int* symbol, const uint8_t* input, size_t size) {  /* needs size >= 1 */
+  /* ASCII: high bit clear. A zero byte is not accepted and falls through. */
+  if ((input[0] & 0x80) == 0) {
+    *symbol = input[0];
+    if (*symbol > 0) {
+      return 1;
+    }
+  }
+  /* 2-byte UTF8: 110xxxxx 10xxxxxx; overlong values (<= 0x7F) are rejected. */
+  if (size > 1u &&
+      (input[0] & 0xE0) == 0xC0 &&
+      (input[1] & 0xC0) == 0x80) {
+    *symbol = (((input[0] & 0x1F) << 6) |
+               (input[1] & 0x3F));
+    if (*symbol > 0x7F) {
+      return 2;
+    }
+  }
+  /* 3-byte UTF8: 1110xxxx + two 10xxxxxx; values <= 0x7FF are rejected. */
+  if (size > 2u &&
+      (input[0] & 0xF0) == 0xE0 &&
+      (input[1] & 0xC0) == 0x80 &&
+      (input[2] & 0xC0) == 0x80) {
+    *symbol = (((input[0] & 0x0F) << 12) |
+               ((input[1] & 0x3F) << 6) |
+               (input[2] & 0x3F));
+    if (*symbol > 0x7FF) {  /* no check for the surrogate range here */
+      return 3;
+    }
+  }
+  /* 4-byte UTF8: 11110xxx + three 10xxxxxx; accepts 0x10000..0x10FFFF. */
+  if (size > 3u &&
+      (input[0] & 0xF8) == 0xF0 &&
+      (input[1] & 0xC0) == 0x80 &&
+      (input[2] & 0xC0) == 0x80 &&
+      (input[3] & 0xC0) == 0x80) {
+    *symbol = (((input[0] & 0x07) << 18) |
+               ((input[1] & 0x3F) << 12) |
+               ((input[2] & 0x3F) << 6) |
+               (input[3] & 0x3F));
+    if (*symbol > 0xFFFF && *symbol <= 0x10FFFF) {
+      return 4;
+    }
+  }
+  /* Not UTF8, emit a special symbol above the UTF8-code space */
+  *symbol = 0x110000 | input[0];  /* overwrites any rejected value set above */
+  return 1;  /* always advance by exactly one byte in this case */
+}
+
+/* Returns 1 if more than min_fraction of |length| bytes parse as UTF8. */
+BROTLI_BOOL BrotliIsMostlyUTF8(
+    const uint8_t* data, const size_t pos, const size_t mask,
+    const size_t length, const double min_fraction) {
+  size_t size_utf8 = 0;  /* bytes belonging to successfully parsed symbols */
+  size_t i = 0;  /* offset from pos of the next byte to parse */
+  while (i < length) {  /* no iterations when length is 0 */
+    int symbol;  /* code point, or >= 0x110000 when the byte is not UTF8 */
+    size_t bytes_read =  /* NOTE(review): only the start index is masked */
+        BrotliParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i);
+    i += bytes_read;
+    if (symbol < 0x110000) size_utf8 += bytes_read;
+  }
+  return TO_BROTLI_BOOL(size_utf8 > min_fraction * (double)length);
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/utf8_util.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/utf8_util.h
new file mode 100644
index 0000000000..8fda80c220
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/utf8_util.h
@@ -0,0 +1,32 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Heuristics for deciding about the UTF8-ness of strings. */
+
+#ifndef BROTLI_ENC_UTF8_UTIL_H_
+#define BROTLI_ENC_UTF8_UTIL_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static const double kMinUTF8Ratio = 0.75;  /* presumably a min_fraction value */
+
+/* Returns 1 if more than min_fraction of the bytes between pos and
+   pos + length in the (data, mask) ring-buffer are UTF8-encoded, otherwise
+   returns 0. */
+BROTLI_INTERNAL BROTLI_BOOL BrotliIsMostlyUTF8(
+    const uint8_t* data, const size_t pos, const size_t mask,
+    const size_t length, const double min_fraction);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_UTF8_UTIL_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/write_bits.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/write_bits.h
new file mode 100644
index 0000000000..f6f88b45be
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/enc/write_bits.h
@@ -0,0 +1,87 @@
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Write bits into a byte array. */
+
+#ifndef BROTLI_ENC_WRITE_BITS_H_
+#define BROTLI_ENC_WRITE_BITS_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Writes the n_bits-bit value `bits` into `array` at bit offset *pos and
+   advances *pos by n_bits. Bytes are filled in increasing addresses, and
+   within a byte least-significant-bit first.
+   At most 56 bits can be written per call; `bits` must fit in n_bits bits.
+   Example: let's assume that 3 bits (Rs below) have been written already:
+
+   BYTE-0     BYTE+1       BYTE+2
+
+   0000 0RRR    0000 0000    0000 0000
+
+   Now, we could write 5 or fewer bits in the MSBs by just shifting by 3
+   and OR'ing to BYTE-0.
+
+   For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
+   and locate the rest in BYTE+1, BYTE+2, etc. */
+static BROTLI_INLINE void BrotliWriteBits(size_t n_bits,
+                                          uint64_t bits,
+                                          size_t* BROTLI_RESTRICT pos,
+                                          uint8_t* BROTLI_RESTRICT array) {
+  BROTLI_LOG(("WriteBits  %2d  0x%08x%08x  %10d\n", (int)n_bits,
+      (uint32_t)(bits >> 32), (uint32_t)(bits & 0xFFFFFFFF),
+      (int)*pos));
+  BROTLI_DCHECK((bits >> n_bits) == 0);
+  BROTLI_DCHECK(n_bits <= 56);
+#if defined(BROTLI_LITTLE_ENDIAN)
+  /* This branch of the code can write up to 56 bits at a time,
+     7 bits are lost by being perhaps already in *p and at least
+     1 bit is needed to initialize the bit-stream ahead (i.e. if 7
+     bits are in *p and we write 57 bits, then the next write will
+     access a byte that was never initialized). */
+  {
+    uint8_t* p = &array[*pos >> 3];
+    uint64_t v = (uint64_t)(*p);  /* Zero-extend 8 to 64 bits. */
+    v |= bits << (*pos & 7);
+    BROTLI_UNALIGNED_STORE64LE(p, v);  /* Set some bits. */
+    *pos += n_bits;
+  }
+#else
+  /* Byte-by-byte fallback; (uint8_t) casts implicitly apply & 0xFF. */
+  {
+    uint8_t* array_pos = &array[*pos >> 3];
+    const size_t bits_reserved_in_first_byte = (*pos & 7);
+    size_t bits_left_to_write;
+    bits <<= bits_reserved_in_first_byte;
+    *array_pos++ |= (uint8_t)bits;
+    for (bits_left_to_write = n_bits + bits_reserved_in_first_byte;
+         bits_left_to_write >= 9;
+         bits_left_to_write -= 8) {
+      bits >>= 8;
+      *array_pos++ = (uint8_t)bits;
+    }
+    *array_pos = 0;  /* Zero the byte after the last one stored. */
+    *pos += n_bits;
+  }
+#endif
+}
+
+static BROTLI_INLINE void BrotliWriteBitsPrepareStorage(
+    size_t pos, uint8_t* array) {  /* pos is a bit offset into array. */
+  BROTLI_LOG(("WriteBitsPrepareStorage            %10d\n", (int)pos));
+  BROTLI_DCHECK((pos % 8) == 0);  /* Must sit on a byte boundary. */
+  *(array + pos / 8) = 0;  /* Clear the byte holding bit offset pos. */
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_WRITE_BITS_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/fuzz/decode_fuzzer.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/fuzz/decode_fuzzer.c
new file mode 100644
index 0000000000..46144e07eb
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/fuzz/decode_fuzzer.c
@@ -0,0 +1,58 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <brotli/decode.h>
+
+// Entry point for LibFuzzer: stream-decodes data in chunks; always returns 0.
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  // Low 3 bits of the last input byte pick the chunk size (0: whole input).
+  size_t addend = 0;
+  if (size > 0) addend = data[size - 1] & 7;
+  const uint8_t* next_in = data;
+
+  const int kBufferSize = 1024;
+  uint8_t* buffer = (uint8_t*) malloc(kBufferSize);
+  if (!buffer) {
+    // OOM is out-of-scope here.
+    return 0;
+  }
+  /* The biggest "magic number" in brotli is 16MiB - 16, so no need to check
+     the cases with much longer output. */
+  const size_t total_out_limit = (addend == 0) ? (1 << 26) : (1 << 24);
+  size_t total_out = 0;
+
+  BrotliDecoderState* state = BrotliDecoderCreateInstance(0, 0, 0);
+  if (!state) {
+    // OOM is out-of-scope here; never pass a NULL state to the decoder.
+    free(buffer);
+    return 0;
+  }
+
+  if (addend == 0) addend = size;
+  /* Test both fast (addend == size) and slow (addend <= 7) decoding paths. */
+  for (size_t i = 0; i < size;) {
+    size_t next_i = i + addend;
+    if (next_i > size) next_i = size;
+    size_t avail_in = next_i - i;
+    i = next_i;
+    BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT;
+    while (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
+      size_t avail_out = kBufferSize;
+      uint8_t* next_out = buffer;
+      result = BrotliDecoderDecompressStream(
+          state, &avail_in, &next_in, &avail_out, &next_out, &total_out);
+      if (total_out > total_out_limit) break;
+    }
+    if (total_out > total_out_limit) break;
+    if (result != BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) break;
+  }
+
+  BrotliDecoderDestroyInstance(state);
+  free(buffer);
+  return 0;
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/fuzz/run_decode_fuzzer.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/fuzz/run_decode_fuzzer.c
new file mode 100644
index 0000000000..c84f98a32b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/fuzz/run_decode_fuzzer.c
@@ -0,0 +1,66 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Simple runner for decode_fuzzer.c */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+/* Must match the definition in decode_fuzzer.c, which returns int. */
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size);
+
+/* Reads at most 1 MiB from the file named by argv[1] and runs the fuzzer
+   entry point on it once. Exits with EXIT_FAILURE on a usage, I/O or
+   allocation error and with EXIT_SUCCESS otherwise. */
+int main(int argc, char* *argv) {
+  if (argc != 2) {
+    fprintf(stderr, "Exactly one argument is expected.\n");
+    exit(EXIT_FAILURE);
+  }
+
+  /* Binary mode: the input is raw bytes that text mode could translate. */
+  FILE* f = fopen(argv[1], "rb");
+  if (!f) {
+    fprintf(stderr, "Failed to open input file.");
+    exit(EXIT_FAILURE);
+  }
+
+  size_t max_len = 1 << 20;
+  unsigned char* tmp = (unsigned char*)malloc(max_len);
+  if (!tmp) {
+    fclose(f);
+    fprintf(stderr, "Failed to allocate input buffer.");
+    exit(EXIT_FAILURE);
+  }
+  size_t len = fread(tmp, 1, max_len, f);
+  if (ferror(f)) {
+    fclose(f);
+    free(tmp);
+    fprintf(stderr, "Failed read input file.");
+    exit(EXIT_FAILURE);
+  }
+  fclose(f);
+
+  /* Make data after the end "inaccessible". Empty input keeps data NULL:
+     malloc(0) may return NULL, which must not be passed to memcpy. */
+  unsigned char* data = NULL;
+  if (len > 0) {
+    data = (unsigned char*)malloc(len);
+    if (!data) {
+      free(tmp);
+      fprintf(stderr, "Failed to allocate input copy.");
+      exit(EXIT_FAILURE);
+    }
+    memcpy(data, tmp, len);
+  }
+  free(tmp);
+
+  LLVMFuzzerTestOneInput(data, len);
+  free(data);
+  exit(EXIT_SUCCESS);
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/fuzz/test_fuzzer.sh b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/fuzz/test_fuzzer.sh
new file mode 100755
index 0000000000..9985194a19
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/fuzz/test_fuzzer.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+set -e
+# Builds an ASan-enabled decoder fuzz runner and replays the zipped corpus.
+export CC=${CC:-cc}
+
+BROTLI="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )"
+SRC="$BROTLI/c"
+
+cd "$BROTLI"
+
+rm -rf bin
+mkdir bin
+cd bin
+
+cmake "$BROTLI" -DCMAKE_C_COMPILER="$CC" \
+    -DBUILD_TESTING=OFF -DENABLE_SANITIZER=address
+make -j"$(nproc)" brotlidec-static
+
+${CC} -o run_decode_fuzzer -std=c99 -fsanitize=address -I"$SRC/include" \
+    "$SRC/fuzz/decode_fuzzer.c" "$SRC/fuzz/run_decode_fuzzer.c" \
+    ./libbrotlidec-static.a ./libbrotlicommon-static.a
+
+mkdir decode_corpora
+unzip "$BROTLI/java/org/brotli/integration/fuzz_data.zip" -d decode_corpora
+# Glob instead of parsing `ls`; set -e aborts on the first failing input.
+for f in decode_corpora/*; do
+ [ -e "$f" ] || continue  # An unmatched glob stays literal: empty corpus.
+ echo "Testing ${f##*/}"
+ ./run_decode_fuzzer "$f"
+done
+
+cd "$BROTLI"
+rm -rf bin
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include/brotli/decode.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include/brotli/decode.h
new file mode 100644
index 0000000000..0f5c8f9d11
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include/brotli/decode.h
@@ -0,0 +1,344 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/**
+ * @file
+ * API for Brotli decompression.
+ */
+
+#ifndef BROTLI_DEC_DECODE_H_
+#define BROTLI_DEC_DECODE_H_
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/**
+ * Opaque structure that holds decoder state.
+ *
+ * Allocated and initialized with ::BrotliDecoderCreateInstance.
+ * Cleaned up and deallocated with ::BrotliDecoderDestroyInstance.
+ */
+typedef struct BrotliDecoderStateStruct BrotliDecoderState;
+
+/**
+ * Result type for ::BrotliDecoderDecompress and
+ * ::BrotliDecoderDecompressStream functions.
+ */
+typedef enum {
+  /** Decoding error, e.g. corrupted input or memory allocation problem. */
+  BROTLI_DECODER_RESULT_ERROR = 0,
+  /** Decoding successfully completed. */
+  BROTLI_DECODER_RESULT_SUCCESS = 1,
+  /** Partially done; should be called again with more input. */
+  BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT = 2,
+  /** Partially done; should be called again with more output. */
+  BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT = 3
+} BrotliDecoderResult;
+
+/**
+ * Template that evaluates items of ::BrotliDecoderErrorCode.
+ *
+ * Example: @code {.cpp}
+ * // Log Brotli error code.
+ * switch (brotliDecoderErrorCode) {
+ * #define CASE_(PREFIX, NAME, CODE) \
+ *   case BROTLI_DECODER ## PREFIX ## NAME: \
+ *     LOG(INFO) << "error code:" << #NAME; \
+ *     break;
+ * #define NEWLINE_
+ * BROTLI_DECODER_ERROR_CODES_LIST(CASE_, NEWLINE_)
+ * #undef CASE_
+ * #undef NEWLINE_
+ *   default: LOG(FATAL) << "unknown brotli error code";
+ * }
+ * @endcode
+ */
+#define BROTLI_DECODER_ERROR_CODES_LIST(BROTLI_ERROR_CODE, SEPARATOR)      \
+  BROTLI_ERROR_CODE(_, NO_ERROR, 0) SEPARATOR                              \
+  /* Same as BrotliDecoderResult values */                                 \
+  BROTLI_ERROR_CODE(_, SUCCESS, 1) SEPARATOR                               \
+  BROTLI_ERROR_CODE(_, NEEDS_MORE_INPUT, 2) SEPARATOR                      \
+  BROTLI_ERROR_CODE(_, NEEDS_MORE_OUTPUT, 3) SEPARATOR                     \
+                                                                           \
+  /* Errors caused by invalid input */                                     \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, EXUBERANT_NIBBLE, -1) SEPARATOR        \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, RESERVED, -2) SEPARATOR                \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, EXUBERANT_META_NIBBLE, -3) SEPARATOR   \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, SIMPLE_HUFFMAN_ALPHABET, -4) SEPARATOR \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, SIMPLE_HUFFMAN_SAME, -5) SEPARATOR     \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, CL_SPACE, -6) SEPARATOR                \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, HUFFMAN_SPACE, -7) SEPARATOR           \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, CONTEXT_MAP_REPEAT, -8) SEPARATOR      \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, BLOCK_LENGTH_1, -9) SEPARATOR          \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, BLOCK_LENGTH_2, -10) SEPARATOR         \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, TRANSFORM, -11) SEPARATOR              \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, DICTIONARY, -12) SEPARATOR             \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, WINDOW_BITS, -13) SEPARATOR            \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, PADDING_1, -14) SEPARATOR              \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, PADDING_2, -15) SEPARATOR              \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, DISTANCE, -16) SEPARATOR               \
+                                                                           \
+  /* -17..-18 codes are reserved */                                        \
+                                                                           \
+  BROTLI_ERROR_CODE(_ERROR_, DICTIONARY_NOT_SET, -19) SEPARATOR            \
+  BROTLI_ERROR_CODE(_ERROR_, INVALID_ARGUMENTS, -20) SEPARATOR             \
+                                                                           \
+  /* Memory allocation problems */                                         \
+  BROTLI_ERROR_CODE(_ERROR_ALLOC_, CONTEXT_MODES, -21) SEPARATOR           \
+  /* Literal, insert and distance trees together */                        \
+  BROTLI_ERROR_CODE(_ERROR_ALLOC_, TREE_GROUPS, -22) SEPARATOR             \
+  /* -23..-24 codes are reserved for distinct tree groups */               \
+  BROTLI_ERROR_CODE(_ERROR_ALLOC_, CONTEXT_MAP, -25) SEPARATOR             \
+  BROTLI_ERROR_CODE(_ERROR_ALLOC_, RING_BUFFER_1, -26) SEPARATOR           \
+  BROTLI_ERROR_CODE(_ERROR_ALLOC_, RING_BUFFER_2, -27) SEPARATOR           \
+  /* -28..-29 codes are reserved for dynamic ring-buffer allocation */     \
+  BROTLI_ERROR_CODE(_ERROR_ALLOC_, BLOCK_TYPE_TREES, -30) SEPARATOR        \
+                                                                           \
+  /* "Impossible" states */                                                \
+  BROTLI_ERROR_CODE(_ERROR_, UNREACHABLE, -31)
+
+/**
+ * Error code for detailed logging / production debugging.
+ *
+ * See ::BrotliDecoderGetErrorCode and ::BROTLI_LAST_ERROR_CODE.
+ */
+typedef enum {
+#define BROTLI_COMMA_ ,
+#define BROTLI_ERROR_CODE_ENUM_ITEM_(PREFIX, NAME, CODE) \
+    BROTLI_DECODER ## PREFIX ## NAME = CODE
+  BROTLI_DECODER_ERROR_CODES_LIST(BROTLI_ERROR_CODE_ENUM_ITEM_, BROTLI_COMMA_)
+} BrotliDecoderErrorCode;
+#undef BROTLI_ERROR_CODE_ENUM_ITEM_
+#undef BROTLI_COMMA_
+
+/**
+ * The value of the last error code, negative integer.
+ *
+ * All other error code values are in the range from ::BROTLI_LAST_ERROR_CODE
+ * to @c -1. There are also 4 other possible non-error codes @c 0 .. @c 3 in
+ * ::BrotliDecoderErrorCode enumeration.
+ */
+#define BROTLI_LAST_ERROR_CODE BROTLI_DECODER_ERROR_UNREACHABLE
+
+/** Options to be used with ::BrotliDecoderSetParameter. */
+typedef enum BrotliDecoderParameter {
+  /**
+   * Disable "canny" ring buffer allocation strategy.
+   *
+   * Ring buffer is allocated according to window size, despite the real size of
+   * the content.
+   */
+  BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION = 0,
+  /**
+   * Flag that determines if "Large Window Brotli" is used.
+   */
+  BROTLI_DECODER_PARAM_LARGE_WINDOW = 1
+} BrotliDecoderParameter;
+
+/**
+ * Sets the specified parameter to the given decoder instance.
+ *
+ * @param state decoder instance
+ * @param param parameter to set
+ * @param value new parameter value
+ * @returns ::BROTLI_FALSE if parameter is unrecognized, or value is invalid
+ * @returns ::BROTLI_TRUE if value is accepted
+ */
+BROTLI_DEC_API BROTLI_BOOL BrotliDecoderSetParameter(
+    BrotliDecoderState* state, BrotliDecoderParameter param, uint32_t value);
+
+/**
+ * Creates an instance of ::BrotliDecoderState and initializes it.
+ *
+ * The instance can be used once for decoding and should then be destroyed with
+ * ::BrotliDecoderDestroyInstance, it cannot be reused for a new decoding
+ * session.
+ *
+ * @p alloc_func and @p free_func @b MUST be both zero or both non-zero. In the
+ * case they are both zero, default memory allocators are used. @p opaque is
+ * passed to @p alloc_func and @p free_func when they are called. @p free_func
+ * has to return without doing anything when asked to free a NULL pointer.
+ *
+ * @param alloc_func custom memory allocation function
+ * @param free_func custom memory free function
+ * @param opaque custom memory manager handle
+ * @returns @c 0 if instance can not be allocated or initialized
+ * @returns pointer to initialized ::BrotliDecoderState otherwise
+ */
+BROTLI_DEC_API BrotliDecoderState* BrotliDecoderCreateInstance(
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
+
+/**
+ * Deinitializes and frees ::BrotliDecoderState instance.
+ *
+ * @param state decoder instance to be cleaned up and deallocated
+ */
+BROTLI_DEC_API void BrotliDecoderDestroyInstance(BrotliDecoderState* state);
+
+/**
+ * Performs one-shot memory-to-memory decompression.
+ *
+ * Decompresses the data in @p encoded_buffer into @p decoded_buffer, and sets
+ * @p *decoded_size to the decompressed length.
+ *
+ * @param encoded_size size of @p encoded_buffer
+ * @param encoded_buffer compressed data buffer with at least @p encoded_size
+ *        addressable bytes
+ * @param[in, out] decoded_size @b in: size of @p decoded_buffer; \n
+ *                 @b out: length of decompressed data written to
+ *                 @p decoded_buffer
+ * @param decoded_buffer decompressed data destination buffer
+ * @returns ::BROTLI_DECODER_RESULT_ERROR if input is corrupted, memory
+ *          allocation failed, or @p decoded_buffer is not large enough;
+ * @returns ::BROTLI_DECODER_RESULT_SUCCESS otherwise
+ */
+BROTLI_DEC_API BrotliDecoderResult BrotliDecoderDecompress(
+    size_t encoded_size,
+    const uint8_t encoded_buffer[BROTLI_ARRAY_PARAM(encoded_size)],
+    size_t* decoded_size,
+    uint8_t decoded_buffer[BROTLI_ARRAY_PARAM(*decoded_size)]);
+
+/**
+ * Decompresses the input stream to the output stream.
+ *
+ * The values @p *available_in and @p *available_out must specify the number of
+ * bytes addressable at @p *next_in and @p *next_out respectively.
+ * When @p *available_out is @c 0, @p next_out is allowed to be @c NULL.
+ *
+ * After each call, @p *available_in will be decremented by the amount of input
+ * bytes consumed, and the @p *next_in pointer will be incremented by that
+ * amount. Similarly, @p *available_out will be decremented by the amount of
+ * output bytes written, and the @p *next_out pointer will be incremented by
+ * that amount.
+ *
+ * @p total_out, if it is not a null-pointer, will be set to the number
+ * of bytes decompressed since the last @p state initialization.
+ *
+ * @note Input is never overconsumed, so @p next_in and @p available_in could be
+ * passed to the next consumer after decoding is complete.
+ *
+ * @param state decoder instance
+ * @param[in, out] available_in @b in: amount of available input; \n
+ *                 @b out: amount of unused input
+ * @param[in, out] next_in pointer to the next compressed byte
+ * @param[in, out] available_out @b in: length of output buffer; \n
+ *                 @b out: remaining size of output buffer
+ * @param[in, out] next_out output buffer cursor;
+ *                 can be @c NULL if @p available_out is @c 0
+ * @param[out] total_out number of bytes decompressed so far; can be @c NULL
+ * @returns ::BROTLI_DECODER_RESULT_ERROR if input is corrupted, memory
+ *          allocation failed, arguments were invalid, etc.;
+ *          use ::BrotliDecoderGetErrorCode to get detailed error code
+ * @returns ::BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT decoding is blocked until
+ *          more input data is provided
+ * @returns ::BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT decoding is blocked until
+ *          more output space is provided
+ * @returns ::BROTLI_DECODER_RESULT_SUCCESS decoding is finished, no more
+ *          input might be consumed and no more output will be produced
+ */
+BROTLI_DEC_API BrotliDecoderResult BrotliDecoderDecompressStream(
+  BrotliDecoderState* state, size_t* available_in, const uint8_t** next_in,
+  size_t* available_out, uint8_t** next_out, size_t* total_out);
+
+/**
+ * Checks if decoder has more output.
+ *
+ * @param state decoder instance
+ * @returns ::BROTLI_TRUE, if decoder has some unconsumed output
+ * @returns ::BROTLI_FALSE otherwise
+ */
+BROTLI_DEC_API BROTLI_BOOL BrotliDecoderHasMoreOutput(
+    const BrotliDecoderState* state);
+
+/**
+ * Acquires pointer to internal output buffer.
+ *
+ * This method is used to make language bindings easier and more efficient:
+ *  -# push data to ::BrotliDecoderDecompressStream,
+ *     until ::BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT is reported
+ *  -# use ::BrotliDecoderTakeOutput to peek bytes and copy to language-specific
+ *     entity
+ *
+ * Also this could be useful if there is an output stream that is able to
+ * consume all the provided data (e.g. when data is saved to file system).
+ *
+ * @attention After every call to ::BrotliDecoderTakeOutput @p *size bytes of
+ *            output are considered consumed for all consecutive calls to the
+ *            instance methods; returned pointer becomes invalidated as well.
+ *
+ * @note Decoder output is not guaranteed to be contiguous. This means that
+ *       after the size-unrestricted call to ::BrotliDecoderTakeOutput,
+ *       immediate next call to ::BrotliDecoderTakeOutput may return more data.
+ *
+ * @param state decoder instance
+ * @param[in, out] size @b in: number of bytes caller is ready to take, @c 0 if
+ *                 any amount could be handled; \n
+ *                 @b out: amount of data pointed by returned pointer and
+ *                 considered consumed; \n
+ *                 out value never exceeds in value, unless in value is @c 0
+ * @returns pointer to output data
+ */
+BROTLI_DEC_API const uint8_t* BrotliDecoderTakeOutput(
+    BrotliDecoderState* state, size_t* size);
+
+/**
+ * Checks if instance has already consumed input.
+ *
+ * Instance that returns ::BROTLI_FALSE is considered "fresh" and could be
+ * reused.
+ *
+ * @param state decoder instance
+ * @returns ::BROTLI_TRUE if decoder has already used some input bytes
+ * @returns ::BROTLI_FALSE otherwise
+ */
+BROTLI_DEC_API BROTLI_BOOL BrotliDecoderIsUsed(const BrotliDecoderState* state);
+
+/**
+ * Checks if decoder instance reached the final state.
+ *
+ * @param state decoder instance
+ * @returns ::BROTLI_TRUE if decoder is in a state where it reached the end of
+ *          the input and produced all of the output
+ * @returns ::BROTLI_FALSE otherwise
+ */
+BROTLI_DEC_API BROTLI_BOOL BrotliDecoderIsFinished(
+    const BrotliDecoderState* state);
+
+/**
+ * Acquires a detailed error code.
+ *
+ * Should be used only after ::BrotliDecoderDecompressStream returns
+ * ::BROTLI_DECODER_RESULT_ERROR.
+ *
+ * See also ::BrotliDecoderErrorString
+ *
+ * @param state decoder instance
+ * @returns last saved error code
+ */
+BROTLI_DEC_API BrotliDecoderErrorCode BrotliDecoderGetErrorCode(
+    const BrotliDecoderState* state);
+
+/**
+ * Converts error code to a c-string.
+ */
+BROTLI_DEC_API const char* BrotliDecoderErrorString(BrotliDecoderErrorCode c);
+
+/**
+ * Gets a decoder library version.
+ *
+ * Look at BROTLI_VERSION for more information.
+ */
+BROTLI_DEC_API uint32_t BrotliDecoderVersion(void);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
+
+#endif  /* BROTLI_DEC_DECODE_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include/brotli/encode.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include/brotli/encode.h
new file mode 100644
index 0000000000..b2774cb631
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include/brotli/encode.h
@@ -0,0 +1,448 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/**
+ * @file
+ * API for Brotli compression.
+ */
+
+#ifndef BROTLI_ENC_ENCODE_H_
+#define BROTLI_ENC_ENCODE_H_
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Minimal value for ::BROTLI_PARAM_LGWIN parameter. */
+#define BROTLI_MIN_WINDOW_BITS 10
+/**
+ * Maximal value for ::BROTLI_PARAM_LGWIN parameter.
+ *
+ * @note equal to @c BROTLI_MAX_DISTANCE_BITS constant.
+ */
+#define BROTLI_MAX_WINDOW_BITS 24
+/**
+ * Maximal value for ::BROTLI_PARAM_LGWIN parameter
+ * in "Large Window Brotli" (32-bit).
+ */
+#define BROTLI_LARGE_MAX_WINDOW_BITS 30
+/** Minimal value for ::BROTLI_PARAM_LGBLOCK parameter. */
+#define BROTLI_MIN_INPUT_BLOCK_BITS 16
+/** Maximal value for ::BROTLI_PARAM_LGBLOCK parameter. */
+#define BROTLI_MAX_INPUT_BLOCK_BITS 24
+/** Minimal value for ::BROTLI_PARAM_QUALITY parameter. */
+#define BROTLI_MIN_QUALITY 0
+/** Maximal value for ::BROTLI_PARAM_QUALITY parameter. */
+#define BROTLI_MAX_QUALITY 11
+
+/** Options for ::BROTLI_PARAM_MODE parameter. */
+typedef enum BrotliEncoderMode {
+  /**
+   * Default compression mode.
+   *
+   * In this mode compressor does not know anything in advance about the
+   * properties of the input.
+   */
+  BROTLI_MODE_GENERIC = 0,
+  /** Compression mode for UTF-8 formatted text input. */
+  BROTLI_MODE_TEXT = 1,
+  /** Compression mode used in WOFF 2.0. */
+  BROTLI_MODE_FONT = 2
+} BrotliEncoderMode;
+
+/** Default value for ::BROTLI_PARAM_QUALITY parameter. */
+#define BROTLI_DEFAULT_QUALITY 11
+/** Default value for ::BROTLI_PARAM_LGWIN parameter. */
+#define BROTLI_DEFAULT_WINDOW 22
+/** Default value for ::BROTLI_PARAM_MODE parameter. */
+#define BROTLI_DEFAULT_MODE BROTLI_MODE_GENERIC
+
+/** Operations that can be performed by streaming encoder. */
+typedef enum BrotliEncoderOperation {
+  /**
+   * Process input.
+   *
+   * Encoder may postpone producing output, until it has processed enough input.
+   */
+  BROTLI_OPERATION_PROCESS = 0,
+  /**
+   * Produce output for all processed input.
+   *
+   * Actual flush is performed when input stream is depleted and there is enough
+   * space in output stream. This means that client should repeat
+   * ::BROTLI_OPERATION_FLUSH operation until @p available_in becomes @c 0, and
+   * ::BrotliEncoderHasMoreOutput returns ::BROTLI_FALSE. If output is acquired
+   * via ::BrotliEncoderTakeOutput, then operation should be repeated after
+   * output buffer is drained.
+   *
+   * @warning Until flush is complete, client @b SHOULD @b NOT swap,
+   *          reduce or extend input stream.
+   *
+   * When flush is complete, output data will be sufficient for decoder to
+   * reproduce all the given input.
+   */
+  BROTLI_OPERATION_FLUSH = 1,
+  /**
+   * Finalize the stream.
+   *
+   * Actual finalization is performed when input stream is depleted and there is
+   * enough space in output stream. This means that client should repeat
+   * ::BROTLI_OPERATION_FINISH operation until @p available_in becomes @c 0, and
+   * ::BrotliEncoderHasMoreOutput returns ::BROTLI_FALSE. If output is acquired
+   * via ::BrotliEncoderTakeOutput, then operation should be repeated after
+   * output buffer is drained.
+   *
+   * @warning Until finalization is complete, client @b SHOULD @b NOT swap,
+   *          reduce or extend input stream.
+   *
+   * Helper function ::BrotliEncoderIsFinished checks if stream is finalized and
+   * output fully dumped.
+   *
+   * Adding more input data to finalized stream is impossible.
+   */
+  BROTLI_OPERATION_FINISH = 2,
+  /**
+   * Emit metadata block to stream.
+   *
+   * Metadata is opaque to Brotli: neither encoder, nor decoder processes this
+   * data or relies on it. It may be used to pass some extra information from
+   * encoder client to decoder client without interfering with main data stream.
+   *
+   * @note Encoder may emit empty metadata blocks internally, to pad encoded
+   *       stream to byte boundary.
+   *
+   * @warning Until emitting metadata is complete client @b SHOULD @b NOT swap,
+   *          reduce or extend input stream.
+   *
+   * @warning The whole content of input buffer is considered to be the content
+   *          of metadata block. Do @b NOT @e append metadata to input stream,
+   *          before it is depleted with other operations.
+   *
+   * Stream is soft-flushed before metadata block is emitted. Metadata block
+   * @b MUST be no longer than 16MiB.
+   */
+  BROTLI_OPERATION_EMIT_METADATA = 3
+} BrotliEncoderOperation;
+
+/** Options to be used with ::BrotliEncoderSetParameter. */
+typedef enum BrotliEncoderParameter {
+  /**
+   * Tune encoder for specific input.
+   *
+   * ::BrotliEncoderMode enumerates all available values.
+   */
+  BROTLI_PARAM_MODE = 0,
+  /**
+   * The main compression speed-density lever.
+   *
+   * The higher the quality, the slower the compression. Range is
+   * from ::BROTLI_MIN_QUALITY to ::BROTLI_MAX_QUALITY.
+   */
+  BROTLI_PARAM_QUALITY = 1,
+  /**
+   * Recommended sliding LZ77 window size.
+   *
+   * Encoder may reduce this value, e.g. if input is much smaller than
+   * window size.
+   *
+   * Window size is `(1 << value) - 16`.
+   *
+   * Range is from ::BROTLI_MIN_WINDOW_BITS to ::BROTLI_MAX_WINDOW_BITS.
+   */
+  BROTLI_PARAM_LGWIN = 2,
+  /**
+   * Recommended input block size.
+   *
+   * Encoder may reduce this value, e.g. if input is much smaller than input
+   * block size.
+   *
+   * Range is from ::BROTLI_MIN_INPUT_BLOCK_BITS to
+   * ::BROTLI_MAX_INPUT_BLOCK_BITS.
+   *
+   * @note Bigger input block size allows better compression, but consumes more
+   *       memory. \n The rough formula of memory used for temporary input
+   *       storage is `3 << lgBlock`.
+   */
+  BROTLI_PARAM_LGBLOCK = 3,
+  /**
+   * Flag that affects usage of "literal context modeling" format feature.
+   *
+   * This flag is a "decoding-speed vs compression ratio" trade-off.
+   */
+  BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING = 4,
+  /**
+   * Estimated total input size for all ::BrotliEncoderCompressStream calls.
+   *
+   * The default value is 0, which means that the total input size is unknown.
+   */
+  BROTLI_PARAM_SIZE_HINT = 5,
+  /**
+   * Flag that determines if "Large Window Brotli" is used.
+   */
+  BROTLI_PARAM_LARGE_WINDOW = 6,
+  /**
+   * Recommended number of postfix bits (NPOSTFIX).
+   *
+   * Encoder may change this value.
+   *
+   * Range is from 0 to ::BROTLI_MAX_NPOSTFIX.
+   */
+  BROTLI_PARAM_NPOSTFIX = 7,
+  /**
+   * Recommended number of direct distance codes (NDIRECT).
+   *
+   * Encoder may change this value.
+   *
+   * Range is from 0 to (15 << NPOSTFIX) in steps of (1 << NPOSTFIX).
+   */
+  BROTLI_PARAM_NDIRECT = 8,
+  /**
+   * Number of bytes of input stream already processed by a different instance.
+   *
+   * @note It is important to configure all the encoder instances with the same
+   *       parameters (except this one) so that all the encoded parts obey the
+   *       same restrictions implied by the header.
+   *
+   * If offset is not 0, then stream header is omitted.
+   * In any case output start is byte aligned, so for proper streams stitching
+   * "predecessor" stream must be flushed.
+   *
+   * Range is not artificially limited, but all the values greater than or
+   * equal to the maximal window size have the same effect. Values greater than
+   * 2**30 are not allowed.
+   */
+  BROTLI_PARAM_STREAM_OFFSET = 9
+} BrotliEncoderParameter;
+
+/**
+ * Opaque structure that holds encoder state.
+ *
+ * Allocated and initialized with ::BrotliEncoderCreateInstance.
+ * Cleaned up and deallocated with ::BrotliEncoderDestroyInstance.
+ */
+typedef struct BrotliEncoderStateStruct BrotliEncoderState;
+
+/**
+ * Sets the specified parameter to the given encoder instance.
+ *
+ * @param state encoder instance
+ * @param param parameter to set
+ * @param value new parameter value
+ * @returns ::BROTLI_FALSE if parameter is unrecognized, or value is invalid
+ * @returns ::BROTLI_FALSE if value of parameter can not be changed at current
+ *          encoder state (e.g. when encoding is started, window size might be
+ *          already encoded and therefore it is impossible to change it)
+ * @returns ::BROTLI_TRUE if value is accepted
+ * @warning invalid values might be accepted in case they would not break
+ *          encoding process.
+ */
+BROTLI_ENC_API BROTLI_BOOL BrotliEncoderSetParameter(
+    BrotliEncoderState* state, BrotliEncoderParameter param, uint32_t value);
+
+/**
+ * Creates an instance of ::BrotliEncoderState and initializes it.
+ *
+ * @p alloc_func and @p free_func @b MUST be both zero or both non-zero. In the
+ * case they are both zero, default memory allocators are used. @p opaque is
+ * passed to @p alloc_func and @p free_func when they are called. @p free_func
+ * has to return without doing anything when asked to free a NULL pointer.
+ *
+ * @param alloc_func custom memory allocation function
+ * @param free_func custom memory free function
+ * @param opaque custom memory manager handle
+ * @returns @c 0 if instance can not be allocated or initialized
+ * @returns pointer to initialized ::BrotliEncoderState otherwise
+ */
+BROTLI_ENC_API BrotliEncoderState* BrotliEncoderCreateInstance(
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
+
+/**
+ * Deinitializes and frees ::BrotliEncoderState instance.
+ *
+ * @param state encoder instance to be cleaned up and deallocated
+ */
+BROTLI_ENC_API void BrotliEncoderDestroyInstance(BrotliEncoderState* state);
+
+/**
+ * Calculates the output size bound for the given @p input_size.
+ *
+ * @warning Result is only valid if quality is at least @c 2 and, in
+ *          case ::BrotliEncoderCompressStream was used, no flushes
+ *          (::BROTLI_OPERATION_FLUSH) were performed.
+ *
+ * @param input_size size of projected input
+ * @returns @c 0 if result does not fit @c size_t
+ */
+BROTLI_ENC_API size_t BrotliEncoderMaxCompressedSize(size_t input_size);
+
+/**
+ * Performs one-shot memory-to-memory compression.
+ *
+ * Compresses the data in @p input_buffer into @p encoded_buffer, and sets
+ * @p *encoded_size to the compressed length.
+ *
+ * @note If ::BrotliEncoderMaxCompressedSize(@p input_size) returns non-zero
+ *       value, then output is guaranteed to be no longer than that.
+ *
+ * @note If @p lgwin is greater than ::BROTLI_MAX_WINDOW_BITS then resulting
+ *       stream might be incompatible with RFC 7932; to decode such streams,
+ *       decoder should be configured with
+ *       ::BROTLI_DECODER_PARAM_LARGE_WINDOW = @c 1
+ *
+ * @param quality quality parameter value, e.g. ::BROTLI_DEFAULT_QUALITY
+ * @param lgwin lgwin parameter value, e.g. ::BROTLI_DEFAULT_WINDOW
+ * @param mode mode parameter value, e.g. ::BROTLI_DEFAULT_MODE
+ * @param input_size size of @p input_buffer
+ * @param input_buffer input data buffer with at least @p input_size
+ *        addressable bytes
+ * @param[in, out] encoded_size @b in: size of @p encoded_buffer; \n
+ *                 @b out: length of compressed data written to
+ *                 @p encoded_buffer, or @c 0 if compression fails
+ * @param encoded_buffer compressed data destination buffer
+ * @returns ::BROTLI_FALSE in case of compression error
+ * @returns ::BROTLI_FALSE if output buffer is too small
+ * @returns ::BROTLI_TRUE otherwise
+ */
+BROTLI_ENC_API BROTLI_BOOL BrotliEncoderCompress(
+    int quality, int lgwin, BrotliEncoderMode mode, size_t input_size,
+    const uint8_t input_buffer[BROTLI_ARRAY_PARAM(input_size)],
+    size_t* encoded_size,
+    uint8_t encoded_buffer[BROTLI_ARRAY_PARAM(*encoded_size)]);
+
+/**
+ * Compresses input stream to output stream.
+ *
+ * The values @p *available_in and @p *available_out must specify the number of
+ * bytes addressable at @p *next_in and @p *next_out respectively.
+ * When @p *available_out is @c 0, @p next_out is allowed to be @c NULL.
+ *
+ * After each call, @p *available_in will be decremented by the amount of input
+ * bytes consumed, and the @p *next_in pointer will be incremented by that
+ * amount. Similarly, @p *available_out will be decremented by the amount of
+ * output bytes written, and the @p *next_out pointer will be incremented by
+ * that amount.
+ *
+ * @p total_out, if it is not a null-pointer, will be set to the number
+ * of bytes compressed since the last @p state initialization.
+ *
+ *
+ *
+ * Internally workflow consists of 3 tasks:
+ *  -# (optionally) copy input data to internal buffer
+ *  -# actually compress data and (optionally) store it to internal buffer
+ *  -# (optionally) copy compressed bytes from internal buffer to output stream
+ *
+ * Whenever all 3 tasks can't move forward anymore, or error occurs, this
+ * method returns the control flow to caller.
+ *
+ * @p op is used to perform flush, finish the stream, or inject metadata block.
+ * See ::BrotliEncoderOperation for more information.
+ *
+ * Flushing the stream means forcing encoding of all input passed to encoder and
+ * completing the current output block, so it could be fully decoded by stream
+ * decoder. To perform flush set @p op to ::BROTLI_OPERATION_FLUSH.
+ * Under some circumstances (e.g. lack of output stream capacity) this operation
+ * would require several calls to ::BrotliEncoderCompressStream. The method must
+ * be called again until both input stream is depleted and encoder has no more
+ * output (see ::BrotliEncoderHasMoreOutput) after the method is called.
+ *
+ * Finishing the stream means encoding of all input passed to encoder and
+ * adding specific "final" marks, so stream decoder could determine that stream
+ * is complete. To perform finish set @p op to ::BROTLI_OPERATION_FINISH.
+ * Under some circumstances (e.g. lack of output stream capacity) this operation
+ * would require several calls to ::BrotliEncoderCompressStream. The method must
+ * be called again until both input stream is depleted and encoder has no more
+ * output (see ::BrotliEncoderHasMoreOutput) after the method is called.
+ *
+ * @warning When flushing and finishing, @p op should not change until operation
+ *          is complete; input stream should not be swapped, reduced or
+ *          extended as well.
+ *
+ * @param state encoder instance
+ * @param op requested operation
+ * @param[in, out] available_in @b in: amount of available input; \n
+ *                 @b out: amount of unused input
+ * @param[in, out] next_in pointer to the next input byte
+ * @param[in, out] available_out @b in: length of output buffer; \n
+ *                 @b out: remaining size of output buffer
+ * @param[in, out] next_out compressed output buffer cursor;
+ *                 can be @c NULL if @p available_out is @c 0
+ * @param[out] total_out number of bytes produced so far; can be @c NULL
+ * @returns ::BROTLI_FALSE if there was an error
+ * @returns ::BROTLI_TRUE otherwise
+ */
+BROTLI_ENC_API BROTLI_BOOL BrotliEncoderCompressStream(
+    BrotliEncoderState* state, BrotliEncoderOperation op, size_t* available_in,
+    const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
+    size_t* total_out);
+
+/**
+ * Checks if encoder instance reached the final state.
+ *
+ * @param state encoder instance
+ * @returns ::BROTLI_TRUE if encoder is in a state where it reached the end of
+ *          the input and produced all of the output
+ * @returns ::BROTLI_FALSE otherwise
+ */
+BROTLI_ENC_API BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* state);
+
+/**
+ * Checks if encoder has more output.
+ *
+ * @param state encoder instance
+ * @returns ::BROTLI_TRUE, if encoder has some unconsumed output
+ * @returns ::BROTLI_FALSE otherwise
+ */
+BROTLI_ENC_API BROTLI_BOOL BrotliEncoderHasMoreOutput(
+    BrotliEncoderState* state);
+
+/**
+ * Acquires pointer to internal output buffer.
+ *
+ * This method is used to make language bindings easier and more efficient:
+ *  -# push data to ::BrotliEncoderCompressStream,
+ *     until ::BrotliEncoderHasMoreOutput returns ::BROTLI_TRUE
+ *  -# use ::BrotliEncoderTakeOutput to peek bytes and copy to language-specific
+ *     entity
+ *
+ * Also this could be useful if there is an output stream that is able to
+ * consume all the provided data (e.g. when data is saved to file system).
+ *
+ * @attention After every call to ::BrotliEncoderTakeOutput @p *size bytes of
+ *            output are considered consumed for all consecutive calls to the
+ *            instance methods; returned pointer becomes invalidated as well.
+ *
+ * @note Encoder output is not guaranteed to be contiguous. This means that
+ *       after the size-unrestricted call to ::BrotliEncoderTakeOutput,
+ *       immediate next call to ::BrotliEncoderTakeOutput may return more data.
+ *
+ * @param state encoder instance
+ * @param[in, out] size @b in: number of bytes caller is ready to take, @c 0 if
+ *                 any amount could be handled; \n
+ *                 @b out: amount of data pointed by returned pointer and
+ *                 considered consumed; \n
+ *                 out value is never greater than in value, unless it is @c 0
+ * @returns pointer to output data
+ */
+BROTLI_ENC_API const uint8_t* BrotliEncoderTakeOutput(
+    BrotliEncoderState* state, size_t* size);
+
+
+/**
+ * Gets an encoder library version.
+ *
+ * Look at BROTLI_VERSION for more information.
+ */
+BROTLI_ENC_API uint32_t BrotliEncoderVersion(void);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_ENCODE_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include/brotli/port.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include/brotli/port.h
new file mode 100644
index 0000000000..825237a335
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include/brotli/port.h
@@ -0,0 +1,288 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Macros for compiler / platform specific API declarations. */
+
+#ifndef BROTLI_COMMON_PORT_H_
+#define BROTLI_COMMON_PORT_H_
+
+/* The following macros were borrowed from https://github.com/nemequ/hedley
+ * with permission of original author - Evan Nemerson <evan@nemerson.com> */
+
+/* >>> >>> >>> hedley macros */
+
+#define BROTLI_MAKE_VERSION(major, minor, revision) \
+  (((major) * 1000000) + ((minor) * 1000) + (revision))
+
+#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__)
+#define BROTLI_GNUC_VERSION \
+  BROTLI_MAKE_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)
+#elif defined(__GNUC__)
+#define BROTLI_GNUC_VERSION BROTLI_MAKE_VERSION(__GNUC__, __GNUC_MINOR__, 0)
+#endif
+
+#if defined(BROTLI_GNUC_VERSION)
+#define BROTLI_GNUC_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_GNUC_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_GNUC_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000)
+#define BROTLI_MSVC_VERSION                                \
+  BROTLI_MAKE_VERSION((_MSC_FULL_VER / 10000000),          \
+                      (_MSC_FULL_VER % 10000000) / 100000, \
+                      (_MSC_FULL_VER % 100000) / 100)
+#elif defined(_MSC_FULL_VER)
+#define BROTLI_MSVC_VERSION                              \
+  BROTLI_MAKE_VERSION((_MSC_FULL_VER / 1000000),         \
+                      (_MSC_FULL_VER % 1000000) / 10000, \
+                      (_MSC_FULL_VER % 10000) / 10)
+#elif defined(_MSC_VER)
+#define BROTLI_MSVC_VERSION \
+  BROTLI_MAKE_VERSION(_MSC_VER / 100, _MSC_VER % 100, 0)
+#endif
+
+#if !defined(_MSC_VER)
+#define BROTLI_MSVC_VERSION_CHECK(major, minor, patch) (0)
+#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
+#define BROTLI_MSVC_VERSION_CHECK(major, minor, patch) \
+  (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch)))
+#elif defined(_MSC_VER) && (_MSC_VER >= 1200)
+#define BROTLI_MSVC_VERSION_CHECK(major, minor, patch) \
+  (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch)))
+#else
+#define BROTLI_MSVC_VERSION_CHECK(major, minor, patch) \
+  (_MSC_VER >= ((major * 100) + (minor)))
+#endif
+
+#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE)
+#define BROTLI_INTEL_VERSION                   \
+  BROTLI_MAKE_VERSION(__INTEL_COMPILER / 100,  \
+                      __INTEL_COMPILER % 100,  \
+                      __INTEL_COMPILER_UPDATE)
+#elif defined(__INTEL_COMPILER)
+#define BROTLI_INTEL_VERSION \
+  BROTLI_MAKE_VERSION(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0)
+#endif
+
+#if defined(BROTLI_INTEL_VERSION)
+#define BROTLI_INTEL_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_INTEL_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_INTEL_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__PGI) && \
+    defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__)
+#define BROTLI_PGI_VERSION \
+  BROTLI_MAKE_VERSION(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__)
+#endif
+
+#if defined(BROTLI_PGI_VERSION)
+#define BROTLI_PGI_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_PGI_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_PGI_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000)
+#define BROTLI_SUNPRO_VERSION                                       \
+  BROTLI_MAKE_VERSION(                                              \
+    (((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), \
+    (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf),   \
+    (__SUNPRO_C & 0xf) * 10)
+#elif defined(__SUNPRO_C)
+#define BROTLI_SUNPRO_VERSION                  \
+  BROTLI_MAKE_VERSION((__SUNPRO_C >> 8) & 0xf, \
+                      (__SUNPRO_C >> 4) & 0xf, \
+                      (__SUNPRO_C) & 0xf)
+#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000)
+#define BROTLI_SUNPRO_VERSION                                         \
+  BROTLI_MAKE_VERSION(                                                \
+    (((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), \
+    (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf),   \
+    (__SUNPRO_CC & 0xf) * 10)
+#elif defined(__SUNPRO_CC)
+#define BROTLI_SUNPRO_VERSION                   \
+  BROTLI_MAKE_VERSION((__SUNPRO_CC >> 8) & 0xf, \
+                      (__SUNPRO_CC >> 4) & 0xf, \
+                      (__SUNPRO_CC) & 0xf)
+#endif
+
+#if defined(BROTLI_SUNPRO_VERSION)
+#define BROTLI_SUNPRO_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_SUNPRO_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_SUNPRO_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION)
+#define BROTLI_ARM_VERSION                                       \
+  BROTLI_MAKE_VERSION((__ARMCOMPILER_VERSION / 1000000),         \
+                      (__ARMCOMPILER_VERSION % 1000000) / 10000, \
+                      (__ARMCOMPILER_VERSION % 10000) / 100)
+#elif defined(__CC_ARM) && defined(__ARMCC_VERSION)
+#define BROTLI_ARM_VERSION                                 \
+  BROTLI_MAKE_VERSION((__ARMCC_VERSION / 1000000),         \
+                      (__ARMCC_VERSION % 1000000) / 10000, \
+                      (__ARMCC_VERSION % 10000) / 100)
+#endif
+
+#if defined(BROTLI_ARM_VERSION)
+#define BROTLI_ARM_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_ARM_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_ARM_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__ibmxl__)
+#define BROTLI_IBM_VERSION                    \
+  BROTLI_MAKE_VERSION(__ibmxl_version__,      \
+                      __ibmxl_release__,      \
+                      __ibmxl_modification__)
+#elif defined(__xlC__) && defined(__xlC_ver__)
+#define BROTLI_IBM_VERSION \
+  BROTLI_MAKE_VERSION(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff)
+#elif defined(__xlC__)
+#define BROTLI_IBM_VERSION BROTLI_MAKE_VERSION(__xlC__ >> 8, __xlC__ & 0xff, 0)
+#endif
+
+#if defined(BROTLI_IBM_VERSION)
+#define BROTLI_IBM_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_IBM_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_IBM_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__TI_COMPILER_VERSION__)
+#define BROTLI_TI_VERSION                                         \
+  BROTLI_MAKE_VERSION((__TI_COMPILER_VERSION__ / 1000000),        \
+                      (__TI_COMPILER_VERSION__ % 1000000) / 1000, \
+                      (__TI_COMPILER_VERSION__ % 1000))
+#endif
+
+#if defined(BROTLI_TI_VERSION)
+#define BROTLI_TI_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_TI_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_TI_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__IAR_SYSTEMS_ICC__)
+#if __VER__ > 1000  /* decoded as major * 1000000 + minor * 1000 + patch */
+#define BROTLI_IAR_VERSION                     \
+  BROTLI_MAKE_VERSION((__VER__ / 1000000),     \
+                      (__VER__ / 1000) % 1000, \
+                      (__VER__ % 1000))
+#else  /* decoded as major * 100 + minor */
+#define BROTLI_IAR_VERSION BROTLI_MAKE_VERSION(__VER__ / 100, __VER__ % 100, 0)
+#endif  /* __VER__ > 1000 */
+#endif  /* __IAR_SYSTEMS_ICC__ */
+
+#if defined(BROTLI_IAR_VERSION)
+#define BROTLI_IAR_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_IAR_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_IAR_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__TINYC__)
+#define BROTLI_TINYC_VERSION \
+  BROTLI_MAKE_VERSION(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100)
+#endif
+
+#if defined(BROTLI_TINYC_VERSION)
+#define BROTLI_TINYC_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_TINYC_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_TINYC_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__has_attribute)
+#define BROTLI_GNUC_HAS_ATTRIBUTE(attribute, major, minor, patch) \
+  __has_attribute(attribute)
+#else
+#define BROTLI_GNUC_HAS_ATTRIBUTE(attribute, major, minor, patch) \
+  BROTLI_GNUC_VERSION_CHECK(major, minor, patch)
+#endif
+
+#if defined(__has_builtin)
+#define BROTLI_GNUC_HAS_BUILTIN(builtin, major, minor, patch) \
+  __has_builtin(builtin)
+#else
+#define BROTLI_GNUC_HAS_BUILTIN(builtin, major, minor, patch) \
+  BROTLI_GNUC_VERSION_CHECK(major, minor, patch)
+#endif
+
+#if defined(__has_feature)
+#define BROTLI_HAS_FEATURE(feature) __has_feature(feature)
+#else
+#define BROTLI_HAS_FEATURE(feature) (0)
+#endif
+
+#if defined(ADDRESS_SANITIZER) || BROTLI_HAS_FEATURE(address_sanitizer) || \
+    defined(THREAD_SANITIZER) || BROTLI_HAS_FEATURE(thread_sanitizer) ||   \
+    defined(MEMORY_SANITIZER) || BROTLI_HAS_FEATURE(memory_sanitizer)
+#define BROTLI_SANITIZED 1
+#else
+#define BROTLI_SANITIZED 0
+#endif
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#define BROTLI_PUBLIC
+#elif BROTLI_GNUC_VERSION_CHECK(3, 3, 0) ||                         \
+    BROTLI_TI_VERSION_CHECK(8, 0, 0) ||                             \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0) ||                         \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) ||                            \
+    BROTLI_IBM_VERSION_CHECK(13, 1, 0) ||                           \
+    BROTLI_SUNPRO_VERSION_CHECK(5, 11, 0) ||                        \
+    (BROTLI_TI_VERSION_CHECK(7, 3, 0) &&                            \
+     defined(__TI_GNU_ATTRIBUTE_SUPPORT__) && defined(__TI_EABI__))
+#define BROTLI_PUBLIC __attribute__ ((visibility ("default")))
+#else
+#define BROTLI_PUBLIC
+#endif
+
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
+    !defined(__STDC_NO_VLA__) && !defined(__cplusplus) &&         \
+    !defined(__PGI) && !defined(__PGIC__) && !defined(__TINYC__)
+#define BROTLI_ARRAY_PARAM(name) (name)
+#else
+#define BROTLI_ARRAY_PARAM(name)
+#endif
+
+/* <<< <<< <<< end of hedley macros. */
+
+#if defined(BROTLI_SHARED_COMPILATION)
+#if defined(_WIN32)
+#if defined(BROTLICOMMON_SHARED_COMPILATION)
+#define BROTLI_COMMON_API __declspec(dllexport)
+#else
+#define BROTLI_COMMON_API __declspec(dllimport)
+#endif  /* BROTLICOMMON_SHARED_COMPILATION */
+#if defined(BROTLIDEC_SHARED_COMPILATION)
+#define BROTLI_DEC_API __declspec(dllexport)
+#else
+#define BROTLI_DEC_API __declspec(dllimport)
+#endif  /* BROTLIDEC_SHARED_COMPILATION */
+#if defined(BROTLIENC_SHARED_COMPILATION)
+#define BROTLI_ENC_API __declspec(dllexport)
+#else
+#define BROTLI_ENC_API __declspec(dllimport)
+#endif  /* BROTLIENC_SHARED_COMPILATION */
+#else  /* _WIN32 */
+#define BROTLI_COMMON_API BROTLI_PUBLIC
+#define BROTLI_DEC_API BROTLI_PUBLIC
+#define BROTLI_ENC_API BROTLI_PUBLIC
+#endif  /* _WIN32 */
+#else  /* BROTLI_SHARED_COMPILATION */
+#define BROTLI_COMMON_API
+#define BROTLI_DEC_API
+#define BROTLI_ENC_API
+#endif
+
+#endif  /* BROTLI_COMMON_PORT_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include/brotli/types.h b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include/brotli/types.h
new file mode 100644
index 0000000000..eff1a3cd07
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/include/brotli/types.h
@@ -0,0 +1,83 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/**
+ * @file
+ * Common types used in decoder and encoder API.
+ */
+
+#ifndef BROTLI_COMMON_TYPES_H_
+#define BROTLI_COMMON_TYPES_H_
+
+#include <stddef.h>  /* for size_t */
+
+#if defined(_MSC_VER) && (_MSC_VER < 1600)
+typedef __int8 int8_t;
+typedef unsigned __int8 uint8_t;
+typedef __int16 int16_t;
+typedef unsigned __int16 uint16_t;
+typedef __int32 int32_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+typedef __int64 int64_t;
+#else
+#include <stdint.h>
+#endif  /* defined(_MSC_VER) && (_MSC_VER < 1600) */
+
+/**
+ * A portable @c bool replacement.
+ *
+ * ::BROTLI_BOOL is a "documentation" type: actually it is @c int, but in API it
+ * denotes a type, whose only values are ::BROTLI_TRUE and ::BROTLI_FALSE.
+ *
+ * ::BROTLI_BOOL values passed to Brotli should either be ::BROTLI_TRUE or
+ * ::BROTLI_FALSE, or be a result of ::TO_BROTLI_BOOL macros.
+ *
+ * ::BROTLI_BOOL values returned by Brotli should not be tested for equality
+ * with @c true, @c false, ::BROTLI_TRUE, ::BROTLI_FALSE, but rather should be
+ * evaluated, for example: @code{.cpp}
+ * if (SomeBrotliFunction(encoder, BROTLI_TRUE) &&
+ *     !OtherBrotliFunction(decoder, BROTLI_FALSE)) {
+ *   bool x = !!YetAnotherBrotliFunction(encoder, TO_BROTLI_BOOL(2 * 2 == 4));
+ *   DoSomething(x);
+ * }
+ * @endcode
+ */
+#define BROTLI_BOOL int
+/** Portable @c true replacement. */
+#define BROTLI_TRUE 1
+/** Portable @c false replacement. */
+#define BROTLI_FALSE 0
+/** @c bool to ::BROTLI_BOOL conversion macros. */
+#define TO_BROTLI_BOOL(X) (!!(X) ? BROTLI_TRUE : BROTLI_FALSE)
+/* (high << 32) | low as uint64_t; low is expected to fit in 32 bits. */
+#define BROTLI_MAKE_UINT64_T(high, low) ((((uint64_t)(high)) << 32) | (low))
+
+#define BROTLI_UINT32_MAX (~((uint32_t)0))
+#define BROTLI_SIZE_MAX (~((size_t)0))
+
+/**
+ * Allocating function pointer type.
+ *
+ * @param opaque custom memory manager handle provided by client
+ * @param size requested memory region size; can not be @c 0
+ * @returns @c 0 in the case of failure
+ * @returns a valid pointer to a memory region of at least @p size bytes
+ *          long otherwise
+ */
+typedef void* (*brotli_alloc_func)(void* opaque, size_t size);
+
+/**
+ * Deallocating function pointer type.
+ *
+ * This function @b SHOULD do nothing if @p address is @c 0.
+ *
+ * @param opaque custom memory manager handle provided by client
+ * @param address memory region pointer returned by ::brotli_alloc_func, or @c 0
+ */
+typedef void (*brotli_free_func)(void* opaque, void* address);
+
+#endif  /* BROTLI_COMMON_TYPES_H_ */
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/tools/brotli.c b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/tools/brotli.c
new file mode 100644
index 0000000000..04606a897d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/tools/brotli.c
@@ -0,0 +1,1107 @@
+/* Copyright 2014 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Command line interface for Brotli library. */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <time.h>
+
+#include "../common/constants.h"
+#include "../common/version.h"
+#include <brotli/decode.h>
+#include <brotli/encode.h>
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#include <utime.h>
+#define MAKE_BINARY(FILENO) (FILENO)
+#else
+#include <io.h>
+#include <share.h>
+#include <sys/utime.h>
+
+#define MAKE_BINARY(FILENO) (_setmode((FILENO), _O_BINARY), (FILENO))
+
+#if !defined(__MINGW32__)
+#define STDIN_FILENO _fileno(stdin)
+#define STDOUT_FILENO _fileno(stdout)
+#define S_IRUSR S_IREAD
+#define S_IWUSR S_IWRITE
+#endif
+
+#define fdopen _fdopen
+#define isatty _isatty
+#define unlink _unlink
+#define utimbuf _utimbuf
+#define utime _utime
+
+#define fopen ms_fopen
+#define open ms_open
+
+#define chmod(F, P) (0)
+#define chown(F, O, G) (0)
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1400)
+#define fseek _fseeki64
+#define ftell _ftelli64
+#endif
+
+static FILE* ms_fopen(const char* filename, const char* mode) {
+  FILE* stream = NULL;  /* returned as-is when fopen_s stores nothing */
+  (void)fopen_s(&stream, filename, mode);  /* status is not inspected */
+  return stream;
+}
+
+static int ms_open(const char* filename, int oflag, int pmode) {
+  int fd = -1;  /* returned as-is when _sopen_s stores nothing */
+  (void)_sopen_s(&fd, filename, oflag | O_BINARY, _SH_DENYNO, pmode);
+  return fd;
+}
+#endif  /* WIN32 */
+
+typedef enum {  /* Action selected from the program name and arguments. */
+  COMMAND_COMPRESS,        /* default, unless invoked as "unbrotli" */
+  COMMAND_DECOMPRESS,      /* "unbrotli" alias, or -d */
+  COMMAND_HELP,            /* -h */
+  COMMAND_INVALID,         /* argument parsing failed */
+  COMMAND_TEST_INTEGRITY,  /* -t */
+  COMMAND_NOOP,            /* NOTE(review): origin not visible here; confirm */
+  COMMAND_VERSION          /* -V */
+} Command;
+
+#define DEFAULT_LGWIN 24
+#define DEFAULT_SUFFIX ".br"
+#define MAX_OPTIONS 20
+
+typedef struct {  /* Per-invocation state of the command line tool. */
+  /* Parameters (written by ParseParams from the command line) */
+  int quality;  /* -q N, a single digit option, or -Z (11) */
+  int lgwin;  /* presumably the -w value; TODO confirm */
+  int verbosity;  /* set to 1 by -v */
+  BROTLI_BOOL force_overwrite;  /* -f */
+  BROTLI_BOOL junk_source;  /* -j => true, -k => false */
+  BROTLI_BOOL copy_stat;  /* cleared by -n */
+  BROTLI_BOOL write_to_stdout;  /* -c */
+  BROTLI_BOOL test_integrity;
+  BROTLI_BOOL decompress;
+  BROTLI_BOOL large_window;
+  const char* output_path;  /* argument of -o */
+  const char* suffix;  /* presumably the -S value; TODO confirm */
+  int not_input_indices[MAX_OPTIONS];  /* argv indices that are not inputs */
+  size_t longest_path_len;
+  size_t input_count;
+
+  /* Inner state */
+  int argc;  /* read by ParseParams */
+  char** argv;  /* read by ParseParams */
+  char* modified_path;  /* Storage for path with appended / cut suffix */
+  int iterator;
+  int ignore;
+  BROTLI_BOOL iterator_error;
+  uint8_t* buffer;
+  uint8_t* input;
+  uint8_t* output;
+  const char* current_input_path;
+  const char* current_output_path;
+  int64_t input_file_length;  /* -1, if impossible to calculate */
+  FILE* fin;
+  FILE* fout;
+
+  /* I/O buffers */
+  size_t available_in;
+  const uint8_t* next_in;
+  size_t available_out;
+  uint8_t* next_out;
+
+  /* Reporting */
+  /* size_t would be large enough,
+     until 4GiB+ files are compressed / decompressed on 32-bit CPUs. */
+  size_t total_in;
+  size_t total_out;
+} Context;
+
+/* Parses 1..5 decimal digits (no zero-padding) within [low, high]. */
+static BROTLI_BOOL ParseInt(const char* s, int low, int high, int* result) {
+  int parsed = 0;
+  int len = 0;
+  while (len < 5 && s[len] != 0) {
+    const char digit = s[len];
+    if (digit < '0' || digit > '9') return BROTLI_FALSE;
+    parsed = (parsed * 10) + (digit - '0');
+    ++len;
+  }
+  /* Reject "", a redundant leading zero ("07"), and anything past 5 digits. */
+  if (len == 0 || (len > 1 && s[0] == '0') || s[len] != 0) return BROTLI_FALSE;
+  /* *result is written only on success. */
+  if (parsed < low || parsed > high) return BROTLI_FALSE;
+  *result = parsed;
+  return BROTLI_TRUE;
+}
+
+/* Returns the part of path after its last '/' and, within that, last '\'. */
+static const char* FileName(const char* path) {
+  const char* tail = path;
+  const char* cut = strrchr(tail, '/');
+  if (cut != NULL) tail = cut + 1;  /* drop everything up to the last '/' */
+  cut = strrchr(tail, '\\');
+  return (cut != NULL) ? cut + 1 : tail;  /* same for '\' in the remainder */
+}
+
+/* Derives the default command from the executable name: a base name of
+   "unbrotli" (optionally followed by '.') selects decompression. */
+static Command ParseAlias(const char* name) {
+  static const char kDecompressAlias[] = "unbrotli";
+  const size_t alias_len = sizeof(kDecompressAlias) - 1;
+  const char* base = FileName(name);
+  /* TODO: cast name to lower case? */
+  /* Prefix match only: the name may carry an extension such as ".exe". */
+  if (strncmp(base, kDecompressAlias, alias_len) != 0) return COMMAND_COMPRESS;
+  /* The alias must end the name or be directly followed by '.'. */
+  if (base[alias_len] != 0 && base[alias_len] != '.') return COMMAND_COMPRESS;
+  return COMMAND_DECOMPRESS;
+}
+
+static Command ParseParams(Context* params) {
+  int argc = params->argc;
+  char** argv = params->argv;
+  int i;
+  int next_option_index = 0;
+  size_t input_count = 0;
+  size_t longest_path_len = 1;
+  BROTLI_BOOL command_set = BROTLI_FALSE;
+  BROTLI_BOOL quality_set = BROTLI_FALSE;
+  BROTLI_BOOL output_set = BROTLI_FALSE;
+  BROTLI_BOOL keep_set = BROTLI_FALSE;
+  BROTLI_BOOL lgwin_set = BROTLI_FALSE;
+  BROTLI_BOOL suffix_set = BROTLI_FALSE;
+  BROTLI_BOOL after_dash_dash = BROTLI_FALSE;
+  Command command = ParseAlias(argv[0]);
+
+  for (i = 1; i < argc; ++i) {
+    const char* arg = argv[i];
+    /* C99 5.1.2.2.1: "members argv[0] through argv[argc-1] inclusive shall
+       contain pointers to strings"; NULL and 0-length are not forbidden. */
+    size_t arg_len = arg ? strlen(arg) : 0;
+
+    if (arg_len == 0) {
+      params->not_input_indices[next_option_index++] = i;
+      continue;
+    }
+
+    /* Too many options. The expected longest option list is:
+       "-q 0 -w 10 -o f -D d -S b -d -f -k -n -v --", i.e. 16 items in total.
+       This check is an additional guard that is never triggered, but provides
+       a guard for future changes. */
+    if (next_option_index > (MAX_OPTIONS - 2)) {
+      fprintf(stderr, "too many options passed\n");
+      return COMMAND_INVALID;
+    }
+
+    /* Input file entry. */
+    if (after_dash_dash || arg[0] != '-' || arg_len == 1) {
+      input_count++;
+      if (longest_path_len < arg_len) longest_path_len = arg_len;
+      continue;
+    }
+
+    /* Not a file entry. */
+    params->not_input_indices[next_option_index++] = i;
+
+    /* '--' entry stop parsing arguments. */
+    if (arg_len == 2 && arg[1] == '-') {
+      after_dash_dash = BROTLI_TRUE;
+      continue;
+    }
+
+    /* Simple / coalesced options. */
+    if (arg[1] != '-') {
+      size_t j;
+      for (j = 1; j < arg_len; ++j) {
+        char c = arg[j];
+        if (c >= '0' && c <= '9') {
+          if (quality_set) {
+            fprintf(stderr, "quality already set\n");
+            return COMMAND_INVALID;
+          }
+          quality_set = BROTLI_TRUE;
+          params->quality = c - '0';
+          continue;
+        } else if (c == 'c') {
+          if (output_set) {
+            fprintf(stderr, "write to standard output already set\n");
+            return COMMAND_INVALID;
+          }
+          output_set = BROTLI_TRUE;
+          params->write_to_stdout = BROTLI_TRUE;
+          continue;
+        } else if (c == 'd') {
+          if (command_set) {
+            fprintf(stderr, "command already set when parsing -d\n");
+            return COMMAND_INVALID;
+          }
+          command_set = BROTLI_TRUE;
+          command = COMMAND_DECOMPRESS;
+          continue;
+        } else if (c == 'f') {
+          if (params->force_overwrite) {
+            fprintf(stderr, "force output overwrite already set\n");
+            return COMMAND_INVALID;
+          }
+          params->force_overwrite = BROTLI_TRUE;
+          continue;
+        } else if (c == 'h') {
+          /* Don't parse further. */
+          return COMMAND_HELP;
+        } else if (c == 'j' || c == 'k') {
+          if (keep_set) {
+            fprintf(stderr, "argument --rm / -j or --keep / -k already set\n");
+            return COMMAND_INVALID;
+          }
+          keep_set = BROTLI_TRUE;
+          params->junk_source = TO_BROTLI_BOOL(c == 'j');
+          continue;
+        } else if (c == 'n') {
+          if (!params->copy_stat) {
+            fprintf(stderr, "argument --no-copy-stat / -n already set\n");
+            return COMMAND_INVALID;
+          }
+          params->copy_stat = BROTLI_FALSE;
+          continue;
+        } else if (c == 't') {
+          if (command_set) {
+            fprintf(stderr, "command already set when parsing -t\n");
+            return COMMAND_INVALID;
+          }
+          command_set = BROTLI_TRUE;
+          command = COMMAND_TEST_INTEGRITY;
+          continue;
+        } else if (c == 'v') {
+          if (params->verbosity > 0) {
+            fprintf(stderr, "argument --verbose / -v already set\n");
+            return COMMAND_INVALID;
+          }
+          params->verbosity = 1;
+          continue;
+        } else if (c == 'V') {
+          /* Don't parse further. */
+          return COMMAND_VERSION;
+        } else if (c == 'Z') {
+          if (quality_set) {
+            fprintf(stderr, "quality already set\n");
+            return COMMAND_INVALID;
+          }
+          quality_set = BROTLI_TRUE;
+          params->quality = 11;
+          continue;
+        }
+        /* o/q/w/D/S with parameter is expected */
+        if (c != 'o' && c != 'q' && c != 'w' && c != 'D' && c != 'S') {
+          fprintf(stderr, "invalid argument -%c\n", c);
+          return COMMAND_INVALID;
+        }
+        if (j + 1 != arg_len) {
+          fprintf(stderr, "expected parameter for argument -%c\n", c);
+          return COMMAND_INVALID;
+        }
+        i++;
+        if (i == argc || !argv[i] || argv[i][0] == 0) {
+          fprintf(stderr, "expected parameter for argument -%c\n", c);
+          return COMMAND_INVALID;
+        }
+        params->not_input_indices[next_option_index++] = i;
+        if (c == 'o') {
+          if (output_set) {
+            fprintf(stderr, "write to standard output already set (-o)\n");
+            return COMMAND_INVALID;
+          }
+          params->output_path = argv[i];
+        } else if (c == 'q') {
+          if (quality_set) {
+            fprintf(stderr, "quality already set\n");
+            return COMMAND_INVALID;
+          }
+          quality_set = ParseInt(argv[i], BROTLI_MIN_QUALITY,
+                                 BROTLI_MAX_QUALITY, &params->quality);
+          if (!quality_set) {
+            fprintf(stderr, "error parsing quality value [%s]\n", argv[i]);
+            return COMMAND_INVALID;
+          }
+        } else if (c == 'w') {
+          if (lgwin_set) {
+            fprintf(stderr, "lgwin parameter already set\n");
+            return COMMAND_INVALID;
+          }
+          lgwin_set = ParseInt(argv[i], 0,
+                               BROTLI_MAX_WINDOW_BITS, &params->lgwin);
+          if (!lgwin_set) {
+            fprintf(stderr, "error parsing lgwin value [%s]\n", argv[i]);
+            return COMMAND_INVALID;
+          }
+          if (params->lgwin != 0 && params->lgwin < BROTLI_MIN_WINDOW_BITS) {
+            fprintf(stderr,
+                    "lgwin parameter (%d) smaller than the minimum (%d)\n",
+                    params->lgwin, BROTLI_MIN_WINDOW_BITS);
+            return COMMAND_INVALID;
+          }
+        } else if (c == 'S') {
+          if (suffix_set) {
+            fprintf(stderr, "suffix already set\n");
+            return COMMAND_INVALID;
+          }
+          suffix_set = BROTLI_TRUE;
+          params->suffix = argv[i];
+        }
+      }
+    } else {  /* Double-dash. */
+      arg = &arg[2];
+      if (strcmp("best", arg) == 0) {
+        if (quality_set) {
+          fprintf(stderr, "quality already set\n");
+          return COMMAND_INVALID;
+        }
+        quality_set = BROTLI_TRUE;
+        params->quality = 11;
+      } else if (strcmp("decompress", arg) == 0) {
+        if (command_set) {
+          fprintf(stderr, "command already set when parsing --decompress\n");
+          return COMMAND_INVALID;
+        }
+        command_set = BROTLI_TRUE;
+        command = COMMAND_DECOMPRESS;
+      } else if (strcmp("force", arg) == 0) {
+        if (params->force_overwrite) {
+          fprintf(stderr, "force output overwrite already set\n");
+          return COMMAND_INVALID;
+        }
+        params->force_overwrite = BROTLI_TRUE;
+      } else if (strcmp("help", arg) == 0) {
+        /* Don't parse further. */
+        return COMMAND_HELP;
+      } else if (strcmp("keep", arg) == 0) {
+        if (keep_set) {
+          fprintf(stderr, "argument --rm / -j or --keep / -k already set\n");
+          return COMMAND_INVALID;
+        }
+        keep_set = BROTLI_TRUE;
+        params->junk_source = BROTLI_FALSE;
+      } else if (strcmp("no-copy-stat", arg) == 0) {
+        if (!params->copy_stat) {
+          fprintf(stderr, "argument --no-copy-stat / -n already set\n");
+          return COMMAND_INVALID;
+        }
+        params->copy_stat = BROTLI_FALSE;
+      } else if (strcmp("rm", arg) == 0) {
+        if (keep_set) {
+          fprintf(stderr, "argument --rm / -j or --keep / -k already set\n");
+          return COMMAND_INVALID;
+        }
+        keep_set = BROTLI_TRUE;
+        params->junk_source = BROTLI_TRUE;
+      } else if (strcmp("stdout", arg) == 0) {
+        if (output_set) {
+          fprintf(stderr, "write to standard output already set\n");
+          return COMMAND_INVALID;
+        }
+        output_set = BROTLI_TRUE;
+        params->write_to_stdout = BROTLI_TRUE;
+      } else if (strcmp("test", arg) == 0) {
+        if (command_set) {
+          fprintf(stderr, "command already set when parsing --test\n");
+          return COMMAND_INVALID;
+        }
+        command_set = BROTLI_TRUE;
+        command = COMMAND_TEST_INTEGRITY;
+      } else if (strcmp("verbose", arg) == 0) {
+        if (params->verbosity > 0) {
+          fprintf(stderr, "argument --verbose / -v already set\n");
+          return COMMAND_INVALID;
+        }
+        params->verbosity = 1;
+      } else if (strcmp("version", arg) == 0) {
+        /* Don't parse further. */
+        return COMMAND_VERSION;
+      } else {
+        /* key=value */
+        const char* value = strrchr(arg, '=');
+        size_t key_len;
+        if (!value || value[1] == 0) {
+          fprintf(stderr, "must pass the parameter as --%s=value\n", arg);
+          return COMMAND_INVALID;
+        }
+        key_len = (size_t)(value - arg);
+        value++;
+        if (strncmp("lgwin", arg, key_len) == 0) {
+          if (lgwin_set) {
+            fprintf(stderr, "lgwin parameter already set\n");
+            return COMMAND_INVALID;
+          }
+          lgwin_set = ParseInt(value, 0,
+                               BROTLI_MAX_WINDOW_BITS, &params->lgwin);
+          if (!lgwin_set) {
+            fprintf(stderr, "error parsing lgwin value [%s]\n", value);
+            return COMMAND_INVALID;
+          }
+          if (params->lgwin != 0 && params->lgwin < BROTLI_MIN_WINDOW_BITS) {
+            fprintf(stderr,
+                    "lgwin parameter (%d) smaller than the minimum (%d)\n",
+                    params->lgwin, BROTLI_MIN_WINDOW_BITS);
+            return COMMAND_INVALID;
+          }
+        } else if (strncmp("large_window", arg, key_len) == 0) {
+          /* This option is intentionally not mentioned in help. */
+          if (lgwin_set) {
+            fprintf(stderr, "lgwin parameter already set\n");
+            return COMMAND_INVALID;
+          }
+          lgwin_set = ParseInt(value, 0,
+                               BROTLI_LARGE_MAX_WINDOW_BITS, &params->lgwin);
+          if (!lgwin_set) {
+            fprintf(stderr, "error parsing lgwin value [%s]\n", value);
+            return COMMAND_INVALID;
+          }
+          if (params->lgwin != 0 && params->lgwin < BROTLI_MIN_WINDOW_BITS) {
+            fprintf(stderr,
+                    "lgwin parameter (%d) smaller than the minimum (%d)\n",
+                    params->lgwin, BROTLI_MIN_WINDOW_BITS);
+            return COMMAND_INVALID;
+          }
+        } else if (strncmp("output", arg, key_len) == 0) {
+          if (output_set) {
+            fprintf(stderr,
+                    "write to standard output already set (--output)\n");
+            return COMMAND_INVALID;
+          }
+          params->output_path = value;
+        } else if (strncmp("quality", arg, key_len) == 0) {
+          if (quality_set) {
+            fprintf(stderr, "quality already set\n");
+            return COMMAND_INVALID;
+          }
+          quality_set = ParseInt(value, BROTLI_MIN_QUALITY,
+                                 BROTLI_MAX_QUALITY, &params->quality);
+          if (!quality_set) {
+            fprintf(stderr, "error parsing quality value [%s]\n", value);
+            return COMMAND_INVALID;
+          }
+        } else if (strncmp("suffix", arg, key_len) == 0) {
+          if (suffix_set) {
+            fprintf(stderr, "suffix already set\n");
+            return COMMAND_INVALID;
+          }
+          suffix_set = BROTLI_TRUE;
+          params->suffix = value;
+        } else {
+          fprintf(stderr, "invalid parameter: [%s]\n", arg);
+          return COMMAND_INVALID;
+        }
+      }
+    }
+  }
+
+  params->input_count = input_count;
+  params->longest_path_len = longest_path_len;
+  params->decompress = (command == COMMAND_DECOMPRESS);
+  params->test_integrity = (command == COMMAND_TEST_INTEGRITY);
+
+  if (input_count > 1 && output_set) return COMMAND_INVALID;
+  if (params->test_integrity) {
+    if (params->output_path) return COMMAND_INVALID;
+    if (params->write_to_stdout) return COMMAND_INVALID;
+  }
+  if (strchr(params->suffix, '/') || strchr(params->suffix, '\\')) {
+    return COMMAND_INVALID;
+  }
+
+  return command;
+}
+
+/* Prints the "brotli MAJOR.MINOR.PATCH" line for BROTLI_VERSION to stdout. */
+static void PrintVersion(void) {
+  fprintf(stdout, "brotli %d.%d.%d\n", (int)(BROTLI_VERSION >> 24),
+          (int)((BROTLI_VERSION >> 12) & 0xFFF),
+          (int)(BROTLI_VERSION & 0xFFF));
+}
+
+static void PrintHelp(const char* name, BROTLI_BOOL error) {  /* Usage text. */
+  FILE* media = error ? stderr : stdout;  /* Errors are reported on stderr. */
+  /* String is cut to pieces with length less than 509, to conform to C90. */
+  fprintf(media,
+"Usage: %s [OPTION]... [FILE]...\n",
+          name);
+  fprintf(media,
+"Options:\n"
+"  -#                          compression level (0-9)\n"
+"  -c, --stdout                write on standard output\n"
+"  -d, --decompress            decompress\n"
+"  -f, --force                 force output file overwrite\n"
+"  -h, --help                  display this help and exit\n");
+  fprintf(media,
+"  -j, --rm                    remove source file(s)\n"
+"  -k, --keep                  keep source file(s) (default)\n"
+"  -n, --no-copy-stat          do not copy source file(s) attributes\n"
+"  -o FILE, --output=FILE      output file (only if 1 input file)\n");
+  fprintf(media,
+"  -q NUM, --quality=NUM       compression level (%d-%d)\n",
+          BROTLI_MIN_QUALITY, BROTLI_MAX_QUALITY);
+  fprintf(media,
+"  -t, --test                  test compressed file integrity\n"
+"  -v, --verbose               verbose mode\n");
+  fprintf(media,
+"  -w NUM, --lgwin=NUM         set LZ77 window size (0, %d-%d)\n"
+"                              window size = 2**NUM - 16\n"
+"                              0 lets compressor choose the optimal value\n",
+          BROTLI_MIN_WINDOW_BITS, BROTLI_MAX_WINDOW_BITS);
+  fprintf(media,
+"  --large_window=NUM          use incompatible large-window brotli\n"
+"                              bitstream with window size (0, %d-%d)\n"
+"                              WARNING: this format is not compatible\n"
+"                              with brotli RFC 7932 and may not be\n"
+"                              decodable with regular brotli decoders\n",
+          BROTLI_MIN_WINDOW_BITS, BROTLI_LARGE_MAX_WINDOW_BITS);
+  fprintf(media,
+"  -S SUF, --suffix=SUF        output file suffix (default:'%s')\n",
+          DEFAULT_SUFFIX);
+  fprintf(media,
+"  -V, --version               display version and exit\n"
+"  -Z, --best                  use best compression level (11) (default)\n"
+"Simple options could be coalesced, i.e. '-9kf' is equivalent to '-9 -k -f'.\n"
+"With no FILE, or when FILE is -, read standard input.\n"
+"All arguments after '--' are treated as files.\n");
+}
+
+static const char* PrintablePath(const char* path) {
+  return (path == NULL) ? "con" : path;  /* NULL path is shown as "con". */
+}
+
+/* Opens input_path (standard input when NULL) for binary reading into *f.
+   On failure prints the reason to stderr and returns BROTLI_FALSE. */
+static BROTLI_BOOL OpenInputFile(const char* input_path, FILE** f) {
+  /* fdopen() may fail too (e.g. stdin is closed), so both branches share
+     the NULL check below instead of assuming the console always opens. */
+  *f = input_path ? fopen(input_path, "rb")
+                  : fdopen(MAKE_BINARY(STDIN_FILENO), "rb");
+  if (!*f) {
+    fprintf(stderr, "failed to open input file [%s]: %s\n",
+            PrintablePath(input_path), strerror(errno));
+    return BROTLI_FALSE;
+  }
+  return BROTLI_TRUE;
+}
+
+/* Opens output_path (standard output when NULL) for binary writing into *f.
+   An existing file is only overwritten when force is set. On failure prints
+   the reason to stderr and returns BROTLI_FALSE. */
+static BROTLI_BOOL OpenOutputFile(const char* output_path, FILE** f,
+                                  BROTLI_BOOL force) {
+  *f = NULL;
+  if (!output_path) {
+    *f = fdopen(MAKE_BINARY(STDOUT_FILENO), "wb");
+  } else {
+    /* Without force, O_EXCL makes open() refuse to reuse an existing file. */
+    int fd = open(output_path,
+                  O_CREAT | (force ? 0 : O_EXCL) | O_WRONLY | O_TRUNC,
+                  S_IRUSR | S_IWUSR);
+    if (fd >= 0) *f = fdopen(fd, "wb");
+  }
+  /* One check covers open() and fdopen() failures, stdout included. */
+  if (!*f) {
+    fprintf(stderr, "failed to open output file [%s]: %s\n",
+            PrintablePath(output_path), strerror(errno));
+    return BROTLI_FALSE;
+  }
+  return BROTLI_TRUE;
+}
+
+/* Returns the size in bytes of the file at path.
+   Returns -1 when the file cannot be opened, seeked to its end, or closed
+   cleanly. */
+static int64_t FileSize(const char* path) {
+  int64_t size = -1;
+  FILE* file = fopen(path, "rb");
+  if (!file) return -1;
+  if (fseek(file, 0L, SEEK_END) == 0) {
+    /* The position at end-of-file is the length; ftell yields -1 on error. */
+    size = ftell(file);
+    if (fclose(file) != 0) size = -1;
+  } else {
+    fclose(file);
+  }
+  return size;
+}
+
+/* Copy file times, permissions, group and owner; no-op if a path is NULL.
+   TODO: this is a "best effort" implementation; honest cross-platform
+   fully featured implementation is way too hacky; add more hacks by request. */
+static void CopyStat(const char* input_path, const char* output_path) {
+  struct stat statbuf;
+  struct utimbuf times;
+  int res;
+  if (input_path == 0 || output_path == 0) {
+    return;
+  }
+  if (stat(input_path, &statbuf) != 0) {  /* Nothing to copy from. */
+    return;
+  }
+  times.actime = statbuf.st_atime;
+  times.modtime = statbuf.st_mtime;
+  utime(output_path, &times);  /* Return value is not checked. */
+  res = chmod(output_path, statbuf.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO));
+  if (res != 0) {
+    fprintf(stderr, "setting access bits failed for [%s]: %s\n",
+            PrintablePath(output_path), strerror(errno));
+  }
+  res = chown(output_path, (uid_t)-1, statbuf.st_gid);  /* Group only. */
+  if (res != 0) {
+    fprintf(stderr, "setting group failed for [%s]: %s\n",
+            PrintablePath(output_path), strerror(errno));
+  }
+  res = chown(output_path, statbuf.st_uid, (gid_t)-1);  /* Owner only. */
+  if (res != 0) {
+    fprintf(stderr, "setting user failed for [%s]: %s\n",
+            PrintablePath(output_path), strerror(errno));
+  }
+}
+
+static BROTLI_BOOL NextFile(Context* context) {  /* Advances to next input. */
+  const char* arg;
+  size_t arg_len;
+
+  /* Iterator points to last used arg; increment to search for the next one. */
+  context->iterator++;
+
+  context->input_file_length = -1;  /* Unknown until FileSize() is queried. */
+
+  /* No input path; read from console. */
+  if (context->input_count == 0) {
+    if (context->iterator > 1) return BROTLI_FALSE;  /* Console is read once. */
+    context->current_input_path = NULL;
+    /* Either write to the specified path, or to console. */
+    context->current_output_path = context->output_path;
+    return BROTLI_TRUE;
+  }
+
+  /* Skip option arguments. */
+  while (context->iterator == context->not_input_indices[context->ignore]) {
+    context->iterator++;
+    context->ignore++;
+  }
+
+  /* All args are scanned already. */
+  if (context->iterator >= context->argc) return BROTLI_FALSE;
+
+  /* Iterator now points to the input file name. */
+  arg = context->argv[context->iterator];
+  arg_len = strlen(arg);
+  /* Read from console. */
+  if (arg_len == 1 && arg[0] == '-') {
+    context->current_input_path = NULL;
+    context->current_output_path = context->output_path;
+    return BROTLI_TRUE;
+  }
+
+  context->current_input_path = arg;
+  context->input_file_length = FileSize(arg);
+  context->current_output_path = context->output_path;
+
+  if (context->output_path) return BROTLI_TRUE;
+  if (context->write_to_stdout) return BROTLI_TRUE;
+
+  strcpy(context->modified_path, arg);  /* Output name derives from input. */
+  context->current_output_path = context->modified_path;
+  /* If output is not specified, input path suffix should match. */
+  if (context->decompress) {
+    size_t suffix_len = strlen(context->suffix);
+    char* name = (char*)FileName(context->modified_path);
+    char* name_suffix;
+    size_t name_len = strlen(name);
+    if (name_len < suffix_len + 1) {  /* No name would remain. */
+      fprintf(stderr, "empty output file name for [%s] input file\n",
+              PrintablePath(arg));
+      context->iterator_error = BROTLI_TRUE;
+      return BROTLI_FALSE;
+    }
+    name_suffix = name + name_len - suffix_len;
+    if (strcmp(context->suffix, name_suffix) != 0) {
+      fprintf(stderr, "input file [%s] suffix mismatch\n",
+              PrintablePath(arg));
+      context->iterator_error = BROTLI_TRUE;
+      return BROTLI_FALSE;
+    }
+    name_suffix[0] = 0;  /* Cut the suffix off the output name. */
+    return BROTLI_TRUE;
+  } else {
+    strcpy(context->modified_path + arg_len, context->suffix);  /* Append. */
+    return BROTLI_TRUE;
+  }
+}
+
+/* Opens the current input and, unless only testing integrity, the output. */
+static BROTLI_BOOL OpenFiles(Context* context) {
+  if (!OpenInputFile(context->current_input_path, &context->fin)) {
+    return BROTLI_FALSE;
+  }
+  return context->test_integrity ? BROTLI_TRUE : OpenOutputFile(
+      context->current_output_path, &context->fout, context->force_overwrite);
+}
+
+/* Closes fin/fout; success says whether the file was processed correctly. */
+static BROTLI_BOOL CloseFiles(Context* context, BROTLI_BOOL success) {
+  BROTLI_BOOL is_ok = BROTLI_TRUE;
+  if (!context->test_integrity && context->fout) {
+    if (!success && context->current_output_path) {
+      unlink(context->current_output_path);
+    }
+    if (fclose(context->fout) != 0) {
+      if (success) {
+        fprintf(stderr, "fclose failed [%s]: %s\n",
+                PrintablePath(context->current_output_path), strerror(errno));
+      }
+      is_ok = BROTLI_FALSE;
+    }
+    /* TOCTOU violation, but otherwise it is impossible to set file times. */
+    if (success && is_ok && context->copy_stat) {
+      CopyStat(context->current_input_path, context->current_output_path);
+    }
+  }
+
+  if (context->fin) {
+    if (fclose(context->fin) != 0) {
+      if (is_ok) {
+        fprintf(stderr, "fclose failed [%s]: %s\n",
+                PrintablePath(context->current_input_path), strerror(errno));
+      }
+      is_ok = BROTLI_FALSE;
+    }
+  }
+  /* Never delete the source if a close failed: the output may be truncated. */
+  if (success && is_ok && context->junk_source &&
+      context->current_input_path) {
+    unlink(context->current_input_path);
+  }
+  context->fin = NULL;
+  context->fout = NULL;
+  return is_ok;
+}
+
+static const size_t kFileBufferSize = 1 << 19;  /* 512 KiB per I/O buffer. */
+
+static void InitializeBuffers(Context* context) {  /* Per-file stream reset. */
+  context->total_in = 0;
+  context->total_out = 0;
+  context->next_in = NULL;
+  context->available_in = 0;
+  context->next_out = context->output;
+  context->available_out = kFileBufferSize;  /* Whole output buffer is free. */
+}
+
+static BROTLI_BOOL HasMoreInput(Context* context) {  /* EOF not yet reached? */
+  return (feof(context->fin) == 0) ? BROTLI_TRUE : BROTLI_FALSE;
+}
+
+/* Reads the next chunk (up to kFileBufferSize bytes) of fin into the input
+   buffer. Returns BROTLI_FALSE, after reporting to stderr, on a read error. */
+static BROTLI_BOOL ProvideInput(Context* context) {
+  size_t bytes_read = fread(context->input, 1, kFileBufferSize, context->fin);
+  context->next_in = context->input;
+  context->available_in = bytes_read;
+  context->total_in += bytes_read;
+  if (!ferror(context->fin)) return BROTLI_TRUE;
+  fprintf(stderr, "failed to read input [%s]: %s\n",
+          PrintablePath(context->current_input_path), strerror(errno));
+  return BROTLI_FALSE;
+}
+
+/* Internal: should be used only in Provide-/Flush-Output.
+   Counts the pending output bytes and, except in test mode, writes them. */
+static BROTLI_BOOL WriteOutput(Context* context) {
+  const size_t pending = (size_t)(context->next_out - context->output);
+  context->total_out += pending;
+  if (pending != 0 && !context->test_integrity) {
+    fwrite(context->output, 1, pending, context->fout);
+    if (ferror(context->fout)) {
+      fprintf(stderr, "failed to write output [%s]: %s\n",
+              PrintablePath(context->current_output_path), strerror(errno));
+      return BROTLI_FALSE;
+    }
+  }
+  return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL ProvideOutput(Context* context) {  /* Write, then rewind. */
+  if (!WriteOutput(context)) return BROTLI_FALSE;
+  context->next_out = context->output;  /* Buffer is reusable from its start. */
+  context->available_out = kFileBufferSize;
+  return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL FlushOutput(Context* context) {  /* Write; leave no room. */
+  BROTLI_BOOL flushed = WriteOutput(context);
+  if (flushed) context->available_out = 0;
+  return flushed;
+}
+
+static void PrintBytes(size_t value) {  /* Human-readable size on stderr. */
+  if (value >= 1073741824) {
+    fprintf(stderr, "%0.3f GiB", (double)value / 1073741824.0);
+  } else if (value >= 1048576) {
+    fprintf(stderr, "%0.3f MiB", (double)value / 1048576.0);
+  } else if (value >= 1024) {
+    fprintf(stderr, "%0.3f KiB", (double)value / 1024.0);
+  } else {
+    fprintf(stderr, "%d B", (int)value);
+  }
+}
+
+static void PrintFileProcessingProgress(Context* context) {  /* To stderr. */
+  fprintf(stderr, "[%s]: ", PrintablePath(context->current_input_path));
+  PrintBytes(context->total_in);  /* Bytes read from the input so far. */
+  fprintf(stderr, " -> ");
+  PrintBytes(context->total_out);  /* Bytes produced so far. */
+}
+
+static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) {
+  BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
+  InitializeBuffers(context);  /* Empty input, so the first pass reads data. */
+  for (;;) {  /* Each pass serves the decoder's last request, then decodes. */
+    if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
+      if (!HasMoreInput(context)) {  /* Input ended before the stream did. */
+        fprintf(stderr, "corrupt input [%s]\n",
+                PrintablePath(context->current_input_path));
+        return BROTLI_FALSE;
+      }
+      if (!ProvideInput(context)) return BROTLI_FALSE;
+    } else if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
+      if (!ProvideOutput(context)) return BROTLI_FALSE;
+    } else if (result == BROTLI_DECODER_RESULT_SUCCESS) {
+      if (!FlushOutput(context)) return BROTLI_FALSE;
+      if (context->available_in != 0 || HasMoreInput(context)) {
+        fprintf(stderr, "corrupt input [%s]\n",
+                PrintablePath(context->current_input_path));
+        return BROTLI_FALSE;  /* Leftover bytes, or EOF not yet seen. */
+      }
+      if (context->verbosity > 0) {
+        fprintf(stderr, "Decompressed ");
+        PrintFileProcessingProgress(context);
+        fprintf(stderr, "\n");
+      }
+      return BROTLI_TRUE;
+    } else {  /* Any other result is treated as a decoding error. */
+      fprintf(stderr, "corrupt input [%s]\n",
+              PrintablePath(context->current_input_path));
+      return BROTLI_FALSE;
+    }
+
+    result = BrotliDecoderDecompressStream(s, &context->available_in,
+        &context->next_in, &context->available_out, &context->next_out, 0);
+  }
+}
+
+static BROTLI_BOOL DecompressFiles(Context* context) {  /* Decode all inputs. */
+  while (NextFile(context)) {  /* A fresh decoder is created for every file. */
+    BROTLI_BOOL is_ok = BROTLI_TRUE;
+    BrotliDecoderState* s = BrotliDecoderCreateInstance(NULL, NULL, NULL);
+    if (!s) {
+      fprintf(stderr, "out of memory\n");
+      return BROTLI_FALSE;
+    }
+    /* This allows decoding "large-window" streams. Though it creates
+       fragmentation (new builds decode streams that old builds don't),
+       it is better from user experience perspective. */
+    BrotliDecoderSetParameter(s, BROTLI_DECODER_PARAM_LARGE_WINDOW, 1u);
+    is_ok = OpenFiles(context);
+    if (is_ok && !context->current_input_path &&
+        !context->force_overwrite && isatty(STDIN_FILENO)) {
+      fprintf(stderr, "Use -h help. Use -f to force input from a terminal.\n");
+      is_ok = BROTLI_FALSE;
+    }
+    if (is_ok) is_ok = DecompressFile(context, s);
+    BrotliDecoderDestroyInstance(s);
+    if (!CloseFiles(context, is_ok)) is_ok = BROTLI_FALSE;
+    if (!is_ok) return BROTLI_FALSE;  /* Stop at the first failing file. */
+  }
+  return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL CompressFile(Context* context, BrotliEncoderState* s) {
+  BROTLI_BOOL is_eof = BROTLI_FALSE;
+  InitializeBuffers(context);
+  for (;;) {  /* Read, encode and write until the encoder reports finished. */
+    if (context->available_in == 0 && !is_eof) {  /* Input buffer drained. */
+      if (!ProvideInput(context)) return BROTLI_FALSE;
+      is_eof = !HasMoreInput(context);  /* FINISH is sent once EOF is seen. */
+    }
+
+    if (!BrotliEncoderCompressStream(s,
+        is_eof ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
+        &context->available_in, &context->next_in,
+        &context->available_out, &context->next_out, NULL)) {
+      /* Should detect OOM? */
+      fprintf(stderr, "failed to compress data [%s]\n",
+              PrintablePath(context->current_input_path));
+      return BROTLI_FALSE;
+    }
+
+    if (context->available_out == 0) {  /* Output buffer is full. */
+      if (!ProvideOutput(context)) return BROTLI_FALSE;
+    }
+
+    if (BrotliEncoderIsFinished(s)) {
+      if (!FlushOutput(context)) return BROTLI_FALSE;
+      if (context->verbosity > 0) {
+        fprintf(stderr, "Compressed ");
+        PrintFileProcessingProgress(context);
+        fprintf(stderr, "\n");
+      }
+      return BROTLI_TRUE;
+    }
+  }
+}
+
+static BROTLI_BOOL CompressFiles(Context* context) {  /* Encode all inputs. */
+  while (NextFile(context)) {  /* A fresh encoder is created for every file. */
+    BROTLI_BOOL is_ok = BROTLI_TRUE;
+    BrotliEncoderState* s = BrotliEncoderCreateInstance(NULL, NULL, NULL);
+    if (!s) {
+      fprintf(stderr, "out of memory\n");
+      return BROTLI_FALSE;
+    }
+    BrotliEncoderSetParameter(s,
+        BROTLI_PARAM_QUALITY, (uint32_t)context->quality);
+    if (context->lgwin > 0) {
+      /* Specified by user. */
+      /* Do not enable "large-window" extension, if not required. */
+      if (context->lgwin > BROTLI_MAX_WINDOW_BITS) {
+        BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, 1u);
+      }
+      BrotliEncoderSetParameter(s,
+          BROTLI_PARAM_LGWIN, (uint32_t)context->lgwin);
+    } else {
+      /* 0, or not specified by user; could be chosen by compressor. */
+      uint32_t lgwin = DEFAULT_LGWIN;
+      /* Use file size to limit lgwin: grow it until the file fits, up to max. */
+      if (context->input_file_length >= 0) {
+        lgwin = BROTLI_MIN_WINDOW_BITS;
+        while (BROTLI_MAX_BACKWARD_LIMIT(lgwin) <
+               (uint64_t)context->input_file_length) {
+          lgwin++;
+          if (lgwin == BROTLI_MAX_WINDOW_BITS) break;
+        }
+      }
+      BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, lgwin);
+    }
+    if (context->input_file_length > 0) {  /* Size hint is capped at 1 GiB. */
+      uint32_t size_hint = context->input_file_length < (1 << 30) ?
+          (uint32_t)context->input_file_length : (1u << 30);
+      BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, size_hint);
+    }
+    is_ok = OpenFiles(context);
+    if (is_ok && !context->current_output_path &&
+        !context->force_overwrite && isatty(STDOUT_FILENO)) {
+      fprintf(stderr, "Use -h help. Use -f to force output to a terminal.\n");
+      is_ok = BROTLI_FALSE;
+    }
+    if (is_ok) is_ok = CompressFile(context, s);
+    BrotliEncoderDestroyInstance(s);
+    if (!CloseFiles(context, is_ok)) is_ok = BROTLI_FALSE;
+    if (!is_ok) return BROTLI_FALSE;  /* Stop at the first failing file. */
+  }
+  return BROTLI_TRUE;
+}
+
+int main(int argc, char** argv) {  /* Entry point of the brotli CLI tool. */
+  Command command;
+  Context context;
+  BROTLI_BOOL is_ok = BROTLI_TRUE;
+  int i;
+  /* Defaults; ParseParams() overrides them from the command line. */
+  context.quality = 11;
+  context.lgwin = -1;
+  context.verbosity = 0;
+  context.force_overwrite = BROTLI_FALSE;
+  context.junk_source = BROTLI_FALSE;
+  context.copy_stat = BROTLI_TRUE;
+  context.test_integrity = BROTLI_FALSE;
+  context.write_to_stdout = BROTLI_FALSE;
+  context.decompress = BROTLI_FALSE;
+  context.large_window = BROTLI_FALSE;
+  context.output_path = NULL;
+  context.suffix = DEFAULT_SUFFIX;
+  for (i = 0; i < MAX_OPTIONS; ++i) context.not_input_indices[i] = 0;
+  context.longest_path_len = 1;
+  context.input_count = 0;
+
+  context.argc = argc;
+  context.argv = argv;
+  context.modified_path = NULL;
+  context.iterator = 0;
+  context.ignore = 0;
+  context.iterator_error = BROTLI_FALSE;
+  context.buffer = NULL;
+  context.current_input_path = NULL;
+  context.current_output_path = NULL;
+  context.fin = NULL;
+  context.fout = NULL;
+
+  command = ParseParams(&context);
+
+  if (command == COMMAND_COMPRESS || command == COMMAND_DECOMPRESS ||
+      command == COMMAND_TEST_INTEGRITY) {
+    if (is_ok) {
+      size_t modified_path_len =
+          context.longest_path_len + strlen(context.suffix) + 1;
+      context.modified_path = (char*)malloc(modified_path_len);
+      context.buffer = (uint8_t*)malloc(kFileBufferSize * 2);  /* In + out. */
+      if (!context.modified_path || !context.buffer) {
+        fprintf(stderr, "out of memory\n");
+        is_ok = BROTLI_FALSE;
+      } else {
+        context.input = context.buffer;
+        context.output = context.buffer + kFileBufferSize;
+      }
+    }
+  }
+
+  if (!is_ok) command = COMMAND_NOOP;
+
+  switch (command) {
+    case COMMAND_NOOP:
+      break;
+
+    case COMMAND_VERSION:
+      PrintVersion();
+      break;
+
+    case COMMAND_COMPRESS:
+      is_ok = CompressFiles(&context);
+      break;
+
+    case COMMAND_DECOMPRESS:
+    case COMMAND_TEST_INTEGRITY:
+      is_ok = DecompressFiles(&context);
+      break;
+
+    case COMMAND_HELP:
+    case COMMAND_INVALID:
+    default:  /* Requested help goes to stdout, usage errors to stderr. */
+      is_ok = (command == COMMAND_HELP);  /* Only --help counts as success. */
+      PrintHelp(FileName(argv[0]), is_ok ? BROTLI_FALSE : BROTLI_TRUE);
+      break;
+  }
+
+  if (context.iterator_error) is_ok = BROTLI_FALSE;
+
+  free(context.modified_path);
+  free(context.buffer);
+
+  if (!is_ok) exit(1);  /* Non-zero exit status on any failure. */
+  return 0;
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/tools/brotli.md b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/tools/brotli.md
new file mode 100644
index 0000000000..c029869bce
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/brotli/c/tools/brotli.md
@@ -0,0 +1,107 @@
+brotli(1) -- brotli, unbrotli - compress or decompress files
+================================================================
+
+SYNOPSIS
+--------
+
+`brotli` [*OPTION|FILE*]...
+
+`unbrotli` is equivalent to `brotli --decompress`
+
+DESCRIPTION
+-----------
+`brotli` is a generic-purpose lossless compression algorithm that compresses
+data using a combination of a modern variant of the **LZ77** algorithm, Huffman
+coding and 2-nd order context modeling, with a compression ratio comparable to
+the best currently available general-purpose compression methods. It is similar
+in speed to deflate but offers denser compression.
+
+`brotli` command line syntax is similar to `gzip (1)` and `zstd (1)`.
+Unlike `gzip (1)`, source files are preserved by default. It is possible to
+remove them after processing by using the `--rm` _option_.
+
+Arguments that look like "`--name`" or "`--name=value`" are _options_. Every
+_option_ has a short form "`-x`" or "`-x value`". Multiple short form _options_
+could be coalesced:
+
+* "`--decompress --stdout --suffix=.b`" works the same as
+* "`-d -c -S .b`" and
+* "`-dcS .b`"
+
+`brotli` has 3 operation modes:
+
+* default mode is compression;
+* `--decompress` option activates decompression mode;
+* `--test` option switches to integrity test mode; this option is equivalent to
+  "`--decompress --stdout`" except that the decompressed data is discarded
+  instead of being written to standard output.
+
+Every non-option argument is a _file_ entry. If no _files_ are given or _file_
+is "`-`", `brotli` reads from standard input. All arguments after "`--`" are
+_file_ entries.
+
+Unless `--stdout` or `--output` is specified, _files_ are written to a new file
+whose name is derived from the source _file_ name:
+
+* when compressing, a suffix is appended to the source filename to
+  get the target filename
+* when decompressing, a suffix is removed from the source filename to
+  get the target filename
+
+Default suffix is `.br`, but it could be specified with `--suffix` option.
+
+Conflicting or duplicate _options_ are not allowed.
+
+OPTIONS
+-------
+
+* `-#`:
+    compression level (0-9); bigger values cause denser, but slower compression
+* `-c`, `--stdout`:
+    write on standard output
+* `-d`, `--decompress`:
+    decompress mode
+* `-f`, `--force`:
+    force output file overwrite
+* `-h`, `--help`:
+    display this help and exit
+* `-j`, `--rm`:
+    remove source file(s); `gzip (1)`-like behaviour
+* `-k`, `--keep`:
+    keep source file(s); `zstd (1)`-like behaviour
+* `-n`, `--no-copy-stat`:
+    do not copy source file(s) attributes
+* `-o FILE`, `--output=FILE`:
+    output file; valid only if there is a single input entry
+* `-q NUM`, `--quality=NUM`:
+    compression level (0-11); bigger values cause denser, but slower compression
+* `-t`, `--test`:
+    test file integrity mode
+* `-v`, `--verbose`:
+    increase output verbosity
+* `-w NUM`, `--lgwin=NUM`:
+    set LZ77 window size (0, 10-24) (default: 22); window size is
+    `(2**NUM - 16)`; 0 lets compressor decide over the optimal value; bigger
+    window sizes improve density; decoder might require up to window size
+    memory to operate
+* `-S SUF`, `--suffix=SUF`:
+    output file suffix (default: `.br`)
+* `-V`, `--version`:
+    display version and exit
+* `-Z`, `--best`:
+    use best compression level (default); same as "`-q 11`"
+
+SEE ALSO
+--------
+
+`brotli` file format is defined in
+[RFC 7932](https://www.ietf.org/rfc/rfc7932.txt).
+
+`brotli` is open-sourced under the
+[MIT License](https://opensource.org/licenses/MIT).
+
+Mailing list: https://groups.google.com/forum/#!forum/brotli
+
+BUGS
+----
+Report bugs at: https://github.com/google/brotli/issues
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/LICENSE b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/LICENSE
new file mode 100644
index 0000000000..f49a4e16e6
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
\ No newline at end of file
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/aligned_allocator.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/aligned_allocator.cc
new file mode 100644
index 0000000000..bec7c3bb1b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/aligned_allocator.cc
@@ -0,0 +1,138 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/aligned_allocator.h"
+
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>  // malloc
+
+#include <atomic>
+#include <limits>
+
+#include "hwy/base.h"
+
+namespace hwy {
+namespace {
+
+constexpr size_t kAlignment = HWY_MAX(HWY_ALIGNMENT, kMaxVectorSize);
+// On x86, aliasing can only occur at multiples of 2K, but that's too wasteful
+// if this is used for single-vector allocations. 256 is more reasonable.
+constexpr size_t kAlias = kAlignment * 4;  // i.e. 256 when kAlignment == 64
+
+#pragma pack(push, 1)
+struct AllocationHeader {  // written directly before each returned payload
+  void* allocated;      // pointer returned by malloc() or the custom allocator
+  size_t payload_size;  // number of payload bytes the caller asked for
+};
+#pragma pack(pop)
+
+// Cycles through the multiples of kAlignment below kAlias, one per call.
+size_t NextAlignedOffset() {
+  static std::atomic<uint32_t> counter{0};
+  constexpr uint32_t kNumGroups = kAlias / kAlignment;
+  const uint32_t ticket = counter.fetch_add(1, std::memory_order_relaxed);
+  const size_t offset = kAlignment * (ticket % kNumGroups);
+  HWY_DASSERT((offset % kAlignment == 0) && offset <= kAlias);
+  return offset;
+}
+
+}  // namespace
+
+void* AllocateAlignedBytes(const size_t payload_size, AllocPtr alloc_ptr,
+                           void* opaque_ptr) {
+  if (payload_size >= std::numeric_limits<size_t>::max() / 2) {
+    HWY_DASSERT(false && "payload_size too large");
+    return nullptr;
+  }
+
+  size_t offset = NextAlignedOffset();
+
+  // What: | misalign | unused | AllocationHeader |payload
+  // Size: |<= kAlias | offset                    |payload_size
+  //       ^allocated.^aligned.^header............^payload
+  // The header must immediately precede payload, which must remain aligned.
+  // To avoid wasting space, the header resides at the end of `unused`,
+  // which therefore cannot be empty (offset == 0).
+  if (offset == 0) {
+    offset = kAlignment;  // = RoundUpTo(sizeof(AllocationHeader), kAlignment)
+    static_assert(sizeof(AllocationHeader) <= kAlignment, "Else: round up");
+  }
+
+  const size_t allocated_size = kAlias + offset + payload_size;
+  void* allocated;
+  if (alloc_ptr == nullptr) {
+    allocated = malloc(allocated_size);
+  } else {
+    allocated = (*alloc_ptr)(opaque_ptr, allocated_size);
+  }
+  if (allocated == nullptr) return nullptr;  // the underlying allocator failed
+  // Always round up even if already aligned - we already asked for kAlias
+  // extra bytes and there's no way to give them back.
+  uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated) + kAlias;
+  static_assert((kAlias & (kAlias - 1)) == 0, "kAlias must be a power of 2");
+  static_assert(kAlias >= kAlignment, "Cannot align to more than kAlias");
+  aligned &= ~(kAlias - 1);  // clear the low bits: now a multiple of kAlias
+
+  const uintptr_t payload = aligned + offset;  // still aligned
+
+  // Stash `allocated` (read back by FreeAlignedBytes()) and payload_size (read
+  // by AlignedDeleter::DeleteAlignedArray()) in the header before the payload.
+  AllocationHeader* header = reinterpret_cast<AllocationHeader*>(payload) - 1;
+  header->allocated = allocated;
+  header->payload_size = payload_size;
+
+  return HWY_ASSUME_ALIGNED(reinterpret_cast<void*>(payload), kMaxVectorSize);
+}
+
+void FreeAlignedBytes(const void* aligned_pointer, FreePtr free_ptr,
+                      void* opaque_ptr) {
+  if (!aligned_pointer) return;  // like free(), null is a no-op
+  const uintptr_t payload = reinterpret_cast<uintptr_t>(aligned_pointer);
+  HWY_DASSERT(payload % kAlignment == 0);
+  // AllocateAlignedBytes() stored the header directly before the payload.
+  const auto* header = reinterpret_cast<const AllocationHeader*>(payload) - 1;
+  void* const allocated = header->allocated;
+
+  if (free_ptr != nullptr) {
+    (*free_ptr)(opaque_ptr, allocated);
+  } else {
+    free(allocated);
+  }
+}
+
+// static
+void AlignedDeleter::DeleteAlignedArray(void* aligned_pointer, FreePtr free_ptr,
+                                        void* opaque_ptr,
+                                        ArrayDeleter deleter) {
+  // Nothing was allocated, so there is nothing to destroy or release.
+  if (aligned_pointer == nullptr) return;
+
+  const uintptr_t payload = reinterpret_cast<uintptr_t>(aligned_pointer);
+  HWY_DASSERT(payload % kAlignment == 0);
+
+  // Run the element destructors first; they need the payload size that
+  // AllocateAlignedBytes() recorded in the header preceding the payload.
+  if (deleter) {
+    const AllocationHeader* header =
+        reinterpret_cast<const AllocationHeader*>(payload) - 1;
+    (*deleter)(aligned_pointer, header->payload_size);
+  }
+
+  // Release the underlying allocation via free_ptr, or free() if it is null.
+  FreeAlignedBytes(aligned_pointer, free_ptr, opaque_ptr);
+}
+
+}  // namespace hwy
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/aligned_allocator.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/aligned_allocator.h
new file mode 100644
index 0000000000..1e76cefb6f
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/aligned_allocator.h
@@ -0,0 +1,206 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_ALIGNED_ALLOCATOR_H_
+#define HIGHWAY_HWY_ALIGNED_ALLOCATOR_H_
+
+// Memory allocator with support for alignment and offsets.
+
+#include <stddef.h>
+#include <memory>
+
+namespace hwy {
+
+// Minimum alignment of allocated memory for use in HWY_ASSUME_ALIGNED, which
+// requires a literal. This matches typical L1 cache line sizes, which prevents
+// false sharing.
+#define HWY_ALIGNMENT 64
+
+// Pointers to functions equivalent to malloc/free with an opaque void* passed
+// to them.
+using AllocPtr = void* (*)(void* opaque, size_t bytes);
+using FreePtr = void (*)(void* opaque, void* memory);
+
+// Returns null or a pointer to at least `payload_size` (which can be zero)
+// bytes of newly allocated memory, aligned to the larger of HWY_ALIGNMENT and
+// the vector size. Calls `alloc_ptr` with the passed `opaque_ptr` to obtain
+// memory, or malloc() if `alloc_ptr` is null.
+void* AllocateAlignedBytes(size_t payload_size, AllocPtr alloc_ptr,
+                           void* opaque_ptr);
+
+// Frees all memory. No effect if `aligned_pointer` == nullptr, otherwise it
+// must have been returned from a previous call to `AllocateAlignedBytes`.
+// Calls `free_ptr` with the passed `opaque_ptr` pointer to free the memory; if
+// `free_ptr` function is null, uses the default free().
+void FreeAlignedBytes(const void* aligned_pointer, FreePtr free_ptr,
+                      void* opaque_ptr);
+
+// Class that deletes the aligned pointer passed to operator() calling the
+// destructor before freeing the pointer. This is equivalent to the
+// std::default_delete but for aligned objects. For a similar deleter equivalent
+// to free() for aligned memory see AlignedFreer().
+class AlignedDeleter {
+ public:
+  AlignedDeleter() : free_(nullptr), opaque_ptr_(nullptr) {}
+  AlignedDeleter(FreePtr free_ptr, void* opaque_ptr)
+      : free_(free_ptr), opaque_ptr_(opaque_ptr) {}
+
+  template <typename T>
+  void operator()(T* aligned_pointer) const {
+    DeleteAlignedArray(aligned_pointer, free_, opaque_ptr_,
+                       TypedArrayDeleter<T>);
+  }
+
+ private:
+  template <typename T>
+  static void TypedArrayDeleter(void* ptr, size_t size_in_bytes) {
+    T* const first = static_cast<T*>(ptr);
+    T* const last = first + size_in_bytes / sizeof(T);
+    for (T* item = first; item != last; ++item) {
+      item->~T();  // explicit destructor call; the memory is freed elsewhere
+    }
+  }
+
+  // Function prototype that calls the destructor for each element in a typed
+  // array. TypedArrayDeleter<T> matches this prototype.
+  using ArrayDeleter = void (*)(void* t_ptr, size_t t_size);
+
+  static void DeleteAlignedArray(void* aligned_pointer, FreePtr free_ptr,
+                                 void* opaque_ptr, ArrayDeleter deleter);
+
+  FreePtr free_;      // forwarded to DeleteAlignedArray as free_ptr
+  void* opaque_ptr_;  // forwarded to DeleteAlignedArray as opaque_ptr
+};
+
+// Unique pointer to T with custom aligned deleter. This can be a single
+// element U or an array of elements if T is a U[]. The custom aligned deleter
+// will call the destructor on U or each element of a U[] in the array case.
+template <typename T>
+using AlignedUniquePtr = std::unique_ptr<T, AlignedDeleter>;
+
+// Aligned memory equivalent of make_unique<T> using the custom allocators
+// alloc/free with the passed `opaque` pointer. Constructs T from `args`; its
+// destructor runs when the AlignedUniquePtr is destroyed. Returns an empty
+// AlignedUniquePtr (without constructing T) if the allocation fails.
+template <typename T, typename... Args>
+AlignedUniquePtr<T> MakeUniqueAlignedWithAlloc(AllocPtr alloc, FreePtr free,
+                                               void* opaque, Args&&... args) {
+  T* ptr = static_cast<T*>(AllocateAlignedBytes(sizeof(T), alloc, opaque));
+  if (ptr != nullptr) new (ptr) T(std::forward<Args>(args)...);
+  return AlignedUniquePtr<T>(ptr, AlignedDeleter(free, opaque));
+}
+
+// Similar to MakeUniqueAlignedWithAlloc but using the default alloc/free
+// functions. Returns an empty AlignedUniquePtr if the allocation fails.
+template <typename T, typename... Args>
+AlignedUniquePtr<T> MakeUniqueAligned(Args&&... args) {
+  T* ptr = static_cast<T*>(AllocateAlignedBytes(
+      sizeof(T), /*alloc_ptr=*/nullptr, /*opaque_ptr=*/nullptr));
+  if (ptr != nullptr) new (ptr) T(std::forward<Args>(args)...);
+  return AlignedUniquePtr<T>(ptr, AlignedDeleter());
+}
+
+// Helpers for array allocators (avoids overflow)
+namespace detail {
+
+// Returns floor(log2(n)), or 0 for n == 0; 1u << result == n for powers of two.
+static inline constexpr size_t ShiftCount(size_t n) {
+  return (n <= 1) ? 0 : 1 + ShiftCount(n / 2);
+}
+
+template <typename T>
+T* AllocateAlignedItems(size_t items, AllocPtr alloc_ptr, void* opaque_ptr) {
+  constexpr size_t size = sizeof(T);
+
+  constexpr bool is_pow2 = (size & (size - 1)) == 0;
+  constexpr size_t bits = ShiftCount(size);
+  static_assert(!is_pow2 || (1ull << bits) == size, "ShiftCount is incorrect");
+
+  const size_t bytes = is_pow2 ? items << bits : items * size;  // may wrap
+  const size_t check = is_pow2 ? bytes >> bits : bytes / size;  // undo scaling
+  if (check != items) {
+    return nullptr;  // overflowed
+  }
+  return static_cast<T*>(AllocateAlignedBytes(bytes, alloc_ptr, opaque_ptr));
+}
+
+}  // namespace detail
+
+// Aligned memory equivalent of make_unique<T[]> for array types using the
+// custom allocators alloc/free. Every item is constructed from the same
+// Args...; each element's destructor runs when the AlignedUniquePtr is
+// destroyed. Returns an empty pointer if the allocation fails.
+template <typename T, typename... Args>
+AlignedUniquePtr<T[]> MakeUniqueAlignedArrayWithAlloc(
+    size_t items, AllocPtr alloc, FreePtr free, void* opaque, Args&&... args) {
+  T* const first = detail::AllocateAlignedItems<T>(items, alloc, opaque);
+  // NOTE(review): args are forwarded once per item, so an rvalue argument may
+  // already be moved-from when the second item is constructed.
+  for (size_t i = 0; first != nullptr && i < items; ++i) {
+    new (first + i) T(std::forward<Args>(args)...);
+  }
+  return AlignedUniquePtr<T[]>(first, AlignedDeleter(free, opaque));
+}
+
+template <typename T, typename... Args>
+AlignedUniquePtr<T[]> MakeUniqueAlignedArray(size_t items, Args&&... args) {
+  return MakeUniqueAlignedArrayWithAlloc<T, Args...>(  // null: malloc()/free()
+      items, nullptr, nullptr, nullptr, std::forward<Args>(args)...);
+}
+
+// Custom deleter for std::unique_ptr that releases aligned memory through
+// FreeAlignedBytes() without running any destructors (compare AlignedDeleter).
+class AlignedFreer {
+ public:
+  // No-op FreePtr; pass its address to the ctor for externally-owned memory.
+  static void DoNothing(void* /*opaque*/, void* /*aligned_pointer*/) {}
+
+  AlignedFreer() : free_(nullptr), opaque_ptr_(nullptr) {}
+  AlignedFreer(FreePtr free_ptr, void* opaque_ptr)
+      : free_(free_ptr), opaque_ptr_(opaque_ptr) {}
+
+  template <typename T>
+  void operator()(T* aligned_pointer) const {
+    // TODO(deymo): assert that we are using a POD type T.
+    FreeAlignedBytes(aligned_pointer, free_, opaque_ptr_);
+  }
+
+ private:
+  FreePtr free_;      // forwarded to FreeAlignedBytes as free_ptr
+  void* opaque_ptr_;  // forwarded to FreeAlignedBytes as opaque_ptr
+};
+
+// Unique pointer to single POD, or (if T is U[]) an array of POD. For non POD
+// data use AlignedUniquePtr.
+template <typename T>
+using AlignedFreeUniquePtr = std::unique_ptr<T, AlignedFreer>;
+
+// Allocates an aligned, uninitialized array of `items` POD values. The
+// returned unique_ptr frees the array (without running destructors).
+template <typename T>
+AlignedFreeUniquePtr<T[]> AllocateAligned(const size_t items, AllocPtr alloc,
+                                          FreePtr free, void* opaque) {
+  T* const array = detail::AllocateAlignedItems<T>(items, alloc, opaque);
+  // `array` is null if the size computation overflowed or allocation failed.
+  return AlignedFreeUniquePtr<T[]>(array, AlignedFreer(free, opaque));
+}
+
+// Same as previous AllocateAligned(), using default allocate/free functions.
+template <typename T>
+AlignedFreeUniquePtr<T[]> AllocateAligned(const size_t items) {
+  return AllocateAligned<T>(items, nullptr, nullptr, nullptr);  // malloc/free
+}
+
+}  // namespace hwy
+#endif  // HIGHWAY_HWY_ALIGNED_ALLOCATOR_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/aligned_allocator_test.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/aligned_allocator_test.cc
new file mode 100644
index 0000000000..c11033b18c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/aligned_allocator_test.cc
@@ -0,0 +1,278 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/aligned_allocator.h"
+
+#include <stddef.h>
+#include <array>
+#include <new>
+#include <random>
+#include <set>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "hwy/base.h"
+
+namespace {
+
+// Sample object that tracks, through an external counter, how many instances
+// built with the explicit constructor are still alive.
+template <size_t N>
+class SampleObject {
+ public:
+  SampleObject() { data_[0] = 'a'; }
+  explicit SampleObject(int* counter) : counter_(counter) {
+    data_[0] = 'b';
+    if (counter_ != nullptr) ++(*counter_);
+  }
+
+  ~SampleObject() {
+    if (counter_ != nullptr) --(*counter_);
+  }
+
+  static_assert(N > sizeof(int*), "SampleObject size too small.");
+  int* counter_ = nullptr;
+  char data_[N - sizeof(int*)];
+};
+
+class FakeAllocator {
+ public:
+  // Static AllocPtr/FreePtr-compatible entry points for the aligned allocator.
+  // `opaque` must point to the FakeAllocator whose private members they call.
+  static void* StaticAlloc(void* opaque, size_t bytes) {
+    return static_cast<FakeAllocator*>(opaque)->Alloc(bytes);
+  }
+  static void StaticFree(void* opaque, void* memory) {
+    static_cast<FakeAllocator*>(opaque)->Free(memory);
+  }
+
+  // Returns the number of pending allocations to be freed.
+  size_t PendingAllocs() { return allocs_.size(); }
+
+ private:
+  void* Alloc(size_t bytes) {
+    void* const block = malloc(bytes);
+    allocs_.insert(block);
+    return block;
+  }
+  void Free(void* memory) {
+    if (memory == nullptr) return;
+    EXPECT_NE(allocs_.end(), allocs_.find(memory));
+    free(memory);
+    allocs_.erase(memory);
+  }
+
+  std::set<void*> allocs_;
+};
+
+}  // namespace
+
+namespace hwy {
+
+class AlignedAllocatorTest : public testing::Test {};  // name of the suite below
+
+TEST(AlignedAllocatorTest, FreeNullptr) {
+  // Freeing a null pointer must be a harmless no-op, with default free too.
+  FreeAlignedBytes(/*aligned_pointer=*/nullptr, /*free_ptr=*/nullptr,
+                   /*opaque_ptr=*/nullptr);
+}
+
+TEST(AlignedAllocatorTest, Log2) {
+  EXPECT_EQ(0u, detail::ShiftCount(1));  // 1 == 1 << 0
+  EXPECT_EQ(1u, detail::ShiftCount(2));  // 2 == 1 << 1
+  EXPECT_EQ(3u, detail::ShiftCount(8));  // 8 == 1 << 3
+}
+
+// AllocateAlignedItems returns null when items * sizeof(T) overflows size_t.
+TEST(AlignedAllocatorTest, Overflow) {
+  constexpr size_t max = ~size_t(0);      // all bits set
+  constexpr size_t msb = (max >> 1) + 1;  // only the most significant bit set
+  using Size5 = std::array<uint8_t, 5>;
+  using Size10 = std::array<uint8_t, 10>;
+  EXPECT_EQ(nullptr,
+            detail::AllocateAlignedItems<uint32_t>(max / 2, nullptr, nullptr));
+  EXPECT_EQ(nullptr,
+            detail::AllocateAlignedItems<uint32_t>(max / 3, nullptr, nullptr));
+  EXPECT_EQ(nullptr,
+            detail::AllocateAlignedItems<Size5>(max / 4, nullptr, nullptr));
+  EXPECT_EQ(nullptr,
+            detail::AllocateAlignedItems<uint16_t>(msb, nullptr, nullptr));
+  EXPECT_EQ(nullptr,
+            detail::AllocateAlignedItems<double>(msb + 1, nullptr, nullptr));
+  EXPECT_EQ(nullptr,
+            detail::AllocateAlignedItems<Size10>(msb / 4, nullptr, nullptr));
+}
+
+TEST(AlignedAllocatorTest, AllocDefaultPointers) {
+  const size_t kSize = 7777;
+  void* ptr = AllocateAlignedBytes(kSize, /*alloc_ptr=*/nullptr,
+                                   /*opaque_ptr=*/nullptr);
+  ASSERT_NE(nullptr, ptr);
+  // Make sure the pointer is aligned to a multiple of kMaxVectorSize.
+  EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % kMaxVectorSize);
+  char* p = static_cast<char*>(ptr);
+  size_t ret = 0;
+  for (size_t i = 0; i < kSize; i++) {
+    // Writes and reads back every byte of p[] so that the accesses to the
+    // payload cannot be optimized away.
+    p[i] = static_cast<char>(i & 0x7F);
+    if (i) ret += p[i] * p[i - 1];
+  }
+  EXPECT_NE(0U, ret);
+  FreeAlignedBytes(ptr, /*free_ptr=*/nullptr, /*opaque_ptr=*/nullptr);
+}
+
+TEST(AlignedAllocatorTest, EmptyAlignedUniquePtr) {  // empty pointers are OK
+  AlignedUniquePtr<SampleObject<32>> ptr(nullptr, AlignedDeleter());
+  AlignedUniquePtr<SampleObject<32>[]> arr(nullptr, AlignedDeleter());
+}
+
+TEST(AlignedAllocatorTest, EmptyAlignedFreeUniquePtr) {  // empty pointers are OK
+  AlignedFreeUniquePtr<SampleObject<32>> ptr(nullptr, AlignedFreer());
+  AlignedFreeUniquePtr<SampleObject<32>[]> arr(nullptr, AlignedFreer());
+}
+
+TEST(AlignedAllocatorTest, CustomAlloc) {
+  FakeAllocator fake_alloc;
+
+  const size_t kSize = 7777;
+  void* ptr =
+      AllocateAlignedBytes(kSize, &FakeAllocator::StaticAlloc, &fake_alloc);
+  ASSERT_NE(nullptr, ptr);
+  // We should have only requested one alloc from the allocator.
+  EXPECT_EQ(1U, fake_alloc.PendingAllocs());
+  // Make sure the pointer is aligned to a multiple of kMaxVectorSize.
+  EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % kMaxVectorSize);
+  FreeAlignedBytes(ptr, &FakeAllocator::StaticFree, &fake_alloc);
+  EXPECT_EQ(0U, fake_alloc.PendingAllocs());  // freed via the custom FreePtr
+}
+
+TEST(AlignedAllocatorTest, MakeUniqueAlignedDefaultConstructor) {
+  {
+    auto ptr = MakeUniqueAligned<SampleObject<24>>();
+    // The default constructor sets data_[0] to 'a' and leaves counter_ null.
+    EXPECT_EQ('a', ptr->data_[0]);
+    EXPECT_EQ(nullptr, ptr->counter_);
+  }
+}
+
+TEST(AlignedAllocatorTest, MakeUniqueAligned) {
+  int counter = 0;
+  {
+    // Creates the object, initializes it with the explicit constructor and
+    // returns a unique_ptr to it.
+    auto ptr = MakeUniqueAligned<SampleObject<24>>(&counter);
+    EXPECT_EQ(1, counter);
+    // Custom constructor sets the data_[0] to 'b'.
+    EXPECT_EQ('b', ptr->data_[0]);
+  }
+  EXPECT_EQ(0, counter);  // the destructor ran when ptr went out of scope
+}
+
+TEST(AlignedAllocatorTest, MakeUniqueAlignedArray) {
+  int counter = 0;
+  {
+    // Creates the array of objects and initializes them with the explicit
+    // constructor.
+    auto arr = MakeUniqueAlignedArray<SampleObject<24>>(7, &counter);
+    EXPECT_EQ(7, counter);  // one constructor call per element
+    for (size_t i = 0; i < 7; i++) {
+      // Custom constructor sets the data_[0] to 'b'.
+      EXPECT_EQ('b', arr[i].data_[0]) << "Where i = " << i;
+    }
+  }
+  EXPECT_EQ(0, counter);  // one destructor call per element
+}
+
+TEST(AlignedAllocatorTest, AllocSingleInt) {
+  auto ptr = AllocateAligned<uint32_t>(1);
+  ASSERT_NE(nullptr, ptr.get());
+  EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(ptr.get()) % kMaxVectorSize);
+  // Force the unique_ptr to free its memory now to check it doesn't crash.
+  ptr.reset(nullptr);
+  EXPECT_EQ(nullptr, ptr.get());
+}
+
+TEST(AlignedAllocatorTest, AllocMultipleInt) {
+  const size_t kSize = 7777;
+  auto ptr = AllocateAligned<uint32_t>(kSize);
+  ASSERT_NE(nullptr, ptr.get());
+  EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(ptr.get()) % kMaxVectorSize);
+  // ptr[i] uses std::unique_ptr<T[]>::operator[], i.e. ptr.get()[i], so
+  // consecutive elements must be adjacent in memory.
+  EXPECT_EQ(&(ptr[0]) + 1, &(ptr[1]));
+
+  size_t ret = 0;
+  for (size_t i = 0; i < kSize; i++) {
+    // Performs a computation using ptr[] to prevent it being optimized away.
+    ptr[i] = static_cast<uint32_t>(i);
+    if (i) ret += ptr[i] * ptr[i - 1];
+  }
+  EXPECT_NE(0U, ret);
+}
+
+TEST(AlignedAllocatorTest, AllocateAlignedObjectWithoutDestructor) {
+  int counter = 0;
+  {
+    // This doesn't call the constructor; counter_ is assigned by hand below.
+    auto obj = AllocateAligned<SampleObject<24>>(1);
+    obj[0].counter_ = &counter;
+  }
+  // Destroying the unique_ptr shouldn't have called the destructor of the
+  // SampleObject<24>, which would have decremented the counter.
+  EXPECT_EQ(0, counter);
+}
+
+TEST(AlignedAllocatorTest, MakeUniqueAlignedArrayWithCustomAlloc) {
+  FakeAllocator fake_alloc;
+  int counter = 0;
+  {
+    // Creates the array of objects and initializes them with the explicit
+    // constructor.
+    auto arr = MakeUniqueAlignedArrayWithAlloc<SampleObject<24>>(
+        7, FakeAllocator::StaticAlloc, FakeAllocator::StaticFree, &fake_alloc,
+        &counter);
+    ASSERT_NE(nullptr, arr.get());
+    // An array should still only call a single allocation.
+    EXPECT_EQ(1u, fake_alloc.PendingAllocs());
+    EXPECT_EQ(7, counter);
+    for (size_t i = 0; i < 7; i++) {
+      // Custom constructor sets the data_[0] to 'b'.
+      EXPECT_EQ('b', arr[i].data_[0]) << "Where i = " << i;
+    }
+  }
+  EXPECT_EQ(0, counter);                      // every element was destroyed
+  EXPECT_EQ(0u, fake_alloc.PendingAllocs());  // and the allocation was freed
+}
+
+TEST(AlignedAllocatorTest, DefaultInit) {
+  // The test is whether this compiles. Default-init is useful for output params
+  // and per-thread storage.
+  std::vector<AlignedUniquePtr<int[]>> ptrs;
+  std::vector<AlignedFreeUniquePtr<double[]>> free_ptrs;
+  ptrs.resize(128);
+  free_ptrs.resize(128);
+  // The following is to prevent elision of the pointers.
+  std::mt19937 rng(129);  // Emscripten lacks random_device.
+  std::uniform_int_distribution<size_t> dist(0, 127);
+  ptrs[dist(rng)] = MakeUniqueAlignedArray<int>(123);
+  free_ptrs[dist(rng)] = AllocateAligned<double>(456);
+  // "Use" the pointers without resorting to printf: shifting twice by
+  // kBits - 1 always yields 0, so 0 == 0. A single shift by 64 is not allowed.
+  const auto addr1 = reinterpret_cast<uintptr_t>(ptrs[dist(rng)].get());
+  const auto addr2 = reinterpret_cast<uintptr_t>(free_ptrs[dist(rng)].get());
+  constexpr size_t kBits = sizeof(uintptr_t) * 8;
+  EXPECT_EQ((addr1 >> (kBits - 1)) >> (kBits - 1),
+            (addr2 >> (kBits - 1)) >> (kBits - 1));
+}
+
+}  // namespace hwy
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/base.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/base.h
new file mode 100644
index 0000000000..d87eb34b8e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/base.h
@@ -0,0 +1,647 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_BASE_H_
+#define HIGHWAY_HWY_BASE_H_
+
+// For SIMD module implementations and their callers, target-independent.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <atomic>
+#include <cfloat>
+
+// Add to #if conditions to prevent IDE from graying out code.
+#if (defined __CDT_PARSER__) || (defined __INTELLISENSE__) || \
+    (defined Q_CREATOR_RUN) || (defined(__CLANGD__))
+#define HWY_IDE 1
+#else
+#define HWY_IDE 0
+#endif
+
+//------------------------------------------------------------------------------
+// Detect compiler using predefined macros
+
+// clang-cl defines _MSC_VER but doesn't behave like MSVC in other aspects like
+// used in HWY_DIAGNOSTICS(). We include a check that we are not clang for that
+// purpose.
+#if defined(_MSC_VER) && !defined(__clang__)
+#define HWY_COMPILER_MSVC _MSC_VER
+#else
+#define HWY_COMPILER_MSVC 0
+#endif
+
+#ifdef __INTEL_COMPILER
+#define HWY_COMPILER_ICC __INTEL_COMPILER
+#else
+#define HWY_COMPILER_ICC 0
+#endif
+
+#ifdef __GNUC__
+#define HWY_COMPILER_GCC (__GNUC__ * 100 + __GNUC_MINOR__)
+#else
+#define HWY_COMPILER_GCC 0
+#endif
+
+// Clang can masquerade as MSVC/GCC, in which case both are set.
+#ifdef __clang__
+#ifdef __APPLE__
+// Apple LLVM version is unrelated to the actual Clang version, which we need
+// for enabling workarounds. Use the presence of warning flags to deduce it.
+// Adapted from https://github.com/simd-everywhere/simde/ simde-detect-clang.h.
+#if __has_warning("-Wformat-insufficient-args")
+#define HWY_COMPILER_CLANG 1200
+#elif __has_warning("-Wimplicit-const-int-float-conversion")
+#define HWY_COMPILER_CLANG 1100
+#elif __has_warning("-Wmisleading-indentation")
+#define HWY_COMPILER_CLANG 1000
+#elif defined(__FILE_NAME__)
+#define HWY_COMPILER_CLANG 900
+#elif __has_warning("-Wextra-semi-stmt") || \
+    __has_builtin(__builtin_rotateleft32)
+#define HWY_COMPILER_CLANG 800
+#elif __has_warning("-Wc++98-compat-extra-semi")
+#define HWY_COMPILER_CLANG 700
+#else  // Anything older than 7.0 is not recommended for Highway.
+#define HWY_COMPILER_CLANG 600
+#endif  // __has_warning chain
+#else   // Non-Apple: normal version
+#define HWY_COMPILER_CLANG (__clang_major__ * 100 + __clang_minor__)
+#endif
+#else  // Not clang
+#define HWY_COMPILER_CLANG 0
+#endif
+
+// More than one may be nonzero, but we want at least one.
+#if !HWY_COMPILER_MSVC && !HWY_COMPILER_ICC && !HWY_COMPILER_GCC && \
+    !HWY_COMPILER_CLANG
+#error "Unsupported compiler"
+#endif
+
+//------------------------------------------------------------------------------
+// Compiler-specific definitions
+
+#define HWY_STR_IMPL(macro) #macro
+#define HWY_STR(macro) HWY_STR_IMPL(macro)
+
+#if HWY_COMPILER_MSVC
+
+#include <intrin.h>
+
+#define HWY_RESTRICT __restrict
+#define HWY_INLINE __forceinline
+#define HWY_NOINLINE __declspec(noinline)
+#define HWY_FLATTEN
+#define HWY_NORETURN __declspec(noreturn)
+#define HWY_LIKELY(expr) (expr)
+#define HWY_UNLIKELY(expr) (expr)
+#define HWY_PRAGMA(tokens) __pragma(tokens)
+#define HWY_DIAGNOSTICS(tokens) HWY_PRAGMA(warning(tokens))
+#define HWY_DIAGNOSTICS_OFF(msc, gcc) HWY_DIAGNOSTICS(msc)
+#define HWY_MAYBE_UNUSED
+#define HWY_HAS_ASSUME_ALIGNED 0
+#if (_MSC_VER >= 1700)
+#define HWY_MUST_USE_RESULT _Check_return_
+#else
+#define HWY_MUST_USE_RESULT
+#endif
+
+#else
+
+#define HWY_RESTRICT __restrict__
+#define HWY_INLINE inline __attribute__((always_inline))
+#define HWY_NOINLINE __attribute__((noinline))
+#define HWY_FLATTEN __attribute__((flatten))
+#define HWY_NORETURN __attribute__((noreturn))
+#define HWY_LIKELY(expr) __builtin_expect(!!(expr), 1)
+#define HWY_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
+#define HWY_PRAGMA(tokens) _Pragma(#tokens)
+#define HWY_DIAGNOSTICS(tokens) HWY_PRAGMA(GCC diagnostic tokens)
+#define HWY_DIAGNOSTICS_OFF(msc, gcc) HWY_DIAGNOSTICS(gcc)
+// Encountered "attribute list cannot appear here" when using the C++17
+// [[maybe_unused]], so only use the old style attribute for now.
+#define HWY_MAYBE_UNUSED __attribute__((unused))
+#define HWY_MUST_USE_RESULT __attribute__((warn_unused_result))
+
+#endif  // !HWY_COMPILER_MSVC
+
+//------------------------------------------------------------------------------
+// Builtin/attributes
+
+#ifdef __has_builtin
+#define HWY_HAS_BUILTIN(name) __has_builtin(name)
+#else
+#define HWY_HAS_BUILTIN(name) 0
+#endif
+
+#ifdef __has_attribute
+#define HWY_HAS_ATTRIBUTE(name) __has_attribute(name)
+#else
+#define HWY_HAS_ATTRIBUTE(name) 0
+#endif
+
+// Enables error-checking of format strings.
+#if HWY_HAS_ATTRIBUTE(__format__)
+#define HWY_FORMAT(idx_fmt, idx_arg) \
+  __attribute__((__format__(__printf__, idx_fmt, idx_arg)))
+#else
+#define HWY_FORMAT(idx_fmt, idx_arg)
+#endif
+
+// Returns a void* pointer which the compiler then assumes is N-byte aligned.
+// Example: float* HWY_RESTRICT aligned = (float*)HWY_ASSUME_ALIGNED(in, 32);
+//
+// The assignment semantics are required by GCC/Clang. ICC provides an in-place
+// __assume_aligned, whereas MSVC's __assume appears unsuitable.
+#if HWY_HAS_BUILTIN(__builtin_assume_aligned)
+#define HWY_ASSUME_ALIGNED(ptr, align) __builtin_assume_aligned((ptr), (align))
+#else
+#define HWY_ASSUME_ALIGNED(ptr, align) (ptr) /* not supported */
+#endif
+
+// Clang and GCC require attributes on each function into which SIMD intrinsics
+// are inlined. Support both per-function annotation (HWY_ATTR) for lambdas and
+// automatic annotation via pragmas.
+#if HWY_COMPILER_CLANG
+#define HWY_PUSH_ATTRIBUTES(targets_str)                                     \
+  HWY_PRAGMA(clang attribute push(__attribute__((target(targets_str))), \
+                                       apply_to = function))
+#define HWY_POP_ATTRIBUTES HWY_PRAGMA(clang attribute pop)
+#elif HWY_COMPILER_GCC
+#define HWY_PUSH_ATTRIBUTES(targets_str) \
+  HWY_PRAGMA(GCC push_options) HWY_PRAGMA(GCC target targets_str)
+#define HWY_POP_ATTRIBUTES HWY_PRAGMA(GCC pop_options)
+#else
+#define HWY_PUSH_ATTRIBUTES(targets_str)
+#define HWY_POP_ATTRIBUTES
+#endif
+
+//------------------------------------------------------------------------------
+// Detect architecture using predefined macros
+
+#if defined(__i386__) || defined(_M_IX86)
+#define HWY_ARCH_X86_32 1
+#else
+#define HWY_ARCH_X86_32 0
+#endif
+
+#if defined(__x86_64__) || defined(_M_X64)
+#define HWY_ARCH_X86_64 1
+#else
+#define HWY_ARCH_X86_64 0
+#endif
+
+#if HWY_ARCH_X86_32 && HWY_ARCH_X86_64
+#error "Cannot have both x86-32 and x86-64"
+#endif
+
+#if HWY_ARCH_X86_32 || HWY_ARCH_X86_64
+#define HWY_ARCH_X86 1
+#else
+#define HWY_ARCH_X86 0
+#endif
+
+#if defined(__powerpc64__) || defined(_M_PPC)
+#define HWY_ARCH_PPC 1
+#else
+#define HWY_ARCH_PPC 0
+#endif
+
+#if defined(__ARM_ARCH_ISA_A64) || defined(__aarch64__) || defined(_M_ARM64)
+#define HWY_ARCH_ARM_A64 1
+#else
+#define HWY_ARCH_ARM_A64 0
+#endif
+
+#if defined(__arm__) || defined(_M_ARM)
+#define HWY_ARCH_ARM_V7 1
+#else
+#define HWY_ARCH_ARM_V7 0
+#endif
+
+#if HWY_ARCH_ARM_A64 && HWY_ARCH_ARM_V7
+#error "Cannot have both A64 and V7"
+#endif
+
+#if HWY_ARCH_ARM_A64 || HWY_ARCH_ARM_V7
+#define HWY_ARCH_ARM 1
+#else
+#define HWY_ARCH_ARM 0
+#endif
+
+#if defined(__EMSCRIPTEN__) || defined(__wasm__) || defined(__WASM__)
+#define HWY_ARCH_WASM 1
+#else
+#define HWY_ARCH_WASM 0
+#endif
+
+#ifdef __riscv
+#define HWY_ARCH_RVV 1
+#else
+#define HWY_ARCH_RVV 0
+#endif
+
+// It is an error to detect multiple architectures at the same time, but OK to
+// detect none of the above.
+#if (HWY_ARCH_X86 + HWY_ARCH_PPC + HWY_ARCH_ARM + HWY_ARCH_WASM + \
+     HWY_ARCH_RVV) > 1
+#error "Must not detect more than one architecture"
+#endif
+
+//------------------------------------------------------------------------------
+// Macros
+
+#define HWY_API static HWY_INLINE HWY_FLATTEN HWY_MAYBE_UNUSED
+
+#define HWY_CONCAT_IMPL(a, b) a##b
+#define HWY_CONCAT(a, b) HWY_CONCAT_IMPL(a, b)
+
+#define HWY_MIN(a, b) ((a) < (b) ? (a) : (b))
+#define HWY_MAX(a, b) ((a) > (b) ? (a) : (b))
+
+// Compile-time fence to prevent undesirable code reordering. On Clang x86, the
+// typical asm volatile("" : : : "memory") has no effect, whereas atomic fence
+// does, without generating code.
+#if HWY_ARCH_X86
+#define HWY_FENCE std::atomic_thread_fence(std::memory_order_acq_rel)
+#else
+// TODO(janwas): investigate alternatives. On ARM, the above generates barriers.
+#define HWY_FENCE
+#endif
+
+// 4 instances of a given literal value, useful as input to LoadDup128.
+#define HWY_REP4(literal) literal, literal, literal, literal
+
+#define HWY_ABORT(format, ...) \
+  ::hwy::Abort(__FILE__, __LINE__, format, ##__VA_ARGS__)
+
+// Always enabled.
+#define HWY_ASSERT(condition)             \
+  do {                                    \
+    if (!(condition)) {                   \
+      HWY_ABORT("Assert %s", #condition); \
+    }                                     \
+  } while (0)
+
+// Only for "debug" builds
+#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || \
+    defined(MEMORY_SANITIZER) || defined(THREAD_SANITIZER)
+#define HWY_DASSERT(condition) HWY_ASSERT(condition)
+#else
+#define HWY_DASSERT(condition) \
+  do {                         \
+  } while (0)
+#endif
+
+
+namespace hwy {
+
+//------------------------------------------------------------------------------
+// Alignment
+
+// Not guaranteed to be an upper bound, but the alignment established by
+// aligned_allocator is HWY_MAX(HWY_ALIGNMENT, kMaxVectorSize).
+#if HWY_ARCH_X86
+static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 64;  // AVX-512
+#define HWY_ALIGN_MAX alignas(64)
+#elif HWY_ARCH_RVV
+// Not actually an upper bound on the size, but this value prevents crossing a
+// 4K boundary (relevant on Andes).
+static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 4096;
+#define HWY_ALIGN_MAX alignas(8)  // only elements need be aligned
+#else
+static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 16;
+#define HWY_ALIGN_MAX alignas(16)
+#endif
+
+//------------------------------------------------------------------------------
+// Lane types
+
+// Match [u]int##_t naming scheme so rvv-inl.h macros can obtain the type name
+// by concatenating base type and bits.
+
+// RVV already has a builtin type and the GCC intrinsics require it.
+#if HWY_ARCH_RVV && HWY_COMPILER_GCC
+#define HWY_NATIVE_FLOAT16 1
+#else
+#define HWY_NATIVE_FLOAT16 0
+#endif
+
+#if HWY_NATIVE_FLOAT16
+using float16_t = __fp16;
+// Clang does not allow __fp16 arguments, but scalar.h requires LaneType
+// arguments, so use a wrapper.
+// TODO(janwas): replace with _Float16 when that is supported?
+#else
+#pragma pack(push, 1)
+struct float16_t {
+  uint16_t bits;
+};
+#pragma pack(pop)
+#endif
+
+using float32_t = float;
+using float64_t = double;
+
+//------------------------------------------------------------------------------
+// Controlling overload resolution (SFINAE)
+
+template <bool Condition, class T>
+struct EnableIfT {};
+template <class T>
+struct EnableIfT<true, T> {
+  using type = T;
+};
+
+template <bool Condition, class T = void>
+using EnableIf = typename EnableIfT<Condition, T>::type;
+
+// Insert into template/function arguments to enable this overload only for
+// vectors of AT MOST this many bits. Exactly 128 bits needs no macro: overload
+// on Vec128<T> and the Full128<T> descriptor instead. Other sizes (e.g. 64 bit)
+// can be selected with Simd<T, 8 / sizeof(T)>.
+#define HWY_IF_LE128(T, N) hwy::EnableIf<N * sizeof(T) <= 16>* = nullptr
+#define HWY_IF_LE64(T, N) hwy::EnableIf<N * sizeof(T) <= 8>* = nullptr
+#define HWY_IF_LE32(T, N) hwy::EnableIf<N * sizeof(T) <= 4>* = nullptr
+
+// Enable an overload by lane category; SIGNED excludes floats. All names are
+// qualified with hwy:: so the macros also expand correctly in other namespaces.
+#define HWY_IF_UNSIGNED(T) hwy::EnableIf<!hwy::IsSigned<T>()>* = nullptr
+#define HWY_IF_SIGNED(T) \
+  hwy::EnableIf<hwy::IsSigned<T>() && !hwy::IsFloat<T>()>* = nullptr
+#define HWY_IF_FLOAT(T) hwy::EnableIf<hwy::IsFloat<T>()>* = nullptr
+#define HWY_IF_NOT_FLOAT(T) hwy::EnableIf<!hwy::IsFloat<T>()>* = nullptr
+
+#define HWY_IF_LANE_SIZE(T, bytes) \
+  hwy::EnableIf<sizeof(T) == (bytes)>* = nullptr
+#define HWY_IF_NOT_LANE_SIZE(T, bytes) \
+  hwy::EnableIf<sizeof(T) != (bytes)>* = nullptr
+
+// Empty struct used as a size tag type.
+template <size_t N>
+struct SizeTag {};
+
+//------------------------------------------------------------------------------
+// Type traits
+
+template <typename T>
+constexpr bool IsFloat() {
+  return T(1.25) != T(1);  // integer T truncates 1.25 to 1
+}
+
+template <typename T>
+constexpr bool IsSigned() {
+  return T(0) > T(-1);  // false if -1 wraps (unsigned); true for floats too
+}
+
+// Largest/smallest representable integer values.
+template <typename T>
+constexpr T LimitsMax() {
+  static_assert(!IsFloat<T>(), "Only for integer types");
+  return IsSigned<T>() ? T((1ULL << (sizeof(T) * 8 - 1)) - 1)
+                       : static_cast<T>(~0ull);
+}
+template <typename T>
+constexpr T LimitsMin() {
+  static_assert(!IsFloat<T>(), "Only for integer types");
+  return IsSigned<T>() ? T(-1) - LimitsMax<T>() : T(0);
+}
+
+// Largest/smallest representable value (integer or float). This naming avoids
+// confusion with numeric_limits<float>::min() (the smallest positive value).
+template <typename T>
+constexpr T LowestValue() {
+  return LimitsMin<T>();
+}
+template <>
+constexpr float LowestValue<float>() {
+  return -FLT_MAX;
+}
+template <>
+constexpr double LowestValue<double>() {
+  return -DBL_MAX;
+}
+
+template <typename T>
+constexpr T HighestValue() {
+  return LimitsMax<T>();
+}
+template <>
+constexpr float HighestValue<float>() {
+  return FLT_MAX;
+}
+template <>
+constexpr double HighestValue<double>() {
+  return DBL_MAX;
+}
+
+// Returns bitmask of the exponent field in IEEE binary32/64.
+template <typename T>
+constexpr T ExponentMask() {
+  static_assert(sizeof(T) == 0, "Only instantiate the specializations");
+  return 0;
+}
+template <>
+constexpr uint32_t ExponentMask<uint32_t>() {
+  return 0x7F800000;
+}
+template <>
+constexpr uint64_t ExponentMask<uint64_t>() {
+  return 0x7FF0000000000000ULL;
+}
+
+// Returns 1 << mantissa_bits as a floating-point number. All integers whose
+// absolute value are less than this can be represented exactly.
+template <typename T>
+constexpr T MantissaEnd() {
+  static_assert(sizeof(T) == 0, "Only instantiate the specializations");
+  return 0;
+}
+template <>
+constexpr float MantissaEnd<float>() {
+  return 8388608.0f;  // 1 << 23
+}
+template <>
+constexpr double MantissaEnd<double>() {
+  // floating point literal with p52 requires C++17.
+  return 4503599627370496.0;  // 1 << 52
+}
+
+//------------------------------------------------------------------------------
+// Type relations
+
+namespace detail {
+
+template <typename T>
+struct Relations;
+template <>
+struct Relations<uint8_t> {
+  using Unsigned = uint8_t;
+  using Signed = int8_t;
+  using Wide = uint16_t;
+};
+template <>
+struct Relations<int8_t> {
+  using Unsigned = uint8_t;
+  using Signed = int8_t;
+  using Wide = int16_t;
+};
+template <>
+struct Relations<uint16_t> {
+  using Unsigned = uint16_t;
+  using Signed = int16_t;
+  using Wide = uint32_t;
+  using Narrow = uint8_t;
+};
+template <>
+struct Relations<int16_t> {
+  using Unsigned = uint16_t;
+  using Signed = int16_t;
+  using Wide = int32_t;
+  using Narrow = int8_t;
+};
+template <>
+struct Relations<uint32_t> {
+  using Unsigned = uint32_t;
+  using Signed = int32_t;
+  using Float = float;
+  using Wide = uint64_t;
+  using Narrow = uint16_t;
+};
+template <>
+struct Relations<int32_t> {
+  using Unsigned = uint32_t;
+  using Signed = int32_t;
+  using Float = float;
+  using Wide = int64_t;
+  using Narrow = int16_t;
+};
+template <>
+struct Relations<uint64_t> {
+  using Unsigned = uint64_t;
+  using Signed = int64_t;
+  using Float = double;
+  using Narrow = uint32_t;
+};
+template <>
+struct Relations<int64_t> {
+  using Unsigned = uint64_t;
+  using Signed = int64_t;
+  using Float = double;
+  using Narrow = int32_t;
+};
+template <>
+struct Relations<float16_t> {
+  using Unsigned = uint16_t;
+  using Signed = int16_t;
+  using Float = float16_t;
+  using Wide = float;
+};
+template <>
+struct Relations<float> {
+  using Unsigned = uint32_t;
+  using Signed = int32_t;
+  using Float = float;
+  using Wide = double;
+};
+template <>
+struct Relations<double> {
+  using Unsigned = uint64_t;
+  using Signed = int64_t;
+  using Float = double;
+  using Narrow = float;
+};
+
+}  // namespace detail
+
+// Aliases for types of a different category, but the same size.
+template <typename T>
+using MakeUnsigned = typename detail::Relations<T>::Unsigned;
+template <typename T>
+using MakeSigned = typename detail::Relations<T>::Signed;
+template <typename T>
+using MakeFloat = typename detail::Relations<T>::Float;
+
+// Aliases for types of the same category, but different size.
+template <typename T>
+using MakeWide = typename detail::Relations<T>::Wide;
+template <typename T>
+using MakeNarrow = typename detail::Relations<T>::Narrow;
+
+//------------------------------------------------------------------------------
+// Helper functions
+
+template <typename T1, typename T2>
+constexpr inline T1 DivCeil(T1 a, T2 b) {
+  return (a + b - 1) / b;  // ceil(a / b) for integers a >= 0, b > 0
+}
+
+// Returns `what` rounded up to a multiple of nonzero `align` (pow2: ADD+AND).
+constexpr inline size_t RoundUpTo(size_t what, size_t align) {
+  return DivCeil(what, align) * align;
+}
+
+// Returns index of the lowest set bit (trailing zeros); undefined for x == 0.
+HWY_API size_t Num0BitsBelowLS1Bit_Nonzero32(const uint32_t x) {
+#if HWY_COMPILER_MSVC
+  unsigned long index;  // NOLINT
+  _BitScanForward(&index, x);
+  return index;
+#else  // HWY_COMPILER_MSVC
+  return static_cast<size_t>(__builtin_ctz(x));
+#endif  // HWY_COMPILER_MSVC
+}
+
+// Returns the number of set (1) bits in x.
+HWY_API size_t PopCount(uint64_t x) {
+#if HWY_COMPILER_CLANG || HWY_COMPILER_GCC
+  return static_cast<size_t>(__builtin_popcountll(x));
+#elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64
+  return _mm_popcnt_u64(x);
+#elif HWY_COMPILER_MSVC && HWY_ARCH_X86_32
+  return _mm_popcnt_u32(uint32_t(x)) + _mm_popcnt_u32(uint32_t(x >> 32));
+#else  // Portable SWAR fallback; masks must cover all 64 bits of x.
+  x -= ((x >> 1) & 0x5555555555555555ULL);
+  x = (((x >> 2) & 0x3333333333333333ULL) + (x & 0x3333333333333333ULL));
+  x = (((x >> 4) + x) & 0x0F0F0F0F0F0F0F0FULL);
+  x += (x >> 8);
+  x += (x >> 16);
+  x += (x >> 32);
+  return static_cast<size_t>(x & 0x7Fu);
+#endif
+}
+
+// Copies kBytes bytes from `from` to `to`, which must not overlap/alias.
+template <size_t kBytes, typename From, typename To>
+HWY_API void CopyBytes(const From* from, To* to) {
+#if HWY_COMPILER_MSVC
+  const uint8_t* HWY_RESTRICT from_bytes =
+      reinterpret_cast<const uint8_t*>(from);
+  uint8_t* HWY_RESTRICT to_bytes = reinterpret_cast<uint8_t*>(to);
+  for (size_t i = 0; i < kBytes; ++i) {
+    to_bytes[i] = from_bytes[i];
+  }
+#else
+  // Avoids horrible codegen on Clang (series of PINSRB)
+  __builtin_memcpy(to, from, kBytes);
+#endif
+}
+
+HWY_NORETURN void HWY_FORMAT(3, 4)
+    Abort(const char* file, int line, const char* format, ...);
+
+}  // namespace hwy
+
+#endif  // HIGHWAY_HWY_BASE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/base_test.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/base_test.cc
new file mode 100644
index 0000000000..19e0b6f544
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/base_test.cc
@@ -0,0 +1,123 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <limits>
+
+#include "hwy/base.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "base_test.cc"
+#include "hwy/foreach_target.h"
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+HWY_NOINLINE void TestAllLimits() {
+  HWY_ASSERT_EQ(uint8_t(0), LimitsMin<uint8_t>());
+  HWY_ASSERT_EQ(uint16_t(0), LimitsMin<uint16_t>());
+  HWY_ASSERT_EQ(uint32_t(0), LimitsMin<uint32_t>());
+  HWY_ASSERT_EQ(uint64_t(0), LimitsMin<uint64_t>());
+
+  HWY_ASSERT_EQ(int8_t(-128), LimitsMin<int8_t>());
+  HWY_ASSERT_EQ(int16_t(-32768), LimitsMin<int16_t>());
+  HWY_ASSERT_EQ(int32_t(0x80000000u), LimitsMin<int32_t>());
+  HWY_ASSERT_EQ(int64_t(0x8000000000000000ull), LimitsMin<int64_t>());
+
+  HWY_ASSERT_EQ(uint8_t(0xFF), LimitsMax<uint8_t>());
+  HWY_ASSERT_EQ(uint16_t(0xFFFF), LimitsMax<uint16_t>());
+  HWY_ASSERT_EQ(uint32_t(0xFFFFFFFFu), LimitsMax<uint32_t>());
+  HWY_ASSERT_EQ(uint64_t(0xFFFFFFFFFFFFFFFFull), LimitsMax<uint64_t>());
+
+  HWY_ASSERT_EQ(int8_t(0x7F), LimitsMax<int8_t>());
+  HWY_ASSERT_EQ(int16_t(0x7FFF), LimitsMax<int16_t>());
+  HWY_ASSERT_EQ(int32_t(0x7FFFFFFFu), LimitsMax<int32_t>());
+  HWY_ASSERT_EQ(int64_t(0x7FFFFFFFFFFFFFFFull), LimitsMax<int64_t>());
+}
+
+struct TestLowestHighest {
+  template <class T>
+  HWY_NOINLINE void operator()(T /*unused*/) const {
+    HWY_ASSERT_EQ(std::numeric_limits<T>::lowest(), LowestValue<T>());
+    HWY_ASSERT_EQ(std::numeric_limits<T>::max(), HighestValue<T>());
+  }
+};
+
+HWY_NOINLINE void TestAllLowestHighest() { ForAllTypes(TestLowestHighest()); }
+struct TestIsUnsigned {
+  template <class T>
+  HWY_NOINLINE void operator()(T /*unused*/) const {
+    static_assert(!IsFloat<T>(), "Expected !IsFloat");
+    static_assert(!IsSigned<T>(), "Expected !IsSigned");
+  }
+};
+
+struct TestIsSigned {
+  template <class T>
+  HWY_NOINLINE void operator()(T /*unused*/) const {
+    static_assert(!IsFloat<T>(), "Expected !IsFloat");
+    static_assert(IsSigned<T>(), "Expected IsSigned");
+  }
+};
+
+struct TestIsFloat {
+  template <class T>
+  HWY_NOINLINE void operator()(T /*unused*/) const {
+    static_assert(IsFloat<T>(), "Expected IsFloat");
+    static_assert(IsSigned<T>(), "Floats are also considered signed");
+  }
+};
+
+HWY_NOINLINE void TestAllType() {
+  ForUnsignedTypes(TestIsUnsigned());
+  ForSignedTypes(TestIsSigned());
+  ForFloatTypes(TestIsFloat());
+}
+
+HWY_NOINLINE void TestAllPopCount() {
+  HWY_ASSERT_EQ(size_t(0), PopCount(0u));
+  HWY_ASSERT_EQ(size_t(1), PopCount(1u));
+  HWY_ASSERT_EQ(size_t(1), PopCount(2u));
+  HWY_ASSERT_EQ(size_t(2), PopCount(3u));
+  HWY_ASSERT_EQ(size_t(1), PopCount(0x80000000u));
+  HWY_ASSERT_EQ(size_t(31), PopCount(0x7FFFFFFFu));
+  HWY_ASSERT_EQ(size_t(32), PopCount(0xFFFFFFFFu));
+
+  HWY_ASSERT_EQ(size_t(1), PopCount(0x80000000ull));
+  HWY_ASSERT_EQ(size_t(31), PopCount(0x7FFFFFFFull));
+  HWY_ASSERT_EQ(size_t(32), PopCount(0xFFFFFFFFull));
+  HWY_ASSERT_EQ(size_t(33), PopCount(0x10FFFFFFFFull));
+  HWY_ASSERT_EQ(size_t(63), PopCount(0xFFFEFFFFFFFFFFFFull));
+  HWY_ASSERT_EQ(size_t(64), PopCount(0xFFFFFFFFFFFFFFFFull));
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+HWY_BEFORE_TEST(BaseTest);
+HWY_EXPORT_AND_TEST_P(BaseTest, TestAllLimits);
+HWY_EXPORT_AND_TEST_P(BaseTest, TestAllLowestHighest);
+HWY_EXPORT_AND_TEST_P(BaseTest, TestAllType);
+HWY_EXPORT_AND_TEST_P(BaseTest, TestAllPopCount);
+}  // namespace hwy
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/cache_control.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/cache_control.h
new file mode 100644
index 0000000000..7020cc7b2e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/cache_control.h
@@ -0,0 +1,107 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_CACHE_CONTROL_H_
+#define HIGHWAY_HWY_CACHE_CONTROL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "hwy/base.h"
+
+// Requires SSE2; fails to compile on 32-bit Clang 7 (see
+// https://github.com/gperftools/gperftools/issues/946).
+#if !defined(__SSE2__) || (HWY_COMPILER_CLANG && HWY_ARCH_X86_32)
+#undef HWY_DISABLE_CACHE_CONTROL
+#define HWY_DISABLE_CACHE_CONTROL
+#endif
+
+// intrin.h is sufficient on MSVC and already included by base.h.
+#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL) && !HWY_COMPILER_MSVC
+#include <emmintrin.h>  // SSE2
+#endif
+
+// Windows.h #defines these, which causes infinite recursion. Temporarily
+// undefine them in this header; these functions are anyway deprecated.
+// TODO(janwas): remove when these functions are removed.
+#pragma push_macro("LoadFence")
+#pragma push_macro("StoreFence")
+#undef LoadFence
+#undef StoreFence
+
+namespace hwy {
+
+// Even if N*sizeof(T) is smaller, Stream may write a multiple of this size.
+#define HWY_STREAM_MULTIPLE 16
+
+// The following functions may also require an attribute.
+#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL) && !HWY_COMPILER_MSVC
+#define HWY_ATTR_CACHE __attribute__((target("sse2")))
+#else
+#define HWY_ATTR_CACHE
+#endif
+
+// Delays subsequent loads until prior loads are visible. On Intel CPUs, also
+// serves as a full fence (waits for all prior instructions to complete).
+// No effect on non-x86.
+HWY_INLINE HWY_ATTR_CACHE void LoadFence() {
+#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
+  _mm_lfence();
+#endif
+}
+
+// Ensures previous weakly-ordered stores are visible. No effect on non-x86.
+HWY_INLINE HWY_ATTR_CACHE void StoreFence() {
+#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
+  _mm_sfence();
+#endif
+}
+
+// Begins loading the cache line containing "p".
+template <typename T>
+HWY_INLINE HWY_ATTR_CACHE void Prefetch(const T* p) {
+#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
+  _mm_prefetch(reinterpret_cast<const char*>(p), _MM_HINT_T0);
+#elif HWY_COMPILER_GCC || HWY_COMPILER_CLANG
+  // Hint=0 (NTA) behavior differs, but skipping outer caches is probably not
+  // desirable, so use the default 3 (keep in caches).
+  __builtin_prefetch(p, /*write=*/0, /*hint=*/3);
+#else
+  (void)p;
+#endif
+}
+
+// Invalidates and flushes the cache line containing "p". No effect on non-x86.
+HWY_INLINE HWY_ATTR_CACHE void FlushCacheline(const void* p) {
+#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
+  _mm_clflush(p);
+#else
+  (void)p;
+#endif
+}
+
+// Reduces power consumption in spin-loops. No effect on non-x86.
+HWY_INLINE HWY_ATTR_CACHE void Pause() {
+#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
+  _mm_pause();
+#endif
+}
+
+}  // namespace hwy
+
+// TODO(janwas): remove when these functions are removed. (See above.)
+#pragma pop_macro("StoreFence")
+#pragma pop_macro("LoadFence")
+
+#endif  // HIGHWAY_HWY_CACHE_CONTROL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/contrib/image/image.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/contrib/image/image.cc
new file mode 100644
index 0000000000..0dfe739a49
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/contrib/image/image.cc
@@ -0,0 +1,145 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/image/image.h"
+
+#include <cstddef>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/image/image.cc"
+
+#include <algorithm>  // swap
+
+#include "hwy/foreach_target.h"
+#include "hwy/highway.h"
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+size_t GetVectorSize() { return Lanes(HWY_FULL(uint8_t)()); }
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(GetVectorSize);  // Local function.
+}  // namespace
+
+size_t ImageBase::VectorSize() {
+  // Do not cache result - must return the current value, which may be greater
+  // than the first call if it was subject to DisableTargets!
+  return HWY_DYNAMIC_DISPATCH(GetVectorSize)();
+}
+
+size_t ImageBase::BytesPerRow(const size_t xsize, const size_t sizeof_t) {
+  // Returns the row stride in bytes for `xsize` elements of `sizeof_t` bytes:
+  // a multiple of max(VectorSize(), HWY_ALIGNMENT) that avoids 2 KiB aliasing.
+  const size_t vec_size = VectorSize();
+  size_t valid_bytes = xsize * sizeof_t;
+
+  // Allow unaligned accesses starting at the last valid value - this may raise
+  // msan errors unless the user calls InitializePaddingForUnalignedAccesses.
+  // Skip for the scalar case because no extra lanes will be loaded.
+  if (vec_size != 1) {
+    HWY_DASSERT(vec_size >= sizeof_t);
+    valid_bytes += vec_size - sizeof_t;
+  }
+
+  // Round up to vector and cache line size.
+  const size_t align = std::max<size_t>(vec_size, HWY_ALIGNMENT);
+  size_t bytes_per_row = RoundUpTo(valid_bytes, align);
+
+  // CPUs guard against read-after-write hazards by comparing only the lower
+  // 11 address bits, so consecutive rows must not be a multiple of 2 KiB apart
+  // (Avoid2K does likewise for Image3 planes). Hence test 2 KiB, not alignment.
+  constexpr size_t kAlias = 2048;
+  if (bytes_per_row % kAlias == 0) {
+    bytes_per_row += align;
+  }
+  HWY_DASSERT(bytes_per_row % align == 0);
+  return bytes_per_row;
+}
+
+ImageBase::ImageBase(const size_t xsize, const size_t ysize,
+                     const size_t sizeof_t)
+    : xsize_(static_cast<uint32_t>(xsize)),
+      ysize_(static_cast<uint32_t>(ysize)),
+      bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {
+  HWY_ASSERT(sizeof_t == 1 || sizeof_t == 2 || sizeof_t == 4 || sizeof_t == 8);
+
+  bytes_per_row_ = 0;
+  // Dimensions can be zero, e.g. for lazily-allocated images. Only allocate
+  // if nonzero, because "zero" bytes still have padding/bookkeeping overhead.
+  if (xsize != 0 && ysize != 0) {
+    bytes_per_row_ = BytesPerRow(xsize, sizeof_t);
+    bytes_ = AllocateAligned<uint8_t>(bytes_per_row_ * ysize);
+    HWY_ASSERT(bytes_.get() != nullptr);
+    InitializePadding(sizeof_t, Padding::kRoundUp);
+  }
+}
+
+ImageBase::ImageBase(const size_t xsize, const size_t ysize,
+                     const size_t bytes_per_row, void* const aligned)
+    : xsize_(static_cast<uint32_t>(xsize)),
+      ysize_(static_cast<uint32_t>(ysize)),
+      bytes_per_row_(bytes_per_row),
+      bytes_(static_cast<uint8_t*>(aligned),
+             AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {
+  const size_t vec_size = VectorSize();
+  HWY_ASSERT(bytes_per_row % vec_size == 0);
+  HWY_ASSERT(reinterpret_cast<uintptr_t>(aligned) % vec_size == 0);
+}
+
+void ImageBase::InitializePadding(const size_t sizeof_t, Padding padding) {
+#if defined(MEMORY_SANITIZER) || HWY_IDE
+  if (xsize_ == 0 || ysize_ == 0) return;
+
+  const size_t vec_size = VectorSize();  // Bytes, independent of sizeof_t!
+  if (vec_size == 1) return;             // Scalar mode: no padding needed
+
+  const size_t valid_size = xsize_ * sizeof_t;
+  const size_t initialize_size = padding == Padding::kRoundUp
+                                     ? RoundUpTo(valid_size, vec_size)
+                                     : valid_size + vec_size - sizeof_t;
+  if (valid_size == initialize_size) return;
+
+  for (size_t y = 0; y < ysize_; ++y) {
+    uint8_t* HWY_RESTRICT row = static_cast<uint8_t*>(VoidRow(y));
+#if defined(__clang__) && (__clang_major__ <= 6)
+    // There's a bug in msan in clang-6 when handling AVX2 operations. This
+    // workaround allows tests to pass on msan, although it is slower and
+    // prevents msan warnings from uninitialized images.
+    memset(row, 0, initialize_size);
+#else
+    memset(row + valid_size, 0, initialize_size - valid_size);
+#endif  // clang6
+  }
+#else
+  (void)sizeof_t;
+  (void)padding;
+#endif  // MEMORY_SANITIZER
+}
+
+void ImageBase::Swap(ImageBase& other) {
+  std::swap(xsize_, other.xsize_);
+  std::swap(ysize_, other.ysize_);
+  std::swap(bytes_per_row_, other.bytes_per_row_);
+  std::swap(bytes_, other.bytes_);
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/contrib/image/image.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/contrib/image/image.h
new file mode 100644
index 0000000000..0dee21fd3b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/contrib/image/image.h
@@ -0,0 +1,468 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
+#define HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
+
+// SIMD/multicore-friendly planar image representation with row accessors.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <cstddef>
+#include <utility>  // std::move
+
+#include "hwy/aligned_allocator.h"
+#include "hwy/base.h"
+
+namespace hwy {
+
+// Type-independent parts of Image<> - reduces code duplication and facilitates
+// moving member function implementations to cc file.
+struct ImageBase {
+  // Returns required alignment in bytes for externally allocated memory.
+  static size_t VectorSize();
+
+  // Returns distance [bytes] between the start of two consecutive rows, a
+  // multiple of VectorSize but NOT kAlias (see implementation).
+  static size_t BytesPerRow(const size_t xsize, const size_t sizeof_t);
+
+  // No allocation (for output params or unused images): zero size, null data.
+  ImageBase()
+      : xsize_(0),
+        ysize_(0),
+        bytes_per_row_(0),
+        bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {}
+
+  // Allocates memory (this is the common case)
+  ImageBase(size_t xsize, size_t ysize, size_t sizeof_t);
+
+  // References but does not take ownership of external memory. Useful for
+  // interoperability with other libraries. `aligned` must be aligned to a
+  // multiple of VectorSize() and `bytes_per_row` must also be a multiple of
+  // VectorSize() or preferably equal to BytesPerRow().
+  ImageBase(size_t xsize, size_t ysize, size_t bytes_per_row, void* aligned);
+
+  // Copy construction/assignment is forbidden to avoid inadvertent copies,
+  // which can be very expensive. Use CopyImageTo() instead.
+  ImageBase(const ImageBase& other) = delete;
+  ImageBase& operator=(const ImageBase& other) = delete;
+
+  // Move constructor (required for returning Image from function)
+  ImageBase(ImageBase&& other) noexcept = default;
+
+  // Move assignment (required for std::vector)
+  ImageBase& operator=(ImageBase&& other) noexcept = default;
+
+  void Swap(ImageBase& other);  // Exchanges dimensions, stride and storage.
+
+  // Reduces the reported valid dimensions without touching the storage, e.g.
+  // after pre-allocating with extra room. Not checked: the caller must ensure
+  // xsize/ysize are <= the original dimensions (they are stored as uint32_t).
+  void ShrinkTo(const size_t xsize, const size_t ysize) {
+    xsize_ = static_cast<uint32_t>(xsize);
+    ysize_ = static_cast<uint32_t>(ysize);
+    // NOTE: we can't recompute bytes_per_row for more compact storage and
+    // better locality because that would invalidate the image contents.
+  }
+
+  // How many valid pixels per row (xsize) and how many rows (ysize).
+  HWY_INLINE size_t xsize() const { return xsize_; }
+  HWY_INLINE size_t ysize() const { return ysize_; }
+
+  // NOTE: do not use this for copying rows - the valid xsize may be much less.
+  HWY_INLINE size_t bytes_per_row() const { return bytes_per_row_; }
+
+  // Raw access to byte contents, for interfacing with other libraries.
+  // uint8_t avoids sign extension. NOTE(review): assumes 64-byte alignment.
+  HWY_INLINE uint8_t* bytes() {
+    void* p = bytes_.get();
+    return static_cast<uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64));
+  }
+  HWY_INLINE const uint8_t* bytes() const {
+    const void* p = bytes_.get();
+    return static_cast<const uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64));
+  }
+
+ protected:
+  // Returns pointer to the start of row y; aborts on y >= ysize_ if sanitized.
+  HWY_INLINE void* VoidRow(const size_t y) const {
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+    if (y >= ysize_) {
+      HWY_ABORT("Row(%zu) >= %u\n", y, ysize_);
+    }
+#endif
+
+    void* row = bytes_.get() + y * bytes_per_row_;  // Rows are one stride apart.
+    return HWY_ASSUME_ALIGNED(row, 64);
+  }
+
+  enum class Padding {
+    // Allow Load(d, row + x) for x = 0; x < xsize(); x += Lanes(d). Default.
+    kRoundUp,
+    // Allow LoadU(d, row + x) for x <= xsize() - 1. This requires an extra
+    // vector to be initialized. If done by default, this would suppress
+    // legitimate msan warnings. We therefore require users to explicitly call
+    // InitializePadding before using unaligned loads (e.g. convolution).
+    kUnaligned
+  };
+
+  // Initializes the minimum bytes required to suppress msan warnings from
+  // legitimate (according to Padding mode) vector loads/stores on the right
+  // border, where some lanes are uninitialized and assumed to be unused.
+  void InitializePadding(size_t sizeof_t, Padding padding);
+
+  // (Members are non-const to enable assignment during move-assignment.)
+  uint32_t xsize_;  // In valid pixels, not including any padding.
+  uint32_t ysize_;  // Number of valid rows.
+  size_t bytes_per_row_;  // Includes padding.
+  AlignedFreeUniquePtr<uint8_t[]> bytes_;  // Deleter is a no-op when empty.
+};
+
+// Single channel, aligned rows separated by padding. T must be POD.
+//
+// 'Single channel' (one 2D array per channel) simplifies vectorization
+// (repeating the same operation on multiple adjacent components) without the
+// complexity of a hybrid layout (8 R, 8 G, 8 B, ...). In particular, clients
+// can easily iterate over all components in a row and Image requires no
+// knowledge of the pixel format beyond the component type "T".
+//
+// 'Aligned' means each row is aligned to the L1 cache line size. This prevents
+// false sharing between two threads operating on adjacent rows.
+//
+// 'Padding' is still relevant because vectors could potentially be larger than
+// a cache line. By rounding up row sizes to the vector size, we allow
+// reading/writing ALIGNED vectors whose first lane is a valid sample. This
+// avoids needing a separate loop to handle remaining unaligned lanes.
+//
+// This image layout could also be achieved with a vector and a row accessor
+// function, but a class wrapper with support for "deleter" allows wrapping
+// existing memory allocated by clients without copying the pixels. It also
+// provides convenient accessors for xsize/ysize, which shortens function
+// argument lists. Supports move-construction so it can be stored in containers.
+template <typename ComponentType>
+class Image : public ImageBase {
+ public:
+  using T = ComponentType;  // Type of one pixel component.
+
+  Image() = default;  // Empty image: relies on ImageBase(), no allocation.
+  Image(const size_t xsize, const size_t ysize)
+      : ImageBase(xsize, ysize, sizeof(T)) {}  // Allocating constructor.
+  Image(const size_t xsize, const size_t ysize, size_t bytes_per_row,
+        void* aligned)
+      : ImageBase(xsize, ysize, bytes_per_row, aligned) {}  // Non-owning.
+
+  void InitializePaddingForUnalignedAccesses() {  // Required before LoadU.
+    InitializePadding(sizeof(T), Padding::kUnaligned);
+  }
+
+  HWY_INLINE const T* ConstRow(const size_t y) const {
+    return static_cast<const T*>(VoidRow(y));
+  }
+  HWY_INLINE const T* ConstRow(const size_t y) {
+    return static_cast<const T*>(VoidRow(y));
+  }
+
+  // Returns pointer to non-const. This allows passing const Image* parameters
+  // when the callee is only supposed to fill the pixels, as opposed to
+  // allocating or resizing the image.
+  HWY_INLINE T* MutableRow(const size_t y) const {
+    return static_cast<T*>(VoidRow(y));
+  }
+  HWY_INLINE T* MutableRow(const size_t y) {
+    return static_cast<T*>(VoidRow(y));
+  }
+
+  // Returns number of pixels (some of which are padding) per row. Useful for
+  // computing other rows via pointer arithmetic. WARNING: this must
+  // NOT be used to determine xsize.
+  HWY_INLINE intptr_t PixelsPerRow() const {
+    return static_cast<intptr_t>(bytes_per_row_ / sizeof(T));  // Row stride / T.
+  }
+};
+
+using ImageF = Image<float>;
+
+// A bundle of 3 same-sized images. To fill an existing Image3 using
+// single-channel producers, we also need access to each const Image*. Const
+// prevents breaking the same-size invariant, while still allowing pixels to be
+// changed via MutableRow.
+template <typename ComponentType>
+class Image3 {
+ public:
+  using T = ComponentType;
+  using ImageT = Image<T>;
+  static constexpr size_t kNumPlanes = 3;
+
+  Image3() : planes_{ImageT(), ImageT(), ImageT()} {}  // Three empty planes.
+
+  Image3(const size_t xsize, const size_t ysize)
+      : planes_{ImageT(xsize, ysize), ImageT(xsize, ysize),
+                ImageT(xsize, ysize)} {}
+
+  Image3(Image3&& other) noexcept {  // Planes start empty, then are moved in.
+    for (size_t i = 0; i < kNumPlanes; i++) {
+      planes_[i] = std::move(other.planes_[i]);
+    }
+  }
+
+  Image3(ImageT&& plane0, ImageT&& plane1, ImageT&& plane2) {  // Aborts on mismatch.
+    if (!SameSize(plane0, plane1) || !SameSize(plane0, plane2)) {
+      HWY_ABORT("Not same size: %zu x %zu, %zu x %zu, %zu x %zu\n",
+                plane0.xsize(), plane0.ysize(), plane1.xsize(), plane1.ysize(),
+                plane2.xsize(), plane2.ysize());
+    }
+    planes_[0] = std::move(plane0);
+    planes_[1] = std::move(plane1);
+    planes_[2] = std::move(plane2);
+  }
+
+  // Copy construction/assignment is forbidden to avoid inadvertent copies,
+  // which can be very expensive. Use CopyImageTo instead.
+  Image3(const Image3& other) = delete;
+  Image3& operator=(const Image3& other) = delete;
+
+  Image3& operator=(Image3&& other) noexcept {
+    for (size_t i = 0; i < kNumPlanes; i++) {
+      planes_[i] = std::move(other.planes_[i]);
+    }
+    return *this;
+  }
+
+  HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) const {
+    return static_cast<const T*>(VoidPlaneRow(c, y));
+  }
+  HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) {
+    return static_cast<const T*>(VoidPlaneRow(c, y));
+  }
+
+  HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) const {
+    return static_cast<T*>(VoidPlaneRow(c, y));
+  }
+  HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) {
+    return static_cast<T*>(VoidPlaneRow(c, y));
+  }
+
+  HWY_INLINE const ImageT& Plane(size_t idx) const { return planes_[idx]; }
+
+  void Swap(Image3& other) {
+    for (size_t c = 0; c < kNumPlanes; ++c) {
+      other.planes_[c].Swap(planes_[c]);
+    }
+  }
+
+  void ShrinkTo(const size_t xsize, const size_t ysize) {
+    for (ImageT& plane : planes_) {
+      plane.ShrinkTo(xsize, ysize);
+    }
+  }
+
+  // Sizes of all three images are guaranteed to be equal.
+  HWY_INLINE size_t xsize() const { return planes_[0].xsize(); }
+  HWY_INLINE size_t ysize() const { return planes_[0].ysize(); }
+  // Returns offset [bytes] from one row to the next row of the same plane.
+  // WARNING: this must NOT be used to determine xsize, nor for copying rows -
+  // the valid xsize may be much less.
+  HWY_INLINE size_t bytes_per_row() const { return planes_[0].bytes_per_row(); }
+  // Returns number of pixels (some of which are padding) per row. Useful for
+  // computing other rows via pointer arithmetic. WARNING: this must NOT be used
+  // to determine xsize.
+  HWY_INLINE intptr_t PixelsPerRow() const { return planes_[0].PixelsPerRow(); }
+
+ private:
+  // Returns pointer to the start of row y of plane c (checked if sanitized).
+  HWY_INLINE void* VoidPlaneRow(const size_t c, const size_t y) const {
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+    if (c >= kNumPlanes || y >= ysize()) {
+      HWY_ABORT("PlaneRow(%zu, %zu) >= %zu\n", c, y, ysize());
+    }
+#endif
+    // Use the first plane's stride because the compiler might not realize they
+    // are all equal. Thus we only need a single multiplication for all planes.
+    const size_t row_offset = y * planes_[0].bytes_per_row();
+    // Cast away const (as Image::MutableRow allows) so the result is a void*.
+    void* row = const_cast<uint8_t*>(planes_[c].bytes()) + row_offset;
+    return HWY_ASSUME_ALIGNED(row, HWY_ALIGNMENT);
+  }
+
+ private:
+  ImageT planes_[kNumPlanes];
+};
+
+using Image3F = Image3<float>;
+
+// Rectangular region in image(s). Factoring this out of Image instead of
+// shifting the pointer by x0/y0 allows this to apply to multiple images with
+// different resolutions. Can compare size via SameSize(rect1, rect2).
+class Rect {
+ public:
+  // Most windows are xsize_max * ysize_max, except those on the borders where
+  // begin + size_max > end.
+  constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize_max,
+                 size_t ysize_max, size_t xend, size_t yend)
+      : x0_(xbegin),
+        y0_(ybegin),
+        xsize_(ClampedSize(xbegin, xsize_max, xend)),
+        ysize_(ClampedSize(ybegin, ysize_max, yend)) {}
+
+  // Construct with origin and known size (typically from another Rect).
+  constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize, size_t ysize)
+      : x0_(xbegin), y0_(ybegin), xsize_(xsize), ysize_(ysize) {}
+
+  // Construct a rect that covers a whole image.
+  template <typename Image>
+  explicit Rect(const Image& image)
+      : Rect(0, 0, image.xsize(), image.ysize()) {}
+
+  Rect() : Rect(0, 0, 0, 0) {}
+
+  Rect(const Rect&) = default;
+  Rect& operator=(const Rect&) = default;
+  // Window at (xbegin, ybegin) relative to this Rect, clamped to its extent.
+  Rect Subrect(size_t xbegin, size_t ybegin, size_t xsize_max,
+               size_t ysize_max) const {
+    return Rect(x0_ + xbegin, y0_ + ybegin, xsize_max, ysize_max, x0_ + xsize_,
+                y0_ + ysize_);
+  }
+
+  template <typename T>
+  const T* ConstRow(const Image<T>* image, size_t y) const {
+    return image->ConstRow(y + y0_) + x0_;
+  }
+
+  template <typename T>
+  T* MutableRow(const Image<T>* image, size_t y) const {
+    return image->MutableRow(y + y0_) + x0_;
+  }
+
+  template <typename T>
+  const T* ConstPlaneRow(const Image3<T>& image, size_t c, size_t y) const {
+    return image.ConstPlaneRow(c, y + y0_) + x0_;
+  }
+
+  template <typename T>
+  T* MutablePlaneRow(Image3<T>* image, const size_t c, size_t y) const {
+    return image->MutablePlaneRow(c, y + y0_) + x0_;
+  }
+
+  // Returns true if this Rect fully resides in the given image. ImageT could be
+  // Image<T> or Image3<T>; however if ImageT is Rect, results are nonsensical.
+  template <class ImageT>
+  bool IsInside(const ImageT& image) const {
+    return (x0_ + xsize_ <= image.xsize()) && (y0_ + ysize_ <= image.ysize());
+  }
+
+  size_t x0() const { return x0_; }
+  size_t y0() const { return y0_; }
+  size_t xsize() const { return xsize_; }
+  size_t ysize() const { return ysize_; }
+
+ private:
+  // Returns size_max, or whatever is left in [begin, end); 0 if end <= begin.
+  static constexpr size_t ClampedSize(size_t begin, size_t size_max,
+                                      size_t end) {
+    return (end <= begin) ? 0  // Compare against end - begin: cannot overflow.
+                          : (size_max <= end - begin ? size_max : end - begin);
+  }
+
+  size_t x0_;
+  size_t y0_;
+
+  size_t xsize_;
+  size_t ysize_;
+};
+
+// Works for any image-like input type(s).
+template <class Image1, class Image2>
+HWY_MAYBE_UNUSED bool SameSize(const Image1& image1, const Image2& image2) {
+  return !(image1.xsize() != image2.xsize() || image1.ysize() != image2.ysize());
+}
+
+// Mirrors out of bounds coordinates and returns valid coordinates unchanged.
+// We assume the radius (distance outside the image) is small compared to the
+// image size, otherwise this might not terminate.
+// The mirror is outside the last column (border pixel is also replicated).
+static HWY_INLINE HWY_MAYBE_UNUSED size_t Mirror(int64_t x,
+                                                 const int64_t xsize) {
+  HWY_DASSERT(xsize != 0);
+
+  // Reflect repeatedly until the coordinate lands inside [0, xsize).
+  // Left of the image: -1 -> 0, -2 -> 1, ... Right of it: xsize -> xsize - 1,
+  // xsize + 1 -> xsize - 2, ...
+  // TODO(janwas): replace with branchless version
+  while (!(x >= 0 && x < xsize)) {
+    const bool is_left = x < 0;
+    x = is_left ? (-x - 1) : (2 * xsize - 1 - x);
+  }
+  return static_cast<size_t>(x);
+}
+
+// Wrap modes for ensuring X/Y coordinates are in the valid range [0, size):
+
+// Mirrors (repeating the edge pixel once). Useful for convolutions.
+struct WrapMirror {
+  HWY_INLINE size_t operator()(const int64_t coord, const size_t size) const {
+    return Mirror(coord, static_cast<int64_t>(size));  // Mirror takes int64_t.
+  }
+};
+
+// Returns the same coordinate, for when we know "coord" is already valid (e.g.
+// interior of an image).
+struct WrapUnchanged {
+  HWY_INLINE size_t operator()(const int64_t coord, size_t /*size*/) const {
+    return static_cast<size_t>(coord);  // No range check: size is ignored.
+  }
+};
+
+// Similar to Wrap* but for row pointers (reduces Row() multiplications).
+
+class WrapRowMirror {
+ public:
+  template <class View>
+  WrapRowMirror(const View& image, size_t ysize)
+      : first_row_(image.ConstRow(0)), last_row_(image.ConstRow(ysize - 1)) {}
+
+  // Returns `row` unchanged when inside [first_row_, last_row_]; otherwise
+  // reflects it across the border it crossed. `stride` is counted in floats.
+  const float* operator()(const float* const HWY_RESTRICT row,
+                          const int64_t stride) const {
+    if (row < first_row_) {
+      // One row before => row 0, two before => row 1, ...
+      const int64_t distance = first_row_ - row;
+      return first_row_ + distance - stride;
+    }
+    if (!(row > last_row_)) return row;
+    // One row after => last row, two after => last - 1, ...
+    const int64_t distance = row - last_row_;
+    return last_row_ - distance + stride;
+  }
+
+ private:
+  const float* const HWY_RESTRICT first_row_;
+  const float* const HWY_RESTRICT last_row_;
+};
+
+struct WrapRowUnchanged {
+  HWY_INLINE const float* operator()(const float* const HWY_RESTRICT row,
+                                     int64_t /*stride*/) const {
+    return row;  // Identity: the stride is ignored.
+  }
+};
+
+}  // namespace hwy
+
+#endif  // HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/contrib/image/image_test.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/contrib/image/image_test.cc
new file mode 100644
index 0000000000..c27e52a195
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/contrib/image/image_test.cc
@@ -0,0 +1,151 @@
+// Copyright (c) the JPEG XL Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/image/image.h"
+
+#include <cstddef>
+
+#include "hwy/base.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/image/image_test.cc"
+#include "hwy/foreach_target.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <random>
+#include <utility>
+
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Ensure we can always write full aligned vectors.
+struct TestAlignedT {
+  template <typename T>
+  void operator()(T /*unused*/) const {
+    std::mt19937 rng(129);  // Fixed seed.
+    std::uniform_int_distribution<int> dist(0, 16);  // First value for Iota.
+    const HWY_FULL(T) d;
+
+    for (size_t ysize = 1; ysize < 4; ++ysize) {
+      for (size_t xsize = 1; xsize < 64; ++xsize) {
+        Image<T> img(xsize, ysize);
+
+        for (size_t y = 0; y < ysize; ++y) {
+          T* HWY_RESTRICT row = img.MutableRow(y);
+          for (size_t x = 0; x < xsize; x += Lanes(d)) {
+            const auto values = Iota(d, dist(rng));
+            Store(values, d, row + x);  // Whole vector, even if x + N > xsize.
+          }
+        }
+
+        // Sanity check (one random sample) to prevent optimizing out the writes
+        const auto x = std::uniform_int_distribution<size_t>(0, xsize - 1)(rng);
+        const auto y = std::uniform_int_distribution<size_t>(0, ysize - 1)(rng);
+        HWY_ASSERT(img.ConstRow(y)[x] < 16 + Lanes(d));
+      }
+    }
+  }
+};
+
+void TestAligned() { ForUnsignedTypes(TestAlignedT()); }  // Test entry point.
+
+// Ensure we can write an unaligned vector starting at the last valid value.
+struct TestUnalignedT {
+  template <typename T>
+  void operator()(T /*unused*/) const {
+    std::mt19937 rng(129);  // Fixed seed.
+    std::uniform_int_distribution<int> dist(0, 3);  // Shift for 1 << [0, 3].
+    const HWY_FULL(T) d;
+
+    for (size_t ysize = 1; ysize < 4; ++ysize) {
+      for (size_t xsize = 1; xsize < 128; ++xsize) {
+        Image<T> img(xsize, ysize);
+        img.InitializePaddingForUnalignedAccesses();
+
+// This test reads padding, which only works if it was initialized,
+// which only happens in MSAN builds.
+#if defined(MEMORY_SANITIZER) || HWY_IDE
+        // Initialize only the valid samples (each is 1, 2, 4 or 8)
+        for (size_t y = 0; y < ysize; ++y) {
+          T* HWY_RESTRICT row = img.MutableRow(y);
+          for (size_t x = 0; x < xsize; ++x) {
+            row[x] = 1 << dist(rng);
+          }
+        }
+
+        // Read padding bits via unaligned loads starting at every valid x
+        auto accum = Zero(d);
+        for (size_t y = 0; y < ysize; ++y) {
+          T* HWY_RESTRICT row = img.MutableRow(y);
+          for (size_t x = 0; x < xsize; ++x) {
+            accum |= LoadU(d, row + x);
+          }
+        }
+
+        // Ensure padding was zero: OR of samples <= 15 must stay below 16
+        const size_t N = Lanes(d);
+        auto lanes = AllocateAligned<T>(N);
+        Store(accum, d, lanes.get());
+        for (size_t i = 0; i < N; ++i) {
+          HWY_ASSERT(lanes[i] < 16);
+        }
+#else  // Check that writing padding does not overwrite valid samples
+       // Initialize only the valid samples
+        for (size_t y = 0; y < ysize; ++y) {
+          T* HWY_RESTRICT row = img.MutableRow(y);
+          for (size_t x = 0; x < xsize; ++x) {
+            row[x] = static_cast<T>(x);
+          }
+        }
+
+        // Zero padding and rightmost sample with one unaligned store per row
+        for (size_t y = 0; y < ysize; ++y) {
+          T* HWY_RESTRICT row = img.MutableRow(y);
+          StoreU(Zero(d), d, row + xsize - 1);
+        }
+
+        // Ensure no samples except the rightmost were overwritten
+        for (size_t y = 0; y < ysize; ++y) {
+          T* HWY_RESTRICT row = img.MutableRow(y);
+          for (size_t x = 0; x < xsize - 1; ++x) {
+            HWY_ASSERT_EQ(static_cast<T>(x), row[x]);
+          }
+        }
+#endif
+      }
+    }
+  }
+};
+
+void TestUnaligned() { ForUnsignedTypes(TestUnalignedT()); }  // Test entry point.
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+HWY_BEFORE_TEST(ImageTest);
+HWY_EXPORT_AND_TEST_P(ImageTest, TestAligned);
+HWY_EXPORT_AND_TEST_P(ImageTest, TestUnaligned);
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/contrib/math/math-inl.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/contrib/math/math-inl.h
new file mode 100644
index 0000000000..15b80d63ba
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/contrib/math/math-inl.h
@@ -0,0 +1,1192 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Include guard (still compiled once per target)
+#if defined(HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_) == \
+    defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_
+#undef HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_
+#else
+#define HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_
+#endif
+
+#include "hwy/highway.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+/**
+ * Highway SIMD version of std::acos(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 2
+ *      Valid Range: [-1, +1]
+ * @return arc cosine of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Acos(const D d, V x);  // Declaration only.
+template <class D, class V>
+HWY_NOINLINE V CallAcos(const D d, V x) {  // Forwards to Acos.
+  return Acos(d, x);
+}
+
+/**
+ * Highway SIMD version of std::acosh(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 3
+ *      Valid Range: float32[1, +FLT_MAX], float64[1, +DBL_MAX]
+ * @return hyperbolic arc cosine of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Acosh(const D d, V x);  // Declaration only.
+template <class D, class V>
+HWY_NOINLINE V CallAcosh(const D d, V x) {  // Forwards to Acosh.
+  return Acosh(d, x);
+}
+
+/**
+ * Highway SIMD version of std::asin(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 2
+ *      Valid Range: [-1, +1]
+ * @return arc sine of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Asin(const D d, V x);  // Declaration only.
+template <class D, class V>
+HWY_NOINLINE V CallAsin(const D d, V x) {  // Forwards to Asin.
+  return Asin(d, x);
+}
+
+/**
+ * Highway SIMD version of std::asinh(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 3
+ *      Valid Range: float32[-FLT_MAX, +FLT_MAX], float64[-DBL_MAX, +DBL_MAX]
+ * @return hyperbolic arc sine of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Asinh(const D d, V x);  // Declaration only.
+template <class D, class V>
+HWY_NOINLINE V CallAsinh(const D d, V x) {  // Forwards to Asinh.
+  return Asinh(d, x);
+}
+
+/**
+ * Highway SIMD version of std::atan(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 3
+ *      Valid Range: float32[-FLT_MAX, +FLT_MAX], float64[-DBL_MAX, +DBL_MAX]
+ * @return arc tangent of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Atan(const D d, V x);  // Declaration only.
+template <class D, class V>
+HWY_NOINLINE V CallAtan(const D d, V x) {  // Forwards to Atan.
+  return Atan(d, x);
+}
+
+/**
+ * Highway SIMD version of std::atanh(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 3
+ *      Valid Range: (-1, +1)
+ * @return hyperbolic arc tangent of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Atanh(const D d, V x);  // Declaration only.
+template <class D, class V>
+HWY_NOINLINE V CallAtanh(const D d, V x) {  // Forwards to Atanh.
+  return Atanh(d, x);
+}
+
+/**
+ * Highway SIMD version of std::cos(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 3
+ *      Valid Range: [-39000, +39000]
+ * @return cosine of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Cos(const D d, V x);  // Declaration only.
+template <class D, class V>
+HWY_NOINLINE V CallCos(const D d, V x) {  // Forwards to Cos.
+  return Cos(d, x);
+}
+
+/**
+ * Highway SIMD version of std::exp(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 1
+ *      Valid Range: float32[-FLT_MAX, +104], float64[-DBL_MAX, +706]
+ * @return e^x
+ */
+template <class D, class V>
+HWY_INLINE V Exp(const D d, V x);  // Declaration only.
+template <class D, class V>
+HWY_NOINLINE V CallExp(const D d, V x) {  // Forwards to Exp.
+  return Exp(d, x);
+}
+
+/**
+ * Highway SIMD version of std::expm1(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 4
+ *      Valid Range: float32[-FLT_MAX, +104], float64[-DBL_MAX, +706]
+ * @return e^x - 1
+ */
+template <class D, class V>
+HWY_INLINE V Expm1(const D d, V x);  // Declaration only.
+template <class D, class V>
+HWY_NOINLINE V CallExpm1(const D d, V x) {  // Forwards to Expm1.
+  return Expm1(d, x);
+}
+
+/**
+ * Highway SIMD version of std::log(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 4
+ *      Valid Range: float32(0, +FLT_MAX], float64(0, +DBL_MAX]
+ * @return natural logarithm of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Log(const D d, V x);  // Declaration only.
+template <class D, class V>
+HWY_NOINLINE V CallLog(const D d, V x) {  // Forwards to Log.
+  return Log(d, x);
+}
+
+/**
+ * Highway SIMD version of std::log10(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 2
+ *      Valid Range: float32(0, +FLT_MAX], float64(0, +DBL_MAX]
+ * @return base 10 logarithm of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Log10(const D d, V x);  // Declaration only.
+template <class D, class V>
+HWY_NOINLINE V CallLog10(const D d, V x) {  // Forwards to Log10.
+  return Log10(d, x);
+}
+
+/**
+ * Highway SIMD version of std::log1p(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 2
+ *      Valid Range: float32[0, +FLT_MAX], float64[0, +DBL_MAX]
+ * @return log(1 + x)
+ */
+template <class D, class V>
+HWY_INLINE V Log1p(const D d, V x);  // Declaration only.
+template <class D, class V>
+HWY_NOINLINE V CallLog1p(const D d, V x) {  // Forwards to Log1p.
+  return Log1p(d, x);
+}
+
+/**
+ * Highway SIMD version of std::log2(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 2
+ *      Valid Range: float32(0, +FLT_MAX], float64(0, +DBL_MAX]
+ * @return base 2 logarithm of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Log2(const D d, V x);  // Declaration only.
+template <class D, class V>
+HWY_NOINLINE V CallLog2(const D d, V x) {  // Forwards to Log2.
+  return Log2(d, x);
+}
+
+/**
+ * Highway SIMD version of std::sin(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 3
+ *      Valid Range: [-39000, +39000]
+ * @return sine of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Sin(const D d, V x);  // Declaration only.
+template <class D, class V>
+HWY_NOINLINE V CallSin(const D d, V x) {  // Forwards to Sin.
+  return Sin(d, x);
+}
+
+/**
+ * Highway SIMD version of std::sinh(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 4
+ *      Valid Range: float32[-88.7228, +88.7228], float64[-709, +709]
+ * @return hyperbolic sine of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Sinh(const D d, V x);  // Declaration only.
+template <class D, class V>
+HWY_NOINLINE V CallSinh(const D d, V x) {  // Forwards to Sinh.
+  return Sinh(d, x);
+}
+
+/**
+ * Highway SIMD version of std::tanh(x).
+ *
+ * Valid Lane Types: float32, float64
+ *        Max Error: ULP = 4
+ *      Valid Range: float32[-FLT_MAX, +FLT_MAX], float64[-DBL_MAX, +DBL_MAX]
+ * @return hyperbolic tangent of 'x'
+ */
+template <class D, class V>
+HWY_INLINE V Tanh(const D d, V x);  // Declaration only.
+template <class D, class V>
+HWY_NOINLINE V CallTanh(const D d, V x) {  // Forwards to Tanh.
+  return Tanh(d, x);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Implementation
+////////////////////////////////////////////////////////////////////////////////
+namespace impl {
+
+// Estrin's Scheme is a faster method for evaluating large polynomials on
+// super scalar architectures. It works by factoring the Horner's Method
+// polynomial into power of two sub-trees that can be evaluated in parallel.
+// Wikipedia Link: https://en.wikipedia.org/wiki/Estrin%27s_scheme
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1) {
+  return MulAdd(c1, x, c0);  // c1 * x + c0, if MulAdd(a, b, c) == a * b + c.
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2) {
+  const T linear = MulAdd(c1, x, c0);  // Sub-tree over c0 and c1.
+  return MulAdd(x * x, c2, linear);
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3) {
+  const T low = MulAdd(c1, x, c0), high = MulAdd(c3, x, c2);  // Two sub-trees.
+  return MulAdd(x * x, high, low);
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4) {
+  const T x2 = x * x, low = MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0));
+  return MulAdd(x2 * x2, c4, low);  // `low` covers c0..c3.
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5) {
+  const T x2 = x * x;
+  const T low = MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0));  // c0..c3
+  return MulAdd(x2 * x2, MulAdd(c5, x, c4), low);
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6) {
+  T x2(x * x), x4(x2 * x2);  // Degree 6, if MulAdd(a, b, c) == a * b + c.
+  return MulAdd(x4, MulAdd(x2, c6, MulAdd(c5, x, c4)),
+                MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0)));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7) {
+  T x2(x * x), x4(x2 * x2);  // Degree 7, if MulAdd(a, b, c) == a * b + c.
+  return MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0)));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8) {  // Degree 8.
+  T x2(x * x), x4(x2 * x2), x8(x4 * x4);  // Shared powers x^2, x^4, x^8.
+  return MulAdd(x8, c8,
+                MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                       MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9) {  // Degree 9.
+  T x2(x * x), x4(x2 * x2), x8(x4 * x4);  // Shared powers x^2, x^4, x^8.
+  return MulAdd(x8, MulAdd(c9, x, c8),
+                MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                       MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9, T c10) {  // Degree 10.
+  T x2(x * x), x4(x2 * x2), x8(x4 * x4);  // Shared powers x^2, x^4, x^8.
+  return MulAdd(x8, MulAdd(x2, c10, MulAdd(c9, x, c8)),
+                MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                       MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9, T c10, T c11) {  // Degree 11.
+  T x2(x * x), x4(x2 * x2), x8(x4 * x4);  // Shared powers x^2, x^4, x^8.
+  return MulAdd(x8, MulAdd(x2, MulAdd(c11, x, c10), MulAdd(c9, x, c8)),
+                MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                       MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9, T c10, T c11,
+                                     T c12) {  // Degree 12.
+  T x2(x * x), x4(x2 * x2), x8(x4 * x4);  // Shared powers x^2, x^4, x^8.
+  return MulAdd(
+      x8, MulAdd(x4, c12, MulAdd(x2, MulAdd(c11, x, c10), MulAdd(c9, x, c8))),
+      MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+             MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9, T c10, T c11,
+                                     T c12, T c13) {  // Degree 13.
+  T x2(x * x), x4(x2 * x2), x8(x4 * x4);  // Shared powers x^2, x^4, x^8.
+  return MulAdd(x8,
+                MulAdd(x4, MulAdd(c13, x, c12),
+                       MulAdd(x2, MulAdd(c11, x, c10), MulAdd(c9, x, c8))),
+                MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                       MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9, T c10, T c11,
+                                     T c12, T c13, T c14) {  // Degree 14.
+  T x2(x * x), x4(x2 * x2), x8(x4 * x4);  // Shared powers x^2, x^4, x^8.
+  return MulAdd(x8,
+                MulAdd(x4, MulAdd(x2, c14, MulAdd(c13, x, c12)),
+                       MulAdd(x2, MulAdd(c11, x, c10), MulAdd(c9, x, c8))),
+                MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                       MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9, T c10, T c11,
+                                     T c12, T c13, T c14, T c15) {  // Degree 15.
+  T x2(x * x), x4(x2 * x2), x8(x4 * x4);  // Shared powers x^2, x^4, x^8.
+  return MulAdd(x8,
+                MulAdd(x4, MulAdd(x2, MulAdd(c15, x, c14), MulAdd(c13, x, c12)),
+                       MulAdd(x2, MulAdd(c11, x, c10), MulAdd(c9, x, c8))),
+                MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                       MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9, T c10, T c11,
+                                     T c12, T c13, T c14, T c15, T c16) {  // Degree 16.
+  T x2(x * x), x4(x2 * x2), x8(x4 * x4), x16(x8 * x8);  // x^2 .. x^16.
+  return MulAdd(
+      x16, c16,
+      MulAdd(x8,
+             MulAdd(x4, MulAdd(x2, MulAdd(c15, x, c14), MulAdd(c13, x, c12)),
+                    MulAdd(x2, MulAdd(c11, x, c10), MulAdd(c9, x, c8))),
+             MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                    MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0)))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9, T c10, T c11,
+                                     T c12, T c13, T c14, T c15, T c16, T c17) {  // Degree 17.
+  T x2(x * x), x4(x2 * x2), x8(x4 * x4), x16(x8 * x8);  // x^2 .. x^16.
+  return MulAdd(
+      x16, MulAdd(c17, x, c16),
+      MulAdd(x8,
+             MulAdd(x4, MulAdd(x2, MulAdd(c15, x, c14), MulAdd(c13, x, c12)),
+                    MulAdd(x2, MulAdd(c11, x, c10), MulAdd(c9, x, c8))),
+             MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                    MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0)))));
+}
+template <class T>
+HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1, T c2, T c3, T c4, T c5,
+                                     T c6, T c7, T c8, T c9, T c10, T c11,
+                                     T c12, T c13, T c14, T c15, T c16, T c17,
+                                     T c18) {  // Degree 18.
+  T x2(x * x), x4(x2 * x2), x8(x4 * x4), x16(x8 * x8);  // x^2 .. x^16.
+  return MulAdd(
+      x16, MulAdd(x2, c18, MulAdd(c17, x, c16)),
+      MulAdd(x8,
+             MulAdd(x4, MulAdd(x2, MulAdd(c15, x, c14), MulAdd(c13, x, c12)),
+                    MulAdd(x2, MulAdd(c11, x, c10), MulAdd(c9, x, c8))),
+             MulAdd(x4, MulAdd(x2, MulAdd(c7, x, c6), MulAdd(c5, x, c4)),
+                    MulAdd(x2, MulAdd(c3, x, c2), MulAdd(c1, x, c0)))));
+}
+
+template <class FloatOrDouble>
+struct AsinImpl {};  // Empty primary template; float/double specialized below.
+template <class FloatOrDouble>
+struct AtanImpl {};  // Empty primary template; float/double specialized below.
+template <class FloatOrDouble>
+struct CosSinImpl {};  // Empty primary template; float/double specialized below.
+template <class FloatOrDouble>
+struct ExpImpl {};  // Empty primary template; float/double specialized below.
+template <class FloatOrDouble>
+struct LogImpl {};  // Empty primary template; float/double specialized below.
+
+template <>
+struct AsinImpl<float> {
+  // Polynomial P(x2) for asin on [0, 0.5); Asin() forms y + y^3 * P(y^2).
+  template <class D, class V>
+  HWY_INLINE V AsinPoly(D d, V x2, V /*x*/) {  // Last argument is unused.
+    const auto k0 = Set(d, +0.1666677296f);
+    const auto k1 = Set(d, +0.07495029271f);
+    const auto k2 = Set(d, +0.04547423869f);
+    const auto k3 = Set(d, +0.02424046025f);
+    const auto k4 = Set(d, +0.04197454825f);
+
+    return Estrin(x2, k0, k1, k2, k3, k4);  // k0 + k1*x2 + ... + k4*x2^4
+  }
+};
+
+#if HWY_CAP_FLOAT64 && HWY_CAP_INTEGER64
+
+template <>
+struct AsinImpl<double> {
+  // Polynomial P(x2) for asin on [0, 0.5); Asin() forms y + y^3 * P(y^2).
+  template <class D, class V>
+  HWY_INLINE V AsinPoly(D d, V x2, V /*x*/) {  // Last argument is unused.
+    const auto k0 = Set(d, +0.1666666666666497543);
+    const auto k1 = Set(d, +0.07500000000378581611);
+    const auto k2 = Set(d, +0.04464285681377102438);
+    const auto k3 = Set(d, +0.03038195928038132237);
+    const auto k4 = Set(d, +0.02237176181932048341);
+    const auto k5 = Set(d, +0.01735956991223614604);
+    const auto k6 = Set(d, +0.01388715184501609218);
+    const auto k7 = Set(d, +0.01215360525577377331);
+    const auto k8 = Set(d, +0.006606077476277170610);
+    const auto k9 = Set(d, +0.01929045477267910674);
+    const auto k10 = Set(d, -0.01581918243329996643);
+    const auto k11 = Set(d, +0.03161587650653934628);
+
+    return Estrin(x2, k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11);  // Degree 11 in x2.
+  }
+};
+
+#endif
+
+template <>
+struct AtanImpl<float> {
+  // Polynomial approximation for atan(x) over the range [0, 1.0).
+  template <class D, class V>
+  HWY_INLINE V AtanPoly(D d, V x) {  // Returns x + x^3 * P(x^2).
+    const auto k0 = Set(d, -0.333331018686294555664062f);
+    const auto k1 = Set(d, +0.199926957488059997558594f);
+    const auto k2 = Set(d, -0.142027363181114196777344f);
+    const auto k3 = Set(d, +0.106347933411598205566406f);
+    const auto k4 = Set(d, -0.0748900920152664184570312f);
+    const auto k5 = Set(d, +0.0425049886107444763183594f);
+    const auto k6 = Set(d, -0.0159569028764963150024414f);
+    const auto k7 = Set(d, +0.00282363896258175373077393f);
+
+    const auto y = (x * x);  // Polynomial variable is x^2.
+    return MulAdd(Estrin(y, k0, k1, k2, k3, k4, k5, k6, k7), (y * x), x);
+  }
+};
+
+#if HWY_CAP_FLOAT64 && HWY_CAP_INTEGER64
+
+template <>
+struct AtanImpl<double> {
+  // Polynomial approximation for atan(x) over the range [0, 1.0).
+  template <class D, class V>
+  HWY_INLINE V AtanPoly(D d, V x) {  // Returns x + x^3 * P(x^2).
+    const auto k0 = Set(d, -0.333333333333311110369124);
+    const auto k1 = Set(d, +0.199999999996591265594148);
+    const auto k2 = Set(d, -0.14285714266771329383765);
+    const auto k3 = Set(d, +0.111111105648261418443745);
+    const auto k4 = Set(d, -0.090908995008245008229153);
+    const auto k5 = Set(d, +0.0769219538311769618355029);
+    const auto k6 = Set(d, -0.0666573579361080525984562);
+    const auto k7 = Set(d, +0.0587666392926673580854313);
+    const auto k8 = Set(d, -0.0523674852303482457616113);
+    const auto k9 = Set(d, +0.0466667150077840625632675);
+    const auto k10 = Set(d, -0.0407629191276836500001934);
+    const auto k11 = Set(d, +0.0337852580001353069993897);
+    const auto k12 = Set(d, -0.0254517624932312641616861);
+    const auto k13 = Set(d, +0.016599329773529201970117);
+    const auto k14 = Set(d, -0.00889896195887655491740809);
+    const auto k15 = Set(d, +0.00370026744188713119232403);
+    const auto k16 = Set(d, -0.00110611831486672482563471);
+    const auto k17 = Set(d, +0.000209850076645816976906797);
+    const auto k18 = Set(d, -1.88796008463073496563746e-5);
+
+    const auto y = (x * x);  // Polynomial variable is x^2.
+    return MulAdd(Estrin(y, k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11,
+                         k12, k13, k14, k15, k16, k17, k18),
+                  (y * x), x);
+  }
+};
+
+#endif
+
+template <>
+struct CosSinImpl<float> {
+  // Rounds float toward zero and returns as int32_t.
+  template <class D, class V>
+  HWY_INLINE Vec<Rebind<int32_t, D>> ToInt32(D /*unused*/, V x) {
+    return ConvertTo(Rebind<int32_t, D>(), x);  // Same lane count, int32 lanes.
+  }
+
+  template <class D, class V>
+  HWY_INLINE V Poly(D d, V x) {  // Odd polynomial: x + x^3 * P(x^2).
+    const auto k0 = Set(d, -1.66666597127914428710938e-1f);
+    const auto k1 = Set(d, +8.33307858556509017944336e-3f);
+    const auto k2 = Set(d, -1.981069071916863322258e-4f);
+    const auto k3 = Set(d, +2.6083159809786593541503e-6f);
+
+    const auto y(x * x);  // Polynomial variable is x^2.
+    return MulAdd(Estrin(y, k0, k1, k2, k3), (y * x), x);
+  }
+
+  template <class D, class V, class VI32>
+  HWY_INLINE V CosReduce(D d, V x, VI32 q) {  // Returns x - q * (pi/2).
+    // kHalfPiPart0f + kHalfPiPart1f + kHalfPiPart2f + kHalfPiPart3f ~= -pi/2
+    const V kHalfPiPart0f = Set(d, -0.5f * 3.140625f);
+    const V kHalfPiPart1f = Set(d, -0.5f * 0.0009670257568359375f);
+    const V kHalfPiPart2f = Set(d, -0.5f * 6.2771141529083251953e-7f);
+    const V kHalfPiPart3f = Set(d, -0.5f * 1.2154201256553420762e-10f);
+
+    // Extended precision modular arithmetic.
+    const V qf = ConvertTo(d, q);  // Quadrant index as float lanes.
+    x = MulAdd(qf, kHalfPiPart0f, x);  // Largest part first, then corrections.
+    x = MulAdd(qf, kHalfPiPart1f, x);
+    x = MulAdd(qf, kHalfPiPart2f, x);
+    x = MulAdd(qf, kHalfPiPart3f, x);
+    return x;
+  }
+
+  template <class D, class V, class VI32>
+  HWY_INLINE V SinReduce(D d, V x, VI32 q) {  // Returns x - q * pi.
+    // kPiPart0f + kPiPart1f + kPiPart2f + kPiPart3f ~= -pi
+    const V kPiPart0f = Set(d, -3.140625f);
+    const V kPiPart1f = Set(d, -0.0009670257568359375f);
+    const V kPiPart2f = Set(d, -6.2771141529083251953e-7f);
+    const V kPiPart3f = Set(d, -1.2154201256553420762e-10f);
+
+    // Extended precision modular arithmetic.
+    const V qf = ConvertTo(d, q);  // Quadrant index as float lanes.
+    x = MulAdd(qf, kPiPart0f, x);  // Largest part first, then corrections.
+    x = MulAdd(qf, kPiPart1f, x);
+    x = MulAdd(qf, kPiPart2f, x);
+    x = MulAdd(qf, kPiPart3f, x);
+    return x;
+  }
+
+  // (q & 2) == 0 ? -0.0 : +0.0
+  template <class D, class VI32>
+  HWY_INLINE Vec<Rebind<float, D>> CosSignFromQuadrant(D d, VI32 q) {
+    const VI32 kTwo = Set(Rebind<int32_t, D>(), 2);
+    return BitCast(d, ShiftLeft<30>(AndNot(q, kTwo)));  // 2 << 30 is the sign bit.
+  }
+
+  // ((q & 1) ? -0.0 : +0.0)
+  template <class D, class VI32>
+  HWY_INLINE Vec<Rebind<float, D>> SinSignFromQuadrant(D d, VI32 q) {
+    const VI32 kOne = Set(Rebind<int32_t, D>(), 1);
+    return BitCast(d, ShiftLeft<31>(And(q, kOne)));  // 1 << 31 is the sign bit.
+  }
+};
+
+#if HWY_CAP_FLOAT64 && HWY_CAP_INTEGER64
+
+template <>
+struct CosSinImpl<double> {
+  // Rounds double toward zero and returns as int32_t.
+  template <class D, class V>
+  HWY_INLINE Vec<Rebind<int32_t, D>> ToInt32(D /*unused*/, V x) {
+    return DemoteTo(Rebind<int32_t, D>(), x);  // Same lane count, int32 lanes.
+  }
+
+  template <class D, class V>
+  HWY_INLINE V Poly(D d, V x) {  // Odd polynomial: x + x^3 * P(x^2).
+    const auto k0 = Set(d, -0.166666666666666657414808);
+    const auto k1 = Set(d, +0.00833333333333332974823815);
+    const auto k2 = Set(d, -0.000198412698412696162806809);
+    const auto k3 = Set(d, +2.75573192239198747630416e-6);
+    const auto k4 = Set(d, -2.50521083763502045810755e-8);
+    const auto k5 = Set(d, +1.60590430605664501629054e-10);
+    const auto k6 = Set(d, -7.64712219118158833288484e-13);
+    const auto k7 = Set(d, +2.81009972710863200091251e-15);
+    const auto k8 = Set(d, -7.97255955009037868891952e-18);
+
+    const auto y(x * x);  // Polynomial variable is x^2.
+    return MulAdd(Estrin(y, k0, k1, k2, k3, k4, k5, k6, k7, k8), (y * x), x);
+  }
+
+  template <class D, class V, class VI32>
+  HWY_INLINE V CosReduce(D d, V x, VI32 q) {  // Returns x - q * (pi/2).
+    // kHalfPiPart0d + kHalfPiPart1d + kHalfPiPart2d + kHalfPiPart3d ~= -pi/2
+    const V kHalfPiPart0d = Set(d, -0.5 * 3.1415926218032836914);
+    const V kHalfPiPart1d = Set(d, -0.5 * 3.1786509424591713469e-8);
+    const V kHalfPiPart2d = Set(d, -0.5 * 1.2246467864107188502e-16);
+    const V kHalfPiPart3d = Set(d, -0.5 * 1.2736634327021899816e-24);
+
+    // Extended precision modular arithmetic.
+    const V qf = PromoteTo(d, q);  // Quadrant index as double lanes.
+    x = MulAdd(qf, kHalfPiPart0d, x);  // Largest part first, then corrections.
+    x = MulAdd(qf, kHalfPiPart1d, x);
+    x = MulAdd(qf, kHalfPiPart2d, x);
+    x = MulAdd(qf, kHalfPiPart3d, x);
+    return x;
+  }
+
+  template <class D, class V, class VI32>
+  HWY_INLINE V SinReduce(D d, V x, VI32 q) {  // Returns x - q * pi.
+    // kPiPart0d + kPiPart1d + kPiPart2d + kPiPart3d ~= -pi
+    const V kPiPart0d = Set(d, -3.1415926218032836914);
+    const V kPiPart1d = Set(d, -3.1786509424591713469e-8);
+    const V kPiPart2d = Set(d, -1.2246467864107188502e-16);
+    const V kPiPart3d = Set(d, -1.2736634327021899816e-24);
+
+    // Extended precision modular arithmetic.
+    const V qf = PromoteTo(d, q);  // Quadrant index as double lanes.
+    x = MulAdd(qf, kPiPart0d, x);  // Largest part first, then corrections.
+    x = MulAdd(qf, kPiPart1d, x);
+    x = MulAdd(qf, kPiPart2d, x);
+    x = MulAdd(qf, kPiPart3d, x);
+    return x;
+  }
+
+  // (q & 2) == 0 ? -0.0 : +0.0
+  template <class D, class VI32>
+  HWY_INLINE Vec<Rebind<double, D>> CosSignFromQuadrant(D d, VI32 q) {
+    const VI32 kTwo = Set(Rebind<int32_t, D>(), 2);
+    return BitCast(  // Widen to int64 first; 2 << 62 is the double sign bit.
+        d, ShiftLeft<62>(PromoteTo(Rebind<int64_t, D>(), AndNot(q, kTwo))));
+  }
+
+  // ((q & 1) ? -0.0 : +0.0)
+  template <class D, class VI32>
+  HWY_INLINE Vec<Rebind<double, D>> SinSignFromQuadrant(D d, VI32 q) {
+    const VI32 kOne = Set(Rebind<int32_t, D>(), 1);
+    return BitCast(  // Widen to int64 first; 1 << 63 is the double sign bit.
+        d, ShiftLeft<63>(PromoteTo(Rebind<int64_t, D>(), And(q, kOne))));
+  }
+};
+
+#endif
+
+template <>
+struct ExpImpl<float> {
+  // Rounds float toward zero and returns as int32_t.
+  template <class D, class V>
+  HWY_INLINE Vec<Rebind<int32_t, D>> ToInt32(D /*unused*/, V x) {
+    return ConvertTo(Rebind<int32_t, D>(), x);  // Same lane count, int32 lanes.
+  }
+
+  template <class D, class V>
+  HWY_INLINE V ExpPoly(D d, V x) {  // Returns x + x^2 * P(x); Exp() adds 1.
+    const auto k0 = Set(d, +0.5f);
+    const auto k1 = Set(d, +0.166666671633720397949219f);
+    const auto k2 = Set(d, +0.0416664853692054748535156f);
+    const auto k3 = Set(d, +0.00833336077630519866943359f);
+    const auto k4 = Set(d, +0.00139304355252534151077271f);
+    const auto k5 = Set(d, +0.000198527617612853646278381f);
+
+    return MulAdd(Estrin(x, k0, k1, k2, k3, k4, k5), (x * x), x);
+  }
+
+  // Computes 2^x, where x is an integer.
+  template <class D, class VI32>
+  HWY_INLINE Vec<D> Pow2I(D d, VI32 x) {
+    const Rebind<int32_t, D> di32;
+    const VI32 kOffset = Set(di32, 0x7F);  // Added to x before the shift.
+    return BitCast(d, ShiftLeft<23>(x + kOffset));  // Place above 23 low bits.
+  }
+
+  // Returns x * 2^e, applied as x * 2^(e >> 1) * 2^(e - (e >> 1)).
+  template <class D, class V, class VI32>
+  HWY_INLINE V LoadExpShortRange(D d, V x, VI32 e) {
+    const VI32 y = ShiftRight<1>(e);  // First half of the exponent.
+    return x * Pow2I(d, y) * Pow2I(d, e - y);
+  }
+
+  template <class D, class V, class VI32>
+  HWY_INLINE V ExpReduce(D d, V x, VI32 q) {  // Returns x - q * ln(2).
+    // kLn2Part0f + kLn2Part1f ~= -ln(2)
+    const V kLn2Part0f = Set(d, -0.693145751953125f);
+    const V kLn2Part1f = Set(d, -1.428606765330187045e-6f);
+
+    // Extended precision modular arithmetic.
+    const V qf = ConvertTo(d, q);  // q as float lanes.
+    x = MulAdd(qf, kLn2Part0f, x);  // Larger part first, then the correction.
+    x = MulAdd(qf, kLn2Part1f, x);
+    return x;
+  }
+};
+
+template <>
+struct LogImpl<float> {
+  template <class D, class V>
+  HWY_INLINE Vec<Rebind<int32_t, D>> Log2p1NoSubnormal(D /*d*/, V x) {
+    const Rebind<int32_t, D> di32;
+    const Rebind<uint32_t, D> du32;
+    return BitCast(di32, ShiftRight<23>(BitCast(du32, x))) - Set(di32, 0x7F);  // Bits above the low 23, minus 0x7F.
+  }
+
+  // Approximates Log(x) over the range [sqrt(2) / 2, sqrt(2)].
+  template <class D, class V>
+  HWY_INLINE V LogPoly(D d, V x) {  // k0*x^2 + k1*x^4 + k2*x^6 + k3*x^8.
+    const V k0 = Set(d, 0.66666662693f);
+    const V k1 = Set(d, 0.40000972152f);
+    const V k2 = Set(d, 0.28498786688f);
+    const V k3 = Set(d, 0.24279078841f);
+
+    const V x2 = (x * x);
+    const V x4 = (x2 * x2);
+    return MulAdd(MulAdd(k2, x4, k0), x2, (MulAdd(k3, x4, k1) * x4));  // Even/odd-indexed k evaluated separately.
+  }
+};
+
+#if HWY_CAP_FLOAT64 && HWY_CAP_INTEGER64
+template <>
+struct ExpImpl<double> {
+  // Rounds double toward zero and returns as int32_t.
+  template <class D, class V>
+  HWY_INLINE Vec<Rebind<int32_t, D>> ToInt32(D /*unused*/, V x) {
+    return DemoteTo(Rebind<int32_t, D>(), x);  // Same lane count, int32 lanes.
+  }
+
+  template <class D, class V>
+  HWY_INLINE V ExpPoly(D d, V x) {  // Returns x + x^2 * P(x); Exp() adds 1.
+    const auto k0 = Set(d, +0.5);
+    const auto k1 = Set(d, +0.166666666666666851703837);
+    const auto k2 = Set(d, +0.0416666666666665047591422);
+    const auto k3 = Set(d, +0.00833333333331652721664984);
+    const auto k4 = Set(d, +0.00138888888889774492207962);
+    const auto k5 = Set(d, +0.000198412698960509205564975);
+    const auto k6 = Set(d, +2.4801587159235472998791e-5);
+    const auto k7 = Set(d, +2.75572362911928827629423e-6);
+    const auto k8 = Set(d, +2.75573911234900471893338e-7);
+    const auto k9 = Set(d, +2.51112930892876518610661e-8);
+    const auto k10 = Set(d, +2.08860621107283687536341e-9);
+
+    return MulAdd(Estrin(x, k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10),
+                  (x * x), x);
+  }
+
+  // Computes 2^x, where x is an integer.
+  template <class D, class VI32>
+  HWY_INLINE Vec<D> Pow2I(D d, VI32 x) {
+    const Rebind<int32_t, D> di32;
+    const Rebind<int64_t, D> di64;
+    const VI32 kOffset = Set(di32, 0x3FF);  // Added to x before the shift.
+    return BitCast(d, ShiftLeft<52>(PromoteTo(di64, x + kOffset)));  // Widen, then place above 52 low bits.
+  }
+
+  // Returns x * 2^e, applied as x * 2^(e >> 1) * 2^(e - (e >> 1)).
+  template <class D, class V, class VI32>
+  HWY_INLINE V LoadExpShortRange(D d, V x, VI32 e) {
+    const VI32 y = ShiftRight<1>(e);  // First half of the exponent.
+    return (x * Pow2I(d, y) * Pow2I(d, e - y));
+  }
+
+  template <class D, class V, class VI32>
+  HWY_INLINE V ExpReduce(D d, V x, VI32 q) {  // Returns x - q * ln(2).
+    // kLn2Part0d + kLn2Part1d ~= -ln(2)
+    const V kLn2Part0d = Set(d, -0.6931471805596629565116018);
+    const V kLn2Part1d = Set(d, -0.28235290563031577122588448175e-12);
+
+    // Extended precision modular arithmetic.
+    const V qf = PromoteTo(d, q);  // q as double lanes.
+    x = MulAdd(qf, kLn2Part0d, x);  // Larger part first, then the correction.
+    x = MulAdd(qf, kLn2Part1d, x);
+    return x;
+  }
+};
+
+template <>
+struct LogImpl<double> {
+  template <class D, class V>
+  HWY_INLINE Vec<Rebind<int64_t, D>> Log2p1NoSubnormal(D /*d*/, V x) {
+    const Rebind<int64_t, D> di64;
+    const Rebind<uint64_t, D> du64;
+    return BitCast(di64, ShiftRight<52>(BitCast(du64, x))) - Set(di64, 0x3FF);  // Bits above the low 52, minus 0x3FF.
+  }
+
+  // Approximates Log(x) over the range [sqrt(2) / 2, sqrt(2)].
+  template <class D, class V>
+  HWY_INLINE V LogPoly(D d, V x) {  // k0*x^2 + k1*x^4 + ... + k6*x^14.
+    const V k0 = Set(d, 0.6666666666666735130);
+    const V k1 = Set(d, 0.3999999999940941908);
+    const V k2 = Set(d, 0.2857142874366239149);
+    const V k3 = Set(d, 0.2222219843214978396);
+    const V k4 = Set(d, 0.1818357216161805012);
+    const V k5 = Set(d, 0.1531383769920937332);
+    const V k6 = Set(d, 0.1479819860511658591);
+
+    const V x2 = (x * x);
+    const V x4 = (x2 * x2);
+    return MulAdd(MulAdd(MulAdd(MulAdd(k6, x4, k4), x4, k2), x4, k0), x2,  // Even-indexed k, times x^2.
+                  (MulAdd(MulAdd(k5, x4, k3), x4, k1) * x4));  // Odd-indexed k, times x^4.
+  }
+};
+
+#endif
+
+template <class D, class V, bool kAllowSubnormals = true>
+HWY_INLINE V Log(const D d, V x) {  // kAllowSubnormals=false skips the rescale.
+  // http://git.musl-libc.org/cgit/musl/tree/src/math/log.c for more info.
+  using LaneType = LaneType<V>;
+  impl::LogImpl<LaneType> impl;
+
+  // clang-format off
+  constexpr bool kIsF32 = (sizeof(LaneType) == 4);
+
+  // Float Constants (kLn2Hi + kLn2Lo ~= ln(2))
+  const V kLn2Hi     = Set(d, (kIsF32 ? 0.69313812256f   :
+                                        0.693147180369123816490   ));
+  const V kLn2Lo     = Set(d, (kIsF32 ? 9.0580006145e-6f :
+                                        1.90821492927058770002e-10));
+  const V kOne       = Set(d, +1.0);
+  const V kMinNormal = Set(d, (kIsF32 ? 1.175494351e-38f :
+                                        2.2250738585072014e-308   ));
+  const V kScale     = Set(d, (kIsF32 ? 3.355443200e+7f  :  // 2^25
+                                        1.8014398509481984e+16    ));  // 2^54
+
+  // Integer Constants
+  const Rebind<MakeSigned<LaneType>, D> di;
+  using VI = decltype(Zero(di));
+  const VI kLowerBits = Set(di, (kIsF32 ? 0x00000000L : 0xFFFFFFFFLL));  // Bits of x kept as-is in y.
+  const VI kMagic     = Set(di, (kIsF32 ? 0x3F3504F3L : 0x3FE6A09E00000000LL));  // ~sqrt(2)/2 bit pattern.
+  const VI kExpMask   = Set(di, (kIsF32 ? 0x3F800000L : 0x3FF0000000000000LL));  // Bit pattern of 1.0.
+  const VI kExpScale  = Set(di, (kIsF32 ? -25         : -54));  // Undoes kScale in the exponent.
+  const VI kManMask   = Set(di, (kIsF32 ? 0x7FFFFFL   : 0xFFFFF00000000LL));
+  // clang-format on
+
+  // Scale up 'x' so that it is no longer denormalized.
+  VI exp_bits;
+  V exp;
+  if (kAllowSubnormals == true) {
+    const auto is_denormal = (x < kMinNormal);
+    x = IfThenElse(is_denormal, (x * kScale), x);
+
+    // Compute the new exponent.
+    exp_bits = (BitCast(di, x) + (kExpMask - kMagic));
+    const VI exp_scale =  // kExpScale in rescaled lanes, 0 elsewhere.
+        BitCast(di, IfThenElseZero(is_denormal, BitCast(d, kExpScale)));
+    exp = ConvertTo(
+        d, exp_scale + impl.Log2p1NoSubnormal(d, BitCast(d, exp_bits)));
+  } else {
+    // Compute the new exponent.
+    exp_bits = (BitCast(di, x) + (kExpMask - kMagic));
+    exp = ConvertTo(d, impl.Log2p1NoSubnormal(d, BitCast(d, exp_bits)));
+  }
+
+  // Renormalize.
+  const V y = Or(And(x, BitCast(d, kLowerBits)),
+                 BitCast(d, ((exp_bits & kManMask) + kMagic)));
+
+  // Approximate and reconstruct.
+  const V ym1 = (y - kOne);
+  const V z = (ym1 / (y + kOne));  // (y - 1) / (y + 1), the LogPoly argument.
+
+  return MulSub(exp, kLn2Hi,  // exp * ln(2) is applied in hi/lo parts.
+                (MulSub(z, (ym1 - impl.LogPoly(d, z)), (exp * kLn2Lo)) - ym1));
+}
+
+}  // namespace impl
+
+template <class D, class V>
+HWY_INLINE V Acos(const D d, V x) {  // Two ranges: |x| < 0.5 and |x| >= 0.5.
+  using LaneType = LaneType<V>;
+
+  const V kZero = Zero(d);
+  const V kHalf = Set(d, +0.5);
+  const V kOne = Set(d, +1.0);
+  const V kTwo = Set(d, +2.0);
+  const V kPi = Set(d, +3.14159265358979323846264);
+  const V kPiOverTwo = Set(d, +1.57079632679489661923132169);
+
+  const V sign_x = And(SignBit(d), x);  // Sign bit of x only.
+  const V abs_x = Xor(x, sign_x);  // x with its sign bit cleared.
+  const auto mask = (abs_x < kHalf);  // Lanes using the polynomial directly.
+  const V yy = IfThenElse(mask, (abs_x * abs_x), ((kOne - abs_x) * kHalf));
+  const V y = IfThenElse(mask, abs_x, Sqrt(yy));  // yy == y * y in both cases.
+
+  impl::AsinImpl<LaneType> impl;
+  const V t = (impl.AsinPoly(d, yy, y) * (y * yy));  // y^3 * P(y^2)
+  const V z = IfThenElse(mask, (kPiOverTwo - (Xor(y, sign_x) + Xor(t, sign_x))),
+                         ((t + y) * kTwo));
+  return IfThenElse(Or(mask, (x >= kZero)), z, (kPi - z));  // pi - z for x <= -0.5.
+}
+
+template <class D, class V>
+HWY_INLINE V Acosh(const D d, V x) {  // Three ranges: x <= 2, x > 2, x > 2^28.
+  const V kLarge = Set(d, 268435456.0);  // 2^28
+  const V kLog2 = Set(d, 0.693147180559945286227);  // ln(2)
+  const V kOne = Set(d, +1.0);
+  const V kTwo = Set(d, +2.0);
+
+  const auto is_x_large = (x > kLarge);
+  const auto is_x_gt_2 = (x > kTwo);
+
+  const V x_minus_1 = (x - kOne);
+  const V y0 = MulSub(kTwo, x, (kOne / (Sqrt(MulSub(x, x, kOne)) + x)));
+  const V y1 =  // sqrt(2*(x-1) + (x-1)^2) + (x-1)
+      (Sqrt(MulAdd(x_minus_1, kTwo, (x_minus_1 * x_minus_1))) + x_minus_1);
+  const V y2 =  // Log argument: x if large, y0 if x > 2, else 1 + y1.
+      IfThenElse(is_x_gt_2, IfThenElse(is_x_large, x, y0), (y1 + kOne));
+  const V z = impl::Log<D, V, /*kAllowSubnormals=*/false>(d, y2);
+
+  const auto is_pole = y2 == kOne;  // 1 + y1 rounded to exactly 1.
+  const auto divisor = IfThenZeroElse(is_pole, y2) - kOne;  // -1 at the pole, so never 0.
+  return IfThenElse(is_x_gt_2, z, IfThenElse(is_pole, y1, z * y1 / divisor)) +
+         IfThenElseZero(is_x_large, kLog2);  // Large lanes add ln(2) to log(x).
+}
+
+template <class D, class V>
+HWY_INLINE V Asin(const D d, V x) {  // Two ranges: |x| < 0.5 and |x| >= 0.5.
+  using LaneType = LaneType<V>;
+
+  const V kHalf = Set(d, +0.5);
+  const V kOne = Set(d, +1.0);
+  const V kTwo = Set(d, +2.0);
+  const V kPiOverTwo = Set(d, +1.57079632679489661923132169);
+
+  const V sign_x = And(SignBit(d), x);  // Sign bit of x only.
+  const V abs_x = Xor(x, sign_x);  // x with its sign bit cleared.
+  const auto mask = (abs_x < kHalf);  // Lanes using the polynomial directly.
+  const V yy = IfThenElse(mask, (abs_x * abs_x), (kOne - abs_x) * kHalf);
+  const V y = IfThenElse(mask, abs_x, Sqrt(yy));  // yy == y * y in both cases.
+
+  impl::AsinImpl<LaneType> impl;
+  const V z0 = MulAdd(impl.AsinPoly(d, yy, y), (yy * y), y);  // y + y^3 * P(y^2)
+  const V z1 = (kPiOverTwo - (z0 * kTwo));  // Used when |x| >= 0.5.
+  return Or(IfThenElse(mask, z0, z1), sign_x);  // Reapply the sign bit.
+}
+
+template <class D, class V>
+HWY_INLINE V Asinh(const D d, V x) {  // Ranges split at 2^-28, 2 and 2^28.
+  const V kSmall = Set(d, 1.0 / 268435456.0);  // 2^-28
+  const V kLarge = Set(d, 268435456.0);  // 2^28
+  const V kLog2 = Set(d, 0.693147180559945286227);  // ln(2)
+  const V kOne = Set(d, +1.0);
+  const V kTwo = Set(d, +2.0);
+
+  const V sign_x = And(SignBit(d), x);  // Extract the sign bit
+  const V abs_x = Xor(x, sign_x);
+
+  const auto is_x_large = (abs_x > kLarge);
+  const auto is_x_lt_2 = (abs_x < kTwo);
+
+  const V x2 = (x * x);
+  const V sqrt_x2_plus_1 = Sqrt(x2 + kOne);
+
+  const V y0 = MulAdd(abs_x, kTwo, (kOne / (sqrt_x2_plus_1 + abs_x)));
+  const V y1 = ((x2 / (sqrt_x2_plus_1 + kOne)) + abs_x);
+  const V y2 =  // Log argument: 1 + y1 if |x| < 2, |x| if large, else y0.
+      IfThenElse(is_x_lt_2, (y1 + kOne), IfThenElse(is_x_large, abs_x, y0));
+  const V z = impl::Log<D, V, /*kAllowSubnormals=*/false>(d, y2);
+
+  const auto is_pole = y2 == kOne;  // 1 + y1 rounded to exactly 1.
+  const auto divisor = IfThenZeroElse(is_pole, y2) - kOne;  // -1 at the pole, so never 0.
+  const auto large = IfThenElse(is_pole, y1, z * y1 / divisor);
+  const V y = IfThenElse(abs_x < kSmall, x, large);  // Tiny |x|: result is x.
+  return Or((IfThenElse(is_x_lt_2, y, z) + IfThenElseZero(is_x_large, kLog2)),
+            sign_x);  // Reapply the sign bit.
+}
+
+template <class D, class V>
+HWY_INLINE V Atan(const D d, V x) {  // |x| > 1 goes through pi/2 - atan(1/|x|).
+  using LaneType = LaneType<V>;
+
+  const V kOne = Set(d, +1.0);
+  const V kPiOverTwo = Set(d, +1.57079632679489661923132169);
+
+  const V sign = And(SignBit(d), x);  // Sign bit of x only.
+  const V abs_x = Xor(x, sign);  // x with its sign bit cleared.
+  const auto mask = (abs_x > kOne);  // Lanes that use the reciprocal.
+
+  impl::AtanImpl<LaneType> impl;
+  const auto divisor = IfThenElse(mask, abs_x, kOne);  // 1 where the quotient is unused.
+  const V y = impl.AtanPoly(d, IfThenElse(mask, kOne / divisor, abs_x));
+  return Or(IfThenElse(mask, (kPiOverTwo - y), y), sign);  // Reapply the sign bit.
+}
+
+template <class D, class V>
+HWY_INLINE V Atanh(const D d, V x) {  // +/-0.5 * log1p(2|x| / (1 - |x|)).
+  const V kHalf = Set(d, +0.5);
+  const V kOne = Set(d, +1.0);
+
+  const V sign = And(SignBit(d), x);  // Extract the sign bit
+  const V abs_x = Xor(x, sign);
+  return Log1p(d, ((abs_x + abs_x) / (kOne - abs_x))) * Xor(kHalf, sign);  // Xor gives 0.5 the sign of x.
+}
+
+template <class D, class V>
+HWY_INLINE V Cos(const D d, V x) {  // Reduces |x| by q * pi/2, then Poly().
+  using LaneType = LaneType<V>;
+  impl::CosSinImpl<LaneType> impl;
+
+  // Float Constants
+  const V kOneOverPi = Set(d, 0.31830988618379067153);
+
+  // Integer Constants
+  const Rebind<int32_t, D> di32;
+  using VI32 = decltype(Zero(di32));
+  const VI32 kOne = Set(di32, 1);
+
+  const V y = Abs(x);  // cos(x) == cos(|x|)
+
+  // Compute the quadrant, q = int(|x| / pi) * 2 + 1
+  const VI32 q = (ShiftLeft<1>(impl.ToInt32(d, y * kOneOverPi)) + kOne);  // q is always odd.
+
+  // Reduce range, apply sign, and approximate.
+  return impl.Poly(
+      d, Xor(impl.CosReduce(d, y, q), impl.CosSignFromQuadrant(d, q)));
+}
+
+template <class D, class V>
+HWY_INLINE V Exp(const D d, V x) {  // (ExpPoly(x - q*ln2) + 1) scaled by 2^q.
+  using LaneType = LaneType<V>;
+
+  // clang-format off
+  const V kHalf        = Set(d, +0.5);
+  const V kLowerBound  = Set(d, (sizeof(LaneType) == 4 ? -104.0 : -1000.0));  // float : double
+  const V kNegZero     = Set(d, -0.0);
+  const V kOne         = Set(d, +1.0);
+  const V kOneOverLog2 = Set(d, +1.442695040888963407359924681);
+  // clang-format on
+
+  impl::ExpImpl<LaneType> impl;
+
+  // q = static_cast<int32>((x / log(2)) + ((x < 0) ? -0.5 : +0.5))
+  const auto q =  // Or/And copy the sign of x onto 0.5.
+      impl.ToInt32(d, MulAdd(x, kOneOverLog2, Or(kHalf, And(x, kNegZero))));
+
+  // Reduce, approximate, and then reconstruct.
+  const V y = impl.LoadExpShortRange(
+      d, (impl.ExpPoly(d, impl.ExpReduce(d, x, q)) + kOne), q);
+  return IfThenElseZero(x >= kLowerBound, y);  // Lanes failing the test give 0.
+}
+
+template <class D, class V>
+HWY_INLINE V Expm1(const D d, V x) {  // Same reduction as Exp, minus one.
+  using LaneType = LaneType<V>;
+
+  // clang-format off
+  const V kHalf        = Set(d, +0.5);
+  const V kLowerBound  = Set(d, (sizeof(LaneType) == 4 ? -104.0 : -1000.0));  // float : double
+  const V kLn2Over2    = Set(d, +0.346573590279972654708616);
+  const V kNegOne      = Set(d, -1.0);
+  const V kNegZero     = Set(d, -0.0);
+  const V kOne         = Set(d, +1.0);
+  const V kOneOverLog2 = Set(d, +1.442695040888963407359924681);
+  // clang-format on
+
+  impl::ExpImpl<LaneType> impl;
+
+  // q = static_cast<int32>((x / log(2)) + ((x < 0) ? -0.5 : +0.5))
+  const auto q =  // Or/And copy the sign of x onto 0.5.
+      impl.ToInt32(d, MulAdd(x, kOneOverLog2, Or(kHalf, And(x, kNegZero))));
+
+  // Reduce, approximate, and then reconstruct.
+  const V y = impl.ExpPoly(d, impl.ExpReduce(d, x, q));
+  const V z = IfThenElse(Abs(x) < kLn2Over2, y,  // Small |x|: polynomial value as-is.
+                         impl.LoadExpShortRange(d, (y + kOne), q) - kOne);
+  return IfThenElse(x < kLowerBound, kNegOne, z);  // -1 below kLowerBound.
+}
+
+template <class D, class V>
+HWY_INLINE V Log(const D d, V x) {  // Public entry point; forwards to impl::Log.
+  return impl::Log<D, V, /*kAllowSubnormals=*/true>(d, x);  // Subnormal rescaling enabled.
+}
+
+template <class D, class V>
+HWY_INLINE V Log10(const D d, V x) {  // Natural log times a constant.
+  return Log(d, x) * Set(d, 0.4342944819032518276511);  // Constant ~= 1/ln(10).
+}
+
+template <class D, class V>
+HWY_INLINE V Log1p(const D d, V x) {  // log(y) * x / (y - 1), with y = 1 + x.
+  const V kOne = Set(d, +1.0);
+
+  const V y = x + kOne;
+  const auto is_pole = y == kOne;  // 1 + x rounded to exactly 1.
+  const auto divisor = IfThenZeroElse(is_pole, y) - kOne;  // -1 at the pole, so never 0.
+  const auto non_pole =
+      impl::Log<D, V, /*kAllowSubnormals=*/false>(d, y) * (x / divisor);
+  return IfThenElse(is_pole, x, non_pole);  // Pole lanes return x itself.
+}
+
+template <class D, class V>
+HWY_INLINE V Log2(const D d, V x) {  // Natural log times a constant.
+  return Log(d, x) * Set(d, 1.44269504088896340735992);  // Constant ~= 1/ln(2).
+}
+
+template <class D, class V>
+HWY_INLINE V Sin(const D d, V x) {  // Reduces |x| by q * pi, then Poly().
+  using LaneType = LaneType<V>;
+  impl::CosSinImpl<LaneType> impl;
+
+  // Float Constants
+  const V kOneOverPi = Set(d, 0.31830988618379067153);
+  const V kHalf = Set(d, 0.5);
+
+  // Integer Constants
+  const Rebind<int32_t, D> di32;
+  using VI32 = decltype(Zero(di32));
+
+  const V abs_x = Abs(x);
+  const V sign_x = Xor(abs_x, x);  // Bits where x and |x| differ.
+
+  // Compute the quadrant, q = int((|x| / pi) + 0.5)
+  const VI32 q = impl.ToInt32(d, MulAdd(abs_x, kOneOverPi, kHalf));
+
+  // Reduce range, apply sign, and approximate.
+  return impl.Poly(d, Xor(impl.SinReduce(d, abs_x, q),
+                          Xor(impl.SinSignFromQuadrant(d, q), sign_x)));  // Quadrant sign combined with sign of x.
+}
+
+template <class D, class V>
+HWY_INLINE V Sinh(const D d, V x) {  // Built from y = Expm1(|x|).
+  const V kHalf = Set(d, +0.5);
+  const V kOne = Set(d, +1.0);
+  const V kTwo = Set(d, +2.0);
+
+  const V sign = And(SignBit(d), x);  // Extract the sign bit
+  const V abs_x = Xor(x, sign);
+  const V y = Expm1(d, abs_x);
+  const V z = ((y + kTwo) / (y + kOne) * (y * kHalf));  // y*(y+2) / (2*(y+1))
+  return Xor(z, sign);  // Reapply the sign bit
+}
+
+template <class D, class V>
+HWY_INLINE V Tanh(const D d, V x) {  // Built from y = Expm1(2|x|).
+  const V kLimit = Set(d, 18.714973875);  // Above this |x| the result is 1.
+  const V kOne = Set(d, +1.0);
+  const V kTwo = Set(d, +2.0);
+
+  const V sign = And(SignBit(d), x);  // Extract the sign bit
+  const V abs_x = Xor(x, sign);
+  const V y = Expm1(d, abs_x * kTwo);
+  const V z = IfThenElse((abs_x > kLimit), kOne, (y / (y + kTwo)));  // y / (y + 2) otherwise.
+  return Xor(z, sign);  // Reapply the sign bit
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/contrib/math/math_test.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/contrib/math/math_test.cc
new file mode 100644
index 0000000000..368ecfe062
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/contrib/math/math_test.cc
@@ -0,0 +1,189 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cfloat>  // FLT_MAX
+#include <iostream>
+#include <type_traits>
+
+// clang-format off
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/math/math_test.cc"
+#include "hwy/foreach_target.h"
+
+#include "hwy/contrib/math/math-inl.h"
+#include "hwy/tests/test_util-inl.h"
+// clang-format on
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+// Compares vector fxN with scalar reference fx1 on samples from [min, max].
+template <class T, class D>
+void TestMath(const std::string name, T (*fx1)(T), Vec<D> (*fxN)(D, Vec<D>),
+              D d, T min, T max, uint64_t max_error_ulp) {
+  constexpr bool kIsF32 = (sizeof(T) == 4);  // only used to label the output
+  using UintT = MakeUnsigned<T>;
+  // Inputs are enumerated through their bit patterns (unsigned integers).
+  const UintT min_bits = BitCast<UintT>(min);
+  const UintT max_bits = BitCast<UintT>(max);
+
+  // If min is negative and max is positive, the range needs to be broken into
+  // two pieces, [+0, max] and [-0, min], otherwise [min, max].
+  int range_count = 1;
+  UintT ranges[2][2] = {{min_bits, max_bits}, {0, 0}};
+  if ((min < 0.0) && (max > 0.0)) {
+    ranges[0][0] = BitCast<UintT>(static_cast<T>(+0.0));
+    ranges[0][1] = max_bits;
+    ranges[1][0] = BitCast<UintT>(static_cast<T>(-0.0));
+    ranges[1][1] = min_bits;
+    range_count = 2;
+  }
+
+  uint64_t max_ulp = 0;  // largest error seen, printed at the end
+#if HWY_ARCH_ARM
+  // Emulation is slower, so cannot afford as many.
+  constexpr UintT kSamplesPerRange = 25000;
+#else
+  constexpr UintT kSamplesPerRange = 100000;
+#endif
+  for (int range_index = 0; range_index < range_count; ++range_index) {
+    const UintT start = ranges[range_index][0];
+    const UintT stop = ranges[range_index][1];
+    const UintT step = std::max<UintT>(1, ((stop - start) / kSamplesPerRange));
+    for (UintT value_bits = start; value_bits <= stop; value_bits += step) {
+      const T value = BitCast<T>(std::min(value_bits, stop));
+      const T actual = GetLane(fxN(d, Set(d, value)));  // first lane only
+      const T expected = fx1(value);
+
+      // Skip small inputs and outputs on armv7, it flushes subnormals to zero.
+#if HWY_TARGET == HWY_NEON && HWY_ARCH_ARM_V7
+      if ((std::abs(value) < 1e-37f) || (std::abs(expected) < 1e-37f)) {
+        continue;
+      }
+#endif
+      // Print the details of a failing sample before the assertion below fires.
+      const auto ulp = ComputeUlpDelta(actual, expected);
+      max_ulp = std::max<uint64_t>(max_ulp, ulp);
+      if (ulp > max_error_ulp) {
+        std::cout << name << "<" << (kIsF32 ? "F32x" : "F64x") << Lanes(d)
+                  << ">(" << value << ") expected: " << expected
+                  << " actual: " << actual << " ulp: " << ulp
+                  << " max: " << max_error_ulp << std::endl;
+      }
+      HWY_ASSERT(ulp <= max_error_ulp);
+    }
+  }
+  std::cout << (kIsF32 ? "F32x" : "F64x") << Lanes(d)
+            << ", Max ULP: " << max_ulp << std::endl;
+}
+// Defines Test##NAME (F32 or F64 arguments by sizeof(T)) and TestAll##NAME.
+#define DEFINE_MATH_TEST(NAME, F32x1, F32xN, F32_MIN, F32_MAX, F32_ERROR, \
+                         F64x1, F64xN, F64_MIN, F64_MAX, F64_ERROR)       \
+  struct Test##NAME {                                                     \
+    template <class T, class D>                                           \
+    HWY_NOINLINE void operator()(T, D d) {                                \
+      if (sizeof(T) == 4) {                                               \
+        TestMath<T, D>(HWY_STR(NAME), F32x1, F32xN, d, F32_MIN, F32_MAX,  \
+                       F32_ERROR);                                        \
+      } else {                                                            \
+        TestMath<T, D>(HWY_STR(NAME), F64x1, F64xN, d, F64_MIN, F64_MAX,  \
+                       F64_ERROR);                                        \
+      }                                                                   \
+    }                                                                     \
+  };                                                                      \
+  HWY_NOINLINE void TestAll##NAME() {                                     \
+    ForFloatTypes(ForPartialVectors<Test##NAME>());                       \
+  }
+
+// Floating point values closest to but less than 1.0
+const float kNearOneF = BitCast<float>(0x3F7FFFFF);
+const double kNearOneD = BitCast<double>(0x3FEFFFFFFFFFFFFFULL);
+
+// clang-format off
+DEFINE_MATH_TEST(Acos,
+  std::acos,  CallAcos,  -1.0,       +1.0,        3,  // NEON is 3 instead of 2
+  std::acos,  CallAcos,  -1.0,       +1.0,        2)
+DEFINE_MATH_TEST(Acosh,
+  std::acosh, CallAcosh, +1.0,       +FLT_MAX,    3,
+  std::acosh, CallAcosh, +1.0,       +DBL_MAX,    3)
+DEFINE_MATH_TEST(Asin,
+  std::asin,  CallAsin,  -1.0,       +1.0,        4,  // ARMv7 is 4 instead of 2
+  std::asin,  CallAsin,  -1.0,       +1.0,        2)
+DEFINE_MATH_TEST(Asinh,
+  std::asinh, CallAsinh, -FLT_MAX,   +FLT_MAX,    3,
+  std::asinh, CallAsinh, -DBL_MAX,   +DBL_MAX,    3)
+DEFINE_MATH_TEST(Atan,
+  std::atan,  CallAtan,  -FLT_MAX,   +FLT_MAX,    3,
+  std::atan,  CallAtan,  -DBL_MAX,   +DBL_MAX,    3)
+DEFINE_MATH_TEST(Atanh,
+  std::atanh, CallAtanh, -kNearOneF, +kNearOneF,  4,  // NEON is 4 instead of 3
+  std::atanh, CallAtanh, -kNearOneD, +kNearOneD,  3)
+DEFINE_MATH_TEST(Cos,
+  std::cos,   CallCos,   -39000.0,   +39000.0,    3,
+  std::cos,   CallCos,   -39000.0,   +39000.0,    3)
+DEFINE_MATH_TEST(Exp,
+  std::exp,   CallExp,   -FLT_MAX,   +104.0,      1,
+  std::exp,   CallExp,   -DBL_MAX,   +104.0,      1)
+DEFINE_MATH_TEST(Expm1,
+  std::expm1, CallExpm1, -FLT_MAX,   +104.0,      4,
+  std::expm1, CallExpm1, -DBL_MAX,   +104.0,      4)
+DEFINE_MATH_TEST(Log,
+  std::log,   CallLog,   +FLT_MIN,   +FLT_MAX,    1,
+  std::log,   CallLog,   +DBL_MIN,   +DBL_MAX,    1)
+DEFINE_MATH_TEST(Log10,
+  std::log10, CallLog10, +FLT_MIN,   +FLT_MAX,    2,
+  std::log10, CallLog10, +DBL_MIN,   +DBL_MAX,    2)
+DEFINE_MATH_TEST(Log1p,
+  std::log1p, CallLog1p, +0.0f,      +1e37,       3,  // NEON is 3 instead of 2
+  std::log1p, CallLog1p, +0.0,       +DBL_MAX,    2)
+DEFINE_MATH_TEST(Log2,
+  std::log2,  CallLog2,  +FLT_MIN,   +FLT_MAX,    2,
+  std::log2,  CallLog2,  +DBL_MIN,   +DBL_MAX,    2)
+DEFINE_MATH_TEST(Sin,
+  std::sin,   CallSin,   -39000.0,   +39000.0,    3,
+  std::sin,   CallSin,   -39000.0,   +39000.0,    3)
+DEFINE_MATH_TEST(Sinh,
+  std::sinh,  CallSinh,  -80.0f,     +80.0f,      4,
+  std::sinh,  CallSinh,  -709.0,     +709.0,      4)
+DEFINE_MATH_TEST(Tanh,
+  std::tanh,  CallTanh,  -FLT_MAX,   +FLT_MAX,    4,
+  std::tanh,  CallTanh,  -DBL_MAX,   +DBL_MAX,    4)
+// clang-format on
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+HWY_BEFORE_TEST(HwyMathTest);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAcos);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAcosh);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAsin);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAsinh);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAtan);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAtanh);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllCos);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllExp);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllExpm1);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllLog);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllLog10);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllLog1p);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllLog2);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllSin);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllSinh);
+HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllTanh);
+}  // namespace hwy
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/examples/benchmark.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/examples/benchmark.cc
new file mode 100644
index 0000000000..0debfd7db5
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/examples/benchmark.cc
@@ -0,0 +1,242 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/examples/benchmark.cc"
+#include "hwy/foreach_target.h"
+
+#include <stddef.h>
+#include <stdio.h>
+
+#include <memory>
+#include <numeric>  // iota
+
+#include "hwy/aligned_allocator.h"
+#include "hwy/highway.h"
+#include "hwy/nanobenchmark.h"
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+#if HWY_TARGET != HWY_SCALAR
+using hwy::HWY_NAMESPACE::CombineShiftRightBytes;
+#endif
+
+// Owns one aligned allocation split into two float arrays of NumItems() each.
+class TwoArray {
+ public:
+  // Must be a multiple of the vector lane count * 8.
+  static size_t NumItems() { return 3456; }
+  TwoArray()
+      : a_(AllocateAligned<float>(NumItems() * 2)), b_(a_.get() + NumItems()) {
+    const size_t n = NumItems();
+    const float first = static_cast<float>(Unpredictable1());  // = 1 at runtime
+    std::iota(a_.get(), a_.get() + n, first);  // a_ = first, first + 1, ...
+    std::iota(b_, b_ + n, first);              // b_ holds the same sequence
+  }
+
+ protected:
+  AlignedFreeUniquePtr<float[]> a_;  // owns the allocation; first array
+  float* b_;                         // second array, NumItems() past a_
+};
+
+// Times one Benchmark instance, checks its output and prints cycles per item.
+template <class Benchmark>
+void RunBenchmark(const char* caption) {
+  printf("%10s: ", caption);
+  const size_t kNumInputs = 1;
+  // The single input is the item count, scaled by Unpredictable1().
+  const size_t num_items = Benchmark::NumItems() * Unpredictable1();
+  const FuncInput inputs[kNumInputs] = {num_items};
+  Result results[kNumInputs];
+  Benchmark benchmark;
+
+  Params params;
+  params.verbose = false;
+  params.max_evals = 7;
+  params.target_rel_mad = 0.002;
+  const auto closure = [&benchmark](const FuncInput input) {
+    return benchmark(input);
+  };
+  const size_t num_results =
+      MeasureClosure(closure, inputs, kNumInputs, results, params);
+  if (num_results != kNumInputs) fprintf(stderr, "MeasureClosure failed.\n");
+  benchmark.Verify(num_items);
+
+  for (size_t i = 0; i < num_results; ++i) {
+    const Result& r = results[i];
+    const double cycles_per_item = r.ticks / r.input;
+    const double mad = r.variability * cycles_per_item;
+    printf("%6zu: %6.3f (+/- %5.3f)\n", r.input, cycles_per_item, mad);
+  }
+}
+
+void Intro() {  // Smallest end-to-end example: F(x) = 2 * x^2 on 16 floats.
+  HWY_ALIGN const float in[16] = {1, 2, 3, 4, 5, 6};
+  HWY_ALIGN float out[16];
+  HWY_FULL(float) d;  // tag selecting the largest possible vector
+  for (size_t pos = 0; pos < 16; pos += Lanes(d)) {
+    const auto v = Load(d, in + pos);  // aligned load
+    auto doubled_square = v * v;
+    doubled_square += doubled_square;  // non-const vectors may be updated
+    Store(doubled_square, d, out + pos);
+  }
+  printf("\nF(x)->2*x^2, F(%.0f) = %.1f\n", in[2], out[2]);
+}
+
+// BEGINNER: dot product
+// 0.4 cyc/float = bronze, 0.25 = silver, 0.15 = gold!
+class BenchmarkDot : public TwoArray {
+ public:
+  BenchmarkDot() : dot_{-1.0f} {}  // -1 marks "operator() has not run yet"
+  // Returns dot(a_, b_) over num_items, a multiple of 8 * Lanes(d), elements.
+  FuncOutput operator()(const size_t num_items) {
+    HWY_FULL(float) d;
+    const size_t N = Lanes(d);
+    using V = decltype(Zero(d));
+    constexpr int unroll = 8;
+    // Compiler doesn't make independent sum* accumulators, so unroll manually.
+    // Some older compilers might not be able to fit the 8 arrays in registers,
+    // so manual unrolling can be helpful if you run into this issue.
+    // 2 FMA ports * 4 cycle latency = 8x unrolled.
+    V sum[unroll];
+    for (int i = 0; i < unroll; ++i) {
+      sum[i] = Zero(d);
+    }
+    const float* const HWY_RESTRICT pa = &a_[0];
+    const float* const HWY_RESTRICT pb = b_;
+    for (size_t i = 0; i < num_items; i += unroll * N) {
+      for (int j = 0; j < unroll; ++j) {
+        const auto a = Load(d, pa + i + j * N);
+        const auto b = Load(d, pb + i + j * N);
+        sum[j] = MulAdd(a, b, sum[j]);  // accumulator j only depends on itself
+      }
+    }
+    // Reduction tree: sum of all accumulators by pairs into sum[0], then the
+    // lanes.
+    for (int power = 1; power < unroll; power *= 2) {
+      for (int i = 0; i < unroll; i += 2 * power) {
+        sum[i] += sum[i + power];
+      }
+    }
+    dot_ = GetLane(SumOfLanes(sum[0]));
+    return static_cast<FuncOutput>(dot_);
+  }
+  void Verify(size_t num_items) {  // Aborts if dot_ is unset or inaccurate.
+    if (dot_ == -1.0f) {
+      fprintf(stderr, "Dot: must call Verify after benchmark\n");
+      abort();
+    }
+
+    const float expected =
+        std::inner_product(a_.get(), a_.get() + num_items, b_, 0.0f);
+    const float rel_err = std::abs(expected - dot_) / expected;
+    if (rel_err > 1.1E-6f) {
+      fprintf(stderr, "Dot: expected %e actual %e (%e)\n", expected, dot_,
+              rel_err);
+      abort();
+    }
+  }
+
+ private:
+  float dot_;  // for Verify
+};
+
+// INTERMEDIATE: delta coding, b_[i] = a_[i] - a_[i - 1] with b_[0] = a_[0].
+// 1.0 cycles/float = bronze, 0.7 = silver, 0.4 = gold!
+struct BenchmarkDelta : public TwoArray {
+  FuncOutput operator()(const size_t num_items) const {
+#if HWY_TARGET == HWY_SCALAR  // plain loop, no vectors
+    b_[0] = a_[0];
+    for (size_t i = 1; i < num_items; ++i) {
+      b_[i] = a_[i] - a_[i - 1];
+    }
+#elif HWY_CAP_GE256
+    // Larger vectors are split into 128-bit blocks, easiest to use the
+    // unaligned load support to shift between them.
+    const HWY_FULL(float) df;
+    const size_t N = Lanes(df);
+    size_t i;
+    b_[0] = a_[0];
+    for (i = 1; i < N; ++i) {  // scalar code for the first vector
+      b_[i] = a_[i] - a_[i - 1];
+    }
+    for (; i < num_items; i += N) {
+      const auto a = Load(df, &a_[i]);
+      const auto shifted = LoadU(df, &a_[i - 1]);  // same data, one lane earlier
+      Store(a - shifted, df, &b_[i]);
+    }
+#else  // 128-bit
+    // Slightly better than unaligned loads
+    const HWY_CAPPED(float, 4) df;
+    const size_t N = Lanes(df);
+    size_t i;
+    b_[0] = a_[0];
+    for (i = 1; i < N; ++i) {  // scalar code for the first vector
+      b_[i] = a_[i] - a_[i - 1];
+    }
+    auto prev = Load(df, &a_[0]);
+    for (; i < num_items; i += Lanes(df)) {
+      const auto a = Load(df, &a_[i]);
+      // NOTE(review): presumably {prev[3], a[0], a[1], a[2]} - confirm.
+      const auto shifted = CombineShiftRightLanes<3>(a, prev);
+      prev = a;
+      Store(a - shifted, df, &b_[i]);
+    }
+#endif
+    return static_cast<FuncOutput>(b_[num_items - 1]);  // last delta
+  }
+
+  void Verify(size_t num_items) {  // Reports mismatches on stderr, no abort.
+    for (size_t i = 0; i < num_items; ++i) {
+      const float expected = (i == 0) ? a_[0] : a_[i] - a_[i - 1];
+      const float err = std::abs(expected - b_[i]);
+      if (err > 1E-6f) {
+        fprintf(stderr, "Delta: expected %e, actual %e\n", expected, b_[i]);
+      }
+    }
+  }
+};
+
+void RunBenchmarks() {  // Runs the demo and both benchmarks for HWY_TARGET.
+  Intro();
+  printf("------------------------ %s\n", TargetName(HWY_TARGET));
+  RunBenchmark<BenchmarkDot>("dot");
+  RunBenchmark<BenchmarkDelta>("delta");
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+HWY_EXPORT(RunBenchmarks);
+
+void Run() {  // Dispatches RunBenchmarks for each supported, generated target.
+  for (uint32_t target : SupportedAndGeneratedTargets()) {
+    SetSupportedTargetsForTest(target);  // presumably limits dispatch to target
+    HWY_DYNAMIC_DISPATCH(RunBenchmarks)();
+  }
+  SetSupportedTargetsForTest(0);  // Reset the mask afterwards.
+}
+
+}  // namespace hwy
+
+int main(int /*argc*/, char** /*argv*/) {  // command line is ignored
+  hwy::Run();
+  return 0;
+}
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/examples/skeleton-inl.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/examples/skeleton-inl.h
new file mode 100644
index 0000000000..d8136be4f5
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/examples/skeleton-inl.h
@@ -0,0 +1,62 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Demo of functions that might be called from multiple SIMD modules (either
+// other -inl.h files, or a .cc file between begin/end_target-inl). This is
+// optional - all SIMD code can reside in .cc files. However, this allows
+// splitting code into different files while still inlining instead of requiring
+// calling through function pointers.
+
+// Include guard (still compiled once per target)
+#if defined(HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
+#undef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
+#else
+#define HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
+#endif
+
+// It is fine to #include normal or *-inl headers.
+#include <stddef.h>
+
+#include "hwy/highway.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace skeleton {
+namespace HWY_NAMESPACE {
+
+using namespace hwy::HWY_NAMESPACE;
+
+// Example of a header-only function that is type-agnostic (the caller picks
+// the lane type T) and width-agnostic (uses best available instruction set).
+// Updates x_array[i] = mul_array[i] * x_array[i] + add_array[i] for i < size.
+// Only whole vectors are processed: size must be a multiple of Lanes(d).
+template <class D, typename T>
+HWY_MAYBE_UNUSED void MulAddLoop(const D d, const T* HWY_RESTRICT mul_array,
+                                 const T* HWY_RESTRICT add_array,
+                                 const size_t size, T* HWY_RESTRICT x_array) {
+  const size_t N = Lanes(d);  // elements handled per iteration
+  for (size_t i = 0; i < size; i += N) {
+    const auto mul = Load(d, mul_array + i);
+    const auto add = Load(d, add_array + i);
+    const auto x = Load(d, x_array + i);
+    Store(MulAdd(mul, x, add), d, x_array + i);
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace skeleton
+HWY_AFTER_NAMESPACE();
+
+#endif  // include guard
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/examples/skeleton.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/examples/skeleton.cc
new file mode 100644
index 0000000000..fc05eb371f
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/examples/skeleton.cc
@@ -0,0 +1,108 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/examples/skeleton.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+// First undef to prevent error when re-included.
+#undef HWY_TARGET_INCLUDE
+// For runtime dispatch, specify the name of the current file (unfortunately
+// __FILE__ is not reliable) so that foreach_target.h can re-include it.
+#define HWY_TARGET_INCLUDE "hwy/examples/skeleton.cc"
+// Generates code for each enabled target by re-including this source file.
+#include "hwy/foreach_target.h"
+
+#include "hwy/highway.h"
+
+// Optional, can instead add HWY_ATTR to all functions.
+HWY_BEFORE_NAMESPACE();
+namespace skeleton {
+namespace HWY_NAMESPACE {
+
+// Highway ops reside here; ADL does not find templates nor builtins.
+using namespace hwy::HWY_NAMESPACE;
+
+// Computes log2 of one vector of uint8_t inputs by converting them to float
+// and extracting the exponent field. Compiled once per target.
+template <class DF>
+HWY_NOINLINE void OneFloorLog2(const DF df, const uint8_t* HWY_RESTRICT values,
+                               uint8_t* HWY_RESTRICT log2) {
+  const Rebind<int32_t, DF> di32;  // Rebind: same lane count, other type
+  const Rebind<uint8_t, DF> du8;
+  const auto widened = PromoteTo(di32, Load(du8, values));
+  const auto float_bits = BitCast(di32, ConvertTo(df, widened));
+  // Shift out the 23 bits below the exponent, then subtract the bias of 127.
+  const auto unbiased = ShiftRight<23>(float_bits) - Set(di32, 127);
+  Store(DemoteTo(du8, unbiased), du8, log2);
+}
+
+HWY_NOINLINE void CodepathDemo() {
+  // Highway code is portable by default. Target-specific paths can still be
+  // chosen with #if HWY_TARGET == HWY_SSE4 or, as here, a capability macro.
+#if HWY_CAP_INTEGER64
+  const char* const int64_support = "Has int64";
+#else
+  const char* const int64_support = "No int64";
+#endif
+  printf("Target %s: %s\n", hwy::TargetName(HWY_TARGET), int64_support);
+}
+
+// Fills log2[0, count) from values via OneFloorLog2; RVV skips the remainder.
+HWY_NOINLINE void FloorLog2(const uint8_t* HWY_RESTRICT values, size_t count,
+                            uint8_t* HWY_RESTRICT log2) {
+  CodepathDemo();
+  // Second argument is necessary on RVV until it supports fractional lengths.
+  HWY_FULL(float, 4) df;
+  const size_t lanes = Lanes(df);
+  size_t pos = 0;
+  while (pos + lanes <= count) {  // whole vectors
+    OneFloorLog2(df, values + pos, log2 + pos);
+    pos += lanes;
+  }
+  // TODO(janwas): implement
+#if HWY_TARGET != HWY_RVV
+  for (; pos < count; ++pos) {  // leftovers, one element at a time
+    OneFloorLog2(HWY_CAPPED(float, 1)(), values + pos, log2 + pos);
+  }
+#endif
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace skeleton
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace skeleton {
+
+// This macro declares a static array used for dynamic dispatch; it resides in
+// the same outer namespace that contains FloorLog2.
+HWY_EXPORT(FloorLog2);
+
+// Optional wrapper, needed only because the function is exposed through the
+// header file. Within this module, HWY_DYNAMIC_DISPATCH(FloorLog2) can be used
+// directly and is equivalent to inlining this function.
+void CallFloorLog2(const uint8_t* HWY_RESTRICT in, const size_t count,
+                   uint8_t* HWY_RESTRICT out) {
+  HWY_DYNAMIC_DISPATCH(FloorLog2)(in, count, out);
+}
+
+// Optional: anything to compile only once, e.g. non-SIMD implementations of
+// public functions provided by this module, can go inside #if HWY_ONCE.
+
+}  // namespace skeleton
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/examples/skeleton.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/examples/skeleton.h
new file mode 100644
index 0000000000..4935b881eb
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/examples/skeleton.h
@@ -0,0 +1,35 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Demo interface to target-specific code in skeleton.cc
+
+// Normal header with include guard and namespace.
+#ifndef HIGHWAY_HWY_EXAMPLES_SKELETON_H_
+#define HIGHWAY_HWY_EXAMPLES_SKELETON_H_
+
+#include <stddef.h>
+
+// Platform-specific definitions used for declaring an interface, independent of
+// the SIMD instruction set.
+#include "hwy/base.h"  // HWY_RESTRICT
+
+namespace skeleton {
+
+// Computes base-2 logarithm by converting to float. Supports dynamic dispatch.
+void CallFloorLog2(const uint8_t* HWY_RESTRICT in, const size_t count,
+                   uint8_t* HWY_RESTRICT out);
+
+}  // namespace skeleton
+
+#endif  // HIGHWAY_HWY_EXAMPLES_SKELETON_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/examples/skeleton_test.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/examples/skeleton_test.cc
new file mode 100644
index 0000000000..4a6a8769b3
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/examples/skeleton_test.cc
@@ -0,0 +1,107 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Example of unit test for the "skeleton" library.
+
+#include "hwy/examples/skeleton.h"
+
+#include <stdio.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "examples/skeleton_test.cc"
+#include "hwy/foreach_target.h"
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+// Optional: factor out parts of the implementation into *-inl.h
+#include "hwy/examples/skeleton-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace skeleton {
+namespace HWY_NAMESPACE {
+
+using namespace hwy::HWY_NAMESPACE;
+
+// Checks CallFloorLog2 (skeleton.cc) on powers of two: log2 is the exponent.
+struct TestFloorLog2 {
+  template <class T, class DF>
+  HWY_NOINLINE void operator()(T /*unused*/, DF df) {
+    const size_t num = 5 * Lanes(df);
+    auto inputs = hwy::AllocateAligned<uint8_t>(num);
+    auto exponents = hwy::AllocateAligned<uint8_t>(num);
+    auto outputs = hwy::AllocateAligned<uint8_t>(num);
+    hwy::RandomState rng;
+    for (size_t i = 0; i < num; ++i) {
+      const uint32_t exponent = Random32(&rng) & 7;  // 0..7 fits in uint8_t
+      exponents[i] = static_cast<uint8_t>(exponent);
+      inputs[i] = static_cast<uint8_t>(1u << exponent);
+    }
+    CallFloorLog2(inputs.get(), num, outputs.get());
+    int checksum = 0;
+    for (size_t i = 0; i < num; ++i) {
+      // TODO(janwas): implement
+#if HWY_TARGET != HWY_RVV
+      HWY_ASSERT_EQ(exponents[i], outputs[i]);
+#endif
+      checksum += outputs[i];
+    }
+    hwy::PreventElision(checksum);
+  }
+};
+
+HWY_NOINLINE void TestAllFloorLog2() {  // TestFloorLog2 with float lanes
+  ForPartialVectors<TestFloorLog2>()(float());
+}
+
+// Calls function defined in skeleton-inl.h and verifies the values it stores.
+struct TestSumMulAdd {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    hwy::RandomState rng;
+    const size_t count = 4096;
+    EXPECT_TRUE(count % Lanes(d) == 0);  // MulAddLoop steps by Lanes(d)
+    auto mul = hwy::AllocateAligned<T>(count);
+    auto x = hwy::AllocateAligned<T>(count);
+    auto add = hwy::AllocateAligned<T>(count);
+    double expected_sum = 0.0;
+    for (size_t i = 0; i < count; ++i) {
+      mul[i] = static_cast<T>(Random32(&rng) & 0xF);
+      x[i] = static_cast<T>(Random32(&rng) & 0xFF);
+      add[i] = static_cast<T>(Random32(&rng) & 0xFF);
+      expected_sum += mul[i] * x[i] + add[i];  // at most 15 * 255 + 255: exact
+    }
+    HWY_ASSERT_EQ(4344240.0, expected_sum);  // guards the generated inputs
+    MulAddLoop(d, mul.get(), add.get(), count, x.get());
+    double actual_sum = 0.0;  // sum of what MulAddLoop stored into x
+    for (size_t i = 0; i < count; ++i) actual_sum += x[i];
+    HWY_ASSERT_EQ(expected_sum, actual_sum);
+  }
+};
+
+HWY_NOINLINE void TestAllSumMulAdd() {  // TestSumMulAdd for each float type
+  ForFloatTypes(ForPartialVectors<TestSumMulAdd>());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace skeleton
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace skeleton {
+HWY_BEFORE_TEST(SkeletonTest);
+HWY_EXPORT_AND_TEST_P(SkeletonTest, TestAllFloorLog2);
+HWY_EXPORT_AND_TEST_P(SkeletonTest, TestAllSumMulAdd);
+}  // namespace skeleton
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/foreach_target.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/foreach_target.h
new file mode 100644
index 0000000000..a0c4198b17
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/foreach_target.h
@@ -0,0 +1,161 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_FOREACH_TARGET_H_
+#define HIGHWAY_HWY_FOREACH_TARGET_H_
+
+// Re-includes the translation unit zero or more times to compile for any
+// targets except HWY_STATIC_TARGET. Defines unique HWY_TARGET each time so that
+// highway.h defines the corresponding macro/namespace.
+
+#include "hwy/targets.h"
+
+// *-inl.h may include other headers, which requires include guards to prevent
+// repeated inclusion. The guards must be reset after compiling each target, so
+// the header is again visible. This is done by flipping HWY_TARGET_TOGGLE,
+// defining it if undefined and vice versa. This macro is initially undefined
+// so that IDEs don't gray out the contents of each header.
+#ifdef HWY_TARGET_TOGGLE
+#error "This macro must not be defined outside foreach_target.h"
+#endif
+
+#ifdef HWY_HIGHWAY_INCLUDED  // highway.h include guard
+// Trigger fixup at the bottom of this header.
+#define HWY_ALREADY_INCLUDED
+
+// The next highway.h must re-include set_macros-inl.h because the first
+// highway.h chose the static target instead of what we will set below.
+#undef HWY_SET_MACROS_PER_TARGET
+#endif
+
+// Disable HWY_EXPORT in user code until we have generated all targets. Note
+// that a subsequent highway.h will not override this definition.
+#undef HWY_ONCE
+#define HWY_ONCE (0 || HWY_IDE)
+
+// Avoid warnings on #include HWY_TARGET_INCLUDE by hiding them from the IDE;
+// also skip if only 1 target defined (no re-inclusion will be necessary).
+#if !HWY_IDE && (HWY_TARGETS != HWY_STATIC_TARGET)
+
+#if !defined(HWY_TARGET_INCLUDE)
+#error ">1 target enabled => define HWY_TARGET_INCLUDE before foreach_target.h"
+#endif
+
+#if (HWY_TARGETS & HWY_SCALAR) && (HWY_STATIC_TARGET != HWY_SCALAR)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_SCALAR
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_NEON) && (HWY_STATIC_TARGET != HWY_NEON)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_NEON
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_SSE4) && (HWY_STATIC_TARGET != HWY_SSE4)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_SSE4
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_AVX2) && (HWY_STATIC_TARGET != HWY_AVX2)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_AVX2
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_AVX3) && (HWY_STATIC_TARGET != HWY_AVX3)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_AVX3
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_WASM) && (HWY_STATIC_TARGET != HWY_WASM)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_WASM
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#if (HWY_TARGETS & HWY_PPC8) && (HWY_STATIC_TARGET != HWY_PPC8)
+#undef HWY_TARGET
+#define HWY_TARGET HWY_PPC8
+#include HWY_TARGET_INCLUDE
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+#endif
+
+#endif  // !HWY_IDE && (HWY_TARGETS != HWY_STATIC_TARGET)
+
+// Now that all but the static target have been generated, re-enable HWY_EXPORT.
+#undef HWY_ONCE
+#define HWY_ONCE 1
+
+// If we re-include once per enabled target, the translation unit's
+// implementation would have to be skipped via #if to avoid redefining symbols.
+// We instead skip the re-include for HWY_STATIC_TARGET, and generate its
+// implementation when resuming compilation of the translation unit.
+#undef HWY_TARGET
+#define HWY_TARGET HWY_STATIC_TARGET
+
+#ifdef HWY_ALREADY_INCLUDED
+// Revert the previous toggle to prevent redefinitions for the static target.
+#ifdef HWY_TARGET_TOGGLE
+#undef HWY_TARGET_TOGGLE
+#else
+#define HWY_TARGET_TOGGLE
+#endif
+
+// Force re-inclusion of set_macros-inl.h now that HWY_TARGET is restored.
+#ifdef HWY_SET_MACROS_PER_TARGET
+#undef HWY_SET_MACROS_PER_TARGET
+#else
+#define HWY_SET_MACROS_PER_TARGET
+#endif
+#endif
+
+#endif  // HIGHWAY_HWY_FOREACH_TARGET_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/highway.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/highway.h
new file mode 100644
index 0000000000..cc1aa7c58d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/highway.h
@@ -0,0 +1,356 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This include guard is checked by foreach_target, so avoid the usual _H_
+// suffix to prevent copybara from renaming it. NOTE: ops/*-inl.h are included
+// after/outside this include guard.
+#ifndef HWY_HIGHWAY_INCLUDED
+#define HWY_HIGHWAY_INCLUDED
+
+// Main header required before using vector types.
+
+#include "hwy/base.h"
+#include "hwy/targets.h"
+
+namespace hwy {
+
+// API version (https://semver.org/); keep in sync with CMakeLists.txt.
+#define HWY_MAJOR 0
+#define HWY_MINOR 12
+#define HWY_PATCH 2
+
+//------------------------------------------------------------------------------
+// Shorthand for descriptors (defined in shared-inl.h) used to select overloads.
+
+// Because Highway functions take descriptor and/or vector arguments, ADL finds
+// these functions without requiring users in project::HWY_NAMESPACE to
+// qualify Highway functions with hwy::HWY_NAMESPACE. However, ADL rules for
+// templates require `using hwy::HWY_NAMESPACE::ShiftLeft;` etc. declarations.
+
+// HWY_FULL(T[,LMUL=1]) is a native vector/group. LMUL is the number of
+// registers in the group, and is ignored on targets that do not support groups.
+#define HWY_FULL1(T) hwy::HWY_NAMESPACE::Simd<T, HWY_LANES(T)>
+#define HWY_3TH_ARG(arg1, arg2, arg3, ...) arg3
+// Workaround for MSVC grouping __VA_ARGS__ into a single argument
+#define HWY_FULL_RECOMPOSER(args_with_paren) HWY_3TH_ARG args_with_paren
+// Trailing comma avoids -pedantic false alarm
+#define HWY_CHOOSE_FULL(...) \
+  HWY_FULL_RECOMPOSER((__VA_ARGS__, HWY_FULL2, HWY_FULL1, ))
+#define HWY_FULL(...) HWY_CHOOSE_FULL(__VA_ARGS__())(__VA_ARGS__)
+
+// Vector of up to MAX_N lanes. Discouraged; when possible, use Half<> instead.
+#define HWY_CAPPED(T, MAX_N) \
+  hwy::HWY_NAMESPACE::Simd<T, HWY_MIN(MAX_N, HWY_LANES(T))>
+
+//------------------------------------------------------------------------------
+// Export user functions for static/dynamic dispatch
+
+// Evaluates to 0 inside a translation unit if it is generating anything but the
+// static target (the last one if multiple targets are enabled). Used to prevent
+// redefinitions of HWY_EXPORT. Unless foreach_target.h is included, we only
+// compile once anyway, so this is 1 unless it is or has been included.
+#ifndef HWY_ONCE
+#define HWY_ONCE 1
+#endif
+
+// HWY_STATIC_DISPATCH(FUNC_NAME) is the namespace-qualified FUNC_NAME for
+// HWY_STATIC_TARGET (the only defined namespace unless HWY_TARGET_INCLUDE is
+// defined), and can be used to deduce the return type of Choose*.
+#if HWY_STATIC_TARGET == HWY_SCALAR
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SCALAR::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_RVV
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_RVV::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_WASM
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_WASM::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_NEON
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_SVE
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_SVE2
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE2::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_PPC8
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC8::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_SSE4
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSE4::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_AVX2
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX2::FUNC_NAME
+#elif HWY_STATIC_TARGET == HWY_AVX3
+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3::FUNC_NAME
+#endif  // HWY_STATIC_TARGET; no #else, so unlisted targets leave it undefined
+
+// Dynamic dispatch declarations.
+
+template <typename RetType, typename... Args>
+struct FunctionCache {
+  // Signature shared by every per-target implementation of the function.
+  using FunctionType = RetType(Args...);
+
+  // Trampoline with the same signature as the exported function. It is
+  // instantiated with that function's dispatch table, asks the global
+  // `chosen_target` to update itself and then forwards the arguments to the
+  // table entry selected by `chosen_target.GetIndex()`. Because
+  // `chosen_target` is shared rather than per-function, this one update also
+  // serves every other exported function.
+  template <FunctionType* const table[]>
+  static RetType ChooseAndCall(Args... args) {
+    // Reaching this point means the chosen target still has to be updated.
+    chosen_target.Update();
+    const auto index = chosen_target.GetIndex();
+    return table[index](args...);
+  }
+};
+
+// Deduces RetType and Args from the function pointer it is given. The returned
+// object carries no state; callers only need its type (e.g. via decltype).
+template <typename RetType, typename... Args>
+FunctionCache<RetType, Args...> FunctionCacheFactory(RetType (*)(Args...)) {
+  return {};
+}
+
+// HWY_CHOOSE_*(FUNC_NAME) expands to the function pointer for that target or
+// nullptr if that target was not compiled.
+#if HWY_TARGETS & HWY_SCALAR
+#define HWY_CHOOSE_SCALAR(FUNC_NAME) &N_SCALAR::FUNC_NAME
+#else
+// When scalar is not present and we try to use scalar because other targets
+// were disabled at runtime we fall back to the baseline with
+// HWY_STATIC_DISPATCH()
+#define HWY_CHOOSE_SCALAR(FUNC_NAME) &HWY_STATIC_DISPATCH(FUNC_NAME)
+#endif  // HWY_TARGETS & HWY_SCALAR
+
+#if HWY_TARGETS & HWY_WASM
+#define HWY_CHOOSE_WASM(FUNC_NAME) &N_WASM::FUNC_NAME
+#else
+#define HWY_CHOOSE_WASM(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_WASM
+
+#if HWY_TARGETS & HWY_RVV
+#define HWY_CHOOSE_RVV(FUNC_NAME) &N_RVV::FUNC_NAME
+#else
+#define HWY_CHOOSE_RVV(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_RVV
+
+#if HWY_TARGETS & HWY_NEON
+#define HWY_CHOOSE_NEON(FUNC_NAME) &N_NEON::FUNC_NAME
+#else
+#define HWY_CHOOSE_NEON(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_NEON
+
+#if HWY_TARGETS & HWY_SVE
+#define HWY_CHOOSE_SVE(FUNC_NAME) &N_SVE::FUNC_NAME
+#else
+#define HWY_CHOOSE_SVE(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_SVE
+
+#if HWY_TARGETS & HWY_SVE2
+#define HWY_CHOOSE_SVE2(FUNC_NAME) &N_SVE2::FUNC_NAME
+#else
+#define HWY_CHOOSE_SVE2(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_SVE2
+
+#if HWY_TARGETS & HWY_PPC8
+#define HWY_CHOOSE_PPC8(FUNC_NAME) &N_PPC8::FUNC_NAME
+#else  // PPC8 not compiled: there is no function to point at.
+#define HWY_CHOOSE_PPC8(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_PPC8
+
+#if HWY_TARGETS & HWY_SSE4
+#define HWY_CHOOSE_SSE4(FUNC_NAME) &N_SSE4::FUNC_NAME
+#else
+#define HWY_CHOOSE_SSE4(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_SSE4
+
+#if HWY_TARGETS & HWY_AVX2
+#define HWY_CHOOSE_AVX2(FUNC_NAME) &N_AVX2::FUNC_NAME
+#else
+#define HWY_CHOOSE_AVX2(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_AVX2
+
+#if HWY_TARGETS & HWY_AVX3
+#define HWY_CHOOSE_AVX3(FUNC_NAME) &N_AVX3::FUNC_NAME
+#else
+#define HWY_CHOOSE_AVX3(FUNC_NAME) nullptr
+#endif  // HWY_TARGETS & HWY_AVX3
+
+#define HWY_DISPATCH_TABLE(FUNC_NAME) \
+  HWY_CONCAT(FUNC_NAME, HighwayDispatchTable)
+
+// HWY_EXPORT(FUNC_NAME); expands to a static array that is used by
+// HWY_DYNAMIC_DISPATCH() to call the appropriate function at runtime. This
+// static array must be defined at the same namespace level as the function
+// it is exporting.
+// After being exported, it can be called from other parts of the same source
+// file using HWY_DYNAMIC_DISPATCH(), in particular from a function wrapper
+// like in the following example:
+//
+//   #include "hwy/highway.h"
+//   HWY_BEFORE_NAMESPACE();
+//   namespace skeleton {
+//   namespace HWY_NAMESPACE {
+//
+//   void MyFunction(int a, char b, const char* c) { ... }
+//
+//   // NOLINTNEXTLINE(google-readability-namespace-comments)
+//   }  // namespace HWY_NAMESPACE
+//   }  // namespace skeleton
+//   HWY_AFTER_NAMESPACE();
+//
+//   namespace skeleton {
+//   HWY_EXPORT(MyFunction);  // Defines the dispatch table in this scope.
+//
+//   void MyFunction(int a, char b, const char* c) {
+//     return HWY_DYNAMIC_DISPATCH(MyFunction)(a, b, c);
+//   }
+//   }  // namespace skeleton
+//
+
+#if HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
+
+// Simplified version for IDE or the dynamic dispatch case with only one target.
+// This case still uses a table, although of a single element, to provide the
+// same compile error conditions as with the dynamic dispatch case when multiple
+// targets are being compiled.
+#define HWY_EXPORT(FUNC_NAME)                                       \
+  HWY_MAYBE_UNUSED static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) \
+      const HWY_DISPATCH_TABLE(FUNC_NAME)[1] = {                    \
+          &HWY_STATIC_DISPATCH(FUNC_NAME)}
+#define HWY_DYNAMIC_DISPATCH(FUNC_NAME) HWY_STATIC_DISPATCH(FUNC_NAME)
+
+#else  // !HWY_IDE and more than one bit set in HWY_TARGETS
+
+// Dynamic dispatch case with one entry per dynamic target plus the scalar
+// mode and the initialization wrapper.
+#define HWY_EXPORT(FUNC_NAME)                                              \
+  static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME))                         \
+      const HWY_DISPATCH_TABLE(FUNC_NAME)[HWY_MAX_DYNAMIC_TARGETS + 2] = { \
+          /* The first entry in the table initializes the global cache and \
+           * calls the appropriate function. */                            \
+          &decltype(hwy::FunctionCacheFactory(&HWY_STATIC_DISPATCH(        \
+              FUNC_NAME)))::ChooseAndCall<HWY_DISPATCH_TABLE(FUNC_NAME)>,  \
+          HWY_CHOOSE_TARGET_LIST(FUNC_NAME),                               \
+          HWY_CHOOSE_SCALAR(FUNC_NAME),                                    \
+  }
+#define HWY_DYNAMIC_DISPATCH(FUNC_NAME) \
+  (*(HWY_DISPATCH_TABLE(FUNC_NAME)[hwy::chosen_target.GetIndex()]))
+
+#endif  // HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
+
+}  // namespace hwy
+
+#endif  // HWY_HIGHWAY_INCLUDED
+
+//------------------------------------------------------------------------------
+
+// NOTE: the following definitions and ops/*.h depend on HWY_TARGET, so we want
+// to include them once per target, which is ensured by the toggle check.
+// Because ops/*.h are included under it, they do not need their own guard.
+#if defined(HWY_HIGHWAY_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
+#ifdef HWY_HIGHWAY_PER_TARGET
+#undef HWY_HIGHWAY_PER_TARGET
+#else
+#define HWY_HIGHWAY_PER_TARGET
+#endif  // HWY_HIGHWAY_PER_TARGET is now flipped
+
+#undef HWY_FULL2
+#if HWY_TARGET == HWY_RVV
+#define HWY_FULL2(T, LMUL) hwy::HWY_NAMESPACE::Simd<T, HWY_LANES(T) * (LMUL)>
+#else
+#define HWY_FULL2(T, LMUL) hwy::HWY_NAMESPACE::Simd<T, HWY_LANES(T)>
+#endif  // HWY_TARGET == HWY_RVV
+
+// These define ops inside namespace hwy::HWY_NAMESPACE.
+#if HWY_TARGET == HWY_SSE4
+#include "hwy/ops/x86_128-inl.h"
+#elif HWY_TARGET == HWY_AVX2
+#include "hwy/ops/x86_256-inl.h"
+#elif HWY_TARGET == HWY_AVX3
+#include "hwy/ops/x86_512-inl.h"
+#elif HWY_TARGET == HWY_PPC8
+#error "PPC is not yet supported"
+#elif HWY_TARGET == HWY_NEON
+#include "hwy/ops/arm_neon-inl.h"
+#elif HWY_TARGET == HWY_SVE || HWY_TARGET == HWY_SVE2
+#include "hwy/ops/arm_sve-inl.h"
+#elif HWY_TARGET == HWY_WASM
+#include "hwy/ops/wasm_128-inl.h"
+#elif HWY_TARGET == HWY_RVV
+#include "hwy/ops/rvv-inl.h"
+#elif HWY_TARGET == HWY_SCALAR
+#include "hwy/ops/scalar-inl.h"
+#else
+#pragma message("HWY_TARGET does not match any known target")
+#endif  // HWY_TARGET
+
+// Commonly used functions/types that must come after ops are defined.
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Lane type of vector V (the type GetLane returns), e.g. float.
+template <class V>
+using LaneType = decltype(GetLane(V()));
+
+// Vector type for descriptor D (the type Zero(D()) returns), e.g. Vec128<float>
+// for Simd<float, 4>. Useful as the return type of functions that do not take a
+// vector argument, as an argument type if the function only has a template
+// argument for D, or for explicit type names instead of auto. May be built-in.
+template <class D>
+using Vec = decltype(Zero(D()));
+
+// Mask type for descriptor D (the type MaskFromVec returns for Vec<D>). Useful
+// as the return type of functions that do not take a mask argument, as an
+// argument type if the only template argument is D, or instead of auto.
+template <class D>
+using Mask = decltype(MaskFromVec(Zero(D())));
+
+// Returns v limited to [lo, hi], computed as Min(Max(lo, v), hi).
+template <class V>
+HWY_API V Clamp(const V v, const V lo, const V hi) {
+  return Min(Max(lo, v), hi);
+}
+
+// Lane-granular variant of CombineShiftRightBytes. Neither is available for
+// the scalar target. TODO(janwas): implement for RVV
+#if HWY_TARGET != HWY_SCALAR && HWY_TARGET != HWY_RVV
+
+template <size_t kLanes, class V>
+HWY_API V CombineShiftRightLanes(const V hi, const V lo) {
+  constexpr size_t kBytes = kLanes * sizeof(LaneType<V>);  // lanes -> bytes
+  return CombineShiftRightBytes<kBytes>(hi, lo);
+}
+#endif  // HWY_TARGET != HWY_SCALAR && HWY_TARGET != HWY_RVV
+
+// Returns a vector in which each lane has only its most significant bit set.
+template <class D>
+HWY_API Vec<D> SignBit(D d) {
+  using TU = MakeUnsigned<TFromD<D>>;
+  const TU msb = static_cast<TU>(TU(1) << (sizeof(TU) * 8 - 1));
+  return BitCast(d, Set(Rebind<TU, D>(), msb));
+}
+
+// Returns a vector with quiet NaN in every lane.
+template <class D>
+HWY_API Vec<D> NaN(D d) {
+  using DI = RebindToSigned<D>;
+  // LimitsMax sets all exponent and mantissa bits to 1. Setting only the
+  // exponent and the mantissa MSB (the quiet bit) would already be enough.
+  return BitCast(d, Set(DI(), LimitsMax<TFromD<DI>>()));
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // HWY_HIGHWAY_PER_TARGET
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/highway_test.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/highway_test.cc
new file mode 100644
index 0000000000..ebe57f0ed5
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/highway_test.cc
@@ -0,0 +1,305 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "highway_test.cc"
+#include "hwy/foreach_target.h"
+#include "hwy/highway.h"
+#include "hwy/nanobenchmark.h"  // Unpredictable1
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+struct TestSet {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    auto expected = AllocateAligned<T>(N);
+    T* const lanes = expected.get();
+
+    // Zero: every lane is 0.
+    for (size_t i = 0; i < N; ++i) {
+      lanes[i] = T(0);
+    }
+    HWY_ASSERT_VEC_EQ(d, lanes, Zero(d));
+
+    // Set: every lane holds the broadcast value.
+    std::fill(lanes, lanes + N, T(2));
+    HWY_ASSERT_VEC_EQ(d, lanes, Set(d, T(2)));
+
+    // Iota: lane i holds 5 + i.
+    for (size_t i = 0; i < N; ++i) {
+      lanes[i] = T(5 + i);
+    }
+    HWY_ASSERT_VEC_EQ(d, lanes, Iota(d, T(5)));
+
+    // Undefined: no lane values are checked, the vector is only stored.
+    Store(Undefined(d), d, lanes);
+  }
+};
+
+HWY_NOINLINE void TestAllSet() {
+  ForAllTypes(ForPartialVectors<TestSet>());
+}
+
+// Checks that integer add/sub wrap around (mod 2^bits).
+struct TestOverflow {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto one = Set(d, T(1));
+    const auto lowest = Set(d, LimitsMin<T>());
+    const auto highest = Set(d, LimitsMax<T>());
+    // min - 1 wraps to max (unsigned underflow / negative -> positive).
+    HWY_ASSERT_VEC_EQ(d, highest, lowest - one);
+    // max + 1 wraps to min (unsigned overflow / positive -> negative).
+    HWY_ASSERT_VEC_EQ(d, lowest, highest + one);
+  }
+};
+
+HWY_NOINLINE void TestAllOverflow() {
+  ForIntegerTypes(ForPartialVectors<TestOverflow>());
+}
+
+struct TestSignBitInteger {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto zero = Zero(d);
+    const auto sign = SignBit(d);
+    const auto all_ones = VecFromMask(d, Eq(zero, zero));
+    const auto below_sign = Sub(sign, Set(d, 1));
+
+    // sign + sign carries the only set bit out of the lane, leaving zero.
+    HWY_ASSERT_VEC_EQ(d, zero, Add(sign, sign));
+    // sign + (sign - 1) is all-ones only if no bit below the MSB is set in
+    // `sign` (only +/- and logical ops are available for all types).
+    HWY_ASSERT_VEC_EQ(d, all_ones, Add(sign, below_sign));
+  }
+};
+
+struct TestSignBitFloat {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto sign = SignBit(d);
+    const auto pos = Set(d, 2.25);
+    const auto neg = Set(d, -2.25);
+    HWY_ASSERT_VEC_EQ(d, Or(pos, sign), neg);      // setting it: 2.25 -> -2.25
+    HWY_ASSERT_VEC_EQ(d, AndNot(sign, neg), pos);  // clearing it: -2.25 -> 2.25
+    // A vector with only the sign bit set is expected to equal Zero(d).
+    HWY_ASSERT_VEC_EQ(d, Zero(d), sign);
+  }
+};
+
+HWY_NOINLINE void TestAllSignBit() {
+  ForIntegerTypes(ForPartialVectors<TestSignBitInteger>());
+  ForFloatTypes(ForPartialVectors<TestSignBitFloat>());
+}
+
+// std::isnan returns false for 0x7F..FF in clang AVX3 builds, so DIY.
+template <typename TF>
+bool IsNaN(TF f) {
+  MakeUnsigned<TF> bits;
+  memcpy(&bits, &f, sizeof(f));
+  bits = static_cast<decltype(bits)>(bits << 1);  // shift the sign bit out
+  bits = static_cast<decltype(bits)>(bits >> 1);  // and a zero back in
+  // NaN if all exponent bits are set and the mantissa is not zero.
+  return ExponentMask<decltype(bits)>() < bits;
+}
+
+// Aborts (reporting file/line) unless the lane returned by GetLane(v) is NaN.
+template <class D, class V>
+HWY_NOINLINE void AssertNaN(const D d, const V v, const char* file, int line) {
+  using T = TFromD<D>;
+  const T lane = GetLane(v);
+  if (IsNaN(lane)) return;
+  MakeUnsigned<T> bits;
+  memcpy(&bits, &lane, sizeof(lane));
+  const std::string type_name = TypeName(T(), Lanes(d));
+  // RVV lacks PRIu64, so use size_t; double will be truncated on 32-bit.
+  Abort(file, line, "Expected %s NaN, got %E (%zu)", type_name.c_str(), lane,
+        size_t(bits));
+}
+
+#define HWY_ASSERT_NAN(d, v) AssertNaN(d, v, __FILE__, __LINE__)
+
+struct TestNaN {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto v1 = Set(d, T(Unpredictable1()));
+    const auto nan = IfThenElse(Eq(v1, Set(d, T(1))), NaN(d), v1);
+    HWY_ASSERT_NAN(d, nan);
+
+    // Arithmetic
+    HWY_ASSERT_NAN(d, Add(nan, v1));
+    HWY_ASSERT_NAN(d, Add(v1, nan));
+    HWY_ASSERT_NAN(d, Sub(nan, v1));
+    HWY_ASSERT_NAN(d, Sub(v1, nan));
+    HWY_ASSERT_NAN(d, Mul(nan, v1));
+    HWY_ASSERT_NAN(d, Mul(v1, nan));
+    HWY_ASSERT_NAN(d, Div(nan, v1));
+    HWY_ASSERT_NAN(d, Div(v1, nan));
+
+    // FMA
+    HWY_ASSERT_NAN(d, MulAdd(nan, v1, v1));
+    HWY_ASSERT_NAN(d, MulAdd(v1, nan, v1));
+    HWY_ASSERT_NAN(d, MulAdd(v1, v1, nan));
+    HWY_ASSERT_NAN(d, MulSub(nan, v1, v1));
+    HWY_ASSERT_NAN(d, MulSub(v1, nan, v1));
+    HWY_ASSERT_NAN(d, MulSub(v1, v1, nan));
+    HWY_ASSERT_NAN(d, NegMulAdd(nan, v1, v1));
+    HWY_ASSERT_NAN(d, NegMulAdd(v1, nan, v1));
+    HWY_ASSERT_NAN(d, NegMulAdd(v1, v1, nan));
+    HWY_ASSERT_NAN(d, NegMulSub(nan, v1, v1));
+    HWY_ASSERT_NAN(d, NegMulSub(v1, nan, v1));
+    HWY_ASSERT_NAN(d, NegMulSub(v1, v1, nan));
+
+    // Sqrt (the approximate reciprocals are covered by TestF32NaN)
+    HWY_ASSERT_NAN(d, Sqrt(nan));
+
+    // Sign manipulation
+    HWY_ASSERT_NAN(d, Abs(nan));
+    HWY_ASSERT_NAN(d, Neg(nan));
+    HWY_ASSERT_NAN(d, CopySign(nan, v1));
+    HWY_ASSERT_NAN(d, CopySignToAbs(nan, v1));
+
+    // Rounding
+    HWY_ASSERT_NAN(d, Ceil(nan));
+    HWY_ASSERT_NAN(d, Floor(nan));
+    HWY_ASSERT_NAN(d, Round(nan));
+    HWY_ASSERT_NAN(d, Trunc(nan));
+
+    // Logical (And/AndNot/Xor will clear NaN!)
+    HWY_ASSERT_NAN(d, Or(nan, v1));
+
+    // Comparison
+    HWY_ASSERT(AllFalse(Eq(nan, v1)));
+    HWY_ASSERT(AllFalse(Gt(nan, v1)));
+    HWY_ASSERT(AllFalse(Lt(nan, v1)));
+    HWY_ASSERT(AllFalse(Ge(nan, v1)));
+    HWY_ASSERT(AllFalse(Le(nan, v1)));
+
+    // Reduction
+    HWY_ASSERT_NAN(d, SumOfLanes(nan));
+// TODO(janwas): re-enable after QEMU is fixed
+#if HWY_TARGET != HWY_RVV
+    HWY_ASSERT_NAN(d, MinOfLanes(nan));
+    HWY_ASSERT_NAN(d, MaxOfLanes(nan));
+#endif  // HWY_TARGET != HWY_RVV
+
+    // Min/Max
+#if HWY_ARCH_X86 && HWY_TARGET != HWY_SCALAR
+    // x86 SIMD returns the second operand if any input is NaN.
+    HWY_ASSERT_VEC_EQ(d, v1, Min(nan, v1));
+    HWY_ASSERT_VEC_EQ(d, v1, Max(nan, v1));
+    HWY_ASSERT_NAN(d, Min(v1, nan));
+    HWY_ASSERT_NAN(d, Max(v1, nan));
+#elif HWY_ARCH_WASM
+    // Should return NaN if any input is NaN, but does not for scalar.
+    // TODO(janwas): remove once this is fixed.
+#elif HWY_TARGET == HWY_NEON && HWY_ARCH_ARM_V7
+    // ARMv7 NEON returns NaN if any input is NaN.
+    HWY_ASSERT_NAN(d, Min(v1, nan));
+    HWY_ASSERT_NAN(d, Max(v1, nan));
+    HWY_ASSERT_NAN(d, Min(nan, v1));
+    HWY_ASSERT_NAN(d, Max(nan, v1));
+#else
+    // IEEE 754-2019 minimumNumber is defined as the other argument if exactly
+    // one is NaN, and qNaN if both are.
+    HWY_ASSERT_VEC_EQ(d, v1, Min(nan, v1));
+    HWY_ASSERT_VEC_EQ(d, v1, Max(nan, v1));
+    HWY_ASSERT_VEC_EQ(d, v1, Min(v1, nan));
+    HWY_ASSERT_VEC_EQ(d, v1, Max(v1, nan));
+#endif  // platform-specific Min/Max behavior with one NaN input
+    HWY_ASSERT_NAN(d, Min(nan, nan));
+    HWY_ASSERT_NAN(d, Max(nan, nan));
+
+    // Comparison (same checks as above, repeated after the Min/Max section)
+    HWY_ASSERT(AllFalse(Eq(nan, v1)));
+    HWY_ASSERT(AllFalse(Gt(nan, v1)));
+    HWY_ASSERT(AllFalse(Lt(nan, v1)));
+    HWY_ASSERT(AllFalse(Ge(nan, v1)));
+    HWY_ASSERT(AllFalse(Le(nan, v1)));
+  }
+};
+
+// NaN propagation through ops that are only available for float32.
+struct TestF32NaN {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto one = Set(d, T(Unpredictable1()));
+    const auto nan = IfThenElse(Eq(one, Set(d, T(1))), NaN(d), one);
+    HWY_ASSERT_NAN(d, ApproximateReciprocal(nan));
+    HWY_ASSERT_NAN(d, ApproximateReciprocalSqrt(nan));
+    HWY_ASSERT_NAN(d, AbsDiff(nan, one));
+    HWY_ASSERT_NAN(d, AbsDiff(one, nan));
+  }
+};
+
+HWY_NOINLINE void TestAllNaN() {
+  ForFloatTypes(ForPartialVectors<TestNaN>());
+  ForPartialVectors<TestF32NaN>()(float());
+}
+
+struct TestCopyAndAssign {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto source = Iota(d, 3);
+    // Copy construction must reproduce the source vector.
+    auto copied(source);
+    HWY_ASSERT_VEC_EQ(d, source, copied);
+
+    // Assignment over an Undefined() vector must do the same.
+    auto assigned = Undefined(d);
+    assigned = source;
+    HWY_ASSERT_VEC_EQ(d, source, assigned);
+  }
+};
+
+HWY_NOINLINE void TestAllCopyAndAssign() {
+  ForAllTypes(ForPartialVectors<TestCopyAndAssign>());
+}
+
+struct TestGetLane {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    HWY_ASSERT_EQ(T(0), GetLane(Zero(d)));    // lane read back from Zero(d)
+    HWY_ASSERT_EQ(T(1), GetLane(Set(d, 1)));  // lane read back from Set(d, 1)
+  }
+};
+
+HWY_NOINLINE void TestAllGetLane() {
+  ForAllTypes(ForPartialVectors<TestGetLane>());
+}
+
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE  // Nonzero only while generating the static target.
+namespace hwy {
+HWY_BEFORE_TEST(HighwayTest);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllSet);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllOverflow);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllSignBit);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllNaN);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllCopyAndAssign);
+HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllGetLane);
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/nanobenchmark.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/nanobenchmark.cc
new file mode 100644
index 0000000000..a31ca1b263
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/nanobenchmark.cc
@@ -0,0 +1,695 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/nanobenchmark.h"
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>  // abort
+#include <string.h>  // memcpy
+#include <time.h>    // clock_gettime
+
+#include <algorithm>  // sort
+#include <array>
+#include <atomic>
+#include <limits>
+#include <numeric>  // iota
+#include <random>
+#include <string>
+#include <vector>
+
+#if defined(_WIN32) || defined(_WIN64)
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif  // NOMINMAX
+#include <windows.h>
+#endif
+
+#if defined(__MACH__)
+#include <mach/mach.h>
+#include <mach/mach_time.h>
+#endif
+
+#if defined(__HAIKU__)
+#include <OS.h>
+#endif
+
+#include "hwy/base.h"
+#if HWY_ARCH_PPC
+#include <sys/platform/ppc.h>  // NOLINT __ppc_get_timebase_freq
+#elif HWY_ARCH_X86
+
+#if HWY_COMPILER_MSVC
+#include <intrin.h>
+#else
+#include <cpuid.h>  // NOLINT
+#endif              // HWY_COMPILER_MSVC
+
+#endif  // HWY_ARCH_X86
+
+namespace hwy {
+namespace {
+namespace timer {
+
+// Ticks := platform-specific timer values (CPU cycles on x86). Must be
+// unsigned to guarantee wraparound on overflow.
+using Ticks = uint64_t;
+
+// Start/Stop return absolute timestamps and must be placed immediately before
+// and after the region to measure. We provide separate Start/Stop functions
+// because they use different fences.
+//
+// Background: RDTSC is not 'serializing'; earlier instructions may complete
+// after it, and/or later instructions may complete before it. 'Fences' ensure
+// regions' elapsed times are independent of such reordering. The only
+// documented unprivileged serializing instruction is CPUID, which acts as a
+// full fence (no reordering across it in either direction). Unfortunately
+// the latency of CPUID varies wildly (perhaps made worse by not initializing
+// its EAX input). Because it cannot reliably be deducted from the region's
+// elapsed time, it must not be included in the region to measure (i.e.
+// between the two RDTSC).
+//
+// The newer RDTSCP is sometimes described as serializing, but it actually
+// only serves as a half-fence with release semantics. Although all
+// instructions in the region will complete before the final timestamp is
+// captured, subsequent instructions may leak into the region and increase the
+// elapsed time. Inserting another fence after the final RDTSCP would prevent
+// such reordering without affecting the measured region.
+//
+// Fortunately, such a fence exists. The LFENCE instruction is only documented
+// to delay later loads until earlier loads are visible. However, Intel's
+// reference manual says it acts as a full fence (waiting until all earlier
+// instructions have completed, and delaying later instructions until it
+// completes). AMD assigns the same behavior to MFENCE.
+//
+// We need a fence before the initial RDTSC to prevent earlier instructions
+// from leaking into the region, and arguably another after RDTSC to avoid
+// region instructions from completing before the timestamp is recorded.
+// When surrounded by fences, the additional RDTSCP half-fence provides no
+// benefit, so the initial timestamp can be recorded via RDTSC, which has
+// lower overhead than RDTSCP because it does not read TSC_AUX. In summary,
+// we define Start = LFENCE/RDTSC/LFENCE; Stop = RDTSCP/LFENCE.
+//
+// Using Start+Start leads to higher variance and overhead than Stop+Stop.
+// However, Stop+Stop includes an LFENCE in the region measurements, which
+// adds a delay dependent on earlier loads. The combination of Start+Stop
+// is faster than Start+Start and more consistent than Stop+Stop because
+// the first LFENCE already delayed subsequent loads before the measured
+// region. This combination seems not to have been considered in prior work:
+// http://akaros.cs.berkeley.edu/lxr/akaros/kern/arch/x86/rdtsc_test.c
+//
+// Note: performance counters can measure 'exact' instructions-retired or
+// (unhalted) cycle counts. The RDPMC instruction is not serializing and also
+// requires fences. Unfortunately, it is not accessible on all OSes and we
+// prefer to avoid kernel-mode drivers. Performance counters are also affected
+// by several under/over-count errata, so we use the TSC instead.
+
+// Returns a 64-bit timestamp in unit of 'ticks'; to convert to seconds,
+// divide by InvariantTicksPerSecond.
+inline Ticks Start() {
+  Ticks t;
+#if HWY_ARCH_PPC
+  asm volatile("mfspr %0, %1" : "=r"(t) : "i"(268));
+#elif HWY_ARCH_X86 && HWY_COMPILER_MSVC
+  _ReadWriteBarrier();
+  _mm_lfence();
+  _ReadWriteBarrier();
+  t = __rdtsc();
+  _ReadWriteBarrier();
+  _mm_lfence();
+  _ReadWriteBarrier();
+#elif HWY_ARCH_X86_64
+  asm volatile(
+      "lfence\n\t"
+      "rdtsc\n\t"
+      "shl $32, %%rdx\n\t"
+      "or %%rdx, %0\n\t"
+      "lfence"
+      : "=a"(t)
+      :
+      // "memory" avoids reordering. rdx = TSC >> 32.
+      // "cc" = flags modified by SHL.
+      : "rdx", "memory", "cc");
+#elif HWY_ARCH_RVV
+  asm volatile("rdcycle %0" : "=r"(t));
+#elif defined(_WIN32) || defined(_WIN64)
+  LARGE_INTEGER counter;
+  (void)QueryPerformanceCounter(&counter);
+  t = counter.QuadPart;
+#elif defined(__MACH__)
+  t = mach_absolute_time();
+#elif defined(__HAIKU__)
+  t = system_time_nsecs();  // since boot
+#else  // POSIX
+  timespec ts;
+  clock_gettime(CLOCK_MONOTONIC, &ts);
+  t = ts.tv_sec * 1000000000LL + ts.tv_nsec;  // seconds + nsec, in nanoseconds
+#endif  // timestamp source for this platform
+  return t;
+}
+
+inline Ticks Stop() {  // End-of-region counterpart of Start().
+  uint64_t t;
+#if HWY_ARCH_PPC
+  asm volatile("mfspr %0, %1" : "=r"(t) : "i"(268));
+#elif HWY_ARCH_X86 && HWY_COMPILER_MSVC
+  _ReadWriteBarrier();
+  unsigned aux;
+  t = __rdtscp(&aux);
+  _ReadWriteBarrier();
+  _mm_lfence();
+  _ReadWriteBarrier();
+#elif HWY_ARCH_X86_64
+  // Use inline asm because __rdtscp generates code to store TSC_AUX (ecx).
+  asm volatile(
+      "rdtscp\n\t"
+      "shl $32, %%rdx\n\t"
+      "or %%rdx, %0\n\t"
+      "lfence"
+      : "=a"(t)
+      :
+      // "memory" avoids reordering. rcx = TSC_AUX. rdx = TSC >> 32.
+      // "cc" = flags modified by SHL.
+      : "rcx", "rdx", "memory", "cc");
+#else  // No dedicated stop sequence in this build: reuse Start().
+  t = Start();
+#endif  // timestamp source for this platform
+  return t;
+}
+
+}  // namespace timer
+
+namespace robust_statistics {
+
+// In-place ascending sort of integral values (e.g. for Mode). About 3x faster
+// than std::sort for input distributions with very few unique values.
+template <class T>
+void CountingSort(T* values, size_t num_values) {
+  // One (value, occurrence count) entry per distinct value, appended in order
+  // of first appearance and searched linearly (similar to flat_map).
+  using Unique = std::pair<T, int>;
+  std::vector<Unique> unique;
+  for (const T* in = values; in != values + num_values; ++in) {
+    const T value = *in;
+    auto pos = unique.begin();
+    while (pos != unique.end() && !(pos->first == value)) ++pos;
+    if (pos != unique.end()) {
+      ++pos->second;
+    } else {
+      unique.emplace_back(value, 1);
+    }
+  }
+
+  // Order the entries by ascending value (pair.first).
+  std::sort(unique.begin(), unique.end());
+
+  // Write each value back as many times as it occurred.
+  T* HWY_RESTRICT out = values;
+  for (const Unique& entry : unique) {
+    out = std::fill_n(out, entry.second, entry.first);
+  }
+  // Together, the counts must cover exactly the original number of values.
+  NANOBENCHMARK_CHECK(out == values + num_values);
+}
+
+// @return i in [idx_begin, idx_begin + half_count) that minimizes
+// sorted[i + half_count] - sorted[i] (the first such i on ties).
+template <typename T>
+size_t MinRange(const T* const HWY_RESTRICT sorted, const size_t idx_begin,
+                const size_t half_count) {
+  const size_t idx_end = idx_begin + half_count;
+  size_t best_idx = 0;
+  T best_range = std::numeric_limits<T>::max();
+  for (size_t lo = idx_begin; lo < idx_end; ++lo) {
+    const size_t hi = lo + half_count;
+    NANOBENCHMARK_CHECK(sorted[lo] <= sorted[hi]);
+    const T range = sorted[hi] - sorted[lo];
+    if (range < best_range) {
+      best_range = range;
+      best_idx = lo;
+    }
+  }
+  return best_idx;
+}
+
+// Estimates the mode with the Half Sample Mode estimator proposed by Bickel
+// in "On a fast, robust estimator of the mode": MinRange is applied to
+// successively halved intervals, with complexity O(N log N). "sorted" must be
+// in ascending order. The mode is less affected by outliers in highly-skewed
+// distributions than the median. The averaging operation below assumes "T" is
+// an unsigned integer type.
+template <typename T>
+T ModeOfSorted(const T* const HWY_RESTRICT sorted, const size_t num_values) {
+  size_t start = 0;
+  size_t half = num_values / 2;
+  // Each pass keeps the narrowest half-interval and halves the window size.
+  for (; half > 1; half /= 2) {
+    start = MinRange(sorted, start, half);
+  }
+
+  const T first = sorted[start];
+  if (half == 0) {
+    return first;  // num_values < 2: no second element to average with
+  }
+  NANOBENCHMARK_CHECK(half == 1);
+  // Mean of the two remaining candidates; the +1 rounds halves upward.
+  return (first + sorted[start + 1] + 1) / 2;
+}
+
+// Returns the estimated mode (see ModeOfSorted). Side effect: sorts "values".
+template <typename T>
+T Mode(T* values, const size_t num_values) {
+  CountingSort(values, num_values);
+  return ModeOfSorted(values, num_values);
+}
+
+template <typename T, size_t N>  // Overload for arrays; N is deduced.
+T Mode(T (&values)[N]) {
+  return Mode(&values[0], N);  // Side effect: sorts "values".
+}
+
+// Returns the median (for an even count, the rounded-up mean of the two middle
+// elements). Side effect: sorts "values". "num_values" must be nonzero.
+template <typename T>
+T Median(T* values, const size_t num_values) {
+  NANOBENCHMARK_CHECK(num_values != 0);
+  std::sort(values, values + num_values);
+  const size_t half = num_values / 2;
+  if (num_values % 2) {
+    return values[half];  // Odd count: return middle.
+  }
+  // Even count: return average of middle two.
+  return (values[half] + values[half - 1] + 1) / 2;
+}
+
+// Returns the median of |values[i] - median|, a robust measure of variability.
+template <typename T>
+T MedianAbsoluteDeviation(const T* values, const size_t num_values,
+                          const T median) {
+  NANOBENCHMARK_CHECK(num_values != 0);
+  const int64_t center = int64_t(median);
+  std::vector<T> deviations;
+  deviations.reserve(num_values);
+  for (const T* it = values; it != values + num_values; ++it) {
+    deviations.push_back(static_cast<T>(std::abs(int64_t(*it) - center)));
+  }
+  return Median(deviations.data(), num_values);
+}
+
+}  // namespace robust_statistics
+}  // namespace
+namespace platform {
+namespace {
+
+// Prevents the compiler from eliding the computations that led to "output".
+template <class T>
+inline void PreventElision(T&& output) {
+#if HWY_COMPILER_MSVC == 0
+  // Empty asm statement that tells the compiler "output" is being read and
+  // modified. The +r constraint avoids unnecessary writes to memory, but only
+  // works for built-in types (typically FuncOutput).
+  asm volatile("" : "+r"(output) : : "memory");
+#else
+  // MSVC does not support inline assembly anymore (and never supported GCC's
+  // RTL constraints). Self-assignment with #pragma optimize("off") might be
+  // expected to prevent elision, but it does not with MSVC 2015. Type-punning
+  // with volatile pointers generates inefficient code on MSVC 2017.
+  static std::atomic<T> dummy(T{});
+  dummy.store(output, std::memory_order_relaxed);  // hence an atomic store
+#endif
+}
+
+#if HWY_ARCH_X86
+
+void Cpuid(const uint32_t level, const uint32_t count,  // leaf, sub-leaf
+           uint32_t* HWY_RESTRICT abcd) {  // out: four result registers
+#if HWY_COMPILER_MSVC
+  int regs[4];
+  __cpuidex(regs, level, count);
+  for (int i = 0; i < 4; ++i) {
+    abcd[i] = regs[i];  // implicit int -> uint32_t conversion
+  }
+#else
+  uint32_t a;
+  uint32_t b;
+  uint32_t c;
+  uint32_t d;
+  __cpuid_count(level, count, a, b, c, d);
+  abcd[0] = a;
+  abcd[1] = b;
+  abcd[2] = c;
+  abcd[3] = d;
+#endif
+}
+
+std::string BrandString() {
+  char text[49];
+  std::array<uint32_t, 4> regs;
+
+  // The brand string needs extended leaves up to 0x80000004; else return "".
+  Cpuid(0x80000000U, 0, regs.data());
+  if (regs[0] < 0x80000004U) {
+    return std::string();
+  }
+
+  for (size_t part = 0; part < 3; ++part) {
+    Cpuid(static_cast<uint32_t>(0x80000002U + part), 0, regs.data());
+    memcpy(text + part * 16, regs.data(), sizeof(regs));
+  }
+  text[48] = 0;
+  return text;
+}
+
+// Returns the frequency quoted inside the brand string, converted to Hz, or
+// 0.0 if none is found. This does not account for throttling nor Turbo Boost.
+double NominalClockRate() {
+  const std::string brand = BrandString();
+  // Brand strings include the maximum configured frequency. These unit
+  // suffixes are defined by Intel CPUID documentation.
+  const char* units[3] = {"MHz", "GHz", "THz"};
+  const double scales[3] = {1E6, 1E9, 1E12};
+  for (size_t u = 0; u < 3; ++u) {
+    const size_t unit_pos = brand.find(units[u]);
+    if (unit_pos == std::string::npos) continue;
+    // Walk back to the space that precedes the number.
+    const size_t space_pos = brand.rfind(' ', unit_pos - 1);
+    if (space_pos == std::string::npos) continue;
+    // The number is the text between that space and the unit suffix.
+    const size_t first = space_pos + 1;
+    const std::string digits = brand.substr(first, unit_pos - first);
+    return std::stod(digits) * scales[u];
+  }
+
+  return 0.0;
+}
+
+#endif  // HWY_ARCH_X86
+
+}  // namespace
+
+double InvariantTicksPerSecond() {  // frequency of the timer::Start() ticks
+#if HWY_ARCH_PPC
+  return __ppc_get_timebase_freq();
+#elif HWY_ARCH_X86
+  // We assume the TSC is invariant; it is on all recent Intel/AMD CPUs.
+  return NominalClockRate();  // 0.0 if the brand string lists no frequency
+#elif defined(_WIN32) || defined(_WIN64)
+  LARGE_INTEGER freq;
+  (void)QueryPerformanceFrequency(&freq);  // return value deliberately ignored
+  return double(freq.QuadPart);
+#elif defined(__MACH__)
+  // https://developer.apple.com/library/mac/qa/qa1398/_index.html
+  mach_timebase_info_data_t timebase;
+  (void)mach_timebase_info(&timebase);  // return value deliberately ignored
+  return double(timebase.denom) / timebase.numer * 1E9;
+#else
+  // TODO(janwas): ARM? Unclear how to reliably query cntvct_el0 frequency.
+  return 1E9;  // Haiku and clock_gettime return nanoseconds.
+#endif
+}
+
+double Now() {
+  static const double mul = 1.0 / InvariantTicksPerSecond();  // computed once
+  return static_cast<double>(timer::Start()) * mul;  // ticks -> seconds
+}
+
+uint64_t TimerResolution() {
+  // Two-level sampling (mode of modes) avoids exceeding stack/L1 capacity.
+  timer::Ticks outer_modes[Params::kTimerSamples];
+  for (size_t outer = 0; outer < Params::kTimerSamples; ++outer) {
+    timer::Ticks deltas[Params::kTimerSamples];
+    for (size_t inner = 0; inner < Params::kTimerSamples; ++inner) {
+      const timer::Ticks begin = timer::Start();
+      const timer::Ticks end = timer::Stop();
+      deltas[inner] = end - begin;
+    }
+    outer_modes[outer] = robust_statistics::Mode(deltas);
+  }
+  return robust_statistics::Mode(outer_modes);
+}
+
+}  // namespace platform
+namespace {
+
+static const timer::Ticks timer_resolution = platform::TimerResolution();  // measured once
+
+// Returns a robust estimate (mode or median) of the ticks taken by "lambda",
+// sampling until *rel_mad <= max_rel_mad, abs_mad is tiny, or p.max_evals.
+template <class Lambda>
+timer::Ticks SampleUntilStable(const double max_rel_mad, double* rel_mad,
+                               const Params& p, const Lambda& lambda) {
+  // Choose initial samples_per_eval based on a single estimated duration.
+  timer::Ticks t0 = timer::Start();
+  lambda();
+  timer::Ticks t1 = timer::Stop();
+  timer::Ticks est = t1 - t0;  // first sample; also the initial estimate
+  static const double ticks_per_second = platform::InvariantTicksPerSecond();
+  const size_t ticks_per_eval =
+      static_cast<size_t>(ticks_per_second * p.seconds_per_eval);
+  size_t samples_per_eval =
+      est == 0 ? p.min_samples_per_eval : ticks_per_eval / est;
+  samples_per_eval = std::max(samples_per_eval, p.min_samples_per_eval);
+
+  std::vector<timer::Ticks> samples;
+  samples.reserve(1 + samples_per_eval);
+  samples.push_back(est);  // the first measurement also counts as a sample
+
+  // Percentage is too strict for tiny differences, so also allow a small
+  // absolute "median absolute deviation": 1% of timer_resolution, rounded up.
+  const timer::Ticks max_abs_mad = (timer_resolution + 99) / 100;
+  *rel_mad = 0.0;  // ensure initialized
+
+  for (size_t eval = 0; eval < p.max_evals; ++eval, samples_per_eval *= 2) {
+    samples.reserve(samples.size() + samples_per_eval);
+    for (size_t i = 0; i < samples_per_eval; ++i) {
+      t0 = timer::Start();
+      lambda();
+      t1 = timer::Stop();
+      samples.push_back(t1 - t0);
+    }
+
+    if (samples.size() >= p.min_mode_samples) {
+      est = robust_statistics::Mode(samples.data(), samples.size());
+    } else {
+      // For "few" (depends also on the variance) samples, Median is safer.
+      est = robust_statistics::Median(samples.data(), samples.size());
+    }
+    NANOBENCHMARK_CHECK(est != 0);
+
+    // Median absolute deviation (mad) is a robust measure of 'variability'.
+    const timer::Ticks abs_mad = robust_statistics::MedianAbsoluteDeviation(
+        samples.data(), samples.size(), est);
+    *rel_mad = static_cast<double>(abs_mad) / static_cast<double>(est);
+
+    if (*rel_mad <= max_rel_mad || abs_mad <= max_abs_mad) {
+      if (p.verbose) {
+        printf("%6zu samples => %5zu (abs_mad=%4zu, rel_mad=%4.2f%%)\n",
+               samples.size(), size_t(est), size_t(abs_mad), *rel_mad * 100.0);
+      }
+      return est;  // variability is within tolerance
+    }
+  }
+
+  if (p.verbose) {
+    printf(
+        "WARNING: rel_mad=%4.2f%% still exceeds %4.2f%% after %6zu samples.\n",
+        *rel_mad * 100.0, max_rel_mad * 100.0, samples.size());
+  }
+  return est;  // best effort: tolerance not reached within p.max_evals
+}
+
+using InputVec = std::vector<FuncInput>;
+
+// Returns the distinct input values, sorted in ascending order.
+InputVec UniqueInputs(const FuncInput* inputs, const size_t num_inputs) {
+  InputVec distinct(inputs, inputs + num_inputs);
+  std::sort(distinct.begin(), distinct.end());
+  distinct.erase(std::unique(distinct.begin(), distinct.end()), distinct.end());
+  return distinct;
+}
+
+// Returns how often we need to call func for sufficient precision (may be 0).
+size_t NumSkip(const Func func, const uint8_t* arg, const InputVec& unique,
+               const Params& p) {
+  // Min elapsed ticks for any input, after deducting timer_resolution.
+  timer::Ticks min_duration = ~timer::Ticks(0);
+
+  for (const FuncInput input : unique) {
+    double rel_mad;
+    const timer::Ticks total = SampleUntilStable(
+        p.target_rel_mad, &rel_mad, p,
+        [func, arg, input]() { platform::PreventElision(func(arg, input)); });
+    min_duration = std::min(min_duration, total - timer_resolution);
+  }  // NOTE(review): the subtraction presumably wraps if total is smaller.
+
+  // Number of repetitions required to reach the target resolution.
+  const size_t max_skip = p.precision_divisor;
+  // Repetitions given the estimated duration: ceil(max_skip / min_duration).
+  const size_t num_skip =
+      min_duration == 0 ? 0 : (max_skip + min_duration - 1) / min_duration;
+  if (p.verbose) {
+    printf("res=%zu max_skip=%zu min_dur=%zu num_skip=%zu\n",
+           size_t(timer_resolution), max_skip, size_t(min_duration), num_skip);
+  }
+  return num_skip;
+}
+
+// Returns the inputs repeated subset_ratio * num_skip times, in shuffled order.
+InputVec ReplicateInputs(const FuncInput* inputs, const size_t num_inputs,
+                         const size_t num_unique, const size_t num_skip,
+                         const Params& p) {
+  const size_t copies = p.subset_ratio * num_skip;
+  if (num_unique == 1) {
+    return InputVec(copies, inputs[0]);  // one distinct value: no shuffle
+  }
+
+  InputVec shuffled;
+  shuffled.reserve(copies * num_inputs);
+  for (size_t rep = 0; rep < copies; ++rep) {
+    shuffled.insert(shuffled.end(), inputs, inputs + num_inputs);
+  }
+  std::mt19937 rng;  // default-seeded, so the order is reproducible
+  std::shuffle(shuffled.begin(), shuffled.end(), rng);
+  return shuffled;
+}
+
+// Copies "full" into the pre-sized "*subset" in the same order, but with
+// "num_skip" randomly selected occurrences of "input_to_skip" removed.
+void FillSubset(const InputVec& full, const FuncInput input_to_skip,
+                const size_t num_skip, InputVec* subset) {
+  const size_t count =
+      static_cast<size_t>(std::count(full.begin(), full.end(), input_to_skip));
+  // Generate num_skip random indices: which occurrence to skip.
+  std::vector<uint32_t> omit(count);
+  std::iota(omit.begin(), omit.end(), 0);
+  // omit[] is the same on every call, but that's OK because they identify the
+  // Nth instance of input_to_skip, so the position within full[] differs.
+  std::mt19937 rng;
+  std::shuffle(omit.begin(), omit.end(), rng);
+  omit.resize(num_skip);  // keep the first num_skip shuffled indices
+  std::sort(omit.begin(), omit.end());  // ascending, to match the scan below
+
+  uint32_t occurrence = ~0u;  // 0 after preincrement
+  size_t idx_omit = 0;        // cursor within omit[]
+  size_t idx_subset = 0;      // cursor within *subset
+  for (const FuncInput next : full) {
+    if (next == input_to_skip) {
+      ++occurrence;
+      // Haven't removed enough already
+      if (idx_omit < num_skip) {
+        // This one is up for removal
+        if (occurrence == omit[idx_omit]) {
+          ++idx_omit;
+          continue;  // omitted: not copied to *subset
+        }
+      }
+    }
+    if (idx_subset < subset->size()) {
+      (*subset)[idx_subset++] = next;
+    }
+  }
+  NANOBENCHMARK_CHECK(idx_subset == subset->size());
+  NANOBENCHMARK_CHECK(idx_omit == omit.size());
+  NANOBENCHMARK_CHECK(occurrence == count - 1);
+}
+
+// Returns the ticks needed to call func once for every element of *inputs, and
+// raises *max_rel_mad to this measurement's variability if that is larger.
+timer::Ticks TotalDuration(const Func func, const uint8_t* arg,
+                           const InputVec* inputs, const Params& p,
+                           double* max_rel_mad) {
+  const auto run_all = [func, arg, inputs]() {
+    for (const FuncInput in : *inputs) platform::PreventElision(func(arg, in));
+  };
+  double rel_mad;
+  const timer::Ticks elapsed =
+      SampleUntilStable(p.target_rel_mad, &rel_mad, p, run_all);
+  *max_rel_mad = std::max(*max_rel_mad, rel_mad);
+  return elapsed;
+}
+
+// (Nearly) empty Func: returns its input. Overhead() times it as a baseline.
+HWY_NOINLINE FuncOutput EmptyFunc(const void* /*arg*/, const FuncInput input) {
+  return input;
+}
+
+// Returns overhead of accessing inputs[] and calling a function (deducted from
+// TotalDuration results). Zero tolerance because repeatability is crucial.
+timer::Ticks Overhead(const uint8_t* arg, const InputVec* inputs,
+                      const Params& p) {
+  const auto run_empty = [arg, inputs]() {
+    for (const FuncInput in : *inputs) {
+      platform::PreventElision(EmptyFunc(arg, in));
+    }
+  };
+  double rel_mad;  // required by SampleUntilStable; value is not used here
+  return SampleUntilStable(0.0, &rel_mad, p, run_empty);  // EmptyFunc is fast
+}
+
+}  // namespace
+
+int Unpredictable1() { return timer::Start() != ~0ULL; }  // 1 unless tick==~0
+
+size_t Measure(const Func func, const uint8_t* arg, const FuncInput* inputs,
+               const size_t num_inputs, Result* results, const Params& p) {
+  NANOBENCHMARK_CHECK(num_inputs != 0);
+  const InputVec& unique = UniqueInputs(inputs, num_inputs);
+
+  const size_t num_skip = NumSkip(func, arg, unique, p);  // 0 if duration is 0
+  if (num_skip == 0) return 0;  // cannot measure; report zero results
+  // (slightly less work on x86 to cast from signed integer)
+  const float mul = 1.0f / static_cast<float>(static_cast<int>(num_skip));
+
+  const InputVec& full =
+      ReplicateInputs(inputs, num_inputs, unique.size(), num_skip, p);
+  InputVec subset(full.size() - num_skip);
+
+  const timer::Ticks overhead = Overhead(arg, &full, p);
+  const timer::Ticks overhead_skip = Overhead(arg, &subset, p);
+  if (overhead < overhead_skip) {
+    fprintf(stderr, "Measurement failed: overhead %zu < %zu\n",
+            size_t(overhead), size_t(overhead_skip));
+    return 0;
+  }
+
+  if (p.verbose) {
+    printf("#inputs=%5zu,%5zu overhead=%5zu,%5zu\n", full.size(), subset.size(),
+           size_t(overhead), size_t(overhead_skip));
+  }
+
+  double max_rel_mad = 0.0;
+  const timer::Ticks total = TotalDuration(func, arg, &full, p, &max_rel_mad);
+
+  for (size_t i = 0; i < unique.size(); ++i) {
+    FillSubset(full, unique[i], num_skip, &subset);
+    const timer::Ticks total_skip =
+        TotalDuration(func, arg, &subset, p, &max_rel_mad);
+
+    if (total < total_skip) {
+      fprintf(stderr, "Measurement failed: total %zu < %zu\n", size_t(total),
+              size_t(total_skip));
+      return 0;
+    }
+
+    const timer::Ticks duration =
+        (total - overhead) - (total_skip - overhead_skip);
+    results[i].input = unique[i];
+    results[i].ticks = static_cast<float>(duration) * mul;  // per call
+    results[i].variability = static_cast<float>(max_rel_mad);
+  }
+
+  return unique.size();
+}
+
+}  // namespace hwy
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/nanobenchmark.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/nanobenchmark.h
new file mode 100644
index 0000000000..18065f8f97
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/nanobenchmark.h
@@ -0,0 +1,191 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_NANOBENCHMARK_H_
+#define HIGHWAY_HWY_NANOBENCHMARK_H_
+
+// Benchmarks functions of a single integer argument with realistic branch
+// prediction hit rates. Uses a robust estimator to summarize the measurements.
+// The precision is about 0.2%.
+//
+// Examples: see nanobenchmark_test.cc.
+//
+// Background: Microbenchmarks such as http://github.com/google/benchmark
+// can measure elapsed times on the order of a microsecond. Shorter functions
+// are typically measured by repeating them thousands of times and dividing
+// the total elapsed time by this count. Unfortunately, repetition (especially
+// with the same input parameter!) influences the runtime. In time-critical
+// code, it is reasonable to expect warm instruction/data caches and TLBs,
+// but a perfect record of which branches will be taken is unrealistic.
+// Unless the application also repeatedly invokes the measured function with
+// the same parameter, the benchmark is measuring something very different -
+// a best-case result, almost as if the parameter were made a compile-time
+// constant. This may lead to erroneous conclusions about branch-heavy
+// algorithms outperforming branch-free alternatives.
+//
+// Our approach differs in three ways. Adding fences to the timer functions
+// reduces variability due to instruction reordering, improving the timer
+// resolution to about 40 CPU cycles. However, shorter functions must still
+// be invoked repeatedly. For more realistic branch prediction performance,
+// we vary the input parameter according to a user-specified distribution.
+// Thus, instead of VaryInputs(Measure(Repeat(func))), we change the
+// loop nesting to Measure(Repeat(VaryInputs(func))). We also estimate the
+// central tendency of the measurement samples with the "half sample mode",
+// which is more robust to outliers and skewed data than the mean or median.
+
+#include <stddef.h>
+#include <stdint.h>
+
+// Set to 1 to enable sanity checks that verify correct operation at the cost
+// of longer runs. Expansions call fprintf/abort (<stdio.h>/<stdlib.h>).
+#ifndef NANOBENCHMARK_ENABLE_CHECKS
+#define NANOBENCHMARK_ENABLE_CHECKS 0
+#endif
+
+#define NANOBENCHMARK_CHECK_ALWAYS(condition)                             \
+  while (!(condition)) {                                                  \
+    fprintf(stderr, "Nanobenchmark check failed at line %d\n", __LINE__); \
+    abort();                                                              \
+  }
+
+#if NANOBENCHMARK_ENABLE_CHECKS
+#define NANOBENCHMARK_CHECK(condition) NANOBENCHMARK_CHECK_ALWAYS(condition)
+#else
+#define NANOBENCHMARK_CHECK(condition)  // expands to nothing
+#endif
+
+namespace hwy {
+
+namespace platform {
+
+// Returns tick rate, useful for converting measurements to seconds. Invariant
+// means the tick counter frequency is independent of CPU throttling or sleep.
+// This call may be expensive, callers should cache the result.
+double InvariantTicksPerSecond();
+
+// Returns current timestamp [in seconds] relative to an unspecified origin.
+// Features: monotonic (no negative elapsed time), steady (unaffected by system
+// time changes), high-resolution (on the order of microseconds).
+double Now();
+
+// Returns ticks elapsed in back to back timer calls, i.e. a function of the
+// timer resolution (minimum measurable difference) and overhead.
+// This call is expensive, callers should cache the result.
+uint64_t TimerResolution();
+
+}  // namespace platform
+
+// Returns 1, but without the compiler knowing what the value is. This prevents
+// optimizing out code.
+int Unpredictable1();
+
+// Input influencing the function being measured (e.g. number of bytes to copy).
+using FuncInput = size_t;
+
+// "Proof of work" returned by Func to ensure the compiler does not elide it.
+using FuncOutput = uint64_t;
+
+// Function to measure: either 1) a captureless lambda or function with two
+// arguments or 2) a lambda with capture, in which case the first argument
+// is reserved for use by MeasureClosure.
+using Func = FuncOutput (*)(const void*, FuncInput);
+
+// Internal parameters that determine precision/resolution/measuring time.
+struct Params {
+  // For measuring timer overhead/resolution. Used in a nested loop =>
+  // quadratic time, acceptable because we know timer overhead is "low".
+  // constexpr because this is used to define array bounds.
+  static constexpr size_t kTimerSamples = 256;
+
+  // Best-case precision, expressed as a divisor of the timer resolution.
+  // Larger => more calls to Func and higher precision.
+  size_t precision_divisor = 1024;
+
+  // Ratio between full and subset input distribution sizes. Cannot be less
+  // than 2; larger values increase measurement time but more faithfully
+  // model the given input distribution.
+  size_t subset_ratio = 2;
+
+  // Together with the estimated Func duration, determines how many times to
+  // call Func before checking the sample variability. Larger values increase
+  // measurement time, memory/cache use and precision.
+  double seconds_per_eval = 4E-3;
+
+  // The minimum number of new samples per eval, i.e. before (re)estimating
+  // the central tendency.
+  size_t min_samples_per_eval = 7;
+
+  // The mode is better than median for estimating the central tendency of
+  // skewed/fat-tailed distributions, but it requires sufficient samples
+  // relative to the width of half-ranges. Below this count, median is used.
+  size_t min_mode_samples = 64;
+
+  // Maximum permissible variability (= median absolute deviation / center).
+  double target_rel_mad = 0.002;
+
+  // Stop after this many evals without reaching target_rel_mad (each eval
+  // doubles the number of new samples). This prevents infinite loops.
+  size_t max_evals = 9;
+
+  // Whether to print additional statistics to stdout (enabled by default).
+  bool verbose = true;
+};
+
+// Measurement result for each unique input.
+struct Result {
+  FuncInput input;  // the unique input value this result describes
+
+  // Robust estimate (mode or median) of the duration of one call, in ticks.
+  float ticks;
+
+  // Variability: largest relative median absolute deviation seen by Measure.
+  float variability;
+};
+
+// Precisely measures the number of ticks elapsed when calling "func" with the
+// given inputs, shuffled to ensure realistic branch prediction hit rates.
+//
+// "func" returns a 'proof of work' to ensure its computations are not elided.
+// "arg" is passed to Func, or reserved for internal use by MeasureClosure.
+// "inputs" is an array of "num_inputs" (not necessarily unique) arguments to
+//   "func". The values should be chosen to maximize coverage of "func". This
+//   represents a distribution, so a value's frequency should reflect its
+//   probability in the real application. Order does not matter; for example, a
+//   uniform distribution over [0, 4) could be represented as {3,0,2,1}.
+// Returns how many Result were written to "results": one per unique input, or
+//   zero if the measurement failed (an error message goes to stderr).
+size_t Measure(const Func func, const uint8_t* arg, const FuncInput* inputs,
+               const size_t num_inputs, Result* results,
+               const Params& p = Params());
+
+// Func-shaped adapter: calls operator() of the closure (lambda) pointed to by f.
+template <class Closure>
+static FuncOutput CallClosure(const Closure* f, const FuncInput input) {
+  return (*f)(input);
+}
+
+// Like Measure, but for a callable "closure" (typically a capturing lambda)
+// mapping FuncInput -> FuncOutput; its address is forwarded as "arg".
+template <class Closure>
+static inline size_t MeasureClosure(const Closure& closure,
+                                    const FuncInput* inputs,
+                                    const size_t num_inputs, Result* results,
+                                    const Params& p = Params()) {
+  const Func trampoline = reinterpret_cast<Func>(&CallClosure<Closure>);
+  const uint8_t* state = reinterpret_cast<const uint8_t*>(&closure);
+  return Measure(trampoline, state, inputs, num_inputs, results, p);
+}
+
+}  // namespace hwy
+
+#endif  // HIGHWAY_HWY_NANOBENCHMARK_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/nanobenchmark_test.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/nanobenchmark_test.cc
new file mode 100644
index 0000000000..a42caf5230
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/nanobenchmark_test.cc
@@ -0,0 +1,80 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/nanobenchmark.h"
+
+#include <stdio.h>
+
+#include <random>
+
+#include "hwy/tests/test_util-inl.h"
+
+namespace hwy {
+namespace {
+
+FuncOutput Div(const void*, FuncInput in) {
+  // Here we're measuring the throughput because benchmark invocations are
+  // independent. Any dividend will do; callers must pass a nonzero divisor.
+  return 0xFFFFF / in;
+}
+
+template <size_t N>  // N = number of inputs (deduced from the array)
+void MeasureDiv(const FuncInput (&inputs)[N]) {
+  printf("Measuring integer division (output on final two lines)\n");
+  Params p;
+  p.max_evals = 4;  // avoid test timeout
+  Result results[N];
+  const size_t count = Measure(&Div, nullptr, inputs, N, results, p);
+  for (const Result* r = results; r != results + count; ++r) {
+    printf("%5zu: %6.2f ticks; MAD=%4.2f%%\n", r->input, r->ticks,
+           r->variability * 100.0);
+  }
+}
+
+std::mt19937 rng;  // default-seeded
+
+// A function whose runtime depends on rng.
+FuncOutput Random(const void* /*arg*/, FuncInput in) {
+  const size_t r = rng() & 0xF;  // 0..15 divisions
+  uint32_t ret = in;
+  for (size_t i = 0; i < r; ++i) {
+    ret /= ((rng() & 1) + 2);  // divide by 2 or 3
+  }
+  return ret;
+}
+
+// With checks enabled, verifies Random's measured variability exceeds 1E-3.
+template <size_t N>
+void MeasureRandom(const FuncInput (&inputs)[N]) {
+  Params params;
+  params.verbose = false;
+  params.max_evals = 4;  // avoid test timeout
+  Result results[N];
+  const size_t count = Measure(&Random, nullptr, inputs, N, results, params);
+  for (size_t idx = 0; idx != count; ++idx) {
+    NANOBENCHMARK_CHECK(results[idx].variability > 1E-3);
+  }
+}
+
+TEST(NanobenchmarkTest, RunAll) {
+  const int unpredictable = Unpredictable1();  // == 1, unknown to compiler.
+  static const FuncInput inputs[] = {static_cast<FuncInput>(unpredictable) + 2,
+                                     static_cast<FuncInput>(unpredictable + 9)};
+
+  MeasureDiv(inputs);  // inputs == {3, 10}
+  MeasureRandom(inputs);
+}
+
+}  // namespace
+}  // namespace hwy
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/arm_neon-inl.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/arm_neon-inl.h
new file mode 100644
index 0000000000..f1ed0c742d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/arm_neon-inl.h
@@ -0,0 +1,4382 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// 128-bit ARM NEON vectors and operations (armv7 and A64).
+// External include guard in highway.h - see comment there.
+
+#include <arm_neon.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "hwy/base.h"
+#include "hwy/ops/shared-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+namespace detail {  // for code folding and Raw128
+
+// Macros used to define single and double function calls for multiple types
+// for full and half vectors. These macros are undefined at the end of the file.
+
+// HWY_NEON_BUILD_TPL_* is the template<...> prefix to the function.
+#define HWY_NEON_BUILD_TPL_1
+#define HWY_NEON_BUILD_TPL_2
+#define HWY_NEON_BUILD_TPL_3
+
+// HWY_NEON_BUILD_RET_* is return type.
+#define HWY_NEON_BUILD_RET_1(type, size) Vec128<type, size>
+#define HWY_NEON_BUILD_RET_2(type, size) Vec128<type, size>
+#define HWY_NEON_BUILD_RET_3(type, size) Vec128<type, size>
+
+// HWY_NEON_BUILD_PARAM_* is the list of parameters the function receives.
+#define HWY_NEON_BUILD_PARAM_1(type, size) const Vec128<type, size> a
+#define HWY_NEON_BUILD_PARAM_2(type, size) \
+  const Vec128<type, size> a, const Vec128<type, size> b
+#define HWY_NEON_BUILD_PARAM_3(type, size)                \
+  const Vec128<type, size> a, const Vec128<type, size> b, \
+      const Vec128<type, size> c
+
+// HWY_NEON_BUILD_ARG_* is the list of arguments passed to the underlying
+// function.
+#define HWY_NEON_BUILD_ARG_1 a.raw
+#define HWY_NEON_BUILD_ARG_2 a.raw, b.raw
+#define HWY_NEON_BUILD_ARG_3 a.raw, b.raw, c.raw
+
+// We use HWY_NEON_EVAL(func, ...) to delay the evaluation of func until after
+// the __VA_ARGS__ have been expanded. This allows "func" to itself be a
+// macro, as is the case for some library "functions" such as vshlq_u8. For
+// example, HWY_NEON_EVAL(vshlq_u8, MY_PARAMS) where MY_PARAMS is defined as
+// "a, b" (without the quotes) will end up expanding to "vshlq_u8(a, b)".
+// Directly writing vshlq_u8(MY_PARAMS) would fail since the vshlq_u8() macro
+// would see a single argument where it expects two.
+#define HWY_NEON_EVAL(func, ...) func(__VA_ARGS__)
+
+// Main macro definition that defines a single function "name" for the given
+// lane type and vector size. The body forwards to the underlying
+// (prefix##infix##suffix) function; the template prefix, return type,
+// parameters and arguments come from the HWY_NEON_BUILD_*_<args> macros.
+#define HWY_NEON_DEF_FUNCTION(type, size, name, prefix, infix, suffix, args) \
+  HWY_CONCAT(HWY_NEON_BUILD_TPL_, args)                                      \
+  HWY_INLINE HWY_CONCAT(HWY_NEON_BUILD_RET_, args)(type, size)               \
+      name(HWY_CONCAT(HWY_NEON_BUILD_PARAM_, args)(type, size)) {            \
+    return HWY_CONCAT(HWY_NEON_BUILD_RET_, args)(type, size)(                \
+        HWY_NEON_EVAL(prefix##infix##suffix, HWY_NEON_BUILD_ARG_##args));    \
+  }
+
+// The HWY_NEON_DEF_FUNCTION_* macros define all the variants of a function
+// called "name" using the set of neon functions starting with the given
+// "prefix" for all the variants of certain types, as specified next to each
+// macro. For example, the prefix "vsub" can be used to define the operator-
+// using args=2.
+
+// uint8_t
+#define HWY_NEON_DEF_FUNCTION_UINT_8(name, prefix, infix, args)        \
+  HWY_NEON_DEF_FUNCTION(uint8_t, 16, name, prefix##q, infix, u8, args) \
+  HWY_NEON_DEF_FUNCTION(uint8_t, 8, name, prefix, infix, u8, args)     \
+  HWY_NEON_DEF_FUNCTION(uint8_t, 4, name, prefix, infix, u8, args)     \
+  HWY_NEON_DEF_FUNCTION(uint8_t, 2, name, prefix, infix, u8, args)     \
+  HWY_NEON_DEF_FUNCTION(uint8_t, 1, name, prefix, infix, u8, args)
+
+// int8_t
+#define HWY_NEON_DEF_FUNCTION_INT_8(name, prefix, infix, args)        \
+  HWY_NEON_DEF_FUNCTION(int8_t, 16, name, prefix##q, infix, s8, args) \
+  HWY_NEON_DEF_FUNCTION(int8_t, 8, name, prefix, infix, s8, args)     \
+  HWY_NEON_DEF_FUNCTION(int8_t, 4, name, prefix, infix, s8, args)     \
+  HWY_NEON_DEF_FUNCTION(int8_t, 2, name, prefix, infix, s8, args)     \
+  HWY_NEON_DEF_FUNCTION(int8_t, 1, name, prefix, infix, s8, args)
+
+// uint16_t
+#define HWY_NEON_DEF_FUNCTION_UINT_16(name, prefix, infix, args)        \
+  HWY_NEON_DEF_FUNCTION(uint16_t, 8, name, prefix##q, infix, u16, args) \
+  HWY_NEON_DEF_FUNCTION(uint16_t, 4, name, prefix, infix, u16, args)    \
+  HWY_NEON_DEF_FUNCTION(uint16_t, 2, name, prefix, infix, u16, args)    \
+  HWY_NEON_DEF_FUNCTION(uint16_t, 1, name, prefix, infix, u16, args)
+
+// int16_t
+#define HWY_NEON_DEF_FUNCTION_INT_16(name, prefix, infix, args)        \
+  HWY_NEON_DEF_FUNCTION(int16_t, 8, name, prefix##q, infix, s16, args) \
+  HWY_NEON_DEF_FUNCTION(int16_t, 4, name, prefix, infix, s16, args)    \
+  HWY_NEON_DEF_FUNCTION(int16_t, 2, name, prefix, infix, s16, args)    \
+  HWY_NEON_DEF_FUNCTION(int16_t, 1, name, prefix, infix, s16, args)
+
+// uint32_t
+#define HWY_NEON_DEF_FUNCTION_UINT_32(name, prefix, infix, args)        \
+  HWY_NEON_DEF_FUNCTION(uint32_t, 4, name, prefix##q, infix, u32, args) \
+  HWY_NEON_DEF_FUNCTION(uint32_t, 2, name, prefix, infix, u32, args)    \
+  HWY_NEON_DEF_FUNCTION(uint32_t, 1, name, prefix, infix, u32, args)
+
+// int32_t
+#define HWY_NEON_DEF_FUNCTION_INT_32(name, prefix, infix, args)        \
+  HWY_NEON_DEF_FUNCTION(int32_t, 4, name, prefix##q, infix, s32, args) \
+  HWY_NEON_DEF_FUNCTION(int32_t, 2, name, prefix, infix, s32, args)    \
+  HWY_NEON_DEF_FUNCTION(int32_t, 1, name, prefix, infix, s32, args)
+
+// uint64_t
+#define HWY_NEON_DEF_FUNCTION_UINT_64(name, prefix, infix, args)        \
+  HWY_NEON_DEF_FUNCTION(uint64_t, 2, name, prefix##q, infix, u64, args) \
+  HWY_NEON_DEF_FUNCTION(uint64_t, 1, name, prefix, infix, u64, args)
+
+// int64_t
+#define HWY_NEON_DEF_FUNCTION_INT_64(name, prefix, infix, args)        \
+  HWY_NEON_DEF_FUNCTION(int64_t, 2, name, prefix##q, infix, s64, args) \
+  HWY_NEON_DEF_FUNCTION(int64_t, 1, name, prefix, infix, s64, args)
+
+// float, and also double when HWY_ARCH_ARM_A64
+#if HWY_ARCH_ARM_A64
+#define HWY_NEON_DEF_FUNCTION_ALL_FLOATS(name, prefix, infix, args)   \
+  HWY_NEON_DEF_FUNCTION(float, 4, name, prefix##q, infix, f32, args)  \
+  HWY_NEON_DEF_FUNCTION(float, 2, name, prefix, infix, f32, args)     \
+  HWY_NEON_DEF_FUNCTION(float, 1, name, prefix, infix, f32, args)     \
+  HWY_NEON_DEF_FUNCTION(double, 2, name, prefix##q, infix, f64, args) \
+  HWY_NEON_DEF_FUNCTION(double, 1, name, prefix, infix, f64, args)
+#else
+#define HWY_NEON_DEF_FUNCTION_ALL_FLOATS(name, prefix, infix, args)  \
+  HWY_NEON_DEF_FUNCTION(float, 4, name, prefix##q, infix, f32, args) \
+  HWY_NEON_DEF_FUNCTION(float, 2, name, prefix, infix, f32, args)    \
+  HWY_NEON_DEF_FUNCTION(float, 1, name, prefix, infix, f32, args)
+#endif
+
+// Helper macros to define for more than one type.
+// uint8_t, uint16_t and uint32_t
+#define HWY_NEON_DEF_FUNCTION_UINT_8_16_32(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_UINT_8(name, prefix, infix, args)             \
+  HWY_NEON_DEF_FUNCTION_UINT_16(name, prefix, infix, args)            \
+  HWY_NEON_DEF_FUNCTION_UINT_32(name, prefix, infix, args)
+
+// int8_t, int16_t and int32_t
+#define HWY_NEON_DEF_FUNCTION_INT_8_16_32(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_INT_8(name, prefix, infix, args)             \
+  HWY_NEON_DEF_FUNCTION_INT_16(name, prefix, infix, args)            \
+  HWY_NEON_DEF_FUNCTION_INT_32(name, prefix, infix, args)
+
+// uint8_t, uint16_t, uint32_t and uint64_t
+#define HWY_NEON_DEF_FUNCTION_UINTS(name, prefix, infix, args)  \
+  HWY_NEON_DEF_FUNCTION_UINT_8_16_32(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_UINT_64(name, prefix, infix, args)
+
+// int8_t, int16_t, int32_t and int64_t
+#define HWY_NEON_DEF_FUNCTION_INTS(name, prefix, infix, args)  \
+  HWY_NEON_DEF_FUNCTION_INT_8_16_32(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_INT_64(name, prefix, infix, args)
+
+// All int*_t and uint*_t up to 64
+#define HWY_NEON_DEF_FUNCTION_INTS_UINTS(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_INTS(name, prefix, infix, args)             \
+  HWY_NEON_DEF_FUNCTION_UINTS(name, prefix, infix, args)
+
+// All previous types.
+#define HWY_NEON_DEF_FUNCTION_ALL_TYPES(name, prefix, infix, args) \
+  HWY_NEON_DEF_FUNCTION_INTS_UINTS(name, prefix, infix, args)      \
+  HWY_NEON_DEF_FUNCTION_ALL_FLOATS(name, prefix, infix, args)
+
+// Emulation on armv7: vuzp1/2 and vzip1/2 via val[0]/val[1] of vuzp/vzip.
+#if HWY_ARCH_ARM_V7
+#define vuzp1_s8(x, y) vuzp_s8(x, y).val[0]
+#define vuzp1_u8(x, y) vuzp_u8(x, y).val[0]
+#define vuzp1_s16(x, y) vuzp_s16(x, y).val[0]
+#define vuzp1_u16(x, y) vuzp_u16(x, y).val[0]
+#define vuzp1_s32(x, y) vuzp_s32(x, y).val[0]
+#define vuzp1_u32(x, y) vuzp_u32(x, y).val[0]
+#define vuzp1_f32(x, y) vuzp_f32(x, y).val[0]
+#define vuzp1q_s8(x, y) vuzpq_s8(x, y).val[0]
+#define vuzp1q_u8(x, y) vuzpq_u8(x, y).val[0]
+#define vuzp1q_s16(x, y) vuzpq_s16(x, y).val[0]
+#define vuzp1q_u16(x, y) vuzpq_u16(x, y).val[0]
+#define vuzp1q_s32(x, y) vuzpq_s32(x, y).val[0]
+#define vuzp1q_u32(x, y) vuzpq_u32(x, y).val[0]
+#define vuzp1q_f32(x, y) vuzpq_f32(x, y).val[0]
+#define vuzp2_s8(x, y) vuzp_s8(x, y).val[1]
+#define vuzp2_u8(x, y) vuzp_u8(x, y).val[1]
+#define vuzp2_s16(x, y) vuzp_s16(x, y).val[1]
+#define vuzp2_u16(x, y) vuzp_u16(x, y).val[1]
+#define vuzp2_s32(x, y) vuzp_s32(x, y).val[1]
+#define vuzp2_u32(x, y) vuzp_u32(x, y).val[1]
+#define vuzp2_f32(x, y) vuzp_f32(x, y).val[1]
+#define vuzp2q_s8(x, y) vuzpq_s8(x, y).val[1]
+#define vuzp2q_u8(x, y) vuzpq_u8(x, y).val[1]
+#define vuzp2q_s16(x, y) vuzpq_s16(x, y).val[1]
+#define vuzp2q_u16(x, y) vuzpq_u16(x, y).val[1]
+#define vuzp2q_s32(x, y) vuzpq_s32(x, y).val[1]
+#define vuzp2q_u32(x, y) vuzpq_u32(x, y).val[1]
+#define vuzp2q_f32(x, y) vuzpq_f32(x, y).val[1]
+#define vzip1_s8(x, y) vzip_s8(x, y).val[0]
+#define vzip1_u8(x, y) vzip_u8(x, y).val[0]
+#define vzip1_s16(x, y) vzip_s16(x, y).val[0]
+#define vzip1_u16(x, y) vzip_u16(x, y).val[0]
+#define vzip1_f32(x, y) vzip_f32(x, y).val[0]
+#define vzip1_u32(x, y) vzip_u32(x, y).val[0]
+#define vzip1_s32(x, y) vzip_s32(x, y).val[0]
+#define vzip1q_s8(x, y) vzipq_s8(x, y).val[0]
+#define vzip1q_u8(x, y) vzipq_u8(x, y).val[0]
+#define vzip1q_s16(x, y) vzipq_s16(x, y).val[0]
+#define vzip1q_u16(x, y) vzipq_u16(x, y).val[0]
+#define vzip1q_s32(x, y) vzipq_s32(x, y).val[0]
+#define vzip1q_u32(x, y) vzipq_u32(x, y).val[0]
+#define vzip1q_f32(x, y) vzipq_f32(x, y).val[0]
+#define vzip2_s8(x, y) vzip_s8(x, y).val[1]
+#define vzip2_u8(x, y) vzip_u8(x, y).val[1]
+#define vzip2_s16(x, y) vzip_s16(x, y).val[1]
+#define vzip2_u16(x, y) vzip_u16(x, y).val[1]
+#define vzip2_s32(x, y) vzip_s32(x, y).val[1]
+#define vzip2_u32(x, y) vzip_u32(x, y).val[1]
+#define vzip2_f32(x, y) vzip_f32(x, y).val[1]
+#define vzip2q_s8(x, y) vzipq_s8(x, y).val[1]
+#define vzip2q_u8(x, y) vzipq_u8(x, y).val[1]
+#define vzip2q_s16(x, y) vzipq_s16(x, y).val[1]
+#define vzip2q_u16(x, y) vzipq_u16(x, y).val[1]
+#define vzip2q_s32(x, y) vzipq_s32(x, y).val[1]
+#define vzip2q_u32(x, y) vzipq_u32(x, y).val[1]
+#define vzip2q_f32(x, y) vzipq_f32(x, y).val[1]
+#endif
+
+template <typename T, size_t N>  // T: lane type, N: number of lanes
+struct Raw128;  // specializations below provide the NEON vector "type"
+
+// 128-bit (full) vectors
+template <>
+struct Raw128<uint8_t, 16> {
+  using type = uint8x16_t;
+};
+
+template <>
+struct Raw128<uint16_t, 8> {
+  using type = uint16x8_t;
+};
+
+template <>
+struct Raw128<uint32_t, 4> {
+  using type = uint32x4_t;
+};
+
+template <>
+struct Raw128<uint64_t, 2> {
+  using type = uint64x2_t;
+};
+
+template <>
+struct Raw128<int8_t, 16> {
+  using type = int8x16_t;
+};
+
+template <>
+struct Raw128<int16_t, 8> {
+  using type = int16x8_t;
+};
+
+template <>
+struct Raw128<int32_t, 4> {
+  using type = int32x4_t;
+};
+
+template <>
+struct Raw128<int64_t, 2> {
+  using type = int64x2_t;
+};
+
+template <>
+struct Raw128<float16_t, 8> {
+  using type = uint16x8_t;
+};
+
+template <>
+struct Raw128<float, 4> {
+  using type = float32x4_t;
+};
+
+#if HWY_ARCH_ARM_A64
+template <>
+struct Raw128<double, 2> {
+  using type = float64x2_t;
+};
+#endif
+
+// 64-bit vectors
+template <>
+struct Raw128<uint8_t, 8> {
+  using type = uint8x8_t;
+};
+
+template <>
+struct Raw128<uint16_t, 4> {
+  using type = uint16x4_t;
+};
+
+template <>
+struct Raw128<uint32_t, 2> {
+  using type = uint32x2_t;
+};
+
+template <>
+struct Raw128<uint64_t, 1> {
+  using type = uint64x1_t;
+};
+
+template <>
+struct Raw128<int8_t, 8> {
+  using type = int8x8_t;
+};
+
+template <>
+struct Raw128<int16_t, 4> {
+  using type = int16x4_t;
+};
+
+template <>
+struct Raw128<int32_t, 2> {
+  using type = int32x2_t;
+};
+
+template <>
+struct Raw128<int64_t, 1> {
+  using type = int64x1_t;
+};
+
+template <>
+struct Raw128<float16_t, 4> {
+  using type = uint16x4_t;
+};
+
+template <>
+struct Raw128<float, 2> {
+  using type = float32x2_t;
+};
+
+#if HWY_ARCH_ARM_A64
+template <>
+struct Raw128<double, 1> {
+  using type = float64x1_t;
+};
+#endif
+
+// 32-bit vectors (same raw types as 64-bit)
+template <>
+struct Raw128<uint8_t, 4> {
+  using type = uint8x8_t;
+};
+
+template <>
+struct Raw128<uint16_t, 2> {
+  using type = uint16x4_t;
+};
+
+template <>
+struct Raw128<uint32_t, 1> {
+  using type = uint32x2_t;
+};
+
+template <>
+struct Raw128<int8_t, 4> {
+  using type = int8x8_t;
+};
+
+template <>
+struct Raw128<int16_t, 2> {
+  using type = int16x4_t;
+};
+
+template <>
+struct Raw128<int32_t, 1> {
+  using type = int32x2_t;
+};
+
+template <>
+struct Raw128<float16_t, 2> {
+  using type = uint16x4_t;
+};
+
+template <>
+struct Raw128<float, 1> {
+  using type = float32x2_t;
+};
+
+// 16-bit vectors (same raw types as 64-bit)
+template <>
+struct Raw128<uint8_t, 2> {
+  using type = uint8x8_t;
+};
+
+template <>
+struct Raw128<uint16_t, 1> {
+  using type = uint16x4_t;
+};
+
+template <>
+struct Raw128<int8_t, 2> {
+  using type = int8x8_t;
+};
+
+template <>
+struct Raw128<int16_t, 1> {
+  using type = int16x4_t;
+};
+
+template <>
+struct Raw128<float16_t, 1> {
+  using type = uint16x4_t;
+};
+
+// 8-bit vectors (same raw types as 64-bit)
+template <>
+struct Raw128<uint8_t, 1> {
+  using type = uint8x8_t;
+};
+
+template <>
+struct Raw128<int8_t, 1> {
+  using type = int8x8_t;
+};
+
+}  // namespace detail
+
+template <typename T>
+using Full128 = Simd<T, 16 / sizeof(T)>;  // tag: as many T as fit in 16 bytes
+
+template <typename T, size_t N = 16 / sizeof(T)>
+class Vec128 {
+  using Raw = typename detail::Raw128<T, N>::type;  // native NEON vector type
+
+ public:
+  HWY_INLINE Vec128() {}  // leaves raw uninitialized
+  Vec128(const Vec128&) = default;
+  Vec128& operator=(const Vec128&) = default;
+  HWY_INLINE explicit Vec128(const Raw raw) : raw(raw) {}
+
+  // Compound assignment: each forwards to the corresponding non-member binary
+  // operator, so it is only usable if one exists (only f32/f64 have division).
+  HWY_INLINE Vec128& operator*=(const Vec128 other) {
+    return *this = (*this * other);
+  }
+  HWY_INLINE Vec128& operator/=(const Vec128 other) {
+    return *this = (*this / other);
+  }
+  HWY_INLINE Vec128& operator+=(const Vec128 other) {
+    return *this = (*this + other);
+  }
+  HWY_INLINE Vec128& operator-=(const Vec128 other) {
+    return *this = (*this - other);
+  }
+  HWY_INLINE Vec128& operator&=(const Vec128 other) {
+    return *this = (*this & other);
+  }
+  HWY_INLINE Vec128& operator|=(const Vec128 other) {
+    return *this = (*this | other);
+  }
+  HWY_INLINE Vec128& operator^=(const Vec128 other) {
+    return *this = (*this ^ other);
+  }
+
+  Raw raw;  // public data member holding the native vector
+};
+
+// Lane mask: FF..FF or 0 per lane, also for floating-point - see README.
+template <typename T, size_t N = 16 / sizeof(T)>
+class Mask128 {
+  // ACLE returns and expects unsigned types, hence MakeUnsigned<T>.
+  using Raw = typename detail::Raw128<MakeUnsigned<T>, N>::type;
+
+ public:
+  HWY_INLINE Mask128() {}  // leaves raw uninitialized
+  Mask128(const Mask128&) = default;
+  Mask128& operator=(const Mask128&) = default;
+  HWY_INLINE explicit Mask128(const Raw raw) : raw(raw) {}
+
+  Raw raw;  // public data member holding the native (unsigned) vector
+};
+
+// ------------------------------ BitCast
+
+namespace detail {
+
+// Converts from Vec128<T, N> to Vec128<uint8_t, N * sizeof(T)> using the
+// vreinterpret*_u8_*() set of functions.
+#define HWY_NEON_BUILD_TPL_HWY_CAST_TO_U8
+#define HWY_NEON_BUILD_RET_HWY_CAST_TO_U8(type, size) \
+  Vec128<uint8_t, size * sizeof(type)>
+#define HWY_NEON_BUILD_PARAM_HWY_CAST_TO_U8(type, size) Vec128<type, size> v
+#define HWY_NEON_BUILD_ARG_HWY_CAST_TO_U8 v.raw
+
+// Special case of u8 to u8: identity, as vreinterpret*_u8_u8 is not defined.
+template <size_t N>
+HWY_INLINE Vec128<uint8_t, N> BitCastToByte(Vec128<uint8_t, N> v) {
+  return v;
+}
+
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(BitCastToByte, vreinterpret, _u8_,
+                                 HWY_CAST_TO_U8)
+HWY_NEON_DEF_FUNCTION_INTS(BitCastToByte, vreinterpret, _u8_, HWY_CAST_TO_U8)
+HWY_NEON_DEF_FUNCTION_UINT_16(BitCastToByte, vreinterpret, _u8_, HWY_CAST_TO_U8)
+HWY_NEON_DEF_FUNCTION_UINT_32(BitCastToByte, vreinterpret, _u8_, HWY_CAST_TO_U8)
+HWY_NEON_DEF_FUNCTION_UINT_64(BitCastToByte, vreinterpret, _u8_, HWY_CAST_TO_U8)
+
+// Special case for float16_t: rewrap as uint16_t, which has the same Raw.
+template <size_t N>
+HWY_INLINE Vec128<uint8_t, N * 2> BitCastToByte(Vec128<float16_t, N> v) {
+  return BitCastToByte(Vec128<uint16_t, N>(v.raw));
+}
+
+#undef HWY_NEON_BUILD_TPL_HWY_CAST_TO_U8
+#undef HWY_NEON_BUILD_RET_HWY_CAST_TO_U8
+#undef HWY_NEON_BUILD_PARAM_HWY_CAST_TO_U8
+#undef HWY_NEON_BUILD_ARG_HWY_CAST_TO_U8
+
+template <size_t N>  // u8 -> u8: nothing to reinterpret
+HWY_INLINE Vec128<uint8_t, N> BitCastFromByte(Simd<uint8_t, N> /* tag */,
+                                              Vec128<uint8_t, N> v) {
+  return v;
+}
+
+// 64-bit or less (vreinterpret_*, without the q suffix):
+
+template <size_t N, HWY_IF_LE64(int8_t, N)>
+HWY_INLINE Vec128<int8_t, N> BitCastFromByte(Simd<int8_t, N> /* tag */,
+                                             Vec128<uint8_t, N> v) {
+  return Vec128<int8_t, N>(vreinterpret_s8_u8(v.raw));
+}
+template <size_t N, HWY_IF_LE64(uint16_t, N)>
+HWY_INLINE Vec128<uint16_t, N> BitCastFromByte(Simd<uint16_t, N> /* tag */,
+                                               Vec128<uint8_t, N * 2> v) {
+  return Vec128<uint16_t, N>(vreinterpret_u16_u8(v.raw));
+}
+template <size_t N, HWY_IF_LE64(int16_t, N)>
+HWY_INLINE Vec128<int16_t, N> BitCastFromByte(Simd<int16_t, N> /* tag */,
+                                              Vec128<uint8_t, N * 2> v) {
+  return Vec128<int16_t, N>(vreinterpret_s16_u8(v.raw));
+}
+template <size_t N, HWY_IF_LE64(uint32_t, N)>
+HWY_INLINE Vec128<uint32_t, N> BitCastFromByte(Simd<uint32_t, N> /* tag */,
+                                               Vec128<uint8_t, N * 4> v) {
+  return Vec128<uint32_t, N>(vreinterpret_u32_u8(v.raw));
+}
+template <size_t N, HWY_IF_LE64(int32_t, N)>
+HWY_INLINE Vec128<int32_t, N> BitCastFromByte(Simd<int32_t, N> /* tag */,
+                                              Vec128<uint8_t, N * 4> v) {
+  return Vec128<int32_t, N>(vreinterpret_s32_u8(v.raw));
+}
+template <size_t N, HWY_IF_LE64(float, N)>
+HWY_INLINE Vec128<float, N> BitCastFromByte(Simd<float, N> /* tag */,
+                                            Vec128<uint8_t, N * 4> v) {
+  return Vec128<float, N>(vreinterpret_f32_u8(v.raw));
+}
+HWY_INLINE Vec128<uint64_t, 1> BitCastFromByte(Simd<uint64_t, 1> /* tag */,
+                                               Vec128<uint8_t, 1 * 8> v) {
+  return Vec128<uint64_t, 1>(vreinterpret_u64_u8(v.raw));
+}
+HWY_INLINE Vec128<int64_t, 1> BitCastFromByte(Simd<int64_t, 1> /* tag */,
+                                              Vec128<uint8_t, 1 * 8> v) {
+  return Vec128<int64_t, 1>(vreinterpret_s64_u8(v.raw));
+}
+#if HWY_ARCH_ARM_A64
+HWY_INLINE Vec128<double, 1> BitCastFromByte(Simd<double, 1> /* tag */,
+                                             Vec128<uint8_t, 1 * 8> v) {
+  return Vec128<double, 1>(vreinterpret_f64_u8(v.raw));
+}
+#endif
+
+// 128-bit full (vreinterpretq_*):
+
+HWY_INLINE Vec128<int8_t> BitCastFromByte(Full128<int8_t> /* tag */,
+                                          Vec128<uint8_t> v) {
+  return Vec128<int8_t>(vreinterpretq_s8_u8(v.raw));
+}
+HWY_INLINE Vec128<uint16_t> BitCastFromByte(Full128<uint16_t> /* tag */,
+                                            Vec128<uint8_t> v) {
+  return Vec128<uint16_t>(vreinterpretq_u16_u8(v.raw));
+}
+HWY_INLINE Vec128<int16_t> BitCastFromByte(Full128<int16_t> /* tag */,
+                                           Vec128<uint8_t> v) {
+  return Vec128<int16_t>(vreinterpretq_s16_u8(v.raw));
+}
+HWY_INLINE Vec128<uint32_t> BitCastFromByte(Full128<uint32_t> /* tag */,
+                                            Vec128<uint8_t> v) {
+  return Vec128<uint32_t>(vreinterpretq_u32_u8(v.raw));
+}
+HWY_INLINE Vec128<int32_t> BitCastFromByte(Full128<int32_t> /* tag */,
+                                           Vec128<uint8_t> v) {
+  return Vec128<int32_t>(vreinterpretq_s32_u8(v.raw));
+}
+HWY_INLINE Vec128<float> BitCastFromByte(Full128<float> /* tag */,
+                                         Vec128<uint8_t> v) {
+  return Vec128<float>(vreinterpretq_f32_u8(v.raw));
+}
+HWY_INLINE Vec128<uint64_t> BitCastFromByte(Full128<uint64_t> /* tag */,
+                                            Vec128<uint8_t> v) {
+  return Vec128<uint64_t>(vreinterpretq_u64_u8(v.raw));
+}
+HWY_INLINE Vec128<int64_t> BitCastFromByte(Full128<int64_t> /* tag */,
+                                           Vec128<uint8_t> v) {
+  return Vec128<int64_t>(vreinterpretq_s64_u8(v.raw));
+}
+
+#if HWY_ARCH_ARM_A64
+HWY_INLINE Vec128<double> BitCastFromByte(Full128<double> /* tag */,
+                                          Vec128<uint8_t> v) {
+  return Vec128<double>(vreinterpretq_f64_u8(v.raw));
+}
+#endif
+
+// Special case for float16_t: cast to uint16_t (same Raw), then rewrap.
+template <size_t N>
+HWY_INLINE Vec128<float16_t, N> BitCastFromByte(Simd<float16_t, N> /* tag */,
+                                                Vec128<uint8_t, N * 2> v) {
+  return Vec128<float16_t, N>(BitCastFromByte(Simd<uint16_t, N>(), v).raw);
+}
+
+}  // namespace detail
+
+template <typename T, size_t N, typename FromT>  // FromT -> u8 -> T
+HWY_INLINE Vec128<T, N> BitCast(
+    Simd<T, N> d, Vec128<FromT, N * sizeof(T) / sizeof(FromT)> v) {
+  return detail::BitCastFromByte(d, detail::BitCastToByte(v));
+}
+
+// ------------------------------ Set
+
+// Returns a vector with all lanes set to "t".
+#define HWY_NEON_BUILD_TPL_HWY_SET1
+#define HWY_NEON_BUILD_RET_HWY_SET1(type, size) Vec128<type, size>
+#define HWY_NEON_BUILD_PARAM_HWY_SET1(type, size) \
+  Simd<type, size> /* tag */, const type t
+#define HWY_NEON_BUILD_ARG_HWY_SET1 t
+
+HWY_NEON_DEF_FUNCTION_ALL_TYPES(Set, vdup, _n_, HWY_SET1)
+
+#undef HWY_NEON_BUILD_TPL_HWY_SET1
+#undef HWY_NEON_BUILD_RET_HWY_SET1
+#undef HWY_NEON_BUILD_PARAM_HWY_SET1
+#undef HWY_NEON_BUILD_ARG_HWY_SET1
+
+// Returns a vector with all lanes set to zero.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Zero(Simd<T, N> d) {
+  return Set(d, 0);  // the literal 0 converts to the lane type T
+}
+
+// Returns a vector with uninitialized (indeterminate) elements.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Undefined(Simd<T, N> /*d*/) {
+  HWY_DIAGNOSTICS(push)
+  HWY_DIAGNOSTICS_OFF(disable : 4701, ignored "-Wuninitialized")
+  typename detail::Raw128<T, N>::type a;  // deliberately not initialized
+  return Vec128<T, N>(a);
+  HWY_DIAGNOSTICS(pop)  // after the return; presumably expands to a pragma
+}
+
+// Returns a vector whose lane i, for i in [0, N), holds "first" + i.
+template <typename T, size_t N, typename T2>
+Vec128<T, N> Iota(const Simd<T, N> d, const T2 first) {
+  HWY_ALIGN T buf[16 / sizeof(T)];  // a full vector's worth, even if N is less
+  for (size_t lane = 0; lane != sizeof(buf) / sizeof(T); ++lane) {
+    buf[lane] = static_cast<T>(first + static_cast<T2>(lane));
+  }
+  return Load(d, buf);
+}
+
+// ------------------------------ GetLane
+
+HWY_INLINE uint8_t GetLane(const Vec128<uint8_t, 16> v) {
+  return vgetq_lane_u8(v.raw, 0);  // lane 0 of a full (uint8x16_t) vector
+}
+template <size_t N>  // partial vectors: Raw is the 64-bit uint8x8_t
+HWY_INLINE uint8_t GetLane(const Vec128<uint8_t, N> v) {
+  return vget_lane_u8(v.raw, 0);  // lane 0
+}
+
+HWY_INLINE int8_t GetLane(const Vec128<int8_t, 16> v) {
+  return vgetq_lane_s8(v.raw, 0);
+}
+template <size_t N>
+HWY_INLINE int8_t GetLane(const Vec128<int8_t, N> v) {
+  return vget_lane_s8(v.raw, 0);
+}
+
+HWY_INLINE uint16_t GetLane(const Vec128<uint16_t, 8> v) {
+  return vgetq_lane_u16(v.raw, 0);
+}
+template <size_t N>
+HWY_INLINE uint16_t GetLane(const Vec128<uint16_t, N> v) {
+  return vget_lane_u16(v.raw, 0);
+}
+
+HWY_INLINE int16_t GetLane(const Vec128<int16_t, 8> v) {
+  return vgetq_lane_s16(v.raw, 0);
+}
+template <size_t N>
+HWY_INLINE int16_t GetLane(const Vec128<int16_t, N> v) {
+  return vget_lane_s16(v.raw, 0);
+}
+
+HWY_INLINE uint32_t GetLane(const Vec128<uint32_t, 4> v) {
+  return vgetq_lane_u32(v.raw, 0);
+}
+template <size_t N>
+HWY_INLINE uint32_t GetLane(const Vec128<uint32_t, N> v) {
+  return vget_lane_u32(v.raw, 0);
+}
+
+HWY_INLINE int32_t GetLane(const Vec128<int32_t, 4> v) {
+  return vgetq_lane_s32(v.raw, 0);
+}
+template <size_t N>
+HWY_INLINE int32_t GetLane(const Vec128<int32_t, N> v) {
+  return vget_lane_s32(v.raw, 0);
+}
+
+HWY_INLINE uint64_t GetLane(const Vec128<uint64_t, 2> v) {
+  return vgetq_lane_u64(v.raw, 0);
+}
+HWY_INLINE uint64_t GetLane(const Vec128<uint64_t, 1> v) {
+  return vget_lane_u64(v.raw, 0);
+}
+HWY_INLINE int64_t GetLane(const Vec128<int64_t, 2> v) {
+  return vgetq_lane_s64(v.raw, 0);
+}
+HWY_INLINE int64_t GetLane(const Vec128<int64_t, 1> v) {
+  return vget_lane_s64(v.raw, 0);
+}
+
+HWY_INLINE float GetLane(const Vec128<float, 4> v) {
+  return vgetq_lane_f32(v.raw, 0);
+}
+HWY_INLINE float GetLane(const Vec128<float, 2> v) {
+  return vget_lane_f32(v.raw, 0);
+}
+HWY_INLINE float GetLane(const Vec128<float, 1> v) {
+  return vget_lane_f32(v.raw, 0);
+}
+#if HWY_ARCH_ARM_A64
+HWY_INLINE double GetLane(const Vec128<double, 2> v) {
+  return vgetq_lane_f64(v.raw, 0);
+}
+HWY_INLINE double GetLane(const Vec128<double, 1> v) {
+  return vget_lane_f64(v.raw, 0);
+}
+#endif
+
+// ================================================== ARITHMETIC
+
+// ------------------------------ Addition
+HWY_NEON_DEF_FUNCTION_ALL_TYPES(operator+, vadd, _, 2)
+
+// ------------------------------ Subtraction
+HWY_NEON_DEF_FUNCTION_ALL_TYPES(operator-, vsub, _, 2)
+
+// ------------------------------ Saturating addition and subtraction
+// Only defined for uint8_t, uint16_t and their signed versions, as in other
+// architectures.
+
+// Returns a + b clamped to the destination range.
+HWY_NEON_DEF_FUNCTION_INT_8(SaturatedAdd, vqadd, _, 2)
+HWY_NEON_DEF_FUNCTION_INT_16(SaturatedAdd, vqadd, _, 2)
+HWY_NEON_DEF_FUNCTION_UINT_8(SaturatedAdd, vqadd, _, 2)
+HWY_NEON_DEF_FUNCTION_UINT_16(SaturatedAdd, vqadd, _, 2)
+
+// Returns a - b clamped to the destination range.
+HWY_NEON_DEF_FUNCTION_INT_8(SaturatedSub, vqsub, _, 2)
+HWY_NEON_DEF_FUNCTION_INT_16(SaturatedSub, vqsub, _, 2)
+HWY_NEON_DEF_FUNCTION_UINT_8(SaturatedSub, vqsub, _, 2)
+HWY_NEON_DEF_FUNCTION_UINT_16(SaturatedSub, vqsub, _, 2)
+
+// Not part of API, used in implementation.
+namespace detail {
+HWY_NEON_DEF_FUNCTION_UINT_32(SaturatedSub, vqsub, _, 2)
+HWY_NEON_DEF_FUNCTION_UINT_64(SaturatedSub, vqsub, _, 2)
+HWY_NEON_DEF_FUNCTION_INT_32(SaturatedSub, vqsub, _, 2)
+HWY_NEON_DEF_FUNCTION_INT_64(SaturatedSub, vqsub, _, 2)
+}  // namespace detail
+
+// ------------------------------ Average
+
+// Returns (a + b + 1) / 2
+HWY_NEON_DEF_FUNCTION_UINT_8(AverageRound, vrhadd, _, 2)
+HWY_NEON_DEF_FUNCTION_UINT_16(AverageRound, vrhadd, _, 2)
+
+// ------------------------------ Absolute value
+
+// Returns absolute value, except that LimitsMin() maps to LimitsMax() + 1.
+HWY_INLINE Vec128<int8_t> Abs(const Vec128<int8_t> v) {
+  return Vec128<int8_t>(vabsq_s8(v.raw));
+}
+HWY_INLINE Vec128<int16_t> Abs(const Vec128<int16_t> v) {
+  return Vec128<int16_t>(vabsq_s16(v.raw));
+}
+HWY_INLINE Vec128<int32_t> Abs(const Vec128<int32_t> v) {
+  return Vec128<int32_t>(vabsq_s32(v.raw));
+}
+// i64 is implemented after BroadcastSignBit.
+HWY_INLINE Vec128<float> Abs(const Vec128<float> v) {
+  return Vec128<float>(vabsq_f32(v.raw));
+}
+
+template <size_t N, HWY_IF_LE64(int8_t, N)>
+HWY_INLINE Vec128<int8_t, N> Abs(const Vec128<int8_t, N> v) {
+  return Vec128<int8_t, N>(vabs_s8(v.raw));
+}
+template <size_t N, HWY_IF_LE64(int16_t, N)>
+HWY_INLINE Vec128<int16_t, N> Abs(const Vec128<int16_t, N> v) {
+  return Vec128<int16_t, N>(vabs_s16(v.raw));
+}
+template <size_t N, HWY_IF_LE64(int32_t, N)>
+HWY_INLINE Vec128<int32_t, N> Abs(const Vec128<int32_t, N> v) {
+  return Vec128<int32_t, N>(vabs_s32(v.raw));
+}
+template <size_t N, HWY_IF_LE64(float, N)>
+HWY_INLINE Vec128<float, N> Abs(const Vec128<float, N> v) {
+  return Vec128<float, N>(vabs_f32(v.raw));
+}
+
+#if HWY_ARCH_ARM_A64
+HWY_INLINE Vec128<double> Abs(const Vec128<double> v) {
+  return Vec128<double>(vabsq_f64(v.raw));
+}
+
+HWY_INLINE Vec128<double, 1> Abs(const Vec128<double, 1> v) {
+  return Vec128<double, 1>(vabs_f64(v.raw));
+}
+#endif
+
+// ------------------------------ Neg
+
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Neg, vneg, _, 1)
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(Neg, vneg, _, 1)  // i64 implemented below
+
+HWY_INLINE Vec128<int64_t, 1> Neg(const Vec128<int64_t, 1> v) {
+#if HWY_ARCH_ARM_A64
+  return Vec128<int64_t, 1>(vneg_s64(v.raw));
+#else  // not A64: compute 0 - v instead of using vneg_s64
+  return Zero(Simd<int64_t, 1>()) - v;
+#endif  // HWY_ARCH_ARM_A64
+}
+
+HWY_INLINE Vec128<int64_t> Neg(const Vec128<int64_t> v) {
+#if HWY_ARCH_ARM_A64
+  return Vec128<int64_t>(vnegq_s64(v.raw));
+#else  // not A64: compute 0 - v instead of using vnegq_s64
+  return Zero(Full128<int64_t>()) - v;
+#endif  // HWY_ARCH_ARM_A64
+}
+
+// ------------------------------ ShiftLeft
+
+// Customize HWY_NEON_DEF_FUNCTION for shifts: count=0 (unsupported) returns v.
+#pragma push_macro("HWY_NEON_DEF_FUNCTION")
+#undef HWY_NEON_DEF_FUNCTION  // HWY_MAX(1, kBits) keeps the immediate nonzero
+#define HWY_NEON_DEF_FUNCTION(type, size, name, prefix, infix, suffix, args)   \
+  template <int kBits>                                                         \
+  HWY_INLINE Vec128<type, size> name(const Vec128<type, size> v) {             \
+    return kBits == 0 ? v                                                      \
+                      : Vec128<type, size>(HWY_NEON_EVAL(                      \
+                            prefix##infix##suffix, v.raw, HWY_MAX(1, kBits))); \
+  }
+
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(ShiftLeft, vshl, _n_, HWY_SHIFT)
+
+HWY_NEON_DEF_FUNCTION_UINTS(ShiftRight, vshr, _n_, HWY_SHIFT)
+HWY_NEON_DEF_FUNCTION_INTS(ShiftRight, vshr, _n_, HWY_SHIFT)
+
+#pragma pop_macro("HWY_NEON_DEF_FUNCTION")
+
+// ------------------------------ Shl
+
+// Per-lane variable left shift, unsigned lanes. The vshl intrinsics take the
+// shift counts as a signed vector, so `bits` is reinterpreted (not converted)
+// before the call.
+
+// 8-bit lanes.
+HWY_INLINE Vec128<uint8_t> operator<<(const Vec128<uint8_t> v,
+                                      const Vec128<uint8_t> bits) {
+  const int8x16_t counts = vreinterpretq_s8_u8(bits.raw);
+  return Vec128<uint8_t>(vshlq_u8(v.raw, counts));
+}
+template <size_t N, HWY_IF_LE64(uint8_t, N)>
+HWY_INLINE Vec128<uint8_t, N> operator<<(const Vec128<uint8_t, N> v,
+                                         const Vec128<uint8_t, N> bits) {
+  const int8x8_t counts = vreinterpret_s8_u8(bits.raw);
+  return Vec128<uint8_t, N>(vshl_u8(v.raw, counts));
+}
+
+// 16-bit lanes.
+HWY_INLINE Vec128<uint16_t> operator<<(const Vec128<uint16_t> v,
+                                       const Vec128<uint16_t> bits) {
+  const int16x8_t counts = vreinterpretq_s16_u16(bits.raw);
+  return Vec128<uint16_t>(vshlq_u16(v.raw, counts));
+}
+template <size_t N, HWY_IF_LE64(uint16_t, N)>
+HWY_INLINE Vec128<uint16_t, N> operator<<(const Vec128<uint16_t, N> v,
+                                          const Vec128<uint16_t, N> bits) {
+  const int16x4_t counts = vreinterpret_s16_u16(bits.raw);
+  return Vec128<uint16_t, N>(vshl_u16(v.raw, counts));
+}
+
+// 32-bit lanes.
+HWY_INLINE Vec128<uint32_t> operator<<(const Vec128<uint32_t> v,
+                                       const Vec128<uint32_t> bits) {
+  const int32x4_t counts = vreinterpretq_s32_u32(bits.raw);
+  return Vec128<uint32_t>(vshlq_u32(v.raw, counts));
+}
+template <size_t N, HWY_IF_LE64(uint32_t, N)>
+HWY_INLINE Vec128<uint32_t, N> operator<<(const Vec128<uint32_t, N> v,
+                                          const Vec128<uint32_t, N> bits) {
+  const int32x2_t counts = vreinterpret_s32_u32(bits.raw);
+  return Vec128<uint32_t, N>(vshl_u32(v.raw, counts));
+}
+
+// 64-bit lanes.
+HWY_INLINE Vec128<uint64_t> operator<<(const Vec128<uint64_t> v,
+                                       const Vec128<uint64_t> bits) {
+  const int64x2_t counts = vreinterpretq_s64_u64(bits.raw);
+  return Vec128<uint64_t>(vshlq_u64(v.raw, counts));
+}
+HWY_INLINE Vec128<uint64_t, 1> operator<<(const Vec128<uint64_t, 1> v,
+                                          const Vec128<uint64_t, 1> bits) {
+  const int64x1_t counts = vreinterpret_s64_u64(bits.raw);
+  return Vec128<uint64_t, 1>(vshl_u64(v.raw, counts));
+}
+
+// Per-lane variable left shift, signed lanes: `bits` already has the signed
+// type vshl expects for its counts, so it is passed through directly.
+
+// 8-bit lanes.
+HWY_INLINE Vec128<int8_t> operator<<(const Vec128<int8_t> v,
+                                     const Vec128<int8_t> bits) {
+  const int8x16_t shifted = vshlq_s8(v.raw, bits.raw);
+  return Vec128<int8_t>(shifted);
+}
+template <size_t N, HWY_IF_LE64(int8_t, N)>
+HWY_INLINE Vec128<int8_t, N> operator<<(const Vec128<int8_t, N> v,
+                                        const Vec128<int8_t, N> bits) {
+  const int8x8_t shifted = vshl_s8(v.raw, bits.raw);
+  return Vec128<int8_t, N>(shifted);
+}
+
+// 16-bit lanes.
+HWY_INLINE Vec128<int16_t> operator<<(const Vec128<int16_t> v,
+                                      const Vec128<int16_t> bits) {
+  const int16x8_t shifted = vshlq_s16(v.raw, bits.raw);
+  return Vec128<int16_t>(shifted);
+}
+template <size_t N, HWY_IF_LE64(int16_t, N)>
+HWY_INLINE Vec128<int16_t, N> operator<<(const Vec128<int16_t, N> v,
+                                         const Vec128<int16_t, N> bits) {
+  const int16x4_t shifted = vshl_s16(v.raw, bits.raw);
+  return Vec128<int16_t, N>(shifted);
+}
+
+// 32-bit lanes.
+HWY_INLINE Vec128<int32_t> operator<<(const Vec128<int32_t> v,
+                                      const Vec128<int32_t> bits) {
+  const int32x4_t shifted = vshlq_s32(v.raw, bits.raw);
+  return Vec128<int32_t>(shifted);
+}
+template <size_t N, HWY_IF_LE64(int32_t, N)>
+HWY_INLINE Vec128<int32_t, N> operator<<(const Vec128<int32_t, N> v,
+                                         const Vec128<int32_t, N> bits) {
+  const int32x2_t shifted = vshl_s32(v.raw, bits.raw);
+  return Vec128<int32_t, N>(shifted);
+}
+
+// 64-bit lanes.
+HWY_INLINE Vec128<int64_t> operator<<(const Vec128<int64_t> v,
+                                      const Vec128<int64_t> bits) {
+  const int64x2_t shifted = vshlq_s64(v.raw, bits.raw);
+  return Vec128<int64_t>(shifted);
+}
+HWY_INLINE Vec128<int64_t, 1> operator<<(const Vec128<int64_t, 1> v,
+                                         const Vec128<int64_t, 1> bits) {
+  const int64x1_t shifted = vshl_s64(v.raw, bits.raw);
+  return Vec128<int64_t, 1>(shifted);
+}
+
+// ------------------------------ Shr (Neg)
+
+// Per-lane variable right shift, unsigned lanes. Implemented as vshl with
+// negated counts: `bits` is bit-cast to the signed lane type, negated, and
+// handed to the left-shift intrinsic.
+
+// 8-bit lanes.
+HWY_INLINE Vec128<uint8_t> operator>>(const Vec128<uint8_t> v,
+                                      const Vec128<uint8_t> bits) {
+  const Full128<int8_t> di;
+  const int8x16_t counts = Neg(BitCast(di, bits)).raw;
+  return Vec128<uint8_t>(vshlq_u8(v.raw, counts));
+}
+template <size_t N, HWY_IF_LE64(uint8_t, N)>
+HWY_INLINE Vec128<uint8_t, N> operator>>(const Vec128<uint8_t, N> v,
+                                         const Vec128<uint8_t, N> bits) {
+  const Simd<int8_t, N> di;
+  const int8x8_t counts = Neg(BitCast(di, bits)).raw;
+  return Vec128<uint8_t, N>(vshl_u8(v.raw, counts));
+}
+
+// 16-bit lanes.
+HWY_INLINE Vec128<uint16_t> operator>>(const Vec128<uint16_t> v,
+                                       const Vec128<uint16_t> bits) {
+  const Full128<int16_t> di;
+  const int16x8_t counts = Neg(BitCast(di, bits)).raw;
+  return Vec128<uint16_t>(vshlq_u16(v.raw, counts));
+}
+template <size_t N, HWY_IF_LE64(uint16_t, N)>
+HWY_INLINE Vec128<uint16_t, N> operator>>(const Vec128<uint16_t, N> v,
+                                          const Vec128<uint16_t, N> bits) {
+  const Simd<int16_t, N> di;
+  const int16x4_t counts = Neg(BitCast(di, bits)).raw;
+  return Vec128<uint16_t, N>(vshl_u16(v.raw, counts));
+}
+
+// 32-bit lanes.
+HWY_INLINE Vec128<uint32_t> operator>>(const Vec128<uint32_t> v,
+                                       const Vec128<uint32_t> bits) {
+  const Full128<int32_t> di;
+  const int32x4_t counts = Neg(BitCast(di, bits)).raw;
+  return Vec128<uint32_t>(vshlq_u32(v.raw, counts));
+}
+template <size_t N, HWY_IF_LE64(uint32_t, N)>
+HWY_INLINE Vec128<uint32_t, N> operator>>(const Vec128<uint32_t, N> v,
+                                          const Vec128<uint32_t, N> bits) {
+  const Simd<int32_t, N> di;
+  const int32x2_t counts = Neg(BitCast(di, bits)).raw;
+  return Vec128<uint32_t, N>(vshl_u32(v.raw, counts));
+}
+
+// 64-bit lanes.
+HWY_INLINE Vec128<uint64_t> operator>>(const Vec128<uint64_t> v,
+                                       const Vec128<uint64_t> bits) {
+  const Full128<int64_t> di;
+  const int64x2_t counts = Neg(BitCast(di, bits)).raw;
+  return Vec128<uint64_t>(vshlq_u64(v.raw, counts));
+}
+HWY_INLINE Vec128<uint64_t, 1> operator>>(const Vec128<uint64_t, 1> v,
+                                          const Vec128<uint64_t, 1> bits) {
+  const Simd<int64_t, 1> di;
+  const int64x1_t counts = Neg(BitCast(di, bits)).raw;
+  return Vec128<uint64_t, 1>(vshl_u64(v.raw, counts));
+}
+
+// Per-lane variable right shift, signed lanes: negate the counts and use the
+// signed vshl intrinsic.
+
+// 8-bit lanes.
+HWY_INLINE Vec128<int8_t> operator>>(const Vec128<int8_t> v,
+                                     const Vec128<int8_t> bits) {
+  const auto counts = Neg(bits);
+  return Vec128<int8_t>(vshlq_s8(v.raw, counts.raw));
+}
+template <size_t N, HWY_IF_LE64(int8_t, N)>
+HWY_INLINE Vec128<int8_t, N> operator>>(const Vec128<int8_t, N> v,
+                                        const Vec128<int8_t, N> bits) {
+  const auto counts = Neg(bits);
+  return Vec128<int8_t, N>(vshl_s8(v.raw, counts.raw));
+}
+
+// 16-bit lanes.
+HWY_INLINE Vec128<int16_t> operator>>(const Vec128<int16_t> v,
+                                      const Vec128<int16_t> bits) {
+  const auto counts = Neg(bits);
+  return Vec128<int16_t>(vshlq_s16(v.raw, counts.raw));
+}
+template <size_t N, HWY_IF_LE64(int16_t, N)>
+HWY_INLINE Vec128<int16_t, N> operator>>(const Vec128<int16_t, N> v,
+                                         const Vec128<int16_t, N> bits) {
+  const auto counts = Neg(bits);
+  return Vec128<int16_t, N>(vshl_s16(v.raw, counts.raw));
+}
+
+// 32-bit lanes.
+HWY_INLINE Vec128<int32_t> operator>>(const Vec128<int32_t> v,
+                                      const Vec128<int32_t> bits) {
+  const auto counts = Neg(bits);
+  return Vec128<int32_t>(vshlq_s32(v.raw, counts.raw));
+}
+template <size_t N, HWY_IF_LE64(int32_t, N)>
+HWY_INLINE Vec128<int32_t, N> operator>>(const Vec128<int32_t, N> v,
+                                         const Vec128<int32_t, N> bits) {
+  const auto counts = Neg(bits);
+  return Vec128<int32_t, N>(vshl_s32(v.raw, counts.raw));
+}
+
+// 64-bit lanes.
+HWY_INLINE Vec128<int64_t> operator>>(const Vec128<int64_t> v,
+                                      const Vec128<int64_t> bits) {
+  const auto counts = Neg(bits);
+  return Vec128<int64_t>(vshlq_s64(v.raw, counts.raw));
+}
+HWY_INLINE Vec128<int64_t, 1> operator>>(const Vec128<int64_t, 1> v,
+                                         const Vec128<int64_t, 1> bits) {
+  const auto counts = Neg(bits);
+  return Vec128<int64_t, 1>(vshl_s64(v.raw, counts.raw));
+}
+
+// ------------------------------ ShiftLeftSame (Shl)
+
+// Shifts every lane of v left by the same run-time count: `bits` is broadcast
+// to all lanes and forwarded to the per-lane operator<<.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> ShiftLeftSame(const Vec128<T, N> v, int bits) {
+  const auto counts = Set(Simd<T, N>(), bits);
+  return v << counts;
+}
+// Shifts every lane of v right by the same run-time count: `bits` is broadcast
+// to all lanes and forwarded to the per-lane operator>>.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> ShiftRightSame(const Vec128<T, N> v, int bits) {
+  const auto counts = Set(Simd<T, N>(), bits);
+  return v >> counts;
+}
+
+// ------------------------------ Integer multiplication
+
+// Unsigned
+// Lane-wise product of unsigned 16/32-bit lanes (result has the same lane
+// width as the inputs). Full 128-bit vectors first, then partial vectors.
+HWY_INLINE Vec128<uint16_t> operator*(const Vec128<uint16_t> a,
+                                      const Vec128<uint16_t> b) {
+  const uint16x8_t product = vmulq_u16(a.raw, b.raw);
+  return Vec128<uint16_t>(product);
+}
+HWY_INLINE Vec128<uint32_t> operator*(const Vec128<uint32_t> a,
+                                      const Vec128<uint32_t> b) {
+  const uint32x4_t product = vmulq_u32(a.raw, b.raw);
+  return Vec128<uint32_t>(product);
+}
+
+template <size_t N, HWY_IF_LE64(uint16_t, N)>
+HWY_INLINE Vec128<uint16_t, N> operator*(const Vec128<uint16_t, N> a,
+                                         const Vec128<uint16_t, N> b) {
+  const uint16x4_t product = vmul_u16(a.raw, b.raw);
+  return Vec128<uint16_t, N>(product);
+}
+template <size_t N, HWY_IF_LE64(uint32_t, N)>
+HWY_INLINE Vec128<uint32_t, N> operator*(const Vec128<uint32_t, N> a,
+                                         const Vec128<uint32_t, N> b) {
+  const uint32x2_t product = vmul_u32(a.raw, b.raw);
+  return Vec128<uint32_t, N>(product);
+}
+
+// Signed
+// Lane-wise product of signed 16/32-bit lanes (result has the same lane width
+// as the inputs). Full 128-bit vectors first, then partial vectors.
+HWY_INLINE Vec128<int16_t> operator*(const Vec128<int16_t> a,
+                                     const Vec128<int16_t> b) {
+  return Vec128<int16_t>(vmulq_s16(a.raw, b.raw));
+}
+HWY_INLINE Vec128<int32_t> operator*(const Vec128<int32_t> a,
+                                     const Vec128<int32_t> b) {
+  return Vec128<int32_t>(vmulq_s32(a.raw, b.raw));
+}
+
+// The constraint names the lane type actually used by the overload (int16_t),
+// matching every other partial-vector overload in this file.
+template <size_t N, HWY_IF_LE64(int16_t, N)>
+HWY_INLINE Vec128<int16_t, N> operator*(const Vec128<int16_t, N> a,
+                                        const Vec128<int16_t, N> b) {
+  return Vec128<int16_t, N>(vmul_s16(a.raw, b.raw));
+}
+template <size_t N, HWY_IF_LE64(int32_t, N)>
+HWY_INLINE Vec128<int32_t, N> operator*(const Vec128<int32_t, N> a,
+                                        const Vec128<int32_t, N> b) {
+  return Vec128<int32_t, N>(vmul_s32(a.raw, b.raw));
+}
+
+// Returns the upper 16 bits of a * b in each lane.
+// Full-width MulHigh: widen-multiply the lower and upper halves into 32-bit
+// products, then keep the odd-indexed 16-bit elements of both via vuzp2q.
+HWY_INLINE Vec128<int16_t> MulHigh(const Vec128<int16_t> a,
+                                   const Vec128<int16_t> b) {
+  const int32x4_t prod_lo = vmull_s16(vget_low_s16(a.raw), vget_low_s16(b.raw));
+#if HWY_ARCH_ARM_A64
+  const int32x4_t prod_hi = vmull_high_s16(a.raw, b.raw);
+#else
+  const int32x4_t prod_hi =
+      vmull_s16(vget_high_s16(a.raw), vget_high_s16(b.raw));
+#endif
+  const int16x8_t lo16 = vreinterpretq_s16_s32(prod_lo);
+  const int16x8_t hi16 = vreinterpretq_s16_s32(prod_hi);
+  return Vec128<int16_t>(vuzp2q_s16(lo16, hi16));
+}
+HWY_INLINE Vec128<uint16_t> MulHigh(const Vec128<uint16_t> a,
+                                    const Vec128<uint16_t> b) {
+  const uint32x4_t prod_lo =
+      vmull_u16(vget_low_u16(a.raw), vget_low_u16(b.raw));
+#if HWY_ARCH_ARM_A64
+  const uint32x4_t prod_hi = vmull_high_u16(a.raw, b.raw);
+#else
+  const uint32x4_t prod_hi =
+      vmull_u16(vget_high_u16(a.raw), vget_high_u16(b.raw));
+#endif
+  const uint16x8_t lo16 = vreinterpretq_u16_u32(prod_lo);
+  const uint16x8_t hi16 = vreinterpretq_u16_u32(prod_hi);
+  return Vec128<uint16_t>(vuzp2q_u16(lo16, hi16));
+}
+
+// Partial-vector MulHigh: a single widening multiply covers all lanes; vuzp2q
+// of the product with itself gathers the odd-indexed 16-bit elements, and the
+// lower half of that result is returned.
+template <size_t N, HWY_IF_LE64(int16_t, N)>
+HWY_INLINE Vec128<int16_t, N> MulHigh(const Vec128<int16_t, N> a,
+                                      const Vec128<int16_t, N> b) {
+  const int32x4_t wide = vmull_s16(a.raw, b.raw);
+  const int16x8_t halves = vreinterpretq_s16_s32(wide);
+  const int16x8_t upper = vuzp2q_s16(halves, halves);
+  return Vec128<int16_t, N>(vget_low_s16(upper));
+}
+template <size_t N, HWY_IF_LE64(uint16_t, N)>
+HWY_INLINE Vec128<uint16_t, N> MulHigh(const Vec128<uint16_t, N> a,
+                                       const Vec128<uint16_t, N> b) {
+  const uint32x4_t wide = vmull_u16(a.raw, b.raw);
+  const uint16x8_t halves = vreinterpretq_u16_u32(wide);
+  const uint16x8_t upper = vuzp2q_u16(halves, halves);
+  return Vec128<uint16_t, N>(vget_low_u16(upper));
+}
+
+// Multiplies even lanes (0, 2 ..) and places the lower half of the double-wide
+// result into the even lane and the upper half into its odd neighbor lane.
+// Full-width MulEven: vuzp1q of a vector with itself packs its even-indexed
+// lanes into the lower half, which is then widen-multiplied to 64 bits.
+HWY_INLINE Vec128<int64_t> MulEven(const Vec128<int32_t> a,
+                                   const Vec128<int32_t> b) {
+  const int32x4_t a_even = vuzp1q_s32(a.raw, a.raw);
+  const int32x4_t b_even = vuzp1q_s32(b.raw, b.raw);
+  const int64x2_t wide = vmull_s32(vget_low_s32(a_even), vget_low_s32(b_even));
+  return Vec128<int64_t>(wide);
+}
+HWY_INLINE Vec128<uint64_t> MulEven(const Vec128<uint32_t> a,
+                                    const Vec128<uint32_t> b) {
+  const uint32x4_t a_even = vuzp1q_u32(a.raw, a.raw);
+  const uint32x4_t b_even = vuzp1q_u32(b.raw, b.raw);
+  const uint64x2_t wide = vmull_u32(vget_low_u32(a_even), vget_low_u32(b_even));
+  return Vec128<uint64_t>(wide);
+}
+
+// Partial-vector MulEven: pack the even lanes with vuzp1, widen-multiply, and
+// return the lower 64-bit half of the product vector.
+template <size_t N>
+HWY_INLINE Vec128<int64_t, (N + 1) / 2> MulEven(const Vec128<int32_t, N> a,
+                                                const Vec128<int32_t, N> b) {
+  const int32x2_t a_even = vuzp1_s32(a.raw, a.raw);
+  const int32x2_t b_even = vuzp1_s32(b.raw, b.raw);
+  const int64x2_t wide = vmull_s32(a_even, b_even);
+  return Vec128<int64_t, (N + 1) / 2>(vget_low_s64(wide));
+}
+template <size_t N>
+HWY_INLINE Vec128<uint64_t, (N + 1) / 2> MulEven(const Vec128<uint32_t, N> a,
+                                                 const Vec128<uint32_t, N> b) {
+  const uint32x2_t a_even = vuzp1_u32(a.raw, a.raw);
+  const uint32x2_t b_even = vuzp1_u32(b.raw, b.raw);
+  const uint64x2_t wide = vmull_u32(a_even, b_even);
+  return Vec128<uint64_t, (N + 1) / 2>(vget_low_u64(wide));
+}
+
+// ------------------------------ Floating-point mul / div
+
+// operator* for all floating-point vector types, generated from the vmul
+// intrinsic family (the trailing 2 presumably selects the two-operand form).
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator*, vmul, _, 2)
+
+// Approximate reciprocal
+// Reciprocal estimate via vrecpe; full-width overload first, then the
+// partial-vector template.
+HWY_INLINE Vec128<float> ApproximateReciprocal(const Vec128<float> v) {
+  const float32x4_t estimate = vrecpeq_f32(v.raw);
+  return Vec128<float>(estimate);
+}
+template <size_t N>
+HWY_INLINE Vec128<float, N> ApproximateReciprocal(const Vec128<float, N> v) {
+  const float32x2_t estimate = vrecpe_f32(v.raw);
+  return Vec128<float, N>(estimate);
+}
+
+#if HWY_ARCH_ARM_A64
+// A64: generated directly from the vdiv intrinsic family.
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator/, vdiv, _, 2)
+#else
+// armv7 has no vector divide: a / b is computed as a * (refined 1 / b).
+namespace detail {
+
+// Refinement factor from vrecps for the current estimate `recip` of
+// 1 / divisor (full-width vectors).
+HWY_INLINE Vec128<float> ReciprocalNewtonRaphsonStep(
+    const Vec128<float> recip, const Vec128<float> divisor) {
+  const float32x4_t factor = vrecpsq_f32(recip.raw, divisor.raw);
+  return Vec128<float>(factor);
+}
+// Same, for partial vectors.
+template <size_t N>
+HWY_INLINE Vec128<float, N> ReciprocalNewtonRaphsonStep(
+    const Vec128<float, N> recip, Vec128<float, N> divisor) {
+  const float32x2_t factor = vrecps_f32(recip.raw, divisor.raw);
+  return Vec128<float, N>(factor);
+}
+
+}  // namespace detail
+
+template <size_t N>
+HWY_INLINE Vec128<float, N> operator/(const Vec128<float, N> a,
+                                      const Vec128<float, N> b) {
+  auto recip = ApproximateReciprocal(b);
+  // Three refinement rounds, each scaling the estimate by the step factor.
+  for (int round = 0; round < 3; ++round) {
+    recip *= detail::ReciprocalNewtonRaphsonStep(recip, b);
+  }
+  return a * recip;
+}
+#endif
+
+// ------------------------------ Absolute value of difference.
+
+// Lane-wise absolute difference of float vectors via vabd.
+HWY_INLINE Vec128<float> AbsDiff(const Vec128<float> a, const Vec128<float> b) {
+  const float32x4_t distance = vabdq_f32(a.raw, b.raw);
+  return Vec128<float>(distance);
+}
+template <size_t N, HWY_IF_LE64(float, N)>
+HWY_INLINE Vec128<float, N> AbsDiff(const Vec128<float, N> a,
+                                    const Vec128<float, N> b) {
+  const float32x2_t distance = vabd_f32(a.raw, b.raw);
+  return Vec128<float, N>(distance);
+}
+
+// ------------------------------ Floating-point multiply-add variants
+
+// Returns add + mul * x
+#if defined(__ARM_VFPV4__) || HWY_ARCH_ARM_A64
+// vfma is available; note that the intrinsic takes the addend first.
+template <size_t N, HWY_IF_LE64(float, N)>
+HWY_INLINE Vec128<float, N> MulAdd(const Vec128<float, N> mul,
+                                   const Vec128<float, N> x,
+                                   const Vec128<float, N> add) {
+  const float32x2_t fused = vfma_f32(add.raw, mul.raw, x.raw);
+  return Vec128<float, N>(fused);
+}
+HWY_INLINE Vec128<float> MulAdd(const Vec128<float> mul, const Vec128<float> x,
+                                const Vec128<float> add) {
+  const float32x4_t fused = vfmaq_f32(add.raw, mul.raw, x.raw);
+  return Vec128<float>(fused);
+}
+#else
+// No vfma: emulate with a separate multiply followed by an add.
+template <size_t N>
+HWY_INLINE Vec128<float, N> MulAdd(const Vec128<float, N> mul,
+                                   const Vec128<float, N> x,
+                                   const Vec128<float, N> add) {
+  const auto product = mul * x;
+  return product + add;
+}
+#endif
+
+#if HWY_ARCH_ARM_A64
+// Double-precision MulAdd (A64 only): add + mul * x via vfma, addend first.
+HWY_INLINE Vec128<double, 1> MulAdd(const Vec128<double, 1> mul,
+                                    const Vec128<double, 1> x,
+                                    const Vec128<double, 1> add) {
+  const float64x1_t fused = vfma_f64(add.raw, mul.raw, x.raw);
+  return Vec128<double, 1>(fused);
+}
+HWY_INLINE Vec128<double> MulAdd(const Vec128<double> mul,
+                                 const Vec128<double> x,
+                                 const Vec128<double> add) {
+  const float64x2_t fused = vfmaq_f64(add.raw, mul.raw, x.raw);
+  return Vec128<double>(fused);
+}
+#endif
+
+// Returns add - mul * x
+#if defined(__ARM_VFPV4__) || HWY_ARCH_ARM_A64
+// vfms is available; the intrinsic takes the value subtracted from first.
+template <size_t N, HWY_IF_LE64(float, N)>
+HWY_INLINE Vec128<float, N> NegMulAdd(const Vec128<float, N> mul,
+                                      const Vec128<float, N> x,
+                                      const Vec128<float, N> add) {
+  const float32x2_t fused = vfms_f32(add.raw, mul.raw, x.raw);
+  return Vec128<float, N>(fused);
+}
+HWY_INLINE Vec128<float> NegMulAdd(const Vec128<float> mul,
+                                   const Vec128<float> x,
+                                   const Vec128<float> add) {
+  const float32x4_t fused = vfmsq_f32(add.raw, mul.raw, x.raw);
+  return Vec128<float>(fused);
+}
+#else
+// No vfms: emulate with a separate multiply followed by a subtract.
+template <size_t N>
+HWY_INLINE Vec128<float, N> NegMulAdd(const Vec128<float, N> mul,
+                                      const Vec128<float, N> x,
+                                      const Vec128<float, N> add) {
+  const auto product = mul * x;
+  return add - product;
+}
+#endif
+
+#if HWY_ARCH_ARM_A64
+// Double-precision NegMulAdd (A64 only): add - mul * x via vfms.
+HWY_INLINE Vec128<double, 1> NegMulAdd(const Vec128<double, 1> mul,
+                                       const Vec128<double, 1> x,
+                                       const Vec128<double, 1> add) {
+  const float64x1_t fused = vfms_f64(add.raw, mul.raw, x.raw);
+  return Vec128<double, 1>(fused);
+}
+HWY_INLINE Vec128<double> NegMulAdd(const Vec128<double> mul,
+                                    const Vec128<double> x,
+                                    const Vec128<double> add) {
+  const float64x2_t fused = vfmsq_f64(add.raw, mul.raw, x.raw);
+  return Vec128<double>(fused);
+}
+#endif
+
+// Returns mul * x - sub
+// mul * x - sub, expressed as MulAdd with the negated subtrahend.
+template <size_t N>
+HWY_INLINE Vec128<float, N> MulSub(const Vec128<float, N> mul,
+                                   const Vec128<float, N> x,
+                                   const Vec128<float, N> sub) {
+  const auto negated_sub = Neg(sub);
+  return MulAdd(mul, x, negated_sub);
+}
+
+// Returns -mul * x - sub
+// -(mul * x + sub): compute the MulAdd, then negate the whole sum.
+template <size_t N>
+HWY_INLINE Vec128<float, N> NegMulSub(const Vec128<float, N> mul,
+                                      const Vec128<float, N> x,
+                                      const Vec128<float, N> sub) {
+  const auto sum = MulAdd(mul, x, sub);
+  return Neg(sum);
+}
+
+#if HWY_ARCH_ARM_A64
+// Double-precision variants (A64 only), built on MulAdd exactly like the
+// float versions.
+
+// mul * x - sub
+template <size_t N>
+HWY_INLINE Vec128<double, N> MulSub(const Vec128<double, N> mul,
+                                    const Vec128<double, N> x,
+                                    const Vec128<double, N> sub) {
+  const auto negated_sub = Neg(sub);
+  return MulAdd(mul, x, negated_sub);
+}
+// -(mul * x + sub)
+template <size_t N>
+HWY_INLINE Vec128<double, N> NegMulSub(const Vec128<double, N> mul,
+                                       const Vec128<double, N> x,
+                                       const Vec128<double, N> sub) {
+  const auto sum = MulAdd(mul, x, sub);
+  return Neg(sum);
+}
+#endif
+
+// ------------------------------ Floating-point square root (IfThenZeroElse)
+
+// Approximate reciprocal square root
+// Reciprocal square-root estimate via vrsqrte; full-width overload first,
+// then the partial-vector template.
+HWY_INLINE Vec128<float> ApproximateReciprocalSqrt(const Vec128<float> v) {
+  const float32x4_t estimate = vrsqrteq_f32(v.raw);
+  return Vec128<float>(estimate);
+}
+template <size_t N>
+HWY_INLINE Vec128<float, N> ApproximateReciprocalSqrt(
+    const Vec128<float, N> v) {
+  const float32x2_t estimate = vrsqrte_f32(v.raw);
+  return Vec128<float, N>(estimate);
+}
+
+// Full precision square root
+#if HWY_ARCH_ARM_A64
+// A64: generated directly from the vsqrt intrinsic family.
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Sqrt, vsqrt, _, 1)
+#else
+namespace detail {
+
+// Refinement factor from vrsqrts (full-width vectors).
+HWY_INLINE Vec128<float> ReciprocalSqrtStep(const Vec128<float> root,
+                                            const Vec128<float> recip) {
+  const float32x4_t factor = vrsqrtsq_f32(root.raw, recip.raw);
+  return Vec128<float>(factor);
+}
+// Same, for partial vectors.
+template <size_t N>
+HWY_INLINE Vec128<float, N> ReciprocalSqrtStep(const Vec128<float, N> root,
+                                               Vec128<float, N> recip) {
+  const float32x2_t factor = vrsqrts_f32(root.raw, recip.raw);
+  return Vec128<float, N>(factor);
+}
+
+}  // namespace detail
+
+// armv7 has no vector sqrt: refine a reciprocal-sqrt estimate three times and
+// multiply by v. Lanes where v == 0 are forced to zero.
+template <size_t N>
+HWY_INLINE Vec128<float, N> Sqrt(const Vec128<float, N> v) {
+  auto rsqrt = ApproximateReciprocalSqrt(v);
+
+  for (int round = 0; round < 3; ++round) {
+    rsqrt *= detail::ReciprocalSqrtStep(v * rsqrt, rsqrt);
+  }
+
+  const auto result = v * rsqrt;
+  const auto is_zero = v == Zero(Simd<float, N>());
+  return IfThenZeroElse(is_zero, result);
+}
+#endif
+
+// ================================================== LOGICAL
+
+// ------------------------------ Not
+
+// There is no 64-bit vmvn, so cast instead of using HWY_NEON_DEF_FUNCTION.
+// Bitwise NOT of a full vector: view it as bytes, invert with vmvnq_u8, and
+// cast back to the original lane type.
+template <typename T>
+HWY_INLINE Vec128<T> Not(const Vec128<T> v) {
+  const Full128<T> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  const Vec128<uint8_t> inverted(vmvnq_u8(BitCast(d8, v).raw));
+  return BitCast(d, inverted);
+}
+// Partial-vector variant using the 64-bit vmvn_u8.
+template <typename T, size_t N, HWY_IF_LE64(T, N)>
+HWY_INLINE Vec128<T, N> Not(const Vec128<T, N> v) {
+  const Simd<T, N> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  using V8 = decltype(Zero(d8));
+  const V8 inverted(vmvn_u8(BitCast(d8, v).raw));
+  return BitCast(d, inverted);
+}
+
+// ------------------------------ And
+// Integer lanes: generated from the vand intrinsic family.
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(And, vand, _, 2)
+
+// Float lanes: bit-cast to the same-width unsigned type, AND there, and cast
+// back.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_INLINE Vec128<T, N> And(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const Simd<T, N> df;
+  const Simd<MakeUnsigned<T>, N> du;
+  const auto bits = BitCast(du, a) & BitCast(du, b);
+  return BitCast(df, bits);
+}
+
+// ------------------------------ AndNot
+
+namespace internal {
+// reversed_andnot(a, b) yields a & ~b (vbic operand order).
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(reversed_andnot, vbic, _, 2)
+}  // namespace internal
+
+// Integer lanes: ~not_mask & mask. The operands are swapped because the
+// underlying helper complements its second argument.
+template <typename T, size_t N, HWY_IF_NOT_FLOAT(T)>
+HWY_INLINE Vec128<T, N> AndNot(const Vec128<T, N> not_mask,
+                               const Vec128<T, N> mask) {
+  const Vec128<T, N> cleared = internal::reversed_andnot(mask, not_mask);
+  return cleared;
+}
+
+// Float lanes: same operation performed on the unsigned bit patterns.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_INLINE Vec128<T, N> AndNot(const Vec128<T, N> not_mask,
+                               const Vec128<T, N> mask) {
+  const Simd<T, N> df;
+  const Simd<MakeUnsigned<T>, N> du;
+  const auto mask_bits = BitCast(du, mask);
+  const auto not_mask_bits = BitCast(du, not_mask);
+  Vec128<MakeUnsigned<T>, N> cleared =
+      internal::reversed_andnot(mask_bits, not_mask_bits);
+  return BitCast(df, cleared);
+}
+
+// ------------------------------ Or
+
+// Integer lanes: generated from the vorr intrinsic family.
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(Or, vorr, _, 2)
+
+// Float lanes: bit-cast to the same-width unsigned type, OR there, and cast
+// back.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_INLINE Vec128<T, N> Or(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const Simd<T, N> df;
+  const Simd<MakeUnsigned<T>, N> du;
+  const auto bits = BitCast(du, a) | BitCast(du, b);
+  return BitCast(df, bits);
+}
+
+// ------------------------------ Xor
+
+// Integer lanes: generated from the veor intrinsic family.
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(Xor, veor, _, 2)
+
+// Float lanes: bit-cast to the same-width unsigned type, XOR there, and cast
+// back.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_INLINE Vec128<T, N> Xor(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const Simd<T, N> df;
+  const Simd<MakeUnsigned<T>, N> du;
+  const auto bits = BitCast(du, a) ^ BitCast(du, b);
+  return BitCast(df, bits);
+}
+
+// ------------------------------ Operator overloads (internal-only if float)
+
+// a & b: forwards to And.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> operator&(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const Vec128<T, N> result = And(a, b);
+  return result;
+}
+
+// a | b: forwards to Or.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> operator|(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const Vec128<T, N> result = Or(a, b);
+  return result;
+}
+
+// a ^ b: forwards to Xor.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> operator^(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const Vec128<T, N> result = Xor(a, b);
+  return result;
+}
+
+// ------------------------------ CopySign
+
+// Combines the non-sign bits of `magn` with the sign bit of `sign`.
+// Floating-point lane types only (enforced at compile time).
+template <typename T, size_t N>
+HWY_API Vec128<T, N> CopySign(const Vec128<T, N> magn,
+                              const Vec128<T, N> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+  const auto sign_mask = SignBit(Simd<T, N>());
+  const auto magnitude_bits = AndNot(sign_mask, magn);
+  const auto sign_bits = And(sign_mask, sign);
+  return Or(magnitude_bits, sign_bits);
+}
+
+// ORs the sign bit of `sign` into `abs`; `abs` itself is not masked, so the
+// caller is expected to pass a value whose sign bit is already clear.
+// Floating-point lane types only (enforced at compile time).
+template <typename T, size_t N>
+HWY_API Vec128<T, N> CopySignToAbs(const Vec128<T, N> abs,
+                                   const Vec128<T, N> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+  const auto sign_bits = And(SignBit(Simd<T, N>()), sign);
+  return Or(abs, sign_bits);
+}
+
+// ------------------------------ BroadcastSignBit
+
+// Shifts each lane right by (lane width in bits - 1), moving the top bit down
+// through ShiftRight.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> BroadcastSignBit(const Vec128<T, N> v) {
+  constexpr int kTopBit = sizeof(T) * 8 - 1;
+  return ShiftRight<kTopBit>(v);
+}
+
+// ================================================== MASK
+
+// ------------------------------ To/from vector
+
+// Mask and Vec have the same representation (true = FF..FF).
+// Vector -> mask: reinterprets the lanes as unsigned and wraps the raw
+// register in a Mask128.
+template <typename T, size_t N>
+HWY_INLINE Mask128<T, N> MaskFromVec(const Vec128<T, N> v) {
+  const Simd<MakeUnsigned<T>, N> du;
+  const auto as_unsigned = BitCast(du, v);
+  return Mask128<T, N>(as_unsigned.raw);
+}
+
+// DEPRECATED: prefer the overload below that takes a descriptor.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> VecFromMask(const Mask128<T, N> v) {
+  const Vec128<MakeUnsigned<T>, N> as_unsigned(v.raw);
+  return BitCast(Simd<T, N>(), as_unsigned);
+}
+
+// Mask -> vector: wraps the raw mask register as an unsigned vector and
+// bit-casts it to the lane type described by `d`.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> VecFromMask(Simd<T, N> d, const Mask128<T, N> v) {
+  const Vec128<MakeUnsigned<T>, N> as_unsigned(v.raw);
+  return BitCast(d, as_unsigned);
+}
+
+// ------------------------------ RebindMask
+
+// Reinterprets a mask for lanes of TFrom as a mask for lanes of TTo. Both lane
+// types must have the same size (checked at compile time).
+template <typename TFrom, typename TTo, size_t N>
+HWY_API Mask128<TTo, N> RebindMask(Simd<TTo, N> dto, Mask128<TFrom, N> m) {
+  static_assert(sizeof(TFrom) == sizeof(TTo), "Must have same size");
+  const Simd<TFrom, N> dfrom;
+  const auto as_vec = VecFromMask(dfrom, m);
+  return MaskFromVec(BitCast(dto, as_vec));
+}
+
+// ------------------------------ IfThenElse(mask, yes, no) = mask ? yes : no.
+
+// Signature pieces for the HWY_IF variant of the function generator
+// (presumably consumed by the HWY_NEON_DEF_FUNCTION_* macros defined
+// elsewhere): no extra template prefix, a Vec128 return type, a
+// (mask, yes, no) parameter list, and the raw registers forwarded in that
+// same order to the intrinsic.
+#define HWY_NEON_BUILD_TPL_HWY_IF
+#define HWY_NEON_BUILD_RET_HWY_IF(type, size) Vec128<type, size>
+#define HWY_NEON_BUILD_PARAM_HWY_IF(type, size)                 \
+  const Mask128<type, size> mask, const Vec128<type, size> yes, \
+      const Vec128<type, size> no
+#define HWY_NEON_BUILD_ARG_HWY_IF mask.raw, yes.raw, no.raw
+
+// IfThenElse for every lane type, implemented with the vbsl intrinsic family.
+HWY_NEON_DEF_FUNCTION_ALL_TYPES(IfThenElse, vbsl, _, HWY_IF)
+
+// The helper macros are only needed for the instantiation above.
+#undef HWY_NEON_BUILD_TPL_HWY_IF
+#undef HWY_NEON_BUILD_RET_HWY_IF
+#undef HWY_NEON_BUILD_PARAM_HWY_IF
+#undef HWY_NEON_BUILD_ARG_HWY_IF
+
+// mask ? yes : 0
+// mask ? yes : 0 -- AND `yes` with the mask expanded to a vector.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> IfThenElseZero(const Mask128<T, N> mask,
+                                       const Vec128<T, N> yes) {
+  const auto mask_bits = VecFromMask(Simd<T, N>(), mask);
+  return yes & mask_bits;
+}
+
+// mask ? 0 : no -- clear the lanes of `no` selected by the mask.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> IfThenZeroElse(const Mask128<T, N> mask,
+                                       const Vec128<T, N> no) {
+  const auto mask_bits = VecFromMask(Simd<T, N>(), mask);
+  return AndNot(mask_bits, no);
+}
+
+// Lane-wise Max(0, v).
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> ZeroIfNegative(Vec128<T, N> v) {
+  return Max(Zero(Simd<T, N>()), v);
+}
+
+// ------------------------------ Mask logical
+
+// Logical operations on masks. Each one converts its operands to vectors,
+// applies the corresponding vector operation, and converts the result back.
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Not(const Mask128<T, N> m) {
+  const auto bits = VecFromMask(Simd<T, N>(), m);
+  return MaskFromVec(Not(bits));
+}
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> And(const Mask128<T, N> a, Mask128<T, N> b) {
+  const Simd<T, N> d;
+  const auto a_bits = VecFromMask(d, a);
+  const auto b_bits = VecFromMask(d, b);
+  return MaskFromVec(And(a_bits, b_bits));
+}
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> AndNot(const Mask128<T, N> a, Mask128<T, N> b) {
+  const Simd<T, N> d;
+  const auto a_bits = VecFromMask(d, a);
+  const auto b_bits = VecFromMask(d, b);
+  return MaskFromVec(AndNot(a_bits, b_bits));
+}
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Or(const Mask128<T, N> a, Mask128<T, N> b) {
+  const Simd<T, N> d;
+  const auto a_bits = VecFromMask(d, a);
+  const auto b_bits = VecFromMask(d, b);
+  return MaskFromVec(Or(a_bits, b_bits));
+}
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Xor(const Mask128<T, N> a, Mask128<T, N> b) {
+  const Simd<T, N> d;
+  const auto a_bits = VecFromMask(d, a);
+  const auto b_bits = VecFromMask(d, b);
+  return MaskFromVec(Xor(a_bits, b_bits));
+}
+
+// ================================================== COMPARE
+
+// Comparisons fill a lane with 1-bits if the condition is true, else 0.
+
+// ------------------------------ Shuffle2301 (for i64 compares)
+
+// Swap 32-bit halves in 64-bits
+// Swaps the two 32-bit lanes within each 64-bit group using vrev64.
+// 64-bit (two-lane) vectors:
+HWY_INLINE Vec128<uint32_t, 2> Shuffle2301(const Vec128<uint32_t, 2> v) {
+  const uint32x2_t swapped = vrev64_u32(v.raw);
+  return Vec128<uint32_t, 2>(swapped);
+}
+HWY_INLINE Vec128<int32_t, 2> Shuffle2301(const Vec128<int32_t, 2> v) {
+  const int32x2_t swapped = vrev64_s32(v.raw);
+  return Vec128<int32_t, 2>(swapped);
+}
+HWY_INLINE Vec128<float, 2> Shuffle2301(const Vec128<float, 2> v) {
+  const float32x2_t swapped = vrev64_f32(v.raw);
+  return Vec128<float, 2>(swapped);
+}
+// Full 128-bit (four-lane) vectors:
+HWY_INLINE Vec128<uint32_t> Shuffle2301(const Vec128<uint32_t> v) {
+  const uint32x4_t swapped = vrev64q_u32(v.raw);
+  return Vec128<uint32_t>(swapped);
+}
+HWY_INLINE Vec128<int32_t> Shuffle2301(const Vec128<int32_t> v) {
+  const int32x4_t swapped = vrev64q_s32(v.raw);
+  return Vec128<int32_t>(swapped);
+}
+HWY_INLINE Vec128<float> Shuffle2301(const Vec128<float> v) {
+  const float32x4_t swapped = vrev64q_f32(v.raw);
+  return Vec128<float>(swapped);
+}
+
+// Signature pieces for the HWY_COMPARE variant of the function generator
+// (presumably consumed by the HWY_NEON_DEF_FUNCTION_* macros defined
+// elsewhere): no extra template prefix, a Mask128 return type, two vector
+// parameters a and b, and their raw registers as intrinsic arguments.
+#define HWY_NEON_BUILD_TPL_HWY_COMPARE
+#define HWY_NEON_BUILD_RET_HWY_COMPARE(type, size) Mask128<type, size>
+#define HWY_NEON_BUILD_PARAM_HWY_COMPARE(type, size) \
+  const Vec128<type, size> a, const Vec128<type, size> b
+#define HWY_NEON_BUILD_ARG_HWY_COMPARE a.raw, b.raw
+
+// ------------------------------ Equality
+// Floats on every target; integers of all widths on A64, but only 8/16/32-bit
+// integers on armv7.
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator==, vceq, _, HWY_COMPARE)
+#if HWY_ARCH_ARM_A64
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(operator==, vceq, _, HWY_COMPARE)
+#else
+// No 64-bit comparisons on armv7: emulate them below, after Shuffle2301.
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(operator==, vceq, _, HWY_COMPARE)
+HWY_NEON_DEF_FUNCTION_UINT_8_16_32(operator==, vceq, _, HWY_COMPARE)
+#endif
+
+// ------------------------------ Strict inequality (signed, float)
+// Signed integers (8/16/32-bit only on armv7) and all float types; no
+// unsigned operator< is generated here.
+#if HWY_ARCH_ARM_A64
+HWY_NEON_DEF_FUNCTION_INTS(operator<, vclt, _, HWY_COMPARE)
+#else
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(operator<, vclt, _, HWY_COMPARE)
+#endif
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator<, vclt, _, HWY_COMPARE)
+
+// ------------------------------ Weak inequality (float)
+// operator<= is generated for float types only.
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator<=, vcle, _, HWY_COMPARE)
+
+// The helper macros are only needed for the instantiations above.
+#undef HWY_NEON_BUILD_TPL_HWY_COMPARE
+#undef HWY_NEON_BUILD_RET_HWY_COMPARE
+#undef HWY_NEON_BUILD_PARAM_HWY_COMPARE
+#undef HWY_NEON_BUILD_ARG_HWY_COMPARE
+
+// ------------------------------ ARMv7 i64 compare (Shuffle2301, Eq)
+
+#if HWY_ARCH_ARM_V7
+
+// 64-bit equality built from 32-bit compares: a 64-bit lane is equal only if
+// both of its 32-bit halves are, so each half's result is ANDed with its
+// neighbor's (Shuffle2301 swaps the halves).
+template <size_t N>
+HWY_INLINE Mask128<int64_t, N> operator==(const Vec128<int64_t, N> a,
+                                          const Vec128<int64_t, N> b) {
+  const Simd<int64_t, N> d64;
+  const Simd<int32_t, N * 2> d32;
+  const auto halves_eq =
+      VecFromMask(d32, Eq(BitCast(d32, a), BitCast(d32, b)));
+  const auto both_eq = halves_eq & Shuffle2301(halves_eq);
+  return MaskFromVec(BitCast(d64, both_eq));
+}
+
+template <size_t N>
+HWY_INLINE Mask128<uint64_t, N> operator==(const Vec128<uint64_t, N> a,
+                                           const Vec128<uint64_t, N> b) {
+  const Simd<uint64_t, N> d64;
+  const Simd<uint32_t, N * 2> d32;
+  const auto halves_eq =
+      VecFromMask(d32, Eq(BitCast(d32, a), BitCast(d32, b)));
+  const auto both_eq = halves_eq & Shuffle2301(halves_eq);
+  return MaskFromVec(BitCast(d64, both_eq));
+}
+
+// Signed 64-bit a < b: the sign of the saturating difference a - b, spread
+// across the lane by BroadcastSignBit.
+HWY_INLINE Mask128<int64_t> operator<(const Vec128<int64_t> a,
+                                      const Vec128<int64_t> b) {
+  const Vec128<int64_t> diff(vqsubq_s64(a.raw, b.raw));
+  return MaskFromVec(BroadcastSignBit(diff));
+}
+HWY_INLINE Mask128<int64_t, 1> operator<(const Vec128<int64_t, 1> a,
+                                         const Vec128<int64_t, 1> b) {
+  const Vec128<int64_t, 1> diff(vqsub_s64(a.raw, b.raw));
+  return MaskFromVec(BroadcastSignBit(diff));
+}
+
+#endif
+
+// ------------------------------ Reversed comparisons
+
+// a > b is evaluated as b < a.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> operator>(Vec128<T, N> a, Vec128<T, N> b) {
+  const Mask128<T, N> swapped_lt = operator<(b, a);
+  return swapped_lt;
+}
+// a >= b is evaluated as b <= a.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> operator>=(Vec128<T, N> a, Vec128<T, N> b) {
+  const Mask128<T, N> swapped_le = operator<=(b, a);
+  return swapped_le;
+}
+
+// ------------------------------ FirstN (Iota, Lt)
+
+// Returns a mask whose first `num` lanes are true; all lanes are true when
+// num >= N. `num` is clamped to the lane count before the narrowing cast so
+// that large values (e.g. num >= 128 with 8-bit lanes) cannot wrap to a
+// negative count and produce an all-false mask.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> FirstN(const Simd<T, N> d, size_t num) {
+  const RebindToSigned<decltype(d)> di;  // Signed comparisons are cheaper.
+  const size_t clamped = num < N ? num : N;  // N always fits in the lane type.
+  return RebindMask(
+      d, Iota(di, 0) < Set(di, static_cast<MakeSigned<T>>(clamped)));
+}
+
+// ------------------------------ TestBit (Eq)
+
+// Signature pieces for the HWY_TESTBIT variant of the function generator
+// (presumably consumed by the HWY_NEON_DEF_FUNCTION_* macros defined
+// elsewhere): no extra template prefix, a Mask128 return type, parameters
+// (v, bit), and their raw registers as intrinsic arguments.
+#define HWY_NEON_BUILD_TPL_HWY_TESTBIT
+#define HWY_NEON_BUILD_RET_HWY_TESTBIT(type, size) Mask128<type, size>
+#define HWY_NEON_BUILD_PARAM_HWY_TESTBIT(type, size) \
+  Vec128<type, size> v, Vec128<type, size> bit
+#define HWY_NEON_BUILD_ARG_HWY_TESTBIT v.raw, bit.raw
+
+#if HWY_ARCH_ARM_A64
+// A64: vtst covers every integer lane width.
+HWY_NEON_DEF_FUNCTION_INTS_UINTS(TestBit, vtst, _, HWY_TESTBIT)
+#else
+// No 64-bit versions on armv7
+HWY_NEON_DEF_FUNCTION_UINT_8_16_32(TestBit, vtst, _, HWY_TESTBIT)
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(TestBit, vtst, _, HWY_TESTBIT)
+
+// 64-bit lanes on armv7: a lane is true when masking v with `bit` gives back
+// `bit` itself, using the emulated 64-bit operator==.
+template <size_t N>
+HWY_INLINE Mask128<uint64_t, N> TestBit(Vec128<uint64_t, N> v,
+                                        Vec128<uint64_t, N> bit) {
+  return (v & bit) == bit;
+}
+template <size_t N>
+HWY_INLINE Mask128<int64_t, N> TestBit(Vec128<int64_t, N> v,
+                                       Vec128<int64_t, N> bit) {
+  return (v & bit) == bit;
+}
+
+#endif
+// The helper macros are only needed for the instantiations above.
+#undef HWY_NEON_BUILD_TPL_HWY_TESTBIT
+#undef HWY_NEON_BUILD_RET_HWY_TESTBIT
+#undef HWY_NEON_BUILD_PARAM_HWY_TESTBIT
+#undef HWY_NEON_BUILD_ARG_HWY_TESTBIT
+
+// ------------------------------ Abs i64 (IfThenElse, BroadcastSignBit)
+// Absolute value of a full vector of int64_t lanes. A64 uses vabsq_s64;
+// otherwise lanes whose sign bit is set are replaced by (0 - v).
+HWY_INLINE Vec128<int64_t> Abs(const Vec128<int64_t> v) {
+#if HWY_ARCH_ARM_A64
+  const auto magnitude = vabsq_s64(v.raw);
+  return Vec128<int64_t>(magnitude);
+#else
+  const auto is_negative = MaskFromVec(BroadcastSignBit(v));
+  const auto negated = Zero(Full128<int64_t>()) - v;
+  return IfThenElse(is_negative, negated, v);
+#endif
+}
+// Single-lane int64_t variant of Abs. A64 uses vabs_s64; otherwise a lane
+// whose sign bit is set is replaced by (0 - v).
+HWY_INLINE Vec128<int64_t, 1> Abs(const Vec128<int64_t, 1> v) {
+#if HWY_ARCH_ARM_A64
+  const auto magnitude = vabs_s64(v.raw);
+  return Vec128<int64_t, 1>(magnitude);
+#else
+  const auto is_negative = MaskFromVec(BroadcastSignBit(v));
+  const auto negated = Zero(Simd<int64_t, 1>()) - v;
+  return IfThenElse(is_negative, negated, v);
+#endif
+}
+
+// ------------------------------ Min (IfThenElse, BroadcastSignBit)
+
+#if HWY_ARCH_ARM_A64
+
+// Unsigned 64-bit less-than, compiled only for A64. Used by the uint64_t
+// Min/Max overloads that follow.
+HWY_INLINE Mask128<uint64_t> operator<(Vec128<uint64_t> a, Vec128<uint64_t> b) {
+  const auto lt = vcltq_u64(a.raw, b.raw);
+  return Mask128<uint64_t>(lt);
+}
+HWY_INLINE Mask128<uint64_t, 1> operator<(Vec128<uint64_t, 1> a,
+                                          Vec128<uint64_t, 1> b) {
+  const auto lt = vclt_u64(a.raw, b.raw);
+  return Mask128<uint64_t, 1>(lt);
+}
+
+#endif
+
+// Unsigned: 8/16/32-bit lanes are generated by the macro from vmin; the
+// 64-bit overload is written out below.
+HWY_NEON_DEF_FUNCTION_UINT_8_16_32(Min, vmin, _, 2)
+
+template <size_t N>
+HWY_INLINE Vec128<uint64_t, N> Min(const Vec128<uint64_t, N> a,
+                                   const Vec128<uint64_t, N> b) {
+#if HWY_ARCH_ARM_A64
+  const auto b_is_less = b < a;
+  return IfThenElse(b_is_less, b, a);
+#else
+  // min = a - SaturatedSub(a, b). Assuming SaturatedSub clamps at zero, the
+  // subtrahend is (a - b) when a > b and zero otherwise.
+  const Simd<int64_t, N> di;
+  const Simd<uint64_t, N> du;
+  const auto excess = BitCast(di, detail::SaturatedSub(a, b));
+  return BitCast(du, BitCast(di, a) - excess);
+#endif
+}
+
+// Signed: 8/16/32-bit lanes are generated by the macro from vmin; the 64-bit
+// overload is written out below.
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(Min, vmin, _, 2)
+
+template <size_t N>
+HWY_INLINE Vec128<int64_t, N> Min(const Vec128<int64_t, N> a,
+                                  const Vec128<int64_t, N> b) {
+#if HWY_ARCH_ARM_A64
+  const auto b_is_less = b < a;
+  return IfThenElse(b_is_less, b, a);
+#else
+  // The sign of the saturated difference (a - b) tells whether a is smaller.
+  const Vec128<int64_t, N> diff = detail::SaturatedSub(a, b);
+  const auto a_is_less = MaskFromVec(BroadcastSignBit(diff));
+  return IfThenElse(a_is_less, a, b);
+#endif
+}
+
+// Float: IEEE minimumNumber on v8, otherwise NaN if any is NaN.
+#if HWY_ARCH_ARM_A64
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Min, vminnm, _, 2)
+#else
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Min, vmin, _, 2)
+#endif
+
+// ------------------------------ Max (IfThenElse, BroadcastSignBit)
+
+// Unsigned: the macro covers 8/16/32-bit lanes only (no u64); the 64-bit
+// overload is written out below.
+HWY_NEON_DEF_FUNCTION_UINT_8_16_32(Max, vmax, _, 2)
+
+template <size_t N>
+HWY_INLINE Vec128<uint64_t, N> Max(const Vec128<uint64_t, N> a,
+                                   const Vec128<uint64_t, N> b) {
+#if HWY_ARCH_ARM_A64
+  const auto b_is_less = b < a;
+  return IfThenElse(b_is_less, a, b);
+#else
+  // max = b + SaturatedSub(a, b). Assuming SaturatedSub clamps at zero, the
+  // addend is (a - b) when a > b and zero otherwise.
+  const Simd<int64_t, N> di;
+  const Simd<uint64_t, N> du;
+  const auto excess = BitCast(di, detail::SaturatedSub(a, b));
+  return BitCast(du, BitCast(di, b) + excess);
+#endif
+}
+
+// Signed: the macro covers 8/16/32-bit lanes only (no i64); the 64-bit
+// overload is written out below.
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(Max, vmax, _, 2)
+
+template <size_t N>
+HWY_INLINE Vec128<int64_t, N> Max(const Vec128<int64_t, N> a,
+                                  const Vec128<int64_t, N> b) {
+#if HWY_ARCH_ARM_A64
+  const auto b_is_less = b < a;
+  return IfThenElse(b_is_less, a, b);
+#else
+  // The sign of the saturated difference (a - b) tells whether b is larger.
+  const Vec128<int64_t, N> diff = detail::SaturatedSub(a, b);
+  const auto a_is_less = MaskFromVec(BroadcastSignBit(diff));
+  return IfThenElse(a_is_less, b, a);
+#endif
+}
+
+// Float: IEEE maximumNumber on v8, otherwise NaN if any is NaN.
+#if HWY_ARCH_ARM_A64
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Max, vmaxnm, _, 2)
+#else
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Max, vmax, _, 2)
+#endif
+
+// ================================================== MEMORY
+
+// ------------------------------ Load 128
+
+// Full-vector loads: one overload per lane type, each forwarding to the
+// matching vld1q intrinsic. The pointer parameter is named `aligned`, but
+// these are the LoadU overloads that Load() also forwards to.
+HWY_INLINE Vec128<uint8_t> LoadU(Full128<uint8_t> /* tag */,
+                                 const uint8_t* HWY_RESTRICT aligned) {
+  const auto raw = vld1q_u8(aligned);
+  return Vec128<uint8_t>(raw);
+}
+HWY_INLINE Vec128<uint16_t> LoadU(Full128<uint16_t> /* tag */,
+                                  const uint16_t* HWY_RESTRICT aligned) {
+  const auto raw = vld1q_u16(aligned);
+  return Vec128<uint16_t>(raw);
+}
+HWY_INLINE Vec128<uint32_t> LoadU(Full128<uint32_t> /* tag */,
+                                  const uint32_t* HWY_RESTRICT aligned) {
+  const auto raw = vld1q_u32(aligned);
+  return Vec128<uint32_t>(raw);
+}
+HWY_INLINE Vec128<uint64_t> LoadU(Full128<uint64_t> /* tag */,
+                                  const uint64_t* HWY_RESTRICT aligned) {
+  const auto raw = vld1q_u64(aligned);
+  return Vec128<uint64_t>(raw);
+}
+HWY_INLINE Vec128<int8_t> LoadU(Full128<int8_t> /* tag */,
+                                const int8_t* HWY_RESTRICT aligned) {
+  const auto raw = vld1q_s8(aligned);
+  return Vec128<int8_t>(raw);
+}
+HWY_INLINE Vec128<int16_t> LoadU(Full128<int16_t> /* tag */,
+                                 const int16_t* HWY_RESTRICT aligned) {
+  const auto raw = vld1q_s16(aligned);
+  return Vec128<int16_t>(raw);
+}
+HWY_INLINE Vec128<int32_t> LoadU(Full128<int32_t> /* tag */,
+                                 const int32_t* HWY_RESTRICT aligned) {
+  const auto raw = vld1q_s32(aligned);
+  return Vec128<int32_t>(raw);
+}
+HWY_INLINE Vec128<int64_t> LoadU(Full128<int64_t> /* tag */,
+                                 const int64_t* HWY_RESTRICT aligned) {
+  const auto raw = vld1q_s64(aligned);
+  return Vec128<int64_t>(raw);
+}
+HWY_INLINE Vec128<float> LoadU(Full128<float> /* tag */,
+                               const float* HWY_RESTRICT aligned) {
+  const auto raw = vld1q_f32(aligned);
+  return Vec128<float>(raw);
+}
+#if HWY_ARCH_ARM_A64
+HWY_INLINE Vec128<double> LoadU(Full128<double> /* tag */,
+                                const double* HWY_RESTRICT aligned) {
+  const auto raw = vld1q_f64(aligned);
+  return Vec128<double>(raw);
+}
+#endif
+
+// ------------------------------ Load 64
+
+// 64-bit (half-vector) loads: one overload per lane type, each forwarding to
+// the matching vld1 intrinsic.
+HWY_INLINE Vec128<uint8_t, 8> LoadU(Simd<uint8_t, 8> /* tag */,
+                                    const uint8_t* HWY_RESTRICT p) {
+  const auto raw = vld1_u8(p);
+  return Vec128<uint8_t, 8>(raw);
+}
+HWY_INLINE Vec128<uint16_t, 4> LoadU(Simd<uint16_t, 4> /* tag */,
+                                     const uint16_t* HWY_RESTRICT p) {
+  const auto raw = vld1_u16(p);
+  return Vec128<uint16_t, 4>(raw);
+}
+HWY_INLINE Vec128<uint32_t, 2> LoadU(Simd<uint32_t, 2> /* tag */,
+                                     const uint32_t* HWY_RESTRICT p) {
+  const auto raw = vld1_u32(p);
+  return Vec128<uint32_t, 2>(raw);
+}
+HWY_INLINE Vec128<uint64_t, 1> LoadU(Simd<uint64_t, 1> /* tag */,
+                                     const uint64_t* HWY_RESTRICT p) {
+  const auto raw = vld1_u64(p);
+  return Vec128<uint64_t, 1>(raw);
+}
+HWY_INLINE Vec128<int8_t, 8> LoadU(Simd<int8_t, 8> /* tag */,
+                                   const int8_t* HWY_RESTRICT p) {
+  const auto raw = vld1_s8(p);
+  return Vec128<int8_t, 8>(raw);
+}
+HWY_INLINE Vec128<int16_t, 4> LoadU(Simd<int16_t, 4> /* tag */,
+                                    const int16_t* HWY_RESTRICT p) {
+  const auto raw = vld1_s16(p);
+  return Vec128<int16_t, 4>(raw);
+}
+HWY_INLINE Vec128<int32_t, 2> LoadU(Simd<int32_t, 2> /* tag */,
+                                    const int32_t* HWY_RESTRICT p) {
+  const auto raw = vld1_s32(p);
+  return Vec128<int32_t, 2>(raw);
+}
+HWY_INLINE Vec128<int64_t, 1> LoadU(Simd<int64_t, 1> /* tag */,
+                                    const int64_t* HWY_RESTRICT p) {
+  const auto raw = vld1_s64(p);
+  return Vec128<int64_t, 1>(raw);
+}
+HWY_INLINE Vec128<float, 2> LoadU(Simd<float, 2> /* tag */,
+                                  const float* HWY_RESTRICT p) {
+  const auto raw = vld1_f32(p);
+  return Vec128<float, 2>(raw);
+}
+#if HWY_ARCH_ARM_A64
+HWY_INLINE Vec128<double, 1> LoadU(Simd<double, 1> /* tag */,
+                                   const double* HWY_RESTRICT p) {
+  const auto raw = vld1_f64(p);
+  return Vec128<double, 1>(raw);
+}
+#endif
+
+// ------------------------------ Load 32
+
+// In the following load functions, |a| is purposely undefined.
+// It is a required parameter to the intrinsic, however
+// we don't actually care what is in it, and we don't want
+// to introduce extra overhead by initializing it to something.
+
+// 32-bit loads: four bytes are read into lane 0 of a 64-bit register whose
+// other lane comes from the uninitialized placeholder |a|.
+// NOTE(review): the 8- and 16-bit overloads read through a pointer cast to a
+// 32-bit type - confirm callers tolerate the aliasing/alignment implications.
+HWY_INLINE Vec128<uint8_t, 4> LoadU(Simd<uint8_t, 4> /*tag*/,
+                                    const uint8_t* HWY_RESTRICT p) {
+  const auto p32 = reinterpret_cast<const uint32_t*>(p);
+  uint32x2_t a = Undefined(Simd<uint32_t, 2>()).raw;
+  const uint32x2_t loaded = vld1_lane_u32(p32, a, 0);
+  return Vec128<uint8_t, 4>(vreinterpret_u8_u32(loaded));
+}
+HWY_INLINE Vec128<uint16_t, 2> LoadU(Simd<uint16_t, 2> /*tag*/,
+                                     const uint16_t* HWY_RESTRICT p) {
+  const auto p32 = reinterpret_cast<const uint32_t*>(p);
+  uint32x2_t a = Undefined(Simd<uint32_t, 2>()).raw;
+  const uint32x2_t loaded = vld1_lane_u32(p32, a, 0);
+  return Vec128<uint16_t, 2>(vreinterpret_u16_u32(loaded));
+}
+HWY_INLINE Vec128<uint32_t, 1> LoadU(Simd<uint32_t, 1> /*tag*/,
+                                     const uint32_t* HWY_RESTRICT p) {
+  uint32x2_t a = Undefined(Simd<uint32_t, 2>()).raw;
+  const uint32x2_t loaded = vld1_lane_u32(p, a, 0);
+  return Vec128<uint32_t, 1>(loaded);
+}
+HWY_INLINE Vec128<int8_t, 4> LoadU(Simd<int8_t, 4> /*tag*/,
+                                   const int8_t* HWY_RESTRICT p) {
+  const auto p32 = reinterpret_cast<const int32_t*>(p);
+  int32x2_t a = Undefined(Simd<int32_t, 2>()).raw;
+  const int32x2_t loaded = vld1_lane_s32(p32, a, 0);
+  return Vec128<int8_t, 4>(vreinterpret_s8_s32(loaded));
+}
+HWY_INLINE Vec128<int16_t, 2> LoadU(Simd<int16_t, 2> /*tag*/,
+                                    const int16_t* HWY_RESTRICT p) {
+  const auto p32 = reinterpret_cast<const int32_t*>(p);
+  int32x2_t a = Undefined(Simd<int32_t, 2>()).raw;
+  const int32x2_t loaded = vld1_lane_s32(p32, a, 0);
+  return Vec128<int16_t, 2>(vreinterpret_s16_s32(loaded));
+}
+HWY_INLINE Vec128<int32_t, 1> LoadU(Simd<int32_t, 1> /*tag*/,
+                                    const int32_t* HWY_RESTRICT p) {
+  int32x2_t a = Undefined(Simd<int32_t, 2>()).raw;
+  const int32x2_t loaded = vld1_lane_s32(p, a, 0);
+  return Vec128<int32_t, 1>(loaded);
+}
+HWY_INLINE Vec128<float, 1> LoadU(Simd<float, 1> /*tag*/,
+                                  const float* HWY_RESTRICT p) {
+  float32x2_t a = Undefined(Simd<float, 2>()).raw;
+  const float32x2_t loaded = vld1_lane_f32(p, a, 0);
+  return Vec128<float, 1>(loaded);
+}
+
+// ------------------------------ Load 16
+
+// 16-bit loads: two bytes are read into lane 0 of a 64-bit register whose
+// remaining lanes come from the uninitialized placeholder |a|.
+HWY_INLINE Vec128<uint8_t, 2> LoadU(Simd<uint8_t, 2> /*tag*/,
+                                    const uint8_t* HWY_RESTRICT p) {
+  const auto p16 = reinterpret_cast<const uint16_t*>(p);
+  uint16x4_t a = Undefined(Simd<uint16_t, 4>()).raw;
+  const uint16x4_t loaded = vld1_lane_u16(p16, a, 0);
+  return Vec128<uint8_t, 2>(vreinterpret_u8_u16(loaded));
+}
+HWY_INLINE Vec128<uint16_t, 1> LoadU(Simd<uint16_t, 1> /*tag*/,
+                                     const uint16_t* HWY_RESTRICT p) {
+  uint16x4_t a = Undefined(Simd<uint16_t, 4>()).raw;
+  const uint16x4_t loaded = vld1_lane_u16(p, a, 0);
+  return Vec128<uint16_t, 1>(loaded);
+}
+HWY_INLINE Vec128<int8_t, 2> LoadU(Simd<int8_t, 2> /*tag*/,
+                                   const int8_t* HWY_RESTRICT p) {
+  const auto p16 = reinterpret_cast<const int16_t*>(p);
+  int16x4_t a = Undefined(Simd<int16_t, 4>()).raw;
+  const int16x4_t loaded = vld1_lane_s16(p16, a, 0);
+  return Vec128<int8_t, 2>(vreinterpret_s8_s16(loaded));
+}
+HWY_INLINE Vec128<int16_t, 1> LoadU(Simd<int16_t, 1> /*tag*/,
+                                    const int16_t* HWY_RESTRICT p) {
+  int16x4_t a = Undefined(Simd<int16_t, 4>()).raw;
+  const int16x4_t loaded = vld1_lane_s16(p, a, 0);
+  return Vec128<int16_t, 1>(loaded);
+}
+
+// ------------------------------ Load 8
+
+// Single-byte loads: one byte is read into lane 0; the other lanes come from
+// the uninitialized placeholder |a|.
+HWY_INLINE Vec128<uint8_t, 1> LoadU(Simd<uint8_t, 1> d,
+                                    const uint8_t* HWY_RESTRICT p) {
+  uint8x8_t a = Undefined(d).raw;
+  const uint8x8_t loaded = vld1_lane_u8(p, a, 0);
+  return Vec128<uint8_t, 1>(loaded);
+}
+
+HWY_INLINE Vec128<int8_t, 1> LoadU(Simd<int8_t, 1> d,
+                                   const int8_t* HWY_RESTRICT p) {
+  int8x8_t a = Undefined(d).raw;
+  const int8x8_t loaded = vld1_lane_s8(p, a, 0);
+  return Vec128<int8_t, 1>(loaded);
+}
+
+// float16_t vectors share their Raw type with uint16_t, so delegate to the
+// uint16_t overload and rewrap the loaded register.
+template <size_t N>
+HWY_INLINE Vec128<float16_t, N> LoadU(Simd<float16_t, N> /*d*/,
+                                      const float16_t* HWY_RESTRICT p) {
+  const auto bits =
+      LoadU(Simd<uint16_t, N>(), reinterpret_cast<const uint16_t*>(p));
+  return Vec128<float16_t, N>(bits.raw);
+}
+
+// Load simply forwards to LoadU here: on ARM both are the same operation.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Load(Simd<T, N> d, const T* HWY_RESTRICT p) {
+  const Vec128<T, N> loaded = LoadU(d, p);
+  return loaded;
+}
+
+// Vectors are at most 128 bits, so there is no block to replicate: this is
+// just an unaligned load.
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_INLINE Vec128<T, N> LoadDup128(Simd<T, N> d,
+                                   const T* const HWY_RESTRICT p) {
+  const Vec128<T, N> loaded = LoadU(d, p);
+  return loaded;
+}
+
+// ------------------------------ Store 128
+
+// Full-vector stores: one overload per lane type, each forwarding to the
+// matching vst1q intrinsic. The pointer parameter is named `aligned`, but
+// these are the StoreU overloads that Store() also forwards to.
+HWY_INLINE void StoreU(const Vec128<uint8_t> v, Full128<uint8_t> /* tag */,
+                       uint8_t* HWY_RESTRICT aligned) {
+  const auto raw = v.raw;
+  vst1q_u8(aligned, raw);
+}
+HWY_INLINE void StoreU(const Vec128<uint16_t> v, Full128<uint16_t> /* tag */,
+                       uint16_t* HWY_RESTRICT aligned) {
+  const auto raw = v.raw;
+  vst1q_u16(aligned, raw);
+}
+HWY_INLINE void StoreU(const Vec128<uint32_t> v, Full128<uint32_t> /* tag */,
+                       uint32_t* HWY_RESTRICT aligned) {
+  const auto raw = v.raw;
+  vst1q_u32(aligned, raw);
+}
+HWY_INLINE void StoreU(const Vec128<uint64_t> v, Full128<uint64_t> /* tag */,
+                       uint64_t* HWY_RESTRICT aligned) {
+  const auto raw = v.raw;
+  vst1q_u64(aligned, raw);
+}
+HWY_INLINE void StoreU(const Vec128<int8_t> v, Full128<int8_t> /* tag */,
+                       int8_t* HWY_RESTRICT aligned) {
+  const auto raw = v.raw;
+  vst1q_s8(aligned, raw);
+}
+HWY_INLINE void StoreU(const Vec128<int16_t> v, Full128<int16_t> /* tag */,
+                       int16_t* HWY_RESTRICT aligned) {
+  const auto raw = v.raw;
+  vst1q_s16(aligned, raw);
+}
+HWY_INLINE void StoreU(const Vec128<int32_t> v, Full128<int32_t> /* tag */,
+                       int32_t* HWY_RESTRICT aligned) {
+  const auto raw = v.raw;
+  vst1q_s32(aligned, raw);
+}
+HWY_INLINE void StoreU(const Vec128<int64_t> v, Full128<int64_t> /* tag */,
+                       int64_t* HWY_RESTRICT aligned) {
+  const auto raw = v.raw;
+  vst1q_s64(aligned, raw);
+}
+HWY_INLINE void StoreU(const Vec128<float> v, Full128<float> /* tag */,
+                       float* HWY_RESTRICT aligned) {
+  const auto raw = v.raw;
+  vst1q_f32(aligned, raw);
+}
+#if HWY_ARCH_ARM_A64
+HWY_INLINE void StoreU(const Vec128<double> v, Full128<double> /* tag */,
+                       double* HWY_RESTRICT aligned) {
+  const auto raw = v.raw;
+  vst1q_f64(aligned, raw);
+}
+#endif
+
+// ------------------------------ Store 64
+
+// 64-bit (half-vector) stores: one overload per lane type, each forwarding to
+// the matching vst1 intrinsic.
+HWY_INLINE void StoreU(const Vec128<uint8_t, 8> v, Simd<uint8_t, 8> /* tag */,
+                       uint8_t* HWY_RESTRICT p) {
+  const auto raw = v.raw;
+  vst1_u8(p, raw);
+}
+HWY_INLINE void StoreU(const Vec128<uint16_t, 4> v, Simd<uint16_t, 4> /* tag */,
+                       uint16_t* HWY_RESTRICT p) {
+  const auto raw = v.raw;
+  vst1_u16(p, raw);
+}
+HWY_INLINE void StoreU(const Vec128<uint32_t, 2> v, Simd<uint32_t, 2> /* tag */,
+                       uint32_t* HWY_RESTRICT p) {
+  const auto raw = v.raw;
+  vst1_u32(p, raw);
+}
+HWY_INLINE void StoreU(const Vec128<uint64_t, 1> v, Simd<uint64_t, 1> /* tag */,
+                       uint64_t* HWY_RESTRICT p) {
+  const auto raw = v.raw;
+  vst1_u64(p, raw);
+}
+HWY_INLINE void StoreU(const Vec128<int8_t, 8> v, Simd<int8_t, 8> /* tag */,
+                       int8_t* HWY_RESTRICT p) {
+  const auto raw = v.raw;
+  vst1_s8(p, raw);
+}
+HWY_INLINE void StoreU(const Vec128<int16_t, 4> v, Simd<int16_t, 4> /* tag */,
+                       int16_t* HWY_RESTRICT p) {
+  const auto raw = v.raw;
+  vst1_s16(p, raw);
+}
+HWY_INLINE void StoreU(const Vec128<int32_t, 2> v, Simd<int32_t, 2> /* tag */,
+                       int32_t* HWY_RESTRICT p) {
+  const auto raw = v.raw;
+  vst1_s32(p, raw);
+}
+HWY_INLINE void StoreU(const Vec128<int64_t, 1> v, Simd<int64_t, 1> /* tag */,
+                       int64_t* HWY_RESTRICT p) {
+  const auto raw = v.raw;
+  vst1_s64(p, raw);
+}
+HWY_INLINE void StoreU(const Vec128<float, 2> v, Simd<float, 2> /* tag */,
+                       float* HWY_RESTRICT p) {
+  const auto raw = v.raw;
+  vst1_f32(p, raw);
+}
+#if HWY_ARCH_ARM_A64
+HWY_INLINE void StoreU(const Vec128<double, 1> v, Simd<double, 1> /* tag */,
+                       double* HWY_RESTRICT p) {
+  const auto raw = v.raw;
+  vst1_f64(p, raw);
+}
+#endif
+
+// ------------------------------ Store 32
+
+// 32-bit stores: lane 0 of the register, viewed as one 32-bit element, is
+// written to p.
+// NOTE(review): the 8- and 16-bit overloads write through a pointer cast to a
+// 32-bit type - confirm callers tolerate the aliasing/alignment implications.
+HWY_INLINE void StoreU(const Vec128<uint8_t, 4> v, Simd<uint8_t, 4>,
+                       uint8_t* HWY_RESTRICT p) {
+  const auto dst = reinterpret_cast<uint32_t*>(p);
+  vst1_lane_u32(dst, vreinterpret_u32_u8(v.raw), 0);
+}
+HWY_INLINE void StoreU(const Vec128<uint16_t, 2> v, Simd<uint16_t, 2>,
+                       uint16_t* HWY_RESTRICT p) {
+  const auto dst = reinterpret_cast<uint32_t*>(p);
+  vst1_lane_u32(dst, vreinterpret_u32_u16(v.raw), 0);
+}
+HWY_INLINE void StoreU(const Vec128<uint32_t, 1> v, Simd<uint32_t, 1>,
+                       uint32_t* HWY_RESTRICT p) {
+  const uint32x2_t raw = v.raw;
+  vst1_lane_u32(p, raw, 0);
+}
+HWY_INLINE void StoreU(const Vec128<int8_t, 4> v, Simd<int8_t, 4>,
+                       int8_t* HWY_RESTRICT p) {
+  const auto dst = reinterpret_cast<int32_t*>(p);
+  vst1_lane_s32(dst, vreinterpret_s32_s8(v.raw), 0);
+}
+HWY_INLINE void StoreU(const Vec128<int16_t, 2> v, Simd<int16_t, 2>,
+                       int16_t* HWY_RESTRICT p) {
+  const auto dst = reinterpret_cast<int32_t*>(p);
+  vst1_lane_s32(dst, vreinterpret_s32_s16(v.raw), 0);
+}
+HWY_INLINE void StoreU(const Vec128<int32_t, 1> v, Simd<int32_t, 1>,
+                       int32_t* HWY_RESTRICT p) {
+  const int32x2_t raw = v.raw;
+  vst1_lane_s32(p, raw, 0);
+}
+HWY_INLINE void StoreU(const Vec128<float, 1> v, Simd<float, 1>,
+                       float* HWY_RESTRICT p) {
+  const float32x2_t raw = v.raw;
+  vst1_lane_f32(p, raw, 0);
+}
+
+// ------------------------------ Store 16
+
+// 16-bit stores: lane 0 of the register, viewed as one 16-bit element, is
+// written to p.
+HWY_INLINE void StoreU(const Vec128<uint8_t, 2> v, Simd<uint8_t, 2>,
+                       uint8_t* HWY_RESTRICT p) {
+  const auto dst = reinterpret_cast<uint16_t*>(p);
+  vst1_lane_u16(dst, vreinterpret_u16_u8(v.raw), 0);
+}
+HWY_INLINE void StoreU(const Vec128<uint16_t, 1> v, Simd<uint16_t, 1>,
+                       uint16_t* HWY_RESTRICT p) {
+  const uint16x4_t raw = v.raw;
+  vst1_lane_u16(p, raw, 0);
+}
+HWY_INLINE void StoreU(const Vec128<int8_t, 2> v, Simd<int8_t, 2>,
+                       int8_t* HWY_RESTRICT p) {
+  const auto dst = reinterpret_cast<int16_t*>(p);
+  vst1_lane_s16(dst, vreinterpret_s16_s8(v.raw), 0);
+}
+HWY_INLINE void StoreU(const Vec128<int16_t, 1> v, Simd<int16_t, 1>,
+                       int16_t* HWY_RESTRICT p) {
+  const int16x4_t raw = v.raw;
+  vst1_lane_s16(p, raw, 0);
+}
+
+// ------------------------------ Store 8
+
+// Single-byte stores: lane 0 of the register is written to p.
+HWY_INLINE void StoreU(const Vec128<uint8_t, 1> v, Simd<uint8_t, 1>,
+                       uint8_t* HWY_RESTRICT p) {
+  const uint8x8_t raw = v.raw;
+  vst1_lane_u8(p, raw, 0);
+}
+HWY_INLINE void StoreU(const Vec128<int8_t, 1> v, Simd<int8_t, 1>,
+                       int8_t* HWY_RESTRICT p) {
+  const int8x8_t raw = v.raw;
+  vst1_lane_s8(p, raw, 0);
+}
+
+// float16_t vectors share their Raw type with uint16_t, so rewrap the
+// register and delegate to the uint16_t overload.
+template <size_t N>
+HWY_API void StoreU(Vec128<float16_t, N> v, Simd<float16_t, N> /* tag */,
+                    float16_t* HWY_RESTRICT p) {
+  const Vec128<uint16_t, N> bits(v.raw);
+  StoreU(bits, Simd<uint16_t, N>(), reinterpret_cast<uint16_t*>(p));
+}
+
+// Store simply forwards to StoreU here: on ARM both are the same operation.
+template <typename T, size_t N>
+HWY_INLINE void Store(Vec128<T, N> v, Simd<T, N> d, T* HWY_RESTRICT p) {
+  return StoreU(v, d, p);
+}
+
+// ------------------------------ Non-temporal stores
+
+// No dedicated non-temporal path on non-x86: Stream forwards to Store.
+
+template <typename T, size_t N>
+HWY_INLINE void Stream(const Vec128<T, N> v, Simd<T, N> d,
+                       T* HWY_RESTRICT aligned) {
+  return Store(v, d, aligned);
+}
+
+// ================================================== CONVERT
+
+// ------------------------------ Promotions (part w/ narrow lanes -> full)
+
+// Unsigned sources widened (zero-extended) into a full 128-bit result. The
+// int16_t/int32_t overloads reuse the unsigned widening and bit-cast.
+HWY_INLINE Vec128<uint16_t> PromoteTo(Full128<uint16_t> /* tag */,
+                                      const Vec128<uint8_t, 8> v) {
+  const uint16x8_t wide = vmovl_u8(v.raw);
+  return Vec128<uint16_t>(wide);
+}
+HWY_INLINE Vec128<uint32_t> PromoteTo(Full128<uint32_t> /* tag */,
+                                      const Vec128<uint8_t, 4> v) {
+  // Two widening steps; only the low four u16 lanes are used.
+  const uint16x4_t lo16 = vget_low_u16(vmovl_u8(v.raw));
+  return Vec128<uint32_t>(vmovl_u16(lo16));
+}
+HWY_INLINE Vec128<uint32_t> PromoteTo(Full128<uint32_t> /* tag */,
+                                      const Vec128<uint16_t, 4> v) {
+  const uint32x4_t wide = vmovl_u16(v.raw);
+  return Vec128<uint32_t>(wide);
+}
+HWY_INLINE Vec128<uint64_t> PromoteTo(Full128<uint64_t> /* tag */,
+                                      const Vec128<uint32_t, 2> v) {
+  const uint64x2_t wide = vmovl_u32(v.raw);
+  return Vec128<uint64_t>(wide);
+}
+HWY_INLINE Vec128<int16_t> PromoteTo(Full128<int16_t> d,
+                                     const Vec128<uint8_t, 8> v) {
+  const Vec128<uint16_t> zero_extended(vmovl_u8(v.raw));
+  return BitCast(d, zero_extended);
+}
+HWY_INLINE Vec128<int32_t> PromoteTo(Full128<int32_t> d,
+                                     const Vec128<uint8_t, 4> v) {
+  const uint16x4_t lo16 = vget_low_u16(vmovl_u8(v.raw));
+  const Vec128<uint32_t> zero_extended(vmovl_u16(lo16));
+  return BitCast(d, zero_extended);
+}
+HWY_INLINE Vec128<int32_t> PromoteTo(Full128<int32_t> d,
+                                     const Vec128<uint16_t, 4> v) {
+  const Vec128<uint32_t> zero_extended(vmovl_u16(v.raw));
+  return BitCast(d, zero_extended);
+}
+
+// Unsigned sources widened (zero-extended) into a result of at most 64 bits:
+// widen to a full register, then keep its low half.
+template <size_t N, HWY_IF_LE64(uint16_t, N)>
+HWY_INLINE Vec128<uint16_t, N> PromoteTo(Simd<uint16_t, N> /* tag */,
+                                         const Vec128<uint8_t, N> v) {
+  const uint16x8_t wide = vmovl_u8(v.raw);
+  return Vec128<uint16_t, N>(vget_low_u16(wide));
+}
+template <size_t N, HWY_IF_LE64(uint32_t, N)>
+HWY_INLINE Vec128<uint32_t, N> PromoteTo(Simd<uint32_t, N> /* tag */,
+                                         const Vec128<uint8_t, N> v) {
+  const uint16x4_t lo16 = vget_low_u16(vmovl_u8(v.raw));
+  const uint32x4_t wide = vmovl_u16(lo16);
+  return Vec128<uint32_t, N>(vget_low_u32(wide));
+}
+template <size_t N>
+HWY_INLINE Vec128<uint32_t, N> PromoteTo(Simd<uint32_t, N> /* tag */,
+                                         const Vec128<uint16_t, N> v) {
+  const uint32x4_t wide = vmovl_u16(v.raw);
+  return Vec128<uint32_t, N>(vget_low_u32(wide));
+}
+template <size_t N, HWY_IF_LE64(uint64_t, N)>
+HWY_INLINE Vec128<uint64_t, N> PromoteTo(Simd<uint64_t, N> /* tag */,
+                                         const Vec128<uint32_t, N> v) {
+  const uint64x2_t wide = vmovl_u32(v.raw);
+  return Vec128<uint64_t, N>(vget_low_u64(wide));
+}
+template <size_t N, HWY_IF_LE64(int16_t, N)>
+HWY_INLINE Vec128<int16_t, N> PromoteTo(Simd<int16_t, N> d,
+                                        const Vec128<uint8_t, N> v) {
+  const Vec128<uint16_t, N> zero_extended(vget_low_u16(vmovl_u8(v.raw)));
+  return BitCast(d, zero_extended);
+}
+template <size_t N, HWY_IF_LE64(int32_t, N)>
+HWY_INLINE Vec128<int32_t, N> PromoteTo(Simd<int32_t, N> /* tag */,
+                                        const Vec128<uint8_t, N> v) {
+  const uint16x4_t lo16 = vget_low_u16(vmovl_u8(v.raw));
+  const int32x4_t as_signed = vreinterpretq_s32_u32(vmovl_u16(lo16));
+  return Vec128<int32_t, N>(vget_low_s32(as_signed));
+}
+template <size_t N, HWY_IF_LE64(int32_t, N)>
+HWY_INLINE Vec128<int32_t, N> PromoteTo(Simd<int32_t, N> /* tag */,
+                                        const Vec128<uint16_t, N> v) {
+  const int32x4_t as_signed = vreinterpretq_s32_u32(vmovl_u16(v.raw));
+  return Vec128<int32_t, N>(vget_low_s32(as_signed));
+}
+
+// Signed sources widened (sign-extended) into a full 128-bit result.
+HWY_INLINE Vec128<int16_t> PromoteTo(Full128<int16_t> /* tag */,
+                                     const Vec128<int8_t, 8> v) {
+  const int16x8_t wide = vmovl_s8(v.raw);
+  return Vec128<int16_t>(wide);
+}
+HWY_INLINE Vec128<int32_t> PromoteTo(Full128<int32_t> /* tag */,
+                                     const Vec128<int8_t, 4> v) {
+  // Two widening steps; only the low four i16 lanes are used.
+  const int16x4_t lo16 = vget_low_s16(vmovl_s8(v.raw));
+  return Vec128<int32_t>(vmovl_s16(lo16));
+}
+HWY_INLINE Vec128<int32_t> PromoteTo(Full128<int32_t> /* tag */,
+                                     const Vec128<int16_t, 4> v) {
+  const int32x4_t wide = vmovl_s16(v.raw);
+  return Vec128<int32_t>(wide);
+}
+HWY_INLINE Vec128<int64_t> PromoteTo(Full128<int64_t> /* tag */,
+                                     const Vec128<int32_t, 2> v) {
+  const int64x2_t wide = vmovl_s32(v.raw);
+  return Vec128<int64_t>(wide);
+}
+
+// Signed sources widened (sign-extended) into a partial result: widen to a
+// full register, then keep its low half.
+template <size_t N>
+HWY_INLINE Vec128<int16_t, N> PromoteTo(Simd<int16_t, N> /* tag */,
+                                        const Vec128<int8_t, N> v) {
+  const int16x8_t wide = vmovl_s8(v.raw);
+  return Vec128<int16_t, N>(vget_low_s16(wide));
+}
+template <size_t N>
+HWY_INLINE Vec128<int32_t, N> PromoteTo(Simd<int32_t, N> /* tag */,
+                                        const Vec128<int8_t, N> v) {
+  const int16x4_t lo16 = vget_low_s16(vmovl_s8(v.raw));
+  const int32x4_t wide = vmovl_s16(lo16);
+  return Vec128<int32_t, N>(vget_low_s32(wide));
+}
+template <size_t N>
+HWY_INLINE Vec128<int32_t, N> PromoteTo(Simd<int32_t, N> /* tag */,
+                                        const Vec128<int16_t, N> v) {
+  const int32x4_t wide = vmovl_s16(v.raw);
+  return Vec128<int32_t, N>(vget_low_s32(wide));
+}
+template <size_t N>
+HWY_INLINE Vec128<int64_t, N> PromoteTo(Simd<int64_t, N> /* tag */,
+                                        const Vec128<int32_t, N> v) {
+  const int64x2_t wide = vmovl_s32(v.raw);
+  return Vec128<int64_t, N>(vget_low_s64(wide));
+}
+
+#if __ARM_FP & 2
+
+// float16 -> float32 via the conversion intrinsic (taken when bit 1 of
+// __ARM_FP is set).
+HWY_INLINE Vec128<float> PromoteTo(Full128<float> /* tag */,
+                                   const Vec128<float16_t, 4> v) {
+  const float16x4_t halves = vreinterpret_f16_u16(v.raw);
+  return Vec128<float>(vcvt_f32_f16(halves));
+}
+template <size_t N>
+HWY_INLINE Vec128<float, N> PromoteTo(Simd<float, N> /* tag */,
+                                      const Vec128<float16_t, N> v) {
+  const float16x4_t halves = vreinterpret_f16_u16(v.raw);
+  const float32x4_t wide = vcvt_f32_f16(halves);
+  return Vec128<float, N>(vget_low_f32(wide));
+}
+
+#else
+
+// float16 -> float32 emulated with integer operations on the bit pattern.
+// NOTE(review): an all-ones exponent field (inf/NaN) goes through the same
+// rebias as a normal number - confirm callers do not rely on inf/NaN.
+template <size_t N>
+HWY_INLINE Vec128<float, N> PromoteTo(Simd<float, N> /* tag */,
+                                      const Vec128<float16_t, N> v) {
+  const Simd<float, N> df;
+  const Simd<uint32_t, N> du;
+  const Simd<int32_t, N> di;
+
+  // Widen the 16 raw bits to 32-bit lanes so the fields can be shifted.
+  const auto half_bits = PromoteTo(du, Vec128<uint16_t, N>{v.raw});
+  const auto sign_bit = ShiftRight<15>(half_bits);
+  const auto exp16 = ShiftRight<10>(half_bits) & Set(du, 0x1F);
+  const auto mant16 = half_bits & Set(du, 0x3FF);
+
+  // Normal numbers: rebias the exponent (15 -> 127) and left-align the
+  // 10-bit mantissa within the 23-bit field.
+  const auto exp32 = exp16 + Set(du, 127 - 15);
+  const auto normal_bits = ShiftLeft<23>(exp32) | ShiftLeft<23 - 10>(mant16);
+
+  // Zero exponent field: the value is mantissa * 2^-24, computed in float.
+  const auto mant_as_float = ConvertTo(df, BitCast(di, mant16));
+  const auto subnormal_bits =
+      BitCast(du, mant_as_float * Set(df, 1.0f / 16384 / 1024));
+
+  const auto is_subnormal = exp16 == Zero(du);
+  const auto magnitude = IfThenElse(is_subnormal, subnormal_bits, normal_bits);
+  return BitCast(df, ShiftLeft<31>(sign_bit) | magnitude);
+}
+
+#endif
+
+#if HWY_ARCH_ARM_A64
+
+// Promotions to double (A64 only): from float directly, and from int32_t by
+// first sign-extending to int64_t and then converting.
+HWY_INLINE Vec128<double> PromoteTo(Full128<double> /* tag */,
+                                    const Vec128<float, 2> v) {
+  const float64x2_t wide = vcvt_f64_f32(v.raw);
+  return Vec128<double>(wide);
+}
+
+HWY_INLINE Vec128<double, 1> PromoteTo(Simd<double, 1> /* tag */,
+                                       const Vec128<float, 1> v) {
+  const float64x2_t wide = vcvt_f64_f32(v.raw);
+  return Vec128<double, 1>(vget_low_f64(wide));
+}
+
+HWY_INLINE Vec128<double> PromoteTo(Full128<double> /* tag */,
+                                    const Vec128<int32_t, 2> v) {
+  const int64x2_t wide = vmovl_s32(v.raw);
+  return Vec128<double>(vcvtq_f64_s64(wide));
+}
+
+HWY_INLINE Vec128<double, 1> PromoteTo(Simd<double, 1> /* tag */,
+                                       const Vec128<int32_t, 1> v) {
+  const int64x2_t wide = vmovl_s32(v.raw);
+  const int64x1_t lane0 = vget_low_s64(wide);
+  return Vec128<double, 1>(vcvt_f64_s64(lane0));
+}
+
+#endif
+
+// ------------------------------ Demotions (full -> part w/ narrow lanes)
+
+// Narrowing from a full 128-bit vector to a half or quarter vector, using the
+// vqmovn/vqmovun narrowing intrinsics. The int32 -> 8-bit overloads narrow
+// twice, duplicating the 64-bit intermediate to form a 128-bit input.
+HWY_INLINE Vec128<uint16_t, 4> DemoteTo(Simd<uint16_t, 4> /* tag */,
+                                        const Vec128<int32_t> v) {
+  const uint16x4_t narrow = vqmovun_s32(v.raw);
+  return Vec128<uint16_t, 4>(narrow);
+}
+HWY_INLINE Vec128<int16_t, 4> DemoteTo(Simd<int16_t, 4> /* tag */,
+                                       const Vec128<int32_t> v) {
+  const int16x4_t narrow = vqmovn_s32(v.raw);
+  return Vec128<int16_t, 4>(narrow);
+}
+HWY_INLINE Vec128<uint8_t, 4> DemoteTo(Simd<uint8_t, 4> /* tag */,
+                                       const Vec128<int32_t> v) {
+  const uint16x4_t u16 = vqmovun_s32(v.raw);
+  const uint16x8_t doubled = vcombine_u16(u16, u16);
+  return Vec128<uint8_t, 4>(vqmovn_u16(doubled));
+}
+HWY_INLINE Vec128<uint8_t, 8> DemoteTo(Simd<uint8_t, 8> /* tag */,
+                                       const Vec128<int16_t> v) {
+  const uint8x8_t narrow = vqmovun_s16(v.raw);
+  return Vec128<uint8_t, 8>(narrow);
+}
+HWY_INLINE Vec128<int8_t, 4> DemoteTo(Simd<int8_t, 4> /* tag */,
+                                      const Vec128<int32_t> v) {
+  const int16x4_t i16 = vqmovn_s32(v.raw);
+  const int16x8_t doubled = vcombine_s16(i16, i16);
+  return Vec128<int8_t, 4>(vqmovn_s16(doubled));
+}
+HWY_INLINE Vec128<int8_t, 8> DemoteTo(Simd<int8_t, 8> /* tag */,
+                                      const Vec128<int16_t> v) {
+  const int8x8_t narrow = vqmovn_s16(v.raw);
+  return Vec128<int8_t, 8>(narrow);
+}
+
+// Narrowing from a vector of at most 64 bits: the input register is
+// duplicated into both halves of a 128-bit register so the narrowing
+// intrinsics can be applied.
+template <size_t N, HWY_IF_LE64(int32_t, N)>
+HWY_INLINE Vec128<uint16_t, N> DemoteTo(Simd<uint16_t, N> /* tag */,
+                                        const Vec128<int32_t, N> v) {
+  const int32x4_t doubled = vcombine_s32(v.raw, v.raw);
+  return Vec128<uint16_t, N>(vqmovun_s32(doubled));
+}
+template <size_t N, HWY_IF_LE64(int32_t, N)>
+HWY_INLINE Vec128<int16_t, N> DemoteTo(Simd<int16_t, N> /* tag */,
+                                       const Vec128<int32_t, N> v) {
+  const int32x4_t doubled = vcombine_s32(v.raw, v.raw);
+  return Vec128<int16_t, N>(vqmovn_s32(doubled));
+}
+template <size_t N, HWY_IF_LE64(int32_t, N)>
+HWY_INLINE Vec128<uint8_t, N> DemoteTo(Simd<uint8_t, N> /* tag */,
+                                       const Vec128<int32_t, N> v) {
+  const uint16x4_t u16 = vqmovun_s32(vcombine_s32(v.raw, v.raw));
+  const uint16x8_t doubled = vcombine_u16(u16, u16);
+  return Vec128<uint8_t, N>(vqmovn_u16(doubled));
+}
+template <size_t N, HWY_IF_LE64(int16_t, N)>
+HWY_INLINE Vec128<uint8_t, N> DemoteTo(Simd<uint8_t, N> /* tag */,
+                                       const Vec128<int16_t, N> v) {
+  const int16x8_t doubled = vcombine_s16(v.raw, v.raw);
+  return Vec128<uint8_t, N>(vqmovun_s16(doubled));
+}
+template <size_t N, HWY_IF_LE64(int32_t, N)>
+HWY_INLINE Vec128<int8_t, N> DemoteTo(Simd<int8_t, N> /* tag */,
+                                      const Vec128<int32_t, N> v) {
+  const int16x4_t i16 = vqmovn_s32(vcombine_s32(v.raw, v.raw));
+  const int16x8_t doubled = vcombine_s16(i16, i16);
+  return Vec128<int8_t, N>(vqmovn_s16(doubled));
+}
+template <size_t N, HWY_IF_LE64(int16_t, N)>
+HWY_INLINE Vec128<int8_t, N> DemoteTo(Simd<int8_t, N> /* tag */,
+                                      const Vec128<int16_t, N> v) {
+  const int16x8_t doubled = vcombine_s16(v.raw, v.raw);
+  return Vec128<int8_t, N>(vqmovn_s16(doubled));
+}
+
+#if __ARM_FP & 2
+
+// float32 -> float16 via the conversion intrinsic (taken when bit 1 of
+// __ARM_FP is set).
+HWY_INLINE Vec128<float16_t, 4> DemoteTo(Simd<float16_t, 4> /* tag */,
+                                         const Vec128<float> v) {
+  const float16x4_t halves = vcvt_f16_f32(v.raw);
+  return Vec128<float16_t, 4>{vreinterpret_u16_f16(halves)};
+}
+template <size_t N>
+HWY_INLINE Vec128<float16_t, N> DemoteTo(Simd<float16_t, N> /* tag */,
+                                         const Vec128<float, N> v) {
+  // Duplicate the 64-bit input so the 128-bit conversion can be used.
+  const float32x4_t doubled = vcombine_f32(v.raw, v.raw);
+  const float16x4_t halves = vcvt_f16_f32(doubled);
+  return Vec128<float16_t, N>(vreinterpret_u16_f16(halves));
+}
+
+#else
+
+// float32 -> float16 emulated with integer operations on the bit pattern.
+template <size_t N>
+HWY_INLINE Vec128<float16_t, N> DemoteTo(Simd<float16_t, N> /* tag */,
+                                         const Vec128<float, N> v) {
+  const Simd<uint16_t, N> du16;
+  const Simd<uint32_t, N> du;
+  const Simd<int32_t, N> di;
+
+  // Split the float32 bit pattern into sign, exponent and mantissa fields.
+  const auto f32_bits = BitCast(du, v);
+  const auto sign_bit = ShiftRight<31>(f32_bits);
+  const auto exp_field = ShiftRight<23>(f32_bits) & Set(du, 0xFF);
+  const auto mant23 = f32_bits & Set(du, 0x7FFFFF);
+
+  // Unbiased exponent, capped at 15.
+  const auto bias16 = Set(di, 15);
+  const auto unbiased = Min(BitCast(di, exp_field) - Set(di, 127), bias16);
+  // Below 2^-24 the result is flushed to zero; below 2^-14 it is subnormal.
+  const auto underflows = unbiased < Set(di, -24);
+  const auto denormal = unbiased < Set(di, -14);
+
+  // Subnormal results have a zero exponent field.
+  const auto exp16 = BitCast(du, IfThenZeroElse(denormal, unbiased + bias16));
+
+  // Subnormal mantissa: the implicit leading one plus the shifted-down
+  // fraction; `shift` is in [1, 11) for the lanes where this value is used.
+  const auto shift = BitCast(du, Set(di, -14) - unbiased);
+  const auto implicit_one = Set(du, 1) << (Set(du, 10) - shift);
+  const auto shifted_mant = mant23 >> (Set(du, 13) + shift);
+  const auto denormal_mant = implicit_one + shifted_mant;
+  const auto mant16 = IfThenElse(RebindMask(du, denormal), denormal_mant,
+                                 ShiftRight<13>(mant23));  // <1024
+
+  const auto packed = ShiftLeft<15>(sign_bit) | ShiftLeft<10>(exp16) | mant16;
+  const auto result = IfThenZeroElse(underflows, BitCast(di, packed));
+  return Vec128<float16_t, N>(DemoteTo(du16, result).raw);
+}
+
+#endif
+#if HWY_ARCH_ARM_A64
+
+// Demotions from double (A64 only): to float directly, and to int32_t by
+// converting to int64_t and then narrowing with vqmovn_s64.
+HWY_INLINE Vec128<float, 2> DemoteTo(Simd<float, 2> /* tag */,
+                                     const Vec128<double> v) {
+  const float32x2_t narrow = vcvt_f32_f64(v.raw);
+  return Vec128<float, 2>(narrow);
+}
+HWY_INLINE Vec128<float, 1> DemoteTo(Simd<float, 1> /* tag */,
+                                     const Vec128<double, 1> v) {
+  // Duplicate the single lane to form the 128-bit input of the conversion.
+  const float64x2_t doubled = vcombine_f64(v.raw, v.raw);
+  return Vec128<float, 1>(vcvt_f32_f64(doubled));
+}
+
+HWY_INLINE Vec128<int32_t, 2> DemoteTo(Simd<int32_t, 2> /* tag */,
+                                       const Vec128<double> v) {
+  const int64x2_t as_i64 = vcvtq_s64_f64(v.raw);
+  return Vec128<int32_t, 2>(vqmovn_s64(as_i64));
+}
+HWY_INLINE Vec128<int32_t, 1> DemoteTo(Simd<int32_t, 1> /* tag */,
+                                       const Vec128<double, 1> v) {
+  const int64x1_t as_i64 = vcvt_s64_f64(v.raw);
+  // There is no i64x1 -> i32x1 narrow, so expand to int64x2_t first.
+  const int64x2_t doubled = vcombine_s64(as_i64, as_i64);
+  return Vec128<int32_t, 1>(vqmovn_s64(doubled));
+}
+
+#endif
+
+// Keeps the lowest byte of every u32 lane. Applying vuzp1 twice to the byte
+// view (each time pairing the vector with itself) leaves bytes 0, 4, 8, 12 in
+// the lowest positions.
+HWY_API Vec128<uint8_t, 4> U8FromU32(const Vec128<uint32_t> v) {
+  const uint8x16_t bytes = detail::BitCastToByte(v).raw;
+  const uint8x16_t even_bytes = vuzp1q_u8(bytes, bytes);
+  const uint8x16_t every_fourth = vuzp1q_u8(even_bytes, even_bytes);
+  return Vec128<uint8_t, 4>(vget_low_u8(every_fourth));
+}
+// Same approach for vectors of at most 64 bits.
+template <size_t N, HWY_IF_LE64(uint32_t, N)>
+HWY_API Vec128<uint8_t, N> U8FromU32(const Vec128<uint32_t, N> v) {
+  const uint8x8_t bytes = detail::BitCastToByte(v).raw;
+  const uint8x8_t even_bytes = vuzp1_u8(bytes, bytes);
+  const uint8x8_t every_fourth = vuzp1_u8(even_bytes, even_bytes);
+  return Vec128<uint8_t, N>(every_fourth);
+}
+
+// In the following DemoteTo functions, the i32 input is first narrowed to a
+// 64-bit vector of 16-bit lanes. vqmovn requires a 128-bit operand, so that
+// 64-bit value is duplicated into both halves. Duplicating, rather than
+// combining with a default-constructed (uninitialized) vector, avoids reading
+// an indeterminate value and needs no warning suppression.
+
+// i32 -> u8: narrows via u16, then vqmovn_u16. Result lanes beyond the first
+// four are a copy of the first four.
+template <size_t N>
+HWY_INLINE Vec128<uint8_t, N> DemoteTo(Simd<uint8_t, N> /* tag */,
+                                       const Vec128<int32_t> v) {
+  const Vec128<uint16_t, N> u16 = DemoteTo(Simd<uint16_t, N>(), v);
+  return Vec128<uint8_t, N>(vqmovn_u16(vcombine_u16(u16.raw, u16.raw)));
+}
+
+// i32 -> i8: narrows via i16, then vqmovn_s16. Result lanes beyond the first
+// four are a copy of the first four.
+template <size_t N>
+HWY_INLINE Vec128<int8_t, N> DemoteTo(Simd<int8_t, N> /* tag */,
+                                      const Vec128<int32_t> v) {
+  const Vec128<int16_t, N> i16 = DemoteTo(Simd<int16_t, N>(), v);
+  return Vec128<int8_t, N>(vqmovn_s16(vcombine_s16(i16.raw, i16.raw)));
+}
+
+// ------------------------------ Convert integer <=> floating-point
+
+// i32 -> f32, full 128-bit vector.
+HWY_INLINE Vec128<float> ConvertTo(Full128<float> /* tag */,
+                                   const Vec128<int32_t> v) {
+  const float32x4_t as_float = vcvtq_f32_s32(v.raw);
+  return Vec128<float>(as_float);
+}
+// i32 -> f32, vectors of at most 64 bits.
+template <size_t N, HWY_IF_LE64(int32_t, N)>
+HWY_INLINE Vec128<float, N> ConvertTo(Simd<float, N> /* tag */,
+                                      const Vec128<int32_t, N> v) {
+  const float32x2_t as_float = vcvt_f32_s32(v.raw);
+  return Vec128<float, N>(as_float);
+}
+
+// f32 -> i32. Truncates (rounds toward zero).
+HWY_INLINE Vec128<int32_t> ConvertTo(Full128<int32_t> /* tag */,
+                                     const Vec128<float> v) {
+  const int32x4_t as_int = vcvtq_s32_f32(v.raw);
+  return Vec128<int32_t>(as_int);
+}
+// Same conversion for vectors of at most 64 bits.
+template <size_t N, HWY_IF_LE64(float, N)>
+HWY_INLINE Vec128<int32_t, N> ConvertTo(Simd<int32_t, N> /* tag */,
+                                        const Vec128<float, N> v) {
+  const int32x2_t as_int = vcvt_s32_f32(v.raw);
+  return Vec128<int32_t, N>(as_int);
+}
+
+#if HWY_ARCH_ARM_A64
+
+// i64 -> f64, full 128-bit vector.
+HWY_INLINE Vec128<double> ConvertTo(Full128<double> /* tag */,
+                                    const Vec128<int64_t> v) {
+  const float64x2_t as_double = vcvtq_f64_s64(v.raw);
+  return Vec128<double>(as_double);
+}
+// i64 -> f64, single lane.
+HWY_INLINE Vec128<double, 1> ConvertTo(Simd<double, 1> /* tag */,
+                                       const Vec128<int64_t, 1> v) {
+  const float64x1_t as_double = vcvt_f64_s64(v.raw);
+  return Vec128<double, 1>(as_double);
+}
+
+// f64 -> i64. Truncates (rounds toward zero).
+HWY_INLINE Vec128<int64_t> ConvertTo(Full128<int64_t> /* tag */,
+                                     const Vec128<double> v) {
+  const int64x2_t as_int = vcvtq_s64_f64(v.raw);
+  return Vec128<int64_t>(as_int);
+}
+// Single-lane variant of the same conversion.
+HWY_INLINE Vec128<int64_t, 1> ConvertTo(Simd<int64_t, 1> /* tag */,
+                                        const Vec128<double, 1> v) {
+  const int64x1_t as_int = vcvt_s64_f64(v.raw);
+  return Vec128<int64_t, 1>(as_int);
+}
+
+#endif
+
+// ------------------------------ Round (IfThenElse, mask, logical)
+
+#if HWY_ARCH_ARM_A64
+// Round: toward nearest integer (built on the vrndn* intrinsics).
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Round, vrndn, _, 1)
+
+// Trunc: toward zero, aka truncate (built on the vrnd* intrinsics).
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Trunc, vrnd, _, 1)
+
+// Ceil: toward +infinity, aka ceiling (built on the vrndp* intrinsics).
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Ceil, vrndp, _, 1)
+
+// Floor: toward -infinity, aka floor (built on the vrndm* intrinsics).
+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Floor, vrndm, _, 1)
+#else
+
+// ------------------------------ Trunc
+
+// ARMv7 only supports truncation to integer. We can either convert back to
+// float (3 floating-point and 2 logic operations) or manipulate the binary32
+// representation, clearing the lowest 23-exp mantissa bits. This requires 9
+// integer operations and 3 constants, which is likely more expensive.
+
+namespace detail {
+
+// Returns a mask that is true for lanes whose magnitude is below
+// MantissaEnd<float>(). Where it is false (NaN, or a magnitude so large that
+// the value is already an integer), callers keep the original value.
+template <size_t N>
+HWY_API Mask128<float, N> UseInt(const Vec128<float, N> v) {
+  const Simd<float, N> df;
+  const auto limit = Set(df, MantissaEnd<float>());
+  return Abs(v) < limit;
+}
+
+}  // namespace detail
+
+// Rounds toward zero via a float -> int32 -> float round trip. Lanes for
+// which detail::UseInt(v) is false (NaN or already-integral large magnitudes)
+// are returned unchanged.
+template <size_t N>
+HWY_INLINE Vec128<float, N> Trunc(const Vec128<float, N> v) {
+  const Simd<float, N> df;
+  const RebindToSigned<decltype(df)> di;
+
+  const auto integer = ConvertTo(di, v);  // round toward 0
+  const auto int_f = ConvertTo(df, integer);
+
+  // Integers have no negative zero, so the round trip turns -0.0f and any
+  // input in (-1, 0) into +0.0f. Copy the sign of v back so that, for example,
+  // Trunc(-0.5f) yields -0.0f as IEEE 754 truncation does. For all other
+  // lanes int_f already has the sign of v, so this is a no-op there.
+  return IfThenElse(detail::UseInt(v), CopySign(int_f, v), v);
+}
+
+// Rounds to the nearest integer (ARMv7 emulation).
+template <size_t N>
+HWY_INLINE Vec128<float, N> Round(const Vec128<float, N> v) {
+  const Simd<float, N> df;
+  const auto limit = Set(df, MantissaEnd<float>());
+
+  // ARMv7 has no native NearestInt either. Instead, add and then subtract a
+  // constant so large that the sum has no fractional mantissa bits left; the
+  // FPU's rounding (assumed to be nearest-even) does the work. A compiler
+  // flag for precise floating-point may be required so that the add/subtract
+  // pair is not "optimized" away.
+  const auto signed_limit = CopySignToAbs(limit, v);
+  const auto rounded = (signed_limit + v) - signed_limit;
+
+  // NaN and large magnitudes (already integers) are passed through as-is.
+  return IfThenElse(Abs(v) < limit, rounded, v);
+}
+
+// Rounds toward +infinity (ARMv7 emulation): truncate, then add one to the
+// lanes where truncation produced a smaller value.
+template <size_t N>
+HWY_INLINE Vec128<float, N> Ceil(const Vec128<float, N> v) {
+  const Simd<float, N> df;
+  const RebindToSigned<decltype(df)> di;
+
+  // Round trip through int32, which rounds toward 0.
+  const auto truncated = ConvertTo(df, ConvertTo(di, v));
+
+  // For positive non-integers the truncated value is below v. The mask is
+  // all-ones there, i.e. integer -1, which converts to -1.0f (0.0f elsewhere).
+  const auto went_down = RebindMask(di, truncated < v);
+  const auto minus_one = ConvertTo(df, VecFromMask(di, went_down));
+
+  // Subtracting -1.0f adds one where required.
+  return IfThenElse(detail::UseInt(v), truncated - minus_one, v);
+}
+
+// Rounds toward -infinity (ARMv7 emulation): truncate, then subtract one from
+// the lanes where truncation produced a larger value.
+template <size_t N>
+HWY_INLINE Vec128<float, N> Floor(const Vec128<float, N> v) {
+  const Simd<float, N> df;
+  const Simd<int32_t, N> di;
+
+  // Round trip through int32, which rounds toward 0.
+  const auto truncated = ConvertTo(df, ConvertTo(di, v));
+
+  // For negative non-integers the truncated value is above v. The mask is
+  // all-ones there, i.e. integer -1, which converts to -1.0f (0.0f elsewhere).
+  const auto went_up = RebindMask(di, truncated > v);
+  const auto minus_one = ConvertTo(df, VecFromMask(di, went_up));
+
+  return IfThenElse(detail::UseInt(v), truncated + minus_one, v);
+}
+
+#endif
+
+// ------------------------------ NearestInt (Round)
+
+#if HWY_ARCH_ARM_A64
+
+// f32 -> i32 using the vcvtn (convert-to-nearest) intrinsics.
+HWY_INLINE Vec128<int32_t> NearestInt(const Vec128<float> v) {
+  const int32x4_t nearest = vcvtnq_s32_f32(v.raw);
+  return Vec128<int32_t>(nearest);
+}
+// Variant for vectors of at most 64 bits.
+template <size_t N, HWY_IF_LE64(float, N)>
+HWY_INLINE Vec128<int32_t, N> NearestInt(const Vec128<float, N> v) {
+  const int32x2_t nearest = vcvtn_s32_f32(v.raw);
+  return Vec128<int32_t, N>(nearest);
+}
+
+#else
+
+// ARMv7 fallback: round in floating point first, then convert to i32.
+template <size_t N>
+HWY_INLINE Vec128<int32_t, N> NearestInt(const Vec128<float, N> v) {
+  const auto rounded = Round(v);
+  return ConvertTo(Simd<int32_t, N>(), rounded);
+}
+
+#endif
+
+// ================================================== SWIZZLE
+
+// ------------------------------ Extract half
+
+// <= 64 bit: just return different type. The size check must use the actual
+// lane type T: a check against uint8_t only tests N <= 8 and would also admit
+// full 128-bit vectors of wider lanes, whose raw type does not match the
+// 64-bit raw type of the result.
+template <typename T, size_t N, HWY_IF_LE64(T, N)>
+HWY_INLINE Vec128<T, N / 2> LowerHalf(const Vec128<T, N> v) {
+  return Vec128<T, N / 2>(v.raw);
+}
+
+// Full 128-bit vectors: extract the lower 64 bits with vget_low_*.
+HWY_INLINE Vec128<uint8_t, 8> LowerHalf(const Vec128<uint8_t> v) {
+  const uint8x8_t lo = vget_low_u8(v.raw);
+  return Vec128<uint8_t, 8>(lo);
+}
+HWY_INLINE Vec128<uint16_t, 4> LowerHalf(const Vec128<uint16_t> v) {
+  const uint16x4_t lo = vget_low_u16(v.raw);
+  return Vec128<uint16_t, 4>(lo);
+}
+HWY_INLINE Vec128<uint32_t, 2> LowerHalf(const Vec128<uint32_t> v) {
+  const uint32x2_t lo = vget_low_u32(v.raw);
+  return Vec128<uint32_t, 2>(lo);
+}
+HWY_INLINE Vec128<uint64_t, 1> LowerHalf(const Vec128<uint64_t> v) {
+  const uint64x1_t lo = vget_low_u64(v.raw);
+  return Vec128<uint64_t, 1>(lo);
+}
+HWY_INLINE Vec128<int8_t, 8> LowerHalf(const Vec128<int8_t> v) {
+  const int8x8_t lo = vget_low_s8(v.raw);
+  return Vec128<int8_t, 8>(lo);
+}
+HWY_INLINE Vec128<int16_t, 4> LowerHalf(const Vec128<int16_t> v) {
+  const int16x4_t lo = vget_low_s16(v.raw);
+  return Vec128<int16_t, 4>(lo);
+}
+HWY_INLINE Vec128<int32_t, 2> LowerHalf(const Vec128<int32_t> v) {
+  const int32x2_t lo = vget_low_s32(v.raw);
+  return Vec128<int32_t, 2>(lo);
+}
+HWY_INLINE Vec128<int64_t, 1> LowerHalf(const Vec128<int64_t> v) {
+  const int64x1_t lo = vget_low_s64(v.raw);
+  return Vec128<int64_t, 1>(lo);
+}
+HWY_INLINE Vec128<float, 2> LowerHalf(const Vec128<float> v) {
+  const float32x2_t lo = vget_low_f32(v.raw);
+  return Vec128<float, 2>(lo);
+}
+#if HWY_ARCH_ARM_A64
+HWY_INLINE Vec128<double, 1> LowerHalf(const Vec128<double> v) {
+  const float64x1_t lo = vget_low_f64(v.raw);
+  return Vec128<double, 1>(lo);
+}
+#endif
+
+// Full 128-bit vectors: extract the upper 64 bits with vget_high_*.
+HWY_INLINE Vec128<uint8_t, 8> UpperHalf(const Vec128<uint8_t> v) {
+  const uint8x8_t hi = vget_high_u8(v.raw);
+  return Vec128<uint8_t, 8>(hi);
+}
+HWY_INLINE Vec128<uint16_t, 4> UpperHalf(const Vec128<uint16_t> v) {
+  const uint16x4_t hi = vget_high_u16(v.raw);
+  return Vec128<uint16_t, 4>(hi);
+}
+HWY_INLINE Vec128<uint32_t, 2> UpperHalf(const Vec128<uint32_t> v) {
+  const uint32x2_t hi = vget_high_u32(v.raw);
+  return Vec128<uint32_t, 2>(hi);
+}
+HWY_INLINE Vec128<uint64_t, 1> UpperHalf(const Vec128<uint64_t> v) {
+  const uint64x1_t hi = vget_high_u64(v.raw);
+  return Vec128<uint64_t, 1>(hi);
+}
+HWY_INLINE Vec128<int8_t, 8> UpperHalf(const Vec128<int8_t> v) {
+  const int8x8_t hi = vget_high_s8(v.raw);
+  return Vec128<int8_t, 8>(hi);
+}
+HWY_INLINE Vec128<int16_t, 4> UpperHalf(const Vec128<int16_t> v) {
+  const int16x4_t hi = vget_high_s16(v.raw);
+  return Vec128<int16_t, 4>(hi);
+}
+HWY_INLINE Vec128<int32_t, 2> UpperHalf(const Vec128<int32_t> v) {
+  const int32x2_t hi = vget_high_s32(v.raw);
+  return Vec128<int32_t, 2>(hi);
+}
+HWY_INLINE Vec128<int64_t, 1> UpperHalf(const Vec128<int64_t> v) {
+  const int64x1_t hi = vget_high_s64(v.raw);
+  return Vec128<int64_t, 1>(hi);
+}
+HWY_INLINE Vec128<float, 2> UpperHalf(const Vec128<float> v) {
+  const float32x2_t hi = vget_high_f32(v.raw);
+  return Vec128<float, 2>(hi);
+}
+#if HWY_ARCH_ARM_A64
+HWY_INLINE Vec128<double, 1> UpperHalf(const Vec128<double> v) {
+  const float64x1_t hi = vget_high_f64(v.raw);
+  return Vec128<double, 1>(hi);
+}
+#endif
+
+// ------------------------------ Extract from 2x 128-bit at constant offset
+
+// Treats <hi, lo> as one 256-bit value, skips its kBytes least-significant
+// bytes and returns the following 128 bits.
+template <int kBytes, typename T>
+HWY_INLINE Vec128<T> CombineShiftRightBytes(const Vec128<T> hi,
+                                            const Vec128<T> lo) {
+  static_assert(0 < kBytes && kBytes < 16, "kBytes must be in [1, 15]");
+  const Full128<uint8_t> d8;
+  const uint8x16_t lo_bytes = BitCast(d8, lo).raw;
+  const uint8x16_t hi_bytes = BitCast(d8, hi).raw;
+  const Vec128<uint8_t> extracted(vextq_u8(lo_bytes, hi_bytes, kBytes));
+  return BitCast(Full128<T>(), extracted);
+}
+
+// ------------------------------ Shift vector by constant #bytes
+
+namespace detail {
+
+// Functors instead of plain functions so that the zero-shift case can be
+// specialized: CombineShiftRightBytes<16> and <0> are compile errors.
+
+// Shifts left by kBytes, filling the vacated low bytes from a zero vector.
+template <int kBytes>
+struct ShiftLeftBytesT {
+  template <class T, size_t N>
+  HWY_INLINE Vec128<T, N> operator()(const Vec128<T, N> v) {
+    const auto zero = Zero(Full128<T>());
+    return CombineShiftRightBytes<16 - kBytes>(v, zero);
+  }
+};
+// A shift by zero bytes is the identity.
+template <>
+struct ShiftLeftBytesT<0> {
+  template <class T, size_t N>
+  HWY_INLINE Vec128<T, N> operator()(const Vec128<T, N> v) {
+    return v;
+  }
+};
+
+// Shifts right by kBytes, filling the vacated high bytes from a zero vector.
+template <int kBytes>
+struct ShiftRightBytesT {
+  template <class T, size_t N>
+  HWY_INLINE Vec128<T, N> operator()(const Vec128<T, N> v) {
+    const auto zero = Zero(Full128<T>());
+    return CombineShiftRightBytes<kBytes>(zero, v);
+  }
+};
+// A shift by zero bytes is the identity.
+template <>
+struct ShiftRightBytesT<0> {
+  template <class T, size_t N>
+  HWY_INLINE Vec128<T, N> operator()(const Vec128<T, N> v) {
+    return v;
+  }
+};
+
+}  // namespace detail
+
+// Shifts the whole vector left by kBytes bytes.
+// Example: 0x01..0F, kBytes = 1 => 0x02..0F00
+template <int kBytes, typename T, size_t N>
+HWY_INLINE Vec128<T, N> ShiftLeftBytes(const Vec128<T, N> v) {
+  detail::ShiftLeftBytesT<kBytes> shifter;
+  return shifter(v);
+}
+
+// Shifts left by kLanes whole lanes, implemented as a byte shift on the
+// vector reinterpreted as bytes.
+template <int kLanes, typename T, size_t N>
+HWY_INLINE Vec128<T, N> ShiftLeftLanes(const Vec128<T, N> v) {
+  const Simd<T, N> d;
+  const Simd<uint8_t, N * sizeof(T)> d8;
+  const auto bytes = BitCast(d8, v);
+  const auto shifted = ShiftLeftBytes<kLanes * sizeof(T)>(bytes);
+  return BitCast(d, shifted);
+}
+
+// Shifts the whole vector right by kBytes bytes.
+// Example: 0x01..0F, kBytes = 1 => 0x0001..0E
+template <int kBytes, typename T, size_t N>
+HWY_INLINE Vec128<T, N> ShiftRightBytes(const Vec128<T, N> v) {
+  detail::ShiftRightBytesT<kBytes> shifter;
+  return shifter(v);
+}
+
+// Shifts right by kLanes whole lanes, implemented as a byte shift on the
+// vector reinterpreted as bytes.
+template <int kLanes, typename T, size_t N>
+HWY_INLINE Vec128<T, N> ShiftRightLanes(const Vec128<T, N> v) {
+  const Simd<T, N> d;
+  const Simd<uint8_t, N * sizeof(T)> d8;
+  const auto bytes = BitCast(d8, v);
+  const auto shifted = ShiftRightBytes<kLanes * sizeof(T)>(bytes);
+  return BitCast(d, shifted);
+}
+
+// ------------------------------ Broadcast/splat any lane
+
+#if HWY_ARCH_ARM_A64
+// Unsigned: replicate lane kLane into every lane.
+template <int kLane>
+HWY_INLINE Vec128<uint16_t> Broadcast(const Vec128<uint16_t> v) {
+  static_assert(0 <= kLane && kLane < 8, "Invalid lane");
+  const uint16x8_t splat = vdupq_laneq_u16(v.raw, kLane);
+  return Vec128<uint16_t>(splat);
+}
+template <int kLane, size_t N, HWY_IF_LE64(uint16_t, N)>
+HWY_INLINE Vec128<uint16_t, N> Broadcast(const Vec128<uint16_t, N> v) {
+  static_assert(kLane >= 0 && static_cast<size_t>(kLane) < N, "Invalid lane");
+  const uint16x4_t splat = vdup_lane_u16(v.raw, kLane);
+  return Vec128<uint16_t, N>(splat);
+}
+template <int kLane>
+HWY_INLINE Vec128<uint32_t> Broadcast(const Vec128<uint32_t> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  const uint32x4_t splat = vdupq_laneq_u32(v.raw, kLane);
+  return Vec128<uint32_t>(splat);
+}
+template <int kLane, size_t N, HWY_IF_LE64(uint32_t, N)>
+HWY_INLINE Vec128<uint32_t, N> Broadcast(const Vec128<uint32_t, N> v) {
+  static_assert(kLane >= 0 && static_cast<size_t>(kLane) < N, "Invalid lane");
+  const uint32x2_t splat = vdup_lane_u32(v.raw, kLane);
+  return Vec128<uint32_t, N>(splat);
+}
+template <int kLane>
+HWY_INLINE Vec128<uint64_t> Broadcast(const Vec128<uint64_t> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  const uint64x2_t splat = vdupq_laneq_u64(v.raw, kLane);
+  return Vec128<uint64_t>(splat);
+}
+// Vec128<uint64_t, 1> is defined below.
+
+// Signed: replicate lane kLane into every lane.
+template <int kLane>
+HWY_INLINE Vec128<int16_t> Broadcast(const Vec128<int16_t> v) {
+  static_assert(0 <= kLane && kLane < 8, "Invalid lane");
+  const int16x8_t splat = vdupq_laneq_s16(v.raw, kLane);
+  return Vec128<int16_t>(splat);
+}
+template <int kLane, size_t N, HWY_IF_LE64(int16_t, N)>
+HWY_INLINE Vec128<int16_t, N> Broadcast(const Vec128<int16_t, N> v) {
+  static_assert(kLane >= 0 && static_cast<size_t>(kLane) < N, "Invalid lane");
+  const int16x4_t splat = vdup_lane_s16(v.raw, kLane);
+  return Vec128<int16_t, N>(splat);
+}
+template <int kLane>
+HWY_INLINE Vec128<int32_t> Broadcast(const Vec128<int32_t> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  const int32x4_t splat = vdupq_laneq_s32(v.raw, kLane);
+  return Vec128<int32_t>(splat);
+}
+template <int kLane, size_t N, HWY_IF_LE64(int32_t, N)>
+HWY_INLINE Vec128<int32_t, N> Broadcast(const Vec128<int32_t, N> v) {
+  static_assert(kLane >= 0 && static_cast<size_t>(kLane) < N, "Invalid lane");
+  const int32x2_t splat = vdup_lane_s32(v.raw, kLane);
+  return Vec128<int32_t, N>(splat);
+}
+template <int kLane>
+HWY_INLINE Vec128<int64_t> Broadcast(const Vec128<int64_t> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  const int64x2_t splat = vdupq_laneq_s64(v.raw, kLane);
+  return Vec128<int64_t>(splat);
+}
+// Vec128<int64_t, 1> is defined below.
+
+// Float: replicate lane kLane into every lane.
+template <int kLane>
+HWY_INLINE Vec128<float> Broadcast(const Vec128<float> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  const float32x4_t splat = vdupq_laneq_f32(v.raw, kLane);
+  return Vec128<float>(splat);
+}
+template <int kLane, size_t N, HWY_IF_LE64(float, N)>
+HWY_INLINE Vec128<float, N> Broadcast(const Vec128<float, N> v) {
+  static_assert(kLane >= 0 && static_cast<size_t>(kLane) < N, "Invalid lane");
+  const float32x2_t splat = vdup_lane_f32(v.raw, kLane);
+  return Vec128<float, N>(splat);
+}
+template <int kLane>
+HWY_INLINE Vec128<double> Broadcast(const Vec128<double> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  const float64x2_t splat = vdupq_laneq_f64(v.raw, kLane);
+  return Vec128<double>(splat);
+}
+// A single-lane vector is already its own broadcast.
+template <int kLane>
+HWY_INLINE Vec128<double, 1> Broadcast(const Vec128<double, 1> v) {
+  static_assert(0 <= kLane && kLane < 1, "Invalid lane");
+  return v;
+}
+
+#else
+// No vdupq_laneq_* on armv7: use vgetq_lane_* + vdupq_n_*.
+
+// Unsigned: full vectors read the lane as a scalar and splat it; vectors of
+// at most 64 bits use vdup_lane_* directly.
+template <int kLane>
+HWY_INLINE Vec128<uint16_t> Broadcast(const Vec128<uint16_t> v) {
+  static_assert(0 <= kLane && kLane < 8, "Invalid lane");
+  const uint16_t lane = vgetq_lane_u16(v.raw, kLane);
+  return Vec128<uint16_t>(vdupq_n_u16(lane));
+}
+template <int kLane, size_t N, HWY_IF_LE64(uint16_t, N)>
+HWY_INLINE Vec128<uint16_t, N> Broadcast(const Vec128<uint16_t, N> v) {
+  static_assert(kLane >= 0 && static_cast<size_t>(kLane) < N, "Invalid lane");
+  const uint16x4_t splat = vdup_lane_u16(v.raw, kLane);
+  return Vec128<uint16_t, N>(splat);
+}
+template <int kLane>
+HWY_INLINE Vec128<uint32_t> Broadcast(const Vec128<uint32_t> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  const uint32_t lane = vgetq_lane_u32(v.raw, kLane);
+  return Vec128<uint32_t>(vdupq_n_u32(lane));
+}
+template <int kLane, size_t N, HWY_IF_LE64(uint32_t, N)>
+HWY_INLINE Vec128<uint32_t, N> Broadcast(const Vec128<uint32_t, N> v) {
+  static_assert(kLane >= 0 && static_cast<size_t>(kLane) < N, "Invalid lane");
+  const uint32x2_t splat = vdup_lane_u32(v.raw, kLane);
+  return Vec128<uint32_t, N>(splat);
+}
+template <int kLane>
+HWY_INLINE Vec128<uint64_t> Broadcast(const Vec128<uint64_t> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  const uint64_t lane = vgetq_lane_u64(v.raw, kLane);
+  return Vec128<uint64_t>(vdupq_n_u64(lane));
+}
+// Vec128<uint64_t, 1> is defined below.
+
+// Signed: full vectors read the lane as a scalar and splat it; vectors of at
+// most 64 bits use vdup_lane_* directly.
+template <int kLane>
+HWY_INLINE Vec128<int16_t> Broadcast(const Vec128<int16_t> v) {
+  static_assert(0 <= kLane && kLane < 8, "Invalid lane");
+  const int16_t lane = vgetq_lane_s16(v.raw, kLane);
+  return Vec128<int16_t>(vdupq_n_s16(lane));
+}
+template <int kLane, size_t N, HWY_IF_LE64(int16_t, N)>
+HWY_INLINE Vec128<int16_t, N> Broadcast(const Vec128<int16_t, N> v) {
+  static_assert(kLane >= 0 && static_cast<size_t>(kLane) < N, "Invalid lane");
+  const int16x4_t splat = vdup_lane_s16(v.raw, kLane);
+  return Vec128<int16_t, N>(splat);
+}
+template <int kLane>
+HWY_INLINE Vec128<int32_t> Broadcast(const Vec128<int32_t> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  const int32_t lane = vgetq_lane_s32(v.raw, kLane);
+  return Vec128<int32_t>(vdupq_n_s32(lane));
+}
+template <int kLane, size_t N, HWY_IF_LE64(int32_t, N)>
+HWY_INLINE Vec128<int32_t, N> Broadcast(const Vec128<int32_t, N> v) {
+  static_assert(kLane >= 0 && static_cast<size_t>(kLane) < N, "Invalid lane");
+  const int32x2_t splat = vdup_lane_s32(v.raw, kLane);
+  return Vec128<int32_t, N>(splat);
+}
+template <int kLane>
+HWY_INLINE Vec128<int64_t> Broadcast(const Vec128<int64_t> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  const int64_t lane = vgetq_lane_s64(v.raw, kLane);
+  return Vec128<int64_t>(vdupq_n_s64(lane));
+}
+// Vec128<int64_t, 1> is defined below.
+
+// Float: the full vector reads the lane as a scalar and splats it; vectors
+// of at most 64 bits use vdup_lane_f32 directly.
+template <int kLane>
+HWY_INLINE Vec128<float> Broadcast(const Vec128<float> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  const float lane = vgetq_lane_f32(v.raw, kLane);
+  return Vec128<float>(vdupq_n_f32(lane));
+}
+template <int kLane, size_t N, HWY_IF_LE64(float, N)>
+HWY_INLINE Vec128<float, N> Broadcast(const Vec128<float, N> v) {
+  static_assert(kLane >= 0 && static_cast<size_t>(kLane) < N, "Invalid lane");
+  const float32x2_t splat = vdup_lane_f32(v.raw, kLane);
+  return Vec128<float, N>(splat);
+}
+
+#endif
+
+// Single-lane 64-bit vectors (both architectures): the only valid lane is 0
+// and the vector is already its own broadcast.
+template <int kLane>
+HWY_INLINE Vec128<uint64_t, 1> Broadcast(const Vec128<uint64_t, 1> v) {
+  static_assert(kLane == 0, "Invalid lane");
+  return v;
+}
+template <int kLane>
+HWY_INLINE Vec128<int64_t, 1> Broadcast(const Vec128<int64_t, 1> v) {
+  static_assert(kLane == 0, "Invalid lane");
+  return v;
+}
+
+// ------------------------------ Shuffle bytes with variable indices
+
+// Byte-granular shuffle: output byte i is bytes[from[i]]. "from" is also
+// interpreted as bytes, i.e. lane indices in [0, 16).
+template <typename T>
+HWY_API Vec128<T> TableLookupBytes(const Vec128<T> bytes,
+                                   const Vec128<T> from) {
+  const Full128<T> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  const uint8x16_t table_bytes = BitCast(d8, bytes).raw;
+  const uint8x16_t indices = BitCast(d8, from).raw;
+#if HWY_ARCH_ARM_A64
+  return BitCast(d, Vec128<uint8_t>(vqtbl1q_u8(table_bytes, indices)));
+#else
+  // This path uses vtbl2_u8, which produces 8 bytes per call: present the
+  // table as two 64-bit halves and look up each half of the indices.
+  uint8x8x2_t table;
+  table.val[0] = vget_low_u8(table_bytes);
+  table.val[1] = vget_high_u8(table_bytes);
+  const uint8x8_t result_lo = vtbl2_u8(table, vget_low_u8(indices));
+  const uint8x8_t result_hi = vtbl2_u8(table, vget_high_u8(indices));
+  return BitCast(d, Vec128<uint8_t>(vcombine_u8(result_lo, result_hi)));
+#endif
+}
+
+// Variant for vectors of at most 64 bits; the index vector may use a
+// different lane type TI as long as its total size matches.
+template <typename T, size_t N, typename TI, HWY_IF_LE64(T, N)>
+HWY_INLINE Vec128<T, N> TableLookupBytes(
+    const Vec128<T, N> bytes,
+    const Vec128<TI, N * sizeof(T) / sizeof(TI)> from) {
+  const Simd<T, N> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  const auto table_bytes = BitCast(d8, bytes);
+  const auto indices = BitCast(d8, from);
+  const decltype(Zero(d8)) looked_up(vtbl1_u8(table_bytes.raw, indices.raw));
+  return BitCast(d, looked_up);
+}
+
+// ------------------------------ TableLookupLanes
+
+// Opaque index vector: returned by SetTableIndices, used by TableLookupLanes.
+template <typename T, size_t N>
+struct Indices128 {
+  typename detail::Raw128<T, N>::type raw;  // per-byte shuffle control
+};
+
+// Expands N lane indices (idx[0..N), each expected in [0, N)) into per-byte
+// shuffle control: output lane i receives the byte offsets of source lane
+// idx[i].
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_INLINE Indices128<T, N> SetTableIndices(Simd<T, N> d, const int32_t* idx) {
+#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER)
+  for (size_t i = 0; i < N; ++i) {
+    HWY_DASSERT(0 <= idx[i] && idx[i] < static_cast<int32_t>(N));
+  }
+#endif
+
+  alignas(16) uint8_t control[16] = {0};
+  for (size_t lane = 0; lane < N; ++lane) {
+    // Byte offset of the source lane chosen for this output lane.
+    const size_t src_offset = idx[lane] * sizeof(T);
+    uint8_t* const dst = control + lane * sizeof(T);
+    for (size_t byte = 0; byte < sizeof(T); ++byte) {
+      dst[byte] = static_cast<uint8_t>(src_offset + byte);
+    }
+  }
+
+  const Repartition<uint8_t, decltype(d)> d8;
+  return Indices128<T, N>{BitCast(d, Load(d8, control)).raw};
+}
+
+// Lane-granular shuffle using indices prepared by SetTableIndices; forwards
+// to TableLookupBytes with the stored byte indices.
+template <size_t N>
+HWY_INLINE Vec128<uint32_t, N> TableLookupLanes(
+    const Vec128<uint32_t, N> v, const Indices128<uint32_t, N> idx) {
+  const Vec128<uint32_t, N> byte_indices{idx.raw};
+  return TableLookupBytes(v, byte_indices);
+}
+template <size_t N>
+HWY_INLINE Vec128<int32_t, N> TableLookupLanes(
+    const Vec128<int32_t, N> v, const Indices128<int32_t, N> idx) {
+  const Vec128<int32_t, N> byte_indices{idx.raw};
+  return TableLookupBytes(v, byte_indices);
+}
+// Float vectors are shuffled through their int32 bit representation.
+template <size_t N>
+HWY_INLINE Vec128<float, N> TableLookupLanes(const Vec128<float, N> v,
+                                             const Indices128<float, N> idx) {
+  const Simd<float, N> df;
+  const Simd<int32_t, N> di;
+  const auto byte_indices = BitCast(di, Vec128<float, N>{idx.raw});
+  const auto shuffled = TableLookupBytes(BitCast(di, v), byte_indices);
+  return BitCast(df, shuffled);
+}
+
+// ------------------------------ Other shuffles (TableLookupBytes)
+
+// Notation: let Vec128<int32_t> have lanes 3,2,1,0 (0 is least-significant).
+// Shuffle0321 rotates one lane to the right (the previous least-significant
+// lane is now most-significant). These could also be implemented via
+// CombineShiftRightBytes but the shuffle_abcd notation is more convenient.
+
+// Swap 64-bit halves (rotate by 8 bytes); 32-bit lane notation.
+template <typename T>
+HWY_INLINE Vec128<T> Shuffle1032(const Vec128<T> v) {
+  constexpr int kRotateBytes = 8;
+  return CombineShiftRightBytes<kRotateBytes>(v, v);
+}
+// Swap 64-bit halves; 64-bit lane notation.
+template <typename T>
+HWY_INLINE Vec128<T> Shuffle01(const Vec128<T> v) {
+  constexpr int kRotateBytes = 8;
+  return CombineShiftRightBytes<kRotateBytes>(v, v);
+}
+
+// Rotate right 32 bits
+template <typename T>
+HWY_INLINE Vec128<T> Shuffle0321(const Vec128<T> v) {
+  constexpr int kRotateBytes = 4;
+  return CombineShiftRightBytes<kRotateBytes>(v, v);
+}
+
+// Rotate left 32 bits (equivalently, right by 12 bytes)
+template <typename T>
+HWY_INLINE Vec128<T> Shuffle2103(const Vec128<T> v) {
+  constexpr int kRotateBytes = 12;
+  return CombineShiftRightBytes<kRotateBytes>(v, v);
+}
+
+// Reverse the order of the four 32-bit lanes.
+template <typename T>
+HWY_INLINE Vec128<T> Shuffle0123(const Vec128<T> v) {
+  static_assert(sizeof(T) == 4,
+                "Shuffle0123 should only be applied to 32-bit types");
+  // Two register-only steps instead of loading a byte-index table from
+  // memory and doing a table lookup:
+  // 1) vrev64q_u32 swaps the two 32-bit lanes inside each 64-bit half
+  //    (lanes 3,2,1,0 -> 2,3,0,1);
+  // 2) swapping the two 64-bit halves then gives 0,1,2,3.
+  const Full128<uint32_t> du;
+  const Vec128<uint32_t> swapped_pairs(vrev64q_u32(BitCast(du, v).raw));
+  return BitCast(Full128<T>(),
+                 CombineShiftRightBytes<8>(swapped_pairs, swapped_pairs));
+}
+
+// ------------------------------ Interleave lanes
+
+// Interleaves lanes from halves of the 128-bit blocks of "a" (which provides
+// the least-significant lane) and "b". To concatenate two half-width integers
+// into one, use ZipLower/Upper instead (also works with scalar).
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(InterleaveLower, vzip1, _, 2)
+HWY_NEON_DEF_FUNCTION_UINT_8_16_32(InterleaveLower, vzip1, _, 2)
+// InterleaveUpper: as above, but built on vzip2 instead of vzip1.
+HWY_NEON_DEF_FUNCTION_INT_8_16_32(InterleaveUpper, vzip2, _, 2)
+HWY_NEON_DEF_FUNCTION_UINT_8_16_32(InterleaveUpper, vzip2, _, 2)
+
+#if HWY_ARCH_ARM_A64
+// For 64 bit types, we only have the "q" version of the function defined as
+// interleaving 64-wide registers with 64-wide types in them makes no sense.
+// Result lanes: {a[0], b[0]}.
+HWY_INLINE Vec128<uint64_t> InterleaveLower(const Vec128<uint64_t> a,
+                                            const Vec128<uint64_t> b) {
+  const uint64x2_t lower = vzip1q_u64(a.raw, b.raw);
+  return Vec128<uint64_t>(lower);
+}
+HWY_INLINE Vec128<int64_t> InterleaveLower(const Vec128<int64_t> a,
+                                           const Vec128<int64_t> b) {
+  const int64x2_t lower = vzip1q_s64(a.raw, b.raw);
+  return Vec128<int64_t>(lower);
+}
+
+// Result lanes: {a[1], b[1]}.
+HWY_INLINE Vec128<uint64_t> InterleaveUpper(const Vec128<uint64_t> a,
+                                            const Vec128<uint64_t> b) {
+  const uint64x2_t upper = vzip2q_u64(a.raw, b.raw);
+  return Vec128<uint64_t>(upper);
+}
+HWY_INLINE Vec128<int64_t> InterleaveUpper(const Vec128<int64_t> a,
+                                           const Vec128<int64_t> b) {
+  const int64x2_t upper = vzip2q_s64(a.raw, b.raw);
+  return Vec128<int64_t>(upper);
+}
+#else
+// ARMv7 emulation.
+// InterleaveLower: swapping the halves of a gives {a[1], a[0]}; taking the
+// middle 128 bits of <b, swapped> then yields {a[0], b[0]}.
+HWY_INLINE Vec128<uint64_t> InterleaveLower(const Vec128<uint64_t> a,
+                                            const Vec128<uint64_t> b) {
+  const Vec128<uint64_t> a_swapped = CombineShiftRightBytes<8>(a, a);
+  return CombineShiftRightBytes<8>(b, a_swapped);
+}
+HWY_INLINE Vec128<int64_t> InterleaveLower(const Vec128<int64_t> a,
+                                           const Vec128<int64_t> b) {
+  const Vec128<int64_t> a_swapped = CombineShiftRightBytes<8>(a, a);
+  return CombineShiftRightBytes<8>(b, a_swapped);
+}
+
+// InterleaveUpper: swapping the halves of b gives {b[1], b[0]}; taking the
+// middle 128 bits of <swapped, a> then yields {a[1], b[1]}.
+HWY_INLINE Vec128<uint64_t> InterleaveUpper(const Vec128<uint64_t> a,
+                                            const Vec128<uint64_t> b) {
+  const Vec128<uint64_t> b_swapped = CombineShiftRightBytes<8>(b, b);
+  return CombineShiftRightBytes<8>(b_swapped, a);
+}
+HWY_INLINE Vec128<int64_t> InterleaveUpper(const Vec128<int64_t> a,
+                                           const Vec128<int64_t> b) {
+  const Vec128<int64_t> b_swapped = CombineShiftRightBytes<8>(b, b);
+  return CombineShiftRightBytes<8>(b_swapped, a);
+}
+#endif
+
+// Floating-point variants of InterleaveLower/InterleaveUpper.
+HWY_INLINE Vec128<float> InterleaveLower(const Vec128<float> a,
+                                         const Vec128<float> b) {
+  const float32x4_t lower = vzip1q_f32(a.raw, b.raw);
+  return Vec128<float>(lower);
+}
+#if HWY_ARCH_ARM_A64
+HWY_INLINE Vec128<double> InterleaveLower(const Vec128<double> a,
+                                          const Vec128<double> b) {
+  const float64x2_t lower = vzip1q_f64(a.raw, b.raw);
+  return Vec128<double>(lower);
+}
+#endif
+
+HWY_INLINE Vec128<float> InterleaveUpper(const Vec128<float> a,
+                                         const Vec128<float> b) {
+  const float32x4_t upper = vzip2q_f32(a.raw, b.raw);
+  return Vec128<float>(upper);
+}
+#if HWY_ARCH_ARM_A64
+HWY_INLINE Vec128<double> InterleaveUpper(const Vec128<double> a,
+                                          const Vec128<double> b) {
+  const float64x2_t upper = vzip2q_f64(a.raw, b.raw);
+  return Vec128<double>(upper);
+}
+#endif
+
+// ------------------------------ Zip lanes
+
+// Same as Interleave*, except that the return lanes are double-width integers;
+// this is necessary because the single-lane scalar cannot return two values.
+
+// Full vectors: interleave with vzip1q, then reinterpret each (a, b) pair as
+// one double-width lane.
+HWY_INLINE Vec128<uint16_t> ZipLower(const Vec128<uint8_t> a,
+                                     const Vec128<uint8_t> b) {
+  const uint8x16_t zipped = vzip1q_u8(a.raw, b.raw);
+  return Vec128<uint16_t>(vreinterpretq_u16_u8(zipped));
+}
+HWY_INLINE Vec128<uint32_t> ZipLower(const Vec128<uint16_t> a,
+                                     const Vec128<uint16_t> b) {
+  const uint16x8_t zipped = vzip1q_u16(a.raw, b.raw);
+  return Vec128<uint32_t>(vreinterpretq_u32_u16(zipped));
+}
+HWY_INLINE Vec128<uint64_t> ZipLower(const Vec128<uint32_t> a,
+                                     const Vec128<uint32_t> b) {
+  const uint32x4_t zipped = vzip1q_u32(a.raw, b.raw);
+  return Vec128<uint64_t>(vreinterpretq_u64_u32(zipped));
+}
+
+HWY_INLINE Vec128<int16_t> ZipLower(const Vec128<int8_t> a,
+                                    const Vec128<int8_t> b) {
+  const int8x16_t zipped = vzip1q_s8(a.raw, b.raw);
+  return Vec128<int16_t>(vreinterpretq_s16_s8(zipped));
+}
+HWY_INLINE Vec128<int32_t> ZipLower(const Vec128<int16_t> a,
+                                    const Vec128<int16_t> b) {
+  const int16x8_t zipped = vzip1q_s16(a.raw, b.raw);
+  return Vec128<int32_t>(vreinterpretq_s32_s16(zipped));
+}
+HWY_INLINE Vec128<int64_t> ZipLower(const Vec128<int32_t> a,
+                                    const Vec128<int32_t> b) {
+  const int32x4_t zipped = vzip1q_s32(a.raw, b.raw);
+  return Vec128<int64_t>(vreinterpretq_s64_s32(zipped));
+}
+
+// Full vectors, upper halves: interleave with vzip2q, then reinterpret each
+// (a, b) pair as one double-width lane.
+HWY_INLINE Vec128<uint16_t> ZipUpper(const Vec128<uint8_t> a,
+                                     const Vec128<uint8_t> b) {
+  const uint8x16_t zipped = vzip2q_u8(a.raw, b.raw);
+  return Vec128<uint16_t>(vreinterpretq_u16_u8(zipped));
+}
+HWY_INLINE Vec128<uint32_t> ZipUpper(const Vec128<uint16_t> a,
+                                     const Vec128<uint16_t> b) {
+  const uint16x8_t zipped = vzip2q_u16(a.raw, b.raw);
+  return Vec128<uint32_t>(vreinterpretq_u32_u16(zipped));
+}
+HWY_INLINE Vec128<uint64_t> ZipUpper(const Vec128<uint32_t> a,
+                                     const Vec128<uint32_t> b) {
+  const uint32x4_t zipped = vzip2q_u32(a.raw, b.raw);
+  return Vec128<uint64_t>(vreinterpretq_u64_u32(zipped));
+}
+
+HWY_INLINE Vec128<int16_t> ZipUpper(const Vec128<int8_t> a,
+                                    const Vec128<int8_t> b) {
+  const int8x16_t zipped = vzip2q_s8(a.raw, b.raw);
+  return Vec128<int16_t>(vreinterpretq_s16_s8(zipped));
+}
+HWY_INLINE Vec128<int32_t> ZipUpper(const Vec128<int16_t> a,
+                                    const Vec128<int16_t> b) {
+  const int16x8_t zipped = vzip2q_s16(a.raw, b.raw);
+  return Vec128<int32_t>(vreinterpretq_s32_s16(zipped));
+}
+HWY_INLINE Vec128<int64_t> ZipUpper(const Vec128<int32_t> a,
+                                    const Vec128<int32_t> b) {
+  const int32x4_t zipped = vzip2q_s32(a.raw, b.raw);
+  return Vec128<int64_t>(vreinterpretq_s64_s32(zipped));
+}
+
+// Half vectors or less: same scheme using the 64-bit vzip1 intrinsics. The
+// result holds (N + 1) / 2 double-width lanes.
+template <size_t N, HWY_IF_LE64(uint8_t, N)>
+HWY_INLINE Vec128<uint16_t, (N + 1) / 2> ZipLower(const Vec128<uint8_t, N> a,
+                                                  const Vec128<uint8_t, N> b) {
+  const uint8x8_t zipped = vzip1_u8(a.raw, b.raw);
+  return Vec128<uint16_t, (N + 1) / 2>(vreinterpret_u16_u8(zipped));
+}
+template <size_t N, HWY_IF_LE64(uint16_t, N)>
+HWY_INLINE Vec128<uint32_t, (N + 1) / 2> ZipLower(const Vec128<uint16_t, N> a,
+                                                  const Vec128<uint16_t, N> b) {
+  const uint16x4_t zipped = vzip1_u16(a.raw, b.raw);
+  return Vec128<uint32_t, (N + 1) / 2>(vreinterpret_u32_u16(zipped));
+}
+template <size_t N, HWY_IF_LE64(uint32_t, N)>
+HWY_INLINE Vec128<uint64_t, (N + 1) / 2> ZipLower(const Vec128<uint32_t, N> a,
+                                                  const Vec128<uint32_t, N> b) {
+  const uint32x2_t zipped = vzip1_u32(a.raw, b.raw);
+  return Vec128<uint64_t, (N + 1) / 2>(vreinterpret_u64_u32(zipped));
+}
+
+template <size_t N, HWY_IF_LE64(int8_t, N)>
+HWY_INLINE Vec128<int16_t, (N + 1) / 2> ZipLower(const Vec128<int8_t, N> a,
+                                                 const Vec128<int8_t, N> b) {
+  const int8x8_t zipped = vzip1_s8(a.raw, b.raw);
+  return Vec128<int16_t, (N + 1) / 2>(vreinterpret_s16_s8(zipped));
+}
+template <size_t N, HWY_IF_LE64(int16_t, N)>
+HWY_INLINE Vec128<int32_t, (N + 1) / 2> ZipLower(const Vec128<int16_t, N> a,
+                                                 const Vec128<int16_t, N> b) {
+  const int16x4_t zipped = vzip1_s16(a.raw, b.raw);
+  return Vec128<int32_t, (N + 1) / 2>(vreinterpret_s32_s16(zipped));
+}
+template <size_t N, HWY_IF_LE64(int32_t, N)>
+HWY_INLINE Vec128<int64_t, (N + 1) / 2> ZipLower(const Vec128<int32_t, N> a,
+                                                 const Vec128<int32_t, N> b) {
+  const int32x2_t zipped = vzip1_s32(a.raw, b.raw);
+  return Vec128<int64_t, (N + 1) / 2>(vreinterpret_s64_s32(zipped));
+}
+
+// u8 -> u16, vectors of at most 64 bits: zips `a` and `b` with vzip2_u8 and
+// reinterprets the interleaved bytes as 16-bit lanes.
+template <size_t N, HWY_IF_LE64(uint8_t, N)>
+HWY_INLINE Vec128<uint16_t, N / 2> ZipUpper(const Vec128<uint8_t, N> a,
+                                            const Vec128<uint8_t, N> b) {
+  using Wide = Vec128<uint16_t, N / 2>;
+  const auto zipped = vzip2_u8(a.raw, b.raw);
+  return Wide(vreinterpret_u16_u8(zipped));
+}
+// u16 -> u32, vectors of at most 64 bits: zips `a` and `b` with vzip2_u16 and
+// reinterprets the interleaved lanes as 32-bit lanes.
+template <size_t N, HWY_IF_LE64(uint16_t, N)>
+HWY_INLINE Vec128<uint32_t, N / 2> ZipUpper(const Vec128<uint16_t, N> a,
+                                            const Vec128<uint16_t, N> b) {
+  using Wide = Vec128<uint32_t, N / 2>;
+  const auto zipped = vzip2_u16(a.raw, b.raw);
+  return Wide(vreinterpret_u32_u16(zipped));
+}
+// u32 -> u64, vectors of at most 64 bits: zips `a` and `b` with vzip2_u32 and
+// reinterprets the interleaved lanes as 64-bit lanes.
+template <size_t N, HWY_IF_LE64(uint32_t, N)>
+HWY_INLINE Vec128<uint64_t, N / 2> ZipUpper(const Vec128<uint32_t, N> a,
+                                            const Vec128<uint32_t, N> b) {
+  using Wide = Vec128<uint64_t, N / 2>;
+  const auto zipped = vzip2_u32(a.raw, b.raw);
+  return Wide(vreinterpret_u64_u32(zipped));
+}
+
+// i8 -> i16, vectors of at most 64 bits: zips `a` and `b` with vzip2_s8 and
+// reinterprets the interleaved bytes as 16-bit lanes.
+template <size_t N, HWY_IF_LE64(int8_t, N)>
+HWY_INLINE Vec128<int16_t, N / 2> ZipUpper(const Vec128<int8_t, N> a,
+                                           const Vec128<int8_t, N> b) {
+  using Wide = Vec128<int16_t, N / 2>;
+  const auto zipped = vzip2_s8(a.raw, b.raw);
+  return Wide(vreinterpret_s16_s8(zipped));
+}
+// i16 -> i32, vectors of at most 64 bits: zips `a` and `b` with vzip2_s16 and
+// reinterprets the interleaved lanes as 32-bit lanes.
+template <size_t N, HWY_IF_LE64(int16_t, N)>
+HWY_INLINE Vec128<int32_t, N / 2> ZipUpper(const Vec128<int16_t, N> a,
+                                           const Vec128<int16_t, N> b) {
+  using Wide = Vec128<int32_t, N / 2>;
+  const auto zipped = vzip2_s16(a.raw, b.raw);
+  return Wide(vreinterpret_s32_s16(zipped));
+}
+// i32 -> i64, vectors of at most 64 bits: zips `a` and `b` with vzip2_s32 and
+// reinterprets the interleaved lanes as 64-bit lanes.
+template <size_t N, HWY_IF_LE64(int32_t, N)>
+HWY_INLINE Vec128<int64_t, N / 2> ZipUpper(const Vec128<int32_t, N> a,
+                                           const Vec128<int32_t, N> b) {
+  using Wide = Vec128<int64_t, N / 2>;
+  const auto zipped = vzip2_s32(a.raw, b.raw);
+  return Wide(vreinterpret_s64_s32(zipped));
+}
+
+// ------------------------------ Blocks
+
+// Combines the lower halves of both inputs:
+// hiH,hiL loH,loL |-> hiL,loL.
+// Implemented by viewing both vectors as two 64-bit lanes and interleaving
+// their lower lanes.
+template <typename T>
+HWY_INLINE Vec128<T> ConcatLowerLower(const Vec128<T> hi, const Vec128<T> lo) {
+  const Full128<T> d;
+  const Full128<uint64_t> d64;
+  const auto lo64 = BitCast(d64, lo);
+  const auto hi64 = BitCast(d64, hi);
+  return BitCast(d, InterleaveLower(lo64, hi64));
+}
+
+// Combines the upper halves of both inputs:
+// hiH,hiL loH,loL |-> hiH,loH.
+// Implemented by viewing both vectors as two 64-bit lanes and interleaving
+// their upper lanes.
+template <typename T>
+HWY_INLINE Vec128<T> ConcatUpperUpper(const Vec128<T> hi, const Vec128<T> lo) {
+  const Full128<T> d;
+  const Full128<uint64_t> d64;
+  const auto lo64 = BitCast(d64, lo);
+  const auto hi64 = BitCast(d64, hi);
+  return BitCast(d, InterleaveUpper(lo64, hi64));
+}
+
+// Combines the inner halves of the hi:lo pair:
+// hiH,hiL loH,loL |-> hiL,loH.
+template <typename T>
+HWY_INLINE Vec128<T> ConcatLowerUpper(const Vec128<T> hi, const Vec128<T> lo) {
+  // Half of a 128-bit vector, i.e. the byte distance to shift hi:lo by.
+  constexpr int kHalfVectorBytes = 8;
+  return CombineShiftRightBytes<kHalfVectorBytes>(hi, lo);
+}
+
+// Combines the outer halves of the hi:lo pair:
+// hiH,hiL loH,loL |-> hiH,loL.
+template <typename T>
+HWY_INLINE Vec128<T> ConcatUpperLower(const Vec128<T> hi, const Vec128<T> lo) {
+  // TODO(janwas): more efficient implementation?
+  const Full128<uint8_t> d8;
+  const Full128<T> d;
+  // First eight bytes all-ones, last eight zero: selects `lo` for the first
+  // eight bytes and `hi` for the rest.
+  alignas(16) const uint8_t kLowerHalfBytes[16] = {
+      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0};
+  const auto lower_half = MaskFromVec(BitCast(d, Load(d8, kLowerHalfBytes)));
+  return IfThenElse(lower_half, lo, hi);
+}
+
+// ------------------------------ Odd/even lanes
+
+// Blends two vectors lane-wise: lanes with an even index are selected from `b`
+// and lanes with an odd index from `a`.
+template <typename T>
+HWY_INLINE Vec128<T> OddEven(const Vec128<T> a, const Vec128<T> b) {
+  // Byte k belongs to lane k / sizeof(T); all bytes of even-indexed lanes are
+  // 0xFF, those of odd-indexed lanes are 0.
+  alignas(16) constexpr uint8_t kEvenLaneBytes[16] = {
+      ((0 / sizeof(T)) % 2 == 0) ? 0xFF : 0,
+      ((1 / sizeof(T)) % 2 == 0) ? 0xFF : 0,
+      ((2 / sizeof(T)) % 2 == 0) ? 0xFF : 0,
+      ((3 / sizeof(T)) % 2 == 0) ? 0xFF : 0,
+      ((4 / sizeof(T)) % 2 == 0) ? 0xFF : 0,
+      ((5 / sizeof(T)) % 2 == 0) ? 0xFF : 0,
+      ((6 / sizeof(T)) % 2 == 0) ? 0xFF : 0,
+      ((7 / sizeof(T)) % 2 == 0) ? 0xFF : 0,
+      ((8 / sizeof(T)) % 2 == 0) ? 0xFF : 0,
+      ((9 / sizeof(T)) % 2 == 0) ? 0xFF : 0,
+      ((10 / sizeof(T)) % 2 == 0) ? 0xFF : 0,
+      ((11 / sizeof(T)) % 2 == 0) ? 0xFF : 0,
+      ((12 / sizeof(T)) % 2 == 0) ? 0xFF : 0,
+      ((13 / sizeof(T)) % 2 == 0) ? 0xFF : 0,
+      ((14 / sizeof(T)) % 2 == 0) ? 0xFF : 0,
+      ((15 / sizeof(T)) % 2 == 0) ? 0xFF : 0,
+  };
+  const Full128<uint8_t> d8;
+  const Full128<T> d;
+  const auto even_lanes = MaskFromVec(BitCast(d, Load(d8, kEvenLaneBytes)));
+  return IfThenElse(even_lanes, b, a);
+}
+
+// ================================================== MISC
+
+// ------------------------------ Scatter (Store)
+
+// Stores lane i of `v` at the address `base` plus `offset[i]` BYTES. Emulated
+// with scalar copies: both vectors are first spilled to stack buffers.
+template <typename T, size_t N, typename Offset, HWY_IF_LE128(T, N)>
+HWY_API void ScatterOffset(Vec128<T, N> v, Simd<T, N> d, T* HWY_RESTRICT base,
+                           const Vec128<Offset, N> offset) {
+  static_assert(sizeof(T) == sizeof(Offset), "Must match for portability");
+
+  const Simd<Offset, N> d_offset;
+  alignas(16) Offset byte_offsets[N];
+  Store(offset, d_offset, byte_offsets);
+
+  alignas(16) T values[N];
+  Store(v, d, values);
+
+  // Offsets are in bytes, hence the address arithmetic on a byte pointer.
+  uint8_t* const dest_bytes = reinterpret_cast<uint8_t*>(base);
+  for (size_t lane = 0; lane != N; ++lane) {
+    CopyBytes<sizeof(T)>(&values[lane], dest_bytes + byte_offsets[lane]);
+  }
+}
+
+// Stores lane i of `v` to `base[index[i]]` (indices count elements of T).
+// Emulated with scalar stores: both vectors are first spilled to stack buffers.
+template <typename T, size_t N, typename Index, HWY_IF_LE128(T, N)>
+HWY_API void ScatterIndex(Vec128<T, N> v, Simd<T, N> d, T* HWY_RESTRICT base,
+                          const Vec128<Index, N> index) {
+  static_assert(sizeof(T) == sizeof(Index), "Must match for portability");
+
+  const Simd<Index, N> d_index;
+  alignas(16) Index element_indices[N];
+  Store(index, d_index, element_indices);
+
+  alignas(16) T values[N];
+  Store(v, d, values);
+
+  for (size_t lane = 0; lane != N; ++lane) {
+    base[element_indices[lane]] = values[lane];
+  }
+}
+
+// ------------------------------ Gather (Load/Store)
+
+// Returns a vector whose lane i is read from the address `base` plus
+// `offset[i]` BYTES. Emulated with scalar copies into a stack buffer that is
+// then loaded as a vector.
+template <typename T, size_t N, typename Offset>
+HWY_API Vec128<T, N> GatherOffset(const Simd<T, N> d,
+                                  const T* HWY_RESTRICT base,
+                                  const Vec128<Offset, N> offset) {
+  static_assert(sizeof(T) == sizeof(Offset), "Must match for portability");
+
+  const Simd<Offset, N> d_offset;
+  alignas(16) Offset byte_offsets[N];
+  Store(offset, d_offset, byte_offsets);
+
+  // Offsets are in bytes, hence the address arithmetic on a byte pointer.
+  const uint8_t* const src_bytes = reinterpret_cast<const uint8_t*>(base);
+  alignas(16) T gathered[N];
+  for (size_t lane = 0; lane != N; ++lane) {
+    CopyBytes<sizeof(T)>(src_bytes + byte_offsets[lane], &gathered[lane]);
+  }
+  return Load(d, gathered);
+}
+
+// Returns a vector whose lane i is `base[index[i]]` (indices count elements of
+// T). Emulated with scalar loads into a stack buffer that is then loaded as a
+// vector.
+template <typename T, size_t N, typename Index>
+HWY_API Vec128<T, N> GatherIndex(const Simd<T, N> d, const T* HWY_RESTRICT base,
+                                 const Vec128<Index, N> index) {
+  static_assert(sizeof(T) == sizeof(Index), "Must match for portability");
+
+  const Simd<Index, N> d_index;
+  alignas(16) Index element_indices[N];
+  Store(index, d_index, element_indices);
+
+  alignas(16) T gathered[N];
+  for (size_t lane = 0; lane != N; ++lane) {
+    gathered[lane] = base[element_indices[lane]];
+  }
+  return Load(d, gathered);
+}
+
+// ------------------------------ Reductions
+
+namespace detail {
+
+// N=1 for any T: no-op
+// Single-lane vector: the sum of all lanes is the lane itself, so `v` is
+// returned unchanged.
+template <typename T>
+HWY_API Vec128<T, 1> SumOfLanes(const Vec128<T, 1> v) {
+  return v;
+}
+// Single-lane vector: the lane is its own minimum, so `v` is returned
+// unchanged. The tag parameter only takes part in overload selection (by lane
+// size) and is otherwise unused.
+template <typename T>
+HWY_API Vec128<T, 1> MinOfLanes(hwy::SizeTag<sizeof(T)> /* tag */,
+                                const Vec128<T, 1> v) {
+  return v;
+}
+// Single-lane vector: the lane is its own maximum, so `v` is returned
+// unchanged. The tag parameter only takes part in overload selection (by lane
+// size) and is otherwise unused.
+template <typename T>
+HWY_API Vec128<T, 1> MaxOfLanes(hwy::SizeTag<sizeof(T)> /* tag */,
+                                const Vec128<T, 1> v) {
+  return v;
+}
+
+// u32/i32/f32: N=2
+// Two 4-byte lanes: adds the vector to a copy of itself shuffled by
+// Shuffle2301, so that both lanes hold the sum.
+template <typename T, HWY_IF_LANE_SIZE(T, 4)>
+HWY_API Vec128<T, 2> SumOfLanes(const Vec128<T, 2> v10) {
+  const auto v01 = Shuffle2301(v10);
+  return v10 + v01;
+}
+// Two 4-byte lanes: lane-wise minimum of the vector and a copy of itself
+// shuffled by Shuffle2301, so that both lanes hold the overall minimum.
+template <typename T>
+HWY_API Vec128<T, 2> MinOfLanes(hwy::SizeTag<4> /* tag */,
+                                const Vec128<T, 2> v10) {
+  const auto v01 = Shuffle2301(v10);
+  return Min(v10, v01);
+}
+// Two 4-byte lanes: lane-wise maximum of the vector and a copy of itself
+// shuffled by Shuffle2301, so that both lanes hold the overall maximum.
+template <typename T>
+HWY_API Vec128<T, 2> MaxOfLanes(hwy::SizeTag<4> /* tag */,
+                                const Vec128<T, 2> v10) {
+  const auto v01 = Shuffle2301(v10);
+  return Max(v10, v01);
+}
+
+// full vectors
+#if HWY_ARCH_ARM_A64
+// A64: horizontal add of the four u32 lanes (vaddvq_u32), broadcast to all
+// lanes.
+HWY_INLINE Vec128<uint32_t> SumOfLanes(const Vec128<uint32_t> v) {
+  const uint32_t total = vaddvq_u32(v.raw);
+  return Vec128<uint32_t>(vdupq_n_u32(total));
+}
+// A64: horizontal add of the four i32 lanes (vaddvq_s32), broadcast to all
+// lanes.
+HWY_INLINE Vec128<int32_t> SumOfLanes(const Vec128<int32_t> v) {
+  const int32_t total = vaddvq_s32(v.raw);
+  return Vec128<int32_t>(vdupq_n_s32(total));
+}
+// A64: horizontal add of the four f32 lanes (vaddvq_f32), broadcast to all
+// lanes.
+HWY_INLINE Vec128<float> SumOfLanes(const Vec128<float> v) {
+  const float total = vaddvq_f32(v.raw);
+  return Vec128<float>(vdupq_n_f32(total));
+}
+// A64: horizontal add of the two u64 lanes (vaddvq_u64), broadcast to both
+// lanes.
+HWY_INLINE Vec128<uint64_t> SumOfLanes(const Vec128<uint64_t> v) {
+  const uint64_t total = vaddvq_u64(v.raw);
+  return Vec128<uint64_t>(vdupq_n_u64(total));
+}
+// A64: horizontal add of the two i64 lanes (vaddvq_s64), broadcast to both
+// lanes.
+HWY_INLINE Vec128<int64_t> SumOfLanes(const Vec128<int64_t> v) {
+  const int64_t total = vaddvq_s64(v.raw);
+  return Vec128<int64_t>(vdupq_n_s64(total));
+}
+// A64: horizontal add of the two f64 lanes (vaddvq_f64), broadcast to both
+// lanes.
+HWY_INLINE Vec128<double> SumOfLanes(const Vec128<double> v) {
+  const double total = vaddvq_f64(v.raw);
+  return Vec128<double>(vdupq_n_f64(total));
+}
+#else
+// ARMv7 version for everything except doubles.
+// ARMv7 (no vaddv): two rounds of "de-interleave the vector with itself, then
+// add the even and odd parts". The first round yields pairwise sums, the
+// second leaves the total of all four u32 lanes in every lane.
+HWY_INLINE Vec128<uint32_t> SumOfLanes(const Vec128<uint32_t> v) {
+  const uint32x4x2_t even_odd = vuzpq_u32(v.raw, v.raw);
+  const uint32x4_t pair_sums = vaddq_u32(even_odd.val[0], even_odd.val[1]);
+  const uint32x4x2_t even_odd_sums = vuzpq_u32(pair_sums, pair_sums);
+  const uint32x4_t total = vaddq_u32(even_odd_sums.val[0], even_odd_sums.val[1]);
+  return Vec128<uint32_t>(total);
+}
+// ARMv7 (no vaddv): two rounds of "de-interleave the vector with itself, then
+// add the even and odd parts". The first round yields pairwise sums, the
+// second leaves the total of all four i32 lanes in every lane.
+HWY_INLINE Vec128<int32_t> SumOfLanes(const Vec128<int32_t> v) {
+  const int32x4x2_t even_odd = vuzpq_s32(v.raw, v.raw);
+  const int32x4_t pair_sums = vaddq_s32(even_odd.val[0], even_odd.val[1]);
+  const int32x4x2_t even_odd_sums = vuzpq_s32(pair_sums, pair_sums);
+  const int32x4_t total = vaddq_s32(even_odd_sums.val[0], even_odd_sums.val[1]);
+  return Vec128<int32_t>(total);
+}
+// ARMv7 (no vaddv): two rounds of "de-interleave the vector with itself, then
+// add the even and odd parts". The first round yields pairwise sums, the
+// second leaves the total of all four f32 lanes in every lane.
+HWY_INLINE Vec128<float> SumOfLanes(const Vec128<float> v) {
+  const float32x4x2_t even_odd = vuzpq_f32(v.raw, v.raw);
+  const float32x4_t pair_sums = vaddq_f32(even_odd.val[0], even_odd.val[1]);
+  const float32x4x2_t even_odd_sums = vuzpq_f32(pair_sums, pair_sums);
+  const float32x4_t total = vaddq_f32(even_odd_sums.val[0], even_odd_sums.val[1]);
+  return Vec128<float>(total);
+}
+// ARMv7: adds the vector to a copy of itself with the two 64-bit halves
+// exchanged, so both u64 lanes hold the sum.
+HWY_INLINE Vec128<uint64_t> SumOfLanes(const Vec128<uint64_t> v) {
+  const auto halves_swapped = CombineShiftRightBytes<8>(v, v);
+  return v + halves_swapped;
+}
+// ARMv7: adds the vector to a copy of itself with the two 64-bit halves
+// exchanged, so both i64 lanes hold the sum.
+HWY_INLINE Vec128<int64_t> SumOfLanes(const Vec128<int64_t> v) {
+  const auto halves_swapped = CombineShiftRightBytes<8>(v, v);
+  return v + halves_swapped;
+}
+#endif
+
+// Full vector of four 4-byte lanes: two shuffle+Min steps leave the minimum of
+// all lanes in every lane.
+template <typename T>
+HWY_API Vec128<T> MinOfLanes(hwy::SizeTag<4> /* tag */, const Vec128<T> v3210) {
+  // Step 1: combine each lane with the lane two positions away.
+  const Vec128<T> v31_20_31_20 = Min(v3210, Shuffle1032(v3210));
+  // Step 2: combine with the neighbouring pair result.
+  return Min(Shuffle0321(v31_20_31_20), v31_20_31_20);
+}
+// Full vector of four 4-byte lanes: two shuffle+Max steps leave the maximum of
+// all lanes in every lane.
+template <typename T>
+HWY_API Vec128<T> MaxOfLanes(hwy::SizeTag<4> /* tag */, const Vec128<T> v3210) {
+  // Step 1: combine each lane with the lane two positions away.
+  const Vec128<T> v31_20_31_20 = Max(v3210, Shuffle1032(v3210));
+  // Step 2: combine with the neighbouring pair result.
+  return Max(Shuffle0321(v31_20_31_20), v31_20_31_20);
+}
+
+// For u64/i64[/f64].
+// Full vector of two 8-byte lanes: Min of the vector and its lane-swapped copy.
+template <typename T>
+HWY_API Vec128<T> MinOfLanes(hwy::SizeTag<8> /* tag */, const Vec128<T> v10) {
+  return Min(v10, Shuffle01(v10));
+}
+// Full vector of two 8-byte lanes: Max of the vector and its lane-swapped copy.
+template <typename T>
+HWY_API Vec128<T> MaxOfLanes(hwy::SizeTag<8> /* tag */, const Vec128<T> v10) {
+  return Max(v10, Shuffle01(v10));
+}
+
+}  // namespace detail
+
+// Public entry point: forwards to the detail::SumOfLanes overload matching the
+// lane type and lane count of `v`.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> SumOfLanes(const Vec128<T, N> v) {
+  return detail::SumOfLanes(v);
+}
+// Public entry point: dispatches to a detail::MinOfLanes overload selected by
+// the lane size in bytes.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> MinOfLanes(const Vec128<T, N> v) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  return detail::MinOfLanes(LaneSizeTag(), v);
+}
+// Public entry point: dispatches to a detail::MaxOfLanes overload selected by
+// the lane size in bytes.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> MaxOfLanes(const Vec128<T, N> v) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  return detail::MaxOfLanes(LaneSizeTag(), v);
+}
+
+// ------------------------------ Mask
+
+namespace detail {
+
+// Full vector of sixteen 1-byte lanes: returns a 16-bit bitfield in which bit i
+// is set iff lane i of `mask` is true. All higher bits of the result are zero.
+template <typename T>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<1> /*tag*/,
+                                 const Mask128<T> mask) {
+  // Per-lane bit value; the pattern repeats for the upper eight lanes because
+  // each 8-byte half is summed into its own result byte.
+  alignas(16) constexpr uint8_t kSliceLanes[16] = {
+      1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80, 1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80,
+  };
+  const Full128<uint8_t> du;
+  const Vec128<uint8_t> values =
+      BitCast(du, VecFromMask(Full128<T>(), mask)) & Load(du, kSliceLanes);
+
+#if HWY_ARCH_ARM_A64
+  // Can't vaddv - we need two separate bytes (16 bits).
+  const uint8x8_t x2 = vget_low_u8(vpaddq_u8(values.raw, values.raw));
+  const uint8x8_t x4 = vpadd_u8(x2, x2);
+  const uint8x8_t x8 = vpadd_u8(x4, x4);
+  // Pairwise-adding a vector with itself duplicates the sums: bytes 0 and 1 of
+  // x8 hold the bits of the lower/upper eight lanes, and bytes 2..7 merely
+  // repeat them. Discard the repeats so only the 16 valid bits are returned
+  // (OnlyActive does not mask full vectors).
+  return vget_lane_u64(vreinterpret_u64_u8(x8), 0) & 0xFFFF;
+#else
+  // Don't have vpaddq, so keep doubling lane size.
+  const uint16x8_t x2 = vpaddlq_u8(values.raw);
+  const uint32x4_t x4 = vpaddlq_u16(x2);
+  const uint64x2_t x8 = vpaddlq_u32(x4);
+  // Lane 0 sums the lower eight bytes, lane 1 the upper eight.
+  return (vgetq_lane_u64(x8, 1) << 8) | vgetq_lane_u64(x8, 0);
+#endif
+}
+
+// Partial vectors (at most 64 bits) of 1-byte lanes: returns a bitfield in
+// which bit i corresponds to lane i of `mask`. Bits at positions >= N may be
+// set; the caller is expected to clear them via OnlyActive.
+template <typename T, size_t N, HWY_IF_LE64(T, N)>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<1> /*tag*/,
+                                 const Mask128<T, N> mask) {
+  const Simd<uint8_t, N> du;
+  const Simd<T, N> d;
+  // Lanes beyond N are undefined after a partial load. Loading all eight bit
+  // values keeps every lane's bit distinct, so the undefined lanes can only
+  // affect bit positions >= N and never the valid ones.
+  alignas(8) constexpr uint8_t kLaneBits[8] = {0x01, 0x02, 0x04, 0x08,
+                                               0x10, 0x20, 0x40, 0x80};
+  const Vec128<uint8_t, N> lane_bits(Load(Simd<uint8_t, 8>(), kLaneBits).raw);
+  const Vec128<uint8_t, N> selected =
+      BitCast(du, VecFromMask(d, mask)) & lane_bits;
+
+#if HWY_ARCH_ARM_A64
+  return vaddv_u8(selected.raw);
+#else
+  // No vaddv: widen pairwise until a single 64-bit sum remains.
+  const uint64x1_t sum = vpaddl_u32(vpaddl_u16(vpaddl_u8(selected.raw)));
+  return vget_lane_u64(sum, 0);
+#endif
+}
+
+// Full vector of eight 2-byte lanes: returns an 8-bit bitfield in which bit i
+// is set iff lane i of `mask` is true.
+template <typename T>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<2> /*tag*/,
+                                 const Mask128<T> mask) {
+  const Full128<uint16_t> du;
+  const Full128<T> d;
+  // Lane i contributes the value 1 << i when its mask is set.
+  alignas(16) constexpr uint16_t kLaneBits[8] = {0x01, 0x02, 0x04, 0x08,
+                                                 0x10, 0x20, 0x40, 0x80};
+  const Vec128<uint16_t> selected =
+      BitCast(du, VecFromMask(d, mask)) & Load(du, kLaneBits);
+#if HWY_ARCH_ARM_A64
+  return vaddvq_u16(selected.raw);
+#else
+  // No vaddv: widen pairwise to two 64-bit partial sums and add them.
+  const uint64x2_t halves = vpaddlq_u32(vpaddlq_u16(selected.raw));
+  return vgetq_lane_u64(halves, 0) + vgetq_lane_u64(halves, 1);
+#endif
+}
+
+// Partial vectors (at most 64 bits) of 2-byte lanes: returns a bitfield in
+// which bit i corresponds to lane i of `mask`. Bits at positions >= N may be
+// set; the caller is expected to clear them via OnlyActive.
+template <typename T, size_t N, HWY_IF_LE64(T, N)>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<2> /*tag*/,
+                                 const Mask128<T, N> mask) {
+  const Simd<uint16_t, N> du;
+  const Simd<T, N> d;
+  // Lanes beyond N are undefined after a partial load. Loading all four bit
+  // values keeps every lane's bit distinct, so the undefined lanes can only
+  // affect bit positions >= N and never the valid ones.
+  alignas(8) constexpr uint16_t kLaneBits[4] = {0x1, 0x2, 0x4, 0x8};
+  const Vec128<uint16_t, N> lane_bits(Load(Simd<uint16_t, 4>(), kLaneBits).raw);
+  const Vec128<uint16_t, N> selected =
+      BitCast(du, VecFromMask(d, mask)) & lane_bits;
+#if HWY_ARCH_ARM_A64
+  return vaddv_u16(selected.raw);
+#else
+  // No vaddv: widen pairwise until a single 64-bit sum remains.
+  const uint64x1_t sum = vpaddl_u32(vpaddl_u16(selected.raw));
+  return vget_lane_u64(sum, 0);
+#endif
+}
+
+// Full vector of four 4-byte lanes: returns a 4-bit bitfield in which bit i is
+// set iff lane i of `mask` is true.
+template <typename T>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<4> /*tag*/,
+                                 const Mask128<T> mask) {
+  const Full128<uint32_t> du;
+  const Full128<T> d;
+  // Lane i contributes the value 1 << i when its mask is set.
+  alignas(16) constexpr uint32_t kLaneBits[4] = {0x1, 0x2, 0x4, 0x8};
+  const Vec128<uint32_t> selected =
+      BitCast(du, VecFromMask(d, mask)) & Load(du, kLaneBits);
+#if HWY_ARCH_ARM_A64
+  return vaddvq_u32(selected.raw);
+#else
+  // No vaddv: widen pairwise to two 64-bit partial sums and add them.
+  const uint64x2_t halves = vpaddlq_u32(selected.raw);
+  return vgetq_lane_u64(halves, 0) + vgetq_lane_u64(halves, 1);
+#endif
+}
+
+// Partial vectors (at most 64 bits) of 4-byte lanes: returns a bitfield in
+// which bit i corresponds to lane i of `mask`. Bits at positions >= N may be
+// set; the caller is expected to clear them via OnlyActive.
+template <typename T, size_t N, HWY_IF_LE64(T, N)>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<4> /*tag*/,
+                                 const Mask128<T, N> mask) {
+  const Simd<uint32_t, N> du;
+  const Simd<T, N> d;
+  // Lanes beyond N are undefined after a partial load. Loading both bit
+  // values keeps every lane's bit distinct, so the undefined lane can only
+  // affect bit positions >= N and never the valid ones.
+  alignas(8) constexpr uint32_t kLaneBits[2] = {0x1, 0x2};
+  const Vec128<uint32_t, N> lane_bits(Load(Simd<uint32_t, 2>(), kLaneBits).raw);
+  const Vec128<uint32_t, N> selected =
+      BitCast(du, VecFromMask(d, mask)) & lane_bits;
+#if HWY_ARCH_ARM_A64
+  return vaddv_u32(selected.raw);
+#else
+  // No vaddv: one widening pairwise add yields the 64-bit sum.
+  const uint64x1_t sum = vpaddl_u32(selected.raw);
+  return vget_lane_u64(sum, 0);
+#endif
+}
+
+// Full vector of two 8-byte lanes: returns a 2-bit bitfield in which bit i is
+// set iff lane i of `m` is true.
+template <typename T>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<8> /*tag*/, const Mask128<T> m) {
+  const Full128<uint64_t> du;
+  const Full128<T> d;
+  // Lane i contributes the value 1 << i when its mask is set.
+  alignas(16) constexpr uint64_t kLaneBits[2] = {0x1, 0x2};
+  const Vec128<uint64_t> selected =
+      BitCast(du, VecFromMask(d, m)) & Load(du, kLaneBits);
+#if HWY_ARCH_ARM_A64
+  return vaddvq_u64(selected.raw);
+#else
+  return vgetq_lane_u64(selected.raw, 0) + vgetq_lane_u64(selected.raw, 1);
+#endif
+}
+
+// Single 8-byte lane: returns 1 if the lane of `m` is true, otherwise 0.
+template <typename T>
+HWY_INLINE uint64_t BitsFromMask(hwy::SizeTag<8> /*tag*/,
+                                 const Mask128<T, 1> m) {
+  const Simd<uint64_t, 1> du;
+  const Simd<T, 1> d;
+  // Keep only the lowest bit of the (all-ones or zero) mask lane.
+  const Vec128<uint64_t, 1> lowest_bit =
+      BitCast(du, VecFromMask(d, m)) & Set(du, 1);
+  return vget_lane_u64(lowest_bit.raw, 0);
+}
+
+// Returns the lowest N bits of a BitsFromMask result when the vector occupies
+// fewer than 8 bytes; otherwise returns `bits` unchanged.
+template <typename T, size_t N>
+constexpr uint64_t OnlyActive(uint64_t bits) {
+  return ((N * sizeof(T)) < 8) ? (bits & ((1ull << N) - 1)) : bits;
+}
+
+// Dispatches to the BitsFromMask overload for this lane size, then clears bits
+// belonging to lanes outside the vector via OnlyActive.
+template <typename T, size_t N>
+HWY_INLINE uint64_t BitsFromMask(const Mask128<T, N> mask) {
+  const uint64_t raw_bits = BitsFromMask(hwy::SizeTag<sizeof(T)>(), mask);
+  return OnlyActive<T, N>(raw_bits);
+}
+
+// Returns number of lanes whose mask is set.
+//
+// Masks are either FF..FF or 0. Unfortunately there is no reduce-sub op
+// ("vsubv"). ANDing with 1 would work but requires a constant. Negating also
+// changes each lane to 1 (if mask set) or 0.
+
+// Full vector, 1-byte lanes: negating the mask (viewed as i8) turns each true
+// lane into 1 and each false lane into 0; the lanes are then summed.
+template <typename T>
+HWY_INLINE size_t CountTrue(hwy::SizeTag<1> /*tag*/, const Mask128<T> mask) {
+  const Full128<int8_t> di;
+  const auto mask_i8 = BitCast(di, VecFromMask(Full128<T>(), mask));
+  const int8x16_t lane_is_true = vnegq_s8(mask_i8.raw);
+
+#if HWY_ARCH_ARM_A64
+  return vaddvq_s8(lane_is_true);
+#else
+  // No vaddv: widen pairwise to two 64-bit partial sums and add them.
+  const int64x2_t halves = vpaddlq_s32(vpaddlq_s16(vpaddlq_s8(lane_is_true)));
+  return vgetq_lane_s64(halves, 0) + vgetq_lane_s64(halves, 1);
+#endif
+}
+// Full vector, 2-byte lanes: negating the mask (viewed as i16) turns each true
+// lane into 1 and each false lane into 0; the lanes are then summed.
+template <typename T>
+HWY_INLINE size_t CountTrue(hwy::SizeTag<2> /*tag*/, const Mask128<T> mask) {
+  const Full128<int16_t> di;
+  const auto mask_i16 = BitCast(di, VecFromMask(Full128<T>(), mask));
+  const int16x8_t lane_is_true = vnegq_s16(mask_i16.raw);
+
+#if HWY_ARCH_ARM_A64
+  return vaddvq_s16(lane_is_true);
+#else
+  // No vaddv: widen pairwise to two 64-bit partial sums and add them.
+  const int64x2_t halves = vpaddlq_s32(vpaddlq_s16(lane_is_true));
+  return vgetq_lane_s64(halves, 0) + vgetq_lane_s64(halves, 1);
+#endif
+}
+
+// Full vector, 4-byte lanes: negating the mask (viewed as i32) turns each true
+// lane into 1 and each false lane into 0; the lanes are then summed.
+template <typename T>
+HWY_INLINE size_t CountTrue(hwy::SizeTag<4> /*tag*/, const Mask128<T> mask) {
+  const Full128<int32_t> di;
+  const auto mask_i32 = BitCast(di, VecFromMask(Full128<T>(), mask));
+  const int32x4_t lane_is_true = vnegq_s32(mask_i32.raw);
+
+#if HWY_ARCH_ARM_A64
+  return vaddvq_s32(lane_is_true);
+#else
+  // No vaddv: widen pairwise to two 64-bit partial sums and add them.
+  const int64x2_t halves = vpaddlq_s32(lane_is_true);
+  return vgetq_lane_s64(halves, 0) + vgetq_lane_s64(halves, 1);
+#endif
+}
+
+// Full vector, 8-byte lanes: converts each mask lane to 1 (true) or 0 (false)
+// and adds the two lanes. A64 negates the lanes; the ARMv7 path instead shifts
+// the top bit of each unsigned lane down to bit 0.
+template <typename T>
+HWY_INLINE size_t CountTrue(hwy::SizeTag<8> /*tag*/, const Mask128<T> mask) {
+#if HWY_ARCH_ARM_A64
+  const Full128<int64_t> di;
+  const auto mask_i64 = BitCast(di, VecFromMask(Full128<T>(), mask));
+  const int64x2_t lane_is_true = vnegq_s64(mask_i64.raw);
+  return vaddvq_s64(lane_is_true);
+#else
+  const Full128<uint64_t> du;
+  const auto mask_u64 = VecFromMask(du, RebindMask(du, mask));
+  const uint64x2_t lane_is_true = vshrq_n_u64(mask_u64.raw, 63);
+  return vgetq_lane_u64(lane_is_true, 0) + vgetq_lane_u64(lane_is_true, 1);
+#endif
+}
+
+}  // namespace detail
+
+// Full
+// Number of true lanes in a full-vector mask; dispatches to a detail::CountTrue
+// overload selected by the lane size in bytes.
+template <typename T>
+HWY_INLINE size_t CountTrue(const Mask128<T> mask) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  return detail::CountTrue(LaneSizeTag(), mask);
+}
+
+// Partial
+// Number of true lanes in a partial-vector mask (at most 64 bits): population
+// count of the mask's bitfield.
+template <typename T, size_t N, HWY_IF_LE64(T, N)>
+HWY_INLINE size_t CountTrue(const Mask128<T, N> mask) {
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  return PopCount(mask_bits);
+}
+
+// Writes the bitfield of `mask` (one bit per lane) to `p` and returns the
+// number of bytes written, which is (N + 7) / 8.
+// NOTE(review): the leading bytes of the uint64_t's object representation are
+// copied, which are the low-order bits only on little-endian targets - confirm
+// that big-endian is out of scope here.
+template <typename T, size_t N>
+HWY_INLINE size_t StoreMaskBits(const Mask128<T, N> mask, uint8_t* p) {
+  constexpr size_t kBytesToStore = (N + 7) / 8;
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  CopyBytes<kBytesToStore>(&mask_bits, p);
+  return kBytesToStore;
+}
+
+// Full
+// Returns true iff no lane of the full-vector mask `m` is set.
+template <typename T>
+HWY_INLINE bool AllFalse(const Mask128<T> m) {
+  const Full128<T> d;
+#if HWY_ARCH_ARM_A64
+  // The maximum over all 32-bit chunks is zero only if every bit is zero.
+  const Full128<uint32_t> d32;
+  const auto mask32 = MaskFromVec(BitCast(d32, VecFromMask(d, m)));
+  const uint32_t max_chunk = vmaxvq_u32(mask32.raw);
+  return (max_chunk == 0);
+#else
+  // Saturating narrowing keeps each 64-bit lane nonzero if it was nonzero, so
+  // the resulting 64 bits are zero only if the whole vector was.
+  const Full128<uint64_t> d64;
+  const auto mask64 = BitCast(d64, VecFromMask(d, m));
+  const uint32x2_t narrowed = vqmovn_u64(mask64.raw);
+  return vget_lane_u64(vreinterpret_u64_u32(narrowed), 0) == 0;
+#endif
+}
+
+// Partial
+// Returns true iff no lane of the partial-vector mask `m` (at most 64 bits) is
+// set, i.e. its bitfield is zero.
+template <typename T, size_t N, HWY_IF_LE64(T, N)>
+HWY_INLINE bool AllFalse(const Mask128<T, N> m) {
+  const uint64_t mask_bits = detail::BitsFromMask(m);
+  return mask_bits == 0;
+}
+
+// Returns true iff every lane of `m` is set. Implemented as "no lane of the
+// mask vector compares equal to zero".
+template <typename T, size_t N>
+HWY_INLINE bool AllTrue(const Mask128<T, N> m) {
+  const Simd<T, N> d;
+  const auto lane_is_false = (VecFromMask(d, m) == Zero(d));
+  return AllFalse(lane_is_false);
+}
+
+// ------------------------------ Compress
+
+namespace detail {
+
+// Full vector: loads 8 bytes from `bytes` as one 64-bit value and duplicates
+// it into both halves, so that ZipLower can operate on the lower half.
+HWY_INLINE Vec128<uint8_t> Load8Bytes(Full128<uint8_t> /*d*/,
+                                      const uint8_t* bytes) {
+  const uint64_t* const as_u64 = reinterpret_cast<const uint64_t*>(bytes);
+  const uint64x2_t replicated = vld1q_dup_u64(as_u64);
+  return Vec128<uint8_t>(vreinterpretq_u8_u64(replicated));
+}
+
+// Partial-vector overload (N <= 8 byte lanes): simply forwards to Load(d,
+// bytes) and returns the resulting half-register (or smaller) vector.
+template <size_t N, HWY_IF_LE64(uint8_t, N)>
+HWY_INLINE Vec128<uint8_t, N> Load8Bytes(Simd<uint8_t, N> d,
+                                         const uint8_t* bytes) {
+  return Load(d, bytes);
+}
+
+// 2-byte lanes: returns a vector of byte indices derived from `mask_bits` (one
+// bit per lane, must be < 256). Row `mask_bits` of the table lists, in
+// ascending order, 2 * (index of each set bit), padded with zeros; each entry
+// is then expanded to the pair of byte indices (2*lane, 2*lane + 1).
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> IdxFromBits(hwy::SizeTag<2> /*tag*/,
+                                    const uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 256);
+  const Simd<T, N> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  const Simd<uint16_t, N> du;
+
+  // ARM does not provide an equivalent of AVX2 permutevar, so we need byte
+  // indices for VTBL (one vector's worth for each of 256 combinations of
+  // 8 mask bits). Loading them directly would require 4 KiB. We can instead
+  // store lane indices and convert to byte indices (2*lane + 0..1), with the
+  // doubling baked into the table. AVX2 Compress32 stores eight 4-bit lane
+  // indices (total 1 KiB), broadcasts them into each 32-bit lane and shifts.
+  // Here, 16-bit lanes are too narrow to hold all bits, and unpacking nibbles
+  // is likely more costly than the higher cache footprint from storing bytes.
+  alignas(16) constexpr uint8_t table[256 * 8] = {
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,
+      0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  4,  0,  0,  0,
+      0,  0,  0,  0,  0,  4,  0,  0,  0,  0,  0,  0,  2,  4,  0,  0,  0,  0,
+      0,  0,  0,  2,  4,  0,  0,  0,  0,  0,  6,  0,  0,  0,  0,  0,  0,  0,
+      0,  6,  0,  0,  0,  0,  0,  0,  2,  6,  0,  0,  0,  0,  0,  0,  0,  2,
+      6,  0,  0,  0,  0,  0,  4,  6,  0,  0,  0,  0,  0,  0,  0,  4,  6,  0,
+      0,  0,  0,  0,  2,  4,  6,  0,  0,  0,  0,  0,  0,  2,  4,  6,  0,  0,
+      0,  0,  8,  0,  0,  0,  0,  0,  0,  0,  0,  8,  0,  0,  0,  0,  0,  0,
+      2,  8,  0,  0,  0,  0,  0,  0,  0,  2,  8,  0,  0,  0,  0,  0,  4,  8,
+      0,  0,  0,  0,  0,  0,  0,  4,  8,  0,  0,  0,  0,  0,  2,  4,  8,  0,
+      0,  0,  0,  0,  0,  2,  4,  8,  0,  0,  0,  0,  6,  8,  0,  0,  0,  0,
+      0,  0,  0,  6,  8,  0,  0,  0,  0,  0,  2,  6,  8,  0,  0,  0,  0,  0,
+      0,  2,  6,  8,  0,  0,  0,  0,  4,  6,  8,  0,  0,  0,  0,  0,  0,  4,
+      6,  8,  0,  0,  0,  0,  2,  4,  6,  8,  0,  0,  0,  0,  0,  2,  4,  6,
+      8,  0,  0,  0,  10, 0,  0,  0,  0,  0,  0,  0,  0,  10, 0,  0,  0,  0,
+      0,  0,  2,  10, 0,  0,  0,  0,  0,  0,  0,  2,  10, 0,  0,  0,  0,  0,
+      4,  10, 0,  0,  0,  0,  0,  0,  0,  4,  10, 0,  0,  0,  0,  0,  2,  4,
+      10, 0,  0,  0,  0,  0,  0,  2,  4,  10, 0,  0,  0,  0,  6,  10, 0,  0,
+      0,  0,  0,  0,  0,  6,  10, 0,  0,  0,  0,  0,  2,  6,  10, 0,  0,  0,
+      0,  0,  0,  2,  6,  10, 0,  0,  0,  0,  4,  6,  10, 0,  0,  0,  0,  0,
+      0,  4,  6,  10, 0,  0,  0,  0,  2,  4,  6,  10, 0,  0,  0,  0,  0,  2,
+      4,  6,  10, 0,  0,  0,  8,  10, 0,  0,  0,  0,  0,  0,  0,  8,  10, 0,
+      0,  0,  0,  0,  2,  8,  10, 0,  0,  0,  0,  0,  0,  2,  8,  10, 0,  0,
+      0,  0,  4,  8,  10, 0,  0,  0,  0,  0,  0,  4,  8,  10, 0,  0,  0,  0,
+      2,  4,  8,  10, 0,  0,  0,  0,  0,  2,  4,  8,  10, 0,  0,  0,  6,  8,
+      10, 0,  0,  0,  0,  0,  0,  6,  8,  10, 0,  0,  0,  0,  2,  6,  8,  10,
+      0,  0,  0,  0,  0,  2,  6,  8,  10, 0,  0,  0,  4,  6,  8,  10, 0,  0,
+      0,  0,  0,  4,  6,  8,  10, 0,  0,  0,  2,  4,  6,  8,  10, 0,  0,  0,
+      0,  2,  4,  6,  8,  10, 0,  0,  12, 0,  0,  0,  0,  0,  0,  0,  0,  12,
+      0,  0,  0,  0,  0,  0,  2,  12, 0,  0,  0,  0,  0,  0,  0,  2,  12, 0,
+      0,  0,  0,  0,  4,  12, 0,  0,  0,  0,  0,  0,  0,  4,  12, 0,  0,  0,
+      0,  0,  2,  4,  12, 0,  0,  0,  0,  0,  0,  2,  4,  12, 0,  0,  0,  0,
+      6,  12, 0,  0,  0,  0,  0,  0,  0,  6,  12, 0,  0,  0,  0,  0,  2,  6,
+      12, 0,  0,  0,  0,  0,  0,  2,  6,  12, 0,  0,  0,  0,  4,  6,  12, 0,
+      0,  0,  0,  0,  0,  4,  6,  12, 0,  0,  0,  0,  2,  4,  6,  12, 0,  0,
+      0,  0,  0,  2,  4,  6,  12, 0,  0,  0,  8,  12, 0,  0,  0,  0,  0,  0,
+      0,  8,  12, 0,  0,  0,  0,  0,  2,  8,  12, 0,  0,  0,  0,  0,  0,  2,
+      8,  12, 0,  0,  0,  0,  4,  8,  12, 0,  0,  0,  0,  0,  0,  4,  8,  12,
+      0,  0,  0,  0,  2,  4,  8,  12, 0,  0,  0,  0,  0,  2,  4,  8,  12, 0,
+      0,  0,  6,  8,  12, 0,  0,  0,  0,  0,  0,  6,  8,  12, 0,  0,  0,  0,
+      2,  6,  8,  12, 0,  0,  0,  0,  0,  2,  6,  8,  12, 0,  0,  0,  4,  6,
+      8,  12, 0,  0,  0,  0,  0,  4,  6,  8,  12, 0,  0,  0,  2,  4,  6,  8,
+      12, 0,  0,  0,  0,  2,  4,  6,  8,  12, 0,  0,  10, 12, 0,  0,  0,  0,
+      0,  0,  0,  10, 12, 0,  0,  0,  0,  0,  2,  10, 12, 0,  0,  0,  0,  0,
+      0,  2,  10, 12, 0,  0,  0,  0,  4,  10, 12, 0,  0,  0,  0,  0,  0,  4,
+      10, 12, 0,  0,  0,  0,  2,  4,  10, 12, 0,  0,  0,  0,  0,  2,  4,  10,
+      12, 0,  0,  0,  6,  10, 12, 0,  0,  0,  0,  0,  0,  6,  10, 12, 0,  0,
+      0,  0,  2,  6,  10, 12, 0,  0,  0,  0,  0,  2,  6,  10, 12, 0,  0,  0,
+      4,  6,  10, 12, 0,  0,  0,  0,  0,  4,  6,  10, 12, 0,  0,  0,  2,  4,
+      6,  10, 12, 0,  0,  0,  0,  2,  4,  6,  10, 12, 0,  0,  8,  10, 12, 0,
+      0,  0,  0,  0,  0,  8,  10, 12, 0,  0,  0,  0,  2,  8,  10, 12, 0,  0,
+      0,  0,  0,  2,  8,  10, 12, 0,  0,  0,  4,  8,  10, 12, 0,  0,  0,  0,
+      0,  4,  8,  10, 12, 0,  0,  0,  2,  4,  8,  10, 12, 0,  0,  0,  0,  2,
+      4,  8,  10, 12, 0,  0,  6,  8,  10, 12, 0,  0,  0,  0,  0,  6,  8,  10,
+      12, 0,  0,  0,  2,  6,  8,  10, 12, 0,  0,  0,  0,  2,  6,  8,  10, 12,
+      0,  0,  4,  6,  8,  10, 12, 0,  0,  0,  0,  4,  6,  8,  10, 12, 0,  0,
+      2,  4,  6,  8,  10, 12, 0,  0,  0,  2,  4,  6,  8,  10, 12, 0,  14, 0,
+      0,  0,  0,  0,  0,  0,  0,  14, 0,  0,  0,  0,  0,  0,  2,  14, 0,  0,
+      0,  0,  0,  0,  0,  2,  14, 0,  0,  0,  0,  0,  4,  14, 0,  0,  0,  0,
+      0,  0,  0,  4,  14, 0,  0,  0,  0,  0,  2,  4,  14, 0,  0,  0,  0,  0,
+      0,  2,  4,  14, 0,  0,  0,  0,  6,  14, 0,  0,  0,  0,  0,  0,  0,  6,
+      14, 0,  0,  0,  0,  0,  2,  6,  14, 0,  0,  0,  0,  0,  0,  2,  6,  14,
+      0,  0,  0,  0,  4,  6,  14, 0,  0,  0,  0,  0,  0,  4,  6,  14, 0,  0,
+      0,  0,  2,  4,  6,  14, 0,  0,  0,  0,  0,  2,  4,  6,  14, 0,  0,  0,
+      8,  14, 0,  0,  0,  0,  0,  0,  0,  8,  14, 0,  0,  0,  0,  0,  2,  8,
+      14, 0,  0,  0,  0,  0,  0,  2,  8,  14, 0,  0,  0,  0,  4,  8,  14, 0,
+      0,  0,  0,  0,  0,  4,  8,  14, 0,  0,  0,  0,  2,  4,  8,  14, 0,  0,
+      0,  0,  0,  2,  4,  8,  14, 0,  0,  0,  6,  8,  14, 0,  0,  0,  0,  0,
+      0,  6,  8,  14, 0,  0,  0,  0,  2,  6,  8,  14, 0,  0,  0,  0,  0,  2,
+      6,  8,  14, 0,  0,  0,  4,  6,  8,  14, 0,  0,  0,  0,  0,  4,  6,  8,
+      14, 0,  0,  0,  2,  4,  6,  8,  14, 0,  0,  0,  0,  2,  4,  6,  8,  14,
+      0,  0,  10, 14, 0,  0,  0,  0,  0,  0,  0,  10, 14, 0,  0,  0,  0,  0,
+      2,  10, 14, 0,  0,  0,  0,  0,  0,  2,  10, 14, 0,  0,  0,  0,  4,  10,
+      14, 0,  0,  0,  0,  0,  0,  4,  10, 14, 0,  0,  0,  0,  2,  4,  10, 14,
+      0,  0,  0,  0,  0,  2,  4,  10, 14, 0,  0,  0,  6,  10, 14, 0,  0,  0,
+      0,  0,  0,  6,  10, 14, 0,  0,  0,  0,  2,  6,  10, 14, 0,  0,  0,  0,
+      0,  2,  6,  10, 14, 0,  0,  0,  4,  6,  10, 14, 0,  0,  0,  0,  0,  4,
+      6,  10, 14, 0,  0,  0,  2,  4,  6,  10, 14, 0,  0,  0,  0,  2,  4,  6,
+      10, 14, 0,  0,  8,  10, 14, 0,  0,  0,  0,  0,  0,  8,  10, 14, 0,  0,
+      0,  0,  2,  8,  10, 14, 0,  0,  0,  0,  0,  2,  8,  10, 14, 0,  0,  0,
+      4,  8,  10, 14, 0,  0,  0,  0,  0,  4,  8,  10, 14, 0,  0,  0,  2,  4,
+      8,  10, 14, 0,  0,  0,  0,  2,  4,  8,  10, 14, 0,  0,  6,  8,  10, 14,
+      0,  0,  0,  0,  0,  6,  8,  10, 14, 0,  0,  0,  2,  6,  8,  10, 14, 0,
+      0,  0,  0,  2,  6,  8,  10, 14, 0,  0,  4,  6,  8,  10, 14, 0,  0,  0,
+      0,  4,  6,  8,  10, 14, 0,  0,  2,  4,  6,  8,  10, 14, 0,  0,  0,  2,
+      4,  6,  8,  10, 14, 0,  12, 14, 0,  0,  0,  0,  0,  0,  0,  12, 14, 0,
+      0,  0,  0,  0,  2,  12, 14, 0,  0,  0,  0,  0,  0,  2,  12, 14, 0,  0,
+      0,  0,  4,  12, 14, 0,  0,  0,  0,  0,  0,  4,  12, 14, 0,  0,  0,  0,
+      2,  4,  12, 14, 0,  0,  0,  0,  0,  2,  4,  12, 14, 0,  0,  0,  6,  12,
+      14, 0,  0,  0,  0,  0,  0,  6,  12, 14, 0,  0,  0,  0,  2,  6,  12, 14,
+      0,  0,  0,  0,  0,  2,  6,  12, 14, 0,  0,  0,  4,  6,  12, 14, 0,  0,
+      0,  0,  0,  4,  6,  12, 14, 0,  0,  0,  2,  4,  6,  12, 14, 0,  0,  0,
+      0,  2,  4,  6,  12, 14, 0,  0,  8,  12, 14, 0,  0,  0,  0,  0,  0,  8,
+      12, 14, 0,  0,  0,  0,  2,  8,  12, 14, 0,  0,  0,  0,  0,  2,  8,  12,
+      14, 0,  0,  0,  4,  8,  12, 14, 0,  0,  0,  0,  0,  4,  8,  12, 14, 0,
+      0,  0,  2,  4,  8,  12, 14, 0,  0,  0,  0,  2,  4,  8,  12, 14, 0,  0,
+      6,  8,  12, 14, 0,  0,  0,  0,  0,  6,  8,  12, 14, 0,  0,  0,  2,  6,
+      8,  12, 14, 0,  0,  0,  0,  2,  6,  8,  12, 14, 0,  0,  4,  6,  8,  12,
+      14, 0,  0,  0,  0,  4,  6,  8,  12, 14, 0,  0,  2,  4,  6,  8,  12, 14,
+      0,  0,  0,  2,  4,  6,  8,  12, 14, 0,  10, 12, 14, 0,  0,  0,  0,  0,
+      0,  10, 12, 14, 0,  0,  0,  0,  2,  10, 12, 14, 0,  0,  0,  0,  0,  2,
+      10, 12, 14, 0,  0,  0,  4,  10, 12, 14, 0,  0,  0,  0,  0,  4,  10, 12,
+      14, 0,  0,  0,  2,  4,  10, 12, 14, 0,  0,  0,  0,  2,  4,  10, 12, 14,
+      0,  0,  6,  10, 12, 14, 0,  0,  0,  0,  0,  6,  10, 12, 14, 0,  0,  0,
+      2,  6,  10, 12, 14, 0,  0,  0,  0,  2,  6,  10, 12, 14, 0,  0,  4,  6,
+      10, 12, 14, 0,  0,  0,  0,  4,  6,  10, 12, 14, 0,  0,  2,  4,  6,  10,
+      12, 14, 0,  0,  0,  2,  4,  6,  10, 12, 14, 0,  8,  10, 12, 14, 0,  0,
+      0,  0,  0,  8,  10, 12, 14, 0,  0,  0,  2,  8,  10, 12, 14, 0,  0,  0,
+      0,  2,  8,  10, 12, 14, 0,  0,  4,  8,  10, 12, 14, 0,  0,  0,  0,  4,
+      8,  10, 12, 14, 0,  0,  2,  4,  8,  10, 12, 14, 0,  0,  0,  2,  4,  8,
+      10, 12, 14, 0,  6,  8,  10, 12, 14, 0,  0,  0,  0,  6,  8,  10, 12, 14,
+      0,  0,  2,  6,  8,  10, 12, 14, 0,  0,  0,  2,  6,  8,  10, 12, 14, 0,
+      4,  6,  8,  10, 12, 14, 0,  0,  0,  4,  6,  8,  10, 12, 14, 0,  2,  4,
+      6,  8,  10, 12, 14, 0,  0,  2,  4,  6,  8,  10, 12, 14};
+
+  // Each table row is 8 bytes: one doubled lane index per output lane.
+  const Vec128<uint8_t, 2 * N> byte_idx = Load8Bytes(d8, table + mask_bits * 8);
+  // Zipping the row with itself copies every entry into both bytes of a 16-bit
+  // lane; adding 0x0100 then increments the upper byte so that each lane holds
+  // the consecutive byte indices (2*lane, 2*lane + 1).
+  const Vec128<uint16_t, N> pairs = ZipLower(byte_idx, byte_idx);
+  return BitCast(d, pairs + Set(du, 0x0100));
+}
+
+// 4-byte lanes: returns the vector of byte indices stored in row `mask_bits`
+// (must be < 16) of a precomputed table. Each row first lists the four byte
+// indices of every lane whose bit is set, in ascending lane order; the
+// remaining positions are filled with 0,1,2,3.
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> IdxFromBits(hwy::SizeTag<4> /*tag*/,
+                                    const uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 16);
+
+  const Simd<T, N> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+
+  // With only 4 lanes (16 combinations), whole index vectors are affordable:
+  // 16 rows of 16 bytes.
+  alignas(16) constexpr uint8_t kByteIndices[16 * 16] = {
+      0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  //
+      4,  5,  6,  7,  0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  0,  1,  2,  3,  0,  1,  2,  3,  //
+      8,  9,  10, 11, 0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  8,  9,  10, 11, 0,  1,  2,  3,  0,  1,  2,  3,  //
+      4,  5,  6,  7,  8,  9,  10, 11, 0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 0,  1,  2,  3,  //
+      12, 13, 14, 15, 0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  12, 13, 14, 15, 0,  1,  2,  3,  0,  1,  2,  3,  //
+      4,  5,  6,  7,  12, 13, 14, 15, 0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  12, 13, 14, 15, 0,  1,  2,  3,  //
+      8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  //
+      4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15};
+
+  const uint8_t* const row = kByteIndices + 16 * mask_bits;
+  return BitCast(d, Load(d8, row));
+}
+
+#if HWY_CAP_INTEGER64 || HWY_CAP_FLOAT64
+
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> IdxFromBits(hwy::SizeTag<8> /*tag*/,
+                                    const uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 4);  // the table below has exactly 4 rows
+
+  // There are only 2 lanes, so we can afford to load the index vector directly.
+  alignas(16) constexpr uint8_t packed_array[4 * 16] = {  // [mask_bits][byte]
+      0, 1, 2,  3,  4,  5,  6,  7,  0, 1, 2,  3,  4,  5,  6,  7,  // m=0b00: none
+      0, 1, 2,  3,  4,  5,  6,  7,  0, 1, 2,  3,  4,  5,  6,  7,  // m=0b01: lane 0
+      8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2,  3,  4,  5,  6,  7,  // m=0b10: lane 1
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15};  // m=0b11: both
+
+  const Simd<T, N> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  return BitCast(d, Load(d8, packed_array + 16 * mask_bits));  // row mask_bits
+}
+
+#endif
+
+// Helper function called by both Compress and CompressStore - avoids a
+// redundant BitsFromMask in the latter.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Compress(Vec128<T, N> v, const uint64_t mask_bits) {
+  const Simd<T, N> d;
+  const RebindToSigned<decltype(d)> di;  // both lookup operands are cast to this
+  const auto shuffle =  // byte indices; SizeTag selects the lane-size overload
+      detail::IdxFromBits<T, N>(hwy::SizeTag<sizeof(T)>(), mask_bits);
+  return BitCast(d, TableLookupBytes(BitCast(di, v), BitCast(di, shuffle)));
+}
+
+}  // namespace detail
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Compress(Vec128<T, N> v, const Mask128<T, N> mask) {
+  return detail::Compress(v, detail::BitsFromMask(mask));  // mask -> bits -> shuffle
+}
+
+// ------------------------------ CompressStore
+
+template <typename T, size_t N>
+HWY_API size_t CompressStore(Vec128<T, N> v, const Mask128<T, N> mask,
+                             Simd<T, N> d, T* HWY_RESTRICT aligned) {
+  const uint64_t mask_bits = detail::BitsFromMask(mask);  // computed once, used twice
+  Store(detail::Compress(v, mask_bits), d, aligned);  // NOTE(review): presumably writes all N lanes, not only the selected ones - confirm
+  return PopCount(mask_bits);  // number of mask bits set
+}
+
+// ------------------------------ StoreInterleaved3
+
+// 128 bits
+HWY_API void StoreInterleaved3(const Vec128<uint8_t> v0,
+                               const Vec128<uint8_t> v1,
+                               const Vec128<uint8_t> v2,
+                               Full128<uint8_t> /*tag*/,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  const uint8x16x3_t triple = {v0.raw, v1.raw, v2.raw};  // bundle for vst3q
+  vst3q_u8(unaligned, triple);  // 3 x 16 bytes: v0[0], v1[0], v2[0], v0[1], ...
+}
+
+// 64 bits
+HWY_API void StoreInterleaved3(const Vec128<uint8_t, 8> v0,
+                               const Vec128<uint8_t, 8> v1,
+                               const Vec128<uint8_t, 8> v2,
+                               Simd<uint8_t, 8> /*tag*/,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  const uint8x8x3_t triple = {v0.raw, v1.raw, v2.raw};  // bundle for vst3
+  vst3_u8(unaligned, triple);  // 3 x 8 bytes: v0[0], v1[0], v2[0], v0[1], ...
+}
+
+// <= 32 bits: avoid writing more than N bytes by copying to buffer
+template <size_t N, HWY_IF_LE32(uint8_t, N)>
+HWY_API void StoreInterleaved3(const Vec128<uint8_t, N> v0,
+                               const Vec128<uint8_t, N> v1,
+                               const Vec128<uint8_t, N> v2,
+                               Simd<uint8_t, N> /*tag*/,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  alignas(16) uint8_t staging[3 * 8];  // vst3_u8 always stores three 8-byte vectors
+  const uint8x8x3_t planes = {v0.raw, v1.raw, v2.raw};
+  vst3_u8(staging, planes);  // interleave into the local buffer first
+  CopyBytes<3 * N>(staging, unaligned);  // then copy only the N valid triplets
+}
+
+// ------------------------------ StoreInterleaved4
+
+// 128 bits
+HWY_API void StoreInterleaved4(const Vec128<uint8_t> v0,
+                               const Vec128<uint8_t> v1,
+                               const Vec128<uint8_t> v2,
+                               const Vec128<uint8_t> v3,
+                               Full128<uint8_t> /*tag*/,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  const uint8x16x4_t quad = {v0.raw, v1.raw, v2.raw, v3.raw};  // bundle for vst4q
+  vst4q_u8(unaligned, quad);  // 4 x 16 bytes: v0[0], v1[0], v2[0], v3[0], v0[1], ...
+}
+
+// 64 bits
+HWY_API void StoreInterleaved4(const Vec128<uint8_t, 8> v0,
+                               const Vec128<uint8_t, 8> v1,
+                               const Vec128<uint8_t, 8> v2,
+                               const Vec128<uint8_t, 8> v3,
+                               Simd<uint8_t, 8> /*tag*/,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  const uint8x8x4_t quad = {v0.raw, v1.raw, v2.raw, v3.raw};  // bundle for vst4
+  vst4_u8(unaligned, quad);  // 4 x 8 bytes: v0[0], v1[0], v2[0], v3[0], v0[1], ...
+}
+
+// <= 32 bits: avoid writing more than N bytes by copying to buffer
+template <size_t N, HWY_IF_LE32(uint8_t, N)>
+HWY_API void StoreInterleaved4(const Vec128<uint8_t, N> v0,
+                               const Vec128<uint8_t, N> v1,
+                               const Vec128<uint8_t, N> v2,
+                               const Vec128<uint8_t, N> v3,
+                               Simd<uint8_t, N> /*tag*/,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  alignas(16) uint8_t staging[4 * 8];  // vst4_u8 always stores four 8-byte vectors
+  const uint8x8x4_t planes = {v0.raw, v1.raw, v2.raw, v3.raw};
+  vst4_u8(staging, planes);  // interleave into the local buffer first
+  CopyBytes<4 * N>(staging, unaligned);  // then copy only the N valid quadruplets
+}
+
+// ================================================== Operator wrapper
+
+// These wrappers are not x86-specific: there are no restrictions on V.
+
+template <class V>
+HWY_API V Add(V a, V b) {
+  return a + b;  // named form of operator+
+}
+template <class V>
+HWY_API V Sub(V a, V b) {
+  return a - b;  // named form of operator-
+}
+
+template <class V>
+HWY_API V Mul(V a, V b) {
+  return a * b;  // only instantiable where operator* exists for V
+}
+template <class V>
+HWY_API V Div(V a, V b) {
+  return a / b;  // only instantiable where operator/ exists for V
+}
+
+template <class V>
+HWY_API V Shl(V a, V b) {  // HWY_API added to match the other operator wrappers
+  return a << b;  // named form of operator<<(V, V)
+}
+template <class V>
+HWY_API V Shr(V a, V b) {  // HWY_API added to match the other operator wrappers
+  return a >> b;  // named form of operator>>(V, V)
+}
+
+template <class V>
+HWY_API auto Eq(V a, V b) -> decltype(a == b) {
+  return a == b;  // named form of operator==
+}
+template <class V>
+HWY_API auto Lt(V a, V b) -> decltype(a == b) {  // return type borrowed from ==
+  return a < b;
+}
+
+template <class V>
+HWY_API auto Gt(V a, V b) -> decltype(a == b) {  // return type borrowed from ==
+  return a > b;
+}
+template <class V>
+HWY_API auto Ge(V a, V b) -> decltype(a == b) {  // return type borrowed from ==
+  return a >= b;
+}
+
+template <class V>
+HWY_API auto Le(V a, V b) -> decltype(a == b) {  // return type borrowed from ==
+  return a <= b;
+}
+
+namespace detail {  // for code folding
+#if HWY_ARCH_ARM_V7
+#undef vuzp1_s8
+#undef vuzp1_u8
+#undef vuzp1_s16
+#undef vuzp1_u16
+#undef vuzp1_s32
+#undef vuzp1_u32
+#undef vuzp1_f32
+#undef vuzp1q_s8
+#undef vuzp1q_u8
+#undef vuzp1q_s16
+#undef vuzp1q_u16
+#undef vuzp1q_s32
+#undef vuzp1q_u32
+#undef vuzp1q_f32
+#undef vuzp2_s8
+#undef vuzp2_u8
+#undef vuzp2_s16
+#undef vuzp2_u16
+#undef vuzp2_s32
+#undef vuzp2_u32
+#undef vuzp2_f32
+#undef vuzp2q_s8
+#undef vuzp2q_u8
+#undef vuzp2q_s16
+#undef vuzp2q_u16
+#undef vuzp2q_s32
+#undef vuzp2q_u32
+#undef vuzp2q_f32
+#undef vzip1_s8
+#undef vzip1_u8
+#undef vzip1_s16
+#undef vzip1_u16
+#undef vzip1_s32
+#undef vzip1_u32
+#undef vzip1_f32
+#undef vzip1q_s8
+#undef vzip1q_u8
+#undef vzip1q_s16
+#undef vzip1q_u16
+#undef vzip1q_s32
+#undef vzip1q_u32
+#undef vzip1q_f32
+#undef vzip2_s8
+#undef vzip2_u8
+#undef vzip2_s16
+#undef vzip2_u16
+#undef vzip2_s32
+#undef vzip2_u32
+#undef vzip2_f32
+#undef vzip2q_s8
+#undef vzip2q_u8
+#undef vzip2q_s16
+#undef vzip2q_u16
+#undef vzip2q_s32
+#undef vzip2q_u32
+#undef vzip2q_f32
+#endif
+
+#undef HWY_NEON_BUILD_ARG_1
+#undef HWY_NEON_BUILD_ARG_2
+#undef HWY_NEON_BUILD_ARG_3
+#undef HWY_NEON_BUILD_PARAM_1
+#undef HWY_NEON_BUILD_PARAM_2
+#undef HWY_NEON_BUILD_PARAM_3
+#undef HWY_NEON_BUILD_RET_1
+#undef HWY_NEON_BUILD_RET_2
+#undef HWY_NEON_BUILD_RET_3
+#undef HWY_NEON_BUILD_TPL_1
+#undef HWY_NEON_BUILD_TPL_2
+#undef HWY_NEON_BUILD_TPL_3
+#undef HWY_NEON_DEF_FUNCTION
+#undef HWY_NEON_DEF_FUNCTION_ALL_FLOATS
+#undef HWY_NEON_DEF_FUNCTION_ALL_TYPES
+#undef HWY_NEON_DEF_FUNCTION_INT_8
+#undef HWY_NEON_DEF_FUNCTION_INT_16
+#undef HWY_NEON_DEF_FUNCTION_INT_32
+#undef HWY_NEON_DEF_FUNCTION_INT_8_16_32
+#undef HWY_NEON_DEF_FUNCTION_INTS
+#undef HWY_NEON_DEF_FUNCTION_INTS_UINTS
+#undef HWY_NEON_DEF_FUNCTION_TPL
+#undef HWY_NEON_DEF_FUNCTION_UINT_8
+#undef HWY_NEON_DEF_FUNCTION_UINT_16
+#undef HWY_NEON_DEF_FUNCTION_UINT_32
+#undef HWY_NEON_DEF_FUNCTION_UINT_8_16_32
+#undef HWY_NEON_DEF_FUNCTION_UINTS
+#undef HWY_NEON_EVAL
+}  // namespace detail
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/arm_sve-inl.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/arm_sve-inl.h
new file mode 100644
index 0000000000..3d57d35866
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/arm_sve-inl.h
@@ -0,0 +1,1698 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// ARM SVE[2] vectors (length not known at compile time).
+// External include guard in highway.h - see comment there.
+
+#include <arm_sve.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "hwy/base.h"
+#include "hwy/ops/shared-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+template <class V>
+struct DFromV_t {};  // specialized in macros (HWY_SPECIALIZE provides ::type)
+template <class V>
+using DFromV = typename DFromV_t<V>::type;  // descriptor (Simd<...>) for vector V
+
+template <class V>
+using TFromV = TFromD<DFromV<V>>;  // TFromD applied to that descriptor
+
+#define HWY_IF_UNSIGNED_V(V) hwy::EnableIf<!IsSigned<TFromV<V>>()>* = nullptr
+#define HWY_IF_SIGNED_V(V) \
+  hwy::EnableIf<IsSigned<TFromV<V>>() && !IsFloat<TFromV<V>>()>* = nullptr
+#define HWY_IF_FLOAT_V(V) hwy::EnableIf<IsFloat<TFromV<V>>()>* = nullptr
+
+// ================================================== MACROS
+
+// Generate specializations and function definitions using X macros. Although
+// harder to read and debug, writing everything manually is too bulky.
+
+namespace detail {  // for code folding
+
+// Unsigned:
+#define HWY_SVE_FOREACH_U08(X_MACRO, NAME, OP) X_MACRO(uint, u, 8, NAME, OP)
+#define HWY_SVE_FOREACH_U16(X_MACRO, NAME, OP) X_MACRO(uint, u, 16, NAME, OP)
+#define HWY_SVE_FOREACH_U32(X_MACRO, NAME, OP) X_MACRO(uint, u, 32, NAME, OP)
+#define HWY_SVE_FOREACH_U64(X_MACRO, NAME, OP) X_MACRO(uint, u, 64, NAME, OP)
+
+// Signed:
+#define HWY_SVE_FOREACH_I08(X_MACRO, NAME, OP) X_MACRO(int, i, 8, NAME, OP)
+#define HWY_SVE_FOREACH_I16(X_MACRO, NAME, OP) X_MACRO(int, i, 16, NAME, OP)
+#define HWY_SVE_FOREACH_I32(X_MACRO, NAME, OP) X_MACRO(int, i, 32, NAME, OP)
+#define HWY_SVE_FOREACH_I64(X_MACRO, NAME, OP) X_MACRO(int, i, 64, NAME, OP)
+
+// Float:
+#define HWY_SVE_FOREACH_F16(X_MACRO, NAME, OP) X_MACRO(float, f, 16, NAME, OP)
+#define HWY_SVE_FOREACH_F32(X_MACRO, NAME, OP) X_MACRO(float, f, 32, NAME, OP)
+#define HWY_SVE_FOREACH_F64(X_MACRO, NAME, OP) X_MACRO(float, f, 64, NAME, OP)
+
+// For all element sizes:
+#define HWY_SVE_FOREACH_U(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_U08(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_U16(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_U32(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_U64(X_MACRO, NAME, OP)
+
+#define HWY_SVE_FOREACH_I(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_I08(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_I16(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_I32(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_I64(X_MACRO, NAME, OP)
+
+#define HWY_SVE_FOREACH_F(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_F16(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_F32(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_F64(X_MACRO, NAME, OP)
+
+// Commonly used type categories for a given element size:
+#define HWY_SVE_FOREACH_UI16(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_U16(X_MACRO, NAME, OP)        \
+  HWY_SVE_FOREACH_I16(X_MACRO, NAME, OP)
+
+#define HWY_SVE_FOREACH_UI32(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_U32(X_MACRO, NAME, OP)        \
+  HWY_SVE_FOREACH_I32(X_MACRO, NAME, OP)
+
+#define HWY_SVE_FOREACH_UI64(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_U64(X_MACRO, NAME, OP)        \
+  HWY_SVE_FOREACH_I64(X_MACRO, NAME, OP)
+
+// Commonly used type categories:
+#define HWY_SVE_FOREACH_UI(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_U(X_MACRO, NAME, OP)        \
+  HWY_SVE_FOREACH_I(X_MACRO, NAME, OP)
+
+#define HWY_SVE_FOREACH(X_MACRO, NAME, OP) \
+  HWY_SVE_FOREACH_U(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_I(X_MACRO, NAME, OP)     \
+  HWY_SVE_FOREACH_F(X_MACRO, NAME, OP)
+
+// Assemble types for use in x-macros (token pasting; examples on the right)
+#define HWY_SVE_T(BASE, BITS) BASE##BITS##_t      // (uint, 8) -> uint8_t
+#define HWY_SVE_D(CHAR, BITS) D##CHAR##BITS       // (u, 8)    -> Du8
+#define HWY_SVE_V(BASE, BITS) sv##BASE##BITS##_t  // (uint, 8) -> svuint8_t
+
+}  // namespace detail
+
+// TODO(janwas): remove typedefs and only use HWY_SVE_V etc. directly
+
+#define HWY_SPECIALIZE(BASE, CHAR, BITS, NAME, OP)                   \
+  using HWY_SVE_D(CHAR, BITS) =                                      \
+      Simd<HWY_SVE_T(BASE, BITS), HWY_LANES(HWY_SVE_T(BASE, BITS))>; \
+  using V##CHAR##BITS = HWY_SVE_V(BASE, BITS);                       \
+  template <>                                                        \
+  struct DFromV_t<HWY_SVE_V(BASE, BITS)> {                           \
+    using Lane = HWY_SVE_T(BASE, BITS);                              \
+    using type = Simd<Lane, HWY_LANES(Lane)>;                        \
+  };
+using Vf16 = svfloat16_t;  // NOTE(review): HWY_SPECIALIZE(float, f, 16) below declares Vf16 again - redundant? confirm
+using Df16 = Simd<float16_t, HWY_LANES(float16_t)>;  // NOTE(review): likewise re-declared as Df16 below
+
+HWY_SVE_FOREACH(HWY_SPECIALIZE, _, _)  // all u/i/f element types; NAME and OP are unused
+#undef HWY_SPECIALIZE
+
+// vector = f(d), e.g. Zero
+#define HWY_SVE_RETV_ARGD(BASE, CHAR, BITS, NAME, OP)           \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(HWY_SVE_D(CHAR, BITS) d) { \
+    (void)Lanes(d);                                             \
+    return v##OP##_##CHAR##BITS();                              \
+  }
+
+// vector = f(vector), e.g. Not
+#define HWY_SVE_RETV_ARGV(BASE, CHAR, BITS, NAME, OP)           \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(HWY_SVE_V(BASE, BITS) v) { \
+    return v##OP##_v_##CHAR##BITS(v);                           \
+  }
+
+// vector = f(vector, scalar), e.g. detail::Add
+#define HWY_SVE_RETV_ARGVS(BASE, CHAR, BITS, NAME, OP)         \
+  HWY_API HWY_SVE_V(BASE, BITS)                                \
+      NAME(HWY_SVE_V(BASE, BITS) a, HWY_SVE_T(BASE, BITS) b) { \
+    return v##OP##_##CHAR##BITS(a, b);                         \
+  }
+
+// vector = f(vector, vector), e.g. Add
+#define HWY_SVE_RETV_ARGVV(BASE, CHAR, BITS, NAME, OP)         \
+  HWY_API HWY_SVE_V(BASE, BITS)                                \
+      NAME(HWY_SVE_V(BASE, BITS) a, HWY_SVE_V(BASE, BITS) b) { \
+    return v##OP##_vv_##CHAR##BITS(a, b);                      \
+  }
+
+// ================================================== INIT
+
+// ------------------------------ Lanes
+
+// WARNING: we want to query VLMAX/sizeof(T), but this actually changes VL!
+// vlenb is not exposed through intrinsics and vreadvl is not VLMAX. NOTE(review): VL/VLMAX/vlenb look like RISC-V V terms - confirm the SVE equivalent.
+#define HWY_SVE_LANES(BASE, CHAR, BITS, NAME, OP) \
+  HWY_API size_t NAME(HWY_SVE_D(CHAR, BITS) /* d */) { return v##OP##BITS(); }
+
+HWY_SVE_FOREACH(HWY_SVE_LANES, Lanes, setvlmax_e)  // body expands to vsetvlmax_e<BITS>()
+#undef HWY_SVE_LANES
+
+// ------------------------------ Zero
+
+HWY_SVE_FOREACH(HWY_SVE_RETV_ARGD, Zero, zero)  // one Zero(d) overload per element type
+
+template <class D>
+using VFromD = decltype(Zero(D()));  // vector type := whatever Zero returns for D
+
+// ------------------------------ Set
+// vector = f(d, scalar), e.g. Set
+#define HWY_SVE_SET(BASE, CHAR, BITS, NAME, OP)                  \
+  HWY_API HWY_SVE_V(BASE, BITS)                                  \
+      NAME(HWY_SVE_D(CHAR, BITS) d, HWY_SVE_T(BASE, BITS) arg) { \
+    (void)Lanes(d);                                              \
+    return v##OP##_##CHAR##BITS(arg);                            \
+  }
+
+HWY_SVE_FOREACH_UI(HWY_SVE_SET, Set, mv_v_x)
+HWY_SVE_FOREACH_F(HWY_SVE_SET, Set, fmv_v_f)
+#undef HWY_SVE_SET
+
+// ------------------------------ Undefined
+
+HWY_SVE_FOREACH(HWY_SVE_RETV_ARGD, Undefined, undefined)
+
+// ------------------------------ BitCast
+
+namespace detail {
+
+// u8: no change
+#define HWY_SVE_CAST_NOP(BASE, CHAR, BITS, NAME, OP)                           \
+  HWY_API HWY_SVE_V(BASE, BITS) BitCastToByte(HWY_SVE_V(BASE, BITS) v) {       \
+    return v;                                                                  \
+  }                                                                            \
+  HWY_API HWY_SVE_V(BASE, BITS) BitCastFromByte(HWY_SVE_D(CHAR, BITS) /* d */, \
+                                                HWY_SVE_V(BASE, BITS) v) {     \
+    return v;                                                                  \
+  }
+
+// Other integers. NOTE(review): vuint8m##_t pastes to vuint8m_t, which does not look like an ACLE SVE type (svuint8_t?) - confirm.
+#define HWY_SVE_CAST_UI(BASE, CHAR, BITS, NAME, OP)                   \
+  HWY_API vuint8m##_t BitCastToByte(HWY_SVE_V(BASE, BITS) v) {        \
+    return v##OP##_v_##CHAR##BITS##_u8m(v);                           \
+  }                                                                   \
+  HWY_API HWY_SVE_V(BASE, BITS)                                       \
+      BitCastFromByte(HWY_SVE_D(CHAR, BITS) /* d */, vuint8m##_t v) { \
+    return v##OP##_v_u8m##_##CHAR##BITS(v);                           \
+  }
+
+// Float: first cast to/from unsigned
+#define HWY_SVE_CAST_F(BASE, CHAR, BITS, NAME, OP)                    \
+  HWY_API vuint8m##_t BitCastToByte(HWY_SVE_V(BASE, BITS) v) {        \
+    return v##OP##_v_u##BITS##_u8m(v##OP##_v_f##BITS##_u##BITS(v));   \
+  }                                                                   \
+  HWY_API HWY_SVE_V(BASE, BITS)                                       \
+      BitCastFromByte(HWY_SVE_D(CHAR, BITS) /* d */, vuint8m##_t v) { \
+    return v##OP##_v_u##BITS##_f##BITS(v##OP##_v_u8m##_u##BITS(v));   \
+  }
+
+HWY_SVE_FOREACH_U08(HWY_SVE_CAST_NOP, _, _)
+HWY_SVE_FOREACH_I08(HWY_SVE_CAST_UI, _, reinterpret)
+HWY_SVE_FOREACH_UI16(HWY_SVE_CAST_UI, _, reinterpret)
+HWY_SVE_FOREACH_UI32(HWY_SVE_CAST_UI, _, reinterpret)
+HWY_SVE_FOREACH_UI64(HWY_SVE_CAST_UI, _, reinterpret)
+HWY_SVE_FOREACH_F(HWY_SVE_CAST_F, _, reinterpret)
+
+#undef HWY_SVE_CAST_NOP
+#undef HWY_SVE_CAST_UI
+#undef HWY_SVE_CAST_F
+
+}  // namespace detail
+
+template <class D, class FromV>
+HWY_API VFromD<D> BitCast(D d, FromV v) {
+  return detail::BitCastFromByte(d, detail::BitCastToByte(v));  // FromV -> bytes -> VFromD<D>
+}
+
+namespace detail {
+
+template <class V, class DU = RebindToUnsigned<DFromV<V>>>
+HWY_API VFromD<DU> BitCastToUnsigned(V v) {
+  return BitCast(DU(), v);  // DU defaults to RebindToUnsigned of V's descriptor
+}
+
+}  // namespace detail
+
+// ------------------------------ Iota
+
+namespace detail {
+
+HWY_SVE_FOREACH_U(HWY_SVE_RETV_ARGD, Iota0, id_v)
+
+template <class D, class DU = RebindToUnsigned<D>>
+HWY_API VFromD<DU> Iota0(const D /*d*/) {
+  (void)Lanes(DU());  // result intentionally unused, as in the macro-generated ops
+  return BitCastToUnsigned(Iota0(DU()));  // delegate using the unsigned descriptor
+}
+
+}  // namespace detail
+
+// ================================================== LOGICAL
+
+// ------------------------------ Not
+
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGV, Not, not )
+
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V Not(const V v) {
+  const DFromV<V> df;
+  const RebindToUnsigned<decltype(df)> du;  // integer descriptor used for the bit op
+  return BitCast(df, Not(BitCast(du, v)));  // float -> unsigned -> Not -> float
+}
+
+// ------------------------------ And
+
+// Non-vector version (ideally immediate) for use with Iota0
+namespace detail {
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGVS, And, and_vx)
+}  // namespace detail
+
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGVV, And, and)
+
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V And(const V a, const V b) {
+  const DFromV<V> df;
+  const RebindToUnsigned<decltype(df)> du;  // integer descriptor used for the bit op
+  return BitCast(df, And(BitCast(du, a), BitCast(du, b)));
+}
+
+// ------------------------------ Or
+
+// Scalar argument plus mask. Used by VecFromMask. NOTE(review): HWY_SVE_M and MLEN are not defined earlier in this file - confirm where they come from.
+#define HWY_SVE_OR_MASK(BASE, CHAR, BITS, NAME, OP)                 \
+  HWY_API HWY_SVE_V(BASE, BITS)                                     \
+      NAME(HWY_SVE_V(BASE, BITS) v, HWY_SVE_T(BASE, BITS) imm,      \
+           HWY_SVE_M(MLEN) mask, HWY_SVE_V(BASE, BITS) maskedoff) { \
+    return v##OP##_##CHAR##BITS##_m(mask, maskedoff, v, imm);       \
+  }
+
+namespace detail {
+HWY_SVE_FOREACH_UI(HWY_SVE_OR_MASK, Or, or_vx)  // detail::Or(v, imm, mask, maskedoff)
+}  // namespace detail
+
+#undef HWY_SVE_OR_MASK
+
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGVV, Or, or)
+
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V Or(const V a, const V b) {
+  const DFromV<V> df;
+  const RebindToUnsigned<decltype(df)> du;  // integer descriptor used for the bit op
+  return BitCast(df, Or(BitCast(du, a), BitCast(du, b)));
+}
+
+// ------------------------------ Xor
+
+// Non-vector version (ideally immediate) for use with Iota0
+namespace detail {
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGVS, Xor, xor_vx)
+}  // namespace detail
+
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGVV, Xor, xor)
+
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V Xor(const V a, const V b) {
+  const DFromV<V> df;
+  const RebindToUnsigned<decltype(df)> du;  // integer descriptor used for the bit op
+  return BitCast(df, Xor(BitCast(du, a), BitCast(du, b)));
+}
+
+// ------------------------------ AndNot
+
+template <class V>
+HWY_API V AndNot(const V not_a, const V b) {
+  return And(Not(not_a), b);  // (~not_a) & b: the FIRST argument is the one negated
+}
+
+// ------------------------------ CopySign
+
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGVV, CopySign, fsgnj)
+
+template <class V>
+HWY_API V CopySignToAbs(const V abs, const V sign) {
+  // TODO(janwas): separate handling for abs < 0 or same?
+  return CopySign(abs, sign);  // currently just forwards to CopySign
+}
+
+// ================================================== ARITHMETIC
+
+// ------------------------------ Add
+
+namespace detail {
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGVS, Add, add_vx)
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGVS, Add, fadd_vf)
+}  // namespace detail
+
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGVV, Add, add)
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGVV, Add, fadd)
+
+// ------------------------------ Sub
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGVV, Sub, sub)
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGVV, Sub, fsub)
+
+// ------------------------------ SaturatedAdd
+
+HWY_SVE_FOREACH_U08(HWY_SVE_RETV_ARGVV, SaturatedAdd, saddu)
+HWY_SVE_FOREACH_U16(HWY_SVE_RETV_ARGVV, SaturatedAdd, saddu)
+
+HWY_SVE_FOREACH_I08(HWY_SVE_RETV_ARGVV, SaturatedAdd, sadd)
+HWY_SVE_FOREACH_I16(HWY_SVE_RETV_ARGVV, SaturatedAdd, sadd)
+
+// ------------------------------ SaturatedSub
+
+HWY_SVE_FOREACH_U08(HWY_SVE_RETV_ARGVV, SaturatedSub, ssubu)
+HWY_SVE_FOREACH_U16(HWY_SVE_RETV_ARGVV, SaturatedSub, ssubu)
+
+HWY_SVE_FOREACH_I08(HWY_SVE_RETV_ARGVV, SaturatedSub, ssub)
+HWY_SVE_FOREACH_I16(HWY_SVE_RETV_ARGVV, SaturatedSub, ssub)
+
+// ------------------------------ AverageRound
+
+// TODO(janwas): check vxrm rounding mode
+HWY_SVE_FOREACH_U08(HWY_SVE_RETV_ARGVV, AverageRound, aaddu)
+HWY_SVE_FOREACH_U16(HWY_SVE_RETV_ARGVV, AverageRound, aaddu)
+
+// ------------------------------ ShiftLeft[Same]
+
+// Intrinsics do not define .vi forms, so use .vx instead.
+#define HWY_SVE_SHIFT(BASE, CHAR, BITS, NAME, OP)                  \
+  template <int kBits>                                             \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(HWY_SVE_V(BASE, BITS) v) {    \
+    return v##OP##_vx_##CHAR##BITS(v, kBits);                      \
+  }                                                                \
+  HWY_API HWY_SVE_V(BASE, BITS)                                    \
+      NAME##Same(HWY_SVE_V(BASE, BITS) v, int bits) {              \
+    return v##OP##_vx_##CHAR##BITS(v, static_cast<uint8_t>(bits)); \
+  }
+
+HWY_SVE_FOREACH_UI(HWY_SVE_SHIFT, ShiftLeft, sll)
+
+// ------------------------------ ShiftRight[Same]
+
+HWY_SVE_FOREACH_U(HWY_SVE_SHIFT, ShiftRight, srl)
+HWY_SVE_FOREACH_I(HWY_SVE_SHIFT, ShiftRight, sra)
+
+#undef HWY_SVE_SHIFT
+
+// ------------------------------ Shl
+#define HWY_SVE_SHIFT_VV(BASE, CHAR, BITS, NAME, OP)              \
+  HWY_API HWY_SVE_V(BASE, BITS)                                   \
+      NAME(HWY_SVE_V(BASE, BITS) v, HWY_SVE_V(BASE, BITS) bits) { \
+    return v##OP##_vv_##CHAR##BITS(v, bits);                      \
+  }
+
+HWY_SVE_FOREACH_U(HWY_SVE_SHIFT_VV, Shl, sll)
+
+#define HWY_SVE_SHIFT_II(BASE, CHAR, BITS, NAME, OP)                    \
+  HWY_API HWY_SVE_V(BASE, BITS)                                         \
+      NAME(HWY_SVE_V(BASE, BITS) v, HWY_SVE_V(BASE, BITS) bits) {       \
+    return v##OP##_vv_##CHAR##BITS(v, detail::BitCastToUnsigned(bits)); \
+  }
+
+HWY_SVE_FOREACH_I(HWY_SVE_SHIFT_II, Shl, sll)
+
+// ------------------------------ Shr
+
+HWY_SVE_FOREACH_U(HWY_SVE_SHIFT_VV, Shr, srl)
+HWY_SVE_FOREACH_I(HWY_SVE_SHIFT_II, Shr, sra)
+
+#undef HWY_SVE_SHIFT_II
+#undef HWY_SVE_SHIFT_VV
+
+// ------------------------------ Min
+
+HWY_SVE_FOREACH_U(HWY_SVE_RETV_ARGVV, Min, minu)
+HWY_SVE_FOREACH_I(HWY_SVE_RETV_ARGVV, Min, min)
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGVV, Min, fmin)
+
+// ------------------------------ Max
+
+namespace detail {
+
+HWY_SVE_FOREACH_U(HWY_SVE_RETV_ARGVS, Max, maxu_vx)
+HWY_SVE_FOREACH_I(HWY_SVE_RETV_ARGVS, Max, max_vx)
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGVS, Max, fmax_vf)
+
+}  // namespace detail
+
+HWY_SVE_FOREACH_U(HWY_SVE_RETV_ARGVV, Max, maxu)
+HWY_SVE_FOREACH_I(HWY_SVE_RETV_ARGVV, Max, max)
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGVV, Max, fmax)
+
+// ------------------------------ Mul
+
+HWY_SVE_FOREACH_UI16(HWY_SVE_RETV_ARGVV, Mul, mul)
+HWY_SVE_FOREACH_UI32(HWY_SVE_RETV_ARGVV, Mul, mul)
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGVV, Mul, fmul)
+
+// ------------------------------ MulHigh
+
+HWY_SVE_FOREACH_U16(HWY_SVE_RETV_ARGVV, MulHigh, mulhu)
+HWY_SVE_FOREACH_I16(HWY_SVE_RETV_ARGVV, MulHigh, mulh)
+
+// ------------------------------ Div
+
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGVV, Div, fdiv)
+
+// ------------------------------ ApproximateReciprocal
+
+// TODO(janwas): not yet supported in intrinsics
+template <class V>
+HWY_API V ApproximateReciprocal(const V v) {  // placeholder: full division, not an estimate
+  return Div(Set(DFromV<V>(), 1), v);  // Div() instead of operator/, which is not defined for these vector types here
+}
+// HWY_SVE_FOREACH_F32(HWY_SVE_RETV_ARGV, ApproximateReciprocal, frece7)
+
+// ------------------------------ Sqrt
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGV, Sqrt, fsqrt)
+
+// ------------------------------ ApproximateReciprocalSqrt
+
+// TODO(janwas): not yet supported in intrinsics
+template <class V>
+HWY_API V ApproximateReciprocalSqrt(const V v) {
+  return ApproximateReciprocal(Sqrt(v));  // composed from Sqrt and the reciprocal above
+}
+// HWY_SVE_FOREACH_F32(HWY_SVE_RETV_ARGV, ApproximateReciprocalSqrt, frsqrte7)
+
+// ------------------------------ MulAdd
+// Note: op is still named vv, not vvv.
+#define HWY_SVE_FMA(BASE, CHAR, BITS, NAME, OP)                \
+  HWY_API HWY_SVE_V(BASE, BITS)                                \
+      NAME(HWY_SVE_V(BASE, BITS) mul, HWY_SVE_V(BASE, BITS) x, \
+           HWY_SVE_V(BASE, BITS) add) {                        \
+    return v##OP##_vv_##CHAR##BITS(add, mul, x);               \
+  }
+
+HWY_SVE_FOREACH_F(HWY_SVE_FMA, MulAdd, fmacc)
+
+// ------------------------------ NegMulAdd
+HWY_SVE_FOREACH_F(HWY_SVE_FMA, NegMulAdd, fnmsac)
+
+// ------------------------------ MulSub
+HWY_SVE_FOREACH_F(HWY_SVE_FMA, MulSub, fmsac)
+
+// ------------------------------ NegMulSub
+HWY_SVE_FOREACH_F(HWY_SVE_FMA, NegMulSub, fnmacc)
+
+#undef HWY_SVE_FMA
+
+// ================================================== COMPARE
+
+// Comparisons set a mask bit to 1 if the condition is true, else 0. The XX in
+// vboolXX_t is a power of two divisor for vector bits. SLEN 8 / LMUL 1 = 1/8th
+// of all bits; SLEN 8 / LMUL 4 = half of all bits. NOTE(review): vboolXX_t, SLEN and LMUL look like RVV terms - confirm for SVE.
+
+// mask = f(vector, vector)
+#define HWY_SVE_RETM_ARGVV(BASE, CHAR, BITS, NAME, OP)         \
+  HWY_API HWY_SVE_M(MLEN)                                      \
+      NAME(HWY_SVE_V(BASE, BITS) a, HWY_SVE_V(BASE, BITS) b) { \
+    (void)Lanes(DFromV<decltype(a)>());                        \
+    return v##OP##_vv_##CHAR##BITS##_b##MLEN(a, b);            \
+  }
+
+// ------------------------------ Eq
+HWY_SVE_FOREACH_UI(HWY_SVE_RETM_ARGVV, Eq, mseq)
+HWY_SVE_FOREACH_F(HWY_SVE_RETM_ARGVV, Eq, mfeq)
+
+// ------------------------------ Ne
+HWY_SVE_FOREACH_UI(HWY_SVE_RETM_ARGVV, Ne, msne)
+HWY_SVE_FOREACH_F(HWY_SVE_RETM_ARGVV, Ne, mfne)
+
+// ------------------------------ Lt
+HWY_SVE_FOREACH_I(HWY_SVE_RETM_ARGVV, Lt, mslt)
+HWY_SVE_FOREACH_F(HWY_SVE_RETM_ARGVV, Lt, mflt)
+
+// ------------------------------ Gt
+
+template <class V>
+HWY_API auto Gt(const V a, const V b) -> decltype(Lt(a, b)) {
+  return Lt(b, a);  // a > b  <=>  b < a
+}
+
+// ------------------------------ Le
+HWY_SVE_FOREACH_F(HWY_SVE_RETM_ARGVV, Le, mfle)
+
+#undef HWY_SVE_RETM_ARGVV
+
+// ------------------------------ Ge
+
+template <class V>
+HWY_API auto Ge(const V a, const V b) -> decltype(Le(a, b)) {  // Le is only generated for float types above
+  return Le(b, a);  // a >= b  <=>  b <= a
+}
+
+// ------------------------------ TestBit
+
+template <class V>
+HWY_API auto TestBit(const V a, const V bit) -> decltype(Eq(a, bit)) {
+  return Ne(And(a, bit), Zero(DFromV<V>()));  // true where (a & bit) != 0
+}
+
+// ------------------------------ Not
+
+// mask = f(mask)
+#define HWY_SVE_RETM_ARGM(MLEN, NAME, OP)           \
+  HWY_API HWY_SVE_M(MLEN) NAME(HWY_SVE_M(MLEN) m) { \
+    return vm##OP##_m_b##MLEN(m);                   \
+  }
+
+HWY_SVE_FOREACH_B(HWY_SVE_RETM_ARGM, Not, not )
+
+#undef HWY_SVE_RETM_ARGM
+
+// ------------------------------ And
+
+// mask = f(mask_a, mask_b) (note arg2,arg1 order!)
+#define HWY_SVE_RETM_ARGMM(MLEN, NAME, OP)                             \
+  HWY_API HWY_SVE_M(MLEN) NAME(HWY_SVE_M(MLEN) a, HWY_SVE_M(MLEN) b) { \
+    return vm##OP##_mm_b##MLEN(b, a);                                  \
+  }
+
+HWY_SVE_FOREACH_B(HWY_SVE_RETM_ARGMM, And, and)
+
+// ------------------------------ AndNot
+HWY_SVE_FOREACH_B(HWY_SVE_RETM_ARGMM, AndNot, andnot)
+
+// ------------------------------ Or
+HWY_SVE_FOREACH_B(HWY_SVE_RETM_ARGMM, Or, or)
+
+// ------------------------------ Xor
+HWY_SVE_FOREACH_B(HWY_SVE_RETM_ARGMM, Xor, xor)
+
+#undef HWY_SVE_RETM_ARGMM
+
+// ------------------------------ IfThenElse
+#define HWY_SVE_IF_THEN_ELSE(BASE, CHAR, BITS, NAME, OP) \
+  HWY_API HWY_SVE_V(BASE, BITS)                          \
+      NAME(HWY_SVE_M(MLEN) m, HWY_SVE_V(BASE, BITS) yes, \
+           HWY_SVE_V(BASE, BITS) no) {                   \
+    return v##OP##_vvm_##CHAR##BITS(m, no, yes);         \
+  }
+
+HWY_SVE_FOREACH(HWY_SVE_IF_THEN_ELSE, IfThenElse, merge)
+
+#undef HWY_SVE_IF_THEN_ELSE
+// ------------------------------ IfThenElseZero
+
+template <class M, class V>
+HWY_API V IfThenElseZero(const M mask, const V yes) {
+  return IfThenElse(mask, yes, Zero(DFromV<V>()));  // mask ? yes : 0
+}
+
+// ------------------------------ IfThenZeroElse
+
+template <class M, class V>
+HWY_API V IfThenZeroElse(const M mask, const V no) {
+  return IfThenElse(mask, Zero(DFromV<V>()), no);  // mask ? 0 : no
+}
+
+// ------------------------------ MaskFromVec
+
+template <class V>
+HWY_API auto MaskFromVec(const V v) -> decltype(Eq(v, v)) {
+  return Ne(v, Zero(DFromV<V>()));  // true where the lane compares != 0
+}
+
+template <class D>
+using MFromD = decltype(MaskFromVec(Zero(D())));  // mask type := what MaskFromVec returns for D
+
+template <class D, typename MFrom>
+HWY_API MFromD<D> RebindMask(const D /*d*/, const MFrom mask) {
+  // No need to check lane size/LMUL are the same: if not, casting MFrom to
+  // MFromD<D> would fail.
+  return mask;  // relies on MFrom converting to the return type MFromD<D>
+}
+
+// ------------------------------ VecFromMask
+
+template <class D, HWY_IF_NOT_FLOAT_D(D)>
+HWY_API VFromD<D> VecFromMask(const D d, MFromD<D> mask) {
+  const auto v0 = Zero(d);  // used both as the Or operand and as maskedoff
+  return detail::Or(v0, -1, mask, v0);  // presumably (0 | -1) where mask is set, else v0 - confirm masked-op semantics
+}
+
+template <class D, HWY_IF_FLOAT_D(D)>
+HWY_API VFromD<D> VecFromMask(const D d, MFromD<D> mask) {
+  return BitCast(d, VecFromMask(RebindToUnsigned<D>(), mask));  // build as unsigned, then reinterpret as float
+}
+
+// ------------------------------ ZeroIfNegative
+
+template <class V>
+HWY_API V ZeroIfNegative(const V v) {
+  const DFromV<V> d;
+  const auto zero = Zero(d);  // serves as comparand and as the replacement value
+  return IfThenElse(Lt(v, zero), zero, v);  // zero already exists, so no IfThenZeroElse
+}
+
+// ------------------------------ BroadcastSignBit
+
+template <class V>
+HWY_API V BroadcastSignBit(const V v) {
+  return ShiftRight<sizeof(TFromV<V>) * 8 - 1>(v);  // shift by (lane bits - 1); signed ShiftRight is generated from "sra"
+}
+
+// ------------------------------ AllFalse
+// True when vfirst_m_b<MLEN> returns a negative value for mask m.
+#define HWY_SVE_ALL_FALSE(MLEN, NAME, OP)          \
+  HWY_API bool AllFalse(const HWY_SVE_M(MLEN) m) { \
+    return vfirst_m_b##MLEN(m) < 0;                \
+  }
+HWY_SVE_FOREACH_B(HWY_SVE_ALL_FALSE, _, _)
+#undef HWY_SVE_ALL_FALSE
+
+// ------------------------------ AllTrue
+// True when the complement of m (vmnot_m_b<MLEN>) satisfies AllFalse.
+#define HWY_SVE_ALL_TRUE(MLEN, NAME, OP)    \
+  HWY_API bool AllTrue(HWY_SVE_M(MLEN) m) { \
+    return AllFalse(vmnot_m_b##MLEN(m));    \
+  }
+HWY_SVE_FOREACH_B(HWY_SVE_ALL_TRUE, _, _)
+#undef HWY_SVE_ALL_TRUE
+
+// ------------------------------ CountTrue
+// Returns the result of vpopc_m_b<MLEN>(m) as size_t.
+#define HWY_SVE_COUNT_TRUE(MLEN, NAME, OP) \
+  HWY_API size_t CountTrue(HWY_SVE_M(MLEN) m) { return vpopc_m_b##MLEN(m); }
+HWY_SVE_FOREACH_B(HWY_SVE_COUNT_TRUE, _, _)
+#undef HWY_SVE_COUNT_TRUE
+
+// ================================================== MEMORY
+
+// ------------------------------ Load
+// Calls Lanes(d) (result discarded), then loads from p via v<OP><BITS>_v.
+#define HWY_SVE_LOAD(BASE, CHAR, BITS, NAME, OP)                               \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(                                          \
+      HWY_SVE_D(CHAR, BITS) d, const HWY_SVE_T(BASE, BITS) * HWY_RESTRICT p) { \
+    (void)Lanes(d);                                                            \
+    return v##OP##BITS##_v_##CHAR##BITS(p);                                    \
+  }
+HWY_SVE_FOREACH(HWY_SVE_LOAD, Load, le)
+#undef HWY_SVE_LOAD
+
+// Partial load. NOTE(review): Load(d, p) may select this same overload - confirm.
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API VFromD<Simd<T, N>> Load(Simd<T, N> d, const T* HWY_RESTRICT p) {
+  return Load(d, p);
+}
+
+// ------------------------------ LoadU
+// Forwards to Load.
+// SVE only requires lane alignment, not natural alignment of the entire vector.
+template <class D>
+HWY_API VFromD<D> LoadU(D d, const TFromD<D>* HWY_RESTRICT p) {
+  return Load(d, p);
+}
+
+// ------------------------------ Store
+// Calls Lanes(d) (result discarded), then stores v to p via v<OP><BITS>_v.
+#define HWY_SVE_RET_ARGVDP(BASE, CHAR, BITS, NAME, OP)                \
+  HWY_API void NAME(HWY_SVE_V(BASE, BITS) v, HWY_SVE_D(CHAR, BITS) d, \
+                    HWY_SVE_T(BASE, BITS) * HWY_RESTRICT p) {         \
+    (void)Lanes(d);                                                   \
+    return v##OP##BITS##_v_##CHAR##BITS(p, v);                        \
+  }
+HWY_SVE_FOREACH(HWY_SVE_RET_ARGVDP, Store, se)
+#undef HWY_SVE_RET_ARGVDP
+
+// ------------------------------ StoreU
+// Forwards to Store.
+// SVE only requires lane alignment, not natural alignment of the entire vector.
+template <class V, class D>
+HWY_API void StoreU(const V v, D d, TFromD<D>* HWY_RESTRICT p) {
+  Store(v, d, p);
+}
+
+// ------------------------------ Stream
+// No separate streaming path here: forwards to the regular Store.
+template <class V, class D, typename T>
+HWY_API void Stream(const V v, D d, T* HWY_RESTRICT aligned) {
+  Store(v, d, aligned);
+}
+
+// ------------------------------ ScatterOffset
+// `offset` is signed; it is bit-cast to unsigned before the indexed store.
+#define HWY_SVE_SCATTER(BASE, CHAR, BITS, NAME, OP)                         \
+  HWY_API void NAME(HWY_SVE_V(BASE, BITS) v, HWY_SVE_D(CHAR, BITS) /* d */, \
+                    HWY_SVE_T(BASE, BITS) * HWY_RESTRICT base,              \
+                    HWY_SVE_V(int, BITS) offset) {                          \
+    return v##OP##ei##BITS##_v_##CHAR##BITS(                                \
+        base, detail::BitCastToUnsigned(offset), v);                        \
+  }
+HWY_SVE_FOREACH(HWY_SVE_SCATTER, ScatterOffset, sx)
+#undef HWY_SVE_SCATTER
+
+// ------------------------------ ScatterIndex
+// Index -> byte offset: shift left by log2(lane size), then ScatterOffset.
+template <class D, HWY_IF_LANE_SIZE_D(D, 4)>
+HWY_API void ScatterIndex(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT base,
+                          const VFromD<RebindToSigned<D>> index) {
+  return ScatterOffset(v, d, base, ShiftLeft<2>(index));
+}
+
+template <class D, HWY_IF_LANE_SIZE_D(D, 8)>
+HWY_API void ScatterIndex(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT base,
+                          const VFromD<RebindToSigned<D>> index) {
+  return ScatterOffset(v, d, base, ShiftLeft<3>(index));
+}
+
+// ------------------------------ GatherOffset
+// `offset` is signed; it is bit-cast to unsigned before the indexed load.
+#define HWY_SVE_GATHER(BASE, CHAR, BITS, NAME, OP)          \
+  HWY_API HWY_SVE_V(BASE, BITS)                             \
+      NAME(HWY_SVE_D(CHAR, BITS) /* d */,                   \
+           const HWY_SVE_T(BASE, BITS) * HWY_RESTRICT base, \
+           HWY_SVE_V(int, BITS) offset) {                   \
+    return v##OP##ei##BITS##_v_##CHAR##BITS(                \
+        base, detail::BitCastToUnsigned(offset));           \
+  }
+HWY_SVE_FOREACH(HWY_SVE_GATHER, GatherOffset, lx)
+#undef HWY_SVE_GATHER
+
+// ------------------------------ GatherIndex
+// Index -> byte offset: shift left by log2(lane size), then GatherOffset.
+template <class D, HWY_IF_LANE_SIZE_D(D, 4)>
+HWY_API VFromD<D> GatherIndex(D d, const TFromD<D>* HWY_RESTRICT base,
+                              const VFromD<RebindToSigned<D>> index) {
+  return GatherOffset(d, base, ShiftLeft<2>(index));
+}
+
+template <class D, HWY_IF_LANE_SIZE_D(D, 8)>
+HWY_API VFromD<D> GatherIndex(D d, const TFromD<D>* HWY_RESTRICT base,
+                              const VFromD<RebindToSigned<D>> index) {
+  return GatherOffset(d, base, ShiftLeft<3>(index));
+}
+
+// ------------------------------ StoreInterleaved3
+// NOTE(review): invoked below with 7 arguments but declared with 5 - confirm.
+#define HWY_SVE_STORE3(BASE, CHAR, BITS, NAME, OP)                          \
+  HWY_API void NAME(HWY_SVE_V(BASE, BITS) a, HWY_SVE_V(BASE, BITS) b,       \
+                    HWY_SVE_V(BASE, BITS) c, HWY_SVE_D(CHAR, BITS) /* d */, \
+                    HWY_SVE_T(BASE, BITS) * HWY_RESTRICT unaligned) {       \
+    const v##BASE##BITS##x3_t triple = vcreate_##CHAR##BITS##x3(a, b, c);   \
+    return v##OP##e8_v_##CHAR##BITS##x3(unaligned, triple);                 \
+  }
+// Segments are limited to 8 registers, so we can only go up to LMUL=2.
+HWY_SVE_STORE3(uint, u, 8, 1, 8, StoreInterleaved3, sseg3)
+HWY_SVE_STORE3(uint, u, 8, 2, 4, StoreInterleaved3, sseg3)
+
+#undef HWY_SVE_STORE3
+
+// ------------------------------ StoreInterleaved4
+// NOTE(review): invoked below with 7 arguments but declared with 5 - confirm.
+#define HWY_SVE_STORE4(BASE, CHAR, BITS, NAME, OP)                             \
+  HWY_API void NAME(HWY_SVE_V(BASE, BITS) v0, HWY_SVE_V(BASE, BITS) v1,        \
+                    HWY_SVE_V(BASE, BITS) v2, HWY_SVE_V(BASE, BITS) v3,        \
+                    HWY_SVE_D(CHAR, BITS) /* d */,                             \
+                    HWY_SVE_T(BASE, BITS) * HWY_RESTRICT aligned) {            \
+    const v##BASE##BITS##x4_t quad = vcreate_##CHAR##BITS##x4(v0, v1, v2, v3); \
+    return v##OP##e8_v_##CHAR##BITS##x4(aligned, quad);                        \
+  }
+// Segments are limited to 8 registers, so we can only go up to LMUL=2.
+HWY_SVE_STORE4(uint, u, 8, 1, 8, StoreInterleaved4, sseg4)
+HWY_SVE_STORE4(uint, u, 8, 2, 4, StoreInterleaved4, sseg4)
+
+#undef HWY_SVE_STORE4
+
+// ================================================== CONVERT
+
+// ------------------------------ PromoteTo U
+// u8 -> u16 via the vzext_vf2 intrinsics.
+HWY_API Vu16m2 PromoteTo(Du16m2 /* d */, Vu8m1 v) { return vzext_vf2_u16m2(v); }
+HWY_API Vu16m4 PromoteTo(Du16m4 /* d */, Vu8m2 v) { return vzext_vf2_u16m4(v); }
+HWY_API Vu16m8 PromoteTo(Du16m8 /* d */, Vu8m4 v) { return vzext_vf2_u16m8(v); }
+// u8 -> u32 via the vzext_vf4 intrinsics.
+HWY_API Vu32m4 PromoteTo(Du32m4 /* d */, Vu8m1 v) { return vzext_vf4_u32m4(v); }
+HWY_API Vu32m8 PromoteTo(Du32m8 /* d */, Vu8m2 v) { return vzext_vf4_u32m8(v); }
+// u16 -> u32.
+HWY_API Vu32m2 PromoteTo(Du32m2 /* d */, const Vu16m1 v) {
+  return vzext_vf2_u32m2(v);
+}
+HWY_API Vu32m4 PromoteTo(Du32m4 /* d */, const Vu16m2 v) {
+  return vzext_vf2_u32m4(v);
+}
+HWY_API Vu32m8 PromoteTo(Du32m8 /* d */, const Vu16m4 v) {
+  return vzext_vf2_u32m8(v);
+}
+// u32 -> u64.
+HWY_API Vu64m2 PromoteTo(Du64m2 /* d */, const Vu32m1 v) {
+  return vzext_vf2_u64m2(v);
+}
+HWY_API Vu64m4 PromoteTo(Du64m4 /* d */, const Vu32m2 v) {
+  return vzext_vf2_u64m4(v);
+}
+HWY_API Vu64m8 PromoteTo(Du64m8 /* d */, const Vu32m4 v) {
+  return vzext_vf2_u64m8(v);
+}
+// Unsigned -> wider signed: promote to the unsigned type, then BitCast to d.
+template <size_t N>
+HWY_API VFromD<Simd<int16_t, N>> PromoteTo(Simd<int16_t, N> d,
+                                           VFromD<Simd<uint8_t, N>> v) {
+  return BitCast(d, PromoteTo(Simd<uint16_t, N>(), v));
+}
+
+template <size_t N>
+HWY_API VFromD<Simd<int32_t, N>> PromoteTo(Simd<int32_t, N> d,
+                                           VFromD<Simd<uint8_t, N>> v) {
+  return BitCast(d, PromoteTo(Simd<uint32_t, N>(), v));
+}
+
+template <size_t N>
+HWY_API VFromD<Simd<int32_t, N>> PromoteTo(Simd<int32_t, N> d,
+                                           VFromD<Simd<uint16_t, N>> v) {
+  return BitCast(d, PromoteTo(Simd<uint32_t, N>(), v));
+}
+
+// ------------------------------ PromoteTo I
+// i8 -> i16 via the vsext_vf2 intrinsics.
+HWY_API Vi16m2 PromoteTo(Di16m2 /* d */, Vi8m1 v) { return vsext_vf2_i16m2(v); }
+HWY_API Vi16m4 PromoteTo(Di16m4 /* d */, Vi8m2 v) { return vsext_vf2_i16m4(v); }
+HWY_API Vi16m8 PromoteTo(Di16m8 /* d */, Vi8m4 v) { return vsext_vf2_i16m8(v); }
+// i8 -> i32 via the vsext_vf4 intrinsics.
+HWY_API Vi32m4 PromoteTo(Di32m4 /* d */, Vi8m1 v) { return vsext_vf4_i32m4(v); }
+HWY_API Vi32m8 PromoteTo(Di32m8 /* d */, Vi8m2 v) { return vsext_vf4_i32m8(v); }
+// i16 -> i32.
+HWY_API Vi32m2 PromoteTo(Di32m2 /* d */, const Vi16m1 v) {
+  return vsext_vf2_i32m2(v);
+}
+HWY_API Vi32m4 PromoteTo(Di32m4 /* d */, const Vi16m2 v) {
+  return vsext_vf2_i32m4(v);
+}
+HWY_API Vi32m8 PromoteTo(Di32m8 /* d */, const Vi16m4 v) {
+  return vsext_vf2_i32m8(v);
+}
+// i32 -> i64.
+HWY_API Vi64m2 PromoteTo(Di64m2 /* d */, const Vi32m1 v) {
+  return vsext_vf2_i64m2(v);
+}
+HWY_API Vi64m4 PromoteTo(Di64m4 /* d */, const Vi32m2 v) {
+  return vsext_vf2_i64m4(v);
+}
+HWY_API Vi64m8 PromoteTo(Di64m8 /* d */, const Vi32m4 v) {
+  return vsext_vf2_i64m8(v);
+}
+
+// ------------------------------ PromoteTo F
+// f16 -> f32.
+HWY_API Vf32m2 PromoteTo(Df32m2 /* d */, const Vf16m1 v) {
+  return vfwcvt_f_f_v_f32m2(v);
+}
+HWY_API Vf32m4 PromoteTo(Df32m4 /* d */, const Vf16m2 v) {
+  return vfwcvt_f_f_v_f32m4(v);
+}
+HWY_API Vf32m8 PromoteTo(Df32m8 /* d */, const Vf16m4 v) {
+  return vfwcvt_f_f_v_f32m8(v);
+}
+// f32 -> f64.
+HWY_API Vf64m2 PromoteTo(Df64m2 /* d */, const Vf32m1 v) {
+  return vfwcvt_f_f_v_f64m2(v);
+}
+HWY_API Vf64m4 PromoteTo(Df64m4 /* d */, const Vf32m2 v) {
+  return vfwcvt_f_f_v_f64m4(v);
+}
+HWY_API Vf64m8 PromoteTo(Df64m8 /* d */, const Vf32m4 v) {
+  return vfwcvt_f_f_v_f64m8(v);
+}
+// i32 -> f64.
+HWY_API Vf64m2 PromoteTo(Df64m2 /* d */, const Vi32m1 v) {
+  return vfwcvt_f_x_v_f64m2(v);
+}
+HWY_API Vf64m4 PromoteTo(Df64m4 /* d */, const Vi32m2 v) {
+  return vfwcvt_f_x_v_f64m4(v);
+}
+HWY_API Vf64m8 PromoteTo(Df64m8 /* d */, const Vi32m4 v) {
+  return vfwcvt_f_x_v_f64m8(v);
+}
+
+// ------------------------------ DemoteTo U
+
+// First clamp negative numbers to zero to match x86 packus.
+HWY_API Vu16m1 DemoteTo(Du16m1 /* d */, const Vi32m2 v) {
+  return vnclipu_wx_u16m1(detail::BitCastToUnsigned(detail::Max(v, 0)), 0);
+}
+HWY_API Vu16m2 DemoteTo(Du16m2 /* d */, const Vi32m4 v) {
+  return vnclipu_wx_u16m2(detail::BitCastToUnsigned(detail::Max(v, 0)), 0);
+}
+HWY_API Vu16m4 DemoteTo(Du16m4 /* d */, const Vi32m8 v) {
+  return vnclipu_wx_u16m4(detail::BitCastToUnsigned(detail::Max(v, 0)), 0);
+}
+// i32 -> u8 in two steps: demote to u16 (overloads above), then narrow again.
+HWY_API Vu8m1 DemoteTo(Du8m1 /* d */, const Vi32m4 v) {
+  return vnclipu_wx_u8m1(DemoteTo(Du16m2(), v), 0);
+}
+HWY_API Vu8m2 DemoteTo(Du8m2 /* d */, const Vi32m8 v) {
+  return vnclipu_wx_u8m2(DemoteTo(Du16m4(), v), 0);
+}
+// i16 -> u8, with the same clamp of negative inputs to zero.
+HWY_API Vu8m1 DemoteTo(Du8m1 /* d */, const Vi16m2 v) {
+  return vnclipu_wx_u8m1(detail::BitCastToUnsigned(detail::Max(v, 0)), 0);
+}
+HWY_API Vu8m2 DemoteTo(Du8m2 /* d */, const Vi16m4 v) {
+  return vnclipu_wx_u8m2(detail::BitCastToUnsigned(detail::Max(v, 0)), 0);
+}
+HWY_API Vu8m4 DemoteTo(Du8m4 /* d */, const Vi16m8 v) {
+  return vnclipu_wx_u8m4(detail::BitCastToUnsigned(detail::Max(v, 0)), 0);
+}
+// u32 -> u8: two successive vnclipu narrowing steps (through u16).
+HWY_API Vu8m1 U8FromU32(const Vu32m4 v) {
+  return vnclipu_wx_u8m1(vnclipu_wx_u16m2(v, 0), 0);
+}
+HWY_API Vu8m2 U8FromU32(const Vu32m8 v) {
+  return vnclipu_wx_u8m2(vnclipu_wx_u16m4(v, 0), 0);
+}
+
+// ------------------------------ DemoteTo I
+// i16 -> i8 via vnclip with a shift of 0.
+HWY_API Vi8m1 DemoteTo(Di8m1 /* d */, const Vi16m2 v) {
+  return vnclip_wx_i8m1(v, 0);
+}
+HWY_API Vi8m2 DemoteTo(Di8m2 /* d */, const Vi16m4 v) {
+  return vnclip_wx_i8m2(v, 0);
+}
+HWY_API Vi8m4 DemoteTo(Di8m4 /* d */, const Vi16m8 v) {
+  return vnclip_wx_i8m4(v, 0);
+}
+// i32 -> i16.
+HWY_API Vi16m1 DemoteTo(Di16m1 /* d */, const Vi32m2 v) {
+  return vnclip_wx_i16m1(v, 0);
+}
+HWY_API Vi16m2 DemoteTo(Di16m2 /* d */, const Vi32m4 v) {
+  return vnclip_wx_i16m2(v, 0);
+}
+HWY_API Vi16m4 DemoteTo(Di16m4 /* d */, const Vi32m8 v) {
+  return vnclip_wx_i16m4(v, 0);
+}
+// i32 -> i8 in two steps, through i16.
+HWY_API Vi8m1 DemoteTo(Di8m1 d, const Vi32m4 v) {
+  return DemoteTo(d, DemoteTo(Di16m2(), v));
+}
+HWY_API Vi8m2 DemoteTo(Di8m2 d, const Vi32m8 v) {
+  return DemoteTo(d, DemoteTo(Di16m4(), v));
+}
+
+// ------------------------------ DemoteTo F
+// f32 -> f16 using the vfncvt_rod_* intrinsics.
+HWY_API Vf16m1 DemoteTo(Df16m1 /* d */, const Vf32m2 v) {
+  return vfncvt_rod_f_f_w_f16m1(v);
+}
+HWY_API Vf16m2 DemoteTo(Df16m2 /* d */, const Vf32m4 v) {
+  return vfncvt_rod_f_f_w_f16m2(v);
+}
+HWY_API Vf16m4 DemoteTo(Df16m4 /* d */, const Vf32m8 v) {
+  return vfncvt_rod_f_f_w_f16m4(v);
+}
+// f64 -> f32.
+HWY_API Vf32m1 DemoteTo(Df32m1 /* d */, const Vf64m2 v) {
+  return vfncvt_rod_f_f_w_f32m1(v);
+}
+HWY_API Vf32m2 DemoteTo(Df32m2 /* d */, const Vf64m4 v) {
+  return vfncvt_rod_f_f_w_f32m2(v);
+}
+HWY_API Vf32m4 DemoteTo(Df32m4 /* d */, const Vf64m8 v) {
+  return vfncvt_rod_f_f_w_f32m4(v);
+}
+// f64 -> i32 using the vfncvt_rtz_* intrinsics.
+HWY_API Vi32m1 DemoteTo(Di32m1 /* d */, const Vf64m2 v) {
+  return vfncvt_rtz_x_f_w_i32m1(v);
+}
+HWY_API Vi32m2 DemoteTo(Di32m2 /* d */, const Vf64m4 v) {
+  return vfncvt_rtz_x_f_w_i32m2(v);
+}
+HWY_API Vi32m4 DemoteTo(Di32m4 /* d */, const Vf64m8 v) {
+  return vfncvt_rtz_x_f_w_i32m4(v);
+}
+
+// ------------------------------ ConvertTo F
+// Defines ConvertTo (int -> float, float -> int with rtz) and NearestInt.
+#define HWY_SVE_CONVERT(BASE, CHAR, BITS, NAME, OP)                      \
+  HWY_API HWY_SVE_V(BASE, BITS)                                          \
+      ConvertTo(HWY_SVE_D(CHAR, BITS) /* d */, HWY_SVE_V(int, BITS) v) { \
+    return vfcvt_f_x_v_f##BITS(v);                                       \
+  }                                                                      \
+  /* Truncates (rounds toward zero). */                                  \
+  HWY_API HWY_SVE_V(int, BITS)                                           \
+      ConvertTo(HWY_SVE_D(i, BITS) /* d */, HWY_SVE_V(BASE, BITS) v) {   \
+    return vfcvt_rtz_x_f_v_i##BITS(v);                                   \
+  }                                                                      \
+  /* Uses default rounding mode. */                                      \
+  HWY_API HWY_SVE_V(int, BITS) NearestInt(HWY_SVE_V(BASE, BITS) v) {     \
+    return vfcvt_x_f_v_i##BITS(v);                                       \
+  }
+
+// API only requires f32 but we provide f64 for internal use (otherwise, it
+// seems difficult to implement Iota without a _mf2 vector half).
+HWY_SVE_FOREACH_F(HWY_SVE_CONVERT, _, _)
+#undef HWY_SVE_CONVERT
+
+// ================================================== SWIZZLE
+
+// ------------------------------ Compress
+// NOTE(review): the macro body uses MLEN, which is not one of its parameters.
+#define HWY_SVE_COMPRESS(BASE, CHAR, BITS, NAME, OP)        \
+  HWY_API HWY_SVE_V(BASE, BITS)                             \
+      NAME(HWY_SVE_V(BASE, BITS) v, HWY_SVE_M(MLEN) mask) { \
+    return v##OP##_vm_##CHAR##BITS(mask, v, v);             \
+  }
+
+HWY_SVE_FOREACH_UI16(HWY_SVE_COMPRESS, Compress, compress)
+HWY_SVE_FOREACH_UI32(HWY_SVE_COMPRESS, Compress, compress)
+HWY_SVE_FOREACH_UI64(HWY_SVE_COMPRESS, Compress, compress)
+HWY_SVE_FOREACH_F(HWY_SVE_COMPRESS, Compress, compress)
+#undef HWY_SVE_COMPRESS
+
+// ------------------------------ CompressStore
+// Stores Compress(v, mask) to `aligned` and returns CountTrue(mask).
+template <class V, class M, class D>
+HWY_API size_t CompressStore(const V v, const M mask, const D d,
+                             TFromD<D>* HWY_RESTRICT aligned) {
+  Store(Compress(v, mask), d, aligned);
+  return CountTrue(mask);
+}
+
+// ------------------------------ TableLookupLanes
+// Debug/ASan builds first check each idx[i] is in [0, Lanes(d)); then loads idx.
+template <class D, class DU = RebindToUnsigned<D>>
+HWY_API VFromD<DU> SetTableIndices(D d, const TFromD<DU>* idx) {
+#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER)
+  const size_t N = Lanes(d);
+  for (size_t i = 0; i < N; ++i) {
+    HWY_DASSERT(0 <= idx[i] && idx[i] < static_cast<TFromD<DU>>(N));
+  }
+#endif
+  return Load(DU(), idx);
+}
+
+// <32bit are not part of Highway API, but used in Broadcast. This limits VLMAX
+// to 2048! We could instead use vrgatherei16.
+#define HWY_SVE_TABLE(BASE, CHAR, BITS, NAME, OP)                \
+  HWY_API HWY_SVE_V(BASE, BITS)                                  \
+      NAME(HWY_SVE_V(BASE, BITS) v, HWY_SVE_V(uint, BITS) idx) { \
+    return v##OP##_vv_##CHAR##BITS(v, idx);                      \
+  }
+// idx is an unsigned vector with the same lane width (BITS) as v.
+HWY_SVE_FOREACH(HWY_SVE_TABLE, TableLookupLanes, rgather)
+#undef HWY_SVE_TABLE
+
+// ------------------------------ Shuffle01
+// Swaps adjacent 64-bit lanes: output lane i reads input lane i ^ 1.
+template <class V>
+HWY_API V Shuffle01(const V v) {
+  static_assert(sizeof(TFromV<V>) == 8, "Defined for 64-bit types");
+  const DFromV<V> d;
+  const auto swapped_idx = detail::Xor(detail::Iota0(d), 1);
+  return TableLookupLanes(v, swapped_idx);
+}
+
+// ------------------------------ Shuffle2301
+// Swaps adjacent 32-bit lanes: output lane i reads input lane i ^ 1.
+template <class V>
+HWY_API V Shuffle2301(const V v) {
+  static_assert(sizeof(TFromV<V>) == 4, "Defined for 32-bit types");
+  const DFromV<V> d;
+  const auto swapped_idx = detail::Xor(detail::Iota0(d), 1);
+  return TableLookupLanes(v, swapped_idx);
+}
+
+// ------------------------------ Shuffle1032
+// Swaps adjacent pairs of 32-bit lanes: output lane i reads input lane i ^ 2.
+template <class V>
+HWY_API V Shuffle1032(const V v) {
+  static_assert(sizeof(TFromV<V>) == 4, "Defined for 32-bit types");
+  const DFromV<V> d;
+  const auto swapped_idx = detail::Xor(detail::Iota0(d), 2);
+  return TableLookupLanes(v, swapped_idx);
+}
+
+// ------------------------------ Shuffle0123
+// Reverses each group of four 32-bit lanes: output lane i reads lane i ^ 3.
+template <class V>
+HWY_API V Shuffle0123(const V v) {
+  static_assert(sizeof(TFromV<V>) == 4, "Defined for 32-bit types");
+  const DFromV<V> d;
+  const auto reversed_idx = detail::Xor(detail::Iota0(d), 3);
+  return TableLookupLanes(v, reversed_idx);
+}
+
+// ------------------------------ Shuffle2103
+// Rotation within groups of four 32-bit lanes: lane i reads lane (i - 1) mod 4.
+template <class V>
+HWY_API V Shuffle2103(const V v) {
+  static_assert(sizeof(TFromV<V>) == 4, "Defined for 32-bit types");
+  const DFromV<V> d;
+  // (i - 1) mod 4 (4 = lanes per 128-bit block) using bitwise ops only:
+  // flip bit 0, then flip bit 1 wherever the new bit 0 is set (the borrow).
+  const auto flipped = detail::Xor(detail::Iota0(d), 1);
+  const auto low_bit = detail::And(flipped, 1);
+  const auto borrow = Add(low_bit, low_bit);
+  const auto rotated_idx = Xor(flipped, borrow);
+  return TableLookupLanes(v, rotated_idx);
+}
+
+// ------------------------------ Shuffle0321
+// Rotation within groups of four 32-bit lanes: lane i reads lane (i + 1) mod 4.
+template <class V>
+HWY_API V Shuffle0321(const V v) {
+  static_assert(sizeof(TFromV<V>) == 4, "Defined for 32-bit types");
+  const DFromV<V> d;
+  // (i + 1) mod 4 (4 = lanes per 128-bit block) using bitwise ops only:
+  // flip bits 0 and 1, then flip bit 1 again wherever the new bit 0 is set.
+  const auto flipped = detail::Xor(detail::Iota0(d), 3);
+  const auto low_bit = detail::And(flipped, 1);
+  const auto carry = Add(low_bit, low_bit);
+  const auto rotated_idx = Xor(flipped, carry);
+  return TableLookupLanes(v, rotated_idx);
+}
+
+// ------------------------------ TableLookupBytes
+
+namespace detail {
+
+// For x86-compatible behaviour mandated by Highway API: TableLookupBytes
+// offsets are implicitly relative to the start of their 128-bit block.
+template <class D>
+constexpr size_t LanesPerBlock(D) {
+  return 16 / sizeof(TFromD<D>);
+}
+// Masks off the in-block bits of iota0, i.e. ANDs with ~(LanesPerBlock - 1).
+template <class D, class V>
+HWY_API V OffsetsOf128BitBlocks(const D d, const V iota0) {
+  using T = MakeUnsigned<TFromD<D>>;
+  return detail::And(iota0, static_cast<T>(~(LanesPerBlock(d) - 1)));
+}
+
+}  // namespace detail
+
+template <class V>
+HWY_API V TableLookupBytes(const V v, const V idx) {
+  const DFromV<V> d;
+  const Repartition<uint8_t, decltype(d)> du8;
+  const auto base = detail::OffsetsOf128BitBlocks(du8, detail::Iota0(du8));
+  const auto abs_idx = Add(BitCast(du8, idx), base);  // block-relative + base
+  return BitCast(d, TableLookupLanes(BitCast(du8, v), abs_idx));
+}
+
+// ------------------------------ Broadcast
+// Every lane reads lane (start of its 128-bit block + kLane) of v.
+template <int kLane, class V>
+HWY_API V Broadcast(const V v) {
+  const DFromV<V> d;
+  constexpr size_t kLanesPerBlock = detail::LanesPerBlock(d);
+  static_assert(0 <= kLane && kLane < kLanesPerBlock, "Invalid lane");
+  auto lane_idx = detail::OffsetsOf128BitBlocks(d, detail::Iota0(d));
+  if (kLane != 0) {  // adding zero would be a no-op
+    lane_idx = detail::Add(lane_idx, kLane);
+  }
+  return TableLookupLanes(v, lane_idx);
+}
+
+// ------------------------------ GetLane
+// Returns a scalar from v via v<OP>_s; presumably the first lane - TODO confirm.
+#define HWY_SVE_GET_LANE(BASE, CHAR, BITS, NAME, OP)            \
+  HWY_API HWY_SVE_T(BASE, BITS) NAME(HWY_SVE_V(BASE, BITS) v) { \
+    return v##OP##_s_##CHAR##BITS##_##CHAR##BITS(v);            \
+  }
+
+HWY_SVE_FOREACH_UI(HWY_SVE_GET_LANE, GetLane, mv_x)
+HWY_SVE_FOREACH_F(HWY_SVE_GET_LANE, GetLane, fmv_f)
+#undef HWY_SVE_GET_LANE
+
+// ------------------------------ ShiftLeftLanes
+// HWY_SVE_SLIDE wraps the v<OP>_vx intrinsics as NAME(dst, src, lanes).
+// vector = f(vector, vector, size_t)
+#define HWY_SVE_SLIDE(BASE, CHAR, BITS, NAME, OP)                           \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(                                       \
+      HWY_SVE_V(BASE, BITS) dst, HWY_SVE_V(BASE, BITS) src, size_t lanes) { \
+    return v##OP##_vx_##CHAR##BITS(dst, src, lanes);                        \
+  }
+
+namespace detail {
+HWY_SVE_FOREACH(HWY_SVE_SLIDE, SlideUp, slideup)
+}  // namespace detail
+
+template <size_t kLanes, class V>
+HWY_API V ShiftLeftLanes(const V v) {
+  const RebindToSigned<DFromV<V>> di;
+  const auto slid = detail::SlideUp(v, v, kLanes);
+  constexpr size_t kLanesPerBlock = detail::LanesPerBlock(di);
+  // x86 semantics: the lowest kLanes lanes of every 128-bit block become zero,
+  // i.e. the lanes whose index within the block is below kLanes.
+  const auto in_block = detail::And(detail::Iota0(di), kLanesPerBlock - 1);
+  const auto is_vacated = Lt(BitCast(di, in_block), Set(di, kLanes));
+  return IfThenZeroElse(is_vacated, slid);
+}
+
+// ------------------------------ ShiftLeftBytes
+// Byte-granular variant: view v as u8 lanes and shift by kBytes lanes.
+template <int kBytes, class V>
+HWY_API V ShiftLeftBytes(const V v) {
+  const DFromV<V> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  (void)Lanes(d8);  // result intentionally unused
+  return BitCast(d, ShiftLeftLanes<kBytes>(BitCast(d8, v)));
+}
+
+// ------------------------------ ShiftRightLanes
+// detail::SlideDown(dst, src, lanes): HWY_SVE_SLIDE applied to `slidedown`.
+namespace detail {
+HWY_SVE_FOREACH(HWY_SVE_SLIDE, SlideDown, slidedown)
+}  // namespace detail
+
+#undef HWY_SVE_SLIDE
+
+template <size_t kLanes, class V>
+HWY_API V ShiftRightLanes(const V v) {
+  const RebindToSigned<DFromV<V>> di;
+  const auto slid = detail::SlideDown(v, v, kLanes);
+  constexpr size_t kLanesPerBlock = detail::LanesPerBlock(di);
+  // x86 semantics: the top kLanes lanes of every 128-bit block become zero, so
+  // only lanes with in-block index below (kLanesPerBlock - kLanes) are kept.
+  const auto in_block = detail::And(detail::Iota0(di), kLanesPerBlock - 1);
+  const auto limit = Set(di, kLanesPerBlock - kLanes);
+  return IfThenElseZero(Lt(BitCast(di, in_block), limit), slid);
+}
+
+// ------------------------------ ShiftRightBytes
+// Byte-granular variant: view v as u8 lanes and shift by kBytes lanes.
+template <int kBytes, class V>
+HWY_API V ShiftRightBytes(const V v) {
+  const DFromV<V> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  (void)Lanes(d8);  // result intentionally unused
+  return BitCast(d, ShiftRightLanes<kBytes>(BitCast(d8, v)));
+}
+
+// ------------------------------ OddEven
+// Lanes whose index has bit 0 clear come from b; the other lanes come from a.
+template <class V>
+HWY_API V OddEven(const V a, const V b) {
+  const RebindToUnsigned<DFromV<V>> du;  // Iota0 is unsigned only
+  const auto low_bit = detail::And(detail::Iota0(du), 1);
+  return IfThenElse(Eq(low_bit, Zero(du)), b, a);
+}
+
+// ------------------------------ ConcatUpperLower
+// Lanes with index below Lanes() / 2 come from lo; the rest come from hi.
+template <class V>
+HWY_API V ConcatUpperLower(const V hi, const V lo) {
+  const RebindToSigned<DFromV<V>> di;
+  const auto half = Set(di, Lanes(di) / 2);
+  const auto lane_idx = BitCast(di, detail::Iota0(di));
+  return IfThenElse(Lt(lane_idx, half), lo, hi);
+}
+
+// ------------------------------ ConcatLowerLower
+// Slides hi up by half a vector, then combines it with lo's lower half.
+template <class V>
+HWY_API V ConcatLowerLower(const V hi, const V lo) {
+  const size_t half = Lanes(DFromV<V>()) / 2;
+  const auto hi_shifted = detail::SlideUp(hi, hi, half);
+  return ConcatUpperLower(hi_shifted, lo);
+}
+
+// ------------------------------ ConcatUpperUpper
+// Slides lo down by half a vector, then combines it with hi's upper half.
+template <class V>
+HWY_API V ConcatUpperUpper(const V hi, const V lo) {
+  const size_t half = Lanes(DFromV<V>()) / 2;
+  const auto lo_shifted = detail::SlideDown(lo, lo, half);
+  return ConcatUpperLower(hi, lo_shifted);
+}
+
+// ------------------------------ ConcatLowerUpper
+// Slides hi up and lo down by half a vector each, then combines the two.
+template <class V>
+HWY_API V ConcatLowerUpper(const V hi, const V lo) {
+  const size_t half = Lanes(DFromV<V>()) / 2;
+  const auto hi_shifted = detail::SlideUp(hi, hi, half);
+  const auto lo_shifted = detail::SlideDown(lo, lo, half);
+  return ConcatUpperLower(hi_shifted, lo_shifted);
+}
+
+// ------------------------------ InterleaveLower
+// Even output lanes come from a, odd from b; both read the block's lower half.
+template <class V>
+HWY_API V InterleaveLower(const V a, const V b) {
+  const DFromV<V> d;
+  const RebindToUnsigned<decltype(d)> du;
+  constexpr size_t kLanesPerBlock = detail::LanesPerBlock(d);
+  const auto iota = detail::Iota0(d);
+  const auto half_idx = ShiftRight<1>(detail::And(iota, kLanesPerBlock - 1));
+  const auto src_idx = Add(half_idx, detail::OffsetsOf128BitBlocks(d, iota));
+  const auto from_a = Eq(detail::And(iota, 1), Zero(du));
+  return IfThenElse(from_a, TableLookupLanes(a, src_idx),
+                    TableLookupLanes(b, src_idx));
+}
+
+// ------------------------------ InterleaveUpper
+// Even output lanes come from a, odd from b; both read the block's upper half.
+template <class V>
+HWY_API V InterleaveUpper(const V a, const V b) {
+  const DFromV<V> d;
+  const RebindToUnsigned<decltype(d)> du;
+  constexpr size_t kLanesPerBlock = detail::LanesPerBlock(d);
+  const auto iota = detail::Iota0(d);
+  const auto half_idx = ShiftRight<1>(detail::And(iota, kLanesPerBlock - 1));
+  const auto lower_idx = Add(half_idx, detail::OffsetsOf128BitBlocks(d, iota));
+  const auto src_idx = detail::Add(lower_idx, kLanesPerBlock / 2);
+  const auto from_a = Eq(detail::And(iota, 1), Zero(du));
+  return IfThenElse(from_a, TableLookupLanes(a, src_idx),
+                    TableLookupLanes(b, src_idx));
+}
+
+// ------------------------------ ZipLower
+// InterleaveLower(a, b), bit-cast to the RepartitionToWide lane type.
+template <class V>
+HWY_API VFromD<RepartitionToWide<DFromV<V>>> ZipLower(const V a, const V b) {
+  const RepartitionToWide<DFromV<V>> d_wide;
+  return BitCast(d_wide, InterleaveLower(a, b));
+}
+
+// ------------------------------ ZipUpper
+// InterleaveUpper(a, b), bit-cast to the RepartitionToWide lane type.
+template <class V>
+HWY_API VFromD<RepartitionToWide<DFromV<V>>> ZipUpper(const V a, const V b) {
+  const RepartitionToWide<DFromV<V>> d_wide;
+  return BitCast(d_wide, InterleaveUpper(a, b));
+}
+
+// ------------------------------ Combine
+// Not compiled: the sketch below is inside #if 0 and has no return statement.
+// TODO(janwas): implement after LMUL ext/trunc
+#if 0
+
+template <class V>
+HWY_API V Combine(const V a, const V b) {
+  using D = DFromV<V>;
+  // double LMUL of inputs, then SlideUp with Lanes().
+}
+
+#endif
+
+// ================================================== REDUCE
+// NOTE(review): HWY_SVE_V is given three arguments for v0 below - confirm.
+// vector = f(vector, zero_m1)
+#define HWY_SVE_REDUCE(BASE, CHAR, BITS, NAME, OP)                       \
+  HWY_API HWY_SVE_V(BASE, BITS)                                          \
+      NAME(HWY_SVE_V(BASE, BITS) v, HWY_SVE_V(BASE, BITS, 1) v0) {       \
+    vsetvlmax_e##BITS();                                                 \
+    return Set(                                                          \
+        HWY_SVE_D(CHAR, BITS)(),                                         \
+        GetLane(v##OP##_vs_##CHAR##BITS##_##CHAR##BITS##m1(v0, v, v0))); \
+  }
+
+// ------------------------------ SumOfLanes
+// Reduces with detail::RedSum, seeded by a zero vector.
+namespace detail {
+
+HWY_SVE_FOREACH_UI(HWY_SVE_REDUCE, RedSum, redsum)
+HWY_SVE_FOREACH_F(HWY_SVE_REDUCE, RedSum, fredsum)
+
+}  // namespace detail
+
+template <class V>
+HWY_API V SumOfLanes(const V v) {
+  using T = TFromV<V>;
+  const Simd<T, HWY_LANES(T)> d1;  // always m1
+  return detail::RedSum(v, Zero(d1));
+}
+
+// ------------------------------ MinOfLanes
+namespace detail {
+
+HWY_SVE_FOREACH_U(HWY_SVE_REDUCE, RedMin, redminu)
+HWY_SVE_FOREACH_I(HWY_SVE_REDUCE, RedMin, redmin)
+HWY_SVE_FOREACH_F(HWY_SVE_REDUCE, RedMin, fredmin)
+
+}  // namespace detail
+
+// Reduces with detail::RedMin, seeded by a vector of HighestValue<T>().
+template <class V>
+HWY_API V MinOfLanes(const V v) {
+  using T = TFromV<V>;
+  const Simd<T, HWY_LANES(T)> d1;  // always m1
+  return detail::RedMin(v, Set(d1, HighestValue<T>()));
+}
+
+// ------------------------------ MaxOfLanes
+namespace detail {
+
+HWY_SVE_FOREACH_U(HWY_SVE_REDUCE, RedMax, redmaxu)
+HWY_SVE_FOREACH_I(HWY_SVE_REDUCE, RedMax, redmax)
+HWY_SVE_FOREACH_F(HWY_SVE_REDUCE, RedMax, fredmax)
+
+}  // namespace detail
+
+// Reduces with detail::RedMax, seeded by a vector of LowestValue<T>().
+template <class V>
+HWY_API V MaxOfLanes(const V v) {
+  using T = TFromV<V>;
+  const Simd<T, HWY_LANES(T)> d1;  // always m1
+  return detail::RedMax(v, Set(d1, LowestValue<T>()));
+}
+
+#undef HWY_SVE_REDUCE
+
+// ================================================== Ops with dependencies
+
+// ------------------------------ LoadDup128
+// Loads a whole vector from p, then makes every block a copy of the first one.
+template <class D>
+HWY_API VFromD<D> LoadDup128(D d, const TFromD<D>* const HWY_RESTRICT p) {
+  // TODO(janwas): set VL
+  const auto full = Load(d, p);
+  constexpr size_t kLanesPerBlock = detail::LanesPerBlock(d);
+  // The index within a block is also the index into the first block.
+  const auto block0_idx = detail::And(detail::Iota0(d), kLanesPerBlock - 1);
+  return TableLookupLanes(full, block0_idx);
+}
+
+// ------------------------------ StoreMaskBits (stub: nothing is written to p)
+#define HWY_SVE_STORE_MASK_BITS(MLEN, NAME, OP)                 \
+  HWY_API size_t StoreMaskBits(HWY_SVE_M(MLEN) m, uint8_t* p) { \
+    /* LMUL=1 is always enough */                               \
+    Simd<uint8_t, HWY_LANES(uint8_t)> d8;                       \
+    const size_t num_bytes = (Lanes(d8) + MLEN - 1) / MLEN;     \
+    /* TODO(janwas): how to convert vbool* to vuint?*/          \
+    /*Store(m, d8, p);*/                                        \
+    (void)m;                                                    \
+    (void)p;                                                    \
+    return num_bytes;                                           \
+  }
+HWY_SVE_FOREACH_B(HWY_SVE_STORE_MASK_BITS, _, _)
+#undef HWY_SVE_STORE_MASK_BITS
+
+// ------------------------------ FirstN (Iota0, Lt, RebindMask, SlideUp)
+// Wider lanes: mask of lanes whose Iota0 value, as signed, is less than n.
+// Disallow for 8-bit because Iota is likely to overflow.
+template <class D, HWY_IF_NOT_LANE_SIZE_D(D, 1)>
+HWY_API MFromD<D> FirstN(const D d, const size_t n) {
+  const RebindToSigned<D> di;
+  return RebindMask(d, Lt(BitCast(di, detail::Iota0(d)), Set(di, n)));
+}
+// 8-bit lanes: slide zeros up by n over a vector of ones, then compare to one.
+template <class D, HWY_IF_LANE_SIZE_D(D, 1)>
+HWY_API MFromD<D> FirstN(const D d, const size_t n) {
+  const auto zero = Zero(d);
+  const auto one = Set(d, 1);
+  return Eq(detail::SlideUp(one, zero, n), one);
+}
+
+// ------------------------------ Neg
+template <class V, HWY_IF_SIGNED_V(V)>
+HWY_API V Neg(const V v) {
+  const auto zeros = Zero(DFromV<V>());
+  return Sub(zeros, v);  // signed integers: 0 - v
+}
+
+// vector = f(vector), but argument is repeated
+#define HWY_SVE_RETV_ARGV2(BASE, CHAR, BITS, NAME, OP)          \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(HWY_SVE_V(BASE, BITS) v) { \
+    return v##OP##_vv_##CHAR##BITS(v, v);                       \
+  }
+// Float Neg: the fsgnjn intrinsic with v as both operands.
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGV2, Neg, fsgnjn)
+
+// ------------------------------ Abs
+template <class V, HWY_IF_SIGNED_V(V)>
+HWY_API V Abs(const V v) {
+  const auto negated = Neg(v);
+  return Max(v, negated);  // signed integers: max(v, -v)
+}
+// Float Abs: the fsgnjx intrinsic with v as both operands.
+HWY_SVE_FOREACH_F(HWY_SVE_RETV_ARGV2, Abs, fsgnjx)
+
+#undef HWY_SVE_RETV_ARGV2
+
+// ------------------------------ AbsDiff
+template <class V>
+HWY_API V AbsDiff(const V a, const V b) {
+  const auto difference = Sub(a, b);
+  return Abs(difference);  // Abs of (a - b)
+}
+
+// ------------------------------ Round
+
+// IEEE-754 roundToIntegralTiesToEven returns floating-point, but we do not have
+// a dedicated instruction for that. Rounding to integer and converting back to
+// float is correct except when the input magnitude is large, in which case the
+// input was already an integer (because mantissa >> exponent is zero).
+
+namespace detail {
+enum RoundingModes { kNear, kTrunc, kDown, kUp };
+// Mask of lanes with Abs(v) < MantissaEnd, where the int round trip is used.
+template <class V>
+HWY_API auto UseInt(const V v) -> decltype(MaskFromVec(v)) {
+  return Lt(Abs(v), Set(DFromV<V>(), MantissaEnd<TFromV<V>>()));
+}
+
+}  // namespace detail
+
+template <class V>
+HWY_API V Round(const V v) {
+  const DFromV<V> df;
+  // NearestInt rounds using the current mode; convert the result back to float.
+  const auto rounded_f = ConvertTo(df, NearestInt(v));
+  // Lanes outside UseInt's range keep the input value unchanged.
+  const auto use_int = detail::UseInt(v);
+  return IfThenElse(use_int, CopySign(rounded_f, v), v);
+}
+
+// ------------------------------ Trunc
+// Rounds toward zero via a float -> int -> float round trip where UseInt holds.
+template <class V>
+HWY_API V Trunc(const V v) {
+  const DFromV<V> df;
+  const RebindToSigned<decltype(df)> di;
+  // ConvertTo(di, v) rounds toward 0; converting back gives the integral float.
+  const auto truncated_f = ConvertTo(df, ConvertTo(di, v));
+  // Lanes outside UseInt's range keep the input value unchanged.
+  const auto use_int = detail::UseInt(v);
+  return IfThenElse(use_int, CopySign(truncated_f, v), v);
+}
+
+// ------------------------------ Ceil
+// Sets mode kUp via fsrm, calls Round, then sets kNear (not the caller's mode).
+template <class V>
+HWY_API V Ceil(const V v) {
+  asm volatile("fsrm %0" ::"r"(detail::kUp));
+  const auto ret = Round(v);
+  asm volatile("fsrm %0" ::"r"(detail::kNear));
+  return ret;
+}
+
+// ------------------------------ Floor
+// Sets mode kDown via fsrm, calls Round, then sets kNear (not the caller's mode).
+template <class V>
+HWY_API V Floor(const V v) {
+  asm volatile("fsrm %0" ::"r"(detail::kDown));
+  const auto ret = Round(v);
+  asm volatile("fsrm %0" ::"r"(detail::kNear));
+  return ret;
+}
+
+// ------------------------------ Iota
+// Adds `first` to Iota0 (presumably lane indices 0, 1, ... - TODO confirm).
+template <class D, HWY_IF_UNSIGNED_D(D)>
+HWY_API VFromD<D> Iota(const D d, TFromD<D> first) {
+  return Add(detail::Iota0(d), Set(d, first));
+}
+
+template <class D, HWY_IF_SIGNED_D(D)>
+HWY_API VFromD<D> Iota(const D d, TFromD<D> first) {
+  const RebindToUnsigned<D> du;
+  return Add(BitCast(d, detail::Iota0(du)), Set(d, first));
+}
+
+template <class D, HWY_IF_FLOAT_D(D)>
+HWY_API VFromD<D> Iota(const D d, TFromD<D> first) {
+  const RebindToUnsigned<D> du;
+  const RebindToSigned<D> di;
+  return detail::Add(ConvertTo(d, BitCast(di, detail::Iota0(du))), first);
+}
+
+// ------------------------------ MulEven
+// Wide products of the even lanes: low halves from Mul, high from MulHigh.
+// Using vwmul does not work for m8, so use mulh instead. Highway only provides
+// MulHigh for 16-bit, so use a private wrapper.
+namespace detail {
+
+HWY_SVE_FOREACH_U32(HWY_SVE_RETV_ARGVV, MulHigh, mulhu)
+HWY_SVE_FOREACH_I32(HWY_SVE_RETV_ARGVV, MulHigh, mulh)
+
+}  // namespace detail
+
+template <class V>
+HWY_API VFromD<RepartitionToWide<DFromV<V>>> MulEven(const V a, const V b) {
+  const RepartitionToWide<DFromV<V>> d_wide;
+  (void)Lanes(DFromV<V>());  // result intentionally unused
+  const auto product_lo = Mul(a, b);
+  const auto product_hi = detail::MulHigh(a, b);
+  const auto hi_in_odd = detail::SlideUp(product_hi, product_hi, 1);
+  return BitCast(d_wide, OddEven(hi_in_odd, product_lo));
+}
+
+// ================================================== END MACROS
+namespace detail {  // for code folding
+#undef HWY_IF_FLOAT_V
+#undef HWY_IF_SIGNED_V
+#undef HWY_IF_UNSIGNED_V
+
+#undef HWY_SVE_FOREACH
+#undef HWY_SVE_FOREACH_08
+#undef HWY_SVE_FOREACH_16
+#undef HWY_SVE_FOREACH_32
+#undef HWY_SVE_FOREACH_64
+#undef HWY_SVE_FOREACH_B
+#undef HWY_SVE_FOREACH_F
+#undef HWY_SVE_FOREACH_F32
+#undef HWY_SVE_FOREACH_F64
+#undef HWY_SVE_FOREACH_I
+#undef HWY_SVE_FOREACH_I08
+#undef HWY_SVE_FOREACH_I16
+#undef HWY_SVE_FOREACH_I32
+#undef HWY_SVE_FOREACH_I64
+#undef HWY_SVE_FOREACH_U
+#undef HWY_SVE_FOREACH_U08
+#undef HWY_SVE_FOREACH_U16
+#undef HWY_SVE_FOREACH_U32
+#undef HWY_SVE_FOREACH_U64
+#undef HWY_SVE_FOREACH_UI
+#undef HWY_SVE_FOREACH_UI16
+#undef HWY_SVE_FOREACH_UI32
+#undef HWY_SVE_FOREACH_UI64
+
+#undef HWY_SVE_RETV_ARGD
+#undef HWY_SVE_RETV_ARGV
+#undef HWY_SVE_RETV_ARGVS
+#undef HWY_SVE_RETV_ARGVV
+
+#undef HWY_SVE_T
+#undef HWY_SVE_D
+#undef HWY_SVE_V
+#undef HWY_SVE_M
+
+}  // namespace detail
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/rvv-inl.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/rvv-inl.h
new file mode 100644
index 0000000000..6da8720995
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/rvv-inl.h
@@ -0,0 +1,1792 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// RISC-V V vectors (length not known at compile time).
+// External include guard in highway.h - see comment there.
+
+#include <riscv_vector.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "hwy/base.h"
+#include "hwy/ops/shared-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+template <class V>
+struct DFromV_t {};  // empty primary; HWY_SPECIALIZE adds one specialization per vector type
+template <class V>
+using DFromV = typename DFromV_t<V>::type;  // descriptor (tag) type matching vector type V
+
+template <class V>
+using TFromV = TFromD<DFromV<V>>;  // TFromD applied to V's descriptor (presumably the lane type)
+
+#define HWY_IF_UNSIGNED_V(V) hwy::EnableIf<!IsSigned<TFromV<V>>()>* = nullptr
+#define HWY_IF_SIGNED_V(V) \
+  hwy::EnableIf<IsSigned<TFromV<V>>() && !IsFloat<TFromV<V>>()>* = nullptr
+#define HWY_IF_FLOAT_V(V) hwy::EnableIf<IsFloat<TFromV<V>>()>* = nullptr
+
+// kShift = log2 of the LMUL multiplier: 0 for m1, 1 for m2, -2 for mf4.
+// A non-negative shift scales HWY_LANES(T) up; a negative one scales it down.
+template <typename T, int kShift = 0>
+using Full = Simd<T, (kShift >= 0) ? (HWY_LANES(T) << kShift)
+                                   : (HWY_LANES(T) >> (-kShift))>;
+
+// ================================================== MACROS
+
+// Generate specializations and function definitions using X macros. Although
+// harder to read and debug, writing everything manually is too bulky.
+
+namespace detail {  // for code folding
+
+// For all mask sizes: (1/Nth of a register, one bit per lane)
+#define HWY_RVV_FOREACH_B(X_MACRO, NAME, OP) \
+  X_MACRO(64, NAME, OP)                      \
+  X_MACRO(32, NAME, OP)                      \
+  X_MACRO(16, NAME, OP)                      \
+  X_MACRO(8, NAME, OP)                       \
+  X_MACRO(4, NAME, OP)                       \
+  X_MACRO(2, NAME, OP)                       \
+  X_MACRO(1, NAME, OP)
+
+// For given SEW, iterate over all LMUL. Precompute SEW/LMUL => MLEN because the
+// preprocessor cannot easily do it.
+// TODO(janwas): GCC does not yet support fractional LMUL
+#define HWY_RVV_FOREACH_08(X_MACRO, BASE, CHAR, NAME, OP)        \
+  X_MACRO(BASE, CHAR, 8, m1, /*kShift=*/0, /*MLEN=*/8, NAME, OP) \
+  X_MACRO(BASE, CHAR, 8, m2, /*kShift=*/1, /*MLEN=*/4, NAME, OP) \
+  X_MACRO(BASE, CHAR, 8, m4, /*kShift=*/2, /*MLEN=*/2, NAME, OP) \
+  X_MACRO(BASE, CHAR, 8, m8, /*kShift=*/3, /*MLEN=*/1, NAME, OP)
+
+#define HWY_RVV_FOREACH_16(X_MACRO, BASE, CHAR, NAME, OP)          \
+  X_MACRO(BASE, CHAR, 16, m1, /*kShift=*/0, /*MLEN=*/16, NAME, OP) \
+  X_MACRO(BASE, CHAR, 16, m2, /*kShift=*/1, /*MLEN=*/8, NAME, OP)  \
+  X_MACRO(BASE, CHAR, 16, m4, /*kShift=*/2, /*MLEN=*/4, NAME, OP)  \
+  X_MACRO(BASE, CHAR, 16, m8, /*kShift=*/3, /*MLEN=*/2, NAME, OP)
+
+#define HWY_RVV_FOREACH_32(X_MACRO, BASE, CHAR, NAME, OP)          \
+  X_MACRO(BASE, CHAR, 32, m1, /*kShift=*/0, /*MLEN=*/32, NAME, OP) \
+  X_MACRO(BASE, CHAR, 32, m2, /*kShift=*/1, /*MLEN=*/16, NAME, OP) \
+  X_MACRO(BASE, CHAR, 32, m4, /*kShift=*/2, /*MLEN=*/8, NAME, OP)  \
+  X_MACRO(BASE, CHAR, 32, m8, /*kShift=*/3, /*MLEN=*/4, NAME, OP)
+
+#define HWY_RVV_FOREACH_64(X_MACRO, BASE, CHAR, NAME, OP)          \
+  X_MACRO(BASE, CHAR, 64, m1, /*kShift=*/0, /*MLEN=*/64, NAME, OP) \
+  X_MACRO(BASE, CHAR, 64, m2, /*kShift=*/1, /*MLEN=*/32, NAME, OP) \
+  X_MACRO(BASE, CHAR, 64, m4, /*kShift=*/2, /*MLEN=*/16, NAME, OP) \
+  X_MACRO(BASE, CHAR, 64, m8, /*kShift=*/3, /*MLEN=*/8, NAME, OP)
+
+// SEW for unsigned:
+#define HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_08(X_MACRO, uint, u, NAME, OP)
+#define HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_16(X_MACRO, uint, u, NAME, OP)
+#define HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_32(X_MACRO, uint, u, NAME, OP)
+#define HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_64(X_MACRO, uint, u, NAME, OP)
+
+// SEW for signed:
+#define HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_08(X_MACRO, int, i, NAME, OP)
+#define HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_16(X_MACRO, int, i, NAME, OP)
+#define HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_32(X_MACRO, int, i, NAME, OP)
+#define HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_64(X_MACRO, int, i, NAME, OP)
+
+// SEW for float:
+#define HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_16(X_MACRO, float, f, NAME, OP)
+#define HWY_RVV_FOREACH_F32(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_32(X_MACRO, float, f, NAME, OP)
+#define HWY_RVV_FOREACH_F64(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_64(X_MACRO, float, f, NAME, OP)
+
+// For all combinations of SEW:
+#define HWY_RVV_FOREACH_U(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP)     \
+  HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP)     \
+  HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP)     \
+  HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP)
+
+#define HWY_RVV_FOREACH_I(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP)     \
+  HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP)     \
+  HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP)     \
+  HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP)
+
+#define HWY_RVV_FOREACH_F(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP)     \
+  HWY_RVV_FOREACH_F32(X_MACRO, NAME, OP)     \
+  HWY_RVV_FOREACH_F64(X_MACRO, NAME, OP)
+
+// Commonly used type categories for a given SEW:
+#define HWY_RVV_FOREACH_UI16(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP)        \
+  HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP)
+
+#define HWY_RVV_FOREACH_UI32(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP)        \
+  HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP)
+
+#define HWY_RVV_FOREACH_UI64(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP)        \
+  HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP)
+
+// Commonly used type categories:
+#define HWY_RVV_FOREACH_UI(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_U(X_MACRO, NAME, OP)        \
+  HWY_RVV_FOREACH_I(X_MACRO, NAME, OP)
+
+#define HWY_RVV_FOREACH(X_MACRO, NAME, OP) \
+  HWY_RVV_FOREACH_U(X_MACRO, NAME, OP)     \
+  HWY_RVV_FOREACH_I(X_MACRO, NAME, OP)     \
+  HWY_RVV_FOREACH_F(X_MACRO, NAME, OP)
+
+// Assemble types for use in x-macros
+#define HWY_RVV_T(BASE, SEW) BASE##SEW##_t
+#define HWY_RVV_D(CHAR, SEW, LMUL) D##CHAR##SEW##LMUL
+#define HWY_RVV_V(BASE, SEW, LMUL) v##BASE##SEW##LMUL##_t
+#define HWY_RVV_M(MLEN) vbool##MLEN##_t
+
+}  // namespace detail
+
+// TODO(janwas): remove typedefs and only use HWY_RVV_V etc. directly
+
+// Until we have full intrinsic support for fractional LMUL, mixed-precision
+// code can use LMUL 1..8 (adequate unless they need many registers).
+#define HWY_SPECIALIZE(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)    \
+  using HWY_RVV_D(CHAR, SEW, LMUL) = Full<HWY_RVV_T(BASE, SEW), SHIFT>; \
+  using V##CHAR##SEW##LMUL = HWY_RVV_V(BASE, SEW, LMUL);                \
+  template <>                                                           \
+  struct DFromV_t<HWY_RVV_V(BASE, SEW, LMUL)> {                         \
+    using Lane = HWY_RVV_T(BASE, SEW);                                  \
+    using type = Full<Lane, SHIFT>;                                     \
+  };
+using Vf16m1 = vfloat16m1_t;
+using Vf16m2 = vfloat16m2_t;
+using Vf16m4 = vfloat16m4_t;
+using Vf16m8 = vfloat16m8_t;
+using Df16m1 = Full<float16_t, 0>;
+using Df16m2 = Full<float16_t, 1>;
+using Df16m4 = Full<float16_t, 2>;
+using Df16m8 = Full<float16_t, 3>;
+
+HWY_RVV_FOREACH(HWY_SPECIALIZE, _, _)
+#undef HWY_SPECIALIZE
+
+// vector = f(d), e.g. Zero. The descriptor selects the overload; Lanes(tag)
+// is called first and its result discarded.
+#define HWY_RVV_RETV_ARGD(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                    \
+      NAME(HWY_RVV_D(CHAR, SEW, LMUL) tag) {                            \
+    (void)Lanes(tag);                                                   \
+    return v##OP##_##CHAR##SEW##LMUL();                                 \
+  }
+
+// vector = f(vector), e.g. Not. Expands to a call of v<OP>_v_<type>(in).
+#define HWY_RVV_RETV_ARGV(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                    \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) in) {                             \
+    return v##OP##_v_##CHAR##SEW##LMUL(in);                             \
+  }
+
+// vector = f(vector, scalar), e.g. detail::Add. OP must already include the
+// operand-kind suffix (e.g. add_vx), since none is appended here.
+#define HWY_RVV_RETV_ARGVS(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)  \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                      \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) vec, HWY_RVV_T(BASE, SEW) scalar) { \
+    return v##OP##_##CHAR##SEW##LMUL(vec, scalar);                        \
+  }
+
+// vector = f(vector, vector), e.g. Add. Appends the _vv suffix to OP.
+#define HWY_RVV_RETV_ARGVV(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)     \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                         \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) lhs, HWY_RVV_V(BASE, SEW, LMUL) rhs) { \
+    return v##OP##_vv_##CHAR##SEW##LMUL(lhs, rhs);                           \
+  }
+
+// ================================================== INIT
+
+// ------------------------------ Lanes
+
+// WARNING: we want to query VLMAX/sizeof(T), but this actually changes VL!
+// vlenb is not exposed through intrinsics and vreadvl is not VLMAX.
+// The descriptor argument only selects the SEW/LMUL-specific overload.
+#define HWY_RVV_LANES(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \
+  HWY_API size_t NAME(HWY_RVV_D(CHAR, SEW, LMUL) /* tag */) {       \
+    const size_t num_lanes = v##OP##SEW##LMUL();                    \
+    return num_lanes;                                               \
+  }
+
+HWY_RVV_FOREACH(HWY_RVV_LANES, Lanes, setvlmax_e)
+#undef HWY_RVV_LANES
+
+// ------------------------------ Zero
+
+HWY_RVV_FOREACH(HWY_RVV_RETV_ARGD, Zero, zero)
+
+template <class D>
+using VFromD = decltype(Zero(D()));
+
+// Partial: descriptors accepted by HWY_IF_LE128 reuse the full (m1) vector.
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API VFromD<Full<T>> Zero(Simd<T, N> /*tag*/) {
+  using DFull = Full<T>;
+  return Zero(DFull());
+}
+
+// ------------------------------ Set
+// vector = f(d, scalar), e.g. Set. Lanes(tag) is called before the intrinsic
+// and its result discarded.
+#define HWY_RVV_SET(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)        \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                     \
+      NAME(HWY_RVV_D(CHAR, SEW, LMUL) tag, HWY_RVV_T(BASE, SEW) value) { \
+    (void)Lanes(tag);                                                    \
+    return v##OP##_##CHAR##SEW##LMUL(value);                             \
+  }
+
+HWY_RVV_FOREACH_UI(HWY_RVV_SET, Set, mv_v_x)
+HWY_RVV_FOREACH_F(HWY_RVV_SET, Set, fmv_v_f)
+#undef HWY_RVV_SET
+
+// Partial vectors: forwarded to the full-vector overload for the same T.
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API VFromD<Simd<T, N>> Set(Simd<T, N> /*tag*/, T arg) {
+  using DFull = Full<T>;
+  return Set(DFull(), arg);
+}
+
+// ------------------------------ Undefined
+
+// RVV vundefined is 'poisoned' such that even XORing a _variable_ initialized
+// by it gives unpredictable results. It should only be used for maskoff, so
+// keep it internal. For the Highway op, just use Zero (single instruction).
+namespace detail {
+HWY_RVV_FOREACH(HWY_RVV_RETV_ARGD, Undefined, undefined)
+}  // namespace detail
+
+template <class D>
+HWY_API VFromD<D> Undefined(D d) {
+  return Zero(d);
+}
+
+// ------------------------------ BitCast
+
+namespace detail {
+
+// u8: already a byte vector, so both directions return the input unchanged.
+#define HWY_RVV_CAST_NOP(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)        \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                          \
+      BitCastToByte(HWY_RVV_V(BASE, SEW, LMUL) bytes) {                       \
+    return bytes;                                                             \
+  }                                                                           \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte(                         \
+      HWY_RVV_D(CHAR, SEW, LMUL) /* d */, HWY_RVV_V(BASE, SEW, LMUL) bytes) { \
+    return bytes;                                                             \
+  }
+
+// Other integers: a single OP (reinterpret) to or from u8 of the same LMUL.
+#define HWY_RVV_CAST_UI(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)     \
+  HWY_API vuint8##LMUL##_t BitCastToByte(HWY_RVV_V(BASE, SEW, LMUL) in) { \
+    return v##OP##_v_##CHAR##SEW##LMUL##_u8##LMUL(in);                    \
+  }                                                                       \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte(                     \
+      HWY_RVV_D(CHAR, SEW, LMUL) /* d */, vuint8##LMUL##_t bytes) {       \
+    return v##OP##_v_u8##LMUL##_##CHAR##SEW##LMUL(bytes);                 \
+  }
+
+// Float: two steps, via the unsigned integer vector of the same SEW and LMUL.
+#define HWY_RVV_CAST_F(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)            \
+  HWY_API vuint8##LMUL##_t BitCastToByte(HWY_RVV_V(BASE, SEW, LMUL) in) {       \
+    const auto as_unsigned = v##OP##_v_f##SEW##LMUL##_u##SEW##LMUL(in);         \
+    return v##OP##_v_u##SEW##LMUL##_u8##LMUL(as_unsigned);                      \
+  }                                                                             \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte(                           \
+      HWY_RVV_D(CHAR, SEW, LMUL) /* d */, vuint8##LMUL##_t bytes) {             \
+    const auto as_unsigned = v##OP##_v_u8##LMUL##_u##SEW##LMUL(bytes);          \
+    return v##OP##_v_u##SEW##LMUL##_f##SEW##LMUL(as_unsigned);                  \
+  }
+
+HWY_RVV_FOREACH_U08(HWY_RVV_CAST_NOP, _, _)
+HWY_RVV_FOREACH_I08(HWY_RVV_CAST_UI, _, reinterpret)
+HWY_RVV_FOREACH_UI16(HWY_RVV_CAST_UI, _, reinterpret)
+HWY_RVV_FOREACH_UI32(HWY_RVV_CAST_UI, _, reinterpret)
+HWY_RVV_FOREACH_UI64(HWY_RVV_CAST_UI, _, reinterpret)
+HWY_RVV_FOREACH_F(HWY_RVV_CAST_F, _, reinterpret)
+
+#undef HWY_RVV_CAST_NOP
+#undef HWY_RVV_CAST_UI
+#undef HWY_RVV_CAST_F
+
+}  // namespace detail
+
+// Reinterprets v as the vector type of d, going through a byte vector.
+template <class D, class FromV>
+HWY_API VFromD<D> BitCast(D d, FromV v) {
+  const auto bytes = detail::BitCastToByte(v);
+  return detail::BitCastFromByte(d, bytes);
+}
+
+// Partial: forwarded to the full-vector descriptor for the same T.
+template <typename T, size_t N, class FromV, HWY_IF_LE128(T, N)>
+HWY_API VFromD<Simd<T, N>> BitCast(Simd<T, N> /*tag*/, FromV v) {
+  using DFull = Full<T>;
+  return BitCast(DFull(), v);
+}
+
+namespace detail {
+
+// Reinterprets v as the vector type of DU, which defaults to
+// RebindToUnsigned of v's descriptor.
+template <class V, class DU = RebindToUnsigned<DFromV<V>>>
+HWY_API VFromD<DU> BitCastToUnsigned(V v) {
+  using UnsignedTag = DU;
+  return BitCast(UnsignedTag(), v);
+}
+
+}  // namespace detail
+
+// ------------------------------ Iota
+
+namespace detail {
+
+HWY_RVV_FOREACH_U(HWY_RVV_RETV_ARGD, Iota0, id_v)
+
+// Generic descriptor: builds the id_v sequence on the unsigned descriptor DU
+// and returns it as an unsigned vector.
+template <class D, class DU = RebindToUnsigned<D>>
+HWY_API VFromD<DU> Iota0(const D /*d*/) {
+  Lanes(DU());
+  const auto lane_ids = Iota0(DU());
+  return BitCastToUnsigned(lane_ids);
+}
+
+// Partial: forwarded to the full-vector descriptor for the same T.
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API VFromD<Simd<T, N>> Iota0(Simd<T, N> /*tag*/) {
+  using DFull = Full<T>;
+  return Iota0(DFull());
+}
+
+}  // namespace detail
+
+// ================================================== LOGICAL
+
+// ------------------------------ Not
+
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGV, Not, not )
+
+// Float: apply the integer Not to the bits, then cast back.
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V Not(const V v) {
+  const auto bits = BitCast(RebindToUnsigned<DFromV<V>>(), v);
+  return BitCast(DFromV<V>(), Not(bits));
+}
+
+// ------------------------------ And
+
+// Non-vector version (ideally immediate) for use with Iota0
+namespace detail {
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVS, And, and_vx)
+}  // namespace detail
+
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVV, And, and)
+
+// Float: apply the integer And to the bits of both operands, then cast back.
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V And(const V a, const V b) {
+  const auto bits_a = BitCast(RebindToUnsigned<DFromV<V>>(), a);
+  const auto bits_b = BitCast(RebindToUnsigned<DFromV<V>>(), b);
+  return BitCast(DFromV<V>(), And(bits_a, bits_b));
+}
+
+// ------------------------------ Or
+
+// Scalar argument plus mask. Used by VecFromMask. Note the intrinsic takes
+// (mask, maskedoff) before the two operands.
+#define HWY_RVV_OR_MASK(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)        \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                         \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) vec, HWY_RVV_T(BASE, SEW) scalar,      \
+           HWY_RVV_M(MLEN) active, HWY_RVV_V(BASE, SEW, LMUL) inactive) {    \
+    return v##OP##_##CHAR##SEW##LMUL##_m(active, inactive, vec, scalar);     \
+  }
+
+namespace detail {
+HWY_RVV_FOREACH_UI(HWY_RVV_OR_MASK, Or, or_vx)
+}  // namespace detail
+
+#undef HWY_RVV_OR_MASK
+
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVV, Or, or)
+
+// Float: apply the integer Or to the bits of both operands, then cast back.
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V Or(const V a, const V b) {
+  const auto bits_a = BitCast(RebindToUnsigned<DFromV<V>>(), a);
+  const auto bits_b = BitCast(RebindToUnsigned<DFromV<V>>(), b);
+  return BitCast(DFromV<V>(), Or(bits_a, bits_b));
+}
+
+// ------------------------------ Xor
+
+// Non-vector version (ideally immediate) for use with Iota0
+namespace detail {
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVS, Xor, xor_vx)
+}  // namespace detail
+
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVV, Xor, xor)
+
+// Float: apply the integer Xor to the bits of both operands, then cast back.
+template <class V, HWY_IF_FLOAT_V(V)>
+HWY_API V Xor(const V a, const V b) {
+  const auto bits_a = BitCast(RebindToUnsigned<DFromV<V>>(), a);
+  const auto bits_b = BitCast(RebindToUnsigned<DFromV<V>>(), b);
+  return BitCast(DFromV<V>(), Xor(bits_a, bits_b));
+}
+
+// ------------------------------ AndNot
+
+// Returns ~not_a & b, composed from Not and And.
+template <class V>
+HWY_API V AndNot(const V not_a, const V b) {
+  const V inverted = Not(not_a);
+  return And(inverted, b);
+}
+
+// ------------------------------ CopySign
+
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVV, CopySign, fsgnj)
+
+template <class V>
+HWY_API V CopySignToAbs(const V abs, const V sign) {
+  // Plain forward to CopySign: RVV also handles abs < 0, so no extra step.
+  return CopySign(abs, sign);
+}
+
+// ================================================== ARITHMETIC
+
+// ------------------------------ Add
+
+namespace detail {
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVS, Add, add_vx)
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVS, Add, fadd_vf)
+}  // namespace detail
+
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVV, Add, add)
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVV, Add, fadd)
+
+// ------------------------------ Sub
+HWY_RVV_FOREACH_UI(HWY_RVV_RETV_ARGVV, Sub, sub)
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVV, Sub, fsub)
+
+// ------------------------------ SaturatedAdd
+
+HWY_RVV_FOREACH_U08(HWY_RVV_RETV_ARGVV, SaturatedAdd, saddu)
+HWY_RVV_FOREACH_U16(HWY_RVV_RETV_ARGVV, SaturatedAdd, saddu)
+
+HWY_RVV_FOREACH_I08(HWY_RVV_RETV_ARGVV, SaturatedAdd, sadd)
+HWY_RVV_FOREACH_I16(HWY_RVV_RETV_ARGVV, SaturatedAdd, sadd)
+
+// ------------------------------ SaturatedSub
+
+HWY_RVV_FOREACH_U08(HWY_RVV_RETV_ARGVV, SaturatedSub, ssubu)
+HWY_RVV_FOREACH_U16(HWY_RVV_RETV_ARGVV, SaturatedSub, ssubu)
+
+HWY_RVV_FOREACH_I08(HWY_RVV_RETV_ARGVV, SaturatedSub, ssub)
+HWY_RVV_FOREACH_I16(HWY_RVV_RETV_ARGVV, SaturatedSub, ssub)
+
+// ------------------------------ AverageRound
+
+// TODO(janwas): check vxrm rounding mode
+HWY_RVV_FOREACH_U08(HWY_RVV_RETV_ARGVV, AverageRound, aaddu)
+HWY_RVV_FOREACH_U16(HWY_RVV_RETV_ARGVV, AverageRound, aaddu)
+
+// ------------------------------ ShiftLeft[Same]
+
+// Intrinsics do not define .vi forms, so use .vx instead. Generates both the
+// compile-time-count NAME<kBits> and the run-time-count NAME##Same.
+#define HWY_RVV_SHIFT(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)        \
+  template <int kBits>                                                     \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) in) { \
+    return v##OP##_vx_##CHAR##SEW##LMUL(in, kBits);                        \
+  }                                                                        \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                       \
+      NAME##Same(HWY_RVV_V(BASE, SEW, LMUL) in, int bits) {                \
+    const uint8_t count = static_cast<uint8_t>(bits);                      \
+    return v##OP##_vx_##CHAR##SEW##LMUL(in, count);                        \
+  }
+
+HWY_RVV_FOREACH_UI(HWY_RVV_SHIFT, ShiftLeft, sll)
+
+// ------------------------------ ShiftRight[Same]
+
+HWY_RVV_FOREACH_U(HWY_RVV_SHIFT, ShiftRight, srl)
+HWY_RVV_FOREACH_I(HWY_RVV_SHIFT, ShiftRight, sra)
+
+#undef HWY_RVV_SHIFT
+
+// ------------------------------ Shl
+/* Per-lane shift where value and count vectors have the same type. */        \
+#define HWY_RVV_SHIFT_VV(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)        \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                          \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) in, HWY_RVV_V(BASE, SEW, LMUL) count) { \
+    return v##OP##_vv_##CHAR##SEW##LMUL(in, count);                           \
+  }
+
+HWY_RVV_FOREACH_U(HWY_RVV_SHIFT_VV, Shl, sll)
+
+/* Signed lanes: the count vector is bit-cast to unsigned for the intrinsic. */ \
+#define HWY_RVV_SHIFT_II(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)        \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                          \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) in, HWY_RVV_V(BASE, SEW, LMUL) count) { \
+    const auto unsigned_count = detail::BitCastToUnsigned(count);             \
+    return v##OP##_vv_##CHAR##SEW##LMUL(in, unsigned_count);                  \
+  }
+
+HWY_RVV_FOREACH_I(HWY_RVV_SHIFT_II, Shl, sll)
+
+// ------------------------------ Shr
+
+HWY_RVV_FOREACH_U(HWY_RVV_SHIFT_VV, Shr, srl)
+HWY_RVV_FOREACH_I(HWY_RVV_SHIFT_II, Shr, sra)
+
+#undef HWY_RVV_SHIFT_II
+#undef HWY_RVV_SHIFT_VV
+
+// ------------------------------ Min
+
+HWY_RVV_FOREACH_U(HWY_RVV_RETV_ARGVV, Min, minu)
+HWY_RVV_FOREACH_I(HWY_RVV_RETV_ARGVV, Min, min)
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVV, Min, fmin)
+
+// ------------------------------ Max
+
+namespace detail {
+
+HWY_RVV_FOREACH_U(HWY_RVV_RETV_ARGVS, Max, maxu_vx)
+HWY_RVV_FOREACH_I(HWY_RVV_RETV_ARGVS, Max, max_vx)
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVS, Max, fmax_vf)
+
+}  // namespace detail
+
+HWY_RVV_FOREACH_U(HWY_RVV_RETV_ARGVV, Max, maxu)
+HWY_RVV_FOREACH_I(HWY_RVV_RETV_ARGVV, Max, max)
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVV, Max, fmax)
+
+// ------------------------------ Mul
+
+HWY_RVV_FOREACH_UI16(HWY_RVV_RETV_ARGVV, Mul, mul)
+HWY_RVV_FOREACH_UI32(HWY_RVV_RETV_ARGVV, Mul, mul)
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVV, Mul, fmul)
+
+// ------------------------------ MulHigh
+
+HWY_RVV_FOREACH_U16(HWY_RVV_RETV_ARGVV, MulHigh, mulhu)
+HWY_RVV_FOREACH_I16(HWY_RVV_RETV_ARGVV, MulHigh, mulh)
+
+// ------------------------------ Div
+
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVV, Div, fdiv)
+
+// ------------------------------ ApproximateReciprocal
+
+// TODO(janwas): not yet supported in intrinsics
+// Fallback until a reciprocal-estimate intrinsic is available: a full
+// division 1 / v. Uses the named Div op because no operator/ overload for RVV
+// vector types is visible in this header; all other ops here are functions.
+template <class V>
+HWY_API V ApproximateReciprocal(const V v) {
+  return Div(Set(DFromV<V>(), 1), v);
+}
+// HWY_RVV_FOREACH_F32(HWY_RVV_RETV_ARGV, ApproximateReciprocal, frece7)
+
+// ------------------------------ Sqrt
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGV, Sqrt, fsqrt)
+
+// ------------------------------ ApproximateReciprocalSqrt
+
+// TODO(janwas): not yet supported in intrinsics
+// Fallback: Sqrt followed by ApproximateReciprocal.
+template <class V>
+HWY_API V ApproximateReciprocalSqrt(const V v) {
+  const V root = Sqrt(v);
+  return ApproximateReciprocal(root);
+}
+// HWY_RVV_FOREACH_F32(HWY_RVV_RETV_ARGV, ApproximateReciprocalSqrt, frsqrte7)
+
+// ------------------------------ MulAdd
+// Note: op is still named vv, not vvv. The addend is passed to the intrinsic
+// first, followed by the two factors.
+#define HWY_RVV_FMA(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)            \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                         \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) factor_a,                              \
+           HWY_RVV_V(BASE, SEW, LMUL) factor_b,                              \
+           HWY_RVV_V(BASE, SEW, LMUL) addend) {                              \
+    return v##OP##_vv_##CHAR##SEW##LMUL(addend, factor_a, factor_b);         \
+  }
+
+HWY_RVV_FOREACH_F(HWY_RVV_FMA, MulAdd, fmacc)
+
+// ------------------------------ NegMulAdd
+HWY_RVV_FOREACH_F(HWY_RVV_FMA, NegMulAdd, fnmsac)
+
+// ------------------------------ MulSub
+HWY_RVV_FOREACH_F(HWY_RVV_FMA, MulSub, fmsac)
+
+// ------------------------------ NegMulSub
+HWY_RVV_FOREACH_F(HWY_RVV_FMA, NegMulSub, fnmacc)
+
+#undef HWY_RVV_FMA
+
+// ================================================== COMPARE
+
+// Comparisons set a mask bit to 1 if the condition is true, else 0. The XX in
+// vboolXX_t is a power-of-two divisor for vector bits (SEW / LMUL, i.e. MLEN):
+// SEW 8 / LMUL 1 = 1/8th of all bits; SEW 8 / LMUL 4 = half of all bits.
+
+// mask = f(vector, vector). Lanes() is called on the operand's descriptor
+// before the comparison; its result is discarded.
+#define HWY_RVV_RETM_ARGVV(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)     \
+  HWY_API HWY_RVV_M(MLEN)                                                    \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) lhs, HWY_RVV_V(BASE, SEW, LMUL) rhs) { \
+    (void)Lanes(DFromV<decltype(lhs)>());                                    \
+    return v##OP##_vv_##CHAR##SEW##LMUL##_b##MLEN(lhs, rhs);                 \
+  }
+
+// ------------------------------ Eq
+HWY_RVV_FOREACH_UI(HWY_RVV_RETM_ARGVV, Eq, mseq)
+HWY_RVV_FOREACH_F(HWY_RVV_RETM_ARGVV, Eq, mfeq)
+
+// ------------------------------ Ne
+HWY_RVV_FOREACH_UI(HWY_RVV_RETM_ARGVV, Ne, msne)
+HWY_RVV_FOREACH_F(HWY_RVV_RETM_ARGVV, Ne, mfne)
+
+// ------------------------------ Lt
+HWY_RVV_FOREACH_I(HWY_RVV_RETM_ARGVV, Lt, mslt)
+HWY_RVV_FOREACH_F(HWY_RVV_RETM_ARGVV, Lt, mflt)
+
+// ------------------------------ Gt
+
+template <class V>
+HWY_API auto Gt(const V a, const V b) -> decltype(Lt(a, b)) {
+  return Lt(b, a);
+}
+
+// ------------------------------ Le
+HWY_RVV_FOREACH_F(HWY_RVV_RETM_ARGVV, Le, mfle)
+
+#undef HWY_RVV_RETM_ARGVV
+
+// ------------------------------ Ge
+
+template <class V>
+HWY_API auto Ge(const V a, const V b) -> decltype(Le(a, b)) {
+  return Le(b, a);
+}
+
+// ------------------------------ TestBit
+
+// Mask of lanes in which (a & bit) is non-zero.
+template <class V>
+HWY_API auto TestBit(const V a, const V bit) -> decltype(Eq(a, bit)) {
+  const auto selected = And(a, bit);
+  return Ne(selected, Zero(DFromV<V>()));
+}
+
+// ------------------------------ Not
+
+// mask = f(mask); expands to vm<OP>_m_b<MLEN>.
+#define HWY_RVV_RETM_ARGM(MLEN, NAME, OP)              \
+  HWY_API HWY_RVV_M(MLEN) NAME(HWY_RVV_M(MLEN) mask) { \
+    return vm##OP##_m_b##MLEN(mask);                   \
+  }
+
+HWY_RVV_FOREACH_B(HWY_RVV_RETM_ARGM, Not, not )
+
+#undef HWY_RVV_RETM_ARGM
+
+// ------------------------------ And
+
+// mask = f(mask_a, mask_b) (note arg2,arg1 order!): the intrinsic receives
+// the second Highway argument first.
+#define HWY_RVV_RETM_ARGMM(MLEN, NAME, OP)                          \
+  HWY_API HWY_RVV_M(MLEN)                                           \
+      NAME(HWY_RVV_M(MLEN) first, HWY_RVV_M(MLEN) second) {         \
+    return vm##OP##_mm_b##MLEN(second, first);                      \
+  }
+
+HWY_RVV_FOREACH_B(HWY_RVV_RETM_ARGMM, And, and)
+
+// ------------------------------ AndNot
+HWY_RVV_FOREACH_B(HWY_RVV_RETM_ARGMM, AndNot, andnot)
+
+// ------------------------------ Or
+HWY_RVV_FOREACH_B(HWY_RVV_RETM_ARGMM, Or, or)
+
+// ------------------------------ Xor
+HWY_RVV_FOREACH_B(HWY_RVV_RETM_ARGMM, Xor, xor)
+
+#undef HWY_RVV_RETM_ARGMM
+
+// ------------------------------ IfThenElse
+/* The intrinsic takes (mask, no, yes), the reverse of Highway's order. */   \
+#define HWY_RVV_IF_THEN_ELSE(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)   \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                         \
+      NAME(HWY_RVV_M(MLEN) mask, HWY_RVV_V(BASE, SEW, LMUL) yes,             \
+           HWY_RVV_V(BASE, SEW, LMUL) no) {                                  \
+    return v##OP##_vvm_##CHAR##SEW##LMUL(mask, no, yes);                     \
+  }
+
+HWY_RVV_FOREACH(HWY_RVV_IF_THEN_ELSE, IfThenElse, merge)
+
+#undef HWY_RVV_IF_THEN_ELSE
+// ------------------------------ IfThenElseZero
+
+// IfThenElse with Zero as the "no" operand.
+template <class M, class V>
+HWY_API V IfThenElseZero(const M mask, const V yes) {
+  const V zero = Zero(DFromV<V>());
+  return IfThenElse(mask, yes, zero);
+}
+
+// ------------------------------ IfThenZeroElse
+
+// IfThenElse with Zero as the "yes" operand.
+template <class M, class V>
+HWY_API V IfThenZeroElse(const M mask, const V no) {
+  const V zero = Zero(DFromV<V>());
+  return IfThenElse(mask, zero, no);
+}
+
+// ------------------------------ MaskFromVec
+
+// Mask of lanes that compare not-equal to zero.
+template <class V>
+HWY_API auto MaskFromVec(const V v) -> decltype(Eq(v, v)) {
+  const auto zero = Zero(DFromV<V>());
+  return Ne(v, zero);
+}
+
+template <class D>
+using MFromD = decltype(MaskFromVec(Zero(D())));
+
+template <class D, typename MFrom>
+HWY_API MFromD<D> RebindMask(const D /*d*/, const MFrom mask) {
+  // No need to check lane size/LMUL are the same: if not, casting MFrom to
+  // MFromD<D> would fail.
+  return mask;
+}
+
+// ------------------------------ VecFromMask
+
+// Non-float: masked OR of zero with the scalar -1, using zero as maskedoff.
+template <class D, HWY_IF_NOT_FLOAT_D(D)>
+HWY_API VFromD<D> VecFromMask(const D d, MFromD<D> mask) {
+  const auto zero = Zero(d);
+  return detail::Or(/*v=*/zero, /*imm=*/-1, mask, /*maskedoff=*/zero);
+}
+
+// Float: build the vector on the unsigned descriptor, then cast to d's type.
+template <class D, HWY_IF_FLOAT_D(D)>
+HWY_API VFromD<D> VecFromMask(const D d, MFromD<D> mask) {
+  const auto as_unsigned = VecFromMask(RebindToUnsigned<D>(), mask);
+  return BitCast(d, as_unsigned);
+}
+
+// ------------------------------ ZeroIfNegative
+
+// Replaces lanes that are less than zero with zero; others pass through.
+template <class V>
+HWY_API V ZeroIfNegative(const V v) {
+  // The zero constant serves as both comparand and replacement, which is why
+  // IfThenZeroElse is not used here.
+  const auto zero = Zero(DFromV<V>());
+  const auto is_negative = Lt(v, zero);
+  return IfThenElse(is_negative, zero, v);
+}
+
+// ------------------------------ BroadcastSignBit
+
+// Shifts each lane right by (lane bits - 1) using ShiftRight.
+template <class V>
+HWY_API V BroadcastSignBit(const V v) {
+  constexpr int kSignBitIndex = static_cast<int>(sizeof(TFromV<V>) * 8 - 1);
+  return ShiftRight<kSignBitIndex>(v);
+}
+
+// ------------------------------ AllFalse
+
+/* True if vfirst reports a negative index for the mask. */ \
+#define HWY_RVV_ALL_FALSE(MLEN, NAME, OP)             \
+  HWY_API bool AllFalse(const HWY_RVV_M(MLEN) mask) { \
+    const auto first_set = vfirst_m_b##MLEN(mask);    \
+    return first_set < 0;                             \
+  }
+HWY_RVV_FOREACH_B(HWY_RVV_ALL_FALSE, _, _)
+#undef HWY_RVV_ALL_FALSE
+
+// ------------------------------ AllTrue
+
+/* All true <=> the inverted mask is all false. */ \
+#define HWY_RVV_ALL_TRUE(MLEN, NAME, OP)              \
+  HWY_API bool AllTrue(HWY_RVV_M(MLEN) mask) {        \
+    const HWY_RVV_M(MLEN) inverted = vmnot_m_b##MLEN(mask); \
+    return AllFalse(inverted);                        \
+  }
+HWY_RVV_FOREACH_B(HWY_RVV_ALL_TRUE, _, _)
+#undef HWY_RVV_ALL_TRUE
+
+// ------------------------------ CountTrue
+
+#define HWY_RVV_COUNT_TRUE(MLEN, NAME, OP) \
+  HWY_API size_t CountTrue(HWY_RVV_M(MLEN) m) { return vpopc_m_b##MLEN(m); }
+HWY_RVV_FOREACH_B(HWY_RVV_COUNT_TRUE, _, _)
+#undef HWY_RVV_COUNT_TRUE
+
+// ================================================== MEMORY
+
+// ------------------------------ Load
+
+/* vector = Load(d, pointer); Lanes(tag) is called first, result discarded. */ \
+#define HWY_RVV_LOAD(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                               \
+      NAME(HWY_RVV_D(CHAR, SEW, LMUL) tag,                         \
+           const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT from) {       \
+    (void)Lanes(tag);                                              \
+    return v##OP##SEW##_v_##CHAR##SEW##LMUL(from);                 \
+  }
+HWY_RVV_FOREACH(HWY_RVV_LOAD, Load, le)
+#undef HWY_RVV_LOAD
+
+// Partial: forwarded to the full-vector overload, like the partial
+// Zero/Set/Store wrappers. Passing the Simd<T, N> tag straight through would
+// select this same template again and recurse without end.
+// NOTE(review): this reads a whole Full<T> vector from p, not just N lanes.
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API VFromD<Simd<T, N>> Load(Simd<T, N> /*tag*/, const T* HWY_RESTRICT p) {
+  return Load(Full<T>(), p);
+}
+
+// ------------------------------ LoadU
+
+// RVV only requires lane alignment, not natural alignment of the entire vector.
+template <class D>
+HWY_API VFromD<D> LoadU(D d, const TFromD<D>* HWY_RESTRICT p) {
+  return Load(d, p);
+}
+
+// ------------------------------ Store
+
+/* void = f(vector, d, pointer), e.g. Store; intrinsic takes pointer first. */ \
+#define HWY_RVV_RET_ARGVDP(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \
+  HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) vec,                      \
+                    HWY_RVV_D(CHAR, SEW, LMUL) tag,                      \
+                    HWY_RVV_T(BASE, SEW) * HWY_RESTRICT to) {            \
+    (void)Lanes(tag);                                                    \
+    return v##OP##SEW##_v_##CHAR##SEW##LMUL(to, vec);                    \
+  }
+HWY_RVV_FOREACH(HWY_RVV_RET_ARGVDP, Store, se)
+#undef HWY_RVV_RET_ARGVDP
+
+// Partial: forwarded to the full-vector descriptor for the same T.
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API void Store(VFromD<Simd<T, N>> v, Simd<T, N> /*tag*/,
+                   T* HWY_RESTRICT p) {
+  using DFull = Full<T>;
+  return Store(v, DFull(), p);
+}
+
+// ------------------------------ StoreU
+
+// RVV only requires lane alignment, not natural alignment of the entire vector.
+template <class V, class D>
+HWY_API void StoreU(const V v, D d, TFromD<D>* HWY_RESTRICT p) {
+  Store(v, d, p);
+}
+
+// ------------------------------ Stream
+
+template <class V, class D, typename T>
+HWY_API void Stream(const V v, D d, T* HWY_RESTRICT aligned) {
+  Store(v, d, aligned);
+}
+
+// ------------------------------ ScatterOffset
+
+/* Indexed store: signed offsets are bit-cast to unsigned for the intrinsic. */ \
+#define HWY_RVV_SCATTER(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)    \
+  HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) vec,                      \
+                    HWY_RVV_D(CHAR, SEW, LMUL) /* d */,                  \
+                    HWY_RVV_T(BASE, SEW) * HWY_RESTRICT base,            \
+                    HWY_RVV_V(int, SEW, LMUL) offset) {                  \
+    const auto unsigned_offset = detail::BitCastToUnsigned(offset);      \
+    return v##OP##ei##SEW##_v_##CHAR##SEW##LMUL(base, unsigned_offset,   \
+                                                vec);                    \
+  }
+HWY_RVV_FOREACH(HWY_RVV_SCATTER, ScatterOffset, sx)
+#undef HWY_RVV_SCATTER
+
+// Partial: forwarded to the full-vector descriptor for the same T.
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API void ScatterOffset(VFromD<Simd<T, N>> v, Simd<T, N> /*tag*/,
+                           T* HWY_RESTRICT base,
+                           VFromD<Simd<MakeSigned<T>, N>> offset) {
+  using DFull = Full<T>;
+  return ScatterOffset(v, DFull(), base, offset);
+}
+
+// ------------------------------ ScatterIndex
+
+// Lane size 4: indices are shifted left by 2 (x4) before ScatterOffset.
+template <class D, HWY_IF_LANE_SIZE_D(D, 4)>
+HWY_API void ScatterIndex(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT base,
+                          const VFromD<RebindToSigned<D>> index) {
+  const auto offset = ShiftLeft<2>(index);
+  return ScatterOffset(v, d, base, offset);
+}
+
+template <class D, HWY_IF_LANE_SIZE_D(D, 8)>
+HWY_API void ScatterIndex(VFromD<D> v, D d, TFromD<D>* HWY_RESTRICT base,
+                          const VFromD<RebindToSigned<D>> index) {
+  return ScatterOffset(v, d, base, ShiftLeft<3>(index));
+}
+
+// ------------------------------ GatherOffset
+
+// Defines NAME(d, base, offset) returning the result of the intrinsic
+// v<OP>ei<SEW>_v_<CHAR><SEW><LMUL>(base, offsets). The signed offset vector is
+// reinterpreted as unsigned before being passed on; the tag d is unused.
+// Offsets are byte offsets: GatherIndex scales lane indices by the lane size
+// before calling this.
+#define HWY_RVV_GATHER(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                 \
+      NAME(HWY_RVV_D(CHAR, SEW, LMUL) /* d */,                       \
+           const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT base,           \
+           HWY_RVV_V(int, SEW, LMUL) offset) {                       \
+    return v##OP##ei##SEW##_v_##CHAR##SEW##LMUL(                     \
+        base, detail::BitCastToUnsigned(offset));                    \
+  }
+// Instantiated for every type/LMUL enumerated by HWY_RVV_FOREACH, with OP=lx.
+HWY_RVV_FOREACH(HWY_RVV_GATHER, GatherOffset, lx)
+#undef HWY_RVV_GATHER
+
+// Partial vectors (selected via HWY_IF_LE128): forwards to the Full<T>
+// overload of GatherOffset. The tag only participates in overload resolution,
+// so it is left unnamed (same /* d */ convention as the macro-generated
+// overloads).
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API VFromD<Simd<T, N>> GatherOffset(Simd<T, N> /* d */,
+                                        const T* HWY_RESTRICT base,
+                                        VFromD<Simd<MakeSigned<T>, N>> offset) {
+  return GatherOffset(Full<T>(), base, offset);
+}
+
+// ------------------------------ GatherIndex
+
+// 4-byte lanes: scales each lane index by 4 (shift left by 2) to obtain a byte
+// offset, then delegates to GatherOffset.
+template <class D, HWY_IF_LANE_SIZE_D(D, 4)>
+HWY_API VFromD<D> GatherIndex(D d, const TFromD<D>* HWY_RESTRICT base,
+                              const VFromD<RebindToSigned<D>> index) {
+  const auto byte_offset = ShiftLeft<2>(index);
+  return GatherOffset(d, base, byte_offset);
+}
+
+// 8-byte lanes: scales each lane index by 8 (shift left by 3) to obtain a byte
+// offset, then delegates to GatherOffset.
+template <class D, HWY_IF_LANE_SIZE_D(D, 8)>
+HWY_API VFromD<D> GatherIndex(D d, const TFromD<D>* HWY_RESTRICT base,
+                              const VFromD<RebindToSigned<D>> index) {
+  const auto byte_offset = ShiftLeft<3>(index);
+  return GatherOffset(d, base, byte_offset);
+}
+
+// ------------------------------ StoreInterleaved3
+
+// Defines NAME(a, b, c, d, unaligned): packs the three vectors into an x3
+// tuple via vcreate_* and passes it to the segment-store intrinsic
+// v<OP>e<SEW>_v_<CHAR><SEW><LMUL>x3. The element width in the intrinsic name
+// is derived from SEW (as the Store and Scatter macros do) instead of being
+// hard-coded to 8, so the macro stays correct if instantiated for wider lanes.
+// For the current SEW=8 instantiations the expansion is unchanged.
+#define HWY_RVV_STORE3(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)    \
+  HWY_API void NAME(                                                    \
+      HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_V(BASE, SEW, LMUL) b,       \
+      HWY_RVV_V(BASE, SEW, LMUL) c, HWY_RVV_D(CHAR, SEW, LMUL) /* d */, \
+      HWY_RVV_T(BASE, SEW) * HWY_RESTRICT unaligned) {                  \
+    const v##BASE##SEW##LMUL##x3_t triple =                             \
+        vcreate_##CHAR##SEW##LMUL##x3(a, b, c);                         \
+    return v##OP##e##SEW##_v_##CHAR##SEW##LMUL##x3(unaligned, triple);  \
+  }
+// Segments are limited to 8 registers, so we can only go up to LMUL=2.
+HWY_RVV_STORE3(uint, u, 8, m1, /*kShift=*/0, 8, StoreInterleaved3, sseg3)
+HWY_RVV_STORE3(uint, u, 8, m2, /*kShift=*/1, 4, StoreInterleaved3, sseg3)
+
+#undef HWY_RVV_STORE3
+
+// Partial vectors (selected via HWY_IF_LE128): delegates to the Full<T>
+// overload of StoreInterleaved3; the partial tag itself is not used.
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API void StoreInterleaved3(VFromD<Simd<T, N>> v0, VFromD<Simd<T, N>> v1,
+                               VFromD<Simd<T, N>> v2, Simd<T, N> /*tag*/,
+                               T* unaligned) {
+  const Full<T> d_full;
+  StoreInterleaved3(v0, v1, v2, d_full, unaligned);
+}
+
+// ------------------------------ StoreInterleaved4
+
+// Defines NAME(v0, v1, v2, v3, d, unaligned): packs the four vectors into an
+// x4 tuple via vcreate_* and passes it to the segment-store intrinsic
+// v<OP>e<SEW>_v_<CHAR><SEW><LMUL>x4. The element width in the intrinsic name
+// is derived from SEW (as the Store and Scatter macros do) instead of being
+// hard-coded to 8; for the current SEW=8 instantiations the expansion is
+// unchanged. The pointer is named `unaligned` to match StoreInterleaved3 and
+// the partial-vector wrappers.
+#define HWY_RVV_STORE4(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \
+  HWY_API void NAME(                                                 \
+      HWY_RVV_V(BASE, SEW, LMUL) v0, HWY_RVV_V(BASE, SEW, LMUL) v1,  \
+      HWY_RVV_V(BASE, SEW, LMUL) v2, HWY_RVV_V(BASE, SEW, LMUL) v3,  \
+      HWY_RVV_D(CHAR, SEW, LMUL) /* d */,                            \
+      HWY_RVV_T(BASE, SEW) * HWY_RESTRICT unaligned) {               \
+    const v##BASE##SEW##LMUL##x4_t quad =                            \
+        vcreate_##CHAR##SEW##LMUL##x4(v0, v1, v2, v3);               \
+    return v##OP##e##SEW##_v_##CHAR##SEW##LMUL##x4(unaligned, quad); \
+  }
+// Segments are limited to 8 registers, so we can only go up to LMUL=2.
+HWY_RVV_STORE4(uint, u, 8, m1, /*kShift=*/0, 8, StoreInterleaved4, sseg4)
+HWY_RVV_STORE4(uint, u, 8, m2, /*kShift=*/1, 4, StoreInterleaved4, sseg4)
+
+#undef HWY_RVV_STORE4
+
+// Partial vectors (selected via HWY_IF_LE128): delegates to the Full<T>
+// overload of StoreInterleaved4; the partial tag itself is not used.
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API void StoreInterleaved4(VFromD<Simd<T, N>> v0, VFromD<Simd<T, N>> v1,
+                               VFromD<Simd<T, N>> v2, VFromD<Simd<T, N>> v3,
+                               Simd<T, N> /*tag*/, T* unaligned) {
+  const Full<T> d_full;
+  StoreInterleaved4(v0, v1, v2, v3, d_full, unaligned);
+}
+
+// ================================================== CONVERT
+
+// Defines one PromoteTo(d, v) overload: the input vector has lane type
+// BASE_IN/BITS_IN at LMUL_IN, the result has BASE/BITS at LMUL, and the body
+// calls the intrinsic OP<CHAR><BITS><LMUL>(v). The tag d is unused.
+#define HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, LMUL, LMUL_IN) \
+  HWY_API HWY_RVV_V(BASE, BITS, LMUL)                                          \
+      PromoteTo(HWY_RVV_D(CHAR, BITS, LMUL) /*d*/,                             \
+                HWY_RVV_V(BASE_IN, BITS_IN, LMUL_IN) v) {                      \
+    return OP##CHAR##BITS##LMUL(v);                                            \
+  }
+
+// Doubling the lane width doubles LMUL (m1->m2, m2->m4, m4->m8). The mf2->m1
+// case is commented out for the reason below.
+// TODO(janwas): GCC does not yet support fractional LMUL
+#define HWY_RVV_PROMOTE_X2(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN)     \
+  /*HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m1, mf2)*/ \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m2, m1)      \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m4, m2)      \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m8, m4)
+
+// Quadrupling the lane width quadruples LMUL (m1->m4, m2->m8); the
+// fractional-LMUL inputs are likewise commented out.
+#define HWY_RVV_PROMOTE_X4(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN)     \
+  /*HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m1, mf4)*/ \
+  /*HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m2, mf2)*/ \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m4, m1)      \
+  HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m8, m2)
+
+// ------------------------------ PromoteTo
+
+// Unsigned -> wider unsigned (vzext_*).
+HWY_RVV_PROMOTE_X2(vzext_vf2_, uint, u, 16, uint, 8)
+HWY_RVV_PROMOTE_X2(vzext_vf2_, uint, u, 32, uint, 16)
+HWY_RVV_PROMOTE_X2(vzext_vf2_, uint, u, 64, uint, 32)
+HWY_RVV_PROMOTE_X4(vzext_vf4_, uint, u, 32, uint, 8)
+
+// Signed -> wider signed (vsext_*).
+HWY_RVV_PROMOTE_X2(vsext_vf2_, int, i, 16, int, 8)
+HWY_RVV_PROMOTE_X2(vsext_vf2_, int, i, 32, int, 16)
+HWY_RVV_PROMOTE_X2(vsext_vf2_, int, i, 64, int, 32)
+HWY_RVV_PROMOTE_X4(vsext_vf4_, int, i, 32, int, 8)
+
+// Float -> wider float (vfwcvt_f_f_v_*).
+HWY_RVV_PROMOTE_X2(vfwcvt_f_f_v_, float, f, 32, float, 16)
+HWY_RVV_PROMOTE_X2(vfwcvt_f_f_v_, float, f, 64, float, 32)
+
+// i32 to f64
+HWY_RVV_PROMOTE_X2(vfwcvt_f_x_v_, float, f, 64, int, 32)
+
+#undef HWY_RVV_PROMOTE_X4
+#undef HWY_RVV_PROMOTE_X2
+#undef HWY_RVV_PROMOTE
+
+// Unsigned -> wider signed: promote within the unsigned domain first, then
+// reinterpret the result as the requested signed type.
+
+// u8 -> i16
+template <size_t N>
+HWY_API VFromD<Simd<int16_t, N>> PromoteTo(Simd<int16_t, N> d,
+                                           VFromD<Simd<uint8_t, N>> v) {
+  const Simd<uint16_t, N> du16;
+  const auto widened = PromoteTo(du16, v);
+  return BitCast(d, widened);
+}
+
+// u8 -> i32
+template <size_t N>
+HWY_API VFromD<Simd<int32_t, N>> PromoteTo(Simd<int32_t, N> d,
+                                           VFromD<Simd<uint8_t, N>> v) {
+  const Simd<uint32_t, N> du32;
+  const auto widened = PromoteTo(du32, v);
+  return BitCast(d, widened);
+}
+
+// u16 -> i32
+template <size_t N>
+HWY_API VFromD<Simd<int32_t, N>> PromoteTo(Simd<int32_t, N> d,
+                                           VFromD<Simd<uint16_t, N>> v) {
+  const Simd<uint32_t, N> du32;
+  const auto widened = PromoteTo(du32, v);
+  return BitCast(d, widened);
+}
+
+// ------------------------------ DemoteTo U
+
+// First clamp negative numbers to zero to match x86 packus.
+// All overloads below narrow via vnclipu_wx_* with a shift amount of 0.
+// NOTE(review): this relies on vnclipu saturating values that exceed the
+// narrower unsigned range - confirm against the intrinsic documentation.
+
+// i32 -> u16: Max(v, 0), reinterpret as unsigned, then narrow once.
+HWY_API Vu16m1 DemoteTo(Du16m1 /* d */, const Vi32m2 v) {
+  return vnclipu_wx_u16m1(detail::BitCastToUnsigned(detail::Max(v, 0)), 0);
+}
+HWY_API Vu16m2 DemoteTo(Du16m2 /* d */, const Vi32m4 v) {
+  return vnclipu_wx_u16m2(detail::BitCastToUnsigned(detail::Max(v, 0)), 0);
+}
+HWY_API Vu16m4 DemoteTo(Du16m4 /* d */, const Vi32m8 v) {
+  return vnclipu_wx_u16m4(detail::BitCastToUnsigned(detail::Max(v, 0)), 0);
+}
+
+// i32 -> u8: two narrowing steps, reusing the i32 -> u16 overload above (which
+// performs the clamp to zero) and then narrowing u16 -> u8.
+HWY_API Vu8m1 DemoteTo(Du8m1 /* d */, const Vi32m4 v) {
+  return vnclipu_wx_u8m1(DemoteTo(Du16m2(), v), 0);
+}
+HWY_API Vu8m2 DemoteTo(Du8m2 /* d */, const Vi32m8 v) {
+  return vnclipu_wx_u8m2(DemoteTo(Du16m4(), v), 0);
+}
+
+// i16 -> u8: Max(v, 0), reinterpret as unsigned, then narrow once.
+HWY_API Vu8m1 DemoteTo(Du8m1 /* d */, const Vi16m2 v) {
+  return vnclipu_wx_u8m1(detail::BitCastToUnsigned(detail::Max(v, 0)), 0);
+}
+HWY_API Vu8m2 DemoteTo(Du8m2 /* d */, const Vi16m4 v) {
+  return vnclipu_wx_u8m2(detail::BitCastToUnsigned(detail::Max(v, 0)), 0);
+}
+HWY_API Vu8m4 DemoteTo(Du8m4 /* d */, const Vi16m8 v) {
+  return vnclipu_wx_u8m4(detail::BitCastToUnsigned(detail::Max(v, 0)), 0);
+}
+
+// u32 -> u8: the input is already unsigned, so no clamp to zero is applied;
+// narrows u32 -> u16 -> u8 with two vnclipu steps.
+HWY_API Vu8m1 U8FromU32(const Vu32m4 v) {
+  return vnclipu_wx_u8m1(vnclipu_wx_u16m2(v, 0), 0);
+}
+HWY_API Vu8m2 U8FromU32(const Vu32m8 v) {
+  return vnclipu_wx_u8m2(vnclipu_wx_u16m4(v, 0), 0);
+}
+
+// ------------------------------ DemoteTo I
+
+// Signed narrowing via vnclip_wx_* with a shift amount of 0.
+// NOTE(review): this relies on vnclip saturating to the narrower signed range
+// - confirm against the intrinsic documentation.
+
+// i16 -> i8
+HWY_API Vi8m1 DemoteTo(Di8m1 /* d */, const Vi16m2 v) {
+  return vnclip_wx_i8m1(v, 0);
+}
+HWY_API Vi8m2 DemoteTo(Di8m2 /* d */, const Vi16m4 v) {
+  return vnclip_wx_i8m2(v, 0);
+}
+HWY_API Vi8m4 DemoteTo(Di8m4 /* d */, const Vi16m8 v) {
+  return vnclip_wx_i8m4(v, 0);
+}
+
+// i32 -> i16
+HWY_API Vi16m1 DemoteTo(Di16m1 /* d */, const Vi32m2 v) {
+  return vnclip_wx_i16m1(v, 0);
+}
+HWY_API Vi16m2 DemoteTo(Di16m2 /* d */, const Vi32m4 v) {
+  return vnclip_wx_i16m2(v, 0);
+}
+HWY_API Vi16m4 DemoteTo(Di16m4 /* d */, const Vi32m8 v) {
+  return vnclip_wx_i16m4(v, 0);
+}
+
+// i32 -> i8: composed from the i32 -> i16 and i16 -> i8 overloads above.
+HWY_API Vi8m1 DemoteTo(Di8m1 d, const Vi32m4 v) {
+  return DemoteTo(d, DemoteTo(Di16m2(), v));
+}
+HWY_API Vi8m2 DemoteTo(Di8m2 d, const Vi32m8 v) {
+  return DemoteTo(d, DemoteTo(Di16m4(), v));
+}
+
+// ------------------------------ DemoteTo F
+
+// Float narrowing uses the vfncvt_rod_f_f_w_* intrinsics.
+// NOTE(review): "rod" presumably selects round-to-odd, which differs from
+// round-to-nearest-even narrowing on other targets - confirm this is intended.
+
+// f32 -> f16
+HWY_API Vf16m1 DemoteTo(Df16m1 /* d */, const Vf32m2 v) {
+  return vfncvt_rod_f_f_w_f16m1(v);
+}
+HWY_API Vf16m2 DemoteTo(Df16m2 /* d */, const Vf32m4 v) {
+  return vfncvt_rod_f_f_w_f16m2(v);
+}
+HWY_API Vf16m4 DemoteTo(Df16m4 /* d */, const Vf32m8 v) {
+  return vfncvt_rod_f_f_w_f16m4(v);
+}
+
+// f64 -> f32
+HWY_API Vf32m1 DemoteTo(Df32m1 /* d */, const Vf64m2 v) {
+  return vfncvt_rod_f_f_w_f32m1(v);
+}
+HWY_API Vf32m2 DemoteTo(Df32m2 /* d */, const Vf64m4 v) {
+  return vfncvt_rod_f_f_w_f32m2(v);
+}
+HWY_API Vf32m4 DemoteTo(Df32m4 /* d */, const Vf64m8 v) {
+  return vfncvt_rod_f_f_w_f32m4(v);
+}
+
+// f64 -> i32 via the vfncvt_rtz_x_f_w_* intrinsics (the "rtz" variant, i.e.
+// presumably rounding toward zero like ConvertTo below - confirm).
+HWY_API Vi32m1 DemoteTo(Di32m1 /* d */, const Vf64m2 v) {
+  return vfncvt_rtz_x_f_w_i32m1(v);
+}
+HWY_API Vi32m2 DemoteTo(Di32m2 /* d */, const Vf64m4 v) {
+  return vfncvt_rtz_x_f_w_i32m2(v);
+}
+HWY_API Vi32m4 DemoteTo(Di32m4 /* d */, const Vf64m8 v) {
+  return vfncvt_rtz_x_f_w_i32m4(v);
+}
+
+// ------------------------------ ConvertTo F
+
+// For each float type/LMUL, defines three functions between the float vector
+// and the signed integer vector of the same SEW and LMUL:
+//  - ConvertTo(float tag, int vector):  int -> float  (vfcvt_f_x_v_f*)
+//  - ConvertTo(int tag, float vector):  float -> int  (vfcvt_rtz_x_f_v_i*)
+//  - NearestInt(float vector):          float -> int  (vfcvt_x_f_v_i*)
+// The NAME and OP macro arguments are not used by this macro.
+#define HWY_RVV_CONVERT(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)          \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL) ConvertTo(                                \
+      HWY_RVV_D(CHAR, SEW, LMUL) /* d */, HWY_RVV_V(int, SEW, LMUL) v) {       \
+    return vfcvt_f_x_v_f##SEW##LMUL(v);                                        \
+  }                                                                            \
+  /* Truncates (rounds toward zero). */                                        \
+  HWY_API HWY_RVV_V(int, SEW, LMUL) ConvertTo(HWY_RVV_D(i, SEW, LMUL) /* d */, \
+                                              HWY_RVV_V(BASE, SEW, LMUL) v) {  \
+    return vfcvt_rtz_x_f_v_i##SEW##LMUL(v);                                    \
+  }                                                                            \
+  /* Uses default rounding mode. */                                            \
+  HWY_API HWY_RVV_V(int, SEW, LMUL) NearestInt(HWY_RVV_V(BASE, SEW, LMUL) v) { \
+    return vfcvt_x_f_v_i##SEW##LMUL(v);                                        \
+  }
+
+// API only requires f32 but we provide f64 for internal use (otherwise, it
+// seems difficult to implement Iota without a _mf2 vector half).
+HWY_RVV_FOREACH_F(HWY_RVV_CONVERT, _, _)
+#undef HWY_RVV_CONVERT
+
+// Partial vectors (selected via HWY_IF_LE128): delegates to the Full<T>
+// overload of ConvertTo; the partial tag itself is not used.
+template <typename T, size_t N, class FromV, HWY_IF_LE128(T, N)>
+HWY_API VFromD<Simd<T, N>> ConvertTo(Simd<T, N> /*tag*/, FromV v) {
+  const Full<T> d_full;
+  return ConvertTo(d_full, v);
+}
+
+// ================================================== SWIZZLE
+
+// ------------------------------ Compress
+
+// Defines NAME(v, mask) as a call to v<OP>_vm_<CHAR><SEW><LMUL>(mask, v, v);
+// note that v is passed for both vector operands of the intrinsic.
+#define HWY_RVV_COMPRESS(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                   \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_M(MLEN) mask) {       \
+    return v##OP##_vm_##CHAR##SEW##LMUL(mask, v, v);                   \
+  }
+
+// Only instantiated for 16/32/64-bit integers and floats (no 8-bit lanes).
+HWY_RVV_FOREACH_UI16(HWY_RVV_COMPRESS, Compress, compress)
+HWY_RVV_FOREACH_UI32(HWY_RVV_COMPRESS, Compress, compress)
+HWY_RVV_FOREACH_UI64(HWY_RVV_COMPRESS, Compress, compress)
+HWY_RVV_FOREACH_F(HWY_RVV_COMPRESS, Compress, compress)
+#undef HWY_RVV_COMPRESS
+
+// ------------------------------ CompressStore
+
+// Compresses v according to mask, stores the resulting vector to `aligned`
+// via Store (i.e. with the same extent as any other Store for tag d), and
+// returns CountTrue(mask).
+template <class V, class M, class D>
+HWY_API size_t CompressStore(const V v, const M mask, const D d,
+                             TFromD<D>* HWY_RESTRICT aligned) {
+  const auto packed = Compress(v, mask);
+  Store(packed, d, aligned);
+  const size_t num_selected = CountTrue(mask);
+  return num_selected;
+}
+
+// ------------------------------ TableLookupLanes
+
+// Loads lane indices from `idx` into a vector of type VFromD<DU> for use with
+// TableLookupLanes. In debug or ASAN builds, first verifies that every index
+// is in [0, Lanes(d)).
+template <class D, class DU = RebindToUnsigned<D>>
+HWY_API VFromD<DU> SetTableIndices(D d, const TFromD<DU>* idx) {
+  (void)d;  // only read by the debug-only check below
+#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER)
+  const size_t N = Lanes(d);
+  for (size_t i = 0; i < N; ++i) {
+    // Compare in size_t: casting N to the lane type could truncate it for
+    // narrow lanes (e.g. 256 -> 0 for uint8_t), and a separate `0 <= idx[i]`
+    // test is always true for unsigned indices. A negative signed index
+    // converts to a huge size_t and is still rejected.
+    HWY_DASSERT(static_cast<size_t>(idx[i]) < N);
+  }
+#endif
+  return Load(DU(), idx);
+}
+
+// <32bit are not part of Highway API, but used in Broadcast. This limits VLMAX
+// to 2048! We could instead use vrgatherei16.
+// Defines NAME(v, idx) as v<OP>_vv_<CHAR><SEW><LMUL>(v, idx), where idx is an
+// unsigned vector with the same SEW and LMUL as v.
+#define HWY_RVV_TABLE(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)        \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                       \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(uint, SEW, LMUL) idx) { \
+    return v##OP##_vv_##CHAR##SEW##LMUL(v, idx);                           \
+  }
+
+HWY_RVV_FOREACH(HWY_RVV_TABLE, TableLookupLanes, rgather)
+#undef HWY_RVV_TABLE
+
+// ------------------------------ Shuffle01
+
+// Swaps adjacent 64-bit lanes: lane i reads from lane i ^ 1.
+template <class V>
+HWY_API V Shuffle01(const V v) {
+  const DFromV<V> d;
+  static_assert(sizeof(TFromD<decltype(d)>) == 8, "Defined for 64-bit types");
+  const auto swapped_idx = detail::Xor(detail::Iota0(d), 1);
+  return TableLookupLanes(v, swapped_idx);
+}
+
+// ------------------------------ Shuffle2301
+
+// Swaps adjacent 32-bit lanes: lane i reads from lane i ^ 1.
+template <class V>
+HWY_API V Shuffle2301(const V v) {
+  const DFromV<V> d;
+  static_assert(sizeof(TFromD<decltype(d)>) == 4, "Defined for 32-bit types");
+  const auto swapped_idx = detail::Xor(detail::Iota0(d), 1);
+  return TableLookupLanes(v, swapped_idx);
+}
+
+// ------------------------------ Shuffle1032
+
+// Swaps adjacent pairs of 32-bit lanes: lane i reads from lane i ^ 2.
+template <class V>
+HWY_API V Shuffle1032(const V v) {
+  const DFromV<V> d;
+  static_assert(sizeof(TFromD<decltype(d)>) == 4, "Defined for 32-bit types");
+  const auto swapped_idx = detail::Xor(detail::Iota0(d), 2);
+  return TableLookupLanes(v, swapped_idx);
+}
+
+// ------------------------------ Shuffle0123
+
+// Reverses each group of four 32-bit lanes: lane i reads from lane i ^ 3.
+template <class V>
+HWY_API V Shuffle0123(const V v) {
+  const DFromV<V> d;
+  static_assert(sizeof(TFromD<decltype(d)>) == 4, "Defined for 32-bit types");
+  const auto reversed_idx = detail::Xor(detail::Iota0(d), 3);
+  return TableLookupLanes(v, reversed_idx);
+}
+
+// ------------------------------ Shuffle2103
+
+// Rotation within each group of four 32-bit lanes: the index pattern per
+// group is {3, 0, 1, 2}, i.e. lane i reads from lane (i - 1) mod 4.
+template <class V>
+HWY_API V Shuffle2103(const V v) {
+  const DFromV<V> d;
+  static_assert(sizeof(TFromD<decltype(d)>) == 4, "Defined for 32-bit types");
+  // The subtraction modulo 4 (number of lanes per 128-bit block) is computed
+  // with bitwise ops: flip bit 0, then flip bit 1 wherever bit 0 is now set.
+  const auto flipped = detail::Xor(detail::Iota0(d), 1);
+  const auto low_bit = detail::And(flipped, 1);
+  const auto carry_into_bit1 = Add(low_bit, low_bit);
+  const auto rotated_idx = Xor(flipped, carry_into_bit1);
+  return TableLookupLanes(v, rotated_idx);
+}
+
+// ------------------------------ Shuffle0321
+
+// Rotation within each group of four 32-bit lanes: the index pattern per
+// group is {1, 2, 3, 0}, i.e. lane i reads from lane (i + 1) mod 4.
+template <class V>
+HWY_API V Shuffle0321(const V v) {
+  const DFromV<V> d;
+  static_assert(sizeof(TFromD<decltype(d)>) == 4, "Defined for 32-bit types");
+  // The arithmetic modulo 4 (number of lanes per 128-bit block) is computed
+  // with bitwise ops: flip bits 0 and 1, then flip bit 1 again wherever bit 0
+  // is now set.
+  const auto flipped = detail::Xor(detail::Iota0(d), 3);
+  const auto low_bit = detail::And(flipped, 1);
+  const auto carry_into_bit1 = Add(low_bit, low_bit);
+  const auto rotated_idx = Xor(flipped, carry_into_bit1);
+  return TableLookupLanes(v, rotated_idx);
+}
+
+// ------------------------------ TableLookupBytes
+
+namespace detail {
+
+// The Highway API mandates x86-compatible behaviour: TableLookupBytes offsets
+// are implicitly relative to the start of their 128-bit block.
+
+// Number of lanes of D's lane type in one 128-bit (16-byte) block.
+template <class D>
+constexpr size_t LanesPerBlock(D) {
+  return 16 / sizeof(TFromD<D>);
+}
+
+// For each lane index in iota0, returns the index of the first lane of its
+// 128-bit block, by clearing the low bits that select a lane within a block.
+template <class D, class V>
+HWY_API V OffsetsOf128BitBlocks(const D d, const V iota0) {
+  using TU = MakeUnsigned<TFromD<D>>;
+  const TU block_mask = static_cast<TU>(~(LanesPerBlock(d) - 1));
+  return detail::And(iota0, block_mask);
+}
+}  // namespace detail
+
+// Byte-granular table lookup in which each index is relative to the start of
+// its own 128-bit block: the block's first byte index is added to every index
+// before a lane-wise lookup on the byte view of v.
+template <class V>
+HWY_API V TableLookupBytes(const V v, const V idx) {
+  const DFromV<V> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  const auto block_base = detail::OffsetsOf128BitBlocks(d8, detail::Iota0(d8));
+  const auto absolute_idx = Add(BitCast(d8, idx), block_base);
+  const auto bytes = TableLookupLanes(BitCast(d8, v), absolute_idx);
+  return BitCast(d, bytes);
+}
+
+// ------------------------------ Broadcast
+
+// Replicates lane kLane of each 128-bit block into every lane of that block:
+// each lane looks up (first lane index of its block) + kLane.
+template <int kLane, class V>
+HWY_API V Broadcast(const V v) {
+  const DFromV<V> d;
+  constexpr size_t kLanesPerBlock = detail::LanesPerBlock(d);
+  static_assert(0 <= kLane && kLane < kLanesPerBlock, "Invalid lane");
+  const auto block_start = detail::OffsetsOf128BitBlocks(d, detail::Iota0(d));
+  // Lane 0 needs no offset, so the addition is skipped in that case.
+  if (kLane == 0) {
+    return TableLookupLanes(v, block_start);
+  }
+  return TableLookupLanes(v, detail::Add(block_start, kLane));
+}
+
+// ------------------------------ GetLane
+
+// Defines NAME(v) returning a scalar of the lane type, obtained from the
+// intrinsic v<OP>_s_<CHAR><SEW><LMUL>_<CHAR><SEW>(v).
+// NOTE(review): presumably this extracts lane 0 - confirm against the
+// vmv_x_s / vfmv_f_s intrinsic documentation.
+#define HWY_RVV_GET_LANE(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \
+  HWY_API HWY_RVV_T(BASE, SEW) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) {    \
+    return v##OP##_s_##CHAR##SEW##LMUL##_##CHAR##SEW(v);               \
+  }
+
+// Integer types use OP=mv_x, float types use OP=fmv_f.
+HWY_RVV_FOREACH_UI(HWY_RVV_GET_LANE, GetLane, mv_x)
+HWY_RVV_FOREACH_F(HWY_RVV_GET_LANE, GetLane, fmv_f)
+#undef HWY_RVV_GET_LANE
+
+// ------------------------------ ShiftLeftLanes
+
+// vector = f(vector, vector, size_t)
+// Defines NAME(dst, src, lanes) as v<OP>_vx_<CHAR><SEW><LMUL>(dst, src, lanes).
+// This macro is shared by SlideUp (here) and SlideDown (further below), and is
+// only #undef'd after the latter.
+#define HWY_RVV_SLIDE(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)        \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                       \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) dst, HWY_RVV_V(BASE, SEW, LMUL) src, \
+           size_t lanes) {                                                 \
+    return v##OP##_vx_##CHAR##SEW##LMUL(dst, src, lanes);                  \
+  }
+
+namespace detail {
+HWY_RVV_FOREACH(HWY_RVV_SLIDE, SlideUp, slideup)
+}  // namespace detail
+
+// Slides the whole vector up by kLanes, then zeroes every lane whose position
+// within its 128-bit block is below kLanes.
+template <size_t kLanes, class V>
+HWY_API V ShiftLeftLanes(const V v) {
+  const RebindToSigned<DFromV<V>> di;
+  constexpr size_t kBlockLanes = detail::LanesPerBlock(di);
+  const auto slid = detail::SlideUp(v, v, kLanes);
+  // Match x86 semantics by zeroing lower lanes in 128-bit blocks
+  const auto lane_in_block = detail::And(detail::Iota0(di), kBlockLanes - 1);
+  const auto below_shift = Lt(BitCast(di, lane_in_block), Set(di, kLanes));
+  return IfThenZeroElse(below_shift, slid);
+}
+
+// ------------------------------ ShiftLeftBytes
+
+// Shifts left by kBytes bytes by reinterpreting v as a vector of uint8_t
+// lanes, applying ShiftLeftLanes<kBytes>, and casting back.
+template <int kBytes, class V>
+HWY_API V ShiftLeftBytes(const V v) {
+  using D = DFromV<V>;
+  const Repartition<uint8_t, D> d8;
+  // Result intentionally discarded.
+  // NOTE(review): presumably called for a side effect of Lanes() (e.g. setting
+  // the vector length for 8-bit lanes) - confirm against Lanes' definition.
+  Lanes(d8);
+  return BitCast(D(), ShiftLeftLanes<kBytes>(BitCast(d8, v)));
+}
+
+// ------------------------------ ShiftRightLanes
+
+// Instantiates detail::SlideDown(dst, src, lanes) for all types via
+// HWY_RVV_SLIDE (OP=slidedown); this is the macro's last use, hence the #undef.
+namespace detail {
+HWY_RVV_FOREACH(HWY_RVV_SLIDE, SlideDown, slidedown)
+}  // namespace detail
+
+#undef HWY_RVV_SLIDE
+
+// Slides the whole vector down by kLanes, then keeps only lanes whose position
+// within its 128-bit block is below (lanes per block - kLanes); the remaining
+// lanes are zeroed.
+template <size_t kLanes, class V>
+HWY_API V ShiftRightLanes(const V v) {
+  const RebindToSigned<DFromV<V>> di;
+  constexpr size_t kBlockLanes = detail::LanesPerBlock(di);
+  const auto slid = detail::SlideDown(v, v, kLanes);
+  // Match x86 semantics by zeroing upper lanes in 128-bit blocks
+  const auto lane_in_block = detail::And(detail::Iota0(di), kBlockLanes - 1);
+  const auto limit = Set(di, kBlockLanes - kLanes);
+  const auto retained = Lt(BitCast(di, lane_in_block), limit);
+  return IfThenElseZero(retained, slid);
+}
+
+// ------------------------------ ShiftRightBytes
+
+// Shifts right by kBytes bytes by reinterpreting v as a vector of uint8_t
+// lanes, applying ShiftRightLanes<kBytes>, and casting back.
+template <int kBytes, class V>
+HWY_API V ShiftRightBytes(const V v) {
+  using D = DFromV<V>;
+  const Repartition<uint8_t, D> d8;
+  // Result intentionally discarded.
+  // NOTE(review): presumably called for a side effect of Lanes() (e.g. setting
+  // the vector length for 8-bit lanes) - confirm against Lanes' definition.
+  Lanes(d8);
+  return BitCast(D(), ShiftRightLanes<kBytes>(BitCast(d8, v)));
+}
+
+// ------------------------------ OddEven
+
+// Returns a vector whose even-indexed lanes come from b and whose odd-indexed
+// lanes come from a.
+template <class V>
+HWY_API V OddEven(const V a, const V b) {
+  const RebindToUnsigned<DFromV<V>> du;  // Iota0 is unsigned only
+  const auto low_bit = detail::And(detail::Iota0(du), 1);
+  const auto even_lanes = Eq(low_bit, Zero(du));
+  return IfThenElse(even_lanes, b, a);
+}
+
+// ------------------------------ ConcatUpperLower
+
+// Lanes with index below Lanes/2 are taken from lo, all others from hi.
+template <class V>
+HWY_API V ConcatUpperLower(const V hi, const V lo) {
+  const RebindToSigned<DFromV<V>> di;
+  const auto half = Set(di, Lanes(di) / 2);
+  const auto lane_idx = BitCast(di, detail::Iota0(di));
+  const auto in_lower_half = Lt(lane_idx, half);
+  return IfThenElse(in_lower_half, lo, hi);
+}
+
+// ------------------------------ ConcatLowerLower
+
+// Slides hi up by half the lane count so that its lower half sits in the upper
+// half, then combines that with the lower half of lo.
+template <class V>
+HWY_API V ConcatLowerLower(const V hi, const V lo) {
+  const size_t half_lanes = Lanes(DFromV<V>()) / 2;
+  const auto hi_lower_moved_up = detail::SlideUp(hi, hi, half_lanes);
+  return ConcatUpperLower(hi_lower_moved_up, lo);
+}
+
+// ------------------------------ ConcatUpperUpper
+
+// Slides lo down by half the lane count so that its upper half sits in the
+// lower half, then combines that with the upper half of hi.
+template <class V>
+HWY_API V ConcatUpperUpper(const V hi, const V lo) {
+  const size_t half_lanes = Lanes(DFromV<V>()) / 2;
+  const auto lo_upper_moved_down = detail::SlideDown(lo, lo, half_lanes);
+  return ConcatUpperLower(hi, lo_upper_moved_down);
+}
+
+// ------------------------------ ConcatLowerUpper
+
+// Moves the lower half of hi into the upper half and the upper half of lo into
+// the lower half, then combines the two.
+template <class V>
+HWY_API V ConcatLowerUpper(const V hi, const V lo) {
+  const DFromV<V> d;
+  const auto hi_lower_moved_up = detail::SlideUp(hi, hi, Lanes(d) / 2);
+  const auto lo_upper_moved_down = detail::SlideDown(lo, lo, Lanes(d) / 2);
+  return ConcatUpperLower(hi_lower_moved_up, lo_upper_moved_down);
+}
+
+// ------------------------------ InterleaveLower
+
+// Per 128-bit block, interleaves the lower-half lanes of a and b: output lane
+// i reads source lane (block start + (i mod lanes-per-block) / 2), taken from
+// a for even i and from b for odd i.
+template <class V>
+HWY_API V InterleaveLower(const V a, const V b) {
+  const DFromV<V> d;
+  const RebindToUnsigned<decltype(d)> du;
+  constexpr size_t kBlockLanes = detail::LanesPerBlock(d);
+  const auto lane = detail::Iota0(d);
+  const auto half_pos = ShiftRight<1>(detail::And(lane, kBlockLanes - 1));
+  const auto src_idx = Add(half_pos, detail::OffsetsOf128BitBlocks(d, lane));
+  const auto even_lanes = Eq(detail::And(lane, 1), Zero(du));
+  const auto from_a = TableLookupLanes(a, src_idx);
+  const auto from_b = TableLookupLanes(b, src_idx);
+  return IfThenElse(even_lanes, from_a, from_b);
+}
+
+// ------------------------------ InterleaveUpper
+
+// Per 128-bit block, interleaves the upper-half lanes of a and b: same index
+// computation as InterleaveLower, plus half a block, taken from a for even
+// output lanes and from b for odd ones.
+template <class V>
+HWY_API V InterleaveUpper(const V a, const V b) {
+  const DFromV<V> d;
+  const RebindToUnsigned<decltype(d)> du;
+  constexpr size_t kBlockLanes = detail::LanesPerBlock(d);
+  const auto lane = detail::Iota0(d);
+  const auto half_pos = ShiftRight<1>(detail::And(lane, kBlockLanes - 1));
+  const auto lower_idx = Add(half_pos, detail::OffsetsOf128BitBlocks(d, lane));
+  const auto src_idx = detail::Add(lower_idx, kBlockLanes / 2);
+  const auto even_lanes = Eq(detail::And(lane, 1), Zero(du));
+  const auto from_a = TableLookupLanes(a, src_idx);
+  const auto from_b = TableLookupLanes(b, src_idx);
+  return IfThenElse(even_lanes, from_a, from_b);
+}
+
+// ------------------------------ ZipLower
+
+// InterleaveLower(a, b) reinterpreted as a vector of lanes twice as wide.
+template <class V>
+HWY_API VFromD<RepartitionToWide<DFromV<V>>> ZipLower(const V a, const V b) {
+  const auto interleaved = InterleaveLower(a, b);
+  return BitCast(RepartitionToWide<DFromV<V>>(), interleaved);
+}
+
+// ------------------------------ ZipUpper
+
+// InterleaveUpper(a, b) reinterpreted as a vector of lanes twice as wide.
+template <class V>
+HWY_API VFromD<RepartitionToWide<DFromV<V>>> ZipUpper(const V a, const V b) {
+  const auto interleaved = InterleaveUpper(a, b);
+  return BitCast(RepartitionToWide<DFromV<V>>(), interleaved);
+}
+
+// ------------------------------ Combine
+
+// TODO(janwas): implement after LMUL ext/trunc
+// Disabled placeholder: the body below is only an outline (it has no return
+// statement) and is excluded from compilation by the #if 0.
+#if 0
+
+template <class V>
+HWY_API V Combine(const V a, const V b) {
+  using D = DFromV<V>;
+  // double LMUL of inputs, then SlideUp with Lanes().
+}
+
+#endif
+
+// ================================================== REDUCE
+
+// vector = f(vector, zero_m1)
+// Defines NAME(v, v0): calls vsetvlmax for the vector's SEW/LMUL, runs the
+// reduction intrinsic v<OP>_vs_<CHAR><SEW><LMUL>_<CHAR><SEW>m1(v0, v, v0)
+// (which yields an m1 vector), extracts its scalar via GetLane, and returns
+// that scalar broadcast to all lanes with Set. v0 is always an m1 vector and
+// is supplied by the caller as the neutral/initial value.
+#define HWY_RVV_REDUCE(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)         \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL)                                         \
+      NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, m1) v0) {      \
+    vsetvlmax_e##SEW##LMUL();                                                \
+    return Set(                                                              \
+        HWY_RVV_D(CHAR, SEW, LMUL)(),                                        \
+        GetLane(v##OP##_vs_##CHAR##SEW##LMUL##_##CHAR##SEW##m1(v0, v, v0))); \
+  }
+
+// ------------------------------ SumOfLanes
+
+namespace detail {
+HWY_RVV_FOREACH_UI(HWY_RVV_REDUCE, RedSum, redsum)
+HWY_RVV_FOREACH_F(HWY_RVV_REDUCE, RedSum, fredsum)
+}  // namespace detail
+
+// Sum reduction; the neutral element passed to the reduction is an all-zero
+// Full<T> (LMUL=1) vector. The result is whatever detail::RedSum returns.
+template <class V>
+HWY_API V SumOfLanes(const V v) {
+  const Full<TFromV<V>> d1;  // always m1
+  const auto neutral = Zero(d1);
+  return detail::RedSum(v, neutral);
+}
+
+// ------------------------------ MinOfLanes
+namespace detail {
+HWY_RVV_FOREACH_U(HWY_RVV_REDUCE, RedMin, redminu)
+HWY_RVV_FOREACH_I(HWY_RVV_REDUCE, RedMin, redmin)
+HWY_RVV_FOREACH_F(HWY_RVV_REDUCE, RedMin, fredmin)
+}  // namespace detail
+
+// Minimum reduction; the neutral element is HighestValue<T>() in a Full<T>
+// (LMUL=1) vector, so it never lowers the minimum.
+template <class V>
+HWY_API V MinOfLanes(const V v) {
+  using LaneT = TFromV<V>;
+  const auto neutral = Set(Full<LaneT>(), HighestValue<LaneT>());
+  return detail::RedMin(v, neutral);
+}
+
+// ------------------------------ MaxOfLanes
+namespace detail {
+HWY_RVV_FOREACH_U(HWY_RVV_REDUCE, RedMax, redmaxu)
+HWY_RVV_FOREACH_I(HWY_RVV_REDUCE, RedMax, redmax)
+HWY_RVV_FOREACH_F(HWY_RVV_REDUCE, RedMax, fredmax)
+}  // namespace detail
+
+// Maximum reduction; the neutral element is LowestValue<T>() in a Full<T>
+// (LMUL=1) vector, so it never raises the maximum.
+template <class V>
+HWY_API V MaxOfLanes(const V v) {
+  using LaneT = TFromV<V>;
+  const auto neutral = Set(Full<LaneT>(), LowestValue<LaneT>());
+  return detail::RedMax(v, neutral);
+}
+
+#undef HWY_RVV_REDUCE
+
+// ================================================== Ops with dependencies
+
+// ------------------------------ LoadDup128
+
+// Loads a vector from p and replicates its first 128-bit block into every
+// block. Note that the Load is performed with tag d as-is (see TODO).
+template <class D>
+HWY_API VFromD<D> LoadDup128(D d, const TFromD<D>* const HWY_RESTRICT p) {
+  constexpr size_t kBlockLanes = detail::LanesPerBlock(d);
+  // TODO(janwas): set VL
+  const auto whole = Load(d, p);
+  // Broadcast the first block: wrapping each lane index to
+  // [0, kBlockLanes) makes every block read the first block's lanes.
+  const auto first_block_idx = detail::And(detail::Iota0(d), kBlockLanes - 1);
+  return TableLookupLanes(whole, first_block_idx);
+}
+
+// ------------------------------ StoreMaskBits
+// Defines StoreMaskBits(m, p) for each mask type.
+// WARNING: this is currently a stub - the actual store is commented out (see
+// TODO below), so nothing is written to p and both arguments are ignored. Only
+// the byte count ceil(Lanes(Full<uint8_t>) / MLEN) is computed and returned.
+#define HWY_RVV_STORE_MASK_BITS(MLEN, NAME, OP)                 \
+  HWY_API size_t StoreMaskBits(HWY_RVV_M(MLEN) m, uint8_t* p) { \
+    /* LMUL=1 is always enough */                               \
+    Full<uint8_t> d8;                                           \
+    const size_t num_bytes = (Lanes(d8) + MLEN - 1) / MLEN;     \
+    /* TODO(janwas): how to convert vbool* to vuint?*/          \
+    /*Store(m, d8, p);*/                                        \
+    (void)m;                                                    \
+    (void)p;                                                    \
+    return num_bytes;                                           \
+  }
+HWY_RVV_FOREACH_B(HWY_RVV_STORE_MASK_BITS, _, _)
+#undef HWY_RVV_STORE_MASK_BITS
+
+// ------------------------------ FirstN (Iota0, Lt, RebindMask, SlideUp)
+
+// Lane sizes other than 1 byte: a lane is selected when its index, compared as
+// a signed value, is less than n.
+// Disallow for 8-bit because Iota is likely to overflow.
+template <class D, HWY_IF_NOT_LANE_SIZE_D(D, 1)>
+HWY_API MFromD<D> FirstN(const D d, const size_t n) {
+  const RebindToSigned<D> di;
+  const auto lane_idx = BitCast(di, detail::Iota0(d));
+  const auto in_range = Lt(lane_idx, Set(di, n));
+  return RebindMask(d, in_range);
+}
+
+// 1-byte lanes: avoids Iota by sliding an all-zero vector up by n lanes over
+// an all-ones destination and selecting the lanes that still equal one.
+template <class D, HWY_IF_LANE_SIZE_D(D, 1)>
+HWY_API MFromD<D> FirstN(const D d, const size_t n) {
+  const auto ones = Set(d, 1);
+  const auto ones_then_zeros = detail::SlideUp(ones, Zero(d), n);
+  return Eq(ones_then_zeros, ones);
+}
+
+// ------------------------------ Neg
+
+// Signed integers: negation computed as 0 - v.
+template <class V, HWY_IF_SIGNED_V(V)>
+HWY_API V Neg(const V v) {
+  const DFromV<V> d;
+  return Sub(Zero(d), v);
+}
+
+// vector = f(vector), but argument is repeated
+// Defines NAME(v) as v<OP>_vv_<CHAR><SEW><LMUL>(v, v). Used for the float
+// overloads of Neg (here) and Abs (below), and #undef'd after the latter.
+#define HWY_RVV_RETV_ARGV2(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP)  \
+  HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
+    return v##OP##_vv_##CHAR##SEW##LMUL(v, v);                            \
+  }
+
+// Float Neg via the sign-injection intrinsic (OP=fsgnjn) with v as both
+// operands.
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGV2, Neg, fsgnjn)
+
+// ------------------------------ Abs
+
+// Signed integers: absolute value computed as max(v, -v).
+template <class V, HWY_IF_SIGNED_V(V)>
+HWY_API V Abs(const V v) {
+  const V negated = Neg(v);
+  return Max(v, negated);
+}
+
+// Float Abs via the sign-injection intrinsic (OP=fsgnjx) with v as both
+// operands.
+HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGV2, Abs, fsgnjx)
+
+#undef HWY_RVV_RETV_ARGV2
+
+// ------------------------------ AbsDiff
+
+// Absolute difference, computed as Abs(a - b).
+template <class V>
+HWY_API V AbsDiff(const V a, const V b) {
+  const V difference = Sub(a, b);
+  return Abs(difference);
+}
+
+// ------------------------------ Round
+
+// IEEE-754 roundToIntegralTiesToEven returns floating-point, but we do not have
+// a dedicated instruction for that. Rounding to integer and converting back to
+// float is correct except when the input magnitude is large. In that case the
+// input is already an integer (it has no fraction bits left) and is returned
+// unchanged.
+
+namespace detail {
+// Values written to the rounding-mode register by Ceil/Floor via `fsrm`.
+// The enumerators take the implicit values 0, 1, 2, 3.
+// NOTE(review): these are presumably meant to match the RISC-V frm encodings
+// (RNE=0, RTZ=1, RDN=2, RUP=3) - confirm against the ISA manual.
+enum RoundingModes { kNear, kTrunc, kDown, kUp };
+
+// Returns a mask that is true for lanes where |v| < MantissaEnd<T>(), i.e.
+// where the callers below use the integer round trip instead of returning the
+// input unchanged.
+template <class V>
+HWY_API auto UseInt(const V v) -> decltype(MaskFromVec(v)) {
+  return Lt(Abs(v), Set(DFromV<V>(), MantissaEnd<TFromV<V>>()));
+}
+}  // namespace detail
+
+// Rounds to an integral value using the current rounding mode, by converting
+// to integer and back. Lanes that fail detail::UseInt are returned unchanged;
+// the others take the round-tripped value with the sign of v copied onto it.
+template <class V>
+HWY_API V Round(const V v) {
+  const DFromV<V> df;
+
+  // round using current mode
+  const auto round_trip = ConvertTo(df, NearestInt(v));
+  const auto signed_result = CopySign(round_trip, v);
+
+  return IfThenElse(detail::UseInt(v), signed_result, v);
+}
+
+// ------------------------------ Trunc
+
+// Rounds toward zero by converting to integer (truncating) and back. Lanes
+// that fail detail::UseInt are returned unchanged; the others take the
+// round-tripped value with the sign of v copied onto it.
+template <class V>
+HWY_API V Trunc(const V v) {
+  const DFromV<V> df;
+  const RebindToSigned<decltype(df)> di;
+
+  // round toward 0
+  const auto round_trip = ConvertTo(df, ConvertTo(di, v));
+  const auto signed_result = CopySign(round_trip, v);
+
+  return IfThenElse(detail::UseInt(v), signed_result, v);
+}
+
+// ------------------------------ Ceil
+
+// Rounds toward +infinity: temporarily selects rounding mode kUp, applies
+// Round (which uses the current mode), then restores the rounding mode that
+// was active on entry. Previously the mode was unconditionally reset to kNear,
+// clobbering any non-default mode set by the caller.
+template <class V>
+HWY_API V Ceil(const V v) {
+  // The two-operand form of fsrm writes the new mode and returns the old one.
+  size_t prev_mode;
+  asm volatile("fsrm %0, %1" : "=r"(prev_mode) : "r"(detail::kUp));
+  const auto ret = Round(v);
+  asm volatile("fsrm %0" ::"r"(prev_mode));
+  return ret;
+}
+
+// ------------------------------ Floor
+
+// Rounds toward -infinity: temporarily selects rounding mode kDown, applies
+// Round (which uses the current mode), then restores the rounding mode that
+// was active on entry. Previously the mode was unconditionally reset to kNear,
+// clobbering any non-default mode set by the caller.
+template <class V>
+HWY_API V Floor(const V v) {
+  // The two-operand form of fsrm writes the new mode and returns the old one.
+  size_t prev_mode;
+  asm volatile("fsrm %0, %1" : "=r"(prev_mode) : "r"(detail::kDown));
+  const auto ret = Round(v);
+  asm volatile("fsrm %0" ::"r"(prev_mode));
+  return ret;
+}
+
+// ------------------------------ Iota
+
+// Unsigned lanes: Iota0 plus `first` broadcast to every lane.
+template <class D, HWY_IF_UNSIGNED_D(D)>
+HWY_API VFromD<D> Iota(const D d, TFromD<D> first) {
+  const auto lane_idx = detail::Iota0(d);
+  return Add(lane_idx, Set(d, first));
+}
+
+// Signed lanes: Iota0 is generated for the unsigned tag and reinterpreted as
+// signed before `first` is added.
+template <class D, HWY_IF_SIGNED_D(D)>
+HWY_API VFromD<D> Iota(const D d, TFromD<D> first) {
+  const RebindToUnsigned<D> du;
+  const auto lane_idx = BitCast(d, detail::Iota0(du));
+  return Add(lane_idx, Set(d, first));
+}
+
+// Float lanes: the unsigned Iota0 is reinterpreted as signed, converted to
+// float, and `first` is then added as a scalar.
+template <class D, HWY_IF_FLOAT_D(D)>
+HWY_API VFromD<D> Iota(const D d, TFromD<D> first) {
+  const RebindToUnsigned<D> du;
+  const RebindToSigned<D> di;
+  const auto lane_idx_f = ConvertTo(d, BitCast(di, detail::Iota0(du)));
+  return detail::Add(lane_idx_f, first);
+}
+
+// ------------------------------ MulEven
+
+// Using vwmul does not work for m8, so use mulh instead. Highway only provides
+// MulHigh for 16-bit, so use a private wrapper.
+namespace detail {
+HWY_RVV_FOREACH_U32(HWY_RVV_RETV_ARGVV, MulHigh, mulhu)
+HWY_RVV_FOREACH_I32(HWY_RVV_RETV_ARGVV, MulHigh, mulh)
+}  // namespace detail
+
+// Builds double-width products in place: even lanes keep the low half of
+// a*b, and each odd lane receives the high half from the lane below it (via
+// SlideUp by one lane). The pairs are then reinterpreted as wide lanes.
+template <class V>
+HWY_API VFromD<RepartitionToWide<DFromV<V>>> MulEven(const V a, const V b) {
+  const DFromV<V> d;
+  Lanes(d);
+  const auto product_lo = Mul(a, b);
+  const auto product_hi = detail::MulHigh(a, b);
+  const RepartitionToWide<decltype(d)> dw;
+  const auto hi_in_odd = detail::SlideUp(product_hi, product_hi, 1);
+  return BitCast(dw, OddEven(hi_in_odd, product_lo));
+}
+
+// ================================================== END MACROS
+// Removes the helper macros listed below so they are no longer defined past
+// this point. The surrounding namespace has no semantic effect on #undef; it
+// exists only so editors can fold this section.
+namespace detail {  // for code folding
+#undef HWY_IF_FLOAT_V
+#undef HWY_IF_SIGNED_V
+#undef HWY_IF_UNSIGNED_V
+
+#undef HWY_RVV_FOREACH
+#undef HWY_RVV_FOREACH_08
+#undef HWY_RVV_FOREACH_16
+#undef HWY_RVV_FOREACH_32
+#undef HWY_RVV_FOREACH_64
+#undef HWY_RVV_FOREACH_B
+#undef HWY_RVV_FOREACH_F
+#undef HWY_RVV_FOREACH_F32
+#undef HWY_RVV_FOREACH_F64
+#undef HWY_RVV_FOREACH_I
+#undef HWY_RVV_FOREACH_I08
+#undef HWY_RVV_FOREACH_I16
+#undef HWY_RVV_FOREACH_I32
+#undef HWY_RVV_FOREACH_I64
+#undef HWY_RVV_FOREACH_U
+#undef HWY_RVV_FOREACH_U08
+#undef HWY_RVV_FOREACH_U16
+#undef HWY_RVV_FOREACH_U32
+#undef HWY_RVV_FOREACH_U64
+#undef HWY_RVV_FOREACH_UI
+#undef HWY_RVV_FOREACH_UI16
+#undef HWY_RVV_FOREACH_UI32
+#undef HWY_RVV_FOREACH_UI64
+
+#undef HWY_RVV_RETV_ARGD
+#undef HWY_RVV_RETV_ARGV
+#undef HWY_RVV_RETV_ARGVS
+#undef HWY_RVV_RETV_ARGVV
+
+#undef HWY_RVV_T
+#undef HWY_RVV_D
+#undef HWY_RVV_V
+#undef HWY_RVV_M
+
+}  // namespace detail
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/scalar-inl.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/scalar-inl.h
new file mode 100644
index 0000000000..a32d88692e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/scalar-inl.h
@@ -0,0 +1,1209 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Single-element vectors and operations.
+// External include guard in highway.h - see comment there.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>  // std::min
+
+#include "hwy/base.h"
+#include "hwy/ops/shared-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Single instruction, single data: the Simd descriptor with N = 1.
+template <typename T>
+using Sisd = Simd<T, 1>;
+
+// Single-lane vector. (Wrapper class required for overloading operators.)
+template <typename T>
+struct Vec1 {
+  HWY_INLINE Vec1() = default;
+  Vec1(const Vec1&) = default;
+  Vec1& operator=(const Vec1&) = default;
+  HWY_INLINE explicit Vec1(const T t) : raw(t) {}
+
+  // In-place multiply; forwards to the binary operator*.
+  HWY_INLINE Vec1& operator*=(const Vec1 rhs) { return *this = *this * rhs; }
+
+  // In-place divide; forwards to the binary operator/.
+  HWY_INLINE Vec1& operator/=(const Vec1 rhs) { return *this = *this / rhs; }
+
+  // In-place add; forwards to the binary operator+.
+  HWY_INLINE Vec1& operator+=(const Vec1 rhs) { return *this = *this + rhs; }
+
+  // In-place subtract; forwards to the binary operator-.
+  HWY_INLINE Vec1& operator-=(const Vec1 rhs) { return *this = *this - rhs; }
+
+  // In-place bitwise AND; forwards to the binary operator&.
+  HWY_INLINE Vec1& operator&=(const Vec1 rhs) { return *this = *this & rhs; }
+
+  // In-place bitwise OR; forwards to the binary operator|.
+  HWY_INLINE Vec1& operator|=(const Vec1 rhs) { return *this = *this | rhs; }
+
+  // In-place bitwise XOR; forwards to the binary operator^.
+  HWY_INLINE Vec1& operator^=(const Vec1 rhs) { return *this = *this ^ rhs; }
+
+  // The single lane.
+  T raw;
+};
+
+// Lane mask: `bits` is 0 or FF..FF and has the same size as Vec1.
+template <typename T>
+class Mask1 {
+  using Raw = hwy::MakeUnsigned<T>;
+
+ public:
+  // Builds a mask with all bits set if `b` is true, otherwise all clear.
+  static HWY_INLINE Mask1<T> FromBool(bool b) {
+    Mask1<T> result;
+    result.bits = static_cast<Raw>(b ? ~Raw(0) : Raw(0));
+    return result;
+  }
+  Raw bits;  // 0 or FF..FF
+};
+
+// ------------------------------ BitCast
+// Reinterprets the first sizeof(T) bytes of `v` as T; T must not be wider.
+template <typename T, typename FromT>
+HWY_INLINE Vec1<T> BitCast(Sisd<T> /* tag */, Vec1<FromT> v) {
+  static_assert(sizeof(T) <= sizeof(FromT), "Promoting is undefined");
+  T to;  // copy only sizeof(T) bytes: sizeof(FromT) could overrun `to`
+  CopyBytes<sizeof(T)>(&v.raw, &to);
+  return Vec1<T>(to);
+}
+
+// ------------------------------ Set
+
+// Returns a vector whose lane is zero.
+template <typename T>
+HWY_INLINE Vec1<T> Zero(Sisd<T> /* tag */) { return Vec1<T>(T(0)); }
+
+template <typename T, typename T2>
+HWY_INLINE Vec1<T> Set(Sisd<T> /* tag */, const T2 t) {
+  const T lane = static_cast<T>(t);
+  return Vec1<T>(lane);
+}
+
+// "Undefined" is simply zero for the scalar target.
+template <typename T>
+HWY_INLINE Vec1<T> Undefined(Sisd<T> d) { return Zero(d); }
+
+template <typename T, typename T2>
+Vec1<T> Iota(const Sisd<T> /* tag */, const T2 first) {
+  const T lane = static_cast<T>(first);  // lane 0 holds `first` itself
+  return Vec1<T>(lane);
+}
+
+// ================================================== LOGICAL
+
+// ------------------------------ Not
+
+template <typename T>
+HWY_INLINE Vec1<T> Not(const Vec1<T> v) {
+  using TU = MakeUnsigned<T>;
+  const TU inverted = static_cast<TU>(~BitCast(Sisd<TU>(), v).raw);
+  return BitCast(Sisd<T>(), Vec1<TU>(inverted));  // back to the lane type
+}
+
+// ------------------------------ And
+
+// Bitwise AND of the raw lane bits (via the MakeUnsigned<T> representation).
+template <typename T>
+HWY_INLINE Vec1<T> And(const Vec1<T> a, const Vec1<T> b) {
+  using TU = MakeUnsigned<T>;
+  const TU bits = BitCast(Sisd<TU>(), a).raw & BitCast(Sisd<TU>(), b).raw;
+  return BitCast(Sisd<T>(), Vec1<TU>(bits));
+}
+// Operator form of And.
+template <typename T>
+HWY_INLINE Vec1<T> operator&(Vec1<T> a, Vec1<T> b) { return And(a, b); }
+
+// ------------------------------ AndNot
+
+template <typename T>
+HWY_INLINE Vec1<T> AndNot(const Vec1<T> a, const Vec1<T> b) {
+  using TU = MakeUnsigned<T>;
+  const TU bits = ~BitCast(Sisd<TU>(), a).raw & BitCast(Sisd<TU>(), b).raw;
+  return BitCast(Sisd<T>(), Vec1<TU>(bits));  // (~a) & b
+}
+
+// ------------------------------ Or
+
+// Bitwise OR of the raw lane bits (via the MakeUnsigned<T> representation).
+template <typename T>
+HWY_INLINE Vec1<T> Or(const Vec1<T> a, const Vec1<T> b) {
+  using TU = MakeUnsigned<T>;
+  const TU bits = BitCast(Sisd<TU>(), a).raw | BitCast(Sisd<TU>(), b).raw;
+  return BitCast(Sisd<T>(), Vec1<TU>(bits));
+}
+// Operator form of Or.
+template <typename T>
+HWY_INLINE Vec1<T> operator|(Vec1<T> a, Vec1<T> b) { return Or(a, b); }
+
+// ------------------------------ Xor
+
+// Bitwise XOR of the raw lane bits (via the MakeUnsigned<T> representation).
+template <typename T>
+HWY_INLINE Vec1<T> Xor(const Vec1<T> a, const Vec1<T> b) {
+  using TU = MakeUnsigned<T>;
+  const TU bits = BitCast(Sisd<TU>(), a).raw ^ BitCast(Sisd<TU>(), b).raw;
+  return BitCast(Sisd<T>(), Vec1<TU>(bits));
+}
+// Operator form of Xor.
+template <typename T>
+HWY_INLINE Vec1<T> operator^(Vec1<T> a, Vec1<T> b) { return Xor(a, b); }
+
+// ------------------------------ CopySign
+
+template <typename T>
+HWY_API Vec1<T> CopySign(const Vec1<T> magn, const Vec1<T> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+  const auto sign_mask = SignBit(Sisd<T>());
+  return Or(And(sign_mask, sign), AndNot(sign_mask, magn));
+}
+
+template <typename T>
+HWY_API Vec1<T> CopySignToAbs(const Vec1<T> abs, const Vec1<T> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+  return Or(And(sign, SignBit(Sisd<T>())), abs);  // abs is not masked here
+}
+
+// ------------------------------ BroadcastSignBit
+
+template <typename T>
+HWY_API Vec1<T> BroadcastSignBit(const Vec1<T> v) {
+  const bool is_negative = v.raw < 0;  // ShiftRight uses us: cannot call it
+  return is_negative ? Vec1<T>(T(-1)) : Vec1<T>(0);
+}
+
+// ------------------------------ Mask
+
+template <typename TFrom, typename TTo>
+HWY_API Mask1<TTo> RebindMask(Sisd<TTo> /*tag*/, Mask1<TFrom> m) {
+  static_assert(sizeof(TFrom) == sizeof(TTo), "Must have same size");
+  return Mask1<TTo>{m.bits};  // the mask bits are carried over unchanged
+}
+
+// Reinterprets the lane bits as a mask; v must be 0 or FF..FF.
+template <typename T>
+HWY_INLINE Mask1<T> MaskFromVec(const Vec1<T> v) {
+  Mask1<T> result;
+  CopyBytes<sizeof(result.bits)>(&v.raw, &result.bits);
+  return result;
+}
+
+template <typename T>
+Vec1<T> VecFromMask(const Mask1<T> mask) {
+  Vec1<T> lanes;
+  CopyBytes<sizeof(lanes.raw)>(&mask.bits, &lanes.raw);
+  return lanes;
+}
+
+// Tag overload: same result, the descriptor only selects T.
+template <typename T>
+Vec1<T> VecFromMask(Sisd<T> /* tag */, const Mask1<T> mask) {
+  return VecFromMask(mask);
+}
+
+// Single lane: it is selected whenever at least one lane is requested.
+template <typename T>
+HWY_INLINE Mask1<T> FirstN(Sisd<T> /*tag*/, size_t n) {
+  return Mask1<T>::FromBool(n > 0);
+}
+
+// Returns `yes` if any mask bit is set, otherwise `no`.
+template <typename T>
+HWY_INLINE Vec1<T> IfThenElse(const Mask1<T> mask, const Vec1<T> yes,
+                              const Vec1<T> no) {
+  return (mask.bits != 0) ? yes : no;
+}
+
+template <typename T>
+HWY_INLINE Vec1<T> IfThenElseZero(const Mask1<T> mask, const Vec1<T> yes) {
+  return IfThenElse(mask, yes, Vec1<T>(0));
+}
+
+template <typename T>
+HWY_INLINE Vec1<T> IfThenZeroElse(const Mask1<T> mask, const Vec1<T> no) {
+  return IfThenElse(mask, Vec1<T>(0), no);
+}
+
+template <typename T>
+HWY_INLINE Vec1<T> ZeroIfNegative(const Vec1<T> v) {
+  return !(v.raw < 0) ? v : Vec1<T>(0);  // negated "<": NaN passes through
+}
+
+// ------------------------------ Mask logical
+
+// Mask logical ops reuse the vector ops on the mask's bit pattern: ~m
+template <typename T>
+HWY_API Mask1<T> Not(const Mask1<T> m) {
+  return MaskFromVec(Not(VecFromMask(m)));
+}
+
+// a & b
+template <typename T>
+HWY_API Mask1<T> And(const Mask1<T> a, Mask1<T> b) {
+  return MaskFromVec(And(VecFromMask(a), VecFromMask(b)));
+}
+
+// ~a & b
+template <typename T>
+HWY_API Mask1<T> AndNot(const Mask1<T> a, Mask1<T> b) {
+  return MaskFromVec(AndNot(VecFromMask(a), VecFromMask(b)));
+}
+
+// a | b
+template <typename T>
+HWY_API Mask1<T> Or(const Mask1<T> a, Mask1<T> b) {
+  return MaskFromVec(Or(VecFromMask(a), VecFromMask(b)));
+}
+
+// a ^ b
+template <typename T>
+HWY_API Mask1<T> Xor(const Mask1<T> a, Mask1<T> b) {
+  return MaskFromVec(Xor(VecFromMask(a), VecFromMask(b)));
+}
+
+// ================================================== SHIFTS
+
+// ------------------------------ ShiftLeft (BroadcastSignBit)
+
+template <int kBits, typename T>
+HWY_INLINE Vec1<T> ShiftLeft(const Vec1<T> v) {
+  static_assert(0 <= kBits && kBits < sizeof(T) * 8, "Invalid shift");
+  using TU = hwy::MakeUnsigned<T>;  // shift the unsigned representation
+  return Vec1<T>(static_cast<TU>(v.raw) << kBits);
+}
+
+template <int kBits, typename T>
+HWY_INLINE Vec1<T> ShiftRight(const Vec1<T> v) {
+  static_assert(0 <= kBits && kBits < sizeof(T) * 8, "Invalid shift");
+#if __cplusplus >= 202002L
+  // Since C++20, signed >> is guaranteed to be arithmetic (rounds toward
+  // negative infinity, i.e. shifts in the sign bit).
+  return Vec1<T>(v.raw >> kBits);
+#else
+  // Unsigned lanes: the built-in shift is already logical.
+  if (!IsSigned<T>()) return Vec1<T>(v.raw >> kBits);
+  // Signed lanes: signed shifts are still implementation-defined here, so
+  // emulate the arithmetic shift using only logical (unsigned) shifts and
+  // OR copies of the sign bit into the vacated upper bits.
+  using TU = hwy::MakeUnsigned<T>;
+  const Sisd<TU> du;
+  const TU logical = BitCast(du, v).raw >> kBits;
+  const TU sign_fill = BitCast(du, BroadcastSignBit(v)).raw;
+  const TU high_bits = sign_fill << (sizeof(TU) * 8 - 1 - kBits);
+  return BitCast(Sisd<T>(), Vec1<TU>(logical | high_bits));
+#endif
+}
+
+// ------------------------------ ShiftLeftSame (BroadcastSignBit)
+
+template <typename T>
+HWY_INLINE Vec1<T> ShiftLeftSame(const Vec1<T> v, int bits) {
+  using TU = hwy::MakeUnsigned<T>;  // shift the unsigned representation
+  return Vec1<T>(static_cast<TU>(v.raw) << bits);
+}
+
+template <typename T>
+HWY_INLINE Vec1<T> ShiftRightSame(const Vec1<T> v, int bits) {
+#if __cplusplus >= 202002L
+  // Since C++20, signed >> is guaranteed to be arithmetic (rounds toward
+  // negative infinity, i.e. shifts in the sign bit).
+  return Vec1<T>(v.raw >> bits);
+#else
+  // Unsigned lanes: the built-in shift is already logical.
+  if (!IsSigned<T>()) return Vec1<T>(v.raw >> bits);
+  // Signed lanes: signed shifts are still implementation-defined here, so
+  // emulate the arithmetic shift using only logical (unsigned) shifts and
+  // OR copies of the sign bit into the vacated upper bits.
+  using TU = hwy::MakeUnsigned<T>;
+  const Sisd<TU> du;
+  const TU logical = BitCast(du, v).raw >> bits;
+  const TU sign_fill = BitCast(du, BroadcastSignBit(v)).raw;
+  const TU high_bits = sign_fill << (sizeof(TU) * 8 - 1 - bits);
+  return BitCast(Sisd<T>(), Vec1<TU>(logical | high_bits));
+#endif
+}
+
+// ------------------------------ Shl
+
+// Single-lane => same as ShiftLeftSame except for the argument type.
+template <typename T>
+HWY_INLINE Vec1<T> operator<<(const Vec1<T> v, const Vec1<T> bits) {
+  return ShiftLeftSame(v, static_cast<int>(bits.raw));  // count from the lane
+}
+
+template <typename T>
+HWY_INLINE Vec1<T> operator>>(const Vec1<T> v, const Vec1<T> bits) {
+  return ShiftRightSame(v, static_cast<int>(bits.raw));  // count from the lane
+}
+
+// ================================================== ARITHMETIC
+
+template <typename T>
+HWY_INLINE Vec1<T> operator+(Vec1<T> a, Vec1<T> b) {
+  const uint64_t wide_sum =
+      static_cast<uint64_t>(a.raw) + static_cast<uint64_t>(b.raw);
+  return Vec1<T>(static_cast<T>(wide_sum & static_cast<uint64_t>(~T(0))));
+}
+HWY_INLINE Vec1<float> operator+(const Vec1<float> a, const Vec1<float> b) {
+  return Vec1<float>(a.raw + b.raw);
+}
+HWY_INLINE Vec1<double> operator+(const Vec1<double> a, const Vec1<double> b) {
+  return Vec1<double>(a.raw + b.raw);
+}
+
+template <typename T>
+HWY_INLINE Vec1<T> operator-(Vec1<T> a, Vec1<T> b) {
+  const uint64_t wide_diff =
+      static_cast<uint64_t>(a.raw) - static_cast<uint64_t>(b.raw);
+  return Vec1<T>(static_cast<T>(wide_diff & static_cast<uint64_t>(~T(0))));
+}
+HWY_INLINE Vec1<float> operator-(const Vec1<float> a, const Vec1<float> b) {
+  return Vec1<float>(a.raw - b.raw);
+}
+HWY_INLINE Vec1<double> operator-(const Vec1<double> a, const Vec1<double> b) {
+  return Vec1<double>(a.raw - b.raw);
+}
+
+// ------------------------------ Saturating addition
+
+// Returns a + b clamped to the destination range.
+
+// Unsigned
+HWY_INLINE Vec1<uint8_t> SaturatedAdd(const Vec1<uint8_t> a,
+                                      const Vec1<uint8_t> b) {
+  const auto clamped = HWY_MIN(HWY_MAX(0, a.raw + b.raw), 255);
+  return Vec1<uint8_t>(static_cast<uint8_t>(clamped));
+}
+HWY_INLINE Vec1<uint16_t> SaturatedAdd(const Vec1<uint16_t> a,
+                                       const Vec1<uint16_t> b) {
+  const auto clamped = HWY_MIN(HWY_MAX(0, a.raw + b.raw), 65535);
+  return Vec1<uint16_t>(static_cast<uint16_t>(clamped));
+}
+
+// Signed
+HWY_INLINE Vec1<int8_t> SaturatedAdd(const Vec1<int8_t> a,
+                                     const Vec1<int8_t> b) {
+  const auto clamped = HWY_MIN(HWY_MAX(-128, a.raw + b.raw), 127);
+  return Vec1<int8_t>(static_cast<int8_t>(clamped));
+}
+HWY_INLINE Vec1<int16_t> SaturatedAdd(const Vec1<int16_t> a,
+                                      const Vec1<int16_t> b) {
+  const auto clamped = HWY_MIN(HWY_MAX(-32768, a.raw + b.raw), 32767);
+  return Vec1<int16_t>(static_cast<int16_t>(clamped));
+}
+
+// ------------------------------ Saturating subtraction
+
+// Returns a - b clamped to the destination range.
+
+// Unsigned
+HWY_INLINE Vec1<uint8_t> SaturatedSub(const Vec1<uint8_t> a,
+                                      const Vec1<uint8_t> b) {
+  const auto clamped = HWY_MIN(HWY_MAX(0, a.raw - b.raw), 255);
+  return Vec1<uint8_t>(static_cast<uint8_t>(clamped));
+}
+HWY_INLINE Vec1<uint16_t> SaturatedSub(const Vec1<uint16_t> a,
+                                       const Vec1<uint16_t> b) {
+  const auto clamped = HWY_MIN(HWY_MAX(0, a.raw - b.raw), 65535);
+  return Vec1<uint16_t>(static_cast<uint16_t>(clamped));
+}
+
+// Signed
+HWY_INLINE Vec1<int8_t> SaturatedSub(const Vec1<int8_t> a,
+                                     const Vec1<int8_t> b) {
+  const auto clamped = HWY_MIN(HWY_MAX(-128, a.raw - b.raw), 127);
+  return Vec1<int8_t>(static_cast<int8_t>(clamped));
+}
+HWY_INLINE Vec1<int16_t> SaturatedSub(const Vec1<int16_t> a,
+                                      const Vec1<int16_t> b) {
+  const auto clamped = HWY_MIN(HWY_MAX(-32768, a.raw - b.raw), 32767);
+  return Vec1<int16_t>(static_cast<int16_t>(clamped));
+}
+
+// ------------------------------ Average
+
+// Returns (a + b + 1) / 2
+
+HWY_INLINE Vec1<uint8_t> AverageRound(Vec1<uint8_t> a, Vec1<uint8_t> b) {
+  const auto sum_plus_one = a.raw + b.raw + 1;  // +1 rounds halves upward
+  return Vec1<uint8_t>(static_cast<uint8_t>(sum_plus_one / 2));
+}
+HWY_INLINE Vec1<uint16_t> AverageRound(Vec1<uint16_t> a, Vec1<uint16_t> b) {
+  const auto sum_plus_one = a.raw + b.raw + 1;  // +1 rounds halves upward
+  return Vec1<uint16_t>(static_cast<uint16_t>(sum_plus_one / 2));
+}
+
+// ------------------------------ Absolute value
+
+template <typename T>
+HWY_INLINE Vec1<T> Abs(const Vec1<T> a) {
+  const bool negate = a.raw < 0 && a.raw != hwy::LimitsMin<T>();
+  return negate ? Vec1<T>(-a.raw) : a;  // LimitsMin is returned unchanged
+}
+HWY_INLINE Vec1<float> Abs(const Vec1<float> a) {
+  return Vec1<float>(std::abs(a.raw));
+}
+HWY_INLINE Vec1<double> Abs(const Vec1<double> a) {
+  return Vec1<double>(std::abs(a.raw));
+}
+
+// ------------------------------ min/max
+
+template <typename T, HWY_IF_NOT_FLOAT(T)>
+HWY_INLINE Vec1<T> Min(const Vec1<T> a, const Vec1<T> b) {
+  return Vec1<T>(HWY_MIN(a.raw, b.raw));
+}
+
+template <typename T, HWY_IF_FLOAT(T)>
+HWY_INLINE Vec1<T> Min(const Vec1<T> a, const Vec1<T> b) {
+  // A NaN operand yields the other operand (b if both are NaN).
+  if (std::isnan(a.raw)) return b;
+  return std::isnan(b.raw) ? a : Vec1<T>(HWY_MIN(a.raw, b.raw));
+}
+
+template <typename T, HWY_IF_NOT_FLOAT(T)>
+HWY_INLINE Vec1<T> Max(const Vec1<T> a, const Vec1<T> b) {
+  return Vec1<T>(HWY_MAX(a.raw, b.raw));
+}
+
+template <typename T, HWY_IF_FLOAT(T)>
+HWY_INLINE Vec1<T> Max(const Vec1<T> a, const Vec1<T> b) {
+  // A NaN operand yields the other operand (b if both are NaN).
+  if (std::isnan(a.raw)) return b;
+  return std::isnan(b.raw) ? a : Vec1<T>(HWY_MAX(a.raw, b.raw));
+}
+
+// ------------------------------ Floating-point negate
+
+// Floating-point: XOR with SignBit (presumably only the sign bit is set in
+// it, so that only the sign is flipped).
+template <typename T, HWY_IF_FLOAT(T)>
+HWY_INLINE Vec1<T> Neg(Vec1<T> v) { return Xor(v, SignBit(Sisd<T>())); }
+
+// Integer: subtract from zero using this file's operator- (which computes
+// in uint64_t).
+template <typename T, HWY_IF_NOT_FLOAT(T)>
+HWY_INLINE Vec1<T> Neg(Vec1<T> v) { return Zero(Sisd<T>()) - v; }
+
+// ------------------------------ mul/div
+
+template <typename T>
+HWY_INLINE Vec1<T> operator*(const Vec1<T> a, const Vec1<T> b) {
+  if (hwy::IsFloat<T>()) {
+    return Vec1<T>(static_cast<T>(double(a.raw) * b.raw));
+  }
+  if (hwy::IsSigned<T>()) {
+    return Vec1<T>(static_cast<T>(int64_t(a.raw) * b.raw));
+  }
+  return Vec1<T>(static_cast<T>(uint64_t(a.raw) * b.raw));
+}
+
+template <typename T>
+HWY_INLINE Vec1<T> operator/(const Vec1<T> a, const Vec1<T> b) {
+  return Vec1<T>(a.raw / b.raw);  // no check for a zero divisor
+}
+
+// Returns the upper 16 bits of a * b in each lane.
+HWY_INLINE Vec1<int16_t> MulHigh(const Vec1<int16_t> a, const Vec1<int16_t> b) {
+  const auto product = a.raw * b.raw;  // operands are promoted before "*"
+  return Vec1<int16_t>(static_cast<int16_t>(product >> 16));
+}
+HWY_INLINE Vec1<uint16_t> MulHigh(const Vec1<uint16_t> a,
+                                  const Vec1<uint16_t> b) {
+  // Multiply as uint32_t: the promoted "int" product could overflow.
+  const uint32_t product =
+      static_cast<uint32_t>(a.raw) * static_cast<uint32_t>(b.raw);
+  return Vec1<uint16_t>(static_cast<uint16_t>(product >> 16));
+}
+
+// Multiplies even lanes (0, 2 ..) and returns the double-wide result.
+HWY_INLINE Vec1<int64_t> MulEven(const Vec1<int32_t> a, const Vec1<int32_t> b) {
+  // Widen one operand first so the multiplication is done in 64 bits.
+  return Vec1<int64_t>(static_cast<int64_t>(a.raw) * b.raw);
+}
+HWY_INLINE Vec1<uint64_t> MulEven(const Vec1<uint32_t> a,
+                                  const Vec1<uint32_t> b) {
+  // Widen one operand first so the multiplication is done in 64 bits.
+  return Vec1<uint64_t>(static_cast<uint64_t>(a.raw) * b.raw);
+}
+
+// Approximate reciprocal
+HWY_INLINE Vec1<float> ApproximateReciprocal(const Vec1<float> v) {
+  // Zero inputs are allowed, but callers are responsible for replacing the
+  // return value with something else (typically using IfThenElse). Returning
+  // an arbitrary value (0) for zero avoids a ubsan error from the division.
+  const bool is_zero = v.raw == 0.0f;
+  return Vec1<float>(is_zero ? 0.0f : 1.0f / v.raw);
+}
+
+// Absolute value of difference, i.e. Abs(a - b).
+HWY_INLINE Vec1<float> AbsDiff(const Vec1<float> a, const Vec1<float> b) {
+  return Abs(a - b);
+}
+
+// ------------------------------ Floating-point multiply-add variants
+
+template <typename T>
+HWY_INLINE Vec1<T> MulAdd(Vec1<T> mul, Vec1<T> x, Vec1<T> add) {
+  const Vec1<T> product = mul * x;
+  return product + add;
+}
+
+template <typename T>
+HWY_INLINE Vec1<T> NegMulAdd(Vec1<T> mul, Vec1<T> x, Vec1<T> add) {
+  const Vec1<T> product = mul * x;
+  return add - product;
+}
+
+template <typename T>
+HWY_INLINE Vec1<T> MulSub(Vec1<T> mul, Vec1<T> x, Vec1<T> sub) {
+  const Vec1<T> product = mul * x;
+  return product - sub;
+}
+
+template <typename T>
+HWY_INLINE Vec1<T> NegMulSub(Vec1<T> mul, Vec1<T> x, Vec1<T> sub) {
+  const Vec1<T> neg_product = Neg(mul) * x;
+  return neg_product - sub;
+}
+
+// ------------------------------ Floating-point square root
+
+// Approximate reciprocal square root
+HWY_INLINE Vec1<float> ApproximateReciprocalSqrt(const Vec1<float> v) {
+  float estimate = v.raw;
+  const float half_input = estimate * 0.5f;
+  uint32_t repr;
+  CopyBytes<4>(&estimate, &repr);
+  // Initial guess based on log2(f), computed on the bit representation
+  repr = 0x5F3759DF - (repr >> 1);
+  CopyBytes<4>(&repr, &estimate);
+  // Refine the guess with one Newton-Raphson iteration
+  return Vec1<float>(estimate * (1.5f - (half_input * estimate * estimate)));
+}
+
+// Square root (forwards to std::sqrt).
+HWY_INLINE Vec1<float> Sqrt(const Vec1<float> v) {
+  return Vec1<float>(std::sqrt(v.raw));
+}
+HWY_INLINE Vec1<double> Sqrt(const Vec1<double> v) {
+  return Vec1<double>(std::sqrt(v.raw));
+}
+
+// ------------------------------ Floating-point rounding
+
+template <typename T>
+HWY_INLINE Vec1<T> Round(const Vec1<T> v) {
+  using TI = MakeSigned<T>;
+  if (!(Abs(v).raw < MantissaEnd<T>())) return v;  // Huge or NaN
+  const bool negative = v.raw < T(0.0);
+  // Bias by +-0.5; the integer conversion then truncates toward zero.
+  const TI rounded = static_cast<TI>(v.raw + (negative ? T(-0.5) : T(0.5)));
+  if (rounded == 0) return CopySignToAbs(Vec1<T>(0), v);
+  // Round to even: an exact tie that produced an odd value steps back by one.
+  const bool is_tie = std::abs(rounded - v.raw) == T(0.5);
+  if ((rounded & 1) && is_tie) {
+    return Vec1<T>(static_cast<T>(rounded - (negative ? -1 : 1)));
+  }
+  return Vec1<T>(static_cast<T>(rounded));
+}
+
+// Round-to-nearest even. NaN and out-of-range inputs saturate by sign bit.
+HWY_INLINE Vec1<int32_t> NearestInt(const Vec1<float> v) {
+  using T = float;
+  using TI = int32_t;
+  const T abs = Abs(v).raw;
+  const bool signbit = std::signbit(v.raw);
+
+  if (!(abs < MantissaEnd<T>())) {  // Huge or NaN
+    // Too large to cast, or NaN. Strict "<": LimitsMax<TI>() rounds up to 2^31
+    // as float, and casting 2^31 to TI would be undefined.
+    if (!(abs < static_cast<T>(LimitsMax<TI>()))) {
+      return Vec1<TI>(signbit ? LimitsMin<TI>() : LimitsMax<TI>());
+    }
+    return Vec1<int32_t>(static_cast<TI>(v.raw));
+  }
+  const T bias = v.raw < T(0.0) ? T(-0.5) : T(0.5);
+  const TI rounded = static_cast<TI>(v.raw + bias);
+  if (rounded == 0) return Vec1<int32_t>(0);
+  // Round to even: an exact tie that produced an odd value steps back by one.
+  if ((rounded & 1) && std::abs(static_cast<T>(rounded) - v.raw) == T(0.5)) {
+    return Vec1<TI>(rounded - (signbit ? -1 : 1));
+  }
+  return Vec1<TI>(rounded);
+}
+
+template <typename T>
+HWY_INLINE Vec1<T> Trunc(const Vec1<T> v) {
+  using TI = MakeSigned<T>;
+  if (!(Abs(v).raw <= MantissaEnd<T>())) return v;  // Huge or NaN
+  // The integer conversion discards the fraction (rounds toward zero).
+  const TI whole = static_cast<TI>(v.raw);
+  // A zero result takes its sign from v.
+  return (whole == 0) ? CopySignToAbs(Vec1<T>(0), v)
+                      : Vec1<T>(static_cast<T>(whole));
+}
+
+template <typename Float, typename Bits, int kMantissaBits, int kExponentBits,
+          class V>
+V Ceiling(const V v) {
+  const Bits kExponentMask = (1ull << kExponentBits) - 1;
+  const Bits kMantissaMask = (1ull << kMantissaBits) - 1;
+  const Bits kBias = kExponentMask / 2;
+
+  Float f = v.raw;
+  const bool positive = f > Float(0.0);
+
+  Bits bits;
+  CopyBytes<sizeof(Bits)>(&v, &bits);  // reads v's object representation
+
+  const int exponent =
+      static_cast<int>(((bits >> kMantissaBits) & kExponentMask) - kBias);
+  // Exponent so large that no fractional mantissa bits remain: an integer.
+  if (exponent >= kMantissaBits) return v;
+  // |v| < 1 => 1 if positive, else -0.0. NOTE(review): +0.0 also gives -0.0.
+  if (exponent < 0) return positive ? V(1) : V(-0.0);
+
+  const Bits mantissa_mask = kMantissaMask >> exponent;
+  // Already an integer: all fractional mantissa bits are zero.
+  if ((bits & mantissa_mask) == 0) return v;
+
+  // Clear fractional bits and round up (positive: add one unit first).
+  if (positive) bits += (kMantissaMask + 1) >> exponent;
+  bits &= ~mantissa_mask;
+
+  CopyBytes<sizeof(Bits)>(&bits, &f);
+  return V(f);
+}
+
+template <typename Float, typename Bits, int kMantissaBits, int kExponentBits,
+          class V>
+V Floor(const V v) {
+  const Bits kExponentMask = (1ull << kExponentBits) - 1;
+  const Bits kMantissaMask = (1ull << kMantissaBits) - 1;
+  const Bits kBias = kExponentMask / 2;
+
+  Float f = v.raw;
+  const bool negative = f < Float(0.0);
+
+  Bits bits;
+  CopyBytes<sizeof(Bits)>(&v, &bits);  // reads v's object representation
+
+  const int exponent =
+      static_cast<int>(((bits >> kMantissaBits) & kExponentMask) - kBias);
+  // Exponent so large that no fractional mantissa bits remain: an integer.
+  if (exponent >= kMantissaBits) return v;
+  // |v| < 1 => -1 if negative, else +0.0. NOTE(review): -0.0 also gives +0.0.
+  if (exponent < 0) return V(negative ? Float(-1.0) : Float(0.0));
+
+  const Bits mantissa_mask = kMantissaMask >> exponent;
+  // Already an integer: all fractional mantissa bits are zero.
+  if ((bits & mantissa_mask) == 0) return v;
+
+  // Clear fractional bits and round down (negative: add one unit first).
+  if (negative) bits += (kMantissaMask + 1) >> exponent;
+  bits &= ~mantissa_mask;
+
+  CopyBytes<sizeof(Bits)>(&bits, &f);
+  return V(f);
+}
+
+// Toward +infinity, aka ceiling (mantissa/exponent bits: 23/8 and 52/11).
+HWY_INLINE Vec1<float> Ceil(const Vec1<float> v) {
+  return Ceiling<float, uint32_t, 23, 8>(v);
+}
+HWY_INLINE Vec1<double> Ceil(const Vec1<double> v) {
+  return Ceiling<double, uint64_t, 52, 11>(v);
+}
+
+// Toward -infinity, aka floor (mantissa/exponent bits: 23/8 and 52/11).
+HWY_INLINE Vec1<float> Floor(const Vec1<float> v) {
+  return Floor<float, uint32_t, 23, 8>(v);
+}
+HWY_INLINE Vec1<double> Floor(const Vec1<double> v) {
+  return Floor<double, uint64_t, 52, 11>(v);
+}
+
+// ================================================== COMPARE
+
+template <typename T>
+HWY_INLINE Mask1<T> operator==(const Vec1<T> a, const Vec1<T> b) {
+  return Mask1<T>::FromBool(a.raw == b.raw);  // mask from the scalar compare
+}
+
+template <typename T>
+HWY_INLINE Mask1<T> TestBit(const Vec1<T> v, const Vec1<T> bit) {
+  static_assert(!hwy::IsFloat<T>(), "Only integer vectors supported");
+  return (v & bit) == bit;  // true iff every bit set in `bit` is set in v
+}
+
+template <typename T>
+HWY_INLINE Mask1<T> operator<(const Vec1<T> a, const Vec1<T> b) {
+  return Mask1<T>::FromBool(a.raw < b.raw);  // mask from the scalar compare
+}
+template <typename T>
+HWY_INLINE Mask1<T> operator>(const Vec1<T> a, const Vec1<T> b) {
+  return Mask1<T>::FromBool(a.raw > b.raw);  // mask from the scalar compare
+}
+
+template <typename T>
+HWY_INLINE Mask1<T> operator<=(const Vec1<T> a, const Vec1<T> b) {
+  return Mask1<T>::FromBool(a.raw <= b.raw);  // mask from the scalar compare
+}
+template <typename T>
+HWY_INLINE Mask1<T> operator>=(const Vec1<T> a, const Vec1<T> b) {
+  return Mask1<T>::FromBool(a.raw >= b.raw);  // mask from the scalar compare
+}
+
+// ================================================== MEMORY
+
+// ------------------------------ Load
+
+template <typename T>
+HWY_INLINE Vec1<T> Load(Sisd<T> /* tag */, const T* HWY_RESTRICT aligned) {
+  Vec1<T> loaded;
+  CopyBytes<sizeof(T)>(aligned, &loaded.raw);
+  return loaded;
+}
+
+template <typename T>
+HWY_INLINE Vec1<T> LoadU(Sisd<T> d, const T* HWY_RESTRICT p) {
+  return Load(d, p);  // same as Load
+}
+
+// In some use cases, "load single lane" is sufficient; otherwise avoid this.
+template <typename T>
+HWY_INLINE Vec1<T> LoadDup128(Sisd<T> d, const T* HWY_RESTRICT aligned) {
+  return Load(d, aligned);  // same as Load
+}
+
+// ------------------------------ Store
+
+template <typename T>
+HWY_INLINE void Store(Vec1<T> v, Sisd<T> /* tag */, T* HWY_RESTRICT aligned) {
+  // Copy the lane's bytes to the destination.
+  CopyBytes<sizeof(T)>(&v.raw, aligned);
+}
+
+template <typename T>
+HWY_INLINE void StoreU(const Vec1<T> v, Sisd<T> d, T* HWY_RESTRICT p) {
+  Store(v, d, p);  // same as Store
+}
+
+// ------------------------------ StoreInterleaved3
+
+HWY_API void StoreInterleaved3(const Vec1<uint8_t> v0, const Vec1<uint8_t> v1,
+                               const Vec1<uint8_t> v2, Sisd<uint8_t> d,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  // One lane per input: write v0, v1, v2 to consecutive bytes.
+  const Vec1<uint8_t> lanes[3] = {v0, v1, v2};
+  for (size_t i = 0; i < 3; ++i) StoreU(lanes[i], d, unaligned + i);
+}
+
+HWY_API void StoreInterleaved4(const Vec1<uint8_t> v0, const Vec1<uint8_t> v1,
+                               const Vec1<uint8_t> v2, const Vec1<uint8_t> v3,
+                               Sisd<uint8_t> d,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  // One lane per input: write v0, v1, v2, v3 to consecutive bytes, in that
+  // order.
+  const Vec1<uint8_t> lanes[4] = {v0, v1, v2, v3};
+  for (size_t i = 0; i < 4; ++i) StoreU(lanes[i], d, unaligned + i);
+}
+
+// ------------------------------ Stream
+
+template <typename T>
+HWY_INLINE void Stream(const Vec1<T> v, Sisd<T> d, T* HWY_RESTRICT aligned) {
+  Store(v, d, aligned);  // same as Store
+}
+
+// ------------------------------ Scatter
+
+template <typename T, typename Offset>
+HWY_INLINE void ScatterOffset(Vec1<T> v, Sisd<T> d, T* base,
+                              const Vec1<Offset> offset) {
+  static_assert(sizeof(T) == sizeof(Offset), "Must match for portability");
+  uint8_t* const dest = reinterpret_cast<uint8_t*>(base) + offset.raw;
+  Store(v, d, reinterpret_cast<T*>(dest));  // offset was applied in bytes
+}
+
+template <typename T, typename Index>
+HWY_INLINE void ScatterIndex(Vec1<T> v, Sisd<T> d, T* HWY_RESTRICT base,
+                             const Vec1<Index> index) {
+  static_assert(sizeof(T) == sizeof(Index), "Must match for portability");
+  Store(v, d, base + index.raw);  // index counts elements of T
+}
+
+// ------------------------------ Gather
+
+template <typename T, typename Offset>
+HWY_INLINE Vec1<T> GatherOffset(Sisd<T> d, const T* base,
+                                const Vec1<Offset> offset) {
+  static_assert(sizeof(T) == sizeof(Offset), "Must match for portability");
+  const uintptr_t byte_addr = reinterpret_cast<uintptr_t>(base) + offset.raw;
+  return Load(d, reinterpret_cast<const T*>(byte_addr));  // offset in bytes
+}
+
+template <typename T, typename Index>
+HWY_INLINE Vec1<T> GatherIndex(Sisd<T> d, const T* HWY_RESTRICT base,
+                               const Vec1<Index> index) {
+  static_assert(sizeof(T) == sizeof(Index), "Must match for portability");
+  return Load(d, base + index.raw);  // index counts elements of T
+}
+
+// ================================================== CONVERT
+
+// ConvertTo and DemoteTo with floating-point input and integer output truncate
+// (rounding toward zero).
+
+template <typename FromT, typename ToT>
+HWY_INLINE Vec1<ToT> PromoteTo(Sisd<ToT> /* tag */, Vec1<FromT> from) {
+  static_assert(sizeof(ToT) > sizeof(FromT), "Not promoting");
+  // For bits Y > X, floatX->floatY and intX->intY are always representable.
+  const ToT widened = static_cast<ToT>(from.raw); return Vec1<ToT>(widened);
+}
+
+template <typename FromT, typename ToT, HWY_IF_FLOAT(FromT)>
+HWY_INLINE Vec1<ToT> DemoteTo(Sisd<ToT> /* tag */, Vec1<FromT> from) {
+  static_assert(sizeof(ToT) < sizeof(FromT), "Not demoting");
+  // Saturate out-of-range inputs: prevents ubsan errors when converting
+  // float to narrower integer/float.
+  const FromT limit = static_cast<FromT>(HighestValue<ToT>());
+  if (std::isinf(from.raw) || std::fabs(from.raw) > limit) {
+    const bool negative = std::signbit(from.raw);
+    return Vec1<ToT>(negative ? LowestValue<ToT>() : HighestValue<ToT>());
+  }
+  return Vec1<ToT>(static_cast<ToT>(from.raw));
+}
+
+template <typename FromT, typename ToT, HWY_IF_NOT_FLOAT(FromT)>
+HWY_INLINE Vec1<ToT> DemoteTo(Sisd<ToT> /* tag */, Vec1<FromT> from) {
+  static_assert(sizeof(ToT) < sizeof(FromT), "Not demoting");
+  // Int to int: choose closest value in ToT to `from` (avoids UB)
+  const FromT lo = LimitsMin<ToT>();
+  const FromT hi = LimitsMax<ToT>();
+  const FromT clamped = std::min<FromT>(std::max<FromT>(lo, from.raw), hi);
+  return Vec1<ToT>(static_cast<ToT>(clamped));
+}
+
+static HWY_INLINE Vec1<float> PromoteTo(Sisd<float> /* tag */,
+                                        const Vec1<float16_t> v) {
+#if HWY_NATIVE_FLOAT16
+  uint16_t bits16;
+  CopyBytes<2>(&v.raw, &bits16);
+#else
+  const uint16_t bits16 = v.raw.bits;
+#endif
+  const uint32_t sign = bits16 >> 15;                 // bit 15
+  const uint32_t biased_exp = (bits16 >> 10) & 0x1F;  // bits 10..14
+  const uint32_t mantissa = bits16 & 0x3FF;           // bits 0..9
+
+  // Subnormal or zero: value = mantissa * 2^-10 * 2^-14.
+  if (biased_exp == 0) {
+    const float subnormal =
+        (1.0f / 16384) * (static_cast<float>(mantissa) * (1.0f / 1024));
+    return Vec1<float>(sign ? -subnormal : subnormal);
+  }
+
+  // Normalized: convert the representation directly (faster than ldexp/tables).
+  const uint32_t biased_exp32 = biased_exp + (127 - 15);  // rebias 15 -> 127
+  const uint32_t mantissa32 = mantissa << (23 - 10);  // widen 10 -> 23 bits
+  const uint32_t bits32 = (sign << 31) | (biased_exp32 << 23) | mantissa32;
+  float out;
+  CopyBytes<4>(&bits32, &out);
+  return Vec1<float>(out);
+}
+
+static HWY_INLINE Vec1<float16_t> DemoteTo(Sisd<float16_t> /* tag */,
+                                           const Vec1<float> v) {
+  uint32_t bits32;
+  CopyBytes<4>(&v.raw, &bits32);
+  const uint32_t sign = bits32 >> 31;                   // bit 31
+  const uint32_t biased_exp32 = (bits32 >> 23) & 0xFF;  // bits 23..30
+  const uint32_t mantissa32 = bits32 & 0x7FFFFF;        // bits 0..22
+
+  const int32_t exp = HWY_MIN(static_cast<int32_t>(biased_exp32) - 127, 15);
+
+  // Tiny or zero => zero. NOTE(review): the sign is dropped (all bits zero).
+  Vec1<float16_t> out;
+  if (exp < -24) {
+#if HWY_NATIVE_FLOAT16
+    const uint16_t zero = 0;
+    CopyBytes<2>(&zero, &out.raw);
+#else
+    out.raw.bits = 0;
+#endif
+    return out;
+  }
+
+  uint32_t biased_exp16, mantissa16;
+
+  // exp = [-24, -15] => subnormal
+  if (exp < -14) {
+    biased_exp16 = 0;
+    const uint32_t sub_exp = static_cast<uint32_t>(-14 - exp);
+    HWY_DASSERT(1 <= sub_exp && sub_exp < 11);
+    mantissa16 = (1 << (10 - sub_exp)) + (mantissa32 >> (13 + sub_exp));
+  } else {
+    // exp = [-14, 15] (larger exponents were capped to 15 above)
+    biased_exp16 = static_cast<uint32_t>(exp + 15);
+    HWY_DASSERT(1 <= biased_exp16 && biased_exp16 < 31);
+    mantissa16 = mantissa32 >> 13;  // keep the upper 10 mantissa bits
+  }
+
+  HWY_DASSERT(mantissa16 < 1024);
+  const uint32_t bits16 = (sign << 15) | (biased_exp16 << 10) | mantissa16;
+  HWY_DASSERT(bits16 < 0x10000);
+#if HWY_NATIVE_FLOAT16
+  const uint16_t narrowed = static_cast<uint16_t>(bits16);  // big-endian safe
+  CopyBytes<2>(&narrowed, &out.raw);
+#else
+  out.raw.bits = static_cast<uint16_t>(bits16);
+#endif
+  return out;
+}
+
+template <typename FromT, typename ToT, HWY_IF_FLOAT(FromT)>
+HWY_INLINE Vec1<ToT> ConvertTo(Sisd<ToT> /* tag */, Vec1<FromT> from) {
+  static_assert(sizeof(ToT) == sizeof(FromT), "Should have same size");
+  // float## -> int##: return closest representable value. We cannot exactly
+  // represent LimitsMax<ToT> in FromT, so use double.
+  const double f = static_cast<double>(from.raw);
+  // Negated "<" so that NaN, infinity and |f| == 2^63 (LimitsMax<int64_t>
+  // rounds up to it as double) saturate instead of reaching an undefined cast.
+  if (!(std::fabs(f) < static_cast<double>(LimitsMax<ToT>()))) {
+    return Vec1<ToT>(std::signbit(from.raw) ? LimitsMin<ToT>()
+                                            : LimitsMax<ToT>());
+  }
+  return Vec1<ToT>(static_cast<ToT>(from.raw));
+}
+
+template <typename FromT, typename ToT, HWY_IF_NOT_FLOAT(FromT)>
+HWY_INLINE Vec1<ToT> ConvertTo(Sisd<ToT> /* tag */, Vec1<FromT> from) {
+  static_assert(sizeof(ToT) == sizeof(FromT), "Should have same size");
+  // int## -> float## of the same lane size: plain static_cast, no check needed
+  return Vec1<ToT>(static_cast<ToT>(from.raw));
+}
+
+HWY_INLINE Vec1<uint8_t> U8FromU32(const Vec1<uint32_t> v) {
+  return DemoteTo(Sisd<uint8_t>(), v);  // forwards to the u32 -> u8 DemoteTo
+}
+
+// ================================================== SWIZZLE
+
+// Unsupported: Shift*Bytes, CombineShiftRightBytes, Interleave*, Shuffle*,
+// UpperHalf - these require more than one lane and/or actual 128-bit vectors.
+
+template <typename T>
+HWY_INLINE T GetLane(const Vec1<T> v) {
+  return v.raw;  // value of the only lane
+}
+
+template <typename T>
+HWY_INLINE Vec1<T> LowerHalf(Vec1<T> v) {
+  return v;  // single-lane vector: returned as-is
+}
+
+// ------------------------------ Broadcast/splat any lane
+
+template <int kLane, typename T>
+HWY_INLINE Vec1<T> Broadcast(const Vec1<T> v) {
+  static_assert(kLane == 0, "Scalar only has one lane");
+  return v;  // broadcasting lane 0 of a one-lane vector is the identity
+}
+
+// ------------------------------ Shuffle bytes with variable indices
+
+// Returns vector of bytes[from[i]]. "from" is also interpreted as bytes, i.e.
+// indices in [0, sizeof(T)).
+template <typename T>
+HWY_API Vec1<T> TableLookupBytes(const Vec1<T> in, const Vec1<T> from) {
+  // Byte images of the table (`in`), the selectors (`from`) and the result.
+  uint8_t table[sizeof(T)], selectors[sizeof(T)], picked[sizeof(T)];
+  CopyBytes<sizeof(T)>(&in, &table);
+  CopyBytes<sizeof(T)>(&from, &selectors);
+  for (size_t k = 0; k != sizeof(T); ++k) {
+    picked[k] = table[selectors[k]];  // output byte k = table byte chosen by k
+  }
+  // Reassemble the lane value from the shuffled bytes.
+  T result;
+  CopyBytes<sizeof(T)>(&picked, &result);
+  return Vec1<T>{result};
+}
+
+// ------------------------------ TableLookupLanes
+
+// Returned by SetTableIndices for use by TableLookupLanes.
+template <typename T>
+struct Indices1 {
+  int raw;  // the index stored by SetTableIndices (idx[0])
+};
+
+template <typename T>
+HWY_API Indices1<T> SetTableIndices(Sisd<T>, const int32_t* idx) {
+#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER)
+  HWY_DASSERT(idx[0] == 0);  // with one lane, 0 is the only index accepted
+#endif
+  return Indices1<T>{idx[0]};  // only the first entry of idx is read
+}
+
+template <typename T>
+HWY_API Vec1<T> TableLookupLanes(const Vec1<T> v, const Indices1<T> /* idx */) {
+  return v;  // idx is ignored: the result is always the input lane
+}
+
+// ------------------------------ Zip/unpack
+
+HWY_INLINE Vec1<uint16_t> ZipLower(const Vec1<uint8_t> a,
+                                   const Vec1<uint8_t> b) {  // b: high, a: low
+  return Vec1<uint16_t>(static_cast<uint16_t>((uint32_t(b.raw) << 8) + a.raw));
+}
+HWY_INLINE Vec1<uint32_t> ZipLower(const Vec1<uint16_t> a,
+                                   const Vec1<uint16_t> b) {  // b: high, a: low
+  return Vec1<uint32_t>((uint32_t(b.raw) << 16) + a.raw);
+}
+HWY_INLINE Vec1<uint64_t> ZipLower(const Vec1<uint32_t> a,
+                                   const Vec1<uint32_t> b) {  // b: high, a: low
+  return Vec1<uint64_t>((uint64_t(b.raw) << 32) + a.raw);
+}
+// Signed: combine zero-extended bit patterns (b: high half, a: low half) so that a
+// negative `a` cannot sign-extend into b's half and no negative value is shifted.
+HWY_INLINE Vec1<int16_t> ZipLower(const Vec1<int8_t> a, const Vec1<int8_t> b) {
+  return Vec1<int16_t>(static_cast<int16_t>((uint32_t(uint8_t(b.raw)) << 8) | uint8_t(a.raw)));
+}
+HWY_INLINE Vec1<int32_t> ZipLower(const Vec1<int16_t> a, const Vec1<int16_t> b) {
+  return Vec1<int32_t>(static_cast<int32_t>((uint32_t(uint16_t(b.raw)) << 16) | uint16_t(a.raw)));
+}
+HWY_INLINE Vec1<int64_t> ZipLower(const Vec1<int32_t> a, const Vec1<int32_t> b) {
+  return Vec1<int64_t>(static_cast<int64_t>((uint64_t(uint32_t(b.raw)) << 32) | uint32_t(a.raw)));
+}
+
+// ------------------------------ Mask
+
+template <typename T>
+HWY_INLINE bool AllFalse(const Mask1<T> mask) {
+  return mask.bits == 0;  // true when the only lane's mask is clear
+}
+
+template <typename T>
+HWY_INLINE bool AllTrue(const Mask1<T> mask) {
+  return mask.bits != 0;  // one lane: "all true" is the same as "any true"
+}
+
+template <typename T>
+HWY_INLINE size_t StoreMaskBits(const Mask1<T> mask, uint8_t* p) {
+  p[0] = static_cast<uint8_t>(mask.bits != 0);  // 1 if the lane is set, else 0
+  return 1;  // a single byte was stored
+}
+template <typename T>
+HWY_INLINE size_t CountTrue(const Mask1<T> mask) {
+  return static_cast<size_t>(mask.bits != 0);  // 1 if the only lane is set, else 0
+}
+
+template <typename T>
+HWY_API Vec1<T> Compress(Vec1<T> v, const Mask1<T> /* mask */) {
+  // Upper lanes are undefined, so result is the same independent of mask:
+  return v;  // the input is returned whether or not its lane is selected
+}
+
+// ------------------------------ CompressStore
+
+template <typename T>
+HWY_API size_t CompressStore(Vec1<T> v, const Mask1<T> mask, Sisd<T> d,
+                             T* HWY_RESTRICT aligned) {
+  Store(Compress(v, mask), d, aligned);  // always writes the lane, even if unset
+  return CountTrue(mask);  // 0 or 1
+}
+
+// ------------------------------ Reductions
+
+// Sum of all lanes, i.e. the only one, so the input is returned unchanged.
+template <typename T>
+HWY_INLINE Vec1<T> SumOfLanes(const Vec1<T> v0) {
+  return v0;
+}
+template <typename T>
+HWY_INLINE Vec1<T> MinOfLanes(const Vec1<T> v) {
+  return v;  // minimum over a single lane is that lane
+}
+template <typename T>
+HWY_INLINE Vec1<T> MaxOfLanes(const Vec1<T> v) {
+  return v;  // maximum over a single lane is that lane
+}
+
+// ================================================== Operator wrapper
+
+template <class V>
+HWY_API V Add(V a, V b) {
+  return a + b;  // named-function form of operator+
+}
+template <class V>
+HWY_API V Sub(V a, V b) {
+  return a - b;  // named-function form of operator-
+}
+
+template <class V>
+HWY_API V Mul(V a, V b) {
+  return a * b;  // named-function form of operator*
+}
+template <class V>
+HWY_API V Div(V a, V b) {
+  return a / b;  // named-function form of operator/
+}
+
+template <class V>
+HWY_API V Shl(V a, V b) {  // HWY_API for consistency with the other wrappers
+  return a << b;  // named-function form of operator<<
+}
+template <class V>
+HWY_API V Shr(V a, V b) {  // HWY_API for consistency with the other wrappers
+  return a >> b;  // named-function form of operator>>
+}
+
+template <class V>
+HWY_API auto Eq(V a, V b) -> decltype(a == b) {
+  return a == b;  // named-function form of operator==
+}
+template <class V>
+HWY_API auto Lt(V a, V b) -> decltype(a == b) {  // return type taken from a == b
+  return a < b;  // named-function form of operator<
+}
+
+template <class V>
+HWY_API auto Gt(V a, V b) -> decltype(a == b) {  // return type taken from a == b
+  return a > b;  // named-function form of operator>
+}
+template <class V>
+HWY_API auto Ge(V a, V b) -> decltype(a == b) {  // return type taken from a == b
+  return a >= b;  // named-function form of operator>=
+}
+
+template <class V>
+HWY_API auto Le(V a, V b) -> decltype(a == b) {  // return type taken from a == b
+  return a <= b;  // named-function form of operator<=
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/set_macros-inl.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/set_macros-inl.h
new file mode 100644
index 0000000000..8188d56e3b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/set_macros-inl.h
@@ -0,0 +1,249 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Sets macros based on HWY_TARGET.
+
+// This include guard is toggled by foreach_target, so avoid the usual _H_
+// suffix to prevent copybara from renaming it.
+#if defined(HWY_SET_MACROS_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
+#ifdef HWY_SET_MACROS_PER_TARGET
+#undef HWY_SET_MACROS_PER_TARGET
+#else
+#define HWY_SET_MACROS_PER_TARGET
+#endif
+// The guard ends right away: everything below is re-evaluated on every include.
+#endif  // HWY_SET_MACROS_PER_TARGET
+
+#include "hwy/targets.h"
+
+#undef HWY_NAMESPACE
+#undef HWY_ALIGN
+#undef HWY_LANES
+
+#undef HWY_CAP_INTEGER64
+#undef HWY_CAP_FLOAT64
+#undef HWY_CAP_GE256
+#undef HWY_CAP_GE512
+
+#undef HWY_TARGET_STR
+
+// Outside the include guard so we redefine HWY_TARGET_STR on each include,
+// governed by the current HWY_TARGET.
+//-----------------------------------------------------------------------------
+// SSE4
+#if HWY_TARGET == HWY_SSE4
+
+#define HWY_NAMESPACE N_SSE4
+#define HWY_ALIGN alignas(16)
+#define HWY_LANES(T) (16 / sizeof(T))
+
+#define HWY_CAP_INTEGER64 1
+#define HWY_CAP_FLOAT64 1
+#define HWY_CAP_GE256 0
+#define HWY_CAP_GE512 0
+
+#define HWY_TARGET_STR "sse2,ssse3,sse4.1"
+
+//-----------------------------------------------------------------------------
+// AVX2
+#elif HWY_TARGET == HWY_AVX2
+
+#define HWY_NAMESPACE N_AVX2
+#define HWY_ALIGN alignas(32)
+#define HWY_LANES(T) (32 / sizeof(T))
+
+#define HWY_CAP_INTEGER64 1
+#define HWY_CAP_FLOAT64 1
+#define HWY_CAP_GE256 1
+#define HWY_CAP_GE512 0
+
+#if defined(HWY_DISABLE_BMI2_FMA)
+#define HWY_TARGET_STR "avx,avx2,f16c"
+#else
+#define HWY_TARGET_STR "avx,avx2,bmi,bmi2,fma,f16c"
+#endif
+
+//-----------------------------------------------------------------------------
+// AVX3
+#elif HWY_TARGET == HWY_AVX3
+
+#define HWY_ALIGN alignas(64)
+#define HWY_LANES(T) (64 / sizeof(T))
+
+#define HWY_CAP_INTEGER64 1
+#define HWY_CAP_FLOAT64 1
+#define HWY_CAP_GE256 1
+#define HWY_CAP_GE512 1
+
+#define HWY_NAMESPACE N_AVX3
+
+// Must include AVX2 because an AVX3 test may call AVX2 functions (e.g. when
+// converting to half-vectors). HWY_DISABLE_BMI2_FMA is not relevant because if
+// we have AVX3, we should also have BMI2/FMA.
+#define HWY_TARGET_STR \
+  "avx,avx2,bmi,bmi2,fma,f16c,avx512f,avx512vl,avx512dq,avx512bw"
+
+//-----------------------------------------------------------------------------
+// PPC8
+#elif HWY_TARGET == HWY_PPC8
+
+#define HWY_ALIGN alignas(16)
+#define HWY_LANES(T) (16 / sizeof(T))
+
+#define HWY_CAP_INTEGER64 1
+#define HWY_CAP_FLOAT64 1
+#define HWY_CAP_GE256 0
+#define HWY_CAP_GE512 0
+
+#define HWY_NAMESPACE N_PPC8
+
+#define HWY_TARGET_STR "altivec,vsx"
+
+//-----------------------------------------------------------------------------
+// NEON
+#elif HWY_TARGET == HWY_NEON
+
+#define HWY_ALIGN alignas(16)
+#define HWY_LANES(T) (16 / sizeof(T))
+
+#define HWY_CAP_INTEGER64 1
+#define HWY_CAP_GE256 0
+#define HWY_CAP_GE512 0
+
+#if HWY_ARCH_ARM_A64
+#define HWY_CAP_FLOAT64 1
+#else
+#define HWY_CAP_FLOAT64 0
+#endif
+
+#define HWY_NAMESPACE N_NEON
+
+// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
+
+//-----------------------------------------------------------------------------
+// SVE[2]
+#elif HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE
+
+// SVE only requires lane alignment, not natural alignment of the entire vector.
+#define HWY_ALIGN alignas(8)
+// Upper bound, not the actual lane count!
+#define HWY_LANES(T) (256 / sizeof(T))
+
+#define HWY_CAP_INTEGER64 1
+#define HWY_CAP_FLOAT64 1
+#define HWY_CAP_GE256 0
+#define HWY_CAP_GE512 0
+
+#if HWY_TARGET == HWY_SVE2
+#define HWY_NAMESPACE N_SVE2
+#else
+#define HWY_NAMESPACE N_SVE
+#endif
+
+// HWY_TARGET_STR remains undefined - TODO(janwas): attribute for SVE?
+
+//-----------------------------------------------------------------------------
+// WASM
+#elif HWY_TARGET == HWY_WASM
+
+#define HWY_ALIGN alignas(16)
+#define HWY_LANES(T) (16 / sizeof(T))
+
+#define HWY_CAP_INTEGER64 0
+#define HWY_CAP_FLOAT64 0
+#define HWY_CAP_GE256 0
+#define HWY_CAP_GE512 0
+
+#define HWY_NAMESPACE N_WASM
+
+#define HWY_TARGET_STR "simd128"
+
+//-----------------------------------------------------------------------------
+// RVV
+#elif HWY_TARGET == HWY_RVV
+
+// RVV only requires lane alignment, not natural alignment of the entire vector,
+// and the compiler already aligns builtin types, so nothing to do here.
+#define HWY_ALIGN
+
+// Arbitrary constant, not the actual lane count! Large enough that we can
+// mul/div by 8 for LMUL. Value matches kMaxVectorSize, see base.h.
+#define HWY_LANES(T) (4096 / sizeof(T))
+
+
+#define HWY_CAP_INTEGER64 1
+#define HWY_CAP_FLOAT64 1
+#define HWY_CAP_GE256 0
+#define HWY_CAP_GE512 0
+
+#define HWY_NAMESPACE N_RVV
+
+// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
+// (rv64gcv is not a valid target)
+
+//-----------------------------------------------------------------------------
+// SCALAR
+#elif HWY_TARGET == HWY_SCALAR
+
+#define HWY_ALIGN
+// For internal use only; use Lanes(d) instead.
+#define HWY_LANES(T) 1
+
+#define HWY_CAP_INTEGER64 1
+#define HWY_CAP_FLOAT64 1
+#define HWY_CAP_GE256 0
+#define HWY_CAP_GE512 0
+
+#define HWY_NAMESPACE N_SCALAR
+
+// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
+
+#else
+#pragma message("HWY_TARGET does not match any known target")
+#endif  // HWY_TARGET
+
+// Clang <9 requires this be invoked at file scope, before any namespace.
+#undef HWY_BEFORE_NAMESPACE
+#if defined(HWY_TARGET_STR)
+#define HWY_BEFORE_NAMESPACE()        \
+  HWY_PUSH_ATTRIBUTES(HWY_TARGET_STR) \
+  static_assert(true, "For requiring trailing semicolon")
+#else
+// avoids compiler warning if no HWY_TARGET_STR
+#define HWY_BEFORE_NAMESPACE() \
+  static_assert(true, "For requiring trailing semicolon")
+#endif
+
+// Clang <9 requires any namespaces be closed before this macro.
+#undef HWY_AFTER_NAMESPACE
+#if defined(HWY_TARGET_STR)
+#define HWY_AFTER_NAMESPACE() \
+  HWY_POP_ATTRIBUTES          \
+  static_assert(true, "For requiring trailing semicolon")
+#else
+// avoids compiler warning if no HWY_TARGET_STR
+#define HWY_AFTER_NAMESPACE() \
+  static_assert(true, "For requiring trailing semicolon")
+#endif
+
+#undef HWY_ATTR
+#if defined(HWY_TARGET_STR) && HWY_HAS_ATTRIBUTE(target)
+#define HWY_ATTR __attribute__((target(HWY_TARGET_STR)))
+#else
+#define HWY_ATTR
+#endif
+
+// DEPRECATED
+#undef HWY_GATHER_LANES
+#define HWY_GATHER_LANES(T) HWY_LANES(T)
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/shared-inl.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/shared-inl.h
new file mode 100644
index 0000000000..11a7b616f8
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/shared-inl.h
@@ -0,0 +1,125 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Per-target definitions shared by ops/*.h and user code.
+
+#include <cmath>
+
+// Separate header because foreach_target.h re-enables its include guard.
+#include "hwy/ops/set_macros-inl.h"
+
+// Relies on the external include guard in highway.h.
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// SIMD operations are implemented as overloaded functions selected using a
+// "descriptor" D := Simd<T, N>. T is the lane type, N a number of lanes >= 1
+// (always a power of two). Users generally do not choose N directly, but
+// instead use HWY_FULL(T[, LMUL]) (the largest available size). N is not
+// necessarily the actual number of lanes, which is returned by Lanes(D()).
+//
+// Only HWY_FULL(T) and N <= 16 / sizeof(T) are guaranteed to be available - the
+// latter are useful if >128 bit vectors are unnecessary or undesirable.
+template <typename Lane, size_t N>
+struct Simd {  // tag type without data members; carries only compile-time info
+  constexpr Simd() = default;
+  using T = Lane;  // lane type, read back through TFromD<D>
+  static_assert((N & (N - 1)) == 0 && N != 0, "N must be a power of two");
+
+  // Widening/narrowing ops change the number of lanes and/or their type.
+  // To initialize such vectors, we need the corresponding descriptor types:
+
+  // PromoteTo/DemoteTo() with another lane type, but same number of lanes.
+  template <typename NewLane>
+  using Rebind = Simd<NewLane, N>;
+
+  // MulEven() with another lane type, but same total size.
+  // Round up to correctly handle scalars with N=1.
+  template <typename NewLane>
+  using Repartition =
+      Simd<NewLane, (N * sizeof(Lane) + sizeof(NewLane) - 1) / sizeof(NewLane)>;
+
+  // LowerHalf() with the same lane type, but half the lanes.
+  // Round up to correctly handle scalars with N=1.
+  using Half = Simd<T, (N + 1) / 2>;  // e.g. N=4 -> 2, N=1 -> 1
+
+  // Combine() with the same lane type, but twice the lanes.
+  using Twice = Simd<T, 2 * N>;
+};
+
+template <class D>
+using TFromD = typename D::T;  // lane type of descriptor D
+
+// Descriptor for the same number of lanes as D, but with the LaneType T.
+template <class T, class D>
+using Rebind = typename D::template Rebind<T>;
+// Rebind shortcuts: lane type of D mapped through MakeSigned/Unsigned/Float.
+template <class D>
+using RebindToSigned = Rebind<MakeSigned<TFromD<D>>, D>;
+template <class D>
+using RebindToUnsigned = Rebind<MakeUnsigned<TFromD<D>>, D>;
+template <class D>
+using RebindToFloat = Rebind<MakeFloat<TFromD<D>>, D>;
+
+// Descriptor for the same total size as D, but with the LaneType T.
+template <class T, class D>
+using Repartition = typename D::template Repartition<T>;
+// Repartition shortcuts: lane type of D mapped through MakeWide/MakeNarrow.
+template <class D>
+using RepartitionToWide = Repartition<MakeWide<TFromD<D>>, D>;
+template <class D>
+using RepartitionToNarrow = Repartition<MakeNarrow<TFromD<D>>, D>;
+
+// Descriptor for the same lane type as D, but half the lanes.
+template <class D>
+using Half = typename D::Half;
+
+// Descriptor for the same lane type as D, but twice the lanes.
+template <class D>
+using Twice = typename D::Twice;
+
+// Same as base.h macros but with a Simd<T, N> argument instead of T.
+#define HWY_IF_UNSIGNED_D(D) HWY_IF_UNSIGNED(TFromD<D>)
+#define HWY_IF_SIGNED_D(D) HWY_IF_SIGNED(TFromD<D>)
+#define HWY_IF_FLOAT_D(D) HWY_IF_FLOAT(TFromD<D>)
+#define HWY_IF_NOT_FLOAT_D(D) HWY_IF_NOT_FLOAT(TFromD<D>)
+#define HWY_IF_LANE_SIZE_D(D, bytes) HWY_IF_LANE_SIZE(TFromD<D>, bytes)
+#define HWY_IF_NOT_LANE_SIZE_D(D, bytes) HWY_IF_NOT_LANE_SIZE(TFromD<D>, bytes)
+
+// Compile-time-constant, (typically but not guaranteed) an upper bound on the
+// number of lanes.
+// Prefer instead using Lanes() and dynamic allocation, or Rebind, or
+// `#if HWY_CAP_GE*`.
+template <typename T, size_t N>
+HWY_INLINE HWY_MAYBE_UNUSED constexpr size_t MaxLanes(Simd<T, N>) {
+  return N;  // the descriptor's N, usable in constant expressions
+}
+
+// Targets with non-constexpr Lanes define this themselves.
+#if HWY_TARGET != HWY_RVV && HWY_TARGET != HWY_SVE2 && HWY_TARGET != HWY_SVE
+
+// (Potentially) non-constant actual size of the vector at runtime, subject to
+// the limit imposed by the Simd. Useful for advancing loop counters.
+template <typename T, size_t N>
+HWY_INLINE HWY_MAYBE_UNUSED size_t Lanes(Simd<T, N>) {
+  return N;  // on these targets the lane count equals the descriptor's N
+}
+
+#endif
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/wasm_128-inl.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/wasm_128-inl.h
new file mode 100644
index 0000000000..06d70400a4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/wasm_128-inl.h
@@ -0,0 +1,3077 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// 128-bit WASM vectors and operations.
+// External include guard in highway.h - see comment there.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <wasm_simd128.h>
+
+#include "hwy/base.h"
+#include "hwy/ops/shared-inl.h"
+
+#ifdef HWY_WASM_OLD_NAMES  // map the names used below onto alternate spellings
+#define wasm_i8x16_shuffle wasm_v8x16_shuffle
+#define wasm_i16x8_shuffle wasm_v16x8_shuffle
+#define wasm_i32x4_shuffle wasm_v32x4_shuffle
+#define wasm_i64x2_shuffle wasm_v64x2_shuffle
+#define wasm_u16x8_extend_low_u8x16 wasm_i16x8_widen_low_u8x16
+#define wasm_u32x4_extend_low_u16x8 wasm_i32x4_widen_low_u16x8
+#define wasm_i32x4_extend_low_i16x8 wasm_i32x4_widen_low_i16x8
+#define wasm_i16x8_extend_low_i8x16 wasm_i16x8_widen_low_i8x16
+#define wasm_u32x4_extend_high_u16x8 wasm_i32x4_widen_high_u16x8
+#define wasm_i32x4_extend_high_i16x8 wasm_i32x4_widen_high_i16x8
+#define wasm_i32x4_trunc_sat_f32x4 wasm_i32x4_trunc_saturate_f32x4
+#define wasm_u8x16_add_sat wasm_u8x16_add_saturate
+#define wasm_u8x16_sub_sat wasm_u8x16_sub_saturate
+#define wasm_u16x8_add_sat wasm_u16x8_add_saturate
+#define wasm_u16x8_sub_sat wasm_u16x8_sub_saturate
+#define wasm_i8x16_add_sat wasm_i8x16_add_saturate
+#define wasm_i8x16_sub_sat wasm_i8x16_sub_saturate
+#define wasm_i16x8_add_sat wasm_i16x8_add_saturate
+#define wasm_i16x8_sub_sat wasm_i16x8_sub_saturate
+#endif  // HWY_WASM_OLD_NAMES
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+template <typename T>
+struct Raw128 {
+  using type = __v128_u;  // raw vector type unless a specialization overrides it
+};
+template <>
+struct Raw128<float> {
+  using type = __f32x4;  // float lanes use the f32x4 vector type
+};
+
+template <typename T>
+using Full128 = Simd<T, 16 / sizeof(T)>;  // descriptor for as many T as fit in 16 bytes
+
+template <typename T, size_t N = 16 / sizeof(T)>
+class Vec128 {  // N lanes of T; by default as many as fit in 16 bytes
+  using Raw = typename Raw128<T>::type;
+
+ public:
+  // Compound assignment. Only usable if there is a corresponding non-member
+  // binary operator overload. For example, only f32 and f64 support division.
+  HWY_INLINE Vec128& operator*=(const Vec128 other) {
+    return *this = (*this * other);
+  }
+  HWY_INLINE Vec128& operator/=(const Vec128 other) {
+    return *this = (*this / other);
+  }
+  HWY_INLINE Vec128& operator+=(const Vec128 other) {
+    return *this = (*this + other);
+  }
+  HWY_INLINE Vec128& operator-=(const Vec128 other) {
+    return *this = (*this - other);
+  }
+  HWY_INLINE Vec128& operator&=(const Vec128 other) {
+    return *this = (*this & other);
+  }
+  HWY_INLINE Vec128& operator|=(const Vec128 other) {
+    return *this = (*this | other);
+  }
+  HWY_INLINE Vec128& operator^=(const Vec128 other) {
+    return *this = (*this ^ other);
+  }
+
+  Raw raw;  // the wrapped vector; ops in this file read and write it directly
+};
+
+// Integer: FF..FF or 0. Float: MSB, all other bits undefined - see README.
+template <typename T, size_t N = 16 / sizeof(T)>
+class Mask128 {
+  using Raw = typename Raw128<T>::type;  // same raw type as Vec128<T, N>
+
+ public:
+  Raw raw;  // mask lanes, stored as a full vector
+};
+
+// ------------------------------ BitCast
+
+namespace detail {
+// Helpers that route BitCast through a Vec128<uint8_t> of the same byte size.
+HWY_API __v128_u BitCastToInteger(__v128_u v) { return v; }
+HWY_API __v128_u BitCastToInteger(__f32x4 v) {
+  return static_cast<__v128_u>(v);
+}
+HWY_API __v128_u BitCastToInteger(__f64x2 v) {
+  return static_cast<__v128_u>(v);
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<uint8_t, N * sizeof(T)> BitCastToByte(Vec128<T, N> v) {
+  return Vec128<uint8_t, N * sizeof(T)>{BitCastToInteger(v.raw)};
+}
+
+// Cannot rely on function overloading because return types differ.
+template <typename T>
+struct BitCastFromInteger128 {
+  HWY_INLINE __v128_u operator()(__v128_u v) { return v; }
+};
+template <>
+struct BitCastFromInteger128<float> {
+  HWY_INLINE __f32x4 operator()(__v128_u v) { return static_cast<__f32x4>(v); }
+};
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> BitCastFromByte(Simd<T, N> /* tag */,
+                                     Vec128<uint8_t, N * sizeof(T)> v) {
+  return Vec128<T, N>{BitCastFromInteger128<T>()(v.raw)};  // raw type chosen by T
+}
+
+}  // namespace detail
+
+template <typename T, size_t N, typename FromT>
+HWY_API Vec128<T, N> BitCast(Simd<T, N> d,
+                             Vec128<FromT, N * sizeof(T) / sizeof(FromT)> v) {
+  return detail::BitCastFromByte(d, detail::BitCastToByte(v));  // via u8 lanes
+}
+
+// ------------------------------ Set
+
+// Returns an all-zero vector/part. Non-float lane types use the i32x4 splat.
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API Vec128<T, N> Zero(Simd<T, N> /* tag */) {
+  return Vec128<T, N>{wasm_i32x4_splat(0)};
+}
+template <size_t N, HWY_IF_LE128(float, N)>
+HWY_API Vec128<float, N> Zero(Simd<float, N> /* tag */) {
+  return Vec128<float, N>{wasm_f32x4_splat(0.0f)};  // float overload: f32x4 splat
+}
+
+// Returns a vector/part with all lanes set to "t" (unsigned lane types first).
+template <size_t N, HWY_IF_LE128(uint8_t, N)>
+HWY_API Vec128<uint8_t, N> Set(Simd<uint8_t, N> /* tag */, const uint8_t t) {
+  return Vec128<uint8_t, N>{wasm_i8x16_splat(t)};
+}
+template <size_t N, HWY_IF_LE128(uint16_t, N)>
+HWY_API Vec128<uint16_t, N> Set(Simd<uint16_t, N> /* tag */, const uint16_t t) {
+  return Vec128<uint16_t, N>{wasm_i16x8_splat(t)};
+}
+template <size_t N, HWY_IF_LE128(uint32_t, N)>
+HWY_API Vec128<uint32_t, N> Set(Simd<uint32_t, N> /* tag */, const uint32_t t) {
+  return Vec128<uint32_t, N>{wasm_i32x4_splat(t)};
+}
+// Signed: same splat intrinsics as the unsigned overloads of equal width.
+template <size_t N, HWY_IF_LE128(int8_t, N)>
+HWY_API Vec128<int8_t, N> Set(Simd<int8_t, N> /* tag */, const int8_t t) {
+  return Vec128<int8_t, N>{wasm_i8x16_splat(t)};
+}
+template <size_t N, HWY_IF_LE128(int16_t, N)>
+HWY_API Vec128<int16_t, N> Set(Simd<int16_t, N> /* tag */, const int16_t t) {
+  return Vec128<int16_t, N>{wasm_i16x8_splat(t)};
+}
+template <size_t N, HWY_IF_LE128(int32_t, N)>
+HWY_API Vec128<int32_t, N> Set(Simd<int32_t, N> /* tag */, const int32_t t) {
+  return Vec128<int32_t, N>{wasm_i32x4_splat(t)};
+}
+// Float
+template <size_t N, HWY_IF_LE128(float, N)>
+HWY_API Vec128<float, N> Set(Simd<float, N> /* tag */, const float t) {
+  return Vec128<float, N>{wasm_f32x4_splat(t)};
+}
+
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wuninitialized")
+
+// Returns a vector whose contents callers must not rely on (zeros here).
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API Vec128<T, N> Undefined(Simd<T, N> d) {
+  return Zero(d);  // this implementation zero-initializes instead
+}
+
+HWY_DIAGNOSTICS(pop)
+
+// Returns a vector with lane i=[0, N) set to "first" + i.
+template <typename T, size_t N, typename T2>
+HWY_API Vec128<T, N> Iota(const Simd<T, N> d, const T2 first) {
+  HWY_ALIGN T lanes[16 / sizeof(T)];  // sized for 16 bytes regardless of N
+  for (size_t i = 0; i < 16 / sizeof(T); ++i) {
+    lanes[i] = static_cast<T>(first + static_cast<T2>(i));  // sum is done in T2
+  }
+  return Load(d, lanes);
+}
+
+// ================================================== ARITHMETIC
+
+// ------------------------------ Addition
+
+// Unsigned: forwards to the wasm_i*_add intrinsic of the same lane width.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> operator+(const Vec128<uint8_t, N> a,
+                                     const Vec128<uint8_t, N> b) {
+  return Vec128<uint8_t, N>{wasm_i8x16_add(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> operator+(const Vec128<uint16_t, N> a,
+                                      const Vec128<uint16_t, N> b) {
+  return Vec128<uint16_t, N>{wasm_i16x8_add(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> operator+(const Vec128<uint32_t, N> a,
+                                      const Vec128<uint32_t, N> b) {
+  return Vec128<uint32_t, N>{wasm_i32x4_add(a.raw, b.raw)};
+}
+
+// Signed: same intrinsics as the unsigned overloads.
+template <size_t N>
+HWY_API Vec128<int8_t, N> operator+(const Vec128<int8_t, N> a,
+                                    const Vec128<int8_t, N> b) {
+  return Vec128<int8_t, N>{wasm_i8x16_add(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> operator+(const Vec128<int16_t, N> a,
+                                     const Vec128<int16_t, N> b) {
+  return Vec128<int16_t, N>{wasm_i16x8_add(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> operator+(const Vec128<int32_t, N> a,
+                                     const Vec128<int32_t, N> b) {
+  return Vec128<int32_t, N>{wasm_i32x4_add(a.raw, b.raw)};
+}
+
+// Float (f32 lanes)
+template <size_t N>
+HWY_API Vec128<float, N> operator+(const Vec128<float, N> a,
+                                   const Vec128<float, N> b) {
+  return Vec128<float, N>{wasm_f32x4_add(a.raw, b.raw)};
+}
+
+// ------------------------------ Subtraction
+
+// Unsigned: forwards to the wasm_i*_sub intrinsic of the same lane width.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> operator-(const Vec128<uint8_t, N> a,
+                                     const Vec128<uint8_t, N> b) {
+  return Vec128<uint8_t, N>{wasm_i8x16_sub(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> operator-(Vec128<uint16_t, N> a,
+                                      Vec128<uint16_t, N> b) {
+  return Vec128<uint16_t, N>{wasm_i16x8_sub(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> operator-(const Vec128<uint32_t, N> a,
+                                      const Vec128<uint32_t, N> b) {
+  return Vec128<uint32_t, N>{wasm_i32x4_sub(a.raw, b.raw)};
+}
+
+// Signed: same intrinsics as the unsigned overloads.
+template <size_t N>
+HWY_API Vec128<int8_t, N> operator-(const Vec128<int8_t, N> a,
+                                    const Vec128<int8_t, N> b) {
+  return Vec128<int8_t, N>{wasm_i8x16_sub(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> operator-(const Vec128<int16_t, N> a,
+                                     const Vec128<int16_t, N> b) {
+  return Vec128<int16_t, N>{wasm_i16x8_sub(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> operator-(const Vec128<int32_t, N> a,
+                                     const Vec128<int32_t, N> b) {
+  return Vec128<int32_t, N>{wasm_i32x4_sub(a.raw, b.raw)};
+}
+
+// Float (f32 lanes)
+template <size_t N>
+HWY_API Vec128<float, N> operator-(const Vec128<float, N> a,
+                                   const Vec128<float, N> b) {
+  return Vec128<float, N>{wasm_f32x4_sub(a.raw, b.raw)};
+}
+
+// ------------------------------ Saturating addition
+
+// Returns a + b clamped to the destination range. Only 8/16-bit lanes here.
+
+// Unsigned: wasm_u*_add_sat intrinsics.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> SaturatedAdd(const Vec128<uint8_t, N> a,
+                                        const Vec128<uint8_t, N> b) {
+  return Vec128<uint8_t, N>{wasm_u8x16_add_sat(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> SaturatedAdd(const Vec128<uint16_t, N> a,
+                                         const Vec128<uint16_t, N> b) {
+  return Vec128<uint16_t, N>{wasm_u16x8_add_sat(a.raw, b.raw)};
+}
+
+// Signed: wasm_i*_add_sat intrinsics.
+template <size_t N>
+HWY_API Vec128<int8_t, N> SaturatedAdd(const Vec128<int8_t, N> a,
+                                       const Vec128<int8_t, N> b) {
+  return Vec128<int8_t, N>{wasm_i8x16_add_sat(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> SaturatedAdd(const Vec128<int16_t, N> a,
+                                        const Vec128<int16_t, N> b) {
+  return Vec128<int16_t, N>{wasm_i16x8_add_sat(a.raw, b.raw)};
+}
+
+// ------------------------------ Saturating subtraction
+
+// Returns a - b clamped to the destination range. Only 8/16-bit lanes here.
+
+// Unsigned: wasm_u*_sub_sat intrinsics.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> SaturatedSub(const Vec128<uint8_t, N> a,
+                                        const Vec128<uint8_t, N> b) {
+  return Vec128<uint8_t, N>{wasm_u8x16_sub_sat(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> SaturatedSub(const Vec128<uint16_t, N> a,
+                                         const Vec128<uint16_t, N> b) {
+  return Vec128<uint16_t, N>{wasm_u16x8_sub_sat(a.raw, b.raw)};
+}
+
+// Signed: wasm_i*_sub_sat intrinsics.
+template <size_t N>
+HWY_API Vec128<int8_t, N> SaturatedSub(const Vec128<int8_t, N> a,
+                                       const Vec128<int8_t, N> b) {
+  return Vec128<int8_t, N>{wasm_i8x16_sub_sat(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> SaturatedSub(const Vec128<int16_t, N> a,
+                                        const Vec128<int16_t, N> b) {
+  return Vec128<int16_t, N>{wasm_i16x8_sub_sat(a.raw, b.raw)};
+}
+
+// ------------------------------ Average
+
+// Returns (a + b + 1) / 2
+
+// Unsigned (u8 and u16 lanes): forwards to the wasm_u*_avgr intrinsics.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> AverageRound(const Vec128<uint8_t, N> a,
+                                        const Vec128<uint8_t, N> b) {
+  return Vec128<uint8_t, N>{wasm_u8x16_avgr(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> AverageRound(const Vec128<uint16_t, N> a,
+                                         const Vec128<uint16_t, N> b) {
+  return Vec128<uint16_t, N>{wasm_u16x8_avgr(a.raw, b.raw)};
+}
+
+// ------------------------------ Absolute value
+
+// Returns absolute value, except that LimitsMin() maps to LimitsMax() + 1.
+// int8_t lanes.
+template <size_t N>
+HWY_API Vec128<int8_t, N> Abs(const Vec128<int8_t, N> v) {
+  const auto magnitude = wasm_i8x16_abs(v.raw);
+  return Vec128<int8_t, N>{magnitude};
+}
+// int16_t lanes.
+template <size_t N>
+HWY_API Vec128<int16_t, N> Abs(const Vec128<int16_t, N> v) {
+  const auto magnitude = wasm_i16x8_abs(v.raw);
+  return Vec128<int16_t, N>{magnitude};
+}
+// int32_t lanes.
+template <size_t N>
+HWY_API Vec128<int32_t, N> Abs(const Vec128<int32_t, N> v) {
+  const auto magnitude = wasm_i32x4_abs(v.raw);
+  return Vec128<int32_t, N>{magnitude};
+}
+// int64_t lanes, emulated with shift/xor/subtract.
+template <size_t N>
+HWY_API Vec128<int64_t, N> Abs(const Vec128<int64_t, N> v) {
+  // TODO(janwas): use wasm_i64x2_abs when available
+  // Shifting right by 63 with the signed intrinsic replicates the sign bit:
+  // all-ones for negative lanes, zero otherwise. (v ^ mask) - mask then
+  // negates exactly the negative lanes.
+  // Brace-initialize from the raw vector, like every other construction in
+  // this file; copy-initialization from the raw type does not compile if the
+  // wrapper is an aggregate.
+  const Vec128<int64_t, N> mask{wasm_i64x2_shr(v.raw, 63)};
+  return (v ^ mask) - mask;
+}
+
+// float lanes.
+template <size_t N>
+HWY_API Vec128<float, N> Abs(const Vec128<float, N> v) {
+  using V = Vec128<float, N>;
+  return V{wasm_f32x4_abs(v.raw)};
+}
+
+// ------------------------------ Shift lanes by constant #bits
+
+// Unsigned
+// uint16_t lanes, left shift by the compile-time count kBits.
+template <int kBits, size_t N>
+HWY_API Vec128<uint16_t, N> ShiftLeft(const Vec128<uint16_t, N> v) {
+  using V = Vec128<uint16_t, N>;
+  return V{wasm_i16x8_shl(v.raw, kBits)};
+}
+// uint16_t lanes, right shift by kBits using the unsigned intrinsic.
+template <int kBits, size_t N>
+HWY_API Vec128<uint16_t, N> ShiftRight(const Vec128<uint16_t, N> v) {
+  using V = Vec128<uint16_t, N>;
+  return V{wasm_u16x8_shr(v.raw, kBits)};
+}
+// uint32_t lanes, left shift by the compile-time count kBits.
+template <int kBits, size_t N>
+HWY_API Vec128<uint32_t, N> ShiftLeft(const Vec128<uint32_t, N> v) {
+  using V = Vec128<uint32_t, N>;
+  return V{wasm_i32x4_shl(v.raw, kBits)};
+}
+// uint32_t lanes, right shift by kBits using the unsigned intrinsic.
+template <int kBits, size_t N>
+HWY_API Vec128<uint32_t, N> ShiftRight(const Vec128<uint32_t, N> v) {
+  using V = Vec128<uint32_t, N>;
+  return V{wasm_u32x4_shr(v.raw, kBits)};
+}
+
+// Signed
+// int16_t lanes, left shift by the compile-time count kBits.
+template <int kBits, size_t N>
+HWY_API Vec128<int16_t, N> ShiftLeft(const Vec128<int16_t, N> v) {
+  using V = Vec128<int16_t, N>;
+  return V{wasm_i16x8_shl(v.raw, kBits)};
+}
+// int16_t lanes, right shift by kBits using the signed intrinsic.
+template <int kBits, size_t N>
+HWY_API Vec128<int16_t, N> ShiftRight(const Vec128<int16_t, N> v) {
+  using V = Vec128<int16_t, N>;
+  return V{wasm_i16x8_shr(v.raw, kBits)};
+}
+// int32_t lanes, left shift by the compile-time count kBits.
+template <int kBits, size_t N>
+HWY_API Vec128<int32_t, N> ShiftLeft(const Vec128<int32_t, N> v) {
+  using V = Vec128<int32_t, N>;
+  return V{wasm_i32x4_shl(v.raw, kBits)};
+}
+// int32_t lanes, right shift by kBits using the signed intrinsic.
+template <int kBits, size_t N>
+HWY_API Vec128<int32_t, N> ShiftRight(const Vec128<int32_t, N> v) {
+  using V = Vec128<int32_t, N>;
+  return V{wasm_i32x4_shr(v.raw, kBits)};
+}
+
+// 8-bit
+// Emulated via the 16-bit shift of the same register, then masking off the
+// bits that crossed over from the neighboring byte.
+template <int kBits, typename T, size_t N, HWY_IF_LANE_SIZE(T, 1)>
+HWY_API Vec128<T, N> ShiftLeft(const Vec128<T, N> v) {
+  // Doubling is equivalent to a shift by one and needs no mask.
+  if (kBits == 1) return v + v;
+
+  const Simd<T, N> d8;
+  // Reinterpret through raw instead of BitCast to support N=1.
+  const Vec128<MakeWide<T>> wide{v.raw};
+  const Vec128<T, N> shifted{ShiftLeft<kBits>(wide).raw};
+  // Bits of a byte that remain valid after shifting left by kBits.
+  const T keep = static_cast<T>((0xFF << kBits) & 0xFF);
+  return shifted & Set(d8, keep);
+}
+
+// uint8_t lanes: shift as 16-bit lanes, then clear the bits that came from
+// the neighboring byte.
+template <int kBits, size_t N>
+HWY_API Vec128<uint8_t, N> ShiftRight(const Vec128<uint8_t, N> v) {
+  // Reinterpret through raw instead of BitCast to support N=1.
+  const Vec128<uint16_t> wide{v.raw};
+  const Vec128<uint8_t, N> shifted{ShiftRight<kBits>(wide).raw};
+  const Simd<uint8_t, N> d8;
+  return shifted & Set(d8, 0xFF >> kBits);
+}
+
+// int8_t lanes: unsigned shift followed by sign extension.
+template <int kBits, size_t N>
+HWY_API Vec128<int8_t, N> ShiftRight(const Vec128<int8_t, N> v) {
+  const Simd<uint8_t, N> du;
+  const Simd<int8_t, N> di;
+  const auto logical = ShiftRight<kBits>(BitCast(du, v));
+  const auto shifted = BitCast(di, logical);
+  // Position of the original sign bit after the shift; xor-then-subtract
+  // extends it into the upper bits.
+  const auto sign = BitCast(di, Set(du, 0x80 >> kBits));
+  return (shifted ^ sign) - sign;
+}
+
+// ------------------------------ Shift lanes by same variable #bits
+
+// Unsigned
+// uint16_t lanes, all shifted left by the same runtime count.
+template <size_t N>
+HWY_API Vec128<uint16_t, N> ShiftLeftSame(const Vec128<uint16_t, N> v,
+                                          const int bits) {
+  using V = Vec128<uint16_t, N>;
+  return V{wasm_i16x8_shl(v.raw, bits)};
+}
+// uint16_t lanes, all shifted right by the same runtime count.
+template <size_t N>
+HWY_API Vec128<uint16_t, N> ShiftRightSame(const Vec128<uint16_t, N> v,
+                                           const int bits) {
+  using V = Vec128<uint16_t, N>;
+  return V{wasm_u16x8_shr(v.raw, bits)};
+}
+// uint32_t lanes, all shifted left by the same runtime count.
+template <size_t N>
+HWY_API Vec128<uint32_t, N> ShiftLeftSame(const Vec128<uint32_t, N> v,
+                                          const int bits) {
+  using V = Vec128<uint32_t, N>;
+  return V{wasm_i32x4_shl(v.raw, bits)};
+}
+// uint32_t lanes, all shifted right by the same runtime count.
+template <size_t N>
+HWY_API Vec128<uint32_t, N> ShiftRightSame(const Vec128<uint32_t, N> v,
+                                           const int bits) {
+  using V = Vec128<uint32_t, N>;
+  return V{wasm_u32x4_shr(v.raw, bits)};
+}
+
+// Signed
+// int16_t lanes, all shifted left by the same runtime count.
+template <size_t N>
+HWY_API Vec128<int16_t, N> ShiftLeftSame(const Vec128<int16_t, N> v,
+                                         const int bits) {
+  using V = Vec128<int16_t, N>;
+  return V{wasm_i16x8_shl(v.raw, bits)};
+}
+// int16_t lanes, all shifted right (signed intrinsic) by the same count.
+template <size_t N>
+HWY_API Vec128<int16_t, N> ShiftRightSame(const Vec128<int16_t, N> v,
+                                          const int bits) {
+  using V = Vec128<int16_t, N>;
+  return V{wasm_i16x8_shr(v.raw, bits)};
+}
+// int32_t lanes, all shifted left by the same runtime count.
+template <size_t N>
+HWY_API Vec128<int32_t, N> ShiftLeftSame(const Vec128<int32_t, N> v,
+                                         const int bits) {
+  using V = Vec128<int32_t, N>;
+  return V{wasm_i32x4_shl(v.raw, bits)};
+}
+// int32_t lanes, all shifted right (signed intrinsic) by the same count.
+template <size_t N>
+HWY_API Vec128<int32_t, N> ShiftRightSame(const Vec128<int32_t, N> v,
+                                          const int bits) {
+  using V = Vec128<int32_t, N>;
+  return V{wasm_i32x4_shr(v.raw, bits)};
+}
+
+// 8-bit
+// Emulated via the 16-bit shift of the same register, then masking off the
+// bits that crossed over from the neighboring byte.
+template <typename T, size_t N, HWY_IF_LANE_SIZE(T, 1)>
+HWY_API Vec128<T, N> ShiftLeftSame(const Vec128<T, N> v, const int bits) {
+  // Reinterpret through raw instead of BitCast to support N=1.
+  const Vec128<MakeWide<T>> wide{v.raw};
+  const Vec128<T, N> shifted{ShiftLeftSame(wide, bits).raw};
+  const Simd<T, N> d8;
+  // Bits of a byte that remain valid after shifting left by `bits`.
+  return shifted & Set(d8, (0xFF << bits) & 0xFF);
+}
+
+// uint8_t lanes: shift as 16-bit lanes, then clear the bits that came from
+// the neighboring byte.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> ShiftRightSame(Vec128<uint8_t, N> v,
+                                          const int bits) {
+  // Reinterpret through raw instead of BitCast to support N=1.
+  const Vec128<uint16_t> wide{v.raw};
+  const Vec128<uint8_t, N> shifted{ShiftRightSame(wide, bits).raw};
+  const Simd<uint8_t, N> d8;
+  return shifted & Set(d8, 0xFF >> bits);
+}
+
+// int8_t lanes: unsigned shift followed by sign extension.
+template <size_t N>
+HWY_API Vec128<int8_t, N> ShiftRightSame(Vec128<int8_t, N> v, const int bits) {
+  const Simd<uint8_t, N> du;
+  const Simd<int8_t, N> di;
+  const auto logical = ShiftRightSame(BitCast(du, v), bits);
+  const auto shifted = BitCast(di, logical);
+  // Position of the original sign bit after the shift; xor-then-subtract
+  // extends it into the upper bits.
+  const auto sign = BitCast(di, Set(du, 0x80 >> bits));
+  return (shifted ^ sign) - sign;
+}
+
+// ------------------------------ Minimum
+
+// Unsigned
+// uint8_t lanes.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> Min(const Vec128<uint8_t, N> a,
+                               const Vec128<uint8_t, N> b) {
+  const auto smaller = wasm_u8x16_min(a.raw, b.raw);
+  return Vec128<uint8_t, N>{smaller};
+}
+// uint16_t lanes.
+template <size_t N>
+HWY_API Vec128<uint16_t, N> Min(const Vec128<uint16_t, N> a,
+                                const Vec128<uint16_t, N> b) {
+  const auto smaller = wasm_u16x8_min(a.raw, b.raw);
+  return Vec128<uint16_t, N>{smaller};
+}
+// uint32_t lanes.
+template <size_t N>
+HWY_API Vec128<uint32_t, N> Min(const Vec128<uint32_t, N> a,
+                                const Vec128<uint32_t, N> b) {
+  const auto smaller = wasm_u32x4_min(a.raw, b.raw);
+  return Vec128<uint32_t, N>{smaller};
+}
+// uint64_t lanes: computed per lane with scalar code.
+template <size_t N>
+HWY_API Vec128<uint64_t, N> Min(const Vec128<uint64_t, N> a,
+                                const Vec128<uint64_t, N> b) {
+  // TODO(janwas): new op? (e.g. a native wasm_u64x2_min)
+  // Extract from the raw vectors with the signed 64-bit intrinsic and
+  // reinterpret as unsigned so the comparison is unsigned.
+  const uint64_t a0 = static_cast<uint64_t>(wasm_i64x2_extract_lane(a.raw, 0));
+  const uint64_t b0 = static_cast<uint64_t>(wasm_i64x2_extract_lane(b.raw, 0));
+  const uint64_t a1 = static_cast<uint64_t>(wasm_i64x2_extract_lane(a.raw, 1));
+  const uint64_t b1 = static_cast<uint64_t>(wasm_i64x2_extract_lane(b.raw, 1));
+  // The staging buffer must have 64-bit integer elements: storing into a
+  // float array would convert the values and cover only half of each lane.
+  alignas(16) const uint64_t lanes[2] = {std::min(a0, b0), std::min(a1, b1)};
+  return Vec128<uint64_t, N>{wasm_v128_load(lanes)};
+}
+
+// Signed
+// int8_t lanes.
+template <size_t N>
+HWY_API Vec128<int8_t, N> Min(const Vec128<int8_t, N> a,
+                              const Vec128<int8_t, N> b) {
+  const auto smaller = wasm_i8x16_min(a.raw, b.raw);
+  return Vec128<int8_t, N>{smaller};
+}
+// int16_t lanes.
+template <size_t N>
+HWY_API Vec128<int16_t, N> Min(const Vec128<int16_t, N> a,
+                               const Vec128<int16_t, N> b) {
+  const auto smaller = wasm_i16x8_min(a.raw, b.raw);
+  return Vec128<int16_t, N>{smaller};
+}
+// int32_t lanes.
+template <size_t N>
+HWY_API Vec128<int32_t, N> Min(const Vec128<int32_t, N> a,
+                               const Vec128<int32_t, N> b) {
+  const auto smaller = wasm_i32x4_min(a.raw, b.raw);
+  return Vec128<int32_t, N>{smaller};
+}
+// int64_t lanes: computed per lane with scalar code.
+template <size_t N>
+HWY_API Vec128<int64_t, N> Min(const Vec128<int64_t, N> a,
+                               const Vec128<int64_t, N> b) {
+  // TODO(janwas): new op? (e.g. a native wasm_i64x2_min)
+  // Lanes are extracted from the raw vectors (the intrinsic does not accept
+  // the Vec128 wrapper).
+  const int64_t a0 = wasm_i64x2_extract_lane(a.raw, 0);
+  const int64_t b0 = wasm_i64x2_extract_lane(b.raw, 0);
+  const int64_t a1 = wasm_i64x2_extract_lane(a.raw, 1);
+  const int64_t b1 = wasm_i64x2_extract_lane(b.raw, 1);
+  // The staging buffer must have 64-bit integer elements: storing into a
+  // float array would convert the values and cover only half of each lane.
+  alignas(16) const int64_t lanes[2] = {std::min(a0, b0), std::min(a1, b1)};
+  return Vec128<int64_t, N>{wasm_v128_load(lanes)};
+}
+
+// Float
+// float lanes.
+template <size_t N>
+HWY_API Vec128<float, N> Min(const Vec128<float, N> a,
+                             const Vec128<float, N> b) {
+  using V = Vec128<float, N>;
+  return V{wasm_f32x4_min(a.raw, b.raw)};
+}
+
+// ------------------------------ Maximum
+
+// Unsigned
+// uint8_t lanes.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> Max(const Vec128<uint8_t, N> a,
+                               const Vec128<uint8_t, N> b) {
+  const auto larger = wasm_u8x16_max(a.raw, b.raw);
+  return Vec128<uint8_t, N>{larger};
+}
+// uint16_t lanes.
+template <size_t N>
+HWY_API Vec128<uint16_t, N> Max(const Vec128<uint16_t, N> a,
+                                const Vec128<uint16_t, N> b) {
+  const auto larger = wasm_u16x8_max(a.raw, b.raw);
+  return Vec128<uint16_t, N>{larger};
+}
+// uint32_t lanes.
+template <size_t N>
+HWY_API Vec128<uint32_t, N> Max(const Vec128<uint32_t, N> a,
+                                const Vec128<uint32_t, N> b) {
+  const auto larger = wasm_u32x4_max(a.raw, b.raw);
+  return Vec128<uint32_t, N>{larger};
+}
+// uint64_t lanes: computed per lane with scalar code.
+template <size_t N>
+HWY_API Vec128<uint64_t, N> Max(const Vec128<uint64_t, N> a,
+                                const Vec128<uint64_t, N> b) {
+  // TODO(janwas): new op? (e.g. a native wasm_u64x2_max; also do not yet have
+  // wasm_u64x2_make)
+  // Extract from the raw vectors with the signed 64-bit intrinsic and
+  // reinterpret as unsigned so the comparison is unsigned.
+  const uint64_t a0 = static_cast<uint64_t>(wasm_i64x2_extract_lane(a.raw, 0));
+  const uint64_t b0 = static_cast<uint64_t>(wasm_i64x2_extract_lane(b.raw, 0));
+  const uint64_t a1 = static_cast<uint64_t>(wasm_i64x2_extract_lane(a.raw, 1));
+  const uint64_t b1 = static_cast<uint64_t>(wasm_i64x2_extract_lane(b.raw, 1));
+  // The staging buffer must have 64-bit integer elements: storing into a
+  // float array would convert the values and cover only half of each lane.
+  alignas(16) const uint64_t lanes[2] = {std::max(a0, b0), std::max(a1, b1)};
+  // The result type must match the declared uint64_t return type.
+  return Vec128<uint64_t, N>{wasm_v128_load(lanes)};
+}
+
+// Signed
+// int8_t lanes.
+template <size_t N>
+HWY_API Vec128<int8_t, N> Max(const Vec128<int8_t, N> a,
+                              const Vec128<int8_t, N> b) {
+  const auto larger = wasm_i8x16_max(a.raw, b.raw);
+  return Vec128<int8_t, N>{larger};
+}
+// int16_t lanes.
+template <size_t N>
+HWY_API Vec128<int16_t, N> Max(const Vec128<int16_t, N> a,
+                               const Vec128<int16_t, N> b) {
+  const auto larger = wasm_i16x8_max(a.raw, b.raw);
+  return Vec128<int16_t, N>{larger};
+}
+// int32_t lanes.
+template <size_t N>
+HWY_API Vec128<int32_t, N> Max(const Vec128<int32_t, N> a,
+                               const Vec128<int32_t, N> b) {
+  const auto larger = wasm_i32x4_max(a.raw, b.raw);
+  return Vec128<int32_t, N>{larger};
+}
+// int64_t lanes: computed per lane with scalar code.
+template <size_t N>
+HWY_API Vec128<int64_t, N> Max(const Vec128<int64_t, N> a,
+                               const Vec128<int64_t, N> b) {
+  // TODO(janwas): new op? (e.g. a native wasm_i64x2_max; also do not yet have
+  // wasm_u64x2_make)
+  // Lanes are extracted from the raw vectors (the intrinsic does not accept
+  // the Vec128 wrapper).
+  const int64_t a0 = wasm_i64x2_extract_lane(a.raw, 0);
+  const int64_t b0 = wasm_i64x2_extract_lane(b.raw, 0);
+  const int64_t a1 = wasm_i64x2_extract_lane(a.raw, 1);
+  const int64_t b1 = wasm_i64x2_extract_lane(b.raw, 1);
+  // The staging buffer must have 64-bit integer elements: storing into a
+  // float array would convert the values and cover only half of each lane.
+  alignas(16) const int64_t lanes[2] = {std::max(a0, b0), std::max(a1, b1)};
+  return Vec128<int64_t, N>{wasm_v128_load(lanes)};
+}
+
+// Float
+// float lanes.
+template <size_t N>
+HWY_API Vec128<float, N> Max(const Vec128<float, N> a,
+                             const Vec128<float, N> b) {
+  using V = Vec128<float, N>;
+  return V{wasm_f32x4_max(a.raw, b.raw)};
+}
+
+// ------------------------------ Integer multiplication
+
+// Unsigned
+// uint16_t lanes; uses the same 16-bit multiply intrinsic as the signed
+// overload.
+template <size_t N>
+HWY_API Vec128<uint16_t, N> operator*(const Vec128<uint16_t, N> a,
+                                      const Vec128<uint16_t, N> b) {
+  const auto product = wasm_i16x8_mul(a.raw, b.raw);
+  return Vec128<uint16_t, N>{product};
+}
+// uint32_t lanes.
+template <size_t N>
+HWY_API Vec128<uint32_t, N> operator*(const Vec128<uint32_t, N> a,
+                                      const Vec128<uint32_t, N> b) {
+  const auto product = wasm_i32x4_mul(a.raw, b.raw);
+  return Vec128<uint32_t, N>{product};
+}
+
+// Signed
+// int16_t lanes.
+template <size_t N>
+HWY_API Vec128<int16_t, N> operator*(const Vec128<int16_t, N> a,
+                                     const Vec128<int16_t, N> b) {
+  const auto product = wasm_i16x8_mul(a.raw, b.raw);
+  return Vec128<int16_t, N>{product};
+}
+// int32_t lanes.
+template <size_t N>
+HWY_API Vec128<int32_t, N> operator*(const Vec128<int32_t, N> a,
+                                     const Vec128<int32_t, N> b) {
+  const auto product = wasm_i32x4_mul(a.raw, b.raw);
+  return Vec128<int32_t, N>{product};
+}
+
+// Returns the upper 16 bits of a * b in each lane.
+// uint16_t lanes: widen to 32 bits, multiply, keep the upper 16 bits.
+template <size_t N>
+HWY_API Vec128<uint16_t, N> MulHigh(const Vec128<uint16_t, N> a,
+                                    const Vec128<uint16_t, N> b) {
+  // TODO(eustas): replace, when implemented in WASM.
+  // Full 32-bit products of the lower four lanes ...
+  const auto prod_lo = wasm_i32x4_mul(wasm_u32x4_extend_low_u16x8(a.raw),
+                                      wasm_u32x4_extend_low_u16x8(b.raw));
+  // ... and of the upper four lanes.
+  const auto prod_hi = wasm_i32x4_mul(wasm_u32x4_extend_high_u16x8(a.raw),
+                                      wasm_u32x4_extend_high_u16x8(b.raw));
+  // TODO(eustas): shift-right + narrow?
+  // Select the odd 16-bit lanes of both product vectors.
+  const auto upper =
+      wasm_i16x8_shuffle(prod_lo, prod_hi, 1, 3, 5, 7, 9, 11, 13, 15);
+  return Vec128<uint16_t, N>{upper};
+}
+// int16_t lanes: sign-extend to 32 bits, multiply, keep the upper 16 bits.
+template <size_t N>
+HWY_API Vec128<int16_t, N> MulHigh(const Vec128<int16_t, N> a,
+                                   const Vec128<int16_t, N> b) {
+  // TODO(eustas): replace, when implemented in WASM.
+  // Full 32-bit products of the lower four lanes ...
+  const auto prod_lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a.raw),
+                                      wasm_i32x4_extend_low_i16x8(b.raw));
+  // ... and of the upper four lanes.
+  const auto prod_hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a.raw),
+                                      wasm_i32x4_extend_high_i16x8(b.raw));
+  // TODO(eustas): shift-right + narrow?
+  // Select the odd 16-bit lanes of both product vectors.
+  const auto upper =
+      wasm_i16x8_shuffle(prod_lo, prod_hi, 1, 3, 5, 7, 9, 11, 13, 15);
+  return Vec128<int16_t, N>{upper};
+}
+
+// Multiplies even lanes (0, 2 ..) and returns the double-width result.
+// int32_t lanes: signed 64-bit products of the even lanes.
+template <size_t N>
+HWY_API Vec128<int64_t, (N + 1) / 2> MulEven(const Vec128<int32_t, N> a,
+                                             const Vec128<int32_t, N> b) {
+  // TODO(eustas): replace, when implemented in WASM.
+  // Sign-extend the even (lower) 32-bit half of every 64-bit lane: move it to
+  // the top with a left shift, then bring it back with the signed right
+  // shift. Merely masking off the odd lanes would zero-extend, which gives the
+  // wrong 64-bit product for negative inputs (e.g. -1 * 1 == 4294967295).
+  const auto ae = wasm_i64x2_shr(wasm_i64x2_shl(a.raw, 32), 32);
+  const auto be = wasm_i64x2_shr(wasm_i64x2_shl(b.raw, 32), 32);
+  return Vec128<int64_t, (N + 1) / 2>{wasm_i64x2_mul(ae, be)};
+}
+// uint32_t lanes: unsigned 64-bit products of the even lanes.
+template <size_t N>
+HWY_API Vec128<uint64_t, (N + 1) / 2> MulEven(const Vec128<uint32_t, N> a,
+                                              const Vec128<uint32_t, N> b) {
+  using VW = Vec128<uint64_t, (N + 1) / 2>;
+  // TODO(eustas): replace, when implemented in WASM.
+  // Clearing the odd lanes leaves each even lane zero-extended to 64 bits.
+  const auto keep_even = wasm_i32x4_make(0xFFFFFFFF, 0, 0xFFFFFFFF, 0);
+  const auto a_even = wasm_v128_and(a.raw, keep_even);
+  const auto b_even = wasm_v128_and(b.raw, keep_even);
+  return VW{wasm_i64x2_mul(a_even, b_even)};
+}
+
+// ------------------------------ Negate
+
+// Floating-point: negate by flipping the sign bit.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> Neg(const Vec128<T, N> v) {
+  const Simd<T, N> d;
+  const auto sign = SignBit(d);
+  return Xor(v, sign);
+}
+
+// int8_t lanes.
+template <size_t N>
+HWY_API Vec128<int8_t, N> Neg(const Vec128<int8_t, N> v) {
+  const auto negated = wasm_i8x16_neg(v.raw);
+  return Vec128<int8_t, N>{negated};
+}
+// int16_t lanes.
+template <size_t N>
+HWY_API Vec128<int16_t, N> Neg(const Vec128<int16_t, N> v) {
+  const auto negated = wasm_i16x8_neg(v.raw);
+  return Vec128<int16_t, N>{negated};
+}
+// int32_t lanes.
+template <size_t N>
+HWY_API Vec128<int32_t, N> Neg(const Vec128<int32_t, N> v) {
+  const auto negated = wasm_i32x4_neg(v.raw);
+  return Vec128<int32_t, N>{negated};
+}
+// int64_t lanes.
+template <size_t N>
+HWY_API Vec128<int64_t, N> Neg(const Vec128<int64_t, N> v) {
+  const auto negated = wasm_i64x2_neg(v.raw);
+  return Vec128<int64_t, N>{negated};
+}
+
+// ------------------------------ Floating-point mul / div
+
+// Lane-wise float multiplication.
+template <size_t N>
+HWY_API Vec128<float, N> operator*(Vec128<float, N> a, Vec128<float, N> b) {
+  using V = Vec128<float, N>;
+  return V{wasm_f32x4_mul(a.raw, b.raw)};
+}
+
+// Lane-wise float division.
+template <size_t N>
+HWY_API Vec128<float, N> operator/(const Vec128<float, N> a,
+                                   const Vec128<float, N> b) {
+  using V = Vec128<float, N>;
+  return V{wasm_f32x4_div(a.raw, b.raw)};
+}
+
+// Approximate reciprocal
+// Currently computed with a full division of 1.0f by v.
+template <size_t N>
+HWY_API Vec128<float, N> ApproximateReciprocal(const Vec128<float, N> v) {
+  // TODO(eustas): replace, when implemented in WASM.
+  const Vec128<float, N> ones{wasm_f32x4_splat(1.0f)};
+  return ones / v;
+}
+
+// Absolute value of difference.
+// Returns |a - b| per lane.
+template <size_t N>
+HWY_API Vec128<float, N> AbsDiff(const Vec128<float, N> a,
+                                 const Vec128<float, N> b) {
+  const Vec128<float, N> difference = a - b;
+  return Abs(difference);
+}
+
+// ------------------------------ Floating-point multiply-add variants
+
+// Returns mul * x + add
+// Computed as a separate multiply followed by an add.
+template <size_t N>
+HWY_API Vec128<float, N> MulAdd(const Vec128<float, N> mul,
+                                const Vec128<float, N> x,
+                                const Vec128<float, N> add) {
+  // TODO(eustas): replace, when implemented in WASM.
+  // TODO(eustas): is it wasm_f32x4_qfma?
+  const Vec128<float, N> product = mul * x;
+  return product + add;
+}
+
+// Returns add - mul * x
+// Computed as a separate multiply followed by a subtract from `add`.
+template <size_t N>
+HWY_API Vec128<float, N> NegMulAdd(const Vec128<float, N> mul,
+                                   const Vec128<float, N> x,
+                                   const Vec128<float, N> add) {
+  // TODO(eustas): replace, when implemented in WASM.
+  const Vec128<float, N> product = mul * x;
+  return add - product;
+}
+
+// Returns mul * x - sub
+// Computed as a separate multiply followed by a subtract.
+template <size_t N>
+HWY_API Vec128<float, N> MulSub(const Vec128<float, N> mul,
+                                const Vec128<float, N> x,
+                                const Vec128<float, N> sub) {
+  // TODO(eustas): replace, when implemented in WASM.
+  // TODO(eustas): is it wasm_f32x4_qfms?
+  const Vec128<float, N> product = mul * x;
+  return product - sub;
+}
+
+// Returns -mul * x - sub
+// Negates `mul` first, then multiplies by x and subtracts `sub`.
+template <size_t N>
+HWY_API Vec128<float, N> NegMulSub(const Vec128<float, N> mul,
+                                   const Vec128<float, N> x,
+                                   const Vec128<float, N> sub) {
+  // TODO(eustas): replace, when implemented in WASM.
+  const Vec128<float, N> neg_mul = Neg(mul);
+  return neg_mul * x - sub;
+}
+
+// ------------------------------ Floating-point square root
+
+// Full precision square root
+// Lane-wise square root via wasm_f32x4_sqrt.
+template <size_t N>
+HWY_API Vec128<float, N> Sqrt(const Vec128<float, N> v) {
+  using V = Vec128<float, N>;
+  return V{wasm_f32x4_sqrt(v.raw)};
+}
+
+// Approximate reciprocal square root
+// Currently computed as 1.0f / Sqrt(v).
+template <size_t N>
+HWY_API Vec128<float, N> ApproximateReciprocalSqrt(const Vec128<float, N> v) {
+  // TODO(eustas): find a cheaper way to calculate this.
+  const Vec128<float, N> ones{wasm_f32x4_splat(1.0f)};
+  const Vec128<float, N> root = Sqrt(v);
+  return ones / root;
+}
+
+// ------------------------------ Floating-point rounding
+
+// Toward nearest integer, ties to even
+template <size_t N>
+HWY_API Vec128<float, N> Round(const Vec128<float, N> v) {
+  // There is not yet an instruction for IEEE-754 roundToIntegralTiesToEven
+  // (f32x4.nearest is not implemented). Instead, add and then subtract a
+  // value large enough that no mantissa bits remain for the fraction; the
+  // addition performs the rounding, assuming the current mode is
+  // nearest-even. A compiler flag for precise floating-point may be needed to
+  // prevent this from being "optimized" out.
+  const Simd<float, N> df;
+  const auto limit = Set(df, MantissaEnd<float>());
+  // Same magnitude as limit, but carrying the sign of v.
+  const auto signed_limit = CopySignToAbs(limit, v);
+  const auto rounded = (signed_limit + v) - signed_limit;
+
+  // Keep the input if it is NaN or its magnitude is large (already an int).
+  const auto use_rounded = Abs(v) < limit;
+  return IfThenElse(use_rounded, rounded, v);
+}
+
+namespace detail {
+
+// Returns a mask of the lanes for which a float -> int -> float round trip
+// may be used. That round trip is correct except when the input magnitude is
+// large, in which case the input was already an integer (because
+// mantissa >> exponent is zero).
+template <size_t N>
+HWY_API Mask128<float, N> UseInt(const Vec128<float, N> v) {
+  const Simd<float, N> df;
+  const auto limit = Set(df, MantissaEnd<float>());
+  return Abs(v) < limit;
+}
+
+}  // namespace detail
+
+// Toward zero, aka truncate
+template <size_t N>
+HWY_API Vec128<float, N> Trunc(const Vec128<float, N> v) {
+  // TODO(eustas): is it f32x4.trunc? (not implemented yet)
+  const Simd<float, N> df;
+  const RebindToSigned<decltype(df)> di;
+
+  // Float -> int conversion rounds toward 0; convert back to float.
+  const auto truncated = ConvertTo(df, ConvertTo(di, v));
+  // Reapply the sign of the input to the round-tripped value.
+  const auto with_sign = CopySign(truncated, v);
+
+  // Lanes that are too large for the round trip are returned unchanged.
+  return IfThenElse(detail::UseInt(v), with_sign, v);
+}
+
+// Toward +infinity, aka ceiling
+template <size_t N>
+HWY_INLINE Vec128<float, N> Ceil(const Vec128<float, N> v) {
+  // TODO(eustas): is it f32x4.ceil? (not implemented yet)
+  const Simd<float, N> df;
+  const RebindToSigned<decltype(df)> di;
+
+  // Float -> int conversion rounds toward 0; convert back to float.
+  const auto truncated = ConvertTo(df, ConvertTo(di, v));
+
+  // Truncating a positive non-integer ends up smaller than the input. The
+  // mask of such lanes, reinterpreted as integers, is all-ones (-1), so its
+  // float conversion is -1.0f there and 0.0f elsewhere.
+  const auto too_small = RebindMask(di, truncated < v);
+  const auto minus_one = ConvertTo(df, VecFromMask(di, too_small));
+
+  // Subtracting -1 adds the missing 1. Lanes that are too large for the round
+  // trip are returned unchanged.
+  return IfThenElse(detail::UseInt(v), truncated - minus_one, v);
+}
+
+// Toward -infinity, aka floor
+template <size_t N>
+HWY_INLINE Vec128<float, N> Floor(const Vec128<float, N> v) {
+  // TODO(eustas): is it f32x4.floor? (not implemented yet)
+  const Simd<float, N> df;
+  const RebindToSigned<decltype(df)> di;
+
+  // Float -> int conversion rounds toward 0; convert back to float.
+  const auto truncated = ConvertTo(df, ConvertTo(di, v));
+
+  // Truncating a negative non-integer ends up larger than the input. The mask
+  // of such lanes, reinterpreted as integers, is all-ones (-1), so its float
+  // conversion is -1.0f there and 0.0f elsewhere.
+  const auto too_large = RebindMask(di, truncated > v);
+  const auto minus_one = ConvertTo(df, VecFromMask(di, too_large));
+
+  // Adding -1 subtracts the excess 1. Lanes that are too large for the round
+  // trip are returned unchanged.
+  return IfThenElse(detail::UseInt(v), truncated + minus_one, v);
+}
+
+// ================================================== COMPARE
+
+// Comparisons fill a lane with 1-bits if the condition is true, else 0.
+
+// Reinterprets a mask as belonging to another lane type of the same size.
+template <typename TFrom, typename TTo, size_t N>
+HWY_API Mask128<TTo, N> RebindMask(Simd<TTo, N> /*tag*/, Mask128<TFrom, N> m) {
+  static_assert(sizeof(TFrom) == sizeof(TTo), "Must have same size");
+  using MTo = Mask128<TTo, N>;
+  return MTo{m.raw};
+}
+
+// ------------------------------ Equality
+
+// Unsigned
+// uint8_t lanes.
+template <size_t N>
+HWY_API Mask128<uint8_t, N> operator==(const Vec128<uint8_t, N> a,
+                                       const Vec128<uint8_t, N> b) {
+  const auto equal = wasm_i8x16_eq(a.raw, b.raw);
+  return Mask128<uint8_t, N>{equal};
+}
+// uint16_t lanes.
+template <size_t N>
+HWY_API Mask128<uint16_t, N> operator==(const Vec128<uint16_t, N> a,
+                                        const Vec128<uint16_t, N> b) {
+  const auto equal = wasm_i16x8_eq(a.raw, b.raw);
+  return Mask128<uint16_t, N>{equal};
+}
+// uint32_t lanes.
+template <size_t N>
+HWY_API Mask128<uint32_t, N> operator==(const Vec128<uint32_t, N> a,
+                                        const Vec128<uint32_t, N> b) {
+  const auto equal = wasm_i32x4_eq(a.raw, b.raw);
+  return Mask128<uint32_t, N>{equal};
+}
+
+// Signed
+// int8_t lanes.
+template <size_t N>
+HWY_API Mask128<int8_t, N> operator==(const Vec128<int8_t, N> a,
+                                      const Vec128<int8_t, N> b) {
+  const auto equal = wasm_i8x16_eq(a.raw, b.raw);
+  return Mask128<int8_t, N>{equal};
+}
+// int16_t lanes.
+template <size_t N>
+HWY_API Mask128<int16_t, N> operator==(Vec128<int16_t, N> a,
+                                       Vec128<int16_t, N> b) {
+  const auto equal = wasm_i16x8_eq(a.raw, b.raw);
+  return Mask128<int16_t, N>{equal};
+}
+// int32_t lanes.
+template <size_t N>
+HWY_API Mask128<int32_t, N> operator==(const Vec128<int32_t, N> a,
+                                       const Vec128<int32_t, N> b) {
+  const auto equal = wasm_i32x4_eq(a.raw, b.raw);
+  return Mask128<int32_t, N>{equal};
+}
+
+// Float
+template <size_t N>
+HWY_API Mask128<float, N> operator==(const Vec128<float, N> a,
+                                     const Vec128<float, N> b) {
+  const auto equal = wasm_f32x4_eq(a.raw, b.raw);
+  return Mask128<float, N>{equal};
+}
+
+// Returns a mask of the lanes in which every bit set in `bit` is also set in
+// `v`. Integer lanes only.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> TestBit(Vec128<T, N> v, Vec128<T, N> bit) {
+  static_assert(!hwy::IsFloat<T>(), "Only integer vectors supported");
+  const Vec128<T, N> isolated = v & bit;
+  return isolated == bit;
+}
+
+// ------------------------------ Strict inequality
+
+// Signed/float >
+// int8_t lanes.
+template <size_t N>
+HWY_API Mask128<int8_t, N> operator>(const Vec128<int8_t, N> a,
+                                     const Vec128<int8_t, N> b) {
+  const auto greater = wasm_i8x16_gt(a.raw, b.raw);
+  return Mask128<int8_t, N>{greater};
+}
+// int16_t lanes.
+template <size_t N>
+HWY_API Mask128<int16_t, N> operator>(const Vec128<int16_t, N> a,
+                                      const Vec128<int16_t, N> b) {
+  const auto greater = wasm_i16x8_gt(a.raw, b.raw);
+  return Mask128<int16_t, N>{greater};
+}
+// int32_t lanes.
+template <size_t N>
+HWY_API Mask128<int32_t, N> operator>(const Vec128<int32_t, N> a,
+                                      const Vec128<int32_t, N> b) {
+  const auto greater = wasm_i32x4_gt(a.raw, b.raw);
+  return Mask128<int32_t, N>{greater};
+}
+// int64_t lanes: signed 64-bit comparison emulated with 32-bit lane
+// operations.
+template <size_t N>
+HWY_API Mask128<int64_t, N> operator>(const Vec128<int64_t, N> a,
+                                      const Vec128<int64_t, N> b) {
+  // TODO: replace with a native 64-bit comparison (wasm_i64x2_gt) once it can
+  // be relied upon.
+  // Within each 64-bit lane, the even 32-bit lane holds the lower half and the
+  // odd 32-bit lane the upper half. The upper halves carry the sign and are
+  // compared as signed; the lower halves must be compared as unsigned.
+  const auto hi_gt = wasm_i32x4_gt(a.raw, b.raw);
+  const auto lo_gt = wasm_u32x4_gt(a.raw, b.raw);
+  const auto eq = wasm_i32x4_eq(a.raw, b.raw);
+
+  // Move each lower-half result into the position of its upper half.
+  const auto lo_gt_in_hi = wasm_i32x4_shuffle(lo_gt, lo_gt, 0, 0, 2, 2);
+
+  // Valid in the odd lanes: the upper half is greater, or the upper halves
+  // are equal and the lower half decides.
+  const auto gt = wasm_v128_or(hi_gt, wasm_v128_and(eq, lo_gt_in_hi));
+
+  // Copy the result in the upper 32 bits to the lower 32 bits of each lane.
+  return Mask128<int64_t, N>{wasm_i32x4_shuffle(gt, gt, 1, 1, 3, 3)};
+}
+
+// float lanes.
+template <size_t N>
+HWY_API Mask128<float, N> operator>(const Vec128<float, N> a,
+                                    const Vec128<float, N> b) {
+  const auto greater = wasm_f32x4_gt(a.raw, b.raw);
+  return Mask128<float, N>{greater};
+}
+
+// a < b is implemented as b > a, for every lane type that provides operator>.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> operator<(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const Mask128<T, N> swapped_gt = operator>(b, a);
+  return swapped_gt;
+}
+
+// ------------------------------ Weak inequality
+
+// Float <= >=
+// float lanes, a <= b.
+template <size_t N>
+HWY_API Mask128<float, N> operator<=(const Vec128<float, N> a,
+                                     const Vec128<float, N> b) {
+  const auto not_greater = wasm_f32x4_le(a.raw, b.raw);
+  return Mask128<float, N>{not_greater};
+}
+// float lanes, a >= b.
+template <size_t N>
+HWY_API Mask128<float, N> operator>=(const Vec128<float, N> a,
+                                     const Vec128<float, N> b) {
+  const auto not_less = wasm_f32x4_ge(a.raw, b.raw);
+  return Mask128<float, N>{not_less};
+}
+
+// ------------------------------ FirstN (Iota, Lt)
+
+// Returns a mask that is true for the lanes whose index is less than `num`.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> FirstN(const Simd<T, N> d, size_t num) {
+  const RebindToSigned<decltype(d)> di;  // Signed comparisons may be cheaper.
+  const auto lane_index = Iota(di, 0);
+  const auto limit = Set(di, static_cast<MakeSigned<T>>(num));
+  return RebindMask(d, lane_index < limit);
+}
+
+// ================================================== LOGICAL
+
+// ------------------------------ Not
+
+// Bitwise complement of the whole vector.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Not(Vec128<T, N> v) {
+  using V = Vec128<T, N>;
+  return V{wasm_v128_not(v.raw)};
+}
+
+// ------------------------------ And
+
+// Bitwise a & b.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> And(Vec128<T, N> a, Vec128<T, N> b) {
+  using V = Vec128<T, N>;
+  return V{wasm_v128_and(a.raw, b.raw)};
+}
+
+// ------------------------------ AndNot
+
+// Returns ~not_mask & mask.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> AndNot(Vec128<T, N> not_mask, Vec128<T, N> mask) {
+  using V = Vec128<T, N>;
+  // Note the argument order: the operand to be complemented (not_mask) is
+  // passed second to the intrinsic.
+  return V{wasm_v128_andnot(mask.raw, not_mask.raw)};
+}
+
+// ------------------------------ Or
+
+// Bitwise a | b.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Or(Vec128<T, N> a, Vec128<T, N> b) {
+  using V = Vec128<T, N>;
+  return V{wasm_v128_or(a.raw, b.raw)};
+}
+
+// ------------------------------ Xor
+
+// Bitwise a ^ b.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Xor(Vec128<T, N> a, Vec128<T, N> b) {
+  using V = Vec128<T, N>;
+  return V{wasm_v128_xor(a.raw, b.raw)};
+}
+
+// ------------------------------ Operator overloads (internal-only if float)
+
+// Operator form of And().
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator&(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const Vec128<T, N> result = And(a, b);
+  return result;
+}
+
+// Operator form of Or().
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator|(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const Vec128<T, N> result = Or(a, b);
+  return result;
+}
+
+// Operator form of Xor().
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator^(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const Vec128<T, N> result = Xor(a, b);
+  return result;
+}
+
+// ------------------------------ CopySign
+
+// Returns the bits of `magn` with its sign bit replaced by that of `sign`.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> CopySign(const Vec128<T, N> magn,
+                              const Vec128<T, N> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+  const Simd<T, N> d;
+  const auto msb = SignBit(d);
+  const auto magnitude_bits = AndNot(msb, magn);  // everything but the sign
+  const auto sign_bit = And(msb, sign);           // only the sign
+  return Or(magnitude_bits, sign_bit);
+}
+
+// ORs the sign bit of `sign` into `abs`. The sign bit of `abs` is not cleared
+// first, so `abs` is presumably expected to be non-negative.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> CopySignToAbs(const Vec128<T, N> abs,
+                                   const Vec128<T, N> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+  const Simd<T, N> d;
+  const auto sign_bit = And(SignBit(d), sign);
+  return Or(abs, sign_bit);
+}
+
+// ------------------------------ BroadcastSignBit (compare)
+
+// Lanes wider than 8 bits: shift right by (lane bits - 1).
+template <typename T, size_t N, HWY_IF_NOT_LANE_SIZE(T, 1)>
+HWY_API Vec128<T, N> BroadcastSignBit(const Vec128<T, N> v) {
+  constexpr int kShift = sizeof(T) * 8 - 1;
+  return ShiftRight<kShift>(v);
+}
+// int8_t lanes: compare against zero instead of shifting.
+template <size_t N>
+HWY_API Vec128<int8_t, N> BroadcastSignBit(const Vec128<int8_t, N> v) {
+  const Simd<int8_t, N> d;
+  const auto is_negative = v < Zero(d);
+  return VecFromMask(d, is_negative);
+}
+
+// ------------------------------ Mask
+
+// Mask and Vec are the same (true = FF..FF).
+// Wraps the raw bits of a vector as a mask, without any conversion.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> MaskFromVec(const Vec128<T, N> v) {
+  using M = Mask128<T, N>;
+  return M{v.raw};
+}
+
+// Wraps the raw bits of a mask as a vector, without any conversion.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> VecFromMask(Simd<T, N> /* tag */, Mask128<T, N> v) {
+  using V = Vec128<T, N>;
+  return V{v.raw};
+}
+
+// DEPRECATED: overload without the tag argument.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> VecFromMask(const Mask128<T, N> v) {
+  using V = Vec128<T, N>;
+  return V{v.raw};
+}
+
+// mask ? yes : no
+// Bitwise select: takes `yes` where mask bits are set, `no` elsewhere.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfThenElse(Mask128<T, N> mask, Vec128<T, N> yes,
+                                Vec128<T, N> no) {
+  const auto selected = wasm_v128_bitselect(yes.raw, no.raw, mask.raw);
+  return Vec128<T, N>{selected};
+}
+
+// mask ? yes : 0
+// Implemented as a bitwise AND of `yes` with the mask bits.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfThenElseZero(Mask128<T, N> mask, Vec128<T, N> yes) {
+  const Simd<T, N> d;
+  const Vec128<T, N> mask_bits = VecFromMask(d, mask);
+  return yes & mask_bits;
+}
+
+// mask ? 0 : no
+// Implemented as a bitwise AND of `no` with the complemented mask bits.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfThenZeroElse(Mask128<T, N> mask, Vec128<T, N> no) {
+  const Simd<T, N> d;
+  const Vec128<T, N> mask_bits = VecFromMask(d, mask);
+  return AndNot(mask_bits, no);
+}
+
+// Keeps the lanes that compare greater than zero and replaces all others
+// with zero.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> ZeroIfNegative(Vec128<T, N> v) {
+  const Simd<T, N> d;
+  const auto zero = Zero(d);
+  const auto is_positive = v > zero;
+  const Mask128<T, N> keep{is_positive.raw};
+  return IfThenElse(keep, v, zero);
+}
+
+// ------------------------------ Mask logical
+
+// Mask complement, computed on the vector representation of the mask.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Not(const Mask128<T, N> m) {
+  const Simd<T, N> d;
+  const Vec128<T, N> bits = VecFromMask(d, m);
+  return MaskFromVec(Not(bits));
+}
+
+// Mask a & b, computed on the vector representations of the masks.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> And(const Mask128<T, N> a, Mask128<T, N> b) {
+  const Simd<T, N> d;
+  const Vec128<T, N> a_bits = VecFromMask(d, a);
+  const Vec128<T, N> b_bits = VecFromMask(d, b);
+  return MaskFromVec(And(a_bits, b_bits));
+}
+
+// Mask AndNot, forwarded with the same argument order to the vector AndNot.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> AndNot(const Mask128<T, N> a, Mask128<T, N> b) {
+  const Simd<T, N> d;
+  const Vec128<T, N> a_bits = VecFromMask(d, a);
+  const Vec128<T, N> b_bits = VecFromMask(d, b);
+  return MaskFromVec(AndNot(a_bits, b_bits));
+}
+
+// Mask a | b, computed on the vector representations of the masks.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Or(const Mask128<T, N> a, Mask128<T, N> b) {
+  const Simd<T, N> d;
+  const Vec128<T, N> a_bits = VecFromMask(d, a);
+  const Vec128<T, N> b_bits = VecFromMask(d, b);
+  return MaskFromVec(Or(a_bits, b_bits));
+}
+
+// Mask a ^ b, computed on the vector representations of the masks.
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Xor(const Mask128<T, N> a, Mask128<T, N> b) {
+  const Simd<T, N> d;
+  const Vec128<T, N> a_bits = VecFromMask(d, a);
+  const Vec128<T, N> b_bits = VecFromMask(d, b);
+  return MaskFromVec(Xor(a_bits, b_bits));
+}
+
+// ------------------------------ Shl (BroadcastSignBit, IfThenElse)
+
+// The x86 multiply-by-Pow2() trick will not work because WASM saturates
+// float->int correctly to 2^31-1 (not 2^31). Because WASM's shifts take a
+// scalar count operand, per-lane shift instructions would require extract_lane
+// for each lane, and hoping that shuffle is correctly mapped to a native
+// instruction. Using non-vector shifts would incur a store-load forwarding
+// stall when loading the result vector. We instead test bits of the shift
+// count to "predicate" a shift of the entire vector by a constant.
+
+// 16-bit lanes: per-lane variable left shift. Each of the four low bits of
+// the shift count, from highest to lowest, selects whether the lane is
+// shifted by the corresponding constant (8, 4, 2, 1).
+template <typename T, size_t N, HWY_IF_LANE_SIZE(T, 2)>
+HWY_API Vec128<T, N> operator<<(Vec128<T, N> v, const Vec128<T, N> bits) {
+  const Simd<T, N> d;
+  // Need a signed type for BroadcastSignBit.
+  const RebindToSigned<decltype(d)> di;
+  // Move the highest valid bit of the shift count (bit 3) into the sign bit.
+  auto count = ShiftLeft<12>(BitCast(di, bits));
+
+  // Count bit 3: shift by 8.
+  const auto bit3 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(bit3, ShiftLeft<8>(v), v);
+  count = ShiftLeft<1>(count);  // expose the next lower count bit
+
+  // Count bit 2: shift by 4.
+  const auto bit2 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(bit2, ShiftLeft<4>(v), v);
+  count = ShiftLeft<1>(count);
+
+  // Count bit 1: shift by 2.
+  const auto bit1 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(bit1, ShiftLeft<2>(v), v);
+  count = ShiftLeft<1>(count);
+
+  // Count bit 0: shift by 1.
+  const auto bit0 = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  return IfThenElse(bit0, ShiftLeft<1>(v), v);
+}
+
+template <typename T, size_t N, HWY_IF_LANE_SIZE(T, 4)>
+HWY_API Vec128<T, N> operator<<(Vec128<T, N> v, const Vec128<T, N> bits) {
+  const Simd<T, N> d;
+  const RebindToSigned<decltype(d)> di;  // BroadcastSignBit needs signed lanes
+  // Only the low 5 bits of each count are examined. ShiftLeft<27> moves count
+  // bit 4 into the sign bit; each later ShiftLeft<1> exposes the next lower
+  // bit, so the conditional shifts run in descending order: 16, 8, 4, 2, 1.
+  auto count = ShiftLeft<27>(BitCast(di, bits));
+
+  Mask128<T, N> take = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(take, ShiftLeft<16>(v), v);
+  count = ShiftLeft<1>(count);
+
+  take = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(take, ShiftLeft<8>(v), v);
+  count = ShiftLeft<1>(count);
+
+  take = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(take, ShiftLeft<4>(v), v);
+  count = ShiftLeft<1>(count);
+
+  take = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(take, ShiftLeft<2>(v), v);
+  count = ShiftLeft<1>(count);
+
+  take = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  return IfThenElse(take, ShiftLeft<1>(v), v);
+}
+
+// ------------------------------ Shr (BroadcastSignBit, IfThenElse)
+
+template <typename T, size_t N, HWY_IF_LANE_SIZE(T, 2)>
+HWY_API Vec128<T, N> operator>>(Vec128<T, N> v, const Vec128<T, N> bits) {
+  const Simd<T, N> d;
+  const RebindToSigned<decltype(d)> di;  // BroadcastSignBit needs signed lanes
+  // Only the low 4 bits of each count are examined. ShiftLeft<12> moves count
+  // bit 3 into the sign bit; each later ShiftLeft<1> exposes the next lower
+  // bit, so the conditional shifts run in descending order: 8, 4, 2, 1.
+  auto count = ShiftLeft<12>(BitCast(di, bits));
+
+  Mask128<T, N> take = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(take, ShiftRight<8>(v), v);
+  count = ShiftLeft<1>(count);
+
+  take = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(take, ShiftRight<4>(v), v);
+  count = ShiftLeft<1>(count);
+
+  take = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(take, ShiftRight<2>(v), v);
+  count = ShiftLeft<1>(count);
+
+  take = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  return IfThenElse(take, ShiftRight<1>(v), v);
+}
+
+template <typename T, size_t N, HWY_IF_LANE_SIZE(T, 4)>
+HWY_API Vec128<T, N> operator>>(Vec128<T, N> v, const Vec128<T, N> bits) {
+  const Simd<T, N> d;
+  const RebindToSigned<decltype(d)> di;  // BroadcastSignBit needs signed lanes
+  // Only the low 5 bits of each count are examined. ShiftLeft<27> moves count
+  // bit 4 into the sign bit; each later ShiftLeft<1> exposes the next lower
+  // bit, so the conditional shifts run in descending order: 16, 8, 4, 2, 1.
+  auto count = ShiftLeft<27>(BitCast(di, bits));
+
+  Mask128<T, N> take = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(take, ShiftRight<16>(v), v);
+  count = ShiftLeft<1>(count);
+
+  take = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(take, ShiftRight<8>(v), v);
+  count = ShiftLeft<1>(count);
+
+  take = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(take, ShiftRight<4>(v), v);
+  count = ShiftLeft<1>(count);
+
+  take = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  v = IfThenElse(take, ShiftRight<2>(v), v);
+  count = ShiftLeft<1>(count);
+
+  take = RebindMask(d, MaskFromVec(BroadcastSignBit(count)));
+  return IfThenElse(take, ShiftRight<1>(v), v);
+}
+
+// ================================================== MEMORY
+
+// ------------------------------ Load
+
+template <typename T>
+HWY_API Vec128<T> Load(Full128<T> /* tag */, const T* HWY_RESTRICT aligned) {
+  return Vec128<T>{wasm_v128_load(aligned)};  // full vector: one v128 load
+}
+
+// Partial load: copies exactly sizeof(T) * N bytes from p into the result.
+template <typename T, size_t N, HWY_IF_LE64(T, N)>
+HWY_API Vec128<T, N> Load(Simd<T, N> /* tag */, const T* HWY_RESTRICT p) {
+  Vec128<T, N> partial;  // CopyBytes below writes only the first N lanes
+  CopyBytes<sizeof(T) * N>(p, &partial);
+  return partial;
+}
+
+// LoadU == Load: the unaligned variant forwards to Load with the same tag.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> LoadU(Simd<T, N> d, const T* HWY_RESTRICT p) {
+  return Load(d, p);
+}
+
+// 128-bit SIMD => nothing to duplicate, same as an unaligned load.
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API Vec128<T, N> LoadDup128(Simd<T, N> d, const T* HWY_RESTRICT p) {
+  return Load(d, p);  // the same call LoadU makes
+}
+
+// ------------------------------ Store
+
+template <typename T>
+HWY_API void Store(Vec128<T> v, Full128<T> /* tag */, T* HWY_RESTRICT aligned) {
+  wasm_v128_store(aligned, v.raw);  // full vector: one v128 store
+}
+
+// Partial store: copies exactly sizeof(T) * N bytes of v to p.
+template <typename T, size_t N, HWY_IF_LE64(T, N)>
+HWY_API void Store(Vec128<T, N> v, Simd<T, N> /* tag */, T* HWY_RESTRICT p) {
+  CopyBytes<sizeof(T) * N>(&v, p);
+}
+
+HWY_API void Store(const Vec128<float, 1> v, Simd<float, 1> /* tag */,
+                   float* HWY_RESTRICT p) {
+  *p = wasm_f32x4_extract_lane(v.raw, 0);  // scalar store of lane 0
+}
+
+// StoreU == Store: the unaligned variant forwards to Store with the same tag.
+template <typename T, size_t N>
+HWY_API void StoreU(Vec128<T, N> v, Simd<T, N> d, T* HWY_RESTRICT p) {
+  Store(v, d, p);
+}
+
+// ------------------------------ Non-temporal stores
+
+// Same as aligned stores on non-x86.
+
+template <typename T, size_t N>
+HWY_API void Stream(Vec128<T, N> v, Simd<T, N> d, T* HWY_RESTRICT aligned) {
+  // Forward to Store so partial vectors write only N lanes, not all 16 bytes.
+  Store(v, d, aligned);
+}
+
+// ------------------------------ Scatter (Store)
+
+template <typename T, size_t N, typename Offset, HWY_IF_LE128(T, N)>
+HWY_API void ScatterOffset(Vec128<T, N> v, Simd<T, N> d, T* HWY_RESTRICT base,
+                           const Vec128<Offset, N> offset) {
+  static_assert(sizeof(T) == sizeof(Offset), "Must match for portability");
+
+  // Spill values and offsets to the stack so each lane can be read as a scalar.
+  alignas(16) T values[N];
+  alignas(16) Offset byte_offsets[N];
+  Store(v, d, values);
+  Store(offset, Simd<Offset, N>(), byte_offsets);
+
+  uint8_t* const dst = reinterpret_cast<uint8_t*>(base);  // offsets are bytes
+  for (size_t lane = 0; lane < N; ++lane) {
+    CopyBytes<sizeof(T)>(&values[lane], dst + byte_offsets[lane]);
+  }
+}
+
+template <typename T, size_t N, typename Index, HWY_IF_LE128(T, N)>
+HWY_API void ScatterIndex(Vec128<T, N> v, Simd<T, N> d, T* HWY_RESTRICT base,
+                          const Vec128<Index, N> index) {
+  static_assert(sizeof(T) == sizeof(Index), "Must match for portability");
+
+  // Spill values and indices to the stack so each lane can be read as a scalar.
+  alignas(16) T values[N];
+  alignas(16) Index element[N];
+  Store(v, d, values);
+  Store(index, Simd<Index, N>(), element);
+
+  for (size_t lane = 0; lane < N; ++lane) {
+    base[element[lane]] = values[lane];  // indices count elements of T
+  }
+}
+
+// ------------------------------ Gather (Load/Store)
+
+template <typename T, size_t N, typename Offset>
+HWY_API Vec128<T, N> GatherOffset(const Simd<T, N> d,
+                                  const T* HWY_RESTRICT base,
+                                  const Vec128<Offset, N> offset) {
+  static_assert(sizeof(T) == sizeof(Offset), "Must match for portability");
+
+  alignas(16) Offset byte_offsets[N];
+  Store(offset, Simd<Offset, N>(), byte_offsets);
+  // Offsets are in bytes, so read through a byte pointer, one lane at a time.
+  const uint8_t* const src = reinterpret_cast<const uint8_t*>(base);
+  alignas(16) T gathered[N];
+  for (size_t lane = 0; lane < N; ++lane) {
+    CopyBytes<sizeof(T)>(src + byte_offsets[lane], &gathered[lane]);
+  }
+  return Load(d, gathered);
+}
+
+template <typename T, size_t N, typename Index>
+HWY_API Vec128<T, N> GatherIndex(const Simd<T, N> d, const T* HWY_RESTRICT base,
+                                 const Vec128<Index, N> index) {
+  static_assert(sizeof(T) == sizeof(Index), "Must match for portability");
+
+  alignas(16) Index element[N];
+  Store(index, Simd<Index, N>(), element);
+
+  alignas(16) T gathered[N];  // filled lane by lane, then loaded as a vector
+  for (size_t lane = 0; lane < N; ++lane) {
+    gathered[lane] = base[element[lane]];
+  }
+  return Load(d, gathered);
+}
+
+// ================================================== SWIZZLE
+
+// ------------------------------ Extract lane
+
+// Returns lane 0 of a vector/part; every overload extracts lane index 0.
+template <size_t N>
+HWY_API uint8_t GetLane(const Vec128<uint8_t, N> v) {
+  return wasm_i8x16_extract_lane(v.raw, 0);
+}
+template <size_t N>
+HWY_API int8_t GetLane(const Vec128<int8_t, N> v) {
+  return wasm_i8x16_extract_lane(v.raw, 0);
+}
+template <size_t N>
+HWY_API uint16_t GetLane(const Vec128<uint16_t, N> v) {
+  return wasm_i16x8_extract_lane(v.raw, 0);
+}
+template <size_t N>
+HWY_API int16_t GetLane(const Vec128<int16_t, N> v) {
+  return wasm_i16x8_extract_lane(v.raw, 0);
+}
+template <size_t N>
+HWY_API uint32_t GetLane(const Vec128<uint32_t, N> v) {
+  return wasm_i32x4_extract_lane(v.raw, 0);
+}
+template <size_t N>
+HWY_API int32_t GetLane(const Vec128<int32_t, N> v) {
+  return wasm_i32x4_extract_lane(v.raw, 0);
+}
+template <size_t N>
+HWY_API float GetLane(const Vec128<float, N> v) {
+  return wasm_f32x4_extract_lane(v.raw, 0);  // f32x4 form of the same extract
+}
+
+// ------------------------------ Extract half
+
+// Returns upper/lower half of a vector.
+template <typename T, size_t N>
+HWY_API Vec128<T, N / 2> LowerHalf(Vec128<T, N> v) {
+  return Vec128<T, N / 2>{v.raw};  // same raw value, relabeled as N / 2 lanes
+}
+
+// These copy hi into lo (smaller instruction encoding than shifts).
+template <typename T>  // result 32-bit lanes 0,1 come from v's lanes 2,3
+HWY_API Vec128<T, 8 / sizeof(T)> UpperHalf(Vec128<T> v) {
+  // TODO(eustas): use swizzle?
+  return Vec128<T, 8 / sizeof(T)>{wasm_i32x4_shuffle(v.raw, v.raw, 2, 3, 2, 3)};
+}
+template <>  // float specialization; same shuffle as the primary template
+HWY_INLINE Vec128<float, 2> UpperHalf(Vec128<float> v) {
+  // TODO(eustas): use swizzle?
+  return Vec128<float, 2>{wasm_i32x4_shuffle(v.raw, v.raw, 2, 3, 2, 3)};
+}
+
+// ------------------------------ Shift vector by constant #bytes
+
+// 0x01..0F, kBytes = 1 => 0x02..0F00 (zero bytes enter at the low end).
+template <int kBytes, typename T>
+HWY_API Vec128<T> ShiftLeftBytes(const Vec128<T> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  const __i8x16 zero = wasm_i8x16_splat(0);  // index 16 below = a zero byte
+  switch (kBytes) {  // one fixed shuffle pattern per byte count
+    case 0:
+      return v;
+
+    case 1:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 16, 0, 1, 2, 3, 4, 5, 6,
+                                          7, 8, 9, 10, 11, 12, 13, 14)};
+
+    case 2:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 0, 1, 2, 3, 4, 5,
+                                          6, 7, 8, 9, 10, 11, 12, 13)};
+
+    case 3:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 0, 1, 2, 3,
+                                          4, 5, 6, 7, 8, 9, 10, 11, 12)};
+
+    case 4:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 0, 1, 2,
+                                          3, 4, 5, 6, 7, 8, 9, 10, 11)};
+
+    case 5:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 0, 1,
+                                          2, 3, 4, 5, 6, 7, 8, 9, 10)};
+
+    case 6:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          0, 1, 2, 3, 4, 5, 6, 7, 8, 9)};
+
+    case 7:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          16, 0, 1, 2, 3, 4, 5, 6, 7, 8)};
+
+    case 8:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          16, 16, 0, 1, 2, 3, 4, 5, 6, 7)};
+
+    case 9:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          16, 16, 16, 0, 1, 2, 3, 4, 5, 6)};
+
+    case 10:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          16, 16, 16, 16, 0, 1, 2, 3, 4, 5)};
+
+    case 11:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          16, 16, 16, 16, 16, 0, 1, 2, 3, 4)};
+
+    case 12:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          16, 16, 16, 16, 16, 16, 0, 1, 2, 3)};
+
+    case 13:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          16, 16, 16, 16, 16, 16, 16, 0, 1, 2)};
+
+    case 14:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          16, 16, 16, 16, 16, 16, 16, 16, 0,
+                                          1)};
+
+    case 15:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 16, 16, 16, 16, 16, 16,
+                                          16, 16, 16, 16, 16, 16, 16, 16, 16,
+                                          0)};
+  }
+  return Vec128<T>{zero};  // kBytes == 16: no byte of v remains
+}
+
+template <int kLanes, typename T, size_t N>
+HWY_API Vec128<T, N> ShiftLeftLanes(const Vec128<T, N> v) {
+  // A lane shift is a byte shift by kLanes * sizeof(T) on the uint8_t view.
+  const auto bytes = BitCast(Simd<uint8_t, N * sizeof(T)>(), v);
+  return BitCast(Simd<T, N>(), ShiftLeftBytes<kLanes * sizeof(T)>(bytes));
+}
+
+// 0x01..0F, kBytes = 1 => 0x0001..0E (zero bytes enter at the high end).
+template <int kBytes, typename T>
+HWY_API Vec128<T> ShiftRightBytes(const Vec128<T> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  const __i8x16 zero = wasm_i8x16_splat(0);  // index 16 below = a zero byte
+  switch (kBytes) {  // one fixed shuffle pattern per byte count
+    case 0:
+      return v;
+
+    case 1:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 1, 2, 3, 4, 5, 6, 7, 8,
+                                          9, 10, 11, 12, 13, 14, 15, 16)};
+
+    case 2:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 2, 3, 4, 5, 6, 7, 8, 9,
+                                          10, 11, 12, 13, 14, 15, 16, 16)};
+
+    case 3:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 3, 4, 5, 6, 7, 8, 9, 10,
+                                          11, 12, 13, 14, 15, 16, 16, 16)};
+
+    case 4:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 4, 5, 6, 7, 8, 9, 10, 11,
+                                          12, 13, 14, 15, 16, 16, 16, 16)};
+
+    case 5:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 5, 6, 7, 8, 9, 10, 11,
+                                          12, 13, 14, 15, 16, 16, 16, 16, 16)};
+
+    case 6:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 6, 7, 8, 9, 10, 11, 12,
+                                          13, 14, 15, 16, 16, 16, 16, 16, 16)};
+
+    case 7:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 7, 8, 9, 10, 11, 12, 13,
+                                          14, 15, 16, 16, 16, 16, 16, 16, 16)};
+
+    case 8:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 8, 9, 10, 11, 12, 13, 14,
+                                          15, 16, 16, 16, 16, 16, 16, 16, 16)};
+
+    case 9:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 9, 10, 11, 12, 13, 14,
+                                          15, 16, 16, 16, 16, 16, 16, 16, 16,
+                                          16)};
+
+    case 10:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 10, 11, 12, 13, 14, 15,
+                                          16, 16, 16, 16, 16, 16, 16, 16, 16,
+                                          16)};
+
+    case 11:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 11, 12, 13, 14, 15, 16,
+                                          16, 16, 16, 16, 16, 16, 16, 16, 16,
+                                          16)};
+
+    case 12:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 12, 13, 14, 15, 16, 16,
+                                          16, 16, 16, 16, 16, 16, 16, 16, 16,
+                                          16)};
+
+    case 13:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 13, 14, 15, 16, 16, 16,
+                                          16, 16, 16, 16, 16, 16, 16, 16, 16,
+                                          16)};
+
+    case 14:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 14, 15, 16, 16, 16, 16,
+                                          16, 16, 16, 16, 16, 16, 16, 16, 16,
+                                          16)};
+
+    case 15:
+      return Vec128<T>{wasm_i8x16_shuffle(v.raw, zero, 15, 16, 16, 16, 16, 16,
+                                          16, 16, 16, 16, 16, 16, 16, 16, 16,
+                                          16)};
+  }
+  return Vec128<T>{zero};  // kBytes == 16: no byte of v remains
+}
+
+template <int kLanes, typename T, size_t N>
+HWY_API Vec128<T, N> ShiftRightLanes(const Vec128<T, N> v) {
+  // A lane shift is a byte shift by kLanes * sizeof(T) on the uint8_t view.
+  const auto bytes = BitCast(Simd<uint8_t, N * sizeof(T)>(), v);
+  return BitCast(Simd<T, N>(), ShiftRightBytes<kLanes * sizeof(T)>(bytes));
+}
+
+// ------------------------------ Extract from 2x 128-bit at constant offset
+
+// Extracts 128 bits from <hi, lo> by skipping the least-significant kBytes.
+template <int kBytes, typename T>
+HWY_API Vec128<T> CombineShiftRightBytes(const Vec128<T> hi,
+                                         const Vec128<T> lo) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  switch (kBytes) {  // indices 0..15 select from lo, 16..31 select from hi
+    case 0:
+      return lo;
+
+    case 1:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 1, 2, 3, 4, 5, 6, 7,
+                                          8, 9, 10, 11, 12, 13, 14, 15, 16)};
+
+    case 2:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 2, 3, 4, 5, 6, 7, 8,
+                                          9, 10, 11, 12, 13, 14, 15, 16, 17)};
+
+    case 3:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 3, 4, 5, 6, 7, 8, 9,
+                                          10, 11, 12, 13, 14, 15, 16, 17, 18)};
+
+    case 4:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 4, 5, 6, 7, 8, 9, 10,
+                                          11, 12, 13, 14, 15, 16, 17, 18, 19)};
+
+    case 5:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 5, 6, 7, 8, 9, 10, 11,
+                                          12, 13, 14, 15, 16, 17, 18, 19, 20)};
+
+    case 6:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 6, 7, 8, 9, 10, 11,
+                                          12, 13, 14, 15, 16, 17, 18, 19, 20,
+                                          21)};
+
+    case 7:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 7, 8, 9, 10, 11, 12,
+                                          13, 14, 15, 16, 17, 18, 19, 20, 21,
+                                          22)};
+
+    case 8:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 8, 9, 10, 11, 12, 13,
+                                          14, 15, 16, 17, 18, 19, 20, 21, 22,
+                                          23)};
+
+    case 9:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 9, 10, 11, 12, 13, 14,
+                                          15, 16, 17, 18, 19, 20, 21, 22, 23,
+                                          24)};
+
+    case 10:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 10, 11, 12, 13, 14,
+                                          15, 16, 17, 18, 19, 20, 21, 22, 23,
+                                          24, 25)};
+
+    case 11:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 11, 12, 13, 14, 15,
+                                          16, 17, 18, 19, 20, 21, 22, 23, 24,
+                                          25, 26)};
+
+    case 12:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 12, 13, 14, 15, 16,
+                                          17, 18, 19, 20, 21, 22, 23, 24, 25,
+                                          26, 27)};
+
+    case 13:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 13, 14, 15, 16, 17,
+                                          18, 19, 20, 21, 22, 23, 24, 25, 26,
+                                          27, 28)};
+
+    case 14:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 14, 15, 16, 17, 18,
+                                          19, 20, 21, 22, 23, 24, 25, 26, 27,
+                                          28, 29)};
+
+    case 15:
+      return Vec128<T>{wasm_i8x16_shuffle(lo.raw, hi.raw, 15, 16, 17, 18, 19,
+                                          20, 21, 22, 23, 24, 25, 26, 27, 28,
+                                          29, 30)};
+  }
+  return hi;  // kBytes == 16: all of lo is skipped
+}
+
+// ------------------------------ Broadcast/splat any lane
+
+// Unsigned: every lane of the shuffle result is a copy of lane kLane of v.
+template <int kLane, size_t N>
+HWY_API Vec128<uint16_t, N> Broadcast(const Vec128<uint16_t, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  return Vec128<uint16_t, N>{wasm_i16x8_shuffle(
+      v.raw, v.raw, kLane, kLane, kLane, kLane, kLane, kLane, kLane, kLane)};
+}
+template <int kLane, size_t N>
+HWY_API Vec128<uint32_t, N> Broadcast(const Vec128<uint32_t, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  return Vec128<uint32_t, N>{
+      wasm_i32x4_shuffle(v.raw, v.raw, kLane, kLane, kLane, kLane)};
+}
+
+// Signed: same shuffles as the unsigned overloads.
+template <int kLane, size_t N>
+HWY_API Vec128<int16_t, N> Broadcast(const Vec128<int16_t, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  return Vec128<int16_t, N>{wasm_i16x8_shuffle(
+      v.raw, v.raw, kLane, kLane, kLane, kLane, kLane, kLane, kLane, kLane)};
+}
+template <int kLane, size_t N>
+HWY_API Vec128<int32_t, N> Broadcast(const Vec128<int32_t, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  return Vec128<int32_t, N>{
+      wasm_i32x4_shuffle(v.raw, v.raw, kLane, kLane, kLane, kLane)};
+}
+
+// Float: uses the 32-bit integer shuffle on the raw value.
+template <int kLane, size_t N>
+HWY_API Vec128<float, N> Broadcast(const Vec128<float, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  return Vec128<float, N>{
+      wasm_i32x4_shuffle(v.raw, v.raw, kLane, kLane, kLane, kLane)};
+}
+
+// ------------------------------ Shuffle bytes with variable indices
+
+// Returns vector of bytes[from[i]]. "from" is also interpreted as bytes, i.e.
+// indices in [0, 16); the scalar fallback below yields 0 for any index >= 16.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> TableLookupBytes(const Vec128<T, N> bytes,
+                                      const Vec128<T, N> from) {
+// Not yet available in all engines, see
+// https://github.com/WebAssembly/simd/blob/bdcc304b2d379f4601c2c44ea9b44ed9484fde7e/proposals/simd/ImplementationStatus.md
+// V8 implementation of this had a bug, fixed on 2021-04-03:
+// https://chromium-review.googlesource.com/c/v8/v8/+/2822951
+#if 0
+  return Vec128<T, N>{wasm_i8x16_swizzle(bytes.raw, from.raw)};
+#else
+  alignas(16) uint8_t indices[16];
+  alignas(16) uint8_t table[16];
+  alignas(16) uint8_t result[16];
+  wasm_v128_store(table, bytes.raw);
+  wasm_v128_store(indices, from.raw);
+  for (size_t pos = 0; pos < 16; ++pos) {
+    result[pos] = (indices[pos] >= 16) ? 0 : table[indices[pos]];
+  }
+  return Vec128<T, N>{wasm_v128_load(result)};
+#endif
+}
+
+// ------------------------------ Hard-coded shuffles
+
+// Notation: let Vec128<int32_t> have lanes 3,2,1,0 (0 is least-significant).
+// Shuffle0321 rotates one lane to the right (the previous least-significant
+// lane is now most-significant). These could also be implemented via
+// CombineShiftRightBytes but the shuffle_abcd notation is more convenient.
+
+// Swap 32-bit halves in 64-bit halves: result lanes 0..3 = v lanes 1,0,3,2.
+HWY_API Vec128<uint32_t> Shuffle2301(const Vec128<uint32_t> v) {
+  return Vec128<uint32_t>{wasm_i32x4_shuffle(v.raw, v.raw, 1, 0, 3, 2)};
+}
+HWY_API Vec128<int32_t> Shuffle2301(const Vec128<int32_t> v) {
+  return Vec128<int32_t>{wasm_i32x4_shuffle(v.raw, v.raw, 1, 0, 3, 2)};
+}
+HWY_API Vec128<float> Shuffle2301(const Vec128<float> v) {
+  return Vec128<float>{wasm_i32x4_shuffle(v.raw, v.raw, 1, 0, 3, 2)};
+}
+
+// Swap 64-bit halves: result 64-bit lanes 0,1 = v's 64-bit lanes 1,0.
+HWY_API Vec128<uint32_t> Shuffle1032(const Vec128<uint32_t> v) {
+  return Vec128<uint32_t>{wasm_i64x2_shuffle(v.raw, v.raw, 1, 0)};
+}
+HWY_API Vec128<int32_t> Shuffle1032(const Vec128<int32_t> v) {
+  return Vec128<int32_t>{wasm_i64x2_shuffle(v.raw, v.raw, 1, 0)};
+}
+HWY_API Vec128<float> Shuffle1032(const Vec128<float> v) {
+  return Vec128<float>{wasm_i64x2_shuffle(v.raw, v.raw, 1, 0)};
+}
+
+// Rotate right 32 bits: result lanes 0..3 = v lanes 1,2,3,0.
+HWY_API Vec128<uint32_t> Shuffle0321(const Vec128<uint32_t> v) {
+  return Vec128<uint32_t>{wasm_i32x4_shuffle(v.raw, v.raw, 1, 2, 3, 0)};
+}
+HWY_API Vec128<int32_t> Shuffle0321(const Vec128<int32_t> v) {
+  return Vec128<int32_t>{wasm_i32x4_shuffle(v.raw, v.raw, 1, 2, 3, 0)};
+}
+HWY_API Vec128<float> Shuffle0321(const Vec128<float> v) {
+  return Vec128<float>{wasm_i32x4_shuffle(v.raw, v.raw, 1, 2, 3, 0)};
+}
+// Rotate left 32 bits: result lanes 0..3 = v lanes 3,0,1,2.
+HWY_API Vec128<uint32_t> Shuffle2103(const Vec128<uint32_t> v) {
+  return Vec128<uint32_t>{wasm_i32x4_shuffle(v.raw, v.raw, 3, 0, 1, 2)};
+}
+HWY_API Vec128<int32_t> Shuffle2103(const Vec128<int32_t> v) {
+  return Vec128<int32_t>{wasm_i32x4_shuffle(v.raw, v.raw, 3, 0, 1, 2)};
+}
+HWY_API Vec128<float> Shuffle2103(const Vec128<float> v) {
+  return Vec128<float>{wasm_i32x4_shuffle(v.raw, v.raw, 3, 0, 1, 2)};
+}
+
+// Reverse: result lanes 0..3 = v lanes 3,2,1,0.
+HWY_API Vec128<uint32_t> Shuffle0123(const Vec128<uint32_t> v) {
+  return Vec128<uint32_t>{wasm_i32x4_shuffle(v.raw, v.raw, 3, 2, 1, 0)};
+}
+HWY_API Vec128<int32_t> Shuffle0123(const Vec128<int32_t> v) {
+  return Vec128<int32_t>{wasm_i32x4_shuffle(v.raw, v.raw, 3, 2, 1, 0)};
+}
+HWY_API Vec128<float> Shuffle0123(const Vec128<float> v) {
+  return Vec128<float>{wasm_i32x4_shuffle(v.raw, v.raw, 3, 2, 1, 0)};
+}
+
+// ------------------------------ TableLookupLanes
+
+// Returned by SetTableIndices for use by TableLookupLanes.
+template <typename T, size_t N>
+struct Indices128 {
+  __v128_u raw;  // per-byte control vector, passed on to TableLookupBytes
+};
+
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API Indices128<T, N> SetTableIndices(Simd<T, N> d, const int32_t* idx) {
+#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER)
+  for (size_t i = 0; i < N; ++i) {
+    HWY_DASSERT(0 <= idx[i] && idx[i] < static_cast<int32_t>(N));
+  }
+#endif
+
+  // Expand each lane index into sizeof(T) consecutive byte indices so the
+  // result can drive a byte-granular lookup; unused control bytes stay 0.
+  alignas(16) uint8_t control[16] = {0};
+  for (size_t i = 0; i < N * sizeof(T); ++i) {
+    const size_t lane = i / sizeof(T);
+    control[i] = static_cast<uint8_t>(idx[lane] * sizeof(T) + i % sizeof(T));
+  }
+  const Repartition<uint8_t, decltype(d)> d8;
+  return Indices128<T, N>{Load(d8, control).raw};
+}
+
+template <size_t N>  // lane lookup done as a byte lookup with expanded indices
+HWY_API Vec128<uint32_t, N> TableLookupLanes(
+    const Vec128<uint32_t, N> v, const Indices128<uint32_t, N> idx) {
+  return TableLookupBytes(v, Vec128<uint32_t, N>{idx.raw});
+}
+template <size_t N>  // same as the uint32_t overload
+HWY_API Vec128<int32_t, N> TableLookupLanes(const Vec128<int32_t, N> v,
+                                            const Indices128<int32_t, N> idx) {
+  return TableLookupBytes(v, Vec128<int32_t, N>{idx.raw});
+}
+template <size_t N>
+HWY_API Vec128<float, N> TableLookupLanes(const Vec128<float, N> v,
+                                          const Indices128<float, N> idx) {
+  // Byte lookup on the int32_t view of v, reinterpreted as float afterwards.
+  const Vec128<int32_t, N> picked = TableLookupBytes(
+      BitCast(Simd<int32_t, N>(), v), Vec128<int32_t, N>{idx.raw});
+  return BitCast(Simd<float, N>(), picked);
+}
+
+// ------------------------------ Zip lanes
+
+// Same as Interleave*, except that the return lanes are double-width integers;
+// this is necessary because the single-lane scalar cannot return two values.
+
+template <size_t N>  // u8 -> u16: a gives the low byte of each lane, b the high
+HWY_API Vec128<uint16_t, (N + 1) / 2> ZipLower(const Vec128<uint8_t, N> a,
+                                               const Vec128<uint8_t, N> b) {
+  return Vec128<uint16_t, (N + 1) / 2>{wasm_i8x16_shuffle(
+      a.raw, b.raw, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23)};
+}
+template <size_t N>  // u16 -> u32: a gives the low half of each lane
+HWY_API Vec128<uint32_t, (N + 1) / 2> ZipLower(const Vec128<uint16_t, N> a,
+                                               const Vec128<uint16_t, N> b) {
+  return Vec128<uint32_t, (N + 1) / 2>{
+      wasm_i16x8_shuffle(a.raw, b.raw, 0, 8, 1, 9, 2, 10, 3, 11)};
+}
+
+template <size_t N>  // i8 -> i16: same shuffle as the u8 overload
+HWY_API Vec128<int16_t, (N + 1) / 2> ZipLower(const Vec128<int8_t, N> a,
+                                              const Vec128<int8_t, N> b) {
+  return Vec128<int16_t, (N + 1) / 2>{wasm_i8x16_shuffle(
+      a.raw, b.raw, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23)};
+}
+template <size_t N>  // i16 -> i32: same shuffle as the u16 overload
+HWY_API Vec128<int32_t, (N + 1) / 2> ZipLower(const Vec128<int16_t, N> a,
+                                              const Vec128<int16_t, N> b) {
+  return Vec128<int32_t, (N + 1) / 2>{
+      wasm_i16x8_shuffle(a.raw, b.raw, 0, 8, 1, 9, 2, 10, 3, 11)};
+}
+
+template <size_t N>  // u8 -> u16 from bytes 8..15 of a and b; a is the low byte
+HWY_API Vec128<uint16_t, N / 2> ZipUpper(const Vec128<uint8_t, N> a,
+                                         const Vec128<uint8_t, N> b) {
+  return Vec128<uint16_t, N / 2>{wasm_i8x16_shuffle(a.raw, b.raw, 8, 24, 9, 25,
+                                                    10, 26, 11, 27, 12, 28, 13,
+                                                    29, 14, 30, 15, 31)};
+}
+template <size_t N>  // u16 -> u32 from 16-bit lanes 4..7 of a and b
+HWY_API Vec128<uint32_t, N / 2> ZipUpper(const Vec128<uint16_t, N> a,
+                                         const Vec128<uint16_t, N> b) {
+  return Vec128<uint32_t, N / 2>{
+      wasm_i16x8_shuffle(a.raw, b.raw, 4, 12, 5, 13, 6, 14, 7, 15)};
+}
+
+template <size_t N>  // i8 -> i16: same shuffle as the u8 overload
+HWY_API Vec128<int16_t, N / 2> ZipUpper(const Vec128<int8_t, N> a,
+                                        const Vec128<int8_t, N> b) {
+  return Vec128<int16_t, N / 2>{wasm_i8x16_shuffle(a.raw, b.raw, 8, 24, 9, 25,
+                                                   10, 26, 11, 27, 12, 28, 13,
+                                                   29, 14, 30, 15, 31)};
+}
+template <size_t N>  // i16 -> i32: same shuffle as the u16 overload
+HWY_API Vec128<int32_t, N / 2> ZipUpper(const Vec128<int16_t, N> a,
+                                        const Vec128<int16_t, N> b) {
+  return Vec128<int32_t, N / 2>{
+      wasm_i16x8_shuffle(a.raw, b.raw, 4, 12, 5, 13, 6, 14, 7, 15)};
+}
+
+// ------------------------------ Interleave lanes
+
+// Interleaves lanes from halves of the 128-bit blocks of "a" (which provides
+// the least-significant lane) and "b". To concatenate two half-width integers
+// into one, use ZipLower/Upper instead (also works with scalar).
+
+template <typename T>  // generic case: reuse ZipLower's raw result as lanes of T
+HWY_API Vec128<T> InterleaveLower(const Vec128<T> a, const Vec128<T> b) {
+  return Vec128<T>{ZipLower(a, b).raw};
+}
+template <>  // 32-bit lanes: result = a0, b0, a1, b1
+HWY_INLINE Vec128<uint32_t> InterleaveLower<uint32_t>(
+    const Vec128<uint32_t> a, const Vec128<uint32_t> b) {
+  return Vec128<uint32_t>{wasm_i32x4_shuffle(a.raw, b.raw, 0, 4, 1, 5)};
+}
+template <>  // same shuffle as the uint32_t specialization
+HWY_INLINE Vec128<int32_t> InterleaveLower<int32_t>(const Vec128<int32_t> a,
+                                                    const Vec128<int32_t> b) {
+  return Vec128<int32_t>{wasm_i32x4_shuffle(a.raw, b.raw, 0, 4, 1, 5)};
+}
+template <>  // same shuffle as the uint32_t specialization
+HWY_INLINE Vec128<float> InterleaveLower<float>(const Vec128<float> a,
+                                                const Vec128<float> b) {
+  return Vec128<float>{wasm_i32x4_shuffle(a.raw, b.raw, 0, 4, 1, 5)};
+}
+
+template <typename T>  // generic case: reuse ZipUpper's raw result as lanes of T
+HWY_API Vec128<T> InterleaveUpper(const Vec128<T> a, const Vec128<T> b) {
+  return Vec128<T>{ZipUpper(a, b).raw};
+}
+template <>  // 32-bit lanes: result = a2, b2, a3, b3
+HWY_INLINE Vec128<uint32_t> InterleaveUpper<uint32_t>(
+    const Vec128<uint32_t> a, const Vec128<uint32_t> b) {
+  return Vec128<uint32_t>{wasm_i32x4_shuffle(a.raw, b.raw, 2, 6, 3, 7)};
+}
+template <>  // same shuffle as the uint32_t specialization
+HWY_INLINE Vec128<int32_t> InterleaveUpper<int32_t>(const Vec128<int32_t> a,
+                                                    const Vec128<int32_t> b) {
+  return Vec128<int32_t>{wasm_i32x4_shuffle(a.raw, b.raw, 2, 6, 3, 7)};
+}
+template <>  // same shuffle as the uint32_t specialization
+HWY_INLINE Vec128<float> InterleaveUpper<float>(const Vec128<float> a,
+                                                const Vec128<float> b) {
+  return Vec128<float>{wasm_i32x4_shuffle(a.raw, b.raw, 2, 6, 3, 7)};
+}
+
+// ------------------------------ Blocks
+
+// Lower halves: hiH,hiL loH,loL |-> hiL,loL. 64-bit shuffle index 0 is the
+// lower half of "lo", index 2 the lower half of "hi".
+template <typename T>
+HWY_API Vec128<T> ConcatLowerLower(const Vec128<T> hi, const Vec128<T> lo) {
+  const auto lower_halves = wasm_i64x2_shuffle(lo.raw, hi.raw, 0, 2);
+  return Vec128<T>{lower_halves};
+}
+
+// Upper halves: hiH,hiL loH,loL |-> hiH,loH. 64-bit shuffle index 1 is the
+// upper half of "lo", index 3 the upper half of "hi".
+template <typename T>
+HWY_API Vec128<T> ConcatUpperUpper(const Vec128<T> hi, const Vec128<T> lo) {
+  const auto upper_halves = wasm_i64x2_shuffle(lo.raw, hi.raw, 1, 3);
+  return Vec128<T>{upper_halves};
+}
+
+// Inner halves: hiH,hiL loH,loL |-> hiL,loH. Implemented by shifting the
+// hi:lo pair right by 8 bytes.
+template <typename T>
+HWY_API Vec128<T> ConcatLowerUpper(const Vec128<T> hi, const Vec128<T> lo) {
+  constexpr int kHalfVectorBytes = 8;
+  return CombineShiftRightBytes<kHalfVectorBytes>(hi, lo);
+}
+
+// Outer halves: hiH,hiL loH,loL |-> hiH,loL. 64-bit shuffle index 0 is the
+// lower half of "lo", index 3 the upper half of "hi".
+template <typename T>
+HWY_API Vec128<T> ConcatUpperLower(const Vec128<T> hi, const Vec128<T> lo) {
+  const auto outer_halves = wasm_i64x2_shuffle(lo.raw, hi.raw, 0, 3);
+  return Vec128<T>{outer_halves};
+}
+
+// ------------------------------ Odd/even lanes
+
+namespace {
+
+// Per-lane-size helpers for OddEven: each returns a vector whose even lanes
+// come from b and whose odd lanes come from a.
+
+// 8-bit lanes: blend through a constant mask with all-ones in even bytes.
+template <typename T>
+HWY_API Vec128<T> odd_even_impl(hwy::SizeTag<1> /* tag */, const Vec128<T> a,
+                                const Vec128<T> b) {
+  alignas(16) constexpr uint8_t kEvenBytes[16] = {
+      0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0};
+  const Full128<uint8_t> d8;
+  const Full128<T> d;
+  const auto take_b = MaskFromVec(BitCast(d, Load(d8, kEvenBytes)));
+  return IfThenElse(take_b, b, a);
+}
+// 16-bit lanes: shuffle indices 8..15 select from b, 0..7 from a.
+template <typename T>
+HWY_API Vec128<T> odd_even_impl(hwy::SizeTag<2> /* tag */, const Vec128<T> a,
+                                const Vec128<T> b) {
+  const auto blended =
+      wasm_i16x8_shuffle(a.raw, b.raw, 8, 1, 10, 3, 12, 5, 14, 7);
+  return Vec128<T>{blended};
+}
+// 32-bit lanes: shuffle indices 4..7 select from b, 0..3 from a.
+template <typename T>
+HWY_API Vec128<T> odd_even_impl(hwy::SizeTag<4> /* tag */, const Vec128<T> a,
+                                const Vec128<T> b) {
+  const auto blended = wasm_i32x4_shuffle(a.raw, b.raw, 4, 1, 6, 3);
+  return Vec128<T>{blended};
+}
+// TODO(eustas): implement
+// template <typename T>
+// HWY_API Vec128<T> odd_even_impl(hwy::SizeTag<8> /* tag */,
+//                                                 const Vec128<T> a,
+//                                                 const Vec128<T> b)
+
+}  // namespace
+
+// Returns a vector with odd lanes taken from a and even lanes from b.
+// Dispatches on the lane size in bytes.
+template <typename T>
+HWY_API Vec128<T> OddEven(const Vec128<T> a, const Vec128<T> b) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  return odd_even_impl(LaneSizeTag(), a, b);
+}
+// float: shuffle the raw vectors directly (indices 4..7 select from b).
+template <>
+HWY_INLINE Vec128<float> OddEven<float>(const Vec128<float> a,
+                                        const Vec128<float> b) {
+  const auto blended = wasm_i32x4_shuffle(a.raw, b.raw, 4, 1, 6, 3);
+  return Vec128<float>{blended};
+}
+
+// ================================================== CONVERT
+
+// ------------------------------ Promotions (part w/ narrow lanes -> full)
+
+// Unsigned sources: the low lanes are zero-extended. The same bits serve for
+// both unsigned and signed destination types.
+
+// u8 -> u16
+template <size_t N>
+HWY_API Vec128<uint16_t, N> PromoteTo(Simd<uint16_t, N> /* tag */,
+                                      const Vec128<uint8_t, N> v) {
+  const auto as_16 = wasm_u16x8_extend_low_u8x16(v.raw);
+  return Vec128<uint16_t, N>{as_16};
+}
+// u8 -> u32 (two extension steps)
+template <size_t N>
+HWY_API Vec128<uint32_t, N> PromoteTo(Simd<uint32_t, N> /* tag */,
+                                      const Vec128<uint8_t, N> v) {
+  const auto as_16 = wasm_u16x8_extend_low_u8x16(v.raw);
+  const auto as_32 = wasm_u32x4_extend_low_u16x8(as_16);
+  return Vec128<uint32_t, N>{as_32};
+}
+// u8 -> i16
+template <size_t N>
+HWY_API Vec128<int16_t, N> PromoteTo(Simd<int16_t, N> /* tag */,
+                                     const Vec128<uint8_t, N> v) {
+  const auto as_16 = wasm_u16x8_extend_low_u8x16(v.raw);
+  return Vec128<int16_t, N>{as_16};
+}
+// u8 -> i32 (two extension steps)
+template <size_t N>
+HWY_API Vec128<int32_t, N> PromoteTo(Simd<int32_t, N> /* tag */,
+                                     const Vec128<uint8_t, N> v) {
+  const auto as_16 = wasm_u16x8_extend_low_u8x16(v.raw);
+  const auto as_32 = wasm_u32x4_extend_low_u16x8(as_16);
+  return Vec128<int32_t, N>{as_32};
+}
+// u16 -> u32
+template <size_t N>
+HWY_API Vec128<uint32_t, N> PromoteTo(Simd<uint32_t, N> /* tag */,
+                                      const Vec128<uint16_t, N> v) {
+  const auto as_32 = wasm_u32x4_extend_low_u16x8(v.raw);
+  return Vec128<uint32_t, N>{as_32};
+}
+// u16 -> i32
+template <size_t N>
+HWY_API Vec128<int32_t, N> PromoteTo(Simd<int32_t, N> /* tag */,
+                                     const Vec128<uint16_t, N> v) {
+  const auto as_32 = wasm_u32x4_extend_low_u16x8(v.raw);
+  return Vec128<int32_t, N>{as_32};
+}
+
+// Signed sources: the low lanes are sign-extended.
+
+// i8 -> i16
+template <size_t N>
+HWY_API Vec128<int16_t, N> PromoteTo(Simd<int16_t, N> /* tag */,
+                                     const Vec128<int8_t, N> v) {
+  const auto as_16 = wasm_i16x8_extend_low_i8x16(v.raw);
+  return Vec128<int16_t, N>{as_16};
+}
+// i8 -> i32 (two extension steps)
+template <size_t N>
+HWY_API Vec128<int32_t, N> PromoteTo(Simd<int32_t, N> /* tag */,
+                                     const Vec128<int8_t, N> v) {
+  const auto as_16 = wasm_i16x8_extend_low_i8x16(v.raw);
+  const auto as_32 = wasm_i32x4_extend_low_i16x8(as_16);
+  return Vec128<int32_t, N>{as_32};
+}
+// i16 -> i32
+template <size_t N>
+HWY_API Vec128<int32_t, N> PromoteTo(Simd<int32_t, N> /* tag */,
+                                     const Vec128<int16_t, N> v) {
+  const auto as_32 = wasm_i32x4_extend_low_i16x8(v.raw);
+  return Vec128<int32_t, N>{as_32};
+}
+
+// i32 -> f64, done lane by lane through memory. Converts at most two lanes;
+// when N < 2 the second output lane is 0.0.
+template <size_t N>
+HWY_API Vec128<double, N> PromoteTo(Simd<double, N> df,
+                                    const Vec128<int32_t, N> v) {
+  // TODO(janwas): use https://github.com/WebAssembly/simd/pull/383
+  const Simd<int32_t, N> di;
+  alignas(16) int32_t in[4];
+  Store(v, di, in);
+
+  alignas(16) double out[2];
+  out[0] = static_cast<double>(in[0]);
+  if (N >= 2) {
+    out[1] = static_cast<double>(in[1]);
+  } else {
+    out[1] = 0.0;
+  }
+  return Load(df, out);
+}
+
+template <size_t N>  // float16 bit patterns -> float, via integer vector ops
+HWY_INLINE Vec128<float, N> PromoteTo(Simd<float, N> /* tag */,
+                                      const Vec128<float16_t, N> v) {
+  const Simd<int32_t, N> di32;
+  const Simd<uint32_t, N> du32;
+  const Simd<float, N> df32;
+  // Expand the 16-bit patterns to u32 so we can shift and mask the fields.
+  const auto bits16 = PromoteTo(du32, Vec128<uint16_t, N>{v.raw});
+  const auto sign = ShiftRight<15>(bits16);  // bit 15
+  const auto biased_exp = ShiftRight<10>(bits16) & Set(du32, 0x1F);  // 5 bits
+  const auto mantissa = bits16 & Set(du32, 0x3FF);  // low 10 bits
+  const auto subnormal =  // f32 bits of mantissa * 2^-24; used if exponent==0
+      BitCast(du32, ConvertTo(df32, BitCast(di32, mantissa)) *
+                        Set(df32, 1.0f / 16384 / 1024));
+  // NOTE(review): biased_exp == 31 (inf/NaN) takes the normal path - confirm.
+  const auto biased_exp32 = biased_exp + Set(du32, 127 - 15);  // f32 bias
+  const auto mantissa32 = ShiftLeft<23 - 10>(mantissa);  // widen to 23 bits
+  const auto normal = ShiftLeft<23>(biased_exp32) | mantissa32;
+  const auto bits32 = IfThenElse(biased_exp == Zero(du32), subnormal, normal);
+  return BitCast(df32, ShiftLeft<31>(sign) | bits32);  // sign into bit 31
+}
+
+// ------------------------------ Demotions (full -> part w/ narrow lanes)
+
+// Integer demotions use the WASM "narrow" instructions. The input is passed as
+// both operands; only the lower lanes of the result are relevant.
+
+// i32 -> u16
+template <size_t N>
+HWY_API Vec128<uint16_t, N> DemoteTo(Simd<uint16_t, N> /* tag */,
+                                     const Vec128<int32_t, N> v) {
+  const auto as_u16 = wasm_u16x8_narrow_i32x4(v.raw, v.raw);
+  return Vec128<uint16_t, N>{as_u16};
+}
+
+// i32 -> i16
+template <size_t N>
+HWY_API Vec128<int16_t, N> DemoteTo(Simd<int16_t, N> /* tag */,
+                                    const Vec128<int32_t, N> v) {
+  const auto as_i16 = wasm_i16x8_narrow_i32x4(v.raw, v.raw);
+  return Vec128<int16_t, N>{as_i16};
+}
+
+// i32 -> u8 (via i16)
+template <size_t N>
+HWY_API Vec128<uint8_t, N> DemoteTo(Simd<uint8_t, N> /* tag */,
+                                    const Vec128<int32_t, N> v) {
+  const auto as_i16 = wasm_i16x8_narrow_i32x4(v.raw, v.raw);
+  const auto as_u8 = wasm_u8x16_narrow_i16x8(as_i16, as_i16);
+  return Vec128<uint8_t, N>{as_u8};
+}
+
+// i16 -> u8
+template <size_t N>
+HWY_API Vec128<uint8_t, N> DemoteTo(Simd<uint8_t, N> /* tag */,
+                                    const Vec128<int16_t, N> v) {
+  const auto as_u8 = wasm_u8x16_narrow_i16x8(v.raw, v.raw);
+  return Vec128<uint8_t, N>{as_u8};
+}
+
+// i32 -> i8 (via i16)
+template <size_t N>
+HWY_API Vec128<int8_t, N> DemoteTo(Simd<int8_t, N> /* tag */,
+                                   const Vec128<int32_t, N> v) {
+  const auto as_i16 = wasm_i16x8_narrow_i32x4(v.raw, v.raw);
+  const auto as_i8 = wasm_i8x16_narrow_i16x8(as_i16, as_i16);
+  return Vec128<int8_t, N>{as_i8};
+}
+
+// i16 -> i8
+template <size_t N>
+HWY_API Vec128<int8_t, N> DemoteTo(Simd<int8_t, N> /* tag */,
+                                   const Vec128<int16_t, N> v) {
+  const auto as_i8 = wasm_i8x16_narrow_i16x8(v.raw, v.raw);
+  return Vec128<int8_t, N>{as_i8};
+}
+
+// f64 -> i32, done lane by lane through memory. Converts at most two lanes;
+// the remaining output lanes are zero.
+// NOTE(review): the scalar cast is not clamped, so out-of-range or NaN inputs
+// are presumably undefined here - confirm whether saturation is expected.
+template <size_t N>
+HWY_API Vec128<int32_t, N> DemoteTo(Simd<int32_t, N> di,
+                                    const Vec128<double, N> v) {
+  // TODO(janwas): use https://github.com/WebAssembly/simd/pull/383
+  const Simd<double, N> dd;
+  alignas(16) double in[2];
+  Store(v, dd, in);
+
+  alignas(16) int32_t out[4] = {};
+  out[0] = static_cast<int32_t>(in[0]);
+  if (N >= 2) {
+    out[1] = static_cast<int32_t>(in[1]);
+  }
+  return Load(di, out);
+}
+
+template <size_t N>  // float -> float16 bit patterns, via integer vector ops
+HWY_INLINE Vec128<float16_t, N> DemoteTo(Simd<float16_t, N> /* tag */,
+                                         const Vec128<float, N> v) {
+  const Simd<int32_t, N> di;
+  const Simd<uint32_t, N> du;
+  const Simd<uint16_t, N> du16;
+  const auto bits32 = BitCast(du, v);
+  const auto sign = ShiftRight<31>(bits32);  // bit 31
+  const auto biased_exp32 = ShiftRight<23>(bits32) & Set(du, 0xFF);  // 8 bits
+  const auto mantissa32 = bits32 & Set(du, 0x7FFFFF);  // low 23 bits
+  // NOTE(review): exp is clamped to 15, so inf/NaN are not kept - confirm.
+  const auto k15 = Set(di, 15);
+  const auto exp = Min(BitCast(di, biased_exp32) - Set(di, 127), k15);
+  const auto is_tiny = exp < Set(di, -24);  // result is zeroed below
+  // Exponents below -14 get a zero exponent field and a shifted mantissa.
+  const auto is_subnormal = exp < Set(di, -14);
+  const auto biased_exp16 =
+      BitCast(du, IfThenZeroElse(is_subnormal, exp + k15));
+  const auto sub_exp = BitCast(du, Set(di, -14) - exp);  // [1, 11)
+  const auto sub_m = (Set(du, 1) << (Set(du, 10) - sub_exp)) +
+                     (mantissa32 >> (Set(du, 13) + sub_exp));
+  const auto mantissa16 = IfThenElse(RebindMask(du, is_subnormal), sub_m,
+                                     ShiftRight<13>(mantissa32));  // <1024
+  // Assemble sign (bit 15), exponent (bits 10..14) and mantissa (bits 0..9).
+  const auto sign16 = ShiftLeft<15>(sign);
+  const auto normal16 = sign16 | ShiftLeft<10>(biased_exp16) | mantissa16;
+  const auto bits16 = IfThenZeroElse(is_tiny, BitCast(di, normal16));
+  return Vec128<float16_t, N>{DemoteTo(du16, bits16).raw};  // i32 -> u16
+}
+
+// Narrows u32 lanes to u8 in two steps (32 -> 16 -> 8 bits). Intended for
+// inputs that are already limited to [0, 255].
+template <size_t N>
+HWY_API Vec128<uint8_t, N> U8FromU32(const Vec128<uint32_t, N> v) {
+  const auto as_i16 = wasm_i16x8_narrow_i32x4(v.raw, v.raw);
+  const auto as_u8 = wasm_u8x16_narrow_i16x8(as_i16, as_i16);
+  return Vec128<uint8_t, N>{as_u8};
+}
+
+// ------------------------------ Convert i32 <=> f32 (Round)
+
+// i32 -> f32.
+template <size_t N>
+HWY_API Vec128<float, N> ConvertTo(Simd<float, N> /* tag */,
+                                   const Vec128<int32_t, N> v) {
+  const auto as_float = wasm_f32x4_convert_i32x4(v.raw);
+  return Vec128<float, N>{as_float};
+}
+// f32 -> i32. Truncates (rounds toward zero), using the saturating
+// conversion instruction.
+template <size_t N>
+HWY_API Vec128<int32_t, N> ConvertTo(Simd<int32_t, N> /* tag */,
+                                     const Vec128<float, N> v) {
+  const auto truncated = wasm_i32x4_trunc_sat_f32x4(v.raw);
+  return Vec128<int32_t, N>{truncated};
+}
+
+// Rounds with Round(), then converts the result to i32.
+template <size_t N>
+HWY_API Vec128<int32_t, N> NearestInt(const Vec128<float, N> v) {
+  const Simd<int32_t, N> di;
+  const auto rounded = Round(v);
+  return ConvertTo(di, rounded);
+}
+
+// ================================================== MISC
+
+// ------------------------------ Mask
+
+namespace detail {
+
+template <typename T, size_t N>  // 8-bit lanes: packs the mask into 16 bits
+HWY_API uint64_t BitsFromMask(hwy::SizeTag<1> /*tag*/,
+                              const Mask128<T, N> mask) {
+  alignas(16) uint64_t lanes[2];
+  wasm_v128_store(lanes, mask.raw);
+  // Presumably each mask byte is 0x00 or 0xFF; the top byte of lane * kMagic
+  constexpr uint64_t kMagic = 0x103070F1F3F80ULL;  // is that half's 8 bits.
+  const uint64_t lo = ((lanes[0] * kMagic) >> 56);  // bits 0..7
+  const uint64_t hi = ((lanes[1] * kMagic) >> 48) & 0xFF00;  // bits 8..15
+  return (hi + lo);
+}
+
+// 16-bit lanes: narrow each lane to one byte, then reuse the 8-bit version.
+template <typename T, size_t N>
+HWY_API uint64_t BitsFromMask(hwy::SizeTag<2> /*tag*/,
+                              const Mask128<T, N> mask) {
+  // Remove useless lower half of each u16 while preserving the sign bit.
+  // The second narrow operand is zero, so the upper 8 result bytes are clear.
+  const __i16x8 upper_fill = wasm_i16x8_splat(0);
+  const auto narrowed = wasm_i8x16_narrow_i16x8(mask.raw, upper_fill);
+  const Mask128<T> byte_mask{narrowed};
+  return BitsFromMask(hwy::SizeTag<1>(), byte_mask);
+}
+
+// 32-bit lanes: AND lane i with (1 << i), then OR the four lanes together.
+template <typename T, size_t N>
+HWY_API uint64_t BitsFromMask(hwy::SizeTag<4> /*tag*/,
+                              const Mask128<T, N> mask) {
+  const __i32x4 lane_bit = wasm_i32x4_make(1, 2, 4, 8);
+  const __i32x4 raw_mask = static_cast<__i32x4>(mask.raw);
+  const __i32x4 one_bit_per_lane = wasm_v128_and(raw_mask, lane_bit);
+  alignas(16) uint32_t bit[4];
+  wasm_v128_store(bit, one_bit_per_lane);
+  return bit[0] | bit[1] | bit[2] | bit[3];
+}
+
+// Keeps only the lowest N bits of a BitsFromMask result. A full 16-byte
+// vector (N * sizeof(T) == 16) is returned unchanged.
+template <typename T, size_t N>
+constexpr uint64_t OnlyActive(uint64_t bits) {
+  return ((N * sizeof(T)) != 16) ? (bits & ((1ull << N) - 1)) : bits;
+}
+
+// Returns a vector with 0xFF in bytes with index >= N and 0 in all lower bytes.
+template <size_t N>  // N in bytes; values not listed below use the N == 15 row
+constexpr __i8x16 BytesAbove() {
+  return /**/
+      (N == 0)    ? wasm_i32x4_make(-1, -1, -1, -1)
+      : (N == 4)  ? wasm_i32x4_make(0, -1, -1, -1)
+      : (N == 8)  ? wasm_i32x4_make(0, 0, -1, -1)
+      : (N == 12) ? wasm_i32x4_make(0, 0, 0, -1)
+      : (N == 16) ? wasm_i32x4_make(0, 0, 0, 0)
+      : (N == 2)  ? wasm_i16x8_make(0, -1, -1, -1, -1, -1, -1, -1)
+      : (N == 6)  ? wasm_i16x8_make(0, 0, 0, -1, -1, -1, -1, -1)
+      : (N == 10) ? wasm_i16x8_make(0, 0, 0, 0, 0, -1, -1, -1)
+      : (N == 14) ? wasm_i16x8_make(0, 0, 0, 0, 0, 0, 0, -1)
+      : (N == 1)  ? wasm_i8x16_make(0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+                                   -1, -1, -1, -1, -1)
+      : (N == 3)  ? wasm_i8x16_make(0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+                                   -1, -1, -1, -1)
+      : (N == 5)  ? wasm_i8x16_make(0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1,
+                                   -1, -1, -1, -1)
+      : (N == 7)  ? wasm_i8x16_make(0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1,
+                                   -1, -1, -1)
+      : (N == 9)  ? wasm_i8x16_make(0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1,
+                                   -1, -1, -1)
+      : (N == 11)
+          ? wasm_i8x16_make(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1)
+      : (N == 13)
+          ? wasm_i8x16_make(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1)
+          : wasm_i8x16_make(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1);
+}
+
+// Returns one bit per lane, dispatching on the lane size and then clearing
+// the bits that lie beyond the N lanes of a partial vector.
+template <typename T, size_t N>
+HWY_API uint64_t BitsFromMask(const Mask128<T, N> mask) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  const uint64_t all_bits = BitsFromMask(LaneSizeTag(), mask);
+  return OnlyActive<T, N>(all_bits);
+}
+
+// 8-bit lanes: population count of the packed mask bits.
+template <typename T>
+HWY_API size_t CountTrue(hwy::SizeTag<1> tag, const Mask128<T> m) {
+  const uint64_t bits = BitsFromMask(tag, m);
+  return PopCount(bits);
+}
+
+// 16-bit lanes: population count of the packed mask bits.
+template <typename T>
+HWY_API size_t CountTrue(hwy::SizeTag<2> tag, const Mask128<T> m) {
+  const uint64_t bits = BitsFromMask(tag, m);
+  return PopCount(bits);
+}
+
+// 32-bit lanes: keep a distinct bit (1, 2, 4, 8) per lane, OR the two 64-bit
+// halves and count the bits that remain set.
+template <typename T>
+HWY_API size_t CountTrue(hwy::SizeTag<4> /*tag*/, const Mask128<T> m) {
+  const __i32x4 lane_bit = wasm_i32x4_make(1, 2, 4, 8);
+  const __i32x4 one_bit_per_lane = wasm_v128_and(m.raw, lane_bit);
+  alignas(16) uint64_t half[2];
+  wasm_v128_store(half, one_bit_per_lane);
+  const uint64_t combined = half[0] | half[1];
+  return PopCount(combined);
+}
+
+}  // namespace detail
+
+// Writes one bit per lane of "mask" to p, rounded up to whole bytes, and
+// returns the number of bytes written.
+template <typename T, size_t N>
+HWY_INLINE size_t StoreMaskBits(const Mask128<T, N> mask, uint8_t* p) {
+  constexpr size_t kNumBytes = (N + 7) / 8;
+  const uint64_t packed = detail::BitsFromMask(mask);
+  CopyBytes<kNumBytes>(&packed, p);
+  return kNumBytes;
+}
+
+// Full vector: dispatch on the lane size.
+template <typename T>
+HWY_API size_t CountTrue(const Mask128<T> m) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  return detail::CountTrue(LaneSizeTag(), m);
+}
+
+// Partial vector (at most 64 bits): clear the bytes beyond the N valid lanes,
+// then count as a full vector.
+template <typename T, size_t N, HWY_IF_LE64(T, N)>
+HWY_API size_t CountTrue(const Mask128<T, N> m) {
+  // Ensure all undefined bytes are 0.
+  const Mask128<T, N> above{detail::BytesAbove<N * sizeof(T)>()};
+  const auto valid_only = AndNot(above, m);
+  return CountTrue(Mask128<T>{valid_only.raw});
+}
+
+// Full vector, independent of lane type: true if both 64-bit halves of the
+// mask are zero.
+template <typename T>
+HWY_API bool AllFalse(const Mask128<T> m) {
+#if 0
+  // Casting followed by wasm_i8x16_any_true results in wasm error:
+  // i32.eqz[0] expected type i32, found i8x16.popcnt of type s128
+  const auto v8 = BitCast(Full128<int8_t>(), VecFromMask(Full128<T>(), m));
+  return !wasm_i8x16_any_true(v8.raw);
+#else
+  const auto lower = wasm_i64x2_extract_lane(m.raw, 0);
+  const auto upper = wasm_i64x2_extract_lane(m.raw, 1);
+  return (lower | upper) == 0;
+#endif
+}
+
+// Full vector, type-dependent
+namespace detail {
+// One overload per lane size, each forwarding to the matching all_true
+// intrinsic.
+template <typename T>
+HWY_API bool AllTrue(hwy::SizeTag<1> /*tag*/, const Mask128<T> m) {
+  const bool every_lane_set = wasm_i8x16_all_true(m.raw);
+  return every_lane_set;
+}
+template <typename T>
+HWY_API bool AllTrue(hwy::SizeTag<2> /*tag*/, const Mask128<T> m) {
+  const bool every_lane_set = wasm_i16x8_all_true(m.raw);
+  return every_lane_set;
+}
+template <typename T>
+HWY_API bool AllTrue(hwy::SizeTag<4> /*tag*/, const Mask128<T> m) {
+  const bool every_lane_set = wasm_i32x4_all_true(m.raw);
+  return every_lane_set;
+}
+
+}  // namespace detail
+
+// Full vector: dispatch on the lane size.
+template <typename T>
+HWY_API bool AllTrue(const Mask128<T> m) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  return detail::AllTrue(LaneSizeTag(), m);
+}
+
+// Partial vectors
+
+// Partial vector: clear the bytes beyond the N valid lanes, then test as a
+// full vector.
+template <typename T, size_t N, HWY_IF_LE64(T, N)>
+HWY_API bool AllFalse(const Mask128<T, N> m) {
+  // Ensure all undefined bytes are 0.
+  const Mask128<T, N> above{detail::BytesAbove<N * sizeof(T)>()};
+  const auto valid_only = AndNot(above, m);
+  return AllFalse(Mask128<T>{valid_only.raw});
+}
+
+// Partial vector: set the bytes beyond the N valid lanes, then test as a full
+// vector.
+template <typename T, size_t N, HWY_IF_LE64(T, N)>
+HWY_API bool AllTrue(const Mask128<T, N> m) {
+  // Ensure all undefined bytes are FF.
+  const Mask128<T, N> above{detail::BytesAbove<N * sizeof(T)>()};
+  const auto padded = Or(above, m);
+  return AllTrue(Mask128<T>{padded.raw});
+}
+
+// ------------------------------ Compress
+
+namespace detail {
+
+template <typename T, size_t N>  // byte shuffle indices for 16-bit Compress
+HWY_INLINE Vec128<T, N> Idx16x8FromBits(const uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 256);
+  const Simd<T, N> d;
+  const Rebind<uint8_t, decltype(d)> d8;
+  const Simd<uint16_t, N> du;
+
+  // We need byte indices for TableLookupBytes (one vector's worth for each of
+  // 256 combinations of 8 mask bits). Loading them directly requires 4 KiB. We
+  // can instead store lane indices and convert to byte indices (2*lane + 0..1),
+  // with the doubling baked into the table. Unpacking nibbles is likely more
+  // costly than the higher cache footprint from storing bytes.
+  alignas(16) constexpr uint8_t table[256 * 8] = {
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,
+      0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  4,  0,  0,  0,
+      0,  0,  0,  0,  0,  4,  0,  0,  0,  0,  0,  0,  2,  4,  0,  0,  0,  0,
+      0,  0,  0,  2,  4,  0,  0,  0,  0,  0,  6,  0,  0,  0,  0,  0,  0,  0,
+      0,  6,  0,  0,  0,  0,  0,  0,  2,  6,  0,  0,  0,  0,  0,  0,  0,  2,
+      6,  0,  0,  0,  0,  0,  4,  6,  0,  0,  0,  0,  0,  0,  0,  4,  6,  0,
+      0,  0,  0,  0,  2,  4,  6,  0,  0,  0,  0,  0,  0,  2,  4,  6,  0,  0,
+      0,  0,  8,  0,  0,  0,  0,  0,  0,  0,  0,  8,  0,  0,  0,  0,  0,  0,
+      2,  8,  0,  0,  0,  0,  0,  0,  0,  2,  8,  0,  0,  0,  0,  0,  4,  8,
+      0,  0,  0,  0,  0,  0,  0,  4,  8,  0,  0,  0,  0,  0,  2,  4,  8,  0,
+      0,  0,  0,  0,  0,  2,  4,  8,  0,  0,  0,  0,  6,  8,  0,  0,  0,  0,
+      0,  0,  0,  6,  8,  0,  0,  0,  0,  0,  2,  6,  8,  0,  0,  0,  0,  0,
+      0,  2,  6,  8,  0,  0,  0,  0,  4,  6,  8,  0,  0,  0,  0,  0,  0,  4,
+      6,  8,  0,  0,  0,  0,  2,  4,  6,  8,  0,  0,  0,  0,  0,  2,  4,  6,
+      8,  0,  0,  0,  10, 0,  0,  0,  0,  0,  0,  0,  0,  10, 0,  0,  0,  0,
+      0,  0,  2,  10, 0,  0,  0,  0,  0,  0,  0,  2,  10, 0,  0,  0,  0,  0,
+      4,  10, 0,  0,  0,  0,  0,  0,  0,  4,  10, 0,  0,  0,  0,  0,  2,  4,
+      10, 0,  0,  0,  0,  0,  0,  2,  4,  10, 0,  0,  0,  0,  6,  10, 0,  0,
+      0,  0,  0,  0,  0,  6,  10, 0,  0,  0,  0,  0,  2,  6,  10, 0,  0,  0,
+      0,  0,  0,  2,  6,  10, 0,  0,  0,  0,  4,  6,  10, 0,  0,  0,  0,  0,
+      0,  4,  6,  10, 0,  0,  0,  0,  2,  4,  6,  10, 0,  0,  0,  0,  0,  2,
+      4,  6,  10, 0,  0,  0,  8,  10, 0,  0,  0,  0,  0,  0,  0,  8,  10, 0,
+      0,  0,  0,  0,  2,  8,  10, 0,  0,  0,  0,  0,  0,  2,  8,  10, 0,  0,
+      0,  0,  4,  8,  10, 0,  0,  0,  0,  0,  0,  4,  8,  10, 0,  0,  0,  0,
+      2,  4,  8,  10, 0,  0,  0,  0,  0,  2,  4,  8,  10, 0,  0,  0,  6,  8,
+      10, 0,  0,  0,  0,  0,  0,  6,  8,  10, 0,  0,  0,  0,  2,  6,  8,  10,
+      0,  0,  0,  0,  0,  2,  6,  8,  10, 0,  0,  0,  4,  6,  8,  10, 0,  0,
+      0,  0,  0,  4,  6,  8,  10, 0,  0,  0,  2,  4,  6,  8,  10, 0,  0,  0,
+      0,  2,  4,  6,  8,  10, 0,  0,  12, 0,  0,  0,  0,  0,  0,  0,  0,  12,
+      0,  0,  0,  0,  0,  0,  2,  12, 0,  0,  0,  0,  0,  0,  0,  2,  12, 0,
+      0,  0,  0,  0,  4,  12, 0,  0,  0,  0,  0,  0,  0,  4,  12, 0,  0,  0,
+      0,  0,  2,  4,  12, 0,  0,  0,  0,  0,  0,  2,  4,  12, 0,  0,  0,  0,
+      6,  12, 0,  0,  0,  0,  0,  0,  0,  6,  12, 0,  0,  0,  0,  0,  2,  6,
+      12, 0,  0,  0,  0,  0,  0,  2,  6,  12, 0,  0,  0,  0,  4,  6,  12, 0,
+      0,  0,  0,  0,  0,  4,  6,  12, 0,  0,  0,  0,  2,  4,  6,  12, 0,  0,
+      0,  0,  0,  2,  4,  6,  12, 0,  0,  0,  8,  12, 0,  0,  0,  0,  0,  0,
+      0,  8,  12, 0,  0,  0,  0,  0,  2,  8,  12, 0,  0,  0,  0,  0,  0,  2,
+      8,  12, 0,  0,  0,  0,  4,  8,  12, 0,  0,  0,  0,  0,  0,  4,  8,  12,
+      0,  0,  0,  0,  2,  4,  8,  12, 0,  0,  0,  0,  0,  2,  4,  8,  12, 0,
+      0,  0,  6,  8,  12, 0,  0,  0,  0,  0,  0,  6,  8,  12, 0,  0,  0,  0,
+      2,  6,  8,  12, 0,  0,  0,  0,  0,  2,  6,  8,  12, 0,  0,  0,  4,  6,
+      8,  12, 0,  0,  0,  0,  0,  4,  6,  8,  12, 0,  0,  0,  2,  4,  6,  8,
+      12, 0,  0,  0,  0,  2,  4,  6,  8,  12, 0,  0,  10, 12, 0,  0,  0,  0,
+      0,  0,  0,  10, 12, 0,  0,  0,  0,  0,  2,  10, 12, 0,  0,  0,  0,  0,
+      0,  2,  10, 12, 0,  0,  0,  0,  4,  10, 12, 0,  0,  0,  0,  0,  0,  4,
+      10, 12, 0,  0,  0,  0,  2,  4,  10, 12, 0,  0,  0,  0,  0,  2,  4,  10,
+      12, 0,  0,  0,  6,  10, 12, 0,  0,  0,  0,  0,  0,  6,  10, 12, 0,  0,
+      0,  0,  2,  6,  10, 12, 0,  0,  0,  0,  0,  2,  6,  10, 12, 0,  0,  0,
+      4,  6,  10, 12, 0,  0,  0,  0,  0,  4,  6,  10, 12, 0,  0,  0,  2,  4,
+      6,  10, 12, 0,  0,  0,  0,  2,  4,  6,  10, 12, 0,  0,  8,  10, 12, 0,
+      0,  0,  0,  0,  0,  8,  10, 12, 0,  0,  0,  0,  2,  8,  10, 12, 0,  0,
+      0,  0,  0,  2,  8,  10, 12, 0,  0,  0,  4,  8,  10, 12, 0,  0,  0,  0,
+      0,  4,  8,  10, 12, 0,  0,  0,  2,  4,  8,  10, 12, 0,  0,  0,  0,  2,
+      4,  8,  10, 12, 0,  0,  6,  8,  10, 12, 0,  0,  0,  0,  0,  6,  8,  10,
+      12, 0,  0,  0,  2,  6,  8,  10, 12, 0,  0,  0,  0,  2,  6,  8,  10, 12,
+      0,  0,  4,  6,  8,  10, 12, 0,  0,  0,  0,  4,  6,  8,  10, 12, 0,  0,
+      2,  4,  6,  8,  10, 12, 0,  0,  0,  2,  4,  6,  8,  10, 12, 0,  14, 0,
+      0,  0,  0,  0,  0,  0,  0,  14, 0,  0,  0,  0,  0,  0,  2,  14, 0,  0,
+      0,  0,  0,  0,  0,  2,  14, 0,  0,  0,  0,  0,  4,  14, 0,  0,  0,  0,
+      0,  0,  0,  4,  14, 0,  0,  0,  0,  0,  2,  4,  14, 0,  0,  0,  0,  0,
+      0,  2,  4,  14, 0,  0,  0,  0,  6,  14, 0,  0,  0,  0,  0,  0,  0,  6,
+      14, 0,  0,  0,  0,  0,  2,  6,  14, 0,  0,  0,  0,  0,  0,  2,  6,  14,
+      0,  0,  0,  0,  4,  6,  14, 0,  0,  0,  0,  0,  0,  4,  6,  14, 0,  0,
+      0,  0,  2,  4,  6,  14, 0,  0,  0,  0,  0,  2,  4,  6,  14, 0,  0,  0,
+      8,  14, 0,  0,  0,  0,  0,  0,  0,  8,  14, 0,  0,  0,  0,  0,  2,  8,
+      14, 0,  0,  0,  0,  0,  0,  2,  8,  14, 0,  0,  0,  0,  4,  8,  14, 0,
+      0,  0,  0,  0,  0,  4,  8,  14, 0,  0,  0,  0,  2,  4,  8,  14, 0,  0,
+      0,  0,  0,  2,  4,  8,  14, 0,  0,  0,  6,  8,  14, 0,  0,  0,  0,  0,
+      0,  6,  8,  14, 0,  0,  0,  0,  2,  6,  8,  14, 0,  0,  0,  0,  0,  2,
+      6,  8,  14, 0,  0,  0,  4,  6,  8,  14, 0,  0,  0,  0,  0,  4,  6,  8,
+      14, 0,  0,  0,  2,  4,  6,  8,  14, 0,  0,  0,  0,  2,  4,  6,  8,  14,
+      0,  0,  10, 14, 0,  0,  0,  0,  0,  0,  0,  10, 14, 0,  0,  0,  0,  0,
+      2,  10, 14, 0,  0,  0,  0,  0,  0,  2,  10, 14, 0,  0,  0,  0,  4,  10,
+      14, 0,  0,  0,  0,  0,  0,  4,  10, 14, 0,  0,  0,  0,  2,  4,  10, 14,
+      0,  0,  0,  0,  0,  2,  4,  10, 14, 0,  0,  0,  6,  10, 14, 0,  0,  0,
+      0,  0,  0,  6,  10, 14, 0,  0,  0,  0,  2,  6,  10, 14, 0,  0,  0,  0,
+      0,  2,  6,  10, 14, 0,  0,  0,  4,  6,  10, 14, 0,  0,  0,  0,  0,  4,
+      6,  10, 14, 0,  0,  0,  2,  4,  6,  10, 14, 0,  0,  0,  0,  2,  4,  6,
+      10, 14, 0,  0,  8,  10, 14, 0,  0,  0,  0,  0,  0,  8,  10, 14, 0,  0,
+      0,  0,  2,  8,  10, 14, 0,  0,  0,  0,  0,  2,  8,  10, 14, 0,  0,  0,
+      4,  8,  10, 14, 0,  0,  0,  0,  0,  4,  8,  10, 14, 0,  0,  0,  2,  4,
+      8,  10, 14, 0,  0,  0,  0,  2,  4,  8,  10, 14, 0,  0,  6,  8,  10, 14,
+      0,  0,  0,  0,  0,  6,  8,  10, 14, 0,  0,  0,  2,  6,  8,  10, 14, 0,
+      0,  0,  0,  2,  6,  8,  10, 14, 0,  0,  4,  6,  8,  10, 14, 0,  0,  0,
+      0,  4,  6,  8,  10, 14, 0,  0,  2,  4,  6,  8,  10, 14, 0,  0,  0,  2,
+      4,  6,  8,  10, 14, 0,  12, 14, 0,  0,  0,  0,  0,  0,  0,  12, 14, 0,
+      0,  0,  0,  0,  2,  12, 14, 0,  0,  0,  0,  0,  0,  2,  12, 14, 0,  0,
+      0,  0,  4,  12, 14, 0,  0,  0,  0,  0,  0,  4,  12, 14, 0,  0,  0,  0,
+      2,  4,  12, 14, 0,  0,  0,  0,  0,  2,  4,  12, 14, 0,  0,  0,  6,  12,
+      14, 0,  0,  0,  0,  0,  0,  6,  12, 14, 0,  0,  0,  0,  2,  6,  12, 14,
+      0,  0,  0,  0,  0,  2,  6,  12, 14, 0,  0,  0,  4,  6,  12, 14, 0,  0,
+      0,  0,  0,  4,  6,  12, 14, 0,  0,  0,  2,  4,  6,  12, 14, 0,  0,  0,
+      0,  2,  4,  6,  12, 14, 0,  0,  8,  12, 14, 0,  0,  0,  0,  0,  0,  8,
+      12, 14, 0,  0,  0,  0,  2,  8,  12, 14, 0,  0,  0,  0,  0,  2,  8,  12,
+      14, 0,  0,  0,  4,  8,  12, 14, 0,  0,  0,  0,  0,  4,  8,  12, 14, 0,
+      0,  0,  2,  4,  8,  12, 14, 0,  0,  0,  0,  2,  4,  8,  12, 14, 0,  0,
+      6,  8,  12, 14, 0,  0,  0,  0,  0,  6,  8,  12, 14, 0,  0,  0,  2,  6,
+      8,  12, 14, 0,  0,  0,  0,  2,  6,  8,  12, 14, 0,  0,  4,  6,  8,  12,
+      14, 0,  0,  0,  0,  4,  6,  8,  12, 14, 0,  0,  2,  4,  6,  8,  12, 14,
+      0,  0,  0,  2,  4,  6,  8,  12, 14, 0,  10, 12, 14, 0,  0,  0,  0,  0,
+      0,  10, 12, 14, 0,  0,  0,  0,  2,  10, 12, 14, 0,  0,  0,  0,  0,  2,
+      10, 12, 14, 0,  0,  0,  4,  10, 12, 14, 0,  0,  0,  0,  0,  4,  10, 12,
+      14, 0,  0,  0,  2,  4,  10, 12, 14, 0,  0,  0,  0,  2,  4,  10, 12, 14,
+      0,  0,  6,  10, 12, 14, 0,  0,  0,  0,  0,  6,  10, 12, 14, 0,  0,  0,
+      2,  6,  10, 12, 14, 0,  0,  0,  0,  2,  6,  10, 12, 14, 0,  0,  4,  6,
+      10, 12, 14, 0,  0,  0,  0,  4,  6,  10, 12, 14, 0,  0,  2,  4,  6,  10,
+      12, 14, 0,  0,  0,  2,  4,  6,  10, 12, 14, 0,  8,  10, 12, 14, 0,  0,
+      0,  0,  0,  8,  10, 12, 14, 0,  0,  0,  2,  8,  10, 12, 14, 0,  0,  0,
+      0,  2,  8,  10, 12, 14, 0,  0,  4,  8,  10, 12, 14, 0,  0,  0,  0,  4,
+      8,  10, 12, 14, 0,  0,  2,  4,  8,  10, 12, 14, 0,  0,  0,  2,  4,  8,
+      10, 12, 14, 0,  6,  8,  10, 12, 14, 0,  0,  0,  0,  6,  8,  10, 12, 14,
+      0,  0,  2,  6,  8,  10, 12, 14, 0,  0,  0,  2,  6,  8,  10, 12, 14, 0,
+      4,  6,  8,  10, 12, 14, 0,  0,  0,  4,  6,  8,  10, 12, 14, 0,  2,  4,
+      6,  8,  10, 12, 14, 0,  0,  2,  4,  6,  8,  10, 12, 14};
+  // Row mask_bits (8 bytes) lists 2*lane for each set bit, padded with 0.
+  const Vec128<uint8_t, 2 * N> byte_idx{Load(d8, table + mask_bits * 8).raw};
+  const Vec128<uint16_t, N> pairs = ZipLower(byte_idx, byte_idx);  // {i, i}
+  return BitCast(d, pairs + Set(du, 0x0100));  // {i, i + 1}: both lane bytes
+}
+
+template <typename T, size_t N>  // byte shuffle indices for 32-bit Compress
+HWY_INLINE Vec128<T, N> Idx32x4FromBits(const uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 16);
+  // Only 4 lanes, so each of the 16 masks gets a full 16-byte index row:
+  // the selected lanes' byte offsets in ascending order, padded with 0..3.
+  alignas(16) constexpr uint8_t packed_array[16 * 16] = {
+      0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  //
+      4,  5,  6,  7,  0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  0,  1,  2,  3,  0,  1,  2,  3,  //
+      8,  9,  10, 11, 0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  8,  9,  10, 11, 0,  1,  2,  3,  0,  1,  2,  3,  //
+      4,  5,  6,  7,  8,  9,  10, 11, 0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 0,  1,  2,  3,  //
+      12, 13, 14, 15, 0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  12, 13, 14, 15, 0,  1,  2,  3,  0,  1,  2,  3,  //
+      4,  5,  6,  7,  12, 13, 14, 15, 0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  12, 13, 14, 15, 0,  1,  2,  3,  //
+      8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  //
+      4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15};
+  // Load row mask_bits and reinterpret its bytes as lanes of T.
+  const Simd<T, N> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  return BitCast(d, Load(d8, packed_array + 16 * mask_bits));
+}
+
+#if HWY_CAP_INTEGER64 || HWY_CAP_FLOAT64
+
+template <typename T, size_t N>  // byte shuffle indices for 64-bit Compress
+HWY_INLINE Vec128<T, N> Idx64x2FromBits(const uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 4);
+  // Only 2 lanes, so each of the 4 masks gets a full 16-byte index row:
+  // the selected lanes' byte offsets in ascending order, padded with 0..7.
+  alignas(16) constexpr uint8_t packed_array[4 * 16] = {
+      0, 1, 2,  3,  4,  5,  6,  7,  0, 1, 2,  3,  4,  5,  6,  7,  //
+      0, 1, 2,  3,  4,  5,  6,  7,  0, 1, 2,  3,  4,  5,  6,  7,  //
+      8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2,  3,  4,  5,  6,  7,  //
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15};
+  // Load row mask_bits and reinterpret its bytes as lanes of T.
+  const Simd<T, N> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  return BitCast(d, Load(d8, packed_array + 16 * mask_bits));
+}
+
+#endif
+
+// Helper functions called by both Compress and CompressStore - avoids a
+// redundant BitsFromMask in the latter.
+
+// 16-bit lanes: permutes the bytes of v using the index vector that
+// Idx16x8FromBits returns for mask_bits.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Compress(hwy::SizeTag<2> /*tag*/, Vec128<T, N> v,
+                              const uint64_t mask_bits) {
+  const Simd<T, N> d;
+  const RebindToSigned<decltype(d)> di;
+  const auto byte_indices =
+      BitCast(di, detail::Idx16x8FromBits<T, N>(mask_bits));
+  const auto permuted = TableLookupBytes(BitCast(di, v), byte_indices);
+  return BitCast(d, permuted);
+}
+
+// 32-bit lanes: permutes the bytes of v using the index vector that
+// Idx32x4FromBits returns for mask_bits.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Compress(hwy::SizeTag<4> /*tag*/, Vec128<T, N> v,
+                              const uint64_t mask_bits) {
+  const Simd<T, N> d;
+  const RebindToSigned<decltype(d)> di;
+  const auto byte_indices =
+      BitCast(di, detail::Idx32x4FromBits<T, N>(mask_bits));
+  const auto permuted = TableLookupBytes(BitCast(di, v), byte_indices);
+  return BitCast(d, permuted);
+}
+
+#if HWY_CAP_INTEGER64 || HWY_CAP_FLOAT64
+
+// 64-bit lanes: permutes the bytes of v using the index vector that
+// Idx64x2FromBits returns for mask_bits.
+//
+// T must appear in the vector parameter: the callers dispatch on
+// hwy::SizeTag<sizeof(T)> without explicit template arguments, so a signature
+// hard-coded to Vec128<uint64_t, N> leaves T non-deducible and this overload
+// could never be selected. Taking Vec128<T, N> also covers int64_t and double.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Compress(hwy::SizeTag<8> /*tag*/, Vec128<T, N> v,
+                              const uint64_t mask_bits) {
+  const auto idx = detail::Idx64x2FromBits<T, N>(mask_bits);
+  using D = Simd<T, N>;
+  const RebindToSigned<D> di;
+  // Shuffle in the signed domain, like the 16- and 32-bit overloads.
+  return BitCast(D(), TableLookupBytes(BitCast(di, v), BitCast(di, idx)));
+}
+
+#endif
+
+}  // namespace detail
+
+// Converts the mask to bits and forwards to the detail::Compress overload for
+// this lane size.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Compress(Vec128<T, N> v, const Mask128<T, N> mask) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  return detail::Compress(LaneSizeTag(), v, mask_bits);
+}
+
+// ------------------------------ CompressStore
+
+// Compresses v according to mask, stores the whole resulting vector to
+// "aligned" and returns the number of set mask bits. The mask bits are
+// computed once and shared by both steps.
+template <typename T, size_t N>
+HWY_API size_t CompressStore(Vec128<T, N> v, const Mask128<T, N> mask,
+                             Simd<T, N> d, T* HWY_RESTRICT aligned) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  const uint64_t bits = detail::BitsFromMask(mask);
+  const auto packed = detail::Compress(LaneSizeTag(), v, bits);
+  Store(packed, d, aligned);
+  return PopCount(bits);
+}
+
+// ------------------------------ StoreInterleaved3 (CombineShiftRightBytes,
+// TableLookupBytes)
+
+// 128 bits: writes 48 bytes to unaligned, interleaved as a0,b0,c0,a1,b1,c1,...
+HWY_API void StoreInterleaved3(const Vec128<uint8_t> a, const Vec128<uint8_t> b,
+                               const Vec128<uint8_t> c, Full128<uint8_t> d,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  const auto k5 = Set(d, 5);  // index increments that derive the later
+  const auto k6 = Set(d, 6);  // shuffle tables from the first three
+  // In the comments below, r/g/b denote bytes taken from a/b/c respectively.
+  // Shuffle (a,b,c) vector bytes to (MSB on left): r5, bgr[4:0].
+  // 0x80 so lanes to be filled from other vectors are 0 for blending.
+  alignas(16) static constexpr uint8_t tbl_r0[16] = {
+      0, 0x80, 0x80, 1, 0x80, 0x80, 2, 0x80, 0x80,  //
+      3, 0x80, 0x80, 4, 0x80, 0x80, 5};
+  alignas(16) static constexpr uint8_t tbl_g0[16] = {
+      0x80, 0, 0x80, 0x80, 1, 0x80,  //
+      0x80, 2, 0x80, 0x80, 3, 0x80, 0x80, 4, 0x80, 0x80};
+  const auto shuf_r0 = Load(d, tbl_r0);
+  const auto shuf_g0 = Load(d, tbl_g0);  // cannot reuse r0 due to 5 in MSB
+  const auto shuf_b0 = CombineShiftRightBytes<15>(shuf_g0, shuf_g0);
+  const auto r0 = TableLookupBytes(a, shuf_r0);  // 5..4..3..2..1..0
+  const auto g0 = TableLookupBytes(b, shuf_g0);  // ..4..3..2..1..0.
+  const auto b0 = TableLookupBytes(c, shuf_b0);  // .4..3..2..1..0..
+  const auto int0 = r0 | g0 | b0;
+  StoreU(int0, d, unaligned + 0 * 16);  // output bytes [0, 16)
+  // The 0x80 placeholders keep their top bit after adding 5 or 6 (<= 0x8B).
+  // Second vector: g10,r10, bgr[9:6], b5,g5
+  const auto shuf_r1 = shuf_b0 + k6;  // .A..9..8..7..6..
+  const auto shuf_g1 = shuf_r0 + k5;  // A..9..8..7..6..5
+  const auto shuf_b1 = shuf_g0 + k5;  // ..9..8..7..6..5.
+  const auto r1 = TableLookupBytes(a, shuf_r1);
+  const auto g1 = TableLookupBytes(b, shuf_g1);
+  const auto b1 = TableLookupBytes(c, shuf_b1);
+  const auto int1 = r1 | g1 | b1;
+  StoreU(int1, d, unaligned + 1 * 16);  // output bytes [16, 32)
+  // Same derivation again, starting from the second vector's tables.
+  // Third vector: bgr[15:11], b10
+  const auto shuf_r2 = shuf_b1 + k6;  // ..F..E..D..C..B.
+  const auto shuf_g2 = shuf_r1 + k5;  // .F..E..D..C..B..
+  const auto shuf_b2 = shuf_g1 + k5;  // F..E..D..C..B..A
+  const auto r2 = TableLookupBytes(a, shuf_r2);
+  const auto g2 = TableLookupBytes(b, shuf_g2);
+  const auto b2 = TableLookupBytes(c, shuf_b2);
+  const auto int2 = r2 | g2 | b2;
+  StoreU(int2, d, unaligned + 2 * 16);  // output bytes [32, 48)
+}
+
+// 64 bits: same a0 b0 c0 ... layout for 8-lane inputs (16-byte + half store).
+HWY_API void StoreInterleaved3(const Vec128<uint8_t, 8> a,
+                               const Vec128<uint8_t, 8> b,
+                               const Vec128<uint8_t, 8> c, Simd<uint8_t, 8> d,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  // Use full vectors for the shuffles and first result.
+  const Full128<uint8_t> d_full;
+  const auto k5 = Set(d_full, 5);  // added to shuffle indices for next vector
+  const auto k6 = Set(d_full, 6);
+
+  const Vec128<uint8_t> full_a{a.raw};  // same raw register, full-width type
+  const Vec128<uint8_t> full_b{b.raw};
+  const Vec128<uint8_t> full_c{c.raw};
+
+  // Shuffle (a,b,c) vector bytes to (MSB on left): r5, bgr[4:0].
+  // 0x80 so lanes to be filled from other vectors are 0 for blending.
+  alignas(16) static constexpr uint8_t tbl_r0[16] = {
+      0, 0x80, 0x80, 1, 0x80, 0x80, 2, 0x80, 0x80,  //
+      3, 0x80, 0x80, 4, 0x80, 0x80, 5};
+  alignas(16) static constexpr uint8_t tbl_g0[16] = {
+      0x80, 0, 0x80, 0x80, 1, 0x80,  //
+      0x80, 2, 0x80, 0x80, 3, 0x80, 0x80, 4, 0x80, 0x80};
+  const auto shuf_r0 = Load(d_full, tbl_r0);
+  const auto shuf_g0 = Load(d_full, tbl_g0);  // cannot reuse r0 due to 5 in MSB
+  const auto shuf_b0 = CombineShiftRightBytes<15>(shuf_g0, shuf_g0);  // rotate
+  const auto r0 = TableLookupBytes(full_a, shuf_r0);  // 5..4..3..2..1..0
+  const auto g0 = TableLookupBytes(full_b, shuf_g0);  // ..4..3..2..1..0.
+  const auto b0 = TableLookupBytes(full_c, shuf_b0);  // .4..3..2..1..0..
+  const auto int0 = r0 | g0 | b0;  // zeroed lanes make OR act as a blend
+  StoreU(int0, d_full, unaligned + 0 * 16);  // full 16-byte store at offset 0
+
+  // Second (HALF) vector: bgr[7:6], b5,g5
+  const auto shuf_r1 = shuf_b0 + k6;  // ..7..6..
+  const auto shuf_g1 = shuf_r0 + k5;  // .7..6..5
+  const auto shuf_b1 = shuf_g0 + k5;  // 7..6..5.
+  const auto r1 = TableLookupBytes(full_a, shuf_r1);
+  const auto g1 = TableLookupBytes(full_b, shuf_g1);
+  const auto b1 = TableLookupBytes(full_c, shuf_b1);
+  const decltype(Zero(d)) int1{(r1 | g1 | b1).raw};  // retype to d's vector
+  StoreU(int1, d, unaligned + 1 * 16);  // stored with the 8-lane tag d
+}
+
+// <= 32 bits: shuffles in a full vector, then copies N * 3 bytes out.
+template <size_t N, HWY_IF_LE32(uint8_t, N)>
+HWY_API void StoreInterleaved3(const Vec128<uint8_t, N> a,
+                               const Vec128<uint8_t, N> b,
+                               const Vec128<uint8_t, N> c,
+                               Simd<uint8_t, N> /*tag*/,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  // Use full vectors for the shuffles and result.
+  const Full128<uint8_t> d_full;
+
+  const Vec128<uint8_t> full_a{a.raw};  // same raw register, full-width type
+  const Vec128<uint8_t> full_b{b.raw};
+  const Vec128<uint8_t> full_c{c.raw};
+
+  // Shuffle (a,b,c) vector bytes to bgr[3:0].
+  // 0x80 so lanes to be filled from other vectors are 0 for blending.
+  alignas(16) static constexpr uint8_t tbl_r0[16] = {
+      0,    0x80, 0x80, 1,   0x80, 0x80, 2, 0x80, 0x80, 3, 0x80, 0x80,  //
+      0x80, 0x80, 0x80, 0x80};
+  const auto shuf_r0 = Load(d_full, tbl_r0);
+  const auto shuf_g0 = CombineShiftRightBytes<15>(shuf_r0, shuf_r0);  // r0 rot 1
+  const auto shuf_b0 = CombineShiftRightBytes<14>(shuf_r0, shuf_r0);  // r0 rot 2
+  const auto r0 = TableLookupBytes(full_a, shuf_r0);  // ......3..2..1..0
+  const auto g0 = TableLookupBytes(full_b, shuf_g0);  // .....3..2..1..0.
+  const auto b0 = TableLookupBytes(full_c, shuf_b0);  // ....3..2..1..0..
+  const auto int0 = r0 | g0 | b0;  // zeroed lanes make OR act as a blend
+  alignas(16) uint8_t buf[16];  // staging buffer for the full-width store
+  StoreU(int0, d_full, buf);
+  CopyBytes<N * 3>(buf, unaligned);  // only N * 3 bytes reach the output
+}
+
+// ------------------------------ StoreInterleaved4
+
+// 128 bits: writes a0 b0 c0 d0 a1 b1 c1 d1 ... as 4 x 16 bytes.
+HWY_API void StoreInterleaved4(const Vec128<uint8_t> v0,
+                               const Vec128<uint8_t> v1,
+                               const Vec128<uint8_t> v2,
+                               const Vec128<uint8_t> v3, Full128<uint8_t> d,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  // let a,b,c,d denote v0..3.
+  const auto ba0 = ZipLower(v0, v1);  // b7 a7 .. b0 a0
+  const auto dc0 = ZipLower(v2, v3);  // d7 c7 .. d0 c0
+  const auto ba8 = ZipUpper(v0, v1);  // upper-half counterpart of ba0
+  const auto dc8 = ZipUpper(v2, v3);  // upper-half counterpart of dc0
+  const auto dcba_0 = ZipLower(ba0, dc0);  // d..a3 d..a0
+  const auto dcba_4 = ZipUpper(ba0, dc0);  // d..a7 d..a4
+  const auto dcba_8 = ZipLower(ba8, dc8);  // d..aB d..a8
+  const auto dcba_C = ZipUpper(ba8, dc8);  // d..aF d..aC
+  StoreU(BitCast(d, dcba_0), d, unaligned + 0 * 16);  // BitCast back to u8
+  StoreU(BitCast(d, dcba_4), d, unaligned + 1 * 16);
+  StoreU(BitCast(d, dcba_8), d, unaligned + 2 * 16);
+  StoreU(BitCast(d, dcba_C), d, unaligned + 3 * 16);
+}
+
+// 64 bits: 8-lane inputs, written with two full 16-byte stores.
+HWY_API void StoreInterleaved4(const Vec128<uint8_t, 8> in0,
+                               const Vec128<uint8_t, 8> in1,
+                               const Vec128<uint8_t, 8> in2,
+                               const Vec128<uint8_t, 8> in3,
+                               Simd<uint8_t, 8> /*tag*/,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  // Use full vectors to reduce the number of stores.
+  const Vec128<uint8_t> v0{in0.raw};  // same raw register, full-width type
+  const Vec128<uint8_t> v1{in1.raw};
+  const Vec128<uint8_t> v2{in2.raw};
+  const Vec128<uint8_t> v3{in3.raw};
+  // let a,b,c,d denote v0..3.
+  const auto ba0 = ZipLower(v0, v1);       // b7 a7 .. b0 a0
+  const auto dc0 = ZipLower(v2, v3);       // d7 c7 .. d0 c0
+  const auto dcba_0 = ZipLower(ba0, dc0);  // d..a3 d..a0
+  const auto dcba_4 = ZipUpper(ba0, dc0);  // d..a7 d..a4
+  const Full128<uint8_t> d_full;
+  StoreU(BitCast(d_full, dcba_0), d_full, unaligned + 0 * 16);  // lanes 0..3
+  StoreU(BitCast(d_full, dcba_4), d_full, unaligned + 1 * 16);  // lanes 4..7
+}
+
+// <= 32 bits: zips in a full vector, then copies 4 * N bytes out.
+template <size_t N, HWY_IF_LE32(uint8_t, N)>
+HWY_API void StoreInterleaved4(const Vec128<uint8_t, N> in0,
+                               const Vec128<uint8_t, N> in1,
+                               const Vec128<uint8_t, N> in2,
+                               const Vec128<uint8_t, N> in3,
+                               Simd<uint8_t, N> /*tag*/,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  // Use full vectors to reduce the number of stores.
+  const Vec128<uint8_t> v0{in0.raw};  // same raw register, full-width type
+  const Vec128<uint8_t> v1{in1.raw};
+  const Vec128<uint8_t> v2{in2.raw};
+  const Vec128<uint8_t> v3{in3.raw};
+  // let a,b,c,d denote v0..3.
+  const auto ba0 = ZipLower(v0, v1);       // b3 a3 .. b0 a0
+  const auto dc0 = ZipLower(v2, v3);       // d3 c3 .. d0 c0
+  const auto dcba_0 = ZipLower(ba0, dc0);  // d..a3 d..a0
+  alignas(16) uint8_t buf[16];  // staging buffer for the full-width store
+  const Full128<uint8_t> d_full;
+  StoreU(BitCast(d_full, dcba_0), d_full, buf);
+  CopyBytes<4 * N>(buf, unaligned);  // only 4 * N bytes reach the output
+}
+
+// ------------------------------ Reductions
+
+namespace detail {
+
+// N=1 for any T: no-op
+template <typename T>
+HWY_API Vec128<T, 1> SumOfLanes(hwy::SizeTag<sizeof(T)> /* tag */,
+                                const Vec128<T, 1> v) {
+  return v;  // a single lane is already its own sum
+}
+template <typename T>
+HWY_API Vec128<T, 1> MinOfLanes(hwy::SizeTag<sizeof(T)> /* tag */,
+                                const Vec128<T, 1> v) {
+  return v;  // a single lane is already its own minimum
+}
+template <typename T>
+HWY_API Vec128<T, 1> MaxOfLanes(hwy::SizeTag<sizeof(T)> /* tag */,
+                                const Vec128<T, 1> v) {
+  return v;  // a single lane is already its own maximum
+}
+
+// u32/i32/f32:
+
+// N=2
+template <typename T>
+HWY_API Vec128<T, 2> SumOfLanes(hwy::SizeTag<4> /* tag */,
+                                const Vec128<T, 2> v10) {
+  return v10 + Vec128<T, 2>{Shuffle2301(Vec128<T>{v10.raw}).raw};  // v10 + v01
+}
+template <typename T>
+HWY_API Vec128<T, 2> MinOfLanes(hwy::SizeTag<4> /* tag */,
+                                const Vec128<T, 2> v10) {
+  return Min(v10, Vec128<T, 2>{Shuffle2301(Vec128<T>{v10.raw}).raw});  // vs v01
+}
+template <typename T>
+HWY_API Vec128<T, 2> MaxOfLanes(hwy::SizeTag<4> /* tag */,
+                                const Vec128<T, 2> v10) {
+  return Max(v10, Vec128<T, 2>{Shuffle2301(Vec128<T>{v10.raw}).raw});  // vs v01
+}
+
+// N=4 (full)
+template <typename T>
+HWY_API Vec128<T> SumOfLanes(hwy::SizeTag<4> /* tag */, const Vec128<T> v3210) {
+  const Vec128<T> halves_swapped = Shuffle1032(v3210);
+  const Vec128<T> pair_sums = v3210 + halves_swapped;  // 3+1, 2+0, 1+3, 0+2
+  const Vec128<T> pair_sums_rotated = Shuffle0321(pair_sums);
+  return pair_sums_rotated + pair_sums;  // every lane now holds the total
+}
+template <typename T>
+HWY_API Vec128<T> MinOfLanes(hwy::SizeTag<4> /* tag */, const Vec128<T> v3210) {
+  const Vec128<T> halves_swapped = Shuffle1032(v3210);
+  const Vec128<T> pair_mins = Min(v3210, halves_swapped);  // {3,1} {2,0} ...
+  const Vec128<T> pair_mins_rotated = Shuffle0321(pair_mins);
+  return Min(pair_mins_rotated, pair_mins);  // every lane holds the minimum
+}
+template <typename T>
+HWY_API Vec128<T> MaxOfLanes(hwy::SizeTag<4> /* tag */, const Vec128<T> v3210) {
+  const Vec128<T> halves_swapped = Shuffle1032(v3210);
+  const Vec128<T> pair_maxs = Max(v3210, halves_swapped);  // {3,1} {2,0} ...
+  const Vec128<T> pair_maxs_rotated = Shuffle0321(pair_maxs);
+  return Max(pair_maxs_rotated, pair_maxs);  // every lane holds the maximum
+}
+
+// u64/i64/f64:
+
+// N=2 (full)
+template <typename T>
+HWY_API Vec128<T> SumOfLanes(hwy::SizeTag<8> /* tag */, const Vec128<T> v10) {
+  const Vec128<T> lanes_swapped = Shuffle01(v10);
+  return v10 + lanes_swapped;  // lane1+lane0 in both lanes
+}
+template <typename T>
+HWY_API Vec128<T> MinOfLanes(hwy::SizeTag<8> /* tag */, const Vec128<T> v10) {
+  const Vec128<T> lanes_swapped = Shuffle01(v10);
+  return Min(v10, lanes_swapped);  // min(lane1, lane0) in both lanes
+}
+template <typename T>
+HWY_API Vec128<T> MaxOfLanes(hwy::SizeTag<8> /* tag */, const Vec128<T> v10) {
+  const Vec128<T> lanes_swapped = Shuffle01(v10);
+  return Max(v10, lanes_swapped);  // max(lane1, lane0) in both lanes
+}
+
+}  // namespace detail
+
+// Supported for u/i/f 32/64. Returns the same value in each lane.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> SumOfLanes(const Vec128<T, N> v) {
+  return detail::SumOfLanes(hwy::SizeTag<sizeof(T)>(), v);  // pick by lane size
+}
+template <typename T, size_t N>
+HWY_API Vec128<T, N> MinOfLanes(const Vec128<T, N> v) {
+  return detail::MinOfLanes(hwy::SizeTag<sizeof(T)>(), v);  // pick by lane size
+}
+template <typename T, size_t N>
+HWY_API Vec128<T, N> MaxOfLanes(const Vec128<T, N> v) {
+  return detail::MaxOfLanes(hwy::SizeTag<sizeof(T)>(), v);  // pick by lane size
+}
+
+// ================================================== Operator wrapper
+
+template <class V>
+HWY_API V Add(V a, V b) {
+  return a + b;  // named wrapper around V's operator+
+}
+template <class V>
+HWY_API V Sub(V a, V b) {
+  return a - b;  // named wrapper around V's operator-
+}
+
+template <class V>
+HWY_API V Mul(V a, V b) {
+  return a * b;  // named wrapper around V's operator*
+}
+template <class V>
+HWY_API V Div(V a, V b) {
+  return a / b;  // named wrapper around V's operator/
+}
+
+template <class V>
+HWY_API V Shl(V a, V b) {
+  return a << b;  // named wrapper around V's operator<<
+}
+template <class V>
+HWY_API V Shr(V a, V b) {
+  return a >> b;  // named wrapper around V's operator>>
+}
+
+template <class V>
+HWY_API auto Eq(V a, V b) -> decltype(a == b) {
+  return a == b;  // named wrapper around V's operator==
+}
+template <class V>
+HWY_API auto Lt(V a, V b) -> decltype(a == b) {
+  return a < b;  // return type is spelled via a == b
+}
+
+template <class V>
+HWY_API auto Gt(V a, V b) -> decltype(a == b) {
+  return a > b;  // return type is spelled via a == b
+}
+template <class V>
+HWY_API auto Ge(V a, V b) -> decltype(a == b) {
+  return a >= b;  // return type is spelled via a == b
+}
+
+template <class V>
+HWY_API auto Le(V a, V b) -> decltype(a == b) {
+  return a <= b;  // return type is spelled via a == b
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/x86_128-inl.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/x86_128-inl.h
new file mode 100644
index 0000000000..fc27527459
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/x86_128-inl.h
@@ -0,0 +1,3761 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// 128-bit vectors and SSE4 instructions, plus some AVX2 and AVX512-VL
+// operations when compiling for those targets.
+// External include guard in highway.h - see comment there.
+
+#include <emmintrin.h>
+#include <smmintrin.h>  // SSE4
+#include <stddef.h>
+#include <stdint.h>
+
+#include "hwy/base.h"
+#include "hwy/ops/shared-inl.h"
+
+// Clang 3.9 generates VINSERTF128 instead of the desired VBROADCASTF128,
+// which would free up port5. However, inline assembly isn't supported on
+// MSVC, results in incorrect output on GCC 8.3, and raises "invalid output size
+// for constraint" errors on Clang (https://gcc.godbolt.org/z/-Jt_-F), hence we
+// disable it.
+#ifndef HWY_LOADDUP_ASM
+#define HWY_LOADDUP_ASM 0
+#endif
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+template <typename T>
+struct Raw128 {
+  using type = __m128i;  // default: every lane type except float/double
+};
+template <>
+struct Raw128<float> {
+  using type = __m128;  // single-precision register type
+};
+template <>
+struct Raw128<double> {
+  using type = __m128d;  // double-precision register type
+};
+
+template <typename T>
+using Full128 = Simd<T, 16 / sizeof(T)>;  // tag: as many T lanes as 16 bytes
+
+template <typename T, size_t N = 16 / sizeof(T)>
+class Vec128 {
+  using Raw = typename Raw128<T>::type;  // __m128i, __m128 or __m128d
+
+ public:
+  // Compound assignment. Only usable if there is a corresponding non-member
+  // binary operator overload. For example, only f32 and f64 support division.
+  HWY_INLINE Vec128& operator*=(const Vec128 other) {
+    return *this = (*this * other);
+  }
+  HWY_INLINE Vec128& operator/=(const Vec128 other) {
+    return *this = (*this / other);
+  }
+  HWY_INLINE Vec128& operator+=(const Vec128 other) {
+    return *this = (*this + other);
+  }
+  HWY_INLINE Vec128& operator-=(const Vec128 other) {
+    return *this = (*this - other);
+  }
+  HWY_INLINE Vec128& operator&=(const Vec128 other) {
+    return *this = (*this & other);
+  }
+  HWY_INLINE Vec128& operator|=(const Vec128 other) {
+    return *this = (*this | other);
+  }
+  HWY_INLINE Vec128& operator^=(const Vec128 other) {
+    return *this = (*this ^ other);
+  }
+
+  Raw raw;  // public so callers can brace-initialize: Vec128<T, N>{raw}
+};
+
+// Integer: FF..FF or 0. Float: MSB, all other bits undefined - see README.
+template <typename T, size_t N = 16 / sizeof(T)>
+class Mask128 {
+  using Raw = typename Raw128<T>::type;  // same register type as Vec128<T, N>
+
+ public:
+  Raw raw;  // public so callers can brace-initialize: Mask128<T, N>{raw}
+};
+
+// ------------------------------ BitCast
+
+namespace detail {
+
+HWY_API __m128i BitCastToInteger(__m128i v) { return v; }  // already integer
+HWY_API __m128i BitCastToInteger(__m128 v) { return _mm_castps_si128(v); }
+HWY_API __m128i BitCastToInteger(__m128d v) { return _mm_castpd_si128(v); }
+
+template <typename T, size_t N>
+HWY_API Vec128<uint8_t, N * sizeof(T)> BitCastToByte(Vec128<T, N> v) {
+  return Vec128<uint8_t, N * sizeof(T)>{BitCastToInteger(v.raw)};  // same bits
+}
+
+// Cannot rely on function overloading because return types differ.
+template <typename T>
+struct BitCastFromInteger128 {
+  HWY_INLINE __m128i operator()(__m128i v) { return v; }  // integer lane types
+};
+template <>
+struct BitCastFromInteger128<float> {
+  HWY_INLINE __m128 operator()(__m128i v) { return _mm_castsi128_ps(v); }
+};
+template <>
+struct BitCastFromInteger128<double> {
+  HWY_INLINE __m128d operator()(__m128i v) { return _mm_castsi128_pd(v); }
+};
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> BitCastFromByte(Simd<T, N> /* tag */,
+                                Vec128<uint8_t, N * sizeof(T)> v) {
+  return Vec128<T, N>{BitCastFromInteger128<T>()(v.raw)};  // retype per T
+}
+
+}  // namespace detail
+
+template <typename T, size_t N, typename FromT>
+HWY_API Vec128<T, N> BitCast(Simd<T, N> d,
+                             Vec128<FromT, N * sizeof(T) / sizeof(FromT)> v) {
+  return detail::BitCastFromByte(d, detail::BitCastToByte(v));  // via u8 lanes
+}
+
+// ------------------------------ Set
+
+// Returns an all-zero vector/part.
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API Vec128<T, N> Zero(Simd<T, N> /* tag */) {
+  return Vec128<T, N>{_mm_setzero_si128()};  // integer register types
+}
+template <size_t N, HWY_IF_LE128(float, N)>
+HWY_API Vec128<float, N> Zero(Simd<float, N> /* tag */) {
+  return Vec128<float, N>{_mm_setzero_ps()};  // __m128 needs the _ps form
+}
+template <size_t N, HWY_IF_LE128(double, N)>
+HWY_API Vec128<double, N> Zero(Simd<double, N> /* tag */) {
+  return Vec128<double, N>{_mm_setzero_pd()};  // __m128d needs the _pd form
+}
+
+// Returns a vector/part with all lanes set to "t".
+template <size_t N, HWY_IF_LE128(uint8_t, N)>
+HWY_API Vec128<uint8_t, N> Set(Simd<uint8_t, N> /* tag */, const uint8_t t) {
+  return Vec128<uint8_t, N>{_mm_set1_epi8(static_cast<char>(t))};  // NOLINT
+}
+template <size_t N, HWY_IF_LE128(uint16_t, N)>
+HWY_API Vec128<uint16_t, N> Set(Simd<uint16_t, N> /* tag */, const uint16_t t) {
+  return Vec128<uint16_t, N>{_mm_set1_epi16(static_cast<short>(t))};  // NOLINT
+}
+template <size_t N, HWY_IF_LE128(uint32_t, N)>
+HWY_API Vec128<uint32_t, N> Set(Simd<uint32_t, N> /* tag */, const uint32_t t) {
+  return Vec128<uint32_t, N>{_mm_set1_epi32(static_cast<int>(t))};  // takes int
+}
+template <size_t N, HWY_IF_LE128(uint64_t, N)>
+HWY_API Vec128<uint64_t, N> Set(Simd<uint64_t, N> /* tag */, const uint64_t t) {
+  return Vec128<uint64_t, N>{
+      _mm_set1_epi64x(static_cast<long long>(t))};  // NOLINT
+}
+template <size_t N, HWY_IF_LE128(int8_t, N)>
+HWY_API Vec128<int8_t, N> Set(Simd<int8_t, N> /* tag */, const int8_t t) {
+  return Vec128<int8_t, N>{_mm_set1_epi8(static_cast<char>(t))};  // NOLINT
+}
+template <size_t N, HWY_IF_LE128(int16_t, N)>
+HWY_API Vec128<int16_t, N> Set(Simd<int16_t, N> /* tag */, const int16_t t) {
+  return Vec128<int16_t, N>{_mm_set1_epi16(static_cast<short>(t))};  // NOLINT
+}
+template <size_t N, HWY_IF_LE128(int32_t, N)>
+HWY_API Vec128<int32_t, N> Set(Simd<int32_t, N> /* tag */, const int32_t t) {
+  return Vec128<int32_t, N>{_mm_set1_epi32(t)};  // no cast applied here
+}
+template <size_t N, HWY_IF_LE128(int64_t, N)>
+HWY_API Vec128<int64_t, N> Set(Simd<int64_t, N> /* tag */, const int64_t t) {
+  return Vec128<int64_t, N>{
+      _mm_set1_epi64x(static_cast<long long>(t))};  // NOLINT
+}
+template <size_t N, HWY_IF_LE128(float, N)>
+HWY_API Vec128<float, N> Set(Simd<float, N> /* tag */, const float t) {
+  return Vec128<float, N>{_mm_set1_ps(t)};  // broadcast to all float lanes
+}
+template <size_t N, HWY_IF_LE128(double, N)>
+HWY_API Vec128<double, N> Set(Simd<double, N> /* tag */, const double t) {
+  return Vec128<double, N>{_mm_set1_pd(t)};  // broadcast to all double lanes
+}
+
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wuninitialized")
+
+// Returns a vector with uninitialized elements.
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API Vec128<T, N> Undefined(Simd<T, N> /* tag */) {
+  // Available on Clang 6.0, GCC 6.2, ICC 16.03, MSVC 19.14. All but ICC
+  // generate an XOR instruction.
+  return Vec128<T, N>{_mm_undefined_si128()};  // integer register types
+}
+template <size_t N, HWY_IF_LE128(float, N)>
+HWY_API Vec128<float, N> Undefined(Simd<float, N> /* tag */) {
+  return Vec128<float, N>{_mm_undefined_ps()};  // __m128 variant
+}
+template <size_t N, HWY_IF_LE128(double, N)>
+HWY_API Vec128<double, N> Undefined(Simd<double, N> /* tag */) {
+  return Vec128<double, N>{_mm_undefined_pd()};  // __m128d variant
+}
+
+HWY_DIAGNOSTICS(pop)
+
+// ------------------------------ GetLane
+
+// Gets the single value stored in a vector/part.
+template <size_t N>
+HWY_API uint8_t GetLane(const Vec128<uint8_t, N> v) {
+  return _mm_cvtsi128_si32(v.raw) & 0xFF;  // low 32 bits, keep lowest byte
+}
+template <size_t N>
+HWY_API int8_t GetLane(const Vec128<int8_t, N> v) {
+  return _mm_cvtsi128_si32(v.raw) & 0xFF;  // narrowed to int8_t on return
+}
+template <size_t N>
+HWY_API uint16_t GetLane(const Vec128<uint16_t, N> v) {
+  return _mm_cvtsi128_si32(v.raw) & 0xFFFF;  // low 32 bits, keep low 16
+}
+template <size_t N>
+HWY_API int16_t GetLane(const Vec128<int16_t, N> v) {
+  return _mm_cvtsi128_si32(v.raw) & 0xFFFF;  // narrowed to int16_t on return
+}
+template <size_t N>
+HWY_API uint32_t GetLane(const Vec128<uint32_t, N> v) {
+  return _mm_cvtsi128_si32(v.raw);  // int result converted to uint32_t
+}
+template <size_t N>
+HWY_API int32_t GetLane(const Vec128<int32_t, N> v) {
+  return _mm_cvtsi128_si32(v.raw);  // lowest 32-bit lane
+}
+template <size_t N>
+HWY_API float GetLane(const Vec128<float, N> v) {
+  return _mm_cvtss_f32(v.raw);  // lowest float lane
+}
+template <size_t N>
+HWY_API uint64_t GetLane(const Vec128<uint64_t, N> v) {
+#if HWY_ARCH_X86_32
+  alignas(16) uint64_t lanes[2];  // 32-bit build: go through memory instead
+  Store(v, Simd<uint64_t, N>(), lanes);
+  return lanes[0];
+#else
+  return _mm_cvtsi128_si64(v.raw);  // lowest 64-bit lane
+#endif
+}
+template <size_t N>
+HWY_API int64_t GetLane(const Vec128<int64_t, N> v) {
+#if HWY_ARCH_X86_32
+  alignas(16) int64_t lanes[2];  // 32-bit build: go through memory instead
+  Store(v, Simd<int64_t, N>(), lanes);
+  return lanes[0];
+#else
+  return _mm_cvtsi128_si64(v.raw);  // lowest 64-bit lane
+#endif
+}
+template <size_t N>
+HWY_API double GetLane(const Vec128<double, N> v) {
+  return _mm_cvtsd_f64(v.raw);  // lowest double lane
+}
+
+// ================================================== LOGICAL
+
+// ------------------------------ And
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> And(Vec128<T, N> a, Vec128<T, N> b) {
+  return Vec128<T, N>{_mm_and_si128(a.raw, b.raw)};  // integer register types
+}
+template <size_t N>
+HWY_API Vec128<float, N> And(const Vec128<float, N> a,
+                             const Vec128<float, N> b) {
+  return Vec128<float, N>{_mm_and_ps(a.raw, b.raw)};  // __m128 overload
+}
+template <size_t N>
+HWY_API Vec128<double, N> And(const Vec128<double, N> a,
+                              const Vec128<double, N> b) {
+  return Vec128<double, N>{_mm_and_pd(a.raw, b.raw)};  // __m128d overload
+}
+
+// ------------------------------ AndNot
+
+// Returns ~not_mask & mask.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> AndNot(Vec128<T, N> not_mask, Vec128<T, N> mask) {
+  return Vec128<T, N>{_mm_andnot_si128(not_mask.raw, mask.raw)};  // 1st negated
+}
+template <size_t N>
+HWY_API Vec128<float, N> AndNot(const Vec128<float, N> not_mask,
+                                const Vec128<float, N> mask) {
+  return Vec128<float, N>{_mm_andnot_ps(not_mask.raw, mask.raw)};  // __m128
+}
+template <size_t N>
+HWY_API Vec128<double, N> AndNot(const Vec128<double, N> not_mask,
+                                 const Vec128<double, N> mask) {
+  return Vec128<double, N>{_mm_andnot_pd(not_mask.raw, mask.raw)};  // __m128d
+}
+
+// ------------------------------ Or
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Or(Vec128<T, N> a, Vec128<T, N> b) {
+  return Vec128<T, N>{_mm_or_si128(a.raw, b.raw)};  // integer register types
+}
+
+template <size_t N>
+HWY_API Vec128<float, N> Or(const Vec128<float, N> a,
+                            const Vec128<float, N> b) {
+  return Vec128<float, N>{_mm_or_ps(a.raw, b.raw)};  // __m128 overload
+}
+template <size_t N>
+HWY_API Vec128<double, N> Or(const Vec128<double, N> a,
+                             const Vec128<double, N> b) {
+  return Vec128<double, N>{_mm_or_pd(a.raw, b.raw)};  // __m128d overload
+}
+
+// ------------------------------ Xor
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Xor(Vec128<T, N> a, Vec128<T, N> b) {
+  return Vec128<T, N>{_mm_xor_si128(a.raw, b.raw)};  // integer register types
+}
+
+template <size_t N>
+HWY_API Vec128<float, N> Xor(const Vec128<float, N> a,
+                             const Vec128<float, N> b) {
+  return Vec128<float, N>{_mm_xor_ps(a.raw, b.raw)};  // __m128 overload
+}
+template <size_t N>
+HWY_API Vec128<double, N> Xor(const Vec128<double, N> a,
+                              const Vec128<double, N> b) {
+  return Vec128<double, N>{_mm_xor_pd(a.raw, b.raw)};  // __m128d overload
+}
+
+// ------------------------------ Not
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Not(const Vec128<T, N> v) {
+  using TU = MakeUnsigned<T>;  // integer view so float/double share this code
+#if HWY_TARGET == HWY_AVX3
+  const __m128i vu = BitCast(Simd<TU, N>(), v).raw;
+  return BitCast(Simd<T, N>(),  // truth table 0x55 is ~C; all inputs are vu
+                 Vec128<TU, N>{_mm_ternarylogic_epi32(vu, vu, vu, 0x55)});
+#else
+  return Xor(v, BitCast(Simd<T, N>(), Vec128<TU, N>{_mm_set1_epi32(-1)}));
+#endif
+}
+
+// ------------------------------ Operator overloads (internal-only if float)
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator&(const Vec128<T, N> a, const Vec128<T, N> b) {
+  return And(a, b);  // operator form of And
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator|(const Vec128<T, N> a, const Vec128<T, N> b) {
+  return Or(a, b);  // operator form of Or
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> operator^(const Vec128<T, N> a, const Vec128<T, N> b) {
+  return Xor(a, b);  // operator form of Xor
+}
+
+// ------------------------------ CopySign
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> CopySign(const Vec128<T, N> magn,
+                              const Vec128<T, N> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+
+  const Simd<T, N> d;
+  const auto msb = SignBit(d);  // selector: which bits come from `sign`
+
+#if HWY_TARGET == HWY_AVX3
+  const Rebind<MakeUnsigned<T>, decltype(d)> du;
+  // Truth table for msb, magn, sign | bitwise msb ? sign : mag
+  //                  0    0     0   |  0
+  //                  0    0     1   |  0
+  //                  0    1     0   |  1
+  //                  0    1     1   |  1
+  //                  1    0     0   |  0
+  //                  1    0     1   |  1
+  //                  1    1     0   |  0
+  //                  1    1     1   |  1
+  // The lane size does not matter because we are not using predication.
+  const __m128i out = _mm_ternarylogic_epi32(  // 0xAC encodes the table above
+      BitCast(du, msb).raw, BitCast(du, magn).raw, BitCast(du, sign).raw, 0xAC);
+  return BitCast(d, decltype(Zero(du)){out});
+#else
+  return Or(AndNot(msb, magn), And(msb, sign));  // (~msb & magn) | (msb & sign)
+#endif
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> CopySignToAbs(const Vec128<T, N> abs,
+                                   const Vec128<T, N> sign) {
+#if HWY_TARGET == HWY_AVX3
+  // AVX3 can also handle abs < 0, so no extra action needed.
+  return CopySign(abs, sign);
+#else
+  return Or(abs, And(SignBit(Simd<T, N>()), sign));  // abs is not masked here
+#endif
+}
+
+// ------------------------------ Mask
+
+// Mask and Vec are the same (true = FF..FF).
+template <typename T, size_t N>
+HWY_API Mask128<T, N> MaskFromVec(const Vec128<T, N> v) {
+  return Mask128<T, N>{v.raw};  // reuses the raw register unchanged
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> VecFromMask(const Mask128<T, N> v) {
+  return Vec128<T, N>{v.raw};  // reuses the raw register unchanged
+}
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> VecFromMask(const Simd<T, N> /* tag */,
+                                 const Mask128<T, N> v) {
+  return Vec128<T, N>{v.raw};  // tag overload; the tag value is unused
+}
+
+// mask ? yes : no
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfThenElse(Mask128<T, N> mask, Vec128<T, N> yes,
+                                Vec128<T, N> no) {
+  return Vec128<T, N>{_mm_blendv_epi8(no.raw, yes.raw, mask.raw)};  // per byte
+}
+template <size_t N>
+HWY_API Vec128<float, N> IfThenElse(const Mask128<float, N> mask,
+                                    const Vec128<float, N> yes,
+                                    const Vec128<float, N> no) {
+  return Vec128<float, N>{_mm_blendv_ps(no.raw, yes.raw, mask.raw)};  // by MSB
+}
+template <size_t N>
+HWY_API Vec128<double, N> IfThenElse(const Mask128<double, N> mask,
+                                     const Vec128<double, N> yes,
+                                     const Vec128<double, N> no) {
+  return Vec128<double, N>{_mm_blendv_pd(no.raw, yes.raw, mask.raw)};  // by MSB
+}
+
+// mask ? yes : 0
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfThenElseZero(Mask128<T, N> mask, Vec128<T, N> yes) {
+  return yes & VecFromMask(Simd<T, N>(), mask);  // bitwise AND with the mask
+}
+
+// mask ? 0 : no
+template <typename T, size_t N>
+HWY_API Vec128<T, N> IfThenZeroElse(Mask128<T, N> mask, Vec128<T, N> no) {
+  return AndNot(VecFromMask(Simd<T, N>(), mask), no);  // ~mask & no
+}
+
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> ZeroIfNegative(Vec128<T, N> v) {
+  const Simd<T, N> d;
+  return IfThenElse(MaskFromVec(v), Zero(d), v);  // v itself is the mask
+}
+
+// ------------------------------ Mask logical
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Not(const Mask128<T, N> m) {
+  const Simd<T, N> d;
+  return MaskFromVec(Not(VecFromMask(d, m)));  // via the vector overload
+}
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> And(const Mask128<T, N> a, Mask128<T, N> b) {
+  const Simd<T, N> d;
+  return MaskFromVec(And(VecFromMask(d, a), VecFromMask(d, b)));  // via vectors
+}
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> AndNot(const Mask128<T, N> a, Mask128<T, N> b) {
+  const Simd<T, N> d;
+  return MaskFromVec(AndNot(VecFromMask(d, a), VecFromMask(d, b)));  // ~a & b
+}
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Or(const Mask128<T, N> a, Mask128<T, N> b) {
+  const Simd<T, N> d;
+  return MaskFromVec(Or(VecFromMask(d, a), VecFromMask(d, b)));  // via vectors
+}
+
+template <typename T, size_t N>
+HWY_API Mask128<T, N> Xor(const Mask128<T, N> a, Mask128<T, N> b) {
+  const Simd<T, N> d;
+  return MaskFromVec(Xor(VecFromMask(d, a), VecFromMask(d, b)));  // via vectors
+}
+
+// ================================================== COMPARE
+
+// Comparisons fill a lane with 1-bits if the condition is true, else 0.
+
+template <typename TFrom, typename TTo, size_t N>
+HWY_API Mask128<TTo, N> RebindMask(Simd<TTo, N> /*tag*/, Mask128<TFrom, N> m) {
+  static_assert(sizeof(TFrom) == sizeof(TTo), "Must have same size");
+  const Simd<TFrom, N> d;
+  return MaskFromVec(BitCast(Simd<TTo, N>(), VecFromMask(d, m)));  // same bits
+}
+
+// ------------------------------ Equality
+
+// Unsigned
+template <size_t N>
+HWY_API Mask128<uint8_t, N> operator==(const Vec128<uint8_t, N> a,
+                                       const Vec128<uint8_t, N> b) {
+  return Mask128<uint8_t, N>{_mm_cmpeq_epi8(a.raw, b.raw)};  // 8-bit lanes
+}
+template <size_t N>
+HWY_API Mask128<uint16_t, N> operator==(const Vec128<uint16_t, N> a,
+                                        const Vec128<uint16_t, N> b) {
+  return Mask128<uint16_t, N>{_mm_cmpeq_epi16(a.raw, b.raw)};  // 16-bit lanes
+}
+template <size_t N>
+HWY_API Mask128<uint32_t, N> operator==(const Vec128<uint32_t, N> a,
+                                        const Vec128<uint32_t, N> b) {
+  return Mask128<uint32_t, N>{_mm_cmpeq_epi32(a.raw, b.raw)};  // 32-bit lanes
+}
+template <size_t N>
+HWY_API Mask128<uint64_t, N> operator==(const Vec128<uint64_t, N> a,
+                                        const Vec128<uint64_t, N> b) {
+  return Mask128<uint64_t, N>{_mm_cmpeq_epi64(a.raw, b.raw)};  // 64-bit lanes
+}
+
+// Signed
+template <size_t N>
+HWY_API Mask128<int8_t, N> operator==(const Vec128<int8_t, N> a,
+                                      const Vec128<int8_t, N> b) {
+  return Mask128<int8_t, N>{_mm_cmpeq_epi8(a.raw, b.raw)};  // as for uint8_t
+}
+template <size_t N>
+HWY_API Mask128<int16_t, N> operator==(Vec128<int16_t, N> a,
+                                       Vec128<int16_t, N> b) {
+  return Mask128<int16_t, N>{_mm_cmpeq_epi16(a.raw, b.raw)};  // as for uint16_t
+}
+template <size_t N>
+HWY_API Mask128<int32_t, N> operator==(const Vec128<int32_t, N> a,
+                                       const Vec128<int32_t, N> b) {
+  return Mask128<int32_t, N>{_mm_cmpeq_epi32(a.raw, b.raw)};  // as for uint32_t
+}
+template <size_t N>
+HWY_API Mask128<int64_t, N> operator==(const Vec128<int64_t, N> a,
+                                       const Vec128<int64_t, N> b) {
+  return Mask128<int64_t, N>{_mm_cmpeq_epi64(a.raw, b.raw)};  // as for uint64_t
+}
+
+// Float
+template <size_t N>
+HWY_API Mask128<float, N> operator==(const Vec128<float, N> a,
+                                     const Vec128<float, N> b) {
+  return Mask128<float, N>{_mm_cmpeq_ps(a.raw, b.raw)};  // float compare
+}
+template <size_t N>
+HWY_API Mask128<double, N> operator==(const Vec128<double, N> a,
+                                      const Vec128<double, N> b) {
+  return Mask128<double, N>{_mm_cmpeq_pd(a.raw, b.raw)};  // double compare
+}
+
+// Returns a mask that is true in lanes where every bit set in `bit` is also
+// set in `v`. Integer lanes only (enforced at compile time).
+template <typename T, size_t N>
+HWY_API Mask128<T, N> TestBit(Vec128<T, N> v, Vec128<T, N> bit) {
+  static_assert(!hwy::IsFloat<T>(), "Only integer vectors supported");
+  const auto selected = v & bit;
+  return selected == bit;
+}
+
+// ------------------------------ Strict inequality
+
+// Signed/float <
+template <size_t N>
+HWY_API Mask128<int8_t, N> operator<(const Vec128<int8_t, N> a,
+                                     const Vec128<int8_t, N> b) {
+  // Expressed as b > a via the greater-than intrinsic.
+  const auto lt = _mm_cmpgt_epi8(b.raw, a.raw);
+  return Mask128<int8_t, N>{lt};
+}
+template <size_t N>
+HWY_API Mask128<int16_t, N> operator<(const Vec128<int16_t, N> a,
+                                      const Vec128<int16_t, N> b) {
+  // Expressed as b > a via the greater-than intrinsic.
+  const auto lt = _mm_cmpgt_epi16(b.raw, a.raw);
+  return Mask128<int16_t, N>{lt};
+}
+template <size_t N>
+HWY_API Mask128<int32_t, N> operator<(const Vec128<int32_t, N> a,
+                                      const Vec128<int32_t, N> b) {
+  // Expressed as b > a via the greater-than intrinsic.
+  const auto lt = _mm_cmpgt_epi32(b.raw, a.raw);
+  return Mask128<int32_t, N>{lt};
+}
+template <size_t N>
+HWY_API Mask128<float, N> operator<(const Vec128<float, N> a,
+                                    const Vec128<float, N> b) {
+  // Floating-point has a direct less-than intrinsic.
+  const auto lt = _mm_cmplt_ps(a.raw, b.raw);
+  return Mask128<float, N>{lt};
+}
+template <size_t N>
+HWY_API Mask128<double, N> operator<(const Vec128<double, N> a,
+                                     const Vec128<double, N> b) {
+  // Floating-point has a direct less-than intrinsic.
+  const auto lt = _mm_cmplt_pd(a.raw, b.raw);
+  return Mask128<double, N>{lt};
+}
+
+// Signed/float >
+template <size_t N>
+HWY_API Mask128<int8_t, N> operator>(const Vec128<int8_t, N> a,
+                                     const Vec128<int8_t, N> b) {
+  const auto gt = _mm_cmpgt_epi8(a.raw, b.raw);
+  return Mask128<int8_t, N>{gt};
+}
+template <size_t N>
+HWY_API Mask128<int16_t, N> operator>(const Vec128<int16_t, N> a,
+                                      const Vec128<int16_t, N> b) {
+  const auto gt = _mm_cmpgt_epi16(a.raw, b.raw);
+  return Mask128<int16_t, N>{gt};
+}
+template <size_t N>
+HWY_API Mask128<int32_t, N> operator>(const Vec128<int32_t, N> a,
+                                      const Vec128<int32_t, N> b) {
+  const auto gt = _mm_cmpgt_epi32(a.raw, b.raw);
+  return Mask128<int32_t, N>{gt};
+}
+template <size_t N>
+HWY_API Mask128<float, N> operator>(const Vec128<float, N> a,
+                                    const Vec128<float, N> b) {
+  const auto gt = _mm_cmpgt_ps(a.raw, b.raw);
+  return Mask128<float, N>{gt};
+}
+template <size_t N>
+HWY_API Mask128<double, N> operator>(const Vec128<double, N> a,
+                                     const Vec128<double, N> b) {
+  const auto gt = _mm_cmpgt_pd(a.raw, b.raw);
+  return Mask128<double, N>{gt};
+}
+
+// Signed 64-bit greater-than. The SSE4 target (SSE4.1) emulates the compare
+// with 32-bit operations; other targets use _mm_cmpgt_epi64 directly.
+template <size_t N>
+HWY_API Mask128<int64_t, N> operator>(const Vec128<int64_t, N> a,
+                                      const Vec128<int64_t, N> b) {
+#if HWY_TARGET == HWY_SSE4  // SSE4.1
+  // The upper halves carry the sign: if a.hi > b.hi (signed), a > b.
+  const __m128i m_gt = _mm_cmpgt_epi32(a.raw, b.raw);
+
+  // Otherwise, if the upper halves are equal, the lower halves decide. These
+  // must be compared as UNSIGNED values, which the signed _mm_cmpgt_epi32
+  // cannot do (it would get e.g. 0x80000000 > 1 wrong). Instead use the 64-bit
+  // difference: when a.hi == b.hi, b - a equals b.lo - a.lo, whose magnitude is
+  // below 2^32, so its upper 32 bits are all-ones iff a.lo > b.lo (unsigned)
+  // and all-zero otherwise.
+  const __m128i m_eq = _mm_cmpeq_epi32(a.raw, b.raw);
+  const __m128i b_minus_a = _mm_sub_epi64(b.raw, a.raw);
+  const __m128i lo_gt = _mm_and_si128(m_eq, b_minus_a);
+
+  // Only the upper 32 bits of each 64-bit lane are meaningful at this point.
+  const __m128i gt = _mm_or_si128(lo_gt, m_gt);
+  // Copy result in upper 32 bits to lower 32 bits.
+  return Mask128<int64_t, N>{_mm_shuffle_epi32(gt, _MM_SHUFFLE(3, 3, 1, 1))};
+#else
+  return Mask128<int64_t, N>{_mm_cmpgt_epi64(a.raw, b.raw)};  // SSE4.2
+#endif
+}
+
+// Signed 64-bit less-than, implemented as the 64-bit greater-than with its
+// operands swapped.
+template <size_t N>
+HWY_API Mask128<int64_t, N> operator<(const Vec128<int64_t, N> a,
+                                      const Vec128<int64_t, N> b) {
+  const Mask128<int64_t, N> b_gt_a = operator>(b, a);
+  return b_gt_a;
+}
+
+// ------------------------------ Weak inequality
+
+// Float <= >=
+template <size_t N>
+HWY_API Mask128<float, N> operator<=(const Vec128<float, N> a,
+                                     const Vec128<float, N> b) {
+  const auto le = _mm_cmple_ps(a.raw, b.raw);
+  return Mask128<float, N>{le};
+}
+template <size_t N>
+HWY_API Mask128<double, N> operator<=(const Vec128<double, N> a,
+                                      const Vec128<double, N> b) {
+  const auto le = _mm_cmple_pd(a.raw, b.raw);
+  return Mask128<double, N>{le};
+}
+// Lane-wise a >= b for float and double.
+template <size_t N>
+HWY_API Mask128<float, N> operator>=(const Vec128<float, N> a,
+                                     const Vec128<float, N> b) {
+  const auto ge = _mm_cmpge_ps(a.raw, b.raw);
+  return Mask128<float, N>{ge};
+}
+template <size_t N>
+HWY_API Mask128<double, N> operator>=(const Vec128<double, N> a,
+                                      const Vec128<double, N> b) {
+  const auto ge = _mm_cmpge_pd(a.raw, b.raw);
+  return Mask128<double, N>{ge};
+}
+
+// ------------------------------ FirstN (Iota, Lt)
+
+// Returns a mask whose first `num` lanes are true and the rest false. The lane
+// indices from Iota are compared against `num` in the signed domain.
+// `num` is clamped to N: a larger count means "all lanes", and clamping
+// guarantees the value fits in the signed lane type (without it, e.g. num=200
+// with 8-bit lanes would wrap to a negative value and yield an all-false mask).
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API Mask128<T, N> FirstN(const Simd<T, N> d, size_t num) {
+  const RebindToSigned<decltype(d)> di;  // Signed comparisons are cheaper.
+  using TI = MakeSigned<T>;
+  const size_t clamped = (num < N) ? num : N;
+  return RebindMask(d, Iota(di, 0) < Set(di, static_cast<TI>(clamped)));
+}
+
+// ================================================== ARITHMETIC
+
+// ------------------------------ Addition
+
+// Unsigned
+template <size_t N>
+HWY_API Vec128<uint8_t, N> operator+(const Vec128<uint8_t, N> a,
+                                     const Vec128<uint8_t, N> b) {
+  const auto sum = _mm_add_epi8(a.raw, b.raw);
+  return Vec128<uint8_t, N>{sum};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> operator+(const Vec128<uint16_t, N> a,
+                                      const Vec128<uint16_t, N> b) {
+  const auto sum = _mm_add_epi16(a.raw, b.raw);
+  return Vec128<uint16_t, N>{sum};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> operator+(const Vec128<uint32_t, N> a,
+                                      const Vec128<uint32_t, N> b) {
+  const auto sum = _mm_add_epi32(a.raw, b.raw);
+  return Vec128<uint32_t, N>{sum};
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, N> operator+(const Vec128<uint64_t, N> a,
+                                      const Vec128<uint64_t, N> b) {
+  const auto sum = _mm_add_epi64(a.raw, b.raw);
+  return Vec128<uint64_t, N>{sum};
+}
+
+// Signed: uses the same integer add intrinsics as the unsigned overloads.
+template <size_t N>
+HWY_API Vec128<int8_t, N> operator+(const Vec128<int8_t, N> a,
+                                    const Vec128<int8_t, N> b) {
+  const auto sum = _mm_add_epi8(a.raw, b.raw);
+  return Vec128<int8_t, N>{sum};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> operator+(const Vec128<int16_t, N> a,
+                                     const Vec128<int16_t, N> b) {
+  const auto sum = _mm_add_epi16(a.raw, b.raw);
+  return Vec128<int16_t, N>{sum};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> operator+(const Vec128<int32_t, N> a,
+                                     const Vec128<int32_t, N> b) {
+  const auto sum = _mm_add_epi32(a.raw, b.raw);
+  return Vec128<int32_t, N>{sum};
+}
+template <size_t N>
+HWY_API Vec128<int64_t, N> operator+(const Vec128<int64_t, N> a,
+                                     const Vec128<int64_t, N> b) {
+  const auto sum = _mm_add_epi64(a.raw, b.raw);
+  return Vec128<int64_t, N>{sum};
+}
+
+// Float
+template <size_t N>
+HWY_API Vec128<float, N> operator+(const Vec128<float, N> a,
+                                   const Vec128<float, N> b) {
+  const auto sum = _mm_add_ps(a.raw, b.raw);
+  return Vec128<float, N>{sum};
+}
+template <size_t N>
+HWY_API Vec128<double, N> operator+(const Vec128<double, N> a,
+                                    const Vec128<double, N> b) {
+  const auto sum = _mm_add_pd(a.raw, b.raw);
+  return Vec128<double, N>{sum};
+}
+
+// ------------------------------ Subtraction
+
+// Unsigned
+template <size_t N>
+HWY_API Vec128<uint8_t, N> operator-(const Vec128<uint8_t, N> a,
+                                     const Vec128<uint8_t, N> b) {
+  const auto diff = _mm_sub_epi8(a.raw, b.raw);
+  return Vec128<uint8_t, N>{diff};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> operator-(Vec128<uint16_t, N> a,
+                                      Vec128<uint16_t, N> b) {
+  const auto diff = _mm_sub_epi16(a.raw, b.raw);
+  return Vec128<uint16_t, N>{diff};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> operator-(const Vec128<uint32_t, N> a,
+                                      const Vec128<uint32_t, N> b) {
+  const auto diff = _mm_sub_epi32(a.raw, b.raw);
+  return Vec128<uint32_t, N>{diff};
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, N> operator-(const Vec128<uint64_t, N> a,
+                                      const Vec128<uint64_t, N> b) {
+  const auto diff = _mm_sub_epi64(a.raw, b.raw);
+  return Vec128<uint64_t, N>{diff};
+}
+
+// Signed: uses the same integer subtract intrinsics as the unsigned overloads.
+template <size_t N>
+HWY_API Vec128<int8_t, N> operator-(const Vec128<int8_t, N> a,
+                                    const Vec128<int8_t, N> b) {
+  const auto diff = _mm_sub_epi8(a.raw, b.raw);
+  return Vec128<int8_t, N>{diff};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> operator-(const Vec128<int16_t, N> a,
+                                     const Vec128<int16_t, N> b) {
+  const auto diff = _mm_sub_epi16(a.raw, b.raw);
+  return Vec128<int16_t, N>{diff};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> operator-(const Vec128<int32_t, N> a,
+                                     const Vec128<int32_t, N> b) {
+  const auto diff = _mm_sub_epi32(a.raw, b.raw);
+  return Vec128<int32_t, N>{diff};
+}
+template <size_t N>
+HWY_API Vec128<int64_t, N> operator-(const Vec128<int64_t, N> a,
+                                     const Vec128<int64_t, N> b) {
+  const auto diff = _mm_sub_epi64(a.raw, b.raw);
+  return Vec128<int64_t, N>{diff};
+}
+
+// Float
+template <size_t N>
+HWY_API Vec128<float, N> operator-(const Vec128<float, N> a,
+                                   const Vec128<float, N> b) {
+  const auto diff = _mm_sub_ps(a.raw, b.raw);
+  return Vec128<float, N>{diff};
+}
+template <size_t N>
+HWY_API Vec128<double, N> operator-(const Vec128<double, N> a,
+                                    const Vec128<double, N> b) {
+  const auto diff = _mm_sub_pd(a.raw, b.raw);
+  return Vec128<double, N>{diff};
+}
+
+// ------------------------------ Saturating addition
+
+// Returns a + b clamped to the destination range.
+
+// Unsigned
+template <size_t N>
+HWY_API Vec128<uint8_t, N> SaturatedAdd(const Vec128<uint8_t, N> a,
+                                        const Vec128<uint8_t, N> b) {
+  const auto sum = _mm_adds_epu8(a.raw, b.raw);
+  return Vec128<uint8_t, N>{sum};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> SaturatedAdd(const Vec128<uint16_t, N> a,
+                                         const Vec128<uint16_t, N> b) {
+  const auto sum = _mm_adds_epu16(a.raw, b.raw);
+  return Vec128<uint16_t, N>{sum};
+}
+
+// Signed: uses the signed-saturation (epi) variants of the intrinsic.
+template <size_t N>
+HWY_API Vec128<int8_t, N> SaturatedAdd(const Vec128<int8_t, N> a,
+                                       const Vec128<int8_t, N> b) {
+  const auto sum = _mm_adds_epi8(a.raw, b.raw);
+  return Vec128<int8_t, N>{sum};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> SaturatedAdd(const Vec128<int16_t, N> a,
+                                        const Vec128<int16_t, N> b) {
+  const auto sum = _mm_adds_epi16(a.raw, b.raw);
+  return Vec128<int16_t, N>{sum};
+}
+
+// ------------------------------ Saturating subtraction
+
+// Returns a - b clamped to the destination range.
+
+// Unsigned
+template <size_t N>
+HWY_API Vec128<uint8_t, N> SaturatedSub(const Vec128<uint8_t, N> a,
+                                        const Vec128<uint8_t, N> b) {
+  const auto diff = _mm_subs_epu8(a.raw, b.raw);
+  return Vec128<uint8_t, N>{diff};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> SaturatedSub(const Vec128<uint16_t, N> a,
+                                         const Vec128<uint16_t, N> b) {
+  const auto diff = _mm_subs_epu16(a.raw, b.raw);
+  return Vec128<uint16_t, N>{diff};
+}
+
+// Signed: uses the signed-saturation (epi) variants of the intrinsic.
+template <size_t N>
+HWY_API Vec128<int8_t, N> SaturatedSub(const Vec128<int8_t, N> a,
+                                       const Vec128<int8_t, N> b) {
+  const auto diff = _mm_subs_epi8(a.raw, b.raw);
+  return Vec128<int8_t, N>{diff};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> SaturatedSub(const Vec128<int16_t, N> a,
+                                        const Vec128<int16_t, N> b) {
+  const auto diff = _mm_subs_epi16(a.raw, b.raw);
+  return Vec128<int16_t, N>{diff};
+}
+
+// ------------------------------ AverageRound
+
+// Returns (a + b + 1) / 2
+
+// Unsigned
+template <size_t N>
+HWY_API Vec128<uint8_t, N> AverageRound(const Vec128<uint8_t, N> a,
+                                        const Vec128<uint8_t, N> b) {
+  const auto avg = _mm_avg_epu8(a.raw, b.raw);
+  return Vec128<uint8_t, N>{avg};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> AverageRound(const Vec128<uint16_t, N> a,
+                                         const Vec128<uint16_t, N> b) {
+  const auto avg = _mm_avg_epu16(a.raw, b.raw);
+  return Vec128<uint16_t, N>{avg};
+}
+
+// ------------------------------ Abs
+
+// Returns absolute value, except that LimitsMin() maps to LimitsMax() + 1.
+template <size_t N>
+HWY_API Vec128<int8_t, N> Abs(const Vec128<int8_t, N> v) {
+#if HWY_COMPILER_MSVC
+  // Workaround for incorrect codegen? (reaches breakpoint)
+  // Computes max(v, 0 - v) instead of calling _mm_abs_epi8.
+  const Simd<int8_t, N> d;
+  const auto negated = Zero(d) - v;
+  return Vec128<int8_t, N>{_mm_max_epi8(v.raw, negated.raw)};
+#else
+  const auto magnitude = _mm_abs_epi8(v.raw);
+  return Vec128<int8_t, N>{magnitude};
+#endif
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> Abs(const Vec128<int16_t, N> v) {
+  const auto magnitude = _mm_abs_epi16(v.raw);
+  return Vec128<int16_t, N>{magnitude};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> Abs(const Vec128<int32_t, N> v) {
+  const auto magnitude = _mm_abs_epi32(v.raw);
+  return Vec128<int32_t, N>{magnitude};
+}
+// i64 is implemented after BroadcastSignBit.
+template <size_t N>
+HWY_API Vec128<float, N> Abs(const Vec128<float, N> v) {
+  // AND with a pattern that has every bit set except the top (sign) bit.
+  const Simd<float, N> df;
+  const Vec128<int32_t, N> all_but_sign{_mm_set1_epi32(0x7FFFFFFF)};
+  return v & BitCast(df, all_but_sign);
+}
+template <size_t N>
+HWY_API Vec128<double, N> Abs(const Vec128<double, N> v) {
+  // AND with a pattern that has every bit set except the top (sign) bit.
+  const Simd<double, N> dd;
+  const Vec128<int64_t, N> all_but_sign{_mm_set1_epi64x(0x7FFFFFFFFFFFFFFFLL)};
+  return v & BitCast(dd, all_but_sign);
+}
+
+// ------------------------------ Integer multiplication
+
+// Unsigned
+template <size_t N>
+HWY_API Vec128<uint16_t, N> operator*(const Vec128<uint16_t, N> a,
+                                      const Vec128<uint16_t, N> b) {
+  const auto product = _mm_mullo_epi16(a.raw, b.raw);
+  return Vec128<uint16_t, N>{product};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> operator*(const Vec128<uint32_t, N> a,
+                                      const Vec128<uint32_t, N> b) {
+  const auto product = _mm_mullo_epi32(a.raw, b.raw);
+  return Vec128<uint32_t, N>{product};
+}
+
+// Signed: uses the same "mullo" intrinsics as the unsigned overloads.
+template <size_t N>
+HWY_API Vec128<int16_t, N> operator*(const Vec128<int16_t, N> a,
+                                     const Vec128<int16_t, N> b) {
+  const auto product = _mm_mullo_epi16(a.raw, b.raw);
+  return Vec128<int16_t, N>{product};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> operator*(const Vec128<int32_t, N> a,
+                                     const Vec128<int32_t, N> b) {
+  const auto product = _mm_mullo_epi32(a.raw, b.raw);
+  return Vec128<int32_t, N>{product};
+}
+
+// Returns the upper 16 bits of a * b in each lane.
+template <size_t N>
+HWY_API Vec128<uint16_t, N> MulHigh(const Vec128<uint16_t, N> a,
+                                    const Vec128<uint16_t, N> b) {
+  // Unsigned variant of the high-half multiply.
+  const auto high = _mm_mulhi_epu16(a.raw, b.raw);
+  return Vec128<uint16_t, N>{high};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> MulHigh(const Vec128<int16_t, N> a,
+                                   const Vec128<int16_t, N> b) {
+  // Signed variant of the high-half multiply.
+  const auto high = _mm_mulhi_epi16(a.raw, b.raw);
+  return Vec128<int16_t, N>{high};
+}
+
+// Multiplies even lanes (0, 2 ..) and places the double-wide result into
+// even and the upper half into its odd neighbor lane.
+template <size_t N>
+HWY_API Vec128<int64_t, (N + 1) / 2> MulEven(const Vec128<int32_t, N> a,
+                                             const Vec128<int32_t, N> b) {
+  // The result has half as many (rounded up) 64-bit lanes as the inputs.
+  const auto wide = _mm_mul_epi32(a.raw, b.raw);
+  return Vec128<int64_t, (N + 1) / 2>{wide};
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, (N + 1) / 2> MulEven(const Vec128<uint32_t, N> a,
+                                              const Vec128<uint32_t, N> b) {
+  // The result has half as many (rounded up) 64-bit lanes as the inputs.
+  const auto wide = _mm_mul_epu32(a.raw, b.raw);
+  return Vec128<uint64_t, (N + 1) / 2>{wide};
+}
+
+// ------------------------------ ShiftLeft
+
+// Shifts each lane left by the compile-time constant kBits.
+template <int kBits, size_t N>
+HWY_API Vec128<uint16_t, N> ShiftLeft(const Vec128<uint16_t, N> v) {
+  const auto shifted = _mm_slli_epi16(v.raw, kBits);
+  return Vec128<uint16_t, N>{shifted};
+}
+
+template <int kBits, size_t N>
+HWY_API Vec128<uint32_t, N> ShiftLeft(const Vec128<uint32_t, N> v) {
+  const auto shifted = _mm_slli_epi32(v.raw, kBits);
+  return Vec128<uint32_t, N>{shifted};
+}
+
+template <int kBits, size_t N>
+HWY_API Vec128<uint64_t, N> ShiftLeft(const Vec128<uint64_t, N> v) {
+  const auto shifted = _mm_slli_epi64(v.raw, kBits);
+  return Vec128<uint64_t, N>{shifted};
+}
+
+template <int kBits, size_t N>
+HWY_API Vec128<int16_t, N> ShiftLeft(const Vec128<int16_t, N> v) {
+  const auto shifted = _mm_slli_epi16(v.raw, kBits);
+  return Vec128<int16_t, N>{shifted};
+}
+template <int kBits, size_t N>
+HWY_API Vec128<int32_t, N> ShiftLeft(const Vec128<int32_t, N> v) {
+  const auto shifted = _mm_slli_epi32(v.raw, kBits);
+  return Vec128<int32_t, N>{shifted};
+}
+template <int kBits, size_t N>
+HWY_API Vec128<int64_t, N> ShiftLeft(const Vec128<int64_t, N> v) {
+  const auto shifted = _mm_slli_epi64(v.raw, kBits);
+  return Vec128<int64_t, N>{shifted};
+}
+
+// 8-bit lanes: shifts as 16-bit lanes, then masks off the bits that crossed
+// over from the neighboring byte. kBits == 1 is handled as v + v instead.
+template <int kBits, typename T, size_t N, HWY_IF_LANE_SIZE(T, 1)>
+HWY_API Vec128<T, N> ShiftLeft(const Vec128<T, N> v) {
+  const Simd<T, N> d8;
+  // Use raw instead of BitCast to support N=1.
+  const Vec128<MakeWide<T>> wide{v.raw};
+  const Vec128<T, N> shifted{ShiftLeft<kBits>(wide).raw};
+  const auto keep = Set(d8, static_cast<T>((0xFF << kBits) & 0xFF));
+  return kBits == 1 ? (v + v) : (shifted & keep);
+}
+
+// ------------------------------ ShiftRight
+
+// Shifts each lane right by the compile-time constant kBits. Unsigned lanes
+// use the logical-shift intrinsics (srli), signed lanes the arithmetic ones
+// (srai).
+template <int kBits, size_t N>
+HWY_API Vec128<uint16_t, N> ShiftRight(const Vec128<uint16_t, N> v) {
+  const auto shifted = _mm_srli_epi16(v.raw, kBits);
+  return Vec128<uint16_t, N>{shifted};
+}
+template <int kBits, size_t N>
+HWY_API Vec128<uint32_t, N> ShiftRight(const Vec128<uint32_t, N> v) {
+  const auto shifted = _mm_srli_epi32(v.raw, kBits);
+  return Vec128<uint32_t, N>{shifted};
+}
+template <int kBits, size_t N>
+HWY_API Vec128<uint64_t, N> ShiftRight(const Vec128<uint64_t, N> v) {
+  const auto shifted = _mm_srli_epi64(v.raw, kBits);
+  return Vec128<uint64_t, N>{shifted};
+}
+
+// uint8_t: shifts as 16-bit lanes, then masks off the bits that crossed over
+// from the neighboring byte.
+template <int kBits, size_t N>
+HWY_API Vec128<uint8_t, N> ShiftRight(const Vec128<uint8_t, N> v) {
+  const Simd<uint8_t, N> d8;
+  // Use raw instead of BitCast to support N=1.
+  const Vec128<uint16_t> wide{v.raw};
+  const Vec128<uint8_t, N> shifted{ShiftRight<kBits>(wide).raw};
+  const auto keep = Set(d8, 0xFF >> kBits);
+  return shifted & keep;
+}
+
+template <int kBits, size_t N>
+HWY_API Vec128<int16_t, N> ShiftRight(const Vec128<int16_t, N> v) {
+  const auto shifted = _mm_srai_epi16(v.raw, kBits);
+  return Vec128<int16_t, N>{shifted};
+}
+template <int kBits, size_t N>
+HWY_API Vec128<int32_t, N> ShiftRight(const Vec128<int32_t, N> v) {
+  const auto shifted = _mm_srai_epi32(v.raw, kBits);
+  return Vec128<int32_t, N>{shifted};
+}
+
+// int8_t: performs the unsigned shift, then sign-extends from the position
+// where the original sign bit landed, via (x ^ s) - s.
+template <int kBits, size_t N>
+HWY_API Vec128<int8_t, N> ShiftRight(const Vec128<int8_t, N> v) {
+  const Simd<int8_t, N> di;
+  const Simd<uint8_t, N> du;
+  const auto logical = ShiftRight<kBits>(BitCast(du, v));
+  const auto shifted = BitCast(di, logical);
+  const auto shifted_sign = BitCast(di, Set(du, 0x80 >> kBits));
+  const auto flipped = shifted ^ shifted_sign;
+  return flipped - shifted_sign;
+}
+
+// i64 is implemented after BroadcastSignBit.
+
+// ------------------------------ BroadcastSignBit (ShiftRight, compare, mask)
+
+// Fills each lane with copies of its sign bit.
+template <size_t N>
+HWY_API Vec128<int8_t, N> BroadcastSignBit(const Vec128<int8_t, N> v) {
+  // Implemented as the (v < 0) comparison converted to a vector.
+  const Simd<int8_t, N> d;
+  const auto is_negative = v < Zero(d);
+  return VecFromMask(is_negative);
+}
+
+template <size_t N>
+HWY_API Vec128<int16_t, N> BroadcastSignBit(const Vec128<int16_t, N> v) {
+  constexpr int kSignShift = 15;
+  return ShiftRight<kSignShift>(v);
+}
+
+template <size_t N>
+HWY_API Vec128<int32_t, N> BroadcastSignBit(const Vec128<int32_t, N> v) {
+  constexpr int kSignShift = 31;
+  return ShiftRight<kSignShift>(v);
+}
+
+template <size_t N>
+HWY_API Vec128<int64_t, N> BroadcastSignBit(const Vec128<int64_t, N> v) {
+#if HWY_TARGET == HWY_AVX3
+  const auto sign = _mm_srai_epi64(v.raw, 63);
+  return Vec128<int64_t, N>{sign};
+#elif HWY_TARGET == HWY_AVX2
+  const Simd<int64_t, N> d;
+  const auto is_negative = v < Zero(d);
+  return VecFromMask(is_negative);
+#else
+  // Efficient Gt() requires SSE4.2 but we only have SSE4.1. BLENDVPD requires
+  // two constants and domain crossing. 32-bit shift avoids generating a zero.
+  const Simd<int32_t, N * 2> d32;
+  const auto as_i32 = BitCast(d32, v);
+  const auto sign = ShiftRight<31>(as_i32);
+  // Replicate the result of each upper 32-bit half into its lower half.
+  return Vec128<int64_t, N>{
+      _mm_shuffle_epi32(sign.raw, _MM_SHUFFLE(3, 3, 1, 1))};
+#endif
+}
+
+// Absolute value of int64_t lanes. AVX3 has a dedicated intrinsic; other
+// targets select (0 - v) in lanes whose broadcast sign bit is set.
+template <size_t N>
+HWY_API Vec128<int64_t, N> Abs(const Vec128<int64_t, N> v) {
+#if HWY_TARGET == HWY_AVX3
+  const auto magnitude = _mm_abs_epi64(v.raw);
+  return Vec128<int64_t, N>{magnitude};
+#else
+  const Simd<int64_t, N> d;
+  const auto zero = Zero(d);
+  const auto is_negative = MaskFromVec(BroadcastSignBit(v));
+  return IfThenElse(is_negative, zero - v, v);
+#endif
+}
+
+// Right shift of int64_t lanes by the compile-time constant kBits. AVX3 has a
+// dedicated intrinsic; other targets OR the unsigned shift result with the
+// broadcast sign bits shifted left by (64 - kBits).
+template <int kBits, size_t N>
+HWY_API Vec128<int64_t, N> ShiftRight(const Vec128<int64_t, N> v) {
+#if HWY_TARGET == HWY_AVX3
+  const auto shifted = _mm_srai_epi64(v.raw, kBits);
+  return Vec128<int64_t, N>{shifted};
+#else
+  const Simd<int64_t, N> di;
+  const Simd<uint64_t, N> du;
+  const auto logical = ShiftRight<kBits>(BitCast(du, v));
+  const auto right = BitCast(di, logical);
+  const auto sign = ShiftLeft<64 - kBits>(BroadcastSignBit(v));
+  return right | sign;
+#endif
+}
+
+// ------------------------------ ShiftLeftSame
+
+// Shifts every lane left by the same runtime count `bits`, which is first
+// moved into a vector register as required by the _mm_sll_* intrinsics.
+template <size_t N>
+HWY_API Vec128<uint16_t, N> ShiftLeftSame(const Vec128<uint16_t, N> v,
+                                          const int bits) {
+  const __m128i count = _mm_cvtsi32_si128(bits);
+  return Vec128<uint16_t, N>{_mm_sll_epi16(v.raw, count)};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> ShiftLeftSame(const Vec128<uint32_t, N> v,
+                                          const int bits) {
+  const __m128i count = _mm_cvtsi32_si128(bits);
+  return Vec128<uint32_t, N>{_mm_sll_epi32(v.raw, count)};
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, N> ShiftLeftSame(const Vec128<uint64_t, N> v,
+                                          const int bits) {
+  const __m128i count = _mm_cvtsi32_si128(bits);
+  return Vec128<uint64_t, N>{_mm_sll_epi64(v.raw, count)};
+}
+
+template <size_t N>
+HWY_API Vec128<int16_t, N> ShiftLeftSame(const Vec128<int16_t, N> v,
+                                         const int bits) {
+  const __m128i count = _mm_cvtsi32_si128(bits);
+  return Vec128<int16_t, N>{_mm_sll_epi16(v.raw, count)};
+}
+
+template <size_t N>
+HWY_API Vec128<int32_t, N> ShiftLeftSame(const Vec128<int32_t, N> v,
+                                         const int bits) {
+  const __m128i count = _mm_cvtsi32_si128(bits);
+  return Vec128<int32_t, N>{_mm_sll_epi32(v.raw, count)};
+}
+
+template <size_t N>
+HWY_API Vec128<int64_t, N> ShiftLeftSame(const Vec128<int64_t, N> v,
+                                         const int bits) {
+  const __m128i count = _mm_cvtsi32_si128(bits);
+  return Vec128<int64_t, N>{_mm_sll_epi64(v.raw, count)};
+}
+
+// 8-bit lanes: shifts as 16-bit lanes, then masks off the bits that crossed
+// over from the neighboring byte.
+template <typename T, size_t N, HWY_IF_LANE_SIZE(T, 1)>
+HWY_API Vec128<T, N> ShiftLeftSame(const Vec128<T, N> v, const int bits) {
+  const Simd<T, N> d8;
+  // Use raw instead of BitCast to support N=1.
+  const Vec128<MakeWide<T>> wide{v.raw};
+  const Vec128<T, N> shifted{ShiftLeftSame(wide, bits).raw};
+  const auto keep = Set(d8, (0xFF << bits) & 0xFF);
+  return shifted & keep;
+}
+
+// ------------------------------ ShiftRightSame (BroadcastSignBit)
+
+// Shifts every lane right by the same runtime count `bits`. Unsigned lanes use
+// the logical-shift intrinsics (srl), signed lanes the arithmetic ones (sra).
+template <size_t N>
+HWY_API Vec128<uint16_t, N> ShiftRightSame(const Vec128<uint16_t, N> v,
+                                           const int bits) {
+  const __m128i count = _mm_cvtsi32_si128(bits);
+  return Vec128<uint16_t, N>{_mm_srl_epi16(v.raw, count)};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> ShiftRightSame(const Vec128<uint32_t, N> v,
+                                           const int bits) {
+  const __m128i count = _mm_cvtsi32_si128(bits);
+  return Vec128<uint32_t, N>{_mm_srl_epi32(v.raw, count)};
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, N> ShiftRightSame(const Vec128<uint64_t, N> v,
+                                           const int bits) {
+  const __m128i count = _mm_cvtsi32_si128(bits);
+  return Vec128<uint64_t, N>{_mm_srl_epi64(v.raw, count)};
+}
+
+// uint8_t: shifts as 16-bit lanes, then masks off the bits that crossed over
+// from the neighboring byte.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> ShiftRightSame(Vec128<uint8_t, N> v,
+                                          const int bits) {
+  const Simd<uint8_t, N> d8;
+  // Use raw instead of BitCast to support N=1.
+  const Vec128<uint16_t> wide{v.raw};
+  const Vec128<uint8_t, N> shifted{ShiftRightSame(wide, bits).raw};
+  const auto keep = Set(d8, 0xFF >> bits);
+  return shifted & keep;
+}
+
+template <size_t N>
+HWY_API Vec128<int16_t, N> ShiftRightSame(const Vec128<int16_t, N> v,
+                                          const int bits) {
+  const __m128i count = _mm_cvtsi32_si128(bits);
+  return Vec128<int16_t, N>{_mm_sra_epi16(v.raw, count)};
+}
+
+template <size_t N>
+HWY_API Vec128<int32_t, N> ShiftRightSame(const Vec128<int32_t, N> v,
+                                          const int bits) {
+  const __m128i count = _mm_cvtsi32_si128(bits);
+  return Vec128<int32_t, N>{_mm_sra_epi32(v.raw, count)};
+}
+// int64_t: AVX3 has a dedicated intrinsic; other targets OR the unsigned shift
+// result with the broadcast sign bits shifted left by (64 - bits).
+template <size_t N>
+HWY_API Vec128<int64_t, N> ShiftRightSame(const Vec128<int64_t, N> v,
+                                          const int bits) {
+#if HWY_TARGET == HWY_AVX3
+  const __m128i count = _mm_cvtsi32_si128(bits);
+  return Vec128<int64_t, N>{_mm_sra_epi64(v.raw, count)};
+#else
+  const Simd<int64_t, N> di;
+  const Simd<uint64_t, N> du;
+  const auto logical = ShiftRightSame(BitCast(du, v), bits);
+  const auto right = BitCast(di, logical);
+  const auto sign = ShiftLeftSame(BroadcastSignBit(v), 64 - bits);
+  return right | sign;
+#endif
+}
+
+// int8_t: performs the unsigned shift, then sign-extends from the position
+// where the original sign bit landed, via (x ^ s) - s.
+template <size_t N>
+HWY_API Vec128<int8_t, N> ShiftRightSame(Vec128<int8_t, N> v, const int bits) {
+  const Simd<int8_t, N> di;
+  const Simd<uint8_t, N> du;
+  const auto logical = ShiftRightSame(BitCast(du, v), bits);
+  const auto shifted = BitCast(di, logical);
+  const auto shifted_sign = BitCast(di, Set(du, 0x80 >> bits));
+  const auto flipped = shifted ^ shifted_sign;
+  return flipped - shifted_sign;
+}
+
+// ------------------------------ Negate
+
+// Floating-point negation: XORs each lane with the SignBit pattern.
+template <typename T, size_t N, HWY_IF_FLOAT(T)>
+HWY_API Vec128<T, N> Neg(const Vec128<T, N> v) {
+  const Simd<T, N> d;
+  return Xor(v, SignBit(d));
+}
+
+// Non-float negation: computed as 0 - v.
+template <typename T, size_t N, HWY_IF_NOT_FLOAT(T)>
+HWY_API Vec128<T, N> Neg(const Vec128<T, N> v) {
+  const Simd<T, N> d;
+  return Zero(d) - v;
+}
+
+// ------------------------------ Floating-point mul / div
+
+// Lane-wise floating-point multiplication. The non-template overloads for
+// single-lane vectors use the scalar (_ss/_sd) intrinsics.
+template <size_t N>
+HWY_API Vec128<float, N> operator*(Vec128<float, N> a, Vec128<float, N> b) {
+  const auto product = _mm_mul_ps(a.raw, b.raw);
+  return Vec128<float, N>{product};
+}
+HWY_API Vec128<float, 1> operator*(const Vec128<float, 1> a,
+                                   const Vec128<float, 1> b) {
+  const auto product = _mm_mul_ss(a.raw, b.raw);
+  return Vec128<float, 1>{product};
+}
+template <size_t N>
+HWY_API Vec128<double, N> operator*(const Vec128<double, N> a,
+                                    const Vec128<double, N> b) {
+  const auto product = _mm_mul_pd(a.raw, b.raw);
+  return Vec128<double, N>{product};
+}
+HWY_API Vec128<double, 1> operator*(const Vec128<double, 1> a,
+                                    const Vec128<double, 1> b) {
+  const auto product = _mm_mul_sd(a.raw, b.raw);
+  return Vec128<double, 1>{product};
+}
+
+// Lane-wise floating-point division. The non-template overloads for
+// single-lane vectors use the scalar (_ss/_sd) intrinsics.
+template <size_t N>
+HWY_API Vec128<float, N> operator/(const Vec128<float, N> a,
+                                   const Vec128<float, N> b) {
+  const auto quotient = _mm_div_ps(a.raw, b.raw);
+  return Vec128<float, N>{quotient};
+}
+HWY_API Vec128<float, 1> operator/(const Vec128<float, 1> a,
+                                   const Vec128<float, 1> b) {
+  const auto quotient = _mm_div_ss(a.raw, b.raw);
+  return Vec128<float, 1>{quotient};
+}
+template <size_t N>
+HWY_API Vec128<double, N> operator/(const Vec128<double, N> a,
+                                    const Vec128<double, N> b) {
+  const auto quotient = _mm_div_pd(a.raw, b.raw);
+  return Vec128<double, N>{quotient};
+}
+HWY_API Vec128<double, 1> operator/(const Vec128<double, 1> a,
+                                    const Vec128<double, 1> b) {
+  const auto quotient = _mm_div_sd(a.raw, b.raw);
+  return Vec128<double, 1>{quotient};
+}
+
+// Approximate reciprocal
+template <size_t N>
+HWY_API Vec128<float, N> ApproximateReciprocal(const Vec128<float, N> v) {
+  const auto estimate = _mm_rcp_ps(v.raw);
+  return Vec128<float, N>{estimate};
+}
+// Single-lane overload: uses the scalar intrinsic.
+HWY_API Vec128<float, 1> ApproximateReciprocal(const Vec128<float, 1> v) {
+  const auto estimate = _mm_rcp_ss(v.raw);
+  return Vec128<float, 1>{estimate};
+}
+
+// Absolute value of difference.
+template <size_t N>
+HWY_API Vec128<float, N> AbsDiff(const Vec128<float, N> a,
+                                 const Vec128<float, N> b) {
+  // Computed as Abs(a - b).
+  const auto diff = a - b;
+  return Abs(diff);
+}
+
+// ------------------------------ Floating-point multiply-add variants
+
+// Returns mul * x + add
+template <size_t N>
+HWY_API Vec128<float, N> MulAdd(const Vec128<float, N> mul,
+                                const Vec128<float, N> x,
+                                const Vec128<float, N> add) {
+#if HWY_TARGET == HWY_SSE4
+  // Separate multiply and add on this target.
+  const auto product = mul * x;
+  return product + add;
+#else
+  const auto result = _mm_fmadd_ps(mul.raw, x.raw, add.raw);
+  return Vec128<float, N>{result};
+#endif
+}
+template <size_t N>
+HWY_API Vec128<double, N> MulAdd(const Vec128<double, N> mul,
+                                 const Vec128<double, N> x,
+                                 const Vec128<double, N> add) {
+#if HWY_TARGET == HWY_SSE4
+  // Separate multiply and add on this target.
+  const auto product = mul * x;
+  return product + add;
+#else
+  const auto result = _mm_fmadd_pd(mul.raw, x.raw, add.raw);
+  return Vec128<double, N>{result};
+#endif
+}
+
+// Returns add - mul * x
+template <size_t N>
+HWY_API Vec128<float, N> NegMulAdd(const Vec128<float, N> mul,
+                                   const Vec128<float, N> x,
+                                   const Vec128<float, N> add) {
+#if HWY_TARGET == HWY_SSE4
+  // Separate multiply and subtract on this target.
+  const auto product = mul * x;
+  return add - product;
+#else
+  const auto result = _mm_fnmadd_ps(mul.raw, x.raw, add.raw);
+  return Vec128<float, N>{result};
+#endif
+}
+template <size_t N>
+HWY_API Vec128<double, N> NegMulAdd(const Vec128<double, N> mul,
+                                    const Vec128<double, N> x,
+                                    const Vec128<double, N> add) {
+#if HWY_TARGET == HWY_SSE4
+  // Separate multiply and subtract on this target.
+  const auto product = mul * x;
+  return add - product;
+#else
+  const auto result = _mm_fnmadd_pd(mul.raw, x.raw, add.raw);
+  return Vec128<double, N>{result};
+#endif
+}
+
+// Returns mul * x - sub
+template <size_t N>
+HWY_API Vec128<float, N> MulSub(const Vec128<float, N> mul,
+                                const Vec128<float, N> x,
+                                const Vec128<float, N> sub) {
+#if HWY_TARGET == HWY_SSE4
+  // Separate multiply and subtract on this target.
+  const auto product = mul * x;
+  return product - sub;
+#else
+  const auto result = _mm_fmsub_ps(mul.raw, x.raw, sub.raw);
+  return Vec128<float, N>{result};
+#endif
+}
+template <size_t N>
+HWY_API Vec128<double, N> MulSub(const Vec128<double, N> mul,
+                                 const Vec128<double, N> x,
+                                 const Vec128<double, N> sub) {
+#if HWY_TARGET == HWY_SSE4
+  // Separate multiply and subtract on this target.
+  const auto product = mul * x;
+  return product - sub;
+#else
+  const auto result = _mm_fmsub_pd(mul.raw, x.raw, sub.raw);
+  return Vec128<double, N>{result};
+#endif
+}
+
+// Returns -mul * x - sub
+template <size_t N>
+HWY_API Vec128<float, N> NegMulSub(const Vec128<float, N> mul,
+                                   const Vec128<float, N> x,
+                                   const Vec128<float, N> sub) {
+#if HWY_TARGET == HWY_SSE4
+  // Negate the multiplier, then do a separate multiply and subtract.
+  const auto neg_product = Neg(mul) * x;
+  return neg_product - sub;
+#else
+  const auto result = _mm_fnmsub_ps(mul.raw, x.raw, sub.raw);
+  return Vec128<float, N>{result};
+#endif
+}
+template <size_t N>
+HWY_API Vec128<double, N> NegMulSub(const Vec128<double, N> mul,
+                                    const Vec128<double, N> x,
+                                    const Vec128<double, N> sub) {
+#if HWY_TARGET == HWY_SSE4
+  // Negate the multiplier, then do a separate multiply and subtract.
+  const auto neg_product = Neg(mul) * x;
+  return neg_product - sub;
+#else
+  const auto result = _mm_fnmsub_pd(mul.raw, x.raw, sub.raw);
+  return Vec128<double, N>{result};
+#endif
+}
+
+// ------------------------------ Floating-point square root
+
+// Full precision square root
+template <size_t N>
+HWY_API Vec128<float, N> Sqrt(const Vec128<float, N> v) {
+  const auto root = _mm_sqrt_ps(v.raw);
+  return Vec128<float, N>{root};
+}
+// Single-lane overload: uses the scalar intrinsic.
+HWY_API Vec128<float, 1> Sqrt(const Vec128<float, 1> v) {
+  const auto root = _mm_sqrt_ss(v.raw);
+  return Vec128<float, 1>{root};
+}
+template <size_t N>
+HWY_API Vec128<double, N> Sqrt(const Vec128<double, N> v) {
+  const auto root = _mm_sqrt_pd(v.raw);
+  return Vec128<double, N>{root};
+}
+// Single-lane overload: the scalar intrinsic takes a second operand, for which
+// a zero vector is passed.
+HWY_API Vec128<double, 1> Sqrt(const Vec128<double, 1> v) {
+  const auto root = _mm_sqrt_sd(_mm_setzero_pd(), v.raw);
+  return Vec128<double, 1>{root};
+}
+
+// Approximate reciprocal square root
+template <size_t N>
+HWY_API Vec128<float, N> ApproximateReciprocalSqrt(const Vec128<float, N> v) {
+  const auto estimate = _mm_rsqrt_ps(v.raw);
+  return Vec128<float, N>{estimate};
+}
+// Single-lane overload: uses the scalar intrinsic.
+HWY_API Vec128<float, 1> ApproximateReciprocalSqrt(const Vec128<float, 1> v) {
+  const auto estimate = _mm_rsqrt_ss(v.raw);
+  return Vec128<float, 1>{estimate};
+}
+
+// ------------------------------ Floating-point rounding
+
+// Toward nearest integer, ties to even
+template <size_t N>
+HWY_API Vec128<float, N> Round(const Vec128<float, N> v) {
+  const auto rounded =
+      _mm_round_ps(v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  return Vec128<float, N>{rounded};
+}
+template <size_t N>
+HWY_API Vec128<double, N> Round(const Vec128<double, N> v) {
+  const auto rounded =
+      _mm_round_pd(v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  return Vec128<double, N>{rounded};
+}
+
+// Toward zero, aka truncate
+template <size_t N>
+HWY_API Vec128<float, N> Trunc(const Vec128<float, N> v) {
+  const auto truncated =
+      _mm_round_ps(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+  return Vec128<float, N>{truncated};
+}
+template <size_t N>
+HWY_API Vec128<double, N> Trunc(const Vec128<double, N> v) {
+  const auto truncated =
+      _mm_round_pd(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+  return Vec128<double, N>{truncated};
+}
+
+// Toward +infinity, aka ceiling
+template <size_t N>
+HWY_API Vec128<float, N> Ceil(const Vec128<float, N> v) {
+  const auto rounded_up =
+      _mm_round_ps(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  return Vec128<float, N>{rounded_up};
+}
+template <size_t N>
+HWY_API Vec128<double, N> Ceil(const Vec128<double, N> v) {
+  const auto rounded_up =
+      _mm_round_pd(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  return Vec128<double, N>{rounded_up};
+}
+
+// Rounds each lane toward -infinity (floor). Floating-point exceptions are
+// suppressed (_MM_FROUND_NO_EXC).
+template <size_t N>
+HWY_API Vec128<float, N> Floor(const Vec128<float, N> v) {
+  const __m128 rounded_down =
+      _mm_round_ps(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  return Vec128<float, N>{rounded_down};
+}
+template <size_t N>
+HWY_API Vec128<double, N> Floor(const Vec128<double, N> v) {
+  const __m128d rounded_down =
+      _mm_round_pd(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  return Vec128<double, N>{rounded_down};
+}
+
+// ------------------------------ Min (Gt, IfThenElse)
+
+// Unsigned: lane-wise minimum of a and b.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> Min(const Vec128<uint8_t, N> a,
+                               const Vec128<uint8_t, N> b) {
+  const __m128i smaller = _mm_min_epu8(a.raw, b.raw);
+  return Vec128<uint8_t, N>{smaller};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> Min(const Vec128<uint16_t, N> a,
+                                const Vec128<uint16_t, N> b) {
+  const __m128i smaller = _mm_min_epu16(a.raw, b.raw);
+  return Vec128<uint16_t, N>{smaller};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> Min(const Vec128<uint32_t, N> a,
+                                const Vec128<uint32_t, N> b) {
+  const __m128i smaller = _mm_min_epu32(a.raw, b.raw);
+  return Vec128<uint32_t, N>{smaller};
+}
+// 64-bit lanes only have a dedicated instruction on AVX3; otherwise the
+// minimum is selected using a signed comparison of sign-flipped inputs.
+template <size_t N>
+HWY_API Vec128<uint64_t, N> Min(const Vec128<uint64_t, N> a,
+                                const Vec128<uint64_t, N> b) {
+#if HWY_TARGET == HWY_AVX3
+  const __m128i smaller = _mm_min_epu64(a.raw, b.raw);
+  return Vec128<uint64_t, N>{smaller};
+#else
+  const Simd<int64_t, N> di;
+  const Simd<uint64_t, N> du;
+  // Toggling the top bit maps unsigned ordering onto signed ordering.
+  const auto sign_bit = Set(du, 1ull << 63);
+  const auto a_signed = BitCast(di, a ^ sign_bit);
+  const auto b_signed = BitCast(di, b ^ sign_bit);
+  const auto a_is_greater = RebindMask(du, a_signed > b_signed);
+  return IfThenElse(a_is_greater, b, a);
+#endif
+}
+
+// Signed: lane-wise minimum of a and b.
+template <size_t N>
+HWY_API Vec128<int8_t, N> Min(const Vec128<int8_t, N> a,
+                              const Vec128<int8_t, N> b) {
+  const __m128i smaller = _mm_min_epi8(a.raw, b.raw);
+  return Vec128<int8_t, N>{smaller};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> Min(const Vec128<int16_t, N> a,
+                               const Vec128<int16_t, N> b) {
+  const __m128i smaller = _mm_min_epi16(a.raw, b.raw);
+  return Vec128<int16_t, N>{smaller};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> Min(const Vec128<int32_t, N> a,
+                               const Vec128<int32_t, N> b) {
+  const __m128i smaller = _mm_min_epi32(a.raw, b.raw);
+  return Vec128<int32_t, N>{smaller};
+}
+// 64-bit lanes: dedicated instruction on AVX3, compare-and-select otherwise.
+template <size_t N>
+HWY_API Vec128<int64_t, N> Min(const Vec128<int64_t, N> a,
+                               const Vec128<int64_t, N> b) {
+#if HWY_TARGET == HWY_AVX3
+  const __m128i smaller = _mm_min_epi64(a.raw, b.raw);
+  return Vec128<int64_t, N>{smaller};
+#else
+  const auto a_is_less = a < b;
+  return IfThenElse(a_is_less, a, b);
+#endif
+}
+
+// Float: lane-wise minimum via the MINPS/MINPD intrinsics.
+template <size_t N>
+HWY_API Vec128<float, N> Min(const Vec128<float, N> a,
+                             const Vec128<float, N> b) {
+  const __m128 smaller = _mm_min_ps(a.raw, b.raw);
+  return Vec128<float, N>{smaller};
+}
+template <size_t N>
+HWY_API Vec128<double, N> Min(const Vec128<double, N> a,
+                              const Vec128<double, N> b) {
+  const __m128d smaller = _mm_min_pd(a.raw, b.raw);
+  return Vec128<double, N>{smaller};
+}
+
+// ------------------------------ Max (Gt, IfThenElse)
+
+// Unsigned: lane-wise maximum of a and b.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> Max(const Vec128<uint8_t, N> a,
+                               const Vec128<uint8_t, N> b) {
+  const __m128i larger = _mm_max_epu8(a.raw, b.raw);
+  return Vec128<uint8_t, N>{larger};
+}
+template <size_t N>
+HWY_API Vec128<uint16_t, N> Max(const Vec128<uint16_t, N> a,
+                                const Vec128<uint16_t, N> b) {
+  const __m128i larger = _mm_max_epu16(a.raw, b.raw);
+  return Vec128<uint16_t, N>{larger};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> Max(const Vec128<uint32_t, N> a,
+                                const Vec128<uint32_t, N> b) {
+  const __m128i larger = _mm_max_epu32(a.raw, b.raw);
+  return Vec128<uint32_t, N>{larger};
+}
+// 64-bit lanes only have a dedicated instruction on AVX3; otherwise the
+// maximum is selected using a signed comparison of sign-flipped inputs.
+template <size_t N>
+HWY_API Vec128<uint64_t, N> Max(const Vec128<uint64_t, N> a,
+                                const Vec128<uint64_t, N> b) {
+#if HWY_TARGET == HWY_AVX3
+  const __m128i larger = _mm_max_epu64(a.raw, b.raw);
+  return Vec128<uint64_t, N>{larger};
+#else
+  const Simd<int64_t, N> di;
+  const Simd<uint64_t, N> du;
+  // Toggling the top bit maps unsigned ordering onto signed ordering.
+  const auto sign_bit = Set(du, 1ull << 63);
+  const auto a_signed = BitCast(di, a ^ sign_bit);
+  const auto b_signed = BitCast(di, b ^ sign_bit);
+  const auto a_is_greater = RebindMask(du, a_signed > b_signed);
+  return IfThenElse(a_is_greater, a, b);
+#endif
+}
+
+// Signed: lane-wise maximum of a and b.
+template <size_t N>
+HWY_API Vec128<int8_t, N> Max(const Vec128<int8_t, N> a,
+                              const Vec128<int8_t, N> b) {
+  const __m128i larger = _mm_max_epi8(a.raw, b.raw);
+  return Vec128<int8_t, N>{larger};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> Max(const Vec128<int16_t, N> a,
+                               const Vec128<int16_t, N> b) {
+  const __m128i larger = _mm_max_epi16(a.raw, b.raw);
+  return Vec128<int16_t, N>{larger};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> Max(const Vec128<int32_t, N> a,
+                               const Vec128<int32_t, N> b) {
+  const __m128i larger = _mm_max_epi32(a.raw, b.raw);
+  return Vec128<int32_t, N>{larger};
+}
+// 64-bit lanes: dedicated instruction on AVX3, compare-and-select otherwise.
+template <size_t N>
+HWY_API Vec128<int64_t, N> Max(const Vec128<int64_t, N> a,
+                               const Vec128<int64_t, N> b) {
+#if HWY_TARGET == HWY_AVX3
+  const __m128i larger = _mm_max_epi64(a.raw, b.raw);
+  return Vec128<int64_t, N>{larger};
+#else
+  const auto a_is_less = a < b;
+  return IfThenElse(a_is_less, b, a);
+#endif
+}
+
+// Float: lane-wise maximum via the MAXPS/MAXPD intrinsics.
+template <size_t N>
+HWY_API Vec128<float, N> Max(const Vec128<float, N> a,
+                             const Vec128<float, N> b) {
+  const __m128 larger = _mm_max_ps(a.raw, b.raw);
+  return Vec128<float, N>{larger};
+}
+template <size_t N>
+HWY_API Vec128<double, N> Max(const Vec128<double, N> a,
+                              const Vec128<double, N> b) {
+  const __m128d larger = _mm_max_pd(a.raw, b.raw);
+  return Vec128<double, N>{larger};
+}
+
+
+// ================================================== MEMORY
+
+// Clang static analysis claims the memory immediately after a partial vector
+// store is uninitialized, and also flags the input to partial loads (at least
+// for loadl_pd) as "garbage". This is a false alarm because msan does not
+// raise errors. We work around this by using CopyBytes instead of intrinsics,
+// but only for the analyzer to avoid potentially bad code generation.
+// Unfortunately __clang_analyzer__ was not defined for clang-tidy prior to v7.
+//
+// HWY_SAFE_PARTIAL_LOAD_STORE selects between the two implementations of the
+// partial Load/Store overloads below: 1 = CopyBytes, 0 = intrinsics.
+// A definition supplied before this point takes precedence over the default.
+#ifndef HWY_SAFE_PARTIAL_LOAD_STORE
+#if defined(__clang_analyzer__) || \
+    (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 700)
+#define HWY_SAFE_PARTIAL_LOAD_STORE 1
+#else
+#define HWY_SAFE_PARTIAL_LOAD_STORE 0
+#endif
+#endif  // HWY_SAFE_PARTIAL_LOAD_STORE
+
+// ------------------------------ Load
+
+// Full-vector load through the aligned-load intrinsics; the generic overload
+// handles integer lanes, the others float and double.
+template <typename T>
+HWY_API Vec128<T> Load(Full128<T> /* tag */, const T* HWY_RESTRICT aligned) {
+  const __m128i* const from = reinterpret_cast<const __m128i*>(aligned);
+  return Vec128<T>{_mm_load_si128(from)};
+}
+HWY_API Vec128<float> Load(Full128<float> /* tag */,
+                           const float* HWY_RESTRICT aligned) {
+  const __m128 loaded = _mm_load_ps(aligned);
+  return Vec128<float>{loaded};
+}
+HWY_API Vec128<double> Load(Full128<double> /* tag */,
+                            const double* HWY_RESTRICT aligned) {
+  const __m128d loaded = _mm_load_pd(aligned);
+  return Vec128<double>{loaded};
+}
+
+// Full-vector load through the unaligned-load intrinsics; the generic
+// overload handles integer lanes, the others float and double.
+template <typename T>
+HWY_API Vec128<T> LoadU(Full128<T> /* tag */, const T* HWY_RESTRICT p) {
+  const __m128i* const from = reinterpret_cast<const __m128i*>(p);
+  return Vec128<T>{_mm_loadu_si128(from)};
+}
+HWY_API Vec128<float> LoadU(Full128<float> /* tag */,
+                            const float* HWY_RESTRICT p) {
+  const __m128 loaded = _mm_loadu_ps(p);
+  return Vec128<float>{loaded};
+}
+HWY_API Vec128<double> LoadU(Full128<double> /* tag */,
+                             const double* HWY_RESTRICT p) {
+  const __m128d loaded = _mm_loadu_pd(p);
+  return Vec128<double>{loaded};
+}
+
+// 64-bit (half-vector) loads. With HWY_SAFE_PARTIAL_LOAD_STORE the bytes are
+// copied into a zeroed register; otherwise a partial-load intrinsic is used.
+template <typename T>
+HWY_API Vec128<T, 8 / sizeof(T)> Load(Simd<T, 8 / sizeof(T)> /* tag */,
+                                      const T* HWY_RESTRICT p) {
+  using V = Vec128<T, 8 / sizeof(T)>;
+#if HWY_SAFE_PARTIAL_LOAD_STORE
+  __m128i zero_extended = _mm_setzero_si128();
+  CopyBytes<8>(p, &zero_extended);
+  return V{zero_extended};
+#else
+  const __m128i* const from = reinterpret_cast<const __m128i*>(p);
+  return V{_mm_loadl_epi64(from)};
+#endif
+}
+
+HWY_API Vec128<float, 2> Load(Simd<float, 2> /* tag */,
+                              const float* HWY_RESTRICT p) {
+#if HWY_SAFE_PARTIAL_LOAD_STORE
+  __m128 zero_extended = _mm_setzero_ps();
+  CopyBytes<8>(p, &zero_extended);
+  return Vec128<float, 2>{zero_extended};
+#else
+  // loadl_pi keeps the upper half of its first operand, so supply zeros.
+  const __m128 upper_zero = _mm_setzero_ps();
+  const __m64* const from = reinterpret_cast<const __m64*>(p);
+  return Vec128<float, 2>{_mm_loadl_pi(upper_zero, from)};
+#endif
+}
+
+HWY_API Vec128<double, 1> Load(Simd<double, 1> /* tag */,
+                               const double* HWY_RESTRICT p) {
+#if HWY_SAFE_PARTIAL_LOAD_STORE
+  __m128d zero_extended = _mm_setzero_pd();
+  CopyBytes<8>(p, &zero_extended);
+  return Vec128<double, 1>{zero_extended};
+#else
+  const __m128d loaded = _mm_load_sd(p);
+  return Vec128<double, 1>{loaded};
+#endif
+}
+
+// 32-bit load of a single float.
+HWY_API Vec128<float, 1> Load(Simd<float, 1> /* tag */,
+                              const float* HWY_RESTRICT p) {
+#if HWY_SAFE_PARTIAL_LOAD_STORE
+  __m128 zero_extended = _mm_setzero_ps();
+  CopyBytes<4>(p, &zero_extended);
+  return Vec128<float, 1>{zero_extended};
+#else
+  const __m128 loaded = _mm_load_ss(p);
+  return Vec128<float, 1>{loaded};
+#endif
+}
+
+// Any <= 32 bit except <float, 1>
+// Loads sizeof(T) * N (at most 4) bytes from p into the low bytes of the
+// vector. Bytes of the low 32 bits that are not covered by the load are zero
+// in both code paths.
+template <typename T, size_t N, HWY_IF_LE32(T, N)>
+HWY_API Vec128<T, N> Load(Simd<T, N> /* tag */, const T* HWY_RESTRICT p) {
+  constexpr size_t kSize = sizeof(T) * N;
+#if HWY_SAFE_PARTIAL_LOAD_STORE
+  // Integer register type, matching the non-analyzer path below, so that no
+  // implicit __m128 -> __m128i conversion is required.
+  __m128i v = _mm_setzero_si128();
+  CopyBytes<kSize>(p, &v);
+  return Vec128<T, N>{v};
+#else
+  // TODO(janwas): load_ss?
+  // Zero-initialize: kSize may be 1 or 2, in which case CopyBytes only
+  // overwrites part of `bits` and the remainder must not be indeterminate.
+  int32_t bits = 0;
+  CopyBytes<kSize>(p, &bits);
+  return Vec128<T, N>{_mm_cvtsi32_si128(bits)};
+#endif
+}
+
+// Partial vectors (at most 64 bits): the unaligned load simply forwards to
+// Load.
+template <typename T, size_t N, HWY_IF_LE64(T, N)>
+HWY_API Vec128<T, N> LoadU(Simd<T, N> d, const T* HWY_RESTRICT p) {
+  const Vec128<T, N> loaded = Load(d, p);
+  return loaded;
+}
+
+// Vectors are at most 128 bits here, so there is no second block to
+// duplicate into; this forwards to the unaligned load.
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API Vec128<T, N> LoadDup128(Simd<T, N> d, const T* HWY_RESTRICT p) {
+  const Vec128<T, N> loaded = LoadU(d, p);
+  return loaded;
+}
+
+// ------------------------------ Store
+
+// Full-vector store through the aligned-store intrinsics; the generic
+// overload handles integer lanes, the others float and double.
+template <typename T>
+HWY_API void Store(Vec128<T> v, Full128<T> /* tag */, T* HWY_RESTRICT aligned) {
+  __m128i* const to = reinterpret_cast<__m128i*>(aligned);
+  _mm_store_si128(to, v.raw);
+}
+HWY_API void Store(const Vec128<float> v, Full128<float> /* tag */,
+                   float* HWY_RESTRICT aligned) {
+  const __m128 lanes = v.raw;
+  _mm_store_ps(aligned, lanes);
+}
+HWY_API void Store(const Vec128<double> v, Full128<double> /* tag */,
+                   double* HWY_RESTRICT aligned) {
+  const __m128d lanes = v.raw;
+  _mm_store_pd(aligned, lanes);
+}
+
+// Full-vector store through the unaligned-store intrinsics; the generic
+// overload handles integer lanes, the others float and double.
+template <typename T>
+HWY_API void StoreU(Vec128<T> v, Full128<T> /* tag */, T* HWY_RESTRICT p) {
+  __m128i* const to = reinterpret_cast<__m128i*>(p);
+  _mm_storeu_si128(to, v.raw);
+}
+HWY_API void StoreU(const Vec128<float> v, Full128<float> /* tag */,
+                    float* HWY_RESTRICT p) {
+  const __m128 lanes = v.raw;
+  _mm_storeu_ps(p, lanes);
+}
+HWY_API void StoreU(const Vec128<double> v, Full128<double> /* tag */,
+                    double* HWY_RESTRICT p) {
+  const __m128d lanes = v.raw;
+  _mm_storeu_pd(p, lanes);
+}
+
+// 64-bit (half-vector) stores: only the lower 8 bytes of v are written to p.
+// With HWY_SAFE_PARTIAL_LOAD_STORE the bytes are copied via CopyBytes;
+// otherwise a partial-store intrinsic is used.
+template <typename T>
+HWY_API void Store(Vec128<T, 8 / sizeof(T)> v, Simd<T, 8 / sizeof(T)> /* tag */,
+                   T* HWY_RESTRICT p) {
+#if HWY_SAFE_PARTIAL_LOAD_STORE
+  CopyBytes<8>(&v, p);
+#else
+  __m128i* const to = reinterpret_cast<__m128i*>(p);
+  _mm_storel_epi64(to, v.raw);
+#endif
+}
+HWY_API void Store(const Vec128<float, 2> v, Simd<float, 2> /* tag */,
+                   float* HWY_RESTRICT p) {
+#if HWY_SAFE_PARTIAL_LOAD_STORE
+  CopyBytes<8>(&v, p);
+#else
+  __m64* const to = reinterpret_cast<__m64*>(p);
+  _mm_storel_pi(to, v.raw);
+#endif
+}
+HWY_API void Store(const Vec128<double, 1> v, Simd<double, 1> /* tag */,
+                   double* HWY_RESTRICT p) {
+#if HWY_SAFE_PARTIAL_LOAD_STORE
+  CopyBytes<8>(&v, p);
+#else
+  const __m128d lanes = v.raw;
+  _mm_storel_pd(p, lanes);
+#endif
+}
+
+// Any vector of at most 32 bits except <float, 1>: copies exactly
+// sizeof(T) * N bytes from the start of v to p.
+template <typename T, size_t N, HWY_IF_LE32(T, N)>
+HWY_API void Store(Vec128<T, N> v, Simd<T, N> /* tag */, T* HWY_RESTRICT p) {
+  constexpr size_t kSize = sizeof(T) * N;
+  CopyBytes<kSize>(&v, p);
+}
+// Single float: scalar store unless the CopyBytes path is selected.
+HWY_API void Store(const Vec128<float, 1> v, Simd<float, 1> /* tag */,
+                   float* HWY_RESTRICT p) {
+#if HWY_SAFE_PARTIAL_LOAD_STORE
+  CopyBytes<4>(&v, p);
+#else
+  const __m128 lanes = v.raw;
+  _mm_store_ss(p, lanes);
+#endif
+}
+
+// Partial vectors (at most 64 bits): the unaligned store simply forwards to
+// Store.
+template <typename T, size_t N, HWY_IF_LE64(T, N)>
+HWY_API void StoreU(const Vec128<T, N> v, Simd<T, N> d, T* HWY_RESTRICT p) {
+  return Store(v, d, p);
+}
+
+// ------------------------------ Non-temporal stores
+
+// Non-temporal store. clang6 generates incorrect code for _mm_stream_pi, so
+// partial vectors are also rounded up to a full 16-byte store: all three
+// overloads write the entire 128-bit register regardless of N.
+template <typename T, size_t N>
+HWY_API void Stream(Vec128<T, N> v, Simd<T, N> /* tag */,
+                    T* HWY_RESTRICT aligned) {
+  __m128i* const to = reinterpret_cast<__m128i*>(aligned);
+  _mm_stream_si128(to, v.raw);
+}
+template <size_t N>
+HWY_API void Stream(const Vec128<float, N> v, Simd<float, N> /* tag */,
+                    float* HWY_RESTRICT aligned) {
+  const __m128 lanes = v.raw;
+  _mm_stream_ps(aligned, lanes);
+}
+template <size_t N>
+HWY_API void Stream(const Vec128<double, N> v, Simd<double, N> /* tag */,
+                    double* HWY_RESTRICT aligned) {
+  const __m128d lanes = v.raw;
+  _mm_stream_pd(aligned, lanes);
+}
+
+// ------------------------------ Scatter
+
+// Work around warnings in the intrinsic definitions (passing -1 as a mask).
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4245 4365, ignored "-Wsign-conversion")
+
+// Unfortunately the GCC/Clang intrinsics do not accept int64_t*.
+// GatherIndex64 is the pointer element type used instead when casting `base`
+// for the 64-bit gather intrinsics; the static_assert guards its width.
+using GatherIndex64 = long long int;  // NOLINT(google-runtime-int)
+static_assert(sizeof(GatherIndex64) == 8, "Must be 64-bit type");
+
+#if HWY_TARGET == HWY_AVX3
+namespace detail {
+
+// Integer scatter helpers, selected by lane size via hwy::SizeTag. Full
+// vectors use the unmasked intrinsic; partial vectors pass a mask with only
+// the lower N bits set. "Offset" variants use scale 1, "Index" variants use
+// the lane size as scale.
+
+// 32-bit lanes, byte offsets.
+template <typename T, size_t N>
+HWY_API void ScatterOffset(hwy::SizeTag<4> /* tag */, Vec128<T, N> v,
+                           Simd<T, N> /* tag */, T* HWY_RESTRICT base,
+                           const Vec128<int32_t, N> offset) {
+  if (N != 4) {
+    const __mmask8 lower_n = (1u << N) - 1;
+    _mm_mask_i32scatter_epi32(base, lower_n, offset.raw, v.raw, 1);
+    return;
+  }
+  _mm_i32scatter_epi32(base, offset.raw, v.raw, 1);
+}
+// 32-bit lanes, lane indices.
+template <typename T, size_t N>
+HWY_API void ScatterIndex(hwy::SizeTag<4> /* tag */, Vec128<T, N> v,
+                          Simd<T, N> /* tag */, T* HWY_RESTRICT base,
+                          const Vec128<int32_t, N> index) {
+  if (N != 4) {
+    const __mmask8 lower_n = (1u << N) - 1;
+    _mm_mask_i32scatter_epi32(base, lower_n, index.raw, v.raw, 4);
+    return;
+  }
+  _mm_i32scatter_epi32(base, index.raw, v.raw, 4);
+}
+
+// 64-bit lanes, byte offsets.
+template <typename T, size_t N>
+HWY_API void ScatterOffset(hwy::SizeTag<8> /* tag */, Vec128<T, N> v,
+                           Simd<T, N> /* tag */, T* HWY_RESTRICT base,
+                           const Vec128<int64_t, N> offset) {
+  if (N != 2) {
+    const __mmask8 lower_n = (1u << N) - 1;
+    _mm_mask_i64scatter_epi64(base, lower_n, offset.raw, v.raw, 1);
+    return;
+  }
+  _mm_i64scatter_epi64(base, offset.raw, v.raw, 1);
+}
+// 64-bit lanes, lane indices.
+template <typename T, size_t N>
+HWY_API void ScatterIndex(hwy::SizeTag<8> /* tag */, Vec128<T, N> v,
+                          Simd<T, N> /* tag */, T* HWY_RESTRICT base,
+                          const Vec128<int64_t, N> index) {
+  if (N != 2) {
+    const __mmask8 lower_n = (1u << N) - 1;
+    _mm_mask_i64scatter_epi64(base, lower_n, index.raw, v.raw, 8);
+    return;
+  }
+  _mm_i64scatter_epi64(base, index.raw, v.raw, 8);
+}
+
+}  // namespace detail
+
+// Public integer scatter entry points: verify that the offset/index lanes
+// have the same size as the data lanes, then dispatch on that size.
+template <typename T, size_t N, typename Offset>
+HWY_API void ScatterOffset(Vec128<T, N> v, Simd<T, N> d, T* HWY_RESTRICT base,
+                           const Vec128<Offset, N> offset) {
+  static_assert(sizeof(T) == sizeof(Offset), "Must match for portability");
+  const hwy::SizeTag<sizeof(T)> lane_size_tag;
+  detail::ScatterOffset(lane_size_tag, v, d, base, offset);
+}
+template <typename T, size_t N, typename Index>
+HWY_API void ScatterIndex(Vec128<T, N> v, Simd<T, N> d, T* HWY_RESTRICT base,
+                          const Vec128<Index, N> index) {
+  static_assert(sizeof(T) == sizeof(Index), "Must match for portability");
+  const hwy::SizeTag<sizeof(T)> lane_size_tag;
+  detail::ScatterIndex(lane_size_tag, v, d, base, index);
+}
+
+// Float scatter: unmasked intrinsic for full vectors, otherwise a mask with
+// only the lower N bits set. Offsets are in bytes (scale 1), indices in
+// lanes (scale 4).
+template <size_t N>
+HWY_INLINE void ScatterOffset(Vec128<float, N> v, Simd<float, N> /* tag */,
+                              float* HWY_RESTRICT base,
+                              const Vec128<int32_t, N> offset) {
+  if (N != 4) {
+    const __mmask8 lower_n = (1u << N) - 1;
+    _mm_mask_i32scatter_ps(base, lower_n, offset.raw, v.raw, 1);
+    return;
+  }
+  _mm_i32scatter_ps(base, offset.raw, v.raw, 1);
+}
+template <size_t N>
+HWY_INLINE void ScatterIndex(Vec128<float, N> v, Simd<float, N> /* tag */,
+                             float* HWY_RESTRICT base,
+                             const Vec128<int32_t, N> index) {
+  if (N != 4) {
+    const __mmask8 lower_n = (1u << N) - 1;
+    _mm_mask_i32scatter_ps(base, lower_n, index.raw, v.raw, 4);
+    return;
+  }
+  _mm_i32scatter_ps(base, index.raw, v.raw, 4);
+}
+
+// Double scatter: unmasked intrinsic for full vectors, otherwise a mask with
+// only the lower N bits set. Offsets are in bytes (scale 1), indices in
+// lanes (scale 8).
+template <size_t N>
+HWY_INLINE void ScatterOffset(Vec128<double, N> v, Simd<double, N> /* tag */,
+                              double* HWY_RESTRICT base,
+                              const Vec128<int64_t, N> offset) {
+  if (N != 2) {
+    const __mmask8 lower_n = (1u << N) - 1;
+    _mm_mask_i64scatter_pd(base, lower_n, offset.raw, v.raw, 1);
+    return;
+  }
+  _mm_i64scatter_pd(base, offset.raw, v.raw, 1);
+}
+template <size_t N>
+HWY_INLINE void ScatterIndex(Vec128<double, N> v, Simd<double, N> /* tag */,
+                             double* HWY_RESTRICT base,
+                             const Vec128<int64_t, N> index) {
+  if (N != 2) {
+    const __mmask8 lower_n = (1u << N) - 1;
+    _mm_mask_i64scatter_pd(base, lower_n, index.raw, v.raw, 8);
+    return;
+  }
+  _mm_i64scatter_pd(base, index.raw, v.raw, 8);
+}
+#else  // HWY_TARGET == HWY_AVX3
+
+// Scatter without hardware support: spills the values and the byte offsets
+// to stack arrays, then writes lane i to `base` + offset[i] bytes.
+template <typename T, size_t N, typename Offset, HWY_IF_LE128(T, N)>
+HWY_API void ScatterOffset(Vec128<T, N> v, Simd<T, N> d, T* HWY_RESTRICT base,
+                           const Vec128<Offset, N> offset) {
+  static_assert(sizeof(T) == sizeof(Offset), "Must match for portability");
+
+  alignas(16) Offset byte_offsets[N];
+  Store(offset, Simd<Offset, N>(), byte_offsets);
+
+  alignas(16) T values[N];
+  Store(v, d, values);
+
+  // Offsets are in bytes, so address the destination as a byte array.
+  uint8_t* const dst = reinterpret_cast<uint8_t*>(base);
+  for (size_t lane = 0; lane != N; ++lane) {
+    CopyBytes<sizeof(T)>(&values[lane], dst + byte_offsets[lane]);
+  }
+}
+
+// Scatter without hardware support: spills the values and the lane indices
+// to stack arrays, then writes lane i to base[index[i]].
+template <typename T, size_t N, typename Index, HWY_IF_LE128(T, N)>
+HWY_API void ScatterIndex(Vec128<T, N> v, Simd<T, N> d, T* HWY_RESTRICT base,
+                          const Vec128<Index, N> index) {
+  static_assert(sizeof(T) == sizeof(Index), "Must match for portability");
+
+  alignas(16) Index positions[N];
+  Store(index, Simd<Index, N>(), positions);
+
+  alignas(16) T values[N];
+  Store(v, d, values);
+
+  for (size_t lane = 0; lane != N; ++lane) {
+    base[positions[lane]] = values[lane];
+  }
+}
+
+#endif
+
+// ------------------------------ Gather (Load/Store)
+
+#if HWY_TARGET == HWY_SSE4
+
+// Gather without hardware support: spills the byte offsets to a stack array,
+// reads lane i from `base` + offset[i] bytes, then loads the collected lanes.
+template <typename T, size_t N, typename Offset>
+HWY_API Vec128<T, N> GatherOffset(const Simd<T, N> d,
+                                  const T* HWY_RESTRICT base,
+                                  const Vec128<Offset, N> offset) {
+  static_assert(sizeof(T) == sizeof(Offset), "Must match for portability");
+
+  alignas(16) Offset byte_offsets[N];
+  Store(offset, Simd<Offset, N>(), byte_offsets);
+
+  // Offsets are in bytes, so address the source as a byte array.
+  const uint8_t* const src = reinterpret_cast<const uint8_t*>(base);
+  alignas(16) T gathered[N];
+  for (size_t lane = 0; lane != N; ++lane) {
+    CopyBytes<sizeof(T)>(src + byte_offsets[lane], &gathered[lane]);
+  }
+  return Load(d, gathered);
+}
+
+// Gather without hardware support: spills the lane indices to a stack array,
+// reads lane i from base[index[i]], then loads the collected lanes.
+template <typename T, size_t N, typename Index>
+HWY_API Vec128<T, N> GatherIndex(const Simd<T, N> d, const T* HWY_RESTRICT base,
+                                 const Vec128<Index, N> index) {
+  static_assert(sizeof(T) == sizeof(Index), "Must match for portability");
+
+  alignas(16) Index positions[N];
+  Store(index, Simd<Index, N>(), positions);
+
+  alignas(16) T gathered[N];
+  for (size_t lane = 0; lane != N; ++lane) {
+    gathered[lane] = base[positions[lane]];
+  }
+  return Load(d, gathered);
+}
+
+#else
+
+namespace detail {
+
+// Integer gather helpers, selected by lane size via hwy::SizeTag. "Offset"
+// variants use scale 1 (bytes), "Index" variants use the lane size as scale.
+
+// 32-bit lanes, byte offsets.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> GatherOffset(hwy::SizeTag<4> /* tag */, Simd<T, N> /* d */,
+                                  const T* HWY_RESTRICT base,
+                                  const Vec128<int32_t, N> offset) {
+  const int32_t* const from = reinterpret_cast<const int32_t*>(base);
+  return Vec128<T, N>{_mm_i32gather_epi32(from, offset.raw, 1)};
+}
+// 32-bit lanes, lane indices.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> GatherIndex(hwy::SizeTag<4> /* tag */, Simd<T, N> /* d */,
+                                 const T* HWY_RESTRICT base,
+                                 const Vec128<int32_t, N> index) {
+  const int32_t* const from = reinterpret_cast<const int32_t*>(base);
+  return Vec128<T, N>{_mm_i32gather_epi32(from, index.raw, 4)};
+}
+
+// 64-bit lanes, byte offsets.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> GatherOffset(hwy::SizeTag<8> /* tag */, Simd<T, N> /* d */,
+                                  const T* HWY_RESTRICT base,
+                                  const Vec128<int64_t, N> offset) {
+  const GatherIndex64* const from =
+      reinterpret_cast<const GatherIndex64*>(base);
+  return Vec128<T, N>{_mm_i64gather_epi64(from, offset.raw, 1)};
+}
+// 64-bit lanes, lane indices.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> GatherIndex(hwy::SizeTag<8> /* tag */, Simd<T, N> /* d */,
+                                 const T* HWY_RESTRICT base,
+                                 const Vec128<int64_t, N> index) {
+  const GatherIndex64* const from =
+      reinterpret_cast<const GatherIndex64*>(base);
+  return Vec128<T, N>{_mm_i64gather_epi64(from, index.raw, 8)};
+}
+
+}  // namespace detail
+
+// Public integer gather entry points: dispatch to the detail:: overload for
+// the lane size of T.
+//
+// d:      tag describing the result vector.
+// base:   start of the array to read from.
+// offset: per-lane byte offsets from `base` (GatherOffset).
+// index:  per-lane element indices into `base` (GatherIndex).
+//
+// The static_asserts mirror the checks in the SSE4 gather and in all scatter
+// variants, so a mismatched offset/index lane size is reported with the same
+// message on every target instead of as an overload-resolution failure.
+template <typename T, size_t N, typename Offset>
+HWY_API Vec128<T, N> GatherOffset(Simd<T, N> d, const T* HWY_RESTRICT base,
+                                  const Vec128<Offset, N> offset) {
+  static_assert(sizeof(T) == sizeof(Offset), "Must match for portability");
+  return detail::GatherOffset(hwy::SizeTag<sizeof(T)>(), d, base, offset);
+}
+template <typename T, size_t N, typename Index>
+HWY_API Vec128<T, N> GatherIndex(Simd<T, N> d, const T* HWY_RESTRICT base,
+                                 const Vec128<Index, N> index) {
+  static_assert(sizeof(T) == sizeof(Index), "Must match for portability");
+  return detail::GatherIndex(hwy::SizeTag<sizeof(T)>(), d, base, index);
+}
+
+// Float gather: offsets are in bytes (scale 1), indices in lanes (scale 4).
+template <size_t N>
+HWY_API Vec128<float, N> GatherOffset(Simd<float, N> /* tag */,
+                                      const float* HWY_RESTRICT base,
+                                      const Vec128<int32_t, N> offset) {
+  const __m128 gathered = _mm_i32gather_ps(base, offset.raw, 1);
+  return Vec128<float, N>{gathered};
+}
+template <size_t N>
+HWY_API Vec128<float, N> GatherIndex(Simd<float, N> /* tag */,
+                                     const float* HWY_RESTRICT base,
+                                     const Vec128<int32_t, N> index) {
+  const __m128 gathered = _mm_i32gather_ps(base, index.raw, 4);
+  return Vec128<float, N>{gathered};
+}
+
+// Double gather: offsets are in bytes (scale 1), indices in lanes (scale 8).
+template <size_t N>
+HWY_API Vec128<double, N> GatherOffset(Simd<double, N> /* tag */,
+                                       const double* HWY_RESTRICT base,
+                                       const Vec128<int64_t, N> offset) {
+  const __m128d gathered = _mm_i64gather_pd(base, offset.raw, 1);
+  return Vec128<double, N>{gathered};
+}
+template <size_t N>
+HWY_API Vec128<double, N> GatherIndex(Simd<double, N> /* tag */,
+                                      const double* HWY_RESTRICT base,
+                                      const Vec128<int64_t, N> index) {
+  const __m128d gathered = _mm_i64gather_pd(base, index.raw, 8);
+  return Vec128<double, N>{gathered};
+}
+
+#endif  // HWY_TARGET != HWY_SSE4
+
+HWY_DIAGNOSTICS(pop)
+
+// ================================================== SWIZZLE
+
+// ------------------------------ Extract half
+
+// Returns the lower half of a vector: the register is passed through
+// unchanged and only the lane count in the type is halved.
+template <typename T, size_t N>
+HWY_API Vec128<T, N / 2> LowerHalf(Vec128<T, N> v) {
+  using Half = Vec128<T, N / 2>;
+  return Half{v.raw};
+}
+
+// Returns the upper half of a full vector. These copy hi into lo (smaller
+// instruction encoding than shifts). The primary template handles integer
+// lanes; float and double are explicit specializations.
+template <typename T>
+HWY_API Vec128<T, 8 / sizeof(T)> UpperHalf(Vec128<T> v) {
+  const __m128i hi_in_lo = _mm_unpackhi_epi64(v.raw, v.raw);
+  return Vec128<T, 8 / sizeof(T)>{hi_in_lo};
+}
+template <>
+HWY_INLINE Vec128<float, 2> UpperHalf(Vec128<float> v) {
+  const __m128 hi_in_lo = _mm_movehl_ps(v.raw, v.raw);
+  return Vec128<float, 2>{hi_in_lo};
+}
+template <>
+HWY_INLINE Vec128<double, 1> UpperHalf(Vec128<double> v) {
+  const __m128d hi_in_lo = _mm_unpackhi_pd(v.raw, v.raw);
+  return Vec128<double, 1>{hi_in_lo};
+}
+
+// ------------------------------ Shift vector by constant #bytes
+
+// Shifts the whole register left by kBytes bytes, filling with zeros.
+// Example: 0x01..0F, kBytes = 1 => 0x02..0F00
+template <int kBytes, typename T, size_t N>
+HWY_API Vec128<T, N> ShiftLeftBytes(const Vec128<T, N> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  const __m128i shifted = _mm_slli_si128(v.raw, kBytes);
+  return Vec128<T, N>{shifted};
+}
+
+// Shifts left by kLanes whole lanes, implemented as a byte shift of
+// kLanes * sizeof(T) on the byte-vector view of v.
+template <int kLanes, typename T, size_t N>
+HWY_API Vec128<T, N> ShiftLeftLanes(const Vec128<T, N> v) {
+  const Simd<T, N> d;
+  const Simd<uint8_t, N * sizeof(T)> d8;
+  const auto as_bytes = BitCast(d8, v);
+  const auto shifted = ShiftLeftBytes<kLanes * sizeof(T)>(as_bytes);
+  return BitCast(d, shifted);
+}
+
+// 0x01..0F, kBytes = 1 => 0x0001..0E
+// Shifts the N valid lanes of v right by kBytes bytes; vacated bytes are
+// zero. For partial vectors the register bytes above the valid lanes are
+// unspecified (e.g. LowerHalf leaves the upper half in place), so they must
+// not be shifted into the result.
+template <int kBytes, typename T, size_t N>
+HWY_API Vec128<T, N> ShiftRightBytes(const Vec128<T, N> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  // Number of register bytes above the N valid lanes (0 for full vectors).
+  constexpr int kUnusedBytes = 16 - static_cast<int>(N * sizeof(T));
+  static_assert(0 <= kUnusedBytes && kUnusedBytes < 16, "Invalid vector size");
+  if (kUnusedBytes == 0) {
+    return Vec128<T, N>{_mm_srli_si128(v.raw, kBytes)};
+  }
+  // Move the valid bytes to the top of the register, which discards whatever
+  // was above them, then shift back down by the same amount plus kBytes so
+  // that zeros are shifted in. Byte shifts above 15 yield zero.
+  const __m128i top_aligned = _mm_slli_si128(v.raw, kUnusedBytes);
+  return Vec128<T, N>{_mm_srli_si128(top_aligned, kUnusedBytes + kBytes)};
+}
+
+// Shifts right by kLanes whole lanes, implemented as a byte shift of
+// kLanes * sizeof(T) on the byte-vector view of v.
+template <int kLanes, typename T, size_t N>
+HWY_API Vec128<T, N> ShiftRightLanes(const Vec128<T, N> v) {
+  const Simd<T, N> d;
+  const Simd<uint8_t, N * sizeof(T)> d8;
+  const auto as_bytes = BitCast(d8, v);
+  const auto shifted = ShiftRightBytes<kLanes * sizeof(T)>(as_bytes);
+  return BitCast(d, shifted);
+}
+
+// ------------------------------ Extract from 2x 128-bit at constant offset
+
+// Treats <hi, lo> as one 256-bit value, drops its least-significant kBytes
+// bytes and returns the next 128 bits.
+template <int kBytes, typename T>
+HWY_API Vec128<T> CombineShiftRightBytes(const Vec128<T> hi,
+                                         const Vec128<T> lo) {
+  const Full128<uint8_t> d8;
+  const Full128<T> d;
+  const __m128i hi_bytes = BitCast(d8, hi).raw;
+  const __m128i lo_bytes = BitCast(d8, lo).raw;
+  const Vec128<uint8_t> extracted{_mm_alignr_epi8(hi_bytes, lo_bytes, kBytes)};
+  return BitCast(d, extracted);
+}
+
+// ------------------------------ Broadcast/splat any lane
+
+// Unsigned: Broadcast<kLane> copies lane kLane of v into every lane.
+template <int kLane, size_t N>
+HWY_API Vec128<uint16_t, N> Broadcast(const Vec128<uint16_t, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  if (kLane >= 4) {
+    // Lane lives in the upper 64 bits: splat it there, then duplicate that
+    // half. The mask keeps the immediate in range in the untaken branch.
+    const __m128i hi = _mm_shufflehi_epi16(v.raw, (0x55 * (kLane - 4)) & 0xFF);
+    return Vec128<uint16_t, N>{_mm_unpackhi_epi64(hi, hi)};
+  }
+  // Lane lives in the lower 64 bits.
+  const __m128i lo = _mm_shufflelo_epi16(v.raw, (0x55 * kLane) & 0xFF);
+  return Vec128<uint16_t, N>{_mm_unpacklo_epi64(lo, lo)};
+}
+template <int kLane, size_t N>
+HWY_API Vec128<uint32_t, N> Broadcast(const Vec128<uint32_t, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  // 0x55 * kLane repeats the 2-bit lane number in all four selector fields.
+  const __m128i splat = _mm_shuffle_epi32(v.raw, 0x55 * kLane);
+  return Vec128<uint32_t, N>{splat};
+}
+template <int kLane, size_t N>
+HWY_API Vec128<uint64_t, N> Broadcast(const Vec128<uint64_t, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  // 0xEE selects 32-bit lanes 3,2,3,2 (upper half); 0x44 selects 1,0,1,0.
+  const __m128i splat = _mm_shuffle_epi32(v.raw, kLane ? 0xEE : 0x44);
+  return Vec128<uint64_t, N>{splat};
+}
+
+// Signed: Broadcast<kLane> copies lane kLane of v into every lane.
+template <int kLane, size_t N>
+HWY_API Vec128<int16_t, N> Broadcast(const Vec128<int16_t, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  if (kLane >= 4) {
+    // Lane lives in the upper 64 bits: splat it there, then duplicate that
+    // half. The mask keeps the immediate in range in the untaken branch.
+    const __m128i hi = _mm_shufflehi_epi16(v.raw, (0x55 * (kLane - 4)) & 0xFF);
+    return Vec128<int16_t, N>{_mm_unpackhi_epi64(hi, hi)};
+  }
+  // Lane lives in the lower 64 bits.
+  const __m128i lo = _mm_shufflelo_epi16(v.raw, (0x55 * kLane) & 0xFF);
+  return Vec128<int16_t, N>{_mm_unpacklo_epi64(lo, lo)};
+}
+template <int kLane, size_t N>
+HWY_API Vec128<int32_t, N> Broadcast(const Vec128<int32_t, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  // 0x55 * kLane repeats the 2-bit lane number in all four selector fields.
+  const __m128i splat = _mm_shuffle_epi32(v.raw, 0x55 * kLane);
+  return Vec128<int32_t, N>{splat};
+}
+template <int kLane, size_t N>
+HWY_API Vec128<int64_t, N> Broadcast(const Vec128<int64_t, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  // 0xEE selects 32-bit lanes 3,2,3,2 (upper half); 0x44 selects 1,0,1,0.
+  const __m128i splat = _mm_shuffle_epi32(v.raw, kLane ? 0xEE : 0x44);
+  return Vec128<int64_t, N>{splat};
+}
+
+// Float: Broadcast<kLane> copies lane kLane of v into every lane.
+template <int kLane, size_t N>
+HWY_API Vec128<float, N> Broadcast(const Vec128<float, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  // 0x55 * kLane repeats the 2-bit lane number in all four selector fields.
+  const __m128 splat = _mm_shuffle_ps(v.raw, v.raw, 0x55 * kLane);
+  return Vec128<float, N>{splat};
+}
+template <int kLane, size_t N>
+HWY_API Vec128<double, N> Broadcast(const Vec128<double, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  // 3 * kLane sets both 1-bit selectors to kLane.
+  const __m128d splat = _mm_shuffle_pd(v.raw, v.raw, 3 * kLane);
+  return Vec128<double, N>{splat};
+}
+
+// ------------------------------ Shuffle bytes with variable indices
+
+// Byte-granularity table lookup: result byte i is bytes[from[i]]. "from" is
+// reinterpreted as bytes, i.e. lane indices in [0, 16).
+template <typename T, size_t N>
+HWY_API Vec128<T, N> TableLookupBytes(const Vec128<T, N> bytes,
+                                      const Vec128<T, N> from) {
+  const __m128i looked_up = _mm_shuffle_epi8(bytes.raw, from.raw);
+  return Vec128<T, N>{looked_up};
+}
+
+// ------------------------------ Hard-coded shuffles
+
+// Notation: let Vec128<int32_t> have lanes 3,2,1,0 (0 is least-significant).
+// Shuffle0321 rotates one lane to the right (the previous least-significant
+// lane is now most-significant). These could also be implemented via
+// CombineShiftRightBytes but the shuffle_abcd notation is more convenient.
+
+// Swaps the two 32-bit lanes within each 64-bit half (result lanes 2,3,0,1).
+// 0xB1 encodes the selectors 2,3,0,1 from most- to least-significant lane.
+HWY_API Vec128<uint32_t> Shuffle2301(const Vec128<uint32_t> v) {
+  const __m128i swapped = _mm_shuffle_epi32(v.raw, 0xB1);
+  return Vec128<uint32_t>{swapped};
+}
+HWY_API Vec128<int32_t> Shuffle2301(const Vec128<int32_t> v) {
+  const __m128i swapped = _mm_shuffle_epi32(v.raw, 0xB1);
+  return Vec128<int32_t>{swapped};
+}
+HWY_API Vec128<float> Shuffle2301(const Vec128<float> v) {
+  const __m128 swapped = _mm_shuffle_ps(v.raw, v.raw, 0xB1);
+  return Vec128<float>{swapped};
+}
+
+// Swaps the two 64-bit halves of the vector. For 32-bit lanes this is the
+// order 1,0,3,2 (selector 0x4E); for 64-bit lanes it is Shuffle01.
+HWY_API Vec128<uint32_t> Shuffle1032(const Vec128<uint32_t> v) {
+  const __m128i swapped = _mm_shuffle_epi32(v.raw, 0x4E);
+  return Vec128<uint32_t>{swapped};
+}
+HWY_API Vec128<int32_t> Shuffle1032(const Vec128<int32_t> v) {
+  const __m128i swapped = _mm_shuffle_epi32(v.raw, 0x4E);
+  return Vec128<int32_t>{swapped};
+}
+HWY_API Vec128<float> Shuffle1032(const Vec128<float> v) {
+  const __m128 swapped = _mm_shuffle_ps(v.raw, v.raw, 0x4E);
+  return Vec128<float>{swapped};
+}
+HWY_API Vec128<uint64_t> Shuffle01(const Vec128<uint64_t> v) {
+  const __m128i swapped = _mm_shuffle_epi32(v.raw, 0x4E);
+  return Vec128<uint64_t>{swapped};
+}
+HWY_API Vec128<int64_t> Shuffle01(const Vec128<int64_t> v) {
+  const __m128i swapped = _mm_shuffle_epi32(v.raw, 0x4E);
+  return Vec128<int64_t>{swapped};
+}
+HWY_API Vec128<double> Shuffle01(const Vec128<double> v) {
+  // Selector 1: low lane from the upper element, high lane from the lower.
+  const __m128d swapped = _mm_shuffle_pd(v.raw, v.raw, 1);
+  return Vec128<double>{swapped};
+}
+
+// Rotates the vector right by one 32-bit lane (result lanes 0,3,2,1): the
+// previous least-significant lane becomes the most-significant one.
+HWY_API Vec128<uint32_t> Shuffle0321(const Vec128<uint32_t> v) {
+  const __m128i rotated = _mm_shuffle_epi32(v.raw, 0x39);
+  return Vec128<uint32_t>{rotated};
+}
+HWY_API Vec128<int32_t> Shuffle0321(const Vec128<int32_t> v) {
+  const __m128i rotated = _mm_shuffle_epi32(v.raw, 0x39);
+  return Vec128<int32_t>{rotated};
+}
+HWY_API Vec128<float> Shuffle0321(const Vec128<float> v) {
+  const __m128 rotated = _mm_shuffle_ps(v.raw, v.raw, 0x39);
+  return Vec128<float>{rotated};
+}
+// Rotates the vector left by one 32-bit lane (result lanes 2,1,0,3): the
+// previous most-significant lane becomes the least-significant one.
+HWY_API Vec128<uint32_t> Shuffle2103(const Vec128<uint32_t> v) {
+  const __m128i rotated = _mm_shuffle_epi32(v.raw, 0x93);
+  return Vec128<uint32_t>{rotated};
+}
+HWY_API Vec128<int32_t> Shuffle2103(const Vec128<int32_t> v) {
+  const __m128i rotated = _mm_shuffle_epi32(v.raw, 0x93);
+  return Vec128<int32_t>{rotated};
+}
+HWY_API Vec128<float> Shuffle2103(const Vec128<float> v) {
+  const __m128 rotated = _mm_shuffle_ps(v.raw, v.raw, 0x93);
+  return Vec128<float>{rotated};
+}
+
+// Reverses the order of the four 32-bit lanes (result lanes 0,1,2,3).
+HWY_API Vec128<uint32_t> Shuffle0123(const Vec128<uint32_t> v) {
+  const __m128i reversed = _mm_shuffle_epi32(v.raw, 0x1B);
+  return Vec128<uint32_t>{reversed};
+}
+HWY_API Vec128<int32_t> Shuffle0123(const Vec128<int32_t> v) {
+  const __m128i reversed = _mm_shuffle_epi32(v.raw, 0x1B);
+  return Vec128<int32_t>{reversed};
+}
+HWY_API Vec128<float> Shuffle0123(const Vec128<float> v) {
+  const __m128 reversed = _mm_shuffle_ps(v.raw, v.raw, 0x1B);
+  return Vec128<float>{reversed};
+}
+
+// ------------------------------ TableLookupLanes
+
+// Returned by SetTableIndices for use by TableLookupLanes.
+// T and N identify the vector type the indices were built for; they are not
+// used by the member itself.
+template <typename T, size_t N>
+struct Indices128 {
+  __m128i raw;  // Byte-level shuffle control (see SetTableIndices).
+};
+
+// Converts N lane indices into the byte-shuffle control consumed by
+// TableLookupLanes: result lane i will select source lane idx[i]. Each index
+// must be in [0, N); this is checked in debug/ASan builds only.
+template <typename T, size_t N, HWY_IF_LE128(T, N)>
+HWY_API Indices128<T, N> SetTableIndices(Simd<T, N> d, const int32_t* idx) {
+#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER)
+  for (size_t i = 0; i < N; ++i) {
+    HWY_DASSERT(0 <= idx[i] && idx[i] < static_cast<int32_t>(N));
+  }
+#endif
+
+  // Expand every lane index into sizeof(T) consecutive byte indices. Control
+  // bytes beyond the N lanes remain zero.
+  alignas(16) uint8_t control[16] = {0};
+  for (size_t lane = 0; lane < N; ++lane) {
+    const size_t src_byte = idx[lane] * sizeof(T);
+    const size_t dst_byte = lane * sizeof(T);
+    for (size_t b = 0; b < sizeof(T); ++b) {
+      control[dst_byte + b] = static_cast<uint8_t>(src_byte + b);
+    }
+  }
+
+  const Repartition<uint8_t, decltype(d)> d8;
+  return Indices128<T, N>{Load(d8, control).raw};
+}
+
+// Permutes the 32-bit lanes of v according to idx (from SetTableIndices).
+template <size_t N>
+HWY_API Vec128<uint32_t, N> TableLookupLanes(
+    const Vec128<uint32_t, N> v, const Indices128<uint32_t, N> idx) {
+  const Vec128<uint32_t, N> byte_indices{idx.raw};
+  return TableLookupBytes(v, byte_indices);
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> TableLookupLanes(const Vec128<int32_t, N> v,
+                                            const Indices128<int32_t, N> idx) {
+  const Vec128<int32_t, N> byte_indices{idx.raw};
+  return TableLookupBytes(v, byte_indices);
+}
+template <size_t N>
+HWY_API Vec128<float, N> TableLookupLanes(const Vec128<float, N> v,
+                                          const Indices128<float, N> idx) {
+  // Shuffle the bit patterns as int32 lanes, then reinterpret as float again.
+  const Simd<float, N> df;
+  const Simd<int32_t, N> di;
+  const Vec128<int32_t, N> byte_indices{idx.raw};
+  const auto shuffled = TableLookupBytes(BitCast(di, v), byte_indices);
+  return BitCast(df, shuffled);
+}
+
+// ------------------------------ Interleave lanes
+
+// Interleaves lanes from halves of the 128-bit blocks of "a" (which provides
+// the least-significant lane) and "b". To concatenate two half-width integers
+// into one, use ZipLower/Upper instead (also works with scalar).
+
+// Unsigned: result lanes are a0, b0, a1, b1, ... from the lower halves.
+HWY_API Vec128<uint8_t> InterleaveLower(const Vec128<uint8_t> a,
+                                        const Vec128<uint8_t> b) {
+  const auto mixed = _mm_unpacklo_epi8(a.raw, b.raw);
+  return Vec128<uint8_t>{mixed};
+}
+HWY_API Vec128<uint16_t> InterleaveLower(const Vec128<uint16_t> a,
+                                         const Vec128<uint16_t> b) {
+  const auto mixed = _mm_unpacklo_epi16(a.raw, b.raw);
+  return Vec128<uint16_t>{mixed};
+}
+HWY_API Vec128<uint32_t> InterleaveLower(const Vec128<uint32_t> a,
+                                         const Vec128<uint32_t> b) {
+  const auto mixed = _mm_unpacklo_epi32(a.raw, b.raw);
+  return Vec128<uint32_t>{mixed};
+}
+HWY_API Vec128<uint64_t> InterleaveLower(const Vec128<uint64_t> a,
+                                         const Vec128<uint64_t> b) {
+  const auto mixed = _mm_unpacklo_epi64(a.raw, b.raw);
+  return Vec128<uint64_t>{mixed};
+}
+
+// Signed: same instructions as the unsigned overloads.
+HWY_API Vec128<int8_t> InterleaveLower(const Vec128<int8_t> a,
+                                       const Vec128<int8_t> b) {
+  const auto mixed = _mm_unpacklo_epi8(a.raw, b.raw);
+  return Vec128<int8_t>{mixed};
+}
+HWY_API Vec128<int16_t> InterleaveLower(const Vec128<int16_t> a,
+                                        const Vec128<int16_t> b) {
+  const auto mixed = _mm_unpacklo_epi16(a.raw, b.raw);
+  return Vec128<int16_t>{mixed};
+}
+HWY_API Vec128<int32_t> InterleaveLower(const Vec128<int32_t> a,
+                                        const Vec128<int32_t> b) {
+  const auto mixed = _mm_unpacklo_epi32(a.raw, b.raw);
+  return Vec128<int32_t>{mixed};
+}
+HWY_API Vec128<int64_t> InterleaveLower(const Vec128<int64_t> a,
+                                        const Vec128<int64_t> b) {
+  const auto mixed = _mm_unpacklo_epi64(a.raw, b.raw);
+  return Vec128<int64_t>{mixed};
+}
+
+// Floating-point.
+HWY_API Vec128<float> InterleaveLower(const Vec128<float> a,
+                                      const Vec128<float> b) {
+  const auto mixed = _mm_unpacklo_ps(a.raw, b.raw);
+  return Vec128<float>{mixed};
+}
+HWY_API Vec128<double> InterleaveLower(const Vec128<double> a,
+                                       const Vec128<double> b) {
+  const auto mixed = _mm_unpacklo_pd(a.raw, b.raw);
+  return Vec128<double>{mixed};
+}
+
+// Unsigned: alternates lanes of a and b (a first) from the upper halves.
+HWY_API Vec128<uint8_t> InterleaveUpper(const Vec128<uint8_t> a,
+                                        const Vec128<uint8_t> b) {
+  const auto mixed = _mm_unpackhi_epi8(a.raw, b.raw);
+  return Vec128<uint8_t>{mixed};
+}
+HWY_API Vec128<uint16_t> InterleaveUpper(const Vec128<uint16_t> a,
+                                         const Vec128<uint16_t> b) {
+  const auto mixed = _mm_unpackhi_epi16(a.raw, b.raw);
+  return Vec128<uint16_t>{mixed};
+}
+HWY_API Vec128<uint32_t> InterleaveUpper(const Vec128<uint32_t> a,
+                                         const Vec128<uint32_t> b) {
+  const auto mixed = _mm_unpackhi_epi32(a.raw, b.raw);
+  return Vec128<uint32_t>{mixed};
+}
+HWY_API Vec128<uint64_t> InterleaveUpper(const Vec128<uint64_t> a,
+                                         const Vec128<uint64_t> b) {
+  const auto mixed = _mm_unpackhi_epi64(a.raw, b.raw);
+  return Vec128<uint64_t>{mixed};
+}
+
+// Signed: same instructions as the unsigned overloads.
+HWY_API Vec128<int8_t> InterleaveUpper(const Vec128<int8_t> a,
+                                       const Vec128<int8_t> b) {
+  const auto mixed = _mm_unpackhi_epi8(a.raw, b.raw);
+  return Vec128<int8_t>{mixed};
+}
+HWY_API Vec128<int16_t> InterleaveUpper(const Vec128<int16_t> a,
+                                        const Vec128<int16_t> b) {
+  const auto mixed = _mm_unpackhi_epi16(a.raw, b.raw);
+  return Vec128<int16_t>{mixed};
+}
+HWY_API Vec128<int32_t> InterleaveUpper(const Vec128<int32_t> a,
+                                        const Vec128<int32_t> b) {
+  const auto mixed = _mm_unpackhi_epi32(a.raw, b.raw);
+  return Vec128<int32_t>{mixed};
+}
+HWY_API Vec128<int64_t> InterleaveUpper(const Vec128<int64_t> a,
+                                        const Vec128<int64_t> b) {
+  const auto mixed = _mm_unpackhi_epi64(a.raw, b.raw);
+  return Vec128<int64_t>{mixed};
+}
+
+// Floating-point.
+HWY_API Vec128<float> InterleaveUpper(const Vec128<float> a,
+                                      const Vec128<float> b) {
+  const auto mixed = _mm_unpackhi_ps(a.raw, b.raw);
+  return Vec128<float>{mixed};
+}
+HWY_API Vec128<double> InterleaveUpper(const Vec128<double> a,
+                                       const Vec128<double> b) {
+  const auto mixed = _mm_unpackhi_pd(a.raw, b.raw);
+  return Vec128<double>{mixed};
+}
+
+// ------------------------------ Zip lanes
+
+// Same as Interleave*, except that the return lanes are double-width integers;
+// this is necessary because the single-lane scalar cannot return two values.
+
+// Unsigned: each wide lane holds a's narrow lane in its low half and b's in
+// its high half, taken from the lower halves of the inputs.
+template <size_t N>
+HWY_API Vec128<uint16_t, (N + 1) / 2> ZipLower(const Vec128<uint8_t, N> a,
+                                               const Vec128<uint8_t, N> b) {
+  using Wide = Vec128<uint16_t, (N + 1) / 2>;
+  return Wide{_mm_unpacklo_epi8(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, (N + 1) / 2> ZipLower(const Vec128<uint16_t, N> a,
+                                               const Vec128<uint16_t, N> b) {
+  using Wide = Vec128<uint32_t, (N + 1) / 2>;
+  return Wide{_mm_unpacklo_epi16(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, (N + 1) / 2> ZipLower(const Vec128<uint32_t, N> a,
+                                               const Vec128<uint32_t, N> b) {
+  using Wide = Vec128<uint64_t, (N + 1) / 2>;
+  return Wide{_mm_unpacklo_epi32(a.raw, b.raw)};
+}
+
+// Signed: same instructions as the unsigned overloads.
+template <size_t N>
+HWY_API Vec128<int16_t, (N + 1) / 2> ZipLower(const Vec128<int8_t, N> a,
+                                              const Vec128<int8_t, N> b) {
+  using Wide = Vec128<int16_t, (N + 1) / 2>;
+  return Wide{_mm_unpacklo_epi8(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, (N + 1) / 2> ZipLower(const Vec128<int16_t, N> a,
+                                              const Vec128<int16_t, N> b) {
+  using Wide = Vec128<int32_t, (N + 1) / 2>;
+  return Wide{_mm_unpacklo_epi16(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int64_t, (N + 1) / 2> ZipLower(const Vec128<int32_t, N> a,
+                                              const Vec128<int32_t, N> b) {
+  using Wide = Vec128<int64_t, (N + 1) / 2>;
+  return Wide{_mm_unpacklo_epi32(a.raw, b.raw)};
+}
+
+// Unsigned: like ZipLower, but the narrow lanes come from the upper halves of
+// the inputs.
+template <size_t N>
+HWY_API Vec128<uint16_t, (N + 1) / 2> ZipUpper(const Vec128<uint8_t, N> a,
+                                               const Vec128<uint8_t, N> b) {
+  using Wide = Vec128<uint16_t, (N + 1) / 2>;
+  return Wide{_mm_unpackhi_epi8(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, (N + 1) / 2> ZipUpper(const Vec128<uint16_t, N> a,
+                                               const Vec128<uint16_t, N> b) {
+  using Wide = Vec128<uint32_t, (N + 1) / 2>;
+  return Wide{_mm_unpackhi_epi16(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, (N + 1) / 2> ZipUpper(const Vec128<uint32_t, N> a,
+                                               const Vec128<uint32_t, N> b) {
+  using Wide = Vec128<uint64_t, (N + 1) / 2>;
+  return Wide{_mm_unpackhi_epi32(a.raw, b.raw)};
+}
+
+// Signed: same instructions as the unsigned overloads.
+template <size_t N>
+HWY_API Vec128<int16_t, (N + 1) / 2> ZipUpper(const Vec128<int8_t, N> a,
+                                              const Vec128<int8_t, N> b) {
+  using Wide = Vec128<int16_t, (N + 1) / 2>;
+  return Wide{_mm_unpackhi_epi8(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, (N + 1) / 2> ZipUpper(const Vec128<int16_t, N> a,
+                                              const Vec128<int16_t, N> b) {
+  using Wide = Vec128<int32_t, (N + 1) / 2>;
+  return Wide{_mm_unpackhi_epi16(a.raw, b.raw)};
+}
+template <size_t N>
+HWY_API Vec128<int64_t, (N + 1) / 2> ZipUpper(const Vec128<int32_t, N> a,
+                                              const Vec128<int32_t, N> b) {
+  using Wide = Vec128<int64_t, (N + 1) / 2>;
+  return Wide{_mm_unpackhi_epi32(a.raw, b.raw)};
+}
+
+// ------------------------------ Blocks
+
+// hiH,hiL loH,loL |-> hiL,loL (= lower halves)
+template <typename T>
+HWY_API Vec128<T> ConcatLowerLower(const Vec128<T> hi, const Vec128<T> lo) {
+  // View both inputs as two 64-bit lanes; interleaving the lower lanes puts
+  // lo's lower half first and hi's lower half second.
+  const Full128<uint64_t> d64;
+  const auto lo64 = BitCast(d64, lo);
+  const auto hi64 = BitCast(d64, hi);
+  return BitCast(Full128<T>(), InterleaveLower(lo64, hi64));
+}
+
+// hiH,hiL loH,loL |-> hiH,loH (= upper halves)
+template <typename T>
+HWY_API Vec128<T> ConcatUpperUpper(const Vec128<T> hi, const Vec128<T> lo) {
+  // View both inputs as two 64-bit lanes; interleaving the upper lanes puts
+  // lo's upper half first and hi's upper half second.
+  const Full128<uint64_t> d64;
+  const auto lo64 = BitCast(d64, lo);
+  const auto hi64 = BitCast(d64, hi);
+  return BitCast(Full128<T>(), InterleaveUpper(lo64, hi64));
+}
+
+// hiH,hiL loH,loL |-> hiL,loH (= inner halves)
+template <typename T>
+HWY_API Vec128<T> ConcatLowerUpper(const Vec128<T> hi, const Vec128<T> lo) {
+  // Delegates to an 8-byte CombineShiftRightBytes over the hi:lo pair.
+  const Vec128<T> inner = CombineShiftRightBytes<8>(hi, lo);
+  return inner;
+}
+
+// hiH,hiL loH,loL |-> hiH,loL (= outer halves)
+template <typename T>
+HWY_API Vec128<T> ConcatUpperLower(const Vec128<T> hi, const Vec128<T> lo) {
+  // Mask 0x0F: the four low 16-bit lanes come from lo, the rest from hi.
+  const auto outer = _mm_blend_epi16(hi.raw, lo.raw, 0x0F);
+  return Vec128<T>{outer};
+}
+template <>
+HWY_INLINE Vec128<float> ConcatUpperLower(const Vec128<float> hi,
+                                          const Vec128<float> lo) {
+  // Mask 3: float lanes 0 and 1 come from lo.
+  const auto outer = _mm_blend_ps(hi.raw, lo.raw, 3);
+  return Vec128<float>{outer};
+}
+template <>
+HWY_INLINE Vec128<double> ConcatUpperLower(const Vec128<double> hi,
+                                           const Vec128<double> lo) {
+  // Mask 1: double lane 0 comes from lo.
+  const auto outer = _mm_blend_pd(hi.raw, lo.raw, 1);
+  return Vec128<double>{outer};
+}
+
+// ------------------------------ OddEven (IfThenElse)
+
+namespace detail {
+
+// Tag-dispatched helpers, one per lane size in bytes. Each returns a vector
+// whose even-indexed lanes come from b and odd-indexed lanes from a.
+
+template <typename T, size_t N>
+HWY_API Vec128<T, N> OddEven(hwy::SizeTag<1> /* tag */, const Vec128<T, N> a,
+                             const Vec128<T, N> b) {
+  const Simd<T, N> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  // 0xFF in every even byte position: those bytes are taken from b.
+  alignas(16) constexpr uint8_t mask[16] = {0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0,
+                                            0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0};
+  const auto take_b = MaskFromVec(BitCast(d, Load(d8, mask)));
+  return IfThenElse(take_b, b, a);
+}
+template <typename T, size_t N>
+HWY_API Vec128<T, N> OddEven(hwy::SizeTag<2> /* tag */, const Vec128<T, N> a,
+                             const Vec128<T, N> b) {
+  // 0x55: every other 16-bit lane, starting with lane 0, comes from b.
+  const auto blended = _mm_blend_epi16(a.raw, b.raw, 0x55);
+  return Vec128<T, N>{blended};
+}
+template <typename T, size_t N>
+HWY_API Vec128<T, N> OddEven(hwy::SizeTag<4> /* tag */, const Vec128<T, N> a,
+                             const Vec128<T, N> b) {
+  // 0x33: pairs of 16-bit lanes, i.e. 32-bit lanes 0 and 2, come from b.
+  const auto blended = _mm_blend_epi16(a.raw, b.raw, 0x33);
+  return Vec128<T, N>{blended};
+}
+template <typename T, size_t N>
+HWY_API Vec128<T, N> OddEven(hwy::SizeTag<8> /* tag */, const Vec128<T, N> a,
+                             const Vec128<T, N> b) {
+  // 0x0F: the low four 16-bit lanes, i.e. 64-bit lane 0, come from b.
+  const auto blended = _mm_blend_epi16(a.raw, b.raw, 0x0F);
+  return Vec128<T, N>{blended};
+}
+
+}  // namespace detail
+
+// Returns a vector with odd lanes taken from a and even lanes from b.
+// Generic version: dispatches on the lane size.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> OddEven(const Vec128<T, N> a, const Vec128<T, N> b) {
+  const hwy::SizeTag<sizeof(T)> lane_size_tag;
+  return detail::OddEven(lane_size_tag, a, b);
+}
+template <size_t N>
+HWY_INLINE Vec128<float, N> OddEven(const Vec128<float, N> a,
+                                    const Vec128<float, N> b) {
+  // Mask 5 (0b0101): float lanes 0 and 2 come from b.
+  const auto blended = _mm_blend_ps(a.raw, b.raw, 5);
+  return Vec128<float, N>{blended};
+}
+
+template <size_t N>
+HWY_INLINE Vec128<double, N> OddEven(const Vec128<double, N> a,
+                                     const Vec128<double, N> b) {
+  // Mask 1: double lane 0 comes from b.
+  const auto blended = _mm_blend_pd(a.raw, b.raw, 1);
+  return Vec128<double, N>{blended};
+}
+
+// ------------------------------ Shl (ZipLower, Mul)
+
+// Use AVX2/3 variable shifts where available, otherwise multiply by powers of
+// two from loading float exponents, which is considerably faster (according
+// to LLVM-MCA) than scalar or testing bits: https://gcc.godbolt.org/z/9G7Y9v.
+
+// Pow2 is only compiled when the target is not AVX3.
+#if HWY_TARGET != HWY_AVX3
+namespace detail {
+
+// Returns 2^v for use as per-lane multipliers to emulate 16-bit shifts.
+// v holds the per-lane exponents in 16-bit lanes; the result has the same
+// lane count with unsigned lanes.
+template <typename T, size_t N, HWY_IF_LANE_SIZE(T, 2)>
+HWY_API Vec128<MakeUnsigned<T>, N> Pow2(const Vec128<T, N> v) {
+  const Simd<T, N> d;
+  const Repartition<float, decltype(d)> df;
+  const auto zero = Zero(d);
+  // Move into exponent (this u16 will become the upper half of an f32)
+  const auto exp = ShiftLeft<23 - 16>(v);
+  const auto upper = exp + Set(d, 0x3F80);  // upper half of 1.0f
+  // Insert 0 into lower halves for reinterpreting as binary32.
+  const auto f0 = ZipLower(zero, upper);
+  const auto f1 = ZipUpper(zero, upper);
+  // See comment below.
+  // Convert the two float halves to int32, then pack them back into 16-bit
+  // lanes (_mm_packus_epi32 narrows with unsigned saturation).
+  const Vec128<int32_t, N> bits0{_mm_cvtps_epi32(BitCast(df, f0).raw)};
+  const Vec128<int32_t, N> bits1{_mm_cvtps_epi32(BitCast(df, f1).raw)};
+  return Vec128<MakeUnsigned<T>, N>{_mm_packus_epi32(bits0.raw, bits1.raw)};
+}
+
+// Same, for 32-bit shifts.
+template <typename T, size_t N, HWY_IF_LANE_SIZE(T, 4)>
+HWY_API Vec128<MakeUnsigned<T>, N> Pow2(const Vec128<T, N> v) {
+  const Simd<T, N> d;
+  // Place v in the binary32 exponent field (bits 23 and up) and add the bit
+  // pattern of 1.0f, yielding the float 2^v.
+  const auto exp = ShiftLeft<23>(v);
+  const auto f = exp + Set(d, 0x3F800000);  // 1.0f
+  // Do not use ConvertTo because we rely on the native 0x80..00 overflow
+  // behavior. cvt instead of cvtt should be equivalent, but avoids test
+  // failure under GCC 10.2.1.
+  return Vec128<MakeUnsigned<T>, N>{_mm_cvtps_epi32(_mm_castsi128_ps(f.raw))};
+}
+
+}  // namespace detail
+#endif  // HWY_TARGET != HWY_AVX3
+
+// Per-lane variable left shift of 16-bit lanes.
+template <size_t N>
+HWY_API Vec128<uint16_t, N> operator<<(const Vec128<uint16_t, N> v,
+                                       const Vec128<uint16_t, N> bits) {
+#if HWY_TARGET != HWY_AVX3
+  // No native 16-bit variable shift here: multiply by 2^bits instead.
+  const auto multiplier = detail::Pow2(bits);
+  return v * multiplier;
+#else
+  return Vec128<uint16_t, N>{_mm_sllv_epi16(v.raw, bits.raw)};
+#endif
+}
+
+// Per-lane variable left shift of 32-bit lanes.
+template <size_t N>
+HWY_API Vec128<uint32_t, N> operator<<(const Vec128<uint32_t, N> v,
+                                       const Vec128<uint32_t, N> bits) {
+#if HWY_TARGET != HWY_SSE4
+  return Vec128<uint32_t, N>{_mm_sllv_epi32(v.raw, bits.raw)};
+#else
+  // SSE4 target: multiply by 2^bits instead.
+  const auto multiplier = detail::Pow2(bits);
+  return v * multiplier;
+#endif
+}
+
+// Per-lane variable left shift of 64-bit lanes.
+template <size_t N>
+HWY_API Vec128<uint64_t, N> operator<<(const Vec128<uint64_t, N> v,
+                                       const Vec128<uint64_t, N> bits) {
+#if HWY_TARGET != HWY_SSE4
+  return Vec128<uint64_t, N>{_mm_sllv_epi64(v.raw, bits.raw)};
+#else
+  // _mm_sll_epi64 applies the count in the low 64 bits to both lanes, so
+  // shift once per count and keep the matching lane of each result.
+  const __m128i by_count0 = _mm_sll_epi64(v.raw, bits.raw);
+  const __m128i count1 = _mm_unpackhi_epi64(bits.raw, bits.raw);
+  const __m128i by_count1 = _mm_sll_epi64(v.raw, count1);
+  // 0xF0: upper 64 bits from by_count1, lower 64 bits from by_count0.
+  const __m128i combined = _mm_blend_epi16(by_count0, by_count1, 0xF0);
+  return Vec128<uint64_t, N>{combined};
+#endif
+}
+
+// Signed left shift: reinterpret as unsigned, shift, reinterpret back.
+template <typename T, size_t N, HWY_IF_SIGNED(T)>
+HWY_API Vec128<T, N> operator<<(const Vec128<T, N> v, const Vec128<T, N> bits) {
+  const Simd<MakeUnsigned<T>, N> du;
+  const Simd<T, N> di;
+  const auto shifted = BitCast(du, v) << BitCast(du, bits);
+  return BitCast(di, shifted);
+}
+
+// ------------------------------ Shr (mul, mask, BroadcastSignBit)
+
+// Use AVX2+ variable shifts except for the SSE4 target or 16-bit. There, we use
+// widening multiplication by powers of two obtained by loading float exponents,
+// followed by a constant right-shift. This is still faster than a scalar or
+// bit-test approach: https://gcc.godbolt.org/z/9G7Y9v.
+
+// Per-lane variable logical right shift of 16-bit lanes.
+template <size_t N>
+HWY_API Vec128<uint16_t, N> operator>>(const Vec128<uint16_t, N> in,
+                                       const Vec128<uint16_t, N> bits) {
+#if HWY_TARGET != HWY_AVX3
+  const Simd<uint16_t, N> d;
+  // Upper half of in * 2^(16 - bits) equals in >> bits. A multiplier of 2^16
+  // is not possible for bits == 0, so that case is patched up afterwards.
+  const auto multiplier = detail::Pow2(Set(d, 16) - bits);
+  const auto shifted = MulHigh(in, multiplier);
+  // Lanes with a zero shift count pass the input through unchanged.
+  return IfThenElse(bits == Zero(d), in, shifted);
+#else
+  return Vec128<uint16_t, N>{_mm_srlv_epi16(in.raw, bits.raw)};
+#endif
+}
+
+// Per-lane variable logical right shift of 32-bit lanes.
+// SSE4 target: emulated via widening multiplies by 2^(32 - bits); other
+// targets use _mm_srlv_epi32.
+template <size_t N>
+HWY_API Vec128<uint32_t, N> operator>>(const Vec128<uint32_t, N> in,
+                                       const Vec128<uint32_t, N> bits) {
+#if HWY_TARGET == HWY_SSE4
+  // 32x32 -> 64 bit mul, then shift right by 32.
+  const Simd<uint32_t, N> d32;
+  // Move odd lanes into position for the second mul. Shuffle more gracefully
+  // handles N=1 than repartitioning to u64 and shifting 32 bits right.
+  const Vec128<uint32_t, N> in31{_mm_shuffle_epi32(in.raw, 0x31)};
+  // For bits=0, we cannot mul by 2^32, so fix the result later.
+  const auto mul = detail::Pow2(Set(d32, 32) - bits);
+  const auto out20 = ShiftRight<32>(MulEven(in, mul));  // z 2 z 0
+  const Vec128<uint32_t, N> mul31{_mm_shuffle_epi32(mul.raw, 0x31)};
+  // No need to shift right, already in the correct position.
+  const auto out31 = MulEven(in31, mul31);  // 3 ? 1 ?
+  // Blend directly instead of calling OddEven: mask 0x33 takes 32-bit lanes
+  // 0 and 2 from out20 and lanes 1 and 3 from out31.
+  const Vec128<uint32_t, N> out{_mm_blend_epi16(out31.raw, out20.raw, 0x33)};
+  // Replace output with input where bits == 0.
+  return IfThenElse(bits == Zero(d32), in, out);
+#else
+  return Vec128<uint32_t, N>{_mm_srlv_epi32(in.raw, bits.raw)};
+#endif
+}
+
+// Per-lane variable logical right shift of 64-bit lanes.
+template <size_t N>
+HWY_API Vec128<uint64_t, N> operator>>(const Vec128<uint64_t, N> v,
+                                       const Vec128<uint64_t, N> bits) {
+#if HWY_TARGET != HWY_SSE4
+  return Vec128<uint64_t, N>{_mm_srlv_epi64(v.raw, bits.raw)};
+#else
+  // _mm_srl_epi64 applies the count in the low 64 bits to both lanes, so
+  // shift once per count and keep the matching lane of each result.
+  const __m128i by_count0 = _mm_srl_epi64(v.raw, bits.raw);
+  const __m128i count1 = _mm_unpackhi_epi64(bits.raw, bits.raw);
+  const __m128i by_count1 = _mm_srl_epi64(v.raw, count1);
+  // 0xF0: upper 64 bits from by_count1, lower 64 bits from by_count0.
+  const __m128i combined = _mm_blend_epi16(by_count0, by_count1, 0xF0);
+  return Vec128<uint64_t, N>{combined};
+#endif
+}
+
+#if HWY_TARGET != HWY_AVX3
+namespace detail {
+
+// Arithmetic right shift built from the unsigned (logical) shift.
+// Also used in x86_256-inl.h.
+template <class DI, class V>
+HWY_API V SignedShr(const DI di, const V v, const V count_i) {
+  const RebindToUnsigned<DI> du;
+  // All-ones in negative lanes, zero otherwise. XOR-ing with it before and
+  // after the logical shift clears and then restores the sign, which is
+  // preferable to shifting the MSB downwards because Shr is somewhat more
+  // expensive than Shl.
+  const auto sign_fill = BroadcastSignBit(v);
+  const auto magnitude = BitCast(du, v ^ sign_fill);  // off by one; fixed below
+  // The shift count must have the same (unsigned) type as the shifted value.
+  const auto shift_by = BitCast(du, count_i);
+  const auto shifted = magnitude >> shift_by;
+  return BitCast(di, shifted) ^ sign_fill;
+}
+
+}  // namespace detail
+#endif  // HWY_TARGET != HWY_AVX3
+
+// Per-lane variable arithmetic right shift; uses the native instruction on
+// AVX3 and detail::SignedShr on all other targets.
+template <size_t N>
+HWY_API Vec128<int16_t, N> operator>>(const Vec128<int16_t, N> v,
+                                      const Vec128<int16_t, N> bits) {
+#if HWY_TARGET != HWY_AVX3
+  const Simd<int16_t, N> di;
+  return detail::SignedShr(di, v, bits);
+#else
+  return Vec128<int16_t, N>{_mm_srav_epi16(v.raw, bits.raw)};
+#endif
+}
+
+template <size_t N>
+HWY_API Vec128<int32_t, N> operator>>(const Vec128<int32_t, N> v,
+                                      const Vec128<int32_t, N> bits) {
+#if HWY_TARGET != HWY_AVX3
+  const Simd<int32_t, N> di;
+  return detail::SignedShr(di, v, bits);
+#else
+  return Vec128<int32_t, N>{_mm_srav_epi32(v.raw, bits.raw)};
+#endif
+}
+
+template <size_t N>
+HWY_API Vec128<int64_t, N> operator>>(const Vec128<int64_t, N> v,
+                                      const Vec128<int64_t, N> bits) {
+#if HWY_TARGET != HWY_AVX3
+  const Simd<int64_t, N> di;
+  return detail::SignedShr(di, v, bits);
+#else
+  return Vec128<int64_t, N>{_mm_srav_epi64(v.raw, bits.raw)};
+#endif
+}
+
+// ================================================== CONVERT
+
+// ------------------------------ Promotions (part w/ narrow lanes -> full)
+
+// Unsigned sources: zero-extend each lane to the wider lane type.
+template <size_t N>
+HWY_API Vec128<uint16_t, N> PromoteTo(Simd<uint16_t, N> /* tag */,
+                                      const Vec128<uint8_t, N> v) {
+  const auto widened = _mm_cvtepu8_epi16(v.raw);
+  return Vec128<uint16_t, N>{widened};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> PromoteTo(Simd<uint32_t, N> /* tag */,
+                                      const Vec128<uint8_t, N> v) {
+  const auto widened = _mm_cvtepu8_epi32(v.raw);
+  return Vec128<uint32_t, N>{widened};
+}
+template <size_t N>
+HWY_API Vec128<int16_t, N> PromoteTo(Simd<int16_t, N> /* tag */,
+                                     const Vec128<uint8_t, N> v) {
+  const auto widened = _mm_cvtepu8_epi16(v.raw);
+  return Vec128<int16_t, N>{widened};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> PromoteTo(Simd<int32_t, N> /* tag */,
+                                     const Vec128<uint8_t, N> v) {
+  const auto widened = _mm_cvtepu8_epi32(v.raw);
+  return Vec128<int32_t, N>{widened};
+}
+template <size_t N>
+HWY_API Vec128<uint32_t, N> PromoteTo(Simd<uint32_t, N> /* tag */,
+                                      const Vec128<uint16_t, N> v) {
+  const auto widened = _mm_cvtepu16_epi32(v.raw);
+  return Vec128<uint32_t, N>{widened};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> PromoteTo(Simd<int32_t, N> /* tag */,
+                                     const Vec128<uint16_t, N> v) {
+  const auto widened = _mm_cvtepu16_epi32(v.raw);
+  return Vec128<int32_t, N>{widened};
+}
+template <size_t N>
+HWY_API Vec128<uint64_t, N> PromoteTo(Simd<uint64_t, N> /* tag */,
+                                      const Vec128<uint32_t, N> v) {
+  const auto widened = _mm_cvtepu32_epi64(v.raw);
+  return Vec128<uint64_t, N>{widened};
+}
+
+// Signed sources: sign-extend each lane to the wider lane type.
+template <size_t N>
+HWY_API Vec128<int16_t, N> PromoteTo(Simd<int16_t, N> /* tag */,
+                                     const Vec128<int8_t, N> v) {
+  const auto widened = _mm_cvtepi8_epi16(v.raw);
+  return Vec128<int16_t, N>{widened};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> PromoteTo(Simd<int32_t, N> /* tag */,
+                                     const Vec128<int8_t, N> v) {
+  const auto widened = _mm_cvtepi8_epi32(v.raw);
+  return Vec128<int32_t, N>{widened};
+}
+template <size_t N>
+HWY_API Vec128<int32_t, N> PromoteTo(Simd<int32_t, N> /* tag */,
+                                     const Vec128<int16_t, N> v) {
+  const auto widened = _mm_cvtepi16_epi32(v.raw);
+  return Vec128<int32_t, N>{widened};
+}
+template <size_t N>
+HWY_API Vec128<int64_t, N> PromoteTo(Simd<int64_t, N> /* tag */,
+                                     const Vec128<int32_t, N> v) {
+  const auto widened = _mm_cvtepi32_epi64(v.raw);
+  return Vec128<int64_t, N>{widened};
+}
+
+// Workaround for origin tracking bug in Clang msan prior to 11.0
+// (spurious "uninitialized memory" for TestF16 with "ORIGIN: invalid")
+// HWY_INLINE_F16 expands to HWY_NOINLINE for those builds and to HWY_INLINE
+// otherwise.
+#if defined(MEMORY_SANITIZER) && \
+    (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 1100)
+#define HWY_INLINE_F16 HWY_NOINLINE
+#else
+#define HWY_INLINE_F16 HWY_INLINE
+#endif
+// Promotes binary16 (float16_t) lanes to binary32 lanes.
+// SSE4 target: emulated with integer ops on the bit patterns; other targets
+// use _mm_cvtph_ps.
+template <size_t N>
+HWY_INLINE_F16 Vec128<float, N> PromoteTo(Simd<float, N> /* tag */,
+                                          const Vec128<float16_t, N> v) {
+#if HWY_TARGET == HWY_SSE4
+  const Simd<int32_t, N> di32;
+  const Simd<uint32_t, N> du32;
+  const Simd<float, N> df32;
+  // Expand to u32 so we can shift.
+  const auto bits16 = PromoteTo(du32, Vec128<uint16_t, N>{v.raw});
+  // Split the 16-bit pattern: sign (bit 15), biased exponent (bits 10..14)
+  // and mantissa (low 10 bits).
+  const auto sign = ShiftRight<15>(bits16);
+  const auto biased_exp = ShiftRight<10>(bits16) & Set(du32, 0x1F);
+  const auto mantissa = bits16 & Set(du32, 0x3FF);
+  // Value used when the exponent field is zero: mantissa * 2^-24, computed
+  // in float and kept as its bit pattern.
+  const auto subnormal =
+      BitCast(du32, ConvertTo(df32, BitCast(di32, mantissa)) *
+                        Set(df32, 1.0f / 16384 / 1024));
+
+  // Otherwise: rebias the exponent from 15 to 127 and move exponent and
+  // mantissa into their binary32 positions.
+  // NOTE(review): an all-ones exponent field (0x1F) also takes this path;
+  // there is no separate inf/NaN handling here - confirm that is intended.
+  const auto biased_exp32 = biased_exp + Set(du32, 127 - 15);
+  const auto mantissa32 = ShiftLeft<23 - 10>(mantissa);
+  const auto normal = ShiftLeft<23>(biased_exp32) | mantissa32;
+  const auto bits32 = IfThenElse(biased_exp == Zero(du32), subnormal, normal);
+  // Reattach the sign at bit 31.
+  return BitCast(df32, ShiftLeft<31>(sign) | bits32);
+#else
+  return Vec128<float, N>{_mm_cvtph_ps(v.raw)};
+#endif
+}
+
+// float -> double.
+template <size_t N>
+HWY_API Vec128<double, N> PromoteTo(Simd<double, N> /* tag */,
+                                    const Vec128<float, N> v) {
+  const auto widened = _mm_cvtps_pd(v.raw);
+  return Vec128<double, N>{widened};
+}
+
+// int32 -> double.
+template <size_t N>
+HWY_API Vec128<double, N> PromoteTo(Simd<double, N> /* tag */,
+                                    const Vec128<int32_t, N> v) {
+  const auto widened = _mm_cvtepi32_pd(v.raw);
+  return Vec128<double, N>{widened};
+}
+
+// ------------------------------ Demotions (full -> part w/ narrow lanes)
+
+// int32 -> uint16 with unsigned saturation.
+template <size_t N>
+HWY_API Vec128<uint16_t, N> DemoteTo(Simd<uint16_t, N> /* tag */,
+                                     const Vec128<int32_t, N> v) {
+  const __m128i packed = _mm_packus_epi32(v.raw, v.raw);
+  return Vec128<uint16_t, N>{packed};
+}
+
+// int32 -> int16 with signed saturation.
+template <size_t N>
+HWY_API Vec128<int16_t, N> DemoteTo(Simd<int16_t, N> /* tag */,
+                                    const Vec128<int32_t, N> v) {
+  const __m128i packed = _mm_packs_epi32(v.raw, v.raw);
+  return Vec128<int16_t, N>{packed};
+}
+
+// int32 -> uint8, in two packing steps.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> DemoteTo(Simd<uint8_t, N> /* tag */,
+                                    const Vec128<int32_t, N> v) {
+  const __m128i as_u16 = _mm_packus_epi32(v.raw, v.raw);
+  // The second packus interprets its input as signed, but as_u16 is
+  // unsigned. Clearing the MSB yields unsigned saturation to u8.
+  const __m128i msb_cleared = _mm_set1_epi16(0x7FFF);
+  const __m128i as_i16 = _mm_and_si128(as_u16, msb_cleared);
+  const __m128i packed = _mm_packus_epi16(as_i16, as_i16);
+  return Vec128<uint8_t, N>{packed};
+}
+
+// int16 -> uint8 with unsigned saturation.
+template <size_t N>
+HWY_API Vec128<uint8_t, N> DemoteTo(Simd<uint8_t, N> /* tag */,
+                                    const Vec128<int16_t, N> v) {
+  const __m128i packed = _mm_packus_epi16(v.raw, v.raw);
+  return Vec128<uint8_t, N>{packed};
+}
+
+// int32 -> int8 with signed saturation, in two packing steps.
+template <size_t N>
+HWY_API Vec128<int8_t, N> DemoteTo(Simd<int8_t, N> /* tag */,
+                                   const Vec128<int32_t, N> v) {
+  const __m128i as_i16 = _mm_packs_epi32(v.raw, v.raw);
+  const __m128i packed = _mm_packs_epi16(as_i16, as_i16);
+  return Vec128<int8_t, N>{packed};
+}
+
+// int16 -> int8 with signed saturation.
+template <size_t N>
+HWY_API Vec128<int8_t, N> DemoteTo(Simd<int8_t, N> /* tag */,
+                                   const Vec128<int16_t, N> v) {
+  const __m128i packed = _mm_packs_epi16(v.raw, v.raw);
+  return Vec128<int8_t, N>{packed};
+}
+
+// Demotes binary32 lanes to binary16 (float16_t) lanes.
+// SSE4 target: emulated with integer ops on the bit patterns; other targets
+// use _mm_cvtps_ph.
+template <size_t N>
+HWY_INLINE Vec128<float16_t, N> DemoteTo(Simd<float16_t, N> /* tag */,
+                                         const Vec128<float, N> v) {
+#if HWY_TARGET == HWY_SSE4
+  const Simd<int32_t, N> di;
+  const Simd<uint32_t, N> du;
+  const Simd<uint16_t, N> du16;
+  const Simd<float16_t, N> df16;
+  // Split the binary32 pattern: sign (bit 31), biased exponent (bits 23..30)
+  // and mantissa (low 23 bits).
+  const auto bits32 = BitCast(du, v);
+  const auto sign = ShiftRight<31>(bits32);
+  const auto biased_exp32 = ShiftRight<23>(bits32) & Set(du, 0xFF);
+  const auto mantissa32 = bits32 & Set(du, 0x7FFFFF);
+
+  // Remove the binary32 bias and clamp the exponent to at most 15.
+  // NOTE(review): the clamp means larger exponents, including the all-ones
+  // inf/NaN exponent, are encoded with exponent field 30 - confirm intended.
+  const auto k15 = Set(di, 15);
+  const auto exp = Min(BitCast(di, biased_exp32) - Set(di, 127), k15);
+  // Lanes with exponent below -24 become zero (see IfThenZeroElse below).
+  const auto is_tiny = exp < Set(di, -24);
+
+  // Exponents below -14 get a zero exponent field and a shifted mantissa.
+  const auto is_subnormal = exp < Set(di, -14);
+  const auto biased_exp16 =
+      BitCast(du, IfThenZeroElse(is_subnormal, exp + k15));
+  const auto sub_exp = BitCast(du, Set(di, -14) - exp);  // [1, 11)
+  // A one bit at position (10 - sub_exp) plus the mantissa shifted down to
+  // fit below it.
+  const auto sub_m = (Set(du, 1) << (Set(du, 10) - sub_exp)) +
+                     (mantissa32 >> (Set(du, 13) + sub_exp));
+  const auto mantissa16 = IfThenElse(RebindMask(du, is_subnormal), sub_m,
+                                     ShiftRight<13>(mantissa32));  // <1024
+
+  // Assemble sign (bit 15), exponent (bits 10..14) and mantissa; tiny lanes
+  // are zeroed entirely, including the sign bit.
+  const auto sign16 = ShiftLeft<15>(sign);
+  const auto normal16 = sign16 | ShiftLeft<10>(biased_exp16) | mantissa16;
+  const auto bits16 = IfThenZeroElse(is_tiny, BitCast(di, normal16));
+  // Narrow the 32-bit lanes to 16 bits and reinterpret as float16_t.
+  return BitCast(df16, DemoteTo(du16, bits16));
+#else
+  return Vec128<float16_t, N>{_mm_cvtps_ph(v.raw, _MM_FROUND_NO_EXC)};
+#endif
+}
+
+// double -> float.
+template <size_t N>
+HWY_INLINE Vec128<float, N> DemoteTo(Simd<float, N> /* tag */,
+                                     const Vec128<double, N> v) {
+  const auto narrowed = _mm_cvtpd_ps(v.raw);
+  return Vec128<float, N>{narrowed};
+}
+
+namespace detail {
+
+// Helpers giving float->int conversions well-defined results; shared by all
+// x86_*-inl.h.
+
+template <size_t N>
+HWY_API auto ClampF64ToI32Max(Simd<double, N> d, decltype(Zero(d)) v)
+    -> decltype(Zero(d)) {
+  // The int32 maximum is exactly representable in binary64, so limiting the
+  // input up front keeps the x86 conversion from raising an exception and
+  // returning 80..00.
+  const auto i32_max = Set(d, 2147483647.0);
+  return Min(v, i32_max);
+}
+
+// For ConvertTo float->int of the same lane size, clamping beforehand would
+// alter the result because the max integer is not exactly representable as a
+// float. Instead, spot the overflow result after conversion and repair it.
+template <typename TI, size_t N, class DF = Simd<MakeFloat<TI>, N>>
+HWY_API auto FixConversionOverflow(Simd<TI, N> di,
+                                   decltype(Zero(DF())) original,
+                                   decltype(Zero(di).raw) converted_raw)
+    -> decltype(Zero(di)) {
+  // Sign of original vs. sign of converted:
+  //   - -  normal <0, or -huge_val giving 80..00 : keep
+  //   - +  -0 giving 0                           : keep
+  //   + -  +huge_val giving 80..00               : flip all bits => 7F..FF
+  //   + +  normal >0                             : keep
+  const auto result = decltype(Zero(di)){converted_raw};
+  // MSB set only where the input was non-negative but the result is negative.
+  const auto flipped_sign = AndNot(BitCast(di, original), result);
+  const auto fixup = BroadcastSignBit(flipped_sign);
+  return BitCast(di, Xor(result, fixup));
+}
+
+}  // namespace detail
+
+// double -> int32, truncating; the input is first limited to the int32 max.
+template <size_t N>
+HWY_INLINE Vec128<int32_t, N> DemoteTo(Simd<int32_t, N> /* tag */,
+                                       const Vec128<double, N> v) {
+  const Simd<double, N> dd;
+  const auto limited = detail::ClampF64ToI32Max(dd, v);
+  const __m128i truncated = _mm_cvttpd_epi32(limited.raw);
+  return Vec128<int32_t, N>{truncated};
+}
+
+// Narrows u32 lanes to u8 by keeping only the lowest byte of each lane.
+// Intended for input already limited to [0, 255].
+template <size_t N>
+HWY_API Vec128<uint8_t, N> U8FromU32(const Vec128<uint32_t, N> v) {
+  const Simd<uint8_t, N * 4> d8;
+  const Simd<uint32_t, N> d32;
+  // Byte indices 0, 4, 8, 12 select the low byte of each 32-bit lane. The
+  // pattern is repeated in all four 32-bit lanes for safety.
+  alignas(16) static constexpr uint32_t k8From32[4] = {
+      0x0C080400u, 0x0C080400u, 0x0C080400u, 0x0C080400u};
+  const auto byte_indices = Load(d32, k8From32);
+  const auto gathered = TableLookupBytes(v, byte_indices);
+  // Only the lowest quarter of the byte vector is returned.
+  const auto half = LowerHalf(BitCast(d8, gathered));
+  return LowerHalf(half);
+}
+
+// ------------------------------ Integer <=> fp (ShiftRight, OddEven)
+
+// int32 -> float.
+template <size_t N>
+HWY_API Vec128<float, N> ConvertTo(Simd<float, N> /* tag */,
+                                   const Vec128<int32_t, N> v) {
+  const auto as_float = _mm_cvtepi32_ps(v.raw);
+  return Vec128<float, N>{as_float};
+}
+
+// int64 -> double.
+// AVX3 target: single instruction (_mm_cvtepi64_pd). Other targets: the
+// upper and lower 32 bits are embedded in the mantissas of two doubles with
+// fixed exponents, and the exponent offsets are then subtracted out.
+template <size_t N>
+HWY_API Vec128<double, N> ConvertTo(Simd<double, N> dd,
+                                    const Vec128<int64_t, N> v) {
+#if HWY_TARGET == HWY_AVX3
+  (void)dd;  // the tag is not needed in this branch
+  return Vec128<double, N>{_mm_cvtepi64_pd(v.raw)};
+#else
+  // Based on wim's approach (https://stackoverflow.com/questions/41144668/)
+  const Repartition<uint32_t, decltype(dd)> d32;
+  const Repartition<uint64_t, decltype(dd)> d64;
+
+  // Toggle MSB of lower 32-bits and insert exponent for 2^84 + 2^63
+  const auto k84_63 = Set(d64, 0x4530000080000000ULL);
+  const auto v_upper = BitCast(dd, ShiftRight<32>(BitCast(d64, v)) ^ k84_63);
+
+  // Exponent is 2^52, lower 32 bits from v (=> 32-bit OddEven)
+  const auto k52 = Set(d32, 0x43300000);
+  const auto v_lower = BitCast(dd, OddEven(k52, BitCast(d32, v)));
+
+  // Combined offset (2^84 + 2^63 + 2^52) to remove from the sum.
+  const auto k84_63_52 = BitCast(dd, Set(d64, 0x4530000080100000ULL));
+  return (v_upper - k84_63_52) + v_lower;  // order matters!
+#endif
+}
+
+// float -> int32, rounding toward zero. Positive overflow is repaired by
+// detail::FixConversionOverflow.
+template <size_t N>
+HWY_API Vec128<int32_t, N> ConvertTo(const Simd<int32_t, N> di,
+                                     const Vec128<float, N> v) {
+  const __m128i truncated = _mm_cvttps_epi32(v.raw);
+  return detail::FixConversionOverflow(di, v, truncated);
+}
+
+// double -> int64, rounding toward zero and saturating at the int64 limits.
+template <size_t N>
+HWY_API Vec128<int64_t, N> ConvertTo(Simd<int64_t, N> di,
+                                     const Vec128<double, N> v) {
+#if HWY_TARGET != HWY_AVX3
+  // No vector instruction on this target: convert lane by lane via memory.
+  alignas(16) double in[2];
+  alignas(16) int64_t out[2];
+  Store(v, Simd<double, N>(), in);
+
+  const int64_t max_i = LimitsMax<int64_t>();
+  const int64_t min_i = LimitsMin<int64_t>();
+  const double max_d = static_cast<double>(LimitsMax<int64_t>());
+  const double min_d = static_cast<double>(LimitsMin<int64_t>());
+  for (size_t lane = 0; lane < N; ++lane) {
+    const double x = in[lane];
+    if (x >= max_d) {
+      out[lane] = max_i;
+      continue;
+    }
+    if (x <= min_d) {
+      out[lane] = min_i;
+      continue;
+    }
+    // NOTE(review): NaN fails both comparisons above and reaches this cast,
+    // whose result is not defined by C++ - confirm NaN input is excluded.
+    out[lane] = static_cast<int64_t>(x);
+  }
+  return Load(di, out);
+#else
+  return detail::FixConversionOverflow(di, v, _mm_cvttpd_epi64(v.raw));
+#endif
+}
+
+// float -> int32 using _mm_cvtps_epi32 (rounds per the current rounding
+// mode rather than truncating). Positive overflow is repaired by
+// detail::FixConversionOverflow.
+template <size_t N>
+HWY_API Vec128<int32_t, N> NearestInt(const Vec128<float, N> v) {
+  const __m128i rounded = _mm_cvtps_epi32(v.raw);
+  return detail::FixConversionOverflow(Simd<int32_t, N>(), v, rounded);
+}
+
+// ================================================== MISC
+
+// Returns a vector whose lane i, for i in [0, N), equals "first" + i.
+template <typename T, size_t N, typename T2, HWY_IF_LE128(T, N)>
+Vec128<T, N> Iota(const Simd<T, N> d, const T2 first) {
+  // A full 16-byte buffer is filled regardless of N; Load reads from it.
+  constexpr size_t kFullLanes = 16 / sizeof(T);
+  HWY_ALIGN T lanes[kFullLanes];
+  size_t i = 0;
+  while (i < kFullLanes) {
+    lanes[i] = static_cast<T>(first + static_cast<T2>(i));
+    ++i;
+  }
+  return Load(d, lanes);
+}
+
+// ------------------------------ Mask
+
+namespace detail {
+
+constexpr HWY_INLINE uint64_t U64FromInt(int bits) {
+  return uint64_t{static_cast<unsigned>(bits)};  // zero-extends
+}
+
+// 8-bit lanes: movemask_epi8 collects one bit from every byte of the mask.
+template <typename T, size_t N>
+HWY_API uint64_t BitsFromMask(hwy::SizeTag<1> /*tag*/, Mask128<T, N> mask) {
+  const Simd<T, N> d;
+  const auto as_vec = BitCast(d, VecFromMask(d, mask));
+  return U64FromInt(_mm_movemask_epi8(as_vec.raw));
+}
+
+template <typename T, size_t N>
+HWY_API uint64_t BitsFromMask(hwy::SizeTag<2> /*tag*/, Mask128<T, N> mask) {
+  // 16-bit lanes: pack each u16 down to a byte (dropping its useless lower
+  // half while preserving the sign bit) so that movemask_epi8 can be used.
+  const auto narrowed = _mm_packs_epi16(mask.raw, _mm_setzero_si128());
+  return U64FromInt(_mm_movemask_epi8(narrowed));
+}
+
+// 32-bit lanes: reinterpret the mask as float lanes so that movemask_ps can
+// extract one bit per lane.
+template <typename T, size_t N>
+HWY_API uint64_t BitsFromMask(hwy::SizeTag<4> /*tag*/, Mask128<T, N> mask) {
+  const Simd<float, N> df;
+  const auto as_f32 = BitCast(df, VecFromMask(Simd<T, N>(), mask));
+  return U64FromInt(_mm_movemask_ps(as_f32.raw));
+}
+
+// 64-bit lanes: reinterpret the mask as double lanes so that movemask_pd can
+// extract one bit per lane.
+template <typename T, size_t N>
+HWY_API uint64_t BitsFromMask(hwy::SizeTag<8> /*tag*/, Mask128<T, N> mask) {
+  const Simd<double, N> df;
+  const auto as_f64 = BitCast(df, VecFromMask(Simd<T, N>(), mask));
+  return U64FromInt(_mm_movemask_pd(as_f64.raw));
+}
+
+// Keeps only the low N movemask bits; full 16-byte vectors pass through as-is.
+template <typename T, size_t N>
+constexpr uint64_t OnlyActive(uint64_t bits) {
+  return (N * sizeof(T) != 16) ? (bits & ((1ull << N) - 1)) : bits;
+}
+
+template <typename T, size_t N>  // Dispatches on sizeof(T), keeps low N bits.
+HWY_API uint64_t BitsFromMask(const Mask128<T, N> mask) {
+  return OnlyActive<T, N>(BitsFromMask(hwy::SizeTag<sizeof(T)>(), mask));
+}
+
+}  // namespace detail
+
+template <typename T, size_t N>  // Writes ceil(N/8) bytes of mask bits to p.
+HWY_INLINE size_t StoreMaskBits(const Mask128<T, N> mask, uint8_t* p) {
+  constexpr size_t kBytesWritten = (N + 7) / 8;
+  const uint64_t packed = detail::BitsFromMask(mask);
+  CopyBytes<kBytesWritten>(&packed, p);
+  return kBytesWritten;
+}
+
+template <typename T, size_t N>  // True iff no active lane of mask is set.
+HWY_API bool AllFalse(const Mask128<T, N> mask) {
+  const uint64_t set_bits = detail::BitsFromMask(mask);  // PTEST is 2 uop / 3L
+  return set_bits == 0;
+}
+
+template <typename T, size_t N>  // True iff every active lane of mask is set.
+HWY_API bool AllTrue(const Mask128<T, N> mask) {
+  constexpr uint64_t kFullVector = (1ull << (16 / sizeof(T))) - 1;
+  constexpr uint64_t kExpected = detail::OnlyActive<T, N>(kFullVector);
+  return kExpected == detail::BitsFromMask(mask);
+}
+
+template <typename T, size_t N>  // Population count of the active mask bits.
+HWY_API size_t CountTrue(const Mask128<T, N> mask) {
+  return PopCount(detail::BitsFromMask(mask));
+}
+
+// ------------------------------ Compress
+
+namespace detail {
+
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Idx16x8FromBits(const uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 256);
+  const Simd<T, N> d;
+  const Rebind<uint8_t, decltype(d)> d8;
+  const Simd<uint16_t, N> du;
+
+  // Returns byte-shuffle indices that compress the 16-bit lanes selected by
+  // mask_bits. compress_epi16 requires VBMI2 and there is no permutevar_epi16,
+  // so PSHUFB byte indices are needed (one vector for each of 256 mask
+  // combinations); storing those directly would take 4 KiB. The table instead
+  // holds one byte per lane with the doubling baked in (2*lane), and the code
+  // below expands each to the byte pair (2*lane, 2*lane+1). Unlike AVX2
+  // Compress32's packed 4-bit indices, 16-bit lanes cannot hold all nibbles,
+  // and unpacking them would likely cost more than the 2 KiB byte table.
+  alignas(16) constexpr uint8_t table[256 * 8] = {
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0,
+      0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  4,  0,  0,  0,
+      0,  0,  0,  0,  0,  4,  0,  0,  0,  0,  0,  0,  2,  4,  0,  0,  0,  0,
+      0,  0,  0,  2,  4,  0,  0,  0,  0,  0,  6,  0,  0,  0,  0,  0,  0,  0,
+      0,  6,  0,  0,  0,  0,  0,  0,  2,  6,  0,  0,  0,  0,  0,  0,  0,  2,
+      6,  0,  0,  0,  0,  0,  4,  6,  0,  0,  0,  0,  0,  0,  0,  4,  6,  0,
+      0,  0,  0,  0,  2,  4,  6,  0,  0,  0,  0,  0,  0,  2,  4,  6,  0,  0,
+      0,  0,  8,  0,  0,  0,  0,  0,  0,  0,  0,  8,  0,  0,  0,  0,  0,  0,
+      2,  8,  0,  0,  0,  0,  0,  0,  0,  2,  8,  0,  0,  0,  0,  0,  4,  8,
+      0,  0,  0,  0,  0,  0,  0,  4,  8,  0,  0,  0,  0,  0,  2,  4,  8,  0,
+      0,  0,  0,  0,  0,  2,  4,  8,  0,  0,  0,  0,  6,  8,  0,  0,  0,  0,
+      0,  0,  0,  6,  8,  0,  0,  0,  0,  0,  2,  6,  8,  0,  0,  0,  0,  0,
+      0,  2,  6,  8,  0,  0,  0,  0,  4,  6,  8,  0,  0,  0,  0,  0,  0,  4,
+      6,  8,  0,  0,  0,  0,  2,  4,  6,  8,  0,  0,  0,  0,  0,  2,  4,  6,
+      8,  0,  0,  0,  10, 0,  0,  0,  0,  0,  0,  0,  0,  10, 0,  0,  0,  0,
+      0,  0,  2,  10, 0,  0,  0,  0,  0,  0,  0,  2,  10, 0,  0,  0,  0,  0,
+      4,  10, 0,  0,  0,  0,  0,  0,  0,  4,  10, 0,  0,  0,  0,  0,  2,  4,
+      10, 0,  0,  0,  0,  0,  0,  2,  4,  10, 0,  0,  0,  0,  6,  10, 0,  0,
+      0,  0,  0,  0,  0,  6,  10, 0,  0,  0,  0,  0,  2,  6,  10, 0,  0,  0,
+      0,  0,  0,  2,  6,  10, 0,  0,  0,  0,  4,  6,  10, 0,  0,  0,  0,  0,
+      0,  4,  6,  10, 0,  0,  0,  0,  2,  4,  6,  10, 0,  0,  0,  0,  0,  2,
+      4,  6,  10, 0,  0,  0,  8,  10, 0,  0,  0,  0,  0,  0,  0,  8,  10, 0,
+      0,  0,  0,  0,  2,  8,  10, 0,  0,  0,  0,  0,  0,  2,  8,  10, 0,  0,
+      0,  0,  4,  8,  10, 0,  0,  0,  0,  0,  0,  4,  8,  10, 0,  0,  0,  0,
+      2,  4,  8,  10, 0,  0,  0,  0,  0,  2,  4,  8,  10, 0,  0,  0,  6,  8,
+      10, 0,  0,  0,  0,  0,  0,  6,  8,  10, 0,  0,  0,  0,  2,  6,  8,  10,
+      0,  0,  0,  0,  0,  2,  6,  8,  10, 0,  0,  0,  4,  6,  8,  10, 0,  0,
+      0,  0,  0,  4,  6,  8,  10, 0,  0,  0,  2,  4,  6,  8,  10, 0,  0,  0,
+      0,  2,  4,  6,  8,  10, 0,  0,  12, 0,  0,  0,  0,  0,  0,  0,  0,  12,
+      0,  0,  0,  0,  0,  0,  2,  12, 0,  0,  0,  0,  0,  0,  0,  2,  12, 0,
+      0,  0,  0,  0,  4,  12, 0,  0,  0,  0,  0,  0,  0,  4,  12, 0,  0,  0,
+      0,  0,  2,  4,  12, 0,  0,  0,  0,  0,  0,  2,  4,  12, 0,  0,  0,  0,
+      6,  12, 0,  0,  0,  0,  0,  0,  0,  6,  12, 0,  0,  0,  0,  0,  2,  6,
+      12, 0,  0,  0,  0,  0,  0,  2,  6,  12, 0,  0,  0,  0,  4,  6,  12, 0,
+      0,  0,  0,  0,  0,  4,  6,  12, 0,  0,  0,  0,  2,  4,  6,  12, 0,  0,
+      0,  0,  0,  2,  4,  6,  12, 0,  0,  0,  8,  12, 0,  0,  0,  0,  0,  0,
+      0,  8,  12, 0,  0,  0,  0,  0,  2,  8,  12, 0,  0,  0,  0,  0,  0,  2,
+      8,  12, 0,  0,  0,  0,  4,  8,  12, 0,  0,  0,  0,  0,  0,  4,  8,  12,
+      0,  0,  0,  0,  2,  4,  8,  12, 0,  0,  0,  0,  0,  2,  4,  8,  12, 0,
+      0,  0,  6,  8,  12, 0,  0,  0,  0,  0,  0,  6,  8,  12, 0,  0,  0,  0,
+      2,  6,  8,  12, 0,  0,  0,  0,  0,  2,  6,  8,  12, 0,  0,  0,  4,  6,
+      8,  12, 0,  0,  0,  0,  0,  4,  6,  8,  12, 0,  0,  0,  2,  4,  6,  8,
+      12, 0,  0,  0,  0,  2,  4,  6,  8,  12, 0,  0,  10, 12, 0,  0,  0,  0,
+      0,  0,  0,  10, 12, 0,  0,  0,  0,  0,  2,  10, 12, 0,  0,  0,  0,  0,
+      0,  2,  10, 12, 0,  0,  0,  0,  4,  10, 12, 0,  0,  0,  0,  0,  0,  4,
+      10, 12, 0,  0,  0,  0,  2,  4,  10, 12, 0,  0,  0,  0,  0,  2,  4,  10,
+      12, 0,  0,  0,  6,  10, 12, 0,  0,  0,  0,  0,  0,  6,  10, 12, 0,  0,
+      0,  0,  2,  6,  10, 12, 0,  0,  0,  0,  0,  2,  6,  10, 12, 0,  0,  0,
+      4,  6,  10, 12, 0,  0,  0,  0,  0,  4,  6,  10, 12, 0,  0,  0,  2,  4,
+      6,  10, 12, 0,  0,  0,  0,  2,  4,  6,  10, 12, 0,  0,  8,  10, 12, 0,
+      0,  0,  0,  0,  0,  8,  10, 12, 0,  0,  0,  0,  2,  8,  10, 12, 0,  0,
+      0,  0,  0,  2,  8,  10, 12, 0,  0,  0,  4,  8,  10, 12, 0,  0,  0,  0,
+      0,  4,  8,  10, 12, 0,  0,  0,  2,  4,  8,  10, 12, 0,  0,  0,  0,  2,
+      4,  8,  10, 12, 0,  0,  6,  8,  10, 12, 0,  0,  0,  0,  0,  6,  8,  10,
+      12, 0,  0,  0,  2,  6,  8,  10, 12, 0,  0,  0,  0,  2,  6,  8,  10, 12,
+      0,  0,  4,  6,  8,  10, 12, 0,  0,  0,  0,  4,  6,  8,  10, 12, 0,  0,
+      2,  4,  6,  8,  10, 12, 0,  0,  0,  2,  4,  6,  8,  10, 12, 0,  14, 0,
+      0,  0,  0,  0,  0,  0,  0,  14, 0,  0,  0,  0,  0,  0,  2,  14, 0,  0,
+      0,  0,  0,  0,  0,  2,  14, 0,  0,  0,  0,  0,  4,  14, 0,  0,  0,  0,
+      0,  0,  0,  4,  14, 0,  0,  0,  0,  0,  2,  4,  14, 0,  0,  0,  0,  0,
+      0,  2,  4,  14, 0,  0,  0,  0,  6,  14, 0,  0,  0,  0,  0,  0,  0,  6,
+      14, 0,  0,  0,  0,  0,  2,  6,  14, 0,  0,  0,  0,  0,  0,  2,  6,  14,
+      0,  0,  0,  0,  4,  6,  14, 0,  0,  0,  0,  0,  0,  4,  6,  14, 0,  0,
+      0,  0,  2,  4,  6,  14, 0,  0,  0,  0,  0,  2,  4,  6,  14, 0,  0,  0,
+      8,  14, 0,  0,  0,  0,  0,  0,  0,  8,  14, 0,  0,  0,  0,  0,  2,  8,
+      14, 0,  0,  0,  0,  0,  0,  2,  8,  14, 0,  0,  0,  0,  4,  8,  14, 0,
+      0,  0,  0,  0,  0,  4,  8,  14, 0,  0,  0,  0,  2,  4,  8,  14, 0,  0,
+      0,  0,  0,  2,  4,  8,  14, 0,  0,  0,  6,  8,  14, 0,  0,  0,  0,  0,
+      0,  6,  8,  14, 0,  0,  0,  0,  2,  6,  8,  14, 0,  0,  0,  0,  0,  2,
+      6,  8,  14, 0,  0,  0,  4,  6,  8,  14, 0,  0,  0,  0,  0,  4,  6,  8,
+      14, 0,  0,  0,  2,  4,  6,  8,  14, 0,  0,  0,  0,  2,  4,  6,  8,  14,
+      0,  0,  10, 14, 0,  0,  0,  0,  0,  0,  0,  10, 14, 0,  0,  0,  0,  0,
+      2,  10, 14, 0,  0,  0,  0,  0,  0,  2,  10, 14, 0,  0,  0,  0,  4,  10,
+      14, 0,  0,  0,  0,  0,  0,  4,  10, 14, 0,  0,  0,  0,  2,  4,  10, 14,
+      0,  0,  0,  0,  0,  2,  4,  10, 14, 0,  0,  0,  6,  10, 14, 0,  0,  0,
+      0,  0,  0,  6,  10, 14, 0,  0,  0,  0,  2,  6,  10, 14, 0,  0,  0,  0,
+      0,  2,  6,  10, 14, 0,  0,  0,  4,  6,  10, 14, 0,  0,  0,  0,  0,  4,
+      6,  10, 14, 0,  0,  0,  2,  4,  6,  10, 14, 0,  0,  0,  0,  2,  4,  6,
+      10, 14, 0,  0,  8,  10, 14, 0,  0,  0,  0,  0,  0,  8,  10, 14, 0,  0,
+      0,  0,  2,  8,  10, 14, 0,  0,  0,  0,  0,  2,  8,  10, 14, 0,  0,  0,
+      4,  8,  10, 14, 0,  0,  0,  0,  0,  4,  8,  10, 14, 0,  0,  0,  2,  4,
+      8,  10, 14, 0,  0,  0,  0,  2,  4,  8,  10, 14, 0,  0,  6,  8,  10, 14,
+      0,  0,  0,  0,  0,  6,  8,  10, 14, 0,  0,  0,  2,  6,  8,  10, 14, 0,
+      0,  0,  0,  2,  6,  8,  10, 14, 0,  0,  4,  6,  8,  10, 14, 0,  0,  0,
+      0,  4,  6,  8,  10, 14, 0,  0,  2,  4,  6,  8,  10, 14, 0,  0,  0,  2,
+      4,  6,  8,  10, 14, 0,  12, 14, 0,  0,  0,  0,  0,  0,  0,  12, 14, 0,
+      0,  0,  0,  0,  2,  12, 14, 0,  0,  0,  0,  0,  0,  2,  12, 14, 0,  0,
+      0,  0,  4,  12, 14, 0,  0,  0,  0,  0,  0,  4,  12, 14, 0,  0,  0,  0,
+      2,  4,  12, 14, 0,  0,  0,  0,  0,  2,  4,  12, 14, 0,  0,  0,  6,  12,
+      14, 0,  0,  0,  0,  0,  0,  6,  12, 14, 0,  0,  0,  0,  2,  6,  12, 14,
+      0,  0,  0,  0,  0,  2,  6,  12, 14, 0,  0,  0,  4,  6,  12, 14, 0,  0,
+      0,  0,  0,  4,  6,  12, 14, 0,  0,  0,  2,  4,  6,  12, 14, 0,  0,  0,
+      0,  2,  4,  6,  12, 14, 0,  0,  8,  12, 14, 0,  0,  0,  0,  0,  0,  8,
+      12, 14, 0,  0,  0,  0,  2,  8,  12, 14, 0,  0,  0,  0,  0,  2,  8,  12,
+      14, 0,  0,  0,  4,  8,  12, 14, 0,  0,  0,  0,  0,  4,  8,  12, 14, 0,
+      0,  0,  2,  4,  8,  12, 14, 0,  0,  0,  0,  2,  4,  8,  12, 14, 0,  0,
+      6,  8,  12, 14, 0,  0,  0,  0,  0,  6,  8,  12, 14, 0,  0,  0,  2,  6,
+      8,  12, 14, 0,  0,  0,  0,  2,  6,  8,  12, 14, 0,  0,  4,  6,  8,  12,
+      14, 0,  0,  0,  0,  4,  6,  8,  12, 14, 0,  0,  2,  4,  6,  8,  12, 14,
+      0,  0,  0,  2,  4,  6,  8,  12, 14, 0,  10, 12, 14, 0,  0,  0,  0,  0,
+      0,  10, 12, 14, 0,  0,  0,  0,  2,  10, 12, 14, 0,  0,  0,  0,  0,  2,
+      10, 12, 14, 0,  0,  0,  4,  10, 12, 14, 0,  0,  0,  0,  0,  4,  10, 12,
+      14, 0,  0,  0,  2,  4,  10, 12, 14, 0,  0,  0,  0,  2,  4,  10, 12, 14,
+      0,  0,  6,  10, 12, 14, 0,  0,  0,  0,  0,  6,  10, 12, 14, 0,  0,  0,
+      2,  6,  10, 12, 14, 0,  0,  0,  0,  2,  6,  10, 12, 14, 0,  0,  4,  6,
+      10, 12, 14, 0,  0,  0,  0,  4,  6,  10, 12, 14, 0,  0,  2,  4,  6,  10,
+      12, 14, 0,  0,  0,  2,  4,  6,  10, 12, 14, 0,  8,  10, 12, 14, 0,  0,
+      0,  0,  0,  8,  10, 12, 14, 0,  0,  0,  2,  8,  10, 12, 14, 0,  0,  0,
+      0,  2,  8,  10, 12, 14, 0,  0,  4,  8,  10, 12, 14, 0,  0,  0,  0,  4,
+      8,  10, 12, 14, 0,  0,  2,  4,  8,  10, 12, 14, 0,  0,  0,  2,  4,  8,
+      10, 12, 14, 0,  6,  8,  10, 12, 14, 0,  0,  0,  0,  6,  8,  10, 12, 14,
+      0,  0,  2,  6,  8,  10, 12, 14, 0,  0,  0,  2,  6,  8,  10, 12, 14, 0,
+      4,  6,  8,  10, 12, 14, 0,  0,  0,  4,  6,  8,  10, 12, 14, 0,  2,  4,
+      6,  8,  10, 12, 14, 0,  0,  2,  4,  6,  8,  10, 12, 14};
+  // Duplicate each index into a byte pair, then add 1 to the pair's upper byte.
+  const Vec128<uint8_t, 2 * N> byte_idx{Load(d8, table + mask_bits * 8).raw};
+  const Vec128<uint16_t, N> pairs = ZipLower(byte_idx, byte_idx);
+  return BitCast(d, pairs + Set(du, 0x0100));
+}
+
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Idx32x4FromBits(const uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 16);
+  // Byte-shuffle indices for compressing 32-bit lanes: row mask_bits lists the
+  // bytes of the selected lanes first. Only 4 lanes, so rows are stored whole.
+  alignas(16) constexpr uint8_t packed_array[16 * 16] = {
+      0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  //
+      4,  5,  6,  7,  0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  0,  1,  2,  3,  0,  1,  2,  3,  //
+      8,  9,  10, 11, 0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  8,  9,  10, 11, 0,  1,  2,  3,  0,  1,  2,  3,  //
+      4,  5,  6,  7,  8,  9,  10, 11, 0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 0,  1,  2,  3,  //
+      12, 13, 14, 15, 0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  12, 13, 14, 15, 0,  1,  2,  3,  0,  1,  2,  3,  //
+      4,  5,  6,  7,  12, 13, 14, 15, 0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  12, 13, 14, 15, 0,  1,  2,  3,  //
+      8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  0,  1,  2,  3,  //
+      0,  1,  2,  3,  8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  //
+      4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  //
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15};
+  // Each row is 16 bytes, hence the 16 * mask_bits offset into the table.
+  const Simd<T, N> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  return BitCast(d, Load(d8, packed_array + 16 * mask_bits));
+}
+
+template <typename T, size_t N>
+HWY_INLINE Vec128<T, N> Idx64x2FromBits(const uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 4);
+  // Byte-shuffle indices for compressing 64-bit lanes, one 16-byte row per
+  // mask value. Only 2 lanes, so we can afford to store the rows whole.
+  alignas(16) constexpr uint8_t packed_array[4 * 16] = {
+      0, 1, 2,  3,  4,  5,  6,  7,  0, 1, 2,  3,  4,  5,  6,  7,  //
+      0, 1, 2,  3,  4,  5,  6,  7,  0, 1, 2,  3,  4,  5,  6,  7,  //
+      8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2,  3,  4,  5,  6,  7,  //
+      0, 1, 2,  3,  4,  5,  6,  7,  8, 9, 10, 11, 12, 13, 14, 15};
+  // Each row is 16 bytes, hence the 16 * mask_bits offset into the table.
+  const Simd<T, N> d;
+  const Repartition<uint8_t, decltype(d)> d8;
+  return BitCast(d, Load(d8, packed_array + 16 * mask_bits));
+}
+
+// Helper overloads called by both Compress and CompressStore; taking the
+// already-extracted mask bits avoids a redundant BitsFromMask in the latter.
+// 16-bit lanes: byte shuffle driven by the Idx16x8FromBits table.
+template <typename T, size_t N>
+HWY_API Vec128<T, N> Compress(hwy::SizeTag<2> /*tag*/, Vec128<T, N> v,
+                              const uint64_t mask_bits) {
+  using D = Simd<T, N>;
+  const RebindToSigned<D> di;
+  const auto shuffle = BitCast(di, detail::Idx16x8FromBits<T, N>(mask_bits));
+  return BitCast(D(), TableLookupBytes(BitCast(di, v), shuffle));
+}
+
+template <typename T, size_t N>  // 32-bit lanes
+HWY_API Vec128<T, N> Compress(hwy::SizeTag<4> /*tag*/, Vec128<T, N> v,
+                              const uint64_t mask_bits) {
+  using TI = MakeSigned<T>;
+  const Simd<T, N> d;
+  const Rebind<TI, decltype(d)> di;
+  const auto vi = BitCast(di, v);  // signed-integer view of the input
+#if HWY_TARGET == HWY_AVX3
+  return BitCast(d, Vec128<TI, N>{_mm_maskz_compress_epi32(mask_bits, vi.raw)});
+#else
+  const auto shuffle = BitCast(di, detail::Idx32x4FromBits<T, N>(mask_bits));
+  return BitCast(d, TableLookupBytes(vi, shuffle));
+#endif
+}
+
+template <typename T, size_t N>  // 64-bit lanes
+HWY_API Vec128<T, N> Compress(hwy::SizeTag<8> /*tag*/, Vec128<T, N> v,
+                              const uint64_t mask_bits) {
+  using TI = MakeSigned<T>;
+  const Simd<T, N> d;
+  const Rebind<TI, decltype(d)> di;
+  const auto vi = BitCast(di, v);  // signed-integer view of the input
+#if HWY_TARGET == HWY_AVX3
+  return BitCast(d, Vec128<TI, N>{_mm_maskz_compress_epi64(mask_bits, vi.raw)});
+#else
+  const auto shuffle = BitCast(di, detail::Idx64x2FromBits<T, N>(mask_bits));
+  return BitCast(d, TableLookupBytes(vi, shuffle));
+#endif
+}
+
+}  // namespace detail
+
+template <typename T, size_t N>  // Packs lanes whose mask is set to the front.
+HWY_API Vec128<T, N> Compress(Vec128<T, N> v, const Mask128<T, N> mask) {
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  return detail::Compress(hwy::SizeTag<sizeof(T)>(), v, mask_bits);
+}
+
+// ------------------------------ CompressStore
+
+template <typename T, size_t N>  // Returns the number of lanes set in mask.
+HWY_API size_t CompressStore(Vec128<T, N> v, const Mask128<T, N> mask,
+                             Simd<T, N> d, T* HWY_RESTRICT aligned) {
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  const auto packed = detail::Compress(hwy::SizeTag<sizeof(T)>(), v, mask_bits);
+  // Avoid _mm_maskmoveu_si128 (>500 cycle latency because it bypasses caches).
+  Store(packed, d, aligned); return PopCount(mask_bits);
+}
+
+// ------------------------------ StoreInterleaved3 (CombineShiftRightBytes,
+// TableLookupBytes)
+
+// 128 bits: interleaves v0, v1, v2 (named r, g, b below) into 48 output bytes.
+HWY_API void StoreInterleaved3(const Vec128<uint8_t> v0,
+                               const Vec128<uint8_t> v1,
+                               const Vec128<uint8_t> v2, Full128<uint8_t> d,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  const auto k5 = Set(d, 5);
+  const auto k6 = Set(d, 6);
+  // k5/k6 advance the shuffle indices; 0x80 entries keep their MSB when added.
+  // Shuffle (v0,v1,v2) vector bytes to (MSB on left): r5, bgr[4:0].
+  // 0x80 so lanes to be filled from other vectors are 0 for blending.
+  alignas(16) static constexpr uint8_t tbl_r0[16] = {
+      0, 0x80, 0x80, 1, 0x80, 0x80, 2, 0x80, 0x80,  //
+      3, 0x80, 0x80, 4, 0x80, 0x80, 5};
+  alignas(16) static constexpr uint8_t tbl_g0[16] = {
+      0x80, 0, 0x80, 0x80, 1, 0x80,  //
+      0x80, 2, 0x80, 0x80, 3, 0x80, 0x80, 4, 0x80, 0x80};
+  const auto shuf_r0 = Load(d, tbl_r0);
+  const auto shuf_g0 = Load(d, tbl_g0);  // cannot reuse r0 due to 5 in MSB
+  const auto shuf_b0 = CombineShiftRightBytes<15>(shuf_g0, shuf_g0);
+  const auto r0 = TableLookupBytes(v0, shuf_r0);  // 5..4..3..2..1..0
+  const auto g0 = TableLookupBytes(v1, shuf_g0);  // ..4..3..2..1..0.
+  const auto b0 = TableLookupBytes(v2, shuf_b0);  // .4..3..2..1..0..
+  const auto int0 = r0 | g0 | b0;
+  StoreU(int0, d, unaligned + 0 * 16);
+
+  // Second vector: g10,r10, bgr[9:6], b5,g5
+  const auto shuf_r1 = shuf_b0 + k6;  // .A..9..8..7..6..
+  const auto shuf_g1 = shuf_r0 + k5;  // A..9..8..7..6..5
+  const auto shuf_b1 = shuf_g0 + k5;  // ..9..8..7..6..5.
+  const auto r1 = TableLookupBytes(v0, shuf_r1);
+  const auto g1 = TableLookupBytes(v1, shuf_g1);
+  const auto b1 = TableLookupBytes(v2, shuf_b1);
+  const auto int1 = r1 | g1 | b1;
+  StoreU(int1, d, unaligned + 1 * 16);
+
+  // Third vector: bgr[15:11], b10
+  const auto shuf_r2 = shuf_b1 + k6;  // ..F..E..D..C..B.
+  const auto shuf_g2 = shuf_r1 + k5;  // .F..E..D..C..B..
+  const auto shuf_b2 = shuf_g1 + k5;  // F..E..D..C..B..A
+  const auto r2 = TableLookupBytes(v0, shuf_r2);
+  const auto g2 = TableLookupBytes(v1, shuf_g2);
+  const auto b2 = TableLookupBytes(v2, shuf_b2);
+  const auto int2 = r2 | g2 | b2;
+  StoreU(int2, d, unaligned + 2 * 16);
+}
+
+// 64 bits: interleaves v0, v1, v2 (8 bytes each) into 24 output bytes.
+HWY_API void StoreInterleaved3(const Vec128<uint8_t, 8> v0,
+                               const Vec128<uint8_t, 8> v1,
+                               const Vec128<uint8_t, 8> v2, Simd<uint8_t, 8> d,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  // Use full vectors for the shuffles and first result.
+  const Full128<uint8_t> d_full;
+  const auto k5 = Set(d_full, 5);
+  const auto k6 = Set(d_full, 6);
+  // k5/k6 advance the shuffle indices; 0x80 entries keep their MSB when added.
+  const Vec128<uint8_t> full_a{v0.raw};
+  const Vec128<uint8_t> full_b{v1.raw};
+  const Vec128<uint8_t> full_c{v2.raw};
+
+  // Shuffle (v0,v1,v2) vector bytes to (MSB on left): r5, bgr[4:0].
+  // 0x80 so lanes to be filled from other vectors are 0 for blending.
+  alignas(16) static constexpr uint8_t tbl_r0[16] = {
+      0, 0x80, 0x80, 1, 0x80, 0x80, 2, 0x80, 0x80,  //
+      3, 0x80, 0x80, 4, 0x80, 0x80, 5};
+  alignas(16) static constexpr uint8_t tbl_g0[16] = {
+      0x80, 0, 0x80, 0x80, 1, 0x80,  //
+      0x80, 2, 0x80, 0x80, 3, 0x80, 0x80, 4, 0x80, 0x80};
+  const auto shuf_r0 = Load(d_full, tbl_r0);
+  const auto shuf_g0 = Load(d_full, tbl_g0);  // cannot reuse r0 due to 5 in MSB
+  const auto shuf_b0 = CombineShiftRightBytes<15>(shuf_g0, shuf_g0);
+  const auto r0 = TableLookupBytes(full_a, shuf_r0);  // 5..4..3..2..1..0
+  const auto g0 = TableLookupBytes(full_b, shuf_g0);  // ..4..3..2..1..0.
+  const auto b0 = TableLookupBytes(full_c, shuf_b0);  // .4..3..2..1..0..
+  const auto int0 = r0 | g0 | b0;
+  StoreU(int0, d_full, unaligned + 0 * 16);
+
+  // Second (HALF) vector: bgr[7:6], b5,g5
+  const auto shuf_r1 = shuf_b0 + k6;  // ..7..6..
+  const auto shuf_g1 = shuf_r0 + k5;  // .7..6..5
+  const auto shuf_b1 = shuf_g0 + k5;  // 7..6..5.
+  const auto r1 = TableLookupBytes(full_a, shuf_r1);
+  const auto g1 = TableLookupBytes(full_b, shuf_g1);
+  const auto b1 = TableLookupBytes(full_c, shuf_b1);
+  const decltype(Zero(d)) int1{(r1 | g1 | b1).raw};
+  StoreU(int1, d, unaligned + 1 * 16);
+}
+
+// <= 32 bits: interleaves v0, v1, v2 into N * 3 output bytes.
+template <size_t N, HWY_IF_LE32(uint8_t, N)>
+HWY_API void StoreInterleaved3(const Vec128<uint8_t, N> v0,
+                               const Vec128<uint8_t, N> v1,
+                               const Vec128<uint8_t, N> v2,
+                               Simd<uint8_t, N> /*tag*/,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  // Use full vectors for the shuffles and result.
+  const Full128<uint8_t> d_full;
+
+  const Vec128<uint8_t> full_a{v0.raw};
+  const Vec128<uint8_t> full_b{v1.raw};
+  const Vec128<uint8_t> full_c{v2.raw};
+
+  // Shuffle (v0,v1,v2) vector bytes to bgr[3:0].
+  // 0x80 so lanes to be filled from other vectors are 0 for blending.
+  alignas(16) static constexpr uint8_t tbl_r0[16] = {
+      0,    0x80, 0x80, 1,   0x80, 0x80, 2, 0x80, 0x80, 3, 0x80, 0x80,  //
+      0x80, 0x80, 0x80, 0x80};
+  const auto shuf_r0 = Load(d_full, tbl_r0);
+  const auto shuf_g0 = CombineShiftRightBytes<15>(shuf_r0, shuf_r0);
+  const auto shuf_b0 = CombineShiftRightBytes<14>(shuf_r0, shuf_r0);
+  const auto r0 = TableLookupBytes(full_a, shuf_r0);  // ......3..2..1..0
+  const auto g0 = TableLookupBytes(full_b, shuf_g0);  // .....3..2..1..0.
+  const auto b0 = TableLookupBytes(full_c, shuf_b0);  // ....3..2..1..0..
+  const auto int0 = r0 | g0 | b0;
+  alignas(16) uint8_t buf[16];  // staging area for the full-vector result
+  StoreU(int0, d_full, buf);
+  CopyBytes<N * 3>(buf, unaligned);  // only N * 3 bytes reach the destination
+}
+
+// ------------------------------ StoreInterleaved4
+
+// 128 bits: zips v0..v3 bytewise and writes four 16-byte vectors (64 bytes)
+// to unaligned.
+HWY_API void StoreInterleaved4(const Vec128<uint8_t> v0,
+                               const Vec128<uint8_t> v1,
+                               const Vec128<uint8_t> v2,
+                               const Vec128<uint8_t> v3, Full128<uint8_t> d,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  // First pair up bytes of (v0,v1) and of (v2,v3) ...
+  const auto lo01 = ZipLower(v0, v1);  // v1[7] v0[7] .. v1[0] v0[0]
+  const auto hi01 = ZipUpper(v0, v1);  // v1[F] v0[F] .. v1[8] v0[8]
+  const auto lo23 = ZipLower(v2, v3);  // v3[7] v2[7] .. v3[0] v2[0]
+  const auto hi23 = ZipUpper(v2, v3);  // v3[F] v2[F] .. v3[8] v2[8]
+  // ... then zip those pairs into (v3,v2,v1,v0) groups, four per store.
+  const auto quads_0 = ZipLower(lo01, lo23);  // elements 3..0
+  const auto quads_4 = ZipUpper(lo01, lo23);  // elements 7..4
+  StoreU(BitCast(d, quads_0), d, unaligned + 0 * 16);
+  StoreU(BitCast(d, quads_4), d, unaligned + 1 * 16);
+  StoreU(BitCast(d, ZipLower(hi01, hi23)), d, unaligned + 2 * 16);  // B..8
+  StoreU(BitCast(d, ZipUpper(hi01, hi23)), d, unaligned + 3 * 16);  // F..C
+}
+
+// 64 bits: zips in0..in3 bytewise and writes two 16-byte vectors (32 bytes)
+// to unaligned.
+HWY_API void StoreInterleaved4(const Vec128<uint8_t, 8> in0,
+                               const Vec128<uint8_t, 8> in1,
+                               const Vec128<uint8_t, 8> in2,
+                               const Vec128<uint8_t, 8> in3,
+                               Simd<uint8_t, 8> /*tag*/,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  // Reinterpret the halves as full vectors to reduce the number of stores.
+  using VFull = Vec128<uint8_t>;
+  const Full128<uint8_t> d_full;
+  const VFull a{in0.raw};
+  const VFull b{in1.raw};
+  const VFull c{in2.raw};
+  const VFull d{in3.raw};
+  const auto ba = ZipLower(a, b);  // b7 a7 .. b0 a0
+  const auto dc = ZipLower(c, d);  // d7 c7 .. d0 c0
+  // Zip the byte pairs into d,c,b,a groups: elements 3..0, then 7..4.
+  StoreU(BitCast(d_full, ZipLower(ba, dc)), d_full, unaligned + 0 * 16);
+  StoreU(BitCast(d_full, ZipUpper(ba, dc)), d_full, unaligned + 1 * 16);
+}
+
+// <= 32 bits: zips in0..in3 bytewise and writes 4 * N bytes to unaligned.
+template <size_t N, HWY_IF_LE32(uint8_t, N)>
+HWY_API void StoreInterleaved4(const Vec128<uint8_t, N> in0,
+                               const Vec128<uint8_t, N> in1,
+                               const Vec128<uint8_t, N> in2,
+                               const Vec128<uint8_t, N> in3,
+                               Simd<uint8_t, N> /*tag*/,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  // Work on full vectors; only the lowest 4 * N result bytes are copied out.
+  using VFull = Vec128<uint8_t>;
+  const Full128<uint8_t> d_full;
+  const VFull a{in0.raw};
+  const VFull b{in1.raw};
+  const VFull c{in2.raw};
+  const VFull d{in3.raw};
+  const auto ba = ZipLower(a, b);  // b3 a3 .. b0 a0
+  const auto dc = ZipLower(c, d);  // d3 c3 .. d0 c0
+  // Stage the full 16-byte result locally, then copy just the needed prefix.
+  alignas(16) uint8_t staged[16];
+  StoreU(BitCast(d_full, ZipLower(ba, dc)), d_full, staged);
+  CopyBytes<4 * N>(staged, unaligned);
+}
+
+// ------------------------------ Reductions
+
+namespace detail {
+
+// N=1 for any T: no-op. The single lane is already its own sum, min and max.
+template <typename T>
+HWY_API Vec128<T, 1> SumOfLanes(hwy::SizeTag<sizeof(T)> /* tag */,
+                                const Vec128<T, 1> v) {
+  return v;  // sum of one lane
+}
+template <typename T>
+HWY_API Vec128<T, 1> MinOfLanes(hwy::SizeTag<sizeof(T)> /* tag */,
+                                const Vec128<T, 1> v) {
+  return v;  // min of one lane
+}
+template <typename T>
+HWY_API Vec128<T, 1> MaxOfLanes(hwy::SizeTag<sizeof(T)> /* tag */,
+                                const Vec128<T, 1> v) {
+  return v;  // max of one lane
+}
+
+// u32/i32/f32:
+
+// N=2: combine the vector with a copy whose two lanes are exchanged.
+template <typename T>
+HWY_API Vec128<T, 2> SumOfLanes(hwy::SizeTag<4> /* tag */, Vec128<T, 2> v10) {
+  const Vec128<T, 2> v01{Shuffle2301(Vec128<T>{v10.raw}).raw};
+  return v10 + v01;
+}
+template <typename T>
+HWY_API Vec128<T, 2> MinOfLanes(hwy::SizeTag<4> /* tag */, Vec128<T, 2> v10) {
+  const Vec128<T, 2> v01{Shuffle2301(Vec128<T>{v10.raw}).raw};
+  return Min(v10, v01);
+}
+template <typename T>
+HWY_API Vec128<T, 2> MaxOfLanes(hwy::SizeTag<4> /* tag */, Vec128<T, 2> v10) {
+  const Vec128<T, 2> v01{Shuffle2301(Vec128<T>{v10.raw}).raw};
+  return Max(v10, v01);
+}
+
+// N=4 (full): two shuffle-and-combine steps.
+template <typename T>
+HWY_API Vec128<T> SumOfLanes(hwy::SizeTag<4> /* tag */, const Vec128<T> v3210) {
+  const auto sum_31_20 = v3210 + Shuffle1032(v3210);  // lanes: 3+1, 2+0, ..
+  const auto sum_20_31 = Shuffle0321(sum_31_20);      // lanes: 2+0, 3+1, ..
+  // Each lane now pairs a (3+1) partial sum with a (2+0) partial sum.
+  return sum_20_31 + sum_31_20;
+}
+template <typename T>
+HWY_API Vec128<T> MinOfLanes(hwy::SizeTag<4> /* tag */, const Vec128<T> v3210) {
+  const auto min_31_20 = Min(v3210, Shuffle1032(v3210));  // min(3,1), min(2,0)
+  const auto min_20_31 = Shuffle0321(min_31_20);          // min(2,0), min(3,1)
+  // Each lane now pairs a min(3,1) with a min(2,0).
+  return Min(min_20_31, min_31_20);
+}
+template <typename T>
+HWY_API Vec128<T> MaxOfLanes(hwy::SizeTag<4> /* tag */, const Vec128<T> v3210) {
+  const auto max_31_20 = Max(v3210, Shuffle1032(v3210));  // max(3,1), max(2,0)
+  const auto max_20_31 = Shuffle0321(max_31_20);          // max(2,0), max(3,1)
+  // Each lane now pairs a max(3,1) with a max(2,0).
+  return Max(max_20_31, max_31_20);
+}
+
+// u64/i64/f64:
+
+// N=2 (full): combine the vector with its Shuffle01 counterpart.
+template <typename T>
+HWY_API Vec128<T> SumOfLanes(hwy::SizeTag<8> /* tag */, const Vec128<T> v10) {
+  // Lane 1 + lane 0, formed in both lanes.
+  return v10 + Shuffle01(v10);
+}
+template <typename T>
+HWY_API Vec128<T> MinOfLanes(hwy::SizeTag<8> /* tag */, const Vec128<T> v10) {
+  // min(lane 1, lane 0), formed in both lanes.
+  return Min(v10, Shuffle01(v10));
+}
+template <typename T>
+HWY_API Vec128<T> MaxOfLanes(hwy::SizeTag<8> /* tag */, const Vec128<T> v10) {
+  // max(lane 1, lane 0), formed in both lanes.
+  return Max(v10, Shuffle01(v10));
+}
+
+}  // namespace detail
+
+// Supported for u/i/f 32/64. Returns the same value in each lane.
+template <typename T, size_t N>  // Sum; overload chosen by sizeof(T) tag.
+HWY_API Vec128<T, N> SumOfLanes(const Vec128<T, N> v) {
+  return detail::SumOfLanes(hwy::SizeTag<sizeof(T)>(), v);
+}
+template <typename T, size_t N>  // Minimum; overload chosen by sizeof(T) tag.
+HWY_API Vec128<T, N> MinOfLanes(const Vec128<T, N> v) {
+  return detail::MinOfLanes(hwy::SizeTag<sizeof(T)>(), v);
+}
+template <typename T, size_t N>  // Maximum; overload chosen by sizeof(T) tag.
+HWY_API Vec128<T, N> MaxOfLanes(const Vec128<T, N> v) {
+  return detail::MaxOfLanes(hwy::SizeTag<sizeof(T)>(), v);
+}
+
+// ================================================== Operator wrapper
+
+// These apply to all x86_*-inl.h because there are no restrictions on V.
+
+template <class V>  // Function form of operator+.
+HWY_API V Add(V a, V b) {
+  return a + b;
+}
+template <class V>  // Function form of operator-.
+HWY_API V Sub(V a, V b) {
+  return a - b;
+}
+
+template <class V>  // Function form of operator*.
+HWY_API V Mul(V a, V b) {
+  return a * b;
+}
+template <class V>  // Function form of operator/.
+HWY_API V Div(V a, V b) {
+  return a / b;
+}
+
+template <class V>  // Function form of operator<< (b supplies the counts).
+HWY_API V Shl(V a, V b) {
+  return a << b;
+}
+template <class V>  // Function form of operator>> (b supplies the counts).
+HWY_API V Shr(V a, V b) {
+  return a >> b;
+}
+
+template <class V>  // Function form of operator==.
+HWY_API auto Eq(V a, V b) -> decltype(a == b) {
+  return a == b;
+}
+template <class V>  // Function form of operator<; same result type as ==.
+HWY_API auto Lt(V a, V b) -> decltype(a == b) {
+  return a < b;
+}
+
+template <class V>  // Function form of operator>; same result type as ==.
+HWY_API auto Gt(V a, V b) -> decltype(a == b) {
+  return a > b;
+}
+template <class V>  // Function form of operator>=; same result type as ==.
+HWY_API auto Ge(V a, V b) -> decltype(a == b) {
+  return a >= b;
+}
+
+template <class V>  // Function form of operator<=; same result type as ==.
+HWY_API auto Le(V a, V b) -> decltype(a == b) {
+  return a <= b;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/x86_256-inl.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/x86_256-inl.h
new file mode 100644
index 0000000000..b934140f0c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/x86_256-inl.h
@@ -0,0 +1,2956 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// 256-bit vectors and AVX2 instructions, plus some AVX512-VL operations when
+// compiling for that target.
+// External include guard in highway.h - see comment there.
+
+// WARNING: most operations do not cross 128-bit block boundaries. In
+// particular, "Broadcast", pack and zip behavior may be surprising.
+
+#include <immintrin.h>  // AVX2+
+
+#if defined(_MSC_VER) && defined(__clang__)
+// Including <immintrin.h> should be enough, but Clang's headers helpfully skip
+// including these headers when _MSC_VER is defined, like when using clang-cl.
+// Include these directly here.
+#include <avxintrin.h>
+// avxintrin defines __m256i and must come before avx2intrin.
+#include <avx2intrin.h>
+#include <bmi2intrin.h>  // _pext_u64
+#include <f16cintrin.h>
+#include <fmaintrin.h>
+#include <smmintrin.h>
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+// For half-width vectors. Already includes base.h and shared-inl.h.
+#include "hwy/ops/x86_128-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Maps a lane type to the raw 256-bit register type that stores it: all
+// integer lanes share __m256i, whereas float and double have their own types.
+template <typename T>
+struct Raw256 {
+  typedef __m256i type;
+};
+template <>
+struct Raw256<double> {
+  typedef __m256d type;
+};
+template <>
+struct Raw256<float> {
+  typedef __m256 type;
+};
+
+// Descriptor (tag) for a full 256-bit vector: 32 bytes worth of T lanes.
+template <typename T>
+using Full256 = Simd<T, 32 / sizeof(T)>;
+
+// Thin wrapper around the raw 256-bit register type that holds lanes of T.
+template <typename T>
+class Vec256 {
+  using Raw = typename Raw256<T>::type;
+
+ public:
+  // Compound assignments. Each one forwards to the non-member binary operator
+  // of the same kind, so it is only usable where that overload exists (for
+  // example, only f32 and f64 support division).
+  HWY_INLINE Vec256& operator+=(const Vec256 other) {
+    *this = *this + other;
+    return *this;
+  }
+  HWY_INLINE Vec256& operator-=(const Vec256 other) {
+    *this = *this - other;
+    return *this;
+  }
+  HWY_INLINE Vec256& operator*=(const Vec256 other) {
+    *this = *this * other;
+    return *this;
+  }
+  HWY_INLINE Vec256& operator/=(const Vec256 other) {
+    *this = *this / other;
+    return *this;
+  }
+  HWY_INLINE Vec256& operator&=(const Vec256 other) {
+    *this = *this & other;
+    return *this;
+  }
+  HWY_INLINE Vec256& operator|=(const Vec256 other) {
+    *this = *this | other;
+    return *this;
+  }
+  HWY_INLINE Vec256& operator^=(const Vec256 other) {
+    *this = *this ^ other;
+    return *this;
+  }
+
+  // The underlying register; public so that ops can access it directly.
+  Raw raw;
+};
+
+// Per-lane mask stored in the same register type as the matching Vec256.
+// Integer lanes are FF..FF (true) or 0 (false). For float lanes only the MSB
+// is meaningful; all other bits are undefined - see README.
+template <typename T>
+class Mask256 {
+ public:
+  typename Raw256<T>::type raw;
+};
+
+// ------------------------------ BitCast
+
+namespace detail {
+
+// Reinterprets any of the three raw 256-bit register types as __m256i.
+HWY_API __m256i BitCastToInteger(__m256d v) { return _mm256_castpd_si256(v); }
+HWY_API __m256i BitCastToInteger(__m256 v) { return _mm256_castps_si256(v); }
+HWY_API __m256i BitCastToInteger(__m256i v) { return v; }
+
+// Reinterprets a vector of any lane type as a vector of bytes.
+template <typename T>
+HWY_API Vec256<uint8_t> BitCastToByte(Vec256<T> v) {
+  const __m256i as_int = BitCastToInteger(v.raw);
+  return Vec256<uint8_t>{as_int};
+}
+
+// Functor converting __m256i to the raw type for T. A class template is used
+// because function overloads cannot differ only in their return type.
+template <typename T>
+struct BitCastFromInteger256 {
+  HWY_INLINE __m256i operator()(__m256i v) { return v; }
+};
+template <>
+struct BitCastFromInteger256<double> {
+  HWY_INLINE __m256d operator()(__m256i v) { return _mm256_castsi256_pd(v); }
+};
+template <>
+struct BitCastFromInteger256<float> {
+  HWY_INLINE __m256 operator()(__m256i v) { return _mm256_castsi256_ps(v); }
+};
+
+// Reinterprets a vector of bytes as a vector with lanes of type T.
+template <typename T>
+HWY_API Vec256<T> BitCastFromByte(Full256<T> /* tag */, Vec256<uint8_t> v) {
+  BitCastFromInteger256<T> from_integer;
+  return Vec256<T>{from_integer(v.raw)};
+}
+
+}  // namespace detail
+
+// Reinterprets the bits of `v` as lanes of type T. Implemented as a round
+// trip through a byte vector so that any FromT/T combination is handled.
+template <typename T, typename FromT>
+HWY_API Vec256<T> BitCast(Full256<T> d, Vec256<FromT> v) {
+  const Vec256<uint8_t> bytes = detail::BitCastToByte(v);
+  return detail::BitCastFromByte(d, bytes);
+}
+
+// ------------------------------ Set
+
+// Returns a vector whose lanes are all zero. The template covers integer
+// lanes; float and double use the intrinsic for their own register type.
+template <typename T>
+HWY_API Vec256<T> Zero(Full256<T> /* tag */) {
+  const __m256i zeros = _mm256_setzero_si256();
+  return Vec256<T>{zeros};
+}
+HWY_API Vec256<float> Zero(Full256<float> /* tag */) {
+  const __m256 zeros = _mm256_setzero_ps();
+  return Vec256<float>{zeros};
+}
+HWY_API Vec256<double> Zero(Full256<double> /* tag */) {
+  const __m256d zeros = _mm256_setzero_pd();
+  return Vec256<double>{zeros};
+}
+
+// Returns a vector with all lanes set to "t". Where the lane type differs
+// from the parameter type of the intrinsic, the value is cast first.
+
+// Unsigned
+HWY_API Vec256<uint8_t> Set(Full256<uint8_t> /* tag */, const uint8_t t) {
+  const char lane = static_cast<char>(t);  // NOLINT
+  return Vec256<uint8_t>{_mm256_set1_epi8(lane)};
+}
+HWY_API Vec256<uint16_t> Set(Full256<uint16_t> /* tag */, const uint16_t t) {
+  const short lane = static_cast<short>(t);  // NOLINT
+  return Vec256<uint16_t>{_mm256_set1_epi16(lane)};
+}
+HWY_API Vec256<uint32_t> Set(Full256<uint32_t> /* tag */, const uint32_t t) {
+  const int lane = static_cast<int>(t);
+  return Vec256<uint32_t>{_mm256_set1_epi32(lane)};
+}
+HWY_API Vec256<uint64_t> Set(Full256<uint64_t> /* tag */, const uint64_t t) {
+  const long long lane = static_cast<long long>(t);  // NOLINT
+  return Vec256<uint64_t>{_mm256_set1_epi64x(lane)};
+}
+
+// Signed
+HWY_API Vec256<int8_t> Set(Full256<int8_t> /* tag */, const int8_t t) {
+  const char lane = static_cast<char>(t);  // NOLINT
+  return Vec256<int8_t>{_mm256_set1_epi8(lane)};
+}
+HWY_API Vec256<int16_t> Set(Full256<int16_t> /* tag */, const int16_t t) {
+  const short lane = static_cast<short>(t);  // NOLINT
+  return Vec256<int16_t>{_mm256_set1_epi16(lane)};
+}
+HWY_API Vec256<int32_t> Set(Full256<int32_t> /* tag */, const int32_t t) {
+  const __m256i lanes = _mm256_set1_epi32(t);
+  return Vec256<int32_t>{lanes};
+}
+HWY_API Vec256<int64_t> Set(Full256<int64_t> /* tag */, const int64_t t) {
+  const long long lane = static_cast<long long>(t);  // NOLINT
+  return Vec256<int64_t>{_mm256_set1_epi64x(lane)};
+}
+
+// Float
+HWY_API Vec256<float> Set(Full256<float> /* tag */, const float t) {
+  const __m256 lanes = _mm256_set1_ps(t);
+  return Vec256<float>{lanes};
+}
+HWY_API Vec256<double> Set(Full256<double> /* tag */, const double t) {
+  const __m256d lanes = _mm256_set1_pd(t);
+  return Vec256<double>{lanes};
+}
+
+// The functions below intentionally return uninitialized registers, so the
+// corresponding compiler warnings are disabled for their duration.
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wuninitialized")
+
+// Returns a vector with uninitialized elements. The template covers integer
+// lanes; float and double have dedicated overloads.
+template <typename T>
+HWY_API Vec256<T> Undefined(Full256<T> /* tag */) {
+  // Available on Clang 6.0, GCC 6.2, ICC 16.03, MSVC 19.14. All but ICC
+  // generate an XOR instruction.
+  const __m256i raw = _mm256_undefined_si256();
+  return Vec256<T>{raw};
+}
+HWY_API Vec256<float> Undefined(Full256<float> /* tag */) {
+  const __m256 raw = _mm256_undefined_ps();
+  return Vec256<float>{raw};
+}
+HWY_API Vec256<double> Undefined(Full256<double> /* tag */) {
+  const __m256d raw = _mm256_undefined_pd();
+  return Vec256<double>{raw};
+}
+
+HWY_DIAGNOSTICS(pop)
+
+// ================================================== LOGICAL
+
+// ------------------------------ And
+
+// Returns the bitwise AND of a and b. The template covers integer lanes;
+// float and double use the intrinsic for their own register type.
+template <typename T>
+HWY_API Vec256<T> And(Vec256<T> a, Vec256<T> b) {
+  const __m256i bits = _mm256_and_si256(a.raw, b.raw);
+  return Vec256<T>{bits};
+}
+
+HWY_API Vec256<float> And(const Vec256<float> a, const Vec256<float> b) {
+  const __m256 bits = _mm256_and_ps(a.raw, b.raw);
+  return Vec256<float>{bits};
+}
+HWY_API Vec256<double> And(const Vec256<double> a, const Vec256<double> b) {
+  const __m256d bits = _mm256_and_pd(a.raw, b.raw);
+  return Vec256<double>{bits};
+}
+
+// ------------------------------ AndNot
+
+// Returns ~not_mask & mask, i.e. only the FIRST argument is complemented.
+template <typename T>
+HWY_API Vec256<T> AndNot(Vec256<T> not_mask, Vec256<T> mask) {
+  const __m256i bits = _mm256_andnot_si256(not_mask.raw, mask.raw);
+  return Vec256<T>{bits};
+}
+HWY_API Vec256<float> AndNot(const Vec256<float> not_mask,
+                             const Vec256<float> mask) {
+  const __m256 bits = _mm256_andnot_ps(not_mask.raw, mask.raw);
+  return Vec256<float>{bits};
+}
+HWY_API Vec256<double> AndNot(const Vec256<double> not_mask,
+                              const Vec256<double> mask) {
+  const __m256d bits = _mm256_andnot_pd(not_mask.raw, mask.raw);
+  return Vec256<double>{bits};
+}
+
+// ------------------------------ Or
+
+// Returns the bitwise OR of a and b. The template covers integer lanes;
+// float and double use the intrinsic for their own register type.
+template <typename T>
+HWY_API Vec256<T> Or(Vec256<T> a, Vec256<T> b) {
+  const __m256i bits = _mm256_or_si256(a.raw, b.raw);
+  return Vec256<T>{bits};
+}
+
+HWY_API Vec256<float> Or(const Vec256<float> a, const Vec256<float> b) {
+  const __m256 bits = _mm256_or_ps(a.raw, b.raw);
+  return Vec256<float>{bits};
+}
+HWY_API Vec256<double> Or(const Vec256<double> a, const Vec256<double> b) {
+  const __m256d bits = _mm256_or_pd(a.raw, b.raw);
+  return Vec256<double>{bits};
+}
+
+// ------------------------------ Xor
+
+// Returns the bitwise XOR of a and b. The template covers integer lanes;
+// float and double use the intrinsic for their own register type.
+template <typename T>
+HWY_API Vec256<T> Xor(Vec256<T> a, Vec256<T> b) {
+  const __m256i bits = _mm256_xor_si256(a.raw, b.raw);
+  return Vec256<T>{bits};
+}
+
+HWY_API Vec256<float> Xor(const Vec256<float> a, const Vec256<float> b) {
+  const __m256 bits = _mm256_xor_ps(a.raw, b.raw);
+  return Vec256<float>{bits};
+}
+HWY_API Vec256<double> Xor(const Vec256<double> a, const Vec256<double> b) {
+  const __m256d bits = _mm256_xor_pd(a.raw, b.raw);
+  return Vec256<double>{bits};
+}
+
+// ------------------------------ Not
+
+// Returns the bitwise complement of v.
+template <typename T>
+HWY_API Vec256<T> Not(const Vec256<T> v) {
+  using TU = MakeUnsigned<T>;
+  const Full256<T> d;
+#if HWY_TARGET == HWY_AVX3
+  // All three ternary-logic inputs are the same value, so only truth-table
+  // entries 0 and 7 are reachable; 0x55 maps 0 -> 1 and 1 -> 0.
+  const __m256i vu = BitCast(Full256<TU>(), v).raw;
+  const __m256i inverted = _mm256_ternarylogic_epi32(vu, vu, vu, 0x55);
+  return BitCast(d, Vec256<TU>{inverted});
+#else
+  // XOR with all-ones flips every bit.
+  const Vec256<TU> all_ones{_mm256_set1_epi32(-1)};
+  return Xor(v, BitCast(d, all_ones));
+#endif
+}
+
+// ------------------------------ Operator overloads (internal-only if float)
+
+// Bitwise operators; each forwards to the named function of the same meaning.
+
+// a & b, see And.
+template <typename T>
+HWY_API Vec256<T> operator&(const Vec256<T> a, const Vec256<T> b) {
+  const Vec256<T> result = And(a, b);
+  return result;
+}
+
+// a | b, see Or.
+template <typename T>
+HWY_API Vec256<T> operator|(const Vec256<T> a, const Vec256<T> b) {
+  const Vec256<T> result = Or(a, b);
+  return result;
+}
+
+// a ^ b, see Xor.
+template <typename T>
+HWY_API Vec256<T> operator^(const Vec256<T> a, const Vec256<T> b) {
+  const Vec256<T> result = Xor(a, b);
+  return result;
+}
+
+// ------------------------------ CopySign
+
+// Returns a vector that takes its sign bit from `sign` and all remaining bits
+// from `magn`. Only defined for floating-point lane types.
+template <typename T>
+HWY_API Vec256<T> CopySign(const Vec256<T> magn, const Vec256<T> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+
+  const Full256<T> d;
+  const auto msb = SignBit(d);
+
+#if HWY_TARGET == HWY_AVX3
+  const Rebind<MakeUnsigned<T>, decltype(d)> du;
+  using VU = decltype(Zero(du));
+  // Single ternary-logic instruction computing the bitwise select
+  // "msb ? sign : magn". Truth table (inputs msb, magn, sign => output):
+  //   0 0 0 => 0
+  //   0 0 1 => 0
+  //   0 1 0 => 1
+  //   0 1 1 => 1
+  //   1 0 0 => 0
+  //   1 0 1 => 1
+  //   1 1 0 => 0
+  //   1 1 1 => 1
+  // Read from the bottom row up, the outputs form 0b10101100 = 0xAC.
+  // The lane size does not matter because we are not using predication.
+  const __m256i msb_bits = BitCast(du, msb).raw;
+  const __m256i magn_bits = BitCast(du, magn).raw;
+  const __m256i sign_bits = BitCast(du, sign).raw;
+  const __m256i out =
+      _mm256_ternarylogic_epi32(msb_bits, magn_bits, sign_bits, 0xAC);
+  return BitCast(d, VU{out});
+#else
+  // Clear the sign bit of magn, then merge in the sign bit of sign.
+  const auto magnitude_only = AndNot(msb, magn);
+  const auto sign_only = And(msb, sign);
+  return Or(magnitude_only, sign_only);
+#endif
+}
+
+// Variant of CopySign for callers that pass a magnitude in `abs`. The
+// non-AVX3 path merely ORs the sign bit of `sign` into `abs`, so it relies on
+// the sign bit of `abs` being clear.
+template <typename T>
+HWY_API Vec256<T> CopySignToAbs(const Vec256<T> abs, const Vec256<T> sign) {
+#if HWY_TARGET == HWY_AVX3
+  // AVX3 can also handle abs < 0, so no extra action needed.
+  return CopySign(abs, sign);
+#else
+  const Full256<T> d;
+  const auto sign_only = And(SignBit(d), sign);
+  return Or(abs, sign_only);
+#endif
+}
+
+// ------------------------------ Mask
+
+// Reinterprets a vector as a mask. Masks and vectors share the same raw
+// representation (true = FF..FF), so this only re-wraps the register.
+template <typename T>
+HWY_API Mask256<T> MaskFromVec(const Vec256<T> v) {
+  const Mask256<T> mask{v.raw};
+  return mask;
+}
+
+// Reinterprets a mask as a vector by re-wrapping the same raw register.
+template <typename T>
+HWY_API Vec256<T> VecFromMask(const Mask256<T> v) {
+  const Vec256<T> vec{v.raw};
+  return vec;
+}
+
+// Same as above, with an (unused) descriptor tag as first argument.
+template <typename T>
+HWY_API Vec256<T> VecFromMask(Full256<T> /* tag */, const Mask256<T> v) {
+  const Vec256<T> vec{v.raw};
+  return vec;
+}
+
+// ------------------------------ IfThenElse
+
+// Per-lane select: mask ? yes : no. Note that the blend intrinsics take the
+// "no" operand first.
+template <typename T>
+HWY_API Vec256<T> IfThenElse(const Mask256<T> mask, const Vec256<T> yes,
+                             const Vec256<T> no) {
+  const __m256i selected = _mm256_blendv_epi8(no.raw, yes.raw, mask.raw);
+  return Vec256<T>{selected};
+}
+HWY_API Vec256<float> IfThenElse(const Mask256<float> mask,
+                                 const Vec256<float> yes,
+                                 const Vec256<float> no) {
+  const __m256 selected = _mm256_blendv_ps(no.raw, yes.raw, mask.raw);
+  return Vec256<float>{selected};
+}
+HWY_API Vec256<double> IfThenElse(const Mask256<double> mask,
+                                  const Vec256<double> yes,
+                                  const Vec256<double> no) {
+  const __m256d selected = _mm256_blendv_pd(no.raw, yes.raw, mask.raw);
+  return Vec256<double>{selected};
+}
+
+// mask ? yes : 0. Implemented as a bitwise AND with the mask bits.
+template <typename T>
+HWY_API Vec256<T> IfThenElseZero(Mask256<T> mask, Vec256<T> yes) {
+  const Full256<T> d;
+  const Vec256<T> mask_bits = VecFromMask(d, mask);
+  return And(yes, mask_bits);
+}
+
+// mask ? 0 : no. Implemented as a bitwise AND with the complemented mask bits.
+template <typename T>
+HWY_API Vec256<T> IfThenZeroElse(Mask256<T> mask, Vec256<T> no) {
+  const Full256<T> d;
+  const Vec256<T> mask_bits = VecFromMask(d, mask);
+  return AndNot(mask_bits, no);
+}
+
+// Returns zero in every lane selected by using v itself as the mask, and v
+// elsewhere. Restricted to floating-point lanes, for which the mask
+// convention in this file says only the MSB (sign bit) is meaningful.
+template <typename T, HWY_IF_FLOAT(T)>
+HWY_API Vec256<T> ZeroIfNegative(Vec256<T> v) {
+  const Full256<T> d;
+  const Mask256<T> negative = MaskFromVec(v);
+  return IfThenElse(negative, Zero(d), v);
+}
+
+// ------------------------------ Mask logical
+
+// Logical operations on masks. Each converts its operands to vectors, applies
+// the vector op of the same name and converts the result back to a mask.
+
+template <typename T>
+HWY_API Mask256<T> Not(const Mask256<T> m) {
+  const Full256<T> d;
+  const Vec256<T> vm = VecFromMask(d, m);
+  return MaskFromVec(Not(vm));
+}
+
+template <typename T>
+HWY_API Mask256<T> And(const Mask256<T> a, const Mask256<T> b) {
+  const Full256<T> d;
+  const Vec256<T> va = VecFromMask(d, a);
+  const Vec256<T> vb = VecFromMask(d, b);
+  return MaskFromVec(And(va, vb));
+}
+
+// Returns ~a & b.
+template <typename T>
+HWY_API Mask256<T> AndNot(const Mask256<T> a, const Mask256<T> b) {
+  const Full256<T> d;
+  const Vec256<T> va = VecFromMask(d, a);
+  const Vec256<T> vb = VecFromMask(d, b);
+  return MaskFromVec(AndNot(va, vb));
+}
+
+template <typename T>
+HWY_API Mask256<T> Or(const Mask256<T> a, const Mask256<T> b) {
+  const Full256<T> d;
+  const Vec256<T> va = VecFromMask(d, a);
+  const Vec256<T> vb = VecFromMask(d, b);
+  return MaskFromVec(Or(va, vb));
+}
+
+template <typename T>
+HWY_API Mask256<T> Xor(const Mask256<T> a, const Mask256<T> b) {
+  const Full256<T> d;
+  const Vec256<T> va = VecFromMask(d, a);
+  const Vec256<T> vb = VecFromMask(d, b);
+  return MaskFromVec(Xor(va, vb));
+}
+
+// ================================================== COMPARE
+
+// Comparisons fill a lane with 1-bits if the condition is true, else 0.
+
+// Reinterprets a mask for lanes of TFrom as a mask for lanes of TTo. Both
+// lane types must have the same size, which is checked at compile time.
+template <typename TFrom, typename TTo>
+HWY_API Mask256<TTo> RebindMask(Full256<TTo> d_to, Mask256<TFrom> m) {
+  static_assert(sizeof(TFrom) == sizeof(TTo), "Must have same size");
+  const Vec256<TFrom> from_bits = VecFromMask(Full256<TFrom>(), m);
+  return MaskFromVec(BitCast(d_to, from_bits));
+}
+
+// ------------------------------ Equality
+
+// Per-lane equality. Signed and unsigned lanes of equal width use the same
+// integer intrinsic; floats use the ordered, non-signaling EQ predicate.
+
+// Unsigned
+HWY_API Mask256<uint8_t> operator==(const Vec256<uint8_t> a,
+                                    const Vec256<uint8_t> b) {
+  const __m256i eq = _mm256_cmpeq_epi8(a.raw, b.raw);
+  return Mask256<uint8_t>{eq};
+}
+HWY_API Mask256<uint16_t> operator==(const Vec256<uint16_t> a,
+                                     const Vec256<uint16_t> b) {
+  const __m256i eq = _mm256_cmpeq_epi16(a.raw, b.raw);
+  return Mask256<uint16_t>{eq};
+}
+HWY_API Mask256<uint32_t> operator==(const Vec256<uint32_t> a,
+                                     const Vec256<uint32_t> b) {
+  const __m256i eq = _mm256_cmpeq_epi32(a.raw, b.raw);
+  return Mask256<uint32_t>{eq};
+}
+HWY_API Mask256<uint64_t> operator==(const Vec256<uint64_t> a,
+                                     const Vec256<uint64_t> b) {
+  const __m256i eq = _mm256_cmpeq_epi64(a.raw, b.raw);
+  return Mask256<uint64_t>{eq};
+}
+
+// Signed
+HWY_API Mask256<int8_t> operator==(const Vec256<int8_t> a,
+                                   const Vec256<int8_t> b) {
+  const __m256i eq = _mm256_cmpeq_epi8(a.raw, b.raw);
+  return Mask256<int8_t>{eq};
+}
+HWY_API Mask256<int16_t> operator==(const Vec256<int16_t> a,
+                                    const Vec256<int16_t> b) {
+  const __m256i eq = _mm256_cmpeq_epi16(a.raw, b.raw);
+  return Mask256<int16_t>{eq};
+}
+HWY_API Mask256<int32_t> operator==(const Vec256<int32_t> a,
+                                    const Vec256<int32_t> b) {
+  const __m256i eq = _mm256_cmpeq_epi32(a.raw, b.raw);
+  return Mask256<int32_t>{eq};
+}
+HWY_API Mask256<int64_t> operator==(const Vec256<int64_t> a,
+                                    const Vec256<int64_t> b) {
+  const __m256i eq = _mm256_cmpeq_epi64(a.raw, b.raw);
+  return Mask256<int64_t>{eq};
+}
+
+// Float
+HWY_API Mask256<float> operator==(const Vec256<float> a,
+                                  const Vec256<float> b) {
+  const __m256 eq = _mm256_cmp_ps(a.raw, b.raw, _CMP_EQ_OQ);
+  return Mask256<float>{eq};
+}
+HWY_API Mask256<double> operator==(const Vec256<double> a,
+                                   const Vec256<double> b) {
+  const __m256d eq = _mm256_cmp_pd(a.raw, b.raw, _CMP_EQ_OQ);
+  return Mask256<double>{eq};
+}
+
+// Returns a mask that is true in each lane where every bit set in `bit` is
+// also set in `v`. Integer lanes only.
+template <typename T>
+HWY_API Mask256<T> TestBit(const Vec256<T> v, const Vec256<T> bit) {
+  static_assert(!hwy::IsFloat<T>(), "Only integer vectors supported");
+  const Vec256<T> common = And(v, bit);
+  return common == bit;
+}
+
+// ------------------------------ Strict inequality
+
+// Pre-9.3 GCC immintrin.h uses char, which may be unsigned, causing cmpgt_epi8
+// to perform an unsigned comparison instead of the intended signed. Workaround
+// is to cast to an explicitly signed type. See https://godbolt.org/z/PL7Ujy
+#if HWY_COMPILER_GCC != 0 && HWY_COMPILER_GCC < 930
+#define HWY_AVX2_GCC_CMPGT8_WORKAROUND 1
+#else
+#define HWY_AVX2_GCC_CMPGT8_WORKAROUND 0
+#endif
+
+// Signed/float <. The integer versions reuse the "greater than" intrinsics
+// with swapped operands (a < b is b > a); floats use the ordered,
+// non-signaling LT predicate.
+HWY_API Mask256<int8_t> operator<(const Vec256<int8_t> a,
+                                  const Vec256<int8_t> b) {
+#if HWY_AVX2_GCC_CMPGT8_WORKAROUND
+  // Compare as explicitly signed vectors (see workaround note above).
+  using i8x32 = signed char __attribute__((__vector_size__(32)));
+  return Mask256<int8_t>{static_cast<__m256i>(reinterpret_cast<i8x32>(a.raw) <
+                                              reinterpret_cast<i8x32>(b.raw))};
+#else
+  const __m256i lt = _mm256_cmpgt_epi8(b.raw, a.raw);
+  return Mask256<int8_t>{lt};
+#endif
+}
+HWY_API Mask256<int16_t> operator<(const Vec256<int16_t> a,
+                                   const Vec256<int16_t> b) {
+  const __m256i lt = _mm256_cmpgt_epi16(b.raw, a.raw);
+  return Mask256<int16_t>{lt};
+}
+HWY_API Mask256<int32_t> operator<(const Vec256<int32_t> a,
+                                   const Vec256<int32_t> b) {
+  const __m256i lt = _mm256_cmpgt_epi32(b.raw, a.raw);
+  return Mask256<int32_t>{lt};
+}
+HWY_API Mask256<int64_t> operator<(const Vec256<int64_t> a,
+                                   const Vec256<int64_t> b) {
+  const __m256i lt = _mm256_cmpgt_epi64(b.raw, a.raw);
+  return Mask256<int64_t>{lt};
+}
+HWY_API Mask256<float> operator<(const Vec256<float> a, const Vec256<float> b) {
+  const __m256 lt = _mm256_cmp_ps(a.raw, b.raw, _CMP_LT_OQ);
+  return Mask256<float>{lt};
+}
+HWY_API Mask256<double> operator<(const Vec256<double> a,
+                                  const Vec256<double> b) {
+  const __m256d lt = _mm256_cmp_pd(a.raw, b.raw, _CMP_LT_OQ);
+  return Mask256<double>{lt};
+}
+
+// Signed/float >. Integers map directly onto the "greater than" intrinsics;
+// floats use the ordered, non-signaling GT predicate.
+HWY_API Mask256<int8_t> operator>(const Vec256<int8_t> a,
+                                  const Vec256<int8_t> b) {
+#if HWY_AVX2_GCC_CMPGT8_WORKAROUND
+  // Compare as explicitly signed vectors (see workaround note above).
+  using i8x32 = signed char __attribute__((__vector_size__(32)));
+  return Mask256<int8_t>{static_cast<__m256i>(reinterpret_cast<i8x32>(a.raw) >
+                                              reinterpret_cast<i8x32>(b.raw))};
+#else
+  const __m256i gt = _mm256_cmpgt_epi8(a.raw, b.raw);
+  return Mask256<int8_t>{gt};
+#endif
+}
+HWY_API Mask256<int16_t> operator>(const Vec256<int16_t> a,
+                                   const Vec256<int16_t> b) {
+  const __m256i gt = _mm256_cmpgt_epi16(a.raw, b.raw);
+  return Mask256<int16_t>{gt};
+}
+HWY_API Mask256<int32_t> operator>(const Vec256<int32_t> a,
+                                   const Vec256<int32_t> b) {
+  const __m256i gt = _mm256_cmpgt_epi32(a.raw, b.raw);
+  return Mask256<int32_t>{gt};
+}
+HWY_API Mask256<int64_t> operator>(const Vec256<int64_t> a,
+                                   const Vec256<int64_t> b) {
+  const __m256i gt = _mm256_cmpgt_epi64(a.raw, b.raw);
+  return Mask256<int64_t>{gt};
+}
+HWY_API Mask256<float> operator>(const Vec256<float> a, const Vec256<float> b) {
+  const __m256 gt = _mm256_cmp_ps(a.raw, b.raw, _CMP_GT_OQ);
+  return Mask256<float>{gt};
+}
+HWY_API Mask256<double> operator>(const Vec256<double> a,
+                                  const Vec256<double> b) {
+  const __m256d gt = _mm256_cmp_pd(a.raw, b.raw, _CMP_GT_OQ);
+  return Mask256<double>{gt};
+}
+
+// ------------------------------ Weak inequality
+
+// Float <= and >=, using the ordered, non-signaling LE/GE predicates. Only
+// float and double overloads are provided here.
+HWY_API Mask256<float> operator<=(const Vec256<float> a,
+                                  const Vec256<float> b) {
+  const __m256 le = _mm256_cmp_ps(a.raw, b.raw, _CMP_LE_OQ);
+  return Mask256<float>{le};
+}
+HWY_API Mask256<double> operator<=(const Vec256<double> a,
+                                   const Vec256<double> b) {
+  const __m256d le = _mm256_cmp_pd(a.raw, b.raw, _CMP_LE_OQ);
+  return Mask256<double>{le};
+}
+HWY_API Mask256<float> operator>=(const Vec256<float> a,
+                                  const Vec256<float> b) {
+  const __m256 ge = _mm256_cmp_ps(a.raw, b.raw, _CMP_GE_OQ);
+  return Mask256<float>{ge};
+}
+HWY_API Mask256<double> operator>=(const Vec256<double> a,
+                                   const Vec256<double> b) {
+  const __m256d ge = _mm256_cmp_pd(a.raw, b.raw, _CMP_GE_OQ);
+  return Mask256<double>{ge};
+}
+
+// ------------------------------ Min (Gt, IfThenElse)
+
+// Per-lane minimum of a and b.
+
+// Unsigned
+HWY_API Vec256<uint8_t> Min(const Vec256<uint8_t> a, const Vec256<uint8_t> b) {
+  const __m256i lo = _mm256_min_epu8(a.raw, b.raw);
+  return Vec256<uint8_t>{lo};
+}
+HWY_API Vec256<uint16_t> Min(const Vec256<uint16_t> a,
+                             const Vec256<uint16_t> b) {
+  const __m256i lo = _mm256_min_epu16(a.raw, b.raw);
+  return Vec256<uint16_t>{lo};
+}
+HWY_API Vec256<uint32_t> Min(const Vec256<uint32_t> a,
+                             const Vec256<uint32_t> b) {
+  const __m256i lo = _mm256_min_epu32(a.raw, b.raw);
+  return Vec256<uint32_t>{lo};
+}
+HWY_API Vec256<uint64_t> Min(const Vec256<uint64_t> a,
+                             const Vec256<uint64_t> b) {
+#if HWY_TARGET == HWY_AVX3
+  const __m256i lo = _mm256_min_epu64(a.raw, b.raw);
+  return Vec256<uint64_t>{lo};
+#else
+  // No native instruction here: flipping the MSB of both operands lets the
+  // signed 64-bit comparison produce the unsigned ordering.
+  const Full256<uint64_t> du;
+  const Full256<int64_t> di;
+  const auto msb = Set(du, 1ull << 63);
+  const auto a_flipped = BitCast(di, a ^ msb);
+  const auto b_flipped = BitCast(di, b ^ msb);
+  const auto a_is_greater = RebindMask(du, a_flipped > b_flipped);
+  return IfThenElse(a_is_greater, b, a);
+#endif
+}
+
+// Signed
+HWY_API Vec256<int8_t> Min(const Vec256<int8_t> a, const Vec256<int8_t> b) {
+  const __m256i lo = _mm256_min_epi8(a.raw, b.raw);
+  return Vec256<int8_t>{lo};
+}
+HWY_API Vec256<int16_t> Min(const Vec256<int16_t> a, const Vec256<int16_t> b) {
+  const __m256i lo = _mm256_min_epi16(a.raw, b.raw);
+  return Vec256<int16_t>{lo};
+}
+HWY_API Vec256<int32_t> Min(const Vec256<int32_t> a, const Vec256<int32_t> b) {
+  const __m256i lo = _mm256_min_epi32(a.raw, b.raw);
+  return Vec256<int32_t>{lo};
+}
+HWY_API Vec256<int64_t> Min(const Vec256<int64_t> a, const Vec256<int64_t> b) {
+#if HWY_TARGET == HWY_AVX3
+  const __m256i lo = _mm256_min_epi64(a.raw, b.raw);
+  return Vec256<int64_t>{lo};
+#else
+  // Emulated with a comparison followed by a select.
+  const auto a_is_less = a < b;
+  return IfThenElse(a_is_less, a, b);
+#endif
+}
+
+// Float
+HWY_API Vec256<float> Min(const Vec256<float> a, const Vec256<float> b) {
+  const __m256 lo = _mm256_min_ps(a.raw, b.raw);
+  return Vec256<float>{lo};
+}
+HWY_API Vec256<double> Min(const Vec256<double> a, const Vec256<double> b) {
+  const __m256d lo = _mm256_min_pd(a.raw, b.raw);
+  return Vec256<double>{lo};
+}
+
+// ------------------------------ Max (Gt, IfThenElse)
+
+// Per-lane maximum of a and b.
+
+// Unsigned
+HWY_API Vec256<uint8_t> Max(const Vec256<uint8_t> a, const Vec256<uint8_t> b) {
+  const __m256i hi = _mm256_max_epu8(a.raw, b.raw);
+  return Vec256<uint8_t>{hi};
+}
+HWY_API Vec256<uint16_t> Max(const Vec256<uint16_t> a,
+                             const Vec256<uint16_t> b) {
+  const __m256i hi = _mm256_max_epu16(a.raw, b.raw);
+  return Vec256<uint16_t>{hi};
+}
+HWY_API Vec256<uint32_t> Max(const Vec256<uint32_t> a,
+                             const Vec256<uint32_t> b) {
+  const __m256i hi = _mm256_max_epu32(a.raw, b.raw);
+  return Vec256<uint32_t>{hi};
+}
+HWY_API Vec256<uint64_t> Max(const Vec256<uint64_t> a,
+                             const Vec256<uint64_t> b) {
+#if HWY_TARGET == HWY_AVX3
+  const __m256i hi = _mm256_max_epu64(a.raw, b.raw);
+  return Vec256<uint64_t>{hi};
+#else
+  // No native instruction here: flipping the MSB of both operands lets the
+  // signed 64-bit comparison produce the unsigned ordering.
+  const Full256<uint64_t> du;
+  const Full256<int64_t> di;
+  const auto msb = Set(du, 1ull << 63);
+  const auto a_flipped = BitCast(di, a ^ msb);
+  const auto b_flipped = BitCast(di, b ^ msb);
+  const auto a_is_greater = RebindMask(du, a_flipped > b_flipped);
+  return IfThenElse(a_is_greater, a, b);
+#endif
+}
+
+// Signed
+HWY_API Vec256<int8_t> Max(const Vec256<int8_t> a, const Vec256<int8_t> b) {
+  const __m256i hi = _mm256_max_epi8(a.raw, b.raw);
+  return Vec256<int8_t>{hi};
+}
+HWY_API Vec256<int16_t> Max(const Vec256<int16_t> a, const Vec256<int16_t> b) {
+  const __m256i hi = _mm256_max_epi16(a.raw, b.raw);
+  return Vec256<int16_t>{hi};
+}
+HWY_API Vec256<int32_t> Max(const Vec256<int32_t> a, const Vec256<int32_t> b) {
+  const __m256i hi = _mm256_max_epi32(a.raw, b.raw);
+  return Vec256<int32_t>{hi};
+}
+HWY_API Vec256<int64_t> Max(const Vec256<int64_t> a, const Vec256<int64_t> b) {
+#if HWY_TARGET == HWY_AVX3
+  const __m256i hi = _mm256_max_epi64(a.raw, b.raw);
+  return Vec256<int64_t>{hi};
+#else
+  // Emulated with a comparison followed by a select.
+  const auto a_is_less = a < b;
+  return IfThenElse(a_is_less, b, a);
+#endif
+}
+
+// Float
+HWY_API Vec256<float> Max(const Vec256<float> a, const Vec256<float> b) {
+  const __m256 hi = _mm256_max_ps(a.raw, b.raw);
+  return Vec256<float>{hi};
+}
+HWY_API Vec256<double> Max(const Vec256<double> a, const Vec256<double> b) {
+  const __m256d hi = _mm256_max_pd(a.raw, b.raw);
+  return Vec256<double>{hi};
+}
+
+// ------------------------------ FirstN (Iota, Lt)
+
+// Returns a mask in which the first `n` lanes (those with index < n) are true
+// and all others false. Any `n` greater than or equal to the lane count
+// yields an all-true mask.
+template <typename T>
+HWY_API Mask256<T> FirstN(const Full256<T> d, size_t n) {
+  const RebindToSigned<decltype(d)> di;  // Signed comparisons are cheaper.
+  // Clamp before narrowing to the signed lane type: otherwise a large n
+  // (e.g. 128..255 for 8-bit lanes) would wrap to a negative value and
+  // produce an all-false mask. The lane count (at most 32) always fits.
+  const size_t kLanes = 32 / sizeof(T);
+  const size_t num_true = n < kLanes ? n : kLanes;
+  const auto limit = Set(di, static_cast<MakeSigned<T>>(num_true));
+  return RebindMask(d, Iota(di, 0) < limit);
+}
+
+// ================================================== ARITHMETIC
+
+// ------------------------------ Addition
+
+// Per-lane addition. Signed and unsigned lanes of equal width use the same
+// integer intrinsic.
+
+// Unsigned
+HWY_API Vec256<uint8_t> operator+(const Vec256<uint8_t> a,
+                                  const Vec256<uint8_t> b) {
+  const __m256i sum = _mm256_add_epi8(a.raw, b.raw);
+  return Vec256<uint8_t>{sum};
+}
+HWY_API Vec256<uint16_t> operator+(const Vec256<uint16_t> a,
+                                   const Vec256<uint16_t> b) {
+  const __m256i sum = _mm256_add_epi16(a.raw, b.raw);
+  return Vec256<uint16_t>{sum};
+}
+HWY_API Vec256<uint32_t> operator+(const Vec256<uint32_t> a,
+                                   const Vec256<uint32_t> b) {
+  const __m256i sum = _mm256_add_epi32(a.raw, b.raw);
+  return Vec256<uint32_t>{sum};
+}
+HWY_API Vec256<uint64_t> operator+(const Vec256<uint64_t> a,
+                                   const Vec256<uint64_t> b) {
+  const __m256i sum = _mm256_add_epi64(a.raw, b.raw);
+  return Vec256<uint64_t>{sum};
+}
+
+// Signed
+HWY_API Vec256<int8_t> operator+(const Vec256<int8_t> a,
+                                 const Vec256<int8_t> b) {
+  const __m256i sum = _mm256_add_epi8(a.raw, b.raw);
+  return Vec256<int8_t>{sum};
+}
+HWY_API Vec256<int16_t> operator+(const Vec256<int16_t> a,
+                                  const Vec256<int16_t> b) {
+  const __m256i sum = _mm256_add_epi16(a.raw, b.raw);
+  return Vec256<int16_t>{sum};
+}
+HWY_API Vec256<int32_t> operator+(const Vec256<int32_t> a,
+                                  const Vec256<int32_t> b) {
+  const __m256i sum = _mm256_add_epi32(a.raw, b.raw);
+  return Vec256<int32_t>{sum};
+}
+HWY_API Vec256<int64_t> operator+(const Vec256<int64_t> a,
+                                  const Vec256<int64_t> b) {
+  const __m256i sum = _mm256_add_epi64(a.raw, b.raw);
+  return Vec256<int64_t>{sum};
+}
+
+// Float
+HWY_API Vec256<float> operator+(const Vec256<float> a, const Vec256<float> b) {
+  const __m256 sum = _mm256_add_ps(a.raw, b.raw);
+  return Vec256<float>{sum};
+}
+HWY_API Vec256<double> operator+(const Vec256<double> a,
+                                 const Vec256<double> b) {
+  const __m256d sum = _mm256_add_pd(a.raw, b.raw);
+  return Vec256<double>{sum};
+}
+
+// ------------------------------ Subtraction
+
+// Per-lane subtraction (a - b). Signed and unsigned lanes of equal width use
+// the same integer intrinsic.
+
+// Unsigned
+HWY_API Vec256<uint8_t> operator-(const Vec256<uint8_t> a,
+                                  const Vec256<uint8_t> b) {
+  const __m256i diff = _mm256_sub_epi8(a.raw, b.raw);
+  return Vec256<uint8_t>{diff};
+}
+HWY_API Vec256<uint16_t> operator-(const Vec256<uint16_t> a,
+                                   const Vec256<uint16_t> b) {
+  const __m256i diff = _mm256_sub_epi16(a.raw, b.raw);
+  return Vec256<uint16_t>{diff};
+}
+HWY_API Vec256<uint32_t> operator-(const Vec256<uint32_t> a,
+                                   const Vec256<uint32_t> b) {
+  const __m256i diff = _mm256_sub_epi32(a.raw, b.raw);
+  return Vec256<uint32_t>{diff};
+}
+HWY_API Vec256<uint64_t> operator-(const Vec256<uint64_t> a,
+                                   const Vec256<uint64_t> b) {
+  const __m256i diff = _mm256_sub_epi64(a.raw, b.raw);
+  return Vec256<uint64_t>{diff};
+}
+
+// Signed
+HWY_API Vec256<int8_t> operator-(const Vec256<int8_t> a,
+                                 const Vec256<int8_t> b) {
+  const __m256i diff = _mm256_sub_epi8(a.raw, b.raw);
+  return Vec256<int8_t>{diff};
+}
+HWY_API Vec256<int16_t> operator-(const Vec256<int16_t> a,
+                                  const Vec256<int16_t> b) {
+  const __m256i diff = _mm256_sub_epi16(a.raw, b.raw);
+  return Vec256<int16_t>{diff};
+}
+HWY_API Vec256<int32_t> operator-(const Vec256<int32_t> a,
+                                  const Vec256<int32_t> b) {
+  const __m256i diff = _mm256_sub_epi32(a.raw, b.raw);
+  return Vec256<int32_t>{diff};
+}
+HWY_API Vec256<int64_t> operator-(const Vec256<int64_t> a,
+                                  const Vec256<int64_t> b) {
+  const __m256i diff = _mm256_sub_epi64(a.raw, b.raw);
+  return Vec256<int64_t>{diff};
+}
+
+// Float
+HWY_API Vec256<float> operator-(const Vec256<float> a, const Vec256<float> b) {
+  const __m256 diff = _mm256_sub_ps(a.raw, b.raw);
+  return Vec256<float>{diff};
+}
+HWY_API Vec256<double> operator-(const Vec256<double> a,
+                                 const Vec256<double> b) {
+  const __m256d diff = _mm256_sub_pd(a.raw, b.raw);
+  return Vec256<double>{diff};
+}
+
+// ------------------------------ Saturating addition
+
+// Returns a + b clamped to the destination range. Provided for 8- and 16-bit
+// lanes only.
+
+// Unsigned
+HWY_API Vec256<uint8_t> SaturatedAdd(const Vec256<uint8_t> a,
+                                     const Vec256<uint8_t> b) {
+  const __m256i sum = _mm256_adds_epu8(a.raw, b.raw);
+  return Vec256<uint8_t>{sum};
+}
+HWY_API Vec256<uint16_t> SaturatedAdd(const Vec256<uint16_t> a,
+                                      const Vec256<uint16_t> b) {
+  const __m256i sum = _mm256_adds_epu16(a.raw, b.raw);
+  return Vec256<uint16_t>{sum};
+}
+
+// Signed
+HWY_API Vec256<int8_t> SaturatedAdd(const Vec256<int8_t> a,
+                                    const Vec256<int8_t> b) {
+  const __m256i sum = _mm256_adds_epi8(a.raw, b.raw);
+  return Vec256<int8_t>{sum};
+}
+HWY_API Vec256<int16_t> SaturatedAdd(const Vec256<int16_t> a,
+                                     const Vec256<int16_t> b) {
+  const __m256i sum = _mm256_adds_epi16(a.raw, b.raw);
+  return Vec256<int16_t>{sum};
+}
+
+// ------------------------------ Saturating subtraction
+
+// Returns a - b clamped to the destination range. Provided for 8- and 16-bit
+// lanes only.
+
+// Unsigned
+HWY_API Vec256<uint8_t> SaturatedSub(const Vec256<uint8_t> a,
+                                     const Vec256<uint8_t> b) {
+  const __m256i diff = _mm256_subs_epu8(a.raw, b.raw);
+  return Vec256<uint8_t>{diff};
+}
+HWY_API Vec256<uint16_t> SaturatedSub(const Vec256<uint16_t> a,
+                                      const Vec256<uint16_t> b) {
+  const __m256i diff = _mm256_subs_epu16(a.raw, b.raw);
+  return Vec256<uint16_t>{diff};
+}
+
+// Signed
+HWY_API Vec256<int8_t> SaturatedSub(const Vec256<int8_t> a,
+                                    const Vec256<int8_t> b) {
+  const __m256i diff = _mm256_subs_epi8(a.raw, b.raw);
+  return Vec256<int8_t>{diff};
+}
+HWY_API Vec256<int16_t> SaturatedSub(const Vec256<int16_t> a,
+                                     const Vec256<int16_t> b) {
+  const __m256i diff = _mm256_subs_epi16(a.raw, b.raw);
+  return Vec256<int16_t>{diff};
+}
+
+// ------------------------------ Average
+
+// Returns (a + b + 1) / 2, i.e. the average rounded up. Provided for unsigned
+// 8- and 16-bit lanes only.
+
+// Unsigned
+HWY_API Vec256<uint8_t> AverageRound(const Vec256<uint8_t> a,
+                                     const Vec256<uint8_t> b) {
+  const __m256i avg = _mm256_avg_epu8(a.raw, b.raw);
+  return Vec256<uint8_t>{avg};
+}
+HWY_API Vec256<uint16_t> AverageRound(const Vec256<uint16_t> a,
+                                      const Vec256<uint16_t> b) {
+  const __m256i avg = _mm256_avg_epu16(a.raw, b.raw);
+  return Vec256<uint16_t>{avg};
+}
+
+// ------------------------------ Absolute value
+
+// Returns absolute value, except that LimitsMin() maps to LimitsMax() + 1.
+HWY_API Vec256<int8_t> Abs(const Vec256<int8_t> v) {
+#if HWY_COMPILER_MSVC
+  // Workaround for incorrect codegen? (wrong result)
+  // Computes max(v, 0 - v) instead of using the abs intrinsic.
+  const Full256<int8_t> d;
+  const Vec256<int8_t> negated = Zero(d) - v;
+  return Vec256<int8_t>{_mm256_max_epi8(v.raw, negated.raw)};
+#else
+  const __m256i magnitude = _mm256_abs_epi8(v.raw);
+  return Vec256<int8_t>{magnitude};
+#endif
+}
+HWY_API Vec256<int16_t> Abs(const Vec256<int16_t> v) {
+  const __m256i magnitude = _mm256_abs_epi16(v.raw);
+  return Vec256<int16_t>{magnitude};
+}
+HWY_API Vec256<int32_t> Abs(const Vec256<int32_t> v) {
+  const __m256i magnitude = _mm256_abs_epi32(v.raw);
+  return Vec256<int32_t>{magnitude};
+}
+// i64 is implemented after BroadcastSignBit.
+
+// Floating-point: clear the sign bit by ANDing with a mask of all other bits.
+HWY_API Vec256<float> Abs(const Vec256<float> v) {
+  const Full256<float> d;
+  const Vec256<int32_t> all_but_sign{_mm256_set1_epi32(0x7FFFFFFF)};
+  return And(v, BitCast(d, all_but_sign));
+}
+HWY_API Vec256<double> Abs(const Vec256<double> v) {
+  const Full256<double> d;
+  const Vec256<int64_t> all_but_sign{_mm256_set1_epi64x(0x7FFFFFFFFFFFFFFFLL)};
+  return And(v, BitCast(d, all_but_sign));
+}
+
+// ------------------------------ Integer multiplication
+
+// Per-lane multiplication keeping the lower half of each product, hence the
+// same intrinsic serves signed and unsigned lanes. Provided for 16- and
+// 32-bit integer lanes.
+
+// Unsigned
+HWY_API Vec256<uint16_t> operator*(const Vec256<uint16_t> a,
+                                   const Vec256<uint16_t> b) {
+  const __m256i product = _mm256_mullo_epi16(a.raw, b.raw);
+  return Vec256<uint16_t>{product};
+}
+HWY_API Vec256<uint32_t> operator*(const Vec256<uint32_t> a,
+                                   const Vec256<uint32_t> b) {
+  const __m256i product = _mm256_mullo_epi32(a.raw, b.raw);
+  return Vec256<uint32_t>{product};
+}
+
+// Signed
+HWY_API Vec256<int16_t> operator*(const Vec256<int16_t> a,
+                                  const Vec256<int16_t> b) {
+  const __m256i product = _mm256_mullo_epi16(a.raw, b.raw);
+  return Vec256<int16_t>{product};
+}
+HWY_API Vec256<int32_t> operator*(const Vec256<int32_t> a,
+                                  const Vec256<int32_t> b) {
+  const __m256i product = _mm256_mullo_epi32(a.raw, b.raw);
+  return Vec256<int32_t>{product};
+}
+
+// Returns the upper 16 bits of a * b in each lane.
+HWY_API Vec256<uint16_t> MulHigh(const Vec256<uint16_t> a,
+                                 const Vec256<uint16_t> b) {
+  return Vec256<uint16_t>{_mm256_mulhi_epu16(a.raw, b.raw)};
+}
+HWY_API Vec256<int16_t> MulHigh(const Vec256<int16_t> a,
+                                const Vec256<int16_t> b) {
+  return Vec256<int16_t>{_mm256_mulhi_epi16(a.raw, b.raw)};
+}
+
+// Multiplies even lanes (0, 2 ..) and places the lower half of the double-wide
+// result into the even lane and the upper half into its odd neighbor lane.
+HWY_API Vec256<int64_t> MulEven(const Vec256<int32_t> a,
+                                const Vec256<int32_t> b) {
+  return Vec256<int64_t>{_mm256_mul_epi32(a.raw, b.raw)};
+}
+HWY_API Vec256<uint64_t> MulEven(const Vec256<uint32_t> a,
+                                 const Vec256<uint32_t> b) {
+  return Vec256<uint64_t>{_mm256_mul_epu32(a.raw, b.raw)};
+}
+
+// ------------------------------ ShiftLeft
+
+template <int kBits>
+HWY_API Vec256<uint16_t> ShiftLeft(const Vec256<uint16_t> v) {
+  return Vec256<uint16_t>{_mm256_slli_epi16(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<uint32_t> ShiftLeft(const Vec256<uint32_t> v) {
+  return Vec256<uint32_t>{_mm256_slli_epi32(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<uint64_t> ShiftLeft(const Vec256<uint64_t> v) {
+  return Vec256<uint64_t>{_mm256_slli_epi64(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<int16_t> ShiftLeft(const Vec256<int16_t> v) {
+  return Vec256<int16_t>{_mm256_slli_epi16(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<int32_t> ShiftLeft(const Vec256<int32_t> v) {
+  return Vec256<int32_t>{_mm256_slli_epi32(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<int64_t> ShiftLeft(const Vec256<int64_t> v) {
+  return Vec256<int64_t>{_mm256_slli_epi64(v.raw, kBits)};
+}
+
+template <int kBits, typename T, HWY_IF_LANE_SIZE(T, 1)>
+HWY_API Vec256<T> ShiftLeft(const Vec256<T> v) {
+  const Full256<T> d8;
+  const RepartitionToWide<decltype(d8)> d16;
+  // Shift as 16-bit lanes, then clear bits that leaked in from the low byte.
+  const auto keep = Set(d8, static_cast<T>((0xFF << kBits) & 0xFF));
+  const auto wide = BitCast(d8, ShiftLeft<kBits>(BitCast(d16, v)));
+  return kBits == 1 ? (v + v) : (wide & keep);
+}
+
+// ------------------------------ ShiftRight
+
+template <int kBits>
+HWY_API Vec256<uint16_t> ShiftRight(const Vec256<uint16_t> v) {
+  return Vec256<uint16_t>{_mm256_srli_epi16(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<uint32_t> ShiftRight(const Vec256<uint32_t> v) {
+  return Vec256<uint32_t>{_mm256_srli_epi32(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<uint64_t> ShiftRight(const Vec256<uint64_t> v) {
+  return Vec256<uint64_t>{_mm256_srli_epi64(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<uint8_t> ShiftRight(const Vec256<uint8_t> v) {
+  const Full256<uint8_t> d8;
+  // Shift as u16 lanes (reusing raw), then clear bits from the upper neighbor.
+  const Vec256<uint8_t> shifted{ShiftRight<kBits>(Vec256<uint16_t>{v.raw}).raw};
+  return shifted & Set(d8, 0xFF >> kBits);
+}
+
+template <int kBits>
+HWY_API Vec256<int16_t> ShiftRight(const Vec256<int16_t> v) {
+  return Vec256<int16_t>{_mm256_srai_epi16(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<int32_t> ShiftRight(const Vec256<int32_t> v) {
+  return Vec256<int32_t>{_mm256_srai_epi32(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec256<int8_t> ShiftRight(const Vec256<int8_t> v) {
+  const Full256<uint8_t> du;
+  const Full256<int8_t> di;
+  const auto sign_pos = BitCast(di, Set(du, 0x80 >> kBits));
+  const auto logical = BitCast(di, ShiftRight<kBits>(BitCast(du, v)));
+  return (logical ^ sign_pos) - sign_pos;  // sign-extend the logical shift
+}
+
+// i64 is implemented after BroadcastSignBit.
+
+// ------------------------------ BroadcastSignBit (ShiftRight, compare, mask)
+
+HWY_API Vec256<int8_t> BroadcastSignBit(const Vec256<int8_t> v) {
+  return VecFromMask(v < Zero(Full256<int8_t>()));
+}
+
+HWY_API Vec256<int16_t> BroadcastSignBit(const Vec256<int16_t> v) {
+  return ShiftRight<15>(v);
+}
+
+HWY_API Vec256<int32_t> BroadcastSignBit(const Vec256<int32_t> v) {
+  return ShiftRight<31>(v);
+}
+
+HWY_API Vec256<int64_t> BroadcastSignBit(const Vec256<int64_t> v) {
+#if HWY_TARGET != HWY_AVX2
+  return Vec256<int64_t>{_mm256_srai_epi64(v.raw, 63)};
+#else  // AVX2 has no 64-bit arithmetic shift: compare against zero instead
+  return VecFromMask(v < Zero(Full256<int64_t>()));
+#endif
+}
+
+template <int kBits>
+HWY_API Vec256<int64_t> ShiftRight(const Vec256<int64_t> v) {
+#if HWY_TARGET == HWY_AVX3
+  return Vec256<int64_t>{_mm256_srai_epi64(v.raw, kBits)};
+#else
+  // Emulate: logical shift, then OR in copies of the sign bit at the top.
+  const Full256<uint64_t> du;
+  const auto sign_fill = ShiftLeft<64 - kBits>(BroadcastSignBit(v));
+  const auto logical = ShiftRight<kBits>(BitCast(du, v));
+  return BitCast(Full256<int64_t>(), logical) | sign_fill;
+#endif
+}
+
+HWY_API Vec256<int64_t> Abs(const Vec256<int64_t> v) {
+#if HWY_TARGET == HWY_AVX3
+  return Vec256<int64_t>{_mm256_abs_epi64(v.raw)};
+#else
+  const auto is_negative = MaskFromVec(BroadcastSignBit(v));
+  return IfThenElse(is_negative, Zero(Full256<int64_t>()) - v, v);
+#endif
+}
+
+// ------------------------------ ShiftLeftSame
+
+HWY_API Vec256<uint16_t> ShiftLeftSame(const Vec256<uint16_t> v,
+                                       const int bits) {
+  return Vec256<uint16_t>{_mm256_sll_epi16(v.raw, _mm_cvtsi32_si128(bits))};
+}
+HWY_API Vec256<uint32_t> ShiftLeftSame(const Vec256<uint32_t> v,
+                                       const int bits) {
+  return Vec256<uint32_t>{_mm256_sll_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+HWY_API Vec256<uint64_t> ShiftLeftSame(const Vec256<uint64_t> v,
+                                       const int bits) {
+  return Vec256<uint64_t>{_mm256_sll_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+HWY_API Vec256<int16_t> ShiftLeftSame(const Vec256<int16_t> v, const int bits) {
+  return Vec256<int16_t>{_mm256_sll_epi16(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+HWY_API Vec256<int32_t> ShiftLeftSame(const Vec256<int32_t> v, const int bits) {
+  return Vec256<int32_t>{_mm256_sll_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+HWY_API Vec256<int64_t> ShiftLeftSame(const Vec256<int64_t> v, const int bits) {
+  return Vec256<int64_t>{_mm256_sll_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+template <typename T, HWY_IF_LANE_SIZE(T, 1)>
+HWY_API Vec256<T> ShiftLeftSame(const Vec256<T> v, const int bits) {
+  const Full256<T> d8;  // 8-bit lanes; the mask below is cast to T explicitly
+  const RepartitionToWide<decltype(d8)> d16;  // shift as 16-bit lanes
+  const auto shifted = BitCast(d8, ShiftLeftSame(BitCast(d16, v), bits));
+  return shifted & Set(d8, static_cast<T>((0xFF << bits) & 0xFF));
+}
+
+// ------------------------------ ShiftRightSame (BroadcastSignBit)
+
+HWY_API Vec256<uint16_t> ShiftRightSame(const Vec256<uint16_t> v,
+                                        const int bits) {
+  return Vec256<uint16_t>{_mm256_srl_epi16(v.raw, _mm_cvtsi32_si128(bits))};
+}
+HWY_API Vec256<uint32_t> ShiftRightSame(const Vec256<uint32_t> v,
+                                        const int bits) {
+  return Vec256<uint32_t>{_mm256_srl_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+HWY_API Vec256<uint64_t> ShiftRightSame(const Vec256<uint64_t> v,
+                                        const int bits) {
+  return Vec256<uint64_t>{_mm256_srl_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+HWY_API Vec256<uint8_t> ShiftRightSame(Vec256<uint8_t> v, const int bits) {
+  const Full256<uint8_t> d8;  // mask below is narrowed to u8 explicitly
+  const RepartitionToWide<decltype(d8)> d16;  // shift as 16-bit lanes
+  const auto shifted = BitCast(d8, ShiftRightSame(BitCast(d16, v), bits));
+  return shifted & Set(d8, static_cast<uint8_t>(0xFF >> bits));
+}
+
+HWY_API Vec256<int16_t> ShiftRightSame(const Vec256<int16_t> v,
+                                       const int bits) {
+  return Vec256<int16_t>{_mm256_sra_epi16(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+HWY_API Vec256<int32_t> ShiftRightSame(const Vec256<int32_t> v,
+                                       const int bits) {
+  return Vec256<int32_t>{_mm256_sra_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+HWY_API Vec256<int64_t> ShiftRightSame(const Vec256<int64_t> v,
+                                       const int bits) {
+#if HWY_TARGET == HWY_AVX3
+  return Vec256<int64_t>{_mm256_sra_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+#else
+  // Emulate: logical shift, then OR in copies of the sign bit at the top.
+  const Full256<uint64_t> du;
+  const auto sign_fill = ShiftLeftSame(BroadcastSignBit(v), 64 - bits);
+  const auto logical = ShiftRightSame(BitCast(du, v), bits);
+  return BitCast(Full256<int64_t>(), logical) | sign_fill;
+#endif
+}
+
+HWY_API Vec256<int8_t> ShiftRightSame(Vec256<int8_t> v, const int bits) {
+  const Full256<int8_t> di;
+  const Full256<uint8_t> du;  // logical shift as u8, then sign-extend
+  const auto shifted = BitCast(di, ShiftRightSame(BitCast(du, v), bits));
+  const auto sign = BitCast(di, Set(du, static_cast<uint8_t>(0x80 >> bits)));
+  return (shifted ^ sign) - sign;
+}
+
+// ------------------------------ Negate
+
+template <typename T, HWY_IF_FLOAT(T)>
+HWY_API Vec256<T> Neg(const Vec256<T> v) {
+  return Xor(v, SignBit(Full256<T>()));
+}
+
+template <typename T, HWY_IF_NOT_FLOAT(T)>
+HWY_API Vec256<T> Neg(const Vec256<T> v) {
+  return Zero(Full256<T>()) - v;
+}
+
+// ------------------------------ Floating-point mul / div
+
+HWY_API Vec256<float> operator*(const Vec256<float> a, const Vec256<float> b) {
+  return Vec256<float>{_mm256_mul_ps(a.raw, b.raw)};
+}
+HWY_API Vec256<double> operator*(const Vec256<double> a,
+                                 const Vec256<double> b) {
+  return Vec256<double>{_mm256_mul_pd(a.raw, b.raw)};
+}
+
+HWY_API Vec256<float> operator/(const Vec256<float> a, const Vec256<float> b) {
+  return Vec256<float>{_mm256_div_ps(a.raw, b.raw)};
+}
+HWY_API Vec256<double> operator/(const Vec256<double> a,
+                                 const Vec256<double> b) {
+  return Vec256<double>{_mm256_div_pd(a.raw, b.raw)};
+}
+
+// Approximate reciprocal
+HWY_API Vec256<float> ApproximateReciprocal(const Vec256<float> v) {
+  return Vec256<float>{_mm256_rcp_ps(v.raw)};
+}
+
+// Absolute value of difference.
+HWY_API Vec256<float> AbsDiff(const Vec256<float> a, const Vec256<float> b) {
+  return Abs(a - b);
+}
+
+// ------------------------------ Floating-point multiply-add variants
+
+// Returns mul * x + add
+HWY_API Vec256<float> MulAdd(const Vec256<float> mul, const Vec256<float> x,
+                             const Vec256<float> add) {
+#ifdef HWY_DISABLE_BMI2_FMA
+  return mul * x + add;
+#else
+  return Vec256<float>{_mm256_fmadd_ps(mul.raw, x.raw, add.raw)};
+#endif
+}
+HWY_API Vec256<double> MulAdd(const Vec256<double> mul, const Vec256<double> x,
+                              const Vec256<double> add) {
+#ifdef HWY_DISABLE_BMI2_FMA
+  return mul * x + add;
+#else
+  return Vec256<double>{_mm256_fmadd_pd(mul.raw, x.raw, add.raw)};
+#endif
+}
+
+// Returns add - mul * x
+HWY_API Vec256<float> NegMulAdd(const Vec256<float> mul, const Vec256<float> x,
+                                const Vec256<float> add) {
+#ifdef HWY_DISABLE_BMI2_FMA
+  return add - mul * x;
+#else
+  return Vec256<float>{_mm256_fnmadd_ps(mul.raw, x.raw, add.raw)};
+#endif
+}
+HWY_API Vec256<double> NegMulAdd(const Vec256<double> mul,
+                                 const Vec256<double> x,
+                                 const Vec256<double> add) {
+#ifdef HWY_DISABLE_BMI2_FMA
+  return add - mul * x;
+#else
+  return Vec256<double>{_mm256_fnmadd_pd(mul.raw, x.raw, add.raw)};
+#endif
+}
+
+// Returns mul * x - sub
+HWY_API Vec256<float> MulSub(const Vec256<float> mul, const Vec256<float> x,
+                             const Vec256<float> sub) {
+#ifdef HWY_DISABLE_BMI2_FMA
+  return mul * x - sub;
+#else
+  return Vec256<float>{_mm256_fmsub_ps(mul.raw, x.raw, sub.raw)};
+#endif
+}
+HWY_API Vec256<double> MulSub(const Vec256<double> mul, const Vec256<double> x,
+                              const Vec256<double> sub) {
+#ifdef HWY_DISABLE_BMI2_FMA
+  return mul * x - sub;
+#else
+  return Vec256<double>{_mm256_fmsub_pd(mul.raw, x.raw, sub.raw)};
+#endif
+}
+
+// Returns -mul * x - sub
+HWY_API Vec256<float> NegMulSub(const Vec256<float> mul, const Vec256<float> x,
+                                const Vec256<float> sub) {
+#ifdef HWY_DISABLE_BMI2_FMA
+  return Neg(mul * x) - sub;
+#else
+  return Vec256<float>{_mm256_fnmsub_ps(mul.raw, x.raw, sub.raw)};
+#endif
+}
+HWY_API Vec256<double> NegMulSub(const Vec256<double> mul,
+                                 const Vec256<double> x,
+                                 const Vec256<double> sub) {
+#ifdef HWY_DISABLE_BMI2_FMA
+  return Neg(mul * x) - sub;
+#else
+  return Vec256<double>{_mm256_fnmsub_pd(mul.raw, x.raw, sub.raw)};
+#endif
+}
+
+// ------------------------------ Floating-point square root
+
+// Full precision square root
+HWY_API Vec256<float> Sqrt(const Vec256<float> v) {
+  return Vec256<float>{_mm256_sqrt_ps(v.raw)};
+}
+HWY_API Vec256<double> Sqrt(const Vec256<double> v) {
+  return Vec256<double>{_mm256_sqrt_pd(v.raw)};
+}
+
+// Approximate reciprocal square root
+HWY_API Vec256<float> ApproximateReciprocalSqrt(const Vec256<float> v) {
+  return Vec256<float>{_mm256_rsqrt_ps(v.raw)};
+}
+
+// ------------------------------ Floating-point rounding
+
+// Toward nearest integer, tie to even
+HWY_API Vec256<float> Round(const Vec256<float> v) {
+  return Vec256<float>{
+      _mm256_round_ps(v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)};
+}
+HWY_API Vec256<double> Round(const Vec256<double> v) {
+  return Vec256<double>{
+      _mm256_round_pd(v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)};
+}
+
+// Toward zero, aka truncate
+HWY_API Vec256<float> Trunc(const Vec256<float> v) {
+  return Vec256<float>{
+      _mm256_round_ps(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)};
+}
+HWY_API Vec256<double> Trunc(const Vec256<double> v) {
+  return Vec256<double>{
+      _mm256_round_pd(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)};
+}
+
+// Toward +infinity, aka ceiling
+HWY_API Vec256<float> Ceil(const Vec256<float> v) {
+  return Vec256<float>{
+      _mm256_round_ps(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)};
+}
+HWY_API Vec256<double> Ceil(const Vec256<double> v) {
+  return Vec256<double>{
+      _mm256_round_pd(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)};
+}
+
+// Toward -infinity, aka floor
+HWY_API Vec256<float> Floor(const Vec256<float> v) {
+  return Vec256<float>{
+      _mm256_round_ps(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)};
+}
+HWY_API Vec256<double> Floor(const Vec256<double> v) {
+  return Vec256<double>{
+      _mm256_round_pd(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)};
+}
+
+// ================================================== MEMORY
+
+// ------------------------------ Load
+
+template <typename T>
+HWY_API Vec256<T> Load(Full256<T> /* tag */, const T* HWY_RESTRICT aligned) {
+  return Vec256<T>{
+      _mm256_load_si256(reinterpret_cast<const __m256i*>(aligned))};
+}
+HWY_API Vec256<float> Load(Full256<float> /* tag */,
+                           const float* HWY_RESTRICT aligned) {
+  return Vec256<float>{_mm256_load_ps(aligned)};
+}
+HWY_API Vec256<double> Load(Full256<double> /* tag */,
+                            const double* HWY_RESTRICT aligned) {
+  return Vec256<double>{_mm256_load_pd(aligned)};
+}
+
+template <typename T>
+HWY_API Vec256<T> LoadU(Full256<T> /* tag */, const T* HWY_RESTRICT p) {
+  return Vec256<T>{_mm256_loadu_si256(reinterpret_cast<const __m256i*>(p))};
+}
+HWY_API Vec256<float> LoadU(Full256<float> /* tag */,
+                            const float* HWY_RESTRICT p) {
+  return Vec256<float>{_mm256_loadu_ps(p)};
+}
+HWY_API Vec256<double> LoadU(Full256<double> /* tag */,
+                             const double* HWY_RESTRICT p) {
+  return Vec256<double>{_mm256_loadu_pd(p)};
+}
+
+// Loads 128 bit and duplicates into both 128-bit halves. This avoids the
+// 3-cycle cost of moving data between 128-bit halves and avoids port 5.
+template <typename T>
+HWY_API Vec256<T> LoadDup128(Full256<T> /* tag */, const T* HWY_RESTRICT p) {
+#if HWY_LOADDUP_ASM
+  __m256i out;
+  asm("vbroadcasti128 %1, %[reg]" : [ reg ] "=x"(out) : "m"(p[0]));
+  return Vec256<T>{out};
+#else
+  return Vec256<T>{_mm256_broadcastsi128_si256(LoadU(Full128<T>(), p).raw)};
+#endif
+}
+HWY_API Vec256<float> LoadDup128(Full256<float> /* tag */,
+                                 const float* const HWY_RESTRICT p) {
+#if HWY_LOADDUP_ASM
+  __m256 out;
+  asm("vbroadcastf128 %1, %[reg]" : [ reg ] "=x"(out) : "m"(p[0]));
+  return Vec256<float>{out};
+#else
+  return Vec256<float>{_mm256_broadcast_ps(reinterpret_cast<const __m128*>(p))};
+#endif
+}
+HWY_API Vec256<double> LoadDup128(Full256<double> /* tag */,
+                                  const double* const HWY_RESTRICT p) {
+#if HWY_LOADDUP_ASM
+  __m256d out;
+  asm("vbroadcastf128 %1, %[reg]" : [ reg ] "=x"(out) : "m"(p[0]));
+  return Vec256<double>{out};
+#else
+  return Vec256<double>{
+      _mm256_broadcast_pd(reinterpret_cast<const __m128d*>(p))};
+#endif
+}
+
+// ------------------------------ Store
+
+template <typename T>
+HWY_API void Store(Vec256<T> v, Full256<T> /* tag */, T* HWY_RESTRICT aligned) {
+  _mm256_store_si256(reinterpret_cast<__m256i*>(aligned), v.raw);
+}
+HWY_API void Store(const Vec256<float> v, Full256<float> /* tag */,
+                   float* HWY_RESTRICT aligned) {
+  _mm256_store_ps(aligned, v.raw);
+}
+HWY_API void Store(const Vec256<double> v, Full256<double> /* tag */,
+                   double* HWY_RESTRICT aligned) {
+  _mm256_store_pd(aligned, v.raw);
+}
+
+template <typename T>
+HWY_API void StoreU(Vec256<T> v, Full256<T> /* tag */, T* HWY_RESTRICT p) {
+  _mm256_storeu_si256(reinterpret_cast<__m256i*>(p), v.raw);
+}
+HWY_API void StoreU(const Vec256<float> v, Full256<float> /* tag */,
+                    float* HWY_RESTRICT p) {
+  _mm256_storeu_ps(p, v.raw);
+}
+HWY_API void StoreU(const Vec256<double> v, Full256<double> /* tag */,
+                    double* HWY_RESTRICT p) {
+  _mm256_storeu_pd(p, v.raw);
+}
+
+// ------------------------------ Non-temporal stores
+
+template <typename T>
+HWY_API void Stream(Vec256<T> v, Full256<T> /* tag */,
+                    T* HWY_RESTRICT aligned) {
+  _mm256_stream_si256(reinterpret_cast<__m256i*>(aligned), v.raw);
+}
+HWY_API void Stream(const Vec256<float> v, Full256<float> /* tag */,
+                    float* HWY_RESTRICT aligned) {
+  _mm256_stream_ps(aligned, v.raw);
+}
+HWY_API void Stream(const Vec256<double> v, Full256<double> /* tag */,
+                    double* HWY_RESTRICT aligned) {
+  _mm256_stream_pd(aligned, v.raw);
+}
+
+// ------------------------------ Scatter
+
+// Work around warnings in the intrinsic definitions (passing -1 as a mask).
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4245 4365, ignored "-Wsign-conversion")
+
+#if HWY_TARGET == HWY_AVX3
+namespace detail {
+
+template <typename T>
+HWY_API void ScatterOffset(hwy::SizeTag<4> /* tag */, Vec256<T> v,
+                           Full256<T> /* tag */, T* HWY_RESTRICT base,
+                           const Vec256<int32_t> offset) {
+  _mm256_i32scatter_epi32(base, offset.raw, v.raw, 1);
+}
+template <typename T>
+HWY_API void ScatterIndex(hwy::SizeTag<4> /* tag */, Vec256<T> v,
+                          Full256<T> /* tag */, T* HWY_RESTRICT base,
+                          const Vec256<int32_t> index) {
+  _mm256_i32scatter_epi32(base, index.raw, v.raw, 4);
+}
+
+template <typename T>
+HWY_API void ScatterOffset(hwy::SizeTag<8> /* tag */, Vec256<T> v,
+                           Full256<T> /* tag */, T* HWY_RESTRICT base,
+                           const Vec256<int64_t> offset) {
+  _mm256_i64scatter_epi64(base, offset.raw, v.raw, 1);
+}
+template <typename T>
+HWY_API void ScatterIndex(hwy::SizeTag<8> /* tag */, Vec256<T> v,
+                          Full256<T> /* tag */, T* HWY_RESTRICT base,
+                          const Vec256<int64_t> index) {
+  _mm256_i64scatter_epi64(base, index.raw, v.raw, 8);
+}
+
+}  // namespace detail
+
+template <typename T, typename Offset>
+HWY_API void ScatterOffset(Vec256<T> v, Full256<T> d, T* HWY_RESTRICT base,
+                           const Vec256<Offset> offset) {
+  static_assert(sizeof(T) == sizeof(Offset), "Must match for portability");
+  return detail::ScatterOffset(hwy::SizeTag<sizeof(T)>(), v, d, base, offset);
+}
+template <typename T, typename Index>
+HWY_API void ScatterIndex(Vec256<T> v, Full256<T> d, T* HWY_RESTRICT base,
+                          const Vec256<Index> index) {
+  static_assert(sizeof(T) == sizeof(Index), "Must match for portability");
+  return detail::ScatterIndex(hwy::SizeTag<sizeof(T)>(), v, d, base, index);
+}
+
+template <>
+HWY_INLINE void ScatterOffset<float>(Vec256<float> v, Full256<float> /* tag */,
+                                     float* HWY_RESTRICT base,
+                                     const Vec256<int32_t> offset) {
+  _mm256_i32scatter_ps(base, offset.raw, v.raw, 1);
+}
+template <>
+HWY_INLINE void ScatterIndex<float>(Vec256<float> v, Full256<float> /* tag */,
+                                    float* HWY_RESTRICT base,
+                                    const Vec256<int32_t> index) {
+  _mm256_i32scatter_ps(base, index.raw, v.raw, 4);
+}
+
+template <>
+HWY_INLINE void ScatterOffset<double>(Vec256<double> v,
+                                      Full256<double> /* tag */,
+                                      double* HWY_RESTRICT base,
+                                      const Vec256<int64_t> offset) {
+  _mm256_i64scatter_pd(base, offset.raw, v.raw, 1);
+}
+template <>
+HWY_INLINE void ScatterIndex<double>(Vec256<double> v,
+                                     Full256<double> /* tag */,
+                                     double* HWY_RESTRICT base,
+                                     const Vec256<int64_t> index) {
+  _mm256_i64scatter_pd(base, index.raw, v.raw, 8);
+}
+
+#else
+
+template <typename T, typename Offset>
+HWY_API void ScatterOffset(Vec256<T> v, Full256<T> d, T* HWY_RESTRICT base,
+                           const Vec256<Offset> offset) {
+  static_assert(sizeof(T) == sizeof(Offset), "Must match for portability");
+  constexpr size_t kLanes = 32 / sizeof(T);
+
+  // Spill values and byte offsets to aligned buffers, then store per lane.
+  alignas(32) Offset byte_offsets[kLanes];
+  Store(offset, Simd<Offset, kLanes>(), byte_offsets);
+  alignas(32) T values[kLanes];
+  Store(v, d, values);
+
+  uint8_t* const dest = reinterpret_cast<uint8_t*>(base);
+  for (size_t lane = 0; lane != kLanes; ++lane) {
+    CopyBytes<sizeof(T)>(values + lane, dest + byte_offsets[lane]);
+  }
+}
+
+template <typename T, typename Index>
+HWY_API void ScatterIndex(Vec256<T> v, Full256<T> d, T* HWY_RESTRICT base,
+                          const Vec256<Index> index) {
+  static_assert(sizeof(T) == sizeof(Index), "Must match for portability");
+  constexpr size_t kLanes = 32 / sizeof(T);
+
+  // Spill values and indices to aligned buffers, then store lane by lane.
+  alignas(32) Index positions[kLanes];
+  Store(index, Simd<Index, kLanes>(), positions);
+  alignas(32) T values[kLanes];
+  Store(v, d, values);
+
+  for (size_t lane = 0; lane != kLanes; ++lane) {
+    base[positions[lane]] = values[lane];
+  }
+}
+
+#endif
+
+// ------------------------------ Gather
+
+namespace detail {
+
+template <typename T>
+HWY_API Vec256<T> GatherOffset(hwy::SizeTag<4> /* tag */, Full256<T> /* tag */,
+                               const T* HWY_RESTRICT base,
+                               const Vec256<int32_t> offset) {
+  return Vec256<T>{_mm256_i32gather_epi32(
+      reinterpret_cast<const int32_t*>(base), offset.raw, 1)};
+}
+template <typename T>
+HWY_API Vec256<T> GatherIndex(hwy::SizeTag<4> /* tag */, Full256<T> /* tag */,
+                              const T* HWY_RESTRICT base,
+                              const Vec256<int32_t> index) {
+  return Vec256<T>{_mm256_i32gather_epi32(
+      reinterpret_cast<const int32_t*>(base), index.raw, 4)};
+}
+
+template <typename T>
+HWY_API Vec256<T> GatherOffset(hwy::SizeTag<8> /* tag */, Full256<T> /* tag */,
+                               const T* HWY_RESTRICT base,
+                               const Vec256<int64_t> offset) {
+  return Vec256<T>{_mm256_i64gather_epi64(
+      reinterpret_cast<const GatherIndex64*>(base), offset.raw, 1)};
+}
+template <typename T>
+HWY_API Vec256<T> GatherIndex(hwy::SizeTag<8> /* tag */, Full256<T> /* tag */,
+                              const T* HWY_RESTRICT base,
+                              const Vec256<int64_t> index) {
+  return Vec256<T>{_mm256_i64gather_epi64(
+      reinterpret_cast<const GatherIndex64*>(base), index.raw, 8)};
+}
+
+}  // namespace detail
+
+template <typename T, typename Offset>
+HWY_API Vec256<T> GatherOffset(Full256<T> d, const T* HWY_RESTRICT base,
+                               const Vec256<Offset> offset) {
+  static_assert(sizeof(T) == sizeof(Offset), "Must match for portability");
+  return detail::GatherOffset(hwy::SizeTag<sizeof(T)>(), d, base, offset);
+}
+template <typename T, typename Index>
+HWY_API Vec256<T> GatherIndex(Full256<T> d, const T* HWY_RESTRICT base,
+                              const Vec256<Index> index) {
+  static_assert(sizeof(T) == sizeof(Index), "Must match for portability");
+  return detail::GatherIndex(hwy::SizeTag<sizeof(T)>(), d, base, index);
+}
+
+template <>
+HWY_INLINE Vec256<float> GatherOffset<float>(Full256<float> /* tag */,
+                                             const float* HWY_RESTRICT base,
+                                             const Vec256<int32_t> offset) {
+  return Vec256<float>{_mm256_i32gather_ps(base, offset.raw, 1)};
+}
+template <>
+HWY_INLINE Vec256<float> GatherIndex<float>(Full256<float> /* tag */,
+                                            const float* HWY_RESTRICT base,
+                                            const Vec256<int32_t> index) {
+  return Vec256<float>{_mm256_i32gather_ps(base, index.raw, 4)};
+}
+
+template <>
+HWY_INLINE Vec256<double> GatherOffset<double>(Full256<double> /* tag */,
+                                               const double* HWY_RESTRICT base,
+                                               const Vec256<int64_t> offset) {
+  return Vec256<double>{_mm256_i64gather_pd(base, offset.raw, 1)};
+}
+template <>
+HWY_INLINE Vec256<double> GatherIndex<double>(Full256<double> /* tag */,
+                                              const double* HWY_RESTRICT base,
+                                              const Vec256<int64_t> index) {
+  return Vec256<double>{_mm256_i64gather_pd(base, index.raw, 8)};
+}
+
+HWY_DIAGNOSTICS(pop)
+
+// ================================================== SWIZZLE
+
+template <typename T>
+HWY_API T GetLane(const Vec256<T> v) {
+  return GetLane(LowerHalf(v));
+}
+
+// ------------------------------ Extract half
+
+template <typename T>
+HWY_API Vec128<T> LowerHalf(Vec256<T> v) {
+  return Vec128<T>{_mm256_castsi256_si128(v.raw)};
+}
+template <>
+HWY_INLINE Vec128<float> LowerHalf(Vec256<float> v) {
+  return Vec128<float>{_mm256_castps256_ps128(v.raw)};
+}
+template <>
+HWY_INLINE Vec128<double> LowerHalf(Vec256<double> v) {
+  return Vec128<double>{_mm256_castpd256_pd128(v.raw)};
+}
+
+template <typename T>
+HWY_API Vec128<T> UpperHalf(Vec256<T> v) {
+  return Vec128<T>{_mm256_extracti128_si256(v.raw, 1)};
+}
+template <>
+HWY_INLINE Vec128<float> UpperHalf(Vec256<float> v) {
+  return Vec128<float>{_mm256_extractf128_ps(v.raw, 1)};
+}
+template <>
+HWY_INLINE Vec128<double> UpperHalf(Vec256<double> v) {
+  return Vec128<double>{_mm256_extractf128_pd(v.raw, 1)};
+}
+
+// ------------------------------ ZeroExtendVector
+
+// Unfortunately the initial _mm256_castsi128_si256 intrinsic leaves the upper
+// bits undefined. Although it makes sense for them to be zero (VEX encoded
+// 128-bit instructions zero the upper lanes to avoid large penalties), a
+// compiler could decide to optimize out code that relies on this.
+//
+// The newer _mm256_zextsi128_si256 intrinsic fixes this by specifying the
+// zeroing, but it is not available on GCC until 10.1. For older GCC, we can
+// still obtain the desired code thanks to pattern recognition; note that the
+// expensive insert instruction is not actually generated, see
+// https://gcc.godbolt.org/z/1MKGaP.
+
+template <typename T>
+HWY_API Vec256<T> ZeroExtendVector(Vec128<T> lo) {
+#if !HWY_COMPILER_CLANG && HWY_COMPILER_GCC && (HWY_COMPILER_GCC < 1000)
+  return Vec256<T>{_mm256_inserti128_si256(_mm256_setzero_si256(), lo.raw, 0)};
+#else
+  return Vec256<T>{_mm256_zextsi128_si256(lo.raw)};
+#endif
+}
+template <>
+HWY_INLINE Vec256<float> ZeroExtendVector(Vec128<float> lo) {
+#if !HWY_COMPILER_CLANG && HWY_COMPILER_GCC && (HWY_COMPILER_GCC < 1000)
+  return Vec256<float>{_mm256_insertf128_ps(_mm256_setzero_ps(), lo.raw, 0)};
+#else
+  return Vec256<float>{_mm256_zextps128_ps256(lo.raw)};
+#endif
+}
+template <>
+HWY_INLINE Vec256<double> ZeroExtendVector(Vec128<double> lo) {
+#if !HWY_COMPILER_CLANG && HWY_COMPILER_GCC && (HWY_COMPILER_GCC < 1000)
+  return Vec256<double>{_mm256_insertf128_pd(_mm256_setzero_pd(), lo.raw, 0)};
+#else
+  return Vec256<double>{_mm256_zextpd128_pd256(lo.raw)};
+#endif
+}
+
+// ------------------------------ Combine
+
+template <typename T>
+HWY_API Vec256<T> Combine(Vec128<T> hi, Vec128<T> lo) {
+  const auto lo256 = ZeroExtendVector(lo);
+  return Vec256<T>{_mm256_inserti128_si256(lo256.raw, hi.raw, 1)};
+}
+template <>
+HWY_INLINE Vec256<float> Combine(Vec128<float> hi, Vec128<float> lo) {
+  const auto lo256 = ZeroExtendVector(lo);
+  return Vec256<float>{_mm256_insertf128_ps(lo256.raw, hi.raw, 1)};
+}
+template <>
+HWY_INLINE Vec256<double> Combine(Vec128<double> hi, Vec128<double> lo) {
+  const auto lo256 = ZeroExtendVector(lo);
+  return Vec256<double>{_mm256_insertf128_pd(lo256.raw, hi.raw, 1)};
+}
+
+// ------------------------------ Shift vector by constant #bytes
+
+// 0x01..0F, kBytes = 1 => 0x02..0F00
+template <int kBytes, typename T>
+HWY_API Vec256<T> ShiftLeftBytes(const Vec256<T> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  // This is the same operation as _mm256_bslli_epi128.
+  return Vec256<T>{_mm256_slli_si256(v.raw, kBytes)};
+}
+
+template <int kLanes, typename T>
+HWY_API Vec256<T> ShiftLeftLanes(const Vec256<T> v) {
+  const Full256<uint8_t> d8;
+  const Full256<T> d;
+  return BitCast(d, ShiftLeftBytes<kLanes * sizeof(T)>(BitCast(d8, v)));
+}
+
+// 0x01..0F, kBytes = 1 => 0x0001..0E
+template <int kBytes, typename T>
+HWY_API Vec256<T> ShiftRightBytes(const Vec256<T> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  // This is the same operation as _mm256_bsrli_epi128.
+  return Vec256<T>{_mm256_srli_si256(v.raw, kBytes)};
+}
+
+template <int kLanes, typename T>
+HWY_API Vec256<T> ShiftRightLanes(const Vec256<T> v) {
+  const Full256<uint8_t> d8;
+  const Full256<T> d;
+  return BitCast(d, ShiftRightBytes<kLanes * sizeof(T)>(BitCast(d8, v)));
+}
+
+// ------------------------------ Extract from 2x 128-bit at constant offset
+
+// Per 128-bit block: extracts 128 bits from <hi, lo>, skipping the low kBytes.
+template <int kBytes, typename T>
+HWY_API Vec256<T> CombineShiftRightBytes(const Vec256<T> hi,
+                                         const Vec256<T> lo) {
+  const Full256<uint8_t> d8;
+  const Vec256<uint8_t> extracted_bytes{
+      _mm256_alignr_epi8(BitCast(d8, hi).raw, BitCast(d8, lo).raw, kBytes)};
+  return BitCast(Full256<T>(), extracted_bytes);
+}
+
+// ------------------------------ Broadcast/splat any lane
+
+// Unsigned
+template <int kLane>
+HWY_API Vec256<uint16_t> Broadcast(const Vec256<uint16_t> v) {
+  static_assert(0 <= kLane && kLane < 8, "Invalid lane");
+  if (kLane >= 4) {
+    constexpr int kUpperImm = (0x55 * (kLane - 4)) & 0xFF;
+    const __m256i upper = _mm256_shufflehi_epi16(v.raw, kUpperImm);
+    return Vec256<uint16_t>{_mm256_unpackhi_epi64(upper, upper)};
+  }
+  constexpr int kLowerImm = (0x55 * kLane) & 0xFF;
+  const __m256i lower = _mm256_shufflelo_epi16(v.raw, kLowerImm);
+  return Vec256<uint16_t>{_mm256_unpacklo_epi64(lower, lower)};
+}
+template <int kLane>
+HWY_API Vec256<uint32_t> Broadcast(const Vec256<uint32_t> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  return Vec256<uint32_t>{_mm256_shuffle_epi32(v.raw, 0x55 * kLane)};
+}
+template <int kLane>
+HWY_API Vec256<uint64_t> Broadcast(const Vec256<uint64_t> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  return Vec256<uint64_t>{_mm256_shuffle_epi32(v.raw, kLane ? 0xEE : 0x44)};
+}
+
+// Signed
+// Same shuffles as the unsigned overloads; only the lane type differs.
+template <int kLane>
+HWY_API Vec256<int16_t> Broadcast(const Vec256<int16_t> v) {
+  static_assert(0 <= kLane && kLane < 8, "Invalid lane");
+  if (kLane >= 4) {
+    // Lane is in the upper 64 bits of the block: splat it there, then
+    // duplicate that half into the lower half.
+    const __m256i splat_hi =
+        _mm256_shufflehi_epi16(v.raw, (0x55 * (kLane - 4)) & 0xFF);
+    return Vec256<int16_t>{_mm256_unpackhi_epi64(splat_hi, splat_hi)};
+  } else {
+    // Lane is in the lower 64 bits: splat it there, then duplicate upwards.
+    const __m256i splat_lo =
+        _mm256_shufflelo_epi16(v.raw, (0x55 * kLane) & 0xFF);
+    return Vec256<int16_t>{_mm256_unpacklo_epi64(splat_lo, splat_lo)};
+  }
+}
+template <int kLane>
+HWY_API Vec256<int32_t> Broadcast(const Vec256<int32_t> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  // 0x55 * kLane repeats the 2-bit lane index in all four selector fields.
+  const __m256i splat = _mm256_shuffle_epi32(v.raw, 0x55 * kLane);
+  return Vec256<int32_t>{splat};
+}
+template <int kLane>
+HWY_API Vec256<int64_t> Broadcast(const Vec256<int64_t> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  // 0xEE selects u32 pairs {3,2,3,2} (upper i64); 0x44 selects {1,0,1,0}.
+  const __m256i splat = _mm256_shuffle_epi32(v.raw, kLane ? 0xEE : 0x44);
+  return Vec256<int64_t>{splat};
+}
+
+// Float
+// Copies lane kLane of every 128-bit block into all lanes of that block.
+template <int kLane>
+HWY_API Vec256<float> Broadcast(Vec256<float> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  // Both shuffle inputs are v; 0x55 * kLane repeats the lane index four times.
+  const __m256 splat = _mm256_shuffle_ps(v.raw, v.raw, 0x55 * kLane);
+  return Vec256<float>{splat};
+}
+template <int kLane>
+HWY_API Vec256<double> Broadcast(const Vec256<double> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  // One selector bit per output lane: 0 picks all-lower, 15 picks all-upper.
+  const __m256d splat = _mm256_shuffle_pd(v.raw, v.raw, 15 * kLane);
+  return Vec256<double>{splat};
+}
+
+// ------------------------------ Hard-coded shuffles
+
+// Notation: let Vec256<int32_t> have lanes 7,6,5,4,3,2,1,0 (0 is
+// least-significant). Shuffle0321 rotates four-lane blocks one lane to the
+// right (the previous least-significant lane is now most-significant =>
+// 47650321). These could also be implemented via CombineShiftRightBytes but
+// the shuffle_abcd notation is more convenient.
+
+// Swaps the two 32-bit lanes inside every 64-bit pair (lanes 3,2,1,0 of each
+// 128-bit block become 2,3,0,1).
+HWY_API Vec256<uint32_t> Shuffle2301(const Vec256<uint32_t> v) {
+  const __m256i swapped = _mm256_shuffle_epi32(v.raw, 0xB1);
+  return Vec256<uint32_t>{swapped};
+}
+HWY_API Vec256<int32_t> Shuffle2301(const Vec256<int32_t> v) {
+  const __m256i swapped = _mm256_shuffle_epi32(v.raw, 0xB1);
+  return Vec256<int32_t>{swapped};
+}
+HWY_API Vec256<float> Shuffle2301(const Vec256<float> v) {
+  const __m256 swapped = _mm256_shuffle_ps(v.raw, v.raw, 0xB1);
+  return Vec256<float>{swapped};
+}
+
+// Swaps the 64-bit halves of every 128-bit block (32-bit lanes 3,2,1,0 become
+// 1,0,3,2).
+HWY_API Vec256<uint32_t> Shuffle1032(const Vec256<uint32_t> v) {
+  const __m256i swapped = _mm256_shuffle_epi32(v.raw, 0x4E);
+  return Vec256<uint32_t>{swapped};
+}
+HWY_API Vec256<int32_t> Shuffle1032(const Vec256<int32_t> v) {
+  const __m256i swapped = _mm256_shuffle_epi32(v.raw, 0x4E);
+  return Vec256<int32_t>{swapped};
+}
+HWY_API Vec256<float> Shuffle1032(const Vec256<float> v) {
+  // shuffle_ps has a shorter encoding than _mm256_permute_ps.
+  const __m256 swapped = _mm256_shuffle_ps(v.raw, v.raw, 0x4E);
+  return Vec256<float>{swapped};
+}
+// Swaps the two 64-bit lanes of every 128-bit block.
+HWY_API Vec256<uint64_t> Shuffle01(const Vec256<uint64_t> v) {
+  const __m256i swapped = _mm256_shuffle_epi32(v.raw, 0x4E);
+  return Vec256<uint64_t>{swapped};
+}
+HWY_API Vec256<int64_t> Shuffle01(const Vec256<int64_t> v) {
+  const __m256i swapped = _mm256_shuffle_epi32(v.raw, 0x4E);
+  return Vec256<int64_t>{swapped};
+}
+HWY_API Vec256<double> Shuffle01(const Vec256<double> v) {
+  // shuffle_pd has a shorter encoding than _mm256_permute_pd.
+  const __m256d swapped = _mm256_shuffle_pd(v.raw, v.raw, 5);
+  return Vec256<double>{swapped};
+}
+
+// Rotates every 128-bit block right by one 32-bit lane (lanes 3,2,1,0 become
+// 0,3,2,1).
+HWY_API Vec256<uint32_t> Shuffle0321(const Vec256<uint32_t> v) {
+  const __m256i rotated = _mm256_shuffle_epi32(v.raw, 0x39);
+  return Vec256<uint32_t>{rotated};
+}
+HWY_API Vec256<int32_t> Shuffle0321(const Vec256<int32_t> v) {
+  const __m256i rotated = _mm256_shuffle_epi32(v.raw, 0x39);
+  return Vec256<int32_t>{rotated};
+}
+HWY_API Vec256<float> Shuffle0321(const Vec256<float> v) {
+  const __m256 rotated = _mm256_shuffle_ps(v.raw, v.raw, 0x39);
+  return Vec256<float>{rotated};
+}
+// Rotates every 128-bit block left by one 32-bit lane (lanes 3,2,1,0 become
+// 2,1,0,3).
+HWY_API Vec256<uint32_t> Shuffle2103(const Vec256<uint32_t> v) {
+  const __m256i rotated = _mm256_shuffle_epi32(v.raw, 0x93);
+  return Vec256<uint32_t>{rotated};
+}
+HWY_API Vec256<int32_t> Shuffle2103(const Vec256<int32_t> v) {
+  const __m256i rotated = _mm256_shuffle_epi32(v.raw, 0x93);
+  return Vec256<int32_t>{rotated};
+}
+HWY_API Vec256<float> Shuffle2103(const Vec256<float> v) {
+  const __m256 rotated = _mm256_shuffle_ps(v.raw, v.raw, 0x93);
+  return Vec256<float>{rotated};
+}
+
+// Reverses the order of the four 32-bit lanes inside every 128-bit block.
+HWY_API Vec256<uint32_t> Shuffle0123(const Vec256<uint32_t> v) {
+  const __m256i reversed = _mm256_shuffle_epi32(v.raw, 0x1B);
+  return Vec256<uint32_t>{reversed};
+}
+HWY_API Vec256<int32_t> Shuffle0123(const Vec256<int32_t> v) {
+  const __m256i reversed = _mm256_shuffle_epi32(v.raw, 0x1B);
+  return Vec256<int32_t>{reversed};
+}
+HWY_API Vec256<float> Shuffle0123(const Vec256<float> v) {
+  const __m256 reversed = _mm256_shuffle_ps(v.raw, v.raw, 0x1B);
+  return Vec256<float>{reversed};
+}
+
+// ------------------------------ TableLookupLanes
+
+// Returned by SetTableIndices for use by TableLookupLanes.
+// T only tags which lane type the indices are meant for; the payload is
+// always a raw integer vector.
+template <typename T>
+struct Indices256 {
+  __m256i raw;  // lane indices as loaded by SetTableIndices
+};
+
+// Loads 256 bits of int32 lane indices from idx (unaligned) and wraps them for
+// TableLookupLanes. Debug and ASAN builds first check that each of the first
+// 32 / sizeof(T) indices lies in [0, 32 / sizeof(T)).
+template <typename T>
+HWY_API Indices256<T> SetTableIndices(const Full256<T>, const int32_t* idx) {
+#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER)
+  constexpr size_t N = 32 / sizeof(T);
+  for (size_t i = 0; i != N; ++i) {
+    HWY_DASSERT(0 <= idx[i] && idx[i] < static_cast<int32_t>(N));
+  }
+#endif
+  const Full256<int32_t> di;
+  const auto loaded = LoadU(di, idx);
+  return Indices256<T>{loaded.raw};
+}
+
+// Returns a vector whose lane i is v[idx[i]]; unlike the shuffles above, the
+// permute may pull lanes from either 128-bit block.
+HWY_API Vec256<uint32_t> TableLookupLanes(const Vec256<uint32_t> v,
+                                          const Indices256<uint32_t> idx) {
+  const __m256i permuted = _mm256_permutevar8x32_epi32(v.raw, idx.raw);
+  return Vec256<uint32_t>{permuted};
+}
+HWY_API Vec256<int32_t> TableLookupLanes(const Vec256<int32_t> v,
+                                         const Indices256<int32_t> idx) {
+  const __m256i permuted = _mm256_permutevar8x32_epi32(v.raw, idx.raw);
+  return Vec256<int32_t>{permuted};
+}
+HWY_API Vec256<float> TableLookupLanes(const Vec256<float> v,
+                                       const Indices256<float> idx) {
+  const __m256 permuted = _mm256_permutevar8x32_ps(v.raw, idx.raw);
+  return Vec256<float>{permuted};
+}
+
+// ------------------------------ Interleave lanes
+
+// Interleaves lanes from halves of the 128-bit blocks of "a" (which provides
+// the least-significant lane) and "b". To concatenate two half-width integers
+// into one, use ZipLower/Upper instead (also works with scalar).
+
+// InterleaveLower: for every 128-bit block, alternates lanes taken from the
+// lower half of a and b, starting with a (a0, b0, a1, b1, ...).
+
+// Unsigned
+HWY_API Vec256<uint8_t> InterleaveLower(const Vec256<uint8_t> a,
+                                        const Vec256<uint8_t> b) {
+  const __m256i mixed = _mm256_unpacklo_epi8(a.raw, b.raw);
+  return Vec256<uint8_t>{mixed};
+}
+HWY_API Vec256<uint16_t> InterleaveLower(const Vec256<uint16_t> a,
+                                         const Vec256<uint16_t> b) {
+  const __m256i mixed = _mm256_unpacklo_epi16(a.raw, b.raw);
+  return Vec256<uint16_t>{mixed};
+}
+HWY_API Vec256<uint32_t> InterleaveLower(const Vec256<uint32_t> a,
+                                         const Vec256<uint32_t> b) {
+  const __m256i mixed = _mm256_unpacklo_epi32(a.raw, b.raw);
+  return Vec256<uint32_t>{mixed};
+}
+HWY_API Vec256<uint64_t> InterleaveLower(const Vec256<uint64_t> a,
+                                         const Vec256<uint64_t> b) {
+  const __m256i mixed = _mm256_unpacklo_epi64(a.raw, b.raw);
+  return Vec256<uint64_t>{mixed};
+}
+
+// Signed
+HWY_API Vec256<int8_t> InterleaveLower(const Vec256<int8_t> a,
+                                       const Vec256<int8_t> b) {
+  const __m256i mixed = _mm256_unpacklo_epi8(a.raw, b.raw);
+  return Vec256<int8_t>{mixed};
+}
+HWY_API Vec256<int16_t> InterleaveLower(const Vec256<int16_t> a,
+                                        const Vec256<int16_t> b) {
+  const __m256i mixed = _mm256_unpacklo_epi16(a.raw, b.raw);
+  return Vec256<int16_t>{mixed};
+}
+HWY_API Vec256<int32_t> InterleaveLower(const Vec256<int32_t> a,
+                                        const Vec256<int32_t> b) {
+  const __m256i mixed = _mm256_unpacklo_epi32(a.raw, b.raw);
+  return Vec256<int32_t>{mixed};
+}
+HWY_API Vec256<int64_t> InterleaveLower(const Vec256<int64_t> a,
+                                        const Vec256<int64_t> b) {
+  const __m256i mixed = _mm256_unpacklo_epi64(a.raw, b.raw);
+  return Vec256<int64_t>{mixed};
+}
+
+// Float
+HWY_API Vec256<float> InterleaveLower(const Vec256<float> a,
+                                      const Vec256<float> b) {
+  const __m256 mixed = _mm256_unpacklo_ps(a.raw, b.raw);
+  return Vec256<float>{mixed};
+}
+HWY_API Vec256<double> InterleaveLower(const Vec256<double> a,
+                                       const Vec256<double> b) {
+  const __m256d mixed = _mm256_unpacklo_pd(a.raw, b.raw);
+  return Vec256<double>{mixed};
+}
+
+// InterleaveUpper: for every 128-bit block, alternates lanes taken from the
+// upper half of a and b, starting with a.
+
+// Unsigned
+HWY_API Vec256<uint8_t> InterleaveUpper(const Vec256<uint8_t> a,
+                                        const Vec256<uint8_t> b) {
+  const __m256i mixed = _mm256_unpackhi_epi8(a.raw, b.raw);
+  return Vec256<uint8_t>{mixed};
+}
+HWY_API Vec256<uint16_t> InterleaveUpper(const Vec256<uint16_t> a,
+                                         const Vec256<uint16_t> b) {
+  const __m256i mixed = _mm256_unpackhi_epi16(a.raw, b.raw);
+  return Vec256<uint16_t>{mixed};
+}
+HWY_API Vec256<uint32_t> InterleaveUpper(const Vec256<uint32_t> a,
+                                         const Vec256<uint32_t> b) {
+  const __m256i mixed = _mm256_unpackhi_epi32(a.raw, b.raw);
+  return Vec256<uint32_t>{mixed};
+}
+HWY_API Vec256<uint64_t> InterleaveUpper(const Vec256<uint64_t> a,
+                                         const Vec256<uint64_t> b) {
+  const __m256i mixed = _mm256_unpackhi_epi64(a.raw, b.raw);
+  return Vec256<uint64_t>{mixed};
+}
+
+// Signed
+HWY_API Vec256<int8_t> InterleaveUpper(const Vec256<int8_t> a,
+                                       const Vec256<int8_t> b) {
+  const __m256i mixed = _mm256_unpackhi_epi8(a.raw, b.raw);
+  return Vec256<int8_t>{mixed};
+}
+HWY_API Vec256<int16_t> InterleaveUpper(const Vec256<int16_t> a,
+                                        const Vec256<int16_t> b) {
+  const __m256i mixed = _mm256_unpackhi_epi16(a.raw, b.raw);
+  return Vec256<int16_t>{mixed};
+}
+HWY_API Vec256<int32_t> InterleaveUpper(const Vec256<int32_t> a,
+                                        const Vec256<int32_t> b) {
+  const __m256i mixed = _mm256_unpackhi_epi32(a.raw, b.raw);
+  return Vec256<int32_t>{mixed};
+}
+HWY_API Vec256<int64_t> InterleaveUpper(const Vec256<int64_t> a,
+                                        const Vec256<int64_t> b) {
+  const __m256i mixed = _mm256_unpackhi_epi64(a.raw, b.raw);
+  return Vec256<int64_t>{mixed};
+}
+
+// Float
+HWY_API Vec256<float> InterleaveUpper(const Vec256<float> a,
+                                      const Vec256<float> b) {
+  const __m256 mixed = _mm256_unpackhi_ps(a.raw, b.raw);
+  return Vec256<float>{mixed};
+}
+HWY_API Vec256<double> InterleaveUpper(const Vec256<double> a,
+                                       const Vec256<double> b) {
+  const __m256d mixed = _mm256_unpackhi_pd(a.raw, b.raw);
+  return Vec256<double>{mixed};
+}
+
+// ------------------------------ Zip lanes
+
+// Same as Interleave*, except that the return lanes are double-width integers;
+// this is necessary because the single-lane scalar cannot return two values.
+
+// ZipLower: same unpack as InterleaveLower, but each (a, b) lane pair is
+// returned as one lane of twice the width, with a in its lower half.
+
+// Unsigned
+HWY_API Vec256<uint16_t> ZipLower(const Vec256<uint8_t> a,
+                                  const Vec256<uint8_t> b) {
+  const __m256i zipped = _mm256_unpacklo_epi8(a.raw, b.raw);
+  return Vec256<uint16_t>{zipped};
+}
+HWY_API Vec256<uint32_t> ZipLower(const Vec256<uint16_t> a,
+                                  const Vec256<uint16_t> b) {
+  const __m256i zipped = _mm256_unpacklo_epi16(a.raw, b.raw);
+  return Vec256<uint32_t>{zipped};
+}
+HWY_API Vec256<uint64_t> ZipLower(const Vec256<uint32_t> a,
+                                  const Vec256<uint32_t> b) {
+  const __m256i zipped = _mm256_unpacklo_epi32(a.raw, b.raw);
+  return Vec256<uint64_t>{zipped};
+}
+
+// Signed
+HWY_API Vec256<int16_t> ZipLower(const Vec256<int8_t> a,
+                                 const Vec256<int8_t> b) {
+  const __m256i zipped = _mm256_unpacklo_epi8(a.raw, b.raw);
+  return Vec256<int16_t>{zipped};
+}
+HWY_API Vec256<int32_t> ZipLower(const Vec256<int16_t> a,
+                                 const Vec256<int16_t> b) {
+  const __m256i zipped = _mm256_unpacklo_epi16(a.raw, b.raw);
+  return Vec256<int32_t>{zipped};
+}
+HWY_API Vec256<int64_t> ZipLower(const Vec256<int32_t> a,
+                                 const Vec256<int32_t> b) {
+  const __m256i zipped = _mm256_unpacklo_epi32(a.raw, b.raw);
+  return Vec256<int64_t>{zipped};
+}
+
+// ZipUpper: same unpack as InterleaveUpper, but each (a, b) lane pair is
+// returned as one lane of twice the width, with a in its lower half.
+
+// Unsigned
+HWY_API Vec256<uint16_t> ZipUpper(const Vec256<uint8_t> a,
+                                  const Vec256<uint8_t> b) {
+  const __m256i zipped = _mm256_unpackhi_epi8(a.raw, b.raw);
+  return Vec256<uint16_t>{zipped};
+}
+HWY_API Vec256<uint32_t> ZipUpper(const Vec256<uint16_t> a,
+                                  const Vec256<uint16_t> b) {
+  const __m256i zipped = _mm256_unpackhi_epi16(a.raw, b.raw);
+  return Vec256<uint32_t>{zipped};
+}
+HWY_API Vec256<uint64_t> ZipUpper(const Vec256<uint32_t> a,
+                                  const Vec256<uint32_t> b) {
+  const __m256i zipped = _mm256_unpackhi_epi32(a.raw, b.raw);
+  return Vec256<uint64_t>{zipped};
+}
+
+// Signed
+HWY_API Vec256<int16_t> ZipUpper(const Vec256<int8_t> a,
+                                 const Vec256<int8_t> b) {
+  const __m256i zipped = _mm256_unpackhi_epi8(a.raw, b.raw);
+  return Vec256<int16_t>{zipped};
+}
+HWY_API Vec256<int32_t> ZipUpper(const Vec256<int16_t> a,
+                                 const Vec256<int16_t> b) {
+  const __m256i zipped = _mm256_unpackhi_epi16(a.raw, b.raw);
+  return Vec256<int32_t>{zipped};
+}
+HWY_API Vec256<int64_t> ZipUpper(const Vec256<int32_t> a,
+                                 const Vec256<int32_t> b) {
+  const __m256i zipped = _mm256_unpackhi_epi32(a.raw, b.raw);
+  return Vec256<int64_t>{zipped};
+}
+
+// ------------------------------ Blocks (LowerHalf, ZeroExtendVector)
+
+// _mm256_broadcastsi128_si256 has 7 cycle latency. _mm256_permute2x128_si256 is
+// slow on Zen1 (8 uops); we can avoid it for LowerLower and UpperLower, and on
+// UpperUpper at the cost of one extra cycle/instruction.
+
+// hiH,hiL loH,loL |-> hiL,loL (= lower halves)
+// Keeps the lower block of lo in place and overwrites the upper block with
+// the lower block of hi.
+template <typename T>
+HWY_API Vec256<T> ConcatLowerLower(const Vec256<T> hi, const Vec256<T> lo) {
+  const auto hi_lower = LowerHalf(hi);
+  return Vec256<T>{_mm256_inserti128_si256(lo.raw, hi_lower.raw, 1)};
+}
+// Float specialization: same operation via the _ps insert.
+template <>
+HWY_INLINE Vec256<float> ConcatLowerLower(const Vec256<float> hi,
+                                          const Vec256<float> lo) {
+  const auto hi_lower = LowerHalf(hi);
+  return Vec256<float>{_mm256_insertf128_ps(lo.raw, hi_lower.raw, 1)};
+}
+// Double specialization: same operation via the _pd insert.
+template <>
+HWY_INLINE Vec256<double> ConcatLowerLower(const Vec256<double> hi,
+                                           const Vec256<double> lo) {
+  const auto hi_lower = LowerHalf(hi);
+  return Vec256<double>{_mm256_insertf128_pd(lo.raw, hi_lower.raw, 1)};
+}
+
+// hiH,hiL loH,loL |-> hiL,loH (= inner halves / swap blocks)
+// Selector 0x21: the result's lower block is block 1 of the first operand
+// (loH) and its upper block is block 0 of the second operand (hiL).
+template <typename T>
+HWY_API Vec256<T> ConcatLowerUpper(const Vec256<T> hi, const Vec256<T> lo) {
+  const auto inner = _mm256_permute2x128_si256(lo.raw, hi.raw, 0x21);
+  return Vec256<T>{inner};
+}
+// Float specialization: same selection via the _ps permute.
+template <>
+HWY_INLINE Vec256<float> ConcatLowerUpper(const Vec256<float> hi,
+                                          const Vec256<float> lo) {
+  const __m256 inner = _mm256_permute2f128_ps(lo.raw, hi.raw, 0x21);
+  return Vec256<float>{inner};
+}
+// Double specialization: same selection via the _pd permute.
+template <>
+HWY_INLINE Vec256<double> ConcatLowerUpper(const Vec256<double> hi,
+                                           const Vec256<double> lo) {
+  const __m256d inner = _mm256_permute2f128_pd(lo.raw, hi.raw, 0x21);
+  return Vec256<double>{inner};
+}
+
+// hiH,hiL loH,loL |-> hiH,loL (= outer halves)
+// A blend suffices because neither block changes position: the set mask bits
+// take the lower 128 bits from lo, the rest comes from hi.
+template <typename T>
+HWY_API Vec256<T> ConcatUpperLower(const Vec256<T> hi, const Vec256<T> lo) {
+  const auto outer = _mm256_blend_epi32(hi.raw, lo.raw, 0x0F);
+  return Vec256<T>{outer};
+}
+// Float specialization: four 32-bit lanes from lo (mask 0x0F).
+template <>
+HWY_INLINE Vec256<float> ConcatUpperLower(const Vec256<float> hi,
+                                          const Vec256<float> lo) {
+  const __m256 outer = _mm256_blend_ps(hi.raw, lo.raw, 0x0F);
+  return Vec256<float>{outer};
+}
+// Double specialization: two 64-bit lanes from lo (mask 3).
+template <>
+HWY_INLINE Vec256<double> ConcatUpperLower(const Vec256<double> hi,
+                                           const Vec256<double> lo) {
+  const __m256d outer = _mm256_blend_pd(hi.raw, lo.raw, 3);
+  return Vec256<double>{outer};
+}
+
+// hiH,hiL loH,loL |-> hiH,loH (= upper halves)
+template <typename T>
+HWY_API Vec256<T> ConcatUpperUpper(const Vec256<T> hi, const Vec256<T> lo) {
+  // Bring loH down into the lower block, then reuse the outer-halves blend to
+  // put hiH on top of it.
+  const auto lo_upper = ZeroExtendVector(UpperHalf(lo));
+  return ConcatUpperLower(hi, lo_upper);
+}
+
+// ------------------------------ Odd/even lanes
+
+namespace detail {
+
+// Implementations of OddEven, selected by lane size in bytes via SizeTag.
+// In every case the even lanes of the result come from b and the odd lanes
+// from a.
+
+// 1-byte lanes: selects through a mask vector whose even bytes are 0xFF.
+template <typename T>
+HWY_API Vec256<T> OddEven(hwy::SizeTag<1> /* tag */, const Vec256<T> a,
+                          const Vec256<T> b) {
+  const Full256<T> d;
+  const Full256<uint8_t> d8;
+  alignas(32) constexpr uint8_t mask[16] = {0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0,
+                                            0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0};
+  // The 16-byte pattern is duplicated into both 128-bit blocks.
+  const auto even_lanes = MaskFromVec(BitCast(d, LoadDup128(d8, mask)));
+  return IfThenElse(even_lanes, b, a);
+}
+// 2-byte lanes: set bits of 0x55 pick the even u16 lanes from b.
+template <typename T>
+HWY_API Vec256<T> OddEven(hwy::SizeTag<2> /* tag */, const Vec256<T> a,
+                          const Vec256<T> b) {
+  const auto blended = _mm256_blend_epi16(a.raw, b.raw, 0x55);
+  return Vec256<T>{blended};
+}
+// 4-byte lanes: set bits of 0x55 pick the even u32 lanes from b.
+template <typename T>
+HWY_API Vec256<T> OddEven(hwy::SizeTag<4> /* tag */, const Vec256<T> a,
+                          const Vec256<T> b) {
+  const auto blended = _mm256_blend_epi32(a.raw, b.raw, 0x55);
+  return Vec256<T>{blended};
+}
+// 8-byte lanes: 0x33 picks u32 pairs {0,1} and {4,5}, i.e. the even u64 lanes.
+template <typename T>
+HWY_API Vec256<T> OddEven(hwy::SizeTag<8> /* tag */, const Vec256<T> a,
+                          const Vec256<T> b) {
+  const auto blended = _mm256_blend_epi32(a.raw, b.raw, 0x33);
+  return Vec256<T>{blended};
+}
+
+}  // namespace detail
+
+// Returns a vector whose odd lanes are taken from a and whose even lanes are
+// taken from b. Integer types dispatch on the lane size.
+template <typename T>
+HWY_API Vec256<T> OddEven(const Vec256<T> a, const Vec256<T> b) {
+  return detail::OddEven(hwy::SizeTag<sizeof(T)>(), a, b);
+}
+// Float specialization: the set bits of 0x55 pick the even lanes from b.
+template <>
+HWY_INLINE Vec256<float> OddEven<float>(const Vec256<float> a,
+                                        const Vec256<float> b) {
+  const __m256 blended = _mm256_blend_ps(a.raw, b.raw, 0x55);
+  return Vec256<float>{blended};
+}
+
+// Double specialization: the set bits of 5 pick lanes 0 and 2 from b.
+template <>
+HWY_INLINE Vec256<double> OddEven<double>(const Vec256<double> a,
+                                          const Vec256<double> b) {
+  const __m256d blended = _mm256_blend_pd(a.raw, b.raw, 5);
+  return Vec256<double>{blended};
+}
+
+// ------------------------------ Shuffle bytes with variable indices
+
+// Returns vector of bytes[from[i]]. "from" is also interpreted as bytes, i.e.
+// lane indices in [0, 16). The lookup is done separately for each 128-bit
+// block, so an index can only select a byte from its own block.
+template <typename T>
+HWY_API Vec256<T> TableLookupBytes(const Vec256<T> bytes,
+                                   const Vec256<T> from) {
+  const auto looked_up = _mm256_shuffle_epi8(bytes.raw, from.raw);
+  return Vec256<T>{looked_up};
+}
+
+// ------------------------------ Shl (Mul, ZipLower)
+
+// Only needed when the target is not AVX3; the AVX3 paths of the 16-bit
+// shift operators use variable-shift intrinsics instead.
+#if HWY_TARGET != HWY_AVX3
+namespace detail {
+
+// Returns 2^v for use as per-lane multipliers to emulate 16-bit shifts.
+// Works by building the binary32 representation of 2^v (exponent field =
+// bias + v, mantissa = 0) and converting that float back to an integer.
+template <typename T, HWY_IF_LANE_SIZE(T, 2)>
+HWY_API Vec256<MakeUnsigned<T>> Pow2(const Vec256<T> v) {
+  const Full256<T> d;
+  const Full256<float> df;
+  const auto zero = Zero(d);
+  // Move into exponent (this u16 will become the upper half of an f32)
+  // The f32 exponent starts at bit 23, which is bit 23 - 16 of the upper half.
+  const auto exp = ShiftLeft<23 - 16>(v);
+  const auto upper = exp + Set(d, 0x3F80);  // upper half of 1.0f
+  // Insert 0 into lower halves for reinterpreting as binary32.
+  // f0 covers the lower half of the lanes of each 128-bit block, f1 the upper.
+  const auto f0 = ZipLower(zero, upper);
+  const auto f1 = ZipUpper(zero, upper);
+  // Do not use ConvertTo because it checks for overflow, which is redundant
+  // because we only care about v in [0, 16).
+  const Vec256<int32_t> bits0{_mm256_cvttps_epi32(BitCast(df, f0).raw)};
+  const Vec256<int32_t> bits1{_mm256_cvttps_epi32(BitCast(df, f1).raw)};
+  // Narrow back to 16 bits with unsigned saturation. The pack also works per
+  // 128-bit block, which undoes the per-block split done by ZipLower/ZipUpper.
+  return Vec256<MakeUnsigned<T>>{_mm256_packus_epi32(bits0.raw, bits1.raw)};
+}
+
+}  // namespace detail
+#endif  // HWY_TARGET != HWY_AVX3
+
+// Per-lane variable left shift: lane i of v is shifted by lane i of bits.
+HWY_API Vec256<uint16_t> operator<<(const Vec256<uint16_t> v,
+                                    const Vec256<uint16_t> bits) {
+#if HWY_TARGET == HWY_AVX3
+  return Vec256<uint16_t>{_mm256_sllv_epi16(v.raw, bits.raw)};
+#else
+  // Without a 16-bit variable shift here, multiply by 2^bits instead.
+  const auto multiplier = detail::Pow2(bits);
+  return v * multiplier;
+#endif
+}
+
+HWY_API Vec256<uint32_t> operator<<(const Vec256<uint32_t> v,
+                                    const Vec256<uint32_t> bits) {
+  const __m256i shifted = _mm256_sllv_epi32(v.raw, bits.raw);
+  return Vec256<uint32_t>{shifted};
+}
+
+HWY_API Vec256<uint64_t> operator<<(const Vec256<uint64_t> v,
+                                    const Vec256<uint64_t> bits) {
+  const __m256i shifted = _mm256_sllv_epi64(v.raw, bits.raw);
+  return Vec256<uint64_t>{shifted};
+}
+
+// Signed left shift is the same as unsigned, so forward to the unsigned
+// overload of the same lane width and cast the result back.
+template <typename T, HWY_IF_SIGNED(T)>
+HWY_API Vec256<T> operator<<(const Vec256<T> v, const Vec256<T> bits) {
+  const Full256<MakeUnsigned<T>> du;
+  const auto shifted = BitCast(du, v) << BitCast(du, bits);
+  return BitCast(Full256<T>(), shifted);
+}
+
+// ------------------------------ Shr (MulHigh, IfThenElse, Not)
+
+// Per-lane variable right shift: lane i of v is shifted by lane i of bits.
+// Unsigned overloads shift in zeros.
+HWY_API Vec256<uint16_t> operator>>(const Vec256<uint16_t> v,
+                                    const Vec256<uint16_t> bits) {
+#if HWY_TARGET == HWY_AVX3
+  return Vec256<uint16_t>{_mm256_srlv_epi16(v.raw, bits.raw)};
+#else
+  const Full256<uint16_t> d;
+  // v >> bits equals the upper 16 bits of v * 2^(16 - bits). For bits=0 the
+  // multiplier 2^16 is not representable in a u16, so that case is patched
+  // below.
+  const auto multiplier = detail::Pow2(Set(d, 16) - bits);
+  const auto shifted = MulHigh(v, multiplier);
+  // Where bits == 0, return the input unchanged.
+  const auto is_zero_shift = bits == Zero(d);
+  return IfThenElse(is_zero_shift, v, shifted);
+#endif
+}
+
+HWY_API Vec256<uint32_t> operator>>(const Vec256<uint32_t> v,
+                                    const Vec256<uint32_t> bits) {
+  const __m256i shifted = _mm256_srlv_epi32(v.raw, bits.raw);
+  return Vec256<uint32_t>{shifted};
+}
+
+HWY_API Vec256<uint64_t> operator>>(const Vec256<uint64_t> v,
+                                    const Vec256<uint64_t> bits) {
+  const __m256i shifted = _mm256_srlv_epi64(v.raw, bits.raw);
+  return Vec256<uint64_t>{shifted};
+}
+
+// Signed overloads: arithmetic shift intrinsics where one is used, otherwise
+// the shared detail::SignedShr helper.
+HWY_API Vec256<int16_t> operator>>(const Vec256<int16_t> v,
+                                   const Vec256<int16_t> bits) {
+#if HWY_TARGET == HWY_AVX3
+  return Vec256<int16_t>{_mm256_srav_epi16(v.raw, bits.raw)};
+#else
+  const Full256<int16_t> d;
+  return detail::SignedShr(d, v, bits);
+#endif
+}
+
+HWY_API Vec256<int32_t> operator>>(const Vec256<int32_t> v,
+                                   const Vec256<int32_t> bits) {
+  const __m256i shifted = _mm256_srav_epi32(v.raw, bits.raw);
+  return Vec256<int32_t>{shifted};
+}
+
+HWY_API Vec256<int64_t> operator>>(const Vec256<int64_t> v,
+                                   const Vec256<int64_t> bits) {
+#if HWY_TARGET == HWY_AVX3
+  return Vec256<int64_t>{_mm256_srav_epi64(v.raw, bits.raw)};
+#else
+  const Full256<int64_t> d;
+  return detail::SignedShr(d, v, bits);
+#endif
+}
+
+// ================================================== CONVERT
+
+// ------------------------------ Promotions (part w/ narrow lanes -> full)
+
+// Widens eight float16 lanes to float.
+HWY_API Vec256<float> PromoteTo(Full256<float> /* tag */,
+                                const Vec128<float16_t, 8> v) {
+  const __m256 widened = _mm256_cvtph_ps(v.raw);
+  return Vec256<float>{widened};
+}
+
+// Widens four float lanes to double.
+HWY_API Vec256<double> PromoteTo(Full256<double> /* tag */,
+                                 const Vec128<float, 4> v) {
+  const __m256d widened = _mm256_cvtps_pd(v.raw);
+  return Vec256<double>{widened};
+}
+
+// Converts four int32 lanes to double.
+HWY_API Vec256<double> PromoteTo(Full256<double> /* tag */,
+                                 const Vec128<int32_t, 4> v) {
+  const __m256d widened = _mm256_cvtepi32_pd(v.raw);
+  return Vec256<double>{widened};
+}
+
+// Unsigned: zero-extend.
+// Note: these have 3 cycle latency; if inputs are already split across the
+// 128 bit blocks (in their upper/lower halves), then Zip* would be faster.
+// u8 -> 16-bit lanes (zero-extended); the i16 variant further below uses the
+// same intrinsic because every u8 value fits in an int16_t.
+HWY_API Vec256<uint16_t> PromoteTo(Full256<uint16_t> /* tag */,
+                                   Vec128<uint8_t> v) {
+  const __m256i widened = _mm256_cvtepu8_epi16(v.raw);
+  return Vec256<uint16_t>{widened};
+}
+// u8 -> 32-bit lanes; consumes eight input lanes.
+HWY_API Vec256<uint32_t> PromoteTo(Full256<uint32_t> /* tag */,
+                                   Vec128<uint8_t, 8> v) {
+  const __m256i widened = _mm256_cvtepu8_epi32(v.raw);
+  return Vec256<uint32_t>{widened};
+}
+HWY_API Vec256<int16_t> PromoteTo(Full256<int16_t> /* tag */,
+                                  Vec128<uint8_t> v) {
+  const __m256i widened = _mm256_cvtepu8_epi16(v.raw);
+  return Vec256<int16_t>{widened};
+}
+HWY_API Vec256<int32_t> PromoteTo(Full256<int32_t> /* tag */,
+                                  Vec128<uint8_t, 8> v) {
+  const __m256i widened = _mm256_cvtepu8_epi32(v.raw);
+  return Vec256<int32_t>{widened};
+}
+// u16 -> 32-bit lanes.
+HWY_API Vec256<uint32_t> PromoteTo(Full256<uint32_t> /* tag */,
+                                   Vec128<uint16_t> v) {
+  const __m256i widened = _mm256_cvtepu16_epi32(v.raw);
+  return Vec256<uint32_t>{widened};
+}
+HWY_API Vec256<int32_t> PromoteTo(Full256<int32_t> /* tag */,
+                                  Vec128<uint16_t> v) {
+  const __m256i widened = _mm256_cvtepu16_epi32(v.raw);
+  return Vec256<int32_t>{widened};
+}
+// u32 -> 64-bit lanes.
+HWY_API Vec256<uint64_t> PromoteTo(Full256<uint64_t> /* tag */,
+                                   Vec128<uint32_t> v) {
+  const __m256i widened = _mm256_cvtepu32_epi64(v.raw);
+  return Vec256<uint64_t>{widened};
+}
+
+// Signed: replicate sign bit.
+// Note: these have 3 cycle latency; if inputs are already split across the
+// 128 bit blocks (in their upper/lower halves), then ZipUpper/ZipLower followed
+// by a signed shift would be faster.
+// i8 -> i16 (sign-extended).
+HWY_API Vec256<int16_t> PromoteTo(Full256<int16_t> /* tag */,
+                                  Vec128<int8_t> v) {
+  const __m256i widened = _mm256_cvtepi8_epi16(v.raw);
+  return Vec256<int16_t>{widened};
+}
+// i8 -> i32; consumes eight input lanes.
+HWY_API Vec256<int32_t> PromoteTo(Full256<int32_t> /* tag */,
+                                  Vec128<int8_t, 8> v) {
+  const __m256i widened = _mm256_cvtepi8_epi32(v.raw);
+  return Vec256<int32_t>{widened};
+}
+// i16 -> i32.
+HWY_API Vec256<int32_t> PromoteTo(Full256<int32_t> /* tag */,
+                                  Vec128<int16_t> v) {
+  const __m256i widened = _mm256_cvtepi16_epi32(v.raw);
+  return Vec256<int32_t>{widened};
+}
+// i32 -> i64.
+HWY_API Vec256<int64_t> PromoteTo(Full256<int64_t> /* tag */,
+                                  Vec128<int32_t> v) {
+  const __m256i widened = _mm256_cvtepi32_epi64(v.raw);
+  return Vec256<int64_t>{widened};
+}
+
+// ------------------------------ Demotions (full -> part w/ narrow lanes)
+
+// i32 -> u16 with unsigned saturation.
+HWY_API Vec128<uint16_t> DemoteTo(Full128<uint16_t> /* tag */,
+                                  const Vec256<int32_t> v) {
+  // Packing v with itself leaves the narrowed lanes of each 128-bit block in
+  // the lower 64 bits of that block.
+  const __m256i packed = _mm256_packus_epi32(v.raw, v.raw);
+  // Concatenating lower halves of both 128-bit blocks afterward is more
+  // efficient than an extra input with low block = high block of v.
+  const __m256i gathered = _mm256_permute4x64_epi64(packed, 0x88);
+  return Vec128<uint16_t>{_mm256_castsi256_si128(gathered)};
+}
+
+// i32 -> i16 with signed saturation; same block handling as above.
+HWY_API Vec128<int16_t> DemoteTo(Full128<int16_t> /* tag */,
+                                 const Vec256<int32_t> v) {
+  const __m256i packed = _mm256_packs_epi32(v.raw, v.raw);
+  const __m256i gathered = _mm256_permute4x64_epi64(packed, 0x88);
+  return Vec128<int16_t>{_mm256_castsi256_si128(gathered)};
+}
+
+// i32 -> u8 with saturation: negative inputs become 0, inputs above 255
+// become 255.
+HWY_API Vec128<uint8_t, 8> DemoteTo(Simd<uint8_t, 8> /* tag */,
+                                    const Vec256<int32_t> v) {
+  // First narrow to i16 with *signed* saturation. The second pack below
+  // interprets its input as signed i16, so the intermediate must stay within
+  // [-32768, 32767]. Saturating to u16 and then clearing bit 15 would turn
+  // inputs in [32768, 33022] into small values instead of 255.
+  const __m256i i16_blocks = _mm256_packs_epi32(v.raw, v.raw);
+  // Concatenate lower 64 bits of each 128-bit block
+  const __m256i i16_concat = _mm256_permute4x64_epi64(i16_blocks, 0x88);
+  const __m128i i16 = _mm256_castsi256_si128(i16_concat);
+  // Signed i16 -> u8 with unsigned saturation handles both ends of the range.
+  return Vec128<uint8_t, 8>{_mm_packus_epi16(i16, i16)};
+}
+
+// i16 -> u8 with unsigned saturation.
+HWY_API Vec128<uint8_t> DemoteTo(Full128<uint8_t> /* tag */,
+                                 const Vec256<int16_t> v) {
+  const __m256i packed = _mm256_packus_epi16(v.raw, v.raw);
+  // Gather the lower 64 bits of each 128-bit block into the lower 128 bits.
+  const __m256i gathered = _mm256_permute4x64_epi64(packed, 0x88);
+  return Vec128<uint8_t>{_mm256_castsi256_si128(gathered)};
+}
+
+// i32 -> i8 with signed saturation, in two narrowing steps.
+HWY_API Vec128<int8_t, 8> DemoteTo(Simd<int8_t, 8> /* tag */,
+                                   const Vec256<int32_t> v) {
+  const __m256i packed16 = _mm256_packs_epi32(v.raw, v.raw);
+  // Concatenate lower 64 bits of each 128-bit block
+  const __m256i gathered16 = _mm256_permute4x64_epi64(packed16, 0x88);
+  const __m128i lanes16 = _mm256_castsi256_si128(gathered16);
+  return Vec128<int8_t, 8>{_mm_packs_epi16(lanes16, lanes16)};
+}
+
+// i16 -> i8 with signed saturation.
+HWY_API Vec128<int8_t> DemoteTo(Full128<int8_t> /* tag */,
+                                const Vec256<int16_t> v) {
+  const __m256i packed = _mm256_packs_epi16(v.raw, v.raw);
+  // Gather the lower 64 bits of each 128-bit block into the lower 128 bits.
+  const __m256i gathered = _mm256_permute4x64_epi64(packed, 0x88);
+  return Vec128<int8_t>{_mm256_castsi256_si128(gathered)};
+}
+
+  // Avoid "value of intrinsic immediate argument '8' is out of range '0 - 7'".
+  // 8 is the correct value of _MM_FROUND_NO_EXC, which is allowed here.
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4556, ignored "-Wsign-conversion")
+
+// float -> float16; the rounding argument passed is _MM_FROUND_NO_EXC.
+HWY_API Vec128<float16_t> DemoteTo(Full128<float16_t> /* tag */,
+                                   const Vec256<float> v) {
+  const __m128i halves = _mm256_cvtps_ph(v.raw, _MM_FROUND_NO_EXC);
+  return Vec128<float16_t>{halves};
+}
+
+HWY_DIAGNOSTICS(pop)
+
+// double -> float.
+HWY_API Vec128<float> DemoteTo(Full128<float> /* tag */,
+                               const Vec256<double> v) {
+  const __m128 narrowed = _mm256_cvtpd_ps(v.raw);
+  return Vec128<float>{narrowed};
+}
+
+// double -> int32, truncating toward zero. The input first goes through
+// detail::ClampF64ToI32Max (NOTE(review): presumably limits values above the
+// int32 maximum - confirm against its definition).
+HWY_API Vec128<int32_t> DemoteTo(Full128<int32_t> /* tag */,
+                                 const Vec256<double> v) {
+  const Full256<double> dd;
+  const auto clamped = detail::ClampF64ToI32Max(dd, v);
+  const __m128i truncated = _mm256_cvttpd_epi32(clamped.raw);
+  return Vec128<int32_t>{truncated};
+}
+
+// For already range-limited input [0, 255].
+// Gathers the least-significant byte of each of the eight u32 lanes into one
+// 8-byte vector.
+HWY_API Vec128<uint8_t, 8> U8FromU32(const Vec256<uint32_t> v) {
+  const Full256<uint32_t> d32;
+  // Byte-shuffle indices: 0x0C080400 picks bytes 0, 4, 8 and 12 of a block.
+  // Every byte of ~0u has its MSB set, which makes the shuffle output zero.
+  alignas(32) static constexpr uint32_t k8From32[8] = {
+      0x0C080400u, ~0u, ~0u, ~0u, ~0u, 0x0C080400u, ~0u, ~0u};
+  // The lower block's four bytes land in its u32 lane 0, the upper block's
+  // four bytes in its u32 lane 1; all other lanes are zero.
+  const auto gathered = TableLookupBytes(v, Load(d32, k8From32));
+  // The nonzero lanes of the two halves do not overlap, so OR merges them.
+  // OR instead of unpack reduces port5 pressure.
+  const auto merged = LowerHalf(gathered) | UpperHalf(gathered);
+  return BitCast(Simd<uint8_t, 8>(), LowerHalf(merged));
+}
+
+// ------------------------------ Integer <=> fp (ShiftRight, OddEven)
+
+// int32 -> float, lane by lane.
+HWY_API Vec256<float> ConvertTo(Full256<float> /* tag */,
+                                const Vec256<int32_t> v) {
+  const __m256 converted = _mm256_cvtepi32_ps(v.raw);
+  return Vec256<float>{converted};
+}
+
+// int64 -> double. AVX3 uses a single conversion intrinsic; otherwise the
+// upper and lower 32 bits of each lane are embedded into the mantissas of two
+// doubles with known exponents, and the exponent offsets are subtracted out.
+HWY_API Vec256<double> ConvertTo(Full256<double> dd, const Vec256<int64_t> v) {
+#if HWY_TARGET == HWY_AVX3
+  (void)dd;
+  return Vec256<double>{_mm256_cvtepi64_pd(v.raw)};
+#else
+  // Based on wim's approach (https://stackoverflow.com/questions/41144668/)
+  const Repartition<uint32_t, decltype(dd)> d32;
+  const Repartition<uint64_t, decltype(dd)> d64;
+
+  // Toggle MSB of lower 32-bits and insert exponent for 2^84 + 2^63
+  // (after the shift, the lower 32 bits hold the upper half of the input.)
+  const auto k84_63 = Set(d64, 0x4530000080000000ULL);
+  const auto v_upper = BitCast(dd, ShiftRight<32>(BitCast(d64, v)) ^ k84_63);
+
+  // Exponent is 2^52, lower 32 bits from v (=> 32-bit OddEven)
+  // Odd u32 lanes (upper halves) come from k52, even ones from v.
+  const auto k52 = Set(d32, 0x43300000);
+  const auto v_lower = BitCast(dd, OddEven(k52, BitCast(d32, v)));
+
+  // Combined offset 2^84 + 2^63 + 2^52 introduced by the two constants above.
+  const auto k84_63_52 = BitCast(dd, Set(d64, 0x4530000080100000ULL));
+  return (v_upper - k84_63_52) + v_lower;  // order matters!
+#endif
+}
+
+// float -> int32, truncating (rounding toward zero). The raw conversion
+// result is post-processed by detail::FixConversionOverflow.
+HWY_API Vec256<int32_t> ConvertTo(Full256<int32_t> d, const Vec256<float> v) {
+  const __m256i truncated = _mm256_cvttps_epi32(v.raw);
+  return detail::FixConversionOverflow(d, v, truncated);
+}
+
+// double -> int64, truncating toward zero. Inputs at or beyond the int64
+// limits yield the respective limit.
+HWY_API Vec256<int64_t> ConvertTo(Full256<int64_t> di, const Vec256<double> v) {
+#if HWY_TARGET == HWY_AVX3
+  return detail::FixConversionOverflow(di, v, _mm256_cvttpd_epi64(v.raw));
+#else
+  // Scalar fallback: spill the lanes to memory, convert each one, reload.
+  alignas(32) double in[4];
+  Store(v, Full256<double>(), in);
+  alignas(32) int64_t out[4];
+  const int64_t max_i = LimitsMax<int64_t>();
+  const int64_t min_i = LimitsMin<int64_t>();
+  for (size_t i = 0; i < 4; ++i) {
+    const double x = in[i];
+    if (x >= static_cast<double>(max_i)) {
+      out[i] = max_i;
+      continue;
+    }
+    if (x <= static_cast<double>(min_i)) {
+      out[i] = min_i;
+      continue;
+    }
+    // In range (NaN fails both comparisons and also ends up here).
+    out[i] = static_cast<int64_t>(x);
+  }
+  return Load(di, out);
+#endif
+}
+
+// float -> int32 using the non-truncating conversion intrinsic (cvtps, as
+// opposed to cvttps used by ConvertTo); the raw result is post-processed by
+// detail::FixConversionOverflow.
+HWY_API Vec256<int32_t> NearestInt(const Vec256<float> v) {
+  const __m256i rounded = _mm256_cvtps_epi32(v.raw);
+  return detail::FixConversionOverflow(Full256<int32_t>(), v, rounded);
+}
+
+// ================================================== MISC
+
+// Returns a vector with lane i=[0, N) set to "first" + i.
+// The addition is done in T2 and the sum is then converted to the lane type T.
+// Declared HWY_API like the other ops in this section so it gets the same
+// linkage/inlining attributes.
+template <typename T, typename T2>
+HWY_API Vec256<T> Iota(const Full256<T> d, const T2 first) {
+  constexpr size_t kLanes = 32 / sizeof(T);  // lanes in a 256-bit vector
+  HWY_ALIGN T lanes[kLanes];
+  for (size_t i = 0; i < kLanes; ++i) {
+    lanes[i] = static_cast<T>(first + static_cast<T2>(i));
+  }
+  return Load(d, lanes);
+}
+
+// ------------------------------ Mask
+
+namespace detail {
+
+template <typename T>
+HWY_API uint64_t BitsFromMask(hwy::SizeTag<1> /*tag*/, const Mask256<T> mask) {
+  const Full256<T> d;
+  const Full256<uint8_t> d8;
+  const auto sign_bits = BitCast(d8, VecFromMask(d, mask)).raw;
+  // Prevent sign-extension of 32-bit masks because the intrinsic returns int.
+  return static_cast<uint32_t>(_mm256_movemask_epi8(sign_bits));
+}
+
+// 16-bit lanes: one result bit per lane.
+template <typename T>
+HWY_API uint64_t BitsFromMask(hwy::SizeTag<2> /*tag*/, const Mask256<T> mask) {
+#if HWY_ARCH_X86_64
+  // Start from the per-byte bits and keep only the odd ones, i.e. those of the
+  // upper byte of each u16. (Better not to use the same packs_epi16 as SSE4,
+  // because that requires an extra swizzle here.)
+  constexpr uint64_t kUpperByteOfEachU16 = 0xAAAAAAAAull;
+  const uint64_t per_byte_bits = BitsFromMask(hwy::SizeTag<1>(), mask);
+  return _pext_u64(per_byte_bits, kUpperByteOfEachU16);
+#else
+  // Slow workaround for 32-bit builds, which lack _pext_u64.
+  // Narrow each u16 to a byte while preserving its sign bit. Afterwards bytes
+  // [0, 8) and [16, 24) carry the sign bits of the input lanes.
+  const __m256i zero = _mm256_setzero_si256();
+  const __m256i narrowed = _mm256_packs_epi16(mask.raw, zero);
+  // Move the odd qwords (value zero) to the top so they do not affect the
+  // mask value.
+  const __m256i gathered =
+      _mm256_permute4x64_epi64(narrowed, _MM_SHUFFLE(3, 1, 2, 0));
+  const int packed = _mm256_movemask_epi8(gathered);
+  return static_cast<unsigned>(packed);
+#endif  // HWY_ARCH_X86_64
+}
+
+// 32-bit lanes: reinterprets the mask vector as float and collects the sign
+// bit of each of the eight lanes.
+template <typename T>
+HWY_API uint64_t BitsFromMask(hwy::SizeTag<4> /*tag*/, const Mask256<T> mask) {
+  const Vec256<float> as_float =
+      BitCast(Full256<float>(), VecFromMask(Full256<T>(), mask));
+  const int lane_bits = _mm256_movemask_ps(as_float.raw);
+  return static_cast<unsigned>(lane_bits);
+}
+
+// 64-bit lanes: reinterprets the mask vector as double and collects the sign
+// bit of each of the four lanes.
+template <typename T>
+HWY_API uint64_t BitsFromMask(hwy::SizeTag<8> /*tag*/, const Mask256<T> mask) {
+  const Vec256<double> as_double =
+      BitCast(Full256<double>(), VecFromMask(Full256<T>(), mask));
+  const int lane_bits = _mm256_movemask_pd(as_double.raw);
+  return static_cast<unsigned>(lane_bits);
+}
+
+// Entry point: forwards to the overload selected by the lane size of T.
+template <typename T>
+HWY_API uint64_t BitsFromMask(const Mask256<T> mask) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  return BitsFromMask(LaneSizeTag(), mask);
+}
+
+}  // namespace detail
+
+// Copies the bits returned by detail::BitsFromMask(mask) to `p` and returns
+// the number of bytes copied.
+template <typename T>
+HWY_INLINE size_t StoreMaskBits(const Mask256<T> mask, uint8_t* p) {
+  // ceil(4 / sizeof(T)): 4, 2, 1, 1 bytes for lane sizes 1, 2, 4, 8.
+  constexpr size_t kBytesToCopy = (4 + sizeof(T) - 1) / sizeof(T);
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  CopyBytes<kBytesToCopy>(&mask_bits, p);
+  return kBytesToCopy;
+}
+
+// True iff the bit pattern extracted from `mask` is zero.
+template <typename T>
+HWY_API bool AllFalse(const Mask256<T> mask) {
+  // Extracting the mask bits is cheaper than PTEST, which is 2 uop / 3L.
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  return mask_bits == 0;
+}
+
+// True iff the extracted mask bits equal a run of 32 / sizeof(T) ones, i.e.
+// one set bit per lane.
+template <typename T>
+HWY_API bool AllTrue(const Mask256<T> mask) {
+  constexpr size_t kLanes = 32 / sizeof(T);
+  constexpr uint64_t kEveryLane = (1ull << kLanes) - 1;
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  return mask_bits == kEveryLane;
+}
+
+// Returns the population count of the bits extracted from `mask`.
+template <typename T>
+HWY_API size_t CountTrue(const Mask256<T> mask) {
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  return PopCount(mask_bits);
+}
+
+// ------------------------------ Compress
+
+namespace detail {
+
+// Returns the index vector for compressing eight 32-bit lanes according to
+// `mask_bits` (one bit per lane, must be < 256). For each table entry, the
+// 4-bit field k (counting from the least significant nibble) holds the
+// position of the k-th set bit of the mask; unused fields are zero. Example:
+// mask 0b110 maps to 0x00000021, i.e. output lane 0 reads input lane 1 and
+// output lane 1 reads input lane 2.
+HWY_INLINE Vec256<uint32_t> Idx32x8FromBits(const uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 256);
+  const Full256<uint32_t> d32;
+
+  // We need a masked Iota(). With 8 lanes, there are 256 combinations and a LUT
+  // of SetTableIndices would require 8 KiB, a large part of L1D. The other
+  // alternative is _pext_u64, but this is extremely slow on Zen2 (18 cycles)
+  // and unavailable in 32-bit builds. We instead compress each index into 4
+  // bits, for a total of 1 KiB.
+  alignas(16) constexpr uint32_t packed_array[256] = {
+      0x00000000, 0x00000000, 0x00000001, 0x00000010, 0x00000002, 0x00000020,
+      0x00000021, 0x00000210, 0x00000003, 0x00000030, 0x00000031, 0x00000310,
+      0x00000032, 0x00000320, 0x00000321, 0x00003210, 0x00000004, 0x00000040,
+      0x00000041, 0x00000410, 0x00000042, 0x00000420, 0x00000421, 0x00004210,
+      0x00000043, 0x00000430, 0x00000431, 0x00004310, 0x00000432, 0x00004320,
+      0x00004321, 0x00043210, 0x00000005, 0x00000050, 0x00000051, 0x00000510,
+      0x00000052, 0x00000520, 0x00000521, 0x00005210, 0x00000053, 0x00000530,
+      0x00000531, 0x00005310, 0x00000532, 0x00005320, 0x00005321, 0x00053210,
+      0x00000054, 0x00000540, 0x00000541, 0x00005410, 0x00000542, 0x00005420,
+      0x00005421, 0x00054210, 0x00000543, 0x00005430, 0x00005431, 0x00054310,
+      0x00005432, 0x00054320, 0x00054321, 0x00543210, 0x00000006, 0x00000060,
+      0x00000061, 0x00000610, 0x00000062, 0x00000620, 0x00000621, 0x00006210,
+      0x00000063, 0x00000630, 0x00000631, 0x00006310, 0x00000632, 0x00006320,
+      0x00006321, 0x00063210, 0x00000064, 0x00000640, 0x00000641, 0x00006410,
+      0x00000642, 0x00006420, 0x00006421, 0x00064210, 0x00000643, 0x00006430,
+      0x00006431, 0x00064310, 0x00006432, 0x00064320, 0x00064321, 0x00643210,
+      0x00000065, 0x00000650, 0x00000651, 0x00006510, 0x00000652, 0x00006520,
+      0x00006521, 0x00065210, 0x00000653, 0x00006530, 0x00006531, 0x00065310,
+      0x00006532, 0x00065320, 0x00065321, 0x00653210, 0x00000654, 0x00006540,
+      0x00006541, 0x00065410, 0x00006542, 0x00065420, 0x00065421, 0x00654210,
+      0x00006543, 0x00065430, 0x00065431, 0x00654310, 0x00065432, 0x00654320,
+      0x00654321, 0x06543210, 0x00000007, 0x00000070, 0x00000071, 0x00000710,
+      0x00000072, 0x00000720, 0x00000721, 0x00007210, 0x00000073, 0x00000730,
+      0x00000731, 0x00007310, 0x00000732, 0x00007320, 0x00007321, 0x00073210,
+      0x00000074, 0x00000740, 0x00000741, 0x00007410, 0x00000742, 0x00007420,
+      0x00007421, 0x00074210, 0x00000743, 0x00007430, 0x00007431, 0x00074310,
+      0x00007432, 0x00074320, 0x00074321, 0x00743210, 0x00000075, 0x00000750,
+      0x00000751, 0x00007510, 0x00000752, 0x00007520, 0x00007521, 0x00075210,
+      0x00000753, 0x00007530, 0x00007531, 0x00075310, 0x00007532, 0x00075320,
+      0x00075321, 0x00753210, 0x00000754, 0x00007540, 0x00007541, 0x00075410,
+      0x00007542, 0x00075420, 0x00075421, 0x00754210, 0x00007543, 0x00075430,
+      0x00075431, 0x00754310, 0x00075432, 0x00754320, 0x00754321, 0x07543210,
+      0x00000076, 0x00000760, 0x00000761, 0x00007610, 0x00000762, 0x00007620,
+      0x00007621, 0x00076210, 0x00000763, 0x00007630, 0x00007631, 0x00076310,
+      0x00007632, 0x00076320, 0x00076321, 0x00763210, 0x00000764, 0x00007640,
+      0x00007641, 0x00076410, 0x00007642, 0x00076420, 0x00076421, 0x00764210,
+      0x00007643, 0x00076430, 0x00076431, 0x00764310, 0x00076432, 0x00764320,
+      0x00764321, 0x07643210, 0x00000765, 0x00007650, 0x00007651, 0x00076510,
+      0x00007652, 0x00076520, 0x00076521, 0x00765210, 0x00007653, 0x00076530,
+      0x00076531, 0x00765310, 0x00076532, 0x00765320, 0x00765321, 0x07653210,
+      0x00007654, 0x00076540, 0x00076541, 0x00765410, 0x00076542, 0x00765420,
+      0x00765421, 0x07654210, 0x00076543, 0x00765430, 0x00765431, 0x07654310,
+      0x00765432, 0x07654320, 0x07654321, 0x76543210};
+
+  // No need to mask because _mm256_permutevar8x32_epi32 ignores bits 3..31.
+  // Just shift each copy of the 32 bit LUT to extract its 4-bit fields.
+  // If broadcasting 32-bit from memory incurs the 3-cycle block-crossing
+  // latency, it may be faster to use LoadDup128 and PSHUFB.
+  const auto packed = Set(d32, packed_array[mask_bits]);
+  // Lane i shifts right by 4 * i so that its low bits hold field i; the higher
+  // fields remain in the upper bits of the lane (see the note above).
+  alignas(32) constexpr uint32_t shifts[8] = {0, 4, 8, 12, 16, 20, 24, 28};
+  return packed >> Load(d32, shifts);
+}
+
+// Returns the index vector for compressing four 64-bit lanes according to
+// `mask_bits` (one bit per lane, must be < 16). Each 64-bit lane is addressed
+// by a pair of consecutive 32-bit indices (2k, 2k + 1); index pairs beyond the
+// number of set bits are (0, 1).
+HWY_INLINE Vec256<uint32_t> Idx64x4FromBits(const uint64_t mask_bits) {
+  HWY_DASSERT(mask_bits < 16);
+
+  const Full256<uint32_t> d32;
+
+  // For 64-bit, we still need 32-bit indices because there is no 64-bit
+  // permutevar, but there are only 4 lanes, so we can afford to skip the
+  // unpacking and load the entire index vector directly.
+  // Layout: 16 rows (one per mask value, two rows per source line) of eight
+  // 32-bit indices each.
+  alignas(32) constexpr uint32_t packed_array[16 * 8] = {
+      0, 1, 0, 1, 0, 1, 0, 1, /**/ 0, 1, 0, 1, 0, 1, 0, 1,  //
+      2, 3, 0, 1, 0, 1, 0, 1, /**/ 0, 1, 2, 3, 0, 1, 0, 1,  //
+      4, 5, 0, 1, 0, 1, 0, 1, /**/ 0, 1, 4, 5, 0, 1, 0, 1,  //
+      2, 3, 4, 5, 0, 1, 0, 1, /**/ 0, 1, 2, 3, 4, 5, 0, 1,  //
+      6, 7, 0, 1, 0, 1, 0, 1, /**/ 0, 1, 6, 7, 0, 1, 0, 1,  //
+      2, 3, 6, 7, 0, 1, 0, 1, /**/ 0, 1, 2, 3, 6, 7, 0, 1,  //
+      4, 5, 6, 7, 0, 1, 0, 1, /**/ 0, 1, 4, 5, 6, 7, 0, 1,
+      2, 3, 4, 5, 6, 7, 0, 1, /**/ 0, 1, 2, 3, 4, 5, 6, 7};
+  // Row `mask_bits` starts at element 8 * mask_bits.
+  return Load(d32, packed_array + 8 * mask_bits);
+}
+
+// Helper functions called by both Compress and CompressStore - avoids a
+// redundant BitsFromMask in the latter.
+
+// 32-bit lanes: compresses `v` according to `mask_bits` (one bit per lane),
+// using the native masked compress on AVX3 and a LUT-driven lane permutation
+// otherwise.
+template <typename T>
+HWY_API Vec256<T> Compress(hwy::SizeTag<4> /*tag*/, Vec256<T> v,
+                           const uint64_t mask_bits) {
+  const Full256<uint32_t> du;
+  const __m256i raw_in = BitCast(du, v).raw;
+#if HWY_TARGET == HWY_AVX3
+  const __mmask8 kmask = static_cast<__mmask8>(mask_bits);
+  const __m256i raw_out = _mm256_maskz_compress_epi32(kmask, raw_in);
+#else
+  const Vec256<uint32_t> lane_idx = detail::Idx32x8FromBits(mask_bits);
+  const __m256i raw_out = _mm256_permutevar8x32_epi32(raw_in, lane_idx.raw);
+#endif
+  return BitCast(Full256<T>(), Vec256<uint32_t>{raw_out});
+}
+
+// 64-bit lanes: compresses `v` according to `mask_bits` (one bit per lane),
+// using the native masked compress on AVX3 and a 32-bit lane permutation with
+// paired indices otherwise.
+template <typename T>
+HWY_API Vec256<T> Compress(hwy::SizeTag<8> /*tag*/, Vec256<T> v,
+                           const uint64_t mask_bits) {
+  const Full256<uint64_t> du;
+  const __m256i raw_in = BitCast(du, v).raw;
+#if HWY_TARGET == HWY_AVX3
+  const __mmask8 kmask = static_cast<__mmask8>(mask_bits);
+  const __m256i raw_out = _mm256_maskz_compress_epi64(kmask, raw_in);
+#else
+  const Vec256<uint32_t> lane_idx = detail::Idx64x4FromBits(mask_bits);
+  const __m256i raw_out = _mm256_permutevar8x32_epi32(raw_in, lane_idx.raw);
+#endif
+  return BitCast(Full256<T>(), Vec256<uint64_t>{raw_out});
+}
+
+// Otherwise, defined in x86_512-inl.h so it can use wider vectors.
+#if HWY_TARGET != HWY_AVX3
+
+// LUTs are infeasible for 2^16 possible masks. Promoting to 32-bit and using
+// the native Compress is probably more efficient than 2 LUTs.
+//
+// 16-bit lanes: compresses `v` according to `mask_bits` (one bit per lane).
+// Outline: widen each half of the input to 32-bit lanes, compress both halves
+// independently, narrow them again, then splice the compressed upper half in
+// directly after the PopCount(mask_bits & 0xFF) lanes kept from the lower half.
+template <typename T>
+HWY_API Vec256<T> Compress(hwy::SizeTag<2> /*tag*/, Vec256<T> v,
+                           const uint64_t mask_bits) {
+  using D = Full256<T>;
+  const Rebind<uint16_t, D> du;
+  const Repartition<int32_t, D> dw;
+  const auto vu16 = BitCast(du, v);  // (required for float16_t inputs)
+  const auto promoted0 = PromoteTo(dw, LowerHalf(vu16));
+  const auto promoted1 = PromoteTo(dw, UpperHalf(vu16));
+
+  // Bits [0, 8) belong to the lower half, the remaining bits to the upper.
+  const uint64_t mask_bits0 = mask_bits & 0xFF;
+  const uint64_t mask_bits1 = mask_bits >> 8;
+  const auto compressed0 = Compress(hwy::SizeTag<4>(), promoted0, mask_bits0);
+  const auto compressed1 = Compress(hwy::SizeTag<4>(), promoted1, mask_bits1);
+
+  // Back to 16-bit lanes, each result zero-extended to a full vector.
+  const Half<decltype(du)> dh;
+  const auto demoted0 = ZeroExtendVector(DemoteTo(dh, compressed0));
+  const auto demoted1 = ZeroExtendVector(DemoteTo(dh, compressed1));
+
+  // Number of lanes kept from the lower half = offset at which the upper
+  // half's lanes must start.
+  const size_t count0 = PopCount(mask_bits0);
+  // Now combine by shifting demoted1 up. AVX2 lacks VPERMW, so start with
+  // VPERMD for shifting at 4 byte granularity.
+  // Reading iota4 from offset 8 - k yields k zero indices followed by
+  // 0, 1, .., 7 - k, which moves the 32-bit lanes up by k = count0 / 2.
+  alignas(32) constexpr int32_t iota4[16] = {0, 0, 0, 0, 0, 0, 0, 0,
+                                             0, 1, 2, 3, 4, 5, 6, 7};
+  const auto indices = SetTableIndices(dw, iota4 + 8 - count0 / 2);
+  const auto shift1_multiple4 =
+      BitCast(du, TableLookupLanes(BitCast(dw, demoted1), indices));
+
+  // Whole-register unconditional shift by 2 bytes.
+  // TODO(janwas): slow on AMD, use 2 shifts + permq + OR instead?
+  // lo_zz = (upper block: lower block of the input, lower block: zero), so the
+  // per-block alignr below carries two bytes across the 128-bit boundary.
+  const __m256i lo_zz = _mm256_permute2x128_si256(shift1_multiple4.raw,
+                                                  shift1_multiple4.raw, 0x08);
+  const auto shift1_multiple2 =
+      Vec256<uint16_t>{_mm256_alignr_epi8(shift1_multiple4.raw, lo_zz, 14)};
+
+  // Make the shift conditional on the lower bit of count0.
+  const auto m_odd = TestBit(Set(du, count0), Set(du, 1));
+  const auto shifted1 = IfThenElse(m_odd, shift1_multiple2, shift1_multiple4);
+
+  // Blend the lower and shifted upper parts.
+  // lower_lanes is 16 x 0xFFFF followed by 16 x 0; loading from offset
+  // 16 - count0 gives a mask that is set in exactly the first count0 lanes.
+  constexpr uint16_t on = 0xFFFF;
+  alignas(32) constexpr uint16_t lower_lanes[32] = {HWY_REP4(on), HWY_REP4(on),
+                                                    HWY_REP4(on), HWY_REP4(on)};
+  const auto m_lower = MaskFromVec(LoadU(du, lower_lanes + 16 - count0));
+  return BitCast(D(), IfThenElse(m_lower, demoted0, shifted1));
+}
+
+#endif  // HWY_TARGET != HWY_AVX3
+
+}  // namespace detail
+
+// Otherwise, defined in x86_512-inl.h after detail::Compress.
+#if HWY_TARGET != HWY_AVX3
+
+// Public entry point: extracts the mask bits once and dispatches to the
+// detail::Compress overload for the lane size of T.
+template <typename T>
+HWY_API Vec256<T> Compress(Vec256<T> v, const Mask256<T> mask) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  return detail::Compress(LaneSizeTag(), v, mask_bits);
+}
+
+// ------------------------------ CompressStore
+
+// Compresses `v` according to `mask`, stores the whole resulting vector to
+// `aligned` and returns the number of set mask bits.
+template <typename T>
+HWY_API size_t CompressStore(Vec256<T> v, const Mask256<T> mask, Full256<T> d,
+                             T* HWY_RESTRICT aligned) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  const uint64_t bits = detail::BitsFromMask(mask);
+  // NOTE: it is tempting to split inputs into two halves for 16-bit lanes, but
+  // using StoreU to concatenate the results would cause page faults if
+  // `aligned` is the last valid vector. Instead rely on in-register splicing.
+  const Vec256<T> packed = detail::Compress(LaneSizeTag(), v, bits);
+  Store(packed, d, aligned);
+  return PopCount(bits);
+}
+
+#endif  // HWY_TARGET != HWY_AVX3
+
+// ------------------------------ StoreInterleaved3 (CombineShiftRightBytes,
+// TableLookupBytes, ConcatUpperLower)
+
+// Interleaves the bytes of v0, v1, v2 (output byte order v0[i], v1[i], v2[i],
+// as laid out by the shuffle tables below) and writes the result as three
+// 32-byte unaligned stores starting at `unaligned`.
+HWY_API void StoreInterleaved3(const Vec256<uint8_t> v0,
+                               const Vec256<uint8_t> v1,
+                               const Vec256<uint8_t> v2, Full256<uint8_t> d,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  const auto k5 = Set(d, 5);
+  const auto k6 = Set(d, 6);
+
+  // Shuffle (v0,v1,v2) vector bytes to (MSB on left): r5, bgr[4:0].
+  // 0x80 so lanes to be filled from other vectors are 0 for blending.
+  alignas(16) static constexpr uint8_t tbl_r0[16] = {
+      0, 0x80, 0x80, 1, 0x80, 0x80, 2, 0x80, 0x80,  //
+      3, 0x80, 0x80, 4, 0x80, 0x80, 5};
+  alignas(16) static constexpr uint8_t tbl_g0[16] = {
+      0x80, 0, 0x80, 0x80, 1, 0x80,  //
+      0x80, 2, 0x80, 0x80, 3, 0x80, 0x80, 4, 0x80, 0x80};
+  const auto shuf_r0 = LoadDup128(d, tbl_r0);
+  const auto shuf_g0 = LoadDup128(d, tbl_g0);  // cannot reuse r0 due to 5
+  // The third table is tbl_g0 moved up by one byte position.
+  const auto shuf_b0 = CombineShiftRightBytes<15>(shuf_g0, shuf_g0);
+  const auto r0 = TableLookupBytes(v0, shuf_r0);  // 5..4..3..2..1..0
+  const auto g0 = TableLookupBytes(v1, shuf_g0);  // ..4..3..2..1..0.
+  const auto b0 = TableLookupBytes(v2, shuf_b0);  // .4..3..2..1..0..
+  const auto interleaved_10_00 = r0 | g0 | b0;
+
+  // Second vector: g10,r10, bgr[9:6], b5,g5
+  // Adding k5/k6 advances the source indices of an earlier table; the tables
+  // rotate roles because 16 is not a multiple of 3.
+  const auto shuf_r1 = shuf_b0 + k6;  // .A..9..8..7..6..
+  const auto shuf_g1 = shuf_r0 + k5;  // A..9..8..7..6..5
+  const auto shuf_b1 = shuf_g0 + k5;  // ..9..8..7..6..5.
+  const auto r1 = TableLookupBytes(v0, shuf_r1);
+  const auto g1 = TableLookupBytes(v1, shuf_g1);
+  const auto b1 = TableLookupBytes(v2, shuf_b1);
+  const auto interleaved_15_05 = r1 | g1 | b1;
+
+  // We want to write the lower halves of the interleaved vectors, then the
+  // upper halves. We could obtain 10_05 and 15_0A via ConcatUpperLower, but
+  // that would require two unaligned stores. For the lower halves, we can
+  // merge two 128-bit stores for the same swizzling cost:
+  const auto out0 = ConcatLowerLower(interleaved_15_05, interleaved_10_00);
+  StoreU(out0, d, unaligned + 0 * 32);
+
+  // Third vector: bgr[15:11], b10
+  const auto shuf_r2 = shuf_b1 + k6;  // ..F..E..D..C..B.
+  const auto shuf_g2 = shuf_r1 + k5;  // .F..E..D..C..B..
+  const auto shuf_b2 = shuf_g1 + k5;  // F..E..D..C..B..A
+  const auto r2 = TableLookupBytes(v0, shuf_r2);
+  const auto g2 = TableLookupBytes(v1, shuf_g2);
+  const auto b2 = TableLookupBytes(v2, shuf_b2);
+  const auto interleaved_1A_0A = r2 | g2 | b2;
+
+  // Middle 32 bytes: upper block of the first vector, lower block of the third.
+  const auto out1 = ConcatUpperLower(interleaved_10_00, interleaved_1A_0A);
+  StoreU(out1, d, unaligned + 1 * 32);
+
+  // Last 32 bytes: the upper blocks of the third and second vectors.
+  const auto out2 = ConcatUpperUpper(interleaved_1A_0A, interleaved_15_05);
+  StoreU(out2, d, unaligned + 2 * 32);
+}
+
+// ------------------------------ StoreInterleaved4
+
+// Interleaves the bytes of v0..v3 via two rounds of zips and writes the
+// result as four 32-byte unaligned stores starting at `unaligned`.
+HWY_API void StoreInterleaved4(const Vec256<uint8_t> v0,
+                               const Vec256<uint8_t> v1,
+                               const Vec256<uint8_t> v2,
+                               const Vec256<uint8_t> v3, Full256<uint8_t> d,
+                               uint8_t* HWY_RESTRICT unaligned) {
+  // Below, a,b,c,d stand for v0..v3.
+  // Round 1: byte pairs.
+  const auto pairs_ba_lo = ZipLower(v0, v1);  // b7 a7 .. b0 a0
+  const auto pairs_ba_hi = ZipUpper(v0, v1);
+  const auto pairs_dc_lo = ZipLower(v2, v3);  // d7 c7 .. d0 c0
+  const auto pairs_dc_hi = ZipUpper(v2, v3);
+
+  // Round 2: byte quads. Each vector holds one group of four quads per
+  // 128-bit block.
+  const auto quads_0 = ZipLower(pairs_ba_lo, pairs_dc_lo);  // a13..10 | a03..00
+  const auto quads_4 = ZipUpper(pairs_ba_lo, pairs_dc_lo);  // a17..14 | a07..04
+  const auto quads_8 = ZipLower(pairs_ba_hi, pairs_dc_hi);  // a1B..18 | a0B..08
+  const auto quads_C = ZipUpper(pairs_ba_hi, pairs_dc_hi);  // a1F..1C | a0F..0C
+
+  // Write lower halves, then upper. vperm2i128 is slow on Zen1 but we can
+  // efficiently combine two lower halves into 256 bits:
+  StoreU(BitCast(d, ConcatLowerLower(quads_4, quads_0)), d,
+         unaligned + 0 * 32);
+  StoreU(BitCast(d, ConcatLowerLower(quads_C, quads_8)), d,
+         unaligned + 1 * 32);
+  StoreU(BitCast(d, ConcatUpperUpper(quads_4, quads_0)), d,
+         unaligned + 2 * 32);
+  StoreU(BitCast(d, ConcatUpperUpper(quads_C, quads_8)), d,
+         unaligned + 3 * 32);
+}
+
+// ------------------------------ Reductions
+
+namespace detail {
+
+// Returns sum{lane[i]} in each lane. "v3210" is a replicated 128-bit block.
+// Same logic as x86/128.h, but with Vec256 arguments: two shuffle-and-add
+// steps per block.
+template <typename T>
+HWY_API Vec256<T> SumOfLanes(hwy::SizeTag<4> /* tag */, const Vec256<T> v3210) {
+  // First step: lanes 3+1 and 2+0.
+  const auto pair_sums = v3210 + Shuffle1032(v3210);
+  // Second step: combine the two pair sums.
+  return Shuffle0321(pair_sums) + pair_sums;
+}
+// Same two-step scheme as SumOfLanes for 32-bit lanes, with Min as the
+// combining operation.
+template <typename T>
+HWY_API Vec256<T> MinOfLanes(hwy::SizeTag<4> /* tag */, const Vec256<T> v3210) {
+  const auto pair_mins = Min(v3210, Shuffle1032(v3210));
+  return Min(Shuffle0321(pair_mins), pair_mins);
+}
+// Same two-step scheme as SumOfLanes for 32-bit lanes, with Max as the
+// combining operation.
+template <typename T>
+HWY_API Vec256<T> MaxOfLanes(hwy::SizeTag<4> /* tag */, const Vec256<T> v3210) {
+  const auto pair_maxes = Max(v3210, Shuffle1032(v3210));
+  return Max(Shuffle0321(pair_maxes), pair_maxes);
+}
+
+// 64-bit lanes: adds `v10` to its Shuffle01 counterpart.
+template <typename T>
+HWY_API Vec256<T> SumOfLanes(hwy::SizeTag<8> /* tag */, const Vec256<T> v10) {
+  return v10 + Shuffle01(v10);
+}
+// 64-bit lanes: Min of `v10` and its Shuffle01 counterpart.
+template <typename T>
+HWY_API Vec256<T> MinOfLanes(hwy::SizeTag<8> /* tag */, const Vec256<T> v10) {
+  return Min(v10, Shuffle01(v10));
+}
+// 64-bit lanes: Max of `v10` and its Shuffle01 counterpart.
+template <typename T>
+HWY_API Vec256<T> MaxOfLanes(hwy::SizeTag<8> /* tag */, const Vec256<T> v10) {
+  return Max(v10, Shuffle01(v10));
+}
+
+}  // namespace detail
+
+// Supported for {uif}32x8, {uif}64x4. Returns the sum in each lane.
+// The two 128-bit halves are added first so that both blocks are identical,
+// then the per-block reduction finishes the job.
+template <typename T>
+HWY_API Vec256<T> SumOfLanes(const Vec256<T> vHL) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  const Vec256<T> halves_swapped = ConcatLowerUpper(vHL, vHL);
+  const Vec256<T> both_halves = halves_swapped + vHL;
+  return detail::SumOfLanes(LaneSizeTag(), both_halves);
+}
+// Minimum across the vector: combines the two 128-bit halves with Min, then
+// finishes with the per-block reduction.
+template <typename T>
+HWY_API Vec256<T> MinOfLanes(const Vec256<T> vHL) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  const Vec256<T> halves_swapped = ConcatLowerUpper(vHL, vHL);
+  const Vec256<T> both_halves = Min(halves_swapped, vHL);
+  return detail::MinOfLanes(LaneSizeTag(), both_halves);
+}
+// Maximum across the vector: combines the two 128-bit halves with Max, then
+// finishes with the per-block reduction.
+template <typename T>
+HWY_API Vec256<T> MaxOfLanes(const Vec256<T> vHL) {
+  using LaneSizeTag = hwy::SizeTag<sizeof(T)>;
+  const Vec256<T> halves_swapped = ConcatLowerUpper(vHL, vHL);
+  const Vec256<T> both_halves = Max(halves_swapped, vHL);
+  return detail::MaxOfLanes(LaneSizeTag(), both_halves);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/x86_512-inl.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/x86_512-inl.h
new file mode 100644
index 0000000000..fe34146fa9
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/ops/x86_512-inl.h
@@ -0,0 +1,3113 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// 512-bit AVX512 vectors and operations.
+// External include guard in highway.h - see comment there.
+
+// WARNING: most operations do not cross 128-bit block boundaries. In
+// particular, "Broadcast", pack and zip behavior may be surprising.
+
+#include <immintrin.h>  // AVX2+
+#if defined(_MSC_VER) && defined(__clang__)
+// Including <immintrin.h> should be enough, but Clang's headers helpfully skip
+// including these headers when _MSC_VER is defined, like when using clang-cl.
+// Include these directly here.
+#include <smmintrin.h>
+#include <avxintrin.h>
+#include <avx2intrin.h>
+#include <f16cintrin.h>
+#include <fmaintrin.h>
+#include <avx512fintrin.h>
+#include <avx512vlintrin.h>
+#include <avx512bwintrin.h>
+#include <avx512dqintrin.h>
+#include <avx512vlbwintrin.h>
+#include <avx512vldqintrin.h>
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+// For half-width vectors. Already includes base.h and shared-inl.h.
+#include "hwy/ops/x86_256-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Maps a lane type to the raw 512-bit register type: __m512i unless
+// specialized, __m512 for float and __m512d for double.
+template <typename T>
+struct Raw512 {
+  typedef __m512i type;
+};
+template <>
+struct Raw512<float> {
+  typedef __m512 type;
+};
+template <>
+struct Raw512<double> {
+  typedef __m512d type;
+};
+
+// Tag type for a full 512-bit vector: Simd with 64 / sizeof(T) lanes of T.
+template <typename T>
+using Full512 = Simd<T, 64 / sizeof(T)>;
+
+// Thin wrapper around a raw 512-bit register holding lanes of type T.
+template <typename T>
+class Vec512 {
+  typedef typename Raw512<T>::type Raw;
+
+ public:
+  // Compound assignment. Each operator forwards to the matching non-member
+  // binary operator, so it is only usable if that overload exists. For
+  // example, only f32 and f64 support division.
+  HWY_INLINE Vec512& operator*=(const Vec512 other) {
+    *this = *this * other;
+    return *this;
+  }
+  HWY_INLINE Vec512& operator/=(const Vec512 other) {
+    *this = *this / other;
+    return *this;
+  }
+  HWY_INLINE Vec512& operator+=(const Vec512 other) {
+    *this = *this + other;
+    return *this;
+  }
+  HWY_INLINE Vec512& operator-=(const Vec512 other) {
+    *this = *this - other;
+    return *this;
+  }
+  HWY_INLINE Vec512& operator&=(const Vec512 other) {
+    *this = *this & other;
+    return *this;
+  }
+  HWY_INLINE Vec512& operator|=(const Vec512 other) {
+    *this = *this | other;
+    return *this;
+  }
+  HWY_INLINE Vec512& operator^=(const Vec512 other) {
+    *this = *this ^ other;
+    return *this;
+  }
+
+  // Underlying register (__m512i, __m512 or __m512d, see Raw512).
+  Raw raw;
+};
+
+// Maps sizeof(lane type) to the mask register type with one bit per lane of a
+// 512-bit vector: 64, 32, 16 or 8 bits for lane sizes 1, 2, 4 or 8.
+// The primary template is intentionally empty (no `type` member).
+template <size_t size>
+struct RawMask512 {};
+template <>
+struct RawMask512<1> {
+  typedef __mmask64 type;
+};
+template <>
+struct RawMask512<2> {
+  typedef __mmask32 type;
+};
+template <>
+struct RawMask512<4> {
+  typedef __mmask16 type;
+};
+template <>
+struct RawMask512<8> {
+  typedef __mmask8 type;
+};
+
+// Mask register: one bit per lane. The raw type is chosen by lane size via
+// RawMask512.
+template <typename T>
+class Mask512 {
+ public:
+  typedef typename RawMask512<sizeof(T)>::type Raw;
+  Raw raw;
+};
+
+// ------------------------------ BitCast
+
+namespace detail {
+
+// Reinterprets any raw 512-bit register as __m512i; the integer overload is
+// the identity.
+HWY_API __m512i BitCastToInteger(__m512i v) {
+  return v;
+}
+HWY_API __m512i BitCastToInteger(__m512 v) {
+  return _mm512_castps_si512(v);
+}
+HWY_API __m512i BitCastToInteger(__m512d v) {
+  return _mm512_castpd_si512(v);
+}
+
+// Reinterprets a vector of any lane type as a vector of bytes.
+template <typename T>
+HWY_API Vec512<uint8_t> BitCastToByte(Vec512<T> v) {
+  const __m512i as_integer = BitCastToInteger(v.raw);
+  return Vec512<uint8_t>{as_integer};
+}
+
+// Functor that reinterprets __m512i as the raw register type for lane type T.
+// Cannot rely on function overloading because return types differ.
+template <typename T>
+struct BitCastFromInteger512 {
+  HWY_INLINE __m512i operator()(__m512i v) {
+    return v;
+  }
+};
+template <>
+struct BitCastFromInteger512<float> {
+  HWY_INLINE __m512 operator()(__m512i v) {
+    return _mm512_castsi512_ps(v);
+  }
+};
+template <>
+struct BitCastFromInteger512<double> {
+  HWY_INLINE __m512d operator()(__m512i v) {
+    return _mm512_castsi512_pd(v);
+  }
+};
+
+// Reinterprets a byte vector as a vector with lanes of type T.
+template <typename T>
+HWY_API Vec512<T> BitCastFromByte(Full512<T> /* tag */, Vec512<uint8_t> v) {
+  BitCastFromInteger512<T> from_integer;
+  return Vec512<T>{from_integer(v.raw)};
+}
+
+}  // namespace detail
+
+// Reinterprets the bits of `v` as lanes of type T, going through a byte
+// vector as the common intermediate representation.
+template <typename T, typename FromT>
+HWY_API Vec512<T> BitCast(Full512<T> d, Vec512<FromT> v) {
+  const Vec512<uint8_t> as_bytes = detail::BitCastToByte(v);
+  return detail::BitCastFromByte(d, as_bytes);
+}
+
+// ------------------------------ Set
+
+// Returns an all-zero vector. The template covers the integer lane types;
+// float and double have dedicated overloads for their register types.
+template <typename T>
+HWY_API Vec512<T> Zero(Full512<T> /* tag */) {
+  const __m512i zeros = _mm512_setzero_si512();
+  return Vec512<T>{zeros};
+}
+HWY_API Vec512<float> Zero(Full512<float> /* tag */) {
+  const __m512 zeros = _mm512_setzero_ps();
+  return Vec512<float>{zeros};
+}
+HWY_API Vec512<double> Zero(Full512<double> /* tag */) {
+  const __m512d zeros = _mm512_setzero_pd();
+  return Vec512<double>{zeros};
+}
+
+// Returns a vector with all lanes set to "t". Integer arguments are first
+// cast to the parameter type of the corresponding set1 intrinsic.
+HWY_API Vec512<uint8_t> Set(Full512<uint8_t> /* tag */, const uint8_t t) {
+  const char lane = static_cast<char>(t);  // NOLINT
+  return Vec512<uint8_t>{_mm512_set1_epi8(lane)};
+}
+HWY_API Vec512<uint16_t> Set(Full512<uint16_t> /* tag */, const uint16_t t) {
+  const short lane = static_cast<short>(t);  // NOLINT
+  return Vec512<uint16_t>{_mm512_set1_epi16(lane)};
+}
+HWY_API Vec512<uint32_t> Set(Full512<uint32_t> /* tag */, const uint32_t t) {
+  const int lane = static_cast<int>(t);
+  return Vec512<uint32_t>{_mm512_set1_epi32(lane)};
+}
+HWY_API Vec512<uint64_t> Set(Full512<uint64_t> /* tag */, const uint64_t t) {
+  const long long lane = static_cast<long long>(t);  // NOLINT
+  return Vec512<uint64_t>{_mm512_set1_epi64(lane)};
+}
+HWY_API Vec512<int8_t> Set(Full512<int8_t> /* tag */, const int8_t t) {
+  const char lane = static_cast<char>(t);  // NOLINT
+  return Vec512<int8_t>{_mm512_set1_epi8(lane)};
+}
+HWY_API Vec512<int16_t> Set(Full512<int16_t> /* tag */, const int16_t t) {
+  const short lane = static_cast<short>(t);  // NOLINT
+  return Vec512<int16_t>{_mm512_set1_epi16(lane)};
+}
+HWY_API Vec512<int32_t> Set(Full512<int32_t> /* tag */, const int32_t t) {
+  return Vec512<int32_t>{_mm512_set1_epi32(t)};
+}
+HWY_API Vec512<int64_t> Set(Full512<int64_t> /* tag */, const int64_t t) {
+  const long long lane = static_cast<long long>(t);  // NOLINT
+  return Vec512<int64_t>{_mm512_set1_epi64(lane)};
+}
+HWY_API Vec512<float> Set(Full512<float> /* tag */, const float t) {
+  return Vec512<float>{_mm512_set1_ps(t)};
+}
+HWY_API Vec512<double> Set(Full512<double> /* tag */, const double t) {
+  return Vec512<double>{_mm512_set1_pd(t)};
+}
+
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wuninitialized")
+
+// Returns a vector with uninitialized elements; callers must not rely on the
+// contents. One overload per raw register type.
+template <typename T>
+HWY_API Vec512<T> Undefined(Full512<T> /* tag */) {
+  // Available on Clang 6.0, GCC 6.2, ICC 16.03, MSVC 19.14. All but ICC
+  // generate an XOR instruction.
+  const __m512i unspecified = _mm512_undefined_epi32();
+  return Vec512<T>{unspecified};
+}
+HWY_API Vec512<float> Undefined(Full512<float> /* tag */) {
+  const __m512 unspecified = _mm512_undefined_ps();
+  return Vec512<float>{unspecified};
+}
+HWY_API Vec512<double> Undefined(Full512<double> /* tag */) {
+  const __m512d unspecified = _mm512_undefined_pd();
+  return Vec512<double>{unspecified};
+}
+
+HWY_DIAGNOSTICS(pop)
+
+// ================================================== LOGICAL
+
+// ------------------------------ Not
+
+// Returns the bitwise complement of `v`.
+template <typename T>
+HWY_API Vec512<T> Not(const Vec512<T> v) {
+  using TU = MakeUnsigned<T>;
+  const Full512<TU> du;
+  const __m512i bits = BitCast(du, v).raw;
+  // With all three operands equal, only truth-table entries 0 (inputs 000)
+  // and 7 (inputs 111) are reachable; 0x55 maps them to 1 and 0 respectively.
+  const Vec512<TU> inverted{_mm512_ternarylogic_epi32(bits, bits, bits, 0x55)};
+  return BitCast(Full512<T>(), inverted);
+}
+
+// ------------------------------ And
+
+// Bitwise AND. The template handles integer lanes; float and double use the
+// intrinsics for their own register types.
+template <typename T>
+HWY_API Vec512<T> And(const Vec512<T> a, const Vec512<T> b) {
+  const __m512i bits = _mm512_and_si512(a.raw, b.raw);
+  return Vec512<T>{bits};
+}
+
+HWY_API Vec512<float> And(const Vec512<float> a, const Vec512<float> b) {
+  const __m512 bits = _mm512_and_ps(a.raw, b.raw);
+  return Vec512<float>{bits};
+}
+HWY_API Vec512<double> And(const Vec512<double> a, const Vec512<double> b) {
+  const __m512d bits = _mm512_and_pd(a.raw, b.raw);
+  return Vec512<double>{bits};
+}
+
+// ------------------------------ AndNot
+
+// Returns ~not_mask & mask. Note the operand order: the FIRST argument is the
+// one that gets complemented.
+template <typename T>
+HWY_API Vec512<T> AndNot(const Vec512<T> not_mask, const Vec512<T> mask) {
+  const __m512i bits = _mm512_andnot_si512(not_mask.raw, mask.raw);
+  return Vec512<T>{bits};
+}
+HWY_API Vec512<float> AndNot(const Vec512<float> not_mask,
+                             const Vec512<float> mask) {
+  const __m512 bits = _mm512_andnot_ps(not_mask.raw, mask.raw);
+  return Vec512<float>{bits};
+}
+HWY_API Vec512<double> AndNot(const Vec512<double> not_mask,
+                              const Vec512<double> mask) {
+  const __m512d bits = _mm512_andnot_pd(not_mask.raw, mask.raw);
+  return Vec512<double>{bits};
+}
+
+// ------------------------------ Or
+
+// Bitwise OR. The template handles integer lanes; float and double use the
+// intrinsics for their own register types.
+template <typename T>
+HWY_API Vec512<T> Or(const Vec512<T> a, const Vec512<T> b) {
+  const __m512i bits = _mm512_or_si512(a.raw, b.raw);
+  return Vec512<T>{bits};
+}
+
+HWY_API Vec512<float> Or(const Vec512<float> a, const Vec512<float> b) {
+  const __m512 bits = _mm512_or_ps(a.raw, b.raw);
+  return Vec512<float>{bits};
+}
+HWY_API Vec512<double> Or(const Vec512<double> a, const Vec512<double> b) {
+  const __m512d bits = _mm512_or_pd(a.raw, b.raw);
+  return Vec512<double>{bits};
+}
+
+// ------------------------------ Xor
+
+// Bitwise XOR. The template handles integer lanes; float and double use the
+// intrinsics for their own register types.
+template <typename T>
+HWY_API Vec512<T> Xor(const Vec512<T> a, const Vec512<T> b) {
+  const __m512i bits = _mm512_xor_si512(a.raw, b.raw);
+  return Vec512<T>{bits};
+}
+
+HWY_API Vec512<float> Xor(const Vec512<float> a, const Vec512<float> b) {
+  const __m512 bits = _mm512_xor_ps(a.raw, b.raw);
+  return Vec512<float>{bits};
+}
+HWY_API Vec512<double> Xor(const Vec512<double> a, const Vec512<double> b) {
+  const __m512d bits = _mm512_xor_pd(a.raw, b.raw);
+  return Vec512<double>{bits};
+}
+
+// ------------------------------ Operator overloads (internal-only if float)
+
+// Operator spellings of And / Or / Xor; each simply forwards to the named
+// function with the operands in the same order.
+template <typename T>
+HWY_API Vec512<T> operator&(const Vec512<T> a, const Vec512<T> b) {
+  const Vec512<T> both = And(a, b);
+  return both;
+}
+
+template <typename T>
+HWY_API Vec512<T> operator|(const Vec512<T> a, const Vec512<T> b) {
+  const Vec512<T> either = Or(a, b);
+  return either;
+}
+
+template <typename T>
+HWY_API Vec512<T> operator^(const Vec512<T> a, const Vec512<T> b) {
+  const Vec512<T> differing = Xor(a, b);
+  return differing;
+}
+
+// ------------------------------ CopySign
+
+// Returns a vector with the magnitude bits of `magn` and the sign bit of
+// `sign`. Floating-point lanes only.
+template <typename T>
+HWY_API Vec512<T> CopySign(const Vec512<T> magn, const Vec512<T> sign) {
+  static_assert(IsFloat<T>(), "Only makes sense for floating-point");
+
+  using TU = MakeUnsigned<T>;
+  const Full512<T> d;
+  const Full512<TU> du;
+
+  const __m512i sign_bit_mask = BitCast(du, SignBit(d)).raw;
+  const __m512i magn_bits = BitCast(du, magn).raw;
+  const __m512i sign_bits = BitCast(du, sign).raw;
+
+  // 0xAC encodes the bitwise select "msb ? sign : magn". Truth table, with
+  // inputs listed as (msb, magn, sign) -> output:
+  //   000 -> 0, 001 -> 0, 010 -> 1, 011 -> 1   (msb clear: take magn)
+  //   100 -> 0, 101 -> 1, 110 -> 0, 111 -> 1   (msb set:   take sign)
+  // The lane size does not matter because we are not using predication.
+  const Vec512<TU> merged{
+      _mm512_ternarylogic_epi32(sign_bit_mask, magn_bits, sign_bits, 0xAC)};
+  return BitCast(d, merged);
+}
+
+// Variant of CopySign whose first argument is nominally non-negative. On this
+// target it is the same operation: AVX3 can also handle abs < 0, so no extra
+// action is needed.
+template <typename T>
+HWY_API Vec512<T> CopySignToAbs(const Vec512<T> abs, const Vec512<T> sign) {
+  const Vec512<T> result = CopySign(abs, sign);
+  return result;
+}
+
+// ------------------------------ FirstN
+
+// Possibilities for constructing a bitmask of N ones:
+// - kshift* only consider the lowest byte of the shift count, so they would
+//   not correctly handle large n.
+// - Scalar shifts >= 64 are UB.
+// - BZHI saturates (returns its source unchanged) when the index is at least
+//   the operand size, but it only looks at the lowest byte of the index, so
+//   n > 255 must be handled explicitly. We assume AVX-512 implies BMI2.
+//   However, we need 64-bit masks for sizeof(T) == 1, so special-case 32-bit
+//   builds.
+
+#if HWY_ARCH_X86_32
+namespace detail {
+
+// 32 bit mask is sufficient for lane size >= 2.
+// Returns a mask whose lowest min(n, 32) bits are set, truncated to the mask
+// register width for T.
+template <typename T, HWY_IF_NOT_LANE_SIZE(T, 1)>
+HWY_API Mask512<T> FirstN(size_t n) {
+  using Bits = typename Mask512<T>::Raw;
+  const uint32_t all = ~uint32_t(0);
+  // BZHI would wrap around for n > 255 (only index bits [0, 8) are used).
+  const uint32_t bits =
+      (n > 255) ? all : _bzhi_u32(all, static_cast<uint32_t>(n));
+  return Mask512<T>{static_cast<Bits>(bits)};
+}
+
+// 8-bit lanes need a 64-bit mask, which _bzhi_u32 cannot produce; use a
+// guarded scalar shift instead (shifting by >= 64 would be UB).
+template <typename T, HWY_IF_LANE_SIZE(T, 1)>
+HWY_API Mask512<T> FirstN(size_t n) {
+  const uint64_t bits = n < 64 ? ((1ULL << n) - 1) : ~uint64_t(0);
+  return Mask512<T>{static_cast<__mmask64>(bits)};
+}
+
+}  // namespace detail
+#endif  // HWY_ARCH_X86_32
+
+// Returns a mask in which the first n lanes are true; if n is at least the
+// number of lanes, all lanes are true.
+template <typename T>
+HWY_API Mask512<T> FirstN(const Full512<T> /*tag*/, size_t n) {
+#if HWY_ARCH_X86_64
+  using Bits = typename Mask512<T>::Raw;
+  const uint64_t all = ~uint64_t(0);
+  // BZHI takes an `unsigned` index and only uses its lowest byte, so without
+  // this check n = 256 (or any n whose low byte is small) would produce a
+  // partial or empty mask instead of all-true.
+  const uint64_t bits =
+      (n > 255) ? all : _bzhi_u64(all, static_cast<uint32_t>(n));
+  return Mask512<T>{static_cast<Bits>(bits)};
+#else
+  return detail::FirstN<T>(n);
+#endif  // HWY_ARCH_X86_64
+}
+
+// ------------------------------ IfThenElse
+
+// Returns mask ? yes : no.
+
+namespace detail {
+
+// Templates for signed/unsigned integer of a particular size. Each overload
+// uses the masked move for its lane width: lanes whose mask bit is set come
+// from `yes`, all others from `no`.
+template <typename T>
+HWY_API Vec512<T> IfThenElse(hwy::SizeTag<1> /* tag */, const Mask512<T> mask,
+                             const Vec512<T> yes, const Vec512<T> no) {
+  const __m512i selected = _mm512_mask_mov_epi8(no.raw, mask.raw, yes.raw);
+  return Vec512<T>{selected};
+}
+template <typename T>
+HWY_API Vec512<T> IfThenElse(hwy::SizeTag<2> /* tag */, const Mask512<T> mask,
+                             const Vec512<T> yes, const Vec512<T> no) {
+  const __m512i selected = _mm512_mask_mov_epi16(no.raw, mask.raw, yes.raw);
+  return Vec512<T>{selected};
+}
+template <typename T>
+HWY_API Vec512<T> IfThenElse(hwy::SizeTag<4> /* tag */, const Mask512<T> mask,
+                             const Vec512<T> yes, const Vec512<T> no) {
+  const __m512i selected = _mm512_mask_mov_epi32(no.raw, mask.raw, yes.raw);
+  return Vec512<T>{selected};
+}
+template <typename T>
+HWY_API Vec512<T> IfThenElse(hwy::SizeTag<8> /* tag */, const Mask512<T> mask,
+                             const Vec512<T> yes, const Vec512<T> no) {
+  const __m512i selected = _mm512_mask_mov_epi64(no.raw, mask.raw, yes.raw);
+  return Vec512<T>{selected};
+}
+
+}  // namespace detail
+
+template <typename T>  // generic case: dispatch on the lane size in bytes
+HWY_API Vec512<T> IfThenElse(const Mask512<T> mask, const Vec512<T> yes,
+                             const Vec512<T> no) {
+  return detail::IfThenElse(hwy::SizeTag<sizeof(T)>(), mask, yes, no);
+}
+template <>  // float lanes: masked move on packed singles
+HWY_INLINE Vec512<float> IfThenElse(const Mask512<float> mask,
+                                    const Vec512<float> yes,
+                                    const Vec512<float> no) {
+  return Vec512<float>{_mm512_mask_mov_ps(no.raw, mask.raw, yes.raw)};
+}
+template <>  // double lanes: masked move on packed doubles
+HWY_INLINE Vec512<double> IfThenElse(const Mask512<double> mask,
+                                     const Vec512<double> yes,
+                                     const Vec512<double> no) {
+  return Vec512<double>{_mm512_mask_mov_pd(no.raw, mask.raw, yes.raw)};
+}
+
+namespace detail {  // per-lane-size helpers for IfThenElseZero
+
+template <typename T>  // 1-byte lanes: yes where mask is set, else zero
+HWY_API Vec512<T> IfThenElseZero(hwy::SizeTag<1> /* tag */,
+                                 const Mask512<T> mask, const Vec512<T> yes) {
+  return Vec512<T>{_mm512_maskz_mov_epi8(mask.raw, yes.raw)};
+}
+template <typename T>  // 2-byte lanes
+HWY_API Vec512<T> IfThenElseZero(hwy::SizeTag<2> /* tag */,
+                                 const Mask512<T> mask, const Vec512<T> yes) {
+  return Vec512<T>{_mm512_maskz_mov_epi16(mask.raw, yes.raw)};
+}
+template <typename T>  // 4-byte lanes
+HWY_API Vec512<T> IfThenElseZero(hwy::SizeTag<4> /* tag */,
+                                 const Mask512<T> mask, const Vec512<T> yes) {
+  return Vec512<T>{_mm512_maskz_mov_epi32(mask.raw, yes.raw)};
+}
+template <typename T>  // 8-byte lanes
+HWY_API Vec512<T> IfThenElseZero(hwy::SizeTag<8> /* tag */,
+                                 const Mask512<T> mask, const Vec512<T> yes) {
+  return Vec512<T>{_mm512_maskz_mov_epi64(mask.raw, yes.raw)};
+}
+
+}  // namespace detail
+
+template <typename T>  // returns mask ? yes : 0; generic case dispatches on size
+HWY_API Vec512<T> IfThenElseZero(const Mask512<T> mask, const Vec512<T> yes) {
+  return detail::IfThenElseZero(hwy::SizeTag<sizeof(T)>(), mask, yes);
+}
+template <>  // float lanes: zero-masking move on packed singles
+HWY_INLINE Vec512<float> IfThenElseZero(const Mask512<float> mask,
+                                        const Vec512<float> yes) {
+  return Vec512<float>{_mm512_maskz_mov_ps(mask.raw, yes.raw)};
+}
+template <>  // double lanes: zero-masking move on packed doubles
+HWY_INLINE Vec512<double> IfThenElseZero(const Mask512<double> mask,
+                                         const Vec512<double> yes) {
+  return Vec512<double>{_mm512_maskz_mov_pd(mask.raw, yes.raw)};
+}
+
+namespace detail {  // per-lane-size helpers for IfThenZeroElse
+
+template <typename T>  // 1-byte lanes: no - no == 0 where mask is set, else no
+HWY_API Vec512<T> IfThenZeroElse(hwy::SizeTag<1> /* tag */,
+                                 const Mask512<T> mask, const Vec512<T> no) {
+  // xor_epi8/16 are missing, but we have sub, which is just as fast for u8/16.
+  return Vec512<T>{_mm512_mask_sub_epi8(no.raw, mask.raw, no.raw, no.raw)};
+}
+template <typename T>  // 2-byte lanes: same masked-subtract trick
+HWY_API Vec512<T> IfThenZeroElse(hwy::SizeTag<2> /* tag */,
+                                 const Mask512<T> mask, const Vec512<T> no) {
+  return Vec512<T>{_mm512_mask_sub_epi16(no.raw, mask.raw, no.raw, no.raw)};
+}
+template <typename T>  // 4-byte lanes: no ^ no == 0 where mask is set, else no
+HWY_API Vec512<T> IfThenZeroElse(hwy::SizeTag<4> /* tag */,
+                                 const Mask512<T> mask, const Vec512<T> no) {
+  return Vec512<T>{_mm512_mask_xor_epi32(no.raw, mask.raw, no.raw, no.raw)};
+}
+template <typename T>  // 8-byte lanes: same masked-xor trick
+HWY_API Vec512<T> IfThenZeroElse(hwy::SizeTag<8> /* tag */,
+                                 const Mask512<T> mask, const Vec512<T> no) {
+  return Vec512<T>{_mm512_mask_xor_epi64(no.raw, mask.raw, no.raw, no.raw)};
+}
+
+}  // namespace detail
+
+template <typename T>  // returns mask ? 0 : no; generic case dispatches on size
+HWY_API Vec512<T> IfThenZeroElse(const Mask512<T> mask, const Vec512<T> no) {
+  return detail::IfThenZeroElse(hwy::SizeTag<sizeof(T)>(), mask, no);
+}
+template <>  // float lanes: masked no ^ no clears the selected lanes
+HWY_INLINE Vec512<float> IfThenZeroElse(const Mask512<float> mask,
+                                        const Vec512<float> no) {
+  return Vec512<float>{_mm512_mask_xor_ps(no.raw, mask.raw, no.raw, no.raw)};
+}
+template <>  // double lanes: masked no ^ no clears the selected lanes
+HWY_INLINE Vec512<double> IfThenZeroElse(const Mask512<double> mask,
+                                         const Vec512<double> no) {
+  return Vec512<double>{_mm512_mask_xor_pd(no.raw, mask.raw, no.raw, no.raw)};
+}
+
+template <typename T, HWY_IF_FLOAT(T)>
+HWY_API Vec512<T> ZeroIfNegative(const Vec512<T> v) {
+  const auto is_negative = MaskFromVec(v);  // AVX3: only the MSB is examined
+  return IfThenZeroElse(is_negative, v);
+}
+
+// ================================================== ARITHMETIC
+
+// ------------------------------ Addition
+
+// Unsigned: lane-wise a + b; sums that overflow wrap around (no saturation).
+HWY_API Vec512<uint8_t> operator+(const Vec512<uint8_t> a,
+                                  const Vec512<uint8_t> b) {
+  return Vec512<uint8_t>{_mm512_add_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<uint16_t> operator+(const Vec512<uint16_t> a,
+                                   const Vec512<uint16_t> b) {
+  return Vec512<uint16_t>{_mm512_add_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<uint32_t> operator+(const Vec512<uint32_t> a,
+                                   const Vec512<uint32_t> b) {
+  return Vec512<uint32_t>{_mm512_add_epi32(a.raw, b.raw)};
+}
+HWY_API Vec512<uint64_t> operator+(const Vec512<uint64_t> a,
+                                   const Vec512<uint64_t> b) {
+  return Vec512<uint64_t>{_mm512_add_epi64(a.raw, b.raw)};
+}
+
+// Signed: uses the same add intrinsics as the unsigned overloads above.
+HWY_API Vec512<int8_t> operator+(const Vec512<int8_t> a,
+                                 const Vec512<int8_t> b) {
+  return Vec512<int8_t>{_mm512_add_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<int16_t> operator+(const Vec512<int16_t> a,
+                                  const Vec512<int16_t> b) {
+  return Vec512<int16_t>{_mm512_add_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<int32_t> operator+(const Vec512<int32_t> a,
+                                  const Vec512<int32_t> b) {
+  return Vec512<int32_t>{_mm512_add_epi32(a.raw, b.raw)};
+}
+HWY_API Vec512<int64_t> operator+(const Vec512<int64_t> a,
+                                  const Vec512<int64_t> b) {
+  return Vec512<int64_t>{_mm512_add_epi64(a.raw, b.raw)};
+}
+
+// Float: lane-wise floating-point a + b.
+HWY_API Vec512<float> operator+(const Vec512<float> a, const Vec512<float> b) {
+  return Vec512<float>{_mm512_add_ps(a.raw, b.raw)};
+}
+HWY_API Vec512<double> operator+(const Vec512<double> a,
+                                 const Vec512<double> b) {
+  return Vec512<double>{_mm512_add_pd(a.raw, b.raw)};
+}
+
+// ------------------------------ Subtraction
+
+// Unsigned: lane-wise a - b; differences below zero wrap around.
+HWY_API Vec512<uint8_t> operator-(const Vec512<uint8_t> a,
+                                  const Vec512<uint8_t> b) {
+  return Vec512<uint8_t>{_mm512_sub_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<uint16_t> operator-(const Vec512<uint16_t> a,
+                                   const Vec512<uint16_t> b) {
+  return Vec512<uint16_t>{_mm512_sub_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<uint32_t> operator-(const Vec512<uint32_t> a,
+                                   const Vec512<uint32_t> b) {
+  return Vec512<uint32_t>{_mm512_sub_epi32(a.raw, b.raw)};
+}
+HWY_API Vec512<uint64_t> operator-(const Vec512<uint64_t> a,
+                                   const Vec512<uint64_t> b) {
+  return Vec512<uint64_t>{_mm512_sub_epi64(a.raw, b.raw)};
+}
+
+// Signed: uses the same sub intrinsics as the unsigned overloads above.
+HWY_API Vec512<int8_t> operator-(const Vec512<int8_t> a,
+                                 const Vec512<int8_t> b) {
+  return Vec512<int8_t>{_mm512_sub_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<int16_t> operator-(const Vec512<int16_t> a,
+                                  const Vec512<int16_t> b) {
+  return Vec512<int16_t>{_mm512_sub_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<int32_t> operator-(const Vec512<int32_t> a,
+                                  const Vec512<int32_t> b) {
+  return Vec512<int32_t>{_mm512_sub_epi32(a.raw, b.raw)};
+}
+HWY_API Vec512<int64_t> operator-(const Vec512<int64_t> a,
+                                  const Vec512<int64_t> b) {
+  return Vec512<int64_t>{_mm512_sub_epi64(a.raw, b.raw)};
+}
+
+// Float: lane-wise floating-point a - b.
+HWY_API Vec512<float> operator-(const Vec512<float> a, const Vec512<float> b) {
+  return Vec512<float>{_mm512_sub_ps(a.raw, b.raw)};
+}
+HWY_API Vec512<double> operator-(const Vec512<double> a,
+                                 const Vec512<double> b) {
+  return Vec512<double>{_mm512_sub_pd(a.raw, b.raw)};
+}
+
+// ------------------------------ Saturating addition
+
+// Returns a + b clamped to the destination range (8- and 16-bit lanes only).
+
+// Unsigned: sums above LimitsMax() become LimitsMax().
+HWY_API Vec512<uint8_t> SaturatedAdd(const Vec512<uint8_t> a,
+                                     const Vec512<uint8_t> b) {
+  return Vec512<uint8_t>{_mm512_adds_epu8(a.raw, b.raw)};
+}
+HWY_API Vec512<uint16_t> SaturatedAdd(const Vec512<uint16_t> a,
+                                      const Vec512<uint16_t> b) {
+  return Vec512<uint16_t>{_mm512_adds_epu16(a.raw, b.raw)};
+}
+
+// Signed: sums are clamped to [LimitsMin(), LimitsMax()].
+HWY_API Vec512<int8_t> SaturatedAdd(const Vec512<int8_t> a,
+                                    const Vec512<int8_t> b) {
+  return Vec512<int8_t>{_mm512_adds_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<int16_t> SaturatedAdd(const Vec512<int16_t> a,
+                                     const Vec512<int16_t> b) {
+  return Vec512<int16_t>{_mm512_adds_epi16(a.raw, b.raw)};
+}
+
+// ------------------------------ Saturating subtraction
+
+// Returns a - b clamped to the destination range (8- and 16-bit lanes only).
+
+// Unsigned: differences below zero become zero.
+HWY_API Vec512<uint8_t> SaturatedSub(const Vec512<uint8_t> a,
+                                     const Vec512<uint8_t> b) {
+  return Vec512<uint8_t>{_mm512_subs_epu8(a.raw, b.raw)};
+}
+HWY_API Vec512<uint16_t> SaturatedSub(const Vec512<uint16_t> a,
+                                      const Vec512<uint16_t> b) {
+  return Vec512<uint16_t>{_mm512_subs_epu16(a.raw, b.raw)};
+}
+
+// Signed: differences are clamped to [LimitsMin(), LimitsMax()].
+HWY_API Vec512<int8_t> SaturatedSub(const Vec512<int8_t> a,
+                                    const Vec512<int8_t> b) {
+  return Vec512<int8_t>{_mm512_subs_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<int16_t> SaturatedSub(const Vec512<int16_t> a,
+                                     const Vec512<int16_t> b) {
+  return Vec512<int16_t>{_mm512_subs_epi16(a.raw, b.raw)};
+}
+
+// ------------------------------ Average
+
+// Returns (a + b + 1) / 2 per lane (the intermediate sum does not overflow).
+
+// Unsigned: only 8- and 16-bit lanes are provided here.
+HWY_API Vec512<uint8_t> AverageRound(const Vec512<uint8_t> a,
+                                     const Vec512<uint8_t> b) {
+  return Vec512<uint8_t>{_mm512_avg_epu8(a.raw, b.raw)};
+}
+HWY_API Vec512<uint16_t> AverageRound(const Vec512<uint16_t> a,
+                                      const Vec512<uint16_t> b) {
+  return Vec512<uint16_t>{_mm512_avg_epu16(a.raw, b.raw)};
+}
+
+// ------------------------------ Absolute value
+
+// Returns |v| per lane, except that LimitsMin() maps to LimitsMax() + 1.
+HWY_API Vec512<int8_t> Abs(const Vec512<int8_t> v) {
+#if HWY_COMPILER_MSVC
+  // Workaround for incorrect codegen? (untested due to internal compiler error)
+  const auto zero = Zero(Full512<int8_t>());
+  return Vec512<int8_t>{_mm512_max_epi8(v.raw, (zero - v).raw)};  // max(v, -v)
+#else
+  return Vec512<int8_t>{_mm512_abs_epi8(v.raw)};
+#endif
+}
+HWY_API Vec512<int16_t> Abs(const Vec512<int16_t> v) {
+  return Vec512<int16_t>{_mm512_abs_epi16(v.raw)};
+}
+HWY_API Vec512<int32_t> Abs(const Vec512<int32_t> v) {
+  return Vec512<int32_t>{_mm512_abs_epi32(v.raw)};
+}
+HWY_API Vec512<int64_t> Abs(const Vec512<int64_t> v) {
+  return Vec512<int64_t>{_mm512_abs_epi64(v.raw)};
+}
+
+// Float: these aren't native instructions, they also involve AND with constant.
+HWY_API Vec512<float> Abs(const Vec512<float> v) {
+  return Vec512<float>{_mm512_abs_ps(v.raw)};
+}
+HWY_API Vec512<double> Abs(const Vec512<double> v) {
+  return Vec512<double>{_mm512_abs_pd(v.raw)};
+}
+
+// ------------------------------ ShiftLeft
+
+template <int kBits>  // kBits: compile-time shift count applied to every lane
+HWY_API Vec512<uint16_t> ShiftLeft(const Vec512<uint16_t> v) {
+  return Vec512<uint16_t>{_mm512_slli_epi16(v.raw, kBits)};
+}
+
+template <int kBits>  // u32 lanes
+HWY_API Vec512<uint32_t> ShiftLeft(const Vec512<uint32_t> v) {
+  return Vec512<uint32_t>{_mm512_slli_epi32(v.raw, kBits)};
+}
+
+template <int kBits>  // u64 lanes
+HWY_API Vec512<uint64_t> ShiftLeft(const Vec512<uint64_t> v) {
+  return Vec512<uint64_t>{_mm512_slli_epi64(v.raw, kBits)};
+}
+
+template <int kBits>  // i16 lanes: same intrinsic as u16
+HWY_API Vec512<int16_t> ShiftLeft(const Vec512<int16_t> v) {
+  return Vec512<int16_t>{_mm512_slli_epi16(v.raw, kBits)};
+}
+
+template <int kBits>  // i32 lanes: same intrinsic as u32
+HWY_API Vec512<int32_t> ShiftLeft(const Vec512<int32_t> v) {
+  return Vec512<int32_t>{_mm512_slli_epi32(v.raw, kBits)};
+}
+
+template <int kBits>  // i64 lanes: same intrinsic as u64
+HWY_API Vec512<int64_t> ShiftLeft(const Vec512<int64_t> v) {
+  return Vec512<int64_t>{_mm512_slli_epi64(v.raw, kBits)};
+}
+
+template <int kBits, typename T, HWY_IF_LANE_SIZE(T, 1)>
+HWY_API Vec512<T> ShiftLeft(const Vec512<T> v) {
+  const Full512<T> d8;
+  const RepartitionToWide<decltype(d8)> d16;
+  // Shift as 16-bit lanes, then clear the bits that crossed a byte boundary.
+  const auto keep = Set(d8, static_cast<T>((0xFF << kBits) & 0xFF));
+  const auto wide = ShiftLeft<kBits>(BitCast(d16, v));
+  return kBits == 1 ? (v + v) : (BitCast(d8, wide) & keep);
+}
+
+// ------------------------------ ShiftRight
+
+template <int kBits>  // unsigned lanes: logical shift by a compile-time count
+HWY_API Vec512<uint16_t> ShiftRight(const Vec512<uint16_t> v) {
+  return Vec512<uint16_t>{_mm512_srli_epi16(v.raw, kBits)};
+}
+
+template <int kBits>  // u32 lanes
+HWY_API Vec512<uint32_t> ShiftRight(const Vec512<uint32_t> v) {
+  return Vec512<uint32_t>{_mm512_srli_epi32(v.raw, kBits)};
+}
+
+template <int kBits>  // u64 lanes
+HWY_API Vec512<uint64_t> ShiftRight(const Vec512<uint64_t> v) {
+  return Vec512<uint64_t>{_mm512_srli_epi64(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec512<uint8_t> ShiftRight(const Vec512<uint8_t> v) {
+  // Shift as 16-bit lanes, then clear bits that arrived from the upper byte.
+  const Vec512<uint16_t> wide{v.raw};
+  const Vec512<uint8_t> narrow{ShiftRight<kBits>(wide).raw};
+  return narrow & Set(Full512<uint8_t>(), 0xFF >> kBits);
+}
+
+template <int kBits>  // signed lanes: arithmetic shift by a compile-time count
+HWY_API Vec512<int16_t> ShiftRight(const Vec512<int16_t> v) {
+  return Vec512<int16_t>{_mm512_srai_epi16(v.raw, kBits)};
+}
+
+template <int kBits>  // i32 lanes
+HWY_API Vec512<int32_t> ShiftRight(const Vec512<int32_t> v) {
+  return Vec512<int32_t>{_mm512_srai_epi32(v.raw, kBits)};
+}
+
+template <int kBits>  // i64 lanes
+HWY_API Vec512<int64_t> ShiftRight(const Vec512<int64_t> v) {
+  return Vec512<int64_t>{_mm512_srai_epi64(v.raw, kBits)};
+}
+
+template <int kBits>
+HWY_API Vec512<int8_t> ShiftRight(const Vec512<int8_t> v) {
+  const Full512<int8_t> di;
+  const Full512<uint8_t> du;
+  // Logical shift, then sign-extend from the shifted sign bit: (x ^ s) - s.
+  const auto sign = BitCast(di, Set(du, 0x80 >> kBits));
+  return (BitCast(di, ShiftRight<kBits>(BitCast(du, v))) ^ sign) - sign;
+}
+
+// ------------------------------ ShiftLeftSame
+
+HWY_API Vec512<uint16_t> ShiftLeftSame(const Vec512<uint16_t> v,
+                                       const int bits) {  // same for all lanes
+  return Vec512<uint16_t>{_mm512_sll_epi16(v.raw, _mm_cvtsi32_si128(bits))};
+}
+HWY_API Vec512<uint32_t> ShiftLeftSame(const Vec512<uint32_t> v,
+                                       const int bits) {  // runtime count
+  return Vec512<uint32_t>{_mm512_sll_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+HWY_API Vec512<uint64_t> ShiftLeftSame(const Vec512<uint64_t> v,
+                                       const int bits) {  // runtime count
+  return Vec512<uint64_t>{_mm512_sll_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+HWY_API Vec512<int16_t> ShiftLeftSame(const Vec512<int16_t> v, const int bits) {
+  return Vec512<int16_t>{_mm512_sll_epi16(v.raw, _mm_cvtsi32_si128(bits))};
+}  // signed: same intrinsic as u16
+
+HWY_API Vec512<int32_t> ShiftLeftSame(const Vec512<int32_t> v, const int bits) {
+  return Vec512<int32_t>{_mm512_sll_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}  // signed: same intrinsic as u32
+
+HWY_API Vec512<int64_t> ShiftLeftSame(const Vec512<int64_t> v, const int bits) {
+  return Vec512<int64_t>{_mm512_sll_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+}  // signed: same intrinsic as u64
+
+template <typename T, HWY_IF_LANE_SIZE(T, 1)>  // 8-bit lanes, via 16-bit shift
+HWY_API Vec512<T> ShiftLeftSame(const Vec512<T> v, const int bits) {
+  const Full512<T> d8;  // result lanes; the mask below clears cross-byte bits
+  const RepartitionToWide<decltype(d8)> d16;  // shift is done on 16-bit lanes
+  const auto shifted = BitCast(d8, ShiftLeftSame(BitCast(d16, v), bits));
+  return shifted & Set(d8, static_cast<T>((0xFF << bits) & 0xFF));
+}
+
+// ------------------------------ ShiftRightSame
+
+HWY_API Vec512<uint16_t> ShiftRightSame(const Vec512<uint16_t> v,
+                                        const int bits) {  // logical shift
+  return Vec512<uint16_t>{_mm512_srl_epi16(v.raw, _mm_cvtsi32_si128(bits))};
+}
+HWY_API Vec512<uint32_t> ShiftRightSame(const Vec512<uint32_t> v,
+                                        const int bits) {  // logical shift
+  return Vec512<uint32_t>{_mm512_srl_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+HWY_API Vec512<uint64_t> ShiftRightSame(const Vec512<uint64_t> v,
+                                        const int bits) {  // logical shift
+  return Vec512<uint64_t>{_mm512_srl_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+HWY_API Vec512<uint8_t> ShiftRightSame(Vec512<uint8_t> v, const int bits) {
+  const Full512<uint8_t> d8;
+  const RepartitionToWide<decltype(d8)> d16;  // shift happens on 16-bit lanes
+  const auto keep_low = Set(d8, 0xFF >> bits);  // clears bits from upper byte
+  return BitCast(d8, ShiftRightSame(BitCast(d16, v), bits)) & keep_low;
+}
+
+HWY_API Vec512<int16_t> ShiftRightSame(const Vec512<int16_t> v,
+                                       const int bits) {  // arithmetic shift
+  return Vec512<int16_t>{_mm512_sra_epi16(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+HWY_API Vec512<int32_t> ShiftRightSame(const Vec512<int32_t> v,
+                                       const int bits) {  // arithmetic shift
+  return Vec512<int32_t>{_mm512_sra_epi32(v.raw, _mm_cvtsi32_si128(bits))};
+}
+HWY_API Vec512<int64_t> ShiftRightSame(const Vec512<int64_t> v,
+                                       const int bits) {  // arithmetic shift
+  return Vec512<int64_t>{_mm512_sra_epi64(v.raw, _mm_cvtsi32_si128(bits))};
+}
+
+HWY_API Vec512<int8_t> ShiftRightSame(Vec512<int8_t> v, const int bits) {
+  const Full512<int8_t> di;
+  const Full512<uint8_t> du;
+  // Logical shift, then sign-extend from the shifted sign bit: (x ^ s) - s.
+  const auto sign = BitCast(di, Set(du, 0x80 >> bits));
+  return (BitCast(di, ShiftRightSame(BitCast(du, v), bits)) ^ sign) - sign;
+}
+
+// ------------------------------ Shl
+
+HWY_API Vec512<uint16_t> operator<<(const Vec512<uint16_t> v,
+                                    const Vec512<uint16_t> bits) {  // per-lane
+  return Vec512<uint16_t>{_mm512_sllv_epi16(v.raw, bits.raw)};
+}
+
+HWY_API Vec512<uint32_t> operator<<(const Vec512<uint32_t> v,
+                                    const Vec512<uint32_t> bits) {  // per-lane
+  return Vec512<uint32_t>{_mm512_sllv_epi32(v.raw, bits.raw)};
+}
+
+HWY_API Vec512<uint64_t> operator<<(const Vec512<uint64_t> v,
+                                    const Vec512<uint64_t> bits) {  // per-lane
+  return Vec512<uint64_t>{_mm512_sllv_epi64(v.raw, bits.raw)};
+}
+
+// Signed lanes: reinterpret as unsigned, shift, then reinterpret back.
+template <typename T, HWY_IF_SIGNED(T)>
+HWY_API Vec512<T> operator<<(const Vec512<T> v, const Vec512<T> bits) {
+  const Full512<MakeUnsigned<T>> du;
+  const auto shifted = BitCast(du, v) << BitCast(du, bits);
+  return BitCast(Full512<T>(), shifted);
+}
+
+// ------------------------------ Shr
+
+HWY_API Vec512<uint16_t> operator>>(const Vec512<uint16_t> v,
+                                    const Vec512<uint16_t> bits) {  // per-lane
+  return Vec512<uint16_t>{_mm512_srlv_epi16(v.raw, bits.raw)};
+}
+
+HWY_API Vec512<uint32_t> operator>>(const Vec512<uint32_t> v,
+                                    const Vec512<uint32_t> bits) {  // logical
+  return Vec512<uint32_t>{_mm512_srlv_epi32(v.raw, bits.raw)};
+}
+
+HWY_API Vec512<uint64_t> operator>>(const Vec512<uint64_t> v,
+                                    const Vec512<uint64_t> bits) {  // logical
+  return Vec512<uint64_t>{_mm512_srlv_epi64(v.raw, bits.raw)};
+}
+
+HWY_API Vec512<int16_t> operator>>(const Vec512<int16_t> v,
+                                   const Vec512<int16_t> bits) {  // arithmetic
+  return Vec512<int16_t>{_mm512_srav_epi16(v.raw, bits.raw)};
+}
+
+HWY_API Vec512<int32_t> operator>>(const Vec512<int32_t> v,
+                                   const Vec512<int32_t> bits) {  // arithmetic
+  return Vec512<int32_t>{_mm512_srav_epi32(v.raw, bits.raw)};
+}
+
+HWY_API Vec512<int64_t> operator>>(const Vec512<int64_t> v,
+                                   const Vec512<int64_t> bits) {  // arithmetic
+  return Vec512<int64_t>{_mm512_srav_epi64(v.raw, bits.raw)};
+}
+
+// ------------------------------ Minimum
+
+// Unsigned: lane-wise minimum using unsigned comparison (epu intrinsics).
+HWY_API Vec512<uint8_t> Min(const Vec512<uint8_t> a, const Vec512<uint8_t> b) {
+  return Vec512<uint8_t>{_mm512_min_epu8(a.raw, b.raw)};
+}
+HWY_API Vec512<uint16_t> Min(const Vec512<uint16_t> a,
+                             const Vec512<uint16_t> b) {
+  return Vec512<uint16_t>{_mm512_min_epu16(a.raw, b.raw)};
+}
+HWY_API Vec512<uint32_t> Min(const Vec512<uint32_t> a,
+                             const Vec512<uint32_t> b) {
+  return Vec512<uint32_t>{_mm512_min_epu32(a.raw, b.raw)};
+}
+HWY_API Vec512<uint64_t> Min(const Vec512<uint64_t> a,
+                             const Vec512<uint64_t> b) {
+  return Vec512<uint64_t>{_mm512_min_epu64(a.raw, b.raw)};
+}
+
+// Signed: lane-wise minimum using signed comparison (epi intrinsics).
+HWY_API Vec512<int8_t> Min(const Vec512<int8_t> a, const Vec512<int8_t> b) {
+  return Vec512<int8_t>{_mm512_min_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<int16_t> Min(const Vec512<int16_t> a, const Vec512<int16_t> b) {
+  return Vec512<int16_t>{_mm512_min_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<int32_t> Min(const Vec512<int32_t> a, const Vec512<int32_t> b) {
+  return Vec512<int32_t>{_mm512_min_epi32(a.raw, b.raw)};
+}
+HWY_API Vec512<int64_t> Min(const Vec512<int64_t> a, const Vec512<int64_t> b) {
+  return Vec512<int64_t>{_mm512_min_epi64(a.raw, b.raw)};
+}
+
+// Float. NOTE(review): NaN handling is whatever min_ps/min_pd do; confirm.
+HWY_API Vec512<float> Min(const Vec512<float> a, const Vec512<float> b) {
+  return Vec512<float>{_mm512_min_ps(a.raw, b.raw)};
+}
+HWY_API Vec512<double> Min(const Vec512<double> a, const Vec512<double> b) {
+  return Vec512<double>{_mm512_min_pd(a.raw, b.raw)};
+}
+
+// ------------------------------ Maximum
+
+// Unsigned: lane-wise maximum using unsigned comparison (epu intrinsics).
+HWY_API Vec512<uint8_t> Max(const Vec512<uint8_t> a, const Vec512<uint8_t> b) {
+  return Vec512<uint8_t>{_mm512_max_epu8(a.raw, b.raw)};
+}
+HWY_API Vec512<uint16_t> Max(const Vec512<uint16_t> a,
+                             const Vec512<uint16_t> b) {
+  return Vec512<uint16_t>{_mm512_max_epu16(a.raw, b.raw)};
+}
+HWY_API Vec512<uint32_t> Max(const Vec512<uint32_t> a,
+                             const Vec512<uint32_t> b) {
+  return Vec512<uint32_t>{_mm512_max_epu32(a.raw, b.raw)};
+}
+HWY_API Vec512<uint64_t> Max(const Vec512<uint64_t> a,
+                             const Vec512<uint64_t> b) {
+  return Vec512<uint64_t>{_mm512_max_epu64(a.raw, b.raw)};
+}
+
+// Signed: lane-wise maximum using signed comparison (epi intrinsics).
+HWY_API Vec512<int8_t> Max(const Vec512<int8_t> a, const Vec512<int8_t> b) {
+  return Vec512<int8_t>{_mm512_max_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<int16_t> Max(const Vec512<int16_t> a, const Vec512<int16_t> b) {
+  return Vec512<int16_t>{_mm512_max_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<int32_t> Max(const Vec512<int32_t> a, const Vec512<int32_t> b) {
+  return Vec512<int32_t>{_mm512_max_epi32(a.raw, b.raw)};
+}
+HWY_API Vec512<int64_t> Max(const Vec512<int64_t> a, const Vec512<int64_t> b) {
+  return Vec512<int64_t>{_mm512_max_epi64(a.raw, b.raw)};
+}
+
+// Float. NOTE(review): NaN handling is whatever max_ps/max_pd do; confirm.
+HWY_API Vec512<float> Max(const Vec512<float> a, const Vec512<float> b) {
+  return Vec512<float>{_mm512_max_ps(a.raw, b.raw)};
+}
+HWY_API Vec512<double> Max(const Vec512<double> a, const Vec512<double> b) {
+  return Vec512<double>{_mm512_max_pd(a.raw, b.raw)};
+}
+
+// ------------------------------ Integer multiplication
+
+// Unsigned: lane-wise product, keeping only the low 16/32 bits (mullo).
+HWY_API Vec512<uint16_t> operator*(const Vec512<uint16_t> a,
+                                   const Vec512<uint16_t> b) {
+  return Vec512<uint16_t>{_mm512_mullo_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<uint32_t> operator*(const Vec512<uint32_t> a,
+                                   const Vec512<uint32_t> b) {
+  return Vec512<uint32_t>{_mm512_mullo_epi32(a.raw, b.raw)};
+}
+
+// Signed: uses the same mullo intrinsics as the unsigned overloads above.
+HWY_API Vec512<int16_t> operator*(const Vec512<int16_t> a,
+                                  const Vec512<int16_t> b) {
+  return Vec512<int16_t>{_mm512_mullo_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<int32_t> operator*(const Vec512<int32_t> a,
+                                  const Vec512<int32_t> b) {
+  return Vec512<int32_t>{_mm512_mullo_epi32(a.raw, b.raw)};
+}
+
+// Returns the upper 16 bits of the 32-bit product a * b in each lane.
+HWY_API Vec512<uint16_t> MulHigh(const Vec512<uint16_t> a,
+                                 const Vec512<uint16_t> b) {
+  return Vec512<uint16_t>{_mm512_mulhi_epu16(a.raw, b.raw)};  // unsigned
+}
+HWY_API Vec512<int16_t> MulHigh(const Vec512<int16_t> a,
+                                const Vec512<int16_t> b) {
+  return Vec512<int16_t>{_mm512_mulhi_epi16(a.raw, b.raw)};  // signed
+}
+
+// Multiplies even lanes (0, 2 ..) and places the lower half of the double-wide
+// result into the even lane and the upper half into its odd neighbor lane.
+HWY_API Vec512<int64_t> MulEven(const Vec512<int32_t> a,
+                                const Vec512<int32_t> b) {
+  return Vec512<int64_t>{_mm512_mul_epi32(a.raw, b.raw)};  // signed 32x32->64
+}
+HWY_API Vec512<uint64_t> MulEven(const Vec512<uint32_t> a,
+                                 const Vec512<uint32_t> b) {
+  return Vec512<uint64_t>{_mm512_mul_epu32(a.raw, b.raw)};  // unsigned
+}
+
+// ------------------------------ Negate
+
+template <typename T, HWY_IF_FLOAT(T)>  // float lanes: flip only the sign bit
+HWY_API Vec512<T> Neg(const Vec512<T> v) {
+  return Xor(v, SignBit(Full512<T>()));
+}
+
+template <typename T, HWY_IF_NOT_FLOAT(T)>  // integer lanes: compute 0 - v
+HWY_API Vec512<T> Neg(const Vec512<T> v) {
+  return Zero(Full512<T>()) - v;
+}
+
+// ------------------------------ Floating-point mul / div
+
+HWY_API Vec512<float> operator*(const Vec512<float> a, const Vec512<float> b) {
+  return Vec512<float>{_mm512_mul_ps(a.raw, b.raw)};  // lane-wise a * b
+}
+HWY_API Vec512<double> operator*(const Vec512<double> a,
+                                 const Vec512<double> b) {
+  return Vec512<double>{_mm512_mul_pd(a.raw, b.raw)};  // lane-wise a * b
+}
+
+HWY_API Vec512<float> operator/(const Vec512<float> a, const Vec512<float> b) {
+  return Vec512<float>{_mm512_div_ps(a.raw, b.raw)};  // lane-wise a / b
+}
+HWY_API Vec512<double> operator/(const Vec512<double> a,
+                                 const Vec512<double> b) {
+  return Vec512<double>{_mm512_div_pd(a.raw, b.raw)};  // lane-wise a / b
+}
+
+// Approximate reciprocal 1 / v (rcp14: relative error below 2^-14).
+HWY_API Vec512<float> ApproximateReciprocal(const Vec512<float> v) {
+  return Vec512<float>{_mm512_rcp14_ps(v.raw)};
+}
+
+// Absolute value of difference: |a - b| for each float lane.
+HWY_API Vec512<float> AbsDiff(const Vec512<float> a, const Vec512<float> b) {
+  return Abs(a - b);
+}
+
+// ------------------------------ Floating-point multiply-add variants
+
+// Returns mul * x + add (fused multiply-add).
+HWY_API Vec512<float> MulAdd(const Vec512<float> mul, const Vec512<float> x,
+                             const Vec512<float> add) {
+  return Vec512<float>{_mm512_fmadd_ps(mul.raw, x.raw, add.raw)};
+}
+HWY_API Vec512<double> MulAdd(const Vec512<double> mul, const Vec512<double> x,
+                              const Vec512<double> add) {
+  return Vec512<double>{_mm512_fmadd_pd(mul.raw, x.raw, add.raw)};
+}
+
+// Returns add - mul * x (fused negated multiply-add).
+HWY_API Vec512<float> NegMulAdd(const Vec512<float> mul, const Vec512<float> x,
+                                const Vec512<float> add) {
+  return Vec512<float>{_mm512_fnmadd_ps(mul.raw, x.raw, add.raw)};
+}
+HWY_API Vec512<double> NegMulAdd(const Vec512<double> mul,
+                                 const Vec512<double> x,
+                                 const Vec512<double> add) {
+  return Vec512<double>{_mm512_fnmadd_pd(mul.raw, x.raw, add.raw)};
+}
+
+// Returns mul * x - sub (fused multiply-subtract).
+HWY_API Vec512<float> MulSub(const Vec512<float> mul, const Vec512<float> x,
+                             const Vec512<float> sub) {
+  return Vec512<float>{_mm512_fmsub_ps(mul.raw, x.raw, sub.raw)};
+}
+HWY_API Vec512<double> MulSub(const Vec512<double> mul, const Vec512<double> x,
+                              const Vec512<double> sub) {
+  return Vec512<double>{_mm512_fmsub_pd(mul.raw, x.raw, sub.raw)};
+}
+
+// Returns -mul * x - sub (fused negated multiply-subtract).
+HWY_API Vec512<float> NegMulSub(const Vec512<float> mul, const Vec512<float> x,
+                                const Vec512<float> sub) {
+  return Vec512<float>{_mm512_fnmsub_ps(mul.raw, x.raw, sub.raw)};
+}
+HWY_API Vec512<double> NegMulSub(const Vec512<double> mul,
+                                 const Vec512<double> x,
+                                 const Vec512<double> sub) {
+  return Vec512<double>{_mm512_fnmsub_pd(mul.raw, x.raw, sub.raw)};
+}
+
+// ------------------------------ Floating-point square root
+
+// Full precision square root of each lane.
+HWY_API Vec512<float> Sqrt(const Vec512<float> v) {
+  return Vec512<float>{_mm512_sqrt_ps(v.raw)};
+}
+HWY_API Vec512<double> Sqrt(const Vec512<double> v) {
+  return Vec512<double>{_mm512_sqrt_pd(v.raw)};
+}
+
+// Approximate reciprocal square root (rsqrt14: relative error below 2^-14).
+HWY_API Vec512<float> ApproximateReciprocalSqrt(const Vec512<float> v) {
+  return Vec512<float>{_mm512_rsqrt14_ps(v.raw)};
+}
+
+// ------------------------------ Floating-point rounding
+
+// Work around warnings in the intrinsic definitions (passing -1 as a mask).
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4245 4365, ignored "-Wsign-conversion")
+
+// Toward nearest integer, tie to even; _MM_FROUND_NO_EXC suppresses exceptions
+HWY_API Vec512<float> Round(const Vec512<float> v) {
+  return Vec512<float>{_mm512_roundscale_ps(
+      v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)};
+}
+HWY_API Vec512<double> Round(const Vec512<double> v) {
+  return Vec512<double>{_mm512_roundscale_pd(
+      v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)};
+}
+
+// Toward zero, aka truncate (drops the fractional part).
+HWY_API Vec512<float> Trunc(const Vec512<float> v) {
+  return Vec512<float>{
+      _mm512_roundscale_ps(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)};
+}
+HWY_API Vec512<double> Trunc(const Vec512<double> v) {
+  return Vec512<double>{
+      _mm512_roundscale_pd(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)};
+}
+
+// Toward +infinity, aka ceiling (smallest integer >= v).
+HWY_API Vec512<float> Ceil(const Vec512<float> v) {
+  return Vec512<float>{
+      _mm512_roundscale_ps(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)};
+}
+HWY_API Vec512<double> Ceil(const Vec512<double> v) {
+  return Vec512<double>{
+      _mm512_roundscale_pd(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)};
+}
+
+// Toward -infinity, aka floor (largest integer <= v).
+HWY_API Vec512<float> Floor(const Vec512<float> v) {
+  return Vec512<float>{
+      _mm512_roundscale_ps(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)};
+}
+HWY_API Vec512<double> Floor(const Vec512<double> v) {
+  return Vec512<double>{
+      _mm512_roundscale_pd(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)};
+}
+
+HWY_DIAGNOSTICS(pop)
+
+// ================================================== COMPARE
+
+// Comparisons set a mask bit to 1 if the condition is true, else 0.
+
+template <typename TFrom, typename TTo>  // reinterprets a mask for same-size T
+HWY_API Mask512<TTo> RebindMask(Full512<TTo> /*tag*/, Mask512<TFrom> m) {
+  static_assert(sizeof(TFrom) == sizeof(TTo), "Must have same size");
+  return Mask512<TTo>{m.raw};  // the raw mask bits are copied unchanged
+}
+
+namespace detail {  // per-lane-size helpers for TestBit
+
+template <typename T>  // 1-byte lanes: mask bit set where (v & bit) != 0
+HWY_API Mask512<T> TestBit(hwy::SizeTag<1> /*tag*/, const Vec512<T> v,
+                           const Vec512<T> bit) {
+  return Mask512<T>{_mm512_test_epi8_mask(v.raw, bit.raw)};
+}
+template <typename T>  // 2-byte lanes
+HWY_API Mask512<T> TestBit(hwy::SizeTag<2> /*tag*/, const Vec512<T> v,
+                           const Vec512<T> bit) {
+  return Mask512<T>{_mm512_test_epi16_mask(v.raw, bit.raw)};
+}
+template <typename T>  // 4-byte lanes
+HWY_API Mask512<T> TestBit(hwy::SizeTag<4> /*tag*/, const Vec512<T> v,
+                           const Vec512<T> bit) {
+  return Mask512<T>{_mm512_test_epi32_mask(v.raw, bit.raw)};
+}
+template <typename T>  // 8-byte lanes
+HWY_API Mask512<T> TestBit(hwy::SizeTag<8> /*tag*/, const Vec512<T> v,
+                           const Vec512<T> bit) {
+  return Mask512<T>{_mm512_test_epi64_mask(v.raw, bit.raw)};
+}
+
+}  // namespace detail
+
+template <typename T>  // integer lanes only; dispatches on sizeof(T)
+HWY_API Mask512<T> TestBit(const Vec512<T> v, const Vec512<T> bit) {
+  static_assert(!hwy::IsFloat<T>(), "Only integer vectors supported");
+  return detail::TestBit(hwy::SizeTag<sizeof(T)>(), v, bit);
+}
+
+// ------------------------------ Equality
+
+// Unsigned: one mask bit per lane, set where the lanes of a and b are equal.
+HWY_API Mask512<uint8_t> operator==(const Vec512<uint8_t> a,
+                                    const Vec512<uint8_t> b) {
+  return Mask512<uint8_t>{_mm512_cmpeq_epi8_mask(a.raw, b.raw)};
+}
+HWY_API Mask512<uint16_t> operator==(const Vec512<uint16_t> a,
+                                     const Vec512<uint16_t> b) {
+  return Mask512<uint16_t>{_mm512_cmpeq_epi16_mask(a.raw, b.raw)};
+}
+HWY_API Mask512<uint32_t> operator==(const Vec512<uint32_t> a,
+                                     const Vec512<uint32_t> b) {
+  return Mask512<uint32_t>{_mm512_cmpeq_epi32_mask(a.raw, b.raw)};
+}
+HWY_API Mask512<uint64_t> operator==(const Vec512<uint64_t> a,
+                                     const Vec512<uint64_t> b) {
+  return Mask512<uint64_t>{_mm512_cmpeq_epi64_mask(a.raw, b.raw)};
+}
+
+// Signed: uses the same intrinsics as the unsigned overloads above.
+HWY_API Mask512<int8_t> operator==(const Vec512<int8_t> a,
+                                   const Vec512<int8_t> b) {
+  return Mask512<int8_t>{_mm512_cmpeq_epi8_mask(a.raw, b.raw)};
+}
+HWY_API Mask512<int16_t> operator==(const Vec512<int16_t> a,
+                                    const Vec512<int16_t> b) {
+  return Mask512<int16_t>{_mm512_cmpeq_epi16_mask(a.raw, b.raw)};
+}
+HWY_API Mask512<int32_t> operator==(const Vec512<int32_t> a,
+                                    const Vec512<int32_t> b) {
+  return Mask512<int32_t>{_mm512_cmpeq_epi32_mask(a.raw, b.raw)};
+}
+HWY_API Mask512<int64_t> operator==(const Vec512<int64_t> a,
+                                    const Vec512<int64_t> b) {
+  return Mask512<int64_t>{_mm512_cmpeq_epi64_mask(a.raw, b.raw)};
+}
+
+// Float: _CMP_EQ_OQ is the ordered, quiet predicate (false for NaN lanes).
+HWY_API Mask512<float> operator==(const Vec512<float> a,
+                                  const Vec512<float> b) {
+  return Mask512<float>{_mm512_cmp_ps_mask(a.raw, b.raw, _CMP_EQ_OQ)};
+}
+HWY_API Mask512<double> operator==(const Vec512<double> a,
+                                   const Vec512<double> b) {
+  return Mask512<double>{_mm512_cmp_pd_mask(a.raw, b.raw, _CMP_EQ_OQ)};
+}
+
+// ------------------------------ Strict inequality
+
+// Signed/float <. Integers use cmpgt with swapped operands; floats use LT_OQ.
+HWY_API Mask512<int8_t> operator<(const Vec512<int8_t> a,
+                                  const Vec512<int8_t> b) {
+  return Mask512<int8_t>{_mm512_cmpgt_epi8_mask(b.raw, a.raw)};
+}
+HWY_API Mask512<int16_t> operator<(const Vec512<int16_t> a,
+                                   const Vec512<int16_t> b) {
+  return Mask512<int16_t>{_mm512_cmpgt_epi16_mask(b.raw, a.raw)};
+}
+HWY_API Mask512<int32_t> operator<(const Vec512<int32_t> a,
+                                   const Vec512<int32_t> b) {
+  return Mask512<int32_t>{_mm512_cmpgt_epi32_mask(b.raw, a.raw)};
+}
+HWY_API Mask512<int64_t> operator<(const Vec512<int64_t> a,
+                                   const Vec512<int64_t> b) {
+  return Mask512<int64_t>{_mm512_cmpgt_epi64_mask(b.raw, a.raw)};
+}
+HWY_API Mask512<float> operator<(const Vec512<float> a, const Vec512<float> b) {
+  return Mask512<float>{_mm512_cmp_ps_mask(a.raw, b.raw, _CMP_LT_OQ)};
+}
+HWY_API Mask512<double> operator<(const Vec512<double> a,
+                                  const Vec512<double> b) {
+  return Mask512<double>{_mm512_cmp_pd_mask(a.raw, b.raw, _CMP_LT_OQ)};
+}
+
+// Signed/float >, expressed through operator< with the operands exchanged.
+HWY_API Mask512<int8_t> operator>(const Vec512<int8_t> a,
+                                  const Vec512<int8_t> b) {
+  return b < a;  // i.e. _mm512_cmpgt_epi8_mask(a.raw, b.raw)
+}
+HWY_API Mask512<int16_t> operator>(const Vec512<int16_t> a,
+                                   const Vec512<int16_t> b) {
+  return b < a;  // i.e. _mm512_cmpgt_epi16_mask(a.raw, b.raw)
+}
+HWY_API Mask512<int32_t> operator>(const Vec512<int32_t> a,
+                                   const Vec512<int32_t> b) {
+  return b < a;  // i.e. _mm512_cmpgt_epi32_mask(a.raw, b.raw)
+}
+HWY_API Mask512<int64_t> operator>(const Vec512<int64_t> a,
+                                   const Vec512<int64_t> b) {
+  return b < a;  // i.e. _mm512_cmpgt_epi64_mask(a.raw, b.raw)
+}
+HWY_API Mask512<float> operator>(const Vec512<float> a, const Vec512<float> b) {
+  return b < a;  // ordered, quiet compare: same mask as a > b, NaN -> 0
+}
+HWY_API Mask512<double> operator>(const Vec512<double> a,
+                                  const Vec512<double> b) {
+  return b < a;  // ordered, quiet compare: same mask as a > b, NaN -> 0
+}
+
+// ------------------------------ Weak inequality
+
+// Float <= >=
+HWY_API Mask512<float> operator<=(const Vec512<float> a,
+                                  const Vec512<float> b) {  // a <= b, ordered
+  return Mask512<float>{_mm512_cmp_ps_mask(a.raw, b.raw, _CMP_LE_OQ)};
+}
+HWY_API Mask512<double> operator<=(const Vec512<double> a,
+                                   const Vec512<double> b) {  // a <= b, ordered
+  return Mask512<double>{_mm512_cmp_pd_mask(a.raw, b.raw, _CMP_LE_OQ)};
+}
+HWY_API Mask512<float> operator>=(const Vec512<float> a,
+                                  const Vec512<float> b) {
+  return b <= a;  // a >= b is b <= a; ordered and quiet, so NaN lanes -> 0
+}
+HWY_API Mask512<double> operator>=(const Vec512<double> a,
+                                   const Vec512<double> b) {
+  return b <= a;  // a >= b is b <= a; ordered and quiet, so NaN lanes -> 0
+}
+
+// ------------------------------ Mask
+
+namespace detail {
+// Per lane size: mask bit i is the most significant (sign) bit of lane i.
+template <typename T>
+HWY_API Mask512<T> MaskFromVec(hwy::SizeTag<1> /*tag*/, const Vec512<T> v) {
+  return Mask512<T>{_mm512_movepi8_mask(v.raw)};
+}
+template <typename T>
+HWY_API Mask512<T> MaskFromVec(hwy::SizeTag<2> /*tag*/, const Vec512<T> v) {
+  return Mask512<T>{_mm512_movepi16_mask(v.raw)};
+}
+template <typename T>
+HWY_API Mask512<T> MaskFromVec(hwy::SizeTag<4> /*tag*/, const Vec512<T> v) {
+  return Mask512<T>{_mm512_movepi32_mask(v.raw)};
+}
+template <typename T>
+HWY_API Mask512<T> MaskFromVec(hwy::SizeTag<8> /*tag*/, const Vec512<T> v) {
+  return Mask512<T>{_mm512_movepi64_mask(v.raw)};
+}
+
+}  // namespace detail
+
+template <typename T>  // generic case: dispatch on the lane size in bytes
+HWY_API Mask512<T> MaskFromVec(const Vec512<T> v) {
+  return detail::MaskFromVec(hwy::SizeTag<sizeof(T)>(), v);
+}
+// There do not seem to be native floating-point versions of these instructions,
+HWY_API Mask512<float> MaskFromVec(const Vec512<float> v) {  // so go via int32
+  return Mask512<float>{MaskFromVec(BitCast(Full512<int32_t>(), v)).raw};
+}
+HWY_API Mask512<double> MaskFromVec(const Vec512<double> v) {  // and via int64
+  return Mask512<double>{MaskFromVec(BitCast(Full512<int64_t>(), v)).raw};
+}
+
+HWY_API Vec512<uint8_t> VecFromMask(const Mask512<uint8_t> v) {
+  return Vec512<uint8_t>{_mm512_movm_epi8(v.raw)};  // all-1 lane if bit set
+}
+HWY_API Vec512<int8_t> VecFromMask(const Mask512<int8_t> v) {
+  return Vec512<int8_t>{_mm512_movm_epi8(v.raw)};
+}
+
+HWY_API Vec512<uint16_t> VecFromMask(const Mask512<uint16_t> v) {
+  return Vec512<uint16_t>{_mm512_movm_epi16(v.raw)};
+}
+HWY_API Vec512<int16_t> VecFromMask(const Mask512<int16_t> v) {
+  return Vec512<int16_t>{_mm512_movm_epi16(v.raw)};
+}
+
+HWY_API Vec512<uint32_t> VecFromMask(const Mask512<uint32_t> v) {
+  return Vec512<uint32_t>{_mm512_movm_epi32(v.raw)};
+}
+HWY_API Vec512<int32_t> VecFromMask(const Mask512<int32_t> v) {
+  return Vec512<int32_t>{_mm512_movm_epi32(v.raw)};
+}
+HWY_API Vec512<float> VecFromMask(const Mask512<float> v) {  // int32 bits, cast
+  return Vec512<float>{_mm512_castsi512_ps(_mm512_movm_epi32(v.raw))};
+}
+
+HWY_API Vec512<uint64_t> VecFromMask(const Mask512<uint64_t> v) {
+  return Vec512<uint64_t>{_mm512_movm_epi64(v.raw)};
+}
+HWY_API Vec512<int64_t> VecFromMask(const Mask512<int64_t> v) {
+  return Vec512<int64_t>{_mm512_movm_epi64(v.raw)};
+}
+HWY_API Vec512<double> VecFromMask(const Mask512<double> v) {  // int64 bits
+  return Vec512<double>{_mm512_castsi512_pd(_mm512_movm_epi64(v.raw))};
+}
+
+template <typename T>  // tag-taking form; forwards to the overloads above
+HWY_API Vec512<T> VecFromMask(Full512<T> /* tag */, const Mask512<T> v) {
+  return VecFromMask(v);
+}
+
+// ------------------------------ Mask logical
+
+// Mask intrinsics (KORTEST) are recent in Clang/GCC; a predefined value wins.
+#if defined(HWY_COMPILER_HAS_MASK_INTRINSICS)  // keep the caller's setting
+#elif HWY_COMPILER_MSVC != 0 || HWY_COMPILER_GCC >= 700 || \
+    HWY_COMPILER_CLANG >= 800
+#define HWY_COMPILER_HAS_MASK_INTRINSICS 1
+#else
+#define HWY_COMPILER_HAS_MASK_INTRINSICS 0
+#endif
+
+namespace detail {
+// Mask logic by lane size: SizeTag<1|2|4|8> -> 64|32|16|8 mask bits.
+template <typename T>
+HWY_API Mask512<T> Not(hwy::SizeTag<1> /*tag*/, const Mask512<T> m) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_knot_mask64(m.raw)};
+#else  // plain integer fallback when the mask intrinsics are unavailable
+  return Mask512<T>{~m.raw};
+#endif
+}
+template <typename T>
+HWY_API Mask512<T> Not(hwy::SizeTag<2> /*tag*/, const Mask512<T> m) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_knot_mask32(m.raw)};
+#else
+  return Mask512<T>{~m.raw};
+#endif
+}
+template <typename T>
+HWY_API Mask512<T> Not(hwy::SizeTag<4> /*tag*/, const Mask512<T> m) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_knot_mask16(m.raw)};
+#else  // keep only the low 16 bits of the complement, then cast back
+  return Mask512<T>{static_cast<uint16_t>(~m.raw & 0xFFFF)};
+#endif
+}
+template <typename T>
+HWY_API Mask512<T> Not(hwy::SizeTag<8> /*tag*/, const Mask512<T> m) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_knot_mask8(m.raw)};
+#else  // keep only the low 8 bits of the complement, then cast back
+  return Mask512<T>{static_cast<uint8_t>(~m.raw & 0xFF)};
+#endif
+}
+
+template <typename T>
+HWY_API Mask512<T> And(hwy::SizeTag<1> /*tag*/, const Mask512<T> a,
+                       const Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_kand_mask64(a.raw, b.raw)};
+#else
+  return Mask512<T>{a.raw & b.raw};
+#endif
+}
+template <typename T>
+HWY_API Mask512<T> And(hwy::SizeTag<2> /*tag*/, const Mask512<T> a,
+                       const Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_kand_mask32(a.raw, b.raw)};
+#else
+  return Mask512<T>{a.raw & b.raw};
+#endif
+}
+template <typename T>
+HWY_API Mask512<T> And(hwy::SizeTag<4> /*tag*/, const Mask512<T> a,
+                       const Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_kand_mask16(a.raw, b.raw)};
+#else  // the integer expression is cast back to the 16-bit mask width
+  return Mask512<T>{static_cast<uint16_t>(a.raw & b.raw)};
+#endif
+}
+template <typename T>
+HWY_API Mask512<T> And(hwy::SizeTag<8> /*tag*/, const Mask512<T> a,
+                       const Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_kand_mask8(a.raw, b.raw)};
+#else  // the integer expression is cast back to the 8-bit mask width
+  return Mask512<T>{static_cast<uint8_t>(a.raw & b.raw)};
+#endif
+}
+
+template <typename T>
+HWY_API Mask512<T> AndNot(hwy::SizeTag<1> /*tag*/, const Mask512<T> a,
+                          const Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_kandn_mask64(a.raw, b.raw)};
+#else  // same operand order as the intrinsic: (~a) & b
+  return Mask512<T>{~a.raw & b.raw};
+#endif
+}
+template <typename T>
+HWY_API Mask512<T> AndNot(hwy::SizeTag<2> /*tag*/, const Mask512<T> a,
+                          const Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_kandn_mask32(a.raw, b.raw)};
+#else
+  return Mask512<T>{~a.raw & b.raw};
+#endif
+}
+template <typename T>
+HWY_API Mask512<T> AndNot(hwy::SizeTag<4> /*tag*/, const Mask512<T> a,
+                          const Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_kandn_mask16(a.raw, b.raw)};
+#else
+  return Mask512<T>{static_cast<uint16_t>(~a.raw & b.raw)};
+#endif
+}
+template <typename T>
+HWY_API Mask512<T> AndNot(hwy::SizeTag<8> /*tag*/, const Mask512<T> a,
+                          const Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_kandn_mask8(a.raw, b.raw)};
+#else
+  return Mask512<T>{static_cast<uint8_t>(~a.raw & b.raw)};
+#endif
+}
+
+template <typename T>
+HWY_API Mask512<T> Or(hwy::SizeTag<1> /*tag*/, const Mask512<T> a,
+                      const Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_kor_mask64(a.raw, b.raw)};
+#else
+  return Mask512<T>{a.raw | b.raw};
+#endif
+}
+template <typename T>
+HWY_API Mask512<T> Or(hwy::SizeTag<2> /*tag*/, const Mask512<T> a,
+                      const Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_kor_mask32(a.raw, b.raw)};
+#else
+  return Mask512<T>{a.raw | b.raw};
+#endif
+}
+template <typename T>
+HWY_API Mask512<T> Or(hwy::SizeTag<4> /*tag*/, const Mask512<T> a,
+                      const Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_kor_mask16(a.raw, b.raw)};
+#else
+  return Mask512<T>{static_cast<uint16_t>(a.raw | b.raw)};
+#endif
+}
+template <typename T>
+HWY_API Mask512<T> Or(hwy::SizeTag<8> /*tag*/, const Mask512<T> a,
+                      const Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_kor_mask8(a.raw, b.raw)};
+#else
+  return Mask512<T>{static_cast<uint8_t>(a.raw | b.raw)};
+#endif
+}
+
+template <typename T>
+HWY_API Mask512<T> Xor(hwy::SizeTag<1> /*tag*/, const Mask512<T> a,
+                       const Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_kxor_mask64(a.raw, b.raw)};
+#else
+  return Mask512<T>{a.raw ^ b.raw};
+#endif
+}
+template <typename T>
+HWY_API Mask512<T> Xor(hwy::SizeTag<2> /*tag*/, const Mask512<T> a,
+                       const Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_kxor_mask32(a.raw, b.raw)};
+#else
+  return Mask512<T>{a.raw ^ b.raw};
+#endif
+}
+template <typename T>
+HWY_API Mask512<T> Xor(hwy::SizeTag<4> /*tag*/, const Mask512<T> a,
+                       const Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_kxor_mask16(a.raw, b.raw)};
+#else
+  return Mask512<T>{static_cast<uint16_t>(a.raw ^ b.raw)};
+#endif
+}
+template <typename T>
+HWY_API Mask512<T> Xor(hwy::SizeTag<8> /*tag*/, const Mask512<T> a,
+                       const Mask512<T> b) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return Mask512<T>{_kxor_mask8(a.raw, b.raw)};
+#else
+  return Mask512<T>{static_cast<uint8_t>(a.raw ^ b.raw)};
+#endif
+}
+
+}  // namespace detail
+
+template <typename T>  // complements every mask bit; dispatches on sizeof(T)
+HWY_API Mask512<T> Not(const Mask512<T> m) {
+  return detail::Not(hwy::SizeTag<sizeof(T)>(), m);
+}
+
+template <typename T>  // bitwise AND of two masks; dispatches on sizeof(T)
+HWY_API Mask512<T> And(const Mask512<T> a, Mask512<T> b) {
+  return detail::And(hwy::SizeTag<sizeof(T)>(), a, b);
+}
+
+template <typename T>  // (~a) & b: the FIRST argument is the one complemented
+HWY_API Mask512<T> AndNot(const Mask512<T> a, Mask512<T> b) {
+  return detail::AndNot(hwy::SizeTag<sizeof(T)>(), a, b);
+}
+
+template <typename T>  // bitwise OR of two masks; dispatches on sizeof(T)
+HWY_API Mask512<T> Or(const Mask512<T> a, Mask512<T> b) {
+  return detail::Or(hwy::SizeTag<sizeof(T)>(), a, b);
+}
+
+template <typename T>  // bitwise XOR of two masks; dispatches on sizeof(T)
+HWY_API Mask512<T> Xor(const Mask512<T> a, Mask512<T> b) {
+  return detail::Xor(hwy::SizeTag<sizeof(T)>(), a, b);
+}
+
+// ------------------------------ BroadcastSignBit (ShiftRight, compare, mask)
+
+HWY_API Vec512<int8_t> BroadcastSignBit(const Vec512<int8_t> v) {
+  return VecFromMask(v < Zero(Full512<int8_t>()));  // compare, then expand
+}
+
+HWY_API Vec512<int16_t> BroadcastSignBit(const Vec512<int16_t> v) {
+  return ShiftRight<15>(v);  // shift by (lane bits - 1)
+}
+
+HWY_API Vec512<int32_t> BroadcastSignBit(const Vec512<int32_t> v) {
+  return ShiftRight<31>(v);  // shift by (lane bits - 1)
+}
+
+HWY_API Vec512<int64_t> BroadcastSignBit(const Vec512<int64_t> v) {
+  return Vec512<int64_t>{_mm512_srai_epi64(v.raw, 63)};  // arithmetic shift
+}
+
+// ================================================== MEMORY
+
+// ------------------------------ Load
+
+template <typename T>  // for T without a dedicated overload below
+HWY_API Vec512<T> Load(Full512<T> /* tag */, const T* HWY_RESTRICT aligned) {
+  return Vec512<T>{  // aligned-load intrinsic: needs a 64-byte aligned pointer
+      _mm512_load_si512(reinterpret_cast<const __m512i*>(aligned))};
+}
+HWY_API Vec512<float> Load(Full512<float> /* tag */,
+                           const float* HWY_RESTRICT aligned) {
+  return Vec512<float>{_mm512_load_ps(aligned)};
+}
+HWY_API Vec512<double> Load(Full512<double> /* tag */,
+                            const double* HWY_RESTRICT aligned) {
+  return Vec512<double>{_mm512_load_pd(aligned)};
+}
+
+template <typename T>  // unaligned load: p has no alignment requirement
+HWY_API Vec512<T> LoadU(Full512<T> /* tag */, const T* HWY_RESTRICT p) {
+  return Vec512<T>{_mm512_loadu_si512(reinterpret_cast<const __m512i*>(p))};
+}
+HWY_API Vec512<float> LoadU(Full512<float> /* tag */,
+                            const float* HWY_RESTRICT p) {
+  return Vec512<float>{_mm512_loadu_ps(p)};
+}
+HWY_API Vec512<double> LoadU(Full512<double> /* tag */,
+                             const double* HWY_RESTRICT p) {
+  return Vec512<double>{_mm512_loadu_pd(p)};
+}
+
+// Loads 128 bits and duplicates them into all four 128-bit blocks. This avoids
+// the 3-cycle cost of moving data between 128-bit blocks and avoids port 5.
+template <typename T>
+HWY_API Vec512<T> LoadDup128(Full512<T> /* tag */,
+                             const T* const HWY_RESTRICT p) {
+  // Clang 3.9 generates VINSERTF128 which is slower, but inline assembly leads
+  // to "invalid output size for constraint" without -mavx512:
+  // https://gcc.godbolt.org/z/-Jt_-F
+#if HWY_LOADDUP_ASM  // NOTE(review): AVX2-style asm; confirm never enabled
+  __m512i out;
+  asm("vbroadcasti128 %1, %[reg]" : [ reg ] "=x"(out) : "m"(p[0]));
+  return Vec512<T>{out};
+#else
+  const auto x4 = LoadU(Full128<T>(), p);
+  return Vec512<T>{_mm512_broadcast_i32x4(x4.raw)};  // copy to all 4 blocks
+#endif
+}
+HWY_API Vec512<float> LoadDup128(Full512<float> /* tag */,
+                                 const float* const HWY_RESTRICT p) {
+#if HWY_LOADDUP_ASM  // NOTE(review): AVX2-style asm; confirm never enabled
+  __m512 out;
+  asm("vbroadcastf128 %1, %[reg]" : [ reg ] "=x"(out) : "m"(p[0]));
+  return Vec512<float>{out};
+#else
+  const __m128 x4 = _mm_loadu_ps(p);  // unaligned load of four floats
+  return Vec512<float>{_mm512_broadcast_f32x4(x4)};
+#endif
+}
+
+HWY_API Vec512<double> LoadDup128(Full512<double> /* tag */,
+                                  const double* const HWY_RESTRICT p) {
+#if HWY_LOADDUP_ASM  // NOTE(review): AVX2-style asm; confirm never enabled
+  __m512d out;
+  asm("vbroadcastf128 %1, %[reg]" : [ reg ] "=x"(out) : "m"(p[0]));
+  return Vec512<double>{out};
+#else
+  const __m128d x2 = _mm_loadu_pd(p);  // unaligned load of two doubles
+  return Vec512<double>{_mm512_broadcast_f64x2(x2)};
+#endif
+}
+
+// ------------------------------ Store
+
+template <typename T>  // aligned-store intrinsics: 64-byte aligned pointer
+HWY_API void Store(const Vec512<T> v, Full512<T> /* tag */,
+                   T* HWY_RESTRICT aligned) {
+  _mm512_store_si512(reinterpret_cast<__m512i*>(aligned), v.raw);
+}
+HWY_API void Store(const Vec512<float> v, Full512<float> /* tag */,
+                   float* HWY_RESTRICT aligned) {
+  _mm512_store_ps(aligned, v.raw);
+}
+HWY_API void Store(const Vec512<double> v, Full512<double> /* tag */,
+                   double* HWY_RESTRICT aligned) {
+  _mm512_store_pd(aligned, v.raw);
+}
+
+template <typename T>  // unaligned store: p has no alignment requirement
+HWY_API void StoreU(const Vec512<T> v, Full512<T> /* tag */,
+                    T* HWY_RESTRICT p) {
+  _mm512_storeu_si512(reinterpret_cast<__m512i*>(p), v.raw);
+}
+HWY_API void StoreU(const Vec512<float> v, Full512<float> /* tag */,
+                    float* HWY_RESTRICT p) {
+  _mm512_storeu_ps(p, v.raw);
+}
+HWY_API void StoreU(const Vec512<double> v, Full512<double> /* tag */,
+                    double* HWY_RESTRICT p) {
+  _mm512_storeu_pd(p, v.raw);
+}
+
+// ------------------------------ Non-temporal stores
+
+template <typename T>  // non-temporal store via the _mm512_stream_* intrinsics
+HWY_API void Stream(const Vec512<T> v, Full512<T> /* tag */,
+                    T* HWY_RESTRICT aligned) {
+  _mm512_stream_si512(reinterpret_cast<__m512i*>(aligned), v.raw);
+}
+HWY_API void Stream(const Vec512<float> v, Full512<float> /* tag */,
+                    float* HWY_RESTRICT aligned) {
+  _mm512_stream_ps(aligned, v.raw);
+}
+HWY_API void Stream(const Vec512<double> v, Full512<double> /* tag */,
+                    double* HWY_RESTRICT aligned) {
+  _mm512_stream_pd(aligned, v.raw);
+}
+
+// ------------------------------ Scatter
+
+// Work around warnings in the intrinsic definitions (passing -1 as a mask).
+HWY_DIAGNOSTICS(push)
+HWY_DIAGNOSTICS_OFF(disable : 4245 4365, ignored "-Wsign-conversion")
+
+namespace detail {
+// Scale argument: 1 = byte offsets, sizeof(T) (4 or 8) = element indices.
+template <typename T>
+HWY_API void ScatterOffset(hwy::SizeTag<4> /* tag */, Vec512<T> v,
+                           Full512<T> /* tag */, T* HWY_RESTRICT base,
+                           const Vec512<int32_t> offset) {
+  _mm512_i32scatter_epi32(base, offset.raw, v.raw, 1);
+}
+template <typename T>
+HWY_API void ScatterIndex(hwy::SizeTag<4> /* tag */, Vec512<T> v,
+                          Full512<T> /* tag */, T* HWY_RESTRICT base,
+                          const Vec512<int32_t> index) {
+  _mm512_i32scatter_epi32(base, index.raw, v.raw, 4);
+}
+
+template <typename T>
+HWY_API void ScatterOffset(hwy::SizeTag<8> /* tag */, Vec512<T> v,
+                           Full512<T> /* tag */, T* HWY_RESTRICT base,
+                           const Vec512<int64_t> offset) {
+  _mm512_i64scatter_epi64(base, offset.raw, v.raw, 1);
+}
+template <typename T>
+HWY_API void ScatterIndex(hwy::SizeTag<8> /* tag */, Vec512<T> v,
+                          Full512<T> /* tag */, T* HWY_RESTRICT base,
+                          const Vec512<int64_t> index) {
+  _mm512_i64scatter_epi64(base, index.raw, v.raw, 8);
+}
+
+}  // namespace detail
+
+template <typename T, typename Offset>  // offsets are in bytes (scale 1)
+HWY_API void ScatterOffset(Vec512<T> v, Full512<T> d, T* HWY_RESTRICT base,
+                           const Vec512<Offset> offset) {
+  static_assert(sizeof(T) == sizeof(Offset), "Must match for portability");
+  return detail::ScatterOffset(hwy::SizeTag<sizeof(T)>(), v, d, base, offset);
+}
+template <typename T, typename Index>  // indices count elements of T
+HWY_API void ScatterIndex(Vec512<T> v, Full512<T> d, T* HWY_RESTRICT base,
+                          const Vec512<Index> index) {
+  static_assert(sizeof(T) == sizeof(Index), "Must match for portability");
+  return detail::ScatterIndex(hwy::SizeTag<sizeof(T)>(), v, d, base, index);
+}
+
+template <>  // float: uses the _ps scatter with byte offsets (scale 1)
+HWY_INLINE void ScatterOffset<float>(Vec512<float> v, Full512<float> /* tag */,
+                                     float* HWY_RESTRICT base,
+                                     const Vec512<int32_t> offset) {
+  _mm512_i32scatter_ps(base, offset.raw, v.raw, 1);
+}
+template <>  // float: uses the _ps scatter with element indices (scale 4)
+HWY_INLINE void ScatterIndex<float>(Vec512<float> v, Full512<float> /* tag */,
+                                    float* HWY_RESTRICT base,
+                                    const Vec512<int32_t> index) {
+  _mm512_i32scatter_ps(base, index.raw, v.raw, 4);
+}
+
+template <>  // double: uses the _pd scatter with byte offsets (scale 1)
+HWY_INLINE void ScatterOffset<double>(Vec512<double> v,
+                                      Full512<double> /* tag */,
+                                      double* HWY_RESTRICT base,
+                                      const Vec512<int64_t> offset) {
+  _mm512_i64scatter_pd(base, offset.raw, v.raw, 1);
+}
+template <>  // double: uses the _pd scatter with element indices (scale 8)
+HWY_INLINE void ScatterIndex<double>(Vec512<double> v,
+                                     Full512<double> /* tag */,
+                                     double* HWY_RESTRICT base,
+                                     const Vec512<int64_t> index) {
+  _mm512_i64scatter_pd(base, index.raw, v.raw, 8);
+}
+
+// ------------------------------ Gather
+
+namespace detail {
+// Scale argument: 1 = byte offsets, sizeof(T) (4 or 8) = element indices.
+template <typename T>
+HWY_API Vec512<T> GatherOffset(hwy::SizeTag<4> /* tag */, Full512<T> /* tag */,
+                               const T* HWY_RESTRICT base,
+                               const Vec512<int32_t> offset) {
+  return Vec512<T>{_mm512_i32gather_epi32(offset.raw, base, 1)};
+}
+template <typename T>
+HWY_API Vec512<T> GatherIndex(hwy::SizeTag<4> /* tag */, Full512<T> /* tag */,
+                              const T* HWY_RESTRICT base,
+                              const Vec512<int32_t> index) {
+  return Vec512<T>{_mm512_i32gather_epi32(index.raw, base, 4)};
+}
+
+template <typename T>
+HWY_API Vec512<T> GatherOffset(hwy::SizeTag<8> /* tag */, Full512<T> /* tag */,
+                               const T* HWY_RESTRICT base,
+                               const Vec512<int64_t> offset) {
+  return Vec512<T>{_mm512_i64gather_epi64(offset.raw, base, 1)};
+}
+template <typename T>
+HWY_API Vec512<T> GatherIndex(hwy::SizeTag<8> /* tag */, Full512<T> /* tag */,
+                              const T* HWY_RESTRICT base,
+                              const Vec512<int64_t> index) {
+  return Vec512<T>{_mm512_i64gather_epi64(index.raw, base, 8)};
+}
+
+}  // namespace detail
+
+template <typename T, typename Offset>  // offsets are in bytes (scale 1)
+HWY_API Vec512<T> GatherOffset(Full512<T> d, const T* HWY_RESTRICT base,
+                               const Vec512<Offset> offset) {
+  static_assert(sizeof(T) == sizeof(Offset), "Must match for portability");
+  return detail::GatherOffset(hwy::SizeTag<sizeof(T)>(), d, base, offset);
+}
+template <typename T, typename Index>  // indices count elements of T
+HWY_API Vec512<T> GatherIndex(Full512<T> d, const T* HWY_RESTRICT base,
+                              const Vec512<Index> index) {
+  static_assert(sizeof(T) == sizeof(Index), "Must match for portability");
+  return detail::GatherIndex(hwy::SizeTag<sizeof(T)>(), d, base, index);
+}
+
+template <>  // float: uses the _ps gather with byte offsets (scale 1)
+HWY_INLINE Vec512<float> GatherOffset<float>(Full512<float> /* tag */,
+                                             const float* HWY_RESTRICT base,
+                                             const Vec512<int32_t> offset) {
+  return Vec512<float>{_mm512_i32gather_ps(offset.raw, base, 1)};
+}
+template <>  // float: uses the _ps gather with element indices (scale 4)
+HWY_INLINE Vec512<float> GatherIndex<float>(Full512<float> /* tag */,
+                                            const float* HWY_RESTRICT base,
+                                            const Vec512<int32_t> index) {
+  return Vec512<float>{_mm512_i32gather_ps(index.raw, base, 4)};
+}
+
+template <>  // double: uses the _pd gather with byte offsets (scale 1)
+HWY_INLINE Vec512<double> GatherOffset<double>(Full512<double> /* tag */,
+                                               const double* HWY_RESTRICT base,
+                                               const Vec512<int64_t> offset) {
+  return Vec512<double>{_mm512_i64gather_pd(offset.raw, base, 1)};
+}
+template <>  // double: uses the _pd gather with element indices (scale 8)
+HWY_INLINE Vec512<double> GatherIndex<double>(Full512<double> /* tag */,
+                                              const double* HWY_RESTRICT base,
+                                              const Vec512<int64_t> index) {
+  return Vec512<double>{_mm512_i64gather_pd(index.raw, base, 8)};
+}
+
+HWY_DIAGNOSTICS(pop)
+
+// ================================================== SWIZZLE
+
+template <typename T>  // delegates to GetLane of the lower 256-bit half
+HWY_API T GetLane(const Vec512<T> v) {
+  return GetLane(LowerHalf(v));
+}
+
+// ------------------------------ Extract half
+
+template <typename T>  // lower 256 bits, obtained with a cast intrinsic
+HWY_API Vec256<T> LowerHalf(Vec512<T> v) {
+  return Vec256<T>{_mm512_castsi512_si256(v.raw)};
+}
+template <>  // float vectors need the ps variant of the cast
+HWY_INLINE Vec256<float> LowerHalf(Vec512<float> v) {
+  return Vec256<float>{_mm512_castps512_ps256(v.raw)};
+}
+template <>  // double vectors need the pd variant of the cast
+HWY_INLINE Vec256<double> LowerHalf(Vec512<double> v) {
+  return Vec256<double>{_mm512_castpd512_pd256(v.raw)};
+}
+
+template <typename T>  // upper 256 bits: extract 256-bit block index 1
+HWY_API Vec256<T> UpperHalf(Vec512<T> v) {
+  return Vec256<T>{_mm512_extracti32x8_epi32(v.raw, 1)};
+}
+template <>  // float vectors use the f32x8 extract
+HWY_INLINE Vec256<float> UpperHalf(Vec512<float> v) {
+  return Vec256<float>{_mm512_extractf32x8_ps(v.raw, 1)};
+}
+template <>  // double vectors use the f64x4 extract
+HWY_INLINE Vec256<double> UpperHalf(Vec512<double> v) {
+  return Vec256<double>{_mm512_extractf64x4_pd(v.raw, 1)};
+}
+
+// ------------------------------ ZeroExtendVector
+
+// Unfortunately the initial _mm512_castsi256_si512 intrinsic leaves the upper
+// bits undefined. Although it makes sense for them to be zero (EVEX encoded
+// instructions have that effect), a compiler could decide to optimize out code
+// that relies on this.
+//
+// The newer _mm512_zextsi256_si512 intrinsic fixes this by specifying the
+// zeroing, but it is not available on GCC until 10.1. For older GCC, we can
+// still obtain the desired code thanks to pattern recognition; note that the
+// expensive insert instruction is not actually generated, see
+// https://gcc.godbolt.org/z/1MKGaP.
+
+template <typename T>
+HWY_API Vec512<T> ZeroExtendVector(Vec256<T> lo) {
+#if HWY_COMPILER_CLANG || !HWY_COMPILER_GCC || (HWY_COMPILER_GCC >= 1000)
+  return Vec512<T>{_mm512_zextsi256_si512(lo.raw)};
+#else  // GCC before 10: no zext intrinsic, insert into an all-zero vector
+  return Vec512<T>{_mm512_inserti32x8(_mm512_setzero_si512(), lo.raw, 0)};
+#endif
+}
+template <>
+HWY_INLINE Vec512<float> ZeroExtendVector(Vec256<float> lo) {
+#if HWY_COMPILER_CLANG || !HWY_COMPILER_GCC || (HWY_COMPILER_GCC >= 1000)
+  return Vec512<float>{_mm512_zextps256_ps512(lo.raw)};
+#else  // GCC before 10: no zext intrinsic, insert into an all-zero vector
+  return Vec512<float>{_mm512_insertf32x8(_mm512_setzero_ps(), lo.raw, 0)};
+#endif
+}
+template <>
+HWY_INLINE Vec512<double> ZeroExtendVector(Vec256<double> lo) {
+#if HWY_COMPILER_CLANG || !HWY_COMPILER_GCC || (HWY_COMPILER_GCC >= 1000)
+  return Vec512<double>{_mm512_zextpd256_pd512(lo.raw)};
+#else  // GCC before 10: no zext intrinsic, insert into an all-zero vector
+  return Vec512<double>{_mm512_insertf64x4(_mm512_setzero_pd(), lo.raw, 0)};
+#endif
+}
+
+// ------------------------------ Combine
+
+template <typename T>
+HWY_API Vec512<T> Combine(Vec256<T> hi, Vec256<T> lo) {
+  const Vec512<T> widened = ZeroExtendVector(lo);  // upper 256 bits are zero
+  return Vec512<T>{_mm512_inserti32x8(widened.raw, hi.raw, 1)};
+}
+template <>
+HWY_INLINE Vec512<float> Combine(Vec256<float> hi, Vec256<float> lo) {
+  const Vec512<float> widened = ZeroExtendVector(lo);  // upper half is zero
+  return Vec512<float>{_mm512_insertf32x8(widened.raw, hi.raw, 1)};
+}
+template <>
+HWY_INLINE Vec512<double> Combine(Vec256<double> hi, Vec256<double> lo) {
+  const Vec512<double> widened = ZeroExtendVector(lo);  // upper half is zero
+  return Vec512<double>{_mm512_insertf64x4(widened.raw, hi.raw, 1)};
+}
+
+// ------------------------------ Shift vector by constant #bytes
+
+// 0x01..0F, kBytes = 1 => 0x02..0F00
+template <int kBytes, typename T>  // shift is applied per 128-bit block
+HWY_API Vec512<T> ShiftLeftBytes(const Vec512<T> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  return Vec512<T>{_mm512_bslli_epi128(v.raw, kBytes)};  // zeros shifted in
+}
+
+template <int kLanes, typename T>
+HWY_API Vec512<T> ShiftLeftLanes(const Vec512<T> v) {
+  // Convert the lane count into bytes and shift the byte view of v.
+  const auto bytes = BitCast(Full512<uint8_t>(), v);
+  return BitCast(Full512<T>(), ShiftLeftBytes<kLanes * sizeof(T)>(bytes));
+}
+
+// 0x01..0F, kBytes = 1 => 0x0001..0E
+template <int kBytes, typename T>  // shift is applied per 128-bit block
+HWY_API Vec512<T> ShiftRightBytes(const Vec512<T> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  return Vec512<T>{_mm512_bsrli_epi128(v.raw, kBytes)};  // zeros shifted in
+}
+
+template <int kLanes, typename T>
+HWY_API Vec512<T> ShiftRightLanes(const Vec512<T> v) {
+  // Convert the lane count into bytes and shift the byte view of v.
+  const auto bytes = BitCast(Full512<uint8_t>(), v);
+  return BitCast(Full512<T>(), ShiftRightBytes<kLanes * sizeof(T)>(bytes));
+}
+
+// ------------------------------ Extract from 2x 128-bit at constant offset
+
+// Extracts 128 bits from <hi, lo> by skipping the least-significant kBytes.
+template <int kBytes, typename T>
+HWY_API Vec512<T> CombineShiftRightBytes(const Vec512<T> hi,
+                                         const Vec512<T> lo) {
+  const auto hi8 = BitCast(Full512<uint8_t>(), hi).raw;
+  const auto lo8 = BitCast(Full512<uint8_t>(), lo).raw;
+  const Vec512<uint8_t> bytes{_mm512_alignr_epi8(hi8, lo8, kBytes)};
+  return BitCast(Full512<T>(), bytes);
+}
+
+// ------------------------------ Broadcast/splat any lane
+
+// Unsigned. Lane indices are relative to each 128-bit block.
+template <int kLane>
+HWY_API Vec512<uint16_t> Broadcast(const Vec512<uint16_t> v) {
+  static_assert(0 <= kLane && kLane < 8, "Invalid lane");
+  if (kLane >= 4) {
+    const __m512i top =
+        _mm512_shufflehi_epi16(v.raw, (0x55 * (kLane - 4)) & 0xFF);
+    return Vec512<uint16_t>{_mm512_unpackhi_epi64(top, top)};
+  } else {
+    const __m512i low = _mm512_shufflelo_epi16(v.raw, (0x55 * kLane) & 0xFF);
+    return Vec512<uint16_t>{_mm512_unpacklo_epi64(low, low)};
+  }
+}
+template <int kLane>  // 0x55 * kLane puts kLane in all four 2-bit selectors
+HWY_API Vec512<uint32_t> Broadcast(const Vec512<uint32_t> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  constexpr _MM_PERM_ENUM sel = static_cast<_MM_PERM_ENUM>(0x55 * kLane);
+  return Vec512<uint32_t>{_mm512_shuffle_epi32(v.raw, sel)};
+}
+template <int kLane>  // a 64-bit lane is moved as a pair of 32-bit elements
+HWY_API Vec512<uint64_t> Broadcast(const Vec512<uint64_t> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  constexpr _MM_PERM_ENUM sel = (kLane == 0) ? _MM_PERM_BABA : _MM_PERM_DCDC;
+  return Vec512<uint64_t>{_mm512_shuffle_epi32(v.raw, sel)};
+}
+
+// Signed. Same intrinsics and immediates as the unsigned overloads.
+template <int kLane>
+HWY_API Vec512<int16_t> Broadcast(const Vec512<int16_t> v) {
+  static_assert(0 <= kLane && kLane < 8, "Invalid lane");
+  if (kLane < 4) {  // kLane in [0, 4): shufflelo, then duplicate via unpacklo
+    const __m512i lo = _mm512_shufflelo_epi16(v.raw, (0x55 * kLane) & 0xFF);
+    return Vec512<int16_t>{_mm512_unpacklo_epi64(lo, lo)};
+  } else {  // kLane in [4, 8): shufflehi with (kLane - 4), then unpackhi
+    const __m512i hi =
+        _mm512_shufflehi_epi16(v.raw, (0x55 * (kLane - 4)) & 0xFF);
+    return Vec512<int16_t>{_mm512_unpackhi_epi64(hi, hi)};
+  }
+}
+template <int kLane>
+HWY_API Vec512<int32_t> Broadcast(const Vec512<int32_t> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  constexpr _MM_PERM_ENUM perm = static_cast<_MM_PERM_ENUM>(0x55 * kLane);
+  return Vec512<int32_t>{_mm512_shuffle_epi32(v.raw, perm)};  // kLane x4
+}
+template <int kLane>
+HWY_API Vec512<int64_t> Broadcast(const Vec512<int64_t> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  constexpr _MM_PERM_ENUM perm = kLane ? _MM_PERM_DCDC : _MM_PERM_BABA;
+  return Vec512<int64_t>{_mm512_shuffle_epi32(v.raw, perm)};  // 32-bit pairs
+}
+
+// Float. The two-input shuffles receive v for both operands.
+template <int kLane>
+HWY_API Vec512<float> Broadcast(const Vec512<float> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  constexpr _MM_PERM_ENUM perm = static_cast<_MM_PERM_ENUM>(0x55 * kLane);
+  return Vec512<float>{_mm512_shuffle_ps(v.raw, v.raw, perm)};  // kLane x4
+}
+template <int kLane>
+HWY_API Vec512<double> Broadcast(const Vec512<double> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  constexpr _MM_PERM_ENUM perm = static_cast<_MM_PERM_ENUM>(0xFF * kLane);
+  return Vec512<double>{_mm512_shuffle_pd(v.raw, v.raw, perm)};  // 0x00 or 0xFF
+}
+
+// ------------------------------ Hard-coded shuffles
+
+// Notation: let Vec512<int32_t> have lanes 7,6,5,4,3,2,1,0 (0 is
+// least-significant). Shuffle0321 rotates four-lane blocks one lane to the
+// right (the previous least-significant lane is now most-significant =>
+// 47650321). These could also be implemented via CombineShiftRightBytes but
+// the shuffle_abcd notation is more convenient.
+
+// Swap 32-bit halves in 64-bit halves (per four lanes: 3,2,1,0 => 2,3,0,1).
+HWY_API Vec512<uint32_t> Shuffle2301(const Vec512<uint32_t> v) {
+  return Vec512<uint32_t>{_mm512_shuffle_epi32(v.raw, _MM_PERM_CDAB)};
+}
+HWY_API Vec512<int32_t> Shuffle2301(const Vec512<int32_t> v) {
+  return Vec512<int32_t>{_mm512_shuffle_epi32(v.raw, _MM_PERM_CDAB)};
+}
+HWY_API Vec512<float> Shuffle2301(const Vec512<float> v) {  // v is both inputs
+  return Vec512<float>{_mm512_shuffle_ps(v.raw, v.raw, _MM_PERM_CDAB)};
+}
+
+// Swap 64-bit halves (per four 32-bit lanes: 3,2,1,0 => 1,0,3,2).
+HWY_API Vec512<uint32_t> Shuffle1032(const Vec512<uint32_t> v) {
+  return Vec512<uint32_t>{_mm512_shuffle_epi32(v.raw, _MM_PERM_BADC)};
+}
+HWY_API Vec512<int32_t> Shuffle1032(const Vec512<int32_t> v) {
+  return Vec512<int32_t>{_mm512_shuffle_epi32(v.raw, _MM_PERM_BADC)};
+}
+HWY_API Vec512<float> Shuffle1032(const Vec512<float> v) {
+  // Shorter encoding than _mm512_permute_ps. v is passed as both inputs.
+  return Vec512<float>{_mm512_shuffle_ps(v.raw, v.raw, _MM_PERM_BADC)};
+}
+HWY_API Vec512<uint64_t> Shuffle01(const Vec512<uint64_t> v) {  // 1,0 => 0,1
+  return Vec512<uint64_t>{_mm512_shuffle_epi32(v.raw, _MM_PERM_BADC)};
+}
+HWY_API Vec512<int64_t> Shuffle01(const Vec512<int64_t> v) {  // same as above
+  return Vec512<int64_t>{_mm512_shuffle_epi32(v.raw, _MM_PERM_BADC)};
+}
+HWY_API Vec512<double> Shuffle01(const Vec512<double> v) {
+  // Shorter encoding than _mm512_permute_pd. v is passed as both inputs.
+  return Vec512<double>{_mm512_shuffle_pd(v.raw, v.raw, _MM_PERM_BBBB)};
+}
+
+// Rotate right 32 bits (per four lanes: 3,2,1,0 => 0,3,2,1).
+HWY_API Vec512<uint32_t> Shuffle0321(const Vec512<uint32_t> v) {
+  return Vec512<uint32_t>{_mm512_shuffle_epi32(v.raw, _MM_PERM_ADCB)};
+}
+HWY_API Vec512<int32_t> Shuffle0321(const Vec512<int32_t> v) {
+  return Vec512<int32_t>{_mm512_shuffle_epi32(v.raw, _MM_PERM_ADCB)};
+}
+HWY_API Vec512<float> Shuffle0321(const Vec512<float> v) {  // v is both inputs
+  return Vec512<float>{_mm512_shuffle_ps(v.raw, v.raw, _MM_PERM_ADCB)};
+}
+// Rotate left 32 bits (per four lanes: 3,2,1,0 => 2,1,0,3).
+HWY_API Vec512<uint32_t> Shuffle2103(const Vec512<uint32_t> v) {
+  return Vec512<uint32_t>{_mm512_shuffle_epi32(v.raw, _MM_PERM_CBAD)};
+}
+HWY_API Vec512<int32_t> Shuffle2103(const Vec512<int32_t> v) {
+  return Vec512<int32_t>{_mm512_shuffle_epi32(v.raw, _MM_PERM_CBAD)};
+}
+HWY_API Vec512<float> Shuffle2103(const Vec512<float> v) {  // v is both inputs
+  return Vec512<float>{_mm512_shuffle_ps(v.raw, v.raw, _MM_PERM_CBAD)};
+}
+
+// Reverse (per four lanes: 3,2,1,0 => 0,1,2,3).
+HWY_API Vec512<uint32_t> Shuffle0123(const Vec512<uint32_t> v) {
+  return Vec512<uint32_t>{_mm512_shuffle_epi32(v.raw, _MM_PERM_ABCD)};
+}
+HWY_API Vec512<int32_t> Shuffle0123(const Vec512<int32_t> v) {
+  return Vec512<int32_t>{_mm512_shuffle_epi32(v.raw, _MM_PERM_ABCD)};
+}
+HWY_API Vec512<float> Shuffle0123(const Vec512<float> v) {  // v is both inputs
+  return Vec512<float>{_mm512_shuffle_ps(v.raw, v.raw, _MM_PERM_ABCD)};
+}
+
+// ------------------------------ TableLookupLanes
+
+// Returned by SetTableIndices for use by TableLookupLanes.
+template <typename T>  // T only tags the lane type; raw does not depend on it
+struct Indices512 {
+  __m512i raw;  // index vector as loaded by SetTableIndices
+};
+
+template <typename T>  // indices are always given as int32_t, whatever T is
+HWY_API Indices512<T> SetTableIndices(const Full512<T>, const int32_t* idx) {
+#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER)
+  const size_t N = 64 / sizeof(T);  // lanes of T in a 64-byte vector
+  for (size_t i = 0; i < N; ++i) {  // debug-only range check of idx[0, N)
+    HWY_DASSERT(0 <= idx[i] && idx[i] < static_cast<int32_t>(N));
+  }
+#endif
+  return Indices512<T>{LoadU(Full512<int32_t>(), idx).raw};  // unaligned load
+}
+
+HWY_API Vec512<uint32_t> TableLookupLanes(const Vec512<uint32_t> v,
+                                          const Indices512<uint32_t> idx) {
+  return Vec512<uint32_t>{_mm512_permutexvar_epi32(idx.raw, v.raw)};  // idx 1st
+}
+HWY_API Vec512<int32_t> TableLookupLanes(const Vec512<int32_t> v,
+                                         const Indices512<int32_t> idx) {
+  return Vec512<int32_t>{_mm512_permutexvar_epi32(idx.raw, v.raw)};
+}
+HWY_API Vec512<float> TableLookupLanes(const Vec512<float> v,
+                                       const Indices512<float> idx) {
+  return Vec512<float>{_mm512_permutexvar_ps(idx.raw, v.raw)};  // float variant
+}
+
+// ------------------------------ Interleave lanes
+
+// Interleaves lanes from halves of the 128-bit blocks of "a" (which provides
+// the least-significant lane) and "b". To concatenate two half-width integers
+// into one, use ZipLower/Upper instead (also works with scalar).
+// InterleaveLower forwards to the unpacklo intrinsic of the same lane width.
+HWY_API Vec512<uint8_t> InterleaveLower(const Vec512<uint8_t> a,
+                                        const Vec512<uint8_t> b) {
+  return Vec512<uint8_t>{_mm512_unpacklo_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<uint16_t> InterleaveLower(const Vec512<uint16_t> a,
+                                         const Vec512<uint16_t> b) {
+  return Vec512<uint16_t>{_mm512_unpacklo_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<uint32_t> InterleaveLower(const Vec512<uint32_t> a,
+                                         const Vec512<uint32_t> b) {
+  return Vec512<uint32_t>{_mm512_unpacklo_epi32(a.raw, b.raw)};
+}
+HWY_API Vec512<uint64_t> InterleaveLower(const Vec512<uint64_t> a,
+                                         const Vec512<uint64_t> b) {
+  return Vec512<uint64_t>{_mm512_unpacklo_epi64(a.raw, b.raw)};
+}
+// Signed: same integer intrinsics as the unsigned overloads.
+HWY_API Vec512<int8_t> InterleaveLower(const Vec512<int8_t> a,
+                                       const Vec512<int8_t> b) {
+  return Vec512<int8_t>{_mm512_unpacklo_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<int16_t> InterleaveLower(const Vec512<int16_t> a,
+                                        const Vec512<int16_t> b) {
+  return Vec512<int16_t>{_mm512_unpacklo_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<int32_t> InterleaveLower(const Vec512<int32_t> a,
+                                        const Vec512<int32_t> b) {
+  return Vec512<int32_t>{_mm512_unpacklo_epi32(a.raw, b.raw)};
+}
+HWY_API Vec512<int64_t> InterleaveLower(const Vec512<int64_t> a,
+                                        const Vec512<int64_t> b) {
+  return Vec512<int64_t>{_mm512_unpacklo_epi64(a.raw, b.raw)};
+}
+// Float: the _ps / _pd variants.
+HWY_API Vec512<float> InterleaveLower(const Vec512<float> a,
+                                      const Vec512<float> b) {
+  return Vec512<float>{_mm512_unpacklo_ps(a.raw, b.raw)};
+}
+HWY_API Vec512<double> InterleaveLower(const Vec512<double> a,
+                                       const Vec512<double> b) {
+  return Vec512<double>{_mm512_unpacklo_pd(a.raw, b.raw)};
+}
+// InterleaveUpper forwards to the unpackhi intrinsic of the same lane width.
+HWY_API Vec512<uint8_t> InterleaveUpper(const Vec512<uint8_t> a,
+                                        const Vec512<uint8_t> b) {
+  return Vec512<uint8_t>{_mm512_unpackhi_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<uint16_t> InterleaveUpper(const Vec512<uint16_t> a,
+                                         const Vec512<uint16_t> b) {
+  return Vec512<uint16_t>{_mm512_unpackhi_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<uint32_t> InterleaveUpper(const Vec512<uint32_t> a,
+                                         const Vec512<uint32_t> b) {
+  return Vec512<uint32_t>{_mm512_unpackhi_epi32(a.raw, b.raw)};
+}
+HWY_API Vec512<uint64_t> InterleaveUpper(const Vec512<uint64_t> a,
+                                         const Vec512<uint64_t> b) {
+  return Vec512<uint64_t>{_mm512_unpackhi_epi64(a.raw, b.raw)};
+}
+// Signed: same integer intrinsics as the unsigned overloads.
+HWY_API Vec512<int8_t> InterleaveUpper(const Vec512<int8_t> a,
+                                       const Vec512<int8_t> b) {
+  return Vec512<int8_t>{_mm512_unpackhi_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<int16_t> InterleaveUpper(const Vec512<int16_t> a,
+                                        const Vec512<int16_t> b) {
+  return Vec512<int16_t>{_mm512_unpackhi_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<int32_t> InterleaveUpper(const Vec512<int32_t> a,
+                                        const Vec512<int32_t> b) {
+  return Vec512<int32_t>{_mm512_unpackhi_epi32(a.raw, b.raw)};
+}
+HWY_API Vec512<int64_t> InterleaveUpper(const Vec512<int64_t> a,
+                                        const Vec512<int64_t> b) {
+  return Vec512<int64_t>{_mm512_unpackhi_epi64(a.raw, b.raw)};
+}
+// Float: the _ps / _pd variants.
+HWY_API Vec512<float> InterleaveUpper(const Vec512<float> a,
+                                      const Vec512<float> b) {
+  return Vec512<float>{_mm512_unpackhi_ps(a.raw, b.raw)};
+}
+HWY_API Vec512<double> InterleaveUpper(const Vec512<double> a,
+                                       const Vec512<double> b) {
+  return Vec512<double>{_mm512_unpackhi_pd(a.raw, b.raw)};
+}
+
+// ------------------------------ Zip lanes
+
+// Same as Interleave*, except that the return lanes are double-width integers;
+// this is necessary because the single-lane scalar cannot return two values.
+// Unsigned: same unpacklo intrinsics as InterleaveLower, wider result type.
+HWY_API Vec512<uint16_t> ZipLower(const Vec512<uint8_t> a,
+                                  const Vec512<uint8_t> b) {
+  return Vec512<uint16_t>{_mm512_unpacklo_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<uint32_t> ZipLower(const Vec512<uint16_t> a,
+                                  const Vec512<uint16_t> b) {
+  return Vec512<uint32_t>{_mm512_unpacklo_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<uint64_t> ZipLower(const Vec512<uint32_t> a,
+                                  const Vec512<uint32_t> b) {
+  return Vec512<uint64_t>{_mm512_unpacklo_epi32(a.raw, b.raw)};
+}
+// Signed: identical intrinsics, signed input and result types.
+HWY_API Vec512<int16_t> ZipLower(const Vec512<int8_t> a,
+                                 const Vec512<int8_t> b) {
+  return Vec512<int16_t>{_mm512_unpacklo_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<int32_t> ZipLower(const Vec512<int16_t> a,
+                                 const Vec512<int16_t> b) {
+  return Vec512<int32_t>{_mm512_unpacklo_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<int64_t> ZipLower(const Vec512<int32_t> a,
+                                 const Vec512<int32_t> b) {
+  return Vec512<int64_t>{_mm512_unpacklo_epi32(a.raw, b.raw)};
+}
+// Unsigned: same unpackhi intrinsics as InterleaveUpper, wider result type.
+HWY_API Vec512<uint16_t> ZipUpper(const Vec512<uint8_t> a,
+                                  const Vec512<uint8_t> b) {
+  return Vec512<uint16_t>{_mm512_unpackhi_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<uint32_t> ZipUpper(const Vec512<uint16_t> a,
+                                  const Vec512<uint16_t> b) {
+  return Vec512<uint32_t>{_mm512_unpackhi_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<uint64_t> ZipUpper(const Vec512<uint32_t> a,
+                                  const Vec512<uint32_t> b) {
+  return Vec512<uint64_t>{_mm512_unpackhi_epi32(a.raw, b.raw)};
+}
+// Signed: identical intrinsics, signed input and result types.
+HWY_API Vec512<int16_t> ZipUpper(const Vec512<int8_t> a,
+                                 const Vec512<int8_t> b) {
+  return Vec512<int16_t>{_mm512_unpackhi_epi8(a.raw, b.raw)};
+}
+HWY_API Vec512<int32_t> ZipUpper(const Vec512<int16_t> a,
+                                 const Vec512<int16_t> b) {
+  return Vec512<int32_t>{_mm512_unpackhi_epi16(a.raw, b.raw)};
+}
+HWY_API Vec512<int64_t> ZipUpper(const Vec512<int32_t> a,
+                                 const Vec512<int32_t> b) {
+  return Vec512<int64_t>{_mm512_unpackhi_epi32(a.raw, b.raw)};
+}
+
+// ------------------------------ Concat* halves
+
+// hiH,hiL loH,loL |-> hiL,loL (= lower halves)
+template <typename T>  // generic version: integer shuffle (i32x4), lo first
+HWY_API Vec512<T> ConcatLowerLower(const Vec512<T> hi, const Vec512<T> lo) {
+  return Vec512<T>{_mm512_shuffle_i32x4(lo.raw, hi.raw, _MM_PERM_BABA)};
+}
+template <>  // float: same permutation constant with the f32x4 shuffle
+HWY_INLINE Vec512<float> ConcatLowerLower(const Vec512<float> hi,
+                                          const Vec512<float> lo) {
+  return Vec512<float>{_mm512_shuffle_f32x4(lo.raw, hi.raw, _MM_PERM_BABA)};
+}
+template <>  // double: same permutation constant with the f64x2 shuffle
+HWY_INLINE Vec512<double> ConcatLowerLower(const Vec512<double> hi,
+                                           const Vec512<double> lo) {
+  return Vec512<double>{_mm512_shuffle_f64x2(lo.raw, hi.raw, _MM_PERM_BABA)};
+}
+
+// hiH,hiL loH,loL |-> hiH,loH (= upper halves)
+template <typename T>  // generic version: integer shuffle (i32x4), lo first
+HWY_API Vec512<T> ConcatUpperUpper(const Vec512<T> hi, const Vec512<T> lo) {
+  return Vec512<T>{_mm512_shuffle_i32x4(lo.raw, hi.raw, _MM_PERM_DCDC)};
+}
+template <>  // float: same permutation constant with the f32x4 shuffle
+HWY_INLINE Vec512<float> ConcatUpperUpper(const Vec512<float> hi,
+                                          const Vec512<float> lo) {
+  return Vec512<float>{_mm512_shuffle_f32x4(lo.raw, hi.raw, _MM_PERM_DCDC)};
+}
+template <>  // double: same permutation constant with the f64x2 shuffle
+HWY_INLINE Vec512<double> ConcatUpperUpper(const Vec512<double> hi,
+                                           const Vec512<double> lo) {
+  return Vec512<double>{_mm512_shuffle_f64x2(lo.raw, hi.raw, _MM_PERM_DCDC)};
+}
+
+// hiH,hiL loH,loL |-> hiL,loH (= inner halves / swap blocks)
+template <typename T>  // generic version: integer shuffle (i32x4), imm 0x4E
+HWY_API Vec512<T> ConcatLowerUpper(const Vec512<T> hi, const Vec512<T> lo) {
+  return Vec512<T>{_mm512_shuffle_i32x4(lo.raw, hi.raw, 0x4E)};
+}
+template <>  // float: same immediate with the f32x4 shuffle
+HWY_INLINE Vec512<float> ConcatLowerUpper(const Vec512<float> hi,
+                                          const Vec512<float> lo) {
+  return Vec512<float>{_mm512_shuffle_f32x4(lo.raw, hi.raw, 0x4E)};
+}
+template <>  // double: same immediate with the f64x2 shuffle
+HWY_INLINE Vec512<double> ConcatLowerUpper(const Vec512<double> hi,
+                                           const Vec512<double> lo) {
+  return Vec512<double>{_mm512_shuffle_f64x2(lo.raw, hi.raw, 0x4E)};
+}
+
+// hiH,hiL loH,loL |-> hiH,loL (= outer halves)
+template <typename T>
+HWY_API Vec512<T> ConcatUpperLower(const Vec512<T> hi, const Vec512<T> lo) {
+  // AVX-512 has no imm8 blend, hence a mask register. blend16 is chosen
+  // because 32-bit masks are efficiently loaded from 32-bit regs.
+  const __mmask32 lower_half = 0x0000FFFF;  // low 16 of 32 mask bits
+  return Vec512<T>{_mm512_mask_blend_epi16(lower_half, hi.raw, lo.raw)};
+}
+template <>
+HWY_INLINE Vec512<float> ConcatUpperLower(const Vec512<float> hi,
+                                          const Vec512<float> lo) {
+  const __mmask16 lower_half = 0x00FF;  // low 8 of 16 mask bits
+  return Vec512<float>{_mm512_mask_blend_ps(lower_half, hi.raw, lo.raw)};
+}
+template <>
+HWY_INLINE Vec512<double> ConcatUpperLower(const Vec512<double> hi,
+                                           const Vec512<double> lo) {
+  const __mmask8 lower_half = 0x0F;  // low 4 of 8 mask bits
+  return Vec512<double>{_mm512_mask_blend_pd(lower_half, hi.raw, lo.raw)};
+}
+
+// ------------------------------ Odd/even lanes
+
+template <typename T>  // mask has even-numbered bits set: those lanes get b
+HWY_API Vec512<T> OddEven(const Vec512<T> a, const Vec512<T> b) {
+  constexpr size_t s = sizeof(T);  // lane size in bytes
+  constexpr int shift = s == 1 ? 0 : s == 2 ? 32 : s == 4 ? 48 : 56;  // 64-64/s
+  return IfThenElse(Mask512<T>{0x5555555555555555ull >> shift}, b, a);
+}
+
+// ------------------------------ Shuffle bytes with variable indices
+
+// Returns bytes[from[i]]; "from" holds byte indices in [0, 16). NOTE(review):
+// U8FromU32 also passes 0xFF index bytes, so out-of-range may be allowed.
+template <typename T>  // T is only a tag: both inputs are passed as .raw
+HWY_API Vec512<T> TableLookupBytes(const Vec512<T> bytes,
+                                   const Vec512<T> from) {
+  return Vec512<T>{_mm512_shuffle_epi8(bytes.raw, from.raw)};
+}
+
+// ================================================== CONVERT
+
+// ------------------------------ Promotions (part w/ narrow lanes -> full)
+
+HWY_API Vec512<float> PromoteTo(Full512<float> /* tag */,
+                                const Vec256<float16_t> v) {
+  return Vec512<float>{_mm512_cvtph_ps(v.raw)};  // float16 -> float
+}
+
+HWY_API Vec512<double> PromoteTo(Full512<double> /* tag */, Vec256<float> v) {
+  return Vec512<double>{_mm512_cvtps_pd(v.raw)};  // float -> double
+}
+
+HWY_API Vec512<double> PromoteTo(Full512<double> /* tag */, Vec256<int32_t> v) {
+  return Vec512<double>{_mm512_cvtepi32_pd(v.raw)};  // int32 -> double
+}
+
+// Unsigned: zero-extend. The tag argument only selects the overload.
+// Note: these have 3 cycle latency; if inputs are already split across the
+// 128 bit blocks (in their upper/lower halves), then Zip* would be faster.
+HWY_API Vec512<uint16_t> PromoteTo(Full512<uint16_t> /* tag */,
+                                   Vec256<uint8_t> v) {
+  return Vec512<uint16_t>{_mm512_cvtepu8_epi16(v.raw)};
+}
+HWY_API Vec512<uint32_t> PromoteTo(Full512<uint32_t> /* tag */,
+                                   Vec128<uint8_t> v) {
+  return Vec512<uint32_t>{_mm512_cvtepu8_epi32(v.raw)};
+}
+HWY_API Vec512<int16_t> PromoteTo(Full512<int16_t> /* tag */,
+                                  Vec256<uint8_t> v) {
+  return Vec512<int16_t>{_mm512_cvtepu8_epi16(v.raw)};  // signed result type
+}
+HWY_API Vec512<int32_t> PromoteTo(Full512<int32_t> /* tag */,
+                                  Vec128<uint8_t> v) {
+  return Vec512<int32_t>{_mm512_cvtepu8_epi32(v.raw)};  // signed result type
+}
+HWY_API Vec512<uint32_t> PromoteTo(Full512<uint32_t> /* tag */,
+                                   Vec256<uint16_t> v) {
+  return Vec512<uint32_t>{_mm512_cvtepu16_epi32(v.raw)};
+}
+HWY_API Vec512<int32_t> PromoteTo(Full512<int32_t> /* tag */,
+                                  Vec256<uint16_t> v) {
+  return Vec512<int32_t>{_mm512_cvtepu16_epi32(v.raw)};  // signed result type
+}
+HWY_API Vec512<uint64_t> PromoteTo(Full512<uint64_t> /* tag */,
+                                   Vec256<uint32_t> v) {
+  return Vec512<uint64_t>{_mm512_cvtepu32_epi64(v.raw)};
+}
+
+// Signed: replicate sign bit. The tag argument only selects the overload.
+// Note: these have 3 cycle latency; if inputs are already split across the
+// 128 bit blocks (in their upper/lower halves), then ZipUpper/ZipLower
+// followed by signed shift would be faster.
+HWY_API Vec512<int16_t> PromoteTo(Full512<int16_t> /* tag */,
+                                  Vec256<int8_t> v) {
+  return Vec512<int16_t>{_mm512_cvtepi8_epi16(v.raw)};
+}
+HWY_API Vec512<int32_t> PromoteTo(Full512<int32_t> /* tag */,
+                                  Vec128<int8_t> v) {
+  return Vec512<int32_t>{_mm512_cvtepi8_epi32(v.raw)};
+}
+HWY_API Vec512<int32_t> PromoteTo(Full512<int32_t> /* tag */,
+                                  Vec256<int16_t> v) {
+  return Vec512<int32_t>{_mm512_cvtepi16_epi32(v.raw)};
+}
+HWY_API Vec512<int64_t> PromoteTo(Full512<int64_t> /* tag */,
+                                  Vec256<int32_t> v) {
+  return Vec512<int64_t>{_mm512_cvtepi32_epi64(v.raw)};
+}
+
+// ------------------------------ Demotions (full -> part w/ narrow lanes)
+
+HWY_API Vec256<uint16_t> DemoteTo(Full256<uint16_t> /* tag */,
+                                  const Vec512<int32_t> v) {
+  // packus(v, v) result, then u64 lanes 0, 2, 4, 6 are moved to the lower half.
+  alignas(64) static constexpr uint64_t kEvenU64[8] = {0, 2, 4, 6, 0, 2, 4, 6};
+  const Full512<uint64_t> d64;
+  const __m512i packed = _mm512_packus_epi32(v.raw, v.raw);
+  const __m512i gathered =
+      _mm512_permutexvar_epi64(Load(d64, kEvenU64).raw, packed);
+  return LowerHalf(Vec512<uint16_t>{gathered});
+}
+
+HWY_API Vec256<int16_t> DemoteTo(Full256<int16_t> /* tag */,
+                                 const Vec512<int32_t> v) {
+  // packs(v, v) result, then u64 lanes 0, 2, 4, 6 are moved to the lower half.
+  alignas(64) static constexpr uint64_t kEvenU64[8] = {0, 2, 4, 6, 0, 2, 4, 6};
+  const Full512<uint64_t> d64;
+  const __m512i packed = _mm512_packs_epi32(v.raw, v.raw);
+  const __m512i gathered =
+      _mm512_permutexvar_epi64(Load(d64, kEvenU64).raw, packed);
+  return LowerHalf(Vec512<int16_t>{gathered});
+}
+
+HWY_API Vec128<uint8_t, 16> DemoteTo(Full128<uint8_t> /* tag */,
+                                     const Vec512<int32_t> v) {
+  // Indices of the u32 lane at the start of each 128-bit block.
+  alignas(16) static constexpr uint32_t kBlockStart[4] = {0, 4, 8, 12};
+  const __m512i sat16 = _mm512_packus_epi32(v.raw, v.raw);
+  // The second packus interprets its input as signed, so drop the MSB first;
+  // the result is then unsigned saturation to u8.
+  const __m512i masked = _mm512_and_si512(sat16, _mm512_set1_epi16(0x7FFF));
+  const __m512i sat8 = _mm512_packus_epi16(masked, masked);
+  const auto first_of_block = LoadDup128(Full512<uint32_t>(), kBlockStart);
+  const __m512i gathered =
+      _mm512_permutexvar_epi32(first_of_block.raw, sat8);
+  return LowerHalf(LowerHalf(Vec512<uint8_t>{gathered}));
+}
+
+HWY_API Vec256<uint8_t> DemoteTo(Full256<uint8_t> /* tag */,
+                                 const Vec512<int16_t> v) {
+  // packus(v, v) result, then u64 lanes 0, 2, 4, 6 are moved to the lower half.
+  alignas(64) static constexpr uint64_t kEvenU64[8] = {0, 2, 4, 6, 0, 2, 4, 6};
+  const Full512<uint64_t> d64;
+  const __m512i packed = _mm512_packus_epi16(v.raw, v.raw);
+  const __m512i gathered =
+      _mm512_permutexvar_epi64(Load(d64, kEvenU64).raw, packed);
+  return LowerHalf(Vec512<uint8_t>{gathered});
+}
+
+HWY_API Vec128<int8_t, 16> DemoteTo(Full128<int8_t> /* tag */,
+                                    const Vec512<int32_t> v) {
+  const Vec512<int16_t> i16{_mm512_packs_epi32(v.raw, v.raw)};  // 32 -> 16 bit
+  const Vec512<int8_t> i8{_mm512_packs_epi16(i16.raw, i16.raw)};  // 16 -> 8 bit
+  // Gather the first u32 lane of every 128-bit block into the lowest block.
+  // LoadDup128 takes a single 128-bit table, so four indices suffice.
+  alignas(16) static constexpr uint32_t kLanes[4] = {0, 4, 8, 12};
+  const auto idx32 = LoadDup128(Full512<uint32_t>(), kLanes);
+  const Vec512<int8_t> fixed{_mm512_permutexvar_epi32(idx32.raw, i8.raw)};
+  return LowerHalf(LowerHalf(fixed));
+}
+
+HWY_API Vec256<int8_t> DemoteTo(Full256<int8_t> /* tag */,
+                                const Vec512<int16_t> v) {
+  const Vec512<int8_t> i8{_mm512_packs_epi16(v.raw, v.raw)};  // int8_t lanes
+
+  // Compress even u64 lanes into 256 bit.
+  alignas(64) static constexpr uint64_t kLanes[8] = {0, 2, 4, 6, 0, 2, 4, 6};
+  const auto idx64 = Load(Full512<uint64_t>(), kLanes);
+  const Vec512<int8_t> even{_mm512_permutexvar_epi64(idx64.raw, i8.raw)};
+  return LowerHalf(even);  // only the lower 256 bits are returned
+}
+
+HWY_API Vec256<float16_t> DemoteTo(Full256<float16_t> /* tag */,
+                                   const Vec512<float> v) {
+  // Work around warnings in the intrinsic definitions (passing -1 as a mask).
+  HWY_DIAGNOSTICS(push)  // suppression is scoped to the conversion below
+  HWY_DIAGNOSTICS_OFF(disable : 4245 4365, ignored "-Wsign-conversion")
+  return Vec256<float16_t>{_mm512_cvtps_ph(v.raw, _MM_FROUND_NO_EXC)};
+  HWY_DIAGNOSTICS(pop)  // placed after the return; restores the diagnostics
+}
+
+HWY_API Vec256<float> DemoteTo(Full256<float> /* tag */,
+                               const Vec512<double> v) {
+  return Vec256<float>{_mm512_cvtpd_ps(v.raw)};  // double -> float
+}
+
+HWY_API Vec256<int32_t> DemoteTo(Full256<int32_t> /* tag */,
+                                 const Vec512<double> v) {
+  const auto clamped = detail::ClampF64ToI32Max(Full512<double>(), v);
+  return Vec256<int32_t>{_mm512_cvttpd_epi32(clamped.raw)};  // after clamping
+}
+
+// Input lanes must already be in [0, 255]; only the low byte of each is kept.
+HWY_API Vec128<uint8_t, 16> U8FromU32(const Vec512<uint32_t> v) {
+  // Step 1 shuffle: per 128-bit block, bytes 0, 4, 8, 12 (the low byte of each
+  // uint32_t lane) move into the lowest four bytes.
+  alignas(16) static constexpr uint32_t kLowBytes[4] = {0x0C080400u, ~0u, ~0u,
+                                                        ~0u};
+  // Step 2 permute: u32 lanes 0, 4, 8, 12 (one per block) become lanes 0..3.
+  alignas(16) static constexpr uint32_t kBlockLanes[4] = {0, 4, 8, 12};
+  const Full512<uint32_t> d32;
+  const auto lane_idx = LoadDup128(d32, kBlockLanes);
+  const auto packed = TableLookupBytes(v, LoadDup128(d32, kLowBytes));
+  const __m512i merged = _mm512_permutexvar_epi32(lane_idx.raw, packed.raw);
+  return LowerHalf(LowerHalf(Vec512<uint8_t>{merged}));
+}
+
+// ------------------------------ Convert integer <=> floating point
+
+HWY_API Vec512<float> ConvertTo(Full512<float> /* tag */,
+                                const Vec512<int32_t> v) {
+  return Vec512<float>{_mm512_cvtepi32_ps(v.raw)};  // int32 -> float
+}
+
+HWY_API Vec512<double> ConvertTo(Full512<double> /* tag */,
+                                 const Vec512<int64_t> v) {
+  return Vec512<double>{_mm512_cvtepi64_pd(v.raw)};  // int64 -> double
+}
+
+// Truncates (rounds toward zero). The raw result goes through a fix-up helper.
+HWY_API Vec512<int32_t> ConvertTo(Full512<int32_t> d, const Vec512<float> v) {
+  return detail::FixConversionOverflow(d, v, _mm512_cvttps_epi32(v.raw));
+}
+HWY_API Vec512<int64_t> ConvertTo(Full512<int64_t> di, const Vec512<double> v) {
+  return detail::FixConversionOverflow(di, v, _mm512_cvttpd_epi64(v.raw));
+}
+
+HWY_API Vec512<int32_t> NearestInt(const Vec512<float> v) {
+  const __m512i converted = _mm512_cvtps_epi32(v.raw);  // non-truncating cvt
+  return detail::FixConversionOverflow(Full512<int32_t>(), v, converted);
+}
+
+// ================================================== MISC
+
+// Returns a vector with lane i=[0, N) set to "first" + i (N = 64 / sizeof(T)).
+template <typename T, typename T2>
+HWY_API Vec512<T> Iota(const Full512<T> d, const T2 first) {
+  HWY_ALIGN T lanes[64 / sizeof(T)];  // aligned scratch buffer for Load
+  for (size_t i = 0; i < 64 / sizeof(T); ++i) {
+    lanes[i] = static_cast<T>(first + static_cast<T2>(i));  // add in T2, cast
+  }
+  return Load(d, lanes);
+}
+
+// ------------------------------ Mask
+
+// Beware: the suffix indicates the number of mask bits, not lane size!
+
+namespace detail {
+// One overload per lane size; SizeTag selects the mask width that is tested.
+template <typename T>
+HWY_API bool AllFalse(hwy::SizeTag<1> /*tag*/, const Mask512<T> v) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestz_mask64_u8(v.raw, v.raw);
+#else
+  return v.raw == 0;  // fallback without mask intrinsics
+#endif
+}
+template <typename T>
+HWY_API bool AllFalse(hwy::SizeTag<2> /*tag*/, const Mask512<T> v) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestz_mask32_u8(v.raw, v.raw);
+#else
+  return v.raw == 0;  // fallback without mask intrinsics
+#endif
+}
+template <typename T>
+HWY_API bool AllFalse(hwy::SizeTag<4> /*tag*/, const Mask512<T> v) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestz_mask16_u8(v.raw, v.raw);
+#else
+  return v.raw == 0;  // fallback without mask intrinsics
+#endif
+}
+template <typename T>
+HWY_API bool AllFalse(hwy::SizeTag<8> /*tag*/, const Mask512<T> v) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestz_mask8_u8(v.raw, v.raw);
+#else
+  return v.raw == 0;  // fallback without mask intrinsics
+#endif
+}
+
+}  // namespace detail
+
+template <typename T>
+HWY_API bool AllFalse(const Mask512<T> v) {
+  return detail::AllFalse(hwy::SizeTag<sizeof(T)>(), v);  // dispatch on size
+}
+
+namespace detail {
+// One overload per lane size; the fallback compares with an all-ones constant.
+template <typename T>
+HWY_API bool AllTrue(hwy::SizeTag<1> /*tag*/, const Mask512<T> v) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestc_mask64_u8(v.raw, v.raw);
+#else
+  return v.raw == 0xFFFFFFFFFFFFFFFFull;  // 64 mask bits
+#endif
+}
+template <typename T>
+HWY_API bool AllTrue(hwy::SizeTag<2> /*tag*/, const Mask512<T> v) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestc_mask32_u8(v.raw, v.raw);
+#else
+  return v.raw == 0xFFFFFFFFull;  // 32 mask bits
+#endif
+}
+template <typename T>
+HWY_API bool AllTrue(hwy::SizeTag<4> /*tag*/, const Mask512<T> v) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestc_mask16_u8(v.raw, v.raw);
+#else
+  return v.raw == 0xFFFFull;  // 16 mask bits
+#endif
+}
+template <typename T>
+HWY_API bool AllTrue(hwy::SizeTag<8> /*tag*/, const Mask512<T> v) {
+#if HWY_COMPILER_HAS_MASK_INTRINSICS
+  return _kortestc_mask8_u8(v.raw, v.raw);
+#else
+  return v.raw == 0xFFull;  // 8 mask bits
+#endif
+}
+
+}  // namespace detail
+
+template <typename T>
+HWY_API bool AllTrue(const Mask512<T> v) {
+  return detail::AllTrue(hwy::SizeTag<sizeof(T)>(), v);  // dispatch on size
+}
+
+template <typename T>  // copies the raw mask bits to p, returns bytes written
+HWY_INLINE size_t StoreMaskBits(const Mask512<T> mask, uint8_t* p) {
+  constexpr size_t kMaskBytes = 8 / sizeof(T);  // (64 / sizeof(T)) bits / 8
+  CopyBytes<kMaskBytes>(&mask.raw, p);
+  return kMaskBytes;
+}
+
+template <typename T>  // number of set bits in the raw mask
+HWY_API size_t CountTrue(const Mask512<T> mask) {
+  return PopCount(mask.raw);
+}
+
+// ------------------------------ Compress
+
+HWY_API Vec512<uint32_t> Compress(Vec512<uint32_t> v,
+                                  const Mask512<uint32_t> mask) {
+  return Vec512<uint32_t>{_mm512_maskz_compress_epi32(mask.raw, v.raw)};
+}
+HWY_API Vec512<int32_t> Compress(Vec512<int32_t> v,
+                                 const Mask512<int32_t> mask) {
+  return Vec512<int32_t>{_mm512_maskz_compress_epi32(mask.raw, v.raw)};
+}
+// 64-bit lanes: the epi64 variant of the same maskz compress.
+HWY_API Vec512<uint64_t> Compress(Vec512<uint64_t> v,
+                                  const Mask512<uint64_t> mask) {
+  return Vec512<uint64_t>{_mm512_maskz_compress_epi64(mask.raw, v.raw)};
+}
+HWY_API Vec512<int64_t> Compress(Vec512<int64_t> v,
+                                 const Mask512<int64_t> mask) {
+  return Vec512<int64_t>{_mm512_maskz_compress_epi64(mask.raw, v.raw)};
+}
+// Float and double: the _ps / _pd variants.
+HWY_API Vec512<float> Compress(Vec512<float> v, const Mask512<float> mask) {
+  return Vec512<float>{_mm512_maskz_compress_ps(mask.raw, v.raw)};
+}
+
+HWY_API Vec512<double> Compress(Vec512<double> v, const Mask512<double> mask) {
+  return Vec512<double>{_mm512_maskz_compress_pd(mask.raw, v.raw)};
+}
+
+namespace detail {
+// 16-bit lanes: widen to 32-bit, compress there, then narrow again.
+// Ignore IDE redefinition error for these two functions: if this header is
+// included, then the functions weren't actually defined in x86_256-inl.h.
+template <typename T>
+HWY_API Vec256<T> Compress(hwy::SizeTag<2> /*tag*/, Vec256<T> v,
+                           const uint64_t mask_bits) {
+  using D = Full256<T>;
+  const Rebind<uint16_t, D> du;
+  const Rebind<int32_t, D> dw;       // 512-bit, not 256!
+  const auto vu16 = BitCast(du, v);  // (required for float16_t inputs)
+  const Mask512<int32_t> mask{static_cast<__mmask16>(mask_bits)};  // low 16 bits
+  return BitCast(D(), DemoteTo(du, Compress(PromoteTo(dw, vu16), mask)));
+}
+
+}  // namespace detail
+
+template <typename T>  // converts the mask to bits, then dispatches on size
+HWY_API Vec256<T> Compress(Vec256<T> v, const Mask256<T> mask) {
+  const uint64_t mask_bits = detail::BitsFromMask(mask);
+  return detail::Compress(hwy::SizeTag<sizeof(T)>(), v, mask_bits);
+}
+
+// Expands to 32-bit, compresses, concatenates the demoted halves.
+template <typename T, HWY_IF_LANE_SIZE(T, 2)>
+HWY_API Vec512<T> Compress(Vec512<T> v, const Mask512<T> mask) {
+  using D = Full512<T>;
+  const Rebind<uint16_t, D> du;
+  const Repartition<int32_t, D> dw;
+  const auto vu16 = BitCast(du, v);  // (required for float16_t inputs)
+  const auto promoted0 = PromoteTo(dw, LowerHalf(vu16));
+  const auto promoted1 = PromoteTo(dw, UpperHalf(vu16));
+
+  const Mask512<int32_t> mask0{static_cast<__mmask16>(mask.raw & 0xFFFF)};
+  const Mask512<int32_t> mask1{static_cast<__mmask16>(mask.raw >> 16)};
+  const auto compressed0 = Compress(promoted0, mask0);
+  const auto compressed1 = Compress(promoted1, mask1);
+
+  const Half<decltype(du)> dh;
+  const auto demoted0 = ZeroExtendVector(DemoteTo(dh, compressed0));
+  const auto demoted1 = ZeroExtendVector(DemoteTo(dh, compressed1));
+
+  // Concatenate into single vector by shifting upper with writemask.
+  const size_t num0 = CountTrue(mask0);  // lanes kept from the lower half
+  const __mmask32 m_upper = ~((1u << num0) - 1);  // lanes [num0, 32)
+  alignas(64) static constexpr uint16_t kIota[64] = {  // 32 zeros, then 0..31
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
+      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
+  const auto idx = LoadU(du, kIota + 32 - num0);  // idx[i] = i - num0, i >= num0
+  return Vec512<T>{_mm512_mask_permutexvar_epi16(demoted0.raw, m_upper, idx.raw,
+                                                 demoted1.raw)};
+}
+
+// ------------------------------ CompressStore
+
+template <typename T>
+HWY_API size_t CompressStore(Vec256<T> v, const Mask256<T> mask, Full256<T> d,
+                             T* HWY_RESTRICT aligned) {
+  const uint64_t lane_bits = detail::BitsFromMask(mask);
+  Store(detail::Compress(hwy::SizeTag<sizeof(T)>(), v, lane_bits), d, aligned);
+  return PopCount(lane_bits);  // number of set mask bits
+}
+
+template <typename T, HWY_IF_LANE_SIZE(T, 2)>
+HWY_API size_t CompressStore(Vec512<T> v, const Mask512<T> mask, Full512<T> d,
+                             T* HWY_RESTRICT aligned) {
+  // Splice in registers: joining two compressed halves with StoreU instead
+  // could page-fault when `aligned` is the last valid vector.
+  const Vec512<T> packed = Compress(v, mask);
+  Store(packed, d, aligned);
+  return CountTrue(mask);
+}
+
+HWY_API size_t CompressStore(
+    Vec512<uint32_t> v, const Mask512<uint32_t> mask, Full512<uint32_t> /*d*/,
+    uint32_t* HWY_RESTRICT aligned) {
+  _mm512_mask_compressstoreu_epi32(aligned, mask.raw, v.raw);
+  return CountTrue(mask);  // number of selected lanes
+}
+HWY_API size_t CompressStore(
+    Vec512<int32_t> v, const Mask512<int32_t> mask, Full512<int32_t> /*d*/,
+    int32_t* HWY_RESTRICT aligned) {
+  _mm512_mask_compressstoreu_epi32(aligned, mask.raw, v.raw);
+  return CountTrue(mask);  // number of selected lanes
+}
+
+HWY_API size_t CompressStore(
+    Vec512<uint64_t> v, const Mask512<uint64_t> mask, Full512<uint64_t> /*d*/,
+    uint64_t* HWY_RESTRICT aligned) {
+  _mm512_mask_compressstoreu_epi64(aligned, mask.raw, v.raw);
+  return CountTrue(mask);  // number of selected lanes
+}
+HWY_API size_t CompressStore(
+    Vec512<int64_t> v, const Mask512<int64_t> mask, Full512<int64_t> /*d*/,
+    int64_t* HWY_RESTRICT aligned) {
+  _mm512_mask_compressstoreu_epi64(aligned, mask.raw, v.raw);
+  return CountTrue(mask);  // number of selected lanes
+}
+
+HWY_API size_t CompressStore(
+    Vec512<float> v, const Mask512<float> mask, Full512<float> /*d*/,
+    float* HWY_RESTRICT aligned) {
+  _mm512_mask_compressstoreu_ps(aligned, mask.raw, v.raw);
+  return CountTrue(mask);  // number of selected lanes
+}
+
+HWY_API size_t CompressStore(
+    Vec512<double> v, const Mask512<double> mask, Full512<double> /*d*/,
+    double* HWY_RESTRICT aligned) {
+  _mm512_mask_compressstoreu_pd(aligned, mask.raw, v.raw);
+  return CountTrue(mask);  // number of selected lanes
+}
+
+// ------------------------------ StoreInterleaved3 (CombineShiftRightBytes,
+// TableLookupBytes)
+
+HWY_API void StoreInterleaved3(const Vec512<uint8_t> a, const Vec512<uint8_t> b,
+                               const Vec512<uint8_t> c, Full512<uint8_t> d,
+                               uint8_t* HWY_RESTRICT unaligned) {  // 3*64 bytes
+  const auto k5 = Set(d, 5);  // k5/k6: increments added to the shuffle indices
+  const auto k6 = Set(d, 6);
+
+  // Shuffle (a,b,c) = (r,g,b) vector bytes to (MSB on left): r5, bgr[4:0].
+  // 0x80 so lanes to be filled from other vectors are 0 for blending.
+  alignas(16) static constexpr uint8_t tbl_r0[16] = {
+      0, 0x80, 0x80, 1, 0x80, 0x80, 2, 0x80, 0x80,  //
+      3, 0x80, 0x80, 4, 0x80, 0x80, 5};
+  alignas(16) static constexpr uint8_t tbl_g0[16] = {
+      0x80, 0, 0x80, 0x80, 1, 0x80,  //
+      0x80, 2, 0x80, 0x80, 3, 0x80, 0x80, 4, 0x80, 0x80};
+  const auto shuf_r0 = LoadDup128(d, tbl_r0);
+  const auto shuf_g0 = LoadDup128(d, tbl_g0);  // cannot reuse r0 due to 5
+  const auto shuf_b0 = CombineShiftRightBytes<15>(shuf_g0, shuf_g0);
+  const auto r0 = TableLookupBytes(a, shuf_r0);  // 5..4..3..2..1..0
+  const auto g0 = TableLookupBytes(b, shuf_g0);  // ..4..3..2..1..0.
+  const auto b0 = TableLookupBytes(c, shuf_b0);  // .4..3..2..1..0..
+  const auto i = (r0 | g0 | b0).raw;  // low byte in each 128bit: 30 20 10 00
+
+  // Second vector: g10,r10, bgr[9:6], b5,g5
+  const auto shuf_r1 = shuf_b0 + k6;  // .A..9..8..7..6..
+  const auto shuf_g1 = shuf_r0 + k5;  // A..9..8..7..6..5
+  const auto shuf_b1 = shuf_g0 + k5;  // ..9..8..7..6..5.
+  const auto r1 = TableLookupBytes(a, shuf_r1);
+  const auto g1 = TableLookupBytes(b, shuf_g1);
+  const auto b1 = TableLookupBytes(c, shuf_b1);
+  const auto j = (r1 | g1 | b1).raw;  // low byte in each 128bit: 35 25 15 05
+
+  // Third vector: bgr[15:11], b10
+  const auto shuf_r2 = shuf_b1 + k6;  // ..F..E..D..C..B.
+  const auto shuf_g2 = shuf_r1 + k5;  // .F..E..D..C..B..
+  const auto shuf_b2 = shuf_g1 + k5;  // F..E..D..C..B..A
+  const auto r2 = TableLookupBytes(a, shuf_r2);
+  const auto g2 = TableLookupBytes(b, shuf_g2);
+  const auto b2 = TableLookupBytes(c, shuf_b2);
+  const auto k = (r2 | g2 | b2).raw;  // low byte in each 128bit: 3A 2A 1A 0A
+
+  // To obtain 10 0A 05 00 in one vector, transpose "rows" into "columns".
+  const auto k3_k0_i3_i0 = _mm512_shuffle_i64x2(i, k, _MM_SHUFFLE(3, 0, 3, 0));
+  const auto i1_i2_j0_j1 = _mm512_shuffle_i64x2(j, i, _MM_SHUFFLE(1, 2, 0, 1));
+  const auto j2_j3_k1_k2 = _mm512_shuffle_i64x2(k, j, _MM_SHUFFLE(2, 3, 1, 2));
+
+  // Alternating order, most-significant 128 bits from the second arg.
+  const __mmask8 m = 0xCC;  // 64-bit lanes 2,3,6,7 = 128-bit blocks 1 and 3
+  const auto i1_k0_j0_i0 = _mm512_mask_blend_epi64(m, k3_k0_i3_i0, i1_i2_j0_j1);
+  const auto j2_i2_k1_j1 = _mm512_mask_blend_epi64(m, i1_i2_j0_j1, j2_j3_k1_k2);
+  const auto k3_j3_i3_k2 = _mm512_mask_blend_epi64(m, j2_j3_k1_k2, k3_k0_i3_i0);
+
+  StoreU(Vec512<uint8_t>{i1_k0_j0_i0}, d, unaligned + 0 * 64);  //  10 0A 05 00
+  StoreU(Vec512<uint8_t>{j2_i2_k1_j1}, d, unaligned + 1 * 64);  //  25 20 1A 15
+  StoreU(Vec512<uint8_t>{k3_j3_i3_k2}, d, unaligned + 2 * 64);  //  3A 35 30 2A
+}
+
+// ------------------------------ StoreInterleaved4
+
+HWY_API void StoreInterleaved4(const Vec512<uint8_t> v0,
+                               const Vec512<uint8_t> v1,
+                               const Vec512<uint8_t> v2,
+                               const Vec512<uint8_t> v3, Full512<uint8_t> d,
+                               uint8_t* HWY_RESTRICT unaligned) {  // 4*64 bytes
+  // In the comments below, a,b,c,d denote v0..3 (not the tag argument `d`).
+  const auto ba0 = ZipLower(v0, v1);  // b7 a7 .. b0 a0
+  const auto dc0 = ZipLower(v2, v3);  // d7 c7 .. d0 c0
+  const auto ba8 = ZipUpper(v0, v1);
+  const auto dc8 = ZipUpper(v2, v3);
+  const auto i = ZipLower(ba0, dc0).raw;  // 4x128bit: d..a3 d..a0
+  const auto j = ZipUpper(ba0, dc0).raw;  // 4x128bit: d..a7 d..a4
+  const auto k = ZipLower(ba8, dc8).raw;  // 4x128bit: d..aB d..a8
+  const auto l = ZipUpper(ba8, dc8).raw;  // 4x128bit: d..aF d..aC
+  // 128-bit blocks were independent until now; transpose 4x4.
+  const auto j1_j0_i1_i0 = _mm512_shuffle_i64x2(i, j, _MM_SHUFFLE(1, 0, 1, 0));
+  const auto l1_l0_k1_k0 = _mm512_shuffle_i64x2(k, l, _MM_SHUFFLE(1, 0, 1, 0));
+  const auto j3_j2_i3_i2 = _mm512_shuffle_i64x2(i, j, _MM_SHUFFLE(3, 2, 3, 2));
+  const auto l3_l2_k3_k2 = _mm512_shuffle_i64x2(k, l, _MM_SHUFFLE(3, 2, 3, 2));
+  constexpr int k20 = _MM_SHUFFLE(2, 0, 2, 0);  // even blocks of both inputs
+  constexpr int k31 = _MM_SHUFFLE(3, 1, 3, 1);  // odd blocks of both inputs
+  const auto l0_k0_j0_i0 = _mm512_shuffle_i64x2(j1_j0_i1_i0, l1_l0_k1_k0, k20);
+  const auto l1_k1_j1_i1 = _mm512_shuffle_i64x2(j1_j0_i1_i0, l1_l0_k1_k0, k31);
+  const auto l2_k2_j2_i2 = _mm512_shuffle_i64x2(j3_j2_i3_i2, l3_l2_k3_k2, k20);
+  const auto l3_k3_j3_i3 = _mm512_shuffle_i64x2(j3_j2_i3_i2, l3_l2_k3_k2, k31);
+  StoreU(Vec512<uint8_t>{l0_k0_j0_i0}, d, unaligned + 0 * 64);  // block 0 of i..l
+  StoreU(Vec512<uint8_t>{l1_k1_j1_i1}, d, unaligned + 1 * 64);  // block 1 of i..l
+  StoreU(Vec512<uint8_t>{l2_k2_j2_i2}, d, unaligned + 2 * 64);  // block 2 of i..l
+  StoreU(Vec512<uint8_t>{l3_k3_j3_i3}, d, unaligned + 3 * 64);  // block 3 of i..l
+}
+
+// ------------------------------ Reductions
+
+// Returns the sum of all lanes, broadcast to every lane of the result.
+HWY_API Vec512<int32_t> SumOfLanes(Vec512<int32_t> v) {
+  return Set(Full512<int32_t>(), _mm512_reduce_add_epi32(v.raw));
+}
+HWY_API Vec512<int64_t> SumOfLanes(Vec512<int64_t> v) {
+  return Set(Full512<int64_t>(), _mm512_reduce_add_epi64(v.raw));
+}
+HWY_API Vec512<uint32_t> SumOfLanes(Vec512<uint32_t> v) {
+  const Full512<uint32_t> du;  // unsigned reuses the signed overload
+  return BitCast(du, SumOfLanes(BitCast(Full512<int32_t>(), v)));
+}
+HWY_API Vec512<uint64_t> SumOfLanes(Vec512<uint64_t> v) {
+  const Full512<uint64_t> du;  // unsigned reuses the signed overload
+  return BitCast(du, SumOfLanes(BitCast(Full512<int64_t>(), v)));
+}
+HWY_API Vec512<float> SumOfLanes(Vec512<float> v) {
+  return Set(Full512<float>(), _mm512_reduce_add_ps(v.raw));
+}
+HWY_API Vec512<double> SumOfLanes(Vec512<double> v) {
+  return Set(Full512<double>(), _mm512_reduce_add_pd(v.raw));
+}
+
+// Returns the minimum of all lanes, broadcast to every lane of the result.
+HWY_API Vec512<int32_t> MinOfLanes(Vec512<int32_t> v) {
+  return Set(Full512<int32_t>(), _mm512_reduce_min_epi32(v.raw));
+}
+HWY_API Vec512<int64_t> MinOfLanes(Vec512<int64_t> v) {
+  return Set(Full512<int64_t>(), _mm512_reduce_min_epi64(v.raw));
+}
+HWY_API Vec512<uint32_t> MinOfLanes(Vec512<uint32_t> v) {
+  return Set(Full512<uint32_t>(), _mm512_reduce_min_epu32(v.raw));
+}
+HWY_API Vec512<uint64_t> MinOfLanes(Vec512<uint64_t> v) {
+  return Set(Full512<uint64_t>(), _mm512_reduce_min_epu64(v.raw));
+}
+HWY_API Vec512<float> MinOfLanes(Vec512<float> v) {
+  return Set(Full512<float>(), _mm512_reduce_min_ps(v.raw));
+}
+HWY_API Vec512<double> MinOfLanes(Vec512<double> v) {
+  return Set(Full512<double>(), _mm512_reduce_min_pd(v.raw));
+}
+
+// Returns the maximum of all lanes, broadcast to every lane of the result.
+HWY_API Vec512<int32_t> MaxOfLanes(Vec512<int32_t> v) {
+  return Set(Full512<int32_t>(), _mm512_reduce_max_epi32(v.raw));
+}
+HWY_API Vec512<int64_t> MaxOfLanes(Vec512<int64_t> v) {
+  return Set(Full512<int64_t>(), _mm512_reduce_max_epi64(v.raw));
+}
+HWY_API Vec512<uint32_t> MaxOfLanes(Vec512<uint32_t> v) {
+  return Set(Full512<uint32_t>(), _mm512_reduce_max_epu32(v.raw));
+}
+HWY_API Vec512<uint64_t> MaxOfLanes(Vec512<uint64_t> v) {
+  return Set(Full512<uint64_t>(), _mm512_reduce_max_epu64(v.raw));
+}
+HWY_API Vec512<float> MaxOfLanes(Vec512<float> v) {
+  return Set(Full512<float>(), _mm512_reduce_max_ps(v.raw));
+}
+HWY_API Vec512<double> MaxOfLanes(Vec512<double> v) {
+  return Set(Full512<double>(), _mm512_reduce_max_pd(v.raw));
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/targets.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/targets.cc
new file mode 100644
index 0000000000..f910ccd07c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/targets.cc
@@ -0,0 +1,286 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/targets.h"
+
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <atomic>
+#include <limits>
+
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+#include "sanitizer/common_interface_defs.h"  // __sanitizer_print_stack_trace
+#endif                                        // defined(*_SANITIZER)
+
+#if HWY_ARCH_X86
+#include <xmmintrin.h>
+#if HWY_COMPILER_MSVC
+#include <intrin.h>
+#else  // HWY_COMPILER_MSVC
+#include <cpuid.h>
+#endif  // HWY_COMPILER_MSVC
+#endif  // HWY_ARCH_X86
+
+namespace hwy {
+namespace {
+
+#if HWY_ARCH_X86
+
+bool IsBitSet(const uint32_t reg, const int index) {
+  return ((reg >> index) & 1U) != 0;  // true if bit `index` of `reg` is 1
+}
+
+// Executes the CPUID instruction with eax=level and ecx=count. The resulting
+// registers are written to abcd as {eax, ebx, ecx, edx} (hence the name).
+void Cpuid(const uint32_t level, const uint32_t count,
+           uint32_t* HWY_RESTRICT abcd) {
+#if HWY_COMPILER_MSVC
+  // The intrinsic fills an int array; copy it into the unsigned output.
+  int info[4];
+  __cpuidex(info, level, count);
+  abcd[0] = info[0];
+  abcd[1] = info[1];
+  abcd[2] = info[2];
+  abcd[3] = info[3];
+#else  // HWY_COMPILER_MSVC
+  // __cpuid_count assigns the four registers to its last four arguments.
+  uint32_t eax, ebx, ecx, edx;
+  __cpuid_count(level, count, eax, ebx, ecx, edx);
+  abcd[0] = eax;
+  abcd[1] = ebx;
+  abcd[2] = ecx;
+  abcd[3] = edx;
+#endif  // HWY_COMPILER_MSVC
+}
+
+// Reads extended control register 0 (XCR0) and returns its lower 32 bits.
+// Requires CPU support for "OSXSAVE" (see below).
+uint32_t ReadXCR0() {
+#if HWY_COMPILER_MSVC
+  return static_cast<uint32_t>(_xgetbv(0));
+#else  // HWY_COMPILER_MSVC
+  const uint32_t xcr_index = 0;  // passed in ecx
+  uint32_t low32, high32;       // received from eax and edx
+  // Raw opcode bytes of the XGETBV instruction.
+  asm volatile(".byte 0x0F, 0x01, 0xD0" : "=a"(low32), "=d"(high32)
+               : "c"(xcr_index));
+  return low32;
+#endif  // HWY_COMPILER_MSVC
+}
+
+#endif  // HWY_ARCH_X86
+
+// Not function-local => no compiler-generated locking.
+std::atomic<uint32_t> supported_{0};  // 0 = detection result not yet cached
+
+// When running tests, this value can be set to the mocked supported targets
+// mask (0 = no mock). Only written from a single thread before the test starts.
+uint32_t supported_targets_for_test_ = 0;
+
+// ANDed into the result of SupportedTargets; DisableTargets clears bits here.
+uint32_t supported_mask_{std::numeric_limits<uint32_t>::max()};
+
+#if HWY_ARCH_X86
+// Bit flags, one per instruction set extension that may be detected.
+constexpr uint32_t kSSE = 1u << 0;
+constexpr uint32_t kSSE2 = 1u << 1;
+constexpr uint32_t kSSE3 = 1u << 2;
+constexpr uint32_t kSSSE3 = 1u << 3;
+constexpr uint32_t kSSE41 = 1u << 4;
+constexpr uint32_t kSSE42 = 1u << 5;
+constexpr uint32_t kGroupSSE4 = kSSE42 | kSSE41 | kSSSE3 | kSSE3 | kSSE2 | kSSE;
+
+constexpr uint32_t kAVX = 1u << 6;
+constexpr uint32_t kAVX2 = 1u << 7;
+constexpr uint32_t kFMA = 1u << 8;
+constexpr uint32_t kLZCNT = 1u << 9;
+constexpr uint32_t kBMI = 1u << 10;
+constexpr uint32_t kBMI2 = 1u << 11;
+
+// AVX2 is normally taken to imply BMI/BMI2/FMA, which allows using BZHI and
+// (compiler-generated) MULX. VirtualBox lacks them
+// [https://www.virtualbox.org/ticket/15471], so HWY_DISABLE_BMI2_FMA drops
+// both their use and the requirement, keeping AVX2 usable there.
+#ifdef HWY_DISABLE_BMI2_FMA
+constexpr uint32_t kGroupAVX2 = kAVX2 | kAVX | kLZCNT;
+#else
+constexpr uint32_t kGroupAVX2 = kAVX2 | kAVX | kFMA | kBMI | kBMI2 | kLZCNT;
+#endif
+
+constexpr uint32_t kAVX512F = 1u << 12;
+constexpr uint32_t kAVX512VL = 1u << 13;
+constexpr uint32_t kAVX512DQ = 1u << 14;
+constexpr uint32_t kAVX512BW = 1u << 15;
+constexpr uint32_t kGroupAVX3 = kAVX512BW | kAVX512DQ | kAVX512VL | kAVX512F;
+#endif  // HWY_ARCH_X86
+
+}  // namespace
+
+HWY_NORETURN void HWY_FORMAT(3, 4)
+    Abort(const char* file, int line, const char* format, ...) {
+  // Expand the printf-style arguments into a fixed-size buffer.
+  char message[2000];
+  va_list vargs;
+  va_start(vargs, format);
+  vsnprintf(message, sizeof(message), format, vargs);
+  va_end(vargs);
+  fprintf(stderr, "Abort at %s:%d: %s\n", file, line, message);
+
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+    defined(THREAD_SANITIZER)
+  // Sanitizer builds also print a stack trace. That call does not crash the
+  // program; the trap below does, which also allows gdb to break there.
+  __sanitizer_print_stack_trace();
+#endif  // defined(*_SANITIZER)
+
+  fflush(stderr);
+#if HWY_COMPILER_MSVC
+  abort();  // Compile error without this due to HWY_NORETURN.
+#else
+  __builtin_trap();
+#endif
+}
+
+void DisableTargets(uint32_t disabled_targets) {
+  const uint32_t baseline = uint32_t(HWY_ENABLED_BASELINE);  // stays enabled
+  supported_mask_ = ~(disabled_targets & ~baseline);
+  // Only DeInit here: Update() would call SupportedTargets(), which tests use
+  // to tell whether any of the highway dynamic dispatch functions were used.
+  chosen_target.DeInit();
+}
+
+void SetSupportedTargetsForTest(uint32_t targets) {
+  // Zeroing the cache forces the next SupportedTargets() call to re-evaluate;
+  // it then uses the mock stored below if that is nonzero.
+  supported_.store(0, std::memory_order_release);
+  supported_targets_for_test_ = targets;
+  chosen_target.DeInit();
+}
+
+bool SupportedTargetsCalledForTest() {
+  const uint32_t cached = supported_.load(std::memory_order_acquire);
+  return cached != 0;  // nonzero once SupportedTargets() has stored a value
+}
+
+uint32_t SupportedTargets() {
+  // Fast path: a nonzero cached value means detection has already run.
+  uint32_t bits = supported_.load(std::memory_order_acquire);
+  if (HWY_LIKELY(bits != 0)) {
+    return bits & supported_mask_;
+  }
+
+  // When running tests, this allows to mock the current supported targets.
+  if (HWY_UNLIKELY(supported_targets_for_test_ != 0)) {
+    // Store the value to signal that this was used.
+    supported_.store(supported_targets_for_test_, std::memory_order_release);
+    return supported_targets_for_test_ & supported_mask_;
+  }
+
+  bits = HWY_SCALAR;
+
+#if HWY_ARCH_X86
+  uint32_t flags = 0;
+  uint32_t abcd[4];
+  Cpuid(0, 0, abcd);
+  const uint32_t max_level = abcd[0];
+
+  // Standard feature flags
+  Cpuid(1, 0, abcd);
+  flags |= IsBitSet(abcd[3], 25) ? kSSE : 0;
+  flags |= IsBitSet(abcd[3], 26) ? kSSE2 : 0;
+  flags |= IsBitSet(abcd[2], 0) ? kSSE3 : 0;
+  flags |= IsBitSet(abcd[2], 9) ? kSSSE3 : 0;
+  flags |= IsBitSet(abcd[2], 19) ? kSSE41 : 0;
+  flags |= IsBitSet(abcd[2], 20) ? kSSE42 : 0;
+  flags |= IsBitSet(abcd[2], 12) ? kFMA : 0;
+  flags |= IsBitSet(abcd[2], 28) ? kAVX : 0;
+  const bool has_osxsave = IsBitSet(abcd[2], 27);
+
+  // Extended feature flags
+  Cpuid(0x80000001U, 0, abcd);
+  flags |= IsBitSet(abcd[2], 5) ? kLZCNT : 0;
+
+  // Extended features
+  if (max_level >= 7) {
+    Cpuid(7, 0, abcd);
+    flags |= IsBitSet(abcd[1], 3) ? kBMI : 0;
+    flags |= IsBitSet(abcd[1], 5) ? kAVX2 : 0;
+    flags |= IsBitSet(abcd[1], 8) ? kBMI2 : 0;
+
+    flags |= IsBitSet(abcd[1], 16) ? kAVX512F : 0;
+    flags |= IsBitSet(abcd[1], 17) ? kAVX512DQ : 0;
+    flags |= IsBitSet(abcd[1], 30) ? kAVX512BW : 0;
+    flags |= IsBitSet(abcd[1], 31) ? kAVX512VL : 0;
+  }
+
+  // Verify OS support for XSAVE, without which XMM/YMM registers are not
+  // preserved across context switches and are not safe to use.
+  if (has_osxsave) {
+    const uint32_t xcr0 = ReadXCR0();
+    if (!IsBitSet(xcr0, 1)) {  // XMM
+      flags = 0;
+    }
+    if (!IsBitSet(xcr0, 2)) {  // YMM
+      flags &= ~kGroupAVX2;
+    }
+    if ((xcr0 & 0x70) != 0x70) {  // ZMM + opmask
+      flags &= ~kGroupAVX3;
+    }
+  } else {
+    // OSXSAVE clear: XCR0 cannot be read and the OS does not manage YMM/ZMM
+    // state, so AVX2 and AVX3 must not be reported even if CPUID lists them.
+    flags &= ~(kGroupAVX2 | kGroupAVX3);
+  }
+
+  // Set target bit(s) if all their group's flags are all set.
+  if ((flags & kGroupAVX3) == kGroupAVX3) {
+    bits |= HWY_AVX3;
+  }
+  if ((flags & kGroupAVX2) == kGroupAVX2) {
+    bits |= HWY_AVX2;
+  }
+  if ((flags & kGroupSSE4) == kGroupSSE4) {
+    bits |= HWY_SSE4;
+  }
+#else
+  // TODO(janwas): detect for other platforms
+  bits = HWY_ENABLED_BASELINE;
+#endif  // HWY_ARCH_X86
+
+  if ((bits & HWY_ENABLED_BASELINE) != HWY_ENABLED_BASELINE) {
+    fprintf(stderr, "WARNING: CPU supports %zx but software requires %x\n",
+            size_t(bits), HWY_ENABLED_BASELINE);
+  }
+
+  supported_.store(bits, std::memory_order_release);
+  return bits & supported_mask_;
+}
+
+// Declared in targets.h
+ChosenTarget chosen_target;
+
+void ChosenTarget::Update() {
+  // Shift the CPU's supported targets to the bit positions expected by the
+  // ChosenTarget mask.
+  const uint32_t shifted = HWY_CHOSEN_TARGET_SHIFT(hwy::SupportedTargets());
+  // SCALAR is enabled regardless of whether it was compiled, because it is
+  // also used as the fallback mechanism to the baseline target.
+  const uint32_t with_scalar = shifted | HWY_CHOSEN_TARGET_MASK_SCALAR;
+  mask_.store(with_scalar);
+}
+
+}  // namespace hwy
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/targets.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/targets.h
new file mode 100644
index 0000000000..8339763730
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/targets.h
@@ -0,0 +1,518 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_TARGETS_H_
+#define HIGHWAY_HWY_TARGETS_H_
+
+#include <vector>
+
+// For SIMD module implementations and their callers. Defines which targets to
+// generate and call.
+
+#include "hwy/base.h"
+
+//------------------------------------------------------------------------------
+// Optional configuration
+
+// See ../quick_reference.md for documentation of these macros.
+
+// Enabled override of the baseline otherwise detected from predefined macros:
+ #define HWY_BASELINE_TARGETS (HWY_SCALAR)
+
+// Uncomment to override the default blocklist:
+// #define HWY_BROKEN_TARGETS HWY_AVX3
+
+// Enabled: definitely avoid generating any target other than scalar:
+ #define HWY_DISABLED_TARGETS (~HWY_SCALAR)
+
+// Uncomment to avoid emitting BMI/BMI2/FMA instructions (allows generating
+// AVX2 target for VMs which support AVX2 but not the other instruction sets)
+// #define HWY_DISABLE_BMI2_FMA
+
+//------------------------------------------------------------------------------
+// Targets
+
+// Unique bit value for each target. A lower value is "better" (e.g. more lanes)
+// than a higher value within the same group/platform - see HWY_STATIC_TARGET.
+//
+// All values are unconditionally defined so we can test HWY_TARGETS without
+// first checking the HWY_ARCH_*.
+//
+// The C99 preprocessor evaluates #if expressions using intmax_t types, so we
+// can use 32-bit literals.
+
+// 1,2,4: reserved
+#define HWY_AVX3 8
+#define HWY_AVX2 16
+// 32: reserved for AVX
+#define HWY_SSE4 64
+// 0x80, 0x100, 0x200: reserved for SSSE3, SSE3, SSE2
+
+// The highest bit in the HWY_TARGETS mask that a x86 target can have. Used for
+// dynamic dispatch. All x86 target bits must be lower or equal to
+// (1 << HWY_HIGHEST_TARGET_BIT_X86) and they can only use
+// HWY_MAX_DYNAMIC_TARGETS in total.
+#define HWY_HIGHEST_TARGET_BIT_X86 9
+
+#define HWY_SVE2 0x400
+#define HWY_SVE 0x800
+// 0x1000 reserved for Helium
+#define HWY_NEON 0x2000
+
+#define HWY_HIGHEST_TARGET_BIT_ARM 13
+
+// 0x4000, 0x8000 reserved
+#define HWY_PPC8 0x10000  // v2.07 or 3
+// 0x20000, 0x40000 reserved for prior VSX/AltiVec
+
+#define HWY_HIGHEST_TARGET_BIT_PPC 18
+
+// 0x80000 reserved
+#define HWY_WASM 0x100000
+
+#define HWY_HIGHEST_TARGET_BIT_WASM 20
+
+// 0x200000, 0x400000, 0x800000 reserved
+
+#define HWY_RVV 0x1000000
+
+#define HWY_HIGHEST_TARGET_BIT_RVV 24
+
+// 0x2000000, 0x4000000, 0x8000000, 0x10000000 reserved
+
+#define HWY_SCALAR 0x20000000
+
+#define HWY_HIGHEST_TARGET_BIT_SCALAR 29
+
+// Cannot use higher values, otherwise HWY_TARGETS computation might overflow.
+
+//------------------------------------------------------------------------------
+// Set default blocklists
+
+// Disabled means excluded from enabled at user's request. A separate config
+// macro allows disabling without deactivating the blocklist below.
+#ifndef HWY_DISABLED_TARGETS
+#define HWY_DISABLED_TARGETS 0
+#endif
+
+// Broken means excluded from enabled due to known compiler issues. Allow the
+// user to override this blocklist without any guarantee of success.
+#ifndef HWY_BROKEN_TARGETS
+
+// x86 clang-6: we saw multiple AVX2/3 compile errors and in one case invalid
+// SSE4 codegen (possibly only for msan), so disable all those targets.
+#if HWY_ARCH_X86 && (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 700)
+#define HWY_BROKEN_TARGETS (HWY_SSE4 | HWY_AVX2 | HWY_AVX3)
+// This entails a major speed reduction, so warn unless the user explicitly
+// opts in to scalar-only.
+#if !defined(HWY_COMPILE_ONLY_SCALAR)
+#pragma message("x86 Clang <= 6: define HWY_COMPILE_ONLY_SCALAR or upgrade.")
+#endif
+
+// 32-bit may fail to compile AVX2/3.
+#elif HWY_ARCH_X86_32
+#define HWY_BROKEN_TARGETS (HWY_AVX2 | HWY_AVX3)
+
+// MSVC AVX3 support is buggy: https://github.com/Mysticial/Flops/issues/16
+#elif HWY_COMPILER_MSVC != 0
+#define HWY_BROKEN_TARGETS (HWY_AVX3)
+
+// armv7be has not been tested and is not yet supported.
+#elif HWY_ARCH_ARM_V7 && (defined(__ARM_BIG_ENDIAN) || defined(__BIG_ENDIAN))
+#define HWY_BROKEN_TARGETS (HWY_NEON)
+
+#else
+#define HWY_BROKEN_TARGETS 0
+#endif
+
+#endif  // HWY_BROKEN_TARGETS
+
+// Enabled means not disabled nor blocklisted.
+#define HWY_ENABLED(targets) \
+  ((targets) & ~((HWY_DISABLED_TARGETS) | (HWY_BROKEN_TARGETS)))
+
+//------------------------------------------------------------------------------
+// Detect baseline targets using predefined macros
+
+// Baseline means the targets for which the compiler is allowed to generate
+// instructions, implying the target CPU would have to support them. Do not use
+// this directly because it does not take the blocklist into account. Allow the
+// user to override this without any guarantee of success.
+#ifndef HWY_BASELINE_TARGETS
+
+// Also check HWY_ARCH to ensure that simulating unknown platforms ends up with
+// HWY_TARGET == HWY_SCALAR.
+
+#if HWY_ARCH_WASM && defined(__wasm_simd128__)
+#define HWY_BASELINE_WASM HWY_WASM
+#else
+#define HWY_BASELINE_WASM 0
+#endif
+
+// Avoid choosing the PPC target until we have an implementation.
+#if HWY_ARCH_PPC && defined(__VSX__) && 0
+#define HWY_BASELINE_PPC8 HWY_PPC8
+#else
+#define HWY_BASELINE_PPC8 0
+#endif
+
+// Avoid choosing the SVE[2] targets until the implementation is ready.
+#if HWY_ARCH_ARM && defined(__ARM_FEATURE_SVE2) && 0
+#define HWY_BASELINE_SVE2 HWY_SVE2
+#else
+#define HWY_BASELINE_SVE2 0
+#endif
+
+#if HWY_ARCH_ARM && defined(__ARM_FEATURE_SVE) && 0
+#define HWY_BASELINE_SVE HWY_SVE
+#else
+#define HWY_BASELINE_SVE 0
+#endif
+
+// GCC 4.5.4 only defines __ARM_NEON__; 5.4 defines both.
+#if HWY_ARCH_ARM && (defined(__ARM_NEON__) || defined(__ARM_NEON))
+#define HWY_BASELINE_NEON HWY_NEON
+#else
+#define HWY_BASELINE_NEON 0
+#endif
+
+// MSVC does not set SSE4_1, but it does set AVX; checking for the latter means
+// we at least get SSE4 on machines supporting AVX but not AVX2.
+// https://stackoverflow.com/questions/18563978/
+#if HWY_ARCH_X86 && \
+    (defined(__SSE4_1__) || (HWY_COMPILER_MSVC != 0 && defined(__AVX__)))
+#define HWY_BASELINE_SSE4 HWY_SSE4
+#else
+#define HWY_BASELINE_SSE4 0
+#endif
+
+#if HWY_ARCH_X86 && defined(__AVX2__)
+#define HWY_BASELINE_AVX2 HWY_AVX2
+#else
+#define HWY_BASELINE_AVX2 0
+#endif
+
+#if HWY_ARCH_X86 && defined(__AVX512F__)
+#define HWY_BASELINE_AVX3 HWY_AVX3
+#else
+#define HWY_BASELINE_AVX3 0
+#endif
+
+#if HWY_ARCH_RVV && defined(__riscv_vector)
+#define HWY_BASELINE_RVV HWY_RVV
+#else
+#define HWY_BASELINE_RVV 0
+#endif
+
+#define HWY_BASELINE_TARGETS                                                \
+  (HWY_SCALAR | HWY_BASELINE_WASM | HWY_BASELINE_PPC8 | HWY_BASELINE_SVE2 | \
+   HWY_BASELINE_SVE | HWY_BASELINE_NEON | HWY_BASELINE_SSE4 |               \
+   HWY_BASELINE_AVX2 | HWY_BASELINE_AVX3 | HWY_BASELINE_RVV)
+
+#endif  // HWY_BASELINE_TARGETS
+
+//------------------------------------------------------------------------------
+// Choose target for static dispatch
+
+#define HWY_ENABLED_BASELINE HWY_ENABLED(HWY_BASELINE_TARGETS)
+#if HWY_ENABLED_BASELINE == 0
+#error "At least one baseline target must be defined and enabled"
+#endif
+
+// Best baseline, used for static dispatch. This is the least-significant 1-bit
+// within HWY_ENABLED_BASELINE and lower bit values imply "better".
+#define HWY_STATIC_TARGET (HWY_ENABLED_BASELINE & -HWY_ENABLED_BASELINE)
+
+// Start by assuming static dispatch. If we later use dynamic dispatch, this
+// will be defined to other targets during the multiple-inclusion, and finally
+// return to the initial value. Defining this outside begin/end_target ensures
+// inl headers successfully compile by themselves (required by Bazel).
+#define HWY_TARGET HWY_STATIC_TARGET
+
+//------------------------------------------------------------------------------
+// Choose targets for dynamic dispatch according to one of four policies
+
+#if (defined(HWY_COMPILE_ONLY_SCALAR) + defined(HWY_COMPILE_ONLY_STATIC) + \
+     defined(HWY_COMPILE_ALL_ATTAINABLE)) > 1
+#error "Invalid config: can only define a single policy for targets"
+#endif
+
+// Attainable means enabled and the compiler allows intrinsics (even when not
+// allowed to autovectorize). Used in 3 and 4.
+#if HWY_ARCH_X86
+#define HWY_ATTAINABLE_TARGETS \
+  HWY_ENABLED(HWY_SCALAR | HWY_SSE4 | HWY_AVX2 | HWY_AVX3)
+#else
+#define HWY_ATTAINABLE_TARGETS HWY_ENABLED_BASELINE
+#endif
+
+// 1) For older compilers: disable all SIMD (could also set HWY_DISABLED_TARGETS
+// to ~HWY_SCALAR, but this is more explicit).
+#if defined(HWY_COMPILE_ONLY_SCALAR)
+#undef HWY_STATIC_TARGET
+#define HWY_STATIC_TARGET HWY_SCALAR  // override baseline
+#define HWY_TARGETS HWY_SCALAR
+
+// 2) For forcing static dispatch without code changes (removing HWY_EXPORT)
+#elif defined(HWY_COMPILE_ONLY_STATIC)
+#define HWY_TARGETS HWY_STATIC_TARGET
+
+// 3) For tests: include all attainable targets (in particular: scalar)
+#elif defined(HWY_COMPILE_ALL_ATTAINABLE) || defined(HWY_IS_TEST)
+#define HWY_TARGETS HWY_ATTAINABLE_TARGETS
+
+// 4) Default: attainable WITHOUT non-best baseline. This reduces code size by
+// excluding superseded targets, in particular scalar.
+#else
+#define HWY_TARGETS (HWY_ATTAINABLE_TARGETS & (2 * HWY_STATIC_TARGET - 1))
+
+#endif  // target policy
+
+// HWY_ONCE and the multiple-inclusion mechanism rely on HWY_STATIC_TARGET being
+// one of the dynamic targets. This also implies HWY_TARGETS != 0 and
+// (HWY_TARGETS & HWY_ENABLED_BASELINE) != 0.
+#if (HWY_TARGETS & HWY_STATIC_TARGET) == 0
+#error "Logic error: best baseline should be included in dynamic targets"
+#endif
+
+//------------------------------------------------------------------------------
+
+namespace hwy {
+
+// Returns (cached) bitfield of enabled targets that are supported on this CPU.
+// Implemented in targets.cc; unconditionally compiled to support the
+// use case of binary-only distributions. The HWY_SUPPORTED_TARGETS wrapper may
+// allow eliding calls to this function.
+uint32_t SupportedTargets();
+
+// Disable from runtime dispatch the mask of compiled in targets. Targets that
+// were not enabled at compile time are ignored. This function is useful to
+// disable a target supported by the CPU that is known to have bugs or when a
+// lower target is desired. For this reason, attempts to disable targets which
+// are in HWY_ENABLED_BASELINE have no effect so SupportedTargets() always
+// returns at least the baseline target.
+void DisableTargets(uint32_t disabled_targets);
+
+// Single target: reduce code size by eliding the call and conditional branches
+// inside Choose*() functions.
+#if (HWY_TARGETS & (HWY_TARGETS - 1)) == 0
+#define HWY_SUPPORTED_TARGETS HWY_TARGETS
+#else
+#define HWY_SUPPORTED_TARGETS hwy::SupportedTargets()
+#endif
+
+// Set the mock mask of CPU supported targets instead of the actual CPU
+// supported targets computed in SupportedTargets(). The return value of
+// SupportedTargets() will still be affected by the DisableTargets() mask
+// regardless of this mock, to prevent accidentally adding targets that are
+// known to be buggy on the current CPU. Call with a mask of 0 to disable the
+// mock and use the actual CPU supported targets instead.
+void SetSupportedTargetsForTest(uint32_t targets);
+
+// Returns whether the SupportedTargets() function was called since the last
+// SetSupportedTargetsForTest() call.
+bool SupportedTargetsCalledForTest();
+
+// Splits (SupportedTargets() & HWY_TARGETS) into its individual set bits and
+// returns them ordered from the least significant bit upwards. Each element
+// is therefore a single HWY_* target value such as HWY_SCALAR or HWY_NEON.
+// Because the bits come from SupportedTargets(), a mock installed through
+// SetSupportedTargetsForTest() is reflected in the result.
+HWY_INLINE std::vector<uint32_t> SupportedAndGeneratedTargets() {
+  std::vector<uint32_t> result;
+  uint32_t remaining = SupportedTargets() & HWY_TARGETS;
+  while (remaining != 0) {
+    // Isolate the lowest set bit, record it, then clear it.
+    const uint32_t lowest_bit = remaining & ~(remaining - 1);
+    result.push_back(lowest_bit);
+    remaining &= remaining - 1;
+  }
+  return result;
+}
+
+// Maps a single HWY_* target bit to a short human-readable name. Only the
+// targets of the architecture being compiled, plus HWY_SCALAR, are listed;
+// every other value yields "Unknown".
+static inline HWY_MAYBE_UNUSED const char* TargetName(uint32_t target) {
+  switch (target) {
+#if HWY_ARCH_X86
+    case HWY_SSE4: return "SSE4";
+    case HWY_AVX2: return "AVX2";
+    case HWY_AVX3: return "AVX3";
+#endif  // HWY_ARCH_X86
+
+#if HWY_ARCH_ARM
+    case HWY_SVE2: return "SVE2";
+    case HWY_SVE: return "SVE";
+    case HWY_NEON: return "Neon";
+#endif  // HWY_ARCH_ARM
+
+#if HWY_ARCH_PPC
+    case HWY_PPC8: return "Power8";
+#endif  // HWY_ARCH_PPC
+
+#if HWY_ARCH_WASM
+    case HWY_WASM: return "Wasm";
+#endif  // HWY_ARCH_WASM
+
+#if HWY_ARCH_RVV
+    case HWY_RVV: return "RVV";
+#endif  // HWY_ARCH_RVV
+
+    case HWY_SCALAR: return "Scalar";
+
+    // The fallback string must satisfy gtest IsValidParamName().
+    default: return "Unknown";
+  }
+}
+
+// The maximum number of dynamic targets on any architecture is defined by
+// HWY_MAX_DYNAMIC_TARGETS and depends on the arch.
+
+// For the ChosenTarget mask and index we use a different bit arrangement than
+// in the HWY_TARGETS mask. Only the targets involved in the current
+// architecture are used in this mask, and therefore only the least significant
+// (HWY_MAX_DYNAMIC_TARGETS + 2) bits of the uint32_t mask are used. The least
+// significant bit is set when the mask is not initialized, the next
+// HWY_MAX_DYNAMIC_TARGETS more significant bits are a range of bits from the
+// HWY_TARGETS or SupportedTargets() mask for the given architecture shifted to
+// that position and the next more significant bit is used for the scalar
+// target. Because of this we need to define equivalent values for HWY_TARGETS
+// in this representation.
+// This representation makes it possible to apply ctz() to the mask and obtain
+// a small number that is used as an index into the dynamic dispatch table.
+// The first entry is used while the mask is uninitialized, the following
+// HWY_MAX_DYNAMIC_TARGETS entries are for dynamic dispatch and the last one is
+// for scalar.
+
+// The HWY_SCALAR bit in the ChosenTarget mask format: the bit directly above
+// the HWY_MAX_DYNAMIC_TARGETS dynamic-target bits. The "+ 1" accounts for
+// bit 0, which is the "uninitialized" flag.
+#define HWY_CHOSEN_TARGET_MASK_SCALAR (1u << (HWY_MAX_DYNAMIC_TARGETS + 1))
+
+// Converts from a HWY_TARGETS mask to a ChosenTarget mask format for the
+// current architecture: X is shifted right so that bit
+// (HWY_HIGHEST_TARGET_BIT + 1 - HWY_MAX_DYNAMIC_TARGETS) lands in bit 0, only
+// the low HWY_MAX_DYNAMIC_TARGETS bits are kept, and the result is shifted
+// left by one so that bit 0 stays free.
+#define HWY_CHOSEN_TARGET_SHIFT(X)                                    \
+  ((((X) >> (HWY_HIGHEST_TARGET_BIT + 1 - HWY_MAX_DYNAMIC_TARGETS)) & \
+    ((1u << HWY_MAX_DYNAMIC_TARGETS) - 1))                            \
+   << 1)
+
+// The HWY_TARGETS mask in the ChosenTarget mask format. The scalar bit and
+// bit 0 (uninitialized) are always included in addition to the shifted
+// HWY_TARGETS bits.
+#define HWY_CHOSEN_TARGET_MASK_TARGETS \
+  (HWY_CHOSEN_TARGET_SHIFT(HWY_TARGETS) | HWY_CHOSEN_TARGET_MASK_SCALAR | 1u)
+
+#if HWY_ARCH_X86
+// Maximum number of dynamic targets, changing this value is an ABI incompatible
+// change
+#define HWY_MAX_DYNAMIC_TARGETS 10
+#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_X86
+// These must match the order in which the HWY_TARGETS are defined,
+// starting from the least significant (HWY_HIGHEST_TARGET_BIT + 1 -
+// HWY_MAX_DYNAMIC_TARGETS) bit. This list must contain exactly
+// HWY_MAX_DYNAMIC_TARGETS elements and does not include SCALAR. The first entry
+// corresponds to the best target. Don't include a "," at the end of the list.
+#define HWY_CHOOSE_TARGET_LIST(func_name)        \
+  nullptr,                        /* reserved */ \
+      nullptr,                    /* reserved */ \
+      nullptr,                    /* reserved */ \
+      HWY_CHOOSE_AVX3(func_name), /* AVX3 */     \
+      HWY_CHOOSE_AVX2(func_name), /* AVX2 */     \
+      nullptr,                    /* AVX */      \
+      HWY_CHOOSE_SSE4(func_name), /* SSE4 */     \
+      nullptr,                    /* SSSE3 */    \
+      nullptr,                    /* SSE3 */     \
+      nullptr                     /* SSE2 */
+
+#elif HWY_ARCH_ARM
+// See HWY_ARCH_X86 above for details.
+#define HWY_MAX_DYNAMIC_TARGETS 4
+#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_ARM
+#define HWY_CHOOSE_TARGET_LIST(func_name)       \
+  HWY_CHOOSE_SVE2(func_name),    /* SVE2 */     \
+      HWY_CHOOSE_SVE(func_name), /* SVE */      \
+      nullptr,                   /* reserved */ \
+      HWY_CHOOSE_NEON(func_name) /* NEON */
+
+#elif HWY_ARCH_PPC
+// See HWY_ARCH_X86 above for details.
+#define HWY_MAX_DYNAMIC_TARGETS 5
+#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_PPC
+#define HWY_CHOOSE_TARGET_LIST(func_name)        \
+  nullptr,                        /* reserved */ \
+      nullptr,                    /* reserved */ \
+      HWY_CHOOSE_PPC8(func_name), /* PPC8 */     \
+      nullptr,                    /* VSX */      \
+      nullptr                     /* AltiVec */
+
+#elif HWY_ARCH_WASM
+// See HWY_ARCH_X86 above for details.
+#define HWY_MAX_DYNAMIC_TARGETS 4
+#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_WASM
+#define HWY_CHOOSE_TARGET_LIST(func_name)       \
+  nullptr,                       /* reserved */ \
+      nullptr,                   /* reserved */ \
+      nullptr,                   /* reserved */ \
+      HWY_CHOOSE_WASM(func_name) /* WASM */
+
+#elif HWY_ARCH_RVV
+// See HWY_ARCH_X86 above for details.
+#define HWY_MAX_DYNAMIC_TARGETS 4
+#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_RVV
+#define HWY_CHOOSE_TARGET_LIST(func_name)       \
+  nullptr,                       /* reserved */ \
+      nullptr,                   /* reserved */ \
+      nullptr,                   /* reserved */ \
+      HWY_CHOOSE_RVV(func_name) /* RVV */
+
+#else
+// Unknown architecture, will use HWY_SCALAR without dynamic dispatch, though
+// still creating single-entry tables in HWY_EXPORT to ensure portability.
+#define HWY_MAX_DYNAMIC_TARGETS 1
+#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_SCALAR
+#endif
+
+// Holds the atomically accessed ChosenTarget mask used to pick an entry of
+// the dynamic dispatch table. A mask value of 1 (only bit 0 set) means
+// "uninitialized".
+struct ChosenTarget {
+ public:
+  // Refreshes the mask from the targets currently supported by the CPU.
+  // Only declared here.
+  void Update();
+
+  // Puts the object back into the uninitialized state (mask value 1).
+  void DeInit() { mask_.store(1); }
+
+  // True once the mask differs from the uninitialized value 1. Useful for
+  // finding out whether any HWY_DYNAMIC_DISPATCH function was called.
+  bool IsInitialized() const {
+    const uint32_t current = mask_.load();
+    return current != 1;
+  }
+
+  // Returns the index into the dynamic dispatch table to use on the current
+  // CPU. The mask is first restricted to HWY_CHOSEN_TARGET_MASK_TARGETS. This
+  // method must stay in the header so that the macro takes the value defined
+  // in the calling translation unit, which may differ from other translation
+  // units; that way only targets actually compiled into the calling module
+  // are considered.
+  size_t HWY_INLINE GetIndex() const {
+    const uint32_t usable = mask_.load() & HWY_CHOSEN_TARGET_MASK_TARGETS;
+    return hwy::Num0BitsBelowLS1Bit_Nonzero32(usable);
+  }
+
+ private:
+  // Starts at 1 (uninitialized) so that GetIndex() returns 0.
+  std::atomic<uint32_t> mask_{1};
+};
+
+extern ChosenTarget chosen_target;
+
+}  // namespace hwy
+
+#endif  // HIGHWAY_HWY_TARGETS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/targets_test.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/targets_test.cc
new file mode 100644
index 0000000000..4cb9291d15
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/targets_test.cc
@@ -0,0 +1,102 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/targets.h"
+
+#include "hwy/tests/test_util-inl.h"
+
+namespace fake {
+
+// Defines N_<TGT>::FakeFunction, which ignores its argument and returns the
+// HWY_<TGT> target bit. HWY_EXPORT below gathers these per-target functions
+// into a dispatch table, so the returned value identifies which table entry
+// was called.
+#define DECLARE_FUNCTION(TGT)                        \
+  namespace N_##TGT {                                \
+    uint32_t FakeFunction(int) { return HWY_##TGT; } \
+  }
+
+// One fake per target that can appear in a HWY_CHOOSE_TARGET_LIST. SVE2 and
+// SVE are included because the ARM list refers to them; without these
+// definitions the table could not be built when those targets are enabled.
+DECLARE_FUNCTION(AVX3)
+DECLARE_FUNCTION(AVX2)
+DECLARE_FUNCTION(SSE4)
+DECLARE_FUNCTION(SVE2)
+DECLARE_FUNCTION(SVE)
+DECLARE_FUNCTION(NEON)
+DECLARE_FUNCTION(PPC8)
+DECLARE_FUNCTION(WASM)
+DECLARE_FUNCTION(RVV)
+DECLARE_FUNCTION(SCALAR)
+
+#undef DECLARE_FUNCTION
+
+HWY_EXPORT(FakeFunction);
+
+// For every target compiled into this translation unit, mocks the CPU as
+// supporting only that target and verifies that dynamic dispatch reaches the
+// fake belonging to it.
+void CheckFakeFunction() {
+#define CHECK_ARRAY_ENTRY(TGT)                                              \
+  if ((HWY_TARGETS & HWY_##TGT) != 0) {                                     \
+    hwy::SetSupportedTargetsForTest(HWY_##TGT);                             \
+    /* Calling Update() first to make &HWY_DYNAMIC_DISPATCH() return */     \
+    /* the pointer to the already cached function. */                       \
+    hwy::chosen_target.Update();                                            \
+    EXPECT_EQ(uint32_t(HWY_##TGT), HWY_DYNAMIC_DISPATCH(FakeFunction)(42)); \
+    /* Calling DeInit() will test that the initializer function */          \
+    /* also calls the right function. */                                    \
+    hwy::chosen_target.DeInit();                                            \
+    EXPECT_EQ(uint32_t(HWY_##TGT), HWY_DYNAMIC_DISPATCH(FakeFunction)(42)); \
+    /* Second call uses the cached value from the previous call. */         \
+    EXPECT_EQ(uint32_t(HWY_##TGT), HWY_DYNAMIC_DISPATCH(FakeFunction)(42)); \
+  }
+  CHECK_ARRAY_ENTRY(AVX3)
+  CHECK_ARRAY_ENTRY(AVX2)
+  CHECK_ARRAY_ENTRY(SSE4)
+  CHECK_ARRAY_ENTRY(SVE2)
+  CHECK_ARRAY_ENTRY(SVE)
+  CHECK_ARRAY_ENTRY(NEON)
+  CHECK_ARRAY_ENTRY(PPC8)
+  CHECK_ARRAY_ENTRY(WASM)
+  CHECK_ARRAY_ENTRY(RVV)
+  CHECK_ARRAY_ENTRY(SCALAR)
+#undef CHECK_ARRAY_ENTRY
+}
+
+}  // namespace fake
+
+namespace hwy {
+
+// Test fixture whose TearDown() clears the supported-targets mock and the
+// disabled-targets mask after each test.
+class HwyTargetsTest : public testing::Test {
+ protected:
+  void TearDown() override {
+    // A mask of 0 switches the mock off.
+    SetSupportedTargetsForTest(0);
+    DisableTargets(0);  // Reset the mask.
+  }
+};
+
+// Verifies that the entries of the static array built by HWY_EXPORT are in
+// the order implied by the target bits. Only targets enabled in the current
+// compilation are checked.
+TEST_F(HwyTargetsTest, ChosenTargetOrderTest) {
+  fake::CheckFakeFunction();
+}
+
+// Exercises DisableTargets(): the baseline cannot be disabled, and disabling
+// the best non-baseline target leaves all other targets reported.
+TEST_F(HwyTargetsTest, DisabledTargetsTest) {
+  // Request that every target be disabled; the baseline must survive.
+  DisableTargets(~0u);
+  HWY_ASSERT(HWY_ENABLED_BASELINE == SupportedTargets());
+
+  DisableTargets(0);  // Clear the disable mask.
+  const uint32_t current_targets = SupportedTargets();
+  const bool has_non_baseline = (current_targets & ~HWY_ENABLED_BASELINE) != 0;
+  if (has_non_baseline) {
+    // The lowest set bit of the mask is the best target; disable that one.
+    const uint32_t lowest_target = current_targets & (~current_targets + 1);
+    // It is expected to lie outside the baseline.
+    HWY_ASSERT((lowest_target & ~HWY_ENABLED_BASELINE) != 0);
+    DisableTargets(lowest_target);
+
+    // All remaining targets must still be reported.
+    HWY_ASSERT((lowest_target ^ current_targets) == SupportedTargets());
+    DisableTargets(0);  // Clear the disable mask.
+  }
+  // Otherwise the baseline is the only compiled target and nothing further
+  // can be tested.
+}
+
+}  // namespace hwy
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/arithmetic_test.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/arithmetic_test.cc
new file mode 100644
index 0000000000..07086356e6
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/arithmetic_test.cc
@@ -0,0 +1,1249 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <limits>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "tests/arithmetic_test.cc"
+#include "hwy/foreach_target.h"
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Checks lane-wise addition and subtraction through operator+, Add and Sub,
+// using Iota vectors whose lane i holds (start + i).
+struct TestPlusMinus {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    auto expected = AllocateAligned<T>(N);
+
+    const auto v2 = Iota(d, T(2));
+    const auto v3 = Iota(d, T(3));
+    const auto v4 = Iota(d, T(4));
+
+    // operator+: lane i of v2 + v3 is (2 + i) + (3 + i).
+    for (size_t i = 0; i < N; ++i) {
+      expected[i] = static_cast<T>((2 + i) + (3 + i));
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), v2 + v3);
+    // Sub: v4 - v2 is 2 in every lane.
+    HWY_ASSERT_VEC_EQ(d, Set(d, 2), Sub(v4, v2));
+
+    // Add: lane i of v2 + v4 is (2 + i) + (4 + i), i.e. 6, 8, ...
+    for (size_t i = 0; i < N; ++i) {
+      expected[i] = static_cast<T>((2 + i) + (4 + i));
+    }
+    auto sum = Add(v2, v4);
+    HWY_ASSERT_VEC_EQ(d, Load(d, expected.get()), sum);
+
+    // Subtracting v4 again must give back v2.
+    sum = Sub(sum, v4);
+    HWY_ASSERT_VEC_EQ(d, v2, sum);
+  }
+};
+
+// Runs TestPlusMinus through ForAllTypes with a ForPartialVectors wrapper.
+HWY_NOINLINE void TestAllPlusMinus() {
+  const ForPartialVectors<TestPlusMinus> test;
+  ForAllTypes(test);
+}
+
+// Checks SaturatedAdd/SaturatedSub on unsigned lanes: some results are
+// expected to equal plain Add/Sub, the others to be LimitsMax<T>() or zero.
+struct TestUnsignedSaturatingArithmetic {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto zero = Zero(d);
+    const auto iota1 = Iota(d, 1);
+    const auto maxval = Set(d, LimitsMax<T>());
+
+    // Sums expected to agree with plain Add.
+    HWY_ASSERT_VEC_EQ(d, Add(zero, zero), SaturatedAdd(zero, zero));
+    HWY_ASSERT_VEC_EQ(d, Add(zero, iota1), SaturatedAdd(zero, iota1));
+    HWY_ASSERT_VEC_EQ(d, Add(zero, maxval), SaturatedAdd(zero, maxval));
+    // Sums expected to be clamped to the maximum.
+    HWY_ASSERT_VEC_EQ(d, maxval, SaturatedAdd(iota1, maxval));
+    HWY_ASSERT_VEC_EQ(d, maxval, SaturatedAdd(maxval, maxval));
+
+    // Differences expected to be zero.
+    HWY_ASSERT_VEC_EQ(d, zero, SaturatedSub(zero, zero));
+    HWY_ASSERT_VEC_EQ(d, zero, SaturatedSub(zero, iota1));
+    HWY_ASSERT_VEC_EQ(d, zero, SaturatedSub(iota1, iota1));
+    HWY_ASSERT_VEC_EQ(d, zero, SaturatedSub(iota1, maxval));
+    // Difference expected to agree with plain Sub.
+    HWY_ASSERT_VEC_EQ(d, Sub(maxval, iota1), SaturatedSub(maxval, iota1));
+  }
+};
+
+// Checks SaturatedAdd/SaturatedSub on signed lanes, including the cases that
+// are expected to clamp to LimitsMax<T>() and LimitsMin<T>().
+struct TestSignedSaturatingArithmetic {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto zero = Zero(d);
+    const auto v_max = Set(d, LimitsMax<T>());
+    const auto v_min = Set(d, LimitsMin<T>());
+    // Masking with the maximum and OR-ing in 1 keeps every lane positive even
+    // if Iota wraps around.
+    const auto v_pos = Or(And(Iota(d, 0), v_max), Set(d, 1));
+    const auto v_neg = Sub(zero, v_pos);
+    // Sanity-check the signs of the inputs.
+    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), Gt(v_pos, zero));
+    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), Lt(v_neg, zero));
+
+    HWY_ASSERT_VEC_EQ(d, zero, SaturatedAdd(zero, zero));
+    HWY_ASSERT_VEC_EQ(d, v_pos, SaturatedAdd(zero, v_pos));
+    HWY_ASSERT_VEC_EQ(d, v_max, SaturatedAdd(zero, v_max));
+    // Expected to clamp to the maximum.
+    HWY_ASSERT_VEC_EQ(d, v_max, SaturatedAdd(v_pos, v_max));
+    HWY_ASSERT_VEC_EQ(d, v_max, SaturatedAdd(v_max, v_max));
+
+    HWY_ASSERT_VEC_EQ(d, zero, SaturatedSub(zero, zero));
+    HWY_ASSERT_VEC_EQ(d, Sub(zero, v_pos), SaturatedSub(zero, v_pos));
+    HWY_ASSERT_VEC_EQ(d, v_neg, SaturatedSub(v_neg, zero));
+    // Expected to clamp to the minimum.
+    HWY_ASSERT_VEC_EQ(d, v_min, SaturatedSub(v_min, v_pos));
+    HWY_ASSERT_VEC_EQ(d, v_min, SaturatedSub(v_min, v_max));
+  }
+};
+
+// Runs the saturating tests for 8- and 16-bit lanes: unsigned first, then
+// signed.
+HWY_NOINLINE void TestAllSaturatingArithmetic() {
+  const ForPartialVectors<TestUnsignedSaturatingArithmetic> unsigned_test;
+  const ForPartialVectors<TestSignedSaturatingArithmetic> signed_test;
+
+  unsigned_test(uint8_t());
+  unsigned_test(uint16_t());
+
+  signed_test(int8_t());
+  signed_test(int16_t());
+}
+
+// Checks AverageRound on the values 0, 1 and 2; the expectations for (0, 1)
+// and (1, 2) show that halves are rounded up.
+struct TestAverage {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto zero = Zero(d);
+    const auto one = Set(d, T(1));
+    const auto two = Set(d, T(2));
+
+    HWY_ASSERT_VEC_EQ(d, zero, AverageRound(zero, zero));
+    HWY_ASSERT_VEC_EQ(d, one, AverageRound(zero, one));
+    HWY_ASSERT_VEC_EQ(d, one, AverageRound(one, one));
+    HWY_ASSERT_VEC_EQ(d, two, AverageRound(one, two));
+    HWY_ASSERT_VEC_EQ(d, two, AverageRound(two, two));
+  }
+};
+
+// Runs TestAverage for uint8_t and uint16_t lanes.
+HWY_NOINLINE void TestAllAverage() {
+  const ForPartialVectors<TestAverage> average_test;
+  average_test(uint8_t());
+  average_test(uint16_t());
+}
+
+// Checks Abs on integer lanes for 0, +1, -1 and the type limits. Note that
+// the test expects Abs(LimitsMin<T>()) to remain LimitsMin<T>().
+struct TestAbs {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto zero = Zero(d);
+    const auto pos_one = Set(d, T(1));
+    const auto neg_one = Set(d, T(-1));
+    const auto v_max = Set(d, LimitsMax<T>());
+    const auto v_min = Set(d, LimitsMin<T>());
+
+    HWY_ASSERT_VEC_EQ(d, zero, Abs(zero));
+    HWY_ASSERT_VEC_EQ(d, pos_one, Abs(pos_one));
+    HWY_ASSERT_VEC_EQ(d, pos_one, Abs(neg_one));
+    HWY_ASSERT_VEC_EQ(d, v_max, Abs(v_max));
+    HWY_ASSERT_VEC_EQ(d, v_min, Abs(v_min));
+  }
+};
+
+// Checks Abs on floating-point lanes for 0, +/-1 and +/-0.01.
+struct TestFloatAbs {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto zero = Zero(d);
+    const auto pos_one = Set(d, T(1));
+    const auto neg_one = Set(d, T(-1));
+    const auto pos_small = Set(d, T(0.01));
+    const auto neg_small = Set(d, T(-0.01));
+
+    HWY_ASSERT_VEC_EQ(d, zero, Abs(zero));
+    HWY_ASSERT_VEC_EQ(d, pos_one, Abs(pos_one));
+    HWY_ASSERT_VEC_EQ(d, pos_one, Abs(neg_one));
+    HWY_ASSERT_VEC_EQ(d, pos_small, Abs(pos_small));
+    HWY_ASSERT_VEC_EQ(d, pos_small, Abs(neg_small));
+  }
+};
+
+// Runs TestAbs via ForSignedTypes and TestFloatAbs via ForFloatTypes.
+HWY_NOINLINE void TestAllAbs() {
+  const ForPartialVectors<TestAbs> integer_test;
+  ForSignedTypes(integer_test);
+
+  const ForPartialVectors<TestFloatAbs> float_test;
+  ForFloatTypes(float_test);
+}
+
+// Tests ShiftLeft<k> (compile-time count) and ShiftLeftSame (one runtime
+// count for all lanes) with counts 0, 1 and the maximum. With kSigned=true the
+// input lanes are -N..-1; the kSigned=false variant is run first so that
+// non-negative inputs are covered as well.
+template <bool kSigned>
+struct TestLeftShifts {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T t, D d) {
+    if (kSigned) {
+      // Also test positive values
+      TestLeftShifts</*kSigned=*/false>()(t, d);
+    }
+
+    using TI = MakeSigned<T>;
+    using TU = MakeUnsigned<T>;
+    const size_t N = Lanes(d);
+    auto expected = AllocateAligned<T>(N);
+
+    const auto values = Iota(d, kSigned ? -TI(N) : TI(0));  // value to shift
+    // Largest valid shift count: number of bits in T minus one.
+    constexpr size_t kMaxShift = (sizeof(T) * 8) - 1;
+
+    // 0
+    HWY_ASSERT_VEC_EQ(d, values, ShiftLeft<0>(values));
+    HWY_ASSERT_VEC_EQ(d, values, ShiftLeftSame(values, 0));
+
+    // 1
+    for (size_t i = 0; i < N; ++i) {
+      // Same lane value as in `values`; the expected result is computed via
+      // the unsigned type TU.
+      const T value = kSigned ? T(i) - T(N) : T(i);
+      expected[i] = T(TU(value) << 1);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeft<1>(values));
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeftSame(values, 1));
+
+    // max
+    for (size_t i = 0; i < N; ++i) {
+      const T value = kSigned ? T(i) - T(N) : T(i);
+      expected[i] = T(TU(value) << kMaxShift);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeft<kMaxShift>(values));
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeftSame(values, kMaxShift));
+  }
+};
+
+// Tests Shl, where the shift count is itself a vector. Covers identical
+// counts in all lanes (0, 1, max) as well as counts that differ per lane.
+// With kSigned=true the input lanes are -N..-1; the kSigned=false variant is
+// run first so that non-negative inputs are covered as well.
+template <bool kSigned>
+struct TestVariableLeftShifts {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T t, D d) {
+    if (kSigned) {
+      // Also test positive values
+      TestVariableLeftShifts</*kSigned=*/false>()(t, d);
+    }
+
+    using TI = MakeSigned<T>;
+    using TU = MakeUnsigned<T>;
+    const size_t N = Lanes(d);
+    auto expected = AllocateAligned<T>(N);
+
+    const auto v0 = Zero(d);
+    const auto v1 = Set(d, 1);
+    const auto values = Iota(d, kSigned ? -TI(N) : TI(0));  // value to shift
+
+    // Largest valid shift count: number of bits in T minus one.
+    constexpr size_t kMaxShift = (sizeof(T) * 8) - 1;
+    const auto max_shift = Set(d, kMaxShift);
+    // Lane i: (i & kMaxShift), i.e. counts ascending from 0.
+    const auto small_shifts = And(Iota(d, 0), max_shift);
+    // Lane i: kMaxShift - (i & kMaxShift), i.e. counts descending from max.
+    const auto large_shifts = max_shift - small_shifts;
+
+    // Same: 0
+    HWY_ASSERT_VEC_EQ(d, values, Shl(values, v0));
+
+    // Same: 1
+    for (size_t i = 0; i < N; ++i) {
+      const T value = kSigned ? T(i) - T(N) : T(i);
+      expected[i] = T(TU(value) << 1);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), Shl(values, v1));
+
+    // Same: max
+    for (size_t i = 0; i < N; ++i) {
+      const T value = kSigned ? T(i) - T(N) : T(i);
+      expected[i] = T(TU(value) << kMaxShift);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), Shl(values, max_shift));
+
+    // Variable: small
+    for (size_t i = 0; i < N; ++i) {
+      const T value = kSigned ? T(i) - T(N) : T(i);
+      expected[i] = T(TU(value) << (i & kMaxShift));
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), Shl(values, small_shifts));
+
+    // Variable: large
+    for (size_t i = 0; i < N; ++i) {
+      // The shifted value is 1 in every lane, so only a single bit moves.
+      expected[i] = T(TU(1) << (kMaxShift - (i & kMaxShift)));
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), Shl(v1, large_shifts));
+  }
+};
+
+// Tests ShiftRight<k> (compile-time count) and ShiftRightSame (one runtime
+// count for all lanes) on unsigned lanes with counts 0, 1 and the maximum.
+struct TestUnsignedRightShifts {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    auto expected = AllocateAligned<T>(N);
+
+    const auto values = Iota(d, 0);
+
+    const T kMax = LimitsMax<T>();
+    // Largest valid shift count: number of bits in T minus one.
+    constexpr size_t kMaxShift = (sizeof(T) * 8) - 1;
+
+    // Shift by 0
+    HWY_ASSERT_VEC_EQ(d, values, ShiftRight<0>(values));
+    HWY_ASSERT_VEC_EQ(d, values, ShiftRightSame(values, 0));
+
+    // Shift by 1
+    for (size_t i = 0; i < N; ++i) {
+      // (i & kMax) reduces the lane index to the range of T before shifting.
+      expected[i] = T(T(i & kMax) >> 1);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRight<1>(values));
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRightSame(values, 1));
+
+    // max
+    for (size_t i = 0; i < N; ++i) {
+      expected[i] = T(T(i & kMax) >> kMaxShift);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRight<kMaxShift>(values));
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRightSame(values, kMaxShift));
+  }
+};
+
+// Tests Shr on unsigned lanes, where the shift count is itself a vector.
+// Covers identical counts in all lanes (0, 1, max) as well as counts that
+// differ per lane.
+struct TestVariableUnsignedRightShifts {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    auto expected = AllocateAligned<T>(N);
+
+    const auto v0 = Zero(d);
+    const auto v1 = Set(d, 1);
+    const auto values = Iota(d, 0);
+
+    const T kMax = LimitsMax<T>();
+    const auto max = Set(d, kMax);
+
+    // Largest valid shift count: number of bits in T minus one.
+    constexpr size_t kMaxShift = (sizeof(T) * 8) - 1;
+    const auto max_shift = Set(d, kMaxShift);
+    // Lane i: (i & kMaxShift), i.e. counts ascending from 0.
+    const auto small_shifts = And(Iota(d, 0), max_shift);
+    // Lane i: kMaxShift - (i & kMaxShift), i.e. counts descending from max.
+    const auto large_shifts = max_shift - small_shifts;
+
+    // Same: 0
+    HWY_ASSERT_VEC_EQ(d, values, Shr(values, v0));
+
+    // Same: 1
+    for (size_t i = 0; i < N; ++i) {
+      expected[i] = T(T(i & kMax) >> 1);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), Shr(values, v1));
+
+    // Same: max
+    // The test expects zero in every lane here.
+    HWY_ASSERT_VEC_EQ(d, v0, Shr(values, max_shift));
+
+    // Variable: small
+    for (size_t i = 0; i < N; ++i) {
+      expected[i] = T(i) >> (i & kMaxShift);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), Shr(values, small_shifts));
+
+    // Variable: Large
+    for (size_t i = 0; i < N; ++i) {
+      // The shifted value is kMax (all bits set) in every lane.
+      expected[i] = kMax >> (kMaxShift - (i & kMaxShift));
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), Shr(max, large_shifts));
+  }
+};
+
+// Reference implementation of an arithmetic right shift by kAmount, built
+// purely from bit operations on the unsigned representation. C++ shifts of
+// negative numbers are implementation-defined, and divisions have been seen
+// replaced with shifts, hence the detour. The upper bits are set
+// unconditionally, so the result is only an arithmetic shift when `val` is
+// negative.
+template <int kAmount, typename T>
+T RightShiftNegative(T val) {
+  using TU = hwy::MakeUnsigned<T>;
+
+  // Copy the input's bytes into an unsigned integer of the same size.
+  TU raw;
+  CopyBytes<sizeof(T)>(&val, &raw);
+
+  // Ones from bit (width - 1 - kAmount), where the sign bit ends up after the
+  // shift, through the most significant bit.
+  const size_t num_zero = sizeof(TU) * 8 - 1 - kAmount;
+  const TU ones = ~TU(0);
+  const TU high_ones = static_cast<TU>((ones << num_zero) & LimitsMax<TU>());
+
+  // Logical shift, then fill the upper bits.
+  const TU logical = raw >> kAmount;
+  raw = logical | high_ones;
+
+  CopyBytes<sizeof(T)>(&raw, &val);
+  return val;
+}
+
+// Tests ShiftRight<k> and ShiftRightSame on signed lanes. Non-negative inputs
+// are checked against plain C++ shifts; negative inputs (LimitsMin and
+// LimitsMin + 1) are checked against RightShiftNegative.
+class TestSignedRightShifts {
+ public:
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    auto expected = AllocateAligned<T>(N);
+    constexpr T kMin = LimitsMin<T>();
+    constexpr T kMax = LimitsMax<T>();
+    // Largest valid shift count: number of bits in T minus one.
+    constexpr size_t kMaxShift = (sizeof(T) * 8) - 1;
+
+    // First test positive values, negative are checked below.
+    const auto v0 = Zero(d);
+    // Masking with kMax clears the sign bit of every lane.
+    const auto values = Iota(d, 0) & Set(d, kMax);
+
+    // Shift by 0
+    HWY_ASSERT_VEC_EQ(d, values, ShiftRight<0>(values));
+    HWY_ASSERT_VEC_EQ(d, values, ShiftRightSame(values, 0));
+
+    // Shift by 1
+    for (size_t i = 0; i < N; ++i) {
+      expected[i] = T(T(i & kMax) >> 1);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRight<1>(values));
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRightSame(values, 1));
+
+    // max
+    HWY_ASSERT_VEC_EQ(d, v0, ShiftRight<kMaxShift>(values));
+    HWY_ASSERT_VEC_EQ(d, v0, ShiftRightSame(values, kMaxShift));
+
+    // Even negative value
+    Test<0>(kMin, d, __LINE__);
+    Test<1>(kMin, d, __LINE__);
+    Test<2>(kMin, d, __LINE__);
+    Test<kMaxShift>(kMin, d, __LINE__);
+
+    // Odd negative value
+    const T odd = static_cast<T>(kMin + 1);
+    Test<0>(odd, d, __LINE__);
+    Test<1>(odd, d, __LINE__);
+    Test<2>(odd, d, __LINE__);
+    Test<kMaxShift>(odd, d, __LINE__);
+  }
+
+ private:
+  // Broadcasts `val` to all lanes and checks that both ShiftRight<kAmount> and
+  // ShiftRightSame(kAmount) match RightShiftNegative<kAmount>(val). `line` is
+  // the caller's __LINE__ and is forwarded to AssertVecEqual.
+  template <int kAmount, typename T, class D>
+  void Test(T val, D d, int line) {
+    const auto expected = Set(d, RightShiftNegative<kAmount>(val));
+    const auto in = Set(d, val);
+    const char* file = __FILE__;
+    AssertVecEqual(d, expected, ShiftRight<kAmount>(in), file, line);
+    AssertVecEqual(d, expected, ShiftRightSame(in, kAmount), file, line);
+  }
+};
+
+// Tests right shifts on signed lanes. The first part (non-negative inputs)
+// uses ShiftRight<k>/ShiftRightSame; the second part checks Shr with a vector
+// of shift counts on negative inputs.
+struct TestVariableSignedRightShifts {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    using TU = MakeUnsigned<T>;
+    const size_t N = Lanes(d);
+    auto expected = AllocateAligned<T>(N);
+
+    constexpr T kMin = LimitsMin<T>();
+    constexpr T kMax = LimitsMax<T>();
+
+    // Largest valid shift count: number of bits in T minus one.
+    constexpr size_t kMaxShift = (sizeof(T) * 8) - 1;
+
+    // First test positive values, negative are checked below.
+    const auto v0 = Zero(d);
+    // Masking with kMax clears the sign bit of every lane.
+    const auto positive = Iota(d, 0) & Set(d, kMax);
+
+    // Shift by 0
+    HWY_ASSERT_VEC_EQ(d, positive, ShiftRight<0>(positive));
+    HWY_ASSERT_VEC_EQ(d, positive, ShiftRightSame(positive, 0));
+
+    // Shift by 1
+    for (size_t i = 0; i < N; ++i) {
+      expected[i] = T(T(i & kMax) >> 1);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRight<1>(positive));
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRightSame(positive, 1));
+
+    // max
+    HWY_ASSERT_VEC_EQ(d, v0, ShiftRight<kMaxShift>(positive));
+    HWY_ASSERT_VEC_EQ(d, v0, ShiftRightSame(positive, kMaxShift));
+
+    const auto max_shift = Set(d, kMaxShift);
+    // Lane i: (i & kMaxShift), i.e. counts ascending from 0.
+    const auto small_shifts = And(Iota(d, 0), max_shift);
+    // Lane i: kMaxShift - (i & kMaxShift), i.e. counts descending from max.
+    const auto large_shifts = max_shift - small_shifts;
+
+    const auto negative = Iota(d, kMin);
+
+    // Test varying negative to shift
+    for (size_t i = 0; i < N; ++i) {
+      expected[i] = RightShiftNegative<1>(static_cast<T>(kMin + i));
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), Shr(negative, Set(d, 1)));
+
+    // Shift MSB right by small amounts
+    for (size_t i = 0; i < N; ++i) {
+      const size_t amount = i & kMaxShift;
+      // Expected bit pattern: all bits from (kMaxShift - amount) upwards set.
+      const TU shifted = ~((1ull << (kMaxShift - amount)) - 1);
+      CopyBytes<sizeof(T)>(&shifted, &expected[i]);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), Shr(Set(d, kMin), small_shifts));
+
+    // Shift MSB right by large amounts
+    for (size_t i = 0; i < N; ++i) {
+      const size_t amount = kMaxShift - (i & kMaxShift);
+      // Same pattern as above, for the descending shift counts.
+      const TU shifted = ~((1ull << (kMaxShift - amount)) - 1);
+      CopyBytes<sizeof(T)>(&shifted, &expected[i]);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), Shr(Set(d, kMin), large_shifts));
+  }
+};
+
+// Runs the fixed-count shift tests: left and right shifts via
+// ForUnsignedTypes and ForSignedTypes respectively.
+HWY_NOINLINE void TestAllShifts() {
+  const ForPartialVectors<TestLeftShifts</*kSigned=*/false>> shl_unsigned;
+  const ForPartialVectors<TestLeftShifts</*kSigned=*/true>> shl_signed;
+  const ForPartialVectors<TestUnsignedRightShifts> shr_unsigned;
+  const ForPartialVectors<TestSignedRightShifts> shr_signed;
+
+  ForUnsignedTypes(shl_unsigned);
+  ForSignedTypes(shl_signed);
+  ForUnsignedTypes(shr_unsigned);
+  ForSignedTypes(shr_signed);
+}
+
+// Runs the variable-count shift tests (Shl/Shr with a vector of shift counts)
+// for 16- and 32-bit lanes, plus 64-bit lanes when HWY_CAP_INTEGER64 is set.
+// These must instantiate the TestVariable* functors: the fixed-count functors
+// are already run by TestAllShifts and never call Shl/Shr.
+HWY_NOINLINE void TestAllVariableShifts() {
+  const ForPartialVectors<TestVariableLeftShifts</*kSigned=*/false>> shl_u;
+  const ForPartialVectors<TestVariableLeftShifts</*kSigned=*/true>> shl_s;
+  const ForPartialVectors<TestVariableUnsignedRightShifts> shr_u;
+  const ForPartialVectors<TestVariableSignedRightShifts> shr_s;
+
+  shl_u(uint16_t());
+  shr_u(uint16_t());
+
+  shl_u(uint32_t());
+  shr_u(uint32_t());
+
+  shl_s(int16_t());
+  shr_s(int16_t());
+
+  shl_s(int32_t());
+  shr_s(int32_t());
+
+#if HWY_CAP_INTEGER64
+  shl_u(uint64_t());
+  shr_u(uint64_t());
+
+  shl_s(int64_t());
+  shr_s(int64_t());
+#endif
+}
+
+// Checks Min/Max on unsigned lanes, both for nearby values and for the type
+// limits, with the arguments in either order.
+struct TestUnsignedMinMax {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto zero = Zero(d);
+    // Restricting Iota to half the range leaves headroom so that
+    // smaller < larger holds even after wraparound.
+    const auto base = And(Iota(d, 0), Set(d, LimitsMax<T>() >> 1));
+    const auto smaller = Add(base, Set(d, 1));
+    const auto larger = Add(base, Set(d, 2));
+    HWY_ASSERT_VEC_EQ(d, smaller, Min(smaller, larger));
+    HWY_ASSERT_VEC_EQ(d, larger, Max(smaller, larger));
+    HWY_ASSERT_VEC_EQ(d, zero, Min(smaller, zero));
+    HWY_ASSERT_VEC_EQ(d, smaller, Max(smaller, zero));
+
+    const auto lowest = Set(d, LimitsMin<T>());
+    const auto highest = Set(d, LimitsMax<T>());
+
+    HWY_ASSERT_VEC_EQ(d, lowest, Min(lowest, highest));
+    HWY_ASSERT_VEC_EQ(d, lowest, Min(highest, lowest));
+
+    HWY_ASSERT_VEC_EQ(d, highest, Max(lowest, highest));
+    HWY_ASSERT_VEC_EQ(d, highest, Max(highest, lowest));
+  }
+};
+
+// Checks Min/Max on signed lanes: nearby positive values, a negated value,
+// zero against the minimum, and the type limits in either argument order.
+struct TestSignedMinMax {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    // Restricting Iota to half the positive range leaves headroom so that
+    // smaller < larger holds even after wraparound.
+    const auto base = And(Iota(d, 0), Set(d, LimitsMax<T>() >> 1));
+    const auto smaller = Add(base, Set(d, 1));
+    const auto larger = Add(base, Set(d, 2));
+    const auto negated = Sub(Zero(d), smaller);
+    HWY_ASSERT_VEC_EQ(d, smaller, Min(smaller, larger));
+    HWY_ASSERT_VEC_EQ(d, larger, Max(smaller, larger));
+    HWY_ASSERT_VEC_EQ(d, negated, Min(smaller, negated));
+    HWY_ASSERT_VEC_EQ(d, smaller, Max(smaller, negated));
+
+    const auto zero = Zero(d);
+    const auto lowest = Set(d, LimitsMin<T>());
+    const auto highest = Set(d, LimitsMax<T>());
+    HWY_ASSERT_VEC_EQ(d, lowest, Min(zero, lowest));
+    HWY_ASSERT_VEC_EQ(d, lowest, Min(lowest, zero));
+    HWY_ASSERT_VEC_EQ(d, zero, Max(zero, lowest));
+    HWY_ASSERT_VEC_EQ(d, zero, Max(lowest, zero));
+
+    HWY_ASSERT_VEC_EQ(d, lowest, Min(lowest, highest));
+    HWY_ASSERT_VEC_EQ(d, lowest, Min(highest, lowest));
+
+    HWY_ASSERT_VEC_EQ(d, highest, Max(lowest, highest));
+    HWY_ASSERT_VEC_EQ(d, highest, Max(highest, lowest));
+  }
+};
+
+// Checks Min/Max on floating-point lanes: nearby values, a vector starting
+// at -Lanes(d), zero against -1E30, and -1E30 against +1E30 in either
+// argument order.
+struct TestFloatMinMax {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto smaller = Iota(d, 1);
+    const auto larger = Iota(d, 2);
+    const auto negative = Iota(d, -T(Lanes(d)));
+    HWY_ASSERT_VEC_EQ(d, smaller, Min(smaller, larger));
+    HWY_ASSERT_VEC_EQ(d, larger, Max(smaller, larger));
+    HWY_ASSERT_VEC_EQ(d, negative, Min(smaller, negative));
+    HWY_ASSERT_VEC_EQ(d, smaller, Max(smaller, negative));
+
+    const auto zero = Zero(d);
+    const auto lowest = Set(d, T(-1E30));
+    const auto highest = Set(d, T(1E30));
+    HWY_ASSERT_VEC_EQ(d, lowest, Min(zero, lowest));
+    HWY_ASSERT_VEC_EQ(d, lowest, Min(lowest, zero));
+    HWY_ASSERT_VEC_EQ(d, zero, Max(zero, lowest));
+    HWY_ASSERT_VEC_EQ(d, zero, Max(lowest, zero));
+
+    HWY_ASSERT_VEC_EQ(d, lowest, Min(lowest, highest));
+    HWY_ASSERT_VEC_EQ(d, lowest, Min(highest, lowest));
+
+    HWY_ASSERT_VEC_EQ(d, highest, Max(lowest, highest));
+    HWY_ASSERT_VEC_EQ(d, highest, Max(highest, lowest));
+  }
+};
+
+HWY_NOINLINE void TestAllMinMax() {  // Runs the three Min/Max test functors.
+  ForUnsignedTypes(ForPartialVectors<TestUnsignedMinMax>());
+  ForSignedTypes(ForPartialVectors<TestSignedMinMax>());
+  ForFloatTypes(ForPartialVectors<TestFloatMinMax>());
+}
+
+struct TestUnsignedMul {  // Lane-wise Mul on unsigned lanes, incl. wraparound.
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto v0 = Zero(d);
+    const auto v1 = Set(d, T(1));
+    const auto vi = Iota(d, 1);
+    const auto vj = Iota(d, 3);
+    const size_t N = Lanes(d);
+    auto expected = AllocateAligned<T>(N);
+
+    HWY_ASSERT_VEC_EQ(d, v0, Mul(v0, v0));
+    HWY_ASSERT_VEC_EQ(d, v1, Mul(v1, v1));
+    HWY_ASSERT_VEC_EQ(d, vi, Mul(v1, vi));
+    HWY_ASSERT_VEC_EQ(d, vi, Mul(vi, v1));
+    // Squares of 1..N, truncated to T by the cast.
+    for (size_t i = 0; i < N; ++i) {
+      expected[i] = static_cast<T>((1 + i) * (1 + i));
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), Mul(vi, vi));  // Mul(), not operator*
+    // Products of two different sequences: (1 + i) * (3 + i).
+    for (size_t i = 0; i < N; ++i) {
+      expected[i] = static_cast<T>((1 + i) * (3 + i));
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), Mul(vi, vj));
+    // Multiplying the largest value by one must leave it unchanged.
+    const T max = LimitsMax<T>();
+    const auto vmax = Set(d, max);
+    HWY_ASSERT_VEC_EQ(d, vmax, Mul(vmax, v1));
+    HWY_ASSERT_VEC_EQ(d, vmax, Mul(v1, vmax));
+    // max * max reduced modulo 2^bits, computed with a 64-bit scalar product.
+    const size_t bits = sizeof(T) * 8;
+    const uint64_t mask = (1ull << bits) - 1;
+    const T max2 = (uint64_t(max) * max) & mask;
+    HWY_ASSERT_VEC_EQ(d, Set(d, max2), Mul(vmax, vmax));
+  }
+};
+
+struct TestSignedMul {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    const auto zero = Zero(d);
+    const auto one = Set(d, T(1));
+    const auto pos = Iota(d, 1);
+    const auto neg = Iota(d, -T(N));  // i8 is not tested, so -N cannot wrap
+    // Identities: 0 * 0, 1 * 1 and multiplication by one in either order.
+    HWY_ASSERT_VEC_EQ(d, zero, Mul(zero, zero));
+    HWY_ASSERT_VEC_EQ(d, one, Mul(one, one));
+    HWY_ASSERT_VEC_EQ(d, pos, Mul(one, pos));
+    HWY_ASSERT_VEC_EQ(d, pos, Mul(pos, one));
+
+    auto want = AllocateAligned<T>(N);
+    for (size_t i = 0; i < N; ++i) {
+      want[i] = static_cast<T>((1 + i) * (1 + i));
+    }
+    HWY_ASSERT_VEC_EQ(d, want.get(), Mul(pos, pos));
+
+    for (int i = 0; i < static_cast<int>(N); ++i) {
+      want[i] = static_cast<T>((-T(N) + i) * (1 + i));
+    }
+    HWY_ASSERT_VEC_EQ(d, want.get(), Mul(neg, pos));
+    HWY_ASSERT_VEC_EQ(d, want.get(), Mul(pos, neg));
+  }
+};
+
+HWY_NOINLINE void TestAllMul() {
+  const ForPartialVectors<TestUnsignedMul> unsigned_mul;
+  // u8 lanes are not exercised.
+  unsigned_mul(uint16_t());
+  unsigned_mul(uint32_t());
+  // u64 lanes are not exercised.
+
+  const ForPartialVectors<TestSignedMul> signed_mul;
+  // i8 lanes are not exercised.
+  signed_mul(int16_t());
+  signed_mul(int32_t());
+  // i64 lanes are not exercised.
+}
+
+struct TestMulHigh {  // Expected value: widened product shifted right by 16.
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    using Wide = MakeWide<T>;
+    const size_t N = Lanes(d);
+    auto in_lanes = AllocateAligned<T>(N);
+    auto expected_lanes = AllocateAligned<T>(N);
+
+    const auto vi = Iota(d, 1);
+    const auto vni = Iota(d, -T(N));  // no i8 supported, so no wraparound
+    // With a zero operand the result must be zero.
+    const auto v0 = Zero(d);
+    HWY_ASSERT_VEC_EQ(d, v0, MulHigh(v0, v0));
+    HWY_ASSERT_VEC_EQ(d, v0, MulHigh(v0, vi));
+    HWY_ASSERT_VEC_EQ(d, v0, MulHigh(vi, v0));
+
+    // Large positive squared
+    for (size_t i = 0; i < N; ++i) {
+      in_lanes[i] = LimitsMax<T>() >> i;  // lane i: maximum shifted right by i
+      expected_lanes[i] = (Wide(in_lanes[i]) * in_lanes[i]) >> 16;
+    }
+    auto v = Load(d, in_lanes.get());
+    HWY_ASSERT_VEC_EQ(d, expected_lanes.get(), MulHigh(v, v));
+
+    // Large positive * small positive
+    for (int i = 0; i < static_cast<int>(N); ++i) {
+      expected_lanes[i] = static_cast<T>((Wide(in_lanes[i]) * T(1 + i)) >> 16);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected_lanes.get(), MulHigh(v, vi));
+    HWY_ASSERT_VEC_EQ(d, expected_lanes.get(), MulHigh(vi, v));
+
+    // Large positive * small negative
+    for (size_t i = 0; i < N; ++i) {
+      expected_lanes[i] = (Wide(in_lanes[i]) * T(i - N)) >> 16;  // T(i - N): vni
+    }
+    HWY_ASSERT_VEC_EQ(d, expected_lanes.get(), MulHigh(v, vni));
+    HWY_ASSERT_VEC_EQ(d, expected_lanes.get(), MulHigh(vni, v));
+  }
+};
+
+HWY_NOINLINE void TestAllMulHigh() {  // Only 16-bit lanes are exercised.
+  ForPartialVectors<TestMulHigh> mul_high;
+  mul_high(int16_t());
+  mul_high(uint16_t());
+}
+
+struct TestMulEven {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    using Wide = MakeWide<T>;
+    const Repartition<Wide, D> dw;
+    const auto zero = Zero(d);
+    HWY_ASSERT_VEC_EQ(dw, Zero(dw), MulEven(zero, zero));
+    // Even lanes hold LimitsMax >> i; their widened squares are expected.
+    const size_t N = Lanes(d);
+    auto input = AllocateAligned<T>(N);
+    auto squares = AllocateAligned<Wide>(Lanes(dw));
+    for (size_t i = 0; i < N; i += 2) {
+      input[i + 0] = LimitsMax<T>() >> i;
+      if (N != 1) {
+        input[i + 1] = 1;  // odd lane, unused
+      }
+      squares[i / 2] = Wide(input[i + 0]) * input[i + 0];
+    }
+
+    const auto v = Load(d, input.get());
+    HWY_ASSERT_VEC_EQ(dw, squares.get(), MulEven(v, v));
+  }
+};
+
+HWY_NOINLINE void TestAllMulEven() {  // Only 32-bit lanes are exercised.
+  ForPartialVectors<TestMulEven> mul_even;
+  mul_even(int32_t());
+  mul_even(uint32_t());
+}
+
+struct TestMulAdd {  // Checks MulAdd, NegMulAdd, MulSub and NegMulSub.
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto k0 = Zero(d);
+    const auto kNeg0 = Set(d, T(-0.0));
+    const auto v1 = Iota(d, 1);
+    const auto v2 = Iota(d, 2);
+    const size_t N = Lanes(d);
+    auto expected = AllocateAligned<T>(N);
+    HWY_ASSERT_VEC_EQ(d, k0, MulAdd(k0, k0, k0));
+    HWY_ASSERT_VEC_EQ(d, v2, MulAdd(k0, v1, v2));
+    HWY_ASSERT_VEC_EQ(d, v2, MulAdd(v1, k0, v2));
+    HWY_ASSERT_VEC_EQ(d, k0, NegMulAdd(k0, k0, k0));
+    HWY_ASSERT_VEC_EQ(d, v2, NegMulAdd(k0, v1, v2));
+    HWY_ASSERT_VEC_EQ(d, v2, NegMulAdd(v1, k0, v2));
+    // expected = v1 * v2 (zero addend).
+    for (size_t i = 0; i < N; ++i) {
+      expected[i] = static_cast<T>((i + 1) * (i + 2));
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), MulAdd(v2, v1, k0));
+    HWY_ASSERT_VEC_EQ(d, expected.get(), MulAdd(v1, v2, k0));
+    HWY_ASSERT_VEC_EQ(d, expected.get(), NegMulAdd(Neg(v2), v1, k0));
+    HWY_ASSERT_VEC_EQ(d, expected.get(), NegMulAdd(v1, Neg(v2), k0));
+    // expected = v2 * v2 + v1.
+    for (size_t i = 0; i < N; ++i) {
+      expected[i] = static_cast<T>((i + 2) * (i + 2) + (i + 1));
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), MulAdd(v2, v2, v1));
+    HWY_ASSERT_VEC_EQ(d, expected.get(), NegMulAdd(Neg(v2), v2, v1));
+    // expected = -(v2 * v2) + v1.
+    for (size_t i = 0; i < N; ++i) {
+      expected[i] = -T(i + 2) * (i + 2) + (1 + i);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), NegMulAdd(v2, v2, v1));
+    // All-zero operands for the subtracting variants; NegMulSub yields -0.
+    HWY_ASSERT_VEC_EQ(d, k0, MulSub(k0, k0, k0));
+    HWY_ASSERT_VEC_EQ(d, kNeg0, NegMulSub(k0, k0, k0));
+    // expected = -v2 (the product term is zero).
+    for (size_t i = 0; i < N; ++i) {
+      expected[i] = -T(i + 2);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), MulSub(k0, v1, v2));
+    HWY_ASSERT_VEC_EQ(d, expected.get(), MulSub(v1, k0, v2));
+    HWY_ASSERT_VEC_EQ(d, expected.get(), NegMulSub(Neg(k0), v1, v2));
+    HWY_ASSERT_VEC_EQ(d, expected.get(), NegMulSub(v1, Neg(k0), v2));
+    // expected = v1 * v2 (zero subtrahend).
+    for (size_t i = 0; i < N; ++i) {
+      expected[i] = static_cast<T>((i + 1) * (i + 2));
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), MulSub(v1, v2, k0));
+    HWY_ASSERT_VEC_EQ(d, expected.get(), MulSub(v2, v1, k0));
+    HWY_ASSERT_VEC_EQ(d, expected.get(), NegMulSub(Neg(v1), v2, k0));
+    HWY_ASSERT_VEC_EQ(d, expected.get(), NegMulSub(v2, Neg(v1), k0));
+    // expected = v2 * v2 - v1.
+    for (size_t i = 0; i < N; ++i) {
+      expected[i] = static_cast<T>((i + 2) * (i + 2) - (1 + i));
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), MulSub(v2, v2, v1));
+    HWY_ASSERT_VEC_EQ(d, expected.get(), NegMulSub(Neg(v2), v2, v1));
+  }
+};
+
+HWY_NOINLINE void TestAllMulAdd() {  // Dispatches TestMulAdd via ForFloatTypes.
+  ForFloatTypes(ForPartialVectors<TestMulAdd>());
+}
+
+struct TestDiv {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto numer = Iota(d, T(-2));
+    const auto one = Set(d, T(1));
+
+    // Dividing by one returns the input.
+    HWY_ASSERT_VEC_EQ(d, numer, Div(numer, one));
+    // Dividing by two is compared against scalar division per lane.
+    const size_t N = Lanes(d);
+    auto halves = AllocateAligned<T>(N);
+    for (size_t i = 0; i < N; ++i) {
+      halves[i] = (T(i) - 2) / T(2);
+    }
+    HWY_ASSERT_VEC_EQ(d, halves.get(), Div(numer, Set(d, T(2))));
+  }
+};
+
+HWY_NOINLINE void TestAllDiv() { ForFloatTypes(ForPartialVectors<TestDiv>()); }  // Dispatches TestDiv via ForFloatTypes.
+
+struct TestApproximateReciprocal {  // Bounds the error of ApproximateReciprocal.
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto v = Iota(d, T(-2));
+    const auto nonzero = IfThenElse(Eq(v, Zero(d)), Set(d, T(1)), v);  // 0 -> 1
+    const size_t N = Lanes(d);
+    auto input = AllocateAligned<T>(N);
+    Store(nonzero, d, input.get());
+
+    auto actual = AllocateAligned<T>(N);
+    Store(ApproximateReciprocal(nonzero), d, actual.get());
+    // Largest absolute error over all lanes, then scaled by |1 / input[N - 1]|.
+    double max_l1 = 0.0;
+    for (size_t i = 0; i < N; ++i) {
+      max_l1 = std::max<double>(max_l1, std::abs((1.0 / input[i]) - actual[i]));
+    }
+    const double max_rel = max_l1 / std::abs(1.0 / input[N - 1]);
+    printf("max err %f\n", max_rel);
+    // The scaled error must stay below the tolerance accepted by this test.
+    HWY_ASSERT(max_rel < 0.002);
+  }
+};
+
+HWY_NOINLINE void TestAllApproximateReciprocal() {  // float lanes only
+  ForPartialVectors<TestApproximateReciprocal>()(float());
+}
+
+struct TestSquareRoot {  // Sqrt of a squared lane index must return the index.
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto vi = Iota(d, 0);
+    HWY_ASSERT_VEC_EQ(d, vi, Sqrt(Mul(vi, vi)));  // Mul(), not operator*
+  }
+};
+
+HWY_NOINLINE void TestAllSquareRoot() {  // Dispatches via ForFloatTypes.
+  ForFloatTypes(ForPartialVectors<TestSquareRoot>());
+}
+
+struct TestReciprocalSquareRoot {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    auto approx = AllocateAligned<T>(N);
+    const auto input = Set(d, 123.0f);
+    Store(ApproximateReciprocalSqrt(input), d, approx.get());
+    for (size_t i = 0; i < N; ++i) {
+      float err = approx[i] - 0.090166f;
+      err = (err < 0.0f) ? -err : err;  // absolute deviation from the reference
+      HWY_ASSERT(err < 1E-4f);
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllReciprocalSquareRoot() {  // float lanes only
+  ForPartialVectors<TestReciprocalSquareRoot>()(float());
+}
+
+// Returns the inputs shared by the rounding tests. `padded` receives their
+// count rounded up to a multiple of Lanes(d); the extra elements are zero.
+template <typename T, class D>
+AlignedFreeUniquePtr<T[]> RoundTestCases(T /*unused*/, D d, size_t& padded) {
+  const T eps = std::numeric_limits<T>::epsilon();
+  const T test_cases[] = {
+      // +/- 1
+      T(1), T(-1),
+      // +/- 0 (floating-point literal: the integer expression -0 is just 0)
+      T(0), T(-0.0),
+      // near 0
+      T(0.4), T(-0.4),
+      // +/- integer
+      T(4), T(-32),
+      // positive near limit
+      MantissaEnd<T>() - T(1.5), MantissaEnd<T>() + T(1.5),
+      // negative near limit
+      -MantissaEnd<T>() - T(1.5), -MantissaEnd<T>() + T(1.5),
+      // +/- huge (but still fits in float)
+      T(1E34), T(-1E35),
+      // positive tiebreak
+      T(1.5), T(2.5),
+      // negative tiebreak
+      T(-1.5), T(-2.5),
+      // positive +/- delta
+      T(2.0001), T(3.9999),
+      // negative +/- delta
+      T(-999.9999), T(-998.0001),
+      // positive +/- epsilon
+      T(1) + eps, T(1) - eps,
+      // negative +/- epsilon
+      T(-1) + eps, T(-1) - eps,
+      // +/- infinity
+      std::numeric_limits<T>::infinity(), -std::numeric_limits<T>::infinity(),
+      // qNaN
+      GetLane(NaN(d))};
+  const size_t kNumTestCases = sizeof(test_cases) / sizeof(test_cases[0]);
+  padded = RoundUpTo(kNumTestCases, Lanes(d));  // allow loading whole vectors
+  auto in = AllocateAligned<T>(padded);
+  std::copy(test_cases, test_cases + kNumTestCases, in.get());
+  std::fill(in.get() + kNumTestCases, in.get() + padded, T(0));
+  return in;
+}
+
+struct TestRound {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T t, D d) {
+    size_t num_padded;
+    auto inputs = RoundTestCases(t, d, num_padded);
+    auto want = AllocateAligned<T>(num_padded);
+    const size_t N = Lanes(d);
+    for (size_t i = 0; i < num_padded; ++i) {
+      // nearbyint instead of [std::]round: the latter is not nearest-*even*.
+      // NOTE: std:: version from C++11 cmath is not defined in RVV GCC, see
+      // https://lists.freebsd.org/pipermail/freebsd-current/2014-January/048130.html
+      want[i] = nearbyint(inputs[i]);
+    }
+    for (size_t i = 0; i < num_padded; i += N) {
+      HWY_ASSERT_VEC_EQ(d, &want[i], Round(Load(d, &inputs[i])));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllRound() {  // Dispatches TestRound via ForFloatTypes.
+  ForFloatTypes(ForPartialVectors<TestRound>());
+}
+
+struct TestNearestInt {  // Compares NearestInt with a saturating lrintf.
+  template <typename TF, class DF>
+  HWY_NOINLINE void operator()(TF tf, const DF df) {
+    using TI = MakeSigned<TF>;
+    const RebindToSigned<DF> di;
+    // Inputs come from RoundTestCases; `padded` receives their count.
+    size_t padded;
+    auto in = RoundTestCases(tf, df, padded);
+    auto expected = AllocateAligned<TI>(padded);
+    // Scalar reference: NaN -> 0, out of range -> saturated, else lrintf.
+    constexpr double max = static_cast<double>(LimitsMax<TI>());
+    for (size_t i = 0; i < padded; ++i) {
+      if (std::isnan(in[i])) {
+        // We replace NaN with 0 below (no_nan)
+        expected[i] = 0;
+      } else if (std::isinf(in[i]) || double(std::abs(in[i])) >= max) {
+        // Avoid undefined result for lrintf
+        expected[i] = std::signbit(in[i]) ? LimitsMin<TI>() : LimitsMax<TI>();
+      } else {
+        expected[i] = lrintf(in[i]);
+      }
+    }
+    for (size_t i = 0; i < padded; i += Lanes(df)) {
+      const auto v = Load(df, &in[i]);
+      const auto no_nan = IfThenElse(Eq(v, v), v, Zero(df));  // v != v: NaN
+      HWY_ASSERT_VEC_EQ(di, &expected[i], NearestInt(no_nan));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllNearestInt() {  // float lanes only
+  ForPartialVectors<TestNearestInt>()(float());
+}
+
+struct TestTrunc {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T t, D d) {
+    size_t num_padded;
+    auto inputs = RoundTestCases(t, d, num_padded);
+    auto want = AllocateAligned<T>(num_padded);
+    const size_t N = Lanes(d);
+    for (size_t i = 0; i < num_padded; ++i) {
+      // NOTE: std:: version from C++11 cmath is not defined in RVV GCC, see
+      // https://lists.freebsd.org/pipermail/freebsd-current/2014-January/048130.html
+      want[i] = trunc(inputs[i]);
+    }
+    for (size_t i = 0; i < num_padded; i += N) {
+      HWY_ASSERT_VEC_EQ(d, &want[i], Trunc(Load(d, &inputs[i])));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllTrunc() {  // Dispatches TestTrunc via ForFloatTypes.
+  ForFloatTypes(ForPartialVectors<TestTrunc>());
+}
+
+struct TestCeil {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T t, D d) {
+    size_t num_padded;
+    auto inputs = RoundTestCases(t, d, num_padded);
+    auto want = AllocateAligned<T>(num_padded);
+    // Scalar reference via std::ceil, then compare one vector at a time.
+    for (size_t i = 0; i < num_padded; ++i) {
+      want[i] = std::ceil(inputs[i]);
+    }
+    for (size_t i = 0; i < num_padded; i += Lanes(d)) {
+      HWY_ASSERT_VEC_EQ(d, &want[i], Ceil(Load(d, &inputs[i])));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllCeil() {  // Dispatches TestCeil via ForFloatTypes.
+  ForFloatTypes(ForPartialVectors<TestCeil>());
+}
+
+struct TestFloor {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T t, D d) {
+    size_t num_padded;
+    auto inputs = RoundTestCases(t, d, num_padded);
+    auto want = AllocateAligned<T>(num_padded);
+    // Scalar reference via std::floor, then compare one vector at a time.
+    for (size_t i = 0; i < num_padded; ++i) {
+      want[i] = std::floor(inputs[i]);
+    }
+    for (size_t i = 0; i < num_padded; i += Lanes(d)) {
+      HWY_ASSERT_VEC_EQ(d, &want[i], Floor(Load(d, &inputs[i])));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllFloor() {  // Dispatches TestFloor via ForFloatTypes.
+  ForFloatTypes(ForPartialVectors<TestFloor>());
+}
+
+struct TestSumOfLanes {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    auto lanes = AllocateAligned<T>(N);
+
+    // Distinct powers of two: lane i = 1 << i, remaining lanes 0.
+    double total = 0.0;
+    // Stay below the sign bit and within double precision.
+    constexpr size_t kBits = HWY_MIN(sizeof(T) * 8 - 1, 51);
+    for (size_t i = 0; i < N; ++i) {
+      lanes[i] = i < kBits ? static_cast<T>(1ull << i) : 0;
+      total += static_cast<double>(lanes[i]);
+    }
+    HWY_ASSERT_VEC_EQ(d, Set(d, T(total)), SumOfLanes(Load(d, lanes.get())));
+
+    // Lane i = i (iota), so upper lanes contribute as well.
+    total = 0.0;
+    for (size_t i = 0; i < N; ++i) {
+      total += static_cast<double>(i);
+    }
+    HWY_ASSERT_VEC_EQ(d, Set(d, T(total)), SumOfLanes(Iota(d, 0)));
+  }
+};
+
+HWY_NOINLINE void TestAllSumOfLanes() {
+  const ForPartialVectors<TestSumOfLanes> test_sum;
+
+  // 8- and 16-bit integer lanes are not exercised.
+  test_sum(uint32_t());
+  test_sum(int32_t());
+
+#if HWY_CAP_INTEGER64
+  test_sum(uint64_t());
+  test_sum(int64_t());
+#endif
+
+  ForFloatTypes(test_sum);
+}
+
+struct TestMinOfLanes {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    auto lanes = AllocateAligned<T>(N);
+
+    // Lane i = 1 << i; lanes past kBits hold 2, which is above the minimum.
+    T smallest = HighestValue<T>();
+    // Stay below the sign bit and within double precision.
+    constexpr size_t kBits = HWY_MIN(sizeof(T) * 8 - 1, 51);
+    for (size_t i = 0; i < N; ++i) {
+      lanes[i] = i < kBits ? static_cast<T>(1ull << i) : 2;
+      smallest = std::min(smallest, lanes[i]);
+    }
+    HWY_ASSERT_VEC_EQ(d, Set(d, smallest), MinOfLanes(Load(d, lanes.get())));
+
+    // Lane i = N - i, which puts the minimum into the last lane.
+    smallest = HighestValue<T>();
+    for (size_t i = 0; i < N; ++i) {
+      lanes[i] = static_cast<T>(N - i);  // no 8-bit T so no wraparound
+      smallest = std::min(smallest, lanes[i]);
+    }
+    HWY_ASSERT_VEC_EQ(d, Set(d, smallest), MinOfLanes(Load(d, lanes.get())));
+  }
+};
+
+struct TestMaxOfLanes {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    auto lanes = AllocateAligned<T>(N);
+
+    T largest = LowestValue<T>();
+    // Lane i = 1 << i below the sign bit (capped at double precision), else 0.
+    constexpr size_t kBits = HWY_MIN(sizeof(T) * 8 - 1, 51);
+    for (size_t i = 0; i < N; ++i) {
+      lanes[i] = i < kBits ? static_cast<T>(1ull << i) : 0;
+      largest = std::max(largest, lanes[i]);
+    }
+    HWY_ASSERT_VEC_EQ(d, Set(d, largest), MaxOfLanes(Load(d, lanes.get())));
+
+    // Lane i = i, which puts the maximum into the last lane.
+    largest = LowestValue<T>();
+    for (size_t i = 0; i < N; ++i) {
+      lanes[i] = static_cast<T>(i);  // no 8-bit T so no wraparound
+      largest = std::max(largest, lanes[i]);
+    }
+    HWY_ASSERT_VEC_EQ(d, Set(d, largest), MaxOfLanes(Load(d, lanes.get())));
+  }
+};
+
+HWY_NOINLINE void TestAllMinMaxOfLanes() {
+  const ForPartialVectors<TestMinOfLanes> test_min;
+  const ForPartialVectors<TestMaxOfLanes> test_max;
+
+  // 8- and 16-bit integer lanes are not exercised.
+  test_min(uint32_t());
+  test_max(uint32_t());
+  test_min(int32_t());
+  test_max(int32_t());
+
+#if HWY_CAP_INTEGER64
+  test_min(uint64_t());
+  test_max(uint64_t());
+  test_min(int64_t());
+  test_max(int64_t());
+#endif
+
+  ForFloatTypes(test_min);
+  ForFloatTypes(test_max);
+}
+
+struct TestAbsDiff {  // AbsDiff must give the same result in either order.
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    auto lhs = AllocateAligned<T>(N);
+    auto rhs = AllocateAligned<T>(N);
+    auto want = AllocateAligned<T>(N);
+    for (size_t i = 0; i < N; ++i) {
+      lhs[i] = static_cast<T>((i ^ 1u) << i);
+      rhs[i] = static_cast<T>(i << i);
+      want[i] = std::abs(lhs[i] - rhs[i]);
+    }
+    const auto a = Load(d, lhs.get());
+    const auto b = Load(d, rhs.get());
+    const auto expected = Load(d, want.get());
+    HWY_ASSERT_VEC_EQ(d, expected, AbsDiff(a, b));
+    HWY_ASSERT_VEC_EQ(d, expected, AbsDiff(b, a));
+  }
+};
+
+HWY_NOINLINE void TestAllAbsDiff() {  // float lanes only
+  ForPartialVectors<TestAbsDiff>()(float());
+}
+
+struct TestNeg {  // Neg of zero is zero; Neg maps 3 and -3 onto each other.
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto zero = Zero(d);
+    const auto minus3 = Set(d, T(-3));
+    const auto plus3 = Set(d, T(3));
+    HWY_ASSERT_VEC_EQ(d, zero, Neg(zero));
+    HWY_ASSERT_VEC_EQ(d, plus3, Neg(minus3));
+    HWY_ASSERT_VEC_EQ(d, minus3, Neg(plus3));
+  }
+};
+
+HWY_NOINLINE void TestAllNeg() {  // TestNeg via ForSignedTypes and ForFloatTypes
+  ForSignedTypes(ForPartialVectors<TestNeg>());
+  ForFloatTypes(ForPartialVectors<TestNeg>());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+HWY_BEFORE_TEST(HwyArithmeticTest);  // one HWY_EXPORT_AND_TEST_P per TestAll*
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllPlusMinus);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllSaturatingArithmetic);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllShifts);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllVariableShifts);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllMinMax);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllAverage);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllAbs);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllMul);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllMulHigh);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllMulEven);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllMulAdd);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllDiv);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllApproximateReciprocal);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllSquareRoot);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllReciprocalSquareRoot);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllSumOfLanes);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllMinMaxOfLanes);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllRound);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllNearestInt);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllTrunc);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllCeil);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllFloor);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllAbsDiff);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllNeg);
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/combine_test.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/combine_test.cc
new file mode 100644
index 0000000000..4f7942f67c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/combine_test.cc
@@ -0,0 +1,287 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "tests/combine_test.cc"
+#include "hwy/foreach_target.h"
+
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+// Not yet implemented
+#if HWY_TARGET != HWY_RVV
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+struct TestLowerHalf {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const Half<D> dh;
+    const size_t half = Lanes(dh);
+    const size_t N = Lanes(d);
+    auto out = AllocateAligned<T>(N);
+    std::fill(out.get(), out.get() + N, T(0));
+    const auto v = Iota(d, 1);
+    Store(LowerHalf(v), dh, out.get());
+    size_t i = 0;
+    for (; i < half; ++i) {
+      HWY_ASSERT_EQ(T(1 + i), out[i]);
+    }
+    // Lanes beyond the stored half must still hold the zero fill.
+    for (; i < N; ++i) {
+      HWY_ASSERT_EQ(T(0), out[i]);
+    }
+  }
+};
+
+struct TestLowerQuarter {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const Half<Half<D>> dq;
+    const size_t quarter = Lanes(dq);
+    const size_t N = Lanes(d);
+    auto out = AllocateAligned<T>(N);
+    std::fill(out.get(), out.get() + N, T(0));
+    const auto v = Iota(d, 1);
+    // Only the first quarter of `out` is overwritten by the store.
+    Store(LowerHalf(LowerHalf(v)), dq, out.get());
+    size_t i = 0;
+    for (; i < quarter; ++i) {
+      HWY_ASSERT_EQ(T(i + 1), out[i]);
+    }
+    // The remaining three quarters must still hold the zero fill.
+    for (; i < N; ++i) {
+      HWY_ASSERT_EQ(T(0), out[i]);
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllLowerHalf() {  // half needs >= 2 lanes, quarter >= 4
+  constexpr size_t kDiv = 1;
+  ForAllTypes(ForPartialVectors<TestLowerHalf, kDiv, /*kMinLanes=*/2>());
+  ForAllTypes(ForPartialVectors<TestLowerQuarter, kDiv, /*kMinLanes=*/4>());
+}
+
+struct TestUpperHalf {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    // UpperHalf is not defined for the scalar target.
+#if HWY_TARGET != HWY_SCALAR
+    const Half<D> dh;
+    const size_t half = Lanes(dh);
+    const auto v = Iota(d, 1);
+    const size_t N = Lanes(d);
+    auto out = AllocateAligned<T>(N);
+    std::fill(out.get(), out.get() + N, T(0));
+
+    Store(UpperHalf(v), dh, out.get());
+    size_t i = 0;
+    for (; i < half; ++i) {
+      HWY_ASSERT_EQ(T(half + 1 + i), out[i]);
+    }
+    // Lanes beyond the stored half must still hold the zero fill.
+    for (; i < N; ++i) {
+      HWY_ASSERT_EQ(T(0), out[i]);
+    }
+#else
+    (void)d;
+#endif
+  }
+};
+
+HWY_NOINLINE void TestAllUpperHalf() {  // Dispatches via ForGE128Vectors.
+  ForAllTypes(ForGE128Vectors<TestUpperHalf>());
+}
+
+struct TestZeroExtendVector {  // Body is compiled only when HWY_CAP_GE256.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+#if HWY_CAP_GE256
+    const Twice<D> d2;
+
+    const auto v = Iota(d, 1);
+    const size_t N2 = Lanes(d2);
+    auto lanes = AllocateAligned<T>(N2);
+    Store(v, d, &lanes[0]);
+    Store(v, d, &lanes[N2 / 2]);
+    // Both halves of `lanes` now hold v, so zeros found below were written.
+    const auto ext = ZeroExtendVector(v);
+    Store(ext, d2, lanes.get());
+
+    size_t i = 0;
+    // Lower half is unchanged
+    for (; i < N2 / 2; ++i) {
+      HWY_ASSERT_EQ(T(1 + i), lanes[i]);
+    }
+    // Upper half is zero
+    for (; i < N2; ++i) {
+      HWY_ASSERT_EQ(T(0), lanes[i]);
+    }
+#else
+    (void)d;
+#endif
+  }
+};
+
+HWY_NOINLINE void TestAllZeroExtendVector() {  // via ForExtendableVectors
+  ForAllTypes(ForExtendableVectors<TestZeroExtendVector>());
+}
+
+struct TestCombine {  // Body is compiled only when HWY_CAP_GE256.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+#if HWY_CAP_GE256
+    const Twice<D> d2;
+    const size_t N2 = Lanes(d2);
+    auto lanes = AllocateAligned<T>(N2);
+    // hi continues the lo sequence, so Combine(hi, lo) should equal Iota(d2, 1).
+    const auto lo = Iota(d, 1);
+    const auto hi = Iota(d, N2 / 2 + 1);
+    const auto combined = Combine(hi, lo);
+    Store(combined, d2, lanes.get());  // stored lanes are not inspected below
+
+    const auto expected = Iota(d2, 1);
+    HWY_ASSERT_VEC_EQ(d2, expected, combined);
+#else
+    (void)d;
+#endif
+  }
+};
+
+HWY_NOINLINE void TestAllCombine() {  // Dispatches via ForExtendableVectors.
+  ForAllTypes(ForExtendableVectors<TestCombine>());
+}
+
+
+template <int kBytes>
+struct TestCombineShiftRightBytesR {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T t, D d) {
+// Scalar does not define CombineShiftRightBytes.
+#if HWY_TARGET != HWY_SCALAR || HWY_IDE
+    const Repartition<uint8_t, D> d8;
+    const size_t N8 = Lanes(d8);
+    const auto lo = BitCast(d, Iota(d8, 1));
+    const auto hi = BitCast(d, Iota(d8, 1 + N8));
+    // lo starts at byte value 1 and hi continues at N8 + 1.
+    auto expected = AllocateAligned<T>(Lanes(d));
+    uint8_t* expected_bytes = reinterpret_cast<uint8_t*>(expected.get());
+    // The expected result is built per byte within independent 16-byte blocks.
+    const size_t kBlockSize = 16;
+    for (size_t i = 0; i < N8; ++i) {
+      const size_t block = i / kBlockSize;
+      const size_t lane = i % kBlockSize;
+      const size_t first_lo = block * kBlockSize;
+      const size_t idx = lane + kBytes;
+      const size_t offset = (idx < kBlockSize) ? 0 : N8 - kBlockSize;  // from hi
+      const bool at_end = idx >= 2 * kBlockSize;
+      expected_bytes[i] =
+          at_end ? 0 : static_cast<uint8_t>(first_lo + idx + 1 + offset);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(),
+                      CombineShiftRightBytes<kBytes>(hi, lo));
+    // Recurse with kBytes - 1; the <0> specialization ends the recursion.
+    TestCombineShiftRightBytesR<kBytes - 1>()(t, d);
+#else
+    (void)t;
+    (void)d;
+#endif  // #if HWY_TARGET != HWY_SCALAR
+  }
+};
+
+template <int kLanes>
+struct TestCombineShiftRightLanesR {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T t, D d) {
+// Scalar does not define CombineShiftRightBytes (needed for *Lanes).
+#if HWY_TARGET != HWY_SCALAR || HWY_IDE
+    const Repartition<uint8_t, D> d8;
+    const size_t N8 = Lanes(d8);
+    const auto lo = BitCast(d, Iota(d8, 1));
+    const auto hi = BitCast(d, Iota(d8, 1 + N8));
+    // lo starts at byte value 1 and hi continues at N8 + 1.
+    auto expected = AllocateAligned<T>(Lanes(d));
+    // Expected bytes: a kLanes shift equals a kLanes * sizeof(T) byte shift.
+    uint8_t* expected_bytes = reinterpret_cast<uint8_t*>(expected.get());
+
+    const size_t kBlockSize = 16;
+    for (size_t i = 0; i < N8; ++i) {
+      const size_t block = i / kBlockSize;
+      const size_t lane = i % kBlockSize;
+      const size_t first_lo = block * kBlockSize;
+      const size_t idx = lane + kLanes * sizeof(T);
+      const size_t offset = (idx < kBlockSize) ? 0 : N8 - kBlockSize;  // from hi
+      const bool at_end = idx >= 2 * kBlockSize;
+      expected_bytes[i] =
+          at_end ? 0 : static_cast<uint8_t>(first_lo + idx + 1 + offset);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(),
+                      CombineShiftRightLanes<kLanes>(hi, lo));
+    // Recurse over the remaining lane counts; the <0> specialization ends it.
+    TestCombineShiftRightLanesR<kLanes - 1>()(t, d);
+#else
+    (void)t;
+    (void)d;
+#endif  // #if HWY_TARGET != HWY_SCALAR
+  }
+};
+
+template <>
+struct TestCombineShiftRightBytesR<0> {  // Ends the recursion; tests nothing.
+  template <class T, class D>
+  void operator()(T /*unused*/, D /*unused*/) {}
+};
+
+template <>
+struct TestCombineShiftRightLanesR<0> {  // Zero-lane case; tests nothing.
+  template <class T, class D>
+  void operator()(T /*unused*/, D /*unused*/) {}
+};
+
+struct TestCombineShiftRight {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T t, D d) {
+    TestCombineShiftRightBytesR<15>()(t, d);  // byte shifts, starting at 15
+    TestCombineShiftRightLanesR<16 / sizeof(T) - 1>()(t, d);  // largest lane shift
+  }
+};
+
+HWY_NOINLINE void TestAllCombineShiftRight() {  // via ForGE128Vectors
+  ForAllTypes(ForGE128Vectors<TestCombineShiftRight>());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+HWY_BEFORE_TEST(HwyCombineTest);  // one HWY_EXPORT_AND_TEST_P per TestAll*
+HWY_EXPORT_AND_TEST_P(HwyCombineTest, TestAllLowerHalf);
+HWY_EXPORT_AND_TEST_P(HwyCombineTest, TestAllUpperHalf);
+HWY_EXPORT_AND_TEST_P(HwyCombineTest, TestAllZeroExtendVector);
+HWY_EXPORT_AND_TEST_P(HwyCombineTest, TestAllCombine);
+HWY_EXPORT_AND_TEST_P(HwyCombineTest, TestAllCombineShiftRight);
+}  // namespace hwy
+#endif  // HWY_ONCE
+
+#else
+int main(int, char**) { return 0; }  // Empty program when HWY_TARGET == HWY_RVV.
+#endif  // HWY_TARGET != HWY_RVV
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/compare_test.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/compare_test.cc
new file mode 100644
index 0000000000..9e7803b87a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/compare_test.cc
@@ -0,0 +1,217 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>  // memset
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "tests/compare_test.cc"
+#include "hwy/foreach_target.h"
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// All types. MaskFromVec of an all-zero vector must equal MaskFalse, and of a
+// vector with every byte 0xFF must equal MaskTrue.
+struct TestMask {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t num_lanes = Lanes(d);
+    auto buffer = AllocateAligned<T>(num_lanes);
+    T* const lanes = buffer.get();
+
+    // Every lane zero => every mask lane false.
+    for (size_t i = 0; i < num_lanes; ++i) {
+      lanes[i] = T(0);
+    }
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), MaskFromVec(Load(d, lanes)));
+
+    // Every byte 0xFF => every mask lane true.
+    memset(lanes, 0xFF, num_lanes * sizeof(T));
+    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), MaskFromVec(Load(d, lanes)));
+  }
+};
+
+// Runs TestMask for all lane types via ForPartialVectors.
+HWY_NOINLINE void TestAllMask() {
+  const auto test = ForPartialVectors<TestMask>();
+  ForAllTypes(test);
+}
+
+// All types. Eq must report true for vectors with identical lanes and false
+// for Iota(2) vs. Iota(3).
+struct TestEquality {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto from2 = Iota(d, 2);
+    const auto from2_again = Iota(d, 2);
+    const auto from3 = Iota(d, 3);
+
+    // The same vector, and a separately constructed vector with equal lanes.
+    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), Eq(from2, from2));
+    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), Eq(from2, from2_again));
+
+    // Different starting values: expect no lane to compare equal.
+    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), Eq(from2, from3));
+  }
+};
+
+// Runs TestEquality for all lane types via ForPartialVectors.
+HWY_NOINLINE void TestAllEquality() {
+  const auto test = ForPartialVectors<TestEquality>();
+  ForAllTypes(test);
+}
+
+// Precondition: a > b. Broadcasts both values and verifies Gt/Lt in both
+// argument orders, plus that neither holds when comparing a vector with
+// itself. file and line are forwarded to AssertMaskEqual.
+template <class D>
+void EnsureGreater(D d, TFromD<D> a, TFromD<D> b, const char* file, int line) {
+  const auto all_false = MaskFalse(d);
+  const auto all_true = MaskTrue(d);
+  const auto bigger = Set(d, a);
+  const auto smaller = Set(d, b);
+
+  // Given order
+  AssertMaskEqual(d, all_true, Gt(bigger, smaller), file, line);
+  AssertMaskEqual(d, all_false, Lt(bigger, smaller), file, line);
+
+  // Swapped order
+  AssertMaskEqual(d, all_false, Gt(smaller, bigger), file, line);
+  AssertMaskEqual(d, all_true, Lt(smaller, bigger), file, line);
+
+  // Irreflexive: a value is neither greater nor less than itself.
+  AssertMaskEqual(d, all_false, Gt(bigger, bigger), file, line);
+  AssertMaskEqual(d, all_false, Gt(smaller, smaller), file, line);
+  AssertMaskEqual(d, all_false, Lt(bigger, bigger), file, line);
+  AssertMaskEqual(d, all_false, Lt(smaller, smaller), file, line);
+}
+
+// Calls EnsureGreater, passing the invoking file and line.
+#define HWY_ENSURE_GREATER(d, a, b) EnsureGreater(d, a, b, __FILE__, __LINE__)
+
+// Strict comparisons (Gt/Lt) on integer lanes: selected scalar pairs first,
+// then lane-varying vectors.
+struct TestStrictInt {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const T lowest = LimitsMin<T>();
+    const T highest = LimitsMax<T>();
+
+    // Individual values of interest
+    HWY_ENSURE_GREATER(d, 2, 1);
+    HWY_ENSURE_GREATER(d, 1, 0);
+    HWY_ENSURE_GREATER(d, 0, -1);
+    HWY_ENSURE_GREATER(d, -1, -2);
+    HWY_ENSURE_GREATER(d, highest, highest / 2);
+    HWY_ENSURE_GREATER(d, highest, 1);
+    HWY_ENSURE_GREATER(d, highest, 0);
+    HWY_ENSURE_GREATER(d, highest, -1);
+    HWY_ENSURE_GREATER(d, highest, lowest);
+    HWY_ENSURE_GREATER(d, 0, lowest);
+    HWY_ENSURE_GREATER(d, lowest / 2, lowest);
+
+    // Also use Iota to ensure lanes are independent
+    const auto zero = Zero(d);
+    const auto non_negative = And(Iota(d, T(2)), Set(d, 127));  // 0..127
+    const auto negative = Neg(non_negative) - Set(d, 1);        // -1..-128
+    const auto all_false = MaskFalse(d);
+    const auto all_true = MaskTrue(d);
+
+    HWY_ASSERT_MASK_EQ(d, all_true, Gt(non_negative, negative));
+    HWY_ASSERT_MASK_EQ(d, all_true, Lt(negative, non_negative));
+    HWY_ASSERT_MASK_EQ(d, all_false, Lt(non_negative, negative));
+    HWY_ASSERT_MASK_EQ(d, all_false, Gt(negative, non_negative));
+
+    // No vector is strictly less or greater than itself.
+    HWY_ASSERT_MASK_EQ(d, all_false, Lt(zero, zero));
+    HWY_ASSERT_MASK_EQ(d, all_false, Lt(non_negative, non_negative));
+    HWY_ASSERT_MASK_EQ(d, all_false, Lt(negative, negative));
+    HWY_ASSERT_MASK_EQ(d, all_false, Gt(zero, zero));
+    HWY_ASSERT_MASK_EQ(d, all_false, Gt(non_negative, non_negative));
+    HWY_ASSERT_MASK_EQ(d, all_false, Gt(negative, negative));
+  }
+};
+
+// Runs TestStrictInt for the signed types via ForExtendableVectors.
+HWY_NOINLINE void TestAllStrictInt() {
+  const auto test = ForExtendableVectors<TestStrictInt>();
+  ForSignedTypes(test);
+}
+
+// Strict comparisons (Gt/Lt) on floating-point lanes: selected scalar pairs
+// first, then lane-varying vectors.
+struct TestStrictFloat {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const T very_negative = -1E35;
+    const T very_positive = 1E36;
+
+    // Individual values of interest
+    HWY_ENSURE_GREATER(d, 2, 1);
+    HWY_ENSURE_GREATER(d, 1, 0);
+    HWY_ENSURE_GREATER(d, 0, -1);
+    HWY_ENSURE_GREATER(d, -1, -2);
+    HWY_ENSURE_GREATER(d, very_positive, 1);
+    HWY_ENSURE_GREATER(d, very_positive, 0);
+    HWY_ENSURE_GREATER(d, very_positive, -1);
+    HWY_ENSURE_GREATER(d, very_positive, very_negative);
+    HWY_ENSURE_GREATER(d, 0, very_negative);
+
+    // Also use Iota to ensure lanes are independent
+    const auto zero = Zero(d);
+    const auto positive = Iota(d, T(2));
+    const auto negative = Neg(positive);
+    const auto all_false = MaskFalse(d);
+    const auto all_true = MaskTrue(d);
+
+    HWY_ASSERT_MASK_EQ(d, all_true, Gt(positive, negative));
+    HWY_ASSERT_MASK_EQ(d, all_true, Lt(negative, positive));
+    HWY_ASSERT_MASK_EQ(d, all_false, Lt(positive, negative));
+    HWY_ASSERT_MASK_EQ(d, all_false, Gt(negative, positive));
+
+    // No vector is strictly less or greater than itself.
+    HWY_ASSERT_MASK_EQ(d, all_false, Lt(zero, zero));
+    HWY_ASSERT_MASK_EQ(d, all_false, Lt(positive, positive));
+    HWY_ASSERT_MASK_EQ(d, all_false, Lt(negative, negative));
+    HWY_ASSERT_MASK_EQ(d, all_false, Gt(zero, zero));
+    HWY_ASSERT_MASK_EQ(d, all_false, Gt(positive, positive));
+    HWY_ASSERT_MASK_EQ(d, all_false, Gt(negative, negative));
+  }
+};
+
+// Runs TestStrictFloat for the float types via ForExtendableVectors.
+HWY_NOINLINE void TestAllStrictFloat() {
+  const auto test = ForExtendableVectors<TestStrictFloat>();
+  ForFloatTypes(test);
+}
+
+// Non-strict comparisons (Ge/Le) on floating-point lanes.
+struct TestWeakFloat {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto upper = Iota(d, 2);
+    const auto lower = Iota(d, -T(Lanes(d)));
+    const auto all_false = MaskFalse(d);
+    const auto all_true = MaskTrue(d);
+
+    // A vector compared with itself satisfies both Ge and Le.
+    HWY_ASSERT_MASK_EQ(d, all_true, Ge(upper, upper));
+    HWY_ASSERT_MASK_EQ(d, all_true, Le(lower, lower));
+
+    // upper >= lower, in either spelling.
+    HWY_ASSERT_MASK_EQ(d, all_true, Ge(upper, lower));
+    HWY_ASSERT_MASK_EQ(d, all_true, Le(lower, upper));
+
+    // The reverse relation must not hold.
+    HWY_ASSERT_MASK_EQ(d, all_false, Le(upper, lower));
+    HWY_ASSERT_MASK_EQ(d, all_false, Ge(lower, upper));
+  }
+};
+
+// Runs TestWeakFloat for the float types via ForPartialVectors.
+HWY_NOINLINE void TestAllWeakFloat() {
+  const auto test = ForPartialVectors<TestWeakFloat>();
+  ForFloatTypes(test);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+// Registers the TestAll* functions of this file with the HwyCompareTest
+// suite. NOTE(review): the HWY_ONCE guard presumably keeps this from being
+// repeated for every compiled target - confirm against hwy/foreach_target.h.
+#if HWY_ONCE
+namespace hwy {
+HWY_BEFORE_TEST(HwyCompareTest);
+HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllMask);
+HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllEquality);
+HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllStrictInt);
+HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllStrictFloat);
+HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllWeakFloat);
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/convert_test.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/convert_test.cc
new file mode 100644
index 0000000000..870955fcaf
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/convert_test.cc
@@ -0,0 +1,568 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "tests/convert_test.cc"
+#include "hwy/foreach_target.h"
+
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// BitCasts a vector of T to lanes of ToT and verifies that the stored bytes
+// are unchanged. Called directly from TestAllBitCast or via TestBitCastFrom.
+template <typename ToT>
+struct TestBitCast {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const Repartition<ToT, D> dto;
+    const size_t num_bytes = Lanes(d) * sizeof(T);
+    // Both descriptors must cover the same number of bytes.
+    HWY_ASSERT_EQ(num_bytes, Lanes(dto) * sizeof(ToT));
+
+    const auto original = Iota(d, 1);
+    const auto casted = BitCast(dto, original);
+
+    // Must return the same bits: store both and compare the raw memory.
+    auto original_lanes = AllocateAligned<T>(Lanes(d));
+    auto casted_lanes = AllocateAligned<ToT>(Lanes(dto));
+    Store(original, d, original_lanes.get());
+    Store(casted, dto, casted_lanes.get());
+    HWY_ASSERT(
+        BytesEqual(original_lanes.get(), casted_lanes.get(), num_bytes));
+  }
+};
+
+// From D to all types: runs TestBitCast with every destination lane type that
+// the capability macros allow.
+struct TestBitCastFrom {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T t, D d) {
+    // Destinations that are not guarded by a capability macro.
+    TestBitCast<uint8_t>()(t, d);
+    TestBitCast<int8_t>()(t, d);
+    TestBitCast<uint16_t>()(t, d);
+    TestBitCast<int16_t>()(t, d);
+    TestBitCast<uint32_t>()(t, d);
+    TestBitCast<int32_t>()(t, d);
+    TestBitCast<float>()(t, d);
+
+#if HWY_CAP_INTEGER64
+    TestBitCast<uint64_t>()(t, d);
+    TestBitCast<int64_t>()(t, d);
+#endif
+
+#if HWY_CAP_FLOAT64
+    TestBitCast<double>()(t, d);
+#endif
+  }
+};
+
+// Runs TestBitCast for same-sized source/destination pairs on partial
+// vectors, then TestBitCastFrom (all destination types) on ForGE128Vectors.
+HWY_NOINLINE void TestAllBitCast() {
+  // For HWY_SCALAR and partial vectors, we can only cast to same-sized types:
+  // the former can't partition its single lane, and the latter can be smaller
+  // than a destination type.
+
+  // 8-bit destinations
+  const ForPartialVectors<TestBitCast<uint8_t>> to_u8;
+  to_u8(uint8_t());
+  to_u8(int8_t());
+
+  const ForPartialVectors<TestBitCast<int8_t>> to_i8;
+  to_i8(uint8_t());
+  to_i8(int8_t());
+
+  // 16-bit destinations
+  const ForPartialVectors<TestBitCast<uint16_t>> to_u16;
+  to_u16(uint16_t());
+  to_u16(int16_t());
+
+  const ForPartialVectors<TestBitCast<int16_t>> to_i16;
+  to_i16(uint16_t());
+  to_i16(int16_t());
+
+  // 32-bit integer destinations (sources include float)
+  const ForPartialVectors<TestBitCast<uint32_t>> to_u32;
+  to_u32(uint32_t());
+  to_u32(int32_t());
+  to_u32(float());
+
+  const ForPartialVectors<TestBitCast<int32_t>> to_i32;
+  to_i32(uint32_t());
+  to_i32(int32_t());
+  to_i32(float());
+
+  // 64-bit integer destinations; the double source additionally requires
+  // HWY_CAP_FLOAT64.
+#if HWY_CAP_INTEGER64
+  const ForPartialVectors<TestBitCast<uint64_t>> to_u64;
+  to_u64(uint64_t());
+  to_u64(int64_t());
+#if HWY_CAP_FLOAT64
+  to_u64(double());
+#endif
+
+  const ForPartialVectors<TestBitCast<int64_t>> to_i64;
+  to_i64(uint64_t());
+  to_i64(int64_t());
+#if HWY_CAP_FLOAT64
+  to_i64(double());
+#endif
+#endif  // HWY_CAP_INTEGER64
+
+  // float destination
+  const ForPartialVectors<TestBitCast<float>> to_float;
+  to_float(uint32_t());
+  to_float(int32_t());
+  to_float(float());
+
+  // double destination; the 64-bit integer sources additionally require
+  // HWY_CAP_INTEGER64.
+#if HWY_CAP_FLOAT64
+  const ForPartialVectors<TestBitCast<double>> to_double;
+  to_double(double());
+#if HWY_CAP_INTEGER64
+  to_double(uint64_t());
+  to_double(int64_t());
+#endif  // HWY_CAP_INTEGER64
+#endif  // HWY_CAP_FLOAT64
+
+  // For non-scalar vectors, we can cast all types to all.
+  ForAllTypes(ForGE128Vectors<TestBitCastFrom>());
+}
+
+// Promotes random T bit patterns to the wider ToT and compares each result
+// lane with the scalar T -> ToT conversion of the same input.
+template <typename ToT>
+struct TestPromoteTo {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D from_d) {
+    static_assert(sizeof(T) < sizeof(ToT), "Input type must be narrower");
+    const Rebind<ToT, D> to_d;
+    const size_t num_lanes = Lanes(from_d);
+
+    auto input = AllocateAligned<T>(num_lanes);
+    auto expected = AllocateAligned<ToT>(num_lanes);
+
+    RandomState rng;
+    for (size_t remaining = 200; remaining != 0; --remaining) {
+      for (size_t i = 0; i < num_lanes; ++i) {
+        // Take the first sizeof(T) bytes of a 64-bit random value as the lane.
+        const uint64_t bits = rng();
+        memcpy(input.get() + i, &bits, sizeof(T));
+        expected[i] = input[i];
+      }
+
+      const auto promoted = PromoteTo(to_d, Load(from_d, input.get()));
+      HWY_ASSERT_VEC_EQ(to_d, expected.get(), promoted);
+    }
+  }
+};
+
+// Runs TestPromoteTo for each supported (source, destination) lane-type pair.
+// The second ForPartialVectors argument is sizeof(destination) divided by
+// sizeof(source) for every pair listed here.
+HWY_NOINLINE void TestAllPromoteTo() {
+  // Unsigned destinations
+  ForPartialVectors<TestPromoteTo<uint16_t>, 2>()(uint8_t());
+  ForPartialVectors<TestPromoteTo<uint32_t>, 4>()(uint8_t());
+  ForPartialVectors<TestPromoteTo<uint32_t>, 2>()(uint16_t());
+
+  // Signed destinations, from unsigned and signed sources
+  ForPartialVectors<TestPromoteTo<int16_t>, 2>()(uint8_t());
+  ForPartialVectors<TestPromoteTo<int16_t>, 2>()(int8_t());
+
+  ForPartialVectors<TestPromoteTo<int32_t>, 2>()(uint16_t());
+  ForPartialVectors<TestPromoteTo<int32_t>, 2>()(int16_t());
+
+  ForPartialVectors<TestPromoteTo<int32_t>, 4>()(uint8_t());
+  ForPartialVectors<TestPromoteTo<int32_t>, 4>()(int8_t());
+
+  // Must test f16 separately because we can only load/store/convert them.
+
+#if HWY_CAP_INTEGER64
+  ForPartialVectors<TestPromoteTo<uint64_t>, 2>()(uint32_t());
+  ForPartialVectors<TestPromoteTo<int64_t>, 2>()(int32_t());
+#endif
+
+#if HWY_CAP_FLOAT64
+  ForPartialVectors<TestPromoteTo<double>, 2>()(int32_t());
+  ForPartialVectors<TestPromoteTo<double>, 2>()(float());
+#endif
+}
+
+// Non-float overload: always reports true. This wrapper avoids calling
+// std::isfinite for integer types (ambiguous).
+template <typename T, HWY_IF_NOT_FLOAT(T)>
+bool IsFinite(T /*unused*/) {
+  return true;
+}
+// Float overload: forwards to std::isfinite.
+template <typename T, HWY_IF_FLOAT(T)>
+bool IsFinite(T value) {
+  const bool finite = std::isfinite(value);
+  return finite;
+}
+
+// Demotes random finite T values to the narrower integer type ToT and expects
+// each lane to equal the input clamped to ToT's range and then cast.
+template <typename ToT>
+struct TestDemoteTo {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D from_d) {
+    static_assert(!IsFloat<ToT>(), "Use TestDemoteToFloat for float output");
+    static_assert(sizeof(T) > sizeof(ToT), "Input type must be wider");
+    const Rebind<ToT, D> to_d;
+    const size_t num_lanes = Lanes(from_d);
+
+    // Narrower range in the wider type, for clamping before we cast
+    const T lowest = LimitsMin<ToT>();
+    const T highest = LimitsMax<ToT>();
+
+    auto input = AllocateAligned<T>(num_lanes);
+    auto expected = AllocateAligned<ToT>(num_lanes);
+
+    RandomState rng;
+    for (size_t remaining = 1000; remaining != 0; --remaining) {
+      for (size_t i = 0; i < num_lanes; ++i) {
+        // Draw random bit patterns until the lane holds a finite value.
+        do {
+          const uint64_t bits = rng();
+          memcpy(input.get() + i, &bits, sizeof(T));
+        } while (!IsFinite(input[i]));
+
+        const T raised = std::max(lowest, input[i]);
+        const T clamped = std::min(raised, highest);
+        expected[i] = static_cast<ToT>(clamped);
+      }
+
+      const auto demoted = DemoteTo(to_d, Load(from_d, input.get()));
+      HWY_ASSERT_VEC_EQ(to_d, expected.get(), demoted);
+    }
+  }
+};
+
+// Runs TestDemoteTo for integer sources and narrower integer destinations.
+HWY_NOINLINE void TestAllDemoteToInt() {
+  // 8-bit destinations
+  const ForDemoteVectors<TestDemoteTo<uint8_t>, 2> i16_to_u8;
+  const ForDemoteVectors<TestDemoteTo<uint8_t>, 4> i32_to_u8;
+  i16_to_u8(int16_t());
+  i32_to_u8(int32_t());
+
+  const ForDemoteVectors<TestDemoteTo<int8_t>, 2> i16_to_i8;
+  const ForDemoteVectors<TestDemoteTo<int8_t>, 4> i32_to_i8;
+  i16_to_i8(int16_t());
+  i32_to_i8(int32_t());
+
+  // 16-bit destinations
+  ForDemoteVectors<TestDemoteTo<uint16_t>, 2>()(int32_t());
+  ForDemoteVectors<TestDemoteTo<int16_t>, 2>()(int32_t());
+}
+
+// Runs TestDemoteTo from double to int32_t; a no-op without HWY_CAP_FLOAT64.
+HWY_NOINLINE void TestAllDemoteToMixed() {
+#if HWY_CAP_FLOAT64
+  ForDemoteVectors<TestDemoteTo<int32_t>, 2>()(double());
+#endif
+}
+
+// Demotes random finite T values to the narrower float type ToT and expects
+// each lane to equal the input with its magnitude limited to
+// HighestValue<ToT>() and then cast.
+template <typename ToT>
+struct TestDemoteToFloat {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D from_d) {
+    // For floats, we clamp differently and cannot call LimitsMin.
+    static_assert(IsFloat<ToT>(), "Use TestDemoteTo for integer output");
+    static_assert(sizeof(T) > sizeof(ToT), "Input type must be wider");
+    const Rebind<ToT, D> to_d;
+    const size_t num_lanes = Lanes(from_d);
+
+    // Largest magnitude allowed in the output, expressed in the wider type.
+    const T max_abs = HighestValue<ToT>();
+
+    auto input = AllocateAligned<T>(num_lanes);
+    auto expected = AllocateAligned<ToT>(num_lanes);
+
+    RandomState rng;
+    for (size_t remaining = 1000; remaining != 0; --remaining) {
+      for (size_t i = 0; i < num_lanes; ++i) {
+        // Draw random bit patterns until the lane holds a finite value.
+        do {
+          const uint64_t bits = rng();
+          memcpy(input.get() + i, &bits, sizeof(T));
+        } while (!IsFinite(input[i]));
+
+        const T limited_magnitude = std::min(std::abs(input[i]), max_abs);
+        // NOTE: std:: version from C++11 cmath is not defined in RVV GCC, see
+        // https://lists.freebsd.org/pipermail/freebsd-current/2014-January/048130.html
+        const T clipped = copysign(limited_magnitude, input[i]);
+        expected[i] = static_cast<ToT>(clipped);
+      }
+
+      const auto demoted = DemoteTo(to_d, Load(from_d, input.get()));
+      HWY_ASSERT_VEC_EQ(to_d, expected.get(), demoted);
+    }
+  }
+};
+
+// Runs TestDemoteToFloat from double to float; a no-op without
+// HWY_CAP_FLOAT64.
+HWY_NOINLINE void TestAllDemoteToFloat() {
+  // Must test f16 separately because we can only load/store/convert them.
+
+#if HWY_CAP_FLOAT64
+  ForDemoteVectors<TestDemoteToFloat<float>, 2>()(double());
+#endif
+}
+
+// Builds the float inputs used by the float16 round-trip test.
+//
+// d:      descriptor whose lane count determines the padding.
+// padded: output; set to the number of test cases rounded up (RoundUpTo) to
+//         Lanes(d), which is also the length of the returned array.
+// Returns an aligned array holding the test cases followed by 0.0f padding.
+template <class D>
+AlignedFreeUniquePtr<float[]> F16TestCases(D d, size_t& padded) {
+  const float test_cases[] = {
+      // +/- 1
+      1.0f, -1.0f,
+      // +/- 0
+      0.0f, -0.0f,
+      // near 0
+      0.25f, -0.25f,
+      // +/- integer
+      4.0f, -32.0f,
+      // positive near limit
+      65472.0f, 65504.0f,
+      // negative near limit
+      -65472.0f, -65504.0f,
+      // positive +/- delta
+      2.00390625f, 3.99609375f,
+      // negative +/- delta
+      -2.00390625f, -3.99609375f,
+      // No infinity/NaN - implementation-defined due to ARM.
+  };
+  const size_t kNumTestCases = sizeof(test_cases) / sizeof(test_cases[0]);
+  const size_t N = Lanes(d);
+  padded = RoundUpTo(kNumTestCases, N);  // allow loading whole vectors
+  // Only the input buffer is needed here; the previous unused `expected`
+  // allocation has been removed.
+  auto in = AllocateAligned<float>(padded);
+  std::copy(test_cases, test_cases + kNumTestCases, in.get());
+  std::fill(in.get() + kNumTestCases, in.get() + padded, 0.0f);
+  return in;
+}
+
+// Round-trips the F16TestCases inputs through float16: DemoteTo, Store, Load,
+// PromoteTo, and expects the original float vector back.
+struct TestF16 {
+  template <typename TF32, class DF32>
+  HWY_NOINLINE void operator()(TF32 /*t*/, DF32 d32) {
+    const Rebind<float16_t, DF32> d16;
+    const size_t num_lanes = Lanes(d32);  // same count for f16
+
+    size_t padded;
+    auto inputs = F16TestCases(d32, padded);
+    auto halves = AllocateAligned<float16_t>(num_lanes);
+
+    // Process one whole vector per iteration.
+    for (size_t pos = 0; pos < padded; pos += num_lanes) {
+      const auto original = Load(d32, inputs.get() + pos);
+      const auto demoted = DemoteTo(d16, original);
+      Store(demoted, d16, halves.get());
+      const auto round_trip = PromoteTo(d32, Load(d16, halves.get()));
+      HWY_ASSERT_VEC_EQ(d32, original, round_trip);
+    }
+  }
+};
+
+// Runs TestF16 with float as the source lane type.
+HWY_NOINLINE void TestAllF16() {
+  const ForDemoteVectors<TestF16, 2> test;
+  test(float());
+}
+
+// Compares U8FromU32(Iota(du32, k)) with Iota(du8, k) for k = 0 and k = 0x7F.
+// du32 is the descriptor of the uint32_t input; the uint8_t descriptor is
+// derived from it via Rebind.
+struct TestConvertU8 {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, const D du32) {
+    const Rebind<uint8_t, D> du8;
+    // The former `lanes8` buffer was written but never read, so it is gone.
+    HWY_ASSERT_VEC_EQ(du8, Iota(du8, 0), U8FromU32(Iota(du32, 0)));
+    HWY_ASSERT_VEC_EQ(du8, Iota(du8, 0x7F), U8FromU32(Iota(du32, 0x7F)));
+  }
+};
+
+// Runs TestConvertU8 with uint32_t as the source lane type.
+HWY_NOINLINE void TestAllConvertU8() {
+  const ForDemoteVectors<TestConvertU8, 4> test;
+  test(uint32_t());
+}
+
+// Checks that ConvertTo returns LimitsMax/LimitsMin of the signed integer type
+// for +/-1E20 inputs. Compiled out (no checks) when HWY_TARGET is HWY_NEON.
+//
+// Separate function to attempt to work around a compiler bug on ARM: when this
+// is merged with TestIntFromFloat, outputs match a previous Iota(-(N+1)) input.
+struct TestIntFromFloatHuge {
+  template <typename TF, class DF>
+  HWY_NOINLINE void operator()(TF /*unused*/, const DF df) {
+    // Still does not work, although ARMv7 manual says that float->int
+    // saturates, i.e. chooses the nearest representable value.
+#if HWY_TARGET != HWY_NEON
+    using TI = MakeSigned<TF>;
+    const Rebind<TI, DF> di;
+
+    // Huge positive (lvalue works around GCC bug, tested with 10.2.1, where
+    // the expected i32 value is otherwise 0x80..00).
+    const auto expected_max = Set(di, LimitsMax<TI>());
+    HWY_ASSERT_VEC_EQ(di, expected_max, ConvertTo(di, Set(df, TF(1E20))));
+
+    // Huge negative (also lvalue for safety, but GCC bug was not triggered)
+    const auto expected_min = Set(di, LimitsMin<TI>());
+    HWY_ASSERT_VEC_EQ(di, expected_min, ConvertTo(di, Set(df, TF(-1E20))));
+#else
+    (void)df;  // silences the unused-parameter warning when checks are skipped
+#endif
+  }
+};
+
+// Checks float -> signed integer ConvertTo: exact integers, values slightly
+// above/below integers (the expected results drop the fractional part), and
+// random finite inputs with the expectation clamped to the range of TI.
+struct TestIntFromFloat {
+  template <typename TF, class DF>
+  HWY_NOINLINE void operator()(TF /*unused*/, const DF df) {
+    using TI = MakeSigned<TF>;
+    const Rebind<TI, DF> di;
+    const size_t N = Lanes(df);
+
+    // Integer positive
+    HWY_ASSERT_VEC_EQ(di, Iota(di, TI(4)), ConvertTo(di, Iota(df, TF(4.0))));
+
+    // Integer negative
+    HWY_ASSERT_VEC_EQ(di, Iota(di, -TI(N)), ConvertTo(di, Iota(df, -TF(N))));
+
+    // Above positive
+    HWY_ASSERT_VEC_EQ(di, Iota(di, TI(2)), ConvertTo(di, Iota(df, TF(2.001))));
+
+    // Below positive
+    HWY_ASSERT_VEC_EQ(di, Iota(di, TI(3)), ConvertTo(di, Iota(df, TF(3.9999))));
+
+    const TF eps = static_cast<TF>(0.0001);
+    // Above negative
+    HWY_ASSERT_VEC_EQ(di, Iota(di, -TI(N)),
+                      ConvertTo(di, Iota(df, -TF(N + 1) + eps)));
+
+    // Below negative
+    HWY_ASSERT_VEC_EQ(di, Iota(di, -TI(N + 1)),
+                      ConvertTo(di, Iota(df, -TF(N + 1) - eps)));
+
+    // TF does not have enough precision to represent TI.
+    const double min = static_cast<double>(LimitsMin<TI>());
+    const double max = static_cast<double>(LimitsMax<TI>());
+
+    // Also check random values.
+    auto from = AllocateAligned<TF>(N);
+    auto expected = AllocateAligned<TI>(N);
+    RandomState rng;
+    for (size_t rep = 0; rep < 1000; ++rep) {
+      for (size_t i = 0; i < N; ++i) {
+        // Redraw until the random bit pattern is a finite value.
+        do {
+          const uint64_t bits = rng();
+          memcpy(&from[i], &bits, sizeof(TF));
+        } while (!std::isfinite(from[i]));
+        // Out-of-range inputs are expected to yield the nearest limit of TI.
+        if (from[i] >= max) {
+          expected[i] = LimitsMax<TI>();
+        } else if (from[i] <= min) {
+          expected[i] = LimitsMin<TI>();
+        } else {
+          expected[i] = static_cast<TI>(from[i]);
+        }
+      }
+
+      HWY_ASSERT_VEC_EQ(di, expected.get(),
+                        ConvertTo(di, Load(df, from.get())));
+    }
+  }
+};
+
+// Runs the huge-value checks, then the general float -> int checks, for the
+// float types.
+HWY_NOINLINE void TestAllIntFromFloat() {
+  const auto huge_values = ForPartialVectors<TestIntFromFloatHuge>();
+  const auto general = ForPartialVectors<TestIntFromFloat>();
+  ForFloatTypes(huge_values);
+  ForFloatTypes(general);
+}
+
+// Checks signed integer -> float ConvertTo for small positive/negative
+// integers and for the extreme values of TI.
+struct TestFloatFromInt {
+  template <typename TI, class DI>
+  HWY_NOINLINE void operator()(TI /*unused*/, const DI di) {
+    using TF = MakeFloat<TI>;
+    const Rebind<TF, DI> df;
+    const size_t num_lanes = Lanes(df);
+    const TI int_max = LimitsMax<TI>();
+    const TI int_min = LimitsMin<TI>();
+
+    // Integer positive
+    HWY_ASSERT_VEC_EQ(df, Iota(df, TF(4.0)), ConvertTo(df, Iota(di, TI(4))));
+
+    // Integer negative
+    HWY_ASSERT_VEC_EQ(df, Iota(df, -TF(num_lanes)),
+                      ConvertTo(df, Iota(di, -TI(num_lanes))));
+
+    // Max positive
+    HWY_ASSERT_VEC_EQ(df, Set(df, TF(int_max)), ConvertTo(df, Set(di, int_max)));
+
+    // Min negative
+    HWY_ASSERT_VEC_EQ(df, Set(df, TF(int_min)), ConvertTo(df, Set(di, int_min)));
+  }
+};
+
+// Runs TestFloatFromInt for int32_t, and for int64_t when both 64-bit
+// capability macros are set.
+HWY_NOINLINE void TestAllFloatFromInt() {
+  const auto test = ForPartialVectors<TestFloatFromInt>();
+  test(int32_t());
+#if HWY_CAP_FLOAT64 && HWY_CAP_INTEGER64
+  test(int64_t());
+#endif
+}
+
+// Checks conversions between double-like TF and int32_t in both directions:
+// DemoteTo (TF -> int32_t) and PromoteTo (int32_t -> TF).
+struct TestI32F64 {
+  template <typename TF, class DF>
+  HWY_NOINLINE void operator()(TF /*unused*/, const DF df) {
+    using TI = int32_t;
+    const Rebind<TI, DF> di;
+    const size_t N = Lanes(df);
+
+    // Integer positive
+    HWY_ASSERT_VEC_EQ(di, Iota(di, TI(4)), DemoteTo(di, Iota(df, TF(4.0))));
+    HWY_ASSERT_VEC_EQ(df, Iota(df, TF(4.0)), PromoteTo(df, Iota(di, TI(4))));
+
+    // Integer negative
+    HWY_ASSERT_VEC_EQ(di, Iota(di, -TI(N)), DemoteTo(di, Iota(df, -TF(N))));
+    HWY_ASSERT_VEC_EQ(df, Iota(df, -TF(N)), PromoteTo(df, Iota(di, -TI(N))));
+
+    // Above positive
+    HWY_ASSERT_VEC_EQ(di, Iota(di, TI(2)), DemoteTo(di, Iota(df, TF(2.001))));
+    HWY_ASSERT_VEC_EQ(df, Iota(df, TF(2.0)), PromoteTo(df, Iota(di, TI(2))));
+
+    // Below positive
+    HWY_ASSERT_VEC_EQ(di, Iota(di, TI(3)), DemoteTo(di, Iota(df, TF(3.9999))));
+    // NOTE(review): this PromoteTo check is identical to the one under
+    // "Integer positive" (starts at 4, not 3) - confirm whether intended.
+    HWY_ASSERT_VEC_EQ(df, Iota(df, TF(4.0)), PromoteTo(df, Iota(di, TI(4))));
+
+    const TF eps = static_cast<TF>(0.0001);
+    // Above negative
+    HWY_ASSERT_VEC_EQ(di, Iota(di, -TI(N)),
+                      DemoteTo(di, Iota(df, -TF(N + 1) + eps)));
+    HWY_ASSERT_VEC_EQ(df, Iota(df, TF(-4.0)), PromoteTo(df, Iota(di, TI(-4))));
+
+    // Below negative
+    HWY_ASSERT_VEC_EQ(di, Iota(di, -TI(N + 1)),
+                      DemoteTo(di, Iota(df, -TF(N + 1) - eps)));
+    HWY_ASSERT_VEC_EQ(df, Iota(df, TF(-2.0)), PromoteTo(df, Iota(di, TI(-2))));
+
+    // Huge positive float: expected to demote to LimitsMax<TI>().
+    HWY_ASSERT_VEC_EQ(di, Set(di, LimitsMax<TI>()),
+                      DemoteTo(di, Set(df, TF(1E12))));
+
+    // Huge negative float: expected to demote to LimitsMin<TI>().
+    HWY_ASSERT_VEC_EQ(di, Set(di, LimitsMin<TI>()),
+                      DemoteTo(di, Set(df, TF(-1E12))));
+
+    // Max positive int
+    HWY_ASSERT_VEC_EQ(df, Set(df, TF(LimitsMax<TI>())),
+                      PromoteTo(df, Set(di, LimitsMax<TI>())));
+
+    // Min negative int
+    HWY_ASSERT_VEC_EQ(df, Set(df, TF(LimitsMin<TI>())),
+                      PromoteTo(df, Set(di, LimitsMin<TI>())));
+  }
+};
+
+// Runs TestI32F64 with double lanes; a no-op without HWY_CAP_FLOAT64.
+HWY_NOINLINE void TestAllI32F64() {
+#if HWY_CAP_FLOAT64
+  const ForDemoteVectors<TestI32F64, 2> test;
+  test(double());
+#endif
+}
+
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+// Registers the TestAll* functions of this file with the HwyConvertTest
+// suite. NOTE(review): the HWY_ONCE guard presumably keeps this from being
+// repeated for every compiled target - confirm against hwy/foreach_target.h.
+#if HWY_ONCE
+namespace hwy {
+HWY_BEFORE_TEST(HwyConvertTest);
+HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllBitCast);
+HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllPromoteTo);
+HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllDemoteToInt);
+HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllDemoteToMixed);
+HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllDemoteToFloat);
+HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllF16);
+HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllConvertU8);
+HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllIntFromFloat);
+HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllFloatFromInt);
+HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllI32F64);
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/list_targets.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/list_targets.cc
new file mode 100644
index 0000000000..4b0cdcedd2
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/list_targets.cc
@@ -0,0 +1,34 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Simple tool to print the list of targets that were compiled in when building
+// this tool.
+
+#include <stdio.h>
+
+#include "hwy/highway.h"
+
+// Writes msg to stderr, then " <name>" for every bit set in targets (lowest
+// bit first, name obtained from hwy::TargetName), then a newline.
+void PrintTargets(const char* msg, uint32_t targets) {
+  fprintf(stderr, "%s", msg);
+
+  unsigned remaining = targets;
+  while (remaining != 0) {
+    // Isolate the lowest set bit.
+    const unsigned lowest_bit = remaining & (~remaining + 1);
+    fprintf(stderr, " %s", hwy::TargetName(lowest_bit));
+    // Clear the bit that was just printed.
+    remaining &= remaining - 1;
+  }
+
+  fprintf(stderr, "\n");
+}
+
+// Prints the HWY_TARGETS and HWY_BASELINE_TARGETS bit sets by name.
+int main() {
+  const struct {
+    const char* label;
+    uint32_t targets;
+  } kLists[] = {
+      {"Compiled HWY_TARGETS:", static_cast<uint32_t>(HWY_TARGETS)},
+      {"HWY_BASELINE_TARGETS:", static_cast<uint32_t>(HWY_BASELINE_TARGETS)},
+  };
+
+  for (const auto& list : kLists) {
+    PrintTargets(list.label, list.targets);
+  }
+  return 0;
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/logical_test.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/logical_test.cc
new file mode 100644
index 0000000000..c409c1117d
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/logical_test.cc
@@ -0,0 +1,730 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>  // memcmp
+
+#include "hwy/base.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "tests/logical_test.cc"
+#include "hwy/foreach_target.h"
+
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+struct TestLogicalInteger {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto zero = Zero(d);
+    const auto iota = Iota(d, 0);
+    const auto all_ones = VecFromMask(d, Eq(zero, zero));
+    const auto one = Set(d, 1);
+    const auto not_one = Set(d, ~T(1));
+    // Not: complementing swaps zero <-> all-ones and 1 <-> ~1.
+    HWY_ASSERT_VEC_EQ(d, zero, Not(all_ones));
+    HWY_ASSERT_VEC_EQ(d, all_ones, Not(zero));
+    HWY_ASSERT_VEC_EQ(d, one, Not(not_one));
+    HWY_ASSERT_VEC_EQ(d, not_one, Not(one));
+    // And: zero absorbs; a value ANDed with itself is unchanged.
+    HWY_ASSERT_VEC_EQ(d, zero, And(zero, iota));
+    HWY_ASSERT_VEC_EQ(d, zero, And(iota, zero));
+    HWY_ASSERT_VEC_EQ(d, iota, And(iota, iota));
+    // Or: zero is the identity; a value ORed with itself is unchanged.
+    HWY_ASSERT_VEC_EQ(d, iota, Or(zero, iota));
+    HWY_ASSERT_VEC_EQ(d, iota, Or(iota, zero));
+    HWY_ASSERT_VEC_EQ(d, iota, Or(iota, iota));
+    // Xor: zero is the identity; a value XORed with itself cancels to zero.
+    HWY_ASSERT_VEC_EQ(d, iota, Xor(zero, iota));
+    HWY_ASSERT_VEC_EQ(d, iota, Xor(iota, zero));
+    HWY_ASSERT_VEC_EQ(d, zero, Xor(iota, iota));
+    // AndNot: the expectations below are consistent with (~first) & second.
+    HWY_ASSERT_VEC_EQ(d, iota, AndNot(zero, iota));
+    HWY_ASSERT_VEC_EQ(d, zero, AndNot(iota, zero));
+    HWY_ASSERT_VEC_EQ(d, zero, AndNot(iota, iota));
+    // Same operations again, this time assigning back into a mutable vector.
+    auto acc = iota;
+    acc = And(acc, iota);
+    HWY_ASSERT_VEC_EQ(d, iota, acc);
+    acc = And(acc, zero);
+    HWY_ASSERT_VEC_EQ(d, zero, acc);
+    // acc is zero here; Or restores iota and then keeps it.
+    acc = Or(acc, iota);
+    HWY_ASSERT_VEC_EQ(d, iota, acc);
+    acc = Or(acc, zero);
+    HWY_ASSERT_VEC_EQ(d, iota, acc);
+    // acc is iota here; Xor with iota cancels it and zero leaves it alone.
+    acc = Xor(acc, iota);
+    HWY_ASSERT_VEC_EQ(d, zero, acc);
+    acc = Xor(acc, zero);
+    HWY_ASSERT_VEC_EQ(d, zero, acc);
+  }
+};
+// Instantiates TestLogicalInteger through ForIntegerTypes / ForPartialVectors.
+HWY_NOINLINE void TestAllLogicalInteger() {
+  ForIntegerTypes(ForPartialVectors<TestLogicalInteger>());
+}
+
+struct TestLogicalFloat {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto zero = Zero(d);
+    const auto iota = Iota(d, 0);
+    // And: zero absorbs; a value ANDed with itself is unchanged.
+    HWY_ASSERT_VEC_EQ(d, zero, And(zero, iota));
+    HWY_ASSERT_VEC_EQ(d, zero, And(iota, zero));
+    HWY_ASSERT_VEC_EQ(d, iota, And(iota, iota));
+    // Or: zero is the identity; a value ORed with itself is unchanged.
+    HWY_ASSERT_VEC_EQ(d, iota, Or(zero, iota));
+    HWY_ASSERT_VEC_EQ(d, iota, Or(iota, zero));
+    HWY_ASSERT_VEC_EQ(d, iota, Or(iota, iota));
+    // Xor: zero is the identity; a value XORed with itself cancels to zero.
+    HWY_ASSERT_VEC_EQ(d, iota, Xor(zero, iota));
+    HWY_ASSERT_VEC_EQ(d, iota, Xor(iota, zero));
+    HWY_ASSERT_VEC_EQ(d, zero, Xor(iota, iota));
+    // AndNot: the expectations below are consistent with (~first) & second.
+    HWY_ASSERT_VEC_EQ(d, iota, AndNot(zero, iota));
+    HWY_ASSERT_VEC_EQ(d, zero, AndNot(iota, zero));
+    HWY_ASSERT_VEC_EQ(d, zero, AndNot(iota, iota));
+    // Same operations again, this time assigning back into a mutable vector.
+    auto acc = iota;
+    acc = And(acc, iota);
+    HWY_ASSERT_VEC_EQ(d, iota, acc);
+    acc = And(acc, zero);
+    HWY_ASSERT_VEC_EQ(d, zero, acc);
+    // acc is zero here; Or restores iota and then keeps it.
+    acc = Or(acc, iota);
+    HWY_ASSERT_VEC_EQ(d, iota, acc);
+    acc = Or(acc, zero);
+    HWY_ASSERT_VEC_EQ(d, iota, acc);
+    // acc is iota here; Xor with iota cancels it and zero leaves it alone.
+    acc = Xor(acc, iota);
+    HWY_ASSERT_VEC_EQ(d, zero, acc);
+    acc = Xor(acc, zero);
+    HWY_ASSERT_VEC_EQ(d, zero, acc);
+  }
+};
+// Instantiates TestLogicalFloat through ForFloatTypes / ForPartialVectors.
+HWY_NOINLINE void TestAllLogicalFloat() {
+  ForFloatTypes(ForPartialVectors<TestLogicalFloat>());
+}
+
+struct TestCopySign {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto zero = Zero(d);
+    const auto positive = Iota(d, 1);
+    const auto negative = Iota(d, T(-1E5));  // stays negative while N < 10^5
+
+    // A zero magnitude compares equal to zero whichever sign is copied in.
+    HWY_ASSERT_VEC_EQ(d, zero, CopySign(zero, zero));
+    HWY_ASSERT_VEC_EQ(d, zero, CopySign(zero, positive));
+    HWY_ASSERT_VEC_EQ(d, zero, CopySign(zero, negative));
+    HWY_ASSERT_VEC_EQ(d, zero, CopySignToAbs(zero, zero));
+    HWY_ASSERT_VEC_EQ(d, zero, CopySignToAbs(zero, positive));
+    HWY_ASSERT_VEC_EQ(d, zero, CopySignToAbs(zero, negative));
+
+    // Positive magnitude + positive sign: value is returned as-is.
+    HWY_ASSERT_VEC_EQ(d, positive, CopySign(positive, positive));
+    HWY_ASSERT_VEC_EQ(d, positive, CopySignToAbs(positive, positive));
+
+    // Positive magnitude + negative sign: value is negated.
+    HWY_ASSERT_VEC_EQ(d, Neg(positive), CopySign(positive, negative));
+    HWY_ASSERT_VEC_EQ(d, Neg(positive), CopySignToAbs(positive, negative));
+
+    // Negative magnitude + negative sign: value is returned as-is.
+    HWY_ASSERT_VEC_EQ(d, negative, CopySign(negative, negative));
+
+    // Negative magnitude + positive sign: value is negated.
+    HWY_ASSERT_VEC_EQ(d, Neg(negative), CopySign(negative, positive));
+  }
+};
+// Instantiates TestCopySign through ForFloatTypes / ForPartialVectors.
+HWY_NOINLINE void TestAllCopySign() {
+  ForFloatTypes(ForPartialVectors<TestCopySign>());
+}
+
+struct TestFirstN {  // Checks FirstN(d, len) against a mask built from stored lanes.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    auto mask_lanes = AllocateAligned<T>(N);
+
+    // GCC workaround: we previously used zero to indicate true because we can
+    // safely compare with that value. However, that hits an ICE for u64x1 on
+    // GCC 8.3 but not 8.4, even if the implementation of operator== is
+    // simplified to return zero. Using MaskFromVec avoids this, and requires
+    // FF..FF and 0 constants.
+    T on;
+    memset(&on, 0xFF, sizeof(on));  // all bits set, written bytewise so any T works
+    const T off = 0;
+
+    for (size_t len = 0; len <= N; ++len) {  // covers len == 0 and len == N too
+      for (size_t i = 0; i < N; ++i) {
+        mask_lanes[i] = i < len ? on : off;  // only the first `len` lanes are "on"
+      }
+      const auto mask_vals = Load(d, mask_lanes.get());
+      const auto mask = MaskFromVec(mask_vals);
+      HWY_ASSERT_MASK_EQ(d, mask, FirstN(d, len));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllFirstN() {  // Runs TestFirstN via ForAllTypes/ForPartialVectors.
+  ForAllTypes(ForPartialVectors<TestFirstN>());
+}
+
+struct TestIfThenElse {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    RandomState rng;
+    // Scratch buffers: two random inputs, the selector lanes and the reference.
+    const size_t N = Lanes(d);
+    auto yes_lanes = AllocateAligned<T>(N);
+    auto no_lanes = AllocateAligned<T>(N);
+    auto selector = AllocateAligned<T>(N);
+    auto expected = AllocateAligned<T>(N);
+
+    // Inverted encoding: a selector lane of zero yields mask=true, because
+    // all-bits-set cannot be compared reliably (it is NaN for float types).
+    const T kMaskFalse = 1;
+
+    // Repeat so that every lane gets a chance of being mask=true.
+    for (size_t rep = 0; rep < 50; ++rep) {
+      for (size_t i = 0; i < N; ++i) {
+        yes_lanes[i] = static_cast<T>(Random32(&rng));
+        no_lanes[i] = static_cast<T>(Random32(&rng));
+        selector[i] = (Random32(&rng) & 1024) ? kMaskFalse : T(0);
+      }
+      // The mask is true exactly where the selector lane is zero.
+      const auto v_yes = Load(d, yes_lanes.get());
+      const auto v_no = Load(d, no_lanes.get());
+      const auto mask = Eq(Load(d, selector.get()), Zero(d));
+      // IfThenElse: first operand where the mask is true, else the second.
+      for (size_t i = 0; i < N; ++i) {
+        expected[i] = (selector[i] != kMaskFalse) ? yes_lanes[i] : no_lanes[i];
+      }
+      HWY_ASSERT_VEC_EQ(d, expected.get(), IfThenElse(mask, v_yes, v_no));
+      // IfThenElseZero: operand where the mask is true, else zero.
+      for (size_t i = 0; i < N; ++i) {
+        expected[i] = selector[i] ? T(0) : yes_lanes[i];
+      }
+      HWY_ASSERT_VEC_EQ(d, expected.get(), IfThenElseZero(mask, v_yes));
+      // IfThenZeroElse: zero where the mask is true, else the operand.
+      for (size_t i = 0; i < N; ++i) {
+        expected[i] = selector[i] ? no_lanes[i] : T(0);
+      }
+      HWY_ASSERT_VEC_EQ(d, expected.get(), IfThenZeroElse(mask, v_no));
+    }
+  }
+};
+// Instantiates TestIfThenElse through ForAllTypes / ForPartialVectors.
+HWY_NOINLINE void TestAllIfThenElse() {
+  ForAllTypes(ForPartialVectors<TestIfThenElse>());
+}
+
+struct TestMaskVec {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    RandomState rng;
+    // Round trip under test: mask -> VecFromMask -> MaskFromVec == mask.
+    const size_t N = Lanes(d);
+    auto zero_or_one = AllocateAligned<T>(N);
+
+    // Repeat so that every lane gets a chance of being mask=true.
+    for (size_t rep = 0; rep < 100; ++rep) {
+      for (size_t i = 0; i < N; ++i) {
+        zero_or_one[i] = static_cast<T>(Random32(&rng) & 1);
+      }
+      const auto is_zero = Eq(Load(d, zero_or_one.get()), Zero(d));
+      const auto mask = RebindMask(d, is_zero);
+      HWY_ASSERT_MASK_EQ(d, mask, MaskFromVec(VecFromMask(d, mask)));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllMaskVec() {
+  const ForPartialVectors<TestMaskVec> run;
+  // 16-bit lanes.
+  run(uint16_t{});
+  run(int16_t{});
+  // TODO(janwas): float16_t - cannot compare yet
+  // 32-bit lanes.
+  run(uint32_t{});
+  run(int32_t{});
+  run(float{});
+  // 64-bit lanes, only when the corresponding capability macro is nonzero.
+#if HWY_CAP_INTEGER64
+  run(uint64_t{});
+  run(int64_t{});
+#endif
+#if HWY_CAP_FLOAT64
+  run(double{});
+#endif
+}
+
+struct TestCompress {  // Checks Compress and CompressStore against a scalar reference.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    RandomState rng;
+
+    using TU = MakeUnsigned<T>;  // mask lanes are held in the unsigned counterpart of T
+    const Rebind<TU, D> du;
+    const size_t N = Lanes(d);
+    auto in_lanes = AllocateAligned<T>(N);
+    auto mask_lanes = AllocateAligned<TU>(N);
+    auto expected = AllocateAligned<T>(N);
+    auto actual = AllocateAligned<T>(N);
+
+    // Each lane should have a chance of having mask=true.
+    for (size_t rep = 0; rep < 100; ++rep) {
+      size_t expected_pos = 0;  // number of selected lanes appended to `expected` so far
+      for (size_t i = 0; i < N; ++i) {
+        const uint64_t bits = Random32(&rng);  // NOTE(review): presumably 32 random bits; confirm 8-byte T is covered enough
+        in_lanes[i] = T();  // cannot initialize float16_t directly.
+        CopyBytes<sizeof(T)>(&bits, &in_lanes[i]);  // copies the first sizeof(T) bytes of `bits`
+        mask_lanes[i] = static_cast<TU>(Random32(&rng) & 1);
+        if (mask_lanes[i] == 0) {  // Zero means true (easier to compare)
+          expected[expected_pos++] = in_lanes[i];
+        }
+      }
+
+      const auto in = Load(d, in_lanes.get());
+      const auto mask = RebindMask(d, Eq(Load(du, mask_lanes.get()), Zero(du)));
+
+      Store(Compress(in, mask), d, actual.get());
+      // Upper lanes are undefined.
+      for (size_t i = 0; i < expected_pos; ++i) {  // so only the selected prefix is compared
+        HWY_ASSERT(memcmp(&actual[i], &expected[i], sizeof(T)) == 0);
+      }
+
+      // Also check CompressStore in the same way.
+      memset(actual.get(), 0, N * sizeof(T));  // clear so stale Compress output cannot pass
+      const size_t num_written = CompressStore(in, mask, d, actual.get());
+      HWY_ASSERT_EQ(expected_pos, num_written);
+      for (size_t i = 0; i < expected_pos; ++i) {
+        HWY_ASSERT(memcmp(&actual[i], &expected[i], sizeof(T)) == 0);
+      }
+    }
+  }
+};
+
+#if 0
+namespace detail {  // for code folding
+void PrintCompress16x8Tables() {
+  constexpr size_t kLanes = 8;  // 128-bit SIMD
+  // One row of kLanes entries per possible mask value.
+  for (uint64_t mask_bits = 0; mask_bits < 1ull << kLanes; ++mask_bits) {
+    std::array<uint8_t, kLanes> indices{0};
+    size_t num_selected = 0;
+    // Indices of set bits are packed to the front; the rest stay zero.
+    for (size_t lane = 0; lane < kLanes; ++lane) {
+      if ((mask_bits >> lane) & 1) indices[num_selected++] = lane;
+    }
+
+    // Doubled (for converting lane to byte indices)
+    for (const uint8_t lane_idx : indices) {
+      printf("%d,", 2 * lane_idx);
+    }
+  }
+  printf("\n");
+}
+
+// Compressed to nibbles
+void PrintCompress32x8Tables() {
+  constexpr size_t kLanes = 8;  // AVX2
+  for (uint64_t mask_bits = 0; mask_bits < 1ull << kLanes; ++mask_bits) {
+    std::array<uint32_t, kLanes> indices{0};
+    size_t num_selected = 0;
+    for (size_t lane = 0; lane < kLanes; ++lane) {
+      if ((mask_bits >> lane) & 1) {
+        indices[num_selected++] = lane;
+      }
+    }
+
+    // One nibble per entry; entry i lands in bits [4*i, 4*i + 4).
+    uint64_t packed = 0;
+    for (size_t i = 0; i < kLanes; ++i) {
+      HWY_ASSERT(indices[i] < 16);
+      packed += indices[i] << (i * 4);
+    }
+
+    HWY_ASSERT(packed < (1ull << 32));
+    printf("0x%08x,", static_cast<uint32_t>(packed));
+  }
+  printf("\n");
+}
+
+// Pairs of 32-bit lane indices
+void PrintCompress64x4Tables() {
+  constexpr size_t N = 4;  // AVX2
+  for (uint64_t code = 0; code < 1ull << N; ++code) {
+    std::array<uint32_t, N> indices{0};
+    size_t pos = 0;
+    for (size_t i = 0; i < N; ++i) {
+      if (code & (1ull << i)) {
+        indices[pos++] = static_cast<uint32_t>(i);  // i < N, so this cannot truncate
+      }
+    }
+    // Each 64-bit lane index is emitted as its two 32-bit lane indices.
+    for (size_t i = 0; i < N; ++i) {
+      printf("%u,%u,", 2 * indices[i], 2 * indices[i] + 1);  // unsigned args: %u, not %d
+    }
+  }
+  printf("\n");
+}
+
+// 4-tuple of byte indices
+void PrintCompress32x4Tables() {
+  using Lane = uint32_t;
+  constexpr size_t kLanes = 4;  // SSE4
+  for (uint64_t mask_bits = 0; mask_bits < 1ull << kLanes; ++mask_bits) {
+    std::array<uint32_t, kLanes> indices{0};
+    size_t num_selected = 0;
+    for (size_t lane = 0; lane < kLanes; ++lane) {
+      if ((mask_bits >> lane) & 1) indices[num_selected++] = lane;
+    }
+    // Indices of set bits are now packed to the front; the rest stay zero.
+    // Expand every lane index into the byte offsets that lane occupies.
+
+    for (const uint32_t lane_idx : indices) {
+      for (size_t byte = 0; byte < sizeof(Lane); ++byte) {
+        printf("%zu,", sizeof(Lane) * lane_idx + byte);
+      }
+    }
+  }
+  printf("\n");
+}
+
+// 8-tuple of byte indices
+void PrintCompress64x2Tables() {
+  using Lane = uint64_t;
+  constexpr size_t kLanes = 2;  // SSE4
+  for (uint64_t mask_bits = 0; mask_bits < 1ull << kLanes; ++mask_bits) {
+    std::array<uint32_t, kLanes> indices{0};
+    size_t num_selected = 0;
+    for (size_t lane = 0; lane < kLanes; ++lane) {
+      if ((mask_bits >> lane) & 1) indices[num_selected++] = lane;
+    }
+    // Indices of set bits are now packed to the front; the rest stay zero.
+    // Expand every lane index into the byte offsets that lane occupies.
+
+    for (const uint32_t lane_idx : indices) {
+      for (size_t byte = 0; byte < sizeof(Lane); ++byte) {
+        printf("%zu,", sizeof(Lane) * lane_idx + byte);
+      }
+    }
+  }
+  printf("\n");
+}
+}  // namespace detail
+#endif
+
+HWY_NOINLINE void TestAllCompress() {
+  // detail::PrintCompress32x8Tables();
+  // detail::PrintCompress64x4Tables();
+  // detail::PrintCompress32x4Tables();
+  // detail::PrintCompress64x2Tables();
+  // detail::PrintCompress16x8Tables();
+  // (The table printers above are compiled out by the #if 0 around them.)
+  const ForPartialVectors<TestCompress> run;
+  // 16-bit lanes, including float16_t.
+  run(uint16_t{});
+  run(int16_t{});
+  run(float16_t{});
+  // 32-bit lanes.
+  run(uint32_t{});
+  run(int32_t{});
+  run(float{});
+  // 64-bit lanes, only when the corresponding capability macro is nonzero.
+#if HWY_CAP_INTEGER64
+  run(uint64_t{});
+  run(int64_t{});
+#endif
+#if HWY_CAP_FLOAT64
+  run(double{});
+#endif
+}
+
+struct TestZeroIfNegative {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto zero = Zero(d);
+    const auto positive = Iota(d, 1);
+    const auto negative = Iota(d, T(-1E5));  // stays negative while N < 10^5
+
+    // Inputs that are not negative pass through untouched.
+    HWY_ASSERT_VEC_EQ(d, zero, ZeroIfNegative(zero));
+    HWY_ASSERT_VEC_EQ(d, positive, ZeroIfNegative(positive));
+
+    // Every negative lane comes back as zero.
+    HWY_ASSERT_VEC_EQ(d, zero, ZeroIfNegative(negative));
+  }
+};
+// Instantiates TestZeroIfNegative through ForFloatTypes / ForPartialVectors.
+HWY_NOINLINE void TestAllZeroIfNegative() {
+  ForFloatTypes(ForPartialVectors<TestZeroIfNegative>());
+}
+
+struct TestBroadcastSignBit {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto all_clear = Zero(d);
+    const auto all_set = Set(d, -1);  // all bits set
+    const auto vmax = Set(d, LimitsMax<T>());
+    const auto vpos = And(Iota(d, 0), vmax);  // masked with LimitsMax: sign bit clear
+    const auto vneg = all_set - vpos;         // -1 - vpos, hence negative
+    HWY_ASSERT_VEC_EQ(d, all_clear, BroadcastSignBit(vpos));  // sign bit clear => 0
+    HWY_ASSERT_VEC_EQ(d, all_clear, BroadcastSignBit(vmax));
+    // Sign bit set => every bit of the lane is set.
+    HWY_ASSERT_VEC_EQ(d, all_set, BroadcastSignBit(vneg));
+    HWY_ASSERT_VEC_EQ(d, all_set, BroadcastSignBit(Set(d, LimitsMin<T>())));
+    HWY_ASSERT_VEC_EQ(d, all_set, BroadcastSignBit(Set(d, LimitsMin<T>() / 2)));
+  }
+};
+// Instantiates TestBroadcastSignBit through ForSignedTypes / ForPartialVectors.
+HWY_NOINLINE void TestAllBroadcastSignBit() {
+  ForSignedTypes(ForPartialVectors<TestBroadcastSignBit>());
+}
+
+struct TestTestBit {  // Checks TestBit for every bit position of the lane type.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t kNumBits = sizeof(T) * 8;
+    for (size_t i = 0; i < kNumBits; ++i) {
+      const auto bit1 = Set(d, 1ull << i);  // bit i only
+      const auto bit2 = Set(d, 1ull << ((i + 1) % kNumBits));  // next bit, wrapping around
+      const auto bit3 = Set(d, 1ull << ((i + 2) % kNumBits));  // bit after that, wrapping
+      const auto bits12 = Or(bit1, bit2);
+      const auto bits23 = Or(bit2, bit3);
+      HWY_ASSERT(AllTrue(TestBit(bit1, bit1)));  // the queried bit is present in the value
+      HWY_ASSERT(AllTrue(TestBit(bits12, bit1)));
+      HWY_ASSERT(AllTrue(TestBit(bits12, bit2)));
+
+      HWY_ASSERT(AllFalse(TestBit(bits12, bit3)));  // from here on: queried bit is absent
+      HWY_ASSERT(AllFalse(TestBit(bits23, bit1)));
+      HWY_ASSERT(AllFalse(TestBit(bit1, bit2)));
+      HWY_ASSERT(AllFalse(TestBit(bit2, bit1)));
+      HWY_ASSERT(AllFalse(TestBit(bit1, bit3)));
+      HWY_ASSERT(AllFalse(TestBit(bit3, bit1)));
+      HWY_ASSERT(AllFalse(TestBit(bit2, bit3)));
+      HWY_ASSERT(AllFalse(TestBit(bit3, bit2)));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllTestBit() {  // Runs TestTestBit via ForIntegerTypes/ForPartialVectors.
+  ForIntegerTypes(ForPartialVectors<TestTestBit>());
+}
+
+struct TestAllTrueFalse {  // Checks AllTrue/AllFalse while one lane at a time is nonzero.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto zero = Zero(d);
+    auto v = zero;
+
+    const size_t N = Lanes(d);
+    auto lanes = AllocateAligned<T>(N);
+    std::fill(lanes.get(), lanes.get() + N, T(0));
+
+    auto mask_lanes = AllocateAligned<T>(N);  // scratch memory for the GCC workaround below
+
+    HWY_ASSERT(AllTrue(Eq(v, zero)));  // v is all-zero here, so every lane compares equal
+    HWY_ASSERT(!AllFalse(Eq(v, zero)));
+
+    // Single lane implies AllFalse = !AllTrue. Otherwise, there are multiple
+    // lanes and one is nonzero.
+    const bool expected_all_false = (N != 1);  // used via XOR: true means AllFalse must be false
+
+    // Set each lane to nonzero and back to zero
+    for (size_t i = 0; i < N; ++i) {
+      lanes[i] = T(1);
+      v = Load(d, lanes.get());
+
+      // GCC 10.2.1 workaround: AllTrue(Eq(v, zero)) is true but should not be.
+      // Assigning to an lvalue is insufficient but storing to memory prevents
+      // the bug; so does Print of VecFromMask(d, Eq(v, zero)).
+      Store(VecFromMask(d, Eq(v, zero)), d, mask_lanes.get());
+      HWY_ASSERT(!AllTrue(MaskFromVec(Load(d, mask_lanes.get()))));
+
+      HWY_ASSERT(expected_all_false ^ AllFalse(Eq(v, zero)));  // N == 1: AllFalse must hold
+
+      lanes[i] = T(-1);  // a second nonzero value for the same lane
+      v = Load(d, lanes.get());
+      HWY_ASSERT(!AllTrue(Eq(v, zero)));
+      HWY_ASSERT(expected_all_false ^ AllFalse(Eq(v, zero)));
+
+      // Reset to all zero
+      lanes[i] = T(0);
+      v = Load(d, lanes.get());
+      HWY_ASSERT(AllTrue(Eq(v, zero)));
+      HWY_ASSERT(!AllFalse(Eq(v, zero)));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllAllTrueFalse() {  // Runs the test via ForAllTypes/ForPartialVectors.
+  ForAllTypes(ForPartialVectors<TestAllTrueFalse>());
+}
+
+class TestStoreMaskBits {  // Checks the byte count and the bits written by StoreMaskBits.
+ public:
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*t*/, D d) {
+    // TODO(janwas): remove once implemented (cast or vse1)
+#if HWY_TARGET != HWY_RVV
+    RandomState rng;
+    const size_t N = Lanes(d);
+    auto lanes = AllocateAligned<T>(N);
+    const size_t expected_bytes = (N + 7) / 8;  // one bit per lane, rounded up to bytes
+    auto bits = AllocateAligned<uint8_t>(expected_bytes);
+
+    for (size_t rep = 0; rep < 100; ++rep) {
+      // Generate random mask pattern.
+      for (size_t i = 0; i < N; ++i) {
+        lanes[i] = static_cast<T>((Random32(&rng) & 1024) ? 1 : 0);
+      }
+      const auto mask = Eq(Load(d, lanes.get()), Zero(d));  // named op, as in sibling tests
+
+      const size_t bytes_written = StoreMaskBits(mask, bits.get());
+
+      HWY_ASSERT_EQ(expected_bytes, bytes_written);
+      size_t i = 0;
+      // Stored bits must match original mask
+      for (; i < N; ++i) {
+        const bool bit = (bits[i / 8] & (1 << (i % 8))) != 0;  // lane i -> bit i%8 of byte i/8
+        HWY_ASSERT_EQ(bit, lanes[i] == 0);
+      }
+      // Any partial bits in the last byte must be zero
+      for (; i < 8 * bytes_written; ++i) {
+        const int bit = (bits[i / 8] & (1 << (i % 8)));
+        HWY_ASSERT_EQ(bit, 0);
+      }
+    }
+#else
+    (void)d;
+#endif
+  }
+};
+
+HWY_NOINLINE void TestAllStoreMaskBits() {  // Runs the test via ForAllTypes/ForPartialVectors.
+  ForAllTypes(ForPartialVectors<TestStoreMaskBits>());
+}
+
+struct TestCountTrue {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    // Every zero/nonzero pattern is tried over (at most) the first 10 lanes.
+    const size_t tested_lanes = std::min(N, size_t(10));
+
+    auto lanes = AllocateAligned<T>(N);
+    std::fill(lanes.get(), lanes.get() + N, T(1));
+
+    for (size_t pattern = 0; pattern < (1ull << tested_lanes); ++pattern) {
+      // Zeros written == mask lanes that are true == expected count.
+      size_t expected = 0;
+      for (size_t i = 0; i < tested_lanes; ++i) {
+        // Bit i of the pattern decides whether lane i becomes zero.
+        const bool want_true = ((pattern >> i) & 1) != 0;
+        lanes[i] = want_true ? T(0) : T(1);
+        expected += want_true ? 1 : 0;
+      }
+
+      // Lanes past tested_lanes still hold 1, so they never count as true.
+      const auto mask = Eq(Load(d, lanes.get()), Zero(d));
+      const size_t actual = CountTrue(mask);
+      HWY_ASSERT_EQ(expected, actual);
+    }
+  }
+};
+// Instantiates TestCountTrue through ForAllTypes / ForPartialVectors.
+HWY_NOINLINE void TestAllCountTrue() {
+  ForAllTypes(ForPartialVectors<TestCountTrue>());
+}
+
+struct TestLogicalMask {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto none_true = MaskFalse(d);
+    const auto all_true = MaskTrue(d);
+
+    const size_t N = Lanes(d);
+    auto lanes = AllocateAligned<T>(N);
+    std::fill(lanes.get(), lanes.get() + N, T(1));
+    // Not must swap the two constant masks.
+    HWY_ASSERT_MASK_EQ(d, none_true, Not(all_true));
+    HWY_ASSERT_MASK_EQ(d, all_true, Not(none_true));
+
+    // Every zero/nonzero pattern is tried over (at most) the first 6 lanes.
+    const size_t tested_lanes = std::min(N, size_t(6));
+    for (size_t pattern = 0; pattern < (1ull << tested_lanes); ++pattern) {
+      for (size_t i = 0; i < tested_lanes; ++i) {
+        // Bit i of the pattern decides whether lane i becomes zero.
+        const bool want_true = ((pattern >> i) & 1) != 0;
+        lanes[i] = want_true ? T(0) : T(1);
+      }
+
+      // The mask is true exactly where a zero was written.
+      const auto mask = Eq(Load(d, lanes.get()), Zero(d));
+      // Combinations that must come out all-false.
+      HWY_ASSERT_MASK_EQ(d, none_true, Xor(mask, mask));
+      HWY_ASSERT_MASK_EQ(d, none_true, AndNot(mask, mask));
+      HWY_ASSERT_MASK_EQ(d, none_true, AndNot(all_true, mask));
+      // Combinations that must reproduce the mask unchanged.
+      HWY_ASSERT_MASK_EQ(d, mask, Or(mask, mask));
+      HWY_ASSERT_MASK_EQ(d, mask, Or(none_true, mask));
+      HWY_ASSERT_MASK_EQ(d, mask, Or(mask, none_true));
+      HWY_ASSERT_MASK_EQ(d, mask, Xor(none_true, mask));
+      HWY_ASSERT_MASK_EQ(d, mask, Xor(mask, none_true));
+      HWY_ASSERT_MASK_EQ(d, mask, And(mask, mask));
+      HWY_ASSERT_MASK_EQ(d, mask, And(all_true, mask));
+      HWY_ASSERT_MASK_EQ(d, mask, And(mask, all_true));
+      HWY_ASSERT_MASK_EQ(d, mask, AndNot(none_true, mask));
+    }
+  }
+};
+// Instantiates TestLogicalMask through ForAllTypes / ForPartialVectors.
+HWY_NOINLINE void TestAllLogicalMask() {
+  ForAllTypes(ForPartialVectors<TestLogicalMask>());
+}
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE  // NOTE(review): presumably true for only one per-target pass - confirm in hwy/foreach_target.h
+namespace hwy {
+HWY_BEFORE_TEST(HwyLogicalTest);
+HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllLogicalInteger);  // one entry per TestAll* driver
+HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllLogicalFloat);
+HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllCopySign);
+HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllFirstN);
+HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllIfThenElse);
+HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllMaskVec);
+HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllCompress);
+HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllZeroIfNegative);
+HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllBroadcastSignBit);
+HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllTestBit);
+HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllAllTrueFalse);
+HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllStoreMaskBits);
+HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllCountTrue);
+HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllLogicalMask);
+}  // namespace hwy
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/memory_test.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/memory_test.cc
new file mode 100644
index 0000000000..5303e0d060
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/memory_test.cc
@@ -0,0 +1,421 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Ensure incompatibilities with Windows macros (e.g. #define StoreFence) are
+// detected. Must come before Highway headers.
+#if defined(_WIN32) || defined(_WIN64)
+#include <windows.h>
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "tests/memory_test.cc"
+#include "hwy/cache_control.h"
+#include "hwy/foreach_target.h"
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+struct TestLoadStore {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    const auto upper = Iota(d, 1 + N);
+    const auto lower = Iota(d, 1);
+    auto src = AllocateAligned<T>(2 * N);  // ends up holding 1, 2, ..., 2N
+    Store(upper, d, src.get() + N);
+    Store(lower, d, src.get());
+
+    // Aligned load returns what was just stored.
+    const auto reloaded = Load(d, src.get());
+    HWY_ASSERT_VEC_EQ(d, reloaded, lower);
+
+    // Aligned store: rebuilding the buffer must give identical lanes.
+    auto dst = AllocateAligned<T>(2 * N);
+    Store(reloaded, d, dst.get());
+    Store(upper, d, dst.get() + N);
+    for (size_t i = 0; i < 2 * N; ++i) {
+      HWY_ASSERT_EQ(src[i], dst[i]);
+    }
+
+    // Unaligned load from one lane in: values start at 2.
+    const auto unaligned = LoadU(d, src.get() + 1);
+    auto shifted = AllocateAligned<T>(N);
+    Store(unaligned, d, shifted.get());
+    for (size_t i = 0; i < N; ++i) {
+      HWY_ASSERT_EQ(T(i + 2), shifted[i]);
+    }
+
+    // Unaligned store into the middle: lanes before it keep their values,
+    StoreU(reloaded, d, dst.get() + N / 2);
+    size_t i = 0;
+    for (; i < N / 2; ++i) {
+      HWY_ASSERT_EQ(src[i], dst[i]);
+    }
+    for (; i < 3 * N / 2; ++i) {  // the overwritten window restarts at 1,
+      HWY_ASSERT_EQ(T(i - N / 2 + 1), dst[i]);
+    }
+    // and the lanes after the window are untouched.
+    for (; i < 2 * N; ++i) {
+      HWY_ASSERT_EQ(T(i + 1), dst[i]);
+    }
+  }
+};
+// Instantiates TestLoadStore through ForAllTypes / ForPartialVectors.
+HWY_NOINLINE void TestAllLoadStore() {
+  ForAllTypes(ForPartialVectors<TestLoadStore>());
+}
+
+struct TestStoreInterleaved3 {  // Checks StoreInterleaved3 against a scalar interleave.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    RandomState rng;
+    // Data to be interleaved
+    auto bytes = AllocateAligned<uint8_t>(3 * N);
+    for (size_t i = 0; i < 3 * N; ++i) {
+      bytes[i] = static_cast<uint8_t>(Random32(&rng) & 0xFF);
+    }
+    const auto in0 = Load(d, &bytes[0 * N]);
+    const auto in1 = Load(d, &bytes[1 * N]);
+    const auto in2 = Load(d, &bytes[2 * N]);
+
+    // Interleave here, ensure vector results match scalar
+    auto expected = AllocateAligned<T>(4 * N);
+    auto actual_aligned = AllocateAligned<T>(4 * N + 1);
+    T* actual = actual_aligned.get() + 1;  // one past the allocation start: unaligned output
+
+    for (size_t rep = 0; rep < 100; ++rep) {
+      for (size_t i = 0; i < N; ++i) {
+        expected[3 * i + 0] = bytes[0 * N + i];
+        expected[3 * i + 1] = bytes[1 * N + i];
+        expected[3 * i + 2] = bytes[2 * N + i];
+        // Ensure we do not write more than 3*N bytes
+        expected[3 * N + i] = actual[3 * N + i] = 0;
+      }
+      StoreInterleaved3(in0, in1, in2, d, actual);
+      size_t pos = 0;
+      if (!BytesEqual(expected.get(), actual, 4 * N, &pos)) {
+        Print(d, "in0", in0, pos / 3);
+        Print(d, "in1", in1, pos / 3);
+        Print(d, "in2", in2, pos / 3);
+        const size_t begin = pos - pos % 3;  // first byte of the mismatching triple
+        fprintf(stderr, "interleaved");
+        for (size_t i = begin; i < begin + 6 && i < 4 * N; ++i) {  // stay inside `actual`
+          fprintf(stderr, "%s%d", (i == begin + 3) ? "  " : " ", actual[i]);
+        }
+        fprintf(stderr, "\n");
+        HWY_ASSERT(false);
+      }
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllStoreInterleaved3() {  // Runs the test for uint8_t lanes only.
+#if HWY_TARGET == HWY_RVV
+  // Segments are limited to 8 registers, so we can only go up to LMUL=2.
+  const ForExtendableVectors<TestStoreInterleaved3, 4> test;
+#else
+  const ForPartialVectors<TestStoreInterleaved3> test;
+#endif
+  test(uint8_t());
+}
+
+struct TestStoreInterleaved4 {  // Checks StoreInterleaved4 against a scalar interleave.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    RandomState rng;
+    // Data to be interleaved
+    auto bytes = AllocateAligned<uint8_t>(4 * N);
+    for (size_t i = 0; i < 4 * N; ++i) {
+      bytes[i] = static_cast<uint8_t>(Random32(&rng) & 0xFF);
+    }
+    const auto in0 = Load(d, &bytes[0 * N]);
+    const auto in1 = Load(d, &bytes[1 * N]);
+    const auto in2 = Load(d, &bytes[2 * N]);
+    const auto in3 = Load(d, &bytes[3 * N]);
+
+    // Interleave here, ensure vector results match scalar
+    auto expected = AllocateAligned<T>(5 * N);
+    auto actual_aligned = AllocateAligned<T>(5 * N + 1);
+    T* actual = actual_aligned.get() + 1;  // one past the allocation start: unaligned output
+
+    for (size_t rep = 0; rep < 100; ++rep) {
+      for (size_t i = 0; i < N; ++i) {
+        expected[4 * i + 0] = bytes[0 * N + i];
+        expected[4 * i + 1] = bytes[1 * N + i];
+        expected[4 * i + 2] = bytes[2 * N + i];
+        expected[4 * i + 3] = bytes[3 * N + i];
+        // Ensure we do not write more than 4*N bytes
+        expected[4 * N + i] = actual[4 * N + i] = 0;
+      }
+      StoreInterleaved4(in0, in1, in2, in3, d, actual);
+      size_t pos = 0;
+      if (!BytesEqual(expected.get(), actual, 5 * N, &pos)) {
+        Print(d, "in0", in0, pos / 4);
+        Print(d, "in1", in1, pos / 4);
+        Print(d, "in2", in2, pos / 4);
+        Print(d, "in3", in3, pos / 4);
+        const size_t begin = pos - pos % 4;  // first byte of the mismatching 4-tuple
+        fprintf(stderr, "interleaved");
+        for (size_t i = begin; i < begin + 8 && i < 5 * N; ++i) {  // stay inside `actual`
+          fprintf(stderr, "%s%d", (i == begin + 4) ? "  " : " ", actual[i]);
+        }
+        fprintf(stderr, "\n");
+        HWY_ASSERT(false);
+      }
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllStoreInterleaved4() {  // Runs the test for uint8_t lanes only.
+#if HWY_TARGET == HWY_RVV
+  // Segments are limited to 8 registers, so we can only go up to LMUL=2.
+  const ForExtendableVectors<TestStoreInterleaved4, 4> test;
+#else
+  const ForPartialVectors<TestStoreInterleaved4> test;
+#endif
+  test(uint8_t());
+}
+
+struct TestLoadDup128 {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    // LoadDup128 is not defined for the scalar target.
+#if HWY_TARGET != HWY_SCALAR || HWY_IDE
+    constexpr size_t kLanesPer128 = 16 / sizeof(T);
+    alignas(16) T block[kLanesPer128];
+    for (size_t i = 0; i < kLanesPer128; ++i) {
+      block[i] = static_cast<T>(1 + i);
+    }
+    // Reference: the 16-byte block 1, 2, ... repeated across all N lanes.
+    const size_t N = Lanes(d);
+    auto repeated = AllocateAligned<T>(N);
+    for (size_t i = 0; i < N; ++i) {
+      repeated[i] = static_cast<T>(i % kLanesPer128 + 1);
+    }
+
+    HWY_ASSERT_VEC_EQ(d, repeated.get(), LoadDup128(d, block));
+#else
+    (void)d;
+#endif
+  }
+};
+// Instantiates TestLoadDup128 through ForAllTypes / ForGE128Vectors.
+HWY_NOINLINE void TestAllLoadDup128() {
+  ForAllTypes(ForGE128Vectors<TestLoadDup128>());
+}
+
+struct TestStream {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto payload = Iota(d, T(1));
+    const size_t vector_bytes = Lanes(d) * sizeof(T);
+    const size_t round_mask = ~size_t(HWY_STREAM_MULTIPLE - 1);
+    const size_t affected_bytes = (vector_bytes + HWY_STREAM_MULTIPLE - 1) & round_mask;
+    const size_t affected_lanes = affected_bytes / sizeof(T);
+    auto out = AllocateAligned<T>(2 * affected_lanes);  // second half acts as a guard
+    std::fill(out.get(), out.get() + 2 * affected_lanes, T(0));
+
+    Stream(payload, d, out.get());
+    StoreFence();
+    const auto readback = Load(d, out.get());
+    HWY_ASSERT_VEC_EQ(d, payload, readback);
+    // The guard half must still be zero, i.e. Stream did not write past it.
+    for (size_t i = affected_lanes; i < 2 * affected_lanes; ++i) {
+      HWY_ASSERT_EQ(T(0), out[i]);
+    }
+  }
+};
+// Driver: 32/64-bit integer lanes plus whatever ForFloatTypes covers.
+HWY_NOINLINE void TestAllStream() {
+  const ForPartialVectors<TestStream> run;
+  // No u8,u16.
+  run(uint32_t{});
+  run(uint64_t{});
+  // No i8,i16.
+  run(int32_t{});
+  run(int64_t{});
+  ForFloatTypes(run);
+}
+
+// Checks ScatterOffset and ScatterIndex. Assumes little-endian byte order!
+struct TestScatter {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    using Offset = MakeSigned<T>;  // offsets/indices are held in the signed counterpart of T
+
+    const size_t N = Lanes(d);
+    const size_t range = 4 * N;                  // number of items to scatter
+    const size_t max_bytes = range * sizeof(T);  // upper bound on offset
+
+    RandomState rng;
+
+    // Data to be scattered
+    auto bytes = AllocateAligned<uint8_t>(max_bytes);
+    for (size_t i = 0; i < max_bytes; ++i) {
+      bytes[i] = static_cast<uint8_t>(Random32(&rng) & 0xFF);
+    }
+    const auto data = Load(d, reinterpret_cast<const T*>(bytes.get()));  // first N lanes only
+
+    // Scatter into these regions, ensure vector results match scalar
+    auto expected = AllocateAligned<T>(range);
+    auto actual = AllocateAligned<T>(range);
+
+    const Rebind<Offset, D> d_offsets;
+    auto offsets = AllocateAligned<Offset>(N);  // or indices
+
+    for (size_t rep = 0; rep < 100; ++rep) {
+      // Byte offsets
+      std::fill(expected.get(), expected.get() + range, T(0));
+      std::fill(actual.get(), actual.get() + range, T(0));
+      for (size_t i = 0; i < N; ++i) {
+        offsets[i] =
+            static_cast<Offset>(Random32(&rng) % (max_bytes - sizeof(T)));
+        CopyBytes<sizeof(T)>(  // scalar reference; lanes are written in order i = 0..N-1
+            bytes.get() + i * sizeof(T),
+            reinterpret_cast<uint8_t*>(expected.get()) + offsets[i]);
+      }
+      const auto voffsets = Load(d_offsets, offsets.get());
+      ScatterOffset(data, d, actual.get(), voffsets);
+      if (!BytesEqual(expected.get(), actual.get(), max_bytes)) {
+        Print(d, "Data", data);
+        Print(d_offsets, "Offsets", voffsets);
+        HWY_ASSERT(false);
+      }
+
+      // Indices
+      std::fill(expected.get(), expected.get() + range, T(0));
+      std::fill(actual.get(), actual.get() + range, T(0));
+      for (size_t i = 0; i < N; ++i) {
+        offsets[i] = static_cast<Offset>(Random32(&rng) % range);  // lane index, not bytes
+        CopyBytes<sizeof(T)>(bytes.get() + i * sizeof(T),
+                             &expected[offsets[i]]);
+      }
+      const auto vindices = Load(d_offsets, offsets.get());
+      ScatterIndex(data, d, actual.get(), vindices);
+      if (!BytesEqual(expected.get(), actual.get(), max_bytes)) {
+        Print(d, "Data", data);
+        Print(d_offsets, "Indices", vindices);
+        HWY_ASSERT(false);
+      }
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllScatter() {
+  // Lane types: 32-bit always, 64-bit when available, plus floats (no 8/16-bit).
+  const ForPartialVectors<TestScatter> scatter_test;
+  scatter_test(uint32_t{});
+  scatter_test(int32_t{});
+
+#if HWY_CAP_INTEGER64
+  scatter_test(uint64_t{});
+  scatter_test(int64_t{});
+#endif
+
+  ForFloatTypes(scatter_test);
+}
+
+struct TestGather {  // Compares GatherOffset/GatherIndex with scalar byte copies.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    using Offset = MakeSigned<T>;
+
+    const size_t N = Lanes(d);
+
+    RandomState rng;
+
+    // Data to be gathered from
+    const size_t max_bytes = 4 * N * sizeof(T);  // upper bound on offset
+    auto bytes = AllocateAligned<uint8_t>(max_bytes);
+    for (size_t i = 0; i < max_bytes; ++i) {
+      bytes[i] = static_cast<uint8_t>(Random32(&rng) & 0xFF);
+    }
+
+    auto expected = AllocateAligned<T>(N);
+    auto offsets = AllocateAligned<Offset>(N);
+    auto indices = AllocateAligned<Offset>(N);
+
+    for (size_t rep = 0; rep < 100; ++rep) {
+      // Offsets
+      for (size_t i = 0; i < N; ++i) {
+        offsets[i] =  // random byte offset in [0, max_bytes - sizeof(T))
+            static_cast<Offset>(Random32(&rng) % (max_bytes - sizeof(T)));
+        CopyBytes<sizeof(T)>(bytes.get() + offsets[i], &expected[i]);
+      }
+
+      const Rebind<Offset, D> d_offset;
+      const T* base = reinterpret_cast<const T*>(bytes.get());
+      auto actual = GatherOffset(d, base, Load(d_offset, offsets.get()));
+      HWY_ASSERT_VEC_EQ(d, expected.get(), actual);
+
+      // Indices
+      for (size_t i = 0; i < N; ++i) {
+        indices[i] =  // random lane index in [0, max_bytes / sizeof(T))
+            static_cast<Offset>(Random32(&rng) % (max_bytes / sizeof(T)));
+        CopyBytes<sizeof(T)>(base + indices[i], &expected[i]);
+      }
+      actual = GatherIndex(d, base, Load(d_offset, indices.get()));
+      HWY_ASSERT_VEC_EQ(d, expected.get(), actual);
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllGather() {
+  // Lane types: 32-bit always, 64-bit when available, plus floats (no 8/16-bit).
+  const ForPartialVectors<TestGather> gather_test;
+  gather_test(uint32_t{});
+  gather_test(int32_t{});
+
+#if HWY_CAP_INTEGER64
+  gather_test(uint64_t{});
+  gather_test(int64_t{});
+#endif
+  ForFloatTypes(gather_test);
+}
+
+HWY_NOINLINE void TestAllCache() {
+  LoadFence();  // Smoke test: nothing is asserted, each op only has to build and run.
+  StoreFence();
+  int scratch = 0;  // any addressable object serves as the target
+  Prefetch(&scratch);
+  FlushCacheline(&scratch);
+  Pause();
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE  // NOTE(review): presumably true in only one per-target pass, so tests register once - confirm
+namespace hwy {
+HWY_BEFORE_TEST(HwyMemoryTest);
+HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllLoadStore);
+HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllStoreInterleaved3);
+HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllStoreInterleaved4);
+HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllLoadDup128);
+HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllStream);
+HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllScatter);
+HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllGather);
+HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllCache);
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/swizzle_test.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/swizzle_test.cc
new file mode 100644
index 0000000000..565dc115e4
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/swizzle_test.cc
@@ -0,0 +1,644 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "tests/swizzle_test.cc"
+#include "hwy/foreach_target.h"
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+struct TestShiftBytes {  // Checks ShiftLeftBytes<1> / ShiftRightBytes<1> byte by byte.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    // Scalar does not define Shift*Bytes.
+#if HWY_TARGET != HWY_SCALAR || HWY_IDE
+    const Repartition<uint8_t, D> du8;
+    const size_t N8 = Lanes(du8);  // number of uint8_t lanes
+
+    // Zero remains zero
+    const auto v0 = Zero(d);
+    HWY_ASSERT_VEC_EQ(d, v0, ShiftLeftBytes<1>(v0));
+    HWY_ASSERT_VEC_EQ(d, v0, ShiftRightBytes<1>(v0));
+
+    // Zero after shifting out the high/low byte
+    auto bytes = AllocateAligned<uint8_t>(N8);
+    std::fill(bytes.get(), bytes.get() + N8, 0);
+    bytes[N8 - 1] = 0x7F;  // only the last byte is nonzero
+    const auto vhi = BitCast(d, Load(du8, bytes.get()));
+    bytes[N8 - 1] = 0;
+    bytes[0] = 0x7F;  // now only the first byte is nonzero
+    const auto vlo = BitCast(d, Load(du8, bytes.get()));
+    HWY_ASSERT_VEC_EQ(d, v0, ShiftLeftBytes<1>(vhi));
+    HWY_ASSERT_VEC_EQ(d, v0, ShiftRightBytes<1>(vlo));
+
+    // Check expected result with Iota
+    const size_t N = Lanes(d);
+    auto in = AllocateAligned<T>(N);
+    const uint8_t* in_bytes = reinterpret_cast<const uint8_t*>(in.get());
+    const auto v = BitCast(d, Iota(du8, 1));
+    Store(v, d, in.get());
+
+    auto expected = AllocateAligned<T>(N);
+    uint8_t* expected_bytes = reinterpret_cast<uint8_t*>(expected.get());
+
+    const size_t kBlockSize = HWY_MIN(N8, 16);  // expectations are built per block of at most 16 bytes
+    for (size_t block = 0; block < N8; block += kBlockSize) {
+      expected_bytes[block] = 0;  // first byte of each block becomes zero
+      memcpy(expected_bytes + block + 1, in_bytes + block, kBlockSize - 1);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeftBytes<1>(v));
+
+    for (size_t block = 0; block < N8; block += kBlockSize) {
+      memcpy(expected_bytes + block, in_bytes + block + 1, kBlockSize - 1);
+      expected_bytes[block + kBlockSize - 1] = 0;  // last byte of each block becomes zero
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRightBytes<1>(v));
+#else
+    (void)d;
+#endif  // #if HWY_TARGET != HWY_SCALAR
+  }
+};
+
+HWY_NOINLINE void TestAllShiftBytes() {  // runs TestShiftBytes via ForIntegerTypes + ForGE128Vectors
+  ForIntegerTypes(ForGE128Vectors<TestShiftBytes>());
+}
+
+struct TestShiftLanes {  // Checks ShiftLeftLanes / ShiftRightLanes by 0 and by 1 lane.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    // Scalar does not define Shift*Lanes.
+#if HWY_TARGET != HWY_SCALAR || HWY_IDE
+    const auto v = Iota(d, T(1));
+    const size_t N = Lanes(d);
+    auto expected = AllocateAligned<T>(N);
+
+    HWY_ASSERT_VEC_EQ(d, v, ShiftLeftLanes<0>(v));  // shifting by zero lanes is the identity
+    HWY_ASSERT_VEC_EQ(d, v, ShiftRightLanes<0>(v));
+
+    constexpr size_t kLanesPerBlock = 16 / sizeof(T);  // lanes in one 16-byte block
+
+    for (size_t i = 0; i < N; ++i) {  // left by one: first lane of each block is 0, others hold i
+      expected[i] = (i % kLanesPerBlock) == 0 ? T(0) : T(i);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeftLanes<1>(v));
+
+    for (size_t i = 0; i < N; ++i) {  // right by one: last lane of each block is 0, others hold 2 + i
+      expected[i] =
+          (i % kLanesPerBlock) == (kLanesPerBlock - 1) ? T(0) : T(2 + i);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRightLanes<1>(v));
+#else
+    (void)d;
+#endif  // #if HWY_TARGET != HWY_SCALAR
+  }
+};
+
+HWY_NOINLINE void TestAllShiftLanes() {  // runs TestShiftLanes via ForAllTypes + ForGE128Vectors
+  ForAllTypes(ForGE128Vectors<TestShiftLanes>());
+}
+
+template <typename D, int kLane>  // Checks Broadcast<kLane>, then recurses with kLane - 1.
+struct TestBroadcastR {
+  HWY_NOINLINE void operator()() const {
+// TODO(janwas): fix failure
+#if HWY_TARGET != HWY_WASM
+    using T = typename D::T;
+    const D d;
+    const size_t N = Lanes(d);
+    auto in_lanes = AllocateAligned<T>(N);
+    std::fill(in_lanes.get(), in_lanes.get() + N, T(0));
+    const size_t blockN = HWY_MIN(N * sizeof(T), 16) / sizeof(T);  // lanes per block of at most 16 bytes
+    // Need to set within each 128-bit block
+    for (size_t block = 0; block < N; block += blockN) {
+      in_lanes[block + kLane] = static_cast<T>(block + 1);  // only lane kLane of the block is nonzero
+    }
+    const auto in = Load(d, in_lanes.get());
+    auto expected = AllocateAligned<T>(N);
+    for (size_t block = 0; block < N; block += blockN) {
+      for (size_t i = 0; i < blockN; ++i) {
+        expected[block + i] = T(block + 1);  // every lane of the block equals that value
+      }
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), Broadcast<kLane>(in));
+
+    TestBroadcastR<D, kLane - 1>()();  // next lower lane; the <D, -1> specialization ends the chain
+#endif
+  }
+};
+
+template <class D>  // Recursion terminator for kLane == -1: nothing left to check.
+struct TestBroadcastR<D, -1> {
+  void operator()() const {}
+};
+
+struct TestBroadcast {  // Entry point: starts TestBroadcastR at the highest lane index to check.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    TestBroadcastR<D, HWY_MIN(MaxLanes(d), 16 / sizeof(T)) - 1>()();  // start lane = min(MaxLanes, lanes per 16 bytes) - 1
+  }
+};
+
+HWY_NOINLINE void TestAllBroadcast() {
+  const ForPartialVectors<TestBroadcast> broadcast_test;
+  // Unsigned lanes; u8 is skipped.
+  broadcast_test(uint16_t{});
+  broadcast_test(uint32_t{});
+#if HWY_CAP_INTEGER64
+  broadcast_test(uint64_t{});
+#endif
+
+  // Signed lanes; i8 is skipped.
+  broadcast_test(int16_t{});
+  broadcast_test(int32_t{});
+#if HWY_CAP_INTEGER64
+  broadcast_test(int64_t{});
+#endif
+
+  ForFloatTypes(broadcast_test);
+}
+
+struct TestTableLookupBytes {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    const size_t N8 = Lanes(Repartition<uint8_t, D>());
+    const size_t vec_bytes = N * sizeof(T);
+    // Table of random bytes to look up from.
+    RandomState rng;
+    auto table = AllocateAligned<uint8_t>(N8);
+    for (size_t i = 0; i < N8; ++i) {
+      table[i] = static_cast<uint8_t>(Random32(&rng) & 0xFF);
+    }
+    const auto in = Load(d, reinterpret_cast<const T*>(table.get()));
+    // Enough test data; for larger vectors, upper lanes will be zero.
+    const uint8_t index_bytes_source[64] = {
+        // Same index as source, multiple outputs from same input,
+        // unused input (9), ascending/descending and nonconsecutive neighbors.
+        0,  2,  1, 2, 15, 12, 13, 14, 6,  7,  8,  5,  4,  3,  10, 11,
+        11, 10, 3, 4, 5,  8,  7,  6,  14, 13, 12, 15, 2,  1,  2,  0,
+        4,  3,  2, 2, 5,  6,  7,  7,  15, 15, 15, 15, 15, 15, 0,  1};
+    auto index_bytes = AllocateAligned<uint8_t>(N8);
+    for (size_t i = 0; i < N8; ++i) {
+      uint8_t index = (i < 64) ? index_bytes_source[i] : uint8_t{0};
+      // Avoid undefined results / asan error for scalar by capping indices.
+      if (index >= vec_bytes) index = static_cast<uint8_t>(vec_bytes - 1);
+      index_bytes[i] = index;
+    }
+    const auto indices = Load(d, reinterpret_cast<const T*>(index_bytes.get()));
+    auto expected = AllocateAligned<T>(N);
+    uint8_t* expected_bytes = reinterpret_cast<uint8_t*>(expected.get());
+
+    // Scalar reference: each index selects a byte relative to the start of the
+    // 16-byte block holding the output byte; the sum wraps modulo `mod`.
+    const size_t mod = HWY_MIN(N8, 256);
+    for (size_t pos = 0; pos < N8; ++pos) {
+      const size_t block_start = pos - (pos % 16);
+      const size_t index = index_bytes[pos];
+      expected_bytes[pos] = table[(block_start + index) % mod];
+    }
+
+    HWY_ASSERT_VEC_EQ(d, expected.get(), TableLookupBytes(in, indices));
+  }
+};
+
+HWY_NOINLINE void TestAllTableLookupBytes() {  // runs TestTableLookupBytes via ForIntegerTypes + ForPartialVectors
+  ForIntegerTypes(ForPartialVectors<TestTableLookupBytes>());
+}
+
+struct TestTableLookupLanes {  // Checks TableLookupLanes against expected lanes derived from Iota(1).
+#if HWY_TARGET == HWY_RVV
+  using Index = uint32_t;
+#else
+  using Index = int32_t;
+#endif
+
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+#if HWY_TARGET != HWY_SCALAR
+    const size_t N = Lanes(d);
+    auto idx = AllocateAligned<Index>(N);
+    std::fill(idx.get(), idx.get() + N, Index(0));
+    auto expected = AllocateAligned<T>(N);
+    const auto v = Iota(d, 1);  // v[i] == i + 1
+
+    if (N <= 8) {  // Try every combination of the first (up to) four indices
+      for (size_t i0 = 0; i0 < N; ++i0) {
+        idx[0] = static_cast<Index>(i0);
+
+        for (size_t i1 = 0; i1 < N; ++i1) {
+          if (N >= 2) idx[1] = static_cast<Index>(i1);  // guards avoid writing past idx[N - 1]
+          for (size_t i2 = 0; i2 < N; ++i2) {
+            if (N >= 4) idx[2] = static_cast<Index>(i2);
+            for (size_t i3 = 0; i3 < N; ++i3) {
+              if (N >= 4) idx[3] = static_cast<Index>(i3);
+              // idx[4..] (when N == 8) keep the 0 written by std::fill above.
+              for (size_t i = 0; i < N; ++i) {
+                expected[i] = static_cast<T>(idx[i] + 1);  // == v[idx[i]]
+              }
+
+              const auto opaque = SetTableIndices(d, idx.get());
+              const auto actual = TableLookupLanes(v, opaque);
+              HWY_ASSERT_VEC_EQ(d, expected.get(), actual);
+            }
+          }
+        }
+      }
+    } else {
+      // Too many permutations to test exhaustively; choose one with repeated
+      // and cross-block indices and ensure indices do not exceed #lanes.
+      // For larger vectors, upper lanes will be zero.
+      HWY_ALIGN Index idx_source[16] = {1,  3,  2,  2,  8, 1, 7, 6,
+                                        15, 14, 14, 15, 4, 9, 8, 5};
+      for (size_t i = 0; i < N; ++i) {
+        idx[i] = (i < 16) ? idx_source[i] : 0;
+        // Avoid undefined results / asan error for scalar by capping indices.
+        if (idx[i] >= static_cast<Index>(N)) {
+          idx[i] = static_cast<Index>(N - 1);
+        }
+        expected[i] = static_cast<T>(idx[i] + 1);  // == v[idx[i]]
+      }
+
+      const auto opaque = SetTableIndices(d, idx.get());
+      const auto actual = TableLookupLanes(v, opaque);
+      HWY_ASSERT_VEC_EQ(d, expected.get(), actual);
+    }
+#else
+    (void)d;
+#endif
+  }
+};
+
+HWY_NOINLINE void TestAllTableLookupLanes() {
+  const ForPartialVectors<TestTableLookupLanes> lookup_test;  // 32-bit lane types only
+  lookup_test(uint32_t{});
+  lookup_test(int32_t{});
+  lookup_test(float{});
+}
+
+struct TestInterleave {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    using TU = MakeUnsigned<T>;
+    constexpr size_t kLanesPerBlock = 16 / sizeof(T);
+    const size_t N = Lanes(d);
+    auto evens = AllocateAligned<T>(N);  // 0, 2, 4, ...
+    auto odds = AllocateAligned<T>(N);   // 1, 3, 5, ...
+    auto expected = AllocateAligned<T>(N);
+    for (size_t lane = 0; lane < N; ++lane) {
+      evens[lane] = static_cast<T>(2 * lane);
+      odds[lane] = static_cast<T>(2 * lane + 1);
+    }
+    const auto even = Load(d, evens.get());
+    const auto odd = Load(d, odds.get());
+
+    for (size_t lane = 0; lane < N; ++lane) {  // lower halves: run starts at 2 * block start
+      const size_t base = (lane / kLanesPerBlock) * 2 * kLanesPerBlock;
+      const size_t value = base + (lane % kLanesPerBlock);
+      expected[lane] = static_cast<T>(value & LimitsMax<TU>());
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), InterleaveLower(even, odd));
+
+    for (size_t lane = 0; lane < N; ++lane) {  // upper halves: same run, one block further
+      const size_t base = (lane / kLanesPerBlock) * 2 * kLanesPerBlock;
+      expected[lane] = static_cast<T>(base + (lane % kLanesPerBlock) + kLanesPerBlock);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), InterleaveUpper(even, odd));
+  }
+};
+
+HWY_NOINLINE void TestAllInterleave() {  // runs TestInterleave via ForAllTypes + ForGE128Vectors
+  // Not supported by HWY_SCALAR: Interleave(f32, f32) would return f32x2.
+  ForAllTypes(ForGE128Vectors<TestInterleave>());
+}
+
+struct TestZipLower {  // Checks ZipLower(even, odd) against double-width lanes built in scalar code.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    using WideT = MakeWide<T>;
+    static_assert(sizeof(T) * 2 == sizeof(WideT), "Must be double-width");
+    static_assert(IsSigned<T>() == IsSigned<WideT>(), "Must have same sign");
+    const size_t N = Lanes(d);
+    auto even_lanes = AllocateAligned<T>(N);
+    auto odd_lanes = AllocateAligned<T>(N);
+    for (size_t i = 0; i < N; ++i) {
+      even_lanes[i] = static_cast<T>(2 * i + 0);  // 0, 2, 4, ...
+      odd_lanes[i] = static_cast<T>(2 * i + 1);   // 1, 3, 5, ...
+    }
+    const auto even = Load(d, even_lanes.get());
+    const auto odd = Load(d, odd_lanes.get());
+
+    const Repartition<WideT, D> dw;
+    auto expected = AllocateAligned<WideT>(Lanes(dw));
+    const WideT blockN = static_cast<WideT>(16 / sizeof(WideT));  // wide lanes per 16-byte block
+    for (size_t i = 0; i < Lanes(dw); ++i) {
+      const size_t block = i / blockN;
+      // Value of least-significant lane in lo-vector.
+      const WideT lo =
+          static_cast<WideT>(2 * (i % blockN) + 4 * block * blockN);
+      const WideT kBits = static_cast<WideT>(sizeof(T) * 8);
+      expected[i] =  // upper half holds lo + 1 (the odd value), lower half holds lo
+          static_cast<WideT>((static_cast<WideT>(lo + 1) << kBits) + lo);
+    }
+    HWY_ASSERT_VEC_EQ(dw, expected.get(), ZipLower(even, odd));
+  }
+};
+
+struct TestZipUpper {  // Checks ZipUpper(even, odd) against double-width lanes built in scalar code.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    using WideT = MakeWide<T>;
+    static_assert(sizeof(T) * 2 == sizeof(WideT), "Must be double-width");
+    static_assert(IsSigned<T>() == IsSigned<WideT>(), "Must have same sign");
+    const size_t N = Lanes(d);
+    auto even_lanes = AllocateAligned<T>(N);
+    auto odd_lanes = AllocateAligned<T>(N);
+    for (size_t i = 0; i < Lanes(d); ++i) {
+      even_lanes[i] = static_cast<T>(2 * i + 0);  // 0, 2, 4, ...
+      odd_lanes[i] = static_cast<T>(2 * i + 1);   // 1, 3, 5, ...
+    }
+    const auto even = Load(d, even_lanes.get());
+    const auto odd = Load(d, odd_lanes.get());
+
+    const Repartition<WideT, D> dw;
+    auto expected = AllocateAligned<WideT>(Lanes(dw));
+    // Same construction as for ZipLower, with both halves advanced by 2 * blockN.
+    constexpr WideT blockN = static_cast<WideT>(16 / sizeof(WideT));
+    for (size_t i = 0; i < Lanes(dw); ++i) {
+      const size_t block = i / blockN;
+      const WideT lo =
+          static_cast<WideT>(2 * (i % blockN) + 4 * block * blockN);
+      const WideT kBits = static_cast<WideT>(sizeof(T) * 8);
+      expected[i] = static_cast<WideT>(
+          (static_cast<WideT>(lo + 2 * blockN + 1) << kBits) + lo + 2 * blockN);
+    }
+    HWY_ASSERT_VEC_EQ(dw, expected.get(), ZipUpper(even, odd));
+  }
+};
+
+HWY_NOINLINE void TestAllZip() {  // ZipLower on partial vectors, ZipUpper on vectors >= 128 bits
+  const ForPartialVectors<TestZipLower, 2> lower_unsigned;
+  // TODO(janwas): fix
+#if HWY_TARGET != HWY_RVV
+  lower_unsigned(uint8_t());
+#endif
+  lower_unsigned(uint16_t());
+#if HWY_CAP_INTEGER64
+  lower_unsigned(uint32_t());  // generates u64
+#endif
+
+  const ForPartialVectors<TestZipLower, 2> lower_signed;
+#if HWY_TARGET != HWY_RVV
+  lower_signed(int8_t());  // 8-bit lanes are skipped on RVV, see TODO above
+#endif
+  lower_signed(int16_t());
+#if HWY_CAP_INTEGER64
+  lower_signed(int32_t());  // generates i64
+#endif
+
+  const ForGE128Vectors<TestZipUpper> upper_unsigned;
+#if HWY_TARGET != HWY_RVV
+  upper_unsigned(uint8_t());  // 8-bit lanes are skipped on RVV, see TODO above
+#endif
+  upper_unsigned(uint16_t());
+#if HWY_CAP_INTEGER64
+  upper_unsigned(uint32_t());  // generates u64
+#endif
+
+  const ForGE128Vectors<TestZipUpper> upper_signed;
+#if HWY_TARGET != HWY_RVV
+  upper_signed(int8_t());  // 8-bit lanes are skipped on RVV, see TODO above
+#endif
+  upper_signed(int16_t());
+#if HWY_CAP_INTEGER64
+  upper_signed(int32_t());  // generates i64
+#endif
+
+  // No float - concatenating f32 does not result in a f64
+}
+
+class TestSpecialShuffle32 {  // Checks the fixed four-lane shuffles on Iota(0) input.
+ public:
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto v = Iota(d, 0);  // lane i holds i, so an output lane's value names its source lane
+    // Wrapper macro so a failure reports the line of the individual check below.
+#define VERIFY_LANES_32(d, v, i3, i2, i1, i0) \
+  VerifyLanes32((d), (v), (i3), (i2), (i1), (i0), __FILE__, __LINE__)
+
+    VERIFY_LANES_32(d, Shuffle2301(v), 2, 3, 0, 1);
+    VERIFY_LANES_32(d, Shuffle1032(v), 1, 0, 3, 2);
+    VERIFY_LANES_32(d, Shuffle0321(v), 0, 3, 2, 1);
+    VERIFY_LANES_32(d, Shuffle2103(v), 2, 1, 0, 3);
+    VERIFY_LANES_32(d, Shuffle0123(v), 0, 1, 2, 3);
+
+#undef VERIFY_LANES_32
+  }
+
+ private:
+  template <class D, class V>  // i3..i0: expected source lane for output lanes 3..0 of each block
+  HWY_NOINLINE void VerifyLanes32(D d, V v, const int i3, const int i2,
+                                  const int i1, const int i0,
+                                  const char* filename, const int line) {
+    using T = typename D::T;
+    const size_t N = Lanes(d);
+    auto lanes = AllocateAligned<T>(N);
+    Store(v, d, lanes.get());
+    const std::string name = TypeName(lanes[0], N);
+    constexpr size_t kBlockN = 16 / sizeof(T);  // lanes per 16-byte block
+    for (int block = 0; block < static_cast<int>(N); block += kBlockN) {
+      AssertEqual(T(block + i3), lanes[block + 3], name, filename, line);
+      AssertEqual(T(block + i2), lanes[block + 2], name, filename, line);
+      AssertEqual(T(block + i1), lanes[block + 1], name, filename, line);
+      AssertEqual(T(block + i0), lanes[block + 0], name, filename, line);
+    }
+  }
+};
+
+class TestSpecialShuffle64 {  // Checks Shuffle01 on Iota(0) input.
+ public:
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto v = Iota(d, 0);  // lane i holds i, so an output lane's value names its source lane
+    VerifyLanes64(d, Shuffle01(v), 0, 1, __FILE__, __LINE__);
+  }
+
+ private:
+  template <class D, class V>  // i1, i0: expected source lane for output lanes 1 and 0 of each block
+  HWY_NOINLINE void VerifyLanes64(D d, V v, const int i1, const int i0,
+                                  const char* filename, const int line) {
+    using T = typename D::T;
+    const size_t N = Lanes(d);
+    auto lanes = AllocateAligned<T>(N);
+    Store(v, d, lanes.get());
+    const std::string name = TypeName(lanes[0], N);
+    constexpr size_t kBlockN = 16 / sizeof(T);  // lanes per 16-byte block
+    for (int block = 0; block < static_cast<int>(N); block += kBlockN) {
+      AssertEqual(T(block + i1), lanes[block + 1], name, filename, line);
+      AssertEqual(T(block + i0), lanes[block + 0], name, filename, line);
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllSpecialShuffles() {
+  const ForGE128Vectors<TestSpecialShuffle32> shuffle32;  // 32-bit lane types
+  shuffle32(uint32_t{});
+  shuffle32(int32_t{});
+  shuffle32(float{});
+
+#if HWY_CAP_INTEGER64
+  const ForGE128Vectors<TestSpecialShuffle64> shuffle64_int;
+  shuffle64_int(uint64_t{});
+  shuffle64_int(int64_t{});
+#endif
+
+#if HWY_CAP_FLOAT64
+  const ForGE128Vectors<TestSpecialShuffle64> shuffle64_float;
+  shuffle64_float(double{});
+#endif
+}
+
+struct TestConcatHalves {  // Checks ConcatLowerLower / ConcatUpperUpper reproduce Iota(d, 1).
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    // TODO(janwas): fix
+#if HWY_TARGET != HWY_RVV
+    // Construct inputs such that the concatenated halves == iota.
+    const auto expected = Iota(d, 1);
+
+    const size_t N = Lanes(d);
+    auto lo = AllocateAligned<T>(N);
+    auto hi = AllocateAligned<T>(N);
+    size_t i;
+    for (i = 0; i < N / 2; ++i) {  // lower halves carry the data
+      lo[i] = static_cast<T>(1 + i);
+      hi[i] = static_cast<T>(lo[i] + T(N / 2));  // halve before narrowing: T(N) can wrap for 8-bit T
+    }
+    for (; i < N; ++i) {  // upper halves are unused by ConcatLowerLower
+      lo[i] = hi[i] = 0;
+    }
+
+    HWY_ASSERT_VEC_EQ(d, expected,
+                      ConcatLowerLower(Load(d, hi.get()), Load(d, lo.get())));
+
+    // Same for high blocks.
+    for (i = 0; i < N / 2; ++i) {  // lower halves are unused by ConcatUpperUpper
+      lo[i] = hi[i] = 0;
+    }
+    for (; i < N; ++i) {  // upper halves carry the data
+      lo[i] = static_cast<T>(1 + i - N / 2);
+      hi[i] = static_cast<T>(lo[i] + T(N / 2));  // halve before narrowing: T(N) can wrap for 8-bit T
+    }
+
+    HWY_ASSERT_VEC_EQ(d, expected,
+                      ConcatUpperUpper(Load(d, hi.get()), Load(d, lo.get())));
+#else
+    (void)d;
+#endif
+  }
+};
+
+HWY_NOINLINE void TestAllConcatHalves() {  // runs TestConcatHalves via ForAllTypes + ForGE128Vectors
+  ForAllTypes(ForGE128Vectors<TestConcatHalves>());
+}
+
+struct TestConcatLowerUpper {  // Checks ConcatLowerUpper(hi, lo) on two consecutive Iota runs.
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    // TODO(janwas): fix
+#if HWY_TARGET != HWY_RVV
+    const size_t N = Lanes(d);
+    // Middle part of Iota(1) == Iota(1 + N / 2).
+    const auto lo = Iota(d, 1);      // starts at 1
+    const auto hi = Iota(d, 1 + N);  // starts at 1 + N, i.e. continues where lo ends
+    HWY_ASSERT_VEC_EQ(d, Iota(d, 1 + N / 2), ConcatLowerUpper(hi, lo));
+#else
+    (void)d;
+#endif
+  }
+};
+
+HWY_NOINLINE void TestAllConcatLowerUpper() {  // runs TestConcatLowerUpper via ForAllTypes + ForGE128Vectors
+  ForAllTypes(ForGE128Vectors<TestConcatLowerUpper>());
+}
+
+struct TestConcatUpperLower {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    const size_t half = N / 2;
+    auto expected = AllocateAligned<T>(N);
+    // Lower half mirrors Iota(1); upper half mirrors Iota(1 + N).
+    for (size_t lane = 0; lane < N; ++lane) {
+      const size_t from_upper = (lane < half) ? 0 : N;
+      expected[lane] = static_cast<T>(1 + lane + from_upper);
+    }
+    // Inputs: two consecutive Iota runs.
+    const auto lower = Iota(d, 1);
+    const auto upper = Iota(d, 1 + N);
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ConcatUpperLower(upper, lower));
+  }
+};
+
+HWY_NOINLINE void TestAllConcatUpperLower() {  // runs TestConcatUpperLower via ForAllTypes + ForGE128Vectors
+  ForAllTypes(ForGE128Vectors<TestConcatUpperLower>());
+}
+
+struct TestOddEven {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    auto expected = AllocateAligned<T>(N);
+    for (size_t lane = 0; lane < N; ++lane) {  // odd lanes take the Iota(1 + N) value
+      expected[lane] = static_cast<T>(1 + lane + ((lane % 2 != 0) ? N : 0));
+    }
+    const auto from_even = Iota(d, 1);
+    const auto from_odd = Iota(d, 1 + N);
+    HWY_ASSERT_VEC_EQ(d, expected.get(), OddEven(from_odd, from_even));
+  }
+};
+
+HWY_NOINLINE void TestAllOddEven() {  // runs TestOddEven via ForAllTypes + ForGE128Vectors
+  ForAllTypes(ForGE128Vectors<TestOddEven>());
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE  // NOTE(review): presumably true in only one per-target pass, so tests register once - confirm
+namespace hwy {
+HWY_BEFORE_TEST(HwySwizzleTest);
+HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllShiftBytes);
+HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllShiftLanes);
+HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllBroadcast);
+HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllTableLookupBytes);
+HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllTableLookupLanes);
+HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllInterleave);
+HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllZip);
+HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllSpecialShuffles);
+HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllConcatHalves);
+HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllConcatLowerUpper);
+HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllConcatUpperLower);
+HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllOddEven);
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/test_util-inl.h b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/test_util-inl.h
new file mode 100644
index 0000000000..f62ebeeddd
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/test_util-inl.h
@@ -0,0 +1,566 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Normal include guard for non-SIMD portion of this header.
+#ifndef HWY_TESTS_TEST_UTIL_H_
+#define HWY_TESTS_TEST_UTIL_H_
+
+// Helper functions for use by *_test.cc.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <cstddef>
+#include <string>
+#include <utility>  // std::forward
+
+#include "hwy/aligned_allocator.h"
+#include "hwy/base.h"
+#include "hwy/highway.h"
+
+#include "gtest/gtest.h"
+
+namespace hwy {
+
+// The maximum vector size used in tests when defining test data. DEPRECATED.
+constexpr size_t kTestMaxVectorSize = 64;
+
+// googletest before 1.10 didn't define INSTANTIATE_TEST_SUITE_P() but instead
+// used INSTANTIATE_TEST_CASE_P which is now deprecated.
+#ifdef INSTANTIATE_TEST_SUITE_P
+#define HWY_GTEST_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_SUITE_P
+#else
+#define HWY_GTEST_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_CASE_P
+#endif
+
+// Helper class to run parametric tests using the hwy target as parameter. To
+// use this define the following in your test:
+//   class MyTestSuite : public TestWithParamTarget {
+//    ...
+//   };
+//   HWY_TARGET_INSTANTIATE_TEST_SUITE_P(MyTestSuite);
+//   TEST_P(MyTestSuite, MyTest) { ... }
+class TestWithParamTarget : public testing::TestWithParam<uint32_t> {
+ protected:
+  void SetUp() override { SetSupportedTargetsForTest(GetParam()); }
+
+  void TearDown() override {
+    // Check that the parametric test calls SupportedTargets() when the source
+    // was compiled with more than one target. In the single-target case only
+    // static dispatch will be used anyway.
+#if (HWY_TARGETS & (HWY_TARGETS - 1)) != 0  // More than one bit set.
+    EXPECT_TRUE(SupportedTargetsCalledForTest())
+        << "This hwy target parametric test doesn't use dynamic-dispatch and "
+           "doesn't need to be parametric.";
+#endif  // more than one target compiled
+    SetSupportedTargetsForTest(0);  // NOTE(review): presumably undoes SetUp.
+  }
+};
+
+// Function to convert the test parameter of a TestWithParamTarget for
+// displaying it in the gtest test name.
+static inline std::string TestParamTargetName(
+    const testing::TestParamInfo<uint32_t>& info) {
+  return TargetName(info.param);
+}
+
+#define HWY_TARGET_INSTANTIATE_TEST_SUITE_P(suite)              \
+  HWY_GTEST_INSTANTIATE_TEST_SUITE_P(                           \
+      suite##Group, suite,                                      \
+      testing::ValuesIn(::hwy::SupportedAndGeneratedTargets()), \
+      ::hwy::TestParamTargetName)
+
+// Helper class similar to TestWithParamTarget to run parametric tests that
+// depend on the target and another parametric test. If you need to use multiple
+// extra parameters use a std::tuple<> of them and ::testing::Combine(...) as
+// the generator. To use this class define the following in your test:
+//   class MyTestSuite : public TestWithParamTargetAndT<int> {
+//    ...
+//   };
+//   HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(MyTestSuite, ::testing::Range(0, 9));
+//   TEST_P(MyTestSuite, MyTest) { ... GetParam() .... }
+template <typename T>
+class TestWithParamTargetAndT
+    : public ::testing::TestWithParam<std::tuple<uint32_t, T>> {
+ public:
+  // Expose the parametric type here so it can be used by the
+  // HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T macro.
+  using HwyParamType = T;
+
+ protected:
+  void SetUp() override {  // Tuple element 0 is the hwy target.
+    SetSupportedTargetsForTest(std::get<0>(
+        ::testing::TestWithParam<std::tuple<uint32_t, T>>::GetParam()));
+  }
+
+  void TearDown() override {
+    // Check that the parametric test calls SupportedTargets() when the source
+    // was compiled with more than one target. In the single-target case only
+    // static dispatch will be used anyway.
+#if (HWY_TARGETS & (HWY_TARGETS - 1)) != 0  // More than one bit set.
+    EXPECT_TRUE(SupportedTargetsCalledForTest())
+        << "This hwy target parametric test doesn't use dynamic-dispatch and "
+           "doesn't need to be parametric.";
+#endif  // more than one target compiled
+    SetSupportedTargetsForTest(0);  // NOTE(review): presumably undoes SetUp.
+  }
+
+  T GetParam() {  // Hides the base GetParam(); yields tuple element 1 only.
+    return std::get<1>(
+        ::testing::TestWithParam<std::tuple<uint32_t, T>>::GetParam());
+  }
+};
+
+template <typename T>  // T: type of the extra (non-target) test parameter.
+std::string TestParamTargetNameAndT(
+    const testing::TestParamInfo<std::tuple<uint32_t, T>>& info) {
+  return std::string(TargetName(std::get<0>(info.param))) + "_" +  // target
+         ::testing::PrintToString(std::get<1>(info.param));  // printed param
+}
+
+#define HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(suite, generator)     \
+  HWY_GTEST_INSTANTIATE_TEST_SUITE_P(                               \
+      suite##Group, suite,                                          \
+      ::testing::Combine(                                           \
+          testing::ValuesIn(::hwy::SupportedAndGeneratedTargets()), \
+          generator),                                               \
+      ::hwy::TestParamTargetNameAndT<suite::HwyParamType>)
+
+// Helper macro to export a function and define a test that tests it. This is
+// equivalent to do a HWY_EXPORT of a void(void) function and run it in a test:
+//   class MyTestSuite : public TestWithParamTarget {
+//    ...
+//   };
+//   HWY_TARGET_INSTANTIATE_TEST_SUITE_P(MyTestSuite);
+//   HWY_EXPORT_AND_TEST_P(MyTestSuite, MyTest);
+#define HWY_EXPORT_AND_TEST_P(suite, func_name)                   \
+  HWY_EXPORT(func_name);                                          \
+  TEST_P(suite, func_name) { HWY_DYNAMIC_DISPATCH(func_name)(); } \
+  static_assert(true, "For requiring trailing semicolon")
+
+#define HWY_EXPORT_AND_TEST_P_T(suite, func_name)                           \
+  HWY_EXPORT(func_name);                                                    \
+  TEST_P(suite, func_name) { HWY_DYNAMIC_DISPATCH(func_name)(GetParam()); } \
+  static_assert(true, "For requiring trailing semicolon")
+
+#define HWY_BEFORE_TEST(suite)                      \
+  class suite : public hwy::TestWithParamTarget {}; \
+  HWY_TARGET_INSTANTIATE_TEST_SUITE_P(suite);       \
+  static_assert(true, "For requiring trailing semicolon")
+
+// 64-bit random generator (Xorshift128+). Much smaller state than std::mt19937,
+// which triggers a compiler bug.
+class RandomState {
+ public:
+  explicit RandomState(const uint64_t seed = 0x123456789ull)
+      : s0_(SplitMix64(seed + 0x9E3779B97F4A7C15ull)),
+        s1_(SplitMix64(s0_)) {  // s0_ is declared, hence initialized, first.
+  }
+
+  HWY_INLINE uint64_t operator()() {
+    uint64_t mixed = s0_;
+    const uint64_t carried = s1_;
+    const uint64_t output = mixed + carried;  // Sum of the pre-update state.
+    s0_ = carried;
+    mixed ^= mixed << 23;
+    mixed ^= carried ^ (mixed >> 18) ^ (carried >> 5);
+    s1_ = mixed;
+    return output;
+  }
+
+ private:
+  static uint64_t SplitMix64(uint64_t v) {
+    v = (v ^ (v >> 30)) * 0xBF58476D1CE4E5B9ull;
+    v = (v ^ (v >> 27)) * 0x94D049BB133111EBull;
+    return v ^ (v >> 31);
+  }
+
+  uint64_t s0_;
+  uint64_t s1_;
+};
+
+static HWY_INLINE uint32_t Random32(RandomState* rng) {
+  return static_cast<uint32_t>((*rng)());  // Keeps the low 32 bits of the draw.
+}
+
+// Prevents the compiler from eliding the computations that led to "output".
+// Works by indicating to the compiler that "output" is being read and modified.
+// The +r constraint avoids unnecessary writes to memory, but only works for
+// built-in types.
+template <class T>
+inline void PreventElision(T&& output) {
+#if HWY_COMPILER_MSVC
+  (void)output;  // No inline asm on this path; only marks `output` as used.
+#else   // HWY_COMPILER_MSVC
+  asm volatile("" : "+r"(output) : : "memory");
+#endif  // HWY_COMPILER_MSVC
+}
+
+// Returns a name for the vector/part/scalar. The type prefix is u/i/f for
+// unsigned/signed/floating point, followed by the number of bits per lane;
+// then 'x' followed by the number of lanes. Example: u8x16. This is useful for
+// understanding which instantiation of a generic test failed.
+template <typename T>
+static inline std::string TypeName(T /*unused*/, size_t N) {
+  const char kind = IsFloat<T>() ? 'f' : (IsSigned<T>() ? 'i' : 'u');
+  const size_t bits = sizeof(T) * 8;
+  char buf[64];
+  if (N != 1) {
+    snprintf(buf, sizeof(buf), "%c%zux%zu", kind, bits, N);
+  } else {  // Scalars get no xN suffix.
+    snprintf(buf, sizeof(buf), "%c%zu", kind, bits);
+  }
+  return buf;
+}
+
+// String comparison
+
+template <typename T1, typename T2>
+inline bool BytesEqual(const T1* p1, const T2* p2, const size_t size,
+                       size_t* pos = nullptr) {
+  const uint8_t* lhs = reinterpret_cast<const uint8_t*>(p1);
+  const uint8_t* rhs = reinterpret_cast<const uint8_t*>(p2);
+  for (size_t idx = 0; idx < size; ++idx) {
+    if (lhs[idx] == rhs[idx]) continue;
+    // First differing byte: report it and, if requested, record its offset.
+    fprintf(stderr, "Mismatch at byte %zu of %zu: %d != %d (%s, %s)\n", idx,
+            size, lhs[idx], rhs[idx], TypeName(T1(), 1).c_str(),
+            TypeName(T2(), 1).c_str());
+    if (pos != nullptr) {
+      *pos = idx;
+    }
+    return false;
+  }
+  return true;
+}
+
+inline bool StringsEqual(const char* s1, const char* s2) {
+  for (; *s1 == *s2; ++s1, ++s2) {
+    if (*s1 == '\0') return true;  // Both strings ended together.
+  }
+  return false;  // Stopped at the first differing character.
+}
+
+}  // namespace hwy
+
+#endif  // HWY_TESTS_TEST_UTIL_H_
+
+// Per-target include guard
+#if defined(HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
+#undef HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
+#else
+#define HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
+#endif
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Prints lanes around `lane`, in memory order.
+template <class D>
+HWY_NOINLINE void Print(const D d, const char* caption, const Vec<D> v,
+                        intptr_t lane = 0) {
+  using T = TFromD<D>;
+  const size_t num_lanes = Lanes(d);
+  auto stored = AllocateAligned<T>(num_lanes);
+  Store(v, d, stored.get());
+  const size_t first = static_cast<size_t>(std::max<intptr_t>(0, lane - 2));
+  const size_t limit = std::min(first + 7, num_lanes);  // At most 7 lanes.
+  fprintf(stderr, "%s %s [%zu+ ->]:\n  ", TypeName(T(), num_lanes).c_str(),
+          caption, first);
+  for (size_t idx = first; idx < limit; ++idx) {
+    fprintf(stderr, "%g,", double(stored[idx]));
+  }
+  if (first >= limit) fprintf(stderr, "(out of bounds)");
+  fprintf(stderr, "\n");
+}
+
+static HWY_NORETURN HWY_NOINLINE void NotifyFailure(
+    const char* filename, const int line, const char* type_name,
+    const size_t lane, const char* expected, const char* actual) {
+  hwy::Abort(filename, line,  // Does not return (see HWY_NORETURN).
+             "%s, %s lane %zu mismatch: expected '%s', got '%s'.\n",
+             hwy::TargetName(HWY_TARGET), type_name, lane, expected, actual);
+}
+
+template <class Out, class In>
+inline Out BitCast(const In& in) {
+  static_assert(sizeof(Out) == sizeof(In), "");  // Sizes must match exactly.
+  Out out;
+  CopyBytes<sizeof(out)>(&in, &out);  // Presumably a byte-wise copy - confirm.
+  return out;
+}
+
+// Computes the difference in units of last place between x and y.
+template <typename TF>
+MakeUnsigned<TF> ComputeUlpDelta(TF x, TF y) {
+  static_assert(IsFloat<TF>(), "Only makes sense for floating-point");
+  using TU = MakeUnsigned<TF>;
+
+  // Values that compare equal (including -0 vs 0, and equal infinities) are
+  // zero ULPs apart, as are two NaNs: there are many NaN encodings, and this
+  // lets callers verify that an expected NaN was produced.
+  if (x == y || (std::isnan(x) && std::isnan(y))) return 0;
+
+  // Operands of opposite sign need no special handling: their bit patterns
+  // differ by a large amount, which is the desired outcome, and the unsigned
+  // subtraction below cannot overflow.
+
+  const TU bits_x = BitCast<TU>(x);
+  const TU bits_y = BitCast<TU>(y);
+  // Subtract the smaller pattern from the larger one so no unsigned->signed
+  // cast is needed (2's complement is only guaranteed from C++20 on).
+  return bits_x > bits_y ? bits_x - bits_y : bits_y - bits_x;
+}
+
+template <typename T, HWY_IF_NOT_FLOAT(T)>
+HWY_NOINLINE bool IsEqual(const T expected, const T actual) {
+  return expected == actual;  // Non-float types must match exactly.
+}
+
+template <typename T, HWY_IF_FLOAT(T)>
+HWY_NOINLINE bool IsEqual(const T expected, const T actual) {
+  return ComputeUlpDelta(expected, actual) <= 1;  // Tolerates a 1-ULP delta.
+}
+
+// Compare non-vector, non-string T.
+template <typename T>
+HWY_NOINLINE void AssertEqual(const T expected, const T actual,
+                              const std::string& type_name,
+                              const char* filename = "", const int line = -1,
+                              const size_t lane = 0) {
+  if (IsEqual(expected, actual)) return;
+
+  // Mismatch: format both values via double and abort with the details.
+  char want[100];
+  char got[100];
+  snprintf(want, sizeof(want), "%g", double(expected));
+  snprintf(got, sizeof(got), "%g", double(actual));
+  NotifyFailure(filename, line, type_name.c_str(), lane, want, got);
+}
+
+static HWY_NOINLINE HWY_MAYBE_UNUSED void AssertStringEqual(
+    const char* expected, const char* actual, const char* filename = "",
+    const int line = -1, const size_t lane = 0) {
+  if (!hwy::StringsEqual(expected, actual)) {
+    NotifyFailure(filename, line, "string", lane, expected, actual);  // aborts
+  }
+}
+
+// Compare expected vector to vector.
+template <class D, class V>
+HWY_NOINLINE void AssertVecEqual(D d, const V expected, const V actual,
+                                 const char* filename, const int line) {
+  using T = TFromD<D>;
+  const size_t num_lanes = Lanes(d);
+  auto want = AllocateAligned<T>(num_lanes);
+  auto got = AllocateAligned<T>(num_lanes);
+  Store(expected, d, want.get());
+  Store(actual, d, got.get());
+  for (size_t lane = 0; lane < num_lanes; ++lane) {
+    if (IsEqual(want[lane], got[lane])) continue;
+
+    // First mismatching lane: dump the neighborhood of both vectors, then
+    // abort with the two offending values.
+    fprintf(stderr, "\n\n");
+    Print(d, "expect", expected, lane);
+    Print(d, "actual", actual, lane);
+
+    char want_str[100];
+    char got_str[100];
+    snprintf(want_str, sizeof(want_str), "%g", double(want[lane]));
+    snprintf(got_str, sizeof(got_str), "%g", double(got[lane]));
+    NotifyFailure(filename, line, hwy::TypeName(T(), num_lanes).c_str(), lane,
+                  want_str, got_str);
+  }
+}
+
+// Compare expected lanes to vector.
+template <class D>  // NOTE(review): presumably reads Lanes(d) values - confirm.
+HWY_NOINLINE void AssertVecEqual(D d, const TFromD<D>* expected, Vec<D> actual,
+                                 const char* filename, int line) {
+  AssertVecEqual(d, LoadU(d, expected), actual, filename, line);
+}
+
+template <class D>
+HWY_NOINLINE void AssertMaskEqual(D d, Mask<D> a, Mask<D> b,
+                                  const char* filename, int line) {
+  AssertVecEqual(d, VecFromMask(d, a), VecFromMask(d, b), filename, line);
+  // The aggregate queries on both masks must agree as well.
+  const std::string type_name = TypeName(TFromD<D>(), Lanes(d));
+  AssertEqual(CountTrue(a), CountTrue(b), type_name, filename, line, 0);
+  AssertEqual(AllTrue(a), AllTrue(b), type_name, filename, line, 0);
+  AssertEqual(AllFalse(a), AllFalse(b), type_name, filename, line, 0);
+
+  // TODO(janwas): StoreMaskBits
+}
+
+template <class D>
+HWY_NOINLINE Mask<D> MaskTrue(const D d) {
+  const auto v0 = Zero(d);
+  return Eq(v0, v0);  // Compares the zero vector with itself.
+}
+
+template <class D>
+HWY_NOINLINE Mask<D> MaskFalse(const D d) {
+  // Lt is only for signed types and we cannot yet cast mask types.
+  return Eq(Zero(d), Set(d, 1));  // Compares all-zero lanes with all-one lanes.
+}
+
+#ifndef HWY_ASSERT_EQ
+
+#define HWY_ASSERT_EQ(expected, actual) \
+  AssertEqual(expected, actual, hwy::TypeName(expected, 1), __FILE__, __LINE__)
+
+#define HWY_ASSERT_STRING_EQ(expected, actual) \
+  AssertStringEqual(expected, actual, __FILE__, __LINE__)
+
+#define HWY_ASSERT_VEC_EQ(d, expected, actual) \
+  AssertVecEqual(d, expected, actual, __FILE__, __LINE__)
+
+#define HWY_ASSERT_MASK_EQ(d, expected, actual) \
+  AssertMaskEqual(d, expected, actual, __FILE__, __LINE__)
+
+#endif  // HWY_ASSERT_EQ
+
+// Helpers for instantiating tests with combinations of lane types / counts.
+
+// For all powers of two in [kMinLanes, N * kMinLanes] (so that recursion stops
+// at N == 0)
+template <typename T, size_t N, size_t kMinLanes, class Test>
+struct ForeachSizeR {
+  static void Do() {
+    static_assert(N != 0, "End of recursion");
+    Test()(T(), Simd<T, N * kMinLanes>());  // Run the test at this lane count.
+    ForeachSizeR<T, N / 2, kMinLanes, Test>::Do();  // Then recurse with N / 2.
+  }
+};
+
+// Base case to stop the recursion.
+template <typename T, size_t kMinLanes, class Test>
+struct ForeachSizeR<T, 0, kMinLanes, Test> {
+  static void Do() {}  // N == 0: nothing left to run.
+};
+
+// These adapters may be called directly, or via For*Types:
+
+// Calls Test for all powers of two in [kMinLanes, HWY_LANES(T) / kDivLanes].
+template <class Test, size_t kDivLanes = 1, size_t kMinLanes = 1>
+struct ForPartialVectors {
+  template <typename T>
+  void operator()(T /*unused*/) const {
+#if HWY_TARGET == HWY_RVV
+    // Only m1..8 for now, can ignore kMinLanes because HWY_*_LANES are full.
+    ForeachSizeR<T, 8 / kDivLanes, HWY_LANES(T), Test>::Do();
+#else  // HWY_TARGET != HWY_RVV
+    ForeachSizeR<T, HWY_LANES(T) / kDivLanes / kMinLanes, kMinLanes,
+                 Test>::Do();
+#endif  // HWY_TARGET == HWY_RVV
+  }
+};
+
+// Calls Test for all vectors that can be demoted log2(kFactor) times.
+template <class Test, size_t kFactor>
+struct ForDemoteVectors {
+  template <typename T>
+  void operator()(T /*unused*/) const {
+#if HWY_TARGET == HWY_RVV
+    // Only m1..8 for now.
+    ForeachSizeR<T, 8 / kFactor, kFactor * HWY_LANES(T), Test>::Do();
+#else  // NOTE(review): kFactor is unused on this path - confirm intended.
+    ForeachSizeR<T, HWY_LANES(T), 1, Test>::Do();
+#endif  // HWY_TARGET == HWY_RVV
+  }
+};
+
+// Calls Test for all powers of two in [128 bits, max bits].
+template <class Test>
+struct ForGE128Vectors {
+  template <typename T>
+  void operator()(T /*unused*/) const {
+#if HWY_TARGET == HWY_RVV
+    ForeachSizeR<T, 8, HWY_LANES(T), Test>::Do();
+#else
+    // 16 / sizeof(T) is the number of T lanes in 128 bits (16 bytes).
+    ForeachSizeR<T, HWY_LANES(T) / (16 / sizeof(T)), (16 / sizeof(T)),
+                 Test>::Do();
+#endif
+  }
+};
+
+// Calls Test for all vectors that can be expanded by kFactor.
+template <class Test, size_t kFactor = 2>
+struct ForExtendableVectors {
+  template <typename T>
+  void operator()(T /*unused*/) const {
+#if HWY_TARGET == HWY_RVV
+    ForeachSizeR<T, 8 / kFactor, HWY_LANES(T), Test>::Do();
+#else  // 16 / sizeof(T) is the number of T lanes in 128 bits (16 bytes).
+    ForeachSizeR<T, HWY_LANES(T) / kFactor / (16 / sizeof(T)), (16 / sizeof(T)),
+                 Test>::Do();
+#endif  // HWY_TARGET == HWY_RVV
+  }
+};
+
+// Type lists to shorten call sites:
+
+template <class Func>
+void ForSignedTypes(const Func& func) {
+  func(int8_t());  // Each call passes a value-initialized instance of the type.
+  func(int16_t());
+  func(int32_t());
+#if HWY_CAP_INTEGER64  // NOTE(review): presumably a per-target capability flag.
+  func(int64_t());
+#endif  // HWY_CAP_INTEGER64
+}
+
+template <class Func>
+void ForUnsignedTypes(const Func& func) {
+  func(uint8_t());  // Each call passes a value-initialized instance of the type.
+  func(uint16_t());
+  func(uint32_t());
+#if HWY_CAP_INTEGER64  // NOTE(review): presumably a per-target capability flag.
+  func(uint64_t());
+#endif  // HWY_CAP_INTEGER64
+}
+
+template <class Func>
+void ForIntegerTypes(const Func& func) {
+  ForSignedTypes(func);  // Signed types first,
+  ForUnsignedTypes(func);  // then unsigned types.
+}
+
+template <class Func>
+void ForFloatTypes(const Func& func) {
+  func(float());
+#if HWY_CAP_FLOAT64  // NOTE(review): presumably a per-target capability flag.
+  func(double());
+#endif  // HWY_CAP_FLOAT64
+}
+
+template <class Func>
+void ForAllTypes(const Func& func) {
+  ForIntegerTypes(func);  // Integer types first,
+  ForFloatTypes(func);  // then floating-point types.
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#endif  // per-target include guard
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/test_util_test.cc b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/test_util_test.cc
new file mode 100644
index 0000000000..b0f5edf52a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/highway/hwy/tests/test_util_test.cc
@@ -0,0 +1,102 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "tests/test_util_test.cc"
+#include "hwy/foreach_target.h"
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+struct TestName {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T t, D d) {
+    // Rebuild the name independently of TypeName: a type letter, the lane
+    // width in bits and, for anything but a single lane, "x<lane count>".
+    std::string want = IsFloat<T>() ? "f" : (IsSigned<T>() ? "i" : "u");
+    want += std::to_string(sizeof(T) * 8);
+
+    const size_t num_lanes = Lanes(d);
+    if (num_lanes != 1) {
+      want += 'x';
+      want += std::to_string(num_lanes);
+    }
+    const std::string got = TypeName(t, num_lanes);
+    if (want != got) {
+      // The expected name is also passed as the type_name argument.
+      NotifyFailure(__FILE__, __LINE__, want.c_str(), 0, want.c_str(),
+                    got.c_str());
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllName() { ForAllTypes(ForPartialVectors<TestName>()); }
+
+struct TestEqualInteger {
+  template <class T>
+  HWY_NOINLINE void operator()(T /*t*/) const {
+    HWY_ASSERT(IsEqual(T(0), T(0)));
+    HWY_ASSERT(IsEqual(T(1), T(1)));
+    HWY_ASSERT(IsEqual(T(-1), T(-1)));
+    HWY_ASSERT(IsEqual(LimitsMin<T>(), LimitsMin<T>()));
+    // Different values must compare unequal in either argument order.
+    HWY_ASSERT(!IsEqual(T(0), T(1)));
+    HWY_ASSERT(!IsEqual(T(1), T(0)));
+    HWY_ASSERT(!IsEqual(T(1), T(-1)));
+    HWY_ASSERT(!IsEqual(T(-1), T(1)));
+    HWY_ASSERT(!IsEqual(LimitsMin<T>(), LimitsMax<T>()));
+    HWY_ASSERT(!IsEqual(LimitsMax<T>(), LimitsMin<T>()));
+  }
+};
+
+struct TestEqualFloat {
+  template <class T>
+  HWY_NOINLINE void operator()(T /*t*/) const {
+    HWY_ASSERT(IsEqual(T(0), T(0)));
+    HWY_ASSERT(IsEqual(T(1), T(1)));
+    HWY_ASSERT(IsEqual(T(-1), T(-1)));
+    HWY_ASSERT(IsEqual(MantissaEnd<T>(), MantissaEnd<T>()));
+    // Values far more than 1 ULP apart must compare unequal in either order.
+    HWY_ASSERT(!IsEqual(T(0), T(1)));
+    HWY_ASSERT(!IsEqual(T(1), T(0)));
+    HWY_ASSERT(!IsEqual(T(1), T(-1)));
+    HWY_ASSERT(!IsEqual(T(-1), T(1)));
+    HWY_ASSERT(!IsEqual(LowestValue<T>(), HighestValue<T>()));
+    HWY_ASSERT(!IsEqual(HighestValue<T>(), LowestValue<T>()));
+  }
+};
+
+HWY_NOINLINE void TestAllEqual() {
+  ForIntegerTypes(TestEqualInteger());  // Exact-comparison overload of IsEqual.
+  ForFloatTypes(TestEqualFloat());  // ULP-tolerant overload of IsEqual.
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE  // NOTE(review): presumably true in a single target pass - confirm
+namespace hwy {
+HWY_BEFORE_TEST(TestUtilTest);  // Declares and instantiates the suite.
+HWY_EXPORT_AND_TEST_P(TestUtilTest, TestAllName);
+HWY_EXPORT_AND_TEST_P(TestUtilTest, TestAllEqual);
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/AUTHORS b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/AUTHORS
new file mode 100644
index 0000000000..dd801a9d1a
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/AUTHORS
@@ -0,0 +1,52 @@
+
+Main Author
+------------
+Marti Maria 
+
+
+Contributors 
+------------
+Bob Friesenhahn 
+Kai-Uwe Behrmann
+Stuart Nixon
+Jordi Vilar
+Richard Hughes
+Auke Nauta
+Chris Evans (Google)
+Lorenzo Ridolfi 
+Robin Watts (Artifex)
+Shawn Pedersen 
+Andrew Brygin 
+Samuli Suominen 
+Florian Höch
+Aurelien Jarno 
+Claudiu Cebuc
+Michael Vrhel (Artifex)
+Michal Cihar 
+Daniel Kaneider 
+Mateusz Jurczyk (Google)
+Paul Miller
+Sébastien Léon
+Christian Schmitz
+XhmikosR
+Stanislav Brabec (SuSe)
+Leonhard Gruenschloss (Google)
+Patrick Noffke
+Christopher James Halse Rogers
+John Hein
+Thomas Weber (Debian)
+Mark Allen
+Noel Carboni
+Sergei Trofimovic
+
+Special Thanks 
+--------------
+Artifex software
+AlienSkin software
+Jan Morovic
+Jos Vernon (WebSupergoo)
+Harald Schneider (Maxon)
+Christian Albrecht 
+Dimitrios Anastassakis 
+Lemke Software 
+Tim Zaman
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/COPYING b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/COPYING
new file mode 100644
index 0000000000..fda5c9eb57
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/COPYING
@@ -0,0 +1,8 @@
+Little CMS
+Copyright (c) 1998-2011 Marti Maria Saguer
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/include/Makefile.am b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/include/Makefile.am
new file mode 100644
index 0000000000..7dbe0e43bd
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/include/Makefile.am
@@ -0,0 +1,7 @@
+#
+# Makefile for include directory
+# Based on a work by Bob Friesenhahn
+
+include_HEADERS = lcms2.h lcms2_plugin.h
+
+EXTRA_DIST = lcms2.h lcms2_plugin.h
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/include/Makefile.in b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/include/Makefile.in
new file mode 100644
index 0000000000..22901ea0af
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/include/Makefile.in
@@ -0,0 +1,590 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for include directory
+# Based on a work by Bob Friesenhahn
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = include
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_append_compile_flags.m4 \
+	$(top_srcdir)/m4/ax_append_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+	$(top_srcdir)/m4/ax_require_defined.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(include_HEADERS) \
+	$(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+SOURCES =
+DIST_SOURCES =
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
+am__installdirs = "$(DESTDIR)$(includedir)"
+HEADERS = $(include_HEADERS)
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_THREAD = @LIB_THREAD@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_CXX = @PTHREAD_CXX@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+include_HEADERS = lcms2.h lcms2_plugin.h
+EXTRA_DIST = lcms2.h lcms2_plugin.h
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps) # a newer configure dep triggers am--refresh; otherwise automake is rerun for include/Makefile
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign include/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign include/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status # newer config.status triggers am--refresh; otherwise only this Makefile is regenerated through config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+install-includeHEADERS: $(include_HEADERS) # install the headers into DESTDIR+includedir (created first); headers absent from the build dir are taken from srcdir
+	@$(NORMAL_INSTALL)
+	@list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \
+	fi; \
+	for p in $$list; do \
+	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+	  echo "$$d$$p"; \
+	done | $(am__base_list) | \
+	while read files; do \
+	  echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \
+	  $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \
+	done
+
+uninstall-includeHEADERS: # remove the installed headers, matched by basename, from DESTDIR+includedir
+	@$(NORMAL_UNINSTALL)
+	@list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
+	files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+	dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir)
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) # run etags over the de-duplicated tagged files; a no-op when ETAGS_ARGS, the argument list and the file list are all empty
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) # run ctags over the de-duplicated tagged files unless CTAGS_ARGS and the file list are both empty
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files) # append each tagged file to top_builddir/cscope.files, prefixed by subdir when present here, else by the source dir
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES) # copy every DISTFILES entry into distdir, creating subdirectories first; entries absent from the build dir are taken from srcdir
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(HEADERS)
+installdirs: # create DESTDIR+includedir, skipping an empty directory name
+	for dir in "$(DESTDIR)$(includedir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip: # re-invoke the install target with the stripping install program; STRIPPROG is passed along only when STRIP is non-empty
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic: # remove CONFIG_CLEAN_FILES; CONFIG_CLEAN_VPATH_FILES are removed only when srcdir is not the current directory
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-includeHEADERS
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-includeHEADERS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+	clean-libtool cscopelist-am ctags ctags-am distclean \
+	distclean-generic distclean-libtool distclean-tags distdir dvi \
+	dvi-am html html-am info info-am install install-am \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-includeHEADERS install-info install-info-am \
+	install-man install-pdf install-pdf-am install-ps \
+	install-ps-am install-strip installcheck installcheck-am \
+	installdirs maintainer-clean maintainer-clean-generic \
+	mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
+	ps ps-am tags tags-am uninstall uninstall-am \
+	uninstall-includeHEADERS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/include/lcms2.h b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/include/lcms2.h
new file mode 100644
index 0000000000..76dec4b441
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/include/lcms2.h
@@ -0,0 +1,1916 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+// Version 2.10alpha
+//
+
+#ifndef _lcms2_H
+
+// ********** Configuration toggles ****************************************
+
+// Uncomment this one if you are using big endian machines
+// #define CMS_USE_BIG_ENDIAN   1
+
+// Uncomment this one if your compiler/machine does NOT support the
+// "long long" type.
+// #define CMS_DONT_USE_INT64        1
+
+// Uncomment this if your compiler doesn't work with fast floor function
+// #define CMS_DONT_USE_FAST_FLOOR 1
+
+// Uncomment this line if you want lcms to use the black point tag in the profile;
+// if left commented out, lcms will compute the black point on its own.
+// It is safer to leave it commented out
+// #define CMS_USE_PROFILE_BLACK_POINT_TAG    1
+
+// Uncomment this line if you are compiling as C++ and want a C++ API
+// #define CMS_USE_CPP_API
+
+// Uncomment this line if you need strict CGATS syntax. This makes CGATS files
+// require "KEYWORD" on undefined identifiers; keep it commented out unless needed
+// #define CMS_STRICT_CGATS  1
+
+// Uncomment to get rid of the tables for "half" float support
+// #define CMS_NO_HALF_SUPPORT 1
+
+// Uncomment to get rid of pthreads/windows dependency
+// #define CMS_NO_PTHREADS  1
+
+// Uncomment this for special windows mutex initialization (see lcms2_internal.h)
+// #define CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+
+// Uncomment this to remove the "CMSREGISTER" storage class
+// #define CMS_NO_REGISTER_KEYWORD 1
+
+// ********** End of configuration toggles ******************************
+
+// Needed for streams
+#include <stdio.h>
+
+// Needed for portability (C99 per 7.1.2)
+#include <limits.h>
+#include <time.h>
+#include <stddef.h>
+
+#ifndef CMS_USE_CPP_API // C linkage for C++ includers unless the C++ API was requested
+#   ifdef __cplusplus
+extern "C" {
+#   endif // __cplusplus
+#endif // CMS_USE_CPP_API
+
+// Version/release
+#define LCMS_VERSION        2100
+
+// Basic types may be redefined for compilers that are not fully C99 compliant: when CMS_BASIC_TYPES_ALREADY_DEFINED is set, none of the typedefs in this section are emitted
+#ifndef CMS_BASIC_TYPES_ALREADY_DEFINED
+
+// Base types
+typedef unsigned char        cmsUInt8Number;   // That is guaranteed by the C99 spec
+typedef signed char          cmsInt8Number;    // That is guaranteed by the C99 spec
+
+#if CHAR_BIT != 8
+#  error "Unable to find 8 bit type, unsupported compiler"
+#endif
+
+// IEEE float storage numbers
+typedef float                cmsFloat32Number;
+typedef double               cmsFloat64Number;
+
+// 16-bit base types: pick whichever of short/int has exactly 16 bits, judged by the <limits.h> maxima
+#if (USHRT_MAX == 65535U)
+ typedef unsigned short      cmsUInt16Number;
+#elif (UINT_MAX == 65535U)
+ typedef unsigned int        cmsUInt16Number;
+#else
+#  error "Unable to find 16 bits unsigned type, unsupported compiler"
+#endif
+
+#if (SHRT_MAX == 32767)
+  typedef  short             cmsInt16Number;
+#elif (INT_MAX == 32767)
+  typedef  int               cmsInt16Number;
+#else
+#  error "Unable to find 16 bits signed type, unsupported compiler"
+#endif
+
+// 32-bit base type: int is tried first, then long
+#if (UINT_MAX == 4294967295U)
+ typedef unsigned int        cmsUInt32Number;
+#elif (ULONG_MAX == 4294967295U)
+ typedef unsigned long       cmsUInt32Number;
+#else
+#  error "Unable to find 32 bit unsigned type, unsupported compiler"
+#endif
+
+#if (INT_MAX == +2147483647)
+ typedef  int                cmsInt32Number;
+#elif (LONG_MAX == +2147483647)
+ typedef  long               cmsInt32Number;
+#else
+#  error "Unable to find 32 bit signed type, unsupported compiler"
+#endif
+
+// 64-bit base types: long is tried first, then long long; unlike the narrower types, failure is not an #error
+#ifndef CMS_DONT_USE_INT64
+#  if (ULONG_MAX  == 18446744073709551615U)
+    typedef unsigned long   cmsUInt64Number;
+#  elif (ULLONG_MAX == 18446744073709551615U)
+      typedef unsigned long long   cmsUInt64Number;
+#  else
+#     define CMS_DONT_USE_INT64 1 // no 64-bit unsigned type found: selects the two-word fallback typedefs further down
+#  endif
+#  if (LONG_MAX == +9223372036854775807)
+      typedef  long          cmsInt64Number;
+#  elif (LLONG_MAX == +9223372036854775807)
+      typedef  long long     cmsInt64Number;
+#  else
+#     define CMS_DONT_USE_INT64 1 // no 64-bit signed type found; NOTE(review): if only this branch fires, cmsUInt64Number is already typedef'd above -- confirm that cannot happen on supported compilers
+#  endif
+#endif // CMS_DONT_USE_INT64
+#endif // CMS_BASIC_TYPES_ALREADY_DEFINED
+
+// Handle "register" keyword: CMSREGISTER is empty only when CMS_NO_REGISTER_KEYWORD is defined and neither CMS_DLL nor CMS_DLL_BUILD is
+#if defined(CMS_NO_REGISTER_KEYWORD) && !defined(CMS_DLL) && !defined(CMS_DLL_BUILD) 
+#  define CMSREGISTER
+#else
+#  define CMSREGISTER register
+#endif
+
+// When 64-bit numbers are not supported by the compiler, each 64-bit type becomes an array of two 32-bit numbers
+#ifdef CMS_DONT_USE_INT64
+    typedef cmsUInt32Number      cmsUInt64Number[2];
+    typedef cmsInt32Number       cmsInt64Number[2];
+#endif // CMS_DONT_USE_INT64
+
+// Derivative types: aliases of the fixed-width integer types
+typedef cmsUInt32Number      cmsSignature;         // 32-bit unsigned value
+typedef cmsUInt16Number      cmsU8Fixed8Number;    // 16-bit unsigned storage; presumably 8.8 fixed point -- TODO confirm
+typedef cmsInt32Number       cmsS15Fixed16Number;  // 32-bit signed storage; presumably s15.16 fixed point -- TODO confirm
+typedef cmsUInt32Number      cmsU16Fixed16Number;  // 32-bit unsigned storage; presumably 16.16 fixed point -- TODO confirm
+
+// Boolean type, which will be using the native integer
+typedef int                  cmsBool;
+
+// Try to detect windows: CMS_IS_WINDOWS_ is set when a Win32/Win64 macro, _MSC_VER or __BORLANDC__ is defined
+#if defined (_WIN32) || defined(_WIN64) || defined(WIN32) || defined(_WIN32_)
+#  define CMS_IS_WINDOWS_ 1
+#endif
+
+#ifdef _MSC_VER
+#  define CMS_IS_WINDOWS_ 1
+#endif // _MSC_VER
+
+#ifdef __BORLANDC__
+#  define CMS_IS_WINDOWS_ 1
+#endif // __BORLANDC__
+
+// Try to detect big endian platforms. This list can be endless, so primarily rely on the configure script
+// on Unix-like systems, and allow it to be set on the compiler command line using
+// -DCMS_USE_BIG_ENDIAN or something similar
+#ifdef CMS_USE_BIG_ENDIAN // set at compiler command line takes overall precedence
+
+#  if CMS_USE_BIG_ENDIAN == 0 // an explicit 0 means little endian: drop the macro so later #ifdef tests fail
+#    undef CMS_USE_BIG_ENDIAN
+#  endif
+
+#else // CMS_USE_BIG_ENDIAN
+
+#  ifdef WORDS_BIGENDIAN // set by configure (or explicitly on compiler command line)
+#    define CMS_USE_BIG_ENDIAN 1
+#  else // WORDS_BIGENDIAN
+// Fall back to platform/compiler specific tests
+#    if defined(__sgi__) || defined(__sgi) || defined(sparc)
+#      define CMS_USE_BIG_ENDIAN      1
+#    endif
+
+#    if defined(__s390__) || defined(__s390x__)
+#      define CMS_USE_BIG_ENDIAN   1
+#    endif
+
+#    ifdef macintosh // here the compiler's own endianness macros decide
+#      ifdef __BIG_ENDIAN__
+#        define CMS_USE_BIG_ENDIAN      1
+#      endif
+#      ifdef __LITTLE_ENDIAN__
+#        undef CMS_USE_BIG_ENDIAN
+#      endif
+#    endif // macintosh
+#  endif  // WORDS_BIGENDIAN
+
+#  if defined(_HOST_BIG_ENDIAN) || defined(__BIG_ENDIAN__) // evaluated regardless of the WORDS_BIGENDIAN outcome above
+#    define CMS_USE_BIG_ENDIAN      1
+#  endif
+
+#endif  // CMS_USE_BIG_ENDIAN
+
+
+// Calling convention -- this is highly platform and compiler dependent
+#ifdef CMS_IS_WINDOWS_
+#  if defined(CMS_DLL) || defined(CMS_DLL_BUILD)
+#     ifdef __BORLANDC__
+#        define CMSEXPORT       __stdcall _export
+#        define CMSAPI
+#     else
+#        define CMSEXPORT      __stdcall
+#        ifdef CMS_DLL_BUILD
+#            define CMSAPI    __declspec(dllexport)
+#        else
+#           define CMSAPI     __declspec(dllimport)
+#        endif // CMS_DLL_BUILD
+#     endif // __BORLANDC__
+#  else
+#     define CMSEXPORT
+#     define CMSAPI
+#  endif // CMS_DLL || CMS_DLL_BUILD
+#else  // not Windows
+#  ifdef HAVE_FUNC_ATTRIBUTE_VISIBILITY
+#     define CMSEXPORT
+#     define CMSAPI    __attribute__((visibility("default")))
+#  else
+#     define CMSEXPORT
+#     define CMSAPI
+#  endif // HAVE_FUNC_ATTRIBUTE_VISIBILITY
+#endif  // CMS_IS_WINDOWS_
+
+#ifdef HasTHREADS // externally supplied switch that overrides CMS_NO_PTHREADS
+# if HasTHREADS == 1
+#    undef CMS_NO_PTHREADS
+# else // any value other than 1 forces CMS_NO_PTHREADS on
+#    define CMS_NO_PTHREADS 1
+# endif
+#endif // HasTHREADS
+
+// Some common definitions
+#define cmsMAX_PATH     256
+
+#ifndef FALSE // keep any FALSE the includer already defined
+#       define FALSE 0
+#endif
+#ifndef TRUE // keep any TRUE the includer already defined
+#       define TRUE  1
+#endif
+
+// D50 XYZ normalized to Y=1.0
+#define cmsD50X  0.9642
+#define cmsD50Y  1.0
+#define cmsD50Z  0.8249
+
+// V4 perceptual black
+#define cmsPERCEPTUAL_BLACK_X  0.00336
+#define cmsPERCEPTUAL_BLACK_Y  0.0034731
+#define cmsPERCEPTUAL_BLACK_Z  0.00287
+
+// Definitions in ICC spec
+#define cmsMagicNumber  0x61637370     // 'acsp'
+#define lcmsSignature   0x6c636d73     // 'lcms'
+
+
+// Base ICC type definitions: each value packs the four ASCII characters quoted in its trailing comment, most significant byte first
+typedef enum {
+    cmsSigChromaticityType                  = 0x6368726D,  // 'chrm'
+    cmsSigColorantOrderType                 = 0x636C726F,  // 'clro'
+    cmsSigColorantTableType                 = 0x636C7274,  // 'clrt'
+    cmsSigCrdInfoType                       = 0x63726469,  // 'crdi'
+    cmsSigCurveType                         = 0x63757276,  // 'curv'
+    cmsSigDataType                          = 0x64617461,  // 'data'
+    cmsSigDictType                          = 0x64696374,  // 'dict'
+    cmsSigDateTimeType                      = 0x6474696D,  // 'dtim'
+    cmsSigDeviceSettingsType                = 0x64657673,  // 'devs'
+    cmsSigLut16Type                         = 0x6d667432,  // 'mft2'
+    cmsSigLut8Type                          = 0x6d667431,  // 'mft1'
+    cmsSigLutAtoBType                       = 0x6d414220,  // 'mAB '
+    cmsSigLutBtoAType                       = 0x6d424120,  // 'mBA '
+    cmsSigMeasurementType                   = 0x6D656173,  // 'meas'
+    cmsSigMultiLocalizedUnicodeType         = 0x6D6C7563,  // 'mluc'
+    cmsSigMultiProcessElementType           = 0x6D706574,  // 'mpet'
+    cmsSigNamedColorType                    = 0x6E636f6C,  // 'ncol' -- DEPRECATED!
+    cmsSigNamedColor2Type                   = 0x6E636C32,  // 'ncl2'
+    cmsSigParametricCurveType               = 0x70617261,  // 'para'
+    cmsSigProfileSequenceDescType           = 0x70736571,  // 'pseq'
+    cmsSigProfileSequenceIdType             = 0x70736964,  // 'psid'
+    cmsSigResponseCurveSet16Type            = 0x72637332,  // 'rcs2'
+    cmsSigS15Fixed16ArrayType               = 0x73663332,  // 'sf32'
+    cmsSigScreeningType                     = 0x7363726E,  // 'scrn'
+    cmsSigSignatureType                     = 0x73696720,  // 'sig '
+    cmsSigTextType                          = 0x74657874,  // 'text'
+    cmsSigTextDescriptionType               = 0x64657363,  // 'desc'
+    cmsSigU16Fixed16ArrayType               = 0x75663332,  // 'uf32'
+    cmsSigUcrBgType                         = 0x62666420,  // 'bfd '
+    cmsSigUInt16ArrayType                   = 0x75693136,  // 'ui16'
+    cmsSigUInt32ArrayType                   = 0x75693332,  // 'ui32'
+    cmsSigUInt64ArrayType                   = 0x75693634,  // 'ui64'
+    cmsSigUInt8ArrayType                    = 0x75693038,  // 'ui08'
+    cmsSigVcgtType                          = 0x76636774,  // 'vcgt'
+    cmsSigViewingConditionsType             = 0x76696577,  // 'view'
+    cmsSigXYZType                           = 0x58595A20   // 'XYZ '
+
+
+} cmsTagTypeSignature;
+
+// Base ICC tag definitions: each value packs the four ASCII characters quoted in its trailing comment; the *ColorantTag and *MatrixColumnTag names share a value and are aliases
+typedef enum {
+    cmsSigAToB0Tag                          = 0x41324230,  // 'A2B0'
+    cmsSigAToB1Tag                          = 0x41324231,  // 'A2B1'
+    cmsSigAToB2Tag                          = 0x41324232,  // 'A2B2'
+    cmsSigBlueColorantTag                   = 0x6258595A,  // 'bXYZ'
+    cmsSigBlueMatrixColumnTag               = 0x6258595A,  // 'bXYZ'
+    cmsSigBlueTRCTag                        = 0x62545243,  // 'bTRC'
+    cmsSigBToA0Tag                          = 0x42324130,  // 'B2A0'
+    cmsSigBToA1Tag                          = 0x42324131,  // 'B2A1'
+    cmsSigBToA2Tag                          = 0x42324132,  // 'B2A2'
+    cmsSigCalibrationDateTimeTag            = 0x63616C74,  // 'calt'
+    cmsSigCharTargetTag                     = 0x74617267,  // 'targ'
+    cmsSigChromaticAdaptationTag            = 0x63686164,  // 'chad'
+    cmsSigChromaticityTag                   = 0x6368726D,  // 'chrm'
+    cmsSigColorantOrderTag                  = 0x636C726F,  // 'clro'
+    cmsSigColorantTableTag                  = 0x636C7274,  // 'clrt'
+    cmsSigColorantTableOutTag               = 0x636C6F74,  // 'clot'
+    cmsSigColorimetricIntentImageStateTag   = 0x63696973,  // 'ciis'
+    cmsSigCopyrightTag                      = 0x63707274,  // 'cprt'
+    cmsSigCrdInfoTag                        = 0x63726469,  // 'crdi'
+    cmsSigDataTag                           = 0x64617461,  // 'data'
+    cmsSigDateTimeTag                       = 0x6474696D,  // 'dtim'
+    cmsSigDeviceMfgDescTag                  = 0x646D6E64,  // 'dmnd'
+    cmsSigDeviceModelDescTag                = 0x646D6464,  // 'dmdd'
+    cmsSigDeviceSettingsTag                 = 0x64657673,  // 'devs'
+    cmsSigDToB0Tag                          = 0x44324230,  // 'D2B0'
+    cmsSigDToB1Tag                          = 0x44324231,  // 'D2B1'
+    cmsSigDToB2Tag                          = 0x44324232,  // 'D2B2'
+    cmsSigDToB3Tag                          = 0x44324233,  // 'D2B3'
+    cmsSigBToD0Tag                          = 0x42324430,  // 'B2D0'
+    cmsSigBToD1Tag                          = 0x42324431,  // 'B2D1'
+    cmsSigBToD2Tag                          = 0x42324432,  // 'B2D2'
+    cmsSigBToD3Tag                          = 0x42324433,  // 'B2D3'
+    cmsSigGamutTag                          = 0x67616D74,  // 'gamt'
+    cmsSigGrayTRCTag                        = 0x6b545243,  // 'kTRC'
+    cmsSigGreenColorantTag                  = 0x6758595A,  // 'gXYZ'
+    cmsSigGreenMatrixColumnTag              = 0x6758595A,  // 'gXYZ'
+    cmsSigGreenTRCTag                       = 0x67545243,  // 'gTRC'
+    cmsSigLuminanceTag                      = 0x6C756d69,  // 'lumi'
+    cmsSigMeasurementTag                    = 0x6D656173,  // 'meas'
+    cmsSigMediaBlackPointTag                = 0x626B7074,  // 'bkpt'
+    cmsSigMediaWhitePointTag                = 0x77747074,  // 'wtpt'
+    cmsSigNamedColorTag                     = 0x6E636f6C,  // 'ncol' // Deprecated by the ICC
+    cmsSigNamedColor2Tag                    = 0x6E636C32,  // 'ncl2'
+    cmsSigOutputResponseTag                 = 0x72657370,  // 'resp'
+    cmsSigPerceptualRenderingIntentGamutTag = 0x72696730,  // 'rig0'
+    cmsSigPreview0Tag                       = 0x70726530,  // 'pre0'
+    cmsSigPreview1Tag                       = 0x70726531,  // 'pre1'
+    cmsSigPreview2Tag                       = 0x70726532,  // 'pre2'
+    cmsSigProfileDescriptionTag             = 0x64657363,  // 'desc'
+    cmsSigProfileDescriptionMLTag           = 0x6473636d,  // 'dscm'
+    cmsSigProfileSequenceDescTag            = 0x70736571,  // 'pseq'
+    cmsSigProfileSequenceIdTag              = 0x70736964,  // 'psid'
+    cmsSigPs2CRD0Tag                        = 0x70736430,  // 'psd0'
+    cmsSigPs2CRD1Tag                        = 0x70736431,  // 'psd1'
+    cmsSigPs2CRD2Tag                        = 0x70736432,  // 'psd2'
+    cmsSigPs2CRD3Tag                        = 0x70736433,  // 'psd3'
+    cmsSigPs2CSATag                         = 0x70733273,  // 'ps2s'
+    cmsSigPs2RenderingIntentTag             = 0x70733269,  // 'ps2i'
+    cmsSigRedColorantTag                    = 0x7258595A,  // 'rXYZ'
+    cmsSigRedMatrixColumnTag                = 0x7258595A,  // 'rXYZ'
+    cmsSigRedTRCTag                         = 0x72545243,  // 'rTRC'
+    cmsSigSaturationRenderingIntentGamutTag = 0x72696732,  // 'rig2'
+    cmsSigScreeningDescTag                  = 0x73637264,  // 'scrd'
+    cmsSigScreeningTag                      = 0x7363726E,  // 'scrn'
+    cmsSigTechnologyTag                     = 0x74656368,  // 'tech'
+    cmsSigUcrBgTag                          = 0x62666420,  // 'bfd '
+    cmsSigViewingCondDescTag                = 0x76756564,  // 'vued'
+    cmsSigViewingConditionsTag              = 0x76696577,  // 'view'
+    cmsSigVcgtTag                           = 0x76636774,  // 'vcgt'
+    cmsSigMetaTag                           = 0x6D657461,  // 'meta'
+    cmsSigArgyllArtsTag                     = 0x61727473   // 'arts'
+
+} cmsTagSignature;
+
+
+// ICC Technology tag signatures: each value packs the four ASCII characters quoted in its trailing comment (one mismatch is flagged on the last entry)
+typedef enum {
+    cmsSigDigitalCamera                     = 0x6463616D,  // 'dcam'
+    cmsSigFilmScanner                       = 0x6673636E,  // 'fscn'
+    cmsSigReflectiveScanner                 = 0x7273636E,  // 'rscn'
+    cmsSigInkJetPrinter                     = 0x696A6574,  // 'ijet'
+    cmsSigThermalWaxPrinter                 = 0x74776178,  // 'twax'
+    cmsSigElectrophotographicPrinter        = 0x6570686F,  // 'epho'
+    cmsSigElectrostaticPrinter              = 0x65737461,  // 'esta'
+    cmsSigDyeSublimationPrinter             = 0x64737562,  // 'dsub'
+    cmsSigPhotographicPaperPrinter          = 0x7270686F,  // 'rpho'
+    cmsSigFilmWriter                        = 0x6670726E,  // 'fprn'
+    cmsSigVideoMonitor                      = 0x7669646D,  // 'vidm'
+    cmsSigVideoCamera                       = 0x76696463,  // 'vidc'
+    cmsSigProjectionTelevision              = 0x706A7476,  // 'pjtv'
+    cmsSigCRTDisplay                        = 0x43525420,  // 'CRT '
+    cmsSigPMDisplay                         = 0x504D4420,  // 'PMD '
+    cmsSigAMDisplay                         = 0x414D4420,  // 'AMD '
+    cmsSigPhotoCD                           = 0x4B504344,  // 'KPCD'
+    cmsSigPhotoImageSetter                  = 0x696D6773,  // 'imgs'
+    cmsSigGravure                           = 0x67726176,  // 'grav'
+    cmsSigOffsetLithography                 = 0x6F666673,  // 'offs'
+    cmsSigSilkscreen                        = 0x73696C6B,  // 'silk'
+    cmsSigFlexography                       = 0x666C6578,  // 'flex'
+    cmsSigMotionPictureFilmScanner          = 0x6D706673,  // 'mpfs'
+    cmsSigMotionPictureFilmRecorder         = 0x6D706672,  // 'mpfr'
+    cmsSigDigitalMotionPictureCamera        = 0x646D7063,  // 'dmpc'
+    cmsSigDigitalCinemaProjector            = 0x64636A70   // 'dcpj' -- NOTE(review): these bytes spell 'dcjp', not 'dcpj'; confirm against the ICC spec before touching the value
+
+} cmsTechnologySignature;
+
+
+// ICC Color spaces: each value packs the four ASCII characters quoted in its trailing comment, most significant byte first
+typedef enum {
+    cmsSigXYZData                           = 0x58595A20,  // 'XYZ '
+    cmsSigLabData                           = 0x4C616220,  // 'Lab '
+    cmsSigLuvData                           = 0x4C757620,  // 'Luv '
+    cmsSigYCbCrData                         = 0x59436272,  // 'YCbr'
+    cmsSigYxyData                           = 0x59787920,  // 'Yxy '
+    cmsSigRgbData                           = 0x52474220,  // 'RGB '
+    cmsSigGrayData                          = 0x47524159,  // 'GRAY'
+    cmsSigHsvData                           = 0x48535620,  // 'HSV '
+    cmsSigHlsData                           = 0x484C5320,  // 'HLS '
+    cmsSigCmykData                          = 0x434D594B,  // 'CMYK'
+    cmsSigCmyData                           = 0x434D5920,  // 'CMY '
+    cmsSigMCH1Data                          = 0x4D434831,  // 'MCH1'
+    cmsSigMCH2Data                          = 0x4D434832,  // 'MCH2'
+    cmsSigMCH3Data                          = 0x4D434833,  // 'MCH3'
+    cmsSigMCH4Data                          = 0x4D434834,  // 'MCH4'
+    cmsSigMCH5Data                          = 0x4D434835,  // 'MCH5'
+    cmsSigMCH6Data                          = 0x4D434836,  // 'MCH6'
+    cmsSigMCH7Data                          = 0x4D434837,  // 'MCH7'
+    cmsSigMCH8Data                          = 0x4D434838,  // 'MCH8'
+    cmsSigMCH9Data                          = 0x4D434839,  // 'MCH9'
+    cmsSigMCHAData                          = 0x4D434841,  // 'MCHA'
+    cmsSigMCHBData                          = 0x4D434842,  // 'MCHB'
+    cmsSigMCHCData                          = 0x4D434843,  // 'MCHC'
+    cmsSigMCHDData                          = 0x4D434844,  // 'MCHD'
+    cmsSigMCHEData                          = 0x4D434845,  // 'MCHE'
+    cmsSigMCHFData                          = 0x4D434846,  // 'MCHF'
+    cmsSigNamedData                         = 0x6e6d636c,  // 'nmcl'
+    cmsSig1colorData                        = 0x31434C52,  // '1CLR'
+    cmsSig2colorData                        = 0x32434C52,  // '2CLR'
+    cmsSig3colorData                        = 0x33434C52,  // '3CLR'
+    cmsSig4colorData                        = 0x34434C52,  // '4CLR'
+    cmsSig5colorData                        = 0x35434C52,  // '5CLR'
+    cmsSig6colorData                        = 0x36434C52,  // '6CLR'
+    cmsSig7colorData                        = 0x37434C52,  // '7CLR'
+    cmsSig8colorData                        = 0x38434C52,  // '8CLR'
+    cmsSig9colorData                        = 0x39434C52,  // '9CLR'
+    cmsSig10colorData                       = 0x41434C52,  // 'ACLR'
+    cmsSig11colorData                       = 0x42434C52,  // 'BCLR'
+    cmsSig12colorData                       = 0x43434C52,  // 'CCLR'
+    cmsSig13colorData                       = 0x44434C52,  // 'DCLR'
+    cmsSig14colorData                       = 0x45434C52,  // 'ECLR'
+    cmsSig15colorData                       = 0x46434C52,  // 'FCLR'
+    cmsSigLuvKData                          = 0x4C75764B   // 'LuvK'
+
+} cmsColorSpaceSignature;
+
+// ICC profile class signatures (the type of the deviceClass field of cmsICCHeader); values are packed four-character codes
+typedef enum {
+    cmsSigInputClass                        = 0x73636E72,  // 'scnr'
+    cmsSigDisplayClass                      = 0x6D6E7472,  // 'mntr'
+    cmsSigOutputClass                       = 0x70727472,  // 'prtr'
+    cmsSigLinkClass                         = 0x6C696E6B,  // 'link'
+    cmsSigAbstractClass                     = 0x61627374,  // 'abst'
+    cmsSigColorSpaceClass                   = 0x73706163,  // 'spac'
+    cmsSigNamedColorClass                   = 0x6e6d636c   // 'nmcl' (same numeric value as cmsSigNamedData)
+
+} cmsProfileClassSignature;
+
+// ICC platform signatures (the type of the platform field of cmsICCHeader); values are packed four-character codes
+typedef enum {
+    cmsSigMacintosh                         = 0x4150504C,  // 'APPL'
+    cmsSigMicrosoft                         = 0x4D534654,  // 'MSFT'
+    cmsSigSolaris                           = 0x53554E57,  // 'SUNW'
+    cmsSigSGI                               = 0x53474920,  // 'SGI '
+    cmsSigTaligent                          = 0x54474E54,  // 'TGNT'
+    cmsSigUnices                            = 0x2A6E6978   // '*nix' -- taken from Argyll, not an official ICC value
+
+} cmsPlatformSignature;
+
+// Reference gamut signature (plain macro, not an enum member)
+#define  cmsSigPerceptualReferenceMediumGamut         0x70726d67  //'prmg'
+
+// Signature values for cmsSigColorimetricIntentImageStateTag (plain macros, packed four-character codes)
+#define  cmsSigSceneColorimetryEstimates              0x73636F65  //'scoe'
+#define  cmsSigSceneAppearanceEstimates               0x73617065  //'sape'
+#define  cmsSigFocalPlaneColorimetryEstimates         0x66706365  //'fpce'
+#define  cmsSigReflectionHardcopyOriginalColorimetry  0x72686F63  //'rhoc'
+#define  cmsSigReflectionPrintOutputColorimetry       0x72706F63  //'rpoc'
+
+// Multi process element (stage) type signatures; values are packed four-character codes
+typedef enum {
+    cmsSigCurveSetElemType              = 0x63767374,  //'cvst'
+    cmsSigMatrixElemType                = 0x6D617466,  //'matf'
+    cmsSigCLutElemType                  = 0x636C7574,  //'clut'
+
+    cmsSigBAcsElemType                  = 0x62414353,  // 'bACS'
+    cmsSigEAcsElemType                  = 0x65414353,  // 'eACS'
+
+    // Custom from here, not in the ICC Spec
+    cmsSigXYZ2LabElemType               = 0x6C327820,  // 'l2x '
+    cmsSigLab2XYZElemType               = 0x78326C20,  // 'x2l '
+    cmsSigNamedColorElemType            = 0x6E636C20,  // 'ncl '
+    cmsSigLabV2toV4                     = 0x32203420,  // '2 4 '
+    cmsSigLabV4toV2                     = 0x34203220,  // '4 2 '
+  
+    // Identities
+    cmsSigIdentityElemType              = 0x69646E20,  // 'idn '
+
+    // Float to floatPCS
+    cmsSigLab2FloatPCS                  = 0x64326C20,  // 'd2l '
+    cmsSigFloatPCS2Lab                  = 0x6C326420,  // 'l2d '
+    cmsSigXYZ2FloatPCS                  = 0x64327820,  // 'd2x '
+    cmsSigFloatPCS2XYZ                  = 0x78326420,  // 'x2d '
+    cmsSigClipNegativesElemType         = 0x636c7020   // 'clp '
+
+} cmsStageSignature;
+
+// Curve element (segment) type signatures; values are packed four-character codes
+typedef enum {
+
+    cmsSigFormulaCurveSeg               = 0x70617266, // 'parf'
+    cmsSigSampledCurveSeg               = 0x73616D66, // 'samf'
+    cmsSigSegmentedCurve                = 0x63757266  // 'curf'
+
+} cmsCurveSegSignature;
+
+// Signatures used in ResponseCurveType (plain macros; packed four-character codes, space-padded where shorter)
+#define  cmsSigStatusA                    0x53746141 //'StaA'
+#define  cmsSigStatusE                    0x53746145 //'StaE'
+#define  cmsSigStatusI                    0x53746149 //'StaI'
+#define  cmsSigStatusT                    0x53746154 //'StaT'
+#define  cmsSigStatusM                    0x5374614D //'StaM'
+#define  cmsSigDN                         0x444E2020 //'DN  '
+#define  cmsSigDNP                        0x444E2050 //'DN P' (the space sits before the 'P')
+#define  cmsSigDNN                        0x444E4E20 //'DNN '
+#define  cmsSigDNNP                       0x444E4E50 //'DNNP'
+
+// Device attributes, currently defined values correspond to the low 4 bytes
+// of the 8 byte attribute quantity (see the 'attributes' field of cmsICCHeader)
+#define cmsReflective     0   // bit 0 clear
+#define cmsTransparency   1   // bit 0 set
+#define cmsGlossy         0   // bit 1 clear
+#define cmsMatte          2   // bit 1 set
+
+// Common structures in ICC tags: cmsICCData holds a length, a flag word and a byte payload
+typedef struct {
+    cmsUInt32Number len;      // NOTE(review): presumably the number of payload bytes -- confirm
+    cmsUInt32Number flag;     // NOTE(review): meaning of the flag bits is not shown here -- confirm
+    cmsUInt8Number  data[1];  // Declared with one element; presumably the first byte of a longer payload -- confirm
+
+} cmsICCData;
+
+// ICC date time: six unsigned 16-bit fields, from year down to seconds
+typedef struct {
+    cmsUInt16Number      year;
+    cmsUInt16Number      month;
+    cmsUInt16Number      day;
+    cmsUInt16Number      hours;
+    cmsUInt16Number      minutes;
+    cmsUInt16Number      seconds;
+
+} cmsDateTimeNumber;
+
+// ICC XYZ: three components stored as cmsS15Fixed16Number (fixed point), unlike cmsCIEXYZ which uses cmsFloat64Number
+typedef struct {
+    cmsS15Fixed16Number  X;
+    cmsS15Fixed16Number  Y;
+    cmsS15Fixed16Number  Z;
+
+} cmsEncodedXYZNumber;
+
+
+typedef union {                   // Profile ID (type of cmsICCHeader.profileID): one storage area, three element widths
+    cmsUInt8Number       ID8[16]; // view as 16 cmsUInt8Number elements
+    cmsUInt16Number      ID16[8]; // view as 8 cmsUInt16Number elements
+    cmsUInt32Number      ID32[4]; // view as 4 cmsUInt32Number elements
+
+} cmsProfileID;
+
+
+// ----------------------------------------------------------------------------------------------
+// ICC profile internal base types. Strictly speaking, these shouldn't be declared in this header, but
+// somebody may want to use this info to access the profile header directly, so here they are.
+
+// Profile header -- it is 32-bit aligned, so no issues are expected on alignment. Fields appear in declaration order below.
+typedef struct {
+    cmsUInt32Number              size;           // Profile size in bytes
+    cmsSignature                 cmmId;          // CMM for this profile
+    cmsUInt32Number              version;        // Format version number
+    cmsProfileClassSignature     deviceClass;    // Type of profile
+    cmsColorSpaceSignature       colorSpace;     // Color space of data
+    cmsColorSpaceSignature       pcs;            // PCS, XYZ or Lab only
+    cmsDateTimeNumber            date;           // Date profile was created
+    cmsSignature                 magic;          // Magic Number to identify an ICC profile
+    cmsPlatformSignature         platform;       // Primary Platform
+    cmsUInt32Number              flags;          // Various bit settings
+    cmsSignature                 manufacturer;   // Device manufacturer
+    cmsUInt32Number              model;          // Device model number
+    cmsUInt64Number              attributes;     // Device attributes (see cmsReflective / cmsTransparency / cmsGlossy / cmsMatte)
+    cmsUInt32Number              renderingIntent;// Rendering intent
+    cmsEncodedXYZNumber          illuminant;     // Profile illuminant
+    cmsSignature                 creator;        // Profile creator
+    cmsProfileID                 profileID;      // Profile ID
+    cmsInt8Number                reserved[28];   // Reserved for future use
+
+} cmsICCHeader;
+
+// ICC base tag: a tag type signature followed by four reserved bytes
+typedef struct {
+    cmsTagTypeSignature  sig;          // Tag type signature
+    cmsInt8Number        reserved[4];  // Reserved bytes
+
+} cmsTagBase;
+
+// A tag entry in the tag directory: signature plus the location and size of the tag data
+typedef struct {
+    cmsTagSignature      sig;            // The tag signature
+    cmsUInt32Number      offset;         // Start of tag
+    cmsUInt32Number      size;           // Size in bytes
+
+} cmsTagEntry;
+
+// ----------------------------------------------------------------------------------------------
+
+// Little CMS specific typedefs
+
+typedef void* cmsHANDLE ;              // Generic handle
+typedef void* cmsHPROFILE;             // Opaque typedefs to hide internals (profile handle)
+typedef void* cmsHTRANSFORM;           // Opaque transform handle; like the two above it is a plain void*, so the compiler cannot tell the handle kinds apart
+
+#define cmsMAXCHANNELS  16                // Maximum number of channels in ICC profiles
+
+// Format of pixel is defined by one cmsUInt32Number, using bit fields as follows
+// (bit columns in the diagram are numbered from 1; the *_SH shift counts further down are 0-based, i.e. one less)
+//                               2                1          0
+//                          3 2 10987 6 5 4 3 2 1 098 7654 321
+//                          A O TTTTT U Y F P X S EEE CCCC BBB
+//            U: no packing macro is defined for this bit in this header (presumably unused)
+//            A: Floating point -- With this flag we can differentiate 16 bits as float and as int
+//            O: Optimized -- previous optimization already returns the final 8-bit value
+//            T: Pixeltype
+//            F: Flavor  0=MinIsBlack(Chocolate) 1=MinIsWhite(Vanilla)
+//            P: Planar? 0=Chunky, 1=Planar
+//            X: swap 16 bps endianness?
+//            S: Do swap? ie, BGR, KYMC
+//            E: Extra samples
+//            C: Channels (Samples per pixel)
+//            B: bytes per sample
+//            Y: Swap first - changes ABGR to BGRA and KCMY to CMYK
+
+#define FLOAT_SH(a)            ((a) << 22)   // A
+#define OPTIMIZED_SH(s)        ((s) << 21)   // O
+#define COLORSPACE_SH(s)       ((s) << 16)   // T (takes a PT_* pixel type)
+#define SWAPFIRST_SH(s)        ((s) << 14)   // Y
+#define FLAVOR_SH(s)           ((s) << 13)   // F
+#define PLANAR_SH(p)           ((p) << 12)   // P
+#define ENDIAN16_SH(e)         ((e) << 11)   // X
+#define DOSWAP_SH(e)           ((e) << 10)   // S
+#define EXTRA_SH(e)            ((e) << 7)    // E
+#define CHANNELS_SH(c)         ((c) << 3)    // C
+#define BYTES_SH(b)            (b)           // B (no shift and no masking: the argument must already fit the field)
+
+// These macros unpack format specifiers into integers: each shifts by the matching *_SH amount and masks to the field width
+#define T_FLOAT(a)            (((a)>>22)&1)    // 1 bit
+#define T_OPTIMIZED(o)        (((o)>>21)&1)    // 1 bit
+#define T_COLORSPACE(s)       (((s)>>16)&31)   // 5 bits, yields a PT_* value
+#define T_SWAPFIRST(s)        (((s)>>14)&1)    // 1 bit
+#define T_FLAVOR(s)           (((s)>>13)&1)    // 1 bit
+#define T_PLANAR(p)           (((p)>>12)&1)    // 1 bit
+#define T_ENDIAN16(e)         (((e)>>11)&1)    // 1 bit
+#define T_DOSWAP(e)           (((e)>>10)&1)    // 1 bit
+#define T_EXTRA(e)            (((e)>>7)&7)     // 3 bits
+#define T_CHANNELS(c)         (((c)>>3)&15)    // 4 bits
+#define T_BYTES(b)            ((b)&7)          // 3 bits
+
+
+// Pixel types: values for the 5-bit colorspace field packed by COLORSPACE_SH and read back by T_COLORSPACE
+#define PT_ANY       0    // Don't check colorspace
+                          // 1 & 2 are reserved
+#define PT_GRAY      3
+#define PT_RGB       4
+#define PT_CMY       5
+#define PT_CMYK      6
+#define PT_YCbCr     7
+#define PT_YUV       8      // Lu'v'
+#define PT_XYZ       9
+#define PT_Lab       10
+#define PT_YUVK      11     // Lu'v'K
+#define PT_HSV       12
+#define PT_HLS       13
+#define PT_Yxy       14
+
+#define PT_MCH1      15     // PT_MCHn == 14 + n
+#define PT_MCH2      16
+#define PT_MCH3      17
+#define PT_MCH4      18
+#define PT_MCH5      19
+#define PT_MCH6      20
+#define PT_MCH7      21
+#define PT_MCH8      22
+#define PT_MCH9      23
+#define PT_MCH10     24
+#define PT_MCH11     25
+#define PT_MCH12     26
+#define PT_MCH13     27
+#define PT_MCH14     28
+#define PT_MCH15     29
+
+#define PT_LabV2     30     // Identical to PT_Lab, but using the V2 old encoding
+
+// Some (not all!) representations, each built by OR-ing the *_SH packing macros
+
+#ifndef TYPE_RGB_8      // TYPE_RGB_8 is a very common identifier, so don't include ours
+                        // if user has it already defined. This guard covers every TYPE_* macro up to the matching #endif.
+
+#define TYPE_GRAY_8            (COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(1))
+#define TYPE_GRAY_8_REV        (COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(1)|FLAVOR_SH(1))
+#define TYPE_GRAY_16           (COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(2))
+#define TYPE_GRAY_16_REV       (COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(2)|FLAVOR_SH(1))
+#define TYPE_GRAY_16_SE        (COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_GRAYA_8           (COLORSPACE_SH(PT_GRAY)|EXTRA_SH(1)|CHANNELS_SH(1)|BYTES_SH(1))
+#define TYPE_GRAYA_16          (COLORSPACE_SH(PT_GRAY)|EXTRA_SH(1)|CHANNELS_SH(1)|BYTES_SH(2))
+#define TYPE_GRAYA_16_SE       (COLORSPACE_SH(PT_GRAY)|EXTRA_SH(1)|CHANNELS_SH(1)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_GRAYA_8_PLANAR    (COLORSPACE_SH(PT_GRAY)|EXTRA_SH(1)|CHANNELS_SH(1)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_GRAYA_16_PLANAR   (COLORSPACE_SH(PT_GRAY)|EXTRA_SH(1)|CHANNELS_SH(1)|BYTES_SH(2)|PLANAR_SH(1))
+
+#define TYPE_RGB_8             (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(1))   // RGB/BGR: 3 channels, no extra sample; BGR adds DOSWAP
+#define TYPE_RGB_8_PLANAR      (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_BGR_8             (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_BGR_8_PLANAR      (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|PLANAR_SH(1))
+#define TYPE_RGB_16            (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_RGB_16_PLANAR     (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_RGB_16_SE         (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_BGR_16            (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_BGR_16_PLANAR     (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1)|PLANAR_SH(1))
+#define TYPE_BGR_16_SE         (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+
+#define TYPE_RGBA_8            (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1))   // RGBA: one extra sample, no swap flags
+#define TYPE_RGBA_8_PLANAR     (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_RGBA_16           (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_RGBA_16_PLANAR    (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_RGBA_16_SE        (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+#define TYPE_ARGB_8            (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|SWAPFIRST_SH(1))   // ARGB: extra sample + SWAPFIRST
+#define TYPE_ARGB_8_PLANAR     (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|SWAPFIRST_SH(1)|PLANAR_SH(1))
+#define TYPE_ARGB_16           (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|SWAPFIRST_SH(1))
+
+#define TYPE_ABGR_8            (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1))   // ABGR: extra sample + DOSWAP
+#define TYPE_ABGR_8_PLANAR     (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|PLANAR_SH(1))
+#define TYPE_ABGR_16           (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_ABGR_16_PLANAR    (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1)|PLANAR_SH(1))
+#define TYPE_ABGR_16_SE        (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+
+#define TYPE_BGRA_8            (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1))   // BGRA: extra sample + DOSWAP + SWAPFIRST
+#define TYPE_BGRA_8_PLANAR     (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1)|PLANAR_SH(1))
+#define TYPE_BGRA_16           (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_BGRA_16_SE        (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1))
+
+#define TYPE_CMY_8             (COLORSPACE_SH(PT_CMY)|CHANNELS_SH(3)|BYTES_SH(1))   // CMY: 3 channels
+#define TYPE_CMY_8_PLANAR      (COLORSPACE_SH(PT_CMY)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_CMY_16            (COLORSPACE_SH(PT_CMY)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_CMY_16_PLANAR     (COLORSPACE_SH(PT_CMY)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_CMY_16_SE         (COLORSPACE_SH(PT_CMY)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+#define TYPE_CMYK_8            (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(1))   // CMYK: 4 channels
+#define TYPE_CMYKA_8           (COLORSPACE_SH(PT_CMYK)|EXTRA_SH(1)|CHANNELS_SH(4)|BYTES_SH(1))
+#define TYPE_CMYK_8_REV        (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(1)|FLAVOR_SH(1))
+#define TYPE_YUVK_8            TYPE_CMYK_8_REV    // alias: expands to PT_CMYK with FLAVOR set, not to PT_YUVK
+#define TYPE_CMYK_8_PLANAR     (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_CMYK_16           (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2))
+#define TYPE_CMYK_16_REV       (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|FLAVOR_SH(1))
+#define TYPE_YUVK_16           TYPE_CMYK_16_REV   // alias: expands to PT_CMYK with FLAVOR set, not to PT_YUVK
+#define TYPE_CMYK_16_PLANAR    (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_CMYK_16_SE        (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+#define TYPE_KYMC_8            (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(1)|DOSWAP_SH(1))   // KYMC: CMYK + DOSWAP
+#define TYPE_KYMC_16           (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC_16_SE        (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+
+#define TYPE_KCMY_8            (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(1)|SWAPFIRST_SH(1))   // KCMY: CMYK + SWAPFIRST
+#define TYPE_KCMY_8_REV        (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(1)|FLAVOR_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_KCMY_16           (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|SWAPFIRST_SH(1))
+#define TYPE_KCMY_16_REV       (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|FLAVOR_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_KCMY_16_SE        (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|ENDIAN16_SH(1)|SWAPFIRST_SH(1))
+
+#define TYPE_CMYK5_8           (COLORSPACE_SH(PT_MCH5)|CHANNELS_SH(5)|BYTES_SH(1))   // TYPE_CMYKn / TYPE_KYMCn: n channels with pixel type PT_MCHn; KYMCn adds DOSWAP
+#define TYPE_CMYK5_16          (COLORSPACE_SH(PT_MCH5)|CHANNELS_SH(5)|BYTES_SH(2))
+#define TYPE_CMYK5_16_SE       (COLORSPACE_SH(PT_MCH5)|CHANNELS_SH(5)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC5_8           (COLORSPACE_SH(PT_MCH5)|CHANNELS_SH(5)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC5_16          (COLORSPACE_SH(PT_MCH5)|CHANNELS_SH(5)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC5_16_SE       (COLORSPACE_SH(PT_MCH5)|CHANNELS_SH(5)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+#define TYPE_CMYK6_8           (COLORSPACE_SH(PT_MCH6)|CHANNELS_SH(6)|BYTES_SH(1))   // the 6-channel set has PLANAR variants but no KYMC6 variants
+#define TYPE_CMYK6_8_PLANAR    (COLORSPACE_SH(PT_MCH6)|CHANNELS_SH(6)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_CMYK6_16          (COLORSPACE_SH(PT_MCH6)|CHANNELS_SH(6)|BYTES_SH(2))
+#define TYPE_CMYK6_16_PLANAR   (COLORSPACE_SH(PT_MCH6)|CHANNELS_SH(6)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_CMYK6_16_SE       (COLORSPACE_SH(PT_MCH6)|CHANNELS_SH(6)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_CMYK7_8           (COLORSPACE_SH(PT_MCH7)|CHANNELS_SH(7)|BYTES_SH(1))
+#define TYPE_CMYK7_16          (COLORSPACE_SH(PT_MCH7)|CHANNELS_SH(7)|BYTES_SH(2))
+#define TYPE_CMYK7_16_SE       (COLORSPACE_SH(PT_MCH7)|CHANNELS_SH(7)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC7_8           (COLORSPACE_SH(PT_MCH7)|CHANNELS_SH(7)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC7_16          (COLORSPACE_SH(PT_MCH7)|CHANNELS_SH(7)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC7_16_SE       (COLORSPACE_SH(PT_MCH7)|CHANNELS_SH(7)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+#define TYPE_CMYK8_8           (COLORSPACE_SH(PT_MCH8)|CHANNELS_SH(8)|BYTES_SH(1))
+#define TYPE_CMYK8_16          (COLORSPACE_SH(PT_MCH8)|CHANNELS_SH(8)|BYTES_SH(2))
+#define TYPE_CMYK8_16_SE       (COLORSPACE_SH(PT_MCH8)|CHANNELS_SH(8)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC8_8           (COLORSPACE_SH(PT_MCH8)|CHANNELS_SH(8)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC8_16          (COLORSPACE_SH(PT_MCH8)|CHANNELS_SH(8)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC8_16_SE       (COLORSPACE_SH(PT_MCH8)|CHANNELS_SH(8)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+#define TYPE_CMYK9_8           (COLORSPACE_SH(PT_MCH9)|CHANNELS_SH(9)|BYTES_SH(1))
+#define TYPE_CMYK9_16          (COLORSPACE_SH(PT_MCH9)|CHANNELS_SH(9)|BYTES_SH(2))
+#define TYPE_CMYK9_16_SE       (COLORSPACE_SH(PT_MCH9)|CHANNELS_SH(9)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC9_8           (COLORSPACE_SH(PT_MCH9)|CHANNELS_SH(9)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC9_16          (COLORSPACE_SH(PT_MCH9)|CHANNELS_SH(9)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC9_16_SE       (COLORSPACE_SH(PT_MCH9)|CHANNELS_SH(9)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+#define TYPE_CMYK10_8          (COLORSPACE_SH(PT_MCH10)|CHANNELS_SH(10)|BYTES_SH(1))
+#define TYPE_CMYK10_16         (COLORSPACE_SH(PT_MCH10)|CHANNELS_SH(10)|BYTES_SH(2))
+#define TYPE_CMYK10_16_SE      (COLORSPACE_SH(PT_MCH10)|CHANNELS_SH(10)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC10_8          (COLORSPACE_SH(PT_MCH10)|CHANNELS_SH(10)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC10_16         (COLORSPACE_SH(PT_MCH10)|CHANNELS_SH(10)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC10_16_SE      (COLORSPACE_SH(PT_MCH10)|CHANNELS_SH(10)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+#define TYPE_CMYK11_8          (COLORSPACE_SH(PT_MCH11)|CHANNELS_SH(11)|BYTES_SH(1))
+#define TYPE_CMYK11_16         (COLORSPACE_SH(PT_MCH11)|CHANNELS_SH(11)|BYTES_SH(2))
+#define TYPE_CMYK11_16_SE      (COLORSPACE_SH(PT_MCH11)|CHANNELS_SH(11)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC11_8          (COLORSPACE_SH(PT_MCH11)|CHANNELS_SH(11)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC11_16         (COLORSPACE_SH(PT_MCH11)|CHANNELS_SH(11)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC11_16_SE      (COLORSPACE_SH(PT_MCH11)|CHANNELS_SH(11)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+#define TYPE_CMYK12_8          (COLORSPACE_SH(PT_MCH12)|CHANNELS_SH(12)|BYTES_SH(1))
+#define TYPE_CMYK12_16         (COLORSPACE_SH(PT_MCH12)|CHANNELS_SH(12)|BYTES_SH(2))
+#define TYPE_CMYK12_16_SE      (COLORSPACE_SH(PT_MCH12)|CHANNELS_SH(12)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC12_8          (COLORSPACE_SH(PT_MCH12)|CHANNELS_SH(12)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC12_16         (COLORSPACE_SH(PT_MCH12)|CHANNELS_SH(12)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC12_16_SE      (COLORSPACE_SH(PT_MCH12)|CHANNELS_SH(12)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+
+// Colorimetric formats (XYZ, Lab, LabV2, Yxy); all are 3-channel integer encodings
+#define TYPE_XYZ_16            (COLORSPACE_SH(PT_XYZ)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_Lab_8             (COLORSPACE_SH(PT_Lab)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_LabV2_8           (COLORSPACE_SH(PT_LabV2)|CHANNELS_SH(3)|BYTES_SH(1))
+
+#define TYPE_ALab_8            (COLORSPACE_SH(PT_Lab)|CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|SWAPFIRST_SH(1))   // one extra sample + SWAPFIRST
+#define TYPE_ALabV2_8          (COLORSPACE_SH(PT_LabV2)|CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_Lab_16            (COLORSPACE_SH(PT_Lab)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_LabV2_16          (COLORSPACE_SH(PT_LabV2)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_Yxy_16            (COLORSPACE_SH(PT_Yxy)|CHANNELS_SH(3)|BYTES_SH(2))
+
+// YCbCr (PT_YCbCr), 3 channels; 8/16-bit, chunky/planar, and 16-bit swapped-endian variants
+#define TYPE_YCbCr_8           (COLORSPACE_SH(PT_YCbCr)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_YCbCr_8_PLANAR    (COLORSPACE_SH(PT_YCbCr)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_YCbCr_16          (COLORSPACE_SH(PT_YCbCr)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_YCbCr_16_PLANAR   (COLORSPACE_SH(PT_YCbCr)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_YCbCr_16_SE       (COLORSPACE_SH(PT_YCbCr)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+// YUV (PT_YUV, annotated as Lu'v' in the pixel type list), same five variants
+#define TYPE_YUV_8             (COLORSPACE_SH(PT_YUV)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_YUV_8_PLANAR      (COLORSPACE_SH(PT_YUV)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_YUV_16            (COLORSPACE_SH(PT_YUV)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_YUV_16_PLANAR     (COLORSPACE_SH(PT_YUV)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_YUV_16_SE         (COLORSPACE_SH(PT_YUV)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+// HLS (PT_HLS), same five variants
+#define TYPE_HLS_8             (COLORSPACE_SH(PT_HLS)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_HLS_8_PLANAR      (COLORSPACE_SH(PT_HLS)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_HLS_16            (COLORSPACE_SH(PT_HLS)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_HLS_16_PLANAR     (COLORSPACE_SH(PT_HLS)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_HLS_16_SE         (COLORSPACE_SH(PT_HLS)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+// HSV (PT_HSV), same five variants
+#define TYPE_HSV_8             (COLORSPACE_SH(PT_HSV)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_HSV_8_PLANAR      (COLORSPACE_SH(PT_HSV)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_HSV_16            (COLORSPACE_SH(PT_HSV)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_HSV_16_PLANAR     (COLORSPACE_SH(PT_HSV)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_HSV_16_SE         (COLORSPACE_SH(PT_HSV)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+// Named color index. Only 16 bits allowed. No COLORSPACE_SH term, so the pixel type field is 0 == PT_ANY (don't check colorspace)
+#define TYPE_NAMED_COLOR_INDEX (CHANNELS_SH(1)|BYTES_SH(2))
+
+// Float formatters: FLOAT_SH(1) with BYTES_SH(4), i.e. 4 bytes per sample
+#define TYPE_XYZ_FLT          (FLOAT_SH(1)|COLORSPACE_SH(PT_XYZ)|CHANNELS_SH(3)|BYTES_SH(4))
+#define TYPE_Lab_FLT          (FLOAT_SH(1)|COLORSPACE_SH(PT_Lab)|CHANNELS_SH(3)|BYTES_SH(4))
+#define TYPE_LabA_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_Lab)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(4))
+#define TYPE_GRAY_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(4))
+#define TYPE_RGB_FLT          (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(4))
+
+#define TYPE_RGBA_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(4))
+#define TYPE_ARGB_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(4)|SWAPFIRST_SH(1))
+#define TYPE_BGR_FLT          (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(4)|DOSWAP_SH(1))
+#define TYPE_BGRA_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(4)|DOSWAP_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_ABGR_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(4)|DOSWAP_SH(1))
+
+#define TYPE_CMYK_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(4))
+
+// Double-precision floating point formatters.
+// NOTE THAT 'BYTES' FIELD IS SET TO ZERO ON DBL because 8 bytes overflows the 3-bit field (T_BYTES masks with 7)
+#define TYPE_XYZ_DBL          (FLOAT_SH(1)|COLORSPACE_SH(PT_XYZ)|CHANNELS_SH(3)|BYTES_SH(0))
+#define TYPE_Lab_DBL          (FLOAT_SH(1)|COLORSPACE_SH(PT_Lab)|CHANNELS_SH(3)|BYTES_SH(0))
+#define TYPE_GRAY_DBL         (FLOAT_SH(1)|COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(0))
+#define TYPE_RGB_DBL          (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(0))
+#define TYPE_BGR_DBL          (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(0)|DOSWAP_SH(1))
+#define TYPE_CMYK_DBL         (FLOAT_SH(1)|COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(0))
+
+// IEEE 754-2008 "half" formatters: FLOAT_SH(1) with BYTES_SH(2), i.e. 2 bytes per sample with the float flag set
+#define TYPE_GRAY_HALF_FLT    (FLOAT_SH(1)|COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(2))
+#define TYPE_RGB_HALF_FLT     (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_RGBA_HALF_FLT    (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_CMYK_HALF_FLT    (FLOAT_SH(1)|COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2))
+
+// Swapped RGB half-float variants (TYPE_RGBA_HALF_FLT is already defined in the group above, so it is not repeated here)
+#define TYPE_ARGB_HALF_FLT    (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|SWAPFIRST_SH(1))
+#define TYPE_BGR_HALF_FLT     (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_BGRA_HALF_FLT    (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_ABGR_HALF_FLT    (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1))   // EXTRA_SH(1) as in TYPE_ABGR_8/_16/_FLT; without it this equals TYPE_BGR_HALF_FLT
+
+#endif
+
+// Colorspace value structs; every component below is a cmsFloat64Number. cmsCIEXYZ holds the X, Y, Z components.
+typedef struct {
+        cmsFloat64Number X;
+        cmsFloat64Number Y;
+        cmsFloat64Number Z;
+
+    } cmsCIEXYZ;
+
+typedef struct {                    // xyY value
+        cmsFloat64Number x;
+        cmsFloat64Number y;         // lowercase y ...
+        cmsFloat64Number Y;         // ... and uppercase Y are distinct fields
+
+    } cmsCIExyY;
+
+typedef struct {                    // Lab value
+        cmsFloat64Number L;
+        cmsFloat64Number a;
+        cmsFloat64Number b;
+
+    } cmsCIELab;
+
+typedef struct {                    // LCh value; presumably lightness / chroma / hue -- confirm units of h against the implementation
+        cmsFloat64Number L;
+        cmsFloat64Number C;
+        cmsFloat64Number h;
+
+    } cmsCIELCh;
+
+typedef struct {                    // JCh value; NOTE(review): presumably the appearance-model counterpart of cmsCIELCh -- confirm
+        cmsFloat64Number J;
+        cmsFloat64Number C;
+        cmsFloat64Number h;
+
+    } cmsJCh;
+
+typedef struct {                    // Three cmsCIEXYZ values, one each for Red, Green and Blue
+        cmsCIEXYZ  Red;
+        cmsCIEXYZ  Green;
+        cmsCIEXYZ  Blue;
+
+    } cmsCIEXYZTRIPLE;
+
+typedef struct {                    // Three cmsCIExyY values, one each for Red, Green and Blue
+        cmsCIExyY  Red;
+        cmsCIExyY  Green;
+        cmsCIExyY  Blue;
+
+    } cmsCIExyYTRIPLE;
+
+// Illuminant types for the IlluminantType fields of cmsICCMeasurementConditions and cmsICCViewingConditions (consecutive values 0..8)
+#define cmsILLUMINANT_TYPE_UNKNOWN 0x0000000
+#define cmsILLUMINANT_TYPE_D50     0x0000001
+#define cmsILLUMINANT_TYPE_D65     0x0000002
+#define cmsILLUMINANT_TYPE_D93     0x0000003
+#define cmsILLUMINANT_TYPE_F2      0x0000004
+#define cmsILLUMINANT_TYPE_D55     0x0000005
+#define cmsILLUMINANT_TYPE_A       0x0000006
+#define cmsILLUMINANT_TYPE_E       0x0000007
+#define cmsILLUMINANT_TYPE_F8      0x0000008
+
+typedef struct {                              // Measurement conditions
+        cmsUInt32Number  Observer;    // 0 = unknown, 1=CIE 1931, 2=CIE 1964
+        cmsCIEXYZ        Backing;     // Value of backing
+        cmsUInt32Number  Geometry;    // 0=unknown, 1=45/0, 0/45 2=0d, d/0
+        cmsFloat64Number Flare;       // 0..1.0
+        cmsUInt32Number  IlluminantType;      // One of the cmsILLUMINANT_TYPE_* values
+
+    } cmsICCMeasurementConditions;
+
+typedef struct {                         // Viewing conditions as stored in a tag
+        cmsCIEXYZ       IlluminantXYZ;   // Not the same struct as CAM02,
+        cmsCIEXYZ       SurroundXYZ;     // This is for storing the tag
+        cmsUInt32Number IlluminantType;  // viewing condition; one of the cmsILLUMINANT_TYPE_* values
+
+    } cmsICCViewingConditions;
+
+// Get LittleCMS version (for shared objects) -----------------------------------------------------------------------------
+
+CMSAPI int               CMSEXPORT cmsGetEncodedCMMversion(void);   // NOTE(review): the encoding of the returned int is not shown here -- confirm against the implementation
+
+// Support of non-standard functions --------------------------------------------------------------------------------------
+
+CMSAPI int               CMSEXPORT cmsstrcasecmp(const char* s1, const char* s2);   // presumably a case-insensitive string compare (name mirrors strcasecmp) -- confirm
+CMSAPI long int          CMSEXPORT cmsfilelength(FILE* f);                          // presumably the length of f in bytes -- confirm, including the error return
+
+
+// Context handling --------------------------------------------------------------------------------------------------------
+
+// Each context holds its own globals and its own plug-ins. There is a global context with the id = 0 for legacy compatibility,
+// though using the global context is not recommended. Proper context handling makes lcms more thread-safe.
+
+typedef struct _cmsContext_struct* cmsContext;   // Pointer to a struct that is only forward-declared here, so the context is opaque to users of this section
+
+CMSAPI cmsContext       CMSEXPORT cmsCreateContext(void* Plugin, void* UserData);           // Create a context from a plug-in pointer and a user-data pointer
+CMSAPI void             CMSEXPORT cmsDeleteContext(cmsContext ContexID);                    // Delete a context (parameter is spelled ContexID in this prototype)
+CMSAPI cmsContext       CMSEXPORT cmsDupContext(cmsContext ContextID, void* NewUserData);   // Duplicate a context, supplying a new user-data pointer
+CMSAPI void*            CMSEXPORT cmsGetContextUserData(cmsContext ContextID);              // Fetch the user-data pointer associated with a context
+
+// Plug-In registering: each call has a plain form and a THR form that takes an explicit cmsContext  ----------------------
+
+CMSAPI cmsBool           CMSEXPORT cmsPlugin(void* Plugin);                             // presumably registers into the global context -- confirm
+CMSAPI cmsBool           CMSEXPORT cmsPluginTHR(cmsContext ContextID, void* Plugin);    // same, for the given context
+CMSAPI void              CMSEXPORT cmsUnregisterPlugins(void);                          // presumably unregisters from the global context -- confirm
+CMSAPI void              CMSEXPORT cmsUnregisterPluginsTHR(cmsContext ContextID);       // same, for the given context
+
+// Error logging ----------------------------------------------------------------------------------------------------------
+
+// There is no error handling at all. When a function fails, it returns a proper value.
+// For example, all create functions return NULL on failure. Others may return FALSE.
+// It may be interesting, for the developer, to know why the function is failing.
+// for that reason, lcms2 does offer a logging function. This function will get
+// an ENGLISH string with some clues on what is going wrong. You can show this
+// info to the end user if you wish, or just create some sort of log on disk.
+// The logging function should NOT terminate the program, as this obviously can leave
+// unfreed resources. It is the programmer's responsibility to check each function
+// return code to make sure it didn't fail.
+
+#define cmsERROR_UNDEFINED                    0
+#define cmsERROR_FILE                         1
+#define cmsERROR_RANGE                        2
+#define cmsERROR_INTERNAL                     3
+#define cmsERROR_NULL                         4
+#define cmsERROR_READ                         5
+#define cmsERROR_SEEK                         6
+#define cmsERROR_WRITE                        7
+#define cmsERROR_UNKNOWN_EXTENSION            8
+#define cmsERROR_COLORSPACE_CHECK             9
+#define cmsERROR_ALREADY_DEFINED              10
+#define cmsERROR_BAD_SIGNATURE                11
+#define cmsERROR_CORRUPTION_DETECTED          12
+#define cmsERROR_NOT_SUITABLE                 13
+
+// Error logger is called with the ContextID when a message is raised. This gives the
+// chance to know which thread is responsible for the warning and any environment associated
+// with it. Non-multithreading applications may safely ignore this parameter.
+// Note that under certain special circumstances, ContextID may be NULL.
+typedef void  (* cmsLogErrorHandlerFunction)(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *Text);
+
+// Allows user to set any specific logger
+CMSAPI void              CMSEXPORT cmsSetLogErrorHandler(cmsLogErrorHandlerFunction Fn);
+CMSAPI void              CMSEXPORT cmsSetLogErrorHandlerTHR(cmsContext ContextID, cmsLogErrorHandlerFunction Fn);
+
+// Conversions --------------------------------------------------------------------------------------------------------------
+
+// Returns pointers to constant structs
+CMSAPI const cmsCIEXYZ*  CMSEXPORT cmsD50_XYZ(void);
+CMSAPI const cmsCIExyY*  CMSEXPORT cmsD50_xyY(void);
+
+// Colorimetric space conversions
+CMSAPI void              CMSEXPORT cmsXYZ2xyY(cmsCIExyY* Dest, const cmsCIEXYZ* Source);
+CMSAPI void              CMSEXPORT cmsxyY2XYZ(cmsCIEXYZ* Dest, const cmsCIExyY* Source);
+CMSAPI void              CMSEXPORT cmsXYZ2Lab(const cmsCIEXYZ* WhitePoint, cmsCIELab* Lab, const cmsCIEXYZ* xyz);
+CMSAPI void              CMSEXPORT cmsLab2XYZ(const cmsCIEXYZ* WhitePoint, cmsCIEXYZ* xyz, const cmsCIELab* Lab);
+CMSAPI void              CMSEXPORT cmsLab2LCh(cmsCIELCh*LCh, const cmsCIELab* Lab);
+CMSAPI void              CMSEXPORT cmsLCh2Lab(cmsCIELab* Lab, const cmsCIELCh* LCh);
+
+// Encoding /Decoding on PCS
+CMSAPI void              CMSEXPORT cmsLabEncoded2Float(cmsCIELab* Lab, const cmsUInt16Number wLab[3]);
+CMSAPI void              CMSEXPORT cmsLabEncoded2FloatV2(cmsCIELab* Lab, const cmsUInt16Number wLab[3]);
+CMSAPI void              CMSEXPORT cmsFloat2LabEncoded(cmsUInt16Number wLab[3], const cmsCIELab* Lab);
+CMSAPI void              CMSEXPORT cmsFloat2LabEncodedV2(cmsUInt16Number wLab[3], const cmsCIELab* Lab);
+CMSAPI void              CMSEXPORT cmsXYZEncoded2Float(cmsCIEXYZ* fxyz, const cmsUInt16Number XYZ[3]);
+CMSAPI void              CMSEXPORT cmsFloat2XYZEncoded(cmsUInt16Number XYZ[3], const cmsCIEXYZ* fXYZ);
+
+// DeltaE metrics
+CMSAPI cmsFloat64Number  CMSEXPORT cmsDeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2);
+CMSAPI cmsFloat64Number  CMSEXPORT cmsCIE94DeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2);
+CMSAPI cmsFloat64Number  CMSEXPORT cmsBFDdeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2);
+CMSAPI cmsFloat64Number  CMSEXPORT cmsCMCdeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2, cmsFloat64Number l, cmsFloat64Number c);
+CMSAPI cmsFloat64Number  CMSEXPORT cmsCIE2000DeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2, cmsFloat64Number Kl, cmsFloat64Number Kc, cmsFloat64Number Kh);
+
+// Temperature <-> Chromaticity (Black body)
+CMSAPI cmsBool           CMSEXPORT cmsWhitePointFromTemp(cmsCIExyY* WhitePoint, cmsFloat64Number  TempK);
+CMSAPI cmsBool           CMSEXPORT cmsTempFromWhitePoint(cmsFloat64Number* TempK, const cmsCIExyY* WhitePoint);
+
+// Chromatic adaptation
+CMSAPI cmsBool           CMSEXPORT cmsAdaptToIlluminant(cmsCIEXYZ* Result, const cmsCIEXYZ* SourceWhitePt,
+                                                                           const cmsCIEXYZ* Illuminant,
+                                                                           const cmsCIEXYZ* Value);
+
+// CIECAM02 ---------------------------------------------------------------------------------------------------
+
+// Viewing conditions. Please note those are CAM model viewing conditions, and not the ICC tag viewing
+// conditions, which I'm naming cmsICCViewingConditions to make differences evident. Unfortunately, the tag
+// cannot deal with surround La, Yb and D value so is basically useless to store CAM02 viewing conditions.
+
+
+#define AVG_SURROUND       1
+#define DIM_SURROUND       2
+#define DARK_SURROUND      3
+#define CUTSHEET_SURROUND  4
+
+#define D_CALCULATE        (-1)
+
+// Viewing conditions for the CAM model. A pointer to this struct is the pVC
+// argument of cmsCIECAM02Init().
+typedef struct {
+    cmsCIEXYZ        whitePoint;   // White point, expressed as XYZ
+    cmsFloat64Number Yb;           // NOTE(review): presumably the relative luminance of the background -- confirm
+    cmsFloat64Number La;           // NOTE(review): presumably the adapting field luminance -- confirm
+    cmsUInt32Number  surround;     // NOTE(review): presumably one of the *_SURROUND constants -- confirm
+    cmsFloat64Number D_value;      // NOTE(review): presumably accepts D_CALCULATE as a special value -- confirm
+
+    } cmsViewingConditions;
+
+CMSAPI cmsHANDLE         CMSEXPORT cmsCIECAM02Init(cmsContext ContextID, const cmsViewingConditions* pVC);
+CMSAPI void              CMSEXPORT cmsCIECAM02Done(cmsHANDLE hModel);
+CMSAPI void              CMSEXPORT cmsCIECAM02Forward(cmsHANDLE hModel, const cmsCIEXYZ* pIn, cmsJCh* pOut);
+CMSAPI void              CMSEXPORT cmsCIECAM02Reverse(cmsHANDLE hModel, const cmsJCh* pIn,    cmsCIEXYZ* pOut);
+
+
+// Tone curves -----------------------------------------------------------------------------------------
+
+// This describes a curve segment. For a table of supported types, see the manual. User can increase the number of
+// available types by using a proper plug-in. Parametric segments allow 10 parameters at most
+
+// One curve segment. cmsBuildSegmentedToneCurve() takes an array of these as its
+// Segments[] argument. Type selects which of the remaining fields are meaningful:
+// Params when Type != 0, nGridPoints and SampledPoints when Type == 0.
+typedef struct {
+    cmsFloat32Number   x0, x1;           // Domain; for x0 < x <= x1
+    cmsInt32Number     Type;             // Parametric type, Type == 0 means sampled segment. Negative values are reserved
+    cmsFloat64Number   Params[10];       // Parameters if Type != 0
+    cmsUInt32Number    nGridPoints;      // Number of grid points if Type == 0
+    cmsFloat32Number*  SampledPoints;    // Points to an array of floats if Type == 0
+
+} cmsCurveSegment;
+
+// The internal representation is none of your business.
+typedef struct _cms_curve_struct cmsToneCurve;
+
+CMSAPI cmsToneCurve*     CMSEXPORT cmsBuildSegmentedToneCurve(cmsContext ContextID, cmsUInt32Number nSegments, const cmsCurveSegment Segments[]);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsBuildParametricToneCurve(cmsContext ContextID, cmsInt32Number Type, const cmsFloat64Number Params[]);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsBuildGamma(cmsContext ContextID, cmsFloat64Number Gamma);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsBuildTabulatedToneCurve16(cmsContext ContextID, cmsUInt32Number nEntries, const cmsUInt16Number values[]);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsBuildTabulatedToneCurveFloat(cmsContext ContextID, cmsUInt32Number nEntries, const cmsFloat32Number values[]);
+CMSAPI void              CMSEXPORT cmsFreeToneCurve(cmsToneCurve* Curve);
+CMSAPI void              CMSEXPORT cmsFreeToneCurveTriple(cmsToneCurve* Curve[3]);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsDupToneCurve(const cmsToneCurve* Src);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsReverseToneCurve(const cmsToneCurve* InGamma);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsReverseToneCurveEx(cmsUInt32Number nResultSamples, const cmsToneCurve* InGamma);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsJoinToneCurve(cmsContext ContextID, const cmsToneCurve* X,  const cmsToneCurve* Y, cmsUInt32Number nPoints);
+CMSAPI cmsBool           CMSEXPORT cmsSmoothToneCurve(cmsToneCurve* Tab, cmsFloat64Number lambda);
+CMSAPI cmsFloat32Number  CMSEXPORT cmsEvalToneCurveFloat(const cmsToneCurve* Curve, cmsFloat32Number v);
+CMSAPI cmsUInt16Number   CMSEXPORT cmsEvalToneCurve16(const cmsToneCurve* Curve, cmsUInt16Number v);
+CMSAPI cmsBool           CMSEXPORT cmsIsToneCurveMultisegment(const cmsToneCurve* InGamma);
+CMSAPI cmsBool           CMSEXPORT cmsIsToneCurveLinear(const cmsToneCurve* Curve);
+CMSAPI cmsBool           CMSEXPORT cmsIsToneCurveMonotonic(const cmsToneCurve* t);
+CMSAPI cmsBool           CMSEXPORT cmsIsToneCurveDescending(const cmsToneCurve* t);
+CMSAPI cmsInt32Number    CMSEXPORT cmsGetToneCurveParametricType(const cmsToneCurve* t);
+CMSAPI cmsFloat64Number  CMSEXPORT cmsEstimateGamma(const cmsToneCurve* t, cmsFloat64Number Precision);
+
+// Tone curve tabular estimation
+CMSAPI cmsUInt32Number         CMSEXPORT cmsGetToneCurveEstimatedTableEntries(const cmsToneCurve* t);
+CMSAPI const cmsUInt16Number*  CMSEXPORT cmsGetToneCurveEstimatedTable(const cmsToneCurve* t);
+
+
+// Implements pipelines of multi-processing elements -------------------------------------------------------------
+
+// Nothing to see here, move along
+typedef struct _cmsPipeline_struct cmsPipeline;
+typedef struct _cmsStage_struct cmsStage;
+
+// Those are hi-level pipelines
+CMSAPI cmsPipeline*      CMSEXPORT cmsPipelineAlloc(cmsContext ContextID, cmsUInt32Number InputChannels, cmsUInt32Number OutputChannels);
+CMSAPI void              CMSEXPORT cmsPipelineFree(cmsPipeline* lut);
+CMSAPI cmsPipeline*      CMSEXPORT cmsPipelineDup(const cmsPipeline* Orig);
+
+CMSAPI cmsContext        CMSEXPORT cmsGetPipelineContextID(const cmsPipeline* lut);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsPipelineInputChannels(const cmsPipeline* lut);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsPipelineOutputChannels(const cmsPipeline* lut);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsPipelineStageCount(const cmsPipeline* lut);
+CMSAPI cmsStage*         CMSEXPORT cmsPipelineGetPtrToFirstStage(const cmsPipeline* lut);
+CMSAPI cmsStage*         CMSEXPORT cmsPipelineGetPtrToLastStage(const cmsPipeline* lut);
+
+CMSAPI void              CMSEXPORT cmsPipelineEval16(const cmsUInt16Number In[], cmsUInt16Number Out[], const cmsPipeline* lut);
+CMSAPI void              CMSEXPORT cmsPipelineEvalFloat(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsPipeline* lut);
+CMSAPI cmsBool           CMSEXPORT cmsPipelineEvalReverseFloat(cmsFloat32Number Target[], cmsFloat32Number Result[], cmsFloat32Number Hint[], const cmsPipeline* lut);
+CMSAPI cmsBool           CMSEXPORT cmsPipelineCat(cmsPipeline* l1, const cmsPipeline* l2);
+CMSAPI cmsBool           CMSEXPORT cmsPipelineSetSaveAs8bitsFlag(cmsPipeline* lut, cmsBool On);
+
+// Where to place/locate the stages in the pipeline chain
+// Used as the loc argument of cmsPipelineInsertStage() and cmsPipelineUnlinkStage().
+typedef enum { cmsAT_BEGIN, cmsAT_END } cmsStageLoc;
+
+CMSAPI cmsBool           CMSEXPORT cmsPipelineInsertStage(cmsPipeline* lut, cmsStageLoc loc, cmsStage* mpe);
+CMSAPI void              CMSEXPORT cmsPipelineUnlinkStage(cmsPipeline* lut, cmsStageLoc loc, cmsStage** mpe);
+
+// This function is quite useful to analyze the structure of a Pipeline and retrieve the Stage elements
+// that conform the Pipeline. It should be called with the Pipeline, the number of expected elements and
+// then a list of expected types followed with a list of double pointers to Stage elements. If
+// the function finds a match with the current pipeline, it fills the pointers and returns TRUE;
+// if not, it returns FALSE without touching anything.
+CMSAPI cmsBool           CMSEXPORT cmsPipelineCheckAndRetreiveStages(const cmsPipeline* Lut, cmsUInt32Number n, ...);
+
+// Matrix has double precision and CLUT has only float precision. That is because an ICC profile can encode
+// matrices with far more precision than CLUTs.
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocIdentity(cmsContext ContextID, cmsUInt32Number nChannels);
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocToneCurves(cmsContext ContextID, cmsUInt32Number nChannels, cmsToneCurve* const Curves[]);
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocMatrix(cmsContext ContextID, cmsUInt32Number Rows, cmsUInt32Number Cols, const cmsFloat64Number* Matrix, const cmsFloat64Number* Offset);
+
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocCLut16bit(cmsContext ContextID, cmsUInt32Number nGridPoints, cmsUInt32Number inputChan, cmsUInt32Number outputChan, const cmsUInt16Number* Table);
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocCLutFloat(cmsContext ContextID, cmsUInt32Number nGridPoints, cmsUInt32Number inputChan, cmsUInt32Number outputChan, const cmsFloat32Number* Table);
+
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocCLut16bitGranular(cmsContext ContextID, const cmsUInt32Number clutPoints[], cmsUInt32Number inputChan, cmsUInt32Number outputChan, const cmsUInt16Number* Table);
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocCLutFloatGranular(cmsContext ContextID, const cmsUInt32Number clutPoints[], cmsUInt32Number inputChan, cmsUInt32Number outputChan, const cmsFloat32Number* Table);
+
+CMSAPI cmsStage*         CMSEXPORT cmsStageDup(cmsStage* mpe);
+CMSAPI void              CMSEXPORT cmsStageFree(cmsStage* mpe);
+CMSAPI cmsStage*         CMSEXPORT cmsStageNext(const cmsStage* mpe);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsStageInputChannels(const cmsStage* mpe);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsStageOutputChannels(const cmsStage* mpe);
+CMSAPI cmsStageSignature CMSEXPORT cmsStageType(const cmsStage* mpe);
+CMSAPI void*             CMSEXPORT cmsStageData(const cmsStage* mpe);
+
+// Sampling
+// Sampler callback working on cmsUInt16Number arrays. It receives an input array (In),
+// an output array (Out) and a caller-supplied Cargo pointer. cmsStageSampleCLut16bit()
+// and cmsSliceSpace16() each take a callback of this type together with a Cargo pointer.
+// NOTE(review): the meaning of the cmsInt32Number return value is not visible here -- confirm.
+typedef cmsInt32Number (* cmsSAMPLER16)   (CMSREGISTER const cmsUInt16Number In[],
+                                           CMSREGISTER cmsUInt16Number Out[],
+                                           CMSREGISTER void * Cargo);
+
+// Sampler callback working on cmsFloat32Number arrays. It receives an input array (In),
+// an output array (Out) and a caller-supplied Cargo pointer. cmsStageSampleCLutFloat()
+// and cmsSliceSpaceFloat() each take a callback of this type together with a Cargo pointer.
+// NOTE(review): the meaning of the cmsInt32Number return value is not visible here -- confirm.
+typedef cmsInt32Number (* cmsSAMPLERFLOAT)(CMSREGISTER const cmsFloat32Number In[],
+                                           CMSREGISTER cmsFloat32Number Out[],
+                                           CMSREGISTER void * Cargo);
+
+// Use this flag to prevent changes being written to destination
+#define SAMPLER_INSPECT     0x01000000
+
+// For CLUT only
+CMSAPI cmsBool           CMSEXPORT cmsStageSampleCLut16bit(cmsStage* mpe,    cmsSAMPLER16 Sampler, void* Cargo, cmsUInt32Number dwFlags);
+CMSAPI cmsBool           CMSEXPORT cmsStageSampleCLutFloat(cmsStage* mpe, cmsSAMPLERFLOAT Sampler, void* Cargo, cmsUInt32Number dwFlags);
+
+// Slicers
+CMSAPI cmsBool           CMSEXPORT cmsSliceSpace16(cmsUInt32Number nInputs, const cmsUInt32Number clutPoints[],
+                                                   cmsSAMPLER16 Sampler, void * Cargo);
+
+CMSAPI cmsBool           CMSEXPORT cmsSliceSpaceFloat(cmsUInt32Number nInputs, const cmsUInt32Number clutPoints[],
+                                                   cmsSAMPLERFLOAT Sampler, void * Cargo);
+
+// Multilocalized Unicode management ---------------------------------------------------------------------------------------
+
+typedef struct _cms_MLU_struct cmsMLU;
+
+#define  cmsNoLanguage "\0\0"
+#define  cmsNoCountry  "\0\0"
+
+CMSAPI cmsMLU*           CMSEXPORT cmsMLUalloc(cmsContext ContextID, cmsUInt32Number nItems);
+CMSAPI void              CMSEXPORT cmsMLUfree(cmsMLU* mlu);
+CMSAPI cmsMLU*           CMSEXPORT cmsMLUdup(const cmsMLU* mlu);
+
+CMSAPI cmsBool           CMSEXPORT cmsMLUsetASCII(cmsMLU* mlu,
+                                                  const char LanguageCode[3], const char CountryCode[3],
+                                                  const char* ASCIIString);
+CMSAPI cmsBool           CMSEXPORT cmsMLUsetWide(cmsMLU* mlu,
+                                                  const char LanguageCode[3], const char CountryCode[3],
+                                                  const wchar_t* WideString);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsMLUgetASCII(const cmsMLU* mlu,
+                                                  const char LanguageCode[3], const char CountryCode[3],
+                                                  char* Buffer,    cmsUInt32Number BufferSize);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsMLUgetWide(const cmsMLU* mlu,
+                                                 const char LanguageCode[3], const char CountryCode[3],
+                                                 wchar_t* Buffer, cmsUInt32Number BufferSize);
+
+CMSAPI cmsBool           CMSEXPORT cmsMLUgetTranslation(const cmsMLU* mlu,
+                                                         const char LanguageCode[3], const char CountryCode[3],
+                                                         char ObtainedLanguage[3], char ObtainedCountry[3]);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsMLUtranslationsCount(const cmsMLU* mlu);
+
+CMSAPI cmsBool           CMSEXPORT cmsMLUtranslationsCodes(const cmsMLU* mlu,
+                                                             cmsUInt32Number idx,
+                                                             char LanguageCode[3],
+                                                             char CountryCode[3]);
+ 
+// Under color removal & black generation ----------------------------------------------------------------------------------
+
+// Groups two tone curves and a multilocalized (cmsMLU) description.
+// NOTE(review): presumably the in-memory form of the under color removal /
+// black generation data -- confirm; ownership of the pointers is not visible here.
+typedef struct {
+        cmsToneCurve* Ucr;      // NOTE(review): presumably the under color removal curve -- confirm
+        cmsToneCurve* Bg;       // NOTE(review): presumably the black generation curve -- confirm
+        cmsMLU*       Desc;     // Multilocalized text; presumably a description of the method -- confirm
+
+} cmsUcrBg;
+
+// Screening ----------------------------------------------------------------------------------------------------------------
+
+#define cmsPRINTER_DEFAULT_SCREENS     0x0001
+#define cmsFREQUENCE_UNITS_LINES_CM    0x0000
+#define cmsFREQUENCE_UNITS_LINES_INCH  0x0002
+
+#define cmsSPOT_UNKNOWN         0
+#define cmsSPOT_PRINTER_DEFAULT 1
+#define cmsSPOT_ROUND           2
+#define cmsSPOT_DIAMOND         3
+#define cmsSPOT_ELLIPSE         4
+#define cmsSPOT_LINE            5
+#define cmsSPOT_SQUARE          6
+#define cmsSPOT_CROSS           7
+
+// Screening parameters for a single channel; cmsScreening holds an array of these.
+typedef struct {
+    cmsFloat64Number  Frequency;     // NOTE(review): units presumably selected by the cmsFREQUENCE_UNITS_* flags -- confirm
+    cmsFloat64Number  ScreenAngle;   // NOTE(review): angle units are not visible here -- confirm
+    cmsUInt32Number   SpotShape;     // NOTE(review): presumably one of the cmsSPOT_* constants -- confirm
+
+} cmsScreeningChannel;
+
+// Screening information: a flag word, a channel count and a fixed-size array of
+// per-channel entries with room for cmsMAXCHANNELS channels.
+typedef struct {
+    cmsUInt32Number Flag;        // NOTE(review): presumably built from cmsPRINTER_DEFAULT_SCREENS / cmsFREQUENCE_UNITS_* -- confirm
+    cmsUInt32Number nChannels;   // NOTE(review): presumably the number of valid entries in Channels -- confirm
+    cmsScreeningChannel Channels[cmsMAXCHANNELS];
+
+} cmsScreening;
+
+
+// Named color -----------------------------------------------------------------------------------------------------------------
+
+typedef struct _cms_NAMEDCOLORLIST_struct cmsNAMEDCOLORLIST;
+
+CMSAPI cmsNAMEDCOLORLIST* CMSEXPORT cmsAllocNamedColorList(cmsContext ContextID,
+                                                           cmsUInt32Number n,
+                                                           cmsUInt32Number ColorantCount,
+                                                           const char* Prefix, const char* Suffix);
+
+CMSAPI void               CMSEXPORT cmsFreeNamedColorList(cmsNAMEDCOLORLIST* v);
+CMSAPI cmsNAMEDCOLORLIST* CMSEXPORT cmsDupNamedColorList(const cmsNAMEDCOLORLIST* v);
+CMSAPI cmsBool            CMSEXPORT cmsAppendNamedColor(cmsNAMEDCOLORLIST* v, const char* Name,
+                                                            cmsUInt16Number PCS[3],
+                                                            cmsUInt16Number Colorant[cmsMAXCHANNELS]);
+
+CMSAPI cmsUInt32Number    CMSEXPORT cmsNamedColorCount(const cmsNAMEDCOLORLIST* v);
+CMSAPI cmsInt32Number     CMSEXPORT cmsNamedColorIndex(const cmsNAMEDCOLORLIST* v, const char* Name);
+
+CMSAPI cmsBool            CMSEXPORT cmsNamedColorInfo(const cmsNAMEDCOLORLIST* NamedColorList, cmsUInt32Number nColor,
+                                                      char* Name,
+                                                      char* Prefix,
+                                                      char* Suffix,
+                                                      cmsUInt16Number* PCS,
+                                                      cmsUInt16Number* Colorant);
+
+// Retrieve named color list from transform
+CMSAPI cmsNAMEDCOLORLIST* CMSEXPORT cmsGetNamedColorList(cmsHTRANSFORM xform);
+
+// Profile sequence -----------------------------------------------------------------------------------------------------
+
+// Profile sequence descriptor. Some fields come from profile sequence descriptor tag, others
+// come from Profile Sequence Identifier Tag
+typedef struct {
+
+    cmsSignature           deviceMfg;       // NOTE(review): presumably the device manufacturer signature -- confirm
+    cmsSignature           deviceModel;     // NOTE(review): presumably the device model signature -- confirm
+    cmsUInt64Number        attributes;      // NOTE(review): presumably device attribute bits -- confirm
+    cmsTechnologySignature technology;
+    cmsProfileID           ProfileID;       // NOTE(review): presumably taken from the Profile Sequence Identifier tag -- confirm
+    cmsMLU*                Manufacturer;    // Multilocalized text (cmsMLU)
+    cmsMLU*                Model;           // Multilocalized text (cmsMLU)
+    cmsMLU*                Description;     // Multilocalized text (cmsMLU)
+
+} cmsPSEQDESC;
+
+// Profile sequence: a count, a context and a pointer to cmsPSEQDESC entries.
+// cmsAllocProfileSequenceDescription() returns a pointer to this type and
+// cmsFreeProfileSequenceDescription() takes one.
+typedef struct {
+
+    cmsUInt32Number n;           // NOTE(review): presumably the number of entries seq points to -- confirm
+    cmsContext      ContextID;   // NOTE(review): presumably the context the sequence was allocated with -- confirm
+    cmsPSEQDESC*    seq;         // Pointer to the descriptor entries
+
+} cmsSEQ;
+
+CMSAPI cmsSEQ*           CMSEXPORT cmsAllocProfileSequenceDescription(cmsContext ContextID, cmsUInt32Number n);
+CMSAPI cmsSEQ*           CMSEXPORT cmsDupProfileSequenceDescription(const cmsSEQ* pseq);
+CMSAPI void              CMSEXPORT cmsFreeProfileSequenceDescription(cmsSEQ* pseq);
+
+// Dictionaries --------------------------------------------------------------------------------------------------------
+
+// One dictionary entry. Entries are chained through Next; cmsDictGetEntryList() and
+// cmsDictNextEntry() return const pointers to this type. The four data fields carry
+// the same names as the parameters of cmsDictAddEntry().
+typedef struct _cmsDICTentry_struct {
+
+    struct _cmsDICTentry_struct* Next;   // Link to another entry; NOTE(review): presumably NULL at the end of the list -- confirm
+
+    cmsMLU *DisplayName;                 // Multilocalized text (cmsMLU)
+    cmsMLU *DisplayValue;                // Multilocalized text (cmsMLU)
+    wchar_t* Name;                       // Wide-character string
+    wchar_t* Value;                      // Wide-character string
+
+} cmsDICTentry;
+
+CMSAPI cmsHANDLE           CMSEXPORT cmsDictAlloc(cmsContext ContextID);
+CMSAPI void                CMSEXPORT cmsDictFree(cmsHANDLE hDict);
+CMSAPI cmsHANDLE           CMSEXPORT cmsDictDup(cmsHANDLE hDict);
+
+CMSAPI cmsBool             CMSEXPORT cmsDictAddEntry(cmsHANDLE hDict, const wchar_t* Name, const wchar_t* Value, const cmsMLU *DisplayName, const cmsMLU *DisplayValue);
+CMSAPI const cmsDICTentry* CMSEXPORT cmsDictGetEntryList(cmsHANDLE hDict);
+CMSAPI const cmsDICTentry* CMSEXPORT cmsDictNextEntry(const cmsDICTentry* e);
+
+// Access to Profile data ----------------------------------------------------------------------------------------------
+CMSAPI cmsHPROFILE       CMSEXPORT cmsCreateProfilePlaceholder(cmsContext ContextID);
+
+CMSAPI cmsContext        CMSEXPORT cmsGetProfileContextID(cmsHPROFILE hProfile);
+CMSAPI cmsInt32Number    CMSEXPORT cmsGetTagCount(cmsHPROFILE hProfile);
+CMSAPI cmsTagSignature   CMSEXPORT cmsGetTagSignature(cmsHPROFILE hProfile, cmsUInt32Number n);
+CMSAPI cmsBool           CMSEXPORT cmsIsTag(cmsHPROFILE hProfile, cmsTagSignature sig);
+
+// Read and write pre-formatted data
+CMSAPI void*             CMSEXPORT cmsReadTag(cmsHPROFILE hProfile, cmsTagSignature sig);
+CMSAPI cmsBool           CMSEXPORT cmsWriteTag(cmsHPROFILE hProfile, cmsTagSignature sig, const void* data);
+CMSAPI cmsBool           CMSEXPORT cmsLinkTag(cmsHPROFILE hProfile, cmsTagSignature sig, cmsTagSignature dest);
+CMSAPI cmsTagSignature   CMSEXPORT cmsTagLinkedTo(cmsHPROFILE hProfile, cmsTagSignature sig);
+
+// Read and write raw data
+CMSAPI cmsUInt32Number   CMSEXPORT cmsReadRawTag(cmsHPROFILE hProfile, cmsTagSignature sig, void* Buffer, cmsUInt32Number BufferSize);
+CMSAPI cmsBool           CMSEXPORT cmsWriteRawTag(cmsHPROFILE hProfile, cmsTagSignature sig, const void* data, cmsUInt32Number Size);
+
+// Access header data
+#define cmsEmbeddedProfileFalse    0x00000000
+#define cmsEmbeddedProfileTrue     0x00000001
+#define cmsUseAnywhere             0x00000000
+#define cmsUseWithEmbeddedDataOnly 0x00000002
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetHeaderFlags(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsGetHeaderAttributes(cmsHPROFILE hProfile, cmsUInt64Number* Flags);
+CMSAPI void              CMSEXPORT cmsGetHeaderProfileID(cmsHPROFILE hProfile, cmsUInt8Number* ProfileID);
+CMSAPI cmsBool           CMSEXPORT cmsGetHeaderCreationDateTime(cmsHPROFILE hProfile, struct tm *Dest);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetHeaderRenderingIntent(cmsHPROFILE hProfile);
+
+CMSAPI void              CMSEXPORT cmsSetHeaderFlags(cmsHPROFILE hProfile, cmsUInt32Number Flags);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetHeaderManufacturer(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsSetHeaderManufacturer(cmsHPROFILE hProfile, cmsUInt32Number manufacturer);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetHeaderCreator(cmsHPROFILE hProfile);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetHeaderModel(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsSetHeaderModel(cmsHPROFILE hProfile, cmsUInt32Number model);
+CMSAPI void              CMSEXPORT cmsSetHeaderAttributes(cmsHPROFILE hProfile, cmsUInt64Number Flags);
+CMSAPI void              CMSEXPORT cmsSetHeaderProfileID(cmsHPROFILE hProfile, cmsUInt8Number* ProfileID);
+CMSAPI void              CMSEXPORT cmsSetHeaderRenderingIntent(cmsHPROFILE hProfile, cmsUInt32Number RenderingIntent);
+
+CMSAPI cmsColorSpaceSignature
+                         CMSEXPORT cmsGetPCS(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsSetPCS(cmsHPROFILE hProfile, cmsColorSpaceSignature pcs);
+CMSAPI cmsColorSpaceSignature
+                         CMSEXPORT cmsGetColorSpace(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsSetColorSpace(cmsHPROFILE hProfile, cmsColorSpaceSignature sig);
+CMSAPI cmsProfileClassSignature
+                         CMSEXPORT cmsGetDeviceClass(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsSetDeviceClass(cmsHPROFILE hProfile, cmsProfileClassSignature sig);
+CMSAPI void              CMSEXPORT cmsSetProfileVersion(cmsHPROFILE hProfile, cmsFloat64Number Version);
+CMSAPI cmsFloat64Number  CMSEXPORT cmsGetProfileVersion(cmsHPROFILE hProfile);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetEncodedICCversion(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsSetEncodedICCversion(cmsHPROFILE hProfile, cmsUInt32Number Version);
+
+// How profiles may be used
+#define LCMS_USED_AS_INPUT      0
+#define LCMS_USED_AS_OUTPUT     1
+#define LCMS_USED_AS_PROOF      2
+
+CMSAPI cmsBool           CMSEXPORT cmsIsIntentSupported(cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number UsedDirection);
+CMSAPI cmsBool           CMSEXPORT cmsIsMatrixShaper(cmsHPROFILE hProfile);
+CMSAPI cmsBool           CMSEXPORT cmsIsCLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number UsedDirection);
+
+// Translate between our notation and the ICC one
+CMSAPI cmsColorSpaceSignature   CMSEXPORT _cmsICCcolorSpace(int OurNotation);
+CMSAPI int                      CMSEXPORT _cmsLCMScolorSpace(cmsColorSpaceSignature ProfileSpace);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsChannelsOf(cmsColorSpaceSignature ColorSpace);
+
+// Build a suitable formatter for the colorspace of this profile. nBytes=1 means 8 bits, nBytes=2 means 16 bits. 
+CMSAPI cmsUInt32Number   CMSEXPORT cmsFormatterForColorspaceOfProfile(cmsHPROFILE hProfile, cmsUInt32Number nBytes, cmsBool lIsFloat);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsFormatterForPCSOfProfile(cmsHPROFILE hProfile, cmsUInt32Number nBytes, cmsBool lIsFloat);
+
+
+// Localized info
+// Selector passed as the Info argument of cmsGetProfileInfo() and
+// cmsGetProfileInfoASCII(). The numeric values are explicit (0..3).
+typedef enum {
+             cmsInfoDescription  = 0,
+             cmsInfoManufacturer = 1,
+             cmsInfoModel        = 2,
+             cmsInfoCopyright    = 3
+} cmsInfoType;
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetProfileInfo(cmsHPROFILE hProfile, cmsInfoType Info,
+                                                            const char LanguageCode[3], const char CountryCode[3],
+                                                            wchar_t* Buffer, cmsUInt32Number BufferSize);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetProfileInfoASCII(cmsHPROFILE hProfile, cmsInfoType Info,
+                                                            const char LanguageCode[3], const char CountryCode[3],
+                                                            char* Buffer, cmsUInt32Number BufferSize);
+
+// IO handlers ----------------------------------------------------------------------------------------------------------
+
+typedef struct _cms_io_handler cmsIOHANDLER;
+
+CMSAPI cmsIOHANDLER*     CMSEXPORT cmsOpenIOhandlerFromFile(cmsContext ContextID, const char* FileName, const char* AccessMode);
+CMSAPI cmsIOHANDLER*     CMSEXPORT cmsOpenIOhandlerFromStream(cmsContext ContextID, FILE* Stream);
+CMSAPI cmsIOHANDLER*     CMSEXPORT cmsOpenIOhandlerFromMem(cmsContext ContextID, void *Buffer, cmsUInt32Number size, const char* AccessMode);
+CMSAPI cmsIOHANDLER*     CMSEXPORT cmsOpenIOhandlerFromNULL(cmsContext ContextID);
+CMSAPI cmsIOHANDLER*     CMSEXPORT cmsGetProfileIOhandler(cmsHPROFILE hProfile);
+CMSAPI cmsBool           CMSEXPORT cmsCloseIOhandler(cmsIOHANDLER* io);
+
+// Profile high level functions ------------------------------------------------------------------------------------------
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromFile(const char *ICCProfile, const char *sAccess);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromFileTHR(cmsContext ContextID, const char *ICCProfile, const char *sAccess);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromStream(FILE* ICCProfile, const char* sAccess);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromStreamTHR(cmsContext ContextID, FILE* ICCProfile, const char* sAccess);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromMem(const void * MemPtr, cmsUInt32Number dwSize);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromMemTHR(cmsContext ContextID, const void * MemPtr, cmsUInt32Number dwSize);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromIOhandlerTHR(cmsContext ContextID, cmsIOHANDLER* io);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromIOhandler2THR(cmsContext ContextID, cmsIOHANDLER* io, cmsBool write);
+CMSAPI cmsBool          CMSEXPORT cmsCloseProfile(cmsHPROFILE hProfile);
+
+CMSAPI cmsBool          CMSEXPORT cmsSaveProfileToFile(cmsHPROFILE hProfile, const char* FileName);
+CMSAPI cmsBool          CMSEXPORT cmsSaveProfileToStream(cmsHPROFILE hProfile, FILE* Stream);
+CMSAPI cmsBool          CMSEXPORT cmsSaveProfileToMem(cmsHPROFILE hProfile, void *MemPtr, cmsUInt32Number* BytesNeeded);
+CMSAPI cmsUInt32Number  CMSEXPORT cmsSaveProfileToIOhandler(cmsHPROFILE hProfile, cmsIOHANDLER* io);
+
+// Predefined virtual profiles ------------------------------------------------------------------------------------------
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateRGBProfileTHR(cmsContext ContextID,
+                                                   const cmsCIExyY* WhitePoint,
+                                                   const cmsCIExyYTRIPLE* Primaries,
+                                                   cmsToneCurve* const TransferFunction[3]);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateRGBProfile(const cmsCIExyY* WhitePoint,
+                                                   const cmsCIExyYTRIPLE* Primaries,
+                                                   cmsToneCurve* const TransferFunction[3]);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateGrayProfileTHR(cmsContext ContextID,
+                                                    const cmsCIExyY* WhitePoint,
+                                                    const cmsToneCurve* TransferFunction);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateGrayProfile(const cmsCIExyY* WhitePoint,
+                                                    const cmsToneCurve* TransferFunction);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateLinearizationDeviceLinkTHR(cmsContext ContextID,
+                                                                cmsColorSpaceSignature ColorSpace,
+                                                                cmsToneCurve* const TransferFunctions[]);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateLinearizationDeviceLink(cmsColorSpaceSignature ColorSpace,
+                                                                cmsToneCurve* const TransferFunctions[]);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateInkLimitingDeviceLinkTHR(cmsContext ContextID,
+                                                              cmsColorSpaceSignature ColorSpace, cmsFloat64Number Limit);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateInkLimitingDeviceLink(cmsColorSpaceSignature ColorSpace, cmsFloat64Number Limit);
+
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateLab2ProfileTHR(cmsContext ContextID, const cmsCIExyY* WhitePoint);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateLab2Profile(const cmsCIExyY* WhitePoint);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateLab4ProfileTHR(cmsContext ContextID, const cmsCIExyY* WhitePoint);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateLab4Profile(const cmsCIExyY* WhitePoint);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateXYZProfileTHR(cmsContext ContextID);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateXYZProfile(void);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreate_sRGBProfileTHR(cmsContext ContextID);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreate_sRGBProfile(void);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateBCHSWabstractProfileTHR(cmsContext ContextID,
+                                                             cmsUInt32Number nLUTPoints,
+                                                             cmsFloat64Number Bright,
+                                                             cmsFloat64Number Contrast,
+                                                             cmsFloat64Number Hue,
+                                                             cmsFloat64Number Saturation,
+                                                             cmsUInt32Number TempSrc,
+                                                             cmsUInt32Number TempDest);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateBCHSWabstractProfile(cmsUInt32Number nLUTPoints,
+                                                             cmsFloat64Number Bright,
+                                                             cmsFloat64Number Contrast,
+                                                             cmsFloat64Number Hue,
+                                                             cmsFloat64Number Saturation,
+                                                             cmsUInt32Number TempSrc,
+                                                             cmsUInt32Number TempDest);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateNULLProfileTHR(cmsContext ContextID);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateNULLProfile(void);
+
+// Converts a transform to a devicelink profile
+CMSAPI cmsHPROFILE      CMSEXPORT cmsTransform2DeviceLink(cmsHTRANSFORM hTransform, cmsFloat64Number Version, cmsUInt32Number dwFlags);
+
+// Intents ----------------------------------------------------------------------------------------------
+
+// ICC Intents
+#define INTENT_PERCEPTUAL                              0
+#define INTENT_RELATIVE_COLORIMETRIC                   1
+#define INTENT_SATURATION                              2
+#define INTENT_ABSOLUTE_COLORIMETRIC                   3
+
+// Non-ICC intents
+#define INTENT_PRESERVE_K_ONLY_PERCEPTUAL             10
+#define INTENT_PRESERVE_K_ONLY_RELATIVE_COLORIMETRIC  11
+#define INTENT_PRESERVE_K_ONLY_SATURATION             12
+#define INTENT_PRESERVE_K_PLANE_PERCEPTUAL            13
+#define INTENT_PRESERVE_K_PLANE_RELATIVE_COLORIMETRIC 14
+#define INTENT_PRESERVE_K_PLANE_SATURATION            15
+
+// Call with NULL as parameters to get the intent count
+CMSAPI cmsUInt32Number  CMSEXPORT cmsGetSupportedIntents(cmsUInt32Number nMax, cmsUInt32Number* Codes, char** Descriptions);
+CMSAPI cmsUInt32Number  CMSEXPORT cmsGetSupportedIntentsTHR(cmsContext ContextID, cmsUInt32Number nMax, cmsUInt32Number* Codes, char** Descriptions);
+
+// Flags
+
+#define cmsFLAGS_NOCACHE                  0x0040    // Inhibit 1-pixel cache
+#define cmsFLAGS_NOOPTIMIZE               0x0100    // Inhibit optimizations
+#define cmsFLAGS_NULLTRANSFORM            0x0200    // Don't transform anyway
+
+// Proofing flags
+#define cmsFLAGS_GAMUTCHECK               0x1000    // Out of Gamut alarm
+#define cmsFLAGS_SOFTPROOFING             0x4000    // Do softproofing
+
+// Misc
+#define cmsFLAGS_BLACKPOINTCOMPENSATION   0x2000
+#define cmsFLAGS_NOWHITEONWHITEFIXUP      0x0004    // Don't fix scum dot
+#define cmsFLAGS_HIGHRESPRECALC           0x0400    // Use more memory to give better accuracy
+#define cmsFLAGS_LOWRESPRECALC            0x0800    // Use less memory to minimize resources
+
+// For devicelink creation
+#define cmsFLAGS_8BITS_DEVICELINK         0x0008   // Create 8 bits devicelinks
+#define cmsFLAGS_GUESSDEVICECLASS         0x0020   // Guess device class (for transform2devicelink)
+#define cmsFLAGS_KEEP_SEQUENCE            0x0080   // Keep profile sequence for devicelink creation
+
+// Specific to particular optimizations
+#define cmsFLAGS_FORCE_CLUT               0x0002    // Force CLUT optimization
+#define cmsFLAGS_CLUT_POST_LINEARIZATION  0x0001    // create postlinearization tables if possible
+#define cmsFLAGS_CLUT_PRE_LINEARIZATION   0x0010    // create prelinearization tables if possible
+
+// Specific to unbounded mode
+#define cmsFLAGS_NONEGATIVES              0x8000    // Prevent negative numbers in floating point transforms
+
+// Copy alpha channels when transforming           
+#define cmsFLAGS_COPY_ALPHA               0x04000000 // Alpha channels are copied on cmsDoTransform()
+
+// Fine-tune control over number of gridpoints: n is masked to 8 bits and placed in bits 16..23
+#define cmsFLAGS_GRIDPOINTS(n)           (((n) & 0xFF) << 16)
+
+// CRD special
+#define cmsFLAGS_NODEFAULTRESOURCEDEF     0x01000000
+
+// Transforms ---------------------------------------------------------------------------------------------------
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateTransformTHR(cmsContext ContextID,
+                                                  cmsHPROFILE Input,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsHPROFILE Output,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number dwFlags);
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateTransform(cmsHPROFILE Input,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsHPROFILE Output,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number dwFlags);
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateProofingTransformTHR(cmsContext ContextID,
+                                                  cmsHPROFILE Input,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsHPROFILE Output,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsHPROFILE Proofing,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number ProofingIntent,
+                                                  cmsUInt32Number dwFlags);
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateProofingTransform(cmsHPROFILE Input,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsHPROFILE Output,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsHPROFILE Proofing,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number ProofingIntent,
+                                                  cmsUInt32Number dwFlags);
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateMultiprofileTransformTHR(cmsContext ContextID,
+                                                  cmsHPROFILE hProfiles[],
+                                                  cmsUInt32Number nProfiles,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number dwFlags);
+
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateMultiprofileTransform(cmsHPROFILE hProfiles[],
+                                                  cmsUInt32Number nProfiles,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number dwFlags);
+
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateExtendedTransform(cmsContext ContextID,
+                                                   cmsUInt32Number nProfiles, cmsHPROFILE hProfiles[],
+                                                   cmsBool  BPC[],
+                                                   cmsUInt32Number Intents[],
+                                                   cmsFloat64Number AdaptationStates[],
+                                                   cmsHPROFILE hGamutProfile,
+                                                   cmsUInt32Number nGamutPCSposition,
+                                                   cmsUInt32Number InputFormat,
+                                                   cmsUInt32Number OutputFormat,
+                                                   cmsUInt32Number dwFlags);
+
+CMSAPI void             CMSEXPORT cmsDeleteTransform(cmsHTRANSFORM hTransform);
+
+CMSAPI void             CMSEXPORT cmsDoTransform(cmsHTRANSFORM Transform,
+                                                 const void * InputBuffer,
+                                                 void * OutputBuffer,
+                                                 cmsUInt32Number Size);
+
+CMSAPI void             CMSEXPORT cmsDoTransformStride(cmsHTRANSFORM Transform,   // Deprecated
+                                                 const void * InputBuffer,
+                                                 void * OutputBuffer,
+                                                 cmsUInt32Number Size,
+                                                 cmsUInt32Number Stride);
+
+CMSAPI void             CMSEXPORT cmsDoTransformLineStride(cmsHTRANSFORM  Transform,
+                                                 const void* InputBuffer,
+                                                 void* OutputBuffer,
+                                                 cmsUInt32Number PixelsPerLine,
+                                                 cmsUInt32Number LineCount,
+                                                 cmsUInt32Number BytesPerLineIn,
+                                                 cmsUInt32Number BytesPerLineOut,
+                                                 cmsUInt32Number BytesPerPlaneIn,
+                                                 cmsUInt32Number BytesPerPlaneOut);
+
+
+CMSAPI void             CMSEXPORT cmsSetAlarmCodes(const cmsUInt16Number NewAlarm[cmsMAXCHANNELS]);
+CMSAPI void             CMSEXPORT cmsGetAlarmCodes(cmsUInt16Number NewAlarm[cmsMAXCHANNELS]);
+
+
+CMSAPI void             CMSEXPORT cmsSetAlarmCodesTHR(cmsContext ContextID, 
+                                                          const cmsUInt16Number AlarmCodes[cmsMAXCHANNELS]);
+CMSAPI void             CMSEXPORT cmsGetAlarmCodesTHR(cmsContext ContextID, 
+                                                          cmsUInt16Number AlarmCodes[cmsMAXCHANNELS]);
+
+
+
+// Adaptation state for absolute colorimetric intent
+CMSAPI cmsFloat64Number CMSEXPORT cmsSetAdaptationState(cmsFloat64Number d);
+CMSAPI cmsFloat64Number CMSEXPORT cmsSetAdaptationStateTHR(cmsContext ContextID, cmsFloat64Number d);
+
+
+
+// Grab the ContextID from an open transform. Returns NULL if a NULL transform is passed
+CMSAPI cmsContext       CMSEXPORT cmsGetTransformContextID(cmsHTRANSFORM hTransform);
+
+// Grab the input/output formats
+CMSAPI cmsUInt32Number CMSEXPORT cmsGetTransformInputFormat(cmsHTRANSFORM hTransform);
+CMSAPI cmsUInt32Number CMSEXPORT cmsGetTransformOutputFormat(cmsHTRANSFORM hTransform);
+
+// For backwards compatibility
+CMSAPI cmsBool          CMSEXPORT cmsChangeBuffersFormat(cmsHTRANSFORM hTransform,
+                                                         cmsUInt32Number InputFormat,
+                                                         cmsUInt32Number OutputFormat);
+
+
+
+// PostScript ColorRenderingDictionary and ColorSpaceArray ----------------------------------------------------
+
+typedef enum { cmsPS_RESOURCE_CSA, cmsPS_RESOURCE_CRD } cmsPSResourceType;
+
+// lcms2 unified method to access postscript color resources
+CMSAPI cmsUInt32Number  CMSEXPORT cmsGetPostScriptColorResource(cmsContext ContextID,
+                                                                cmsPSResourceType Type,
+                                                                cmsHPROFILE hProfile,
+                                                                cmsUInt32Number Intent,
+                                                                cmsUInt32Number dwFlags,
+                                                                cmsIOHANDLER* io);
+
+CMSAPI cmsUInt32Number  CMSEXPORT cmsGetPostScriptCSA(cmsContext ContextID, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags, void* Buffer, cmsUInt32Number dwBufferLen);
+CMSAPI cmsUInt32Number  CMSEXPORT cmsGetPostScriptCRD(cmsContext ContextID, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags, void* Buffer, cmsUInt32Number dwBufferLen);
+
+
+// IT8.7 / CGATS.17-200x handling -----------------------------------------------------------------------------
+
+CMSAPI cmsHANDLE        CMSEXPORT cmsIT8Alloc(cmsContext ContextID);
+CMSAPI void             CMSEXPORT cmsIT8Free(cmsHANDLE hIT8);
+
+// Tables
+CMSAPI cmsUInt32Number  CMSEXPORT cmsIT8TableCount(cmsHANDLE hIT8);
+CMSAPI cmsInt32Number   CMSEXPORT cmsIT8SetTable(cmsHANDLE hIT8, cmsUInt32Number nTable);
+
+// Persistence
+CMSAPI cmsHANDLE        CMSEXPORT cmsIT8LoadFromFile(cmsContext ContextID, const char* cFileName);
+CMSAPI cmsHANDLE        CMSEXPORT cmsIT8LoadFromMem(cmsContext ContextID, const void *Ptr, cmsUInt32Number len);
+// CMSAPI cmsHANDLE        CMSEXPORT cmsIT8LoadFromIOhandler(cmsContext ContextID, cmsIOHANDLER* io);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SaveToFile(cmsHANDLE hIT8, const char* cFileName);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SaveToMem(cmsHANDLE hIT8, void *MemPtr, cmsUInt32Number* BytesNeeded);
+
+// Properties
+CMSAPI const char*      CMSEXPORT cmsIT8GetSheetType(cmsHANDLE hIT8);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetSheetType(cmsHANDLE hIT8, const char* Type);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetComment(cmsHANDLE hIT8, const char* cComment);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetPropertyStr(cmsHANDLE hIT8, const char* cProp, const char *Str);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetPropertyDbl(cmsHANDLE hIT8, const char* cProp, cmsFloat64Number Val);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetPropertyHex(cmsHANDLE hIT8, const char* cProp, cmsUInt32Number Val);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetPropertyMulti(cmsHANDLE hIT8, const char* Key, const char* SubKey, const char *Buffer);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetPropertyUncooked(cmsHANDLE hIT8, const char* Key, const char* Buffer);
+
+
+CMSAPI const char*      CMSEXPORT cmsIT8GetProperty(cmsHANDLE hIT8, const char* cProp);
+CMSAPI cmsFloat64Number CMSEXPORT cmsIT8GetPropertyDbl(cmsHANDLE hIT8, const char* cProp);
+CMSAPI const char*      CMSEXPORT cmsIT8GetPropertyMulti(cmsHANDLE hIT8, const char* Key, const char *SubKey);
+CMSAPI cmsUInt32Number  CMSEXPORT cmsIT8EnumProperties(cmsHANDLE hIT8, char ***PropertyNames);
+CMSAPI cmsUInt32Number  CMSEXPORT cmsIT8EnumPropertyMulti(cmsHANDLE hIT8, const char* cProp, const char ***SubpropertyNames);
+
+// Datasets
+CMSAPI const char*      CMSEXPORT cmsIT8GetDataRowCol(cmsHANDLE hIT8, int row, int col);
+CMSAPI cmsFloat64Number CMSEXPORT cmsIT8GetDataRowColDbl(cmsHANDLE hIT8, int row, int col);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetDataRowCol(cmsHANDLE hIT8, int row, int col,
+                                                const char* Val);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetDataRowColDbl(cmsHANDLE hIT8, int row, int col,
+                                                cmsFloat64Number Val);
+
+CMSAPI const char*      CMSEXPORT cmsIT8GetData(cmsHANDLE hIT8, const char* cPatch, const char* cSample);
+
+
+CMSAPI cmsFloat64Number CMSEXPORT cmsIT8GetDataDbl(cmsHANDLE hIT8, const char* cPatch, const char* cSample);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetData(cmsHANDLE hIT8, const char* cPatch,
+                                                const char* cSample,
+                                                const char *Val);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetDataDbl(cmsHANDLE hIT8, const char* cPatch,
+                                                const char* cSample,
+                                                cmsFloat64Number Val);
+
+CMSAPI int              CMSEXPORT cmsIT8FindDataFormat(cmsHANDLE hIT8, const char* cSample);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetDataFormat(cmsHANDLE hIT8, int n, const char *Sample);
+CMSAPI int              CMSEXPORT cmsIT8EnumDataFormat(cmsHANDLE hIT8, char ***SampleNames);
+
+CMSAPI const char*      CMSEXPORT cmsIT8GetPatchName(cmsHANDLE hIT8, int nPatch, char* buffer);
+CMSAPI int              CMSEXPORT cmsIT8GetPatchByName(cmsHANDLE hIT8, const char *cPatch);
+
+// The LABEL extension
+CMSAPI int              CMSEXPORT cmsIT8SetTableByLabel(cmsHANDLE hIT8, const char* cSet, const char* cField, const char* ExpectedType);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetIndexColumn(cmsHANDLE hIT8, const char* cSample);
+
+// Formatter for double
+CMSAPI void             CMSEXPORT cmsIT8DefineDblFormat(cmsHANDLE hIT8, const char* Formatter);
+
+// Gamut boundary description routines ------------------------------------------------------------------------------
+
+CMSAPI cmsHANDLE        CMSEXPORT cmsGBDAlloc(cmsContext ContextID);
+CMSAPI void             CMSEXPORT cmsGBDFree(cmsHANDLE hGBD);
+CMSAPI cmsBool          CMSEXPORT cmsGDBAddPoint(cmsHANDLE hGBD, const cmsCIELab* Lab);   // NOTE: exported name is spelled "GDB", not "GBD"
+CMSAPI cmsBool          CMSEXPORT cmsGDBCompute(cmsHANDLE  hGDB, cmsUInt32Number dwFlags);   // NOTE: "GDB" spelling here too
+CMSAPI cmsBool          CMSEXPORT cmsGDBCheckPoint(cmsHANDLE hGBD, const cmsCIELab* Lab);   // NOTE: "GDB" spelling here too
+
+// Feature detection  ----------------------------------------------------------------------------------------------
+
+// Estimate the black point
+CMSAPI cmsBool          CMSEXPORT cmsDetectBlackPoint(cmsCIEXYZ* BlackPoint, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags);
+CMSAPI cmsBool          CMSEXPORT cmsDetectDestinationBlackPoint(cmsCIEXYZ* BlackPoint, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags);
+
+// Estimate total area coverage
+CMSAPI cmsFloat64Number CMSEXPORT cmsDetectTAC(cmsHPROFILE hProfile);
+
+
+// Poor man's gamut mapping
+CMSAPI cmsBool          CMSEXPORT cmsDesaturateLab(cmsCIELab* Lab,
+                                                   double amax, double amin,
+                                                   double bmax, double bmin);
+
+#ifndef CMS_USE_CPP_API
+#   ifdef __cplusplus
+    }
+#   endif
+#endif
+
+#define _lcms2_H
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/include/lcms2_plugin.h b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/include/lcms2_plugin.h
new file mode 100644
index 0000000000..43fa01e3fb
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/include/lcms2_plugin.h
@@ -0,0 +1,665 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+// This is the plug-in header file. Normal LittleCMS clients should not use it.
+// It is provided for plug-in writers who may want to access the support
+// functions to do low level operations. All plug-in related structures
+// are defined here. Including this file also includes the standard API.
+
+#ifndef _lcms_plugin_H
+
+// Deal with Microsoft's attempt at deprecating C standard runtime functions
+#ifdef _MSC_VER
+#    if (_MSC_VER >= 1400)
+#      ifndef _CRT_SECURE_NO_DEPRECATE
+#        define _CRT_SECURE_NO_DEPRECATE
+#      endif
+#      ifndef _CRT_SECURE_NO_WARNINGS
+#        define _CRT_SECURE_NO_WARNINGS
+#      endif
+#    endif
+#endif
+
+#ifndef _lcms2_H
+#include "lcms2.h"
+#endif
+
+// We need some standard C functions.
+#include <stdlib.h>
+#include <math.h>
+#include <stdarg.h>
+#include <memory.h>
+#include <string.h>
+
+
+#ifndef CMS_USE_CPP_API
+#   ifdef __cplusplus
+extern "C" {
+#   endif
+#endif
+
+// Vector & Matrix operations -----------------------------------------------------------------------
+
+// Axis of the matrix/array. No specific meaning at all.
+#define VX      0
+#define VY      1
+#define VZ      2
+
+// Vectors: three cmsFloat64Number components
+typedef struct {
+    cmsFloat64Number n[3];   // Components; the VX/VY/VZ defines (0, 1, 2) are valid indices
+
+    } cmsVEC3;
+
+// 3x3 Matrix, stored as an array of three cmsVEC3
+typedef struct {
+    cmsVEC3 v[3];   // NOTE(review): presumably one cmsVEC3 per matrix row - confirm against _cmsMAT3eval
+
+    } cmsMAT3;
+
+CMSAPI void               CMSEXPORT _cmsVEC3init(cmsVEC3* r, cmsFloat64Number x, cmsFloat64Number y, cmsFloat64Number z);
+CMSAPI void               CMSEXPORT _cmsVEC3minus(cmsVEC3* r, const cmsVEC3* a, const cmsVEC3* b);
+CMSAPI void               CMSEXPORT _cmsVEC3cross(cmsVEC3* r, const cmsVEC3* u, const cmsVEC3* v);
+CMSAPI cmsFloat64Number   CMSEXPORT _cmsVEC3dot(const cmsVEC3* u, const cmsVEC3* v);
+CMSAPI cmsFloat64Number   CMSEXPORT _cmsVEC3length(const cmsVEC3* a);
+CMSAPI cmsFloat64Number   CMSEXPORT _cmsVEC3distance(const cmsVEC3* a, const cmsVEC3* b);
+
+CMSAPI void               CMSEXPORT _cmsMAT3identity(cmsMAT3* a);
+CMSAPI cmsBool            CMSEXPORT _cmsMAT3isIdentity(const cmsMAT3* a);
+CMSAPI void               CMSEXPORT _cmsMAT3per(cmsMAT3* r, const cmsMAT3* a, const cmsMAT3* b);
+CMSAPI cmsBool            CMSEXPORT _cmsMAT3inverse(const cmsMAT3* a, cmsMAT3* b);
+CMSAPI cmsBool            CMSEXPORT _cmsMAT3solve(cmsVEC3* x, cmsMAT3* a, cmsVEC3* b);
+CMSAPI void               CMSEXPORT _cmsMAT3eval(cmsVEC3* r, const cmsMAT3* a, const cmsVEC3* v);
+
+
+// Error logging  -------------------------------------------------------------------------------------
+
+CMSAPI void               CMSEXPORT  cmsSignalError(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *ErrorText, ...);
+
+// Memory management ----------------------------------------------------------------------------------
+
+CMSAPI void*              CMSEXPORT _cmsMalloc(cmsContext ContextID, cmsUInt32Number size);
+CMSAPI void*              CMSEXPORT _cmsMallocZero(cmsContext ContextID, cmsUInt32Number size);
+CMSAPI void*              CMSEXPORT _cmsCalloc(cmsContext ContextID, cmsUInt32Number num, cmsUInt32Number size);
+CMSAPI void*              CMSEXPORT _cmsRealloc(cmsContext ContextID, void* Ptr, cmsUInt32Number NewSize);
+CMSAPI void               CMSEXPORT _cmsFree(cmsContext ContextID, void* Ptr);
+CMSAPI void*              CMSEXPORT _cmsDupMem(cmsContext ContextID, const void* Org, cmsUInt32Number size);
+
+// I/O handler ----------------------------------------------------------------------------------
+
+struct _cms_io_handler {   // I/O handler: state fields followed by Read/Seek/Close/Tell/Write callbacks
+
+    void* stream;   // Associated stream, which is implemented differently depending on media.
+
+    cmsContext        ContextID;                     // Context identifier stored with the handler
+    cmsUInt32Number   UsedSpace;                     // NOTE(review): presumably bytes written so far - confirm
+    cmsUInt32Number   ReportedSize;                  // NOTE(review): presumably total size of the stream - confirm
+    char              PhysicalFile[cmsMAX_PATH];     // File name buffer, cmsMAX_PATH chars long
+
+    cmsUInt32Number   (* Read)(struct _cms_io_handler* iohandler, void *Buffer,
+                                                                  cmsUInt32Number size,
+                                                                  cmsUInt32Number count);   // Argument order: Buffer, size, count
+    cmsBool           (* Seek)(struct _cms_io_handler* iohandler, cmsUInt32Number offset);
+    cmsBool           (* Close)(struct _cms_io_handler* iohandler);
+    cmsUInt32Number   (* Tell)(struct _cms_io_handler* iohandler);
+    cmsBool           (* Write)(struct _cms_io_handler* iohandler, cmsUInt32Number size,
+                                                                   const void* Buffer);   // Note: size precedes Buffer here, unlike Read
+};
+
+// Endianness adjust functions
+CMSAPI cmsUInt16Number   CMSEXPORT  _cmsAdjustEndianess16(cmsUInt16Number Word);
+CMSAPI cmsUInt32Number   CMSEXPORT  _cmsAdjustEndianess32(cmsUInt32Number Value);
+CMSAPI void              CMSEXPORT  _cmsAdjustEndianess64(cmsUInt64Number* Result, cmsUInt64Number* QWord);
+
+// Helper IO functions
+CMSAPI cmsBool           CMSEXPORT  _cmsReadUInt8Number(cmsIOHANDLER* io,  cmsUInt8Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsReadUInt16Number(cmsIOHANDLER* io, cmsUInt16Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsReadUInt32Number(cmsIOHANDLER* io, cmsUInt32Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsReadFloat32Number(cmsIOHANDLER* io, cmsFloat32Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsReadUInt64Number(cmsIOHANDLER* io, cmsUInt64Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsRead15Fixed16Number(cmsIOHANDLER* io, cmsFloat64Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsReadXYZNumber(cmsIOHANDLER* io, cmsCIEXYZ* XYZ);
+CMSAPI cmsBool           CMSEXPORT  _cmsReadUInt16Array(cmsIOHANDLER* io, cmsUInt32Number n, cmsUInt16Number* Array);
+
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteUInt8Number(cmsIOHANDLER* io, cmsUInt8Number n);
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteUInt16Number(cmsIOHANDLER* io, cmsUInt16Number n);
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteUInt32Number(cmsIOHANDLER* io, cmsUInt32Number n);
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteFloat32Number(cmsIOHANDLER* io, cmsFloat32Number n);
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteUInt64Number(cmsIOHANDLER* io, cmsUInt64Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsWrite15Fixed16Number(cmsIOHANDLER* io, cmsFloat64Number n);
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteXYZNumber(cmsIOHANDLER* io, const cmsCIEXYZ* XYZ);
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteUInt16Array(cmsIOHANDLER* io, cmsUInt32Number n, const cmsUInt16Number* Array);
+
+// ICC base tag: a tag type signature followed by four reserved bytes
+typedef struct {
+    cmsTagTypeSignature  sig;           // Tag type signature
+    cmsInt8Number        reserved[4];   // Reserved bytes
+
+} _cmsTagBase;
+
+// Type base helper functions
+CMSAPI cmsTagTypeSignature  CMSEXPORT _cmsReadTypeBase(cmsIOHANDLER* io);
+CMSAPI cmsBool              CMSEXPORT _cmsWriteTypeBase(cmsIOHANDLER* io, cmsTagTypeSignature sig);
+
+// Alignment functions
+CMSAPI cmsBool             CMSEXPORT _cmsReadAlignment(cmsIOHANDLER* io);
+CMSAPI cmsBool             CMSEXPORT _cmsWriteAlignment(cmsIOHANDLER* io);
+
+// To deal with text streams. 2K at most
+CMSAPI cmsBool             CMSEXPORT _cmsIOPrintf(cmsIOHANDLER* io, const char* frm, ...);
+
+// Fixed point helper functions
+CMSAPI cmsFloat64Number    CMSEXPORT _cms8Fixed8toDouble(cmsUInt16Number fixed8);
+CMSAPI cmsUInt16Number     CMSEXPORT _cmsDoubleTo8Fixed8(cmsFloat64Number val);
+
+CMSAPI cmsFloat64Number    CMSEXPORT _cms15Fixed16toDouble(cmsS15Fixed16Number fix32);
+CMSAPI cmsS15Fixed16Number CMSEXPORT _cmsDoubleTo15Fixed16(cmsFloat64Number v);
+
+// Date/time helper functions
+CMSAPI void                CMSEXPORT _cmsEncodeDateTimeNumber(cmsDateTimeNumber *Dest, const struct tm *Source);
+CMSAPI void                CMSEXPORT _cmsDecodeDateTimeNumber(const cmsDateTimeNumber *Source, struct tm *Dest);
+
+//----------------------------------------------------------------------------------------------------------
+
+// Shared callbacks for user data
+typedef void     (* _cmsFreeUserDataFn)(cmsContext ContextID, void* Data);
+typedef void*    (* _cmsDupUserDataFn)(cmsContext ContextID, const void* Data);
+
+//----------------------------------------------------------------------------------------------------------
+
+// Plug-in foundation
+#define cmsPluginMagicNumber                 0x61637070     // 'acpp'
+
+#define cmsPluginMemHandlerSig               0x6D656D48     // 'memH'
+#define cmsPluginInterpolationSig            0x696E7048     // 'inpH'
+#define cmsPluginParametricCurveSig          0x70617248     // 'parH'
+#define cmsPluginFormattersSig               0x66726D48     // 'frmH'
+#define cmsPluginTagTypeSig                  0x74797048     // 'typH'
+#define cmsPluginTagSig                      0x74616748     // 'tagH'
+#define cmsPluginRenderingIntentSig          0x696E7448     // 'intH'
+#define cmsPluginMultiProcessElementSig      0x6D706548     // 'mpeH'
+#define cmsPluginOptimizationSig             0x6F707448     // 'optH'
+#define cmsPluginTransformSig                0x7A666D48     // 'xfmH' (sic: the bytes are ASCII 'zfmH'; value left unchanged)
+#define cmsPluginMutexSig                    0x6D747A48     // 'mtxH' (sic: the bytes are ASCII 'mtzH'; value left unchanged)
+
+typedef struct _cmsPluginBaseStruct {
+
+        cmsUInt32Number                Magic;               // 'acpp' signature (see cmsPluginMagicNumber)
+        cmsUInt32Number                ExpectedVersion;     // Expected version of LittleCMS
+        cmsUInt32Number                Type;                // Type of plug-in; presumably one of the cmsPlugin*Sig values - confirm
+        struct _cmsPluginBaseStruct*   Next;                // For multiple plugin definition. NULL for end of list.
+
+} cmsPluginBase;
+
+// Maximum number of types in a plugin array
+#define MAX_TYPES_IN_LCMS_PLUGIN    20
+
+//----------------------------------------------------------------------------------------------------------
+
+// Memory handler. Each new plug-in type replaces current behaviour
+
+typedef void* (* _cmsMallocFnPtrType)(cmsContext ContextID, cmsUInt32Number size);   // Same signature as _cmsMalloc
+typedef void  (* _cmsFreeFnPtrType)(cmsContext ContextID, void *Ptr);   // Same signature as _cmsFree
+typedef void* (* _cmsReallocFnPtrType)(cmsContext ContextID, void* Ptr, cmsUInt32Number NewSize);   // Same signature as _cmsRealloc
+
+typedef void* (* _cmsMalloZerocFnPtrType)(cmsContext ContextID, cmsUInt32Number size);   // Same signature as _cmsMallocZero; "MalloZeroc" spelling is part of the type name
+typedef void* (* _cmsCallocFnPtrType)(cmsContext ContextID, cmsUInt32Number num, cmsUInt32Number size);   // Same signature as _cmsCalloc
+typedef void* (* _cmsDupFnPtrType)(cmsContext ContextID, const void* Org, cmsUInt32Number size);   // Same signature as _cmsDupMem
+
+typedef struct {
+
+        cmsPluginBase base;   // Common plug-in header (Magic, ExpectedVersion, Type, Next)
+
+        // Required callbacks
+        _cmsMallocFnPtrType  MallocPtr;
+        _cmsFreeFnPtrType    FreePtr;
+        _cmsReallocFnPtrType ReallocPtr;
+
+        // Optional callbacks
+       _cmsMalloZerocFnPtrType MallocZeroPtr;
+       _cmsCallocFnPtrType     CallocPtr;
+       _cmsDupFnPtrType        DupPtr;
+
+} cmsPluginMemHandler;
+
+
+// ------------------------------------------------------------------------------------------------------------------
+
+// Interpolation. 16 bits and floating point versions.
+struct _cms_interp_struc;   // Forward declaration; the full definition follows below as cmsInterpParams
+
+// Interpolation callbacks. In both, p points to the interpolation parameters (struct _cms_interp_struc).
+
+// 16 bits forward interpolation. This function performs precision-limited linear interpolation
+// and is supposed to be quite fast. Implementation may be tetrahedral or trilinear, and plug-ins may
+// choose to implement any other interpolation algorithm.
+typedef void (* _cmsInterpFn16)(CMSREGISTER const cmsUInt16Number Input[],
+                                CMSREGISTER cmsUInt16Number Output[],
+                                CMSREGISTER const struct _cms_interp_struc* p);
+
+// Floating point forward interpolation. Full precision interpolation using floats. This is not a
+// time critical function. Implementation may be tetrahedral or trilinear, and plug-ins may
+// choose to implement any other interpolation algorithm.
+typedef void (* _cmsInterpFnFloat)(cmsFloat32Number const Input[],
+                                   cmsFloat32Number Output[],
+                                   const struct _cms_interp_struc* p);
+
+
+
+// This type holds a pointer to an interpolator that can be either 16 bits or float (one at a time: it is a union)
+typedef union {
+    _cmsInterpFn16       Lerp16;            // Forward interpolation in 16 bits
+    _cmsInterpFnFloat    LerpFloat;         // Forward interpolation in floating point
+} cmsInterpFunction;
+
+// Flags for interpolator selection
+#define CMS_LERP_FLAGS_16BITS             0x0000        // The default
+#define CMS_LERP_FLAGS_FLOAT              0x0001        // Requires different implementation
+#define CMS_LERP_FLAGS_TRILINEAR          0x0100        // Hint only
+
+// Upper bound on input dimensions; sizes the nSamples, Domain and opta arrays of cmsInterpParams
+#define MAX_INPUT_DIMENSIONS 8
+
+typedef struct _cms_interp_struc {  // Used on all interpolations. Supplied by lcms2 when calling the interpolation function
+
+    cmsContext ContextID;     // The calling thread
+
+    cmsUInt32Number dwFlags;  // Keep original flags
+    cmsUInt32Number nInputs;  // != 1 only in 3D interpolation
+    cmsUInt32Number nOutputs; // != 1 only in 3D interpolation
+
+    cmsUInt32Number nSamples[MAX_INPUT_DIMENSIONS];  // Valid on all kinds of tables
+    cmsUInt32Number Domain[MAX_INPUT_DIMENSIONS];    // Domain = nSamples - 1
+
+    cmsUInt32Number opta[MAX_INPUT_DIMENSIONS];     // Optimization for 3D CLUT. This is the number of nodes premultiplied for each
+                                                    // dimension. For example, in 7 nodes, 7, 7^2 , 7^3, 7^4, etc. On non-regular
+                                                    // samplings, it may vary according to the number of nodes for each dimension.
+
+    const void *Table;                // Points to the actual interpolation table
+    cmsInterpFunction Interpolation;  // Points to the function to do the interpolation
+
+ } cmsInterpParams;
+
+// Interpolators factory: returns a cmsInterpFunction for the given channel counts and flags (presumably CMS_LERP_FLAGS_* -- confirm)
+typedef cmsInterpFunction (* cmsInterpFnFactory)(cmsUInt32Number nInputChannels, cmsUInt32Number nOutputChannels, cmsUInt32Number dwFlags);
+
+// The interpolation plug-in: the common header plus one factory pointer
+typedef struct {
+    cmsPluginBase base;
+
+    // Points to a user-supplied function which implements the factory
+    cmsInterpFnFactory InterpolatorsFactory;
+
+} cmsPluginInterpolation;
+
+//----------------------------------------------------------------------------------------------------------
+
+// Parametric curves. A negative type means same function but analytically inverted. Max. number of params is 10
+
+// Evaluator callback for user-supplied parametric curves. May implement more than one type; Type is signed (see above), R is the input value
+typedef  cmsFloat64Number (* cmsParametricCurveEvaluator)(cmsInt32Number Type, const cmsFloat64Number Params[10], cmsFloat64Number R);
+
+// Plug-in may implement an arbitrary number of parametric curves (the arrays below hold at most MAX_TYPES_IN_LCMS_PLUGIN entries)
+typedef struct {
+    cmsPluginBase base;
+
+    cmsUInt32Number nFunctions;                                     // Number of supported functions
+    cmsUInt32Number FunctionTypes[MAX_TYPES_IN_LCMS_PLUGIN];        // The identification types (unsigned here, although the evaluator receives a signed Type)
+    cmsUInt32Number ParameterCount[MAX_TYPES_IN_LCMS_PLUGIN];       // Number of parameters for each function
+
+    cmsParametricCurveEvaluator    Evaluator;                       // The evaluator
+
+} cmsPluginParametricCurves;
+//----------------------------------------------------------------------------------------------------------
+
+// Formatters. This plug-in adds new handlers, replacing them if they already exist. Formatters dealing with
+// cmsFloat32Number (bps = 4) or double (bps = 0) types are requested via the float callback (cmsFormatterFloat, member FmtFloat).
+// All others are requested via the 16 bits callback (cmsFormatter16, member Fmt16).
+
+struct _cmstransform_struct;   // Forward declaration; only pointers to it are used in the declarations below
+
+typedef cmsUInt8Number* (* cmsFormatter16)(CMSREGISTER struct _cmstransform_struct* CMMcargo,
+                                           CMSREGISTER cmsUInt16Number Values[],
+                                           CMSREGISTER cmsUInt8Number* Buffer,
+                                           CMSREGISTER cmsUInt32Number Stride);
+
+typedef cmsUInt8Number* (* cmsFormatterFloat)(struct _cmstransform_struct* CMMcargo,
+                                              cmsFloat32Number Values[],
+                                              cmsUInt8Number*  Buffer,
+                                              cmsUInt32Number  Stride);
+
+// This type holds a pointer to a formatter that can be either 16 bits or cmsFloat32Number (one at a time: it is a union)
+typedef union {
+    cmsFormatter16    Fmt16;
+    cmsFormatterFloat FmtFloat;
+
+} cmsFormatter;
+
+#define CMS_PACK_FLAGS_16BITS       0x0000
+#define CMS_PACK_FLAGS_FLOAT        0x0001
+
+typedef enum { cmsFormatterInput=0, cmsFormatterOutput=1 } cmsFormatterDirection;
+
+typedef cmsFormatter (* cmsFormatterFactory)(cmsUInt32Number Type,           // Specific type, e.g. TYPE_RGB_8
+                                             cmsFormatterDirection Dir,
+                                             cmsUInt32Number dwFlags);      // precision (presumably a CMS_PACK_FLAGS_* value -- confirm)
+
+// Plug-in may implement an arbitrary number of formatters; all of them are reached through the single factory below
+typedef struct {
+    cmsPluginBase          base;
+    cmsFormatterFactory    FormattersFactory;
+
+} cmsPluginFormatters;
+
+//----------------------------------------------------------------------------------------------------------
+
+// Tag type handler. Each type is free to return anything it wants, and it is up to the caller to
+// know in advance which type is contained in the tag.
+typedef struct _cms_typehandler_struct {
+
+        cmsTagTypeSignature Signature;     // The signature of the type
+
+        // Allocates and reads items. nItems is a pointer, presumably an out-parameter for the item count (confirm)
+        void *   (* ReadPtr)(struct _cms_typehandler_struct* self,
+                             cmsIOHANDLER*      io,
+                             cmsUInt32Number*   nItems,
+                             cmsUInt32Number    SizeOfTag);
+
+        // Writes nItems items, taken from Ptr, through io
+        cmsBool  (* WritePtr)(struct _cms_typehandler_struct* self,
+                              cmsIOHANDLER*     io,
+                              void*             Ptr,
+                              cmsUInt32Number   nItems);
+
+        // Duplicate an item or array of items (n is presumably the item count -- confirm)
+        void*   (* DupPtr)(struct _cms_typehandler_struct* self,
+                           const void *Ptr,
+                           cmsUInt32Number n);
+
+        // Free all resources held by Ptr
+        void    (* FreePtr)(struct _cms_typehandler_struct* self,
+                            void *Ptr);
+
+        // Additional parameters used by the calling thread
+        cmsContext       ContextID;
+        cmsUInt32Number  ICCVersion;
+
+} cmsTagTypeHandler;
+
+// Each plug-in implements a single type: the common header plus one handler
+typedef struct {
+        cmsPluginBase      base;
+        cmsTagTypeHandler  Handler;
+
+} cmsPluginTagType;
+
+//----------------------------------------------------------------------------------------------------------
+
+// This is the tag plugin, which identifies tags. For writing, a pointer to function is provided.
+// This function should return the desired type for this tag, given the version of the profile
+// and the data being serialized.
+typedef struct {
+
+    cmsUInt32Number     ElemCount;          // If this tag needs an array, how many elements it should keep
+
+    // For reading.
+    cmsUInt32Number     nSupportedTypes;    // In how many types this tag can come (MAX_TYPES_IN_LCMS_PLUGIN maximum)
+    cmsTagTypeSignature SupportedTypes[MAX_TYPES_IN_LCMS_PLUGIN];
+
+    // For writing: chooses the type signature from the ICC version and the data
+    cmsTagTypeSignature (* DecideType)(cmsFloat64Number ICCVersion, const void *Data);
+
+} cmsTagDescriptor;
+
+// Plug-in implements a single tag: its signature plus the descriptor above
+typedef struct {
+    cmsPluginBase    base;
+
+    cmsTagSignature  Signature;
+    cmsTagDescriptor Descriptor;
+
+} cmsPluginTag;
+
+//----------------------------------------------------------------------------------------------------------
+
+// Custom intents. This function should join all profiles specified in the array in
+// a single LUT. Any custom intent in the chain redirects to the custom function. If more than
+// one custom intent is found, the one located first is invoked. Usually users should use only one
+// custom intent, so mixing custom intents in the same multiprofile transform is not supported.
+// Intents, hProfiles, BPC and AdaptationStates are arrays (presumably nProfiles entries each -- confirm).
+typedef cmsPipeline* (* cmsIntentFn)( cmsContext       ContextID,
+                                      cmsUInt32Number  nProfiles,
+                                      cmsUInt32Number  Intents[],
+                                      cmsHPROFILE      hProfiles[],
+                                      cmsBool          BPC[],
+                                      cmsFloat64Number AdaptationStates[],
+                                      cmsUInt32Number  dwFlags);
+
+
+// Each plug-in defines a single intent number.
+typedef struct {
+    cmsPluginBase     base;
+    cmsUInt32Number   Intent;             // The intent number defined by this plug-in
+    cmsIntentFn       Link;               // Function that builds the pipeline for that intent
+    char              Description[256];   // Fixed-size text buffer describing the intent
+
+} cmsPluginRenderingIntent;
+
+
+// The default ICC intents (perceptual, saturation, rel.col and abs.col). Same parameter list as cmsIntentFn.
+CMSAPI cmsPipeline*  CMSEXPORT _cmsDefaultICCintents(cmsContext       ContextID,
+                                                     cmsUInt32Number  nProfiles,
+                                                     cmsUInt32Number  Intents[],
+                                                     cmsHPROFILE      hProfiles[],
+                                                     cmsBool          BPC[],
+                                                     cmsFloat64Number AdaptationStates[],
+                                                     cmsUInt32Number  dwFlags);
+
+
+//----------------------------------------------------------------------------------------------------------
+
+// Pipelines, Multi Process Elements.
+// Callback types for a stage: evaluate it, duplicate its element data, free its element data.
+typedef void (* _cmsStageEvalFn)     (const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage* mpe);
+typedef void*(* _cmsStageDupElemFn)  (cmsStage* mpe);
+typedef void (* _cmsStageFreeElemFn) (cmsStage* mpe);
+
+
+// This function allocates a generic MPE (Multi Process Element), returned as a cmsStage
+CMSAPI cmsStage* CMSEXPORT _cmsStageAllocPlaceholder(cmsContext ContextID,
+                                cmsStageSignature     Type,
+                                cmsUInt32Number       InputChannels,
+                                cmsUInt32Number       OutputChannels,
+                                _cmsStageEvalFn       EvalPtr,            // Points to fn that evaluates the element (always in floating point)
+                                _cmsStageDupElemFn    DupElemPtr,         // Points to a fn that duplicates the stage
+                                _cmsStageFreeElemFn   FreePtr,            // Points to a fn that sets the element free
+                                void*                 Data);              // A generic pointer to whatever memory needed by the element
+typedef struct {
+      cmsPluginBase     base;
+      cmsTagTypeHandler Handler;   // Uses the same handler struct as the tag type plug-in (cmsTagTypeHandler)
+
+}  cmsPluginMultiProcessElement;
+
+
+// Data kept in "Element" member of cmsStage
+
+// Curves
+typedef struct {
+    cmsUInt32Number nCurves;       // Presumably the number of entries in TheCurves -- confirm
+    cmsToneCurve**  TheCurves;     // Array of pointers to tone curves
+
+} _cmsStageToneCurvesData;
+
+// Matrix
+typedef struct {
+    cmsFloat64Number*  Double;          // The matrix coefficients, as cmsFloat64Number
+    cmsFloat64Number*  Offset;          // The offset
+
+} _cmsStageMatrixData;
+
+// CLUT
+typedef struct {
+
+    union {                       // Can hold only one of the two representations at a time
+        cmsUInt16Number*  T;      // Points to the 16 bits table
+        cmsFloat32Number* TFloat; // Points to the cmsFloat32Number table
+
+    } Tab;
+
+    cmsInterpParams* Params;          // Interpolation parameters (see cmsInterpParams)
+    cmsUInt32Number  nEntries;
+    cmsBool          HasFloatValues;  // Presumably tells which member of Tab is in use -- confirm
+
+} _cmsStageCLutData;
+
+
+//----------------------------------------------------------------------------------------------------------
+// Optimization. Using this plug-in, additional optimization strategies may be implemented.
+// The function should return TRUE if any optimization is done on the LUT; this terminates
+// the optimization search. It should return FALSE if it is unable to optimize and wants to give
+// a chance to the rest of the optimizers.
+// 16 bits evaluator type; one may be attached to a pipeline with _cmsPipelineSetOptimizationParameters (below).
+typedef void     (* _cmsOPTeval16Fn)(CMSREGISTER const cmsUInt16Number In[],
+                                     CMSREGISTER cmsUInt16Number Out[],
+                                     CMSREGISTER const void* Data);
+
+// Optimizer entry point type. Lut, the formats and dwFlags are passed by pointer (presumably so they can be replaced in place -- confirm).
+typedef cmsBool  (* _cmsOPToptimizeFn)(cmsPipeline** Lut,
+                                       cmsUInt32Number  Intent,
+                                       cmsUInt32Number* InputFormat,
+                                       cmsUInt32Number* OutputFormat,
+                                       cmsUInt32Number* dwFlags);
+
+// This function may be used to set the optional evaluator and a block of private data. If private data is being used, optional
+// duplicator and free functions should also be specified in order to duplicate the LUT construct. Use NULL to inhibit such functionality.
+
+CMSAPI void CMSEXPORT _cmsPipelineSetOptimizationParameters(cmsPipeline* Lut,
+                                               _cmsOPTeval16Fn Eval16,
+                                               void* PrivateData,
+                                               _cmsFreeUserDataFn FreePrivateDataFn,
+                                               _cmsDupUserDataFn DupPrivateDataFn);
+
+typedef struct {
+      cmsPluginBase     base;
+
+      // Optimize entry point
+      _cmsOPToptimizeFn  OptimizePtr;
+
+}  cmsPluginOptimization;
+
+//----------------------------------------------------------------------------------------------------------
+// Full xform. Declares the transform worker function types and the stride descriptor they use.
+
+typedef struct {
+       cmsUInt32Number BytesPerLineIn;      // Strides, judging by the names: per line and per plane,
+       cmsUInt32Number BytesPerLineOut;     // separately for the input and the output buffer
+       cmsUInt32Number BytesPerPlaneIn;
+       cmsUInt32Number BytesPerPlaneOut;
+
+} cmsStride;
+
+typedef void     (* _cmsTransformFn)(struct _cmstransform_struct *CMMcargo,   // Legacy function, handles just ONE scanline.
+                                     const void* InputBuffer,
+                                     void* OutputBuffer,
+                                     cmsUInt32Number Size,
+                                     cmsUInt32Number Stride);                 // Stride in bytes to the next plane in planar formats
+
+
+typedef void     (*_cmsTransform2Fn)(struct _cmstransform_struct *CMMcargo,   // Newer variant: takes a line count and a cmsStride
+                                     const void* InputBuffer,
+                                     void* OutputBuffer,
+                                     cmsUInt32Number PixelsPerLine,     // Pixels in each line
+                                     cmsUInt32Number LineCount,          // Number of lines to process
+                                     const cmsStride* Stride);  // Line and plane strides (see cmsStride)
+
+typedef cmsBool  (* _cmsTransformFactory)(_cmsTransformFn* xform,            // Legacy factory: works with the one-scanline _cmsTransformFn
+                                         void** UserData,
+                                         _cmsFreeUserDataFn* FreePrivateDataFn,
+                                         cmsPipeline** Lut,
+                                         cmsUInt32Number* InputFormat,
+                                         cmsUInt32Number* OutputFormat,
+                                         cmsUInt32Number* dwFlags);
+
+typedef cmsBool  (* _cmsTransform2Factory)(_cmsTransform2Fn* xform,          // Same parameter list, but for the multi-line _cmsTransform2Fn
+                                         void** UserData,
+                                         _cmsFreeUserDataFn* FreePrivateDataFn,
+                                         cmsPipeline** Lut,
+                                         cmsUInt32Number* InputFormat,
+                                         cmsUInt32Number* OutputFormat,
+                                         cmsUInt32Number* dwFlags);
+
+
+// Set and retrieve the user data attached to a transform (the setter also takes the function used to free it)
+CMSAPI void   CMSEXPORT _cmsSetTransformUserData(struct _cmstransform_struct *CMMcargo, void* ptr, _cmsFreeUserDataFn FreePrivateDataFn);
+CMSAPI void * CMSEXPORT _cmsGetTransformUserData(struct _cmstransform_struct *CMMcargo);
+
+
+// Retrieve formatters: the input and output formatter pointers are returned through FromInput and ToOutput
+CMSAPI void   CMSEXPORT _cmsGetTransformFormatters16   (struct _cmstransform_struct *CMMcargo, cmsFormatter16* FromInput, cmsFormatter16* ToOutput);
+CMSAPI void   CMSEXPORT _cmsGetTransformFormattersFloat(struct _cmstransform_struct *CMMcargo, cmsFormatterFloat* FromInput, cmsFormatterFloat* ToOutput);
+
+typedef struct {
+      cmsPluginBase     base;
+
+      // Transform entry point: a union, so a plug-in carries either the legacy factory or the newer one
+      union {
+             _cmsTransformFactory        legacy_xform;
+             _cmsTransform2Factory       xform;
+      } factories;
+
+}  cmsPluginTransform;
+
+//----------------------------------------------------------------------------------------------------------
+// Mutex plug-in: callbacks to create, destroy, lock and unlock a mutex, which is handled as an opaque void*
+
+typedef void*    (* _cmsCreateMutexFnPtrType)(cmsContext ContextID);
+typedef void     (* _cmsDestroyMutexFnPtrType)(cmsContext ContextID, void* mtx);
+typedef cmsBool  (* _cmsLockMutexFnPtrType)(cmsContext ContextID, void* mtx);
+typedef void     (* _cmsUnlockMutexFnPtrType)(cmsContext ContextID, void* mtx);
+
+typedef struct {
+      cmsPluginBase     base;
+
+     _cmsCreateMutexFnPtrType  CreateMutexPtr;
+     _cmsDestroyMutexFnPtrType DestroyMutexPtr;
+     _cmsLockMutexFnPtrType    LockMutexPtr;
+     _cmsUnlockMutexFnPtrType  UnlockMutexPtr;
+
+}  cmsPluginMutex;
+// Exported mutex functions; their parameter lists mirror the four callback types above
+CMSAPI void*   CMSEXPORT _cmsCreateMutex(cmsContext ContextID);
+CMSAPI void    CMSEXPORT _cmsDestroyMutex(cmsContext ContextID, void* mtx);
+CMSAPI cmsBool CMSEXPORT _cmsLockMutex(cmsContext ContextID, void* mtx);
+CMSAPI void    CMSEXPORT _cmsUnlockMutex(cmsContext ContextID, void* mtx);
+
+
+#ifndef CMS_USE_CPP_API
+#   ifdef __cplusplus
+    }
+#   endif
+#endif
+
+#define _lcms_plugin_H
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/Makefile.am b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/Makefile.am
new file mode 100644
index 0000000000..1d7ded8c39
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/Makefile.am
@@ -0,0 +1,31 @@
+#
+# Automake input for building the lcms 2 library (automake turns it into Makefile.in)
+#
+
+# Require automake 1.7 or later; "foreign" means: don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign
+
+# CFLAGS = -pedantic -Wall -std=c99 -O3
+
+includedir = ${prefix}/include
+
+# Shared libraries built in this directory
+lib_LTLIBRARIES = liblcms2.la
+# Version numbers filled in by configure; passed to libtool through -version-info below
+LIBRARY_CURRENT    = @LIBRARY_CURRENT@
+LIBRARY_REVISION   = @LIBRARY_REVISION@
+LIBRARY_AGE        = @LIBRARY_AGE@
+# Header search path: the include directory of the build tree, then that of the source tree
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include
+
+liblcms2_la_LDFLAGS = -no-undefined \
+  -version-info $(LIBRARY_CURRENT):$(LIBRARY_REVISION):$(LIBRARY_AGE)
+
+liblcms2_la_LIBADD = $(LCMS_LIB_DEPLIBS)
+
+liblcms2_la_SOURCES = \
+  cmscnvrt.c cmserr.c cmsgamma.c cmsgmt.c cmsintrp.c cmsio0.c cmsio1.c cmslut.c \
+  cmsplugin.c cmssm.c cmsmtrx.c cmspack.c cmspcs.c cmswtpnt.c cmsxform.c \
+  cmssamp.c cmsnamed.c cmscam02.c cmsvirt.c cmstypes.c cmscgats.c cmsps2.c cmsopt.c \
+  cmshalf.c cmsalpha.c lcms2_internal.h
+
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/Makefile.in b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/Makefile.in
new file mode 100644
index 0000000000..5d699a0b86
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/Makefile.in
@@ -0,0 +1,723 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for building lcms 2 library
+#
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_append_compile_flags.m4 \
+	$(top_srcdir)/m4/ax_append_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+	$(top_srcdir)/m4/ax_require_defined.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
+am__installdirs = "$(DESTDIR)$(libdir)"
+LTLIBRARIES = $(lib_LTLIBRARIES)
+am__DEPENDENCIES_1 =
+liblcms2_la_DEPENDENCIES = $(am__DEPENDENCIES_1)
+am_liblcms2_la_OBJECTS = cmscnvrt.lo cmserr.lo cmsgamma.lo cmsgmt.lo \
+	cmsintrp.lo cmsio0.lo cmsio1.lo cmslut.lo cmsplugin.lo \
+	cmssm.lo cmsmtrx.lo cmspack.lo cmspcs.lo cmswtpnt.lo \
+	cmsxform.lo cmssamp.lo cmsnamed.lo cmscam02.lo cmsvirt.lo \
+	cmstypes.lo cmscgats.lo cmsps2.lo cmsopt.lo cmshalf.lo \
+	cmsalpha.lo
+liblcms2_la_OBJECTS = $(am_liblcms2_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 = 
+liblcms2_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(liblcms2_la_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(liblcms2_la_SOURCES)
+DIST_SOURCES = $(liblcms2_la_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_THREAD = @LIB_THREAD@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_CXX = @PTHREAD_CXX@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+
+# CFLAGS = -pedantic -Wall -std=c99 -O3
+includedir = ${prefix}/include
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign
+
+# Shared libraries built in this directory
+lib_LTLIBRARIES = liblcms2.la
+# Headers are searched in the build tree first, then in the source tree
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include
+# Link flags for liblcms2.la; the -version-info triple is assembled from
+# LIBRARY_CURRENT, LIBRARY_REVISION and LIBRARY_AGE
+liblcms2_la_LDFLAGS = -no-undefined \
+  -version-info $(LIBRARY_CURRENT):$(LIBRARY_REVISION):$(LIBRARY_AGE)
+
+# Extra libraries appended to the liblcms2.la link line
+liblcms2_la_LIBADD = $(LCMS_LIB_DEPLIBS)
+# Files that make up liblcms2.la.
+# NOTE(review): every entry is a .c file -- confirm these names match the
+# sources actually shipped next to this Makefile.in.
+liblcms2_la_SOURCES = \
+  cmscnvrt.c cmserr.c cmsgamma.c cmsgmt.c cmsintrp.c cmsio0.c cmsio1.c cmslut.c \
+  cmsplugin.c cmssm.c cmsmtrx.c cmspack.c cmspcs.c cmswtpnt.c cmsxform.c \
+  cmssamp.c cmsnamed.c cmscam02.c cmsvirt.c cmstypes.c cmscgats.c cmsps2.c cmsopt.c \
+  cmshalf.c cmsalpha.c lcms2_internal.h
+
+# 'all' does no work of its own; it only delegates to all-am
+all: all-am
+
+# Clear the suffix list, then register only the suffixes used by the
+# .c.o / .c.obj / .c.lo suffix rules of this file
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+# --- Rules that regenerate the build system itself ---------------------------
+#
+# Makefile.in: for every prerequisite newer than the target ($?), check whether
+# it is one of $(am__configure_deps). If so, run am--refresh in $(top_builddir)
+# and stop (successfully only if that succeeded and the target exists).
+# Otherwise fall through and re-run $(AUTOMAKE) for src/Makefile from
+# $(top_srcdir).
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign src/Makefile
+# Makefile: if config.status is among the newer prerequisites, do a full
+# am--refresh; otherwise only re-run config.status for this one file.
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+# config.status, configure and aclocal.m4 are all rebuilt the same way: by
+# invoking the am--refresh target from $(top_builddir).
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+# Rule with neither prerequisites nor recipe for the aclocal.m4 inputs
+$(am__aclocal_m4_deps):
+
+# Install the libtool libraries into $(DESTDIR)$(libdir).
+# Only entries of lib_LTLIBRARIES that exist as files are collected (list2);
+# nothing is done when libdir is empty or no library file is found. The
+# directory is created first, then libtool --mode=install copies the files.
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+	@$(NORMAL_INSTALL)
+	@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
+	list2=; for p in $$list; do \
+	  if test -f $$p; then \
+	    list2="$$list2 $$p"; \
+	  else :; fi; \
+	done; \
+	test -z "$$list2" || { \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
+	}
+
+# Remove each installed library through libtool --mode=uninstall.
+# $(am__strip_dir) is expanded inside the loop and the following commands use
+# $$f -- presumably it derives $$f from $$p; it is defined outside this section.
+uninstall-libLTLIBRARIES:
+	@$(NORMAL_UNINSTALL)
+	@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
+	for p in $$list; do \
+	  $(am__strip_dir) \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \
+	done
+
+# Delete the built .la files, plus an 'so_locations' file in every directory
+# that holds one of them (the sed script maps each library path to
+# <its directory>/so_locations, using '.' for names without a slash).
+clean-libLTLIBRARIES:
+	-test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+	@list='$(lib_LTLIBRARIES)'; \
+	locs=`for p in $$list; do echo $$p; done | \
+	      sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+	      sort -u`; \
+	test -z "$$locs" || { \
+	  echo rm -f $${locs}; \
+	  rm -f $${locs}; \
+	}
+
+# Link the library from its objects; -rpath records $(libdir) as the place
+# the library will be installed to.
+liblcms2.la: $(liblcms2_la_OBJECTS) $(liblcms2_la_DEPENDENCIES) $(EXTRA_liblcms2_la_DEPENDENCIES) 
+	$(AM_V_CCLD)$(liblcms2_la_LINK) -rpath $(libdir) $(liblcms2_la_OBJECTS) $(liblcms2_la_LIBADD) $(LIBS)
+
+# Remove object files of the current directory (errors ignored)
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+
+# Remove *.tab.c files of the current directory (errors ignored)
+distclean-compile:
+	-rm -f *.tab.c
+
+# Per-source dependency fragments kept in $(DEPDIR), one .Plo file per
+# library source. Each line is only active when @AMDEP_TRUE@ is substituted
+# to nothing by configure.
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsalpha.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmscam02.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmscgats.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmscnvrt.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmserr.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsgamma.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsgmt.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmshalf.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsintrp.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsio0.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsio1.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmslut.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsmtrx.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsnamed.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsopt.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmspack.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmspcs.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsplugin.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsps2.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmssamp.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmssm.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmstypes.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsvirt.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmswtpnt.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsxform.Plo@am__quote@
+
+# Suffix rules for compiling C sources. Each rule carries three alternative
+# recipes selected by configure-time conditionals:
+#   am__fastdepCC_TRUE          compile and emit dependencies in one step
+#                               (-MD -MP -MF into a .Tpo file that is renamed
+#                               once the compile has succeeded)
+#   AMDEP_TRUE + fastdep FALSE  set up the environment for $(depcomp); the
+#                               @AMDEPBACKSLASH@ lines are presumably joined
+#                               with the compile line that follows
+#   fastdep FALSE               plain compile
+
+# .c -> .o
+.c.o:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+# .c -> .obj; same as above except that the source path goes through $(CYGPATH_W)
+.c.obj:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+# .c -> .lo (libtool object); uses $(LTCOMPILE) and stores dependencies as .Plo
+.c.lo:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\
+@am__fastdepCC_TRUE@	$(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+
+# Remove libtool objects of the current directory
+mostlyclean-libtool:
+	-rm -f *.lo
+
+# Remove the directories libtool keeps its real outputs in
+clean-libtool:
+	-rm -rf .libs _libs
+
+# --- Source-index targets ------------------------------------------------------
+# All of them work on $$unique, the shell variable the recipes read right after
+# expanding $(am__define_uniq_tagged_files).
+
+# mkid database
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+# etags: runs only when ETAGS_ARGS, the positional parameters or the file list
+# are non-empty. The leading "set x" / "shift" pair resets the positional
+# parameters that are later passed on as "$$@".
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+# ctags: skipped when both CTAGS_ARGS and the file list are empty
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+# GNU global: run gtags from $(top_srcdir), passing it the absolute path of
+# $(top_builddir)
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+# cscope: append one path per tagged file to $(top_builddir)/cscope.files.
+# Files present in the current directory are listed under $(subdir); the
+# others are listed under the source directory (used as-is when absolute).
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+# Remove every index file produced by the targets above
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+# Copy every file of $(DISTFILES) into $(distdir).
+#   1. srcdirstrip / topsrcdirstrip are $(srcdir) / $(top_srcdir) with regex
+#      metacharacters escaped, so they can be used as sed patterns.
+#   2. dist_files is DISTFILES with the $(srcdir)/ prefix removed and the
+#      $(top_srcdir)/ prefix replaced by $(top_builddir)/.
+#   3. Sub-directories needed below $(distdir) are created up front.
+#   4. Each entry is taken from the current directory when it exists there,
+#      otherwise from $(srcdir). Directories are copied recursively (the
+#      $(srcdir) copy first when the entry exists in both places) and made
+#      user-writable; plain files are copied unless already present in $(distdir).
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+# check only makes sure everything is built; no test is run from this directory
+check-am: all-am
+check: check-am
+# Real work behind 'all': an up-to-date Makefile plus $(LTLIBRARIES)
+all-am: Makefile $(LTLIBRARIES)
+# Create the installation directory used by this Makefile
+installdirs:
+	for dir in "$(DESTDIR)$(libdir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+# Public install/uninstall entry points forward to their -am counterparts
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+# Build first, then run the exec and data halves of the install in a sub-make
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+# Re-run 'install' with the stripping install program and INSTALL_STRIP_FLAG=-s.
+# When $(STRIP) is non-empty it is additionally handed over as STRIPPROG
+# through INSTALL_PROGRAM_ENV.
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+# --- Cleaning levels and empty hook targets ----------------------------------
+# Cleaning is layered: mostlyclean-am < clean-am < distclean-am <
+# maintainer-clean-am, each level depending on the previous one.
+# Targets with neither prerequisites nor recipe are hooks that do nothing in
+# this directory.
+
+mostlyclean-generic:
+
+clean-generic:
+
+# Remove the files listed in CONFIG_CLEAN_FILES; the VPATH variants are only
+# removed when building outside the source directory
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+# Prints a warning only; it deletes nothing by itself
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
+	mostlyclean-am
+
+# On top of distclean-am, drop the dependency directory and the generated Makefile
+distclean: distclean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+# The only thing this directory installs is the library
+install-exec-am: install-libLTLIBRARIES
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+# Same extra removals as distclean, on top of maintainer-clean-am
+maintainer-clean: maintainer-clean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-libLTLIBRARIES
+
+# Targets whose recipes invoke $(MAKE) again
+.MAKE: install-am install-strip
+
+# Targets that are commands rather than files, so they are always considered
+# out of date
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+	clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \
+	ctags-am distclean distclean-compile distclean-generic \
+	distclean-libtool distclean-tags distdir dvi dvi-am html \
+	html-am info info-am install install-am install-data \
+	install-data-am install-dvi install-dvi-am install-exec \
+	install-exec-am install-html install-html-am install-info \
+	install-info-am install-libLTLIBRARIES install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	installcheck installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags tags-am uninstall uninstall-am uninstall-libLTLIBRARIES
+
+# Never delete the Makefile, even if make is interrupted while rebuilding it
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsalpha.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsalpha.cpp
new file mode 100644
index 0000000000..ae9f3d9a11
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsalpha.cpp
@@ -0,0 +1,635 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Alpha copy ------------------------------------------------------------------------------------------------------------------
+
+// This macro returns the given 16-bit word with its two bytes swapped (endianness change)
+#define CHANGE_ENDIAN(w)    (cmsUInt16Number) ((cmsUInt16Number) ((w)<<8)|((w)>>8))
+
+
+// Round to the nearest byte value, taking care of saturation
+cmsINLINE cmsUInt8Number _cmsQuickSaturateByte(cmsFloat64Number d)
+{
+    // Bias by one half so that the floor taken at the end rounds to nearest
+    const cmsFloat64Number biased = d + 0.5;
+
+    if (biased <= 0)
+        return 0;       // below the byte range
+
+    if (biased >= 255.0)
+        return 255;     // above the byte range
+
+    return (cmsUInt8Number) _cmsQuickFloorWord(biased);
+}
+
+
+// Size in bytes of one sample of the given pixel format
+static
+cmsUInt32Number trueBytesSize(cmsUInt32Number Format)
+{
+    const cmsUInt32Number declared = T_BYTES(Format);
+
+    // The T_BYTES field reads zero for doubles; any other value is already the size
+    return (declared != 0) ? declared : (cmsUInt32Number) sizeof(double);
+}
+
+
+// Several format converters
+
+typedef void(*cmsFormatterAlphaFn)(void* dst, const void* src);
+
+
+// From 8
+
+// 8 -> 8: the sample is a single byte, moved across unchanged
+static
+void copy8(void* dst, const void* src)
+{
+    const size_t sampleSize = 1;
+
+    memmove(dst, src, sampleSize);
+}
+
+// 8 -> 16: widen the byte through FROM_8_TO_16
+static
+void from8to16(void* dst, const void* src)
+{
+    const cmsUInt8Number sample = *(const cmsUInt8Number*) src;
+    cmsUInt16Number* out = (cmsUInt16Number*) dst;
+
+    *out = FROM_8_TO_16(sample);
+}
+
+// 8 -> 16 swapped-endian: widen through FROM_8_TO_16, then swap the two bytes
+static
+void from8to16SE(void* dst, const void* src)
+{
+    const cmsUInt8Number sample = *(const cmsUInt8Number*) src;
+    cmsUInt16Number* out = (cmsUInt16Number*) dst;
+
+    *out = CHANGE_ENDIAN(FROM_8_TO_16(sample));
+}
+
+// 8 -> float: 0..255 is mapped onto 0..1
+static
+void from8toFLT(void* dst, const void* src)
+{
+    const cmsUInt8Number sample = *(const cmsUInt8Number*) src;
+    cmsFloat32Number* out = (cmsFloat32Number*) dst;
+
+    *out = sample / 255.0f;
+}
+
+// 8 -> double: 0..255 is mapped onto 0..1
+static
+void from8toDBL(void* dst, const void* src)
+{
+    const cmsUInt8Number sample = *(const cmsUInt8Number*) src;
+    cmsFloat64Number* out = (cmsFloat64Number*) dst;
+
+    *out = sample / 255.0;
+}
+
+// 8 -> half: map 0..255 onto 0..1 as float, then encode as half.
+// The body is empty when CMS_NO_HALF_SUPPORT is defined.
+static
+void from8toHLF(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsUInt8Number sample = *(const cmsUInt8Number*) src;
+    cmsFloat32Number normalized = sample / 255.0f;
+
+    *(cmsUInt16Number*) dst = _cmsFloat2Half(normalized);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+
+// From 16
+
+// 16 -> 8: narrow the word through FROM_16_TO_8
+static
+void from16to8(void* dst, const void* src)
+{
+    const cmsUInt16Number word = *(const cmsUInt16Number*) src;
+    cmsUInt8Number* out = (cmsUInt8Number*) dst;
+
+    *out = FROM_16_TO_8(word);
+}
+
+// 16 swapped-endian -> 8: swap the bytes first, then narrow through FROM_16_TO_8
+static
+void from16SEto8(void* dst, const void* src)
+{
+    const cmsUInt16Number word = *(const cmsUInt16Number*) src;
+    cmsUInt8Number* out = (cmsUInt8Number*) dst;
+
+    *out = FROM_16_TO_8(CHANGE_ENDIAN(word));
+}
+
+// Two-byte sample with the same layout on both sides: moved across unchanged
+static
+void copy16(void* dst, const void* src)
+{
+    const size_t sampleSize = 2;
+
+    memmove(dst, src, sampleSize);
+}
+
+// 16 <-> 16 swapped-endian: the same byte swap serves both directions
+static
+void from16to16(void* dst, const void* src)
+{
+    const cmsUInt16Number word = *(const cmsUInt16Number*) src;
+    cmsUInt16Number* out = (cmsUInt16Number*) dst;
+
+    *out = CHANGE_ENDIAN(word);
+}
+
+// 16 -> float: 0..65535 is mapped onto 0..1.
+// Declared static like the other converters: within this file it is referenced
+// only through the formatter table, so it does not need external linkage.
+static
+void from16toFLT(void* dst, const void* src)
+{
+    const cmsUInt16Number n = *(const cmsUInt16Number*) src;
+
+    *(cmsFloat32Number*) dst = n / 65535.0f;
+}
+
+// 16 swapped-endian -> float: swap the bytes, then map 0..65535 onto 0..1.
+// Declared static like the other converters: within this file it is referenced
+// only through the formatter table, so it does not need external linkage.
+static
+void from16SEtoFLT(void* dst, const void* src)
+{
+    const cmsUInt16Number n = *(const cmsUInt16Number*) src;
+
+    *(cmsFloat32Number*) dst = (CHANGE_ENDIAN(n)) / 65535.0f;
+}
+
+// 16 -> double: 0..65535 is mapped onto 0..1.
+// The divisor is a double constant on purpose: dividing by 65535.0f would
+// carry out the division in single precision and only then widen the result.
+// Declared static like the other converters of this file.
+static
+void from16toDBL(void* dst, const void* src)
+{
+    const cmsUInt16Number n = *(const cmsUInt16Number*) src;
+
+    *(cmsFloat64Number*) dst = n / 65535.0;
+}
+
+// 16 swapped-endian -> double: swap the bytes, then map 0..65535 onto 0..1.
+// The divisor is a double constant on purpose: dividing by 65535.0f would
+// carry out the division in single precision and only then widen the result.
+// Declared static like the other converters of this file.
+static
+void from16SEtoDBL(void* dst, const void* src)
+{
+    const cmsUInt16Number n = *(const cmsUInt16Number*) src;
+
+    *(cmsFloat64Number*) dst = (CHANGE_ENDIAN(n)) / 65535.0;
+}
+
+// 16 -> half: map 0..65535 onto 0..1 as float, then encode as half.
+// The body is empty when CMS_NO_HALF_SUPPORT is defined.
+static
+void from16toHLF(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsUInt16Number word = *(const cmsUInt16Number*) src;
+    cmsFloat32Number normalized = word / 65535.0f;
+
+    *(cmsUInt16Number*) dst = _cmsFloat2Half(normalized);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+
+// 16 swapped-endian -> half: swap the bytes, map onto 0..1, encode as half.
+// The body is empty when CMS_NO_HALF_SUPPORT is defined.
+static
+void from16SEtoHLF(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsUInt16Number word = *(const cmsUInt16Number*) src;
+    cmsFloat32Number normalized = (CHANGE_ENDIAN(word)) / 65535.0f;
+
+    *(cmsUInt16Number*) dst = _cmsFloat2Half(normalized);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+// From Float
+
+// float -> 8: scale by 255 and saturate to a byte
+static
+void fromFLTto8(void* dst, const void* src)
+{
+    const cmsFloat32Number value = *(const cmsFloat32Number*) src;
+    cmsUInt8Number* out = (cmsUInt8Number*) dst;
+
+    *out = _cmsQuickSaturateByte(value * 255.0f);
+}
+
+// float -> 16: scale by 65535 and saturate to a word
+static
+void fromFLTto16(void* dst, const void* src)
+{
+    const cmsFloat32Number value = *(const cmsFloat32Number*) src;
+    cmsUInt16Number* out = (cmsUInt16Number*) dst;
+
+    *out = _cmsQuickSaturateWord(value * 65535.0f);
+}
+
+// float -> 16 swapped-endian: scale by 65535, saturate, then swap the bytes
+static
+void fromFLTto16SE(void* dst, const void* src)
+{
+    const cmsFloat32Number value = *(const cmsFloat32Number*) src;
+    const cmsUInt16Number word = _cmsQuickSaturateWord(value * 65535.0f);
+    cmsUInt16Number* out = (cmsUInt16Number*) dst;
+
+    *out = CHANGE_ENDIAN(word);
+}
+
+// float -> float: moved across unchanged
+static
+void copy32(void* dst, const void* src)
+{
+    const size_t sampleSize = sizeof(cmsFloat32Number);
+
+    memmove(dst, src, sampleSize);
+}
+
+// float -> double: plain widening, no rescaling
+static
+void fromFLTtoDBL(void* dst, const void* src)
+{
+    const cmsFloat32Number* in = (const cmsFloat32Number*) src;
+    cmsFloat64Number* out = (cmsFloat64Number*) dst;
+
+    *out = (cmsFloat64Number) *in;
+}
+
+// float -> half: re-encode the value, no rescaling.
+// The body is empty when CMS_NO_HALF_SUPPORT is defined.
+static
+void fromFLTtoHLF(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsFloat32Number value = *(const cmsFloat32Number*) src;
+    cmsUInt16Number* out = (cmsUInt16Number*) dst;
+
+    *out = _cmsFloat2Half(value);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+
+
+// From HALF
+
+// half -> 8: decode to float, scale by 255 and saturate to a byte.
+// The body is empty when CMS_NO_HALF_SUPPORT is defined.
+static
+void fromHLFto8(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsUInt16Number half = *(const cmsUInt16Number*) src;
+    cmsFloat32Number value = _cmsHalf2Float(half);
+
+    *(cmsUInt8Number*) dst = _cmsQuickSaturateByte(value * 255.0f);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+
+// half -> 16: decode to float, scale by 65535 and saturate to a word.
+// The body is empty when CMS_NO_HALF_SUPPORT is defined.
+static
+void fromHLFto16(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsUInt16Number half = *(const cmsUInt16Number*) src;
+    cmsFloat32Number value = _cmsHalf2Float(half);
+
+    *(cmsUInt16Number*) dst = _cmsQuickSaturateWord(value * 65535.0f);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+
+// half -> 16 swapped-endian: decode, scale by 65535, saturate, swap the bytes.
+// The body is empty when CMS_NO_HALF_SUPPORT is defined.
+static
+void fromHLFto16SE(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsUInt16Number half = *(const cmsUInt16Number*) src;
+    cmsFloat32Number value = _cmsHalf2Float(half);
+    cmsUInt16Number word = _cmsQuickSaturateWord(value * 65535.0f);
+
+    *(cmsUInt16Number*) dst = CHANGE_ENDIAN(word);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+// half -> float: decode only, no rescaling.
+// The body is empty when CMS_NO_HALF_SUPPORT is defined.
+static
+void fromHLFtoFLT(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsUInt16Number half = *(const cmsUInt16Number*) src;
+    cmsFloat32Number* out = (cmsFloat32Number*) dst;
+
+    *out = _cmsHalf2Float(half);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+
+// half -> double: decode to float and widen, no rescaling.
+// The body is empty when CMS_NO_HALF_SUPPORT is defined.
+static
+void fromHLFtoDBL(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsUInt16Number half = *(const cmsUInt16Number*) src;
+    cmsFloat64Number* out = (cmsFloat64Number*) dst;
+
+    *out = (cmsFloat64Number) _cmsHalf2Float(half);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+
+// From double
+// double -> 8: scale by 255 and saturate to a byte
+static
+void fromDBLto8(void* dst, const void* src)
+{
+    const cmsFloat64Number value = *(const cmsFloat64Number*) src;
+    cmsUInt8Number* out = (cmsUInt8Number*) dst;
+
+    *out = _cmsQuickSaturateByte(value * 255.0);
+}
+
+// double -> 16: scale by 65535 and saturate to a word
+static
+void fromDBLto16(void* dst, const void* src)
+{
+    const cmsFloat64Number value = *(const cmsFloat64Number*) src;
+    cmsUInt16Number* out = (cmsUInt16Number*) dst;
+
+    *out = _cmsQuickSaturateWord(value * 65535.0f);
+}
+
+// double -> 16 swapped-endian: scale by 65535, saturate, then swap the bytes
+static
+void fromDBLto16SE(void* dst, const void* src)
+{
+    const cmsFloat64Number value = *(const cmsFloat64Number*) src;
+    const cmsUInt16Number word = _cmsQuickSaturateWord(value * 65535.0f);
+    cmsUInt16Number* out = (cmsUInt16Number*) dst;
+
+    *out = CHANGE_ENDIAN(word);
+}
+// double -> float: plain narrowing, no rescaling
+static
+void fromDBLtoFLT(void* dst, const void* src)
+{
+    const cmsFloat64Number* in = (const cmsFloat64Number*) src;
+    cmsFloat32Number* out = (cmsFloat32Number*) dst;
+
+    *out = (cmsFloat32Number) *in;
+}
+
+// double -> half: narrow to float, then encode as half.
+// The body is empty when CMS_NO_HALF_SUPPORT is defined.
+static
+void fromDBLtoHLF(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+    const cmsFloat64Number value = *(const cmsFloat64Number*) src;
+    cmsFloat32Number narrowed = (cmsFloat32Number) value;
+
+    *(cmsUInt16Number*) dst = _cmsFloat2Half(narrowed);
+#else
+    cmsUNUSED_PARAMETER(dst);
+    cmsUNUSED_PARAMETER(src);
+#endif
+}
+
+// double -> double: moved across unchanged
+static
+void copy64(void* dst, const void* src)
+{
+    const size_t sampleSize = sizeof(cmsFloat64Number);
+
+    memmove(dst, src, sampleSize);
+}
+
+
+// Returns the position (row or column) of a pixel format in the table of alpha
+// converters:
+//
+//    0 = 8 bits, 1 = 16 bits, 2 = 16 bits swapped-endian,
+//    3 = half,   4 = float,   5 = double
+//
+// or -1 when the sample encoding is not recognized. Half is only reported when
+// the library is built with half support.
+static
+int FormatterPos(cmsUInt32Number frm)
+{
+    cmsUInt32Number  b = T_BYTES(frm);
+
+    if (T_FLOAT(frm)) {
+
+        if (b == 0)
+            return 5; // DBL (the T_BYTES field reads zero for doubles)
+#ifndef CMS_NO_HALF_SUPPORT
+        if (b == 2)
+            return 3; // HLF
+#endif
+        if (b == 4)
+            return 4; // FLT
+
+        return -1; // floating point width without a converter
+    }
+
+    // Integer samples. The endianness flag has to be looked at before settling
+    // on plain 16 bits, otherwise swapped-endian data would be copied without
+    // the byte swap.
+    if (b == 2)
+        return T_ENDIAN16(frm) ? 2 : 1; // 16SE : 16
+
+    if (b == 1)
+        return 0; // 8
+
+    return -1; // not recognized
+}
+
+// Obtains the alpha-to-alpha converter for a pair of pixel formats.
+//
+//   id  : context used to report errors
+//   in  : pixel format of the source buffer
+//   out : pixel format of the destination buffer
+//
+// Returns the converter, or NULL after signalling cmsERROR_UNKNOWN_EXTENSION
+// when either format has a sample encoding the table does not cover.
+static
+cmsFormatterAlphaFn _cmsGetFormatterAlpha(cmsContext id, cmsUInt32Number in, cmsUInt32Number out)
+{
+    // Rows are selected by the input format and columns by the output format,
+    // both through FormatterPos()
+    static const cmsFormatterAlphaFn FormattersAlpha[6][6] = {
+
+       /* from 8 */  { copy8,       from8to16,   from8to16SE,   from8toHLF,   from8toFLT,    from8toDBL    },
+       /* from 16*/  { from16to8,   copy16,      from16to16,    from16toHLF,  from16toFLT,   from16toDBL   },
+       /* from 16SE*/{ from16SEto8, from16to16,  copy16,        from16SEtoHLF,from16SEtoFLT, from16SEtoDBL },
+       /* from HLF*/ { fromHLFto8,  fromHLFto16, fromHLFto16SE, copy16,       fromHLFtoFLT,  fromHLFtoDBL  },
+       /* from FLT*/ { fromFLTto8,  fromFLTto16, fromFLTto16SE, fromFLTtoHLF, copy32,        fromFLTtoDBL  },
+       /* from DBL*/ { fromDBLto8,  fromDBLto16, fromDBLto16SE, fromDBLtoHLF, fromDBLtoFLT,  copy64 }};
+
+    // Derive the limit from the table itself so that the bounds check cannot
+    // fall out of step with the number of rows (the last row, DBL, is valid)
+    const int nFormats = (int) (sizeof(FormattersAlpha) / sizeof(FormattersAlpha[0]));
+
+    int in_n  = FormatterPos(in);
+    int out_n = FormatterPos(out);
+
+    if (in_n < 0 || out_n < 0 || in_n >= nFormats || out_n >= nFormats) {
+
+        cmsSignalError(id, cmsERROR_UNKNOWN_EXTENSION, "Unrecognized alpha channel width");
+        return NULL;
+    }
+
+    return FormattersAlpha[in_n][out_n];
+}
+
+
+
+// Chunky (interleaved) layout: for every extra channel, compute the byte offset
+// of its first sample inside a pixel and the distance in bytes to the same
+// channel of the next pixel.
+//
+//   Format                     : pixel format being described
+//   ComponentStartingOrder     : out, one starting offset per extra channel
+//   ComponentPointerIncrements : out, one increment per extra channel
+//
+// Both output arrays are left untouched when the total channel count is zero
+// or not below cmsMAXCHANNELS.
+static
+void ComputeIncrementsForChunky(cmsUInt32Number Format,
+                                cmsUInt32Number ComponentStartingOrder[],
+                                cmsUInt32Number ComponentPointerIncrements[])
+{
+    cmsUInt32Number order[cmsMAXCHANNELS];
+    const cmsUInt32Number nExtra = T_EXTRA(Format);
+    const cmsUInt32Number nColor = T_CHANNELS(Format);
+    const cmsUInt32Number nTotal = nColor + nExtra;
+    const cmsUInt32Number sampleSize = trueBytesSize(Format);
+    const cmsUInt32Number pixelSize = sampleSize * nTotal;
+    cmsUInt32Number k;
+
+    // Sanity check
+    if (nTotal == 0 || nTotal >= cmsMAXCHANNELS)
+        return;
+
+    memset(order, 0, sizeof(order));
+
+    // Channel positions, listed backwards when the format asks for DOSWAP
+    for (k = 0; k < nTotal; k++)
+        order[k] = T_DOSWAP(Format) ? (nTotal - k - 1) : k;
+
+    // SWAPFIRST rotates the list by one place: every entry moves one slot down
+    // and the first entry goes to the end
+    if (T_SWAPFIRST(Format) && nTotal > 1) {
+
+        const cmsUInt32Number first = order[0];
+
+        for (k = 1; k < nTotal; k++)
+            order[k - 1] = order[k];
+
+        order[nTotal - 1] = first;
+    }
+
+    // Turn positions into byte offsets inside the pixel
+    if (sampleSize > 1) {
+        for (k = 0; k < nTotal; k++)
+            order[k] *= sampleSize;
+    }
+
+    // The extra channels are the entries that follow the color channels; the
+    // distance between consecutive samples of one channel is a whole pixel
+    for (k = 0; k < nExtra; k++) {
+        ComponentPointerIncrements[k] = pixelSize;
+        ComponentStartingOrder[k] = order[nColor + k];
+    }
+}
+
+
+
+// Planar layout: for every extra channel, compute the byte offset of its plane
+// and the distance in bytes between consecutive samples inside that plane.
+//
+//   Format                     : pixel format being described
+//   BytesPerPlane              : size of one plane, used to place each channel
+//   ComponentStartingOrder     : out, one starting offset per extra channel
+//   ComponentPointerIncrements : out, one increment per extra channel
+//
+// Both output arrays are left untouched when the total channel count is zero
+// or not below cmsMAXCHANNELS.
+static
+void ComputeIncrementsForPlanar(cmsUInt32Number Format,
+                                cmsUInt32Number BytesPerPlane,
+                                cmsUInt32Number ComponentStartingOrder[],
+                                cmsUInt32Number ComponentPointerIncrements[])
+{
+    cmsUInt32Number order[cmsMAXCHANNELS];
+    const cmsUInt32Number nExtra = T_EXTRA(Format);
+    const cmsUInt32Number nColor = T_CHANNELS(Format);
+    const cmsUInt32Number nTotal = nColor + nExtra;
+    const cmsUInt32Number sampleSize = trueBytesSize(Format);
+    cmsUInt32Number k;
+
+    // Sanity check
+    if (nTotal == 0 || nTotal >= cmsMAXCHANNELS)
+        return;
+
+    memset(order, 0, sizeof(order));
+
+    // Channel positions, listed backwards when the format asks for DOSWAP
+    for (k = 0; k < nTotal; k++)
+        order[k] = T_DOSWAP(Format) ? (nTotal - k - 1) : k;
+
+    // SWAPFIRST rotates the list by one place: every entry moves one slot down
+    // and the first entry goes to the end
+    if (T_SWAPFIRST(Format) && nTotal > 0) {
+
+        const cmsUInt32Number first = order[0];
+
+        for (k = 1; k < nTotal; k++)
+            order[k - 1] = order[k];
+
+        order[nTotal - 1] = first;
+    }
+
+    // Turn positions into byte offsets: each channel lives in its own plane
+    for (k = 0; k < nTotal; k++)
+        order[k] *= BytesPerPlane;
+
+    // The extra channels are the entries that follow the color channels; inside
+    // a plane consecutive samples are one sample apart
+    for (k = 0; k < nExtra; k++) {
+        ComponentPointerIncrements[k] = sampleSize;
+        ComponentStartingOrder[k] = order[nColor + k];
+    }
+}
+
+
+
+// Dispatcher for chunky and planar layouts: picks the routine that matches the
+// T_PLANAR flag of the format. BytesPerPlane is only used by the planar routine.
+static
+void  ComputeComponentIncrements(cmsUInt32Number Format,
+                                 cmsUInt32Number BytesPerPlane,
+                                 cmsUInt32Number ComponentStartingOrder[],
+                                 cmsUInt32Number ComponentPointerIncrements[])
+{
+    if (!T_PLANAR(Format)) {
+
+        ComputeIncrementsForChunky(Format, ComponentStartingOrder, ComponentPointerIncrements);
+        return;
+    }
+
+    ComputeIncrementsForPlanar(Format, BytesPerPlane, ComponentStartingOrder, ComponentPointerIncrements);
+}
+
+
+
+// Handles extra channels copying alpha if requested by the flags.
+//
+//   p             : the transform; its InputFormat and OutputFormat describe the two buffers
+//   in, out       : source and destination pixel buffers
+//   PixelsPerLine : number of pixels handled on each line
+//   LineCount     : number of lines handled
+//   Stride        : bytes per line and bytes per plane of both buffers
+//
+// Nothing is copied unless cmsFLAGS_COPY_ALPHA is set, both formats carry the
+// same non-zero number of extra channels, the channel counts are within range
+// and a converter exists for the pair of sample encodings.
+void _cmsHandleExtraChannels(_cmsTRANSFORM* p, const void* in,
+                                               void* out,
+                                               cmsUInt32Number PixelsPerLine,
+                                               cmsUInt32Number LineCount,
+                                               const cmsStride* Stride)
+{
+    cmsUInt32Number i, j, k;
+    cmsUInt32Number nExtra;
+    cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS];
+    cmsUInt32Number SourceIncrements[cmsMAXCHANNELS];
+    cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS];
+    cmsUInt32Number DestIncrements[cmsMAXCHANNELS];
+
+    cmsFormatterAlphaFn copyValueFn;
+
+    // Make sure we need some copy
+    if (!(p->dwOriginalFlags & cmsFLAGS_COPY_ALPHA))
+        return;
+
+    // Exit early if in-place color-management is occurring - no need to copy extra channels to themselves.
+    if (p->InputFormat == p->OutputFormat && in == out)
+        return;
+
+    // Make sure we have same number of alpha channels. If not, just return as this should be checked at transform creation time.
+    nExtra = T_EXTRA(p->InputFormat);
+    if (nExtra != T_EXTRA(p->OutputFormat))
+        return;
+
+    // Anything to do?
+    if (nExtra == 0)
+        return;
+
+    // The increment routines return without writing their output arrays when the
+    // total channel count reaches cmsMAXCHANNELS. Apply the same limit here so
+    // that the arrays are never read uninitialized.
+    if (T_CHANNELS(p->InputFormat) + nExtra >= cmsMAXCHANNELS ||
+        T_CHANNELS(p->OutputFormat) + nExtra >= cmsMAXCHANNELS)
+        return;
+
+    // Compute the increments 
+    ComputeComponentIncrements(p->InputFormat, Stride->BytesPerPlaneIn, SourceStartingOrder, SourceIncrements);
+    ComputeComponentIncrements(p->OutputFormat, Stride->BytesPerPlaneOut, DestStartingOrder, DestIncrements);
+
+    // Check for conversions 8, 16, half, float, dbl
+    copyValueFn = _cmsGetFormatterAlpha(p->ContextID, p->InputFormat, p->OutputFormat);
+
+    // No converter for this pair of encodings: the error has already been
+    // signalled by _cmsGetFormatterAlpha, and calling through NULL would crash
+    if (copyValueFn == NULL)
+        return;
+
+    if (nExtra == 1) { // Optimized routine for copying a single extra channel quickly
+
+        cmsUInt8Number* SourcePtr;
+        cmsUInt8Number* DestPtr;
+
+        // Byte offset of the current line inside each buffer
+        cmsUInt32Number SourceStrideIncrement = 0;
+        cmsUInt32Number DestStrideIncrement = 0;
+
+        // The loop itself
+        for (i = 0; i < LineCount; i++) {
+
+            // Prepare pointers for the loop
+            SourcePtr = (cmsUInt8Number*)in + SourceStartingOrder[0] + SourceStrideIncrement;
+            DestPtr = (cmsUInt8Number*)out + DestStartingOrder[0] + DestStrideIncrement;
+
+            for (j = 0; j < PixelsPerLine; j++) {
+
+                copyValueFn(DestPtr, SourcePtr);
+
+                SourcePtr += SourceIncrements[0];
+                DestPtr += DestIncrements[0];
+            }
+
+            SourceStrideIncrement += Stride->BytesPerLineIn;
+            DestStrideIncrement += Stride->BytesPerLineOut;
+        }
+
+    }
+    else { // General case with more than one extra channel
+
+        cmsUInt8Number* SourcePtr[cmsMAXCHANNELS];
+        cmsUInt8Number* DestPtr[cmsMAXCHANNELS];
+
+        // Byte offset of the current line, kept per extra channel
+        cmsUInt32Number SourceStrideIncrements[cmsMAXCHANNELS];
+        cmsUInt32Number DestStrideIncrements[cmsMAXCHANNELS];
+
+        memset(SourceStrideIncrements, 0, sizeof(SourceStrideIncrements));
+        memset(DestStrideIncrements, 0, sizeof(DestStrideIncrements));
+
+        // The loop itself       
+        for (i = 0; i < LineCount; i++) {
+
+            // Prepare pointers for the loop
+            for (j = 0; j < nExtra; j++) {
+
+                SourcePtr[j] = (cmsUInt8Number*)in + SourceStartingOrder[j] + SourceStrideIncrements[j];
+                DestPtr[j] = (cmsUInt8Number*)out + DestStartingOrder[j] + DestStrideIncrements[j];
+            }
+
+            for (j = 0; j < PixelsPerLine; j++) {
+
+                for (k = 0; k < nExtra; k++) {
+
+                    copyValueFn(DestPtr[k], SourcePtr[k]);
+
+                    SourcePtr[k] += SourceIncrements[k];
+                    DestPtr[k] += DestIncrements[k];
+                }
+            }
+
+            for (j = 0; j < nExtra; j++) {
+
+                SourceStrideIncrements[j] += Stride->BytesPerLineIn;
+                DestStrideIncrements[j] += Stride->BytesPerLineOut;
+            }
+        }
+    }
+}
+
+
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscam02.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscam02.cpp
new file mode 100644
index 0000000000..9cc49fbf20
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscam02.cpp
@@ -0,0 +1,486 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// CIECAM 02 appearance model. Many thanks to Jordi Vilar for the debugging.
+
+// ---------- Implementation --------------------------------------------
+
+typedef struct  {
+    // Scratch record for one colour; each pipeline stage in this file fills its own fields
+    cmsFloat64Number XYZ[3];      // Tristimulus values (input of XYZtoCAT02, output of CAT02toXYZ)
+    cmsFloat64Number RGB[3];      // CAT02 space, before chromatic adaptation
+    cmsFloat64Number RGBc[3];     // CAT02 space, after chromatic adaptation
+    cmsFloat64Number RGBp[3];     // Output of CAT02toHPE / input of HPEtoCAT02
+    cmsFloat64Number RGBpa[3];    // RGBp after NonlinearCompression
+    cmsFloat64Number a, b, h, e, H, A, J, Q, s, t, C, M;   // Intermediate and final appearance values; only J, C and h cross the public API
+    cmsFloat64Number abC[2];      // abC, abs and abM are not referenced anywhere in this file
+    cmsFloat64Number abs[2];
+    cmsFloat64Number abM[2];
+
+} CAM02COLOR;
+
+typedef struct  {
+    // Per-handle model state, filled in once by cmsCIECAM02Init
+    CAM02COLOR adoptedWhite;                  // White point, pre-processed up to NonlinearCompression
+    cmsFloat64Number LA, Yb;                  // Copied from cmsViewingConditions (La, Yb)
+    cmsFloat64Number F, c, Nc;                // Surround-dependent constants selected in cmsCIECAM02Init
+    cmsUInt32Number surround;                 // Surround selector copied from cmsViewingConditions
+    cmsFloat64Number n, Nbb, Ncb, z, FL, D;   // Derived factors; D may also be supplied by the caller
+
+    cmsContext ContextID;                     // Context used to allocate and free this structure
+
+} cmsCIECAM02;
+
+// n = Yb / Yw: background luminance factor relative to the Y of the adopted white.
+static
+cmsFloat64Number compute_n(cmsCIECAM02* pMod)
+{
+    return pMod->Yb / pMod->adoptedWhite.XYZ[1];
+}
+// z = 1.48 + n^0.5, with n as stored in the model.
+static
+cmsFloat64Number compute_z(cmsCIECAM02* pMod)
+{
+    return 1.48 + pow(pMod->n, 0.5);
+}
+// Nbb = 0.725 * (1 / n)^0.2, with n as stored in the model.
+static
+cmsFloat64Number computeNbb(cmsCIECAM02* pMod)
+{
+    return 0.725 * pow(1.0 / pMod->n, 0.2);
+}
+// Computes the factor FL from the adapting luminance LA stored in the model.
+static
+cmsFloat64Number computeFL(cmsCIECAM02* pMod)
+{
+    cmsFloat64Number k, FL;
+    // With k = 1 / (5*LA + 1):  FL = 0.2 * k^4 * (5*LA) + 0.1 * (1 - k^4)^2 * (5*LA)^(1/3)
+    k = 1.0 / ((5.0 * pMod->LA) + 1.0);
+    FL = 0.2 * pow(k, 4.0) * (5.0 * pMod->LA) + 0.1 *
+        (pow((1.0 - pow(k, 4.0)), 2.0)) *
+        (pow((5.0 * pMod->LA), (1.0 / 3.0)));
+
+    return FL;
+}
+// Degree of adaptation: D = F * (1 - (1/3.6) * exp((-LA - 42) / 92)).
+static
+cmsFloat64Number computeD(cmsCIECAM02* pMod)
+{
+    // F multiplies the whole bracket, as in the CIECAM02 definition. Subtracting the
+    // exponential term from F directly only gives the same value when F == 1.0.
+    const cmsFloat64Number decay = (1.0 / 3.6) * exp((-pMod->LA - 42) / 92.0);
+
+    return pMod->F * (1.0 - decay);
+}
+
+// XYZ -> CAT02 RGB: every output channel is the dot product of XYZ with one matrix row.
+static
+CAM02COLOR XYZtoCAT02(CAM02COLOR clr)
+{
+    static const cmsFloat64Number M[3][3] = { { 0.7328, 0.4296, -0.1624 }, { -0.7036, 1.6975, 0.0061 }, { 0.0030, 0.0136, 0.9834 } };
+    cmsUInt32Number row;
+    for (row = 0; row < 3; row++)
+        clr.RGB[row] = (clr.XYZ[0] * M[row][0]) + (clr.XYZ[1] * M[row][1]) + (clr.XYZ[2] * M[row][2]);
+    return clr;
+}
+// Scales every CAT02 channel by the gain  Yw * (D / white channel) + (1 - D).
+static
+CAM02COLOR ChromaticAdaptation(CAM02COLOR clr, cmsCIECAM02* pMod)
+{
+    const CAM02COLOR* white = &pMod->adoptedWhite;
+    cmsUInt32Number ch;
+
+    for (ch = 0; ch < 3; ch++) {
+        const cmsFloat64Number gain = (white->XYZ[1] * (pMod->D / white->RGB[ch])) + (1.0 - pMod->D);
+        clr.RGBc[ch] = gain * clr.RGB[ch];
+    }
+
+    return clr;
+}
+
+// Converts chromatically adapted CAT02 RGB (RGBc) to RGBp with a single 3x3 multiplication.
+static
+CAM02COLOR CAT02toHPE(CAM02COLOR clr)
+{
+    cmsFloat64Number M[9];
+    // Row-major M: rows (0.38971, 0.68898, -0.07868) and (-0.22981, 1.18340, 0.04641) times the CAT02toXYZ matrix; row 3 is that matrix's last row
+    M[0] =(( 0.38971 *  1.096124) + (0.68898 * 0.454369) + (-0.07868 * -0.009628));
+    M[1] =(( 0.38971 * -0.278869) + (0.68898 * 0.473533) + (-0.07868 * -0.005698));
+    M[2] =(( 0.38971 *  0.182745) + (0.68898 * 0.072098) + (-0.07868 *  1.015326));
+    M[3] =((-0.22981 *  1.096124) + (1.18340 * 0.454369) + ( 0.04641 * -0.009628));
+    M[4] =((-0.22981 * -0.278869) + (1.18340 * 0.473533) + ( 0.04641 * -0.005698));
+    M[5] =((-0.22981 *  0.182745) + (1.18340 * 0.072098) + ( 0.04641 *  1.015326));
+    M[6] =(-0.009628);
+    M[7] =(-0.005698);
+    M[8] =( 1.015326);
+
+    clr.RGBp[0] = (clr.RGBc[0] * M[0]) +  (clr.RGBc[1] * M[1]) + (clr.RGBc[2] * M[2]);
+    clr.RGBp[1] = (clr.RGBc[0] * M[3]) +  (clr.RGBc[1] * M[4]) + (clr.RGBc[2] * M[5]);
+    clr.RGBp[2] = (clr.RGBc[0] * M[6]) +  (clr.RGBc[1] * M[7]) + (clr.RGBc[2] * M[8]);
+
+    return  clr;
+}
+// Compresses RGBp into RGBpa channel by channel, then derives A from the three results.
+static
+CAM02COLOR NonlinearCompression(CAM02COLOR clr, cmsCIECAM02* pMod)
+{
+    cmsUInt32Number i;
+    cmsFloat64Number temp;
+    // Negative inputs are compressed on their magnitude and the sign is restored; 0.1 is added either way
+    for (i = 0; i < 3; i++) {
+        if (clr.RGBp[i] < 0) {
+
+            temp = pow((-1.0 * pMod->FL * clr.RGBp[i] / 100.0), 0.42);
+            clr.RGBpa[i] = (-1.0 * 400.0 * temp) / (temp + 27.13) + 0.1;
+        }
+        else {
+            temp = pow((pMod->FL * clr.RGBp[i] / 100.0), 0.42);
+            clr.RGBpa[i] = (400.0 * temp) / (temp + 27.13) + 0.1;
+        }
+    }
+    // A = (2*R + G + B/20 - 0.305) * Nbb on the compressed channels
+    clr.A = (((2.0 * clr.RGBpa[0]) + clr.RGBpa[1] +
+        (clr.RGBpa[2] / 20.0)) - 0.305) * pMod->Nbb;
+
+    return clr;
+}
+// Derives h, H, J, Q, C, M and s from the compressed responses RGBpa and from A.
+static
+CAM02COLOR ComputeCorrelates(CAM02COLOR clr, cmsCIECAM02* pMod)
+{
+    cmsFloat64Number a, b, temp, e, t, r2d, d2r;
+    // Opponent-type components built from the compressed responses
+    a = clr.RGBpa[0] - (12.0 * clr.RGBpa[1] / 11.0) + (clr.RGBpa[2] / 11.0);
+    b = (clr.RGBpa[0] + clr.RGBpa[1] - (2.0 * clr.RGBpa[2])) / 9.0;
+    // Hue angle h in degrees within [0, 360): quadrant-aware arctangent of b / a
+    r2d = (180.0 / 3.141592654);
+    if (a == 0) {
+        if (b == 0)     clr.h = 0;
+        else if (b > 0) clr.h = 90;
+        else            clr.h = 270;
+    }
+    else if (a > 0) {
+        temp = b / a;
+        if (b > 0)       clr.h = (r2d * atan(temp));
+        else if (b == 0) clr.h = 0;
+        else             clr.h = (r2d * atan(temp)) + 360;
+    }
+    else {
+        temp = b / a;
+        clr.h = (r2d * atan(temp)) + 180;
+    }
+    // e: hue-dependent factor that enters t below (the cosine argument is in radians)
+    d2r = (3.141592654 / 180.0);
+    e = ((12500.0 / 13.0) * pMod->Nc * pMod->Ncb) *
+        (cos((clr.h * d2r + 2.0)) + 3.8);
+    // H: piecewise interpolation of h between the breakpoints 20.14, 90, 164.25 and 237.53 degrees
+    if (clr.h < 20.14) {
+        temp = ((clr.h + 122.47)/1.2) + ((20.14 - clr.h)/0.8);
+        clr.H = 300 + (100*((clr.h + 122.47)/1.2)) / temp;
+    }
+    else if (clr.h < 90.0) {
+        temp = ((clr.h - 20.14)/0.8) + ((90.00 - clr.h)/0.7);
+        clr.H = (100*((clr.h - 20.14)/0.8)) / temp;
+    }
+    else if (clr.h < 164.25) {
+        temp = ((clr.h - 90.00)/0.7) + ((164.25 - clr.h)/1.0);
+        clr.H = 100 + ((100*((clr.h - 90.00)/0.7)) / temp);
+    }
+    else if (clr.h < 237.53) {
+        temp = ((clr.h - 164.25)/1.0) + ((237.53 - clr.h)/1.2);
+        clr.H = 200 + ((100*((clr.h - 164.25)/1.0)) / temp);
+    }
+    else {
+        temp = ((clr.h - 237.53)/1.2) + ((360 - clr.h + 20.14)/0.8);
+        clr.H = 300 + ((100*((clr.h - 237.53)/1.2)) / temp);
+    }
+    // J and Q are computed relative to the A value of the adopted white
+    clr.J = 100.0 * pow((clr.A / pMod->adoptedWhite.A),
+        (pMod->c * pMod->z));
+
+    clr.Q = (4.0 / pMod->c) * pow((clr.J / 100.0), 0.5) *
+        (pMod->adoptedWhite.A + 4.0) * pow(pMod->FL, 0.25);
+    // t: e * sqrt(a^2 + b^2), normalised by a weighted sum of the compressed responses
+    t = (e * pow(((a * a) + (b * b)), 0.5)) /
+        (clr.RGBpa[0] + clr.RGBpa[1] +
+        ((21.0 / 20.0) * clr.RGBpa[2]));
+
+    clr.C = pow(t, 0.9) * pow((clr.J / 100.0), 0.5) *
+        pow((1.64 - pow(0.29, pMod->n)), 0.73);
+
+    clr.M = clr.C * pow(pMod->FL, 0.25);
+    clr.s = 100.0 * pow((clr.M / clr.Q), 0.5);
+
+    return clr;
+}
+
+// Rebuilds A, a, b and the compressed responses RGBpa from the J, C and h stored in clr.
+static
+CAM02COLOR InverseCorrelates(CAM02COLOR clr, cmsCIECAM02* pMod)
+{
+
+    cmsFloat64Number t, e, p1, p2, p3, p4, p5, hr, d2r;
+    d2r = 3.141592654 / 180.0;
+    // t and e invert / repeat the expressions used in ComputeCorrelates
+    t = pow( (clr.C / (pow((clr.J / 100.0), 0.5) *
+        (pow((1.64 - pow(0.29, pMod->n)), 0.73)))),
+        (1.0 / 0.9) );
+    e = ((12500.0 / 13.0) * pMod->Nc * pMod->Ncb) *
+        (cos((clr.h * d2r + 2.0)) + 3.8);
+    // A is recovered from J by inverting J = 100 * (A / Aw)^(c*z)
+    clr.A = pMod->adoptedWhite.A * pow(
+           (clr.J / 100.0),
+           (1.0 / (pMod->c * pMod->z)));
+
+    p1 = e / t;
+    p2 = (clr.A / pMod->Nbb) + 0.305;
+    p3 = 21.0 / 20.0;
+
+    hr = clr.h * d2r;
+    // Solve for the larger of |sin|, |cos| first so the divisions below use the bigger denominator
+    if (fabs(sin(hr)) >= fabs(cos(hr))) {
+        p4 = p1 / sin(hr);
+        clr.b = (p2 * (2.0 + p3) * (460.0 / 1403.0)) /
+            (p4 + (2.0 + p3) * (220.0 / 1403.0) *
+            (cos(hr) / sin(hr)) - (27.0 / 1403.0) +
+            p3 * (6300.0 / 1403.0));
+        clr.a = clr.b * (cos(hr) / sin(hr));
+    }
+    else {
+        p5 = p1 / cos(hr);
+        clr.a = (p2 * (2.0 + p3) * (460.0 / 1403.0)) /
+            (p5 + (2.0 + p3) * (220.0 / 1403.0) -
+            ((27.0 / 1403.0) - p3 * (6300.0 / 1403.0)) *
+            (sin(hr) / cos(hr)));
+        clr.b = clr.a * (sin(hr) / cos(hr));
+    }
+    // Compressed responses as linear combinations of p2, a and b
+    clr.RGBpa[0] = ((460.0 / 1403.0) * p2) +
+              ((451.0 / 1403.0) * clr.a) +
+              ((288.0 / 1403.0) * clr.b);
+    clr.RGBpa[1] = ((460.0 / 1403.0) * p2) -
+              ((891.0 / 1403.0) * clr.a) -
+              ((261.0 / 1403.0) * clr.b);
+    clr.RGBpa[2] = ((460.0 / 1403.0) * p2) -
+              ((220.0 / 1403.0) * clr.a) -
+              ((6300.0 / 1403.0) * clr.b);
+
+    return clr;
+}
+
+static
+CAM02COLOR InverseNonlinearity(CAM02COLOR clr, cmsCIECAM02* pMod)
+{
+    cmsUInt32Number ch;
+
+    // Undo NonlinearCompression per channel: remove the 0.1 offset, invert the
+    // compression on the magnitude and put the sign back.
+    for (ch = 0; ch < 3; ch++) {
+        const cmsFloat64Number shifted   = clr.RGBpa[ch] - 0.1;
+        const cmsFloat64Number sign      = (shifted < 0) ? -1 : 1;
+        const cmsFloat64Number magnitude = fabs(shifted);
+
+        clr.RGBp[ch] = sign * (100.0 / pMod->FL) *
+            pow((27.13 * magnitude) / (400.0 - magnitude), 1.0 / 0.42);
+    }
+    return clr;
+}
+// Converts RGBp back to chromatically adapted CAT02 RGB (RGBc) with a single 3x3 multiplication.
+static
+CAM02COLOR HPEtoCAT02(CAM02COLOR clr)
+{
+    cmsFloat64Number M[9];
+    // Row-major M: each row of the XYZtoCAT02 matrix times a second fixed matrix (presumably the inverse of the one in CAT02toHPE - confirm)
+    M[0] = (( 0.7328 *  1.910197) + (0.4296 * 0.370950));
+    M[1] = (( 0.7328 * -1.112124) + (0.4296 * 0.629054));
+    M[2] = (( 0.7328 *  0.201908) + (0.4296 * 0.000008) - 0.1624);
+    M[3] = ((-0.7036 *  1.910197) + (1.6975 * 0.370950));
+    M[4] = ((-0.7036 * -1.112124) + (1.6975 * 0.629054));
+    M[5] = ((-0.7036 *  0.201908) + (1.6975 * 0.000008) + 0.0061);
+    M[6] = (( 0.0030 *  1.910197) + (0.0136 * 0.370950));
+    M[7] = (( 0.0030 * -1.112124) + (0.0136 * 0.629054));
+    M[8] = (( 0.0030 *  0.201908) + (0.0136 * 0.000008) + 0.9834);;
+
+    clr.RGBc[0] = (clr.RGBp[0] * M[0]) + (clr.RGBp[1] * M[1]) + (clr.RGBp[2] * M[2]);
+    clr.RGBc[1] = (clr.RGBp[0] * M[3]) + (clr.RGBp[1] * M[4]) + (clr.RGBp[2] * M[5]);
+    clr.RGBc[2] = (clr.RGBp[0] * M[6]) + (clr.RGBp[1] * M[7]) + (clr.RGBp[2] * M[8]);
+    return clr;
+}
+
+// Divides every adapted channel (RGBc) by its per-channel adaptation gain to recover RGB.
+static
+CAM02COLOR InverseChromaticAdaptation(CAM02COLOR clr,  cmsCIECAM02* pMod)
+{
+    cmsUInt32Number ch;
+    for (ch = 0; ch < 3; ch++) {
+        const cmsFloat64Number gain = (pMod->adoptedWhite.XYZ[1] * pMod->D / pMod->adoptedWhite.RGB[ch]) + 1.0 - pMod->D;
+        clr.RGB[ch] = clr.RGBc[ch] / gain;
+    }
+    return clr;
+}
+
+// CAT02 RGB -> XYZ: every output component is the dot product of RGB with one matrix row.
+static
+CAM02COLOR CAT02toXYZ(CAM02COLOR clr)
+{
+    static const cmsFloat64Number M[3][3] = { { 1.096124, -0.278869, 0.182745 }, { 0.454369, 0.473533, 0.072098 }, { -0.009628, -0.005698, 1.015326 } };
+    cmsUInt32Number row;
+    for (row = 0; row < 3; row++)
+        clr.XYZ[row] = (clr.RGB[0] * M[row][0]) + (clr.RGB[1] * M[row][1]) + (clr.RGB[2] * M[row][2]);
+    return clr;
+}
+
+// Builds a model for the viewing conditions in pVC. Returns NULL if the allocation fails; release the handle with cmsCIECAM02Done.
+cmsHANDLE  CMSEXPORT cmsCIECAM02Init(cmsContext ContextID, const cmsViewingConditions* pVC)
+{
+    cmsCIECAM02* lpMod;
+
+    _cmsAssert(pVC != NULL);
+
+    if((lpMod = (cmsCIECAM02*) _cmsMallocZero(ContextID, sizeof(cmsCIECAM02))) == NULL) {
+        return NULL;
+    }
+
+    lpMod ->ContextID = ContextID;
+
+    lpMod ->adoptedWhite.XYZ[0] = pVC ->whitePoint.X;
+    lpMod ->adoptedWhite.XYZ[1] = pVC ->whitePoint.Y;
+    lpMod ->adoptedWhite.XYZ[2] = pVC ->whitePoint.Z;
+
+    lpMod -> LA       = pVC ->La;
+    lpMod -> Yb       = pVC ->Yb;
+    lpMod -> D        = pVC ->D_value;
+    lpMod -> surround = pVC ->surround;
+    // F, c and Nc depend only on the surround; unknown values fall back to the average surround
+    switch (lpMod -> surround) {
+
+
+    case CUTSHEET_SURROUND:
+        lpMod->F = 0.8;
+        lpMod->c = 0.41;
+        lpMod->Nc = 0.8;
+        break;
+
+    case DARK_SURROUND:
+        lpMod -> F  = 0.8;
+        lpMod -> c  = 0.525;
+        lpMod -> Nc = 0.8;
+        break;
+
+    case DIM_SURROUND:
+        lpMod -> F  = 0.9;
+        lpMod -> c  = 0.59;
+        lpMod -> Nc = 0.95;
+        break;
+
+    default:
+        // Average surround
+        lpMod -> F  = 1.0;
+        lpMod -> c  = 0.69;
+        lpMod -> Nc = 1.0;
+    }
+    // Order matters: z and Nbb read n, and the white-point pipeline below reads D, FL and Nbb
+    lpMod -> n   = compute_n(lpMod);
+    lpMod -> z   = compute_z(lpMod);
+    lpMod -> Nbb = computeNbb(lpMod);
+    lpMod -> FL  = computeFL(lpMod);
+    // D is computed from F and LA only when the caller passed the D_CALCULATE marker
+    if (lpMod -> D == D_CALCULATE) {
+        lpMod -> D   = computeD(lpMod);
+    }
+
+    lpMod -> Ncb = lpMod -> Nbb;
+    // Push the white point through the forward stages so adoptedWhite.RGB and adoptedWhite.A are available later
+    lpMod -> adoptedWhite = XYZtoCAT02(lpMod -> adoptedWhite);
+    lpMod -> adoptedWhite = ChromaticAdaptation(lpMod -> adoptedWhite, lpMod);
+    lpMod -> adoptedWhite = CAT02toHPE(lpMod -> adoptedWhite);
+    lpMod -> adoptedWhite = NonlinearCompression(lpMod -> adoptedWhite, lpMod);
+
+    return (cmsHANDLE) lpMod;
+
+}
+// Releases a model created by cmsCIECAM02Init; a NULL handle is ignored.
+void CMSEXPORT cmsCIECAM02Done(cmsHANDLE hModel)
+{
+    cmsCIECAM02* model = (cmsCIECAM02*) hModel;
+    if (model == NULL) return;
+    _cmsFree(model->ContextID, model);
+}
+
+// Forward model: converts the XYZ in *pIn to the J, C and h values written to *pOut.
+void CMSEXPORT cmsCIECAM02Forward(cmsHANDLE hModel, const cmsCIEXYZ* pIn, cmsJCh* pOut)
+{
+    cmsCIECAM02* model = (cmsCIECAM02*) hModel;
+    CAM02COLOR color;
+
+    _cmsAssert(model != NULL);
+    _cmsAssert(pIn != NULL);
+    _cmsAssert(pOut != NULL);
+
+    // Start from an all-zero record that holds only the input tristimulus values
+    memset(&color, 0, sizeof(color));
+    color.XYZ[0] = pIn->X;
+    color.XYZ[1] = pIn->Y;
+    color.XYZ[2] = pIn->Z;
+
+    // XYZ -> CAT02 -> adapted -> HPE -> compressed responses -> correlates
+    color = XYZtoCAT02(color);
+    color = ChromaticAdaptation(color, model);
+    color = CAT02toHPE(color);
+    color = NonlinearCompression(color, model);
+    color = ComputeCorrelates(color, model);
+    pOut->J = color.J;
+    pOut->C = color.C;
+    pOut->h = color.h;
+}
+// Inverse model: rebuilds the XYZ written to *pOut from the J, C and h stored in *pIn.
+void CMSEXPORT cmsCIECAM02Reverse(cmsHANDLE hModel, const cmsJCh* pIn, cmsCIEXYZ* pOut)
+{
+    cmsCIECAM02* model = (cmsCIECAM02*) hModel;
+    CAM02COLOR color;
+
+    _cmsAssert(model != NULL);
+    _cmsAssert(pIn != NULL);
+    _cmsAssert(pOut != NULL);
+
+    // Every field other than J, C and h starts at zero
+    memset(&color, 0, sizeof(color));
+    color.J = pIn->J;
+    color.C = pIn->C;
+    color.h = pIn->h;
+
+    // Run the inverse of each forward stage, last stage first
+    color = InverseCorrelates(color, model);
+    color = InverseNonlinearity(color, model);
+    color = HPEtoCAT02(color);
+    color = InverseChromaticAdaptation(color, model);
+    color = CAT02toXYZ(color);
+    pOut->X = color.XYZ[0];
+    pOut->Y = color.XYZ[1];
+    pOut->Z = color.XYZ[2];
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscgats.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscgats.cpp
new file mode 100644
index 0000000000..bd263573b1
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscgats.cpp
@@ -0,0 +1,2795 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2018 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// IT8.7 / CGATS.17-200x handling -----------------------------------------------------------------------------
+
+
+#define MAXID        128     // Max length of identifier (size of cmsIT8.id and cmsIT8.DoubleFormatter)
+#define MAXSTR      1024     // Max length of string (size of cmsIT8.str and TABLE.SheetType)
+#define MAXTABLES    255     // Max number of tables in a single stream
+#define MAXINCLUDE    20     // Max number of nested includes
+
+#define DEFAULT_DBL_FORMAT  "%.10g" // Double formatting
+// DIR_CHAR: directory separator used when testing and building file paths
+#ifdef CMS_IS_WINDOWS_
+#    include <io.h>
+#    define DIR_CHAR    '\\'
+#else
+#    define DIR_CHAR    '/'
+#endif
+
+
+// Symbols: the token kinds stored in cmsIT8.sy while parsing
+typedef enum {
+
+        SUNDEFINED,
+        SINUM,      // Integer
+        SDNUM,      // Real
+        SIDENT,     // Identifier
+        SSTRING,    // string
+        SCOMMENT,   // comment
+        SEOLN,      // End of line
+        SEOF,       // End of stream
+        SSYNERROR,  // Syntax error found on stream
+
+        // Keywords (mapped from their text form by TabKeys)
+
+        SBEGIN_DATA,
+        SBEGIN_DATA_FORMAT,
+        SEND_DATA,
+        SEND_DATA_FORMAT,
+        SKEYWORD,
+        SDATA_FORMAT_ID,
+        SINCLUDE
+
+    } SYMBOL;
+
+
+// How to write the value of a property (each predefined property is assigned one of these)
+typedef enum {
+
+        WRITE_UNCOOKED,      // Used for NUMBER_OF_FIELDS and NUMBER_OF_SETS
+        WRITE_STRINGIFY,     // Used for the textual predefined properties
+        WRITE_HEXADECIMAL,
+        WRITE_BINARY,
+        WRITE_PAIR           // Used for WEIGHTING_FUNCTION and COMPUTATIONAL_PARAMETER
+
+    } WRITEMODE;
+
+// Linked list of variable names; one node per keyword, optionally with a chain of subkeys
+typedef struct _KeyVal {
+
+        struct _KeyVal*  Next;          // Next node of the list
+        char*            Keyword;       // Name of variable
+        struct _KeyVal*  NextSubkey;    // If key is a dictionary, points to the next item
+        char*            Subkey;        // If key is a dictionary, points to the subkey name
+        char*            Value;         // Points to value
+        WRITEMODE        WriteAs;       // How to write the value
+
+   } KEYVALUE;
+
+
+// Linked list of memory chunks (memory sink); cmsIT8.MemorySink points to its head
+typedef struct _OwnedMem {
+
+        struct _OwnedMem* Next;         // Next chunk of the list
+        void *            Ptr;          // Points to the chunk
+
+   } OWNEDMEM;
+
+// Suballocator (cmsIT8.Allocator describes it as a string suballocator kept for speed)
+typedef struct _SubAllocator {
+
+         cmsUInt8Number* Block;         // Current block
+         cmsUInt32Number BlockSize;     // presumably the size of Block in bytes - confirm against AllocChunk
+         cmsUInt32Number Used;          // presumably the bytes of Block already handed out - confirm
+
+    } SUBALLOCATOR;
+
+// Table. Each individual table can hold properties plus rows and columns of data
+typedef struct _Table {
+
+        char SheetType[MAXSTR];               // The first row of the IT8 (the type)
+
+        int            nSamples, nPatches;    // Cols, Rows
+        int            SampleID;              // Pos of ID
+
+        KEYVALUE*      HeaderList;            // The properties
+
+        char**         DataFormat;            // The binary stream descriptor
+        char**         Data;                  // The binary stream
+
+    } TABLE;
+
+// File stream being parsed (one entry of cmsIT8.FileStack)
+typedef struct _FileContext {
+        char           FileName[cmsMAX_PATH];    // File name if being read from file
+        FILE*          Stream;                   // File stream or NULL if held in memory
+    } FILECTX;
+
+// This struct holds all information about an open IT8 handler.
+typedef struct {
+
+
+        cmsUInt32Number  TablesCount;                     // How many tables in this stream
+        cmsUInt32Number  nTable;                          // The actual table
+
+        TABLE Tab[MAXTABLES];
+
+        // Memory management
+        OWNEDMEM*      MemorySink;            // The storage backend
+        SUBALLOCATOR   Allocator;             // String suballocator -- just to keep it fast
+
+        // Parser state machine
+        SYMBOL             sy;                // Current symbol
+        int                ch;                // Current character
+
+        cmsInt32Number     inum;              // integer value
+        cmsFloat64Number   dnum;              // real value
+
+        char           id[MAXID];             // identifier
+        char           str[MAXSTR];           // string
+
+        // Allowed keywords & datasets. They are visible across the whole stream
+        KEYVALUE*      ValidKeywords;
+        KEYVALUE*      ValidSampleID;
+
+        char*          Source;                // Points to the location being parsed
+        cmsInt32Number lineno;                // line counter for error reporting
+
+        FILECTX*       FileStack[MAXINCLUDE]; // Stack of files being parsed
+        cmsInt32Number IncludeSP;             // Include Stack Pointer
+
+        char*          MemoryBlock;           // The stream if held in memory
+
+        char           DoubleFormatter[MAXID];// Printf-like 'cmsFloat64Number' formatter
+
+        cmsContext    ContextID;              // The threading context
+
+   } cmsIT8;
+
+
+// The stream for save operations; it targets either a file or a memory buffer
+typedef struct {
+
+        FILE* stream;   // For save-to-file behaviour
+
+        cmsUInt8Number* Base;
+        cmsUInt8Number* Ptr;        // For save-to-mem behaviour
+        cmsUInt32Number Used;       // presumably the bytes written so far - confirm against the writer
+        cmsUInt32Number Max;        // presumably the capacity of the memory buffer - confirm
+
+    } SAVESTREAM;
+
+
+// ------------------------------------------------------ cmsIT8 parsing routines
+
+
+// A keyword: one row of the TabKeys translation table
+typedef struct {
+
+        const char *id;     // Keyword text as it appears in the stream
+        SYMBOL sy;          // Symbol the keyword translates to
+
+   } KEYWORD;
+
+// The keyword->symbol translation table. Sorting is required (presumably because lookups use a binary search - confirm); entries are in ascending character-code order.
+static const KEYWORD TabKeys[] = {
+
+        {"$INCLUDE",               SINCLUDE},   // This is an extension!
+        {".INCLUDE",               SINCLUDE},   // This is an extension!
+
+        {"BEGIN_DATA",             SBEGIN_DATA },
+        {"BEGIN_DATA_FORMAT",      SBEGIN_DATA_FORMAT },
+        {"DATA_FORMAT_IDENTIFIER", SDATA_FORMAT_ID},
+        {"END_DATA",               SEND_DATA},
+        {"END_DATA_FORMAT",        SEND_DATA_FORMAT},
+        {"KEYWORD",                SKEYWORD}
+        };
+
+#define NUMKEYS (sizeof(TabKeys)/sizeof(KEYWORD))
+
+// Predefined properties
+
+// A property: one row of the PredefinedProperties table
+typedef struct {
+        const char *id;    // The identifier
+        WRITEMODE as;      // How it is supposed to be written
+    } PROPERTY;
+
+static PROPERTY PredefinedProperties[] = {
+
+        {"NUMBER_OF_FIELDS", WRITE_UNCOOKED},    // Required - NUMBER OF FIELDS
+        {"NUMBER_OF_SETS",   WRITE_UNCOOKED},    // Required - NUMBER OF SETS
+        {"ORIGINATOR",       WRITE_STRINGIFY},   // Required - Identifies the specific system, organization or individual that created the data file.
+        {"FILE_DESCRIPTOR",  WRITE_STRINGIFY},   // Required - Describes the purpose or contents of the data file.
+        {"CREATED",          WRITE_STRINGIFY},   // Required - Indicates date of creation of the data file.
+        {"DESCRIPTOR",       WRITE_STRINGIFY},   // Required  - Describes the purpose or contents of the data file.
+        {"DIFFUSE_GEOMETRY", WRITE_STRINGIFY},   // The diffuse geometry used. Allowed values are "sphere" or "opal".
+        {"MANUFACTURER",     WRITE_STRINGIFY},
+        {"MANUFACTURE",      WRITE_STRINGIFY},   // Some broken Fuji targets do store this value
+        {"PROD_DATE",        WRITE_STRINGIFY},   // Identifies year and month of production of the target in the form yyyy:mm.
+        {"SERIAL",           WRITE_STRINGIFY},   // Uniquely identifies individual physical target.
+
+        {"MATERIAL",         WRITE_STRINGIFY},    // Identifies the material on which the target was produced using a code
+                                                  // uniquely identifying the material. This is intended to be used for IT8.7
+                                                  // physical targets only (i.e. IT8.7/1 and IT8.7/2).
+
+        {"INSTRUMENTATION",  WRITE_STRINGIFY},    // Used to report the specific instrumentation used (manufacturer and
+                                                  // model number) to generate the data reported. This data will often
+                                                  // provide more information about the particular data collected than an
+                                                  // extensive list of specific details. This is particularly important for
+                                                  // spectral data or data derived from spectrophotometry.
+
+        {"MEASUREMENT_SOURCE", WRITE_STRINGIFY},  // Illumination used for spectral measurements. This data helps provide
+                                                  // a guide to the potential for issues of paper fluorescence, etc.
+
+        {"PRINT_CONDITIONS", WRITE_STRINGIFY},     // Used to define the characteristics of the printed sheet being reported.
+                                                   // Where standard conditions have been defined (e.g., SWOP at nominal)
+                                                   // named conditions may suffice. Otherwise, detailed information is
+                                                   // needed.
+
+        {"SAMPLE_BACKING",   WRITE_STRINGIFY},     // Identifies the backing material used behind the sample during
+                                                   // measurement. Allowed values are "black", "white", or "na".
+                                                  
+        {"CHISQ_DOF",        WRITE_STRINGIFY},     // Degrees of freedom associated with the Chi squared statistic
+                                                   // The properties below are new in recent specs:
+
+        {"MEASUREMENT_GEOMETRY", WRITE_STRINGIFY}, // The type of measurement, either reflection or transmission, should be indicated
+                                                   // along with details of the geometry and the aperture size and shape. For example,
+                                                   // for transmission measurements it is important to identify 0/diffuse, diffuse/0,
+                                                   // opal or integrating sphere, etc. For reflection it is important to identify 0/45,
+                                                   // 45/0, sphere (specular included or excluded), etc.
+
+       {"FILTER",            WRITE_STRINGIFY},     // Identifies the use of physical filter(s) during measurement. Typically used to
+                                                   // denote the use of filters such as none, D65, Red, Green or Blue.
+                                                  
+       {"POLARIZATION",      WRITE_STRINGIFY},     // Identifies the use of a physical polarization filter during measurement. Allowed
+                                                   // values are "yes", "white", "none" or "na".
+
+       {"WEIGHTING_FUNCTION", WRITE_PAIR},         // Indicates such functions as: the CIE standard observer functions used in the
+                                                   // calculation of various data parameters (2 degree and 10 degree), CIE standard
+                                                   // illuminant functions used in the calculation of various data parameters (e.g., D50,
+                                                   // D65, etc.), density status response, etc. If used there shall be at least one
+                                                   // name-value pair following the WEIGHTING_FUNCTION tag/keyword. The first attribute
+                                                   // in the set shall be "name" and shall identify the particular parameter used.
+                                                   // The second shall be "value" and shall provide the value associated with that name.
+                                                   // For ASCII data, a string containing the Name and Value attribute pairs shall follow
+                                                   // the weighting function keyword. A semi-colon separates attribute pairs from each
+                                                   // other and within the attribute the name and value are separated by a comma.
+
+       {"COMPUTATIONAL_PARAMETER", WRITE_PAIR},    // Parameter that is used in computing a value from measured data. Name is the name
+                                                   // of the calculation, parameter is the name of the parameter used in the calculation
+                                                   // and value is the value of the parameter.
+                                                   
+       {"TARGET_TYPE",        WRITE_STRINGIFY},    // The type of target being measured, e.g. IT8.7/1, IT8.7/3, user defined, etc.
+                                                  
+       {"COLORANT",           WRITE_STRINGIFY},    // Identifies the colorant(s) used in creating the target.
+                                                  
+       {"TABLE_DESCRIPTOR",   WRITE_STRINGIFY},    // Describes the purpose or contents of a data table.
+                                                  
+       {"TABLE_NAME",         WRITE_STRINGIFY}     // Provides a short name for a data table.
+};
+
+#define NUMPREDEFINEDPROPS (sizeof(PredefinedProperties)/sizeof(PROPERTY))
+
+
+// Predefined sample types on dataset
+static const char* PredefinedSampleID[] = {
+        "SAMPLE_ID",      // Identifies sample that data represents
+        "STRING",         // Identifies label, or other non-machine readable value.
+                          // Value must begin and end with a " symbol
+
+        "CMYK_C",         // Cyan component of CMYK data expressed as a percentage
+        "CMYK_M",         // Magenta component of CMYK data expressed as a percentage
+        "CMYK_Y",         // Yellow component of CMYK data expressed as a percentage
+        "CMYK_K",         // Black component of CMYK data expressed as a percentage
+        "D_RED",          // Red filter density
+        "D_GREEN",        // Green filter density
+        "D_BLUE",         // Blue filter density
+        "D_VIS",          // Visual filter density
+        "D_MAJOR_FILTER", // Major filter density
+        "RGB_R",          // Red component of RGB data
+        "RGB_G",          // Green component of RGB data
+        "RGB_B",          // Blue component of RGB data
+        "SPECTRAL_NM",    // Wavelength of measurement expressed in nanometers
+        "SPECTRAL_PCT",   // Percentage reflectance/transmittance
+        "SPECTRAL_DEC",   // Reflectance/transmittance
+        "XYZ_X",          // X component of tristimulus data
+        "XYZ_Y",          // Y component of tristimulus data
+        "XYZ_Z",          // Z component of tristimulus data
+        "XYY_X",          // x component of chromaticity data
+        "XYY_Y",          // y component of chromaticity data
+        "XYY_CAPY",       // Y component of tristimulus data
+        "LAB_L",          // L* component of Lab data
+        "LAB_A",          // a* component of Lab data
+        "LAB_B",          // b* component of Lab data
+        "LAB_C",          // C*ab component of Lab data
+        "LAB_H",          // hab component of Lab data
+        "LAB_DE",         // CIE dE
+        "LAB_DE_94",      // CIE dE using CIE 94
+        "LAB_DE_CMC",     // dE using CMC
+        "LAB_DE_2000",    // CIE dE using CIE DE 2000
+        "MEAN_DE",        // Mean Delta E (LAB_DE) of samples compared to batch average
+                          // (Used for data files for ANSI IT8.7/1 and IT8.7/2 targets)
+        "STDEV_X",        // Standard deviation of X (tristimulus data)
+        "STDEV_Y",        // Standard deviation of Y (tristimulus data)
+        "STDEV_Z",        // Standard deviation of Z (tristimulus data)
+        "STDEV_L",        // Standard deviation of L*
+        "STDEV_A",        // Standard deviation of a*
+        "STDEV_B",        // Standard deviation of b*
+        "STDEV_DE",       // Standard deviation of CIE dE
+        "CHI_SQD_PAR"};   // The average of the standard deviations of L*, a* and b*. It is
+                          // used to derive an estimate of the chi-squared parameter which is
+                          // recommended as the predictor of the variability of dE
+
+// Number of entries in PredefinedSampleID, derived from the array size so it
+// follows the initializer automatically
+#define NUMPREDEFINEDSAMPLEID (sizeof(PredefinedSampleID)/sizeof(char *))
+
+//Forward declaration of some internal functions
+// (AllocChunk is defined further down but the lexer below already calls it)
+static void* AllocChunk(cmsIT8* it8, cmsUInt32Number size);
+
+// Tells whether c is an in-line separator, that is, a blank or a horizontal tab
+static
+cmsBool isseparator(int c)
+{
+    switch (c) {
+
+    case ' ':
+    case '\t':
+        return TRUE;
+
+    default:
+        return FALSE;
+    }
+}
+
+// Tells whether c may appear inside an identifier: a printable ASCII char
+// (33..126) that is neither the comment marker nor one of the quote chars
+static
+cmsBool ismiddle(int c)
+{
+    if (c <= 32 || c >= 127) return FALSE;
+    if (isseparator(c)) return FALSE;
+    if (c == '#' || c == '\"' || c == '\'') return FALSE;
+
+    return TRUE;
+}
+
+// Tells whether c can continue an identifier: any alphanumeric char, or
+// anything accepted by ismiddle()
+static
+cmsBool isidchar(int c)
+{
+   if (isalnum(c)) return TRUE;
+
+   return ismiddle(c);
+}
+
+// Tells whether c can start an identifier: same set as ismiddle(), digits excluded
+static
+cmsBool isfirstidchar(int c)
+{
+     if (isdigit(c)) return FALSE;
+
+     return ismiddle(c);
+}
+
+// Heuristic: does the given path look absolute?
+// A NULL or empty path is never absolute. A path is taken as absolute when it
+// begins with DIR_CHAR or, on Windows builds, with a drive letter plus ':'
+static
+cmsBool isabsolutepath(const char *path)
+{
+    if (path == NULL || path[0] == 0)
+        return FALSE;
+
+    if (path[0] == DIR_CHAR)
+        return TRUE;
+
+#ifdef  CMS_IS_WINDOWS_
+    // path[0] is not the terminator, so path[1] is safe to look at
+    if (isalpha((int) path[0]) && path[1] == ':')
+        return TRUE;
+#endif
+    return FALSE;
+}
+
+
+// Makes a file path based on a given reference path
+// NOTE: this function doesn't check if the path exists or even if it's legal
+//
+//   relPath   path to resolve; copied as-is when it already looks absolute
+//   basePath  path of the referring file; everything up to its last DIR_CHAR
+//             is used as prefix
+//   buffer    receives the result
+//   MaxLen    capacity of buffer in chars, zero terminator included
+//
+// Returns FALSE when MaxLen is zero or basePath holds no DIR_CHAR. The result
+// is truncated to fit and is always zero terminated.
+static
+cmsBool BuildAbsolutePath(const char *relPath, const char *basePath, char *buffer, cmsUInt32Number MaxLen)
+{
+    char *tail;
+    cmsUInt32Number len;
+
+    // Nothing can be stored, and buffer[MaxLen-1] below would underflow
+    if (MaxLen == 0) return FALSE;
+
+    // Already absolute?
+    if (isabsolutepath(relPath)) {
+
+        strncpy(buffer, relPath, MaxLen);
+        buffer[MaxLen-1] = 0;
+        return TRUE;
+    }
+
+    // No, search for last
+    strncpy(buffer, basePath, MaxLen);
+    buffer[MaxLen-1] = 0;
+
+    tail = strrchr(buffer, DIR_CHAR);
+    if (tail == NULL) return FALSE;    // Is not absolute and has no separators??
+
+    len = (cmsUInt32Number) (tail - buffer);
+    if (len >= MaxLen) return FALSE;
+
+    // tail + 1 is buffer[len + 1], so only MaxLen - len - 1 chars are left.
+    // strncpy does not terminate on truncation, hence the explicit terminator.
+    strncpy(tail + 1, relPath, MaxLen - len - 1);
+    buffer[MaxLen-1] = 0;
+
+    return TRUE;
+}
+
+
+// Guards against format-string abuse: a text holding any '%' is replaced by a
+// fixed marker, otherwise the text itself is handed back
+static
+const char* NoMeta(const char* str)
+{
+    const char* percent = strchr(str, '%');
+
+    return (percent == NULL) ? str : "**** CORRUPTED FORMAT STRING ***";
+}
+
+// Syntax error
+static
+cmsBool SynError(cmsIT8* it8, const char *Txt, ...)
+{
+    char Buffer[256], ErrMsg[1024];
+    va_list args;
+
+    va_start(args, Txt);
+    vsnprintf(Buffer, 255, Txt, args);
+    Buffer[255] = 0;
+    va_end(args);
+
+    snprintf(ErrMsg, 1023, "%s: Line %d, %s", it8->FileStack[it8 ->IncludeSP]->FileName, it8->lineno, Buffer);
+    ErrMsg[1023] = 0;
+    it8->sy = SSYNERROR;
+    cmsSignalError(it8 ->ContextID, cmsERROR_CORRUPTION_DETECTED, "%s", ErrMsg);
+    return FALSE;
+}
+
+// Verifies that the current symbol is the expected one. On mismatch, Err is
+// reported through SynError (after NoMeta filtering) and FALSE is returned.
+static
+cmsBool Check(cmsIT8* it8, SYMBOL sy, const char* Err)
+{
+        if (it8->sy == sy)
+                return TRUE;
+
+        return SynError(it8, NoMeta(Err));
+}
+
+// Fetches the next character into it8->ch.
+// When the current file context has an open stream the char comes from it;
+// at end of an included file the stream is closed, the include level is
+// popped and a blank is delivered, while at end of the outermost file 0 is
+// delivered. Without a stream, chars are taken from the it8->Source string
+// and the pointer stays on the terminator once it is reached.
+static
+void NextCh(cmsIT8* it8)
+{
+    FILE* Stream = it8->FileStack[it8->IncludeSP]->Stream;
+
+    if (!Stream) {
+
+        // Memory source
+        it8->ch = *it8->Source;
+        if (it8->ch != 0) it8->Source++;
+        return;
+    }
+
+    it8->ch = fgetc(Stream);
+
+    if (!feof(Stream))
+        return;
+
+    if (it8->IncludeSP > 0) {
+
+        // Back to the including file
+        fclose(Stream);
+        it8->IncludeSP--;
+        it8->ch = ' ';                            // Whitespace to be ignored
+    }
+    else {
+        it8->ch = 0;   // EOF
+    }
+}
+
+
+// Looks id up in TabKeys by case-insensitive binary search.
+// Returns the symbol bound to the keyword, or SUNDEFINED if id is not one.
+static
+SYMBOL BinSrchKey(const char *id)
+{
+    int lo = 0;
+    int hi = (int) (NUMKEYS) - 1;
+
+    while (lo <= hi)
+    {
+        int mid = (lo + hi) / 2;
+        int cmp = cmsstrcasecmp(id, TabKeys[mid].id);
+
+        if (cmp == 0)
+            return TabKeys[mid].sy;
+
+        if (cmp < 0)
+            hi = mid - 1;
+        else
+            lo = mid + 1;
+    }
+
+    return SUNDEFINED;
+}
+
+
+// Ten raised to the n-th power, as a floating point value
+static
+cmsFloat64Number xpow10(int n)
+{
+    const cmsFloat64Number exponent = (cmsFloat64Number) n;
+
+    return pow(10, exponent);
+}
+
+
+//  Reads a Real number, tries to follow from integer number
+//  inum is the integer part already accumulated by the caller. Scanning goes on
+//  from it8->ch and the value is left in it8->dnum. The caller applies the sign.
+static
+void ReadReal(cmsIT8* it8, cmsInt32Number inum)
+{
+    it8->dnum = (cmsFloat64Number)inum;
+
+    // Any integer digits still pending are accumulated in floating point
+    while (isdigit(it8->ch)) {
+
+        it8->dnum = (cmsFloat64Number)it8->dnum * 10.0 + (cmsFloat64Number)(it8->ch - '0');
+        NextCh(it8);
+    }
+
+    if (it8->ch == '.') {        // Decimal point
+
+        cmsFloat64Number frac = 0.0;      // fraction
+        int prec = 0;                     // precision
+
+        NextCh(it8);               // Eats dec. point
+
+        // Fraction digits are gathered as an integer and scaled afterwards
+        while (isdigit(it8->ch)) {
+
+            frac = frac * 10.0 + (cmsFloat64Number)(it8->ch - '0');
+            prec++;
+            NextCh(it8);
+        }
+
+        it8->dnum = it8->dnum + (frac / xpow10(prec));
+    }
+
+    // Exponent, example 34.00E+20
+    if (toupper(it8->ch) == 'E') {
+
+        cmsInt32Number e;
+        cmsInt32Number sgn;
+
+        NextCh(it8); sgn = 1;
+
+        // Optional sign of the exponent
+        if (it8->ch == '-') {
+
+            sgn = -1; NextCh(it8);
+        }
+        else
+            if (it8->ch == '+') {
+
+                sgn = +1;
+                NextCh(it8);
+            }
+
+        e = 0;
+        while (isdigit(it8->ch)) {
+
+            cmsInt32Number digit = (it8->ch - '0');
+
+            // Digits that would push e past the 32-bit range are read but ignored
+            if ((cmsFloat64Number)e * 10.0 + (cmsFloat64Number)digit < (cmsFloat64Number)+2147483647.0)
+                e = e * 10 + digit;
+
+            NextCh(it8);
+        }
+
+        e = sgn*e;
+        it8->dnum = it8->dnum * xpow10(e);
+    }
+}
+
+// Parses a float number
+// This can not call directly atof because it uses locale dependent
+// parsing, while CCMX files always use . as decimal separator
+//
+// Accepted layout: optional sign, integer digits, optional '.' plus fraction
+// digits, optional E/e exponent with its own optional sign. Parsing stops at
+// the first char that does not fit; a NULL Buffer yields 0.0.
+static
+cmsFloat64Number ParseFloatNumber(const char *Buffer)
+{
+    cmsFloat64Number dnum = 0.0;
+    int sign = 1;
+
+    // keep safe
+    if (Buffer == NULL) return 0.0;
+
+    // Optional leading sign
+    if (*Buffer == '-' || *Buffer == '+') {
+
+        sign = (*Buffer == '-') ? -1 : 1;
+        Buffer++;
+    }
+
+
+    // Integer part
+    while (*Buffer && isdigit((int)*Buffer)) {
+
+        dnum = dnum * 10.0 + (*Buffer - '0');
+        if (*Buffer) Buffer++;
+    }
+
+    if (*Buffer == '.') {
+
+        cmsFloat64Number frac = 0.0;      // fraction
+        int prec = 0;                     // precision
+
+        if (*Buffer) Buffer++;
+
+        // Fraction digits are gathered as an integer and scaled afterwards
+        while (*Buffer && isdigit((int)*Buffer)) {
+
+            frac = frac * 10.0 + (*Buffer - '0');
+            prec++;
+            if (*Buffer) Buffer++;
+        }
+
+        dnum = dnum + (frac / xpow10(prec));
+    }
+
+    // Exponent, example 34.00E+20
+    if (*Buffer && toupper(*Buffer) == 'E') {
+
+        int e;
+        int sgn;
+
+        if (*Buffer) Buffer++;
+        sgn = 1;
+
+        // Optional sign of the exponent
+        if (*Buffer == '-') {
+
+            sgn = -1;
+            if (*Buffer) Buffer++;
+        }
+        else
+            if (*Buffer == '+') {
+
+                sgn = +1;
+                if (*Buffer) Buffer++;
+            }
+
+        e = 0;
+        while (*Buffer && isdigit((int)*Buffer)) {
+
+            cmsInt32Number digit = (*Buffer - '0');
+
+            // Digits that would push e past the 32-bit range are skipped
+            if ((cmsFloat64Number)e * 10.0 + digit < (cmsFloat64Number)+2147483647.0)
+                e = e * 10 + digit;
+
+            if (*Buffer) Buffer++;
+        }
+
+        e = sgn*e;
+        dnum = dnum * xpow10(e);
+    }
+
+    return sign * dnum;
+}
+
+
+// Reads next symbol
+//
+// Scans from it8->ch and leaves the kind of token in it8->sy, with its value in
+// it8->id (identifiers), it8->inum / it8->dnum (numbers) or it8->str (strings).
+//   - separators are skipped and comments are swallowed
+//   - identifiers are looked up with BinSrchKey, so keywords get their own symbol
+//   - numbers may be decimal, 0x hexadecimal, 0b binary or real (see ReadReal);
+//     a number directly followed by identifier chars becomes an identifier
+//   - end of line, end of stream and quoted strings get SEOLN, SEOF and SSTRING
+//   - the include keyword opens the named file (resolved against the file being
+//     read) as a nested source and scanning goes on inside it
+// Errors are reported through SynError, which leaves SSYNERROR in it8->sy.
+static
+void InSymbol(cmsIT8* it8)
+{
+    CMSREGISTER char *idptr;
+    CMSREGISTER int k;
+    SYMBOL key;
+    int sng;
+
+    do {
+
+        while (isseparator(it8->ch))
+            NextCh(it8);
+
+        if (isfirstidchar(it8->ch)) {          // Identifier
+
+            k = 0;
+            idptr = it8->id;
+
+            do {
+
+                // Chars beyond MAXID-1 are read but dropped
+                if (++k < MAXID) *idptr++ = (char) it8->ch;
+
+                NextCh(it8);
+
+            } while (isidchar(it8->ch));
+
+            *idptr = '\0';
+
+
+            key = BinSrchKey(it8->id);
+            if (key == SUNDEFINED) it8->sy = SIDENT;
+            else it8->sy = key;
+
+        }
+        else                         // Is a number?
+            if (isdigit(it8->ch) || it8->ch == '.' || it8->ch == '-' || it8->ch == '+')
+            {
+                int sign = 1;
+
+                if (it8->ch == '-') {
+                    sign = -1;
+                    NextCh(it8);
+                }
+
+                it8->inum = 0;
+                it8->sy   = SINUM;
+
+                if (it8->ch == '0') {          // 0xnnnn (Hexa) or 0bnnnn (Binary)
+
+                    NextCh(it8);
+                    if (toupper(it8->ch) == 'X') {
+
+                        int j;
+
+                        NextCh(it8);
+                        while (isxdigit(it8->ch))
+                        {
+                            it8->ch = toupper(it8->ch);
+                            if (it8->ch >= 'A' && it8->ch <= 'F')  j = it8->ch -'A'+10;
+                            else j = it8->ch - '0';
+
+                            // Overflow is checked in floating point before accumulating
+                            if ((cmsFloat64Number) it8->inum * 16.0 + (cmsFloat64Number) j > (cmsFloat64Number)+2147483647.0)
+                            {
+                                SynError(it8, "Invalid hexadecimal number");
+                                return;
+                            }
+
+                            it8->inum = it8->inum * 16 + j;
+                            NextCh(it8);
+                        }
+                        return;
+                    }
+
+                    if (toupper(it8->ch) == 'B') {  // Binary
+
+                        int j;
+
+                        NextCh(it8);
+                        while (it8->ch == '0' || it8->ch == '1')
+                        {
+                            j = it8->ch - '0';
+
+                            if ((cmsFloat64Number) it8->inum * 2.0 + j > (cmsFloat64Number)+2147483647.0)
+                            {
+                                SynError(it8, "Invalid binary number");
+                                return;
+                            }
+
+                            it8->inum = it8->inum * 2 + j;
+                            NextCh(it8);
+                        }
+                        return;
+                    }
+                }
+
+
+                while (isdigit(it8->ch)) {
+
+                    cmsInt32Number digit = (it8->ch - '0');
+
+                    // Too big for an integer: carry on as a real number
+                    if ((cmsFloat64Number) it8->inum * 10.0 + (cmsFloat64Number) digit > (cmsFloat64Number) +2147483647.0) {
+                        ReadReal(it8, it8->inum);
+                        it8->sy = SDNUM;
+                        it8->dnum *= sign;
+                        return;
+                    }
+
+                    it8->inum = it8->inum * 10 + digit;
+                    NextCh(it8);
+                }
+
+                if (it8->ch == '.') {
+
+                    ReadReal(it8, it8->inum);
+                    it8->sy = SDNUM;
+                    it8->dnum *= sign;
+                    return;
+                }
+
+                it8 -> inum *= sign;
+
+                // Special case. Numbers followed by letters are taken as identifiers
+
+                if (isidchar(it8 ->ch)) {
+
+                    if (it8 ->sy == SINUM) {
+
+                        snprintf(it8->id, 127, "%d", it8->inum);
+                    }
+                    else {
+
+                        snprintf(it8->id, 127, it8 ->DoubleFormatter, it8->dnum);
+                    }
+
+                    k = (int) strlen(it8 ->id);
+                    idptr = it8 ->id + k;
+                    do {
+
+                        if (++k < MAXID) *idptr++ = (char) it8->ch;
+
+                        NextCh(it8);
+
+                    } while (isidchar(it8->ch));
+
+                    *idptr = '\0';
+                    it8->sy = SIDENT;
+                }
+                return;
+
+            }
+            else
+                switch ((int) it8->ch) {
+
+        // EOF marker -- ignore it
+        case '\x1a':
+            NextCh(it8);
+            break;
+
+        // Eof stream markers
+        case 0:
+        case -1:
+            it8->sy = SEOF;
+            break;
+
+
+        // Next line
+        case '\r':
+            NextCh(it8);
+            if (it8 ->ch == '\n')
+                NextCh(it8);
+            it8->sy = SEOLN;
+            it8->lineno++;
+            break;
+
+        case '\n':
+            NextCh(it8);
+            it8->sy = SEOLN;
+            it8->lineno++;
+            break;
+
+        // Comment
+        case '#':
+            NextCh(it8);
+            while (it8->ch && it8->ch != '\n' && it8->ch != '\r')
+                NextCh(it8);
+
+            it8->sy = SCOMMENT;
+            break;
+
+        // String.
+        case '\'':
+        case '\"':
+            idptr = it8->str;
+            sng = it8->ch;
+            k = 0;
+            NextCh(it8);
+
+            while (k < (MAXSTR-1) && it8->ch != sng) {
+
+                // An end of line inside the string terminates it
+                if (it8->ch == '\n'|| it8->ch == '\r') k = MAXSTR+1;
+                else {
+                    *idptr++ = (char) it8->ch;
+                    NextCh(it8);
+                    k++;
+                }
+            }
+
+            it8->sy = SSTRING;
+            *idptr = '\0';
+            NextCh(it8);
+            break;
+
+
+        default:
+            SynError(it8, "Unrecognized character: 0x%x", it8 ->ch);
+            return;
+            }
+
+    } while (it8->sy == SCOMMENT);
+
+    // Handle the include special token
+
+    if (it8 -> sy == SINCLUDE) {
+
+                FILECTX* FileNest;
+
+                if(it8 -> IncludeSP >= (MAXINCLUDE-1)) {
+
+                    SynError(it8, "Too many recursion levels");
+                    return;
+                }
+
+                InSymbol(it8);
+                if (!Check(it8, SSTRING, "Filename expected")) return;
+
+                // File contexts are created on first use and kept for later includes
+                FileNest = it8 -> FileStack[it8 -> IncludeSP + 1];
+                if(FileNest == NULL) {
+
+                    FileNest = it8 ->FileStack[it8 -> IncludeSP + 1] = (FILECTX*)AllocChunk(it8, sizeof(FILECTX));
+                    if (FileNest == NULL) {
+
+                        // Without a context the include cannot be followed
+                        SynError(it8, "Out of memory");
+                        return;
+                    }
+                }
+
+                if (BuildAbsolutePath(it8->str,
+                                      it8->FileStack[it8->IncludeSP]->FileName,
+                                      FileNest->FileName, cmsMAX_PATH-1) == FALSE) {
+                    SynError(it8, "File path too long");
+                    return;
+                }
+
+                FileNest->Stream = fopen(FileNest->FileName, "rt");
+                if (FileNest->Stream == NULL) {
+
+                        SynError(it8, "File %s not found", FileNest->FileName);
+                        return;
+                }
+                it8->IncludeSP++;
+
+                // Start reading the included file with a neutral char
+                it8 ->ch = ' ';
+                InSymbol(it8);
+    }
+
+}
+
+// Requires the current symbol to be an end of line, then consumes it together
+// with any end of line that follows. Returns FALSE (error already reported)
+// when the current symbol is something else.
+static
+cmsBool CheckEOLN(cmsIT8* it8)
+{
+        if (!Check(it8, SEOLN, "Expected separator"))
+                return FALSE;
+
+        // Check() passed, so there is at least one SEOLN to consume
+        do {
+                InSymbol(it8);
+        } while (it8->sy == SEOLN);
+
+        return TRUE;
+}
+
+// Moves on to the next symbol if the current one is sy. End of stream is never skipped.
+
+static
+void Skip(cmsIT8* it8, SYMBOL sy)
+{
+        if (it8->sy == SEOF) return;
+        if (it8->sy != sy) return;
+
+        InSymbol(it8);
+}
+
+
+// Consumes every consecutive end-of-line symbol, if any
+static
+void SkipEOLN(cmsIT8* it8)
+{
+    for (;;) {
+
+        if (it8->sy != SEOLN) break;
+        InSymbol(it8);
+    }
+}
+
+
+// Returns a string holding current value
+//
+//   Buffer      receives the textual form of the current symbol
+//   max         number of chars that may be stored in Buffer, terminator included
+//   ErrorTitle  message reported when the symbol carries no value
+//
+// An end of line gives an empty string; identifiers and strings are copied,
+// numbers are formatted ("%d" for integers, it8->DoubleFormatter for reals).
+// Returns FALSE, through SynError, for any other symbol.
+static
+cmsBool GetVal(cmsIT8* it8, char* Buffer, cmsUInt32Number max, const char* ErrorTitle)
+{
+    switch (it8->sy) {
+
+    case SEOLN:   // Empty value
+                  Buffer[0]=0;
+                  break;
+    case SIDENT:  strncpy(Buffer, it8->id, max);
+                  Buffer[max-1]=0;
+                  break;
+    case SINUM:   snprintf(Buffer, max, "%d", it8 -> inum); break;
+    case SDNUM:   snprintf(Buffer, max, it8->DoubleFormatter, it8 -> dnum); break;
+    case SSTRING: strncpy(Buffer, it8->str, max);
+                  Buffer[max-1] = 0;
+                  break;
+
+
+    default:
+         return SynError(it8, "%s", ErrorTitle);
+    }
+
+    // Every branch above has already terminated the text within max chars,
+    // so nothing is written at Buffer[max], which lies past that limit.
+    return TRUE;
+}
+
+// ---------------------------------------------------------- Table
+
+// Returns the table selected by it8->nTable. If that index is not below
+// TablesCount, an error is reported and the first table is returned instead.
+static
+TABLE* GetTable(cmsIT8* it8)
+{
+   if (it8->nTable < it8->TablesCount)
+       return &it8->Tab[it8->nTable];
+
+   SynError(it8, "Table %d out of sequence", it8->nTable);
+   return it8->Tab;
+}
+
+// ---------------------------------------------------------- Memory management
+
+
+// Releases a CGATS handle: every block recorded in the MemorySink list, the
+// MemoryBlock (if any) and the object itself. A NULL handle is ignored.
+void CMSEXPORT cmsIT8Free(cmsHANDLE hIT8)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    OWNEDMEM* node;
+
+    if (it8 == NULL)
+        return;
+
+    node = it8->MemorySink;
+    while (node != NULL) {
+
+        // Grab the link before the node goes away
+        OWNEDMEM* following = node->Next;
+
+        if (node->Ptr) _cmsFree(it8->ContextID, node->Ptr);
+        _cmsFree(it8->ContextID, node);
+
+        node = following;
+    }
+
+    if (it8->MemoryBlock)
+        _cmsFree(it8->ContextID, it8->MemoryBlock);
+
+    _cmsFree(it8->ContextID, it8);
+}
+
+
+// Gets a zero-filled block of the given size and records it at the head of
+// the MemorySink list, so cmsIT8Free can release it later.
+// Returns NULL if either the block or its list node cannot be allocated.
+static
+void* AllocBigBlock(cmsIT8* it8, cmsUInt32Number size)
+{
+    OWNEDMEM* Owner;
+    void* Block = _cmsMallocZero(it8->ContextID, size);
+
+    if (Block == NULL)
+        return NULL;
+
+    Owner = (OWNEDMEM*) _cmsMallocZero(it8->ContextID, sizeof(OWNEDMEM));
+    if (Owner == NULL) {
+
+        // No node to track the block, so give the block back
+        _cmsFree(it8->ContextID, Block);
+        return NULL;
+    }
+
+    Owner->Ptr      = Block;
+    Owner->Next     = it8->MemorySink;
+    it8->MemorySink = Owner;
+
+    return Block;
+}
+
+
+// Suballocator.
+// Serves size bytes (adjusted by _cmsALIGNMEM) out of the current big block.
+// When the block cannot hold the request a new one is obtained from
+// AllocBigBlock: 20K the first time, twice the previous size afterwards, and
+// never smaller than the request itself.
+// Returns NULL when no block is available.
+static
+void* AllocChunk(cmsIT8* it8, cmsUInt32Number size)
+{
+    cmsUInt32Number Free = it8 ->Allocator.BlockSize - it8 ->Allocator.Used;
+    cmsUInt8Number* ptr;
+
+    size = _cmsALIGNMEM(size);
+
+    if (size > Free) {
+
+        if (it8 -> Allocator.BlockSize == 0)
+
+                it8 -> Allocator.BlockSize = 20*1024;
+        else
+                it8 ->Allocator.BlockSize *= 2;
+
+        if (it8 ->Allocator.BlockSize < size)
+                it8 ->Allocator.BlockSize = size;
+
+        it8 ->Allocator.Used = 0;
+        it8 ->Allocator.Block = (cmsUInt8Number*)  AllocBigBlock(it8, it8 ->Allocator.BlockSize);
+    }
+
+    if (it8 ->Allocator.Block == NULL) {
+
+        // Without this check Used would keep growing and later calls would
+        // return NULL + Used, a pointer that looks valid but is not.
+        // Resetting the size lets the next request try a fresh block.
+        it8 ->Allocator.BlockSize = 0;
+        it8 ->Allocator.Used = 0;
+        return NULL;
+    }
+
+    ptr = it8 ->Allocator.Block + it8 ->Allocator.Used;
+    it8 ->Allocator.Used += size;
+
+    return (void*) ptr;
+
+}
+
+
+// Allocates a string
+// Returns a copy of str stored in memory owned by it8, or NULL if the
+// suballocator cannot provide it.
+static
+char *AllocString(cmsIT8* it8, const char* str)
+{
+    cmsUInt32Number Size = (cmsUInt32Number) strlen(str)+1;
+    char *ptr;
+
+
+    ptr = (char *) AllocChunk(it8, Size);
+
+    // Size counts the terminator, so it is copied too instead of relying on
+    // the chunk being zero-filled
+    if (ptr) memcpy(ptr, str, Size);
+
+    return ptr;
+}
+
+// Looks a key (and optionally a subkey) up in a property list.
+//
+// Keywords are compared ignoring case; a Key starting with '#' (a comment)
+// never matches. When Subkey is given, the NextSubkey chain of the matching
+// entry is searched for it.
+// If LastPtr is not NULL it receives the last entry visited: the match when
+// found, otherwise the place where the walk ended.
+
+static
+cmsBool IsAvailableOnList(KEYVALUE* p, const char* Key, const char* Subkey, KEYVALUE** LastPtr)
+{
+    KEYVALUE* node = p;
+
+    if (LastPtr) *LastPtr = node;
+
+    // First pass: find the keyword along the main chain
+    while (node != NULL) {
+
+        if (LastPtr) *LastPtr = node;
+
+        if (*Key != '#' && cmsstrcasecmp(Key, node->Keyword) == 0)
+            break;
+
+        node = node->Next;
+    }
+
+    if (node == NULL)
+        return FALSE;
+
+    if (Subkey == 0)
+        return TRUE;
+
+    // Second pass: find the subkey among the entries sharing that keyword
+    while (node != NULL) {
+
+        if (node->Subkey != NULL) {
+
+            if (LastPtr) *LastPtr = node;
+
+            if (cmsstrcasecmp(Subkey, node->Subkey) == 0)
+                return TRUE;
+        }
+
+        node = node->NextSubkey;
+    }
+
+    return FALSE;
+}
+
+
+
+// Add a property into a linked list
+//
+//   Head     address of the list head; updated when the list was empty
+//   Key      property name
+//   Subkey   optional subkey (may be NULL)
+//   xValue   value to store, copied; NULL stores no value
+//   WriteAs  how the value is written when saving
+//
+// If the key (and subkey) is already there its value and write mode are
+// replaced, otherwise a new entry is appended. Returns the entry, or NULL if
+// the container cannot be allocated.
+static
+KEYVALUE* AddToList(cmsIT8* it8, KEYVALUE** Head, const char *Key, const char *Subkey, const char* xValue, WRITEMODE WriteAs)
+{
+    KEYVALUE* p;
+    KEYVALUE* last;
+
+
+    // Check if property is already in list
+
+    if (IsAvailableOnList(*Head, Key, Subkey, &p)) {
+
+        // This may work for editing properties
+
+        //     return SynError(it8, "duplicate key <%s>", Key);
+    }
+    else {
+
+        // Not found: p is the last entry visited by the search (NULL on an empty list)
+        last = p;
+
+        // Allocate the container
+        p = (KEYVALUE*) AllocChunk(it8, sizeof(KEYVALUE));
+        if (p == NULL)
+        {
+            SynError(it8, "AddToList: out of memory");
+            return NULL;
+        }
+
+        // Store name and value
+        p->Keyword = AllocString(it8, Key);
+        p->Subkey = (Subkey == NULL) ? NULL : AllocString(it8, Subkey);
+
+        // Keep the container in our list
+        if (*Head == NULL) {
+            *Head = p;
+        }
+        else
+        {
+            if (Subkey != NULL && last != NULL) {
+
+                last->NextSubkey = p;
+
+                // If Subkey is not null, then last is the last property with the same key,
+                // but not necessarily is the last property in the list, so we need to move
+                // to the actual list end
+                while (last->Next != NULL)
+                         last = last->Next;
+            }
+
+            if (last != NULL) last->Next = p;
+        }
+
+        p->Next    = NULL;
+        p->NextSubkey = NULL;
+    }
+
+    // Common to new and existing entries
+    p->WriteAs = WriteAs;
+
+    if (xValue != NULL) {
+
+        p->Value   = AllocString(it8, xValue);
+    }
+    else {
+        p->Value   = NULL;
+    }
+
+    return p;
+}
+
+// Registers Key in the list of valid keywords, without subkey or value.
+// 'as' is the write mode stored with the entry.
+static
+KEYVALUE* AddAvailableProperty(cmsIT8* it8, const char* Key, WRITEMODE as)
+{
+    KEYVALUE** Keywords = &it8->ValidKeywords;
+
+    return AddToList(it8, Keywords, Key, NULL, NULL, as);
+}
+
+
+// Registers Key in the list of valid sample IDs, without subkey or value
+static
+KEYVALUE* AddAvailableSampleID(cmsIT8* it8, const char* Key)
+{
+    KEYVALUE** SampleIDs = &it8->ValidSampleID;
+
+    return AddToList(it8, SampleIDs, Key, NULL, NULL, WRITE_UNCOOKED);
+}
+
+
+// Takes the next unused slot of it8->Tab into use, with empty header, format
+// and data, and counts it in TablesCount.
+// NOTE(review): no check against the capacity of Tab is visible here; confirm
+// that callers cannot exceed it.
+static
+void AllocTable(cmsIT8* it8)
+{
+    TABLE* Fresh = &it8->Tab[it8->TablesCount++];
+
+    Fresh->HeaderList = NULL;
+    Fresh->DataFormat = NULL;
+    Fresh->Data       = NULL;
+}
+
+
+// Selects the table to work on. The index right after the last existing table
+// creates that table; anything beyond is an error.
+// Returns the selected index, or -1 on error.
+cmsInt32Number CMSEXPORT cmsIT8SetTable(cmsHANDLE  IT8, cmsUInt32Number nTable)
+{
+     cmsIT8* it8 = (cmsIT8*) IT8;
+
+     if (nTable > it8->TablesCount) {
+
+         SynError(it8, "Table %d is out of sequence", nTable);
+         return -1;
+     }
+
+     if (nTable == it8->TablesCount)
+         AllocTable(it8);
+
+     it8->nTable = nTable;
+
+     return (cmsInt32Number) nTable;
+}
+
+
+
+// Init an empty container
+// Creates a CGATS object holding one empty table, the default number format,
+// sheet type "CGATS.17" and the predefined properties and sample IDs.
+// Returns NULL if the object or its first file context cannot be allocated.
+cmsHANDLE  CMSEXPORT cmsIT8Alloc(cmsContext ContextID)
+{
+    cmsIT8* it8;
+    cmsUInt32Number i;
+
+    it8 = (cmsIT8*) _cmsMallocZero(ContextID, sizeof(cmsIT8));
+    if (it8 == NULL) return NULL;
+
+    AllocTable(it8);
+
+    it8->MemoryBlock = NULL;
+    it8->MemorySink  = NULL;
+
+    it8 ->nTable = 0;
+
+    it8->ContextID = ContextID;
+    it8->Allocator.Used = 0;
+    it8->Allocator.Block = NULL;
+    it8->Allocator.BlockSize = 0;
+
+    it8->ValidKeywords = NULL;
+    it8->ValidSampleID = NULL;
+
+    it8 -> sy = SUNDEFINED;
+    it8 -> ch = ' ';
+    it8 -> Source = NULL;
+    it8 -> inum = 0;
+    it8 -> dnum = 0.0;
+
+    it8->FileStack[0] = (FILECTX*)AllocChunk(it8, sizeof(FILECTX));
+    if (it8->FileStack[0] == NULL) {
+
+        // The parser and the error reporting both dereference FileStack[0],
+        // so a handle without it must not be handed out
+        cmsIT8Free((cmsHANDLE) it8);
+        return NULL;
+    }
+
+    it8->IncludeSP   = 0;
+    it8 -> lineno = 1;
+
+    strcpy(it8->DoubleFormatter, DEFAULT_DBL_FORMAT);
+    cmsIT8SetSheetType((cmsHANDLE) it8, "CGATS.17");
+
+    // Initialize predefined properties & data
+
+    for (i=0; i < NUMPREDEFINEDPROPS; i++)
+            AddAvailableProperty(it8, PredefinedProperties[i].id, PredefinedProperties[i].as);
+
+    for (i=0; i < NUMPREDEFINEDSAMPLEID; i++)
+            AddAvailableSampleID(it8, PredefinedSampleID[i]);
+
+
+   return (cmsHANDLE) it8;
+}
+
+
+// Returns the sheet type of the current table
+const char* CMSEXPORT cmsIT8GetSheetType(cmsHANDLE hIT8)
+{
+        TABLE* Current = GetTable((cmsIT8*) hIT8);
+
+        return Current->SheetType;
+}
+
+// Stores Type as the sheet type of the current table, truncated to MAXSTR-1
+// chars. Always returns TRUE.
+cmsBool CMSEXPORT cmsIT8SetSheetType(cmsHANDLE hIT8, const char* Type)
+{
+        cmsIT8* it8 = (cmsIT8*) hIT8;
+        char* Dest = GetTable(it8)->SheetType;
+
+        strncpy(Dest, Type, MAXSTR-1);
+        Dest[MAXSTR-1] = 0;
+
+        return TRUE;
+}
+
+// Adds a comment to the header of the current table.
+// A NULL or empty text is rejected with FALSE.
+cmsBool CMSEXPORT cmsIT8SetComment(cmsHANDLE hIT8, const char* Val)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* Added;
+
+    if (Val == NULL || *Val == 0)
+        return FALSE;
+
+    Added = AddToList(it8, &GetTable(it8)->HeaderList, "# ", NULL, Val, WRITE_UNCOOKED);
+
+    return Added != NULL;
+}
+
+// Sets a string property in the header of the current table; the value is
+// written quoted. A NULL or empty value is rejected with FALSE.
+cmsBool CMSEXPORT cmsIT8SetPropertyStr(cmsHANDLE hIT8, const char* Key, const char *Val)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* Added;
+
+    if (Val == NULL || *Val == 0)
+        return FALSE;
+
+    Added = AddToList(it8, &GetTable(it8)->HeaderList, Key, NULL, Val, WRITE_STRINGIFY);
+
+    return Added != NULL;
+}
+
+// Sets a numeric property in the header of the current table. The number is
+// turned into text with the object's DoubleFormatter and stored uncooked.
+cmsBool CMSEXPORT cmsIT8SetPropertyDbl(cmsHANDLE hIT8, const char* cProp, cmsFloat64Number Val)
+{
+    char Text[1024];
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* Added;
+
+    snprintf(Text, 1023, it8->DoubleFormatter, Val);
+
+    Added = AddToList(it8, &GetTable(it8)->HeaderList, cProp, NULL, Text, WRITE_UNCOOKED);
+
+    return Added != NULL;
+}
+
+// Sets a property that is written in hexadecimal. The value is kept as
+// decimal text and marked WRITE_HEXADECIMAL.
+cmsBool CMSEXPORT cmsIT8SetPropertyHex(cmsHANDLE hIT8, const char* cProp, cmsUInt32Number Val)
+{
+    char Text[1024];
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* Added;
+
+    snprintf(Text, 1023, "%u", Val);
+
+    Added = AddToList(it8, &GetTable(it8)->HeaderList, cProp, NULL, Text, WRITE_HEXADECIMAL);
+
+    return Added != NULL;
+}
+
+// Sets a property whose text is written exactly as given
+cmsBool CMSEXPORT cmsIT8SetPropertyUncooked(cmsHANDLE hIT8, const char* Key, const char* Buffer)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE** Header = &GetTable(it8)->HeaderList;
+    KEYVALUE* Added = AddToList(it8, Header, Key, NULL, Buffer, WRITE_UNCOOKED);
+
+    return Added != NULL;
+}
+
+// Sets a property addressed by key plus subkey; it is written as a pair
+cmsBool CMSEXPORT cmsIT8SetPropertyMulti(cmsHANDLE hIT8, const char* Key, const char* SubKey, const char *Buffer)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE** Header = &GetTable(it8)->HeaderList;
+    KEYVALUE* Added = AddToList(it8, Header, Key, SubKey, Buffer, WRITE_PAIR);
+
+    return Added != NULL;
+}
+
+// Returns the value stored for Key in the header of the current table, or
+// NULL if there is no such property
+const char* CMSEXPORT cmsIT8GetProperty(cmsHANDLE hIT8, const char* Key)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* Found;
+
+    if (!IsAvailableOnList(GetTable(it8)->HeaderList, Key, NULL, &Found))
+        return NULL;
+
+    return Found->Value;
+}
+
+
+// Returns a property as a number, 0.0 when the property has no value.
+// The text is converted by ParseFloatNumber.
+cmsFloat64Number CMSEXPORT cmsIT8GetPropertyDbl(cmsHANDLE hIT8, const char* cProp)
+{
+    const char *Text = cmsIT8GetProperty(hIT8, cProp);
+
+    return (Text == NULL) ? 0.0 : ParseFloatNumber(Text);
+}
+
+// Returns the value stored under Key/SubKey in the header of the current
+// table, or NULL if there is no such entry
+const char* CMSEXPORT cmsIT8GetPropertyMulti(cmsHANDLE hIT8, const char* Key, const char *SubKey)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* Found;
+
+    if (!IsAvailableOnList(GetTable(it8)->HeaderList, Key, SubKey, &Found))
+        return NULL;
+
+    return Found->Value;
+}
+
+// ----------------------------------------------------------------- Datasets
+
+
+// Creates the array of field names for the current table, unless it already
+// exists. The field count comes from NUMBER_OF_FIELDS; when that is missing or
+// not positive an error is reported and 10 fields are assumed.
+static
+void AllocateDataFormat(cmsIT8* it8)
+{
+    TABLE* t = GetTable(it8);
+    cmsUInt32Number Bytes;
+
+    if (t->DataFormat != NULL)
+        return;
+
+    t->nSamples = (int) cmsIT8GetPropertyDbl(it8, "NUMBER_OF_FIELDS");
+
+    if (t->nSamples <= 0) {
+
+        SynError(it8, "AllocateDataFormat: Unknown NUMBER_OF_FIELDS");
+        t->nSamples = 10;
+    }
+
+    // One slot more than the number of fields
+    Bytes = ((cmsUInt32Number) t->nSamples + 1) * sizeof(char *);
+
+    t->DataFormat = (char**) AllocChunk(it8, Bytes);
+    if (t->DataFormat == NULL)
+        SynError(it8, "AllocateDataFormat: Unable to allocate dataFormat array");
+}
+
+// Returns the name of field n of the current table, or NULL when the table
+// has no data format yet. n is not range checked here.
+static
+const char *GetDataFormat(cmsIT8* it8, int n)
+{
+    TABLE* t = GetTable(it8);
+
+    if (t->DataFormat == NULL)
+        return NULL;
+
+    return t->DataFormat[n];
+}
+
+// Stores label as the name of field n of the current table, creating the
+// data format array on first use.
+// Returns FALSE, after reporting, when n is negative or above NUMBER_OF_FIELDS.
+static
+cmsBool SetDataFormat(cmsIT8* it8, int n, const char *label)
+{
+    TABLE* t = GetTable(it8);
+
+    if (!t->DataFormat)
+        AllocateDataFormat(it8);
+
+    // n comes straight from the public cmsIT8SetDataFormat, so a negative
+    // index has to be refused before it is used on the array
+    if (n < 0) {
+        SynError(it8, "Invalid field index %d", n);
+        return FALSE;
+    }
+
+    if (n > t -> nSamples) {
+        SynError(it8, "More than NUMBER_OF_FIELDS fields.");
+        return FALSE;
+    }
+
+    if (t->DataFormat) {
+        t->DataFormat[n] = AllocString(it8, label);
+    }
+
+    return TRUE;
+}
+
+
+// Public entry point to name field n of the current table; see SetDataFormat
+cmsBool CMSEXPORT cmsIT8SetDataFormat(cmsHANDLE  h, int n, const char *Sample)
+{
+    return SetDataFormat((cmsIT8*) h, n, Sample);
+}
+
+// Creates the data array of the current table, unless it already exists.
+// Dimensions come from NUMBER_OF_FIELDS and NUMBER_OF_SETS; a property that
+// is not present counts as zero. Each dimension must lie in 0..0x7ffe,
+// otherwise an error is reported and nothing is allocated.
+static
+void AllocateDataSet(cmsIT8* it8)
+{
+    TABLE* t = GetTable(it8);
+    const char* Fields;
+    const char* Sets;
+
+    if (t -> Data) return;    // Already allocated
+
+    // cmsIT8GetProperty gives NULL for an absent property, and atoi must not
+    // be fed with NULL
+    Fields = cmsIT8GetProperty(it8, "NUMBER_OF_FIELDS");
+    Sets   = cmsIT8GetProperty(it8, "NUMBER_OF_SETS");
+
+    t-> nSamples   = (Fields != NULL) ? atoi(Fields) : 0;
+    t-> nPatches   = (Sets != NULL) ? atoi(Sets) : 0;
+
+    if (t -> nSamples < 0 || t->nSamples > 0x7ffe || t->nPatches < 0 || t->nPatches > 0x7ffe)
+    {
+        SynError(it8, "AllocateDataSet: too much data");
+    }
+    else {
+        // One extra row and one extra column
+        t->Data = (char**)AllocChunk(it8, ((cmsUInt32Number)t->nSamples + 1) * ((cmsUInt32Number)t->nPatches + 1) * sizeof(char*));
+        if (t->Data == NULL) {
+
+            SynError(it8, "AllocateDataSet: Unable to allocate data array");
+        }
+    }
+
+}
+
+// Returns the text stored for field nField of patch nSet in the current
+// table. Gives NULL when either index is out of range or there is no data.
+static
+char* GetData(cmsIT8* it8, int nSet, int nField)
+{
+    TABLE* t = GetTable(it8);
+    int nSamples    = t -> nSamples;
+    int nPatches    = t -> nPatches;
+
+    // Negative indices have to be refused too, they would address memory
+    // before the array
+    if (nSet < 0 || nSet >= nPatches || nField < 0 || nField >= nSamples)
+        return NULL;
+
+    if (!t->Data) return NULL;
+    return t->Data [nSet * nSamples + nField];
+}
+
+// Stores a copy of Val as field nField of patch nSet in the current table,
+// creating the data array on first use.
+// Returns FALSE when the array cannot be created or an index is out of range.
+static
+cmsBool SetData(cmsIT8* it8, int nSet, int nField, const char *Val)
+{
+    TABLE* t = GetTable(it8);
+    char** Slot;
+
+    if (t->Data == NULL) {
+
+        AllocateDataSet(it8);
+        if (t->Data == NULL) return FALSE;
+    }
+
+    if (nSet < 0 || nSet > t->nPatches)
+        return SynError(it8, "Patch %d out of range, there are %d patches", nSet, t->nPatches);
+
+    if (nField < 0 || nField > t->nSamples)
+        return SynError(it8, "Sample %d out of range, there are %d samples", nField, t->nSamples);
+
+    Slot  = &t->Data[nSet * t->nSamples + nField];
+    *Slot = AllocString(it8, Val);
+
+    return TRUE;
+}
+
+
+// --------------------------------------------------------------- File I/O
+
+
+// Sends a string to the save stream. NULL is written as a single blank.
+// The length is always added to f->Used. The text then goes to the file when
+// there is one, else to the memory block when there is one; with neither, the
+// call only counts bytes.
+static
+void WriteStr(SAVESTREAM* f, const char *str)
+{
+    const char* Text = (str != NULL) ? str : " ";
+    cmsUInt32Number Count = (cmsUInt32Number) strlen(Text);
+
+    f->Used += Count;
+
+    if (f->stream) {
+
+        // File output
+        if (fwrite(Text, 1, Count, f->stream) != Count)
+            cmsSignalError(0, cmsERROR_WRITE, "Write to file error in CGATS parser");
+
+        return;
+    }
+
+    if (!f->Base)
+        return;     // Just counting the bytes
+
+    if (f->Used > f->Max) {
+
+        cmsSignalError(0, cmsERROR_WRITE, "Write to memory overflows in CGATS parser");
+        return;
+    }
+
+    memmove(f->Ptr, Text, Count);
+    f->Ptr += Count;
+}
+
+
+// printf-like front end of WriteStr. The text is formatted into a local
+// buffer, so anything beyond 4095 chars is cut.
+
+static
+void Writef(SAVESTREAM* f, const char* frm, ...)
+{
+    char Line[4096];
+    va_list ap;
+
+    va_start(ap, frm);
+    vsnprintf(Line, 4095, frm, ap);
+    va_end(ap);
+
+    Line[4095] = 0;
+    WriteStr(f, Line);
+}
+
+// Writes full header
+// Emits the sheet type followed by every entry of the header list of the
+// current table. Comments come out as '#' blocks; properties come out as the
+// keyword plus the value rendered according to its write mode. Keywords that
+// are not yet known are registered (and, under CMS_STRICT_CGATS, declared
+// with a KEYWORD line first).
+static
+void WriteHeader(cmsIT8* it8, SAVESTREAM* fp)
+{
+    KEYVALUE* p;
+    TABLE* t = GetTable(it8);
+
+    // Writes the type
+    WriteStr(fp, t->SheetType);
+    WriteStr(fp, "\n");
+
+    for (p = t->HeaderList; (p != NULL); p = p->Next)
+    {
+        if (*p ->Keyword == '#') {
+
+            char* Pt;
+
+            WriteStr(fp, "#\n# ");
+
+            // A comment entry without text is written as an empty comment
+            if (p ->Value != NULL) {
+
+                for (Pt = p ->Value; *Pt; Pt++) {
+
+
+                    Writef(fp, "%c", *Pt);
+
+                    // Every line of the comment gets its own marker
+                    if (*Pt == '\n') {
+                        WriteStr(fp, "# ");
+                    }
+                }
+            }
+
+            WriteStr(fp, "\n#\n");
+            continue;
+        }
+
+
+        if (!IsAvailableOnList(it8-> ValidKeywords, p->Keyword, NULL, NULL)) {
+
+#ifdef CMS_STRICT_CGATS
+            WriteStr(fp, "KEYWORD\t\"");
+            WriteStr(fp, p->Keyword);
+            WriteStr(fp, "\"\n");
+#endif
+
+            AddAvailableProperty(it8, p->Keyword, WRITE_UNCOOKED);
+        }
+
+        WriteStr(fp, p->Keyword);
+        if (p->Value) {
+
+            switch (p ->WriteAs) {
+
+            case WRITE_UNCOOKED:
+                    Writef(fp, "\t%s", p ->Value);
+                    break;
+
+            case WRITE_STRINGIFY:
+                    Writef(fp, "\t\"%s\"", p->Value );
+                    break;
+
+            case WRITE_HEXADECIMAL:
+                    Writef(fp, "\t0x%X", atoi(p ->Value));
+                    break;
+
+            case WRITE_BINARY:
+                {
+                    // printf has no portable binary conversion, so the digits
+                    // are produced by hand. The 0b prefix is the one the
+                    // reader of this format recognizes.
+                    char Bits[33];
+                    int Pos = 32;
+                    int v = atoi(p ->Value);
+                    unsigned int Mag = (v < 0) ? (0u - (unsigned int) v) : (unsigned int) v;
+
+                    Bits[Pos] = 0;
+                    do {
+                        Bits[--Pos] = (char) ('0' + (Mag & 1u));
+                        Mag >>= 1;
+                    } while (Mag != 0 && Pos > 0);
+
+                    Writef(fp, "\t%s0b%s", (v < 0) ? "-" : "", Bits + Pos);
+                }
+                    break;
+
+            case WRITE_PAIR:
+                    Writef(fp, "\t\"%s,%s\"", p->Subkey, p->Value);
+                    break;
+
+            default: SynError(it8, "Unknown write mode %d", p ->WriteAs);
+                     return;
+            }
+        }
+
+        WriteStr (fp, "\n");
+    }
+
+}
+
+
+// Writes the data format section. The field count comes from NUMBER_OF_FIELDS; a
+// missing property counts as 0 and the count is capped at t->nSamples.
+static
+void WriteDataFormat(SAVESTREAM* fp, cmsIT8* it8)
+{
+    int i, nSamples;
+    const char* nFields;
+    TABLE* t = GetTable(it8);
+    if (!t -> DataFormat) return;
+
+    WriteStr(fp, "BEGIN_DATA_FORMAT\n");
+    WriteStr(fp, " ");
+    nFields  = cmsIT8GetProperty(it8, "NUMBER_OF_FIELDS");
+    nSamples = (nFields != NULL) ? atoi(nFields) : 0;      // atoi(NULL) is undefined
+    if (nSamples > t->nSamples) nSamples = t->nSamples;    // never index past the fields held
+    for (i = 0; i < nSamples; i++) {
+        WriteStr(fp, t->DataFormat[i]);
+        WriteStr(fp, ((i == (nSamples-1)) ? "\n" : "\t"));
+    }
+    WriteStr (fp, "END_DATA_FORMAT\n");
+}
+
+
+// Writes data array (BEGIN_DATA ... END_DATA) of the current table. The row count
+// is refreshed from the NUMBER_OF_SETS property; a missing property counts as 0.
+static
+void WriteData(SAVESTREAM* fp, cmsIT8* it8)
+{
+       int  i, j;
+       const char* nSets;
+       TABLE* t = GetTable(it8);
+
+       if (!t->Data) return;
+
+       WriteStr (fp, "BEGIN_DATA\n");
+
+       // atoi(NULL) is undefined behavior, so guard against an absent property
+       nSets = cmsIT8GetProperty(it8, "NUMBER_OF_SETS");
+       t->nPatches = (nSets != NULL) ? atoi(nSets) : 0;
+
+       for (i = 0; i < t-> nPatches; i++) {
+              WriteStr(fp, " ");
+              for (j = 0; j < t->nSamples; j++) {
+                     char *ptr = t->Data[i*t->nSamples+j];
+
+                     if (ptr == NULL) WriteStr(fp, "\"\"");
+                     else {
+                         // If value contains whitespace, enclose within quote
+                         if (strchr(ptr, ' ') != NULL) {
+
+                             WriteStr(fp, "\"");
+                             WriteStr(fp, ptr);
+                             WriteStr(fp, "\"");
+                         }
+                         else
+                            WriteStr(fp, ptr);
+                     }
+
+                     WriteStr(fp, ((j == (t->nSamples-1)) ? "\n" : "\t"));
+              }
+       }
+       WriteStr (fp, "END_DATA\n");
+}
+
+
+
+// Saves whole file: every table of the IT8 object goes into a freshly created text
+// file. Returns FALSE when the file cannot be opened or closed, TRUE otherwise.
+cmsBool CMSEXPORT cmsIT8SaveToFile(cmsHANDLE hIT8, const char* cFileName)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    cmsUInt32Number nTab;
+    SAVESTREAM out;
+
+    memset(&out, 0, sizeof(out));
+
+    out.stream = fopen(cFileName, "wt");
+    if (out.stream == NULL) return FALSE;
+
+    // Each table is emitted as header, data format and data, in that order
+    for (nTab = 0; nTab < it8 ->TablesCount; nTab++) {
+
+        cmsIT8SetTable(hIT8, nTab);
+        WriteHeader(it8, &out);
+        WriteDataFormat(&out, it8);
+        WriteData(&out, it8);
+    }
+
+    return (fclose(out.stream) == 0) ? TRUE : FALSE;
+}
+
+
+// Saves to memory. With MemPtr == NULL only the needed size (final \0 included) is
+// computed; otherwise *BytesNeeded is the buffer size on entry, FALSE if too small.
+cmsBool CMSEXPORT cmsIT8SaveToMem(cmsHANDLE hIT8, void *MemPtr, cmsUInt32Number* BytesNeeded)
+{
+    SAVESTREAM sd;
+    cmsUInt32Number i;
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+
+    memset(&sd, 0, sizeof(sd));
+
+    sd.stream = NULL;
+    sd.Base   = (cmsUInt8Number*)  MemPtr;
+    sd.Ptr    = sd.Base;
+    sd.Used   = 0;
+
+    if (sd.Base)
+        sd.Max  = *BytesNeeded;     // Write to memory?
+    else
+        sd.Max  = 0;                // Just counting the needed bytes
+
+    for (i=0; i < it8 ->TablesCount; i++) {
+        cmsIT8SetTable(hIT8, i);
+        WriteHeader(it8, &sd);
+        WriteDataFormat(&sd, it8);
+        WriteData(&sd, it8);
+    }
+
+    sd.Used++;  // The \0 at the very end
+    *BytesNeeded = sd.Used;
+
+    if (sd.Base) {
+        // Store the terminator only if it fits: with a full buffer sd.Ptr is one past its end
+        if (sd.Used > sd.Max) return FALSE;
+        *sd.Ptr = 0;
+    }
+    return TRUE;
+}
+
+
+// -------------------------------------------------------------- Higher level parsing
+
+// Parses a BEGIN_DATA_FORMAT ... END_DATA_FORMAT section, registering each field
+// name through SetDataFormat(). A field-count mismatch is reported via SynError()
+// but the function still returns TRUE in that case.
+static
+cmsBool DataFormatSection(cmsIT8* it8)
+{
+    TABLE* Tab = GetTable(it8);
+    int nFound;
+
+    InSymbol(it8);   // Eats "BEGIN_DATA_FORMAT"
+    CheckEOLN(it8);
+
+    // One identifier per field until the end marker, an end of line, EOF or an error
+    for (nFound = 0; ; nFound++) {
+
+        if (it8->sy == SEND_DATA_FORMAT || it8->sy == SEOLN ||
+            it8->sy == SEOF || it8->sy == SSYNERROR) break;
+
+        if (it8->sy != SIDENT)
+            return SynError(it8, "Sample type expected");
+
+        if (!SetDataFormat(it8, nFound, it8->id))
+            return FALSE;
+
+        InSymbol(it8);
+        SkipEOLN(it8);
+    }
+
+    SkipEOLN(it8);
+    Skip(it8, SEND_DATA_FORMAT);
+    SkipEOLN(it8);
+
+    // Compare what was read against the field count kept in the table
+    if (nFound != Tab ->nSamples)
+        SynError(it8, "Count mismatch. NUMBER_OF_FIELDS was %d, found %d\n", Tab ->nSamples, nFound);
+
+    return TRUE;
+}
+
+
+
+// Parses a BEGIN_DATA ... END_DATA section, storing each value row by row
+// through SetData(). Fails on a bad value or when the number of rows read
+// does not match the nPatches of the table.
+static
+cmsBool DataSection (cmsIT8* it8)
+{
+    TABLE* Tab = GetTable(it8);
+    char Value[256];
+    int  Col = 0;
+    int  Row = 0;
+
+    InSymbol(it8);   // Eats "BEGIN_DATA"
+    CheckEOLN(it8);
+
+    if (Tab->Data == NULL)
+        AllocateDataSet(it8);
+
+    while (it8->sy != SEND_DATA && it8->sy != SEOF)
+    {
+        // Wrap to the first column of the next row once the current row is full
+        if (Col >= Tab ->nSamples) {
+            Col = 0;
+            Row++;
+        }
+
+        // The symbol was just tested by the loop condition, so a value is expected here
+        if (!GetVal(it8, Value, 255, "Sample data expected"))
+            return FALSE;
+
+        if (!SetData(it8, Row, Col, Value))
+            return FALSE;
+
+        Col++;
+
+        InSymbol(it8);
+        SkipEOLN(it8);
+    }
+
+    SkipEOLN(it8);
+    Skip(it8, SEND_DATA);
+    SkipEOLN(it8);
+
+    // Check for data completion.
+    if ((Row + 1) != Tab ->nPatches)
+        return SynError(it8, "Count mismatch. NUMBER_OF_SETS was %d, found %d\n", Tab ->nPatches, Row + 1);
+
+    return TRUE;
+}
+
+
+// Parses header statements (SKEYWORD / SDATA_FORMAT_ID declarations and "identifier value"
+// properties) until EOF, a syntax error or the start of a data format / data section.
+static
+cmsBool HeaderSection(cmsIT8* it8)
+{
+    char VarName[MAXID];
+    char Buffer[MAXSTR];
+    KEYVALUE* Key;
+
+        while (it8->sy != SEOF &&
+               it8->sy != SSYNERROR &&
+               it8->sy != SBEGIN_DATA_FORMAT &&
+               it8->sy != SBEGIN_DATA) {
+
+
+        switch (it8 -> sy) {
+
+        case SKEYWORD:
+                InSymbol(it8);
+                if (!GetVal(it8, Buffer, MAXSTR-1, "Keyword expected")) return FALSE;
+                if (!AddAvailableProperty(it8, Buffer, WRITE_UNCOOKED)) return FALSE;
+                InSymbol(it8);
+                break;
+
+
+        case SDATA_FORMAT_ID:
+                InSymbol(it8);
+                if (!GetVal(it8, Buffer, MAXSTR-1, "Keyword expected")) return FALSE;
+                if (!AddAvailableSampleID(it8, Buffer)) return FALSE;
+                InSymbol(it8);
+                break;
+
+        // An identifier followed by its value: a property assignment
+        case SIDENT:
+            strncpy(VarName, it8->id, MAXID - 1);
+            VarName[MAXID - 1] = 0;
+
+            if (!IsAvailableOnList(it8->ValidKeywords, VarName, NULL, &Key)) {
+
+#ifdef CMS_STRICT_CGATS
+                return SynError(it8, "Undefined keyword '%s'", VarName);
+#else
+                Key = AddAvailableProperty(it8, VarName, WRITE_UNCOOKED);
+                if (Key == NULL) return FALSE;
+#endif
+            }
+
+            InSymbol(it8);
+            if (!GetVal(it8, Buffer, MAXSTR - 1, "Property data expected")) return FALSE;
+            // Plain properties are stored as one entry; WRITE_PAIR ones are split below
+            if (Key->WriteAs != WRITE_PAIR) {
+                AddToList(it8, &GetTable(it8)->HeaderList, VarName, NULL, Buffer,
+                    (it8->sy == SSTRING) ? WRITE_STRINGIFY : WRITE_UNCOOKED);
+            }
+            else {
+                const char *Subkey;
+                char *Nextkey;
+                if (it8->sy != SSTRING)
+                    return SynError(it8, "Invalid value '%s' for property '%s'.", Buffer, VarName);
+
+                // chop the string as a list of "subkey, value" pairs, using ';' as a separator
+                for (Subkey = Buffer; Subkey != NULL; Subkey = Nextkey)
+                {
+                    char *Value, *temp;
+
+                    //  identify token pair boundary
+                    Nextkey = (char*)strchr(Subkey, ';');
+                    if (Nextkey)
+                        *Nextkey++ = '\0';
+
+                    // for each pair, split the subkey and the value at the last comma
+                    Value = (char*)strrchr(Subkey, ',');
+                    if (Value == NULL)
+                        return SynError(it8, "Invalid value for property '%s'.", VarName);
+
+                    // gobble the spaces before the comma, and the comma itself
+                    temp = Value++;
+                    do *temp-- = '\0'; while (temp >= Subkey && *temp == ' ');
+
+                    // gobble any space at the right
+                    temp = Value + strlen(Value) - 1;
+                    while (*temp == ' ') *temp-- = '\0';
+
+                    // trim the strings from the left
+                    Subkey += strspn(Subkey, " ");
+                    Value += strspn(Value, " ");
+                    // both halves must be non-empty after trimming
+                    if (Subkey[0] == 0 || Value[0] == 0)
+                        return SynError(it8, "Invalid value for property '%s'.", VarName);
+                    AddToList(it8, &GetTable(it8)->HeaderList, VarName, Subkey, Value, WRITE_PAIR);
+                }
+            }
+
+            InSymbol(it8);
+            break;
+
+
+        case SEOLN: break;
+
+        default:
+                return SynError(it8, "expected keyword or identifier");
+        }
+
+    SkipEOLN(it8);
+    }
+
+    return TRUE;
+
+}
+// Reads the sheet type (first line of the stream) into SheetTypePtr. At most MAXSTR-1
+// characters are kept so the terminating 0 also fits in a MAXSTR-sized buffer
+// (assumes the destination holds MAXSTR bytes -- TODO confirm against TABLE).
+static
+void ReadType(cmsIT8* it8, char* SheetTypePtr)
+{
+    cmsInt32Number cnt = 0;
+
+    // First line is a very special case.
+    while (isseparator(it8->ch))
+            NextCh(it8);
+
+    while (it8->ch != '\r' && it8 ->ch != '\n' && it8->ch != '\t' && it8 -> ch != 0) {
+        if (cnt < MAXSTR - 1) {     // surplus characters are consumed but dropped
+            *SheetTypePtr++ = (char) it8 ->ch;
+            cnt++;
+        }
+        NextCh(it8);
+    }
+    *SheetTypePtr = 0;
+}
+// Top-level parser: optionally reads the sheet type, then consumes data format, data and
+// header sections until EOF or a syntax error. Returns FALSE when parsing fails.
+static
+cmsBool ParseIT8(cmsIT8* it8, cmsBool nosheet)
+{
+    char* SheetTypePtr = it8 ->Tab[0].SheetType;
+    // nosheet == 0: the stream starts with a sheet type line, read into table 0
+    if (nosheet == 0) {
+        ReadType(it8, SheetTypePtr);
+    }
+
+    InSymbol(it8);
+
+    SkipEOLN(it8);
+
+    while (it8-> sy != SEOF &&
+           it8-> sy != SSYNERROR) {
+
+            switch (it8 -> sy) {
+
+            case SBEGIN_DATA_FORMAT:
+                    if (!DataFormatSection(it8)) return FALSE;
+                    break;
+
+            case SBEGIN_DATA:
+
+                    if (!DataSection(it8)) return FALSE;
+
+                    if (it8 -> sy != SEOF) {
+                            // More input follows the data: add a table and make it current
+                            AllocTable(it8);
+                            it8 ->nTable = it8 ->TablesCount - 1;
+
+                            // Read sheet type if present. We only support identifier and string.
+                            // <ident> <eoln> is a type string
+                            // anything else, is not a type string
+                            if (nosheet == 0) {
+
+                                if (it8 ->sy == SIDENT) {
+
+                                    // May be a type sheet or may be a prop value statement. We cannot use insymbol in
+                                    // this special case...
+                                     while (isseparator(it8->ch))
+                                         NextCh(it8);
+
+                                     // If a newline is found, then this is a type string
+                                    if (it8 ->ch == '\n' || it8->ch == '\r') {
+
+                                         cmsIT8SetSheetType(it8, it8 ->id);
+                                         InSymbol(it8);
+                                    }
+                                    else
+                                    {
+                                        // It is not. Just continue
+                                        cmsIT8SetSheetType(it8, "");
+                                    }
+                                }
+                                else
+                                    // Validate quoted strings
+                                    if (it8 ->sy == SSTRING) {
+                                        cmsIT8SetSheetType(it8, it8 ->str);
+                                        InSymbol(it8);
+                                    }
+                           }
+
+                    }
+                    break;
+
+            case SEOLN:
+                    SkipEOLN(it8);
+                    break;
+
+            default:
+                    if (!HeaderSection(it8)) return FALSE;
+           }
+
+    }
+
+    return (it8 -> sy != SSYNERROR);
+}
+
+
+
+// Init useful pointers: for every table, records which field is SAMPLE_ID and rewrites
+// cells of LABEL / '$'-prefixed fields that name a property as "<label> <table> <value>".
+static
+void CookPointers(cmsIT8* it8)
+{
+    int idField, i;
+    char* Fld;
+    cmsUInt32Number j;
+    cmsUInt32Number nOldTable = it8 ->nTable;
+
+    for (j=0; j < it8 ->TablesCount; j++) {
+
+    TABLE* t = it8 ->Tab + j;
+
+    t -> SampleID = 0;
+    it8 ->nTable = j;
+
+    for (idField = 0; idField < t -> nSamples; idField++)
+    {
+        if (t ->DataFormat == NULL){
+            SynError(it8, "Undefined DATA_FORMAT");
+            return;    // NOTE(review): this early exit leaves it8->nTable at j, not nOldTable
+        }
+
+        Fld = t->DataFormat[idField];
+        if (!Fld) continue;
+
+
+        if (cmsstrcasecmp(Fld, "SAMPLE_ID") == 0) {
+
+            t -> SampleID = idField;
+
+            for (i=0; i < t -> nPatches; i++) {
+
+                char *Data = GetData(it8, i, idField);
+                if (Data) {
+                    char Buffer[256];
+                    // Work on a copy cut to 255 characters, then store it back
+                    strncpy(Buffer, Data, 255);
+                    Buffer[255] = 0;
+
+                    if (strlen(Buffer) <= strlen(Data))
+                        strcpy(Data, Buffer);
+                    else
+                        SetData(it8, i, idField, Buffer);
+
+                }
+            }
+
+        }
+
+        // "LABEL" is an extension. It keeps references to forward tables
+
+        if ((cmsstrcasecmp(Fld, "LABEL") == 0) || Fld[0] == '$' ) {
+
+                    // Search for table references...
+                    for (i=0; i < t -> nPatches; i++) {
+
+                            char *Label = GetData(it8, i, idField);
+
+                            if (Label) {
+
+                                cmsUInt32Number k;
+
+                                // This is the label, search for a table containing
+                                // this property
+
+                                for (k=0; k < it8 ->TablesCount; k++) {
+
+                                    TABLE* Table = it8 ->Tab + k;
+                                    KEYVALUE* p;
+
+                                    if (IsAvailableOnList(Table->HeaderList, Label, NULL, &p)) {
+
+                                        // Available, keep type and table
+                                        char Buffer[256];
+
+                                        char *Type  = p ->Value;
+                                        int  nTable = (int) k;
+
+                                        snprintf(Buffer, 255, "%s %d %s", Label, nTable, Type );
+
+                                        SetData(it8, i, idField, Buffer);
+                                    }
+                                }
+
+
+                            }
+
+                    }
+
+
+        }
+
+    }
+    }
+    // Put back the table that was current on entry
+    it8 ->nTable = nOldTable;
+}
+
+// Tries to infer whether the block is a CGATS/IT8 file at all by scanning its
+// first line (at most 132 bytes), which should hold printable characters only.
+// Returns 0 if this does not look like CGATS; otherwise the number of words
+// found on the first line (1 or 2).
+static
+int IsMyBlock(const cmsUInt8Number* Buffer, cmsUInt32Number n)
+{
+    cmsUInt32Number pos, limit;
+    int nWords = 1, pendingGap = 0, inQuotes = 0;
+
+    if (n < 10) return 0;   // Too small
+
+    limit = (n > 132) ? 132 : n;
+
+    for (pos = 1; pos < limit; pos++) {
+
+        const cmsUInt8Number c = Buffer[pos];
+
+        if (c == '\n' || c == '\r') {
+            // End of first line: reject open quotes or more than two words
+            if (inQuotes == 1 || nWords > 2) return 0;
+            return nWords;
+        }
+
+        if (c == '\t' || c == ' ') {
+            if (!inQuotes && !pendingGap)
+                pendingGap = 1;
+        }
+        else if (c == '\"') {
+            inQuotes = !inQuotes;
+        }
+        else {
+            if (c < 32 || c > 127) return 0;
+            nWords += pendingGap;
+            pendingGap = 0;
+        }
+    }
+
+    return 0;
+}
+
+
+// Reads up to the first 132 bytes of the file and lets IsMyBlock() classify them.
+// A file that cannot be opened is reported through cmsSignalError().
+static
+cmsBool IsMyFile(const char* FileName)
+{
+   cmsUInt8Number Head[133];
+   cmsUInt32Number nRead;
+   FILE *in = fopen(FileName, "rt");
+
+   if (in == NULL) {
+       cmsSignalError(0, cmsERROR_FILE, "File '%s' not found", FileName);
+       return FALSE;
+   }
+
+   nRead = (cmsUInt32Number) fread(Head, 1, 132, in);
+
+   if (fclose(in) != 0)
+       return FALSE;
+
+   Head[nRead] = '\0';
+   return IsMyBlock(Head, nRead);
+}
+
+// ---------------------------------------------------------- Exported routines
+
+
+// Loads a CGATS/IT8 object from a memory block. The text is parsed from a private
+// 0-terminated copy. Returns NULL if the data is not recognized or on any failure.
+cmsHANDLE  CMSEXPORT cmsIT8LoadFromMem(cmsContext ContextID, const void *Ptr, cmsUInt32Number len)
+{
+    cmsHANDLE hIT8;
+    cmsIT8*  it8;
+    int type;
+
+    _cmsAssert(Ptr != NULL);
+    _cmsAssert(len != 0);
+
+    type = IsMyBlock((const cmsUInt8Number*)Ptr, len);
+    if (type == 0) return NULL;
+
+    hIT8 = cmsIT8Alloc(ContextID);
+    if (!hIT8) return NULL;
+
+    it8 = (cmsIT8*) hIT8;
+    it8 ->MemoryBlock = (char*) _cmsMalloc(ContextID, len + 1);
+    if (it8->MemoryBlock == NULL)
+    {
+        cmsIT8Free(hIT8);
+        return NULL;    // NULL rather than FALSE: the result is a handle
+    }
+
+    strncpy(it8 ->MemoryBlock, (const char*) Ptr, len);
+    it8 ->MemoryBlock[len] = 0;
+
+    strncpy(it8->FileStack[0]->FileName, "", cmsMAX_PATH-1);
+    it8-> Source = it8 -> MemoryBlock;
+
+    if (!ParseIT8(it8, type-1)) {
+        cmsIT8Free(hIT8);
+        return NULL;
+    }
+
+    CookPointers(it8);
+    it8 ->nTable = 0;
+
+    // The working copy is released once parsing is done
+    _cmsFree(ContextID, it8->MemoryBlock);
+    it8 -> MemoryBlock = NULL;
+
+    return hIT8;
+}
+
+
+// Loads a CGATS/IT8 object from a text file. Returns NULL when the file does not
+// pass the IsMyFile() check, cannot be opened or closed, or fails to parse. The
+// current table of the returned object is table 0.
+cmsHANDLE  CMSEXPORT cmsIT8LoadFromFile(cmsContext ContextID, const char* cFileName)
+{
+    cmsHANDLE Handle;
+    cmsIT8*   Obj;
+    int       nWords;
+
+    _cmsAssert(cFileName != NULL);
+
+    // IsMyFile() yields 0 for rejected files, else the word count of the first line
+    nWords = IsMyFile(cFileName);
+    if (nWords == 0) return NULL;
+
+    Handle = cmsIT8Alloc(ContextID);
+    if (Handle == NULL) return NULL;
+    Obj = (cmsIT8*) Handle;
+
+    Obj ->FileStack[0]->Stream = fopen(cFileName, "rt");
+    if (Obj ->FileStack[0]->Stream == NULL) {
+        cmsIT8Free(Handle);
+        return NULL;
+    }
+
+    strncpy(Obj ->FileStack[0]->FileName, cFileName, cmsMAX_PATH-1);
+    Obj ->FileStack[0]->FileName[cmsMAX_PATH-1] = 0;
+
+    // A single-word first line (nWords == 1) makes the parser read a sheet type
+    if (!ParseIT8(Obj, nWords - 1)) {
+        fclose(Obj ->FileStack[0]->Stream);
+        cmsIT8Free(Handle);
+        return NULL;
+    }
+
+    // Parsing done: cook the SAMPLE_ID / label references and rewind to table 0
+    CookPointers(Obj);
+    Obj ->nTable = 0;
+
+    if (fclose(Obj ->FileStack[0]->Stream) != 0) {
+        cmsIT8Free(Handle);
+        return NULL;
+    }
+
+    return Handle;
+}
+
+// Returns the number of sample fields of the current table and, when SampleNames
+// is not NULL, stores there the table's own array of field names.
+int CMSEXPORT cmsIT8EnumDataFormat(cmsHANDLE hIT8, char ***SampleNames)
+{
+    TABLE* Tab;
+
+    _cmsAssert(hIT8 != NULL);
+
+    Tab = GetTable((cmsIT8*) hIT8);
+    if (SampleNames != NULL)
+        *SampleNames = Tab -> DataFormat;
+    return Tab -> nSamples;
+}
+// Collects the keywords of every header entry of the current table into an array
+// obtained from AllocChunk() and returns how many there are. If AllocChunk() gives
+// NULL, *PropertyNames is set to NULL and 0 is returned.
+cmsUInt32Number CMSEXPORT cmsIT8EnumProperties(cmsHANDLE hIT8, char ***PropertyNames)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* p;
+    cmsUInt32Number n;
+    char **Props;
+    TABLE* t;
+
+    _cmsAssert(hIT8 != NULL);
+    t = GetTable(it8);
+
+    // Pass#1 - count properties
+    n = 0;
+    for (p = t -> HeaderList;  p != NULL; p = p->Next) n++;
+
+    Props = (char **) AllocChunk(it8, sizeof(char *) * n);
+    if (Props == NULL) {
+        *PropertyNames = NULL;
+        return 0;
+    }
+
+    // Pass#2 - Fill pointers
+    n = 0;
+    for (p = t -> HeaderList;  p != NULL; p = p->Next) {
+        Props[n++] = p -> Keyword;
+    }
+
+    *PropertyNames = Props;
+    return n;
+}
+// Enumerates the subkeys stored under property cProp of the current table. A missing
+// property, or a NULL result from AllocChunk(), gives 0 and a NULL array.
+cmsUInt32Number CMSEXPORT cmsIT8EnumPropertyMulti(cmsHANDLE hIT8, const char* cProp, const char ***SubpropertyNames)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE *p, *tmp;
+    cmsUInt32Number n;
+    const char **Props;
+    TABLE* t;
+
+    _cmsAssert(hIT8 != NULL);
+    t = GetTable(it8);
+
+    if(!IsAvailableOnList(t->HeaderList, cProp, NULL, &p)) {
+        *SubpropertyNames = 0;
+        return 0;
+    }
+
+    // Pass#1 - count properties
+    n = 0;
+    for (tmp = p;  tmp != NULL; tmp = tmp->NextSubkey) {
+        if(tmp->Subkey != NULL)
+            n++;
+    }
+
+    Props = (const char **) AllocChunk(it8, sizeof(char *) * n);
+    if (Props == NULL) {
+        *SubpropertyNames = NULL;
+        return 0;
+    }
+
+    // Pass#2 - Fill pointers: each slot gets the subkey of the entry being visited
+    n = 0;
+    for (tmp = p;  tmp != NULL; tmp = tmp->NextSubkey) {
+        if(tmp->Subkey != NULL)
+            Props[n++] = tmp ->Subkey;
+    }
+    *SubpropertyNames = Props;
+    return n;
+}
+
+// Returns the index of the first patch whose SampleID field matches cPatch
+// under cmsstrcasecmp(), or -1 when there is none.
+static
+int LocatePatch(cmsIT8* it8, const char* cPatch)
+{
+    TABLE* Tab = GetTable(it8);
+    int nPatch = 0;
+
+    while (nPatch < Tab ->nPatches) {
+
+        const char* Name = GetData(it8, nPatch, Tab ->SampleID);
+
+        if (Name != NULL && cmsstrcasecmp(Name, cPatch) == 0)
+            return nPatch;
+
+        nPatch++;
+    }
+
+    // Not found; no error is raised here
+    return -1;
+}
+
+
+// Returns the index of the first patch whose SampleID field holds no data
+// (GetData() gives NULL for it), or -1 when every patch of the current table
+// already has one.
+static
+int LocateEmptyPatch(cmsIT8* it8)
+{
+    TABLE* Tab = GetTable(it8);
+    int nPatch;
+
+    for (nPatch = 0; nPatch < Tab ->nPatches; nPatch++) {
+
+        // Only the SampleID column is inspected
+        if (GetData(it8, nPatch, Tab ->SampleID) == NULL)
+            return nPatch;
+    }
+
+    return -1;
+}
+
+// Returns the column index of the data format field matching cSample under
+// cmsstrcasecmp(), or -1 when the current table has no such field.
+static
+int LocateSample(cmsIT8* it8, const char* cSample)
+{
+    TABLE* Tab = GetTable(it8);
+    int nField;
+
+    for (nField = 0; nField < Tab->nSamples; nField++) {
+
+        const char* Name = GetDataFormat(it8, nField);
+        if (Name == NULL) continue;
+
+        if (cmsstrcasecmp(Name, cSample) == 0)
+            return nField;
+    }
+
+    return -1;
+}
+
+
+// Public wrapper around LocateSample(): index of field cSample, or -1.
+int CMSEXPORT cmsIT8FindDataFormat(cmsHANDLE hIT8, const char* cSample)
+{
+    _cmsAssert(hIT8 != NULL);
+
+    // The handle is used as a cmsIT8 pointer
+    return LocateSample((cmsIT8*) hIT8, cSample);
+}
+
+
+
+// Returns what GetData() gives for cell (row, col) of the current table.
+const char* CMSEXPORT cmsIT8GetDataRowCol(cmsHANDLE hIT8, int row, int col)
+{
+    _cmsAssert(hIT8 != NULL);
+
+    // The handle is used as a cmsIT8 pointer
+    return GetData((cmsIT8*) hIT8, row, col);
+}
+
+
+// Numeric flavor of cmsIT8GetDataRowCol(): the cell text is converted with
+// ParseFloatNumber(); a NULL cell yields 0.0.
+cmsFloat64Number CMSEXPORT cmsIT8GetDataRowColDbl(cmsHANDLE hIT8, int row, int col)
+{
+    const char* Cell = cmsIT8GetDataRowCol(hIT8, row, col);
+
+    if (Cell != NULL)
+        return ParseFloatNumber(Cell);
+    return 0.0;
+}
+
+
+// Stores Val at cell (row, col) of the current table through SetData().
+cmsBool CMSEXPORT cmsIT8SetDataRowCol(cmsHANDLE hIT8, int row, int col, const char* Val)
+{
+    _cmsAssert(hIT8 != NULL);
+
+    // The handle is used as a cmsIT8 pointer
+    return SetData((cmsIT8*) hIT8, row, col, Val);
+}
+
+
+// Formats Val with the object's DoubleFormatter and stores the text at (row, col).
+cmsBool CMSEXPORT cmsIT8SetDataRowColDbl(cmsHANDLE hIT8, int row, int col, cmsFloat64Number Val)
+{
+    char Text[256];
+    cmsIT8* Obj = (cmsIT8*) hIT8;
+
+    _cmsAssert(hIT8 != NULL);
+
+    snprintf(Text, 255, Obj ->DoubleFormatter, Val);
+    return SetData(Obj, row, col, Text);
+}
+
+
+
+// Looks up the cell addressed by patch name and sample (field) name in the
+// current table. Returns NULL when either name cannot be located.
+const char* CMSEXPORT cmsIT8GetData(cmsHANDLE hIT8, const char* cPatch, const char* cSample)
+{
+    cmsIT8* Obj = (cmsIT8*) hIT8;
+    int Col, Row;
+
+    _cmsAssert(hIT8 != NULL);
+
+    Col = LocateSample(Obj, cSample);
+    if (Col < 0)
+        return NULL;
+
+    Row = LocatePatch(Obj, cPatch);
+    if (Row < 0)
+        return NULL;
+
+    return GetData(Obj, Row, Col);
+}
+
+
+// Numeric flavor of cmsIT8GetData(). A cell that cannot be found yields 0.0, the
+// same convention cmsIT8GetDataRowColDbl() follows.
+cmsFloat64Number CMSEXPORT cmsIT8GetDataDbl(cmsHANDLE  it8, const char* cPatch, const char* cSample)
+{
+    const char* Buffer = cmsIT8GetData(it8, cPatch, cSample);
+    if (Buffer == NULL) return 0.0;   // never hand NULL to ParseFloatNumber()
+    return ParseFloatNumber(Buffer);
+}
+
+
+
+// Stores Val in the cell addressed by patch cPatch and field cSample. When cSample
+// is "SAMPLE_ID" the value goes to the first patch that still has no ID instead.
+cmsBool CMSEXPORT cmsIT8SetData(cmsHANDLE hIT8, const char* cPatch, const char* cSample, const char *Val)
+{
+    cmsIT8* Obj = (cmsIT8*) hIT8;
+    TABLE* Tab;
+    int Col, Row;
+
+    _cmsAssert(hIT8 != NULL);
+
+    Tab = GetTable(Obj);
+    Col = LocateSample(Obj, cSample);
+    if (Col < 0)
+        return FALSE;
+
+    // nPatches is still 0: run the allocators and cook the pointers first
+    if (Tab ->nPatches == 0) {
+
+        AllocateDataFormat(Obj);
+        AllocateDataSet(Obj);
+        CookPointers(Obj);
+    }
+
+    if (cmsstrcasecmp(cSample, "SAMPLE_ID") != 0) {
+
+        Row = LocatePatch(Obj, cPatch);
+        if (Row < 0)
+            return FALSE;
+    }
+    else {
+
+        Row = LocateEmptyPatch(Obj);
+        if (Row < 0)
+            return SynError(Obj, "Couldn't add more patches '%s'\n", cPatch);
+
+        Col = Tab ->SampleID;
+    }
+
+    return SetData(Obj, Row, Col, Val);
+}
+
+
+// Formats Val with the object's DoubleFormatter and stores it through cmsIT8SetData().
+cmsBool CMSEXPORT cmsIT8SetDataDbl(cmsHANDLE hIT8, const char* cPatch,
+                                   const char* cSample, cmsFloat64Number Val)
+{
+    char Text[256];
+    cmsIT8* Obj = (cmsIT8*) hIT8;
+
+    _cmsAssert(hIT8 != NULL);
+
+    snprintf(Text, 255, Obj ->DoubleFormatter, Val);
+    return cmsIT8SetData(hIT8, cPatch, cSample, Text);
+}
+
+// Returns the name (SampleID field) of patch nPatch, or NULL if it has none.
+// With a NULL buffer the internal string is returned; otherwise it is copied
+// into buffer, which should get MAXSTR at least.
+const char* CMSEXPORT cmsIT8GetPatchName(cmsHANDLE hIT8, int nPatch, char* buffer)
+{
+    cmsIT8* Obj = (cmsIT8*) hIT8;
+    char* Name;
+
+    _cmsAssert(hIT8 != NULL);
+
+    Name = GetData(Obj, nPatch, GetTable(Obj)->SampleID);
+    if (Name == NULL) return NULL;
+
+    if (buffer != NULL) {
+        strncpy(buffer, Name, MAXSTR-1);
+        buffer[MAXSTR-1] = 0;
+        return buffer;
+    }
+    return Name;
+}
+// Index of the patch named cPatch in the current table, or -1 (see LocatePatch).
+int CMSEXPORT cmsIT8GetPatchByName(cmsHANDLE hIT8, const char *cPatch)
+{
+    cmsIT8* Obj = (cmsIT8*) hIT8;
+    _cmsAssert(hIT8 != NULL);
+    return LocatePatch(Obj, cPatch);
+}
+
+// Number of tables currently held by the IT8 object (its TablesCount
+// member).
+cmsUInt32Number CMSEXPORT cmsIT8TableCount(cmsHANDLE hIT8)
+{
+    _cmsAssert(hIT8 != NULL);
+
+    return ((cmsIT8*) hIT8) ->TablesCount;
+}
+
+// This handles the "LABEL" extension. The cell addressed by (cSet, cField) is
+// expected to hold "Label nTable Type"; the table it names becomes current.
+// cField defaults to "LABEL"; a NULL or empty ExpectedType disables the type
+// check. Returns -1 on failure, else the result of cmsIT8SetTable().
+int CMSEXPORT cmsIT8SetTableByLabel(cmsHANDLE hIT8, const char* cSet, const char* cField, const char* ExpectedType)
+{
+    char Label[256];
+    char Type[256];
+    cmsUInt32Number nTable;
+    const char* Ref;
+
+    _cmsAssert(hIT8 != NULL);
+
+    // A NULL or empty field name selects the default "LABEL" column
+    if (cField == NULL || *cField == 0)
+        cField = "LABEL";
+
+    Ref = cmsIT8GetData(hIT8, cSet, cField);
+    if (Ref == NULL)
+        return -1;
+
+    // The cell must hold three whitespace-separated items
+    if (sscanf(Ref, "%255s %u %255s", Label, &nTable, Type) != 3)
+        return -1;
+
+    // The type is compared only when the caller asked for a specific one
+    if (ExpectedType != NULL && *ExpectedType != 0) {
+
+        if (cmsstrcasecmp(Type, ExpectedType) != 0) return -1;
+    }
+
+    return cmsIT8SetTable(hIT8, nTable);
+}
+// Makes field cSample the index (SampleID) column of the current table.
+// Returns FALSE when LocateSample() cannot find that field.
+cmsBool CMSEXPORT cmsIT8SetIndexColumn(cmsHANDLE hIT8, const char* cSample)
+{
+    cmsIT8* Obj = (cmsIT8*) hIT8;
+    int Col;
+
+    _cmsAssert(hIT8 != NULL);
+
+    Col = LocateSample(Obj, cSample);
+    if (Col != -1) {
+        Obj ->Tab[Obj ->nTable].SampleID = Col;
+        return TRUE;
+    }
+    return FALSE;
+}
+// Sets the format string used when doubles are stored as text. A NULL Formatter
+// selects DEFAULT_DBL_FORMAT; the stored string is always 0-terminated.
+void CMSEXPORT cmsIT8DefineDblFormat(cmsHANDLE hIT8, const char* Formatter)
+{
+    cmsIT8* Obj = (cmsIT8*) hIT8;
+    const size_t Room = sizeof(Obj ->DoubleFormatter);
+
+    _cmsAssert(hIT8 != NULL);
+
+    if (Formatter != NULL)
+        strncpy(Obj ->DoubleFormatter, Formatter, Room);
+    else
+        strcpy(Obj ->DoubleFormatter, DEFAULT_DBL_FORMAT);
+    Obj ->DoubleFormatter[Room - 1] = 0;
+}
+
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscnvrt.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscnvrt.cpp
new file mode 100644
index 0000000000..706c450212
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmscnvrt.cpp
@@ -0,0 +1,1162 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// Link several profiles to obtain a single LUT modelling the whole color transform. Intents, black point
+// compensation and adaptation parameters may vary across profiles. BPC and Adaptation refer to the PCS
+// after the profile, i.e. BPC[0] refers to the connection between profile(0) and profile(1)
+cmsPipeline* _cmsLinkProfiles(cmsContext     ContextID,
+                              cmsUInt32Number nProfiles,
+                              cmsUInt32Number Intents[],
+                              cmsHPROFILE     hProfiles[],
+                              cmsBool         BPC[],
+                              cmsFloat64Number AdaptationStates[],
+                              cmsUInt32Number dwFlags);
+
+//---------------------------------------------------------------------------------
+
+// This is the default routine for ICC-style intents. A user may decide to override it by using a plugin.
+// Supported intents are perceptual, relative colorimetric, saturation and ICC-absolute colorimetric
+static
+cmsPipeline* DefaultICCintents(cmsContext     ContextID,
+                               cmsUInt32Number nProfiles,
+                               cmsUInt32Number Intents[],
+                               cmsHPROFILE     hProfiles[],
+                               cmsBool         BPC[],
+                               cmsFloat64Number AdaptationStates[],
+                               cmsUInt32Number dwFlags);
+
+//---------------------------------------------------------------------------------
+
+// This is the entry for black-preserving K-only intents, which are non-ICC. The last profile has to be an output profile
+// to do the trick (no devicelinks allowed at that position)
+static
+cmsPipeline*  BlackPreservingKOnlyIntents(cmsContext     ContextID,
+                                          cmsUInt32Number nProfiles,
+                                          cmsUInt32Number Intents[],
+                                          cmsHPROFILE     hProfiles[],
+                                          cmsBool         BPC[],
+                                          cmsFloat64Number AdaptationStates[],
+                                          cmsUInt32Number dwFlags);
+
+//---------------------------------------------------------------------------------
+
+// This is the entry for black-plane preserving intents, which are non-ICC. Again, the last profile has to be an output profile
+// to do the trick (no devicelinks allowed at that position)
+static
+cmsPipeline*  BlackPreservingKPlaneIntents(cmsContext     ContextID,
+                                           cmsUInt32Number nProfiles,
+                                           cmsUInt32Number Intents[],
+                                           cmsHPROFILE     hProfiles[],
+                                           cmsBool         BPC[],
+                                           cmsFloat64Number AdaptationStates[],
+                                           cmsUInt32Number dwFlags);
+
+//---------------------------------------------------------------------------------
+
+
+// This is a structure holding implementations for all supported intents.
+typedef struct _cms_intents_list {
+    // Node of a singly linked list; used by the built-in table and by the plug-in list
+    cmsUInt32Number Intent;                 // Intent number compared by SearchIntent()
+    char            Description[256];       // Text describing the intent
+    cmsIntentFn     Link;                   // Function registered for this intent
+    struct _cms_intents_list*  Next;        // Following node; NULL terminates the list
+
+} cmsIntentsList;
+
+
+// Built-in intents, chained through Next so the array can be walked as a linked list
+static cmsIntentsList DefaultIntents[] = {
+    // { intent, description, handler, next }: each Next points to the following slot, the last one is NULL
+    { INTENT_PERCEPTUAL,                            "Perceptual",                                   DefaultICCintents,            &DefaultIntents[1] },
+    { INTENT_RELATIVE_COLORIMETRIC,                 "Relative colorimetric",                        DefaultICCintents,            &DefaultIntents[2] },
+    { INTENT_SATURATION,                            "Saturation",                                   DefaultICCintents,            &DefaultIntents[3] },
+    { INTENT_ABSOLUTE_COLORIMETRIC,                 "Absolute colorimetric",                        DefaultICCintents,            &DefaultIntents[4] },
+    { INTENT_PRESERVE_K_ONLY_PERCEPTUAL,            "Perceptual preserving black ink",              BlackPreservingKOnlyIntents,  &DefaultIntents[5] },
+    { INTENT_PRESERVE_K_ONLY_RELATIVE_COLORIMETRIC, "Relative colorimetric preserving black ink",   BlackPreservingKOnlyIntents,  &DefaultIntents[6] },
+    { INTENT_PRESERVE_K_ONLY_SATURATION,            "Saturation preserving black ink",              BlackPreservingKOnlyIntents,  &DefaultIntents[7] },
+    { INTENT_PRESERVE_K_PLANE_PERCEPTUAL,           "Perceptual preserving black plane",            BlackPreservingKPlaneIntents, &DefaultIntents[8] },
+    { INTENT_PRESERVE_K_PLANE_RELATIVE_COLORIMETRIC,"Relative colorimetric preserving black plane", BlackPreservingKPlaneIntents, &DefaultIntents[9] },
+    { INTENT_PRESERVE_K_PLANE_SATURATION,           "Saturation preserving black plane",            BlackPreservingKPlaneIntents, NULL }
+};
+
+
+// File-level plug-in chunk for intents, initialized with NULL (presumably an empty list head - TODO confirm)
+_cmsIntentsPluginChunkType _cmsIntentsPluginChunk = { NULL };
+
+// Duplicates the intents plug-in list of src into the memory pool of ctx
+static
+void DupPluginIntentsList(struct _cmsContext_struct* ctx, 
+                                               const struct _cmsContext_struct* src)
+{
+    _cmsIntentsPluginChunkType* srcChunk = (_cmsIntentsPluginChunkType*) src->chunks[IntentPlugin];
+    _cmsIntentsPluginChunkType copyChunk = { NULL };
+    cmsIntentsList* tail = NULL;
+    cmsIntentsList* node = srcChunk->Intents;
+
+    // Clone node by node, always appending at the tail so the order is kept
+    while (node != NULL) {
+
+        cmsIntentsList* clone = (cmsIntentsList*) _cmsSubAllocDup(ctx->MemPool, node, sizeof(cmsIntentsList));
+
+        // Allocation failed: bail out without touching the chunk slot of ctx
+        if (clone == NULL)
+            return;
+
+        clone->Next = NULL;
+
+        if (tail == NULL)
+            copyChunk.Intents = clone;      // first clone becomes the new head
+        else
+            tail->Next = clone;
+
+        tail = clone;
+        node = node->Next;
+    }
+
+    // The head itself is copied into the pool and installed in the new context
+    ctx->chunks[IntentPlugin] = _cmsSubAllocDup(ctx->MemPool, &copyChunk, sizeof(_cmsIntentsPluginChunkType));
+}
+
+void  _cmsAllocIntentsPluginChunk(struct _cmsContext_struct* ctx, 
+                                         const struct _cmsContext_struct* src)
+{
+    // With a source context its plug-in list is duplicated; without one the
+    // new context receives a pool copy of an empty (NULL-headed) chunk.
+    static _cmsIntentsPluginChunkType EmptyChunk = { NULL };
+
+    if (src == NULL) {
+        ctx->chunks[IntentPlugin] = _cmsSubAllocDup(ctx->MemPool, &EmptyChunk, sizeof(_cmsIntentsPluginChunkType));
+        return;
+    }
+    DupPluginIntentsList(ctx, src);
+}
+
+
+// Search for an intent: plug-in entries first, then the built-in table. Returns NULL if not found
+static
+cmsIntentsList* SearchIntent(cmsContext ContextID, cmsUInt32Number Intent)
+{
+    _cmsIntentsPluginChunkType* chunk = ( _cmsIntentsPluginChunkType*) _cmsContextGetClientChunk(ContextID, IntentPlugin);
+    cmsIntentsList* node = chunk->Intents;
+
+    // Entries registered in the context chunk win over the defaults
+    while (node != NULL && node->Intent != Intent) node = node->Next;
+    if (node != NULL) return node;
+    // Fall back to the built-in intents; ends up as NULL when nothing matches
+    node = DefaultIntents;
+    while (node != NULL && node->Intent != Intent) node = node->Next;
+    return node;
+}
+
+// Black point compensation. Implemented as a linear scaling in XYZ. Black points
+// should come relative to the white point. Fills a matrix/offset pair: m gets a
+// diagonal scaling and off the matching translation.
+static
+void ComputeBlackPointCompensation(const cmsCIEXYZ* BlackPointIn,
+                                   const cmsCIEXYZ* BlackPointOut,
+                                   cmsMAT3* m, cmsVEC3* off)
+{
+    const cmsCIEXYZ* D50 = cmsD50_XYZ();
+    cmsFloat64Number denX, denY, denZ;          // bpin - D50, per axis
+    cmsFloat64Number scaleX, scaleY, scaleZ;    // slope a of a*x + b
+    cmsFloat64Number shiftX, shiftY, shiftZ;    // intercept b of a*x + b
+
+    // The matrix m and the offset off have to satisfy
+    //   [m]*bpin + off = bpout
+    //   [m]*D50  + off = D50
+    // which, axis by axis, is the line a*x + b where
+    //   a = (bpout - D50) / (bpin - D50)
+    //   b = - D50 * (bpout - bpin) / (bpin - D50)
+    denX = BlackPointIn->X - D50->X;
+    denY = BlackPointIn->Y - D50->Y;
+    denZ = BlackPointIn->Z - D50->Z;
+
+    scaleX = (BlackPointOut->X - D50->X) / denX;
+    scaleY = (BlackPointOut->Y - D50->Y) / denY;
+    scaleZ = (BlackPointOut->Z - D50->Z) / denZ;
+
+    shiftX = - D50->X * (BlackPointOut->X - BlackPointIn->X) / denX;
+    shiftY = - D50->Y * (BlackPointOut->Y - BlackPointIn->Y) / denY;
+    shiftZ = - D50->Z * (BlackPointOut->Z - BlackPointIn->Z) / denZ;
+
+    _cmsVEC3init(&m->v[0], scaleX, 0, 0);
+    _cmsVEC3init(&m->v[1], 0, scaleY, 0);
+    _cmsVEC3init(&m->v[2], 0, 0, scaleZ);
+    _cmsVEC3init(off, shiftX, shiftY, shiftZ);
+}
+
+
+// Approximate a blackbody illuminant based on CHAD information. Returns the
+// temperature found by cmsTempFromWhitePoint(), or -1.0 on any failure.
+static
+cmsFloat64Number CHAD2Temp(const cmsMAT3* Chad)
+{
+    // Convert D50 across inverse CHAD to get the absolute white point
+    cmsVEC3 d, s;
+    cmsCIEXYZ Dest;
+    cmsCIExyY DestChromaticity;
+    cmsFloat64Number TempK;
+    cmsMAT3 m1, m2;
+
+    // Singular CHAD: report it with the negative sentinel callers test for (FALSE would read as 0.0)
+    m1 = *Chad;
+    if (!_cmsMAT3inverse(&m1, &m2)) return -1.0;
+
+    s.n[VX] = cmsD50_XYZ() -> X;
+    s.n[VY] = cmsD50_XYZ() -> Y;
+    s.n[VZ] = cmsD50_XYZ() -> Z;
+    _cmsMAT3eval(&d, &m2, &s);
+
+    Dest.X = d.n[VX];
+    Dest.Y = d.n[VY];
+    Dest.Z = d.n[VZ];
+
+    cmsXYZ2xyY(&DestChromaticity, &Dest);
+    if (!cmsTempFromWhitePoint(&TempK, &DestChromaticity))
+        return -1.0;
+
+    return TempK;
+}
+
+// Compute in Chad the adaptation matrix from the white of a given temperature to D50
+static
+void Temp2CHAD(cmsMAT3* Chad, cmsFloat64Number Temp)
+{
+    cmsCIExyY WhitexyY;
+    cmsCIEXYZ WhiteXYZ;
+
+    cmsWhitePointFromTemp(&WhitexyY, Temp);     // temperature -> chromaticity
+    cmsxyY2XYZ(&WhiteXYZ, &WhitexyY);           // chromaticity -> XYZ
+    _cmsAdaptationMatrix(Chad, NULL, &WhiteXYZ, cmsD50_XYZ());
+}
+
+// Join scalings to obtain relative input to absolute and then to relative output.
+// Result is stored in the 3x3 matrix m. Returns FALSE if a CHAD cannot be inverted or a temperature cannot be estimated
+static
+cmsBool  ComputeAbsoluteIntent(cmsFloat64Number AdaptationState,
+                               const cmsCIEXYZ* WhitePointIn,
+                               const cmsMAT3* ChromaticAdaptationMatrixIn,
+                               const cmsCIEXYZ* WhitePointOut,
+                               const cmsMAT3* ChromaticAdaptationMatrixOut,
+                               cmsMAT3* m)
+{
+    cmsMAT3 Scale, m1, m2, m3, m4;
+
+    // TODO: Follow Marc Mahy's recommendation to check if CHAD is same by using M1*M2 == M2*M1. If so, do nothing.
+    // TODO: Add support for ArgyllArts tag
+
+    // Adaptation state
+    if (AdaptationState == 1.0) {
+
+        // Observer is fully adapted. Keep chromatic adaptation.
+        // That is the standard V4 behaviour
+        _cmsVEC3init(&m->v[0], WhitePointIn->X / WhitePointOut->X, 0, 0);
+        _cmsVEC3init(&m->v[1], 0, WhitePointIn->Y / WhitePointOut->Y, 0);
+        _cmsVEC3init(&m->v[2], 0, 0, WhitePointIn->Z / WhitePointOut->Z);
+
+    }
+    else  {
+
+        // Incomplete adaptation. This is an advanced feature.
+        _cmsVEC3init(&Scale.v[0], WhitePointIn->X / WhitePointOut->X, 0, 0);
+        _cmsVEC3init(&Scale.v[1], 0,  WhitePointIn->Y / WhitePointOut->Y, 0);
+        _cmsVEC3init(&Scale.v[2], 0, 0,  WhitePointIn->Z / WhitePointOut->Z);
+        // Scale: diagonal matrix with the per-axis ratio of input to output white point
+
+        if (AdaptationState == 0.0) {
+            // No adaptation at all
+            m1 = *ChromaticAdaptationMatrixOut;
+            _cmsMAT3per(&m2, &m1, &Scale);
+            // m2 holds CHAD from output white to D50 times abs. col. scaling
+
+            // Observer is not adapted, undo the chromatic adaptation
+            _cmsMAT3per(m, &m2, ChromaticAdaptationMatrixOut);
+            // NOTE(review): the product just stored in m is overwritten below before any use - confirm intent
+            m3 = *ChromaticAdaptationMatrixIn;
+            if (!_cmsMAT3inverse(&m3, &m4)) return FALSE;
+            _cmsMAT3per(m, &m2, &m4);
+
+        } else {
+
+            cmsMAT3 MixedCHAD;
+            cmsFloat64Number TempSrc, TempDest, Temp;
+
+            m1 = *ChromaticAdaptationMatrixIn;
+            if (!_cmsMAT3inverse(&m1, &m2)) return FALSE;
+            _cmsMAT3per(&m3, &m2, &Scale);
+            // m3 holds CHAD from input white to D50 times abs. col. scaling
+
+            TempSrc  = CHAD2Temp(ChromaticAdaptationMatrixIn);
+            TempDest = CHAD2Temp(ChromaticAdaptationMatrixOut);
+
+            if (TempSrc < 0.0 || TempDest < 0.0) return FALSE; // Something went wrong
+
+            if (_cmsMAT3isIdentity(&Scale) && fabs(TempSrc - TempDest) < 0.01) {
+                // No scaling and both temperatures agree within 0.01: the result is the identity
+                _cmsMAT3identity(m);
+                return TRUE;
+            }
+            // Blend both temperatures linearly, weighted by the adaptation state
+            Temp = (1.0 - AdaptationState) * TempDest + AdaptationState * TempSrc;
+
+            // Get a CHAD from whatever output temperature to D50. This replaces output CHAD
+            Temp2CHAD(&MixedCHAD, Temp);
+
+            _cmsMAT3per(m, &m3, &MixedCHAD);
+        }
+
+    }
+    return TRUE;
+
+}
+
+// Just to see if m matrix should be applied: TRUE when m is close to identity
+// and off is close to zero. A NULL off is taken as "no offset".
+static
+cmsBool IsEmptyLayer(cmsMAT3* m, cmsVEC3* off)
+{
+    cmsFloat64Number diff = 0;
+    cmsMAT3 Ident;
+    int i;
+
+    if (m == NULL && off == NULL) return TRUE;  // NULL is allowed as an empty layer
+    if (m == NULL && off != NULL) return FALSE; // This is an internal error
+
+    // Sum of absolute deviations of the 9 matrix cells from the identity
+    _cmsMAT3identity(&Ident);
+    for (i=0; i < 3*3; i++)
+        diff += fabs(((cmsFloat64Number*)m)[i] - ((cmsFloat64Number*)&Ident)[i]);
+    // Plus the offset, if any: a matrix without offset must not dereference NULL
+    for (i=0; off != NULL && i < 3; i++)
+        diff += fabs(((cmsFloat64Number*)off)[i]);
+
+    return (diff < 0.002);
+}
+
+
+// Compute the conversion layer (matrix m plus offset off) linking hProfiles[i-1]
+// to hProfiles[i]. Returns FALSE only when ComputeAbsoluteIntent() fails.
+static
+cmsBool ComputeConversion(cmsUInt32Number i, 
+                          cmsHPROFILE hProfiles[],
+                          cmsUInt32Number Intent,
+                          cmsBool BPC,
+                          cmsFloat64Number AdaptationState,
+                          cmsMAT3* m, cmsVEC3* off)
+{
+    int axis;
+
+    // Start from "no conversion"; an identity layer is detected later on
+    _cmsMAT3identity(m);
+    _cmsVEC3init(off, 0, 0, 0);
+
+    if (Intent != INTENT_ABSOLUTE_COLORIMETRIC) {
+
+        // Every intent but abs. colorimetric may apply BPC, and nothing else
+        if (BPC) {
+
+            cmsCIEXYZ SrcBlack, DstBlack;
+            cmsBool   Differ;
+
+            cmsDetectBlackPoint(&SrcBlack, hProfiles[i-1], Intent, 0);
+            cmsDetectDestinationBlackPoint(&DstBlack, hProfiles[i], Intent, 0);
+
+            // If black points are equal, then do nothing
+            Differ = (SrcBlack.X != DstBlack.X) ||
+                     (SrcBlack.Y != DstBlack.Y) ||
+                     (SrcBlack.Z != DstBlack.Z);
+
+            if (Differ)
+                ComputeBlackPointCompensation(&SrcBlack, &DstBlack, m, off);
+        }
+    }
+    else {
+
+        // Abs. colorimetric needs media white point and CHAD of both profiles
+        cmsCIEXYZ SrcWhite, DstWhite;
+        cmsMAT3   SrcCHAD, DstCHAD;
+
+        // Source side first, then destination side
+        _cmsReadMediaWhitePoint(&SrcWhite, hProfiles[i-1]);
+        _cmsReadCHAD(&SrcCHAD, hProfiles[i-1]);
+
+        _cmsReadMediaWhitePoint(&DstWhite, hProfiles[i]);
+        _cmsReadCHAD(&DstCHAD, hProfiles[i]);
+
+        if (!ComputeAbsoluteIntent(AdaptationState,
+                                   &SrcWhite, &SrcCHAD,
+                                   &DstWhite, &DstCHAD, m))
+            return FALSE;
+    }
+
+    // The stage works on encoded XYZ, i.e. XYZ divided by c = MAX_ENCODEABLE_XYZ.
+    // With x = x'c and y = y'c, the relation y = Mx + Off becomes
+    //
+    //   y' = (Mx'c + Off) / c = Mx' + (Off / c)
+    //
+    // so the matrix stays as it is and only the offset is divided by c.
+    for (axis = 0; axis < 3; axis++)
+        off->n[axis] /= MAX_ENCODEABLE_XYZ;
+
+    return TRUE;
+}
+
+
+// Add a conversion stage if needed. If a matrix/offset m is given, it applies to XYZ space.
+// Returns FALSE on a colorspace mismatch or when a stage cannot be inserted.
+static
+cmsBool AddConversion(cmsPipeline* Result, cmsColorSpaceSignature InPCS, cmsColorSpaceSignature OutPCS, cmsMAT3* m, cmsVEC3* off)
+{
+    // Stages appended for each PCS combination ("M" is the matrix/offset
+    // stage, used only when IsEmptyLayer() reports that m/off do something):
+    //
+    //    XYZ -> XYZ :  [M]
+    //    XYZ -> Lab :  [M], XYZ2Lab
+    //    Lab -> XYZ :  Lab2XYZ, [M]
+    //    Lab -> Lab :  Lab2XYZ, M, XYZ2Lab   (nothing at all without M)
+    //
+    // Any other input space must simply equal the output space.
+    //
+    // Stages are created and inserted strictly in the order listed, and the
+    // first failing insertion aborts with FALSE.
+
+    cmsBool SrcIsXYZ = (InPCS  == cmsSigXYZData);
+    cmsBool SrcIsLab = (InPCS  == cmsSigLabData);
+    cmsBool DstIsXYZ = (OutPCS == cmsSigXYZData);
+    cmsBool DstIsLab = (OutPCS == cmsSigLabData);
+    cmsBool NeedMatrix;
+
+    // On colorspaces other than PCS, check for same space
+    if (!SrcIsXYZ && !SrcIsLab) {
+
+        if (InPCS != OutPCS) return FALSE;
+        return TRUE;
+    }
+
+    // A PCS input can only be connected to a PCS output
+    if (!DstIsXYZ && !DstIsLab)
+        return FALSE;   // Colorspace mismatch
+
+    // Is there a matrix/offset worth applying?
+    NeedMatrix = !IsEmptyLayer(m, off);
+
+    // Leave Lab when the target is XYZ or when the matrix has to be applied
+    if (SrcIsLab && (DstIsXYZ || NeedMatrix)) {
+
+        cmsStage* ToXYZ = _cmsStageAllocLab2XYZ(Result->ContextID);
+
+        if (!cmsPipelineInsertStage(Result, cmsAT_END, ToXYZ))
+            return FALSE;
+    }
+
+    // The matrix/offset itself, always operating on XYZ
+    if (NeedMatrix) {
+
+        cmsStage* Matrix = cmsStageAllocMatrix(Result->ContextID, 3, 3,
+                                               (cmsFloat64Number*) m,
+                                               (cmsFloat64Number*) off);
+
+        if (!cmsPipelineInsertStage(Result, cmsAT_END, Matrix))
+            return FALSE;
+    }
+
+    // Enter Lab when coming from XYZ or after the matrix has been applied
+    if (DstIsLab && (SrcIsXYZ || NeedMatrix)) {
+
+        cmsStage* ToLab = _cmsStageAllocXYZ2Lab(Result->ContextID);
+
+        if (!cmsPipelineInsertStage(Result, cmsAT_END, ToLab))
+            return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Is a given space compatible with another? Identical spaces always are; in
+// addition MCH4/CMYK and XYZ/Lab are accepted as pairs, in either order.
+static
+cmsBool ColorSpaceIsCompatible(cmsColorSpaceSignature a, cmsColorSpaceSignature b)
+{
+    cmsBool FourInkPair, PcsPair;
+
+    // MCH4 may stand in for CMYK and vice versa
+    FourInkPair = (a == cmsSig4colorData && b == cmsSigCmykData) ||
+                  (a == cmsSigCmykData && b == cmsSig4colorData);
+
+    // XYZ and Lab are interchangeable as they can be computed one from other
+    PcsPair = (a == cmsSigXYZData && b == cmsSigLabData) ||
+              (a == cmsSigLabData && b == cmsSigXYZData);
+
+    return (a == b || FourInkPair || PcsPair) ? TRUE : FALSE;
+}
+
+
+// Default handler for ICC-style intents. Walks the profile chain, concatenating the
+// LUT of each profile and adding PCS conversion stages where needed. Returns the
+// resulting pipeline, or NULL on error (a colorspace mismatch is also signalled).
+static
+cmsPipeline* DefaultICCintents(cmsContext       ContextID,
+                               cmsUInt32Number  nProfiles,
+                               cmsUInt32Number  TheIntents[],
+                               cmsHPROFILE      hProfiles[],
+                               cmsBool          BPC[],
+                               cmsFloat64Number AdaptationStates[],
+                               cmsUInt32Number  dwFlags)
+{
+    cmsPipeline* Lut = NULL;
+    cmsPipeline* Result;
+    cmsHPROFILE hProfile;
+    cmsMAT3 m;
+    cmsVEC3 off;
+    cmsColorSpaceSignature ColorSpaceIn, ColorSpaceOut = cmsSigLabData, CurrentColorSpace;
+    cmsProfileClassSignature ClassSig;
+    cmsUInt32Number  i, Intent;
+
+    // For safety
+    if (nProfiles == 0) return NULL;
+
+    // Allocate an empty LUT for holding the result. 0 as channel count means 'undefined'
+    Result = cmsPipelineAlloc(ContextID, 0, 0);
+    if (Result == NULL) return NULL;
+
+    CurrentColorSpace = cmsGetColorSpace(hProfiles[0]);
+
+    for (i=0; i < nProfiles; i++) {
+
+        cmsBool  lIsDeviceLink, lIsInput;
+
+        hProfile      = hProfiles[i];
+        ClassSig      = cmsGetDeviceClass(hProfile);
+        lIsDeviceLink = (ClassSig == cmsSigLinkClass || ClassSig == cmsSigAbstractClass );
+
+        // First profile is used as input unless devicelink or abstract
+        if ((i == 0) && !lIsDeviceLink) {
+            lIsInput = TRUE;
+        }
+        else {
+            // Else use profile in the input direction if current space is not PCS
+            lIsInput = (CurrentColorSpace != cmsSigXYZData) &&
+                       (CurrentColorSpace != cmsSigLabData);
+        }
+
+        Intent        = TheIntents[i];
+
+        if (lIsInput || lIsDeviceLink) {
+
+            ColorSpaceIn    = cmsGetColorSpace(hProfile);
+            ColorSpaceOut   = cmsGetPCS(hProfile);
+        }
+        else {
+
+            ColorSpaceIn    = cmsGetPCS(hProfile);
+            ColorSpaceOut   = cmsGetColorSpace(hProfile);
+        }
+
+        if (!ColorSpaceIsCompatible(ColorSpaceIn, CurrentColorSpace)) {
+
+            cmsSignalError(ContextID, cmsERROR_COLORSPACE_CHECK, "ColorSpace mismatch");
+            goto Error;
+        }
+
+        // If devicelink is found, then no custom intent is allowed and we can
+        // read the LUT to be applied. Settings don't apply here.
+        if (lIsDeviceLink || ((ClassSig == cmsSigNamedColorClass) && (nProfiles == 1))) {
+
+            // Get the involved LUT from the profile
+            Lut = _cmsReadDevicelinkLUT(hProfile, Intent);
+            if (Lut == NULL) goto Error;
+
+            // What about abstract profiles?
+             if (ClassSig == cmsSigAbstractClass && i > 0) {
+                if (!ComputeConversion(i, hProfiles, Intent, BPC[i], AdaptationStates[i], &m, &off)) goto Error;
+             }
+             else {
+                _cmsMAT3identity(&m);
+                _cmsVEC3init(&off, 0, 0, 0);
+             }
+
+            // Connect the current space to the input space of this LUT
+            if (!AddConversion(Result, CurrentColorSpace, ColorSpaceIn, &m, &off)) goto Error;
+
+        }
+        else {
+
+            if (lIsInput) {
+                // Input direction means non-pcs connection, so proceed like devicelinks
+                Lut = _cmsReadInputLUT(hProfile, Intent);
+                if (Lut == NULL) goto Error;
+            }
+            else {
+
+                // Output direction means PCS connection. Intent may apply here
+                Lut = _cmsReadOutputLUT(hProfile, Intent);
+                if (Lut == NULL) goto Error;
+
+                // Intent-dependent layer (absolute colorimetric or BPC) goes before the LUT
+                if (!ComputeConversion(i, hProfiles, Intent, BPC[i], AdaptationStates[i], &m, &off)) goto Error;
+                if (!AddConversion(Result, CurrentColorSpace, ColorSpaceIn, &m, &off)) goto Error;
+
+            }
+        }
+
+        // Concatenate to the output LUT
+        if (!cmsPipelineCat(Result, Lut))
+            goto Error;
+
+        cmsPipelineFree(Lut);
+        Lut = NULL;
+
+        // Update current space
+        CurrentColorSpace = ColorSpaceOut;
+    }
+
+    // Check for non-negatives clip
+    if (dwFlags & cmsFLAGS_NONEGATIVES) {
+
+           if (ColorSpaceOut == cmsSigGrayData ||
+                  ColorSpaceOut == cmsSigRgbData ||
+                  ColorSpaceOut == cmsSigCmykData) {
+
+                  cmsStage* clip = _cmsStageClipNegatives(Result->ContextID, cmsChannelsOf(ColorSpaceOut));
+                  if (clip == NULL) goto Error;
+
+                  if (!cmsPipelineInsertStage(Result, cmsAT_END, clip))
+                         goto Error;
+           }
+
+    }
+
+    return Result;
+
+Error:
+    // Lut is non-NULL only while a profile LUT has been read but not yet released
+    if (Lut != NULL) cmsPipelineFree(Lut);
+    if (Result != NULL) cmsPipelineFree(Result);
+    return NULL;
+}
+
+
+// Wrapper for DLL calling convention: CMSEXPORT entry point that forwards all arguments unchanged to DefaultICCintents()
+cmsPipeline*  CMSEXPORT _cmsDefaultICCintents(cmsContext     ContextID,
+                                              cmsUInt32Number nProfiles,
+                                              cmsUInt32Number TheIntents[],
+                                              cmsHPROFILE     hProfiles[],
+                                              cmsBool         BPC[],
+                                              cmsFloat64Number AdaptationStates[],
+                                              cmsUInt32Number dwFlags)
+{
+    return DefaultICCintents(ContextID, nProfiles, TheIntents, hProfiles, BPC, AdaptationStates, dwFlags);
+}
+
+// Black preserving intents ---------------------------------------------------------------------------------------------
+
+// Translate black-preserving intents to ICC ones; any other value is returned unchanged
+static
+cmsUInt32Number TranslateNonICCIntents(cmsUInt32Number Intent)
+{
+    // Both the K-only and the K-plane flavour map to the same ICC intent
+    if (Intent == INTENT_PRESERVE_K_ONLY_PERCEPTUAL ||
+        Intent == INTENT_PRESERVE_K_PLANE_PERCEPTUAL)
+        return INTENT_PERCEPTUAL;
+
+    if (Intent == INTENT_PRESERVE_K_ONLY_RELATIVE_COLORIMETRIC ||
+        Intent == INTENT_PRESERVE_K_PLANE_RELATIVE_COLORIMETRIC)
+        return INTENT_RELATIVE_COLORIMETRIC;
+
+    if (Intent == INTENT_PRESERVE_K_ONLY_SATURATION ||
+        Intent == INTENT_PRESERVE_K_PLANE_SATURATION)
+        return INTENT_SATURATION;
+
+    // Not a black-preserving intent: pass it through
+    return Intent;
+}
+
+// Sampler for Black-only preserving CMYK->CMYK transforms
+// Cargo handed to BlackPreservingGrayOnlySampler() while the CLUT is sampled
+typedef struct {
+    cmsPipeline*    cmyk2cmyk;      // The original transform
+    cmsToneCurve*   KTone;          // Black-to-black tone curve
+
+} GrayOnlyParams;
+
+
+// Preserve black only if that is the only ink used
+static
+int BlackPreservingGrayOnlySampler(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void* Cargo)
+{
+    GrayOnlyParams* params = (GrayOnlyParams*) Cargo;
+    cmsBool OnlyBlackInk = (In[0] == 0) && (In[1] == 0) && (In[2] == 0);
+
+    if (!OnlyBlackInk) {
+        // Some C, M or Y present: keep the normal transform for this node
+        params->cmyk2cmyk->Eval16Fn(In, Out, params->cmyk2cmyk->Data);
+        return TRUE;
+    }
+
+    // Pure K input stays pure K, mapped through the black-to-black tone curve.
+    // TAC does not apply because it is black ink!
+    Out[0] = Out[1] = Out[2] = 0;
+    Out[3] = cmsEvalToneCurve16(params->KTone, In[3]);
+    return TRUE;
+}
+
+// This is the entry for black-preserving K-only intents, which are non-ICC. Returns the new pipeline, or NULL on error
+static
+cmsPipeline*  BlackPreservingKOnlyIntents(cmsContext     ContextID,
+                                          cmsUInt32Number nProfiles,
+                                          cmsUInt32Number TheIntents[],
+                                          cmsHPROFILE     hProfiles[],
+                                          cmsBool         BPC[],
+                                          cmsFloat64Number AdaptationStates[],
+                                          cmsUInt32Number dwFlags)
+{
+    GrayOnlyParams  bp;
+    cmsPipeline*    Result;
+    cmsUInt32Number ICCIntents[256];
+    cmsStage*         CLUT;
+    cmsUInt32Number i, nGridPoints;
+
+    // ICCIntents has room for 256 entries; chains longer than 255 profiles are rejected
+    // Sanity check
+    if (nProfiles < 1 || nProfiles > 255) return NULL;
+
+    // Translate black-preserving intents to ICC ones
+    for (i=0; i < nProfiles; i++)
+        ICCIntents[i] = TranslateNonICCIntents(TheIntents[i]);
+
+    // Check for non-cmyk profiles: unless both the first and the last profile are CMYK, use the plain ICC intents
+    if (cmsGetColorSpace(hProfiles[0]) != cmsSigCmykData ||
+        cmsGetColorSpace(hProfiles[nProfiles-1]) != cmsSigCmykData)
+           return DefaultICCintents(ContextID, nProfiles, ICCIntents, hProfiles, BPC, AdaptationStates, dwFlags);
+
+    memset(&bp, 0, sizeof(bp));
+    // bp starts zeroed so the Error path can tell which members were actually created
+    // Allocate an empty LUT for holding the result
+    Result = cmsPipelineAlloc(ContextID, 4, 4);
+    if (Result == NULL) return NULL;
+
+    // Create a LUT holding normal ICC transform
+    bp.cmyk2cmyk = DefaultICCintents(ContextID,
+        nProfiles,
+        ICCIntents,
+        hProfiles,
+        BPC,
+        AdaptationStates,
+        dwFlags);
+
+    if (bp.cmyk2cmyk == NULL) goto Error;
+
+    // Now, compute the tone curve
+    bp.KTone = _cmsBuildKToneCurve(ContextID,
+        4096,
+        nProfiles,
+        ICCIntents,
+        hProfiles,
+        BPC,
+        AdaptationStates,
+        dwFlags);
+
+    if (bp.KTone == NULL) goto Error;
+
+    // The transform is baked into a 4->4 16-bit CLUT sampled through BlackPreservingGrayOnlySampler()
+    // How many gridpoints are we going to use?
+    nGridPoints = _cmsReasonableGridpointsByColorspace(cmsSigCmykData, dwFlags);
+
+    // Create the CLUT. 16 bits
+    CLUT = cmsStageAllocCLut16bit(ContextID, nGridPoints, 4, 4, NULL);
+    if (CLUT == NULL) goto Error;
+    // NOTE(review): if the insertion below fails, CLUT is not released on the Error path - confirm ownership
+    // This is the one and only MPE in this LUT
+    if (!cmsPipelineInsertStage(Result, cmsAT_BEGIN, CLUT))
+        goto Error;
+
+    // Sample it. We cannot afford pre/post linearization this time.
+    if (!cmsStageSampleCLut16bit(CLUT, BlackPreservingGrayOnlySampler, (void*) &bp, 0))
+        goto Error;
+
+    // Get rid of xform and tone curve
+    cmsPipelineFree(bp.cmyk2cmyk);
+    cmsFreeToneCurve(bp.KTone);
+
+    return Result;
+
+Error:
+    // Release whatever was created before the failure
+    if (bp.cmyk2cmyk != NULL) cmsPipelineFree(bp.cmyk2cmyk);
+    if (bp.KTone != NULL)  cmsFreeToneCurve(bp.KTone);
+    if (Result != NULL) cmsPipelineFree(Result);
+    return NULL;
+
+}
+
+// K Plane-preserving CMYK to CMYK ------------------------------------------------------------------------------------
+// Cargo read by BlackPreservingSampler() through its void* argument
+typedef struct {
+
+    cmsPipeline*     cmyk2cmyk;     // The original transform
+    cmsHTRANSFORM    hProofOutput;  // Output CMYK to Lab (last profile)
+    cmsHTRANSFORM    cmyk2Lab;      // The input chain
+    cmsToneCurve*    KTone;         // Black-to-black tone curve
+    cmsPipeline*     LabK2cmyk;     // The output profile
+    cmsFloat64Number MaxError;      // presumably the largest deltaE found while sampling - TODO confirm
+
+    cmsHTRANSFORM    hRoundTrip;    // NOTE(review): purpose not visible in this part of the file
+    cmsFloat64Number MaxTAC;        // Limit the sampler compares against the C+M+Y+K sum
+
+
+} PreserveKPlaneParams;
+
+
+// CLUT sampler for the K-plane preserving intent. The CLUT will be stored at 16 bits, but calculations are performed at cmsFloat32Number precision. Cargo is a PreserveKPlaneParams*. Always returns TRUE.
+static
+int BlackPreservingSampler(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void* Cargo)
+{
+    int i;
+    cmsFloat32Number Inf[4], Outf[4];
+    cmsFloat32Number LabK[4];
+    cmsFloat64Number SumCMY, SumCMYK, Error, Ratio;
+    cmsCIELab ColorimetricLab, BlackPreservingLab;
+    PreserveKPlaneParams* bp = (PreserveKPlaneParams*) Cargo;
+
+    // Convert from 16 bits to floating point
+    for (i=0; i < 4; i++)
+        Inf[i] = (cmsFloat32Number) (In[i] / 65535.0);
+
+    // Get the K across Tone curve. This is the K value the output is forced to below
+    LabK[3] = cmsEvalToneCurveFloat(bp ->KTone, Inf[3]);
+
+    // If going across black only, keep black only
+    if (In[0] == 0 && In[1] == 0 && In[2] == 0) {
+
+        Out[0] = Out[1] = Out[2] = 0;
+        Out[3] = _cmsQuickSaturateWord(LabK[3] * 65535.0);
+        return TRUE;
+    }
+
+    // Try the original transform,
+    cmsPipelineEvalFloat( Inf, Outf, bp ->cmyk2cmyk);
+
+    // Store a copy of the floating point result into 16-bit. This is the fallback answer
+    for (i=0; i < 4; i++)
+            Out[i] = _cmsQuickSaturateWord(Outf[i] * 65535.0);
+
+    // Maybe K is already ok (mostly on K=0). The tolerance is 3 steps of the 16 bits encoding
+    if ( fabs(Outf[3] - LabK[3]) < (3.0 / 65535.0) ) {
+        return TRUE;
+    }
+
+    // K differ, measure and keep Lab measurement for further usage
+    // this is done in relative colorimetric intent
+    cmsDoTransform(bp->hProofOutput, Out, &ColorimetricLab, 1);
+
+    // Is not black only and the transform doesn't keep black.
+    // Obtain the Lab of output CMYK. After that we have Lab + K (LabK[3] was set from the tone curve above)
+    cmsDoTransform(bp ->cmyk2Lab, Outf, LabK, 1);
+
+    // Obtain the corresponding CMY using reverse interpolation
+    // (K is fixed in LabK[3])
+    if (!cmsPipelineEvalReverseFloat(LabK, Outf, Outf, bp ->LabK2cmyk)) {
+
+        // Cannot find a suitable value, so use colorimetric xform
+        // which is already stored in Out[]
+        return TRUE;
+    }
+
+    // Make sure to pass through K (which now is fixed)
+    Outf[3] = LabK[3];
+
+    // Apply TAC if needed. Only C, M and Y are scaled down; K is never touched
+    SumCMY   = Outf[0]  + Outf[1] + Outf[2];
+    SumCMYK  = SumCMY + Outf[3];
+
+    if (SumCMYK > bp ->MaxTAC) {
+
+        Ratio = 1 - ((SumCMYK - bp->MaxTAC) / SumCMY);
+        if (Ratio < 0)
+            Ratio = 0;
+    }
+    else
+       Ratio = 1.0;
+
+    Out[0] = _cmsQuickSaturateWord(Outf[0] * Ratio * 65535.0);     // C
+    Out[1] = _cmsQuickSaturateWord(Outf[1] * Ratio * 65535.0);     // M
+    Out[2] = _cmsQuickSaturateWord(Outf[2] * Ratio * 65535.0);     // Y
+    Out[3] = _cmsQuickSaturateWord(Outf[3] * 65535.0);
+
+    // Estimate the error (this goes 16 bits to Lab DBL) and keep the worst case seen so far
+    cmsDoTransform(bp->hProofOutput, Out, &BlackPreservingLab, 1);
+    Error = cmsDeltaE(&ColorimetricLab, &BlackPreservingLab);
+    if (Error > bp -> MaxError)
+        bp->MaxError = Error;
+
+    return TRUE;
+}
+
+// This is the entry for black-plane preserving, which are non-ICC. Chains that do not start in CMYK, or
+// do not end in CMYK / an output-class profile, are delegated to DefaultICCintents with the ICC
+// equivalents of the intents. Otherwise the 4->4 result holds a single CLUT filled by BlackPreservingSampler.
+// Returns NULL if nProfiles is out of range or the result pipeline cannot be allocated.
+// NOTE(review): failures after that point still return the (incomplete) Result rather than NULL - confirm intended.
+static
+cmsPipeline* BlackPreservingKPlaneIntents(cmsContext     ContextID,
+                                          cmsUInt32Number nProfiles,
+                                          cmsUInt32Number TheIntents[],
+                                          cmsHPROFILE     hProfiles[],
+                                          cmsBool         BPC[],
+                                          cmsFloat64Number AdaptationStates[],
+                                          cmsUInt32Number dwFlags)
+{
+    PreserveKPlaneParams bp;
+    cmsPipeline*    Result = NULL;
+    cmsUInt32Number ICCIntents[256];
+    cmsStage*         CLUT;
+    cmsUInt32Number i, nGridPoints;
+    cmsHPROFILE hLab = NULL;        // released in Cleanup, so that no exit path can leak it
+
+    // Sanity check
+    if (nProfiles < 1 || nProfiles > 255) return NULL;
+
+    // Translate black-preserving intents to ICC ones
+    for (i=0; i < nProfiles; i++)
+        ICCIntents[i] = TranslateNonICCIntents(TheIntents[i]);
+
+    // Check for non-cmyk profiles
+    if (cmsGetColorSpace(hProfiles[0]) != cmsSigCmykData ||
+        !(cmsGetColorSpace(hProfiles[nProfiles-1]) == cmsSigCmykData ||
+        cmsGetDeviceClass(hProfiles[nProfiles-1]) == cmsSigOutputClass))
+           return  DefaultICCintents(ContextID, nProfiles, ICCIntents, hProfiles, BPC, AdaptationStates, dwFlags);
+
+    // Allocate an empty LUT for holding the result
+    Result = cmsPipelineAlloc(ContextID, 4, 4);
+    if (Result == NULL) return NULL;
+
+    memset(&bp, 0, sizeof(bp));     // Cleanup relies on unset members being NULL
+
+    // We need the input LUT of the last profile, assuming this one is responsible of
+    // black generation. This LUT will be searched in inverse order.
+    bp.LabK2cmyk = _cmsReadInputLUT(hProfiles[nProfiles-1], INTENT_RELATIVE_COLORIMETRIC);
+    if (bp.LabK2cmyk == NULL) goto Cleanup;
+
+    // Get total area coverage (in 0..1 domain)
+    bp.MaxTAC = cmsDetectTAC(hProfiles[nProfiles-1]) / 100.0;
+    if (bp.MaxTAC <= 0) goto Cleanup;
+
+    // Create a LUT holding normal ICC transform
+    bp.cmyk2cmyk = DefaultICCintents(ContextID,
+                                         nProfiles,
+                                         ICCIntents,
+                                         hProfiles,
+                                         BPC,
+                                         AdaptationStates,
+                                         dwFlags);
+    if (bp.cmyk2cmyk == NULL) goto Cleanup;
+
+    // Now the tone curve
+    bp.KTone = _cmsBuildKToneCurve(ContextID, 4096, nProfiles,
+                                   ICCIntents,
+                                   hProfiles,
+                                   BPC,
+                                   AdaptationStates,
+                                   dwFlags);
+    if (bp.KTone == NULL) goto Cleanup;
+
+    // To measure the output, Last profile to Lab
+    hLab = cmsCreateLab4ProfileTHR(ContextID, NULL);
+    if (hLab == NULL) goto Cleanup;
+    bp.hProofOutput = cmsCreateTransformTHR(ContextID, hProfiles[nProfiles-1],
+                                         CHANNELS_SH(4)|BYTES_SH(2), hLab, TYPE_Lab_DBL,
+                                         INTENT_RELATIVE_COLORIMETRIC,
+                                         cmsFLAGS_NOCACHE|cmsFLAGS_NOOPTIMIZE);
+    if ( bp.hProofOutput == NULL) goto Cleanup;
+
+    // Same as anterior, but lab in the 0..1 range
+    bp.cmyk2Lab = cmsCreateTransformTHR(ContextID, hProfiles[nProfiles-1],
+                                         FLOAT_SH(1)|CHANNELS_SH(4)|BYTES_SH(4), hLab,
+                                         FLOAT_SH(1)|CHANNELS_SH(3)|BYTES_SH(4),
+                                         INTENT_RELATIVE_COLORIMETRIC,
+                                         cmsFLAGS_NOCACHE|cmsFLAGS_NOOPTIMIZE);
+    if (bp.cmyk2Lab == NULL) goto Cleanup;
+
+    // Error estimation (for debug only)
+    bp.MaxError = 0;
+
+    // How many gridpoints are we going to use?
+    nGridPoints = _cmsReasonableGridpointsByColorspace(cmsSigCmykData, dwFlags);
+
+    CLUT = cmsStageAllocCLut16bit(ContextID, nGridPoints, 4, 4, NULL);
+    if (CLUT == NULL) goto Cleanup;
+
+    if (!cmsPipelineInsertStage(Result, cmsAT_BEGIN, CLUT))
+        goto Cleanup;
+
+    cmsStageSampleCLut16bit(CLUT, BlackPreservingSampler, (void*) &bp, 0);
+
+Cleanup:
+    if (bp.cmyk2cmyk) cmsPipelineFree(bp.cmyk2cmyk);
+    if (bp.cmyk2Lab) cmsDeleteTransform(bp.cmyk2Lab);
+    if (bp.hProofOutput) cmsDeleteTransform(bp.hProofOutput);
+    if (hLab) cmsCloseProfile(hLab);    // previously skipped whenever a transform could not be created
+    if (bp.KTone) cmsFreeToneCurve(bp.KTone);
+    if (bp.LabK2cmyk) cmsPipelineFree(bp.LabK2cmyk);
+
+    return Result;
+}
+
+// Link routines ------------------------------------------------------------------------------------------------------
+
+// Chain several profiles into a single LUT. It just checks the parameters and then calls the handler
+// for the first intent in chain. The handler may be user-defined. Is up to the handler to deal with the
+// rest of intents in chain. A maximum of 255 profiles at time are supported, which is pretty reasonable.
+cmsPipeline* _cmsLinkProfiles(cmsContext     ContextID,
+                              cmsUInt32Number nProfiles,
+                              cmsUInt32Number TheIntents[],
+                              cmsHPROFILE     hProfiles[],
+                              cmsBool         BPC[],
+                              cmsFloat64Number AdaptationStates[],
+                              cmsUInt32Number dwFlags)
+{
+    cmsUInt32Number idx;
+    cmsIntentsList* Handler;
+
+    // Refuse empty chains as well as chains above the supported maximum
+    if (nProfiles <= 0 || nProfiles > 255) {
+         cmsSignalError(ContextID, cmsERROR_RANGE, "Couldn't link '%d' profiles", nProfiles);
+        return NULL;
+    }
+
+    // Settle, in place, whether black point compensation is really needed or allowed.
+    // Following Adobe's document, BPC never applies to abs colorimetric and always
+    // applies on V4 perceptual and saturation.
+    for (idx = 0; idx < nProfiles; idx++) {
+        switch (TheIntents[idx]) {
+        case INTENT_ABSOLUTE_COLORIMETRIC:
+            BPC[idx] = FALSE;
+            break;
+
+        case INTENT_PERCEPTUAL:
+        case INTENT_SATURATION:
+            // Force BPC for V4 profiles in perceptual and saturation
+            if (cmsGetEncodedICCversion(hProfiles[idx]) >= 0x4000000)
+                BPC[idx] = TRUE;
+            break;
+
+        default:
+            break;
+        }
+    }
+
+    // The first intent in the chain selects the handler, so custom intents cannot be mixed
+    // within one chain. Dealing with the remaining intents is up to that handler.
+    Handler = SearchIntent(ContextID, TheIntents[0]);
+    if (Handler == NULL) {
+        cmsSignalError(ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported intent '%d'", TheIntents[0]);
+        return NULL;
+    }
+
+    // Call the handler
+    return Handler ->Link(ContextID, nProfiles, TheIntents, hProfiles, BPC, AdaptationStates, dwFlags);
+}
+
+// -------------------------------------------------------------------------------------------------
+
+// Get information about available intents. nMax is the maximum space for the supplied "Codes"
+// and "Descriptions" (either may be NULL to skip it). Intents registered in the context are listed
+// first, then the built-in ones. The function returns the total number of intents, which may be
+// greater than nMax, although the arrays are not populated beyond this level.
+cmsUInt32Number CMSEXPORT cmsGetSupportedIntentsTHR(cmsContext ContextID, cmsUInt32Number nMax, cmsUInt32Number* Codes, char** Descriptions)
+{
+    _cmsIntentsPluginChunkType* ctx = ( _cmsIntentsPluginChunkType*) _cmsContextGetClientChunk(ContextID, IntentPlugin);
+    cmsIntentsList* pt;
+    cmsUInt32Number nIntents;
+
+    // Intents added through plug-ins
+    for (nIntents=0, pt = ctx->Intents; pt != NULL; pt = pt -> Next)
+    {
+        if (nIntents < nMax) {
+            if (Codes != NULL)
+                Codes[nIntents] = pt ->Intent;
+            if (Descriptions != NULL)
+                Descriptions[nIntents] = pt ->Description;
+        }
+        nIntents++;
+    }
+
+    // Built-in intents. nIntents must NOT be reset here: restarting from zero would overwrite the
+    // plug-in entries and leave them out of the returned total.
+    for (pt = DefaultIntents; pt != NULL; pt = pt -> Next)
+    {
+        if (nIntents < nMax) {
+            if (Codes != NULL)
+                Codes[nIntents] = pt ->Intent;
+            if (Descriptions != NULL)
+                Descriptions[nIntents] = pt ->Description;
+        }
+        nIntents++;
+    }
+
+    return nIntents;
+}
+
+cmsUInt32Number CMSEXPORT cmsGetSupportedIntents(cmsUInt32Number nMax, cmsUInt32Number* Codes, char** Descriptions)
+{   // Same query as cmsGetSupportedIntentsTHR, issued with a NULL ContextID
+    return cmsGetSupportedIntentsTHR(NULL, nMax, Codes, Descriptions);
+}
+
+// The plug-in registration. User can add new intents or override default routines
+cmsBool  _cmsRegisterRenderingIntentPlugin(cmsContext id, cmsPluginBase* Data)
+{
+    _cmsIntentsPluginChunkType* Chunk = ( _cmsIntentsPluginChunkType*) _cmsContextGetClientChunk(id, IntentPlugin);
+    cmsPluginRenderingIntent* Source;
+    cmsIntentsList* Node;
+
+    // A NULL plug-in asks to reset the custom intents of this context
+    if (Data == NULL) {
+        Chunk ->Intents = NULL;
+        return TRUE;
+    }
+
+    Source = (cmsPluginRenderingIntent*) Data;
+    Node = (cmsIntentsList*) _cmsPluginMalloc(id, sizeof(cmsIntentsList));
+    if (Node == NULL) return FALSE;
+
+    // Copy the intent, truncating the description so that it always ends in a zero
+    Node ->Intent = Source ->Intent;
+    Node ->Link   = Source ->Link;
+    strncpy(Node ->Description, Source ->Description, sizeof(Node ->Description)-1);
+    Node ->Description[sizeof(Node ->Description)-1] = 0;
+
+    // The new entry becomes the head of the context list
+    Node ->Next = Chunk ->Intents;
+    Chunk ->Intents = Node;
+
+    return TRUE;
+}
+
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmserr.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmserr.cpp
new file mode 100644
index 0000000000..bb386eaaf2
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmserr.cpp
@@ -0,0 +1,663 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+
+#include "lcms2_internal.h"
+
+
+// This function is here to help applications to prevent mixing lcms versions on header and shared objects. It returns the LCMS_VERSION value this source file was compiled with.
+int CMSEXPORT cmsGetEncodedCMMversion(void)
+{
+       return LCMS_VERSION;
+}
+
+// I am so tired about incompatibilities on those functions that here are some replacements
+// that hopefully would be fully portable.
+
+// Compare two strings ignoring case. Returns 0 when they match, otherwise the difference between the first pair of upper-cased characters that differ
+int CMSEXPORT cmsstrcasecmp(const char* s1, const char* s2)
+{
+    const unsigned char* a = (const unsigned char*) s1;
+    const unsigned char* b = (const unsigned char*) s2;
+
+    for (; toupper(*a) == toupper(*b); a++, b++)
+        if (*a == '\0')
+            return 0;
+
+    return (toupper(*a) - toupper(*b));
+}
+
+// long int because C99 specifies ftell in such way (7.19.9.2)
+long int CMSEXPORT cmsfilelength(FILE* f)
+{
+    long int p , n;
+
+    p = ftell(f); // register current file position
+    if (p == -1L) 
+        return -1L;
+
+    if (fseek(f, 0, SEEK_END) != 0) {
+        return -1L;
+    }
+
+    n = ftell(f);
+    fseek(f, p, SEEK_SET); // file position restored
+
+    return n;
+}
+
+
+// Memory handling ------------------------------------------------------------------
+//
+// This is the interface to low-level memory management routines. By default a simple
+// wrapping to malloc/free/realloc is provided, although there is a limit on the max
+// amount of memory that can be reclaimed. This is mostly a safety feature to prevent
+// bogus or evil code to allocate huge blocks that otherwise lcms would never need.
+
+#define MAX_MEMORY_FOR_ALLOC  ((cmsUInt32Number)(1024U*1024U*512U))
+
+// User may override this behaviour by using a memory plug-in, which basically replaces
+// the default memory management functions. In this case, no check is performed and it
+// is up to the plug-in writer to stay on the safe side. There are only three functions
+// required to be implemented: malloc, realloc and free, although the user may want to
+// replace the optional mallocZero, calloc and dup as well.
+
+cmsBool   _cmsRegisterMemHandlerPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// *********************************************************************************
+
+// This is the default memory allocation function: a plain malloc behind a very
+// coarse check of the amount of memory requested, just to prevent exploits
+static
+void* _cmsMallocDefaultFn(cmsContext ContextID, cmsUInt32Number size)
+{
+    cmsUNUSED_PARAMETER(ContextID);
+
+    // Never allow over maximum
+    return (size <= MAX_MEMORY_FOR_ALLOC) ? malloc(size) : NULL;
+
+}
+
+// Generic allocate & zero: the block comes from _cmsMalloc and is cleared when the allocation succeeds
+static
+void* _cmsMallocZeroDefaultFn(cmsContext ContextID, cmsUInt32Number size)
+{
+    void* Block = _cmsMalloc(ContextID, size);
+
+    if (Block != NULL)
+        memset(Block, 0, size);
+    return Block;
+}
+
+
+// The default free function. The only check performed is against NULL pointers
+static
+void _cmsFreeDefaultFn(cmsContext ContextID, void *Ptr)
+{
+    cmsUNUSED_PARAMETER(ContextID);
+
+    // C99 defines free(NULL) as a no-op, so this guard is not strictly needed; it is kept just in case
+    if (Ptr != NULL) {
+        free(Ptr);
+    }
+}
+
+// The default realloc function. Again it checks for exploits. If Ptr is NULL,
+// realloc behaves the same way as malloc and allocates a new block of size bytes.
+static
+void* _cmsReallocDefaultFn(cmsContext ContextID, void* Ptr, cmsUInt32Number size)
+{
+    cmsUNUSED_PARAMETER(ContextID);
+    // Never realloc over 512Mb. The block in Ptr is left untouched in that case
+    if (size > MAX_MEMORY_FOR_ALLOC)
+        return NULL;
+
+    return realloc(Ptr, size);
+}
+
+
+// The default calloc function. Allocates an array of num elements, each one of size bytes
+// all memory is initialized to zero. Returns NULL on empty requests, on overflow and above the 512Mb cap.
+static
+void* _cmsCallocDefaultFn(cmsContext ContextID, cmsUInt32Number num, cmsUInt32Number size)
+{
+    cmsUInt32Number Total = num * size;     // unsigned product, may have wrapped around; validated below
+
+    // Preserve calloc behaviour. Past this point size cannot be zero, which makes the division below safe
+    if (Total == 0) return NULL;
+
+    // Safe check for overflow.
+    if (num >= UINT_MAX / size) return NULL;
+
+    // Check for overflow (a wrapped product is smaller than its factors)
+    if (Total < num || Total < size) {
+        return NULL;
+    }
+
+    if (Total > MAX_MEMORY_FOR_ALLOC) return NULL;  // Never alloc over 512Mb
+
+    return _cmsMallocZero(ContextID, Total);
+}
+
+// Generic block duplication: a new block of size bytes that carries a copy of Org
+static
+void* _cmsDupDefaultFn(cmsContext ContextID, const void* Org, cmsUInt32Number size)
+{
+    void* Copy;
+    // Never dup over 512Mb
+    if (size > MAX_MEMORY_FOR_ALLOC)
+        return NULL;
+
+    Copy = _cmsMalloc(ContextID, size);
+    if (Copy == NULL || Org == NULL)
+        return Copy;    // allocation failed, or there is nothing to copy from
+    memmove(Copy, Org, size);
+    return Copy;
+}
+
+
+// Pointers to memory manager functions in Context0
+_cmsMemPluginChunkType _cmsMemPluginChunk = { _cmsMallocDefaultFn, _cmsMallocZeroDefaultFn, _cmsFreeDefaultFn, 
+                                              _cmsReallocDefaultFn, _cmsCallocDefaultFn,    _cmsDupDefaultFn
+                                            };
+
+
+// Reset and duplicate memory manager: the chunk of src is cloned when src is
+// given, otherwise the context falls back to its own default memory manager
+void _cmsAllocMemPluginChunk(struct _cmsContext_struct* ctx, const struct _cmsContext_struct* src)
+{
+    _cmsAssert(ctx != NULL);
+
+    if (src == NULL) {
+
+        // To reset it, we use the default allocators, which cannot be overridden
+        ctx ->chunks[MemPlugin] = &ctx ->DefaultMemoryManager;
+        return;
+    }
+
+    // Duplicate the chunk of the source context inside the pool of this one
+    ctx ->chunks[MemPlugin] = _cmsSubAllocDup(ctx ->MemPool, src ->chunks[MemPlugin], sizeof(_cmsMemPluginChunkType));
+}
+
+// Auxiliary to fill memory management functions from plugin (or context 0 defaults)
+void _cmsInstallAllocFunctions(cmsPluginMemHandler* Plugin, _cmsMemPluginChunkType* ptr)
+{
+    // Without a plug-in the whole table is copied from the context 0 defaults
+    if (Plugin == NULL) {
+
+        memcpy(ptr, &_cmsMemPluginChunk, sizeof(_cmsMemPluginChunk));
+        return;
+    }
+
+    // These three callbacks are taken from the plug-in exactly as supplied
+    ptr ->MallocPtr  = Plugin -> MallocPtr;
+    ptr ->FreePtr    = Plugin -> FreePtr;
+    ptr ->ReallocPtr = Plugin -> ReallocPtr;
+
+    // The remaining ones are optional: each entry the plug-in leaves
+    // as NULL reverts to the corresponding default implementation
+    ptr ->MallocZeroPtr = (Plugin ->MallocZeroPtr != NULL) ? Plugin ->MallocZeroPtr
+                                                           : _cmsMallocZeroDefaultFn;
+    ptr ->CallocPtr     = (Plugin ->CallocPtr != NULL)     ? Plugin ->CallocPtr
+                                                           : _cmsCallocDefaultFn;
+    ptr ->DupPtr        = (Plugin ->DupPtr != NULL)        ? Plugin ->DupPtr
+                                                           : _cmsDupDefaultFn;
+}
+
+
+// Plug-in replacement entry
+cmsBool  _cmsRegisterMemHandlerPlugin(cmsContext ContextID, cmsPluginBase *Data)
+{
+    cmsPluginMemHandler* Handler = (cmsPluginMemHandler*) Data;
+    _cmsMemPluginChunkType* Chunk;
+
+    // NULL forces to reset to defaults. In this special case, the defaults are stored in the context structure.
+    // Remaining plug-ins do NOT have any copy in the context structure, but this one is somehow special as the
+    // context internal data should be malloc'ed by using those functions.
+    if (Data == NULL) {
+
+        // Return to the default allocators. There is nothing to do for a NULL ContextID
+        if (ContextID != NULL) {
+            struct _cmsContext_struct* ctx = ( struct _cmsContext_struct*) ContextID;
+            ctx->chunks[MemPlugin] = (void*) &ctx->DefaultMemoryManager;
+        }
+        return TRUE;
+    }
+
+    // Check for required callbacks
+    if (Handler -> MallocPtr == NULL) return FALSE;
+    if (Handler -> FreePtr == NULL) return FALSE;
+    if (Handler -> ReallocPtr == NULL) return FALSE;
+
+    // Set replacement functions in the chunk that belongs to this context
+    Chunk = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    if (Chunk != NULL) {
+        _cmsInstallAllocFunctions(Handler, Chunk);
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+// Generic allocate: forwards to the MallocPtr found in the MemPlugin chunk of ContextID
+void* CMSEXPORT _cmsMalloc(cmsContext ContextID, cmsUInt32Number size)
+{
+    _cmsMemPluginChunkType* ptr = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    return ptr ->MallocPtr(ContextID, size);
+}
+
+// Generic allocate & zero: forwards to the MallocZeroPtr found in the MemPlugin chunk of ContextID
+void* CMSEXPORT _cmsMallocZero(cmsContext ContextID, cmsUInt32Number size)
+{
+    _cmsMemPluginChunkType* ptr = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    return ptr->MallocZeroPtr(ContextID, size);
+}
+
+// Generic calloc: forwards to the CallocPtr found in the MemPlugin chunk of ContextID
+void* CMSEXPORT _cmsCalloc(cmsContext ContextID, cmsUInt32Number num, cmsUInt32Number size)
+{
+    _cmsMemPluginChunkType* ptr = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    return ptr->CallocPtr(ContextID, num, size);
+}
+
+// Generic reallocate: forwards to the ReallocPtr found in the MemPlugin chunk of ContextID
+void* CMSEXPORT _cmsRealloc(cmsContext ContextID, void* Ptr, cmsUInt32Number size)
+{
+    _cmsMemPluginChunkType* ptr = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    return ptr->ReallocPtr(ContextID, Ptr, size);
+}
+
+// Generic free memory. NULL pointers are ignored here, before the FreePtr of the context is even looked up
+void CMSEXPORT _cmsFree(cmsContext ContextID, void* Ptr)
+{
+    if (Ptr != NULL) {
+        _cmsMemPluginChunkType* ptr = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+        ptr ->FreePtr(ContextID, Ptr);
+    }
+}
+
+// Generic block duplication: forwards to the DupPtr found in the MemPlugin chunk of ContextID
+void* CMSEXPORT _cmsDupMem(cmsContext ContextID, const void* Org, cmsUInt32Number size)
+{
+    _cmsMemPluginChunkType* ptr = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    return ptr ->DupPtr(ContextID, Org, size);
+}
+
+// ********************************************************************************************
+
+// Sub allocation takes care of many pointers of small size. The memory allocated in
+// this way has to be freed at once. Next function allocates a single chunk for the linked list.
+// I prefer this method over realloc due to the big impact on throughput realloc may have if
+// memory is being swapped to disk. This approach is safer (although that may not be true on all platforms)
+static
+_cmsSubAllocator_chunk* _cmsCreateSubAllocChunk(cmsContext ContextID, cmsUInt32Number Initial)
+{
+    _cmsSubAllocator_chunk* Node;
+    cmsUInt32Number Capacity;
+
+    // A request of zero bytes selects the default, which is 20K
+    Capacity = (Initial == 0) ? (cmsUInt32Number) (20*1024) : Initial;
+
+    // Create the container, which comes back zero filled
+    Node = (_cmsSubAllocator_chunk*) _cmsMallocZero(ContextID, sizeof(_cmsSubAllocator_chunk));
+    if (Node == NULL)
+        return NULL;
+
+    // Then the storage the container manages
+    Node ->Block = (cmsUInt8Number*) _cmsMalloc(ContextID, Capacity);
+    if (Node ->Block == NULL) {
+
+        // Something went wrong, the container is of no use alone
+        _cmsFree(ContextID, Node);
+        return NULL;
+    }
+
+    Node ->BlockSize = Capacity;
+    Node ->Used      = 0;
+    Node ->next      = NULL;
+    return Node;
+}
+
+// The suballocator is nothing but a pointer to the first element in the list. We also keep
+// the ID of the owning context in this structure.
+_cmsSubAllocator* _cmsCreateSubAlloc(cmsContext ContextID, cmsUInt32Number Initial)
+{
+    // Create the container
+    _cmsSubAllocator* Pool = (_cmsSubAllocator*) _cmsMallocZero(ContextID, sizeof(_cmsSubAllocator));
+
+    if (Pool != NULL) {
+        Pool ->ContextID = ContextID;
+        Pool ->h = _cmsCreateSubAllocChunk(ContextID, Initial);
+
+        // Without a first chunk the pool is useless, so it is dropped
+        if (Pool ->h == NULL) {
+            _cmsFree(ContextID, Pool);
+            Pool = NULL;
+        }
+    }
+
+    return Pool;
+}
+
+
+// Get rid of whole linked list, and then of the suballocator itself
+void _cmsSubAllocDestroy(_cmsSubAllocator* sub)
+{
+    _cmsSubAllocator_chunk* Current = sub ->h;
+    while (Current != NULL) {
+        // Take the link before the node that holds it goes away
+        _cmsSubAllocator_chunk* Following = Current ->next;
+        if (Current ->Block != NULL) _cmsFree(sub ->ContextID, Current ->Block);
+        _cmsFree(sub ->ContextID, Current);
+        Current = Following;
+    }
+
+    // Free the header
+    _cmsFree(sub ->ContextID, sub);
+}
+
+
+// Get a pointer to small memory block. Returns NULL only when a new chunk is needed and cannot be created.
+void*  _cmsSubAlloc(_cmsSubAllocator* sub, cmsUInt32Number size)
+{
+    cmsUInt32Number Free = sub -> h ->BlockSize - sub -> h -> Used;    // room left in the head chunk
+    cmsUInt8Number* ptr;
+
+    size = _cmsALIGNMEM(size);
+
+    // Check for memory. If there is no room, allocate a new chunk of double memory size.
+    if (size > Free) {
+
+        _cmsSubAllocator_chunk* chunk;
+        cmsUInt32Number newSize;
+
+        newSize = sub -> h ->BlockSize * 2;
+        if (newSize < size) newSize = size;    // a request bigger than the doubled size gets a chunk of its own size
+
+        chunk = _cmsCreateSubAllocChunk(sub -> ContextID, newSize);
+        if (chunk == NULL) return NULL;
+
+        // Link list. The new chunk becomes the head, so the room left in the previous head is no longer handed out
+        chunk ->next = sub ->h;
+        sub ->h    = chunk;
+
+    }
+
+    ptr =  sub -> h ->Block + sub -> h ->Used;    // next unused byte of the head chunk
+    sub -> h -> Used += size;
+
+    return (void*) ptr;
+}
+
+// Duplicate in pool
+void* _cmsSubAllocDup(_cmsSubAllocator* s, const void *ptr, cmsUInt32Number size)
+{
+    void* Copy = NULL;
+
+    // Dup of null pointer is also NULL, and takes no room from the pool
+    if (ptr != NULL) {
+
+        Copy = _cmsSubAlloc(s, size);
+
+        // The pool may be unable to grow; NULL is handed back in that case too
+        if (Copy != NULL)
+            memcpy(Copy, ptr, size);
+    }
+
+    return Copy;
+}
+
+
+
+// Error logging ******************************************************************
+
+// There is no error handling at all. When a function fails, it returns a proper value.
+// For example, all create functions return NULL on failure. Others return FALSE.
+// It may be interesting, for the developer, to know why the function is failing.
+// For that reason, lcms2 offers a logging function. This function receives
+// an ENGLISH string with some clues on what is going wrong. You can show this
+// info to the end user, or just create some sort of log.
+// The logging function should NOT terminate the program, as this obviously can leak
+// resources. It is the programmer's responsibility to check each function return code
+// to make sure it didn't fail.
+
+// Error messages are limited to MAX_ERROR_MESSAGE_LEN
+
+#define MAX_ERROR_MESSAGE_LEN   1024
+
+// ---------------------------------------------------------------------------------------------------------
+
+// This is our default log error
+static void DefaultLogErrorHandlerFunction(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *Text);
+
+// Context0 storage, which is global
+_cmsLogErrorChunkType _cmsLogErrorChunk = { DefaultLogErrorHandlerFunction };
+
+// Allocates and inits error logger container for a given context. If src is NULL, only initializes the value
+// to the default. Otherwise, it duplicates the value. The interface is standard across all context clients
+void _cmsAllocLogErrorChunk(struct _cmsContext_struct* ctx,
+                            const struct _cmsContext_struct* src)
+{
+    static _cmsLogErrorChunkType LogErrorChunk = { DefaultLogErrorHandlerFunction };
+    const void* Template = &LogErrorChunk;      // used when there is nothing to inherit
+
+    // Inherit the logger of the source context when one is given
+    if (src != NULL)
+        Template = src ->chunks[Logger];
+
+    // Either way, the context gets its own copy, stored in its memory pool.
+    // NOTE(review): a NULL logger chunk in src yields a NULL chunk here as well,
+    // since _cmsSubAllocDup returns NULL for a NULL source - confirm it cannot happen.
+    ctx ->chunks[Logger] = _cmsSubAllocDup(ctx ->MemPool, Template, sizeof(_cmsLogErrorChunkType));
+}
+
+// The default error logger does nothing: the message is dropped and all three parameters are ignored.
+static
+void DefaultLogErrorHandlerFunction(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *Text)
+{
+    // fprintf(stderr, "[lcms]: %s\n", Text);
+    // fflush(stderr);
+
+     cmsUNUSED_PARAMETER(ContextID);
+     cmsUNUSED_PARAMETER(ErrorCode);
+     cmsUNUSED_PARAMETER(Text);
+}
+
+// Change log error, context based
+void CMSEXPORT cmsSetLogErrorHandlerTHR(cmsContext ContextID, cmsLogErrorHandlerFunction Fn)
+{
+    _cmsLogErrorChunkType* lhg = (_cmsLogErrorChunkType*) _cmsContextGetClientChunk(ContextID, Logger);
+
+    if (lhg != NULL) {
+
+        if (Fn == NULL)
+            lhg -> LogErrorHandler = DefaultLogErrorHandlerFunction;
+        else
+            lhg -> LogErrorHandler = Fn;
+    }
+}
+
+// Change log error, legacy: same as cmsSetLogErrorHandlerTHR with a NULL ContextID
+void CMSEXPORT cmsSetLogErrorHandler(cmsLogErrorHandlerFunction Fn)
+{
+    cmsSetLogErrorHandlerTHR(NULL, Fn);    
+}
+
+// Log an error
+// ErrorText is a text holding an english description of error.
+void CMSEXPORT cmsSignalError(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *ErrorText, ...)
+{
+    va_list args;
+    char Buffer[MAX_ERROR_MESSAGE_LEN];
+    _cmsLogErrorChunkType* lhg;
+
+
+    va_start(args, ErrorText);
+    vsnprintf(Buffer, MAX_ERROR_MESSAGE_LEN-1, ErrorText, args);
+    va_end(args);
+
+    // Check for the context, if specified go there. If not, go for the global
+    lhg = (_cmsLogErrorChunkType*) _cmsContextGetClientChunk(ContextID, Logger);
+    if (lhg ->LogErrorHandler) {
+        lhg ->LogErrorHandler(ContextID, ErrorCode, Buffer);
+    }   
+}
+
+// Utility function to print signatures: String receives the four bytes of sig followed by a zero terminator
+void _cmsTagSignature2String(char String[5], cmsTagSignature sig)
+{
+    cmsUInt32Number be;
+
+    // Convert to big endian, so the bytes sit in memory in reading order
+    be = _cmsAdjustEndianess32((cmsUInt32Number) sig);
+
+    // Move chars
+    memmove(String, &be, 4);
+
+    // Make sure of terminator
+    String[4] = 0;
+}
+
+//--------------------------------------------------------------------------------------------------
+
+
+static
+void* defMtxCreate(cmsContext id)
+{   // Default mutex factory: takes a _cmsMutex from the allocator of the context and initializes it
+    _cmsMutex* ptr_mutex = (_cmsMutex*) _cmsMalloc(id, sizeof(_cmsMutex));
+    if (ptr_mutex != NULL) _cmsInitMutexPrimitive(ptr_mutex);   // out of memory: NULL is returned, nothing to initialize
+    return (void*) ptr_mutex;
+}
+
+static
+void defMtxDestroy(cmsContext id, void* mtx)
+{   // Default mutex disposal: tear down the primitive first, then give its storage back with _cmsFree
+    _cmsDestroyMutexPrimitive((_cmsMutex *) mtx); 
+    _cmsFree(id, mtx);
+}
+
+static
+cmsBool defMtxLock(cmsContext id, void* mtx)
+{   // Default lock: reports success when the lock primitive returns 0
+    cmsUNUSED_PARAMETER(id);
+    return _cmsLockPrimitive((_cmsMutex *) mtx) == 0;     
+}
+
+static
+void defMtxUnlock(cmsContext id, void* mtx)
+{   // Default unlock: the result of the unlock primitive is not looked at
+    cmsUNUSED_PARAMETER(id);
+    _cmsUnlockPrimitive((_cmsMutex *) mtx); 
+}
+
+
+
+// Pointers to mutex functions in Context0
+_cmsMutexPluginChunkType _cmsMutexPluginChunk = { defMtxCreate, defMtxDestroy, defMtxLock, defMtxUnlock };
+
+// Allocate and init mutex container. The mutex functions are inherited
+// from src when it is given, otherwise the default ones are installed.
+void _cmsAllocMutexPluginChunk(struct _cmsContext_struct* ctx,
+                                        const struct _cmsContext_struct* src)
+{
+    static _cmsMutexPluginChunkType MutexChunk = {defMtxCreate, defMtxDestroy, defMtxLock, defMtxUnlock };
+    const void* Template = &MutexChunk;     // used when there is nothing to inherit
+
+    // Inherit the functions of the source context when one is given
+    if (src != NULL)
+        Template = src ->chunks[MutexPlugin];
+
+    // Either way, the context gets its own copy, stored in its memory pool
+    ctx ->chunks[MutexPlugin] = _cmsSubAllocDup(ctx ->MemPool, Template, sizeof(_cmsMutexPluginChunkType));
+
+}
+
+// Register the mutex functions of a plug-in. A NULL Data removes every lock routine from the context
+cmsBool  _cmsRegisterMutexPlugin(cmsContext ContextID, cmsPluginBase* Data)
+{
+    cmsPluginMutex* Source = (cmsPluginMutex*) Data;
+    _cmsMutexPluginChunkType* Chunk = ( _cmsMutexPluginChunkType*) _cmsContextGetClientChunk(ContextID, MutexPlugin);
+
+    if (Source == NULL) {
+
+        // No lock routines
+        Chunk ->UnlockMutexPtr  = NULL;
+        Chunk ->LockMutexPtr    = NULL;
+        Chunk ->DestroyMutexPtr = NULL;
+        Chunk ->CreateMutexPtr  = NULL;
+        return TRUE;
+    }
+
+    // Factory callback is required, and so are the other three
+    if (!(Source ->CreateMutexPtr != NULL && Source ->DestroyMutexPtr != NULL &&
+          Source ->LockMutexPtr != NULL && Source ->UnlockMutexPtr != NULL)) return FALSE;
+
+    // Take over the whole set
+    Chunk ->UnlockMutexPtr  = Source ->UnlockMutexPtr;
+    Chunk ->LockMutexPtr    = Source ->LockMutexPtr;
+    Chunk ->DestroyMutexPtr = Source ->DestroyMutexPtr;
+    Chunk ->CreateMutexPtr  = Source ->CreateMutexPtr;
+
+    // All is ok
+    return TRUE;
+}
+
+// Generic Mutex fns. Each one forwards to the function stored in the MutexPlugin chunk of ContextID
+void* CMSEXPORT _cmsCreateMutex(cmsContext ContextID)
+{
+    _cmsMutexPluginChunkType* ptr = (_cmsMutexPluginChunkType*) _cmsContextGetClientChunk(ContextID, MutexPlugin);
+
+    if (ptr ->CreateMutexPtr == NULL) return NULL;
+    // A factory is installed: the mutex is whatever it returns
+    return ptr ->CreateMutexPtr(ContextID);
+}
+
+void CMSEXPORT _cmsDestroyMutex(cmsContext ContextID, void* mtx)
+{
+    _cmsMutexPluginChunkType* ptr = (_cmsMutexPluginChunkType*) _cmsContextGetClientChunk(ContextID, MutexPlugin);
+    // Does nothing when the context has no destroy routine installed
+    if (ptr ->DestroyMutexPtr != NULL) {
+
+        ptr ->DestroyMutexPtr(ContextID, mtx);
+    }
+}
+
+cmsBool CMSEXPORT _cmsLockMutex(cmsContext ContextID, void* mtx)
+{
+    _cmsMutexPluginChunkType* ptr = (_cmsMutexPluginChunkType*) _cmsContextGetClientChunk(ContextID, MutexPlugin);
+    // With no lock routine installed, locking is reported as successful
+    if (ptr ->LockMutexPtr == NULL) return TRUE;
+
+    return ptr ->LockMutexPtr(ContextID, mtx);
+}
+
+void CMSEXPORT _cmsUnlockMutex(cmsContext ContextID, void* mtx)
+{
+    _cmsMutexPluginChunkType* ptr = (_cmsMutexPluginChunkType*) _cmsContextGetClientChunk(ContextID, MutexPlugin);
+    // Does nothing when the context has no unlock routine installed
+    if (ptr ->UnlockMutexPtr != NULL) {
+
+        ptr ->UnlockMutexPtr(ContextID, mtx);
+    }
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgamma.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgamma.cpp
new file mode 100644
index 0000000000..8bd212c6e6
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgamma.cpp
@@ -0,0 +1,1433 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2013 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+#include "lcms2_internal.h"
+
+// Tone curves are powerful constructs that can contain curves specified in diverse ways.
+// The curve is stored in segments, where each segment can be sampled or specified by parameters.
+// A 16-bit simplification of the *whole* curve is kept for optimization purposes. For float operation,
+// each segment is evaluated separately. Plug-ins may be used to define new parametric schemes;
+// each plug-in may define up to MAX_TYPES_IN_LCMS_PLUGIN function types. To define a function,
+// the plug-in should provide the type id, how many parameters each type has, and a pointer to
+// a procedure that evaluates the function. In the case of reverse evaluation, the evaluator will
+// be called with the type id as a negative value, and a sampled version of the reversed curve
+// will be built.
+
+// ----------------------------------------------------------------- Implementation
+// Maximum number of nodes
+#define MAX_NODES_IN_CURVE   4097
+#define MINUS_INF            (-1E22F)
+#define PLUS_INF             (+1E22F)
+
+// One node of the singly linked list of supported parametric curves: a set of curve types sharing one evaluator
+typedef struct _cmsParametricCurvesCollection_st {
+
+    cmsUInt32Number nFunctions;                                     // Number of valid entries in the two arrays below
+    cmsInt32Number  FunctionTypes[MAX_TYPES_IN_LCMS_PLUGIN];        // Type id of each function; lookups compare it against abs(Type)
+    cmsUInt32Number ParameterCount[MAX_TYPES_IN_LCMS_PLUGIN];       // Number of parameters for each function, parallel to FunctionTypes
+
+    cmsParametricCurveEvaluator Evaluator;                          // The evaluator called for every type of this node
+
+    struct _cmsParametricCurvesCollection_st* Next; // Next node in list, NULL at the end
+
+} _cmsParametricCurvesCollection;
+
+// This is the default (built-in) evaluator, declared ahead because the initializer below refers to it
+static cmsFloat64Number DefaultEvalParametricFn(cmsInt32Number Type, const cmsFloat64Number Params[], cmsFloat64Number R);
+
+// The built-in list: a single node, searched only after the plug-in supplied collections
+static _cmsParametricCurvesCollection DefaultCurves = {
+    9,                                  // # of curve types
+    { 1, 2, 3, 4, 5, 6, 7, 8, 108 },    // Parametric curve ID (108 is the S-shaped curve)
+    { 1, 3, 4, 5, 7, 4, 5, 5, 1 },      // Parameters by type, position by position with the IDs above
+    DefaultEvalParametricFn,            // Evaluator
+    NULL                                // Next in chain
+};
+
+// Duplicates the plug-in's list of parametric curves into the new context, keeping the order of the nodes
+static
+void DupPluginCurvesList(struct _cmsContext_struct* ctx,
+                                               const struct _cmsContext_struct* src)
+{
+    _cmsCurvesPluginChunkType  dupChunk = { NULL };
+    _cmsParametricCurvesCollection*  node;
+    _cmsParametricCurvesCollection*  tail = NULL;
+    _cmsCurvesPluginChunkType* srcChunk = (_cmsCurvesPluginChunkType*) src->chunks[CurvesPlugin];
+
+    _cmsAssert(srcChunk != NULL);
+
+    // Copy node by node, appending each copy at the tail of the new list
+    node = srcChunk ->ParametricCurves;
+    while (node != NULL) {
+        _cmsParametricCurvesCollection* copy =
+            (_cmsParametricCurvesCollection*) _cmsSubAllocDup(ctx ->MemPool, node, sizeof(_cmsParametricCurvesCollection));
+
+        // Out of memory: give up without touching the chunk slot of the new context
+        if (copy == NULL) return;
+
+        // The raw copy still points into the source list, so cut that link
+        copy ->Next = NULL;
+
+        if (tail != NULL)
+            tail ->Next = copy;                 // Hook after the previous copy
+        else
+            dupChunk.ParametricCurves = copy;   // First node becomes the head
+
+        tail = copy;
+        node = node ->Next;
+    }
+
+    ctx ->chunks[CurvesPlugin] = _cmsSubAllocDup(ctx->MemPool, &dupChunk, sizeof(_cmsCurvesPluginChunkType));
+}
+
+// Sets up the curves plug-in chunk of 'ctx': a copy of the list held by 'src', or an empty chunk when 'src' is NULL
+void _cmsAllocCurvesPluginChunk(struct _cmsContext_struct* ctx,
+                                const struct _cmsContext_struct* src)
+{
+    // Template of a chunk holding no plug-in curves
+    static _cmsCurvesPluginChunkType CurvesPluginChunk = { NULL };
+
+    _cmsAssert(ctx != NULL);
+
+    if (src == NULL) {
+        // No source context: start from the empty template
+        ctx ->chunks[CurvesPlugin] = _cmsSubAllocDup(ctx ->MemPool, &CurvesPluginChunk, sizeof(_cmsCurvesPluginChunkType));
+        return;
+    }
+    DupPluginCurvesList(ctx, src);   // Otherwise the allocator has to follow the chain and copy all the linked list
+}
+
+
+// The linked list head: a chunk defined at file scope whose list of plug-in parametric curves starts out empty
+_cmsCurvesPluginChunkType _cmsCurvesPluginChunk = { NULL };
+
+// Installs a new set of parametric curves in the given context. A NULL 'Data' resets the list to empty.
+cmsBool _cmsRegisterParametricCurvesPlugin(cmsContext ContextID, cmsPluginBase* Data)
+{
+    _cmsCurvesPluginChunkType* chunk = ( _cmsCurvesPluginChunkType*) _cmsContextGetClientChunk(ContextID, CurvesPlugin);
+    cmsPluginParametricCurves* Plugin = (cmsPluginParametricCurves*) Data;
+    _cmsParametricCurvesCollection* node;
+    cmsUInt32Number count;
+
+    // No plug-in at all means "forget whatever was registered"
+    if (Data == NULL) {
+        chunk ->ParametricCurves = NULL;
+        return TRUE;
+    }
+
+    node = (_cmsParametricCurvesCollection*) _cmsPluginMalloc(ContextID, sizeof(_cmsParametricCurvesCollection));
+    if (node == NULL) return FALSE;
+
+    // Make sure no mem overwrites: never take more functions than the fixed-size arrays can hold
+    count = Plugin ->nFunctions;
+    if (count > MAX_TYPES_IN_LCMS_PLUGIN)
+        count = MAX_TYPES_IN_LCMS_PLUGIN;
+
+    node ->nFunctions = count;
+    node ->Evaluator  = Plugin ->Evaluator;
+
+    // Copy type ids and parameter counts
+    memmove(node ->FunctionTypes,  Plugin ->FunctionTypes,  count * sizeof(cmsUInt32Number));
+    memmove(node ->ParameterCount, Plugin ->ParameterCount, count * sizeof(cmsUInt32Number));
+
+    // Keep linked list: the new node goes to the front
+    node ->Next = chunk ->ParametricCurves;
+    chunk ->ParametricCurves = node;
+
+    return TRUE;
+}
+
+
+// Returns the position of Type inside the collection's id table, or -1 when it is not there
+static
+int IsInSet(int Type, _cmsParametricCurvesCollection* c)
+{
+    const int wanted = abs(Type);   // The sign is ignored, so a negated id matches as well
+    const int count  = (int) c ->nFunctions;
+    int pos = 0;
+
+    while (pos < count && c ->FunctionTypes[pos] != wanted) pos++;
+    return (pos < count) ? pos : -1;
+}
+
+
+// Finds the collection that handles a given curve type. Collections registered in the context take
+// precedence over the built-in ones. Optionally reports the position of the type in the collection.
+static
+_cmsParametricCurvesCollection *GetParametricCurveByType(cmsContext ContextID, int Type, int* index)
+{
+    _cmsCurvesPluginChunkType* chunk = ( _cmsCurvesPluginChunkType*) _cmsContextGetClientChunk(ContextID, CurvesPlugin);
+    _cmsParametricCurvesCollection* heads[2];
+    _cmsParametricCurvesCollection* node;
+    int which, found;
+
+    // Search order: what the plug-ins registered first, then revert to the defaults
+    heads[0] = chunk ->ParametricCurves;
+    heads[1] = &DefaultCurves;
+
+    for (which = 0; which < 2; which++) {
+
+        for (node = heads[which]; node != NULL; node = node ->Next) {
+
+            found = IsInSet(Type, node);
+            if (found == -1) continue;
+
+            // Got it, report the position only if the caller asked for it
+            if (index != NULL)
+                *index = found;
+
+            return node;
+        }
+    }
+
+    // Nobody knows this type
+    return NULL;
+}
+
+// Low level allocate, which takes care of memory details. nEntries may be zero, and in this case no
+// optimization curve is computed. nSegments may also be zero in the inverse case, where only the optimization
+// curve is given. Both being zero is an error. On failure, NULL is returned and nothing stays allocated.
+static
+cmsToneCurve* AllocateToneCurveStruct(cmsContext ContextID, cmsUInt32Number nEntries,
+                                      cmsUInt32Number nSegments, const cmsCurveSegment* Segments,
+                                      const cmsUInt16Number* Values)
+{
+    cmsToneCurve* p;
+    cmsUInt32Number i;
+
+    // We allow huge tables, which are then restricted for smoothing operations
+    if (nEntries > 65530) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Couldn't create tone curve of more than 65530 entries");
+        return NULL;
+    }
+
+    if (nEntries == 0 && nSegments == 0) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Couldn't create tone curve with zero segments and no table");
+        return NULL;
+    }
+
+    // Allocate all required pointers, etc. The struct starts zeroed, so members not set below stay NULL
+    p = (cmsToneCurve*) _cmsMallocZero(ContextID, sizeof(cmsToneCurve));
+    if (!p) return NULL;
+
+    // In this case, there are no segments
+    if (nSegments == 0) {
+        p ->Segments = NULL;
+        p ->Evals = NULL;
+    }
+    else {
+        p ->Segments = (cmsCurveSegment*) _cmsCalloc(ContextID, nSegments, sizeof(cmsCurveSegment));
+        if (p ->Segments == NULL) goto Error;
+
+        p ->Evals    = (cmsParametricCurveEvaluator*) _cmsCalloc(ContextID, nSegments, sizeof(cmsParametricCurveEvaluator));
+        if (p ->Evals == NULL) goto Error;
+    }
+
+    p -> nSegments = nSegments;
+
+    // This 16-bit table contains a limited precision representation of the whole curve and is kept for
+    // increasing xput on certain operations.
+    if (nEntries == 0) {
+        p ->Table16 = NULL;
+    }
+    else {
+       p ->Table16 = (cmsUInt16Number*)  _cmsCalloc(ContextID, nEntries, sizeof(cmsUInt16Number));
+       if (p ->Table16 == NULL) goto Error;
+    }
+
+    p -> nEntries  = nEntries;
+
+    // Initialize members if requested
+    if (Values != NULL && (nEntries > 0)) {
+        for (i=0; i < nEntries; i++)
+            p ->Table16[i] = Values[i];
+    }
+
+    // Initialize the segments stuff. The evaluator for each segment is located and a pointer to it
+    // is placed in advance to maximize performance.
+    if (Segments != NULL && (nSegments > 0)) {
+        _cmsParametricCurvesCollection *c;
+        p ->SegInterp = (cmsInterpParams**) _cmsCalloc(ContextID, nSegments, sizeof(cmsInterpParams*));
+        if (p ->SegInterp == NULL) goto Error;
+
+        for (i=0; i < nSegments; i++) {
+            // Type 0 is a special marker for table-based curves
+            if (Segments[i].Type == 0)
+                p ->SegInterp[i] = _cmsComputeInterpParams(ContextID, Segments[i].nGridPoints, 1, 1, NULL, CMS_LERP_FLAGS_FLOAT);
+
+            memmove(&p ->Segments[i], &Segments[i], sizeof(cmsCurveSegment));
+
+            // The curve keeps its own copy of the sampled points; any other segment gets no table
+            if (Segments[i].Type == 0 && Segments[i].SampledPoints != NULL)
+                p ->Segments[i].SampledPoints = (cmsFloat32Number*) _cmsDupMem(ContextID, Segments[i].SampledPoints, sizeof(cmsFloat32Number) * Segments[i].nGridPoints);
+            else
+                p ->Segments[i].SampledPoints = NULL;
+            c = GetParametricCurveByType(ContextID, Segments[i].Type, NULL);
+            if (c != NULL)
+                    p ->Evals[i] = c ->Evaluator;
+        }
+    }
+
+    p ->InterpParams = _cmsComputeInterpParams(ContextID, p ->nEntries, 1, 1, p->Table16, CMS_LERP_FLAGS_16BITS);
+    if (p ->InterpParams != NULL) return p;
+
+Error:
+    // Undo every allocation made so far: first what hangs from each segment, then the arrays themselves
+    for (i=0; i < nSegments; i++) {
+        if (p ->Segments && p ->Segments[i].SampledPoints) _cmsFree(ContextID, p ->Segments[i].SampledPoints);
+        if (p ->SegInterp && p ->SegInterp[i]) _cmsFreeInterpParams(p ->SegInterp[i]);
+    }
+    if (p -> SegInterp) _cmsFree(ContextID, p -> SegInterp);
+    if (p -> Segments) _cmsFree(ContextID, p ->Segments);
+    if (p -> Evals) _cmsFree(ContextID, p -> Evals);
+    if (p ->Table16) _cmsFree(ContextID, p ->Table16);
+    _cmsFree(ContextID, p);
+    return NULL;
+}
+
+
+// Parametric Fn using floating point: the default (built-in) evaluator. A positive Type selects the
+// direct curve and the matching negative Type its analytical inverse. Degenerate parameters that would
+// lead to a division by zero yield 0 in most branches, and so does an unsupported Type.
+static
+cmsFloat64Number DefaultEvalParametricFn(cmsInt32Number Type, const cmsFloat64Number Params[], cmsFloat64Number R)
+{
+    cmsFloat64Number e, Val, disc;
+
+    switch (Type) {
+   // X = Y ^ Gamma
+    case 1:
+        if (R < 0) {
+
+            if (fabs(Params[0] - 1.0) < MATRIX_DET_TOLERANCE)
+                Val = R;
+            else
+                Val = 0;
+        }
+        else
+            Val = pow(R, Params[0]);
+        break;
+
+    // Type 1 Reversed: X = Y ^1/gamma
+    case -1:
+        if (R < 0) {
+
+            if (fabs(Params[0] - 1.0) < MATRIX_DET_TOLERANCE)
+                Val = R;
+            else
+                Val = 0;
+        }
+        else
+        {
+            if (fabs(Params[0]) < MATRIX_DET_TOLERANCE)
+                Val = PLUS_INF;
+            else
+                Val = pow(R, 1 / Params[0]);
+        }
+        break;
+
+    // CIE 122-1966
+    // Y = (aX + b)^Gamma  | X >= -b/a
+    // Y = 0               | else
+    case 2:
+    {
+
+        if (fabs(Params[1]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            disc = -Params[2] / Params[1];
+
+            if (R >= disc) {
+
+                e = Params[1] * R + Params[2];
+
+                if (e > 0)
+                    Val = pow(e, Params[0]);
+                else
+                    Val = 0;
+            }
+            else
+                Val = 0;
+        }
+    }
+    break;
+
+     // Type 2 Reversed
+     // X = (Y ^1/g  - b) / a
+     case -2:
+     {
+         if (fabs(Params[0]) < MATRIX_DET_TOLERANCE ||
+             fabs(Params[1]) < MATRIX_DET_TOLERANCE)
+         {
+             Val = 0;
+         }
+         else
+         {
+             if (R < 0)
+                 Val = 0;
+             else
+                 Val = (pow(R, 1.0 / Params[0]) - Params[2]) / Params[1];
+
+             if (Val < 0)
+                 Val = 0;
+         }
+     }         
+     break;
+
+    // IEC 61966-3
+    // Y = (aX + b)^Gamma + c | X >= -b/a
+    // Y = c                  | else
+    case 3:
+    {
+        if (fabs(Params[1]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            disc = -Params[2] / Params[1];
+            if (disc < 0)
+                disc = 0;
+
+            if (R >= disc) {
+
+                e = Params[1] * R + Params[2];
+
+                if (e > 0)
+                    Val = pow(e, Params[0]) + Params[3];
+                else
+                    Val = 0;
+            }
+            else
+                Val = Params[3];
+        }
+    }
+    break;
+
+
+    // Type 3 reversed
+    // X=((Y-c)^1/g - b)/a      | (Y>=c)
+    // X=-b/a                   | (Y<c)
+    case -3:
+    {
+        if (fabs(Params[0]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[1]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            if (R >= Params[3]) {
+                e = R - Params[3];
+
+                if (e > 0)
+                    Val = (pow(e, 1 / Params[0]) - Params[2]) / Params[1];
+                else
+                    Val = 0;
+            }
+            else {
+                Val = -Params[2] / Params[1];
+            }
+        }
+    }
+    break;
+
+
+    // IEC 61966-2.1 (sRGB)
+    // Y = (aX + b)^Gamma | X >= d
+    // Y = cX             | X < d
+    case 4:
+        if (R >= Params[4]) {
+
+            e = Params[1]*R + Params[2];
+
+            if (e > 0)
+                Val = pow(e, Params[0]);
+            else
+                Val = 0;
+        }
+        else
+            Val = R * Params[3];
+        break;
+
+    // Type 4 reversed
+    // X=((Y^1/g-b)/a)    | Y >= (ad+b)^g
+    // X=Y/c              | Y< (ad+b)^g
+    case -4:
+    {
+        if (fabs(Params[0]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[1]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[3]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            e = Params[1] * Params[4] + Params[2];
+            if (e < 0)
+                disc = 0;
+            else
+                disc = pow(e, Params[0]);
+
+            if (R >= disc) {
+
+                Val = (pow(R, 1.0 / Params[0]) - Params[2]) / Params[1];
+            }
+            else {
+                Val = R / Params[3];
+            }
+        }
+    }
+    break;
+
+
+    // Y = (aX + b)^Gamma + e | X >= d
+    // Y = cX + f             | X < d
+    case 5:
+        if (R >= Params[4]) {
+
+            e = Params[1]*R + Params[2];
+
+            if (e > 0)
+                Val = pow(e, Params[0]) + Params[5];
+            else
+                Val = Params[5];
+        }
+        else
+            Val = R*Params[3] + Params[6];
+        break;
+
+
+    // Reversed type 5
+    // X=((Y-e)^1/g-b)/a  | Y >=(ad+b)^g+e), cd+f
+    // X=(Y-f)/c          | else
+    case -5:
+    {
+        if (fabs(Params[0]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[1]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[3]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            disc = Params[3] * Params[4] + Params[6];
+            if (R >= disc) {
+                e = R - Params[5];
+                if (e < 0)
+                    Val = 0;
+                else
+                    Val = (pow(e, 1.0 / Params[0]) - Params[2]) / Params[1];
+            }
+            else {
+                Val = (R - Params[6]) / Params[3];
+            }
+        }
+    }
+    break;
+
+
+    // Types 6,7,8 comes from segmented curves as described in ICCSpecRevision_02_11_06_Float.pdf
+    // Type 6 is basically identical to type 5 without d
+
+    // Y = (a * X + b) ^ Gamma + c
+    case 6:
+        e = Params[1]*R + Params[2];
+
+        if (e < 0)
+            Val = Params[3];
+        else
+            Val = pow(e, Params[0]) + Params[3];
+        break;
+
+    // ((Y - c) ^1/Gamma - b) / a
+    case -6:
+    {
+        if (fabs(Params[0]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[1]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            e = R - Params[3];
+            if (e < 0)
+                Val = 0;
+            else
+                Val = (pow(e, 1.0 / Params[0]) - Params[2]) / Params[1];
+        }
+    }
+    break;
+
+    // Y = a * log (b * X^Gamma + c) + d
+    case 7:
+
+       e = Params[2] * pow(R, Params[0]) + Params[3];
+       if (e <= 0)
+           Val = Params[4];
+       else
+           Val = Params[1]*log10(e) + Params[4];
+       break;
+
+    // (Y - d) / a = log(b * X ^Gamma + c)
+    // pow(10, (Y-d) / a) = b * X ^Gamma + c
+    // pow((pow(10, (Y-d) / a) - c) / b, 1/g) = X
+    case -7:
+    {
+        if (fabs(Params[0]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[1]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[2]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            Val = pow((pow(10.0, (R - Params[4]) / Params[1]) - Params[3]) / Params[2], 1.0 / Params[0]);
+        }
+    }
+    break;
+
+
+   //Y = a * b^(c*X+d) + e
+   case 8:
+       Val = (Params[0] * pow(Params[1], Params[2] * R + Params[3]) + Params[4]);
+       break;
+
+
+   // Y = (log((y-e) / a) / log(b) - d ) / c
+   // a=0, b=1, c=2, d=3, e=4,
+   case -8:
+
+       disc = R - Params[4];
+       if (disc < 0) Val = 0;
+       else
+       {
+           if (fabs(Params[0]) < MATRIX_DET_TOLERANCE ||
+               fabs(Params[2]) < MATRIX_DET_TOLERANCE)
+           {
+               Val = 0;
+           }
+           else
+           {
+               Val = (log(disc / Params[0]) / log(Params[1]) - Params[3]) / Params[2];
+           }
+       }
+       break;
+
+   // S-Shaped: (1 - (1-x)^1/g)^1/g
+   case 108:
+       if (fabs(Params[0]) < MATRIX_DET_TOLERANCE)
+           Val = 0;
+       else
+           Val = pow(1.0 - pow(1 - R, 1/Params[0]), 1/Params[0]);
+      break;
+
+    // y = (1 - (1-x)^1/g)^1/g
+    // y^g = (1 - (1-x)^1/g)
+    // 1 - y^g = (1-x)^1/g
+    // (1 - y^g)^g = 1 - x
+    // 1 - (1 - y^g)^g
+    case -108:
+        Val = 1 - pow(1 - pow(R, Params[0]), Params[0]);
+        break;
+
+    default:
+        // Unsupported parametric curve. Should never reach here
+        return 0;
+    }
+
+    return Val;
+}
+
+// Evaluate a segmented function for a single value. Segments are searched from the last one
+// backwards and each one covers the half-open interval (x0, x1]. Return -Inf (MINUS_INF) if no
+// valid segment found. Infinite results are clamped to PLUS_INF or MINUS_INF according to their sign.
+// If fn type is 0, perform an interpolation on the table
+static
+cmsFloat64Number EvalSegmentedFn(const cmsToneCurve *g, cmsFloat64Number R)
+{
+    int i;
+    cmsFloat32Number Out32;
+    cmsFloat64Number Out;
+
+    for (i = (int) g->nSegments - 1; i >= 0; --i) {
+
+        // Check for domain
+        if ((R > g->Segments[i].x0) && (R <= g->Segments[i].x1)) {
+
+            // Type == 0 means segment is sampled
+            if (g->Segments[i].Type == 0) {
+
+                // Rescale R to its relative position inside the segment
+                cmsFloat32Number R1 = (cmsFloat32Number)(R - g->Segments[i].x0) / (g->Segments[i].x1 - g->Segments[i].x0);
+
+                // Setup the table (TODO: clean that)
+                g->SegInterp[i]->Table = g->Segments[i].SampledPoints;
+
+                g->SegInterp[i]->Interpolation.LerpFloat(&R1, &Out32, g->SegInterp[i]);
+                Out = (cmsFloat64Number) Out32;
+
+            }
+            else {
+                Out = g->Evals[i](g->Segments[i].Type, g->Segments[i].Params, R);
+            }
+
+            // isinf() holds for both infinities, so the sign has to be looked at explicitly;
+            // otherwise a negative infinity would be reported as PLUS_INF
+            if (isinf(Out))
+                return (Out > 0) ? PLUS_INF : MINUS_INF;
+
+            return Out;
+        }
+    }
+
+    return MINUS_INF;
+}
+
+// Access to estimated low-res table: returns the number of entries it holds
+cmsUInt32Number CMSEXPORT cmsGetToneCurveEstimatedTableEntries(const cmsToneCurve* t)
+{
+    _cmsAssert(t != NULL);
+    return t ->nEntries;
+}
+
+const cmsUInt16Number* CMSEXPORT cmsGetToneCurveEstimatedTable(const cmsToneCurve* t)
+{
+    _cmsAssert(t != NULL);
+    return t ->Table16;  // Read-only pointer to the curve's own 16-bit table; no copy is made
+}
+
+
+// Create an empty gamma curve, by using tables. This specifies only the limited-precision part, and leaves the
+// floating point description empty (zero segments). Values may be NULL, in which case nothing is copied into the table.
+cmsToneCurve* CMSEXPORT cmsBuildTabulatedToneCurve16(cmsContext ContextID, cmsUInt32Number nEntries, const cmsUInt16Number Values[])
+{
+    return AllocateToneCurveStruct(ContextID, nEntries, 0, NULL, Values);
+}
+
+static
+cmsUInt32Number EntriesByGamma(cmsFloat64Number Gamma)
+{
+    // A gamma within 0.001 of 1.0 gets a table of just 2 entries; anything else gets 4096
+    return (fabs(Gamma - 1.0) < 0.001) ? 2 : 4096;
+}
+
+
+// Create a segmented gamma: builds a tone curve out of a list of segments and then fills
+// its 16 bit table by sampling the segments
+cmsToneCurve* CMSEXPORT cmsBuildSegmentedToneCurve(cmsContext ContextID,
+                                                   cmsUInt32Number nSegments, const cmsCurveSegment Segments[])
+{
+    cmsToneCurve* Curve;
+    cmsUInt32Number Node, nNodes;
+    cmsFloat64Number x, y;
+
+    _cmsAssert(Segments != NULL);
+
+    // Optimization for identity curves: a lone segment of type 1 may get a much shorter table
+    nNodes = (nSegments == 1 && Segments[0].Type == 1) ? EntriesByGamma(Segments[0].Params[0])
+                                                       : 4096;
+
+    // The allocator copies the segments; the 16 bit table still has to be filled in
+    Curve = AllocateToneCurveStruct(ContextID, nNodes, nSegments, Segments, NULL);
+    if (Curve == NULL) return NULL;
+
+    // Once we have the floating point version, we can approximate a 16 bit table for performance
+    // reasons. This table would normally not be used except on 8/16 bits transforms.
+    for (Node = 0; Node < nNodes; Node++) {
+
+        // Position of this node in the 0..1 domain
+        x = (cmsFloat64Number) Node / (nNodes - 1);
+
+        y = EvalSegmentedFn(Curve, x);
+
+        // Scale to 16 bits, round and saturate
+        Curve ->Table16[Node] = _cmsQuickSaturateWord(y * 65535.0);
+    }
+
+    return Curve;
+}
+
+// Use a segmented curve to store the floating point table. Returns NULL when no samples are given.
+cmsToneCurve* CMSEXPORT cmsBuildTabulatedToneCurveFloat(cmsContext ContextID, cmsUInt32Number nEntries, const cmsFloat32Number values[])
+{
+    cmsCurveSegment Seg[3];
+
+    // An empty table has neither a first nor a last sample to extend the curve with
+    if (nEntries == 0 || values == NULL) return NULL;
+
+    // A segmented tone curve should have function segments in the first and last positions
+    // Initialize segmented curve part up to 0 to constant value = samples[0]
+    Seg[0].x0 = MINUS_INF;
+    Seg[0].x1 = 0;
+    Seg[0].Type = 6;
+    Seg[0].Params[0] = 1;
+    Seg[0].Params[1] = 0;
+    Seg[0].Params[2] = 0;
+    Seg[0].Params[3] = values[0];
+    Seg[0].Params[4] = 0;
+
+    // From zero to 1
+    Seg[1].x0 = 0;
+    Seg[1].x1 = 1.0;
+    Seg[1].Type = 0;
+
+    Seg[1].nGridPoints = nEntries;
+    Seg[1].SampledPoints = (cmsFloat32Number*) values;
+
+    // Final segment is constant = lastsample
+    Seg[2].x0 = 1.0;
+    Seg[2].x1 = PLUS_INF;
+    Seg[2].Type = 6;
+    Seg[2].Params[0] = 1;
+    Seg[2].Params[1] = 0;
+    Seg[2].Params[2] = 0;
+    Seg[2].Params[3] = values[nEntries-1];
+    Seg[2].Params[4] = 0;
+
+    return cmsBuildSegmentedToneCurve(ContextID, 3, Seg);
+}
+
+// Parametric curves
+// Parameters go as: Curve, a, b, c, d, e, f. Type is the ICC type +1
+// If Type is negative, then the curve is analytically inverted. Returns NULL, after signalling
+// an error, when Type is not a known parametric curve.
+cmsToneCurve* CMSEXPORT cmsBuildParametricToneCurve(cmsContext ContextID, cmsInt32Number Type, const cmsFloat64Number Params[])
+{
+    cmsCurveSegment Whole;
+    int Slot = 0;
+    cmsUInt32Number nBytes;
+    _cmsParametricCurvesCollection* Owner = GetParametricCurveByType(ContextID, Type, &Slot);
+
+    _cmsAssert(Params != NULL);
+
+    if (Owner == NULL) {
+        cmsSignalError(ContextID, cmsERROR_UNKNOWN_EXTENSION, "Invalid parametric curve type %d", Type);
+        return NULL;
+    }
+
+    // One single segment spanning the whole domain; every field not set below stays zero
+    memset(&Whole, 0, sizeof(Whole));
+    Whole.Type = Type;
+    Whole.x0   = MINUS_INF;
+    Whole.x1   = PLUS_INF;
+
+    // Copy just as many parameters as this curve type declares
+    nBytes = Owner ->ParameterCount[Slot] * sizeof(cmsFloat64Number);
+    memmove(Whole.Params, Params, nBytes);
+
+    return cmsBuildSegmentedToneCurve(ContextID, 1, &Whole);
+}
+
+
+
+// Build a gamma table based on gamma constant: a parametric curve of type 1 whose only parameter is Gamma
+cmsToneCurve* CMSEXPORT cmsBuildGamma(cmsContext ContextID, cmsFloat64Number Gamma)
+{
+    return cmsBuildParametricToneCurve(ContextID, 1, &Gamma);
+}
+
+
+// Free all memory taken by the gamma curve. A NULL curve is silently ignored.
+void CMSEXPORT cmsFreeToneCurve(cmsToneCurve* Curve)
+{
+    cmsContext ContextID;
+    cmsUInt32Number Seg;
+
+    if (Curve == NULL) return;
+
+    // The context is read from the interpolation params, so grab it before they are freed
+    ContextID = Curve ->InterpParams->ContextID;
+    _cmsFreeInterpParams(Curve ->InterpParams);
+
+    // 16 bit table
+    if (Curve ->Table16 != NULL)
+        _cmsFree(ContextID, Curve ->Table16);
+
+    if (Curve ->Segments != NULL) {
+        // Each segment may own a block of sampled points and an interpolator
+        for (Seg = 0; Seg < Curve ->nSegments; Seg++) {
+            cmsCurveSegment* s = &Curve ->Segments[Seg];
+
+            if (s ->SampledPoints != NULL)
+                _cmsFree(ContextID, s ->SampledPoints);
+
+            if (Curve ->SegInterp[Seg] != NULL)
+                _cmsFreeInterpParams(Curve ->SegInterp[Seg]);
+        }
+
+        _cmsFree(ContextID, Curve ->Segments);
+        _cmsFree(ContextID, Curve ->SegInterp);
+    }
+
+    if (Curve ->Evals != NULL)
+        _cmsFree(ContextID, Curve ->Evals);
+
+    _cmsFree(ContextID, Curve);   // Finally the curve itself
+}
+
+// Utility function: frees the 3 gamma tables held in the array and resets the 3 slots to NULL
+void CMSEXPORT cmsFreeToneCurveTriple(cmsToneCurve* Curve[3])
+{
+    int i;
+
+    _cmsAssert(Curve != NULL);
+
+    for (i = 0; i < 3; i++) {
+        if (Curve[i] != NULL) cmsFreeToneCurve(Curve[i]);
+    }
+    for (i = 0; i < 3; i++) Curve[i] = NULL;
+}
+
+
+// Duplicate a gamma table: builds a new curve in the same context from the segments and 16 bit table of In
+cmsToneCurve* CMSEXPORT cmsDupToneCurve(const cmsToneCurve* In)
+{
+    if (In == NULL) return NULL;  // Nothing to duplicate
+
+    return  AllocateToneCurveStruct(In ->InterpParams ->ContextID, In ->nEntries, In ->nSegments, In ->Segments, In ->Table16);
+}
+
+// Joins two curves for X and Y. Curves should be monotonic.
+// We want to get
+//
+//      y = Y^-1(X(t))
+//
+// sampled at nResultingPoints values of t evenly spaced over 0..1. Returns NULL on failure.
+cmsToneCurve* CMSEXPORT cmsJoinToneCurve(cmsContext ContextID,
+                                      const cmsToneCurve* X,
+                                      const cmsToneCurve* Y, cmsUInt32Number nResultingPoints)
+{
+    cmsToneCurve* Joined = NULL;
+    cmsToneCurve* InvY;
+    cmsFloat32Number* Samples;
+    cmsFloat32Number t, x;
+    cmsUInt32Number i;
+
+    _cmsAssert(X != NULL);
+    _cmsAssert(Y != NULL);
+
+    // Y^-1, requested with as many samples as the result
+    InvY = cmsReverseToneCurveEx(nResultingPoints, Y);
+    if (InvY == NULL) return NULL;
+
+    Samples = (cmsFloat32Number*) _cmsCalloc(ContextID, nResultingPoints, sizeof(cmsFloat32Number));
+    if (Samples != NULL) {
+
+        // Push every t through X and then through the reversed Y
+        for (i = 0; i < nResultingPoints; i++) {
+
+            t = (cmsFloat32Number) i / (nResultingPoints-1);
+            x = cmsEvalToneCurveFloat(X, t);
+            Samples[i] = cmsEvalToneCurveFloat(InvY, x);
+        }
+
+        // Build the output curve, then release the scratch buffer
+        Joined = cmsBuildTabulatedToneCurveFloat(ContextID, nResultingPoints, Samples);
+        _cmsFree(ContextID, Samples);
+    }
+
+    cmsFreeToneCurve(InvY);
+
+    return Joined;
+}
+
+
+
+// Get the surrounding nodes: returns the index i such that In lies between LutTable[i] and
+// LutTable[i+1], or -1 if there is no such interval. This is tricky on non-monotonic tables,
+// so overall ascending tables are scanned from the top and the rest from the bottom.
+static
+int GetInterval(cmsFloat64Number In, const cmsUInt16Number LutTable[], const struct _cms_interp_struc* p)
+{
+    int i, step, last;
+    int lo, hi;
+
+    // A 1 point table is not allowed
+    if (p -> Domain[0] < 1) return -1;
+
+    // Number of intervals, which is also the index of the last node
+    last = (int) p ->Domain[0];
+
+    // Let's see if ascending or descending, judging by the two end points only
+    if (LutTable[0] < LutTable[p ->Domain[0]]) {
+
+        // Table is overall ascending: walk backwards from the last interval
+        i = last - 1;
+        step = -1;
+    }
+    else {
+
+        // Table is overall descending (or flat): walk forwards from the first interval
+        i = 0;
+        step = 1;
+    }
+
+    for (; i >= 0 && i < last; i += step) {
+
+        // Order the two nodes, since a local stretch may run against the overall direction
+        if (LutTable[i] <= LutTable[i+1]) {
+            lo = LutTable[i];
+            hi = LutTable[i+1];
+        }
+        else {
+            lo = LutTable[i+1];
+            hi = LutTable[i];
+        }
+
+        // First interval that contains the value wins
+        if (In >= lo && In <= hi) return i;
+    }
+
+    return -1;
+}
+
+// Reverse a gamma table. A single-segment parametric curve is inverted analytically; anything else is
+// inverted numerically on its 16 bit table, producing a tabulated curve of nResultSamples entries.
+cmsToneCurve* CMSEXPORT cmsReverseToneCurveEx(cmsUInt32Number nResultSamples, const cmsToneCurve* InCurve)
+{
+    cmsToneCurve* Reversed;
+    cmsContext ContextID;
+    cmsFloat64Number Slope = 0, Offset = 0;
+    cmsFloat64Number y, x1, y1, x2, y2;
+    int i, j;
+    int Ascending;
+
+    _cmsAssert(InCurve != NULL);
+
+    ContextID = InCurve ->InterpParams->ContextID;
+
+    // Try to reverse it analytically whenever possible
+    if (InCurve ->nSegments == 1 && InCurve ->Segments[0].Type > 0) {
+
+        const cmsCurveSegment* Only = &InCurve ->Segments[0];
+
+        if (GetParametricCurveByType(ContextID, Only ->Type, NULL) != NULL)
+            return cmsBuildParametricToneCurve(ContextID, -(Only ->Type), Only ->Params);
+    }
+
+    // Nope, reverse the table.
+    Reversed = cmsBuildTabulatedToneCurve16(ContextID, nResultSamples, NULL);
+    if (Reversed == NULL)
+        return NULL;
+
+    // We want to know if this is an ascending or descending table
+    Ascending = !cmsIsToneCurveDescending(InCurve);
+
+    // Iterate across Y axis
+    for (i=0; i < (int) nResultSamples; i++) {
+
+        y = (cmsFloat64Number) i * 65535.0 / (nResultSamples - 1);
+
+        // Find interval in which y is within.
+        j = GetInterval(y, InCurve->Table16, InCurve->InterpParams);
+
+        if (j >= 0) {
+
+            // Get limits of interval
+            x1 = InCurve ->Table16[j];
+            x2 = InCurve ->Table16[j+1];
+
+            y1 = (cmsFloat64Number) (j * 65535.0) / (InCurve ->nEntries - 1);
+            y2 = (cmsFloat64Number) ((j+1) * 65535.0 ) / (InCurve ->nEntries - 1);
+
+            // If collapsed, then use any
+            if (x1 == x2) {
+
+                Reversed ->Table16[i] = _cmsQuickSaturateWord(Ascending ? y2 : y1);
+                continue;
+            }
+
+            // Line through both nodes; it is kept for later samples that fall in no interval
+            Slope  = (y2 - y1) / (x2 - x1);
+            Offset = y2 - Slope * x2;
+        }
+
+        Reversed ->Table16[i] = _cmsQuickSaturateWord(Slope * y + Offset);
+    }
+
+    return Reversed;
+}
+
+// Reverse a gamma table
+cmsToneCurve* CMSEXPORT cmsReverseToneCurve(const cmsToneCurve* InGamma)
+{
+    // Number of samples requested for the reversed table
+    const cmsUInt32Number DefaultSamples = 4096;
+
+    _cmsAssert(InGamma != NULL);
+
+    // Delegate to the extended version with the fixed resolution
+    return cmsReverseToneCurveEx(DefaultSamples, InGamma);
+}
+
+// From: Eilers, P.H.C. (1994) Smoothing and interpolation with finite
+// differences. in: Graphic Gems IV, Heckbert, P.S. (ed.), Academic press.
+//
+// Smoothing and interpolation with second differences.
+//
+//   Input:  weights (w), data (y): vector from 1 to m.
+//   Input:  smoothing parameter (lambda), length (m).
+//   Output: smoothed vector (z): vector from 1 to m.
+
+static
+cmsBool smooth2(cmsContext ContextID, cmsFloat32Number w[], cmsFloat32Number y[],
+                cmsFloat32Number z[], cmsFloat32Number lambda, int m)
+{
+    // Smooths y[1..m] with weights w[1..m] into z[1..m] using second differences.
+    // All three vectors are 1-based. Returns TRUE on success, FALSE when m is
+    // outside the supported range or a scratch buffer could not be allocated.
+    int i, i1, i2;
+    cmsFloat32Number *c, *d, *e;
+    cmsBool st;
+
+    // The recurrences below touch indices m-3 .. m of c/d/e/z. With m < 3 that
+    // reaches index -1 (and for m == 1 writes z[2] past a 2-element buffer).
+    // The scratch arrays hold MAX_NODES_IN_CURVE items, so index m must also
+    // stay below that bound.
+    if (m < 3 || m >= MAX_NODES_IN_CURVE)
+        return FALSE;
+
+    c = (cmsFloat32Number*) _cmsCalloc(ContextID, MAX_NODES_IN_CURVE, sizeof(cmsFloat32Number));
+    d = (cmsFloat32Number*) _cmsCalloc(ContextID, MAX_NODES_IN_CURVE, sizeof(cmsFloat32Number));
+    e = (cmsFloat32Number*) _cmsCalloc(ContextID, MAX_NODES_IN_CURVE, sizeof(cmsFloat32Number));
+
+    if (c != NULL && d != NULL && e != NULL) {
+
+        // Forward sweep: first two rows have their own coefficients
+        d[1] = w[1] + lambda;
+        c[1] = -2 * lambda / d[1];
+        e[1] = lambda /d[1];
+        z[1] = w[1] * y[1];
+        d[2] = w[2] + 5 * lambda - d[1] * c[1] *  c[1];
+        c[2] = (-4 * lambda - d[1] * c[1] * e[1]) / d[2];
+        e[2] = lambda / d[2];
+        z[2] = w[2] * y[2] - c[1] * z[1];
+
+        // Interior rows
+        for (i = 3; i < m - 1; i++) {
+            i1 = i - 1; i2 = i - 2;
+            d[i]= w[i] + 6 * lambda - c[i1] * c[i1] * d[i1] - e[i2] * e[i2] * d[i2];
+            c[i] = (-4 * lambda -d[i1] * c[i1] * e[i1])/ d[i];
+            e[i] = lambda / d[i];
+            z[i] = w[i] * y[i] - c[i1] * z[i1] - e[i2] * z[i2];
+        }
+
+        // Last two rows
+        i1 = m - 2; i2 = m - 3;
+
+        d[m - 1] = w[m - 1] + 5 * lambda -c[i1] * c[i1] * d[i1] - e[i2] * e[i2] * d[i2];
+        c[m - 1] = (-2 * lambda - d[i1] * c[i1] * e[i1]) / d[m - 1];
+        z[m - 1] = w[m - 1] * y[m - 1] - c[i1] * z[i1] - e[i2] * z[i2];
+        i1 = m - 1; i2 = m - 2;
+
+        d[m] = w[m] + lambda - c[i1] * c[i1] * d[i1] - e[i2] * e[i2] * d[i2];
+        z[m] = (w[m] * y[m] - c[i1] * z[i1] - e[i2] * z[i2]) / d[m];
+        z[m - 1] = z[m - 1] / d[m - 1] - c[m - 1] * z[m];
+
+        // Back substitution
+        for (i = m - 2; 1<= i; i--)
+            z[i] = z[i] / d[i] - c[i] * z[i + 1] - e[i] * z[i + 2];
+
+        st = TRUE;
+    }
+    else st = FALSE;
+
+    // Release whichever scratch buffers were obtained
+    if (c != NULL) _cmsFree(ContextID, c);
+    if (d != NULL) _cmsFree(ContextID, d);
+    if (e != NULL) _cmsFree(ContextID, e);
+
+    return st;
+}
+
+// Smooths a curve sampled at regular intervals.
+cmsBool  CMSEXPORT cmsSmoothToneCurve(cmsToneCurve* Tab, cmsFloat64Number lambda)
+{
+    cmsContext ContextID;
+    cmsBool Ok = TRUE;
+    cmsFloat32Number *Weights, *Samples, *Smoothed;
+    cmsUInt32Number k, Count, nZeros, nPoles;
+
+    // Without the curve or its interpolation parameters the context is unknown,
+    // so no error can be signalled: just report failure.
+    if (Tab == NULL || Tab->InterpParams == NULL)
+        return FALSE;
+
+    ContextID = Tab->InterpParams->ContextID;
+
+    // Linear curves are left as they are and count as success
+    if (cmsIsToneCurveLinear(Tab))
+        return TRUE;
+
+    Count = Tab->nEntries;
+    if (Count >= MAX_NODES_IN_CURVE) {
+
+        cmsSignalError(ContextID, cmsERROR_RANGE, "cmsSmoothToneCurve: Too many points.");
+        return FALSE;
+    }
+
+    // The smoother works on 1-based vectors, so reserve one extra slot
+    Weights  = (cmsFloat32Number *)_cmsCalloc(ContextID, Count + 1, sizeof(cmsFloat32Number));
+    Samples  = (cmsFloat32Number *)_cmsCalloc(ContextID, Count + 1, sizeof(cmsFloat32Number));
+    Smoothed = (cmsFloat32Number *)_cmsCalloc(ContextID, Count + 1, sizeof(cmsFloat32Number));
+
+    if (Weights == NULL || Samples == NULL || Smoothed == NULL) {
+
+        cmsSignalError(ContextID, cmsERROR_RANGE, "cmsSmoothToneCurve: Could not allocate memory.");
+        Ok = FALSE;
+    }
+    else {
+
+        memset(Weights,  0, (Count + 1) * sizeof(cmsFloat32Number));
+        memset(Samples,  0, (Count + 1) * sizeof(cmsFloat32Number));
+        memset(Smoothed, 0, (Count + 1) * sizeof(cmsFloat32Number));
+
+        // Load the table with unit weights, shifted to index 1
+        for (k = 1; k <= Count; k++) {
+            Samples[k] = (cmsFloat32Number)Tab->Table16[k - 1];
+            Weights[k] = 1.0;
+        }
+
+        if (!smooth2(ContextID, Weights, Samples, Smoothed, (cmsFloat32Number)lambda, (int)Count)) {
+
+            cmsSignalError(ContextID, cmsERROR_RANGE, "cmsSmoothToneCurve: Function smooth2 failed.");
+            Ok = FALSE;
+        }
+        else {
+
+            // Sanity checks on the result, scanning from the last node down to the second
+            nZeros = nPoles = 0;
+            for (k = Count; k > 1; --k) {
+
+                if (Smoothed[k] == 0.) nZeros++;
+                if (Smoothed[k] >= 65535.) nPoles++;
+
+                // A node below its predecessor is rejected
+                if (Smoothed[k] < Smoothed[k - 1]) {
+                    cmsSignalError(ContextID, cmsERROR_RANGE, "cmsSmoothToneCurve: Non-Monotonic.");
+                    Ok = FALSE;
+                    break;
+                }
+            }
+
+            // More than a third of the nodes at zero
+            if (Ok && nZeros > (Count / 3)) {
+                cmsSignalError(ContextID, cmsERROR_RANGE, "cmsSmoothToneCurve: Degenerated, mostly zeros.");
+                Ok = FALSE;
+            }
+
+            // More than a third of the nodes at or above full scale
+            if (Ok && nPoles > (Count / 3)) {
+                cmsSignalError(ContextID, cmsERROR_RANGE, "cmsSmoothToneCurve: Degenerated, mostly poles.");
+                Ok = FALSE;
+            }
+
+            // Only an accepted result is written back, saturated to 16 bits
+            if (Ok) {
+                for (k = 0; k < Count; k++)
+                    Tab->Table16[k] = _cmsQuickSaturateWord(Smoothed[k + 1]);
+            }
+        }
+    }
+
+    // Release whatever was allocated
+    if (Smoothed != NULL)
+        _cmsFree(ContextID, Smoothed);
+
+    if (Samples != NULL)
+        _cmsFree(ContextID, Samples);
+
+    if (Weights != NULL)
+        _cmsFree(ContextID, Weights);
+
+    return Ok;
+}
+
+// Is a table linear? Do not use parametric since we cannot guarantee some weird parameters resulting
+// in a linear table. This way assures it is linear in 12 bits, which should be enough in most cases.
+cmsBool CMSEXPORT cmsIsToneCurveLinear(const cmsToneCurve* Curve)
+{
+    int Node;
+    int Actual, Ideal;
+
+    _cmsAssert(Curve != NULL);
+
+    // Compare every table entry against the quantized value for its node;
+    // a deviation above 0x0f on any node means the curve is not linear.
+    for (Node = 0; Node < (int) Curve->nEntries; Node++) {
+
+        Actual = (int) Curve->Table16[Node];
+        Ideal  = (int) _cmsQuantizeVal(Node, Curve->nEntries);
+
+        if (abs(Actual - Ideal) > 0x0f)
+            return FALSE;
+    }
+
+    return TRUE;
+}
+
+// Same, but for monotonicity
+cmsBool  CMSEXPORT cmsIsToneCurveMonotonic(const cmsToneCurve* t)
+{
+    cmsUInt32Number n;
+    int Count, Step, Pos, Previous;
+    cmsBool lDescending;
+
+    _cmsAssert(t != NULL);
+
+    // Fewer than two entries: nothing to compare, accept the curve
+    n = t->nEntries;
+    if (n < 2) return TRUE;
+
+    lDescending = cmsIsToneCurveDescending(t);
+
+    // Descending tables are walked from the first entry forward, ascending
+    // ones from the last entry backward. In both walks the value must not
+    // rise by more than 2 from one visited entry to the next.
+    Previous = lDescending ? t->Table16[0] : t->Table16[n-1];
+    Count    = (int) n;
+
+    for (Step = 1; Step < Count; Step++) {
+
+        Pos = lDescending ? Step : (Count - 1 - Step);
+
+        if (t->Table16[Pos] - Previous > 2)   // Some ripple is tolerated
+            return FALSE;
+
+        Previous = t->Table16[Pos];
+    }
+
+    return TRUE;
+}
+
+// Same, but for descending tables
+cmsBool  CMSEXPORT cmsIsToneCurveDescending(const cmsToneCurve* t)
+{
+    cmsUInt32Number LastNode;
+
+    _cmsAssert(t != NULL);
+
+    // Only the two end points are compared: descending means the first
+    // table entry is strictly greater than the last one.
+    LastNode = t->nEntries - 1;
+
+    return t->Table16[0] > t->Table16[LastNode];
+}
+
+
+// Another info fn: is our gamma table multisegment?
+cmsBool  CMSEXPORT cmsIsToneCurveMultisegment(const cmsToneCurve* t)
+{
+    cmsBool HasSeveralSegments;
+
+    _cmsAssert(t != NULL);
+
+    // True when the curve holds more than one segment
+    HasSeveralSegments = (t->nSegments > 1);
+
+    return HasSeveralSegments;
+}
+
+cmsInt32Number  CMSEXPORT cmsGetToneCurveParametricType(const cmsToneCurve* t)
+{
+    _cmsAssert(t != NULL);
+
+    // Only curves with exactly one segment report that segment's type;
+    // anything else yields 0.
+    if (t->nSegments == 1)
+        return t->Segments[0].Type;
+
+    return 0;
+}
+
+// We need accuracy this time
+cmsFloat32Number CMSEXPORT cmsEvalToneCurveFloat(const cmsToneCurve* Curve, cmsFloat32Number v)
+{
+    cmsUInt16Number Encoded, Looked;
+
+    _cmsAssert(Curve != NULL);
+
+    // Curves that carry segments are evaluated through the segmented evaluator
+    if (Curve->nSegments != 0)
+        return (cmsFloat32Number) EvalSegmentedFn(Curve, v);
+
+    // No segments: only the 16-bit table is available, so go through it.
+    // The input is scaled by 65535 and saturated, the result scaled back.
+    Encoded = (cmsUInt16Number) _cmsQuickSaturateWord(v * 65535.0);
+    Looked  = cmsEvalToneCurve16(Curve, Encoded);
+
+    return (cmsFloat32Number) (Looked / 65535.0);
+}
+
+// We need xput over here
+cmsUInt16Number CMSEXPORT cmsEvalToneCurve16(const cmsToneCurve* Curve, cmsUInt16Number v)
+{
+    cmsUInt16Number Result;
+
+    _cmsAssert(Curve != NULL);
+
+    // Hand the single input value to the 16-bit interpolation routine stored
+    // in the curve's interpolation parameters; it writes the output into Result.
+    Curve->InterpParams->Interpolation.Lerp16(&v, &Result, Curve->InterpParams);
+
+    return Result;
+}
+
+
+// Least squares fitting.
+// A mathematical procedure for finding the best-fitting curve to a given set of points by
+// minimizing the sum of the squares of the offsets ("the residuals") of the points from the curve.
+// The sum of the squares of the offsets is used instead of the offset absolute values because
+// this allows the residuals to be treated as a continuous differentiable quantity.
+//
+// y = f(x) = x ^ g
+//
+// R   = (yi - (xi^g))
+// R^2 = (yi - (xi^g))^2
+// SUM R^2 = SUM (yi - (xi^g))^2
+//
+// d(SUM R^2)/dg = -2 SUM xi^g log(xi) (yi - xi^g)
+// solving for dR2/dg = 0
+//
+// g = 1/n * SUM(log(y) / log(x))
+
+cmsFloat64Number CMSEXPORT cmsEstimateGamma(const cmsToneCurve* t, cmsFloat64Number Precision)
+{
+    // Estimates the exponent g of y = x^g for the given curve.
+    // Returns the mean of the per-sample estimates, or -1.0 when the samples
+    // disagree by more than Precision (standard deviation) or when there are
+    // too few usable samples to compute a deviation at all.
+    cmsFloat64Number gamma, sum, sum2;
+    cmsFloat64Number n, x, y, Std;
+    cmsUInt32Number i;
+
+    _cmsAssert(t != NULL);
+
+    sum = sum2 = n = 0;
+
+    // Excluding endpoints
+    for (i=1; i < (MAX_NODES_IN_CURVE-1); i++) {
+
+        x = (cmsFloat64Number) i / (MAX_NODES_IN_CURVE-1);
+        y = (cmsFloat64Number) cmsEvalToneCurveFloat(t, (cmsFloat32Number) x);
+
+        // Avoid 7% on lower part to prevent
+        // artifacts due to linear ramps
+
+        if (y > 0. && y < 1. && x > 0.07) {
+
+            gamma = log(y) / log(x);
+            sum  += gamma;
+            sum2 += gamma * gamma;
+            n++;
+        }
+    }
+
+    // The deviation below divides by n*(n-1) and the mean by n. With zero or
+    // one accepted sample that is a division by zero, so report failure
+    // instead of letting a NaN or infinity escape.
+    if (n <= 1)
+        return -1.0;
+
+    // Take a look on SD to see if gamma isn't exponential at all
+    Std = sqrt((n * sum2 - sum * sum) / (n*(n-1)));
+
+    if (Std > Precision)
+        return -1.0;
+
+    return (sum / n);   // The mean
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgmt.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgmt.cpp
new file mode 100644
index 0000000000..0cf36cd06f
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsgmt.cpp
@@ -0,0 +1,590 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// Auxiliary: append a Lab identity after the given sequence of profiles
+// and return the transform. Lab profile is closed, rest of profiles are kept open.
+cmsHTRANSFORM _cmsChain2Lab(cmsContext            ContextID,
+                            cmsUInt32Number        nProfiles,
+                            cmsUInt32Number        InputFormat,
+                            cmsUInt32Number        OutputFormat,
+                            const cmsUInt32Number  Intents[],
+                            const cmsHPROFILE      hProfiles[],
+                            const cmsBool          BPC[],
+                            const cmsFloat64Number AdaptationStates[],
+                            cmsUInt32Number        dwFlags)
+{
+    // Fixed-size working copies of the caller's chain, with room for the
+    // extra Lab entry appended below.
+    cmsHPROFILE      ChainProfiles[256];
+    cmsBool          ChainBPC[256];
+    cmsFloat64Number ChainAdaptation[256];
+    cmsUInt32Number  ChainIntents[256];
+    cmsHPROFILE      hLabProfile;
+    cmsHTRANSFORM    hResult;
+    cmsUInt32Number  k;
+
+    // One profile is going to be added, so at most 254 may come in
+    if (nProfiles > 254) return NULL;
+
+    // Lab profile used as the output space of the chain
+    hLabProfile = cmsCreateLab4ProfileTHR(ContextID, NULL);
+    if (hLabProfile == NULL) return NULL;
+
+    // Duplicate the caller's per-profile settings
+    for (k = 0; k < nProfiles; k++) {
+
+        ChainProfiles[k]   = hProfiles[k];
+        ChainBPC[k]        = BPC[k];
+        ChainAdaptation[k] = AdaptationStates[k];
+        ChainIntents[k]    = Intents[k];
+    }
+
+    // Append the Lab entry: no BPC, adaptation state 1.0, relative colorimetric
+    ChainProfiles[nProfiles]   = hLabProfile;
+    ChainBPC[nProfiles]        = 0;
+    ChainAdaptation[nProfiles] = 1.0;
+    ChainIntents[nProfiles]    = INTENT_RELATIVE_COLORIMETRIC;
+
+    // Build the transform over the extended chain
+    hResult = cmsCreateExtendedTransform(ContextID, nProfiles + 1, ChainProfiles,
+                                         ChainBPC,
+                                         ChainIntents,
+                                         ChainAdaptation,
+                                         NULL, 0,
+                                         InputFormat,
+                                         OutputFormat,
+                                         dwFlags);
+
+    // The Lab profile is closed here whether or not the transform was created;
+    // the caller's profiles are not closed by this function.
+    cmsCloseProfile(hLabProfile);
+
+    return hResult;
+}
+
+
+// Compute K -> L* relationship. Flags may include black point compensation. In this case,
+// the relationship is assumed from the profile with BPC to a black point zero.
+static
+cmsToneCurve* ComputeKToLstar(cmsContext            ContextID,
+                               cmsUInt32Number       nPoints,
+                               cmsUInt32Number       nProfiles,
+                               const cmsUInt32Number Intents[],
+                               const cmsHPROFILE     hProfiles[],
+                               const cmsBool         BPC[],
+                               const cmsFloat64Number AdaptationStates[],
+                               cmsUInt32Number dwFlags)
+{
+    cmsToneCurve* Curve = NULL;
+    cmsUInt32Number Step;
+    cmsHTRANSFORM hToLab;
+    cmsCIELab LabValue;
+    cmsFloat32Number Ink[4];
+    cmsFloat32Number* Darkness;
+
+    // CMYK float in, Lab double out, across the given profile chain
+    hToLab = _cmsChain2Lab(ContextID, nProfiles, TYPE_CMYK_FLT, TYPE_Lab_DBL, Intents, hProfiles, BPC, AdaptationStates, dwFlags);
+    if (hToLab == NULL) return NULL;
+
+    Darkness = (cmsFloat32Number*) _cmsCalloc(ContextID, nPoints, sizeof(cmsFloat32Number));
+
+    if (Darkness != NULL) {
+
+        // Sweep K from 0 to 100 with C, M and Y held at zero
+        for (Step = 0; Step < nPoints; Step++) {
+
+            Ink[0] = Ink[1] = Ink[2] = 0;
+            Ink[3] = (cmsFloat32Number) ((Step * 100.0) / (nPoints-1));
+
+            cmsDoTransform(hToLab, Ink, &LabValue, 1);
+
+            // Store 1 - L*/100 rather than L* itself
+            Darkness[Step] = (cmsFloat32Number) (1.0 - LabValue.L / 100.0);
+        }
+
+        Curve = cmsBuildTabulatedToneCurveFloat(ContextID, nPoints, Darkness);
+    }
+
+    // Common exit: the transform is always released, the sample buffer only
+    // if it was obtained. Curve stays NULL when the buffer allocation failed.
+    cmsDeleteTransform(hToLab);
+    if (Darkness) _cmsFree(ContextID, Darkness);
+
+    return Curve;
+}
+
+
+// Compute Black tone curve on a CMYK -> CMYK transform. This is done by
+// using the proof direction on both profiles to find K->L* relationship
+// then joining both curves. dwFlags may include black point compensation.
+cmsToneCurve* _cmsBuildKToneCurve(cmsContext        ContextID,
+                                   cmsUInt32Number   nPoints,
+                                   cmsUInt32Number   nProfiles,
+                                   const cmsUInt32Number Intents[],
+                                   const cmsHPROFILE hProfiles[],
+                                   const cmsBool     BPC[],
+                                   const cmsFloat64Number AdaptationStates[],
+                                   cmsUInt32Number   dwFlags)
+{
+    // Returns the joined K tone curve for a CMYK -> CMYK chain, or NULL when
+    // the chain is empty, is not CMYK at both ends, does not end in an output
+    // class profile, any intermediate curve cannot be built, or the joined
+    // curve is not monotonic.
+    cmsToneCurve *in, *out, *KTone;
+
+    // nProfiles is unsigned: with an empty chain, nProfiles - 1 wraps around
+    // and every hProfiles[nProfiles - 1] access below would be out of bounds.
+    if (nProfiles == 0) return NULL;
+
+    // Make sure CMYK -> CMYK
+    if (cmsGetColorSpace(hProfiles[0]) != cmsSigCmykData ||
+        cmsGetColorSpace(hProfiles[nProfiles-1])!= cmsSigCmykData) return NULL;
+
+
+    // Make sure last is an output profile
+    if (cmsGetDeviceClass(hProfiles[nProfiles - 1]) != cmsSigOutputClass) return NULL;
+
+    // Create individual curves. BPC works also as each K to L* is
+    // computed as a BPC to zero black point in case of L*
+
+    // First curve: every profile except the last one
+    in  = ComputeKToLstar(ContextID, nPoints, nProfiles - 1, Intents, hProfiles, BPC, AdaptationStates, dwFlags);
+    if (in == NULL) return NULL;
+
+    // Second curve: the last profile alone, with its own settings
+    out = ComputeKToLstar(ContextID, nPoints, 1,
+                            Intents + (nProfiles - 1),
+                            &hProfiles [nProfiles - 1],
+                            BPC + (nProfiles - 1),
+                            AdaptationStates + (nProfiles - 1),
+                            dwFlags);
+    if (out == NULL) {
+        cmsFreeToneCurve(in);
+        return NULL;
+    }
+
+    // Build the relationship. This effectively limits the maximum accuracy to 16 bits, but
+    // since this is used on black-preserving LUTs, we are not losing  accuracy in any case
+    KTone = cmsJoinToneCurve(ContextID, in, out, nPoints);
+
+    // Get rid of components
+    cmsFreeToneCurve(in); cmsFreeToneCurve(out);
+
+    // Something went wrong...
+    if (KTone == NULL) return NULL;
+
+    // Make sure it is monotonic
+    if (!cmsIsToneCurveMonotonic(KTone)) {
+        cmsFreeToneCurve(KTone);
+        return NULL;
+    }
+
+    return KTone;
+}
+
+
+// Gamut LUT Creation -----------------------------------------------------------------------------------------
+
+// Used by gamut & softproofing
+
+// Cargo handed to the gamut sampler: the three transforms it evaluates
+// plus the dE limit used to classify a color.
+typedef struct {
+
+    cmsHTRANSFORM hInput;               // From whatever input color space. 16 bits to DBL
+    cmsHTRANSFORM hForward, hReverse;   // Transforms going from Lab to colorant and back
+    cmsFloat64Number Thereshold;        // The threshold above which a color is considered out of gamut
+
+    } GAMUTCHAIN;
+
+// This sampler does compute gamut boundaries by comparing original
+// values with a transform going back and forth. Values above ERR_THERESHOLD
+// of maximum are considered out of gamut.
+
+#define ERR_THERESHOLD      5
+
+
+static
+int GamutSampler(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void* Cargo)
+{
+    GAMUTCHAIN* Chain = (GAMUTCHAIN*) Cargo;
+    cmsCIELab FirstIn, FirstOut;
+    cmsCIELab SecondIn, SecondOut;
+    cmsUInt16Number Colorant1[cmsMAXCHANNELS], Colorant2[cmsMAXCHANNELS];
+    cmsFloat64Number dE1, dE2, ErrorRatio;
+    cmsFloat64Number Limit;
+
+    // Neutral ratio until the last branch computes a real one
+    ErrorRatio = 1.0;
+    Limit = Chain->Thereshold;
+
+    // Input node -> Lab
+    cmsDoTransform(Chain->hInput, In, &FirstIn, 1);
+
+    // First round trip: Lab -> colorant -> Lab
+    cmsDoTransform(Chain->hForward, &FirstIn, Colorant1, 1);
+    cmsDoTransform(Chain->hReverse, Colorant1, &FirstOut, 1);
+
+    // The result of the first round trip is the reference for the second one
+    memmove(&SecondIn, &FirstOut, sizeof(cmsCIELab));
+
+    // Second round trip, starting from the already converted value
+    cmsDoTransform(Chain->hForward, &FirstOut, Colorant2, 1);
+    cmsDoTransform(Chain->hReverse, Colorant2, &SecondOut, 1);
+
+    // Error of each round trip
+    dE1 = cmsDeltaE(&FirstIn, &FirstOut);
+    dE2 = cmsDeltaE(&SecondIn, &SecondOut);
+
+    if (dE1 < Limit && dE2 < Limit) {
+
+        // Both errors small: likely in gamut
+        Out[0] = 0;
+    }
+    else if (dE1 < Limit && dE2 > Limit) {
+
+        // First small, second big: undefined, assume in gamut
+        Out[0] = 0;
+    }
+    else if (dE1 > Limit && dE2 < Limit) {
+
+        // First big, second small: clearly out of gamut, store the excess
+        Out[0] = (cmsUInt16Number) _cmsQuickFloor((dE1 - Limit) + .5);
+    }
+    else {
+
+        // Remaining cases (both big, or a value exactly on the limit):
+        // could be due to perceptual mapping, so judge by the ratio of errors
+        ErrorRatio = (dE2 == 0.0) ? dE1 : dE1 / dE2;
+
+        if (ErrorRatio > Limit)
+            Out[0] = (cmsUInt16Number)  _cmsQuickFloor((ErrorRatio - Limit) + .5);
+        else
+            Out[0] = 0;
+    }
+
+    return TRUE;
+}
+
+// Computes a gamut LUT by going back and forth across PCS -> relative colorimetric intent -> PCS;
+// the dE obtained is then annotated on the LUT. Values truly out of gamut are clipped to dE = 0xFFFE,
+// and values that changed are supposed to be handled by some gamut remapping, so they are out of gamut as well.
+//
+// **WARNING: This algorithm assumes that gamut remapping algorithms do NOT move in-gamut colors.
+// Of course, many perceptual and saturation intents do not work that way, but relative colorimetric ones should.
+
+cmsPipeline* _cmsCreateGamutCheckPipeline(cmsContext ContextID,
+                                          cmsHPROFILE hProfiles[],
+                                          cmsBool  BPC[],
+                                          cmsUInt32Number Intents[],
+                                          cmsFloat64Number AdaptationStates[],
+                                          cmsUInt32Number nGamutPCSposition,
+                                          cmsHPROFILE hGamut)
+{
+    cmsHPROFILE            hLabProfile;
+    cmsPipeline*           GamutLut = NULL;      // Stays NULL unless everything below succeeds
+    cmsStage*              ClutStage;
+    cmsUInt32Number        DeviceFormat;
+    GAMUTCHAIN             Chain;
+    cmsUInt32Number        nDeviceChans, nGrid;
+    cmsColorSpaceSignature GamutSpace;
+    cmsUInt32Number        k;
+    cmsHPROFILE            ChainProfiles[256];
+    cmsBool                ChainBPC[256];
+    cmsFloat64Number       ChainAdaptation[256];
+    cmsUInt32Number        ChainIntents[256];
+
+    memset(&Chain, 0, sizeof(GAMUTCHAIN));
+
+    // The PCS position doubles as the index of the appended Lab entry,
+    // so it has to fit the 256-element working arrays.
+    if (nGamutPCSposition <= 0 || nGamutPCSposition > 255) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Wrong position of PCS. 1..255 expected, %d found.", nGamutPCSposition);
+        return NULL;
+    }
+
+    hLabProfile = cmsCreateLab4ProfileTHR(ContextID, NULL);
+    if (hLabProfile == NULL) return NULL;
+
+    // Figure of merit: matrix-shaper profiles convert almost exactly, so a
+    // tight limit is used; other profiles get the default limit because
+    // differing CLUT resolutions may introduce differences.
+    Chain.Thereshold = cmsIsMatrixShaper(hGamut) ? 1.0 : ERR_THERESHOLD;
+
+    // Working copy of the chain up to the PCS position
+    for (k = 0; k < nGamutPCSposition; k++) {
+        ChainProfiles[k]   = hProfiles[k];
+        ChainBPC[k]        = BPC[k];
+        ChainAdaptation[k] = AdaptationStates[k];
+        ChainIntents[k]    = Intents[k];
+    }
+
+    // Lab identity appended right after it
+    ChainProfiles[nGamutPCSposition]   = hLabProfile;
+    ChainBPC[nGamutPCSposition]        = 0;
+    ChainAdaptation[nGamutPCSposition] = 1.0;
+    ChainIntents[nGamutPCSposition]    = INTENT_RELATIVE_COLORIMETRIC;
+
+    // Channel count, grid size and 16-bit format derived from the gamut profile
+    GamutSpace   = cmsGetColorSpace(hGamut);
+    nDeviceChans = cmsChannelsOf(GamutSpace);
+    nGrid        = _cmsReasonableGridpointsByColorspace(GamutSpace, cmsFLAGS_HIGHRESPRECALC);
+    DeviceFormat = (CHANNELS_SH(nDeviceChans)|BYTES_SH(2));
+
+    // 16 bits to Lab double, across the copied chain
+    Chain.hInput = cmsCreateExtendedTransform(ContextID,
+                                              nGamutPCSposition + 1,
+                                              ChainProfiles,
+                                              ChainBPC,
+                                              ChainIntents,
+                                              ChainAdaptation,
+                                              NULL, 0,
+                                              DeviceFormat, TYPE_Lab_DBL,
+                                              cmsFLAGS_NOCACHE);
+
+    // Forward step: Lab double to device
+    Chain.hForward = cmsCreateTransformTHR(ContextID,
+                                           hLabProfile, TYPE_Lab_DBL,
+                                           hGamut, DeviceFormat,
+                                           INTENT_RELATIVE_COLORIMETRIC,
+                                           cmsFLAGS_NOCACHE);
+
+    // Backwards step: device to Lab double
+    Chain.hReverse = cmsCreateTransformTHR(ContextID,
+                                           hGamut, DeviceFormat,
+                                           hLabProfile, TYPE_Lab_DBL,
+                                           INTENT_RELATIVE_COLORIMETRIC,
+                                           cmsFLAGS_NOCACHE);
+
+    // Only with all three transforms in place can the LUT be sampled
+    if (Chain.hInput && Chain.hForward && Chain.hReverse) {
+
+        // Single output channel holding the value written by the sampler
+        GamutLut = cmsPipelineAlloc(ContextID, 3, 1);
+        if (GamutLut != NULL) {
+
+            ClutStage = cmsStageAllocCLut16bit(ContextID, nGrid, nDeviceChans, 1, NULL);
+
+            if (cmsPipelineInsertStage(GamutLut, cmsAT_BEGIN, ClutStage)) {
+                cmsStageSampleCLut16bit(ClutStage, GamutSampler, (void*) &Chain, 0);
+            }
+            else {
+                cmsPipelineFree(GamutLut);
+                GamutLut = NULL;
+            }
+        }
+    }
+
+    // Release the helpers regardless of the outcome
+    if (Chain.hInput)   cmsDeleteTransform(Chain.hInput);
+    if (Chain.hForward) cmsDeleteTransform(Chain.hForward);
+    if (Chain.hReverse) cmsDeleteTransform(Chain.hReverse);
+    if (hLabProfile) cmsCloseProfile(hLabProfile);
+
+    // The computed hull, or NULL on failure
+    return GamutLut;
+}
+
+// Total Area Coverage estimation ----------------------------------------------------------------
+
+// State shared between cmsDetectTAC and its EstimateTAC sampling callback.
+typedef struct {
+    cmsUInt32Number  nOutputChans;              // How many result channels are summed per node
+    cmsHTRANSFORM    hRoundTrip;                // Transform evaluated on every sampled node
+    cmsFloat32Number MaxTAC;                    // Largest channel sum seen so far
+    cmsFloat32Number MaxInput[cmsMAXCHANNELS];  // Input values recorded when MaxTAC was last raised
+
+} cmsTACestimator;
+
+
+// This callback just accounts the maximum ink dropped in the given node. It does not populate any
+// memory, as the destination table is NULL. Its only purpose it to know the global maximum.
+static
+int EstimateTAC(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void * Cargo)
+{
+    cmsTACestimator* Estimator = (cmsTACestimator*) Cargo;
+    cmsFloat32Number Inks[cmsMAXCHANNELS];
+    cmsFloat32Number Total;
+    cmsUInt32Number Chan;
+
+    // Run the node through the round-trip transform
+    cmsDoTransform(Estimator->hRoundTrip, In, Inks, 1);
+
+    // Add up the amounts of ink on all output channels
+    Total = 0;
+    Chan = 0;
+    while (Chan < Estimator->nOutputChans) {
+        Total += Inks[Chan];
+        Chan++;
+    }
+
+    // A new maximum: remember it together with the input that produced it
+    if (Total > Estimator->MaxTAC) {
+
+        Estimator->MaxTAC = Total;
+
+        for (Chan = 0; Chan < Estimator->nOutputChans; Chan++)
+            Estimator->MaxInput[Chan] = In[Chan];
+    }
+
+    return TRUE;
+
+    // Out is never written by this callback
+    cmsUNUSED_PARAMETER(Out);
+}
+
+
+// Detect Total area coverage of the profile
+cmsFloat64Number CMSEXPORT cmsDetectTAC(cmsHPROFILE hProfile)
+{
+    cmsTACestimator Estimator;
+    cmsUInt32Number OutFormat;
+    cmsUInt32Number Grid[MAX_INPUT_DIMENSIONS];
+    cmsHPROFILE hLabProfile;
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+
+    // Only output class profiles are analysed; anything else reports 0
+    if (cmsGetDeviceClass(hProfile) != cmsSigOutputClass) {
+        return 0;
+    }
+
+    // Formatter matching the profile's color space, used for the result side
+    OutFormat = cmsFormatterForColorspaceOfProfile(hProfile, 4, TRUE);
+
+    Estimator.nOutputChans = T_CHANNELS(OutFormat);
+    Estimator.MaxTAC = 0;    // Nothing measured yet
+
+    // Safety net against overrunning the per-channel arrays
+    if (Estimator.nOutputChans >= cmsMAXCHANNELS) return 0;
+
+    hLabProfile = cmsCreateLab4ProfileTHR(ContextID, NULL);
+    if (hLabProfile == NULL) return 0;
+
+    // Lab 16 bits -> profile, perceptual intent, no optimization and no cache
+    Estimator.hRoundTrip = cmsCreateTransformTHR(ContextID, hLabProfile, TYPE_Lab_16,
+                                                 hProfile, OutFormat, INTENT_PERCEPTUAL,
+                                                 cmsFLAGS_NOOPTIMIZE|cmsFLAGS_NOCACHE);
+
+    // The Lab profile is closed here whether or not the transform was created
+    cmsCloseProfile(hLabProfile);
+    if (Estimator.hRoundTrip == NULL) return 0;
+
+    // Coarse sampling on the first axis, dense sampling on the other two
+    Grid[0] = 6;
+    Grid[1] = 74;
+    Grid[2] = 74;
+
+    // Visit every node; a failed sweep discards whatever was accumulated
+    if (!cmsSliceSpace16(3, Grid, EstimateTAC, &Estimator)) {
+        Estimator.MaxTAC = 0;
+    }
+
+    cmsDeleteTransform(Estimator.hRoundTrip);
+
+    // Results in %
+    return Estimator.MaxTAC;
+}
+
+
+// Carefully,  clamp on CIELab space.
+
+cmsBool CMSEXPORT cmsDesaturateLab(cmsCIELab* Lab,
+                                   double amax, double amin,
+                                   double bmax, double bmin)
+{
+    cmsCIELCh Polar;
+    double Hue, Ratio;
+
+    // Negative lightness: collapse the whole color to zero and report failure
+    if (Lab->L < 0) {
+
+        Lab->L = Lab->a = Lab->b = 0.0;
+        return FALSE;
+    }
+
+    // Highlights above L = 100 are discarded by clamping
+    if (Lab->L > 100)
+        Lab->L = 100;
+
+    // Already inside the a/b limits: nothing else to do
+    if (!(Lab->a < amin || Lab->a > amax ||
+          Lab->b < bmin || Lab->b > bmax))
+        return TRUE;
+
+    // With a exactly zero the b/a ratio below cannot be formed,
+    // so b is clamped directly according to its sign
+    if (Lab->a == 0.0) {
+
+        Lab->b = Lab->b < 0 ? bmin : bmax;
+        return TRUE;
+    }
+
+    // Go through LCh to obtain the hue that selects the clipping face
+    cmsLab2LCh(&Polar, Lab);
+
+    Ratio = Lab->b / Lab->a;
+    Hue   = Polar.h;
+
+    if ((Hue >= 0. && Hue < 45.) ||
+        (Hue >= 315 && Hue <= 360.)) {
+
+        // Clip against amax
+        Lab->a = amax;
+        Lab->b = amax * Ratio;
+    }
+    else if (Hue >= 45. && Hue < 135.) {
+
+        // Clip against bmax
+        Lab->b = bmax;
+        Lab->a = bmax / Ratio;
+    }
+    else if (Hue >= 135. && Hue < 225.) {
+
+        // Clip against amin
+        Lab->a = amin;
+        Lab->b = amin * Ratio;
+    }
+    else if (Hue >= 225. && Hue < 315.) {
+
+        // Clip against bmin
+        Lab->b = bmin;
+        Lab->a = bmin / Ratio;
+    }
+    else {
+
+        // Hue outside all four zones
+        cmsSignalError(0, cmsERROR_RANGE, "Invalid angle");
+        return FALSE;
+    }
+
+    return TRUE;
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmshalf.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmshalf.cpp
new file mode 100644
index 0000000000..66c2701134
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmshalf.cpp
@@ -0,0 +1,535 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+//
+#include "lcms2_internal.h"
+
+#ifndef CMS_NO_HALF_SUPPORT 
+
+// This code is inspired by the paper "Fast Half Float Conversions"
+// by Jeroen van der Zijp
+
+static cmsUInt32Number Mantissa[2048] = { // Precomputed 32-bit lookup table made of two 1024-entry halves (layout described on the next line).
+// Indices 0..1023 climb from 0x00000000 to 0x387fc000; indices 1024..2047 equal 0x38000000 + (i - 1024) * 0x2000. NOTE(review): presumably float32 bit patterns selected through Offset[] as in the cited paper; the lookup code is outside this view, confirm.
+0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34a00000,
+0x34c00000, 0x34e00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000,
+0x35400000, 0x35500000, 0x35600000, 0x35700000, 0x35800000, 0x35880000,
+0x35900000, 0x35980000, 0x35a00000, 0x35a80000, 0x35b00000, 0x35b80000,
+0x35c00000, 0x35c80000, 0x35d00000, 0x35d80000, 0x35e00000, 0x35e80000,
+0x35f00000, 0x35f80000, 0x36000000, 0x36040000, 0x36080000, 0x360c0000,
+0x36100000, 0x36140000, 0x36180000, 0x361c0000, 0x36200000, 0x36240000,
+0x36280000, 0x362c0000, 0x36300000, 0x36340000, 0x36380000, 0x363c0000,
+0x36400000, 0x36440000, 0x36480000, 0x364c0000, 0x36500000, 0x36540000,
+0x36580000, 0x365c0000, 0x36600000, 0x36640000, 0x36680000, 0x366c0000,
+0x36700000, 0x36740000, 0x36780000, 0x367c0000, 0x36800000, 0x36820000,
+0x36840000, 0x36860000, 0x36880000, 0x368a0000, 0x368c0000, 0x368e0000,
+0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369a0000,
+0x369c0000, 0x369e0000, 0x36a00000, 0x36a20000, 0x36a40000, 0x36a60000,
+0x36a80000, 0x36aa0000, 0x36ac0000, 0x36ae0000, 0x36b00000, 0x36b20000,
+0x36b40000, 0x36b60000, 0x36b80000, 0x36ba0000, 0x36bc0000, 0x36be0000,
+0x36c00000, 0x36c20000, 0x36c40000, 0x36c60000, 0x36c80000, 0x36ca0000,
+0x36cc0000, 0x36ce0000, 0x36d00000, 0x36d20000, 0x36d40000, 0x36d60000,
+0x36d80000, 0x36da0000, 0x36dc0000, 0x36de0000, 0x36e00000, 0x36e20000,
+0x36e40000, 0x36e60000, 0x36e80000, 0x36ea0000, 0x36ec0000, 0x36ee0000,
+0x36f00000, 0x36f20000, 0x36f40000, 0x36f60000, 0x36f80000, 0x36fa0000,
+0x36fc0000, 0x36fe0000, 0x37000000, 0x37010000, 0x37020000, 0x37030000,
+0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000,
+0x370a0000, 0x370b0000, 0x370c0000, 0x370d0000, 0x370e0000, 0x370f0000,
+0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000,
+0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371a0000, 0x371b0000,
+0x371c0000, 0x371d0000, 0x371e0000, 0x371f0000, 0x37200000, 0x37210000,
+0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000,
+0x37280000, 0x37290000, 0x372a0000, 0x372b0000, 0x372c0000, 0x372d0000,
+0x372e0000, 0x372f0000, 0x37300000, 0x37310000, 0x37320000, 0x37330000,
+0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000,
+0x373a0000, 0x373b0000, 0x373c0000, 0x373d0000, 0x373e0000, 0x373f0000,
+0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000,
+0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374a0000, 0x374b0000,
+0x374c0000, 0x374d0000, 0x374e0000, 0x374f0000, 0x37500000, 0x37510000,
+0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000,
+0x37580000, 0x37590000, 0x375a0000, 0x375b0000, 0x375c0000, 0x375d0000,
+0x375e0000, 0x375f0000, 0x37600000, 0x37610000, 0x37620000, 0x37630000,
+0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000,
+0x376a0000, 0x376b0000, 0x376c0000, 0x376d0000, 0x376e0000, 0x376f0000,
+0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000,
+0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377a0000, 0x377b0000,
+0x377c0000, 0x377d0000, 0x377e0000, 0x377f0000, 0x37800000, 0x37808000,
+0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000,
+0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000,
+0x37870000, 0x37878000, 0x37880000, 0x37888000, 0x37890000, 0x37898000,
+0x378a0000, 0x378a8000, 0x378b0000, 0x378b8000, 0x378c0000, 0x378c8000,
+0x378d0000, 0x378d8000, 0x378e0000, 0x378e8000, 0x378f0000, 0x378f8000,
+0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000,
+0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000,
+0x37960000, 0x37968000, 0x37970000, 0x37978000, 0x37980000, 0x37988000,
+0x37990000, 0x37998000, 0x379a0000, 0x379a8000, 0x379b0000, 0x379b8000,
+0x379c0000, 0x379c8000, 0x379d0000, 0x379d8000, 0x379e0000, 0x379e8000,
+0x379f0000, 0x379f8000, 0x37a00000, 0x37a08000, 0x37a10000, 0x37a18000,
+0x37a20000, 0x37a28000, 0x37a30000, 0x37a38000, 0x37a40000, 0x37a48000,
+0x37a50000, 0x37a58000, 0x37a60000, 0x37a68000, 0x37a70000, 0x37a78000,
+0x37a80000, 0x37a88000, 0x37a90000, 0x37a98000, 0x37aa0000, 0x37aa8000,
+0x37ab0000, 0x37ab8000, 0x37ac0000, 0x37ac8000, 0x37ad0000, 0x37ad8000,
+0x37ae0000, 0x37ae8000, 0x37af0000, 0x37af8000, 0x37b00000, 0x37b08000,
+0x37b10000, 0x37b18000, 0x37b20000, 0x37b28000, 0x37b30000, 0x37b38000,
+0x37b40000, 0x37b48000, 0x37b50000, 0x37b58000, 0x37b60000, 0x37b68000,
+0x37b70000, 0x37b78000, 0x37b80000, 0x37b88000, 0x37b90000, 0x37b98000,
+0x37ba0000, 0x37ba8000, 0x37bb0000, 0x37bb8000, 0x37bc0000, 0x37bc8000,
+0x37bd0000, 0x37bd8000, 0x37be0000, 0x37be8000, 0x37bf0000, 0x37bf8000,
+0x37c00000, 0x37c08000, 0x37c10000, 0x37c18000, 0x37c20000, 0x37c28000,
+0x37c30000, 0x37c38000, 0x37c40000, 0x37c48000, 0x37c50000, 0x37c58000,
+0x37c60000, 0x37c68000, 0x37c70000, 0x37c78000, 0x37c80000, 0x37c88000,
+0x37c90000, 0x37c98000, 0x37ca0000, 0x37ca8000, 0x37cb0000, 0x37cb8000,
+0x37cc0000, 0x37cc8000, 0x37cd0000, 0x37cd8000, 0x37ce0000, 0x37ce8000,
+0x37cf0000, 0x37cf8000, 0x37d00000, 0x37d08000, 0x37d10000, 0x37d18000,
+0x37d20000, 0x37d28000, 0x37d30000, 0x37d38000, 0x37d40000, 0x37d48000,
+0x37d50000, 0x37d58000, 0x37d60000, 0x37d68000, 0x37d70000, 0x37d78000,
+0x37d80000, 0x37d88000, 0x37d90000, 0x37d98000, 0x37da0000, 0x37da8000,
+0x37db0000, 0x37db8000, 0x37dc0000, 0x37dc8000, 0x37dd0000, 0x37dd8000,
+0x37de0000, 0x37de8000, 0x37df0000, 0x37df8000, 0x37e00000, 0x37e08000,
+0x37e10000, 0x37e18000, 0x37e20000, 0x37e28000, 0x37e30000, 0x37e38000,
+0x37e40000, 0x37e48000, 0x37e50000, 0x37e58000, 0x37e60000, 0x37e68000,
+0x37e70000, 0x37e78000, 0x37e80000, 0x37e88000, 0x37e90000, 0x37e98000,
+0x37ea0000, 0x37ea8000, 0x37eb0000, 0x37eb8000, 0x37ec0000, 0x37ec8000,
+0x37ed0000, 0x37ed8000, 0x37ee0000, 0x37ee8000, 0x37ef0000, 0x37ef8000,
+0x37f00000, 0x37f08000, 0x37f10000, 0x37f18000, 0x37f20000, 0x37f28000,
+0x37f30000, 0x37f38000, 0x37f40000, 0x37f48000, 0x37f50000, 0x37f58000,
+0x37f60000, 0x37f68000, 0x37f70000, 0x37f78000, 0x37f80000, 0x37f88000,
+0x37f90000, 0x37f98000, 0x37fa0000, 0x37fa8000, 0x37fb0000, 0x37fb8000,
+0x37fc0000, 0x37fc8000, 0x37fd0000, 0x37fd8000, 0x37fe0000, 0x37fe8000,
+0x37ff0000, 0x37ff8000, 0x38000000, 0x38004000, 0x38008000, 0x3800c000,
+0x38010000, 0x38014000, 0x38018000, 0x3801c000, 0x38020000, 0x38024000,
+0x38028000, 0x3802c000, 0x38030000, 0x38034000, 0x38038000, 0x3803c000,
+0x38040000, 0x38044000, 0x38048000, 0x3804c000, 0x38050000, 0x38054000,
+0x38058000, 0x3805c000, 0x38060000, 0x38064000, 0x38068000, 0x3806c000,
+0x38070000, 0x38074000, 0x38078000, 0x3807c000, 0x38080000, 0x38084000,
+0x38088000, 0x3808c000, 0x38090000, 0x38094000, 0x38098000, 0x3809c000,
+0x380a0000, 0x380a4000, 0x380a8000, 0x380ac000, 0x380b0000, 0x380b4000,
+0x380b8000, 0x380bc000, 0x380c0000, 0x380c4000, 0x380c8000, 0x380cc000,
+0x380d0000, 0x380d4000, 0x380d8000, 0x380dc000, 0x380e0000, 0x380e4000,
+0x380e8000, 0x380ec000, 0x380f0000, 0x380f4000, 0x380f8000, 0x380fc000,
+0x38100000, 0x38104000, 0x38108000, 0x3810c000, 0x38110000, 0x38114000,
+0x38118000, 0x3811c000, 0x38120000, 0x38124000, 0x38128000, 0x3812c000,
+0x38130000, 0x38134000, 0x38138000, 0x3813c000, 0x38140000, 0x38144000,
+0x38148000, 0x3814c000, 0x38150000, 0x38154000, 0x38158000, 0x3815c000,
+0x38160000, 0x38164000, 0x38168000, 0x3816c000, 0x38170000, 0x38174000,
+0x38178000, 0x3817c000, 0x38180000, 0x38184000, 0x38188000, 0x3818c000,
+0x38190000, 0x38194000, 0x38198000, 0x3819c000, 0x381a0000, 0x381a4000,
+0x381a8000, 0x381ac000, 0x381b0000, 0x381b4000, 0x381b8000, 0x381bc000,
+0x381c0000, 0x381c4000, 0x381c8000, 0x381cc000, 0x381d0000, 0x381d4000,
+0x381d8000, 0x381dc000, 0x381e0000, 0x381e4000, 0x381e8000, 0x381ec000,
+0x381f0000, 0x381f4000, 0x381f8000, 0x381fc000, 0x38200000, 0x38204000,
+0x38208000, 0x3820c000, 0x38210000, 0x38214000, 0x38218000, 0x3821c000,
+0x38220000, 0x38224000, 0x38228000, 0x3822c000, 0x38230000, 0x38234000,
+0x38238000, 0x3823c000, 0x38240000, 0x38244000, 0x38248000, 0x3824c000,
+0x38250000, 0x38254000, 0x38258000, 0x3825c000, 0x38260000, 0x38264000,
+0x38268000, 0x3826c000, 0x38270000, 0x38274000, 0x38278000, 0x3827c000,
+0x38280000, 0x38284000, 0x38288000, 0x3828c000, 0x38290000, 0x38294000,
+0x38298000, 0x3829c000, 0x382a0000, 0x382a4000, 0x382a8000, 0x382ac000,
+0x382b0000, 0x382b4000, 0x382b8000, 0x382bc000, 0x382c0000, 0x382c4000,
+0x382c8000, 0x382cc000, 0x382d0000, 0x382d4000, 0x382d8000, 0x382dc000,
+0x382e0000, 0x382e4000, 0x382e8000, 0x382ec000, 0x382f0000, 0x382f4000,
+0x382f8000, 0x382fc000, 0x38300000, 0x38304000, 0x38308000, 0x3830c000,
+0x38310000, 0x38314000, 0x38318000, 0x3831c000, 0x38320000, 0x38324000,
+0x38328000, 0x3832c000, 0x38330000, 0x38334000, 0x38338000, 0x3833c000,
+0x38340000, 0x38344000, 0x38348000, 0x3834c000, 0x38350000, 0x38354000,
+0x38358000, 0x3835c000, 0x38360000, 0x38364000, 0x38368000, 0x3836c000,
+0x38370000, 0x38374000, 0x38378000, 0x3837c000, 0x38380000, 0x38384000,
+0x38388000, 0x3838c000, 0x38390000, 0x38394000, 0x38398000, 0x3839c000,
+0x383a0000, 0x383a4000, 0x383a8000, 0x383ac000, 0x383b0000, 0x383b4000,
+0x383b8000, 0x383bc000, 0x383c0000, 0x383c4000, 0x383c8000, 0x383cc000,
+0x383d0000, 0x383d4000, 0x383d8000, 0x383dc000, 0x383e0000, 0x383e4000,
+0x383e8000, 0x383ec000, 0x383f0000, 0x383f4000, 0x383f8000, 0x383fc000,
+0x38400000, 0x38404000, 0x38408000, 0x3840c000, 0x38410000, 0x38414000,
+0x38418000, 0x3841c000, 0x38420000, 0x38424000, 0x38428000, 0x3842c000,
+0x38430000, 0x38434000, 0x38438000, 0x3843c000, 0x38440000, 0x38444000,
+0x38448000, 0x3844c000, 0x38450000, 0x38454000, 0x38458000, 0x3845c000,
+0x38460000, 0x38464000, 0x38468000, 0x3846c000, 0x38470000, 0x38474000,
+0x38478000, 0x3847c000, 0x38480000, 0x38484000, 0x38488000, 0x3848c000,
+0x38490000, 0x38494000, 0x38498000, 0x3849c000, 0x384a0000, 0x384a4000,
+0x384a8000, 0x384ac000, 0x384b0000, 0x384b4000, 0x384b8000, 0x384bc000,
+0x384c0000, 0x384c4000, 0x384c8000, 0x384cc000, 0x384d0000, 0x384d4000,
+0x384d8000, 0x384dc000, 0x384e0000, 0x384e4000, 0x384e8000, 0x384ec000,
+0x384f0000, 0x384f4000, 0x384f8000, 0x384fc000, 0x38500000, 0x38504000,
+0x38508000, 0x3850c000, 0x38510000, 0x38514000, 0x38518000, 0x3851c000,
+0x38520000, 0x38524000, 0x38528000, 0x3852c000, 0x38530000, 0x38534000,
+0x38538000, 0x3853c000, 0x38540000, 0x38544000, 0x38548000, 0x3854c000,
+0x38550000, 0x38554000, 0x38558000, 0x3855c000, 0x38560000, 0x38564000,
+0x38568000, 0x3856c000, 0x38570000, 0x38574000, 0x38578000, 0x3857c000,
+0x38580000, 0x38584000, 0x38588000, 0x3858c000, 0x38590000, 0x38594000,
+0x38598000, 0x3859c000, 0x385a0000, 0x385a4000, 0x385a8000, 0x385ac000,
+0x385b0000, 0x385b4000, 0x385b8000, 0x385bc000, 0x385c0000, 0x385c4000,
+0x385c8000, 0x385cc000, 0x385d0000, 0x385d4000, 0x385d8000, 0x385dc000,
+0x385e0000, 0x385e4000, 0x385e8000, 0x385ec000, 0x385f0000, 0x385f4000,
+0x385f8000, 0x385fc000, 0x38600000, 0x38604000, 0x38608000, 0x3860c000,
+0x38610000, 0x38614000, 0x38618000, 0x3861c000, 0x38620000, 0x38624000,
+0x38628000, 0x3862c000, 0x38630000, 0x38634000, 0x38638000, 0x3863c000,
+0x38640000, 0x38644000, 0x38648000, 0x3864c000, 0x38650000, 0x38654000,
+0x38658000, 0x3865c000, 0x38660000, 0x38664000, 0x38668000, 0x3866c000,
+0x38670000, 0x38674000, 0x38678000, 0x3867c000, 0x38680000, 0x38684000,
+0x38688000, 0x3868c000, 0x38690000, 0x38694000, 0x38698000, 0x3869c000,
+0x386a0000, 0x386a4000, 0x386a8000, 0x386ac000, 0x386b0000, 0x386b4000,
+0x386b8000, 0x386bc000, 0x386c0000, 0x386c4000, 0x386c8000, 0x386cc000,
+0x386d0000, 0x386d4000, 0x386d8000, 0x386dc000, 0x386e0000, 0x386e4000,
+0x386e8000, 0x386ec000, 0x386f0000, 0x386f4000, 0x386f8000, 0x386fc000,
+0x38700000, 0x38704000, 0x38708000, 0x3870c000, 0x38710000, 0x38714000,
+0x38718000, 0x3871c000, 0x38720000, 0x38724000, 0x38728000, 0x3872c000,
+0x38730000, 0x38734000, 0x38738000, 0x3873c000, 0x38740000, 0x38744000,
+0x38748000, 0x3874c000, 0x38750000, 0x38754000, 0x38758000, 0x3875c000,
+0x38760000, 0x38764000, 0x38768000, 0x3876c000, 0x38770000, 0x38774000,
+0x38778000, 0x3877c000, 0x38780000, 0x38784000, 0x38788000, 0x3878c000,
+0x38790000, 0x38794000, 0x38798000, 0x3879c000, 0x387a0000, 0x387a4000,
+0x387a8000, 0x387ac000, 0x387b0000, 0x387b4000, 0x387b8000, 0x387bc000,
+0x387c0000, 0x387c4000, 0x387c8000, 0x387cc000, 0x387d0000, 0x387d4000,
+0x387d8000, 0x387dc000, 0x387e0000, 0x387e4000, 0x387e8000, 0x387ec000,
+0x387f0000, 0x387f4000, 0x387f8000, 0x387fc000, 0x38000000, 0x38002000, // indices 1020..1025; the second half starts at index 1024 (0x38000000)
+0x38004000, 0x38006000, 0x38008000, 0x3800a000, 0x3800c000, 0x3800e000,
+0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801a000,
+0x3801c000, 0x3801e000, 0x38020000, 0x38022000, 0x38024000, 0x38026000,
+0x38028000, 0x3802a000, 0x3802c000, 0x3802e000, 0x38030000, 0x38032000,
+0x38034000, 0x38036000, 0x38038000, 0x3803a000, 0x3803c000, 0x3803e000,
+0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804a000,
+0x3804c000, 0x3804e000, 0x38050000, 0x38052000, 0x38054000, 0x38056000,
+0x38058000, 0x3805a000, 0x3805c000, 0x3805e000, 0x38060000, 0x38062000,
+0x38064000, 0x38066000, 0x38068000, 0x3806a000, 0x3806c000, 0x3806e000,
+0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807a000,
+0x3807c000, 0x3807e000, 0x38080000, 0x38082000, 0x38084000, 0x38086000,
+0x38088000, 0x3808a000, 0x3808c000, 0x3808e000, 0x38090000, 0x38092000,
+0x38094000, 0x38096000, 0x38098000, 0x3809a000, 0x3809c000, 0x3809e000,
+0x380a0000, 0x380a2000, 0x380a4000, 0x380a6000, 0x380a8000, 0x380aa000,
+0x380ac000, 0x380ae000, 0x380b0000, 0x380b2000, 0x380b4000, 0x380b6000,
+0x380b8000, 0x380ba000, 0x380bc000, 0x380be000, 0x380c0000, 0x380c2000,
+0x380c4000, 0x380c6000, 0x380c8000, 0x380ca000, 0x380cc000, 0x380ce000,
+0x380d0000, 0x380d2000, 0x380d4000, 0x380d6000, 0x380d8000, 0x380da000,
+0x380dc000, 0x380de000, 0x380e0000, 0x380e2000, 0x380e4000, 0x380e6000,
+0x380e8000, 0x380ea000, 0x380ec000, 0x380ee000, 0x380f0000, 0x380f2000,
+0x380f4000, 0x380f6000, 0x380f8000, 0x380fa000, 0x380fc000, 0x380fe000,
+0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810a000,
+0x3810c000, 0x3810e000, 0x38110000, 0x38112000, 0x38114000, 0x38116000,
+0x38118000, 0x3811a000, 0x3811c000, 0x3811e000, 0x38120000, 0x38122000,
+0x38124000, 0x38126000, 0x38128000, 0x3812a000, 0x3812c000, 0x3812e000,
+0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813a000,
+0x3813c000, 0x3813e000, 0x38140000, 0x38142000, 0x38144000, 0x38146000,
+0x38148000, 0x3814a000, 0x3814c000, 0x3814e000, 0x38150000, 0x38152000,
+0x38154000, 0x38156000, 0x38158000, 0x3815a000, 0x3815c000, 0x3815e000,
+0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816a000,
+0x3816c000, 0x3816e000, 0x38170000, 0x38172000, 0x38174000, 0x38176000,
+0x38178000, 0x3817a000, 0x3817c000, 0x3817e000, 0x38180000, 0x38182000,
+0x38184000, 0x38186000, 0x38188000, 0x3818a000, 0x3818c000, 0x3818e000,
+0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819a000,
+0x3819c000, 0x3819e000, 0x381a0000, 0x381a2000, 0x381a4000, 0x381a6000,
+0x381a8000, 0x381aa000, 0x381ac000, 0x381ae000, 0x381b0000, 0x381b2000,
+0x381b4000, 0x381b6000, 0x381b8000, 0x381ba000, 0x381bc000, 0x381be000,
+0x381c0000, 0x381c2000, 0x381c4000, 0x381c6000, 0x381c8000, 0x381ca000,
+0x381cc000, 0x381ce000, 0x381d0000, 0x381d2000, 0x381d4000, 0x381d6000,
+0x381d8000, 0x381da000, 0x381dc000, 0x381de000, 0x381e0000, 0x381e2000,
+0x381e4000, 0x381e6000, 0x381e8000, 0x381ea000, 0x381ec000, 0x381ee000,
+0x381f0000, 0x381f2000, 0x381f4000, 0x381f6000, 0x381f8000, 0x381fa000,
+0x381fc000, 0x381fe000, 0x38200000, 0x38202000, 0x38204000, 0x38206000,
+0x38208000, 0x3820a000, 0x3820c000, 0x3820e000, 0x38210000, 0x38212000,
+0x38214000, 0x38216000, 0x38218000, 0x3821a000, 0x3821c000, 0x3821e000,
+0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822a000,
+0x3822c000, 0x3822e000, 0x38230000, 0x38232000, 0x38234000, 0x38236000,
+0x38238000, 0x3823a000, 0x3823c000, 0x3823e000, 0x38240000, 0x38242000,
+0x38244000, 0x38246000, 0x38248000, 0x3824a000, 0x3824c000, 0x3824e000,
+0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825a000,
+0x3825c000, 0x3825e000, 0x38260000, 0x38262000, 0x38264000, 0x38266000,
+0x38268000, 0x3826a000, 0x3826c000, 0x3826e000, 0x38270000, 0x38272000,
+0x38274000, 0x38276000, 0x38278000, 0x3827a000, 0x3827c000, 0x3827e000,
+0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828a000,
+0x3828c000, 0x3828e000, 0x38290000, 0x38292000, 0x38294000, 0x38296000,
+0x38298000, 0x3829a000, 0x3829c000, 0x3829e000, 0x382a0000, 0x382a2000,
+0x382a4000, 0x382a6000, 0x382a8000, 0x382aa000, 0x382ac000, 0x382ae000,
+0x382b0000, 0x382b2000, 0x382b4000, 0x382b6000, 0x382b8000, 0x382ba000,
+0x382bc000, 0x382be000, 0x382c0000, 0x382c2000, 0x382c4000, 0x382c6000,
+0x382c8000, 0x382ca000, 0x382cc000, 0x382ce000, 0x382d0000, 0x382d2000,
+0x382d4000, 0x382d6000, 0x382d8000, 0x382da000, 0x382dc000, 0x382de000,
+0x382e0000, 0x382e2000, 0x382e4000, 0x382e6000, 0x382e8000, 0x382ea000,
+0x382ec000, 0x382ee000, 0x382f0000, 0x382f2000, 0x382f4000, 0x382f6000,
+0x382f8000, 0x382fa000, 0x382fc000, 0x382fe000, 0x38300000, 0x38302000,
+0x38304000, 0x38306000, 0x38308000, 0x3830a000, 0x3830c000, 0x3830e000,
+0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831a000,
+0x3831c000, 0x3831e000, 0x38320000, 0x38322000, 0x38324000, 0x38326000,
+0x38328000, 0x3832a000, 0x3832c000, 0x3832e000, 0x38330000, 0x38332000,
+0x38334000, 0x38336000, 0x38338000, 0x3833a000, 0x3833c000, 0x3833e000,
+0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834a000,
+0x3834c000, 0x3834e000, 0x38350000, 0x38352000, 0x38354000, 0x38356000,
+0x38358000, 0x3835a000, 0x3835c000, 0x3835e000, 0x38360000, 0x38362000,
+0x38364000, 0x38366000, 0x38368000, 0x3836a000, 0x3836c000, 0x3836e000,
+0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837a000,
+0x3837c000, 0x3837e000, 0x38380000, 0x38382000, 0x38384000, 0x38386000,
+0x38388000, 0x3838a000, 0x3838c000, 0x3838e000, 0x38390000, 0x38392000,
+0x38394000, 0x38396000, 0x38398000, 0x3839a000, 0x3839c000, 0x3839e000,
+0x383a0000, 0x383a2000, 0x383a4000, 0x383a6000, 0x383a8000, 0x383aa000,
+0x383ac000, 0x383ae000, 0x383b0000, 0x383b2000, 0x383b4000, 0x383b6000,
+0x383b8000, 0x383ba000, 0x383bc000, 0x383be000, 0x383c0000, 0x383c2000,
+0x383c4000, 0x383c6000, 0x383c8000, 0x383ca000, 0x383cc000, 0x383ce000,
+0x383d0000, 0x383d2000, 0x383d4000, 0x383d6000, 0x383d8000, 0x383da000,
+0x383dc000, 0x383de000, 0x383e0000, 0x383e2000, 0x383e4000, 0x383e6000,
+0x383e8000, 0x383ea000, 0x383ec000, 0x383ee000, 0x383f0000, 0x383f2000,
+0x383f4000, 0x383f6000, 0x383f8000, 0x383fa000, 0x383fc000, 0x383fe000,
+0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840a000,
+0x3840c000, 0x3840e000, 0x38410000, 0x38412000, 0x38414000, 0x38416000,
+0x38418000, 0x3841a000, 0x3841c000, 0x3841e000, 0x38420000, 0x38422000,
+0x38424000, 0x38426000, 0x38428000, 0x3842a000, 0x3842c000, 0x3842e000,
+0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843a000,
+0x3843c000, 0x3843e000, 0x38440000, 0x38442000, 0x38444000, 0x38446000,
+0x38448000, 0x3844a000, 0x3844c000, 0x3844e000, 0x38450000, 0x38452000,
+0x38454000, 0x38456000, 0x38458000, 0x3845a000, 0x3845c000, 0x3845e000,
+0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846a000,
+0x3846c000, 0x3846e000, 0x38470000, 0x38472000, 0x38474000, 0x38476000,
+0x38478000, 0x3847a000, 0x3847c000, 0x3847e000, 0x38480000, 0x38482000,
+0x38484000, 0x38486000, 0x38488000, 0x3848a000, 0x3848c000, 0x3848e000,
+0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849a000,
+0x3849c000, 0x3849e000, 0x384a0000, 0x384a2000, 0x384a4000, 0x384a6000,
+0x384a8000, 0x384aa000, 0x384ac000, 0x384ae000, 0x384b0000, 0x384b2000,
+0x384b4000, 0x384b6000, 0x384b8000, 0x384ba000, 0x384bc000, 0x384be000,
+0x384c0000, 0x384c2000, 0x384c4000, 0x384c6000, 0x384c8000, 0x384ca000,
+0x384cc000, 0x384ce000, 0x384d0000, 0x384d2000, 0x384d4000, 0x384d6000,
+0x384d8000, 0x384da000, 0x384dc000, 0x384de000, 0x384e0000, 0x384e2000,
+0x384e4000, 0x384e6000, 0x384e8000, 0x384ea000, 0x384ec000, 0x384ee000,
+0x384f0000, 0x384f2000, 0x384f4000, 0x384f6000, 0x384f8000, 0x384fa000,
+0x384fc000, 0x384fe000, 0x38500000, 0x38502000, 0x38504000, 0x38506000,
+0x38508000, 0x3850a000, 0x3850c000, 0x3850e000, 0x38510000, 0x38512000,
+0x38514000, 0x38516000, 0x38518000, 0x3851a000, 0x3851c000, 0x3851e000,
+0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852a000,
+0x3852c000, 0x3852e000, 0x38530000, 0x38532000, 0x38534000, 0x38536000,
+0x38538000, 0x3853a000, 0x3853c000, 0x3853e000, 0x38540000, 0x38542000,
+0x38544000, 0x38546000, 0x38548000, 0x3854a000, 0x3854c000, 0x3854e000,
+0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855a000,
+0x3855c000, 0x3855e000, 0x38560000, 0x38562000, 0x38564000, 0x38566000,
+0x38568000, 0x3856a000, 0x3856c000, 0x3856e000, 0x38570000, 0x38572000,
+0x38574000, 0x38576000, 0x38578000, 0x3857a000, 0x3857c000, 0x3857e000,
+0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858a000,
+0x3858c000, 0x3858e000, 0x38590000, 0x38592000, 0x38594000, 0x38596000,
+0x38598000, 0x3859a000, 0x3859c000, 0x3859e000, 0x385a0000, 0x385a2000,
+0x385a4000, 0x385a6000, 0x385a8000, 0x385aa000, 0x385ac000, 0x385ae000,
+0x385b0000, 0x385b2000, 0x385b4000, 0x385b6000, 0x385b8000, 0x385ba000,
+0x385bc000, 0x385be000, 0x385c0000, 0x385c2000, 0x385c4000, 0x385c6000,
+0x385c8000, 0x385ca000, 0x385cc000, 0x385ce000, 0x385d0000, 0x385d2000,
+0x385d4000, 0x385d6000, 0x385d8000, 0x385da000, 0x385dc000, 0x385de000,
+0x385e0000, 0x385e2000, 0x385e4000, 0x385e6000, 0x385e8000, 0x385ea000,
+0x385ec000, 0x385ee000, 0x385f0000, 0x385f2000, 0x385f4000, 0x385f6000,
+0x385f8000, 0x385fa000, 0x385fc000, 0x385fe000, 0x38600000, 0x38602000,
+0x38604000, 0x38606000, 0x38608000, 0x3860a000, 0x3860c000, 0x3860e000,
+0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861a000,
+0x3861c000, 0x3861e000, 0x38620000, 0x38622000, 0x38624000, 0x38626000,
+0x38628000, 0x3862a000, 0x3862c000, 0x3862e000, 0x38630000, 0x38632000,
+0x38634000, 0x38636000, 0x38638000, 0x3863a000, 0x3863c000, 0x3863e000,
+0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864a000,
+0x3864c000, 0x3864e000, 0x38650000, 0x38652000, 0x38654000, 0x38656000,
+0x38658000, 0x3865a000, 0x3865c000, 0x3865e000, 0x38660000, 0x38662000,
+0x38664000, 0x38666000, 0x38668000, 0x3866a000, 0x3866c000, 0x3866e000,
+0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867a000,
+0x3867c000, 0x3867e000, 0x38680000, 0x38682000, 0x38684000, 0x38686000,
+0x38688000, 0x3868a000, 0x3868c000, 0x3868e000, 0x38690000, 0x38692000,
+0x38694000, 0x38696000, 0x38698000, 0x3869a000, 0x3869c000, 0x3869e000,
+0x386a0000, 0x386a2000, 0x386a4000, 0x386a6000, 0x386a8000, 0x386aa000,
+0x386ac000, 0x386ae000, 0x386b0000, 0x386b2000, 0x386b4000, 0x386b6000,
+0x386b8000, 0x386ba000, 0x386bc000, 0x386be000, 0x386c0000, 0x386c2000,
+0x386c4000, 0x386c6000, 0x386c8000, 0x386ca000, 0x386cc000, 0x386ce000,
+0x386d0000, 0x386d2000, 0x386d4000, 0x386d6000, 0x386d8000, 0x386da000,
+0x386dc000, 0x386de000, 0x386e0000, 0x386e2000, 0x386e4000, 0x386e6000,
+0x386e8000, 0x386ea000, 0x386ec000, 0x386ee000, 0x386f0000, 0x386f2000,
+0x386f4000, 0x386f6000, 0x386f8000, 0x386fa000, 0x386fc000, 0x386fe000,
+0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870a000,
+0x3870c000, 0x3870e000, 0x38710000, 0x38712000, 0x38714000, 0x38716000,
+0x38718000, 0x3871a000, 0x3871c000, 0x3871e000, 0x38720000, 0x38722000,
+0x38724000, 0x38726000, 0x38728000, 0x3872a000, 0x3872c000, 0x3872e000,
+0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873a000,
+0x3873c000, 0x3873e000, 0x38740000, 0x38742000, 0x38744000, 0x38746000,
+0x38748000, 0x3874a000, 0x3874c000, 0x3874e000, 0x38750000, 0x38752000,
+0x38754000, 0x38756000, 0x38758000, 0x3875a000, 0x3875c000, 0x3875e000,
+0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876a000,
+0x3876c000, 0x3876e000, 0x38770000, 0x38772000, 0x38774000, 0x38776000,
+0x38778000, 0x3877a000, 0x3877c000, 0x3877e000, 0x38780000, 0x38782000,
+0x38784000, 0x38786000, 0x38788000, 0x3878a000, 0x3878c000, 0x3878e000,
+0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879a000,
+0x3879c000, 0x3879e000, 0x387a0000, 0x387a2000, 0x387a4000, 0x387a6000,
+0x387a8000, 0x387aa000, 0x387ac000, 0x387ae000, 0x387b0000, 0x387b2000,
+0x387b4000, 0x387b6000, 0x387b8000, 0x387ba000, 0x387bc000, 0x387be000,
+0x387c0000, 0x387c2000, 0x387c4000, 0x387c6000, 0x387c8000, 0x387ca000,
+0x387cc000, 0x387ce000, 0x387d0000, 0x387d2000, 0x387d4000, 0x387d6000,
+0x387d8000, 0x387da000, 0x387dc000, 0x387de000, 0x387e0000, 0x387e2000,
+0x387e4000, 0x387e6000, 0x387e8000, 0x387ea000, 0x387ec000, 0x387ee000,
+0x387f0000, 0x387f2000, 0x387f4000, 0x387f6000, 0x387f8000, 0x387fa000,
+0x387fc000, 0x387fe000
+};
+
+static cmsUInt16Number Offset[64] = { // 64 entries: 0 at indices 0 and 32, 0x0400 (1024) everywhere else. NOTE(review): presumably the base offset into Mantissa[] for each 6-bit sign+exponent value; the lookup code is outside this view, confirm.
+0x0000, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0000, 0x0400, 0x0400, 0x0400, // indices 30..35; index 32 is the second zero entry
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400
+};
+
+static cmsUInt32Number Exponent[64] = { // 64 entries: i << 23 for i = 0..30 and 0x47800000 at 31; indices 32..63 repeat that pattern with bit 0x80000000 set (0xc7800000 at 63). NOTE(review): presumably the float32 sign/exponent bits added to a Mantissa[] entry; confirm against the lookup code.
+0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000,
+0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000,
+0x06000000, 0x06800000, 0x07000000, 0x07800000, 0x08000000, 0x08800000,
+0x09000000, 0x09800000, 0x0a000000, 0x0a800000, 0x0b000000, 0x0b800000,
+0x0c000000, 0x0c800000, 0x0d000000, 0x0d800000, 0x0e000000, 0x0e800000,
+0x0f000000, 0x47800000, 0x80000000, 0x80800000, 0x81000000, 0x81800000, // indices 30..35; 31 breaks the i << 23 progression, 32 starts the half with the top bit set
+0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000,
+0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000,
+0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8a000000, 0x8a800000,
+0x8b000000, 0x8b800000, 0x8c000000, 0x8c800000, 0x8d000000, 0x8d800000,
+0x8e000000, 0x8e800000, 0x8f000000, 0xc7800000
+};
+
+static cmsUInt16Number Base[512] = { // 512 entries; indices 256..511 repeat indices 0..255 with bit 0x8000 set. NOTE(review): presumably half-float base bits indexed by the top 9 bits (sign + exponent) of a float32; the lookup code is outside this view, confirm.
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, // indices 100..109; the first non-zero entry is index 103
+0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00,
+0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400,
+0x4800, 0x4c00, 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00,
+0x7000, 0x7400, 0x7800, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, // indices 140..149; entries stay at 0x7c00 from index 143 through 255
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x8000, 0x8000, 0x8000, 0x8000, // indices 250..259; index 256 starts the half with bit 0x8000 set
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001,
+0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400,
+0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00,
+0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400,
+0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00
+};
+
+static cmsUInt8Number  Shift[512] = {   // Right shift applied by _cmsFloat2Half to the low 23 bits of the float pattern; indexed by bits 23..31 of that pattern
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,
+0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d,
+0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13,
+0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x0d
+};
+
+cmsFloat32Number CMSEXPORT _cmsHalf2Float(cmsUInt16Number h)
+{
+    // Table-driven conversion: the 32-bit pattern is assembled from the lookup
+    // tables and then read back as a float through the union.
+    union {
+        cmsUInt32Number  bits;
+        cmsFloat32Number value;
+    } cvt;
+    const int hi = h >> 10;        // everything above the low 10 bits selects the table row
+    cvt.bits = Mantissa[(h & 0x3ff) + Offset[hi]] + Exponent[hi];
+    return cvt.value;
+}
+
+cmsUInt16Number CMSEXPORT _cmsFloat2Half(cmsFloat32Number flt)
+{
+    // Table-driven float -> half conversion working on the raw 32-bit pattern.
+    union {
+        cmsUInt32Number  bits;
+        cmsFloat32Number value;
+    } cvt;
+    cmsUInt32Number raw, key, frac;
+
+    cvt.value = flt;
+    raw  = cvt.bits;
+    key  = (raw >> 23) & 0x1ff;                 // bits 23..31 select the table row
+    frac = (raw & 0x007fffff) >> Shift[key];    // low 23 bits, scaled down per row
+    return (cmsUInt16Number) ((cmsUInt32Number) Base[key] + frac);
+}
+
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsintrp.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsintrp.cpp
new file mode 100644
index 0000000000..31c5aa46ef
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsintrp.cpp
@@ -0,0 +1,1521 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// This module incorporates several interpolation routines, for 1 to 8 channels on input and
+// up to 65535 channels on output. The user may change those by using the interpolation plug-in
+
+// Some people may want to compile as C++ with all warnings on, in this case make compiler silent
+#ifdef _MSC_VER
+#    if (_MSC_VER >= 1400)
+#       pragma warning( disable : 4365 )
+#    endif
+#endif
+
+// Forward declaration of the built-in factory, used when no plug-in supplies an interpolation routine
+static cmsInterpFunction DefaultInterpolatorsFactory(cmsUInt32Number nInputChannels, cmsUInt32Number nOutputChannels, cmsUInt32Number dwFlags);
+
+// Global interpolation plug-in chunk, initialised to { NULL } (presumably: no replacement factory installed)
+_cmsInterpPluginChunkType _cmsInterpPluginChunk = { NULL };
+
+// Allocates the interpolation plug-in chunk of a context. The contents are
+// duplicated from `src` when one is given, otherwise from built-in defaults.
+void _cmsAllocInterpPluginChunk(struct _cmsContext_struct* ctx, const struct _cmsContext_struct* src)
+{
+    // Defaults used when there is no source context to copy from
+    static _cmsInterpPluginChunkType InterpPluginChunk = { NULL };
+    void* from;
+
+    _cmsAssert(ctx != NULL);
+
+    // Pick the chunk to be cloned
+    from = (src != NULL) ? src ->chunks[InterpPlugin] : (void*) &InterpPluginChunk;
+
+    _cmsAssert(from != NULL);
+
+    // The copy is carved from the memory pool of the new context
+    ctx ->chunks[InterpPlugin] = _cmsSubAllocDup(ctx ->MemPool, from,
+                                                 sizeof(_cmsInterpPluginChunkType));
+}
+
+
+// Plug-in entry point: installs (or, when Data is NULL, removes) the custom interpolators factory
+cmsBool  _cmsRegisterInterpPlugin(cmsContext ContextID, cmsPluginBase* Data)
+{
+    _cmsInterpPluginChunkType* chunk =
+        (_cmsInterpPluginChunkType*) _cmsContextGetClientChunk(ContextID, InterpPlugin);
+
+    if (Data != NULL) {
+        // Take the replacement factory from the plug-in descriptor
+        chunk ->Interpolators = ((cmsPluginInterpolation*) Data) ->InterpolatorsFactory;
+    }
+    else {
+        // No plug-in data: go back to having no custom factory
+        chunk ->Interpolators = NULL;
+    }
+    return TRUE;
+}
+
+
+// Selects the interpolation routine for the given parameters; FALSE if none is available
+cmsBool _cmsSetInterpolationRoutine(cmsContext ContextID, cmsInterpParams* p)
+{
+    _cmsInterpPluginChunkType* chunk =
+        (_cmsInterpPluginChunkType*) _cmsContextGetClientChunk(ContextID, InterpPlugin);
+
+    // Start with no routine selected
+    p ->Interpolation.Lerp16 = NULL;
+
+    // A plug-in factory, when installed, gets the first chance
+    if (chunk ->Interpolators != NULL) {
+        p ->Interpolation = chunk ->Interpolators(p ->nInputs, p ->nOutputs, p ->dwFlags);
+    }
+
+    // Either there is no plug-in or it declined this combination:
+    // fall back to the LittleCMS built-in factory.
+    if (p ->Interpolation.Lerp16 == NULL) {
+        p ->Interpolation = DefaultInterpolatorsFactory(p ->nInputs, p ->nOutputs, p ->dwFlags);
+    }
+
+    // Only one member of the union is inspected to decide whether a routine was found
+    return (p ->Interpolation.Lerp16 != NULL) ? TRUE : FALSE;
+}
+
+
+// Precomputes per-grid interpolation parameters to speed up the interpolation. Returns NULL on failure.
+cmsInterpParams* _cmsComputeInterpParamsEx(cmsContext ContextID,
+                                           const cmsUInt32Number nSamples[],
+                                           cmsUInt32Number InputChan, cmsUInt32Number OutputChan,
+                                           const void *Table,
+                                           cmsUInt32Number dwFlags)
+{
+    cmsInterpParams* params;
+    cmsUInt32Number dim;
+
+    // Reject grids with more input dimensions than supported
+    if (InputChan > MAX_INPUT_DIMENSIONS) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Too many input channels (%d channels, max=%d)", InputChan, MAX_INPUT_DIMENSIONS);
+        return NULL;
+    }
+
+    // Zero-initialised object
+    params = (cmsInterpParams*) _cmsMallocZero(ContextID, sizeof(cmsInterpParams));
+    if (params == NULL)
+        return NULL;
+
+    // Remember what the caller asked for
+    params ->ContextID = ContextID;
+    params ->dwFlags   = dwFlags;
+    params ->nInputs   = InputChan;
+    params ->nOutputs  = OutputChan;
+    params ->Table     = Table;
+
+    // Per input: number of nodes, and domain (nodes minus one)
+    for (dim = 0; dim < InputChan; dim++) {
+        params ->nSamples[dim] = nSamples[dim];
+        params ->Domain[dim]   = nSamples[dim] - 1;
+    }
+
+    // Factors used to index the grid array. opta[0] is the number of outputs;
+    // each following factor multiplies in the node count of one more input, last input first.
+    params ->opta[0] = params ->nOutputs;
+    for (dim = 1; dim < InputChan; dim++) {
+        params ->opta[dim] = params ->opta[dim - 1] * nSamples[InputChan - dim];
+    }
+
+    // Pick the routine; without one the object is useless
+    if (_cmsSetInterpolationRoutine(ContextID, params))
+        return params;
+
+    cmsSignalError(ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported interpolation (%d->%d channels)", InputChan, OutputChan);
+    _cmsFree(ContextID, params);
+    return NULL;
+}
+
+
+// Convenience wrapper over _cmsComputeInterpParamsEx for grids having the same number of nodes on every input
+cmsInterpParams* CMSEXPORT _cmsComputeInterpParams(cmsContext ContextID, cmsUInt32Number nSamples,
+                                                   cmsUInt32Number InputChan, cmsUInt32Number OutputChan, const void* Table, cmsUInt32Number dwFlags)
+{
+    cmsUInt32Number PerInput[MAX_INPUT_DIMENSIONS];
+    cmsUInt32Number* slot = PerInput;
+
+    // Replicate the node count across all the supported dimensions
+    while (slot != PerInput + MAX_INPUT_DIMENSIONS)
+        *slot++ = nSamples;
+
+    // The extended version does the real work
+    return _cmsComputeInterpParamsEx(ContextID, PerInput, InputChan, OutputChan, Table, dwFlags);
+}
+
+
+// Releases an interpolation parameters object. Passing NULL is allowed and does nothing.
+void CMSEXPORT _cmsFreeInterpParams(cmsInterpParams* p)
+{
+    if (p) { _cmsFree(p ->ContextID, p); }
+}
+
+
+// Fixed point linear interpolation between l and h; callers pass the 16 LSB bits of a 15.16 value as a
+cmsINLINE CMS_NO_SANITIZE cmsUInt16Number LinearInterp(cmsS15Fixed16Number a, cmsS15Fixed16Number l, cmsS15Fixed16Number h)
+{
+    const cmsUInt32Number scaled  = (cmsUInt32Number) (h - l) * a + 0x8000;   // unsigned on purpose; 0x8000 rounds the shift below
+    const cmsUInt32Number blended = (scaled >> 16) + l;
+    return (cmsUInt16Number) blended;
+}
+
+
+//  1D linear interpolation on a 16-bit table (fixed-point optimized): one input, one output
+static
+void LinLerp1D(CMSREGISTER const cmsUInt16Number Value[],
+               CMSREGISTER cmsUInt16Number Output[],
+               CMSREGISTER const cmsInterpParams* p)
+{
+    cmsUInt16Number y1, y0;
+    int cell0, rest;
+    int val3;
+    const cmsUInt16Number* LutTable = (cmsUInt16Number*) p ->Table;
+
+    // Last value, or a table made of a single node (Domain == 0): there is no node
+    // to the right of the cell, so take the last entry instead of reading LutTable[cell0 + 1]
+    if (Value[0] == 0xffff || p -> Domain[0] == 0) {
+        Output[0] = LutTable[p -> Domain[0]];
+    }
+    else
+    {
+        val3 = p->Domain[0] * Value[0];
+        val3 = _cmsToFixedDomain(val3);    // To fixed 15.16
+
+        cell0 = FIXED_TO_INT(val3);             // Cell is 16 MSB bits
+        rest = FIXED_REST_TO_INT(val3);        // Rest is 16 LSB bits
+
+        y0 = LutTable[cell0];
+        y1 = LutTable[cell0 + 1];
+
+        Output[0] = LinearInterp(rest, y0, y1);
+    }
+}
+
+// Forces v into 0..1 to prevent out of bounds indexing; NaN and anything below 1.0e-9 become 0
+cmsINLINE cmsFloat32Number fclamp(cmsFloat32Number v)
+{
+    return (isnan(v) || !(v >= 1.0e-9f)) ? 0.0f : ((v <= 1.0f) ? v : 1.0f);
+}
+
+// 1D linear interpolation on a floating point table: one input, one output
+static
+void LinLerp1Dfloat(const cmsFloat32Number Value[],
+                    cmsFloat32Number Output[],
+                    const cmsInterpParams* p)
+{
+    const cmsFloat32Number* LutTable = (cmsFloat32Number*) p ->Table;
+    cmsFloat32Number pos, frac;
+    cmsFloat32Number lo, hi;
+    int below, above;
+
+    // Input is forced into 0..1 so the table is never indexed out of range
+    pos = fclamp(Value[0]);
+
+    // Top of the range maps straight onto the last node
+    if (pos == 1.0) {
+        Output[0] = LutTable[p -> Domain[0]];
+        return;
+    }
+
+    // Position measured in nodes
+    pos *= p->Domain[0];
+
+    // Bracketing nodes; both are the same one when pos is a whole number
+    below = (int)floor(pos);
+    above = (int)ceil(pos);
+
+    // Fractional distance from the lower node
+    frac = pos - below;
+    lo = LutTable[below];
+    hi = LutTable[above];
+    Output[0] = lo + (hi - lo) * frac;
+}
+
+
+
+// Evaluates a 16-bit LUT having a single input channel and nOutputs output channels
+static CMS_NO_SANITIZE
+void Eval1Input(CMSREGISTER const cmsUInt16Number Input[],
+                CMSREGISTER cmsUInt16Number Output[],
+                CMSREGISTER const cmsInterpParams* p16)
+{
+       cmsS15Fixed16Number fk;
+       cmsS15Fixed16Number k0, k1, rk, K0, K1;
+       int v;
+       cmsUInt32Number OutChan;
+       const cmsUInt16Number* LutTable = (cmsUInt16Number*) p16 -> Table;
+
+       v = Input[0] * p16 -> Domain[0];
+       fk = _cmsToFixedDomain(v);
+
+       k0 = FIXED_TO_INT(fk);
+       rk = (cmsUInt16Number) FIXED_REST_TO_INT(fk);
+       // Upper node: stay on k0 at the last value and on single-node tables (Domain == 0), where k0 + 1 would be read out of bounds
+       k1 = k0 + ((Input[0] != 0xFFFFU && p16 -> Domain[0] != 0) ? 1 : 0);
+       // Offsets of both nodes in the table (opta[0] entries per node)
+       K0 = p16 -> opta[0] * k0;
+       K1 = p16 -> opta[0] * k1;
+
+       for (OutChan=0; OutChan < p16->nOutputs; OutChan++) {
+           // Same cell and same fraction for every output channel
+           Output[OutChan] = LinearInterp(rk, LutTable[K0+OutChan], LutTable[K1+OutChan]);
+       }
+}
+
+
+
+// Evaluates a floating point LUT having a single input channel and nOutputs output channels
+static
+void Eval1InputFloat(const cmsFloat32Number Value[],
+                     cmsFloat32Number Output[],
+                     const cmsInterpParams* p)
+{
+    cmsFloat32Number y1, y0;
+    cmsFloat32Number val2, rest;
+    int cell0, cell1;
+    cmsUInt32Number OutChan;
+    const cmsFloat32Number* LutTable = (cmsFloat32Number*) p ->Table;
+
+    val2 = fclamp(Value[0]);
+
+    // if last value, copy the outputs stored at the last node
+    if (val2 == 1.0) {
+
+        // The last node starts Domain[0] nodes into the table, opta[0] entries each
+        cmsUInt32Number start = p->Domain[0] * p->opta[0];
+        for (OutChan = 0; OutChan < p->nOutputs; OutChan++) {
+            Output[OutChan] = LutTable[start + OutChan];
+        }
+    }
+    else
+    {
+        val2 *= p->Domain[0];
+
+        cell0 = (int)floor(val2);
+        cell1 = (int)ceil(val2);
+
+        // Rest is the fractional distance from the lower node
+        rest = val2 - cell0;
+        // From node indexes to table offsets
+        cell0 *= p->opta[0];
+        cell1 *= p->opta[0];
+
+        for (OutChan = 0; OutChan < p->nOutputs; OutChan++) {
+
+            y0 = LutTable[cell0 + OutChan];
+            y1 = LutTable[cell1 + OutChan];
+
+            Output[OutChan] = y0 + (y1 - y0) * rest;
+        }
+    }
+}
+
+// Bilinear interpolation on a floating point grid: 2 inputs, nOutputs outputs.
+// Both inputs are clamped to 0..1 before use, so the four corners of the
+// enclosing cell are always read from inside the table.
+static
+void BilinearInterpFloat(const cmsFloat32Number Input[],
+                         cmsFloat32Number Output[],
+                         const cmsInterpParams* p)
+
+{
+    const cmsFloat32Number* LutTable = (cmsFloat32Number*) p ->Table;
+    const int  TotalOut = p -> nOutputs;
+
+    // Inputs forced into 0..1, then expressed in grid nodes
+    const cmsFloat32Number cx = fclamp(Input[0]);
+    const cmsFloat32Number cy = fclamp(Input[1]);
+    const cmsFloat32Number px = cx * p->Domain[0];
+    const cmsFloat32Number py = cy * p->Domain[1];
+
+    // Lower node on each axis
+    const int x0 = (int) _cmsQuickFloor(px);
+    const int y0 = (int) _cmsQuickFloor(py);
+
+    // Fractional distance from the lower node
+    const cmsFloat32Number fx = px - (cmsFloat32Number) x0;
+    const cmsFloat32Number fy = py - (cmsFloat32Number) y0;
+
+    // Table offsets of the lower/upper node per axis. At the top of the
+    // range the upper node collapses onto the lower one.
+    const int X0 = p -> opta[1] * x0;
+    const int X1 = X0 + (cx >= 1.0 ? 0 : p->opta[1]);
+
+    const int Y0 = p -> opta[0] * y0;
+    const int Y1 = Y0 + (cy >= 1.0 ? 0 : p->opta[0]);
+    int OutChan;
+
+    for (OutChan = 0; OutChan < TotalOut; OutChan++) {
+
+        // Four corners of the cell, for this output channel
+        const cmsFloat32Number d00 = LutTable[X0 + Y0 + OutChan];
+        const cmsFloat32Number d01 = LutTable[X0 + Y1 + OutChan];
+        const cmsFloat32Number d10 = LutTable[X1 + Y0 + OutChan];
+        const cmsFloat32Number d11 = LutTable[X1 + Y1 + OutChan];
+
+        // Interpolate along x on both rows, then along y between them
+        const cmsFloat32Number dx0 = (cmsFloat32Number) (d00 + ((d10 - d00) * fx));
+        const cmsFloat32Number dx1 = (cmsFloat32Number) (d01 + ((d11 - d01) * fx));
+
+        const cmsFloat32Number dxy = (cmsFloat32Number) (dx0 + ((dx1 - dx0) * fy));
+
+        Output[OutChan] = dxy;
+    }
+}
+
+// Bilinear interpolation (16 bits) - fixed point version: 2 inputs, nOutputs outputs
+static CMS_NO_SANITIZE
+void BilinearInterp16(CMSREGISTER const cmsUInt16Number Input[],
+                      CMSREGISTER cmsUInt16Number Output[],
+                      CMSREGISTER const cmsInterpParams* p)
+
+{
+#define DENS(i,j) (LutTable[(i)+(j)+OutChan])
+#define LERP(a,l,h)     (cmsUInt16Number) (l + ROUND_FIXED_TO_INT(((h-l)*a)))
+    // DENS reads one table entry for the current output channel; LERP blends l and h by the fraction a
+           const cmsUInt16Number* LutTable = (cmsUInt16Number*) p ->Table;
+           int        OutChan, TotalOut;
+           cmsS15Fixed16Number    fx, fy;
+  CMSREGISTER int        rx, ry;
+           int        x0, y0;
+  CMSREGISTER int        X0, X1, Y0, Y1;
+           int        d00, d01, d10, d11,
+                      dx0, dx1,
+                      dxy;
+
+    TotalOut   = p -> nOutputs;
+    // First input: position scaled to the grid, split into node (x0) and rest (rx)
+    fx = _cmsToFixedDomain((int) Input[0] * p -> Domain[0]);
+    x0  = FIXED_TO_INT(fx);
+    rx  = FIXED_REST_TO_INT(fx);    // Rest in 0..1.0 domain
+
+    // Second input, same decomposition
+    fy = _cmsToFixedDomain((int) Input[1] * p -> Domain[1]);
+    y0  = FIXED_TO_INT(fy);
+    ry  = FIXED_REST_TO_INT(fy);
+
+    // Table offsets of the lower (X0, Y0) and upper (X1, Y1) node; no step is taken when the input is 0xFFFF
+    X0 = p -> opta[1] * x0;
+    X1 = X0 + (Input[0] == 0xFFFFU ? 0 : p->opta[1]);
+
+    Y0 = p -> opta[0] * y0;
+    Y1 = Y0 + (Input[1] == 0xFFFFU ? 0 : p->opta[0]);
+
+    for (OutChan = 0; OutChan < TotalOut; OutChan++) {
+        // Four corners of the cell
+        d00 = DENS(X0, Y0);
+        d01 = DENS(X0, Y1);
+        d10 = DENS(X1, Y0);
+        d11 = DENS(X1, Y1);
+        // Interpolate along the first input on both rows...
+        dx0 = LERP(rx, d00, d10);
+        dx1 = LERP(rx, d01, d11);
+        // ...then along the second input between them
+        dxy = LERP(ry, dx0, dx1);
+
+        Output[OutChan] = (cmsUInt16Number) dxy;
+    }
+
+    // The helper macros are local to this function
+#   undef LERP
+#   undef DENS
+}
+
+
+// Trilinear interpolation on a floating point grid: 3 inputs, nOutputs outputs.
+// Each input is clamped to 0..1 before being used, so the eight corners of
+// the enclosing cell are always read from inside the table.
+static
+void TrilinearInterpFloat(const cmsFloat32Number Input[],
+                          cmsFloat32Number Output[],
+                          const cmsInterpParams* p)
+
+{
+    const cmsFloat32Number* LutTable = (cmsFloat32Number*) p ->Table;
+    const int  TotalOut = p -> nOutputs;
+
+    // Inputs forced into 0..1
+    const cmsFloat32Number cx = fclamp(Input[0]);
+    const cmsFloat32Number cy = fclamp(Input[1]);
+    const cmsFloat32Number cz = fclamp(Input[2]);
+
+    // Same positions expressed in grid nodes
+    const cmsFloat32Number px = cx * p->Domain[0];
+    const cmsFloat32Number py = cy * p->Domain[1];
+    const cmsFloat32Number pz = cz * p->Domain[2];
+
+    // Lower node on each axis; full floor functionality is needed here
+    const int x0 = (int) floor(px);
+    const int y0 = (int) floor(py);
+    const int z0 = (int) floor(pz);
+
+    // Fractional distance from the lower node
+    const cmsFloat32Number fx = px - (cmsFloat32Number) x0;
+    const cmsFloat32Number fy = py - (cmsFloat32Number) y0;
+    const cmsFloat32Number fz = pz - (cmsFloat32Number) z0;
+
+    // Table offsets of the lower/upper node per axis. At the top of the
+    // range the upper node collapses onto the lower one.
+    const int X0 = p -> opta[2] * x0;
+    const int X1 = X0 + (cx >= 1.0 ? 0 : p->opta[2]);
+
+    const int Y0 = p -> opta[1] * y0;
+    const int Y1 = Y0 + (cy >= 1.0 ? 0 : p->opta[1]);
+
+    const int Z0 = p -> opta[0] * z0;
+    const int Z1 = Z0 + (cz >= 1.0 ? 0 : p->opta[0]);
+    int OutChan;
+
+    for (OutChan = 0; OutChan < TotalOut; OutChan++) {
+
+        // Eight corners of the cell, for this output channel
+        const cmsFloat32Number d000 = LutTable[X0 + Y0 + Z0 + OutChan];
+        const cmsFloat32Number d001 = LutTable[X0 + Y0 + Z1 + OutChan];
+        const cmsFloat32Number d010 = LutTable[X0 + Y1 + Z0 + OutChan];
+        const cmsFloat32Number d011 = LutTable[X0 + Y1 + Z1 + OutChan];
+        const cmsFloat32Number d100 = LutTable[X1 + Y0 + Z0 + OutChan];
+        const cmsFloat32Number d101 = LutTable[X1 + Y0 + Z1 + OutChan];
+        const cmsFloat32Number d110 = LutTable[X1 + Y1 + Z0 + OutChan];
+        const cmsFloat32Number d111 = LutTable[X1 + Y1 + Z1 + OutChan];
+
+        // Collapse the first input...
+        const cmsFloat32Number dx00 = (cmsFloat32Number) (d000 + ((d100 - d000) * fx));
+        const cmsFloat32Number dx01 = (cmsFloat32Number) (d001 + ((d101 - d001) * fx));
+        const cmsFloat32Number dx10 = (cmsFloat32Number) (d010 + ((d110 - d010) * fx));
+        const cmsFloat32Number dx11 = (cmsFloat32Number) (d011 + ((d111 - d011) * fx));
+
+        // ...then the second...
+        const cmsFloat32Number dxy0 = (cmsFloat32Number) (dx00 + ((dx10 - dx00) * fy));
+        const cmsFloat32Number dxy1 = (cmsFloat32Number) (dx01 + ((dx11 - dx01) * fy));
+
+        // ...and finally the third
+        const cmsFloat32Number dxyz = (cmsFloat32Number) (dxy0 + ((dxy1 - dxy0) * fz));
+        Output[OutChan] = dxyz;
+    }
+}
+
+// Trilinear interpolation (16 bits) - fixed point version: 3 inputs, nOutputs outputs
+static CMS_NO_SANITIZE
+void TrilinearInterp16(CMSREGISTER const cmsUInt16Number Input[],
+                       CMSREGISTER cmsUInt16Number Output[],
+                       CMSREGISTER const cmsInterpParams* p)
+
+{
+#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
+#define LERP(a,l,h)     (cmsUInt16Number) (l + ROUND_FIXED_TO_INT(((h-l)*a)))
+    // DENS reads one table entry for the current output channel; LERP blends l and h by the fraction a
+           const cmsUInt16Number* LutTable = (cmsUInt16Number*) p ->Table;
+           int        OutChan, TotalOut;
+           cmsS15Fixed16Number    fx, fy, fz;
+  CMSREGISTER int        rx, ry, rz;
+           int        x0, y0, z0;
+  CMSREGISTER int        X0, X1, Y0, Y1, Z0, Z1;
+           int        d000, d001, d010, d011,
+                      d100, d101, d110, d111,
+                      dx00, dx01, dx10, dx11,
+                      dxy0, dxy1, dxyz;
+
+    TotalOut   = p -> nOutputs;
+    // First input: position scaled to the grid, split into node (x0) and rest (rx)
+    fx = _cmsToFixedDomain((int) Input[0] * p -> Domain[0]);
+    x0  = FIXED_TO_INT(fx);
+    rx  = FIXED_REST_TO_INT(fx);    // Rest in 0..1.0 domain
+
+    // Second and third inputs, same decomposition
+    fy = _cmsToFixedDomain((int) Input[1] * p -> Domain[1]);
+    y0  = FIXED_TO_INT(fy);
+    ry  = FIXED_REST_TO_INT(fy);
+
+    fz = _cmsToFixedDomain((int) Input[2] * p -> Domain[2]);
+    z0 = FIXED_TO_INT(fz);
+    rz = FIXED_REST_TO_INT(fz);
+
+    // Table offsets of the lower and upper node per input; no step is taken when the input is 0xFFFF
+    X0 = p -> opta[2] * x0;
+    X1 = X0 + (Input[0] == 0xFFFFU ? 0 : p->opta[2]);
+
+    Y0 = p -> opta[1] * y0;
+    Y1 = Y0 + (Input[1] == 0xFFFFU ? 0 : p->opta[1]);
+
+    Z0 = p -> opta[0] * z0;
+    Z1 = Z0 + (Input[2] == 0xFFFFU ? 0 : p->opta[0]);
+
+    for (OutChan = 0; OutChan < TotalOut; OutChan++) {
+        // Eight corners of the cell
+        d000 = DENS(X0, Y0, Z0);
+        d001 = DENS(X0, Y0, Z1);
+        d010 = DENS(X0, Y1, Z0);
+        d011 = DENS(X0, Y1, Z1);
+
+        d100 = DENS(X1, Y0, Z0);
+        d101 = DENS(X1, Y0, Z1);
+        d110 = DENS(X1, Y1, Z0);
+        d111 = DENS(X1, Y1, Z1);
+
+        // Collapse the first input...
+        dx00 = LERP(rx, d000, d100);
+        dx01 = LERP(rx, d001, d101);
+        dx10 = LERP(rx, d010, d110);
+        dx11 = LERP(rx, d011, d111);
+        // ...then the second...
+        dxy0 = LERP(ry, dx00, dx10);
+        dxy1 = LERP(ry, dx01, dx11);
+        // ...and finally the third
+        dxyz = LERP(rz, dxy0, dxy1);
+
+        Output[OutChan] = (cmsUInt16Number) dxyz;
+    }
+
+    // The helper macros are local to this function
+#   undef LERP
+#   undef DENS
+}
+
+
+// Tetrahedral interpolation on a floating point grid, using Sakamoto algorithm.
+// Six orderings of the three fractional parts are tested in turn; each one
+// picks a different set of cell corners for the differences c1, c2 and c3.
+#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
+static
+void TetrahedralInterpFloat(const cmsFloat32Number Input[],
+                            cmsFloat32Number Output[],
+                            const cmsInterpParams* p)
+{
+    const cmsFloat32Number* LutTable = (cmsFloat32Number*) p -> Table;
+    const int TotalOut = p -> nOutputs;
+    int       OutChan;
+
+    // Inputs forced into 0..1 so the grid is never indexed out of range
+    const cmsFloat32Number cx = fclamp(Input[0]);
+    const cmsFloat32Number cy = fclamp(Input[1]);
+    const cmsFloat32Number cz = fclamp(Input[2]);
+
+    // Same positions expressed in grid nodes
+    const cmsFloat32Number px = cx * p->Domain[0];
+    const cmsFloat32Number py = cy * p->Domain[1];
+    const cmsFloat32Number pz = cz * p->Domain[2];
+
+    // Lower node on each axis; full floor functionality is needed here
+    const int x0 = (int) floor(px);
+    const int y0 = (int) floor(py);
+    const int z0 = (int) floor(pz);
+
+    // Fractional distance from the lower node
+    const cmsFloat32Number rx = (px - (cmsFloat32Number) x0);
+    const cmsFloat32Number ry = (py - (cmsFloat32Number) y0);
+    const cmsFloat32Number rz = (pz - (cmsFloat32Number) z0);
+
+    // Table offsets of the lower/upper node per axis. At the top of the
+    // range the upper node collapses onto the lower one.
+    const int X0 = p -> opta[2] * x0;
+    const int X1 = X0 + (cx >= 1.0 ? 0 : p->opta[2]);
+
+    const int Y0 = p -> opta[1] * y0;
+    const int Y1 = Y0 + (cy >= 1.0 ? 0 : p->opta[1]);
+
+    const int Z0 = p -> opta[0] * z0;
+    const int Z1 = Z0 + (cz >= 1.0 ? 0 : p->opta[0]);
+
+    for (OutChan=0; OutChan < TotalOut; OutChan++) {
+
+        // c0 is the value at the cell origin; c1, c2 and c3 are the differences
+        // that get weighted by rx, ry and rz respectively.
+        cmsFloat32Number c0 = DENS(X0, Y0, Z0);
+        cmsFloat32Number c1, c2, c3;
+
+        if (rx >= ry && ry >= rz) {
+
+            c1 = DENS(X1, Y0, Z0) - c0;
+            c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0);
+            c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+        }
+        else if (rx >= rz && rz >= ry) {
+
+            c1 = DENS(X1, Y0, Z0) - c0;
+            c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+            c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0);
+        }
+        else if (rz >= rx && rx >= ry) {
+
+            c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1);
+            c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+            c3 = DENS(X0, Y0, Z1) - c0;
+        }
+        else if (ry >= rx && rx >= rz) {
+
+            c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0);
+            c2 = DENS(X0, Y1, Z0) - c0;
+            c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+        }
+        else if (ry >= rz && rz >= rx) {
+
+            c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+            c2 = DENS(X0, Y1, Z0) - c0;
+            c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0);
+        }
+        else if (rz >= ry && ry >= rx) {
+
+            c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+            c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1);
+            c3 = DENS(X0, Y0, Z1) - c0;
+        }
+        else {
+            // None of the orderings matched (not expected for ordinary
+            // numbers): the output is then just the cell origin value
+            c1 = c2 = c3 = 0;
+        }
+
+        Output[OutChan] = c0 + c1 * rx + c2 * ry + c3 * rz;
+    }
+
+}
+
+#undef DENS
+
+
+
+
+static CMS_NO_SANITIZE   // Tetrahedral interpolation, 16-bit fixed point version: 3 inputs, nOutputs outputs
+void TetrahedralInterp16(CMSREGISTER const cmsUInt16Number Input[],
+                         CMSREGISTER cmsUInt16Number Output[],
+                         CMSREGISTER const cmsInterpParams* p)
+{
+    const cmsUInt16Number* LutTable = (cmsUInt16Number*) p -> Table;
+    cmsS15Fixed16Number fx, fy, fz;
+    cmsS15Fixed16Number rx, ry, rz;
+    int x0, y0, z0;
+    cmsS15Fixed16Number c0, c1, c2, c3, Rest;
+    cmsS15Fixed16Number X0, X1, Y0, Y1, Z0, Z1;
+    cmsUInt32Number TotalOut = p -> nOutputs;
+    // Positions of the three inputs scaled to the grid
+    fx = _cmsToFixedDomain((int) Input[0] * p -> Domain[0]);
+    fy = _cmsToFixedDomain((int) Input[1] * p -> Domain[1]);
+    fz = _cmsToFixedDomain((int) Input[2] * p -> Domain[2]);
+    // Lower node on each axis
+    x0 = FIXED_TO_INT(fx);
+    y0 = FIXED_TO_INT(fy);
+    z0 = FIXED_TO_INT(fz);
+    // Rest on each axis
+    rx = FIXED_REST_TO_INT(fx);
+    ry = FIXED_REST_TO_INT(fy);
+    rz = FIXED_REST_TO_INT(fz);
+    // X0/Y0/Z0 are absolute table offsets; X1/Y1/Z1 are steps relative to the cell origin (0 when the input is 0xFFFF)
+    X0 = p -> opta[2] * x0;
+    X1 = (Input[0] == 0xFFFFU ? 0 : p->opta[2]);
+
+    Y0 = p -> opta[1] * y0;
+    Y1 = (Input[1] == 0xFFFFU ? 0 : p->opta[1]);
+
+    Z0 = p -> opta[0] * z0;
+    Z1 = (Input[2] == 0xFFFFU ? 0 : p->opta[0]);
+    // From here on LutTable points at the cell origin, so the steps above index it directly
+    LutTable = &LutTable[X0+Y0+Z0];
+
+    // Output should be computed as x = ROUND_FIXED_TO_INT(_cmsToFixedDomain(Rest))
+    // which expands as: x = (Rest + ((Rest+0x7fff)/0xFFFF) + 0x8000)>>16
+    // This can be replaced by: t = Rest+0x8001, x = (t + (t>>16))>>16
+    // at the cost of being off by one at 7fff and 17ffe.
+    // Each branch accumulates the steps for one ordering of rx, ry, rz; after the subtractions c1, c2, c3 are the differences weighted by rx, ry, rz
+    if (rx >= ry) {
+        if (ry >= rz) {                      // rx >= ry >= rz
+            Y1 += X1;
+            Z1 += Y1;
+            for (; TotalOut; TotalOut--) {
+                c1 = LutTable[X1];
+                c2 = LutTable[Y1];
+                c3 = LutTable[Z1];
+                c0 = *LutTable++;            // origin value; moves on to the next output channel
+                c3 -= c2;
+                c2 -= c1;
+                c1 -= c0;
+                Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                *Output++ = (cmsUInt16Number) c0 + ((Rest + (Rest>>16))>>16);
+            }
+        } else if (rz >= rx) {               // rz >= rx >= ry
+            X1 += Z1;
+            Y1 += X1;
+            for (; TotalOut; TotalOut--) {
+                c1 = LutTable[X1];
+                c2 = LutTable[Y1];
+                c3 = LutTable[Z1];
+                c0 = *LutTable++;
+                c2 -= c1;
+                c1 -= c3;
+                c3 -= c0;
+                Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                *Output++ = (cmsUInt16Number) c0 + ((Rest + (Rest>>16))>>16);
+            }
+        } else {                             // rx > rz > ry
+            Z1 += X1;
+            Y1 += Z1;
+            for (; TotalOut; TotalOut--) {
+                c1 = LutTable[X1];
+                c2 = LutTable[Y1];
+                c3 = LutTable[Z1];
+                c0 = *LutTable++;
+                c2 -= c3;
+                c3 -= c1;
+                c1 -= c0;
+                Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                *Output++ = (cmsUInt16Number) c0 + ((Rest + (Rest>>16))>>16);
+            }
+        }
+    } else {
+        if (rx >= rz) {                      // ry > rx >= rz
+            X1 += Y1;
+            Z1 += X1;
+            for (; TotalOut; TotalOut--) {
+                c1 = LutTable[X1];
+                c2 = LutTable[Y1];
+                c3 = LutTable[Z1];
+                c0 = *LutTable++;
+                c3 -= c1;
+                c1 -= c2;
+                c2 -= c0;
+                Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                *Output++ = (cmsUInt16Number) c0 + ((Rest + (Rest>>16))>>16);
+            }
+        } else if (ry >= rz) {               // ry >= rz > rx
+            Z1 += Y1;
+            X1 += Z1;
+            for (; TotalOut; TotalOut--) {
+                c1 = LutTable[X1];
+                c2 = LutTable[Y1];
+                c3 = LutTable[Z1];
+                c0 = *LutTable++;
+                c1 -= c3;
+                c3 -= c2;
+                c2 -= c0;
+                Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                *Output++ = (cmsUInt16Number) c0 + ((Rest + (Rest>>16))>>16);
+            }
+        } else {                             // rz > ry > rx
+            Y1 += Z1;
+            X1 += Y1;
+            for (; TotalOut; TotalOut--) {
+                c1 = LutTable[X1];
+                c2 = LutTable[Y1];
+                c3 = LutTable[Z1];
+                c0 = *LutTable++;
+                c1 -= c2;
+                c2 -= c3;
+                c3 -= c0;
+                Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                *Output++ = (cmsUInt16Number) c0 + ((Rest + (Rest>>16))>>16);
+            }
+        }
+    }
+}
+// Eval4Inputs - 16-bit evaluation of a 4-input CLUT. Input[0] selects two neighbouring 3D sub-tables
+// (offsets K0 and K1); each is interpolated over Input[1..3] and both results are blended using rk.
+#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan]) // table entry at offsets i+j+k for the current output channel
+static CMS_NO_SANITIZE
+void Eval4Inputs(CMSREGISTER const cmsUInt16Number Input[],
+                     CMSREGISTER cmsUInt16Number Output[],
+                     CMSREGISTER const cmsInterpParams* p16)
+{
+    const cmsUInt16Number* LutTable;
+    cmsS15Fixed16Number fk;
+    cmsS15Fixed16Number k0, rk;
+    int K0, K1;
+    cmsS15Fixed16Number    fx, fy, fz;
+    cmsS15Fixed16Number    rx, ry, rz;
+    int                    x0, y0, z0;
+    cmsS15Fixed16Number    X0, X1, Y0, Y1, Z0, Z1;
+    cmsUInt32Number i;
+    cmsS15Fixed16Number    c0, c1, c2, c3, Rest;
+    cmsUInt32Number        OutChan;
+    cmsUInt16Number        Tmp1[MAX_STAGE_CHANNELS], Tmp2[MAX_STAGE_CHANNELS];
+    // Scale every input by its Domain entry and convert the product with _cmsToFixedDomain
+
+    fk  = _cmsToFixedDomain((int) Input[0] * p16 -> Domain[0]);
+    fx  = _cmsToFixedDomain((int) Input[1] * p16 -> Domain[1]);
+    fy  = _cmsToFixedDomain((int) Input[2] * p16 -> Domain[2]);
+    fz  = _cmsToFixedDomain((int) Input[3] * p16 -> Domain[3]);
+    // Integer parts (FIXED_TO_INT): multiplied by the opta strides below to locate the lower node
+    k0  = FIXED_TO_INT(fk);
+    x0  = FIXED_TO_INT(fx);
+    y0  = FIXED_TO_INT(fy);
+    z0  = FIXED_TO_INT(fz);
+    // Rests (FIXED_REST_TO_INT): used further down as interpolation weights
+    rk  = FIXED_REST_TO_INT(fk);
+    rx  = FIXED_REST_TO_INT(fx);
+    ry  = FIXED_REST_TO_INT(fy);
+    rz  = FIXED_REST_TO_INT(fz);
+    // Table offsets of the lower (..0) and upper (..1) node per axis; for an input of 0xFFFF both coincide
+    K0 = p16 -> opta[3] * k0;
+    K1 = K0 + (Input[0] == 0xFFFFU ? 0 : p16->opta[3]);
+
+    X0 = p16 -> opta[2] * x0;
+    X1 = X0 + (Input[1] == 0xFFFFU ? 0 : p16->opta[2]);
+
+    Y0 = p16 -> opta[1] * y0;
+    Y1 = Y0 + (Input[2] == 0xFFFFU ? 0 : p16->opta[1]);
+
+    Z0 = p16 -> opta[0] * z0;
+    Z1 = Z0 + (Input[3] == 0xFFFFU ? 0 : p16->opta[0]);
+    // First pass: sub-table at offset K0, results go to Tmp1
+    LutTable = (cmsUInt16Number*) p16 -> Table;
+    LutTable += K0;
+
+    for (OutChan=0; OutChan < p16 -> nOutputs; OutChan++) {
+
+        c0 = DENS(X0, Y0, Z0);
+        // Choose the node differences c1..c3 according to the ordering of rx, ry and rz
+        if (rx >= ry && ry >= rz) {
+
+            c1 = DENS(X1, Y0, Z0) - c0;
+            c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0);
+            c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+
+        }
+        else
+            if (rx >= rz && rz >= ry) {
+
+                c1 = DENS(X1, Y0, Z0) - c0;
+                c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0);
+
+            }
+            else
+                if (rz >= rx && rx >= ry) {
+
+                    c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1);
+                    c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                    c3 = DENS(X0, Y0, Z1) - c0;
+
+                }
+                else
+                    if (ry >= rx && rx >= rz) {
+
+                        c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0);
+                        c2 = DENS(X0, Y1, Z0) - c0;
+                        c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+
+                    }
+                    else
+                        if (ry >= rz && rz >= rx) {
+
+                            c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                            c2 = DENS(X0, Y1, Z0) - c0;
+                            c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0);
+
+                        }
+                        else
+                            if (rz >= ry && ry >= rx) {
+
+                                c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                                c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1);
+                                c3 = DENS(X0, Y0, Z1) - c0;
+
+                            }
+                            else {
+                                c1 = c2 = c3 = 0; // fallback only: the six tests above cover every ordering
+                            }
+        // Weight the differences by the rests; the converted sum is added to the base node c0
+        Rest = c1 * rx + c2 * ry + c3 * rz;
+
+        Tmp1[OutChan] = (cmsUInt16Number)(c0 + ROUND_FIXED_TO_INT(_cmsToFixedDomain(Rest)));
+    }
+    // Second pass: same computation on the sub-table at offset K1, results go to Tmp2
+
+    LutTable = (cmsUInt16Number*) p16 -> Table;
+    LutTable += K1;
+
+    for (OutChan=0; OutChan < p16 -> nOutputs; OutChan++) {
+
+        c0 = DENS(X0, Y0, Z0);
+        // Same case analysis on the ordering of rx, ry and rz as in the first pass
+        if (rx >= ry && ry >= rz) {
+
+            c1 = DENS(X1, Y0, Z0) - c0;
+            c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0);
+            c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+
+        }
+        else
+            if (rx >= rz && rz >= ry) {
+
+                c1 = DENS(X1, Y0, Z0) - c0;
+                c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0);
+
+            }
+            else
+                if (rz >= rx && rx >= ry) {
+
+                    c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1);
+                    c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                    c3 = DENS(X0, Y0, Z1) - c0;
+
+                }
+                else
+                    if (ry >= rx && rx >= rz) {
+
+                        c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0);
+                        c2 = DENS(X0, Y1, Z0) - c0;
+                        c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+
+                    }
+                    else
+                        if (ry >= rz && rz >= rx) {
+
+                            c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                            c2 = DENS(X0, Y1, Z0) - c0;
+                            c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0);
+
+                        }
+                        else
+                            if (rz >= ry && ry >= rx) {
+
+                                c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                                c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1);
+                                c3 = DENS(X0, Y0, Z1) - c0;
+
+                            }
+                            else  {
+                                c1 = c2 = c3 = 0; // fallback only: the six tests above cover every ordering
+                            }
+        // Weight the differences by the rests; the converted sum is added to the base node c0
+        Rest = c1 * rx + c2 * ry + c3 * rz;
+
+        Tmp2[OutChan] = (cmsUInt16Number) (c0 + ROUND_FIXED_TO_INT(_cmsToFixedDomain(Rest)));
+    }
+    // Blend both passes per output channel with LinearInterp, using the rest rk of Input[0]
+
+
+    for (i=0; i < p16 -> nOutputs; i++) {
+        Output[i] = LinearInterp(rk, Tmp1[i], Tmp2[i]);
+    }
+}
+#undef DENS
+
+
+// For more than 3 inputs (e.g., CMYK),
+// evaluate two 3-dimensional interpolations and then linearly interpolate between them.
+// Float evaluation of a 4-input CLUT: Input[0] selects two neighbouring 3D sub-tables, each one is
+// interpolated over Input[1..3] by TetrahedralInterpFloat, and both results are blended linearly.
+static
+void Eval4InputsFloat(const cmsFloat32Number Input[],
+                      cmsFloat32Number Output[],
+                      const cmsInterpParams* p)
+{
+       const cmsFloat32Number* Base = (cmsFloat32Number*) p -> Table;
+       cmsFloat32Number LoSlice[MAX_STAGE_CHANNELS], HiSlice[MAX_STAGE_CHANNELS];
+       cmsFloat32Number GridPos, Frac;
+       cmsInterpParams Sub;
+       cmsUInt32Number Chan;
+       int Cell, LoOfs, HiOfs;
+
+       // Position of the first input on its grid axis: integer cell plus fractional part
+       GridPos = fclamp(Input[0]) * p->Domain[0];
+       Cell    = _cmsQuickFloor(GridPos);
+       Frac    = GridPos - (cmsFloat32Number) Cell;
+
+       // Offsets of both sub-tables; when the clamped input reaches 1.0 the upper one equals the lower
+       LoOfs = p -> opta[3] * Cell;
+       if (fclamp(Input[0]) >= 1.0)
+              HiOfs = LoOfs;
+       else
+              HiOfs = LoOfs + p->opta[3];
+
+       // Evaluate on a copy of the parameters whose Domain[] entries are shifted down by one axis
+       Sub = *p;
+       memmove(&Sub.Domain[0], &p ->Domain[1], 3*sizeof(cmsUInt32Number));
+
+       Sub.Table = Base + LoOfs;
+       TetrahedralInterpFloat(Input + 1,  LoSlice, &Sub);
+
+       Sub.Table = Base + HiOfs;
+       TetrahedralInterpFloat(Input + 1,  HiSlice, &Sub);
+
+       // Linear blend between the two slices
+       for (Chan = 0; Chan < p -> nOutputs; Chan++) {
+              const cmsFloat32Number Lo = LoSlice[Chan];
+              const cmsFloat32Number Hi = HiSlice[Chan];
+              Output[Chan] = Lo + (Hi - Lo) * Frac;
+       }
+}
+// 16-bit evaluation of a 5-input CLUT: Input[0] selects two neighbouring 4-input sub-tables, each
+// is evaluated by Eval4Inputs on Input[1..4], and both results are blended with LinearInterp.
+static CMS_NO_SANITIZE
+void Eval5Inputs(CMSREGISTER const cmsUInt16Number Input[],
+                 CMSREGISTER cmsUInt16Number Output[],
+                 CMSREGISTER const cmsInterpParams* p16)
+{
+       const cmsUInt16Number* Base = (cmsUInt16Number*) p16 -> Table;
+       cmsUInt16Number LoSlice[MAX_STAGE_CHANNELS], HiSlice[MAX_STAGE_CHANNELS];
+       cmsS15Fixed16Number FixedPos, Cell, Frac;
+       cmsInterpParams Sub;
+       cmsUInt32Number Chan;
+       int LoOfs, HiOfs, Step;
+
+       // Position of the first input on its axis, split into integer part and rest
+       FixedPos = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
+       Cell     = FIXED_TO_INT(FixedPos);
+       Frac     = FIXED_REST_TO_INT(FixedPos);
+
+       // The upper sub-table lies one node further along the axis, except for an
+       // input of 0xFFFF, where it is the same as the lower one
+       Step  = (Input[0] != 0xFFFFU) ? 1 : 0;
+       LoOfs = p16 -> opta[4] * Cell;
+       HiOfs = p16 -> opta[4] * (Cell + Step);
+
+       // Evaluate on a copy of the parameters whose Domain[] entries are shifted
+       // down by one axis, so that Domain[0] describes Input[1]
+       Sub = *p16;
+       memmove(&Sub.Domain[0], &p16 ->Domain[1], 4*sizeof(cmsUInt32Number));
+
+       // Lower slice
+       Sub.Table = Base + LoOfs;
+       Eval4Inputs(Input + 1, LoSlice, &Sub);
+
+       // Upper slice
+       Sub.Table = Base + HiOfs;
+       Eval4Inputs(Input + 1, HiSlice, &Sub);
+
+       // Blend both slices along the first input
+       for (Chan = 0; Chan < p16 -> nOutputs; Chan++) {
+
+              Output[Chan] = LinearInterp(Frac, LoSlice[Chan], HiSlice[Chan]);
+       }
+}
+// Float evaluation of a 5-input CLUT: Input[0] selects two neighbouring 4-input sub-tables, each
+// is evaluated by Eval4InputsFloat on Input[1..4], and both results are blended linearly.
+static
+void Eval5InputsFloat(const cmsFloat32Number Input[],
+                      cmsFloat32Number Output[],
+                      const cmsInterpParams* p)
+{
+       const cmsFloat32Number* Base = (cmsFloat32Number*) p -> Table;
+       cmsFloat32Number LoSlice[MAX_STAGE_CHANNELS], HiSlice[MAX_STAGE_CHANNELS];
+       cmsFloat32Number GridPos, Frac;
+       cmsInterpParams Sub;
+       cmsUInt32Number Chan;
+       int Cell, LoOfs, HiOfs;
+
+       // Position of the first input on its grid axis: integer cell plus fractional part
+       GridPos = fclamp(Input[0]) * p->Domain[0];
+       Cell    = _cmsQuickFloor(GridPos);
+       Frac    = GridPos - (cmsFloat32Number) Cell;
+
+       // Offsets of both sub-tables; when the clamped input reaches 1.0 the upper one equals the lower
+       LoOfs = p -> opta[4] * Cell;
+       if (fclamp(Input[0]) >= 1.0)
+              HiOfs = LoOfs;
+       else
+              HiOfs = LoOfs + p->opta[4];
+
+       // Evaluate on a copy of the parameters whose Domain[] entries are shifted down by one axis
+       Sub = *p;
+       memmove(&Sub.Domain[0], &p ->Domain[1], 4*sizeof(cmsUInt32Number));
+
+       Sub.Table = Base + LoOfs;
+       Eval4InputsFloat(Input + 1,  LoSlice, &Sub);
+
+       Sub.Table = Base + HiOfs;
+       Eval4InputsFloat(Input + 1,  HiSlice, &Sub);
+
+       // Linear blend between the two slices
+       for (Chan = 0; Chan < p -> nOutputs; Chan++) {
+              const cmsFloat32Number Lo = LoSlice[Chan];
+              const cmsFloat32Number Hi = HiSlice[Chan];
+
+              Output[Chan] = Lo + (Hi - Lo) * Frac;
+       }
+}
+// 16-bit evaluation of a 6-input CLUT. Input[0] selects two neighbouring 5-input
+// sub-tables; each is evaluated by Eval5Inputs on Input[1..5], and both results
+// are blended with LinearInterp.
+static CMS_NO_SANITIZE
+void Eval6Inputs(CMSREGISTER const cmsUInt16Number Input[],
+                 CMSREGISTER cmsUInt16Number Output[],
+                 CMSREGISTER const cmsInterpParams* p16)
+{
+       const cmsUInt16Number* Base = (cmsUInt16Number*) p16 -> Table;
+       cmsUInt16Number LoSlice[MAX_STAGE_CHANNELS], HiSlice[MAX_STAGE_CHANNELS];
+       cmsS15Fixed16Number FixedPos, Cell, Frac;
+       cmsInterpParams Sub;
+       cmsUInt32Number Chan;
+       int LoOfs, HiOfs, Step;
+
+       // Position of the first input on its axis, split into integer part and rest
+       FixedPos = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
+       Cell     = FIXED_TO_INT(FixedPos);
+       Frac     = FIXED_REST_TO_INT(FixedPos);
+
+       // The upper sub-table lies one node further along the axis, except for an
+       // input of 0xFFFF, where it is the same as the lower one
+       Step  = (Input[0] != 0xFFFFU) ? 1 : 0;
+       LoOfs = p16 -> opta[5] * Cell;
+       HiOfs = p16 -> opta[5] * (Cell + Step);
+
+       // Evaluate on a copy of the parameters whose Domain[] entries are shifted down by one axis
+       Sub = *p16;
+       memmove(&Sub.Domain[0], &p16 ->Domain[1], 5*sizeof(cmsUInt32Number));
+
+       // Lower slice
+       Sub.Table = Base + LoOfs;
+       Eval5Inputs(Input + 1, LoSlice, &Sub);
+
+       // Upper slice
+       Sub.Table = Base + HiOfs;
+       Eval5Inputs(Input + 1, HiSlice, &Sub);
+
+       // Blend both slices along the first input
+       for (Chan = 0; Chan < p16 -> nOutputs; Chan++) {
+              Output[Chan] = LinearInterp(Frac, LoSlice[Chan], HiSlice[Chan]);
+       }
+}
+// Float evaluation of a 6-input CLUT: Input[0] selects two neighbouring 5-input sub-tables, each
+// is evaluated by Eval5InputsFloat on Input[1..5], and both results are blended linearly.
+static
+void Eval6InputsFloat(const cmsFloat32Number Input[],
+                      cmsFloat32Number Output[],
+                      const cmsInterpParams* p)
+{
+       const cmsFloat32Number* Base = (cmsFloat32Number*) p -> Table;
+       cmsFloat32Number LoSlice[MAX_STAGE_CHANNELS], HiSlice[MAX_STAGE_CHANNELS];
+       cmsFloat32Number GridPos, Frac;
+       cmsInterpParams Sub;
+       cmsUInt32Number Chan;
+       int Cell, LoOfs, HiOfs;
+
+       // Position of the first input on its grid axis: integer cell plus fractional part
+       GridPos = fclamp(Input[0]) * p->Domain[0];
+       Cell    = _cmsQuickFloor(GridPos);
+       Frac    = GridPos - (cmsFloat32Number) Cell;
+
+       // Offsets of both sub-tables; when the clamped input reaches 1.0 the upper one equals the lower
+       LoOfs = p -> opta[5] * Cell;
+       if (fclamp(Input[0]) >= 1.0)
+              HiOfs = LoOfs;
+       else
+              HiOfs = LoOfs + p->opta[5];
+
+       // Evaluate on a copy of the parameters whose Domain[] entries are shifted down by one axis
+       Sub = *p;
+       memmove(&Sub.Domain[0], &p ->Domain[1], 5*sizeof(cmsUInt32Number));
+
+       Sub.Table = Base + LoOfs;
+       Eval5InputsFloat(Input + 1,  LoSlice, &Sub);
+
+       Sub.Table = Base + HiOfs;
+       Eval5InputsFloat(Input + 1,  HiSlice, &Sub);
+
+       // Linear blend between the two slices
+       for (Chan = 0; Chan < p -> nOutputs; Chan++) {
+              const cmsFloat32Number Lo = LoSlice[Chan];
+              const cmsFloat32Number Hi = HiSlice[Chan];
+
+              Output[Chan] = Lo + (Hi - Lo) * Frac;
+       }
+}
+// 16-bit evaluation of a 7-input CLUT: Input[0] selects two neighbouring 6-input sub-tables, each
+// is evaluated by Eval6Inputs on Input[1..6], and both results are blended with LinearInterp.
+static CMS_NO_SANITIZE
+void Eval7Inputs(CMSREGISTER const cmsUInt16Number Input[],
+                 CMSREGISTER cmsUInt16Number Output[],
+                 CMSREGISTER const cmsInterpParams* p16)
+{
+       const cmsUInt16Number* Base = (cmsUInt16Number*) p16 -> Table;
+       cmsUInt16Number LoSlice[MAX_STAGE_CHANNELS], HiSlice[MAX_STAGE_CHANNELS];
+       cmsS15Fixed16Number FixedPos, Cell, Frac;
+       cmsInterpParams Sub;
+       cmsUInt32Number Chan;
+       int LoOfs, HiOfs, Step;
+
+       // Position of the first input on its axis, split into integer part and rest
+       FixedPos = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
+       Cell     = FIXED_TO_INT(FixedPos);
+       Frac     = FIXED_REST_TO_INT(FixedPos);
+
+       // The upper sub-table lies one node further, except for an input of 0xFFFF
+       Step  = (Input[0] != 0xFFFFU) ? 1 : 0;
+       LoOfs = p16 -> opta[6] * Cell;
+       HiOfs = p16 -> opta[6] * (Cell + Step);
+
+       // Evaluate on a copy of the parameters whose Domain[] entries are shifted down by one axis
+       Sub = *p16;
+       memmove(&Sub.Domain[0], &p16 ->Domain[1], 6*sizeof(cmsUInt32Number));
+
+       // Lower slice
+       Sub.Table = Base + LoOfs;
+       Eval6Inputs(Input + 1, LoSlice, &Sub);
+
+       // Upper slice
+       Sub.Table = Base + HiOfs;
+       Eval6Inputs(Input + 1, HiSlice, &Sub);
+
+       // Blend both slices along the first input
+       for (Chan = 0; Chan < p16 -> nOutputs; Chan++) {
+              Output[Chan] = LinearInterp(Frac, LoSlice[Chan], HiSlice[Chan]);
+       }
+}
+// Float evaluation of a 7-input CLUT: Input[0] selects two neighbouring 6-input sub-tables, each
+// is evaluated by Eval6InputsFloat on Input[1..6], and both results are blended linearly.
+static
+void Eval7InputsFloat(const cmsFloat32Number Input[],
+                      cmsFloat32Number Output[],
+                      const cmsInterpParams* p)
+{
+       const cmsFloat32Number* Base = (cmsFloat32Number*) p -> Table;
+       cmsFloat32Number LoSlice[MAX_STAGE_CHANNELS], HiSlice[MAX_STAGE_CHANNELS];
+       cmsFloat32Number GridPos, Frac;
+       cmsInterpParams Sub;
+       cmsUInt32Number Chan;
+       int Cell, LoOfs, HiOfs;
+
+       // Position of the first input on its grid axis: integer cell plus fractional part
+       GridPos = fclamp(Input[0]) * p->Domain[0];
+       Cell    = _cmsQuickFloor(GridPos);
+       Frac    = GridPos - (cmsFloat32Number) Cell;
+
+       // Offsets of both sub-tables; when the clamped input reaches 1.0 the upper one equals the lower
+       LoOfs = p -> opta[6] * Cell;
+       if (fclamp(Input[0]) >= 1.0)
+              HiOfs = LoOfs;
+       else
+              HiOfs = LoOfs + p->opta[6];
+
+       // Evaluate on a copy of the parameters whose Domain[] entries are shifted down by one axis
+       Sub = *p;
+       memmove(&Sub.Domain[0], &p ->Domain[1], 6*sizeof(cmsUInt32Number));
+
+       // Lower slice
+       Sub.Table = Base + LoOfs;
+       Eval6InputsFloat(Input + 1,  LoSlice, &Sub);
+
+       // Upper slice
+       Sub.Table = Base + HiOfs;
+       Eval6InputsFloat(Input + 1,  HiSlice, &Sub);
+
+       // Linear blend between the two slices
+       for (Chan = 0; Chan < p -> nOutputs; Chan++) {
+              const cmsFloat32Number Lo = LoSlice[Chan];
+              const cmsFloat32Number Hi = HiSlice[Chan];
+
+              Output[Chan] = Lo + (Hi - Lo) * Frac;
+       }
+}
+// 16-bit evaluation of an 8-input CLUT: two Eval7Inputs passes on Input[1..7], blended along Input[0].
+static CMS_NO_SANITIZE
+void Eval8Inputs(CMSREGISTER const cmsUInt16Number Input[],
+                 CMSREGISTER cmsUInt16Number Output[],
+                 CMSREGISTER const cmsInterpParams* p16)
+{
+       const cmsUInt16Number* Base = (cmsUInt16Number*) p16 -> Table;
+       cmsUInt16Number LoSlice[MAX_STAGE_CHANNELS], HiSlice[MAX_STAGE_CHANNELS];
+       cmsS15Fixed16Number FixedPos, Cell, Frac;
+       cmsInterpParams Sub;
+       cmsUInt32Number Chan;
+       int LoOfs, HiOfs, Step;
+
+       // Position of the first input on its axis, split into integer part and rest
+       FixedPos = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
+       Cell     = FIXED_TO_INT(FixedPos);
+       Frac     = FIXED_REST_TO_INT(FixedPos);
+
+       // The upper sub-table lies one node further, except for an input of 0xFFFF
+       Step  = (Input[0] != 0xFFFFU) ? 1 : 0;
+       LoOfs = p16 -> opta[7] * Cell;
+       HiOfs = p16 -> opta[7] * (Cell + Step);
+
+       // Evaluate on a copy of the parameters whose Domain[] entries are shifted down by one axis
+       Sub = *p16;
+       memmove(&Sub.Domain[0], &p16 ->Domain[1], 7*sizeof(cmsUInt32Number));
+
+       Sub.Table = Base + LoOfs;
+       Eval7Inputs(Input + 1, LoSlice, &Sub);
+
+       Sub.Table = Base + HiOfs;
+       Eval7Inputs(Input + 1, HiSlice, &Sub);
+
+       // Blend both slices along the first input
+       for (Chan = 0; Chan < p16 -> nOutputs; Chan++) {
+              Output[Chan] = LinearInterp(Frac, LoSlice[Chan], HiSlice[Chan]);
+       }
+}
+// Float evaluation of an 8-input CLUT. Input[0] selects two neighbouring 7-input
+// sub-tables; each is evaluated by Eval7InputsFloat on Input[1..7], and both
+// results are blended linearly.
+static
+void Eval8InputsFloat(const cmsFloat32Number Input[],
+                      cmsFloat32Number Output[],
+                      const cmsInterpParams* p)
+{
+       const cmsFloat32Number* Base = (cmsFloat32Number*) p -> Table;
+       cmsFloat32Number LoSlice[MAX_STAGE_CHANNELS], HiSlice[MAX_STAGE_CHANNELS];
+       cmsFloat32Number GridPos, Frac;
+       cmsInterpParams Sub;
+       cmsUInt32Number Chan;
+       int Cell, LoOfs, HiOfs;
+
+       // Position of the first input on its grid axis: integer cell plus fractional part
+       GridPos = fclamp(Input[0]) * p->Domain[0];
+       Cell    = _cmsQuickFloor(GridPos);
+       Frac    = GridPos - (cmsFloat32Number) Cell;
+
+       // Offsets of both sub-tables; when the clamped input reaches 1.0 the upper one equals the lower
+       LoOfs = p -> opta[7] * Cell;
+       if (fclamp(Input[0]) >= 1.0)
+              HiOfs = LoOfs;
+       else
+              HiOfs = LoOfs + p->opta[7];
+
+       // Evaluate on a copy of the parameters whose Domain[] entries are shifted down by one axis
+       Sub = *p;
+       memmove(&Sub.Domain[0], &p ->Domain[1], 7*sizeof(cmsUInt32Number));
+
+       // Lower slice
+       Sub.Table = Base + LoOfs;
+       Eval7InputsFloat(Input + 1,  LoSlice, &Sub);
+
+       // Upper slice
+       Sub.Table = Base + HiOfs;
+       Eval7InputsFloat(Input + 1,  HiSlice, &Sub);
+
+       // Linear blend between the two slices
+       for (Chan = 0; Chan < p -> nOutputs; Chan++) {
+              const cmsFloat32Number Lo = LoSlice[Chan];
+              const cmsFloat32Number Hi = HiSlice[Chan];
+              Output[Chan] = Lo + (Hi - Lo) * Frac;
+       }
+}
+// The default factory: selects the evaluator that matches the number of input channels and the
+// CMS_LERP_FLAGS_* bits of dwFlags. The result is left zeroed when no evaluator applies.
+static
+cmsInterpFunction DefaultInterpolatorsFactory(cmsUInt32Number nInputChannels, cmsUInt32Number nOutputChannels, cmsUInt32Number dwFlags)
+{
+
+    cmsInterpFunction Interpolation;
+    cmsBool  IsFloat     = (dwFlags & CMS_LERP_FLAGS_FLOAT);
+    cmsBool  IsTrilinear = (dwFlags & CMS_LERP_FLAGS_TRILINEAR);
+
+    memset(&Interpolation, 0, sizeof(Interpolation));
+
+    // Safety check: the 4..8 input evaluators keep per-channel temporaries of MAX_STAGE_CHANNELS entries
+    if (nInputChannels >= 4 && nOutputChannels >= MAX_STAGE_CHANNELS)
+        return Interpolation;
+
+    switch (nInputChannels) {
+
+           case 1: // Gray LUT / linear
+
+               if (nOutputChannels == 1) {
+
+                   if (IsFloat)
+                       Interpolation.LerpFloat = LinLerp1Dfloat;
+                   else
+                       Interpolation.Lerp16 = LinLerp1D;
+
+               }
+               else {
+
+                   if (IsFloat)
+                       Interpolation.LerpFloat = Eval1InputFloat;
+                   else
+                       Interpolation.Lerp16 = Eval1Input;
+               }
+               break;
+
+           case 2: // Duotone
+               if (IsFloat)
+                      Interpolation.LerpFloat =  BilinearInterpFloat;
+               else
+                      Interpolation.Lerp16    =  BilinearInterp16;
+               break;
+
+           case 3:  // RGB et al
+
+               if (IsTrilinear) {
+
+                   if (IsFloat)
+                       Interpolation.LerpFloat = TrilinearInterpFloat;
+                   else
+                       Interpolation.Lerp16 = TrilinearInterp16;
+               }
+               else {
+
+                   if (IsFloat)
+                       Interpolation.LerpFloat = TetrahedralInterpFloat;
+                   else {
+
+                       Interpolation.Lerp16 = TetrahedralInterp16;
+                   }
+               }
+               break;
+
+           case 4:  // CMYK lut
+
+               if (IsFloat)
+                   Interpolation.LerpFloat =  Eval4InputsFloat;
+               else
+                   Interpolation.Lerp16    =  Eval4Inputs;
+               break;
+
+           case 5: // 5 Inks
+               if (IsFloat)
+                   Interpolation.LerpFloat =  Eval5InputsFloat;
+               else
+                   Interpolation.Lerp16    =  Eval5Inputs;
+               break;
+
+           case 6: // 6 Inks
+               if (IsFloat)
+                   Interpolation.LerpFloat =  Eval6InputsFloat;
+               else
+                   Interpolation.Lerp16    =  Eval6Inputs;
+               break;
+
+           case 7: // 7 inks
+               if (IsFloat)
+                   Interpolation.LerpFloat =  Eval7InputsFloat;
+               else
+                   Interpolation.Lerp16    =  Eval7Inputs;
+               break;
+
+           case 8: // 8 inks
+               if (IsFloat)
+                   Interpolation.LerpFloat =  Eval8InputsFloat;
+               else
+                   Interpolation.Lerp16    =  Eval8Inputs;
+               break;
+
+           // Any other number of input channels has no evaluator here, so the
+           // function pointer is reported as NULL
+           default:
+               Interpolation.Lerp16 = NULL;
+    }
+
+    return Interpolation;
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio0.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio0.cpp
new file mode 100644
index 0000000000..ffebfa36b3
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio0.cpp
@@ -0,0 +1,1946 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Generic I/O, tag dictionary management, profile struct
+
+// IOhandlers are abstractions used by littleCMS to read from any file, stream,
+// memory block or other storage. Each IOhandler provides implementations of the read,
+// write, seek and tell functions. LittleCMS code performs all IO through those objects.
+// This way, it is easier to add support for new storage media.
+
+// NULL stream, for taking care of used space -------------------------------------
+
+// The NULL IOhandler does nothing but keep track of how many bytes have been
+// written. This is handy when creating profiles, where the file size is needed in the
+// header. The whole profile is first serialized through the NULL IOhandler, and a second
+// pass then writes the bytes to the pertinent IOhandler.
+// State of the NULL stream: a position counter only, no storage is attached to it
+typedef struct {
+    cmsUInt32Number Pointer;         // Current position, in bytes from the start of the stream
+} FILENULL;
+// NULL stream read: nothing is stored in Buffer; the position just advances by size * count
+static
+cmsUInt32Number NULLRead(cmsIOHANDLER* iohandler, void *Buffer, cmsUInt32Number size, cmsUInt32Number count)
+{
+    FILENULL* Null = (FILENULL*) iohandler ->stream;
+
+    // Account for the bytes that were "read" and report every element as delivered
+    Null -> Pointer += size * count;
+    return count;
+
+    cmsUNUSED_PARAMETER(Buffer);
+}
+// NULL stream seek: any absolute offset is accepted (there is no size to check
+// against) and simply recorded as the new position.
+static
+cmsBool  NULLSeek(cmsIOHANDLER* iohandler, cmsUInt32Number offset)
+{
+    ((FILENULL*) iohandler ->stream) ->Pointer = offset;
+
+    return TRUE;
+}
+// NULL stream tell: reports the recorded position
+static
+cmsUInt32Number NULLTell(cmsIOHANDLER* iohandler)
+{
+    const FILENULL* Null = (const FILENULL*) iohandler ->stream;
+    return Null -> Pointer;
+}
+// NULL stream write: Ptr is never dereferenced. Only the position and the UsedSpace mark move.
+static
+cmsBool  NULLWrite(cmsIOHANDLER* iohandler, cmsUInt32Number size, const void *Ptr)
+{
+    FILENULL* Null = (FILENULL*) iohandler ->stream;
+    cmsUInt32Number NewPos = Null ->Pointer + size;
+
+    Null ->Pointer = NewPos;
+    // UsedSpace remembers the furthest position reached by any write
+    if (NewPos > iohandler->UsedSpace) iohandler->UsedSpace = NewPos;
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(Ptr);
+}
+// NULL stream close: releases the position record first, then the handler itself
+static
+cmsBool  NULLClose(cmsIOHANDLER* iohandler)
+{
+    cmsContext Owner = iohandler ->ContextID;
+
+    _cmsFree(Owner, iohandler ->stream);
+    _cmsFree(Owner, iohandler);
+    return TRUE;
+}
+// The NULL IOhandler creator. Returns a handler wired to the NULL* callbacks above,
+// or NULL when either of the two allocations fails.
+cmsIOHANDLER*  CMSEXPORT cmsOpenIOhandlerFromNULL(cmsContext ContextID)
+{
+    struct _cms_io_handler* Handler;
+    FILENULL* Counter;
+
+    Handler = (struct _cms_io_handler*) _cmsMallocZero(ContextID, sizeof(struct _cms_io_handler));
+    if (Handler == NULL)
+        return NULL;
+
+    Counter = (FILENULL*) _cmsMallocZero(ContextID, sizeof(FILENULL));
+    if (Counter == NULL) {
+        // Do not leak the handler allocated above
+        _cmsFree(ContextID, Handler);
+        return NULL;
+    }
+
+    // Wire the callbacks
+    Handler ->Read    = NULLRead;
+    Handler ->Write   = NULLWrite;
+    Handler ->Seek    = NULLSeek;
+    Handler ->Tell    = NULLTell;
+    Handler ->Close   = NULLClose;
+
+    // Initial bookkeeping: position 0, nothing used or reported, empty PhysicalFile name
+    Counter ->Pointer = 0;
+    Handler ->ContextID = ContextID;
+    Handler ->stream    = (void*) Counter;
+    Handler ->UsedSpace = 0;
+    Handler ->ReportedSize = 0;
+    Handler ->PhysicalFile[0] = 0;
+    return Handler;
+}
+
+
+// Memory-based stream --------------------------------------------------------------
+
+// These functions implement an iohandler which uses a block of memory as its storage medium.
+// State of a memory-based stream
+typedef struct {
+    cmsUInt8Number* Block;    // Start of the memory block (a private copy in 'r' mode, the caller's buffer in 'w' mode)
+    cmsUInt32Number Size;     // Size of the block, in bytes
+    cmsUInt32Number Pointer;  // Current position, as a byte offset from Block
+    int FreeBlockOnClose;     // Non-zero when MemoryClose has to free Block
+
+} FILEMEM;
+// Copies count elements of size bytes each from the current position into Buffer.
+// Returns count on success, or 0 (after signalling cmsERROR_READ) when the request does not fit.
+static
+cmsUInt32Number MemoryRead(struct _cms_io_handler* iohandler, void *Buffer, cmsUInt32Number size, cmsUInt32Number count)
+{
+    FILEMEM* ResData = (FILEMEM*) iohandler ->stream;
+    cmsUInt32Number len = size * count;
+    cmsUInt32Number avail = (ResData -> Pointer <= ResData -> Size) ? (ResData -> Size - ResData -> Pointer) : 0;
+
+    // Compare against the remaining bytes instead of computing Pointer + len, which may wrap
+    // around in 32 bits; a size * count product that itself wrapped is rejected as well.
+    if (len > avail || (size != 0 && count > 0xFFFFFFFFU / size)) {
+
+        len = (ResData -> Size - ResData -> Pointer);
+        cmsSignalError(iohandler ->ContextID, cmsERROR_READ, "Read from memory error. Got %d bytes, block should be of %d bytes", len, count * size);
+        return 0;
+    }
+
+    memmove(Buffer, ResData -> Block + ResData -> Pointer, len);
+    ResData -> Pointer += len;
+    return count;
+}
+// Moves the position to the absolute byte offset given (the offset replaces the current
+// position, it is not added to it). Offsets beyond the block size are refused.
+static
+cmsBool  MemorySeek(struct _cms_io_handler* iohandler, cmsUInt32Number offset)
+{
+    FILEMEM* Mem = (FILEMEM*) iohandler ->stream;
+
+    if (offset <= Mem ->Size) {
+        Mem ->Pointer = offset;
+        return TRUE;
+    }
+
+    cmsSignalError(iohandler ->ContextID, cmsERROR_SEEK,  "Too few data; probably corrupted profile");
+    return FALSE;
+}
+// Tell for memory: current byte offset inside the block, or 0 when the handler
+// carries no stream data at all.
+static
+cmsUInt32Number MemoryTell(struct _cms_io_handler* iohandler)
+{
+    const FILEMEM* Mem = (const FILEMEM*) iohandler ->stream;
+
+    // A missing stream record is reported as position 0
+    return (Mem != NULL) ? Mem -> Pointer : 0;
+}
+// Writes data to memory, also keeps used space for further reference. A request larger than the
+// room left in the block is clipped to that room (the call still returns TRUE); FALSE is only
+// returned when the handler has no stream data.
+static
+cmsBool MemoryWrite(struct _cms_io_handler* iohandler, cmsUInt32Number size, const void *Ptr)
+{
+    FILEMEM* ResData = (FILEMEM*) iohandler ->stream;
+    cmsUInt32Number room;
+
+    if (ResData == NULL) return FALSE; // Housekeeping
+
+    // Check for available space. Clip. The test is made against the remaining room because
+    // Pointer + size may wrap around in 32 bits and let an oversized write through.
+    room = (ResData->Pointer <= ResData->Size) ? (ResData->Size - ResData->Pointer) : 0;
+    if (size > room) size = room;
+
+    if (size == 0) return TRUE;     // Write zero bytes is ok, but does nothing
+
+    memmove(ResData ->Block + ResData ->Pointer, Ptr, size);
+    ResData ->Pointer += size;
+
+    if (ResData ->Pointer > iohandler->UsedSpace)
+        iohandler->UsedSpace = ResData ->Pointer;
+    return TRUE;
+}
+// Releases a memory-based handler. The block itself is freed only when FreeBlockOnClose is
+// set and the block pointer is non-NULL; the FILEMEM record and the handler always are.
+static
+cmsBool  MemoryClose(struct _cms_io_handler* iohandler)
+{
+    FILEMEM* Mem = (FILEMEM*) iohandler ->stream;
+    cmsContext Owner = iohandler ->ContextID;
+
+    if (Mem ->FreeBlockOnClose && Mem ->Block != NULL)
+        _cmsFree(Owner, Mem ->Block);
+
+    // Then the bookkeeping record and finally the handler itself
+    _cmsFree(Owner, Mem);
+    _cmsFree(Owner, iohandler);
+
+    return TRUE;
+}
+// Creates an iohandler on top of a memory block. AccessMode 'r' makes a private copy of Buffer
+// (size bytes), so the caller may free its memory right after opening the profile. In write
+// mode ('w') Buffer is the start of the caller's block to be written, of at most size bytes.
+// Returns NULL on allocation failure, on a NULL Buffer in read mode or on an unknown mode.
+cmsIOHANDLER* CMSEXPORT cmsOpenIOhandlerFromMem(cmsContext ContextID, void *Buffer, cmsUInt32Number size, const char* AccessMode)
+{
+    cmsIOHANDLER* iohandler = NULL;
+    FILEMEM* fm = NULL;
+
+    _cmsAssert(AccessMode != NULL);
+
+    iohandler = (cmsIOHANDLER*) _cmsMallocZero(ContextID, sizeof(cmsIOHANDLER));
+    if (iohandler == NULL) return NULL;
+
+    switch (*AccessMode) {
+
+    case 'r':
+        fm = (FILEMEM*) _cmsMallocZero(ContextID, sizeof(FILEMEM));
+        if (fm == NULL) goto Error;
+
+        if (Buffer == NULL) {
+            cmsSignalError(ContextID, cmsERROR_READ, "Couldn't read profile from NULL pointer");
+            goto Error;
+        }
+
+        fm ->Block = (cmsUInt8Number*) _cmsMalloc(ContextID, size);
+        if (fm ->Block == NULL) {
+            // size is cast so that the argument really matches the %ld conversion
+            cmsSignalError(ContextID, cmsERROR_READ, "Couldn't allocate %ld bytes for profile", (long) size);
+            goto Error;
+        }
+
+        // Private copy: FreeBlockOnClose tells MemoryClose to release it
+        memmove(fm->Block, Buffer, size);
+        fm ->FreeBlockOnClose = TRUE;
+        fm ->Size    = size;
+        fm ->Pointer = 0;
+        iohandler -> ReportedSize = size;
+        break;
+
+    case 'w':
+        fm = (FILEMEM*) _cmsMallocZero(ContextID, sizeof(FILEMEM));
+        if (fm == NULL) goto Error;
+
+        fm ->Block = (cmsUInt8Number*) Buffer;
+        fm ->FreeBlockOnClose = FALSE;
+        fm ->Size    = size;
+        fm ->Pointer = 0;
+        iohandler -> ReportedSize = 0;
+        break;
+
+    default:
+        // The handler allocated above must be released on this path as well
+        cmsSignalError(ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown access mode '%c'", *AccessMode);
+        goto Error;
+    }
+
+    iohandler ->ContextID = ContextID;
+    iohandler ->stream  = (void*) fm;
+    iohandler ->UsedSpace = 0;
+    iohandler ->PhysicalFile[0] = 0;
+
+    iohandler ->Read    = MemoryRead;
+    iohandler ->Seek    = MemorySeek;
+    iohandler ->Close   = MemoryClose;
+    iohandler ->Tell    = MemoryTell;
+    iohandler ->Write   = MemoryWrite;
+
+    return iohandler;
+
+Error:
+    // Single cleanup path; fm is still NULL when the failure happened before its allocation
+    if (fm) _cmsFree(ContextID, fm);
+    if (iohandler) _cmsFree(ContextID, iohandler);
+    return NULL;
+}
+
+// File-based stream -------------------------------------------------------
+// Reads count elements of size bytes each from the FILE* stream. Returns the number of
+// elements read, which is count on success; any short read is signalled and yields 0.
+static
+cmsUInt32Number FileRead(cmsIOHANDLER* iohandler, void *Buffer, cmsUInt32Number size, cmsUInt32Number count)
+{
+    FILE* File = (FILE*) iohandler->stream;
+    cmsUInt32Number Got = (cmsUInt32Number) fread(Buffer, size, count, File);
+
+    if (Got == count)
+        return Got;
+
+    cmsSignalError(iohandler ->ContextID, cmsERROR_FILE, "Read error. Got %d bytes, block should be of %d bytes", Got * size, count * size);
+    return 0;
+}
+// Positions the file pointer at the absolute offset given (SEEK_SET).
+// Returns FALSE, after signalling cmsERROR_FILE, when fseek fails.
+static
+cmsBool  FileSeek(cmsIOHANDLER* iohandler, cmsUInt32Number offset)
+{
+    FILE* File = (FILE*) iohandler ->stream;
+    int   rc   = fseek(File, (long) offset, SEEK_SET);
+
+    if (rc == 0) return TRUE;
+
+    cmsSignalError(iohandler ->ContextID, cmsERROR_FILE, "Seek error; probably corrupted file");
+    return FALSE;
+}
+// Returns the current position of the file pointer. A failing ftell is signalled and
+// reported as 0, which callers cannot tell apart from a genuine position 0.
+static
+cmsUInt32Number FileTell(cmsIOHANDLER* iohandler)
+{
+    long Pos = ftell((FILE*)iohandler ->stream);
+
+    if (Pos != -1L)
+        return (cmsUInt32Number) Pos;
+
+    cmsSignalError(iohandler->ContextID, cmsERROR_FILE, "Tell error; probably corrupted file");
+    return 0;
+}
+// Writes size bytes from Buffer to the FILE* stream and adds them to UsedSpace (the counter is
+// bumped before the write is attempted). Returns TRUE on success, FALSE on error.
+static
+cmsBool  FileWrite(cmsIOHANDLER* iohandler, cmsUInt32Number size, const void* Buffer)
+{
+    FILE* File = (FILE*) iohandler->stream;
+    if (size == 0) return TRUE;  // Writing 0 bytes is allowed; nothing is written or counted
+    iohandler->UsedSpace += size;
+    return (fwrite(Buffer, size, 1, File) == 1);
+}
+// Closes the file and releases the handler. The handler is freed even when fclose fails, since
+// the stream is no longer usable after fclose whatever its result; FALSE then reports the error.
+static
+cmsBool  FileClose(cmsIOHANDLER* iohandler)
+{
+    cmsBool Ok = (fclose((FILE*) iohandler ->stream) == 0) ? TRUE : FALSE;
+    _cmsFree(iohandler ->ContextID, iohandler);
+    return Ok;
+}
+// Creates an iohandler for a disk file. AccessMode starting with 'r' opens FileName for binary
+// reading and records its length as ReportedSize; 'w' creates it for binary writing. Any failure
+// is signalled as cmsERROR_FILE and yields NULL with nothing left allocated or open.
+cmsIOHANDLER* CMSEXPORT cmsOpenIOhandlerFromFile(cmsContext ContextID, const char* FileName, const char* AccessMode)
+{
+    cmsIOHANDLER* Handler;
+    FILE* File = NULL;
+    cmsInt32Number Length;
+
+    _cmsAssert(FileName != NULL);
+    _cmsAssert(AccessMode != NULL);
+
+    Handler = (cmsIOHANDLER*) _cmsMallocZero(ContextID, sizeof(cmsIOHANDLER));
+    if (Handler == NULL) return NULL;
+
+    if (*AccessMode == 'r') {
+        File = fopen(FileName, "rb");
+        if (File == NULL) {
+            _cmsFree(ContextID, Handler);
+            cmsSignalError(ContextID, cmsERROR_FILE, "File '%s' not found", FileName);
+            return NULL;
+        }
+
+        // A negative length is treated as "size could not be determined"
+        Length = cmsfilelength(File);
+        if (Length < 0) {
+            fclose(File);
+            _cmsFree(ContextID, Handler);
+            cmsSignalError(ContextID, cmsERROR_FILE, "Cannot get size of file '%s'", FileName);
+            return NULL;
+        }
+
+        Handler -> ReportedSize = (cmsUInt32Number) Length;
+    }
+    else if (*AccessMode == 'w') {
+        File = fopen(FileName, "wb");
+        if (File == NULL) {
+            _cmsFree(ContextID, Handler);
+            cmsSignalError(ContextID, cmsERROR_FILE, "Couldn't create '%s'", FileName);
+            return NULL;
+        }
+
+        Handler -> ReportedSize = 0;
+    }
+    else {
+        _cmsFree(ContextID, Handler);
+        cmsSignalError(ContextID, cmsERROR_FILE, "Unknown access mode '%c'", *AccessMode);
+        return NULL;
+    }
+
+    // Callbacks used by the FILE*-based handlers
+    Handler ->Read    = FileRead;
+    Handler ->Write   = FileWrite;
+    Handler ->Seek    = FileSeek;
+    Handler ->Tell    = FileTell;
+    Handler ->Close   = FileClose;
+
+    Handler ->ContextID = ContextID;
+    Handler ->stream    = (void*) File;
+    Handler ->UsedSpace = 0;
+
+    // Keep track of the original file; the copy is cut to the buffer size and always terminated
+    strncpy(Handler -> PhysicalFile, FileName, sizeof(Handler -> PhysicalFile)-1);
+    Handler -> PhysicalFile[sizeof(Handler -> PhysicalFile)-1] = 0;
+
+    return Handler;
+}
+
+// Wraps an already-open FILE stream in an I/O handler. The length obtained through
+// cmsfilelength() becomes ReportedSize; no physical file name is recorded.
+cmsIOHANDLER* CMSEXPORT cmsOpenIOhandlerFromStream(cmsContext ContextID, FILE* Stream)
+{
+    cmsIOHANDLER* ioh;
+    cmsInt32Number len = cmsfilelength(Stream);
+
+    // A negative length means the size could not be determined
+    if (len < 0) {
+        cmsSignalError(ContextID, cmsERROR_FILE, "Cannot get size of stream");
+        return NULL;
+    }
+
+    ioh = (cmsIOHANDLER*) _cmsMallocZero(ContextID, sizeof(cmsIOHANDLER));
+    if (ioh == NULL)
+        return NULL;
+
+    ioh->ContextID       = ContextID;
+    ioh->stream          = (void*) Stream;
+    ioh->UsedSpace       = 0;
+    ioh->ReportedSize    = (cmsUInt32Number) len;
+    ioh->PhysicalFile[0] = 0;
+
+    // Hook up the FILE-based callbacks
+    ioh->Read  = FileRead;
+    ioh->Seek  = FileSeek;
+    ioh->Close = FileClose;
+    ioh->Tell  = FileTell;
+    ioh->Write = FileWrite;
+
+    return ioh;
+}
+
+
+
+// Closes an I/O handler by dispatching to the handler's own Close callback
+cmsBool CMSEXPORT cmsCloseIOhandler(cmsIOHANDLER* io)
+{
+    cmsBool Result = (*io->Close)(io);
+
+    return Result;
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// Returns the I/O handler attached to a profile, or NULL when the profile handle is NULL
+cmsIOHANDLER* CMSEXPORT cmsGetProfileIOhandler(cmsHPROFILE hProfile)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    return (Profile != NULL) ? Profile->IOhandler : NULL;
+}
+
+// Creates an empty profile structure holding all required parameters.
+// The new profile has no tags, a default version of 0x02100000, the current UTC time as its
+// creation date and a mutex obtained from _cmsCreateMutex(). Returns NULL if allocation fails.
+cmsHPROFILE CMSEXPORT cmsCreateProfilePlaceholder(cmsContext ContextID)
+{
+    time_t now = time(NULL);
+    const struct tm* utc;
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) _cmsMallocZero(ContextID, sizeof(_cmsICCPROFILE));
+    if (Icc == NULL) return NULL;
+
+    Icc ->ContextID = ContextID;
+
+    // Set it to empty
+    Icc -> TagCount   = 0;
+
+    // Set default version
+    Icc ->Version =  0x02100000;
+
+    // Set creation date/time. gmtime() may return NULL when the time cannot be converted;
+    // in that case the field keeps the zeroes left by _cmsMallocZero instead of copying from NULL.
+    utc = gmtime(&now);
+    if (utc != NULL)
+        memmove(&Icc ->Created, utc, sizeof(Icc ->Created));
+
+    // Create a mutex if the user provided proper plugin. NULL otherwise
+    Icc ->UsrMutex = _cmsCreateMutex(ContextID);
+
+    // Return the handle
+    return (cmsHPROFILE) Icc;
+}
+
+// Returns the context the profile was created with, or NULL when the profile handle is NULL
+cmsContext CMSEXPORT cmsGetProfileContextID(cmsHPROFILE hProfile)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    return (Profile != NULL) ? Profile->ContextID : NULL;
+}
+
+
+// Returns how many entries the profile's tag directory holds, or -1 for a NULL profile handle
+cmsInt32Number CMSEXPORT cmsGetTagCount(cmsHPROFILE hProfile)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    return (Profile == NULL) ? -1 : (cmsInt32Number) Profile->TagCount;
+}
+
+// Return the tag signature of a given tag number.
+// Valid tag numbers are 0 .. TagCount-1. A zero signature is returned ("not available") when
+// the profile handle is NULL or n is outside that range.
+cmsTagSignature CMSEXPORT cmsGetTagSignature(cmsHPROFILE hProfile, cmsUInt32Number n)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+
+    // Same NULL tolerance as cmsGetTagCount
+    if (Icc == NULL) return (cmsTagSignature) 0;
+
+    // n == TagCount is already one past the last valid entry
+    if (n >= Icc->TagCount) return (cmsTagSignature) 0;  // Mark as not available
+    if (n >= MAX_TABLE_TAG) return (cmsTagSignature) 0; // As double check
+
+    return Icc ->TagNames[n];
+}
+
+
+// Linear scan of the tag directory: returns the index of the first entry whose
+// signature equals sig, or -1 when there is none.
+static
+int SearchOneTag(_cmsICCPROFILE* Profile, cmsTagSignature sig)
+{
+    int pos = 0;
+
+    while (pos < (int) Profile->TagCount) {
+
+        if (Profile->TagNames[pos] == sig)
+            return pos;
+
+        pos++;
+    }
+
+    return -1;
+}
+
+// Looks up a tag in the tag directory. Returns its position, or -1 if the tag is not found.
+// When lFollowLinks is set, links are chased until an entry without a link is reached and
+// the position of that final entry is returned.
+int _cmsSearchTag(_cmsICCPROFILE* Icc, cmsTagSignature sig, cmsBool lFollowLinks)
+{
+    for (;;) {
+
+        cmsTagSignature Next;
+        int pos = SearchOneTag(Icc, sig);
+
+        // Not present in the directory
+        if (pos < 0)
+            return -1;
+
+        // Caller wants the entry itself, whether or not it is linked
+        if (!lFollowLinks)
+            return pos;
+
+        // A zero link means this entry is the end of the chain
+        Next = Icc->TagLinked[pos];
+        if (Next == (cmsTagSignature) 0)
+            return pos;
+
+        // Otherwise continue the search with the linked signature
+        sig = Next;
+    }
+}
+
+// Deletes the in-memory contents of the tag entry at position i.
+// Raw tags are released with _cmsFree; cooked tags are released through the FreePtr callback
+// of their type handler. In both cases TagPtrs[i] is reset to NULL so that no dangling pointer
+// is left for a later free. A cooked tag without a type handler is left untouched.
+
+static
+void _cmsDeleteTagByPos(_cmsICCPROFILE* Icc, int i)
+{
+    _cmsAssert(Icc != NULL);
+    _cmsAssert(i >= 0);
+
+
+    if (Icc -> TagPtrs[i] != NULL) {
+
+        // Free previous version
+        if (Icc ->TagSaveAsRaw[i]) {
+            _cmsFree(Icc ->ContextID, Icc ->TagPtrs[i]);
+            Icc ->TagPtrs[i] = NULL;        // Avoid a dangling pointer / double free later on
+        }
+        else {
+            cmsTagTypeHandler* TypeHandler = Icc ->TagTypeHandlers[i];
+
+            if (TypeHandler != NULL) {
+
+                // Work on a local copy so context and version can be passed without touching the shared handler
+                cmsTagTypeHandler LocalTypeHandler = *TypeHandler;
+                LocalTypeHandler.ContextID = Icc ->ContextID;              // As an additional parameter
+                LocalTypeHandler.ICCVersion = Icc ->Version;
+                LocalTypeHandler.FreePtr(&LocalTypeHandler, Icc -> TagPtrs[i]);
+                Icc ->TagPtrs[i] = NULL;
+            }
+        }
+
+    }
+}
+
+
+// Reserves a directory slot for tag sig and stores its index in *NewPos.
+// An existing entry with the same signature is reused after its contents are deleted;
+// otherwise a new slot is appended. Returns FALSE (and signals cmsERROR_RANGE) when the
+// directory is already full.
+static
+cmsBool _cmsNewTag(_cmsICCPROFILE* Icc, cmsTagSignature sig, int* NewPos)
+{
+    int pos = _cmsSearchTag(Icc, sig, FALSE);
+
+    // Tag already present: drop the old contents and reuse the slot
+    if (pos >= 0) {
+
+        _cmsDeleteTagByPos(Icc, pos);
+        *NewPos = pos;
+        return TRUE;
+    }
+
+    // Tag is new: make sure there is room for one more entry
+    if (Icc->TagCount >= MAX_TABLE_TAG) {
+        cmsSignalError(Icc->ContextID, cmsERROR_RANGE, "Too many tags (%d)", MAX_TABLE_TAG);
+        return FALSE;
+    }
+
+    *NewPos = (int) Icc->TagCount;
+    Icc->TagCount++;
+
+    return TRUE;
+}
+
+
+// Tells whether the profile's tag directory contains sig (links are not followed)
+cmsBool CMSEXPORT cmsIsTag(cmsHPROFILE hProfile, cmsTagSignature sig)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) (void*) hProfile;
+    int pos = _cmsSearchTag(Profile, sig, FALSE);
+
+    return pos >= 0;
+}
+
+// Clamps a raw profile version word so that it follows the spec.
+// Works on the bytes exactly as they are stored in the profile (big endian), so it has to be
+// called before the value is converted to platform endianness.
+//   Byte 0: BCD major version, limited to 9.
+//   Byte 1: two BCD digits, one per nibble, each limited to 9.
+//   Bytes 2 and 3: reserved, forced to 0.
+static
+cmsUInt32Number _validatedVersion(cmsUInt32Number DWord)
+{
+    cmsUInt8Number* Bytes = (cmsUInt8Number*) &DWord;
+    cmsUInt8Number HighNibble;
+    cmsUInt8Number LowNibble;
+
+    // Major version
+    if (Bytes[0] > 0x09) Bytes[0] = (cmsUInt8Number) 0x09;
+
+    // Minor version and bug fix digits
+    HighNibble = (cmsUInt8Number) (Bytes[1] & 0xf0);
+    LowNibble  = (cmsUInt8Number) (Bytes[1] & 0x0f);
+
+    if (HighNibble > 0x90U) HighNibble = 0x90U;
+    if (LowNibble  > 0x09U) LowNibble  = 0x09U;
+
+    Bytes[1] = (cmsUInt8Number) (HighNibble | LowNibble);
+
+    // Reserved bytes
+    Bytes[2] = (cmsUInt8Number) 0;
+    Bytes[3] = (cmsUInt8Number) 0;
+
+    return DWord;
+}
+
+// Read profile header and validate it.
+// Reads the fixed-size ICC header through Icc->IOhandler, checks the magic number, copies the
+// header fields into Icc (converted to platform endianness) and then loads the tag directory.
+// Returns FALSE when a read fails, the magic number does not match, or the directory announces
+// more than MAX_TABLE_TAG entries; TRUE otherwise.
+cmsBool _cmsReadHeader(_cmsICCPROFILE* Icc)
+{
+    cmsTagEntry Tag;
+    cmsICCHeader Header;
+    cmsUInt32Number i, j;
+    cmsUInt32Number HeaderSize;
+    cmsIOHANDLER* io = Icc ->IOhandler;
+    cmsUInt32Number TagCount;
+
+
+    // Read the header as one raw block; fields are still in file byte order at this point
+    if (io -> Read(io, &Header, sizeof(cmsICCHeader), 1) != 1) {
+        return FALSE;
+    }
+
+    // Validate file as an ICC profile
+    if (_cmsAdjustEndianess32(Header.magic) != cmsMagicNumber) {
+        cmsSignalError(Icc ->ContextID, cmsERROR_BAD_SIGNATURE, "not an ICC profile, invalid signature");
+        return FALSE;
+    }
+
+    // Adjust endianness of the used parameters
+    Icc -> DeviceClass     = (cmsProfileClassSignature) _cmsAdjustEndianess32(Header.deviceClass);
+    Icc -> ColorSpace      = (cmsColorSpaceSignature)   _cmsAdjustEndianess32(Header.colorSpace);
+    Icc -> PCS             = (cmsColorSpaceSignature)   _cmsAdjustEndianess32(Header.pcs);
+   
+    Icc -> RenderingIntent = _cmsAdjustEndianess32(Header.renderingIntent);
+    Icc -> flags           = _cmsAdjustEndianess32(Header.flags);
+    Icc -> manufacturer    = _cmsAdjustEndianess32(Header.manufacturer);
+    Icc -> model           = _cmsAdjustEndianess32(Header.model);
+    Icc -> creator         = _cmsAdjustEndianess32(Header.creator);
+    
+    _cmsAdjustEndianess64(&Icc -> attributes, &Header.attributes);
+    // The version is clamped on the raw bytes first, then converted
+    Icc -> Version         = _cmsAdjustEndianess32(_validatedVersion(Header.version));
+
+    // Get size as reported in header
+    HeaderSize = _cmsAdjustEndianess32(Header.size);
+
+    // Never trust a header size larger than what the IO handler reports: clamp it to ReportedSize
+    if (HeaderSize >= Icc ->IOhandler ->ReportedSize)
+            HeaderSize = Icc ->IOhandler ->ReportedSize;
+
+
+    // Get creation date/time
+    _cmsDecodeDateTimeNumber(&Header.date, &Icc ->Created);
+
+    // The profile ID is copied as 16 raw bytes, without any endianness adjustment
+    memmove(Icc ->ProfileID.ID32, Header.profileID.ID32, 16);
+
+
+    // Read the number of entries announced by the tag directory
+    if (!_cmsReadUInt32Number(io, &TagCount)) return FALSE;
+    if (TagCount > MAX_TABLE_TAG) {
+
+        cmsSignalError(Icc ->ContextID, cmsERROR_RANGE, "Too many tags (%d)", TagCount);
+        return FALSE;
+    }
+
+
+    // Read tag directory. Icc->TagCount counts only the entries that are accepted, so it may
+    // end up lower than the TagCount announced by the file.
+    Icc -> TagCount = 0;
+    for (i=0; i < TagCount; i++) {
+
+        if (!_cmsReadUInt32Number(io, (cmsUInt32Number *) &Tag.sig)) return FALSE;
+        if (!_cmsReadUInt32Number(io, &Tag.offset)) return FALSE;
+        if (!_cmsReadUInt32Number(io, &Tag.size)) return FALSE;
+
+        // Perform some sanity check. Offset + size should fall inside file.
+        // The second condition catches 32-bit wrap-around of offset + size.
+        // Entries failing the check are silently skipped.
+        if (Tag.offset + Tag.size > HeaderSize ||
+            Tag.offset + Tag.size < Tag.offset)
+                  continue;
+
+        Icc -> TagNames[Icc ->TagCount]   = Tag.sig;
+        Icc -> TagOffsets[Icc ->TagCount] = Tag.offset;
+        Icc -> TagSizes[Icc ->TagCount]   = Tag.size;
+
+       // Search for links: an earlier accepted entry with the same offset and size makes this
+       // entry a link to it. If several earlier entries match, the last one found is kept.
+        for (j=0; j < Icc ->TagCount; j++) {
+
+            if ((Icc ->TagOffsets[j] == Tag.offset) &&
+                (Icc ->TagSizes[j]   == Tag.size)) {
+
+                Icc ->TagLinked[Icc ->TagCount] = Icc ->TagNames[j];
+            }
+
+        }
+
+        Icc ->TagCount++;
+    }
+
+    return TRUE;
+}
+
+// Saves profile header followed by the tag directory.
+// UsedSpace is the value stored in the header's size field. Everything is written through
+// Icc->IOhandler. Returns FALSE as soon as any write fails, TRUE otherwise.
+cmsBool _cmsWriteHeader(_cmsICCPROFILE* Icc, cmsUInt32Number UsedSpace)
+{
+    cmsICCHeader Header;
+    cmsUInt32Number i;
+    cmsTagEntry Tag;
+    cmsUInt32Number Count;
+
+    // Every multi-byte field goes through the endianness adjustment before being stored
+    Header.size        = _cmsAdjustEndianess32(UsedSpace);
+    Header.cmmId       = _cmsAdjustEndianess32(lcmsSignature);
+    Header.version     = _cmsAdjustEndianess32(Icc ->Version);
+
+    Header.deviceClass = (cmsProfileClassSignature) _cmsAdjustEndianess32(Icc -> DeviceClass);
+    Header.colorSpace  = (cmsColorSpaceSignature) _cmsAdjustEndianess32(Icc -> ColorSpace);
+    Header.pcs         = (cmsColorSpaceSignature) _cmsAdjustEndianess32(Icc -> PCS);
+
+    //   NOTE: in v4 Timestamp must be in UTC rather than in local time
+    _cmsEncodeDateTimeNumber(&Header.date, &Icc ->Created);
+
+    Header.magic       = _cmsAdjustEndianess32(cmsMagicNumber);
+
+    // The platform signature is fixed at compile time
+#ifdef CMS_IS_WINDOWS_
+    Header.platform    = (cmsPlatformSignature) _cmsAdjustEndianess32(cmsSigMicrosoft);
+#else
+    Header.platform    = (cmsPlatformSignature) _cmsAdjustEndianess32(cmsSigMacintosh);
+#endif
+
+    Header.flags        = _cmsAdjustEndianess32(Icc -> flags);
+    Header.manufacturer = _cmsAdjustEndianess32(Icc -> manufacturer);
+    Header.model        = _cmsAdjustEndianess32(Icc -> model);
+
+    _cmsAdjustEndianess64(&Header.attributes, &Icc -> attributes);
+
+    // Rendering intent in the header (for embedded profiles)
+    Header.renderingIntent = _cmsAdjustEndianess32(Icc -> RenderingIntent);
+
+    // Illuminant is always D50
+    Header.illuminant.X = (cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(cmsD50_XYZ()->X));
+    Header.illuminant.Y = (cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(cmsD50_XYZ()->Y));
+    Header.illuminant.Z = (cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(cmsD50_XYZ()->Z));
+
+    // The creator field is always written as lcmsSignature; Icc->creator is not used here
+    Header.creator      = _cmsAdjustEndianess32(lcmsSignature);
+
+    memset(&Header.reserved, 0, sizeof(Header.reserved));
+
+    // Set profile ID. Endianness is always big endian, so the 16 bytes are copied verbatim
+    memmove(&Header.profileID, &Icc ->ProfileID, 16);
+
+    // Dump the header
+    if (!Icc -> IOhandler->Write(Icc->IOhandler, sizeof(cmsICCHeader), &Header)) return FALSE;
+
+    // Saves Tag directory
+
+    // Get true count: entries with a zero signature are placeholders and are not written
+    Count = 0;
+    for (i=0;  i < Icc -> TagCount; i++) {
+        if (Icc ->TagNames[i] != (cmsTagSignature) 0)
+            Count++;
+    }
+
+    // Store number of tags
+    if (!_cmsWriteUInt32Number(Icc ->IOhandler, Count)) return FALSE;
+
+    // One directory record (signature, offset, size) per real tag
+    for (i=0; i < Icc -> TagCount; i++) {
+
+        if (Icc ->TagNames[i] == (cmsTagSignature) 0) continue;   // It is just a placeholder
+
+        Tag.sig    = (cmsTagSignature) _cmsAdjustEndianess32((cmsUInt32Number) Icc -> TagNames[i]);
+        Tag.offset = _cmsAdjustEndianess32((cmsUInt32Number) Icc -> TagOffsets[i]);
+        Tag.size   = _cmsAdjustEndianess32((cmsUInt32Number) Icc -> TagSizes[i]);
+
+        if (!Icc ->IOhandler -> Write(Icc-> IOhandler, sizeof(cmsTagEntry), &Tag)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+// ----------------------------------------------------------------------- Set/Get several struct members
+
+
+// Returns the rendering intent kept in the profile's header fields
+cmsUInt32Number CMSEXPORT cmsGetHeaderRenderingIntent(cmsHPROFILE hProfile)
+{
+    return ((_cmsICCPROFILE*) hProfile)->RenderingIntent;
+}
+
+// Stores a new rendering intent in the profile's header fields
+void CMSEXPORT cmsSetHeaderRenderingIntent(cmsHPROFILE hProfile, cmsUInt32Number RenderingIntent)
+{
+    ((_cmsICCPROFILE*) hProfile)->RenderingIntent = RenderingIntent;
+}
+
+// Returns the flags word kept in the profile's header fields
+cmsUInt32Number CMSEXPORT cmsGetHeaderFlags(cmsHPROFILE hProfile)
+{
+    return (cmsUInt32Number) ((_cmsICCPROFILE*) hProfile)->flags;
+}
+
+// Stores a new flags word in the profile's header fields
+void CMSEXPORT cmsSetHeaderFlags(cmsHPROFILE hProfile, cmsUInt32Number Flags)
+{
+    ((_cmsICCPROFILE*) hProfile)->flags = (cmsUInt32Number) Flags;
+}
+
+// Returns the manufacturer value kept in the profile's header fields
+cmsUInt32Number CMSEXPORT cmsGetHeaderManufacturer(cmsHPROFILE hProfile)
+{
+    return ((_cmsICCPROFILE*) hProfile)->manufacturer;
+}
+
+// Stores a new manufacturer value in the profile's header fields
+void CMSEXPORT cmsSetHeaderManufacturer(cmsHPROFILE hProfile, cmsUInt32Number manufacturer)
+{
+    ((_cmsICCPROFILE*) hProfile)->manufacturer = manufacturer;
+}
+
+// Returns the creator value kept in the profile's header fields
+cmsUInt32Number CMSEXPORT cmsGetHeaderCreator(cmsHPROFILE hProfile)
+{
+    return ((_cmsICCPROFILE*) hProfile)->creator;
+}
+
+// Returns the model value kept in the profile's header fields
+cmsUInt32Number CMSEXPORT cmsGetHeaderModel(cmsHPROFILE hProfile)
+{
+    return ((_cmsICCPROFILE*) hProfile)->model;
+}
+
+// Stores a new model value in the profile's header fields
+void CMSEXPORT cmsSetHeaderModel(cmsHPROFILE hProfile, cmsUInt32Number model)
+{
+    ((_cmsICCPROFILE*) hProfile)->model = model;
+}
+
+// Copies the profile's 64-bit attributes value into *Flags
+void CMSEXPORT cmsGetHeaderAttributes(cmsHPROFILE hProfile, cmsUInt64Number* Flags)
+{
+    const _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    memmove(Flags, &Profile->attributes, sizeof(cmsUInt64Number));
+}
+
+// Replaces the profile's 64-bit attributes value with Flags
+void CMSEXPORT cmsSetHeaderAttributes(cmsHPROFILE hProfile, cmsUInt64Number Flags)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    memmove(&Profile->attributes, &Flags, sizeof(cmsUInt64Number));
+}
+
+// Copies the 16 bytes of the profile ID into the caller's buffer
+void CMSEXPORT cmsGetHeaderProfileID(cmsHPROFILE hProfile, cmsUInt8Number* ProfileID)
+{
+    const _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    memmove(ProfileID, Profile->ProfileID.ID8, 16);
+}
+
+// Replaces the profile ID with the 16 bytes found in the caller's buffer
+void CMSEXPORT cmsSetHeaderProfileID(cmsHPROFILE hProfile, cmsUInt8Number* ProfileID)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    memmove(&Profile->ProfileID, ProfileID, 16);
+}
+
+// Copies the profile's creation date/time into *Dest. Always returns TRUE.
+cmsBool  CMSEXPORT cmsGetHeaderCreationDateTime(cmsHPROFILE hProfile, struct tm *Dest)
+{
+    const _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    memmove(Dest, &Profile->Created, sizeof(struct tm));
+
+    return TRUE;
+}
+
+// Returns the profile connection space signature of the profile
+cmsColorSpaceSignature CMSEXPORT cmsGetPCS(cmsHPROFILE hProfile)
+{
+    return ((_cmsICCPROFILE*) hProfile)->PCS;
+}
+
+// Sets the profile connection space signature of the profile
+void CMSEXPORT cmsSetPCS(cmsHPROFILE hProfile, cmsColorSpaceSignature pcs)
+{
+    ((_cmsICCPROFILE*) hProfile)->PCS = pcs;
+}
+
+// Returns the color space signature of the profile
+cmsColorSpaceSignature CMSEXPORT cmsGetColorSpace(cmsHPROFILE hProfile)
+{
+    return ((_cmsICCPROFILE*) hProfile)->ColorSpace;
+}
+
+// Sets the color space signature of the profile
+void CMSEXPORT cmsSetColorSpace(cmsHPROFILE hProfile, cmsColorSpaceSignature sig)
+{
+    ((_cmsICCPROFILE*) hProfile)->ColorSpace = sig;
+}
+
+// Returns the device class signature of the profile
+cmsProfileClassSignature CMSEXPORT cmsGetDeviceClass(cmsHPROFILE hProfile)
+{
+    return ((_cmsICCPROFILE*) hProfile)->DeviceClass;
+}
+
+// Sets the device class signature of the profile
+void CMSEXPORT cmsSetDeviceClass(cmsHPROFILE hProfile, cmsProfileClassSignature sig)
+{
+    ((_cmsICCPROFILE*) hProfile)->DeviceClass = sig;
+}
+
+// Returns the profile version exactly as it is stored (encoded form)
+cmsUInt32Number CMSEXPORT cmsGetEncodedICCversion(cmsHPROFILE hProfile)
+{
+    return ((_cmsICCPROFILE*) hProfile)->Version;
+}
+
+// Stores an already encoded version value in the profile, without any conversion
+void CMSEXPORT cmsSetEncodedICCversion(cmsHPROFILE hProfile, cmsUInt32Number Version)
+{
+    ((_cmsICCPROFILE*) hProfile)->Version = Version;
+}
+
+// Splits `in` into its digits in base BaseIn and reassembles those same digits as a number
+// in base BaseOut. For example, decimal 420 read as base 10 and rebuilt in base 16 gives 0x420.
+static
+cmsUInt32Number BaseToBase(cmsUInt32Number in, int BaseIn, int BaseOut)
+{
+    char Digits[100];
+    int nDigits = 0;
+    int k;
+    cmsUInt32Number Result = 0;
+
+    // Peel off the digits, least significant first
+    while (in > 0 && nDigits < 100) {
+
+        Digits[nDigits] = (char) (in % BaseIn);
+        in /= BaseIn;
+        nDigits++;
+    }
+
+    // Rebuild from the most significant digit down
+    for (k = nDigits; k-- > 0; ) {
+        Result = Result * BaseOut + Digits[k];
+    }
+
+    return Result;
+}
+
+// Sets the profile version from a floating point number, e.g. 4.2 -> 0x4200000
+void  CMSEXPORT cmsSetProfileVersion(cmsHPROFILE hProfile, cmsFloat64Number Version)
+{
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    // Version times 100, rounded to the nearest integer (4.2 -> 420)
+    cmsUInt32Number Hundredths = (cmsUInt32Number) floor(Version * 100.0 + 0.5);
+
+    // Same decimal digits reinterpreted as hexadecimal digits (420 -> 0x420)
+    cmsUInt32Number Encoded = BaseToBase(Hundredths, 10, 16);
+
+    Profile->Version = Encoded << 16;
+}
+
+// Returns the profile version as a floating point number; inverse of cmsSetProfileVersion
+cmsFloat64Number CMSEXPORT cmsGetProfileVersion(cmsHPROFILE hProfile)
+{
+    const _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    // Only the upper 16 bits are used
+    cmsUInt32Number Encoded = Profile->Version >> 16;
+
+    // Hexadecimal digits reinterpreted as decimal digits, then scaled back by 100
+    cmsUInt32Number Hundredths = BaseToBase(Encoded, 16, 10);
+
+    return Hundredths / 100.0;
+}
+// --------------------------------------------------------------------------------------------------------------
+
+
+// Opens a profile for reading through an already built IO handler.
+// If the header cannot be read the placeholder is closed with cmsCloseProfile (which also
+// closes the attached IO handler) and NULL is returned.
+cmsHPROFILE CMSEXPORT cmsOpenProfileFromIOhandlerTHR(cmsContext ContextID, cmsIOHANDLER* io)
+{
+    cmsHPROFILE hProfile = cmsCreateProfilePlaceholder(ContextID);
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    if (hProfile == NULL)
+        return NULL;
+
+    Profile->IOhandler = io;
+
+    if (_cmsReadHeader(Profile))
+        return hProfile;
+
+    // Header could not be read: release everything
+    cmsCloseProfile(hProfile);
+    return NULL;
+}
+
+// Opens a profile through an already built IO handler, for reading or for writing.
+// With `write` set the profile is only flagged as writable and no header is read.
+// Otherwise the header is read; on failure the placeholder is closed and NULL is returned.
+cmsHPROFILE CMSEXPORT cmsOpenProfileFromIOhandler2THR(cmsContext ContextID, cmsIOHANDLER* io, cmsBool write)
+{
+    cmsHPROFILE hProfile = cmsCreateProfilePlaceholder(ContextID);
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    if (hProfile == NULL)
+        return NULL;
+
+    Profile->IOhandler = io;
+
+    if (write) {
+
+        Profile->IsWrite = TRUE;
+        return hProfile;
+    }
+
+    if (_cmsReadHeader(Profile))
+        return hProfile;
+
+    // Header could not be read: release everything
+    cmsCloseProfile(hProfile);
+    return NULL;
+}
+
+
+// Opens a profile stored in a disk file. The first character of sAccess selects the mode:
+// 'w' or 'W' flags the profile as writable without reading anything, any other mode reads
+// the header. Returns NULL if the file cannot be opened or the header cannot be read.
+cmsHPROFILE CMSEXPORT cmsOpenProfileFromFileTHR(cmsContext ContextID, const char *lpFileName, const char *sAccess)
+{
+    cmsHPROFILE hProfile = cmsCreateProfilePlaceholder(ContextID);
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    if (hProfile == NULL)
+        return NULL;
+
+    Profile->IOhandler = cmsOpenIOhandlerFromFile(ContextID, lpFileName, sAccess);
+
+    if (Profile->IOhandler != NULL) {
+
+        if (*sAccess == 'W' || *sAccess == 'w') {
+
+            Profile->IsWrite = TRUE;
+            return hProfile;
+        }
+
+        if (_cmsReadHeader(Profile))
+            return hProfile;
+    }
+
+    // Either the file or its header could not be read: release everything
+    cmsCloseProfile(hProfile);
+    return NULL;
+}
+
+
+// Same as cmsOpenProfileFromFileTHR, using a NULL context
+cmsHPROFILE CMSEXPORT cmsOpenProfileFromFile(const char *ICCProfile, const char *sAccess)
+{
+    cmsHPROFILE hProfile = cmsOpenProfileFromFileTHR(NULL, ICCProfile, sAccess);
+
+    return hProfile;
+}
+
+
+// Opens a profile on top of an already open FILE stream. A first sAccess character of 'w'
+// flags the profile as writable without reading anything; any other mode reads the header.
+// Returns NULL if the handler cannot be built or the header cannot be read.
+cmsHPROFILE  CMSEXPORT cmsOpenProfileFromStreamTHR(cmsContext ContextID, FILE* ICCProfile, const char *sAccess)
+{
+    cmsHPROFILE hProfile = cmsCreateProfilePlaceholder(ContextID);
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    if (hProfile == NULL)
+        return NULL;
+
+    Profile->IOhandler = cmsOpenIOhandlerFromStream(ContextID, ICCProfile);
+
+    if (Profile->IOhandler != NULL) {
+
+        if (*sAccess == 'w') {
+
+            Profile->IsWrite = TRUE;
+            return hProfile;
+        }
+
+        if (_cmsReadHeader(Profile))
+            return hProfile;
+    }
+
+    // Either the handler or the header failed: release everything
+    cmsCloseProfile(hProfile);
+    return NULL;
+}
+
+// Same as cmsOpenProfileFromStreamTHR, using a NULL context
+cmsHPROFILE  CMSEXPORT cmsOpenProfileFromStream(FILE* ICCProfile, const char *sAccess)
+{
+    cmsHPROFILE hProfile = cmsOpenProfileFromStreamTHR(NULL, ICCProfile, sAccess);
+
+    return hProfile;
+}
+
+
+// Opens a profile held in a memory block of dwSize bytes, for reading.
+// Returns NULL if the memory handler cannot be built or the header cannot be read.
+cmsHPROFILE CMSEXPORT cmsOpenProfileFromMemTHR(cmsContext ContextID, const void* MemPtr, cmsUInt32Number dwSize)
+{
+    cmsHPROFILE hProfile = cmsCreateProfilePlaceholder(ContextID);
+    _cmsICCPROFILE* Profile = (_cmsICCPROFILE*) hProfile;
+
+    if (hProfile == NULL)
+        return NULL;
+
+    // The const qualifier is dropped only because the memory IO handler shares one entry
+    // point for reading and writing. The block is opened in "r" mode here.
+    Profile->IOhandler = cmsOpenIOhandlerFromMem(ContextID, (void*) MemPtr, dwSize, "r");
+
+    if (Profile->IOhandler != NULL && _cmsReadHeader(Profile))
+        return hProfile;
+
+    // Either the handler or the header failed: release everything
+    cmsCloseProfile(hProfile);
+    return NULL;
+}
+
+// Same as cmsOpenProfileFromMemTHR, using a NULL context
+cmsHPROFILE CMSEXPORT cmsOpenProfileFromMem(const void* MemPtr, cmsUInt32Number dwSize)
+{
+    cmsHPROFILE hProfile = cmsOpenProfileFromMemTHR(NULL, MemPtr, dwSize);
+
+    return hProfile;
+}
+
+
+
+// Dump tag contents. If the profile is being modified, untouched tags are copied from FileOrig.
+//
+// For every real, non-linked tag the current write position of Icc->IOhandler is stored as
+// the tag offset, the tag is written (blind copy, raw, or cooked through its type handler),
+// the resulting size is stored and the stream is aligned. Placeholders (zero signature) and
+// linked tags are skipped.
+// Returns FALSE on any I/O or allocation failure, TRUE otherwise.
+static
+cmsBool SaveTags(_cmsICCPROFILE* Icc, _cmsICCPROFILE* FileOrig)
+{
+    cmsUInt8Number* Data;
+    cmsUInt32Number i;
+    cmsUInt32Number Begin;
+    cmsIOHANDLER* io = Icc ->IOhandler;
+    cmsTagDescriptor* TagDescriptor;
+    cmsTagTypeSignature TypeBase;
+    cmsTagTypeSignature Type;
+    cmsTagTypeHandler* TypeHandler;
+    cmsFloat64Number   Version = cmsGetProfileVersion((cmsHPROFILE) Icc);
+    cmsTagTypeHandler LocalTypeHandler;
+
+    for (i=0; i < Icc -> TagCount; i++) {
+
+        if (Icc ->TagNames[i] == (cmsTagSignature) 0) continue;
+
+        // Linked tags are not written
+        if (Icc ->TagLinked[i] != (cmsTagSignature) 0) continue;
+
+        Icc -> TagOffsets[i] = Begin = io ->UsedSpace;
+
+        Data = (cmsUInt8Number*)  Icc -> TagPtrs[i];
+
+        if (!Data) {
+
+            // Reach here if we are copying a tag from a disk-based ICC profile which has not been modified by user.
+            // In this case a blind copy of the block data is performed.
+            // Without an original IO handler there is nothing to copy from, so the tag is skipped.
+            if (FileOrig != NULL && FileOrig ->IOhandler != NULL && Icc -> TagOffsets[i]) {
+
+                cmsUInt32Number TagSize   = FileOrig -> TagSizes[i];
+                cmsUInt32Number TagOffset = FileOrig -> TagOffsets[i];
+                void* Mem;
+
+                if (!FileOrig ->IOhandler->Seek(FileOrig ->IOhandler, TagOffset)) return FALSE;
+
+                Mem = _cmsMalloc(Icc ->ContextID, TagSize);
+                if (Mem == NULL) return FALSE;
+
+                // The temporary buffer has to be released on the failure paths as well
+                if (FileOrig ->IOhandler->Read(FileOrig->IOhandler, Mem, TagSize, 1) != 1 ||
+                    !io ->Write(io, TagSize, Mem)) {
+
+                    _cmsFree(Icc ->ContextID, Mem);
+                    return FALSE;
+                }
+                _cmsFree(Icc ->ContextID, Mem);
+
+                Icc -> TagSizes[i] = (io ->UsedSpace - Begin);
+
+
+                // Align to 32 bit boundary.
+                if (! _cmsWriteAlignment(io))
+                    return FALSE;
+            }
+
+            continue;
+        }
+
+
+        // Should this tag be saved as RAW? If so, tagsizes should be specified in advance (no further cooking is done)
+        if (Icc ->TagSaveAsRaw[i]) {
+
+            // Write returns a boolean: test it for truth like every other call site does
+            if (!io -> Write(io, Icc ->TagSizes[i], Data)) return FALSE;
+        }
+        else {
+
+            // Search for support on this tag
+            TagDescriptor = _cmsGetTagDescriptor(Icc-> ContextID, Icc -> TagNames[i]);
+            if (TagDescriptor == NULL) continue;                        // Unsupported, ignore it
+
+            // Let the descriptor pick the type when it can, otherwise use its first supported type
+            if (TagDescriptor ->DecideType != NULL) {
+
+                Type = TagDescriptor ->DecideType(Version, Data);
+            }
+            else {
+
+                Type = TagDescriptor ->SupportedTypes[0];
+            }
+
+            TypeHandler =  _cmsGetTagTypeHandler(Icc->ContextID, Type);
+
+            if (TypeHandler == NULL) {
+                cmsSignalError(Icc ->ContextID, cmsERROR_INTERNAL, "(Internal) no handler for tag %x", Icc -> TagNames[i]);
+                continue;
+            }
+
+            TypeBase = TypeHandler ->Signature;
+            if (!_cmsWriteTypeBase(io, TypeBase))
+                return FALSE;
+
+            // Work on a local copy so context and version can be passed without touching the shared handler
+            LocalTypeHandler = *TypeHandler;
+            LocalTypeHandler.ContextID  = Icc ->ContextID;
+            LocalTypeHandler.ICCVersion = Icc ->Version;
+            if (!LocalTypeHandler.WritePtr(&LocalTypeHandler, io, Data, TagDescriptor ->ElemCount)) {
+
+                char String[5];
+
+                _cmsTagSignature2String(String, (cmsTagSignature) TypeBase);
+                cmsSignalError(Icc ->ContextID, cmsERROR_WRITE, "Couldn't write type '%s'", String);
+                return FALSE;
+            }
+        }
+
+
+        Icc -> TagSizes[i] = (io ->UsedSpace - Begin);
+
+        // Align to 32 bit boundary.
+        if (! _cmsWriteAlignment(io))
+            return FALSE;
+    }
+
+
+    return TRUE;
+}
+
+
+// Gives every linked tag the offset and size of the tag it links to. Links whose target
+// cannot be found in the directory are left untouched. Always returns TRUE.
+static
+cmsBool SetLinks( _cmsICCPROFILE* Icc)
+{
+    cmsUInt32Number n;
+
+    for (n = 0; n < Icc->TagCount; n++) {
+
+        cmsTagSignature Target = Icc->TagLinked[n];
+        int src;
+
+        // Not a linked tag
+        if (Target == (cmsTagSignature) 0)
+            continue;
+
+        // Locate the target entry itself, without following further links
+        src = _cmsSearchTag(Icc, Target, FALSE);
+        if (src < 0)
+            continue;
+
+        Icc->TagOffsets[n] = Icc->TagOffsets[src];
+        Icc->TagSizes[n]   = Icc->TagSizes[src];
+    }
+
+    return TRUE;
+}
+
+// Low-level save to IOHANDLER. It returns the number of bytes used to
+// store the profile, or zero on error. io may be NULL and in this case
+// no data is written--only sizes are calculated.
+//
+// The profile structure is modified while saving (IO handler, tag offsets and sizes), so a
+// snapshot is taken first and copied back before returning, on success and on error alike.
+// The whole operation runs with the profile's user mutex held.
+cmsUInt32Number CMSEXPORT cmsSaveProfileToIOhandler(cmsHPROFILE hProfile, cmsIOHANDLER* io)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    _cmsICCPROFILE Keep;
+    cmsIOHANDLER* PrevIO = NULL;
+    cmsUInt32Number UsedSpace;
+    cmsContext ContextID;
+
+    _cmsAssert(hProfile != NULL);
+    
+    if (!_cmsLockMutex(Icc->ContextID, Icc->UsrMutex)) return 0;
+
+    // Snapshot of the profile, including its original IO handler
+    memmove(&Keep, Icc, sizeof(_cmsICCPROFILE));
+
+    // Pass #1 runs against the handler returned by cmsOpenIOhandlerFromNULL
+    // (presumably a sink that only counts bytes -- confirm against its implementation)
+    ContextID = cmsGetProfileContextID(hProfile);
+    PrevIO = Icc ->IOhandler = cmsOpenIOhandlerFromNULL(ContextID);
+    if (PrevIO == NULL) {
+        _cmsUnlockMutex(Icc->ContextID, Icc->UsrMutex);
+        return 0;
+    }
+
+    // Pass #1 does compute offsets
+    // The header is written with a size of 0 here; the real size is only known after this pass.
+    // &Keep is handed to SaveTags as the original profile to copy untouched tags from.
+
+    if (!_cmsWriteHeader(Icc, 0)) goto Error;
+    if (!SaveTags(Icc, &Keep)) goto Error;
+
+    UsedSpace = PrevIO ->UsedSpace;
+
+    // Pass #2 does save to iohandler
+    // It is skipped when io is NULL, in which case only the size is returned
+
+    if (io != NULL) {
+
+        Icc ->IOhandler = io;
+        if (!SetLinks(Icc)) goto Error;
+        if (!_cmsWriteHeader(Icc, UsedSpace)) goto Error;
+        if (!SaveTags(Icc, &Keep)) goto Error;
+    }
+
+    // Restore the profile to its state before saving, then release the pass #1 handler
+    memmove(Icc, &Keep, sizeof(_cmsICCPROFILE));
+    if (!cmsCloseIOhandler(PrevIO)) 
+        UsedSpace = 0; // As a error marker
+
+    _cmsUnlockMutex(Icc->ContextID, Icc->UsrMutex);
+
+    return UsedSpace;
+
+
+Error:
+    // Same cleanup as the success path. The caller's io is not closed here.
+    cmsCloseIOhandler(PrevIO);
+    memmove(Icc, &Keep, sizeof(_cmsICCPROFILE));
+    _cmsUnlockMutex(Icc->ContextID, Icc->UsrMutex);
+
+    return 0;
+}
+
+
+// Saves a profile to a disk file. If either saving or closing the file fails, the file is
+// removed and FALSE is returned.
+cmsBool  CMSEXPORT cmsSaveProfileToFile(cmsHPROFILE hProfile, const char* FileName)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsIOHANDLER* io = cmsOpenIOhandlerFromFile(ContextID, FileName, "w");
+    cmsBool Saved, Closed, rc;
+
+    if (io == NULL)
+        return FALSE;
+
+    // The handler is closed whatever the outcome of the save
+    Saved  = (cmsSaveProfileToIOhandler(hProfile, io) != 0);
+    Closed = cmsCloseIOhandler(io);
+    rc = Saved & Closed;
+
+    // remove() is C99 per 7.19.4.1. Its return value is deliberately ignored.
+    if (rc == FALSE)
+        remove(FileName);
+
+    return rc;
+}
+
+// Same as cmsSaveProfileToFile, but the profile is written to an already open FILE stream.
+// The IO handler built on top of the stream is closed before returning.
+cmsBool CMSEXPORT cmsSaveProfileToStream(cmsHPROFILE hProfile, FILE* Stream)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsIOHANDLER* io = cmsOpenIOhandlerFromStream(ContextID, Stream);
+    cmsBool Saved, Closed;
+
+    if (io == NULL)
+        return FALSE;
+
+    // The handler is closed whatever the outcome of the save
+    Saved  = (cmsSaveProfileToIOhandler(hProfile, io) != 0);
+    Closed = cmsCloseIOhandler(io);
+
+    return Saved & Closed;
+}
+
+
+// Saves a profile to a memory block. With MemPtr == NULL nothing is written: the space
+// needed is computed and returned in *BytesNeeded. Otherwise *BytesNeeded is taken as the
+// size of the block at MemPtr and the profile is written there.
+cmsBool CMSEXPORT cmsSaveProfileToMem(cmsHPROFILE hProfile, void *MemPtr, cmsUInt32Number* BytesNeeded)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsIOHANDLER* io;
+    cmsBool rc;
+
+    _cmsAssert(BytesNeeded != NULL);
+
+    if (MemPtr != NULL) {
+
+        // That is a real write operation
+        io = cmsOpenIOhandlerFromMem(ContextID, MemPtr, *BytesNeeded, "w");
+        if (io == NULL)
+            return FALSE;
+
+        rc = (cmsSaveProfileToIOhandler(hProfile, io) != 0);
+        rc &= cmsCloseIOhandler(io);
+
+        return rc;
+    }
+
+    // Only the needed space is requested; a size of zero means failure
+    *BytesNeeded = cmsSaveProfileToIOhandler(hProfile, NULL);
+
+    return (*BytesNeeded != 0) ? TRUE : FALSE;
+}
+
+
+
+// Closes a profile and frees everything attached to it: a profile opened for writing is
+// first saved to its physical file, then the tags held in memory, the IO handler, the mutex
+// and the profile structure itself are released. Returns FALSE for a NULL handle or when
+// saving or closing the IO handler fails.
+cmsBool  CMSEXPORT cmsCloseProfile(cmsHPROFILE hProfile)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    cmsBool  ok = TRUE;
+    cmsUInt32Number n;
+
+    if (Icc == NULL)
+        return FALSE;
+
+    // Was open in write mode?
+    if (Icc->IsWrite) {
+
+        // The flag is cleared before saving so that no further write is triggered
+        Icc->IsWrite = FALSE;
+        ok &= cmsSaveProfileToFile(hProfile, Icc->IOhandler->PhysicalFile);
+    }
+
+    // Release every tag that is held in memory
+    for (n = 0; n < Icc->TagCount; n++) {
+
+        cmsTagTypeHandler* Handler;
+
+        if (!Icc->TagPtrs[n])
+            continue;
+
+        Handler = Icc->TagTypeHandlers[n];
+
+        if (Handler == NULL) {
+
+            // No type handler available: release the block directly
+            _cmsFree(Icc->ContextID, Icc->TagPtrs[n]);
+        }
+        else {
+
+            // Context and version are passed through a local copy of the handler
+            cmsTagTypeHandler Local = *Handler;
+
+            Local.ContextID  = Icc->ContextID;
+            Local.ICCVersion = Icc->Version;
+            Local.FreePtr(&Local, Icc->TagPtrs[n]);
+        }
+    }
+
+    if (Icc->IOhandler != NULL)
+        ok &= cmsCloseIOhandler(Icc->IOhandler);
+
+    _cmsDestroyMutex(Icc->ContextID, Icc->UsrMutex);
+
+    // Free placeholder memory
+    _cmsFree(Icc->ContextID, Icc);
+
+    return ok;
+}
+
+
+// -------------------------------------------------------------------------------------------------------------------
+
+
+// Checks whether a tag type signature is listed among the types accepted by a tag descriptor.
+static
+cmsBool IsTypeSupported(cmsTagDescriptor* TagDescriptor, cmsTagTypeSignature Type)
+{
+    cmsUInt32Number Limit = TagDescriptor->nSupportedTypes;
+    cmsUInt32Number k = 0;
+
+    // Clamp the declared count to MAX_TYPES_IN_LCMS_PLUGIN before scanning
+    if (Limit > MAX_TYPES_IN_LCMS_PLUGIN) Limit = MAX_TYPES_IN_LCMS_PLUGIN;
+
+    while (k < Limit) {
+        if (TagDescriptor->SupportedTypes[k] == Type) return TRUE;
+        k++;
+    }
+    return FALSE;
+}
+
+
+// That's the main read function. Returns the cooked object for tag 'sig', reading it on first use.
+// The returned pointer is kept in the profile (Icc->TagPtrs), so callers must not free it. NULL on error.
+void* CMSEXPORT cmsReadTag(cmsHPROFILE hProfile, cmsTagSignature sig)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    cmsIOHANDLER* io = Icc ->IOhandler;
+    cmsTagTypeHandler* TypeHandler;
+    cmsTagTypeHandler LocalTypeHandler;
+    cmsTagDescriptor*  TagDescriptor;
+    cmsTagTypeSignature BaseType;
+    cmsUInt32Number Offset, TagSize;
+    cmsUInt32Number ElemCount;
+    int n;
+
+    if (!_cmsLockMutex(Icc->ContextID, Icc ->UsrMutex)) return NULL;
+
+    n = _cmsSearchTag(Icc, sig, TRUE);
+    if (n < 0) goto Error;               // Not found, return NULL
+
+    // If the element is already in memory, return the pointer
+    if (Icc -> TagPtrs[n]) {
+
+        if (Icc->TagTypeHandlers[n] == NULL) goto Error;
+
+        // Sanity check
+        BaseType = Icc->TagTypeHandlers[n]->Signature;
+        if (BaseType == 0) goto Error;
+
+        TagDescriptor = _cmsGetTagDescriptor(Icc->ContextID, sig);
+        if (TagDescriptor == NULL) goto Error;
+
+        if (!IsTypeSupported(TagDescriptor, BaseType)) goto Error;
+
+        if (Icc ->TagSaveAsRaw[n]) goto Error;  // We don't support read raw tags as cooked
+
+        _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+        return Icc -> TagPtrs[n];
+    }
+
+    // We need to read it. Get the offset and size to the file
+    Offset    = Icc -> TagOffsets[n];
+    TagSize   = Icc -> TagSizes[n];
+
+    // The type base alone takes 8 bytes, anything shorter cannot be a valid tag
+    if (TagSize < 8) goto Error;
+
+    // Seek to its location
+    if (!io -> Seek(io, Offset))
+        goto Error;
+
+    // Search for support on this tag
+    TagDescriptor = _cmsGetTagDescriptor(Icc-> ContextID, sig);
+    if (TagDescriptor == NULL) {
+
+        char String[5];
+
+        _cmsTagSignature2String(String, sig);
+        // An unknown element was found.
+        cmsSignalError(Icc ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown tag type '%s' found.", String);
+        goto Error;     // Unsupported.
+    }
+
+    // if supported, get type and check if in list
+    BaseType = _cmsReadTypeBase(io);
+    if (BaseType == 0) goto Error;
+
+    if (!IsTypeSupported(TagDescriptor, BaseType)) goto Error;
+
+    TagSize  -= 8;       // Already read by the type base logic
+
+    // Get type handler
+    TypeHandler = _cmsGetTagTypeHandler(Icc ->ContextID, BaseType);
+    if (TypeHandler == NULL) goto Error;
+    LocalTypeHandler = *TypeHandler;
+
+    // Read the tag
+    Icc -> TagTypeHandlers[n] = TypeHandler;
+
+    LocalTypeHandler.ContextID = Icc ->ContextID;
+    LocalTypeHandler.ICCVersion = Icc ->Version;
+    Icc -> TagPtrs[n] = LocalTypeHandler.ReadPtr(&LocalTypeHandler, io, &ElemCount, TagSize);
+
+    // The tag type is supported, but something wrong happened and we cannot read the tag.
+    // let know the user about this (although it is just a warning)
+    if (Icc -> TagPtrs[n] == NULL) {
+
+        char String[5];
+
+        _cmsTagSignature2String(String, sig);
+        cmsSignalError(Icc ->ContextID, cmsERROR_CORRUPTION_DETECTED, "Corrupted tag '%s'", String);
+        goto Error;
+    }
+
+    // This is a weird error that may be a symptom of something more serious, the number of
+    // stored item is actually less than the number of required elements.
+    if (ElemCount < TagDescriptor ->ElemCount) {
+
+        char String[5];
+
+        _cmsTagSignature2String(String, sig);
+        cmsSignalError(Icc ->ContextID, cmsERROR_CORRUPTION_DETECTED, "'%s' Inconsistent number of items: expected %d, got %d",
+            String, TagDescriptor ->ElemCount, ElemCount);
+        // Discard the rejected object; left cached, the next read would hand it out as valid
+        LocalTypeHandler.FreePtr(&LocalTypeHandler, Icc -> TagPtrs[n]);
+        Icc -> TagPtrs[n] = NULL;
+        goto Error;
+    }
+
+    // Return the data
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return Icc -> TagPtrs[n];
+
+    // Return error and unlock the data
+Error:
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return NULL;
+}
+
+
+// Get true type of data. Returns 0 when the tag is not found or has no type handler recorded
+cmsTagTypeSignature _cmsGetTagTrueType(cmsHPROFILE hProfile, cmsTagSignature sig)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    cmsTagTypeHandler* TypeHandler;
+    int n;
+
+    // Search for given tag in ICC profile directory
+    n = _cmsSearchTag(Icc, sig, TRUE);
+    if (n < 0) return (cmsTagTypeSignature) 0;                // Not found, return NULL
+    // Get the handler. It is filled in by cmsReadTag()/cmsWriteTag(), so guard against an unset slot
+    TypeHandler =  Icc -> TagTypeHandlers[n];
+    if (TypeHandler == NULL) return (cmsTagTypeSignature) 0;
+    return TypeHandler ->Signature;
+}
+
+
+// Write a single tag. This just keeps track of the tag in a list of "to be written". If the tag is already
+// in that list, the previous version is deleted. A NULL 'data' deletes the tag instead.
+cmsBool CMSEXPORT cmsWriteTag(cmsHPROFILE hProfile, cmsTagSignature sig, const void* data)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    cmsTagTypeHandler* TypeHandler = NULL;
+    cmsTagTypeHandler LocalTypeHandler;
+    cmsTagDescriptor* TagDescriptor = NULL;
+    cmsTagTypeSignature Type;
+    int i;
+    cmsFloat64Number Version;
+    char TypeString[5], SigString[5];
+
+    if (!_cmsLockMutex(Icc->ContextID, Icc ->UsrMutex)) return FALSE;
+
+    // To delete tags.
+    if (data == NULL) {
+
+         // Delete the tag
+         i = _cmsSearchTag(Icc, sig, FALSE);
+         if (i >= 0) {
+                
+             // Use zero as a mark of deleted 
+             _cmsDeleteTagByPos(Icc, i);
+             Icc ->TagNames[i] = (cmsTagSignature) 0;
+             _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+             return TRUE;
+         }
+         // Didn't find the tag
+        goto Error;
+    }
+
+    if (!_cmsNewTag(Icc, sig, &i)) goto Error;
+
+    // This is not raw
+    Icc ->TagSaveAsRaw[i] = FALSE;
+
+    // This is not a link
+    Icc ->TagLinked[i] = (cmsTagSignature) 0;
+
+    // Get information about the TAG.
+    TagDescriptor = _cmsGetTagDescriptor(Icc-> ContextID, sig);
+    if (TagDescriptor == NULL){
+         cmsSignalError(Icc ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported tag '%x'", sig);
+        goto Error;
+    }
+
+
+    // Now we need to know which type to use. It depends on the version.
+    Version = cmsGetProfileVersion(hProfile);
+
+    if (TagDescriptor ->DecideType != NULL) {
+
+        // Let the tag descriptor decide the type, based on the profile version and
+        // the data. This is useful for example on parametric curves, where
+        // curves specified by a table cannot be saved as parametric and need
+        // to be cast to single v2-curves, even on v4 profiles.
+
+        Type = TagDescriptor ->DecideType(Version, data);
+    }
+    else {
+
+        Type = TagDescriptor ->SupportedTypes[0];
+    }
+
+    // Does the tag support this type?
+    if (!IsTypeSupported(TagDescriptor, Type)) {
+
+        _cmsTagSignature2String(TypeString, (cmsTagSignature) Type);
+        _cmsTagSignature2String(SigString,  sig);
+
+        cmsSignalError(Icc ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported type '%s' for tag '%s'", TypeString, SigString);
+        goto Error;
+    }
+
+    // Do we have a handler for this type?
+    TypeHandler =  _cmsGetTagTypeHandler(Icc->ContextID, Type);
+    if (TypeHandler == NULL) {
+
+        _cmsTagSignature2String(TypeString, (cmsTagSignature) Type);
+        _cmsTagSignature2String(SigString,  sig);
+
+        cmsSignalError(Icc ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported type '%s' for tag '%s'", TypeString, SigString);
+        goto Error;           // Should never happen
+    }
+
+
+    // Fill fields on icc structure
+    Icc ->TagTypeHandlers[i]  = TypeHandler;
+    Icc ->TagNames[i]         = sig;
+    Icc ->TagSizes[i]         = 0;
+    Icc ->TagOffsets[i]       = 0;
+    // The profile stores its own duplicate of the caller's data, made by the type handler
+    LocalTypeHandler = *TypeHandler;
+    LocalTypeHandler.ContextID  = Icc ->ContextID;
+    LocalTypeHandler.ICCVersion = Icc ->Version;
+    Icc ->TagPtrs[i]            = LocalTypeHandler.DupPtr(&LocalTypeHandler, data, TagDescriptor ->ElemCount);
+
+    if (Icc ->TagPtrs[i] == NULL)  {
+
+        _cmsTagSignature2String(TypeString, (cmsTagSignature) Type);
+        _cmsTagSignature2String(SigString,  sig);
+        cmsSignalError(Icc ->ContextID, cmsERROR_CORRUPTION_DETECTED, "Malformed struct in type '%s' for tag '%s'", TypeString, SigString);
+
+        goto Error;
+    }
+
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return TRUE;
+
+Error:
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return FALSE;
+
+}
+
+// Read and write raw data. The only way those functions can work and stay consistent with normal read and write
+// is to do an additional step of serialization. That means, readRaw would issue a normal read and then convert the obtained
+// data to raw bytes by using the "write" serialization logic. And vice-versa. I know this may end in situations where
+// raw data written does not exactly correspond with the raw data proposed to cmsWriteRaw data, but this approach allows
+// to write a tag as raw data and then read it as handled. Returns the number of bytes, or 0 on error.
+
+cmsUInt32Number CMSEXPORT cmsReadRawTag(cmsHPROFILE hProfile, cmsTagSignature sig, void* data, cmsUInt32Number BufferSize)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    void *Object;
+    int i;
+    cmsIOHANDLER* MemIO;
+    cmsTagTypeHandler* TypeHandler = NULL;
+    cmsTagTypeHandler LocalTypeHandler;
+    cmsTagDescriptor* TagDescriptor = NULL;
+    cmsUInt32Number rc;
+    cmsUInt32Number Offset, TagSize;
+
+    if (!_cmsLockMutex(Icc->ContextID, Icc ->UsrMutex)) return 0;
+
+    // Search for given tag in ICC profile directory
+    i = _cmsSearchTag(Icc, sig, TRUE);
+    if (i < 0) goto Error;                 // Not found, 
+
+    // It is already read?
+    if (Icc -> TagPtrs[i] == NULL) {
+
+        // No yet, get original position
+        Offset   = Icc ->TagOffsets[i];
+        TagSize  = Icc ->TagSizes[i];
+
+        // read the data directly, don't keep copy
+        if (data != NULL) {
+
+            if (BufferSize < TagSize)
+                TagSize = BufferSize;
+
+            if (!Icc ->IOhandler ->Seek(Icc ->IOhandler, Offset)) goto Error;
+            if (!Icc ->IOhandler ->Read(Icc ->IOhandler, data, 1, TagSize)) goto Error;
+
+            _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+            return TagSize;
+        }
+
+        _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+        return Icc ->TagSizes[i];
+    }
+
+    // The data has been already read, or written. But wait!, maybe the user chose to save as
+    // raw data. In this case, return the raw data directly
+    if (Icc ->TagSaveAsRaw[i]) {
+
+        if (data != NULL)  {
+
+            TagSize  = Icc ->TagSizes[i];
+            if (BufferSize < TagSize)
+                TagSize = BufferSize;
+
+            memmove(data, Icc ->TagPtrs[i], TagSize);
+
+            _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+            return TagSize;
+        }
+
+        _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+        return Icc ->TagSizes[i];
+    }
+
+    // Already read, or previously set by cmsWriteTag(). We need to serialize that
+    // data to raw in order to maintain consistency.
+
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    Object = cmsReadTag(hProfile, sig);
+    if (!_cmsLockMutex(Icc->ContextID, Icc ->UsrMutex)) return 0;
+
+    if (Object == NULL) goto Error;
+
+    // Now we need to serialize to a memory block: just use a memory iohandler
+
+    if (data == NULL) {
+        MemIO = cmsOpenIOhandlerFromNULL(cmsGetProfileContextID(hProfile));
+    } else{
+        MemIO = cmsOpenIOhandlerFromMem(cmsGetProfileContextID(hProfile), data, BufferSize, "w");
+    }
+    if (MemIO == NULL) goto Error;
+
+    // Obtain type handling for the tag
+    TypeHandler = Icc ->TagTypeHandlers[i];
+    TagDescriptor = _cmsGetTagDescriptor(Icc-> ContextID, sig);
+    // Both are required to serialize. The memory iohandler is already open at this
+    // point, so it has to be closed on every failure path to avoid leaking it
+    if (TagDescriptor == NULL || TypeHandler == NULL) {
+        cmsCloseIOhandler(MemIO);
+        goto Error;
+    }
+
+    // Serialize
+    LocalTypeHandler = *TypeHandler;
+    LocalTypeHandler.ContextID  = Icc ->ContextID;
+    LocalTypeHandler.ICCVersion = Icc ->Version;
+
+    if (!_cmsWriteTypeBase(MemIO, TypeHandler ->Signature)) {
+        cmsCloseIOhandler(MemIO);
+        goto Error;
+    }
+
+    if (!LocalTypeHandler.WritePtr(&LocalTypeHandler, MemIO, Object, TagDescriptor ->ElemCount)) {
+        cmsCloseIOhandler(MemIO);
+        goto Error;
+    }
+
+    // Get Size and close
+    rc = MemIO ->Tell(MemIO);
+    cmsCloseIOhandler(MemIO);      // Ignore return code this time
+
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return rc;
+
+Error:
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return 0;
+}
+
+// Similar to the previous one. This function allows writing any data directly to the ICC profile, without
+// checking anything. As a rule, mixing Raw with cooked doesn't work, so writing a tag as raw and then reading
+// it as cooked without serializing does result in an error. If that is what you want, you will need to dump
+// the profile to memory or disk and then reopen it. Returns FALSE if the tag or its copy cannot be created.
+cmsBool CMSEXPORT cmsWriteRawTag(cmsHPROFILE hProfile, cmsTagSignature sig, const void* data, cmsUInt32Number Size)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    int i;
+
+    if (!_cmsLockMutex(Icc->ContextID, Icc ->UsrMutex)) return FALSE;
+
+    if (!_cmsNewTag(Icc, sig, &i)) {
+        _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+         return FALSE;
+    }
+
+    // Mark the tag as being written as RAW
+    Icc ->TagSaveAsRaw[i] = TRUE;
+    Icc ->TagNames[i]     = sig;
+    Icc ->TagLinked[i]    = (cmsTagSignature) 0;
+
+    // Keep a copy of the block
+    Icc ->TagPtrs[i]  = _cmsDupMem(Icc ->ContextID, data, Size);
+    Icc ->TagSizes[i] = Size;
+    // On a failed copy, flag the slot as deleted (zero name) before the lock is released
+    if (Icc->TagPtrs[i] == NULL) {
+        Icc->TagNames[i] = (cmsTagSignature) 0;
+        _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+        return FALSE;
+    }
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return TRUE;
+}
+
+// Using this function you can make tag 'sig' share the block of tag 'dest' inside the profile
+cmsBool CMSEXPORT cmsLinkTag(cmsHPROFILE hProfile, cmsTagSignature sig, cmsTagSignature dest)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    cmsBool Done = FALSE;
+    int Pos;
+
+    if (!_cmsLockMutex(Icc->ContextID, Icc ->UsrMutex)) return FALSE;
+
+    if (_cmsNewTag(Icc, sig, &Pos)) {
+        // A link stores no data of its own: only its name and its target are recorded
+        Icc ->TagNames[Pos]     = sig;
+        Icc ->TagLinked[Pos]    = dest;
+        Icc ->TagSaveAsRaw[Pos] = FALSE;
+
+        Icc ->TagOffsets[Pos] = 0;
+        Icc ->TagSizes[Pos]   = 0;
+        Icc ->TagPtrs[Pos]    = NULL;
+
+        Done = TRUE;
+    }
+
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return Done;
+}
+
+
+// Reports the tag that 'sig' is linked to; 0 when 'sig' is not in the profile directory
+cmsTagSignature  CMSEXPORT cmsTagLinkedTo(cmsHPROFILE hProfile, cmsTagSignature sig)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    int Pos = _cmsSearchTag(Icc, sig, FALSE);   // Look the tag up in the directory
+
+    if (Pos >= 0)
+        return Icc -> TagLinked[Pos];
+
+    // Not found, return 0
+    return (cmsTagSignature) 0;
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio1.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio1.cpp
new file mode 100644
index 0000000000..ae7ebe0eff
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsio1.cpp
@@ -0,0 +1,1029 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Read tags using low-level functions, provides necessary glue code to adapt versions, etc.
+
+// LUT tags, indexed by rendering intent. The 16-bit tables map absolute colorimetric to the same tag as relative colorimetric
+static const cmsTagSignature Device2PCS16[]   =  {cmsSigAToB0Tag,     // Perceptual
+                                                  cmsSigAToB1Tag,     // Relative colorimetric
+                                                  cmsSigAToB2Tag,     // Saturation
+                                                  cmsSigAToB1Tag };   // Absolute colorimetric
+// Floating point variant: here absolute colorimetric has an entry of its own
+static const cmsTagSignature Device2PCSFloat[] = {cmsSigDToB0Tag,     // Perceptual
+                                                  cmsSigDToB1Tag,     // Relative colorimetric
+                                                  cmsSigDToB2Tag,     // Saturation
+                                                  cmsSigDToB3Tag };   // Absolute colorimetric
+// Same layout as above, for the PCS to device direction
+static const cmsTagSignature PCS2Device16[]    = {cmsSigBToA0Tag,     // Perceptual
+                                                  cmsSigBToA1Tag,     // Relative colorimetric
+                                                  cmsSigBToA2Tag,     // Saturation
+                                                  cmsSigBToA1Tag };   // Absolute colorimetric
+
+static const cmsTagSignature PCS2DeviceFloat[] = {cmsSigBToD0Tag,     // Perceptual
+                                                  cmsSigBToD1Tag,     // Relative colorimetric
+                                                  cmsSigBToD2Tag,     // Saturation
+                                                  cmsSigBToD3Tag };   // Absolute colorimetric
+
+
+// Factors to convert from 1.15 fixed point to 0..1.0 range and vice-versa
+#define InpAdj   (1.0/MAX_ENCODEABLE_XYZ)     // (65536.0/(65535.0*2.0))
+#define OutpAdj  (MAX_ENCODEABLE_XYZ)         // ((2.0*65535.0)/65536.0)
+
+// Several resources for gray conversions, used as coefficients of the matrix stages built below.
+static const cmsFloat64Number GrayInputMatrix[] = { (InpAdj*cmsD50X),  (InpAdj*cmsD50Y),  (InpAdj*cmsD50Z) };
+static const cmsFloat64Number OneToThreeInputMatrix[] = { 1, 1, 1 };
+static const cmsFloat64Number PickYMatrix[] = { 0, (OutpAdj*cmsD50Y), 0 };
+static const cmsFloat64Number PickLstarMatrix[] = { 1, 0, 0 };
+
+// Get a media white point, fixing some issues found in certain old profiles. Always returns TRUE.
+cmsBool  _cmsReadMediaWhitePoint(cmsCIEXYZ* Dest, cmsHPROFILE hProfile)
+{
+    cmsCIEXYZ* WhitePoint;
+    cmsBool UseD50;
+
+    _cmsAssert(Dest != NULL);
+
+    WhitePoint = (cmsCIEXYZ*) cmsReadTag(hProfile, cmsSigMediaWhitePointTag);
+
+    // D50 is reported in two situations:
+    //   - the profile has no readable media white point tag
+    //   - the profile is a V2 (encoded version below 4.0) display profile
+    UseD50 = (WhitePoint == NULL) ||
+             (cmsGetEncodedICCversion(hProfile) < 0x4000000 &&
+              cmsGetDeviceClass(hProfile) == cmsSigDisplayClass);
+
+    if (UseD50) {
+        *Dest = *cmsD50_XYZ();
+    }
+    else {
+
+        // All seems ok, hand over the value stored in the profile
+        *Dest = *WhitePoint;
+    }
+
+    return TRUE;
+}
+
+
+// Chromatic adaptation matrix, with a fallback for profiles that lack the tag
+cmsBool  _cmsReadCHAD(cmsMAT3* Dest, cmsHPROFILE hProfile)
+{
+    cmsMAT3* Chad;
+    cmsCIEXYZ* White;
+
+    _cmsAssert(Dest != NULL);
+
+    Chad = (cmsMAT3*) cmsReadTag(hProfile, cmsSigChromaticAdaptationTag);
+
+    if (Chad != NULL) {
+
+        // The profile supplies the matrix, use it verbatim
+        *Dest = *Chad;
+        return TRUE;
+    }
+
+    // No CHAD available, default it to identity
+    _cmsMAT3identity(Dest);
+
+    // Anything other than a V2 display profile keeps the identity
+    if (cmsGetEncodedICCversion(hProfile) >= 0x4000000) return TRUE;
+    if (cmsGetDeviceClass(hProfile) != cmsSigDisplayClass) return TRUE;
+
+    // V2 display profile: the matrix is derived from the media white point, when present
+    White = (cmsCIEXYZ*) cmsReadTag(hProfile, cmsSigMediaWhitePointTag);
+
+    if (White == NULL) {
+
+        _cmsMAT3identity(Dest);
+        return TRUE;
+    }
+
+    // Build the adaptation matrix from that white point and D50
+    return _cmsAdaptationMatrix(Dest, NULL, White, cmsD50_XYZ());
+}
+
+
+// Auxiliary: gathers the red, green and blue colorants into a MAT3. Used wherever a matrix-shaper is needed
+static
+cmsBool ReadICCMatrixRGB2XYZ(cmsMAT3* r, cmsHPROFILE hProfile)
+{
+    cmsCIEXYZ* Red;
+    cmsCIEXYZ* Green;
+    cmsCIEXYZ* Blue;
+
+    _cmsAssert(r != NULL);
+
+    // All three tags are read before any of them is checked
+    Red   = (cmsCIEXYZ *) cmsReadTag(hProfile, cmsSigRedColorantTag);
+    Green = (cmsCIEXYZ *) cmsReadTag(hProfile, cmsSigGreenColorantTag);
+    Blue  = (cmsCIEXYZ *) cmsReadTag(hProfile, cmsSigBlueColorantTag);
+    if (!Red || !Green || !Blue) return FALSE;
+
+    _cmsVEC3init(&r -> v[0], Red -> X, Green -> X, Blue -> X);   // Row of X components
+    _cmsVEC3init(&r -> v[1], Red -> Y, Green -> Y, Blue -> Y);   // Row of Y components
+    _cmsVEC3init(&r -> v[2], Red -> Z, Green -> Z, Blue -> Z);   // Row of Z components
+    return TRUE;
+}
+
+
+// Gray input pipeline: builds a 1 -> 3 channel pipeline out of the GrayTRC tag. Returns NULL on failure.
+static
+cmsPipeline* BuildGrayInputMatrixPipeline(cmsHPROFILE hProfile)
+{
+    cmsToneCurve *GrayTRC;
+    cmsPipeline* Lut;
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+
+    GrayTRC = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigGrayTRCTag);
+    if (GrayTRC == NULL) return NULL;
+
+    Lut = cmsPipelineAlloc(ContextID, 1, 3);
+    if (Lut == NULL)
+        return NULL;          // Nothing has been allocated yet
+
+    if (cmsGetPCS(hProfile) == cmsSigLabData) {
+
+        // In this case we implement the profile as an  identity matrix plus 3 tone curves
+        cmsUInt16Number Zero[2] = { 0x8080, 0x8080 };
+        cmsToneCurve* EmptyTab;
+        cmsToneCurve* LabCurves[3];
+
+        EmptyTab = cmsBuildTabulatedToneCurve16(ContextID, 2, Zero);
+
+        if (EmptyTab == NULL)
+            goto Error;
+
+        LabCurves[0] = GrayTRC;
+        LabCurves[1] = EmptyTab;
+        LabCurves[2] = EmptyTab;
+
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocMatrix(ContextID, 3,  1, OneToThreeInputMatrix, NULL)) ||
+            !cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocToneCurves(ContextID, 3, LabCurves))) {
+                cmsFreeToneCurve(EmptyTab);
+                goto Error;
+        }
+
+        cmsFreeToneCurve(EmptyTab);
+
+    }
+    else  {
+
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocToneCurves(ContextID, 1, &GrayTRC)) ||
+            !cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocMatrix(ContextID, 3,  1, GrayInputMatrix, NULL)))
+            goto Error;
+    }
+
+    return Lut;
+
+Error:
+    // GrayTRC came from cmsReadTag() and belongs to the profile: it must not be freed here
+    cmsPipelineFree(Lut);
+    return NULL;
+}
+
+// RGB Matrix shaper: three tone curves followed by the colorant matrix. Returns NULL on failure.
+static
+cmsPipeline* BuildRGBInputMatrixShaper(cmsHPROFILE hProfile)
+{
+    cmsPipeline* Lut;
+    cmsMAT3 Mat;
+    cmsToneCurve *Shapes[3];
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    int i, j;
+
+    if (!ReadICCMatrixRGB2XYZ(&Mat, hProfile)) return NULL;
+
+    // XYZ PCS is encoded in 1.15 format, and the matrix output comes in 0..0xffff range, so
+    // we need to adjust the output by a factor of (0x10000/0xffff) to put data in
+    // a 1.16 range, and then a >> 1 to obtain 1.15. The total factor is (65536.0)/(65535.0*2)
+
+    for (i=0; i < 3; i++)
+        for (j=0; j < 3; j++)
+            Mat.v[i].n[j] *= InpAdj;
+
+    // The curves returned here are the objects held by the profile, not copies
+    Shapes[0] = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigRedTRCTag);
+    Shapes[1] = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigGreenTRCTag);
+    Shapes[2] = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigBlueTRCTag);
+
+    if (!Shapes[0] || !Shapes[1] || !Shapes[2])
+        return NULL;
+
+    Lut = cmsPipelineAlloc(ContextID, 3, 3);
+    if (Lut != NULL) {
+
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocToneCurves(ContextID, 3, Shapes)) ||
+            !cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocMatrix(ContextID, 3, 3, (cmsFloat64Number*) &Mat, NULL)))
+            goto Error;
+
+        // Note that it is certainly possible a single profile would have a LUT based
+        // tag for output working in lab and a matrix-shaper for the fallback cases.
+        // This is not allowed by the spec, but this code is tolerant to those cases
+        if (cmsGetPCS(hProfile) == cmsSigLabData) {
+
+            if (!cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageAllocXYZ2Lab(ContextID)))
+                goto Error;
+        }
+
+    }
+
+    return Lut;
+
+Error:
+    cmsPipelineFree(Lut);
+    return NULL;
+}
+
+
+
+// Read the DToAX tag, adjusting the encoding of Lab or XYZ if needed
+static
+cmsPipeline* _cmsReadFloatInputTag(cmsHPROFILE hProfile, cmsTagSignature tagFloat)
+{
+    cmsContext ContextID       = cmsGetProfileContextID(hProfile);
+    cmsPipeline* Lut           = cmsPipelineDup((cmsPipeline*) cmsReadTag(hProfile, tagFloat));
+    cmsColorSpaceSignature spc = cmsGetColorSpace(hProfile);
+    cmsColorSpaceSignature PCS = cmsGetPCS(hProfile);
+    cmsBool Ok = TRUE;
+
+    // All the work below is done on the duplicate of the tag read from the profile
+    if (Lut == NULL) return NULL;
+
+    // Input and output of the transform use the lcms 0..1 encoding. When XYZ or Lab spaces
+    // are involved, values have to be normalized into the appropriate ranges
+    // (Lab = 100,0,0, XYZ = 1.0,1.0,1.0)
+
+    // Device side: a normalization stage goes in front of the LUT
+    if (spc == cmsSigLabData) {
+        Ok = cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageNormalizeToLabFloat(ContextID));
+    }
+    else if (spc == cmsSigXYZData) {
+        Ok = cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageNormalizeToXyzFloat(ContextID));
+    }
+
+    // PCS side: a normalization stage goes after the LUT, unless the previous step failed
+    if (Ok) {
+        if (PCS == cmsSigLabData) {
+            Ok = cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageNormalizeFromLabFloat(ContextID));
+        }
+        else if (PCS == cmsSigXYZData) {
+            Ok = cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageNormalizeFromXyzFloat(ContextID));
+        }
+    }
+
+    if (Ok) return Lut;
+
+    // A stage could not be inserted: drop the partially built pipeline
+    cmsPipelineFree(Lut);
+    return NULL;
+}
+
+
+// Read and create a BRAND NEW MPE LUT from a given profile. All stuff dependent of version, etc
+// is adjusted here in order to create a LUT that takes care of all those details.
+// We add intent = 0xffffffff as a way to read matrix shaper always, no matter of other LUT
+cmsPipeline* CMSEXPORT _cmsReadInputLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent)
+{
+    cmsTagTypeSignature OriginalType;
+    cmsTagSignature tag16;
+    cmsTagSignature tagFloat;
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+
+    // On named color, take the appropriate tag
+    if (cmsGetDeviceClass(hProfile) == cmsSigNamedColorClass) {
+
+        cmsPipeline* Lut;
+        cmsNAMEDCOLORLIST* nc = (cmsNAMEDCOLORLIST*) cmsReadTag(hProfile, cmsSigNamedColor2Tag);
+
+        if (nc == NULL) return NULL;
+
+        Lut = cmsPipelineAlloc(ContextID, 0, 0);
+        if (Lut == NULL) {
+            // nc came from cmsReadTag() and belongs to the profile: it must not be freed here
+            return NULL;
+        }
+
+        if (!cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageAllocNamedColor(nc, TRUE)) ||
+            !cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageAllocLabV2ToV4(ContextID))) {
+            cmsPipelineFree(Lut);
+            return NULL;
+        }
+        return Lut;
+    }
+
+    // This is an attempt to reuse this function to retrieve the matrix-shaper as pipeline no
+    // matter other LUT are present and have precedence. Intent = 0xffffffff can be used for that.
+    if (Intent <= INTENT_ABSOLUTE_COLORIMETRIC) {
+
+        tag16 = Device2PCS16[Intent];
+        tagFloat = Device2PCSFloat[Intent];
+
+        if (cmsIsTag(hProfile, tagFloat)) {  // Float tag takes precedence
+
+            // Floating point LUT are always V4, but the encoding range is no
+            // longer 0..1.0, so we need to add an stage depending on the color space
+            return _cmsReadFloatInputTag(hProfile, tagFloat);
+        }
+
+        // Revert to perceptual if no tag is found
+        if (!cmsIsTag(hProfile, tag16)) {
+            tag16 = Device2PCS16[0];
+        }
+
+        if (cmsIsTag(hProfile, tag16)) { // Is there any LUT-Based table?
+
+            // Check profile version and LUT type. Do the necessary adjustments if needed
+
+            // First read the tag
+            cmsPipeline* Lut = (cmsPipeline*) cmsReadTag(hProfile, tag16);
+            if (Lut == NULL) return NULL;
+
+            // After reading it, we have now info about the original type
+            OriginalType =  _cmsGetTagTrueType(hProfile, tag16);
+
+            // The profile owns the Lut, so we need to copy it
+            Lut = cmsPipelineDup(Lut);
+            if (Lut == NULL) return NULL;     // Copy failed, do not allocate stages for it
+            // We need to adjust data only for Lab16 on output
+            if (OriginalType != cmsSigLut16Type || cmsGetPCS(hProfile) != cmsSigLabData)
+                return Lut;
+
+            // If the input is Lab, add also a conversion at the begin
+            if (cmsGetColorSpace(hProfile) == cmsSigLabData &&
+                !cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageAllocLabV4ToV2(ContextID)))
+                goto Error;
+
+            // Add a matrix for conversion V2 to V4 Lab PCS
+            if (!cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageAllocLabV2ToV4(ContextID)))
+                goto Error;
+
+            return Lut;
+Error:
+            cmsPipelineFree(Lut);
+            return NULL;
+        }
+    }
+
+    // Lut was not found, try to create a matrix-shaper
+
+    // Check if this is a grayscale profile.
+    if (cmsGetColorSpace(hProfile) == cmsSigGrayData) {
+
+        // if so, build appropriate conversion tables.
+        // The tables are the PCS illuminant, scaled across GrayTRC
+        return BuildGrayInputMatrixPipeline(hProfile);
+    }
+
+    // Not gray, create a normal matrix-shaper
+    return BuildRGBInputMatrixShaper(hProfile);
+}
+
+// ---------------------------------------------------------------------------------------------------------------
+
+// Gray output pipeline.
+// XYZ -> Gray or Lab -> Gray. Since we only know the GrayTRC, we need to make some assumptions. Gray component will be
+// given by Y on XYZ PCS and by L* on Lab PCS, both across the inverse TRC curve.
+// The complete pipeline on XYZ is Matrix[3:1] -> Tone curve and in Lab Matrix[3:1] -> Tone Curve as well.
+
+static
+cmsPipeline* BuildGrayOutputPipeline(cmsHPROFILE hProfile)
+{
+    cmsToneCurve *GrayTRC, *RevGrayTRC;
+    cmsPipeline* Lut;
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+
+    GrayTRC = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigGrayTRCTag);
+    if (GrayTRC == NULL) return NULL;
+    // The reversed curve is created here, so this function frees it on every exit path below
+    RevGrayTRC = cmsReverseToneCurve(GrayTRC);
+    if (RevGrayTRC == NULL) return NULL;
+
+    Lut = cmsPipelineAlloc(ContextID, 3, 1);
+    if (Lut == NULL) {
+        cmsFreeToneCurve(RevGrayTRC);
+        return NULL;
+    }
+    // First stage: a 3 -> 1 matrix that picks L* (Lab PCS) or Y (otherwise)
+    if (cmsGetPCS(hProfile) == cmsSigLabData) {
+
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocMatrix(ContextID, 1,  3, PickLstarMatrix, NULL)))
+            goto Error;
+    }
+    else  {
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocMatrix(ContextID, 1,  3, PickYMatrix, NULL)))
+            goto Error;
+    }
+    // Second stage: the reversed gray tone curve
+    if (!cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocToneCurves(ContextID, 1, &RevGrayTRC)))
+        goto Error;
+
+    cmsFreeToneCurve(RevGrayTRC);
+    return Lut;
+
+Error:
+    cmsFreeToneCurve(RevGrayTRC);
+    cmsPipelineFree(Lut);
+    return NULL;
+}
+
+
+// RGB output matrix-shaper: [Lab -> XYZ when the PCS is Lab] -> inverted colorant matrix -> reversed TRCs.
+// Returns a newly allocated pipeline, or NULL when a tag is missing, the matrix cannot be inverted or a step fails.
+static cmsPipeline* BuildRGBOutputMatrixShaper(cmsHPROFILE hProfile)
+{
+    cmsPipeline* Lut;
+    cmsToneCurve *Shapes[3], *InvShapes[3];
+    cmsMAT3 Mat, Inv;
+    int i, j;
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+
+    if (!ReadICCMatrixRGB2XYZ(&Mat, hProfile)) return NULL;
+    if (!_cmsMAT3inverse(&Mat, &Inv)) return NULL;
+
+    // XYZ PCS in encoded in 1.15 format, and the matrix input should come in 0..0xffff range, so
+    // we need to adjust the input by a << 1 to obtain a 1.16 fixed and then by a factor of
+    // (0xffff/0x10000) to put data in 0..0xffff range. Total factor is (2.0*65535.0)/65536.0;
+    for (i=0; i < 3; i++)
+        for (j=0; j < 3; j++)
+            Inv.v[i].n[j] *= OutpAdj;
+
+    Shapes[0] = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigRedTRCTag);
+    Shapes[1] = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigGreenTRCTag);
+    Shapes[2] = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigBlueTRCTag);
+
+    if (!Shapes[0] || !Shapes[1] || !Shapes[2])
+        return NULL;
+
+    InvShapes[0] = cmsReverseToneCurve(Shapes[0]);
+    InvShapes[1] = cmsReverseToneCurve(Shapes[1]);
+    InvShapes[2] = cmsReverseToneCurve(Shapes[2]);
+
+    // Any of the three reversals may fail; release the ones that succeeded instead of leaking them
+    if (!InvShapes[0] || !InvShapes[1] || !InvShapes[2]) {
+        for (i=0; i < 3; i++)
+            if (InvShapes[i] != NULL) cmsFreeToneCurve(InvShapes[i]);
+        return NULL;
+    }
+
+    Lut = cmsPipelineAlloc(ContextID, 3, 3);
+    if (Lut != NULL) {
+
+        // Note that it is certainly possible a single profile would have a LUT based
+        // tag for output working in lab and a matrix-shaper for the fallback cases.
+        // This is not allowed by the spec, but this code is tolerant to those cases
+        if (cmsGetPCS(hProfile) == cmsSigLabData) {
+
+            if (!cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageAllocLab2XYZ(ContextID)))
+                goto Error;
+        }
+
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocMatrix(ContextID, 3, 3, (cmsFloat64Number*) &Inv, NULL)) ||
+            !cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocToneCurves(ContextID, 3, InvShapes)))
+            goto Error;
+    }
+
+    cmsFreeToneCurveTriple(InvShapes);
+    return Lut;
+Error:
+    cmsFreeToneCurveTriple(InvShapes);
+    cmsPipelineFree(Lut);
+    return NULL;
+}
+
+
+// Change CLUT interpolation to trilinear
+static
+void ChangeInterpolationToTrilinear(cmsPipeline* Lut)
+{
+    cmsStage* cur = cmsPipelineGetPtrToFirstStage(Lut);
+
+    // Walk the stage list; only CLUT stages have their interpolation parameters touched
+    while (cur != NULL) {
+
+        if (cmsStageType(cur) == cmsSigCLutElemType) {
+
+            _cmsStageCLutData* clut = (_cmsStageCLutData*) cur ->Data;
+
+            clut ->Params->dwFlags |= CMS_LERP_FLAGS_TRILINEAR;
+            _cmsSetInterpolationRoutine(Lut->ContextID, clut ->Params);
+        }
+        cur = cmsStageNext(cur);
+    }
+}
+
+
+// Read the DToAX tag, adjusting the encoding of Lab or XYZ if needed
+static
+cmsPipeline* _cmsReadFloatOutputTag(cmsHPROFILE hProfile, cmsTagSignature tagFloat)
+{
+    cmsContext ctx                  = cmsGetProfileContextID(hProfile);
+    cmsPipeline* pipe               = cmsPipelineDup((cmsPipeline*) cmsReadTag(hProfile, tagFloat));
+    cmsColorSpaceSignature pcs      = cmsGetPCS(hProfile);
+    cmsColorSpaceSignature devSpace = cmsGetColorSpace(hProfile);
+    cmsBool ok = TRUE;
+
+    // Nothing to adapt when the tag could not be read or duplicated
+    if (pipe == NULL) return NULL;
+
+    // Input side: a Lab or XYZ PCS arrives accommodated to 0..1.0 by the formatter, while the
+    // float tag accepts data in the space encoding, so prepend the stage that undoes that change
+    switch (pcs) {
+
+    case cmsSigLabData:
+        ok = cmsPipelineInsertStage(pipe, cmsAT_BEGIN, _cmsStageNormalizeToLabFloat(ctx));
+        break;
+
+    case cmsSigXYZData:
+        ok = cmsPipelineInsertStage(pipe, cmsAT_BEGIN, _cmsStageNormalizeToXyzFloat(ctx));
+        break;
+
+    default:
+        break;
+    }
+
+    // Output side: when the device space itself is Lab or XYZ, append the matching normalisation
+    if (ok && devSpace == cmsSigLabData)
+        ok = cmsPipelineInsertStage(pipe, cmsAT_END, _cmsStageNormalizeFromLabFloat(ctx));
+    else if (ok && devSpace == cmsSigXYZData)
+        ok = cmsPipelineInsertStage(pipe, cmsAT_END, _cmsStageNormalizeFromXyzFloat(ctx));
+
+    // A failed insertion discards the duplicate
+    if (!ok) {
+        cmsPipelineFree(pipe);
+        return NULL;
+    }
+
+    return pipe;
+}
+
+// Create an output MPE LUT from a given profile. Version mismatches are handled here
+cmsPipeline* CMSEXPORT _cmsReadOutputLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent)
+{
+    cmsContext ctx = cmsGetProfileContextID(hProfile);
+    cmsTagTypeSignature trueType;
+    cmsTagSignature sig16;
+    cmsTagSignature sigFloat;
+    cmsPipeline* stored;
+    cmsPipeline* copy;
+
+    // Lookup order: floating point tag for the intent, 16-bit tag for the intent, 16-bit
+    // perceptual tag, and finally a pipeline synthesised from the matrix-shaper tags.
+    // The two tag tables are consulted only for intents up to absolute colorimetric.
+    if (Intent <= INTENT_ABSOLUTE_COLORIMETRIC) {
+
+        sig16    = PCS2Device16[Intent];
+        sigFloat = PCS2DeviceFloat[Intent];
+
+        // A floating point tag takes precedence. Floating point LUT are always V4
+        if (cmsIsTag(hProfile, sigFloat)) {
+
+            return _cmsReadFloatOutputTag(hProfile, sigFloat);
+        }
+
+        // No table for the requested intent: revert to the perceptual one
+        if (!cmsIsTag(hProfile, sig16)) {
+
+            sig16 = PCS2Device16[0];
+        }
+
+        // Is there any LUT-based table at all?
+        if (cmsIsTag(hProfile, sig16)) {
+
+            stored = (cmsPipeline*) cmsReadTag(hProfile, sig16);
+            if (stored == NULL) return NULL;
+
+            // The original type of the tag is known only after it has been read
+            trueType = _cmsGetTagTrueType(hProfile, sig16);
+
+            // The profile owns what cmsReadTag returned, so work on a duplicate
+            copy = cmsPipelineDup(stored);
+            if (copy == NULL) return NULL;
+
+            // 3D LUTs indexed by Lab are switched to trilinear interpolation
+            if (cmsGetPCS(hProfile) == cmsSigLabData) {
+
+                ChangeInterpolationToTrilinear(copy);
+            }
+
+            // Encoding adjustments are needed only for a Lut16 tag on a Lab PCS
+            if (trueType == cmsSigLut16Type && cmsGetPCS(hProfile) == cmsSigLabData) {
+
+                // Always prepend the V4 -> V2 Lab conversion; when the device space is Lab as
+                // well, also append the V2 -> V4 one. A failed insertion discards the duplicate.
+                if (!cmsPipelineInsertStage(copy, cmsAT_BEGIN, _cmsStageAllocLabV4ToV2(ctx)) ||
+                    (cmsGetColorSpace(hProfile) == cmsSigLabData &&
+                     !cmsPipelineInsertStage(copy, cmsAT_END, _cmsStageAllocLabV2ToV4(ctx)))) {
+
+                    cmsPipelineFree(copy);
+                    return NULL;
+                }
+            }
+
+            return copy;
+        }
+    }
+
+    // No LUT was found, try to create a matrix-shaper
+
+    // Grayscale profiles get their conversion built from the GrayTRC
+    if (cmsGetColorSpace(hProfile) == cmsSigGrayData) {
+
+        return BuildGrayOutputPipeline(hProfile);
+    }
+
+    // Not gray, create a normal matrix-shaper, which only operates in XYZ space
+    return BuildRGBOutputMatrixShaper(hProfile);
+}
+
+// ---------------------------------------------------------------------------------------------------------------
+
+// Read the AToD0 tag, adjusting the encoding of Lab or XYZ if needed
+static
+cmsPipeline* _cmsReadFloatDevicelinkTag(cmsHPROFILE hProfile, cmsTagSignature tagFloat)
+{
+    cmsContext ctx                 = cmsGetProfileContextID(hProfile);
+    cmsPipeline* pipe              = cmsPipelineDup((cmsPipeline*)cmsReadTag(hProfile, tagFloat));
+    cmsColorSpaceSignature outSide = cmsGetPCS(hProfile);
+    cmsColorSpaceSignature inSide  = cmsGetColorSpace(hProfile);
+    cmsStage* stage;
+
+    // The tag could not be read, or the duplicate could not be made
+    if (pipe == NULL) return NULL;
+
+    // Head of the pipeline: a Lab or XYZ color space gets its "normalize to float"
+    // stage inserted first
+    if (inSide == cmsSigLabData || inSide == cmsSigXYZData) {
+
+        stage = (inSide == cmsSigLabData) ? _cmsStageNormalizeToLabFloat(ctx)
+                                          : _cmsStageNormalizeToXyzFloat(ctx);
+
+        if (!cmsPipelineInsertStage(pipe, cmsAT_BEGIN, stage)) {
+            cmsPipelineFree(pipe);
+            return NULL;
+        }
+    }
+
+    // Tail of the pipeline: a Lab or XYZ PCS gets its "normalize from float" stage last
+    if (outSide == cmsSigLabData || outSide == cmsSigXYZData) {
+
+        stage = (outSide == cmsSigLabData) ? _cmsStageNormalizeFromLabFloat(ctx)
+                                           : _cmsStageNormalizeFromXyzFloat(ctx);
+
+        if (!cmsPipelineInsertStage(pipe, cmsAT_END, stage)) {
+            cmsPipelineFree(pipe);
+            return NULL;
+        }
+    }
+
+    return pipe;
+}
+
+// This one includes abstract profiles as well. Matrix-shaper cannot be obtained on that device class. The
+// tag name here may default to AToB0
+cmsPipeline* CMSEXPORT _cmsReadDevicelinkLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent)
+{
+    cmsPipeline* Lut;
+    cmsTagTypeSignature OriginalType;
+    cmsTagSignature tag16;
+    cmsTagSignature tagFloat;
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+
+    // Returns a newly built pipeline, or NULL when Intent is above absolute colorimetric, no usable
+    // tag is found, or reading, duplicating or extending the pipeline fails.
+    if (Intent > INTENT_ABSOLUTE_COLORIMETRIC) return NULL;
+
+    tag16 = Device2PCS16[Intent];
+    tagFloat = Device2PCSFloat[Intent];
+
+    // On named color, take the appropriate tag
+    if (cmsGetDeviceClass(hProfile) == cmsSigNamedColorClass) {
+
+        cmsNAMEDCOLORLIST* nc = (cmsNAMEDCOLORLIST*)cmsReadTag(hProfile, cmsSigNamedColor2Tag);
+
+        if (nc == NULL) return NULL;
+
+        Lut = cmsPipelineAlloc(ContextID, 0, 0);
+        if (Lut == NULL)
+            return NULL;
+
+        if (!cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageAllocNamedColor(nc, FALSE)))
+            goto Error;
+
+        if (cmsGetColorSpace(hProfile) == cmsSigLabData)
+            if (!cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageAllocLabV2ToV4(ContextID)))
+                goto Error;
+
+        return Lut;
+    Error:
+        // nc came from cmsReadTag and is not ours to free; only the pipeline built here is released
+        cmsPipelineFree(Lut);
+        return NULL;
+    }
+
+
+    if (cmsIsTag(hProfile, tagFloat)) {  // Float tag takes precedence
+
+        // Floating point LUT are always V4
+        return _cmsReadFloatDevicelinkTag(hProfile, tagFloat);
+    }
+
+    tagFloat = Device2PCSFloat[0];
+    if (cmsIsTag(hProfile, tagFloat)) {
+        // NOTE(review): unlike the branch above, no Lab/XYZ normalization is added here - confirm intended
+        return cmsPipelineDup((cmsPipeline*)cmsReadTag(hProfile, tagFloat));
+    }
+
+    if (!cmsIsTag(hProfile, tag16)) {  // Is there any LUT-Based table?
+
+        tag16 = Device2PCS16[0];
+        if (!cmsIsTag(hProfile, tag16)) return NULL;
+    }
+
+    // Check profile version and LUT type. Do the necessary adjustments if needed
+
+    // Read the tag
+    Lut = (cmsPipeline*)cmsReadTag(hProfile, tag16);
+    if (Lut == NULL) return NULL;
+
+    // The profile owns the Lut, so we need to copy it
+    Lut = cmsPipelineDup(Lut);
+    if (Lut == NULL) return NULL;
+
+    // Now it is time for a controversial stuff. I found that for 3D LUTS using
+    // Lab used as indexer space,  trilinear interpolation should be used
+    if (cmsGetPCS(hProfile) == cmsSigLabData)
+        ChangeInterpolationToTrilinear(Lut);
+
+    // After reading it, we have info about the original type
+    OriginalType = _cmsGetTagTrueType(hProfile, tag16);
+
+    // We need to adjust data for Lab16 on output
+    if (OriginalType != cmsSigLut16Type) return Lut;
+
+    // Here it is possible to get Lab on both sides
+
+    if (cmsGetColorSpace(hProfile) == cmsSigLabData) {
+        if (!cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageAllocLabV4ToV2(ContextID)))
+            goto Error2;
+    }
+
+    if (cmsGetPCS(hProfile) == cmsSigLabData) {
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageAllocLabV2ToV4(ContextID)))
+            goto Error2;
+    }
+
+    return Lut;
+
+Error2:
+    cmsPipelineFree(Lut);
+    return NULL;
+}
+
+// ---------------------------------------------------------------------------------------------------------------
+
+// Returns TRUE if the profile is implemented as matrix-shaper
+cmsBool  CMSEXPORT cmsIsMatrixShaper(cmsHPROFILE hProfile)
+{
+    // Colorant and TRC tags an RGB matrix-shaper needs, in the order they are probed
+    const cmsTagSignature rgbTags[6] = {
+        cmsSigRedColorantTag, cmsSigGreenColorantTag, cmsSigBlueColorantTag,
+        cmsSigRedTRCTag,      cmsSigGreenTRCTag,      cmsSigBlueTRCTag
+    };
+    cmsColorSpaceSignature space = cmsGetColorSpace(hProfile);
+    int k;
+
+    // Gray profiles only need the gray tone curve
+    if (space == cmsSigGrayData)
+        return cmsIsTag(hProfile, cmsSigGrayTRCTag);
+
+    // Apart from gray, only RGB can be implemented as matrix-shaper
+    if (space != cmsSigRgbData)
+        return FALSE;
+
+    for (k = 0; k < 6; k++)
+        if (!cmsIsTag(hProfile, rgbTags[k])) return FALSE;
+    return TRUE;
+}
+
+// Returns TRUE if the intent is implemented as CLUT
+cmsBool  CMSEXPORT cmsIsCLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number UsedDirection)
+{
+    // Table selected from UsedDirection below, then indexed by Intent
+    const cmsTagSignature* TagTable;
+
+    // For devicelinks, the supported intent is that one stated in the header
+    if (cmsGetDeviceClass(hProfile) == cmsSigLinkClass)
+        return (cmsGetHeaderRenderingIntent(hProfile) == Intent);
+
+    switch (UsedDirection) {
+       case LCMS_USED_AS_INPUT: TagTable = Device2PCS16; break;
+       case LCMS_USED_AS_OUTPUT:TagTable = PCS2Device16; break;
+
+       // For proofing, we need rel. colorimetric in output. Let's do some recursion
+       case LCMS_USED_AS_PROOF:
+           return cmsIsIntentSupported(hProfile, Intent, LCMS_USED_AS_INPUT) &&
+                  cmsIsIntentSupported(hProfile, INTENT_RELATIVE_COLORIMETRIC, LCMS_USED_AS_OUTPUT);
+
+       default:
+           cmsSignalError(cmsGetProfileContextID(hProfile), cmsERROR_RANGE, "Unexpected direction (%d)", UsedDirection);
+           return FALSE;
+    }
+
+    // Same bound the LUT readers apply before indexing these tables; avoids reading past their end
+    if (Intent > INTENT_ABSOLUTE_COLORIMETRIC) return FALSE;
+    return cmsIsTag(hProfile, TagTable[Intent]);
+}
+
+
+// Return info about supported intents
+cmsBool  CMSEXPORT cmsIsIntentSupported(cmsHPROFILE hProfile,
+                                        cmsUInt32Number Intent, cmsUInt32Number UsedDirection)
+{
+    // A CLUT for this intent and direction settles the question right away.
+    //
+    // Otherwise any matrix-shaper counts as support. That is debatable for V2 profiles, whose
+    // matrix-shapers cannot deal with non-zero black points in relative colorimetric, but many
+    // profiles claim it anyway, so the answer is "yes" even if accuracy is less than optimal.
+    cmsBool hasClut = cmsIsCLUT(hProfile, Intent, UsedDirection);
+
+    if (!hasClut) return cmsIsMatrixShaper(hProfile);
+    return TRUE;
+}
+
+
+// ---------------------------------------------------------------------------------------------------------------
+
+// Read both profile sequence description and profile sequence id, if present. Then combine both to
+// create a unique structure holding both. Shame on ICC to store things in such complicated way.
+cmsSEQ* _cmsReadProfileSequence(cmsHPROFILE hProfile)
+{
+    cmsSEQ* ProfileSeq;
+    cmsSEQ* ProfileId;
+    cmsSEQ* NewSeq;
+    cmsUInt32Number i;
+
+    // Take profile sequence description first
+    ProfileSeq = (cmsSEQ*) cmsReadTag(hProfile, cmsSigProfileSequenceDescTag);
+
+    // Take profile sequence ID
+    ProfileId  = (cmsSEQ*) cmsReadTag(hProfile, cmsSigProfileSequenceIdTag);
+
+    // With neither tag there is nothing to return; with only one, return a duplicate of it
+    if (ProfileSeq == NULL && ProfileId == NULL) return NULL;
+    if (ProfileSeq == NULL) return cmsDupProfileSequenceDescription(ProfileId);
+    if (ProfileId  == NULL) return cmsDupProfileSequenceDescription(ProfileSeq);
+
+    // We have to mix both together. For that they must agree
+    if (ProfileSeq ->n != ProfileId ->n) return cmsDupProfileSequenceDescription(ProfileSeq);
+
+    NewSeq = cmsDupProfileSequenceDescription(ProfileSeq);
+    // Ok, proceed to the mixing: IDs and descriptions come from the sequence-ID tag
+    if (NewSeq != NULL) {
+        for (i=0; i < ProfileSeq ->n; i++) {
+            memmove(&NewSeq ->seq[i].ProfileID, &ProfileId ->seq[i].ProfileID, sizeof(cmsProfileID));
+            // The duplicate is expected to hold its own copy of the description; release it before
+            // replacing it, otherwise that copy is leaked
+            if (NewSeq ->seq[i].Description != NULL) cmsMLUfree(NewSeq ->seq[i].Description);
+            NewSeq ->seq[i].Description = cmsMLUdup(ProfileId ->seq[i].Description);
+        }
+    }
+    return NewSeq;
+}
+
+// Dump the contents of profile sequence in both tags (if v4 available)
+cmsBool _cmsWriteProfileSequence(cmsHPROFILE hProfile, const cmsSEQ* seq)
+{
+    // The description tag is always written
+    cmsBool written = cmsWriteTag(hProfile, cmsSigProfileSequenceDescTag, seq);
+
+    // The identifier tag is added only when the encoded ICC version is 4 or above
+    if (written && cmsGetEncodedICCversion(hProfile) >= 0x4000000)
+        written = cmsWriteTag(hProfile, cmsSigProfileSequenceIdTag, seq);
+
+    return written ? TRUE : FALSE;
+}
+
+
+// Auxiliary, read and duplicate a MLU if found.
+static
+cmsMLU* GetMLUFromProfile(cmsHPROFILE h, cmsTagSignature sig)
+{
+    cmsMLU* stored = (cmsMLU*) cmsReadTag(h, sig);
+
+    // Hand back a duplicate, or NULL when the tag could not be read
+    return (stored == NULL) ? NULL : cmsMLUdup(stored);
+}
+
+// Create a sequence description out of an array of profiles
+cmsSEQ* _cmsCompileProfileSequence(cmsContext ContextID, cmsUInt32Number nProfiles, cmsHPROFILE hProfiles[])
+{
+    cmsSEQ* result = cmsAllocProfileSequenceDescription(ContextID, nProfiles);
+    cmsTechnologySignature* tech;
+    cmsPSEQDESC* entry;
+    cmsHPROFILE profile;
+    cmsUInt32Number k;
+
+    if (result == NULL) return NULL;
+
+    // One descriptor per profile, filled in the order the profiles were given
+    for (k = 0; k < nProfiles; k++) {
+        entry   = &result ->seq[k];
+        profile = hProfiles[k];
+
+        // Fields taken from the profile header
+        cmsGetHeaderAttributes(profile, &entry ->attributes);
+        cmsGetHeaderProfileID(profile, entry ->ProfileID.ID8);
+        entry ->deviceMfg   = cmsGetHeaderManufacturer(profile);
+        entry ->deviceModel = cmsGetHeaderModel(profile);
+
+        // Technology signature, zero when the tag cannot be read
+        tech = (cmsTechnologySignature*) cmsReadTag(profile, cmsSigTechnologyTag);
+        entry ->technology = (tech != NULL) ? *tech : (cmsTechnologySignature) 0;
+
+        // Text tags are duplicated; each one may end up NULL
+        entry ->Manufacturer = GetMLUFromProfile(profile, cmsSigDeviceMfgDescTag);
+        entry ->Model        = GetMLUFromProfile(profile, cmsSigDeviceModelDescTag);
+        entry ->Description  = GetMLUFromProfile(profile, cmsSigProfileDescriptionTag);
+    }
+    return result;
+}
+
+// -------------------------------------------------------------------------------------------------------------------
+
+
+static
+const cmsMLU* GetInfo(cmsHPROFILE hProfile, cmsInfoType Info)
+{
+    cmsTagSignature tag;
+
+    // Map the requested piece of information onto the tag that stores it.
+    // Only the four selectors below are handled.
+    if (Info == cmsInfoDescription) {
+        tag = cmsSigProfileDescriptionTag;
+    }
+    else if (Info == cmsInfoManufacturer) {
+        tag = cmsSigDeviceMfgDescTag;
+    }
+    else if (Info == cmsInfoModel) {
+        tag = cmsSigDeviceModelDescTag;
+    }
+    else if (Info == cmsInfoCopyright) {
+        tag = cmsSigCopyrightTag;
+    }
+    else {
+
+        // Any other selector is not supported
+        return NULL;
+    }
+
+    // The result is whatever cmsReadTag hands back for that tag; it is not duplicated here,
+    // and it is NULL when the tag cannot be read
+    return (cmsMLU*) cmsReadTag(hProfile, tag);
+}
+
+
+
+cmsUInt32Number CMSEXPORT cmsGetProfileInfo(cmsHPROFILE hProfile, cmsInfoType Info,
+                                            const char LanguageCode[3], const char CountryCode[3],
+                                            wchar_t* Buffer, cmsUInt32Number BufferSize)
+{
+    const cmsMLU* text = GetInfo(hProfile, Info);
+
+    // Zero is reported when the requested information is not available
+    return (text == NULL) ? 0 : cmsMLUgetWide(text, LanguageCode, CountryCode, Buffer, BufferSize);
+}
+
+
+cmsUInt32Number  CMSEXPORT cmsGetProfileInfoASCII(cmsHPROFILE hProfile, cmsInfoType Info,
+                                                          const char LanguageCode[3], const char CountryCode[3],
+                                                          char* Buffer, cmsUInt32Number BufferSize)
+{
+    const cmsMLU* text = GetInfo(hProfile, Info);
+
+    // Zero is reported when the requested information is not available
+    return (text == NULL) ? 0 : cmsMLUgetASCII(text, LanguageCode, CountryCode, Buffer, BufferSize);
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmslut.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmslut.cpp
new file mode 100644
index 0000000000..01906a12c3
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmslut.cpp
@@ -0,0 +1,1843 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// Allocates an empty multi profile element
+cmsStage* CMSEXPORT _cmsStageAllocPlaceholder(cmsContext ContextID,
+                                cmsStageSignature Type,
+                                cmsUInt32Number InputChannels,
+                                cmsUInt32Number OutputChannels,
+                                _cmsStageEvalFn     EvalPtr,
+                                _cmsStageDupElemFn  DupElemPtr,
+                                _cmsStageFreeElemFn FreePtr,
+                                void*             Data)
+{
+    cmsStage* stage = (cmsStage*) _cmsMallocZero(ContextID, sizeof(cmsStage));
+    if (stage != NULL) {
+
+        // Identity of the stage. Implements mirrors Type: by default, no clue on what is implementing
+        stage ->ContextID  = ContextID;
+        stage ->Type       = Type;
+        stage ->Implements = Type;
+
+        // Channel counts
+        stage ->InputChannels  = InputChannels;
+        stage ->OutputChannels = OutputChannels;
+
+        // Callbacks and private payload, stored exactly as given
+        stage ->EvalPtr    = EvalPtr;
+        stage ->DupElemPtr = DupElemPtr;
+        stage ->FreePtr    = FreePtr;
+        stage ->Data       = Data;
+    }
+    return stage;
+}
+
+
+static
+void EvaluateIdentity(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+    // Pass the input channels through unchanged
+    size_t nBytes = mpe ->InputChannels * sizeof(cmsFloat32Number);
+    memmove(Out, In, nBytes);
+}
+
+
+cmsStage* CMSEXPORT cmsStageAllocIdentity(cmsContext ContextID, cmsUInt32Number nChannels)
+{
+    // An identity stage has as many outputs as inputs and carries no private data,
+    // hence no duplication or release callbacks are registered
+    cmsStage* identity = _cmsStageAllocPlaceholder(ContextID, cmsSigIdentityElemType,
+                                                   nChannels, nChannels,
+                                                   EvaluateIdentity, NULL, NULL, NULL);
+
+    return identity;
+}
+
+// Conversion functions. From floating point to 16 bits
+static
+void FromFloatTo16(const cmsFloat32Number In[], cmsUInt16Number Out[], cmsUInt32Number n)
+{
+    const cmsFloat32Number* src = In;
+    cmsUInt16Number* dst = Out;
+    // Scale each sample by 65535 and convert it with _cmsQuickSaturateWord
+    while (n-- > 0)
+        *dst++ = _cmsQuickSaturateWord(*src++ * 65535.0);
+}
+
+// From 16 bits to floating point
+static
+void From16ToFloat(const cmsUInt16Number In[], cmsFloat32Number Out[], cmsUInt32Number n)
+{
+    cmsUInt32Number k = 0;
+
+    // Divide each 16-bit sample by 65535 so that 0..65535 maps onto 0..1.0
+    for (; k < n; k++)
+        Out[k] = (cmsFloat32Number) In[k] / 65535.0F;
+}
+
+
+// This function is quite useful to analyze the structure of a LUT and retrieve the MPE elements
+// that conform the LUT. It should be called with the LUT, the number of expected elements and
+// then a list of expected types followed with a list of cmsFloat64Number pointers to MPE elements. If
+// the function finds a match with current pipeline, it fills the pointers and returns TRUE
+// if not, returns FALSE without touching anything. Setting pointers to NULL does bypass
+// the storage process.
+cmsBool  CMSEXPORT cmsPipelineCheckAndRetreiveStages(const cmsPipeline* Lut, cmsUInt32Number n, ...)
+{
+    va_list ap;
+    cmsStageSignature wanted;
+    cmsUInt32Number k;
+    cmsStage* cur;
+    void** slot;
+
+    // The pipeline must hold exactly as many stages as the caller describes
+    if (cmsPipelineStageCount(Lut) != n) return FALSE;
+
+    va_start(ap, n);
+
+    // First group of arguments: the n expected stage types, compared in pipeline order.
+    // cmsStageSignature is promoted to int when passed through the ellipsis.
+    cur = Lut ->Elements;
+    k = 0;
+    while (k < n) {
+
+        wanted = (cmsStageSignature) va_arg(ap, int);
+        if (cur ->Type != wanted) break;
+
+        cur = cur ->Next;
+        k++;
+    }
+
+    // Second group: n locations, consumed only when every type matched, so nothing is
+    // written on a mismatch. A NULL location skips the store for that stage.
+    if (k == n) {
+
+        for (cur = Lut ->Elements, k = 0; k < n; cur = cur ->Next, k++) {
+
+            slot = va_arg(ap, void**);
+            if (slot != NULL) *slot = cur;
+        }
+    }
+
+    // k equals n only when both passes ran to completion
+    va_end(ap);
+    return (k == n) ? TRUE : FALSE;
+}
+
+// Below there are implementations for several types of elements. Each type may be implemented by a
+// evaluation function, a duplication function, a function to free resources and a constructor.
+
+// *************************************************************************************************
+// Type cmsSigCurveSetElemType (curves)
+// *************************************************************************************************
+
+cmsToneCurve** _cmsStageGetPtrToCurveSet(const cmsStage* mpe)
+{
+    // The stage payload is read as curve-set data; its curve array is returned as is, not copied
+    const _cmsStageToneCurvesData* payload = (const _cmsStageToneCurvesData*) mpe ->Data;
+    return payload ->TheCurves;
+}
+
+static
+void EvaluateCurves(const cmsFloat32Number In[],
+                    cmsFloat32Number Out[],
+                    const cmsStage *mpe)
+{
+    const _cmsStageToneCurvesData* set;
+    cmsUInt32Number ch;
+
+    _cmsAssert(mpe != NULL);
+
+    set = (const _cmsStageToneCurvesData*) mpe ->Data;
+
+    // Out[] is left untouched when the stage has no payload or no curve array
+    if (set != NULL && set ->TheCurves != NULL) {
+        // Channel ch goes through curve ch
+        for (ch = 0; ch < set ->nCurves; ch++)
+            Out[ch] = cmsEvalToneCurveFloat(set ->TheCurves[ch], In[ch]);
+    }
+}
+
+static
+void CurveSetElemTypeFree(cmsStage* mpe)
+{
+    _cmsStageToneCurvesData* set;
+    cmsToneCurve** curves;
+    cmsUInt32Number k;
+
+    _cmsAssert(mpe != NULL);
+
+    set = (_cmsStageToneCurvesData*) mpe ->Data;
+    if (set == NULL) return;
+
+    // Release every curve that is present; slots may be NULL and so may the array itself
+    curves = set ->TheCurves;
+    for (k = 0; curves != NULL && k < set ->nCurves; k++)
+        if (curves[k] != NULL) cmsFreeToneCurve(curves[k]);
+    // Then the array and finally the container
+    _cmsFree(mpe ->ContextID, curves);
+    _cmsFree(mpe ->ContextID, set);
+}
+
+
+static
+void* CurveSetDup(cmsStage* mpe)
+{
+    const _cmsStageToneCurvesData* src = (const _cmsStageToneCurvesData*) mpe ->Data;
+    _cmsStageToneCurvesData* copy;
+    cmsUInt32Number k;
+
+    copy = (_cmsStageToneCurvesData*) _cmsMallocZero(mpe ->ContextID, sizeof(_cmsStageToneCurvesData));
+    if (copy == NULL) return NULL;
+
+    // Same number of slots as the source; the clean-up below relies on unfilled slots being NULL
+    copy ->nCurves   = src ->nCurves;
+    copy ->TheCurves = (cmsToneCurve**) _cmsCalloc(mpe ->ContextID, copy ->nCurves, sizeof(cmsToneCurve*));
+
+    if (copy ->TheCurves != NULL) {
+        // Duplicate curve by curve, stopping at the first failure
+        for (k = 0; k < copy ->nCurves; k++) {
+
+            copy ->TheCurves[k] = cmsDupToneCurve(src ->TheCurves[k]);
+            if (copy ->TheCurves[k] == NULL) break;
+        }
+
+        // Every slot filled: the copy is complete
+        if (k == copy ->nCurves) return (void*) copy;
+
+        // Otherwise undo the curves duplicated so far
+        for (k = 0; k < copy ->nCurves; k++) {
+            if (copy ->TheCurves[k] != NULL)
+                cmsFreeToneCurve(copy ->TheCurves[k]);
+        }
+    }
+
+    // Failure path: release the array (possibly NULL) and the container
+    _cmsFree(mpe ->ContextID, copy ->TheCurves);
+    _cmsFree(mpe ->ContextID, copy);
+    return NULL;
+}
+
+
+// Curves == NULL forces identity curves
+cmsStage* CMSEXPORT cmsStageAllocToneCurves(cmsContext ContextID, cmsUInt32Number nChannels, cmsToneCurve* const Curves[])
+{
+    _cmsStageToneCurvesData* set;
+    cmsToneCurve* curve;
+    cmsStage* stage;
+    cmsUInt32Number ch;
+
+    stage = _cmsStageAllocPlaceholder(ContextID, cmsSigCurveSetElemType, nChannels, nChannels,
+                                      EvaluateCurves, CurveSetDup, CurveSetElemTypeFree, NULL );
+    if (stage == NULL) return NULL;
+
+    set = (_cmsStageToneCurvesData*) _cmsMallocZero(ContextID, sizeof(_cmsStageToneCurvesData));
+    if (set == NULL) goto Error;
+
+    // Attach the payload before filling it, so that freeing the stage on a later failure
+    // also releases whatever part of the payload exists by then
+    stage ->Data = (void*) set;
+
+    set ->nCurves   = nChannels;
+    set ->TheCurves = (cmsToneCurve**) _cmsCalloc(ContextID, nChannels, sizeof(cmsToneCurve*));
+    if (set ->TheCurves == NULL) goto Error;
+
+    for (ch = 0; ch < nChannels; ch++) {
+
+        // Without caller curves every channel gets a gamma 1.0 curve, otherwise a duplicate
+        if (Curves == NULL)
+            curve = cmsBuildGamma(ContextID, 1.0);
+        else
+            curve = cmsDupToneCurve(Curves[ch]);
+
+        // One failed curve aborts the whole stage
+        if (curve == NULL) goto Error;
+
+        set ->TheCurves[ch] = curve;
+    }
+
+    return stage;
+
+    // Common failure exit: the stage is freed through cmsStageFree, which is handed the stage
+    // with or without its payload attached
+Error:
+    cmsStageFree(stage);
+    return NULL;
+}
+
+
+// Create a bunch of identity curves
+cmsStage* CMSEXPORT _cmsStageAllocIdentityCurves(cmsContext ContextID, cmsUInt32Number nChannels)
+{
+    // Passing no curves makes cmsStageAllocToneCurves build gamma 1.0 ones
+    cmsStage* stage = cmsStageAllocToneCurves(ContextID, nChannels, NULL);
+    // Flag the stage as implementing an identity
+    if (stage != NULL) stage ->Implements = cmsSigIdentityElemType;
+    return stage;
+}
+
+
+// *************************************************************************************************
+// Type cmsSigMatrixElemType (Matrices)
+// *************************************************************************************************
+
+
+// Special care should be taken here because precision loss. A temporary cmsFloat64Number buffer is being used
+static
+void EvaluateMatrix(const cmsFloat32Number In[],
+                    cmsFloat32Number Out[],
+                    const cmsStage *mpe)
+{
+    const _cmsStageMatrixData* mat = (const _cmsStageMatrixData*) mpe ->Data;
+    cmsFloat64Number acc;
+    cmsUInt32Number row, col;
+
+    // Input is already in 0..1.0 notation, and the output stays in that domain.
+    // One output channel per matrix row; coefficients are stored row after row.
+    for (row = 0; row < mpe ->OutputChannels; row++) {
+
+        // Accumulate in a cmsFloat64Number to limit the precision loss
+        acc = 0;
+        for (col = 0; col < mpe ->InputChannels; col++)
+            acc += In[col] * mat ->Double[row * mpe ->InputChannels + col];
+
+        // The offset vector is optional
+        if (mat ->Offset != NULL)
+            acc += mat ->Offset[row];
+
+        // Narrow back to cmsFloat32Number only once per channel
+        Out[row] = (cmsFloat32Number) acc;
+    }
+}
+
+
+// Duplicate a yet-existing matrix element
+static
+void* MatrixElemDup(cmsStage* mpe)
+{
+    _cmsStageMatrixData* Data = (_cmsStageMatrixData*) mpe ->Data;
+    _cmsStageMatrixData* NewElem;
+    cmsUInt32Number sz = mpe ->InputChannels * mpe ->OutputChannels;
+
+    NewElem = (_cmsStageMatrixData*) _cmsMallocZero(mpe ->ContextID, sizeof(_cmsStageMatrixData));
+    if (NewElem == NULL) return NULL;
+    NewElem ->Double = (cmsFloat64Number*) _cmsDupMem(mpe ->ContextID, Data ->Double, sz * sizeof(cmsFloat64Number));
+    if (Data ->Offset)
+        NewElem ->Offset = (cmsFloat64Number*) _cmsDupMem(mpe ->ContextID, Data ->Offset, mpe ->OutputChannels * sizeof(cmsFloat64Number));
+
+    // Like CurveSetDup, report a failed copy as NULL instead of returning data with a missing array
+    if (NewElem ->Double != NULL && (!Data ->Offset || NewElem ->Offset != NULL)) return (void*) NewElem;
+    if (NewElem ->Double) _cmsFree(mpe ->ContextID, NewElem ->Double);
+    if (NewElem ->Offset) _cmsFree(mpe ->ContextID, NewElem ->Offset);
+    _cmsFree(mpe ->ContextID, NewElem);
+    return NULL;
+}
+
+
+static
+void MatrixElemTypeFree(cmsStage* mpe)
+{
+    _cmsStageMatrixData* mat = (_cmsStageMatrixData*) mpe ->Data;
+
+    // A stage without payload has nothing to release
+    if (mat != NULL) {
+
+        // Coefficients and offsets are freed only when present; the container goes last
+        if (mat ->Double != NULL) _cmsFree(mpe ->ContextID, mat ->Double);
+        if (mat ->Offset != NULL) _cmsFree(mpe ->ContextID, mat ->Offset);
+        _cmsFree(mpe ->ContextID, mat);
+    }
+}
+
+
+
+// Allocates a matrix stage taking Cols inputs and producing Rows outputs.
+//   Matrix: Rows * Cols coefficients, copied into the stage.
+//   Offset: optional vector of Rows values added after the product (may be NULL).
+// Returns the new stage, or NULL on a bad size or when memory is exhausted.
+// Every failure path releases whatever had been allocated so far.
+cmsStage*  CMSEXPORT cmsStageAllocMatrix(cmsContext ContextID, cmsUInt32Number Rows, cmsUInt32Number Cols,
+                                     const cmsFloat64Number* Matrix, const cmsFloat64Number* Offset)
+{
+    cmsUInt32Number i, n;
+    _cmsStageMatrixData* NewElem;
+    cmsStage* NewMPE;
+
+    n = Rows * Cols;
+
+    // Check for overflow
+    if (n == 0) return NULL;
+    if (n >= UINT_MAX / Cols) return NULL;
+    if (n >= UINT_MAX / Rows) return NULL;
+    if (n < Rows || n < Cols) return NULL;
+
+    NewMPE = _cmsStageAllocPlaceholder(ContextID, cmsSigMatrixElemType, Cols, Rows,
+                                     EvaluateMatrix, MatrixElemDup, MatrixElemTypeFree, NULL );
+    if (NewMPE == NULL) return NULL;
+
+
+    NewElem = (_cmsStageMatrixData*) _cmsMallocZero(ContextID, sizeof(_cmsStageMatrixData));
+    if (NewElem == NULL) goto Error;
+
+    // Attach the data right away: MatrixElemTypeFree only releases what hangs
+    // from mpe->Data, so this lets cmsStageFree clean a half-built stage.
+    NewMPE ->Data  = (void*) NewElem;
+
+    NewElem ->Double = (cmsFloat64Number*) _cmsCalloc(ContextID, n, sizeof(cmsFloat64Number));
+    if (NewElem->Double == NULL) goto Error;
+
+    for (i=0; i < n; i++) {
+        NewElem ->Double[i] = Matrix[i];
+    }
+
+
+    if (Offset != NULL) {
+
+        NewElem ->Offset = (cmsFloat64Number*) _cmsCalloc(ContextID, Rows, sizeof(cmsFloat64Number));
+        if (NewElem->Offset == NULL) goto Error;
+
+        for (i=0; i < Rows; i++) {
+                NewElem ->Offset[i] = Offset[i];
+        }
+
+    }
+
+    return NewMPE;
+
+Error:
+    // Frees the private data (if attached) through FreePtr, then the stage itself
+    cmsStageFree(NewMPE);
+    return NULL;
+}
+
+
+// *************************************************************************************************
+// Type cmsSigCLutElemType
+// *************************************************************************************************
+
+
+// Evaluates the CLUT directly in floating point, by means of the float
+// interpolation routine stored in the interpolation parameters of the stage.
+static
+void EvaluateCLUTfloat(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+    _cmsStageCLutData* Clut = (_cmsStageCLutData*) mpe->Data;
+
+    Clut->Params->Interpolation.LerpFloat(In, Out, Clut->Params);
+}
+
+
+// Evaluates a 16-bit CLUT on behalf of a floating point caller: the input is
+// converted to 16 bits, interpolated with the 16-bit routine and the result
+// is converted back to floating point.
+static
+void EvaluateCLUTfloatIn16(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+    cmsUInt16Number WordsIn[MAX_STAGE_CHANNELS];
+    cmsUInt16Number WordsOut[MAX_STAGE_CHANNELS];
+    _cmsStageCLutData* Clut = (_cmsStageCLutData*) mpe->Data;
+
+    // The temporary buffers above must be able to hold every channel
+    _cmsAssert(mpe->InputChannels  <= MAX_STAGE_CHANNELS);
+    _cmsAssert(mpe->OutputChannels <= MAX_STAGE_CHANNELS);
+
+    FromFloatTo16(In, WordsIn, mpe->InputChannels);
+
+    Clut->Params->Interpolation.Lerp16(WordsIn, WordsOut, Clut->Params);
+
+    From16ToFloat(WordsOut, Out, mpe->OutputChannels);
+}
+
+
+// Given a hypercube of b dimensions, with Dims[] number of nodes by dimension,
+// calculate the total amount of nodes.
+// Returns 0 on error: a dimension with zero nodes, or a node count that does
+// not fit in a cmsUInt32Number.
+static
+cmsUInt32Number CubeSize(const cmsUInt32Number Dims[], cmsUInt32Number b)
+{
+    cmsUInt32Number rv, dim;
+
+    _cmsAssert(Dims != NULL);
+
+    for (rv = 1; b > 0; b--) {
+
+        dim = Dims[b-1];
+        if (dim == 0) return 0;  // Error
+
+        // Test BEFORE multiplying. Checking only the already-multiplied value
+        // lets a wrapped product pass, e.g. 3 * 0x55555556 wraps to 2.
+        if (rv > UINT_MAX / dim) return 0;
+
+        rv *= dim;
+
+        // Conservative headroom check kept from the previous implementation,
+        // so no input that used to be rejected is accepted now.
+        if (rv > UINT_MAX / dim) return 0;
+    }
+
+    return rv;
+}
+
+// Duplicates the private data of a CLUT stage: the table (16-bit or float,
+// depending on HasFloatValues) and a fresh set of interpolation parameters
+// bound to the new table. Returns NULL when any step fails.
+static
+void* CLUTElemDup(cmsStage* mpe)
+{
+    _cmsStageCLutData* Src = (_cmsStageCLutData*) mpe->Data;
+    _cmsStageCLutData* Copy;
+    cmsBool TableOk = TRUE;
+
+    Copy = (_cmsStageCLutData*) _cmsMallocZero(mpe->ContextID, sizeof(_cmsStageCLutData));
+    if (Copy == NULL) return NULL;
+
+    Copy->nEntries       = Src->nEntries;
+    Copy->HasFloatValues = Src->HasFloatValues;
+
+    // Tab is accessed through both members; the table is optional
+    if (Src->Tab.T != NULL) {
+
+        if (Src->HasFloatValues) {
+            Copy->Tab.TFloat = (cmsFloat32Number*) _cmsDupMem(mpe->ContextID, Src->Tab.TFloat, Src->nEntries * sizeof (cmsFloat32Number));
+            TableOk = (Copy->Tab.TFloat != NULL);
+        }
+        else {
+            Copy->Tab.T = (cmsUInt16Number*) _cmsDupMem(mpe->ContextID, Src->Tab.T, Src->nEntries * sizeof (cmsUInt16Number));
+            TableOk = (Copy->Tab.T != NULL);
+        }
+    }
+
+    if (TableOk) {
+
+        // Same geometry and flags as the source, but pointing to the new table
+        Copy->Params = _cmsComputeInterpParamsEx(mpe->ContextID,
+                                                 Src->Params->nSamples,
+                                                 Src->Params->nInputs,
+                                                 Src->Params->nOutputs,
+                                                 Copy->Tab.T,
+                                                 Src->Params->dwFlags);
+        if (Copy->Params != NULL)
+            return (void*) Copy;
+    }
+
+    // Something went wrong, undo. The check on Tab.T works for both table types
+    if (Copy->Tab.T != NULL)
+        _cmsFree(mpe->ContextID, Copy->Tab.T);
+
+    _cmsFree(mpe->ContextID, Copy);
+    return NULL;
+}
+
+
+// Releases the private data of a CLUT stage: the table, the interpolation
+// parameters and the holder structure. Does nothing if no data is attached.
+static
+void CLutElemTypeFree(cmsStage* mpe)
+{
+    _cmsStageCLutData* Clut = (_cmsStageCLutData*) mpe->Data;
+
+    if (Clut != NULL) {
+
+        // Tab.T is tested and freed whatever the table type is
+        if (Clut->Tab.T != NULL)
+            _cmsFree(mpe->ContextID, Clut->Tab.T);
+
+        _cmsFreeInterpParams(Clut->Params);
+        _cmsFree(mpe->ContextID, mpe->Data);
+    }
+}
+
+
+// Allocates a 16-bit multidimensional CLUT. This is evaluated at 16-bit precision. Table may have different
+// granularity on each dimension.
+//   clutPoints: number of nodes on each of the inputChan dimensions.
+//   Table:      optional initial contents, outputChan values per node; when
+//               NULL the table is left zeroed.
+// Returns NULL on too many input channels, on an empty or too large table, or
+// when memory is exhausted.
+cmsStage* CMSEXPORT cmsStageAllocCLut16bitGranular(cmsContext ContextID,
+                                         const cmsUInt32Number clutPoints[],
+                                         cmsUInt32Number inputChan,
+                                         cmsUInt32Number outputChan,
+                                         const cmsUInt16Number* Table)
+{
+    cmsUInt32Number i, n, nNodes;
+    _cmsStageCLutData* NewElem;
+    cmsStage* NewMPE;
+
+    _cmsAssert(clutPoints != NULL);
+
+    if (inputChan > MAX_INPUT_DIMENSIONS) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Too many input channels (%d channels, max=%d)", inputChan, MAX_INPUT_DIMENSIONS);
+        return NULL;
+    }
+
+    NewMPE = _cmsStageAllocPlaceholder(ContextID, cmsSigCLutElemType, inputChan, outputChan,
+                                     EvaluateCLUTfloatIn16, CLUTElemDup, CLutElemTypeFree, NULL );
+
+    if (NewMPE == NULL) return NULL;
+
+    NewElem = (_cmsStageCLutData*) _cmsMallocZero(ContextID, sizeof(_cmsStageCLutData));
+    if (NewElem == NULL) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    // From here on, cmsStageFree also releases NewElem and what hangs from it
+    NewMPE ->Data  = (void*) NewElem;
+
+    nNodes = CubeSize(clutPoints, inputChan);
+
+    // Reject empty tables, and entry counts that would wrap around: a wrapped
+    // count would allocate a table smaller than the grid being interpolated.
+    if (nNodes == 0 || outputChan == 0 || outputChan > UINT_MAX / nNodes) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    NewElem -> nEntries = n = outputChan * nNodes;
+    NewElem -> HasFloatValues = FALSE;
+
+    NewElem ->Tab.T  = (cmsUInt16Number*) _cmsCalloc(ContextID, n, sizeof(cmsUInt16Number));
+    if (NewElem ->Tab.T == NULL) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    if (Table != NULL) {
+        for (i=0; i < n; i++) {
+            NewElem ->Tab.T[i] = Table[i];
+        }
+    }
+
+    NewElem ->Params = _cmsComputeInterpParamsEx(ContextID, clutPoints, inputChan, outputChan, NewElem ->Tab.T, CMS_LERP_FLAGS_16BITS);
+    if (NewElem ->Params == NULL) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    return NewMPE;
+}
+
+// Allocates a 16-bit CLUT stage having the same number of grid points on
+// every input dimension. Thin wrapper over cmsStageAllocCLut16bitGranular.
+cmsStage* CMSEXPORT cmsStageAllocCLut16bit(cmsContext ContextID,
+                                    cmsUInt32Number nGridPoints,
+                                    cmsUInt32Number inputChan,
+                                    cmsUInt32Number outputChan,
+                                    const cmsUInt16Number* Table)
+{
+    cmsUInt32Number UniformGrid[MAX_INPUT_DIMENSIONS];
+    int dim = MAX_INPUT_DIMENSIONS;
+
+    // Every slot is filled, whatever inputChan is
+    while (dim-- > 0)
+        UniformGrid[dim] = nGridPoints;
+
+    return cmsStageAllocCLut16bitGranular(ContextID, UniformGrid, inputChan, outputChan, Table);
+}
+
+
+// Allocates a floating point CLUT stage having the same number of grid points
+// on every input dimension. Thin wrapper over cmsStageAllocCLutFloatGranular.
+cmsStage* CMSEXPORT cmsStageAllocCLutFloat(cmsContext ContextID,
+                                       cmsUInt32Number nGridPoints,
+                                       cmsUInt32Number inputChan,
+                                       cmsUInt32Number outputChan,
+                                       const cmsFloat32Number* Table)
+{
+    cmsUInt32Number UniformGrid[MAX_INPUT_DIMENSIONS];
+    int dim = MAX_INPUT_DIMENSIONS;
+
+    // Every slot is filled, whatever inputChan is
+    while (dim-- > 0)
+        UniformGrid[dim] = nGridPoints;
+
+    return cmsStageAllocCLutFloatGranular(ContextID, UniformGrid, inputChan, outputChan, Table);
+}
+
+
+
+// Allocates a floating point multidimensional CLUT, evaluated in floating
+// point. Table may have different granularity on each dimension.
+//   clutPoints: number of nodes on each of the inputChan dimensions.
+//   Table:      optional initial contents, outputChan values per node; when
+//               NULL the table is left zeroed.
+// Returns NULL on too many input channels, on an empty or too large table, or
+// when memory is exhausted.
+cmsStage* CMSEXPORT cmsStageAllocCLutFloatGranular(cmsContext ContextID, const cmsUInt32Number clutPoints[], cmsUInt32Number inputChan, cmsUInt32Number outputChan, const cmsFloat32Number* Table)
+{
+    cmsUInt32Number i, n, nNodes;
+    _cmsStageCLutData* NewElem;
+    cmsStage* NewMPE;
+
+    _cmsAssert(clutPoints != NULL);
+
+    if (inputChan > MAX_INPUT_DIMENSIONS) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Too many input channels (%d channels, max=%d)", inputChan, MAX_INPUT_DIMENSIONS);
+        return NULL;
+    }
+
+    NewMPE = _cmsStageAllocPlaceholder(ContextID, cmsSigCLutElemType, inputChan, outputChan,
+                                             EvaluateCLUTfloat, CLUTElemDup, CLutElemTypeFree, NULL);
+    if (NewMPE == NULL) return NULL;
+
+
+    NewElem = (_cmsStageCLutData*) _cmsMallocZero(ContextID, sizeof(_cmsStageCLutData));
+    if (NewElem == NULL) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    // From here on, cmsStageFree also releases NewElem and what hangs from it
+    NewMPE ->Data  = (void*) NewElem;
+
+    nNodes = CubeSize(clutPoints, inputChan);
+
+    // Computing n and nEntries could overflow: reject empty tables and entry
+    // counts that would wrap around, since a wrapped count would allocate a
+    // table smaller than the grid being interpolated.
+    if (nNodes == 0 || outputChan == 0 || outputChan > UINT_MAX / nNodes) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    NewElem -> nEntries = n = outputChan * nNodes;
+    NewElem -> HasFloatValues = TRUE;
+
+    NewElem ->Tab.TFloat  = (cmsFloat32Number*) _cmsCalloc(ContextID, n, sizeof(cmsFloat32Number));
+    if (NewElem ->Tab.TFloat == NULL) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    if (Table != NULL) {
+        for (i=0; i < n; i++) {
+            NewElem ->Tab.TFloat[i] = Table[i];
+        }
+    }
+
+    NewElem ->Params = _cmsComputeInterpParamsEx(ContextID, clutPoints,  inputChan, outputChan, NewElem ->Tab.TFloat, CMS_LERP_FLAGS_FLOAT);
+    if (NewElem ->Params == NULL) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    return NewMPE;
+}
+
+
+// Sampler callback that copies each input channel to the same output channel.
+// Cargo points to the number of channels to copy, read as an int.
+// Always returns 1.
+static
+int IdentitySampler(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void * Cargo)
+{
+    const int Channels = *(int*) Cargo;
+    int ch = 0;
+
+    while (ch < Channels) {
+        Out[ch] = In[ch];
+        ch++;
+    }
+
+    return 1;
+}
+
+// Creates an MPE that just copies input to output. It is built as a 16-bit
+// CLUT of nChan inputs and nChan outputs with two nodes per dimension, filled
+// by sampling it with IdentitySampler, and tagged as an identity element.
+cmsStage* CMSEXPORT _cmsStageAllocIdentityCLut(cmsContext ContextID, cmsUInt32Number nChan)
+{
+    cmsUInt32Number TwoNodes[MAX_INPUT_DIMENSIONS];
+    cmsStage* Identity;
+    int dim;
+
+    for (dim = MAX_INPUT_DIMENSIONS - 1; dim >= 0; dim--)
+        TwoNodes[dim] = 2;
+
+    Identity = cmsStageAllocCLut16bitGranular(ContextID, TwoNodes, nChan, nChan, NULL);
+    if (Identity == NULL) return NULL;
+
+    if (cmsStageSampleCLut16bit(Identity, IdentitySampler, &nChan, 0)) {
+
+        Identity->Implements = cmsSigIdentityElemType;
+        return Identity;
+    }
+
+    // Sampling failed, discard the stage
+    cmsStageFree(Identity);
+    return NULL;
+}
+
+
+
+// Quantize a value 0 <= i < MaxSamples to 0..0xffff
+//   i:          node position along one axis.
+//   MaxSamples: number of nodes on that axis.
+// The position is scaled by 65535 / (MaxSamples - 1) and saturated to a
+// 16-bit word by _cmsQuickSaturateWord.
+cmsUInt16Number CMSEXPORT _cmsQuantizeVal(cmsFloat64Number i, cmsUInt32Number MaxSamples)
+{
+    cmsFloat64Number x;
+
+    // A single-node axis would divide by zero below, and 0/0 is NaN for i == 0.
+    // The only valid node then maps to 0; a positive i saturates high.
+    // NOTE(review): 0xffff presumably matches what saturating the infinite
+    // quotient used to give -- confirm against _cmsQuickSaturateWord.
+    if (MaxSamples == 1)
+        return (cmsUInt16Number) ((i > 0) ? 0xffff : 0);
+
+    x = ((cmsFloat64Number) i * 65535.) / (cmsFloat64Number) (MaxSamples - 1);
+    return _cmsQuickSaturateWord(x);
+}
+
+
+// Sweeps the whole input space of a 16-bit CLUT stage, calling Sampler on each
+// node. The callback gets the node coordinates in In[] and the current table
+// contents for that node in Out[]. Unless SAMPLER_INSPECT is set in dwFlags,
+// whatever the callback leaves in Out[] is written back to the table.
+// Returns TRUE if all ok, FALSE on invalid stage, out-of-range channel counts,
+// an empty grid, or as soon as the callback returns zero.
+cmsBool CMSEXPORT cmsStageSampleCLut16bit(cmsStage* mpe, cmsSAMPLER16 Sampler, void * Cargo, cmsUInt32Number dwFlags)
+{
+    int Node, Dim, Chan, Base, Remainder;
+    cmsUInt32Number nNodes;
+    cmsUInt32Number nIn, nOut;
+    cmsUInt32Number* Grid;
+    cmsUInt16Number In[MAX_INPUT_DIMENSIONS+1], Out[MAX_STAGE_CHANNELS];
+    _cmsStageCLutData* Clut;
+
+    if (mpe == NULL) return FALSE;
+
+    Clut = (_cmsStageCLutData*) mpe->Data;
+    if (Clut == NULL) return FALSE;
+
+    Grid = Clut->Params->nSamples;
+    nIn  = Clut->Params->nInputs;
+    nOut = Clut->Params->nOutputs;
+
+    if (nIn == 0 || nOut == 0) return FALSE;
+    if (nIn > MAX_INPUT_DIMENSIONS) return FALSE;
+    if (nOut >= MAX_STAGE_CHANNELS) return FALSE;
+
+    memset(In, 0, sizeof(In));
+    memset(Out, 0, sizeof(Out));
+
+    nNodes = CubeSize(Grid, nIn);
+    if (nNodes == 0) return FALSE;
+
+    // Base is the position in the table of the first output of the current node
+    for (Node = 0, Base = 0; Node < (int) nNodes; Node++, Base += nOut) {
+
+        // Decompose the node number into per-axis positions, last axis varying fastest
+        Remainder = Node;
+        Dim = (int) nIn;
+        while (--Dim >= 0) {
+
+            cmsUInt32Number Colorant = Remainder % Grid[Dim];
+
+            Remainder /= Grid[Dim];
+
+            In[Dim] = _cmsQuantizeVal(Colorant, Grid[Dim]);
+        }
+
+        // Hand the current contents to the callback
+        if (Clut->Tab.T != NULL) {
+            for (Chan = 0; Chan < (int) nOut; Chan++)
+                Out[Chan] = Clut->Tab.T[Base + Chan];
+        }
+
+        if (!Sampler(In, Out, Cargo))
+            return FALSE;
+
+        // Store the result, unless we are only inspecting
+        if (!(dwFlags & SAMPLER_INSPECT) && Clut->Tab.T != NULL) {
+            for (Chan = 0; Chan < (int) nOut; Chan++)
+                Clut->Tab.T[Base + Chan] = Out[Chan];
+        }
+    }
+
+    return TRUE;
+}
+
+// Floating point counterpart of cmsStageSampleCLut16bit: sweeps the whole
+// input space of a float CLUT stage, calling Sampler on each node. The
+// callback gets the node coordinates in In[] and the current table contents
+// for that node in Out[]. Unless SAMPLER_INSPECT is set in dwFlags, whatever
+// the callback leaves in Out[] is written back to the table.
+// Returns TRUE if all ok, FALSE on invalid stage, out-of-range channel counts,
+// an empty grid, or as soon as the callback returns zero.
+cmsBool CMSEXPORT cmsStageSampleCLutFloat(cmsStage* mpe, cmsSAMPLERFLOAT Sampler, void * Cargo, cmsUInt32Number dwFlags)
+{
+    int i, t, index, rest;
+    cmsUInt32Number nTotalPoints;
+    cmsUInt32Number nInputs, nOutputs;
+    cmsUInt32Number* nSamples;
+    cmsFloat32Number In[MAX_INPUT_DIMENSIONS+1], Out[MAX_STAGE_CHANNELS];
+    _cmsStageCLutData* clut;
+
+    // Same guards as the 16-bit version, instead of dereferencing blindly
+    if (mpe == NULL) return FALSE;
+
+    clut = (_cmsStageCLutData*) mpe->Data;
+
+    if (clut == NULL) return FALSE;
+
+    nSamples = clut->Params ->nSamples;
+    nInputs  = clut->Params ->nInputs;
+    nOutputs = clut->Params ->nOutputs;
+
+    if (nInputs <= 0) return FALSE;
+    if (nOutputs <= 0) return FALSE;
+    if (nInputs  > MAX_INPUT_DIMENSIONS) return FALSE;
+    if (nOutputs >= MAX_STAGE_CHANNELS) return FALSE;
+
+    // Start from zeroed buffers, so the callback never sees indeterminate
+    // values in Out[] when the stage has no table
+    memset(In, 0, sizeof(In));
+    memset(Out, 0, sizeof(Out));
+
+    nTotalPoints = CubeSize(nSamples, nInputs);
+    if (nTotalPoints == 0) return FALSE;
+
+    index = 0;
+    for (i = 0; i < (int)nTotalPoints; i++) {
+
+        // Decompose the node number into per-axis positions, last axis varying fastest
+        rest = i;
+        for (t = (int) nInputs-1; t >=0; --t) {
+
+            cmsUInt32Number  Colorant = rest % nSamples[t];
+
+            rest /= nSamples[t];
+
+            In[t] =  (cmsFloat32Number) (_cmsQuantizeVal(Colorant, nSamples[t]) / 65535.0);
+        }
+
+        // Hand the current contents to the callback
+        if (clut ->Tab.TFloat != NULL) {
+            for (t=0; t < (int) nOutputs; t++)
+                Out[t] = clut->Tab.TFloat[index + t];
+        }
+
+        if (!Sampler(In, Out, Cargo))
+            return FALSE;
+
+        // Store the result, unless we are only inspecting
+        if (!(dwFlags & SAMPLER_INSPECT)) {
+
+            if (clut ->Tab.TFloat != NULL) {
+                for (t=0; t < (int) nOutputs; t++)
+                    clut->Tab.TFloat[index + t] = Out[t];
+            }
+        }
+
+        index += nOutputs;
+    }
+
+    return TRUE;
+}
+
+
+
+// Sweeps a whole input space of nInputs dimensions, with clutPoints[] nodes by
+// dimension, and calls Sampler on each node. No table is involved: the
+// callback gets the node coordinates in In[] and NULL as output.
+// Returns TRUE if all ok, FALSE on too many inputs, an empty grid, or as soon
+// as the callback returns zero.
+cmsBool CMSEXPORT cmsSliceSpace16(cmsUInt32Number nInputs, const cmsUInt32Number clutPoints[],
+                                         cmsSAMPLER16 Sampler, void * Cargo)
+{
+    int Node, Dim, Remainder;
+    cmsUInt32Number nNodes;
+    cmsUInt16Number In[cmsMAXCHANNELS];
+
+    if (nInputs >= cmsMAXCHANNELS) return FALSE;
+
+    nNodes = CubeSize(clutPoints, nInputs);
+    if (nNodes == 0) return FALSE;
+
+    for (Node = 0; Node < (int) nNodes; Node++) {
+
+        // Decompose the node number into per-axis positions, last axis varying fastest
+        Remainder = Node;
+        Dim = (int) nInputs;
+        while (--Dim >= 0) {
+
+            cmsUInt32Number Colorant = Remainder % clutPoints[Dim];
+
+            Remainder /= clutPoints[Dim];
+            In[Dim] = _cmsQuantizeVal(Colorant, clutPoints[Dim]);
+        }
+
+        if (!Sampler(In, NULL, Cargo))
+            return FALSE;
+    }
+
+    return TRUE;
+}
+
+// Floating point counterpart of cmsSliceSpace16: sweeps a whole input space of
+// nInputs dimensions, with clutPoints[] nodes by dimension, and calls Sampler
+// on each node with the coordinates scaled to 0..1.0 and NULL as output.
+// Returns TRUE if all ok, FALSE on too many inputs, an empty grid, or as soon
+// as the callback returns zero.
+cmsInt32Number CMSEXPORT cmsSliceSpaceFloat(cmsUInt32Number nInputs, const cmsUInt32Number clutPoints[],
+                                            cmsSAMPLERFLOAT Sampler, void * Cargo)
+{
+    int Node, Dim, Remainder;
+    cmsUInt32Number nNodes;
+    cmsFloat32Number In[cmsMAXCHANNELS];
+
+    if (nInputs >= cmsMAXCHANNELS) return FALSE;
+
+    nNodes = CubeSize(clutPoints, nInputs);
+    if (nNodes == 0) return FALSE;
+
+    for (Node = 0; Node < (int) nNodes; Node++) {
+
+        // Decompose the node number into per-axis positions, last axis varying fastest
+        Remainder = Node;
+        Dim = (int) nInputs;
+        while (--Dim >= 0) {
+
+            cmsUInt32Number Colorant = Remainder % clutPoints[Dim];
+
+            Remainder /= clutPoints[Dim];
+            In[Dim] = (cmsFloat32Number) (_cmsQuantizeVal(Colorant, clutPoints[Dim]) / 65535.0);
+        }
+
+        if (!Sampler(In, NULL, Cargo))
+            return FALSE;
+    }
+
+    return TRUE;
+}
+
+// ********************************************************************************
+// Type cmsSigLab2XYZElemType
+// ********************************************************************************
+
+
+// Converts a Lab value given in 0..1.0 notation (V4 rules) to XYZ, also in
+// 0..1.0 notation. The stage itself carries no data, so mpe is not used.
+static
+void EvaluateLab2XYZ(const cmsFloat32Number In[],
+                     cmsFloat32Number Out[],
+                     const cmsStage *mpe)
+{
+    const cmsFloat64Number Scale = MAX_ENCODEABLE_XYZ;
+    cmsCIELab LabIn;
+    cmsCIEXYZ XYZOut;
+
+    // V4 rules: L* is 0..100, a* and b* are -128..+127
+    LabIn.L = In[0] * 100.0;
+    LabIn.a = In[1] * 255.0 - 128.0;
+    LabIn.b = In[2] * 255.0 - 128.0;
+
+    cmsLab2XYZ(NULL, &XYZOut, &LabIn);
+
+    // From XYZ, range 0..1.99997 to 0..1.0, note that 1.99997 comes from 0xffff
+    // encoded as 1.15 fixed point, so 1 + (32767.0 / 32768.0)
+    Out[0] = (cmsFloat32Number) ((cmsFloat64Number) XYZOut.X / Scale);
+    Out[1] = (cmsFloat32Number) ((cmsFloat64Number) XYZOut.Y / Scale);
+    Out[2] = (cmsFloat32Number) ((cmsFloat64Number) XYZOut.Z / Scale);
+
+    cmsUNUSED_PARAMETER(mpe);
+}
+
+
+// Allocates a 3-in / 3-out stage that converts Lab to XYZ by means of
+// EvaluateLab2XYZ. The stage holds no private data, so neither a dup nor a
+// free routine is registered.
+cmsStage* CMSEXPORT _cmsStageAllocLab2XYZ(cmsContext ContextID)
+{
+    cmsStage* Stage = _cmsStageAllocPlaceholder(ContextID, cmsSigLab2XYZElemType, 3, 3,
+                                                EvaluateLab2XYZ, NULL, NULL, NULL);
+
+    return Stage;
+}
+
+// ********************************************************************************
+
+// v2 L=100 is supposed to be placed on 0xFF00. There is no reasonable
+// number of gridpoints that would make an exact match. However, a
+// prelinearization of 258 entries maps 0xFF00 exactly on entry 257, and this
+// is good to avoid scum dot. The remaining entries should be scaled by
+// (255*257/256), which unfortunately is not exact.
+//
+// Returns a 3-channel tone curve stage tagged as cmsSigLabV2toV4, or NULL if
+// any allocation fails.
+cmsStage* _cmsStageAllocLabV2ToV4curves(cmsContext ContextID)
+{
+    cmsStage* Stage;
+    cmsToneCurve* Curves[3];
+    int Channel, Entry;
+
+    for (Channel = 0; Channel < 3; Channel++)
+        Curves[Channel] = cmsBuildTabulatedToneCurve16(ContextID, 258, NULL);
+
+    if (Curves[0] == NULL || Curves[1] == NULL || Curves[2] == NULL) {
+        cmsFreeToneCurveTriple(Curves);
+        return NULL;
+    }
+
+    for (Channel = 0; Channel < 3; Channel++) {
+
+        // We need to map * (0xffff / 0xff00), that's same as (257 / 256)
+        // So we can use 258-entry tables to do the trick (i / 257) * (255 * 257) * (257 / 256);
+        for (Entry = 0; Entry < 257; Entry++)
+            Curves[Channel]->Table16[Entry] = (cmsUInt16Number) ((Entry * 0xffff + 0x80) >> 8);
+
+        Curves[Channel]->Table16[257] = 0xffff;
+    }
+
+    // The temporary curves are released right after building the stage
+    Stage = cmsStageAllocToneCurves(ContextID, 3, Curves);
+    cmsFreeToneCurveTriple(Curves);
+
+    if (Stage != NULL)
+        Stage->Implements = cmsSigLabV2toV4;
+
+    return Stage;
+}
+
+// ********************************************************************************
+
+// Matrix-based Lab V2 to V4 conversion, which is more accurate, but slower and
+// cannot properly be saved in devicelink profiles. Every channel is scaled by
+// 65535/65280. Returns NULL if the matrix stage cannot be allocated.
+cmsStage* CMSEXPORT _cmsStageAllocLabV2ToV4(cmsContext ContextID)
+{
+    static const cmsFloat64Number Scale[] = {
+        65535.0/65280.0, 0, 0,
+        0, 65535.0/65280.0, 0,
+        0, 0, 65535.0/65280.0
+    };
+
+    cmsStage* Stage = cmsStageAllocMatrix(ContextID, 3, 3, Scale, NULL);
+
+    if (Stage != NULL)
+        Stage->Implements = cmsSigLabV2toV4;
+
+    return Stage;
+}
+
+
+// Matrix-based Lab V4 to V2 conversion, the reverse of _cmsStageAllocLabV2ToV4.
+// Every channel is scaled by 65280/65535. Returns NULL if the matrix stage
+// cannot be allocated.
+cmsStage* CMSEXPORT _cmsStageAllocLabV4ToV2(cmsContext ContextID)
+{
+    static const cmsFloat64Number Scale[] = {
+        65280.0/65535.0, 0, 0,
+        0, 65280.0/65535.0, 0,
+        0, 0, 65280.0/65535.0
+    };
+
+    cmsStage* Stage = cmsStageAllocMatrix(ContextID, 3, 3, Scale, NULL);
+
+    if (Stage != NULL)
+        Stage->Implements = cmsSigLabV4toV2;
+
+    return Stage;
+}
+
+
+// From Lab to float. The MPE gives numbers in the normal Lab range, and the
+// formatters need them in the 0..1.0 range:
+// L* : 0...100 => 0...1.0  (L* / 100)
+// ab* : -128..+127 to 0..1  ((ab* + 128) / 255)
+// Returns NULL if the matrix stage cannot be allocated.
+cmsStage* _cmsStageNormalizeFromLabFloat(cmsContext ContextID)
+{
+    static const cmsFloat64Number Scale[] = {
+        1.0/100.0, 0, 0,
+        0, 1.0/255.0, 0,
+        0, 0, 1.0/255.0
+    };
+
+    static const cmsFloat64Number Shift[] = {
+        0,
+        128.0/255.0,
+        128.0/255.0
+    };
+
+    cmsStage* Stage = cmsStageAllocMatrix(ContextID, 3, 3, Scale, Shift);
+
+    if (Stage != NULL)
+        Stage->Implements = cmsSigLab2FloatPCS;
+
+    return Stage;
+}
+
+// From XYZ to floating point PCS: every channel is scaled by 32768/65535.
+// Returns NULL if the matrix stage cannot be allocated.
+cmsStage* _cmsStageNormalizeFromXyzFloat(cmsContext ContextID)
+{
+    static const cmsFloat64Number Scale[] = {
+        (32768.0/65535.0), 0, 0,
+        0, (32768.0/65535.0), 0,
+        0, 0, (32768.0/65535.0)
+    };
+
+    cmsStage* Stage = cmsStageAllocMatrix(ContextID, 3, 3, Scale, NULL);
+
+    if (Stage != NULL)
+        Stage->Implements = cmsSigXYZ2FloatPCS;
+
+    return Stage;
+}
+
+// From floating point PCS to Lab, the reverse of _cmsStageNormalizeFromLabFloat:
+// L* = v * 100, ab* = v * 255 - 128.
+// Returns NULL if the matrix stage cannot be allocated.
+cmsStage* _cmsStageNormalizeToLabFloat(cmsContext ContextID)
+{
+    static const cmsFloat64Number Scale[] = {
+        100.0, 0, 0,
+        0, 255.0, 0,
+        0, 0, 255.0
+    };
+
+    static const cmsFloat64Number Shift[] = {
+        0,
+        -128.0,
+        -128.0
+    };
+
+    cmsStage* Stage = cmsStageAllocMatrix(ContextID, 3, 3, Scale, Shift);
+
+    if (Stage != NULL)
+        Stage->Implements = cmsSigFloatPCS2Lab;
+
+    return Stage;
+}
+
+// From floating point PCS to XYZ: every channel is scaled by 65535/32768.
+// Returns NULL if the matrix stage cannot be allocated.
+cmsStage* _cmsStageNormalizeToXyzFloat(cmsContext ContextID)
+{
+    static const cmsFloat64Number Scale[] = {
+        (65535.0/32768.0), 0, 0,
+        0, (65535.0/32768.0), 0,
+        0, 0, (65535.0/32768.0)
+    };
+
+    cmsStage* Stage = cmsStageAllocMatrix(ContextID, 3, 3, Scale, NULL);
+
+    if (Stage != NULL)
+        Stage->Implements = cmsSigFloatPCS2XYZ;
+
+    return Stage;
+}
+
+// Copies the input channels to the output, replacing by zero any value
+// smaller than zero.
+static
+void Clipper(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+       cmsUInt32Number ch;
+
+       for (ch = 0; ch < mpe->InputChannels; ch++) {
+
+              cmsFloat32Number Value = In[ch];
+
+              if (Value < 0)
+                     Out[ch] = 0;
+              else
+                     Out[ch] = Value;
+       }
+}
+
+// Allocates a stage of nChannels inputs and outputs that clips negative values
+// to zero, by means of Clipper. The stage holds no private data.
+cmsStage*  _cmsStageClipNegatives(cmsContext ContextID, cmsUInt32Number nChannels)
+{
+       cmsStage* Stage = _cmsStageAllocPlaceholder(ContextID, cmsSigClipNegativesElemType,
+                                                   nChannels, nChannels, Clipper, NULL, NULL, NULL);
+
+       return Stage;
+}
+
+// ********************************************************************************
+// Type cmsSigXYZ2LabElemType
+// ********************************************************************************
+
+// Converts an XYZ value given in 0..1.0 notation to Lab (V4 rules), also in
+// 0..1.0 notation. The stage itself carries no data, so mpe is not used.
+static
+void EvaluateXYZ2Lab(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+    const cmsFloat64Number Scale = MAX_ENCODEABLE_XYZ;
+    cmsCIEXYZ XYZIn;
+    cmsCIELab LabOut;
+
+    // From 0..1.0 to XYZ
+    XYZIn.X = In[0] * Scale;
+    XYZIn.Y = In[1] * Scale;
+    XYZIn.Z = In[2] * Scale;
+
+    cmsXYZ2Lab(NULL, &LabOut, &XYZIn);
+
+    // From V4 Lab to 0..1.0
+    Out[0] = (cmsFloat32Number) (LabOut.L / 100.0);
+    Out[1] = (cmsFloat32Number) ((LabOut.a + 128.0) / 255.0);
+    Out[2] = (cmsFloat32Number) ((LabOut.b + 128.0) / 255.0);
+
+    cmsUNUSED_PARAMETER(mpe);
+}
+
+// Allocates a 3-in / 3-out stage that converts XYZ to Lab by means of
+// EvaluateXYZ2Lab. The stage holds no private data, so neither a dup nor a
+// free routine is registered.
+cmsStage* CMSEXPORT _cmsStageAllocXYZ2Lab(cmsContext ContextID)
+{
+    cmsStage* Stage = _cmsStageAllocPlaceholder(ContextID, cmsSigXYZ2LabElemType, 3, 3,
+                                                EvaluateXYZ2Lab, NULL, NULL, NULL);
+
+    return Stage;
+}
+
+// ********************************************************************************
+
+// For v4, S-Shaped curves are placed in a/b axis to increase resolution near gray
+//
+// Builds a 3-channel tone curve stage: a gamma 1.0 curve for L, and the
+// parametric curve type 108 with parameter 2.4 for a and b.
+// Returns NULL if any curve or the stage cannot be allocated.
+cmsStage* _cmsStageAllocLabPrelin(cmsContext ContextID)
+{
+    cmsStage* mpe;
+    cmsToneCurve* LabTable[3];
+    cmsFloat64Number Params[1] =  {2.4} ;
+
+    LabTable[0] = cmsBuildGamma(ContextID, 1.0);
+    LabTable[1] = cmsBuildParametricToneCurve(ContextID, 108, Params);
+    LabTable[2] = cmsBuildParametricToneCurve(ContextID, 108, Params);
+
+    if (LabTable[0] == NULL || LabTable[1] == NULL || LabTable[2] == NULL) {
+        cmsFreeToneCurveTriple(LabTable);
+        return NULL;
+    }
+
+    mpe = cmsStageAllocToneCurves(ContextID, 3, LabTable);
+
+    // The temporary curves are still ours to release; returning without
+    // freeing them leaked all three on every call.
+    cmsFreeToneCurveTriple(LabTable);
+
+    return mpe;
+}
+
+
+// Frees a single MPE: the private data is released first through the free
+// routine of the stage, when there is one, and then the stage itself.
+void CMSEXPORT cmsStageFree(cmsStage* mpe)
+{
+    cmsContext ContextID = mpe->ContextID;
+
+    if (mpe->FreePtr != NULL)
+        mpe->FreePtr(mpe);
+
+    _cmsFree(ContextID, mpe);
+}
+
+
+// Returns the number of input channels of the stage. mpe must not be NULL.
+cmsUInt32Number  CMSEXPORT cmsStageInputChannels(const cmsStage* mpe)
+{
+    const cmsUInt32Number nInputs = mpe->InputChannels;
+
+    return nInputs;
+}
+
+// Returns the number of output channels of the stage. mpe must not be NULL.
+cmsUInt32Number  CMSEXPORT cmsStageOutputChannels(const cmsStage* mpe)
+{
+    const cmsUInt32Number nOutputs = mpe->OutputChannels;
+
+    return nOutputs;
+}
+
+// Returns the type signature of the stage. mpe must not be NULL.
+cmsStageSignature CMSEXPORT cmsStageType(const cmsStage* mpe)
+{
+    const cmsStageSignature Signature = mpe->Type;
+
+    return Signature;
+}
+
+// Returns the private data pointer of the stage, which may be NULL.
+// mpe must not be NULL.
+void* CMSEXPORT cmsStageData(const cmsStage* mpe)
+{
+    void* PrivateData = mpe->Data;
+
+    return PrivateData;
+}
+
+// Returns the stage linked after this one, or NULL if there is none.
+// mpe must not be NULL.
+cmsStage*  CMSEXPORT cmsStageNext(const cmsStage* mpe)
+{
+    cmsStage* Following = mpe->Next;
+
+    return Following;
+}
+
+
+// Duplicates an MPE. The copy gets the same type, channel counts, callbacks
+// and Implements tag; the private data is copied by means of the dup routine
+// of the stage, or left as NULL when the stage has none.
+// Returns NULL if mpe is NULL or any allocation fails.
+cmsStage* CMSEXPORT cmsStageDup(cmsStage* mpe)
+{
+    cmsStage* Clone;
+
+    if (mpe == NULL) return NULL;
+
+    Clone = _cmsStageAllocPlaceholder(mpe->ContextID,
+                                      mpe->Type,
+                                      mpe->InputChannels,
+                                      mpe->OutputChannels,
+                                      mpe->EvalPtr,
+                                      mpe->DupElemPtr,
+                                      mpe->FreePtr,
+                                      NULL);
+    if (Clone == NULL) return NULL;
+
+    Clone->Implements = mpe->Implements;
+
+    // Nothing to copy on stages without a dup routine
+    if (mpe->DupElemPtr == NULL) {
+        Clone->Data = NULL;
+        return Clone;
+    }
+
+    Clone->Data = mpe->DupElemPtr(mpe);
+    if (Clone->Data != NULL)
+        return Clone;
+
+    // The private data could not be duplicated
+    cmsStageFree(Clone);
+    return NULL;
+}
+
+
+// ***********************************************************************************************************
+
+// Sets up the channel count of the pipeline from its stages: inputs are taken
+// from the first stage and outputs from the last one. A pipeline with no
+// stages is left as it is. The chain is then checked for consistency.
+// Returns FALSE if the first or last stage cannot be obtained, or if any stage
+// takes a number of inputs different from the outputs of the previous one.
+static
+cmsBool BlessLUT(cmsPipeline* lut)
+{
+    cmsStage* Head;
+    cmsStage* Tail;
+    cmsStage* Upstream;
+    cmsStage* Downstream;
+
+    // We can set the input/output channels only if we have elements.
+    if (lut->Elements == NULL) return TRUE;
+
+    Head = cmsPipelineGetPtrToFirstStage(lut);
+    Tail = cmsPipelineGetPtrToLastStage(lut);
+
+    if (Head == NULL || Tail == NULL) return FALSE;
+
+    lut->InputChannels  = Head->InputChannels;
+    lut->OutputChannels = Tail->OutputChannels;
+
+    // Check chain consistency, walking each pair of adjacent stages
+    for (Upstream = Head, Downstream = Head->Next;
+         Downstream != NULL;
+         Upstream = Upstream->Next, Downstream = Downstream->Next) {
+
+        if (Downstream->InputChannels != Upstream->OutputChannels)
+            return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Default evaluator of the LUT on 16-bit basis. The input is converted to
+// float, run through every stage in float, and converted back to 16 bits only
+// at the end, so precision is retained across stages. D is the pipeline.
+static
+void _LUTeval16(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[],  CMSREGISTER const void* D)
+{
+    cmsPipeline* lut = (cmsPipeline*) D;
+    cmsStage* Stage;
+    cmsFloat32Number Buffers[2][MAX_STAGE_CHANNELS];
+    cmsFloat32Number* Current = &Buffers[0][0];
+    cmsFloat32Number* Scratch = &Buffers[1][0];
+    cmsFloat32Number* Swap;
+
+    From16ToFloat(In, Current, lut->InputChannels);
+
+    // Two buffers used alternately: the output of a stage is the input of the next
+    Stage = lut->Elements;
+    while (Stage != NULL) {
+
+        Stage->EvalPtr(Current, Scratch, Stage);
+
+        Swap    = Current;
+        Current = Scratch;
+        Scratch = Swap;
+
+        Stage = Stage->Next;
+    }
+
+    FromFloatTo16(Current, Out, lut->OutputChannels);
+}
+
+
+
+// Default evaluator of the LUT on cmsFloat32Number basis. The input is copied
+// to a work buffer, run through every stage, and the final result is copied
+// to Out. D is the pipeline.
+static
+void _LUTevalFloat(CMSREGISTER const cmsFloat32Number In[], CMSREGISTER cmsFloat32Number Out[], const void* D)
+{
+    cmsPipeline* lut = (cmsPipeline*) D;
+    cmsStage* Stage;
+    cmsFloat32Number Buffers[2][MAX_STAGE_CHANNELS];
+    cmsFloat32Number* Current = &Buffers[0][0];
+    cmsFloat32Number* Scratch = &Buffers[1][0];
+    cmsFloat32Number* Swap;
+
+    memmove(Current, In, lut->InputChannels * sizeof(cmsFloat32Number));
+
+    // Two buffers used alternately: the output of a stage is the input of the next
+    Stage = lut->Elements;
+    while (Stage != NULL) {
+
+        Stage->EvalPtr(Current, Scratch, Stage);
+
+        Swap    = Current;
+        Current = Scratch;
+        Scratch = Swap;
+
+        Stage = Stage->Next;
+    }
+
+    memmove(Out, Current, lut->OutputChannels * sizeof(cmsFloat32Number));
+}
+
+
+// LUT creation. Allocates an empty pipeline with the given channel counts and
+// the default 16-bit and float evaluators. The private data pointer is set to
+// the pipeline itself. Returns NULL if any channel count is cmsMAXCHANNELS or
+// more, or when memory is exhausted.
+cmsPipeline* CMSEXPORT cmsPipelineAlloc(cmsContext ContextID, cmsUInt32Number InputChannels, cmsUInt32Number OutputChannels)
+{
+       cmsPipeline* Pipe;
+
+       // A value of zero in channels is allowed as placeholder
+       if (InputChannels >= cmsMAXCHANNELS) return NULL;
+       if (OutputChannels >= cmsMAXCHANNELS) return NULL;
+
+       Pipe = (cmsPipeline*) _cmsMallocZero(ContextID, sizeof(cmsPipeline));
+       if (Pipe == NULL) return NULL;
+
+       Pipe->ContextID      = ContextID;
+       Pipe->InputChannels  = InputChannels;
+       Pipe->OutputChannels = OutputChannels;
+
+       Pipe->Eval16Fn       = _LUTeval16;
+       Pipe->EvalFloatFn    = _LUTevalFloat;
+       Pipe->DupDataFn      = NULL;
+       Pipe->FreeDataFn     = NULL;
+       Pipe->Data           = Pipe;
+
+       if (BlessLUT(Pipe))
+           return Pipe;
+
+       _cmsFree(ContextID, Pipe);
+       return NULL;
+}
+
+// Returns the context the pipeline belongs to. lut must not be NULL.
+cmsContext CMSEXPORT cmsGetPipelineContextID(const cmsPipeline* lut)
+{
+    cmsContext ContextID;
+
+    _cmsAssert(lut != NULL);
+
+    ContextID = lut->ContextID;
+    return ContextID;
+}
+
+// Returns the number of input channels of the pipeline. lut must not be NULL.
+cmsUInt32Number CMSEXPORT cmsPipelineInputChannels(const cmsPipeline* lut)
+{
+    cmsUInt32Number nInputs;
+
+    _cmsAssert(lut != NULL);
+
+    nInputs = lut->InputChannels;
+    return nInputs;
+}
+
+// Returns the number of output channels of the pipeline. lut must not be NULL.
+cmsUInt32Number CMSEXPORT cmsPipelineOutputChannels(const cmsPipeline* lut)
+{
+    cmsUInt32Number nOutputs;
+
+    _cmsAssert(lut != NULL);
+
+    nOutputs = lut->OutputChannels;
+    return nOutputs;
+}
+
+// Frees a pipeline: every stage in the chain, then the private data through
+// the free routine when there is one, and finally the pipeline itself.
+// A NULL pipeline is ignored.
+void CMSEXPORT cmsPipelineFree(cmsPipeline* lut)
+{
+    cmsStage* Stage;
+
+    if (lut == NULL) return;
+
+    Stage = lut->Elements;
+    while (Stage != NULL) {
+
+        // Pick the link before the stage goes away
+        cmsStage* Following = Stage->Next;
+
+        cmsStageFree(Stage);
+        Stage = Following;
+    }
+
+    if (lut->FreeDataFn != NULL)
+        lut->FreeDataFn(lut->ContextID, lut->Data);
+
+    _cmsFree(lut->ContextID, lut);
+}
+
+
+// Evaluates the pipeline on 16-bit basis, by calling its 16-bit evaluator with
+// the private data of the pipeline. lut must not be NULL.
+void CMSEXPORT cmsPipelineEval16(const cmsUInt16Number In[], cmsUInt16Number Out[],  const cmsPipeline* lut)
+{
+    _cmsAssert(lut != NULL);
+
+    (lut->Eval16Fn)(In, Out, lut->Data);
+}
+
+
+// Evaluates the pipeline on cmsFloat32Number basis, by calling its float
+// evaluator. Note the evaluator gets the pipeline itself, not its private
+// data. lut must not be NULL.
+void CMSEXPORT cmsPipelineEvalFloat(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsPipeline* lut)
+{
+    _cmsAssert(lut != NULL);
+
+    (lut->EvalFloatFn)(In, Out, lut);
+}
+
+
+
+// Duplicates a LUT. Every stage is duplicated and linked in the same order,
+// the evaluators and the data callbacks are copied, and the private data is
+// duplicated through DupDataFn when there is one.
+// Returns NULL if lut is NULL, if any stage cannot be duplicated, or if the
+// resulting chain is not consistent. Nothing is leaked on failure.
+cmsPipeline* CMSEXPORT cmsPipelineDup(const cmsPipeline* lut)
+{
+    cmsPipeline* NewLUT;
+    cmsStage *NewMPE, *Anterior = NULL, *mpe;
+    cmsBool  First = TRUE;
+
+    if (lut == NULL) return NULL;
+
+    NewLUT = cmsPipelineAlloc(lut ->ContextID, lut ->InputChannels, lut ->OutputChannels);
+    if (NewLUT == NULL) return NULL;
+
+    for (mpe = lut ->Elements;
+         mpe != NULL;
+         mpe = mpe ->Next) {
+
+             NewMPE = cmsStageDup(mpe);
+
+             if (NewMPE == NULL) {
+                 cmsPipelineFree(NewLUT);
+                 return NULL;
+             }
+
+             if (First) {
+                 NewLUT ->Elements = NewMPE;
+                 First = FALSE;
+             }
+             else {
+                if (Anterior != NULL) 
+                    Anterior ->Next = NewMPE;
+             }
+
+            Anterior = NewMPE;
+    }
+
+    NewLUT ->Eval16Fn    = lut ->Eval16Fn;
+    NewLUT ->EvalFloatFn = lut ->EvalFloatFn;
+    NewLUT ->DupDataFn   = lut ->DupDataFn;
+    NewLUT ->FreeDataFn  = lut ->FreeDataFn;
+
+    if (NewLUT ->DupDataFn != NULL)
+        NewLUT ->Data = NewLUT ->DupDataFn(lut ->ContextID, lut->Data);
+
+
+    NewLUT ->SaveAs8Bits    = lut ->SaveAs8Bits;
+
+    if (!BlessLUT(NewLUT))
+    {
+        // Only hand the private data to FreeDataFn when we own a duplicate of
+        // it: without DupDataFn, Data still points to NewLUT itself.
+        if (NewLUT ->DupDataFn == NULL || NewLUT ->Data == NULL)
+            NewLUT ->FreeDataFn = NULL;
+
+        // Release the duplicated stages too, not just the pipeline struct
+        cmsPipelineFree(NewLUT);
+        return NULL;
+    }
+
+    return NewLUT;
+}
+
+
+// Links stage mpe at the beginning or the end of the pipeline, then re-validates it.
+// Returns FALSE on NULL arguments, unknown loc, or when BlessLUT rejects the result.
+int CMSEXPORT cmsPipelineInsertStage(cmsPipeline* lut, cmsStageLoc loc, cmsStage* mpe)
+{
+    cmsStage* Anterior = NULL, *pt;
+
+    if (lut == NULL || mpe == NULL)
+        return FALSE;
+
+    switch (loc) {
+
+        case cmsAT_BEGIN:
+            mpe ->Next = lut ->Elements;
+            lut ->Elements = mpe;
+            break;
+
+        case cmsAT_END:
+            // Walk to the current tail (Anterior stays NULL on an empty pipeline)
+            for (pt = lut ->Elements; pt != NULL; pt = pt ->Next) Anterior = pt;
+
+            if (Anterior == NULL)
+                lut ->Elements = mpe;
+            else
+                Anterior ->Next = mpe;
+
+            mpe ->Next = NULL;   // the appended stage is always the new tail, also on an empty pipeline
+            break;
+
+        default:
+            return FALSE;
+    }
+
+    return BlessLUT(lut);
+}
+
+// Unlinks the first or last stage of the pipeline. If mpe is not NULL the stage is
+// handed to the caller through *mpe (NULL when nothing was removed); otherwise the
+// unlinked stage is freed. An unknown loc removes nothing.
+void CMSEXPORT cmsPipelineUnlinkStage(cmsPipeline* lut, cmsStageLoc loc, cmsStage** mpe)
+{
+    cmsStage *Anterior, *pt, *Last;
+    cmsStage *Unlinked = NULL;
+
+    // Without a pipeline, or with an empty one, there is nothing to remove
+    if (lut == NULL || lut ->Elements == NULL) {
+        if (mpe) *mpe = NULL;
+        return;
+    }
+
+    // On depending on the strategy...
+    switch (loc) {
+
+        case cmsAT_BEGIN:
+            {
+                cmsStage* elem = lut ->Elements;
+                lut ->Elements = elem -> Next;
+                elem ->Next = NULL;
+                Unlinked = elem;
+            }
+            break;
+
+        case cmsAT_END:
+            Anterior = Last = NULL;
+            for (pt = lut ->Elements;
+                pt != NULL;
+                pt = pt -> Next) {
+                    Anterior = Last;
+                    Last = pt;
+            }
+
+            Unlinked = Last;  // Next already points to NULL
+
+            // Truncate the chain
+            if (Anterior)
+                Anterior ->Next = NULL;
+            else
+                lut ->Elements = NULL;
+            break;
+        default:
+            break;   // Unknown location: Unlinked stays NULL
+    }
+
+    if (mpe)
+        *mpe = Unlinked;
+    else if (Unlinked != NULL)
+        cmsStageFree(Unlinked);   // never hand a NULL stage to cmsStageFree
+
+    // May fail, but we ignore it
+    BlessLUT(lut);
+}
+
+
+// Appends a copy of every stage of l2 to the end of l1 (l2 itself is not modified).
+// Returns FALSE as soon as a stage cannot be inserted, else the result of BlessLUT(l1).
+cmsBool  CMSEXPORT cmsPipelineCat(cmsPipeline* l1, const cmsPipeline* l2)
+{
+    cmsStage* Source = l2 ->Elements;
+
+    // Two empty pipelines: l1 simply takes over the channel counts of l2
+    if (l1 ->Elements == NULL && Source == NULL) {
+        l1 ->InputChannels  = l2 ->InputChannels;
+        l1 ->OutputChannels = l2 ->OutputChannels;
+    }
+
+    while (Source != NULL) {
+        // Stages are duplicated, so l1 never shares nodes with l2
+        cmsStage* Copy = cmsStageDup(Source);
+
+        if (!cmsPipelineInsertStage(l1, cmsAT_END, Copy))
+            return FALSE;
+
+        Source = Source ->Next;
+    }
+
+    return BlessLUT(l1);
+}
+
+
+cmsBool CMSEXPORT cmsPipelineSetSaveAs8bitsFlag(cmsPipeline* lut, cmsBool On)
+{
+    // Swap in the new flag and report the value it replaces
+    const cmsBool Previous = lut->SaveAs8Bits;
+    lut->SaveAs8Bits = On;
+    return Previous;
+}
+
+
+cmsStage* CMSEXPORT cmsPipelineGetPtrToFirstStage(const cmsPipeline* lut)
+{
+    return lut ->Elements;   // head of the stage chain; NULL when the pipeline has no stages
+}
+
+cmsStage* CMSEXPORT cmsPipelineGetPtrToLastStage(const cmsPipeline* lut)
+{
+    // Follow the chain to its tail; an empty pipeline yields NULL
+    cmsStage* Tail = lut->Elements;
+
+    if (Tail != NULL)
+        while (Tail->Next != NULL) Tail = Tail->Next;
+    return Tail;
+}
+
+cmsUInt32Number CMSEXPORT cmsPipelineStageCount(const cmsPipeline* lut)
+{
+    // Count the nodes of the singly linked stage chain
+    cmsUInt32Number Count = 0;
+    const cmsStage* Node = lut->Elements;
+
+    while (Node != NULL) { Count++; Node = Node->Next; }
+
+    return Count;
+}
+
+// Installs a custom 16-bit evaluator together with a block of private data. The free and duplicate callbacks
+// are stored as given so the private data can follow the pipeline when it is freed or duplicated; pass NULL to inhibit them.
+void CMSEXPORT _cmsPipelineSetOptimizationParameters(cmsPipeline* Lut,
+                                        _cmsOPTeval16Fn Eval16,
+                                        void* PrivateData,
+                                        _cmsFreeUserDataFn FreePrivateDataFn,
+                                        _cmsDupUserDataFn  DupPrivateDataFn)
+{
+    // Plain field stores; any previously installed private data is not released here
+    Lut->Data       = PrivateData;
+    Lut->FreeDataFn = FreePrivateDataFn;
+    Lut->DupDataFn  = DupPrivateDataFn;
+    Lut->Eval16Fn   = Eval16;
+}
+
+
+// ----------------------------------------------------------- Reverse interpolation
+// Here's how it goes. The derivative Df(x) of the function f is the linear
+// transformation that best approximates f near the point x. It can be represented
+// by a matrix A whose entries are the partial derivatives of the components of f
+// with respect to all the coordinates. This is known as the Jacobian
+//
+// The best linear approximation to f is given by the matrix equation:
+//
+// y-y0 = A (x-x0)
+//
+// So, if x0 is a good "guess" for the zero of f, then solving for the zero of this
+// linear approximation will give a "better guess" for the zero of f. Thus let y=0,
+// and since y0=f(x0) one can solve the above equation for x. This leads to the
+// Newton's method formula:
+//
+// xn+1 = xn - A-1 f(xn)
+//
+// where xn+1 denotes the (n+1)-st guess, obtained from the n-th guess xn in the
+// fashion described above. Iterating this will give better and better approximations
+// if you have a "good enough" initial guess.
+
+
+#define JACOBIAN_EPSILON            0.001f   // Step applied to one input when estimating the Jacobian
+#define INVERSION_MAX_ITERATIONS    30       // Upper bound on the Newton iterations of the reverse search
+
+// Nudges *Val by JACOBIAN_EPSILON: upwards while there is room below 1.0,
+// downwards otherwise, so the perturbed value never goes above 1.0.
+static
+void IncDelta(cmsFloat32Number *Val)
+{
+    const cmsBool RoomAbove = (*Val < (1.0 - JACOBIAN_EPSILON));
+
+    if (RoomAbove)
+        *Val += JACOBIAN_EPSILON;
+    else
+        *Val -= JACOBIAN_EPSILON;
+}
+
+
+
+// Length of the difference vector (b - a) over the first n components
+static
+cmsFloat32Number EuclideanDistance(cmsFloat32Number a[], cmsFloat32Number b[], int n)
+{
+    cmsFloat32Number SquaredSum = 0;
+    int k = 0;
+
+    while (k < n) {
+        const cmsFloat32Number Delta = b[k] - a[k];
+        SquaredSum += Delta * Delta;
+        k++;
+    }
+    return sqrtf(SquaredSum);
+}
+
+
+// Evaluate a LUT in reverse direction. It only searches on 3->3 and 4->3 LUTs. Uses Newton method
+//
+// x1 <- x - [J(x)]^-1 * f(x)
+//
+// lut: The LUT on where to do the search
+// Target: LabK, 3 values of Lab plus destination K which is fixed
+// Result: The obtained CMYK (lut ->InputChannels values are written)
+// Hint:   Location where begin the search (3 values), or NULL to start at 0.3 on every axis
+// Returns FALSE on unsupported channel counts or when the Jacobian system cannot be solved
+cmsBool CMSEXPORT cmsPipelineEvalReverseFloat(cmsFloat32Number Target[],
+                                              cmsFloat32Number Result[],
+                                              cmsFloat32Number Hint[],
+                                              const cmsPipeline* lut)
+{
+    cmsUInt32Number  i, j;
+    cmsFloat64Number  error, LastError = 1E20;
+    cmsFloat32Number  fx[4], x[4], xd[4], fxd[4];
+    cmsVEC3 tmp, tmp2;
+    cmsMAT3 Jacobian;
+
+    // Only 3->3 and 4->3 are supported
+    if (lut ->InputChannels != 3 && lut ->InputChannels != 4) return FALSE;
+    if (lut ->OutputChannels != 3) return FALSE;
+
+    // Take the hint as starting point if specified
+    if (Hint == NULL) {
+
+        // Begin at any point, we choose 1/3 of CMY axis
+        x[0] = x[1] = x[2] = 0.3f;
+    }
+    else {
+
+        // Only copy 3 channels from hint...
+        for (j=0; j < 3; j++)
+            x[j] = Hint[j];
+    }
+
+    // If Lut is 4-dimensions, then grab target[3], which is fixed
+    if (lut ->InputChannels == 4) {
+        x[3] = Target[3];
+    }
+    else x[3] = 0; // To keep lint happy
+
+
+    // Iterate
+    for (i = 0; i < INVERSION_MAX_ITERATIONS; i++) {
+
+        // Get beginning fx
+        cmsPipelineEvalFloat(x, fx, lut);
+
+        // Compute error
+        error = EuclideanDistance(fx, Target, 3);
+
+        // If not convergent, return last safe value
+        if (error >= LastError)
+            break;
+
+        // Keep latest values
+        LastError     = error;
+        for (j=0; j < lut ->InputChannels; j++)
+                Result[j] = x[j];
+
+        // Found an exact match?
+        if (error <= 0)
+            break;
+
+        // Obtain slope (the Jacobian) by perturbing one of the 3 free inputs at a time
+        for (j = 0; j < 3; j++) {
+
+            xd[0] = x[0];
+            xd[1] = x[1];
+            xd[2] = x[2];
+            xd[3] = x[3];  // Keep fixed channel
+
+            IncDelta(&xd[j]);
+
+            cmsPipelineEvalFloat(xd, fxd, lut);
+            // NOTE(review): IncDelta steps downwards near 1.0, yet the divisor stays +JACOBIAN_EPSILON - confirm
+            Jacobian.v[0].n[j] = ((fxd[0] - fx[0]) / JACOBIAN_EPSILON);
+            Jacobian.v[1].n[j] = ((fxd[1] - fx[1]) / JACOBIAN_EPSILON);
+            Jacobian.v[2].n[j] = ((fxd[2] - fx[2]) / JACOBIAN_EPSILON);
+        }
+
+        // Solve system: Jacobian * tmp = f(x) - Target
+        tmp2.n[0] = fx[0] - Target[0];
+        tmp2.n[1] = fx[1] - Target[1];
+        tmp2.n[2] = fx[2] - Target[2];
+
+        if (!_cmsMAT3solve(&tmp, &Jacobian, &tmp2))
+            return FALSE;
+
+        // Move our guess
+        x[0] -= (cmsFloat32Number) tmp.n[0];
+        x[1] -= (cmsFloat32Number) tmp.n[1];
+        x[2] -= (cmsFloat32Number) tmp.n[2];
+
+        // Some clipping: keep the guess inside 0..1
+        for (j=0; j < 3; j++) {
+            if (x[j] < 0) x[j] = 0;
+            else
+                if (x[j] > 1.0) x[j] = 1.0;
+        }
+    }
+
+    return TRUE;
+}
+
+
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsmtrx.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsmtrx.cpp
new file mode 100644
index 0000000000..a83d39ddb6
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsmtrx.cpp
@@ -0,0 +1,176 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+#define DSWAP(x, y)     {cmsFloat64Number tmp = (x); (x)=(y); (y)=tmp;}   // Swaps two cmsFloat64Number lvalues through a temporary
+
+
+// Stores the components (x, y, z) into vector r
+void CMSEXPORT _cmsVEC3init(cmsVEC3* r, cmsFloat64Number x, cmsFloat64Number y, cmsFloat64Number z)
+{
+    cmsFloat64Number* Out = r->n;
+
+    Out[VX] = x; Out[VY] = y; Out[VZ] = z;
+}
+
+// Component-wise difference: r = a - b
+void CMSEXPORT _cmsVEC3minus(cmsVEC3* r, const cmsVEC3* a, const cmsVEC3* b)
+{
+    int k;
+
+    for (k = 0; k < 3; k++) r->n[k] = a->n[k] - b->n[k];
+}
+
+// Vector cross product r = u x v. r is written component by component, so it must not alias u or v
+void CMSEXPORT _cmsVEC3cross(cmsVEC3* r, const cmsVEC3* u, const cmsVEC3* v)
+{
+    r ->n[VX] = u->n[VY] * v->n[VZ] - v->n[VY] * u->n[VZ];
+    r ->n[VY] = u->n[VZ] * v->n[VX] - v->n[VZ] * u->n[VX];
+    r ->n[VZ] = u->n[VX] * v->n[VY] - v->n[VX] * u->n[VY];
+}
+
+// Vector dot product: sum of the component-wise products of u and v
+cmsFloat64Number CMSEXPORT _cmsVEC3dot(const cmsVEC3* u, const cmsVEC3* v)
+{
+    return u->n[VX] * v->n[VX] + u->n[VY] * v->n[VY] + u->n[VZ] * v->n[VZ];
+}
+
+// Euclidean norm of vector a
+cmsFloat64Number CMSEXPORT _cmsVEC3length(const cmsVEC3* a)
+{
+    const cmsFloat64Number x = a->n[VX], y = a->n[VY], z = a->n[VZ];
+
+    return sqrt(x * x + y * y + z * z);
+}
+
+// Euclidean distance between the points a and b
+cmsFloat64Number CMSEXPORT _cmsVEC3distance(const cmsVEC3* a, const cmsVEC3* b)
+{
+    cmsVEC3 Delta;
+
+    // Difference vector first, then its length: same arithmetic in the same order
+    _cmsVEC3minus(&Delta, a, b);
+    return _cmsVEC3length(&Delta);
+}
+
+
+
+// Fills a with the 3x3 identity matrix
+void CMSEXPORT _cmsMAT3identity(cmsMAT3* a)
+{
+    int Row, Col;
+    for (Row = 0; Row < 3; Row++)
+        for (Col = 0; Col < 3; Col++) a->v[Row].n[Col] = (Row == Col) ? 1.0 : 0.0;
+}
+
+static
+cmsBool CloseEnough(cmsFloat64Number a, cmsFloat64Number b)
+{
+    return fabs(b - a) < (1.0 / 65535.0);   // TRUE when a and b differ by less than 1/65535
+}
+
+
+cmsBool CMSEXPORT _cmsMAT3isIdentity(const cmsMAT3* a)
+{
+    // Compare every cell against a freshly built identity, within the CloseEnough tolerance
+    cmsMAT3 Reference;
+    int Row, Col;
+
+    _cmsMAT3identity(&Reference);
+    for (Row = 0; Row < 3; Row++)
+        for (Col = 0; Col < 3; Col++)
+            if (!CloseEnough(a->v[Row].n[Col], Reference.v[Row].n[Col])) return FALSE;
+
+    return TRUE;
+}
+
+
+// Multiply two matrices: r = a * b. Rows of r are stored before later rows read b, so r must not be the same matrix as b
+void CMSEXPORT _cmsMAT3per(cmsMAT3* r, const cmsMAT3* a, const cmsMAT3* b)
+{
+#define ROWCOL(i, j) \
+    a->v[i].n[0]*b->v[0].n[j] + a->v[i].n[1]*b->v[1].n[j] + a->v[i].n[2]*b->v[2].n[j]
+
+    _cmsVEC3init(&r-> v[0], ROWCOL(0,0), ROWCOL(0,1), ROWCOL(0,2));
+    _cmsVEC3init(&r-> v[1], ROWCOL(1,0), ROWCOL(1,1), ROWCOL(1,2));
+    _cmsVEC3init(&r-> v[2], ROWCOL(2,0), ROWCOL(2,1), ROWCOL(2,2));
+
+#undef ROWCOL //(i, j)
+}
+
+
+
+// Inverse of a matrix b = a^(-1). Returns FALSE, leaving b untouched, when |det| < MATRIX_DET_TOLERANCE
+cmsBool  CMSEXPORT _cmsMAT3inverse(const cmsMAT3* a, cmsMAT3* b)
+{
+   cmsFloat64Number det, c0, c1, c2;
+   // Cofactors of the first row of a
+   c0 =  a -> v[1].n[1]*a -> v[2].n[2] - a -> v[1].n[2]*a -> v[2].n[1];
+   c1 = -a -> v[1].n[0]*a -> v[2].n[2] + a -> v[1].n[2]*a -> v[2].n[0];
+   c2 =  a -> v[1].n[0]*a -> v[2].n[1] - a -> v[1].n[1]*a -> v[2].n[0];
+   // Determinant by expansion along the first row
+   det = a -> v[0].n[0]*c0 + a -> v[0].n[1]*c1 + a -> v[0].n[2]*c2;
+
+   if (fabs(det) < MATRIX_DET_TOLERANCE) return FALSE;  // singular matrix; can't invert
+   // b is filled while a is still being read, so b must not alias a
+   b -> v[0].n[0] = c0/det;
+   b -> v[0].n[1] = (a -> v[0].n[2]*a -> v[2].n[1] - a -> v[0].n[1]*a -> v[2].n[2])/det;
+   b -> v[0].n[2] = (a -> v[0].n[1]*a -> v[1].n[2] - a -> v[0].n[2]*a -> v[1].n[1])/det;
+   b -> v[1].n[0] = c1/det;
+   b -> v[1].n[1] = (a -> v[0].n[0]*a -> v[2].n[2] - a -> v[0].n[2]*a -> v[2].n[0])/det;
+   b -> v[1].n[2] = (a -> v[0].n[2]*a -> v[1].n[0] - a -> v[0].n[0]*a -> v[1].n[2])/det;
+   b -> v[2].n[0] = c2/det;
+   b -> v[2].n[1] = (a -> v[0].n[1]*a -> v[2].n[0] - a -> v[0].n[0]*a -> v[2].n[1])/det;
+   b -> v[2].n[2] = (a -> v[0].n[0]*a -> v[1].n[1] - a -> v[0].n[1]*a -> v[1].n[0])/det;
+
+   return TRUE;
+}
+
+
+// Solves A x = b by inverting a private copy of A. Returns FALSE if A is singular
+cmsBool  CMSEXPORT _cmsMAT3solve(cmsVEC3* x, cmsMAT3* a, cmsVEC3* b)
+{
+    cmsMAT3 Copy, Inverse;
+
+    memmove(&Copy, a, sizeof(cmsMAT3));
+    if (_cmsMAT3inverse(&Copy, &Inverse)) {
+        _cmsMAT3eval(x, &Inverse, b);
+        return TRUE;
+    }
+    return FALSE;  // Singular matrix
+}
+
+// Evaluate a vector across a matrix: r = a * v. r is written component by component, so it must not alias v
+void CMSEXPORT _cmsMAT3eval(cmsVEC3* r, const cmsMAT3* a, const cmsVEC3* v)
+{
+    r->n[VX] = a->v[0].n[VX]*v->n[VX] + a->v[0].n[VY]*v->n[VY] + a->v[0].n[VZ]*v->n[VZ];
+    r->n[VY] = a->v[1].n[VX]*v->n[VX] + a->v[1].n[VY]*v->n[VY] + a->v[1].n[VZ]*v->n[VZ];
+    r->n[VZ] = a->v[2].n[VX]*v->n[VX] + a->v[2].n[VY]*v->n[VY] + a->v[2].n[VZ]*v->n[VZ];
+}
+
+
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsnamed.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsnamed.cpp
new file mode 100644
index 0000000000..773e4d2091
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsnamed.cpp
@@ -0,0 +1,962 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Multilocalized unicode objects. That is an attempt to encapsulate i18n.
+
+
+// Allocates an empty multi-localized unicode object with room for nItems
+// translations (2 when nItems is 0). The object is bound to ContextID.
+// Returns NULL if any allocation fails.
+cmsMLU* CMSEXPORT cmsMLUalloc(cmsContext ContextID, cmsUInt32Number nItems)
+{
+    cmsMLU* Obj;
+    const cmsUInt32Number Capacity = (nItems == 0) ? 2 : nItems;
+
+    // The container itself, zero-filled
+    Obj = (cmsMLU*) _cmsMallocZero(ContextID, sizeof(cmsMLU));
+    if (Obj == NULL) return NULL;
+
+    Obj->ContextID = ContextID;
+
+    // The translation table
+    Obj->Entries = (_cmsMLUentry*) _cmsCalloc(ContextID, Capacity, sizeof(_cmsMLUentry));
+    if (Obj->Entries == NULL) {
+        _cmsFree(ContextID, Obj);
+        return NULL;
+    }
+
+    // Table is allocated but holds no translation yet
+    Obj->AllocatedEntries = Capacity;
+    Obj->UsedEntries      = 0;
+
+    return Obj;
+}
+
+
+// Enlarges the string pool of a MLU: 256 bytes the first time, twice the current
+// size afterwards. Returns FALSE on NULL mlu, size overflow or failed reallocation.
+static
+cmsBool GrowMLUpool(cmsMLU* mlu)
+{
+    cmsUInt32Number Wanted;
+    void* Bigger;
+
+    // Sanity check
+    if (mlu == NULL) return FALSE;
+
+    Wanted = (mlu->PoolSize == 0) ? 256 : mlu->PoolSize * 2;
+
+    // The doubling wrapped around
+    if (Wanted < mlu->PoolSize) return FALSE;
+
+    // Reallocate the pool
+    Bigger = _cmsRealloc(mlu->ContextID, mlu->MemPool, Wanted);
+    if (Bigger != NULL) {
+
+        // Only touch the object once the new block is secured
+        mlu->MemPool  = Bigger;
+        mlu->PoolSize = Wanted;
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+
+// Doubles the capacity of the entry table of a MLU.
+// Returns FALSE on NULL mlu, counter overflow or failed reallocation.
+static
+cmsBool GrowMLUtable(cmsMLU* mlu)
+{
+    cmsUInt32Number Doubled;
+    _cmsMLUentry* Table;
+
+    if (mlu == NULL) return FALSE;
+
+    Doubled = mlu->AllocatedEntries * 2;
+
+    // Halving must give the old count back, otherwise the product wrapped
+    if (Doubled / 2 != mlu->AllocatedEntries) return FALSE;
+
+    Table = (_cmsMLUentry*) _cmsRealloc(mlu->ContextID, mlu->Entries, Doubled * sizeof(_cmsMLUentry));
+    if (Table == NULL) return FALSE;
+
+    // Commit only after the reallocation succeeded
+    mlu->AllocatedEntries = Doubled;
+    mlu->Entries          = Table;
+
+    return TRUE;
+}
+
+
+// Returns the index of the entry matching both Language and Country, or -1
+// when there is none (or mlu is NULL).
+static
+int SearchMLUEntry(cmsMLU* mlu, cmsUInt16Number LanguageCode, cmsUInt16Number CountryCode)
+{
+    cmsUInt32Number Index = 0;
+
+    if (mlu == NULL) return -1;
+
+    while (Index < mlu->UsedEntries) {
+        const _cmsMLUentry* Entry = &mlu->Entries[Index];
+
+        if (Entry->Language == LanguageCode && Entry->Country == CountryCode)
+            return (int) Index;
+
+        Index++;
+    }
+    return -1;
+}
+
+// Appends a block of 'size' bytes of wide characters to the MLU pool and registers it
+// under the given language/country pair. A pair may appear only once: adding a duplicate fails.
+static
+cmsBool AddMLUBlock(cmsMLU* mlu, cmsUInt32Number size, const wchar_t *Block,
+                     cmsUInt16Number LanguageCode, cmsUInt16Number CountryCode)
+{
+    cmsUInt32Number Start;
+    cmsUInt8Number* Pool;
+    _cmsMLUentry* Slot;
+
+    if (mlu == NULL) return FALSE;
+
+    // Make sure the table has a free slot
+    if (mlu->UsedEntries >= mlu->AllocatedEntries && !GrowMLUtable(mlu))
+        return FALSE;
+
+    // Reject a second translation for the same language/country
+    if (SearchMLUEntry(mlu, LanguageCode, CountryCode) >= 0) return FALSE;
+
+    // Enlarge the pool until the block fits
+    while ((mlu->PoolSize - mlu->PoolUsed) < size) {
+
+        if (!GrowMLUpool(mlu)) return FALSE;
+    }
+
+    Pool = (cmsUInt8Number*) mlu->MemPool;
+    if (Pool == NULL) return FALSE;
+
+    // Copy the characters at the current end of the pool
+    Start = mlu->PoolUsed;
+    memmove(Pool + Start, Block, size);
+    mlu->PoolUsed += size;
+
+    // Describe the new block in the next free table slot
+    Slot = &mlu->Entries[mlu->UsedEntries];
+    Slot->StrW     = Start;
+    Slot->Len      = size;
+    Slot->Country  = CountryCode;
+    Slot->Language = LanguageCode;
+    mlu->UsedEntries++;
+
+    return TRUE;
+}
+
+// Packs the first two characters of a language/country code into a cmsUInt16Number,
+// first character in the high byte. Read byte by byte because some compilers don't
+// properly align beginning of strings. A NULL code yields 0 instead of crashing.
+static
+cmsUInt16Number strTo16(const char str[3])
+{
+    const cmsUInt8Number* ptr8 = (const cmsUInt8Number*)str;
+
+    if (ptr8 == NULL) return 0;
+    return (cmsUInt16Number)(((cmsUInt16Number)ptr8[0] << 8) | ptr8[1]);
+}
+
+static
+void strFrom16(char str[3], cmsUInt16Number n)
+{
+    // Inverse of strTo16: high byte first, then low byte, then the string terminator
+    str[0] = (char)(n >> 8);
+    str[1] = (char)n;
+    str[2] = (char)0;
+}
+
+// Add an ASCII entry. Do not add any \0 termination (ICC1v43_2010-12.pdf page 61).
+// Returns FALSE on NULL mlu or NULL string, on allocation failure, or if AddMLUBlock refuses it.
+cmsBool CMSEXPORT cmsMLUsetASCII(cmsMLU* mlu, const char LanguageCode[3], const char CountryCode[3], const char* ASCIIString)
+{
+    cmsUInt32Number i, len;
+    wchar_t* WStr;
+    cmsBool  rc;
+    cmsUInt16Number Lang  = strTo16(LanguageCode);
+    cmsUInt16Number Cntry = strTo16(CountryCode);
+
+    // strlen() on a NULL pointer is undefined, so validate before measuring
+    if (mlu == NULL || ASCIIString == NULL) return FALSE;
+    len = (cmsUInt32Number) strlen(ASCIIString);
+    WStr = (wchar_t*) _cmsCalloc(mlu ->ContextID, len,  sizeof(wchar_t));
+    if (WStr == NULL) return FALSE;
+
+    for (i=0; i < len; i++)
+        WStr[i] = (wchar_t) ASCIIString[i];
+
+    rc = AddMLUBlock(mlu, len  * sizeof(wchar_t), WStr, Lang, Cntry);
+    _cmsFree(mlu ->ContextID, WStr);
+    return rc;
+}
+
+// Length of a zero-terminated wide string in characters; avoids needing a wcs library
+static
+cmsUInt32Number mywcslen(const wchar_t *s)
+{
+    cmsUInt32Number Count = 0;
+
+    // Advance until the terminating zero is found
+    while (s[Count] != 0)
+        Count++;
+
+    return Count;
+}
+
+// Add a wide entry. The terminating zero is not stored (ICC1v43_2010-12.pdf page 61)
+cmsBool  CMSEXPORT cmsMLUsetWide(cmsMLU* mlu, const char Language[3], const char Country[3], const wchar_t* WideString)
+{
+    const cmsUInt16Number LangCode    = strTo16(Language);
+    const cmsUInt16Number CountryCode = strTo16(Country);
+    cmsUInt32Number Bytes;
+
+    if (mlu == NULL || WideString == NULL) return FALSE;
+
+    // Size of the string payload in bytes, terminator excluded
+    Bytes = (cmsUInt32Number) (mywcslen(WideString)) * sizeof(wchar_t);
+    return AddMLUBlock(mlu, Bytes, WideString, LangCode, CountryCode);
+}
+
+// Duplicates a MLU: the used entries and the used part of the string pool are copied. Returns NULL on failure
+cmsMLU* CMSEXPORT cmsMLUdup(const cmsMLU* mlu)
+{
+    cmsMLU* NewMlu = NULL;
+
+    // Duplicating a NULL obtains a NULL
+    if (mlu == NULL) return NULL;
+
+    NewMlu = cmsMLUalloc(mlu ->ContextID, mlu ->UsedEntries);
+    if (NewMlu == NULL) return NULL;
+
+    // Should never happen
+    if (NewMlu ->AllocatedEntries < mlu ->UsedEntries)
+        goto Error;
+
+    // Sanitize...
+    if (NewMlu ->Entries == NULL || mlu ->Entries == NULL)  goto Error;
+
+    memmove(NewMlu ->Entries, mlu ->Entries, mlu ->UsedEntries * sizeof(_cmsMLUentry));
+    NewMlu ->UsedEntries = mlu ->UsedEntries;
+
+    // The MLU may be empty
+    if (mlu ->PoolUsed == 0) {
+        NewMlu ->MemPool = NULL;
+    }
+    else {
+        // It is not empty
+        NewMlu ->MemPool = _cmsMalloc(mlu ->ContextID, mlu ->PoolUsed);
+        if (NewMlu ->MemPool == NULL) goto Error;
+    }
+
+    NewMlu ->PoolSize = mlu ->PoolUsed;
+    // NOTE(review): with PoolUsed == 0 the new pool is NULL, so an empty MLU ends up at Error (NULL result) - confirm intended
+    if (NewMlu ->MemPool == NULL || mlu ->MemPool == NULL) goto Error;
+
+    memmove(NewMlu ->MemPool, mlu->MemPool, mlu ->PoolUsed);
+    NewMlu ->PoolUsed = mlu ->PoolUsed;
+
+    return NewMlu;
+
+Error:
+
+    if (NewMlu != NULL) cmsMLUfree(NewMlu);
+    return NULL;
+}
+
+// Releases the entry table, the string pool and the MLU itself. NULL is ignored
+void CMSEXPORT cmsMLUfree(cmsMLU* mlu)
+{
+    cmsContext Owner;
+    if (mlu == NULL) return;
+
+    Owner = mlu->ContextID;
+    if (mlu->Entries != NULL) _cmsFree(Owner, mlu->Entries);
+    if (mlu->MemPool != NULL) _cmsFree(Owner, mlu->MemPool);
+    _cmsFree(Owner, mlu);
+}
+
+
+// Looks up a translation. An exact language+country match wins; otherwise the first entry with the same
+// language is used, and failing that the first entry of the table. *len receives the length in bytes.
+static
+const wchar_t* _cmsMLUgetWide(const cmsMLU* mlu,
+                              cmsUInt32Number *len,
+                              cmsUInt16Number LanguageCode, cmsUInt16Number CountryCode,
+                              cmsUInt16Number* UsedLanguageCode, cmsUInt16Number* UsedCountryCode)
+{
+    cmsUInt32Number i;
+    int Best = -1;
+    _cmsMLUentry* v;
+
+    if (mlu == NULL) return NULL;
+
+    if (mlu -> AllocatedEntries <= 0) return NULL;
+
+    for (i=0; i < mlu ->UsedEntries; i++) {
+
+        v = mlu ->Entries + i;
+
+        if (v -> Language == LanguageCode) {
+
+            if (Best == -1) Best = (int) i;
+
+            if (v -> Country == CountryCode) {
+
+                if (UsedLanguageCode != NULL) *UsedLanguageCode = v ->Language;
+                if (UsedCountryCode  != NULL) *UsedCountryCode = v ->Country;
+
+                if (len != NULL) *len = v ->Len;
+
+                return (wchar_t*) ((cmsUInt8Number*) mlu ->MemPool + v -> StrW);        // Found exact match
+            }
+        }
+    }
+
+    // No string found. Return First one
+    if (Best == -1)
+        Best = 0;
+    // NOTE(review): Entries[Best] is read even when UsedEntries is 0 - confirm callers never rely on that entry
+    v = mlu ->Entries + Best;
+
+    if (UsedLanguageCode != NULL) *UsedLanguageCode = v ->Language;
+    if (UsedCountryCode  != NULL) *UsedCountryCode = v ->Country;
+
+    if (len != NULL) *len   = v ->Len;
+
+    return(wchar_t*) ((cmsUInt8Number*) mlu ->MemPool + v ->StrW);
+}
+
+
+// Obtain an ASCII representation of the wide string. With Buffer == NULL nothing is
+// copied and the size needed (characters plus terminator) is returned. Otherwise the
+// text is clipped to BufferSize, zero terminated, and the number of chars written
+// (terminator included) is returned. Returns 0 on NULL mlu, no string or BufferSize 0.
+cmsUInt32Number CMSEXPORT cmsMLUgetASCII(const cmsMLU* mlu,
+                                       const char LanguageCode[3], const char CountryCode[3],
+                                       char* Buffer, cmsUInt32Number BufferSize)
+{
+    const cmsUInt16Number Lang  = strTo16(LanguageCode);
+    const cmsUInt16Number Cntry = strTo16(CountryCode);
+
+    const wchar_t* Source;
+    cmsUInt32Number ByteLen = 0;
+    cmsUInt32Number nChars, k;
+
+    // Sanitize
+    if (mlu == NULL) return 0;
+
+    // Locate the best matching translation
+    Source = _cmsMLUgetWide(mlu, &ByteLen, Lang, Cntry, NULL, NULL);
+    if (Source == NULL) return 0;
+
+    // The stored length is in bytes; convert it to characters
+    nChars = ByteLen / sizeof(wchar_t);
+
+    // Size query only
+    if (Buffer == NULL) return nChars + 1;
+
+    // No room at all, not even for the terminator
+    if (BufferSize <= 0) return 0;
+
+    // Clip so that text plus terminator fits
+    if (BufferSize < nChars + 1)
+        nChars = BufferSize - 1;
+
+    // Narrow each wide character to char;
+    // a zero wide character simply becomes a zero char
+    for (k = 0; k < nChars; k++)
+        Buffer[k] = (char) Source[k];
+
+    // We put a termination "\0"
+    Buffer[nChars] = 0;
+    return nChars + 1;
+}
+
+// Obtain a wide representation of the MLU. BufferSize is in bytes. With Buffer == NULL the
+// required size in bytes (terminator included) is returned; otherwise the bytes written.
+cmsUInt32Number CMSEXPORT cmsMLUgetWide(const cmsMLU* mlu,
+                                      const char LanguageCode[3], const char CountryCode[3],
+                                      wchar_t* Buffer, cmsUInt32Number BufferSize)
+{
+    const wchar_t *Wide;
+    cmsUInt32Number  StrLen = 0;
+    cmsUInt16Number Lang  = strTo16(LanguageCode);
+    cmsUInt16Number Cntry = strTo16(CountryCode);
+
+    // Sanitize
+    if (mlu == NULL) return 0;
+
+    Wide = _cmsMLUgetWide(mlu, &StrLen, Lang, Cntry, NULL, NULL);
+    if (Wide == NULL) return 0;
+
+    // Maybe we want only to know the len?
+    if (Buffer == NULL) return StrLen + sizeof(wchar_t);
+
+    // The buffer must hold at least the terminator; a smaller size would make the
+    // clipping below wrap around and let memmove overrun the buffer
+    if (BufferSize < sizeof(wchar_t)) return 0;
+
+    // Some clipping may be required
+    if (BufferSize < StrLen + sizeof(wchar_t))
+        StrLen = BufferSize - sizeof(wchar_t);
+
+    memmove(Buffer, Wide, StrLen);
+    Buffer[StrLen / sizeof(wchar_t)] = 0;
+
+    return StrLen + sizeof(wchar_t);
+}
+
+// Reports which language/country pair would actually be used when the given
+// pair is requested. Returns FALSE when mlu is NULL or no string is available.
+CMSAPI cmsBool CMSEXPORT cmsMLUgetTranslation(const cmsMLU* mlu,
+                                              const char LanguageCode[3], const char CountryCode[3],
+                                              char ObtainedLanguage[3], char ObtainedCountry[3])
+{
+    const cmsUInt16Number WantedLang    = strTo16(LanguageCode);
+    const cmsUInt16Number WantedCountry = strTo16(CountryCode);
+    cmsUInt16Number FoundLang, FoundCountry;
+
+    // Sanitize
+    if (mlu == NULL) return FALSE;
+
+    // Only the codes matter here, so no length is requested
+    if (_cmsMLUgetWide(mlu, NULL, WantedLang, WantedCountry, &FoundLang, &FoundCountry) == NULL)
+        return FALSE;
+
+    // Turn the packed codes back into 3-char strings for the caller
+    strFrom16(ObtainedLanguage, FoundLang);
+    strFrom16(ObtainedCountry,  FoundCountry);
+
+    return TRUE;
+}
+
+
+
+// Number of translations stored in the MLU; a NULL object counts as empty
+cmsUInt32Number CMSEXPORT cmsMLUtranslationsCount(const cmsMLU* mlu)
+{
+    return (mlu != NULL) ? mlu->UsedEntries
+                         : 0;
+}
+
+// Get the language and country codes for a specific MLU index.
+// Returns FALSE if mlu is NULL or idx is not a used entry.
+cmsBool CMSEXPORT cmsMLUtranslationsCodes(const cmsMLU* mlu,
+                                          cmsUInt32Number idx,
+                                          char LanguageCode[3],
+                                          char CountryCode[3])
+{
+    const _cmsMLUentry* Chosen;
+
+    if (mlu == NULL || idx >= mlu->UsedEntries)
+        return FALSE;
+
+    Chosen = mlu->Entries + idx;
+
+    strFrom16(LanguageCode, Chosen->Language);
+    strFrom16(CountryCode,  Chosen->Country);
+
+    return TRUE;
+}
+
+
+// Named color lists --------------------------------------------------------------------------------------------
+
+// Doubles the capacity of the list (64 entries on first use). Beyond 100K entries
+// the list storage is released and FALSE is returned; FALSE also on failed realloc.
+static
+cmsBool  GrowNamedColorList(cmsNAMEDCOLORLIST* v)
+{
+    cmsUInt32Number Capacity;
+    _cmsNAMEDCOLOR* Grown;
+
+    if (v == NULL) return FALSE;
+
+    // 64 is the initial guess, afterwards double each time
+    Capacity = (v->Allocated == 0) ? 64 : v->Allocated * 2;
+
+    // Keep a maximum color lists can grow, 100K entries seems reasonable
+    if (Capacity > 1024 * 100) {
+        _cmsFree(v->ContextID, (void*) v->List);
+        v->List = NULL;
+        return FALSE;
+    }
+
+    Grown = (_cmsNAMEDCOLOR*) _cmsRealloc(v->ContextID, v->List, Capacity * sizeof(_cmsNAMEDCOLOR));
+    if (Grown != NULL) {
+        v->List      = Grown;
+        v->Allocated = Capacity;
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+// Allocate an empty named color list with room for at least n colors.
+//
+// ContextID     - context used for every allocation made on behalf of the list
+// n             - minimum capacity; the array is grown until it holds at least n entries
+// ColorantCount - number of device colorants stored for each color
+// Prefix/Suffix - strings copied into the list, truncated to the size of the
+//                 destination fields; NULL is treated as an empty string
+//
+// Returns the new list, or NULL if any allocation fails.
+cmsNAMEDCOLORLIST* CMSEXPORT cmsAllocNamedColorList(cmsContext ContextID, cmsUInt32Number n, cmsUInt32Number ColorantCount, const char* Prefix, const char* Suffix)
+{
+    cmsNAMEDCOLORLIST* v = (cmsNAMEDCOLORLIST*) _cmsMallocZero(ContextID, sizeof(cmsNAMEDCOLORLIST));
+
+    if (v == NULL) return NULL;
+
+    v ->List      = NULL;
+    v ->nColors   = 0;
+    v ->ContextID  = ContextID;
+
+    while (v -> Allocated < n) {
+        if (!GrowNamedColorList(v)) {
+            // Earlier iterations may already have allocated the color array,
+            // so release the whole list and not just the header
+            cmsFreeNamedColorList(v);
+            return NULL;
+        }
+    }
+
+    // The structure was zero-filled, so a skipped copy leaves an empty string
+    if (Prefix != NULL) strncpy(v ->Prefix, Prefix, sizeof(v ->Prefix)-1);
+    if (Suffix != NULL) strncpy(v ->Suffix, Suffix, sizeof(v ->Suffix)-1);
+    v->Prefix[32] = v->Suffix[32] = 0;
+
+    v -> ColorantCount = ColorantCount;
+
+    return v;
+}
+
+// Dispose a named color list: first its color array (if any), then the list itself.
+// A NULL list is accepted and ignored.
+void CMSEXPORT cmsFreeNamedColorList(cmsNAMEDCOLORLIST* v)
+{
+    if (v != NULL) {
+
+        if (v ->List != NULL)
+            _cmsFree(v ->ContextID, v ->List);
+
+        _cmsFree(v ->ContextID, v);
+    }
+}
+
+// Duplicate a named color list, including prefix, suffix, colorant count and all colors.
+// The copy is allocated in the context of the source list and has at least the same capacity.
+// Returns NULL if v is NULL or if any allocation fails; nothing is leaked on failure.
+cmsNAMEDCOLORLIST* CMSEXPORT cmsDupNamedColorList(const cmsNAMEDCOLORLIST* v)
+{
+    cmsNAMEDCOLORLIST* NewNC;
+
+    if (v == NULL) return NULL;
+
+    NewNC= cmsAllocNamedColorList(v ->ContextID, v -> nColors, v ->ColorantCount, v ->Prefix, v ->Suffix);
+    if (NewNC == NULL) return NULL;
+
+    // For really large tables we need this
+    while (NewNC ->Allocated < v ->Allocated){
+        if (!GrowNamedColorList(NewNC)) {
+            // The partially built copy is owned here, so it has to be released
+            cmsFreeNamedColorList(NewNC);
+            return NULL;
+        }
+    }
+
+    memmove(NewNC ->Prefix, v ->Prefix, sizeof(v ->Prefix));
+    memmove(NewNC ->Suffix, v ->Suffix, sizeof(v ->Suffix));
+    NewNC ->ColorantCount = v ->ColorantCount;
+
+    // An empty list may have no array at all; do not hand NULL pointers to memmove
+    if (v ->nColors > 0)
+        memmove(NewNC->List, v ->List, v->nColors * sizeof(_cmsNAMEDCOLOR));
+
+    NewNC ->nColors = v ->nColors;
+    return NewNC;
+}
+
+
+// Append one color at the end of the list, growing the internal array when it is full
+// (so the list's array pointer may change). NULL PCS or Colorant arrays are stored as
+// zeros and a NULL Name as an empty string. Returns FALSE if the list is NULL or the
+// array cannot be grown.
+cmsBool  CMSEXPORT cmsAppendNamedColor(cmsNAMEDCOLORLIST* NamedColorList,
+                                       const char* Name,
+                                       cmsUInt16Number PCS[3], cmsUInt16Number Colorant[cmsMAXCHANNELS])
+{
+    _cmsNAMEDCOLOR* Slot;
+    cmsUInt32Number k;
+
+    if (NamedColorList == NULL) return FALSE;
+
+    // Make room first; the array may move, so the slot is located only afterwards
+    if (NamedColorList ->nColors + 1 > NamedColorList ->Allocated &&
+        !GrowNamedColorList(NamedColorList))
+        return FALSE;
+
+    Slot = &NamedColorList ->List[NamedColorList ->nColors];
+
+    for (k = 0; k < NamedColorList ->ColorantCount; k++) {
+
+        if (Colorant == NULL)
+            Slot ->DeviceColorant[k] = (cmsUInt16Number) 0;
+        else
+            Slot ->DeviceColorant[k] = Colorant[k];
+    }
+
+    for (k = 0; k < 3; k++) {
+
+        if (PCS == NULL)
+            Slot ->PCS[k] = (cmsUInt16Number) 0;
+        else
+            Slot ->PCS[k] = PCS[k];
+    }
+
+    if (Name == NULL) {
+        Slot ->Name[0] = 0;
+    }
+    else {
+        // Bounded copy, always terminated
+        strncpy(Slot ->Name, Name, cmsMAX_PATH-1);
+        Slot ->Name[cmsMAX_PATH-1] = 0;
+    }
+
+    NamedColorList ->nColors++;
+    return TRUE;
+}
+
+// Number of colors currently stored in the list; a NULL list counts as empty
+cmsUInt32Number CMSEXPORT cmsNamedColorCount(const cmsNAMEDCOLORLIST* NamedColorList)
+{
+     return (NamedColorList != NULL) ? NamedColorList ->nColors : 0;
+}
+
+// Info about a given color. Every output pointer is optional: pass NULL for the
+// pieces that are not wanted. Returns FALSE if the list is NULL or nColor is not
+// a stored color.
+cmsBool  CMSEXPORT cmsNamedColorInfo(const cmsNAMEDCOLORLIST* NamedColorList, cmsUInt32Number nColor,
+                                     char* Name,
+                                     char* Prefix,
+                                     char* Suffix,
+                                     cmsUInt16Number* PCS,
+                                     cmsUInt16Number* Colorant)
+{
+    const _cmsNAMEDCOLOR* Entry;
+
+    if (NamedColorList == NULL || nColor >= NamedColorList ->nColors)
+        return FALSE;
+
+    Entry = &NamedColorList ->List[nColor];
+
+    // strcpy instead of strncpy because many apps are using small buffers
+    if (Name != NULL)   strcpy(Name, Entry ->Name);
+    if (Prefix != NULL) strcpy(Prefix, NamedColorList ->Prefix);
+    if (Suffix != NULL) strcpy(Suffix, NamedColorList ->Suffix);
+
+    // PCS always has three components
+    if (PCS != NULL)
+        memmove(PCS, Entry ->PCS, 3*sizeof(cmsUInt16Number));
+
+    // The number of device colorants is a property of the list
+    if (Colorant != NULL)
+        memmove(Colorant, Entry ->DeviceColorant,
+                sizeof(cmsUInt16Number) * NamedColorList ->ColorantCount);
+
+    return TRUE;
+}
+
+// Find a color by its bare name (prefix and suffix are not considered), comparing
+// with cmsstrcasecmp. Returns the index of the first match, or -1 when the list
+// is NULL or no stored name matches.
+cmsInt32Number CMSEXPORT cmsNamedColorIndex(const cmsNAMEDCOLORLIST* NamedColorList, const char* Name)
+{
+    cmsUInt32Number Count, Pos;
+
+    if (NamedColorList == NULL) return -1;
+
+    Count = cmsNamedColorCount(NamedColorList);
+    Pos   = 0;
+
+    while (Pos < Count) {
+
+        if (cmsstrcasecmp(Name,  NamedColorList->List[Pos].Name) == 0)
+            return (cmsInt32Number) Pos;
+
+        Pos++;
+    }
+
+    return -1;
+}
+
+// MPE support -----------------------------------------------------------------------------------------------------------------
+
+// Stage callback: the stage's Data pointer is a named color list, release it
+static
+void FreeNamedColorList(cmsStage* mpe)
+{
+    cmsFreeNamedColorList((cmsNAMEDCOLORLIST*) mpe ->Data);
+}
+
+// Stage callback: returns a duplicate of the named color list held in the stage's Data pointer
+static
+void* DupNamedColorList(cmsStage* mpe)
+{
+    return cmsDupNamedColorList((cmsNAMEDCOLORLIST*) mpe ->Data);
+}
+
+// Stage evaluator: In[0] scaled by 65535 selects a color index, and the three
+// stored PCS words of that color are written to Out as values divided by 65535.
+// An index outside the list raises cmsERROR_RANGE and yields zeros.
+static
+void EvalNamedColorPCS(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+    const cmsNAMEDCOLORLIST* Colors = (cmsNAMEDCOLORLIST*) mpe ->Data;
+    cmsUInt16Number Selected = (cmsUInt16Number) _cmsQuickSaturateWord(In[0] * 65535.0);
+    cmsUInt32Number c;
+
+    if (Selected < Colors ->nColors) {
+
+        // Named color always uses Lab
+        const cmsUInt16Number* Encoded = Colors ->List[Selected].PCS;
+
+        for (c = 0; c < 3; c++)
+            Out[c] = (cmsFloat32Number) (Encoded[c] / 65535.0);
+
+        return;
+    }
+
+    cmsSignalError(Colors ->ContextID, cmsERROR_RANGE, "Color %d out of range", Selected);
+    Out[0] = Out[1] = Out[2] = 0.0f;
+}
+
+// Stage evaluator: In[0] scaled by 65535 selects a color index, and the stored
+// device colorants of that color are written to Out as values divided by 65535.
+// An index outside the list raises cmsERROR_RANGE and zeroes every output channel.
+static
+void EvalNamedColor(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+    const cmsNAMEDCOLORLIST* Colors = (cmsNAMEDCOLORLIST*) mpe ->Data;
+    cmsUInt16Number Selected = (cmsUInt16Number) _cmsQuickSaturateWord(In[0] * 65535.0);
+    cmsUInt32Number ch;
+
+    if (Selected < Colors ->nColors) {
+
+        const cmsUInt16Number* Device = Colors ->List[Selected].DeviceColorant;
+
+        for (ch = 0; ch < Colors ->ColorantCount; ch++)
+            Out[ch] = (cmsFloat32Number) (Device[ch] / 65535.0);
+
+        return;
+    }
+
+    cmsSignalError(Colors ->ContextID, cmsERROR_RANGE, "Color %d out of range", Selected);
+
+    for (ch = 0; ch < Colors ->ColorantCount; ch++)
+        Out[ch] = 0.0f;
+}
+
+
+// Build a named color lookup stage with one input channel. With UsePCS the stage
+// has three outputs and evaluates to the stored PCS values; otherwise it has
+// ColorantCount outputs and evaluates to the device colorants. The stage receives
+// its own duplicate of the list.
+cmsStage* CMSEXPORT _cmsStageAllocNamedColor(cmsNAMEDCOLORLIST* NamedColorList, cmsBool UsePCS)
+{
+    cmsNAMEDCOLORLIST* PrivateCopy = cmsDupNamedColorList(NamedColorList);
+
+    if (UsePCS)
+        return _cmsStageAllocPlaceholder(NamedColorList ->ContextID,
+                                         cmsSigNamedColorElemType,
+                                         1, 3,
+                                         EvalNamedColorPCS,
+                                         DupNamedColorList,
+                                         FreeNamedColorList,
+                                         PrivateCopy);
+
+    return _cmsStageAllocPlaceholder(NamedColorList ->ContextID,
+                                     cmsSigNamedColorElemType,
+                                     1, NamedColorList ->ColorantCount,
+                                     EvalNamedColor,
+                                     DupNamedColorList,
+                                     FreeNamedColorList,
+                                     PrivateCopy);
+}
+
+
+// Retrieve the named color list from a transform. It is expected to be the first
+// element in the transform's LUT.
+//
+// Returns the list stored in that first stage, or NULL when the transform is NULL,
+// has no LUT, has an empty LUT, or its first stage is not a named color stage.
+// The returned pointer is the stage's own data, not a copy.
+cmsNAMEDCOLORLIST* CMSEXPORT cmsGetNamedColorList(cmsHTRANSFORM xform)
+{
+    _cmsTRANSFORM* v = (_cmsTRANSFORM*) xform;
+    cmsStage* mpe;
+
+    // Validate every link of the chain before dereferencing it
+    if (v == NULL) return NULL;
+    if (v ->Lut == NULL) return NULL;
+
+    mpe = v ->Lut->Elements;
+    if (mpe == NULL) return NULL;   // empty pipeline
+
+    if (mpe ->Type != cmsSigNamedColorElemType) return NULL;
+    return (cmsNAMEDCOLORLIST*) mpe ->Data;
+}
+
+
+// Profile sequence description routines -------------------------------------------------------------------------------------
+
+// Allocate a profile sequence description with n entries whose Manufacturer, Model
+// and Description are all NULL. Returns NULL when n is 0, when n exceeds 255, or
+// when an allocation fails.
+cmsSEQ* CMSEXPORT cmsAllocProfileSequenceDescription(cmsContext ContextID, cmsUInt32Number n)
+{
+    cmsSEQ* Result;
+    cmsPSEQDESC* Items;
+    cmsUInt32Number k;
+
+    // In a absolutely arbitrary way, a maximum of 255 profiles linked in a devicelink
+    // is allowed. More makes no sense anyway and may be used for exploits, so the
+    // door is closed here. An empty sequence is rejected as well.
+    if (n == 0 || n > 255) return NULL;
+
+    Result = (cmsSEQ*) _cmsMallocZero(ContextID, sizeof(cmsSEQ));
+    if (Result == NULL) return NULL;
+
+    Items = (cmsPSEQDESC*) _cmsCalloc(ContextID, n, sizeof(cmsPSEQDESC));
+    if (Items == NULL) {
+        _cmsFree(ContextID, Result);
+        return NULL;
+    }
+
+    Result -> ContextID = ContextID;
+    Result -> seq       = Items;
+    Result -> n         = n;
+
+    for (k = 0; k < n; k++) {
+        Items[k].Manufacturer = NULL;
+        Items[k].Model        = NULL;
+        Items[k].Description  = NULL;
+    }
+
+    return Result;
+}
+
+// Release a profile sequence description: the MLU objects of every entry, the
+// entry array and finally the description itself. A NULL description, or one
+// without an entry array, is handled safely.
+void CMSEXPORT cmsFreeProfileSequenceDescription(cmsSEQ* pseq)
+{
+    cmsUInt32Number i;
+
+    if (pseq == NULL) return;
+
+    // Entries can only be visited when the array exists
+    if (pseq ->seq != NULL) {
+
+        for (i=0; i < pseq ->n; i++) {
+            if (pseq ->seq[i].Manufacturer != NULL) cmsMLUfree(pseq ->seq[i].Manufacturer);
+            if (pseq ->seq[i].Model != NULL) cmsMLUfree(pseq ->seq[i].Model);
+            if (pseq ->seq[i].Description != NULL) cmsMLUfree(pseq ->seq[i].Description);
+        }
+
+        _cmsFree(pseq ->ContextID, pseq ->seq);
+    }
+
+    _cmsFree(pseq -> ContextID, pseq);
+}
+
+// Duplicate a profile sequence description. Scalar fields of every entry are copied
+// and the three MLU members are duplicated with cmsMLUdup. The copy lives in the
+// same context as the source. Returns NULL if pseq is NULL or an allocation of the
+// description or its entry array fails.
+cmsSEQ* CMSEXPORT cmsDupProfileSequenceDescription(const cmsSEQ* pseq)
+{
+    cmsSEQ *NewSeq;
+    cmsUInt32Number i;
+
+    if (pseq == NULL)
+        return NULL;
+
+    NewSeq = (cmsSEQ*) _cmsMalloc(pseq -> ContextID, sizeof(cmsSEQ));
+    if (NewSeq == NULL) return NULL;
+
+    // _cmsMalloc does not clear the block, so fill the header before anything can fail
+    NewSeq -> ContextID = pseq ->ContextID;
+    NewSeq -> n         = pseq ->n;
+    NewSeq -> seq       = (cmsPSEQDESC*) _cmsCalloc(pseq ->ContextID, pseq ->n, sizeof(cmsPSEQDESC));
+
+    if (NewSeq ->seq == NULL) {
+        // Only the header exists at this point; release it directly instead of
+        // walking an entry array that was never allocated
+        _cmsFree(pseq ->ContextID, NewSeq);
+        return NULL;
+    }
+
+    for (i=0; i < pseq->n; i++) {
+
+        memmove(&NewSeq ->seq[i].attributes, &pseq ->seq[i].attributes, sizeof(cmsUInt64Number));
+
+        NewSeq ->seq[i].deviceMfg   = pseq ->seq[i].deviceMfg;
+        NewSeq ->seq[i].deviceModel = pseq ->seq[i].deviceModel;
+        memmove(&NewSeq ->seq[i].ProfileID, &pseq ->seq[i].ProfileID, sizeof(cmsProfileID));
+        NewSeq ->seq[i].technology  = pseq ->seq[i].technology;
+
+        NewSeq ->seq[i].Manufacturer = cmsMLUdup(pseq ->seq[i].Manufacturer);
+        NewSeq ->seq[i].Model        = cmsMLUdup(pseq ->seq[i].Model);
+        NewSeq ->seq[i].Description  = cmsMLUdup(pseq ->seq[i].Description);
+
+    }
+
+    return NewSeq;
+}
+
+// Dictionaries --------------------------------------------------------------------------------------------------------
+
+// Dictionaries are just very simple linked lists
+
+
+// Internal representation behind the opaque dictionary handle (cmsHANDLE)
+typedef struct _cmsDICT_struct {
+    cmsDICTentry* head;      // First entry of the singly linked list, NULL when the dictionary is empty
+    cmsContext ContextID;    // Context used to allocate and free the dictionary and its entries
+} _cmsDICT;
+
+
+// Create an empty dictionary bound to ContextID. Returns NULL if the allocation fails.
+cmsHANDLE CMSEXPORT cmsDictAlloc(cmsContext ContextID)
+{
+    _cmsDICT* NewDict = (_cmsDICT*) _cmsMallocZero(ContextID, sizeof(_cmsDICT));
+
+    // The zero-filled block already has an empty list; only the context is missing
+    if (NewDict != NULL)
+        NewDict ->ContextID = ContextID;
+
+    return (cmsHANDLE) NewDict;
+}
+
+// Destroy a dictionary: every entry with its MLU objects and strings, then the
+// dictionary itself. The handle must not be NULL.
+void CMSEXPORT cmsDictFree(cmsHANDLE hDict)
+{
+    _cmsDICT* dict = (_cmsDICT*) hDict;
+    cmsDICTentry *node, *following;
+
+    _cmsAssert(dict != NULL);
+
+    for (node = dict ->head; node != NULL; node = following) {
+
+        // Remember the successor first: the node is about to be released
+        following = node ->Next;
+
+        if (node ->DisplayName  != NULL) cmsMLUfree(node ->DisplayName);
+        if (node ->DisplayValue != NULL) cmsMLUfree(node ->DisplayValue);
+        if (node ->Name != NULL) _cmsFree(dict ->ContextID, node -> Name);
+        if (node ->Value != NULL) _cmsFree(dict ->ContextID, node -> Value);
+
+        _cmsFree(dict ->ContextID, node);
+    }
+
+    _cmsFree(dict ->ContextID, dict);
+}
+
+
+// Copy a wide string, terminator included, into memory owned by ContextID.
+// A NULL string yields NULL.
+static
+wchar_t* DupWcs(cmsContext ContextID, const wchar_t* ptr)
+{
+    if (ptr != NULL)
+        return (wchar_t*) _cmsDupMem(ContextID, ptr, (mywcslen(ptr) + 1) * sizeof(wchar_t));
+
+    return NULL;
+}
+
+// Add a new entry at the head of the dictionary's linked list.
+//
+// hDict        - dictionary handle, must not be NULL
+// Name         - entry name, must not be NULL; copied
+// Value        - entry value, may be NULL; copied when given
+// DisplayName  - optional MLU, duplicated when given
+// DisplayValue - optional MLU, duplicated when given
+//
+// Returns TRUE when the entry was linked. Returns FALSE when the entry or any
+// requested copy cannot be allocated; in that case nothing is added and every
+// partial copy is released.
+cmsBool CMSEXPORT cmsDictAddEntry(cmsHANDLE hDict, const wchar_t* Name, const wchar_t* Value, const cmsMLU *DisplayName, const cmsMLU *DisplayValue)
+{
+    _cmsDICT* dict = (_cmsDICT*) hDict;
+    cmsDICTentry *entry;
+
+    _cmsAssert(dict != NULL);
+    _cmsAssert(Name != NULL);
+
+    entry = (cmsDICTentry*) _cmsMallocZero(dict ->ContextID, sizeof(cmsDICTentry));
+    if (entry == NULL) return FALSE;
+
+    entry ->DisplayName  = cmsMLUdup(DisplayName);
+    entry ->DisplayValue = cmsMLUdup(DisplayValue);
+    entry ->Name         = DupWcs(dict ->ContextID, Name);
+    entry ->Value        = DupWcs(dict ->ContextID, Value);
+
+    // A copy counts as failed only when its source was supplied. The name is
+    // mandatory, so a missing name is always a failure.
+    if (entry ->Name == NULL ||
+        (Value != NULL && entry ->Value == NULL) ||
+        (DisplayName != NULL && entry ->DisplayName == NULL) ||
+        (DisplayValue != NULL && entry ->DisplayValue == NULL)) {
+
+        if (entry ->DisplayName  != NULL) cmsMLUfree(entry ->DisplayName);
+        if (entry ->DisplayValue != NULL) cmsMLUfree(entry ->DisplayValue);
+        if (entry ->Name  != NULL) _cmsFree(dict ->ContextID, entry ->Name);
+        if (entry ->Value != NULL) _cmsFree(dict ->ContextID, entry ->Value);
+
+        _cmsFree(dict ->ContextID, entry);
+        return FALSE;
+    }
+
+    entry ->Next = dict ->head;
+    dict ->head = entry;
+
+    return TRUE;
+}
+
+
+// Duplicate a dictionary into a new one allocated in the same context. Entries are
+// re-added one by one with cmsDictAddEntry, which inserts at the head, so the copy
+// lists them in the opposite order. Returns NULL if any allocation fails.
+cmsHANDLE CMSEXPORT cmsDictDup(cmsHANDLE hDict)
+{
+    _cmsDICT* old_dict = (_cmsDICT*) hDict;
+    const cmsDICTentry* Source;
+    cmsHANDLE hCopy;
+
+    _cmsAssert(old_dict != NULL);
+
+    hCopy = cmsDictAlloc(old_dict ->ContextID);
+    if (hCopy == NULL) return NULL;
+
+    // Walk the list copying all nodes
+    for (Source = old_dict ->head; Source != NULL; Source = Source ->Next) {
+
+        if (!cmsDictAddEntry(hCopy, Source ->Name, Source ->Value, Source ->DisplayName, Source ->DisplayValue)) {
+
+            // Give up and discard whatever was copied so far
+            cmsDictFree(hCopy);
+            return NULL;
+        }
+    }
+
+    return hCopy;
+}
+
+// First entry of the dictionary's linked list, or NULL for a NULL handle
+const cmsDICTentry* CMSEXPORT cmsDictGetEntryList(cmsHANDLE hDict)
+{
+    _cmsDICT* dict = (_cmsDICT*) hDict;
+
+    return (dict != NULL) ? dict ->head : NULL;
+}
+
+// Helper for external languages: successor of an entry, or NULL when e is NULL
+const cmsDICTentry* CMSEXPORT cmsDictNextEntry(const cmsDICTentry* e)
+{
+     return (e != NULL) ? e ->Next : NULL;
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsopt.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsopt.cpp
new file mode 100644
index 0000000000..5be87bba30
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsopt.cpp
@@ -0,0 +1,1960 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+//----------------------------------------------------------------------------------
+
+// Optimization for 8 bits, Shaper-CLUT (3 inputs only).
+// Holds tables with one slot per possible 8-bit input value (256 each) for three axes.
+typedef struct {
+
+    cmsContext ContextID;       // Context this data block belongs to
+
+    const cmsInterpParams* p;   // Tetrahedral interpolation parameters. This is a not-owned pointer.
+
+    // NOTE(review): presumably the per-axis interpolation remainders for each input value - confirm against the code that fills them
+    cmsUInt16Number rx[256], ry[256], rz[256];
+    cmsUInt32Number X0[256], Y0[256], Z0[256];  // Precomputed nodes and offsets for 8-bit input data
+
+
+} Prelin8Data;
+
+
+// Generic optimization for 16 bits Shaper-CLUT-Shaper (any inputs).
+// Bundles the evaluators and interpolation parameters of the three steps:
+// one input curve per input channel, the CLUT, and one output curve per output channel.
+typedef struct {
+
+    cmsContext ContextID;       // Context this data block belongs to
+
+    // Number of channels
+    cmsUInt32Number nInputs;
+    cmsUInt32Number nOutputs;
+
+    // Input curves: evaluator and parameters per input channel, stored inline
+    _cmsInterpFn16 EvalCurveIn16[MAX_INPUT_DIMENSIONS];       // The maximum number of input channels is known in advance
+    cmsInterpParams*  ParamsCurveIn16[MAX_INPUT_DIMENSIONS];
+
+    _cmsInterpFn16 EvalCLUT;            // The evaluator for 3D grid
+    const cmsInterpParams* CLUTparams;  // (not-owned pointer)
+
+
+    // Output curves: both arrays have nOutputs elements. The arrays themselves are heap
+    // blocks; the interpolation parameters they refer to are not owned.
+    _cmsInterpFn16* EvalCurveOut16;       // Points to an array of curve evaluators in 16 bits (not-owned pointer)
+    cmsInterpParams**  ParamsCurveOut16;  // Points to an array of references to interpolation params (not-owned pointer)
+
+
+} Prelin16Data;
+
+
+// Optimization for matrix-shaper in 8 bits. Numbers are operated in n.14 signed, tables are stored in 1.14 fixed
+
+// Fixed-point value with 14 fractional bits, kept in a 32-bit signed integer
+typedef cmsInt32Number cmsS1Fixed14Number;   // Note that this may hold more than 16 bits!
+
+// Convert a floating point value to 1.14 fixed point: scale by 2^14 (16384) and round to nearest
+#define DOUBLE_TO_1FIXED14(x) ((cmsS1Fixed14Number) floor((x) * 16384.0 + 0.5))
+
+// Precomputed tables and matrix for the 8-bit matrix-shaper path
+typedef struct {
+
+    cmsContext ContextID;              // Context this data block belongs to
+
+    // First shaper: one table per channel, indexed by the 8-bit input value
+    cmsS1Fixed14Number Shaper1R[256];  // from 0..255 to 1.14  (0.0...1.0)
+    cmsS1Fixed14Number Shaper1G[256];
+    cmsS1Fixed14Number Shaper1B[256];
+
+    cmsS1Fixed14Number Mat[3][3];     // n.14 to n.14 (needs a saturation after that)
+    cmsS1Fixed14Number Off[3];        // Offset vector, in the same fixed-point format as Mat
+
+    // Second shaper: 16385 slots per channel, i.e. one per 1.14 value from 0 up to and including 16384 (1.0)
+    cmsUInt16Number Shaper2R[16385];    // 1.14 to 0..255
+    cmsUInt16Number Shaper2G[16385];
+    cmsUInt16Number Shaper2B[16385];
+
+} MatShaper8Data;
+
+// Curves, optimization is shared between 8 and 16 bits.
+// Describes a set of tabulated curves held as arrays of 16-bit values.
+typedef struct {
+
+    cmsContext ContextID;         // Context this data block belongs to
+
+    cmsUInt32Number nCurves;      // Number of curves
+    cmsUInt32Number nElements;    // Elements in curves
+    cmsUInt16Number** Curves;     // Points to a dynamically  allocated array
+
+} Curves16Data;
+
+
+// Simple optimizations ----------------------------------------------------------------------------------------------------------
+
+
+// Unlink the stage referenced through *head from its chain and free it.
+// head is the address of the pointer that currently links to the stage.
+static
+void _RemoveElement(cmsStage** head)
+{
+    cmsStage* Victim = *head;
+
+    // Bypass the stage before it is released
+    *head = Victim ->Next;
+    cmsStageFree(Victim);
+}
+
+// Remove every stage in the pipeline that implements UnaryOp. The walk uses a
+// pointer to the link that holds each stage, so unlinking needs no "previous" node.
+// Returns TRUE if at least one stage was removed.
+static
+cmsBool _Remove1Op(cmsPipeline* Lut, cmsStageSignature UnaryOp)
+{
+    cmsBool Removed = FALSE;
+    cmsStage** Link = &Lut ->Elements;
+
+    while (*Link != NULL) {
+
+        if ((*Link) ->Implements != UnaryOp) {
+            Link = &((*Link) -> Next);
+            continue;
+        }
+
+        // After removal the same link already refers to the following stage
+        _RemoveElement(Link);
+        Removed = TRUE;
+    }
+
+    return Removed;
+}
+
+// Remove every pair of adjacent stages where the first implements Op1 and the
+// second implements Op2. Returns TRUE if at least one pair was removed.
+static
+cmsBool _Remove2Op(cmsPipeline* Lut, cmsStageSignature Op1, cmsStageSignature Op2)
+{
+    cmsBool Removed = FALSE;
+    cmsStage** First = &Lut ->Elements;
+
+    // A pair needs two stages: stop as soon as there is no current or no following stage
+    while (*First != NULL && (*First) ->Next != NULL) {
+
+        cmsStage** Second = &((*First) -> Next);
+
+        if ((*First) ->Implements == Op1 && (*Second) ->Implements == Op2) {
+
+            // Drop the second stage first so the link to the first one stays valid
+            _RemoveElement(Second);
+            _RemoveElement(First);
+            Removed = TRUE;
+        }
+        else {
+            First = &((*First) -> Next);
+        }
+    }
+
+    return Removed;
+}
+
+
+// TRUE when a and b differ by less than the fixed tolerance 0.00001f
+static
+cmsBool CloseEnoughFloat(cmsFloat64Number a, cmsFloat64Number b)
+{
+       const cmsFloat64Number Distance = fabs(b - a);
+
+       return Distance < 0.00001f;
+}
+
+// TRUE when every element of the 3x3 matrix is within CloseEnoughFloat tolerance
+// of the corresponding element of the identity matrix
+static
+cmsBool  isFloatMatrixIdentity(const cmsMAT3* a)
+{
+       cmsMAT3 Reference;
+       int k;
+
+       _cmsMAT3identity(&Reference);
+
+       // Visit the nine elements in row-major order
+       for (k = 0; k < 9; k++) {
+
+              const int row = k / 3;
+              const int col = k % 3;
+
+              if (!CloseEnoughFloat(a->v[row].n[col], Reference.v[row].n[col])) return FALSE;
+       }
+
+       return TRUE;
+}
+// If two adjacent matrices are found, multiply them and replace both by the product.
+// When the product is an identity (within tolerance) both stages are simply dropped.
+// Only offset-free 3x3 matrices are handled; scanning stops at the first adjacent
+// matrix pair that does not qualify.
+//
+// Returns TRUE if the pipeline was modified by this call, FALSE otherwise. The
+// pipeline is never left with stages removed but not replaced.
+static
+cmsBool _MultiplyMatrix(cmsPipeline* Lut)
+{
+       cmsStage** pt1;
+       cmsStage** pt2;
+       cmsStage*  chain;
+       cmsBool AnyOpt = FALSE;
+
+       pt1 = &Lut->Elements;
+       if (*pt1 == NULL) return AnyOpt;
+
+       while (*pt1 != NULL) {
+
+              pt2 = &((*pt1)->Next);
+              if (*pt2 == NULL) return AnyOpt;
+
+              if ((*pt1)->Implements == cmsSigMatrixElemType && (*pt2)->Implements == cmsSigMatrixElemType) {
+
+                     // Get both matrices
+                     _cmsStageMatrixData* m1 = (_cmsStageMatrixData*) cmsStageData(*pt1);
+                     _cmsStageMatrixData* m2 = (_cmsStageMatrixData*) cmsStageData(*pt2);
+                     cmsStage* Multmat = NULL;
+                     cmsMAT3 res;
+
+                     // Input offset and output offset should be zero to use this optimization.
+                     // Report what was achieved so far instead of hiding earlier merges.
+                     if (m1->Offset != NULL || m2 ->Offset != NULL ||
+                            cmsStageInputChannels(*pt1) != 3 || cmsStageOutputChannels(*pt1) != 3 ||
+                            cmsStageInputChannels(*pt2) != 3 || cmsStageOutputChannels(*pt2) != 3)
+                            return AnyOpt;
+
+                     // Multiply both matrices to get the result
+                     _cmsMAT3per(&res, (cmsMAT3*)m2->Double, (cmsMAT3*)m1->Double);
+
+                     // Now what if the result is a plain identity? Then no replacement is needed.
+                     // Otherwise allocate the replacement BEFORE touching the chain, so that an
+                     // allocation failure leaves the original stages in place.
+                     if (!isFloatMatrixIdentity(&res)) {
+
+                            Multmat = cmsStageAllocMatrix(Lut->ContextID, 3, 3, (const cmsFloat64Number*) &res, NULL);
+                            if (Multmat == NULL) return AnyOpt;  // Should never happen
+                     }
+
+                     // Get the next in chain after the matrices
+                     chain = (*pt2)->Next;
+
+                     // Remove both matrices
+                     _RemoveElement(pt2);
+                     _RemoveElement(pt1);
+
+                     if (Multmat != NULL) {
+
+                            // Recover the chain
+                            Multmat->Next = chain;
+                            *pt1 = Multmat;
+                     }
+
+                     AnyOpt = TRUE;
+              }
+              else
+                     pt1 = &((*pt1)->Next);
+       }
+
+       return AnyOpt;
+}
+
+
+// Preoptimize just gets rid of no-ops: identity stages, conversions that come
+// paired with their inverse (for example v2 to v4 followed by v4 to v2), and
+// adjacent matrices that can be multiplied together. Passes are repeated until
+// one of them changes nothing. Returns TRUE if the pipeline was modified.
+static
+cmsBool PreOptimize(cmsPipeline* Lut)
+{
+    // Each row is a conversion followed by the conversion that undoes it
+    static const cmsStageSignature InversePairs[][2] = {
+        { cmsSigXYZ2LabElemType, cmsSigLab2XYZElemType },   // XYZ2Lab followed by Lab2XYZ
+        { cmsSigLab2XYZElemType, cmsSigXYZ2LabElemType },   // Lab2XYZ followed by XYZ2Lab
+        { cmsSigLabV4toV2,       cmsSigLabV2toV4       },   // V4 to V2 followed by V2 to V4
+        { cmsSigLabV2toV4,       cmsSigLabV4toV2       },   // V2 to V4 followed by V4 to V2
+        { cmsSigLab2FloatPCS,    cmsSigFloatPCS2Lab    },   // float pcs Lab conversions
+        { cmsSigXYZ2FloatPCS,    cmsSigFloatPCS2XYZ    }    // float pcs XYZ conversions
+    };
+
+    cmsBool Changed = FALSE;
+    cmsBool PassChanged;
+    cmsUInt32Number k;
+
+    do {
+
+        // Remove all identities
+        PassChanged = _Remove1Op(Lut, cmsSigIdentityElemType);
+
+        // Every pair is tried on every pass, regardless of earlier results
+        for (k = 0; k < sizeof(InversePairs) / sizeof(InversePairs[0]); k++) {
+
+            if (_Remove2Op(Lut, InversePairs[k][0], InversePairs[k][1]))
+                PassChanged = TRUE;
+        }
+
+        // Simplify matrix.
+        if (_MultiplyMatrix(Lut))
+            PassChanged = TRUE;
+
+        if (PassChanged)
+            Changed = TRUE;
+
+    } while (PassChanged);
+
+    return Changed;
+}
+
+// One-channel 16-bit evaluator that does nothing: the input value is passed
+// through unchanged and the interpolation parameters are ignored
+static
+void Eval16nop1D(CMSREGISTER const cmsUInt16Number Input[],
+                 CMSREGISTER cmsUInt16Number Output[],
+                 CMSREGISTER const struct _cms_interp_struc* p)
+{
+    cmsUNUSED_PARAMETER(p);
+
+    *Output = *Input;
+}
+
+// 16-bit evaluator for the shaper-CLUT-shaper optimization: each input channel
+// goes through its own input curve, the results go through the CLUT, and each
+// CLUT output goes through its own output curve. D points to a Prelin16Data.
+static
+void PrelinEval16(CMSREGISTER const cmsUInt16Number Input[],
+                  CMSREGISTER cmsUInt16Number Output[],
+                  CMSREGISTER const void* D)
+{
+    const Prelin16Data* Data = (Prelin16Data*) D;
+    cmsUInt16Number  AfterInputCurves[MAX_INPUT_DIMENSIONS];
+    cmsUInt16Number  AfterCLUT[cmsMAXCHANNELS];
+    cmsUInt32Number ch;
+
+    // Input curves, one channel at a time
+    ch = 0;
+    while (ch < Data ->nInputs) {
+
+        Data ->EvalCurveIn16[ch](&Input[ch], &AfterInputCurves[ch], Data ->ParamsCurveIn16[ch]);
+        ch++;
+    }
+
+    // Multidimensional lookup
+    Data ->EvalCLUT(AfterInputCurves, AfterCLUT, Data ->CLUTparams);
+
+    // Output curves, one channel at a time
+    ch = 0;
+    while (ch < Data ->nOutputs) {
+
+        Data ->EvalCurveOut16[ch](&AfterCLUT[ch], &Output[ch], Data ->ParamsCurveOut16[ch]);
+        ch++;
+    }
+}
+
+
+// Release a Prelin16Data block: the two output-curve arrays first, then the block itself
+static
+void PrelinOpt16free(cmsContext ContextID, void* ptr)
+{
+    Prelin16Data* Data = (Prelin16Data*) ptr;
+    _cmsInterpFn16* Evaluators = Data ->EvalCurveOut16;
+    cmsInterpParams** Params   = Data ->ParamsCurveOut16;
+
+    _cmsFree(ContextID, Evaluators);
+    _cmsFree(ContextID, Params);
+
+    _cmsFree(ContextID, Data);
+}
+
+// Duplicate a Prelin16Data block. The block itself and its two output-curve arrays
+// (nOutputs elements each) are copied; the interpolation parameters the arrays refer
+// to are shared with the original.
+//
+// Returns the copy, or NULL if any of the allocations fails. On failure nothing is
+// leaked and no half-built copy is returned.
+static
+void* Prelin16dup(cmsContext ContextID, const void* ptr)
+{
+    Prelin16Data* p16 = (Prelin16Data*) ptr;
+    Prelin16Data* Duped = (Prelin16Data*) _cmsDupMem(ContextID, p16, sizeof(Prelin16Data));
+
+    if (Duped == NULL) return NULL;
+
+    Duped->EvalCurveOut16 = (_cmsInterpFn16*) _cmsDupMem(ContextID, p16->EvalCurveOut16, p16->nOutputs * sizeof(_cmsInterpFn16));
+    Duped->ParamsCurveOut16 = (cmsInterpParams**)_cmsDupMem(ContextID, p16->ParamsCurveOut16, p16->nOutputs * sizeof(cmsInterpParams*));
+
+    // With no outputs the arrays are never indexed, so a NULL result is only an
+    // error when there is something to copy
+    if (p16->nOutputs > 0 &&
+        (Duped->EvalCurveOut16 == NULL || Duped->ParamsCurveOut16 == NULL)) {
+
+        if (Duped->EvalCurveOut16 != NULL) _cmsFree(ContextID, Duped->EvalCurveOut16);
+        if (Duped->ParamsCurveOut16 != NULL) _cmsFree(ContextID, Duped->ParamsCurveOut16);
+
+        _cmsFree(ContextID, Duped);
+        return NULL;
+    }
+
+    return Duped;
+}
+
+
+// Build the Prelin16Data block used by PrelinEval16.
+//
+// ColorMap     - interpolation parameters of the CLUT (kept as a not-owned pointer)
+// nInputs, In  - number of input channels and their curves; a NULL In installs a
+//                pass-through evaluator on every input channel
+// nOutputs,Out - number of output channels and their curves; a NULL Out installs a
+//                pass-through evaluator on every output channel
+//
+// Returns the new block, or NULL if an allocation fails.
+static
+Prelin16Data* PrelinOpt16alloc(cmsContext ContextID,
+                               const cmsInterpParams* ColorMap,
+                               cmsUInt32Number nInputs, cmsToneCurve** In,
+                               cmsUInt32Number nOutputs, cmsToneCurve** Out )
+{
+    cmsUInt32Number i;
+    Prelin16Data* p16 = (Prelin16Data*)_cmsMallocZero(ContextID, sizeof(Prelin16Data));
+    if (p16 == NULL) return NULL;
+
+    p16 ->nInputs = nInputs;
+    p16 ->nOutputs = nOutputs;
+
+
+    for (i=0; i < nInputs; i++) {
+
+        if (In == NULL) {
+            p16 -> ParamsCurveIn16[i] = NULL;
+            p16 -> EvalCurveIn16[i] = Eval16nop1D;
+
+        }
+        else {
+            p16 -> ParamsCurveIn16[i] = In[i] ->InterpParams;
+            p16 -> EvalCurveIn16[i] = p16 ->ParamsCurveIn16[i]->Interpolation.Lerp16;
+        }
+    }
+
+    p16 ->CLUTparams = ColorMap;
+    p16 ->EvalCLUT   = ColorMap ->Interpolation.Lerp16;
+
+
+    p16 -> EvalCurveOut16 = (_cmsInterpFn16*) _cmsCalloc(ContextID, nOutputs, sizeof(_cmsInterpFn16));
+    p16 -> ParamsCurveOut16 = (cmsInterpParams**) _cmsCalloc(ContextID, nOutputs, sizeof(cmsInterpParams* ));
+
+    // The loop below writes through both arrays, so they must exist whenever there
+    // is at least one output channel
+    if (nOutputs > 0 &&
+        (p16 ->EvalCurveOut16 == NULL || p16 ->ParamsCurveOut16 == NULL)) {
+
+        if (p16 ->EvalCurveOut16 != NULL) _cmsFree(ContextID, p16 ->EvalCurveOut16);
+        if (p16 ->ParamsCurveOut16 != NULL) _cmsFree(ContextID, p16 ->ParamsCurveOut16);
+
+        _cmsFree(ContextID, p16);
+        return NULL;
+    }
+
+    for (i=0; i < nOutputs; i++) {
+
+        if (Out == NULL) {
+            p16 ->ParamsCurveOut16[i] = NULL;
+            p16 -> EvalCurveOut16[i] = Eval16nop1D;
+        }
+        else {
+
+            p16 ->ParamsCurveOut16[i] = Out[i] ->InterpParams;
+            p16 -> EvalCurveOut16[i] = p16 ->ParamsCurveOut16[i]->Interpolation.Lerp16;
+        }
+    }
+
+    return p16;
+}
+
+
+
+// Resampling ---------------------------------------------------------------------------------
+
+#define PRELINEARIZATION_POINTS 4096
+
+// Sampler implemented by another LUT, passed in as Cargo. This is a clean way to
+// precalculate the devicelink 3D CLUT for almost any transform: the 16-bit node is
+// converted to floating point, evaluated through the pipeline in floating point,
+// and the result is converted back to 16 bits.
+static
+cmsInt32Number XFormSampler16(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void* Cargo)
+{
+    cmsFloat32Number FloatIn[cmsMAXCHANNELS], FloatOut[cmsMAXCHANNELS];
+    cmsPipeline* Lut = (cmsPipeline*) Cargo;
+    cmsUInt32Number ch;
+
+    _cmsAssert(Lut -> InputChannels < cmsMAXCHANNELS);
+    _cmsAssert(Lut -> OutputChannels < cmsMAXCHANNELS);
+
+    // From 16 bit to floating point
+    ch = 0;
+    while (ch < Lut ->InputChannels) {
+        FloatIn[ch] = (cmsFloat32Number) (In[ch] / 65535.0);
+        ch++;
+    }
+
+    // Evaluate in floating point
+    cmsPipelineEvalFloat(FloatIn, FloatOut, Lut);
+
+    // Back to 16 bits representation
+    ch = 0;
+    while (ch < Lut ->OutputChannels) {
+        Out[ch] = _cmsQuickSaturateWord(FloatOut[ch] * 65535.0);
+        ch++;
+    }
+
+    // Always succeed
+    return TRUE;
+}
+
+// TRUE when the stage exposes a curve set and every one of its curves (one per
+// output channel) is reported linear by cmsIsToneCurveLinear. FALSE when the stage
+// has no curve set or any curve is not linear.
+static
+cmsBool AllCurvesAreLinear(cmsStage* mpe)
+{
+    cmsToneCurve** Set = _cmsStageGetPtrToCurveSet(mpe);
+    cmsUInt32Number Remaining;
+
+    if (Set == NULL) return FALSE;
+
+    // Walk the set, one curve per output channel
+    for (Remaining = cmsStageOutputChannels(mpe); Remaining > 0; Remaining--, Set++) {
+
+        if (!cmsIsToneCurveLinear(*Set)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+// This function replaces a specific node placed in "At" by the "Value" numbers. Its purpose
+// is to fix scum dot on broken profiles/transforms. Works on 1, 3 and 4 channels.
+//
+// CLUT         - stage to patch; must be a CLUT stage
+// At           - 16-bit input coordinates of the node, nChannelsIn values
+// Value        - 16-bit output values to store, nChannelsOut values
+//
+// Returns TRUE when the node was overwritten. Returns FALSE when the stage is not a
+// CLUT, when the channel count is unsupported (both signal cmsERROR_INTERNAL), or when
+// "At" does not fall exactly on a grid node.
+static
+cmsBool  PatchLUT(cmsStage* CLUT, cmsUInt16Number At[], cmsUInt16Number Value[],
+                  cmsUInt32Number nChannelsOut, cmsUInt32Number nChannelsIn)
+{
+    _cmsStageCLutData* Grid;
+    cmsInterpParams* p16;
+    cmsFloat64Number px, py, pz, pw;
+    int        x0, y0, z0, w0;
+    int        i, index;
+
+    if (CLUT -> Type != cmsSigCLutElemType) {
+        cmsSignalError(CLUT->ContextID, cmsERROR_INTERNAL, "(internal) Attempt to PatchLUT on non-lut stage");
+        return FALSE;
+    }
+
+    // Only interpret the stage data as CLUT data once the stage type is confirmed
+    Grid = (_cmsStageCLutData*) CLUT ->Data;
+    p16  = Grid ->Params;
+
+    if (nChannelsIn == 4) {
+
+        // Scale each 16-bit coordinate to grid units
+        px = ((cmsFloat64Number) At[0] * (p16->Domain[0])) / 65535.0;
+        py = ((cmsFloat64Number) At[1] * (p16->Domain[1])) / 65535.0;
+        pz = ((cmsFloat64Number) At[2] * (p16->Domain[2])) / 65535.0;
+        pw = ((cmsFloat64Number) At[3] * (p16->Domain[3])) / 65535.0;
+
+        x0 = (int) floor(px);
+        y0 = (int) floor(py);
+        z0 = (int) floor(pz);
+        w0 = (int) floor(pw);
+
+        if (((px - x0) != 0) ||
+            ((py - y0) != 0) ||
+            ((pz - z0) != 0) ||
+            ((pw - w0) != 0)) return FALSE; // Not on exact node
+
+        // The first coordinate uses the last opta stride, the last coordinate uses opta[0]
+        index = (int) p16 -> opta[3] * x0 +
+                (int) p16 -> opta[2] * y0 +
+                (int) p16 -> opta[1] * z0 +
+                (int) p16 -> opta[0] * w0;
+    }
+    else
+        if (nChannelsIn == 3) {
+
+            px = ((cmsFloat64Number) At[0] * (p16->Domain[0])) / 65535.0;
+            py = ((cmsFloat64Number) At[1] * (p16->Domain[1])) / 65535.0;
+            pz = ((cmsFloat64Number) At[2] * (p16->Domain[2])) / 65535.0;
+
+            x0 = (int) floor(px);
+            y0 = (int) floor(py);
+            z0 = (int) floor(pz);
+
+            if (((px - x0) != 0) ||
+                ((py - y0) != 0) ||
+                ((pz - z0) != 0)) return FALSE;  // Not on exact node
+
+            index = (int) p16 -> opta[2] * x0 +
+                    (int) p16 -> opta[1] * y0 +
+                    (int) p16 -> opta[0] * z0;
+        }
+        else
+            if (nChannelsIn == 1) {
+
+                px = ((cmsFloat64Number) At[0] * (p16->Domain[0])) / 65535.0;
+
+                x0 = (int) floor(px);
+
+                if (((px - x0) != 0)) return FALSE; // Not on exact node
+
+                index = (int) p16 -> opta[0] * x0;
+            }
+            else {
+                cmsSignalError(CLUT->ContextID, cmsERROR_INTERNAL, "(internal) %d Channels are not supported on PatchLUT", nChannelsIn);
+                return FALSE;
+            }
+
+    // Overwrite the output values stored at the node
+    for (i = 0; i < (int) nChannelsOut; i++)
+        Grid->Tab.T[index + i] = Value[i];
+
+    return TRUE;
+}
+
+// Auxiliary: compares two white points channel by channel. Returns TRUE when every
+// channel matches, and also when some channel differs by more than 0xf000, because
+// values that far apart mean the fixup should be avoided. Returns FALSE on any
+// smaller mismatch.
+static
+cmsBool WhitesAreEqual(cmsUInt32Number n, cmsUInt16Number White1[], cmsUInt16Number White2[] )
+{
+    cmsUInt32Number k = 0;
+
+    while (k < n) {
+
+        // Absolute distance between both channel values, computed in int
+        int Delta = (int) White1[k] - (int) White2[k];
+        if (Delta < 0) Delta = -Delta;
+
+        // Values are so extremely different that the fixup should be avoided
+        if (Delta > 0xf000) return TRUE;
+
+        // Any smaller difference means the whites do not match
+        if (Delta != 0) return FALSE;
+
+        k++;
+    }
+
+    return TRUE;
+}
+
+
+// Finds the CLUT node that corresponds to the white point of the entry color space and
+// overwrites it with the white of the exit color space, in order to avoid scum dot.
+// Returns FALSE if the end points are unknown, the channel counts do not match the
+// pipeline, or the pipeline is not a [curves] CLUT [curves] arrangement. Returns TRUE
+// when whites already match or after the patch has been attempted.
+static
+cmsBool FixWhiteMisalignment(cmsPipeline* Lut, cmsColorSpaceSignature EntryColorSpace, cmsColorSpaceSignature ExitColorSpace)
+{
+    cmsUInt16Number *RefWhiteIn, *RefWhiteOut;
+    cmsUInt16Number  NodeIn[cmsMAXCHANNELS], NodeOut[cmsMAXCHANNELS], Measured[cmsMAXCHANNELS];
+    cmsUInt32Number ch, nChanOut, nChanIn;
+    cmsStage *Pre = NULL, *Grid = NULL, *Post = NULL;
+
+    // White point and channel count of both color spaces
+    if (!_cmsEndPointsBySpace(EntryColorSpace, &RefWhiteIn, NULL, &nChanIn)) return FALSE;
+    if (!_cmsEndPointsBySpace(ExitColorSpace, &RefWhiteOut, NULL, &nChanOut)) return FALSE;
+
+    // The pipeline has to agree with the color spaces
+    if (Lut ->InputChannels != nChanIn || Lut ->OutputChannels != nChanOut) return FALSE;
+
+    // What does the pipeline currently deliver for white?
+    cmsPipelineEval16(RefWhiteIn, Measured, Lut);
+
+    if (WhitesAreEqual(nChanOut, RefWhiteOut, Measured)) return TRUE; // whites already match
+
+    // Accept Prelin+CLUT+Postlin, Prelin+CLUT, CLUT+Postlin or a lone CLUT, tried in that order
+    if (!cmsPipelineCheckAndRetreiveStages(Lut, 3, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType, &Pre, &Grid, &Post) &&
+        !cmsPipelineCheckAndRetreiveStages(Lut, 2, cmsSigCurveSetElemType, cmsSigCLutElemType, &Pre, &Grid) &&
+        !cmsPipelineCheckAndRetreiveStages(Lut, 2, cmsSigCLutElemType, cmsSigCurveSetElemType, &Grid, &Post) &&
+        !cmsPipelineCheckAndRetreiveStages(Lut, 1, cmsSigCLutElemType, &Grid))
+        return FALSE;
+
+    // Input side: the node to patch is white as seen after the prelinearization curves
+    if (Pre == NULL) {
+
+        for (ch = 0; ch < nChanIn; ch++)
+            NodeIn[ch] = RefWhiteIn[ch];
+    }
+    else {
+
+        cmsToneCurve** PreCurves = _cmsStageGetPtrToCurveSet(Pre);
+
+        for (ch = 0; ch < nChanIn; ch++)
+            NodeIn[ch] = cmsEvalToneCurve16(PreCurves[ch], RefWhiteIn[ch]);
+    }
+
+    // Output side: the value to store is white as seen before the postlinearization
+    // curves, so each curve is reversed. Channels whose curve cannot be reversed
+    // keep the plain white value.
+    if (Post == NULL) {
+
+        for (ch = 0; ch < nChanOut; ch++)
+            NodeOut[ch] = RefWhiteOut[ch];
+    }
+    else {
+
+        cmsToneCurve** PostCurves = _cmsStageGetPtrToCurveSet(Post);
+
+        for (ch = 0; ch < nChanOut; ch++) {
+
+            cmsToneCurve* Inverse = cmsReverseToneCurve(PostCurves[ch]);
+
+            if (Inverse != NULL) {
+
+                NodeOut[ch] = cmsEvalToneCurve16(Inverse, RefWhiteOut[ch]);
+                cmsFreeToneCurve(Inverse);
+            }
+            else {
+                NodeOut[ch] = RefWhiteOut[ch];
+            }
+        }
+    }
+
+    // Patch the node. The result is deliberately ignored: failing here is not an error
+    PatchLUT(Grid, NodeIn, NodeOut, nChanOut, nChanIn);
+
+    return TRUE;
+}
+
+// -----------------------------------------------------------------------------------------------------------------------------------------------
+// This function creates simple LUT from complex ones. The generated LUT has an optional set of
+// prelinearization curves, a CLUT of nGridPoints and optional postlinearization tables.
+// These curves have to exist in the original LUT in order to be used in the simplified output.
+// Caller may also use the flags to allow this feature.
+// LUTS with all curves will be simplified to a single curve. Parametric curves are lost.
+// This function should be used on 16-bits LUTS only, as floating point loses precision when simplified
+//
+// Parameters:
+//   Lut          - in/out. On success the original pipeline is freed and *Lut is replaced by the
+//                  resampled one. On failure *Lut is left as it was (unlinked stages are put back).
+//   Intent       - absolute colorimetric disables the white-on-white fixup.
+//   InputFormat,
+//   OutputFormat - used to reject floating point formatters and to obtain the color spaces.
+//   dwFlags      - in/out. Read for the pre/post linearization and fixup flags;
+//                  cmsFLAGS_NOWHITEONWHITEFIXUP is set on absolute colorimetric.
+// Returns TRUE if the pipeline was replaced, FALSE if the optimization does not apply or failed.
+// -----------------------------------------------------------------------------------------------------------------------------------------------
+
+static
+cmsBool OptimizeByResampling(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
+{
+    cmsPipeline* Src = NULL;
+    cmsPipeline* Dest = NULL;
+    cmsStage* mpe;
+    cmsStage* CLUT;
+    cmsStage *KeepPreLin = NULL, *KeepPostLin = NULL;
+    cmsUInt32Number nGridPoints;
+    cmsColorSpaceSignature ColorSpace, OutputColorSpace;
+    cmsStage *NewPreLin = NULL;
+    cmsStage *NewPostLin = NULL;
+    _cmsStageCLutData* DataCLUT;
+    cmsToneCurve** DataSetIn;
+    cmsToneCurve** DataSetOut;
+    Prelin16Data* p16;
+
+    // This is a lossy optimization! does not apply in floating-point cases
+    if (_cmsFormatterIsFloat(*InputFormat) || _cmsFormatterIsFloat(*OutputFormat)) return FALSE;
+
+    ColorSpace       = _cmsICCcolorSpace((int) T_COLORSPACE(*InputFormat));
+    OutputColorSpace = _cmsICCcolorSpace((int) T_COLORSPACE(*OutputFormat));
+
+    // Color space must be specified
+    if (ColorSpace == (cmsColorSpaceSignature)0 ||
+        OutputColorSpace == (cmsColorSpaceSignature)0) return FALSE;
+
+    nGridPoints      = _cmsReasonableGridpointsByColorspace(ColorSpace, *dwFlags);
+
+    // For empty LUTs, 2 points are enough
+    if (cmsPipelineStageCount(*Lut) == 0)
+        nGridPoints = 2;
+
+    Src = *Lut;
+
+    // Named color pipelines cannot be optimized either
+    for (mpe = cmsPipelineGetPtrToFirstStage(Src);
+        mpe != NULL;
+        mpe = cmsStageNext(mpe)) {
+            if (cmsStageType(mpe) == cmsSigNamedColorElemType) return FALSE;
+    }
+
+    // Allocate an empty LUT
+    Dest =  cmsPipelineAlloc(Src ->ContextID, Src ->InputChannels, Src ->OutputChannels);
+    if (!Dest) return FALSE;
+
+    // Prelinearization tables are kept unless indicated by flags
+    if (*dwFlags & cmsFLAGS_CLUT_PRE_LINEARIZATION) {
+
+        // Get a pointer to the prelinearization element
+        cmsStage* PreLin = cmsPipelineGetPtrToFirstStage(Src);
+
+        // Check if suitable
+        if (PreLin && PreLin ->Type == cmsSigCurveSetElemType) {
+
+            // Maybe this is a linear tram, so we can avoid the whole stuff
+            if (!AllCurvesAreLinear(PreLin)) {
+
+                // All seems ok, proceed.
+                NewPreLin = cmsStageDup(PreLin);
+                if(!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, NewPreLin))
+                    goto Error;
+
+                // Remove prelinearization. Since we have duplicated the curve
+                // in destination LUT, the sampling should be applied after this stage.
+                cmsPipelineUnlinkStage(Src, cmsAT_BEGIN, &KeepPreLin);
+            }
+        }
+    }
+
+    // Allocate the CLUT
+    CLUT = cmsStageAllocCLut16bit(Src ->ContextID, nGridPoints, Src ->InputChannels, Src->OutputChannels, NULL);
+    if (CLUT == NULL) goto Error;
+
+    // Add the CLUT to the destination LUT
+    if (!cmsPipelineInsertStage(Dest, cmsAT_END, CLUT)) {
+        goto Error;
+    }
+
+    // Postlinearization tables are kept unless indicated by flags
+    if (*dwFlags & cmsFLAGS_CLUT_POST_LINEARIZATION) {
+
+        // Get a pointer to the postlinearization if present
+        cmsStage* PostLin = cmsPipelineGetPtrToLastStage(Src);
+
+        // Check if suitable
+        if (PostLin && cmsStageType(PostLin) == cmsSigCurveSetElemType) {
+
+            // Maybe this is a linear tram, so we can avoid the whole stuff
+            if (!AllCurvesAreLinear(PostLin)) {
+
+                // All seems ok, proceed.
+                NewPostLin = cmsStageDup(PostLin);
+                if (!cmsPipelineInsertStage(Dest, cmsAT_END, NewPostLin))
+                    goto Error;
+
+                // In destination LUT, the sampling should be applied after this stage.
+                cmsPipelineUnlinkStage(Src, cmsAT_END, &KeepPostLin);
+            }
+        }
+    }
+
+    // Now its time to do the sampling. We have to ignore pre/post linearization
+    // The source LUT without pre/post curves is passed as parameter.
+    if (!cmsStageSampleCLut16bit(CLUT, XFormSampler16, (void*) Src, 0)) {
+Error:
+        // Ops, something went wrong, Restore stages
+        if (KeepPreLin != NULL) {
+            if (!cmsPipelineInsertStage(Src, cmsAT_BEGIN, KeepPreLin)) {
+                _cmsAssert(0); // This never happens
+            }
+        }
+        if (KeepPostLin != NULL) {
+            if (!cmsPipelineInsertStage(Src, cmsAT_END,   KeepPostLin)) {
+                _cmsAssert(0); // This never happens
+            }
+        }
+        cmsPipelineFree(Dest);
+        return FALSE;
+    }
+
+    // Done. From here on the source pipeline is gone, so there is no way back:
+    // the function has to finish and hand Dest to the caller.
+
+    if (KeepPreLin != NULL) cmsStageFree(KeepPreLin);
+    if (KeepPostLin != NULL) cmsStageFree(KeepPostLin);
+    cmsPipelineFree(Src);
+
+    DataCLUT = (_cmsStageCLutData*) CLUT ->Data;
+
+    if (NewPreLin == NULL) DataSetIn = NULL;
+    else DataSetIn = ((_cmsStageToneCurvesData*) NewPreLin ->Data) ->TheCurves;
+
+    if (NewPostLin == NULL) DataSetOut = NULL;
+    else  DataSetOut = ((_cmsStageToneCurvesData*) NewPostLin ->Data) ->TheCurves;
+
+
+    if (DataSetIn == NULL && DataSetOut == NULL) {
+
+        // Only a CLUT: the interpolation routine can be used directly as evaluator
+        _cmsPipelineSetOptimizationParameters(Dest, (_cmsOPTeval16Fn) DataCLUT->Params->Interpolation.Lerp16, DataCLUT->Params, NULL, NULL);
+    }
+    else {
+
+        p16 = PrelinOpt16alloc(Dest ->ContextID,
+            DataCLUT ->Params,
+            Dest ->InputChannels,
+            DataSetIn,
+            Dest ->OutputChannels,
+            DataSetOut);
+
+        // The allocation may fail. Installing PrelinEval16 with a NULL data block would hand
+        // a NULL pointer to the evaluator, and returning FALSE is not possible because Src has
+        // already been freed. In that case Dest is left with the evaluator it already has.
+        // NOTE(review): presumably that is the generic stage-by-stage one - confirm.
+        if (p16 != NULL) {
+            _cmsPipelineSetOptimizationParameters(Dest, PrelinEval16, (void*) p16, PrelinOpt16free, Prelin16dup);
+        }
+    }
+
+
+    // Don't fix white on absolute colorimetric
+    if (Intent == INTENT_ABSOLUTE_COLORIMETRIC)
+        *dwFlags |= cmsFLAGS_NOWHITEONWHITEFIXUP;
+
+    if (!(*dwFlags & cmsFLAGS_NOWHITEONWHITEFIXUP)) {
+
+        // Best effort, the result is intentionally not checked
+        FixWhiteMisalignment(Dest, ColorSpace, OutputColorSpace);
+    }
+
+    *Lut = Dest;
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(Intent);
+}
+
+
+// -----------------------------------------------------------------------------------------------------------------------------------------------
+// Fixes the gamma balancing of transform. This is described in my paper "Prelinearization Stages on
+// Color-Management Application-Specific Integrated Circuits (ASICs)" presented at NIP24. It only works
+// for RGB transforms. See the paper for more details
+// -----------------------------------------------------------------------------------------------------------------------------------------------
+
+
+// Normalize endpoints by slope limiting max and min. This assures endpoints as well.
+// Descending curves are handled as well.
+//
+// The first and the last 2% of the 16-bit table of g are overwritten in place by straight
+// segments that join the curve value at the cutoff with the ideal end value (0 or 0xffff,
+// swapped on descending curves). Tables too short to have a 2% cutoff of at least one
+// entry are left untouched.
+static
+void SlopeLimiting(cmsToneCurve* g)
+{
+    int BeginVal, EndVal;
+    int AtBegin = (int) floor((cmsFloat64Number) g ->nEntries * 0.02 + 0.5);   // Cutoff at 2%
+    int AtEnd   = (int) g ->nEntries - AtBegin - 1;                                  // And 98%
+    cmsFloat64Number Val, Slope, beta;
+    int i;
+
+    // With fewer than 25 entries the cutoff rounds to zero. AtBegin is the divisor of both
+    // slopes below, so going on would produce inf/NaN and store garbage in the last entry.
+    if (AtBegin <= 0) return;
+
+    if (cmsIsToneCurveDescending(g)) {
+        BeginVal = 0xffff; EndVal = 0;
+    }
+    else {
+        BeginVal = 0; EndVal = 0xffff;
+    }
+
+    // Compute slope and offset for begin of curve
+    Val   = g ->Table16[AtBegin];
+    Slope = (Val - BeginVal) / AtBegin;
+    beta  = Val - Slope * AtBegin;
+
+    for (i=0; i < AtBegin; i++)
+        g ->Table16[i] = _cmsQuickSaturateWord(i * Slope + beta);
+
+    // Compute slope and offset for the end
+    Val   = g ->Table16[AtEnd];
+    Slope = (EndVal - Val) / AtBegin;   // AtBegin holds the X interval, which is same in both cases
+    beta  = Val - Slope * AtEnd;
+
+    for (i = AtEnd; i < (int) g ->nEntries; i++)
+        g ->Table16[i] = _cmsQuickSaturateWord(i * Slope + beta);
+}
+
+
+// Builds the tables used by the 8-bit evaluator. For each of the 256 input levels and
+// each of the three channels it stores the CLUT node offset (X0/Y0/Z0) and the fractional
+// rest (rx/ry/rz). G holds the three prelinearization curves and may be NULL, in which
+// case the levels are taken as they are. Returns NULL if the block cannot be allocated.
+static
+Prelin8Data* PrelinOpt8alloc(cmsContext ContextID, const cmsInterpParams* p, cmsToneCurve* G[3])
+{
+    Prelin8Data* Tables;
+    int Level;
+
+    Tables = (Prelin8Data*)_cmsMallocZero(ContextID, sizeof(Prelin8Data));
+    if (Tables == NULL) return NULL;
+
+    // Since this only works for 8 bit input, values comes always as x * 257,
+    // we can safely take msb byte (x << 8 + x)
+
+    for (Level = 0; Level < 256; Level++) {
+
+        cmsUInt16Number Lin[3];
+        cmsS15Fixed16Number fx, fy, fz;
+
+        // 16-bit representation of this level, through the curves when given
+        if (G == NULL) {
+
+            Lin[0] = Lin[1] = Lin[2] = FROM_8_TO_16(Level);
+        }
+        else {
+
+            Lin[0] = cmsEvalToneCurve16(G[0], FROM_8_TO_16(Level));
+            Lin[1] = cmsEvalToneCurve16(G[1], FROM_8_TO_16(Level));
+            Lin[2] = cmsEvalToneCurve16(G[2], FROM_8_TO_16(Level));
+        }
+
+        // Move to 0..1.0 in fixed domain
+        fx = _cmsToFixedDomain((int) (Lin[0] * p -> Domain[0]));
+        fy = _cmsToFixedDomain((int) (Lin[1] * p -> Domain[1]));
+        fz = _cmsToFixedDomain((int) (Lin[2] * p -> Domain[2]));
+
+        // First channel: node offset and rest
+        Tables ->X0[Level] = (p->opta[2] * FIXED_TO_INT(fx));
+        Tables ->rx[Level] = (cmsUInt16Number) FIXED_REST_TO_INT(fx);
+
+        // Second channel
+        Tables ->Y0[Level] = (p->opta[1] * FIXED_TO_INT(fy));
+        Tables ->ry[Level] = (cmsUInt16Number) FIXED_REST_TO_INT(fy);
+
+        // Third channel
+        Tables ->Z0[Level] = (p->opta[0] * FIXED_TO_INT(fz));
+        Tables ->rz[Level] = (cmsUInt16Number) FIXED_REST_TO_INT(fz);
+    }
+
+    Tables ->ContextID = ContextID;
+    Tables ->p = p;
+
+    return Tables;
+}
+
+// Releases a Prelin8Data block. The block is a single allocation, so one free is enough.
+static
+void Prelin8free(cmsContext ContextID, void* ptr)
+{
+    Prelin8Data* p8 = (Prelin8Data*) ptr;
+
+    _cmsFree(ContextID, p8);
+}
+
+// Duplicates a Prelin8Data block with a plain memory copy of sizeof(Prelin8Data) bytes.
+// Returns whatever _cmsDupMem returns.
+static
+void* Prelin8dup(cmsContext ContextID, const void* ptr)
+{
+    void* Copy = _cmsDupMem(ContextID, ptr, sizeof(Prelin8Data));
+
+    return Copy;
+}
+
+
+
+// An optimized interpolation for 8-bit input.
+//
+// Input  - three 16-bit values; only the high byte of each one is used, as index into
+//          the precomputed tables.
+// Output - one 16-bit value per output channel of the interpolation parameters.
+// D      - the Prelin8Data block holding the precomputed tables and the parameters.
+//
+// DENS reads the table entry of the current output channel at the node whose three
+// per-axis offsets are i, j and k.
+#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
+static CMS_NO_SANITIZE
+void PrelinEval8(CMSREGISTER const cmsUInt16Number Input[],
+                  CMSREGISTER cmsUInt16Number Output[],
+                  CMSREGISTER const void* D)
+{
+
+    cmsUInt8Number         r, g, b;
+    cmsS15Fixed16Number    rx, ry, rz;
+    cmsS15Fixed16Number    c0, c1, c2, c3, Rest;
+    int                    OutChan;
+    CMSREGISTER cmsS15Fixed16Number X0, X1, Y0, Y1, Z0, Z1;
+    Prelin8Data* p8 = (Prelin8Data*) D;
+    CMSREGISTER const cmsInterpParams* p = p8 ->p;
+    int                    TotalOut = (int) p -> nOutputs;
+    const cmsUInt16Number* LutTable = (const cmsUInt16Number*) p->Table;
+
+    // Take the high byte of each input as the 8-bit level
+    r = (cmsUInt8Number) (Input[0] >> 8);
+    g = (cmsUInt8Number) (Input[1] >> 8);
+    b = (cmsUInt8Number) (Input[2] >> 8);
+
+    // Precomputed offset of the lower node on each axis
+    X0 = X1 = (cmsS15Fixed16Number) p8->X0[r];
+    Y0 = Y1 = (cmsS15Fixed16Number) p8->Y0[g];
+    Z0 = Z1 = (cmsS15Fixed16Number) p8->Z0[b];
+
+    // Precomputed fractional rest on each axis
+    rx = p8 ->rx[r];
+    ry = p8 ->ry[g];
+    rz = p8 ->rz[b];
+
+    // Offset of the upper node. When the rest is zero the upper node is made equal to the
+    // lower one, so no table entry beyond the lower node is read on that axis.
+    X1 = X0 + (cmsS15Fixed16Number)((rx == 0) ? 0 :  p ->opta[2]);
+    Y1 = Y0 + (cmsS15Fixed16Number)((ry == 0) ? 0 :  p ->opta[1]);
+    Z1 = Z0 + (cmsS15Fixed16Number)((rz == 0) ? 0 :  p ->opta[0]);
+
+
+    // These are the 6 Tetrahedral
+    // The ordering of rx, ry, rz selects the case; c1, c2, c3 are the node differences
+    // that get weighted by rx, ry, rz respectively.
+    for (OutChan=0; OutChan < TotalOut; OutChan++) {
+
+        c0 = DENS(X0, Y0, Z0);
+
+        if (rx >= ry && ry >= rz)
+        {
+            c1 = DENS(X1, Y0, Z0) - c0;
+            c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0);
+            c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+        }
+        else
+            if (rx >= rz && rz >= ry)
+            {
+                c1 = DENS(X1, Y0, Z0) - c0;
+                c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0);
+            }
+            else
+                if (rz >= rx && rx >= ry)
+                {
+                    c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1);
+                    c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                    c3 = DENS(X0, Y0, Z1) - c0;
+                }
+                else
+                    if (ry >= rx && rx >= rz)
+                    {
+                        c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0);
+                        c2 = DENS(X0, Y1, Z0) - c0;
+                        c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+                    }
+                    else
+                        if (ry >= rz && rz >= rx)
+                        {
+                            c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                            c2 = DENS(X0, Y1, Z0) - c0;
+                            c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0);
+                        }
+                        else
+                            if (rz >= ry && ry >= rx)
+                            {
+                                c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                                c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1);
+                                c3 = DENS(X0, Y0, Z1) - c0;
+                            }
+                            else  {
+                                // Fallback when none of the six orderings matched
+                                c1 = c2 = c3 = 0;
+                            }
+
+        // Weighted sum of the differences. NOTE(review): the 0x8001 bias and the
+        // (Rest + (Rest >> 16)) >> 16 step look like a rounded scale back from the
+        // 16-bit rest domain - confirm against the generic interpolation code.
+        Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+        Output[OutChan] = (cmsUInt16Number) (c0 + ((Rest + (Rest >> 16)) >> 16));
+
+    }
+}
+
+#undef DENS
+
+
+// Curves that contain wide empty areas are not optimizeable. A curve is reported as
+// degenerated when more than one twentieth of its entries are 0x0000, or more than one
+// twentieth are 0xffff. A table with exactly one of each is never degenerated.
+static
+cmsBool IsDegenerated(const cmsToneCurve* g)
+{
+    const cmsUInt32Number Total = g ->nEntries;
+    const cmsUInt32Number Limit = Total / 20;
+    cmsUInt32Number nZero = 0, nFull = 0;
+    cmsUInt32Number k;
+
+    // Count entries stuck at either extreme
+    for (k = 0; k < Total; k++) {
+
+        switch (g ->Table16[k]) {
+
+            case 0x0000: nZero++; break;
+            case 0xffff: nFull++; break;
+            default: break;
+        }
+    }
+
+    // For linear tables
+    if (nZero == 1 && nFull == 1) return FALSE;
+
+    // Degenerated if there are many zeros or many poles
+    return (nZero > Limit || nFull > Limit) ? TRUE : FALSE;
+}
+
+// --------------------------------------------------------------------------------------------------------------
+// We need xput over here
+//
+// Optimization for chunky RGB to RGB pipelines. A gray ramp is pushed through the pipeline to
+// obtain one curve per channel; those curves become the prelinearization of the result, and the
+// CLUT of the result is sampled from the original pipeline preceded by the reversed curves.
+//
+// Parameters:
+//   Lut          - in/out. On success the original pipeline is freed and *Lut is replaced.
+//                  On failure *Lut is left untouched.
+//   Intent       - absolute colorimetric disables the white-on-white fixup.
+//   InputFormat,
+//   OutputFormat - must be non-float, chunky RGB. 16-bit input additionally requires
+//                  cmsFLAGS_CLUT_PRE_LINEARIZATION in dwFlags.
+//   dwFlags      - in/out. cmsFLAGS_NOWHITEONWHITEFIXUP is set on absolute colorimetric.
+// Returns TRUE if the pipeline was replaced, FALSE if the optimization does not apply or failed.
+
+static
+cmsBool OptimizeByComputingLinearization(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
+{
+    cmsPipeline* OriginalLut;
+    cmsUInt32Number nGridPoints;
+    cmsToneCurve *Trans[cmsMAXCHANNELS], *TransReverse[cmsMAXCHANNELS];
+    cmsUInt32Number t, i;
+    cmsFloat32Number v, In[cmsMAXCHANNELS], Out[cmsMAXCHANNELS];
+    cmsBool lIsSuitable, lIsLinear;
+    cmsPipeline* OptimizedLUT = NULL, *LutPlusCurves = NULL;
+    cmsStage* OptimizedCLUTmpe;
+    cmsColorSpaceSignature ColorSpace, OutputColorSpace;
+    cmsStage* OptimizedPrelinMpe;
+    cmsStage* mpe;
+    cmsToneCurve** OptimizedPrelinCurves;
+    _cmsStageCLutData* OptimizedPrelinCLUT;
+
+
+    // This is a lossy optimization! does not apply in floating-point cases
+    if (_cmsFormatterIsFloat(*InputFormat) || _cmsFormatterIsFloat(*OutputFormat)) return FALSE;
+
+    // Only on chunky RGB
+    if (T_COLORSPACE(*InputFormat)  != PT_RGB) return FALSE;
+    if (T_PLANAR(*InputFormat)) return FALSE;
+
+    if (T_COLORSPACE(*OutputFormat) != PT_RGB) return FALSE;
+    if (T_PLANAR(*OutputFormat)) return FALSE;
+
+    // On 16 bits, user has to specify the feature
+    if (!_cmsFormatterIs8bit(*InputFormat)) {
+        if (!(*dwFlags & cmsFLAGS_CLUT_PRE_LINEARIZATION)) return FALSE;
+    }
+
+    OriginalLut = *Lut;
+
+   // Named color pipelines cannot be optimized either
+   for (mpe = cmsPipelineGetPtrToFirstStage(OriginalLut);
+         mpe != NULL;
+         mpe = cmsStageNext(mpe)) {
+            if (cmsStageType(mpe) == cmsSigNamedColorElemType) return FALSE;
+    }
+
+    ColorSpace       = _cmsICCcolorSpace((int) T_COLORSPACE(*InputFormat));
+    OutputColorSpace = _cmsICCcolorSpace((int) T_COLORSPACE(*OutputFormat));
+
+    // Color space must be specified
+    if (ColorSpace == (cmsColorSpaceSignature)0 ||
+        OutputColorSpace == (cmsColorSpaceSignature)0) return FALSE;
+
+    nGridPoints      = _cmsReasonableGridpointsByColorspace(ColorSpace, *dwFlags);
+
+    // Empty gamma containers. Done before any goto Error, so the cleanup code
+    // only ever sees NULL or valid curves.
+    memset(Trans, 0, sizeof(Trans));
+    memset(TransReverse, 0, sizeof(TransReverse));
+
+    // If the last stage of the original lut are curves, and those curves are
+    // degenerated, it is likely the transform is squeezing and clipping
+    // the output from previous CLUT. We cannot optimize this case
+    {
+        cmsStage* last = cmsPipelineGetPtrToLastStage(OriginalLut);
+
+        // A pipeline without stages has no last stage; nothing to linearize then
+        if (last == NULL) goto Error;
+
+        if (cmsStageType(last) == cmsSigCurveSetElemType) {
+
+            _cmsStageToneCurvesData* Data = (_cmsStageToneCurvesData*)cmsStageData(last);
+            for (i = 0; i < Data->nCurves; i++) {
+                if (IsDegenerated(Data->TheCurves[i]))
+                    goto Error;
+            }
+        }
+    }
+
+    for (t = 0; t < OriginalLut ->InputChannels; t++) {
+        Trans[t] = cmsBuildTabulatedToneCurve16(OriginalLut ->ContextID, PRELINEARIZATION_POINTS, NULL);
+        if (Trans[t] == NULL) goto Error;
+    }
+
+    // Populate the curves
+    for (i=0; i < PRELINEARIZATION_POINTS; i++) {
+
+        v = (cmsFloat32Number) ((cmsFloat64Number) i / (PRELINEARIZATION_POINTS - 1));
+
+        // Feed input with a gray ramp
+        for (t=0; t < OriginalLut ->InputChannels; t++)
+            In[t] = v;
+
+        // Evaluate the gray value
+        cmsPipelineEvalFloat(In, Out, OriginalLut);
+
+        // Store result in curve
+        for (t=0; t < OriginalLut ->InputChannels; t++)
+            Trans[t] ->Table16[i] = _cmsQuickSaturateWord(Out[t] * 65535.0);
+    }
+
+    // Slope-limit the obtained curves
+    for (t = 0; t < OriginalLut ->InputChannels; t++)
+        SlopeLimiting(Trans[t]);
+
+    // Check for validity
+    lIsSuitable = TRUE;
+    lIsLinear   = TRUE;
+    for (t=0; (lIsSuitable && (t < OriginalLut ->InputChannels)); t++) {
+
+        // Exclude if already linear
+        if (!cmsIsToneCurveLinear(Trans[t]))
+            lIsLinear = FALSE;
+
+        // Exclude if non-monotonic
+        if (!cmsIsToneCurveMonotonic(Trans[t]))
+            lIsSuitable = FALSE;
+
+        if (IsDegenerated(Trans[t]))
+            lIsSuitable = FALSE;
+    }
+
+    // If it is not suitable, just quit
+    if (!lIsSuitable) goto Error;
+
+    // Invert curves if possible
+    for (t = 0; t < OriginalLut ->InputChannels; t++) {
+        TransReverse[t] = cmsReverseToneCurveEx(PRELINEARIZATION_POINTS, Trans[t]);
+        if (TransReverse[t] == NULL) goto Error;
+    }
+
+    // Now inset the reversed curves at the begin of transform
+    LutPlusCurves = cmsPipelineDup(OriginalLut);
+    if (LutPlusCurves == NULL) goto Error;
+
+    if (!cmsPipelineInsertStage(LutPlusCurves, cmsAT_BEGIN, cmsStageAllocToneCurves(OriginalLut ->ContextID, OriginalLut ->InputChannels, TransReverse)))
+        goto Error;
+
+    // Create the result LUT
+    OptimizedLUT = cmsPipelineAlloc(OriginalLut ->ContextID, OriginalLut ->InputChannels, OriginalLut ->OutputChannels);
+    if (OptimizedLUT == NULL) goto Error;
+
+    OptimizedPrelinMpe = cmsStageAllocToneCurves(OriginalLut ->ContextID, OriginalLut ->InputChannels, Trans);
+
+    // Create and insert the curves at the beginning
+    if (!cmsPipelineInsertStage(OptimizedLUT, cmsAT_BEGIN, OptimizedPrelinMpe))
+        goto Error;
+
+    // Allocate the CLUT for result
+    OptimizedCLUTmpe = cmsStageAllocCLut16bit(OriginalLut ->ContextID, nGridPoints, OriginalLut ->InputChannels, OriginalLut ->OutputChannels, NULL);
+
+    // Add the CLUT to the destination LUT
+    if (!cmsPipelineInsertStage(OptimizedLUT, cmsAT_END, OptimizedCLUTmpe))
+        goto Error;
+
+    // Resample the LUT
+    if (!cmsStageSampleCLut16bit(OptimizedCLUTmpe, XFormSampler16, (void*) LutPlusCurves, 0)) goto Error;
+
+    // Free resources. After this point the Error label must not be used any more, since
+    // it would free the curves and LutPlusCurves a second time. Failures below release
+    // OptimizedLUT themselves.
+    for (t = 0; t < OriginalLut ->InputChannels; t++) {
+
+        if (Trans[t]) cmsFreeToneCurve(Trans[t]);
+        if (TransReverse[t]) cmsFreeToneCurve(TransReverse[t]);
+    }
+
+    cmsPipelineFree(LutPlusCurves);
+
+
+    OptimizedPrelinCurves = _cmsStageGetPtrToCurveSet(OptimizedPrelinMpe);
+    OptimizedPrelinCLUT   = (_cmsStageCLutData*) OptimizedCLUTmpe ->Data;
+
+    // Set the evaluator if 8-bit
+    if (_cmsFormatterIs8bit(*InputFormat)) {
+
+        Prelin8Data* p8 = PrelinOpt8alloc(OptimizedLUT ->ContextID,
+                                                OptimizedPrelinCLUT ->Params,
+                                                OptimizedPrelinCurves);
+        if (p8 == NULL) {
+
+            // Do not leak the half-built result
+            cmsPipelineFree(OptimizedLUT);
+            return FALSE;
+        }
+
+        _cmsPipelineSetOptimizationParameters(OptimizedLUT, PrelinEval8, (void*) p8, Prelin8free, Prelin8dup);
+
+    }
+    else
+    {
+        Prelin16Data* p16 = PrelinOpt16alloc(OptimizedLUT ->ContextID,
+            OptimizedPrelinCLUT ->Params,
+            3, OptimizedPrelinCurves, 3, NULL);
+        if (p16 == NULL) {
+
+            // Do not leak the half-built result
+            cmsPipelineFree(OptimizedLUT);
+            return FALSE;
+        }
+
+        _cmsPipelineSetOptimizationParameters(OptimizedLUT, PrelinEval16, (void*) p16, PrelinOpt16free, Prelin16dup);
+
+    }
+
+    // Don't fix white on absolute colorimetric
+    if (Intent == INTENT_ABSOLUTE_COLORIMETRIC)
+        *dwFlags |= cmsFLAGS_NOWHITEONWHITEFIXUP;
+
+    if (!(*dwFlags & cmsFLAGS_NOWHITEONWHITEFIXUP)) {
+
+        if (!FixWhiteMisalignment(OptimizedLUT, ColorSpace, OutputColorSpace)) {
+
+            // The caller keeps the original pipeline, so the new one has to go.
+            // NOTE(review): this relies on cmsPipelineFree invoking the free callback
+            // registered above for p8/p16 - confirm.
+            cmsPipelineFree(OptimizedLUT);
+            return FALSE;
+        }
+    }
+
+    // And return the obtained LUT
+
+    cmsPipelineFree(OriginalLut);
+    *Lut = OptimizedLUT;
+    return TRUE;
+
+Error:
+
+    for (t = 0; t < OriginalLut ->InputChannels; t++) {
+
+        if (Trans[t]) cmsFreeToneCurve(Trans[t]);
+        if (TransReverse[t]) cmsFreeToneCurve(TransReverse[t]);
+    }
+
+    if (LutPlusCurves != NULL) cmsPipelineFree(LutPlusCurves);
+    if (OptimizedLUT != NULL) cmsPipelineFree(OptimizedLUT);
+
+    return FALSE;
+
+    cmsUNUSED_PARAMETER(Intent);
+    cmsUNUSED_PARAMETER(lIsLinear);
+}
+
+
+// Curves optimizer ------------------------------------------------------------------------------------------------------------------
+
+// Releases a Curves16Data block: every per-curve table first, then the array of
+// table pointers, and finally the block itself.
+static
+void CurvesFree(cmsContext ContextID, void* ptr)
+{
+     Curves16Data* c16 = (Curves16Data*) ptr;
+     cmsUInt32Number k = 0;
+
+     while (k < c16 -> nCurves) {
+
+         _cmsFree(ContextID, c16 ->Curves[k]);
+         k++;
+     }
+
+     _cmsFree(ContextID, c16 ->Curves);
+     _cmsFree(ContextID, c16);
+}
+
+// Makes a deep copy of a Curves16Data block: the block itself, the array of table
+// pointers and every per-curve table. Returns NULL if any allocation fails; in that
+// case everything allocated so far is released and the source block is left intact.
+static
+void* CurvesDup(cmsContext ContextID, const void* ptr)
+{
+    Curves16Data* Data = (Curves16Data*)_cmsDupMem(ContextID, ptr, sizeof(Curves16Data));
+    cmsUInt32Number i, j;
+
+    if (Data == NULL) return NULL;
+
+    // Right after the shallow copy Data->Curves still points to the source array
+    Data->Curves = (cmsUInt16Number**) _cmsDupMem(ContextID, Data->Curves, Data->nCurves * sizeof(cmsUInt16Number*));
+
+    // A NULL result is only an error when there was something to copy
+    if (Data->Curves == NULL && Data->nCurves > 0) {
+        _cmsFree(ContextID, Data);
+        return NULL;
+    }
+
+    for (i=0; i < Data -> nCurves; i++) {
+
+        // Entry i still refers to the source table until it is replaced below
+        cmsUInt16Number* Copy = (cmsUInt16Number*) _cmsDupMem(ContextID, Data->Curves[i], Data->nElements * sizeof(cmsUInt16Number));
+
+        if (Copy == NULL && Data->nElements > 0) {
+
+            // Only entries below i are owned by the copy; the rest belong to the source
+            for (j=0; j < i; j++) {
+                _cmsFree(ContextID, Data->Curves[j]);
+            }
+            _cmsFree(ContextID, Data->Curves);
+            _cmsFree(ContextID, Data);
+            return NULL;
+        }
+
+        Data->Curves[i] = Copy;
+    }
+
+    return (void*) Data;
+}
+
+// Tabulates nCurves tone curves into plain 16-bit tables of nElements entries each.
+// With 256 elements the curves are evaluated at the 16-bit expansion of each 8-bit
+// level; otherwise entry j is the curve evaluated at j itself.
+// Returns NULL, with nothing left allocated, if any allocation fails.
+static
+Curves16Data* CurvesAlloc(cmsContext ContextID, cmsUInt32Number nCurves, cmsUInt32Number nElements, cmsToneCurve** G)
+{
+    Curves16Data* Tables;
+    cmsUInt32Number c, k;
+
+    Tables = (Curves16Data*)_cmsMallocZero(ContextID, sizeof(Curves16Data));
+    if (Tables == NULL) return NULL;
+
+    Tables ->nCurves = nCurves;
+    Tables ->nElements = nElements;
+
+    // Array holding one table pointer per curve
+    Tables->Curves = (cmsUInt16Number**) _cmsCalloc(ContextID, nCurves, sizeof(cmsUInt16Number*));
+    if (Tables->Curves == NULL) {
+        _cmsFree(ContextID, Tables);
+        return NULL;
+    }
+
+    for (c = 0; c < nCurves; c++) {
+
+        cmsUInt16Number* Row = (cmsUInt16Number*) _cmsCalloc(ContextID, nElements, sizeof(cmsUInt16Number));
+
+        Tables->Curves[c] = Row;
+
+        if (Row == NULL) {
+
+            // Undo the tables built so far, then the containers
+            for (k = 0; k < c; k++) {
+                _cmsFree(ContextID, Tables->Curves[k]);
+            }
+            _cmsFree(ContextID, Tables->Curves);
+            _cmsFree(ContextID, Tables);
+            return NULL;
+        }
+
+        // Fill the table; the abscissa depends on the table size
+        for (k = 0; k < nElements; k++) {
+
+            if (nElements == 256U)
+                Row[k] = cmsEvalToneCurve16(G[c], FROM_8_TO_16(k));
+            else
+                Row[k] = cmsEvalToneCurve16(G[c], (cmsUInt16Number) k);
+        }
+    }
+
+    return Tables;
+}
+
+// Curve evaluator for tables built with 256 entries: each output is a direct lookup
+// indexed by the high byte of the corresponding input. D is the Curves16Data block.
+static
+void FastEvaluateCurves8(CMSREGISTER const cmsUInt16Number In[],
+                          CMSREGISTER cmsUInt16Number Out[],
+                          CMSREGISTER const void* D)
+{
+    const Curves16Data* Tables = (const Curves16Data*) D;
+    cmsUInt32Number ch;
+
+    for (ch = 0; ch < Tables ->nCurves; ch++) {
+
+         int Level = (In[ch] >> 8);
+
+         Out[ch] = Tables -> Curves[ch][Level];
+    }
+}
+
+
+// Curve evaluator for full-size tables: each output is a direct lookup indexed by the
+// whole 16-bit input value. D is the Curves16Data block.
+static
+void FastEvaluateCurves16(CMSREGISTER const cmsUInt16Number In[],
+                          CMSREGISTER cmsUInt16Number Out[],
+                          CMSREGISTER const void* D)
+{
+    const Curves16Data* Tables = (const Curves16Data*) D;
+    cmsUInt32Number ch = 0;
+
+    while (ch < Tables ->nCurves) {
+
+         const cmsUInt16Number* Row = Tables -> Curves[ch];
+
+         Out[ch] = Row[In[ch]];
+         ch++;
+    }
+}
+
+
+// Identity evaluator: copies as many values from In to Out as the pipeline has input
+// channels. D is the pipeline itself.
+static
+void FastIdentity16(CMSREGISTER const cmsUInt16Number In[],
+                    CMSREGISTER cmsUInt16Number Out[],
+                    CMSREGISTER const void* D)
+{
+    const cmsPipeline* Pipe = (const cmsPipeline*) D;
+    cmsUInt32Number ch = 0;
+
+    while (ch < Pipe ->InputChannels) {
+
+         Out[ch] = In[ch];
+         ch++;
+    }
+}
+
+
+// If the target LUT holds only curves, the optimization procedure is to join all those
+// curves together. That only works on curves and does not work on matrices.
+static
+cmsBool OptimizeByJoiningCurves(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
+{
+    cmsToneCurve** GammaTables = NULL;
+    cmsFloat32Number InFloat[cmsMAXCHANNELS], OutFloat[cmsMAXCHANNELS];
+    cmsUInt32Number i, j;
+    cmsPipeline* Src = *Lut;
+    cmsPipeline* Dest = NULL;
+    cmsStage* mpe;
+    cmsStage* ObtainedCurves = NULL;
+    // Collapses a pipeline made only of curve-set stages into one tabulated curve set (or an identity stage).
+    // On TRUE the old pipeline is freed and *Lut points to the replacement; on FALSE *Lut is left as it was.
+    // This is a lossy optimization! does not apply in floating-point cases
+    if (_cmsFormatterIsFloat(*InputFormat) || _cmsFormatterIsFloat(*OutputFormat)) return FALSE;
+
+    //  Only curves in this LUT?
+    for (mpe = cmsPipelineGetPtrToFirstStage(Src);
+         mpe != NULL;
+         mpe = cmsStageNext(mpe)) {
+            if (cmsStageType(mpe) != cmsSigCurveSetElemType) return FALSE;
+    }
+
+    // Allocate an empty LUT
+    Dest =  cmsPipelineAlloc(Src ->ContextID, Src ->InputChannels, Src ->OutputChannels);
+    if (Dest == NULL) return FALSE;
+
+    // Create target curves
+    GammaTables = (cmsToneCurve**) _cmsCalloc(Src ->ContextID, Src ->InputChannels, sizeof(cmsToneCurve*));
+    if (GammaTables == NULL) goto Error;
+
+    for (i=0; i < Src ->InputChannels; i++) {
+        GammaTables[i] = cmsBuildTabulatedToneCurve16(Src ->ContextID, PRELINEARIZATION_POINTS, NULL);
+        if (GammaTables[i] == NULL) goto Error;
+    }
+
+    // Compute 16 bit result by using floating point
+    for (i=0; i < PRELINEARIZATION_POINTS; i++) {
+
+        for (j=0; j < Src ->InputChannels; j++)
+            InFloat[j] = (cmsFloat32Number) ((cmsFloat64Number) i / (PRELINEARIZATION_POINTS - 1));  // same ramp position on every channel
+
+        cmsPipelineEvalFloat(InFloat, OutFloat, Src);
+
+        for (j=0; j < Src ->InputChannels; j++)
+            GammaTables[j] -> Table16[i] = _cmsQuickSaturateWord(OutFloat[j] * 65535.0);
+    }
+
+    ObtainedCurves = cmsStageAllocToneCurves(Src ->ContextID, Src ->InputChannels, GammaTables);
+    if (ObtainedCurves == NULL) goto Error;
+
+    // The temporary per-channel curves are released here, right after the stage has been built from them
+    for (i=0; i < Src ->InputChannels; i++) {
+        cmsFreeToneCurve(GammaTables[i]);
+        GammaTables[i] = NULL;
+    }
+
+    if (GammaTables != NULL) {
+        _cmsFree(Src->ContextID, GammaTables);
+        GammaTables = NULL;     // so the Error path below does not free the array again
+    }
+
+    // Maybe the curves are linear at the end
+    if (!AllCurvesAreLinear(ObtainedCurves)) {
+       _cmsStageToneCurvesData* Data;
+
+        if (!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, ObtainedCurves))
+            goto Error;
+        Data = (_cmsStageToneCurvesData*) cmsStageData(ObtainedCurves);
+        ObtainedCurves = NULL;  // inserted into Dest, so the Error path must not free it separately
+
+        // If the curves are to be applied in 8 bits, we can save memory
+        if (_cmsFormatterIs8bit(*InputFormat)) {
+             Curves16Data* c16 = CurvesAlloc(Dest ->ContextID, Data ->nCurves, 256, Data ->TheCurves);
+
+             if (c16 == NULL) goto Error;
+             *dwFlags |= cmsFLAGS_NOCACHE;
+            _cmsPipelineSetOptimizationParameters(Dest, FastEvaluateCurves8, c16, CurvesFree, CurvesDup);
+
+        }
+        else {
+             Curves16Data* c16 = CurvesAlloc(Dest ->ContextID, Data ->nCurves, 65536, Data ->TheCurves);
+
+             if (c16 == NULL) goto Error;
+             *dwFlags |= cmsFLAGS_NOCACHE;
+            _cmsPipelineSetOptimizationParameters(Dest, FastEvaluateCurves16, c16, CurvesFree, CurvesDup);
+        }
+    }
+    else {
+
+        // LUT optimizes to nothing. Set the identity LUT
+        cmsStageFree(ObtainedCurves);
+        ObtainedCurves = NULL;
+
+        if (!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, cmsStageAllocIdentity(Dest ->ContextID, Src ->InputChannels)))
+            goto Error;
+
+        *dwFlags |= cmsFLAGS_NOCACHE;
+        _cmsPipelineSetOptimizationParameters(Dest, FastIdentity16, (void*) Dest, NULL, NULL);
+    }
+
+    // We are done.
+    cmsPipelineFree(Src);
+    *Lut = Dest;
+    return TRUE;
+
+Error:
+    // Common failure exit: release whatever is still held locally; Src and *Lut are not touched
+    if (ObtainedCurves != NULL) cmsStageFree(ObtainedCurves);
+    if (GammaTables != NULL) {
+        for (i=0; i < Src ->InputChannels; i++) {
+            if (GammaTables[i] != NULL) cmsFreeToneCurve(GammaTables[i]);
+        }
+
+        _cmsFree(Src ->ContextID, GammaTables);
+    }
+
+    if (Dest != NULL) cmsPipelineFree(Dest);
+    return FALSE;
+
+    cmsUNUSED_PARAMETER(Intent);        // unreachable (after the return); presumably only silences unused-parameter warnings
+    cmsUNUSED_PARAMETER(InputFormat);
+    cmsUNUSED_PARAMETER(OutputFormat);
+    cmsUNUSED_PARAMETER(dwFlags);
+}
+
+// -------------------------------------------------------------------------------------------------------------------------------------
+// LUT is Shaper - Matrix - Matrix - Shaper, which is very frequent when combining two matrix-shaper profiles
+
+
+// Destructor for the matrix-shaper optimization data; a NULL block is ignored
+static void FreeMatShaper(cmsContext ContextID, void* Data)
+{
+    if (Data) _cmsFree(ContextID, Data);
+}
+
+// Copy hook for the matrix-shaper data: a plain byte-wise duplicate of one MatShaper8Data
+static void* DupMatShaper(cmsContext ContextID, const void* Data)
+{
+    return _cmsDupMem(ContextID, Data, sizeof(MatShaper8Data));
+}
+
+
+// A fast matrix-shaper evaluator for 8 bits. This is a bit tricky since it works in 1.14 signed fixed point
+// for speed. It relies on three 256-entry input tables (already in 1.14) and three 16385-entry 16-bit
+// output tables precomputed by SetMatShaper; In[] must hold 8-bit values replicated as (a << 8 | a).
+static
+void MatShaperEval16(CMSREGISTER const cmsUInt16Number In[],
+                     CMSREGISTER cmsUInt16Number Out[],
+                     CMSREGISTER const void* D)
+{
+    MatShaper8Data* p = (MatShaper8Data*) D;
+    cmsS1Fixed14Number l1, l2, l3, r, g, b;
+    cmsUInt32Number ri, gi, bi;
+
+    // In this case (and only in this case!) we can use this simplification since
+    // In[] is assured to come from a 8 bit number. (a << 8 | a)
+    ri = In[0] & 0xFFU;
+    gi = In[1] & 0xFFU;
+    bi = In[2] & 0xFFU;
+
+    // Across first shaper, which also converts to 1.14 fixed point
+    r = p->Shaper1R[ri];
+    g = p->Shaper1G[gi];
+    b = p->Shaper1B[bi];
+
+    // Evaluate the matrix in 1.14 fixed point (0x2000 is one half in 1.14: round before dropping 14 bits)
+    l1 =  (p->Mat[0][0] * r + p->Mat[0][1] * g + p->Mat[0][2] * b + p->Off[0] + 0x2000) >> 14;
+    l2 =  (p->Mat[1][0] * r + p->Mat[1][1] * g + p->Mat[1][2] * b + p->Off[1] + 0x2000) >> 14;
+    l3 =  (p->Mat[2][0] * r + p->Mat[2][1] * g + p->Mat[2][2] * b + p->Off[2] + 0x2000) >> 14;
+
+    // Now we have to clip to 0..1.0 range (16384 is 1.0 in 1.14 and the last index of the second shaper)
+    ri = (l1 < 0) ? 0 : ((l1 > 16384) ? 16384U : (cmsUInt32Number) l1);
+    gi = (l2 < 0) ? 0 : ((l2 > 16384) ? 16384U : (cmsUInt32Number) l2);
+    bi = (l3 < 0) ? 0 : ((l3 > 16384) ? 16384U : (cmsUInt32Number) l3);
+
+    // And across second shaper,
+    Out[0] = p->Shaper2R[ri];
+    Out[1] = p->Shaper2G[gi];
+    Out[2] = p->Shaper2B[bi];
+
+}
+
+// Fills the 256-entry input shaper: every 8-bit code is normalized to 0..1, passed through
+// the tone curve and stored in 1.14 fixed point. Curve outputs of 131072.0 or more are not
+// converted but pinned to 0x7fffffff (presumably to avoid overflowing the fixed-point type).
+static void FillFirstShaper(cmsS1Fixed14Number* Table, cmsToneCurve* Curve)
+{
+    int Code;
+
+    for (Code = 0; Code < 256; Code++) {
+
+        // Normalize in double, narrow to float, then evaluate the curve
+        cmsFloat32Number Normalized = (cmsFloat32Number) (Code / 255.0);
+        cmsFloat32Number Shaped     = cmsEvalToneCurveFloat(Curve, Normalized);
+
+        // Anything not below the limit (a NaN included) takes the saturated value
+        Table[Code] = (Shaped < 131072.0) ? DOUBLE_TO_1FIXED14(Shaped)
+                                          : 0x7fffffff;
+    }
+}
+
+// Fills the 16385-entry output shaper. The index is a 1.14 value (0x4000, the last entry, is 1.0);
+// each entry is the tone curve result clamped to 0..1.0 and encoded in 16 bits.
+static void FillSecondShaper(cmsUInt16Number* Table, cmsToneCurve* Curve, cmsBool Is8BitsOutput)
+{
+    int Slot;
+
+    for (Slot = 0; Slot < 16385; Slot++) {
+
+        cmsFloat32Number Position = (cmsFloat32Number) (Slot / 16384.0);
+        cmsFloat32Number Level    = cmsEvalToneCurveFloat(Curve, Position);
+        cmsUInt16Number  Word;
+
+        // Clamp the curve output to 0..1.0
+        if (Level < 0)
+            Level = 0;
+        else if (Level > 1.0)
+            Level = 1.0;
+
+        Word = _cmsQuickSaturateWord(Level * 65535.0);
+        if (!Is8BitsOutput) {
+            Table[Slot] = Word;
+            continue;
+        }
+
+        // 8 bits output: store the resulting byte times 257, i.e. replicated in both halves of the
+        // word. Per the original author's note this lets the / 257 rounding be replaced by & 0xff.
+        {
+            cmsUInt8Number Byte = FROM_16_TO_8(Word);
+            Table[Slot] = FROM_8_TO_16(Byte);
+        }
+    }
+}
+
+// Compute the matrix-shaper structure: precomputes the two sets of shaper tables plus the matrix
+// and offset in 1.14 fixed point, and installs MatShaperEval16 as the evaluator of Dest.
+// Returns FALSE (leaving Dest and *OutputFormat untouched) only when the table block cannot be allocated.
+static cmsBool SetMatShaper(cmsPipeline* Dest, cmsToneCurve* Curve1[3], cmsMAT3* Mat, cmsVEC3* Off, cmsToneCurve* Curve2[3], cmsUInt32Number* OutputFormat)
+{
+    cmsBool Out8 = _cmsFormatterIs8bit(*OutputFormat);
+    MatShaper8Data* Tables;
+    int Row, Col;
+
+    // Allocate a big chunk of memory to store precomputed tables
+    Tables = (MatShaper8Data*) _cmsMalloc(Dest ->ContextID, sizeof(MatShaper8Data));
+    if (Tables == NULL)
+        return FALSE;
+
+    Tables ->ContextID = Dest ->ContextID;
+
+    // Input side: 8 bits -> curve -> 1.14
+    FillFirstShaper(Tables ->Shaper1R, Curve1[0]);
+    FillFirstShaper(Tables ->Shaper1G, Curve1[1]);
+    FillFirstShaper(Tables ->Shaper1B, Curve1[2]);
+
+    // Output side: 1.14 -> curve -> 16 bits (a replicated byte when the output is 8 bits)
+    FillSecondShaper(Tables ->Shaper2R, Curve2[0], Out8);
+    FillSecondShaper(Tables ->Shaper2G, Curve2[1], Out8);
+    FillSecondShaper(Tables ->Shaper2B, Curve2[2], Out8);
+
+    // Convert matrix to nFixed14. Note that those values may take more than 16 bits
+    // once converted, so they are kept in full-width fixed-point cells.
+    for (Row = 0; Row < 3; Row++) {
+
+        for (Col = 0; Col < 3; Col++)
+            Tables ->Mat[Row][Col] = DOUBLE_TO_1FIXED14(Mat ->v[Row].n[Col]);
+    }
+
+    // A NULL offset stands for a zero offset
+    for (Row = 0; Row < 3; Row++) {
+        Tables ->Off[Row] = (Off != NULL) ? DOUBLE_TO_1FIXED14(Off ->n[Row]) : 0;
+    }
+
+    // Mark as optimized for faster formatter
+    if (Out8)
+        *OutputFormat |= OPTIMIZED_SH(1);
+
+    // Fill function pointers
+    _cmsPipelineSetOptimizationParameters(Dest, MatShaperEval16, (void*) Tables, FreeMatShaper, DupMatShaper);
+
+    return TRUE;
+}
+
+//  8 bits on input allows matrix-shaper boot up to 25 Mpixels per second on RGB. That's fast!
+static
+cmsBool OptimizeMatrixShaper(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
+{
+       cmsStage* Curve1, *Curve2;
+       cmsStage* Matrix1, *Matrix2;
+       cmsMAT3 res;
+       cmsBool IdentityMat;
+       cmsPipeline* Dest, *Src;
+       cmsFloat64Number* Offset;
+       // Rebuilds curves-matrix(-matrix)-curves pipelines as curves-matrix-curves; TRUE means *Lut was replaced
+       // Only works on RGB to RGB
+       if (T_CHANNELS(*InputFormat) != 3 || T_CHANNELS(*OutputFormat) != 3) return FALSE;
+
+       // Only works on 8 bit input
+       if (!_cmsFormatterIs8bit(*InputFormat)) return FALSE;
+
+       // Seems suitable, proceed
+       Src = *Lut;
+
+       // Check for:
+       //
+       //    shaper-matrix-matrix-shaper
+       //    shaper-matrix-shaper
+       //
+       // Both of those constructs are possible (first because abs. colorimetric).
+       // Additionally, in the first case, the input matrix offset should be zero.
+
+       IdentityMat = FALSE;
+       if (cmsPipelineCheckAndRetreiveStages(Src, 4,
+              cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType,
+              &Curve1, &Matrix1, &Matrix2, &Curve2)) {
+
+              // Get both matrices
+              _cmsStageMatrixData* Data1 = (_cmsStageMatrixData*)cmsStageData(Matrix1);
+              _cmsStageMatrixData* Data2 = (_cmsStageMatrixData*)cmsStageData(Matrix2);
+
+              // Input offset should be zero
+              if (Data1->Offset != NULL) return FALSE;
+
+              // Multiply both matrices to get the result
+              _cmsMAT3per(&res, (cmsMAT3*)Data2->Double, (cmsMAT3*)Data1->Double);
+
+              // Only 2nd matrix has offset, or it is zero
+              Offset = Data2->Offset;
+
+              // Now the result is in res + Data2 -> Offset. Maybe is a plain identity?
+              if (_cmsMAT3isIdentity(&res) && Offset == NULL) {
+
+                     // We can get rid of full matrix
+                     IdentityMat = TRUE;
+              }
+
+       }
+       else {
+
+              if (cmsPipelineCheckAndRetreiveStages(Src, 3,
+                     cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType,
+                     &Curve1, &Matrix1, &Curve2)) {
+
+                     _cmsStageMatrixData* Data = (_cmsStageMatrixData*)cmsStageData(Matrix1);
+
+                     // Copy the matrix to our result
+                     memcpy(&res, Data->Double, sizeof(res));
+
+                     // Preserve the offset (may be NULL as a zero offset)
+                     Offset = Data->Offset;
+
+                     if (_cmsMAT3isIdentity(&res) && Offset == NULL) {
+
+                            // We can get rid of full matrix
+                            IdentityMat = TRUE;
+                     }
+              }
+              else
+                     return FALSE; // Not optimizeable this time
+
+       }
+
+      // Allocate an empty LUT
+    Dest =  cmsPipelineAlloc(Src ->ContextID, Src ->InputChannels, Src ->OutputChannels);
+    if (!Dest) return FALSE;
+
+    // Assemble the new LUT
+    if (!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, cmsStageDup(Curve1)))
+        goto Error;
+
+    if (!IdentityMat) {
+
+           if (!cmsPipelineInsertStage(Dest, cmsAT_END, cmsStageAllocMatrix(Dest->ContextID, 3, 3, (const cmsFloat64Number*)&res, Offset)))
+                  goto Error;
+    }
+
+    if (!cmsPipelineInsertStage(Dest, cmsAT_END, cmsStageDup(Curve2)))
+        goto Error;
+
+    // If identity on matrix, we can further optimize the curves, so call the join curves routine
+    if (IdentityMat) {
+        // Result ignored: when joining fails, Dest is kept exactly as assembled above
+        OptimizeByJoiningCurves(&Dest, Intent, InputFormat, OutputFormat, dwFlags);
+    }
+    else {
+        _cmsStageToneCurvesData* mpeC1 = (_cmsStageToneCurvesData*) cmsStageData(Curve1);
+        _cmsStageToneCurvesData* mpeC2 = (_cmsStageToneCurvesData*) cmsStageData(Curve2);
+
+        // In this particular optimization, cache does not help as it takes more time to deal with
+        // the cache that with the pixel handling
+        *dwFlags |= cmsFLAGS_NOCACHE;
+
+        // Setup the optimization routines. NOTE(review): the result is ignored; a failed allocation skips the fast evaluator
+        SetMatShaper(Dest, mpeC1 ->TheCurves, &res, (cmsVEC3*) Offset, mpeC2->TheCurves, OutputFormat);
+    }
+
+    cmsPipelineFree(Src);
+    *Lut = Dest;
+    return TRUE;
+Error:
+    // Leave Src unchanged
+    cmsPipelineFree(Dest);
+    return FALSE;
+}
+
+
+// -------------------------------------------------------------------------------------------------------------------------------------
+// Optimization plug-ins
+
+// List of optimizations: a singly linked list node holding one optimization routine
+typedef struct _cmsOptimizationCollection_st {
+    // Routine tried by _cmsOptimizePipeline; a TRUE return ends the search
+    _cmsOPToptimizeFn  OptimizePtr;
+    // Next routine to try, or NULL at the end of the list
+    struct _cmsOptimizationCollection_st *Next;
+
+} _cmsOptimizationCollection;
+
+
+// The built-in list. We currently implement 4 types of optimizations. Joining of curves, matrix-shaper, linearization and resampling
+static _cmsOptimizationCollection DefaultOptimization[] = {
+    // An array laid out as a linked list: each entry points at the following element, tried in this order
+    { OptimizeByJoiningCurves,            &DefaultOptimization[1] },
+    { OptimizeMatrixShaper,               &DefaultOptimization[2] },
+    { OptimizeByComputingLinearization,   &DefaultOptimization[3] },
+    { OptimizeByResampling,               NULL }    // end of the list
+};
+
+// The linked list head of plug-in optimizations at global scope; it starts empty (no plug-in registered)
+_cmsOptimizationPluginChunkType _cmsOptimizationPluginChunk = { NULL };
+
+
+// Duplicates the zone of memory used by the plug-in in the new context: every node of the
+// optimization list of src is copied into the memory pool of ctx, in the same order, and the
+// copy is installed as the OptimizationPlugin chunk of ctx.
+static void DupPluginOptimizationList(struct _cmsContext_struct* ctx,
+                                      const struct _cmsContext_struct* src)
+{
+    _cmsOptimizationPluginChunkType* head = (_cmsOptimizationPluginChunkType*) src->chunks[OptimizationPlugin];
+    _cmsOptimizationPluginChunkType  Copy = { NULL };
+    _cmsOptimizationCollection**     Tail = &Copy.OptimizationCollection;
+    _cmsOptimizationCollection*      Node;
+
+    _cmsAssert(ctx != NULL);
+    _cmsAssert(head != NULL);
+
+    // Tail always addresses the link that must receive the next copied node, so appending
+    // keeps the source order without special-casing the first element.
+    Node = head ->OptimizationCollection;
+    while (Node != NULL) {
+
+        _cmsOptimizationCollection* Clone =
+            (_cmsOptimizationCollection*) _cmsSubAllocDup(ctx ->MemPool, Node, sizeof(_cmsOptimizationCollection));
+
+        // Duplication failed: give up without installing any chunk
+        if (Clone == NULL)
+            return;
+
+        Clone ->Next = NULL;
+        *Tail = Clone;
+        Tail  = &Clone ->Next;
+
+        Node = Node ->Next;
+    }
+
+    // Install a copy of the new list head in the destination context
+    ctx ->chunks[OptimizationPlugin] = _cmsSubAllocDup(ctx->MemPool, &Copy, sizeof(_cmsOptimizationPluginChunkType));
+}
+
+// Sets up the OptimizationPlugin chunk of ctx: an empty list when src is NULL, otherwise a copy of the list in src
+void _cmsAllocOptimizationPluginChunk(struct _cmsContext_struct* ctx,
+                                      const struct _cmsContext_struct* src)
+{
+    static _cmsOptimizationPluginChunkType EmptyChunk = { NULL };
+
+    if (src == NULL) {
+        ctx ->chunks[OptimizationPlugin] = _cmsSubAllocDup(ctx ->MemPool, &EmptyChunk, sizeof(_cmsOptimizationPluginChunkType));
+        return;
+    }
+
+    DupPluginOptimizationList(ctx, src);
+}
+
+
+// Register new ways to optimize. A NULL Data resets the list of the context to empty; otherwise
+// the optimizer of the plug-in is pushed at the front of that list. Returns FALSE if the plug-in
+// carries no optimizer or the list node cannot be allocated.
+cmsBool _cmsRegisterOptimizationPlugin(cmsContext ContextID, cmsPluginBase* Data)
+{
+    _cmsOptimizationPluginChunkType* Chunk =
+        (_cmsOptimizationPluginChunkType*) _cmsContextGetClientChunk(ContextID, OptimizationPlugin);
+    cmsPluginOptimization* Request = (cmsPluginOptimization*) Data;
+    _cmsOptimizationCollection* Node;
+
+    if (Data != NULL) {
+
+        // Optimizer callback is required
+        if (Request ->OptimizePtr == NULL)
+            return FALSE;
+
+        Node = (_cmsOptimizationCollection*) _cmsPluginMalloc(ContextID, sizeof(_cmsOptimizationCollection));
+        if (Node == NULL)
+            return FALSE;
+
+        // Push at the head: the newest plug-in is the first one tried
+        Node ->OptimizePtr = Request ->OptimizePtr;
+        Node ->Next        = Chunk ->OptimizationCollection;
+        Chunk ->OptimizationCollection = Node;
+        return TRUE;
+    }
+
+    // No plug-in data: forget every registered optimization
+    Chunk ->OptimizationCollection = NULL;
+    return TRUE;
+}
+
+// The entry point for LUT optimization.
+//
+// With cmsFLAGS_FORCE_CLUT the pipeline is pre-optimized and resampled, and that result is returned.
+// Otherwise an empty pipeline (before or after removing identities and trivial conversions) gets the
+// FastIdentity16 evaluator and TRUE is returned. cmsFLAGS_NOOPTIMIZE then yields FALSE. Else the
+// context plug-ins and afterwards the built-in schemes are tried in list order, and the first one
+// reporting success returns TRUE. When none succeeds the result is whatever PreOptimize reported.
+// The pipeline in *PtrLut may be replaced by the successful scheme.
+cmsBool _cmsOptimizePipeline(cmsContext ContextID,
+                             cmsPipeline**    PtrLut,
+                             cmsUInt32Number  Intent,
+                             cmsUInt32Number* InputFormat,
+                             cmsUInt32Number* OutputFormat,
+                             cmsUInt32Number* dwFlags)
+{
+    _cmsOptimizationPluginChunkType* Chunk =
+        (_cmsOptimizationPluginChunkType*) _cmsContextGetClientChunk(ContextID, OptimizationPlugin);
+    _cmsOptimizationCollection* Candidate;
+    cmsBool Simplified = FALSE;
+
+    // A CLUT is being asked, so force this specific optimization
+    if (*dwFlags & cmsFLAGS_FORCE_CLUT) {
+
+        PreOptimize(*PtrLut);
+        return OptimizeByResampling(PtrLut, Intent, InputFormat, OutputFormat, dwFlags);
+    }
+
+    // Anything to optimize? If so, try to get rid of identities and trivial conversions first;
+    // the emptiness check below then covers both "empty from the start" and "emptied by removal".
+    if ((*PtrLut) ->Elements != NULL)
+        Simplified = PreOptimize(*PtrLut);
+
+    if ((*PtrLut) ->Elements == NULL) {
+        _cmsPipelineSetOptimizationParameters(*PtrLut, FastIdentity16, (void*) *PtrLut, NULL, NULL);
+        return TRUE;
+    }
+
+    // Do not optimize, keep all precision
+    if (*dwFlags & cmsFLAGS_NOOPTIMIZE)
+        return FALSE;
+
+    // Try plug-in optimizations; if one schema succeeded, we are done
+    Candidate = Chunk ->OptimizationCollection;
+    while (Candidate != NULL) {
+
+        if (Candidate ->OptimizePtr(PtrLut, Intent, InputFormat, OutputFormat, dwFlags))
+            return TRUE;    // Optimized!
+
+        Candidate = Candidate ->Next;
+    }
+
+    // Try built-in optimizations
+    Candidate = DefaultOptimization;
+    while (Candidate != NULL) {
+
+        if (Candidate ->OptimizePtr(PtrLut, Intent, InputFormat, OutputFormat, dwFlags))
+            return TRUE;
+
+        Candidate = Candidate ->Next;
+    }
+
+    // Only simple optimizations succeeded
+    return Simplified;
+}
+
+
+
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspack.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspack.cpp
new file mode 100644
index 0000000000..db34969ef5
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspack.cpp
@@ -0,0 +1,3433 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// This module handles all formats supported by lcms. There are two flavors, 16 bits and
+// floating point. Floating point is supported only in a subset, those formats holding
+// cmsFloat32Number (4 bytes per component) and double (marked as 0 bytes per component
+// as special case)
+
+// ---------------------------------------------------------------------------
+
+
+// This macro returns words stored as big endian: it swaps the two bytes of the 16-bit word w
+#define CHANGE_ENDIAN(w)    (cmsUInt16Number) ((cmsUInt16Number) ((w)<<8)|((w)>>8))
+
+// These macros handle reversing (negative): the value is subtracted from the channel maximum
+#define REVERSE_FLAVOR_8(x)     ((cmsUInt8Number) (0xff-(x)))
+#define REVERSE_FLAVOR_16(x)    ((cmsUInt16Number)(0xffff-(x)))
+
+// Rescales a 16-bit Lab V2 value to V4: * 0xffff / 0xff00 = (255 * 257) / (255 * 256) = 257 / 256
+cmsINLINE cmsUInt16Number FomLabV2ToLabV4(cmsUInt16Number x)
+{
+    int a = ((x << 8) + x) >> 8;  // x * 257 / 256. Must ADD: OR-ing loses the carries where bits 8..15 of both terms overlap
+    if ( a > 0xffff) return 0xffff;  // inputs above 0xff00 scale past 16 bits, so saturate
+    return (cmsUInt16Number) a;
+}
+
+// Rescales a 16-bit Lab V4 value to V2: * 0xff00 / 0xffff = * 256 / 257, with 0x80 added before dividing
+cmsINLINE cmsUInt16Number FomLabV4ToLabV2(cmsUInt16Number x)
+{
+    int Scaled = (x << 8) + 0x80;
+    return (cmsUInt16Number) (Scaled / 257);
+}
+
+typedef struct {
+    cmsUInt32Number Type;    // NOTE(review): presumably the format descriptor this entry handles -- confirm against the formatter tables
+    cmsUInt32Number Mask;    // NOTE(review): presumably the format bits (see the ANY* macros) ignored when matching -- confirm
+    cmsFormatter16  Frm;     // 16-bit formatter routine attached to this entry
+
+} cmsFormatters16;
+
+typedef struct {
+    cmsUInt32Number    Type;    // NOTE(review): presumably the format descriptor this entry handles -- confirm against the formatter tables
+    cmsUInt32Number    Mask;    // NOTE(review): presumably the format bits (see the ANY* macros) ignored when matching -- confirm
+    cmsFormatterFloat  Frm;     // floating-point formatter routine attached to this entry
+
+} cmsFormattersFloat;
+
+
+#define ANYSPACE        COLORSPACE_SH(31)   // NOTE(review): these ANY* values look like "don't care" masks, one per format field -- confirm at the use sites
+#define ANYCHANNELS     CHANNELS_SH(15)
+#define ANYEXTRA        EXTRA_SH(7)
+#define ANYPLANAR       PLANAR_SH(1)
+#define ANYENDIAN       ENDIAN16_SH(1)
+#define ANYSWAP         DOSWAP_SH(1)
+#define ANYSWAPFIRST    SWAPFIRST_SH(1)
+#define ANYFLAVOR       FLAVOR_SH(1)
+
+
+// Suppress warning about info never being used
+
+#ifdef _MSC_VER
+#pragma warning(disable : 4100)
+#endif
+
+// Unpacking routines (16 bits) ----------------------------------------------------------------------------------------
+
+
+// Does almost everything but is slow: generic chunky (interleaved) 8-bit unpacker that honors
+// channel swapping, swap-first, reversed flavor and extra channels of the input format.
+static cmsUInt8Number* UnrollChunkyBytes(CMSREGISTER _cmsTRANSFORM* info,
+                                         CMSREGISTER cmsUInt16Number wIn[],
+                                         CMSREGISTER cmsUInt8Number* accum,
+                                         CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt32Number Channels    = T_CHANNELS(info ->InputFormat);
+    cmsUInt32Number Swapped     = T_DOSWAP(info ->InputFormat);
+    cmsUInt32Number Negative    = T_FLAVOR(info ->InputFormat);
+    cmsUInt32Number FirstMoved  = T_SWAPFIRST(info ->InputFormat);
+    cmsUInt32Number Skipped     = T_EXTRA(info ->InputFormat);
+    cmsUInt32Number SkipAtStart = Swapped ^ FirstMoved;
+    cmsUInt32Number n;
+
+    // The extra bytes are stepped over either before or after the color channels
+    if (SkipAtStart)
+        accum += Skipped;
+
+    for (n = 0; n < Channels; n++) {
+
+        cmsUInt32Number Slot = Swapped ? (Channels - n - 1) : n;
+        cmsUInt16Number Word = FROM_8_TO_16(*accum);
+
+        if (Negative)
+            Word = REVERSE_FLAVOR_16(Word);
+
+        wIn[Slot] = Word;
+        accum++;
+    }
+
+    if (!SkipAtStart)
+        accum += Skipped;
+
+    // Swap-first without extra channels: rotate so the leading value ends up last
+    if (Skipped == 0 && FirstMoved) {
+
+        cmsUInt16Number Leading = wIn[0];
+
+        memmove(&wIn[0], &wIn[1], (Channels - 1) * sizeof(cmsUInt16Number));
+        wIn[Channels - 1] = Leading;
+    }
+
+    cmsUNUSED_PARAMETER(Stride);
+    return accum;
+}
+
+// Planar 8-bit unpacker: consecutive channels are Stride bytes apart. Extra channels are just
+// ignored because they come in the next planes. Returns the start pointer advanced by one byte.
+static cmsUInt8Number* UnrollPlanarBytes(CMSREGISTER _cmsTRANSFORM* info,
+                                         CMSREGISTER cmsUInt16Number wIn[],
+                                         CMSREGISTER cmsUInt8Number* accum,
+                                         CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt32Number Channels   = T_CHANNELS(info ->InputFormat);
+    cmsUInt32Number Swapped    = T_DOSWAP(info ->InputFormat);
+    cmsUInt32Number FirstMoved = T_SWAPFIRST(info ->InputFormat);
+    cmsUInt32Number Negative   = T_FLAVOR(info ->InputFormat);
+    cmsUInt8Number* Plane      = accum;
+    cmsUInt32Number n;
+
+    // Step over the planes of the extra channels when they come first
+    if (Swapped ^ FirstMoved)
+        Plane += T_EXTRA(info ->InputFormat) * Stride;
+
+    for (n = 0; n < Channels; n++, Plane += Stride) {
+        cmsUInt32Number Slot = Swapped ? (Channels - n - 1) : n;
+        cmsUInt16Number Word = FROM_8_TO_16(*Plane);
+
+        if (Negative)
+            Word = REVERSE_FLAVOR_16(Word);
+        wIn[Slot] = Word;
+    }
+
+    return accum + 1;
+}
+
+// Special cases, provided for performance
+// Unpacks four 8-bit channels stored as C, M, Y, K and returns the pointer past the 4 bytes read
+static cmsUInt8Number* Unroll4Bytes(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wIn[],
+                                    CMSREGISTER cmsUInt8Number* accum,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[0] = FROM_8_TO_16(accum[0]); // C
+    wIn[1] = FROM_8_TO_16(accum[1]); // M
+    wIn[2] = FROM_8_TO_16(accum[2]); // Y
+    wIn[3] = FROM_8_TO_16(accum[3]); // K
+
+    return accum + 4;
+}
+
+// Same layout as Unroll4Bytes, but every byte is reversed (0xff - value) before being widened
+static cmsUInt8Number* Unroll4BytesReverse(CMSREGISTER _cmsTRANSFORM* info,
+                                           CMSREGISTER cmsUInt16Number wIn[],
+                                           CMSREGISTER cmsUInt8Number* accum,
+                                           CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[0] = FROM_8_TO_16(REVERSE_FLAVOR_8(accum[0])); // C
+    wIn[1] = FROM_8_TO_16(REVERSE_FLAVOR_8(accum[1])); // M
+    wIn[2] = FROM_8_TO_16(REVERSE_FLAVOR_8(accum[2])); // Y
+    wIn[3] = FROM_8_TO_16(REVERSE_FLAVOR_8(accum[3])); // K
+
+    return accum + 4;
+}
+
+// Unpacks four 8-bit channels stored as K, C, M, Y into C, M, Y, K order
+static cmsUInt8Number* Unroll4BytesSwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                             CMSREGISTER cmsUInt16Number wIn[],
+                                             CMSREGISTER cmsUInt8Number* accum,
+                                             CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[3] = FROM_8_TO_16(accum[0]); // K
+    wIn[0] = FROM_8_TO_16(accum[1]); // C
+    wIn[1] = FROM_8_TO_16(accum[2]); // M
+    wIn[2] = FROM_8_TO_16(accum[3]); // Y
+
+    return accum + 4;
+}
+
+// KYMC: unpacks four 8-bit channels stored in fully reversed order, so the first byte
+// lands in wIn[3] and the last one in wIn[0]
+static cmsUInt8Number* Unroll4BytesSwap(CMSREGISTER _cmsTRANSFORM* info,
+                                        CMSREGISTER cmsUInt16Number wIn[],
+                                        CMSREGISTER cmsUInt8Number* accum,
+                                        CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[3] = FROM_8_TO_16(accum[0]);  // K
+    wIn[2] = FROM_8_TO_16(accum[1]);  // Y
+    wIn[1] = FROM_8_TO_16(accum[2]);  // M
+    wIn[0] = FROM_8_TO_16(accum[3]);  // C
+
+    return accum + 4;
+}
+
+// Unpacks four 8-bit channels stored as Y, M, C, K (labels follow the wIn slots used by Unroll4Bytes)
+static cmsUInt8Number* Unroll4BytesSwapSwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                                 CMSREGISTER cmsUInt16Number wIn[],
+                                                 CMSREGISTER cmsUInt8Number* accum,
+                                                 CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[2] = FROM_8_TO_16(accum[0]);  // Y
+    wIn[1] = FROM_8_TO_16(accum[1]);  // M
+    wIn[0] = FROM_8_TO_16(accum[2]);  // C
+    wIn[3] = FROM_8_TO_16(accum[3]);  // K
+
+    return accum + 4;
+}
+
+// Unpacks three 8-bit channels stored as R, G, B and returns the pointer past the 3 bytes read
+static cmsUInt8Number* Unroll3Bytes(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wIn[],
+                                    CMSREGISTER cmsUInt8Number* accum,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[0] = FROM_8_TO_16(accum[0]);     // R
+    wIn[1] = FROM_8_TO_16(accum[1]);     // G
+    wIn[2] = FROM_8_TO_16(accum[2]);     // B
+
+    return accum + 3;
+}
+
+// Unpacks pixels stored as A, B, G, R: the leading byte is skipped and the rest is reversed into R, G, B
+static cmsUInt8Number* Unroll3BytesSkip1Swap(CMSREGISTER _cmsTRANSFORM* info,
+                                             CMSREGISTER cmsUInt16Number wIn[],
+                                             CMSREGISTER cmsUInt8Number* accum,
+                                             CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    // accum[0] is the ignored byte (A)
+    wIn[2] = FROM_8_TO_16(accum[1]); // B
+    wIn[1] = FROM_8_TO_16(accum[2]); // G
+    wIn[0] = FROM_8_TO_16(accum[3]); // R
+
+    return accum + 4;
+}
+
+// Unpacks pixels stored as B, G, R, A: the color bytes are reversed into R, G, B and the trailing byte is skipped
+static cmsUInt8Number* Unroll3BytesSkip1SwapSwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                                      CMSREGISTER cmsUInt16Number wIn[],
+                                                      CMSREGISTER cmsUInt8Number* accum,
+                                                      CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    // accum[3] is the ignored byte (A)
+    wIn[2] = FROM_8_TO_16(accum[0]); // B
+    wIn[1] = FROM_8_TO_16(accum[1]); // G
+    wIn[0] = FROM_8_TO_16(accum[2]); // R
+
+    return accum + 4;
+}
+
+// Unpacks pixels stored as A, R, G, B: the leading byte is skipped and R, G, B are taken in order
+static cmsUInt8Number* Unroll3BytesSkip1SwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                                  CMSREGISTER cmsUInt16Number wIn[],
+                                                  CMSREGISTER cmsUInt8Number* accum,
+                                                  CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    // accum[0] is the ignored byte (A)
+    wIn[0] = FROM_8_TO_16(accum[1]); // R
+    wIn[1] = FROM_8_TO_16(accum[2]); // G
+    wIn[2] = FROM_8_TO_16(accum[3]); // B
+
+    return accum + 4;
+}
+
+
+// BGR: unpacks three 8-bit channels stored in reversed order, so the first byte lands in
+// wIn[2] and the last one in wIn[0]
+static cmsUInt8Number* Unroll3BytesSwap(CMSREGISTER _cmsTRANSFORM* info,
+                                        CMSREGISTER cmsUInt16Number wIn[],
+                                        CMSREGISTER cmsUInt8Number* accum,
+                                        CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[2] = FROM_8_TO_16(accum[0]);     // B
+    wIn[1] = FROM_8_TO_16(accum[1]);     // G
+    wIn[0] = FROM_8_TO_16(accum[2]);     // R
+
+    return accum + 3;
+}
+
+// Unpacks three 8-bit Lab bytes: each is widened to 16 bits and then passed through FomLabV2ToLabV4
+static cmsUInt8Number* UnrollLabV2_8(CMSREGISTER _cmsTRANSFORM* info,
+                                     CMSREGISTER cmsUInt16Number wIn[],
+                                     CMSREGISTER cmsUInt8Number* accum,
+                                     CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[0] = FomLabV2ToLabV4(FROM_8_TO_16(accum[0]));     // L
+    wIn[1] = FomLabV2ToLabV4(FROM_8_TO_16(accum[1]));     // a
+    wIn[2] = FomLabV2ToLabV4(FROM_8_TO_16(accum[2]));     // b
+
+    return accum + 3;
+}
+
+// Same as UnrollLabV2_8 for pixels stored as A, L, a, b: the leading byte is skipped
+static cmsUInt8Number* UnrollALabV2_8(CMSREGISTER _cmsTRANSFORM* info,
+                                      CMSREGISTER cmsUInt16Number wIn[],
+                                      CMSREGISTER cmsUInt8Number* accum,
+                                      CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    // accum[0] is the ignored byte (A)
+    wIn[0] = FomLabV2ToLabV4(FROM_8_TO_16(accum[1]));     // L
+    wIn[1] = FomLabV2ToLabV4(FROM_8_TO_16(accum[2]));     // a
+    wIn[2] = FomLabV2ToLabV4(FROM_8_TO_16(accum[3]));     // b
+
+    return accum + 4;
+}
+
+// Unpacks three 16-bit Lab words read straight from the buffer, each passed through FomLabV2ToLabV4
+static cmsUInt8Number* UnrollLabV2_16(CMSREGISTER _cmsTRANSFORM* info,
+                                      CMSREGISTER cmsUInt16Number wIn[],
+                                      CMSREGISTER cmsUInt8Number* accum,
+                                      CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) accum;
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[0] = FomLabV2ToLabV4(Words[0]);     // L
+    wIn[1] = FomLabV2ToLabV4(Words[1]);     // a
+    wIn[2] = FomLabV2ToLabV4(Words[2]);     // b
+    return accum + 6;
+}
+
+// For duplex: unpacks two 8-bit channels in storage order and returns the pointer past
+// the 2 bytes read
+static cmsUInt8Number* Unroll2Bytes(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wIn[],
+                                    CMSREGISTER cmsUInt8Number* accum,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[0] = FROM_8_TO_16(accum[0]);     // ch1
+    wIn[1] = FROM_8_TO_16(accum[1]);     // ch2
+
+    return accum + 2;
+}
+
+
+
+
+// Monochrome: the 8-bit L sample is widened to 16 bits and duplicated into RGB for null-transforms
+static
+cmsUInt8Number* Unroll1Byte(CMSREGISTER _cmsTRANSFORM* info,
+                            CMSREGISTER cmsUInt16Number wIn[],
+                            CMSREGISTER cmsUInt8Number* accum,
+                            CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt16Number L = FROM_8_TO_16(*accum);
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[0] = wIn[1] = wIn[2] = L;
+    return accum + 1;
+}
+
+
+// Monochrome 8-bit followed by one byte to skip: L is duplicated into wIn[0..2], 2 bytes consumed
+static
+cmsUInt8Number* Unroll1ByteSkip1(CMSREGISTER _cmsTRANSFORM* info,
+                                 CMSREGISTER cmsUInt16Number wIn[],
+                                 CMSREGISTER cmsUInt8Number* accum,
+                                 CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt16Number L = FROM_8_TO_16(*accum);
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[0] = wIn[1] = wIn[2] = L;
+    return accum + 2;
+}
+
+// Monochrome 8-bit followed by two bytes to skip: L is duplicated into wIn[0..2], 3 bytes consumed
+static
+cmsUInt8Number* Unroll1ByteSkip2(CMSREGISTER _cmsTRANSFORM* info,
+                                 CMSREGISTER cmsUInt16Number wIn[],
+                                 CMSREGISTER cmsUInt8Number* accum,
+                                 CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt16Number L = FROM_8_TO_16(*accum);
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[0] = wIn[1] = wIn[2] = L;
+    return accum + 3;
+}
+
+// Monochrome 8-bit, reversed flavor: L is widened, complemented and duplicated into wIn[0..2]
+static
+cmsUInt8Number* Unroll1ByteReversed(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wIn[],
+                                    CMSREGISTER cmsUInt8Number* accum,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt16Number L = REVERSE_FLAVOR_16(FROM_8_TO_16(*accum));
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    wIn[0] = wIn[1] = wIn[2] = L;
+    return accum + 1;
+}
+
+
+// Generic chunky 16-bit unroller driven by InputFormat: handles endian swap, channel
+// reversal, reversed flavor, SWAPFIRST and extra samples. Returns the end of the pixel.
+static
+cmsUInt8Number* UnrollAnyWords(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wIn[],
+                               CMSREGISTER cmsUInt8Number* accum,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan       = T_CHANNELS(info -> InputFormat);
+    cmsUInt32Number SwapEndian  = T_ENDIAN16(info -> InputFormat);
+    cmsUInt32Number DoSwap      = T_DOSWAP(info ->InputFormat);
+    cmsUInt32Number Reverse     = T_FLAVOR(info ->InputFormat);
+    cmsUInt32Number SwapFirst   = T_SWAPFIRST(info -> InputFormat);
+    cmsUInt32Number Extra       = T_EXTRA(info -> InputFormat);
+    cmsUInt32Number ExtraFirst  = DoSwap ^ SwapFirst;
+    cmsUInt32Number i;
+
+    cmsUNUSED_PARAMETER(Stride);
+
+    // When ExtraFirst is set the extra samples are skipped before the channels are read
+    if (ExtraFirst)
+        accum += Extra * sizeof(cmsUInt16Number);
+
+    for (i=0; i < nChan; i++) {
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+        cmsUInt16Number v = *(cmsUInt16Number*) accum;
+
+        if (SwapEndian)
+            v = CHANGE_ENDIAN(v);
+
+        wIn[index] = Reverse ? REVERSE_FLAVOR_16(v) : v;
+        accum += sizeof(cmsUInt16Number);
+    }
+
+    // Otherwise the extra samples are skipped after the channels
+    if (!ExtraFirst)
+        accum += Extra * sizeof(cmsUInt16Number);
+
+    // SWAPFIRST with no extras: rotate channels one place to the left. Needs at least 2
+    // channels; with 0 channels (nChan-1) would wrap around, with 1 there is nothing to do
+    if (Extra == 0 && SwapFirst && nChan > 1) {
+        cmsUInt16Number tmp = wIn[0];
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsUInt16Number));
+        wIn[nChan-1] = tmp;
+    }
+
+    return accum;
+}
+
+// Planar 16-bit unroller: channel i is read Stride bytes after channel i-1. Honors
+// DOSWAP, reversed flavor and 16-bit endian swap; returns accum advanced by one word.
+static
+cmsUInt8Number* UnrollPlanarWords(CMSREGISTER _cmsTRANSFORM* info,
+                                  CMSREGISTER cmsUInt16Number wIn[],
+                                  CMSREGISTER cmsUInt8Number* accum,
+                                  CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan      = T_CHANNELS(info -> InputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info ->InputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->InputFormat);
+    cmsUInt32Number SwapEndian = T_ENDIAN16(info -> InputFormat);
+    cmsUInt8Number* plane      = accum;
+    cmsUInt32Number i;
+
+    // When DOSWAP is set, T_EXTRA planes are skipped before the first read
+    if (DoSwap)
+        plane += T_EXTRA(info -> InputFormat) * Stride;
+
+    for (i=0; i < nChan; i++, plane += Stride) {
+
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+        cmsUInt16Number v = *(cmsUInt16Number*) plane;
+
+        if (SwapEndian) v = CHANGE_ENDIAN(v);
+        if (Reverse)    v = REVERSE_FLAVOR_16(v);
+
+        wIn[index] = v;
+    }
+
+    return accum + sizeof(cmsUInt16Number);
+}
+
+
+// Unrolls 4 chunky 16-bit channels in storage order (C, M, Y, K); consumes 8 bytes
+static
+cmsUInt8Number* Unroll4Words(CMSREGISTER _cmsTRANSFORM* info,
+                             CMSREGISTER cmsUInt16Number wIn[],
+                             CMSREGISTER cmsUInt8Number* accum,
+                             CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[0] = ((cmsUInt16Number*) accum)[0]; // C
+    wIn[1] = ((cmsUInt16Number*) accum)[1]; // M
+    wIn[2] = ((cmsUInt16Number*) accum)[2]; // Y
+    wIn[3] = ((cmsUInt16Number*) accum)[3]; // K
+    return accum + 8;
+}
+
+// Unrolls 4 chunky 16-bit channels in storage order, complementing each (reversed flavor)
+static
+cmsUInt8Number* Unroll4WordsReverse(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wIn[],
+                                    CMSREGISTER cmsUInt8Number* accum,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[0] = REVERSE_FLAVOR_16(((cmsUInt16Number*) accum)[0]); // C
+    wIn[1] = REVERSE_FLAVOR_16(((cmsUInt16Number*) accum)[1]); // M
+    wIn[2] = REVERSE_FLAVOR_16(((cmsUInt16Number*) accum)[2]); // Y
+    wIn[3] = REVERSE_FLAVOR_16(((cmsUInt16Number*) accum)[3]); // K
+    return accum + 8;
+}
+
+// Unrolls 4 chunky 16-bit channels stored as K, C, M, Y into wIn[0..3] = C, M, Y, K
+static
+cmsUInt8Number* Unroll4WordsSwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                      CMSREGISTER cmsUInt16Number wIn[],
+                                      CMSREGISTER cmsUInt8Number* accum,
+                                      CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[3] = ((cmsUInt16Number*) accum)[0]; // K
+    wIn[0] = ((cmsUInt16Number*) accum)[1]; // C
+    wIn[1] = ((cmsUInt16Number*) accum)[2]; // M
+    wIn[2] = ((cmsUInt16Number*) accum)[3]; // Y
+    return accum + 8;
+}
+
+// KYMC
+// Unrolls 4 chunky 16-bit channels stored in reverse order into wIn[0..3] = C, M, Y, K
+static
+cmsUInt8Number* Unroll4WordsSwap(CMSREGISTER _cmsTRANSFORM* info,
+                                 CMSREGISTER cmsUInt16Number wIn[],
+                                 CMSREGISTER cmsUInt8Number* accum,
+                                 CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[3] = ((cmsUInt16Number*) accum)[0]; // K
+    wIn[2] = ((cmsUInt16Number*) accum)[1]; // Y
+    wIn[1] = ((cmsUInt16Number*) accum)[2]; // M
+    wIn[0] = ((cmsUInt16Number*) accum)[3]; // C
+    return accum + 8;
+}
+
+// Unrolls 4 chunky 16-bit channels: stored words 0..3 land in wIn[2], wIn[1], wIn[0], wIn[3]
+static
+cmsUInt8Number* Unroll4WordsSwapSwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                          CMSREGISTER cmsUInt16Number wIn[],
+                                          CMSREGISTER cmsUInt8Number* accum,
+                                          CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[2] = ((cmsUInt16Number*) accum)[0]; // 1st word -> slot 2
+    wIn[1] = ((cmsUInt16Number*) accum)[1]; // 2nd word -> slot 1
+    wIn[0] = ((cmsUInt16Number*) accum)[2]; // 3rd word -> slot 0
+    wIn[3] = ((cmsUInt16Number*) accum)[3]; // 4th word -> slot 3
+    return accum + 8;
+}
+
+// Unrolls 3 chunky 16-bit channels in storage order (CMY or RGB); consumes 6 bytes
+static
+cmsUInt8Number* Unroll3Words(CMSREGISTER _cmsTRANSFORM* info,
+                             CMSREGISTER cmsUInt16Number wIn[],
+                             CMSREGISTER cmsUInt8Number* accum,
+                             CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[0] = ((cmsUInt16Number*) accum)[0];  // C R
+    wIn[1] = ((cmsUInt16Number*) accum)[1];  // M G
+    wIn[2] = ((cmsUInt16Number*) accum)[2];  // Y B
+    return accum + 6;
+}
+
+// Unrolls 3 chunky 16-bit channels stored in reverse order: the first word goes to wIn[2]
+static
+cmsUInt8Number* Unroll3WordsSwap(CMSREGISTER _cmsTRANSFORM* info,
+                                 CMSREGISTER cmsUInt16Number wIn[],
+                                 CMSREGISTER cmsUInt8Number* accum,
+                                 CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[2] = ((cmsUInt16Number*) accum)[0];  // 1st word -> slot 2
+    wIn[1] = ((cmsUInt16Number*) accum)[1];  // 2nd word -> slot 1
+    wIn[0] = ((cmsUInt16Number*) accum)[2];  // 3rd word -> slot 0
+    return accum + 6;
+}
+
+// Skips one leading 16-bit word (A), then unrolls 3 words in reverse order; consumes 8 bytes
+static
+cmsUInt8Number* Unroll3WordsSkip1Swap(CMSREGISTER _cmsTRANSFORM* info,
+                                      CMSREGISTER cmsUInt16Number wIn[],
+                                      CMSREGISTER cmsUInt8Number* accum,
+                                      CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[2] = ((cmsUInt16Number*) accum)[1]; // 2nd word -> slot 2
+    wIn[1] = ((cmsUInt16Number*) accum)[2]; // 3rd word -> slot 1
+    wIn[0] = ((cmsUInt16Number*) accum)[3]; // 4th word -> slot 0
+
+    return accum + 8;
+}
+
+// Skips one leading 16-bit word (A), then unrolls 3 words in storage order; consumes 8 bytes
+static
+cmsUInt8Number* Unroll3WordsSkip1SwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                           CMSREGISTER cmsUInt16Number wIn[],
+                                           CMSREGISTER cmsUInt8Number* accum,
+                                           CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[0] = ((cmsUInt16Number*) accum)[1]; // R
+    wIn[1] = ((cmsUInt16Number*) accum)[2]; // G
+    wIn[2] = ((cmsUInt16Number*) accum)[3]; // B
+
+    return accum + 8;
+}
+
+// Monochrome: one 16-bit L sample is duplicated into wIn[0..2]; consumes 2 bytes
+static
+cmsUInt8Number* Unroll1Word(CMSREGISTER _cmsTRANSFORM* info,
+                            CMSREGISTER cmsUInt16Number wIn[],
+                            CMSREGISTER cmsUInt8Number* accum,
+                            CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt16Number L = *(cmsUInt16Number*) accum;
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    wIn[0] = wIn[1] = wIn[2] = L;
+    return accum + 2;
+}
+
+// Monochrome, reversed flavor: one 16-bit sample is complemented and duplicated into wIn[0..2]
+static
+cmsUInt8Number* Unroll1WordReversed(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wIn[],
+                                    CMSREGISTER cmsUInt8Number* accum,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt16Number L = REVERSE_FLAVOR_16(*(cmsUInt16Number*) accum);
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    wIn[0] = wIn[1] = wIn[2] = L;
+    return accum + 2;
+}
+
+// Monochrome: the first 16-bit word is duplicated into wIn[0..2]; 8 bytes are consumed in total
+static
+cmsUInt8Number* Unroll1WordSkip3(CMSREGISTER _cmsTRANSFORM* info,
+                                 CMSREGISTER cmsUInt16Number wIn[],
+                                 CMSREGISTER cmsUInt8Number* accum,
+                                 CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt16Number L = *(cmsUInt16Number*) accum;
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[0] = wIn[1] = wIn[2] = L;
+
+    return accum + 8;
+}
+
+// Unrolls 2 chunky 16-bit channels in storage order; consumes 4 bytes
+static
+cmsUInt8Number* Unroll2Words(CMSREGISTER _cmsTRANSFORM* info,
+                             CMSREGISTER cmsUInt16Number wIn[],
+                             CMSREGISTER cmsUInt8Number* accum,
+                             CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[0] = ((cmsUInt16Number*) accum)[0];    // ch1
+    wIn[1] = ((cmsUInt16Number*) accum)[1];    // ch2
+    return accum + 4;
+}
+
+
+// This is a conversion of Lab double to 16 bits (encoded via cmsFloat2LabEncoded).
+// Planar input reads L, a, b Stride bytes apart and advances by one double; chunky
+// input is reinterpreted as a cmsCIELab and advances past it plus T_EXTRA doubles.
+static
+cmsUInt8Number* UnrollLabDoubleTo16(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wIn[],
+                                    CMSREGISTER cmsUInt8Number* accum,
+                                    CMSREGISTER cmsUInt32Number  Stride)
+{
+    cmsCIELab Lab;
+    cmsFloat64Number* src_L;
+    cmsFloat64Number* src_a;
+    cmsFloat64Number* src_b;
+
+    if (!T_PLANAR(info -> InputFormat)) {
+
+        cmsFloat2LabEncoded(wIn, (cmsCIELab*) accum);
+        return accum + sizeof(cmsCIELab) + T_EXTRA(info ->InputFormat) * sizeof(cmsFloat64Number);
+    }
+
+    // Planar: one component per plane
+    src_L = (cmsFloat64Number*) accum;
+    src_a = (cmsFloat64Number*) (accum + Stride);
+    src_b = (cmsFloat64Number*) (accum + Stride * 2);
+
+    Lab.L = *src_L;
+    Lab.a = *src_a;
+    Lab.b = *src_b;
+
+    cmsFloat2LabEncoded(wIn, &Lab);
+    return accum + sizeof(cmsFloat64Number);
+}
+
+
+// This is a conversion of Lab float to 16 bits (encoded via cmsFloat2LabEncoded).
+//
+// Planar input:  L, a and b are floats read Stride bytes apart; the returned pointer
+//                is accum advanced by one float.
+// Chunky input:  L, a and b are three consecutive floats; the returned pointer skips
+//                them plus T_EXTRA further floats.
+static
+cmsUInt8Number* UnrollLabFloatTo16(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wIn[],
+                                    CMSREGISTER cmsUInt8Number* accum,
+                                    CMSREGISTER cmsUInt32Number  Stride)
+{
+    cmsCIELab Lab;
+    cmsUInt8Number* next;
+
+    if (T_PLANAR(info -> InputFormat)) {
+        // One component per plane
+        Lab.L = *(cmsFloat32Number*) accum;
+        Lab.a = *(cmsFloat32Number*) (accum + Stride);
+        Lab.b = *(cmsFloat32Number*) (accum + Stride * 2);
+
+        next = accum + sizeof(cmsFloat32Number);
+    }
+    else {
+        cmsFloat32Number* Pt = (cmsFloat32Number*) accum;
+
+        Lab.L = Pt[0];
+        Lab.a = Pt[1];
+        Lab.b = Pt[2];
+
+        next = accum + (3 + T_EXTRA(info ->InputFormat)) * sizeof(cmsFloat32Number);
+    }
+
+    // Encode the gathered Lab triplet into wIn
+    cmsFloat2LabEncoded(wIn, &Lab);
+    return next;
+}
+
+// This is a conversion of XYZ double to 16 bits (encoded via cmsFloat2XYZEncoded).
+// Planar input reads X, Y, Z Stride bytes apart and advances by one double; chunky
+// input is reinterpreted as a cmsCIEXYZ and advances past it plus T_EXTRA doubles.
+static
+cmsUInt8Number* UnrollXYZDoubleTo16(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wIn[],
+                                    CMSREGISTER cmsUInt8Number* accum,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsCIEXYZ XYZ;
+    cmsFloat64Number* src_X;
+    cmsFloat64Number* src_Y;
+    cmsFloat64Number* src_Z;
+
+    if (!T_PLANAR(info -> InputFormat)) {
+
+        // Chunky: encode straight from the input buffer
+        cmsFloat2XYZEncoded(wIn, (cmsCIEXYZ*) accum);
+
+        return accum + sizeof(cmsCIEXYZ) + T_EXTRA(info ->InputFormat) * sizeof(cmsFloat64Number);
+    }
+
+    // Planar: one component per plane
+    src_X = (cmsFloat64Number*) accum;
+    src_Y = (cmsFloat64Number*) (accum + Stride);
+    src_Z = (cmsFloat64Number*) (accum + Stride * 2);
+
+    XYZ.X = *src_X;
+    XYZ.Y = *src_Y;
+    XYZ.Z = *src_Z;
+
+    cmsFloat2XYZEncoded(wIn, &XYZ);
+
+    return accum + sizeof(cmsFloat64Number);
+}
+
+// This is a conversion of XYZ float to 16 bits (encoded via cmsFloat2XYZEncoded).
+//
+// Planar input:  X, Y and Z are floats read Stride bytes apart; the returned pointer
+//                is accum advanced by one float.
+// Chunky input:  X, Y and Z are three consecutive floats; the returned pointer skips
+//                them plus T_EXTRA further floats.
+static
+cmsUInt8Number* UnrollXYZFloatTo16(CMSREGISTER _cmsTRANSFORM* info,
+                                   CMSREGISTER cmsUInt16Number wIn[],
+                                   CMSREGISTER cmsUInt8Number* accum,
+                                   CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsCIEXYZ XYZ;
+    cmsUInt8Number* next;
+
+    if (T_PLANAR(info -> InputFormat)) {
+
+        // One component per plane
+        XYZ.X = *(cmsFloat32Number*) accum;
+        XYZ.Y = *(cmsFloat32Number*) (accum + Stride);
+        XYZ.Z = *(cmsFloat32Number*) (accum + Stride * 2);
+
+        next = accum + sizeof(cmsFloat32Number);
+    }
+    else {
+        cmsFloat32Number* Pt = (cmsFloat32Number*) accum;
+
+        // Three consecutive floats
+        XYZ.X = Pt[0];
+        XYZ.Y = Pt[1];
+        XYZ.Z = Pt[2];
+
+        next = accum + 3 * sizeof(cmsFloat32Number)
+                     + T_EXTRA(info ->InputFormat) * sizeof(cmsFloat32Number);
+    }
+
+    // Encode the gathered XYZ triplet into wIn; both layouts share
+    // this single call
+    cmsFloat2XYZEncoded(wIn, &XYZ);
+
+    return next;
+}
+
+// Check if space is marked as ink: TRUE when the colorspace field of Type is CMY,
+// CMYK or one of the multichannel spaces MCH5..MCH15, FALSE otherwise
+cmsINLINE cmsBool IsInkSpace(cmsUInt32Number Type)
+{
+    cmsBool IsInk;
+
+    switch (T_COLORSPACE(Type)) {
+
+     case PT_CMY:   case PT_CMYK:
+     case PT_MCH5:  case PT_MCH6:  case PT_MCH7:  case PT_MCH8:
+     case PT_MCH9:  case PT_MCH10: case PT_MCH11: case PT_MCH12:
+     case PT_MCH13: case PT_MCH14: case PT_MCH15:
+         IsInk = TRUE;
+         break;
+
+     default:
+         IsInk = FALSE;
+         break;
+    }
+
+    return IsInk;
+}
+
+// Return the size in bytes of a given formatter: the T_BYTES field of Format, except
+// that a zero field (used for double) yields sizeof(cmsUInt64Number)
+static
+cmsUInt32Number PixelSize(cmsUInt32Number Format)
+{
+    cmsUInt32Number Bytes = T_BYTES(Format);
+
+    // A non-zero field already holds the byte count
+    if (Bytes != 0)
+        return Bytes;
+
+    return sizeof(cmsUInt64Number);
+}
+
+// Unrolls doubles to 16 bits. Inks come in percentage (scaled by 655.35), remaining
+// cases are between 0..1.0 (scaled by 65535); the result is saturated to a 16-bit word.
+// Handles planar and chunky layouts, DOSWAP, SWAPFIRST, reversed flavor and extras.
+static
+cmsUInt8Number* UnrollDoubleTo16(CMSREGISTER _cmsTRANSFORM* info,
+                                CMSREGISTER cmsUInt16Number wIn[],
+                                CMSREGISTER cmsUInt8Number* accum,
+                                CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan      = T_CHANNELS(info -> InputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info ->InputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->InputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> InputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> InputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsUInt32Number Planar     = T_PLANAR(info -> InputFormat);
+    cmsFloat64Number v;
+    cmsUInt16Number  vi;
+    cmsUInt32Number i, start = 0;
+    cmsFloat64Number maximum = IsInkSpace(info ->InputFormat) ? 655.35 : 65535.0;
+
+    // Express Stride in units of PixelSize() so it can index the sample array
+    Stride /= PixelSize(info->InputFormat);
+
+    // When ExtraFirst is set, indexing starts after the extra samples
+    if (ExtraFirst)
+            start = Extra;
+
+    for (i=0; i < nChan; i++) {
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+
+        // NOTE: the sample is narrowed to cmsFloat32Number before being scaled
+        if (Planar)
+            v = (cmsFloat32Number) ((cmsFloat64Number*) accum)[(i + start) * Stride];
+        else
+            v = (cmsFloat32Number) ((cmsFloat64Number*) accum)[i + start];
+        vi = _cmsQuickSaturateWord(v * maximum);
+        if (Reverse)
+            vi = REVERSE_FLAVOR_16(vi);
+
+        wIn[index] = vi;
+    }
+
+    // SWAPFIRST with no extras: rotate channels one place to the left. Needs at least 2
+    // channels; with 0 channels (nChan-1) would wrap around, with 1 there is nothing to do
+    if (Extra == 0 && SwapFirst && nChan > 1) {
+        cmsUInt16Number tmp = wIn[0];
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsUInt16Number));
+        wIn[nChan-1] = tmp;
+    }
+
+    if (Planar)
+        return accum + sizeof(cmsFloat64Number);
+    else
+        return accum + (nChan + Extra) * sizeof(cmsFloat64Number);
+}
+
+
+
+// Unrolls floats to 16 bits: inks are scaled by 655.35, other spaces by 65535, then
+// saturated. Handles planar/chunky, DOSWAP, SWAPFIRST, reversed flavor and extras.
+static
+cmsUInt8Number* UnrollFloatTo16(CMSREGISTER _cmsTRANSFORM* info,
+                                CMSREGISTER cmsUInt16Number wIn[],
+                                CMSREGISTER cmsUInt8Number* accum,
+                                CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan      = T_CHANNELS(info -> InputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info ->InputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->InputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> InputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> InputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsUInt32Number Planar     = T_PLANAR(info -> InputFormat);
+    cmsFloat32Number v;
+    cmsUInt16Number  vi;
+    cmsUInt32Number i, start = 0;
+    cmsFloat64Number maximum = IsInkSpace(info ->InputFormat) ? 655.35 : 65535.0;
+
+    // Express Stride in units of PixelSize() so it can index the sample array
+    Stride /= PixelSize(info->InputFormat);
+
+    // When ExtraFirst is set, indexing starts after the extra samples
+    if (ExtraFirst)
+            start = Extra;
+
+    for (i=0; i < nChan; i++) {
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+
+        if (Planar)
+            v = (cmsFloat32Number) ((cmsFloat32Number*) accum)[(i + start) * Stride];
+        else
+            v = (cmsFloat32Number) ((cmsFloat32Number*) accum)[i + start];
+        vi = _cmsQuickSaturateWord(v * maximum);
+        if (Reverse)
+            vi = REVERSE_FLAVOR_16(vi);
+
+        wIn[index] = vi;
+    }
+
+    // SWAPFIRST with no extras: rotate channels one place to the left. Needs at least 2
+    // channels; with 0 channels (nChan-1) would wrap around, with 1 there is nothing to do
+    if (Extra == 0 && SwapFirst && nChan > 1) {
+        cmsUInt16Number tmp = wIn[0];
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsUInt16Number));
+        wIn[nChan-1] = tmp;
+    }
+
+    if (Planar)
+        return accum + sizeof(cmsFloat32Number);
+    else
+        return accum + (nChan + Extra) * sizeof(cmsFloat32Number);
+}
+
+
+
+
+// For 1 channel, we need to duplicate data (it comes in 0..1.0 range)
+static
+cmsUInt8Number* UnrollDouble1Chan(CMSREGISTER _cmsTRANSFORM* info,
+                                  CMSREGISTER cmsUInt16Number wIn[],
+                                  CMSREGISTER cmsUInt8Number* accum,
+                                  CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt16Number Gray = _cmsQuickSaturateWord((*(cmsFloat64Number*) accum) * 65535.0);
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[0] = Gray;
+    wIn[1] = Gray;
+    wIn[2] = Gray;
+    return accum + sizeof(cmsFloat64Number);
+}
+
+//-------------------------------------------------------------------------------------------------------------------
+
+// For anything going from cmsFloat32Number: ink values are divided by 100, other
+// spaces are divided by 1; reversed flavor stores 1 - v. Handles planar/chunky and extras.
+static
+cmsUInt8Number* UnrollFloatsToFloat(_cmsTRANSFORM* info,
+                                    cmsFloat32Number wIn[],
+                                    cmsUInt8Number* accum,
+                                    cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan      = T_CHANNELS(info -> InputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info ->InputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->InputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> InputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> InputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsUInt32Number Planar     = T_PLANAR(info -> InputFormat);
+    cmsFloat32Number v;
+    cmsUInt32Number i, start = 0;
+    cmsFloat32Number maximum = IsInkSpace(info ->InputFormat) ? 100.0F : 1.0F;
+
+    // Express Stride in units of PixelSize() so it can index the sample array
+    Stride /= PixelSize(info->InputFormat);
+
+    // When ExtraFirst is set, indexing starts after the extra samples
+    if (ExtraFirst)
+            start = Extra;
+
+    for (i=0; i < nChan; i++) {
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+
+        if (Planar)
+            v = (cmsFloat32Number) ((cmsFloat32Number*) accum)[(i + start) * Stride];
+        else
+            v = (cmsFloat32Number) ((cmsFloat32Number*) accum)[i + start];
+
+        v /= maximum;
+        wIn[index] = Reverse ? 1 - v : v;
+    }
+
+    // SWAPFIRST with no extras: rotate channels one place to the left. Needs at least 2
+    // channels; with 0 channels (nChan-1) would wrap around, with 1 there is nothing to do
+    if (Extra == 0 && SwapFirst && nChan > 1) {
+        cmsFloat32Number tmp = wIn[0];
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsFloat32Number));
+        wIn[nChan-1] = tmp;
+    }
+
+    if (Planar)
+        return accum + sizeof(cmsFloat32Number);
+    else
+        return accum + (nChan + Extra) * sizeof(cmsFloat32Number);
+}
+
+// For anything going from double: ink values are divided by 100, other spaces are
+// divided by 1; reversed flavor stores 1 - v. The result is narrowed to
+// cmsFloat32Number. Handles planar and chunky layouts and extra samples.
+static
+cmsUInt8Number* UnrollDoublesToFloat(_cmsTRANSFORM* info,
+                                    cmsFloat32Number wIn[],
+                                    cmsUInt8Number* accum,
+                                    cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan      = T_CHANNELS(info -> InputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info ->InputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->InputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> InputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> InputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsUInt32Number Planar     = T_PLANAR(info -> InputFormat);
+    cmsFloat64Number v;
+    cmsUInt32Number i, start = 0;
+    cmsFloat64Number maximum = IsInkSpace(info ->InputFormat) ? 100.0 : 1.0;
+
+    // Express Stride in units of PixelSize() so it can index the sample array
+    Stride /= PixelSize(info->InputFormat);
+
+    // When ExtraFirst is set, indexing starts after the extra samples
+    if (ExtraFirst)
+            start = Extra;
+
+    for (i=0; i < nChan; i++) {
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+
+        if (Planar)
+            v = (cmsFloat64Number) ((cmsFloat64Number*) accum)[(i + start)  * Stride];
+        else
+            v = (cmsFloat64Number) ((cmsFloat64Number*) accum)[i + start];
+
+        v /= maximum;
+        wIn[index] = (cmsFloat32Number) (Reverse ? 1.0 - v : v);
+    }
+
+    // SWAPFIRST with no extras: rotate channels one place to the left. Needs at least 2
+    // channels; with 0 channels (nChan-1) would wrap around, with 1 there is nothing to do
+    if (Extra == 0 && SwapFirst && nChan > 1) {
+        cmsFloat32Number tmp = wIn[0];
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsFloat32Number));
+        wIn[nChan-1] = tmp;
+    }
+
+    if (Planar)
+        return accum + sizeof(cmsFloat64Number);
+    else
+        return accum + (nChan + Extra) * sizeof(cmsFloat64Number);
+}
+
+
+
+// From Lab double to cmsFloat32Number: L goes from 0..100 to 0..1, a and b from
+// -128..+127 to 0..1. Chunky components are adjacent; planar ones are
+// Stride / PixelSize() elements apart.
+static
+cmsUInt8Number* UnrollLabDoubleToFloat(_cmsTRANSFORM* info,
+                                       cmsFloat32Number wIn[],
+                                       cmsUInt8Number* accum,
+                                       cmsUInt32Number Stride)
+{
+    cmsFloat64Number* Pt   = (cmsFloat64Number*) accum;
+    cmsUInt32Number   Step = 1;        // distance between components, in elements of Pt
+    cmsUInt8Number*   Next;
+
+    if (T_PLANAR(info -> InputFormat)) {
+
+        Step = Stride / PixelSize(info->InputFormat);
+        Next = accum + sizeof(cmsFloat64Number);
+    }
+    else {
+
+        Next = accum + sizeof(cmsFloat64Number)*(3 + T_EXTRA(info ->InputFormat));
+    }
+
+    wIn[0] = (cmsFloat32Number) (Pt[0] / 100.0);                 // from 0..100 to 0..1
+    wIn[1] = (cmsFloat32Number) ((Pt[Step] + 128) / 255.0);      // from -128..+127 to 0..1
+    wIn[2] = (cmsFloat32Number) ((Pt[Step*2] + 128) / 255.0);
+
+    return Next;
+}
+
+// From Lab float to cmsFloat32Number: L goes from 0..100 to 0..1, a and b from
+// -128..+127 to 0..1. Chunky components are adjacent; planar ones are
+// Stride / PixelSize() elements apart.
+static
+cmsUInt8Number* UnrollLabFloatToFloat(_cmsTRANSFORM* info,
+                                      cmsFloat32Number wIn[],
+                                      cmsUInt8Number* accum,
+                                      cmsUInt32Number Stride)
+{
+    cmsFloat32Number* Pt   = (cmsFloat32Number*) accum;
+    cmsUInt32Number   Step = 1;        // distance between components, in elements of Pt
+    cmsUInt8Number*   Next;
+
+    if (T_PLANAR(info -> InputFormat)) {
+
+        Step = Stride / PixelSize(info->InputFormat);
+        Next = accum + sizeof(cmsFloat32Number);
+    }
+    else {
+
+        Next = accum + sizeof(cmsFloat32Number)*(3 + T_EXTRA(info ->InputFormat));
+    }
+
+    wIn[0] = (cmsFloat32Number) (Pt[0] / 100.0);                 // from 0..100 to 0..1
+    wIn[1] = (cmsFloat32Number) ((Pt[Step] + 128) / 255.0);      // from -128..+127 to 0..1
+    wIn[2] = (cmsFloat32Number) ((Pt[Step*2] + 128) / 255.0);
+
+    return Next;
+}
+
+
+
+// From XYZ double to cmsFloat32Number. 1.15 fixed point range: each component is
+// divided by MAX_ENCODEABLE_XYZ. Chunky components are adjacent; planar ones are
+// Stride / PixelSize() elements apart.
+static
+cmsUInt8Number* UnrollXYZDoubleToFloat(_cmsTRANSFORM* info,
+                                       cmsFloat32Number wIn[],
+                                       cmsUInt8Number* accum,
+                                       cmsUInt32Number Stride)
+{
+    cmsFloat64Number* Pt   = (cmsFloat64Number*) accum;
+    cmsUInt32Number   Step = 1;        // distance between components, in elements of Pt
+    cmsUInt8Number*   Next;
+
+    if (T_PLANAR(info -> InputFormat)) {
+
+        Step = Stride / PixelSize(info->InputFormat);
+        Next = accum + sizeof(cmsFloat64Number);
+    }
+    else {
+
+        Next = accum + sizeof(cmsFloat64Number)*(3 + T_EXTRA(info ->InputFormat));
+    }
+
+    wIn[0] = (cmsFloat32Number) (Pt[0] / MAX_ENCODEABLE_XYZ);
+    wIn[1] = (cmsFloat32Number) (Pt[Step] / MAX_ENCODEABLE_XYZ);
+    wIn[2] = (cmsFloat32Number) (Pt[Step*2] / MAX_ENCODEABLE_XYZ);
+
+    return Next;
+}
+
+// From XYZ float to cmsFloat32Number: each component is divided by MAX_ENCODEABLE_XYZ.
+// Chunky components are adjacent; planar ones are Stride / PixelSize() elements apart.
+static
+cmsUInt8Number* UnrollXYZFloatToFloat(_cmsTRANSFORM* info,
+                                      cmsFloat32Number wIn[],
+                                      cmsUInt8Number* accum,
+                                      cmsUInt32Number Stride)
+{
+    cmsFloat32Number* Pt   = (cmsFloat32Number*) accum;
+    cmsUInt32Number   Step = 1;        // distance between components, in elements of Pt
+    cmsUInt8Number*   Next;
+
+    if (T_PLANAR(info -> InputFormat)) {
+
+        Step = Stride / PixelSize(info->InputFormat);
+        Next = accum + sizeof(cmsFloat32Number);
+    }
+    else {
+
+        Next = accum + sizeof(cmsFloat32Number)*(3 + T_EXTRA(info ->InputFormat));
+    }
+
+    wIn[0] = (cmsFloat32Number) (Pt[0] / MAX_ENCODEABLE_XYZ);
+    wIn[1] = (cmsFloat32Number) (Pt[Step] / MAX_ENCODEABLE_XYZ);
+    wIn[2] = (cmsFloat32Number) (Pt[Step*2] / MAX_ENCODEABLE_XYZ);
+
+    return Next;
+}
+
+
+
+// Packing routines -----------------------------------------------------------------------------------------------------------
+
+
+// Generic chunky for byte: narrows nChan 16-bit values to bytes and stores them
+// consecutively, honoring DOSWAP, SWAPFIRST and reversed flavor. Extra samples are
+// stepped over without being written. Returns the end of the packed pixel.
+static
+cmsUInt8Number* PackAnyBytes(CMSREGISTER _cmsTRANSFORM* info,
+                             CMSREGISTER cmsUInt16Number wOut[],
+                             CMSREGISTER cmsUInt8Number* output,
+                             CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan      = T_CHANNELS(info -> OutputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info ->OutputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->OutputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> OutputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> OutputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsUInt8Number* swap1      = output;
+    cmsUInt8Number v = 0;
+    cmsUInt32Number i;
+
+    cmsUNUSED_PARAMETER(Stride);
+
+    // When ExtraFirst is set the extra samples are stepped over before the channels
+    if (ExtraFirst) {
+        output += Extra;
+    }
+
+    for (i=0; i < nChan; i++) {
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+
+        v = FROM_16_TO_8(wOut[index]);
+        if (Reverse)
+            v = REVERSE_FLAVOR_8(v);
+
+        *output++ = v;
+    }
+
+    // Otherwise the extra samples are stepped over after the channels
+    if (!ExtraFirst) {
+        output += Extra;
+    }
+
+    // SWAPFIRST with no extras: shift the bytes up one place and put the last value written
+    // first. Needs at least 2 channels; with 0 channels (nChan-1) would wrap around, with
+    // 1 channel the rotation is a no-op
+    if (Extra == 0 && SwapFirst && nChan > 1) {
+        memmove(swap1 + 1, swap1, nChan-1);
+        *swap1 = v;
+    }
+
+    return output;
+}
+
+
+
+// Generic chunky for words: stores nChan 16-bit values consecutively, honoring endian
+// swap, DOSWAP, SWAPFIRST and reversed flavor. Extra samples are stepped over without
+// being written. Returns the end of the packed pixel.
+static
+cmsUInt8Number* PackAnyWords(CMSREGISTER _cmsTRANSFORM* info,
+                             CMSREGISTER cmsUInt16Number wOut[],
+                             CMSREGISTER cmsUInt8Number* output,
+                             CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan      = T_CHANNELS(info -> OutputFormat);
+    cmsUInt32Number SwapEndian = T_ENDIAN16(info -> OutputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info ->OutputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->OutputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> OutputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> OutputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsUInt16Number* swap1     = (cmsUInt16Number*) output;
+    cmsUInt16Number v = 0;
+    cmsUInt32Number i;
+
+    cmsUNUSED_PARAMETER(Stride);
+
+    // When ExtraFirst is set the extra samples are stepped over before the channels
+    if (ExtraFirst) {
+        output += Extra * sizeof(cmsUInt16Number);
+    }
+
+    for (i=0; i < nChan; i++) {
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+
+        v = wOut[index];
+        if (SwapEndian)
+            v = CHANGE_ENDIAN(v);
+        if (Reverse)
+            v = REVERSE_FLAVOR_16(v);
+
+        *(cmsUInt16Number*) output = v;
+        output += sizeof(cmsUInt16Number);
+    }
+
+    // Otherwise the extra samples are stepped over after the channels
+    if (!ExtraFirst) {
+        output += Extra * sizeof(cmsUInt16Number);
+    }
+
+    // SWAPFIRST with no extras: shift the words up one place and put the last value written
+    // first. Needs at least 2 channels; with 0 channels (nChan-1) would wrap around, with
+    // 1 channel the rotation is a no-op
+    if (Extra == 0 && SwapFirst && nChan > 1) {
+        memmove(swap1 + 1, swap1, (nChan-1)* sizeof(cmsUInt16Number));
+        *swap1 = v;
+    }
+
+    return output;
+}
+
+
+// Planar 8-bit packer: every channel of the pixel goes to its own plane,
+// consecutive planes being Stride bytes apart. When exactly one of
+// DOSWAP / SWAPFIRST is set, the planes of the extra channels are skipped
+// first. Returns the start position plus one byte.
+static
+cmsUInt8Number* PackPlanarBytes(CMSREGISTER _cmsTRANSFORM* info,
+                                CMSREGISTER cmsUInt16Number wOut[],
+                                CMSREGISTER cmsUInt8Number* output,
+                                CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt32Number Format    = info -> OutputFormat;
+    cmsUInt32Number nChannels = T_CHANNELS(Format);
+    cmsUInt32Number Swapped   = T_DOSWAP(Format);
+    cmsUInt32Number Inverted  = T_FLAVOR(Format);
+    cmsUInt8Number* Plane     = output;
+    cmsUInt32Number k;
+
+    if (Swapped ^ T_SWAPFIRST(Format))
+        Plane += T_EXTRA(Format) * Stride;
+
+    for (k = 0; k < nChannels; k++, Plane += Stride) {
+
+        cmsUInt8Number Sample = FROM_16_TO_8(wOut[Swapped ? (nChannels - k - 1) : k]);
+
+        if (Inverted)
+            Sample = (cmsUInt8Number) REVERSE_FLAVOR_8(Sample);
+
+        *Plane = Sample;
+    }
+
+    return output + 1;
+}
+
+
+// Planar 16-bit packer: every channel of the pixel goes to its own plane,
+// consecutive planes being Stride bytes apart. Only DOSWAP makes the
+// planes of the extra channels be skipped first; SWAPFIRST is not
+// consulted here. Returns the start position plus one 16-bit word.
+static
+cmsUInt8Number* PackPlanarWords(CMSREGISTER _cmsTRANSFORM* info,
+                                CMSREGISTER cmsUInt16Number wOut[],
+                                CMSREGISTER cmsUInt8Number* output,
+                                CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt32Number Format      = info -> OutputFormat;
+    cmsUInt32Number nChannels   = T_CHANNELS(Format);
+    cmsUInt32Number Swapped     = T_DOSWAP(Format);
+    cmsUInt32Number Inverted    = T_FLAVOR(Format);
+    cmsUInt32Number OtherEndian = T_ENDIAN16(Format);
+    cmsUInt8Number* Plane       = output;
+    cmsUInt32Number k;
+
+    if (Swapped)
+        Plane += T_EXTRA(Format) * Stride;
+
+    for (k = 0; k < nChannels; k++, Plane += Stride) {
+
+        cmsUInt16Number Sample = wOut[Swapped ? (nChannels - k - 1) : k];
+
+        // Byte order is changed first, inversion is applied afterwards
+        if (OtherEndian)
+            Sample = CHANGE_ENDIAN(Sample);
+        if (Inverted)
+            Sample = REVERSE_FLAVOR_16(Sample);
+
+        *(cmsUInt16Number*) Plane = Sample;
+    }
+
+    return output + sizeof(cmsUInt16Number);
+}
+
+// CMYKcm (unrolled for speed)
+
+// Stores wOut[0] .. wOut[5], each converted with FROM_16_TO_8, as six
+// consecutive bytes in that order and returns the position past them.
+static
+cmsUInt8Number* Pack6Bytes(CMSREGISTER _cmsTRANSFORM* info,
+                           CMSREGISTER cmsUInt16Number wOut[],
+                           CMSREGISTER cmsUInt8Number* output,
+                           CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    output[0] = FROM_16_TO_8(wOut[0]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[2]);
+    output[3] = FROM_16_TO_8(wOut[3]);
+    output[4] = FROM_16_TO_8(wOut[4]);
+    output[5] = FROM_16_TO_8(wOut[5]);
+    return output + 6;
+}
+
+// KCMYcm
+
+// Stores wOut[5] down to wOut[0], each converted with FROM_16_TO_8, as
+// six consecutive bytes and returns the position past them.
+static
+cmsUInt8Number* Pack6BytesSwap(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wOut[],
+                               CMSREGISTER cmsUInt8Number* output,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    output[0] = FROM_16_TO_8(wOut[5]);
+    output[1] = FROM_16_TO_8(wOut[4]);
+    output[2] = FROM_16_TO_8(wOut[3]);
+    output[3] = FROM_16_TO_8(wOut[2]);
+    output[4] = FROM_16_TO_8(wOut[1]);
+    output[5] = FROM_16_TO_8(wOut[0]);
+    return output + 6;
+}
+
+// CMYKcm
+// Copies wOut[0] .. wOut[5] unchanged into six consecutive 16-bit slots
+// starting at output, in that order. Each word is stored directly as a
+// cmsUInt16Number, with no byte swapping or inversion.
+// Returns the position past the twelve bytes written.
+static
+cmsUInt8Number* Pack6Words(CMSREGISTER _cmsTRANSFORM* info,
+                           CMSREGISTER cmsUInt16Number wOut[],
+                           CMSREGISTER cmsUInt8Number* output,
+                           CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    // Slots are two bytes apart
+    *(cmsUInt16Number*) (output +  0) = wOut[0];
+    *(cmsUInt16Number*) (output +  2) = wOut[1];
+    *(cmsUInt16Number*) (output +  4) = wOut[2];
+    *(cmsUInt16Number*) (output +  6) = wOut[3];
+    *(cmsUInt16Number*) (output +  8) = wOut[4];
+    *(cmsUInt16Number*) (output + 10) = wOut[5];
+
+    // Six words, two bytes each
+    return output + 12;
+}
+
+// KCMYcm
+// Copies wOut[5] down to wOut[0] unchanged into six consecutive 16-bit
+// slots starting at output. Each word is stored directly as a
+// cmsUInt16Number, with no byte swapping or inversion.
+// Returns the position past the twelve bytes written.
+static
+cmsUInt8Number* Pack6WordsSwap(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wOut[],
+                               CMSREGISTER cmsUInt8Number* output,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    // Slots are two bytes apart
+    *(cmsUInt16Number*) (output +  0) = wOut[5];
+    *(cmsUInt16Number*) (output +  2) = wOut[4];
+    *(cmsUInt16Number*) (output +  4) = wOut[3];
+    *(cmsUInt16Number*) (output +  6) = wOut[2];
+    *(cmsUInt16Number*) (output +  8) = wOut[1];
+    *(cmsUInt16Number*) (output + 10) = wOut[0];
+
+    // Six words, two bytes each
+    return output + 12;
+}
+
+
+// Stores wOut[0] .. wOut[3], each converted with FROM_16_TO_8, as four
+// consecutive bytes in that order and returns the position past them.
+static
+cmsUInt8Number* Pack4Bytes(CMSREGISTER _cmsTRANSFORM* info,
+                           CMSREGISTER cmsUInt16Number wOut[],
+                           CMSREGISTER cmsUInt8Number* output,
+                           CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    output[0] = FROM_16_TO_8(wOut[0]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[2]);
+    output[3] = FROM_16_TO_8(wOut[3]);
+    return output + 4;
+}
+
+// Stores wOut[0] .. wOut[3] as four consecutive bytes; each word goes
+// through FROM_16_TO_8 and the result through REVERSE_FLAVOR_8.
+static
+cmsUInt8Number* Pack4BytesReverse(CMSREGISTER _cmsTRANSFORM* info,
+                                  CMSREGISTER cmsUInt16Number wOut[],
+                                  CMSREGISTER cmsUInt8Number* output,
+                                  CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    output[0] = REVERSE_FLAVOR_8(FROM_16_TO_8(wOut[0]));
+    output[1] = REVERSE_FLAVOR_8(FROM_16_TO_8(wOut[1]));
+    output[2] = REVERSE_FLAVOR_8(FROM_16_TO_8(wOut[2]));
+    output[3] = REVERSE_FLAVOR_8(FROM_16_TO_8(wOut[3]));
+    return output + 4;
+}
+
+
+// Four bytes via FROM_16_TO_8 with the last channel moved to the front:
+// the byte order written is wOut[3], wOut[0], wOut[1], wOut[2].
+static
+cmsUInt8Number* Pack4BytesSwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wOut[],
+                                    CMSREGISTER cmsUInt8Number* output,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    output[0] = FROM_16_TO_8(wOut[3]);
+    output[1] = FROM_16_TO_8(wOut[0]);
+    output[2] = FROM_16_TO_8(wOut[1]);
+    output[3] = FROM_16_TO_8(wOut[2]);
+    return output + 4;
+}
+
+// ABGR
+// Four bytes via FROM_16_TO_8 in reversed channel order: the byte order
+// written is wOut[3], wOut[2], wOut[1], wOut[0].
+static
+cmsUInt8Number* Pack4BytesSwap(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wOut[],
+                               CMSREGISTER cmsUInt8Number* output,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    output[0] = FROM_16_TO_8(wOut[3]);
+    output[1] = FROM_16_TO_8(wOut[2]);
+    output[2] = FROM_16_TO_8(wOut[1]);
+    output[3] = FROM_16_TO_8(wOut[0]);
+    return output + 4;
+}
+
+// Four bytes via FROM_16_TO_8; the byte order written is wOut[2],
+// wOut[1], wOut[0], wOut[3]. Returns the position past them.
+static
+cmsUInt8Number* Pack4BytesSwapSwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                        CMSREGISTER cmsUInt16Number wOut[],
+                                        CMSREGISTER cmsUInt8Number* output,
+                                        CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    output[0] = FROM_16_TO_8(wOut[2]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[0]);
+    output[3] = FROM_16_TO_8(wOut[3]);
+    return output + 4;
+}
+
+// Copies wOut[0] .. wOut[3] unchanged into four consecutive 16-bit slots
+// starting at output, in that order, and returns the position past the
+// eight bytes written.
+static
+cmsUInt8Number* Pack4Words(CMSREGISTER _cmsTRANSFORM* info,
+                           CMSREGISTER cmsUInt16Number wOut[],
+                           CMSREGISTER cmsUInt8Number* output,
+                           CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    *(cmsUInt16Number*) (output + 0) = wOut[0];
+    *(cmsUInt16Number*) (output + 2) = wOut[1];
+    *(cmsUInt16Number*) (output + 4) = wOut[2];
+    *(cmsUInt16Number*) (output + 6) = wOut[3];
+
+    // Four words, two bytes each
+    return output + 8;
+}
+
+// Writes wOut[0] .. wOut[3], each passed through REVERSE_FLAVOR_16, into
+// four consecutive 16-bit slots starting at output and returns the
+// position past the eight bytes written.
+static
+cmsUInt8Number* Pack4WordsReverse(CMSREGISTER _cmsTRANSFORM* info,
+                                  CMSREGISTER cmsUInt16Number wOut[],
+                                  CMSREGISTER cmsUInt8Number* output,
+                                  CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    *(cmsUInt16Number*) (output + 0) = REVERSE_FLAVOR_16(wOut[0]);
+    *(cmsUInt16Number*) (output + 2) = REVERSE_FLAVOR_16(wOut[1]);
+    *(cmsUInt16Number*) (output + 4) = REVERSE_FLAVOR_16(wOut[2]);
+    *(cmsUInt16Number*) (output + 6) = REVERSE_FLAVOR_16(wOut[3]);
+
+    // Four words, two bytes each
+    return output + 8;
+}
+
+// ABGR
+// Copies wOut[3] down to wOut[0] unchanged into four consecutive 16-bit
+// slots starting at output and returns the position past the eight
+// bytes written.
+static
+cmsUInt8Number* Pack4WordsSwap(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wOut[],
+                               CMSREGISTER cmsUInt8Number* output,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    *(cmsUInt16Number*) (output + 0) = wOut[3];
+    *(cmsUInt16Number*) (output + 2) = wOut[2];
+    *(cmsUInt16Number*) (output + 4) = wOut[1];
+    *(cmsUInt16Number*) (output + 6) = wOut[0];
+
+    // Four words, two bytes each
+    return output + 8;
+}
+
+// CMYK
+// Writes wOut[0] .. wOut[3], each passed through CHANGE_ENDIAN, into four
+// consecutive 16-bit slots starting at output and returns the position
+// past the eight bytes written.
+static
+cmsUInt8Number* Pack4WordsBigEndian(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wOut[],
+                                    CMSREGISTER cmsUInt8Number* output,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    *(cmsUInt16Number*) (output + 0) = CHANGE_ENDIAN(wOut[0]);
+    *(cmsUInt16Number*) (output + 2) = CHANGE_ENDIAN(wOut[1]);
+    *(cmsUInt16Number*) (output + 4) = CHANGE_ENDIAN(wOut[2]);
+    *(cmsUInt16Number*) (output + 6) = CHANGE_ENDIAN(wOut[3]);
+
+    // Four words, two bytes each
+    return output + 8;
+}
+
+
+// Three bytes: each of wOut[0..2] is passed through FomLabV4ToLabV2 and
+// then FROM_16_TO_8. Returns the position past the three bytes.
+static
+cmsUInt8Number* PackLabV2_8(CMSREGISTER _cmsTRANSFORM* info,
+                            CMSREGISTER cmsUInt16Number wOut[],
+                            CMSREGISTER cmsUInt8Number* output,
+                            CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    output[0] = FROM_16_TO_8(FomLabV4ToLabV2(wOut[0]));
+    output[1] = FROM_16_TO_8(FomLabV4ToLabV2(wOut[1]));
+    output[2] = FROM_16_TO_8(FomLabV4ToLabV2(wOut[2]));
+    return output + 3;
+}
+
+// Same conversion as the plain Lab V2 8-bit packer, but one leading byte
+// is skipped first. Returns the position four bytes past the start.
+static
+cmsUInt8Number* PackALabV2_8(CMSREGISTER _cmsTRANSFORM* info,
+                             CMSREGISTER cmsUInt16Number wOut[],
+                             CMSREGISTER cmsUInt8Number* output,
+                             CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    // output[0] is left untouched
+    output[1] = FROM_16_TO_8(FomLabV4ToLabV2(wOut[0]));
+    output[2] = FROM_16_TO_8(FomLabV4ToLabV2(wOut[1]));
+    output[3] = FROM_16_TO_8(FomLabV4ToLabV2(wOut[2]));
+    return output + 4;
+}
+
+// Writes wOut[0..2], each passed through FomLabV4ToLabV2, into three
+// consecutive 16-bit slots starting at output and returns the position
+// past the six bytes written.
+static
+cmsUInt8Number* PackLabV2_16(CMSREGISTER _cmsTRANSFORM* info,
+                             CMSREGISTER cmsUInt16Number wOut[],
+                             CMSREGISTER cmsUInt8Number* output,
+                             CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    *(cmsUInt16Number*) (output + 0) = FomLabV4ToLabV2(wOut[0]);
+    *(cmsUInt16Number*) (output + 2) = FomLabV4ToLabV2(wOut[1]);
+    *(cmsUInt16Number*) (output + 4) = FomLabV4ToLabV2(wOut[2]);
+
+    return output + 6;
+}
+
+// Stores wOut[0] .. wOut[2], each converted with FROM_16_TO_8, as three
+// consecutive bytes in that order and returns the position past them.
+static
+cmsUInt8Number* Pack3Bytes(CMSREGISTER _cmsTRANSFORM* info,
+                           CMSREGISTER cmsUInt16Number wOut[],
+                           CMSREGISTER cmsUInt8Number* output,
+                           CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    output[0] = FROM_16_TO_8(wOut[0]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[2]);
+    return output + 3;
+}
+
+// Stores only the low eight bits (& 0xFFU) of wOut[0] .. wOut[2] as three
+// consecutive bytes; FROM_16_TO_8 is not applied in this variant.
+static
+cmsUInt8Number* Pack3BytesOptimized(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wOut[],
+                                    CMSREGISTER cmsUInt8Number* output,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    output[0] = (wOut[0] & 0xFFU);
+    output[1] = (wOut[1] & 0xFFU);
+    output[2] = (wOut[2] & 0xFFU);
+    return output + 3;
+}
+
+// Three bytes via FROM_16_TO_8 in reversed channel order: the byte order
+// written is wOut[2], wOut[1], wOut[0].
+static
+cmsUInt8Number* Pack3BytesSwap(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wOut[],
+                               CMSREGISTER cmsUInt8Number* output,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    output[0] = FROM_16_TO_8(wOut[2]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[0]);
+    return output + 3;
+}
+
+// Stores only the low eight bits (& 0xFFU) of wOut[2], wOut[1], wOut[0],
+// in that order, as three bytes; FROM_16_TO_8 is not applied here.
+static
+cmsUInt8Number* Pack3BytesSwapOptimized(CMSREGISTER _cmsTRANSFORM* info,
+                                        CMSREGISTER cmsUInt16Number wOut[],
+                                        CMSREGISTER cmsUInt8Number* output,
+                                        CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    output[0] = (wOut[2] & 0xFFU);
+    output[1] = (wOut[1] & 0xFFU);
+    output[2] = (wOut[0] & 0xFFU);
+    return output + 3;
+}
+
+
+// Copies wOut[0] .. wOut[2] unchanged into three consecutive 16-bit slots
+// starting at output, in that order, and returns the position past the
+// six bytes written.
+static
+cmsUInt8Number* Pack3Words(CMSREGISTER _cmsTRANSFORM* info,
+                           CMSREGISTER cmsUInt16Number wOut[],
+                           CMSREGISTER cmsUInt8Number* output,
+                           CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    *(cmsUInt16Number*) (output + 0) = wOut[0];
+    *(cmsUInt16Number*) (output + 2) = wOut[1];
+    *(cmsUInt16Number*) (output + 4) = wOut[2];
+
+    return output + 6;
+}
+
+// Copies wOut[2], wOut[1], wOut[0] unchanged, in that order, into three
+// consecutive 16-bit slots starting at output and returns the position
+// past the six bytes written.
+static
+cmsUInt8Number* Pack3WordsSwap(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wOut[],
+                               CMSREGISTER cmsUInt8Number* output,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    *(cmsUInt16Number*) (output + 0) = wOut[2];
+    *(cmsUInt16Number*) (output + 2) = wOut[1];
+    *(cmsUInt16Number*) (output + 4) = wOut[0];
+
+    return output + 6;
+}
+
+// Writes wOut[0] .. wOut[2], each passed through CHANGE_ENDIAN, into
+// three consecutive 16-bit slots starting at output and returns the
+// position past the six bytes written.
+static
+cmsUInt8Number* Pack3WordsBigEndian(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wOut[],
+                                    CMSREGISTER cmsUInt8Number* output,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    *(cmsUInt16Number*) (output + 0) = CHANGE_ENDIAN(wOut[0]);
+    *(cmsUInt16Number*) (output + 2) = CHANGE_ENDIAN(wOut[1]);
+    *(cmsUInt16Number*) (output + 4) = CHANGE_ENDIAN(wOut[2]);
+
+    return output + 6;
+}
+
+// Three bytes via FROM_16_TO_8 from wOut[0..2], followed by one byte that
+// is skipped. Returns the position four bytes past the start.
+static
+cmsUInt8Number* Pack3BytesAndSkip1(CMSREGISTER _cmsTRANSFORM* info,
+                                   CMSREGISTER cmsUInt16Number wOut[],
+                                   CMSREGISTER cmsUInt8Number* output,
+                                   CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    // output[3] is left untouched
+    output[0] = FROM_16_TO_8(wOut[0]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[2]);
+    return output + 4;
+}
+
+// Low eight bits (& 0xFFU) of wOut[0..2] as three bytes, followed by one
+// byte that is skipped. Returns the position four bytes past the start.
+static
+cmsUInt8Number* Pack3BytesAndSkip1Optimized(CMSREGISTER _cmsTRANSFORM* info,
+                                            CMSREGISTER cmsUInt16Number wOut[],
+                                            CMSREGISTER cmsUInt8Number* output,
+                                            CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    // output[3] is left untouched
+    output[0] = (wOut[0] & 0xFFU);
+    output[1] = (wOut[1] & 0xFFU);
+    output[2] = (wOut[2] & 0xFFU);
+    return output + 4;
+}
+
+
+// One skipped byte followed by three bytes via FROM_16_TO_8 from
+// wOut[0..2]. Returns the position four bytes past the start.
+static
+cmsUInt8Number* Pack3BytesAndSkip1SwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                            CMSREGISTER cmsUInt16Number wOut[],
+                                            CMSREGISTER cmsUInt8Number* output,
+                                            CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    // output[0] is left untouched
+    output[1] = FROM_16_TO_8(wOut[0]);
+    output[2] = FROM_16_TO_8(wOut[1]);
+    output[3] = FROM_16_TO_8(wOut[2]);
+    return output + 4;
+}
+
+// One skipped byte followed by the low eight bits (& 0xFFU) of wOut[0..2]
+// as three bytes. Returns the position four bytes past the start.
+static
+cmsUInt8Number* Pack3BytesAndSkip1SwapFirstOptimized(CMSREGISTER _cmsTRANSFORM* info,
+                                                     CMSREGISTER cmsUInt16Number wOut[],
+                                                     CMSREGISTER cmsUInt8Number* output,
+                                                     CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    // output[0] is left untouched
+    output[1] = (wOut[0] & 0xFFU);
+    output[2] = (wOut[1] & 0xFFU);
+    output[3] = (wOut[2] & 0xFFU);
+    return output + 4;
+}
+
+// One skipped byte followed by three bytes via FROM_16_TO_8 taken from
+// wOut[2], wOut[1], wOut[0]. Returns the position four bytes further.
+static
+cmsUInt8Number* Pack3BytesAndSkip1Swap(CMSREGISTER _cmsTRANSFORM* info,
+                                       CMSREGISTER cmsUInt16Number wOut[],
+                                       CMSREGISTER cmsUInt8Number* output,
+                                       CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    // output[0] is left untouched
+    output[1] = FROM_16_TO_8(wOut[2]);
+    output[2] = FROM_16_TO_8(wOut[1]);
+    output[3] = FROM_16_TO_8(wOut[0]);
+    return output + 4;
+}
+
+// One skipped byte followed by the low eight bits (& 0xFFU) of wOut[2],
+// wOut[1], wOut[0]. Returns the position four bytes past the start.
+static
+cmsUInt8Number* Pack3BytesAndSkip1SwapOptimized(CMSREGISTER _cmsTRANSFORM* info,
+                                                CMSREGISTER cmsUInt16Number wOut[],
+                                                CMSREGISTER cmsUInt8Number* output,
+                                                CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    // output[0] is left untouched
+    output[1] = (wOut[2] & 0xFFU);
+    output[2] = (wOut[1] & 0xFFU);
+    output[3] = (wOut[0] & 0xFFU);
+    return output + 4;
+}
+
+
+// Three bytes via FROM_16_TO_8 taken from wOut[2], wOut[1], wOut[0],
+// followed by one skipped byte. Returns the position four bytes further.
+static
+cmsUInt8Number* Pack3BytesAndSkip1SwapSwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                                CMSREGISTER cmsUInt16Number wOut[],
+                                                CMSREGISTER cmsUInt8Number* output,
+                                                CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    // output[3] is left untouched
+    output[0] = FROM_16_TO_8(wOut[2]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[0]);
+    return output + 4;
+}
+
+// Low eight bits (& 0xFFU) of wOut[2], wOut[1], wOut[0] as three bytes,
+// followed by one skipped byte. Returns the position four bytes further.
+static
+cmsUInt8Number* Pack3BytesAndSkip1SwapSwapFirstOptimized(CMSREGISTER _cmsTRANSFORM* info,
+                                                         CMSREGISTER cmsUInt16Number wOut[],
+                                                         CMSREGISTER cmsUInt8Number* output,
+                                                         CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    // output[3] is left untouched
+    output[0] = (wOut[2] & 0xFFU);
+    output[1] = (wOut[1] & 0xFFU);
+    output[2] = (wOut[0] & 0xFFU);
+    return output + 4;
+}
+
+// Copies wOut[0] .. wOut[2] unchanged into three consecutive 16-bit slots
+// starting at output, then skips one more slot without writing to it.
+// Returns the position eight bytes past the start.
+static
+cmsUInt8Number* Pack3WordsAndSkip1(CMSREGISTER _cmsTRANSFORM* info,
+                                   CMSREGISTER cmsUInt16Number wOut[],
+                                   CMSREGISTER cmsUInt8Number* output,
+                                   CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    *(cmsUInt16Number*) (output + 0) = wOut[0];
+    *(cmsUInt16Number*) (output + 2) = wOut[1];
+    *(cmsUInt16Number*) (output + 4) = wOut[2];
+
+    // Bytes 6 and 7 are the skipped slot
+    return output + 8;
+}
+
+// Skips one 16-bit slot, then copies wOut[2], wOut[1], wOut[0] unchanged,
+// in that order, into the following three slots.
+// Returns the position eight bytes past the start.
+static
+cmsUInt8Number* Pack3WordsAndSkip1Swap(CMSREGISTER _cmsTRANSFORM* info,
+                                       CMSREGISTER cmsUInt16Number wOut[],
+                                       CMSREGISTER cmsUInt8Number* output,
+                                       CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    *(cmsUInt16Number*) (output + 2) = wOut[2];
+    *(cmsUInt16Number*) (output + 4) = wOut[1];
+    *(cmsUInt16Number*) (output + 6) = wOut[0];
+
+    // Bytes 0 and 1 are the skipped slot
+    return output + 8;
+}
+
+
+// Skips one 16-bit slot, then copies wOut[0] .. wOut[2] unchanged, in
+// that order, into the following three slots.
+// Returns the position eight bytes past the start.
+static
+cmsUInt8Number* Pack3WordsAndSkip1SwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                            CMSREGISTER cmsUInt16Number wOut[],
+                                            CMSREGISTER cmsUInt8Number* output,
+                                            CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    *(cmsUInt16Number*) (output + 2) = wOut[0];
+    *(cmsUInt16Number*) (output + 4) = wOut[1];
+    *(cmsUInt16Number*) (output + 6) = wOut[2];
+
+    // Bytes 0 and 1 are the skipped slot
+    return output + 8;
+}
+
+
+// Copies wOut[2], wOut[1], wOut[0] unchanged, in that order, into three
+// consecutive 16-bit slots, then skips one more slot without writing.
+// Returns the position eight bytes past the start.
+static
+cmsUInt8Number* Pack3WordsAndSkip1SwapSwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                                CMSREGISTER cmsUInt16Number wOut[],
+                                                CMSREGISTER cmsUInt8Number* output,
+                                                CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    *(cmsUInt16Number*) (output + 0) = wOut[2];
+    *(cmsUInt16Number*) (output + 2) = wOut[1];
+    *(cmsUInt16Number*) (output + 4) = wOut[0];
+
+    // Bytes 6 and 7 are the skipped slot
+    return output + 8;
+}
+
+
+
+// Stores wOut[0], converted with FROM_16_TO_8, as a single byte and
+// returns the position past it.
+static
+cmsUInt8Number* Pack1Byte(CMSREGISTER _cmsTRANSFORM* info,
+                          CMSREGISTER cmsUInt16Number wOut[],
+                          CMSREGISTER cmsUInt8Number* output,
+                          CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    output[0] = FROM_16_TO_8(wOut[0]);
+    return output + 1;
+}
+
+
+// Stores wOut[0] as a single byte: REVERSE_FLAVOR_16 is applied to the
+// word first, FROM_16_TO_8 to the result. Returns the position past it.
+static
+cmsUInt8Number* Pack1ByteReversed(CMSREGISTER _cmsTRANSFORM* info,
+                                  CMSREGISTER cmsUInt16Number wOut[],
+                                  CMSREGISTER cmsUInt8Number* output,
+                                  CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+    output[0] = FROM_16_TO_8(REVERSE_FLAVOR_16(wOut[0]));
+    return output + 1;
+}
+
+
+// Stores wOut[0], converted with FROM_16_TO_8, as one byte and skips the
+// byte after it. Returns the position two bytes past the start.
+static
+cmsUInt8Number* Pack1ByteSkip1(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wOut[],
+                               CMSREGISTER cmsUInt8Number* output,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(wOut[0]);
+    return output + 2;
+}
+
+
+// Skips one byte, then stores wOut[0], converted with FROM_16_TO_8, in
+// the next one. Returns the position two bytes past the start.
+static
+cmsUInt8Number* Pack1ByteSkip1SwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                        CMSREGISTER cmsUInt16Number wOut[],
+                                        CMSREGISTER cmsUInt8Number* output,
+                                        CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[1] = FROM_16_TO_8(wOut[0]);
+    return output + 2;
+}
+
+// Copies wOut[0] unchanged into the 16-bit slot at output and returns
+// the position two bytes further.
+static
+cmsUInt8Number* Pack1Word(CMSREGISTER _cmsTRANSFORM* info,
+                          CMSREGISTER cmsUInt16Number wOut[],
+                          CMSREGISTER cmsUInt8Number* output,
+                          CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    ((cmsUInt16Number*) output)[0] = wOut[0];
+    return output + 2;
+}
+
+
+// Writes wOut[0], passed through REVERSE_FLAVOR_16, into the 16-bit slot
+// at output and returns the position two bytes further.
+static
+cmsUInt8Number* Pack1WordReversed(CMSREGISTER _cmsTRANSFORM* info,
+                                  CMSREGISTER cmsUInt16Number wOut[],
+                                  CMSREGISTER cmsUInt8Number* output,
+                                  CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    ((cmsUInt16Number*) output)[0] = REVERSE_FLAVOR_16(wOut[0]);
+    return output + 2;
+}
+
+// Writes wOut[0], passed through CHANGE_ENDIAN, into the 16-bit slot at
+// output and returns the position two bytes further.
+static
+cmsUInt8Number* Pack1WordBigEndian(CMSREGISTER _cmsTRANSFORM* info,
+                                   CMSREGISTER cmsUInt16Number wOut[],
+                                   CMSREGISTER cmsUInt8Number* output,
+                                   CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    ((cmsUInt16Number*) output)[0] = CHANGE_ENDIAN(wOut[0]);
+    return output + 2;
+}
+
+
+// Copies wOut[0] unchanged into the 16-bit slot at output and skips the
+// two bytes after it. Returns the position four bytes past the start.
+static
+cmsUInt8Number* Pack1WordSkip1(CMSREGISTER _cmsTRANSFORM* info,
+                               CMSREGISTER cmsUInt16Number wOut[],
+                               CMSREGISTER cmsUInt8Number* output,
+                               CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    ((cmsUInt16Number*) output)[0] = wOut[0];
+    return output + 4;
+}
+
+// Skips the first two bytes at output, then copies wOut[0] unchanged
+// into the 16-bit slot that follows.
+// Returns the position four bytes past the start.
+static
+cmsUInt8Number* Pack1WordSkip1SwapFirst(CMSREGISTER _cmsTRANSFORM* info,
+                                        CMSREGISTER cmsUInt16Number wOut[],
+                                        CMSREGISTER cmsUInt8Number* output,
+                                        CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    *(cmsUInt16Number*) (output + 2) = wOut[0];
+    return output + 4;
+}
+
+
+// Unencoded float values -- do not try to optimize these for speed
+// Decodes the 16-bit encoded Lab in wOut with cmsLabEncoded2Float and
+// stores L, a, b as cmsFloat64Number. Planar output puts one value per
+// plane; chunky output writes a cmsCIELab and then skips the extras.
+static
+cmsUInt8Number* PackLabDoubleFrom16(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wOut[],
+                                    CMSREGISTER cmsUInt8Number* output,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    if (T_PLANAR(info -> OutputFormat)) {
+        cmsCIELab  Lab;
+        cmsFloat64Number* Out = (cmsFloat64Number*) output;
+        cmsLabEncoded2Float(&Lab, wOut);
+        // Scale Stride by the sample size before using it as an index into
+        // Out[], exactly as the XYZ and float Lab planar packers do
+        Stride /= PixelSize(info->OutputFormat);
+
+        Out[0]        = Lab.L;
+        Out[Stride]   = Lab.a;
+        Out[Stride*2] = Lab.b;
+        return output + sizeof(cmsFloat64Number);
+    }
+    cmsLabEncoded2Float((cmsCIELab*) output, wOut);
+    return output + (sizeof(cmsCIELab) + T_EXTRA(info ->OutputFormat) * sizeof(cmsFloat64Number));
+}
+
+
+// Decodes the 16-bit encoded Lab in wOut with cmsLabEncoded2Float and
+// stores L, a, b as cmsFloat32Number. Planar output writes one value per
+// plane, with Stride first divided by PixelSize() to index the planes.
+// Chunky output writes three floats in a row and skips the extras.
+static
+cmsUInt8Number* PackLabFloatFrom16(CMSREGISTER _cmsTRANSFORM* info,
+                                    CMSREGISTER cmsUInt16Number wOut[],
+                                    CMSREGISTER cmsUInt8Number* output,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsFloat32Number* Out = (cmsFloat32Number*) output;
+    cmsCIELab Lab;
+    cmsLabEncoded2Float(&Lab, wOut);
+
+    if (!T_PLANAR(info -> OutputFormat)) {
+        Out[0] = (cmsFloat32Number) Lab.L;
+        Out[1] = (cmsFloat32Number) Lab.a;
+        Out[2] = (cmsFloat32Number) Lab.b;
+
+        return output + (3 + T_EXTRA(info ->OutputFormat)) * sizeof(cmsFloat32Number);
+    }
+
+    Stride /= PixelSize(info->OutputFormat);
+
+    Out[0]          = (cmsFloat32Number) Lab.L;
+    Out[Stride]     = (cmsFloat32Number) Lab.a;
+    Out[Stride * 2] = (cmsFloat32Number) Lab.b;
+
+    return output + sizeof(cmsFloat32Number);
+}
+
+// Decodes the 16-bit encoded XYZ in wOut with cmsXYZEncoded2Float and
+// stores X, Y, Z as cmsFloat64Number. Planar output puts one value per
+// plane; chunky output writes a cmsCIEXYZ and then skips the extras.
+static
+cmsUInt8Number* PackXYZDoubleFrom16(CMSREGISTER _cmsTRANSFORM* Info,
+                                    CMSREGISTER cmsUInt16Number wOut[],
+                                    CMSREGISTER cmsUInt8Number* output,
+                                    CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsFloat64Number* Out;
+    cmsCIEXYZ XYZ;
+
+    if (!T_PLANAR(Info -> OutputFormat)) {
+        // Chunky: decode straight into the caller's buffer
+        cmsXYZEncoded2Float((cmsCIEXYZ*) output, wOut);
+        return output + (sizeof(cmsCIEXYZ) + T_EXTRA(Info ->OutputFormat) * sizeof(cmsFloat64Number));
+    }
+
+    Out = (cmsFloat64Number*) output;
+    cmsXYZEncoded2Float(&XYZ, wOut);
+    Stride /= PixelSize(Info->OutputFormat);
+
+    Out[0]          = XYZ.X;
+    Out[Stride]     = XYZ.Y;
+    Out[Stride * 2] = XYZ.Z;
+
+    return output + sizeof(cmsFloat64Number);
+}
+
+// Decodes the 16-bit encoded XYZ in wOut with cmsXYZEncoded2Float and
+// stores X, Y, Z as cmsFloat32Number. Planar output writes one value per
+// plane; chunky output writes three floats in a row and then skips the
+// extra channels. Returns the position of the next pixel.
+static
+cmsUInt8Number* PackXYZFloatFrom16(CMSREGISTER _cmsTRANSFORM* Info,
+                                   CMSREGISTER cmsUInt16Number wOut[],
+                                   CMSREGISTER cmsUInt8Number* output,
+                                   CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsFloat32Number* Out = (cmsFloat32Number*) output;
+    cmsCIEXYZ XYZ;
+    cmsUInt32Number Step;
+
+    cmsXYZEncoded2Float(&XYZ, wOut);
+
+    if (T_PLANAR(Info -> OutputFormat)) {
+        // Stride divided by PixelSize() is the index step between planes
+        Step = Stride / PixelSize(Info->OutputFormat);
+
+        Out[0]        = (cmsFloat32Number) XYZ.X;
+        Out[Step]     = (cmsFloat32Number) XYZ.Y;
+        Out[Step * 2] = (cmsFloat32Number) XYZ.Z;
+
+        return output + sizeof(cmsFloat32Number);
+    }
+
+    // Chunky: three floats in a row, then the extra channels are skipped
+    Out[0] = (cmsFloat32Number) XYZ.X;
+    Out[1] = (cmsFloat32Number) XYZ.Y;
+    Out[2] = (cmsFloat32Number) XYZ.Z;
+
+    return output + (3 * sizeof(cmsFloat32Number) + T_EXTRA(Info ->OutputFormat) * sizeof(cmsFloat32Number));
+}
+
+// Generic packer from 16-bit words to cmsFloat64Number samples, chunky or
+// planar. Words are divided by 655.35 when IsInkSpace() holds for the
+// output format and by 65535.0 otherwise. Channel order, inversion, extra
+// channels and SWAPFIRST are taken from info->OutputFormat.
+static
+cmsUInt8Number* PackDoubleFrom16(CMSREGISTER _cmsTRANSFORM* info,
+                                CMSREGISTER cmsUInt16Number wOut[],
+                                CMSREGISTER cmsUInt8Number* output,
+                                CMSREGISTER cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan      = T_CHANNELS(info -> OutputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info ->OutputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->OutputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> OutputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> OutputFormat);
+    cmsUInt32Number Planar     = T_PLANAR(info -> OutputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsFloat64Number maximum = IsInkSpace(info ->OutputFormat) ? 655.35 : 65535.0;
+    cmsFloat64Number v = 0;
+    cmsFloat64Number* swap1 = (cmsFloat64Number*) output;
+    cmsUInt32Number i, start = 0;
+
+    Stride /= PixelSize(info->OutputFormat);
+
+    if (ExtraFirst)
+        start = Extra;
+
+    for (i=0; i < nChan; i++) {
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+        cmsUInt16Number w = wOut[index];
+        // Invert while the sample is still a 16-bit word. Subtracting the
+        // already scaled value from "maximum" (the divisor, not the top of
+        // the output range) produced out-of-range results.
+        if (Reverse)
+            w = REVERSE_FLAVOR_16(w);
+        v = (cmsFloat64Number) w / maximum;
+        if (Planar)
+            ((cmsFloat64Number*) output)[(i + start)  * Stride]= v;
+        else
+            ((cmsFloat64Number*) output)[i + start] = v;
+    }
+
+    if (Extra == 0 && SwapFirst) {
+         memmove(swap1 + 1, swap1, (nChan-1)* sizeof(cmsFloat64Number));
+        *swap1 = v;
+    }
+
+    if (Planar)
+        return output + sizeof(cmsFloat64Number);
+    return output + (nChan + Extra) * sizeof(cmsFloat64Number);
+}
+
+
+static
+cmsUInt8Number* PackFloatFrom16(CMSREGISTER _cmsTRANSFORM* info,
+                                CMSREGISTER cmsUInt16Number wOut[],
+                                CMSREGISTER cmsUInt8Number* output,
+                                CMSREGISTER cmsUInt32Number Stride)
+{
+    // Packs the 16-bit samples in wOut[] as 32-bit floats, following the
+    // channel count, swap, flavor, extra and planar fields of OutputFormat.
+    // Returns the output pointer advanced by one sample for planar formats,
+    // or by the whole pixel (channels + extras) otherwise.
+    const cmsUInt32Number fmt       = info->OutputFormat;
+    const cmsUInt32Number channels  = T_CHANNELS(fmt);
+    const cmsUInt32Number reversed  = T_DOSWAP(fmt);
+    const cmsUInt32Number inverted  = T_FLAVOR(fmt);
+    const cmsUInt32Number extras    = T_EXTRA(fmt);
+    const cmsUInt32Number swapFirst = T_SWAPFIRST(fmt);
+    const cmsUInt32Number planar    = T_PLANAR(fmt);
+    const cmsFloat64Number range    = IsInkSpace(fmt) ? 655.35 : 65535.0;
+    cmsFloat32Number* const dst     = (cmsFloat32Number*) output;
+    cmsFloat64Number sample         = 0;   // computed in double, narrowed on store
+    cmsUInt32Number step, first, k;
+
+    // Stride is scaled down by PixelSize() before being used as an element index
+    step = Stride / PixelSize(fmt);
+
+    // When exactly one of DoSwap / SwapFirst is set, writing starts after `extras` slots
+    first = (reversed ^ swapFirst) ? extras : 0;
+
+    for (k = 0; k < channels; k++) {
+        const cmsUInt32Number src  = reversed ? (channels - k - 1) : k;
+        const cmsUInt32Number slot = k + first;
+
+        sample = (cmsFloat64Number) wOut[src] / range;
+
+        // NOTE(review): sample was already divided by range above -- confirm this inversion is intended
+        if (inverted)
+            sample = range - sample;
+
+        dst[planar ? slot * step : slot] = (cmsFloat32Number) sample;
+    }
+
+    // SwapFirst without extras: shift written values up one slot, last sample goes first
+    if (extras == 0 && swapFirst) {
+        memmove(dst + 1, dst, (channels - 1) * sizeof(cmsFloat32Number));
+        *dst = (cmsFloat32Number) sample;
+    }
+
+    return planar ? output + sizeof(cmsFloat32Number)
+                  : output + (channels + extras) * sizeof(cmsFloat32Number);
+}
+
+
+
+// --------------------------------------------------------------------------------------------------------
+
+static
+cmsUInt8Number* PackFloatsFromFloat(_cmsTRANSFORM* info,
+                                    cmsFloat32Number wOut[],
+                                    cmsUInt8Number* output,
+                                    cmsUInt32Number Stride)
+{
+    // Packs the float samples in wOut[] as 32-bit floats multiplied by the
+    // format's range (100 when IsInkSpace() is true, 1 otherwise), following
+    // the swap, flavor, extra and planar fields of OutputFormat. Returns the
+    // output pointer advanced by one sample (planar) or one whole pixel (chunky).
+    const cmsUInt32Number fmt       = info->OutputFormat;
+    const cmsUInt32Number channels  = T_CHANNELS(fmt);
+    const cmsUInt32Number reversed  = T_DOSWAP(fmt);
+    const cmsUInt32Number inverted  = T_FLAVOR(fmt);
+    const cmsUInt32Number extras    = T_EXTRA(fmt);
+    const cmsUInt32Number swapFirst = T_SWAPFIRST(fmt);
+    const cmsUInt32Number planar    = T_PLANAR(fmt);
+    const cmsFloat64Number range    = IsInkSpace(fmt) ? 100.0 : 1.0;
+    cmsFloat32Number* const dst     = (cmsFloat32Number*) output;
+    cmsFloat64Number sample         = 0;   // computed in double, narrowed on store
+    cmsUInt32Number step, first, k;
+
+    // Stride is scaled down by PixelSize() before being used as an element index
+    step = Stride / PixelSize(fmt);
+
+    // When exactly one of DoSwap / SwapFirst is set, writing starts after `extras` slots
+    first = (reversed ^ swapFirst) ? extras : 0;
+
+    for (k = 0; k < channels; k++) {
+        const cmsUInt32Number src  = reversed ? (channels - k - 1) : k;
+        const cmsUInt32Number slot = k + first;
+
+        sample = wOut[src] * range;
+
+        // Flavor bit set: store range - sample instead of sample
+        if (inverted)
+            sample = range - sample;
+
+        dst[planar ? slot * step : slot] = (cmsFloat32Number) sample;
+    }
+
+    // SwapFirst without extras: shift written values up one slot, last sample goes first
+    if (extras == 0 && swapFirst) {
+        memmove(dst + 1, dst, (channels - 1) * sizeof(cmsFloat32Number));
+        *dst = (cmsFloat32Number) sample;
+    }
+
+    return planar ? output + sizeof(cmsFloat32Number)
+                  : output + (channels + extras) * sizeof(cmsFloat32Number);
+}
+
+static
+cmsUInt8Number* PackDoublesFromFloat(_cmsTRANSFORM* info,
+                                    cmsFloat32Number wOut[],
+                                    cmsUInt8Number* output,
+                                    cmsUInt32Number Stride)
+{
+    // Packs the float samples in wOut[] as 64-bit floats multiplied by the
+    // format's range (100 when IsInkSpace() is true, 1 otherwise), following
+    // the swap, flavor, extra and planar fields of OutputFormat. Returns the
+    // output pointer advanced by one sample (planar) or one whole pixel (chunky).
+    const cmsUInt32Number fmt       = info->OutputFormat;
+    const cmsUInt32Number channels  = T_CHANNELS(fmt);
+    const cmsUInt32Number reversed  = T_DOSWAP(fmt);
+    const cmsUInt32Number inverted  = T_FLAVOR(fmt);
+    const cmsUInt32Number extras    = T_EXTRA(fmt);
+    const cmsUInt32Number swapFirst = T_SWAPFIRST(fmt);
+    const cmsUInt32Number planar    = T_PLANAR(fmt);
+    const cmsFloat64Number range    = IsInkSpace(fmt) ? 100.0 : 1.0;
+    cmsFloat64Number* const dst     = (cmsFloat64Number*) output;
+    cmsFloat64Number sample         = 0;
+    cmsUInt32Number step, first, k;
+
+    // Stride is scaled down by PixelSize() before being used as an element index
+    step = Stride / PixelSize(fmt);
+
+    // When exactly one of DoSwap / SwapFirst is set, writing starts after `extras` slots
+    first = (reversed ^ swapFirst) ? extras : 0;
+
+    for (k = 0; k < channels; k++) {
+        const cmsUInt32Number src  = reversed ? (channels - k - 1) : k;
+        const cmsUInt32Number slot = k + first;
+
+        sample = wOut[src] * range;
+
+        // Flavor bit set: store range - sample instead of sample
+        if (inverted)
+            sample = range - sample;
+
+        dst[planar ? slot * step : slot] = sample;
+    }
+
+    // SwapFirst without extras: shift the written values up one slot and
+    // store the last converted sample in front
+    if (extras == 0 && swapFirst) {
+        memmove(dst + 1, dst, (channels - 1) * sizeof(cmsFloat64Number));
+        *dst = sample;
+    }
+
+    return planar ? output + sizeof(cmsFloat64Number)
+                  : output + (channels + extras) * sizeof(cmsFloat64Number);
+}
+
+
+
+
+
+static
+cmsUInt8Number* PackLabFloatFromFloat(_cmsTRANSFORM* Info,
+                                      cmsFloat32Number wOut[],
+                                      cmsUInt8Number* output,
+                                      cmsUInt32Number Stride)
+{
+    // Stores a Lab triplet as 32-bit floats: L is wOut[0] * 100, while a and b
+    // are wOut[1] and wOut[2] multiplied by 255 and offset by -128.
+    // Returns the advanced output pointer.
+    cmsFloat32Number* const dst  = (cmsFloat32Number*) output;
+    const cmsUInt32Number planar = T_PLANAR(Info->OutputFormat);
+    cmsUInt32Number gap          = 1;   // element distance between components
+
+    // Planar formats keep the components one scaled stride apart
+    if (planar)
+        gap = Stride / PixelSize(Info->OutputFormat);
+
+    dst[0]       = (cmsFloat32Number) (wOut[0] * 100.0);
+    dst[gap]     = (cmsFloat32Number) (wOut[1] * 255.0 - 128.0);
+    dst[gap * 2] = (cmsFloat32Number) (wOut[2] * 255.0 - 128.0);
+
+    // Planar: advance one sample. Chunky: advance past L, a, b and any extras.
+    if (planar)
+        return output + sizeof(cmsFloat32Number);
+
+    return output + (sizeof(cmsFloat32Number)*3 + T_EXTRA(Info->OutputFormat) * sizeof(cmsFloat32Number));
+
+}
+
+
+static
+cmsUInt8Number* PackLabDoubleFromFloat(_cmsTRANSFORM* Info,
+                                       cmsFloat32Number wOut[],
+                                       cmsUInt8Number* output,
+                                       cmsUInt32Number Stride)
+{
+    // Stores a Lab triplet as 64-bit floats: L is wOut[0] * 100, while a and b
+    // are wOut[1] and wOut[2] multiplied by 255 and offset by -128.
+    // Returns the advanced output pointer.
+    cmsFloat64Number* const dst  = (cmsFloat64Number*) output;
+    const cmsUInt32Number planar = T_PLANAR(Info->OutputFormat);
+    cmsUInt32Number gap          = 1;   // element distance between components
+
+    // Planar formats keep the components one scaled stride apart
+    if (planar)
+        gap = Stride / PixelSize(Info->OutputFormat);
+
+    dst[0]       = (cmsFloat64Number) (wOut[0] * 100.0);
+    dst[gap]     = (cmsFloat64Number) (wOut[1] * 255.0 - 128.0);
+    dst[gap * 2] = (cmsFloat64Number) (wOut[2] * 255.0 - 128.0);
+
+    // Planar: advance one sample. Chunky: advance past L, a, b and any extras.
+    if (planar)
+        return output + sizeof(cmsFloat64Number);
+
+    return output + (sizeof(cmsFloat64Number)*3 + T_EXTRA(Info->OutputFormat) * sizeof(cmsFloat64Number));
+
+}
+
+
+// From 0..1 range to 0..MAX_ENCODEABLE_XYZ
+static
+cmsUInt8Number* PackXYZFloatFromFloat(_cmsTRANSFORM* Info,
+                                      cmsFloat32Number wOut[],
+                                      cmsUInt8Number* output,
+                                      cmsUInt32Number Stride)
+{
+    // Stores an XYZ triplet as 32-bit floats, each component being the
+    // matching wOut[] value multiplied by MAX_ENCODEABLE_XYZ.
+    // Returns the advanced output pointer.
+    cmsFloat32Number* const dst  = (cmsFloat32Number*) output;
+    const cmsUInt32Number planar = T_PLANAR(Info->OutputFormat);
+    cmsUInt32Number gap          = 1;   // element distance between components
+
+    // Planar formats keep the components one scaled stride apart
+    if (planar)
+        gap = Stride / PixelSize(Info->OutputFormat);
+
+    dst[0]       = (cmsFloat32Number) (wOut[0] * MAX_ENCODEABLE_XYZ);
+    dst[gap]     = (cmsFloat32Number) (wOut[1] * MAX_ENCODEABLE_XYZ);
+    dst[gap * 2] = (cmsFloat32Number) (wOut[2] * MAX_ENCODEABLE_XYZ);
+
+    // Planar: advance one sample. Chunky: advance past X, Y, Z and any extras.
+    if (planar)
+        return output + sizeof(cmsFloat32Number);
+
+    return output + (sizeof(cmsFloat32Number)*3 + T_EXTRA(Info->OutputFormat) * sizeof(cmsFloat32Number));
+
+}
+
+// Same, but convert to double
+static
+cmsUInt8Number* PackXYZDoubleFromFloat(_cmsTRANSFORM* Info,
+                                       cmsFloat32Number wOut[],
+                                       cmsUInt8Number* output,
+                                       cmsUInt32Number Stride)
+{
+    // Stores an XYZ triplet as 64-bit floats, each component being the
+    // matching wOut[] value multiplied by MAX_ENCODEABLE_XYZ.
+    // Returns the advanced output pointer.
+    cmsFloat64Number* const dst  = (cmsFloat64Number*) output;
+    const cmsUInt32Number planar = T_PLANAR(Info->OutputFormat);
+    cmsUInt32Number gap          = 1;   // element distance between components
+
+    // Planar formats keep the components one scaled stride apart
+    if (planar)
+        gap = Stride / PixelSize(Info->OutputFormat);
+
+    dst[0]       = (cmsFloat64Number) (wOut[0] * MAX_ENCODEABLE_XYZ);
+    dst[gap]     = (cmsFloat64Number) (wOut[1] * MAX_ENCODEABLE_XYZ);
+    dst[gap * 2] = (cmsFloat64Number) (wOut[2] * MAX_ENCODEABLE_XYZ);
+
+    // Planar: advance one sample. Chunky: advance past X, Y, Z and any extras.
+    if (planar)
+        return output + sizeof(cmsFloat64Number);
+
+    return output + (sizeof(cmsFloat64Number)*3 + T_EXTRA(Info->OutputFormat) * sizeof(cmsFloat64Number));
+
+}
+
+
+// ----------------------------------------------------------------------------------------------------------------
+
+#ifndef CMS_NO_HALF_SUPPORT 
+
+// Decodes a stream of half floats to wIn[] described by input format
+
+static
+cmsUInt8Number* UnrollHalfTo16(CMSREGISTER _cmsTRANSFORM* info,
+                                CMSREGISTER cmsUInt16Number wIn[],
+                                CMSREGISTER cmsUInt8Number* accum,
+                                CMSREGISTER cmsUInt32Number Stride)
+{
+    // Decodes half-float samples from accum into 16-bit words in wIn[], per InputFormat
+    cmsUInt32Number nChan      = T_CHANNELS(info -> InputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info ->InputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->InputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> InputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> InputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsUInt32Number Planar     = T_PLANAR(info -> InputFormat);
+    cmsFloat32Number v;
+    cmsUInt32Number i, start = 0;
+    cmsFloat32Number maximum = IsInkSpace(info ->InputFormat) ? 655.35F : 65535.0F;
+
+    // accum is laid out per InputFormat, so scale the stride by the input pixel size
+    Stride /= PixelSize(info->InputFormat);
+
+    if (ExtraFirst)
+            start = Extra;
+
+    for (i=0; i < nChan; i++) {
+
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+
+        if (Planar)
+            v = _cmsHalf2Float ( ((cmsUInt16Number*) accum)[(i + start) * Stride] );
+        else
+            v = _cmsHalf2Float ( ((cmsUInt16Number*) accum)[i + start] ) ;
+
+        // NOTE(review): v is subtracted from maximum before being scaled by maximum -- confirm intended
+        if (Reverse) v = maximum - v;
+
+        wIn[index] = _cmsQuickSaturateWord(v * maximum);
+    }
+
+    // SwapFirst without extras: rotate wIn[] left by one; the first value goes last
+    if (Extra == 0 && SwapFirst) {
+        cmsUInt16Number tmp = wIn[0];
+
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsUInt16Number));
+        wIn[nChan-1] = tmp;
+    }
+    // Returns accum advanced by one sample (planar) or one whole pixel (chunky)
+    if (T_PLANAR(info -> InputFormat))
+        return accum + sizeof(cmsUInt16Number);
+    else
+        return accum + (nChan + Extra) * sizeof(cmsUInt16Number);
+}
+
+// Decodes a stream of half floats to wIn[] described by input format
+
+static
+cmsUInt8Number* UnrollHalfToFloat(_cmsTRANSFORM* info,
+                                    cmsFloat32Number wIn[],
+                                    cmsUInt8Number* accum,
+                                    cmsUInt32Number Stride)
+{
+    // Decodes half-float samples from accum into floats in wIn[], per InputFormat
+    cmsUInt32Number nChan      = T_CHANNELS(info -> InputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info ->InputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->InputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> InputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> InputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsUInt32Number Planar     = T_PLANAR(info -> InputFormat);
+    cmsFloat32Number v;
+    cmsUInt32Number i, start = 0;
+    cmsFloat32Number maximum = IsInkSpace(info ->InputFormat) ? 100.0F : 1.0F;
+    // accum is laid out per InputFormat, so scale the stride by the input pixel size
+    Stride /= PixelSize(info->InputFormat);
+
+    if (ExtraFirst)
+            start = Extra;
+
+    for (i=0; i < nChan; i++) {
+
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+
+        if (Planar)
+            v =  _cmsHalf2Float ( ((cmsUInt16Number*) accum)[(i + start) * Stride] );
+        else
+            v =  _cmsHalf2Float ( ((cmsUInt16Number*) accum)[i + start] ) ;
+
+        v /= maximum;
+
+        wIn[index] = Reverse ? 1 - v : v;
+    }
+
+    // SwapFirst without extras: rotate wIn[] left by one; the first value goes last
+    if (Extra == 0 && SwapFirst) {
+        cmsFloat32Number tmp = wIn[0];
+
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsFloat32Number));
+        wIn[nChan-1] = tmp;
+    }
+    // Returns accum advanced by one sample (planar) or one whole pixel (chunky)
+    if (T_PLANAR(info -> InputFormat))
+        return accum + sizeof(cmsUInt16Number);
+    else
+        return accum + (nChan + Extra) * sizeof(cmsUInt16Number);
+}
+
+
+static
+cmsUInt8Number* PackHalfFrom16(CMSREGISTER _cmsTRANSFORM* info,
+                                CMSREGISTER cmsUInt16Number wOut[],
+                                CMSREGISTER cmsUInt8Number* output,
+                                CMSREGISTER cmsUInt32Number Stride)
+{
+    // Packs the 16-bit samples in wOut[] as half floats (stored in 16-bit
+    // words), following the channel count, swap, flavor, extra and planar
+    // fields of OutputFormat. Returns the output pointer advanced by one
+    // sample for planar formats, or by the whole pixel otherwise.
+    const cmsUInt32Number fmt       = info->OutputFormat;
+    const cmsUInt32Number channels  = T_CHANNELS(fmt);
+    const cmsUInt32Number reversed  = T_DOSWAP(fmt);
+    const cmsUInt32Number inverted  = T_FLAVOR(fmt);
+    const cmsUInt32Number extras    = T_EXTRA(fmt);
+    const cmsUInt32Number swapFirst = T_SWAPFIRST(fmt);
+    const cmsUInt32Number planar    = T_PLANAR(fmt);
+    const cmsFloat32Number range    = IsInkSpace(fmt) ? 655.35F : 65535.0F;
+    cmsUInt16Number* const dst      = (cmsUInt16Number*) output;
+    cmsFloat32Number sample         = 0;
+    cmsUInt32Number step, first, k;
+
+    // Stride is scaled down by PixelSize() before being used as an element index
+    step = Stride / PixelSize(fmt);
+
+    // When exactly one of DoSwap / SwapFirst is set, writing starts after `extras` slots
+    first = (reversed ^ swapFirst) ? extras : 0;
+
+    for (k = 0; k < channels; k++) {
+        const cmsUInt32Number src  = reversed ? (channels - k - 1) : k;
+        const cmsUInt32Number slot = k + first;
+
+        sample = (cmsFloat32Number) wOut[src] / range;
+
+        // NOTE(review): sample was already divided by range above -- confirm this inversion is intended
+        if (inverted)
+            sample = range - sample;
+
+        dst[planar ? slot * step : slot] = _cmsFloat2Half(sample);
+    }
+
+    // SwapFirst without extras: shift written values up one slot, last sample goes first
+    if (extras == 0 && swapFirst) {
+        memmove(dst + 1, dst, (channels - 1) * sizeof(cmsUInt16Number));
+        *dst = _cmsFloat2Half(sample);
+    }
+
+    return planar ? output + sizeof(cmsUInt16Number)
+                  : output + (channels + extras) * sizeof(cmsUInt16Number);
+}
+
+
+
+static
+cmsUInt8Number* PackHalfFromFloat(_cmsTRANSFORM* info,
+                                    cmsFloat32Number wOut[],
+                                    cmsUInt8Number* output,
+                                    cmsUInt32Number Stride)
+{
+    // Packs the float samples in wOut[] as half floats (stored in 16-bit
+    // words) after multiplying by the format's range (100 when IsInkSpace()
+    // is true, 1 otherwise). Returns the output pointer advanced by one
+    // sample for planar formats, or by the whole pixel otherwise.
+    const cmsUInt32Number fmt       = info->OutputFormat;
+    const cmsUInt32Number channels  = T_CHANNELS(fmt);
+    const cmsUInt32Number reversed  = T_DOSWAP(fmt);
+    const cmsUInt32Number inverted  = T_FLAVOR(fmt);
+    const cmsUInt32Number extras    = T_EXTRA(fmt);
+    const cmsUInt32Number swapFirst = T_SWAPFIRST(fmt);
+    const cmsUInt32Number planar    = T_PLANAR(fmt);
+    const cmsFloat32Number range    = IsInkSpace(fmt) ? 100.0F : 1.0F;
+    cmsUInt16Number* const dst      = (cmsUInt16Number*) output;
+    cmsFloat32Number sample         = 0;
+    cmsUInt32Number step, first, k;
+
+    // Stride is scaled down by PixelSize() before being used as an element index
+    step = Stride / PixelSize(fmt);
+
+    // When exactly one of DoSwap / SwapFirst is set, writing starts after `extras` slots
+    first = (reversed ^ swapFirst) ? extras : 0;
+
+    for (k = 0; k < channels; k++) {
+        const cmsUInt32Number src  = reversed ? (channels - k - 1) : k;
+        const cmsUInt32Number slot = k + first;
+
+        sample = wOut[src] * range;
+
+        // Flavor bit set: store range - sample instead of sample
+        if (inverted)
+            sample = range - sample;
+
+        dst[planar ? slot * step : slot] = _cmsFloat2Half(sample);
+    }
+
+    // SwapFirst without extras: shift written values up one slot, last sample goes first
+    if (extras == 0 && swapFirst) {
+        memmove(dst + 1, dst, (channels - 1) * sizeof(cmsUInt16Number));
+        *dst = (cmsUInt16Number)_cmsFloat2Half(sample);
+    }
+
+    return planar ? output + sizeof(cmsUInt16Number)
+                  : output + (channels + extras) * sizeof(cmsUInt16Number);
+}
+
+#endif
+
+// ----------------------------------------------------------------------------------------------------------------
+
+
+static const cmsFormatters16 InputFormatters16[] = {
+    // Scanned in order by _cmsGetStockInputFormatter(); the first entry whose Type matches wins.
+    //    Type                                          Mask                  Function
+    //  ----------------------------   ------------------------------------  ----------------------------
+    { TYPE_Lab_DBL,                                 ANYPLANAR|ANYEXTRA,   UnrollLabDoubleTo16},
+    { TYPE_XYZ_DBL,                                 ANYPLANAR|ANYEXTRA,   UnrollXYZDoubleTo16},
+    { TYPE_Lab_FLT,                                 ANYPLANAR|ANYEXTRA,   UnrollLabFloatTo16},
+    { TYPE_XYZ_FLT,                                 ANYPLANAR|ANYEXTRA,   UnrollXYZFloatTo16},
+    { TYPE_GRAY_DBL,                                                 0,   UnrollDouble1Chan},
+    { FLOAT_SH(1)|BYTES_SH(0), ANYCHANNELS|ANYPLANAR|ANYSWAPFIRST|ANYFLAVOR|
+                                             ANYSWAP|ANYEXTRA|ANYSPACE,   UnrollDoubleTo16},
+    { FLOAT_SH(1)|BYTES_SH(4), ANYCHANNELS|ANYPLANAR|ANYSWAPFIRST|ANYFLAVOR|
+                                             ANYSWAP|ANYEXTRA|ANYSPACE,   UnrollFloatTo16},
+#ifndef CMS_NO_HALF_SUPPORT 
+    { FLOAT_SH(1)|BYTES_SH(2), ANYCHANNELS|ANYPLANAR|ANYSWAPFIRST|ANYFLAVOR|
+                                            ANYEXTRA|ANYSWAP|ANYSPACE,   UnrollHalfTo16},
+#endif
+    // 1-byte samples: specific layouts first
+    { CHANNELS_SH(1)|BYTES_SH(1),                              ANYSPACE,  Unroll1Byte},
+    { CHANNELS_SH(1)|BYTES_SH(1)|EXTRA_SH(1),                  ANYSPACE,  Unroll1ByteSkip1},
+    { CHANNELS_SH(1)|BYTES_SH(1)|EXTRA_SH(2),                  ANYSPACE,  Unroll1ByteSkip2},
+    { CHANNELS_SH(1)|BYTES_SH(1)|FLAVOR_SH(1),                 ANYSPACE,  Unroll1ByteReversed},
+    { COLORSPACE_SH(PT_MCH2)|CHANNELS_SH(2)|BYTES_SH(1),              0,  Unroll2Bytes},
+
+    { TYPE_LabV2_8,                                                   0,  UnrollLabV2_8 },
+    { TYPE_ALabV2_8,                                                  0,  UnrollALabV2_8 },
+    { TYPE_LabV2_16,                                                  0,  UnrollLabV2_16 },
+
+    { CHANNELS_SH(3)|BYTES_SH(1),                              ANYSPACE,  Unroll3Bytes},
+    { CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1),                 ANYSPACE,  Unroll3BytesSwap},
+    { CHANNELS_SH(3)|EXTRA_SH(1)|BYTES_SH(1)|DOSWAP_SH(1),     ANYSPACE,  Unroll3BytesSkip1Swap},
+    { CHANNELS_SH(3)|EXTRA_SH(1)|BYTES_SH(1)|SWAPFIRST_SH(1),  ANYSPACE,  Unroll3BytesSkip1SwapFirst},
+
+    { CHANNELS_SH(3)|EXTRA_SH(1)|BYTES_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1),  
+                                                               ANYSPACE,  Unroll3BytesSkip1SwapSwapFirst},
+
+    { CHANNELS_SH(4)|BYTES_SH(1),                              ANYSPACE,  Unroll4Bytes},
+    { CHANNELS_SH(4)|BYTES_SH(1)|FLAVOR_SH(1),                 ANYSPACE,  Unroll4BytesReverse},
+    { CHANNELS_SH(4)|BYTES_SH(1)|SWAPFIRST_SH(1),              ANYSPACE,  Unroll4BytesSwapFirst},
+    { CHANNELS_SH(4)|BYTES_SH(1)|DOSWAP_SH(1),                 ANYSPACE,  Unroll4BytesSwap},
+    { CHANNELS_SH(4)|BYTES_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1), ANYSPACE,  Unroll4BytesSwapSwapFirst},
+    // Generic 1-byte fallbacks (planar, then chunky)
+    { BYTES_SH(1)|PLANAR_SH(1), ANYFLAVOR|ANYSWAPFIRST|
+                                   ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE, UnrollPlanarBytes},
+
+    { BYTES_SH(1),    ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|
+                                           ANYEXTRA|ANYCHANNELS|ANYSPACE, UnrollChunkyBytes},
+    // 2-byte samples: specific layouts first
+    { CHANNELS_SH(1)|BYTES_SH(2),                              ANYSPACE,  Unroll1Word},
+    { CHANNELS_SH(1)|BYTES_SH(2)|FLAVOR_SH(1),                 ANYSPACE,  Unroll1WordReversed},
+    { CHANNELS_SH(1)|BYTES_SH(2)|EXTRA_SH(3),                  ANYSPACE,  Unroll1WordSkip3},
+
+    { CHANNELS_SH(2)|BYTES_SH(2),                              ANYSPACE,  Unroll2Words},
+    { CHANNELS_SH(3)|BYTES_SH(2),                              ANYSPACE,  Unroll3Words},
+    { CHANNELS_SH(4)|BYTES_SH(2),                              ANYSPACE,  Unroll4Words},
+
+    { CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1),                 ANYSPACE,  Unroll3WordsSwap},
+    { CHANNELS_SH(3)|BYTES_SH(2)|EXTRA_SH(1)|SWAPFIRST_SH(1),  ANYSPACE,  Unroll3WordsSkip1SwapFirst},
+    { CHANNELS_SH(3)|BYTES_SH(2)|EXTRA_SH(1)|DOSWAP_SH(1),     ANYSPACE,  Unroll3WordsSkip1Swap},
+    { CHANNELS_SH(4)|BYTES_SH(2)|FLAVOR_SH(1),                 ANYSPACE,  Unroll4WordsReverse},
+    { CHANNELS_SH(4)|BYTES_SH(2)|SWAPFIRST_SH(1),              ANYSPACE,  Unroll4WordsSwapFirst},
+    { CHANNELS_SH(4)|BYTES_SH(2)|DOSWAP_SH(1),                 ANYSPACE,  Unroll4WordsSwap},
+    { CHANNELS_SH(4)|BYTES_SH(2)|DOSWAP_SH(1)|SWAPFIRST_SH(1), ANYSPACE,  Unroll4WordsSwapSwapFirst},
+
+    // Generic 2-byte fallbacks (planar, then any other layout)
+    { BYTES_SH(2)|PLANAR_SH(1),  ANYFLAVOR|ANYSWAP|ANYENDIAN|ANYEXTRA|ANYCHANNELS|ANYSPACE,  UnrollPlanarWords},
+    { BYTES_SH(2),  ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYENDIAN|ANYEXTRA|ANYCHANNELS|ANYSPACE,  UnrollAnyWords},
+};
+
+
+
+static const cmsFormattersFloat InputFormattersFloat[] = {
+    // Scanned in order by _cmsGetStockInputFormatter() for CMS_PACK_FLAGS_FLOAT; first match wins.
+    //    Type                                          Mask                  Function
+    //  ----------------------------   ------------------------------------  ----------------------------
+    {     TYPE_Lab_DBL,                                ANYPLANAR|ANYEXTRA,   UnrollLabDoubleToFloat},
+    {     TYPE_Lab_FLT,                                ANYPLANAR|ANYEXTRA,   UnrollLabFloatToFloat},
+
+    {     TYPE_XYZ_DBL,                                ANYPLANAR|ANYEXTRA,   UnrollXYZDoubleToFloat},
+    {     TYPE_XYZ_FLT,                                ANYPLANAR|ANYEXTRA,   UnrollXYZFloatToFloat},
+    // Generic entries: BYTES_SH(4) floats, BYTES_SH(0) doubles, BYTES_SH(2) halves
+    {     FLOAT_SH(1)|BYTES_SH(4), ANYPLANAR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|
+                                                      ANYCHANNELS|ANYSPACE,  UnrollFloatsToFloat},
+
+    {     FLOAT_SH(1)|BYTES_SH(0), ANYPLANAR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|
+                                                        ANYCHANNELS|ANYSPACE,  UnrollDoublesToFloat},
+#ifndef CMS_NO_HALF_SUPPORT 
+    {     FLOAT_SH(1)|BYTES_SH(2), ANYPLANAR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|
+                                                        ANYCHANNELS|ANYSPACE,  UnrollHalfToFloat},
+#endif
+};
+
+
+// Bit fields set to one in the mask are not compared
+static
+cmsFormatter _cmsGetStockInputFormatter(cmsUInt32Number dwInput, cmsUInt32Number dwFlags)
+{
+    // Looks up the stock unroll routine for dwInput in the table selected by
+    // dwFlags. An entry matches when dwInput, with the entry's Mask bits
+    // cleared, equals the entry's Type; the first match is returned.
+    cmsFormatter found;
+
+    if (dwFlags == CMS_PACK_FLAGS_16BITS) {
+
+        const cmsFormatters16* entry = InputFormatters16;
+        const cmsFormatters16* const stop =
+            InputFormatters16 + sizeof(InputFormatters16) / sizeof(cmsFormatters16);
+
+        for (; entry != stop; entry++) {
+            if ((dwInput & ~entry->Mask) != entry->Type) continue;
+
+            found.Fmt16 = entry->Frm;
+            return found;
+        }
+    }
+    else if (dwFlags == CMS_PACK_FLAGS_FLOAT) {
+
+        const cmsFormattersFloat* entry = InputFormattersFloat;
+        const cmsFormattersFloat* const stop =
+            InputFormattersFloat + sizeof(InputFormattersFloat) / sizeof(cmsFormattersFloat);
+
+        for (; entry != stop; entry++) {
+            if ((dwInput & ~entry->Mask) != entry->Type) continue;
+
+            found.FmtFloat = entry->Frm;
+            return found;
+        }
+    }
+
+    // Unknown flags or no matching entry: report failure with a NULL formatter
+    found.Fmt16 = NULL;
+    return found;
+}
+
+static const cmsFormatters16 OutputFormatters16[] = {
+    //    Type                                          Mask                  Function
+    //  ----------------------------   ------------------------------------  ----------------------------
+    // Floating point outputs: Lab / XYZ special cases, then generic double, float and half
+    { TYPE_Lab_DBL,                                      ANYPLANAR|ANYEXTRA,  PackLabDoubleFrom16},
+    { TYPE_XYZ_DBL,                                      ANYPLANAR|ANYEXTRA,  PackXYZDoubleFrom16},
+
+    { TYPE_Lab_FLT,                                      ANYPLANAR|ANYEXTRA,  PackLabFloatFrom16},
+    { TYPE_XYZ_FLT,                                      ANYPLANAR|ANYEXTRA,  PackXYZFloatFrom16},
+    
+    { FLOAT_SH(1)|BYTES_SH(0),      ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|
+                                    ANYCHANNELS|ANYPLANAR|ANYEXTRA|ANYSPACE,  PackDoubleFrom16},
+    { FLOAT_SH(1)|BYTES_SH(4),      ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|
+                                    ANYCHANNELS|ANYPLANAR|ANYEXTRA|ANYSPACE,  PackFloatFrom16},
+#ifndef CMS_NO_HALF_SUPPORT 
+    { FLOAT_SH(1)|BYTES_SH(2),      ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|
+                                    ANYCHANNELS|ANYPLANAR|ANYEXTRA|ANYSPACE,  PackHalfFrom16},
+#endif
+    // 1-byte samples: specific layouts first
+    { CHANNELS_SH(1)|BYTES_SH(1),                                  ANYSPACE,  Pack1Byte},
+    { CHANNELS_SH(1)|BYTES_SH(1)|EXTRA_SH(1),                      ANYSPACE,  Pack1ByteSkip1},
+    { CHANNELS_SH(1)|BYTES_SH(1)|EXTRA_SH(1)|SWAPFIRST_SH(1),      ANYSPACE,  Pack1ByteSkip1SwapFirst},
+
+    { CHANNELS_SH(1)|BYTES_SH(1)|FLAVOR_SH(1),                     ANYSPACE,  Pack1ByteReversed},
+
+    { TYPE_LabV2_8,                                                       0,  PackLabV2_8 },
+    { TYPE_ALabV2_8,                                                      0,  PackALabV2_8 },
+    { TYPE_LabV2_16,                                                      0,  PackLabV2_16 },
+    // 3 channels of 1 byte, entries that require OPTIMIZED_SH(1)
+    { CHANNELS_SH(3)|BYTES_SH(1)|OPTIMIZED_SH(1),                  ANYSPACE,  Pack3BytesOptimized},
+    { CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|OPTIMIZED_SH(1),      ANYSPACE,  Pack3BytesAndSkip1Optimized},
+    { CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|SWAPFIRST_SH(1)|OPTIMIZED_SH(1),
+                                                                   ANYSPACE,  Pack3BytesAndSkip1SwapFirstOptimized},
+    { CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1)|OPTIMIZED_SH(1),
+                                                                   ANYSPACE,  Pack3BytesAndSkip1SwapSwapFirstOptimized},
+    { CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|EXTRA_SH(1)|OPTIMIZED_SH(1),
+                                                                   ANYSPACE,  Pack3BytesAndSkip1SwapOptimized},
+    { CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|OPTIMIZED_SH(1),     ANYSPACE,  Pack3BytesSwapOptimized},
+
+
+    // Same 3-channel layouts without the OPTIMIZED bit, then 6- and 4-channel layouts
+    { CHANNELS_SH(3)|BYTES_SH(1),                                  ANYSPACE,  Pack3Bytes},
+    { CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1),                      ANYSPACE,  Pack3BytesAndSkip1},
+    { CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|SWAPFIRST_SH(1),      ANYSPACE,  Pack3BytesAndSkip1SwapFirst},
+    { CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1),
+                                                                   ANYSPACE,  Pack3BytesAndSkip1SwapSwapFirst},
+    { CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|EXTRA_SH(1),         ANYSPACE,  Pack3BytesAndSkip1Swap},
+    { CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1),                     ANYSPACE,  Pack3BytesSwap},
+    { CHANNELS_SH(6)|BYTES_SH(1),                                  ANYSPACE,  Pack6Bytes},
+    { CHANNELS_SH(6)|BYTES_SH(1)|DOSWAP_SH(1),                     ANYSPACE,  Pack6BytesSwap},
+    { CHANNELS_SH(4)|BYTES_SH(1),                                  ANYSPACE,  Pack4Bytes},
+    { CHANNELS_SH(4)|BYTES_SH(1)|FLAVOR_SH(1),                     ANYSPACE,  Pack4BytesReverse},
+    { CHANNELS_SH(4)|BYTES_SH(1)|SWAPFIRST_SH(1),                  ANYSPACE,  Pack4BytesSwapFirst},
+    { CHANNELS_SH(4)|BYTES_SH(1)|DOSWAP_SH(1),                     ANYSPACE,  Pack4BytesSwap},
+    { CHANNELS_SH(4)|BYTES_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1),     ANYSPACE,  Pack4BytesSwapSwapFirst},
+    // Generic 1-byte fallbacks (chunky, then planar)
+    { BYTES_SH(1),                 ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE, PackAnyBytes},
+    { BYTES_SH(1)|PLANAR_SH(1),    ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE, PackPlanarBytes},
+    // 2-byte samples: specific layouts first
+    { CHANNELS_SH(1)|BYTES_SH(2),                                  ANYSPACE,  Pack1Word},
+    { CHANNELS_SH(1)|BYTES_SH(2)|EXTRA_SH(1),                      ANYSPACE,  Pack1WordSkip1},
+    { CHANNELS_SH(1)|BYTES_SH(2)|EXTRA_SH(1)|SWAPFIRST_SH(1),      ANYSPACE,  Pack1WordSkip1SwapFirst},
+    { CHANNELS_SH(1)|BYTES_SH(2)|FLAVOR_SH(1),                     ANYSPACE,  Pack1WordReversed},
+    { CHANNELS_SH(1)|BYTES_SH(2)|ENDIAN16_SH(1),                   ANYSPACE,  Pack1WordBigEndian},
+    { CHANNELS_SH(3)|BYTES_SH(2),                                  ANYSPACE,  Pack3Words},
+    { CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1),                     ANYSPACE,  Pack3WordsSwap},
+    { CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1),                   ANYSPACE,  Pack3WordsBigEndian},
+    { CHANNELS_SH(3)|BYTES_SH(2)|EXTRA_SH(1),                      ANYSPACE,  Pack3WordsAndSkip1},
+    { CHANNELS_SH(3)|BYTES_SH(2)|EXTRA_SH(1)|DOSWAP_SH(1),         ANYSPACE,  Pack3WordsAndSkip1Swap},
+    { CHANNELS_SH(3)|BYTES_SH(2)|EXTRA_SH(1)|SWAPFIRST_SH(1),      ANYSPACE,  Pack3WordsAndSkip1SwapFirst},
+
+    { CHANNELS_SH(3)|BYTES_SH(2)|EXTRA_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1),
+                                                                   ANYSPACE,  Pack3WordsAndSkip1SwapSwapFirst},
+
+    { CHANNELS_SH(4)|BYTES_SH(2),                                  ANYSPACE,  Pack4Words},
+    { CHANNELS_SH(4)|BYTES_SH(2)|FLAVOR_SH(1),                     ANYSPACE,  Pack4WordsReverse},
+    { CHANNELS_SH(4)|BYTES_SH(2)|DOSWAP_SH(1),                     ANYSPACE,  Pack4WordsSwap},
+    { CHANNELS_SH(4)|BYTES_SH(2)|ENDIAN16_SH(1),                   ANYSPACE,  Pack4WordsBigEndian},
+
+    { CHANNELS_SH(6)|BYTES_SH(2),                                  ANYSPACE,  Pack6Words},
+    { CHANNELS_SH(6)|BYTES_SH(2)|DOSWAP_SH(1),                     ANYSPACE,  Pack6WordsSwap},
+    // Generic 2-byte fallbacks (planar, then any other layout)
+    { BYTES_SH(2)|PLANAR_SH(1),     ANYFLAVOR|ANYENDIAN|ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE, PackPlanarWords},
+    { BYTES_SH(2),                  ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYENDIAN|ANYEXTRA|ANYCHANNELS|ANYSPACE, PackAnyWords}
+
+};
+
+
+static const cmsFormattersFloat OutputFormattersFloat[] = {   // stock float output packers, searched top to bottom
+    //    Type                                          Mask                                 Function
+    //  ----------------------------   ---------------------------------------------------  ----------------------------
+    {     TYPE_Lab_FLT,                                                ANYPLANAR|ANYEXTRA,   PackLabFloatFromFloat},
+    {     TYPE_XYZ_FLT,                                                ANYPLANAR|ANYEXTRA,   PackXYZFloatFromFloat},
+    // Lab / XYZ again, for the *_DBL types (double samples, going by the packer names)
+    {     TYPE_Lab_DBL,                                                ANYPLANAR|ANYEXTRA,   PackLabDoubleFromFloat},
+    {     TYPE_XYZ_DBL,                                                ANYPLANAR|ANYEXTRA,   PackXYZDoubleFromFloat},
+    // Generic entries: only the float flag and the bytes field are compared, everything in the mask is ignored
+    {     FLOAT_SH(1)|BYTES_SH(4), ANYPLANAR|
+                             ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE,   PackFloatsFromFloat },
+    {     FLOAT_SH(1)|BYTES_SH(0), ANYPLANAR|
+                             ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE,   PackDoublesFromFloat },
+#ifndef CMS_NO_HALF_SUPPORT 
+    {     FLOAT_SH(1)|BYTES_SH(2),                                   
+                             ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE,   PackHalfFromFloat },
+#endif
+    // NOTE(review): the half entry, unlike the two before it, leaves ANYPLANAR out of its mask - confirm intended
+};
+
+
+// Finds the built-in output formatter for a pixel format. A table entry matches when the format,
+// with the entry's Mask bits cleared, equals the entry's Type: bits set to one in the mask are not compared.
+// dwFlags picks the table: CMS_PACK_FLAGS_16BITS or CMS_PACK_FLAGS_FLOAT.
+static cmsFormatter _cmsGetStockOutputFormatter(cmsUInt32Number dwInput, cmsUInt32Number dwFlags)
+{
+    cmsFormatter found;
+    cmsUInt32Number idx;
+
+    // Optimization is only a hint, so that bit never takes part in the comparison
+    dwInput &= ~OPTIMIZED_SH(1);
+
+    if (dwFlags == CMS_PACK_FLAGS_16BITS) {
+
+        // 16 bits table: the first matching entry wins
+        for (idx = 0; idx < sizeof(OutputFormatters16) / sizeof(cmsFormatters16); idx++) {
+
+            const cmsFormatters16* entry = &OutputFormatters16[idx];
+
+            if ((dwInput & ~entry->Mask) != entry->Type)
+                continue;
+
+            found.Fmt16 = entry->Frm;
+            return found;
+        }
+    }
+    else if (dwFlags == CMS_PACK_FLAGS_FLOAT) {
+
+        // Floating point table, same matching rule
+        for (idx = 0; idx < sizeof(OutputFormattersFloat) / sizeof(cmsFormattersFloat); idx++) {
+
+            const cmsFormattersFloat* entry = &OutputFormattersFloat[idx];
+
+            if ((dwInput & ~entry->Mask) != entry->Type)
+                continue;
+
+            found.FmtFloat = entry->Frm;
+            return found;
+        }
+    }
+
+    // Reached when dwFlags selects neither table, or when the selected table
+    // holds no matching entry: the failure is reported as a NULL Fmt16.
+
+    found.Fmt16 = NULL;
+    return found;
+}
+
+
+typedef struct _cms_formatters_factory_list {
+    // One node of a singly linked list of plug-in supplied formatter factories
+    cmsFormatterFactory Factory;
+    struct _cms_formatters_factory_list *Next;
+    // Next is NULL on the last node (the list walkers in this file stop there)
+} cmsFormattersFactoryList;
+
+_cmsFormattersPluginChunkType _cmsFormattersPluginChunk = { NULL };   // file-level formatters chunk; its first member starts out NULL
+
+
+// Duplicates the zone of memory used by the plug-in in the new context: every node of the
+// source factory list is copied through ctx's MemPool, keeping the original order.
+static void DupFormatterFactoryList(struct _cmsContext_struct* ctx, 
+                                    const struct _cmsContext_struct* src)
+{
+    _cmsFormattersPluginChunkType  copyHead = { NULL };
+    _cmsFormattersPluginChunkType* srcHead  = (_cmsFormattersPluginChunkType*) src->chunks[FormattersPlugin];
+    cmsFormattersFactoryList*      node;
+
+    // Always points at the link that must receive the next copied node:
+    // first the head of the new list, afterwards the Next field of the last copy.
+    cmsFormattersFactoryList** tail = &copyHead.FactoryList;
+
+    _cmsAssert(srcHead != NULL);
+
+    node = srcHead->FactoryList;
+    while (node != NULL) {
+
+        cmsFormattersFactoryList* copy =
+            (cmsFormattersFactoryList*) _cmsSubAllocDup(ctx->MemPool, node, sizeof(cmsFormattersFactoryList));
+
+        // Allocation failure: give up, leaving ctx->chunks[FormattersPlugin] untouched
+        if (copy == NULL)
+            return;
+
+        copy->Next = NULL;
+        *tail = copy;
+        tail  = &copy->Next;
+
+        node = node->Next;
+    }
+
+    // Store a duplicate of the new list head in the destination context
+    ctx->chunks[FormattersPlugin] = _cmsSubAllocDup(ctx->MemPool, &copyHead, sizeof(_cmsFormattersPluginChunkType));
+}
+
+// The formatters plug-in memory chunk allocator/dup
+void _cmsAllocFormattersPluginChunk(struct _cmsContext_struct* ctx, 
+                                    const struct _cmsContext_struct* src)
+{
+    // Empty template used when there is no source context to copy from
+    static _cmsFormattersPluginChunkType FormattersPluginChunk = { NULL };
+
+    _cmsAssert(ctx != NULL);
+
+    if (src == NULL) {
+        ctx->chunks[FormattersPlugin] = _cmsSubAllocDup(ctx->MemPool, &FormattersPluginChunk, sizeof(_cmsFormattersPluginChunkType));
+        return;
+    }
+
+    DupFormatterFactoryList(ctx, src);   // duplicate the LIST held by src
+}
+
+
+
+// Formatters management: registers the factory carried by a formatters plug-in.
+// A NULL plug-in resets the list to empty (built-in defaults). FALSE is returned only when the node cannot be allocated.
+cmsBool  _cmsRegisterFormattersPlugin(cmsContext ContextID, cmsPluginBase* Data)
+{
+    _cmsFormattersPluginChunkType* chunk =
+        (_cmsFormattersPluginChunkType*) _cmsContextGetClientChunk(ContextID, FormattersPlugin);
+    cmsFormattersFactoryList* node;
+
+    if (Data != NULL) {
+        node = (cmsFormattersFactoryList*) _cmsPluginMalloc(ContextID, sizeof(cmsFormattersFactoryList));
+        if (node == NULL) return FALSE;
+
+        // Push at the front of the list, so the newest factory is the first one visited
+        node->Factory = ((cmsPluginFormatters*) Data)->FormattersFactory;
+        node->Next    = chunk->FactoryList;
+        chunk->FactoryList = node;
+    }
+    else {
+        // Reset to built-in defaults: forget every registered factory
+        chunk->FactoryList = NULL;
+    }
+
+    return TRUE;
+}
+
+cmsFormatter CMSEXPORT _cmsGetFormatter(cmsContext ContextID,
+                                        cmsUInt32Number Type,         // Specific type, i.e. TYPE_RGB_8
+                                        cmsFormatterDirection Dir,
+                                        cmsUInt32Number dwFlags)
+{
+    _cmsFormattersPluginChunkType* chunk =
+        (_cmsFormattersPluginChunkType*) _cmsContextGetClientChunk(ContextID, FormattersPlugin);
+    cmsFormattersFactoryList* node = chunk->FactoryList;
+
+    // Plug-in factories go first, in list order; the first one answering with a non-NULL Fmt16 wins
+    while (node != NULL) {
+        cmsFormatter candidate = node->Factory(Type, Dir, dwFlags);
+        if (candidate.Fmt16 != NULL) return candidate;
+        node = node->Next;
+    }
+
+    // Revert to default: the stock formatter for the requested direction
+    return (Dir == cmsFormatterInput) ? _cmsGetStockInputFormatter(Type, dwFlags)
+                                      : _cmsGetStockOutputFormatter(Type, dwFlags);
+}
+
+
+// Return whether the given formatter refers to float values
+cmsBool  _cmsFormatterIsFloat(cmsUInt32Number Type)
+{
+    return (T_FLOAT(Type) != 0) ? TRUE : FALSE;
+}
+
+// Return whether the given formatter refers to 8 bits,
+// that is, whether its bytes field holds exactly 1
+cmsBool  _cmsFormatterIs8bit(cmsUInt32Number Type)
+{
+    // The comparison result itself is the answer
+    return (T_BYTES(Type) == 1);
+}
+
+// Build a suitable formatter for the colorspace of this profile: the result combines the float flag,
+// the profile's color space, the requested bytes per sample and that space's channel count.
+cmsUInt32Number CMSEXPORT cmsFormatterForColorspaceOfProfile(cmsHPROFILE hProfile, cmsUInt32Number nBytes, cmsBool lIsFloat)
+{
+    const cmsColorSpaceSignature space     = cmsGetColorSpace(hProfile);
+    const cmsUInt32Number        spaceBits = (cmsUInt32Number) _cmsLCMScolorSpace(space);
+    const cmsUInt32Number        channels  = cmsChannelsOf(space);
+    cmsUInt32Number              isFloat   = 0;
+
+    if (lIsFloat) isFloat = 1U;
+    return FLOAT_SH(isFloat) | COLORSPACE_SH(spaceBits) | BYTES_SH(nBytes) | CHANNELS_SH(channels);   // a fake formatter
+}
+
+// Build a suitable formatter for the PCS of this profile: the result combines the float flag,
+// the profile's PCS, the requested bytes per sample and the PCS channel count.
+cmsUInt32Number CMSEXPORT cmsFormatterForPCSOfProfile(cmsHPROFILE hProfile, cmsUInt32Number nBytes, cmsBool lIsFloat)
+{
+    const cmsColorSpaceSignature pcs       = cmsGetPCS(hProfile);
+    const cmsUInt32Number        spaceBits = (cmsUInt32Number) _cmsLCMScolorSpace(pcs);
+    const cmsUInt32Number        channels  = cmsChannelsOf(pcs);
+    cmsUInt32Number              isFloat   = 0;
+
+    if (lIsFloat) isFloat = 1U;
+
+    return FLOAT_SH(isFloat) | COLORSPACE_SH(spaceBits) | BYTES_SH(nBytes) | CHANNELS_SH(channels);   // a fake formatter
+}
+
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspcs.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspcs.cpp
new file mode 100644
index 0000000000..ea70484d5b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmspcs.cpp
@@ -0,0 +1,940 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+//      inter PCS conversions XYZ <-> CIE L* a* b*
+/*
+
+
+       CIE 15:2004 CIELab is defined as:
+
+       L* = 116*f(Y/Yn) - 16                     0 <= L* <= 100
+       a* = 500*[f(X/Xn) - f(Y/Yn)]
+       b* = 200*[f(Y/Yn) - f(Z/Zn)]
+
+       and
+
+              f(t) = t^(1/3)                     1 >= t >  (24/116)^3
+                     (841/108)*t + (16/116)      0 <= t <= (24/116)^3
+
+
+       Reverse transform is:
+
+       X = Xn*[a* / 500 + (L* + 16) / 116] ^ 3   if [a* / 500 + (L* + 16) / 116] >  (24/116)
+         = Xn*(a* / 500 + L* / 116) / 7.787      if [a* / 500 + (L* + 16) / 116] <= (24/116)
+
+
+
+       PCS in Lab2 is encoded as:
+
+              8 bit Lab PCS:
+
+                     L*      0..100 into a 0..ff byte.
+                     a*      t + 128 range is -128.0  +127.0
+                     b*
+
+             16 bit Lab PCS:
+
+                     L*     0..100  into a 0..ff00 word.
+                     a*     t + 128  range is  -128.0  +127.9961
+                     b*
+
+
+
+Interchange Space   Component     Actual Range        Encoded Range
+CIE XYZ             X             0 -> 1.99997        0x0000 -> 0xffff
+CIE XYZ             Y             0 -> 1.99997        0x0000 -> 0xffff
+CIE XYZ             Z             0 -> 1.99997        0x0000 -> 0xffff
+
+Version 2,3
+-----------
+
+CIELAB (16 bit)     L*            0 -> 100.0          0x0000 -> 0xff00
+CIELAB (16 bit)     a*            -128.0 -> +127.996  0x0000 -> 0x8000 -> 0xffff
+CIELAB (16 bit)     b*            -128.0 -> +127.996  0x0000 -> 0x8000 -> 0xffff
+
+
+Version 4
+---------
+
+CIELAB (16 bit)     L*            0 -> 100.0          0x0000 -> 0xffff
+CIELAB (16 bit)     a*            -128.0 -> +127      0x0000 -> 0x8080 -> 0xffff
+CIELAB (16 bit)     b*            -128.0 -> +127      0x0000 -> 0x8080 -> 0xffff
+
+*/
+
+// Conversions
+void CMSEXPORT cmsXYZ2xyY(cmsCIExyY* Dest, const cmsCIEXYZ* Source)
+{
+    // x = X/(X+Y+Z) and y = Y/(X+Y+Z); Y itself is carried over unchanged.
+    // There is no guard for X+Y+Z == 0: the reciprocal is used as computed.
+    const cmsFloat64Number invSum = 1./(Source->X + Source->Y + Source->Z);
+
+    Dest->x = (Source->X) * invSum;
+    Dest->y = (Source->Y) * invSum;
+    Dest->Y = Source->Y;
+}
+
+void CMSEXPORT cmsxyY2XYZ(cmsCIEXYZ* Dest, const cmsCIExyY* Source)
+{   // xyY -> XYZ; Source->y is used as a divisor and is not checked against zero
+    Dest -> X = (Source -> x / Source -> y) * Source -> Y;                       // X = (x/y) * Y
+    Dest -> Y = Source -> Y;
+    Dest -> Z = ((1 - Source -> x - Source -> y) / Source -> y) * Source -> Y;   // Z = ((1-x-y)/y) * Y
+}
+
+/*
+       The break point (24/116)^3 = (6/29)^3 is a very small amount of tristimulus 
+       primary (0.008856).  Generally, this only happens for 
+       nearly ideal blacks and for some orange / amber colors in transmission mode.  
+       For example, the Z value of the orange turn indicator lamp lens on an 
+       automobile will often be below this value.  But the Z does not 
+       contribute to the perceived color directly.
+*/
+
+// CIELab companding function: linear segment at or below the (24/116)^3 break point,
+// cube root above it (see the formulas in the header comment of this file).
+static cmsFloat64Number f(cmsFloat64Number t)
+{
+    // Break point, expressed in the domain of t
+    const cmsFloat64Number Limit = (24.0/116.0) * (24.0/116.0) * (24.0/116.0);
+
+    return (t <= Limit) ? (841.0/108.0) * t + (16.0/116.0)
+                        : pow(t, 1.0/3.0);
+}
+
+// Inverse of f(): inverse of the linear segment at or below the 24/116 break point,
+// plain cube above it. The break point is expressed in the output domain of f(),
+// which is why it is not cubed here as it is in f().
+static cmsFloat64Number f_1(cmsFloat64Number t)
+{
+    const cmsFloat64Number Limit = (24.0/116.0);
+
+    // 108/841 undoes the 841/108 slope, after removing the 16/116 offset
+    return (t <= Limit) ? (108.0/841.0) * (t - (16.0/116.0))
+                        : t * t * t;
+}
+
+
+// Standard XYZ to Lab. It can handle negative XYZ numbers in some cases.
+// Each tristimulus value is divided by the white point before companding.
+void CMSEXPORT cmsXYZ2Lab(const cmsCIEXYZ* WhitePoint, cmsCIELab* Lab, const cmsCIEXYZ* xyz)
+{
+    // A NULL white point selects D50
+    const cmsCIEXYZ* wp = (WhitePoint != NULL) ? WhitePoint : cmsD50_XYZ();
+
+    // Companded, white-relative tristimulus values
+    const cmsFloat64Number fX = f(xyz->X / wp->X);
+    const cmsFloat64Number fY = f(xyz->Y / wp->Y);
+    const cmsFloat64Number fZ = f(xyz->Z / wp->Z);
+
+    Lab->L = 116.0*fY - 16.0;
+    Lab->a = 500.0*(fX - fY);
+    Lab->b = 200.0*(fY - fZ);
+}
+
+
+// Standard Lab to XYZ. It can return negative XYZ in some cases.
+// The result is scaled by the white point after the companding is undone.
+void CMSEXPORT cmsLab2XYZ(const cmsCIEXYZ* WhitePoint, cmsCIEXYZ* xyz,  const cmsCIELab* Lab)
+{
+    // A NULL white point selects D50
+    const cmsCIEXYZ* wp = (WhitePoint != NULL) ? WhitePoint : cmsD50_XYZ();
+
+    // Undo the L*, a*, b* scaling: 0.002 = 1/500 and 0.005 = 1/200
+    const cmsFloat64Number fy = (Lab->L + 16.0) / 116.0;
+    const cmsFloat64Number fx = fy + 0.002 * Lab->a;
+    const cmsFloat64Number fz = fy - 0.005 * Lab->b;
+
+    // Undo the companding and scale by the white point
+    xyz->X = f_1(fx) * wp->X;
+    xyz->Y = f_1(fy) * wp->Y;
+    xyz->Z = f_1(fz) * wp->Z;
+}
+
+// Decodes a V2-encoded L* word: 0xFF00 stands for 100.0, hence 0xFF00 / 100 = 652.8 counts per unit
+static cmsFloat64Number L2float2(cmsUInt16Number v)
+{
+    return ((cmsFloat64Number) v) / 652.8;
+}
+
+// The a/b part of the V2 decoding:
+// 256 counts per unit, with the word 0x8000 standing for 0.0
+static cmsFloat64Number ab2float2(cmsUInt16Number v)
+{
+    return (((cmsFloat64Number) v) / 256.0) - 128.0;
+}
+
+// Encodes L* as a V2 word (652.8 counts per unit); the product goes through _cmsQuickSaturateWord
+static cmsUInt16Number L2Fix2(cmsFloat64Number L)
+{
+    return _cmsQuickSaturateWord(652.8 * L);
+}
+
+// Encodes a* or b* as a V2 word: offset by 128, then 256 counts per unit
+static cmsUInt16Number ab2Fix2(cmsFloat64Number ab)
+{
+    return _cmsQuickSaturateWord(256.0 * (ab + 128.0));
+}
+
+
+// Decodes a V4-encoded L* word: 0xFFFF stands for 100.0, hence 655.35 counts per unit
+static cmsFloat64Number L2float4(cmsUInt16Number v)
+{
+    return ((cmsFloat64Number) v) / 655.35;
+}
+
+// The a/b part of the V4 decoding:
+// 257 counts per unit, with the word 0x8080 standing for 0.0
+static cmsFloat64Number ab2float4(cmsUInt16Number v)
+{
+    return (((cmsFloat64Number) v) / 257.0) - 128.0;
+}
+
+
+void CMSEXPORT cmsLabEncoded2FloatV2(cmsCIELab* Lab, const cmsUInt16Number wLab[3])
+{       // Decodes a 16-bit Lab triplet using the V2 scale factors (652.8 for L*, 256 for a*/b*)
+        Lab->L = L2float2(wLab[0]);
+        Lab->a = ab2float2(wLab[1]);
+        Lab->b = ab2float2(wLab[2]);
+}
+
+
+void CMSEXPORT cmsLabEncoded2Float(cmsCIELab* Lab, const cmsUInt16Number wLab[3])
+{       // Decodes a 16-bit Lab triplet using the V4 scale factors (655.35 for L*, 257 for a*/b*)
+        Lab->L = L2float4(wLab[0]);
+        Lab->a = ab2float4(wLab[1]);
+        Lab->b = ab2float4(wLab[2]);
+}
+
+// Clamps L* to what a V2 word can encode: from 0 up to 0xFFFF * 100 / 0xFF00
+static cmsFloat64Number Clamp_L_doubleV2(cmsFloat64Number L)
+{
+    const cmsFloat64Number L_max = (cmsFloat64Number) (0xFFFF * 100.0) / 0xFF00;
+
+    if (L < 0) return 0;
+    if (L > L_max) return L_max;
+
+    return L;
+}
+
+
+// Clamps a* or b* into [MIN_ENCODEABLE_ab2, MAX_ENCODEABLE_ab2]
+static cmsFloat64Number Clamp_ab_doubleV2(cmsFloat64Number ab)
+{
+    // Lower bound first, upper bound second
+    ab = (ab < MIN_ENCODEABLE_ab2) ? MIN_ENCODEABLE_ab2 : ab;
+    ab = (ab > MAX_ENCODEABLE_ab2) ? MAX_ENCODEABLE_ab2 : ab;
+    return ab;
+}
+
+void CMSEXPORT cmsFloat2LabEncodedV2(cmsUInt16Number wLab[3], const cmsCIELab* fLab)
+{
+    // Bring every component into the V2 encodeable range first...
+    const cmsFloat64Number L = Clamp_L_doubleV2(fLab->L);
+    const cmsFloat64Number a = Clamp_ab_doubleV2(fLab->a);
+    const cmsFloat64Number b = Clamp_ab_doubleV2(fLab->b);
+
+    // ...then convert to 16-bit words
+    wLab[0] = L2Fix2(L);
+    wLab[1] = ab2Fix2(a);
+    wLab[2] = ab2Fix2(b);
+}
+
+
+// Clamps L* into [0, 100]
+static cmsFloat64Number Clamp_L_doubleV4(cmsFloat64Number L)
+{
+    if (L < 0) return 0;
+    if (L > 100.0) return 100.0;
+
+    return L;
+}
+
+// Clamps a* or b* into [MIN_ENCODEABLE_ab4, MAX_ENCODEABLE_ab4]
+static cmsFloat64Number Clamp_ab_doubleV4(cmsFloat64Number ab)
+{
+    // Lower bound first, upper bound second
+    ab = (ab < MIN_ENCODEABLE_ab4) ? MIN_ENCODEABLE_ab4 : ab;
+    ab = (ab > MAX_ENCODEABLE_ab4) ? MAX_ENCODEABLE_ab4 : ab;
+    return ab;
+}
+
+// Encodes L* as a V4 word (655.35 counts per unit); the product goes through _cmsQuickSaturateWord
+static cmsUInt16Number L2Fix4(cmsFloat64Number L)
+{
+    return _cmsQuickSaturateWord(655.35 * L);
+}
+
+// Encodes a* or b* as a V4 word: offset by 128, then 257 counts per unit
+static cmsUInt16Number ab2Fix4(cmsFloat64Number ab)
+{
+    return _cmsQuickSaturateWord(257.0 * (ab + 128.0));
+}
+
+void CMSEXPORT cmsFloat2LabEncoded(cmsUInt16Number wLab[3], const cmsCIELab* fLab)
+{
+    // Bring every component into the V4 encodeable range first...
+    const cmsFloat64Number L = Clamp_L_doubleV4(fLab->L);
+    const cmsFloat64Number a = Clamp_ab_doubleV4(fLab->a);
+    const cmsFloat64Number b = Clamp_ab_doubleV4(fLab->b);
+
+    // ...then convert to 16-bit words
+    wLab[0] = L2Fix4(L);
+    wLab[1] = ab2Fix4(a);
+    wLab[2] = ab2Fix4(b);
+}
+
+// Auxiliary: convert to Radians.
+// Takes an angle in degrees; it is multiplied by M_PI first and divided by 180 afterwards.
+static cmsFloat64Number RADIANS(cmsFloat64Number deg)
+{
+    return (deg * M_PI) / 180.;
+}
+
+
+// Auxiliary: atan2 but operating in degrees and returning 0 if a==b==0.
+// The angle is then wrapped by whole turns: 360 is subtracted while it exceeds 360
+// and added while it is negative.
+static cmsFloat64Number atan2deg(cmsFloat64Number a, cmsFloat64Number b)
+{
+    cmsFloat64Number deg;
+
+    // atan2 is not consulted at all for the (0, 0) input
+    deg = (a == 0 && b == 0) ? 0 : atan2(a, b);
+
+    // radians -> degrees
+    deg *= (180. / M_PI);
+
+    for (; deg > 360.; deg -= 360.)
+        ;
+
+    for (; deg < 0; deg += 360.)
+        ;
+
+    return deg;
+}
+
+
+// Auxiliary: Square.
+// Returns v multiplied by itself.
+static cmsFloat64Number Sqr(cmsFloat64Number v)
+{
+    return v * v;
+}
+// To cylindrical coordinates (Lab -> LCh). No check is performed, then negative values are allowed
+void CMSEXPORT cmsLab2LCh(cmsCIELCh* LCh, const cmsCIELab* Lab)
+{
+    LCh -> L = Lab -> L;
+    LCh -> C = pow(Sqr(Lab ->a) + Sqr(Lab ->b), 0.5);   // chroma: length of the (a, b) vector
+    LCh -> h = atan2deg(Lab ->b, Lab ->a);              // hue angle, in degrees
+}
+
+
+// From cylindrical coordinates (LCh -> Lab). No check is performed, then negative values are allowed
+void CMSEXPORT cmsLCh2Lab(cmsCIELab* Lab, const cmsCIELCh* LCh)
+{
+    cmsFloat64Number h = (LCh -> h * M_PI) / 180.0;   // hue converted from degrees to radians
+
+    Lab -> L = LCh -> L;
+    Lab -> a = LCh -> C * cos(h);
+    Lab -> b = LCh -> C * sin(h);
+}
+
+// In XYZ all 3 components are encoded using 1.15 fixed point,
+// so 1.0 becomes 32768 before the value goes through _cmsQuickSaturateWord
+static cmsUInt16Number XYZ2Fix(cmsFloat64Number d)
+{
+    return _cmsQuickSaturateWord(32768.0 * d);
+}
+
+// Encodes a floating point XYZ triplet as three 1.15 fixed point words.
+// A triplet whose Y is zero or negative is zeroed as a whole; every component
+// is then clamped to [0, MAX_ENCODEABLE_XYZ] before conversion.
+void CMSEXPORT cmsFloat2XYZEncoded(cmsUInt16Number XYZ[3], const cmsCIEXYZ* fXYZ)
+{
+    cmsFloat64Number comp[3];
+    int i;
+
+    // Work on a private copy, in X, Y, Z order
+    comp[0] = fXYZ->X;
+    comp[1] = fXYZ->Y;
+    comp[2] = fXYZ->Z;
+
+    // Clamp to encodeable values.
+
+    // Non-positive Y: the whole triplet is zeroed
+    if (comp[1] <= 0) {
+
+        for (i = 0; i < 3; i++)
+            comp[i] = 0;
+    }
+
+    // Per component, the upper bound is applied first and the lower bound second
+    for (i = 0; i < 3; i++) {
+
+        if (comp[i] > MAX_ENCODEABLE_XYZ)
+            comp[i] = MAX_ENCODEABLE_XYZ;
+
+        if (comp[i] < 0)
+            comp[i] = 0;
+    }
+
+
+    // Convert only once all three components are final
+    for (i = 0; i < 3; i++) {
+
+        XYZ[i] = XYZ2Fix(comp[i]);
+    }
+}
+
+
+//  To convert from Fixed 1.15 point to cmsFloat64Number.
+//  The word is first widened to 15.16 fixed point and then converted.
+static cmsFloat64Number XYZ2float(cmsUInt16Number v)
+{
+    cmsS15Fixed16Number widened;
+
+    // From 1.15 to 15.16: one more fractional bit, hence the shift by one
+    widened = v << 1;
+
+    // From fixed 15.16 to cmsFloat64Number
+    return _cms15Fixed16toDouble(widened);
+}
+
+
+void CMSEXPORT cmsXYZEncoded2Float(cmsCIEXYZ* fXYZ, const cmsUInt16Number XYZ[3])
+{   // Decodes three 1.15 fixed point words, taken in X, Y, Z order
+    fXYZ -> X = XYZ2float(XYZ[0]);
+    fXYZ -> Y = XYZ2float(XYZ[1]);
+    fXYZ -> Z = XYZ2float(XYZ[2]);
+}
+
+
+// Returns dE on two Lab values:
+// the Euclidean distance between them in (L, a, b)
+cmsFloat64Number CMSEXPORT cmsDeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2)
+{
+    // Absolute per-axis differences
+    const cmsFloat64Number dL = fabs(Lab1->L - Lab2->L);
+    const cmsFloat64Number da = fabs(Lab1->a - Lab2->a);
+    const cmsFloat64Number db = fabs(Lab1->b - Lab2->b);
+
+    return pow(Sqr(dL) + Sqr(da) + Sqr(db), 0.5);
+}
+
+
+// Return the CIE94 Delta E
+cmsFloat64Number CMSEXPORT cmsCIE94DeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2)
+{
+    cmsCIELCh polar1, polar2;
+    cmsFloat64Number dE, dL, dC, dh, dhsq;
+    cmsFloat64Number c12, sc, sh;
+
+    // Lightness difference
+    dL = fabs(Lab1->L - Lab2->L);
+
+    // Chroma difference, taken in LCh
+    cmsLab2LCh(&polar1, Lab1);
+    cmsLab2LCh(&polar2, Lab2);
+    dC = fabs(polar1.C - polar2.C);
+
+    // Hue difference is what is left of the plain dE once lightness and chroma are removed;
+    // a negative remainder is treated as zero
+    dE   = cmsDeltaE(Lab1, Lab2);
+    dhsq = Sqr(dE) - Sqr(dL) - Sqr(dC);
+    dh   = (dhsq < 0) ? 0 : pow(dhsq, 0.5);
+
+    // Both weights grow with the geometric mean of the two chromas
+    c12 = sqrt(polar1.C * polar2.C);
+    sc  = 1.0 + (0.048 * c12);
+    sh  = 1.0 + (0.014 * c12);
+
+    return sqrt(Sqr(dL)  + Sqr(dC) / Sqr(sc) + Sqr(dh) / Sqr(sh));
+}
+
+
+// Auxiliary for cmsBFDdeltaE: turns the L* of a color into the lightness
+// value the BFD formula works with
+static cmsFloat64Number ComputeLBFD(const cmsCIELab* Lab)
+{
+    cmsFloat64Number yt;
+
+    // yt is a cube of (L*+16)/116 above the 7.996969 threshold, linear in L* otherwise
+    yt = (Lab->L > 7.996969) ? (Sqr((Lab->L+16)/116)*((Lab->L+16)/116))*100
+                             : 100 * (Lab->L / 903.3);
+
+    // M_LOG10E * log(x) is the base-10 logarithm of x
+    return (54.6 * (M_LOG10E * (log(yt + 1.5))) - 9.6);
+}
+
+
+
+// bfd - gets BFD(1:1) difference between Lab1, Lab2
+cmsFloat64Number CMSEXPORT cmsBFDdeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2)
+{
+    cmsFloat64Number lbfd1,lbfd2,AveC,Aveh,dE,deltaL,
+        deltaC,deltah,dc,t,g,dh,rh,rc,rt,bfd;
+    cmsCIELCh LCh1, LCh2;
+    // Lightness difference is taken on the BFD lightness scale (ComputeLBFD), not on L* itself
+
+    lbfd1 = ComputeLBFD(Lab1);
+    lbfd2 = ComputeLBFD(Lab2);
+    deltaL = lbfd2 - lbfd1;
+    // Chroma difference, mean chroma and mean hue come from the LCh form of both colors
+    cmsLab2LCh(&LCh1, Lab1);
+    cmsLab2LCh(&LCh2, Lab2);
+
+    deltaC = LCh2.C - LCh1.C;
+    AveC = (LCh1.C+LCh2.C)/2;
+    Aveh = (LCh1.h+LCh2.h)/2;
+    // NOTE(review): Aveh is a plain arithmetic mean of the two hue angles; nothing here handles wrap-around at 0/360
+    dE = cmsDeltaE(Lab1, Lab2);
+    // Hue difference: what remains of the plain dE once the raw L* and chroma differences are removed (zero if nothing remains)
+    if (Sqr(dE)>(Sqr(Lab2->L-Lab1->L)+Sqr(deltaC)))
+        deltah = sqrt(Sqr(dE)-Sqr(Lab2->L-Lab1->L)-Sqr(deltaC));
+    else
+        deltah =0;
+    // Weighting terms; the angles are in degrees and are turned into radians by dividing by 180/M_PI
+
+    dc   = 0.035 * AveC / (1 + 0.00365 * AveC)+0.521;
+    g    = sqrt(Sqr(Sqr(AveC))/(Sqr(Sqr(AveC))+14000));
+    t    = 0.627+(0.055*cos((Aveh-254)/(180/M_PI))-
+           0.040*cos((2*Aveh-136)/(180/M_PI))+
+           0.070*cos((3*Aveh-31)/(180/M_PI))+
+           0.049*cos((4*Aveh+114)/(180/M_PI))-
+           0.015*cos((5*Aveh-103)/(180/M_PI)));
+
+    dh    = dc*(g*t+1-g);
+    rh    = -0.260*cos((Aveh-308)/(180/M_PI))-
+           0.379*cos((2*Aveh-160)/(180/M_PI))-
+           0.636*cos((3*Aveh+254)/(180/M_PI))+
+           0.226*cos((4*Aveh+140)/(180/M_PI))-
+           0.194*cos((5*Aveh+280)/(180/M_PI));
+
+    rc = sqrt((AveC*AveC*AveC*AveC*AveC*AveC)/((AveC*AveC*AveC*AveC*AveC*AveC)+70000000));
+    rt = rh*rc;
+    // Final combination, including the rt cross term between the chroma and hue parts
+    bfd = sqrt(Sqr(deltaL)+Sqr(deltaC/dc)+Sqr(deltah/dh)+(rt*(deltaC/dc)*(deltah/dh)));
+
+    return bfd;
+}
+
+
+//  cmc - CMC(l:c) difference between Lab1, Lab2
+cmsFloat64Number CMSEXPORT cmsCMCdeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2, cmsFloat64Number l, cmsFloat64Number c)
+{
+  cmsFloat64Number dE,dL,dC,dh,sl,sc,sh,t,f,cmc;   // NOTE: the local f hides the file-level f() helper inside this function
+  cmsCIELCh LCh1, LCh2;
+  // Shortcut: two colors whose L* are both exactly 0 are reported as identical, whatever their a* and b*
+  if (Lab1 ->L == 0 && Lab2 ->L == 0) return 0;
+
+  cmsLab2LCh(&LCh1, Lab1);
+  cmsLab2LCh(&LCh2, Lab2);
+
+
+  dL = Lab2->L-Lab1->L;
+  dC = LCh2.C-LCh1.C;
+
+  dE = cmsDeltaE(Lab1, Lab2);
+  // Hue difference: what remains of the plain dE once lightness and chroma differences are removed
+  if (Sqr(dE)>(Sqr(dL)+Sqr(dC)))
+            dh = sqrt(Sqr(dE)-Sqr(dL)-Sqr(dC));
+  else
+            dh =0;
+  // Every weight below is derived from the first color (Lab1 / LCh1) only
+  if ((LCh1.h > 164) && (LCh1.h < 345))
+      t = 0.56 + fabs(0.2 * cos(((LCh1.h + 168)/(180/M_PI))));
+  else
+      t = 0.36 + fabs(0.4 * cos(((LCh1.h + 35 )/(180/M_PI))));
+
+   sc  = 0.0638   * LCh1.C / (1 + 0.0131  * LCh1.C) + 0.638;
+   sl  = 0.040975 * Lab1->L /(1 + 0.01765 * Lab1->L);
+   // sl is pinned to a constant for dark colors (L* below 16)
+   if (Lab1->L<16)
+         sl = 0.511;
+
+   f   = sqrt((LCh1.C * LCh1.C * LCh1.C * LCh1.C)/((LCh1.C * LCh1.C * LCh1.C * LCh1.C)+1900));
+   sh  = sc*(t*f+1-f);
+   cmc = sqrt(Sqr(dL/(l*sl))+Sqr(dC/(c*sc))+Sqr(dh/sh));
+
+   return cmc;
+}
+
+// dE2000 The weightings KL, KC and KH can be modified to reflect the relative
+// importance of lightness, chroma and hue in different industrial applications
+cmsFloat64Number CMSEXPORT cmsCIE2000DeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2,
+                                  cmsFloat64Number Kl, cmsFloat64Number Kc, cmsFloat64Number Kh)
+{
+    cmsFloat64Number L1  = Lab1->L;
+    cmsFloat64Number a1  = Lab1->a;
+    cmsFloat64Number b1  = Lab1->b;
+    cmsFloat64Number C   = sqrt( Sqr(a1) + Sqr(b1) );
+    // Second color: the 's' suffix marks everything that belongs to Lab2
+    cmsFloat64Number Ls = Lab2 ->L;
+    cmsFloat64Number as = Lab2 ->a;
+    cmsFloat64Number bs = Lab2 ->b;
+    cmsFloat64Number Cs = sqrt( Sqr(as) + Sqr(bs) );
+    // G depends on the mean of both chromas and rescales a* only; b* is used as it is
+    cmsFloat64Number G = 0.5 * ( 1 - sqrt(pow((C + Cs) / 2 , 7.0) / (pow((C + Cs) / 2, 7.0) + pow(25.0, 7.0) ) ));
+    // Adjusted values (the _p names) of the first color, then of the second; hues are in degrees
+    cmsFloat64Number a_p = (1 + G ) * a1;
+    cmsFloat64Number b_p = b1;
+    cmsFloat64Number C_p = sqrt( Sqr(a_p) + Sqr(b_p));
+    cmsFloat64Number h_p = atan2deg(b_p, a_p);
+
+
+    cmsFloat64Number a_ps = (1 + G) * as;
+    cmsFloat64Number b_ps = bs;
+    cmsFloat64Number C_ps = sqrt(Sqr(a_ps) + Sqr(b_ps));
+    cmsFloat64Number h_ps = atan2deg(b_ps, a_ps);
+
+    cmsFloat64Number meanC_p =(C_p + C_ps) / 2;
+
+    cmsFloat64Number hps_plus_hp  = h_ps + h_p;
+    cmsFloat64Number hps_minus_hp = h_ps - h_p;
+    // Mean hue and hue difference, both corrected by 360 when the two hues lie more than half a turn apart
+    cmsFloat64Number meanh_p = fabs(hps_minus_hp) <= 180.000001 ? (hps_plus_hp)/2 :
+                            (hps_plus_hp) < 360 ? (hps_plus_hp + 360)/2 :
+                                                 (hps_plus_hp - 360)/2;
+
+    cmsFloat64Number delta_h = (hps_minus_hp) <= -180.000001 ?  (hps_minus_hp + 360) :
+                            (hps_minus_hp) > 180 ? (hps_minus_hp - 360) :
+                                                    (hps_minus_hp);
+    cmsFloat64Number delta_L = (Ls - L1);
+    cmsFloat64Number delta_C = (C_ps - C_p );
+    // delta_H turns the hue angle difference into a distance, scaled by the geometric mean of both adjusted chromas
+
+    cmsFloat64Number delta_H =2 * sqrt(C_ps*C_p) * sin(RADIANS(delta_h) / 2);
+
+    cmsFloat64Number T = 1 - 0.17 * cos(RADIANS(meanh_p-30))
+                 + 0.24 * cos(RADIANS(2*meanh_p))
+                 + 0.32 * cos(RADIANS(3*meanh_p + 6))
+                 - 0.2  * cos(RADIANS(4*meanh_p - 63));
+    // Sl, Sc, Sh: the divisors applied to the lightness, chroma and hue differences
+    cmsFloat64Number Sl = 1 + (0.015 * Sqr((Ls + L1) /2- 50) )/ sqrt(20 + Sqr( (Ls+L1)/2 - 50) );
+
+    cmsFloat64Number Sc = 1 + 0.045 * (C_p + C_ps)/2;
+    cmsFloat64Number Sh = 1 + 0.015 * ((C_ps + C_p)/2) * T;
+    // Rt: cross term factor, built from the hue dependent delta_ro and the chroma dependent Rc
+    cmsFloat64Number delta_ro = 30 * exp( -Sqr(((meanh_p - 275 ) / 25)));
+
+    cmsFloat64Number Rc = 2 * sqrt(( pow(meanC_p, 7.0) )/( pow(meanC_p, 7.0) + pow(25.0, 7.0)));
+
+    cmsFloat64Number Rt = -sin(2 * RADIANS(delta_ro)) * Rc;
+    // Kl, Kc, Kh are supplied by the caller and divide their terms together with the S values
+    cmsFloat64Number deltaE00 = sqrt( Sqr(delta_L /(Sl * Kl)) +
+                            Sqr(delta_C/(Sc * Kc))  +
+                            Sqr(delta_H/(Sh * Kh))  +
+                            Rt*(delta_C/(Sc * Kc)) * (delta_H / (Sh * Kh)));
+
+    return deltaE00;
+}
+
+// This function returns a number of gridpoints to be used as LUT table. It assumes same number
+// of gridpoints in all dimensions. Flags may override the choice.
+// When both precalc flags are set, the high resolution one takes precedence.
+cmsUInt32Number _cmsReasonableGridpointsByColorspace(cmsColorSpaceSignature Colorspace, cmsUInt32Number dwFlags)
+{
+    const cmsUInt32Number requested = (dwFlags >> 16) & 0xFF;
+    cmsUInt32Number nChannels;
+    cmsUInt32Number points;
+
+    // Already specified in bits 16..23 of the flags? Then grab'em
+    if (requested != 0)
+        return requested;
+
+    nChannels = cmsChannelsOf(Colorspace);
+
+    if (dwFlags & cmsFLAGS_HIGHRESPRECALC) {
+
+        // HighResPrecalc is maximum resolution
+        if (nChannels > 4)
+            points = 7;         // 7 for Hifi
+        else if (nChannels == 4)
+            points = 23;        // 23 for CMYK
+        else
+            points = 49;        // 49 for RGB and others
+    }
+    else if (dwFlags & cmsFLAGS_LOWRESPRECALC) {
+
+        // LowResPrecalc is lower resolution
+        if (nChannels > 4)
+            points = 6;         // 6 for more than 4 channels
+        else if (nChannels == 1)
+            points = 33;        // For monochrome
+        else
+            points = 17;        // 17 for remaining
+    }
+    else {
+
+        // Default values
+        if (nChannels > 4)
+            points = 7;         // 7 for Hifi
+        else if (nChannels == 4)
+            points = 17;        // 17 for CMYK
+        else
+            points = 33;        // 33 for RGB
+    }
+
+    return points;
+}
+
+
+cmsBool  _cmsEndPointsBySpace(cmsColorSpaceSignature Space,
+                             cmsUInt16Number **White,
+                             cmsUInt16Number **Black,
+                             cmsUInt32Number *nOutputs)
+{
+       // Only most common spaces
+
+       static cmsUInt16Number RGBblack[4]  = { 0, 0, 0 };
+       static cmsUInt16Number RGBwhite[4]  = { 0xffff, 0xffff, 0xffff };
+       static cmsUInt16Number CMYKblack[4] = { 0xffff, 0xffff, 0xffff, 0xffff };   // 400% of ink
+       static cmsUInt16Number CMYKwhite[4] = { 0, 0, 0, 0 };
+       static cmsUInt16Number LABblack[4]  = { 0, 0x8080, 0x8080 };               // V4 Lab encoding
+       static cmsUInt16Number LABwhite[4]  = { 0xFFFF, 0x8080, 0x8080 };
+       static cmsUInt16Number CMYblack[4]  = { 0xffff, 0xffff, 0xffff };
+       static cmsUInt16Number CMYwhite[4]  = { 0, 0, 0 };
+       static cmsUInt16Number Grayblack[4] = { 0 };
+       static cmsUInt16Number GrayWhite[4] = { 0xffff };
+
+       cmsUInt16Number* whitePt;
+       cmsUInt16Number* blackPt;
+       cmsUInt32Number  count;
+
+       // Pick the tables for the requested space; any space not listed gives FALSE and writes nothing
+       switch (Space) {
+
+       case cmsSigGrayData:
+           whitePt = GrayWhite;  blackPt = Grayblack;  count = 1;  break;
+
+       case cmsSigRgbData:
+           whitePt = RGBwhite;   blackPt = RGBblack;   count = 3;  break;
+
+       case cmsSigLabData:
+           whitePt = LABwhite;   blackPt = LABblack;   count = 3;  break;
+
+       case cmsSigCmykData:
+           whitePt = CMYKwhite;  blackPt = CMYKblack;  count = 4;  break;
+
+       case cmsSigCmyData:
+           whitePt = CMYwhite;   blackPt = CMYblack;   count = 3;  break;
+
+       default:
+           return FALSE;
+       }
+
+       // Every output is optional: a NULL pointer skips that value
+       if (White)    *White    = whitePt;
+       if (Black)    *Black    = blackPt;
+       if (nOutputs) *nOutputs = count;
+       return TRUE;
+}
+
+
+
+// Several utilities -------------------------------------------------------
+
+// Translate from our colorspace to ICC representation
+// Maps a PT_* colorspace code to its ICC signature; a code that is not listed gives a zero signature
+cmsColorSpaceSignature CMSEXPORT _cmsICCcolorSpace(int OurNotation)
+{
+       switch (OurNotation) {
+       // The bare values 1 and 2 are accepted as well, as aliases of gray and RGB
+       case 1:
+       case PT_GRAY: return cmsSigGrayData;
+
+       case 2:
+       case PT_RGB:  return cmsSigRgbData;
+
+       case PT_CMY:  return cmsSigCmyData;
+       case PT_CMYK: return cmsSigCmykData;
+       case PT_YCbCr:return cmsSigYCbCrData;
+       case PT_YUV:  return cmsSigLuvData;
+       case PT_XYZ:  return cmsSigXYZData;
+       // Both Lab notations share the single ICC Lab signature
+       case PT_LabV2:
+       case PT_Lab:  return cmsSigLabData;
+
+       case PT_YUVK: return cmsSigLuvKData;
+       case PT_HSV:  return cmsSigHsvData;
+       case PT_HLS:  return cmsSigHlsData;
+       case PT_Yxy:  return cmsSigYxyData;
+
+       case PT_MCH1: return cmsSigMCH1Data;
+       case PT_MCH2: return cmsSigMCH2Data;
+       case PT_MCH3: return cmsSigMCH3Data;
+       case PT_MCH4: return cmsSigMCH4Data;
+       case PT_MCH5: return cmsSigMCH5Data;
+       case PT_MCH6: return cmsSigMCH6Data;
+       case PT_MCH7: return cmsSigMCH7Data;
+       case PT_MCH8: return cmsSigMCH8Data;
+
+       case PT_MCH9:  return cmsSigMCH9Data;
+       case PT_MCH10: return cmsSigMCHAData;
+       case PT_MCH11: return cmsSigMCHBData;
+       case PT_MCH12: return cmsSigMCHCData;
+       case PT_MCH13: return cmsSigMCHDData;
+       case PT_MCH14: return cmsSigMCHEData;
+       case PT_MCH15: return cmsSigMCHFData;
+       // Unknown code
+       default:  return (cmsColorSpaceSignature) 0;
+       }
+}
+
+
+int CMSEXPORT _cmsLCMScolorSpace(cmsColorSpaceSignature ProfileSpace)
+{
+    switch (ProfileSpace) {
+    // Maps an ICC signature to the PT_* code; the NcolorData and MCHnData signatures share one code per channel count
+    case cmsSigGrayData: return  PT_GRAY;
+    case cmsSigRgbData:  return  PT_RGB;
+    case cmsSigCmyData:  return  PT_CMY;
+    case cmsSigCmykData: return  PT_CMYK;
+    case cmsSigYCbCrData:return  PT_YCbCr;
+    case cmsSigLuvData:  return  PT_YUV;
+    case cmsSigXYZData:  return  PT_XYZ;
+    case cmsSigLabData:  return  PT_Lab;
+    case cmsSigLuvKData: return  PT_YUVK;
+    case cmsSigHsvData:  return  PT_HSV;
+    case cmsSigHlsData:  return  PT_HLS;
+    case cmsSigYxyData:  return  PT_Yxy;
+
+    case cmsSig1colorData:
+    case cmsSigMCH1Data: return PT_MCH1;
+
+    case cmsSig2colorData:
+    case cmsSigMCH2Data: return PT_MCH2;
+
+    case cmsSig3colorData:
+    case cmsSigMCH3Data: return PT_MCH3;
+
+    case cmsSig4colorData:
+    case cmsSigMCH4Data: return PT_MCH4;
+
+    case cmsSig5colorData:
+    case cmsSigMCH5Data: return PT_MCH5;
+
+    case cmsSig6colorData:
+    case cmsSigMCH6Data: return PT_MCH6;
+
+    case cmsSigMCH7Data:
+    case cmsSig7colorData:return PT_MCH7;
+
+    case cmsSigMCH8Data:
+    case cmsSig8colorData:return PT_MCH8;
+
+    case cmsSigMCH9Data:
+    case cmsSig9colorData:return PT_MCH9;
+
+    case cmsSigMCHAData:
+    case cmsSig10colorData:return PT_MCH10;
+
+    case cmsSigMCHBData:
+    case cmsSig11colorData:return PT_MCH11;
+
+    case cmsSigMCHCData:
+    case cmsSig12colorData:return PT_MCH12;
+
+    case cmsSigMCHDData:
+    case cmsSig13colorData:return PT_MCH13;
+
+    case cmsSigMCHEData:
+    case cmsSig14colorData:return PT_MCH14;
+
+    case cmsSigMCHFData:
+    case cmsSig15colorData:return PT_MCH15;
+    // NOTE(review): unknown signatures give 0; the cast names the signature type although the function returns int
+    default:  return (cmsColorSpaceSignature) 0;
+    }
+}
+
+
+cmsUInt32Number CMSEXPORT cmsChannelsOf(cmsColorSpaceSignature ColorSpace)
+{
+    switch (ColorSpace) {
+    // Number of channels for each color space signature, grouped by channel count
+    case cmsSigMCH1Data:
+    case cmsSig1colorData:
+    case cmsSigGrayData: return 1;
+
+    case cmsSigMCH2Data:
+    case cmsSig2colorData:  return 2;
+
+    case cmsSigXYZData:
+    case cmsSigLabData:
+    case cmsSigLuvData:
+    case cmsSigYCbCrData:
+    case cmsSigYxyData:
+    case cmsSigRgbData:
+    case cmsSigHsvData:
+    case cmsSigHlsData:
+    case cmsSigCmyData:
+    case cmsSigMCH3Data:
+    case cmsSig3colorData:  return 3;
+
+    case cmsSigLuvKData:
+    case cmsSigCmykData:
+    case cmsSigMCH4Data:
+    case cmsSig4colorData:  return 4;
+
+    case cmsSigMCH5Data:
+    case cmsSig5colorData:  return 5;
+
+    case cmsSigMCH6Data:
+    case cmsSig6colorData:  return 6;
+
+    case cmsSigMCH7Data:
+    case cmsSig7colorData:  return  7;
+
+    case cmsSigMCH8Data:
+    case cmsSig8colorData:  return  8;
+
+    case cmsSigMCH9Data:
+    case cmsSig9colorData:  return  9;
+
+    case cmsSigMCHAData:
+    case cmsSig10colorData: return 10;
+
+    case cmsSigMCHBData:
+    case cmsSig11colorData: return 11;
+
+    case cmsSigMCHCData:
+    case cmsSig12colorData: return 12;
+
+    case cmsSigMCHDData:
+    case cmsSig13colorData: return 13;
+
+    case cmsSigMCHEData:
+    case cmsSig14colorData: return 14;
+
+    case cmsSigMCHFData:
+    case cmsSig15colorData: return 15;
+    // Any signature not listed above is reported as having 3 channels
+    default: return 3;
+    }
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsplugin.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsplugin.cpp
new file mode 100644
index 0000000000..d140dc0da5
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsplugin.cpp
@@ -0,0 +1,992 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// ----------------------------------------------------------------------------------
+// Encoding & Decoding support functions
+// ----------------------------------------------------------------------------------
+
+//      Little-Endian to Big-Endian
+
+// Adjusts a 16-bit word after it is read from / before it is written to an ICC profile.
+// Unless CMS_USE_BIG_ENDIAN is defined, the two bytes trade places.
+cmsUInt16Number CMSEXPORT  _cmsAdjustEndianess16(cmsUInt16Number Word)
+{
+#ifndef CMS_USE_BIG_ENDIAN
+    // Word is a by-value copy, so it can be edited in place through a byte view
+    cmsUInt8Number* Bytes = (cmsUInt8Number*) &Word;
+    const cmsUInt8Number First = Bytes[0];
+
+    Bytes[0] = Bytes[1];
+    Bytes[1] = First;
+#endif
+
+    return Word;
+}
+
+
+// Converts a 32-bit value to/from the encoding stored in ICC profiles, which use big endian notation.
+// Unless CMS_USE_BIG_ENDIAN is defined, the four bytes are reversed:
+//
+// 1 2 3 4
+// 4 3 2 1
+cmsUInt32Number CMSEXPORT  _cmsAdjustEndianess32(cmsUInt32Number DWord)
+{
+#ifndef CMS_USE_BIG_ENDIAN
+
+    cmsUInt8Number* Bytes = (cmsUInt8Number*) &DWord;
+    cmsUInt8Number Outer = Bytes[0];
+    cmsUInt8Number Inner = Bytes[1];
+
+    // Swap the inner pair (bytes 1 and 2)
+    Bytes[1] = Bytes[2];
+    Bytes[2] = Inner;
+    // Swap the outer pair (bytes 0 and 3)
+    Bytes[0] = Bytes[3];
+    Bytes[3] = Outer;
+#endif
+    return DWord;
+}
+
+// 64-bit counterpart of the 16/32-bit adjusters: stores into *Result the value of *QWord
+// with its eight bytes reversed, or an unchanged copy when CMS_USE_BIG_ENDIAN is defined.
+//
+// 1 2 3 4 5 6 7 8
+// 8 7 6 5 4 3 2 1
+void CMSEXPORT  _cmsAdjustEndianess64(cmsUInt64Number* Result, cmsUInt64Number* QWord)
+{
+#ifndef CMS_USE_BIG_ENDIAN
+
+    cmsUInt8Number* Src = (cmsUInt8Number*) QWord;
+    cmsUInt8Number* Dst = (cmsUInt8Number*) Result;
+    int k;
+
+    _cmsAssert(Result != NULL);
+
+    // Mirror the byte order: source byte k lands at destination byte 7-k
+    for (k = 0; k < 8; k++) {
+        Dst[7 - k] = Src[k];
+    }
+
+#else
+    _cmsAssert(Result != NULL);
+
+    // Same byte order as the profile: plain copy
+#  ifdef CMS_DONT_USE_INT64
+    // cmsUInt64Number is indexed as a two-element array in this configuration
+    (*Result)[0] = (*QWord)[0];
+    (*Result)[1] = (*QWord)[1];
+#  else
+    *Result = *QWord;
+#  endif
+#endif
+}
+
+// Auxiliary -- read 8, 16 and 32-bit numbers.
+// Reads one byte through io and stores it in *n unless n is NULL; FALSE when the read fails.
+cmsBool CMSEXPORT  _cmsReadUInt8Number(cmsIOHANDLER* io, cmsUInt8Number* n)
+{
+    cmsUInt8Number Octet;
+
+    _cmsAssert(io != NULL);
+    if (io->Read(io, &Octet, sizeof(Octet), 1) == 1) {
+        if (n) *n = Octet;   // a NULL n just skips the byte
+        return TRUE;
+    }
+    return FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsReadUInt16Number(cmsIOHANDLER* io, cmsUInt16Number* n)
+{
+    cmsUInt16Number Raw;   // value exactly as stored in the stream
+    _cmsAssert(io != NULL);
+
+    // Reads one 16-bit item; stores the endian-adjusted value unless n is NULL
+    if (io->Read(io, &Raw, sizeof(Raw), 1) == 1) {
+        if (n) *n = _cmsAdjustEndianess16(Raw);
+        return TRUE;
+    }
+    return FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsReadUInt16Array(cmsIOHANDLER* io, cmsUInt32Number n, cmsUInt16Number* Array)
+{
+    cmsUInt32Number Remaining = n;
+    cmsUInt16Number* Slot = Array;
+
+    _cmsAssert(io != NULL);
+
+    // Reads n consecutive 16-bit numbers. With a NULL Array the values are
+    // still consumed from the stream but not stored anywhere.
+    while (Remaining-- > 0) {
+
+        if (!_cmsReadUInt16Number(io, Slot)) return FALSE;
+
+        // Only advance when there is a real destination
+        if (Slot != NULL) Slot++;
+    }
+    return TRUE;
+}
+
+cmsBool CMSEXPORT  _cmsReadUInt32Number(cmsIOHANDLER* io, cmsUInt32Number* n)
+{
+    cmsUInt32Number Raw;
+
+    _cmsAssert(io != NULL);
+    // One 32-bit item is read; a failed read reports FALSE
+    if (io->Read(io, &Raw, sizeof(Raw), 1) != 1) return FALSE;
+
+    // Store the endian-adjusted value unless the caller passed NULL
+    if (n) *n = _cmsAdjustEndianess32(Raw);
+    return TRUE;
+}
+
+// Reads a 32-bit float stored in profile byte order. Returns FALSE when the read fails or the value is
+// outside +/-1E+20; where fpclassify() is used, anything other than zero/normal also fails.
+cmsBool CMSEXPORT  _cmsReadFloat32Number(cmsIOHANDLER* io, cmsFloat32Number* n)
+{
+    cmsUInt32Number tmp;
+
+    _cmsAssert(io != NULL);
+
+    if (io->Read(io, &tmp, sizeof(cmsUInt32Number), 1) != 1)
+        return FALSE;
+
+    if (n != NULL) {
+        tmp = _cmsAdjustEndianess32(tmp);
+        // Copy the bit pattern instead of dereferencing a casted pointer, which breaks strict aliasing
+        memcpy(n, &tmp, sizeof(tmp));
+
+        // Safeguard which covers against absurd values
+        if (*n > 1E+20 || *n < -1E+20) return FALSE;
+
+        #if defined(_MSC_VER) && _MSC_VER < 1800
+           return TRUE;
+        #elif defined (__BORLANDC__)
+           return TRUE;
+        #else
+           // fpclassify() required by C99 (only provided by MSVC >= 1800, VS2013 onwards)
+           return ((fpclassify(*n) == FP_ZERO) || (fpclassify(*n) == FP_NORMAL));
+        #endif
+    }
+    return TRUE;  // n == NULL: the bytes were consumed and discarded
+}
+
+
+cmsBool CMSEXPORT   _cmsReadUInt64Number(cmsIOHANDLER* io, cmsUInt64Number* n)
+{
+    cmsUInt64Number Raw;   // bytes exactly as stored in the stream
+
+    _cmsAssert(io != NULL);
+
+    // Reads one 64-bit item; FALSE when the handler does not deliver it
+    if (io->Read(io, &Raw, sizeof(Raw), 1) == 1) {
+
+        // The caller may pass NULL to skip the value
+        if (n) _cmsAdjustEndianess64(n, &Raw);
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+
+cmsBool CMSEXPORT  _cmsRead15Fixed16Number(cmsIOHANDLER* io, cmsFloat64Number* n)
+{
+    cmsUInt32Number Encoded;
+    cmsS15Fixed16Number Fixed;
+
+    _cmsAssert(io != NULL);
+
+    // Reads one 32-bit 15.16 fixed point value and hands it back as a double (n may be NULL to skip it)
+    if (io->Read(io, &Encoded, sizeof(Encoded), 1) != 1) return FALSE;
+    if (n == NULL) return TRUE;
+
+    Fixed = (cmsS15Fixed16Number) _cmsAdjustEndianess32(Encoded);
+    *n = _cms15Fixed16toDouble(Fixed);
+    return TRUE;
+}
+
+
+cmsBool CMSEXPORT  _cmsReadXYZNumber(cmsIOHANDLER* io, cmsCIEXYZ* XYZ)
+{
+    cmsEncodedXYZNumber Encoded;
+
+    _cmsAssert(io != NULL);
+
+    // The three encoded components are fetched with a single read
+    if (io->Read(io, &Encoded, sizeof(Encoded), 1) != 1) return FALSE;
+    // NULL destination: the triplet is consumed and dropped
+    if (XYZ == NULL) return TRUE;
+
+    XYZ->X = _cms15Fixed16toDouble((cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) Encoded.X));
+    XYZ->Y = _cms15Fixed16toDouble((cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) Encoded.Y));
+    XYZ->Z = _cms15Fixed16toDouble((cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) Encoded.Z));
+    return TRUE;
+}
+
+// Writes the single byte n through io.
+cmsBool CMSEXPORT  _cmsWriteUInt8Number(cmsIOHANDLER* io, cmsUInt8Number n)
+{
+    _cmsAssert(io != NULL);
+
+    // TRUE only when the handler's Write reports 1
+    return (io->Write(io, sizeof(n), &n) == 1) ? TRUE : FALSE;
+
+}
+
+cmsBool CMSEXPORT  _cmsWriteUInt16Number(cmsIOHANDLER* io, cmsUInt16Number n)
+{
+    cmsUInt16Number Encoded;
+
+    _cmsAssert(io != NULL);
+
+    // Put the value in profile byte order before it goes out
+    Encoded = _cmsAdjustEndianess16(n);
+
+    // TRUE only when the handler's Write reports 1
+    return (io->Write(io, sizeof(Encoded), &Encoded) == 1) ? TRUE : FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsWriteUInt16Array(cmsIOHANDLER* io, cmsUInt32Number n, const cmsUInt16Number* Array)
+{
+    cmsUInt32Number Left;
+
+    _cmsAssert(io != NULL);
+    _cmsAssert(Array != NULL);
+
+    // Emit the n entries in order, giving up at the first failed write
+    for (Left = n; Left > 0; Left--, Array++) {
+        if (!_cmsWriteUInt16Number(io, *Array)) return FALSE;
+    }
+    return TRUE;
+}
+
+cmsBool CMSEXPORT  _cmsWriteUInt32Number(cmsIOHANDLER* io, cmsUInt32Number n)
+{
+    cmsUInt32Number Encoded;
+
+    _cmsAssert(io != NULL);
+
+    // Put the value in profile byte order before it goes out
+    Encoded = _cmsAdjustEndianess32(n);
+
+    // TRUE only when the handler's Write reports 1
+    return (io->Write(io, sizeof(Encoded), &Encoded) == 1) ? TRUE : FALSE;
+}
+
+
+// Writes n as a 32-bit float in profile byte order. TRUE only when the handler's Write reports 1.
+cmsBool CMSEXPORT  _cmsWriteFloat32Number(cmsIOHANDLER* io, cmsFloat32Number n)
+{
+    cmsUInt32Number tmp;
+
+    _cmsAssert(io != NULL);
+
+    // Copy the bit pattern; reading the float through a casted integer pointer breaks strict aliasing
+    memcpy(&tmp, &n, sizeof(tmp));
+    tmp = _cmsAdjustEndianess32(tmp);
+
+    return (io -> Write(io, sizeof(cmsUInt32Number), &tmp) == 1) ? TRUE : FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsWriteUInt64Number(cmsIOHANDLER* io, cmsUInt64Number* n)
+{
+    cmsUInt64Number Encoded;
+
+    _cmsAssert(io != NULL);
+
+    // Encoded receives *n in profile byte order; *n itself is not written to
+    _cmsAdjustEndianess64(&Encoded, n);
+
+    // TRUE only when the handler's Write reports 1
+    return (io->Write(io, sizeof(Encoded), &Encoded) == 1) ? TRUE : FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsWrite15Fixed16Number(cmsIOHANDLER* io, cmsFloat64Number n)
+{
+    cmsS15Fixed16Number Fixed;
+    cmsUInt32Number Encoded;
+
+    _cmsAssert(io != NULL);
+
+    // double -> 15.16 fixed point -> profile byte order
+    Fixed   = _cmsDoubleTo15Fixed16(n);
+    Encoded = _cmsAdjustEndianess32((cmsUInt32Number) Fixed);
+    return (io->Write(io, sizeof(Encoded), &Encoded) == 1) ? TRUE : FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsWriteXYZNumber(cmsIOHANDLER* io, const cmsCIEXYZ* XYZ)
+{
+    cmsEncodedXYZNumber Encoded;
+
+    _cmsAssert(io != NULL);
+    _cmsAssert(XYZ != NULL);
+
+    // Each component: double -> 15.16 fixed point -> profile byte order
+    Encoded.X = (cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(XYZ->X));
+    Encoded.Y = (cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(XYZ->Y));
+    Encoded.Z = (cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(XYZ->Z));
+    return io->Write(io, sizeof(Encoded), &Encoded);   // the whole triplet goes out in one write
+}
+
+// from Fixed point 8.8 to double
+cmsFloat64Number CMSEXPORT _cms8Fixed8toDouble(cmsUInt16Number fixed8)
+{
+    // High byte: integer part. Low byte: fraction in 1/256 steps.
+    const cmsUInt8Number Whole = (cmsUInt8Number) ((fixed8 >> 8) & 0xff);
+    const cmsUInt8Number Frac  = (cmsUInt8Number) (fixed8 & 0xff);
+    const cmsFloat64Number Fraction = (cmsFloat64Number) Frac / 256.0;
+
+    return (cmsFloat64Number) Whole + Fraction;
+}
+
+cmsUInt16Number CMSEXPORT _cmsDoubleTo8Fixed8(cmsFloat64Number val)
+{
+    // Go through 15.16 and drop its low 8 fraction bits; the next 16 bits are the 8.8 value
+    return (cmsUInt16Number) ((_cmsDoubleTo15Fixed16(val) >> 8) & 0xFFFF);
+}
+
+// from Fixed point 15.16 to double
+//
+// A 15.16 number is the real value scaled by 65536, so one division recovers it.
+// Dividing by a power of two is exact, so the result matches the former
+// sign / whole part / fractional part reconstruction for every input that
+// reconstruction handled.
+cmsFloat64Number CMSEXPORT _cms15Fixed16toDouble(cmsS15Fixed16Number fix32)
+{
+    // Deliberately no abs(): for the most negative representable fix32 the
+    // negation overflows, which is undefined behaviour. Plain division has no
+    // such corner case (assuming a 32-bit signed cmsS15Fixed16Number, that
+    // input maps to -32768.0 -- TODO confirm the typedef).
+    cmsFloat64Number Scaled = (cmsFloat64Number) fix32;
+
+    return Scaled / 65536.0;
+
+}
+
+// from double to Fixed point 15.16: scales v by 65536 and rounds with floor(x + 0.5)
+cmsS15Fixed16Number CMSEXPORT _cmsDoubleTo15Fixed16(cmsFloat64Number v)
+{
+    return ((cmsS15Fixed16Number) floor((v)*65536.0 + 0.5));
+}
+
+// Date/Time functions
+
+void CMSEXPORT _cmsDecodeDateTimeNumber(const cmsDateTimeNumber *Source, struct tm *Dest)
+{
+    _cmsAssert(Dest != NULL);
+    _cmsAssert(Source != NULL);
+    // Calendar date first: struct tm counts months from 0 and years from 1900
+    Dest->tm_year  = _cmsAdjustEndianess16(Source->year) - 1900;
+    Dest->tm_mon   = _cmsAdjustEndianess16(Source->month) - 1;
+    Dest->tm_mday  = _cmsAdjustEndianess16(Source->day);
+    Dest->tm_hour  = _cmsAdjustEndianess16(Source->hours);      // then the time of day
+    Dest->tm_min   = _cmsAdjustEndianess16(Source->minutes);
+    Dest->tm_sec   = _cmsAdjustEndianess16(Source->seconds);
+    // Not taken from Source: fixed values
+    Dest->tm_isdst = 0;
+    Dest->tm_yday  = -1;
+    Dest->tm_wday  = -1;
+}
+
+void CMSEXPORT _cmsEncodeDateTimeNumber(cmsDateTimeNumber *Dest, const struct tm *Source)
+{
+    _cmsAssert(Dest != NULL);
+    _cmsAssert(Source != NULL);
+    // Undo the struct tm offsets (year - 1900, month - 1); every field is stored in profile byte order
+    Dest->year    = _cmsAdjustEndianess16((cmsUInt16Number) (Source->tm_year + 1900));
+    Dest->month   = _cmsAdjustEndianess16((cmsUInt16Number) (Source->tm_mon + 1));
+    Dest->day     = _cmsAdjustEndianess16((cmsUInt16Number) Source->tm_mday);
+    Dest->hours   = _cmsAdjustEndianess16((cmsUInt16Number) Source->tm_hour);
+    Dest->minutes = _cmsAdjustEndianess16((cmsUInt16Number) Source->tm_min);
+    Dest->seconds = _cmsAdjustEndianess16((cmsUInt16Number) Source->tm_sec);
+}
+
+// Reads a tag base record and returns its type signature; 0 when the record cannot be read
+cmsTagTypeSignature CMSEXPORT _cmsReadTypeBase(cmsIOHANDLER* io)
+{
+    _cmsTagBase Header;
+    cmsBool Got;
+
+    _cmsAssert(io != NULL);
+
+    Got = (io->Read(io, &Header, sizeof(Header), 1) == 1);
+    return Got ? (cmsTagTypeSignature) _cmsAdjustEndianess32(Header.sig)
+               : (cmsTagTypeSignature) 0;
+}
+
+// Writes a tag base record: the type signature in profile byte order plus a zeroed reserved field
+cmsBool  CMSEXPORT _cmsWriteTypeBase(cmsIOHANDLER* io, cmsTagTypeSignature sig)
+{
+    _cmsTagBase  Header;
+
+    _cmsAssert(io != NULL);
+
+    memset(&Header.reserved, 0, sizeof(Header.reserved));
+    Header.sig = (cmsTagTypeSignature) _cmsAdjustEndianess32(sig);
+    return io->Write(io, sizeof(Header), &Header);
+}
+
+cmsBool CMSEXPORT _cmsReadAlignment(cmsIOHANDLER* io)
+{
+    cmsUInt8Number  Scratch[4];
+    cmsUInt32Number Pos, Aligned, Gap;
+
+    _cmsAssert(io != NULL);
+
+    // Distance from the current offset to the one _cmsALIGNLONG rounds it to
+    Pos     = io->Tell(io);
+    Aligned = _cmsALIGNLONG(Pos);
+    Gap     = Aligned - Pos;
+
+    if (Gap > 4) return FALSE;            // would not fit in Scratch
+    if (Gap == 0) return TRUE;            // already aligned
+    return (io->Read(io, Scratch, Gap, 1) == 1);   // consume the padding as one item
+}
+
+cmsBool CMSEXPORT _cmsWriteAlignment(cmsIOHANDLER* io)
+{
+    cmsUInt8Number  Padding[4];
+    cmsUInt32Number Pos, Aligned, Gap;
+
+    _cmsAssert(io != NULL);
+
+    // Number of filler bytes between the current offset and the one _cmsALIGNLONG rounds it to
+    Pos     = io->Tell(io);
+    Aligned = _cmsALIGNLONG(Pos);
+    Gap     = Aligned - Pos;
+
+    if (Gap > 4) return FALSE;            // would not fit in Padding
+    if (Gap == 0) return TRUE;            // already aligned
+    memset(Padding, 0, Gap);              // the filler is all zero bytes
+    return io->Write(io, Gap, Padding);
+}
+
+
+// To deal with text streams. 2K at most: formats frm with the variadic arguments and writes
+// the resulting text (without the terminating zero) through io.
+// Returns FALSE when formatting fails or the text does not fit; otherwise the result of Write.
+cmsBool CMSEXPORT _cmsIOPrintf(cmsIOHANDLER* io, const char* frm, ...)
+{
+    va_list args;
+    int len;
+    cmsUInt8Number Buffer[2048];
+
+    _cmsAssert(io != NULL);
+    _cmsAssert(frm != NULL);
+
+    va_start(args, frm);
+    len = vsnprintf((char*) Buffer, 2047, frm, args);
+    va_end(args);
+
+    // A C99 vsnprintf reports truncation by returning the length the full text would have
+    // had (>= the size passed in), not a negative value. Writing that many bytes would read
+    // past the end of Buffer, so both outcomes are rejected.
+    if (len < 0 || len >= 2047) {
+        return FALSE;   // Truncated, which is a fatal error for us
+    }
+
+    return io ->Write(io, (cmsUInt32Number) len, Buffer);
+}
+
+
+// Plugin memory management -------------------------------------------------------------------------------------------------
+
+// Specialized malloc for plug-ins, that is freed upon exit.
+void* _cmsPluginMalloc(cmsContext ContextID, cmsUInt32Number size)
+{
+    struct _cmsContext_struct* Owner = _cmsGetContext(ContextID);
+
+    if (Owner->MemPool != NULL)
+        return _cmsSubAlloc(Owner->MemPool, size);
+
+    // No pool yet. That is only acceptable for the NULL ContextID.
+    if (ContextID != NULL) {
+        cmsSignalError(ContextID, cmsERROR_CORRUPTION_DETECTED, "NULL memory pool on context");
+        return NULL;
+    }
+
+    // Create the pool lazily on first use
+    Owner->MemPool = _cmsCreateSubAlloc(0, 2*1024);
+    if (Owner->MemPool == NULL) return NULL;
+
+    return _cmsSubAlloc(Owner->MemPool, size);
+}
+
+
+// Main plug-in dispatcher: forwards the plug-in chain to cmsPluginTHR() with a NULL context
+cmsBool CMSEXPORT cmsPlugin(void* Plug_in)
+{
+    return cmsPluginTHR(NULL, Plug_in);
+}
+
+cmsBool CMSEXPORT cmsPluginTHR(cmsContext id, void* Plug_in)
+{
+    cmsPluginBase* Plugin;
+    // Walks the Next-linked chain starting at Plug_in; the first entry that fails a check or its registration ends the walk with FALSE
+    for (Plugin = (cmsPluginBase*) Plug_in;
+         Plugin != NULL;
+         Plugin = Plugin -> Next) {
+            // Every entry must carry the plug-in magic number
+            if (Plugin -> Magic != cmsPluginMagicNumber) {
+                cmsSignalError(id, cmsERROR_UNKNOWN_EXTENSION, "Unrecognized plugin");
+                return FALSE;
+            }
+            // An entry that expects a Little CMS version above LCMS_VERSION is rejected
+            if (Plugin ->ExpectedVersion > LCMS_VERSION) {
+                cmsSignalError(id, cmsERROR_UNKNOWN_EXTENSION, "plugin needs Little CMS %d, current version is %d",
+                    Plugin ->ExpectedVersion, LCMS_VERSION);
+                return FALSE;
+            }
+            // Hand the entry to the registrar that matches its type
+            switch (Plugin -> Type) {
+
+                case cmsPluginMemHandlerSig:
+                    if (!_cmsRegisterMemHandlerPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginInterpolationSig:
+                    if (!_cmsRegisterInterpPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginTagTypeSig:
+                    if (!_cmsRegisterTagTypePlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginTagSig:
+                    if (!_cmsRegisterTagPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginFormattersSig:
+                    if (!_cmsRegisterFormattersPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginRenderingIntentSig:
+                    if (!_cmsRegisterRenderingIntentPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginParametricCurveSig:
+                    if (!_cmsRegisterParametricCurvesPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginMultiProcessElementSig:
+                    if (!_cmsRegisterMultiProcessElementPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginOptimizationSig:
+                    if (!_cmsRegisterOptimizationPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginTransformSig:
+                    if (!_cmsRegisterTransformPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                case cmsPluginMutexSig:
+                    if (!_cmsRegisterMutexPlugin(id, Plugin)) return FALSE;
+                    break;
+
+                default:
+                    cmsSignalError(id, cmsERROR_UNKNOWN_EXTENSION, "Unrecognized plugin type '%X'", Plugin -> Type);
+                    return FALSE;
+            }
+    }
+
+    // Every entry of the chain was registered (a NULL Plug_in also ends up here)
+    return TRUE;
+}
+
+
+// Revert all plug-ins to default: forwards to cmsUnregisterPluginsTHR() with a NULL context
+void CMSEXPORT cmsUnregisterPlugins(void)
+{
+    cmsUnregisterPluginsTHR(NULL);
+}
+
+
+// The global storage for the system context (context 0). This is the one and only structure
+// of global pointers: all global vars are referenced from here.
+static struct _cmsContext_struct globalContext = {
+
+    NULL,                              // Not in the linked list
+    NULL,                              // No suballocator
+    {
+        NULL,                          //  UserPtr,
+        &_cmsLogErrorChunk,            //  Logger,
+        &_cmsAlarmCodesChunk,          //  AlarmCodes,
+        &_cmsAdaptationStateChunk,     //  AdaptationState,
+        &_cmsMemPluginChunk,           //  MemPlugin,
+        &_cmsInterpPluginChunk,        //  InterpPlugin,
+        &_cmsCurvesPluginChunk,        //  CurvesPlugin,
+        &_cmsFormattersPluginChunk,    //  FormattersPlugin,
+        &_cmsTagTypePluginChunk,       //  TagTypePlugin,
+        &_cmsTagPluginChunk,           //  TagPlugin,
+        &_cmsIntentsPluginChunk,       //  IntentPlugin,
+        &_cmsMPETypePluginChunk,       //  MPEPlugin,
+        &_cmsOptimizationPluginChunk,  //  OptimizationPlugin,
+        &_cmsTransformPluginChunk,     //  TransformPlugin,
+        &_cmsMutexPluginChunk          //  MutexPlugin
+    },
+    
+    { NULL, NULL, NULL, NULL, NULL, NULL } // The default memory allocator is not used for context 0
+};
+
+
+// The context pool (linked list head); insertions and removals take _cmsContextPoolHeadMutex
+static _cmsMutex _cmsContextPoolHeadMutex = CMS_MUTEX_INITIALIZER;
+static struct _cmsContext_struct* _cmsContextPoolHead = NULL;
+
+// Internal: maps a ContextID to its context structure. A NULL ID, or one that is
+// not found in the context pool, resolves to the global context. Never returns NULL.
+struct _cmsContext_struct* _cmsGetContext(cmsContext ContextID)
+{
+    struct _cmsContext_struct* Wanted = (struct _cmsContext_struct*) ContextID;
+    struct _cmsContext_struct* Node;
+
+    // On 0, use global settings
+    if (Wanted == NULL) return &globalContext;
+
+    // Walk the pool looking for this exact pointer
+    Node = _cmsContextPoolHead;
+    while (Node != NULL) {
+
+        if (Node == Wanted)
+            return Node;   // present in the pool
+
+        Node = Node->Next;
+    }
+
+    // Unknown pointer: fall back to the global settings
+    return &globalContext;
+}
+
+
+// Internal: get the memory area associated with each context client.
+// Returns the block assigned to the specific zone. When the context has no
+// block of its own for that zone, the global context's block is returned
+// instead (which, for UserPtr, is initialised to NULL).
+void* _cmsContextGetClientChunk(cmsContext ContextID, _cmsMemoryClient mc)
+{
+    void *Own;
+
+    if (!((int) mc >= 0 && mc < MemoryClientMax)) {
+
+           cmsSignalError(ContextID, cmsERROR_INTERNAL, "Bad context client -- possible corruption");
+
+           // This is catastrophic. Should never reach here
+           _cmsAssert(0);
+
+           // If execution continues past the assertion, hand back the global UserPtr slot
+           return globalContext.chunks[UserPtr];
+    }
+
+    // The context's own block wins when there is one...
+    Own = _cmsGetContext(ContextID)->chunks[mc];
+
+    if (Own != NULL) return Own;
+
+    // ...a null block means no special settings for that context, and this
+    // reverts to Context0 globals
+    return globalContext.chunks[mc];
+}
+
+
+// This function returns the given context to its default pristine state,
+// as if no plug-ins were declared. There is no way to unregister a single
+// plug-in: a single call to cmsPluginTHR() may register many different
+// plug-ins simultaneously, so there is no way to identify which plug-in
+// to unregister.
+void CMSEXPORT cmsUnregisterPluginsTHR(cmsContext ContextID)
+{
+    _cmsRegisterMemHandlerPlugin(ContextID, NULL);   // every registrar below is called with a NULL plug-in
+    _cmsRegisterInterpPlugin(ContextID, NULL);
+    _cmsRegisterTagTypePlugin(ContextID, NULL);
+    _cmsRegisterTagPlugin(ContextID, NULL);
+    _cmsRegisterFormattersPlugin(ContextID, NULL);
+    _cmsRegisterRenderingIntentPlugin(ContextID, NULL);
+    _cmsRegisterParametricCurvesPlugin(ContextID, NULL);
+    _cmsRegisterMultiProcessElementPlugin(ContextID, NULL);
+    _cmsRegisterOptimizationPlugin(ContextID, NULL);
+    _cmsRegisterTransformPlugin(ContextID, NULL);    
+    _cmsRegisterMutexPlugin(ContextID, NULL);
+}
+
+
+// Returns the memory manager plug-in, if any, from the Plug-in bundle
+static
+cmsPluginMemHandler* _cmsFindMemoryPlugin(void* PluginBundle)
+{
+    cmsPluginBase* Entry = (cmsPluginBase*) PluginBundle;
+
+    // The first chain entry with the right magic, a supported version and the memory handler type wins
+    while (Entry != NULL) {
+
+        if (Entry->Magic == cmsPluginMagicNumber &&
+            Entry->ExpectedVersion <= LCMS_VERSION &&
+            Entry->Type == cmsPluginMemHandlerSig) {
+
+                // Found!
+                return (cmsPluginMemHandler*) Entry;
+        }
+        Entry = Entry->Next;
+    }
+
+    // Nope, revert to defaults
+    return NULL;
+}
+
+
+// Creates a new context with optional associated plug-ins. Caller may also specify an optional pointer to user-defined
+// data that will be forwarded to plug-ins and logger. Returns NULL on any failure (allocation, plug-in registration, Windows mutex setup).
+cmsContext CMSEXPORT cmsCreateContext(void* Plugin, void* UserData)
+{
+    struct _cmsContext_struct* ctx;
+    struct _cmsContext_struct  fakeContext;
+        
+    // See the comments regarding locking in lcms2_internal.h
+    // for an explanation of why we need the following code.
+#ifdef CMS_IS_WINDOWS_
+#ifndef CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+    {
+        static HANDLE _cmsWindowsInitMutex = NULL;
+        static volatile HANDLE* mutex = &_cmsWindowsInitMutex;
+
+        if (*mutex == NULL)
+        {
+            HANDLE p = CreateMutex(NULL, FALSE, NULL);
+            if (p && InterlockedCompareExchangePointer((void **)mutex, (void*)p, NULL) != NULL)
+                CloseHandle(p);   // the exchange reported a handle already stored: drop ours
+        }
+        if (*mutex == NULL || WaitForSingleObject(*mutex, INFINITE) == WAIT_FAILED)
+            return NULL;
+        if (((void **)&_cmsContextPoolHeadMutex)[0] == NULL)   // first pointer-sized word still NULL: initialise the critical section
+            InitializeCriticalSection(&_cmsContextPoolHeadMutex);
+        if (*mutex == NULL || !ReleaseMutex(*mutex))
+            return NULL;
+    }
+#endif
+#endif
+    // fakeContext is a stack stand-in that only carries the allocators and user data handed to _cmsMalloc() below
+    _cmsInstallAllocFunctions(_cmsFindMemoryPlugin(Plugin), &fakeContext.DefaultMemoryManager);
+    
+    fakeContext.chunks[UserPtr]     = UserData;
+    fakeContext.chunks[MemPlugin]   = &fakeContext.DefaultMemoryManager;
+
+    // Create the context structure.
+    ctx = (struct _cmsContext_struct*) _cmsMalloc(&fakeContext, sizeof(struct _cmsContext_struct));
+    if (ctx == NULL)   
+        return NULL;     // Something very wrong happened!
+
+    // Init the structure and the memory manager
+    memset(ctx, 0, sizeof(struct _cmsContext_struct));
+
+    // Keep memory manager
+    memcpy(&ctx->DefaultMemoryManager, &fakeContext.DefaultMemoryManager, sizeof(_cmsMemPluginChunk)); 
+   
+    // Maintain the linked list (with proper locking)
+    _cmsEnterCriticalSectionPrimitive(&_cmsContextPoolHeadMutex);
+       ctx ->Next = _cmsContextPoolHead;
+       _cmsContextPoolHead = ctx;
+    _cmsLeaveCriticalSectionPrimitive(&_cmsContextPoolHeadMutex);
+
+    ctx ->chunks[UserPtr]     = UserData;
+    ctx ->chunks[MemPlugin]   = &ctx->DefaultMemoryManager;
+   
+    // Now we can allocate the pool by using default memory manager
+    ctx ->MemPool = _cmsCreateSubAlloc(ctx, 22 * sizeof(void*));  // default size about 22 pointers
+    if (ctx ->MemPool == NULL) {
+
+         cmsDeleteContext(ctx);
+        return NULL;
+    }
+    // Set up every chunk with a NULL source context (presumably selects default values -- confirm in each _cmsAlloc*Chunk)
+    _cmsAllocLogErrorChunk(ctx, NULL);
+    _cmsAllocAlarmCodesChunk(ctx, NULL);
+    _cmsAllocAdaptationStateChunk(ctx, NULL);
+    _cmsAllocMemPluginChunk(ctx, NULL);
+    _cmsAllocInterpPluginChunk(ctx, NULL);
+    _cmsAllocCurvesPluginChunk(ctx, NULL);
+    _cmsAllocFormattersPluginChunk(ctx, NULL);
+    _cmsAllocTagTypePluginChunk(ctx, NULL);
+    _cmsAllocMPETypePluginChunk(ctx, NULL);
+    _cmsAllocTagPluginChunk(ctx, NULL);
+    _cmsAllocIntentsPluginChunk(ctx, NULL);
+    _cmsAllocOptimizationPluginChunk(ctx, NULL);
+    _cmsAllocTransformPluginChunk(ctx, NULL);
+    _cmsAllocMutexPluginChunk(ctx, NULL);
+
+    // Setup the plug-ins
+    if (!cmsPluginTHR(ctx, Plugin)) {
+    
+        cmsDeleteContext(ctx);
+        return NULL;
+    }
+
+    return (cmsContext) ctx;  
+}
+
+// Duplicates a context with all associated plug-ins.
+// Caller may specify an optional pointer to user-defined data that will be forwarded to plug-ins
+// and logger; with a NULL NewUserData the source context's user data is reused. NULL on failure.
+cmsContext CMSEXPORT cmsDupContext(cmsContext ContextID, void* NewUserData)
+{
+    int i;
+    struct _cmsContext_struct* ctx;
+    const struct _cmsContext_struct* src = _cmsGetContext(ContextID);
+
+    void* userData = (NewUserData != NULL) ? NewUserData : src -> chunks[UserPtr];
+
+    ctx = (struct _cmsContext_struct*) _cmsMalloc(ContextID, sizeof(struct _cmsContext_struct));
+    if (ctx == NULL)
+        return NULL;     // Something very wrong happened
+
+    // Zero the structure, as cmsCreateContext() does: cmsDeleteContext() on a failure path must not see indeterminate chunks
+    memset(ctx, 0, sizeof(struct _cmsContext_struct));
+
+    // Setup default memory allocators
+    memcpy(&ctx->DefaultMemoryManager, &src->DefaultMemoryManager, sizeof(ctx->DefaultMemoryManager));
+
+    // Maintain the linked list
+    _cmsEnterCriticalSectionPrimitive(&_cmsContextPoolHeadMutex);
+       ctx ->Next = _cmsContextPoolHead;
+       _cmsContextPoolHead = ctx;
+    _cmsLeaveCriticalSectionPrimitive(&_cmsContextPoolHeadMutex);
+
+    ctx ->chunks[UserPtr]    = userData;
+    ctx ->chunks[MemPlugin]  = &ctx->DefaultMemoryManager;
+
+    ctx ->MemPool = _cmsCreateSubAlloc(ctx, 22 * sizeof(void*));
+    if (ctx ->MemPool == NULL) {
+        cmsDeleteContext(ctx);
+        return NULL;
+    }
+
+    // Allocate all required chunks.
+    _cmsAllocLogErrorChunk(ctx, src);
+    _cmsAllocAlarmCodesChunk(ctx, src);
+    _cmsAllocAdaptationStateChunk(ctx, src);
+    _cmsAllocMemPluginChunk(ctx, src);
+    _cmsAllocInterpPluginChunk(ctx, src);
+    _cmsAllocCurvesPluginChunk(ctx, src);
+    _cmsAllocFormattersPluginChunk(ctx, src);
+    _cmsAllocTagTypePluginChunk(ctx, src);
+    _cmsAllocMPETypePluginChunk(ctx, src);
+    _cmsAllocTagPluginChunk(ctx, src);
+    _cmsAllocIntentsPluginChunk(ctx, src);
+    _cmsAllocOptimizationPluginChunk(ctx, src);
+    _cmsAllocTransformPluginChunk(ctx, src);
+    _cmsAllocMutexPluginChunk(ctx, src);
+
+    // Make sure no one failed: it is the new context, not the source, that must own every chunk
+    for (i=Logger; i < MemoryClientMax; i++) {
+        if (ctx ->chunks[i] == NULL) {
+            cmsDeleteContext((cmsContext) ctx);
+            return NULL;
+        }
+    }
+
+    return (cmsContext) ctx;
+}
+
+
+/*
+static
+struct _cmsContext_struct* FindPrev(struct _cmsContext_struct* id)
+{
+    struct _cmsContext_struct* prev;
+
+    // Search for previous
+    for (prev = _cmsContextPoolHead; 
+             prev != NULL;
+             prev = prev ->Next)
+    {
+        if (prev ->Next == id)
+            return prev;
+    }
+
+    return NULL;  // List is empty or only one element!
+}
+*/
+
+// Frees any resources associated with the given context,
+// and destroys the context placeholder.
+// The ContextID can no longer be used in any THR operation. A NULL ContextID is ignored.
+void CMSEXPORT cmsDeleteContext(cmsContext ContextID)
+{
+    if (ContextID != NULL) {
+
+        struct _cmsContext_struct* ctx = (struct _cmsContext_struct*) ContextID;              
+        struct _cmsContext_struct  fakeContext;  
+        struct _cmsContext_struct* prev;
+        // Copy the allocators and user data to the stack: they are what the final _cmsFree() of ctx is given
+        memcpy(&fakeContext.DefaultMemoryManager, &ctx->DefaultMemoryManager, sizeof(ctx->DefaultMemoryManager));
+
+        fakeContext.chunks[UserPtr]     = ctx ->chunks[UserPtr];
+        fakeContext.chunks[MemPlugin]   = &fakeContext.DefaultMemoryManager;
+
+        // Get rid of plugins
+        cmsUnregisterPluginsTHR(ContextID); 
+
+        // Since all memory is allocated in the private pool, all what we need to do is destroy the pool
+        if (ctx -> MemPool != NULL)
+              _cmsSubAllocDestroy(ctx ->MemPool);
+        ctx -> MemPool = NULL;
+
+        // Maintain list: unlink ctx from the pool while holding the pool mutex
+        _cmsEnterCriticalSectionPrimitive(&_cmsContextPoolHeadMutex);
+        if (_cmsContextPoolHead == ctx) { 
+
+            _cmsContextPoolHead = ctx->Next;
+        }
+        else {
+
+            // Search for previous
+            for (prev = _cmsContextPoolHead; 
+                 prev != NULL;
+                 prev = prev ->Next)
+            {
+                if (prev -> Next == ctx) {
+                    prev -> Next = ctx ->Next;
+                    break;
+                }
+            }
+        }
+        _cmsLeaveCriticalSectionPrimitive(&_cmsContextPoolHeadMutex);
+
+        // free the memory block itself, through the stack copy of its allocators
+        _cmsFree(&fakeContext, ctx);
+    }
+}
+
+// Returns the UserPtr chunk that _cmsContextGetClientChunk() resolves for ContextID: the user data attached on context creation, if any
+void* CMSEXPORT cmsGetContextUserData(cmsContext ContextID)
+{
+    return _cmsContextGetClientChunk(ContextID, UserPtr);
+}
+
+
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsps2.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsps2.cpp
new file mode 100644
index 0000000000..deab55d11f
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsps2.cpp
@@ -0,0 +1,1597 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// PostScript ColorRenderingDictionary and ColorSpaceArray
+
+
+#define MAXPSCOLS   60      // Column limit for hex table output: WriteByte starts a new line once it is exceeded
+
+/*
+    Implementation
+    --------------
+
+  PostScript does use XYZ as its internal PCS. But since PostScript
+  interpolation tables are limited to 8 bits, I use Lab as a way to
+  improve the accuracy, favoring perceptual results. So, for the creation
+  of each CRD, CSA the profiles are converted to Lab via a device
+  link between  profile -> Lab or Lab -> profile. The PS code necessary to
+  convert Lab <-> XYZ is also included.
+
+
+
+  Color Space Arrays (CSA)
+  ==================================================================================
+
+  In order to obtain precision, code chooses between three ways to implement
+  the device -> XYZ transform. These cases are monochrome profiles (often
+  implemented as a set of curves), matrix-shaper and Pipeline-based.
+
+  Monochrome
+  -----------
+
+  This is implemented as /CIEBasedA CSA. The prelinearization curve is
+  placed into /DecodeA section, and the matrix equals D50. Since there are
+  no interpolation tables, I do the conversion directly to XYZ
+
+  NOTE: CLUT-based monochrome profiles are NOT supported. So, cmsFLAGS_MATRIXINPUT
+  flag is forced on such profiles.
+
+    [ /CIEBasedA
+      <<
+            /DecodeA { transfer function } bind
+            /MatrixA [D50]
+            /RangeLMN [ 0.0 cmsD50X 0.0 cmsD50Y 0.0 cmsD50Z ]
+            /WhitePoint [D50]
+            /BlackPoint [BP]
+            /RenderingIntent (intent)
+      >>
+    ]
+
+   On simpler profiles, the PCS is already XYZ, so no conversion is required.
+
+
+   Matrix-shaper based
+   -------------------
+
+   This is implemented both with /CIEBasedABC or /CIEBasedDEF depending on the
+   profile implementation. Since here there are no interpolation tables, I do
+   the conversion directly to XYZ
+
+
+
+    [ /CIEBasedABC
+            <<
+                /DecodeABC [ {transfer1} {transfer2} {transfer3} ]
+                /MatrixABC [Matrix]
+                /RangeLMN [ 0.0 cmsD50X 0.0 cmsD50Y 0.0 cmsD50Z ]
+                /DecodeLMN [ { / 2} dup dup ]
+                /WhitePoint [D50]
+                /BlackPoint [BP]
+                /RenderingIntent (intent)
+            >>
+    ]
+
+
+    CLUT based
+    ----------
+
+     Lab is used in such cases.
+
+    [ /CIEBasedDEF
+            <<
+            /DecodeDEF [ <prelinearization> ]
+            /Table [ p p p [<...>]]
+            /RangeABC [ 0 1 0 1 0 1]
+            /DecodeABC[ <postlinearization> ]
+            /RangeLMN [ -0.236 1.254 0 1 -0.635 1.640 ]
+               % -128/500 1+127/500 0 1  -127/200 1+128/200
+            /MatrixABC [ 1 1 1 1 0 0 0 0 -1]
+            /WhitePoint [D50]
+            /BlackPoint [BP]
+            /RenderingIntent (intent)
+    ]
+
+
+  Color Rendering Dictionaries (CRD)
+  ==================================
+  These are always implemented as CLUT, and always are using Lab. Since CRD are expected to
+  be used as resources, the code adds the definition as well.
+
+  <<
+    /ColorRenderingType 1
+    /WhitePoint [ D50 ]
+    /BlackPoint [BP]
+    /MatrixPQR [ Bradford ]
+    /RangePQR [-0.125 1.375 -0.125 1.375 -0.125 1.375 ]
+    /TransformPQR [
+    {4 index 3 get div 2 index 3 get mul exch pop exch pop exch pop exch pop } bind
+    {4 index 4 get div 2 index 4 get mul exch pop exch pop exch pop exch pop } bind
+    {4 index 5 get div 2 index 5 get mul exch pop exch pop exch pop exch pop } bind
+    ]
+    /MatrixABC <...>
+    /EncodeABC <...>
+    /RangeABC  <.. used for  XYZ -> Lab>
+    /EncodeLMN
+    /RenderTable [ p p p [<...>]]
+
+    /RenderingIntent (Perceptual)
+  >>
+  /Current exch /ColorRendering defineresource pop
+
+
+  The following stages are used to convert from XYZ to Lab
+  --------------------------------------------------------
+
+  Input is given at LMN stage on X, Y, Z
+
+  Encode LMN gives us f(X/Xn), f(Y/Yn), f(Z/Zn)
+
+  /EncodeLMN [
+
+    { 0.964200  div dup 0.008856 le {7.787 mul 16 116 div add}{1 3 div exp} ifelse } bind
+    { 1.000000  div dup 0.008856 le {7.787 mul 16 116 div add}{1 3 div exp} ifelse } bind
+    { 0.824900  div dup 0.008856 le {7.787 mul 16 116 div add}{1 3 div exp} ifelse } bind
+
+    ]
+
+
+  MatrixABC is used to compute f(Y/Yn), f(X/Xn) - f(Y/Yn), f(Y/Yn) - f(Z/Zn)
+
+  | 0  1  0|
+  | 1 -1  0|
+  | 0  1 -1|
+
+  /MatrixABC [ 0 1 0 1 -1 1 0 0 -1 ]
+
+ EncodeABC finally gives Lab values.
+
+  /EncodeABC [
+    { 116 mul  16 sub 100 div  } bind
+    { 500 mul 128 add 255 div  } bind
+    { 200 mul 128 add 255 div  } bind
+    ]
+
+  The following stages are used to convert Lab to XYZ
+  ----------------------------------------------------
+
+    /RangeABC [ 0 1 0 1 0 1]
+    /DecodeABC [ { 100 mul 16 add 116 div } bind
+                 { 255 mul 128 sub 500 div } bind
+                 { 255 mul 128 sub 200 div } bind
+               ]
+
+    /MatrixABC [ 1 1 1 1 0 0 0 0 -1]
+    /DecodeLMN [
+                {dup 6 29 div ge {dup dup mul mul} {4 29 div sub 108 841 div mul} ifelse 0.964200 mul} bind
+                {dup 6 29 div ge {dup dup mul mul} {4 29 div sub 108 841 div mul} ifelse } bind
+                {dup 6 29 div ge {dup dup mul mul} {4 29 div sub 108 841 div mul} ifelse 0.824900 mul} bind
+                ]
+
+
+*/
+
+/*
+
+ PostScript algorithms discussion.
+ =========================================================================================================
+
+  1D interpolation algorithm
+
+
+  1D interpolation (float)
+  ------------------------
+
+    val2 = Domain * Value;
+
+    cell0 = (int) floor(val2);
+    cell1 = (int) ceil(val2);
+
+    rest = val2 - cell0;
+
+    y0 = LutTable[cell0] ;
+    y1 = LutTable[cell1] ;
+
+    y = y0 + (y1 - y0) * rest;
+
+
+
+  PostScript code                   Stack
+  ================================================
+
+  {                                 % v
+    <check 0..1.0>
+    [array]                         % v tab
+    dup                             % v tab tab
+    length 1 sub                    % v tab dom
+
+    3 -1 roll                       % tab dom v
+
+    mul                             % tab val2
+    dup                             % tab val2 val2
+    dup                             % tab val2 val2 val2
+    floor cvi                       % tab val2 val2 cell0
+    exch                            % tab val2 cell0 val2
+    ceiling cvi                     % tab val2 cell0 cell1
+
+    3 index                         % tab val2 cell0 cell1 tab
+    exch                            % tab val2 cell0 tab cell1
+    get                             % tab val2 cell0 y1
+
+    4 -1 roll                       % val2 cell0 y1 tab
+    3 -1 roll                       % val2 y1 tab cell0
+    get                             % val2 y1 y0
+
+    dup                             % val2 y1 y0 y0
+    3 1 roll                        % val2 y0 y1 y0
+
+    sub                             % val2 y0 (y1-y0)
+    3 -1 roll                       % y0 (y1-y0) val2
+    dup                             % y0 (y1-y0) val2 val2
+    floor cvi                       % y0 (y1-y0) val2 floor(val2)
+    sub                             % y0 (y1-y0) rest
+    mul                             % y0 t1
+    add                             % y
+    65535 div                       % result
+
+  } bind
+
+
+*/
+
+
+// Sampler state that WriteCLUT fills in and OutputValueSampler updates while a CLUT is being written
+typedef struct {
+    _cmsStageCLutData* Pipeline;   // CLUT data of the stage being dumped
+    cmsIOHANDLER* m;               // Destination of the generated PostScript
+    // Last first / second grid coordinate seen by the sampler; -1 while no group is open
+    int FirstComponent;
+    int SecondComponent;
+    // Text written before / after each major (first component) and minor (second component) group
+    const char* PreMaj;
+    const char* PostMaj;
+    const char* PreMin;
+    const char* PostMin;
+
+    int  FixWhite;    // Force mapping of pure white
+
+    cmsColorSpaceSignature  ColorSpace;  // ColorSpace of profile
+
+
+} cmsPsSamplerCargo;
+
+static int _cmsPSActualColumn = 0;  // Hex characters written so far on the current output line (see WriteByte)
+
+
+// Reduces a 16-bit value to 8 bits: divides by 257 (0xFFFF maps to 0xFF) and rounds to nearest
+static cmsUInt8Number Word2Byte(cmsUInt16Number w)
+{
+    const cmsFloat64Number scaled = (cmsFloat64Number) w / 257.0;
+    return (cmsUInt8Number) floor(scaled + 0.5);
+}
+
+
+// Convert to byte (using ICC2 notation)
+/*
+static
+cmsUInt8Number L2Byte(cmsUInt16Number w)
+{
+    int ww = w + 0x0080;
+
+    if (ww > 0xFFFF) return 0xFF;
+
+    return (cmsUInt8Number) ((cmsUInt16Number) (ww >> 8) & 0xFF);
+}
+*/
+
+// Emits one byte as two lowercase hex digits and wraps the output line
+// once more than MAXPSCOLS characters have been written on it.
+static
+void WriteByte(cmsIOHANDLER* m, cmsUInt8Number b)
+{
+    _cmsIOPrintf(m, "%02x", b);
+    _cmsPSActualColumn += 2;
+
+    // Still room on this line: nothing else to do
+    if (_cmsPSActualColumn <= MAXPSCOLS) return;
+
+    _cmsIOPrintf(m, "\n");
+    _cmsPSActualColumn = 0;
+}
+
+// ----------------------------------------------------------------- PostScript generation
+
+
+// Copies txt (cut at 2047 characters) into a static buffer and turns every CR or LF
+// into a blank. The buffer is shared, so the result is overwritten by the next call.
+static
+char* RemoveCR(const char* txt)
+{
+    static char Buffer[2048];
+    size_t k;
+
+    strncpy(Buffer, txt, 2047);
+    Buffer[2047] = 0;
+    for (k = 0; Buffer[k] != 0; k++) {
+        if (Buffer[k] == '\r' || Buffer[k] == '\n') Buffer[k] = ' ';
+    }
+    return Buffer;
+}
+
+// Writes the PostScript comment header: the PS-Adobe magic line, Title, the
+// description and copyright text of hProfile, the creation time and %%BeginResource.
+static
+void EmitHeader(cmsIOHANDLER* m, const char* Title, cmsHPROFILE hProfile)
+{
+    char descText[256], copyText[256];
+    cmsMLU *descTag, *copyTag;
+    time_t now;
+
+    time(&now);
+    descTag = (cmsMLU*) cmsReadTag(hProfile, cmsSigProfileDescriptionTag);
+    copyTag = (cmsMLU*) cmsReadTag(hProfile, cmsSigCopyrightTag);
+
+    // Both texts start out empty, so a missing tag simply prints as nothing
+    descText[0] = descText[255] = 0;
+    copyText[0] = copyText[255] = 0;
+    if (descTag != NULL) cmsMLUgetASCII(descTag, cmsNoLanguage, cmsNoCountry, descText, 255);
+    if (copyTag != NULL) cmsMLUgetASCII(copyTag, cmsNoLanguage, cmsNoCountry, copyText, 255);
+
+    _cmsIOPrintf(m, "%%!PS-Adobe-3.0\n");
+    _cmsIOPrintf(m, "%%\n");
+    _cmsIOPrintf(m, "%% %s\n", Title);
+    _cmsIOPrintf(m, "%% Source: %s\n", RemoveCR(descText));
+    _cmsIOPrintf(m, "%%         %s\n", RemoveCR(copyText));
+    _cmsIOPrintf(m, "%% Created: %s", ctime(&now)); // ctime appends a \n!!!
+    _cmsIOPrintf(m, "%%\n");
+    _cmsIOPrintf(m, "%%%%BeginResource\n");
+}
+
+
+// Writes the /BlackPoint and /WhitePoint entries. The white point is always D50;
+// the black point is the one handed in (the device black point adapted to D50).
+
+static
+void EmitWhiteBlackD50(cmsIOHANDLER* m, cmsCIEXYZ* BlackPoint)
+{
+    // D50 is looked up once and reused for its three components
+    const cmsCIEXYZ* d50 = cmsD50_XYZ();
+
+    _cmsIOPrintf(m, "/BlackPoint [%f %f %f]\n",
+                 BlackPoint->X, BlackPoint->Y, BlackPoint->Z);
+
+    _cmsIOPrintf(m, "/WhitePoint [%f %f %f]\n",
+                 d50->X, d50->Y, d50->Z);
+}
+
+
+// Emits PostScript that clamps the value on top of the operand stack to 0.0 .. 1.0
+static
+void EmitRangeCheck(cmsIOHANDLER* m)
+{
+    static const char Clamp[] = "dup 0.0 lt { pop 0.0 } if dup 1.0 gt { pop 1.0 } if ";
+    _cmsIOPrintf(m, "%s", Clamp);
+}
+
+// Writes the /RenderingIntent entry. The four standard ICC intents are written
+// by name; any other value is written as "Undefined".
+
+static
+void EmitIntent(cmsIOHANDLER* m, cmsUInt32Number RenderingIntent)
+{
+    const char* name = "Undefined";
+
+    if (RenderingIntent == INTENT_PERCEPTUAL)
+        name = "Perceptual";
+    else if (RenderingIntent == INTENT_RELATIVE_COLORIMETRIC)
+        name = "RelativeColorimetric";
+    else if (RenderingIntent == INTENT_ABSOLUTE_COLORIMETRIC)
+        name = "AbsoluteColorimetric";
+    else if (RenderingIntent == INTENT_SATURATION)
+        name = "Saturation";
+
+    _cmsIOPrintf(m, "/RenderingIntent (%s)\n", name);
+}
+
+//
+//  Convert L* to Y
+//
+//      Y = Yn*[ (L* + 16) / 116] ^ 3   if (L*) >= 6 / 29
+//        = Yn*( L* / 116) / 7.787      if (L*) < 6 / 29
+//
+
+/*
+static
+void EmitL2Y(cmsIOHANDLER* m)
+{
+    _cmsIOPrintf(m,
+            "{ "
+                "100 mul 16 add 116 div "               // (L * 100 + 16) / 116
+                 "dup 6 29 div ge "                     // >= 6 / 29 ?
+                 "{ dup dup mul mul } "                 // yes, ^3 and done
+                 "{ 4 29 div sub 108 841 div mul } "    // no, slope limiting
+            "ifelse } bind ");
+}
+*/
+
+
+// Lab -> XYZ decoding stages, written entry by entry (see the discussion at the top of the file)
+static void EmitLab2XYZ(cmsIOHANDLER* m)
+{
+    static const char* const Stages[] = {
+        "/RangeABC [ 0 1 0 1 0 1]\n",
+        "/DecodeABC [\n",
+        "{100 mul  16 add 116 div } bind\n",
+        "{255 mul 128 sub 500 div } bind\n",
+        "{255 mul 128 sub 200 div } bind\n", "]\n",
+        "/MatrixABC [ 1 1 1 1 0 0 0 0 -1]\n",
+        "/RangeLMN [ -0.236 1.254 0 1 -0.635 1.640 ]\n",
+        "/DecodeLMN [\n",
+        "{dup 6 29 div ge {dup dup mul mul} {4 29 div sub 108 841 div mul} ifelse 0.964200 mul} bind\n",
+        "{dup 6 29 div ge {dup dup mul mul} {4 29 div sub 108 841 div mul} ifelse } bind\n",
+        "{dup 6 29 div ge {dup dup mul mul} {4 29 div sub 108 841 div mul} ifelse 0.824900 mul} bind\n", "]\n"
+    };
+    cmsUInt32Number k;
+    for (k = 0; k < sizeof(Stages) / sizeof(Stages[0]); k++) _cmsIOPrintf(m, "%s", Stages[k]);
+}
+
+
+
+// Writes one tone curve as a PostScript procedure. Nothing is written for a NULL,
+// empty or linear curve. A curve for which cmsEstimateGamma reports a positive gamma
+// is written as "{ g exp } bind"; any other curve is written as its 16-bit table
+// followed by the 1D interpolation code discussed at the top of the file.
+static
+void Emit1Gamma(cmsIOHANDLER* m, cmsToneCurve* Table)
+{
+    // Interpolation code, emitted piece by piece after the table.
+    //
+    // PostScript code                      Stack
+    // ===============                      ========================
+    static const char* const Interp[] = {   // v tab
+        "dup ",                             // v tab tab
+        "length 1 sub ",                    // v tab dom
+        "3 -1 roll ",                       // tab dom v
+        "mul ",                             // tab val2
+        "dup ",                             // tab val2 val2
+        "dup ",                             // tab val2 val2 val2
+        "floor cvi ",                       // tab val2 val2 cell0
+        "exch ",                            // tab val2 cell0 val2
+        "ceiling cvi ",                     // tab val2 cell0 cell1
+        "3 index ",                         // tab val2 cell0 cell1 tab
+        "exch ",                            // tab val2 cell0 tab cell1
+        "get ",                             // tab val2 cell0 y1
+        "4 -1 roll ",                       // val2 cell0 y1 tab
+        "3 -1 roll ",                       // val2 y1 tab cell0
+        "get ",                             // val2 y1 y0
+        "dup ",                             // val2 y1 y0 y0
+        "3 1 roll ",                        // val2 y0 y1 y0
+        "sub ",                             // val2 y0 (y1-y0)
+        "3 -1 roll ",                       // y0 (y1-y0) val2
+        "dup ",                             // y0 (y1-y0) val2 val2
+        "floor cvi ",                       // y0 (y1-y0) val2 floor(val2)
+        "sub ",                             // y0 (y1-y0) rest
+        "mul ",                             // y0 t1
+        "add ",                             // y
+        "65535 div "                        // result
+    };
+
+    cmsUInt32Number i;
+    cmsFloat64Number gamma;
+
+    // Missing, empty and identity curves produce no output at all
+    if (Table == NULL || Table->nEntries <= 0 || cmsIsToneCurveLinear(Table)) return;
+
+    // Check if is really an exponential. If so, emit "exp"
+    gamma = cmsEstimateGamma(Table, 0.001);
+    if (gamma > 0) {
+        _cmsIOPrintf(m, "{ %g exp } bind ", gamma);
+        return;
+    }
+
+    // General case: bounds check, table, interpolation
+    _cmsIOPrintf(m, "{ ");
+    EmitRangeCheck(m);
+
+    _cmsIOPrintf(m, " [");
+    for (i = 0; i < Table->nEntries; i++) {
+        _cmsIOPrintf(m, "%d ", Table->Table16[i]);
+    }
+    _cmsIOPrintf(m, "] ");
+
+    for (i = 0; i < sizeof(Interp) / sizeof(Interp[0]); i++) {
+        _cmsIOPrintf(m, "%s", Interp[i]);
+    }
+
+    _cmsIOPrintf(m, " } bind ");
+}
+
+
+// Tells whether the first nEntries 16-bit values of two gamma tables are the same
+static
+cmsBool GammaTableEquals(cmsUInt16Number* g1, cmsUInt16Number* g2, cmsUInt32Number nEntries)
+{
+    const size_t nBytes = nEntries * sizeof(cmsUInt16Number);
+    return memcmp(g1, g2, nBytes) == 0;
+}
+
+
+// Writes the n tone curves of g[] one after another. A curve whose table equals the
+// table of the curve just before it is written as "dup" instead of being repeated.
+static
+void EmitNGamma(cmsIOHANDLER* m, cmsUInt32Number n, cmsToneCurve* g[])
+{
+    cmsUInt32Number i;
+
+    for (i = 0; i < n; i++)
+    {
+        if (g[i] == NULL) return; // Error
+
+        // Tables of different length can never be equal. Comparing the lengths
+        // first also keeps memcmp from reading past the end of the shorter one.
+        if (i > 0 && g[i-1]->nEntries == g[i]->nEntries &&
+            GammaTableEquals(g[i-1]->Table16, g[i]->Table16, g[i]->nEntries)) {
+            _cmsIOPrintf(m, "dup ");
+        } else {
+            Emit1Gamma(m, g[i]);
+        }
+    }
+}
+
+
+
+
+
+// Following code dumps a LUT onto memory stream
+
+
+// Sampler callback that writes one CLUT knot. It is meant to be driven by
+// cmsStageSampleCLut16bit in SAMPLER_INSPECT mode, which calls it for each knot with
+//
+//          In[]  The grid location coordinates, normalized to 0..ffff
+//          Out[] The Pipeline values, normalized to 0..ffff
+//          Cargo The cmsPsSamplerCargo set up by the caller
+//
+//  The function returns 1 after every knot it has written, which lets the
+//  sampling go on. The only 0 return, which ends the sampling, happens when
+//  the white fix-up cannot get the end points of the color space.
+//
+//  Each row contains Pipeline values for all but first component. So, row
+//  changes are detected by keeping a copy of the last value of the first
+//  (and second) component in the cargo. -1 there means no row is open yet.
+
+static
+int OutputValueSampler(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void* Cargo)
+{
+    cmsPsSamplerCargo* sc = (cmsPsSamplerCargo*) Cargo;
+    cmsIOHANDLER* io = sc->m;
+    cmsUInt32Number i, nOut;
+
+
+    // White fix-up, on request only: knots at L* = 100 whose a and b lie in
+    // [-8..8] get the white of the profile color space as output.
+    if (sc->FixWhite && In[0] == 0xFFFF &&
+        (In[1] >= 0x7800 && In[1] <= 0x8800) &&
+        (In[2] >= 0x7800 && In[2] <= 0x8800)) {
+
+        cmsUInt16Number* Black;
+        cmsUInt16Number* White;
+        cmsUInt32Number nOutputs;
+
+        // A failure to get that white ends the sampling
+        if (!_cmsEndPointsBySpace(sc->ColorSpace, &White, &Black, &nOutputs))
+            return 0;
+
+        for (i = 0; i < nOutputs; i++)
+            Out[i] = White[i];
+    }
+
+
+    // Handle the parenthesis on rows: a change in the first component closes
+    // the groups that are still open and starts a new major group.
+    if (In[0] != sc->FirstComponent) {
+
+        if (sc->FirstComponent != -1) {
+
+            _cmsIOPrintf(io, sc->PostMin);
+            sc->SecondComponent = -1;
+            _cmsIOPrintf(io, sc->PostMaj);
+        }
+
+        // Begin block
+        _cmsPSActualColumn = 0;
+
+        _cmsIOPrintf(io, sc->PreMaj);
+        sc->FirstComponent = In[0];
+    }
+
+
+    // A change in the second component closes the open minor group, if any,
+    // and starts the next one.
+    if (In[1] != sc->SecondComponent) {
+
+        if (sc->SecondComponent != -1) {
+            _cmsIOPrintf(io, sc->PostMin);
+        }
+
+        _cmsIOPrintf(io, sc->PreMin);
+        sc->SecondComponent = In[1];
+    }
+
+
+    // Dump table: every output value goes out as a single byte.
+    nOut = sc->Pipeline->Params->nOutputs;
+
+    for (i = 0; i < nOut; i++) {
+
+        // We always deal with Lab4
+        const cmsUInt8Number asByte = Word2Byte(Out[i]);
+
+        WriteByte(io, asByte);
+    }
+
+
+    return 1;
+}
+
+// Writes a CLUT stage as PostScript: the number of grid points of every input,
+// then the node values as hex bytes, grouped by the Pre/Post strings given.
+static
+void WriteCLUT(cmsIOHANDLER* m, cmsStage* mpe, const char* PreMaj,
+                                             const char* PostMaj,
+                                             const char* PreMin,
+                                             const char* PostMin,
+                                             int FixWhite,
+                                             cmsColorSpaceSignature ColorSpace)
+{
+    _cmsStageCLutData* clut = (_cmsStageCLutData*) mpe->Data;
+    cmsPsSamplerCargo sc;
+    cmsUInt32Number dim;
+
+    // Sampler state: -1 in both components marks that no group is open yet
+    sc.Pipeline        = clut;
+    sc.m               = m;
+    sc.FirstComponent  = -1;
+    sc.SecondComponent = -1;
+    sc.PreMaj          = PreMaj;
+    sc.PostMaj         = PostMaj;
+    sc.PreMin          = PreMin;
+    sc.PostMin         = PostMin;
+    sc.FixWhite        = FixWhite;
+    sc.ColorSpace      = ColorSpace;
+
+    // Grid points per input dimension
+    _cmsIOPrintf(m, "[");
+    for (dim = 0; dim < clut->Params->nInputs; dim++)
+        _cmsIOPrintf(m, " %d ", clut->Params->nSamples[dim]);
+    _cmsIOPrintf(m, " [\n");
+
+    // Node values, written knot by knot by the sampler
+    cmsStageSampleCLut16bit(mpe, OutputValueSampler, (void*) &sc, SAMPLER_INSPECT);
+    // Close the groups the last knot left open
+    _cmsIOPrintf(m, PostMin);
+    _cmsIOPrintf(m, PostMaj);
+    _cmsIOPrintf(m, "] ");
+}
+
+
+// Writes a /CIEBasedA color space array. Curve is emitted as /DecodeA, matrix and
+// LMN range carry the fixed D50 values, and the intent is always Perceptual. Returns 1.
+static
+int EmitCIEBasedA(cmsIOHANDLER* m, cmsToneCurve* Curve, cmsCIEXYZ* BlackPoint)
+{
+    // Open the array and its dictionary
+    _cmsIOPrintf(m, "[ /CIEBasedA\n");
+    _cmsIOPrintf(m, "  <<\n");
+
+    // Transfer function
+    _cmsIOPrintf(m, "/DecodeA ");
+    Emit1Gamma(m, Curve);
+    _cmsIOPrintf(m, " \n");
+
+    // D50 as matrix, with the matching LMN range
+    _cmsIOPrintf(m, "/MatrixA [ 0.9642 1.0000 0.8249 ]\n");
+    _cmsIOPrintf(m, "/RangeLMN [ 0.0 0.9642 0.0 1.0000 0.0 0.8249 ]\n");
+
+    EmitWhiteBlackD50(m, BlackPoint);
+    EmitIntent(m, INTENT_PERCEPTUAL);
+
+    // Close dictionary and array
+    _cmsIOPrintf(m, ">>\n");
+    _cmsIOPrintf(m, "]\n");
+    return 1;
+}
+
+
+// Writes a /CIEBasedABC color space array: the three curves of CurveSet go into
+// /DecodeABC and the nine values of Matrix into /MatrixABC. The LMN range is fixed
+// to D50 and the intent is always Perceptual. Returns 1.
+static
+int EmitCIEBasedABC(cmsIOHANDLER* m, cmsFloat64Number* Matrix, cmsToneCurve** CurveSet, cmsCIEXYZ* BlackPoint)
+{
+    int col;
+
+    _cmsIOPrintf(m, "[ /CIEBasedABC\n");
+    _cmsIOPrintf(m, "<<\n");
+
+    // Transfer functions
+    _cmsIOPrintf(m, "/DecodeABC [ ");
+    EmitNGamma(m, 3, CurveSet);
+    _cmsIOPrintf(m, "]\n");
+
+    // For each col, the elements at offsets col, col + 3 and col + 6 of
+    // Matrix are written together.
+    _cmsIOPrintf(m, "/MatrixABC [ " );
+    for (col = 0; col < 3; col++) {
+        const cmsFloat64Number* c = Matrix + col;
+
+        _cmsIOPrintf(m, "%.6f %.6f %.6f ", c[0], c[3], c[6]);
+    }
+    _cmsIOPrintf(m, "]\n");
+
+    // Fixed D50 range
+    _cmsIOPrintf(m, "/RangeLMN [ 0.0 0.9642 0.0 1.0000 0.0 0.8249 ]\n");
+
+    EmitWhiteBlackD50(m, BlackPoint);
+    EmitIntent(m, INTENT_PERCEPTUAL);
+
+    // Close dictionary and array
+    _cmsIOPrintf(m, ">>\n");
+    _cmsIOPrintf(m, "]\n");
+
+    return 1;
+}
+
+
+// Writes a /CIEBasedDEF (3 inputs) or /CIEBasedDEFG (4 inputs) color space array.
+// A leading curve set goes to /DecodeDEF and a CLUT stage to /Table; Lab -> XYZ, white/black
+// point and intent follow. Returns 0 on an empty pipeline or unsupported input count, else 1.
+static
+int EmitCIEBasedDEF(cmsIOHANDLER* m, cmsPipeline* Pipeline, cmsUInt32Number Intent, cmsCIEXYZ* BlackPoint)
+{
+    const char* PreMaj;
+    const char* PostMaj;
+    const char* PreMin, *PostMin;
+    cmsStage* mpe;
+
+    // An empty pipeline has no stage to take the number of inputs from
+    mpe = Pipeline ->Elements;
+    if (mpe == NULL) return 0;
+
+    switch (cmsStageInputChannels(mpe)) {
+    case 3:
+            _cmsIOPrintf(m, "[ /CIEBasedDEF\n");
+            PreMaj ="<";
+            PostMaj= ">\n";
+            PreMin = PostMin = "";
+            break;
+    case 4:
+            _cmsIOPrintf(m, "[ /CIEBasedDEFG\n");
+            PreMaj = "[";
+            PostMaj = "]\n";
+            PreMin = "<";
+            PostMin = ">\n";
+            break;
+    default:
+            return 0;
+    }
+
+    _cmsIOPrintf(m, "<<\n");
+
+    if (cmsStageType(mpe) == cmsSigCurveSetElemType) {
+        _cmsIOPrintf(m, "/DecodeDEF [ ");
+        EmitNGamma(m, cmsStageOutputChannels(mpe), _cmsStageGetPtrToCurveSet(mpe));
+        _cmsIOPrintf(m, "]\n");
+        mpe = mpe ->Next;
+    }
+
+    // The curve set may have been the last stage: mpe can be NULL at this point
+    if (mpe != NULL && cmsStageType(mpe) == cmsSigCLutElemType) {
+        _cmsIOPrintf(m, "/Table ");
+        WriteCLUT(m, mpe, PreMaj, PostMaj, PreMin, PostMin, FALSE, (cmsColorSpaceSignature) 0);
+        _cmsIOPrintf(m, "]\n");
+    }
+
+    EmitLab2XYZ(m);
+    EmitWhiteBlackD50(m, BlackPoint);
+    EmitIntent(m, Intent);
+
+    _cmsIOPrintf(m, "   >>\n");
+    _cmsIOPrintf(m, "]\n");
+    return 1;
+}
+
+// Builds a 256-entry tone curve holding, for every 8-bit gray level, the Y that the
+// gray profile yields for it (saturated to 16 bits). The table is left as built when
+// the transform cannot be created. Returns the curve as built, which may be NULL.
+static
+cmsToneCurve* ExtractGray2Y(cmsContext ContextID, cmsHPROFILE hProfile, cmsUInt32Number Intent)
+{
+    cmsToneCurve* Out = cmsBuildTabulatedToneCurve16(ContextID, 256, NULL);
+    cmsHPROFILE hXYZ  = cmsCreateXYZProfile();
+    cmsHTRANSFORM xform = cmsCreateTransformTHR(ContextID, hProfile, TYPE_GRAY_8, hXYZ, TYPE_XYZ_DBL, Intent, cmsFLAGS_NOOPTIMIZE);
+    int level;
+
+    for (level = 0; Out != NULL && xform != NULL && level < 256; level++) {
+
+        cmsUInt8Number Gray = (cmsUInt8Number) level;
+        cmsCIEXYZ XYZ;
+
+        cmsDoTransform(xform, &Gray, &XYZ, 1);
+        Out ->Table16[level] =_cmsQuickSaturateWord(XYZ.Y * 65535.0);
+    }
+
+    // Release what was created here; the curve itself is handed to the caller
+    if (xform != NULL) cmsDeleteTransform(xform);
+    if (hXYZ != NULL) cmsCloseProfile(hXYZ);
+
+    return Out;
+}
+
+
+
+// Because PostScript has only 8 bits in /Table, we should use
+// a more perceptually uniform space... I do choose Lab.
+//
+// Writes the CSA of hProfile by means of a Profile -> Lab4 transform. One channel
+// gives /CIEBasedA, 3 and 4 channels give /CIEBasedDEF(G). Returns 1 on success
+// and 0 on error; the transform is released on every path, the error ones included.
+static
+int WriteInputLUT(cmsIOHANDLER* m, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags)
+{
+    cmsHPROFILE hLab;
+    cmsHTRANSFORM xform;
+    cmsUInt32Number nChannels;
+    cmsUInt32Number InputFormat;
+    int rc = 1;
+    cmsHPROFILE Profiles[2];
+    cmsCIEXYZ BlackPointAdaptedToD50;
+
+    // Does create a device-link based transform.
+    // The DeviceLink is next dumped as working CSA.
+
+    InputFormat = cmsFormatterForColorspaceOfProfile(hProfile, 2, FALSE);
+    nChannels   = T_CHANNELS(InputFormat);
+
+    cmsDetectBlackPoint(&BlackPointAdaptedToD50, hProfile, Intent, 0);
+
+    // Adjust output to Lab4
+    hLab = cmsCreateLab4ProfileTHR(m ->ContextID, NULL);
+
+    Profiles[0] = hProfile;
+    Profiles[1] = hLab;
+
+    xform = cmsCreateMultiprofileTransform(Profiles, 2,  InputFormat, TYPE_Lab_DBL, Intent, 0);
+    cmsCloseProfile(hLab);
+
+    if (xform == NULL) {
+        cmsSignalError(m ->ContextID, cmsERROR_COLORSPACE_CHECK, "Cannot create transform Profile -> Lab");
+        return 0;
+    }
+
+    // Only 1, 3 and 4 channels are allowed
+    switch (nChannels) {
+
+    case 1: {
+            cmsToneCurve* Gray2Y = ExtractGray2Y(m ->ContextID, hProfile, Intent);
+            EmitCIEBasedA(m, Gray2Y, &BlackPointAdaptedToD50);
+            cmsFreeToneCurve(Gray2Y);
+            }
+            break;
+
+    case 3:
+    case 4: {
+            cmsUInt32Number OutFrm = TYPE_Lab_16;
+            _cmsTRANSFORM* v = (_cmsTRANSFORM*) xform;
+            cmsPipeline* DeviceLink = cmsPipelineDup(v ->Lut);
+
+            if (DeviceLink == NULL) {
+                rc = 0;
+                break;
+            }
+
+            dwFlags |= cmsFLAGS_FORCE_CLUT;
+            _cmsOptimizePipeline(m->ContextID, &DeviceLink, Intent, &InputFormat, &OutFrm, &dwFlags);
+
+            rc = EmitCIEBasedDEF(m, DeviceLink, Intent, &BlackPointAdaptedToD50);
+            cmsPipelineFree(DeviceLink);
+            }
+            break;
+
+    default:
+        cmsSignalError(m ->ContextID, cmsERROR_COLORSPACE_CHECK, "Only 3, 4 channels supported for CSA. This profile has %d channels.", nChannels);
+        rc = 0;
+        break;
+    }
+
+    // Single exit: failures must not leave the transform behind either
+    cmsDeleteTransform(xform);
+    return rc;
+}
+
+// Gives access to the coefficients of a matrix stage. mpe->Data is taken to be
+// a _cmsStageMatrixData, so mpe has to be a matrix stage.
+static
+cmsFloat64Number* GetPtrToMatrix(const cmsStage* mpe)
+{
+    return ((_cmsStageMatrixData*) mpe ->Data) -> Double;
+}
+
+
+// Writes a CSA for a matrix-shaper profile. Gray profiles are emitted as /CIEBasedA
+// using the first shaper curve, RGB profiles as /CIEBasedABC using the matrix scaled
+// by MAX_ENCODEABLE_XYZ. Any other color space signals an error and returns 0.
+static
+int WriteInputMatrixShaper(cmsIOHANDLER* m, cmsHPROFILE hProfile, cmsStage* Matrix, cmsStage* Shaper)
+{
+    cmsCIEXYZ BlackPointAdaptedToD50;
+    cmsColorSpaceSignature ColorSpace = cmsGetColorSpace(hProfile);
+
+    cmsDetectBlackPoint(&BlackPointAdaptedToD50, hProfile, INTENT_RELATIVE_COLORIMETRIC, 0);
+
+    switch (ColorSpace) {
+
+    case cmsSigGrayData: {
+        cmsToneCurve** ShaperCurve = _cmsStageGetPtrToCurveSet(Shaper);
+
+        return EmitCIEBasedA(m, ShaperCurve[0], &BlackPointAdaptedToD50);
+    }
+
+    case cmsSigRgbData: {
+        cmsMAT3 Mat;
+        int row, col;
+
+        // The scaling is done on a local copy of the stage matrix
+        memmove(&Mat, GetPtrToMatrix(Matrix), sizeof(Mat));
+
+        for (row = 0; row < 3; row++) {
+            for (col = 0; col < 3; col++) {
+                Mat.v[row].n[col] *= MAX_ENCODEABLE_XYZ;
+            }
+        }
+
+        return EmitCIEBasedABC(m, (cmsFloat64Number *)&Mat,
+                               _cmsStageGetPtrToCurveSet(Shaper),
+                               &BlackPointAdaptedToD50);
+    }
+
+    default:
+        cmsSignalError(m->ContextID, cmsERROR_COLORSPACE_CHECK, "Profile is not suitable for CSA. Unsupported colorspace.");
+        return 0;
+    }
+}
+
+
+
+// Creates a PostScript color list from a named profile data.
+// This is a HP extension, and it works in Lab instead of XYZ.
+// Returns 1 on success and 0 when the transform or its named color list cannot
+// be obtained. The Lab profile and the transform are released on every path.
+static
+int WriteNamedColorCSA(cmsIOHANDLER* m, cmsHPROFILE hNamedColor, cmsUInt32Number Intent)
+{
+    cmsHTRANSFORM xform;
+    cmsHPROFILE   hLab;
+    cmsUInt32Number i, nColors;
+    char ColorName[cmsMAX_PATH];
+    cmsNAMEDCOLORLIST* NamedColorList;
+
+    hLab  = cmsCreateLab4ProfileTHR(m ->ContextID, NULL);
+    xform = cmsCreateTransform(hNamedColor, TYPE_NAMED_COLOR_INDEX, hLab, TYPE_Lab_DBL, Intent, 0);
+
+    // The profile is closed as soon as the transform has been attempted, as
+    // WriteInputLUT does, so no exit path below has to remember it.
+    if (hLab != NULL) cmsCloseProfile(hLab);
+    if (xform == NULL) return 0;
+
+    NamedColorList = cmsGetNamedColorList(xform);
+    if (NamedColorList == NULL) {
+        cmsDeleteTransform(xform);
+        return 0;
+    }
+
+    _cmsIOPrintf(m, "<<\n");
+    _cmsIOPrintf(m, "(colorlistcomment) (%s)\n", "Named color CSA");
+    _cmsIOPrintf(m, "(Prefix) [ (Pantone ) (PANTONE ) ]\n");
+    _cmsIOPrintf(m, "(Suffix) [ ( CV) ( CVC) ( C) ]\n");
+
+    nColors   = cmsNamedColorCount(NamedColorList);
+    for (i=0; i < nColors; i++) {
+        cmsUInt16Number In[1];
+        cmsCIELab Lab;
+
+        In[0] = (cmsUInt16Number) i;
+        if (!cmsNamedColorInfo(NamedColorList, i, ColorName, NULL, NULL, NULL, NULL))
+                continue;
+
+        cmsDoTransform(xform, In, &Lab, 1);
+        _cmsIOPrintf(m, "  (%s) [ %.3f %.3f %.3f ]\n", ColorName, Lab.L, Lab.a, Lab.b);
+    }
+
+    _cmsIOPrintf(m, ">>\n");
+    cmsDeleteTransform(xform);
+    return 1;
+}
+
+
+// Does create a Color Space Array on XYZ colorspace for PostScript usage.
+// A named color profile is written as a color list. Any other profile needs an
+// XYZ or Lab PCS; it is written as a matrix-shaper when its input LUT consists of
+// a curve set followed by a matrix, and by means of a CLUT otherwise.
+// Returns mem->UsedSpace on success, 0 on error.
+static
+cmsUInt32Number GenerateCSA(cmsContext ContextID,
+                            cmsHPROFILE hProfile,
+                            cmsUInt32Number Intent,
+                            cmsUInt32Number dwFlags,
+                            cmsIOHANDLER* mem)
+{
+    cmsUInt32Number dwBytesUsed = 0;
+    cmsPipeline* lut = NULL;
+    cmsStage* Matrix, *Shaper;
+    cmsBool ok = FALSE;     // Turns TRUE once one of the writers has succeeded
+
+    if (cmsGetDeviceClass(hProfile) == cmsSigNamedColorClass) {
+
+        // Is a named color profile
+        ok = (WriteNamedColorCSA(mem, hProfile, Intent) != 0);
+    }
+    else {
+
+        // Any profile class are allowed (including devicelink), but
+        // output (PCS) colorspace must be XYZ or Lab
+        cmsColorSpaceSignature ColorSpace = cmsGetPCS(hProfile);
+
+        if (ColorSpace != cmsSigXYZData && ColorSpace != cmsSigLabData) {
+
+            // Neither XYZ nor Lab: nothing gets written
+            cmsSignalError(ContextID, cmsERROR_COLORSPACE_CHECK, "Invalid output color space");
+        }
+        else {
+
+            // Read the lut with all necessary conversion stages
+            lut = _cmsReadInputLUT(hProfile, Intent);
+
+            if (lut != NULL) {
+
+                if (cmsPipelineCheckAndRetreiveStages(lut, 2, cmsSigCurveSetElemType, cmsSigMatrixElemType, &Shaper, &Matrix)) {
+
+                    // Tone curves + matrix can be implemented without any LUT
+                    ok = (WriteInputMatrixShaper(mem, hProfile, Matrix, Shaper) != 0);
+                }
+                else {
+
+                    // We need a LUT for the rest
+                    ok = (WriteInputLUT(mem, hProfile, Intent, dwFlags) != 0);
+                }
+            }
+        }
+    }
+
+
+    // Done, keep memory usage. A failure leaves the count at 0
+    if (ok) dwBytesUsed = mem ->UsedSpace;
+
+    // Get rid of LUT
+    if (lut != NULL) cmsPipelineFree(lut);
+
+    // Finally, return used byte count
+    return dwBytesUsed;
+}
+
+// ------------------------------------------------------ Color Rendering Dictionary (CRD)
+
+
+
+/*
+
+  Black point compensation plus chromatic adaptation:
+
+  Step 1 - Chromatic adaptation
+  =============================
+
+          WPout
+    X = ------- PQR
+          Wpin
+
+  Step 2 - Black point compensation
+  =================================
+
+          (WPout - BPout)*X - WPout*(BPin - BPout)
+    out = ---------------------------------------
+                        WPout - BPin
+
+
+  Algorithm discussion
+  ====================
+
+  TransformPQR(WPin, BPin, WPout, BPout, PQR)
+
+  Wpin,etc= { Xws Yws Zws Pws Qws Rws }
+
+
+  Algorithm             Stack 0...n
+  ===========================================================
+                        PQR BPout WPout BPin WPin
+  4 index 3 get         WPin PQR BPout WPout BPin WPin
+  div                   (PQR/WPin) BPout WPout BPin WPin
+  2 index 3 get         WPout (PQR/WPin) BPout WPout BPin WPin
+  mult                  WPout*(PQR/WPin) BPout WPout BPin WPin
+
+  2 index 3 get         WPout WPout*(PQR/WPin) BPout WPout BPin WPin
+  2 index 3 get         BPout WPout WPout*(PQR/WPin) BPout WPout BPin WPin
+  sub                   (WPout-BPout) WPout*(PQR/WPin) BPout WPout BPin WPin
+  mult                  (WPout-BPout)* WPout*(PQR/WPin) BPout WPout BPin WPin
+
+  2 index 3 get         WPout (WPout-BPout)* WPout*(PQR/WPin) BPout WPout BPin WPin
+  4 index 3 get         BPin WPout (WPout-BPout)* WPout*(PQR/WPin) BPout WPout BPin WPin
+  3 index 3 get         BPout BPin WPout (WPout-BPout)* WPout*(PQR/WPin) BPout WPout BPin WPin
+
+  sub                   (BPin-BPout) WPout (WPout-BPout)* WPout*(PQR/WPin) BPout WPout BPin WPin
+  mult                  (BPin-BPout)*WPout (WPout-BPout)* WPout*(PQR/WPin) BPout WPout BPin WPin
+  sub                   (WPout-BPout)* WPout*(PQR/WPin)-(BPin-BPout)*WPout BPout WPout BPin WPin
+
+  3 index 3 get         BPin (WPout-BPout)* WPout*(PQR/WPin)-(BPin-BPout)*WPout BPout WPout BPin WPin
+  3 index 3 get         WPout BPin (WPout-BPout)* WPout*(PQR/WPin)-(BPin-BPout)*WPout BPout WPout BPin WPin
+  exch
+  sub                   (WPout-BPin) (WPout-BPout)* WPout*(PQR/WPin)-(BPin-BPout)*WPout BPout WPout BPin WPin
+  div
+
+  exch pop
+  exch pop
+  exch pop
+  exch pop
+
+*/
+
+
+static
+void EmitPQRStage(cmsIOHANDLER* m, cmsHPROFILE hProfile, int DoBPC, int lIsAbsolute)
+{
+        // Writes the /MatrixPQR, /RangePQR and /TransformPQR entries of the CRD to m.
+        // lIsAbsolute takes precedence: when it is set, DoBPC is never looked at.
+        if (lIsAbsolute) {
+
+            // For absolute colorimetric intent, encode back to relative
+            // and generate a relative Pipeline
+
+            // Relative encoding is obtained across XYZpcs*(D50/WhitePoint)
+
+            cmsCIEXYZ White;
+            // NOTE(review): the result of _cmsReadMediaWhitePoint is not checked
+            _cmsReadMediaWhitePoint(&White, hProfile);
+
+            _cmsIOPrintf(m,"/MatrixPQR [1 0 0 0 1 0 0 0 1 ]\n");
+            _cmsIOPrintf(m,"/RangePQR [ -0.5 2 -0.5 2 -0.5 2 ]\n");
+            // Each procedure multiplies by a constant, divides by a media white component, then pops 4 operands
+            _cmsIOPrintf(m, "%% Absolute colorimetric -- encode to relative to maximize LUT usage\n"
+                      "/TransformPQR [\n"
+                      "{0.9642 mul %g div exch pop exch pop exch pop exch pop} bind\n"
+                      "{1.0000 mul %g div exch pop exch pop exch pop exch pop} bind\n"
+                      "{0.8249 mul %g div exch pop exch pop exch pop exch pop} bind\n]\n",
+                      White.X, White.Y, White.Z);
+            return;
+        }
+
+
+        _cmsIOPrintf(m,"%% Bradford Cone Space\n"
+                 "/MatrixPQR [0.8951 -0.7502 0.0389 0.2664 1.7135 -0.0685 -0.1614 0.0367 1.0296 ] \n");
+
+        _cmsIOPrintf(m, "/RangePQR [ -0.5 2 -0.5 2 -0.5 2 ]\n");
+
+
+        // No BPC
+
+        if (!DoBPC) {
+
+            _cmsIOPrintf(m, "%% VonKries-like transform in Bradford Cone Space\n"
+                      "/TransformPQR [\n"
+                      "{exch pop exch 3 get mul exch pop exch 3 get div} bind\n"
+                      "{exch pop exch 4 get mul exch pop exch 4 get div} bind\n"
+                      "{exch pop exch 5 get mul exch pop exch 5 get div} bind\n]\n");
+        } else {
+
+            // BPC
+
+            _cmsIOPrintf(m, "%% VonKries-like transform in Bradford Cone Space plus BPC\n"
+                      "/TransformPQR [\n");
+            // One procedure per component, reading array index 3, 4 and 5 respectively
+            _cmsIOPrintf(m, "{4 index 3 get div 2 index 3 get mul "
+                    "2 index 3 get 2 index 3 get sub mul "
+                    "2 index 3 get 4 index 3 get 3 index 3 get sub mul sub "
+                    "3 index 3 get 3 index 3 get exch sub div "
+                    "exch pop exch pop exch pop exch pop } bind\n");
+
+            _cmsIOPrintf(m, "{4 index 4 get div 2 index 4 get mul "
+                    "2 index 4 get 2 index 4 get sub mul "
+                    "2 index 4 get 4 index 4 get 3 index 4 get sub mul sub "
+                    "3 index 4 get 3 index 4 get exch sub div "
+                    "exch pop exch pop exch pop exch pop } bind\n");
+
+            _cmsIOPrintf(m, "{4 index 5 get div 2 index 5 get mul "
+                    "2 index 5 get 2 index 5 get sub mul "
+                    "2 index 5 get 4 index 5 get 3 index 5 get sub mul sub "
+                    "3 index 5 get 3 index 5 get exch sub div "
+                    "exch pop exch pop exch pop exch pop } bind\n]\n");
+
+        }
+
+
+}
+
+
+static
+void EmitXYZ2Lab(cmsIOHANDLER* m)
+{
+    // PostScript fragments of the XYZ -> Lab encoding, written verbatim and in order
+    static const char* const Fragments[] = {
+        "/RangeLMN [ -0.635 2.0 0 2 -0.635 2.0 ]\n",
+        "/EncodeLMN [\n",
+        "{ 0.964200  div dup 0.008856 le {7.787 mul 16 116 div add}{1 3 div exp} ifelse } bind\n",
+        "{ 1.000000  div dup 0.008856 le {7.787 mul 16 116 div add}{1 3 div exp} ifelse } bind\n",
+        "{ 0.824900  div dup 0.008856 le {7.787 mul 16 116 div add}{1 3 div exp} ifelse } bind\n",
+        "]\n",
+        "/MatrixABC [ 0 1 0 1 -1 1 0 0 -1 ]\n",
+        "/EncodeABC [\n",
+        "{ 116 mul  16 sub 100 div  } bind\n",
+        "{ 500 mul 128 add 256 div  } bind\n",
+        "{ 200 mul 128 add 256 div  } bind\n",
+        "]\n"
+    };
+    cmsUInt32Number k;
+
+    for (k = 0; k < sizeof(Fragments) / sizeof(Fragments[0]); k++) _cmsIOPrintf(m, "%s", Fragments[k]);
+}
+
+// Writes the body of a Color Rendering Dictionary (CRD) for a non named-color profile.
+// Due to impedance mismatch between XYZ and almost all RGB and CMYK spaces, the LUT is
+// dumped in Lab instead of XYZ. There is still a lot of wasted space on the 3D CLUT, but
+// since space seems not to be a problem here, 33 points would give a reasonable accuracy.
+// Note also that CRD tables must operate in 8 bits.
+// Returns 1 on success, 0 if the Lab profile, the transform or its pipeline copy fail.
+static
+int WriteOutputLUT(cmsIOHANDLER* m, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags)
+{
+    cmsHPROFILE hLab;
+    cmsHTRANSFORM xform;
+    cmsUInt32Number i, nChannels;
+    cmsUInt32Number OutputFormat;
+    _cmsTRANSFORM* v;
+    cmsPipeline* DeviceLink;
+    cmsHPROFILE Profiles[3];
+    cmsCIEXYZ BlackPointAdaptedToD50;
+    cmsBool lDoBPC = (cmsBool) (dwFlags & cmsFLAGS_BLACKPOINTCOMPENSATION);
+    cmsBool lFixWhite = (cmsBool) !(dwFlags & cmsFLAGS_NOWHITEONWHITEFIXUP);
+    cmsUInt32Number InFrm = TYPE_Lab_16;
+    cmsUInt32Number RelativeEncodingIntent;
+    cmsColorSpaceSignature ColorSpace;
+
+    hLab = cmsCreateLab4ProfileTHR(m ->ContextID, NULL);
+    if (hLab == NULL) return 0;
+
+    OutputFormat = cmsFormatterForColorspaceOfProfile(hProfile, 2, FALSE);
+    nChannels    = T_CHANNELS(OutputFormat);
+
+    ColorSpace = cmsGetColorSpace(hProfile);
+
+    // For absolute colorimetric, the LUT is encoded as relative in order to preserve precision.
+    RelativeEncodingIntent = Intent;
+    if (RelativeEncodingIntent == INTENT_ABSOLUTE_COLORIMETRIC)
+        RelativeEncodingIntent = INTENT_RELATIVE_COLORIMETRIC;
+
+    // Use V4 Lab always
+    Profiles[0] = hLab;
+    Profiles[1] = hProfile;
+
+    xform = cmsCreateMultiprofileTransformTHR(m ->ContextID,
+                                              Profiles, 2, TYPE_Lab_DBL,
+                                              OutputFormat, RelativeEncodingIntent, 0);
+
+    // The Lab profile is no longer needed once the transform has been attempted
+    cmsCloseProfile(hLab);
+
+    if (xform == NULL) {
+
+        cmsSignalError(m ->ContextID, cmsERROR_COLORSPACE_CHECK, "Cannot create transform Lab -> Profile in CRD creation");
+        return 0;
+    }
+
+    // Get a copy of the internal devicelink
+    v = (_cmsTRANSFORM*) xform;
+    DeviceLink = cmsPipelineDup(v ->Lut);
+    if (DeviceLink == NULL) {
+
+        // The transform is owned by this function: release it on this failure path too
+        cmsDeleteTransform(xform);
+        return 0;
+    }
+
+    // We need a CLUT
+    dwFlags |= cmsFLAGS_FORCE_CLUT;
+    _cmsOptimizePipeline(m->ContextID, &DeviceLink, RelativeEncodingIntent, &InFrm, &OutputFormat, &dwFlags);
+
+    _cmsIOPrintf(m, "<<\n");
+    _cmsIOPrintf(m, "/ColorRenderingType 1\n");
+
+    // The black point is detected with the caller's intent, not the relative-encoding one
+    cmsDetectBlackPoint(&BlackPointAdaptedToD50, hProfile, Intent, 0);
+
+    // Emit headers, etc.
+    EmitWhiteBlackD50(m, &BlackPointAdaptedToD50);
+    EmitPQRStage(m, hProfile, lDoBPC, Intent == INTENT_ABSOLUTE_COLORIMETRIC);
+    EmitXYZ2Lab(m);
+
+    // FIXUP: map Lab (100, 0, 0) to perfect white, because the particular encoding for Lab
+    // does map a=b=0 not falling into any specific node. Since range a,b goes -128..127,
+    // zero is slightly moved towards right, so assure next node (in L=100 slice) is mapped to
+    // zero. This would sacrifice a bit of highlights, but failure to do so would cause
+    // scum dot. Ouch.
+
+    if (Intent == INTENT_ABSOLUTE_COLORIMETRIC)
+            lFixWhite = FALSE;
+
+    _cmsIOPrintf(m, "/RenderTable ");
+
+    WriteCLUT(m, cmsPipelineGetPtrToFirstStage(DeviceLink), "<", ">\n", "", "", lFixWhite, ColorSpace);
+
+    _cmsIOPrintf(m, " %d {} bind ", nChannels);
+
+    for (i=1; i < nChannels; i++)
+            _cmsIOPrintf(m, "dup ");
+
+    _cmsIOPrintf(m, "]\n");
+
+    EmitIntent(m, Intent);
+
+    _cmsIOPrintf(m, ">>\n");
+
+    if (!(dwFlags & cmsFLAGS_NODEFAULTRESOURCEDEF)) {
+
+        _cmsIOPrintf(m, "/Current exch /ColorRendering defineresource pop\n");
+    }
+
+    cmsPipelineFree(DeviceLink);
+    cmsDeleteTransform(xform);
+
+    return 1;
+}
+
+
+// Formats up to cmsMAXCHANNELS 16-bit colorant values as a space separated list of
+// decimals ("%.3f" of value/65535) and stores the resulting ASCII string in Colorant.
+static
+void BuildColorantList(char *Colorant, cmsUInt32Number nColorant, cmsUInt16Number Out[])
+{
+    char Number[32];
+    cmsUInt32Number Count = (nColorant > cmsMAXCHANNELS) ? cmsMAXCHANNELS : nColorant;
+    cmsUInt32Number k = 0;
+
+    Colorant[0] = 0;
+
+    while (k < Count) {
+
+        // A single blank goes between values: none before the first, none after the last
+        if (k != 0) strcat(Colorant, " ");
+
+        snprintf(Number, 31, "%.3f", Out[k++] / 65535.0);
+        Number[31] = 0;
+        strcat(Colorant, Number);
+    }
+}
+
+
+// Creates a PostScript color list from a named profile data.
+// This is a HP extension.
+// Returns 1 on success, 0 if the transform or its named color list is unavailable.
+static
+int WriteNamedColorCRD(cmsIOHANDLER* m, cmsHPROFILE hNamedColor, cmsUInt32Number Intent, cmsUInt32Number dwFlags)
+{
+    cmsHTRANSFORM xform;
+    cmsUInt32Number i, nColors, nColorant;
+    cmsUInt32Number OutputFormat;
+    char ColorName[cmsMAX_PATH];
+    char Colorant[128];
+    cmsNAMEDCOLORLIST* NamedColorList;
+
+    OutputFormat = cmsFormatterForColorspaceOfProfile(hNamedColor, 2, FALSE);
+    nColorant    = T_CHANNELS(OutputFormat);
+
+    xform = cmsCreateTransform(hNamedColor, TYPE_NAMED_COLOR_INDEX, NULL, OutputFormat, Intent, dwFlags);
+    if (xform == NULL) return 0;
+
+    NamedColorList = cmsGetNamedColorList(xform);
+    if (NamedColorList == NULL) {
+        // Do not leak the transform created above when it carries no named color list
+        cmsDeleteTransform(xform);
+        return 0;
+    }
+
+    _cmsIOPrintf(m, "<<\n");
+    _cmsIOPrintf(m, "(colorlistcomment) (%s) \n", "Named profile");
+    _cmsIOPrintf(m, "(Prefix) [ (Pantone ) (PANTONE ) ]\n");
+    _cmsIOPrintf(m, "(Suffix) [ ( CV) ( CVC) ( C) ]\n");
+
+    nColors   = cmsNamedColorCount(NamedColorList);
+
+    for (i=0; i < nColors; i++) {
+        cmsUInt16Number In[1];
+        cmsUInt16Number Out[cmsMAXCHANNELS];
+
+        In[0] = (cmsUInt16Number) i;
+
+        // Entries whose name cannot be retrieved are left out of the list
+        if (!cmsNamedColorInfo(NamedColorList, i, ColorName, NULL, NULL, NULL, NULL))
+                continue;
+
+        cmsDoTransform(xform, In, Out, 1);
+        BuildColorantList(Colorant, nColorant, Out);
+        _cmsIOPrintf(m, "  (%s) [ %s ]\n", ColorName, Colorant);
+    }
+
+    _cmsIOPrintf(m, "   >>");
+
+    if (!(dwFlags & cmsFLAGS_NODEFAULTRESOURCEDEF)) {
+        _cmsIOPrintf(m, " /Current exch /HPSpotTable defineresource pop\n");
+    }
+
+    cmsDeleteTransform(xform);
+    return 1;
+}
+
+
+
+// Produces a Color Rendering Dictionary (CRD) for hProfile on the given I/O handler.
+// A CRD is LUT-based in every case, even when the profile itself is a matrix-shaper.
+// Gives back the handler's UsedSpace, or 0 as soon as one of the writers fails.
+
+static
+cmsUInt32Number  GenerateCRD(cmsContext ContextID,
+                             cmsHPROFILE hProfile,
+                             cmsUInt32Number Intent, cmsUInt32Number dwFlags,
+                             cmsIOHANDLER* mem)
+{
+    // The resource header and trailer are emitted unless the caller opted out
+    const cmsBool WrapAsResource = (dwFlags & cmsFLAGS_NODEFAULTRESOURCEDEF) ? FALSE : TRUE;
+    int Written;
+
+
+    if (WrapAsResource) {
+
+        EmitHeader(mem, "Color Rendering Dictionary (CRD)", hProfile);
+    }
+
+
+    // Named color profiles have their own writer; any other class goes through
+    // the LUT writer
+    if (cmsGetDeviceClass(hProfile) == cmsSigNamedColorClass) {
+
+        Written = WriteNamedColorCRD(mem, hProfile, Intent, dwFlags);
+    }
+    else {
+
+        Written = WriteOutputLUT(mem, hProfile, Intent, dwFlags);
+    }
+
+    // A failed writer aborts here, before any trailer is emitted
+    if (!Written)
+        return 0;
+
+    if (WrapAsResource) {
+
+        _cmsIOPrintf(mem, "%%%%EndResource\n");
+        _cmsIOPrintf(mem, "\n%% CRD End\n");
+    }
+
+
+    // The result is the byte count kept by the handler
+    return mem ->UsedSpace;
+
+    cmsUNUSED_PARAMETER(ContextID);
+}
+
+
+
+
+cmsUInt32Number CMSEXPORT cmsGetPostScriptColorResource(cmsContext ContextID,
+                                                               cmsPSResourceType Type,
+                                                               cmsHPROFILE hProfile,
+                                                               cmsUInt32Number Intent,
+                                                               cmsUInt32Number dwFlags,
+                                                               cmsIOHANDLER* io)
+{
+    // Generates the requested PostScript resource on io and returns the byte count
+    // reported by the generator (0 on failure).
+
+    // Only cmsPS_RESOURCE_CSA selects the color space array generator
+    if (Type == cmsPS_RESOURCE_CSA) {
+
+        return GenerateCSA(ContextID, hProfile, Intent, dwFlags, io);
+    }
+
+
+    // cmsPS_RESOURCE_CRD, as well as any unrecognized value, yields a color
+    // rendering dictionary
+
+    return GenerateCRD(ContextID, hProfile, Intent, dwFlags, io);
+
+
+}
+
+
+
+cmsUInt32Number CMSEXPORT cmsGetPostScriptCRD(cmsContext ContextID,
+                              cmsHPROFILE hProfile,
+                              cmsUInt32Number Intent, cmsUInt32Number dwFlags,
+                              void* Buffer, cmsUInt32Number dwBufferLen)
+{
+    cmsUInt32Number nBytes;
+    cmsIOHANDLER* io;
+
+    // A NULL Buffer selects the NULL I/O handler; otherwise the output is
+    // written to the caller's memory block of dwBufferLen bytes
+    io = (Buffer == NULL) ? cmsOpenIOhandlerFromNULL(ContextID)
+                          : cmsOpenIOhandlerFromMem(ContextID, Buffer, dwBufferLen, "w");
+
+    if (io == NULL)
+        return 0;
+
+    nBytes = cmsGetPostScriptColorResource(ContextID, cmsPS_RESOURCE_CRD, hProfile, Intent, dwFlags, io);
+
+    // The handler is only needed while generating: release it before returning
+    cmsCloseIOhandler(io);
+
+    return nBytes;
+}
+
+
+
+// Creates a Color Space Array for PostScript usage; returns the generator's byte count
+cmsUInt32Number CMSEXPORT cmsGetPostScriptCSA(cmsContext ContextID,
+                                              cmsHPROFILE hProfile,
+                                              cmsUInt32Number Intent,
+                                              cmsUInt32Number dwFlags,
+                                              void* Buffer,
+                                              cmsUInt32Number dwBufferLen)
+{
+    cmsIOHANDLER* Stream = NULL;
+    cmsUInt32Number Result;
+
+    // Serialize into the caller's buffer when there is one, else into a NULL handler
+    if (Buffer != NULL)
+        Stream = cmsOpenIOhandlerFromMem(ContextID, Buffer, dwBufferLen, "w");
+    else
+        Stream = cmsOpenIOhandlerFromNULL(ContextID);
+
+    // Nothing can be generated without a handler
+    if (Stream == NULL) return 0;
+
+    Result = cmsGetPostScriptColorResource(ContextID, cmsPS_RESOURCE_CSA, hProfile, Intent, dwFlags, Stream);
+
+    // Get rid of the handler now that the resource has been generated
+    cmsCloseIOhandler(Stream);
+    return Result;
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssamp.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssamp.cpp
new file mode 100644
index 0000000000..1fc5f5d467
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssamp.cpp
@@ -0,0 +1,547 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+#define cmsmin(a, b) (((a) < (b)) ? (a) : (b))
+#define cmsmax(a, b) (((a) > (b)) ? (a) : (b))
+
+// This file contains routines for resampling and LUT optimization, black point detection
+// and black preservation.
+
+// Black point detection -------------------------------------------------------------------------
+
+
+// PCS -> PCS round trip transform, always uses relative intent on the device -> pcs. NULL on failure.
+static
+cmsHTRANSFORM CreateRoundtripXForm(cmsHPROFILE hProfile, cmsUInt32Number nIntent)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsHPROFILE hLab = cmsCreateLab4ProfileTHR(ContextID, NULL);
+    cmsHTRANSFORM xform;
+    cmsBool BPC[4] = { FALSE, FALSE, FALSE, FALSE };
+    cmsFloat64Number States[4] = { 1.0, 1.0, 1.0, 1.0 };
+    cmsHPROFILE hProfiles[4];
+    cmsUInt32Number Intents[4];
+
+    if (hLab == NULL) return NULL;    // Lab is both ends of the chain: nothing to build or close
+    hProfiles[0] = hLab; hProfiles[1] = hProfile; hProfiles[2] = hProfile; hProfiles[3] = hLab;
+    Intents[0]   = INTENT_RELATIVE_COLORIMETRIC; Intents[1] = nIntent; Intents[2] = INTENT_RELATIVE_COLORIMETRIC; Intents[3] = INTENT_RELATIVE_COLORIMETRIC;
+
+    xform =  cmsCreateExtendedTransform(ContextID, 4, hProfiles, BPC, Intents,
+        States, NULL, 0, TYPE_Lab_DBL, TYPE_Lab_DBL, cmsFLAGS_NOCACHE|cmsFLAGS_NOOPTIMIZE);
+    cmsCloseProfile(hLab);
+    return xform;
+}
+
+// Obtains the black point by sending the darkest colorant of the profile's colorspace
+// through the profile. It is assumed that more ink gives darker colors and that there
+// is no ink limit. On failure the black point is set to zero and FALSE is returned.
+static
+cmsBool  BlackPointAsDarkerColorant(cmsHPROFILE    hInput,
+                                    cmsUInt32Number Intent,
+                                    cmsCIEXYZ* BlackPoint,
+                                    cmsUInt32Number dwFlags)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hInput);
+    cmsUInt16Number *DarkestColorant;
+    cmsUInt32Number nChannels;
+    cmsUInt32Number dwFormat;
+    cmsHPROFILE hLab;
+    cmsHTRANSFORM hToLab;
+    cmsCIELab  Lab;
+    cmsCIEXYZ  BlackXYZ;
+
+
+    // The profile has to work in the input direction for the requested intent
+    if (!cmsIsIntentSupported(hInput, Intent, LCMS_USED_AS_INPUT))
+        goto NoBlackPoint;
+
+
+    // Formatter matching the colorspace of the profile
+    dwFormat = cmsFormatterForColorspaceOfProfile(hInput, 2, FALSE);
+
+    // Darkest colorant, in 16 bits, as known for several colorspaces
+    if (!_cmsEndPointsBySpace(cmsGetColorSpace(hInput), NULL, &DarkestColorant, &nChannels))
+        goto NoBlackPoint;
+
+
+    // The end point must have exactly the channels the formatter expects
+    if (nChannels != T_CHANNELS(dwFormat))
+        goto NoBlackPoint;
+
+
+    // Lab is the output space; a Lab2 profile is used to avoid recursion
+    hLab = cmsCreateLab2ProfileTHR(ContextID, NULL);
+    if (hLab == NULL)
+        goto NoBlackPoint;
+
+
+    // Unoptimized, uncached transform from the profile to Lab. The Lab profile
+    // can be closed as soon as the transform creation has been attempted.
+    hToLab = cmsCreateTransformTHR(ContextID, hInput, dwFormat,
+                                   hLab, TYPE_Lab_DBL, Intent, cmsFLAGS_NOOPTIMIZE|cmsFLAGS_NOCACHE);
+    cmsCloseProfile(hLab);
+
+    if (hToLab == NULL)
+        goto NoBlackPoint;
+
+
+    // Darkest colorant -> Lab; the transform is not needed afterwards
+    cmsDoTransform(hToLab, DarkestColorant, &Lab, 1);
+    cmsDeleteTransform(hToLab);
+
+
+    // Make the result neutral and never lighter than L* = 50
+    if (Lab.L > 50) Lab.L = 50;
+    Lab.a = 0;
+    Lab.b = 0;
+
+
+    // Back to XYZ, which is what the caller gets
+    cmsLab2XYZ(NULL, &BlackXYZ, &Lab);
+
+
+    if (BlackPoint != NULL)
+        *BlackPoint = BlackXYZ;
+
+
+    return TRUE;
+
+NoBlackPoint:
+
+    // Common failure exit: report a zero black point
+    BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+    return FALSE;
+
+    cmsUNUSED_PARAMETER(dwFlags);
+}
+
+// Black point of an output CMYK profile, leaving aside any ink-limiting embedded in
+// the profile. Perceptual intent is used in the input direction of a round trip:
+// Lab (0, 0, 0) -> [Perceptual] Profile -> CMYK -> [Rel. colorimetric] Profile -> Lab
+static
+cmsBool BlackPointUsingPerceptualBlack(cmsCIEXYZ* BlackPoint, cmsHPROFILE hProfile)
+{
+    cmsCIELab PcsBlack, RoundTripped;
+    cmsCIEXYZ  BlackXYZ;
+    cmsHTRANSFORM hRoundTrip;
+
+    // No perceptual intent in the profile: black is zero, still reported as success
+    if (!cmsIsIntentSupported(hProfile, INTENT_PERCEPTUAL, LCMS_USED_AS_INPUT)) {
+        BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+        return TRUE;
+    }
+
+    hRoundTrip = CreateRoundtripXForm(hProfile, INTENT_PERCEPTUAL);
+    if (hRoundTrip == NULL) {
+        // The round trip could not be built: zero black, reported as failure
+        BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+        return FALSE;
+    }
+
+    // Send Lab (0, 0, 0) through the round trip; the transform is done after that
+    PcsBlack.L = 0;
+    PcsBlack.a = 0;
+    PcsBlack.b = 0;
+    cmsDoTransform(hRoundTrip, &PcsBlack, &RoundTripped, 1);
+    cmsDeleteTransform(hRoundTrip);
+
+    // Neutralize and keep L* at or below 50 before going to XYZ
+    RoundTripped.a = RoundTripped.b = 0;
+    if (RoundTripped.L > 50) RoundTripped.L = 50;
+    cmsLab2XYZ(NULL, &BlackXYZ, &RoundTripped);
+
+    if (BlackPoint != NULL)
+        *BlackPoint = BlackXYZ;
+
+    return TRUE;
+}
+
+// This function shouldn't exist at all -- there is such quantity of broken
+// profiles on black point tag, that we must somehow fix chromaticity to
+// avoid huge tint when doing Black point compensation. This function does
+// just that. There is a special flag for using black point tag, but turned
+// off by default because it is bogus on most profiles. The detection algorithm
+// involves to turn BP to neutral and to use only L component.
+cmsBool CMSEXPORT cmsDetectBlackPoint(cmsCIEXYZ* BlackPoint, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags)
+{
+    cmsProfileClassSignature devClass;
+    // NOTE: the rejection paths below zero *BlackPoint without checking BlackPoint for NULL
+    // Make sure the device class is adequate
+    devClass = cmsGetDeviceClass(hProfile);
+    if (devClass == cmsSigLinkClass ||
+        devClass == cmsSigAbstractClass ||
+        devClass == cmsSigNamedColorClass) {
+            BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+            return FALSE;
+    }
+
+    // Make sure intent is adequate
+    if (Intent != INTENT_PERCEPTUAL &&
+        Intent != INTENT_RELATIVE_COLORIMETRIC &&
+        Intent != INTENT_SATURATION) {
+            BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+            return FALSE;
+    }
+
+    // v4 + perceptual & saturation intents does have its own black point, and it is
+    // well specified enough to use it. Black point tag is deprecated in V4.
+    if ((cmsGetEncodedICCversion(hProfile) >= 0x4000000) &&
+        (Intent == INTENT_PERCEPTUAL || Intent == INTENT_SATURATION)) {
+
+            // Matrix shaper share MRC & perceptual intents
+            if (cmsIsMatrixShaper(hProfile))
+                return BlackPointAsDarkerColorant(hProfile, INTENT_RELATIVE_COLORIMETRIC, BlackPoint, 0);
+
+            // Get Perceptual black out of v4 profiles. That is fixed for perceptual & saturation intents
+            BlackPoint -> X = cmsPERCEPTUAL_BLACK_X;
+            BlackPoint -> Y = cmsPERCEPTUAL_BLACK_Y;
+            BlackPoint -> Z = cmsPERCEPTUAL_BLACK_Z;
+
+            return TRUE;
+    }
+
+
+#ifdef CMS_USE_PROFILE_BLACK_POINT_TAG
+    // This section is compiled only when CMS_USE_PROFILE_BLACK_POINT_TAG is defined
+    // v2, v4 rel/abs colorimetric
+    if (cmsIsTag(hProfile, cmsSigMediaBlackPointTag) &&
+        Intent == INTENT_RELATIVE_COLORIMETRIC) {
+
+            cmsCIEXYZ *BlackPtr, BlackXYZ, UntrustedBlackPoint, TrustedBlackPoint, MediaWhite;
+            cmsCIELab Lab;
+
+            // If black point is specified, then use it,
+
+            BlackPtr = cmsReadTag(hProfile, cmsSigMediaBlackPointTag);
+            if (BlackPtr != NULL) {
+
+                BlackXYZ = *BlackPtr;
+                _cmsReadMediaWhitePoint(&MediaWhite, hProfile);
+
+                // Black point is absolute XYZ, so adapt to D50 to get PCS value
+                cmsAdaptToIlluminant(&UntrustedBlackPoint, &MediaWhite, cmsD50_XYZ(), &BlackXYZ);
+
+                // Force a=b=0 to get rid of any chroma
+                cmsXYZ2Lab(NULL, &Lab, &UntrustedBlackPoint);
+                Lab.a = Lab.b = 0;
+                if (Lab.L > 50) Lab.L = 50; // Clip to L* <= 50
+                cmsLab2XYZ(NULL, &TrustedBlackPoint, &Lab);
+
+                if (BlackPoint != NULL)
+                    *BlackPoint = TrustedBlackPoint;
+
+                return TRUE;
+            }
+    }
+#endif
+
+    // That is about v2 profiles.
+
+    // If output profile, discount ink-limiting and that's all
+    if (Intent == INTENT_RELATIVE_COLORIMETRIC &&
+        (cmsGetDeviceClass(hProfile) == cmsSigOutputClass) &&
+        (cmsGetColorSpace(hProfile)  == cmsSigCmykData))
+        return BlackPointUsingPerceptualBlack(BlackPoint, hProfile);
+
+    // Nope, compute BP using current intent.
+    return BlackPointAsDarkerColorant(hProfile, Intent, BlackPoint, dwFlags);
+}
+
+
+
+// ---------------------------------------------------------------------------------------------------------
+
+// Least Squares Fit of a Quadratic Curve to Data
+// http://www.personal.psu.edu/jhm/f90/lectures/lsq2.html
+// Fits y = a*x^2 + b*x + c to the n points and returns a root of the fit clipped to 0..50.
+// Returns 0 with fewer than 4 points, an unsolvable system or no usable root.
+static
+cmsFloat64Number RootOfLeastSquaresFitQuadraticCurve(int n, cmsFloat64Number x[], cmsFloat64Number y[])
+{
+    double sum_x = 0, sum_x2 = 0, sum_x3 = 0, sum_x4 = 0;
+    double sum_y = 0, sum_yx = 0, sum_yx2 = 0;
+    double d, a, b, c;
+    int i;
+    cmsMAT3 m;
+    cmsVEC3 v, res;
+
+    if (n < 4) return 0;
+
+    for (i=0; i < n; i++) {
+        double xn = x[i];
+        double yn = y[i];
+
+        sum_x  += xn;
+        sum_x2 += xn*xn;
+        sum_x3 += xn*xn*xn;
+        sum_x4 += xn*xn*xn*xn;
+
+        sum_y += yn;
+        sum_yx += yn*xn;
+        sum_yx2 += yn*xn*xn;
+    }
+
+    // Normal equations of the fit: m * (c, b, a) = v
+    _cmsVEC3init(&m.v[0], n,      sum_x,  sum_x2);
+    _cmsVEC3init(&m.v[1], sum_x,  sum_x2, sum_x3);
+    _cmsVEC3init(&m.v[2], sum_x2, sum_x3, sum_x4);
+
+    _cmsVEC3init(&v, sum_y, sum_yx, sum_yx2);
+
+    if (!_cmsMAT3solve(&res, &m, &v)) return 0;
+
+    a = res.n[2];
+    b = res.n[1];
+    c = res.n[0];
+
+    if (fabs(a) < 1.0E-10) {
+        // Degenerate (linear) fit: the root of b*x + c is -c/b. A flat line has no
+        // root, and dividing by such a b would give an infinity or a NaN.
+        if (fabs(b) < 1.0E-10) return 0;
+
+        // Clip to 0..50, same as the quadratic case below
+        return cmsmax(0, cmsmin(50, -c/b ));
+    }
+    else {
+         d = b*b - 4.0 * a * c;
+         if (d <= 0) {
+             return 0;
+         }
+         else {
+             double rt = (-b + sqrt(d)) / (2.0 * a);
+             return cmsmax(0, cmsmin(50, rt));
+         }
+   }
+}
+
+
+
+// Calculates the black point of a destination profile.
+// This algorithm comes from the Adobe paper disclosing its black point compensation method.
+cmsBool CMSEXPORT cmsDetectDestinationBlackPoint(cmsCIEXYZ* BlackPoint, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags)
+{
+    cmsColorSpaceSignature ColorSpace;
+    cmsHTRANSFORM hRoundTrip = NULL;
+    cmsCIELab InitialLab, destLab, Lab;
+    cmsFloat64Number inRamp[256], outRamp[256];
+    cmsFloat64Number MinL, MaxL;
+    cmsBool NearlyStraightMidrange = TRUE;  
+    cmsFloat64Number yRamp[256];
+    cmsFloat64Number x[256], y[256];
+    cmsFloat64Number lo, hi;
+    int n, l;
+    cmsProfileClassSignature devClass;
+    // NOTE: BlackPoint is dereferenced without a NULL check on several paths below
+    // Make sure the device class is adequate
+    devClass = cmsGetDeviceClass(hProfile);
+    if (devClass == cmsSigLinkClass ||
+        devClass == cmsSigAbstractClass ||
+        devClass == cmsSigNamedColorClass) {
+            BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+            return FALSE;
+    }
+
+    // Make sure intent is adequate
+    if (Intent != INTENT_PERCEPTUAL &&
+        Intent != INTENT_RELATIVE_COLORIMETRIC &&
+        Intent != INTENT_SATURATION) {
+            BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+            return FALSE;
+    }
+
+
+    // v4 + perceptual & saturation intents does have its own black point, and it is
+    // well specified enough to use it. Black point tag is deprecated in V4.
+    if ((cmsGetEncodedICCversion(hProfile) >= 0x4000000) &&
+        (Intent == INTENT_PERCEPTUAL || Intent == INTENT_SATURATION)) {
+
+            // Matrix shaper share MRC & perceptual intents
+            if (cmsIsMatrixShaper(hProfile))
+                return BlackPointAsDarkerColorant(hProfile, INTENT_RELATIVE_COLORIMETRIC, BlackPoint, 0);
+
+            // Get Perceptual black out of v4 profiles. That is fixed for perceptual & saturation intents
+            BlackPoint -> X = cmsPERCEPTUAL_BLACK_X;
+            BlackPoint -> Y = cmsPERCEPTUAL_BLACK_Y;
+            BlackPoint -> Z = cmsPERCEPTUAL_BLACK_Z;
+            return TRUE;
+    }
+
+
+    // Check if the profile is lut based and gray, rgb or cmyk (7.2 in Adobe's document)
+    ColorSpace = cmsGetColorSpace(hProfile);
+    if (!cmsIsCLUT(hProfile, Intent, LCMS_USED_AS_OUTPUT ) ||
+        (ColorSpace != cmsSigGrayData &&
+         ColorSpace != cmsSigRgbData  &&
+         ColorSpace != cmsSigCmykData)) {
+
+        // In this case, handle as input case
+        return cmsDetectBlackPoint(BlackPoint, hProfile, Intent, dwFlags);
+    }
+
+    // It is one of the valid cases!, use Adobe algorithm
+
+    
+    // Set a first guess, that should work on good profiles.
+    if (Intent == INTENT_RELATIVE_COLORIMETRIC) {
+
+        cmsCIEXYZ IniXYZ;
+
+        // calculate initial Lab as source black point
+        if (!cmsDetectBlackPoint(&IniXYZ, hProfile, Intent, dwFlags)) {
+            return FALSE;
+        }
+
+        // convert the XYZ to lab
+        cmsXYZ2Lab(NULL, &InitialLab, &IniXYZ);
+
+    } else {
+
+        // set the initial Lab to zero, that should be the black point for perceptual and saturation
+        InitialLab.L = 0;
+        InitialLab.a = 0;
+        InitialLab.b = 0;
+    }
+
+
+    // Step 2
+    // ======
+
+    // Create a roundtrip. Define a Transform BT for all x in L*a*b*
+    hRoundTrip = CreateRoundtripXForm(hProfile, Intent);
+    if (hRoundTrip == NULL)  return FALSE;
+
+    // Compute ramps: 256 L* steps from 0 to 100, with a and b of the first guess limited to -50..50
+
+    for (l=0; l < 256; l++) {
+
+        Lab.L = (cmsFloat64Number) (l * 100.0) / 255.0;
+        Lab.a = cmsmin(50, cmsmax(-50, InitialLab.a));
+        Lab.b = cmsmin(50, cmsmax(-50, InitialLab.b));
+
+        cmsDoTransform(hRoundTrip, &Lab, &destLab, 1);
+
+        inRamp[l]  = Lab.L;
+        outRamp[l] = destLab.L;
+    }
+
+    // Make monotonic (non-decreasing), scanning down from the white end; index 0 is not touched
+    for (l = 254; l > 0; --l) {
+        outRamp[l] = cmsmin(outRamp[l], outRamp[l+1]);
+    }
+
+    // Check: the output ramp has to rise from its first to its last entry, else give up
+    if (! (outRamp[0] < outRamp[255])) {
+
+        cmsDeleteTransform(hRoundTrip);
+        BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+        return FALSE;
+    }
+
+
+    // Test for mid range straight (only on relative colorimetric)
+    NearlyStraightMidrange = TRUE;
+    MinL = outRamp[0]; MaxL = outRamp[255];
+    if (Intent == INTENT_RELATIVE_COLORIMETRIC) {
+      
+        for (l=0; l < 256; l++) {
+
+            if (! ((inRamp[l] <= MinL + 0.2 * (MaxL - MinL) ) ||   
+                (fabs(inRamp[l] - outRamp[l]) < 4.0 )))
+                NearlyStraightMidrange = FALSE;
+        }
+
+        // If the mid range is straight (as determined above) then the
+        // DestinationBlackPoint shall be the same as initialLab.
+        // Otherwise, the DestinationBlackPoint shall be determined
+        // using curve fitting.
+        if (NearlyStraightMidrange) {
+
+            cmsLab2XYZ(NULL, BlackPoint, &InitialLab);
+            cmsDeleteTransform(hRoundTrip);
+            return TRUE;
+        }
+    }
+
+ 
+    // curve fitting: The round-trip curve normally looks like a nearly constant section at the black point,
+    // with a corner and a nearly straight line to the white point.
+    for (l=0; l < 256; l++) {
+        // Normalize the output ramp to 0..1 (MaxL > MinL is guaranteed by the check above)
+        yRamp[l] = (outRamp[l] - MinL) / (MaxL - MinL);
+    }
+
+    // find the black point using the least squares error quadratic curve fitting
+    if (Intent == INTENT_RELATIVE_COLORIMETRIC) {
+        lo = 0.1;
+        hi = 0.5;
+    }
+    else {
+
+        // Perceptual and saturation
+        lo = 0.03;
+        hi = 0.25;
+    }
+
+    // Capture shadow points for the fitting: those whose normalized output lies in [lo, hi)
+    n = 0;
+    for (l=0; l < 256; l++) {
+    
+        cmsFloat64Number ff = yRamp[l];
+
+        if (ff >= lo && ff < hi) {
+            x[n] = inRamp[l];
+            y[n] = yRamp[l];
+            n++;
+        }    
+    }
+
+    // NOTE(review): the fitting helper itself returns 0 when n < 4, so n == 3 ends up as L* = 0
+    // No suitable points
+    if (n < 3 ) {
+        cmsDeleteTransform(hRoundTrip);
+        BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+        return FALSE;
+    }
+
+  
+    // fit a quadratic curve and take the root computed by the helper as L*
+    Lab.L = RootOfLeastSquaresFitQuadraticCurve(n, x, y);
+
+    if (Lab.L < 0.0) { // clip to zero L* if the vertex is negative
+        Lab.L = 0;
+    }
+
+    Lab.a = InitialLab.a;
+    Lab.b = InitialLab.b;
+
+    cmsLab2XYZ(NULL, BlackPoint, &Lab);
+
+    cmsDeleteTransform(hRoundTrip);
+    return TRUE;
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssm.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssm.cpp
new file mode 100644
index 0000000000..a0fdbc86c5
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmssm.cpp
@@ -0,0 +1,736 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// ------------------------------------------------------------------------
+
+// Gamut boundary description by using Jan Morovic's Segment maxima method
+// Many thanks to Jan for allowing me to use his algorithm.
+
+// r = C*
+// alpha = Hab
+// theta = L*
+
+#define SECTORS 16      // number of divisions in alpha and theta
+
+// Spherical coordinates; both angles are kept in degrees (see _cmsAtan2 / ToCartesian)
+typedef struct {
+
+    cmsFloat64Number r;       // radius
+    cmsFloat64Number alpha;   // angle quantized over 360 degrees by QuantizeToSector
+    cmsFloat64Number theta;   // angle quantized over 180 degrees by QuantizeToSector
+
+} cmsSpherical;
+
+typedef  enum {
+        GP_EMPTY,       // no data stored; first enumerator, so its value is 0
+        GP_SPECIFIED,   // filled from a sample passed to cmsGDBAddPoint
+        GP_MODELED      // filled by InterpolateMissingSector
+
+    } GDBPointType;
+
+// One sector of the gamut boundary descriptor
+typedef struct {
+
+    GDBPointType Type;
+    cmsSpherical p;         // Keep also alpha & theta of maximum
+
+} cmsGDBPoint;
+
+// The whole descriptor: a SECTORS x SECTORS grid, indexed as Gamut[theta][alpha]
+typedef struct {
+
+    cmsContext ContextID;   // context used when signalling errors and when freeing
+    cmsGDBPoint Gamut[SECTORS][SECTORS];
+
+} cmsGDB;
+
+
+// A line using the parametric form
+// P = a + t*u
+typedef struct {
+
+    cmsVEC3 a;      // point of the line at t = 0
+    cmsVEC3 u;      // direction; LineOf2Points stores (second point - first point) here
+
+} cmsLine;
+
+
+// A plane using the parametric form
+// Q = b + r*v + s*w
+typedef struct {
+
+    cmsVEC3 b;      // point of the plane at r = s = 0
+    cmsVEC3 v;      // first spanning vector
+    cmsVEC3 w;      // second spanning vector
+
+} cmsPlane;
+
+
+
+// --------------------------------------------------------------------------------------------
+
+// atan2() variant that reports the angle in degrees and never returns a
+// negative value: negative results are shifted up by whole turns of 360.
+// The undefined (0, 0) input yields 0.
+static
+cmsFloat64Number _cmsAtan2(cmsFloat64Number y, cmsFloat64Number x)
+{
+    cmsFloat64Number degrees;
+
+    if (y == 0.0 && x == 0.0)
+        return 0;
+
+    // Radians to degrees
+    degrees = (atan2(y, x) * 180.0) / M_PI;
+
+    for (; degrees < 0; degrees += 360) { }
+
+    return degrees;
+}
+
+// Cartesian -> spherical. The vector is read as (L, a, b) in VX, VY, VZ
+// order and both angles are produced by _cmsAtan2. A zero-length vector
+// yields r = alpha = theta = 0.
+static
+void ToSpherical(cmsSpherical* sp, const cmsVEC3* v)
+{
+    const cmsFloat64Number L = v->n[VX];
+    const cmsFloat64Number a = v->n[VY];
+    const cmsFloat64Number b = v->n[VZ];
+
+    sp->r = sqrt(L*L + a*a + b*b);
+
+    if (sp->r != 0) {
+
+        sp->alpha = _cmsAtan2(a, b);
+        sp->theta = _cmsAtan2(sqrt(a*a + b*b), L);
+    }
+    else {
+        sp->alpha = sp->theta = 0;
+    }
+}
+
+
+// Spherical -> Cartesian, the inverse of ToSpherical. Angles are read in
+// degrees and the result is stored as (L, a, b) in the VX, VY, VZ slots:
+//   L = r cos(theta),  a = r sin(theta) sin(alpha),  b = r sin(theta) cos(alpha)
+static
+void ToCartesian(cmsVEC3* v, const cmsSpherical* sp)
+{
+    // Degrees to radians
+    const cmsFloat64Number alphaRad = (M_PI * sp->alpha) / 180.0;
+    const cmsFloat64Number thetaRad = (M_PI * sp->theta) / 180.0;
+
+    const cmsFloat64Number sinAlpha = sin(alphaRad);
+    const cmsFloat64Number cosAlpha = cos(alphaRad);
+    const cmsFloat64Number sinTheta = sin(thetaRad);
+    const cmsFloat64Number cosTheta = cos(thetaRad);
+
+    const cmsFloat64Number radius = sp->r;
+
+    // L lies along the axis theta is measured from;
+    // a and b share what is left, split by alpha
+    v->n[VX] = radius * cosTheta;
+    v->n[VY] = radius * sinTheta * sinAlpha;
+    v->n[VZ] = radius * sinTheta * cosAlpha;
+}
+
+
+// Map spherical angles to sector indices: alpha spans 360 degrees and theta
+// 180, each cut into SECTORS slices. Indices are capped at SECTORS-1, so the
+// end angles (360, 180) fall in the last sector. No lower bound is applied.
+static
+void QuantizeToSector(const cmsSpherical* sp, int* alpha, int* theta)
+{
+    const int alphaIdx = (int) floor((sp->alpha * (SECTORS)) / 360.0);
+    const int thetaIdx = (int) floor((sp->theta * (SECTORS)) / 180.0);
+
+    // Saturate at the last sector
+    *alpha = (alphaIdx < SECTORS) ? alphaIdx : SECTORS-1;
+    *theta = (thetaIdx < SECTORS) ? thetaIdx : SECTORS-1;
+}
+
+
+// Parametric line through two points: it starts at a, its direction is b - a
+static
+void LineOf2Points(cmsLine* line, cmsVEC3* a, cmsVEC3* b)
+{
+    cmsVEC3* origin = &line->a;
+    cmsVEC3* dir    = &line->u;
+
+    _cmsVEC3init(origin, a->n[VX], a->n[VY], a->n[VZ]);
+    _cmsVEC3init(dir, b->n[VX] - a->n[VX], b->n[VY] - a->n[VY], b->n[VZ] - a->n[VZ]);
+}
+
+
+// Point of the line at parameter t, computed per component: p = a + t*u
+static
+void GetPointOfLine(cmsVEC3* p, const cmsLine* line, cmsFloat64Number t)
+{
+    p->n[VZ] = line->a.n[VZ] + t * line->u.n[VZ];
+    p->n[VY] = line->a.n[VY] + t * line->u.n[VY];
+    p->n[VX] = line->a.n[VX] + t * line->u.n[VX];
+}
+
+
+
+/*
+    Closest point in sector line1 to sector line2 (both are defined as 0 <=t <= 1)
+    http://softsurfer.com/Archive/algorithm_0106/algorithm_0106.htm
+
+    Copyright 2001, softSurfer (www.softsurfer.com)
+    This code may be freely used and modified for any purpose
+    providing that this copyright notice is included with it.
+    SoftSurfer makes no warranty for this code, and cannot be held
+    liable for any real or imagined damage resulting from its use.
+    Users of this code must verify correctness for their application.
+
+*/
+// Store in r the point of line1, with its parameter limited to 0..1, that is closest to the segment line2. Always returns TRUE.
+static
+cmsBool ClosestLineToLine(cmsVEC3* r, const cmsLine* line1, const cmsLine* line2)
+{
+    cmsFloat64Number a, b, c, d, e, D;
+    cmsFloat64Number sc, sN, sD;
+    //cmsFloat64Number tc; // left for future use
+    cmsFloat64Number tN, tD;
+    cmsVEC3 w0;
+
+    _cmsVEC3minus(&w0, &line1 ->a, &line2 ->a);
+    // Dot products feeding the closed-form solution; s = sN/sD runs along line1, t = tN/tD along line2
+    a  = _cmsVEC3dot(&line1 ->u, &line1 ->u);
+    b  = _cmsVEC3dot(&line1 ->u, &line2 ->u);
+    c  = _cmsVEC3dot(&line2 ->u, &line2 ->u);
+    d  = _cmsVEC3dot(&line1 ->u, &w0);
+    e  = _cmsVEC3dot(&line2 ->u, &w0);
+
+    D  = a*c - b * b;      // Denominator
+    sD = tD = D;           // default sD = D >= 0
+
+    if (D <  MATRIX_DET_TOLERANCE) {   // the lines are almost parallel
+
+        sN = 0.0;        // force using point P0 on segment S1
+        sD = 1.0;        // to prevent possible division by 0.0 later
+        tN = e;
+        tD = c;
+    }
+    else {                // get the closest points on the infinite lines
+
+        sN = (b*e - c*d);
+        tN = (a*e - b*d);
+
+        if (sN < 0.0) {       // sc < 0 => the s=0 edge is visible
+
+            sN = 0.0;
+            tN = e;
+            tD = c;
+        }
+        else if (sN > sD) {   // sc > 1 => the s=1 edge is visible
+            sN = sD;
+            tN = e + b;
+            tD = c;
+        }
+    }
+
+    if (tN < 0.0) {           // tc < 0 => the t=0 edge is visible
+
+        tN = 0.0;
+        // recompute sc for this edge
+        if (-d < 0.0)
+            sN = 0.0;
+        else if (-d > a)
+            sN = sD;
+        else {
+            sN = -d;
+            sD = a;
+        }
+    }
+    else if (tN > tD) {      // tc > 1 => the t=1 edge is visible
+
+        tN = tD;
+
+        // recompute sc for this edge
+        if ((-d + b) < 0.0)
+            sN = 0;
+        else if ((-d + b) > a)
+            sN = sD;
+        else {
+            sN = (-d + b);
+            sD = a;
+        }
+    }
+    // finally do the division to get sc (tc is not needed: only the point on line1 is returned)
+    sc = (fabs(sN) < MATRIX_DET_TOLERANCE ? 0.0 : sN / sD);
+    //tc = (fabs(tN) < MATRIX_DET_TOLERANCE ? 0.0 : tN / tD); // left for future use.
+
+    GetPointOfLine(r, line1, sc);
+    return TRUE;
+}
+
+
+
+// ------------------------------------------------------------------ Wrapper
+
+
+// Create a gamut boundary descriptor bound to ContextID. The memory comes
+// from _cmsMallocZero. Returns NULL if the allocation fails.
+cmsHANDLE  CMSEXPORT cmsGBDAlloc(cmsContext ContextID)
+{
+    cmsGDB* descriptor = (cmsGDB*) _cmsMallocZero(ContextID, sizeof(cmsGDB));
+
+    if (descriptor != NULL)
+        descriptor->ContextID = ContextID;
+    return (cmsHANDLE) descriptor;
+}
+
+// Release a descriptor obtained from cmsGBDAlloc. A NULL handle is ignored.
+void CMSEXPORT cmsGBDFree(cmsHANDLE hGBD)
+{
+    cmsGDB* descriptor = (cmsGDB*) hGBD;
+    if (descriptor == NULL) return;
+    _cmsFree(descriptor->ContextID, (void*) descriptor);
+}
+
+
+// Locate the descriptor sector that holds a given Lab colour. The spherical
+// form of the L*-centred colour is left in *sp. Returns the sector, or NULL
+// (after cmsSignalError) when a coordinate or sector index is out of range.
+static
+cmsGDBPoint* GetPoint(cmsGDB* gbd, const cmsCIELab* Lab, cmsSpherical* sp)
+{
+    cmsVEC3 centred;
+    int alphaIdx, thetaIdx;
+    cmsBool negative, outside;
+
+    _cmsAssert(gbd != NULL);
+    _cmsAssert(Lab != NULL);
+    _cmsAssert(sp != NULL);
+
+    // Shift L* by 50, half of its domain, so the solid is centred on the origin
+    _cmsVEC3init(&centred, Lab->L - 50.0, Lab->a, Lab->b);
+    ToSpherical(sp, &centred);
+
+    negative = (sp->r < 0) || (sp->alpha < 0) || (sp->theta < 0);
+    if (negative) {
+        cmsSignalError(gbd->ContextID, cmsERROR_RANGE, "spherical value out of range");
+        return NULL;
+    }
+
+    QuantizeToSector(sp, &alphaIdx, &thetaIdx);
+
+    outside = (alphaIdx < 0) || (thetaIdx < 0) || (alphaIdx >= SECTORS) || (thetaIdx >= SECTORS);
+    if (outside) {
+        cmsSignalError(gbd->ContextID, cmsERROR_RANGE, " quadrant out of range");
+        return NULL;
+    }
+
+    return &gbd->Gamut[thetaIdx][alphaIdx];
+}
+
+// Feed one Lab sample into the gamut boundary descriptor. The descriptor is
+// centred on a=b=0 and L*=50. Every sector keeps the sample with the
+// largest radius seen so far and is tagged GP_SPECIFIED.
+//
+// Returns FALSE when the sample cannot be mapped to a sector; TRUE otherwise,
+// whether or not the sample replaced the one already stored.
+cmsBool CMSEXPORT cmsGDBAddPoint(cmsHANDLE hGBD, const cmsCIELab* Lab)
+{
+    cmsSpherical sample;
+    cmsGDBPoint* sector;
+    cmsBool takeIt;
+
+    sector = GetPoint((cmsGDB*) hGBD, Lab, &sample);
+    if (sector == NULL)
+        return FALSE;
+
+    // An empty sector accepts anything. An occupied one accepts only a
+    // sample that lies strictly farther from the centre than the one it
+    // already holds.
+    if (sector->Type == GP_EMPTY)
+        takeIt = TRUE;
+    else
+        takeIt = (sample.r > sector->p.r);
+
+    if (takeIt) {
+
+        sector->Type = GP_SPECIFIED;
+        sector->p    = sample;
+    }
+
+    return TRUE;
+}
+
+// Tell whether a Lab colour lies inside the described gamut.
+//
+// Returns TRUE when the colour's radius does not exceed the one stored for
+// its sector. Returns FALSE when it does, when the colour cannot be mapped to
+// a sector, or when that sector holds no data.
+cmsBool CMSEXPORT cmsGDBCheckPoint(cmsHANDLE hGBD, const cmsCIELab* Lab)
+{
+    cmsSpherical probe;
+    cmsGDBPoint* sector = GetPoint((cmsGDB*) hGBD, Lab, &probe);
+
+    // No sector, or a sector without data, means "not in gamut"
+    if (sector == NULL || sector->Type == GP_EMPTY)
+        return FALSE;
+
+    // In gamut when the probe is no farther from the centre than the
+    // boundary sample kept for its sector
+    return (probe.r <= sector->p.r);
+}
+
+// -----------------------------------------------------------------------------------------------------------------------
+
+// Find near sectors. The list of sectors found is returned on Close[].
+// The function returns the number of sectors as well.
+
+// 24   9  10  11  12
+// 23   8   1   2  13
+// 22   7   *   3  14
+// 21   6   5   4  15
+// 20  19  18  17  16
+//
+// Those are the relative movements
+// {-2,-2}, {-1, -2}, {0, -2}, {+1, -2}, {+2,  -2},
+// {-2,-1}, {-1, -1}, {0, -1}, {+1, -1}, {+2,  -1},
+// {-2, 0}, {-1,  0}, {0,  0}, {+1,  0}, {+2,   0},
+// {-2,+1}, {-1, +1}, {0, +1}, {+1,  +1}, {+2,  +1},
+// {-2,+2}, {-1, +2}, {0, +2}, {+1,  +2}, {+2,  +2}};
+
+
+static
+const struct _spiral {
+    // Relative movement used by FindNearSectors: AdvX is added to alpha, AdvY to theta
+    int AdvX, AdvY;
+
+    } Spiral[] = { {0,  -1}, {+1, -1}, {+1,  0}, {+1, +1}, {0,  +1}, {-1, +1},
+                   {-1,  0}, {-1, -1}, {-1, -2}, {0,  -2}, {+1, -2}, {+2, -2},
+                   {+2, -1}, {+2,  0}, {+2, +1}, {+2, +2}, {+1, +2}, {0,  +2},
+                   {-1, +2}, {-2, +2}, {-2, +1}, {-2, 0},  {-2, -1}, {-2, -2} };
+// Number of entries in Spiral[]: 24, the 8 adjacent cells first, then the 16 cells of the outer ring
+#define NSTEPS (sizeof(Spiral) / sizeof(struct _spiral))
+
+// Collect the non-empty sectors among the neighbours listed in Spiral[],
+// wrapping both indices around the grid. Pointers are stored in Close[] in
+// spiral order, so Close[] must have room for NSTEPS entries.
+//
+// Returns how many pointers were stored.
+static
+int FindNearSectors(cmsGDB* gbd, int alpha, int theta, cmsGDBPoint* Close[])
+{
+    const struct _spiral* step;
+    const struct _spiral* const last = Spiral + NSTEPS;
+    int found = 0;
+
+    for (step = Spiral; step != last; step++) {
+
+        int col = (alpha + step->AdvX) % SECTORS;
+        int row = (theta + step->AdvY) % SECTORS;
+        cmsGDBPoint* candidate;
+
+        // The remainder of a negative operand may itself be negative:
+        // fold it back into 0..SECTORS-1
+        if (col < 0) col += SECTORS;
+        if (row < 0) row += SECTORS;
+
+        candidate = &gbd->Gamut[row][col];
+        if (candidate->Type != GP_EMPTY) {
+            Close[found++] = candidate;
+        }
+    }
+
+    return found;
+}
+
+
+// Fill one empty sector, marking it GP_MODELED, from its non-empty neighbours; a sector that already holds data is left alone. Always returns TRUE.
+static
+cmsBool InterpolateMissingSector(cmsGDB* gbd, int alpha, int theta)
+{
+    cmsSpherical sp;
+    cmsVEC3 Lab;
+    cmsVEC3 Centre;
+    cmsLine ray;
+    int nCloseSectors;
+    cmsGDBPoint* Close[NSTEPS + 1];
+    cmsSpherical closel, templ;
+    cmsLine edge;
+    int k, m;
+
+    // Is that point already specified?
+    if (gbd ->Gamut[theta][alpha].Type != GP_EMPTY) return TRUE;
+
+    // Fill close points
+    nCloseSectors = FindNearSectors(gbd, alpha, theta, Close);
+
+
+    // Find a central point on the sector
+    sp.alpha = (cmsFloat64Number) ((alpha + 0.5) * 360.0) / (SECTORS);
+    sp.theta = (cmsFloat64Number) ((theta + 0.5) * 180.0) / (SECTORS);
+    sp.r     = 50.0;
+
+    // Convert to Cartesian
+    ToCartesian(&Lab, &sp);
+
+    // Create a ray line that starts at this point (t = 0) and reaches Centre at t = 1
+    _cmsVEC3init(&Centre, 50.0, 0, 0);
+    LineOf2Points(&ray, &Lab, &Centre);
+
+    // Best candidate so far; it stays at r = alpha = theta = 0 if no pair of close sectors is accepted
+    closel.r = 0.0;
+    closel.alpha = 0;
+    closel.theta = 0;
+
+    for (k=0; k < nCloseSectors; k++) {
+
+        for(m = k+1; m < nCloseSectors; m++) {
+
+            cmsVEC3 temp, a1, a2;
+
+            // A line from sector to sector
+            ToCartesian(&a1, &Close[k]->p);
+            ToCartesian(&a2, &Close[m]->p);
+
+            LineOf2Points(&edge, &a1, &a2);
+
+            // Find the point of the ray that is closest to that edge
+            ClosestLineToLine(&temp, &ray, &edge);
+
+            // Convert to spherical
+            ToSpherical(&templ, &temp);
+
+            // Keep the farthest point that falls within the angular bounds of this sector
+            if ( templ.r > closel.r &&
+                 templ.theta >= (theta*180.0/SECTORS) &&
+                 templ.theta <= ((theta+1)*180.0/SECTORS) &&
+                 templ.alpha >= (alpha*360.0/SECTORS) &&
+                 templ.alpha <= ((alpha+1)*360.0/SECTORS)) {
+
+                closel = templ;
+            }
+        }
+    }
+
+    gbd ->Gamut[theta][alpha].p = closel;
+    gbd ->Gamut[theta][alpha].Type = GP_MODELED;
+
+    return TRUE;
+
+}
+
+
+// Interpolate all alpha sectors at a fixed theta; FALSE as soon as one fails.
+static
+cmsBool InterpolateSlice(cmsGDB* gbd, int theta)
+{
+    int alpha = 0;
+
+    while (alpha < SECTORS) {
+        if (!InterpolateMissingSector(gbd, alpha, theta)) return FALSE;
+        alpha++;
+    }
+    return TRUE;
+}
+
+// Fill every sector that received no sample. The two extreme slices
+// (theta=0, then theta=SECTORS-1) are processed first and the rest follow in
+// increasing theta. dwFlags is not used.
+cmsBool CMSEXPORT cmsGDBCompute(cmsHANDLE hGBD, cmsUInt32Number dwFlags)
+{
+    cmsGDB* descriptor = (cmsGDB*) hGBD;
+    int theta;
+
+    cmsUNUSED_PARAMETER(dwFlags);
+    _cmsAssert(hGBD != NULL);
+
+    if (!InterpolateSlice(descriptor, 0)) return FALSE;            // black
+    if (!InterpolateSlice(descriptor, SECTORS-1)) return FALSE;    // white
+
+    // Mid slices. The last one was completed by the white pass, so
+    // revisiting it changes nothing.
+    for (theta = 1; theta < SECTORS; theta++) {
+        if (!InterpolateSlice(descriptor, theta)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+
+
+// --------------------------------------------------------------------------------------------------------
+
+// Great for debug, but not suitable for real use
+// Compiled out by "#if 0". Writes the descriptor to fname as a VRML 2.0 scene; returns FALSE only if the file cannot be opened.
+#if 0
+cmsBool cmsGBDdumpVRML(cmsHANDLE hGBD, const char* fname)
+{
+    FILE* fp;
+    int   i, j;
+    cmsGDB* gbd = (cmsGDB*) hGBD;
+    cmsGDBPoint* pt;
+
+    fp = fopen (fname, "wt");
+    if (fp == NULL)
+        return FALSE;
+
+    fprintf (fp, "#VRML V2.0 utf8\n");
+
+    // set the viewing orientation and distance
+    fprintf (fp, "DEF CamTest Group {\n");
+    fprintf (fp, "\tchildren [\n");
+    fprintf (fp, "\t\tDEF Cameras Group {\n");
+    fprintf (fp, "\t\t\tchildren [\n");
+    fprintf (fp, "\t\t\t\tDEF DefaultView Viewpoint {\n");
+    fprintf (fp, "\t\t\t\t\tposition 0 0 340\n");
+    fprintf (fp, "\t\t\t\t\torientation 0 0 1 0\n");
+    fprintf (fp, "\t\t\t\t\tdescription \"default view\"\n");
+    fprintf (fp, "\t\t\t\t}\n");
+    fprintf (fp, "\t\t\t]\n");
+    fprintf (fp, "\t\t},\n");
+    fprintf (fp, "\t]\n");
+    fprintf (fp, "}\n");
+
+    // Output the background stuff
+    fprintf (fp, "Background {\n");
+    fprintf (fp, "\tskyColor [\n");
+    fprintf (fp, "\t\t.5 .5 .5\n");
+    fprintf (fp, "\t]\n");
+    fprintf (fp, "}\n");
+
+    // Output the shape stuff
+    fprintf (fp, "Transform {\n");
+    fprintf (fp, "\tscale .3 .3 .3\n");
+    fprintf (fp, "\tchildren [\n");
+
+    // Draw the axes as a shape:
+    fprintf (fp, "\t\tShape {\n");
+    fprintf (fp, "\t\t\tappearance Appearance {\n");
+    fprintf (fp, "\t\t\t\tmaterial Material {\n");
+    fprintf (fp, "\t\t\t\t\tdiffuseColor 0 0.8 0\n");
+    fprintf (fp, "\t\t\t\t\temissiveColor 1.0 1.0 1.0\n");
+    fprintf (fp, "\t\t\t\t\tshininess 0.8\n");
+    fprintf (fp, "\t\t\t\t}\n");
+    fprintf (fp, "\t\t\t}\n");
+    fprintf (fp, "\t\t\tgeometry IndexedLineSet {\n");
+    fprintf (fp, "\t\t\t\tcoord Coordinate {\n");
+    fprintf (fp, "\t\t\t\t\tpoint [\n");
+    fprintf (fp, "\t\t\t\t\t0.0 0.0 0.0,\n");
+    fprintf (fp, "\t\t\t\t\t%f 0.0 0.0,\n",  255.0);
+    fprintf (fp, "\t\t\t\t\t0.0 %f 0.0,\n",  255.0);
+    fprintf (fp, "\t\t\t\t\t0.0 0.0 %f]\n",  255.0);
+    fprintf (fp, "\t\t\t\t}\n");
+    fprintf (fp, "\t\t\t\tcoordIndex [\n");
+    fprintf (fp, "\t\t\t\t\t0, 1, -1\n");
+    fprintf (fp, "\t\t\t\t\t0, 2, -1\n");
+    fprintf (fp, "\t\t\t\t\t0, 3, -1]\n");
+    fprintf (fp, "\t\t\t}\n");
+    fprintf (fp, "\t\t}\n");
+
+    // Second shape: the gamut itself, as a point set with one point and one colour per sector
+    fprintf (fp, "\t\tShape {\n");
+    fprintf (fp, "\t\t\tappearance Appearance {\n");
+    fprintf (fp, "\t\t\t\tmaterial Material {\n");
+    fprintf (fp, "\t\t\t\t\tdiffuseColor 0 0.8 0\n");
+    fprintf (fp, "\t\t\t\t\temissiveColor 1 1 1\n");
+    fprintf (fp, "\t\t\t\t\tshininess 0.8\n");
+    fprintf (fp, "\t\t\t\t}\n");
+    fprintf (fp, "\t\t\t}\n");
+    fprintf (fp, "\t\t\tgeometry PointSet {\n");
+
+    // fill in the points here
+    fprintf (fp, "\t\t\t\tcoord Coordinate {\n");
+    fprintf (fp, "\t\t\t\t\tpoint [\n");
+
+    // We need to traverse the whole gamut hull.
+    for (i=0; i < SECTORS; i++)
+        for (j=0; j < SECTORS; j++) {
+
+            cmsVEC3 v;
+
+            pt = &gbd ->Gamut[i][j];
+            ToCartesian(&v, &pt ->p);
+
+            fprintf (fp, "\t\t\t\t\t%g %g %g", v.n[0]+50, v.n[1], v.n[2]);
+
+            if ((j == SECTORS - 1) && (i == SECTORS - 1))
+                fprintf (fp, "]\n");
+            else
+                fprintf (fp, ",\n");
+
+        }
+
+        fprintf (fp, "\t\t\t\t}\n");
+
+
+
+    // fill in the face colors
+    fprintf (fp, "\t\t\t\tcolor Color {\n");
+    fprintf (fp, "\t\t\t\t\tcolor [\n");
+
+    for (i=0; i < SECTORS; i++)
+        for (j=0; j < SECTORS; j++) {
+
+           cmsVEC3 v;
+
+            pt = &gbd ->Gamut[i][j];
+
+
+            ToCartesian(&v, &pt ->p);
+
+        // Colour by sector state: empty, modeled, or (else branch) specified
+        if (pt ->Type == GP_EMPTY)
+            fprintf (fp, "\t\t\t\t\t%g %g %g", 0.0, 0.0, 0.0);
+        else
+            if (pt ->Type == GP_MODELED)
+                fprintf (fp, "\t\t\t\t\t%g %g %g", 1.0, .5, .5);
+            else {
+                fprintf (fp, "\t\t\t\t\t%g %g %g", 1.0, 1.0, 1.0);
+
+            }
+
+        if ((j == SECTORS - 1) && (i == SECTORS - 1))
+                fprintf (fp, "]\n");
+            else
+                fprintf (fp, ",\n");
+    }
+    fprintf (fp, "\t\t\t}\n");
+
+
+    fprintf (fp, "\t\t\t}\n");
+    fprintf (fp, "\t\t}\n");
+    fprintf (fp, "\t]\n");
+    fprintf (fp, "}\n");
+
+    fclose (fp);
+
+    return TRUE;
+}
+#endif
+
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmstypes.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmstypes.cpp
new file mode 100644
index 0000000000..521bef085c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmstypes.cpp
@@ -0,0 +1,5633 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Tag Serialization  -----------------------------------------------------------------------------
+// This file implements every single tag and tag type as described in the ICC spec. Some types
+// have been deprecated, like ncl and Data. There is no implementation for those types as there
+// are no profiles holding them. The programmer can also extend this list by defining his own types
+// by using the appropriate plug-in. There are three types of plug ins regarding that. First type
+// allows to define new tags using any existing type. Next plug-in type allows to define new types
+// and the third one is very specific: allows to extend the number of elements in the multiprocessing
+// elements special type.
+//--------------------------------------------------------------------------------------------------
+
+// Signatures of some broken (non-standard) types; presumably seen in profiles from the vendors named -- TODO confirm
+#define cmsCorbisBrokenXYZtype    ((cmsTagTypeSignature) 0x17A505B8)
+#define cmsMonacoBrokenCurveType  ((cmsTagTypeSignature) 0x9478ee00)
+
+// Node of the singly linked list that keeps track of the defined types
+typedef struct _cmsTagTypeLinkedList_st {
+
+    cmsTagTypeHandler Handler;
+    struct _cmsTagTypeLinkedList_st* Next;
+
+} _cmsTagTypeLinkedList;
+
+// Token-pasting macros that build callback names: READ_FN(XYZ) expands to Type_XYZ_Read, and so on.
+#define READ_FN(x)  Type_##x##_Read
+#define WRITE_FN(x) Type_##x##_Write
+#define FREE_FN(x)  Type_##x##_Free
+#define DUP_FN(x)   Type_##x##_Dup
+
+// Helper macro to define a handler: signature t, then the Read, Write, Dup and Free callbacks of x, then NULL and 0.
+#define TYPE_HANDLER(t, x)  { (t), READ_FN(x), WRITE_FN(x), DUP_FN(x), FREE_FN(x), NULL, 0 }
+
+// Helper macro to define a MPE handler: same layout, but Dup and Free are always GenericMPEdup / GenericMPEfree.
+#define TYPE_MPE_HANDLER(t, x)  { (t), READ_FN(x), WRITE_FN(x), GenericMPEdup, GenericMPEfree, NULL, 0 }
+
+// Stand-ins for minus / plus infinity: large finite float constants
+#define MINUS_INF   (-1E22F)
+#define PLUS_INF    (+1E22F)
+
+
+// Register (or, with Data == NULL, unregister) a tag type handler. Shared by
+// normal types and MPE; pos selects which client chunk of the context holds
+// the list. Returns FALSE only when the new list node cannot be allocated.
+static
+cmsBool RegisterTypesPlugin(cmsContext id, cmsPluginBase* Data, _cmsMemoryClient pos)
+{
+    _cmsTagTypePluginChunkType* chunk = (_cmsTagTypePluginChunkType*) _cmsContextGetClientChunk(id, pos);
+    _cmsTagTypeLinkedList* node;
+
+    if (Data != NULL) {
+        // Nodes are taken from the plug-in memory pool
+        node = (_cmsTagTypeLinkedList*) _cmsPluginMalloc(id, sizeof(_cmsTagTypeLinkedList));
+        if (node == NULL) return FALSE;
+
+        // Push at the head of the list
+        node->Handler = ((cmsPluginTagType*) Data)->Handler;
+        node->Next    = chunk->TagTypes;
+        chunk->TagTypes = node;
+    }
+    else {
+        // Unregister: just drop the list. Nothing is freed here because the
+        // pool is destroyed as a whole.
+        chunk->TagTypes = NULL;
+    }
+
+    return TRUE;
+}
+
+// Look up the handler for a type signature. Shared between normal types and MPE. Handlers added
+// by plug-ins are searched first, then the built-in defaults. Returns NULL if neither list has it.
+static
+cmsTagTypeHandler* GetHandler(cmsTagTypeSignature sig, _cmsTagTypeLinkedList* PluginLinkedList, _cmsTagTypeLinkedList* DefaultLinkedList)
+{
+    _cmsTagTypeLinkedList* heads[2];
+    _cmsTagTypeLinkedList* node;
+    int k;
+
+    heads[0] = PluginLinkedList;     // plug-in additions take precedence
+    heads[1] = DefaultLinkedList;
+
+    for (k = 0; k < 2; k++) {
+
+        node = heads[k];
+        while (node != NULL) {
+            if (node->Handler.Signature == sig) return &node->Handler;
+            node = node->Next;
+        }
+    }
+
+    return NULL;
+}
+
+
+// Write n wide characters as 16-bit numbers. Each wchar_t is cast to
+// cmsUInt16Number, so a wider value is cut down by the cast, not re-encoded.
+static
+cmsBool _cmsWriteWCharArray(cmsIOHANDLER* io, cmsUInt32Number n, const wchar_t* Array)
+{
+    cmsUInt32Number k = 0;
+
+    _cmsAssert(io != NULL);
+    _cmsAssert(!(Array == NULL && n > 0));
+
+    while (k < n) {
+        if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) Array[k++])) return FALSE;
+    }
+    return TRUE;
+}
+
+// Read n 16-bit numbers, storing each one in Array as a wchar_t. With
+// Array == NULL the data is consumed and discarded. FALSE on a failed read.
+static
+cmsBool _cmsReadWCharArray(cmsIOHANDLER* io, cmsUInt32Number n, wchar_t* Array)
+{
+    cmsUInt32Number k;
+    cmsUInt16Number unit;
+
+    _cmsAssert(io != NULL);
+
+    if (Array == NULL) {
+        for (k = 0; k < n; k++) {
+            if (!_cmsReadUInt16Number(io, NULL)) return FALSE;   // skip
+        }
+        return TRUE;
+    }
+
+    for (k = 0; k < n; k++) {
+        if (!_cmsReadUInt16Number(io, &unit)) return FALSE;
+        Array[k] = (wchar_t) unit;
+    }
+    return TRUE;
+}
+
+// Per-element callback of ReadPositionTable / WritePositionTable. n is the element index; SizeOfTag is the element's recorded size when reading, and the caller's SizeOfTag when writing.
+typedef cmsBool (* PositionTableEntryFn)(struct _cms_typehandler_struct* self,
+                                             cmsIOHANDLER* io,
+                                             void* Cargo,
+                                             cmsUInt32Number n,
+                                             cmsUInt32Number SizeOfTag);
+
+// Helper function to deal with position tables as described in ICC spec 4.3: first come Count
+// (offset, size) records, then the data block. Offsets are relative to BaseOffset. ElementFn is
+// called once per element, in order, after seeking to it. Returns FALSE on any failure.
+static
+cmsBool ReadPositionTable(struct _cms_typehandler_struct* self,
+                              cmsIOHANDLER* io,
+                              cmsUInt32Number Count,
+                              cmsUInt32Number BaseOffset,
+                              void *Cargo,
+                              PositionTableEntryFn ElementFn)
+{
+    cmsUInt32Number i;
+    cmsUInt32Number *ElementOffsets = NULL, *ElementSizes = NULL;
+    cmsUInt32Number currentPosition;
+    cmsBool rc = FALSE;
+
+    currentPosition = io->Tell(io);
+
+    // A position beyond the reported size would make the unsigned subtraction
+    // below wrap around and let any Count through, so reject it first.
+    if (currentPosition > io->ReportedSize)
+        return FALSE;
+
+    // Verify there is enough space left to read at least two cmsUInt32Number items for Count items.
+    if (((io->ReportedSize - currentPosition) / (2 * sizeof(cmsUInt32Number))) < Count)
+        return FALSE;
+
+    // Let's take the offsets to each element
+    ElementOffsets = (cmsUInt32Number *) _cmsCalloc(io ->ContextID, Count, sizeof(cmsUInt32Number));
+    if (ElementOffsets == NULL) goto Done;
+
+    ElementSizes = (cmsUInt32Number *) _cmsCalloc(io ->ContextID, Count, sizeof(cmsUInt32Number));
+    if (ElementSizes == NULL) goto Done;
+
+    for (i=0; i < Count; i++) {
+
+        if (!_cmsReadUInt32Number(io, &ElementOffsets[i])) goto Done;
+        if (!_cmsReadUInt32Number(io, &ElementSizes[i])) goto Done;
+
+        ElementOffsets[i] += BaseOffset;
+    }
+
+    // Seek to each element and let the reader callback consume it
+    for (i=0; i < Count; i++) {
+        if (!io -> Seek(io, ElementOffsets[i])) goto Done;
+        if (!ElementFn(self, io, Cargo, i, ElementSizes[i])) goto Done;
+    }
+
+    rc = TRUE;
+
+Done:
+    if (ElementOffsets != NULL) _cmsFree(io ->ContextID, ElementOffsets);
+    if (ElementSizes != NULL) _cmsFree(io ->ContextID, ElementSizes);
+    return rc;
+}
+
+// Counterpart of ReadPositionTable for writing. A zeroed directory of Count
+// (offset, size) records is emitted first, then ElementFn writes each element
+// in turn, and finally the directory is rewritten in place with the real
+// values. Offsets are stored relative to BaseOffset. On success the stream is
+// left positioned just after the last element. Returns FALSE on any failure.
+static
+cmsBool WritePositionTable(struct _cms_typehandler_struct* self,
+                               cmsIOHANDLER* io,
+                               cmsUInt32Number SizeOfTag,
+                               cmsUInt32Number Count,
+                               cmsUInt32Number BaseOffset,
+                               void *Cargo,
+                               PositionTableEntryFn ElementFn)
+{
+    cmsUInt32Number *Offsets = NULL;
+    cmsUInt32Number *Sizes = NULL;
+    cmsUInt32Number TableStart, DataEnd, ElementStart;
+    cmsUInt32Number k;
+    cmsBool Ok = FALSE;
+
+    Offsets = (cmsUInt32Number *) _cmsCalloc(io->ContextID, Count, sizeof(cmsUInt32Number));
+    if (Offsets == NULL) goto Cleanup;
+
+    Sizes = (cmsUInt32Number *) _cmsCalloc(io->ContextID, Count, sizeof(cmsUInt32Number));
+    if (Sizes == NULL) goto Cleanup;
+
+    // Remember where the directory lives and reserve room for it with zeros
+    TableStart = io->Tell(io);
+
+    for (k = 0; k < Count; k++) {
+        if (!_cmsWriteUInt32Number(io, 0)) goto Cleanup;    // offset placeholder
+        if (!_cmsWriteUInt32Number(io, 0)) goto Cleanup;    // size placeholder
+    }
+
+    // Emit the elements, measuring each one through the stream position
+    for (k = 0; k < Count; k++) {
+
+        ElementStart = io->Tell(io);
+        Offsets[k] = ElementStart - BaseOffset;
+
+        if (!ElementFn(self, io, Cargo, k, SizeOfTag)) goto Cleanup;
+
+        Sizes[k] = io->Tell(io) - ElementStart;
+    }
+
+    // Go back and fill in the real directory...
+    DataEnd = io->Tell(io);
+    if (!io->Seek(io, TableStart)) goto Cleanup;
+
+    for (k = 0; k < Count; k++) {
+        if (!_cmsWriteUInt32Number(io, Offsets[k])) goto Cleanup;
+        if (!_cmsWriteUInt32Number(io, Sizes[k])) goto Cleanup;
+    }
+
+    // ...then return to the end of the data
+    if (!io->Seek(io, DataEnd)) goto Cleanup;
+
+    Ok = TRUE;
+
+Cleanup:
+    if (Offsets != NULL) _cmsFree(io->ContextID, Offsets);
+    if (Sizes != NULL) _cmsFree(io->ContextID, Sizes);
+    return Ok;
+}
+
+
+// ********************************************************************************
+// Type XYZ. Only one value is allowed
+// ********************************************************************************
+
+//The XYZType contains an array of three encoded values for the XYZ tristimulus
+//values. Tristimulus values must be non-negative. The signed encoding allows for
+//implementation optimizations by minimizing the number of fixed formats.
+
+
+// Read a single XYZ value into a newly allocated cmsCIEXYZ. On success *nItems
+// is 1; on failure NULL is returned, *nItems is 0 and the buffer is released.
+static
+void *Type_XYZ_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsCIEXYZ* value;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+    *nItems = 0;
+    value = (cmsCIEXYZ*) _cmsMallocZero(self->ContextID, sizeof(cmsCIEXYZ));
+    if (value != NULL) {
+        if (_cmsReadXYZNumber(io, value)) {
+            *nItems = 1;
+            return (void*) value;
+        }
+        _cmsFree(self->ContextID, value);
+    }
+    return NULL;
+}
+
+static
+cmsBool  Type_XYZ_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    // Exactly one XYZ value is written; nItems and self play no role
+    cmsUNUSED_PARAMETER(self);
+    cmsUNUSED_PARAMETER(nItems);
+    return _cmsWriteXYZNumber(io, (cmsCIEXYZ*) Ptr);
+}
+
+// Returns a heap copy of one cmsCIEXYZ; the element count n is not used.
+static void* Type_XYZ_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsUNUSED_PARAMETER(n);
+
+    return _cmsDupMem(self ->ContextID, Ptr, sizeof(cmsCIEXYZ));
+}
+
+// Frees Ptr with _cmsFree, using the handler's ContextID.
+static void Type_XYZ_Free(struct _cms_typehandler_struct* self, void *Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);
+}
+
+
+// XYZ data is always written as cmsSigXYZType, whatever the ICC version or data.
+static cmsTagTypeSignature DecideXYZtype(cmsFloat64Number ICCVersion, const void *Data)
+{
+    cmsUNUSED_PARAMETER(ICCVersion);
+    cmsUNUSED_PARAMETER(Data);
+
+    return cmsSigXYZType;
+}
+
+
+// ********************************************************************************
+// Type chromaticity. Only one value is allowed
+// ********************************************************************************
+// The chromaticity tag type provides basic chromaticity data and type of
+// phosphors or colorants of a monitor to applications and utilities.
+
+// Reads a chromaticity tag: a 16-bit channel count (must be 3), a 16-bit word
+// that is read but not used, then x,y for Red, Green and Blue as s15Fixed16.
+// Y is set to 1.0 on each primary. Returns NULL with *nItems == 0 on failure.
+static void *Type_Chromaticity_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsCIExyYTRIPLE* Triple;
+    cmsUInt16Number Channels, Table;
+
+    *nItems = 0;
+    Triple = (cmsCIExyYTRIPLE*) _cmsMallocZero(self ->ContextID, sizeof(cmsCIExyYTRIPLE));
+    if (Triple == NULL) return NULL;
+
+    if (!_cmsReadUInt16Number(io, &Channels)) goto Error;
+
+    // Recover from a bug in early lcms1 versions: when the count reads as 0 in
+    // a 32-byte tag, skip one more 16-bit word and read the count again.
+    if (Channels == 0 && SizeOfTag == 32) {
+        if (!_cmsReadUInt16Number(io, NULL) ||
+            !_cmsReadUInt16Number(io, &Channels)) goto Error;
+    }
+
+    if (Channels != 3) goto Error;
+
+    if (!_cmsReadUInt16Number(io, &Table)) goto Error;
+
+    // Red primary
+    if (!_cmsRead15Fixed16Number(io, &Triple ->Red.x) ||
+        !_cmsRead15Fixed16Number(io, &Triple ->Red.y)) goto Error;
+    Triple ->Red.Y = 1.0;
+
+    // Green primary
+    if (!_cmsRead15Fixed16Number(io, &Triple ->Green.x) ||
+        !_cmsRead15Fixed16Number(io, &Triple ->Green.y)) goto Error;
+    Triple ->Green.Y = 1.0;
+
+    // Blue primary
+    if (!_cmsRead15Fixed16Number(io, &Triple ->Blue.x) ||
+        !_cmsRead15Fixed16Number(io, &Triple ->Blue.y)) goto Error;
+    Triple ->Blue.Y = 1.0;
+
+    *nItems = 1;
+    return (void*) Triple;
+
+Error:
+    _cmsFree(self ->ContextID, (void*) Triple);
+    return NULL;
+}
+
+// Writes x and then y as s15Fixed16 numbers. Returns FALSE if either write fails.
+static cmsBool  SaveOneChromaticity(cmsFloat64Number x, cmsFloat64Number y, cmsIOHANDLER* io)
+{
+    if (_cmsWriteUInt32Number(io, (cmsUInt32Number) _cmsDoubleTo15Fixed16(x)) &&
+        _cmsWriteUInt32Number(io, (cmsUInt32Number) _cmsDoubleTo15Fixed16(y)))
+        return TRUE;
+    return FALSE;
+}
+
+// Writes a chromaticity tag for the cmsCIExyYTRIPLE in Ptr: channel count 3,
+// a zero table word, then the x,y pairs of Red, Green and Blue in that order.
+static cmsBool  Type_Chromaticity_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsCIExyYTRIPLE* Triple = (cmsCIExyYTRIPLE*) Ptr;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    // Header: nChannels followed by Table
+    if (!_cmsWriteUInt16Number(io, 3) || !_cmsWriteUInt16Number(io, 0)) return FALSE;
+
+    if (!SaveOneChromaticity(Triple ->Red.x,   Triple ->Red.y, io)) return FALSE;
+    if (!SaveOneChromaticity(Triple ->Green.x, Triple ->Green.y, io)) return FALSE;
+    if (!SaveOneChromaticity(Triple ->Blue.x,  Triple ->Blue.y, io)) return FALSE;
+    return TRUE;
+}
+
+// Returns a heap copy of one cmsCIExyYTRIPLE; the element count n is not used.
+static void* Type_Chromaticity_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsUNUSED_PARAMETER(n);
+
+    return _cmsDupMem(self ->ContextID, Ptr, sizeof(cmsCIExyYTRIPLE));
+}
+
+// Frees Ptr with _cmsFree, using the handler's ContextID.
+static void Type_Chromaticity_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);
+}
+
+
+// ********************************************************************************
+// Type cmsSigColorantOrderType
+// ********************************************************************************
+
+// This is an optional tag which specifies the laydown order in which colorants will
+// be printed on an n-colorant device. The laydown order may be the same as the
+// channel generation order listed in the colorantTableTag or the channel order of a
+// colour space such as CMYK, in which case this tag is not needed. When this is not
+// the case (for example, ink-towers sometimes use the order KCMY), this tag may be
+// used to specify the laydown order of the colorants.
+
+
+// Reads a colorant order tag: a 32-bit count followed by that many bytes.
+// The result is always a cmsMAXCHANNELS-byte array; entries past the count
+// hold 0xFF, which acts as the end marker. A count above cmsMAXCHANNELS or a
+// short read yields NULL with *nItems == 0. SizeOfTag is not consulted.
+static void *Type_ColorantOrderType_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt8Number* Order;
+    cmsUInt32Number Entries;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+    *nItems = 0;
+
+    if (!_cmsReadUInt32Number(io, &Entries) || Entries > cmsMAXCHANNELS) return NULL;
+
+    Order = (cmsUInt8Number*) _cmsCalloc(self ->ContextID, cmsMAXCHANNELS, sizeof(cmsUInt8Number));
+    if (Order == NULL) return NULL;
+
+    // Pre-fill with the 0xFF end marker so slots that are not read stay marked
+    memset(Order, 0xFF, cmsMAXCHANNELS * sizeof(cmsUInt8Number));
+
+    if (io ->Read(io, Order, sizeof(cmsUInt8Number), Entries) == Entries) {
+        *nItems = 1;
+        return (void*) Order;
+    }
+    _cmsFree(self ->ContextID, (void*) Order);
+    return NULL;
+}
+
+// Writes a colorant order tag. The count is the number of entries in the
+// cmsMAXCHANNELS-byte array that differ from the 0xFF marker; that many bytes
+// are then written starting from the beginning of the array.
+static cmsBool Type_ColorantOrderType_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsUInt8Number*  Order = (cmsUInt8Number*) Ptr;
+    cmsUInt32Number k, Used = 0;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    // Tally the slots that are in use
+    for (k = 0; k < cmsMAXCHANNELS; k++)
+        if (Order[k] != 0xFF) Used++;
+
+    if (!_cmsWriteUInt32Number(io, Used)) return FALSE;
+
+    if (!io -> Write(io, (cmsUInt32Number) (Used * sizeof(cmsUInt8Number)), Order)) return FALSE;
+
+    return TRUE;
+}
+
+// Copies the whole cmsMAXCHANNELS-byte colorant order array; n is not used.
+static void* Type_ColorantOrderType_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsUNUSED_PARAMETER(n);
+
+    return _cmsDupMem(self ->ContextID, Ptr, cmsMAXCHANNELS * sizeof(cmsUInt8Number));
+}
+
+
+// Frees Ptr with _cmsFree, using the handler's ContextID.
+static void Type_ColorantOrderType_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);
+}
+
+// ********************************************************************************
+// Type cmsSigS15Fixed16ArrayType
+// ********************************************************************************
+// This type represents an array of generic 4-byte/32-bit fixed point quantity.
+// The number of values is determined from the size of the tag.
+
+// Reads SizeOfTag / sizeof(cmsUInt32Number) s15Fixed16 numbers into a new array
+// of doubles; *nItems gets the element count. NULL with *nItems == 0 on failure.
+static void *Type_S15Fixed16_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsFloat64Number*  Values;
+    cmsUInt32Number k;
+    cmsUInt32Number Total = SizeOfTag / sizeof(cmsUInt32Number);
+
+    *nItems = 0;
+    Values = (cmsFloat64Number*) _cmsCalloc(self ->ContextID, Total, sizeof(cmsFloat64Number));
+    if (Values == NULL) return NULL;
+
+    for (k = 0; k < Total; k++) {
+        if (_cmsRead15Fixed16Number(io, &Values[k])) continue;
+
+        // A read failed: drop everything read so far
+        _cmsFree(self ->ContextID, Values);
+        return NULL;
+    }
+
+    *nItems = Total;
+    return (void*) Values;
+}
+
+// Writes nItems doubles from Ptr as s15Fixed16 numbers, stopping with FALSE
+// on the first failed write.
+static cmsBool Type_S15Fixed16_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    const cmsFloat64Number* Cursor = (cmsFloat64Number*) Ptr;
+    cmsUInt32Number Remaining;
+
+    cmsUNUSED_PARAMETER(self);
+
+    for (Remaining = nItems; Remaining > 0; Remaining--, Cursor++) {
+        if (!_cmsWrite15Fixed16Number(io, *Cursor)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+// Returns a heap copy of an array of n doubles.
+static void* Type_S15Fixed16_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    return _cmsDupMem(self ->ContextID, Ptr, n * sizeof(cmsFloat64Number));
+}
+
+
+// Frees Ptr with _cmsFree, using the handler's ContextID.
+static void Type_S15Fixed16_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);
+}
+
+// ********************************************************************************
+// Type cmsSigU16Fixed16ArrayType
+// ********************************************************************************
+// This type represents an array of generic 4-byte/32-bit quantity.
+// The number of values is determined from the size of the tag.
+
+
+// Reads SizeOfTag / sizeof(cmsUInt32Number) u16Fixed16 numbers, converting
+// each raw 32-bit value to a double by dividing by 65536. *nItems receives
+// the element count; NULL with *nItems == 0 on any failure.
+static void *Type_U16Fixed16_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsFloat64Number*  Values;
+    cmsUInt32Number Raw;
+    cmsUInt32Number k;
+    cmsUInt32Number Total = SizeOfTag / sizeof(cmsUInt32Number);
+
+    *nItems = 0;
+    Values = (cmsFloat64Number*) _cmsCalloc(self ->ContextID, Total, sizeof(cmsFloat64Number));
+    if (Values == NULL) return NULL;
+
+    for (k = 0; k < Total; k++) {
+        if (!_cmsReadUInt32Number(io, &Raw)) {
+            _cmsFree(self ->ContextID, (void*) Values);
+            return NULL;
+        }
+        // 16.16 fixed point to floating point
+        Values[k] = (cmsFloat64Number) (Raw / 65536.0);
+    }
+
+    *nItems = Total;
+    return (void*) Values;
+}
+
+// Writes nItems doubles from Ptr as u16Fixed16 numbers: each value is scaled
+// by 65536 and rounded with floor(v + 0.5) before being written as 32 bits.
+static cmsBool Type_U16Fixed16_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    const cmsFloat64Number* Source = (cmsFloat64Number*) Ptr;
+    cmsUInt32Number k, Fixed;
+
+    cmsUNUSED_PARAMETER(self);
+
+    for (k = 0; k < nItems; k++) {
+        Fixed = (cmsUInt32Number) floor(Source[k]*65536.0 + 0.5);
+
+        if (!_cmsWriteUInt32Number(io, Fixed)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Returns a heap copy of an array of n doubles.
+static void* Type_U16Fixed16_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    return _cmsDupMem(self ->ContextID, Ptr, n * sizeof(cmsFloat64Number));
+}
+
+// Frees Ptr with _cmsFree, using the handler's ContextID.
+static void Type_U16Fixed16_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);
+}
+
+// ********************************************************************************
+// Type cmsSigSignatureType
+// ********************************************************************************
+//
+// The signatureType contains a four-byte sequence. Sequences of fewer than four
+// characters are padded at the end with spaces, 20h.
+// Typically this type is used for registered tags that can be displayed on many
+// development systems as a sequence of four characters.
+
+// Reads one 32-bit signature into a new cmsSignature and sets *nItems to 1.
+// Returns NULL with *nItems == 0 (buffer freed) if allocation or reading fails.
+static void *Type_Signature_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsSignature* SigPtr = (cmsSignature*) _cmsMalloc(self ->ContextID, sizeof(cmsSignature));
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+    *nItems = 0;
+    if (SigPtr == NULL) return NULL;
+    if (!_cmsReadUInt32Number(io, SigPtr)) { _cmsFree(self ->ContextID, SigPtr); return NULL; }
+    *nItems = 1;
+    return SigPtr;
+}
+
+// Writes the cmsSignature that Ptr points to as one 32-bit number.
+static cmsBool  Type_Signature_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    const cmsSignature* Sig = (cmsSignature*) Ptr;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    return _cmsWriteUInt32Number(io, *Sig);
+}
+
+// Returns a heap copy of n cmsSignature values.
+static void* Type_Signature_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    return _cmsDupMem(self ->ContextID, Ptr, n * sizeof(cmsSignature));
+}
+
+// Frees Ptr with _cmsFree, using the handler's ContextID.
+static void Type_Signature_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);
+}
+
+
+// ********************************************************************************
+// Type cmsSigTextType
+// ********************************************************************************
+//
+// The textType is a simple text structure that contains a 7-bit ASCII text string.
+// The length of the string is obtained by subtracting 8 from the element size portion
+// of the tag itself. This string must be terminated with a 00h byte.
+
+// Reads SizeOfTag bytes of text, appends a terminating 0 and stores the
+// result in a new cmsMLU under cmsNoLanguage/cmsNoCountry. Returns the MLU,
+// or NULL after releasing whatever was allocated. Note that *nItems becomes
+// 1 before cmsMLUsetASCII runs and is not reset if that call fails.
+static void *Type_Text_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    char* Buffer = NULL;
+    cmsMLU* Container;
+
+    // The MLU is the object handed back to the caller
+    Container = cmsMLUalloc(self ->ContextID, 1);
+    if (Container == NULL) return NULL;
+
+    *nItems = 0;
+
+    // SizeOfTag + 1 bytes are needed for the "\0"; refuse a size that would wrap
+    if (SizeOfTag == UINT_MAX) goto Error;
+
+    Buffer = (char*) _cmsMalloc(self ->ContextID, SizeOfTag + 1);
+    if (Buffer == NULL) goto Error;
+
+    if (io -> Read(io, Buffer, sizeof(char), SizeOfTag) != SizeOfTag) goto Error;
+
+    Buffer[SizeOfTag] = 0;      // force termination
+    *nItems = 1;
+
+    if (!cmsMLUsetASCII(Container, cmsNoLanguage, cmsNoCountry, Buffer)) goto Error;
+
+    // The text has been handed to the MLU; the scratch buffer can go
+    _cmsFree(self ->ContextID, Buffer);
+    return (void*) Container;
+
+Error:
+    cmsMLUfree(Container);
+    if (Buffer != NULL) _cmsFree(self ->ContextID, Buffer);
+
+    return NULL;
+}
+
+// Plain text holds a single string, so writing means choosing one language: the cmsNoLanguage/cmsNoCountry entry is used.
+// Writes the cmsNoLanguage/cmsNoCountry ASCII text of the cmsMLU in Ptr, using
+// the size reported by cmsMLUgetASCII. Fails if that size is 0 or on no memory.
+static cmsBool Type_Text_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsMLU* Source = (cmsMLU*) Ptr;
+    cmsUInt32Number Needed;
+    cmsBool  Written;
+    char* Scratch;
+
+    cmsUNUSED_PARAMETER(nItems);
+
+    // A NULL buffer asks only for the size. Note there is an extra "\0" at the end
+    Needed = cmsMLUgetASCII(Source, cmsNoLanguage, cmsNoCountry, NULL, 0);
+    if (Needed == 0) return FALSE;       // Cannot be zero!
+
+    Scratch = (char*) _cmsMalloc(self ->ContextID, Needed);
+    if (Scratch == NULL) return FALSE;
+
+    cmsMLUgetASCII(Source, cmsNoLanguage, cmsNoCountry, Scratch, Needed);
+
+    // Emit the text, including separator
+    Written = io ->Write(io, Needed, Scratch);
+
+    _cmsFree(self ->ContextID, Scratch);
+    return Written;
+}
+
+// Duplicates the cmsMLU behind Ptr with cmsMLUdup; n and self are unused.
+static void* Type_Text_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    return (void*) cmsMLUdup((cmsMLU*) Ptr);
+}
+
+
+// Releases the cmsMLU behind Ptr with cmsMLUfree; self is unused.
+static void Type_Text_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsMLU* Text = (cmsMLU*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsMLUfree(Text);
+}
+
+// Chooses the type used to store text: cmsSigMultiLocalizedUnicodeType when
+// ICCVersion is 4.0 or higher, cmsSigTextType otherwise. Data is not
+// inspected.
+static cmsTagTypeSignature DecideTextType(cmsFloat64Number ICCVersion, const void *Data)
+{
+    cmsUNUSED_PARAMETER(Data);
+
+    return (ICCVersion >= 4.0) ? cmsSigMultiLocalizedUnicodeType
+                               : cmsSigTextType;
+}
+
+
+// ********************************************************************************
+// Type cmsSigDataType
+// ********************************************************************************
+
+// General purpose data type
+// Reads a data tag: a 32-bit flag followed by the remaining payload bytes,
+// returned as one cmsICCData whose len is the payload size. Tags shorter
+// than the flag word, or payloads above INT_MAX, are rejected. Returns NULL
+// with *nItems == 0 on failure.
+static void *Type_Data_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsICCData* Blob;
+    cmsUInt32Number Payload;
+
+    *nItems = 0;
+
+    // The flag word must fit in the tag
+    if (SizeOfTag < sizeof(cmsUInt32Number)) return NULL;
+
+    Payload = SizeOfTag - sizeof(cmsUInt32Number);
+    if (Payload > INT_MAX) return NULL;
+
+    // NOTE(review): the "- 1" presumably accounts for a one-byte data[] member
+    // already counted in sizeof(cmsICCData) -- confirm against the struct.
+    Blob = (cmsICCData*) _cmsMalloc(self ->ContextID, sizeof(cmsICCData) + Payload - 1);
+    if (Blob == NULL) return NULL;
+
+    Blob ->len = Payload;
+    if (_cmsReadUInt32Number(io, &Blob->flag) &&
+        io -> Read(io, Blob ->data, sizeof(cmsUInt8Number), Payload) == Payload) {
+        *nItems = 1;
+        return (void*) Blob;
+    }
+
+    _cmsFree(self ->ContextID, Blob);
+    return NULL;
+}
+
+
+// Writes the cmsICCData in Ptr as its 32-bit flag followed by len data bytes.
+static cmsBool Type_Data_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsICCData* Blob = (cmsICCData*) Ptr;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    if (!_cmsWriteUInt32Number(io, Blob ->flag)) return FALSE;
+
+    return io ->Write(io, Blob ->len, Blob ->data);
+}
+
+
+// Copies a cmsICCData together with its payload: the size duplicated is
+// sizeof(cmsICCData) + len - 1 bytes. The element count n is not used.
+static void* Type_Data_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    const cmsICCData* Blob = (const cmsICCData*) Ptr;
+
+    cmsUNUSED_PARAMETER(n);
+    return _cmsDupMem(self ->ContextID, Ptr, sizeof(cmsICCData) + Blob ->len - 1);
+}
+
+// Frees Ptr with _cmsFree, using the handler's ContextID.
+static void Type_Data_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);
+}
+
+// ********************************************************************************
+// Type cmsSigTextDescriptionType
+// ********************************************************************************
+
+// Reads a textDescriptionType into a new cmsMLU. Only the ASCII description
+// is kept (under cmsNoLanguage/cmsNoCountry); the Unicode and ScriptCode
+// sections are read and discarded. After the ASCII text is stored, problems in
+// those sections still return the MLU, except a failed read of the 67-byte filler.
+static void *Type_Text_Description_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    char* Text = NULL;
+    cmsMLU* mlu = NULL;
+    cmsUInt32Number  AsciiCount;
+    cmsUInt32Number  i, UnicodeCode, UnicodeCount;
+    cmsUInt16Number  ScriptCodeCode, Dummy;
+    cmsUInt8Number   ScriptCodeCount;
+
+    *nItems = 0;
+
+    //  One dword should be there
+    if (SizeOfTag < sizeof(cmsUInt32Number)) return NULL;
+
+    // Read len of ASCII
+    if (!_cmsReadUInt32Number(io, &AsciiCount)) return NULL;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    // The ASCII text must fit in what is left of the tag
+    if (SizeOfTag < AsciiCount) return NULL;
+
+    // All seems Ok, allocate the container
+    mlu = cmsMLUalloc(self ->ContextID, 1);
+    if (mlu == NULL) return NULL;
+
+    // Room for the ASCII text plus a terminator
+    Text = (char*) _cmsMalloc(self ->ContextID, AsciiCount + 1);
+    if (Text == NULL) goto Error;
+
+    // Read it
+    if (io ->Read(io, Text, sizeof(char), AsciiCount) != AsciiCount) goto Error;
+    SizeOfTag -= AsciiCount;
+
+    // Make sure there is a terminator
+    Text[AsciiCount] = 0;
+
+    // Set the MLU entry. From here we can be tolerant to wrong types
+    if (!cmsMLUsetASCII(mlu, cmsNoLanguage, cmsNoCountry, Text)) goto Error;
+    _cmsFree(self ->ContextID, (void*) Text);
+    Text = NULL;
+
+    // Skip Unicode code
+    if (SizeOfTag < 2* sizeof(cmsUInt32Number)) goto Done;
+    if (!_cmsReadUInt32Number(io, &UnicodeCode)) goto Done;
+    if (!_cmsReadUInt32Number(io, &UnicodeCount)) goto Done;
+    SizeOfTag -= 2* sizeof(cmsUInt32Number);
+    // Divide instead of multiplying so a huge count cannot wrap the comparison
+    if (UnicodeCount > SizeOfTag / sizeof(cmsUInt16Number)) goto Done;
+    for (i=0; i < UnicodeCount; i++) {
+        if (!io ->Read(io, &Dummy, sizeof(cmsUInt16Number), 1)) goto Done;
+    }
+    SizeOfTag -= UnicodeCount*sizeof(cmsUInt16Number);
+
+    // Skip ScriptCode code if present. Some buggy profiles have less
+    // data than strictly required. We need to skip it as this type may come
+    // embedded in other types.
+
+    if (SizeOfTag >= sizeof(cmsUInt16Number) + sizeof(cmsUInt8Number) + 67) {
+        if (!_cmsReadUInt16Number(io, &ScriptCodeCode)) goto Done;
+        if (!_cmsReadUInt8Number(io,  &ScriptCodeCount)) goto Done;
+
+        // Skip rest of tag
+        for (i=0; i < 67; i++) {
+            if (!io ->Read(io, &Dummy, sizeof(cmsUInt8Number), 1)) goto Error;
+        }
+    }
+
+Done:
+    *nItems = 1;
+    return mlu;
+
+Error:
+    if (Text) _cmsFree(self ->ContextID, (void*) Text);
+    if (mlu) cmsMLUfree(mlu);
+    return NULL;
+}
+
+
+// This tag can come IN UNALIGNED SIZE. In order to prevent issues, we force zeros on description to align it
+// Writes the cmsNoLanguage/cmsNoCountry text of the cmsMLU in Ptr as a
+// textDescriptionType, zero-padded at the end up to the _cmsALIGNLONG size.
+static cmsBool  Type_Text_Description_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsMLU* mlu = (cmsMLU*) Ptr;
+    char *Text = NULL;
+    wchar_t *Wide = NULL;
+    cmsUInt32Number len, len_text, len_tag_requirement, len_aligned;
+    cmsBool  rc = FALSE;
+    char Filler[68];
+
+    // Used below for writing zeroes
+    memset(Filler, 0, sizeof(Filler));
+
+    // Get the len of string
+    len = cmsMLUgetASCII(mlu, cmsNoLanguage, cmsNoCountry, NULL, 0);
+
+    // Specification ICC.1:2001-04 (v2.4.0): It has been found that textDescriptionType can contain misaligned data
+    //(see clause 4.1 for the definition of 'aligned'). Because the Unicode language
+    // code and Unicode count immediately follow the ASCII description, their
+    // alignment is not correct if the ASCII count is not a multiple of four. The
+    // ScriptCode code is misaligned when the ASCII count is odd. Profile reading and
+    // writing software must be written carefully in order to handle these alignment
+    // problems.
+    //
+    // The above last sentence suggest to handle alignment issues in the
+    // parser. The provided example (Table 69 on Page 60) makes this clear. 
+    // The padding only in the ASCII count is not sufficient for a aligned tag
+    // size, with the same text size in ASCII and Unicode.
+
+    // Null strings
+    if (len == 0) {
+        Text = (char*)    _cmsDupMem(self ->ContextID, "", sizeof(char));
+        Wide = (wchar_t*) _cmsDupMem(self ->ContextID, L"", sizeof(wchar_t));
+        if (Text == NULL || Wide == NULL) goto Error;   // strlen() below needs a real buffer
+    }
+    else {
+        // Create independent buffers
+        Text = (char*) _cmsCalloc(self ->ContextID, len, sizeof(char));
+        if (Text == NULL) goto Error;
+
+        Wide = (wchar_t*) _cmsCalloc(self ->ContextID, len, sizeof(wchar_t));
+        if (Wide == NULL) goto Error;
+
+        // Get both representations.
+        cmsMLUgetASCII(mlu, cmsNoLanguage, cmsNoCountry,  Text, len * sizeof(char));
+        cmsMLUgetWide(mlu,  cmsNoLanguage, cmsNoCountry,  Wide, len * sizeof(wchar_t));
+    }
+
+    // Tell the real text len including the null terminator and padding
+    len_text = (cmsUInt32Number) strlen(Text) + 1;
+    // Compute an total tag size requirement
+    len_tag_requirement = (8+4+len_text+4+4+2*len_text+2+1+67);
+    len_aligned = _cmsALIGNLONG(len_tag_requirement);
+
+  // * cmsUInt32Number       count;          * Description length
+  // * cmsInt8Number         desc[count]     * NULL terminated ascii string
+  // * cmsUInt32Number       ucLangCode;     * UniCode language code
+  // * cmsUInt32Number       ucCount;        * UniCode description length
+  // * cmsInt16Number        ucDesc[ucCount];* The UniCode description
+  // * cmsUInt16Number       scCode;         * ScriptCode code
+  // * cmsUInt8Number        scCount;        * ScriptCode count
+  // * cmsInt8Number         scDesc[67];     * ScriptCode Description
+
+    if (!_cmsWriteUInt32Number(io, len_text)) goto Error;
+    if (!io ->Write(io, len_text, Text)) goto Error;
+
+    if (!_cmsWriteUInt32Number(io, 0)) goto Error;  // ucLanguageCode
+
+    if (!_cmsWriteUInt32Number(io, len_text)) goto Error;
+    // Note that in some compilers sizeof(cmsUInt16Number) != sizeof(wchar_t)
+    if (!_cmsWriteWCharArray(io, len_text, Wide)) goto Error;
+
+    // ScriptCode Code & count (unused)
+    if (!_cmsWriteUInt16Number(io, 0)) goto Error;
+    if (!_cmsWriteUInt8Number(io, 0)) goto Error;
+
+    if (!io ->Write(io, 67, Filler)) goto Error;
+
+    // possibly add pad at the end of tag
+    if(len_aligned - len_tag_requirement > 0)
+      if (!io ->Write(io, len_aligned - len_tag_requirement, Filler)) goto Error;
+
+    rc = TRUE;
+
+Error:
+    if (Text) _cmsFree(self ->ContextID, Text);
+    if (Wide) _cmsFree(self ->ContextID, Wide);
+    return rc;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+// Duplicates the cmsMLU behind Ptr with cmsMLUdup; n and self are unused.
+static void* Type_Text_Description_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    return (void*) cmsMLUdup((cmsMLU*) Ptr);
+}
+
+// Releases the cmsMLU behind Ptr with cmsMLUfree. The handler itself is not
+// needed for that, so self is unused.
+static void Type_Text_Description_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsMLU* Description = (cmsMLU*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsMLUfree(Description);
+}
+
+
+// Chooses the type used to store a description: when ICCVersion is 4.0 or
+// higher it is cmsSigMultiLocalizedUnicodeType, otherwise
+// cmsSigTextDescriptionType. Data is not inspected.
+static cmsTagTypeSignature DecideTextDescType(cmsFloat64Number ICCVersion, const void *Data)
+{
+    cmsUNUSED_PARAMETER(Data);
+
+    return (ICCVersion >= 4.0) ? cmsSigMultiLocalizedUnicodeType
+                               : cmsSigTextDescriptionType;
+}
+
+
+// ********************************************************************************
+// Type cmsSigCurveType
+// ********************************************************************************
+
+// Reads a curveType. The leading 32-bit count selects the representation:
+//   0       linear, built as a type 1 parametric curve with gamma 1.0
+//   1       a single 8.8 fixed point gamma, also built as parametric type 1
+//   other   a table of that many 16-bit entries (at most 0x7FFF of them)
+// Returns the new cmsToneCurve or NULL. SizeOfTag is not consulted.
+static void *Type_Curve_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt32Number Entries;
+    cmsToneCurve* Curve;
+    cmsFloat64Number Gamma;
+    cmsUInt16Number GammaFixed;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+    *nItems = 0;
+    if (!_cmsReadUInt32Number(io, &Entries)) return NULL;
+
+    if (Entries == 0) {
+
+        // Linear.
+        Gamma = 1.0;
+        Curve = cmsBuildParametricToneCurve(self ->ContextID, 1, &Gamma);
+        if (!Curve) return NULL;
+
+        *nItems = 1;
+        return Curve;
+    }
+
+    if (Entries == 1) {
+
+        // Specified as the exponent of gamma function
+        if (!_cmsReadUInt16Number(io, &GammaFixed)) return NULL;
+        Gamma = _cms8Fixed8toDouble(GammaFixed);
+
+        // NOTE: *nItems is set to 1 here even if building the curve then fails
+        *nItems = 1;
+        return cmsBuildParametricToneCurve(self ->ContextID, 1, &Gamma);
+    }
+
+    // Tabulated curve. The cap is to prevent bad guys for doing bad things
+    if (Entries > 0x7FFF) return NULL;
+
+    Curve = cmsBuildTabulatedToneCurve16(self ->ContextID, Entries, NULL);
+    if (!Curve) return NULL;
+
+    if (!_cmsReadUInt16Array(io, Entries, Curve -> Table16)) {
+        cmsFreeToneCurve(Curve);
+        return NULL;
+    }
+
+    *nItems = 1;
+    return Curve;
+}
+
+
+// Writes a curveType. A one-segment curve of segment type 1 is stored as count 1
+// plus its gamma in 8.8 fixed point; any other curve as nEntries plus Table16.
+static cmsBool  Type_Curve_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsToneCurve* Gamma = (cmsToneCurve*) Ptr;
+    cmsUInt16Number Fixed;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    if (Gamma ->nSegments != 1 || Gamma ->Segments[0].Type != 1) {
+        // General case: entry count followed by the 16-bit table
+        if (!_cmsWriteUInt32Number(io, Gamma ->nEntries)) return FALSE;
+        return _cmsWriteUInt16Array(io, Gamma ->nEntries, Gamma ->Table16);
+    }
+
+    // Single gamma, preserve number
+    Fixed = _cmsDoubleTo8Fixed8(Gamma ->Segments[0].Params[0]);
+    if (!_cmsWriteUInt32Number(io, 1)) return FALSE;
+    if (!_cmsWriteUInt16Number(io, Fixed)) return FALSE;
+    return TRUE;
+}
+
+
+// Duplicates the tone curve behind Ptr with cmsDupToneCurve; n and self are unused.
+static void* Type_Curve_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    return (void*) cmsDupToneCurve((cmsToneCurve*) Ptr);
+}
+
+// Releases the tone curve behind Ptr with cmsFreeToneCurve. The handler is
+// not needed for that, so self is unused.
+static void Type_Curve_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsToneCurve* Curve = (cmsToneCurve*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsFreeToneCurve(Curve);
+}
+
+
+// ********************************************************************************
+// Type cmsSigParametricCurveType
+// ********************************************************************************
+
+
+// Decide which curve type to use on writing
+// cmsSigParametricCurveType is chosen only when ICCVersion is at least 4.0 and
+// the curve has exactly one segment whose type lies in 0..5; every other
+// curve is written as cmsSigCurveType.
+static cmsTagTypeSignature DecideCurveType(cmsFloat64Number ICCVersion, const void *Data)
+{
+    const cmsToneCurve* Gamma = (const cmsToneCurve*) Data;
+
+    if (ICCVersion < 4.0 || Gamma ->nSegments != 1) return cmsSigCurveType;   // Only 1-segment curves can be parametric
+    if (Gamma ->Segments[0].Type < 0 || Gamma ->Segments[0].Type > 5) return cmsSigCurveType;   // Non-inverted ICC types only
+
+    return cmsSigParametricCurveType;
+}
+
+// Reads a parametricCurveType: a 16-bit function type (0..4), a reserved
+// 16-bit word, then 1, 3, 4, 5 or 7 s15Fixed16 parameters depending on the
+// type. The curve is built with parametric type (function type + 1).
+// *nItems is written only after all fields are read; SizeOfTag is unused.
+static void *Type_ParametricCurve_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    static const int ParamsByType[] = { 1, 3, 4, 5, 7 };
+    cmsFloat64Number Params[10];
+    cmsUInt16Number Type;
+    int Wanted;
+    int k;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+
+    if (!_cmsReadUInt16Number(io, &Type)) return NULL;
+    if (!_cmsReadUInt16Number(io, NULL)) return NULL;   // Reserved
+
+    if (Type > 4) {
+        cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown parametric curve type '%d'", Type);
+        return NULL;
+    }
+
+    // Parameters that are not present in the tag stay at zero
+    memset(Params, 0, sizeof(Params));
+    Wanted = ParamsByType[Type];
+
+    for (k = 0; k < Wanted; k++) {
+        if (!_cmsRead15Fixed16Number(io, &Params[k])) return NULL;
+    }
+
+    *nItems = 1;
+    return cmsBuildParametricToneCurve(self ->ContextID, Type+1, Params);
+}
+
+
+// Writes a parametricCurveType for the cmsToneCurve in Ptr. Only curves with
+// exactly one segment of type 1..5 can be stored: the function type written is
+// (segment type - 1), then a reserved zero word and 1, 3, 4, 5 or 7 s15Fixed16
+// parameters. Anything else is reported via cmsSignalError and returns FALSE.
+static cmsBool  Type_ParametricCurve_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    static const int ParamsByType[] = { 0, 1, 3, 4, 5, 7 };
+    cmsToneCurve* Curve = (cmsToneCurve*) Ptr;
+    int i, nParams, typen;
+
+    cmsUNUSED_PARAMETER(nItems);
+    // Check nSegments before touching Segments[0]: a curve with no segments
+    // has no Segments[0] to read, so it is rejected here as well.
+    if (Curve ->nSegments != 1 || Curve ->Segments[0].Type < 1) {
+        cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Multisegment or Inverted parametric curves cannot be written");
+        return FALSE;
+    }
+
+    typen = Curve ->Segments[0].Type;
+    if (typen > 5) {
+        cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported parametric curve");
+        return FALSE;
+    }
+
+    nParams = ParamsByType[typen];
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) (typen - 1))) return FALSE;
+    if (!_cmsWriteUInt16Number(io, 0)) return FALSE;        // Reserved
+
+    for (i=0; i < nParams; i++) {
+        if (!_cmsWrite15Fixed16Number(io, Curve -> Segments[0].Params[i])) return FALSE;
+    }
+
+    return TRUE;
+}
+
+// Duplicates the tone curve behind Ptr with cmsDupToneCurve; n and self are unused.
+static void* Type_ParametricCurve_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    return (void*) cmsDupToneCurve((cmsToneCurve*) Ptr);
+}
+
+// Releases the tone curve behind Ptr with cmsFreeToneCurve. The handler is
+// not needed for that, so self is unused.
+static void Type_ParametricCurve_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsToneCurve* Curve = (cmsToneCurve*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsFreeToneCurve(Curve);
+}
+
+
+// ********************************************************************************
+// Type cmsSigDateTimeType
+// ********************************************************************************
+
+// A 12-byte value representation of the time and date, where the byte usage is assigned
+// as specified in table 1. The actual values are encoded as 16-bit unsigned integers
+// (uInt16Number - see 5.1.6).
+//
+// All the dateTimeNumber values in a profile shall be in Coordinated Universal Time
+// (UTC, also known as GMT or ZULU Time). Profile writers are required to convert local
+// time to UTC when setting these values. Programmes that display these values may show
+// the dateTimeNumber as UTC, show the equivalent local time (at current locale), or
+// display both UTC and local versions of the dateTimeNumber.
+
+// Reads one cmsDateTimeNumber and decodes it into a new struct tm (*nItems = 1).
+// On allocation or read failure returns NULL, *nItems == 0, buffer released.
+static void *Type_DateTime_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsDateTimeNumber timestamp;
+    struct tm * NewDateTime;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+    *nItems = 0;
+    NewDateTime = (struct tm*) _cmsMalloc(self ->ContextID, sizeof(struct tm));
+    if (NewDateTime == NULL) return NULL;
+    if (io->Read(io, &timestamp, sizeof(cmsDateTimeNumber), 1) != 1) {
+        _cmsFree(self ->ContextID, NewDateTime);    // a failed read must not leak the buffer
+        return NULL;
+    }
+    _cmsDecodeDateTimeNumber(&timestamp, NewDateTime);
+    *nItems = 1;
+    return NewDateTime;
+}
+
+
+// Encodes the struct tm in Ptr as a cmsDateTimeNumber and writes it to io.
+// nItems and self are unused.
+static cmsBool  Type_DateTime_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    struct tm * When = (struct tm*) Ptr;
+    cmsDateTimeNumber Encoded;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    _cmsEncodeDateTimeNumber(&Encoded, When);
+
+    return io ->Write(io, sizeof(cmsDateTimeNumber), &Encoded) ? TRUE : FALSE;
+}
+
+// Duplicates a date/time item: copies sizeof(struct tm) bytes from Ptr using
+// _cmsDupMem() on the handler's context. The item count n is not used.
+static
+void* Type_DateTime_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    void* Copy = _cmsDupMem(self ->ContextID, Ptr, sizeof(struct tm));
+
+    return Copy;
+
+    cmsUNUSED_PARAMETER(n);
+}
+
+// Free callback for date/time items: releases Ptr with _cmsFree() on the
+// handler's context.
+static
+void Type_DateTime_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsContext ContextID = self ->ContextID;
+
+    _cmsFree(ContextID, Ptr);
+}
+
+
+
+// ********************************************************************************
+// Type icMeasurementType
+// ********************************************************************************
+
+/*
+The measurementType information refers only to the internal profile data and is
+meant to provide profile makers an alternative to the default measurement
+specifications.
+*/
+
+// Reads a measurement tag (Observer, Backing, Geometry, Flare, IlluminantType, in
+// that order) and returns a copy of the decoded cmsICCMeasurementConditions made
+// with _cmsDupMem() on the handler's context.
+//
+//   self      - type handler; self->ContextID is used for the copy.
+//   io        - I/O handler to read from.
+//   nItems    - receives 1 on success and 0 on any failure.
+//   SizeOfTag - not used.
+//
+// Returns the new structure, or NULL if any field cannot be read or the copy
+// cannot be allocated.
+static
+void *Type_Measurement_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsICCMeasurementConditions mc;
+    void* Result;
+
+    // Same convention as the other readers: report zero items unless we succeed
+    *nItems = 0;
+
+    memset(&mc, 0, sizeof(mc));
+
+    if (!_cmsReadUInt32Number(io, &mc.Observer)) return NULL;
+    if (!_cmsReadXYZNumber(io,    &mc.Backing)) return NULL;
+    if (!_cmsReadUInt32Number(io, &mc.Geometry)) return NULL;
+    if (!_cmsRead15Fixed16Number(io, &mc.Flare)) return NULL;
+    if (!_cmsReadUInt32Number(io, &mc.IlluminantType)) return NULL;
+
+    Result = _cmsDupMem(self ->ContextID, &mc, sizeof(cmsICCMeasurementConditions));
+    if (Result == NULL) return NULL;
+
+    *nItems = 1;
+    return Result;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+// Writes the cmsICCMeasurementConditions pointed to by Ptr: Observer, Backing,
+// Geometry, Flare and IlluminantType, in that order. Stops at the first failing
+// write and returns FALSE; returns TRUE when all five succeed.
+// nItems and self are not used.
+static
+cmsBool  Type_Measurement_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsICCMeasurementConditions* Cond = (cmsICCMeasurementConditions*) Ptr;
+
+    // && short-circuits, so no further field is written after a failure
+    return (_cmsWriteUInt32Number(io, Cond->Observer) &&
+            _cmsWriteXYZNumber(io, &Cond->Backing) &&
+            _cmsWriteUInt32Number(io, Cond->Geometry) &&
+            _cmsWrite15Fixed16Number(io, Cond->Flare) &&
+            _cmsWriteUInt32Number(io, Cond->IlluminantType)) ? TRUE : FALSE;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Duplicates a measurement item: copies sizeof(cmsICCMeasurementConditions) bytes
+// from Ptr using _cmsDupMem() on the handler's context. The count n is not used.
+static
+void* Type_Measurement_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    void* Copy = _cmsDupMem(self ->ContextID, Ptr, sizeof(cmsICCMeasurementConditions));
+
+    return Copy;
+
+    cmsUNUSED_PARAMETER(n);
+}
+
+// Free callback for measurement items: releases Ptr with _cmsFree() on the
+// handler's context.
+static
+void Type_Measurement_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsContext ContextID = self ->ContextID;
+
+    _cmsFree(ContextID, Ptr);
+}
+
+
+// ********************************************************************************
+// Type cmsSigMultiLocalizedUnicodeType
+// ********************************************************************************
+//
+//   Do NOT trust SizeOfTag as there is an issue on the definition of profileSequenceDescTag. See the TechNote from
+//   Max Derhak and Rohit Patil about this: basically the size of the string table should be guessed and cannot be
+//   taken from the size of tag if this tag is embedded as part of bigger structures (profileSequenceDescTag, for instance)
+//
+
+// Reads a multiLocalizedUnicodeType tag into a newly allocated cmsMLU.
+//
+//   self      - type handler; self->ContextID is used for allocations and errors.
+//   io        - I/O handler to read from.
+//   nItems    - receives 1 on success and 0 on any failure.
+//   SizeOfTag - only used to bound Offset + Len of each record; the amount of
+//               string data actually read is derived from the directory itself.
+//
+// Layout consumed: record count, record length (must be 12), then one record per
+// entry (language, country, length, offset), then the string pool.
+// Returns the cmsMLU, or NULL on read error, unsupported record length, a record
+// that fails the range checks, or allocation failure.
+static
+void *Type_MLU_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsMLU* mlu;
+    cmsUInt32Number Count, RecLen, NumOfWchar;
+    cmsUInt32Number SizeOfHeader;
+    cmsUInt32Number  Len, Offset;
+    cmsUInt32Number  i;
+    wchar_t*         Block;
+    cmsUInt32Number  BeginOfThisString, EndOfThisString, LargestPosition;
+
+    *nItems = 0;
+    if (!_cmsReadUInt32Number(io, &Count)) return NULL;
+    if (!_cmsReadUInt32Number(io, &RecLen)) return NULL;
+
+    if (RecLen != 12) {
+
+        cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "multiLocalizedUnicodeType of len != 12 is not supported.");
+        return NULL;
+    }
+
+    mlu = cmsMLUalloc(self ->ContextID, Count);
+    if (mlu == NULL) return NULL;
+
+    mlu ->UsedEntries = Count;
+
+    // Size of the directory plus the tag base; string offsets are relative to it
+    SizeOfHeader = 12 * Count + sizeof(_cmsTagBase);
+    LargestPosition = 0;
+
+    for (i=0; i < Count; i++) {
+
+        if (!_cmsReadUInt16Number(io, &mlu ->Entries[i].Language)) goto Error;
+        if (!_cmsReadUInt16Number(io, &mlu ->Entries[i].Country))  goto Error;
+
+        // Now deal with Len and offset.
+        if (!_cmsReadUInt32Number(io, &Len)) goto Error;
+        if (!_cmsReadUInt32Number(io, &Offset)) goto Error;
+
+        // Reject offsets pointing inside the header, wrap-around of Offset + Len,
+        // and strings ending beyond the declared tag size
+        if (Offset < (SizeOfHeader + 8)) goto Error;
+        if (((Offset + Len) < Len) || ((Offset + Len) > SizeOfTag + 8)) goto Error;
+
+        // True begin of the string
+        BeginOfThisString = Offset - SizeOfHeader - 8;
+
+        // Adjust to wchar_t elements
+        mlu ->Entries[i].Len = (Len * sizeof(wchar_t)) / sizeof(cmsUInt16Number);
+        mlu ->Entries[i].StrW = (BeginOfThisString * sizeof(wchar_t)) / sizeof(cmsUInt16Number);
+
+        // To guess maximum size, add offset + len
+        EndOfThisString = BeginOfThisString + Len;
+        if (EndOfThisString > LargestPosition)
+            LargestPosition = EndOfThisString;
+    }
+
+    // Now read the remainder of the tag and fill all strings. From here on
+    // SizeOfTag is reused as the size of the in-memory string pool.
+    SizeOfTag   = (LargestPosition * sizeof(wchar_t)) / sizeof(cmsUInt16Number);
+    if (SizeOfTag == 0)
+    {
+        Block = NULL;
+        NumOfWchar = 0;
+
+    }
+    else
+    {
+        Block = (wchar_t*) _cmsMalloc(self ->ContextID, SizeOfTag);
+        if (Block == NULL) goto Error;
+        NumOfWchar = SizeOfTag / sizeof(wchar_t);
+        if (!_cmsReadWCharArray(io, NumOfWchar, Block)) {
+
+            // Block is not attached to mlu yet, so the Error path would not
+            // release it; free it here to avoid a leak
+            _cmsFree(self ->ContextID, Block);
+            goto Error;
+        }
+    }
+
+    mlu ->MemPool  = Block;
+    mlu ->PoolSize = SizeOfTag;
+    mlu ->PoolUsed = SizeOfTag;
+
+    *nItems = 1;
+    return (void*) mlu;
+
+Error:
+    if (mlu) cmsMLUfree(mlu);
+    return NULL;
+}
+
+// Writes a cmsMLU as multiLocalizedUnicodeType: the number of used entries, the
+// record length (12), one record per entry (language, country, length, offset)
+// and finally the string pool. Entry lengths and offsets are rescaled from
+// wchar_t-based to cmsUInt16Number-based sizes before being written.
+// A NULL Ptr is written as an empty placeholder (zero entries).
+// Returns FALSE as soon as any write fails, TRUE otherwise. nItems and self are
+// not used.
+static
+cmsBool  Type_MLU_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsMLU* mlu =(cmsMLU*) Ptr;
+    cmsUInt32Number DirectorySize;
+    cmsUInt32Number n;
+
+    if (mlu == NULL) {
+
+        // Empty placeholder: no records, record length still 12
+        if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+        return _cmsWriteUInt32Number(io, 12) ? TRUE : FALSE;
+    }
+
+    if (!_cmsWriteUInt32Number(io, mlu ->UsedEntries) ||
+        !_cmsWriteUInt32Number(io, 12)) return FALSE;
+
+    // Directory plus tag base; string offsets in the file start after it
+    DirectorySize = 12 * mlu ->UsedEntries + sizeof(_cmsTagBase);
+
+    for (n = 0; n < mlu ->UsedEntries; n++) {
+
+        cmsUInt32Number FileLen    = mlu ->Entries[n].Len;
+        cmsUInt32Number FileOffset = mlu ->Entries[n].StrW;
+
+        FileLen    = (FileLen * sizeof(cmsUInt16Number)) / sizeof(wchar_t);
+        FileOffset = (FileOffset * sizeof(cmsUInt16Number)) / sizeof(wchar_t) + DirectorySize + 8;
+
+        if (!_cmsWriteUInt16Number(io, mlu ->Entries[n].Language) ||
+            !_cmsWriteUInt16Number(io, mlu ->Entries[n].Country)  ||
+            !_cmsWriteUInt32Number(io, FileLen) ||
+            !_cmsWriteUInt32Number(io, FileOffset)) return FALSE;
+    }
+
+    if (!_cmsWriteWCharArray(io, mlu ->PoolUsed / sizeof(wchar_t), (wchar_t*)  mlu ->MemPool)) return FALSE;
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+// Dup callback for MLU items: returns whatever cmsMLUdup() produces for Ptr.
+// n and self are not used.
+static
+void* Type_MLU_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    void* Copy = (void*) cmsMLUdup((cmsMLU*) Ptr);
+
+    return Copy;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Free callback for MLU items: hands Ptr to cmsMLUfree(). self is not used.
+static
+void Type_MLU_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsMLU* mlu = (cmsMLU*) Ptr;
+
+    cmsMLUfree(mlu);
+    return;
+
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+// ********************************************************************************
+// Type cmsSigLut8Type
+// ********************************************************************************
+
+// Decide which LUT type to use on writing an AToB pipeline.
+// For ICCVersion below 4.0 the pipeline's SaveAs8Bits flag selects between
+// cmsSigLut8Type and cmsSigLut16Type; otherwise cmsSigLutAtoBType is used.
+static
+cmsTagTypeSignature DecideLUTtypeA2B(cmsFloat64Number ICCVersion, const void *Data)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Data;
+
+    // Written as a negated "<" so the comparison behaves exactly as before
+    if (!(ICCVersion < 4.0))
+        return cmsSigLutAtoBType;
+
+    return (Lut ->SaveAs8Bits) ? cmsSigLut8Type : cmsSigLut16Type;
+}
+
+// Decide which LUT type to use on writing a BToA pipeline.
+// For ICCVersion below 4.0 the pipeline's SaveAs8Bits flag selects between
+// cmsSigLut8Type and cmsSigLut16Type; otherwise cmsSigLutBtoAType is used.
+static
+cmsTagTypeSignature DecideLUTtypeB2A(cmsFloat64Number ICCVersion, const void *Data)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Data;
+
+    // Written as a negated "<" so the comparison behaves exactly as before
+    if (!(ICCVersion < 4.0))
+        return cmsSigLutBtoAType;
+
+    return (Lut ->SaveAs8Bits) ? cmsSigLut8Type : cmsSigLut16Type;
+}
+
+/*
+This structure represents a colour transform using tables of 8-bit precision.
+This type contains four processing elements: a 3 by 3 matrix (which shall be
+the identity matrix unless the input colour space is XYZ), a set of one dimensional
+input tables, a multidimensional lookup table, and a set of one dimensional output
+tables. Data is processed using these elements via the following sequence:
+(matrix) -> (1d input tables)  -> (multidimensional lookup table - CLUT) -> (1d output tables)
+
+Byte Position   Field Length (bytes)  Content Encoded as...
+8                  1          Number of Input Channels (i)    uInt8Number
+9                  1          Number of Output Channels (o)   uInt8Number
+10                 1          Number of CLUT grid points (identical for each side) (g) uInt8Number
+11                 1          Reserved for padding (fill with 00h)
+
+12..15             4          Encoded e00 parameter   s15Fixed16Number
+*/
+
+
+// Read 8 bit tables as gamma functions.
+// Reads nChannels tables of 256 bytes each from io, expands every byte with
+// FROM_8_TO_16 into a 256-entry tone curve, and appends the resulting curve set
+// to lut as one stage. Returns FALSE when nChannels is 0 or above cmsMAXCHANNELS,
+// or on any allocation, read or insertion failure. The temporary curves and the
+// byte buffer are always released before returning.
+static
+cmsBool  Read8bitTables(cmsContext ContextID, cmsIOHANDLER* io, cmsPipeline* lut, cmsUInt32Number nChannels)
+{
+    cmsToneCurve* Curves[cmsMAXCHANNELS];
+    cmsUInt8Number* Raw = NULL;
+    cmsUInt32Number ch, k;
+    cmsBool Ok = FALSE;
+
+    if (nChannels > cmsMAXCHANNELS || nChannels <= 0) return FALSE;
+
+    memset(Curves, 0, sizeof(Curves));
+
+    Raw = (cmsUInt8Number*) _cmsMalloc(ContextID, 256);
+    if (Raw == NULL) return FALSE;
+
+    // All curves are allocated up front, before anything is read
+    for (ch = 0; ch < nChannels; ch++) {
+        Curves[ch] = cmsBuildTabulatedToneCurve16(ContextID, 256, NULL);
+        if (Curves[ch] == NULL) goto Done;
+    }
+
+    for (ch = 0; ch < nChannels; ch++) {
+
+        if (io ->Read(io, Raw, 256, 1) != 1) goto Done;
+
+        for (k = 0; k < 256; k++)
+            Curves[ch]->Table16[k] = (cmsUInt16Number) FROM_8_TO_16(Raw[k]);
+    }
+
+    _cmsFree(ContextID, Raw);
+    Raw = NULL;
+
+    if (cmsPipelineInsertStage(lut, cmsAT_END, cmsStageAllocToneCurves(ContextID, nChannels, Curves)))
+        Ok = TRUE;
+
+Done:
+    // Shared cleanup for success and failure; unallocated slots are still NULL
+    for (ch = 0; ch < nChannels; ch++) {
+        if (Curves[ch]) cmsFreeToneCurve(Curves[ch]);
+    }
+
+    if (Raw) _cmsFree(ContextID, Raw);
+    return Ok;
+}
+
+
+// Writes n tables of 256 bytes each, one per channel.
+//
+//   ContextID - context used for error reporting.
+//   io        - I/O handler to write to.
+//   n         - number of tables (channels) to write.
+//   Tables    - curve set to serialize, or NULL when the pipeline has no such stage.
+//
+// For each channel:
+//   - a missing curve set (Tables == NULL) or a 2-entry {0, 65535} identity curve
+//     is written as the ramp 0..255;
+//   - a 256-entry curve is written with every entry reduced by FROM_16_TO_8;
+//   - any other entry count is reported through cmsSignalError and fails.
+//
+// The ramp for a missing curve set keeps the output readable by Type_LUT8_Read,
+// which always reads 256 bytes per channel, and matches Type_LUT16_Write, which
+// also emits identity tables for missing stages.
+// Returns TRUE on success, FALSE on a write error or unsupported curve size.
+static
+cmsBool Write8bitTables(cmsContext ContextID, cmsIOHANDLER* io, cmsUInt32Number n, _cmsStageToneCurvesData* Tables)
+{
+    int j;
+    cmsUInt32Number i;
+    cmsUInt8Number val;
+
+    for (i=0; i < n; i++) {
+
+        // No curves at all, or the usual case of identity curves
+        if (Tables == NULL ||
+            ((Tables ->TheCurves[i]->nEntries == 2) &&
+             (Tables->TheCurves[i]->Table16[0] == 0) &&
+             (Tables->TheCurves[i]->Table16[1] == 65535))) {
+
+            for (j=0; j < 256; j++) {
+                if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) j)) return FALSE;
+            }
+        }
+        else if (Tables ->TheCurves[i]->nEntries != 256) {
+
+            cmsSignalError(ContextID, cmsERROR_RANGE, "LUT8 needs 256 entries on prelinearization");
+            return FALSE;
+        }
+        else {
+
+            for (j=0; j < 256; j++) {
+
+                val = (cmsUInt8Number) FROM_16_TO_8(Tables->TheCurves[i]->Table16[j]);
+
+                if (!_cmsWriteUInt8Number(io, val)) return FALSE;
+            }
+        }
+    }
+    return TRUE;
+}
+
+
+// Computes n * a^b in unsigned arithmetic with overflow checking.
+// Returns 0 when a or n is 0, and (cmsUInt32Number) -1 when an overflow is
+// detected. The test inside the loop runs after each multiplication and compares
+// against UINT_MAX / a, so it is conservative: it also reports overflow when one
+// more multiplication by a would not fit.
+static
+cmsUInt32Number uipow(cmsUInt32Number n, cmsUInt32Number a, cmsUInt32Number b)
+{
+    cmsUInt32Number Power = 1, Total;
+
+    if (a == 0 || n == 0) return 0;
+
+    while (b > 0) {
+
+        Power *= a;
+
+        // Check for overflow
+        if (Power > UINT_MAX / a) return (cmsUInt32Number) -1;
+
+        b--;
+    }
+
+    Total = Power * n;
+
+    // Dividing back must recover the power, otherwise the product wrapped
+    return (Power == Total / n) ? Total : (cmsUInt32Number) -1;
+}
+
+
+// That will create a MPE LUT with Matrix, pre tables, CLUT and post tables.
+// 8 bit lut may be scaled easily to v4 PCS, but we need also to properly adjust
+// PCS on BToAxx tags and AtoB if abstract. We need to fix input direction.
+
+// Reads a lut8Type tag and returns it as a newly allocated cmsPipeline.
+//
+//   self      - type handler; self->ContextID is used for all allocations.
+//   io        - I/O handler to read from.
+//   nItems    - receives 1 on success and 0 on any failure.
+//   SizeOfTag - not used.
+//
+// Data is consumed in this order: input channels, output channels, CLUT grid
+// points, one padding byte, nine matrix coefficients, the input tables, the CLUT
+// (when its computed size is non-zero) and the output tables.
+// Returns NULL on any read, range or allocation failure; a partially built
+// pipeline is freed before returning.
+static
+void *Type_LUT8_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt8Number InputChannels, OutputChannels, CLUTpoints;
+    cmsUInt8Number* Temp = NULL;
+    cmsPipeline* NewLUT = NULL;
+    cmsUInt32Number nTabSize, i;
+    cmsFloat64Number Matrix[3*3];
+
+    *nItems = 0;
+
+    if (!_cmsReadUInt8Number(io, &InputChannels)) goto Error;
+    if (!_cmsReadUInt8Number(io, &OutputChannels)) goto Error;
+    if (!_cmsReadUInt8Number(io, &CLUTpoints)) goto Error;
+
+     if (CLUTpoints == 1) goto Error; // Impossible value, 0 for no CLUT and then 2 at least
+
+    // Padding
+    if (!_cmsReadUInt8Number(io, NULL)) goto Error;
+
+    // Do some checking
+    if (InputChannels == 0 || InputChannels > cmsMAXCHANNELS)  goto Error;
+    if (OutputChannels == 0 || OutputChannels > cmsMAXCHANNELS) goto Error;
+
+   // Allocates an empty Pipeline
+    NewLUT = cmsPipelineAlloc(self ->ContextID, InputChannels, OutputChannels);
+    if (NewLUT == NULL) goto Error;
+
+    // Read the Matrix
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[0])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[1])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[2])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[3])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[4])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[5])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[6])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[7])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[8])) goto Error;
+
+
+    // Only operates if not identity...
+    // (and only for 3 input channels; otherwise the matrix just read is dropped)
+    if ((InputChannels == 3) && !_cmsMAT3isIdentity((cmsMAT3*) Matrix)) {
+
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_BEGIN, cmsStageAllocMatrix(self ->ContextID, 3, 3, Matrix, NULL)))
+            goto Error;
+    }
+
+    // Get input tables
+    if (!Read8bitTables(self ->ContextID, io,  NewLUT, InputChannels)) goto Error;
+
+    // Get 3D CLUT. Check the overflow....
+    // nTabSize = OutputChannels * CLUTpoints^InputChannels; 0 means there is no CLUT
+    nTabSize = uipow(OutputChannels, CLUTpoints, InputChannels);
+    if (nTabSize == (cmsUInt32Number) -1) goto Error;
+    if (nTabSize > 0) {
+
+        cmsUInt16Number *PtrW, *T;
+       
+        // T holds the widened 16-bit table; PtrW is the write cursor into it
+        PtrW = T  = (cmsUInt16Number*) _cmsCalloc(self ->ContextID, nTabSize, sizeof(cmsUInt16Number));
+        if (T  == NULL) goto Error;
+
+        // Temp receives the raw 8-bit samples from the file
+        Temp = (cmsUInt8Number*) _cmsMalloc(self ->ContextID, nTabSize);
+        if (Temp == NULL) {
+            _cmsFree(self ->ContextID, T);
+            goto Error;
+        }
+
+        if (io ->Read(io, Temp, nTabSize, 1) != 1) {
+            _cmsFree(self ->ContextID, T);
+            _cmsFree(self ->ContextID, Temp);
+            goto Error;
+        }
+
+        for (i = 0; i < nTabSize; i++) {
+
+            *PtrW++ = FROM_8_TO_16(Temp[i]);
+        }
+        _cmsFree(self ->ContextID, Temp);
+        Temp = NULL;
+
+        // T is freed here on both outcomes of the insertion
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_END, cmsStageAllocCLut16bit(self ->ContextID, CLUTpoints, InputChannels, OutputChannels, T))) {
+            _cmsFree(self ->ContextID, T);
+            goto Error;
+        }
+        _cmsFree(self ->ContextID, T);
+    }
+
+
+    // Get output tables
+    if (!Read8bitTables(self ->ContextID, io,  NewLUT, OutputChannels)) goto Error;
+
+    *nItems = 1;
+    return NewLUT;
+
+Error:
+    // Only the pipeline needs releasing here; T and Temp are freed where they fail
+    if (NewLUT != NULL) cmsPipelineFree(NewLUT);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+// Writes a pipeline as lut8Type.
+// We only allow a specific MPE structure: Matrix plus prelin, plus clut, plus post-lin.
+// Each of those stages is optional but they must appear in that order; any stage
+// left over after them makes the pipeline unsuitable and the call fails.
+//
+//   self   - type handler; self->ContextID is passed to Write8bitTables.
+//   io     - I/O handler to write to.
+//   Ptr    - the cmsPipeline to serialize.
+//   nItems - not used.
+//
+// Output order: input channels, output channels, CLUT grid points, padding byte,
+// nine matrix coefficients (identity when there is no matrix stage), input
+// tables, CLUT samples reduced with FROM_16_TO_8, output tables.
+// Returns TRUE on success, FALSE on an unsuitable pipeline, a size overflow or
+// a write error.
+static
+cmsBool  Type_LUT8_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsUInt32Number j, nTabSize;
+    cmsUInt8Number  val;
+    cmsPipeline* NewLUT = (cmsPipeline*) Ptr;
+    cmsStage* mpe;
+    _cmsStageToneCurvesData* PreMPE = NULL, *PostMPE = NULL;
+    _cmsStageMatrixData* MatMPE = NULL;
+    _cmsStageCLutData* clut = NULL;
+    cmsUInt32Number clutPoints;
+
+    // Disassemble the LUT into components.
+    // An empty pipeline has no first element, so guard the very first access too
+    // (as Type_LUT16_Write does).
+    mpe = NewLUT -> Elements;
+    if (mpe != NULL && mpe ->Type == cmsSigMatrixElemType) {
+
+        MatMPE = (_cmsStageMatrixData*) mpe ->Data;
+        mpe = mpe -> Next;
+    }
+
+    if (mpe != NULL && mpe ->Type == cmsSigCurveSetElemType) {
+        PreMPE = (_cmsStageToneCurvesData*) mpe ->Data;
+        mpe = mpe -> Next;
+    }
+
+    if (mpe != NULL && mpe ->Type == cmsSigCLutElemType) {
+        clut  = (_cmsStageCLutData*) mpe -> Data;
+        mpe = mpe ->Next;
+    }
+
+    if (mpe != NULL && mpe ->Type == cmsSigCurveSetElemType) {
+        PostMPE = (_cmsStageToneCurvesData*) mpe ->Data;
+        mpe = mpe -> Next;
+    }
+
+    // That should be all
+    if (mpe != NULL) {
+        cmsSignalError(mpe->ContextID, cmsERROR_UNKNOWN_EXTENSION, "LUT is not suitable to be saved as LUT8");
+        return FALSE;
+    }
+
+    // Grid points are taken from the first dimension of the CLUT, 0 if no CLUT
+    if (clut == NULL)
+        clutPoints = 0;
+    else
+        clutPoints    = clut->Params->nSamples[0];
+
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) NewLUT ->InputChannels)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) NewLUT ->OutputChannels)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) clutPoints)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, 0)) return FALSE; // Padding
+
+    if (MatMPE != NULL) {
+
+        // The 3x3 matrix, coefficient by coefficient
+        for (j=0; j < 9; j++) {
+            if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[j])) return FALSE;
+        }
+    }
+    else {
+
+        // No matrix stage: write the identity (ones at indices 0, 4 and 8)
+        for (j=0; j < 9; j++) {
+            if (!_cmsWrite15Fixed16Number(io, (j % 4 == 0) ? 1 : 0)) return FALSE;
+        }
+    }
+
+    // The prelinearization table
+    if (!Write8bitTables(self ->ContextID, io, NewLUT ->InputChannels, PreMPE)) return FALSE;
+
+    // Number of CLUT samples: OutputChannels * clutPoints^InputChannels
+    nTabSize = uipow(NewLUT->OutputChannels, clutPoints, NewLUT ->InputChannels);
+    if (nTabSize == (cmsUInt32Number) -1) return FALSE;
+    if (nTabSize > 0) {
+
+        // The 3D CLUT.
+        if (clut != NULL) {
+
+            for (j=0; j < nTabSize; j++) {
+
+                val = (cmsUInt8Number) FROM_16_TO_8(clut ->Tab.T[j]);
+                if (!_cmsWriteUInt8Number(io, val)) return FALSE;
+            }
+        }
+    }
+
+    // The postlinearization table
+    if (!Write8bitTables(self ->ContextID, io, NewLUT ->OutputChannels, PostMPE)) return FALSE;
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+// Dup callback for LUT8 items: returns whatever cmsPipelineDup() produces for
+// Ptr. n and self are not used.
+static
+void* Type_LUT8_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    void* Copy = (void*) cmsPipelineDup((cmsPipeline*) Ptr);
+
+    return Copy;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Free callback for LUT8 items: hands Ptr to cmsPipelineFree(). self is not used.
+static
+void Type_LUT8_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Ptr;
+
+    cmsPipelineFree(Lut);
+    return;
+
+    cmsUNUSED_PARAMETER(self);
+}
+
+// ********************************************************************************
+// Type cmsSigLut16Type
+// ********************************************************************************
+
+// Read 16 bit tables as gamma functions.
+// For each of nChannels channels a tone curve of nEntries entries is allocated and
+// filled from io; the resulting curve set is appended to lut as one stage.
+// nEntries == 0 is accepted as an empty table (nothing is read, TRUE is returned).
+// Returns FALSE when nEntries is 1, when nChannels exceeds cmsMAXCHANNELS, or on
+// any allocation, read or insertion failure. The temporary curves are always
+// released before returning.
+static
+cmsBool  Read16bitTables(cmsContext ContextID, cmsIOHANDLER* io, cmsPipeline* lut, 
+                                    cmsUInt32Number nChannels, cmsUInt32Number nEntries)
+{
+    cmsToneCurve* Curves[cmsMAXCHANNELS];
+    cmsUInt32Number ch;
+    cmsBool Ok = FALSE;
+
+    // Maybe an empty table? (this is a lcms extension)
+    if (nEntries <= 0) return TRUE;
+
+    // Check for malicious profiles
+    if (nEntries < 2 || nChannels > cmsMAXCHANNELS) return FALSE;
+
+    // Start with every slot empty so cleanup can tell what was allocated
+    memset(Curves, 0, sizeof(Curves));
+
+    for (ch = 0; ch < nChannels; ch++) {
+
+        Curves[ch] = cmsBuildTabulatedToneCurve16(ContextID, nEntries, NULL);
+        if (Curves[ch] == NULL) goto Cleanup;
+
+        if (!_cmsReadUInt16Array(io, nEntries, Curves[ch]->Table16)) goto Cleanup;
+    }
+
+    // Add the table (which may certainly be an identity, but this is up to the optimizer, not the reading code)
+    if (cmsPipelineInsertStage(lut, cmsAT_END, cmsStageAllocToneCurves(ContextID, nChannels, Curves)))
+        Ok = TRUE;
+
+Cleanup:
+    // Shared by the success and failure paths
+    for (ch = 0; ch < nChannels; ch++) {
+        if (Curves[ch]) cmsFreeToneCurve(Curves[ch]);
+    }
+
+    return Ok;
+}
+
+// Writes every curve of Tables as consecutive 16-bit values. The number of
+// entries written per curve is taken from the first curve and applied to all.
+// Tables must not be NULL (asserted). Returns FALSE on the first failing write,
+// TRUE otherwise. ContextID is not used.
+static
+cmsBool Write16bitTables(cmsContext ContextID, cmsIOHANDLER* io, _cmsStageToneCurvesData* Tables)
+{
+    cmsUInt32Number Curve, Sample;
+    cmsUInt32Number Count;
+
+    _cmsAssert(Tables != NULL);
+
+    Count = Tables->TheCurves[0]->nEntries;
+
+    for (Curve = 0; Curve < Tables ->nCurves; Curve++) {
+
+        for (Sample = 0; Sample < Count; Sample++) {
+
+            cmsUInt16Number Value = Tables->TheCurves[Curve]->Table16[Sample];
+
+            if (!_cmsWriteUInt16Number(io, Value)) return FALSE;
+        }
+    }
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(ContextID);
+}
+
+// Reads a lut16Type tag and returns it as a newly allocated cmsPipeline.
+//
+//   self      - type handler; self->ContextID is used for all allocations.
+//   io        - I/O handler to read from.
+//   nItems    - receives 1 on success and 0 on any failure.
+//   SizeOfTag - not used.
+//
+// Data is consumed in this order: input channels, output channels, CLUT grid
+// points, one padding byte, nine matrix coefficients, input and output table
+// entry counts, the input tables, the CLUT (when its computed size is non-zero)
+// and the output tables.
+// Returns NULL on any read, range or allocation failure; a partially built
+// pipeline is freed before returning.
+static
+void *Type_LUT16_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt8Number InputChannels, OutputChannels, CLUTpoints;
+    cmsPipeline* NewLUT = NULL;
+    cmsUInt32Number nTabSize;
+    cmsFloat64Number Matrix[3*3];
+    cmsUInt16Number InputEntries, OutputEntries;
+
+    *nItems = 0;
+
+    // Nothing has been allocated yet, so these failures can return directly
+    if (!_cmsReadUInt8Number(io, &InputChannels)) return NULL;
+    if (!_cmsReadUInt8Number(io, &OutputChannels)) return NULL;
+    if (!_cmsReadUInt8Number(io, &CLUTpoints)) return NULL;   // 255 maximum
+
+    // Padding
+    if (!_cmsReadUInt8Number(io, NULL)) return NULL;
+
+    // Do some checking
+    if (InputChannels == 0 || InputChannels > cmsMAXCHANNELS)  goto Error;
+    if (OutputChannels == 0 || OutputChannels > cmsMAXCHANNELS) goto Error;
+
+    // Allocates an empty LUT
+    NewLUT = cmsPipelineAlloc(self ->ContextID, InputChannels, OutputChannels);
+    if (NewLUT == NULL) goto Error;
+
+    // Read the Matrix
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[0])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[1])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[2])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[3])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[4])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[5])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[6])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[7])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[8])) goto Error;
+
+
+    // Only operates on 3 channels
+    // (and only when the matrix is not the identity; otherwise it is dropped)
+    if ((InputChannels == 3) && !_cmsMAT3isIdentity((cmsMAT3*) Matrix)) {
+
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_END, cmsStageAllocMatrix(self ->ContextID, 3, 3, Matrix, NULL)))
+            goto Error;
+    }
+
+    // Entry counts of the input and output tables
+    if (!_cmsReadUInt16Number(io, &InputEntries)) goto Error;
+    if (!_cmsReadUInt16Number(io, &OutputEntries)) goto Error;
+
+    if (InputEntries > 0x7FFF || OutputEntries > 0x7FFF) goto Error;
+    if (CLUTpoints == 1) goto Error; // Impossible value, 0 for no CLUT and then 2 at least
+
+    // Get input tables
+    if (!Read16bitTables(self ->ContextID, io,  NewLUT, InputChannels, InputEntries)) goto Error;
+
+    // Get 3D CLUT
+    // nTabSize = OutputChannels * CLUTpoints^InputChannels; 0 means there is no CLUT
+    nTabSize = uipow(OutputChannels, CLUTpoints, InputChannels);
+    if (nTabSize == (cmsUInt32Number) -1) goto Error;
+    if (nTabSize > 0) {
+
+        cmsUInt16Number *T;
+
+        T  = (cmsUInt16Number*) _cmsCalloc(self ->ContextID, nTabSize, sizeof(cmsUInt16Number));
+        if (T  == NULL) goto Error;
+
+        if (!_cmsReadUInt16Array(io, nTabSize, T)) {
+            _cmsFree(self ->ContextID, T);
+            goto Error;
+        }
+
+        // T is freed here on both outcomes of the insertion
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_END, cmsStageAllocCLut16bit(self ->ContextID, CLUTpoints, InputChannels, OutputChannels, T))) {
+            _cmsFree(self ->ContextID, T);
+            goto Error;
+        }
+        _cmsFree(self ->ContextID, T);
+    }
+
+
+    // Get output tables
+    if (!Read16bitTables(self ->ContextID, io,  NewLUT, OutputChannels, OutputEntries)) goto Error;
+
+    *nItems = 1;
+    return NewLUT;
+
+Error:
+    // NewLUT is still NULL when the channel checks fail, hence the guard
+    if (NewLUT != NULL) cmsPipelineFree(NewLUT);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+// Writes a pipeline as lut16Type.
+// We only allow some specific MPE structures: Matrix plus prelin, plus clut, plus post-lin.
+// Some empty defaults are created for missing parts
+//
+//   self   - type handler; self->ContextID is passed to Write16bitTables.
+//   io     - I/O handler to write to.
+//   Ptr    - the cmsPipeline to serialize.
+//   nItems - not used.
+//
+// Output order: input channels, output channels, CLUT grid points, padding byte,
+// nine matrix coefficients (identity when there is no matrix stage), input and
+// output table entry counts, input tables, CLUT samples, output tables.
+// Returns TRUE on success, FALSE on an unsuitable pipeline, a size overflow or
+// a write error.
+
+static
+cmsBool  Type_LUT16_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsUInt32Number nTabSize;
+    cmsPipeline* NewLUT = (cmsPipeline*) Ptr;
+    cmsStage* mpe;
+    _cmsStageToneCurvesData* PreMPE = NULL, *PostMPE = NULL;
+    _cmsStageMatrixData* MatMPE = NULL;
+    _cmsStageCLutData* clut = NULL;
+    cmsUInt32Number i, InputChannels, OutputChannels, clutPoints;
+
+    // Disassemble the LUT into components.
+    // Each stage is optional, but they are only recognized in this order.
+    mpe = NewLUT -> Elements;
+    if (mpe != NULL && mpe ->Type == cmsSigMatrixElemType) {
+
+        MatMPE = (_cmsStageMatrixData*) mpe ->Data;
+        mpe = mpe -> Next;
+    }
+
+
+    if (mpe != NULL && mpe ->Type == cmsSigCurveSetElemType) {
+        PreMPE = (_cmsStageToneCurvesData*) mpe ->Data;
+        mpe = mpe -> Next;
+    }
+
+    if (mpe != NULL && mpe ->Type == cmsSigCLutElemType) {
+        clut  = (_cmsStageCLutData*) mpe -> Data;
+        mpe = mpe ->Next;
+    }
+
+    if (mpe != NULL && mpe ->Type == cmsSigCurveSetElemType) {
+        PostMPE = (_cmsStageToneCurvesData*) mpe ->Data;
+        mpe = mpe -> Next;
+    }
+
+    // That should be all
+    if (mpe != NULL) {
+        cmsSignalError(mpe->ContextID, cmsERROR_UNKNOWN_EXTENSION, "LUT is not suitable to be saved as LUT16");
+        return FALSE;
+    }
+
+    InputChannels  = cmsPipelineInputChannels(NewLUT);
+    OutputChannels = cmsPipelineOutputChannels(NewLUT);
+
+    // Grid points are taken from the first dimension of the CLUT, 0 if no CLUT
+    if (clut == NULL)
+        clutPoints = 0;
+    else
+        clutPoints    = clut->Params->nSamples[0];
+
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) InputChannels)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) OutputChannels)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) clutPoints)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, 0)) return FALSE; // Padding
+
+
+    if (MatMPE != NULL) {
+
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[0])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[1])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[2])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[3])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[4])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[5])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[6])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[7])) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[8])) return FALSE;
+    }
+    else {
+
+        // No matrix stage: write the identity matrix
+        if (!_cmsWrite15Fixed16Number(io, 1)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 1)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, 1)) return FALSE;
+    }
+
+
+    // Entry count of the input tables: taken from the first curve, or 2 for the
+    // default table written below when the stage is missing
+    if (PreMPE != NULL) {
+        if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) PreMPE ->TheCurves[0]->nEntries)) return FALSE;
+    } else {
+            if (!_cmsWriteUInt16Number(io, 2)) return FALSE;
+    }
+
+    // Same for the output tables
+    if (PostMPE != NULL) {
+        if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) PostMPE ->TheCurves[0]->nEntries)) return FALSE;
+    } else {
+        if (!_cmsWriteUInt16Number(io, 2)) return FALSE;
+
+    }
+
+    // The prelinearization table
+
+    if (PreMPE != NULL) {
+        if (!Write16bitTables(self ->ContextID, io, PreMPE)) return FALSE;
+    }
+    else {
+        // Default: a 2-entry table {0, 0xffff} per input channel
+        for (i=0; i < InputChannels; i++) {
+
+            if (!_cmsWriteUInt16Number(io, 0)) return FALSE;
+            if (!_cmsWriteUInt16Number(io, 0xffff)) return FALSE;
+        }
+    }
+
+    // Number of CLUT samples: OutputChannels * clutPoints^InputChannels
+    nTabSize = uipow(OutputChannels, clutPoints, InputChannels);
+    if (nTabSize == (cmsUInt32Number) -1) return FALSE;
+    if (nTabSize > 0) {
+        // The 3D CLUT.
+        if (clut != NULL) {
+            if (!_cmsWriteUInt16Array(io, nTabSize, clut->Tab.T)) return FALSE;
+        }
+    }
+
+    // The postlinearization table
+    if (PostMPE != NULL) {
+        if (!Write16bitTables(self ->ContextID, io, PostMPE)) return FALSE;
+    }
+    else {
+        // Default: a 2-entry table {0, 0xffff} per output channel
+        for (i=0; i < OutputChannels; i++) {
+
+            if (!_cmsWriteUInt16Number(io, 0)) return FALSE;
+            if (!_cmsWriteUInt16Number(io, 0xffff)) return FALSE;
+        }
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+// Dup callback for LUT16 items: returns whatever cmsPipelineDup() produces for
+// Ptr. n and self are not used.
+static
+void* Type_LUT16_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    void* Copy = (void*) cmsPipelineDup((cmsPipeline*) Ptr);
+
+    return Copy;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Free callback for LUT16 items: hands Ptr to cmsPipelineFree(). self is not used.
+static
+void Type_LUT16_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Ptr;
+
+    cmsPipelineFree(Lut);
+    return;
+
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+// ********************************************************************************
+// Type cmsSigLutAToBType
+// ********************************************************************************
+
+
+// V4 stuff. Read matrix for LutAtoB and LutBtoA
+
+// Seeks io to Offset, reads nine matrix coefficients followed by three offsets
+// (all as 15.16 fixed point numbers) and returns a 3x3 matrix stage built from
+// them with cmsStageAllocMatrix(). Returns NULL if the seek or any read fails;
+// otherwise returns whatever cmsStageAllocMatrix() returns.
+static
+cmsStage* ReadMatrix(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number Offset)
+{
+    cmsFloat64Number Coef[3*3];
+    cmsFloat64Number Bias[3];
+    int k;
+
+    // Go to address
+    if (!io -> Seek(io, Offset)) return NULL;
+
+    // The matrix coefficients come first...
+    for (k = 0; k < 3*3; k++) {
+        if (!_cmsRead15Fixed16Number(io, &Coef[k])) return NULL;
+    }
+
+    // ...followed by the three offsets
+    for (k = 0; k < 3; k++) {
+        if (!_cmsRead15Fixed16Number(io, &Bias[k])) return NULL;
+    }
+
+    return cmsStageAllocMatrix(self ->ContextID, 3, 3, Coef, Bias);
+}
+
+
+
+
+//  V4 stuff. Read CLUT part for LutAtoB and LutBtoA
+
+static
+cmsStage* ReadCLUT(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, 
+                   cmsUInt32Number Offset, cmsUInt32Number InputChannels, cmsUInt32Number OutputChannels)
+{
+    cmsUInt8Number  RawGrid[cmsMAXCHANNELS];   // Grid points per dimension, as stored (one byte each)
+    cmsUInt32Number Grid[cmsMAXCHANNELS];      // Same values widened for the allocator
+    cmsUInt32Number k;
+    cmsUInt8Number  Precision;
+    cmsStage* Stage;
+    _cmsStageCLutData* Table;
+
+    if (!io -> Seek(io, Offset)) return NULL;
+    if (io -> Read(io, RawGrid, cmsMAXCHANNELS, 1) != 1) return NULL;
+
+    for (k = 0; k < cmsMAXCHANNELS; k++) {
+
+        // A single grid point is rejected: 0 means no CLUT, otherwise at least 2 are needed
+        if (RawGrid[k] == 1) return NULL;
+        Grid[k] = RawGrid[k];
+    }
+
+    if (!_cmsReadUInt8Number(io, &Precision)) return NULL;
+
+    // Three more bytes are read and discarded
+    for (k = 0; k < 3; k++) {
+        if (!_cmsReadUInt8Number(io, NULL)) return NULL;
+    }
+
+    Stage = cmsStageAllocCLut16bitGranular(self ->ContextID, Grid, InputChannels, OutputChannels, NULL);
+    if (Stage == NULL) return NULL;
+
+    Table = (_cmsStageCLutData*) Stage ->Data;
+
+    // Entries are stored either as 1 or as 2 bytes each
+    switch (Precision) {
+
+        case 1:
+            {
+                cmsUInt8Number Sample;
+
+                for (k = 0; k < Table ->nEntries; k++) {
+
+                    if (io ->Read(io, &Sample, sizeof(cmsUInt8Number), 1) != 1) goto Error;
+
+                    // Expand every 8 bit sample to 16 bits
+                    Table ->Tab.T[k] = FROM_8_TO_16(Sample);
+                }
+            }
+            break;
+
+        case 2:
+            if (!_cmsReadUInt16Array(io, Table ->nEntries, Table ->Tab.T)) goto Error;
+            break;
+
+        default:
+            cmsStageFree(Stage);
+            cmsSignalError(self ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown precision of '%d'", Precision);
+            return NULL;
+    }
+
+    return Stage;
+
+Error:
+    // Read failure after the stage was allocated
+    cmsStageFree(Stage);
+    return NULL;
+}
+
+static
+cmsToneCurve* ReadEmbeddedCurve(struct _cms_typehandler_struct* self, cmsIOHANDLER* io)
+{
+    cmsUInt32Number nItems;
+    char String[5];
+    cmsTagTypeSignature BaseType = _cmsReadTypeBase(io);
+
+    // Dispatch on the type base found in the stream to the matching curve reader
+    if (BaseType == cmsSigCurveType)
+        return (cmsToneCurve*) Type_Curve_Read(self, io, &nItems, 0);
+
+    if (BaseType == cmsSigParametricCurveType)
+        return (cmsToneCurve*) Type_ParametricCurve_Read(self, io, &nItems, 0);
+
+    // Anything else is reported and rejected
+    _cmsTagSignature2String(String, (cmsTagSignature) BaseType);
+    cmsSignalError(self ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown curve type '%s'", String);
+    return NULL;
+}
+
+
+// Read a set of nCurves embedded curves starting at Offset and build a tone-curve
+// stage from them. Returns NULL when nCurves exceeds cmsMAXCHANNELS, when the seek
+// fails, when any curve or its alignment cannot be read, or when
+// cmsStageAllocToneCurves returns NULL.
+static
+cmsStage* ReadSetOfCurves(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number Offset, cmsUInt32Number nCurves)
+{
+    cmsToneCurve* Curves[cmsMAXCHANNELS];
+    cmsUInt32Number i;
+    cmsStage* Lin = NULL;
+
+    // This function returns a pointer, so failure is reported as NULL rather than FALSE
+    if (nCurves > cmsMAXCHANNELS) return NULL;
+
+    if (!io -> Seek(io, Offset)) return NULL;
+
+    // Clear the used slots first, so the cleanup loop only sees NULL or valid curves
+    // (relies on cmsFreeToneCurve accepting NULL, as the original code did)
+    for (i=0; i < nCurves; i++)
+        Curves[i] = NULL;
+
+    for (i=0; i < nCurves; i++) {
+
+        Curves[i] = ReadEmbeddedCurve(self, io);
+        if (Curves[i] == NULL) goto Error;
+
+        // Skip the alignment that follows each embedded curve
+        if (!_cmsReadAlignment(io)) goto Error;
+
+    }
+
+    Lin = cmsStageAllocToneCurves(self ->ContextID, nCurves, Curves);
+
+Error:
+    // Reached on success too: the local curves are always released and only Lin is returned
+    for (i=0; i < nCurves; i++)
+        cmsFreeToneCurve(Curves[i]);
+
+    return Lin;
+}
+
+
+// LutAtoB type
+
+// This structure represents a colour transform. The type contains up to five processing
+// elements which are stored in the AtoBTag tag in the following order: a set of one
+// dimensional curves, a 3 by 3 matrix with offset terms, a set of one dimensional curves,
+// a multidimensional lookup table, and a set of one dimensional output curves.
+// Data are processed using these elements via the following sequence:
+//
+//("A" curves) -> (multidimensional lookup table - CLUT) -> ("M" curves) -> (matrix) -> ("B" curves).
+//
+/*
+It is possible to use any or all of these processing elements. At least one processing element
+must be included. Only the following combinations are allowed:
+
+B
+M - Matrix - B
+A - CLUT - B
+A - CLUT - M - Matrix - B
+
+*/
+
+static
+void* Type_LUTA2B_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt32Number  Origin;                          // Position all directory offsets are relative to
+    cmsUInt8Number   nIn, nOut;                       // Channel counts
+    cmsUInt32Number  offB, offMat, offM, offC, offA;  // Directory, in stored order
+    cmsPipeline*     Pipe;
+    cmsStage*        Stage;
+
+    Origin = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    if (!_cmsReadUInt8Number(io, &nIn)) return NULL;
+    if (!_cmsReadUInt8Number(io, &nOut)) return NULL;
+
+    // A 16 bit value is read and discarded
+    if (!_cmsReadUInt16Number(io, NULL)) return NULL;
+
+    // Directory of offsets; zero means the element is absent
+    if (!_cmsReadUInt32Number(io, &offB) ||
+        !_cmsReadUInt32Number(io, &offMat) ||
+        !_cmsReadUInt32Number(io, &offM) ||
+        !_cmsReadUInt32Number(io, &offC) ||
+        !_cmsReadUInt32Number(io, &offA)) return NULL;
+
+    if (nIn == 0 || nIn >= cmsMAXCHANNELS) return NULL;
+    if (nOut == 0 || nOut >= cmsMAXCHANNELS) return NULL;
+
+    // Start from an empty pipeline and append whatever elements are present
+    Pipe = cmsPipelineAlloc(self ->ContextID, nIn, nOut);
+    if (Pipe == NULL) return NULL;
+
+    // "A" curves
+    if (offA != 0) {
+        Stage = ReadSetOfCurves(self, io, Origin + offA, nIn);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    // CLUT
+    if (offC != 0) {
+        Stage = ReadCLUT(self, io, Origin + offC, nIn, nOut);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    // "M" curves
+    if (offM != 0) {
+        Stage = ReadSetOfCurves(self, io, Origin + offM, nOut);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    // Matrix
+    if (offMat != 0) {
+        Stage = ReadMatrix(self, io, Origin + offMat);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    // "B" curves
+    if (offB != 0) {
+        Stage = ReadSetOfCurves(self, io, Origin + offB, nOut);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    *nItems = 1;
+    return Pipe;
+
+Error:
+    cmsPipelineFree(Pipe);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+// Write a matrix stage as a fixed record of nine coefficients followed by three
+// offsets, all as 15.16 fixed point numbers. Returns FALSE on the first failed write.
+//
+// The record is always 3x3 + 3. Reads from the stage are bounded by its own
+// channel counts, so a stage holding fewer than nine coefficients or fewer than
+// three offsets is padded with zeros instead of being read past its data
+// (assumes Double holds inputs*outputs entries and Offset holds one entry per
+// output channel -- confirm against cmsStageAllocMatrix).
+static
+cmsBool  WriteMatrix(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsStage* mpe)
+{
+    _cmsStageMatrixData* m = (_cmsStageMatrixData*) mpe -> Data;
+    cmsUInt32Number i;
+    cmsUInt32Number nOffsets = cmsStageOutputChannels(mpe);
+    cmsUInt32Number nCoeffs  = cmsStageInputChannels(mpe) * nOffsets;
+
+    // Write the Matrix
+    for (i = 0; i < 3*3; i++) {
+
+        cmsFloat64Number v = (i < nCoeffs) ? m -> Double[i] : 0;
+
+        if (!_cmsWrite15Fixed16Number(io, v)) return FALSE;
+    }
+
+    // Write the offsets; a stage without offsets stores three zeros
+    for (i = 0; i < 3; i++) {
+
+        cmsFloat64Number v = (m ->Offset != NULL && i < nOffsets) ? m -> Offset[i] : 0;
+
+        if (!_cmsWrite15Fixed16Number(io, v)) return FALSE;
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+// Write a set of curves
+static
+cmsBool WriteSetOfCurves(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsTagTypeSignature Type, cmsStage* mpe)
+{
+    cmsUInt32Number k;
+    cmsUInt32Number nCurves = cmsStageOutputChannels(mpe);
+    cmsToneCurve** Set = _cmsStageGetPtrToCurveSet(mpe);
+
+    for (k = 0; k < nCurves; k++) {
+
+        cmsToneCurve* Curve = Set[k];
+        cmsTagTypeSignature Chosen = Type;
+
+        // Curves without segments, with two segments whose second has type 0, or whose
+        // first segment has a negative type are saved as curve type regardless of Type.
+        // Short-circuit evaluation keeps Segments[] untouched when nSegments is 0.
+        if ((Curve ->nSegments == 0) ||
+            ((Curve ->nSegments == 2) && (Curve ->Segments[1].Type == 0)) ||
+            (Curve ->Segments[0].Type < 0)) {
+
+            Chosen = cmsSigCurveType;
+        }
+
+        if (!_cmsWriteTypeBase(io, Chosen)) return FALSE;
+
+        if (Chosen == cmsSigCurveType) {
+
+            if (!Type_Curve_Write(self, io, Curve, 1)) return FALSE;
+        }
+        else if (Chosen == cmsSigParametricCurveType) {
+
+            if (!Type_ParametricCurve_Write(self, io, Curve, 1)) return FALSE;
+        }
+        else {
+            char String[5];
+
+            // The requested type is neither of the two supported curve types
+            _cmsTagSignature2String(String, (cmsTagSignature) Type);
+            cmsSignalError(self ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown curve type '%s'", String);
+            return FALSE;
+        }
+
+        // Every curve is followed by alignment
+        if (!_cmsWriteAlignment(io)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Write a CLUT stage: cmsMAXCHANNELS bytes of grid points, the precision byte,
+// three zero bytes, the table entries and final alignment.
+//
+// Precision selects the size of each stored entry: 1 or 2 bytes. Returns FALSE
+// (after signalling an error) for floating point CLUTs, for CLUTs whose geometry
+// cannot be stored in the one-byte-per-dimension grid point array, and for any
+// other Precision; also returns FALSE on the first failed write.
+static
+cmsBool WriteCLUT(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt8Number  Precision, cmsStage* mpe)
+{
+    cmsUInt8Number  gridPoints[cmsMAXCHANNELS]; // Number of grid points in each dimension.
+    cmsUInt32Number i, nInputs;
+    _cmsStageCLutData* CLUT = ( _cmsStageCLutData*) mpe -> Data;
+
+    if (CLUT ->HasFloatValues) {
+         cmsSignalError(self ->ContextID, cmsERROR_NOT_SUITABLE, "Cannot save floating point data, CLUT are 8 or 16 bit only");
+         return FALSE;
+    }
+
+    // Never index past the fixed-size grid point array
+    nInputs = (cmsUInt32Number) CLUT ->Params ->nInputs;
+    if (nInputs > cmsMAXCHANNELS) {
+        cmsSignalError(self ->ContextID, cmsERROR_RANGE, "Too many CLUT input channels '%d'", nInputs);
+        return FALSE;
+    }
+
+    // Unused dimensions are stored as zero
+    memset(gridPoints, 0, sizeof(gridPoints));
+    for (i=0; i < nInputs; i++) {
+
+        // Each dimension is stored in one byte; refuse values that would be silently truncated
+        if (CLUT ->Params ->nSamples[i] > 255) {
+            cmsSignalError(self ->ContextID, cmsERROR_RANGE, "Too many CLUT grid points '%d'", CLUT ->Params ->nSamples[i]);
+            return FALSE;
+        }
+
+        gridPoints[i] = (cmsUInt8Number) CLUT ->Params ->nSamples[i];
+    }
+
+    if (!io -> Write(io, cmsMAXCHANNELS*sizeof(cmsUInt8Number), gridPoints)) return FALSE;
+
+    // Precision byte followed by three zero bytes
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) Precision)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, 0)) return FALSE;
+
+    // Precision can be 1 or 2 bytes
+    if (Precision == 1) {
+
+        // 16 bit entries are reduced to 8 bits one at a time
+        for (i=0; i < CLUT->nEntries; i++) {
+
+            if (!_cmsWriteUInt8Number(io, FROM_16_TO_8(CLUT->Tab.T[i]))) return FALSE;
+        }
+    }
+    else
+        if (Precision == 2) {
+
+            if (!_cmsWriteUInt16Array(io, CLUT->nEntries, CLUT ->Tab.T)) return FALSE;
+        }
+        else {
+            cmsSignalError(self ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown precision of '%d'", Precision);
+            return FALSE;
+        }
+
+    if (!_cmsWriteAlignment(io)) return FALSE;
+
+    return TRUE;
+}
+
+
+
+
+// Save a pipeline as a LutAToB tag.
+//
+// Writes the channel counts, a five-entry offset directory (B, matrix, M, CLUT, A)
+// and then the elements that are present. The directory is first written as zeros
+// and patched once every element offset is known. Offsets are stored relative to
+// BaseOffset. Returns FALSE if the pipeline matches none of the probed layouts or
+// on any write/seek failure. nItems is unused.
+static
+cmsBool Type_LUTA2B_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Ptr;
+    cmsUInt32Number inputChan, outputChan;
+    cmsStage *A = NULL, *B = NULL, *M = NULL;
+    cmsStage * Matrix = NULL;
+    cmsStage * CLUT = NULL;
+    cmsUInt32Number offsetB = 0, offsetMat = 0, offsetM = 0, offsetC = 0, offsetA = 0;
+    cmsUInt32Number BaseOffset, DirectoryPos, CurrentPos;
+
+    // Get the base for all offsets
+    BaseOffset = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // Probe the allowed layouts in turn: B, M-Matrix-B, A-CLUT-B, A-CLUT-M-Matrix-B.
+    // The first probe that succeeds ends the chain; presumably it also fills the
+    // stage pointers passed to it (verify against cmsPipelineCheckAndRetreiveStages).
+    // A pipeline without elements skips the probing and is written with an all-zero directory.
+    if (Lut ->Elements != NULL)
+        if (!cmsPipelineCheckAndRetreiveStages(Lut, 1, cmsSigCurveSetElemType, &B))
+            if (!cmsPipelineCheckAndRetreiveStages(Lut, 3, cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType, &M, &Matrix, &B))
+                if (!cmsPipelineCheckAndRetreiveStages(Lut, 3, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType, &A, &CLUT, &B))
+                    if (!cmsPipelineCheckAndRetreiveStages(Lut, 5, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType,
+                        cmsSigMatrixElemType, cmsSigCurveSetElemType, &A, &CLUT, &M, &Matrix, &B)) {
+
+                            cmsSignalError(self->ContextID, cmsERROR_NOT_SUITABLE, "LUT is not suitable to be saved as LutAToB");
+                            return FALSE;
+                    }
+
+    // Get input, output channels
+    inputChan  = cmsPipelineInputChannels(Lut);
+    outputChan = cmsPipelineOutputChannels(Lut);
+
+    // Write channel count (each count is narrowed to one byte) plus two zero bytes
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) inputChan)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) outputChan)) return FALSE;
+    if (!_cmsWriteUInt16Number(io, 0)) return FALSE;
+
+    // Keep directory position to be filled in later
+    DirectoryPos = io ->Tell(io);
+
+    // Write the directory as five zero placeholders
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+
+    // Elements are written in the order A, CLUT, M, Matrix, B; each offset is
+    // captured right before its element is written. Absent elements keep offset 0.
+    if (A != NULL) {
+
+        offsetA = io ->Tell(io) - BaseOffset;
+        if (!WriteSetOfCurves(self, io, cmsSigParametricCurveType, A)) return FALSE;
+    }
+
+    if (CLUT != NULL) {
+        offsetC = io ->Tell(io) - BaseOffset;
+        // SaveAs8Bits selects 1 byte per CLUT entry, otherwise 2
+        if (!WriteCLUT(self, io, (Lut ->SaveAs8Bits ? 1U : 2U), CLUT)) return FALSE;
+
+    }
+    if (M != NULL) {
+
+        offsetM = io ->Tell(io) - BaseOffset;
+        if (!WriteSetOfCurves(self, io, cmsSigParametricCurveType, M)) return FALSE;
+    }
+
+    if (Matrix != NULL) {
+        offsetMat = io ->Tell(io) - BaseOffset;
+        if (!WriteMatrix(self, io, Matrix)) return FALSE;
+    }
+
+    if (B != NULL) {
+
+        offsetB = io ->Tell(io) - BaseOffset;
+        if (!WriteSetOfCurves(self, io, cmsSigParametricCurveType, B)) return FALSE;
+    }
+
+    // Remember the end of the data, go back and patch the directory
+    CurrentPos = io ->Tell(io);
+
+    if (!io ->Seek(io, DirectoryPos)) return FALSE;
+
+    // Directory order is B, matrix, M, CLUT, A
+    if (!_cmsWriteUInt32Number(io, offsetB)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetMat)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetM)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetC)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetA)) return FALSE;
+
+    // Leave the stream positioned after the last element
+    if (!io ->Seek(io, CurrentPos)) return FALSE;
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+static
+void* Type_LUTA2B_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    // The tag payload is a pipeline; duplication is delegated to cmsPipelineDup
+    cmsPipeline* Original = (cmsPipeline*) Ptr;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    return (void*) cmsPipelineDup(Original);
+}
+
+static
+void Type_LUTA2B_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsUNUSED_PARAMETER(self);
+
+    // The tag payload is a pipeline; release it through cmsPipelineFree
+    cmsPipelineFree((cmsPipeline*) Ptr);
+}
+
+
+// LutBToA type
+
+static
+void* Type_LUTB2A_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt8Number   nIn, nOut;                       // Channel counts
+    cmsUInt32Number  Origin;                          // Position all directory offsets are relative to
+    cmsUInt32Number  offB, offMat, offM, offC, offA;  // Directory, in stored order
+    cmsPipeline*     Pipe;
+    cmsStage*        Stage;
+
+    Origin = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    if (!_cmsReadUInt8Number(io, &nIn)) return NULL;
+    if (!_cmsReadUInt8Number(io, &nOut)) return NULL;
+
+    if (nIn == 0 || nIn >= cmsMAXCHANNELS) return NULL;
+    if (nOut == 0 || nOut >= cmsMAXCHANNELS) return NULL;
+
+    // Padding
+    if (!_cmsReadUInt16Number(io, NULL)) return NULL;
+
+    // Directory of offsets; zero means the element is absent
+    if (!_cmsReadUInt32Number(io, &offB) ||
+        !_cmsReadUInt32Number(io, &offMat) ||
+        !_cmsReadUInt32Number(io, &offM) ||
+        !_cmsReadUInt32Number(io, &offC) ||
+        !_cmsReadUInt32Number(io, &offA)) return NULL;
+
+    // Start from an empty pipeline and append whatever elements are present
+    Pipe = cmsPipelineAlloc(self ->ContextID, nIn, nOut);
+    if (Pipe == NULL) return NULL;
+
+    // "B" curves
+    if (offB != 0) {
+        Stage = ReadSetOfCurves(self, io, Origin + offB, nIn);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    // Matrix
+    if (offMat != 0) {
+        Stage = ReadMatrix(self, io, Origin + offMat);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    // "M" curves
+    if (offM != 0) {
+        Stage = ReadSetOfCurves(self, io, Origin + offM, nIn);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    // CLUT
+    if (offC != 0) {
+        Stage = ReadCLUT(self, io, Origin + offC, nIn, nOut);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    // "A" curves
+    if (offA != 0) {
+        Stage = ReadSetOfCurves(self, io, Origin + offA, nOut);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    *nItems = 1;
+    return Pipe;
+
+Error:
+    cmsPipelineFree(Pipe);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+/*
+B
+B - Matrix - M
+B - CLUT - A
+B - Matrix - M - CLUT - A
+*/
+
+// Save a pipeline as a LutBToA tag.
+//
+// Writes the channel counts, a five-entry offset directory (B, matrix, M, CLUT, A)
+// and then the elements that are present. The directory is first written as zeros
+// and patched once every element offset is known. Offsets are stored relative to
+// BaseOffset. Returns FALSE if the pipeline matches none of the probed layouts or
+// on any write/seek failure. nItems is unused.
+static
+cmsBool  Type_LUTB2A_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Ptr;
+    cmsUInt32Number inputChan, outputChan;
+    cmsStage *A = NULL, *B = NULL, *M = NULL;
+    cmsStage *Matrix = NULL;
+    cmsStage *CLUT = NULL;
+    cmsUInt32Number offsetB = 0, offsetMat = 0, offsetM = 0, offsetC = 0, offsetA = 0;
+    cmsUInt32Number BaseOffset, DirectoryPos, CurrentPos;
+
+
+    // Base for all the offsets stored in the directory
+    BaseOffset = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // Probe the allowed layouts in turn: B, B-Matrix-M, B-CLUT-A, B-Matrix-M-CLUT-A.
+    // The first probe that succeeds ends the chain; presumably it also fills the
+    // stage pointers passed to it (verify against cmsPipelineCheckAndRetreiveStages).
+    // NOTE(review): unlike Type_LUTA2B_Write there is no Lut->Elements != NULL guard here.
+    if (!cmsPipelineCheckAndRetreiveStages(Lut, 1, cmsSigCurveSetElemType, &B))
+        if (!cmsPipelineCheckAndRetreiveStages(Lut, 3, cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType, &B, &Matrix, &M))
+            if (!cmsPipelineCheckAndRetreiveStages(Lut, 3, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType, &B, &CLUT, &A))
+                if (!cmsPipelineCheckAndRetreiveStages(Lut, 5, cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType,
+                    cmsSigCLutElemType, cmsSigCurveSetElemType, &B, &Matrix, &M, &CLUT, &A)) {
+                        cmsSignalError(self->ContextID, cmsERROR_NOT_SUITABLE, "LUT is not suitable to be saved as LutBToA");
+                        return FALSE;
+                }
+
+    inputChan  = cmsPipelineInputChannels(Lut);
+    outputChan = cmsPipelineOutputChannels(Lut);
+
+    // Channel counts (each narrowed to one byte) plus two zero bytes
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) inputChan)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) outputChan)) return FALSE;
+    if (!_cmsWriteUInt16Number(io, 0)) return FALSE;
+
+    // Remember where the directory lives so it can be patched afterwards
+    DirectoryPos = io ->Tell(io);
+
+    // Five zero placeholders for the directory
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+
+    // Elements are written in the order A, CLUT, M, Matrix, B; each offset is
+    // captured right before its element is written. Absent elements keep offset 0.
+    if (A != NULL) {
+
+        offsetA = io ->Tell(io) - BaseOffset;
+        if (!WriteSetOfCurves(self, io, cmsSigParametricCurveType, A)) return FALSE;
+    }
+
+    if (CLUT != NULL) {
+        offsetC = io ->Tell(io) - BaseOffset;
+        // SaveAs8Bits selects 1 byte per CLUT entry, otherwise 2
+        if (!WriteCLUT(self, io, (Lut ->SaveAs8Bits ? 1U : 2U), CLUT)) return FALSE;
+
+    }
+    if (M != NULL) {
+
+        offsetM = io ->Tell(io) - BaseOffset;
+        if (!WriteSetOfCurves(self, io, cmsSigParametricCurveType, M)) return FALSE;
+    }
+
+    if (Matrix != NULL) {
+        offsetMat = io ->Tell(io) - BaseOffset;
+        if (!WriteMatrix(self, io, Matrix)) return FALSE;
+    }
+
+    if (B != NULL) {
+
+        offsetB = io ->Tell(io) - BaseOffset;
+        if (!WriteSetOfCurves(self, io, cmsSigParametricCurveType, B)) return FALSE;
+    }
+
+    // Remember the end of the data, go back and patch the directory
+    CurrentPos = io ->Tell(io);
+
+    if (!io ->Seek(io, DirectoryPos)) return FALSE;
+
+    // Directory order is B, matrix, M, CLUT, A
+    if (!_cmsWriteUInt32Number(io, offsetB)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetMat)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetM)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetC)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetA)) return FALSE;
+
+    // Leave the stream positioned after the last element
+    if (!io ->Seek(io, CurrentPos)) return FALSE;
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+
+static
+void* Type_LUTB2A_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    // The tag payload is a pipeline; duplication is delegated to cmsPipelineDup
+    cmsPipeline* Original = (cmsPipeline*) Ptr;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    return (void*) cmsPipelineDup(Original);
+}
+
+static
+void Type_LUTB2A_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsUNUSED_PARAMETER(self);
+
+    // The tag payload is a pipeline; release it through cmsPipelineFree
+    cmsPipelineFree((cmsPipeline*) Ptr);
+}
+
+
+
+// ********************************************************************************
+// Type cmsSigColorantTableType
+// ********************************************************************************
+/*
+The purpose of this tag is to identify the colorants used in the profile by a
+unique name and set of XYZ or L*a*b* values to give the colorant an unambiguous
+value. The first colorant listed is the colorant of the first device channel of
+a lut tag. The second colorant listed is the colorant of the second device channel
+of a lut tag, and so on.
+*/
+
+// Read a colorant table into a named color list.
+//
+// The tag holds a count followed by that many records, each a 32-byte name and
+// three 16-bit PCS values. Returns the list and sets *nItems to 1 on success.
+// Returns NULL with *nItems set to 0 when the count cannot be read, when it
+// exceeds cmsMAXCHANNELS, when the list cannot be allocated, or when any record
+// cannot be read or appended. SizeOfTag is unused.
+static
+void *Type_ColorantTable_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt32Number i, Count;
+    cmsNAMEDCOLORLIST* List;
+    char Name[34];
+    cmsUInt16Number PCS[3];
+
+    // Report no items on every failure path, including the early returns
+    *nItems = 0;
+
+    if (!_cmsReadUInt32Number(io, &Count)) return NULL;
+
+    if (Count > cmsMAXCHANNELS) {
+        cmsSignalError(self->ContextID, cmsERROR_RANGE, "Too many colorants '%d'", Count);
+        return NULL;
+    }
+
+    // A failed allocation must not be reported as a successful read
+    // (previously a zero Count with a NULL list returned NULL with *nItems == 1)
+    List = cmsAllocNamedColorList(self ->ContextID, Count, 0, "", "");
+    if (List == NULL) return NULL;
+
+    for (i=0; i < Count; i++) {
+
+        if (io ->Read(io, Name, 32, 1) != 1) goto Error;
+
+        // The stored name is not trusted to be terminated
+        Name[32] = 0;
+
+        if (!_cmsReadUInt16Array(io, 3, PCS)) goto Error;
+
+        if (!cmsAppendNamedColor(List, Name, PCS, NULL)) goto Error;
+
+    }
+
+    *nItems = 1;
+    return List;
+
+Error:
+    cmsFreeNamedColorList(List);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+
+// Saves a colorant table. It is using the named color structure for simplicity sake
+static
+cmsBool  Type_ColorantTable_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsNAMEDCOLORLIST* List = (cmsNAMEDCOLORLIST*) Ptr;
+    cmsUInt32Number Total = cmsNamedColorCount(List);
+    cmsUInt32Number k;
+
+    // Count first, then one record per colorant
+    if (!_cmsWriteUInt32Number(io, Total)) return FALSE;
+
+    for (k = 0; k < Total; k++) {
+
+        char Name[cmsMAX_PATH];
+        cmsUInt16Number PCS[3];
+
+        // Start from a zeroed buffer so the bytes after the name are zeros
+        memset(Name, 0, sizeof(Name));
+
+        if (!cmsNamedColorInfo(List, k, Name, NULL, NULL, PCS, NULL)) return FALSE;
+        Name[32] = 0;
+
+        // Only the first 32 bytes of the name are stored, followed by the three PCS values
+        if (!io ->Write(io, 32, Name)) return FALSE;
+        if (!_cmsWriteUInt16Array(io, 3, PCS)) return FALSE;
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+static
+void* Type_ColorantTable_Dup(struct _cms_typehandler_struct* self, const void* Ptr, cmsUInt32Number n)
+{
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    // The tag payload is a named color list; duplication is delegated to cmsDupNamedColorList
+    return (void*) cmsDupNamedColorList((cmsNAMEDCOLORLIST*) Ptr);
+}
+
+
+static
+void Type_ColorantTable_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsUNUSED_PARAMETER(self);
+
+    // The tag payload is a named color list
+    cmsFreeNamedColorList((cmsNAMEDCOLORLIST*) Ptr);
+}
+
+
+// ********************************************************************************
+// Type cmsSigNamedColor2Type
+// ********************************************************************************
+//
+//The namedColor2Type is a count value and array of structures that provide color
+//coordinates for 7-bit ASCII color names. For each named color, a PCS and optional
+//device representation of the color are given. Both representations are 16-bit values.
+//The device representation corresponds to the header's 'color space of data' field.
+//This representation should be consistent with the 'number of device components'
+//field in the namedColor2Type. If this field is 0, device coordinates are not provided.
+//The PCS representation corresponds to the header's PCS field. The PCS representation
+//is always provided. Color names are fixed-length, 32-byte fields including null
+//termination. In order to maintain maximum portability, it is strongly recommended
+//that special characters of the 7-bit ASCII set not be used.
+
+// Read a namedColor2 tag into a named color list.
+//
+// The tag holds a vendor flag, the color count, the number of device coordinates,
+// a 32-byte prefix and a 32-byte suffix, followed by one record per color: a
+// 32-byte root name, three 16-bit PCS values and nDeviceCoords 16-bit device values.
+// Returns the list and sets *nItems to 1 on success; returns NULL with *nItems
+// set to 0 on any read failure, when nDeviceCoords exceeds cmsMAXCHANNELS, or
+// when the list cannot be allocated. SizeOfTag is unused.
+static
+void *Type_NamedColor_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+
+    cmsUInt32Number      vendorFlag;     // Bottom 16 bits for ICC use
+    cmsUInt32Number      count;          // Count of named colors
+    cmsUInt32Number      nDeviceCoords;  // Num of device coordinates
+    char                 prefix[32];     // Prefix for each color name
+    char                 suffix[32];     // Suffix for each color name
+    cmsNAMEDCOLORLIST*   v;
+    cmsUInt32Number      i;
+
+
+    *nItems = 0;
+    if (!_cmsReadUInt32Number(io, &vendorFlag)) return NULL;
+    if (!_cmsReadUInt32Number(io, &count)) return NULL;
+    if (!_cmsReadUInt32Number(io, &nDeviceCoords)) return NULL;
+
+    if (io -> Read(io, prefix, 32, 1) != 1) return NULL;
+    if (io -> Read(io, suffix, 32, 1) != 1) return NULL;
+
+    // The stored strings are not trusted to be terminated
+    prefix[31] = suffix[31] = 0;
+
+    // Validate the untrusted coordinate count before it is used to size anything:
+    // it bounds the Colorant buffer below and is handed to the allocator.
+    if (nDeviceCoords > cmsMAXCHANNELS) {
+        cmsSignalError(self->ContextID, cmsERROR_RANGE, "Too many device coordinates '%d'", nDeviceCoords);
+        return NULL;
+    }
+
+    v = cmsAllocNamedColorList(self ->ContextID, count, nDeviceCoords, prefix, suffix);
+    if (v == NULL) {
+        cmsSignalError(self->ContextID, cmsERROR_RANGE, "Too many named colors '%d'", count);
+        return NULL;
+    }
+
+    for (i=0; i < count; i++) {
+
+        cmsUInt16Number PCS[3];
+        cmsUInt16Number Colorant[cmsMAXCHANNELS];
+        char Root[33];
+
+        // Coordinates beyond nDeviceCoords stay zero
+        memset(Colorant, 0, sizeof(Colorant));
+        if (io -> Read(io, Root, 32, 1) != 1) goto Error;
+        Root[32] = 0;  // To prevent exploits
+
+        if (!_cmsReadUInt16Array(io, 3, PCS)) goto Error;
+        if (!_cmsReadUInt16Array(io, nDeviceCoords, Colorant)) goto Error;
+
+        if (!cmsAppendNamedColor(v, Root, PCS, Colorant)) goto Error;
+    }
+
+    *nItems = 1;
+    return (void*) v ;
+
+Error:
+    cmsFreeNamedColorList(v);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+// Saves a named color list into a named color profile
+// Save a named color list as a namedColor2 tag.
+//
+// Writes a zero vendor flag, the color count, the colorant count, the 32-byte
+// prefix and suffix, then one record per color: 32 bytes of root name, three
+// 16-bit PCS values and ColorantCount 16-bit device values. Returns FALSE on the
+// first failed write or when a color cannot be retrieved. nItems and self are unused.
+static
+cmsBool Type_NamedColor_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsNAMEDCOLORLIST* NamedColorList = (cmsNAMEDCOLORLIST*) Ptr;
+    char                prefix[33];     // Prefix for each color name
+    char                suffix[33];     // Suffix for each color name
+    cmsUInt32Number     i, nColors;
+
+    nColors = cmsNamedColorCount(NamedColorList);
+
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, nColors)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, NamedColorList ->ColorantCount)) return FALSE;
+
+    // strncpy zero-pads short strings, so all 32 bytes written below are initialized
+    strncpy(prefix, (const char*) NamedColorList->Prefix, 32);
+    strncpy(suffix, (const char*) NamedColorList->Suffix, 32);
+
+    suffix[32] = prefix[32] = 0;
+
+    if (!io ->Write(io, 32, prefix)) return FALSE;
+    if (!io ->Write(io, 32, suffix)) return FALSE;
+
+    for (i=0; i < nColors; i++) {
+
+       cmsUInt16Number PCS[3];
+       cmsUInt16Number Colorant[cmsMAXCHANNELS];
+       char Root[cmsMAX_PATH];
+
+        // Clear the buffers first: a fixed 32 bytes of Root are written regardless of
+        // the name length, so without this the bytes after the terminator would be
+        // uninitialized stack contents ending up in the profile.
+        memset(Root, 0, sizeof(Root));
+        memset(PCS, 0, sizeof(PCS));
+        memset(Colorant, 0, sizeof(Colorant));
+
+        if (!cmsNamedColorInfo(NamedColorList, i, Root, NULL, NULL, PCS, Colorant)) return FALSE;
+        Root[32] = 0;
+        if (!io ->Write(io, 32 , Root)) return FALSE;
+        if (!_cmsWriteUInt16Array(io, 3, PCS)) return FALSE;
+        if (!_cmsWriteUInt16Array(io, NamedColorList ->ColorantCount, Colorant)) return FALSE;
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+static
+void* Type_NamedColor_Dup(struct _cms_typehandler_struct* self, const void* Ptr, cmsUInt32Number n)
+{
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    // The tag payload is a named color list; duplication is delegated to cmsDupNamedColorList
+    return (void*) cmsDupNamedColorList((cmsNAMEDCOLORLIST*) Ptr);
+}
+
+
+static
+void Type_NamedColor_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsUNUSED_PARAMETER(self);
+
+    // The tag payload is a named color list
+    cmsFreeNamedColorList((cmsNAMEDCOLORLIST*) Ptr);
+}
+
+
+// ********************************************************************************
+// Type cmsSigProfileSequenceDescType
+// ********************************************************************************
+
+// This type is an array of structures, each of which contains information from the
+// header fields and tags from the original profiles which were combined to create
+// the final profile. The order of the structures is the order in which the profiles
+// were combined and includes a structure for the final profile. This provides a
+// description of the profile sequence from source to destination,
+// typically used with the DeviceLink profile.
+
+static
+cmsBool ReadEmbeddedText(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsMLU** mlu, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt32Number nItems;
+    cmsMLU* Fresh;
+    cmsTagTypeSignature BaseType = _cmsReadTypeBase(io);
+
+    // Only the three text-like types are accepted; anything else leaves *mlu untouched
+    if (BaseType != cmsSigTextType &&
+        BaseType != cmsSigTextDescriptionType &&
+        BaseType != cmsSigMultiLocalizedUnicodeType) return FALSE;
+
+    // Any previous content is dropped before the replacement is read
+    if (*mlu) cmsMLUfree(*mlu);
+
+    if (BaseType == cmsSigTextType) {
+
+        Fresh = (cmsMLU*) Type_Text_Read(self, io, &nItems, SizeOfTag);
+    }
+    else if (BaseType == cmsSigTextDescriptionType) {
+
+        Fresh = (cmsMLU*) Type_Text_Description_Read(self, io, &nItems, SizeOfTag);
+    }
+    else {
+        /*
+        TBD: Size is needed for MLU, and we have no idea on which is the available size
+        */
+        Fresh = (cmsMLU*) Type_MLU_Read(self, io, &nItems, SizeOfTag);
+    }
+
+    // *mlu is overwritten even when the reader failed and returned NULL
+    *mlu = Fresh;
+    return (Fresh != NULL);
+}
+
+
+static
+void *Type_ProfileSequenceDesc_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsSEQ* Seq;
+    cmsUInt32Number nProfiles, k;
+
+    *nItems = 0;
+
+    // The tag opens with the number of profile descriptions it holds
+    if (!_cmsReadUInt32Number(io, &nProfiles)) return NULL;
+
+    if (SizeOfTag < sizeof(cmsUInt32Number)) return NULL;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    Seq = cmsAllocProfileSequenceDescription(self ->ContextID, nProfiles);
+    if (Seq == NULL) return NULL;
+
+    Seq ->n = nProfiles;
+
+    // One record per profile. After each fixed-size field the remaining
+    // tag size is verified and reduced by the size of that field.
+    k = 0;
+    while (k < nProfiles) {
+
+        cmsPSEQDESC* Entry = &Seq ->seq[k];
+
+        if (!_cmsReadUInt32Number(io, &Entry ->deviceMfg)) goto Error;
+        if (SizeOfTag < sizeof(cmsUInt32Number)) goto Error;
+        SizeOfTag -= sizeof(cmsUInt32Number);
+
+        if (!_cmsReadUInt32Number(io, &Entry ->deviceModel)) goto Error;
+        if (SizeOfTag < sizeof(cmsUInt32Number)) goto Error;
+        SizeOfTag -= sizeof(cmsUInt32Number);
+
+        if (!_cmsReadUInt64Number(io, &Entry ->attributes)) goto Error;
+        if (SizeOfTag < sizeof(cmsUInt64Number)) goto Error;
+        SizeOfTag -= sizeof(cmsUInt64Number);
+
+        if (!_cmsReadUInt32Number(io, (cmsUInt32Number *)&Entry ->technology)) goto Error;
+        if (SizeOfTag < sizeof(cmsUInt32Number)) goto Error;
+        SizeOfTag -= sizeof(cmsUInt32Number);
+
+        // Both embedded texts receive the same remaining size
+        if (!ReadEmbeddedText(self, io, &Entry ->Manufacturer, SizeOfTag)) goto Error;
+        if (!ReadEmbeddedText(self, io, &Entry ->Model, SizeOfTag)) goto Error;
+
+        k++;
+    }
+
+    *nItems = 1;
+    return Seq;
+
+Error:
+    // Any failure discards the whole sequence
+    cmsFreeProfileSequenceDescription(Seq);
+    return NULL;
+}
+
+
+// Aux--Embed a text description type. It can be of type text description or multilocalized unicode,
+// depending on the version number carried by the type handler structure (self ->ICCVersion)
+static
+cmsBool  SaveDescription(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsMLU* Text)
+{
+    // From version 0x4000000 on the text goes out as multi localized unicode,
+    // below that as a text description. In both cases the type base is written first.
+    if (self ->ICCVersion >= 0x4000000) {
+
+        if (_cmsWriteTypeBase(io, cmsSigMultiLocalizedUnicodeType))
+            return Type_MLU_Write(self, io, Text, 1);
+
+        return FALSE;
+    }
+
+    if (_cmsWriteTypeBase(io, cmsSigTextDescriptionType))
+        return Type_Text_Description_Write(self, io, Text, 1);
+
+    return FALSE;
+}
+
+
+static
+cmsBool  Type_ProfileSequenceDesc_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsSEQ* Seq = (cmsSEQ*) Ptr;
+    cmsUInt32Number k;
+
+    // Record count first, then one record per profile
+    if (!_cmsWriteUInt32Number(io, Seq->n)) return FALSE;
+
+    for (k = 0; k < Seq ->n; k++) {
+
+        cmsPSEQDESC* Entry = &Seq -> seq[k];
+
+        // Fixed-size header fields of the record
+        if (!_cmsWriteUInt32Number(io, Entry ->deviceMfg) ||
+            !_cmsWriteUInt32Number(io, Entry ->deviceModel) ||
+            !_cmsWriteUInt64Number(io, &Entry ->attributes) ||
+            !_cmsWriteUInt32Number(io, Entry ->technology)) return FALSE;
+
+        // Embedded manufacturer and model descriptions
+        if (!SaveDescription(self, io, Entry ->Manufacturer) ||
+            !SaveDescription(self, io, Entry ->Model)) return FALSE;
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+static
+void* Type_ProfileSequenceDesc_Dup(struct _cms_typehandler_struct* self, const void* Ptr, cmsUInt32Number n)
+{
+    // Duplicate handler: delegates to cmsDupProfileSequenceDescription()
+    cmsSEQ* Original = (cmsSEQ*) Ptr;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    return (void*) cmsDupProfileSequenceDescription(Original);
+}
+
+static
+void Type_ProfileSequenceDesc_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    // Free handler: delegates to cmsFreeProfileSequenceDescription()
+    cmsSEQ* Seq = (cmsSEQ*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsFreeProfileSequenceDescription(Seq);
+}
+
+
+// ********************************************************************************
+// Type cmsSigProfileSequenceIdType
+// ********************************************************************************
+/*
+In certain workflows using ICC Device Link Profiles, it is necessary to identify the
+original profiles that were combined to create the Device Link Profile.
+This type is an array of structures, each of which contains information for
+identification of a profile used in a sequence
+*/
+
+
+static
+cmsBool ReadSeqID(struct _cms_typehandler_struct* self,
+                  cmsIOHANDLER* io,
+                  void* Cargo,
+                  cmsUInt32Number n,
+                  cmsUInt32Number SizeOfTag)
+{
+    // Position-table callback: fills entry n of the sequence passed as Cargo
+    cmsPSEQDESC* Entry = &((cmsSEQ*) Cargo) ->seq[n];
+
+    // 16 bytes of profile ID followed by an embedded description
+    if (io -> Read(io, Entry ->ProfileID.ID8, 16, 1) != 1) return FALSE;
+
+    return ReadEmbeddedText(self, io, &Entry ->Description, SizeOfTag) ? TRUE : FALSE;
+}
+
+
+
+static
+void *Type_ProfileSequenceId_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsSEQ* Seq;
+    cmsUInt32Number nEntries;
+    cmsUInt32Number TagStart;
+
+    *nItems = 0;
+
+    // Base for the element offsets: current position moved back by the size of the type base
+    TagStart = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // Number of entries in the position table
+    if (!_cmsReadUInt32Number(io, &nEntries)) return NULL;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    // Empty sequence to be filled by the per-entry callback
+    Seq = cmsAllocProfileSequenceDescription(self ->ContextID, nEntries);
+    if (Seq == NULL) return NULL;
+
+    if (ReadPositionTable(self, io, nEntries, TagStart, Seq, ReadSeqID)) {
+
+        *nItems = 1;
+        return Seq;
+    }
+
+    // The position table could not be read: drop the sequence
+    cmsFreeProfileSequenceDescription(Seq);
+    return NULL;
+}
+
+
+static
+cmsBool WriteSeqID(struct _cms_typehandler_struct* self,
+                   cmsIOHANDLER* io,
+                   void* Cargo,
+                   cmsUInt32Number n,
+                   cmsUInt32Number SizeOfTag)
+{
+    // Position-table callback: emits entry n of the sequence passed as Cargo
+    cmsPSEQDESC* Entry = &((cmsSEQ*) Cargo) ->seq[n];
+
+    // 16 bytes of profile ID ...
+    if (!io ->Write(io, 16, Entry ->ProfileID.ID8)) return FALSE;
+
+    // ... followed by the description
+    if (!SaveDescription(self, io, Entry ->Description)) return FALSE;
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+static
+cmsBool  Type_ProfileSequenceId_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsSEQ* Seq = (cmsSEQ*) Ptr;
+
+    // Base for the element offsets: current position moved back by the size of the type base
+    cmsUInt32Number TagStart = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // Entry count, then the position table together with its contents
+    if (!_cmsWriteUInt32Number(io, Seq ->n)) return FALSE;
+
+    return WritePositionTable(self, io, 0, Seq ->n, TagStart, Seq, WriteSeqID) ? TRUE : FALSE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+static
+void* Type_ProfileSequenceId_Dup(struct _cms_typehandler_struct* self, const void* Ptr, cmsUInt32Number n)
+{
+    // Duplicate handler: delegates to cmsDupProfileSequenceDescription()
+    cmsSEQ* Original = (cmsSEQ*) Ptr;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    return (void*) cmsDupProfileSequenceDescription(Original);
+}
+
+static
+void Type_ProfileSequenceId_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    // Free handler: delegates to cmsFreeProfileSequenceDescription()
+    cmsSEQ* Seq = (cmsSEQ*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsFreeProfileSequenceDescription(Seq);
+}
+
+
+// ********************************************************************************
+// Type cmsSigUcrBgType
+// ********************************************************************************
+/*
+This type contains curves representing the under color removal and black
+generation and a text string which is a general description of the method used
+for the ucr/bg.
+*/
+
+// Reads a ucr/bg tag: two 16-bit tabulated curves (under color removal, black
+// generation), each preceded by its 32-bit entry count, followed by an ASCII
+// description that takes whatever remains of the tag.
+// Returns a newly allocated cmsUcrBg and sets *nItems to 1; on any failure
+// returns NULL with *nItems set to 0 and releases everything allocated so far.
+static
+void *Type_UcrBg_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUcrBg* n = (cmsUcrBg*) _cmsMallocZero(self ->ContextID, sizeof(cmsUcrBg));
+    cmsUInt32Number CountUcr, CountBg;
+    char* ASCIIString = NULL;
+
+    *nItems = 0;
+    if (n == NULL) return NULL;
+
+    // First curve is Under color removal
+    if (!_cmsReadUInt32Number(io, &CountUcr)) goto Error;
+    if (SizeOfTag < sizeof(cmsUInt32Number)) goto Error;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    n ->Ucr = cmsBuildTabulatedToneCurve16(self ->ContextID, CountUcr, NULL);
+    if (n ->Ucr == NULL) goto Error;
+
+    if (!_cmsReadUInt16Array(io, CountUcr, n ->Ucr->Table16)) goto Error;
+    // Check against the amount actually subtracted, so SizeOfTag cannot wrap around
+    if (SizeOfTag < CountUcr * sizeof(cmsUInt16Number)) goto Error;
+    SizeOfTag -= CountUcr * sizeof(cmsUInt16Number);
+
+    // Second curve is Black generation
+    if (!_cmsReadUInt32Number(io, &CountBg)) goto Error;
+    if (SizeOfTag < sizeof(cmsUInt32Number)) goto Error;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    n ->Bg = cmsBuildTabulatedToneCurve16(self ->ContextID, CountBg, NULL);
+    if (n ->Bg == NULL) goto Error;
+    if (!_cmsReadUInt16Array(io, CountBg, n ->Bg->Table16)) goto Error;
+    if (SizeOfTag < CountBg * sizeof(cmsUInt16Number)) goto Error;
+    SizeOfTag -= CountBg * sizeof(cmsUInt16Number);
+
+    // SizeOfTag + 1 is allocated below; refuse the value that would wrap to 0
+    if (SizeOfTag == UINT_MAX) goto Error;
+
+    // Now comes the text. The length is specified by the tag size
+    n ->Desc = cmsMLUalloc(self ->ContextID, 1);
+    if (n ->Desc == NULL) goto Error;
+
+    ASCIIString = (char*) _cmsMalloc(self ->ContextID, SizeOfTag + 1);
+    if (ASCIIString == NULL) goto Error;
+    if (io ->Read(io, ASCIIString, sizeof(char), SizeOfTag) != SizeOfTag) goto Error;
+    ASCIIString[SizeOfTag] = 0;
+    cmsMLUsetASCII(n ->Desc, cmsNoLanguage, cmsNoCountry, ASCIIString);
+    _cmsFree(self ->ContextID, ASCIIString);
+
+    *nItems = 1;
+    return (void*) n;
+
+Error:
+    // n was zero-initialised, so members not yet assigned are still NULL
+    if (ASCIIString != NULL) _cmsFree(self ->ContextID, ASCIIString);
+    if (n ->Ucr) cmsFreeToneCurve(n ->Ucr);
+    if (n ->Bg) cmsFreeToneCurve(n ->Bg);
+    if (n ->Desc) cmsMLUfree(n ->Desc);
+    _cmsFree(self ->ContextID, n);
+    return NULL;
+}
+
+// Writes a ucr/bg tag: each curve as a 32-bit entry count plus its 16-bit table,
+// then the ASCII description obtained from Value ->Desc.
+// Returns TRUE on success, FALSE on any write or allocation failure. The
+// temporary text buffer is released on every path.
+static
+cmsBool  Type_UcrBg_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsUcrBg* Value = (cmsUcrBg*) Ptr;
+    cmsUInt32Number TextSize;
+    char* Text;
+    cmsBool rc = FALSE;
+
+    // First curve is Under color removal
+    if (!_cmsWriteUInt32Number(io, Value ->Ucr ->nEntries)) return FALSE;
+    if (!_cmsWriteUInt16Array(io, Value ->Ucr ->nEntries, Value ->Ucr ->Table16)) return FALSE;
+
+    // Then black generation
+    if (!_cmsWriteUInt32Number(io, Value ->Bg ->nEntries)) return FALSE;
+    if (!_cmsWriteUInt16Array(io, Value ->Bg ->nEntries, Value ->Bg ->Table16)) return FALSE;
+
+    // Now comes the text. The length is specified by the tag size
+    TextSize = cmsMLUgetASCII(Value ->Desc, cmsNoLanguage, cmsNoCountry, NULL, 0);
+    Text     = (char*) _cmsMalloc(self ->ContextID, TextSize);
+
+    // A NULL buffer is only tolerated for a zero-length request, as before
+    if (Text == NULL && TextSize > 0) return FALSE;
+
+    if (cmsMLUgetASCII(Value ->Desc, cmsNoLanguage, cmsNoCountry, Text, TextSize) != TextSize) goto Done;
+    if (!io ->Write(io, TextSize, Text)) goto Done;
+
+    rc = TRUE;
+
+Done:
+    if (Text != NULL) _cmsFree(self ->ContextID, Text);
+    return rc;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+static
+void* Type_UcrBg_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsUcrBg* From = (cmsUcrBg*) Ptr;
+    cmsUcrBg* Copy = (cmsUcrBg*) _cmsMallocZero(self ->ContextID, sizeof(cmsUcrBg));
+
+    // Duplicate each member. The results of the individual duplications are not checked.
+    if (Copy != NULL) {
+
+        Copy ->Bg   = cmsDupToneCurve(From ->Bg);
+        Copy ->Ucr  = cmsDupToneCurve(From ->Ucr);
+        Copy ->Desc = cmsMLUdup(From ->Desc);
+    }
+
+    return (void*) Copy;
+
+    cmsUNUSED_PARAMETER(n);
+}
+
+static
+void Type_UcrBg_Free(struct _cms_typehandler_struct* self, void *Ptr)
+{
+   cmsUcrBg* Data = (cmsUcrBg*) Ptr;
+
+   // Release the members that are present, then the container itself
+   if (Data ->Ucr != NULL) {
+       cmsFreeToneCurve(Data ->Ucr);
+   }
+
+   if (Data ->Bg != NULL) {
+       cmsFreeToneCurve(Data ->Bg);
+   }
+
+   if (Data ->Desc != NULL) {
+       cmsMLUfree(Data ->Desc);
+   }
+
+   _cmsFree(self ->ContextID, Ptr);
+}
+
+// ********************************************************************************
+// Type cmsSigCrdInfoType
+// ********************************************************************************
+
+/*
+This type contains the PostScript product name to which this profile corresponds
+and the names of the companion CRDs. Recall that a single profile can generate
+multiple CRDs. It is implemented as a MLU being the language code "PS" and then
+country varies for each element:
+
+                nm: PostScript product name
+                #0: Rendering intent 0 CRD name
+                #1: Rendering intent 1 CRD name
+                #2: Rendering intent 2 CRD name
+                #3: Rendering intent 3 CRD name
+*/
+
+
+
+// Auxiliary: read a string stored as a 32-bit count followed by that many characters
+static
+cmsBool  ReadCountAndSting(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsMLU* mlu, cmsUInt32Number* SizeOfTag, const char* Section)
+{
+    cmsUInt32Number Len;
+    char* Buffer;
+
+    // There must be room for the length prefix
+    if (*SizeOfTag < sizeof(cmsUInt32Number)) return FALSE;
+    if (!_cmsReadUInt32Number(io, &Len)) return FALSE;
+
+    // Guard the addition below against wrap-around, then check that prefix
+    // plus payload fit into what is left of the tag
+    if (Len > UINT_MAX - sizeof(cmsUInt32Number)) return FALSE;
+    if (*SizeOfTag < Len + sizeof(cmsUInt32Number)) return FALSE;
+
+    // One extra byte for the terminator
+    Buffer = (char*) _cmsMalloc(self ->ContextID, Len+1);
+    if (Buffer == NULL) return FALSE;
+
+    if (io ->Read(io, Buffer, sizeof(cmsUInt8Number), Len) == Len) {
+
+        Buffer[Len] = 0;
+
+        // Stored under language "PS" with the section as country code
+        cmsMLUsetASCII(mlu, "PS", Section, Buffer);
+        _cmsFree(self ->ContextID, Buffer);
+
+        *SizeOfTag -= (Len + sizeof(cmsUInt32Number));
+        return TRUE;
+    }
+
+    _cmsFree(self ->ContextID, Buffer);
+    return FALSE;
+}
+
+// Auxiliary: writes the "PS"/Section entry of the MLU as a 32-bit count followed
+// by that many characters. Returns TRUE on success, FALSE if the buffer cannot
+// be allocated, the text cannot be fetched, or a write fails. The temporary
+// buffer is released on every path.
+static
+cmsBool  WriteCountAndSting(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsMLU* mlu, const char* Section)
+{
+    cmsUInt32Number TextSize;
+    char* Text;
+    cmsBool rc = FALSE;
+
+    // First call only asks for the required size
+    TextSize = cmsMLUgetASCII(mlu, "PS", Section, NULL, 0);
+    Text     = (char*) _cmsMalloc(self ->ContextID, TextSize);
+
+    // Without a buffer there is nothing valid to hand over to io ->Write
+    if (Text == NULL) return FALSE;
+
+    if (!_cmsWriteUInt32Number(io, TextSize)) goto Done;
+
+    if (cmsMLUgetASCII(mlu, "PS", Section, Text, TextSize) == 0) goto Done;
+
+    if (!io ->Write(io, TextSize, Text)) goto Done;
+
+    rc = TRUE;
+
+Done:
+    _cmsFree(self ->ContextID, Text);
+    return rc;
+}
+
+// Reads a crdInfo tag into an MLU: five count+string records stored under
+// language "PS" with country codes "nm", "#0", "#1", "#2" and "#3".
+// Returns the MLU and sets *nItems to 1; on failure returns NULL with *nItems 0.
+static
+void *Type_CrdInfo_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsMLU* mlu = cmsMLUalloc(self ->ContextID, 5);
+
+    *nItems = 0;
+
+    // Do not go on parsing (and report one item) without a container
+    if (mlu == NULL) return NULL;
+
+    if (!ReadCountAndSting(self, io, mlu, &SizeOfTag, "nm")) goto Error;
+    if (!ReadCountAndSting(self, io, mlu, &SizeOfTag, "#0")) goto Error;
+    if (!ReadCountAndSting(self, io, mlu, &SizeOfTag, "#1")) goto Error;
+    if (!ReadCountAndSting(self, io, mlu, &SizeOfTag, "#2")) goto Error;
+    if (!ReadCountAndSting(self, io, mlu, &SizeOfTag, "#3")) goto Error;
+
+    *nItems = 1;
+    return (void*) mlu;
+
+Error:
+    cmsMLUfree(mlu);
+    return NULL;
+
+}
+
+static
+cmsBool  Type_CrdInfo_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    // Sections are emitted in this fixed order: product name, then the four CRD names
+    static const char* const Sections[] = { "nm", "#0", "#1", "#2", "#3" };
+
+    cmsMLU* mlu = (cmsMLU*) Ptr;
+    cmsUInt32Number k;
+
+    // Stop at the first section that cannot be written
+    for (k = 0; k < sizeof(Sections) / sizeof(Sections[0]); k++) {
+
+        if (!WriteCountAndSting(self, io, mlu, Sections[k])) return FALSE;
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+static
+void* Type_CrdInfo_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    // Duplicate handler: the data is an MLU, copied with cmsMLUdup()
+    cmsMLU* Original = (cmsMLU*) Ptr;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    return (void*) cmsMLUdup(Original);
+}
+
+static
+void Type_CrdInfo_Free(struct _cms_typehandler_struct* self, void *Ptr)
+{
+    // Free handler: the data is an MLU, released with cmsMLUfree()
+    cmsMLU* mlu = (cmsMLU*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsMLUfree(mlu);
+}
+
+// ********************************************************************************
+// Type cmsSigScreeningType
+// ********************************************************************************
+//
+//The screeningType describes various screening parameters including screen
+//frequency, screening angle, and spot shape.
+
+static
+void *Type_Screening_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt32Number Ch;
+    cmsScreening* Scr = (cmsScreening*) _cmsMallocZero(self ->ContextID, sizeof(cmsScreening));
+
+    if (Scr == NULL) return NULL;
+
+    *nItems = 0;
+
+    // Header: flags and channel count
+    if (!_cmsReadUInt32Number(io, &Scr ->Flag) ||
+        !_cmsReadUInt32Number(io, &Scr ->nChannels)) goto Error;
+
+    // Counts above cmsMAXCHANNELS - 1 are clamped to that value
+    if (Scr ->nChannels > cmsMAXCHANNELS - 1)
+        Scr ->nChannels = cmsMAXCHANNELS - 1;
+
+    // Per channel: frequency, screen angle and spot shape
+    for (Ch = 0; Ch < Scr ->nChannels; Ch++) {
+
+        if (!_cmsRead15Fixed16Number(io, &Scr ->Channels[Ch].Frequency) ||
+            !_cmsRead15Fixed16Number(io, &Scr ->Channels[Ch].ScreenAngle) ||
+            !_cmsReadUInt32Number(io, &Scr ->Channels[Ch].SpotShape)) goto Error;
+    }
+
+    *nItems = 1;
+    return (void*) Scr;
+
+Error:
+    // Scr is known to be non-NULL here
+    _cmsFree(self ->ContextID, Scr);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+static
+cmsBool Type_Screening_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsScreening* Scr = (cmsScreening* ) Ptr;
+    cmsUInt32Number Ch;
+
+    // Header: flags and channel count
+    if (!_cmsWriteUInt32Number(io, Scr ->Flag) ||
+        !_cmsWriteUInt32Number(io, Scr ->nChannels)) return FALSE;
+
+    // Per channel: frequency, screen angle and spot shape
+    for (Ch = 0; Ch < Scr ->nChannels; Ch++) {
+
+        if (!_cmsWrite15Fixed16Number(io, Scr ->Channels[Ch].Frequency) ||
+            !_cmsWrite15Fixed16Number(io, Scr ->Channels[Ch].ScreenAngle) ||
+            !_cmsWriteUInt32Number(io, Scr ->Channels[Ch].SpotShape)) return FALSE;
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+static
+void* Type_Screening_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+   // Duplicate handler: a byte copy of sizeof(cmsScreening) made with _cmsDupMem()
+   cmsUNUSED_PARAMETER(n);
+
+   return _cmsDupMem(self ->ContextID, Ptr, sizeof(cmsScreening));
+}
+
+
+// Free handler for the screening type: releases the block with _cmsFree(),
+// using the context stored in the type handler.
+static
+void Type_Screening_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+   _cmsFree(self ->ContextID, Ptr);
+}
+
+// ********************************************************************************
+// Type cmsSigViewingConditionsType
+// ********************************************************************************
+//
+//This type represents a set of viewing condition parameters including:
+//CIE 'absolute' illuminant white point tristimulus values and CIE 'absolute'
+//surround tristimulus values.
+
+static
+void *Type_ViewingConditions_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsICCViewingConditions* Cond =
+        (cmsICCViewingConditions*) _cmsMallocZero(self ->ContextID, sizeof(cmsICCViewingConditions));
+
+    if (Cond == NULL) return NULL;
+
+    *nItems = 0;
+
+    // Illuminant XYZ, surround XYZ and illuminant type, in that order
+    if (_cmsReadXYZNumber(io, &Cond ->IlluminantXYZ) &&
+        _cmsReadXYZNumber(io, &Cond ->SurroundXYZ) &&
+        _cmsReadUInt32Number(io, &Cond ->IlluminantType)) {
+
+        *nItems = 1;
+        return (void*) Cond;
+    }
+
+    // A read failed: Cond is known to be non-NULL here
+    _cmsFree(self ->ContextID, Cond);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+static
+cmsBool Type_ViewingConditions_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsICCViewingConditions* Cond = (cmsICCViewingConditions* ) Ptr;
+
+    // Illuminant XYZ, surround XYZ and illuminant type, in that order
+    if (!_cmsWriteXYZNumber(io, &Cond ->IlluminantXYZ) ||
+        !_cmsWriteXYZNumber(io, &Cond ->SurroundXYZ) ||
+        !_cmsWriteUInt32Number(io, Cond ->IlluminantType)) return FALSE;
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+static
+void* Type_ViewingConditions_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+   // Duplicate handler: a byte copy of sizeof(cmsICCViewingConditions) made with _cmsDupMem()
+   cmsUNUSED_PARAMETER(n);
+
+   return _cmsDupMem(self->ContextID, Ptr, sizeof(cmsICCViewingConditions));
+}
+
+
+// Free handler for the viewing conditions type: releases the block with
+// _cmsFree(), using the context stored in the type handler.
+static
+void Type_ViewingConditions_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+   _cmsFree(self ->ContextID, Ptr);
+}
+
+
+// ********************************************************************************
+// Type cmsSigMultiProcessElementType
+// ********************************************************************************
+
+
+static
+void* GenericMPEdup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    // Duplicate handler for multi process elements: the data is a stage, copied with cmsStageDup()
+    cmsStage* Original = (cmsStage*) Ptr;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    return (void*) cmsStageDup(Original);
+}
+
+static
+void GenericMPEfree(struct _cms_typehandler_struct* self, void *Ptr)
+{
+    // Free handler for multi process elements: the data is a stage, released with cmsStageFree()
+    cmsStage* Stage = (cmsStage*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsStageFree(Stage);
+}
+
+// Each curve is stored in one or more curve segments, with break-points specified between curve segments.
+// The first curve segment always starts at -Infinity, and the last curve segment always ends at +Infinity. The
+// first and last curve segments shall be specified in terms of a formula, whereas the other segments shall be
+// specified either in terms of a formula, or by a sampled curve.
+
+
+// Read an embedded segmented curve
+// Layout consumed here: signature, 32 bits that are skipped, 16-bit segment count,
+// 16 bits that are skipped, nSegments-1 break-points, then one element per segment.
+// Returns the tone curve built by cmsBuildSegmentedToneCurve(), or NULL if the
+// signature is wrong, a read fails, an allocation fails or an element is unknown.
+static
+cmsToneCurve* ReadSegmentedCurve(struct _cms_typehandler_struct* self, cmsIOHANDLER* io)
+{
+    cmsCurveSegSignature ElementSig;
+    cmsUInt32Number i, j;
+    cmsUInt16Number nSegments;
+    cmsCurveSegment*  Segments;
+    cmsToneCurve* Curve;
+    cmsFloat32Number PrevBreak = MINUS_INF;    // - infinite
+
+    // Take signature and channels for each element.
+     if (!_cmsReadUInt32Number(io, (cmsUInt32Number*) &ElementSig)) return NULL;
+
+     // That should be a segmented curve
+     if (ElementSig != cmsSigSegmentedCurve) return NULL;
+
+     // Skip 32 bits, read the segment count, skip 16 more bits
+     if (!_cmsReadUInt32Number(io, NULL)) return NULL;
+     if (!_cmsReadUInt16Number(io, &nSegments)) return NULL;
+     if (!_cmsReadUInt16Number(io, NULL)) return NULL;
+
+     // At least one segment is required. The array is zero-initialised, so the
+     // cleanup code can test SampledPoints on every entry.
+     if (nSegments < 1) return NULL;
+     Segments = (cmsCurveSegment*) _cmsCalloc(self ->ContextID, nSegments, sizeof(cmsCurveSegment));
+     if (Segments == NULL) return NULL;
+
+     // Read breakpoints
+     // Each break-point closes segment i and opens segment i+1
+     for (i=0; i < (cmsUInt32Number) nSegments - 1; i++) {
+
+         Segments[i].x0 = PrevBreak;
+         if (!_cmsReadFloat32Number(io, &Segments[i].x1)) goto Error;
+         PrevBreak = Segments[i].x1;
+     }
+
+     // The last segment starts at the last break-point and is open-ended
+     Segments[nSegments-1].x0 = PrevBreak;
+     Segments[nSegments-1].x1 = PLUS_INF;     // A big cmsFloat32Number number
+
+     // Read segments
+     for (i=0; i < nSegments; i++) {
+
+          // Element signature followed by 32 bits that are skipped
+          if (!_cmsReadUInt32Number(io, (cmsUInt32Number*) &ElementSig)) goto Error;
+          if (!_cmsReadUInt32Number(io, NULL)) goto Error;
+
+           switch (ElementSig) {
+
+            case cmsSigFormulaCurveSeg: {
+
+                cmsUInt16Number Type;
+                // Number of parameters read for stored types 0, 1 and 2
+                cmsUInt32Number ParamsByType[] = {4, 5, 5 };
+
+                if (!_cmsReadUInt16Number(io, &Type)) goto Error;
+                if (!_cmsReadUInt16Number(io, NULL)) goto Error;
+
+                // The stored type is kept in the segment shifted by 6;
+                // stored types above 2 are rejected
+                Segments[i].Type = Type + 6;
+                if (Type > 2) goto Error;
+
+                for (j=0; j < ParamsByType[Type]; j++) {
+
+                    cmsFloat32Number f;
+                    if (!_cmsReadFloat32Number(io, &f)) goto Error;
+                    Segments[i].Params[j] = f;
+                }
+                }
+                break;
+
+
+            case cmsSigSampledCurveSeg: {
+                cmsUInt32Number Count;
+
+                if (!_cmsReadUInt32Number(io, &Count)) goto Error;
+
+                // Count samples follow, stored as 32-bit floats
+                Segments[i].nGridPoints = Count;
+                Segments[i].SampledPoints = (cmsFloat32Number*) _cmsCalloc(self ->ContextID, Count, sizeof(cmsFloat32Number));
+                if (Segments[i].SampledPoints == NULL) goto Error;
+
+                for (j=0; j < Count; j++) {
+                    if (!_cmsReadFloat32Number(io, &Segments[i].SampledPoints[j])) goto Error;
+                }
+                }
+                break;
+
+            default:
+                {
+                char String[5];
+
+                // Report the unknown signature in readable form, then fail
+                _cmsTagSignature2String(String, (cmsTagSignature) ElementSig);
+                cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown curve element type '%s' found.", String);
+                }
+                goto Error;
+
+         }
+     }
+
+     Curve = cmsBuildSegmentedToneCurve(self ->ContextID, nSegments, Segments);
+
+     // The temporary segment description is released whether or not the build succeeded
+     for (i=0; i < nSegments; i++) {
+         if (Segments[i].SampledPoints) _cmsFree(self ->ContextID, Segments[i].SampledPoints);
+     }
+     _cmsFree(self ->ContextID, Segments);
+     return Curve;
+
+Error:
+     // Release whatever sample tables were allocated before the failure
+     if (Segments) {
+         for (i=0; i < nSegments; i++) {
+             if (Segments[i].SampledPoints) _cmsFree(self ->ContextID, Segments[i].SampledPoints);
+         }
+         _cmsFree(self ->ContextID, Segments);
+     }
+     return NULL;
+}
+
+
+static
+cmsBool ReadMPECurve(struct _cms_typehandler_struct* self,
+                     cmsIOHANDLER* io,
+                     void* Cargo,
+                     cmsUInt32Number n,
+                     cmsUInt32Number SizeOfTag)
+{
+      // Position-table callback: Cargo is the array of curves, slot n receives the result
+      cmsToneCurve** Slot = ((cmsToneCurve**) Cargo) + n;
+
+      *Slot = ReadSegmentedCurve(self, io);
+
+      if (*Slot == NULL) return FALSE;
+      return TRUE;
+
+      cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+static
+void *Type_MPEcurve_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsStage* Stage = NULL;
+    cmsUInt16Number nIn, nOut;
+    cmsUInt32Number k, TagStart;
+    cmsToneCurve** Curves;
+
+    *nItems = 0;
+
+    // Base for the element offsets: current position moved back by the size of the type base
+    TagStart = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    if (!_cmsReadUInt16Number(io, &nIn)) return NULL;
+    if (!_cmsReadUInt16Number(io, &nOut)) return NULL;
+
+    // A curve set must have as many outputs as inputs
+    if (nIn != nOut) return NULL;
+
+    // Zero-initialised, so slots never filled can be recognised during cleanup
+    Curves = (cmsToneCurve**) _cmsCalloc(self ->ContextID, nIn, sizeof(cmsToneCurve*));
+    if (Curves == NULL) return NULL;
+
+    // The stage is only built when every curve could be read
+    if (ReadPositionTable(self, io, nIn, TagStart, Curves, ReadMPECurve))
+        Stage = cmsStageAllocToneCurves(self ->ContextID, nIn, Curves);
+
+    // The temporary curves are released regardless of the outcome
+    for (k = 0; k < nIn; k++) {
+        if (Curves[k]) cmsFreeToneCurve(Curves[k]);
+    }
+    _cmsFree(self ->ContextID, Curves);
+
+    // *nItems is still 0 at this point
+    if (Stage != NULL) *nItems = 1U;
+    return Stage;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+// Write a single segmented curve. Only the segment count and the formula type of
+// each segment are validated; all other segment contents are written as they are.
+// Layout: signature, 32-bit zero, 16-bit segment count, 16-bit zero, nSegments-1
+// break-points, then one sampled or formula element per segment.
+// Returns TRUE on success, FALSE on a write failure or an unrepresentable curve.
+static
+cmsBool WriteSegmentedCurve(cmsIOHANDLER* io, cmsToneCurve* g)
+{
+    cmsUInt32Number i, j;
+    cmsCurveSegment* Segments = g ->Segments;
+    cmsUInt32Number nSegments = g ->nSegments;
+
+    // With no segments "nSegments - 1" below would wrap around and the loop
+    // would run far past the array. The count is stored in 16 bits, so
+    // anything larger cannot be represented either.
+    if (nSegments < 1 || nSegments > 0xFFFF) goto Error;
+    if (Segments == NULL) goto Error;
+
+    if (!_cmsWriteUInt32Number(io, cmsSigSegmentedCurve)) goto Error;
+    if (!_cmsWriteUInt32Number(io, 0)) goto Error;
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) nSegments)) goto Error;
+    if (!_cmsWriteUInt16Number(io, 0)) goto Error;
+
+    // Write the break-points
+    for (i=0; i < nSegments - 1; i++) {
+        if (!_cmsWriteFloat32Number(io, Segments[i].x1)) goto Error;
+    }
+
+    // Write the segments
+    for (i=0; i < nSegments; i++) {
+
+        cmsCurveSegment* ActualSeg = Segments + i;
+
+        if (ActualSeg -> Type == 0) {
+
+            // This is a sampled curve
+            if (!_cmsWriteUInt32Number(io, (cmsUInt32Number) cmsSigSampledCurveSeg)) goto Error;
+            if (!_cmsWriteUInt32Number(io, 0)) goto Error;
+            if (!_cmsWriteUInt32Number(io, ActualSeg -> nGridPoints)) goto Error;
+
+            for (j=0; j < ActualSeg -> nGridPoints; j++) {
+                if (!_cmsWriteFloat32Number(io, ActualSeg -> SampledPoints[j])) goto Error;
+            }
+
+        }
+        else {
+            int Type;
+            // Number of parameters written for stored types 0, 1 and 2
+            cmsUInt32Number ParamsByType[] = { 4, 5, 5 };
+
+            // This is a formula-based
+            if (!_cmsWriteUInt32Number(io, (cmsUInt32Number) cmsSigFormulaCurveSeg)) goto Error;
+            if (!_cmsWriteUInt32Number(io, 0)) goto Error;
+
+            // Segment types 6, 7 and 8 are stored as 0, 1 and 2; anything else is refused
+            Type = ActualSeg ->Type - 6;
+            if (Type > 2 || Type < 0) goto Error;
+
+            if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) Type)) goto Error;
+            if (!_cmsWriteUInt16Number(io, 0)) goto Error;
+
+            for (j=0; j < ParamsByType[Type]; j++) {
+                if (!_cmsWriteFloat32Number(io, (cmsFloat32Number) ActualSeg ->Params[j])) goto Error;
+            }
+        }
+
+        // It seems there is no need to align. Code is here, and for safety commented out
+        // if (!_cmsWriteAlignment(io)) goto Error;
+    }
+
+    return TRUE;
+
+Error:
+    return FALSE;
+}
+
+
+static
+cmsBool WriteMPECurve(struct _cms_typehandler_struct* self,
+                      cmsIOHANDLER* io,
+                      void* Cargo,
+                      cmsUInt32Number n,
+                      cmsUInt32Number SizeOfTag)
+{
+    // Position-table callback: Cargo is the curve set of the stage, curve n is written
+    cmsToneCurve* Curve = ((_cmsStageToneCurvesData*) Cargo) ->TheCurves[n];
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+    cmsUNUSED_PARAMETER(self);
+
+    return WriteSegmentedCurve(io, Curve);
+}
+
+// Write a curve, checking first for validity
+static
+cmsBool  Type_MPEcurve_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsStage* Stage = (cmsStage*) Ptr;
+    _cmsStageToneCurvesData* CurveSet = (_cmsStageToneCurvesData*) Stage ->Data;
+
+    // Base for the element offsets: current position moved back by the size of the type base
+    cmsUInt32Number TagStart = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // Header: the input channel count is written twice, since for curves
+    // the number of outputs equals the number of inputs
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) Stage ->InputChannels) ||
+        !_cmsWriteUInt16Number(io, (cmsUInt16Number) Stage ->InputChannels)) return FALSE;
+
+    // Position table plus one segmented curve per channel
+    if (!WritePositionTable(self, io, 0, Stage ->InputChannels, TagStart, CurveSet, WriteMPECurve))
+        return FALSE;
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+
+// The matrix is organized as an array of PxQ+Q elements, where P is the number of input channels to the
+// matrix, and Q is the number of output channels. The matrix elements are each float32Numbers. The array
+// is organized as follows:
+// array = [e11, e12, ..., e1P, e21, e22, ..., e2P, ..., eQ1, eQ2, ..., eQP, e1, e2, ..., eQ]
+
+// Reads a matrix element: 16-bit input and output channel counts, then
+// InputChans*OutputChans coefficients and OutputChans offsets, all stored as
+// 32-bit floats and widened to 64-bit floats for cmsStageAllocMatrix().
+// Returns the new stage and sets *nItems to 1; on any failure returns NULL with
+// *nItems set to 0.
+static
+void *Type_MPEmatrix_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsStage* mpe = NULL;
+    cmsUInt16Number   InputChans, OutputChans;
+    cmsUInt32Number   nElems, i;
+    cmsFloat64Number* Matrix;
+    cmsFloat64Number* Offsets;
+
+    // Report no items unless the stage is actually built
+    *nItems = 0;
+
+    if (!_cmsReadUInt16Number(io, &InputChans)) return NULL;
+    if (!_cmsReadUInt16Number(io, &OutputChans)) return NULL;
+
+
+    // Input and output chans may be ANY (up to 0xffff),
+    // but counts at or above cmsMAXCHANNELS are rejected for now
+    if (InputChans >= cmsMAXCHANNELS) return NULL;
+    if (OutputChans >= cmsMAXCHANNELS) return NULL;
+
+    nElems = (cmsUInt32Number) InputChans * OutputChans;
+
+    Matrix = (cmsFloat64Number*) _cmsCalloc(self ->ContextID, nElems, sizeof(cmsFloat64Number));
+    if (Matrix == NULL) return NULL;
+
+    Offsets = (cmsFloat64Number*) _cmsCalloc(self ->ContextID, OutputChans, sizeof(cmsFloat64Number));
+    if (Offsets == NULL) {
+
+        _cmsFree(self ->ContextID, Matrix);
+        return NULL;
+    }
+
+    // Coefficients first ...
+    for (i=0; i < nElems; i++) {
+
+        cmsFloat32Number v;
+
+        if (!_cmsReadFloat32Number(io, &v)) goto Done;
+        Matrix[i] = v;
+    }
+
+    // ... then one offset per output channel
+    for (i=0; i < OutputChans; i++) {
+
+        cmsFloat32Number v;
+
+        if (!_cmsReadFloat32Number(io, &v)) goto Done;
+        Offsets[i] = v;
+    }
+
+    // Rows are output channels, columns are input channels
+    mpe = cmsStageAllocMatrix(self ->ContextID, OutputChans, InputChans, Matrix, Offsets);
+    if (mpe != NULL) *nItems = 1;
+
+Done:
+    // The temporary arrays are released on success and on failure alike
+    _cmsFree(self ->ContextID, Matrix);
+    _cmsFree(self ->ContextID, Offsets);
+
+    return mpe;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+// Writes a matrix element: input and output channel counts as uInt16, then every coefficient
+// and finally one offset per output channel, all narrowed to float32. When the stage carries
+// no offsets, zeros are written in their place.
+static
+cmsBool  Type_MPEmatrix_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsStage* Stage = (cmsStage*) Ptr;
+    _cmsStageMatrixData* Data = (_cmsStageMatrixData*) Stage ->Data;
+    cmsUInt32Number k, nCoefs;
+
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) Stage ->InputChannels) ||
+        !_cmsWriteUInt16Number(io, (cmsUInt16Number) Stage ->OutputChannels)) return FALSE;
+
+    nCoefs = Stage ->InputChannels * Stage ->OutputChannels;
+
+    for (k = 0; k < nCoefs; k++) {
+
+        cmsFloat32Number Coef = (cmsFloat32Number) Data ->Double[k];
+
+        if (!_cmsWriteFloat32Number(io, Coef)) return FALSE;
+    }
+
+    for (k = 0; k < Stage ->OutputChannels; k++) {
+
+        cmsFloat32Number Bias = (Data ->Offset == NULL) ? 0 : (cmsFloat32Number) Data ->Offset[k];
+
+        if (!_cmsWriteFloat32Number(io, Bias)) return FALSE;
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+
+// Reads a floating point CLUT element: the channel counts, the 16 grid dimension bytes and
+// then every table entry. Returns the new stage, or NULL on failure.
+static
+void *Type_MPEclut_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt8Number RawDims[16];
+    cmsUInt32Number Grid[MAX_INPUT_DIMENSIONS];
+    cmsUInt32Number k, nDims;
+    cmsUInt16Number nIn, nOut;
+    cmsStage* Stage = NULL;
+    _cmsStageCLutData* Table;
+
+    if (!_cmsReadUInt16Number(io, &nIn)) return NULL;
+    if (!_cmsReadUInt16Number(io, &nOut)) return NULL;
+
+    if (nIn == 0 || nOut == 0) goto Error;
+
+    if (io ->Read(io, RawDims, sizeof(cmsUInt8Number), 16) != 16)
+        goto Error;
+
+    // At most MAX_INPUT_DIMENSIONS entries are taken, widened to cmsUInt32Number
+    nDims = nIn;
+    if (nDims > MAX_INPUT_DIMENSIONS) nDims = (cmsUInt32Number) MAX_INPUT_DIMENSIONS;
+
+    k = 0;
+    while (k < nDims) {
+
+        // Impossible value, 0 for no CLUT and then 2 at least
+        if (RawDims[k] == 1) goto Error;
+
+        Grid[k] = (cmsUInt32Number) RawDims[k];
+        k++;
+    }
+
+    // Allocate the true CLUT
+    Stage = cmsStageAllocCLutFloatGranular(self ->ContextID, Grid, nIn, nOut, NULL);
+    if (Stage == NULL) goto Error;
+
+    // Fill the table straight from the stream
+    Table = (_cmsStageCLutData*) Stage ->Data;
+    k = 0;
+    while (k < Table ->nEntries) {
+
+        if (!_cmsReadFloat32Number(io, &Table ->Tab.TFloat[k])) goto Error;
+        k++;
+    }
+
+    *nItems = 1;
+    return Stage;
+
+Error:
+    *nItems = 0;
+    if (Stage != NULL) cmsStageFree(Stage);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+// Write a CLUT in floating point: channel counts, the 16 grid dimension bytes and then the
+// table entries. Fails when the stage has more than MAX_INPUT_DIMENSIONS inputs or does not
+// hold float values.
+static
+cmsBool  Type_MPEclut_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsStage* Stage = (cmsStage*) Ptr;
+    _cmsStageCLutData* Table = (_cmsStageCLutData*) Stage ->Data;
+    cmsUInt8Number RawDims[16];  // 16 because the spec says 16 and not max number of channels
+    cmsUInt32Number k;
+
+    // More inputs than lcms supports cannot be written
+    if (Stage -> InputChannels > MAX_INPUT_DIMENSIONS) return FALSE;
+
+    // Only floats are supported in MPE
+    if (Table ->HasFloatValues == FALSE) return FALSE;
+
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) Stage ->InputChannels) ||
+        !_cmsWriteUInt16Number(io, (cmsUInt16Number) Stage ->OutputChannels)) return FALSE;
+
+    // Dimensions beyond the input channel count stay at zero
+    memset(RawDims, 0, sizeof(RawDims));
+
+    k = 0;
+    while (k < Stage ->InputChannels) {
+
+        RawDims[k] = (cmsUInt8Number) Table ->Params ->nSamples[k];
+        k++;
+    }
+
+    if (!io ->Write(io, 16, RawDims)) return FALSE;
+
+    k = 0;
+    while (k < Table ->nEntries) {
+
+        if (!_cmsWriteFloat32Number(io, Table ->Tab.TFloat[k])) return FALSE;
+        k++;
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+
+// This is the list of built-in MPE types, chained as a linked list: every entry points to the next one and the last one ends in NULL
+static _cmsTagTypeLinkedList SupportedMPEtypes[] = {
+// Signatures registered with no handler functions at all; ReadMPEElem skips any element whose read handler is NULL
+{{ (cmsTagTypeSignature) cmsSigBAcsElemType, NULL, NULL, NULL, NULL, NULL, 0 }, &SupportedMPEtypes[1] },   // Ignore those elements for now
+{{ (cmsTagTypeSignature) cmsSigEAcsElemType, NULL, NULL, NULL, NULL, NULL, 0 }, &SupportedMPEtypes[2] },   // (That's what the spec says)
+// Elements that do have handlers: curve sets, matrices and CLUTs
+{TYPE_MPE_HANDLER((cmsTagTypeSignature) cmsSigCurveSetElemType,     MPEcurve),      &SupportedMPEtypes[3] },
+{TYPE_MPE_HANDLER((cmsTagTypeSignature) cmsSigMatrixElemType,       MPEmatrix),     &SupportedMPEtypes[4] },
+{TYPE_MPE_HANDLER((cmsTagTypeSignature) cmsSigCLutElemType,         MPEclut),        NULL },
+};
+// NOTE(review): presumably the default, empty chunk of plug-in supplied MPE types -- confirm against the plug-in code
+_cmsTagTypePluginChunkType _cmsMPETypePluginChunk = { NULL };
+
+// Callback handed to ReadPositionTable: reads one processing element from io and, when the
+// element has a read handler, appends the resulting stage to the pipeline passed in Cargo.
+// Returns FALSE on read errors, on unknown element types and when the stage cannot be inserted.
+static
+cmsBool ReadMPEElem(struct _cms_typehandler_struct* self,
+                    cmsIOHANDLER* io,
+                    void* Cargo,
+                    cmsUInt32Number n,
+                    cmsUInt32Number SizeOfTag)
+{
+    cmsPipeline* Pipe = (cmsPipeline*) Cargo;
+    _cmsTagTypePluginChunkType* PluginChunk = ( _cmsTagTypePluginChunkType*) _cmsContextGetClientChunk(self->ContextID, MPEPlugin);
+    cmsStageSignature Sig;
+    cmsTagTypeHandler* Handler;
+    cmsUInt32Number nRead;
+    cmsStage* Stage;
+
+    // Element signature, followed by a reserved placeholder which is discarded
+    if (!_cmsReadUInt32Number(io, (cmsUInt32Number*) &Sig) ||
+        !_cmsReadUInt32Number(io, NULL)) return FALSE;
+
+    // Look the type up among the plug-in supplied and the built-in ones
+    Handler = GetHandler((cmsTagTypeSignature) Sig, PluginChunk ->TagTypes, SupportedMPEtypes);
+    if (Handler == NULL)  {
+
+        char Name[5];
+
+        _cmsTagSignature2String(Name, (cmsTagSignature) Sig);
+
+        // An unknown element was found.
+        cmsSignalError(self ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown MPE type '%s' found.", Name);
+        return FALSE;
+    }
+
+    // No read method means the element is just ignored (valid for cmsSigBAcsElemType and cmsSigEAcsElemType)
+    if (Handler ->ReadPtr == NULL) return TRUE;
+
+    // This is a real element: read it (no size is given) and add it at the end of the pipeline
+    Stage = (cmsStage*) Handler ->ReadPtr(self, io, &nRead, SizeOfTag);
+
+    return cmsPipelineInsertStage(Pipe, cmsAT_END, Stage) ? TRUE : FALSE;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+    cmsUNUSED_PARAMETER(n);
+}
+
+
+// This is the main dispatcher for MPE. It reads the channel counts and the element count and
+// lets ReadPositionTable hand every element to ReadMPEElem. The pipeline is accepted only if
+// it ends up with the channel counts announced in the header.
+static
+void *Type_MPE_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsPipeline* Pipe;
+    cmsUInt32Number Base, nElements;
+    cmsUInt16Number nIn, nOut;
+
+    // The current position, minus the tag base header, is the basis for element offsets
+    Base = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    if (!_cmsReadUInt16Number(io, &nIn)) return NULL;
+    if (!_cmsReadUInt16Number(io, &nOut)) return NULL;
+
+    // Both counts have to be above zero and below cmsMAXCHANNELS
+    if (nIn == 0 || nOut == 0) return NULL;
+    if (nIn >= cmsMAXCHANNELS || nOut >= cmsMAXCHANNELS) return NULL;
+
+    // Start from an empty pipeline
+    Pipe = cmsPipelineAlloc(self ->ContextID, nIn, nOut);
+    if (Pipe == NULL) return NULL;
+
+    if (_cmsReadUInt32Number(io, &nElements) &&
+        ReadPositionTable(self, io, nElements, Base, Pipe, ReadMPEElem) &&
+        nIn == Pipe ->InputChannels &&
+        nOut == Pipe ->OutputChannels) {
+
+        // Success
+        *nItems = 1;
+        return Pipe;
+    }
+
+    // Something went wrong, or the channel counts do not match
+    cmsPipelineFree(Pipe);
+    *nItems = 0;
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+
+// Writes a pipeline as a multi process elements tag. This one is a little bit more complex, so
+// we don't use position tables this time: a directory of zeros is emitted first, then every
+// stage is written while its offset (relative to the start of the tag) and its size are
+// recorded, and finally the directory is overwritten with the recorded values.
+// Returns TRUE on success. Returns FALSE on I/O or allocation failure, and when a stage has
+// no registered handler or its handler lacks a write function.
+static
+cmsBool Type_MPE_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsUInt32Number i, BaseOffset, DirectoryPos, CurrentPos;
+    cmsUInt32Number inputChan, outputChan;
+    cmsUInt32Number ElemCount;
+    cmsUInt32Number *ElementOffsets = NULL, *ElementSizes = NULL, Before;
+    cmsStageSignature ElementSig;
+    cmsPipeline* Lut = (cmsPipeline*) Ptr;
+    cmsStage* Elem = Lut ->Elements;
+    cmsTagTypeHandler* TypeHandler;
+    cmsBool rc = FALSE;
+    _cmsTagTypePluginChunkType* MPETypePluginChunk  = ( _cmsTagTypePluginChunkType*) _cmsContextGetClientChunk(self->ContextID, MPEPlugin);
+
+    // Offsets stored in the directory are relative to the start of the tag, base header included
+    BaseOffset = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    inputChan  = cmsPipelineInputChannels(Lut);
+    outputChan = cmsPipelineOutputChannels(Lut);
+    ElemCount  = cmsPipelineStageCount(Lut);
+
+    ElementOffsets = (cmsUInt32Number *) _cmsCalloc(self ->ContextID, ElemCount, sizeof(cmsUInt32Number));
+    if (ElementOffsets == NULL) goto Done;
+
+    ElementSizes = (cmsUInt32Number *) _cmsCalloc(self ->ContextID, ElemCount, sizeof(cmsUInt32Number));
+    if (ElementSizes == NULL) goto Done;
+
+    // Write the head. The element count is a 32 bit field, so it is written in full
+    // instead of being truncated to 16 bits first
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) inputChan)) goto Done;
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) outputChan)) goto Done;
+    if (!_cmsWriteUInt32Number(io, ElemCount)) goto Done;
+
+    DirectoryPos = io ->Tell(io);
+
+    // Write a fake directory to be filled later on
+    for (i=0; i < ElemCount; i++) {
+        if (!_cmsWriteUInt32Number(io, 0)) goto Done;  // Offset
+        if (!_cmsWriteUInt32Number(io, 0)) goto Done;  // size
+    }
+
+    // Write each single tag. Keep track of the size as well.
+    for (i=0; i < ElemCount; i++) {
+
+        ElementOffsets[i] = io ->Tell(io) - BaseOffset;
+
+        ElementSig = Elem ->Type;
+
+        // The built-in list contains entries whose function pointers are all NULL, so a
+        // handler without a write function is treated the same as no handler at all
+        TypeHandler = GetHandler((cmsTagTypeSignature) ElementSig, MPETypePluginChunk->TagTypes, SupportedMPEtypes);
+        if (TypeHandler == NULL || TypeHandler ->WritePtr == NULL)  {
+
+                char String[5];
+
+                _cmsTagSignature2String(String, (cmsTagSignature) ElementSig);
+
+                 // An unknown element was found.
+                 cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Found unknown MPE type '%s'", String);
+                 goto Done;
+        }
+
+        // Element signature and reserved field, then the element itself plus alignment
+        if (!_cmsWriteUInt32Number(io, ElementSig)) goto Done;
+        if (!_cmsWriteUInt32Number(io, 0)) goto Done;
+        Before = io ->Tell(io);
+        if (!TypeHandler ->WritePtr(self, io, Elem, 1)) goto Done;
+        if (!_cmsWriteAlignment(io)) goto Done;
+
+        // The recorded size is measured from just after the signature and reserved field
+        ElementSizes[i] = io ->Tell(io) - Before;
+
+        Elem = Elem ->Next;
+    }
+
+    // Write the directory
+    CurrentPos = io ->Tell(io);
+
+    if (!io ->Seek(io, DirectoryPos)) goto Done;
+
+    for (i=0; i < ElemCount; i++) {
+        if (!_cmsWriteUInt32Number(io, ElementOffsets[i])) goto Done;
+        if (!_cmsWriteUInt32Number(io, ElementSizes[i])) goto Done;
+    }
+
+    // Go back to the end of the data just written
+    if (!io ->Seek(io, CurrentPos)) goto Done;
+
+    rc = TRUE;
+
+    // Common exit: the bookkeeping arrays are released on success and on failure alike
+Done:
+    if (ElementOffsets != NULL) _cmsFree(self ->ContextID, ElementOffsets);
+    if (ElementSizes != NULL) _cmsFree(self ->ContextID, ElementSizes);
+    return rc;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+// Duplicates the pipeline stored in the tag by means of cmsPipelineDup.
+static
+void* Type_MPE_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsPipeline* Copy = cmsPipelineDup((cmsPipeline*) Ptr);
+
+    return (void*) Copy;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Releases the pipeline stored in the tag by means of cmsPipelineFree.
+static
+void Type_MPE_Free(struct _cms_typehandler_struct* self, void *Ptr)
+{
+    cmsPipeline* Pipe = (cmsPipeline*) Ptr;
+
+    cmsPipelineFree(Pipe);
+    return;
+
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+// ********************************************************************************
+// Type cmsSigVcgtType
+// ********************************************************************************
+
+
+#define cmsVideoCardGammaTableType    0   // vcgt flavor: gamma stored as tables
+#define cmsVideoCardGammaFormulaType  1   // vcgt flavor: gamma stored as a formula
+
+// Used internally. Per-channel parameters of the formula flavor: Y = (Max - Min) * (X ^ Gamma) + Min
+typedef struct {
+    double Gamma;
+    double Min;
+    double Max;
+} _cmsVCGTGAMMA;
+
+
+// Reads a video card gamma table tag (vcgt). Two flavors are understood: three tables of
+// 8 or 16 bit samples, or three gamma formulas. Returns an array of 3 tone curves, or NULL
+// (with *nItems left at 0) on any error.
+static
+void *Type_vcgt_Read(struct _cms_typehandler_struct* self,
+                     cmsIOHANDLER* io,
+                     cmsUInt32Number* nItems,
+                     cmsUInt32Number SizeOfTag)
+{
+    cmsToneCurve** Triple;
+    cmsUInt32Number TagType, Channel, k;
+
+    *nItems = 0;
+
+    // The flavor comes first
+    if (!_cmsReadUInt32Number(io, &TagType)) return NULL;
+
+    // Room for the three curve pointers, all of them NULL for now
+    Triple = ( cmsToneCurve**) _cmsCalloc(self ->ContextID, 3, sizeof(cmsToneCurve*));
+    if (Triple == NULL) return NULL;
+
+    if (TagType == cmsVideoCardGammaTableType) {
+
+        // Gamma is stored as a table
+        cmsUInt16Number nChannels, nEntries, nBytes;
+
+        // Channel count should be 3 (we don't support monochrome this time)
+        if (!_cmsReadUInt16Number(io, &nChannels)) goto Error;
+
+        if (nChannels != 3) {
+            cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported number of channels for VCGT '%d'", nChannels);
+            goto Error;
+        }
+
+        // Entries per table and bytes per entry
+        if (!_cmsReadUInt16Number(io, &nEntries)) goto Error;
+        if (!_cmsReadUInt16Number(io, &nBytes)) goto Error;
+
+        // Adobe's quirk fixup. Fixing broken profiles...
+        if (nEntries == 256 && nBytes == 1 && SizeOfTag == 1576)
+            nBytes = 2;
+
+        // One tabulated curve per channel
+        for (Channel = 0; Channel < 3; Channel++) {
+
+            Triple[Channel] = cmsBuildTabulatedToneCurve16(self ->ContextID, nEntries, NULL);
+            if (Triple[Channel] == NULL) goto Error;
+
+            if (nBytes == 1) {
+
+                // One byte per sample, 0..255, expanded to 16 bits
+                for (k = 0; k < nEntries; k++) {
+
+                    cmsUInt8Number Sample;
+
+                    if (!_cmsReadUInt8Number(io, &Sample)) goto Error;
+                    Triple[Channel] ->Table16[k] = FROM_8_TO_16(Sample);
+                }
+            }
+            else if (nBytes == 2) {
+
+                // One word per sample, 0..65535
+                if (!_cmsReadUInt16Array(io, nEntries, Triple[Channel]->Table16)) goto Error;
+            }
+            else {
+
+                // Unsupported
+                cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported bit depth for VCGT '%d'", nBytes * 8);
+                goto Error;
+            }
+        }
+    }
+    else if (TagType == cmsVideoCardGammaFormulaType) {
+
+        // In this case, gamma is stored as a formula, one per channel
+        for (Channel = 0; Channel < 3; Channel++) {
+
+            _cmsVCGTGAMMA Spec;
+            double Params[10];
+
+            if (!_cmsRead15Fixed16Number(io, &Spec.Gamma)) goto Error;
+            if (!_cmsRead15Fixed16Number(io, &Spec.Min)) goto Error;
+            if (!_cmsRead15Fixed16Number(io, &Spec.Max)) goto Error;
+
+            // Parametric curve type 5 is:
+            // Y = (aX + b)^Gamma + e | X >= d
+            // Y = cX + f             | X < d
+
+            // vcgt formula is:
+            // Y = (Max - Min) * (X ^ Gamma) + Min
+
+            // So, the translation is
+            // a = (Max - Min) ^ ( 1 / Gamma)
+            // e = Min
+            // b=c=d=f=0
+
+            Params[0] = Spec.Gamma;
+            Params[1] = pow((Spec.Max - Spec.Min), (1.0 / Spec.Gamma));
+            Params[2] = 0;
+            Params[3] = 0;
+            Params[4] = 0;
+            Params[5] = Spec.Min;
+            Params[6] = 0;
+
+            Triple[Channel] = cmsBuildParametricToneCurve(self ->ContextID, 5, Params);
+            if (Triple[Channel] == NULL) goto Error;
+        }
+    }
+    else {
+
+        // Unsupported
+        cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported tag type for VCGT '%d'", TagType);
+        goto Error;
+    }
+
+    *nItems = 1;
+    return (void*) Triple;
+
+// Regret,  free all resources
+Error:
+
+    cmsFreeToneCurveTriple(Triple);
+    _cmsFree(self ->ContextID, Triple);
+    return NULL;
+
+     cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+// Writes a vcgt tag. We don't support all flavors: when the three curves are parametric of
+// type 5 the formula flavor is written, otherwise every curve is sampled into a table of
+// 256 words.
+static
+cmsBool Type_vcgt_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsToneCurve** Triple =  (cmsToneCurve**) Ptr;
+    cmsUInt32Number Channel, k;
+    cmsBool UseFormula;
+
+    UseFormula = cmsGetToneCurveParametricType(Triple[0]) == 5 &&
+                 cmsGetToneCurveParametricType(Triple[1]) == 5 &&
+                 cmsGetToneCurveParametricType(Triple[2]) == 5;
+
+    if (!UseFormula) {
+
+        // Always store as a table of 256 words: flavor, 3 channels, 256 entries, 2 bytes each
+        if (!_cmsWriteUInt32Number(io, cmsVideoCardGammaTableType)) return FALSE;
+        if (!_cmsWriteUInt16Number(io, 3)) return FALSE;
+        if (!_cmsWriteUInt16Number(io, 256)) return FALSE;
+        if (!_cmsWriteUInt16Number(io, 2)) return FALSE;
+
+        for (Channel = 0; Channel < 3; Channel++) {
+            for (k = 0; k < 256; k++) {
+
+                cmsFloat32Number Level = cmsEvalToneCurveFloat(Triple[Channel], (cmsFloat32Number) (k / 255.0));
+                cmsUInt16Number  Word  = _cmsQuickSaturateWord(Level * 65535.0);
+
+                if (!_cmsWriteUInt16Number(io, Word)) return FALSE;
+            }
+        }
+
+        return TRUE;
+    }
+
+    if (!_cmsWriteUInt32Number(io, cmsVideoCardGammaFormulaType)) return FALSE;
+
+    // Save parameters, undoing the translation made when the formula was read
+    for (Channel = 0; Channel < 3; Channel++) {
+
+        _cmsVCGTGAMMA Spec;
+
+        Spec.Gamma = Triple[Channel] ->Segments[0].Params[0];
+        Spec.Min   = Triple[Channel] ->Segments[0].Params[5];
+        Spec.Max   = pow(Triple[Channel] ->Segments[0].Params[1], Spec.Gamma) + Spec.Min;
+
+        if (!_cmsWrite15Fixed16Number(io, Spec.Gamma)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, Spec.Min)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, Spec.Max)) return FALSE;
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(self);
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+// Duplicates a vcgt tag, which is an array of 3 tone curves. Returns the new array, or NULL
+// when the array cannot be allocated or when any existing curve fails to be copied. In the
+// latter case the copies made so far are released, so no half-duplicated triple is returned.
+static
+void* Type_vcgt_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsToneCurve** OldCurves =  (cmsToneCurve**) Ptr;
+    cmsToneCurve** NewCurves;
+    cmsUInt32Number i;
+
+    // Zero-initialized, so slots not yet copied are NULL
+    NewCurves = ( cmsToneCurve**) _cmsCalloc(self ->ContextID, 3, sizeof(cmsToneCurve*));
+    if (NewCurves == NULL) return NULL;
+
+    for (i=0; i < 3; i++) {
+
+        NewCurves[i] = cmsDupToneCurve(OldCurves[i]);
+
+        // A missing copy of an existing curve means the duplication failed
+        if (OldCurves[i] != NULL && NewCurves[i] == NULL) {
+
+            // Same cleanup Type_vcgt_Read applies to a partially filled triple
+            cmsFreeToneCurveTriple(NewCurves);
+            _cmsFree(self ->ContextID, NewCurves);
+            return NULL;
+        }
+    }
+
+    return (void*) NewCurves;
+
+    cmsUNUSED_PARAMETER(n);
+}
+
+
+// Releases a vcgt tag: first the three curves, then the array that holds them.
+static
+void Type_vcgt_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsToneCurve** Triple = (cmsToneCurve**) Ptr;
+
+    cmsFreeToneCurveTriple(Triple);
+    _cmsFree(self ->ContextID, Triple);
+}
+
+
+// ********************************************************************************
+// Type cmsSigDictType
+// ********************************************************************************
+
+// Single column of the table can point to wchar or MLUC elements. Holds arrays of data, one entry per record
+typedef struct {
+    cmsContext ContextID;       // Context the arrays are allocated with (set by AllocElem, used by FreeElem)
+    cmsUInt32Number *Offsets;   // Offset of each element; 0 stands for an undefined element
+    cmsUInt32Number *Sizes;     // Size of each element
+} _cmsDICelem;
+// The columns of a dictionary. AllocArray creates DisplayName only for record lengths above 16 and DisplayValue only above 24
+typedef struct {
+    _cmsDICelem Name, Value, DisplayName, DisplayValue;
+
+} _cmsDICarray;
+
+// Allocate an empty array element: two zeroed arrays of Count entries each, one for offsets
+// and one for sizes. Returns FALSE if either allocation fails; in that case nothing stays
+// allocated and e->Offsets is left NULL, so that a later FreeElem/FreeArray on the same
+// element does not release the offsets a second time.
+static
+cmsBool AllocElem(cmsContext ContextID, _cmsDICelem* e,  cmsUInt32Number Count)
+{
+    e->Offsets = (cmsUInt32Number *) _cmsCalloc(ContextID, Count, sizeof(cmsUInt32Number));
+    if (e->Offsets == NULL) return FALSE;
+
+    e->Sizes = (cmsUInt32Number *) _cmsCalloc(ContextID, Count, sizeof(cmsUInt32Number));
+    if (e->Sizes == NULL) {
+
+        _cmsFree(ContextID, e -> Offsets);
+
+        // FreeArray decides what to release by testing Offsets against NULL
+        e->Offsets = NULL;
+        return FALSE;
+    }
+
+    // Remembered because FreeElem needs it to release the arrays
+    e ->ContextID = ContextID;
+    return TRUE;
+}
+
+// Free an array element: both arrays are released when present and the pointers are cleared.
+static
+void FreeElem(_cmsDICelem* e)
+{
+    cmsUInt32Number* Offsets = e ->Offsets;
+    cmsUInt32Number* Sizes   = e ->Sizes;
+
+    // Detach first, release afterwards
+    e ->Offsets = NULL;
+    e ->Sizes   = NULL;
+
+    if (Offsets != NULL) _cmsFree(e ->ContextID, Offsets);
+    if (Sizes != NULL)   _cmsFree(e ->ContextID, Sizes);
+}
+
+// Get rid of whole array: every column that has an offsets array gets released.
+static
+void FreeArray( _cmsDICarray* a)
+{
+    _cmsDICelem* Columns[4];
+    cmsUInt32Number k;
+
+    Columns[0] = &a ->Name;
+    Columns[1] = &a ->Value;
+    Columns[2] = &a ->DisplayName;
+    Columns[3] = &a ->DisplayValue;
+
+    for (k = 0; k < 4; k++) {
+
+        if (Columns[k] ->Offsets != NULL) FreeElem(Columns[k]);
+    }
+}
+
+
+// Allocate whole array. The structure is zeroed first; Name and Value columns are always
+// created, DisplayName only for record lengths above 16 and DisplayValue only above 24.
+// On failure whatever was allocated is released and FALSE is returned.
+static
+cmsBool AllocArray(cmsContext ContextID, _cmsDICarray* a, cmsUInt32Number Count, cmsUInt32Number Length)
+{
+    // Empty values
+    memset(a, 0, sizeof(_cmsDICarray));
+
+    // Columns are created in order; the first failure stops the chain
+    if (AllocElem(ContextID, &a ->Name, Count) &&
+        AllocElem(ContextID, &a ->Value, Count) &&
+        (Length <= 16 || AllocElem(ContextID, &a ->DisplayName, Count)) &&
+        (Length <= 24 || AllocElem(ContextID, &a ->DisplayValue, Count))) {
+
+        return TRUE;
+    }
+
+    FreeArray(a);
+    return FALSE;
+}
+
+// Read one element: the offset and size of record i. A nonzero offset is rebased by adding
+// BaseOffset; an offset of zero has special meaning and shall be preserved.
+static
+cmsBool ReadOneElem(cmsIOHANDLER* io,  _cmsDICelem* e, cmsUInt32Number i, cmsUInt32Number BaseOffset)
+{
+    cmsUInt32Number* Offset = &e ->Offsets[i];
+    cmsUInt32Number* Size   = &e ->Sizes[i];
+
+    if (!_cmsReadUInt32Number(io, Offset) ||
+        !_cmsReadUInt32Number(io, Size)) return FALSE;
+
+    if (*Offset != 0)
+        *Offset += BaseOffset;
+
+    return TRUE;
+}
+
+
+// Reads the offset/size directory, record by record. Every record holds Name and Value, plus
+// DisplayName when Length is above 16 and DisplayValue when Length is above 24.
+static
+cmsBool ReadOffsetArray(cmsIOHANDLER* io,  _cmsDICarray* a, cmsUInt32Number Count, cmsUInt32Number Length, cmsUInt32Number BaseOffset)
+{
+    const cmsBool HasDisplayName  = (Length > 16);
+    const cmsBool HasDisplayValue = (Length > 24);
+    cmsUInt32Number Row;
+
+    for (Row = 0; Row < Count; Row++) {
+
+        if (!ReadOneElem(io, &a ->Name, Row, BaseOffset) ||
+            !ReadOneElem(io, &a ->Value, Row, BaseOffset)) return FALSE;
+
+        if (HasDisplayName && !ReadOneElem(io, &a ->DisplayName, Row, BaseOffset)) return FALSE;
+
+        if (HasDisplayValue && !ReadOneElem(io, &a ->DisplayValue, Row, BaseOffset)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Write one element: the offset of record i followed by its size.
+static
+cmsBool WriteOneElem(cmsIOHANDLER* io,  _cmsDICelem* e, cmsUInt32Number i)
+{
+    if (_cmsWriteUInt32Number(io, e ->Offsets[i]) &&
+        _cmsWriteUInt32Number(io, e ->Sizes[i])) {
+
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+// Writes the offset/size directory, record by record. Every record holds Name and Value, plus
+// DisplayName when Length is above 16 and DisplayValue when Length is above 24.
+static
+cmsBool WriteOffsetArray(cmsIOHANDLER* io,  _cmsDICarray* a, cmsUInt32Number Count, cmsUInt32Number Length)
+{
+    const cmsBool HasDisplayName  = (Length > 16);
+    const cmsBool HasDisplayValue = (Length > 24);
+    cmsUInt32Number Row;
+
+    for (Row = 0; Row < Count; Row++) {
+
+        if (!WriteOneElem(io, &a ->Name, Row) ||
+            !WriteOneElem(io, &a ->Value, Row)) return FALSE;
+
+        if (HasDisplayName && !WriteOneElem(io, &a ->DisplayName, Row)) return FALSE;
+
+        if (HasDisplayValue && !WriteOneElem(io, &a ->DisplayValue, Row)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+// Reads the wide string of record i into a newly allocated, zero terminated buffer stored in
+// *wcstr. An offset of zero denotes an undefined string: *wcstr is set to NULL and TRUE is
+// returned. On every failure path *wcstr is NULL as well, so the caller is never left with
+// a pointer to memory that has already been released.
+static
+cmsBool ReadOneWChar(cmsIOHANDLER* io,  _cmsDICelem* e, cmsUInt32Number i, wchar_t ** wcstr)
+{
+
+    cmsUInt32Number nChars;
+
+      // Whatever the caller had in there is not valid anymore
+      *wcstr = NULL;
+
+      // Special case for undefined strings (see ICC Votable
+      // Proposal Submission, Dictionary Type and Metadata TAG Definition)
+      if (e -> Offsets[i] == 0) return TRUE;
+
+      if (!io -> Seek(io, e -> Offsets[i])) return FALSE;
+
+      // The stored size is divided by the size of a 16 bit unit to get the character count
+      nChars = e ->Sizes[i] / sizeof(cmsUInt16Number);
+
+
+      *wcstr = (wchar_t*) _cmsMallocZero(e ->ContextID, (nChars + 1) * sizeof(wchar_t));
+      if (*wcstr == NULL) return FALSE;
+
+      if (!_cmsReadWCharArray(io, nChars, *wcstr)) {
+          _cmsFree(e ->ContextID, *wcstr);
+
+          // Do not hand a dangling pointer back
+          *wcstr = NULL;
+          return FALSE;
+      }
+
+      // End of string marker
+      (*wcstr)[nChars] = 0;
+      return TRUE;
+}
+
+// Length of a zero terminated wide string, in characters, terminator not counted.
+static
+cmsUInt32Number mywcslen(const wchar_t *s)
+{
+    size_t Len = 0;
+
+    while (s[Len] != 0)
+        Len++;
+
+    return (cmsUInt32Number) Len;
+}
+
+// Writes the wide string of record i at the current position and records its offset
+// (relative to BaseOffset) and the number of bytes written. A NULL string writes nothing and
+// is recorded with offset and size zero.
+static
+cmsBool WriteOneWChar(cmsIOHANDLER* io,  _cmsDICelem* e, cmsUInt32Number i, const wchar_t * wcstr, cmsUInt32Number BaseOffset)
+{
+    cmsUInt32Number Start = io ->Tell(io);
+
+    if (wcstr == NULL) {
+        e ->Offsets[i] = 0;
+        e ->Sizes[i] = 0;
+        return TRUE;
+    }
+
+    e ->Offsets[i] = Start - BaseOffset;
+
+    if (!_cmsWriteWCharArray(io, mywcslen(wcstr), wcstr)) return FALSE;
+
+    // Size is whatever the position advanced by
+    e ->Sizes[i] = io ->Tell(io) - Start;
+    return TRUE;
+}
+
+// Reads the MLU of record i into *mlu. An offset or a size of zero is a way to get a null
+// MLUC: *mlu is set to NULL and TRUE is returned. Otherwise the MLU is read at the recorded
+// offset and the result is TRUE only if one was actually obtained.
+static
+cmsBool ReadOneMLUC(struct _cms_typehandler_struct* self, cmsIOHANDLER* io,  _cmsDICelem* e, cmsUInt32Number i, cmsMLU** mlu)
+{
+    cmsUInt32Number nRead = 0;
+    cmsMLU* Loaded;
+
+    if (e ->Offsets[i] != 0 && e ->Sizes[i] != 0) {
+
+        if (!io ->Seek(io, e ->Offsets[i])) return FALSE;
+
+        Loaded = (cmsMLU*) Type_MLU_Read(self, io, &nRead, e ->Sizes[i]);
+        *mlu = Loaded;
+        return Loaded != NULL;
+    }
+
+    *mlu = NULL;
+    return TRUE;
+}
+
+// Writes the MLU of record i at the current position and records its offset (relative to
+// BaseOffset) and the number of bytes written. A NULL MLU writes nothing and is recorded
+// with offset and size zero (see ICC Votable Proposal Submission, Dictionary Type and
+// Metadata TAG Definition).
+static
+cmsBool WriteOneMLUC(struct _cms_typehandler_struct* self, cmsIOHANDLER* io,  _cmsDICelem* e, cmsUInt32Number i, const cmsMLU* mlu, cmsUInt32Number BaseOffset)
+{
+    cmsUInt32Number Start;
+
+    if (mlu != NULL) {
+
+        Start = io ->Tell(io);
+        e ->Offsets[i] = Start - BaseOffset;
+
+        if (!Type_MLU_Write(self, io, (void*) mlu, 1)) return FALSE;
+
+        // Size is whatever the position advanced by
+        e ->Sizes[i] = io ->Tell(io) - Start;
+        return TRUE;
+    }
+
+    // Special case for undefined strings
+    e ->Sizes[i] = 0;
+    e ->Offsets[i] = 0;
+    return TRUE;
+}
+
+
+// Reads a dictionary tag: the record count and record length, the directory of offsets and
+// sizes, and then every record (name, value and, depending on the record length, display
+// name and display value). Returns the dictionary handle, or NULL on failure with *nItems
+// left at 0. Strings and MLUs read for a record are temporary: they are released once the
+// record has been added, and also on every error exit.
+static
+void *Type_Dictionary_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+   cmsHANDLE hDict;
+   cmsUInt32Number i, Count, Length;
+   cmsUInt32Number BaseOffset;
+   _cmsDICarray a;
+   wchar_t *NameWCS = NULL, *ValueWCS = NULL;
+   cmsMLU *DisplayNameMLU = NULL, *DisplayValueMLU=NULL;
+   cmsBool rc;
+
+    *nItems = 0;
+
+    // Get actual position as a basis for element offsets
+    BaseOffset = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // Get name-value record count
+    if (!_cmsReadUInt32Number(io, &Count)) return NULL;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    // Get rec length
+    if (!_cmsReadUInt32Number(io, &Length)) return NULL;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    // Check for valid lengths
+    if (Length != 16 && Length != 24 && Length != 32) {
+         cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown record length in dictionary '%d'", Length);
+         return NULL;
+    }
+
+    // Creates an empty dictionary
+    hDict = cmsDictAlloc(self -> ContextID);
+    if (hDict == NULL) return NULL;
+
+    // On depending on record size, create column arrays
+    if (!AllocArray(self -> ContextID, &a, Count, Length)) goto Error;
+
+    // Read column arrays
+    if (!ReadOffsetArray(io, &a, Count, Length, BaseOffset)) goto Error;
+
+    // Seek to each element and read it
+    for (i=0; i < Count; i++) {
+
+        // A failed string read may have released its buffer already, so the pointer
+        // is dropped before leaving in order not to free it twice
+        if (!ReadOneWChar(io, &a.Name, i, &NameWCS)) { NameWCS = NULL; goto Error; }
+        if (!ReadOneWChar(io, &a.Value, i, &ValueWCS)) { ValueWCS = NULL; goto Error; }
+
+        if (Length > 16) {
+            if (!ReadOneMLUC(self, io, &a.DisplayName, i, &DisplayNameMLU)) goto Error;
+        }
+
+        if (Length > 24) {
+            if (!ReadOneMLUC(self, io, &a.DisplayValue, i, &DisplayValueMLU)) goto Error;
+        }
+
+        if (NameWCS == NULL || ValueWCS == NULL) {
+
+            cmsSignalError(self->ContextID, cmsERROR_CORRUPTION_DETECTED, "Bad dictionary Name/Value");
+            rc = FALSE;
+        }
+        else {
+
+            rc = cmsDictAddEntry(hDict, NameWCS, ValueWCS, DisplayNameMLU, DisplayValueMLU);
+        }
+
+        // The temporaries of this record are not needed anymore. Pointers are cleared so that
+        // neither the next record nor the error exit sees stale values
+        if (NameWCS != NULL) _cmsFree(self ->ContextID, NameWCS);
+        if (ValueWCS != NULL) _cmsFree(self ->ContextID, ValueWCS);
+        if (DisplayNameMLU != NULL) cmsMLUfree(DisplayNameMLU);
+        if (DisplayValueMLU != NULL) cmsMLUfree(DisplayValueMLU);
+        NameWCS = ValueWCS = NULL;
+        DisplayNameMLU = DisplayValueMLU = NULL;
+
+        if (!rc) goto Error;
+    }
+
+   FreeArray(&a);
+   *nItems = 1;
+   return (void*) hDict;
+
+Error:
+   // Release whatever was read for the record being processed when the failure happened
+   if (NameWCS != NULL) _cmsFree(self ->ContextID, NameWCS);
+   if (ValueWCS != NULL) _cmsFree(self ->ContextID, ValueWCS);
+   if (DisplayNameMLU != NULL) cmsMLUfree(DisplayNameMLU);
+   if (DisplayValueMLU != NULL) cmsMLUfree(DisplayValueMLU);
+
+   FreeArray(&a);
+   cmsDictFree(hDict);
+   return NULL;
+}
+
+
+// Writes a dictionary tag: record count, record length, a directory of offsets and sizes and
+// then the data of every record. The directory is first written with zeros and rewritten
+// once all offsets and sizes are known.
+// The record length is 16 (name and value), 24 (plus display name) or 32 (plus display name
+// and display value). Since the display value column only exists in 32 byte records, any
+// entry with a display value selects 32 even when no entry has a display name.
+// Returns TRUE on success, FALSE on a NULL dictionary, I/O error or allocation failure.
+static
+cmsBool Type_Dictionary_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsHANDLE hDict = (cmsHANDLE) Ptr;
+    const cmsDICTentry* p;
+    cmsBool AnyName, AnyValue;
+    cmsUInt32Number i, Count, Length;
+    cmsUInt32Number DirectoryPos, CurrentPos, BaseOffset;
+   _cmsDICarray a;
+
+    if (hDict == NULL) return FALSE;
+
+    // Offsets stored in the directory are relative to the start of the tag, base header included
+    BaseOffset = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // Let's inspect the dictionary
+    Count = 0; AnyName = FALSE; AnyValue = FALSE;
+    for (p = cmsDictGetEntryList(hDict); p != NULL; p = cmsDictNextEntry(p)) {
+
+        if (p ->DisplayName != NULL) AnyName = TRUE;
+        if (p ->DisplayValue != NULL) AnyValue = TRUE;
+        Count++;
+    }
+
+    // The DisplayValue column is allocated, written and read back only for lengths above 24,
+    // so display values alone must already select the 32 byte layout
+    Length = 16;
+    if (AnyName)  Length = 24;
+    if (AnyValue) Length = 32;
+
+    if (!_cmsWriteUInt32Number(io, Count)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, Length)) return FALSE;
+
+    // Keep starting position of offsets table
+    DirectoryPos = io ->Tell(io);
+
+    // Allocate offsets array
+    if (!AllocArray(self ->ContextID, &a, Count, Length)) goto Error;
+
+    // Write a fake directory to be filled later on
+    if (!WriteOffsetArray(io, &a, Count, Length)) goto Error;
+
+    // Write each element. Keep track of the size as well.
+    p = cmsDictGetEntryList(hDict);
+    for (i=0; i < Count; i++) {
+
+        if (!WriteOneWChar(io, &a.Name, i,  p ->Name, BaseOffset)) goto Error;
+        if (!WriteOneWChar(io, &a.Value, i, p ->Value, BaseOffset)) goto Error;
+
+        // Entries lacking a display name or value keep the zero offset and size they were allocated with
+        if (p ->DisplayName != NULL) {
+            if (!WriteOneMLUC(self, io, &a.DisplayName, i, p ->DisplayName, BaseOffset)) goto Error;
+        }
+
+        if (p ->DisplayValue != NULL) {
+            if (!WriteOneMLUC(self, io, &a.DisplayValue, i, p ->DisplayValue, BaseOffset)) goto Error;
+        }
+
+       p = cmsDictNextEntry(p);
+    }
+
+    // Write the directory
+    CurrentPos = io ->Tell(io);
+    if (!io ->Seek(io, DirectoryPos)) goto Error;
+
+    if (!WriteOffsetArray(io, &a, Count, Length)) goto Error;
+
+    // Go back to the end of the data just written
+    if (!io ->Seek(io, CurrentPos)) goto Error;
+
+    FreeArray(&a);
+    return TRUE;
+
+Error:
+    FreeArray(&a);
+    return FALSE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+// Duplicates the dictionary stored in the tag by means of cmsDictDup.
+static
+void* Type_Dictionary_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsHANDLE Copy = cmsDictDup((cmsHANDLE) Ptr);
+
+    return (void*) Copy;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+// Releases the dictionary stored in the tag by means of cmsDictFree.
+static
+void Type_Dictionary_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsHANDLE hDict = (cmsHANDLE) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+    cmsDictFree(hDict);
+}
+
+
+// ********************************************************************************
+// Type support main routines
+// ********************************************************************************
+
+
+// This is the list of built-in types
+static const _cmsTagTypeLinkedList SupportedTagTypes[] = {
+// Every entry ends with the address of the following array element (NULL on the last one), so the table can also be walked as a linked list
+{TYPE_HANDLER(cmsSigChromaticityType,          Chromaticity),       (_cmsTagTypeLinkedList*) &SupportedTagTypes[1] },
+{TYPE_HANDLER(cmsSigColorantOrderType,         ColorantOrderType),  (_cmsTagTypeLinkedList*) &SupportedTagTypes[2] },
+{TYPE_HANDLER(cmsSigS15Fixed16ArrayType,       S15Fixed16),         (_cmsTagTypeLinkedList*) &SupportedTagTypes[3] },
+{TYPE_HANDLER(cmsSigU16Fixed16ArrayType,       U16Fixed16),         (_cmsTagTypeLinkedList*) &SupportedTagTypes[4] },
+{TYPE_HANDLER(cmsSigTextType,                  Text),               (_cmsTagTypeLinkedList*) &SupportedTagTypes[5] },
+{TYPE_HANDLER(cmsSigTextDescriptionType,       Text_Description),   (_cmsTagTypeLinkedList*) &SupportedTagTypes[6] },
+{TYPE_HANDLER(cmsSigCurveType,                 Curve),              (_cmsTagTypeLinkedList*) &SupportedTagTypes[7] },
+{TYPE_HANDLER(cmsSigParametricCurveType,       ParametricCurve),    (_cmsTagTypeLinkedList*) &SupportedTagTypes[8] },
+{TYPE_HANDLER(cmsSigDateTimeType,              DateTime),           (_cmsTagTypeLinkedList*) &SupportedTagTypes[9] },
+{TYPE_HANDLER(cmsSigLut8Type,                  LUT8),               (_cmsTagTypeLinkedList*) &SupportedTagTypes[10] },
+{TYPE_HANDLER(cmsSigLut16Type,                 LUT16),              (_cmsTagTypeLinkedList*) &SupportedTagTypes[11] },
+{TYPE_HANDLER(cmsSigColorantTableType,         ColorantTable),      (_cmsTagTypeLinkedList*) &SupportedTagTypes[12] },
+{TYPE_HANDLER(cmsSigNamedColor2Type,           NamedColor),         (_cmsTagTypeLinkedList*) &SupportedTagTypes[13] },
+{TYPE_HANDLER(cmsSigMultiLocalizedUnicodeType, MLU),                (_cmsTagTypeLinkedList*) &SupportedTagTypes[14] },
+{TYPE_HANDLER(cmsSigProfileSequenceDescType,   ProfileSequenceDesc),(_cmsTagTypeLinkedList*) &SupportedTagTypes[15] },
+{TYPE_HANDLER(cmsSigSignatureType,             Signature),          (_cmsTagTypeLinkedList*) &SupportedTagTypes[16] },
+{TYPE_HANDLER(cmsSigMeasurementType,           Measurement),        (_cmsTagTypeLinkedList*) &SupportedTagTypes[17] },
+{TYPE_HANDLER(cmsSigDataType,                  Data),               (_cmsTagTypeLinkedList*) &SupportedTagTypes[18] },
+{TYPE_HANDLER(cmsSigLutAtoBType,               LUTA2B),             (_cmsTagTypeLinkedList*) &SupportedTagTypes[19] },
+{TYPE_HANDLER(cmsSigLutBtoAType,               LUTB2A),             (_cmsTagTypeLinkedList*) &SupportedTagTypes[20] },
+{TYPE_HANDLER(cmsSigUcrBgType,                 UcrBg),              (_cmsTagTypeLinkedList*) &SupportedTagTypes[21] },
+{TYPE_HANDLER(cmsSigCrdInfoType,               CrdInfo),            (_cmsTagTypeLinkedList*) &SupportedTagTypes[22] },
+{TYPE_HANDLER(cmsSigMultiProcessElementType,   MPE),                (_cmsTagTypeLinkedList*) &SupportedTagTypes[23] },
+{TYPE_HANDLER(cmsSigScreeningType,             Screening),          (_cmsTagTypeLinkedList*) &SupportedTagTypes[24] },
+{TYPE_HANDLER(cmsSigViewingConditionsType,     ViewingConditions),  (_cmsTagTypeLinkedList*) &SupportedTagTypes[25] },
+{TYPE_HANDLER(cmsSigXYZType,                   XYZ),                (_cmsTagTypeLinkedList*) &SupportedTagTypes[26] },
+{TYPE_HANDLER(cmsCorbisBrokenXYZtype,          XYZ),                (_cmsTagTypeLinkedList*) &SupportedTagTypes[27] },
+{TYPE_HANDLER(cmsMonacoBrokenCurveType,        Curve),              (_cmsTagTypeLinkedList*) &SupportedTagTypes[28] },
+{TYPE_HANDLER(cmsSigProfileSequenceIdType,     ProfileSequenceId),  (_cmsTagTypeLinkedList*) &SupportedTagTypes[29] },
+{TYPE_HANDLER(cmsSigDictType,                  Dictionary),         (_cmsTagTypeLinkedList*) &SupportedTagTypes[30] },
+{TYPE_HANDLER(cmsSigVcgtType,                  vcgt),                NULL }
+};
+
+
+_cmsTagTypePluginChunkType _cmsTagTypePluginChunk = { NULL };   // global chunk instance; its first member starts out NULL
+
+
+
+// Duplicates the plug-in list held in src->chunks[loc] into the memory pool of ctx,
+// keeping the node order, and stores the new list head in ctx->chunks[loc].
+// If a node cannot be copied it returns at once, without assigning ctx->chunks[loc].
+static
+void DupTagTypeList(struct _cmsContext_struct* ctx,
+                    const struct _cmsContext_struct* src,
+                    int loc)
+{
+    _cmsTagTypePluginChunkType* SrcChunk = (_cmsTagTypePluginChunkType*) src->chunks[loc];
+    _cmsTagTypePluginChunkType NewChunk = { NULL };
+    _cmsTagTypeLinkedList* Tail = NULL;
+    _cmsTagTypeLinkedList* Node = SrcChunk->TagTypes;
+
+    while (Node != NULL) {
+
+        _cmsTagTypeLinkedList* Copy =
+            (_cmsTagTypeLinkedList*) _cmsSubAllocDup(ctx->MemPool, Node, sizeof(_cmsTagTypeLinkedList));
+
+        if (Copy == NULL)
+            return;
+
+        // The copy still points into the source list: detach it, then append it at the tail
+        Copy->Next = NULL;
+        if (Tail == NULL)
+            NewChunk.TagTypes = Copy;
+        else
+            Tail->Next = Copy;
+
+        Tail = Copy;
+        Node = Node->Next;
+    }
+
+    ctx->chunks[loc] = _cmsSubAllocDup(ctx->MemPool, &NewChunk, sizeof(_cmsTagTypePluginChunkType));
+}
+
+
+// Sets up the tag-type plug-in chunk of ctx: an empty chunk when src is NULL,
+// otherwise a duplicate of the list held by src.
+void _cmsAllocTagTypePluginChunk(struct _cmsContext_struct* ctx,
+                                 const struct _cmsContext_struct* src)
+{
+    static _cmsTagTypePluginChunkType EmptyChunk = { NULL };
+
+    if (src == NULL) {
+        ctx->chunks[TagTypePlugin] = _cmsSubAllocDup(ctx->MemPool, &EmptyChunk, sizeof(EmptyChunk));
+        return;
+    }
+    DupTagTypeList(ctx, src, TagTypePlugin);
+}
+
+// Sets up the multi-process-element plug-in chunk of ctx: an empty chunk when
+// src is NULL, otherwise a duplicate of the list held by src.
+void _cmsAllocMPETypePluginChunk(struct _cmsContext_struct* ctx,
+                                 const struct _cmsContext_struct* src)
+{
+    static _cmsTagTypePluginChunkType EmptyChunk = { NULL };
+
+    if (src == NULL) {
+        ctx->chunks[MPEPlugin] = _cmsSubAllocDup(ctx->MemPool, &EmptyChunk, sizeof(EmptyChunk));
+        return;
+    }
+
+    DupTagTypeList(ctx, src, MPEPlugin);
+}
+
+
+// Both kinds of plug-ins share the same structure; tag types are registered with the TagTypePlugin chunk id
+cmsBool  _cmsRegisterTagTypePlugin(cmsContext id, cmsPluginBase* Data)
+{
+    return RegisterTypesPlugin(id, Data, TagTypePlugin);
+}
+
+cmsBool  _cmsRegisterMultiProcessElementPlugin(cmsContext id, cmsPluginBase* Data)
+{   // Same registrar as for tag types, called with the MPEPlugin chunk id
+    return RegisterTypesPlugin(id, Data,MPEPlugin);
+}
+
+
+// Wrapper for tag types: hands GetHandler() the signature, the plug-in list of the
+// given context and the built-in SupportedTagTypes table, and returns its result.
+cmsTagTypeHandler* _cmsGetTagTypeHandler(cmsContext ContextID, cmsTagTypeSignature sig)
+{
+    _cmsTagTypePluginChunkType* PluginChunk = (_cmsTagTypePluginChunkType*) _cmsContextGetClientChunk(ContextID, TagTypePlugin);
+    return GetHandler(sig, PluginChunk->TagTypes, (_cmsTagTypeLinkedList*) SupportedTagTypes);
+}
+
+// ********************************************************************************
+// Tag support main routines
+// ********************************************************************************
+
+typedef struct _cmsTagLinkedList_st {
+            // One node of a singly linked list of tags
+            cmsTagSignature Signature;              // tag this node describes
+            cmsTagDescriptor Descriptor;            // handed out by _cmsGetTagDescriptor() on a signature match
+            struct _cmsTagLinkedList_st* Next;      // following node, or NULL at the end of the list
+
+} _cmsTagLinkedList;
+
+// This is the list of built-in tags. The data of this list can be modified by plug-ins
+static _cmsTagLinkedList SupportedTags[] = {
+    // Entry layout: tag signature, descriptor, address of the next array element (NULL on the last one)
+    { cmsSigAToB0Tag,               { 1, 3,  { cmsSigLut16Type,  cmsSigLutAtoBType, cmsSigLut8Type}, DecideLUTtypeA2B}, &SupportedTags[1]},
+    { cmsSigAToB1Tag,               { 1, 3,  { cmsSigLut16Type,  cmsSigLutAtoBType, cmsSigLut8Type}, DecideLUTtypeA2B}, &SupportedTags[2]},
+    { cmsSigAToB2Tag,               { 1, 3,  { cmsSigLut16Type,  cmsSigLutAtoBType, cmsSigLut8Type}, DecideLUTtypeA2B}, &SupportedTags[3]},
+    { cmsSigBToA0Tag,               { 1, 3,  { cmsSigLut16Type,  cmsSigLutBtoAType, cmsSigLut8Type}, DecideLUTtypeB2A}, &SupportedTags[4]},
+    { cmsSigBToA1Tag,               { 1, 3,  { cmsSigLut16Type,  cmsSigLutBtoAType, cmsSigLut8Type}, DecideLUTtypeB2A}, &SupportedTags[5]},
+    { cmsSigBToA2Tag,               { 1, 3,  { cmsSigLut16Type,  cmsSigLutBtoAType, cmsSigLut8Type}, DecideLUTtypeB2A}, &SupportedTags[6]},
+
+    // Allow corbis  and its broken XYZ type
+    { cmsSigRedColorantTag,         { 1, 2, { cmsSigXYZType, cmsCorbisBrokenXYZtype }, DecideXYZtype}, &SupportedTags[7]},
+    { cmsSigGreenColorantTag,       { 1, 2, { cmsSigXYZType, cmsCorbisBrokenXYZtype }, DecideXYZtype}, &SupportedTags[8]},
+    { cmsSigBlueColorantTag,        { 1, 2, { cmsSigXYZType, cmsCorbisBrokenXYZtype }, DecideXYZtype}, &SupportedTags[9]},
+
+    { cmsSigRedTRCTag,              { 1, 3, { cmsSigCurveType, cmsSigParametricCurveType, cmsMonacoBrokenCurveType }, DecideCurveType}, &SupportedTags[10]},
+    { cmsSigGreenTRCTag,            { 1, 3, { cmsSigCurveType, cmsSigParametricCurveType, cmsMonacoBrokenCurveType }, DecideCurveType}, &SupportedTags[11]},
+    { cmsSigBlueTRCTag,             { 1, 3, { cmsSigCurveType, cmsSigParametricCurveType, cmsMonacoBrokenCurveType }, DecideCurveType}, &SupportedTags[12]},
+
+    { cmsSigCalibrationDateTimeTag, { 1, 1, { cmsSigDateTimeType }, NULL}, &SupportedTags[13]},
+    { cmsSigCharTargetTag,          { 1, 1, { cmsSigTextType },     NULL}, &SupportedTags[14]},
+
+    { cmsSigChromaticAdaptationTag, { 9, 1, { cmsSigS15Fixed16ArrayType }, NULL}, &SupportedTags[15]},
+    { cmsSigChromaticityTag,        { 1, 1, { cmsSigChromaticityType    }, NULL}, &SupportedTags[16]},
+    { cmsSigColorantOrderTag,       { 1, 1, { cmsSigColorantOrderType   }, NULL}, &SupportedTags[17]},
+    { cmsSigColorantTableTag,       { 1, 1, { cmsSigColorantTableType   }, NULL}, &SupportedTags[18]},
+    { cmsSigColorantTableOutTag,    { 1, 1, { cmsSigColorantTableType   }, NULL}, &SupportedTags[19]},
+
+    { cmsSigCopyrightTag,           { 1, 3, { cmsSigTextType,  cmsSigMultiLocalizedUnicodeType, cmsSigTextDescriptionType}, DecideTextType}, &SupportedTags[20]},
+    { cmsSigDateTimeTag,            { 1, 1, { cmsSigDateTimeType }, NULL}, &SupportedTags[21]},
+
+    { cmsSigDeviceMfgDescTag,       { 1, 3, { cmsSigTextDescriptionType, cmsSigMultiLocalizedUnicodeType, cmsSigTextType}, DecideTextDescType}, &SupportedTags[22]},
+    { cmsSigDeviceModelDescTag,     { 1, 3, { cmsSigTextDescriptionType, cmsSigMultiLocalizedUnicodeType, cmsSigTextType}, DecideTextDescType}, &SupportedTags[23]},
+
+    { cmsSigGamutTag,               { 1, 3, { cmsSigLut16Type, cmsSigLutBtoAType, cmsSigLut8Type }, DecideLUTtypeB2A}, &SupportedTags[24]},
+
+    { cmsSigGrayTRCTag,             { 1, 2, { cmsSigCurveType, cmsSigParametricCurveType }, DecideCurveType}, &SupportedTags[25]},
+    { cmsSigLuminanceTag,           { 1, 1, { cmsSigXYZType }, NULL}, &SupportedTags[26]},
+
+    { cmsSigMediaBlackPointTag,     { 1, 2, { cmsSigXYZType, cmsCorbisBrokenXYZtype }, NULL}, &SupportedTags[27]},
+    { cmsSigMediaWhitePointTag,     { 1, 2, { cmsSigXYZType, cmsCorbisBrokenXYZtype }, NULL}, &SupportedTags[28]},
+
+    { cmsSigNamedColor2Tag,         { 1, 1, { cmsSigNamedColor2Type }, NULL}, &SupportedTags[29]},
+
+    { cmsSigPreview0Tag,            { 1, 3,  { cmsSigLut16Type, cmsSigLutBtoAType, cmsSigLut8Type }, DecideLUTtypeB2A}, &SupportedTags[30]},
+    { cmsSigPreview1Tag,            { 1, 3,  { cmsSigLut16Type, cmsSigLutBtoAType, cmsSigLut8Type }, DecideLUTtypeB2A}, &SupportedTags[31]},
+    { cmsSigPreview2Tag,            { 1, 3,  { cmsSigLut16Type, cmsSigLutBtoAType, cmsSigLut8Type }, DecideLUTtypeB2A}, &SupportedTags[32]},
+
+    { cmsSigProfileDescriptionTag,  { 1, 3, { cmsSigTextDescriptionType, cmsSigMultiLocalizedUnicodeType, cmsSigTextType}, DecideTextDescType}, &SupportedTags[33]},
+    { cmsSigProfileSequenceDescTag, { 1, 1, { cmsSigProfileSequenceDescType }, NULL},  &SupportedTags[34]},
+    { cmsSigTechnologyTag,          { 1, 1, { cmsSigSignatureType }, NULL},  &SupportedTags[35]},
+
+    { cmsSigColorimetricIntentImageStateTag,   { 1, 1, { cmsSigSignatureType }, NULL}, &SupportedTags[36]},
+    { cmsSigPerceptualRenderingIntentGamutTag, { 1, 1, { cmsSigSignatureType }, NULL}, &SupportedTags[37]},
+    { cmsSigSaturationRenderingIntentGamutTag, { 1, 1, { cmsSigSignatureType }, NULL}, &SupportedTags[38]},
+
+    { cmsSigMeasurementTag,         { 1, 1, { cmsSigMeasurementType }, NULL}, &SupportedTags[39]},
+
+    { cmsSigPs2CRD0Tag,             { 1, 1, { cmsSigDataType }, NULL}, &SupportedTags[40]},
+    { cmsSigPs2CRD1Tag,             { 1, 1, { cmsSigDataType }, NULL}, &SupportedTags[41]},
+    { cmsSigPs2CRD2Tag,             { 1, 1, { cmsSigDataType }, NULL}, &SupportedTags[42]},
+    { cmsSigPs2CRD3Tag,             { 1, 1, { cmsSigDataType }, NULL}, &SupportedTags[43]},
+    { cmsSigPs2CSATag,              { 1, 1, { cmsSigDataType }, NULL}, &SupportedTags[44]},
+    { cmsSigPs2RenderingIntentTag,  { 1, 1, { cmsSigDataType }, NULL}, &SupportedTags[45]},
+
+    { cmsSigViewingCondDescTag,     { 1, 3, { cmsSigTextDescriptionType, cmsSigMultiLocalizedUnicodeType, cmsSigTextType}, DecideTextDescType}, &SupportedTags[46]},
+
+    { cmsSigUcrBgTag,               { 1, 1, { cmsSigUcrBgType}, NULL},    &SupportedTags[47]},
+    { cmsSigCrdInfoTag,             { 1, 1, { cmsSigCrdInfoType}, NULL},  &SupportedTags[48]},
+
+    { cmsSigDToB0Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[49]},
+    { cmsSigDToB1Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[50]},
+    { cmsSigDToB2Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[51]},
+    { cmsSigDToB3Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[52]},
+    { cmsSigBToD0Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[53]},
+    { cmsSigBToD1Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[54]},
+    { cmsSigBToD2Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[55]},
+    { cmsSigBToD3Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[56]},
+
+    { cmsSigScreeningDescTag,       { 1, 1, { cmsSigTextDescriptionType },    NULL}, &SupportedTags[57]},
+    { cmsSigViewingConditionsTag,   { 1, 1, { cmsSigViewingConditionsType },  NULL}, &SupportedTags[58]},
+
+    { cmsSigScreeningTag,           { 1, 1, { cmsSigScreeningType},          NULL }, &SupportedTags[59]},
+    { cmsSigVcgtTag,                { 1, 1, { cmsSigVcgtType},               NULL }, &SupportedTags[60]},
+    { cmsSigMetaTag,                { 1, 1, { cmsSigDictType},               NULL }, &SupportedTags[61]},
+    { cmsSigProfileSequenceIdTag,   { 1, 1, { cmsSigProfileSequenceIdType},  NULL }, &SupportedTags[62]},
+
+    { cmsSigProfileDescriptionMLTag,{ 1, 1, { cmsSigMultiLocalizedUnicodeType}, NULL}, &SupportedTags[63]},
+    { cmsSigArgyllArtsTag,          { 9, 1, { cmsSigS15Fixed16ArrayType},    NULL}, NULL}
+
+};
+
+/*
+    Not supported                 Why
+    =======================       =========================================
+    cmsSigOutputResponseTag   ==> WARNING, POSSIBLE PATENT ON THIS SUBJECT!
+    cmsSigNamedColorTag       ==> Deprecated
+    cmsSigDataTag             ==> Ancient, unused
+    cmsSigDeviceSettingsTag   ==> Deprecated, useless
+*/
+
+
+_cmsTagPluginChunkType _cmsTagPluginChunk = { NULL };   // global chunk instance; its first member starts out NULL
+
+
+// Duplicates the tag plug-in list held in src->chunks[TagPlugin] into the memory pool
+// of ctx, keeping the node order, and stores the new list head in ctx->chunks[TagPlugin].
+// If a node cannot be copied it returns at once, without assigning ctx->chunks[TagPlugin].
+static
+void DupTagList(struct _cmsContext_struct* ctx,
+                const struct _cmsContext_struct* src)
+{
+    _cmsTagPluginChunkType* SrcChunk = (_cmsTagPluginChunkType*) src->chunks[TagPlugin];
+    _cmsTagPluginChunkType NewChunk = { NULL };
+    _cmsTagLinkedList* Tail = NULL;
+    _cmsTagLinkedList* Node = SrcChunk->Tag;
+
+    while (Node != NULL) {
+
+        _cmsTagLinkedList* Copy =
+            (_cmsTagLinkedList*) _cmsSubAllocDup(ctx->MemPool, Node, sizeof(_cmsTagLinkedList));
+
+        if (Copy == NULL)
+            return;
+
+        // The copy still points into the source list: detach it, then append it at the tail
+        Copy->Next = NULL;
+        if (Tail == NULL)
+            NewChunk.Tag = Copy;
+        else
+            Tail->Next = Copy;
+
+        Tail = Copy;
+        Node = Node->Next;
+    }
+
+    ctx->chunks[TagPlugin] = _cmsSubAllocDup(ctx->MemPool, &NewChunk, sizeof(_cmsTagPluginChunkType));
+}
+
+// Sets up the tag plug-in chunk of ctx: an empty chunk when src is NULL,
+// otherwise a duplicate of the tag list held by src.
+void _cmsAllocTagPluginChunk(struct _cmsContext_struct* ctx,
+                             const struct _cmsContext_struct* src)
+{
+    static _cmsTagPluginChunkType EmptyChunk = { NULL };
+
+    if (src == NULL) {
+        ctx->chunks[TagPlugin] = _cmsSubAllocDup(ctx->MemPool, &EmptyChunk, sizeof(EmptyChunk));
+        return;
+    }
+    DupTagList(ctx, src);
+}
+
+// Registers a tag plug-in in context id. A NULL Data resets the head of the context's
+// plug-in tag list to NULL; otherwise a node with the plug-in's signature and descriptor
+// is pushed at the head of that list. Returns FALSE only if the node cannot be allocated.
+cmsBool  _cmsRegisterTagPlugin(cmsContext id, cmsPluginBase* Data)
+{
+    _cmsTagPluginChunkType* Chunk = (_cmsTagPluginChunkType*) _cmsContextGetClientChunk(id, TagPlugin);
+    cmsPluginTag* TagDef = (cmsPluginTag*) Data;
+    _cmsTagLinkedList* Node;
+
+    if (Data == NULL) {
+        Chunk->Tag = NULL;
+        return TRUE;
+    }
+
+    Node = (_cmsTagLinkedList*) _cmsPluginMalloc(id, sizeof(_cmsTagLinkedList));
+    if (Node == NULL) return FALSE;
+
+    Node->Signature  = TagDef->Signature;
+    Node->Descriptor = TagDef->Descriptor;
+    Node->Next       = Chunk->Tag;
+    Chunk->Tag       = Node;
+    return TRUE;
+}
+
+// Returns the descriptor of tag sig, or NULL when no list knows the tag. The plug-in
+// tags of the context are searched first, then the built-in SupportedTags table.
+cmsTagDescriptor* _cmsGetTagDescriptor(cmsContext ContextID, cmsTagSignature sig)
+{
+    _cmsTagPluginChunkType* Chunk = (_cmsTagPluginChunkType*) _cmsContextGetClientChunk(ContextID, TagPlugin);
+    _cmsTagLinkedList* Lists[2];
+    _cmsTagLinkedList* Node;
+    int i;
+
+    Lists[0] = Chunk->Tag;          // plug-ins take precedence
+    Lists[1] = SupportedTags;       // then the built-in table
+
+    for (i = 0; i < 2; i++) {
+
+        for (Node = Lists[i]; Node != NULL; Node = Node->Next) {
+
+            if (Node->Signature == sig)
+                return &Node->Descriptor;
+        }
+    }
+    return NULL;
+}
+
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsvirt.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsvirt.cpp
new file mode 100644
index 0000000000..b431478eab
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsvirt.cpp
@@ -0,0 +1,1216 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Virtual (built-in) profiles
+// -----------------------------------------------------------------------------------
+
+// Writes the profile description and copyright tags of a built-in profile.
+// Description becomes the description text and a fixed notice the copyright; both are
+// stored under "en"/"US". The two temporary MLU objects are always released.
+// Returns TRUE only if every step succeeded.
+static
+cmsBool SetTextTags(cmsHPROFILE hProfile, const wchar_t* Description)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsMLU* DescText = cmsMLUalloc(ContextID, 1);
+    cmsMLU* CopyText = cmsMLUalloc(ContextID, 1);
+    cmsBool Ok;
+
+    // && short-circuits, so the first failing step skips all the following ones
+    Ok = DescText != NULL && CopyText != NULL &&
+         cmsMLUsetWide(DescText, "en", "US", Description) &&
+         cmsMLUsetWide(CopyText, "en", "US", L"No copyright, use freely") &&
+         cmsWriteTag(hProfile, cmsSigProfileDescriptionTag, DescText) &&
+         cmsWriteTag(hProfile, cmsSigCopyrightTag,          CopyText);
+
+    // Release the temporaries whether or not the tags were written
+    if (DescText != NULL)
+        cmsMLUfree(DescText);
+
+    if (CopyText != NULL)
+        cmsMLUfree(CopyText);
+
+    return Ok;
+}
+
+
+// Writes a one-entry profile sequence description for a built-in profile. The entry has
+// zeroed signatures, attributes and technology, "Little CMS" as manufacturer text and
+// Model as model text. Returns FALSE if the sequence cannot be allocated, if either text
+// cannot be stored (this used to be ignored) or if writing the sequence fails.
+static
+cmsBool  SetSeqDescTag(cmsHPROFILE hProfile, const char* Model)
+{
+    cmsBool  rc = FALSE;
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsSEQ* Seq = cmsAllocProfileSequenceDescription(ContextID, 1);
+
+    if (Seq == NULL) return FALSE;
+
+    Seq->seq[0].deviceMfg = (cmsSignature) 0;
+    Seq->seq[0].deviceModel = (cmsSignature) 0;
+#ifdef CMS_DONT_USE_INT64
+    Seq->seq[0].attributes[0] = 0;
+    Seq->seq[0].attributes[1] = 0;
+#else
+    Seq->seq[0].attributes = 0;
+#endif
+    Seq->seq[0].technology = (cmsTechnologySignature) 0;
+
+    // Fail instead of silently writing a description with missing texts
+    if (!cmsMLUsetASCII(Seq->seq[0].Manufacturer, cmsNoLanguage, cmsNoCountry, "Little CMS")) goto Error;
+    if (!cmsMLUsetASCII(Seq->seq[0].Model,        cmsNoLanguage, cmsNoCountry, Model)) goto Error;
+    if (!_cmsWriteProfileSequence(hProfile, Seq)) goto Error;
+
+    rc = TRUE;
+
+Error:
+    cmsFreeProfileSequenceDescription(Seq);     // Seq is never NULL at this point
+    return rc;
+}
+
+
+
+// Builds a v4.3 RGB display profile (PCS XYZ, perceptual intent) in the given context.
+//
+// WhitePoint, Primaries and TransferFunction may each be NULL; tags depend on what is given:
+//   WhitePoint              -> media white point (written as D50) and chromatic adaptation
+//   WhitePoint + Primaries  -> red, green and blue colorants
+//   TransferFunction        -> red, green and blue TRCs
+//   Primaries               -> chromaticity
+//
+// Returns the new profile, or NULL on any failure (a partially built profile is closed).
+cmsHPROFILE CMSEXPORT cmsCreateRGBProfileTHR(cmsContext ContextID,
+                                          const cmsCIExyY* WhitePoint,
+                                          const cmsCIExyYTRIPLE* Primaries,
+                                          cmsToneCurve* const TransferFunction[3])
+{
+    cmsHPROFILE hProfile;
+    cmsCIEXYZ SourceWhiteXYZ;
+    cmsMAT3 AdaptationMatrix;
+    cmsCIExyY UnitWhite;
+    cmsMAT3 RGB2XYZ;
+    cmsCIEXYZTRIPLE PrimariesXYZ;
+
+    hProfile = cmsCreateProfilePlaceholder(ContextID);
+    if (hProfile == NULL)
+        return NULL;                    // placeholder could not be allocated
+
+    // Header fields
+    cmsSetProfileVersion(hProfile, 4.3);
+    cmsSetDeviceClass(hProfile, cmsSigDisplayClass);
+    cmsSetColorSpace(hProfile, cmsSigRgbData);
+    cmsSetPCS(hProfile, cmsSigXYZData);
+    cmsSetHeaderRenderingIntent(hProfile, INTENT_PERCEPTUAL);
+
+    // Description and copyright texts
+    if (!SetTextTags(hProfile, L"RGB built-in"))
+        goto Error;
+
+    if (WhitePoint != NULL) {
+
+        // The media white point tag receives D50; the given white only feeds the adaptation matrix
+        if (!cmsWriteTag(hProfile, cmsSigMediaWhitePointTag, cmsD50_XYZ()))
+            goto Error;
+
+        cmsxyY2XYZ(&SourceWhiteXYZ, WhitePoint);
+        _cmsAdaptationMatrix(&AdaptationMatrix, NULL, &SourceWhiteXYZ, cmsD50_XYZ());
+
+        // This is a V4 tag, but many CMMs read and understand it whatever the profile version
+        if (!cmsWriteTag(hProfile, cmsSigChromaticAdaptationTag, (void*) &AdaptationMatrix))
+            goto Error;
+    }
+
+    if (WhitePoint != NULL && Primaries != NULL) {
+
+        // Same x and y as the given white point, with Y set to 1.0
+        UnitWhite.x = WhitePoint->x;
+        UnitWhite.y = WhitePoint->y;
+        UnitWhite.Y = 1.0;
+
+        if (!_cmsBuildRGB2XYZtransferMatrix(&RGB2XYZ, &UnitWhite, Primaries))
+            goto Error;
+
+        // Red, green and blue take n[0], n[1] and n[2] of each matrix row; rows give X, Y, Z
+        PrimariesXYZ.Red.X   = RGB2XYZ.v[0].n[0];
+        PrimariesXYZ.Green.X = RGB2XYZ.v[0].n[1];
+        PrimariesXYZ.Blue.X  = RGB2XYZ.v[0].n[2];
+
+        PrimariesXYZ.Red.Y   = RGB2XYZ.v[1].n[0];
+        PrimariesXYZ.Green.Y = RGB2XYZ.v[1].n[1];
+        PrimariesXYZ.Blue.Y  = RGB2XYZ.v[1].n[2];
+
+        PrimariesXYZ.Red.Z   = RGB2XYZ.v[2].n[0];
+        PrimariesXYZ.Green.Z = RGB2XYZ.v[2].n[1];
+        PrimariesXYZ.Blue.Z  = RGB2XYZ.v[2].n[2];
+
+        // The tags are written in red, blue, green order
+        if (!cmsWriteTag(hProfile, cmsSigRedColorantTag, (void*) &PrimariesXYZ.Red))
+            goto Error;
+        if (!cmsWriteTag(hProfile, cmsSigBlueColorantTag, (void*) &PrimariesXYZ.Blue))
+            goto Error;
+        if (!cmsWriteTag(hProfile, cmsSigGreenColorantTag, (void*) &PrimariesXYZ.Green))
+            goto Error;
+    }
+
+    if (TransferFunction != NULL) {
+
+        // Tries to minimize space (idea by Richard Hughes): red is always written, while
+        // green and blue are linked to the red tag when they are the very same curve object
+        if (!cmsWriteTag(hProfile, cmsSigRedTRCTag, (void*) TransferFunction[0]))
+            goto Error;
+
+        if (TransferFunction[1] != TransferFunction[0]) {
+            if (!cmsWriteTag(hProfile, cmsSigGreenTRCTag, (void*) TransferFunction[1]))
+                goto Error;
+        }
+        else if (!cmsLinkTag(hProfile, cmsSigGreenTRCTag, cmsSigRedTRCTag))
+            goto Error;
+
+        if (TransferFunction[2] != TransferFunction[0]) {
+            if (!cmsWriteTag(hProfile, cmsSigBlueTRCTag, (void*) TransferFunction[2]))
+                goto Error;
+        }
+        else if (!cmsLinkTag(hProfile, cmsSigBlueTRCTag, cmsSigRedTRCTag))
+            goto Error;
+    }
+
+    if (Primaries != NULL) {
+        if (!cmsWriteTag(hProfile, cmsSigChromaticityTag, (void*) Primaries))
+            goto Error;
+    }
+
+    return hProfile;
+
+Error:
+    if (hProfile != NULL)
+        cmsCloseProfile(hProfile);
+
+    return NULL;
+}
+
+cmsHPROFILE CMSEXPORT cmsCreateRGBProfile(const cmsCIExyY* WhitePoint,
+                                          const cmsCIExyYTRIPLE* Primaries,
+                                          cmsToneCurve* const TransferFunction[3])
+{   // Same as cmsCreateRGBProfileTHR(), called with a NULL context
+    return cmsCreateRGBProfileTHR(NULL, WhitePoint, Primaries, TransferFunction);
+}
+
+
+
+// Builds a v4.3 gray display profile (PCS XYZ, perceptual intent) in the given context.
+//
+// WhitePoint and TransferFunction may each be NULL; the media white point and gray TRC
+// tags are only written when the matching argument is given.
+//
+// Returns the new profile, or NULL on any failure (a partially built profile is closed).
+cmsHPROFILE CMSEXPORT cmsCreateGrayProfileTHR(cmsContext ContextID,
+                                           const cmsCIExyY* WhitePoint,
+                                           const cmsToneCurve* TransferFunction)
+{
+    cmsCIEXYZ WhiteXYZ;
+    cmsHPROFILE hProfile = cmsCreateProfilePlaceholder(ContextID);
+
+    if (hProfile == NULL)
+        return NULL;                    // placeholder could not be allocated
+
+    // Header fields
+    cmsSetProfileVersion(hProfile, 4.3);
+    cmsSetDeviceClass(hProfile, cmsSigDisplayClass);
+    cmsSetColorSpace(hProfile, cmsSigGrayData);
+    cmsSetPCS(hProfile, cmsSigXYZData);
+    cmsSetHeaderRenderingIntent(hProfile, INTENT_PERCEPTUAL);
+
+    // Description and copyright texts
+    if (!SetTextTags(hProfile, L"gray built-in"))
+        goto Error;
+
+    // Media white point, converted from xyY to XYZ
+    if (WhitePoint != NULL) {
+
+        cmsxyY2XYZ(&WhiteXYZ, WhitePoint);
+
+        if (!cmsWriteTag(hProfile, cmsSigMediaWhitePointTag, (void*) &WhiteXYZ))
+            goto Error;
+    }
+
+    // Gray tone reproduction curve
+    if (TransferFunction != NULL) {
+
+        if (!cmsWriteTag(hProfile, cmsSigGrayTRCTag, (void*) TransferFunction))
+            goto Error;
+    }
+
+    return hProfile;
+
+Error:
+    if (hProfile != NULL)
+        cmsCloseProfile(hProfile);
+
+    return NULL;
+}
+
+
+
+cmsHPROFILE CMSEXPORT cmsCreateGrayProfile(const cmsCIExyY* WhitePoint,
+                                                    const cmsToneCurve* TransferFunction)
+{   // Same as cmsCreateGrayProfileTHR(), called with a NULL context
+    return cmsCreateGrayProfileTHR(NULL, WhitePoint, TransferFunction);
+}
+
+// Builds a v4.3 device-link profile operating in ColorSpace, which is set as both the
+// color space and the PCS of the link, with as many transfer functions as components.
+// TransferFunctions supplies the curves for the single stage of the AToB0 pipeline.
+// Returns the new profile, or NULL on any failure (a partially built profile is closed).
+cmsHPROFILE CMSEXPORT cmsCreateLinearizationDeviceLinkTHR(cmsContext ContextID,
+                                                          cmsColorSpaceSignature ColorSpace,
+                                                          cmsToneCurve* const TransferFunctions[])
+{
+    cmsHPROFILE hProfile;
+    cmsPipeline* Curves;
+    cmsStage* CurveStage;
+    cmsUInt32Number nChannels;
+    cmsBool Ok = FALSE;
+
+    hProfile = cmsCreateProfilePlaceholder(ContextID);
+    if (hProfile == NULL)
+        return NULL;
+
+    // Header fields
+    cmsSetProfileVersion(hProfile, 4.3);
+    cmsSetDeviceClass(hProfile, cmsSigLinkClass);
+    cmsSetColorSpace(hProfile, ColorSpace);
+    cmsSetPCS(hProfile, ColorSpace);
+    cmsSetHeaderRenderingIntent(hProfile, INTENT_PERCEPTUAL);
+
+    // The pipeline has as many inputs and outputs as the color space has channels
+    nChannels = cmsChannelsOf(ColorSpace);
+    Curves = cmsPipelineAlloc(ContextID, nChannels, nChannels);
+    if (Curves == NULL)
+        goto Done;
+
+    // Prelinearization step only: the tone-curve stage is the whole pipeline
+    CurveStage = cmsStageAllocToneCurves(ContextID, nChannels, TransferFunctions);
+    if (!cmsPipelineInsertStage(Curves, cmsAT_BEGIN, CurveStage))
+        goto Done;
+
+    // Tags: texts, the pipeline and the profile sequence description
+    if (!SetTextTags(hProfile, L"Linearization built-in")) goto Done;
+    if (!cmsWriteTag(hProfile, cmsSigAToB0Tag, (void*) Curves)) goto Done;
+    if (!SetSeqDescTag(hProfile, "Linearization built-in")) goto Done;
+
+    Ok = TRUE;
+
+Done:
+    // The local pipeline is released on the success path and on the failure path alike
+    cmsPipelineFree(Curves);
+    if (Ok)
+        return hProfile;
+
+    if (hProfile != NULL)
+        cmsCloseProfile(hProfile);
+    return NULL;
+}
+
+cmsHPROFILE CMSEXPORT cmsCreateLinearizationDeviceLink(cmsColorSpaceSignature ColorSpace,
+                                                                 cmsToneCurve* const TransferFunctions[])
+{   // Same as cmsCreateLinearizationDeviceLinkTHR(), called with a NULL context
+    return cmsCreateLinearizationDeviceLinkTHR(NULL, ColorSpace, TransferFunctions);
+}
+
+// Ink-limiting algorithm
+//
+//  Sum = C + M + Y + K
+//  If Sum > InkLimit
+//        Ratio= 1 - (Sum - InkLimit) / (C + M + Y)
+//        if Ratio <0
+//              Ratio=0
+//        endif
+//     Else
+//         Ratio=1
+//     endif
+//
+//     C = Ratio * C
+//     M = Ratio * M
+//     Y = Ratio * Y
+//     K: Does not change
+
+// CLUT sampler implementing the ink-limiting algorithm described above.
+// In/Out hold C, M, Y, K as 16-bit words; Cargo points to the limit as a
+// cmsFloat64Number, which is multiplied by 655.35 before being compared with the sums.
+// When C + M + Y is 0 there is nothing to scale, so no ratio is computed: this avoids
+// a division by zero and leaves the output unchanged (C, M and Y stay 0 either way).
+static
+int InkLimitingSampler(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void* Cargo)
+{
+    cmsFloat64Number InkLimit = *(cmsFloat64Number *) Cargo * 655.35;
+    cmsFloat64Number SumCMY   = In[0] + In[1] + In[2];
+    cmsFloat64Number SumCMYK  = SumCMY + In[3];
+    cmsFloat64Number Ratio    = 1;
+
+    if (SumCMYK > InkLimit && SumCMY > 0) {
+
+        Ratio = 1 - ((SumCMYK - InkLimit) / SumCMY);
+        if (Ratio < 0)
+            Ratio = 0;
+    }
+
+    Out[0] = _cmsQuickSaturateWord(In[0] * Ratio);     // C
+    Out[1] = _cmsQuickSaturateWord(In[1] * Ratio);     // M
+    Out[2] = _cmsQuickSaturateWord(In[2] * Ratio);     // Y
+    Out[3] = In[3];                                    // K (untouched)
+
+    return TRUE;
+}
+
+// Builds a v4.3 CMYK device-link profile for ink-limiting, with Limit as the ink limit.
+// Only cmsSigCmykData is accepted. A Limit outside 0..400 is reported through
+// cmsSignalError() and clamped. The AToB0 pipeline is identity curves, a CLUT allocated
+// with 17 grid points and sampled with InkLimitingSampler, and identity curves again.
+// Returns the new profile, or NULL on any failure. A CLUT stage that fails before it is
+// handed to the pipeline is freed here instead of being leaked.
+cmsHPROFILE CMSEXPORT cmsCreateInkLimitingDeviceLinkTHR(cmsContext ContextID,
+                                                     cmsColorSpaceSignature ColorSpace,
+                                                     cmsFloat64Number Limit)
+{
+    cmsHPROFILE hICC;
+    cmsPipeline* LUT;
+    cmsStage* CLUT = NULL;              // non-NULL only while this function still owns the stage
+    cmsStage* Handed;
+    cmsUInt32Number nChannels;
+
+    if (ColorSpace != cmsSigCmykData) {
+        cmsSignalError(ContextID, cmsERROR_COLORSPACE_CHECK, "InkLimiting: Only CMYK currently supported");
+        return NULL;
+    }
+
+    if (Limit < 0.0 || Limit > 400) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "InkLimiting: Limit should be between 0..400");
+        if (Limit < 0) Limit = 0;
+        if (Limit > 400) Limit = 400;
+    }
+
+    hICC = cmsCreateProfilePlaceholder(ContextID);
+    if (!hICC)                          // can't allocate
+        return NULL;
+
+    cmsSetProfileVersion(hICC, 4.3);
+    cmsSetDeviceClass(hICC,      cmsSigLinkClass);
+    cmsSetColorSpace(hICC,       ColorSpace);
+    cmsSetPCS(hICC,              ColorSpace);
+    cmsSetHeaderRenderingIntent(hICC,  INTENT_PERCEPTUAL);
+
+    // The pipeline is created with 4 inputs and 4 outputs
+    LUT = cmsPipelineAlloc(ContextID, 4, 4);
+    if (LUT == NULL) goto Error;
+
+    nChannels = cmsChannelsOf(ColorSpace);
+    CLUT = cmsStageAllocCLut16bit(ContextID, 17, nChannels, nChannels, NULL);
+    if (CLUT == NULL) goto Error;
+
+    // A failure in either of the next two steps leaves CLUT outside the pipeline,
+    // so the Error path has to free it
+    if (!cmsStageSampleCLut16bit(CLUT, InkLimitingSampler, (void*) &Limit, 0)) goto Error;
+    if (!cmsPipelineInsertStage(LUT, cmsAT_BEGIN, _cmsStageAllocIdentityCurves(ContextID, nChannels))) goto Error;
+
+    // Hand the stage over: from here on it is left to cmsPipelineFree(), exactly as before
+    Handed = CLUT;
+    CLUT = NULL;
+    if (!cmsPipelineInsertStage(LUT, cmsAT_END, Handed) ||
+        !cmsPipelineInsertStage(LUT, cmsAT_END, _cmsStageAllocIdentityCurves(ContextID, nChannels)))
+        goto Error;
+
+    // Create tags
+    if (!SetTextTags(hICC, L"ink-limiting built-in")) goto Error;
+    if (!cmsWriteTag(hICC, cmsSigAToB0Tag, (void*) LUT))  goto Error;
+    if (!SetSeqDescTag(hICC, "ink-limiting built-in")) goto Error;
+
+    // cmsPipeline is already on virtual profile
+    cmsPipelineFree(LUT);
+    return hICC;
+
+Error:
+    if (CLUT != NULL)
+        cmsStageFree(CLUT);
+    if (LUT != NULL)
+        cmsPipelineFree(LUT);
+    if (hICC != NULL)
+        cmsCloseProfile(hICC);
+    return NULL;
+}
+
+cmsHPROFILE CMSEXPORT cmsCreateInkLimitingDeviceLink(cmsColorSpaceSignature ColorSpace, cmsFloat64Number Limit)
+{   // Forwards to the THR variant with a NULL ContextID
+    return cmsCreateInkLimitingDeviceLinkTHR(NULL, ColorSpace, Limit);
+}
+
+
+// Creates a fake Lab identity.
+cmsHPROFILE CMSEXPORT cmsCreateLab2ProfileTHR(cmsContext ContextID, const cmsCIExyY* WhitePoint)
+{
+    cmsHPROFILE hProfile;
+    cmsPipeline* LUT = NULL;
+    // Start from an RGB profile (D50 white when WhitePoint is NULL) and relabel it as a V2 Lab abstract profile
+    hProfile = cmsCreateRGBProfileTHR(ContextID, WhitePoint == NULL ? cmsD50_xyY() : WhitePoint, NULL, NULL);
+    if (hProfile == NULL) return NULL;
+
+    cmsSetProfileVersion(hProfile, 2.1);
+
+    cmsSetDeviceClass(hProfile, cmsSigAbstractClass);
+    cmsSetColorSpace(hProfile,  cmsSigLabData);
+    cmsSetPCS(hProfile,         cmsSigLabData);
+    // hProfile already exists here, so failures must go through Error (a bare return would leak it)
+    if (!SetTextTags(hProfile, L"Lab identity built-in")) goto Error;
+
+    // An identity LUT is all we need
+    LUT = cmsPipelineAlloc(ContextID, 3, 3);
+    if (LUT == NULL) goto Error;
+
+    if (!cmsPipelineInsertStage(LUT, cmsAT_BEGIN, _cmsStageAllocIdentityCLut(ContextID, 3)))
+        goto Error;
+
+    if (!cmsWriteTag(hProfile, cmsSigAToB0Tag, LUT)) goto Error;
+    cmsPipelineFree(LUT);
+
+    return hProfile;
+
+Error:
+    // LUT is NULL until cmsPipelineAlloc succeeds, hence the guard
+    if (LUT != NULL)
+        cmsPipelineFree(LUT);
+
+    if (hProfile != NULL)
+        cmsCloseProfile(hProfile);
+
+    return NULL;
+}
+
+
+cmsHPROFILE CMSEXPORT cmsCreateLab2Profile(const cmsCIExyY* WhitePoint)
+{   // Forwards to the THR variant with a NULL ContextID
+    return cmsCreateLab2ProfileTHR(NULL, WhitePoint);
+}
+
+
+// Creates a fake Lab V4 identity.
+cmsHPROFILE CMSEXPORT cmsCreateLab4ProfileTHR(cmsContext ContextID, const cmsCIExyY* WhitePoint)
+{
+    cmsPipeline* Identity = NULL;
+    const cmsCIExyY* White = (WhitePoint != NULL) ? WhitePoint : cmsD50_xyY();
+    cmsHPROFILE hLab = cmsCreateRGBProfileTHR(ContextID, White, NULL, NULL);
+
+    if (!hLab) return NULL;
+
+    cmsSetProfileVersion(hLab, 4.3);
+    // Abstract class with Lab on both the device and the PCS side
+    cmsSetDeviceClass(hLab, cmsSigAbstractClass);
+    cmsSetColorSpace(hLab,  cmsSigLabData);
+    cmsSetPCS(hLab,         cmsSigLabData);
+
+    if (!SetTextTags(hLab, L"Lab identity built-in")) goto Failed;
+
+    // A 3 -> 3 pipeline holding only identity curves is all we need
+    Identity = cmsPipelineAlloc(ContextID, 3, 3);
+    if (!Identity) goto Failed;
+
+    if (!cmsPipelineInsertStage(Identity, cmsAT_BEGIN, _cmsStageAllocIdentityCurves(ContextID, 3)))
+        goto Failed;
+
+    if (!cmsWriteTag(hLab, cmsSigAToB0Tag, Identity)) goto Failed;
+    cmsPipelineFree(Identity);
+
+    return hLab;
+
+Failed:
+    // Release whatever had been acquired when the failure happened
+    if (Identity)
+        cmsPipelineFree(Identity);
+
+    if (hLab)
+        cmsCloseProfile(hLab);
+
+    return NULL;
+}
+
+cmsHPROFILE CMSEXPORT cmsCreateLab4Profile(const cmsCIExyY* WhitePoint)
+{   // Forwards to the THR variant with a NULL ContextID
+    return cmsCreateLab4ProfileTHR(NULL, WhitePoint);
+}
+
+
+// Creates a fake XYZ identity
+cmsHPROFILE CMSEXPORT cmsCreateXYZProfileTHR(cmsContext ContextID)
+{
+    cmsPipeline* Identity = NULL;
+    cmsHPROFILE hXYZ = cmsCreateRGBProfileTHR(ContextID, cmsD50_xyY(), NULL, NULL);
+
+    // Nothing to clean up yet if the base profile could not be created
+    if (!hXYZ) return NULL;
+
+    cmsSetProfileVersion(hXYZ, 4.3);
+
+    cmsSetDeviceClass(hXYZ, cmsSigAbstractClass);
+    cmsSetColorSpace(hXYZ,  cmsSigXYZData);
+    cmsSetPCS(hXYZ,         cmsSigXYZData);
+
+    if (!SetTextTags(hXYZ, L"XYZ identity built-in")) goto Failed;
+
+    // A 3 -> 3 pipeline holding only identity curves
+    Identity = cmsPipelineAlloc(ContextID, 3, 3);
+    if (!Identity) goto Failed;
+
+    if (!cmsPipelineInsertStage(Identity, cmsAT_BEGIN, _cmsStageAllocIdentityCurves(ContextID, 3)))
+        goto Failed;
+
+    if (!cmsWriteTag(hXYZ, cmsSigAToB0Tag, Identity)) goto Failed;
+    cmsPipelineFree(Identity);
+
+    return hXYZ;
+
+Failed:
+
+    if (Identity)
+        cmsPipelineFree(Identity);
+
+    if (hXYZ)
+        cmsCloseProfile(hXYZ);
+
+    return NULL;
+}
+
+
+cmsHPROFILE CMSEXPORT cmsCreateXYZProfile(void)
+{   // Forwards to the THR variant with a NULL ContextID
+    return cmsCreateXYZProfileTHR(NULL);
+}
+
+
+//sRGB Curves are defined by:
+//
+//If  R'sRGB,G'sRGB, B'sRGB < 0.04045
+//
+//    R =  R'sRGB / 12.92
+//    G =  G'sRGB / 12.92
+//    B =  B'sRGB / 12.92
+//
+//
+//else if  R'sRGB,G'sRGB, B'sRGB >= 0.04045
+//
+//    R = ((R'sRGB + 0.055) / 1.055)^2.4
+//    G = ((G'sRGB + 0.055) / 1.055)^2.4
+//    B = ((B'sRGB + 0.055) / 1.055)^2.4
+
+static
+cmsToneCurve* Build_sRGBGamma(cmsContext ContextID)
+{
+    // Constants of the sRGB formula above, handed to parametric curve type 4
+    cmsFloat64Number sRGBParams[5] = {
+        2.4,                    // exponent of the power segment
+        1. / 1.055,             // scale inside the power segment
+        0.055 / 1.055,          // offset inside the power segment
+        1. / 12.92,             // slope of the linear segment
+        0.04045                 // threshold between the two segments
+    };
+    return cmsBuildParametricToneCurve(ContextID, 4, sRGBParams);
+}
+
+// Create the ICC virtual profile for sRGB space
+cmsHPROFILE CMSEXPORT cmsCreate_sRGBProfileTHR(cmsContext ContextID)
+{
+    cmsCIExyY       WhiteD65 = { 0.3127, 0.3290, 1.0 };
+    cmsCIExyYTRIPLE Primaries709 = {
+        {0.6400, 0.3300, 1.0},
+        {0.3000, 0.6000, 1.0},
+        {0.1500, 0.0600, 1.0}
+    };
+    cmsToneCurve* Shared = Build_sRGBGamma(ContextID);
+    cmsToneCurve* PerChannel[3];
+    cmsHPROFILE   hProfile;
+
+    if (Shared == NULL) return NULL;
+    PerChannel[0] = PerChannel[1] = PerChannel[2] = Shared;    // one curve object for all three channels
+
+    hProfile = cmsCreateRGBProfileTHR(ContextID, &WhiteD65, &Primaries709, PerChannel);
+    cmsFreeToneCurve(Shared);            // released whether or not the profile was created
+    if (hProfile == NULL) return NULL;
+
+    if (!SetTextTags(hProfile, L"sRGB built-in")) {
+        cmsCloseProfile(hProfile);
+        return NULL;
+    }
+
+    return hProfile;
+}
+
+cmsHPROFILE CMSEXPORT cmsCreate_sRGBProfile(void)
+{   // Forwards to the THR variant with a NULL ContextID
+    return cmsCreate_sRGBProfileTHR(NULL);
+}
+
+
+
+typedef struct {
+                cmsFloat64Number Brightness;   // added to L after the contrast scaling
+                cmsFloat64Number Contrast;     // multiplies L
+                cmsFloat64Number Hue;          // added to h
+                cmsFloat64Number Saturation;   // added to C
+                cmsBool          lAdjustWP;    // TRUE when the sampler must move the white point
+                cmsCIEXYZ WPsrc, WPdest;       // white points used for that move; set only when lAdjustWP is TRUE
+
+} BCHSWADJUSTS, *LPBCHSWADJUSTS;
+
+
+static
+int bchswSampler(CMSREGISTER const cmsUInt16Number In[], CMSREGISTER cmsUInt16Number Out[], CMSREGISTER void* Cargo)
+{
+    LPBCHSWADJUSTS Adjust = (LPBCHSWADJUSTS) Cargo;
+    cmsCIELab SrcLab, DstLab;
+    cmsCIELCh SrcLCh, DstLCh;
+    cmsCIEXYZ Scratch;
+
+    // Decode the grid node, then move to the cylindrical LCh form
+    cmsLabEncoded2Float(&SrcLab, In);
+
+    cmsLab2LCh(&SrcLCh, &SrcLab);
+
+    // Contrast scales L and Brightness then offsets it;
+    // Saturation and Hue are plain offsets on C and h
+    DstLCh.L = SrcLCh.L * Adjust ->Contrast + Adjust ->Brightness;
+    DstLCh.C = SrcLCh.C + Adjust ->Saturation;
+    DstLCh.h = SrcLCh.h + Adjust ->Hue;
+
+    cmsLCh2Lab(&DstLab, &DstLCh);
+
+    // Optional white point move: Lab relative to WPsrc -> XYZ -> Lab relative to WPdest
+    if (Adjust ->lAdjustWP) {
+        cmsLab2XYZ(&Adjust ->WPsrc, &Scratch, &DstLab);
+        cmsXYZ2Lab(&Adjust ->WPdest, &DstLab, &Scratch);
+    }
+
+    // Re-encode the adjusted colour into the output node
+    cmsFloat2LabEncoded(Out, &DstLab);
+
+    // This sampler has no failure path
+    return TRUE;
+
+
+}
+
+
+// Creates an abstract profile operating in Lab space for Brightness,
+// contrast, Saturation and white point displacement
+
+cmsHPROFILE CMSEXPORT cmsCreateBCHSWabstractProfileTHR(cmsContext ContextID,
+                                                       cmsUInt32Number nLUTPoints,
+                                                       cmsFloat64Number Bright,
+                                                       cmsFloat64Number Contrast,
+                                                       cmsFloat64Number Hue,
+                                                       cmsFloat64Number Saturation,
+                                                       cmsUInt32Number TempSrc,
+                                                       cmsUInt32Number TempDest)
+{
+    cmsHPROFILE hICC;
+    cmsPipeline* Pipeline;
+    BCHSWADJUSTS bchsw;
+    cmsCIExyY WhitePnt;
+    cmsStage* CLUT;
+    cmsUInt32Number Dimensions[MAX_INPUT_DIMENSIONS];
+    cmsUInt32Number i;
+    // Returns the new Lab abstract profile, or NULL on an invalid temperature or any allocation/write failure
+    bchsw.Brightness = Bright;
+    bchsw.Contrast   = Contrast;
+    bchsw.Hue        = Hue;
+    bchsw.Saturation = Saturation;
+    if (TempSrc == TempDest) {
+           // Same temperature on both sides: no white point displacement
+           bchsw.lAdjustWP = FALSE;
+    }
+    else {
+           bchsw.lAdjustWP = TRUE;
+           // cmsWhitePointFromTemp writes nothing on failure; nothing is allocated yet, so just return
+           if (!cmsWhitePointFromTemp(&WhitePnt, TempSrc)) return NULL;
+           cmsxyY2XYZ(&bchsw.WPsrc, &WhitePnt);
+           if (!cmsWhitePointFromTemp(&WhitePnt, TempDest)) return NULL;
+           cmsxyY2XYZ(&bchsw.WPdest, &WhitePnt);
+    }
+
+    hICC = cmsCreateProfilePlaceholder(ContextID);
+    if (!hICC)                          // can't allocate
+        return NULL;
+
+    cmsSetDeviceClass(hICC,      cmsSigAbstractClass);
+    cmsSetColorSpace(hICC,       cmsSigLabData);
+    cmsSetPCS(hICC,              cmsSigLabData);
+
+    cmsSetHeaderRenderingIntent(hICC,  INTENT_PERCEPTUAL);
+
+    // Creates a Pipeline with 3D grid only
+    Pipeline = cmsPipelineAlloc(ContextID, 3, 3);
+    if (Pipeline == NULL) {
+        cmsCloseProfile(hICC);
+        return NULL;
+    }
+    // Same number of grid points on every axis
+    for (i=0; i < MAX_INPUT_DIMENSIONS; i++) Dimensions[i] = nLUTPoints;
+    CLUT = cmsStageAllocCLut16bitGranular(ContextID, Dimensions, 3, 3, NULL);
+    if (CLUT == NULL) goto Error;
+
+    // CLUT is not part of Pipeline yet, so a sampling failure has to release it here
+    if (!cmsStageSampleCLut16bit(CLUT, bchswSampler, (void*) &bchsw, 0)) {
+        // Shouldn't reach here
+        cmsStageFree(CLUT);
+        goto Error;
+    }
+
+    if (!cmsPipelineInsertStage(Pipeline, cmsAT_END, CLUT)) {
+        goto Error;
+    }
+
+    // Create tags; any failure goes through Error so that Pipeline and hICC are released
+    if (!SetTextTags(hICC, L"BCHS built-in")) goto Error;
+
+    if (!cmsWriteTag(hICC, cmsSigMediaWhitePointTag, (void*) cmsD50_XYZ())) goto Error;
+
+    if (!cmsWriteTag(hICC, cmsSigAToB0Tag, (void*) Pipeline)) goto Error;
+
+    // Pipeline is already on virtual profile
+    cmsPipelineFree(Pipeline);
+
+    // Ok, done
+    return hICC;
+
+Error:
+    cmsPipelineFree(Pipeline);
+    cmsCloseProfile(hICC);
+    return NULL;
+}
+
+
+CMSAPI cmsHPROFILE   CMSEXPORT cmsCreateBCHSWabstractProfile(cmsUInt32Number nLUTPoints,
+                                                             cmsFloat64Number Bright,
+                                                             cmsFloat64Number Contrast,
+                                                             cmsFloat64Number Hue,
+                                                             cmsFloat64Number Saturation,
+                                                             cmsUInt32Number TempSrc,
+                                                             cmsUInt32Number TempDest)
+{   // Forwards every argument unchanged to the THR variant, with a NULL ContextID
+    return cmsCreateBCHSWabstractProfileTHR(NULL, nLUTPoints, Bright, Contrast, Hue, Saturation, TempSrc, TempDest);
+}
+
+
+// Creates a fake NULL profile. This profile returns a single channel that is always 0.
+// It is useful only for gamut checking tricks
+cmsHPROFILE CMSEXPORT cmsCreateNULLProfileTHR(cmsContext ContextID)
+{
+    cmsHPROFILE hProfile;
+    cmsPipeline* LUT = NULL;
+    cmsStage* PostLin;
+    cmsStage* OutLin;
+    cmsToneCurve* EmptyTab[3];
+    cmsUInt16Number Zero[2] = { 0, 0 };
+    const cmsFloat64Number PickLstarMatrix[] = { 1, 0, 0 };
+
+    hProfile = cmsCreateProfilePlaceholder(ContextID);
+    if (!hProfile)                          // can't allocate
+        return NULL;
+
+    cmsSetProfileVersion(hProfile, 4.3);
+
+    if (!SetTextTags(hProfile, L"NULL profile built-in")) goto Error;
+
+    // Output-class profile: Gray device space, Lab PCS
+    cmsSetDeviceClass(hProfile, cmsSigOutputClass);
+    cmsSetColorSpace(hProfile,  cmsSigGrayData);
+    cmsSetPCS(hProfile,         cmsSigLabData);
+
+    // Create a valid ICC 4 structure
+    LUT = cmsPipelineAlloc(ContextID, 3, 1);
+    if (LUT == NULL) goto Error;
+    // All three slots alias one curve built from the two-entry all-zero table
+    EmptyTab[0] = EmptyTab[1] = EmptyTab[2] = cmsBuildTabulatedToneCurve16(ContextID, 2, Zero);
+    PostLin = cmsStageAllocToneCurves(ContextID, 3, EmptyTab);
+    OutLin  = cmsStageAllocToneCurves(ContextID, 1, EmptyTab);
+    cmsFreeToneCurve(EmptyTab[0]);
+    // NOTE(review): EmptyTab[0], PostLin and OutLin are not NULL-checked, and OutLin is not freed if an earlier insert fails - confirm
+    if (!cmsPipelineInsertStage(LUT, cmsAT_END, PostLin))
+        goto Error;
+    // 1x3 matrix {1, 0, 0}: keeps only the first of the three channels (L*, going by the name)
+    if (!cmsPipelineInsertStage(LUT, cmsAT_END, cmsStageAllocMatrix(ContextID, 1, 3, PickLstarMatrix, NULL)))
+        goto Error;
+
+    if (!cmsPipelineInsertStage(LUT, cmsAT_END, OutLin))
+        goto Error;
+
+    if (!cmsWriteTag(hProfile, cmsSigBToA0Tag, (void*) LUT)) goto Error;
+    if (!cmsWriteTag(hProfile, cmsSigMediaWhitePointTag, cmsD50_XYZ())) goto Error;
+
+    cmsPipelineFree(LUT);
+    return hProfile;
+
+Error:
+    // LUT is NULL until cmsPipelineAlloc succeeds, hence the guard
+    if (LUT != NULL)
+        cmsPipelineFree(LUT);
+
+    if (hProfile != NULL)
+        cmsCloseProfile(hProfile);
+
+    return NULL;
+}
+
+cmsHPROFILE CMSEXPORT cmsCreateNULLProfile(void)
+{   // Forwards to the THR variant with a NULL ContextID
+    return cmsCreateNULLProfileTHR(NULL);
+}
+
+
+static
+int IsPCS(cmsColorSpaceSignature ColorSpace)
+{
+    // Non-zero only for the two connection spaces, Lab and XYZ
+    return (ColorSpace == cmsSigLabData || ColorSpace == cmsSigXYZData);
+}
+
+
+static
+void FixColorSpaces(cmsHPROFILE hProfile,
+                              cmsColorSpaceSignature ColorSpace,
+                              cmsColorSpaceSignature PCS,
+                              cmsUInt32Number dwFlags)
+{
+    const int SrcIsPCS = IsPCS(ColorSpace);
+    const int DstIsPCS = IsPCS(PCS);
+
+    if (dwFlags & cmsFLAGS_GUESSDEVICECLASS) {
+        if (SrcIsPCS && DstIsPCS) {
+            // Connection space on both sides: abstract profile
+            cmsSetDeviceClass(hProfile, cmsSigAbstractClass);
+            cmsSetColorSpace(hProfile,  ColorSpace);
+            cmsSetPCS(hProfile,         PCS);
+            return;
+        }
+        else if (SrcIsPCS) {
+            // Connection space in, device out: output profile, header fields swapped
+            cmsSetDeviceClass(hProfile, cmsSigOutputClass);
+            cmsSetPCS(hProfile,         ColorSpace);
+            cmsSetColorSpace(hProfile,  PCS);
+            return;
+        }
+        else if (DstIsPCS) {
+            // Device in, connection space out: input profile
+            cmsSetDeviceClass(hProfile, cmsSigInputClass);
+            cmsSetColorSpace(hProfile,  ColorSpace);
+            cmsSetPCS(hProfile,         PCS);
+            return;
+        }
+    }
+
+    cmsSetDeviceClass(hProfile, cmsSigLinkClass);      // no guess made: plain devicelink
+    cmsSetColorSpace(hProfile,  ColorSpace);
+    cmsSetPCS(hProfile,         PCS);
+}
+
+
+
+// This function creates a named color profile, dumping all the contents of the transform into a single profile.
+// In this way, LittleCMS may be used to "group" several named color databases into a single profile.
+// It has, however, several minor limitations. PCS is always Lab, which is not very critical since this
+// is the normal PCS for named color profiles.
+static
+cmsHPROFILE CreateNamedColorDevicelink(cmsHTRANSFORM xform)
+{
+    _cmsTRANSFORM* v = (_cmsTRANSFORM*) xform;
+    cmsHPROFILE hICC = NULL;
+    cmsUInt32Number i, nColors;
+    cmsNAMEDCOLORLIST *nc2 = NULL, *Original = NULL;
+
+    // Create an empty placeholder
+    hICC = cmsCreateProfilePlaceholder(v->ContextID);
+    if (hICC == NULL) return NULL;
+
+    // Critical information
+    cmsSetDeviceClass(hICC, cmsSigNamedColorClass);
+    cmsSetColorSpace(hICC, v ->ExitColorSpace);
+    cmsSetPCS(hICC, cmsSigLabData);
+
+    // Tag profile with information
+    if (!SetTextTags(hICC, L"Named color devicelink")) goto Error;
+
+    Original = cmsGetNamedColorList(xform);
+    if (Original == NULL) goto Error;
+    nColors = cmsNamedColorCount(Original);
+    nc2     = cmsDupNamedColorList(Original);
+    if (nc2 == NULL) goto Error;
+
+    // Colorant count now depends on the output space
+    nc2 ->ColorantCount = cmsPipelineOutputChannels(v ->Lut);
+
+    // Make sure we have proper formatters
+    cmsChangeBuffersFormat(xform, TYPE_NAMED_COLOR_INDEX,
+        FLOAT_SH(0) | COLORSPACE_SH(_cmsLCMScolorSpace(v ->ExitColorSpace))
+        | BYTES_SH(2) | CHANNELS_SH(cmsChannelsOf(v ->ExitColorSpace)));
+
+    // Apply the transform to colorants.
+    for (i=0; i < nColors; i++) {
+        cmsDoTransform(xform, &i, nc2 ->List[i].DeviceColorant, 1);
+    }
+
+    if (!cmsWriteTag(hICC, cmsSigNamedColor2Tag, (void*) nc2)) goto Error;
+    cmsFreeNamedColorList(nc2);
+
+    return hICC;
+
+Error:
+    if (nc2 != NULL) cmsFreeNamedColorList(nc2);    // the duplicate is still ours on every failure path
+    if (hICC != NULL) cmsCloseProfile(hICC);
+    return NULL;
+}
+
+
+// This structure holds information about which MPU can be stored on a profile based on the version
+
+typedef struct {
+    cmsBool              IsV4;             // Entry applies to V4 profiles (TRUE) or to earlier versions (FALSE)
+    cmsTagSignature      RequiredTag;      // Destination tag this entry is limited to; 0 (cmsSig0) = any tag
+    cmsTagTypeSignature  LutType;          // The LUT type
+    int                  nTypes;           // Number of stages the pipeline must have (up to 5)
+    cmsStageSignature    MpeTypes[5];      // Expected stage type per position; 5 is the maximum number
+
+} cmsAllowedLUT;
+
+#define cmsSig0 ((cmsTagSignature) 0) 
+
+static const cmsAllowedLUT AllowedLUTTypes[] = {
+    // Scanned in order by FindCombination; the first row matching version, tag and stage layout wins
+    { FALSE, cmsSig0,        cmsSigLut16Type, 4, { cmsSigMatrixElemType, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType } },
+    { FALSE, cmsSig0,        cmsSigLut16Type, 3, { cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType } },
+    { FALSE, cmsSig0,        cmsSigLut16Type, 2, { cmsSigCurveSetElemType, cmsSigCLutElemType } },
+    { TRUE,  cmsSig0,        cmsSigLutAtoBType, 1, { cmsSigCurveSetElemType } },
+    { TRUE , cmsSigAToB0Tag, cmsSigLutAtoBType,  3,  { cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType } },
+    { TRUE , cmsSigAToB0Tag, cmsSigLutAtoBType,  3,  { cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType   } },
+    { TRUE , cmsSigAToB0Tag, cmsSigLutAtoBType,  5,  { cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType }},
+    { TRUE , cmsSigBToA0Tag, cmsSigLutBtoAType,  1,  { cmsSigCurveSetElemType }},
+    { TRUE , cmsSigBToA0Tag, cmsSigLutBtoAType,  3,  { cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType }},
+    { TRUE , cmsSigBToA0Tag, cmsSigLutBtoAType,  3,  { cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType }},
+    { TRUE , cmsSigBToA0Tag, cmsSigLutBtoAType,  5,  { cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType }}
+};
+
+#define SIZE_OF_ALLOWED_LUT (sizeof(AllowedLUTTypes)/sizeof(cmsAllowedLUT))
+
+// Check a single entry
+static
+cmsBool CheckOne(const cmsAllowedLUT* Tab, const cmsPipeline* Lut)
+{
+    cmsStage* mpe;
+    int n;
+
+    for (n=0, mpe = Lut ->Elements; mpe != NULL; mpe = mpe ->Next, n++) {
+        // More stages than the entry describes: reject before indexing, so n stays within MpeTypes[0..nTypes-1]
+        if (n >= Tab ->nTypes) return FALSE;
+        if (cmsStageType(mpe) != Tab ->MpeTypes[n]) return FALSE;
+    }
+
+    return (n == Tab ->nTypes);
+}
+
+
+static
+const cmsAllowedLUT* FindCombination(const cmsPipeline* Lut, cmsBool IsV4, cmsTagSignature DestinationTag)
+{
+    const cmsAllowedLUT* Entry = AllowedLUTTypes;
+    const cmsAllowedLUT* const End = AllowedLUTTypes + SIZE_OF_ALLOWED_LUT;
+
+    // The first table entry matching version, destination tag and stage layout wins
+    for (; Entry != End; Entry++) {
+
+        const cmsBool WrongVersion = (IsV4 ^ Entry ->IsV4) != 0;
+        const cmsBool WrongTag = (Entry ->RequiredTag != 0) && (Entry ->RequiredTag != DestinationTag);
+
+        if (!WrongVersion && !WrongTag && CheckOne(Entry, Lut)) return Entry;
+    }
+
+    return NULL;
+}
+
+
+// Does convert a transform into a device link profile
+cmsHPROFILE CMSEXPORT cmsTransform2DeviceLink(cmsHTRANSFORM hTransform, cmsFloat64Number Version, cmsUInt32Number dwFlags)
+{
+    cmsHPROFILE hProfile = NULL;
+    cmsUInt32Number FrmIn, FrmOut, ChansIn, ChansOut;
+    int ColorSpaceBitsIn, ColorSpaceBitsOut;
+    _cmsTRANSFORM* xform = (_cmsTRANSFORM*) hTransform;
+    cmsPipeline* LUT = NULL;
+    cmsStage* mpe;
+    cmsContext ContextID = cmsGetTransformContextID(hTransform);
+    const cmsAllowedLUT* AllowedLUT;
+    cmsTagSignature DestinationTag;
+    cmsProfileClassSignature deviceClass;
+    // Returns a new profile holding the transform's pipeline, or NULL on any failure
+    _cmsAssert(hTransform != NULL);
+
+    // Get the first mpe to check for named color
+    mpe = cmsPipelineGetPtrToFirstStage(xform ->Lut);
+
+    // Check if is a named color transform
+    if (mpe != NULL) {
+
+        if (cmsStageType(mpe) == cmsSigNamedColorElemType) {
+            return CreateNamedColorDevicelink(hTransform);
+        }
+    }
+
+    // First thing to do is to get a copy of the transformation
+    LUT = cmsPipelineDup(xform ->Lut);
+    if (LUT == NULL) return NULL;
+
+    // Time to fix the Lab2/Lab4 issue.
+    if ((xform ->EntryColorSpace == cmsSigLabData) && (Version < 4.0)) {
+
+        if (!cmsPipelineInsertStage(LUT, cmsAT_BEGIN, _cmsStageAllocLabV2ToV4curves(ContextID)))
+            goto Error;
+    }
+
+    // On the output side too
+    if ((xform ->ExitColorSpace) == cmsSigLabData && (Version < 4.0)) {
+
+        if (!cmsPipelineInsertStage(LUT, cmsAT_END, _cmsStageAllocLabV4ToV2(ContextID)))
+            goto Error;
+    }
+
+    // hProfile stays NULL until here, so earlier jumps to Error reach it with no profile to close
+    hProfile = cmsCreateProfilePlaceholder(ContextID);
+    if (!hProfile) goto Error;                    // can't allocate
+
+    cmsSetProfileVersion(hProfile, Version);
+
+    FixColorSpaces(hProfile, xform -> EntryColorSpace, xform -> ExitColorSpace, dwFlags);
+
+    // Optimize the LUT and precalculate a devicelink
+
+    ChansIn  = cmsChannelsOf(xform -> EntryColorSpace);
+    ChansOut = cmsChannelsOf(xform -> ExitColorSpace);
+
+    ColorSpaceBitsIn  = _cmsLCMScolorSpace(xform -> EntryColorSpace);
+    ColorSpaceBitsOut = _cmsLCMScolorSpace(xform -> ExitColorSpace);
+
+    FrmIn  = COLORSPACE_SH(ColorSpaceBitsIn) | CHANNELS_SH(ChansIn)|BYTES_SH(2);
+    FrmOut = COLORSPACE_SH(ColorSpaceBitsOut) | CHANNELS_SH(ChansOut)|BYTES_SH(2);
+
+    deviceClass = cmsGetDeviceClass(hProfile);
+    // Output-class profiles store the pipeline in BToA0, every other class in AToB0
+     if (deviceClass == cmsSigOutputClass)
+         DestinationTag = cmsSigBToA0Tag;
+     else
+         DestinationTag = cmsSigAToB0Tag;
+
+    // Check if the profile/version can store the result
+    if (dwFlags & cmsFLAGS_FORCE_CLUT)
+        AllowedLUT = NULL;
+    else
+        AllowedLUT = FindCombination(LUT, Version >= 4.0, DestinationTag);
+
+    if (AllowedLUT == NULL) {
+
+        // Try to optimize
+        _cmsOptimizePipeline(ContextID, &LUT, xform ->RenderingIntent, &FrmIn, &FrmOut, &dwFlags);
+        AllowedLUT = FindCombination(LUT, Version >= 4.0, DestinationTag);
+
+    }
+
+    // If no way, then force CLUT that for sure can be written
+    if (AllowedLUT == NULL) {
+
+        cmsStage* FirstStage;
+        cmsStage* LastStage;
+
+        dwFlags |= cmsFLAGS_FORCE_CLUT;
+        _cmsOptimizePipeline(ContextID, &LUT, xform ->RenderingIntent, &FrmIn, &FrmOut, &dwFlags);
+
+        // Put identity curves if needed
+        FirstStage = cmsPipelineGetPtrToFirstStage(LUT);
+        if (FirstStage != NULL && FirstStage ->Type != cmsSigCurveSetElemType)
+             if (!cmsPipelineInsertStage(LUT, cmsAT_BEGIN, _cmsStageAllocIdentityCurves(ContextID, ChansIn)))
+                 goto Error;
+
+        LastStage = cmsPipelineGetPtrToLastStage(LUT);
+        if (LastStage != NULL && LastStage ->Type != cmsSigCurveSetElemType)
+             if (!cmsPipelineInsertStage(LUT, cmsAT_END,   _cmsStageAllocIdentityCurves(ContextID, ChansOut)))
+                 goto Error;
+
+        AllowedLUT = FindCombination(LUT, Version >= 4.0, DestinationTag);
+    }
+
+    // Something is wrong: not even the forced-CLUT layout matches a storable combination
+    if (AllowedLUT == NULL) {
+        goto Error;
+    }
+
+
+    if (dwFlags & cmsFLAGS_8BITS_DEVICELINK)
+                     cmsPipelineSetSaveAs8bitsFlag(LUT, TRUE);
+
+    // Tag profile with information
+    if (!SetTextTags(hProfile, L"devicelink")) goto Error;
+
+    // Store result
+    if (!cmsWriteTag(hProfile, DestinationTag, LUT)) goto Error;
+
+
+    if (xform -> InputColorant != NULL) {
+           if (!cmsWriteTag(hProfile, cmsSigColorantTableTag, xform->InputColorant)) goto Error;
+    }
+
+    if (xform -> OutputColorant != NULL) {
+           if (!cmsWriteTag(hProfile, cmsSigColorantTableOutTag, xform->OutputColorant)) goto Error;
+    }
+
+    if ((deviceClass == cmsSigLinkClass) && (xform ->Sequence != NULL)) {
+        if (!_cmsWriteProfileSequence(hProfile, xform ->Sequence)) goto Error;
+    }
+
+    // Set the white point
+    if (deviceClass == cmsSigInputClass) {
+        if (!cmsWriteTag(hProfile, cmsSigMediaWhitePointTag, &xform ->EntryWhitePoint)) goto Error;
+    }
+    else {
+         if (!cmsWriteTag(hProfile, cmsSigMediaWhitePointTag, &xform ->ExitWhitePoint)) goto Error;
+    }
+
+
+    // Per 7.2.15 in spec 4.3
+    cmsSetHeaderRenderingIntent(hProfile, xform ->RenderingIntent);
+
+    cmsPipelineFree(LUT);
+    return hProfile;
+
+Error:
+    if (LUT != NULL) cmsPipelineFree(LUT);
+    if (hProfile != NULL) cmsCloseProfile(hProfile);    // guarded like the other creators in this file
+    return NULL;
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmswtpnt.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmswtpnt.cpp
new file mode 100644
index 0000000000..fab0da2a8c
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmswtpnt.cpp
@@ -0,0 +1,350 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// D50 - Widely used
+const cmsCIEXYZ* CMSEXPORT cmsD50_XYZ(void)
+{
+    static cmsCIEXYZ D50XYZ = {cmsD50X, cmsD50Y, cmsD50Z};
+    // The result points at function-static storage
+    return &D50XYZ;
+}
+
+const cmsCIExyY* CMSEXPORT cmsD50_xyY(void)
+{
+    static cmsCIExyY D50xyY;
+    // Recomputed from cmsD50_XYZ() on every call and written into the function-static result
+    cmsXYZ2xyY(&D50xyY, cmsD50_XYZ());
+    // NOTE(review): concurrent callers all write this one static, presumably with identical values - confirm
+    return &D50xyY;
+}
+
+// Obtains WhitePoint from Temperature
+cmsBool  CMSEXPORT cmsWhitePointFromTemp(cmsCIExyY* WhitePoint, cmsFloat64Number TempK)
+{
+    const cmsFloat64Number T  = TempK;
+    const cmsFloat64Number T2 = T*T;            // Square
+    const cmsFloat64Number T3 = T2*T;           // Cube
+    cmsFloat64Number x, y;
+    // cmsFloat64Number M1, M2;
+
+    _cmsAssert(WhitePoint != NULL);
+
+    // Only 4000K..25000K is covered. Anything else, NaN included, is
+    // reported and rejected without touching *WhitePoint.
+    if (!(T >= 4000. && T <= 25000.0)) {
+        cmsSignalError(0, cmsERROR_RANGE, "cmsWhitePointFromTemp: invalid temp");
+        return FALSE;
+    }
+
+    if (T <= 7000.) {
+
+        // For correlated color temperature (T) between 4000K and 7000K:
+        x = -4.6070*(1E9/T3) + 2.9678*(1E6/T2) + 0.09911*(1E3/T) + 0.244063;
+    }
+    else {
+
+        // or for correlated color temperature (T) between 7000K and 25000K:
+        x = -2.0064*(1E9/T3) + 1.9018*(1E6/T2) + 0.24748*(1E3/T) + 0.237040;
+    }
+
+    // Obtain y(x)
+    y = -3.000*(x*x) + 2.870*x - 0.275;
+
+    // wave factors (not used, but here for futures extensions)
+
+    // M1 = (-1.3515 - 1.7703*x + 5.9114 *y)/(0.0241 + 0.2562*x - 0.7341*y);
+    // M2 = (0.0300 - 31.4424*x + 30.0717*y)/(0.0241 + 0.2562*x - 0.7341*y);
+
+    // Y is always written as 1.0
+    WhitePoint -> x = x;
+    WhitePoint -> y = y;
+    WhitePoint -> Y = 1.0;
+
+
+    return TRUE;
+}
+
+
+
+typedef struct {
+    // One row of the isotemperature-line table scanned by cmsTempFromWhitePoint
+    cmsFloat64Number mirek;  // temp (in microreciprocal kelvin)
+    cmsFloat64Number ut;     // u coord of intersection w/ blackbody locus
+    cmsFloat64Number vt;     // v coord of intersection w/ blackbody locus
+    cmsFloat64Number tt;     // slope of the isotemperature line
+
+    } ISOTEMPERATURE;
+
+static const ISOTEMPERATURE isotempdata[] = {
+//  {Mirek, Ut,       Vt,      Tt      }
+    {0,     0.18006,  0.26352,  -0.24341},
+    {10,    0.18066,  0.26589,  -0.25479},
+    {20,    0.18133,  0.26846,  -0.26876},
+    {30,    0.18208,  0.27119,  -0.28539},
+    {40,    0.18293,  0.27407,  -0.30470},
+    {50,    0.18388,  0.27709,  -0.32675},
+    {60,    0.18494,  0.28021,  -0.35156},
+    {70,    0.18611,  0.28342,  -0.37915},
+    {80,    0.18740,  0.28668,  -0.40955},
+    {90,    0.18880,  0.28997,  -0.44278},
+    {100,   0.19032,  0.29326,  -0.47888},
+    {125,   0.19462,  0.30141,  -0.58204},
+    {150,   0.19962,  0.30921,  -0.70471},
+    {175,   0.20525,  0.31647,  -0.84901},
+    {200,   0.21142,  0.32312,  -1.0182 },
+    {225,   0.21807,  0.32909,  -1.2168 },
+    {250,   0.22511,  0.33439,  -1.4512 },
+    {275,   0.23247,  0.33904,  -1.7298 },
+    {300,   0.24010,  0.34308,  -2.0637 },
+    {325,   0.24702,  0.34655,  -2.4681 },
+    {350,   0.25591,  0.34951,  -2.9641 },
+    {375,   0.26400,  0.35200,  -3.5814 },
+    {400,   0.27218,  0.35407,  -4.3633 },
+    {425,   0.28039,  0.35577,  -5.3762 },
+    {450,   0.28863,  0.35714,  -6.7262 },
+    {475,   0.29685,  0.35823,  -8.5955 },
+    {500,   0.30505,  0.35907,  -11.324 },
+    {525,   0.31320,  0.35968,  -15.628 },
+    {550,   0.32129,  0.36011,  -23.325 },
+    {575,   0.32931,  0.36038,  -40.770 },
+    {600,   0.33724,  0.36051,  -116.45  }
+};
+
+#define NISO (sizeof(isotempdata)/sizeof(ISOTEMPERATURE))   // number of rows in isotempdata
+
+
+// Robertson's method
+cmsBool  CMSEXPORT cmsTempFromWhitePoint(cmsFloat64Number* TempK, const cmsCIExyY* WhitePoint)
+{
+    cmsUInt32Number j;
+    cmsFloat64Number us,vs;
+    cmsFloat64Number uj,vj,tj,di,dj,mi,mj;
+    cmsFloat64Number xs, ys;
+    // Writes the estimated temperature to *TempK and returns TRUE, or returns FALSE if no table interval brackets the point
+    _cmsAssert(WhitePoint != NULL);
+    _cmsAssert(TempK != NULL);
+
+    di = mi = 0;
+    xs = WhitePoint -> x;
+    ys = WhitePoint -> y;
+
+    // convert (x,y) to CIE 1960 (u,v)
+
+    us = (2*xs) / (-xs + 6*ys + 1.5);
+    vs = (3*ys) / (-xs + 6*ys + 1.5);
+
+    // Walk the table looking for two consecutive lines with the point in between
+    for (j=0; j < NISO; j++) {
+
+        uj = isotempdata[j].ut;
+        vj = isotempdata[j].vt;
+        tj = isotempdata[j].tt;
+        mj = isotempdata[j].mirek;
+        // Signed distance from (us,vs) to the line through (uj,vj) with slope tj
+        dj = ((vs - vj) - tj * (us - uj)) / sqrt(1.0 + tj * tj);
+        // A sign change against the previous distance means the point lies between lines j-1 and j
+        if ((j != 0) && (di/dj < 0.0)) {
+
+            // Found a match: interpolate in mirek between the two lines, then convert to kelvin
+            *TempK = 1000000.0 / (mi + (di / (di - dj)) * (mj - mi));
+            return TRUE;
+        }
+        // NOTE(review): dj == 0 makes di/dj infinite or NaN above - confirm that case behaves as intended
+        di = dj;
+        mi = mj;
+    }
+
+    // Not found
+    return FALSE;
+}
+
+
+// Compute chromatic adaptation matrix using Chad as cone matrix
+
+static
+cmsBool ComputeChromaticAdaptation(cmsMAT3* Conversion,
+                                const cmsCIEXYZ* SourceWhitePoint,
+                                const cmsCIEXYZ* DestWhitePoint,
+                                const cmsMAT3* Chad)
+
+{
+    // Fills Conversion and returns TRUE; returns FALSE when _cmsMAT3inverse fails on the cone matrix
+    cmsMAT3 Chad_Inv;
+    cmsVEC3 ConeSourceXYZ, ConeSourceRGB;
+    cmsVEC3 ConeDestXYZ, ConeDestRGB;
+    cmsMAT3 Cone, Tmp;
+
+    // Tmp is a scratch copy of *Chad handed to _cmsMAT3inverse
+    Tmp = *Chad;
+    if (!_cmsMAT3inverse(&Tmp, &Chad_Inv)) return FALSE;
+
+    _cmsVEC3init(&ConeSourceXYZ, SourceWhitePoint -> X,
+                             SourceWhitePoint -> Y,
+                             SourceWhitePoint -> Z);
+
+    _cmsVEC3init(&ConeDestXYZ,   DestWhitePoint -> X,
+                             DestWhitePoint -> Y,
+                             DestWhitePoint -> Z);
+    // Both white points run through Chad (presumably _cmsMAT3eval computes matrix * vector - confirm)
+    _cmsMAT3eval(&ConeSourceRGB, Chad, &ConeSourceXYZ);
+    _cmsMAT3eval(&ConeDestRGB,   Chad, &ConeDestXYZ);
+
+    // Build matrix
+    _cmsVEC3init(&Cone.v[0], ConeDestRGB.n[0]/ConeSourceRGB.n[0],    0.0,  0.0);
+    _cmsVEC3init(&Cone.v[1], 0.0,   ConeDestRGB.n[1]/ConeSourceRGB.n[1],   0.0);
+    _cmsVEC3init(&Cone.v[2], 0.0,   0.0,   ConeDestRGB.n[2]/ConeSourceRGB.n[2]);
+
+    // NOTE(review): a zero component in ConeSourceRGB makes the ratios above infinite or NaN - confirm callers exclude it
+    // Normalize
+    _cmsMAT3per(&Tmp, &Cone, Chad);
+    _cmsMAT3per(Conversion, &Chad_Inv, &Tmp);
+
+    return TRUE;
+}
+
+// Returns the final chromatic adaptation from illuminant FromIll to illuminant ToIll.
+// The cone matrix can be specified in ConeMatrix. If NULL, Bradford is assumed.
+cmsBool  _cmsAdaptationMatrix(cmsMAT3* r, const cmsMAT3* ConeMatrix, const cmsCIEXYZ* FromIll, const cmsCIEXYZ* ToIll)
+{
+    // Bradford matrix, the default when the caller supplies none
+    cmsMAT3 LamRigg   = {{
+        {{  0.8951,  0.2664, -0.1614 }},
+        {{ -0.7502,  1.7135,  0.0367 }},
+        {{  0.0389, -0.0685,  1.0296 }}
+    }};
+    const cmsMAT3* Cone = (ConeMatrix != NULL) ? ConeMatrix : &LamRigg;
+
+    // Delegate to the generic routine with the selected cone matrix
+    return ComputeChromaticAdaptation(r, FromIll, ToIll, Cone);
+}
+
+// Same as _cmsAdaptationMatrix with the default cone matrix, but assuming D50 destination. White point is given in xyY.
+// On success the matrix in r is combined with the adaptation; on failure r is left untouched.
+static
+cmsBool _cmsAdaptMatrixToD50(cmsMAT3* r, const cmsCIExyY* SourceWhitePt)
+{
+    cmsCIEXYZ SourceXYZ;
+    cmsMAT3 Adaptation, Original;
+
+    cmsxyY2XYZ(&SourceXYZ, SourceWhitePt);
+
+    if (_cmsAdaptationMatrix(&Adaptation, NULL, &SourceXYZ, cmsD50_XYZ())) {
+        Original = *r;
+        _cmsMAT3per(r, &Adaptation, &Original);
+        return TRUE;
+    }
+    return FALSE;
+}
+
+// Build a White point, primary chromas transfer matrix from RGB to CIE XYZ
+// This is just an approximation, I am not handling all the non-linear
+// aspects of the RGB to XYZ process, and assuming that the gamma correction
+// has transitive property in the transformation chain.
+//
+// the algorithm:
+//
+//            - First I build the absolute conversion matrix using
+//              primaries in XYZ. This matrix is next inverted
+//            - Then I eval the source white point across this matrix
+//              obtaining the coefficients of the transformation
+//            - Then, I apply these coefficients to the original matrix
+//
+// Returns FALSE if the white point has y == 0, if the primaries matrix cannot
+// be inverted, or if the final adaptation to D50 fails.
+cmsBool _cmsBuildRGB2XYZtransferMatrix(cmsMAT3* r, const cmsCIExyY* WhitePt, const cmsCIExyYTRIPLE* Primrs)
+{
+    cmsVEC3 WhitePoint, Coef;
+    cmsMAT3 Result, Primaries;
+    cmsFloat64Number xn, yn;
+    cmsFloat64Number xr, yr;
+    cmsFloat64Number xg, yg;
+    cmsFloat64Number xb, yb;
+
+    xn = WhitePt -> x;
+    yn = WhitePt -> y;
+    xr = Primrs -> Red.x;
+    yr = Primrs -> Red.y;
+    xg = Primrs -> Green.x;
+    yg = Primrs -> Green.y;
+    xb = Primrs -> Blue.x;
+    yb = Primrs -> Blue.y;
+
+    // The white point is scaled by 1/yn below, so yn must not be zero
+    if (yn == 0.0) return FALSE;
+
+    // Build Primaries matrix
+    _cmsVEC3init(&Primaries.v[0], xr,        xg,         xb);
+    _cmsVEC3init(&Primaries.v[1], yr,        yg,         yb);
+    _cmsVEC3init(&Primaries.v[2], (1-xr-yr), (1-xg-yg),  (1-xb-yb));
+
+    // Result = Primaries ^ (-1) inverse matrix
+    if (!_cmsMAT3inverse(&Primaries, &Result)) return FALSE;
+
+    _cmsVEC3init(&WhitePoint, xn/yn, 1.0, (1.0-xn-yn)/yn);
+
+    // Across inverse primaries ...
+    _cmsMAT3eval(&Coef, &Result, &WhitePoint);
+
+    // Give us the Coefs, then I build transformation matrix
+    _cmsVEC3init(&r -> v[0], Coef.n[VX]*xr,          Coef.n[VY]*xg,          Coef.n[VZ]*xb);
+    _cmsVEC3init(&r -> v[1], Coef.n[VX]*yr,          Coef.n[VY]*yg,          Coef.n[VZ]*yb);
+    _cmsVEC3init(&r -> v[2], Coef.n[VX]*(1.0-xr-yr), Coef.n[VY]*(1.0-xg-yg), Coef.n[VZ]*(1.0-xb-yb));
+
+    return _cmsAdaptMatrixToD50(r, WhitePt);
+}
+
+
+// Adapts a color to a given illuminant. Original color is expected to have
+// a SourceWhitePt white point. The adapted color is stored in Result.
+// Returns FALSE, leaving Result untouched, if no adaptation matrix is available.
+cmsBool CMSEXPORT cmsAdaptToIlluminant(cmsCIEXYZ* Result,
+                                       const cmsCIEXYZ* SourceWhitePt,
+                                       const cmsCIEXYZ* Illuminant,
+                                       const cmsCIEXYZ* Value)
+{
+    cmsMAT3 Adaptation;
+    cmsVEC3 Source, Adapted;
+
+    _cmsAssert(Result != NULL);
+    _cmsAssert(SourceWhitePt != NULL);
+    _cmsAssert(Illuminant != NULL);
+    _cmsAssert(Value != NULL);
+
+    if (_cmsAdaptationMatrix(&Adaptation, NULL, SourceWhitePt, Illuminant)) {
+        _cmsVEC3init(&Source, Value -> X, Value -> Y, Value -> Z);
+        _cmsMAT3eval(&Adapted, &Adaptation, &Source);
+        Result -> X = Adapted.n[0];
+        Result -> Y = Adapted.n[1];
+        Result -> Z = Adapted.n[2];
+        return TRUE;
+    }
+    return FALSE;
+}
+
+
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsxform.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsxform.cpp
new file mode 100644
index 0000000000..d8c69648f8
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/cmsxform.cpp
@@ -0,0 +1,1339 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Transformations stuff
+// -----------------------------------------------------------------------
+
+// Adaptation state a context starts with unless it is duplicated from another context
+#define DEFAULT_OBSERVER_ADAPTATION_STATE 1.0
+// The Context0 observer adaptation state.
+_cmsAdaptationStateChunkType _cmsAdaptationStateChunk = { DEFAULT_OBSERVER_ADAPTATION_STATE };
+
+// Init and duplicate observer adaptation state: the new context gets its own
+// copy of the chunk owned by src or, when src is NULL, of the built-in default.
+void _cmsAllocAdaptationStateChunk(struct _cmsContext_struct* ctx,
+                                   const struct _cmsContext_struct* src)
+{
+    static _cmsAdaptationStateChunkType AdaptationStateChunk = { DEFAULT_OBSERVER_ADAPTATION_STATE };
+    void* origin = &AdaptationStateChunk;
+
+    // Prefer the state of the context being duplicated
+    if (src != NULL) {
+        origin = src ->chunks[AdaptationStateContext];
+    }
+
+    // The copy is allocated from the memory pool of the new context
+    ctx ->chunks[AdaptationStateContext] = _cmsSubAllocDup(ctx ->MemPool, origin, sizeof(_cmsAdaptationStateChunkType));
+}
+
+
+// Sets adaptation state for absolute colorimetric intent in the given context.  Adaptation state applies on all
+// but cmsCreateExtendedTransformTHR().  Little CMS can handle incomplete adaptation states.
+// A negative d leaves the stored state unchanged; the previous value is returned in every case.
+cmsFloat64Number CMSEXPORT cmsSetAdaptationStateTHR(cmsContext ContextID, cmsFloat64Number d)
+{
+    _cmsAdaptationStateChunkType* chunk = (_cmsAdaptationStateChunkType*) _cmsContextGetClientChunk(ContextID, AdaptationStateContext);
+
+    // Capture the state in force before this call
+    const cmsFloat64Number previous = chunk ->AdaptationState;
+
+    // Only zero or positive values are stored
+    if (d >= 0.0) {
+
+        chunk ->AdaptationState = d;
+    }
+
+    // Always return previous value
+    return previous;
+}
+
+
+// The adaptation state may be defaulted by this function (it forwards to cmsSetAdaptationStateTHR() with a NULL context). If you don't like it, use the extended transform routine
+cmsFloat64Number CMSEXPORT cmsSetAdaptationState(cmsFloat64Number d)
+{    
+    return cmsSetAdaptationStateTHR(NULL, d);
+}
+
+// -----------------------------------------------------------------------
+
+// Alarm codes for 16-bit transformations: because of the fixed range of the containers, there are
+// no values left to mark out of gamut, so these codes are written to the output instead.
+
+#define DEFAULT_ALARM_CODES_VALUE {0x7F00, 0x7F00, 0x7F00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+
+_cmsAlarmCodesChunkType _cmsAlarmCodesChunk = { DEFAULT_ALARM_CODES_VALUE };
+
+// Sets the codes used to mark out-of-gamut colors on proofing transforms for a given context.
+// Values are meant to be encoded in 16 bits; sizeof(AlarmCodes) bytes are copied from AlarmCodesP.
+void CMSEXPORT cmsSetAlarmCodesTHR(cmsContext ContextID, const cmsUInt16Number AlarmCodesP[cmsMAXCHANNELS])
+{
+    _cmsAlarmCodesChunkType* chunk = (_cmsAlarmCodesChunkType*) _cmsContextGetClientChunk(ContextID, AlarmCodesContext);
+
+    _cmsAssert(chunk != NULL); // Can't happen
+
+    memcpy(chunk->AlarmCodes, AlarmCodesP, sizeof(chunk->AlarmCodes));
+}
+
+// Gets the current codes used to mark out-of-gamut colors on proofing transforms for the given context.
+// Values are meant to be encoded in 16 bits; AlarmCodesP receives sizeof(AlarmCodes) bytes.
+void CMSEXPORT cmsGetAlarmCodesTHR(cmsContext ContextID, cmsUInt16Number AlarmCodesP[cmsMAXCHANNELS])
+{
+    _cmsAlarmCodesChunkType* chunk = (_cmsAlarmCodesChunkType*) _cmsContextGetClientChunk(ContextID, AlarmCodesContext);
+
+    _cmsAssert(chunk != NULL); // Can't happen
+
+    memcpy(AlarmCodesP, chunk->AlarmCodes, sizeof(chunk->AlarmCodes));
+}
+
+// Sets the alarm codes through cmsSetAlarmCodesTHR() using a NULL context. NewAlarm must not be NULL.
+void CMSEXPORT cmsSetAlarmCodes(const cmsUInt16Number NewAlarm[cmsMAXCHANNELS])
+{
+    _cmsAssert(NewAlarm != NULL);
+    cmsSetAlarmCodesTHR(NULL, NewAlarm);
+}
+
+// Reads the alarm codes through cmsGetAlarmCodesTHR() using a NULL context. OldAlarm must not be NULL.
+void CMSEXPORT cmsGetAlarmCodes(cmsUInt16Number OldAlarm[cmsMAXCHANNELS])
+{ 
+    _cmsAssert(OldAlarm != NULL);
+    cmsGetAlarmCodesTHR(NULL, OldAlarm);
+}
+
+// Init and duplicate alarm codes. The chunk of src is copied into ctx; with a
+// NULL src the copy is made from the built-in default codes instead. The new
+// chunk is stored in the AlarmCodesContext slot of ctx.
+void _cmsAllocAlarmCodesChunk(struct _cmsContext_struct* ctx,
+                              const struct _cmsContext_struct* src)
+{
+    static _cmsAlarmCodesChunkType AlarmCodesChunk = { DEFAULT_ALARM_CODES_VALUE };
+    void* origin;
+
+    // Choose what to duplicate
+    origin = (src == NULL) ? (void*) &AlarmCodesChunk
+                           : src ->chunks[AlarmCodesContext];
+
+    // Allocate the copy from the memory pool of the new context
+    ctx ->chunks[AlarmCodesContext] = _cmsSubAllocDup(ctx ->MemPool, origin, sizeof(_cmsAlarmCodesChunkType));
+}
+
+// -----------------------------------------------------------------------
+
+// Get rid of transform resources: releases everything the transform owns
+// (pipelines, colorant lists, profile sequence, plug-in user data) and then
+// the transform itself. hTransform must not be NULL.
+void CMSEXPORT cmsDeleteTransform(cmsHTRANSFORM hTransform)
+{
+    _cmsTRANSFORM* p = (_cmsTRANSFORM*) hTransform;
+
+    _cmsAssert(p != NULL);
+
+    // Pipelines
+    if (p -> GamutCheck) cmsPipelineFree(p -> GamutCheck);
+    if (p -> Lut) cmsPipelineFree(p -> Lut);
+
+    // Colorant tables
+    if (p ->InputColorant) cmsFreeNamedColorList(p ->InputColorant);
+    if (p -> OutputColorant) cmsFreeNamedColorList(p ->OutputColorant);
+
+    // Profile sequence
+    if (p ->Sequence) cmsFreeProfileSequenceDescription(p ->Sequence);
+
+    // Plug-in private data; it may have been stored without a free function,
+    // in which case there is nothing this routine can call to release it
+    if (p ->UserData && p ->FreeUserData)
+        p ->FreeUserData(p ->ContextID, p ->UserData);
+
+    _cmsFree(p ->ContextID, (void *) p);
+}
+
+// Apply transform to Size pixels, handled as one single line.
+void CMSEXPORT cmsDoTransform(cmsHTRANSFORM  Transform,
+                              const void* InputBuffer,
+                              void* OutputBuffer,
+                              cmsUInt32Number Size)
+{
+    _cmsTRANSFORM* self = (_cmsTRANSFORM*) Transform;
+    cmsStride layout;
+
+    // Line strides stay at zero (only one line); plane strides take the pixel count
+    layout.BytesPerLineOut = 0;
+    layout.BytesPerLineIn = 0;
+    layout.BytesPerPlaneOut = Size;
+    layout.BytesPerPlaneIn = Size;
+
+    self -> xform(self, InputBuffer, OutputBuffer, Size, 1, &layout);
+}
+
+
+// This is a legacy stride for planar: same as cmsDoTransform(), but the plane stride is given by Stride
+void CMSEXPORT cmsDoTransformStride(cmsHTRANSFORM  Transform,
+                              const void* InputBuffer,
+                              void* OutputBuffer,
+                              cmsUInt32Number Size, cmsUInt32Number Stride)
+{
+    _cmsTRANSFORM* self = (_cmsTRANSFORM*) Transform;
+    cmsStride layout;
+
+    layout.BytesPerLineOut = 0;
+    layout.BytesPerLineIn = 0;
+    // Same plane stride on the input and the output side
+    layout.BytesPerPlaneOut = Stride;
+    layout.BytesPerPlaneIn = Stride;
+
+    self -> xform(self, InputBuffer, OutputBuffer, Size, 1, &layout);
+}
+
+// This is the "fast" function for plugins: every stride is supplied by the caller
+void CMSEXPORT cmsDoTransformLineStride(cmsHTRANSFORM  Transform,
+                              const void* InputBuffer,
+                              void* OutputBuffer,
+                              cmsUInt32Number PixelsPerLine,
+                              cmsUInt32Number LineCount,
+                              cmsUInt32Number BytesPerLineIn,
+                              cmsUInt32Number BytesPerLineOut,
+                              cmsUInt32Number BytesPerPlaneIn,
+                              cmsUInt32Number BytesPerPlaneOut)
+{
+    cmsStride layout;
+    _cmsTRANSFORM* self = (_cmsTRANSFORM*) Transform;
+
+    // Strides are forwarded untouched
+    layout.BytesPerPlaneOut = BytesPerPlaneOut;
+    layout.BytesPerPlaneIn = BytesPerPlaneIn;
+    layout.BytesPerLineOut = BytesPerLineOut;
+    layout.BytesPerLineIn = BytesPerLineIn;
+
+    self->xform(self, InputBuffer, OutputBuffer, PixelsPerLine, LineCount, &layout);
+}
+
+
+
+// Transform routines ----------------------------------------------------------------------------------------------------------
+
+// Float xform converts floats. Since there are no performance issues, one routine does all job, including gamut check.
+// Note that because extended range, we can use a -1.0 value for out of gamut in this case.
+// Each pixel is unpacked with FromInputFloat, evaluated through the Lut pipeline (or
+// filled with -1.0 when the GamutCheck pipeline reports a value above zero) and packed
+// with ToOutputFloat. Line offsets advance by the strides given in Stride.
+static
+void FloatXFORM(_cmsTRANSFORM* p,
+                const void* in,
+                void* out, 
+                cmsUInt32Number PixelsPerLine,
+                cmsUInt32Number LineCount,
+                const cmsStride* Stride)
+{
+    cmsUInt8Number* accum;
+    cmsUInt8Number* output;
+    cmsFloat32Number fIn[cmsMAXCHANNELS], fOut[cmsMAXCHANNELS];
+    cmsFloat32Number OutOfGamut;
+    cmsUInt32Number i, j, c, strideIn, strideOut;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    strideIn = 0;
+    strideOut = 0;
+    // Each buffer is cleared using its own size
+    memset(fIn, 0, sizeof(fIn));
+    memset(fOut, 0, sizeof(fOut));
+
+    for (i = 0; i < LineCount; i++) {
+
+        accum = (cmsUInt8Number*)in + strideIn;
+        output = (cmsUInt8Number*)out + strideOut;
+
+        for (j = 0; j < PixelsPerLine; j++) {
+
+            accum = p->FromInputFloat(p, fIn, accum, Stride->BytesPerPlaneIn);
+
+            // Any gamut check to do?
+            if (p->GamutCheck != NULL) {
+
+                // Evaluate gamut marker.
+                cmsPipelineEvalFloat(fIn, &OutOfGamut, p->GamutCheck);
+
+                // Is current color out of gamut?
+                if (OutOfGamut > 0.0) {
+
+                    // Certainly, out of gamut
+                    for (c = 0; c < cmsMAXCHANNELS; c++)
+                        fOut[c] = -1.0;
+                }
+                else {
+                    // No, proceed normally
+                    cmsPipelineEvalFloat(fIn, fOut, p->Lut);
+                }
+            }
+            else {
+                // No gamut check at all
+                cmsPipelineEvalFloat(fIn, fOut, p->Lut);
+            }
+
+            output = p->ToOutputFloat(p, fOut, output, Stride->BytesPerPlaneOut);
+        }
+
+        strideIn += Stride->BytesPerLineIn;
+        strideOut += Stride->BytesPerLineOut;
+    }
+}
+
+
+// Null transformation for float formatters: every pixel is unpacked with
+// FromInputFloat and packed straight back with ToOutputFloat, no pipeline involved.
+static
+void NullFloatXFORM(_cmsTRANSFORM* p,
+                    const void* in,
+                    void* out, 
+                    cmsUInt32Number PixelsPerLine,
+                    cmsUInt32Number LineCount,
+                    const cmsStride* Stride)
+{
+    cmsFloat32Number pixel[cmsMAXCHANNELS];
+    cmsUInt32Number line, px;
+    cmsUInt32Number offsetIn = 0, offsetOut = 0;
+    cmsUInt8Number* src;
+    cmsUInt8Number* dst;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    memset(pixel, 0, sizeof(pixel));
+
+    for (line = 0; line < LineCount; line++) {
+
+        // Start of the current line in both buffers
+        src = (cmsUInt8Number*) in + offsetIn;
+        dst = (cmsUInt8Number*) out + offsetOut;
+
+        for (px = 0; px < PixelsPerLine; px++) {
+            src = p->FromInputFloat(p, pixel, src, Stride ->BytesPerPlaneIn);
+            dst = p->ToOutputFloat(p, pixel, dst, Stride->BytesPerPlaneOut);
+        }
+
+        offsetIn += Stride->BytesPerLineIn;
+        offsetOut += Stride->BytesPerLineOut;
+    }
+}
+
+// 16 bit precision -----------------------------------------------------------------------------------------------------------
+
+// Null transformation, only applies formatters. No cache.
+// Pixels go from FromInput directly to ToOutput through a 16-bit buffer.
+// Line offsets follow the strides given in Stride.
+static
+void NullXFORM(_cmsTRANSFORM* p,
+               const void* in,
+               void* out,
+               cmsUInt32Number PixelsPerLine,
+               cmsUInt32Number LineCount,
+               const cmsStride* Stride)
+{
+    cmsUInt16Number words[cmsMAXCHANNELS];
+    cmsUInt32Number row, col;
+    cmsUInt32Number inOffset, outOffset;
+    cmsUInt8Number* rd;
+    cmsUInt8Number* wr;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    memset(words, 0, sizeof(words));
+
+    // Offsets of the current line grow by the line strides after each row
+    for (row = 0, inOffset = 0, outOffset = 0;
+         row < LineCount;
+         row++, inOffset += Stride->BytesPerLineIn, outOffset += Stride->BytesPerLineOut) {
+
+        rd = (cmsUInt8Number*)in + inOffset;
+        wr = (cmsUInt8Number*)out + outOffset;
+
+        for (col = 0; col < PixelsPerLine; col++) {
+
+            rd = p->FromInput(p, words, rd, Stride->BytesPerPlaneIn);
+            wr = p->ToOutput(p, words, wr, Stride->BytesPerPlaneOut);
+        }
+    }
+}
+
+
+// No gamut check, no cache, 16 bits.
+// Every pixel is unpacked, run through the 16-bit evaluator of the Lut pipeline and packed to the output.
+static
+void PrecalculatedXFORM(_cmsTRANSFORM* p,
+                        const void* in,
+                        void* out, 
+                        cmsUInt32Number PixelsPerLine,
+                        cmsUInt32Number LineCount,
+                        const cmsStride* Stride)
+{
+    CMSREGISTER cmsUInt8Number* src;
+    CMSREGISTER cmsUInt8Number* dst;
+    cmsUInt16Number unpacked[cmsMAXCHANNELS], evaluated[cmsMAXCHANNELS];
+    cmsUInt32Number line, pixel;
+    cmsUInt32Number srcOffset = 0, dstOffset = 0;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    memset(unpacked, 0, sizeof(unpacked));
+    memset(evaluated, 0, sizeof(evaluated));
+
+    for (line = 0; line < LineCount; line++) {
+
+        src = (cmsUInt8Number*)in + srcOffset;
+        dst = (cmsUInt8Number*)out + dstOffset;
+
+        for (pixel = 0; pixel < PixelsPerLine; pixel++) {
+
+            src = p->FromInput(p, unpacked, src, Stride->BytesPerPlaneIn);
+            p->Lut->Eval16Fn(unpacked, evaluated, p->Lut->Data);
+            dst = p->ToOutput(p, evaluated, dst, Stride->BytesPerPlaneOut);
+        }
+
+        // Move on to the next line
+        srcOffset += Stride->BytesPerLineIn;
+        dstOffset += Stride->BytesPerLineOut;
+    }
+}
+
+
+// Auxiliary: evaluates one 16-bit pixel honoring the precalculated gamut check. In-gamut colors go through the Lut
+// pipeline; otherwise the alarm codes of the context are written. Fetching the context may be a bit slow, but this is not critical.
+static
+void TransformOnePixelWithGamutCheck(_cmsTRANSFORM* p,
+                                     const cmsUInt16Number wIn[],
+                                     cmsUInt16Number wOut[])
+{
+    cmsUInt16Number gamutMark, ch;
+    _cmsAlarmCodesChunkType* alarms;
+
+    p ->GamutCheck ->Eval16Fn(wIn, &gamutMark, p ->GamutCheck ->Data);
+    // In gamut: regular evaluation
+    if (gamutMark < 1) {
+        p ->Lut ->Eval16Fn(wIn, wOut, p -> Lut->Data);
+        return;
+    }
+
+    // Out of gamut: one alarm code per output channel
+    alarms = (_cmsAlarmCodesChunkType*) _cmsContextGetClientChunk(p->ContextID, AlarmCodesContext);
+    for (ch = 0; ch < p ->Lut->OutputChannels; ch++)
+        wOut[ch] = alarms ->AlarmCodes[ch];
+}
+
+// Gamut check, No cache, 16 bits.
+// Same walk as the plain precalculated transform, with each pixel evaluated by TransformOnePixelWithGamutCheck().
+static
+void PrecalculatedXFORMGamutCheck(_cmsTRANSFORM* p,
+                                  const void* in,
+                                  void* out, 
+                                  cmsUInt32Number PixelsPerLine,
+                                  cmsUInt32Number LineCount,
+                                  const cmsStride* Stride)
+{
+    cmsUInt16Number source[cmsMAXCHANNELS], result[cmsMAXCHANNELS];
+    cmsUInt8Number* reader;
+    cmsUInt8Number* writer;
+    cmsUInt32Number row, col, rowIn, rowOut;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    memset(source, 0, sizeof(source));
+    memset(result, 0, sizeof(result));
+
+    rowIn = rowOut = 0;
+    for (row = 0; row < LineCount; row++) {
+
+        reader = (cmsUInt8Number*)in + rowIn;
+        writer = (cmsUInt8Number*)out + rowOut;
+
+        for (col = 0; col < PixelsPerLine; col++) {
+
+            reader = p->FromInput(p, source, reader, Stride->BytesPerPlaneIn);
+            TransformOnePixelWithGamutCheck(p, source, result);
+            writer = p->ToOutput(p, result, writer, Stride->BytesPerPlaneOut);
+        }
+
+        rowIn += Stride->BytesPerLineIn;
+        rowOut += Stride->BytesPerLineOut;
+    }
+}
+
+
+// No gamut check, Cache, 16 bits.
+// A one-entry cache (last input and its result) is kept in a local copy of
+// p->Cache, so runs of identical pixels skip the pipeline evaluation.
+static
+void CachedXFORM(_cmsTRANSFORM* p,
+                 const void* in,
+                 void* out,
+                 cmsUInt32Number PixelsPerLine,
+                 cmsUInt32Number LineCount,
+                 const cmsStride* Stride)
+{
+    _cmsCACHE last;
+    cmsUInt16Number fresh[cmsMAXCHANNELS], mapped[cmsMAXCHANNELS];
+    cmsUInt8Number* src;
+    cmsUInt8Number* dst;
+    cmsUInt32Number line, px;
+    cmsUInt32Number offIn = 0, offOut = 0;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    // Buffers are zeroed so the whole-array memcmp below is meaningful
+    memset(fresh, 0, sizeof(fresh));
+    memset(mapped, 0, sizeof(mapped));
+
+    // Work on a private copy of the cache stored in the transform
+    memcpy(&last, &p->Cache, sizeof(last));
+
+    for (line = 0; line < LineCount; line++) {
+
+        src = (cmsUInt8Number*)in + offIn;
+        dst = (cmsUInt8Number*)out + offOut;
+
+        for (px = 0; px < PixelsPerLine; px++) {
+            src = p->FromInput(p, fresh, src, Stride->BytesPerPlaneIn);
+
+            if (memcmp(fresh, last.CacheIn, sizeof(last.CacheIn)) != 0) {
+                // Miss: evaluate and remember both input and result
+                p->Lut->Eval16Fn(fresh, mapped, p->Lut->Data);
+
+                memcpy(last.CacheIn, fresh, sizeof(last.CacheIn));
+                memcpy(last.CacheOut, mapped, sizeof(last.CacheOut));
+            }
+            else {
+                // Hit: reuse the remembered result
+                memcpy(mapped, last.CacheOut, sizeof(last.CacheOut));
+            }
+
+            dst = p->ToOutput(p, mapped, dst, Stride->BytesPerPlaneOut);
+        }
+
+        offIn += Stride->BytesPerLineIn;
+        offOut += Stride->BytesPerLineOut;
+    }
+}
+
+// All those nice features together: gamut check plus cache, 16 bits.
+// Identical to the cached transform except that cache misses are resolved
+// by TransformOnePixelWithGamutCheck().
+static
+void CachedXFORMGamutCheck(_cmsTRANSFORM* p,
+                           const void* in,
+                           void* out, 
+                           cmsUInt32Number PixelsPerLine,
+                           cmsUInt32Number LineCount,
+                           const cmsStride* Stride)
+{
+    cmsUInt8Number* inPtr;
+    cmsUInt8Number* outPtr;
+    cmsUInt16Number current[cmsMAXCHANNELS], converted[cmsMAXCHANNELS];
+    _cmsCACHE localCache;
+    cmsUInt32Number y, x, lineIn, lineOut;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    // Empty buffers for quick memcmp
+    memset(current, 0, sizeof(current));
+    memset(converted, 0, sizeof(converted));
+
+    // Get copy of zero cache
+    memcpy(&localCache, &p->Cache, sizeof(localCache));
+
+    lineIn = lineOut = 0;
+    for (y = 0; y < LineCount; y++) {
+
+        inPtr = (cmsUInt8Number*)in + lineIn;
+        outPtr = (cmsUInt8Number*)out + lineOut;
+
+        for (x = 0; x < PixelsPerLine; x++) {
+            int sameAsLast;
+            inPtr = p->FromInput(p, current, inPtr, Stride->BytesPerPlaneIn);
+            sameAsLast = (memcmp(current, localCache.CacheIn, sizeof(localCache.CacheIn)) == 0);
+
+            if (!sameAsLast) {
+                // New color: compute it and refresh the cache entry
+                TransformOnePixelWithGamutCheck(p, current, converted);
+                memcpy(localCache.CacheIn, current, sizeof(localCache.CacheIn));
+                memcpy(localCache.CacheOut, converted, sizeof(localCache.CacheOut));
+            }
+            else {
+                memcpy(converted, localCache.CacheOut, sizeof(localCache.CacheOut));
+            }
+
+            outPtr = p->ToOutput(p, converted, outPtr, Stride->BytesPerPlaneOut);
+        }
+
+        lineIn += Stride->BytesPerLineIn;
+        lineOut += Stride->BytesPerLineOut;
+    }
+}
+
+// Transform plug-ins ----------------------------------------------------------------------------------------------------
+
+// List of user-defined transform factories
+typedef struct _cmsTransformCollection_st {
+    // Factory callback taken from the registered plug-in
+    _cmsTransform2Factory  Factory;
+    cmsBool                OldXform;   // Factory returns xform function in the old style
+    // Next registered factory, NULL at the end of the list
+    struct _cmsTransformCollection_st *Next;
+
+} _cmsTransformCollection;
+
+// The linked list head
+_cmsTransformPluginChunkType _cmsTransformPluginChunk = { NULL };
+
+
+// Duplicates the zone of memory used by the plug-in in the new context.
+// Nodes are copied one by one, in the same order, into the pool of ctx. If a
+// node cannot be allocated the function returns without storing a chunk in ctx.
+static
+void DupPluginTransformList(struct _cmsContext_struct* ctx, 
+                                               const struct _cmsContext_struct* src)
+{
+   _cmsTransformPluginChunkType copy = { NULL };
+   _cmsTransformCollection** tail = &copy.TransformCollection;
+   _cmsTransformCollection*  node;
+   _cmsTransformPluginChunkType* original = (_cmsTransformPluginChunkType*) src->chunks[TransformPlugin];
+
+   // Walk the list copying all nodes
+   node = original->TransformCollection;
+   while (node != NULL) {
+
+       _cmsTransformCollection* clone = (_cmsTransformCollection*) _cmsSubAllocDup(ctx ->MemPool, node, sizeof(_cmsTransformCollection));
+
+       if (clone == NULL)
+           return;
+
+       // Appending at the tail keeps the original order; the first clone
+       // lands in the head field because that is where tail starts
+       clone -> Next = NULL;
+       *tail = clone;
+       tail = &clone -> Next;
+
+       node = node ->Next;
+   }
+
+   // Store the duplicated list head in the new context
+   ctx ->chunks[TransformPlugin] = _cmsSubAllocDup(ctx->MemPool, &copy, sizeof(_cmsTransformPluginChunkType));
+}
+
+// Allocates memory for transform plugin factory: duplicates the list of src,
+// or installs an empty list when there is no source context.
+void _cmsAllocTransformPluginChunk(struct _cmsContext_struct* ctx, 
+                                        const struct _cmsContext_struct* src)
+{
+    static _cmsTransformPluginChunkType TransformPluginChunkType = { NULL };
+
+    if (src == NULL) {
+        ctx ->chunks[TransformPlugin] = _cmsSubAllocDup(ctx ->MemPool, &TransformPluginChunkType, sizeof(_cmsTransformPluginChunkType));
+        return;
+    }
+
+    DupPluginTransformList(ctx, src);   // Copy all linked list
+}
+
+// Adaptor for old versions of plug-in: calls the old-style function once per
+// line, passing it the input plane stride as its stride argument.
+static
+void _cmsTransform2toTransformAdaptor(struct _cmstransform_struct *CMMcargo,
+                                      const void* InputBuffer,
+                                      void* OutputBuffer,
+                                      cmsUInt32Number PixelsPerLine,
+                                      cmsUInt32Number LineCount,
+                                      const cmsStride* Stride)
+{
+    cmsUInt32Number line;
+    cmsUInt32Number inOffset = 0;
+    cmsUInt32Number outOffset = 0;
+
+    _cmsHandleExtraChannels(CMMcargo, InputBuffer, OutputBuffer, PixelsPerLine, LineCount, Stride);
+
+    for (line = 0; line < LineCount; line++) {
+
+        // Beginning of this line in each buffer
+        void *lineIn = (cmsUInt8Number*)InputBuffer + inOffset;
+        void *lineOut = (cmsUInt8Number*)OutputBuffer + outOffset;
+
+        CMMcargo->OldXform(CMMcargo, lineIn, lineOut, PixelsPerLine, Stride->BytesPerPlaneIn);
+
+        inOffset += Stride->BytesPerLineIn;
+        outOffset += Stride->BytesPerLineOut;
+    }
+}
+
+
+
+// Register new ways to transform. A NULL Data resets the list of factories of
+// the context; otherwise the factory of the plug-in is pushed at the head of
+// the list. Returns FALSE when the plug-in has no factory callback or when
+// the list node cannot be allocated, TRUE otherwise.
+cmsBool  _cmsRegisterTransformPlugin(cmsContext ContextID, cmsPluginBase* Data)
+{
+    _cmsTransformPluginChunkType* chunk = ( _cmsTransformPluginChunkType*) _cmsContextGetClientChunk(ContextID,TransformPlugin);
+    cmsPluginTransform* xformPlugin = (cmsPluginTransform*) Data;
+    _cmsTransformCollection* node;
+
+    if (Data == NULL) {
+
+        // Free the chain. Memory is safely freed at exit
+        chunk->TransformCollection = NULL;
+        return TRUE;
+    }
+
+    // Factory callback is required
+    if (xformPlugin->factories.xform == NULL)
+        return FALSE;
+
+    node = (_cmsTransformCollection*) _cmsPluginMalloc(ContextID, sizeof(_cmsTransformCollection));
+    if (node == NULL)
+        return FALSE;
+
+    // Check for full xform plug-ins previous to 2.8,
+    // we would need an adapter in that case
+    node->OldXform = (xformPlugin->base.ExpectedVersion < 2080) ? TRUE : FALSE;
+
+    // Copy the parameters
+    node->Factory = xformPlugin->factories.xform;
+
+    // Keep linked list
+    node ->Next = chunk->TransformCollection;
+    chunk->TransformCollection = node;
+
+    // All is ok
+    return TRUE;
+}
+
+// Stores the plug-in private data pointer and the function used to free it in the transform.
+void CMSEXPORT _cmsSetTransformUserData(struct _cmstransform_struct *CMMcargo, void* ptr, _cmsFreeUserDataFn FreePrivateDataFn)
+{
+    _cmsAssert(CMMcargo != NULL);
+    CMMcargo ->UserData = ptr;
+    CMMcargo ->FreeUserData = FreePrivateDataFn;
+}
+
+// Returns the pointer defined by the plug-in to store private data (the UserData field of the transform)
+void * CMSEXPORT _cmsGetTransformUserData(struct _cmstransform_struct *CMMcargo)
+{
+    _cmsAssert(CMMcargo != NULL);
+    return CMMcargo ->UserData;
+}
+
+// Returns the current 16-bit formatters. Either output pointer may be NULL, in which case that formatter is not reported
+void CMSEXPORT _cmsGetTransformFormatters16(struct _cmstransform_struct *CMMcargo, cmsFormatter16* FromInput, cmsFormatter16* ToOutput)
+{
+     _cmsAssert(CMMcargo != NULL);
+     if (FromInput) *FromInput = CMMcargo ->FromInput;
+     if (ToOutput)  *ToOutput  = CMMcargo ->ToOutput;
+}
+
+// Returns the current float formatters. Either output pointer may be NULL, in which case that formatter is not reported
+void CMSEXPORT _cmsGetTransformFormattersFloat(struct _cmstransform_struct *CMMcargo, cmsFormatterFloat* FromInput, cmsFormatterFloat* ToOutput)
+{
+     _cmsAssert(CMMcargo != NULL);
+     if (FromInput) *FromInput = CMMcargo ->FromInputFloat;
+     if (ToOutput)  *ToOutput  = CMMcargo ->ToOutputFloat;
+}
+
+// Allocate a zero-initialized transform struct and set it to defaults.
+//
+// When a pipeline is supplied, every registered transform plug-in factory is asked,
+// in list order, whether it wants to take over the transform. The first factory that
+// accepts wins and the struct is returned as the factory left it. If no factory
+// accepts, the pipeline is handed to _cmsOptimizePipeline() and one of the built-in
+// workers (float / 16 bits, cached / non-cached, with / without gamut check, or the
+// null workers) is selected according to the formats and flags.
+//
+//   ContextID     context used for allocation, plug-in lookup and error reporting
+//   lut           proposed pipeline, may be NULL. It is stored in the transform; if the
+//                 struct itself cannot be allocated, it is released with cmsPipelineFree()
+//   Intent        forwarded to _cmsOptimizePipeline()
+//   InputFormat   in/out, plug-ins and the optimizer receive the pointer and may change it
+//   OutputFormat  in/out, same as InputFormat
+//   dwFlags       in/out, cmsFLAGS_CAN_CHANGE_FORMATTER may be added here
+//
+// Returns the new transform, or NULL on allocation failure or unsupported raster format.
+static
+_cmsTRANSFORM* AllocEmptyTransform(cmsContext ContextID, cmsPipeline* lut,
+                                               cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
+{
+     _cmsTransformPluginChunkType* ctx = ( _cmsTransformPluginChunkType*) _cmsContextGetClientChunk(ContextID, TransformPlugin);
+     _cmsTransformCollection* Plugin;
+
+       // Allocate needed memory
+       _cmsTRANSFORM* p = (_cmsTRANSFORM*)_cmsMallocZero(ContextID, sizeof(_cmsTRANSFORM));
+       if (!p) {
+              // Nobody is going to hold the pipeline, so release it here
+              cmsPipelineFree(lut);
+              return NULL;
+       }
+
+       // Store the proposed pipeline
+       p->Lut = lut;
+
+       // Let's see if any plug-in wants to do the transform by itself
+       if (p->Lut != NULL) {
+
+              for (Plugin = ctx->TransformCollection;
+                     Plugin != NULL;
+                     Plugin = Plugin->Next) {
+
+                     if (Plugin->Factory(&p->xform, &p->UserData, &p->FreeUserData, &p->Lut, InputFormat, OutputFormat, dwFlags)) {
+
+                            // Last plugin in the declaration order takes control. We just keep
+                            // the original parameters as a logging.
+                            // Note that cmsFLAGS_CAN_CHANGE_FORMATTER is not set, so by default
+                            // an optimized transform is not reusable. The plug-in can, however, change
+                            // the flags and make it suitable.
+
+                            p->ContextID = ContextID;
+                            p->InputFormat = *InputFormat;
+                            p->OutputFormat = *OutputFormat;
+                            p->dwOriginalFlags = *dwFlags;
+
+                            // Fill the formatters just in case the optimized routine is interested.
+                            // No error is thrown if the formatter doesn't exist. It is up to the optimization
+                            // factory to decide what to do in those cases.
+                            p->FromInput = _cmsGetFormatter(ContextID, *InputFormat, cmsFormatterInput, CMS_PACK_FLAGS_16BITS).Fmt16;
+                            p->ToOutput = _cmsGetFormatter(ContextID, *OutputFormat, cmsFormatterOutput, CMS_PACK_FLAGS_16BITS).Fmt16;
+                            p->FromInputFloat = _cmsGetFormatter(ContextID, *InputFormat, cmsFormatterInput, CMS_PACK_FLAGS_FLOAT).FmtFloat;
+                            p->ToOutputFloat = _cmsGetFormatter(ContextID, *OutputFormat, cmsFormatterOutput, CMS_PACK_FLAGS_FLOAT).FmtFloat;
+
+                            // Plug-ins flagged as OldXform supplied a worker with the old signature:
+                            // keep it in OldXform and call it through the adaptor. The cast through
+                            // void* is intentional (ignore the compiler warning).
+                            if (Plugin->OldXform) {
+                                   p->OldXform = (_cmsTransformFn)(void*) p->xform;
+                                   p->xform = _cmsTransform2toTransformAdaptor;
+                            }
+
+                            return p;
+                     }
+              }
+
+              // Not suitable for the transform plug-in, let's check the pipeline plug-in
+              _cmsOptimizePipeline(ContextID, &p->Lut, Intent, InputFormat, OutputFormat, dwFlags);
+       }
+
+    // Check whether this is a true floating point transform (both ends are float)
+    if (_cmsFormatterIsFloat(*InputFormat) && _cmsFormatterIsFloat(*OutputFormat)) {
+
+        // Get formatter function always return a valid union, but the contents of this union may be NULL.
+        p ->FromInputFloat = _cmsGetFormatter(ContextID, *InputFormat,  cmsFormatterInput, CMS_PACK_FLAGS_FLOAT).FmtFloat;
+        p ->ToOutputFloat  = _cmsGetFormatter(ContextID, *OutputFormat, cmsFormatterOutput, CMS_PACK_FLAGS_FLOAT).FmtFloat;
+        *dwFlags |= cmsFLAGS_CAN_CHANGE_FORMATTER;
+
+        if (p ->FromInputFloat == NULL || p ->ToOutputFloat == NULL) {
+
+            cmsSignalError(ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported raster format");
+            cmsDeleteTransform(p);
+            return NULL;
+        }
+
+        if (*dwFlags & cmsFLAGS_NULLTRANSFORM) {
+
+            p ->xform = NullFloatXFORM;
+        }
+        else {
+            // Float transforms don't use cache, always are non-NULL
+            p ->xform = FloatXFORM;
+        }
+
+    }
+    else {
+
+        if (*InputFormat == 0 && *OutputFormat == 0) {
+            // No formats given at all: leave the formatters empty and allow them to be set later
+            p ->FromInput = p ->ToOutput = NULL;
+            *dwFlags |= cmsFLAGS_CAN_CHANGE_FORMATTER;
+        }
+        else {
+
+            cmsUInt32Number BytesPerPixelInput;
+
+            p ->FromInput = _cmsGetFormatter(ContextID, *InputFormat,  cmsFormatterInput, CMS_PACK_FLAGS_16BITS).Fmt16;
+            p ->ToOutput  = _cmsGetFormatter(ContextID, *OutputFormat, cmsFormatterOutput, CMS_PACK_FLAGS_16BITS).Fmt16;
+
+            if (p ->FromInput == NULL || p ->ToOutput == NULL) {
+
+                cmsSignalError(ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported raster format");
+                cmsDeleteTransform(p);
+                return NULL;
+            }
+
+            // The width has to be taken from the caller supplied format: p->InputFormat is
+            // not assigned until the end of this function and still holds the zero left
+            // by _cmsMallocZero(), which would make this test succeed for every format.
+            BytesPerPixelInput = T_BYTES(*InputFormat);
+            if (BytesPerPixelInput == 0 || BytesPerPixelInput >= 2)
+                   *dwFlags |= cmsFLAGS_CAN_CHANGE_FORMATTER;
+
+        }
+
+        if (*dwFlags & cmsFLAGS_NULLTRANSFORM) {
+
+            p ->xform = NullXFORM;
+        }
+        else {
+            if (*dwFlags & cmsFLAGS_NOCACHE) {
+
+                if (*dwFlags & cmsFLAGS_GAMUTCHECK)
+                    p ->xform = PrecalculatedXFORMGamutCheck;  // Gamut check, no cache
+                else
+                    p ->xform = PrecalculatedXFORM;  // No cache, no gamut check
+            }
+            else {
+
+                if (*dwFlags & cmsFLAGS_GAMUTCHECK)
+                    p ->xform = CachedXFORMGamutCheck;    // Gamut check, cache
+                else
+                    p ->xform = CachedXFORM;  // No gamut check, cache
+
+            }
+        }
+    }
+
+    // Record the (possibly adjusted) formats and flags. Built-in workers carry no user data.
+    p ->InputFormat     = *InputFormat;
+    p ->OutputFormat    = *OutputFormat;
+    p ->dwOriginalFlags = *dwFlags;
+    p ->ContextID       = ContextID;
+    p ->UserData        = NULL;
+    return p;
+}
+
+// Walks the profile chain and works out the color space entering the first
+// profile (*Input) and the one leaving the last profile (*Output).
+// Returns FALSE if the chain is empty or any profile handle is NULL.
+static
+cmsBool GetXFormColorSpaces(cmsUInt32Number nProfiles, cmsHPROFILE hProfiles[], cmsColorSpaceSignature* Input, cmsColorSpaceSignature* Output)
+{
+    cmsColorSpaceSignature Current;
+    cmsUInt32Number n;
+
+    if (nProfiles == 0 || hProfiles[0] == NULL)
+        return FALSE;
+
+    // Start from the device side of the first profile
+    Current = cmsGetColorSpace(hProfiles[0]);
+    *Input  = Current;
+
+    for (n = 0; n < nProfiles; n++) {
+
+        cmsHPROFILE Profile = hProfiles[n];
+        cmsColorSpaceSignature From, To;
+        cmsProfileClassSignature DeviceClass;
+
+        // Whether the data reaching this profile is already in XYZ or Lab
+        cmsBool ComesFromPCS = (Current == cmsSigXYZData) ||
+                               (Current == cmsSigLabData);
+
+        if (Profile == NULL)
+            return FALSE;
+
+        DeviceClass = cmsGetDeviceClass(Profile);
+
+        if (DeviceClass == cmsSigNamedColorClass) {
+
+            // Named color profiles are entered through a single channel
+            From = cmsSig1colorData;
+
+            if (nProfiles > 1)
+                To = cmsGetPCS(Profile);
+            else
+                To = cmsGetColorSpace(Profile);
+        }
+        else if (!ComesFromPCS || DeviceClass == cmsSigLinkClass) {
+
+            // Used in the device -> PCS direction
+            From = cmsGetColorSpace(Profile);
+            To   = cmsGetPCS(Profile);
+        }
+        else {
+
+            // Used in the PCS -> device direction
+            From = cmsGetPCS(Profile);
+            To   = cmsGetColorSpace(Profile);
+        }
+
+        // Only the first profile decides the entry space
+        if (n == 0)
+            *Input = From;
+
+        Current = To;
+    }
+
+    *Output = Current;
+
+    return TRUE;
+}
+
+// Tells whether the color space encoded in a buffer format is acceptable for
+// the given profile color space. PT_ANY in the format matches everything, and
+// Lab / LabV2 are accepted interchangeably.
+static
+cmsBool  IsProperColorSpace(cmsColorSpaceSignature Check, cmsUInt32Number dwFormat)
+{
+    int FormatSpace  = (int) T_COLORSPACE(dwFormat);
+    int ProfileSpace = _cmsLCMScolorSpace(Check);
+
+    if (FormatSpace == PT_ANY || FormatSpace == ProfileSpace)
+        return TRUE;
+
+    if ((FormatSpace == PT_LabV2 && ProfileSpace == PT_Lab) ||
+        (FormatSpace == PT_Lab   && ProfileSpace == PT_LabV2))
+        return TRUE;
+
+    return FALSE;
+}
+
+// ----------------------------------------------------------------------------------------------------------------
+
+// Sanity check added Jun-21-2000: some profiles (those shipped with W2K) store
+// the media white (media black?) multiplied by 100. While all three components
+// are above 2, scale the whole triplet down by 10.
+
+static
+void NormalizeXYZ(cmsCIEXYZ* Dest)
+{
+    for (;;) {
+
+        // Stop as soon as any component is not above the threshold
+        if (!(Dest->X > 2. && Dest->Y > 2. && Dest->Z > 2.))
+            break;
+
+        Dest->X /= 10.;
+        Dest->Y /= 10.;
+        Dest->Z /= 10.;
+    }
+}
+
+// Stores a white point in wtPt. With no source the D50 constants are used;
+// otherwise the source is copied and passed through NormalizeXYZ().
+static
+void SetWhitePoint(cmsCIEXYZ* wtPt, const cmsCIEXYZ* src)
+{
+    if (src != NULL) {
+
+        wtPt->X = src->X;
+        wtPt->Y = src->Y;
+        wtPt->Z = src->Z;
+
+        NormalizeXYZ(wtPt);
+        return;
+    }
+
+    // No source given, fall back to D50
+    wtPt->X = cmsD50X;
+    wtPt->Y = cmsD50Y;
+    wtPt->Z = cmsD50Z;
+}
+
+// New to lcms 2.0 -- have all parameters available.
+//
+// Builds a transform across a chain of nProfiles profiles, with per-profile black point
+// compensation, intent and adaptation state, and an optional gamut check.
+//
+//   hProfiles, BPC, Intents, AdaptationStates   arrays with nProfiles entries each
+//   hGamutProfile / nGamutPCSposition           used only when cmsFLAGS_GAMUTCHECK is set;
+//                                               the flag is dropped if hGamutProfile is NULL
+//   InputFormat / OutputFormat                  buffer formats, checked against the entry
+//                                               and exit color spaces of the chain
+//
+// With cmsFLAGS_NULLTRANSFORM the profiles are not inspected at all and an empty transform
+// is returned. Returns NULL, after signalling an error, if the profiles are missing, the
+// color spaces or channel counts do not match, or the profiles cannot be linked.
+cmsHTRANSFORM CMSEXPORT cmsCreateExtendedTransform(cmsContext ContextID,
+                                                   cmsUInt32Number nProfiles, cmsHPROFILE hProfiles[],
+                                                   cmsBool  BPC[],
+                                                   cmsUInt32Number Intents[],
+                                                   cmsFloat64Number AdaptationStates[],
+                                                   cmsHPROFILE hGamutProfile,
+                                                   cmsUInt32Number nGamutPCSposition,
+                                                   cmsUInt32Number InputFormat,
+                                                   cmsUInt32Number OutputFormat,
+                                                   cmsUInt32Number dwFlags)
+{
+    _cmsTRANSFORM* xform;
+    cmsColorSpaceSignature EntryColorSpace;
+    cmsColorSpaceSignature ExitColorSpace;
+    cmsPipeline* Lut;
+    cmsUInt32Number LastIntent;
+
+    // If it is a fake transform
+    if (dwFlags & cmsFLAGS_NULLTRANSFORM)
+    {
+        return AllocEmptyTransform(ContextID, NULL, INTENT_PERCEPTUAL, &InputFormat, &OutputFormat, &dwFlags);
+    }
+
+    // If gamut check is requested, make sure we have a gamut profile
+    if (dwFlags & cmsFLAGS_GAMUTCHECK) {
+        if (hGamutProfile == NULL) dwFlags &= ~cmsFLAGS_GAMUTCHECK;
+    }
+
+    // On floating point transforms, inhibit cache
+    if (_cmsFormatterIsFloat(InputFormat) || _cmsFormatterIsFloat(OutputFormat))
+        dwFlags |= cmsFLAGS_NOCACHE;
+
+    // Mark entry/exit spaces. This also rejects an empty chain and NULL profiles.
+    if (!GetXFormColorSpaces(nProfiles, hProfiles, &EntryColorSpace, &ExitColorSpace)) {
+        cmsSignalError(ContextID, cmsERROR_NULL, "NULL input profiles on transform");
+        return NULL;
+    }
+
+    // Only now is nProfiles known to be at least 1, so indexing with nProfiles-1 is safe.
+    // Reading this earlier would go out of bounds when nProfiles is 0.
+    LastIntent = Intents[nProfiles-1];
+
+    // Check if proper colorspaces
+    if (!IsProperColorSpace(EntryColorSpace, InputFormat)) {
+        cmsSignalError(ContextID, cmsERROR_COLORSPACE_CHECK, "Wrong input color space on transform");
+        return NULL;
+    }
+
+    if (!IsProperColorSpace(ExitColorSpace, OutputFormat)) {
+        cmsSignalError(ContextID, cmsERROR_COLORSPACE_CHECK, "Wrong output color space on transform");
+        return NULL;
+    }
+
+    // Create a pipeline with all transformations
+    Lut = _cmsLinkProfiles(ContextID, nProfiles, Intents, hProfiles, BPC, AdaptationStates, dwFlags);
+    if (Lut == NULL) {
+        cmsSignalError(ContextID, cmsERROR_NOT_SUITABLE, "Couldn't link the profiles");
+        return NULL;
+    }
+
+    // Check channel count
+    if ((cmsChannelsOf(EntryColorSpace) != cmsPipelineInputChannels(Lut)) ||
+        (cmsChannelsOf(ExitColorSpace)  != cmsPipelineOutputChannels(Lut))) {
+        cmsPipelineFree(Lut);
+        cmsSignalError(ContextID, cmsERROR_NOT_SUITABLE, "Channel count doesn't match. Profile is corrupted");
+        return NULL;
+    }
+
+
+    // All seems ok. From here on the pipeline belongs to the transform.
+    xform = AllocEmptyTransform(ContextID, Lut, LastIntent, &InputFormat, &OutputFormat, &dwFlags);
+    if (xform == NULL) {
+        return NULL;
+    }
+
+    // Keep values
+    xform ->EntryColorSpace = EntryColorSpace;
+    xform ->ExitColorSpace  = ExitColorSpace;
+    xform ->RenderingIntent = LastIntent;
+
+    // Take white points. A missing tag yields NULL, which SetWhitePoint maps to D50.
+    SetWhitePoint(&xform->EntryWhitePoint, (cmsCIEXYZ*) cmsReadTag(hProfiles[0], cmsSigMediaWhitePointTag));
+    SetWhitePoint(&xform->ExitWhitePoint,  (cmsCIEXYZ*) cmsReadTag(hProfiles[nProfiles-1], cmsSigMediaWhitePointTag));
+
+
+    // Create a gamut check LUT if requested
+    if (hGamutProfile != NULL && (dwFlags & cmsFLAGS_GAMUTCHECK))
+        xform ->GamutCheck  = _cmsCreateGamutCheckPipeline(ContextID, hProfiles,
+                                                        BPC, Intents,
+                                                        AdaptationStates,
+                                                        nGamutPCSposition,
+                                                        hGamutProfile);
+
+
+    // Try to read input and output colorant table
+    if (cmsIsTag(hProfiles[0], cmsSigColorantTableTag)) {
+
+        // Input table can only come in this way.
+        xform ->InputColorant = cmsDupNamedColorList((cmsNAMEDCOLORLIST*) cmsReadTag(hProfiles[0], cmsSigColorantTableTag));
+    }
+
+    // Output is a little bit more complex.
+    if (cmsGetDeviceClass(hProfiles[nProfiles-1]) == cmsSigLinkClass) {
+
+        // This tag may exist only on devicelink profiles.
+        if (cmsIsTag(hProfiles[nProfiles-1], cmsSigColorantTableOutTag)) {
+
+            // It may be NULL if error
+            xform ->OutputColorant = cmsDupNamedColorList((cmsNAMEDCOLORLIST*) cmsReadTag(hProfiles[nProfiles-1], cmsSigColorantTableOutTag));
+        }
+
+    } else {
+
+        if (cmsIsTag(hProfiles[nProfiles-1], cmsSigColorantTableTag)) {
+
+            xform -> OutputColorant = cmsDupNamedColorList((cmsNAMEDCOLORLIST*) cmsReadTag(hProfiles[nProfiles-1], cmsSigColorantTableTag));
+        }
+    }
+
+    // Store the sequence of profiles
+    if (dwFlags & cmsFLAGS_KEEP_SEQUENCE) {
+        xform ->Sequence = _cmsCompileProfileSequence(ContextID, nProfiles, hProfiles);
+    }
+    else
+        xform ->Sequence = NULL;
+
+    // If this is a cached transform, init first value, which is zero (16 bits only)
+    if (!(dwFlags & cmsFLAGS_NOCACHE)) {
+
+        memset(&xform ->Cache.CacheIn, 0, sizeof(xform ->Cache.CacheIn));
+
+        if (xform ->GamutCheck != NULL) {
+            TransformOnePixelWithGamutCheck(xform, xform ->Cache.CacheIn, xform->Cache.CacheOut);
+        }
+        else {
+
+            xform ->Lut ->Eval16Fn(xform ->Cache.CacheIn, xform->Cache.CacheOut, xform -> Lut->Data);
+        }
+
+    }
+
+    return (cmsHTRANSFORM) xform;
+}
+
+// Multiprofile transforms: every profile in the chain gets the same intent, the same
+// black point compensation setting (taken from dwFlags) and the adaptation state
+// reported for the context. Gamut check is not available here, as it is unclear
+// from which profile the gamut comes.
+cmsHTRANSFORM CMSEXPORT cmsCreateMultiprofileTransformTHR(cmsContext ContextID,
+                                                       cmsHPROFILE hProfiles[],
+                                                       cmsUInt32Number nProfiles,
+                                                       cmsUInt32Number InputFormat,
+                                                       cmsUInt32Number OutputFormat,
+                                                       cmsUInt32Number Intent,
+                                                       cmsUInt32Number dwFlags)
+{
+    cmsBool          PerProfileBPC[256];
+    cmsUInt32Number  PerProfileIntent[256];
+    cmsFloat64Number PerProfileAdaptation[256];
+    cmsUInt32Number  k;
+
+    // The local arrays above bound the chain length
+    if (nProfiles == 0 || nProfiles > 255) {
+         cmsSignalError(ContextID, cmsERROR_RANGE, "Wrong number of profiles. 1..255 expected, %d found.", nProfiles);
+        return NULL;
+    }
+
+    for (k = 0; k < nProfiles; k++) {
+
+        if (dwFlags & cmsFLAGS_BLACKPOINTCOMPENSATION)
+            PerProfileBPC[k] = TRUE;
+        else
+            PerProfileBPC[k] = FALSE;
+
+        PerProfileIntent[k]     = Intent;
+        PerProfileAdaptation[k] = cmsSetAdaptationStateTHR(ContextID, -1);
+    }
+
+    // No gamut profile, gamut PCS position is unused
+    return cmsCreateExtendedTransform(ContextID, nProfiles, hProfiles,
+                                      PerProfileBPC, PerProfileIntent, PerProfileAdaptation,
+                                      NULL, 0, InputFormat, OutputFormat, dwFlags);
+}
+
+
+
+// Context-less flavor of the multiprofile constructor: the context is taken
+// from the first profile of the chain.
+cmsHTRANSFORM CMSEXPORT cmsCreateMultiprofileTransform(cmsHPROFILE hProfiles[],
+                                                  cmsUInt32Number nProfiles,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number dwFlags)
+{
+    cmsContext ContextID;
+
+    // Validate the count before touching hProfiles[0]
+    if (nProfiles == 0 || nProfiles > 255) {
+         cmsSignalError(NULL, cmsERROR_RANGE, "Wrong number of profiles. 1..255 expected, %d found.", nProfiles);
+         return NULL;
+    }
+
+    ContextID = cmsGetProfileContextID(hProfiles[0]);
+
+    return cmsCreateMultiprofileTransformTHR(ContextID, hProfiles, nProfiles,
+                                             InputFormat, OutputFormat, Intent, dwFlags);
+}
+
+// Two-profile constructor built on top of the multiprofile one. A NULL output
+// profile shortens the chain to the input profile alone.
+cmsHTRANSFORM CMSEXPORT cmsCreateTransformTHR(cmsContext ContextID,
+                                              cmsHPROFILE Input,
+                                              cmsUInt32Number InputFormat,
+                                              cmsHPROFILE Output,
+                                              cmsUInt32Number OutputFormat,
+                                              cmsUInt32Number Intent,
+                                              cmsUInt32Number dwFlags)
+{
+    cmsHPROFILE Chain[2];
+    cmsUInt32Number ChainLength;
+
+    Chain[0] = Input;
+    Chain[1] = Output;
+
+    if (Output == NULL)
+        ChainLength = 1U;
+    else
+        ChainLength = 2U;
+
+    return cmsCreateMultiprofileTransformTHR(ContextID, Chain, ChainLength, InputFormat, OutputFormat, Intent, dwFlags);
+}
+
+// Context-less flavor of cmsCreateTransformTHR: the context is taken from the input profile.
+CMSAPI cmsHTRANSFORM CMSEXPORT cmsCreateTransform(cmsHPROFILE Input,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsHPROFILE Output,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number dwFlags)
+{
+    cmsContext ContextID = cmsGetProfileContextID(Input);
+
+    return cmsCreateTransformTHR(ContextID, Input, InputFormat, Output, OutputFormat, Intent, dwFlags);
+}
+
+
+// Proofing transform. When neither soft proofing nor gamut check is requested this is
+// just a plain input -> output transform. Otherwise a four step chain is built:
+// input, proofing profile, proofing profile again, output, with the proofing profile
+// also used for the gamut check.
+cmsHTRANSFORM CMSEXPORT cmsCreateProofingTransformTHR(cmsContext ContextID,
+                                                   cmsHPROFILE InputProfile,
+                                                   cmsUInt32Number InputFormat,
+                                                   cmsHPROFILE OutputProfile,
+                                                   cmsUInt32Number OutputFormat,
+                                                   cmsHPROFILE ProofingProfile,
+                                                   cmsUInt32Number nIntent,
+                                                   cmsUInt32Number ProofingIntent,
+                                                   cmsUInt32Number dwFlags)
+{
+    cmsHPROFILE      Chain[4];
+    cmsUInt32Number  ChainIntents[4];
+    cmsBool          ChainBPC[4];
+    cmsFloat64Number ChainAdaptation[4];
+    cmsBool          WantBPC;
+    cmsFloat64Number State;
+    int              k;
+
+    if (dwFlags & cmsFLAGS_BLACKPOINTCOMPENSATION)
+        WantBPC = TRUE;
+    else
+        WantBPC = FALSE;
+
+    // Query the adaptation state of the context
+    State = cmsSetAdaptationStateTHR(ContextID, -1);
+
+    // Nothing to proof: fall back to the regular two-profile transform
+    if ((dwFlags & (cmsFLAGS_SOFTPROOFING|cmsFLAGS_GAMUTCHECK)) == 0)
+        return cmsCreateTransformTHR(ContextID, InputProfile, InputFormat, OutputProfile, OutputFormat, nIntent, dwFlags);
+
+    // Step 0: into the chain through the input profile
+    Chain[0]        = InputProfile;
+    ChainIntents[0] = nIntent;
+    ChainBPC[0]     = WantBPC;
+
+    // Step 1: first pass through the proofing profile
+    Chain[1]        = ProofingProfile;
+    ChainIntents[1] = nIntent;
+    ChainBPC[1]     = WantBPC;
+
+    // Step 2: second pass through the proofing profile, relative colorimetric, no BPC
+    Chain[2]        = ProofingProfile;
+    ChainIntents[2] = INTENT_RELATIVE_COLORIMETRIC;
+    ChainBPC[2]     = FALSE;
+
+    // Step 3: out through the output profile with the proofing intent, no BPC
+    Chain[3]        = OutputProfile;
+    ChainIntents[3] = ProofingIntent;
+    ChainBPC[3]     = FALSE;
+
+    for (k = 0; k < 4; k++)
+        ChainAdaptation[k] = State;
+
+    return cmsCreateExtendedTransform(ContextID, 4, Chain, ChainBPC, ChainIntents, ChainAdaptation,
+                                        ProofingProfile, 1, InputFormat, OutputFormat, dwFlags);
+
+}
+
+
+// Context-less flavor of the proofing constructor: the context is taken from the input profile.
+cmsHTRANSFORM CMSEXPORT cmsCreateProofingTransform(cmsHPROFILE InputProfile,
+                                                   cmsUInt32Number InputFormat,
+                                                   cmsHPROFILE OutputProfile,
+                                                   cmsUInt32Number OutputFormat,
+                                                   cmsHPROFILE ProofingProfile,
+                                                   cmsUInt32Number nIntent,
+                                                   cmsUInt32Number ProofingIntent,
+                                                   cmsUInt32Number dwFlags)
+{
+    cmsContext ContextID = cmsGetProfileContextID(InputProfile);
+
+    return cmsCreateProofingTransformTHR(ContextID,
+                                         InputProfile, InputFormat,
+                                         OutputProfile, OutputFormat,
+                                         ProofingProfile,
+                                         nIntent, ProofingIntent,
+                                         dwFlags);
+}
+
+
+// Reports the ContextID stored in an open transform, or NULL when the handle itself is NULL.
+cmsContext CMSEXPORT cmsGetTransformContextID(cmsHTRANSFORM hTransform)
+{
+    const _cmsTRANSFORM* p = (const _cmsTRANSFORM*) hTransform;
+
+    return (p != NULL) ? p->ContextID : NULL;
+}
+
+// Reports the input buffer format stored in the transform, or 0 for a NULL handle.
+cmsUInt32Number CMSEXPORT cmsGetTransformInputFormat(cmsHTRANSFORM hTransform)
+{
+    const _cmsTRANSFORM* p = (const _cmsTRANSFORM*) hTransform;
+
+    return (p != NULL) ? p->InputFormat : 0;
+}
+
+// Reports the output buffer format stored in the transform, or 0 for a NULL handle.
+cmsUInt32Number CMSEXPORT cmsGetTransformOutputFormat(cmsHTRANSFORM hTransform)
+{
+    const _cmsTRANSFORM* p = (const _cmsTRANSFORM*) hTransform;
+
+    return (p != NULL) ? p->OutputFormat : 0;
+}
+
+// Kept for backwards compatibility: swaps the 16-bit input/output formatters of an
+// existing transform. Only allowed when the transform was created with
+// cmsFLAGS_CAN_CHANGE_FORMATTER recorded in its original flags. Returns FALSE,
+// leaving the transform untouched, if that flag is missing or if either format
+// has no 16-bit formatter.
+cmsBool CMSEXPORT cmsChangeBuffersFormat(cmsHTRANSFORM hTransform,
+                                         cmsUInt32Number InputFormat,
+                                         cmsUInt32Number OutputFormat)
+{
+    _cmsTRANSFORM* p = (_cmsTRANSFORM*) hTransform;
+    cmsFormatter16 NewFromInput;
+    cmsFormatter16 NewToOutput;
+
+    // We only can afford to change formatters if previous transform is at least 16 bits
+    if ((p->dwOriginalFlags & cmsFLAGS_CAN_CHANGE_FORMATTER) == 0) {
+
+        cmsSignalError(p->ContextID, cmsERROR_NOT_SUITABLE, "cmsChangeBuffersFormat works only on transforms created originally with at least 16 bits of precision");
+        return FALSE;
+    }
+
+    // Look up both formatters first, so nothing is modified on failure
+    NewFromInput = _cmsGetFormatter(p->ContextID, InputFormat,  cmsFormatterInput,  CMS_PACK_FLAGS_16BITS).Fmt16;
+    NewToOutput  = _cmsGetFormatter(p->ContextID, OutputFormat, cmsFormatterOutput, CMS_PACK_FLAGS_16BITS).Fmt16;
+
+    if (NewFromInput == NULL || NewToOutput == NULL) {
+
+        cmsSignalError(p->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported raster format");
+        return FALSE;
+    }
+
+    p->InputFormat  = InputFormat;
+    p->FromInput    = NewFromInput;
+    p->OutputFormat = OutputFormat;
+    p->ToOutput     = NewToOutput;
+
+    return TRUE;
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/lcms2.def b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/lcms2.def
new file mode 100644
index 0000000000..a283b25428
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/lcms2.def
@@ -0,0 +1,342 @@
+LIBRARY     LCMS2.DLL
+
+EXPORTS
+
+_cms15Fixed16toDouble                    =   _cms15Fixed16toDouble
+_cms8Fixed8toDouble                      =   _cms8Fixed8toDouble
+cmsAdaptToIlluminant                     =    cmsAdaptToIlluminant
+_cmsAdjustEndianess16                    =   _cmsAdjustEndianess16
+_cmsAdjustEndianess32                    =   _cmsAdjustEndianess32
+_cmsAdjustEndianess64                    =   _cmsAdjustEndianess64
+cmsAllocNamedColorList                   =   cmsAllocNamedColorList
+cmsAllocProfileSequenceDescription       =   cmsAllocProfileSequenceDescription
+cmsAppendNamedColor                      =   cmsAppendNamedColor
+cmsBFDdeltaE                             =   cmsBFDdeltaE
+cmsBuildGamma                            =   cmsBuildGamma
+cmsBuildParametricToneCurve              =   cmsBuildParametricToneCurve
+cmsBuildSegmentedToneCurve               =   cmsBuildSegmentedToneCurve
+cmsBuildTabulatedToneCurve16             =   cmsBuildTabulatedToneCurve16
+cmsBuildTabulatedToneCurveFloat          =   cmsBuildTabulatedToneCurveFloat
+_cmsCalloc                               =   _cmsCalloc
+cmsChannelsOf                            =    cmsChannelsOf
+cmsCIE2000DeltaE                         =    cmsCIE2000DeltaE
+cmsCIE94DeltaE                           =    cmsCIE94DeltaE
+cmsCIECAM02Done                          =    cmsCIECAM02Done
+cmsCIECAM02Forward                       =    cmsCIECAM02Forward
+cmsCIECAM02Init                          =    cmsCIECAM02Init
+cmsCIECAM02Reverse                       =    cmsCIECAM02Reverse
+cmsCloseIOhandler                        =    cmsCloseIOhandler
+cmsCloseProfile                          =    cmsCloseProfile
+cmsCMCdeltaE                             =    cmsCMCdeltaE
+cmsCreate_sRGBProfile                    =    cmsCreate_sRGBProfile
+cmsCreate_sRGBProfileTHR                 =    cmsCreate_sRGBProfileTHR
+cmsCreateBCHSWabstractProfile            =    cmsCreateBCHSWabstractProfile
+cmsCreateBCHSWabstractProfileTHR         =    cmsCreateBCHSWabstractProfileTHR
+cmsCreateExtendedTransform               =    cmsCreateExtendedTransform
+cmsCreateGrayProfile                     =    cmsCreateGrayProfile
+cmsCreateGrayProfileTHR                  =    cmsCreateGrayProfileTHR
+cmsCreateInkLimitingDeviceLink           =    cmsCreateInkLimitingDeviceLink
+cmsCreateInkLimitingDeviceLinkTHR        =    cmsCreateInkLimitingDeviceLinkTHR
+cmsCreateLab2Profile                     =    cmsCreateLab2Profile
+cmsCreateLab2ProfileTHR                  =    cmsCreateLab2ProfileTHR
+cmsCreateLab4Profile                     =    cmsCreateLab4Profile
+cmsCreateLab4ProfileTHR                  =    cmsCreateLab4ProfileTHR
+cmsCreateLinearizationDeviceLink         =    cmsCreateLinearizationDeviceLink
+cmsCreateLinearizationDeviceLinkTHR      =    cmsCreateLinearizationDeviceLinkTHR
+cmsCreateMultiprofileTransform           =    cmsCreateMultiprofileTransform
+cmsCreateMultiprofileTransformTHR        =    cmsCreateMultiprofileTransformTHR
+cmsCreateNULLProfile                     =    cmsCreateNULLProfile
+cmsCreateNULLProfileTHR                  =    cmsCreateNULLProfileTHR
+cmsCreateProfilePlaceholder              =    cmsCreateProfilePlaceholder
+cmsCreateProofingTransform               =    cmsCreateProofingTransform
+cmsCreateProofingTransformTHR            =    cmsCreateProofingTransformTHR
+cmsCreateRGBProfile                      =    cmsCreateRGBProfile
+cmsCreateRGBProfileTHR                   =    cmsCreateRGBProfileTHR
+cmsCreateTransform                       =    cmsCreateTransform
+cmsCreateTransformTHR                    =    cmsCreateTransformTHR
+cmsCreateXYZProfile                      =    cmsCreateXYZProfile
+cmsCreateXYZProfileTHR                   =    cmsCreateXYZProfileTHR
+cmsD50_xyY                               =    cmsD50_xyY
+cmsD50_XYZ                               =    cmsD50_XYZ
+_cmsDecodeDateTimeNumber                 =    _cmsDecodeDateTimeNumber
+_cmsDefaultICCintents                    =    _cmsDefaultICCintents
+cmsDeleteTransform                       =    cmsDeleteTransform
+cmsDeltaE                                =    cmsDeltaE
+cmsDetectBlackPoint                      =    cmsDetectBlackPoint
+cmsDetectDestinationBlackPoint           =    cmsDetectDestinationBlackPoint
+cmsDetectTAC                             =    cmsDetectTAC
+cmsDesaturateLab                         =    cmsDesaturateLab
+cmsDoTransform                           =    cmsDoTransform
+cmsDoTransformStride                     =    cmsDoTransformStride
+_cmsDoubleTo15Fixed16                    =    _cmsDoubleTo15Fixed16
+_cmsDoubleTo8Fixed8                      =    _cmsDoubleTo8Fixed8
+_cmsDupMem                               =    _cmsDupMem
+cmsDupNamedColorList                     =    cmsDupNamedColorList
+cmsDupProfileSequenceDescription         =    cmsDupProfileSequenceDescription
+cmsDupToneCurve                          =    cmsDupToneCurve
+_cmsEncodeDateTimeNumber                 =    _cmsEncodeDateTimeNumber
+cmsEstimateGamma                         =    cmsEstimateGamma
+cmsGetToneCurveEstimatedTableEntries     =    cmsGetToneCurveEstimatedTableEntries
+cmsGetToneCurveEstimatedTable            =    cmsGetToneCurveEstimatedTable
+cmsEvalToneCurve16                       =    cmsEvalToneCurve16
+cmsEvalToneCurveFloat                    =    cmsEvalToneCurveFloat
+cmsfilelength                            =    cmsfilelength
+cmsFloat2LabEncoded                      =    cmsFloat2LabEncoded
+cmsFloat2LabEncodedV2                    =    cmsFloat2LabEncodedV2
+cmsFloat2XYZEncoded                      =    cmsFloat2XYZEncoded
+cmsFormatterForColorspaceOfProfile       =    cmsFormatterForColorspaceOfProfile
+cmsFormatterForPCSOfProfile              =    cmsFormatterForPCSOfProfile
+_cmsFree                                 =    _cmsFree
+cmsFreeNamedColorList                    =    cmsFreeNamedColorList
+cmsFreeProfileSequenceDescription        =    cmsFreeProfileSequenceDescription
+cmsFreeToneCurve                         =    cmsFreeToneCurve
+cmsFreeToneCurveTriple                   =    cmsFreeToneCurveTriple
+cmsGBDAlloc                              =    cmsGBDAlloc
+cmsGBDFree                               =    cmsGBDFree
+cmsGDBAddPoint                           =    cmsGDBAddPoint
+cmsGDBCheckPoint                         =    cmsGDBCheckPoint
+cmsGDBCompute                            =    cmsGDBCompute
+cmsGetAlarmCodes                         =    cmsGetAlarmCodes
+cmsGetColorSpace                         =    cmsGetColorSpace
+cmsGetDeviceClass                        =    cmsGetDeviceClass
+cmsGetEncodedICCversion                  =    cmsGetEncodedICCversion
+cmsGetHeaderAttributes                   =    cmsGetHeaderAttributes
+cmsGetHeaderCreationDateTime             =    cmsGetHeaderCreationDateTime
+cmsGetHeaderFlags                        =    cmsGetHeaderFlags
+cmsGetHeaderManufacturer                 =    cmsGetHeaderManufacturer
+cmsGetHeaderModel                        =    cmsGetHeaderModel
+cmsGetHeaderProfileID                    =    cmsGetHeaderProfileID
+cmsGetHeaderRenderingIntent              =    cmsGetHeaderRenderingIntent
+cmsGetNamedColorList                     =    cmsGetNamedColorList
+cmsGetPCS                                =    cmsGetPCS
+cmsGetPostScriptColorResource            =    cmsGetPostScriptColorResource
+cmsGetPostScriptCRD                      =    cmsGetPostScriptCRD
+cmsGetPostScriptCSA                      =    cmsGetPostScriptCSA
+cmsGetProfileInfo                        =    cmsGetProfileInfo
+cmsGetProfileInfoASCII                   =    cmsGetProfileInfoASCII
+cmsGetProfileContextID                   =    cmsGetProfileContextID
+cmsGetProfileVersion                     =    cmsGetProfileVersion
+cmsGetSupportedIntents                   =    cmsGetSupportedIntents
+cmsGetTagCount                           =    cmsGetTagCount
+cmsGetTagSignature                       =    cmsGetTagSignature
+cmsGetTransformContextID                 =    cmsGetTransformContextID
+_cmsICCcolorSpace                        =    _cmsICCcolorSpace
+_cmsIOPrintf                             =    _cmsIOPrintf
+cmsIsCLUT                                =    cmsIsCLUT
+cmsIsIntentSupported                     =    cmsIsIntentSupported
+cmsIsMatrixShaper                        =    cmsIsMatrixShaper
+cmsIsTag                                 =    cmsIsTag
+cmsIsToneCurveDescending                 =    cmsIsToneCurveDescending
+cmsIsToneCurveLinear                     =    cmsIsToneCurveLinear
+cmsIsToneCurveMonotonic                  =    cmsIsToneCurveMonotonic
+cmsIsToneCurveMultisegment               =    cmsIsToneCurveMultisegment
+cmsGetToneCurveParametricType            =    cmsGetToneCurveParametricType
+cmsIT8Alloc                              =    cmsIT8Alloc
+cmsIT8DefineDblFormat                    =    cmsIT8DefineDblFormat
+cmsIT8EnumDataFormat                     =    cmsIT8EnumDataFormat
+cmsIT8EnumProperties                     =    cmsIT8EnumProperties
+cmsIT8EnumPropertyMulti                  =    cmsIT8EnumPropertyMulti
+cmsIT8Free                               =    cmsIT8Free
+cmsIT8GetData                            =    cmsIT8GetData
+cmsIT8GetDataDbl                         =    cmsIT8GetDataDbl
+cmsIT8FindDataFormat                     =    cmsIT8FindDataFormat
+cmsIT8GetDataRowCol                      =    cmsIT8GetDataRowCol
+cmsIT8GetDataRowColDbl                   =    cmsIT8GetDataRowColDbl
+cmsIT8GetPatchName                       =    cmsIT8GetPatchName
+cmsIT8GetPatchByName                     =    cmsIT8GetPatchByName
+cmsIT8GetProperty                        =    cmsIT8GetProperty
+cmsIT8GetPropertyDbl                     =    cmsIT8GetPropertyDbl
+cmsIT8GetPropertyMulti                   =    cmsIT8GetPropertyMulti
+cmsIT8GetSheetType                       =    cmsIT8GetSheetType
+cmsIT8LoadFromFile                       =    cmsIT8LoadFromFile
+cmsIT8LoadFromMem                        =    cmsIT8LoadFromMem
+cmsIT8SaveToFile                         =    cmsIT8SaveToFile
+cmsIT8SaveToMem                          =    cmsIT8SaveToMem
+cmsIT8SetComment                         =    cmsIT8SetComment
+cmsIT8SetData                            =    cmsIT8SetData
+cmsIT8SetDataDbl                         =    cmsIT8SetDataDbl
+cmsIT8SetDataFormat                      =    cmsIT8SetDataFormat
+cmsIT8SetDataRowCol                      =    cmsIT8SetDataRowCol
+cmsIT8SetDataRowColDbl                   =    cmsIT8SetDataRowColDbl
+cmsIT8SetPropertyDbl                     =    cmsIT8SetPropertyDbl
+cmsIT8SetPropertyHex                     =    cmsIT8SetPropertyHex
+cmsIT8SetPropertyStr                     =    cmsIT8SetPropertyStr
+cmsIT8SetPropertyMulti                   =    cmsIT8SetPropertyMulti
+cmsIT8SetPropertyUncooked                =    cmsIT8SetPropertyUncooked
+cmsIT8SetSheetType                       =    cmsIT8SetSheetType
+cmsIT8SetTable                           =    cmsIT8SetTable
+cmsIT8SetTableByLabel                    =    cmsIT8SetTableByLabel
+cmsIT8SetIndexColumn                     =    cmsIT8SetIndexColumn
+cmsIT8TableCount                         =    cmsIT8TableCount
+cmsJoinToneCurve                         =    cmsJoinToneCurve
+cmsLab2LCh                               =    cmsLab2LCh
+cmsLab2XYZ                               =    cmsLab2XYZ
+cmsLabEncoded2Float                      =    cmsLabEncoded2Float
+cmsLabEncoded2FloatV2                    =    cmsLabEncoded2FloatV2
+cmsLCh2Lab                               =    cmsLCh2Lab
+_cmsLCMScolorSpace                       =    _cmsLCMScolorSpace
+cmsLinkTag                               =    cmsLinkTag
+cmsTagLinkedTo                           =    cmsTagLinkedTo
+cmsPipelineAlloc                         =    cmsPipelineAlloc
+cmsPipelineCat                           =    cmsPipelineCat
+cmsPipelineCheckAndRetreiveStages        =    cmsPipelineCheckAndRetreiveStages
+cmsPipelineDup                           =    cmsPipelineDup
+cmsPipelineStageCount                    =    cmsPipelineStageCount
+cmsPipelineEval16                        =    cmsPipelineEval16
+cmsPipelineEvalFloat                     =    cmsPipelineEvalFloat
+cmsPipelineEvalReverseFloat              =    cmsPipelineEvalReverseFloat
+cmsPipelineFree                          =    cmsPipelineFree
+cmsPipelineGetPtrToFirstStage            =    cmsPipelineGetPtrToFirstStage
+cmsPipelineGetPtrToLastStage             =    cmsPipelineGetPtrToLastStage
+cmsPipelineInputChannels                 =    cmsPipelineInputChannels
+cmsPipelineInsertStage                   =    cmsPipelineInsertStage
+cmsPipelineOutputChannels                =    cmsPipelineOutputChannels
+cmsPipelineSetSaveAs8bitsFlag            =    cmsPipelineSetSaveAs8bitsFlag
+_cmsPipelineSetOptimizationParameters    =    _cmsPipelineSetOptimizationParameters
+cmsPipelineUnlinkStage                   =    cmsPipelineUnlinkStage
+_cmsMalloc                               =    _cmsMalloc
+_cmsMallocZero                           =    _cmsMallocZero
+_cmsMAT3eval                             =    _cmsMAT3eval
+_cmsMAT3identity                         =    _cmsMAT3identity
+_cmsMAT3inverse                          =    _cmsMAT3inverse
+_cmsMAT3isIdentity                       =    _cmsMAT3isIdentity
+_cmsMAT3per                              =    _cmsMAT3per
+_cmsMAT3solve                            =    _cmsMAT3solve
+cmsMLUalloc                              =    cmsMLUalloc
+cmsMLUdup                                =    cmsMLUdup
+cmsMLUfree                               =    cmsMLUfree
+cmsMLUgetASCII                           =    cmsMLUgetASCII
+cmsMLUgetTranslation                     =    cmsMLUgetTranslation
+cmsMLUgetWide                            =    cmsMLUgetWide
+cmsMLUsetASCII                           =    cmsMLUsetASCII
+cmsMLUsetWide                            =    cmsMLUsetWide
+cmsStageAllocCLut16bit                   =    cmsStageAllocCLut16bit
+cmsStageAllocCLut16bitGranular           =    cmsStageAllocCLut16bitGranular
+cmsStageAllocCLutFloat                   =    cmsStageAllocCLutFloat
+cmsStageAllocCLutFloatGranular           =    cmsStageAllocCLutFloatGranular
+cmsStageAllocToneCurves                  =    cmsStageAllocToneCurves
+cmsStageAllocIdentity                    =    cmsStageAllocIdentity
+cmsStageAllocMatrix                      =    cmsStageAllocMatrix
+_cmsStageAllocPlaceholder                =    _cmsStageAllocPlaceholder
+cmsStageDup                              =    cmsStageDup
+cmsStageFree                             =    cmsStageFree
+cmsStageNext                             =    cmsStageNext
+cmsStageInputChannels                    =    cmsStageInputChannels
+cmsStageOutputChannels                   =    cmsStageOutputChannels
+cmsStageSampleCLut16bit                  =    cmsStageSampleCLut16bit
+cmsStageSampleCLutFloat                  =    cmsStageSampleCLutFloat
+cmsStageType                             =    cmsStageType
+cmsStageData                             =    cmsStageData
+cmsNamedColorCount                       =    cmsNamedColorCount
+cmsNamedColorIndex                       =    cmsNamedColorIndex
+cmsNamedColorInfo                        =    cmsNamedColorInfo
+cmsOpenIOhandlerFromFile                 =    cmsOpenIOhandlerFromFile
+cmsOpenIOhandlerFromMem                  =    cmsOpenIOhandlerFromMem
+cmsOpenIOhandlerFromNULL                 =    cmsOpenIOhandlerFromNULL
+cmsOpenIOhandlerFromStream               =    cmsOpenIOhandlerFromStream
+cmsOpenProfileFromFile                   =    cmsOpenProfileFromFile
+cmsOpenProfileFromFileTHR                =    cmsOpenProfileFromFileTHR
+cmsOpenProfileFromIOhandlerTHR           =    cmsOpenProfileFromIOhandlerTHR
+cmsOpenProfileFromMem                    =    cmsOpenProfileFromMem
+cmsOpenProfileFromMemTHR                 =    cmsOpenProfileFromMemTHR
+cmsOpenProfileFromStream                 =    cmsOpenProfileFromStream
+cmsOpenProfileFromStreamTHR              =    cmsOpenProfileFromStreamTHR
+cmsPlugin                                =    cmsPlugin
+_cmsRead15Fixed16Number                  =    _cmsRead15Fixed16Number
+_cmsReadAlignment                        =    _cmsReadAlignment
+_cmsReadFloat32Number                    =    _cmsReadFloat32Number
+cmsReadRawTag                            =    cmsReadRawTag
+cmsReadTag                               =    cmsReadTag
+_cmsReadTypeBase                         =    _cmsReadTypeBase
+_cmsReadUInt16Array                      =    _cmsReadUInt16Array
+_cmsReadUInt16Number                     =    _cmsReadUInt16Number
+_cmsReadUInt32Number                     =    _cmsReadUInt32Number
+_cmsReadUInt64Number                     =    _cmsReadUInt64Number
+_cmsReadUInt8Number                      =    _cmsReadUInt8Number
+_cmsReadXYZNumber                        =    _cmsReadXYZNumber
+_cmsRealloc                              =    _cmsRealloc
+cmsReverseToneCurve                      =    cmsReverseToneCurve
+cmsReverseToneCurveEx                    =    cmsReverseToneCurveEx
+cmsSaveProfileToFile                     =    cmsSaveProfileToFile
+cmsSaveProfileToIOhandler                =    cmsSaveProfileToIOhandler
+cmsSaveProfileToMem                      =    cmsSaveProfileToMem
+cmsSaveProfileToStream                   =    cmsSaveProfileToStream
+cmsSetAdaptationState                    =    cmsSetAdaptationState
+cmsSetAlarmCodes                         =    cmsSetAlarmCodes
+cmsSetColorSpace                         =    cmsSetColorSpace
+cmsSetDeviceClass                        =    cmsSetDeviceClass
+cmsSetEncodedICCversion                  =    cmsSetEncodedICCversion
+cmsSetHeaderAttributes                   =    cmsSetHeaderAttributes
+cmsSetHeaderFlags                        =    cmsSetHeaderFlags
+cmsSetHeaderManufacturer                 =    cmsSetHeaderManufacturer
+cmsSetHeaderModel                        =    cmsSetHeaderModel
+cmsSetHeaderProfileID                    =    cmsSetHeaderProfileID
+cmsSetHeaderRenderingIntent              =    cmsSetHeaderRenderingIntent
+cmsSetLogErrorHandler                    =    cmsSetLogErrorHandler
+cmsSetPCS                                =    cmsSetPCS
+cmsSetProfileVersion                     =    cmsSetProfileVersion
+cmsSignalError                           =    cmsSignalError
+cmsSmoothToneCurve                       =    cmsSmoothToneCurve
+cmsstrcasecmp                            =    cmsstrcasecmp
+cmsTempFromWhitePoint                    =    cmsTempFromWhitePoint
+cmsTransform2DeviceLink                  =    cmsTransform2DeviceLink
+cmsUnregisterPlugins                     =    cmsUnregisterPlugins
+_cmsVEC3cross                            =    _cmsVEC3cross
+_cmsVEC3distance                         =    _cmsVEC3distance
+_cmsVEC3dot                              =    _cmsVEC3dot
+_cmsVEC3init                             =    _cmsVEC3init
+_cmsVEC3length                           =    _cmsVEC3length
+_cmsVEC3minus                            =    _cmsVEC3minus
+cmsWhitePointFromTemp                    =    cmsWhitePointFromTemp
+_cmsWrite15Fixed16Number                 =    _cmsWrite15Fixed16Number
+_cmsWriteAlignment                       =    _cmsWriteAlignment
+_cmsWriteFloat32Number                   =    _cmsWriteFloat32Number
+cmsWriteRawTag                           =    cmsWriteRawTag
+cmsWriteTag                              =    cmsWriteTag
+_cmsWriteTypeBase                        =    _cmsWriteTypeBase
+_cmsWriteUInt16Array                     =    _cmsWriteUInt16Array
+_cmsWriteUInt16Number                    =    _cmsWriteUInt16Number
+_cmsWriteUInt32Number                    =    _cmsWriteUInt32Number
+_cmsWriteUInt64Number                    =    _cmsWriteUInt64Number
+_cmsWriteUInt8Number                     =    _cmsWriteUInt8Number
+_cmsWriteXYZNumber                       =    _cmsWriteXYZNumber
+cmsxyY2XYZ                               =   cmsxyY2XYZ
+cmsXYZ2Lab                               =   cmsXYZ2Lab
+cmsXYZ2xyY                               =   cmsXYZ2xyY
+cmsXYZEncoded2Float                      =   cmsXYZEncoded2Float
+cmsSliceSpace16                          =   cmsSliceSpace16
+cmsSliceSpaceFloat                       =   cmsSliceSpaceFloat
+cmsChangeBuffersFormat                   =   cmsChangeBuffersFormat
+cmsDictAlloc                             =   cmsDictAlloc
+cmsDictFree                              =   cmsDictFree
+cmsDictDup                               =   cmsDictDup
+cmsDictAddEntry                          =   cmsDictAddEntry
+cmsDictGetEntryList                      =   cmsDictGetEntryList
+cmsDictNextEntry                         =   cmsDictNextEntry
+_cmsGetTransformUserData                 =   _cmsGetTransformUserData
+_cmsSetTransformUserData                 =   _cmsSetTransformUserData
+_cmsGetTransformFormatters16             =   _cmsGetTransformFormatters16
+_cmsGetTransformFormattersFloat          =   _cmsGetTransformFormattersFloat
+cmsGetHeaderCreator                      =   cmsGetHeaderCreator
+cmsPluginTHR                             =   cmsPluginTHR
+cmsGetPipelineContextID                  =   cmsGetPipelineContextID
+cmsGetTransformInputFormat               =   cmsGetTransformInputFormat
+cmsGetTransformOutputFormat              =   cmsGetTransformOutputFormat
+cmsCreateContext                         =   cmsCreateContext            
+cmsDupContext                            =   cmsDupContext               
+cmsDeleteContext                         =   cmsDeleteContext              
+cmsGetContextUserData                    =   cmsGetContextUserData       
+cmsUnregisterPluginsTHR                  =   cmsUnregisterPluginsTHR 
+cmsSetAlarmCodesTHR                      =   cmsSetAlarmCodesTHR     
+cmsGetAlarmCodesTHR                      =   cmsGetAlarmCodesTHR
+cmsSetAdaptationStateTHR                 =   cmsSetAdaptationStateTHR
+cmsSetLogErrorHandlerTHR                 =   cmsSetLogErrorHandlerTHR
+cmsGetSupportedIntentsTHR                =   cmsGetSupportedIntentsTHR
+cmsMLUtranslationsCount                  =   cmsMLUtranslationsCount
+cmsMLUtranslationsCodes                  =   cmsMLUtranslationsCodes
+_cmsCreateMutex                          =   _cmsCreateMutex 
+_cmsDestroyMutex                         =   _cmsDestroyMutex
+_cmsLockMutex                            =   _cmsLockMutex   
+_cmsUnlockMutex                          =   _cmsUnlockMutex 
+cmsGetProfileIOhandler                   =   cmsGetProfileIOhandler
+cmsGetEncodedCMMversion                  =   cmsGetEncodedCMMversion
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/lcms2_internal.h b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/lcms2_internal.h
new file mode 100644
index 0000000000..ceab205685
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lcms/src/lcms2_internal.h
@@ -0,0 +1,1118 @@
+
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#ifndef _lcms_internal_H
+
+// Include plug-in foundation
+#ifndef _lcms_plugin_H
+#   include "lcms2_plugin.h"
+#endif
+
+// ctype is part of C99 as per 7.1.2
+#include <ctype.h>
+
+// assert macro is part of C99 as per 7.2
+#include <assert.h>
+
+// Some needed constants.
+// Fallback definitions, used only when the macros are not already defined:
+// M_PI is pi and M_LOG10E is log10(e).
+#ifndef M_PI
+#       define M_PI        3.14159265358979323846
+#endif
+
+#ifndef M_LOG10E
+#       define M_LOG10E    0.434294481903251827651
+#endif
+
+// BorlandC 5.5, VC2003 are broken on sinf()/sqrtf(), so emulate them there
+// with the double-precision functions.
+// _MSC_VER must be tested with defined() first: an identifier that is not a
+// macro evaluates to 0 inside #if, so a bare "_MSC_VER < 1400" would be true
+// on every non-Microsoft compiler and shadow the real sinf()/sqrtf() there.
+// The macro argument and the whole expansion are parenthesized so that
+// expressions such as sinf(a + b) expand correctly.
+#if defined(__BORLANDC__) || (defined(_MSC_VER) && (_MSC_VER < 1400)) // 1400 == VC++ 8.0
+#define sinf(x) ((float)sin((float)(x)))
+#define sqrtf(x) ((float)sqrt((float)(x)))
+#endif
+
+
+// Alignment of ICC file format uses 4 bytes (cmsUInt32Number).
+// Rounds x up to the next multiple of sizeof(cmsUInt32Number) using an
+// add-then-mask, which is only valid because that size is a power of two.
+#define _cmsALIGNLONG(x) (((x)+(sizeof(cmsUInt32Number)-1)) & ~(sizeof(cmsUInt32Number)-1))
+
+// Alignment to memory pointer
+
+// (Ultra)SPARC with gcc requires ptr alignment of 8 bytes
+// even though sizeof(void *) is only four: for greatest flexibility
+// allow the build to specify ptr alignment.
+// A build-supplied CMS_PTR_ALIGNMENT must be a power of two, because
+// _cmsALIGNMEM uses the same add-then-mask rounding as _cmsALIGNLONG.
+#ifndef CMS_PTR_ALIGNMENT
+# define CMS_PTR_ALIGNMENT sizeof(void *)
+#endif
+
+// Rounds x up to the next multiple of CMS_PTR_ALIGNMENT
+#define _cmsALIGNMEM(x)  (((x)+(CMS_PTR_ALIGNMENT - 1)) & ~(CMS_PTR_ALIGNMENT - 1))
+
+// Maximum encodeable values in floating point
+// MAX_ENCODEABLE_XYZ evaluates to 1 + 32767/32768 (just below 2.0).
+// The "ab2" and "ab4" pairs give the lower and upper bounds for two encodings;
+// presumably the Lab a/b channels in V2 and V4 encoding -- TODO confirm.
+#define MAX_ENCODEABLE_XYZ  (1.0 + 32767.0/32768.0)
+#define MIN_ENCODEABLE_ab2  (-128.0)
+#define MAX_ENCODEABLE_ab2  ((65535.0/256.0) - 128.0)
+#define MIN_ENCODEABLE_ab4  (-128.0)
+#define MAX_ENCODEABLE_ab4  (127.0)
+
+// Maximum of channels for internal pipeline evaluation
+#define MAX_STAGE_CHANNELS  128
+
+// Unused parameter warning suppression.
+// Evaluates the argument and discards the result. The argument is
+// parenthesized so the (void) cast applies to the whole expression passed in.
+#define cmsUNUSED_PARAMETER(x) ((void)(x))
+
+// The specification for "inline" is section 6.7.4 of the C99 standard (ISO/IEC 9899:1999).
+// Unfortunately VisualC++ does not conform to that, so cmsINLINE expands to
+// __inline on MSVC and Borland, and to "static inline" everywhere else.
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+#   define cmsINLINE __inline
+#else
+#   define cmsINLINE static inline
+#endif
+
+// Allow signed overflow, we know this is harmless in this particular context.
+// Under clang, CMS_NO_SANITIZE expands to an attribute that disables the
+// signed-integer-overflow sanitizer; on other compilers it expands to nothing.
+#if defined(__clang__)
+#   define CMS_NO_SANITIZE __attribute__((no_sanitize("signed-integer-overflow")))
+#else
+#   define CMS_NO_SANITIZE 
+#endif
+
+// Other replacement functions
+// Under MSVC, snprintf and vsnprintf are mapped to the underscore-prefixed
+// variants unless they are already defined as macros.
+#ifdef _MSC_VER
+# ifndef snprintf
+#       define snprintf  _snprintf
+# endif
+# ifndef vsnprintf
+#       define vsnprintf  _vsnprintf
+# endif
+
+/// Properly define some macros to accommodate
+/// older MSVC versions.
+/// For _MSC_VER <= 1700, isnan maps to _isnan and isinf(x) is emulated as
+/// !_finite(x), both taken from <float.h>.
+/// NOTE(review): !_finite(x) is presumably also true for NaN, unlike a
+/// standard isinf() -- confirm this is acceptable for callers.
+# if _MSC_VER <= 1700
+        #include <float.h>
+        #define isnan _isnan
+        #define isinf(x) (!_finite((x)))
+# endif
+
+#endif
+
+// A fast way to convert from/to 16 <-> 8 bits
+// FROM_8_TO_16 puts the byte in both the high and the low half of the result
+// (0xAB -> 0xABAB); note that it evaluates its argument twice.
+// FROM_16_TO_8 multiplies by 65281, adds 2^23 as a rounding term and keeps
+// bits 24..31 of the 32-bit result (0 -> 0, 0xFFFF -> 0xFF).
+#define FROM_8_TO_16(rgb) (cmsUInt16Number) ((((cmsUInt16Number) (rgb)) << 8)|(rgb))
+#define FROM_16_TO_8(rgb) (cmsUInt8Number) ((((cmsUInt32Number)(rgb) * 65281U + 8388608U) >> 24) & 0xFFU)
+
+// Code analysis is broken on asserts
+// On MSVC with _MSC_VER >= 1500, _cmsAssert also passes the condition to
+// __analysis_assume after asserting it. Everywhere else it is a plain assert().
+// Note that the MSVC >= 1500 form expands to a braced block rather than an
+// expression, so it can only be used as a statement.
+#ifdef _MSC_VER
+#    if (_MSC_VER >= 1500)
+#            define _cmsAssert(a)  { assert((a)); __analysis_assume((a)); }
+#     else
+#            define _cmsAssert(a)   assert((a))
+#     endif
+#else
+#      define _cmsAssert(a)   assert((a))
+#endif
+
+//---------------------------------------------------------------------------------
+
+// Determinants smaller than this are treated as zero (used on matrix inversion)
+#define MATRIX_DET_TOLERANCE    0.0001
+
+//---------------------------------------------------------------------------------
+
+// Fixed point
+// The macros treat x as a value with 16 fractional bits:
+//   FIXED_TO_INT       - shifts the 16 fractional bits away (integer part)
+//   FIXED_REST_TO_INT  - keeps only the low 16 bits (fractional part)
+//   ROUND_FIXED_TO_INT - adds 0x8000 (one half) before shifting, rounding to nearest
+#define FIXED_TO_INT(x)         ((x)>>16)
+#define FIXED_REST_TO_INT(x)    ((x)&0xFFFFU)
+#define ROUND_FIXED_TO_INT(x)   (((x)+0x8000)>>16)
+
+// _cmsToFixedDomain stretches a 0..0xFFFF value to 0..0x10000 (0xFFFF becomes 0x10000);
+// _cmsFromFixedDomain performs the opposite mapping (0x10000 becomes 0xFFFF).
+cmsINLINE cmsS15Fixed16Number _cmsToFixedDomain(int a)                   { return a + ((a + 0x7fff) / 0xffff); }
+cmsINLINE int                 _cmsFromFixedDomain(cmsS15Fixed16Number a) { return a - ((a + 0x7fff) >> 16); }
+
+// -----------------------------------------------------------------------------------------------------------
+
+// Fast floor conversion logic. Thanks to Sree Kotay and Stuart Nixon.
+// Note that this only works in the range -32767...+32767 because
+// the mantissa is interpreted as 15.16 fixed point.
+// The union is to avoid pointer aliasing overoptimization.
+// Building with CMS_DONT_USE_FAST_FLOOR selects the standard floor() instead.
+// NOTE(review): the fast path appears to rely on a 64-bit IEEE-754 double and
+// a 32-bit int -- confirm for any new target platform.
+cmsINLINE int _cmsQuickFloor(cmsFloat64Number val)
+{
+#ifdef CMS_DONT_USE_FAST_FLOOR
+    return (int) floor(val);
+#else
+    const cmsFloat64Number _lcms_double2fixmagic = 68719476736.0 * 1.5;  // 2^36 * 1.5, (52-16=36) uses limited precision to floor
+    union {
+        cmsFloat64Number val;
+        int halves[2];
+    } temp;
+
+    // Adding the magic constant lets one int half of the double be read as 15.16 fixed point
+    temp.val = val + _lcms_double2fixmagic;
+
+    // Which half holds that value depends on endianness; the shift drops the 16 fractional bits
+#ifdef CMS_USE_BIG_ENDIAN
+    return temp.halves[1] >> 16;
+#else
+    return temp.halves[0] >> 16;
+#endif
+#endif
+}
+
+// Fast floor restricted to 0..65535.0
+// The input is shifted down by 32767 before calling _cmsQuickFloor, and the
+// same offset is added back to the truncated result.
+cmsINLINE cmsUInt16Number _cmsQuickFloorWord(cmsFloat64Number d)
+{
+    const cmsUInt16Number Biased = (cmsUInt16Number) _cmsQuickFloor(d - 32767.0);
+
+    return (cmsUInt16Number) (Biased + 32767U);
+}
+
+// Floor to word, taking care of saturation
+// Adds 0.5 to round to nearest, clamps to 0 and 0xffff at the ends of the
+// range, and uses _cmsQuickFloorWord for everything in between.
+cmsINLINE cmsUInt16Number _cmsQuickSaturateWord(cmsFloat64Number d)
+{
+    const cmsFloat64Number Rounded = d + 0.5;
+
+    if (Rounded <= 0) {
+        return 0;
+    }
+
+    if (Rounded >= 65535.0) {
+        return 0xffff;
+    }
+
+    return _cmsQuickFloorWord(Rounded);
+}
+
+// Test bed entry points---------------------------------------------------------------
+#define CMSCHECKPOINT CMSAPI
+
+// Pthread support --------------------------------------------------------------------
+#ifndef CMS_NO_PTHREADS
+
+// This is the threading support. Unfortunately, it has to be platform-dependent because 
+// windows does not support pthreads. 
+#ifdef CMS_IS_WINDOWS_
+
+#define WIN32_LEAN_AND_MEAN 1
+#include <windows.h>
+
+
+// The locking scheme in LCMS requires a single 'top level' mutex
+// to work. This is actually implemented on Windows as a
+// CriticalSection, because they are lighter weight. With
+// pthreads, this is statically inited. Unfortunately, windows
+// can't officially statically init critical sections.
+//
+// We can work around this in 2 ways.
+//
+// 1) We can use a proper mutex purely to protect the init
+// of the CriticalSection. This in turn requires us to protect
+// the Mutex creation, which we can do using the snappily
+// named InterlockedCompareExchangePointer API (present on
+// Windows XP and above).
+//
+// 2) In cases where we want to work on pre-Windows XP, we
+// can use an even more horrible hack described below.
+//
+// So why wouldn't we always use 2)? Because not calling
+// the init function for a critical section means it fails
+// testing with ApplicationVerifier (and presumably similar
+// tools).
+//
+// We therefore default to 1, and people who want to be able
+// to run on pre-Windows XP boxes can build with:
+//     CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+// defined. This is automatically set for builds using
+// versions of MSVC that don't have this API available.
+//
+// From: http://locklessinc.com/articles/pthreads_on_windows/
+// The pthreads API has an initialization macro that has no correspondence to anything in 
+// the windows API. By investigating the internal definition of the critical section type, 
+// one may work out how to initialize one without calling InitializeCriticalSection(). 
+// The trick here is that InitializeCriticalSection() is not allowed to fail. It tries 
+// to allocate a critical section debug object, but if no memory is available, it sets 
+// the pointer to a specific value. (One would expect that value to be NULL, but it is 
+// actually (void *)-1 for some reason.) Thus we can use this special value for that 
+// pointer, and the critical section code will work.
+
+// The other important part of the critical section type to initialize is the number 
+// of waiters. This controls whether or not the mutex is locked. Fortunately, this 
+// part of the critical section is unlikely to change. Apparently, many programs 
+// already test critical sections to see if they are locked using this value, so 
+// Microsoft felt that it was necessary to keep it set at -1 for an unlocked critical
+// section, even when they changed the underlying algorithm to be more scalable. 
+// The final parts of the critical section object are unimportant, and can be set 
+// to zero for their defaults. This yields an initialization macro:
+
+typedef CRITICAL_SECTION _cmsMutex;  // In this branch the mutex primitive is a Win32 critical section
+
+#ifdef _MSC_VER  // MSVC only: silence warning 26135 (presumably the code-analysis lock-annotation check -- confirm)
+#    if (_MSC_VER >= 1800)
+#          pragma warning(disable : 26135)
+#    endif
+#endif  // _MSC_VER
+
+#ifndef CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+// If we are building with a version of MSVC older
+// than 1400 (i.e. before VS2005) then we don't have
+// the InterlockedCompareExchangePointer API, so fall
+// back to the static-initialization hack (option 2 above).
+#    ifdef _MSC_VER
+#       if _MSC_VER < 1400
+#          define CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+#       endif
+#    endif
+#endif  // !CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+
+#ifdef CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT  // Option 2: debug pointer = -1 and lock count = -1, as described in the article quoted above
+#      define CMS_MUTEX_INITIALIZER {(PRTL_CRITICAL_SECTION_DEBUG) -1,-1,0,0,0,0}
+#else  // Option 1 (default): same layout but with a NULL debug pointer
+#      define CMS_MUTEX_INITIALIZER {(PRTL_CRITICAL_SECTION_DEBUG)NULL,-1,0,0,0,0}
+#endif  // CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+
+// Win32 flavour: enter the critical section. The call's result is not inspected; 0 is always returned.
+cmsINLINE int _cmsLockPrimitive(_cmsMutex *m) {
+    EnterCriticalSection(m);
+    return 0;
+}
+
+// Win32 flavour: leave the critical section. Always returns 0.
+cmsINLINE int _cmsUnlockPrimitive(_cmsMutex *m) {
+    LeaveCriticalSection(m);
+    return 0;
+}
+	
+// Win32 flavour: run InitializeCriticalSection on *m. Always returns 0.
+cmsINLINE int _cmsInitMutexPrimitive(_cmsMutex *m) {
+    InitializeCriticalSection(m);
+    return 0;
+}
+
+// Win32 flavour: run DeleteCriticalSection on *m. Always returns 0.
+cmsINLINE int _cmsDestroyMutexPrimitive(_cmsMutex *m) {
+    DeleteCriticalSection(m);
+    return 0;
+}
+
+// Win32 flavour: same body as _cmsLockPrimitive -- enter the critical section, return 0.
+cmsINLINE int _cmsEnterCriticalSectionPrimitive(_cmsMutex *m) {
+    EnterCriticalSection(m);
+    return 0;
+}
+
+// Win32 flavour: same body as _cmsUnlockPrimitive -- leave the critical section, return 0.
+cmsINLINE int _cmsLeaveCriticalSectionPrimitive(_cmsMutex *m) {
+    LeaveCriticalSection(m);
+    return 0;
+}
+
+#else
+
+// Rest of the wide world
+#include <pthread.h>
+
+#define CMS_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+typedef pthread_mutex_t _cmsMutex;
+
+
+cmsINLINE int _cmsLockPrimitive(_cmsMutex *m) {
+    const int rc = pthread_mutex_lock(m);     // pthread flavour: status is handed back unchanged
+    return rc;
+}
+
+cmsINLINE int _cmsUnlockPrimitive(_cmsMutex *m) {
+    const int rc = pthread_mutex_unlock(m);   // pthread flavour: status is handed back unchanged
+    return rc;
+}
+	
+cmsINLINE int _cmsInitMutexPrimitive(_cmsMutex *m) {
+    const int rc = pthread_mutex_init(m, NULL);   // NULL attribute pointer; status is handed back unchanged
+    return rc;
+}
+
+cmsINLINE int _cmsDestroyMutexPrimitive(_cmsMutex *m) {
+    const int rc = pthread_mutex_destroy(m);  // pthread flavour: status is handed back unchanged
+    return rc;
+}
+
+cmsINLINE int _cmsEnterCriticalSectionPrimitive(_cmsMutex *m) {
+    const int rc = pthread_mutex_lock(m);     // same call as _cmsLockPrimitive in this branch
+    return rc;
+}
+
+cmsINLINE int _cmsLeaveCriticalSectionPrimitive(_cmsMutex *m) {
+    const int rc = pthread_mutex_unlock(m);   // same call as _cmsUnlockPrimitive in this branch
+    return rc;
+}
+
+#endif
+#else
+
+#define CMS_MUTEX_INITIALIZER 0
+typedef int _cmsMutex;
+
+
+// Stub flavour (_cmsMutex is a plain int here): nothing is locked, 0 is returned.
+cmsINLINE int _cmsLockPrimitive(_cmsMutex *m) {
+    cmsUNUSED_PARAMETER(m);
+    return 0;
+}
+
+// Stub flavour: nothing is unlocked, 0 is returned.
+cmsINLINE int _cmsUnlockPrimitive(_cmsMutex *m) {
+    cmsUNUSED_PARAMETER(m);
+    return 0;
+}
+	
+// Stub flavour: no initialisation is performed, 0 is returned.
+cmsINLINE int _cmsInitMutexPrimitive(_cmsMutex *m) {
+    cmsUNUSED_PARAMETER(m);
+    return 0;
+}
+
+// Stub flavour: nothing to release, 0 is returned.
+cmsINLINE int _cmsDestroyMutexPrimitive(_cmsMutex *m) {
+    cmsUNUSED_PARAMETER(m);
+    return 0;
+}
+
+// Stub flavour: entering the critical section does nothing, 0 is returned.
+cmsINLINE int _cmsEnterCriticalSectionPrimitive(_cmsMutex *m) {
+    cmsUNUSED_PARAMETER(m);
+    return 0;
+}
+
+// Stub flavour: leaving the critical section does nothing, 0 is returned.
+cmsINLINE int _cmsLeaveCriticalSectionPrimitive(_cmsMutex *m) {
+    cmsUNUSED_PARAMETER(m);
+    return 0;
+}
+#endif
+
+// Plug-In registration ---------------------------------------------------------------
+
+// Specialized function for plug-in memory management. No pairing free() since whole pool is freed at once.
+void* _cmsPluginMalloc(cmsContext ContextID, cmsUInt32Number size);
+
+// Memory management
+cmsBool   _cmsRegisterMemHandlerPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Interpolation
+cmsBool  _cmsRegisterInterpPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Parametric curves
+cmsBool  _cmsRegisterParametricCurvesPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Formatters management
+cmsBool  _cmsRegisterFormattersPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Tag type management
+cmsBool  _cmsRegisterTagTypePlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Tag management
+cmsBool  _cmsRegisterTagPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Intent management
+cmsBool  _cmsRegisterRenderingIntentPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Multi Process elements
+cmsBool  _cmsRegisterMultiProcessElementPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Optimization
+cmsBool  _cmsRegisterOptimizationPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Transform
+cmsBool  _cmsRegisterTransformPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Mutex
+cmsBool _cmsRegisterMutexPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// ---------------------------------------------------------------------------------------------------------
+
+// Suballocators. 
+typedef struct _cmsSubAllocator_chunk_st {  // One node of the suballocator's chunk list
+
+    cmsUInt8Number* Block;       // Pointer to this chunk's byte buffer
+    cmsUInt32Number BlockSize;   // presumably the capacity of Block in bytes -- TODO confirm
+    cmsUInt32Number Used;        // presumably the bytes of Block already handed out -- TODO confirm
+
+    struct _cmsSubAllocator_chunk_st* next;  // Link to another chunk of the same type
+
+} _cmsSubAllocator_chunk;
+
+
+typedef struct {  // Handle taken by _cmsSubAlloc, _cmsSubAllocDup and _cmsSubAllocDestroy
+
+    cmsContext ContextID;        // Context associated with this suballocator
+    _cmsSubAllocator_chunk* h;   // Entry point into the chunk list (presumably its head -- TODO confirm)
+
+} _cmsSubAllocator;
+
+
+_cmsSubAllocator* _cmsCreateSubAlloc(cmsContext ContextID, cmsUInt32Number Initial);
+void              _cmsSubAllocDestroy(_cmsSubAllocator* s);
+void*             _cmsSubAlloc(_cmsSubAllocator* s, cmsUInt32Number size);
+void*             _cmsSubAllocDup(_cmsSubAllocator* s, const void *ptr, cmsUInt32Number size);
+
+// ----------------------------------------------------------------------------------
+
+// The context clients. 
+typedef enum {  // Selects one client slot of a context; this is the type of the 'mc' argument of _cmsContextGetClientChunk
+
+    UserPtr,            // User-defined pointer
+    Logger,             // Error logger
+    AlarmCodesContext,  // Alarm codes
+    AdaptationStateContext, // Adaptation state
+    MemPlugin,          // Memory management plug-in
+    InterpPlugin,       // Interpolation plug-in
+    CurvesPlugin,       // Parametric curves plug-in
+    FormattersPlugin,   // Formatters plug-in
+    TagTypePlugin,      // Tag type plug-in
+    TagPlugin,          // Tag plug-in
+    IntentPlugin,       // Rendering intent plug-in
+    MPEPlugin,          // Multi process elements plug-in
+    OptimizationPlugin, // Optimization plug-in
+    TransformPlugin,    // Transform plug-in
+    MutexPlugin,        // Mutex plug-in
+
+    // Last in list: not a client itself; its value sizes the chunks[] array in _cmsContext_struct
+    MemoryClientMax
+
+} _cmsMemoryClient;
+
+
+// Container for memory management plug-in.
+typedef struct {  // Table of memory-management function pointers; the 'ptr' argument of _cmsInstallAllocFunctions has this type
+
+    _cmsMallocFnPtrType     MallocPtr;    
+    _cmsMalloZerocFnPtrType MallocZeroPtr;  // NOTE(review): the "MalloZeroc" type name is declared outside this chunk; keep the spelling as is
+    _cmsFreeFnPtrType       FreePtr;
+    _cmsReallocFnPtrType    ReallocPtr;
+    _cmsCallocFnPtrType     CallocPtr;
+    _cmsDupFnPtrType        DupPtr;
+
+} _cmsMemPluginChunkType;  // Also the type of DefaultMemoryManager in _cmsContext_struct
+
+// Copy memory management function pointers from plug-in to chunk, taking care of missing routines
+void  _cmsInstallAllocFunctions(cmsPluginMemHandler* Plugin, _cmsMemPluginChunkType* ptr);
+
+// Internal structure for context
+struct _cmsContext_struct {
+    
+    struct _cmsContext_struct* Next;  // Points to next context in the new style
+    _cmsSubAllocator* MemPool;        // The memory pool that stores context data
+    
+    void* chunks[MemoryClientMax];    // Array of pointers to client chunks, one slot per _cmsMemoryClient value. The memory itself is held in the suballocator.
+                                      // If NULL, then it reverts to global Context0
+
+    _cmsMemPluginChunkType DefaultMemoryManager;  // The allocators used for creating the context itself. Cannot be overridden
+};
+
+// Returns a pointer to a valid context structure, including the global one if id is zero. 
+// Verifies the magic number.
+struct _cmsContext_struct* _cmsGetContext(cmsContext ContextID);
+
+// Returns the block assigned to the specific zone. 
+void*     _cmsContextGetClientChunk(cmsContext id, _cmsMemoryClient mc);
+
+
+// Chunks of context memory by plug-in client -------------------------------------------------------
+
+// These structures encapsulate all the variables needed by the various context clients (mostly plug-ins)
+
+// Container for error logger -- not a plug-in
+typedef struct {
+
+    cmsLogErrorHandlerFunction LogErrorHandler;  // Set to NULL for Context0 fallback
+
+} _cmsLogErrorChunkType;
+
+// The global Context0 storage for error logger
+extern  _cmsLogErrorChunkType  _cmsLogErrorChunk;
+
+// Allocate and init error logger container. 
+void _cmsAllocLogErrorChunk(struct _cmsContext_struct* ctx, 
+                            const struct _cmsContext_struct* src);
+
+// Container for alarm codes -- not a plug-in
+typedef struct {
+   
+    cmsUInt16Number AlarmCodes[cmsMAXCHANNELS];
+
+} _cmsAlarmCodesChunkType;
+
+// The global Context0 storage for alarm codes
+extern  _cmsAlarmCodesChunkType _cmsAlarmCodesChunk;
+
+// Allocate and init alarm codes container. 
+void _cmsAllocAlarmCodesChunk(struct _cmsContext_struct* ctx, 
+                            const struct _cmsContext_struct* src);
+
+// Container for adaptation state -- not a plug-in
+typedef struct {
+    
+    cmsFloat64Number  AdaptationState;
+
+} _cmsAdaptationStateChunkType;
+
+// The global Context0 storage for adaptation state
+extern  _cmsAdaptationStateChunkType    _cmsAdaptationStateChunk;
+
+// Allocate and init adaptation state container.
+void _cmsAllocAdaptationStateChunk(struct _cmsContext_struct* ctx, 
+                                   const struct _cmsContext_struct* src);
+
+
+// The global Context0 storage for memory management
+extern  _cmsMemPluginChunkType _cmsMemPluginChunk;
+
+// Allocate and init memory management container.
+void _cmsAllocMemPluginChunk(struct _cmsContext_struct* ctx, 
+                             const struct _cmsContext_struct* src);
+
+// Container for interpolation plug-in
+typedef struct {
+
+    cmsInterpFnFactory Interpolators;
+
+} _cmsInterpPluginChunkType;
+
+// The global Context0 storage for interpolation plug-in
+extern  _cmsInterpPluginChunkType _cmsInterpPluginChunk;
+
+// Allocate and init interpolation container.
+void _cmsAllocInterpPluginChunk(struct _cmsContext_struct* ctx, 
+                                const struct _cmsContext_struct* src);
+
+// Container for parametric curves plug-in
+typedef struct {
+
+    struct _cmsParametricCurvesCollection_st* ParametricCurves;
+
+} _cmsCurvesPluginChunkType;
+
+// The global Context0 storage for tone curves plug-in
+extern  _cmsCurvesPluginChunkType _cmsCurvesPluginChunk;
+
+// Allocate and init parametric curves container.
+void _cmsAllocCurvesPluginChunk(struct _cmsContext_struct* ctx, 
+                                                      const struct _cmsContext_struct* src);
+
+// Container for formatters plug-in
+typedef struct {
+
+    struct _cms_formatters_factory_list* FactoryList;
+
+} _cmsFormattersPluginChunkType;
+
+// The global Context0 storage for formatters plug-in
+extern  _cmsFormattersPluginChunkType _cmsFormattersPluginChunk;
+
+// Allocate and init formatters container.
+void _cmsAllocFormattersPluginChunk(struct _cmsContext_struct* ctx, 
+                                                       const struct _cmsContext_struct* src);
+
+// This chunk type is shared by TagType plug-in and MPE Plug-in
+typedef struct {
+
+    struct _cmsTagTypeLinkedList_st* TagTypes;
+
+} _cmsTagTypePluginChunkType;
+
+
+// The global Context0 storage for tag types plug-in
+extern  _cmsTagTypePluginChunkType      _cmsTagTypePluginChunk;
+
+
+// The global Context0 storage for multi process elements plug-in
+extern  _cmsTagTypePluginChunkType      _cmsMPETypePluginChunk;
+
+// Allocate and init Tag types container.
+void _cmsAllocTagTypePluginChunk(struct _cmsContext_struct* ctx, 
+                                                        const struct _cmsContext_struct* src);
+// Allocate and init MPE container.
+void _cmsAllocMPETypePluginChunk(struct _cmsContext_struct* ctx, 
+                                                        const struct _cmsContext_struct* src);
+// Container for tag plug-in
+typedef struct {
+   
+    struct _cmsTagLinkedList_st* Tag;
+
+} _cmsTagPluginChunkType;
+
+
+// The global Context0 storage for tag plug-in
+extern  _cmsTagPluginChunkType _cmsTagPluginChunk;
+
+// Allocate and init Tag container.
+void _cmsAllocTagPluginChunk(struct _cmsContext_struct* ctx, 
+                                                      const struct _cmsContext_struct* src); 
+
+// Container for intents plug-in
+typedef struct {
+
+    struct _cms_intents_list* Intents;
+
+} _cmsIntentsPluginChunkType;
+
+
+// The global Context0 storage for intents plug-in
+extern  _cmsIntentsPluginChunkType _cmsIntentsPluginChunk;
+
+// Allocate and init intents container.
+void _cmsAllocIntentsPluginChunk(struct _cmsContext_struct* ctx, 
+                                                        const struct _cmsContext_struct* src); 
+
+// Container for optimization plug-in
+typedef struct {
+
+    struct _cmsOptimizationCollection_st* OptimizationCollection;
+
+} _cmsOptimizationPluginChunkType;
+
+
+// The global Context0 storage for optimizers plug-in
+extern  _cmsOptimizationPluginChunkType _cmsOptimizationPluginChunk;
+
+// Allocate and init optimizers container.
+void _cmsAllocOptimizationPluginChunk(struct _cmsContext_struct* ctx, 
+                                         const struct _cmsContext_struct* src);
+
+// Container for transform plug-in
+typedef struct {
+
+    struct _cmsTransformCollection_st* TransformCollection;
+
+} _cmsTransformPluginChunkType;
+
+// The global Context0 storage for full-transform replacement plug-in
+extern  _cmsTransformPluginChunkType _cmsTransformPluginChunk;
+
+// Allocate and init transform container.
+void _cmsAllocTransformPluginChunk(struct _cmsContext_struct* ctx, 
+                                        const struct _cmsContext_struct* src);
+
+// Container for mutex plug-in
+typedef struct {
+
+    _cmsCreateMutexFnPtrType  CreateMutexPtr;
+    _cmsDestroyMutexFnPtrType DestroyMutexPtr;
+    _cmsLockMutexFnPtrType    LockMutexPtr;
+    _cmsUnlockMutexFnPtrType  UnlockMutexPtr;
+
+} _cmsMutexPluginChunkType;
+
+// The global Context0 storage for mutex plug-in
+extern  _cmsMutexPluginChunkType _cmsMutexPluginChunk;
+
+// Allocate and init mutex container.
+void _cmsAllocMutexPluginChunk(struct _cmsContext_struct* ctx, 
+                                        const struct _cmsContext_struct* src);
+
+// ----------------------------------------------------------------------------------
+// MLU internal representation
+typedef struct {
+
+    cmsUInt16Number Language;
+    cmsUInt16Number Country;
+
+    cmsUInt32Number StrW;       // Offset to current unicode string
+    cmsUInt32Number Len;        // Length in bytes
+
+} _cmsMLUentry;
+
+struct _cms_MLU_struct {
+
+    cmsContext ContextID;
+
+    // The directory
+    cmsUInt32Number  AllocatedEntries;  // presumably the capacity of Entries -- TODO confirm
+    cmsUInt32Number  UsedEntries;       // presumably the number of Entries in use -- TODO confirm
+    _cmsMLUentry* Entries;     // Array of _cmsMLUentry records (not pointers); each records a string's offset (StrW) and byte length (Len), the strings being allocated in MemPool
+
+    // The Pool
+    cmsUInt32Number PoolSize;  // The maximum allocated size
+    cmsUInt32Number PoolUsed;  // The used size
+    void*  MemPool;            // Pointer to the beginning of the memory pool
+};
+
+// Named color list internal representation
+typedef struct {
+
+    char Name[cmsMAX_PATH];
+    cmsUInt16Number PCS[3];
+    cmsUInt16Number DeviceColorant[cmsMAXCHANNELS];
+
+} _cmsNAMEDCOLOR;
+
+struct _cms_NAMEDCOLORLIST_struct {
+
+    cmsUInt32Number nColors;        // presumably the number of List entries in use -- TODO confirm
+    cmsUInt32Number Allocated;      // presumably the capacity of List -- TODO confirm
+    cmsUInt32Number ColorantCount;  // presumably how many DeviceColorant[] slots of each entry are meaningful -- TODO confirm
+
+    char Prefix[33];      // Prefix and suffix are defined to be 32 characters at most; the 33rd byte presumably holds the terminator
+    char Suffix[33];
+
+    _cmsNAMEDCOLOR* List;  // Array of named-color records (Name, PCS[3], DeviceColorant[])
+
+    cmsContext ContextID;
+};
+
+
+// ----------------------------------------------------------------------------------
+
+// This is the internal struct holding profile details.
+
+// Maximum supported tags in a profile
+#define MAX_TABLE_TAG       100
+
+typedef struct _cms_iccprofile_struct {
+
+    // I/O handler
+    cmsIOHANDLER*            IOhandler;
+
+    // The context this profile is associated with (a cmsContext; the original comment called it "the thread ID")
+    cmsContext               ContextID;
+
+    // Creation time
+    struct tm                Created;
+
+    // Only the most important items found in ICC profiles
+    cmsUInt32Number          Version;
+    cmsProfileClassSignature DeviceClass;
+    cmsColorSpaceSignature   ColorSpace;
+    cmsColorSpaceSignature   PCS;
+    cmsUInt32Number          RenderingIntent;
+
+    cmsUInt32Number          flags;
+    cmsUInt32Number          manufacturer, model;
+    cmsUInt64Number          attributes;
+    cmsUInt32Number          creator;
+
+    cmsProfileID             ProfileID;
+
+    // Dictionary: parallel arrays, each with room for MAX_TABLE_TAG tags
+    cmsUInt32Number          TagCount;                           // presumably the number of slots in use -- TODO confirm
+    cmsTagSignature          TagNames[MAX_TABLE_TAG];
+    cmsTagSignature          TagLinked[MAX_TABLE_TAG];           // The tag this one is linked to (0=none)
+    cmsUInt32Number          TagSizes[MAX_TABLE_TAG];            // Size on disk
+    cmsUInt32Number          TagOffsets[MAX_TABLE_TAG];
+    cmsBool                  TagSaveAsRaw[MAX_TABLE_TAG];        // True to write uncooked
+    void *                   TagPtrs[MAX_TABLE_TAG];
+    cmsTagTypeHandler*       TagTypeHandlers[MAX_TABLE_TAG];     // The same structure may be serialized as different types
+                                                                 // depending on profile version, so we keep track of the
+                                                                 // type handler for each tag in the list.
+    // Special
+    cmsBool                  IsWrite;
+
+    // Keep a mutex for cmsReadTag -- Note that this only works if the user includes a mutex plugin
+    void *                   UsrMutex;
+
+} _cmsICCPROFILE;
+
+// IO helpers for profiles
+cmsBool              _cmsReadHeader(_cmsICCPROFILE* Icc);
+cmsBool              _cmsWriteHeader(_cmsICCPROFILE* Icc, cmsUInt32Number UsedSpace);
+int                  _cmsSearchTag(_cmsICCPROFILE* Icc, cmsTagSignature sig, cmsBool lFollowLinks);
+
+// Tag types
+cmsTagTypeHandler*   _cmsGetTagTypeHandler(cmsContext ContextID, cmsTagTypeSignature sig);
+cmsTagTypeSignature  _cmsGetTagTrueType(cmsHPROFILE hProfile, cmsTagSignature sig);
+cmsTagDescriptor*    _cmsGetTagDescriptor(cmsContext ContextID, cmsTagSignature sig);
+
+// Error logging ---------------------------------------------------------------------------------------------------------
+
+void                 _cmsTagSignature2String(char String[5], cmsTagSignature sig);
+
+// Interpolation ---------------------------------------------------------------------------------------------------------
+
+CMSCHECKPOINT cmsInterpParams* CMSEXPORT _cmsComputeInterpParams(cmsContext ContextID, cmsUInt32Number nSamples, cmsUInt32Number InputChan, cmsUInt32Number OutputChan, const void* Table, cmsUInt32Number dwFlags);
+cmsInterpParams*                         _cmsComputeInterpParamsEx(cmsContext ContextID, const cmsUInt32Number nSamples[], cmsUInt32Number InputChan, cmsUInt32Number OutputChan, const void* Table, cmsUInt32Number dwFlags);
+CMSCHECKPOINT void             CMSEXPORT _cmsFreeInterpParams(cmsInterpParams* p);
+cmsBool                                  _cmsSetInterpolationRoutine(cmsContext ContextID, cmsInterpParams* p);
+
+// Curves ----------------------------------------------------------------------------------------------------------------
+
+// This struct holds information about a segment, plus a pointer to the function that implements the evaluation.
+// In the case of table-based, Eval pointer is set to NULL
+
+// The gamma function main structure
+struct _cms_curve_struct {
+
+    cmsInterpParams*  InterpParams;  // Private optimizations for interpolation
+
+    cmsUInt32Number   nSegments;     // Number of segments in the curve. Zero for a 16-bit based tables
+    cmsCurveSegment*  Segments;      // The segments
+    cmsInterpParams** SegInterp;     // Array of private optimizations for interpolation in table-based segments
+
+    cmsParametricCurveEvaluator* Evals;  // Evaluators (one per segment)
+
+    // 16 bit Table-based representation follows
+    cmsUInt32Number    nEntries;      // Number of table elements
+    cmsUInt16Number*   Table16;       // The table itself.
+};
+
+
+//  Pipelines & Stages ---------------------------------------------------------------------------------------------
+
+// A single stage
+struct _cmsStage_struct {
+
+    cmsContext          ContextID;
+
+    cmsStageSignature   Type;           // Identifies the stage
+    cmsStageSignature   Implements;     // Identifies the *function* of the stage (for optimizations)
+
+    cmsUInt32Number     InputChannels;  // Input channels -- for optimization purposes
+    cmsUInt32Number     OutputChannels; // Output channels -- for optimization purposes
+
+    _cmsStageEvalFn     EvalPtr;        // Points to fn that evaluates the stage (always in floating point)
+    _cmsStageDupElemFn  DupElemPtr;     // Points to a fn that duplicates the *data* of the stage
+    _cmsStageFreeElemFn FreePtr;        // Points to a fn that sets the *data* of the stage free
+
+    // A generic pointer to whatever memory needed by the stage
+    void*               Data;
+
+    // Maintains linked list (used internally)
+    struct _cmsStage_struct* Next;
+};
+
+
+// Special Stages (cannot be saved)
+CMSCHECKPOINT cmsStage*  CMSEXPORT _cmsStageAllocLab2XYZ(cmsContext ContextID);
+CMSCHECKPOINT cmsStage*  CMSEXPORT _cmsStageAllocXYZ2Lab(cmsContext ContextID);
+cmsStage*                          _cmsStageAllocLabPrelin(cmsContext ContextID);
+CMSCHECKPOINT cmsStage*  CMSEXPORT _cmsStageAllocLabV2ToV4(cmsContext ContextID);
+cmsStage*                          _cmsStageAllocLabV2ToV4curves(cmsContext ContextID);
+CMSCHECKPOINT cmsStage*  CMSEXPORT _cmsStageAllocLabV4ToV2(cmsContext ContextID);
+CMSCHECKPOINT cmsStage*  CMSEXPORT _cmsStageAllocNamedColor(cmsNAMEDCOLORLIST* NamedColorList, cmsBool UsePCS);
+CMSCHECKPOINT cmsStage*  CMSEXPORT _cmsStageAllocIdentityCurves(cmsContext ContextID, cmsUInt32Number nChannels);
+CMSCHECKPOINT cmsStage*  CMSEXPORT _cmsStageAllocIdentityCLut(cmsContext ContextID, cmsUInt32Number nChan);
+cmsStage*                          _cmsStageNormalizeFromLabFloat(cmsContext ContextID);
+cmsStage*                          _cmsStageNormalizeFromXyzFloat(cmsContext ContextID);
+cmsStage*                          _cmsStageNormalizeToLabFloat(cmsContext ContextID);
+cmsStage*                          _cmsStageNormalizeToXyzFloat(cmsContext ContextID);
+cmsStage*                          _cmsStageClipNegatives(cmsContext ContextID, cmsUInt32Number nChannels);
+
+
+// For curve set only
+cmsToneCurve**     _cmsStageGetPtrToCurveSet(const cmsStage* mpe);
+
+
+// Pipeline Evaluator (in floating point)
+typedef void (* _cmsPipelineEvalFloatFn)(const cmsFloat32Number In[],
+                                         cmsFloat32Number Out[],
+                                         const void* Data);
+
+struct _cmsPipeline_struct {
+
+    cmsStage* Elements;                                // Points to elements chain
+    cmsUInt32Number InputChannels, OutputChannels;
+
+    // Data & evaluators
+    void *Data;                          // NOTE(review): presumably the 'Data' argument handed to the evaluators below -- confirm
+
+   _cmsOPTeval16Fn         Eval16Fn;
+   _cmsPipelineEvalFloatFn EvalFloatFn;  // Float evaluator with signature (const In[], Out[], const void* Data)
+   _cmsFreeUserDataFn      FreeDataFn;
+   _cmsDupUserDataFn       DupDataFn;
+
+    cmsContext ContextID;            // Environment
+
+    cmsBool  SaveAs8Bits;            // Implementation-specific: save as 8 bits if possible
+};
+
+// LUT reading & creation -------------------------------------------------------------------------------------------
+
+// Read tags using low-level function, provide necessary glue code to adapt versions, etc. All those return a brand new copy
+// of the LUTS, since ownership of original is up to the profile. The user should free allocated resources.
+
+CMSCHECKPOINT cmsPipeline* CMSEXPORT _cmsReadInputLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent);
+CMSCHECKPOINT cmsPipeline* CMSEXPORT _cmsReadOutputLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent);
+CMSCHECKPOINT cmsPipeline* CMSEXPORT _cmsReadDevicelinkLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent);
+
+// Special values
+cmsBool           _cmsReadMediaWhitePoint(cmsCIEXYZ* Dest, cmsHPROFILE hProfile);
+cmsBool           _cmsReadCHAD(cmsMAT3* Dest, cmsHPROFILE hProfile);
+
+// Profile linker --------------------------------------------------------------------------------------------------
+
+cmsPipeline* _cmsLinkProfiles(cmsContext         ContextID,
+                              cmsUInt32Number    nProfiles,
+                              cmsUInt32Number    TheIntents[],
+                              cmsHPROFILE        hProfiles[],
+                              cmsBool            BPC[],
+                              cmsFloat64Number   AdaptationStates[],
+                              cmsUInt32Number    dwFlags);
+
+// Sequence --------------------------------------------------------------------------------------------------------
+
+cmsSEQ* _cmsReadProfileSequence(cmsHPROFILE hProfile);
+cmsBool _cmsWriteProfileSequence(cmsHPROFILE hProfile, const cmsSEQ* seq);
+cmsSEQ* _cmsCompileProfileSequence(cmsContext ContextID, cmsUInt32Number nProfiles, cmsHPROFILE hProfiles[]);
+
+
+// LUT optimization ------------------------------------------------------------------------------------------------
+
+CMSCHECKPOINT cmsUInt16Number  CMSEXPORT _cmsQuantizeVal(cmsFloat64Number i, cmsUInt32Number MaxSamples);
+
+cmsUInt32Number  _cmsReasonableGridpointsByColorspace(cmsColorSpaceSignature Colorspace, cmsUInt32Number dwFlags);
+
+cmsBool          _cmsEndPointsBySpace(cmsColorSpaceSignature Space,
+                                      cmsUInt16Number **White,
+                                      cmsUInt16Number **Black,
+                                      cmsUInt32Number *nOutputs);
+
+cmsBool          _cmsOptimizePipeline(cmsContext ContextID,
+                                      cmsPipeline**    Lut,
+                                      cmsUInt32Number  Intent,
+                                      cmsUInt32Number* InputFormat,
+                                      cmsUInt32Number* OutputFormat,
+                                      cmsUInt32Number* dwFlags );
+
+
+// Hi level LUT building ----------------------------------------------------------------------------------------------
+
+cmsPipeline*     _cmsCreateGamutCheckPipeline(cmsContext ContextID,
+                                              cmsHPROFILE hProfiles[],
+                                              cmsBool  BPC[],
+                                              cmsUInt32Number Intents[],
+                                              cmsFloat64Number AdaptationStates[],
+                                              cmsUInt32Number nGamutPCSposition,
+                                              cmsHPROFILE hGamut);
+
+
+// Formatters ------------------------------------------------------------------------------------------------------------
+
+#define cmsFLAGS_CAN_CHANGE_FORMATTER     0x02000000   // Allow change buffer format
+
+cmsBool         _cmsFormatterIsFloat(cmsUInt32Number Type);
+cmsBool         _cmsFormatterIs8bit(cmsUInt32Number Type);
+
+CMSCHECKPOINT cmsFormatter CMSEXPORT _cmsGetFormatter(cmsContext ContextID,
+                                                      cmsUInt32Number Type,          // Specific type, i.e. TYPE_RGB_8
+                                                      cmsFormatterDirection Dir,
+                                                      cmsUInt32Number dwFlags);
+
+
+#ifndef CMS_NO_HALF_SUPPORT 
+
+// Half float
+CMSCHECKPOINT cmsFloat32Number CMSEXPORT _cmsHalf2Float(cmsUInt16Number h);
+CMSCHECKPOINT cmsUInt16Number  CMSEXPORT _cmsFloat2Half(cmsFloat32Number flt);
+
+#endif
+
+// Transform logic ------------------------------------------------------------------------------------------------------
+
+struct _cmstransform_struct;
+
+typedef struct {
+
+    // 1-pixel cache (16 bits only)
+    cmsUInt16Number CacheIn[cmsMAXCHANNELS];
+    cmsUInt16Number CacheOut[cmsMAXCHANNELS];
+
+} _cmsCACHE;
+
+
+
+// Transformation
+typedef struct _cmstransform_struct {
+
+    cmsUInt32Number InputFormat, OutputFormat; // Keep formats for further reference
+
+    // Points to transform code
+    _cmsTransform2Fn xform;
+
+    // Formatters, cannot be embedded into LUT because cache
+    cmsFormatter16 FromInput;
+    cmsFormatter16 ToOutput;
+
+    cmsFormatterFloat FromInputFloat;
+    cmsFormatterFloat ToOutputFloat;
+
+    // 1-pixel cache seed for zero as input (16 bits, read only)
+    _cmsCACHE Cache;
+
+    // A Pipeline holding the full (optimized) transform
+    cmsPipeline* Lut;
+
+    // A Pipeline holding the gamut check. It goes from the input space to bilevel
+    cmsPipeline* GamutCheck;
+
+    // Colorant tables
+    cmsNAMEDCOLORLIST* InputColorant;       // Input Colorant table
+    cmsNAMEDCOLORLIST* OutputColorant;      // Colorant table (for n chans > CMYK)
+
+    // Informational only
+    cmsColorSpaceSignature EntryColorSpace;
+    cmsColorSpaceSignature ExitColorSpace;
+
+    // White points (informative only)
+    cmsCIEXYZ EntryWhitePoint;
+    cmsCIEXYZ ExitWhitePoint;
+
+    // Profiles used to create the transform
+    cmsSEQ* Sequence;
+
+    cmsUInt32Number  dwOriginalFlags;
+    cmsFloat64Number AdaptationState;
+
+    // The intent of this transform. That is usually the last intent in the profilechain, but may differ
+    cmsUInt32Number RenderingIntent;
+
+    // An id that uniquely identifies the running context. May be null.
+    cmsContext ContextID;
+
+    // A user-defined pointer that can be used to store data for transform plug-ins
+    void* UserData;
+    _cmsFreeUserDataFn FreeUserData;
+
+    // A way to provide backwards compatibility with full xform plugins
+    _cmsTransformFn OldXform;
+
+} _cmsTRANSFORM;
+
+// Copies extra channels from input to output if the original flags in the transform structure
+// instructs to do so. This function is called on all standard transform functions.
+void _cmsHandleExtraChannels(_cmsTRANSFORM* p, const void* in,
+                             void* out, 
+                             cmsUInt32Number PixelsPerLine,
+                             cmsUInt32Number LineCount,
+                             const cmsStride* Stride);
+
+// -----------------------------------------------------------------------------------------------------------------------
+
+cmsHTRANSFORM _cmsChain2Lab(cmsContext             ContextID,
+                            cmsUInt32Number        nProfiles,
+                            cmsUInt32Number        InputFormat,
+                            cmsUInt32Number        OutputFormat,
+                            const cmsUInt32Number  Intents[],
+                            const cmsHPROFILE      hProfiles[],
+                            const cmsBool          BPC[],
+                            const cmsFloat64Number AdaptationStates[],
+                            cmsUInt32Number        dwFlags);
+
+
+cmsToneCurve* _cmsBuildKToneCurve(cmsContext       ContextID,
+                            cmsUInt32Number        nPoints,
+                            cmsUInt32Number        nProfiles,
+                            const cmsUInt32Number  Intents[],
+                            const cmsHPROFILE      hProfiles[],
+                            const cmsBool          BPC[],
+                            const cmsFloat64Number AdaptationStates[],
+                            cmsUInt32Number        dwFlags);
+
+cmsBool   _cmsAdaptationMatrix(cmsMAT3* r, const cmsMAT3* ConeMatrix, const cmsCIEXYZ* FromIll, const cmsCIEXYZ* ToIll);
+
+cmsBool   _cmsBuildRGB2XYZtransferMatrix(cmsMAT3* r, const cmsCIExyY* WhitePoint, const cmsCIExyYTRIPLE* Primaries);
+
+
+#define _lcms_internal_H
+#endif
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lodepng/LICENSE b/codec/L2/demos/jxlEnc/third_partys/third_party/lodepng/LICENSE
new file mode 100644
index 0000000000..a5fb0603d9
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lodepng/LICENSE
@@ -0,0 +1,21 @@
+Copyright (c) 2005-2018 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+    
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lodepng/lodepng.cpp b/codec/L2/demos/jxlEnc/third_partys/third_party/lodepng/lodepng.cpp
new file mode 100644
index 0000000000..a7564e7623
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lodepng/lodepng.cpp
@@ -0,0 +1,6398 @@
+/*
+LodePNG version 20200215
+
+Copyright (c) 2005-2020 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+/*
+The manual and changelog are in the header file "lodepng.h"
+Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for C.
+*/
+
+#include "lodepng.h"
+
+#ifdef LODEPNG_COMPILE_DISK
+#include <limits.h> /* LONG_MAX */
+#include <stdio.h> /* file handling */
+#endif /* LODEPNG_COMPILE_DISK */
+
+#ifdef LODEPNG_COMPILE_ALLOCATORS
+#include <stdlib.h> /* allocations */
+#endif /* LODEPNG_COMPILE_ALLOCATORS */
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1310) /*Visual Studio: A few warning types are not desired here.*/
+#pragma warning( disable : 4244 ) /*implicit conversions: not warned by gcc -Wall -Wextra and requires too much casts*/
+#pragma warning( disable : 4996 ) /*VS does not like fopen, but fopen_s is not standard C so unusable here*/
+#endif /*_MSC_VER */
+
+const char* LODEPNG_VERSION_STRING = "20200215";
+
+/*
+This source file is built up in the following large parts. The code sections
+with the "LODEPNG_COMPILE_" #defines divide this up further in an intermixed way.
+-Tools for C and common code for PNG and Zlib
+-C Code for Zlib (huffman, deflate, ...)
+-C Code for PNG (file format chunks, adam7, PNG filters, color conversions, ...)
+-The C++ wrapper around all of the above
+*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // Tools for C, and common code for PNG and Zlib.                       // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*The malloc, realloc and free functions defined here with "lodepng_" in front
+of the name, so that you can easily change them to others related to your
+platform if needed. Everything else in the code calls these. Pass
+-DLODEPNG_NO_COMPILE_ALLOCATORS to the compiler, or comment out
+#define LODEPNG_COMPILE_ALLOCATORS in the header, to disable the ones here and
+define them in your own project's source files without needing to change
+lodepng source code. Don't forget to remove "static" if you copypaste them
+from here.*/
+
+#ifdef LODEPNG_COMPILE_ALLOCATORS
+static void* lodepng_malloc(size_t size) { /*malloc wrapper; yields 0 when the request is refused or fails*/
+#ifdef LODEPNG_MAX_ALLOC
+  if(size > LODEPNG_MAX_ALLOC) return 0; /*optional cap: requests above LODEPNG_MAX_ALLOC are refused outright*/
+#endif
+  return malloc(size);
+}
+
+/* NOTE: when realloc returns NULL, it leaves the original memory untouched, so ptr is still owned by the caller */
+static void* lodepng_realloc(void* ptr, size_t new_size) {
+#ifdef LODEPNG_MAX_ALLOC
+  if(new_size > LODEPNG_MAX_ALLOC) return 0; /*optional cap: refused without touching ptr*/
+#endif
+  return realloc(ptr, new_size);
+}
+
+static void lodepng_free(void* ptr) { /*counterpart of lodepng_malloc / lodepng_realloc*/
+  free(ptr);
+}
+#else /*LODEPNG_COMPILE_ALLOCATORS*/
+/* TODO: support giving additional void* payload to the custom allocators */
+void* lodepng_malloc(size_t size);
+void* lodepng_realloc(void* ptr, size_t new_size);
+void lodepng_free(void* ptr);
+#endif /*LODEPNG_COMPILE_ALLOCATORS*/
+
+/* convince the compiler to inline a function, for use when this measurably improves performance */
+/* inline is not available in C90, but use it when supported by the compiler */
+#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || (defined(__cplusplus) && (__cplusplus >= 199711L))
+#define LODEPNG_INLINE inline
+#else
+#define LODEPNG_INLINE /* not available */
+#endif
+
+/* restrict is not available in C90, but use it when supported by the compiler */
+#if (defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) ||\
+    (defined(_MSC_VER) && (_MSC_VER >= 1400)) || \
+    (defined(__WATCOMC__) && (__WATCOMC__ >= 1250) && !defined(__cplusplus))
+#define LODEPNG_RESTRICT __restrict
+#else
+#define LODEPNG_RESTRICT /* not available */
+#endif
+
+/* Replacements for C library functions such as memcpy and strlen, to support platforms
+where a full C library is not available. The compiler can recognize them and compile
+to something as fast. */
+
+static void lodepng_memcpy(void* LODEPNG_RESTRICT dst, const void* LODEPNG_RESTRICT src, size_t size) {
+  const char* from = (const char*)src; /*read cursor*/
+  char* to = (char*)dst; /*write cursor*/
+  while(size-- != 0) *to++ = *from++; /*plain forward byte copy of size bytes*/
+}
+
+static void lodepng_memset(void* LODEPNG_RESTRICT dst, int value, size_t num) {
+  const char fill = (char)value; /*value narrowed to a single byte, as each store does*/
+  char* cursor = (char*)dst;
+  while(num-- != 0) *cursor++ = fill; /*writes num bytes*/
+}
+
+/* Length of a null terminated string. Does not check memory out of bounds, do not use on untrusted data */
+static size_t lodepng_strlen(const char* a) {
+  size_t count = 0;
+  /* the self reference avoids a warning about an unused function in case of disabled COMPILE... macros */
+  (void)(&lodepng_strlen);
+  while(a[count] != '\0') ++count;
+  return count;
+}
+
+#define LODEPNG_MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define LODEPNG_MIN(a, b) (((a) < (b)) ? (a) : (b))
+#define LODEPNG_ABS(x) ((x) < 0 ? -(x) : (x))
+
+#if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_DECODER)
+/* Adds two size_t values and always stores the (possibly wrapped) sum in *result.
+Returns 1 if the addition overflowed, 0 otherwise; involves no undefined behavior. */
+static int lodepng_addofl(size_t a, size_t b, size_t* result) {
+  *result = b + a; /* Unsigned addition wraps around, which is well defined and safe in C90 */
+  return (a > *result) ? 1 : 0; /* a wrapped sum is smaller than the operand it started from */
+}
+#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_DECODER)*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+/* Multiplies two size_t values and always stores the (possibly wrapped) product in *result.
+Returns 1 if the multiplication overflowed, 0 otherwise; involves no undefined behavior. */
+static int lodepng_mulofl(size_t a, size_t b, size_t* result) {
+  *result = b * a; /* Unsigned multiplication wraps around, which is well defined and safe in C90 */
+  return !(a == 0 || *result / a == b); /* dividing back recovers b unless the product wrapped */
+}
+
+#ifdef LODEPNG_COMPILE_ZLIB
+/* Returns 1 when a + b > c, counting an overflowing a + b as greater than any c. */
+static int lodepng_gtofl(size_t a, size_t b, size_t c) {
+  size_t sum;
+  const int wrapped = lodepng_addofl(a, b, &sum);
+  return (wrapped || sum > c) ? 1 : 0;
+}
+#endif /*LODEPNG_COMPILE_ZLIB*/
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+
+/*
+Often in case of an error a value is assigned to a variable and then it breaks
+out of a loop (to go to the cleanup phase of a function). This macro does that.
+It makes the error handling code shorter and more readable.
+
+Example: if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83);
+*/
+#define CERROR_BREAK(errorvar, code){\
+  errorvar = code;\
+  break;\
+}
+
+/*version of CERROR_BREAK that assumes the common case where the error variable is named "error"*/
+#define ERROR_BREAK(code) CERROR_BREAK(error, code)
+
+/*Set error var to the error code, and return it.*/
+#define CERROR_RETURN_ERROR(errorvar, code){\
+  errorvar = code;\
+  return code;\
+}
+
+/*Try the code, if it returns error, also return the error.*/
+#define CERROR_TRY_RETURN(call){\
+  unsigned error = call;\
+  if(error) return error;\
+}
+
+/*Set error var to the error code, and return from the void function.*/
+#define CERROR_RETURN(errorvar, code){\
+  errorvar = code;\
+  return;\
+}
+
+/*
+About uivector, ucvector and string:
+-All of them wrap dynamic arrays or text strings in a similar way.
+-LodePNG was originally written in C++. The vectors replace the std::vectors that were used in the C++ version.
+-The string tools are made to avoid problems with compilers that declare things like strncat as deprecated.
+-They're not used in the interface, only internally in this file as static functions.
+-As with many other structs in this file, the init and cleanup functions serve as ctor and dtor.
+*/
+
+#ifdef LODEPNG_COMPILE_ZLIB
+#ifdef LODEPNG_COMPILE_ENCODER
+/*dynamic vector of unsigned ints*/
+typedef struct uivector {
+  unsigned* data;
+  size_t size; /*size in number of unsigned ints (elements, not bytes)*/
+  size_t allocsize; /*allocated size in bytes*/
+} uivector;
+
+static void uivector_cleanup(void* p) { /*frees the buffer and puts the vector back in the empty state; p is a uivector*/
+  ((uivector*)p)->size = ((uivector*)p)->allocsize = 0;
+  lodepng_free(((uivector*)p)->data);
+  ((uivector*)p)->data = NULL; /*leave no dangling pointer behind*/
+}
+
+/*Sets the element count to size, growing the buffer if needed. Returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned uivector_resize(uivector* p, size_t size) {
+  const size_t needed = size * sizeof(unsigned); /*requested capacity in bytes*/
+  if(needed > p->allocsize) {
+    size_t grown = needed; /*more than double the current capacity: allocate exactly what is needed*/
+    void* fresh;
+    if(needed <= p->allocsize * 2u) grown = (needed * 3u) >> 1u; /*modest growth: over-allocate by 1.5x*/
+    fresh = lodepng_realloc(p->data, grown);
+    if(!fresh) return 0; /*error: not enough memory*/
+    p->data = (unsigned*)fresh;
+    p->allocsize = grown;
+  }
+  p->size = size;
+  return 1; /*success*/
+}
+
+static void uivector_init(uivector* p) { /*empty state: no buffer, zero elements, zero capacity*/
+  p->data = NULL;
+  p->size = p->allocsize = 0;
+}
+
+/*Appends c as the new last element. Returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned uivector_push_back(uivector* p, unsigned c) {
+  if(!uivector_resize(p, p->size + 1)) return 0;
+  p->data[p->size - 1] = c; /*resize already incremented size, so the new slot is the last one*/
+  return 1;
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+#endif /*LODEPNG_COMPILE_ZLIB*/
+
+/* /////////////////////////////////////////////////////////////////////////// */
+
+/*dynamic vector of unsigned chars*/
+typedef struct ucvector {
+  unsigned char* data;
+  size_t size; /*used size*/
+  size_t allocsize; /*allocated size*/
+} ucvector;
+
+/*Sets the used size to size bytes, growing the buffer if needed. Returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned ucvector_resize(ucvector* p, size_t size) {
+  if(size > p->allocsize) {
+    size_t grown = size; /*more than double the current capacity: allocate exactly what is needed*/
+    void* fresh;
+    if(size <= p->allocsize * 2u) grown = (size * 3u) >> 1u; /*modest growth: over-allocate by 1.5x*/
+    fresh = lodepng_realloc(p->data, grown);
+    if(!fresh) return 0; /*error: not enough memory*/
+    p->data = (unsigned char*)fresh;
+    p->allocsize = grown;
+  }
+  p->size = size;
+  return 1; /*success*/
+}
+
+#ifdef LODEPNG_COMPILE_PNG
+
+static void ucvector_cleanup(void* p) { /*frees the buffer and puts the vector back in the empty state; p is a ucvector*/
+  ((ucvector*)p)->size = ((ucvector*)p)->allocsize = 0;
+  lodepng_free(((ucvector*)p)->data);
+  ((ucvector*)p)->data = NULL; /*leave no dangling pointer behind*/
+}
+
+static void ucvector_init(ucvector* p) { /*empty state: no buffer, zero used size, zero capacity*/
+  p->data = NULL;
+  p->size = p->allocsize = 0;
+}
+#endif /*LODEPNG_COMPILE_PNG*/
+
+#ifdef LODEPNG_COMPILE_ZLIB
+/*you can both convert from vector to buffer&size and vice versa. If you use
+init_buffer to take over a buffer and size, it is not needed to use cleanup*/
+static void ucvector_init_buffer(ucvector* p, unsigned char* buffer, size_t size) {
+  p->data = buffer; /*the vector points at the caller's buffer, nothing is copied*/
+  p->allocsize = p->size = size; /*the buffer is treated as completely used and completely allocated*/
+}
+#endif /*LODEPNG_COMPILE_ZLIB*/
+
+#if (defined(LODEPNG_COMPILE_PNG) && defined(LODEPNG_COMPILE_ANCILLARY_CHUNKS)) || defined(LODEPNG_COMPILE_ENCODER)
+/*Appends c as the new last byte. Returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned ucvector_push_back(ucvector* p, unsigned char c) {
+  if(!ucvector_resize(p, p->size + 1)) return 0;
+  p->data[p->size - 1] = c; /*resize already incremented size, so the new slot is the last one*/
+  return 1;
+}
+#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/
+
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_PNG
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+
+/*free string pointer and set it to NULL; out is the address of the string pointer to release*/
+static void string_cleanup(char** out) {
+  lodepng_free(*out);
+  *out = NULL; /*so the caller's pointer no longer refers to freed memory*/
+}
+
+/* Returns a newly allocated copy of the null terminated input text, or NULL if the
+allocation fails. The copy comes from lodepng_malloc, so release it with lodepng_free. */
+static char* alloc_string(const char* in) {
+  const size_t length = lodepng_strlen(in);
+  char* copy = (char*)lodepng_malloc(length + 1);
+  size_t pos;
+
+  if(!copy) return NULL; /*out of memory*/
+
+  for(pos = 0; pos < length; pos++) copy[pos] = in[pos];
+  copy[length] = 0; /*null terminator*/
+  return copy;
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_PNG)
+static unsigned lodepng_read32bitInt(const unsigned char* buffer) { /*decodes 4 bytes, most significant byte first*/
+  const unsigned hi = ((unsigned)buffer[0] << 8u) | (unsigned)buffer[1], lo = ((unsigned)buffer[2] << 8u) | (unsigned)buffer[3];
+  return (hi << 16u) | lo;
+}
+#endif /*defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_PNG)*/
+
+#if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)
+/*Stores the low 32 bits of value as 4 bytes; buffer must have at least 4 allocated bytes available*/
+static void lodepng_set32bitInt(unsigned char* buffer, unsigned value) {
+  unsigned i;
+  for(i = 0; i != 4; ++i) {
+    buffer[i] = (unsigned char)((value >> (24u - 8u * i)) & 0xff); /*most significant byte first*/
+  }
+}
+#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / File IO                                                                / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_DISK
+
+/* Returns the size of the file in bytes, or a negative value on error.
+This should be pure C compatible, so no fstat: the size is taken from the
+file position after seeking to the end. */
+static long lodepng_filesize(const char* filename) {
+  long result = -1; /*stays negative unless a size is obtained*/
+  FILE* handle = fopen(filename, "rb");
+
+  if(!handle) return -1;
+
+  if(fseek(handle, 0, SEEK_END) == 0) {
+    result = ftell(handle);
+    /* It may give LONG_MAX as directory size, this is invalid for us. */
+    if(result == LONG_MAX) result = -1;
+  }
+
+  /*the handle is closed on both the success and the seek failure path*/
+  fclose(handle);
+  return result;
+}
+
+/* Load file into buffer that already has the correct allocated size.
+Returns 0 on success, or error 78 if the file cannot be opened or fewer than size bytes could be read. */
+static unsigned lodepng_buffer_file(unsigned char* out, size_t size, const char* filename) {
+  size_t got;
+  FILE* handle = fopen(filename, "rb");
+  if(!handle) return 78;
+
+  got = fread(out, 1, size, handle);
+  fclose(handle);
+
+  /*anything but a complete read is reported as failure*/
+  return (got == size) ? 0 : 78;
+}
+
+unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename) {
+  size_t nbytes;
+  const long measured = lodepng_filesize(filename);
+  if(measured < 0) return 78; /*size could not be determined*/
+  nbytes = (size_t)measured;
+  *outsize = nbytes;
+  *out = (unsigned char*)lodepng_malloc(nbytes);
+  if(nbytes != 0 && *out == NULL) return 83; /*alloc fail; a NULL result for an empty file is not treated as an error*/
+  return lodepng_buffer_file(*out, nbytes, filename);
+}
+
+/*Write given buffer to the file, overwriting the file (no append). Returns 0 on success, 79 if opening, writing or closing fails.*/
+unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename) {
+  size_t written;
+  FILE* file = fopen(filename, "wb" );
+  if(!file) return 79;
+  written = fwrite(buffer, 1, buffersize, file);
+  if(fclose(file) != 0) return 79; /*buffered data is flushed on close, so a failed close means data was lost*/
+  return (written == buffersize) ? 0 : 79; /*a short write (e.g. disk full) must not be reported as success*/
+}
+
+#endif /*LODEPNG_COMPILE_DISK*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // End of common code and tools. Begin of Zlib related code.            // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_ZLIB
+#ifdef LODEPNG_COMPILE_ENCODER
+
+typedef struct {
+  ucvector* data;
+  size_t bp;
+} LodePNGBitWriter;
+
+void LodePNGBitWriter_init(LodePNGBitWriter* writer, ucvector* data) { /*binds the writer to the output vector data*/
+  writer->data = data;
+  writer->bp = 0; /*count of bits written so far; its low 3 bits select the bit within the last byte (see WRITEBIT)*/
+}
+
+/*TODO: this ignores potential out of memory errors*/
+#define WRITEBIT(/*size_t**/ writer, /*unsigned char*/ bit){\
+  /* append new byte */\
+  if(((writer->bp) & 7u) == 0) ucvector_push_back(writer->data, (unsigned char)0);\
+  (writer->data->data[writer->data->size - 1]) |= (bit << ((writer->bp) & 7u));\
+  ++writer->bp;\
+}
+
+/* Writes the low nbits bits of value. LSB of value is written first, and LSB of bytes is used first */
+static void writeBits(LodePNGBitWriter* writer, unsigned value, size_t nbits) {
+  if(nbits == 1) { /* compiler should statically compile this case if nbits == 1 */
+    WRITEBIT(writer, value); /* value is ORed in unmasked here, so it is expected to be 0 or 1 */
+  } else {
+    /* TODO: increase output size only once here rather than in each WRITEBIT */
+    size_t i;
+    for(i = 0; i != nbits; ++i) {
+      WRITEBIT(writer, (unsigned char)((value >> i) & 1));
+    }
+  }
+}
+
+/* This one is to use for adding huffman symbol, the value bits are written MSB first (bit nbits - 1 down to bit 0) */
+static void writeBitsReversed(LodePNGBitWriter* writer, unsigned value, size_t nbits) {
+  size_t i;
+  for(i = 0; i != nbits; ++i) {
+    /* TODO: increase output size only once here rather than in each WRITEBIT */
+    WRITEBIT(writer, (unsigned char)((value >> (nbits - 1u - i)) & 1u));
+  }
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+typedef struct {
+  const unsigned char* data;
+  size_t size; /*size of data in bytes*/
+  size_t bitsize; /*size of data in bits, end of valid bp values, should be 8*size*/
+  size_t bp;
+  unsigned buffer; /*buffer for reading bits. NOTE: 'unsigned' must support at least 32 bits*/
+} LodePNGBitReader;
+
+/* data size argument is in bytes. Returns 0 on success, or error 105 if size is so large
+that the bit position arithmetic could overflow */
+static unsigned LodePNGBitReader_init(LodePNGBitReader* reader, const unsigned char* data, size_t size) {
+  size_t headroom; /*only computed to detect overflow, never stored*/
+  reader->data = data;
+  reader->size = size;
+  /* size in bits, return error if overflow (if size_t is 32 bit this supports up to 500MB)  */
+  if(lodepng_mulofl(size, 8u, &reader->bitsize) != 0) return 105;
+  /*bp may be advanced past bitsize; require 64 bits of slack so comparing it to bitsize cannot wrap around*/
+  if(lodepng_addofl(reader->bitsize, 64u, &headroom) != 0) return 105;
+  reader->buffer = 0;
+  reader->bp = 0;
+  return 0; /*ok*/
+}
+
+/*
+ensureBits functions:
+Ensures the reader can at least read nbits bits in one or more readBits calls,
+safely even if not enough bits are available.
+Returns 1 if there are enough bits available, 0 if not.
+*/
+
+/*See ensureBits documentation above. This one ensures exactly 1 bit */
+/*static unsigned ensureBits1(LodePNGBitReader* reader) {
+  if(reader->bp >= reader->bitsize) return 0;
+  reader->buffer = (unsigned)reader->data[reader->bp >> 3u] >> (reader->bp & 7u);
+  return 1;
+}*/
+
+/*See ensureBits documentation above. This one ensures up to 9 bits. Reloads reader->buffer from the data at bp */
+static unsigned ensureBits9(LodePNGBitReader* reader, size_t nbits) {
+  size_t start = reader->bp >> 3u; /*index of the byte containing the current bit*/
+  size_t size = reader->size;
+  if(start + 1u < size) { /*fast path: 2 whole bytes are available*/
+    reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u);
+    reader->buffer >>= (reader->bp & 7u); /*drop the bits of the first byte that lie before bp*/
+    return 1;
+  } else { /*near the end: missing bytes read as zero, and the bit count is verified instead*/
+    reader->buffer = 0;
+    if(start + 0u < size) reader->buffer |= reader->data[start + 0];
+    reader->buffer >>= (reader->bp & 7u);
+    return reader->bp + nbits <= reader->bitsize;
+  }
+}
+
+/*See ensureBits documentation above. This one ensures up to 17 bits. Reloads reader->buffer from the data at bp */
+static unsigned ensureBits17(LodePNGBitReader* reader, size_t nbits) {
+  size_t start = reader->bp >> 3u; /*index of the byte containing the current bit*/
+  size_t size = reader->size;
+  if(start + 2u < size) { /*fast path: 3 whole bytes are available*/
+    reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u) |
+                     ((unsigned)reader->data[start + 2] << 16u);
+    reader->buffer >>= (reader->bp & 7u); /*drop the bits of the first byte that lie before bp*/
+    return 1;
+  } else { /*near the end: missing bytes read as zero, and the bit count is verified instead*/
+    reader->buffer = 0;
+    if(start + 0u < size) reader->buffer |= reader->data[start + 0];
+    if(start + 1u < size) reader->buffer |= ((unsigned)reader->data[start + 1] << 8u);
+    reader->buffer >>= (reader->bp & 7u);
+    return reader->bp + nbits <= reader->bitsize;
+  }
+}
+
+/*See ensureBits documentation above. This one ensures up to 25 bits. Reloads reader->buffer from the data at bp */
+static LODEPNG_INLINE unsigned ensureBits25(LodePNGBitReader* reader, size_t nbits) {
+  size_t start = reader->bp >> 3u; /*index of the byte containing the current bit*/
+  size_t size = reader->size;
+  if(start + 3u < size) { /*fast path: 4 whole bytes are available*/
+    reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u) |
+                     ((unsigned)reader->data[start + 2] << 16u) | ((unsigned)reader->data[start + 3] << 24u);
+    reader->buffer >>= (reader->bp & 7u); /*drop the bits of the first byte that lie before bp*/
+    return 1;
+  } else { /*near the end: missing bytes read as zero, and the bit count is verified instead*/
+    reader->buffer = 0;
+    if(start + 0u < size) reader->buffer |= reader->data[start + 0];
+    if(start + 1u < size) reader->buffer |= ((unsigned)reader->data[start + 1] << 8u);
+    if(start + 2u < size) reader->buffer |= ((unsigned)reader->data[start + 2] << 16u);
+    reader->buffer >>= (reader->bp & 7u);
+    return reader->bp + nbits <= reader->bitsize;
+  }
+}
+
+/*See ensureBits documentation above. This one ensures up to 32 bits. Reloads reader->buffer from the data at bp */
+static LODEPNG_INLINE unsigned ensureBits32(LodePNGBitReader* reader, size_t nbits) {
+  size_t start = reader->bp >> 3u; /*index of the byte containing the current bit*/
+  size_t size = reader->size;
+  if(start + 4u < size) { /*fast path: 5 whole bytes are available*/
+    reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u) |
+                     ((unsigned)reader->data[start + 2] << 16u) | ((unsigned)reader->data[start + 3] << 24u);
+    reader->buffer >>= (reader->bp & 7u); /*drop the bits of the first byte that lie before bp*/
+    reader->buffer |= (((unsigned)reader->data[start + 4] << 24u) << (8u - (reader->bp & 7u))); /*fifth byte fills the top bits the shift vacated*/
+    return 1;
+  } else { /*near the end: missing bytes read as zero, and the bit count is verified instead*/
+    reader->buffer = 0;
+    if(start + 0u < size) reader->buffer |= reader->data[start + 0];
+    if(start + 1u < size) reader->buffer |= ((unsigned)reader->data[start + 1] << 8u);
+    if(start + 2u < size) reader->buffer |= ((unsigned)reader->data[start + 2] << 16u);
+    if(start + 3u < size) reader->buffer |= ((unsigned)reader->data[start + 3] << 24u);
+    reader->buffer >>= (reader->bp & 7u);
+    return reader->bp + nbits <= reader->bitsize;
+  }
+}
+
+/* Get bits without advancing the bit pointer. Must have enough bits available with ensureBits. Max nbits is 31. */
+static unsigned peekBits(LodePNGBitReader* reader, size_t nbits) {
+  const unsigned lowmask = (1u << nbits) - 1u; /* nbits one-bits; the shift allows nbits to be only up to 31 */
+  return reader->buffer & lowmask;
+}
+
+/* Consumes nbits bits. Must have enough bits available with ensureBits */
+static void advanceBits(LodePNGBitReader* reader, size_t nbits) {
+  reader->bp += nbits; /*move the bit position forward*/
+  reader->buffer = reader->buffer >> nbits; /*discard the consumed bits so the next unread bit becomes bit 0*/
+}
+
+/* Returns the next nbits bits and advances past them. Must have enough bits available with ensureBits */
+static unsigned readBits(LodePNGBitReader* reader, size_t nbits) {
+  unsigned result = peekBits(reader, nbits); /*peekBits limits nbits to at most 31*/
+  advanceBits(reader, nbits);
+  return result;
+}
+
+/* Public for testing only. steps and result must have numsteps values.
+Returns 1 when every step was read; 0 if the reader could not be set up, a step exceeds 31 bits, or the data ran out. */
+unsigned lode_png_test_bitreader(const unsigned char* data, size_t size, size_t numsteps, const size_t* steps, unsigned* result) {
+  size_t i;
+  LodePNGBitReader reader;
+  if(LodePNGBitReader_init(&reader, data, size)) return 0; /*a failed init leaves bp and buffer unset: do not read*/
+  for(i = 0; i < numsteps; i++) {
+    size_t step = steps[i];
+    unsigned ok;
+    if(step > 25) ok = (step <= 31) && ensureBits32(&reader, step); /*readBits cannot take 32 or more bits at once*/
+    else if(step > 17) ok = ensureBits25(&reader, step);
+    else if(step > 9) ok = ensureBits17(&reader, step);
+    else ok = ensureBits9(&reader, step);
+    if(!ok) return 0;
+    result[i] = readBits(&reader, step);
+  }
+  return 1;
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+static unsigned reverseBits(unsigned bits, unsigned num) { /*mirrors the low num bits of bits: bit 0 <-> bit num - 1*/
+  /*TODO: implement faster lookup table based version when needed*/
+  unsigned remaining, mirrored = 0;
+  for(remaining = num; remaining != 0; --remaining, bits >>= 1u) mirrored = (mirrored << 1u) | (bits & 1u);
+  return mirrored;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Deflate - Huffman                                                      / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#define FIRST_LENGTH_CODE_INDEX 257
+#define LAST_LENGTH_CODE_INDEX 285
+/*256 literals, the end code, some length codes, and 2 unused codes*/
+#define NUM_DEFLATE_CODE_SYMBOLS 288
+/*the distance codes have their own symbols, 30 used, 2 unused*/
+#define NUM_DISTANCE_SYMBOLS 32
+/*the code length codes. 0-15: code lengths, 16: copy previous 3-6 times, 17: 3-10 zeros, 18: 11-138 zeros*/
+#define NUM_CODE_LENGTH_CODES 19
+
+/*the base lengths represented by codes 257-285*/
+static const unsigned LENGTHBASE[29]
+  = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
+     67, 83, 99, 115, 131, 163, 195, 227, 258};
+
+/*the extra bits used by codes 257-285 (added to base length)*/
+static const unsigned LENGTHEXTRA[29]
+  = {0, 0, 0, 0, 0, 0, 0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
+      4,  4,  4,   4,   5,   5,   5,   5,   0};
+
+/*the base backwards distances (the bits of distance codes appear after length codes and use their own huffman tree)*/
+static const unsigned DISTANCEBASE[30]
+  = {1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513,
+     769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577};
+
+/*the extra bits of backwards distances (added to base)*/
+static const unsigned DISTANCEEXTRA[30]
+  = {0, 0, 0, 0, 1, 1, 2,  2,  3,  3,  4,  4,  5,  5,   6,   6,   7,   7,   8,
+       8,    9,    9,   10,   10,   11,   11,   12,    12,    13,    13};
+
+/*the order in which "code length alphabet code lengths" are stored as specified by deflate, out of this the huffman
+tree of the dynamic huffman tree lengths is generated*/
+static const unsigned CLCL_ORDER[NUM_CODE_LENGTH_CODES]
+  = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*
+Huffman tree struct, containing multiple representations of the tree
+*/
+typedef struct HuffmanTree {
+  unsigned* codes; /*the huffman codes (bit patterns representing the symbols)*/
+  unsigned* lengths; /*the lengths of the huffman codes*/
+  unsigned maxbitlen; /*maximum number of bits a single code can get*/
+  unsigned numcodes; /*number of symbols in the alphabet = number of codes*/
+  /* for reading only */
+  unsigned char* table_len; /*length of symbol from lookup table, or max length if secondary lookup needed*/
+  unsigned short* table_value; /*value of symbol from lookup table, or pointer to secondary table if needed*/
+} HuffmanTree;
+
+static void HuffmanTree_init(HuffmanTree* tree) { /*marks all four arrays as unallocated; maxbitlen and numcodes are not set here*/
+  tree->table_value = 0;
+  tree->table_len = 0;
+  tree->lengths = 0;
+  tree->codes = 0;
+}
+
+static void HuffmanTree_cleanup(HuffmanTree* tree) { /*releases the four arrays owned by the tree; pointers are not reset*/
+  lodepng_free(tree->table_value);
+  lodepng_free(tree->table_len);
+  lodepng_free(tree->lengths);
+  lodepng_free(tree->codes);
+}
+
+/* amount of bits for first huffman table lookup (aka root bits), see HuffmanTree_makeTable and huffmanDecodeSymbol.*/
+/* values 8u and 9u work the fastest */
+#define FIRSTBITS 9u
+
+/* a symbol value too big to represent any valid symbol, to indicate reading disallowed huffman bits combination,
+which is possible in case of only 0 or 1 present symbols. */
+#define INVALIDSYMBOL 65535u
+
+/* make table for huffman decoding from tree->codes and tree->lengths. Returns 0 if ok, 83 on alloc fail, 55 for an invalid tree */
+static unsigned HuffmanTree_makeTable(HuffmanTree* tree) {
+  static const unsigned headsize = 1u << FIRSTBITS; /*size of the first table*/
+  static const unsigned mask = (1u << FIRSTBITS) /*headsize*/ - 1u;
+  size_t i, numpresent, pointer, size; /*total table size*/
+  unsigned* maxlens = (unsigned*)lodepng_malloc(headsize * sizeof(unsigned));
+  if(!maxlens) return 83; /*alloc fail*/
+
+  /* compute maxlens: max total bit length of symbols sharing prefix in the first table*/
+  for(i = 0; i < headsize; ++i) maxlens[i] = 0;
+  for(i = 0; i < tree->numcodes; i++) {
+    unsigned symbol = tree->codes[i];
+    unsigned l = tree->lengths[i];
+    unsigned index;
+    if(l <= FIRSTBITS) continue; /*symbols that fit in first table don't increase secondary table size*/
+    /*get the FIRSTBITS MSBs, the MSBs of the symbol are encoded first. See later comment about the reversing*/
+    index = reverseBits(symbol >> (l - FIRSTBITS), FIRSTBITS);
+    maxlens[index] = LODEPNG_MAX(maxlens[index], l);
+  }
+  /* compute total table size: size of first table plus all secondary tables for symbols longer than FIRSTBITS */
+  size = headsize;
+  for(i = 0; i < headsize; ++i) {
+    unsigned l = maxlens[i];
+    if(l > FIRSTBITS) size += (1u << (l - FIRSTBITS));
+  }
+  tree->table_len = (unsigned char*)lodepng_malloc(size * sizeof(*tree->table_len));
+  tree->table_value = (unsigned short*)lodepng_malloc(size * sizeof(*tree->table_value));
+  if(!tree->table_len || !tree->table_value) {
+    lodepng_free(maxlens);
+    /* freeing tree->table values is done at a higher scope */
+    return 83; /*alloc fail*/
+  }
+  /*initialize with an invalid length (16) to indicate unused entries; the checks below test for this marker*/
+  for(i = 0; i < size; ++i) tree->table_len[i] = 16;
+
+  /*fill in the first table for long symbols: max prefix size and pointer to secondary tables*/
+  pointer = headsize; /*secondary tables are laid out back to back right after the first table*/
+  for(i = 0; i < headsize; ++i) {
+    unsigned l = maxlens[i];
+    if(l <= FIRSTBITS) continue;
+    tree->table_len[i] = l;
+    tree->table_value[i] = pointer;
+    pointer += (1u << (l - FIRSTBITS));
+  }
+  lodepng_free(maxlens); /*no longer needed: the early returns below therefore have nothing local to free*/
+
+  /*fill in the first table for short symbols, or secondary table for long symbols*/
+  numpresent = 0;
+  for(i = 0; i < tree->numcodes; ++i) {
+    unsigned l = tree->lengths[i];
+    unsigned symbol = tree->codes[i]; /*the huffman bit pattern. i itself is the value.*/
+    /*reverse bits, because the huffman bits are given in MSB first order but the bit reader reads LSB first*/
+    unsigned reverse = reverseBits(symbol, l);
+    if(l == 0) continue; /*length 0 means the symbol is not present in the tree*/
+    numpresent++;
+
+    if(l <= FIRSTBITS) {
+      /*short symbol, fully in first table, replicated num times if l < FIRSTBITS*/
+      unsigned num = 1u << (FIRSTBITS - l);
+      unsigned j;
+      for(j = 0; j < num; ++j) {
+        /*bit reader will read the l bits of symbol first, the remaining FIRSTBITS - l bits go to the MSB's*/
+        unsigned index = reverse | (j << l);
+        if(tree->table_len[index] != 16) return 55; /*invalid tree: long symbol shares prefix with short symbol*/
+        tree->table_len[index] = l;
+        tree->table_value[index] = i;
+      }
+    } else {
+      /*long symbol, shares prefix with other long symbols in first lookup table, needs second lookup*/
+      /*the FIRSTBITS MSBs of the symbol are the first table index*/
+      unsigned index = reverse & mask;
+      unsigned maxlen = tree->table_len[index];
+      /*log2 of secondary table length, should be >= l - FIRSTBITS*/
+      unsigned tablelen = maxlen - FIRSTBITS;
+      unsigned start = tree->table_value[index]; /*starting index in secondary table*/
+      unsigned num = 1u << (tablelen - (l - FIRSTBITS)); /*amount of entries of this symbol in secondary table*/
+      unsigned j;
+      /*NOTE(review): num above is computed before this check, its shift count is only meaningful when maxlen >= l*/
+      if(maxlen < l) return 55; /*invalid tree: long symbol shares prefix with short symbol*/
+      for(j = 0; j < num; ++j) {
+        unsigned reverse2 = reverse >> FIRSTBITS; /* l - FIRSTBITS bits */
+        unsigned index2 = start + (reverse2 | (j << (l - FIRSTBITS)));
+        tree->table_len[index2] = l;
+        tree->table_value[index2] = i;
+      }
+    }
+  }
+
+  if(numpresent < 2) {
+    /* In case of exactly 1 symbol, in theory the huffman symbol needs 0 bits,
+    but deflate uses 1 bit instead. In case of 0 symbols, no symbols can
+    appear at all, but such huffman tree could still exist (e.g. if distance
+    codes are never used). In both cases, not all symbols of the table will be
+    filled in. Fill them in with an invalid symbol value so returning them from
+    huffmanDecodeSymbol will cause error. */
+    for(i = 0; i < size; ++i) {
+      if(tree->table_len[i] == 16) {
+        /* As length, use a value smaller than FIRSTBITS for the head table,
+        and a value larger than FIRSTBITS for the secondary table, to ensure
+        valid behavior for advanceBits when reading this symbol. */
+        tree->table_len[i] = (i < headsize) ? 1 : (FIRSTBITS + 1);
+        tree->table_value[i] = INVALIDSYMBOL;
+      }
+    }
+  } else {
+    /* A good huffman tree has N * 2 - 1 nodes, of which N - 1 are internal nodes.
+    If that is not the case (due to too long length codes), the table will not
+    have been fully used, and this is an error (not all bit combinations can be
+    decoded): an oversubscribed huffman tree, indicated by error 55. */
+    for(i = 0; i < size; ++i) {
+      if(tree->table_len[i] == 16) return 55;
+    }
+  }
+
+  return 0;
+}
+
+/*
+Second step for the ...makeFromLengths and ...makeFromFrequencies functions.
+numcodes, lengths and maxbitlen must already be filled in correctly.
+Allocates tree->codes, gives every symbol with a non-zero length its code
+(codes of one length are handed out in increasing symbol order), and then
+builds the decoding lookup table with HuffmanTree_makeTable.
+Return value is error: 83 if an allocation fails, otherwise the result of
+HuffmanTree_makeTable.
+*/
+static unsigned HuffmanTree_makeFromLengths2(HuffmanTree* tree) {
+  unsigned* blcount; /*blcount[l]: how many symbols have code length l*/
+  unsigned* nextcode; /*nextcode[l]: the next code value to hand out for length l*/
+  unsigned error = 0;
+  unsigned bits, n;
+
+  /*tree->codes stays owned by the tree; it is not released in this function, not even on error*/
+  tree->codes = (unsigned*)lodepng_malloc(tree->numcodes * sizeof(unsigned));
+  blcount = (unsigned*)lodepng_malloc((tree->maxbitlen + 1) * sizeof(unsigned));
+  nextcode = (unsigned*)lodepng_malloc((tree->maxbitlen + 1) * sizeof(unsigned));
+  if(!tree->codes || !blcount || !nextcode) error = 83; /*alloc fail*/
+
+  if(!error) {
+    for(n = 0; n != tree->maxbitlen + 1; n++) blcount[n] = nextcode[n] = 0;
+    /*step 1: count number of instances of each code length*/
+    /*every tree->lengths[] value is used as an index here, so it must not exceed maxbitlen*/
+    for(bits = 0; bits != tree->numcodes; ++bits) ++blcount[tree->lengths[bits]];
+    /*step 2: generate the nextcode values*/
+    /*the first code of a length is (first code of the previous length + its symbol count) shifted left by one*/
+    for(bits = 1; bits <= tree->maxbitlen; ++bits) {
+      nextcode[bits] = (nextcode[bits - 1] + blcount[bits - 1]) << 1u;
+    }
+    /*step 3: generate all the codes*/
+    /*symbols with length 0 are skipped, so their tree->codes entry is left unwritten*/
+    for(n = 0; n != tree->numcodes; ++n) {
+      if(tree->lengths[n] != 0) {
+        tree->codes[n] = nextcode[tree->lengths[n]]++;
+        /*remove superfluous bits from the code*/
+        tree->codes[n] &= ((1u << tree->lengths[n]) - 1u);
+      }
+    }
+  }
+
+  /*reached on the error path too, so either pointer may be null here
+  (assumes lodepng_free accepts a null pointer - TODO confirm)*/
+  lodepng_free(blcount);
+  lodepng_free(nextcode);
+
+  if(!error) error = HuffmanTree_makeTable(tree);
+  return error;
+}
+
+/*
+Builds the tree from a list of code lengths (as stored in the PNG file), the
+way Deflate defines it. The lengths are copied into the tree, so the caller
+keeps ownership of bitlen. maxbitlen is the longest code the tree may contain.
+Return value is error (83 when the copy cannot be allocated).
+*/
+static unsigned HuffmanTree_makeFromLengths(HuffmanTree* tree, const unsigned* bitlen,
+                                            size_t numcodes, unsigned maxbitlen) {
+  unsigned k = 0;
+  unsigned* copy = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned));
+
+  tree->lengths = copy;
+  if(copy == 0) return 83; /*alloc fail*/
+
+  tree->maxbitlen = maxbitlen;
+  tree->numcodes = (unsigned)numcodes; /*number of symbols*/
+  while(k != numcodes) {
+    copy[k] = bitlen[k];
+    ++k;
+  }
+
+  /*codes and lookup table are derived from the fields filled in above*/
+  return HuffmanTree_makeFromLengths2(tree);
+}
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/*BPM: Boundary Package Merge, see "A Fast and Space-Economical Algorithm for Length-Limited Coding",
+Jyrki Katajainen, Alistair Moffat, Andrew Turpin, 1995.*/
+
+/*chain node for boundary package merge. The same struct is also used for the
+sorted leaves array, where weight holds the symbol frequency and index the
+symbol number (see lodepng_huffman_code_lengths)*/
+typedef struct BPMNode {
+  int weight; /*the sum of all weights in this chain*/
+  unsigned index; /*index of this leaf node (called "count" in the paper)*/
+  struct BPMNode* tail; /*the next nodes in this chain (null if last)*/
+  int in_use; /*mark flag, set and cleared by the garbage collection in bpmnode_create*/
+} BPMNode;
+
+/*lists of chains, together with the node pool that all chains are allocated from*/
+typedef struct BPMLists {
+  /*memory pool*/
+  unsigned memsize; /*number of nodes in memory (and of slots in freelist)*/
+  BPMNode* memory; /*the nodes themselves*/
+  unsigned numfree; /*number of valid entries in freelist*/
+  unsigned nextfree; /*index in freelist of the next node to hand out*/
+  BPMNode** freelist; /*pointers to the nodes that may be (re)used*/
+  /*two heads of lookahead chains per list*/
+  unsigned listsize; /*number of lists, i.e. the length of chains0 and chains1*/
+  BPMNode** chains0;
+  BPMNode** chains1;
+} BPMLists;
+
+/*creates a new chain node with the given parameters, from the memory in the lists.
+When the free list is used up, every node that is not reachable from a chains0
+or chains1 head is collected into the free list first. Returns the new node;
+its in_use field is not touched here.*/
+static BPMNode* bpmnode_create(BPMLists* lists, int weight, unsigned index, BPMNode* tail) {
+  unsigned i;
+  BPMNode* result;
+
+  /*memory full, so garbage collect*/
+  if(lists->nextfree >= lists->numfree) {
+    /*mark only those that are in use*/
+    for(i = 0; i != lists->memsize; ++i) lists->memory[i].in_use = 0;
+    for(i = 0; i != lists->listsize; ++i) {
+      BPMNode* node;
+      /*follow both lookahead chains of list i down to their last node*/
+      for(node = lists->chains0[i]; node != 0; node = node->tail) node->in_use = 1;
+      for(node = lists->chains1[i]; node != 0; node = node->tail) node->in_use = 1;
+    }
+    /*collect those that are free*/
+    lists->numfree = 0;
+    for(i = 0; i != lists->memsize; ++i) {
+      if(!lists->memory[i].in_use) lists->freelist[lists->numfree++] = &lists->memory[i];
+    }
+    lists->nextfree = 0;
+  }
+
+  /*NOTE(review): if the collection above found no free node, freelist[0] is stale and an
+  in-use node would be handed out; presumably memsize is chosen so this cannot happen - confirm*/
+  result = lists->freelist[lists->nextfree++];
+  result->weight = weight;
+  result->index = index;
+  result->tail = tail;
+  return result;
+}
+
+/*
+Sorts the leaves by ascending weight. The sort is stable: leaves with equal
+weight keep their original relative order.
+leaves: array of num nodes, sorted in place.
+Normally a bottom-up mergesort with a temporary buffer of num nodes is used.
+The function cannot report errors, so when that buffer cannot be allocated it
+falls back to a stable in-place insertion sort, which yields the same order.
+*/
+static void bpmnode_sort(BPMNode* leaves, size_t num) {
+  BPMNode* mem = (BPMNode*)lodepng_malloc(sizeof(*leaves) * num);
+  size_t width, counter = 0;
+
+  if(!mem) {
+    /*no scratch buffer: the strict '>' comparison keeps equal weights in their
+    original order, matching the '<=' tie-break of the merge below*/
+    size_t i;
+    for(i = 1; i < num; ++i) {
+      BPMNode moving = leaves[i];
+      size_t j = i;
+      while(j > 0 && leaves[j - 1].weight > moving.weight) {
+        leaves[j] = leaves[j - 1];
+        --j;
+      }
+      leaves[j] = moving;
+    }
+    return;
+  }
+
+  /*each pass merges neighbouring sorted runs of 'width' elements; the two
+  buffers swap the roles of source (a) and destination (b) every pass*/
+  for(width = 1; width < num; width *= 2) {
+    BPMNode* a = (counter & 1) ? mem : leaves;
+    BPMNode* b = (counter & 1) ? leaves : mem;
+    size_t p;
+    for(p = 0; p < num; p += 2 * width) {
+      /*runs are a[p..q) and a[q..r), both clamped to the end of the array*/
+      size_t q = (p + width > num) ? num : (p + width);
+      size_t r = (p + 2 * width > num) ? num : (p + 2 * width);
+      size_t i = p, j = q, k;
+      for(k = p; k < r; k++) {
+        /*on equal weights take from the left run first, this makes the sort stable*/
+        if(i < q && (j >= r || a[i].weight <= a[j].weight)) b[k] = a[i++];
+        else b[k] = a[j++];
+      }
+    }
+    counter++;
+  }
+  /*after an odd number of passes the sorted data is in the temporary buffer*/
+  if(counter & 1) lodepng_memcpy(leaves, mem, sizeof(*leaves) * num);
+  lodepng_free(mem);
+}
+
+/*Boundary Package Merge step, numpresent is the amount of leaves, and c is the current chain.
+lists: the chain heads and the node pool new nodes are taken from.
+leaves: the leaves sorted by weight (see bpmnode_sort).
+num: the caller's iteration counter; only used to skip the recursion when the
+last required chain is being added.*/
+static void boundaryPM(BPMLists* lists, BPMNode* leaves, size_t numpresent, int c, int num) {
+  /*number of leaves already covered by the newest chain of list c*/
+  unsigned lastindex = lists->chains1[c]->index;
+
+  if(c == 0) {
+    /*the first list only ever gets leaves; nothing to do once all leaves are used*/
+    if(lastindex >= numpresent) return;
+    lists->chains0[c] = lists->chains1[c];
+    lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, 0);
+  } else {
+    /*sum of the weights of the head nodes of the previous lookahead chains.*/
+    int sum = lists->chains0[c - 1]->weight + lists->chains1[c - 1]->weight;
+    lists->chains0[c] = lists->chains1[c];
+    if(lastindex < numpresent && sum > leaves[lastindex].weight) {
+      /*the next leaf weighs less than the package: add the leaf and keep the current tail*/
+      lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, lists->chains1[c]->tail);
+      return;
+    }
+    /*otherwise add the package, whose tail is the newest chain of the previous list*/
+    lists->chains1[c] = bpmnode_create(lists, sum, lastindex, lists->chains1[c - 1]);
+    /*in the end we are only interested in the chain of the last list, so no
+    need to recurse if we're at the last one (this gives measurable speedup)*/
+    if(num + 1 < (int)(2 * numpresent - 2)) {
+      /*the package used both lookahead chains of list c - 1, so two new ones are made*/
+      boundaryPM(lists, leaves, numpresent, c - 1, num);
+      boundaryPM(lists, leaves, numpresent, c - 1, num);
+    }
+  }
+}
+
+/*
+Computes length-limited Huffman code lengths from symbol frequencies, using
+boundary package merge.
+lengths: output, numcodes entries; symbols that never occur get length 0.
+frequencies: input, numcodes entries.
+numcodes: number of symbols, must be larger than 0.
+maxbitlen: longest code length allowed; (1 << maxbitlen) must be at least numcodes.
+Return value is error: 0 on success, 80 for invalid numcodes/maxbitlen, 83 if
+an allocation fails.
+*/
+unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies,
+                                      size_t numcodes, unsigned maxbitlen) {
+  unsigned error = 0;
+  unsigned i;
+  size_t numpresent = 0; /*number of symbols with non-zero frequency*/
+  BPMNode* leaves; /*the symbols, only those with > 0 frequency*/
+
+  if(numcodes == 0) return 80; /*error: a tree of 0 symbols is not supposed to be made*/
+  if((1u << maxbitlen) < (unsigned)numcodes) return 80; /*error: represent all symbols*/
+
+  leaves = (BPMNode*)lodepng_malloc(numcodes * sizeof(*leaves));
+  if(!leaves) return 83; /*alloc fail*/
+
+  /*collect the symbols that occur: weight is the frequency, index the symbol number*/
+  for(i = 0; i != numcodes; ++i) {
+    if(frequencies[i] > 0) {
+      leaves[numpresent].weight = (int)frequencies[i];
+      leaves[numpresent].index = i;
+      ++numpresent;
+    }
+  }
+
+  lodepng_memset(lengths, 0, numcodes * sizeof(*lengths));
+
+  /*ensure at least two present symbols. There should be at least one symbol
+  according to RFC 1951 section 3.2.7. Some decoders incorrectly require two. To
+  make these work as well ensure there are at least two symbols. The
+  Package-Merge code below also doesn't work correctly if there's only one
+  symbol, it'd give it the theoretical 0 bits but in practice zlib wants 1 bit.
+  The second symbol is only added if the alphabet has room for it: with
+  numcodes == 1 the lengths array has a single entry, and lengths[1] would be
+  out of bounds*/
+  if(numpresent == 0) {
+    lengths[0] = 1; /*note that for RFC 1951 section 3.2.7, only lengths[0] = 1 is needed*/
+    if(numcodes > 1) lengths[1] = 1;
+  } else if(numpresent == 1) {
+    lengths[leaves[0].index] = 1;
+    if(numcodes > 1) lengths[leaves[0].index == 0 ? 1 : 0] = 1;
+  } else {
+    BPMLists lists;
+    BPMNode* node;
+
+    bpmnode_sort(leaves, numpresent);
+
+    /*one list per possible code length, all served by one shared node pool*/
+    lists.listsize = maxbitlen;
+    lists.memsize = 2 * maxbitlen * (maxbitlen + 1);
+    lists.nextfree = 0;
+    lists.numfree = lists.memsize;
+    lists.memory = (BPMNode*)lodepng_malloc(lists.memsize * sizeof(*lists.memory));
+    lists.freelist = (BPMNode**)lodepng_malloc(lists.memsize * sizeof(BPMNode*));
+    lists.chains0 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*));
+    lists.chains1 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*));
+    if(!lists.memory || !lists.freelist || !lists.chains0 || !lists.chains1) error = 83; /*alloc fail*/
+
+    if(!error) {
+      for(i = 0; i != lists.memsize; ++i) lists.freelist[i] = &lists.memory[i];
+
+      /*the pool is fresh, so these two nodes end up in memory[0] and memory[1]:
+      the chains for the two lightest leaves, shared as start value by every list*/
+      bpmnode_create(&lists, leaves[0].weight, 1, 0);
+      bpmnode_create(&lists, leaves[1].weight, 2, 0);
+
+      for(i = 0; i != lists.listsize; ++i) {
+        lists.chains0[i] = &lists.memory[0];
+        lists.chains1[i] = &lists.memory[1];
+      }
+
+      /*each boundaryPM call adds one chain to the last list, and we need 2 * numpresent - 2 chains.*/
+      for(i = 2; i != 2 * numpresent - 2; ++i) boundaryPM(&lists, leaves, numpresent, (int)maxbitlen - 1, (int)i);
+
+      /*every node in the final chain adds one bit to the first node->index leaves*/
+      for(node = lists.chains1[maxbitlen - 1]; node; node = node->tail) {
+        for(i = 0; i != node->index; ++i) ++lengths[leaves[i].index];
+      }
+    }
+
+    lodepng_free(lists.memory);
+    lodepng_free(lists.freelist);
+    lodepng_free(lists.chains0);
+    lodepng_free(lists.chains1);
+  }
+
+  lodepng_free(leaves);
+  return error;
+}
+
+/*
+Create the Huffman tree given the symbol frequencies.
+frequencies: numcodes entries.
+mincodes: trailing symbols with frequency 0 are dropped from the tree, but the
+tree never gets fewer than mincodes symbols.
+maxbitlen: longest code length allowed.
+Return value is error (83 on allocation failure, otherwise the error of
+lodepng_huffman_code_lengths or HuffmanTree_makeFromLengths2).
+*/
+static unsigned HuffmanTree_makeFromFrequencies(HuffmanTree* tree, const unsigned* frequencies,
+                                                size_t mincodes, size_t numcodes, unsigned maxbitlen) {
+  unsigned error = 0;
+  /*trim zeroes. The size is tested before the array is read, so that
+  frequencies[numcodes - 1] is never evaluated when numcodes is 0*/
+  while(numcodes > mincodes && !frequencies[numcodes - 1]) --numcodes;
+  tree->lengths = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned));
+  if(!tree->lengths) return 83; /*alloc fail*/
+  tree->maxbitlen = maxbitlen;
+  tree->numcodes = (unsigned)numcodes; /*number of symbols*/
+
+  error = lodepng_huffman_code_lengths(tree->lengths, frequencies, numcodes, maxbitlen);
+  if(!error) error = HuffmanTree_makeFromLengths2(tree);
+  return error;
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/*builds the literal/length tree that the deflate specification prescribes for
+blocks with fixed Huffman codes. Return value is error (83 on alloc fail).*/
+static unsigned generateFixedLitLenTree(HuffmanTree* tree) {
+  unsigned result;
+  unsigned sym;
+  unsigned* lens = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned));
+  if(lens == 0) return 83; /*alloc fail*/
+
+  /*288 possible codes: 0-255=literals, 256=endcode, 257-285=lengthcodes, 286-287=unused*/
+  for(sym = 0; sym != 288; ++sym) {
+    if(sym < 144) lens[sym] = 8;
+    else if(sym < 256) lens[sym] = 9;
+    else if(sym < 280) lens[sym] = 7;
+    else lens[sym] = 8;
+  }
+
+  result = HuffmanTree_makeFromLengths(tree, lens, NUM_DEFLATE_CODE_SYMBOLS, 15);
+
+  /*the tree keeps its own copy of the lengths*/
+  lodepng_free(lens);
+  return result;
+}
+
+/*builds the distance tree that the deflate specification prescribes for
+blocks with fixed Huffman codes. Return value is error (83 on alloc fail).*/
+static unsigned generateFixedDistanceTree(HuffmanTree* tree) {
+  unsigned result;
+  unsigned sym = 0;
+  unsigned* lens = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned));
+  if(lens == 0) return 83; /*alloc fail*/
+
+  /*there are 32 distance codes, but 30-31 are unused; all of them are 5 bits long*/
+  while(sym != NUM_DISTANCE_SYMBOLS) {
+    lens[sym] = 5;
+    ++sym;
+  }
+
+  result = HuffmanTree_makeFromLengths(tree, lens, NUM_DISTANCE_SYMBOLS, 15);
+
+  /*the tree keeps its own copy of the lengths*/
+  lodepng_free(lens);
+  return result;
+}
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/*
+returns the code. The bit reader must already have been ensured at least 15 bits.
+Decodes one symbol with the lookup tables made by HuffmanTree_makeTable and
+advances the reader past the bits of that symbol. The result can be
+INVALIDSYMBOL when the bits hit a table entry that was filled in as unused.
+*/
+static unsigned huffmanDecodeSymbol(LodePNGBitReader* reader, const HuffmanTree* codetree) {
+  /*the first FIRSTBITS bits index the head table*/
+  unsigned short code = peekBits(reader, FIRSTBITS);
+  unsigned short l = codetree->table_len[code];
+  unsigned short value = codetree->table_value[code];
+  if(l <= FIRSTBITS) {
+    /*short code: the head table entry holds the code length and the symbol itself*/
+    advanceBits(reader, l);
+    return value;
+  } else {
+    /*long code: value is the start of a secondary table and l the longest
+    code length in it, so the next l - FIRSTBITS bits select the entry*/
+    unsigned index2;
+    advanceBits(reader, FIRSTBITS);
+    index2 = value + peekBits(reader, l - FIRSTBITS);
+    /*the secondary entry stores the full length of this code; FIRSTBITS of it were consumed above*/
+    advanceBits(reader, codetree->table_len[index2] - FIRSTBITS);
+    return codetree->table_value[index2];
+  }
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Inflator (Decompressor)                                                / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*fills in both trees of a deflate block that uses the fixed Huffman codes from
+the deflate specification. The distance tree is only generated when the
+literal/length tree succeeded. Returns error code.*/
+static unsigned getTreeInflateFixed(HuffmanTree* tree_ll, HuffmanTree* tree_d) {
+  unsigned result = generateFixedLitLenTree(tree_ll);
+  if(!result) result = generateFixedDistanceTree(tree_d);
+  return result;
+}
+
+/*get the tree of a deflated block with dynamic tree, the tree itself is also Huffman compressed with a known tree.
+Reads HLIT, HDIST and HCLEN, then the code length code lengths, builds the code
+length tree from them, decodes the literal/length and distance code lengths
+with that tree and finally builds tree_ll and tree_d.
+Returns error code: 0 on success; the values used in this function are 49 and
+50 (input too short), 83 (alloc fail), 54 (repeat code without a previous
+length), 13/14/15 (a repeat runs past HLIT + HDIST), 16 (disallowed symbol)
+and 64 (end code 256 has length 0); errors of HuffmanTree_makeFromLengths are
+passed through.*/
+static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d,
+                                      LodePNGBitReader* reader) {
+  /*make sure that length values that aren't filled in will be 0, or a wrong tree will be generated
+  (the memsets of bitlen_ll and bitlen_d further down take care of this)*/
+  unsigned error = 0;
+  unsigned n, HLIT, HDIST, HCLEN, i;
+
+  /*see comments in deflateDynamic for explanation of the context and these variables, it is analogous*/
+  unsigned* bitlen_ll = 0; /*lit,len code lengths*/
+  unsigned* bitlen_d = 0; /*dist code lengths*/
+  /*code length code lengths ("clcl"), the bit lengths of the huffman tree used to compress bitlen_ll and bitlen_d*/
+  unsigned* bitlen_cl = 0;
+  HuffmanTree tree_cl; /*the code tree for code length codes (the huffman tree for compressed huffman trees)*/
+
+  /*14 bits: 5 for HLIT, 5 for HDIST and 4 for HCLEN*/
+  if(!ensureBits17(reader, 14)) return 49; /*error: the bit pointer is or will go past the memory*/
+
+  /*number of literal/length codes + 257. Unlike the spec, the value 257 is added to it here already*/
+  HLIT =  readBits(reader, 5) + 257;
+  /*number of distance codes. Unlike the spec, the value 1 is added to it here already*/
+  HDIST = readBits(reader, 5) + 1;
+  /*number of code length codes. Unlike the spec, the value 4 is added to it here already*/
+  HCLEN = readBits(reader, 4) + 4;
+
+  bitlen_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(unsigned));
+  if(!bitlen_cl) return 83 /*alloc fail*/;
+
+  HuffmanTree_init(&tree_cl);
+
+  /*not a real loop: the body runs at most once and ends with an unconditional
+  break, the while only gives the error paths a way to break out to the cleanup
+  code below (ERROR_BREAK presumably sets error and breaks - its definition is
+  outside this part of the file)*/
+  while(!error) {
+    /*read the code length codes out of 3 * (amount of code length codes) bits*/
+    if(lodepng_gtofl(reader->bp, HCLEN * 3, reader->bitsize)) {
+      ERROR_BREAK(50); /*error: the bit pointer is or will go past the memory*/
+    }
+    /*the lengths are stored in the order given by CLCL_ORDER*/
+    for(i = 0; i != HCLEN; ++i) {
+      ensureBits9(reader, 3); /*out of bounds already checked above */
+      bitlen_cl[CLCL_ORDER[i]] = readBits(reader, 3);
+    }
+    /*code length codes that are not transmitted get length 0*/
+    for(i = HCLEN; i != NUM_CODE_LENGTH_CODES; ++i) {
+      bitlen_cl[CLCL_ORDER[i]] = 0;
+    }
+
+    error = HuffmanTree_makeFromLengths(&tree_cl, bitlen_cl, NUM_CODE_LENGTH_CODES, 7);
+    if(error) break;
+
+    /*now we can use this tree to read the lengths for the tree that this function will return*/
+    bitlen_ll = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned));
+    bitlen_d = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned));
+    if(!bitlen_ll || !bitlen_d) ERROR_BREAK(83 /*alloc fail*/);
+    lodepng_memset(bitlen_ll, 0, NUM_DEFLATE_CODE_SYMBOLS * sizeof(*bitlen_ll));
+    lodepng_memset(bitlen_d, 0, NUM_DISTANCE_SYMBOLS * sizeof(*bitlen_d));
+
+    /*i is the current symbol we're reading in the part that contains the code lengths of lit/len and dist codes*/
+    /*positions 0 to HLIT - 1 go to bitlen_ll, positions from HLIT on go to bitlen_d*/
+    i = 0;
+    while(i < HLIT + HDIST) {
+      unsigned code;
+      ensureBits25(reader, 22); /* up to 15 bits for huffman code, up to 7 extra bits below*/
+      code = huffmanDecodeSymbol(reader, &tree_cl);
+      if(code <= 15) /*a length code*/ {
+        if(i < HLIT) bitlen_ll[i] = code;
+        else bitlen_d[i - HLIT] = code;
+        ++i;
+      } else if(code == 16) /*repeat previous*/ {
+        unsigned replength = 3; /*read in the 2 bits that indicate repeat length (3-6)*/
+        unsigned value; /*set value to the previous code*/
+
+        if(i == 0) ERROR_BREAK(54); /*can't repeat previous if i is 0*/
+
+        replength += readBits(reader, 2);
+
+        /*the previous position i - 1 is still a lit/len position as long as i <= HLIT*/
+        if(i < HLIT + 1) value = bitlen_ll[i - 1];
+        else value = bitlen_d[i - HLIT - 1];
+        /*repeat this value in the next lengths*/
+        for(n = 0; n < replength; ++n) {
+          if(i >= HLIT + HDIST) ERROR_BREAK(13); /*error: i is larger than the amount of codes*/
+          if(i < HLIT) bitlen_ll[i] = value;
+          else bitlen_d[i - HLIT] = value;
+          ++i;
+        }
+      } else if(code == 17) /*repeat "0" 3-10 times*/ {
+        unsigned replength = 3; /*read in the bits that indicate repeat length*/
+        replength += readBits(reader, 3);
+
+        /*repeat this value in the next lengths*/
+        for(n = 0; n < replength; ++n) {
+          if(i >= HLIT + HDIST) ERROR_BREAK(14); /*error: i is larger than the amount of codes*/
+
+          if(i < HLIT) bitlen_ll[i] = 0;
+          else bitlen_d[i - HLIT] = 0;
+          ++i;
+        }
+      } else if(code == 18) /*repeat "0" 11-138 times*/ {
+        unsigned replength = 11; /*read in the bits that indicate repeat length*/
+        replength += readBits(reader, 7);
+
+        /*repeat this value in the next lengths*/
+        for(n = 0; n < replength; ++n) {
+          if(i >= HLIT + HDIST) ERROR_BREAK(15); /*error: i is larger than the amount of codes*/
+
+          if(i < HLIT) bitlen_ll[i] = 0;
+          else bitlen_d[i - HLIT] = 0;
+          ++i;
+        }
+      } else /*if(code == INVALIDSYMBOL)*/ {
+        ERROR_BREAK(16); /*error: tried to read disallowed huffman symbol*/
+      }
+      /*check if any of the ensureBits above went out of bounds*/
+      if(reader->bp > reader->bitsize) {
+        /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
+        (10=no endcode, 11=wrong jump outside of tree)*/
+        /* TODO: revise error codes 10,11,50: the above comment is no longer valid */
+        ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
+      }
+    }
+    /*an ERROR_BREAK inside one of the repeat loops only leaves that for loop, so error is tested again here*/
+    if(error) break;
+
+    if(bitlen_ll[256] == 0) ERROR_BREAK(64); /*the length of the end code 256 must be larger than 0*/
+
+    /*now we've finally got HLIT and HDIST, so generate the code trees, and the function is done*/
+    error = HuffmanTree_makeFromLengths(tree_ll, bitlen_ll, NUM_DEFLATE_CODE_SYMBOLS, 15);
+    if(error) break;
+    error = HuffmanTree_makeFromLengths(tree_d, bitlen_d, NUM_DISTANCE_SYMBOLS, 15);
+
+    break; /*end of error-while*/
+  }
+
+  /*cleanup for all paths; bitlen_ll and bitlen_d are still null when an early error occurred
+  (assumes lodepng_free accepts a null pointer - TODO confirm)*/
+  lodepng_free(bitlen_cl);
+  lodepng_free(bitlen_ll);
+  lodepng_free(bitlen_d);
+  HuffmanTree_cleanup(&tree_cl);
+
+  return error;
+}
+
+/*inflate a block with dynamic or fixed Huffman tree. btype must be 1 or 2.
+out: output vector, grown as needed. pos: in/out, the number of bytes written
+to out so far. reader: positioned right after the block header bits.
+Returns error code: 0 when the end code was reached; the values used in this
+function are 83 (alloc fail), 18 (distance code 30 or 31), 16 (disallowed
+symbol), 52 (distance reaches before the start of the output) and 51 (read
+past the end of the input); errors from building the trees are passed through.*/
+static unsigned inflateHuffmanBlock(ucvector* out, size_t* pos, LodePNGBitReader* reader,
+                                    unsigned btype) {
+  unsigned error = 0;
+  HuffmanTree tree_ll; /*the huffman tree for literal and length codes*/
+  HuffmanTree tree_d; /*the huffman tree for distance codes*/
+
+  HuffmanTree_init(&tree_ll);
+  HuffmanTree_init(&tree_d);
+
+  if(btype == 1) error = getTreeInflateFixed(&tree_ll, &tree_d);
+  else /*if(btype == 2)*/ error = getTreeInflateDynamic(&tree_ll, &tree_d, reader);
+
+  while(!error) /*decode all symbols until end reached, breaks at end code*/ {
+    /*code_ll is literal, length or end code*/
+    unsigned code_ll;
+    ensureBits25(reader, 20); /* up to 15 for the huffman symbol, up to 5 for the length extra bits */
+    code_ll = huffmanDecodeSymbol(reader, &tree_ll);
+    if(code_ll <= 255) /*literal symbol*/ {
+      /*ucvector_push_back would do the same, but for some reason the two lines below run 10% faster*/
+      if(!ucvector_resize(out, (*pos) + 1)) ERROR_BREAK(83 /*alloc fail*/);
+      out->data[*pos] = (unsigned char)code_ll;
+      ++(*pos);
+    } else if(code_ll >= FIRST_LENGTH_CODE_INDEX && code_ll <= LAST_LENGTH_CODE_INDEX) /*length code*/ {
+      unsigned code_d, distance;
+      unsigned numextrabits_l, numextrabits_d; /*extra bits for length and distance*/
+      size_t start, backward, length;
+
+      /*part 1: get length base*/
+      length = LENGTHBASE[code_ll - FIRST_LENGTH_CODE_INDEX];
+
+      /*part 2: get extra bits and add the value of that to length*/
+      numextrabits_l = LENGTHEXTRA[code_ll - FIRST_LENGTH_CODE_INDEX];
+      if(numextrabits_l != 0) {
+        /* bits already ensured above */
+        length += readBits(reader, numextrabits_l);
+      }
+
+      /*part 3: get distance code*/
+      ensureBits32(reader, 28); /* up to 15 for the huffman symbol, up to 13 for the extra bits */
+      code_d = huffmanDecodeSymbol(reader, &tree_d);
+      /*checked before code_d is used as index in DISTANCEBASE and DISTANCEEXTRA below*/
+      if(code_d > 29) {
+        if(code_d <= 31) {
+          ERROR_BREAK(18); /*error: invalid distance code (30-31 are never used)*/
+        } else /* if(code_d == INVALIDSYMBOL) */{
+          ERROR_BREAK(16); /*error: tried to read disallowed huffman symbol*/
+        }
+      }
+      distance = DISTANCEBASE[code_d];
+
+      /*part 4: get extra bits from distance*/
+      numextrabits_d = DISTANCEEXTRA[code_d];
+      if(numextrabits_d != 0) {
+        /* bits already ensured above */
+        distance += readBits(reader, numextrabits_d);
+      }
+
+      /*part 5: fill in all the out[n] values based on the length and dist*/
+      start = (*pos);
+      if(distance > start) ERROR_BREAK(52); /*too long backward distance*/
+      backward = start - distance;
+
+      if(!ucvector_resize(out, (*pos) + length)) ERROR_BREAK(83 /*alloc fail*/);
+      if(distance < length) {
+        /*source and destination overlap: only the first distance bytes exist
+        already and are copied in one go, the rest is copied byte per byte so
+        that bytes written by this same copy are read again*/
+        size_t forward;
+        lodepng_memcpy(out->data + *pos, out->data + backward, distance);
+        *pos += distance;
+        for(forward = distance; forward < length; ++forward) {
+          out->data[(*pos)++] = out->data[backward++];
+        }
+      } else {
+        /*no overlap, a single copy does it*/
+        lodepng_memcpy(out->data + *pos, out->data + backward, length);
+        *pos += length;
+      }
+    } else if(code_ll == 256) {
+      break; /*end code, break the loop*/
+    } else /*if(code_ll == INVALIDSYMBOL)*/ {
+      ERROR_BREAK(16); /*error: tried to read disallowed huffman symbol*/
+    }
+    /*check if any of the ensureBits above went out of bounds*/
+    if(reader->bp > reader->bitsize) {
+      /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
+      (10=no endcode, 11=wrong jump outside of tree)*/
+      /* TODO: revise error codes 10,11,50: the above comment is no longer valid */
+      ERROR_BREAK(51); /*error, bit pointer jumps past memory*/
+    }
+  }
+
+  /*the trees are cleaned up on every path, also when building them failed*/
+  HuffmanTree_cleanup(&tree_ll);
+  HuffmanTree_cleanup(&tree_d);
+
+  return error;
+}
+
+/*inflate a stored (uncompressed) block: skips to the next byte boundary, reads
+LEN and NLEN and appends LEN literal bytes to out.
+pos: in/out, number of bytes written to out so far; the reader's bit pointer is
+moved past the block on success.
+Returns error code: 0 on success, 52 or 23 if the input is too short, 21 if
+NLEN does not match LEN (unless settings->ignore_nlen is set), 83 on alloc fail.*/
+static unsigned inflateNoCompression(ucvector* out, size_t* pos,
+                                     LodePNGBitReader* reader, const LodePNGDecompressSettings* settings) {
+  size_t bytepos;
+  size_t size = reader->size;
+  unsigned LEN, NLEN, error = 0; /*error is never changed, so the final return yields 0*/
+
+  /*go to first boundary of byte*/
+  bytepos = (reader->bp + 7u) >> 3u;
+
+  /*read LEN (2 bytes) and NLEN (2 bytes)*/
+  /*NOTE(review): because of the >=, a block whose 4 header bytes end exactly at
+  the end of the input is rejected as well, even when LEN is 0 - confirm this is intended*/
+  if(bytepos + 4 >= size) return 52; /*error, bit pointer will jump past memory*/
+  /*both values are stored least significant byte first*/
+  LEN = (unsigned)reader->data[bytepos] + ((unsigned)reader->data[bytepos + 1] << 8u); bytepos += 2;
+  NLEN = (unsigned)reader->data[bytepos] + ((unsigned)reader->data[bytepos + 1] << 8u); bytepos += 2;
+
+  /*check if 16-bit NLEN is really the one's complement of LEN*/
+  if(!settings->ignore_nlen && LEN + NLEN != 65535) {
+    return 21; /*error: NLEN is not one's complement of LEN*/
+  }
+
+  /*NOTE(review): the output is grown before the input bounds check below, so on
+  error 23 the vector has already been resized*/
+  if(!ucvector_resize(out, (*pos) + LEN)) return 83; /*alloc fail*/
+
+  /*read the literal data: LEN bytes are now stored in the out buffer*/
+  if(bytepos + LEN > size) return 23; /*error: reading outside of in buffer*/
+
+  lodepng_memcpy(out->data + *pos, reader->data + bytepos, LEN);
+  *pos += LEN;
+  bytepos += LEN;
+
+  /*continue reading bits right after the copied bytes*/
+  reader->bp = bytepos << 3u;
+
+  return error;
+}
+
+/*inflates a complete deflate stream from in into the vector out, block after
+block, until a block with the BFINAL bit set has been decoded or an error
+occurs. Returns error code: 0 on success, 52 if the input ends where a block
+header is expected, 20 for the invalid block type 3, otherwise the error of
+the bit reader initialization or of the block decoder.*/
+static unsigned lodepng_inflatev(ucvector* out,
+                                 const unsigned char* in, size_t insize,
+                                 const LodePNGDecompressSettings* settings) {
+  LodePNGBitReader reader;
+  size_t outpos = 0; /*byte position in the out buffer*/
+  unsigned is_last = 0;
+  unsigned error = LodePNGBitReader_init(&reader, in, insize);
+
+  while(!error && !is_last) {
+    unsigned block_type;
+
+    /*block header: 1 bit BFINAL followed by 2 bits BTYPE*/
+    if(!ensureBits9(&reader, 3)) return 52; /*error, bit pointer will jump past memory*/
+    is_last = readBits(&reader, 1);
+    block_type = readBits(&reader, 2);
+
+    switch(block_type) {
+      case 0: /*no compression*/
+        error = inflateNoCompression(out, &outpos, &reader, settings);
+        break;
+      case 3:
+        return 20; /*error: invalid BTYPE*/
+      default: /*compression, BTYPE 01 or 10*/
+        error = inflateHuffmanBlock(out, &outpos, &reader, block_type);
+        break;
+    }
+  }
+
+  return error;
+}
+
+/*inflates the deflate data in[0..insize) into *out. *out and *outsize describe
+the buffer the decoder starts from; both are updated to whatever the internal
+vector holds afterwards, also when an error is returned. Returns error code.*/
+unsigned lodepng_inflate(unsigned char** out, size_t* outsize,
+                         const unsigned char* in, size_t insize,
+                         const LodePNGDecompressSettings* settings) {
+  ucvector buffer;
+  unsigned result;
+
+  /*wrap the caller's buffer in a vector, because the decoder resizes its output*/
+  ucvector_init_buffer(&buffer, *out, *outsize);
+  result = lodepng_inflatev(&buffer, in, insize, settings);
+
+  *outsize = buffer.size;
+  *out = buffer.data;
+  return result;
+}
+
+/*inflates with the custom_inflate callback from the settings if one is set,
+and with lodepng_inflate otherwise. Arguments and the returned error code are
+passed through unchanged.*/
+static unsigned inflate(unsigned char** out, size_t* outsize,
+                        const unsigned char* in, size_t insize,
+                        const LodePNGDecompressSettings* settings) {
+  if(!settings->custom_inflate) {
+    return lodepng_inflate(out, outsize, in, insize, settings);
+  }
+  return settings->custom_inflate(out, outsize, in, insize, settings);
+}
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Deflator (Compressor)                                                  / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*upper bound for the zero run length reported by countZeros; hash_init gives
+headz one entry for every run length from 0 up to and including this value*/
+static const size_t MAX_SUPPORTED_DEFLATE_LENGTH = 258;
+
+/*returns the index of the largest element that is smaller than or equal to
+value. array must be sorted in ascending order. Index 0 is never compared: it
+is the result when array[1] is already larger than value, so value is
+expected to be at least array[0].*/
+static size_t searchCodeIndex(const unsigned* array, size_t array_size, size_t value) {
+  /*binary search over the half open range [lo, hi) for the first index >= 1
+  whose element is not smaller than value; lo ends up as array_size if there is none*/
+  size_t lo = 1;
+  size_t hi = array_size;
+
+  while(lo < hi) {
+    size_t probe = lo + ((hi - lo) >> 1);
+    if(array[probe] < value) lo = probe + 1;
+    else hi = probe;
+  }
+
+  /*an exact hit is the answer itself*/
+  if(lo < array_size && array[lo] <= value) return lo;
+  /*otherwise the element just before it is the largest one that fits*/
+  return lo - 1;
+}
+
+/*appends one length/distance pair to values as four entries: the length
+symbol, the extra length value, the distance code and the extra distance value.
+NOTE(review): the results of the four uivector_push_back calls are not checked
+here, so a failed append is not reported to the caller.*/
+static void addLengthDistance(uivector* values, size_t length, size_t distance) {
+  /*values in encoded vector are those used by deflate:
+  0-255: literal bytes
+  256: end
+  257-285: length/distance pair (length code, followed by extra length bits, distance code, extra distance bits)
+  286-287: invalid*/
+
+  /*the code is the index of the largest base value that does not exceed the
+  length (or distance); the extra value is what remains above that base*/
+  unsigned length_code = (unsigned)searchCodeIndex(LENGTHBASE, 29, length);
+  unsigned extra_length = (unsigned)(length - LENGTHBASE[length_code]);
+  unsigned dist_code = (unsigned)searchCodeIndex(DISTANCEBASE, 30, distance);
+  unsigned extra_distance = (unsigned)(distance - DISTANCEBASE[dist_code]);
+
+  uivector_push_back(values, length_code + FIRST_LENGTH_CODE_INDEX);
+  uivector_push_back(values, extra_length);
+  uivector_push_back(values, dist_code);
+  uivector_push_back(values, extra_distance);
+}
+
+/*3 bytes of data get encoded into two bytes. The hash cannot use more than 3
+bytes as input because 3 is the minimum match length for deflate*/
+static const unsigned HASH_NUM_VALUES = 65536; /*number of entries of Hash.head, see hash_init*/
+static const unsigned HASH_BIT_MASK = 65535; /*HASH_NUM_VALUES - 1, but C90 does not like that as initializer*/
+
+/*hash chains over the sliding window. All arrays are allocated by hash_init
+and released by hash_cleanup. head has HASH_NUM_VALUES entries, headz has
+MAX_SUPPORTED_DEFLATE_LENGTH + 1 entries, the other arrays have one entry per
+window position. In head, headz and val, -1 means not set.*/
+typedef struct Hash {
+  int* head; /*hash value to head circular pos - can be outdated if went around window*/
+  /*circular pos to prev circular pos*/
+  unsigned short* chain;
+  int* val; /*circular pos to hash value*/
+
+  /*TODO: do this not only for zeros but for any repeated byte. However for PNG
+  it's always going to be the zeros that dominate, so not important for PNG*/
+  int* headz; /*similar to head, but for chainz*/
+  unsigned short* chainz; /*those with same amount of zeros*/
+  unsigned short* zeros; /*length of zeros streak, used as a second hash chain*/
+} Hash;
+
+/*allocates and initializes all tables of the hash for a window of windowsize
+positions. Returns 0 on success or 83 if an allocation fails; in that case the
+arrays that were allocated are left in the struct and nothing is initialized.*/
+static unsigned hash_init(Hash* hash, unsigned windowsize) {
+  unsigned k;
+
+  hash->head = (int*)lodepng_malloc(sizeof(int) * HASH_NUM_VALUES);
+  hash->val = (int*)lodepng_malloc(sizeof(int) * windowsize);
+  hash->chain = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
+
+  hash->zeros = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
+  hash->headz = (int*)lodepng_malloc(sizeof(int) * (MAX_SUPPORTED_DEFLATE_LENGTH + 1));
+  hash->chainz = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
+
+  if(!(hash->head && hash->chain && hash->val && hash->headz && hash->chainz && hash->zeros)) {
+    return 83; /*alloc fail*/
+  }
+
+  /*per window position: no hash value yet, and both chains point at
+  themselves (same value as index indicates uninitialized)*/
+  for(k = 0; k != windowsize; ++k) {
+    hash->val[k] = -1;
+    hash->chain[k] = k;
+    hash->chainz[k] = k;
+  }
+
+  /*-1 marks a hash value or zero run length that has no head position yet*/
+  for(k = 0; k != HASH_NUM_VALUES; ++k) hash->head[k] = -1;
+  for(k = 0; k <= MAX_SUPPORTED_DEFLATE_LENGTH; ++k) hash->headz[k] = -1;
+
+  return 0;
+}
+
+/*releases the six arrays of the hash; the struct itself is not freed and its
+pointers are not reset*/
+static void hash_cleanup(Hash* hash) {
+  /*tables of the zero run chain*/
+  lodepng_free(hash->chainz);
+  lodepng_free(hash->headz);
+  lodepng_free(hash->zeros);
+
+  /*tables of the hash value chain*/
+  lodepng_free(hash->chain);
+  lodepng_free(hash->val);
+  lodepng_free(hash->head);
+}
+
+
+
+/*returns the hash value, masked with HASH_BIT_MASK, of the up to 3 bytes that
+start at data[pos]. size is the length of data; the result is 0 when pos is
+not inside the data.*/
+static unsigned getHash(const unsigned char* data, size_t size, size_t pos) {
+  unsigned h = 0;
+
+  if(pos + 2 >= size) {
+    /*fewer than 3 bytes left: combine the bytes that remain, 8 bits apart*/
+    size_t remaining, k;
+    if(pos >= size) return 0;
+    remaining = size - pos;
+    for(k = 0; k != remaining; ++k) h ^= ((unsigned)data[pos + k] << (k * 8u));
+    return h & HASH_BIT_MASK;
+  }
+
+  /*A simple shift and xor hash is used. Since the data of PNGs is dominated
+  by zeroes due to the filters, a better hash does not have a significant
+  effect on speed in traversing the chain, and causes more time spent on
+  calculating the hash.*/
+  h = ((unsigned)data[pos + 0] << 0u)
+    ^ ((unsigned)data[pos + 1] << 4u)
+    ^ ((unsigned)data[pos + 2] << 8u);
+  return h & HASH_BIT_MASK;
+}
+
+/*
+Counts the consecutive zero bytes starting at data[pos]. The count stops at the
+end of the data and is never larger than MAX_SUPPORTED_DEFLATE_LENGTH.
+*/
+static unsigned countZeros(const unsigned char* data, size_t size, size_t pos) {
+  const unsigned char* const first = data + pos;
+  const unsigned char* const data_end = data + size;
+  const unsigned char* stop = first + MAX_SUPPORTED_DEFLATE_LENGTH;
+  const unsigned char* cur = first;
+
+  if(stop > data_end) stop = data_end;
+
+  for(; cur != stop; ++cur) {
+    if(*cur != 0) break;
+  }
+
+  /*pointer difference fits in 32 bits: it is at most MAX_SUPPORTED_DEFLATE_LENGTH*/
+  return (unsigned)(cur - first);
+}
+
+/*
+Registers window position wpos (= pos & (windowsize - 1)) in both chains of the
+hash: the regular one under hashval and the zero-run one under numzeros. The
+previous head of each chain, if there is one, becomes the predecessor of wpos.
+*/
+static void updateHashChain(Hash* hash, size_t wpos, unsigned hashval, unsigned short numzeros) {
+  int* head_slot = &hash->head[hashval];
+  int* headz_slot = &hash->headz[numzeros];
+
+  /*regular chain*/
+  hash->val[wpos] = (int)hashval;
+  if(*head_slot != -1) hash->chain[wpos] = (unsigned short)*head_slot;
+  *head_slot = (int)wpos;
+
+  /*zero-run chain*/
+  hash->zeros[wpos] = numzeros;
+  if(*headz_slot != -1) hash->chainz[wpos] = (unsigned short)*headz_slot;
+  *headz_slot = (int)wpos;
+}
+
+/*
+LZ77-encode the data. Return value is error code. The input are raw bytes, the output
+is in the form of unsigned integers with codes representing for example literal bytes, or
+length/distance pairs.
+It uses a hash table technique to let it encode faster. When doing LZ77 encoding, a
+sliding window (of windowsize) is used, and all past bytes in that window can be used as
+the "dictionary". A brute force search through all possible distances would be slow, and
+this hash technique is one out of several ways to speed this up.
+
+out: the encoded values are appended to this vector.
+hash: hash chain state; it is updated for every input position that is processed.
+in, inpos, insize: the positions inpos up to (not including) insize of in are encoded.
+windowsize: must be a nonzero power of two of at most 32768, else error 60 or 90 is returned.
+minmatch: a match shorter than this is written as a literal instead of a length/distance pair.
+nicematch: the search at a position stops as soon as a match of at least this length is
+  found (values above MAX_SUPPORTED_DEFLATE_LENGTH are clamped to it).
+lazymatching: if nonzero, a found match is held back for one position to check whether the
+  next position gives a longer match.
+Returns 0 on success, 83 if appending to out failed, 60 or 90 for a bad windowsize, 81 if a
+lazy match is pending at position 0, and 86 if a found offset is larger than windowsize.
+*/
+static unsigned encodeLZ77(uivector* out, Hash* hash,
+                           const unsigned char* in, size_t inpos, size_t insize, unsigned windowsize,
+                           unsigned minmatch, unsigned nicematch, unsigned lazymatching) {
+  size_t pos;
+  unsigned i, error = 0;
+  /*for large window lengths, assume the user wants no compression loss. Otherwise, max hash chain length speedup.*/
+  unsigned maxchainlength = windowsize >= 8192 ? windowsize : windowsize / 8u;
+  unsigned maxlazymatch = windowsize >= 8192 ? MAX_SUPPORTED_DEFLATE_LENGTH : 64;
+
+  unsigned usezeros = 1; /*not sure if setting it to false for windowsize < 8192 is better or worse*/
+  unsigned numzeros = 0; /*length of the run of zeros at the current position, 0 if not tracked*/
+
+  unsigned offset; /*the offset represents the distance in LZ77 terminology*/
+  unsigned length;
+  unsigned lazy = 0; /*nonzero while a match of the previous position is being held back*/
+  unsigned lazylength = 0, lazyoffset = 0; /*the held back match*/
+  unsigned hashval;
+  unsigned current_offset, current_length;
+  unsigned prev_offset;
+  const unsigned char *lastptr, *foreptr, *backptr;
+  unsigned hashpos;
+
+  if(windowsize == 0 || windowsize > 32768) return 60; /*error: windowsize smaller/larger than allowed*/
+  if((windowsize & (windowsize - 1)) != 0) return 90; /*error: must be power of two*/
+
+  if(nicematch > MAX_SUPPORTED_DEFLATE_LENGTH) nicematch = MAX_SUPPORTED_DEFLATE_LENGTH;
+
+  for(pos = inpos; pos < insize; ++pos) {
+    size_t wpos = pos & (windowsize - 1); /*position for in 'circular' hash buffers*/
+    unsigned chainlength = 0;
+
+    hashval = getHash(in, insize, pos);
+
+    /*keep numzeros up to date: count the run when it starts, afterwards only shrink it by one
+    per position when the run does not extend far enough to keep the same length*/
+    if(usezeros && hashval == 0) {
+      if(numzeros == 0) numzeros = countZeros(in, insize, pos);
+      else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros;
+    } else {
+      numzeros = 0;
+    }
+
+    updateHashChain(hash, wpos, hashval, numzeros);
+
+    /*the length and offset found for the current position*/
+    length = 0;
+    offset = 0;
+
+    /*first candidate: the previous position that was stored under the same hash value*/
+    hashpos = hash->chain[wpos];
+
+    /*one past the last byte a match starting at pos may cover*/
+    lastptr = &in[insize < pos + MAX_SUPPORTED_DEFLATE_LENGTH ? insize : pos + MAX_SUPPORTED_DEFLATE_LENGTH];
+
+    /*search for the longest string*/
+    prev_offset = 0;
+    for(;;) {
+      if(chainlength++ >= maxchainlength) break;
+      /*distance from the candidate to the current position inside the circular buffer*/
+      current_offset = (unsigned)(hashpos <= wpos ? wpos - hashpos : wpos - hashpos + windowsize);
+
+      if(current_offset < prev_offset) break; /*stop when went completely around the circular buffer*/
+      prev_offset = current_offset;
+      if(current_offset > 0) {
+        /*test the next characters*/
+        foreptr = &in[pos];
+        backptr = &in[pos - current_offset];
+
+        /*common case in PNGs is lots of zeros. Quickly skip over them as a speedup*/
+        if(numzeros >= 3) {
+          unsigned skip = hash->zeros[hashpos];
+          if(skip > numzeros) skip = numzeros;
+          backptr += skip;
+          foreptr += skip;
+        }
+
+        while(foreptr != lastptr && *backptr == *foreptr) /*maximum supported length by deflate is max length*/ {
+          ++backptr;
+          ++foreptr;
+        }
+        current_length = (unsigned)(foreptr - &in[pos]);
+
+        if(current_length > length) {
+          length = current_length; /*the longest length*/
+          offset = current_offset; /*the offset that is related to this longest length*/
+          /*jump out once a length of max length is found (speed gain). This also jumps
+          out if length is MAX_SUPPORTED_DEFLATE_LENGTH*/
+          if(current_length >= nicematch) break;
+        }
+      }
+
+      /*an entry that points at itself is uninitialized: end of the chain*/
+      if(hashpos == hash->chain[hashpos]) break;
+
+      /*once the match is longer than the run of zeros, continue along the chain of positions
+      with the same amount of zeros, else along the regular hash chain*/
+      if(numzeros >= 3 && length > numzeros) {
+        hashpos = hash->chainz[hashpos];
+        if(hash->zeros[hashpos] != numzeros) break;
+      } else {
+        hashpos = hash->chain[hashpos];
+        /*outdated hash value, happens if particular value was not encountered in whole last window*/
+        if(hash->val[hashpos] != (int)hashval) break;
+      }
+    }
+
+    if(lazymatching) {
+      /*hold back this match and first look at what the next position gives*/
+      if(!lazy && length >= 3 && length <= maxlazymatch && length < MAX_SUPPORTED_DEFLATE_LENGTH) {
+        lazy = 1;
+        lazylength = length;
+        lazyoffset = offset;
+        continue; /*try the next byte*/
+      }
+      if(lazy) {
+        lazy = 0;
+        if(pos == 0) ERROR_BREAK(81);
+        if(length > lazylength + 1) {
+          /*push the previous character as literal*/
+          if(!uivector_push_back(out, in[pos - 1])) ERROR_BREAK(83 /*alloc fail*/);
+        } else {
+          /*the held back match is used after all: go back to the position it belongs to*/
+          length = lazylength;
+          offset = lazyoffset;
+          hash->head[hashval] = -1; /*the same hashchain update will be done, this ensures no wrong alteration*/
+          hash->headz[numzeros] = -1; /*idem*/
+          --pos;
+        }
+      }
+    }
+    if(length >= 3 && offset > windowsize) ERROR_BREAK(86 /*too big (or overflown negative) offset*/);
+
+    /*encode it as length/distance pair or literal value*/
+    if(length < 3) /*only lengths of 3 or higher are supported as length/distance pair*/ {
+      if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/);
+    } else if(length < minmatch || (length == 3 && offset > 4096)) {
+      /*compensate for the fact that longer offsets have more extra bits, a
+      length of only 3 may be not worth it then*/
+      if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/);
+    } else {
+      addLengthDistance(out, length, offset);
+      /*the positions covered by the match are skipped, but are still added to the hash*/
+      for(i = 1; i < length; ++i) {
+        ++pos;
+        wpos = pos & (windowsize - 1);
+        hashval = getHash(in, insize, pos);
+        if(usezeros && hashval == 0) {
+          if(numzeros == 0) numzeros = countZeros(in, insize, pos);
+          else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros;
+        } else {
+          numzeros = 0;
+        }
+        updateHashChain(hash, wpos, hashval, numzeros);
+      }
+    }
+  } /*end of the loop through each character of input*/
+
+  return error;
+}
+
+/* /////////////////////////////////////////////////////////////////////////// */
+
+/*
+Appends data to out as a sequence of non-compressed ("stored", BTYPE 0) deflate blocks.
+Each block holds at most 65535 bytes; only the last one has BFINAL set. Empty input still
+produces one final block with LEN 0, so that the output always is a complete deflate stream.
+Returns 0 on success, 83 if growing out failed.
+*/
+static unsigned deflateNoCompression(ucvector* out, const unsigned char* data, size_t datasize) {
+  /*non compressed deflate block data: 1 bit BFINAL,2 bits BTYPE,(5 bits): it jumps to start of next byte,
+  2 bytes LEN, 2 bytes NLEN, LEN bytes literal DATA*/
+
+  size_t i, j, numdeflateblocks = (datasize + 65534u) / 65535u;
+  size_t datapos = 0; /*size_t like datasize, so that inputs of 4GiB or more do not wrap around*/
+
+  /*without this, empty input would write nothing at all, which is not a valid deflate stream*/
+  if(numdeflateblocks == 0) numdeflateblocks = 1;
+
+  for(i = 0; i != numdeflateblocks; ++i) {
+    unsigned BFINAL, BTYPE, LEN, NLEN;
+    unsigned char firstbyte;
+
+    BFINAL = (i == numdeflateblocks - 1);
+    BTYPE = 0;
+
+    firstbyte = (unsigned char)(BFINAL + ((BTYPE & 1u) << 1u) + ((BTYPE & 2u) << 1u));
+    if(!ucvector_push_back(out, firstbyte)) return 83; /*alloc fail*/
+
+    LEN = 65535;
+    if(datasize - datapos < 65535u) LEN = (unsigned)(datasize - datapos);
+    NLEN = 65535 - LEN; /*one's complement of LEN in 16 bits*/
+
+    if(!ucvector_push_back(out, (unsigned char)(LEN & 255))) return 83; /*alloc fail*/
+    if(!ucvector_push_back(out, (unsigned char)(LEN >> 8u))) return 83; /*alloc fail*/
+    if(!ucvector_push_back(out, (unsigned char)(NLEN & 255))) return 83; /*alloc fail*/
+    if(!ucvector_push_back(out, (unsigned char)(NLEN >> 8u))) return 83; /*alloc fail*/
+
+    /*Decompressed data*/
+    for(j = 0; j != LEN; ++j) {
+      if(!ucvector_push_back(out, data[datapos++])) return 83; /*alloc fail*/
+    }
+  }
+
+  return 0;
+}
+
+/*
+Writes the LZ77-encoded values to the bit stream, using the given huffman trees.
+tree_ll: tree for the literal and length codes.
+tree_d: tree for the distance codes.
+A value above 256 is a length code and is followed in lz77_encoded by three more
+values: the length extra bits, the distance code and the distance extra bits.
+*/
+static void writeLZ77data(LodePNGBitWriter* writer, const uivector* lz77_encoded,
+                          const HuffmanTree* tree_ll, const HuffmanTree* tree_d) {
+  size_t pos = 0;
+  while(pos != lz77_encoded->size) {
+    unsigned symbol = lz77_encoded->data[pos];
+    writeBitsReversed(writer, tree_ll->codes[symbol], tree_ll->lengths[symbol]);
+
+    if(symbol > 256) {
+      /*the three values that belong to this length code*/
+      unsigned len_extra = lz77_encoded->data[pos + 1];
+      unsigned dist_code = lz77_encoded->data[pos + 2];
+      unsigned dist_extra = lz77_encoded->data[pos + 3];
+
+      /*the amount of extra bits follows from the length and the distance code*/
+      unsigned num_len_extra = LENGTHEXTRA[symbol - FIRST_LENGTH_CODE_INDEX];
+      unsigned num_dist_extra = DISTANCEEXTRA[dist_code];
+
+      writeBits(writer, len_extra, num_len_extra);
+      writeBitsReversed(writer, tree_d->codes[dist_code], tree_d->lengths[dist_code]);
+      writeBits(writer, dist_extra, num_dist_extra);
+
+      pos += 3;
+    }
+
+    ++pos;
+  }
+}
+
+/*
+Deflate for a block of type "dynamic", that is, with freely, optimally, created huffman trees.
+writer: bit writer that receives the complete block (header, trees, data and end code).
+hash: hash chain state that is handed to encodeLZ77.
+data, datapos, dataend: the positions datapos up to (not including) dataend of data form this block.
+settings: use_lz77 selects LZ77 encoding; windowsize, minmatch, nicematch and lazymatching are
+  handed to encodeLZ77.
+final: written as the BFINAL bit of the block.
+Returns 0 on success, 83 on allocation failure, 64 if the end code got no huffman code, or the
+error of encodeLZ77 or HuffmanTree_makeFromFrequencies.
+*/
+static unsigned deflateDynamic(LodePNGBitWriter* writer, Hash* hash,
+                               const unsigned char* data, size_t datapos, size_t dataend,
+                               const LodePNGCompressSettings* settings, unsigned final) {
+  unsigned error = 0;
+
+  /*
+  A block is compressed as follows: The PNG data is lz77 encoded, resulting in
+  literal bytes and length/distance pairs. This is then huffman compressed with
+  two huffman trees. One huffman tree is used for the lit and len values ("ll"),
+  another huffman tree is used for the dist values ("d"). These two trees are
+  stored using their code lengths, and to compress even more these code lengths
+  are also run-length encoded and huffman compressed. This gives a huffman tree
+  of code lengths "cl". The code lengths used to describe this third tree are
+  the code length code lengths ("clcl").
+  */
+
+  /*The lz77 encoded data, represented with integers since there will also be length and distance codes in it*/
+  uivector lz77_encoded;
+  HuffmanTree tree_ll; /*tree for lit,len values*/
+  HuffmanTree tree_d; /*tree for distance codes*/
+  HuffmanTree tree_cl; /*tree for encoding the code lengths representing tree_ll and tree_d*/
+  unsigned* frequencies_ll = 0; /*frequency of lit,len codes*/
+  unsigned* frequencies_d = 0; /*frequency of dist codes*/
+  unsigned* frequencies_cl = 0; /*frequency of code length codes*/
+  unsigned* bitlen_lld = 0; /*lit,len,dist code lengths (in bits), literally (without repeat codes).*/
+  unsigned* bitlen_lld_e = 0; /*bitlen_lld encoded with repeat codes (this is a rudimentary run length compression)*/
+  size_t datasize = dataend - datapos;
+
+  /*
+  If we could call "bitlen_cl" the code length code lengths ("clcl"), that is the bit lengths of codes to represent
+  tree_cl in CLCL_ORDER, then due to the huffman compression of huffman tree representations ("two levels"), there are
+  some analogies:
+  bitlen_lld is to tree_cl what data is to tree_ll and tree_d.
+  bitlen_lld_e is to bitlen_lld what lz77_encoded is to data.
+  bitlen_cl is to bitlen_lld_e what bitlen_lld is to lz77_encoded.
+  */
+
+  unsigned BFINAL = final;
+  size_t i;
+  size_t numcodes_ll, numcodes_d, numcodes_lld, numcodes_lld_e, numcodes_cl;
+  unsigned HLIT, HDIST, HCLEN;
+
+  uivector_init(&lz77_encoded);
+  HuffmanTree_init(&tree_ll);
+  HuffmanTree_init(&tree_d);
+  HuffmanTree_init(&tree_cl);
+  /* could fit on stack, but >1KB is on the larger side so allocate instead */
+  frequencies_ll = (unsigned*)lodepng_malloc(286 * sizeof(*frequencies_ll));
+  frequencies_d = (unsigned*)lodepng_malloc(30 * sizeof(*frequencies_d));
+  frequencies_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(*frequencies_cl));
+
+  if(!frequencies_ll || !frequencies_d || !frequencies_cl) error = 83; /*alloc fail*/
+
+  /*This while loop never loops due to a break at the end, it is here to
+  allow breaking out of it to the cleanup phase on error conditions.*/
+  while(!error) {
+    lodepng_memset(frequencies_ll, 0, 286 * sizeof(*frequencies_ll));
+    lodepng_memset(frequencies_d, 0, 30 * sizeof(*frequencies_d));
+    lodepng_memset(frequencies_cl, 0, NUM_CODE_LENGTH_CODES * sizeof(*frequencies_cl));
+
+    if(settings->use_lz77) {
+      error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize,
+                         settings->minmatch, settings->nicematch, settings->lazymatching);
+      if(error) break;
+    } else {
+      if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83 /*alloc fail*/);
+      for(i = datapos; i < dataend; ++i) lz77_encoded.data[i - datapos] = data[i]; /*no LZ77, but still will be Huffman compressed*/
+    }
+
+    /*Count the frequencies of lit, len and dist codes*/
+    for(i = 0; i != lz77_encoded.size; ++i) {
+      unsigned symbol = lz77_encoded.data[i];
+      ++frequencies_ll[symbol];
+      /*a length code (> 256) is followed by 3 more values, of which the second is the distance
+      code; those 3 values are skipped here*/
+      if(symbol > 256) {
+        unsigned dist = lz77_encoded.data[i + 2];
+        ++frequencies_d[dist];
+        i += 3;
+      }
+    }
+    frequencies_ll[256] = 1; /*there will be exactly 1 end code, at the end of the block*/
+
+    /*Make both huffman trees, one for the lit and len codes, one for the dist codes*/
+    error = HuffmanTree_makeFromFrequencies(&tree_ll, frequencies_ll, 257, 286, 15);
+    if(error) break;
+    /*2, not 1, is chosen for mincodes: some buggy PNG decoders require at least 2 symbols in the dist tree*/
+    error = HuffmanTree_makeFromFrequencies(&tree_d, frequencies_d, 2, 30, 15);
+    if(error) break;
+
+    numcodes_ll = LODEPNG_MIN(tree_ll.numcodes, 286);
+    numcodes_d = LODEPNG_MIN(tree_d.numcodes, 30);
+    /*store the code lengths of both generated trees in bitlen_lld*/
+    numcodes_lld = numcodes_ll + numcodes_d;
+    bitlen_lld = (unsigned*)lodepng_malloc(numcodes_lld * sizeof(*bitlen_lld));
+    /*numcodes_lld_e never needs more size than bitlen_lld*/
+    bitlen_lld_e = (unsigned*)lodepng_malloc(numcodes_lld * sizeof(*bitlen_lld_e));
+    if(!bitlen_lld || !bitlen_lld_e) ERROR_BREAK(83); /*alloc fail*/
+    numcodes_lld_e = 0;
+
+    for(i = 0; i != numcodes_ll; ++i) bitlen_lld[i] = tree_ll.lengths[i];
+    for(i = 0; i != numcodes_d; ++i) bitlen_lld[numcodes_ll + i] = tree_d.lengths[i];
+
+    /*run-length compress bitlen_lld into bitlen_lld_e by using repeat codes 16 (copy length 3-6 times),
+    17 (3-10 zeroes), 18 (11-138 zeroes)*/
+    for(i = 0; i != numcodes_lld; ++i) {
+      unsigned j = 0; /*amount of repetitions*/
+      while(i + j + 1 < numcodes_lld && bitlen_lld[i + j + 1] == bitlen_lld[i]) ++j;
+
+      if(bitlen_lld[i] == 0 && j >= 2) /*repeat code for zeroes*/ {
+        ++j; /*include the first zero*/
+        if(j <= 10) /*repeat code 17 supports max 10 zeroes*/ {
+          bitlen_lld_e[numcodes_lld_e++] = 17;
+          bitlen_lld_e[numcodes_lld_e++] = j - 3;
+        } else /*repeat code 18 supports max 138 zeroes*/ {
+          if(j > 138) j = 138;
+          bitlen_lld_e[numcodes_lld_e++] = 18;
+          bitlen_lld_e[numcodes_lld_e++] = j - 11;
+        }
+        i += (j - 1);
+      } else if(j >= 3) /*repeat code for value other than zero*/ {
+        size_t k;
+        unsigned num = j / 6u, rest = j % 6u;
+        /*the value itself is written once, followed by repeat codes for its j repetitions*/
+        bitlen_lld_e[numcodes_lld_e++] = bitlen_lld[i];
+        for(k = 0; k < num; ++k) {
+          bitlen_lld_e[numcodes_lld_e++] = 16;
+          bitlen_lld_e[numcodes_lld_e++] = 6 - 3;
+        }
+        if(rest >= 3) {
+          bitlen_lld_e[numcodes_lld_e++] = 16;
+          bitlen_lld_e[numcodes_lld_e++] = rest - 3;
+        }
+        /*fewer than 3 left over: too few for a repeat code, leave them for the next iterations*/
+        else j -= rest;
+        i += j;
+      } else /*too short to benefit from repeat code*/ {
+        bitlen_lld_e[numcodes_lld_e++] = bitlen_lld[i];
+      }
+    }
+
+    /*generate tree_cl, the huffmantree of huffmantrees*/
+    for(i = 0; i != numcodes_lld_e; ++i) {
+      ++frequencies_cl[bitlen_lld_e[i]];
+      /*after a repeat code come the bits that specify the number of repetitions,
+      those don't need to be in the frequencies_cl calculation*/
+      if(bitlen_lld_e[i] >= 16) ++i;
+    }
+
+    error = HuffmanTree_makeFromFrequencies(&tree_cl, frequencies_cl,
+                                            NUM_CODE_LENGTH_CODES, NUM_CODE_LENGTH_CODES, 7);
+    if(error) break;
+
+    /*compute amount of code-length-code-lengths to output*/
+    numcodes_cl = NUM_CODE_LENGTH_CODES;
+    /*trim zeros at the end (using CLCL_ORDER), but minimum size must be 4 (see HCLEN below)*/
+    while(numcodes_cl > 4u && tree_cl.lengths[CLCL_ORDER[numcodes_cl - 1u]] == 0) {
+      numcodes_cl--;
+    }
+
+    /*
+    Write everything into the output
+
+    After the BFINAL and BTYPE, the dynamic block consists out of the following:
+    - 5 bits HLIT, 5 bits HDIST, 4 bits HCLEN
+    - (HCLEN+4)*3 bits code lengths of code length alphabet
+    - HLIT + 257 code lengths of lit/length alphabet (encoded using the code length
+      alphabet, + possible repetition codes 16, 17, 18)
+    - HDIST + 1 code lengths of distance alphabet (encoded using the code length
+      alphabet, + possible repetition codes 16, 17, 18)
+    - compressed data
+    - 256 (end code)
+    */
+
+    /*Write block type*/
+    writeBits(writer, BFINAL, 1);
+    writeBits(writer, 0, 1); /*first bit of BTYPE "dynamic"*/
+    writeBits(writer, 1, 1); /*second bit of BTYPE "dynamic"*/
+
+    /*write the HLIT, HDIST and HCLEN values*/
+    /*all three sizes take trimmed ending zeroes into account, done either by HuffmanTree_makeFromFrequencies
+    or in the loop for numcodes_cl above, which saves space. */
+    HLIT = (unsigned)(numcodes_ll - 257);
+    HDIST = (unsigned)(numcodes_d - 1);
+    HCLEN = (unsigned)(numcodes_cl - 4);
+    writeBits(writer, HLIT, 5);
+    writeBits(writer, HDIST, 5);
+    writeBits(writer, HCLEN, 4);
+
+    /*write the code lengths of the code length alphabet ("bitlen_cl")*/
+    for(i = 0; i != numcodes_cl; ++i) writeBits(writer, tree_cl.lengths[CLCL_ORDER[i]], 3);
+
+    /*write the lengths of the lit/len AND the dist alphabet*/
+    for(i = 0; i != numcodes_lld_e; ++i) {
+      writeBitsReversed(writer, tree_cl.codes[bitlen_lld_e[i]], tree_cl.lengths[bitlen_lld_e[i]]);
+      /*extra bits of repeat codes*/
+      if(bitlen_lld_e[i] == 16) writeBits(writer, bitlen_lld_e[++i], 2);
+      else if(bitlen_lld_e[i] == 17) writeBits(writer, bitlen_lld_e[++i], 3);
+      else if(bitlen_lld_e[i] == 18) writeBits(writer, bitlen_lld_e[++i], 7);
+    }
+
+    /*write the compressed data symbols*/
+    writeLZ77data(writer, &lz77_encoded, &tree_ll, &tree_d);
+    /*error: the length of the end code 256 must be larger than 0*/
+    if(tree_ll.lengths[256] == 0) ERROR_BREAK(64);
+
+    /*write the end code*/
+    writeBitsReversed(writer, tree_ll.codes[256], tree_ll.lengths[256]);
+
+    break; /*end of error-while*/
+  }
+
+  /*cleanup*/
+  uivector_cleanup(&lz77_encoded);
+  HuffmanTree_cleanup(&tree_ll);
+  HuffmanTree_cleanup(&tree_d);
+  HuffmanTree_cleanup(&tree_cl);
+  lodepng_free(frequencies_ll);
+  lodepng_free(frequencies_d);
+  lodepng_free(frequencies_cl);
+  lodepng_free(bitlen_lld);
+  lodepng_free(bitlen_lld_e);
+
+  return error;
+}
+
+/*
+Deflate for a block of type "fixed", that is, with the predefined huffman trees.
+The positions datapos up to (not including) dataend of data are written to the
+writer, LZ77 encoded first if settings->use_lz77 is set. final becomes the BFINAL
+bit. Returns 0 on success, else the error of the tree generation or of encodeLZ77.
+*/
+static unsigned deflateFixed(LodePNGBitWriter* writer, Hash* hash,
+                             const unsigned char* data,
+                             size_t datapos, size_t dataend,
+                             const LodePNGCompressSettings* settings, unsigned final) {
+  HuffmanTree litlen_tree; /*fixed tree of the literal values and length codes*/
+  HuffmanTree dist_tree; /*fixed tree of the distance codes*/
+  unsigned error;
+
+  HuffmanTree_init(&litlen_tree);
+  HuffmanTree_init(&dist_tree);
+
+  error = generateFixedLitLenTree(&litlen_tree);
+  if(error == 0) error = generateFixedDistanceTree(&dist_tree);
+
+  if(error == 0) {
+    /*block header: BFINAL, then the two bits of BTYPE*/
+    writeBits(writer, final, 1);
+    writeBits(writer, 1, 1); /*first bit of BTYPE*/
+    writeBits(writer, 0, 1); /*second bit of BTYPE*/
+
+    if(!settings->use_lz77) {
+      /*no LZ77: each input byte is huffman coded as a literal*/
+      size_t pos;
+      for(pos = datapos; pos < dataend; ++pos) {
+        unsigned char byte = data[pos];
+        writeBitsReversed(writer, litlen_tree.codes[byte], litlen_tree.lengths[byte]);
+      }
+    } else {
+      /*LZ77 encode into a temporary vector, then huffman code the result*/
+      uivector symbols;
+      uivector_init(&symbols);
+      error = encodeLZ77(&symbols, hash, data, datapos, dataend, settings->windowsize,
+                         settings->minmatch, settings->nicematch, settings->lazymatching);
+      if(error == 0) writeLZ77data(writer, &symbols, &litlen_tree, &dist_tree);
+      uivector_cleanup(&symbols);
+    }
+
+    /*the END code closes the block*/
+    if(error == 0) writeBitsReversed(writer, litlen_tree.codes[256], litlen_tree.lengths[256]);
+  }
+
+  /*the trees are released on the error paths too*/
+  HuffmanTree_cleanup(&litlen_tree);
+  HuffmanTree_cleanup(&dist_tree);
+
+  return error;
+}
+
+/*
+Deflates in[0..insize-1] and appends the result to out.
+settings->btype selects the block type: 0 = no compression, 1 = fixed huffman trees,
+2 = dynamic huffman trees; any other value gives error 61. For btype 1 the whole input becomes
+one block, for btype 2 it is split in blocks of 65536 to 262144 bytes. Empty input is written
+as one empty block.
+Returns 0 on success, else the error of hash_init or of the block encoder that was used.
+*/
+static unsigned lodepng_deflatev(ucvector* out, const unsigned char* in, size_t insize,
+                                 const LodePNGCompressSettings* settings) {
+  unsigned error = 0;
+  size_t i, blocksize, numdeflateblocks;
+  Hash hash;
+  LodePNGBitWriter writer;
+
+  LodePNGBitWriter_init(&writer, out);
+
+  if(settings->btype > 2) return 61;
+  else if(settings->btype == 0) return deflateNoCompression(out, in, insize);
+  else if(settings->btype == 1) blocksize = insize;
+  else /*if(settings->btype == 2)*/ {
+    /*on PNGs, deflate blocks of 65-262k seem to give most dense encoding*/
+    blocksize = insize / 8u + 8;
+    if(blocksize < 65536) blocksize = 65536;
+    if(blocksize > 262144) blocksize = 262144;
+  }
+
+  /*blocksize is 0 for btype 1 with empty input. Dividing by it would be a division by zero,
+  so that case goes straight to a single (empty) block.*/
+  numdeflateblocks = blocksize ? (insize + blocksize - 1) / blocksize : 1;
+  if(numdeflateblocks == 0) numdeflateblocks = 1;
+
+  error = hash_init(&hash, settings->windowsize);
+
+  if(!error) {
+    for(i = 0; i != numdeflateblocks && !error; ++i) {
+      unsigned final = (i == numdeflateblocks - 1);
+      size_t start = i * blocksize;
+      size_t end = start + blocksize;
+      if(end > insize) end = insize;
+
+      if(settings->btype == 1) error = deflateFixed(&writer, &hash, in, start, end, settings, final);
+      else if(settings->btype == 2) error = deflateDynamic(&writer, &hash, in, start, end, settings, final);
+    }
+  }
+
+  /*also needed when hash_init failed: it may have allocated part of the tables*/
+  hash_cleanup(&hash);
+
+  return error;
+}
+
+/*
+Deflates in[0..insize-1]. The buffer given by *out and *outsize is taken over as
+the start of the output; afterwards *out and *outsize describe the, possibly
+reallocated, buffer with the deflated data appended. Returns the error code of
+lodepng_deflatev.
+*/
+unsigned lodepng_deflate(unsigned char** out, size_t* outsize,
+                         const unsigned char* in, size_t insize,
+                         const LodePNGCompressSettings* settings) {
+  ucvector buffer;
+  unsigned result;
+
+  ucvector_init_buffer(&buffer, *out, *outsize);
+  result = lodepng_deflatev(&buffer, in, insize, settings);
+
+  /*hand the buffer back to the caller, also when an error happened*/
+  *outsize = buffer.size;
+  *out = buffer.data;
+  return result;
+}
+
+/*deflate using the custom deflate function of the settings if there is one, else lodepng_deflate*/
+static unsigned deflate(unsigned char** out, size_t* outsize,
+                        const unsigned char* in, size_t insize,
+                        const LodePNGCompressSettings* settings) {
+  if(!settings->custom_deflate) {
+    return lodepng_deflate(out, outsize, in, insize, settings);
+  }
+  return settings->custom_deflate(out, outsize, in, insize, settings);
+}
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Adler32                                                                / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*
+Continues the adler32 checksum adler with the bytes data[0..len-1] and returns
+the new checksum. The low 16 bits hold the sum of the bytes, the high 16 bits
+the sum of those sums, both modulo 65521.
+*/
+static unsigned update_adler32(unsigned adler, const unsigned char* data, unsigned len) {
+  unsigned lo = adler & 0xffffu;
+  unsigned hi = (adler >> 16u) & 0xffffu;
+  const unsigned char* cursor = data;
+  unsigned remaining = len;
+
+  while(remaining > 0u) {
+    /*5552 bytes can be summed before the sums can overflow, so the modulo
+    operation is only needed once per chunk of that size*/
+    unsigned chunk = remaining;
+    const unsigned char* chunk_end;
+    if(chunk > 5552u) chunk = 5552u;
+    remaining -= chunk;
+
+    chunk_end = cursor + chunk;
+    while(cursor != chunk_end) {
+      lo += *cursor++;
+      hi += lo;
+    }
+
+    lo %= 65521u;
+    hi %= 65521u;
+  }
+
+  return (hi << 16u) | lo;
+}
+
+/*Computes the adler32 checksum of the bytes data[0..len-1], starting from the initial value 1*/
+static unsigned adler32(const unsigned char* data, unsigned len) {
+  const unsigned initial = 1u;
+  return update_adler32(initial, data, len);
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Zlib                                                                   / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/*
+Decompresses zlib data: checks the 2-byte zlib header, inflates the deflate data after it and,
+unless settings->ignore_adler32 is set, compares the adler32 checksum of the result with the 4
+bytes at the end of the input.
+out, outsize: handed to inflate, which stores the decompressed data there. They are not
+  released here when an error is returned after inflate succeeded.
+in, insize: the zlib data.
+Returns 0 on success, 53 if the input is too small, 24 if the header check value is wrong, 25
+for an unsupported compression method or window size, 26 if a preset dictionary is requested,
+58 if the checksum does not match, or the error of inflate.
+*/
+unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                                 size_t insize, const LodePNGDecompressSettings* settings) {
+  unsigned error = 0;
+  unsigned CM, CINFO, FDICT;
+
+  if(insize < 2) return 53; /*error, size of zlib data too small*/
+  /*read information from zlib header*/
+  if((in[0] * 256 + in[1]) % 31 != 0) {
+    /*error: 256 * in[0] + in[1] must be a multiple of 31, the FCHECK value is supposed to be made that way*/
+    return 24;
+  }
+
+  CM = in[0] & 15;
+  CINFO = (in[0] >> 4) & 15;
+  /*FCHECK = in[1] & 31;*/ /*FCHECK is already tested above*/
+  FDICT = (in[1] >> 5) & 1;
+  /*FLEVEL = (in[1] >> 6) & 3;*/ /*FLEVEL is not used here*/
+
+  if(CM != 8 || CINFO > 7) {
+    /*error: only compression method 8: inflate with sliding window of 32k is supported by the PNG spec*/
+    return 25;
+  }
+  if(FDICT != 0) {
+    /*error: the specification of PNG says about the zlib stream:
+      "The additional flags shall not specify a preset dictionary."*/
+    return 26;
+  }
+
+  error = inflate(out, outsize, in + 2, insize - 2, settings);
+  if(error) return error;
+
+  if(!settings->ignore_adler32) {
+    unsigned ADLER32, checksum;
+    /*The checksum is read from the last 4 bytes. With fewer than 6 bytes of input those would
+    overlap the 2-byte header, or the index insize - 4 would wrap around for insize < 4.*/
+    if(insize < 6) return 53; /*error, size of zlib data too small*/
+    ADLER32 = lodepng_read32bitInt(&in[insize - 4]);
+    checksum = adler32(*out, (unsigned)(*outsize));
+    if(checksum != ADLER32) return 58; /*error, adler checksum not correct, data must be corrupted*/
+  }
+
+  return 0; /*no error*/
+}
+
+/*decompress using the custom zlib function of the settings if there is one, else lodepng_zlib_decompress*/
+static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                                size_t insize, const LodePNGDecompressSettings* settings) {
+  if(!settings->custom_zlib) {
+    return lodepng_zlib_decompress(out, outsize, in, insize, settings);
+  }
+  return settings->custom_zlib(out, outsize, in, insize, settings);
+}
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/*
+Compresses in[0..insize-1] to zlib data: a 2-byte header, the deflate data and the 4-byte
+adler32 checksum of the uncompressed data.
+out, outsize: receive a newly allocated buffer and its size. On every error *out is NULL and
+  *outsize is 0.
+Returns 0 on success, 83 if allocating the output failed, or the error of deflate.
+*/
+unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                               size_t insize, const LodePNGCompressSettings* settings) {
+  size_t i;
+  unsigned error;
+  unsigned char* deflatedata = 0;
+  size_t deflatesize = 0;
+
+  error = deflate(&deflatedata, &deflatesize, in, insize, settings);
+
+  *out = NULL;
+  *outsize = 0;
+  if(!error) {
+    *out = (unsigned char*)lodepng_malloc(deflatesize + 6);
+    /*the size is only set once the buffer exists, so that a failed allocation does not leave a
+    nonzero size next to a NULL buffer*/
+    if(*out) *outsize = deflatesize + 6;
+    else error = 83; /*alloc fail*/
+  }
+
+  if(!error) {
+    unsigned ADLER32 = adler32(in, (unsigned)insize);
+    /*zlib data: 1 byte CMF (CM+CINFO), 1 byte FLG, deflate data, 4 byte ADLER32 checksum of the Decompressed data*/
+    unsigned CMF = 120; /*0b01111000: CM 8, CINFO 7. With CINFO 7, any window size up to 32768 can be used.*/
+    unsigned FLEVEL = 0;
+    unsigned FDICT = 0;
+    unsigned CMFFLG = 256 * CMF + FDICT * 32 + FLEVEL * 64;
+    unsigned FCHECK = 31 - CMFFLG % 31; /*makes CMFFLG a multiple of 31*/
+    CMFFLG += FCHECK;
+
+    (*out)[0] = (unsigned char)(CMFFLG >> 8);
+    (*out)[1] = (unsigned char)(CMFFLG & 255);
+    for(i = 0; i != deflatesize; ++i) (*out)[i + 2] = deflatedata[i];
+    lodepng_set32bitInt(&(*out)[*outsize - 4], ADLER32);
+  }
+
+  /*the deflate data was copied (or an error happened), the temporary buffer is not needed anymore*/
+  lodepng_free(deflatedata);
+  return error;
+}
+
+/* compress using the custom zlib function of the settings if there is one, else lodepng_zlib_compress */
+static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                              size_t insize, const LodePNGCompressSettings* settings) {
+  if(!settings->custom_zlib) {
+    return lodepng_zlib_compress(out, outsize, in, insize, settings);
+  }
+  return settings->custom_zlib(out, outsize, in, insize, settings);
+}
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#else /*no LODEPNG_COMPILE_ZLIB*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*variant without built-in zlib: decompression is only possible with a custom zlib function*/
+static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                                size_t insize, const LodePNGDecompressSettings* settings) {
+  if(settings->custom_zlib) {
+    return settings->custom_zlib(out, outsize, in, insize, settings);
+  }
+  return 87; /*no custom zlib function provided */
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+#ifdef LODEPNG_COMPILE_ENCODER
+/*variant without built-in zlib: compression is only possible with a custom zlib function*/
+static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                              size_t insize, const LodePNGCompressSettings* settings) {
+  if(settings->custom_zlib) {
+    return settings->custom_zlib(out, outsize, in, insize, settings);
+  }
+  return 87; /*no custom zlib function provided */
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#endif /*LODEPNG_COMPILE_ZLIB*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/*this is a good tradeoff between speed and compression ratio*/
+#define DEFAULT_WINDOWSIZE 2048
+
+/*Sets all fields of the compression settings to their default values.*/
+void lodepng_compress_settings_init(LodePNGCompressSettings* settings) {
+  /*no custom compression functions and no context for them*/
+  settings->custom_context = 0;
+  settings->custom_deflate = 0;
+  settings->custom_zlib = 0;
+
+  /*LZ77 encoding with lazy matching in the default window*/
+  settings->lazymatching = 1;
+  settings->nicematch = 128;
+  settings->minmatch = 3;
+  settings->windowsize = DEFAULT_WINDOWSIZE;
+  settings->use_lz77 = 1;
+
+  /*block type 2: huffman trees made for the data instead of the predefined ones
+  ("dynamic" not in the mathematical sense)*/
+  settings->btype = 2;
+}
+
+/*Default compression settings as a constant. The values are the same ones that
+lodepng_compress_settings_init assigns, listed in the order in which that function assigns
+them (btype, use_lz77, windowsize, minmatch, nicematch, lazymatching, custom_zlib,
+custom_deflate, custom_context). NOTE(review): this assumes the fields of
+LodePNGCompressSettings are declared in that order - confirm against the struct declaration.*/
+const LodePNGCompressSettings lodepng_default_compress_settings = {2, 1, DEFAULT_WINDOWSIZE, 3, 128, 1, 0, 0, 0};
+
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/*Sets all fields of the decompression settings to their default values.*/
+void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings) {
+  /*no custom decompression functions and no context for them*/
+  settings->custom_context = 0;
+  settings->custom_inflate = 0;
+  settings->custom_zlib = 0;
+
+  /*neither the adler32 checksum nor NLEN is ignored*/
+  settings->ignore_nlen = 0;
+  settings->ignore_adler32 = 0;
+}
+
+/*Default decompression settings as a constant: every field is 0, the same values that
+lodepng_decompress_settings_init assigns. NOTE(review): the five initializers assume that
+LodePNGDecompressSettings has exactly the five fields set by that function - confirm
+against the struct declaration.*/
+const LodePNGDecompressSettings lodepng_default_decompress_settings = {0, 0, 0, 0, 0};
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // End of Zlib related code. Begin of PNG related code.                 // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_PNG
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / CRC32                                                                  / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+
+#ifndef LODEPNG_NO_COMPILE_CRC
+/* CRC polynomial: 0xedb88320 */
+/* 256-entry lookup table used by lodepng_crc32: it is indexed with the low 8 bits of
+(running crc XOR next data byte). */
+static unsigned lodepng_crc32_table[256] = {
+           0u, 1996959894u, 3993919788u, 2567524794u,  124634137u, 1886057615u, 3915621685u, 2657392035u,
+   249268274u, 2044508324u, 3772115230u, 2547177864u,  162941995u, 2125561021u, 3887607047u, 2428444049u,
+   498536548u, 1789927666u, 4089016648u, 2227061214u,  450548861u, 1843258603u, 4107580753u, 2211677639u,
+   325883990u, 1684777152u, 4251122042u, 2321926636u,  335633487u, 1661365465u, 4195302755u, 2366115317u,
+   997073096u, 1281953886u, 3579855332u, 2724688242u, 1006888145u, 1258607687u, 3524101629u, 2768942443u,
+   901097722u, 1119000684u, 3686517206u, 2898065728u,  853044451u, 1172266101u, 3705015759u, 2882616665u,
+   651767980u, 1373503546u, 3369554304u, 3218104598u,  565507253u, 1454621731u, 3485111705u, 3099436303u,
+   671266974u, 1594198024u, 3322730930u, 2970347812u,  795835527u, 1483230225u, 3244367275u, 3060149565u,
+  1994146192u,   31158534u, 2563907772u, 4023717930u, 1907459465u,  112637215u, 2680153253u, 3904427059u,
+  2013776290u,  251722036u, 2517215374u, 3775830040u, 2137656763u,  141376813u, 2439277719u, 3865271297u,
+  1802195444u,  476864866u, 2238001368u, 4066508878u, 1812370925u,  453092731u, 2181625025u, 4111451223u,
+  1706088902u,  314042704u, 2344532202u, 4240017532u, 1658658271u,  366619977u, 2362670323u, 4224994405u,
+  1303535960u,  984961486u, 2747007092u, 3569037538u, 1256170817u, 1037604311u, 2765210733u, 3554079995u,
+  1131014506u,  879679996u, 2909243462u, 3663771856u, 1141124467u,  855842277u, 2852801631u, 3708648649u,
+  1342533948u,  654459306u, 3188396048u, 3373015174u, 1466479909u,  544179635u, 3110523913u, 3462522015u,
+  1591671054u,  702138776u, 2966460450u, 3352799412u, 1504918807u,  783551873u, 3082640443u, 3233442989u,
+  3988292384u, 2596254646u,   62317068u, 1957810842u, 3939845945u, 2647816111u,   81470997u, 1943803523u,
+  3814918930u, 2489596804u,  225274430u, 2053790376u, 3826175755u, 2466906013u,  167816743u, 2097651377u,
+  4027552580u, 2265490386u,  503444072u, 1762050814u, 4150417245u, 2154129355u,  426522225u, 1852507879u,
+  4275313526u, 2312317920u,  282753626u, 1742555852u, 4189708143u, 2394877945u,  397917763u, 1622183637u,
+  3604390888u, 2714866558u,  953729732u, 1340076626u, 3518719985u, 2797360999u, 1068828381u, 1219638859u,
+  3624741850u, 2936675148u,  906185462u, 1090812512u, 3747672003u, 2825379669u,  829329135u, 1181335161u,
+  3412177804u, 3160834842u,  628085408u, 1382605366u, 3423369109u, 3138078467u,  570562233u, 1426400815u,
+  3317316542u, 2998733608u,  733239954u, 1555261956u, 3268935591u, 3050360625u,  752459403u, 1541320221u,
+  2607071920u, 3965973030u, 1969922972u,   40735498u, 2617837225u, 3943577151u, 1913087877u,   83908371u,
+  2512341634u, 3803740692u, 2075208622u,  213261112u, 2463272603u, 3855990285u, 2094854071u,  198958881u,
+  2262029012u, 4057260610u, 1759359992u,  534414190u, 2176718541u, 4139329115u, 1873836001u,  414664567u,
+  2282248934u, 4279200368u, 1711684554u,  285281116u, 2405801727u, 4167216745u, 1634467795u,  376229701u,
+  2685067896u, 3608007406u, 1308918612u,  956543938u, 2808555105u, 3495958263u, 1231636301u, 1047427035u,
+  2932959818u, 3654703836u, 1088359270u,  936918000u, 2847714899u, 3736837829u, 1202900863u,  817233897u,
+  3183342108u, 3401237130u, 1404277552u,  615818150u, 3134207493u, 3453421203u, 1423857449u,  601450431u,
+  3009837614u, 3294710456u, 1567103746u,  711928724u, 3020668471u, 3272380065u, 1510334235u,  755167117u
+};
+
+/*Returns the CRC of the bytes data[0..length-1], computed with lodepng_crc32_table.
+The running value starts as all ones and is inverted again before it is returned.*/
+unsigned lodepng_crc32(const unsigned char* data, size_t length) {
+  unsigned crc = 0xffffffffu;
+  size_t pos = 0;
+  while(pos < length) {
+    unsigned slot = (crc ^ data[pos]) & 0xffu;
+    crc = (crc >> 8u) ^ lodepng_crc32_table[slot];
+    ++pos;
+  }
+  return crc ^ 0xffffffffu;
+}
+#else /* !LODEPNG_NO_COMPILE_CRC */
+/*Declaration only: when LODEPNG_NO_COMPILE_CRC is defined no body is compiled here, so a
+definition of lodepng_crc32 has to be provided elsewhere.*/
+unsigned lodepng_crc32(const unsigned char* data, size_t length);
+#endif /* !LODEPNG_NO_COMPILE_CRC */
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Reading and writing PNG color channel bits                             / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/* The color channel bits of less-than-8-bit pixels are read with the MSB of bytes first,
+so LodePNGBitWriter and LodePNGBitReader can't be used for those. */
+
+/*Reads the bit at position *bitpointer from bitstream, counting bits from the most significant
+bit of each byte, and advances *bitpointer by one. Returns 0 or 1.*/
+static unsigned char readBitFromReversedStream(size_t* bitpointer, const unsigned char* bitstream) {
+  size_t pos = *bitpointer;
+  unsigned char byte = bitstream[pos >> 3];
+  size_t shift = 7 - (pos & 0x7); /*bit 0 of a byte is its MSB*/
+  *bitpointer = pos + 1;
+  return (unsigned char)((byte >> shift) & 1);
+}
+
+/* TODO: make this faster */
+/*Reads nbits bits one at a time with readBitFromReversedStream and returns them as an integer
+in which the first bit read is the most significant one. *bitpointer advances by nbits.*/
+static unsigned readBitsFromReversedStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits) {
+  unsigned value = 0;
+  size_t remaining;
+  for(remaining = nbits; remaining != 0; --remaining) {
+    value = (value << 1u) | (unsigned)readBitFromReversedStream(bitpointer, bitstream);
+  }
+  return value;
+}
+
+/*Writes bit (zero or nonzero) at position *bitpointer of bitstream, counting bits from the most
+significant bit of each byte, and advances *bitpointer by one. The bit is explicitly cleared or
+set, so it does not matter what value it held before.*/
+static void setBitOfReversedStream(size_t* bitpointer, unsigned char* bitstream, unsigned char bit) {
+  size_t pos = *bitpointer;
+  unsigned char mask = (unsigned char)(1u << (7u - (pos & 7u)));
+  if(bit != 0) bitstream[pos >> 3u] |= mask;
+  else bitstream[pos >> 3u] &= (unsigned char)(~mask);
+  *bitpointer = pos + 1;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / PNG chunks                                                             / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Returns the value lodepng_read32bitInt reads from the first bytes of the chunk
+(the chunk's length field).*/
+unsigned lodepng_chunk_length(const unsigned char* chunk) {
+  const unsigned char* length_field = chunk;
+  return lodepng_read32bitInt(length_field);
+}
+
+/*Copies the 4 type characters stored at chunk[4..7] into type and null terminates it.*/
+void lodepng_chunk_type(char type[5], const unsigned char* chunk) {
+  const unsigned char* name = chunk + 4;
+  type[0] = (char)name[0];
+  type[1] = (char)name[1];
+  type[2] = (char)name[2];
+  type[3] = (char)name[3];
+  type[4] = 0; /*null termination char*/
+}
+
+/*Returns 1 if type is exactly 4 characters long and equals the type stored at chunk[4..7],
+0 otherwise.*/
+unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type) {
+  unsigned k;
+  if(lodepng_strlen(type) != 4) return 0;
+  for(k = 0; k != 4; ++k) {
+    if(chunk[4 + k] != type[k]) return 0;
+  }
+  return 1;
+}
+
+/*Returns 1 if bit 5 (value 32) of the first type byte, chunk[4], is set, else 0.*/
+unsigned char lodepng_chunk_ancillary(const unsigned char* chunk) {
+  return (unsigned char)((chunk[4] >> 5) & 1);
+}
+
+/*Returns 1 if bit 5 (value 32) of the third type byte, chunk[6], is set, else 0.*/
+unsigned char lodepng_chunk_private(const unsigned char* chunk) {
+  return (unsigned char)((chunk[6] >> 5) & 1);
+}
+
+/*Returns 1 if bit 5 (value 32) of the fourth type byte, chunk[7], is set, else 0.*/
+unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk) {
+  return (unsigned char)((chunk[7] >> 5) & 1);
+}
+
+/*Returns a pointer to the byte at offset 8 of the chunk, i.e. just past the 4 length bytes
+and the 4 type bytes.*/
+unsigned char* lodepng_chunk_data(unsigned char* chunk) {
+  return chunk + 8;
+}
+
+/*Const variant: returns a pointer to the byte at offset 8 of the chunk, i.e. just past the
+4 length bytes and the 4 type bytes.*/
+const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk) {
+  return chunk + 8;
+}
+
+/*Compares the CRC stored after the chunk data with the CRC computed over the 4 type bytes and
+the data (the length field is not part of the CRC). Returns 0 when they match, 1 otherwise.*/
+unsigned lodepng_chunk_check_crc(const unsigned char* chunk) {
+  unsigned length = lodepng_chunk_length(chunk);
+  unsigned stored = lodepng_read32bitInt(&chunk[length + 8]);
+  unsigned computed = lodepng_crc32(&chunk[4], length + 4);
+  return (stored == computed) ? 0 : 1;
+}
+
+/*Computes the CRC over the 4 type bytes and the data of the chunk and writes it, as a 32-bit
+integer, directly after the data.*/
+void lodepng_chunk_generate_crc(unsigned char* chunk) {
+  unsigned length = lodepng_chunk_length(chunk);
+  unsigned char* crc_field = chunk + 8 + length;
+  lodepng_set32bitInt(crc_field, lodepng_crc32(&chunk[4], length + 4));
+}
+
+/*Returns a pointer to whatever follows chunk. Returns end when fewer than 12 bytes remain
+before end, when the chunk size computation overflows, or when the pointer would wrap around.
+If chunk points at the 8-byte PNG magic header, the result is the first actual chunk.*/
+unsigned char* lodepng_chunk_next(unsigned char* chunk, unsigned char* end) {
+  size_t total_chunk_length;
+  unsigned char* next;
+  int is_png_header;
+
+  if(chunk >= end || end - chunk < 12) return end; /*too small to contain a chunk*/
+
+  is_png_header = chunk[0] == 0x89 && chunk[1] == 0x50 && chunk[2] == 0x4e && chunk[3] == 0x47
+               && chunk[4] == 0x0d && chunk[5] == 0x0a && chunk[6] == 0x1a && chunk[7] == 0x0a;
+  /* Is PNG magic header at start of PNG file. Jump to first actual chunk. */
+  if(is_png_header) return chunk + 8;
+
+  /*regular chunk: 12 bytes of overhead (length, type, CRC) plus the data*/
+  if(lodepng_addofl(lodepng_chunk_length(chunk), 12, &total_chunk_length)) return end;
+  next = chunk + total_chunk_length;
+  if(next < chunk) return end; /*pointer overflow*/
+  return next;
+}
+
+/*Const variant: returns a pointer to whatever follows chunk. Returns end when fewer than 12
+bytes remain before end, when the chunk size computation overflows, or when the pointer would
+wrap around. If chunk points at the 8-byte PNG magic header, the result is the first actual chunk.*/
+const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk, const unsigned char* end) {
+  size_t total_chunk_length;
+  const unsigned char* next;
+  int is_png_header;
+
+  if(chunk >= end || end - chunk < 12) return end; /*too small to contain a chunk*/
+
+  is_png_header = chunk[0] == 0x89 && chunk[1] == 0x50 && chunk[2] == 0x4e && chunk[3] == 0x47
+               && chunk[4] == 0x0d && chunk[5] == 0x0a && chunk[6] == 0x1a && chunk[7] == 0x0a;
+  /* Is PNG magic header at start of PNG file. Jump to first actual chunk. */
+  if(is_png_header) return chunk + 8;
+
+  /*regular chunk: 12 bytes of overhead (length, type, CRC) plus the data*/
+  if(lodepng_addofl(lodepng_chunk_length(chunk), 12, &total_chunk_length)) return end;
+  next = chunk + total_chunk_length;
+  if(next < chunk) return end; /*pointer overflow*/
+  return next;
+}
+
+/*Walks the chunks starting at chunk with lodepng_chunk_next and returns the first one whose
+type equals type. Returns 0 once fewer than 12 bytes remain before end.*/
+unsigned char* lodepng_chunk_find(unsigned char* chunk, unsigned char* end, const char type[5]) {
+  while(chunk < end && end - chunk >= 12) {
+    if(lodepng_chunk_type_equals(chunk, type)) return chunk;
+    chunk = lodepng_chunk_next(chunk, end);
+  }
+  return 0; /* past file end: chunk + 12 > end */
+}
+
+/*Const variant: walks the chunks starting at chunk with lodepng_chunk_next_const and returns the
+first one whose type equals type. Returns 0 once fewer than 12 bytes remain before end.*/
+const unsigned char* lodepng_chunk_find_const(const unsigned char* chunk, const unsigned char* end, const char type[5]) {
+  while(chunk < end && end - chunk >= 12) {
+    if(lodepng_chunk_type_equals(chunk, type)) return chunk;
+    chunk = lodepng_chunk_next_const(chunk, end);
+  }
+  return 0; /* past file end: chunk + 12 > end */
+}
+
+/*Appends a copy of the complete chunk (length field, type, data and CRC) to the buffer *out.
+out: pointer to the output buffer; it is grown with lodepng_realloc and may move.
+outlength: current size of *out in bytes; updated to the new size on success.
+chunk: the chunk to copy; its length field determines how many bytes are copied.
+Returns 0 on success, 77 if a size computation overflows, 83 if the allocation fails.*/
+unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk) {
+  /*The index must be a size_t: total_chunk_length is the 32-bit chunk length plus 12 and can
+  exceed the range of unsigned, in which case an unsigned counter would wrap around and the
+  "!=" loop below would never terminate.*/
+  size_t i;
+  size_t total_chunk_length, new_length;
+  unsigned char *chunk_start, *new_buffer;
+
+  if(lodepng_addofl(lodepng_chunk_length(chunk), 12, &total_chunk_length)) return 77;
+  if(lodepng_addofl(*outlength, total_chunk_length, &new_length)) return 77;
+
+  new_buffer = (unsigned char*)lodepng_realloc(*out, new_length);
+  if(!new_buffer) return 83; /*alloc fail*/
+  (*out) = new_buffer;
+  (*outlength) = new_length;
+  /*the new chunk occupies the last total_chunk_length bytes of the grown buffer*/
+  chunk_start = &(*out)[new_length - total_chunk_length];
+
+  for(i = 0; i != total_chunk_length; ++i) chunk_start[i] = chunk[i];
+
+  return 0;
+}
+
+/*Builds a new chunk at the end of the buffer *out: the 4-byte length, the 4 type characters,
+length bytes copied from data, and the CRC generated by lodepng_chunk_generate_crc.
+*out is grown with lodepng_realloc and *outlength is updated.
+Returns 0 on success, 77 if the new size overflows, 83 if the allocation fails.*/
+unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
+                              const char* type, const unsigned char* data) {
+  size_t grown = *outlength;
+  unsigned char* buffer;
+  unsigned char* chunk;
+  unsigned k;
+
+  /*new size = old size + data length + 12 bytes of chunk overhead*/
+  if(lodepng_addofl(grown, length, &grown) || lodepng_addofl(grown, 12, &grown)) return 77;
+
+  buffer = (unsigned char*)lodepng_realloc(*out, grown);
+  if(!buffer) return 83; /*alloc fail*/
+  *out = buffer;
+  *outlength = grown;
+  chunk = buffer + (grown - length - 12);
+
+  /*1: length*/
+  lodepng_set32bitInt(chunk, (unsigned)length);
+
+  /*2: chunk name (4 letters)*/
+  for(k = 0; k != 4; ++k) chunk[4 + k] = (unsigned char)type[k];
+
+  /*3: the data*/
+  for(k = 0; k != length; ++k) chunk[8 + k] = data[k];
+
+  /*4: CRC (of the chunkname characters and the data)*/
+  lodepng_chunk_generate_crc(chunk);
+
+  return 0;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Color types, channels, bits                                            / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Checks whether colortype is a known color type and whether bit depth bd is allowed for it.
+Returns a LodePNG error code: 0 if the combination is allowed, 37 if the bit depth is not
+allowed for this color type, 31 if the color type itself is invalid.*/
+static unsigned checkColorValidity(LodePNGColorType colortype, unsigned bd) {
+  switch(colortype) {
+    case LCT_GREY:
+      if(bd == 1 || bd == 2 || bd == 4 || bd == 8 || bd == 16) return 0;
+      return 37;
+    case LCT_PALETTE:
+      if(bd == 1 || bd == 2 || bd == 4 || bd == 8) return 0;
+      return 37;
+    case LCT_RGB:
+    case LCT_GREY_ALPHA:
+    case LCT_RGBA:
+      if(bd == 8 || bd == 16) return 0;
+      return 37;
+    default:
+      return 31; /* invalid color type */
+  }
+}
+
+/*Returns the number of channels per pixel for colortype, or 0 for an invalid color type.*/
+static unsigned getNumColorChannels(LodePNGColorType colortype) {
+  if(colortype == LCT_GREY || colortype == LCT_PALETTE) return 1;
+  if(colortype == LCT_GREY_ALPHA) return 2;
+  if(colortype == LCT_RGB) return 3;
+  if(colortype == LCT_RGBA) return 4;
+  return 0; /*invalid color type*/
+}
+
+/*Returns the bits per pixel: the number of channels of colortype times the bits per channel.*/
+static unsigned lodepng_get_bpp_lct(LodePNGColorType colortype, unsigned bitdepth) {
+  unsigned channels = getNumColorChannels(colortype);
+  return channels * bitdepth;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Initializes *info as 8-bit RGBA with no color key and no palette.*/
+void lodepng_color_mode_init(LodePNGColorMode* info) {
+  /*pixel format*/
+  info->colortype = LCT_RGBA;
+  info->bitdepth = 8;
+
+  /*no palette*/
+  info->palette = 0;
+  info->palettesize = 0;
+
+  /*no color key*/
+  info->key_defined = 0;
+  info->key_r = 0;
+  info->key_g = 0;
+  info->key_b = 0;
+}
+
+/*Allocates the 1024-byte palette buffer (256 colors of 4 bytes) if info has none yet, then sets
+all 256 entries to opaque black. On allocation failure info->palette stays null and nothing
+is written.*/
+static void lodepng_color_mode_alloc_palette(LodePNGColorMode* info) {
+  unsigned char* entry;
+  size_t n;
+  /*an existing palette buffer already has size 1024, so it is reused as is*/
+  if(!info->palette) {
+    info->palette = (unsigned char*)lodepng_malloc(1024);
+    if(!info->palette) return; /*alloc fail*/
+  }
+  /*Black is the value used for invalid palette indices. Such indices are an error according to
+  the PNG spec, but common PNG decoders make them black instead, and having every entry
+  initialized means color conversion needs no error handling for them.*/
+  entry = info->palette;
+  for(n = 0; n != 256; ++n, entry += 4) {
+    entry[0] = 0;
+    entry[1] = 0;
+    entry[2] = 0;
+    entry[3] = 255;
+  }
+}
+
+/*Releases what *info owns: the palette, which is the only thing freed here
+(done by lodepng_palette_clear).*/
+void lodepng_color_mode_cleanup(LodePNGColorMode* info) {
+  lodepng_palette_clear(info);
+}
+
+/*Makes *dest a deep copy of *source. dest is cleaned up first, so it must already be
+initialized. When source has a palette, dest gets its own 1024-byte buffer holding the same
+palettesize entries. Returns 0 on success, 83 if the palette allocation fails.*/
+unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source) {
+  lodepng_color_mode_cleanup(dest);
+  lodepng_memcpy(dest, source, sizeof(LodePNGColorMode));
+  if(!source->palette) return 0; /*nothing more to duplicate*/
+
+  /*replace the pointer copied from source by a buffer owned by dest*/
+  dest->palette = (unsigned char*)lodepng_malloc(1024);
+  if(!dest->palette && source->palettesize) return 83; /*alloc fail*/
+  lodepng_memcpy(dest->palette, source->palette, source->palettesize * 4);
+  return 0;
+}
+
+/*Returns a color mode initialized with lodepng_color_mode_init and then given the requested
+colortype and bitdepth.*/
+LodePNGColorMode lodepng_color_mode_make(LodePNGColorType colortype, unsigned bitdepth) {
+  LodePNGColorMode mode;
+  lodepng_color_mode_init(&mode);
+  mode.bitdepth = bitdepth;
+  mode.colortype = colortype;
+  return mode;
+}
+
+/*Returns 1 if a and b describe the same color mode: same color type, bit depth, color key
+(the key values are only compared when a key is defined) and identical palette contents.
+Returns 0 otherwise.*/
+static int lodepng_color_mode_equal(const LodePNGColorMode* a, const LodePNGColorMode* b) {
+  size_t k, nbytes;
+  if(a->colortype != b->colortype || a->bitdepth != b->bitdepth) return 0;
+  if(a->key_defined != b->key_defined) return 0;
+  if(a->key_defined && (a->key_r != b->key_r || a->key_g != b->key_g || a->key_b != b->key_b)) return 0;
+  if(a->palettesize != b->palettesize) return 0;
+  nbytes = a->palettesize * 4; /*4 bytes per palette entry*/
+  for(k = 0; k != nbytes; ++k) {
+    if(a->palette[k] != b->palette[k]) return 0;
+  }
+  return 1;
+}
+
+/*Frees the palette buffer of *info, if any, and resets the palette pointer and size to 0.*/
+void lodepng_palette_clear(LodePNGColorMode* info) {
+  unsigned char* old_palette = info->palette;
+  info->palette = 0;
+  info->palettesize = 0;
+  if(old_palette) lodepng_free(old_palette);
+}
+
+/*Appends the color (r, g, b, a) to the palette of *info, allocating the palette buffer first
+when there is none. Returns 0 on success, 83 if the allocation fails, 108 if the palette
+already holds 256 entries.*/
+unsigned lodepng_palette_add(LodePNGColorMode* info,
+                             unsigned char r, unsigned char g, unsigned char b, unsigned char a) {
+  unsigned char* slot;
+  if(!info->palette) /*allocate palette if empty*/ {
+    lodepng_color_mode_alloc_palette(info);
+    if(!info->palette) return 83; /*alloc fail*/
+  }
+  if(info->palettesize >= 256) return 108; /*too many palette values*/
+
+  /*each entry takes 4 bytes: r, g, b, a*/
+  slot = info->palette + 4 * info->palettesize;
+  slot[0] = r;
+  slot[1] = g;
+  slot[2] = b;
+  slot[3] = a;
+  info->palettesize++;
+  return 0;
+}
+
+/*Returns the bits per pixel of the color mode, derived from its colortype and bitdepth.*/
+unsigned lodepng_get_bpp(const LodePNGColorMode* info) {
+  unsigned bits_per_pixel = lodepng_get_bpp_lct(info->colortype, info->bitdepth);
+  return bits_per_pixel;
+}
+
+/*Returns the number of color channels of the color mode's colortype (0 if it is invalid).*/
+unsigned lodepng_get_channels(const LodePNGColorMode* info) {
+  unsigned channels = getNumColorChannels(info->colortype);
+  return channels;
+}
+
+/*Returns 1 if the color type is LCT_GREY or LCT_GREY_ALPHA, 0 otherwise.*/
+unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info) {
+  if(info->colortype == LCT_GREY) return 1;
+  if(info->colortype == LCT_GREY_ALPHA) return 1;
+  return 0;
+}
+
+/*Returns 1 if the color type value has bit 4 set (color types 4 and 6), 0 otherwise.*/
+unsigned lodepng_is_alpha_type(const LodePNGColorMode* info) {
+  return (info->colortype & 4) ? 1 : 0; /*4 or 6*/
+}
+
+/*Returns 1 if the color type is LCT_PALETTE, 0 otherwise.*/
+unsigned lodepng_is_palette_type(const LodePNGColorMode* info) {
+  if(info->colortype == LCT_PALETTE) return 1;
+  return 0;
+}
+
+/*Returns 1 if any of the palettesize palette entries has an alpha byte below 255, else 0.*/
+unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info) {
+  size_t entry = 0;
+  while(entry != info->palettesize) {
+    /*the alpha value is the 4th byte of each entry*/
+    if(info->palette[entry * 4 + 3] < 255) return 1;
+    ++entry;
+  }
+  return 0;
+}
+
+/*Returns 1 if the color mode can express transparency: a color key is defined, the color
+type has an alpha channel, or a palette entry has alpha below 255. Returns 0 otherwise.*/
+unsigned lodepng_can_have_alpha(const LodePNGColorMode* info) {
+  if(info->key_defined) return 1;
+  if(lodepng_is_alpha_type(info)) return 1;
+  if(lodepng_has_palette_alpha(info)) return 1;
+  return 0;
+}
+
+/*Returns the number of bytes needed for w * h pixels of the given color type and bit depth,
+rounded up to a whole byte. The pixel count is split into groups of 8 and a remainder so that
+the pixel count is never multiplied by bpp as a whole.*/
+static size_t lodepng_get_raw_size_lct(unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth) {
+  size_t bpp = lodepng_get_bpp_lct(colortype, bitdepth);
+  size_t n = (size_t)w * (size_t)h;
+  size_t full_groups = (n / 8u) * bpp; /*8 pixels take exactly bpp bytes*/
+  size_t remainder = ((n & 7u) * bpp + 7u) / 8u; /*leftover pixels, rounded up*/
+  return full_groups + remainder;
+}
+
+/*Returns the raw buffer size in bytes for a w x h image in the given color mode.*/
+size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color) {
+  size_t num_bytes = lodepng_get_raw_size_lct(w, h, color->colortype, color->bitdepth);
+  return num_bytes;
+}
+
+
+#ifdef LODEPNG_COMPILE_PNG
+
+/*In an idat chunk each scanline is padded to a multiple of 8 bits, unlike the lodepng output
+buffer, and is preceded by one extra byte: the filter byte. The result is therefore larger than
+that of lodepng_get_raw_size. Pass h = 1 to get the size of one row including its filter byte.*/
+static size_t lodepng_get_raw_size_idat(unsigned w, unsigned h, unsigned bpp) {
+  /*Ignoring casts, a line is (w * bpp + 7) / 8 + 1 bytes; splitting w into groups of 8 pixels
+  and a remainder avoids overflow of w * bpp.*/
+  unsigned tail_bytes = ((w & 7u) * bpp + 7u) / 8u; /*leftover pixels, rounded up*/
+  size_t line = (size_t)(w / 8u) * bpp;
+  line += 1u; /*the filter byte*/
+  line += tail_bytes;
+  return (size_t)h * line;
+}
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*Safely checks whether the pixel count can make size_t computations overflow.
+The check is overcautious rather than precise. When it reports no overflow, the following can
+safely be computed in a size_t (but not in an unsigned):
+-(size_t)w * (size_t)h * 8
+-the amount of bytes in IDAT (including filter, padding and Adam7 bytes)
+-the amount of bytes in the raw color model
+The larger bits-per-pixel value of pngcolor and rawcolor is used.
+Returns 1 if overflow is possible, 0 if not.
+*/
+static int lodepng_pixel_overflow(unsigned w, unsigned h,
+                                  const LodePNGColorMode* pngcolor, const LodePNGColorMode* rawcolor) {
+  size_t bpp = LODEPNG_MAX(lodepng_get_bpp(pngcolor), lodepng_get_bpp(rawcolor));
+  size_t numpixels, total;
+  size_t line; /* bytes per line in worst case */
+
+  /* w * h, then times 8: bit pointer with 8-bit color, or 8 bytes per channel color */
+  if(lodepng_mulofl((size_t)w, (size_t)h, &numpixels) ||
+     lodepng_mulofl(numpixels, 8, &total)) {
+    return 1;
+  }
+
+  /* Bytes per scanline with the expression "(w / 8u) * bpp) + ((w & 7u) * bpp + 7u) / 8u",
+  plus 5 bytes overhead per line (1 filterbyte, 4 for Adam7 worst case),
+  then times h for the total bytes in worst case */
+  if(lodepng_mulofl((size_t)(w / 8u), bpp, &line) ||
+     lodepng_addofl(line, ((w & 7u) * bpp + 7u) / 8u, &line) ||
+     lodepng_addofl(line, 5, &line) ||
+     lodepng_mulofl(line, h, &total)) {
+    return 1;
+  }
+
+  return 0; /* no overflow */
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+
+/*Sets the 3 unknown-chunk buffers of *info to null pointers with size 0.*/
+static void LodePNGUnknownChunks_init(LodePNGInfo* info) {
+  unsigned slot;
+  for(slot = 0; slot != 3; ++slot) {
+    info->unknown_chunks_data[slot] = 0;
+  }
+  for(slot = 0; slot != 3; ++slot) {
+    info->unknown_chunks_size[slot] = 0;
+  }
+}
+
+/*Frees the 3 unknown-chunk buffers of *info. The pointers and sizes are left unchanged.*/
+static void LodePNGUnknownChunks_cleanup(LodePNGInfo* info) {
+  unsigned slot = 0;
+  while(slot != 3) {
+    lodepng_free(info->unknown_chunks_data[slot]);
+    ++slot;
+  }
+}
+
+/*Replaces the 3 unknown-chunk buffers of *dest by newly allocated copies of those of *src.
+The existing buffers of dest are freed first. Returns 0 on success, 83 if an allocation fails.*/
+static unsigned LodePNGUnknownChunks_copy(LodePNGInfo* dest, const LodePNGInfo* src) {
+  unsigned slot;
+
+  LodePNGUnknownChunks_cleanup(dest);
+
+  for(slot = 0; slot != 3; ++slot) {
+    size_t pos;
+    dest->unknown_chunks_size[slot] = src->unknown_chunks_size[slot];
+    dest->unknown_chunks_data[slot] = (unsigned char*)lodepng_malloc(src->unknown_chunks_size[slot]);
+    /*a null result only counts as a failure when bytes were actually requested*/
+    if(dest->unknown_chunks_size[slot] && !dest->unknown_chunks_data[slot]) return 83; /*alloc fail*/
+    pos = 0;
+    while(pos < src->unknown_chunks_size[slot]) {
+      dest->unknown_chunks_data[slot][pos] = src->unknown_chunks_data[slot][pos];
+      ++pos;
+    }
+  }
+
+  return 0;
+}
+
+/******************************************************************************/
+
+/*Marks the text list of *info as empty: null arrays and a count of 0.*/
+static void LodePNGText_init(LodePNGInfo* info) {
+  info->text_strings = NULL;
+  info->text_keys = NULL;
+  info->text_num = 0;
+}
+
+/*Frees every key and string of the text list of *info, then the two arrays themselves.
+text_num and the array pointers are not reset here.*/
+static void LodePNGText_cleanup(LodePNGInfo* info) {
+  size_t entry = 0;
+  while(entry != info->text_num) {
+    string_cleanup(&info->text_keys[entry]);
+    string_cleanup(&info->text_strings[entry]);
+    ++entry;
+  }
+  lodepng_free(info->text_keys);
+  lodepng_free(info->text_strings);
+}
+
+/*Rebuilds the text list of *dest as a copy of that of *source. The list fields of dest are
+overwritten without being freed, then each entry is added with lodepng_add_text.
+Returns 0 on success, otherwise the error code of the failing lodepng_add_text call.*/
+static unsigned LodePNGText_copy(LodePNGInfo* dest, const LodePNGInfo* source) {
+  size_t entry;
+  dest->text_num = 0;
+  dest->text_strings = 0;
+  dest->text_keys = 0;
+  for(entry = 0; entry != source->text_num; ++entry) {
+    CERROR_TRY_RETURN(lodepng_add_text(dest, source->text_keys[entry], source->text_strings[entry]));
+  }
+  return 0;
+}
+
+/*Removes all text entries from *info and frees their memory.
+After freeing, the list is reset to the empty state (null arrays, count 0). Without that reset
+info would keep freed pointers and a stale text_num, so a later lodepng_add_text or
+lodepng_info_cleanup on the same info would access or free already released memory.*/
+void lodepng_clear_text(LodePNGInfo* info) {
+  LodePNGText_cleanup(info);
+  LodePNGText_init(info);
+}
+
+/*Appends a text entry with copies of key and str to the text list of *info.
+Returns 0 on success, 83 if growing the arrays or copying either string fails.
+After a failed string copy the entry still counts in text_num, with a null pointer for the
+string that could not be allocated; cleaning up info releases it as usual.*/
+unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str) {
+  size_t last;
+  char** new_keys = (char**)(lodepng_realloc(info->text_keys, sizeof(char*) * (info->text_num + 1)));
+  char** new_strings = (char**)(lodepng_realloc(info->text_strings, sizeof(char*) * (info->text_num + 1)));
+
+  /*keep whichever array was successfully moved, so info never points to a stale block*/
+  if(new_keys) info->text_keys = new_keys;
+  if(new_strings) info->text_strings = new_strings;
+
+  if(!new_keys || !new_strings) return 83; /*alloc fail*/
+
+  last = info->text_num;
+  ++info->text_num;
+
+  info->text_keys[last] = alloc_string(key);
+  info->text_strings[last] = alloc_string(str);
+
+  /*the string copies can fail too; report that instead of returning success*/
+  if(!info->text_keys[last] || !info->text_strings[last]) return 83; /*alloc fail*/
+
+  return 0;
+}
+
+/******************************************************************************/
+
+/*Marks the international text list of *info as empty: null arrays and a count of 0.*/
+static void LodePNGIText_init(LodePNGInfo* info) {
+  info->itext_strings = NULL;
+  info->itext_transkeys = NULL;
+  info->itext_langtags = NULL;
+  info->itext_keys = NULL;
+  info->itext_num = 0;
+}
+
+/*Frees every key, language tag, translated key and string of the international text list of
+*info, then the four arrays themselves. itext_num and the array pointers are not reset here.*/
+static void LodePNGIText_cleanup(LodePNGInfo* info) {
+  size_t entry = 0;
+  while(entry != info->itext_num) {
+    string_cleanup(&info->itext_keys[entry]);
+    string_cleanup(&info->itext_langtags[entry]);
+    string_cleanup(&info->itext_transkeys[entry]);
+    string_cleanup(&info->itext_strings[entry]);
+    ++entry;
+  }
+  lodepng_free(info->itext_keys);
+  lodepng_free(info->itext_langtags);
+  lodepng_free(info->itext_transkeys);
+  lodepng_free(info->itext_strings);
+}
+
+/*Rebuilds the international text list of *dest as a copy of that of *source. The list fields
+of dest are overwritten without being freed, then each entry is added with lodepng_add_itext.
+Returns 0 on success, otherwise the error code of the failing lodepng_add_itext call.*/
+static unsigned LodePNGIText_copy(LodePNGInfo* dest, const LodePNGInfo* source) {
+  size_t entry;
+  dest->itext_num = 0;
+  dest->itext_strings = 0;
+  dest->itext_transkeys = 0;
+  dest->itext_langtags = 0;
+  dest->itext_keys = 0;
+  for(entry = 0; entry != source->itext_num; ++entry) {
+    CERROR_TRY_RETURN(lodepng_add_itext(dest, source->itext_keys[entry], source->itext_langtags[entry],
+                                        source->itext_transkeys[entry], source->itext_strings[entry]));
+  }
+  return 0;
+}
+
+/*Removes all international text entries from *info and frees their memory.
+After freeing, the list is reset to the empty state (null arrays, count 0). Without that reset
+info would keep freed pointers and a stale itext_num, so a later lodepng_add_itext or
+lodepng_info_cleanup on the same info would access or free already released memory.*/
+void lodepng_clear_itext(LodePNGInfo* info) {
+  LodePNGIText_cleanup(info);
+  LodePNGIText_init(info);
+}
+
+/*Appends an international text entry with copies of key, langtag, transkey and str to *info.
+Returns 0 on success, 83 if growing the arrays or copying any of the strings fails.
+After a failed string copy the entry still counts in itext_num, with a null pointer for each
+string that could not be allocated; cleaning up info releases it as usual.*/
+unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag,
+                           const char* transkey, const char* str) {
+  size_t last;
+  char** new_keys = (char**)(lodepng_realloc(info->itext_keys, sizeof(char*) * (info->itext_num + 1)));
+  char** new_langtags = (char**)(lodepng_realloc(info->itext_langtags, sizeof(char*) * (info->itext_num + 1)));
+  char** new_transkeys = (char**)(lodepng_realloc(info->itext_transkeys, sizeof(char*) * (info->itext_num + 1)));
+  char** new_strings = (char**)(lodepng_realloc(info->itext_strings, sizeof(char*) * (info->itext_num + 1)));
+
+  /*keep whichever arrays were successfully moved, so info never points to a stale block*/
+  if(new_keys) info->itext_keys = new_keys;
+  if(new_langtags) info->itext_langtags = new_langtags;
+  if(new_transkeys) info->itext_transkeys = new_transkeys;
+  if(new_strings) info->itext_strings = new_strings;
+
+  if(!new_keys || !new_langtags || !new_transkeys || !new_strings) return 83; /*alloc fail*/
+
+  last = info->itext_num;
+  ++info->itext_num;
+
+  info->itext_keys[last] = alloc_string(key);
+  info->itext_langtags[last] = alloc_string(langtag);
+  info->itext_transkeys[last] = alloc_string(transkey);
+  info->itext_strings[last] = alloc_string(str);
+
+  /*the string copies can fail too; report that instead of returning success*/
+  if(!info->itext_keys[last] || !info->itext_langtags[last] ||
+     !info->itext_transkeys[last] || !info->itext_strings[last]) {
+    return 83; /*alloc fail*/
+  }
+
+  return 0;
+}
+
+/* same as set but does not delete */
+/*Stores copies of name and of the profile_size bytes at profile in *info, without freeing what
+info held before. Returns 0 on success, 100 if profile_size is 0, 83 if an allocation fails.*/
+static unsigned lodepng_assign_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size) {
+  if(profile_size == 0) return 100; /*invalid ICC profile size*/
+
+  info->iccp_name = alloc_string(name);
+  info->iccp_profile = (unsigned char*)lodepng_malloc(profile_size);
+
+  if(info->iccp_name && info->iccp_profile) {
+    lodepng_memcpy(info->iccp_profile, profile, profile_size);
+    info->iccp_profile_size = profile_size;
+    return 0; /*ok*/
+  }
+
+  return 83; /*alloc fail*/
+}
+
+/*Sets the ICC profile of *info to copies of name and profile, clearing a previously stored
+profile first. iccp_defined is set to 1 before the copy is attempted.
+Returns the result of lodepng_assign_icc.*/
+unsigned lodepng_set_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size) {
+  unsigned error;
+  if(info->iccp_name) {
+    lodepng_clear_icc(info);
+  }
+  info->iccp_defined = 1;
+
+  error = lodepng_assign_icc(info, name, profile, profile_size);
+  return error;
+}
+
+/*Frees the ICC name and profile of *info and resets the ICC fields: null profile pointer,
+profile size 0 and iccp_defined 0.*/
+void lodepng_clear_icc(LodePNGInfo* info) {
+  /*release the owned memory*/
+  string_cleanup(&info->iccp_name);
+  lodepng_free(info->iccp_profile);
+
+  /*mark the profile as absent*/
+  info->iccp_defined = 0;
+  info->iccp_profile_size = 0;
+  info->iccp_profile = NULL;
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*Initializes *info: default color mode, methods set to 0 and, when ancillary chunks are
+compiled in, no background, texts, time, phys, gamma, chrm, srgb, ICC profile or unknown chunks.*/
+void lodepng_info_init(LodePNGInfo* info) {
+  lodepng_color_mode_init(&info->color);
+  info->filter_method = 0;
+  info->compression_method = 0;
+  info->interlace_method = 0;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  /*background color*/
+  info->background_defined = 0;
+  info->background_r = 0;
+  info->background_g = 0;
+  info->background_b = 0;
+
+  /*text lists*/
+  LodePNGText_init(info);
+  LodePNGIText_init(info);
+
+  /*flags of the optional chunks*/
+  info->time_defined = 0;
+  info->phys_defined = 0;
+  info->gama_defined = 0;
+  info->chrm_defined = 0;
+  info->srgb_defined = 0;
+
+  /*ICC profile*/
+  info->iccp_defined = 0;
+  info->iccp_name = NULL;
+  info->iccp_profile = NULL;
+
+  LodePNGUnknownChunks_init(info);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+}
+
+/*Frees what *info owns: the color mode's palette and, when ancillary chunks are compiled in,
+the text lists, the ICC profile and the unknown-chunk buffers.*/
+void lodepng_info_cleanup(LodePNGInfo* info) {
+  /*palette*/
+  lodepng_color_mode_cleanup(&info->color);
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  /*text and international text lists*/
+  LodePNGText_cleanup(info);
+  LodePNGIText_cleanup(info);
+  /*ICC name and profile*/
+  lodepng_clear_icc(info);
+  /*unknown chunk buffers*/
+  LodePNGUnknownChunks_cleanup(info);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+}
+
+/*Makes *dest a deep copy of *source. dest is cleaned up first, so it must already be
+initialized. Returns 0 on success, otherwise the error code of the step that failed.
+
+The plain fields are copied with one memcpy, which also copies source's pointers into dest.
+All of those pointers are reset before any step that can fail, so that an early error return
+never leaves dest pointing at memory owned by source: cleaning up dest after a failed copy
+would otherwise free source's data.*/
+unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source) {
+  lodepng_info_cleanup(dest);
+  lodepng_memcpy(dest, source, sizeof(LodePNGInfo));
+
+  /*drop every pointer that was copied from source*/
+  lodepng_color_mode_init(&dest->color);
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  LodePNGText_init(dest);
+  LodePNGIText_init(dest);
+  dest->iccp_name = NULL;
+  dest->iccp_profile = NULL;
+  LodePNGUnknownChunks_init(dest);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+  /*now duplicate the owned data piece by piece*/
+  CERROR_TRY_RETURN(lodepng_color_mode_copy(&dest->color, &source->color));
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  CERROR_TRY_RETURN(LodePNGText_copy(dest, source));
+  CERROR_TRY_RETURN(LodePNGIText_copy(dest, source));
+  if(source->iccp_defined) {
+    CERROR_TRY_RETURN(lodepng_assign_icc(dest, source->iccp_name, source->iccp_profile, source->iccp_profile_size));
+  }
+
+  CERROR_TRY_RETURN(LodePNGUnknownChunks_copy(dest, source));
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+  return 0;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Writes one bit group into a packed octet array.
+index: bitgroup index, bits: bitgroup size (1, 2 or 4), in: bitgroup value (only its lowest
+"bits" bits are used), out: octet array to add the bits to.
+The first group of a byte overwrites the whole byte, later groups are OR-ed into it.*/
+static void addColorBits(unsigned char* out, size_t index, unsigned bits, unsigned in) {
+  unsigned last = (bits == 1) ? 7 : ((bits == 2) ? 3 : 1); /*8 / bits - 1*/
+  /*position of the group within its byte, e.g. with 4 bits it is 0 for the first half or 1 for
+  the second half*/
+  unsigned slot = index & last;
+  size_t byte_pos = index * bits / 8u;
+  /*filter out any other bits of the input value, then move it to its place in the byte*/
+  unsigned value = (in & ((1u << bits) - 1u)) << (bits * (last - slot));
+  if(slot != 0) out[byte_pos] |= value;
+  else out[byte_pos] = value;
+}
+
+/*forward declaration, so that the struct can hold pointers to its own type*/
+typedef struct ColorTree ColorTree;
+
+/*
+One node of a color tree
+This is the data structure used to count the number of unique colors and to get a palette
+index for a color. It's like an octree, but because the alpha channel is used too, each
+node has 16 instead of 8 children.
+A child slot is selected by one bit each of r, g, b and a (see color_tree_get), and a color is
+stored along a path of 8 such steps, one per bit of the 8-bit channel values.
+*/
+struct ColorTree {
+  ColorTree* children[16]; /*up to 16 pointers to ColorTree of next level*/
+  int index; /*the payload. Only has a meaningful value if this is in the last level*/
+};
+
+/*Initializes a node: no children, and index -1 (meaning no color is stored here).*/
+static void color_tree_init(ColorTree* tree) {
+  tree->index = -1;
+  lodepng_memset(tree->children, 0, sizeof(tree->children));
+}
+
+/*Recursively frees all descendants of tree. The node tree itself is not freed and its child
+pointers are left as they are.*/
+static void color_tree_cleanup(ColorTree* tree) {
+  int slot = 0;
+  while(slot != 16) {
+    ColorTree* child = tree->children[slot];
+    ++slot;
+    if(!child) continue;
+    color_tree_cleanup(child);
+    lodepng_free(child);
+  }
+}
+
+/*Looks up the color (r, g, b, a) in the tree.
+Returns -1 if the color is not present, the index stored for it otherwise.*/
+static int color_tree_get(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) {
+  int level;
+  for(level = 0; level != 8; ++level) {
+    /*one bit of each channel, starting with the least significant, selects the child*/
+    int slot = (((r >> level) & 1) << 3) | (((g >> level) & 1) << 2)
+             | (((b >> level) & 1) << 1) | ((a >> level) & 1);
+    tree = tree->children[slot];
+    if(!tree) return -1;
+  }
+  return tree->index;
+}
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Returns 1 if color_tree_get finds a non-negative index for the color (r, g, b, a), else 0.*/
+static int color_tree_has(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) {
+  int found = color_tree_get(tree, r, g, b, a);
+  return found >= 0;
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/*Stores index for the color (r, g, b, a), creating the missing nodes on the way.
+The color is not allowed to already exist.
+Index should be >= 0 (it is kept in a signed field to be compatible with using -1 for
+"doesn't exist").
+Returns error code 83 if a node allocation fails, or 0 if ok*/
+static unsigned color_tree_add(ColorTree* tree,
+                               unsigned char r, unsigned char g, unsigned char b, unsigned char a, unsigned index) {
+  int level;
+  for(level = 0; level != 8; ++level) {
+    /*one bit of each channel, starting with the least significant, selects the child*/
+    int slot = (((r >> level) & 1) << 3) | (((g >> level) & 1) << 2)
+             | (((b >> level) & 1) << 1) | ((a >> level) & 1);
+    ColorTree* child = tree->children[slot];
+    if(!child) {
+      child = (ColorTree*)lodepng_malloc(sizeof(ColorTree));
+      tree->children[slot] = child;
+      if(!child) return 83; /*alloc fail*/
+      color_tree_init(child);
+    }
+    tree = child;
+  }
+  tree->index = (int)index;
+  return 0;
+}
+
+/*Puts a pixel, given as 8-bit RGBA, at pixel index i of out, in the color type and bit depth
+of mode. Grey types take the red channel as grey value. For 16-bit depths each value is written
+to both bytes of the channel. For LCT_PALETTE the palette index is looked up in tree.
+Returns 82 if the color is not in the tree, 0 otherwise. Other color types, and grey + alpha
+with a bit depth other than 8 or 16, write nothing.*/
+static unsigned rgba8ToPixel(unsigned char* out, size_t i,
+                             const LodePNGColorMode* mode, ColorTree* tree /*for palette*/,
+                             unsigned char r, unsigned char g, unsigned char b, unsigned char a) {
+  switch(mode->colortype) {
+    case LCT_GREY: {
+      unsigned char gray = r; /*((unsigned short)r + g + b) / 3u;*/
+      if(mode->bitdepth == 8) {
+        out[i] = gray;
+      } else if(mode->bitdepth == 16) {
+        out[i * 2 + 0] = out[i * 2 + 1] = gray;
+      } else {
+        /*take the most significant bits of gray*/
+        gray = ((unsigned)gray >> (8u - mode->bitdepth)) & ((1u << mode->bitdepth) - 1u);
+        addColorBits(out, i, mode->bitdepth, gray);
+      }
+      break;
+    }
+    case LCT_RGB: {
+      if(mode->bitdepth == 8) {
+        unsigned char* px = out + i * 3;
+        px[0] = r;
+        px[1] = g;
+        px[2] = b;
+      } else {
+        unsigned char* px = out + i * 6;
+        px[0] = px[1] = r;
+        px[2] = px[3] = g;
+        px[4] = px[5] = b;
+      }
+      break;
+    }
+    case LCT_PALETTE: {
+      int index = color_tree_get(tree, r, g, b, a);
+      if(index < 0) return 82; /*color not in palette*/
+      if(mode->bitdepth == 8) out[i] = index;
+      else addColorBits(out, i, mode->bitdepth, (unsigned)index);
+      break;
+    }
+    case LCT_GREY_ALPHA: {
+      unsigned char gray = r; /*((unsigned short)r + g + b) / 3u;*/
+      if(mode->bitdepth == 8) {
+        unsigned char* px = out + i * 2;
+        px[0] = gray;
+        px[1] = a;
+      } else if(mode->bitdepth == 16) {
+        unsigned char* px = out + i * 4;
+        px[0] = px[1] = gray;
+        px[2] = px[3] = a;
+      }
+      break;
+    }
+    case LCT_RGBA: {
+      if(mode->bitdepth == 8) {
+        unsigned char* px = out + i * 4;
+        px[0] = r;
+        px[1] = g;
+        px[2] = b;
+        px[3] = a;
+      } else {
+        unsigned char* px = out + i * 8;
+        px[0] = px[1] = r;
+        px[2] = px[3] = g;
+        px[4] = px[5] = b;
+        px[6] = px[7] = a;
+      }
+      break;
+    }
+    default:
+      break; /*unknown color type: nothing is written*/
+  }
+
+  return 0; /*no error*/
+}
+
+/*Writes one pixel, given as 16-bit RGBA, to pixel index i of an image whose color type
+has 16 bits per channel. Every channel is stored as 2 bytes, most significant byte first.
+Gray types use r as the gray value. Palette, and any other color type not handled below,
+writes nothing.*/
+static void rgba16ToPixel(unsigned char* out, size_t i,
+                         const LodePNGColorMode* mode,
+                         unsigned short r, unsigned short g, unsigned short b, unsigned short a) {
+  switch(mode->colortype) {
+    case LCT_GREY: {
+      unsigned char* px = out + i * 2;
+      const unsigned short gray = r; /*r alone is the gray value, g and b are not averaged in*/
+      px[0] = (gray >> 8) & 255;
+      px[1] = gray & 255;
+      break;
+    }
+    case LCT_RGB: {
+      unsigned char* px = out + i * 6;
+      px[0] = (r >> 8) & 255;
+      px[1] = r & 255;
+      px[2] = (g >> 8) & 255;
+      px[3] = g & 255;
+      px[4] = (b >> 8) & 255;
+      px[5] = b & 255;
+      break;
+    }
+    case LCT_GREY_ALPHA: {
+      unsigned char* px = out + i * 4;
+      const unsigned short gray = r; /*r alone is the gray value, g and b are not averaged in*/
+      px[0] = (gray >> 8) & 255;
+      px[1] = gray & 255;
+      px[2] = (a >> 8) & 255;
+      px[3] = a & 255;
+      break;
+    }
+    case LCT_RGBA: {
+      unsigned char* px = out + i * 8;
+      px[0] = (r >> 8) & 255;
+      px[1] = r & 255;
+      px[2] = (g >> 8) & 255;
+      px[3] = g & 255;
+      px[4] = (b >> 8) & 255;
+      px[5] = b & 255;
+      px[6] = (a >> 8) & 255;
+      px[7] = a & 255;
+      break;
+    }
+    default:
+      break; /*nothing is written for other color types*/
+  }
+}
+
+/*Reads the pixel with index i (y * width + x) from the raw image in, which is stored in
+the color type given by mode, and returns it as 8-bit RGBA through r, g, b and a.
+Of 16-bit channels only the first (most significant) byte is returned, gray values of
+less than 8 bits are scaled to the 0-255 range, and a palette index is replaced by its
+palette entry. For gray and RGB types without alpha channel, a becomes 0 if the pixel
+equals the color key and 255 otherwise.
+The outputs are left untouched if the color type is not one of those handled below.*/
+static void getPixelColorRGBA8(unsigned char* r, unsigned char* g,
+                               unsigned char* b, unsigned char* a,
+                               const unsigned char* in, size_t i,
+                               const LodePNGColorMode* mode) {
+  switch(mode->colortype) {
+    case LCT_GREY: {
+      if(mode->bitdepth == 8) {
+        const unsigned char gray = in[i];
+        *r = *g = *b = gray;
+        *a = (mode->key_defined && gray == mode->key_r) ? 0 : 255;
+      } else if(mode->bitdepth == 16) {
+        const unsigned char* px = in + i * 2;
+        *r = *g = *b = px[0];
+        /*the color key is compared with the full 16-bit value*/
+        *a = (mode->key_defined && 256U * px[0] + px[1] == mode->key_r) ? 0 : 255;
+      } else {
+        const unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/
+        size_t bitpos = i * mode->bitdepth;
+        const unsigned value = readBitsFromReversedStream(&bitpos, in, mode->bitdepth);
+        *r = *g = *b = (value * 255) / highest;
+        *a = (mode->key_defined && value == mode->key_r) ? 0 : 255;
+      }
+      break;
+    }
+    case LCT_RGB: {
+      if(mode->bitdepth == 8) {
+        const unsigned char* px = in + i * 3;
+        *r = px[0];
+        *g = px[1];
+        *b = px[2];
+        *a = (mode->key_defined && px[0] == mode->key_r
+              && px[1] == mode->key_g && px[2] == mode->key_b) ? 0 : 255;
+      } else {
+        const unsigned char* px = in + i * 6;
+        *r = px[0];
+        *g = px[2];
+        *b = px[4];
+        /*the color key is compared with the full 16-bit values*/
+        *a = (mode->key_defined
+              && 256U * px[0] + px[1] == mode->key_r
+              && 256U * px[2] + px[3] == mode->key_g
+              && 256U * px[4] + px[5] == mode->key_b) ? 0 : 255;
+      }
+      break;
+    }
+    case LCT_PALETTE: {
+      unsigned index;
+      const unsigned char* entry;
+      if(mode->bitdepth == 8) {
+        index = in[i];
+      } else {
+        size_t bitpos = i * mode->bitdepth;
+        index = readBitsFromReversedStream(&bitpos, in, mode->bitdepth);
+      }
+      /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/
+      entry = &mode->palette[index * 4];
+      *r = entry[0];
+      *g = entry[1];
+      *b = entry[2];
+      *a = entry[3];
+      break;
+    }
+    case LCT_GREY_ALPHA: {
+      if(mode->bitdepth == 8) {
+        const unsigned char* px = in + i * 2;
+        *r = *g = *b = px[0];
+        *a = px[1];
+      } else {
+        const unsigned char* px = in + i * 4;
+        *r = *g = *b = px[0];
+        *a = px[2];
+      }
+      break;
+    }
+    case LCT_RGBA: {
+      if(mode->bitdepth == 8) {
+        const unsigned char* px = in + i * 4;
+        *r = px[0];
+        *g = px[1];
+        *b = px[2];
+        *a = px[3];
+      } else {
+        const unsigned char* px = in + i * 8;
+        *r = px[0];
+        *g = px[2];
+        *b = px[4];
+        *a = px[6];
+      }
+      break;
+    }
+    default:
+      break; /*outputs stay as they were*/
+  }
+}
+
+/*Similar to getPixelColorRGBA8, but with all the for loops inside of the color
+mode test cases, optimized to convert the colors much faster, when converting
+to the common case of RGBA with 8 bit per channel. buffer must be RGBA with
+enough memory.
+buffer: output, receives 4 bytes (R, G, B, A) for each of the numpixels pixels
+in: raw input pixels, stored in the color mode given by mode
+For 16-bit input only the first (most significant) byte of each channel is
+kept, but a color key is compared with the full 16-bit value. Gray values of
+less than 8 bits are scaled up to the 0-255 range. For gray and RGB input, a
+pixel that matches the color key gets alpha 0 and every other pixel alpha 255.
+Nothing is written if the color type is none of the ones handled below.*/
+static void getPixelColorsRGBA8(unsigned char* LODEPNG_RESTRICT buffer, size_t numpixels,
+                                const unsigned char* LODEPNG_RESTRICT in,
+                                const LodePNGColorMode* mode) {
+  unsigned num_channels = 4; /*bytes written to buffer per pixel*/
+  size_t i;
+  if(mode->colortype == LCT_GREY) {
+    if(mode->bitdepth == 8) {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = buffer[1] = buffer[2] = in[i];
+        buffer[3] = 255;
+      }
+      if(mode->key_defined) {
+        buffer -= numpixels * num_channels; /*back to the first pixel, for a second pass that applies the color key*/
+        for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+          if(buffer[0] == mode->key_r) buffer[3] = 0;
+        }
+      }
+    } else if(mode->bitdepth == 16) {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = buffer[1] = buffer[2] = in[i * 2];
+        buffer[3] = mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r ? 0 : 255;
+      }
+    } else {
+      unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/
+      size_t j = 0; /*bit position in the input, advanced by readBitsFromReversedStream*/
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth);
+        buffer[0] = buffer[1] = buffer[2] = (value * 255) / highest;
+        buffer[3] = mode->key_defined && value == mode->key_r ? 0 : 255;
+      }
+    }
+  } else if(mode->colortype == LCT_RGB) {
+    if(mode->bitdepth == 8) {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        lodepng_memcpy(buffer, &in[i * 3], 3);
+        buffer[3] = 255;
+      }
+      if(mode->key_defined) {
+        buffer -= numpixels * num_channels; /*back to the first pixel, for a second pass that applies the color key*/
+        for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+          if(buffer[0] == mode->key_r && buffer[1]== mode->key_g && buffer[2] == mode->key_b) buffer[3] = 0;
+        }
+      }
+    } else {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = in[i * 6 + 0];
+        buffer[1] = in[i * 6 + 2];
+        buffer[2] = in[i * 6 + 4];
+        buffer[3] = mode->key_defined
+           && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r
+           && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g
+           && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b ? 0 : 255;
+      }
+    }
+  } else if(mode->colortype == LCT_PALETTE) {
+    if(mode->bitdepth == 8) {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        unsigned index = in[i];
+        /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/
+        lodepng_memcpy(buffer, &mode->palette[index * 4], 4);
+      }
+    } else {
+      size_t j = 0; /*bit position in the input, advanced by readBitsFromReversedStream*/
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        unsigned index = readBitsFromReversedStream(&j, in, mode->bitdepth);
+        /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/
+        lodepng_memcpy(buffer, &mode->palette[index * 4], 4);
+      }
+    }
+  } else if(mode->colortype == LCT_GREY_ALPHA) {
+    if(mode->bitdepth == 8) {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = buffer[1] = buffer[2] = in[i * 2 + 0];
+        buffer[3] = in[i * 2 + 1];
+      }
+    } else {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = buffer[1] = buffer[2] = in[i * 4 + 0];
+        buffer[3] = in[i * 4 + 2];
+      }
+    }
+  } else if(mode->colortype == LCT_RGBA) {
+    if(mode->bitdepth == 8) {
+      lodepng_memcpy(buffer, in, numpixels * 4); /*input already has the output layout*/
+    } else {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = in[i * 8 + 0];
+        buffer[1] = in[i * 8 + 2];
+        buffer[2] = in[i * 8 + 4];
+        buffer[3] = in[i * 8 + 6];
+      }
+    }
+  }
+}
+
+/*Similar to getPixelColorsRGBA8, but with 3-channel RGB output.
+buffer: output, receives 3 bytes (R, G, B) for each of the numpixels pixels
+in: raw input pixels, stored in the color mode given by mode
+The alpha channel, the alpha byte of palette entries and the color key of the
+input are not used. For 16-bit input only the first byte of each channel is
+kept, and gray values of less than 8 bits are scaled up to the 0-255 range.
+Nothing is written if the color type is none of the ones handled below.*/
+static void getPixelColorsRGB8(unsigned char* LODEPNG_RESTRICT buffer, size_t numpixels,
+                               const unsigned char* LODEPNG_RESTRICT in,
+                               const LodePNGColorMode* mode) {
+  const unsigned num_channels = 3; /*bytes written to buffer per pixel*/
+  size_t i;
+  if(mode->colortype == LCT_GREY) {
+    if(mode->bitdepth == 8) {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = buffer[1] = buffer[2] = in[i];
+      }
+    } else if(mode->bitdepth == 16) {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = buffer[1] = buffer[2] = in[i * 2];
+      }
+    } else {
+      unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/
+      size_t j = 0; /*bit position in the input, advanced by readBitsFromReversedStream*/
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth);
+        buffer[0] = buffer[1] = buffer[2] = (value * 255) / highest;
+      }
+    }
+  } else if(mode->colortype == LCT_RGB) {
+    if(mode->bitdepth == 8) {
+      lodepng_memcpy(buffer, in, numpixels * 3); /*input already has the output layout*/
+    } else {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = in[i * 6 + 0];
+        buffer[1] = in[i * 6 + 2];
+        buffer[2] = in[i * 6 + 4];
+      }
+    }
+  } else if(mode->colortype == LCT_PALETTE) {
+    if(mode->bitdepth == 8) {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        unsigned index = in[i];
+        /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/
+        lodepng_memcpy(buffer, &mode->palette[index * 4], 3);
+      }
+    } else {
+      size_t j = 0; /*bit position in the input, advanced by readBitsFromReversedStream*/
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        unsigned index = readBitsFromReversedStream(&j, in, mode->bitdepth);
+        /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/
+        lodepng_memcpy(buffer, &mode->palette[index * 4], 3);
+      }
+    }
+  } else if(mode->colortype == LCT_GREY_ALPHA) {
+    if(mode->bitdepth == 8) {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = buffer[1] = buffer[2] = in[i * 2 + 0];
+      }
+    } else {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = buffer[1] = buffer[2] = in[i * 4 + 0];
+      }
+    }
+  } else if(mode->colortype == LCT_RGBA) {
+    if(mode->bitdepth == 8) {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        lodepng_memcpy(buffer, &in[i * 4], 3); /*copies R, G, B and skips the alpha byte*/
+      }
+    } else {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = in[i * 8 + 0];
+        buffer[1] = in[i * 8 + 2];
+        buffer[2] = in[i * 8 + 4];
+      }
+    }
+  }
+}
+
+/*Reads the pixel with index i (y * width + x) from the raw image in and returns it as
+16-bit RGBA through r, g, b and a. The color type given by mode must be 16-bit itself:
+every channel is read as 2 bytes, most significant byte first.
+For gray and RGB, a becomes 0 if the pixel equals the color key and 65535 otherwise.
+The outputs are left untouched for palette and any other type not handled below.*/
+static void getPixelColorRGBA16(unsigned short* r, unsigned short* g, unsigned short* b, unsigned short* a,
+                                const unsigned char* in, size_t i, const LodePNGColorMode* mode) {
+  switch(mode->colortype) {
+    case LCT_GREY: {
+      const unsigned char* px = in + i * 2;
+      const unsigned gray = 256u * px[0] + px[1];
+      *r = *g = *b = gray;
+      *a = (mode->key_defined && gray == mode->key_r) ? 0 : 65535;
+      break;
+    }
+    case LCT_RGB: {
+      const unsigned char* px = in + i * 6;
+      const unsigned red = 256u * px[0] + px[1];
+      const unsigned green = 256u * px[2] + px[3];
+      const unsigned blue = 256u * px[4] + px[5];
+      *r = red;
+      *g = green;
+      *b = blue;
+      *a = (mode->key_defined && red == mode->key_r
+            && green == mode->key_g && blue == mode->key_b) ? 0 : 65535;
+      break;
+    }
+    case LCT_GREY_ALPHA: {
+      const unsigned char* px = in + i * 4;
+      *r = *g = *b = 256u * px[0] + px[1];
+      *a = 256u * px[2] + px[3];
+      break;
+    }
+    case LCT_RGBA: {
+      const unsigned char* px = in + i * 8;
+      *r = 256u * px[0] + px[1];
+      *g = 256u * px[2] + px[3];
+      *b = 256u * px[4] + px[5];
+      *a = 256u * px[6] + px[7];
+      break;
+    }
+    default:
+      break; /*outputs stay as they were*/
+  }
+}
+
+/*Converts the raw image in, of w * h pixels in color mode mode_in, to color mode
+mode_out and writes the result to out.
+If both modes are equal, or the output is a palette mode without palette values
+while the input is a palette mode of the same bitdepth, the raw bytes are copied
+as they are. When converting to palette, the colors are looked up in the palette
+of mode_out, or in that of mode_in if mode_out has no palette entries.
+Returns 0 if ok, or an error code: 107 if mode_in is palette but has no palette,
+82 if a pixel's color is not in the output palette, or the error of
+color_tree_add if building the palette lookup tree failed.*/
+unsigned lodepng_convert(unsigned char* out, const unsigned char* in,
+                         const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in,
+                         unsigned w, unsigned h) {
+  size_t i;
+  ColorTree tree;
+  size_t numpixels = (size_t)w * (size_t)h;
+  unsigned error = 0;
+
+  if(mode_in->colortype == LCT_PALETTE && !mode_in->palette) {
+    return 107; /* error: must provide palette if input mode is palette */
+  }
+
+  if(lodepng_color_mode_equal(mode_out, mode_in)) {
+    /*nothing to convert: copy the raw bytes*/
+    size_t numbytes = lodepng_get_raw_size(w, h, mode_in);
+    lodepng_memcpy(out, in, numbytes);
+    return 0;
+  }
+
+  if(mode_out->colortype == LCT_PALETTE) {
+    size_t palettesize = mode_out->palettesize;
+    const unsigned char* palette = mode_out->palette;
+    size_t palsize = (size_t)1u << mode_out->bitdepth; /*amount of indices the bitdepth can represent*/
+    /*if the user specified output palette but did not give the values, assume
+    they want the values of the input color type (assuming that one is palette).
+    Note that we never create a new palette ourselves.*/
+    if(palettesize == 0) {
+      palettesize = mode_in->palettesize;
+      palette = mode_in->palette;
+      /*if the input was also palette with same bitdepth, then the color types are also
+      equal, so copy literally. This to preserve the exact indices that were in the PNG
+      even in case there are duplicate colors in the palette.*/
+      if(mode_in->colortype == LCT_PALETTE && mode_in->bitdepth == mode_out->bitdepth) {
+        size_t numbytes = lodepng_get_raw_size(w, h, mode_in);
+        lodepng_memcpy(out, in, numbytes);
+        return 0;
+      }
+    }
+    if(palettesize < palsize) palsize = palettesize;
+    /*fill the tree that maps RGBA colors to their palette index*/
+    color_tree_init(&tree);
+    for(i = 0; i != palsize; ++i) {
+      const unsigned char* p = &palette[i * 4];
+      error = color_tree_add(&tree, p[0], p[1], p[2], p[3], (unsigned)i);
+      if(error) break;
+    }
+  }
+
+  if(!error) {
+    if(mode_in->bitdepth == 16 && mode_out->bitdepth == 16) {
+      /*16-bit to 16-bit: go through 16-bit RGBA to keep the full precision*/
+      for(i = 0; i != numpixels; ++i) {
+        unsigned short r = 0, g = 0, b = 0, a = 0;
+        getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
+        rgba16ToPixel(out, i, mode_out, r, g, b, a);
+      }
+    } else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGBA) {
+      getPixelColorsRGBA8(out, numpixels, in, mode_in);
+    } else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGB) {
+      getPixelColorsRGB8(out, numpixels, in, mode_in);
+    } else {
+      /*all other combinations: go through 8-bit RGBA, pixel by pixel*/
+      unsigned char r = 0, g = 0, b = 0, a = 0;
+      for(i = 0; i != numpixels; ++i) {
+        getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in);
+        error = rgba8ToPixel(out, i, mode_out, &tree, r, g, b, a);
+        if(error) break;
+      }
+    }
+  }
+
+  /*the tree was only inited when the output is palette*/
+  if(mode_out->colortype == LCT_PALETTE) {
+    color_tree_cleanup(&tree);
+  }
+
+  return error;
+}
+
+
+/* Converts a single rgb color without alpha from color mode mode_in to color mode mode_out.
+The input is first scaled up to 16 bits per channel, then truncated to the bitdepth of mode_out.
+With a single channel type only the r channel is used: for gray only r_in is read and only
+r_out is written, for palette r_in is the palette index on input and r_out the palette index
+on output (g_out and b_out are then not written).
+Alpha is not supported on purpose: this is for bKGD, which can use any palette index but has no
+alpha channel itself, so the alpha values of the palette and the color key are ignored.
+Slow function, do not use it to process all pixels of an image.
+Returns 0 if ok, 82 if the input palette index is out of range or the color is not found in
+the output palette, or 31 if a color type is not one of the types handled here. */
+unsigned lodepng_convert_rgb(
+    unsigned* r_out, unsigned* g_out, unsigned* b_out,
+    unsigned r_in, unsigned g_in, unsigned b_in,
+    const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in) {
+  /*intermediate color, with 16 bits per channel*/
+  unsigned r16 = 0, g16 = 0, b16 = 0;
+  /*factor that scales an input value to 16 bits: 65535, 21845, 4369, 257, 1*/
+  const unsigned scale = 65535 / ((1u << mode_in->bitdepth) - 1u);
+  const unsigned shift = 16 - mode_out->bitdepth;
+
+  switch(mode_in->colortype) {
+    case LCT_GREY:
+    case LCT_GREY_ALPHA:
+      r16 = g16 = b16 = r_in * scale;
+      break;
+    case LCT_RGB:
+    case LCT_RGBA:
+      r16 = r_in * scale;
+      g16 = g_in * scale;
+      b16 = b_in * scale;
+      break;
+    case LCT_PALETTE: {
+      const unsigned char* entry;
+      if(r_in >= mode_in->palettesize) return 82;
+      entry = &mode_in->palette[r_in * 4];
+      r16 = entry[0] * 257u;
+      g16 = entry[1] * 257u;
+      b16 = entry[2] * 257u;
+      break;
+    }
+    default:
+      return 31;
+  }
+
+  /* now convert to output format */
+  switch(mode_out->colortype) {
+    case LCT_GREY:
+    case LCT_GREY_ALPHA:
+      *r_out = r16 >> shift;
+      return 0;
+    case LCT_RGB:
+    case LCT_RGBA:
+      *r_out = r16 >> shift;
+      *g_out = g16 >> shift;
+      *b_out = b16 >> shift;
+      return 0;
+    case LCT_PALETTE: {
+      unsigned n;
+      const unsigned r8 = r16 >> 8, g8 = g16 >> 8, b8 = b16 >> 8;
+      /* a 16-bit color cannot be in the palette */
+      if(r8 != (r16 & 255) || g8 != (g16 & 255) || b8 != (b16 & 255)) return 82;
+      /* the first palette entry with equal r, g and b wins */
+      for(n = 0; n < mode_out->palettesize; n++) {
+        const unsigned char* entry = &mode_out->palette[n * 4];
+        if(r8 == entry[0] && g8 == entry[1] && b8 == entry[2]) {
+          *r_out = n;
+          return 0;
+        }
+      }
+      return 82;
+    }
+    default:
+      return 31;
+  }
+}
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/*Resets stats to its starting state: no colored pixel, color key or alpha seen yet,
+no colors counted, 1 bit needed and 0 pixels processed, with both the palette and
+the greyscale output types allowed.*/
+void lodepng_color_stats_init(LodePNGColorStats* stats) {
+  /*settings*/
+  stats->allow_greyscale = 1;
+  stats->allow_palette = 1;
+  /*stats*/
+  stats->numpixels = 0;
+  stats->numcolors = 0;
+  stats->bits = 1;
+  stats->colored = 0;
+  stats->alpha = 0;
+  stats->key = 0;
+  stats->key_r = 0;
+  stats->key_g = 0;
+  stats->key_b = 0;
+}
+
+/*function used for debug purposes with C++*/
+/*void printColorStats(LodePNGColorStats* p) {
+  std::cout << "colored: " << (int)p->colored << ", ";
+  std::cout << "key: " << (int)p->key << ", ";
+  std::cout << "key_r: " << (int)p->key_r << ", ";
+  std::cout << "key_g: " << (int)p->key_g << ", ";
+  std::cout << "key_b: " << (int)p->key_b << ", ";
+  std::cout << "alpha: " << (int)p->alpha << ", ";
+  std::cout << "numcolors: " << (int)p->numcolors << ", ";
+  std::cout << "bits: " << (int)p->bits << std::endl;
+}*/
+
+/*Returns how many bits are needed to represent the given 8-bit value (max 8 bit):
+1 for 0 and 255, 2 for the other multiples of 85, 4 for the other multiples of 17,
+and 8 for everything else.*/
+static unsigned getValueRequiredBits(unsigned char value) {
+  /*The scaling of 2-bit and 4-bit values uses multiples of 85 and 17, and
+  85 and 255 are multiples of 17 themselves*/
+  if(value % 17 != 0) return 8;
+  if(value % 255 == 0) return 1; /*value is 0 or 255*/
+  if(value % 85 == 0) return 2;
+  return 4;
+}
+
+/*Updates stats with the colors of the w * h pixels of the raw image in, which is
+stored in color mode mode_in. stats must already have been inited. It may already
+hold the results of earlier calls, the new pixels are then added to those.
+The properties tracked are: whether a pixel is colored (r, g and b not all
+equal), whether alpha or a single color key is needed, how many bits per channel
+are needed, and, up to a limit, the distinct colors, which are also stored in
+stats->palette.
+Returns 0 if ok, or the error code of color_tree_add if adding a color failed.*/
+unsigned lodepng_compute_color_stats(LodePNGColorStats* stats,
+                                     const unsigned char* in, unsigned w, unsigned h,
+                                     const LodePNGColorMode* mode_in) {
+  size_t i;
+  ColorTree tree; /*the distinct colors found so far, for fast lookup*/
+  size_t numpixels = (size_t)w * (size_t)h;
+  unsigned error = 0;
+
+  /* mark things as done already if it would be impossible to have a more expensive case */
+  unsigned colored_done = lodepng_is_greyscale_type(mode_in) ? 1 : 0;
+  unsigned alpha_done = lodepng_can_have_alpha(mode_in) ? 0 : 1;
+  unsigned numcolors_done = 0;
+  unsigned bpp = lodepng_get_bpp(mode_in);
+  unsigned bits_done = (stats->bits == 1 && bpp == 1) ? 1 : 0;
+  unsigned sixteen = 0; /* whether the input image is 16 bit */
+  unsigned maxnumcolors = 257; /*counting colors stops once this amount is reached*/
+  if(bpp <= 8) maxnumcolors = LODEPNG_MIN(257, stats->numcolors + (1u << bpp));
+
+  stats->numpixels += numpixels;
+
+  /*if palette not allowed, no need to compute numcolors*/
+  if(!stats->allow_palette) numcolors_done = 1;
+
+  color_tree_init(&tree);
+
+  /*If the stats was already filled in from previous data, fill its palette in tree
+  and mark things as done already if we know they are the most expensive case already*/
+  if(stats->alpha) alpha_done = 1;
+  if(stats->colored) colored_done = 1;
+  if(stats->bits == 16) numcolors_done = 1;
+  if(stats->bits >= bpp) bits_done = 1;
+  if(stats->numcolors >= maxnumcolors) numcolors_done = 1;
+
+  if(!numcolors_done) {
+    for(i = 0; i < stats->numcolors; i++) {
+      const unsigned char* color = &stats->palette[i * 4];
+      error = color_tree_add(&tree, color[0], color[1], color[2], color[3], i);
+      if(error) goto cleanup;
+    }
+  }
+
+  /*Check if the 16-bit input is truly 16-bit*/
+  if(mode_in->bitdepth == 16 && !sixteen) {
+    unsigned short r, g, b, a;
+    for(i = 0; i != numpixels; ++i) {
+      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
+      if((r & 255) != ((r >> 8) & 255) || (g & 255) != ((g >> 8) & 255) ||
+         (b & 255) != ((b >> 8) & 255) || (a & 255) != ((a >> 8) & 255)) /*first and second byte differ*/ {
+        stats->bits = 16;
+        sixteen = 1;
+        bits_done = 1;
+        numcolors_done = 1; /*counting colors no longer useful, palette doesn't support 16-bit*/
+        break;
+      }
+    }
+  }
+
+  if(sixteen) {
+    unsigned short r = 0, g = 0, b = 0, a = 0;
+
+    for(i = 0; i != numpixels; ++i) {
+      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
+
+      if(!colored_done && (r != g || r != b)) {
+        stats->colored = 1;
+        colored_done = 1;
+      }
+
+      if(!alpha_done) {
+        unsigned matchkey = (r == stats->key_r && g == stats->key_g && b == stats->key_b);
+        if(a != 65535 && (a != 0 || (stats->key && !matchkey))) {
+          /*partially transparent pixel, or a second fully transparent color: a single color key is not enough*/
+          stats->alpha = 1;
+          stats->key = 0;
+          alpha_done = 1;
+        } else if(a == 0 && !stats->alpha && !stats->key) {
+          /*first fully transparent pixel: remember its color as color key candidate*/
+          stats->key = 1;
+          stats->key_r = r;
+          stats->key_g = g;
+          stats->key_b = b;
+        } else if(a == 65535 && stats->key && matchkey) {
+          /* Color key cannot be used if an opaque pixel also has that RGB color. */
+          stats->alpha = 1;
+          stats->key = 0;
+          alpha_done = 1;
+        }
+      }
+      if(alpha_done && numcolors_done && colored_done && bits_done) break;
+    }
+
+    /*second pass over all pixels: the loop above only compares pixels that come after the one that set the key*/
+    if(stats->key && !stats->alpha) {
+      for(i = 0; i != numpixels; ++i) {
+        getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
+        if(a != 0 && r == stats->key_r && g == stats->key_g && b == stats->key_b) {
+          /* Color key cannot be used if an opaque pixel also has that RGB color. */
+          stats->alpha = 1;
+          stats->key = 0;
+          alpha_done = 1;
+        }
+      }
+    }
+  } else /* < 16-bit */ {
+    unsigned char r = 0, g = 0, b = 0, a = 0;
+    for(i = 0; i != numpixels; ++i) {
+      getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in);
+
+      if(!bits_done && stats->bits < 8) {
+        /*only r is checked, < 8 bits is only relevant for grayscale*/
+        unsigned bits = getValueRequiredBits(r);
+        if(bits > stats->bits) stats->bits = bits;
+      }
+      bits_done = (stats->bits >= bpp);
+
+      if(!colored_done && (r != g || r != b)) {
+        stats->colored = 1;
+        colored_done = 1;
+        if(stats->bits < 8) stats->bits = 8; /*PNG has no colored modes with less than 8-bit per channel*/
+      }
+
+      if(!alpha_done) {
+        unsigned matchkey = (r == stats->key_r && g == stats->key_g && b == stats->key_b);
+        if(a != 255 && (a != 0 || (stats->key && !matchkey))) {
+          /*partially transparent pixel, or a second fully transparent color: a single color key is not enough*/
+          stats->alpha = 1;
+          stats->key = 0;
+          alpha_done = 1;
+          if(stats->bits < 8) stats->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
+        } else if(a == 0 && !stats->alpha && !stats->key) {
+          /*first fully transparent pixel: remember its color as color key candidate*/
+          stats->key = 1;
+          stats->key_r = r;
+          stats->key_g = g;
+          stats->key_b = b;
+        } else if(a == 255 && stats->key && matchkey) {
+          /* Color key cannot be used if an opaque pixel also has that RGB color. */
+          stats->alpha = 1;
+          stats->key = 0;
+          alpha_done = 1;
+          if(stats->bits < 8) stats->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
+        }
+      }
+
+      if(!numcolors_done) {
+        if(!color_tree_has(&tree, r, g, b, a)) {
+          error = color_tree_add(&tree, r, g, b, a, stats->numcolors);
+          if(error) goto cleanup;
+          if(stats->numcolors < 256) {
+            /*store the new color in the palette, only the first 256 colors are stored*/
+            unsigned char* p = stats->palette;
+            unsigned n = stats->numcolors;
+            p[n * 4 + 0] = r;
+            p[n * 4 + 1] = g;
+            p[n * 4 + 2] = b;
+            p[n * 4 + 3] = a;
+          }
+          ++stats->numcolors;
+          numcolors_done = stats->numcolors >= maxnumcolors;
+        }
+      }
+
+      if(alpha_done && numcolors_done && colored_done && bits_done) break;
+    }
+
+    /*second pass over all pixels: the loop above only compares pixels that come after the one that set the key*/
+    if(stats->key && !stats->alpha) {
+      for(i = 0; i != numpixels; ++i) {
+        getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in);
+        if(a != 0 && r == stats->key_r && g == stats->key_g && b == stats->key_b) {
+          /* Color key cannot be used if an opaque pixel also has that RGB color. */
+          stats->alpha = 1;
+          stats->key = 0;
+          alpha_done = 1;
+          if(stats->bits < 8) stats->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
+        }
+      }
+    }
+
+    /*make the stats's key always 16-bit for consistency - repeat each byte twice
+    NOTE(review): this runs on every call that takes this branch, also if the key
+    values were already widened by an earlier call on the same stats (for example
+    through lodepng_color_stats_add) - confirm that repeated calls are handled as
+    intended.*/
+    stats->key_r += (stats->key_r << 8);
+    stats->key_g += (stats->key_g << 8);
+    stats->key_b += (stats->key_b << 8);
+  }
+
+cleanup:
+  color_tree_cleanup(&tree);
+  return error;
+}
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+/*Adds a single color to the color stats, by running lodepng_compute_color_stats on a
+1x1 image of 16-bit RGBA that holds just this color. The stats must already have been
+inited. The color must be given as 16-bit (with 2 bytes repeating for 8-bit and 65535
+for opaque alpha channel). This function is expensive, do not call it for all pixels of
+an image but only for a few additional values.
+Returns the error code of lodepng_compute_color_stats, 0 if ok.*/
+static unsigned lodepng_color_stats_add(LodePNGColorStats* stats,
+                                        unsigned r, unsigned g, unsigned b, unsigned a) {
+  unsigned char pixel[8]; /*one RGBA pixel, 2 bytes per channel, most significant byte first*/
+  LodePNGColorMode rgba16;
+  unsigned result;
+
+  pixel[0] = (unsigned char)(r >> 8);
+  pixel[1] = (unsigned char)r;
+  pixel[2] = (unsigned char)(g >> 8);
+  pixel[3] = (unsigned char)g;
+  pixel[4] = (unsigned char)(b >> 8);
+  pixel[5] = (unsigned char)b;
+  pixel[6] = (unsigned char)(a >> 8);
+  pixel[7] = (unsigned char)a;
+
+  lodepng_color_mode_init(&rgba16);
+  rgba16.colortype = LCT_RGBA;
+  rgba16.bitdepth = 16;
+
+  result = lodepng_compute_color_stats(stats, pixel, 1, 1, &rgba16);
+  lodepng_color_mode_cleanup(&rgba16);
+  return result;
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*Computes a minimal PNG color model that can contain all colors as indicated by the stats.
+The stats should be computed with lodepng_compute_color_stats.
+mode_in is raw color profile of the image the stats were computed on, to copy palette order from when relevant.
+Minimal PNG color model means the color type and bit depth that gives smallest amount of bits in the output image,
+e.g. gray if only grayscale pixels, palette if less than 256 colors, color key if only single transparent color, ...
+This is used if auto_convert is enabled (it is by default).
+mode_out receives the chosen color type and bitdepth, plus the palette or the color key if one is used.
+Returns 0 if ok, or the error code of lodepng_palette_add if filling the palette failed.
+*/
+static unsigned auto_choose_color(LodePNGColorMode* mode_out,
+                                  const LodePNGColorMode* mode_in,
+                                  const LodePNGColorStats* stats) {
+  unsigned error = 0;
+  unsigned palettebits; /*bitdepth needed to index all colors of the palette*/
+  size_t i, n;
+  size_t numpixels = stats->numpixels;
+  unsigned palette_ok, gray_ok;
+
+  /*local copies: the values may get adjusted below, while stats itself is const*/
+  unsigned alpha = stats->alpha;
+  unsigned key = stats->key;
+  unsigned bits = stats->bits;
+
+  mode_out->key_defined = 0;
+
+  if(key && numpixels <= 16) {
+    alpha = 1; /*too few pixels to justify tRNS chunk overhead*/
+    key = 0;
+    if(bits < 8) bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
+  }
+
+  gray_ok = !stats->colored;
+  if(!stats->allow_greyscale) gray_ok = 0;
+  if(!gray_ok && bits < 8) bits = 8; /*only gray can use less than 8 bits per channel here*/
+
+  n = stats->numcolors;
+  palettebits = n <= 2 ? 1 : (n <= 4 ? 2 : (n <= 16 ? 4 : 8));
+  palette_ok = n <= 256 && bits <= 8 && n != 0; /*n==0 means likely numcolors wasn't computed*/
+  if(numpixels < n * 2) palette_ok = 0; /*don't add palette overhead if image has only a few pixels*/
+  if(gray_ok && !alpha && bits <= palettebits) palette_ok = 0; /*gray is less overhead*/
+  if(!stats->allow_palette) palette_ok = 0;
+
+  if(palette_ok) {
+    const unsigned char* p = stats->palette;
+    lodepng_palette_clear(mode_out); /*remove potential earlier palette*/
+    for(i = 0; i != stats->numcolors; ++i) {
+      error = lodepng_palette_add(mode_out, p[i * 4 + 0], p[i * 4 + 1], p[i * 4 + 2], p[i * 4 + 3]);
+      if(error) break;
+    }
+
+    mode_out->colortype = LCT_PALETTE;
+    mode_out->bitdepth = palettebits;
+
+    if(mode_in->colortype == LCT_PALETTE && mode_in->palettesize >= mode_out->palettesize
+        && mode_in->bitdepth == mode_out->bitdepth) {
+      /*If input should have same palette colors, keep original to preserve its order and prevent conversion*/
+      lodepng_color_mode_cleanup(mode_out);
+      lodepng_color_mode_copy(mode_out, mode_in);
+    }
+  } else /*8-bit or 16-bit per channel*/ {
+    mode_out->bitdepth = bits;
+    mode_out->colortype = alpha ? (gray_ok ? LCT_GREY_ALPHA : LCT_RGBA)
+                                : (gray_ok ? LCT_GREY : LCT_RGB);
+    if(key) {
+      unsigned mask = (1u << mode_out->bitdepth) - 1u; /*stats always uses 16-bit, mask converts it*/
+      mode_out->key_r = stats->key_r & mask;
+      mode_out->key_g = stats->key_g & mask;
+      mode_out->key_b = stats->key_b & mask;
+      mode_out->key_defined = 1;
+    }
+  }
+
+  return error;
+}
+
+#endif /* #ifdef LODEPNG_COMPILE_ENCODER */
+
+/*
+Paeth predictor, used by PNG filter type 4
+Returns the one of a, b and c that lies closest to a + b - c. If distances are equal,
+a is preferred over b, and b over c.
+The parameters are of type short, but should come from unsigned chars, the shorts
+are only needed to make the paeth calculation correct.
+*/
+static unsigned char paethPredictor(short a, short b, short c) {
+  /*distance of each input value to the estimate a + b - c*/
+  const short dist_a = LODEPNG_ABS(b - c);
+  const short dist_b = LODEPNG_ABS(a - c);
+  const short dist_c = LODEPNG_ABS(a + b - c - c);
+  short best = a;
+  short best_dist = dist_a;
+  /*only a strictly smaller distance replaces the current choice, this gives the priority if equal*/
+  if(dist_b < best_dist) {
+    best = b;
+    best_dist = dist_b;
+  }
+  if(dist_c < best_dist) best = c;
+  return (unsigned char)best;
+}
+
+/*shared values used by multiple Adam7 related functions: each table has one
+entry for each of the 7 passes, and together they determine which pixels of
+the image belong to that pass*/
+
+static const unsigned ADAM7_IX[7] = { 0, 4, 0, 2, 0, 1, 0 }; /*x start values*/
+static const unsigned ADAM7_IY[7] = { 0, 0, 4, 0, 2, 0, 1 }; /*y start values*/
+static const unsigned ADAM7_DX[7] = { 8, 8, 4, 4, 2, 2, 1 }; /*x delta values*/
+static const unsigned ADAM7_DY[7] = { 8, 8, 8, 4, 4, 2, 2 }; /*y delta values*/
+
+/*
+Outputs various dimensions and positions in the image related to the Adam7 reduced images.
+passw: output containing the width of the 7 passes
+passh: output containing the height of the 7 passes
+filter_passstart: output containing the index of the start and end of each
+ reduced image with filter bytes
+padded_passstart: output containing the index of the start and end of each
+ reduced image when without filter bytes but with padded scanlines
+passstart: output containing the index of the start and end of each reduced
+ image without padding between scanlines, but still padding between the images
+w, h: width and height of non-interlaced image
+bpp: bits per pixel
+"padded" is only relevant if bpp is less than 8 and a scanline or image does not
+ end at a full byte
+*/
+static void Adam7_getpassvalues(unsigned passw[7], unsigned passh[7], size_t filter_passstart[8],
+                                size_t padded_passstart[8], size_t passstart[8], unsigned w, unsigned h, unsigned bpp) {
+  /*the passstart values have 8 values: the 8th one indicates the byte after the end of the 7th (= last) pass*/
+  unsigned i;
+
+  /*calculate width and height in pixels of each pass; an empty pass gets both dimensions set to 0*/
+  for(i = 0; i != 7; ++i) {
+    passw[i] = (w + ADAM7_DX[i] - ADAM7_IX[i] - 1) / ADAM7_DX[i];
+    passh[i] = (h + ADAM7_DY[i] - ADAM7_IY[i] - 1) / ADAM7_DY[i];
+    if(passw[i] == 0) passh[i] = 0;
+    if(passh[i] == 0) passw[i] = 0;
+  }
+
+  filter_passstart[0] = padded_passstart[0] = passstart[0] = 0;
+  for(i = 0; i != 7; ++i) {
+    /*if passw[i] is 0, it's 0 bytes, not 1 (no filtertype-byte). Sizes are computed in size_t: unsigned would wrap*/
+    filter_passstart[i + 1] = filter_passstart[i]
+                            + ((passw[i] && passh[i]) ? passh[i] * (1u + ((size_t)passw[i] * bpp + 7u) / 8u) : 0);
+    /*bits padded if needed to fill full byte at end of each scanline*/
+    padded_passstart[i + 1] = padded_passstart[i] + passh[i] * (((size_t)passw[i] * bpp + 7u) / 8u);
+    /*only padded at end of reduced image; the bit count of a large pass does not fit in 32 bits*/
+    passstart[i + 1] = passstart[i] + ((size_t)passh[i] * passw[i] * bpp + 7u) / 8u;
+  }
+}
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / PNG Decoder                                                            / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*read the information from the header and store it in the LodePNGInfo (state->info_png). return value is error, also kept in state->error*/
+unsigned lodepng_inspect(unsigned* w, unsigned* h, LodePNGState* state,
+                         const unsigned char* in, size_t insize) {
+  unsigned width, height;
+  LodePNGInfo* info = &state->info_png;
+  if(insize == 0 || in == 0) {
+    CERROR_RETURN_ERROR(state->error, 48); /*error: the given data is empty*/
+  }
+  if(insize < 33) { /*the signature, header fields and CRC accessed below lie in in[0]..in[32]*/
+    CERROR_RETURN_ERROR(state->error, 27); /*error: the data length is smaller than the length of a PNG header*/
+  }
+
+  /*when decoding a new PNG image, make sure all parameters created after previous decoding are reset*/
+  /* TODO: remove this. One should use a new LodePNGState for new sessions */
+  lodepng_info_cleanup(info); /*note: the two size errors above return before this reset happens*/
+  lodepng_info_init(info);
+
+  if(in[0] != 137 || in[1] != 80 || in[2] != 78 || in[3] != 71
+     || in[4] != 13 || in[5] != 10 || in[6] != 26 || in[7] != 10) {
+    CERROR_RETURN_ERROR(state->error, 28); /*error: the first 8 bytes are not the correct PNG signature*/
+  }
+  if(lodepng_chunk_length(in + 8) != 13) { /*the first chunk starts directly after the 8 signature bytes*/
+    CERROR_RETURN_ERROR(state->error, 94); /*error: header size must be 13 bytes*/
+  }
+  if(!lodepng_chunk_type_equals(in + 8, "IHDR")) {
+    CERROR_RETURN_ERROR(state->error, 29); /*error: it doesn't start with a IHDR chunk!*/
+  }
+
+  /*read the values given in the header*/
+  width = lodepng_read32bitInt(&in[16]);
+  height = lodepng_read32bitInt(&in[20]);
+  /*TODO: remove the undocumented feature that allows to give null pointers to width or height*/
+  if(w) *w = width;
+  if(h) *h = height;
+  info->color.bitdepth = in[24];
+  info->color.colortype = (LodePNGColorType)in[25]; /*stored unvalidated here, checked further below*/
+  info->compression_method = in[26];
+  info->filter_method = in[27];
+  info->interlace_method = in[28];
+
+  /*errors returned only after the parsing so other values are still output*/
+
+  /*error: invalid image size*/
+  if(width == 0 || height == 0) CERROR_RETURN_ERROR(state->error, 93);
+  /*error: invalid colortype or bitdepth combination*/
+  state->error = checkColorValidity(info->color.colortype, info->color.bitdepth);
+  if(state->error) return state->error;
+  /*error: only compression method 0 is allowed in the specification*/
+  if(info->compression_method != 0) CERROR_RETURN_ERROR(state->error, 32);
+  /*error: only filter method 0 is allowed in the specification*/
+  if(info->filter_method != 0) CERROR_RETURN_ERROR(state->error, 33);
+  /*error: only interlace methods 0 and 1 exist in the specification*/
+  if(info->interlace_method > 1) CERROR_RETURN_ERROR(state->error, 34);
+
+  if(!state->decoder.ignore_crc) { /*the CRC is verified last, and only when the decoder setting asks for it*/
+    unsigned CRC = lodepng_read32bitInt(&in[29]); /*stored checksum directly follows the last header field in[28]*/
+    unsigned checksum = lodepng_crc32(&in[12], 17); /*17 bytes: in[12]..in[28], i.e. from the chunk type up to interlace*/
+    if(CRC != checksum) {
+      CERROR_RETURN_ERROR(state->error, 57); /*invalid CRC*/
+    }
+  }
+
+  return state->error; /*0 here: checkColorValidity left it at 0 and every later failure returned early*/
+}
+
+static unsigned unfilterScanline(unsigned char* recon, const unsigned char* scanline, const unsigned char* precon,
+                                 size_t bytewidth, unsigned char filterType, size_t length) {
+  /*
+  For PNG filter method 0
+  unfilter a PNG image scanline by scanline. when the pixels are smaller than 1 byte,
+  the filter works byte per byte (bytewidth = 1)
+  precon is the previous unfiltered scanline (null for the first line), recon the result, scanline the current one
+  the incoming scanlines do NOT include the filtertype byte, that one is given in the parameter filterType instead
+  recon and scanline MAY be the same memory address! precon must be disjoint.
+  returns 0 on success, 36 if filterType is not one of 0..4
+  */
+
+  size_t i;
+  switch(filterType) {
+    case 0: /*plain copy*/
+      for(i = 0; i != length; ++i) recon[i] = scanline[i];
+      break;
+    case 1: /*add the reconstructed byte bytewidth positions earlier; the first bytewidth bytes are copied*/
+      for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i];
+      for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + recon[i - bytewidth];
+      break;
+    case 2: /*add the byte at the same position in the previous line; without a previous line it is a copy*/
+      if(precon) {
+        for(i = 0; i != length; ++i) recon[i] = scanline[i] + precon[i];
+      } else {
+        for(i = 0; i != length; ++i) recon[i] = scanline[i];
+      }
+      break;
+    case 3: /*add the halved sum of the earlier byte and the previous-line byte; missing neighbors count as 0*/
+      if(precon) {
+        for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i] + (precon[i] >> 1u);
+        for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + ((recon[i - bytewidth] + precon[i]) >> 1u);
+      } else {
+        for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i];
+        for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + (recon[i - bytewidth] >> 1u);
+      }
+      break;
+    case 4: /*add the paethPredictor() of the earlier byte, the previous-line byte and the previous-line earlier byte*/
+      if(precon) {
+        for(i = 0; i != bytewidth; ++i) {
+          recon[i] = (scanline[i] + precon[i]); /*paethPredictor(0, precon[i], 0) is always precon[i]*/
+        }
+
+        /* Unroll independent paths of the paeth predictor. A 6x and 8x version would also be possible but that
+        adds too much code. Whether this actually speeds anything up at all depends on compiler and settings. */
+        if(bytewidth >= 4) { /*i continues from bytewidth; all inputs are loaded before any recon byte is stored*/
+          for(; i + 3 < length; i += 4) {
+            size_t j = i - bytewidth;
+            unsigned char s0 = scanline[i + 0], s1 = scanline[i + 1], s2 = scanline[i + 2], s3 = scanline[i + 3];
+            unsigned char r0 = recon[j + 0], r1 = recon[j + 1], r2 = recon[j + 2], r3 = recon[j + 3];
+            unsigned char p0 = precon[i + 0], p1 = precon[i + 1], p2 = precon[i + 2], p3 = precon[i + 3];
+            unsigned char q0 = precon[j + 0], q1 = precon[j + 1], q2 = precon[j + 2], q3 = precon[j + 3];
+            recon[i + 0] = s0 + paethPredictor(r0, p0, q0);
+            recon[i + 1] = s1 + paethPredictor(r1, p1, q1);
+            recon[i + 2] = s2 + paethPredictor(r2, p2, q2);
+            recon[i + 3] = s3 + paethPredictor(r3, p3, q3);
+          }
+        } else if(bytewidth >= 3) { /*only reached with bytewidth == 3*/
+          for(; i + 2 < length; i += 3) {
+            size_t j = i - bytewidth;
+            unsigned char s0 = scanline[i + 0], s1 = scanline[i + 1], s2 = scanline[i + 2];
+            unsigned char r0 = recon[j + 0], r1 = recon[j + 1], r2 = recon[j + 2];
+            unsigned char p0 = precon[i + 0], p1 = precon[i + 1], p2 = precon[i + 2];
+            unsigned char q0 = precon[j + 0], q1 = precon[j + 1], q2 = precon[j + 2];
+            recon[i + 0] = s0 + paethPredictor(r0, p0, q0);
+            recon[i + 1] = s1 + paethPredictor(r1, p1, q1);
+            recon[i + 2] = s2 + paethPredictor(r2, p2, q2);
+          }
+        } else if(bytewidth >= 2) { /*only reached with bytewidth == 2*/
+          for(; i + 1 < length; i += 2) {
+            size_t j = i - bytewidth;
+            unsigned char s0 = scanline[i + 0], s1 = scanline[i + 1];
+            unsigned char r0 = recon[j + 0], r1 = recon[j + 1];
+            unsigned char p0 = precon[i + 0], p1 = precon[i + 1];
+            unsigned char q0 = precon[j + 0], q1 = precon[j + 1];
+            recon[i + 0] = s0 + paethPredictor(r0, p0, q0);
+            recon[i + 1] = s1 + paethPredictor(r1, p1, q1);
+          }
+        }
+
+        for(; i != length; ++i) { /*bytes left over by the unrolled loops, or the whole line when bytewidth is 1*/
+          recon[i] = (scanline[i] + paethPredictor(recon[i - bytewidth], precon[i], precon[i - bytewidth]));
+        }
+      } else {
+        for(i = 0; i != bytewidth; ++i) {
+          recon[i] = scanline[i];
+        }
+        for(i = bytewidth; i < length; ++i) {
+          /*paethPredictor(recon[i - bytewidth], 0, 0) is always recon[i - bytewidth]*/
+          recon[i] = (scanline[i] + recon[i - bytewidth]);
+        }
+      }
+      break;
+    default: return 36; /*error: nonexistent filter type given*/
+  }
+  return 0;
+}
+
+static unsigned unfilter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) {
+  /*
+  For PNG filter method 0
+  Unfilters one image (called once without interlacing, once per reduced image with Adam7).
+  in holds h scanlines, each preceded by its filter type byte; out receives the h scanlines without that byte.
+  w and h are the dimensions of the (reduced) image, bpp is bits per pixel.
+  in and out may be the same address; returns 0, or the error reported by unfilterScanline.
+  */
+
+  unsigned row;
+  unsigned char* above = 0; /*previously reconstructed line, none yet for the first row*/
+
+  /*filter distance in bytes: 1 when bpp < 8, number of bytes per pixel otherwise*/
+  size_t bytewidth = (bpp + 7u) / 8u;
+  /*scanline size in bytes without the filter type byte*/
+  size_t linebytes = lodepng_get_raw_size_idat(w, 1, bpp) - 1u;
+
+  for(row = 0; row < h; ++row) {
+    const unsigned char* src = in + (1 + linebytes) * row; /*input rows are one byte longer: the filter type*/
+    unsigned char* dst = out + linebytes * row;
+    unsigned char filterType = src[0]; /*read before dst is written, since in and out may overlap*/
+
+    CERROR_TRY_RETURN(unfilterScanline(dst, src + 1, above, bytewidth, filterType, linebytes));
+
+    above = dst;
+  }
+
+  return 0;
+}
+
+/*
+in: Adam7 interlaced image, with no padding bits between scanlines, but between
+ reduced images so that each reduced image starts at a byte.
+out: the same pixels, but re-ordered so that they're now a non-interlaced image with size w*h
+bpp: bits per pixel
+out has the following size in bits: w * h * bpp.
+in is possibly bigger due to padding bits between reduced images.
+out must be big enough AND must be 0 everywhere if bpp < 8 in the current implementation
+(because that's likely a little bit faster)
+NOTE: comments about padding bits are only relevant if bpp < 8. All offsets are computed in size_t, not unsigned.
+*/
+static void Adam7_deinterlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) {
+  unsigned passw[7], passh[7];
+  size_t filter_passstart[8], padded_passstart[8], passstart[8];
+  unsigned i;
+
+  Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
+
+  if(bpp >= 8) {
+    for(i = 0; i != 7; ++i) {
+      unsigned x, y, b;
+      size_t bytewidth = bpp / 8u;
+      for(y = 0; y < passh[i]; ++y)
+      for(x = 0; x < passw[i]; ++x) {
+        /*widen before multiplying: the pixel index within a pass can exceed the range of unsigned*/
+        size_t pixelinstart = passstart[i] + ((size_t)y * passw[i] + x) * bytewidth;
+        size_t pixeloutstart = ((ADAM7_IY[i] + (size_t)y * ADAM7_DY[i]) * (size_t)w
+                             + ADAM7_IX[i] + (size_t)x * ADAM7_DX[i]) * bytewidth;
+        for(b = 0; b < bytewidth; ++b) out[pixeloutstart + b] = in[pixelinstart + b];
+      }
+    }
+  } else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/ {
+    for(i = 0; i != 7; ++i) {
+      unsigned x, y, b;
+      size_t ilinebits = (size_t)bpp * passw[i]; /*bits per line of this pass in the input*/
+      size_t olinebits = (size_t)bpp * w; /*bits per line of the full image in the output*/
+      size_t obp, ibp; /*bit pointers (for out and in buffer)*/
+      for(y = 0; y < passh[i]; ++y)
+      for(x = 0; x < passw[i]; ++x) {
+        ibp = (8 * passstart[i]) + ((size_t)y * ilinebits + (size_t)x * bpp);
+        obp = (ADAM7_IY[i] + (size_t)y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + (size_t)x * ADAM7_DX[i]) * bpp;
+        for(b = 0; b < bpp; ++b) {
+          unsigned char bit = readBitFromReversedStream(&ibp, in);
+          setBitOfReversedStream(&obp, out, bit);
+        }
+      }
+    }
+  }
+}
+
+
+static void removePaddingBits(unsigned char* out, const unsigned char* in,
+                              size_t olinebits, size_t ilinebits, unsigned h) {
+  /*
+  Copies h lines of olinebits bits each from in to out, skipping the (ilinebits - olinebits) extra bits that
+  follow every input line. Used after unfiltering when scanlines are not a multiple of 8 bits, so that the
+  Adam7 code, the color convert code and the user get a buffer without padding between lines.
+  in and out are allowed to be the same buffer, in may also be higher but still overlapping; in must
+  have >= ilinebits*h bits, out must have >= olinebits*h bits, olinebits must be <= ilinebits
+  also used to move bits after earlier such operations happened, e.g. in a sequence of reduced images from Adam7
+  only useful if (ilinebits - olinebits) is a value in the range 1..7
+  */
+  size_t ibp = 0, obp = 0; /*bit positions in the input and output buffer*/
+  size_t skip = ilinebits - olinebits; /*padding bits to jump over after each input line*/
+  unsigned row;
+  for(row = 0; row != h; ++row) {
+    size_t left;
+    for(left = olinebits; left != 0; --left) {
+      unsigned char bit = readBitFromReversedStream(&ibp, in);
+      setBitOfReversedStream(&obp, out, bit);
+    }
+    ibp += skip;
+  }
+}
+
+/*out must be a buffer large enough for the full image; in must hold the complete decompressed IDAT
+data (filter type bytes and possible padding bits included) and is used as scratch space.
+return value is error*/
+static unsigned postProcessScanlines(unsigned char* out, unsigned char* in,
+                                     unsigned w, unsigned h, const LodePNGInfo* info_png) {
+  /*
+  Turns the filtered, padded and possibly interlaced data into a plain 2D image buffer in the PNG's colortype.
+  Steps:
+  *) without Adam7: 1) unfilter 2) remove padding bits (= possible extra bits per scanline if bpp < 8)
+  *) with Adam7: 1) 7x unfilter 2) 7x remove padding bits 3) Adam7_deinterlace
+  NOTE: the in buffer will be overwritten with intermediate data!
+  */
+  unsigned bpp = lodepng_get_bpp(&info_png->color);
+  if(bpp == 0) return 31; /*error: invalid colortype*/
+
+  if(info_png->interlace_method != 0) /*Adam7*/ {
+    unsigned passw[7], passh[7], pass;
+    size_t filter_passstart[8], padded_passstart[8], passstart[8];
+    Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
+    for(pass = 0; pass != 7; ++pass) {
+      unsigned char* padded = &in[padded_passstart[pass]]; /*unfiltered lines of this pass, still padded*/
+      CERROR_TRY_RETURN(unfilter(padded, &in[filter_passstart[pass]], passw[pass], passh[pass], bpp));
+      /*TODO: possible efficiency improvement: if in this reduced image the bits fit nicely in 1 scanline,
+      move bytes instead of bits or move not at all*/
+      if(bpp < 8) {
+        /*drop the padding bits at the end of each scanline; padding between the reduced images may
+        remain afterwards, since every reduced image still starts at a byte boundary*/
+        removePaddingBits(&in[passstart[pass]], padded, passw[pass] * bpp,
+                          ((passw[pass] * bpp + 7u) / 8u) * 8u, passh[pass]);
+      }
+    }
+    Adam7_deinterlace(out, in, w, h, bpp);
+  } else /*no interlacing*/ {
+    unsigned linebits = w * bpp; /*bits of pixel data per scanline*/
+    unsigned paddedbits = ((linebits + 7u) / 8u) * 8u; /*the same, rounded up to whole bytes*/
+    if(bpp < 8 && linebits != paddedbits) {
+      CERROR_TRY_RETURN(unfilter(in, in, w, h, bpp));
+      removePaddingBits(out, in, linebits, paddedbits, h);
+    } else /*no padding to remove: unfilter straight into the out buffer*/ {
+      CERROR_TRY_RETURN(unfilter(out, in, w, h, bpp));
+    }
+  }
+
+  return 0;
+}
+
+/*PLTE chunk: fills color->palette with chunkLength / 3 RGB entries, alpha set to 255. returns 0, 38 or 83*/
+static unsigned readChunk_PLTE(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength) {
+  unsigned pos = 0, i;
+  color->palettesize = chunkLength / 3u;
+  /*do not leave an out-of-range palettesize behind on error: the palette buffer was not sized for it*/
+  if(color->palettesize == 0 || color->palettesize > 256) { color->palettesize = 0; return 38; }
+  lodepng_color_mode_alloc_palette(color);
+  if(!color->palette && color->palettesize) {
+    color->palettesize = 0;
+    return 83; /*alloc fail*/
+  }
+  for(i = 0; i != color->palettesize; ++i) {
+    color->palette[4 * i + 0] = data[pos++]; /*R*/
+    color->palette[4 * i + 1] = data[pos++]; /*G*/
+    color->palette[4 * i + 2] = data[pos++]; /*B*/
+    color->palette[4 * i + 3] = 255; /*alpha*/
+  }
+  return 0; /* OK */
+}
+
+static unsigned readChunk_tRNS(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength) {
+  unsigned i;
+  switch(color->colortype) {
+    case LCT_PALETTE:
+      /*one alpha byte per palette entry from entry 0 on. error: more alpha values than palette entries*/
+      if(chunkLength > color->palettesize) return 39;
+      for(i = 0; i != chunkLength; ++i) color->palette[4 * i + 3] = data[i];
+      break;
+    case LCT_GREY:
+      /*error: this chunk must be 2 bytes for grayscale image*/
+      if(chunkLength != 2) return 30;
+      color->key_r = color->key_g = color->key_b = 256u * data[0] + data[1];
+      color->key_defined = 1;
+      break;
+    case LCT_RGB:
+      /*error: this chunk must be 6 bytes for RGB image*/
+      if(chunkLength != 6) return 41;
+      color->key_r = 256u * data[0] + data[1];
+      color->key_g = 256u * data[2] + data[3];
+      color->key_b = 256u * data[4] + data[5];
+      color->key_defined = 1;
+      break;
+    default: return 42; /*error: tRNS chunk not allowed for other color models*/
+  }
+  return 0; /* OK */
+}
+
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+/*background color chunk (bKGD): stores the background color in info; the layout depends on the color type*/
+static unsigned readChunk_bKGD(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  switch(info->color.colortype) {
+    case LCT_PALETTE:
+      /*error: this chunk must be 1 byte for indexed color image*/
+      if(chunkLength != 1) return 43;
+      /*error: invalid palette index, or maybe this chunk appeared before PLTE*/
+      if(data[0] >= info->color.palettesize) return 103;
+      info->background_r = info->background_g = info->background_b = data[0];
+      break;
+    case LCT_GREY:
+    case LCT_GREY_ALPHA:
+      /*error: this chunk must be 2 bytes for grayscale image*/
+      if(chunkLength != 2) return 44;
+      /*the values are truncated to bitdepth in the PNG file*/
+      info->background_r = info->background_g = info->background_b = 256u * data[0] + data[1];
+      break;
+    case LCT_RGB:
+    case LCT_RGBA:
+      /*error: this chunk must be 6 bytes for RGB image*/
+      if(chunkLength != 6) return 45;
+      /*the values are truncated to bitdepth in the PNG file*/
+      info->background_r = 256u * data[0] + data[1];
+      info->background_g = 256u * data[2] + data[3];
+      info->background_b = 256u * data[4] + data[5];
+      break;
+    default: return 0; /*any other color type: nothing is stored and no error is reported*/
+  }
+  info->background_defined = 1;
+  return 0; /* OK */
+}
+
+/*text chunk (tEXt): splits the chunk into keyword and text and hands copies of both to lodepng_add_text*/
+static unsigned readChunk_tEXt(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  unsigned error = 0;
+  char *keyword = 0, *text = 0;
+  unsigned keylen = 0, textlen, textpos, n;
+
+  do /*runs exactly once; CERROR_BREAK leaves it early and continues at the cleanup below*/ {
+    /*the keyword runs up to the first 0 byte, or to the end of the chunk if there is none*/
+    while(keylen < chunkLength && data[keylen] != 0) ++keylen;
+    /*even though it's not allowed by the standard, no error is thrown if
+    there's no null termination char, if the text is empty*/
+    if(keylen < 1 || keylen > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
+
+    /*null-terminated copy of the keyword*/
+    keyword = (char*)lodepng_malloc(keylen + 1);
+    if(!keyword) CERROR_BREAK(error, 83); /*alloc fail*/
+    for(n = 0; n != keylen; ++n) keyword[n] = (char)data[n];
+    keyword[keylen] = 0;
+
+    /*the text is everything after the keyword's null terminator; it may be empty*/
+    textpos = keylen + 1;
+    textlen = (unsigned)(chunkLength < textpos ? 0 : chunkLength - textpos);
+
+    /*null-terminated copy of the text*/
+    text = (char*)lodepng_malloc(textlen + 1);
+    if(!text) CERROR_BREAK(error, 83); /*alloc fail*/
+    for(n = 0; n != textlen; ++n) text[n] = (char)data[textpos + n];
+    text[textlen] = 0;
+
+    error = lodepng_add_text(info, keyword, text);
+  } while(0);
+
+  /*both temporary strings are released here on every path, whether or not an error occurred*/
+  lodepng_free(keyword);
+  lodepng_free(text);
+
+  return error;
+}
+
+
+
+/*compressed text chunk (zTXt): keyword, 0 byte, compression method byte (must be 0), then zlib-compressed text*/
+static unsigned readChunk_zTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
+                               const unsigned char* data, size_t chunkLength) {
+  unsigned error = 0;
+  unsigned i;
+
+  unsigned length, string2_begin;
+  char *key = 0;
+  ucvector decoded;
+
+  ucvector_init(&decoded);
+
+  while(!error) /*not really a while loop, only used to break on error*/ {
+    for(length = 0; length < chunkLength && data[length] != 0; ++length) ;
+    if(length + 2 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/
+    if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
+
+    key = (char*)lodepng_malloc(length + 1);
+    if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
+
+    key[length] = 0;
+    for(i = 0; i != length; ++i) key[i] = (char)data[i];
+
+    if(data[length + 1] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/
+
+    string2_begin = length + 2;
+    if(string2_begin > chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/
+
+    length = (unsigned)chunkLength - string2_begin;
+    /*will fail if zlib error, e.g. if length is too small*/
+    error = zlib_decompress(&decoded.data, &decoded.size,
+                            &data[string2_begin],
+                            length, zlibsettings);
+    if(error) break;
+    /*the text is passed on as a C string: without the terminator it must not be used, so a failed append is an error*/
+    if(!ucvector_push_back(&decoded, 0)) CERROR_BREAK(error, 83 /*alloc fail*/);
+    error = lodepng_add_text(info, key, (char*)decoded.data);
+
+    break;
+  }
+
+  lodepng_free(key); /*key and decoded are temporaries, released on every path*/
+  ucvector_cleanup(&decoded);
+
+  return error;
+}
+
+/*international text chunk (iTXt): keyword, compression flag and method, language tag, translated keyword, text*/
+static unsigned readChunk_iTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
+                               const unsigned char* data, size_t chunkLength) {
+  unsigned error = 0;
+  unsigned i;
+
+  unsigned length, begin, compressed;
+  char *key = 0, *langtag = 0, *transkey = 0;
+  ucvector decoded;
+  ucvector_init(&decoded); /* TODO: only use in case of compressed text */
+
+  while(!error) /*not really a while loop, only used to break on error*/ {
+    /*Quick check if the chunk length isn't too small. Even without check
+    it'd still fail with other error checks below if it's too short. This just gives a different error code.*/
+    if(chunkLength < 5) CERROR_BREAK(error, 30); /*iTXt chunk too short*/
+
+    /*read the key*/
+    for(length = 0; length < chunkLength && data[length] != 0; ++length) ;
+    if(length + 3 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination char, corrupt?*/
+    if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
+
+    key = (char*)lodepng_malloc(length + 1);
+    if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
+
+    key[length] = 0;
+    for(i = 0; i != length; ++i) key[i] = (char)data[i];
+
+    /*read the compression method*/
+    compressed = data[length + 1];
+    if(data[length + 2] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/
+
+    /*even though it's not allowed by the standard, no error is thrown if
+    there's no null termination char, if the text is empty for the next 3 texts*/
+
+    /*read the langtag*/
+    begin = length + 3;
+    length = 0;
+    for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length;
+
+    langtag = (char*)lodepng_malloc(length + 1);
+    if(!langtag) CERROR_BREAK(error, 83); /*alloc fail*/
+
+    langtag[length] = 0;
+    for(i = 0; i != length; ++i) langtag[i] = (char)data[begin + i];
+
+    /*read the transkey*/
+    begin += length + 1;
+    length = 0;
+    for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length;
+
+    transkey = (char*)lodepng_malloc(length + 1);
+    if(!transkey) CERROR_BREAK(error, 83); /*alloc fail*/
+
+    transkey[length] = 0;
+    for(i = 0; i != length; ++i) transkey[i] = (char)data[begin + i];
+
+    /*read the actual text*/
+    begin += length + 1;
+
+    length = (unsigned)chunkLength < begin ? 0 : (unsigned)chunkLength - begin;
+
+    if(compressed) {
+      /*will fail if zlib error, e.g. if length is too small*/
+      error = zlib_decompress(&decoded.data, &decoded.size,
+                              &data[begin],
+                              length, zlibsettings);
+      if(error) break;
+      if(decoded.allocsize < decoded.size) decoded.allocsize = decoded.size;
+      /*the text is used as a C string below: a failed terminator append is an error, like the resize in the else branch*/
+      if(!ucvector_push_back(&decoded, 0)) CERROR_BREAK(error, 83 /*alloc fail*/);
+    } else {
+      if(!ucvector_resize(&decoded, length + 1)) CERROR_BREAK(error, 83 /*alloc fail*/);
+      decoded.data[length] = 0;
+      for(i = 0; i != length; ++i) decoded.data[i] = data[begin + i];
+    }
+
+    error = lodepng_add_itext(info, key, langtag, transkey, (char*)decoded.data);
+
+    break;
+  }
+
+  lodepng_free(key); /*all four buffers are temporaries, released on every path*/
+  lodepng_free(langtag);
+  lodepng_free(transkey);
+  ucvector_cleanup(&decoded);
+
+  return error;
+}
+
+static unsigned readChunk_tIME(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  /*tIME chunk: a 2-byte year (high byte first) followed by one byte each for the remaining fields*/
+  if(chunkLength != 7) return 73; /*invalid tIME chunk size*/
+
+  info->time.second = data[6];
+  info->time.minute = data[5];
+  info->time.hour = data[4];
+  info->time.day = data[3];
+  info->time.month = data[2];
+  info->time.year = data[0] * 256u + data[1];
+  info->time_defined = 1;
+  return 0; /* OK */
+}
+
+static unsigned readChunk_pHYs(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  if(chunkLength == 9) { /*two 4-byte values (high byte first) and one unit byte*/
+    info->phys_x = ((data[0] * 256u + data[1]) * 256u + data[2]) * 256u + data[3];
+    info->phys_y = ((data[4] * 256u + data[5]) * 256u + data[6]) * 256u + data[7];
+    info->phys_unit = data[8];
+    info->phys_defined = 1;
+    return 0; /* OK */
+  }
+  return 74; /*invalid pHYs chunk size*/
+}
+
+static unsigned readChunk_gAMA(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  if(chunkLength == 4) { /*a single 4-byte value, high byte first, stored as read*/
+    info->gama_gamma = ((data[0] * 256u + data[1]) * 256u + data[2]) * 256u + data[3];
+    info->gama_defined = 1;
+    return 0; /* OK */
+  }
+  return 96; /*invalid gAMA chunk size*/
+}
+
+static unsigned readChunk_cHRM(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  unsigned v[8], k; /*the eight 4-byte values of the chunk (high byte first), in file order*/
+  if(chunkLength != 32) return 97; /*invalid cHRM chunk size*/
+  for(k = 0; k != 8; ++k) v[k] = ((data[4 * k] * 256u + data[4 * k + 1]) * 256u + data[4 * k + 2]) * 256u + data[4 * k + 3];
+  info->chrm_defined = 1;
+  info->chrm_white_x = v[0];
+  info->chrm_white_y = v[1];
+  info->chrm_red_x   = v[2];
+  info->chrm_red_y   = v[3];
+  info->chrm_green_x = v[4];
+  info->chrm_green_y = v[5];
+  info->chrm_blue_x  = v[6];
+  info->chrm_blue_y  = v[7];
+  return 0; /* OK */
+}
+
+static unsigned readChunk_sRGB(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  if(chunkLength == 1) { /*the single payload byte is stored in srgb_intent*/
+    info->srgb_intent = data[0];
+    info->srgb_defined = 1;
+    return 0; /* OK */
+  }
+  return 98; /*invalid sRGB chunk size (this one is never ignored)*/
+}
+
+static unsigned readChunk_iCCP(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
+                               const unsigned char* data, size_t chunkLength) { /*iCCP: profile name + zlib-compressed ICC profile*/
+  unsigned error = 0;
+  unsigned i;
+
+  unsigned length, string2_begin;
+  ucvector decoded;
+
+  info->iccp_defined = 1; /*NOTE(review): set before any validation, so it stays 1 on the error returns below*/
+  if(info->iccp_name) lodepng_clear_icc(info); /*presumably drops a profile from an earlier iCCP chunk - confirm*/
+
+  for(length = 0; length < chunkLength && data[length] != 0; ++length) ; /*length of the name up to its 0 byte*/
+  if(length + 2 >= chunkLength) return 75; /*no null termination, corrupt?*/
+  if(length < 1 || length > 79) return 89; /*keyword too short or long*/
+
+  info->iccp_name = (char*)lodepng_malloc(length + 1);
+  if(!info->iccp_name) return 83; /*alloc fail*/
+
+  info->iccp_name[length] = 0;
+  for(i = 0; i != length; ++i) info->iccp_name[i] = (char)data[i];
+
+  if(data[length + 1] != 0) return 72; /*the 0 byte indicating compression must be 0; iccp_name stays set in info*/
+
+  string2_begin = length + 2; /*compressed profile starts after the name, its 0 byte and the method byte*/
+  if(string2_begin > chunkLength) return 75; /*no null termination, corrupt?*/
+
+  length = (unsigned)chunkLength - string2_begin;
+  ucvector_init(&decoded);
+  error = zlib_decompress(&decoded.data, &decoded.size,
+                          &data[string2_begin],
+                          length, zlibsettings);
+  if(!error) {
+    if(decoded.size) {
+      info->iccp_profile_size = decoded.size;
+      info->iccp_profile = (unsigned char*)lodepng_malloc(decoded.size); /*info gets its own copy of the profile*/
+      if(info->iccp_profile) {
+        lodepng_memcpy(info->iccp_profile, decoded.data, decoded.size);
+      } else {
+        error = 83; /* alloc fail */
+      }
+    } else {
+      error = 100; /*invalid ICC profile size*/
+    }
+  }
+  ucvector_cleanup(&decoded); /*the temporary decompression buffer is released on success and on error*/
+  return error;
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*
+Reads the single chunk that starts at byte offset pos of the PNG data in and,
+if it is one of the chunk types handled below, stores its contents in
+state->info_png. Chunk types that are not listed are skipped without error.
+Returns 0 on success, 30 if the chunk does not fit inside the insize bytes of
+in, 63 if the chunk length is larger than the max PNG chunk size, 57 if the
+CRC of a handled chunk is invalid (unless state->decoder.ignore_crc is set),
+or the error code of the readChunk_ function that parsed the chunk.
+*/
+unsigned lodepng_inspect_chunk(LodePNGState* state, size_t pos,
+                               const unsigned char* in, size_t insize) {
+  const unsigned char* chunk;
+  unsigned chunkLength;
+  const unsigned char* data;
+  unsigned unhandled = 0;
+  unsigned error = 0;
+
+  /*the 4 length bytes must lie inside the buffer; written so that "pos + 4" can not wrap around*/
+  if(insize < 4 || pos > insize - 4) return 30;
+  chunk = in + pos;
+  chunkLength = lodepng_chunk_length(chunk);
+  if(chunkLength > 2147483647) return 63;
+  /*length, type, data and CRC (12 bytes + chunkLength) must fit in the buffer. Compared as
+  sizes instead of pointers so that a large chunkLength can not overflow the address computation*/
+  if(insize - pos < 12 || chunkLength > insize - pos - 12) return 30;
+  data = lodepng_chunk_data_const(chunk);
+
+  if(lodepng_chunk_type_equals(chunk, "PLTE")) {
+    error = readChunk_PLTE(&state->info_png.color, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "tRNS")) {
+    error = readChunk_tRNS(&state->info_png.color, data, chunkLength);
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  } else if(lodepng_chunk_type_equals(chunk, "bKGD")) {
+    error = readChunk_bKGD(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "tEXt")) {
+    error = readChunk_tEXt(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "zTXt")) {
+    error = readChunk_zTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "iTXt")) {
+    error = readChunk_iTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "tIME")) {
+    error = readChunk_tIME(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "pHYs")) {
+    error = readChunk_pHYs(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "gAMA")) {
+    error = readChunk_gAMA(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "cHRM")) {
+    error = readChunk_cHRM(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "sRGB")) {
+    error = readChunk_sRGB(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "iCCP")) {
+    error = readChunk_iCCP(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+  } else {
+    /* unhandled chunk is ok (is not an error) */
+    unhandled = 1;
+  }
+
+  /*the CRC is only verified for chunks that were parsed successfully*/
+  if(!error && !unhandled && !state->decoder.ignore_crc) {
+    if(lodepng_chunk_check_crc(chunk)) return 57; /*invalid CRC*/
+  }
+
+  return error;
+}
+
+/*
+Reads a PNG, the result will be in the same color type as the PNG (hence "generic").
+out: receives a newly allocated buffer with the decoded pixels, or stays 0 on error.
+w, h: receive the image dimensions read from the header; set to 0 before parsing starts.
+state: decoder settings are read from state->decoder, chunk contents are stored in
+       state->info_png and the result code is stored in state->error (0 = success).
+in, insize: the complete PNG file in memory.
+*/
+static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h,
+                          LodePNGState* state,
+                          const unsigned char* in, size_t insize) {
+  unsigned char IEND = 0;
+  const unsigned char* chunk;
+  size_t i;
+  ucvector idat; /*the data from idat chunks*/
+  unsigned char* scanlines = 0;
+  size_t scanlines_size = 0, expected_size = 0;
+  size_t outsize = 0;
+
+  /*for unknown chunk order*/
+  unsigned unknown = 0;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  unsigned critical_pos = 1; /*1 = after IHDR, 2 = after PLTE, 3 = after IDAT*/
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+
+  /* safe output values in case error happens */
+  *out = 0;
+  *w = *h = 0;
+
+  state->error = lodepng_inspect(w, h, state, in, insize); /*reads header and resets other parameters in state->info_png*/
+  if(state->error) return;
+
+  if(lodepng_pixel_overflow(*w, *h, &state->info_png.color, &state->info_raw)) {
+    CERROR_RETURN(state->error, 92); /*overflow possible due to amount of pixels*/
+  }
+
+  ucvector_init(&idat);
+  chunk = &in[33]; /*first byte of the first chunk after the header*/
+
+  /*loop through the chunks, ignoring unknown chunks and stopping at IEND chunk.
+  The data of all IDAT chunks is concatenated in the idat vector*/
+  while(!IEND && !state->error) {
+    unsigned chunkLength;
+    const unsigned char* data; /*the data in the chunk*/
+
+    /*error: size of the in buffer too small to contain next chunk*/
+    if((size_t)((chunk - in) + 12) > insize || chunk < in) {
+      if(state->decoder.ignore_end) break; /*other errors may still happen though*/
+      CERROR_BREAK(state->error, 30);
+    }
+
+    /*length of the data of the chunk, excluding the length bytes, chunk type and CRC bytes*/
+    chunkLength = lodepng_chunk_length(chunk);
+    /*error: chunk length larger than the max PNG chunk size*/
+    if(chunkLength > 2147483647) {
+      if(state->decoder.ignore_end) break; /*other errors may still happen though*/
+      CERROR_BREAK(state->error, 63);
+    }
+
+    if((size_t)((chunk - in) + chunkLength + 12) > insize || (chunk + chunkLength + 12) < in) {
+      CERROR_BREAK(state->error, 64); /*error: size of the in buffer too small to contain next chunk*/
+    }
+
+    data = lodepng_chunk_data_const(chunk);
+
+    unknown = 0;
+
+    /*IDAT chunk, containing compressed image data*/
+    if(lodepng_chunk_type_equals(chunk, "IDAT")) {
+      size_t oldsize = idat.size;
+      size_t newsize;
+      if(lodepng_addofl(oldsize, chunkLength, &newsize)) CERROR_BREAK(state->error, 95);
+      if(!ucvector_resize(&idat, newsize)) CERROR_BREAK(state->error, 83 /*alloc fail*/);
+      for(i = 0; i != chunkLength; ++i) idat.data[oldsize + i] = data[i];
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+      critical_pos = 3;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    } else if(lodepng_chunk_type_equals(chunk, "IEND")) {
+      /*IEND chunk*/
+      IEND = 1;
+    } else if(lodepng_chunk_type_equals(chunk, "PLTE")) {
+      /*palette chunk (PLTE)*/
+      state->error = readChunk_PLTE(&state->info_png.color, data, chunkLength);
+      if(state->error) break;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+      critical_pos = 2;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    } else if(lodepng_chunk_type_equals(chunk, "tRNS")) {
+      /*palette transparency chunk (tRNS). Even though this one is an ancillary chunk , it is still compiled
+      in without 'LODEPNG_COMPILE_ANCILLARY_CHUNKS' because it contains essential color information that
+      affects the alpha channel of pixels. */
+      state->error = readChunk_tRNS(&state->info_png.color, data, chunkLength);
+      if(state->error) break;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+      /*background color chunk (bKGD)*/
+    } else if(lodepng_chunk_type_equals(chunk, "bKGD")) {
+      state->error = readChunk_bKGD(&state->info_png, data, chunkLength);
+      if(state->error) break;
+    } else if(lodepng_chunk_type_equals(chunk, "tEXt")) {
+      /*text chunk (tEXt)*/
+      if(state->decoder.read_text_chunks) {
+        state->error = readChunk_tEXt(&state->info_png, data, chunkLength);
+        if(state->error) break;
+      }
+    } else if(lodepng_chunk_type_equals(chunk, "zTXt")) {
+      /*compressed text chunk (zTXt)*/
+      if(state->decoder.read_text_chunks) {
+        state->error = readChunk_zTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+        if(state->error) break;
+      }
+    } else if(lodepng_chunk_type_equals(chunk, "iTXt")) {
+      /*international text chunk (iTXt)*/
+      if(state->decoder.read_text_chunks) {
+        state->error = readChunk_iTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+        if(state->error) break;
+      }
+    } else if(lodepng_chunk_type_equals(chunk, "tIME")) {
+      state->error = readChunk_tIME(&state->info_png, data, chunkLength);
+      if(state->error) break;
+    } else if(lodepng_chunk_type_equals(chunk, "pHYs")) {
+      state->error = readChunk_pHYs(&state->info_png, data, chunkLength);
+      if(state->error) break;
+    } else if(lodepng_chunk_type_equals(chunk, "gAMA")) {
+      state->error = readChunk_gAMA(&state->info_png, data, chunkLength);
+      if(state->error) break;
+    } else if(lodepng_chunk_type_equals(chunk, "cHRM")) {
+      state->error = readChunk_cHRM(&state->info_png, data, chunkLength);
+      if(state->error) break;
+    } else if(lodepng_chunk_type_equals(chunk, "sRGB")) {
+      state->error = readChunk_sRGB(&state->info_png, data, chunkLength);
+      if(state->error) break;
+    } else if(lodepng_chunk_type_equals(chunk, "iCCP")) {
+      state->error = readChunk_iCCP(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+      if(state->error) break;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    } else /*it's not an implemented chunk type, so ignore it: skip over the data*/ {
+      /*error: unknown critical chunk (5th bit of first byte of chunk type is 0)*/
+      if(!state->decoder.ignore_critical && !lodepng_chunk_ancillary(chunk)) {
+        CERROR_BREAK(state->error, 69);
+      }
+
+      unknown = 1;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+      if(state->decoder.remember_unknown_chunks) {
+        state->error = lodepng_chunk_append(&state->info_png.unknown_chunks_data[critical_pos - 1],
+                                            &state->info_png.unknown_chunks_size[critical_pos - 1], chunk);
+        if(state->error) break;
+      }
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    }
+
+    if(!state->decoder.ignore_crc && !unknown) /*check CRC if wanted, only on known chunk types*/ {
+      if(lodepng_chunk_check_crc(chunk)) CERROR_BREAK(state->error, 57); /*invalid CRC*/
+    }
+
+    if(!IEND) chunk = lodepng_chunk_next_const(chunk, in + insize);
+  }
+
+  /*only report the missing palette when nothing else went wrong, so that a more specific
+  error from the chunk loop above is not overwritten*/
+  if(!state->error && state->info_png.color.colortype == LCT_PALETTE && !state->info_png.color.palette) {
+    state->error = 106; /* error: PNG file must have PLTE chunk if color type is palette */
+  }
+
+  /*predict output size, to allocate exact size for output buffer to avoid more dynamic allocation.
+  If the decompressed size does not match the prediction, the image must be corrupt.*/
+  if(state->info_png.interlace_method == 0) {
+    size_t bpp = lodepng_get_bpp(&state->info_png.color);
+    expected_size = lodepng_get_raw_size_idat(*w, *h, bpp);
+  } else {
+    size_t bpp = lodepng_get_bpp(&state->info_png.color);
+    /*Adam-7 interlaced: expected size is the sum of the 7 sub-images sizes*/
+    expected_size = 0;
+    expected_size += lodepng_get_raw_size_idat((*w + 7) >> 3, (*h + 7) >> 3, bpp);
+    if(*w > 4) expected_size += lodepng_get_raw_size_idat((*w + 3) >> 3, (*h + 7) >> 3, bpp);
+    expected_size += lodepng_get_raw_size_idat((*w + 3) >> 2, (*h + 3) >> 3, bpp);
+    if(*w > 2) expected_size += lodepng_get_raw_size_idat((*w + 1) >> 2, (*h + 3) >> 2, bpp);
+    expected_size += lodepng_get_raw_size_idat((*w + 1) >> 1, (*h + 1) >> 2, bpp);
+    if(*w > 1) expected_size += lodepng_get_raw_size_idat((*w + 0) >> 1, (*h + 1) >> 1, bpp);
+    expected_size += lodepng_get_raw_size_idat((*w + 0), (*h + 0) >> 1, bpp);
+  }
+  if(!state->error) {
+    /* This allocated data will be realloced by zlib_decompress, initially at
+    smaller size again. But the fact that it's already allocated at full size
+    here speeds the multiple reallocs up. TODO: make zlib_decompress support
+    receiving already allocated buffer with expected size instead. */
+    scanlines = (unsigned char*)lodepng_malloc(expected_size);
+    if(!scanlines) state->error = 83; /*alloc fail*/
+    scanlines_size = 0;
+  }
+  if(!state->error) {
+    state->error = zlib_decompress(&scanlines, &scanlines_size, idat.data,
+                                   idat.size, &state->decoder.zlibsettings);
+    if(!state->error && scanlines_size != expected_size) state->error = 91; /*decompressed size doesn't match prediction*/
+  }
+  ucvector_cleanup(&idat);
+
+  if(!state->error) {
+    outsize = lodepng_get_raw_size(*w, *h, &state->info_png.color);
+    *out = (unsigned char*)lodepng_malloc(outsize);
+    if(!*out) state->error = 83; /*alloc fail*/
+  }
+  if(!state->error) {
+    lodepng_memset(*out, 0, outsize);
+    state->error = postProcessScanlines(*out, scanlines, *w, *h, &state->info_png);
+  }
+  lodepng_free(scanlines);
+}
+
+/*
+Decodes the PNG in the buffer in (insize bytes) with decodeGeneric and, when
+state->decoder.color_convert is set and the PNG's color mode differs from
+state->info_raw, converts the pixels to the color mode given in state->info_raw.
+The pixels are returned in a newly allocated buffer through out, the dimensions
+through w and h. Returns 0 on success, else an error code.
+*/
+unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h,
+                        LodePNGState* state,
+                        const unsigned char* in, size_t insize) {
+  unsigned char* png_pixels;
+  size_t converted_size;
+
+  *out = 0;
+  decodeGeneric(out, w, h, state, in, insize);
+  if(state->error) return state->error;
+
+  if(!state->decoder.color_convert) {
+    /*no conversion wanted: store the info_png color settings on the info_raw so that the
+    info_raw still reflects what colortype the raw image has to the end user*/
+    state->error = lodepng_color_mode_copy(&state->info_raw, &state->info_png.color);
+    return state->error;
+  }
+
+  /*same color type, no copying or converting of data needed (state->error is 0 here)*/
+  if(lodepng_color_mode_equal(&state->info_raw, &state->info_png.color)) return state->error;
+
+  /*color conversion needed*/
+
+  /*TODO: check if this works according to the statement in the documentation: "The converter can convert
+  from grayscale input color type, to 8-bit grayscale or grayscale with alpha"*/
+  if(state->info_raw.colortype != LCT_RGB && state->info_raw.colortype != LCT_RGBA
+     && state->info_raw.bitdepth != 8) {
+    return 56; /*unsupported color mode conversion*/
+  }
+
+  png_pixels = *out; /*the pixels in the PNG's own color mode, released below*/
+  converted_size = lodepng_get_raw_size(*w, *h, &state->info_raw);
+  *out = (unsigned char*)lodepng_malloc(converted_size);
+  if(*out) {
+    state->error = lodepng_convert(*out, png_pixels, &state->info_raw,
+                                   &state->info_png.color, *w, *h);
+  } else {
+    state->error = 83; /*alloc fail*/
+  }
+  lodepng_free(png_pixels);
+  return state->error;
+}
+
+/*
+Decodes the PNG in the buffer in (insize bytes) to pixels of the requested colortype and
+bitdepth, using a temporary LodePNGState with the default settings of lodepng_state_init.
+Returns the error code of lodepng_decode.
+*/
+unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in,
+                               size_t insize, LodePNGColorType colortype, unsigned bitdepth) {
+  LodePNGState tmp;
+  unsigned result;
+
+  lodepng_state_init(&tmp);
+  /*request the raw output format the caller asked for*/
+  tmp.info_raw.colortype = colortype;
+  tmp.info_raw.bitdepth = bitdepth;
+
+  result = lodepng_decode(out, w, h, &tmp, in, insize);
+
+  lodepng_state_cleanup(&tmp);
+  return result;
+}
+
+/*Same as lodepng_decode_memory, with the output fixed to LCT_RGBA with bit depth 8.*/
+unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize) {
+  const unsigned bitdepth = 8;
+  return lodepng_decode_memory(out, w, h, in, insize, LCT_RGBA, bitdepth);
+}
+
+/*Same as lodepng_decode_memory, with the output fixed to LCT_RGB with bit depth 8.*/
+unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize) {
+  const unsigned bitdepth = 8;
+  return lodepng_decode_memory(out, w, h, in, insize, LCT_RGB, bitdepth);
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Loads the file filename into memory with lodepng_load_file and decodes it with
+lodepng_decode_memory to the requested colortype and bitdepth.
+Returns 0 on success, else the error code of the step that failed.
+*/
+unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename,
+                             LodePNGColorType colortype, unsigned bitdepth) {
+  unsigned char* filedata = 0;
+  size_t filesize;
+  unsigned status;
+
+  /* safe output values in case error happens */
+  *out = 0;
+  *w = 0;
+  *h = 0;
+
+  status = lodepng_load_file(&filedata, &filesize, filename);
+  if(status == 0) {
+    status = lodepng_decode_memory(out, w, h, filedata, filesize, colortype, bitdepth);
+  }
+
+  /*the file contents are no longer needed, whether or not decoding worked*/
+  lodepng_free(filedata);
+  return status;
+}
+
+/*Same as lodepng_decode_file, with the output fixed to LCT_RGBA with bit depth 8.*/
+unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename) {
+  const unsigned bitdepth = 8;
+  return lodepng_decode_file(out, w, h, filename, LCT_RGBA, bitdepth);
+}
+
+/*Same as lodepng_decode_file, with the output fixed to LCT_RGB with bit depth 8.*/
+unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename) {
+  const unsigned bitdepth = 8;
+  return lodepng_decode_file(out, w, h, filename, LCT_RGB, bitdepth);
+}
+#endif /*LODEPNG_COMPILE_DISK*/
+
+/*
+Fills in the default decoder settings: color conversion on, all "ignore" options off,
+text chunks read, unknown chunks not remembered, default zlib decompress settings.
+*/
+void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings) {
+  lodepng_decompress_settings_init(&settings->zlibsettings);
+
+  /*strict by default: nothing is ignored*/
+  settings->ignore_end = 0;
+  settings->ignore_critical = 0;
+  settings->ignore_crc = 0;
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  settings->remember_unknown_chunks = 0;
+  settings->read_text_chunks = 1;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+  settings->color_convert = 1;
+}
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER)
+
+/*
+Initializes every part of state: the decoder and/or encoder settings (whichever are
+compiled in), the raw color mode and the PNG info. The error field starts at 1.
+*/
+void lodepng_state_init(LodePNGState* state) {
+#ifdef LODEPNG_COMPILE_DECODER
+  lodepng_decoder_settings_init(&state->decoder);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+  lodepng_encoder_settings_init(&state->encoder);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+  lodepng_color_mode_init(&state->info_raw);
+  lodepng_info_init(&state->info_png);
+
+  /*non-zero until an operation stores its own result code here*/
+  state->error = 1;
+}
+
+/*Cleans up the two members of state that have their own cleanup function: info_raw and info_png.*/
+void lodepng_state_cleanup(LodePNGState* state) {
+  LodePNGColorMode* raw = &state->info_raw;
+  LodePNGInfo* png = &state->info_png;
+
+  lodepng_color_mode_cleanup(raw);
+  lodepng_info_cleanup(png);
+}
+
+/*
+Makes dest a copy of source. dest is cleaned up first, so it must already be initialized.
+The result code of the copy is stored in dest->error.
+*/
+void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source) {
+  lodepng_state_cleanup(dest);
+
+  /*plain struct copy for all the simple fields...*/
+  *dest = *source;
+
+  /*...then reset the two members copied with their own copy function, so that the
+  calls below start from a freshly initialized destination*/
+  lodepng_color_mode_init(&dest->info_raw);
+  lodepng_info_init(&dest->info_png);
+
+  dest->error = lodepng_color_mode_copy(&dest->info_raw, &source->info_raw);
+  if(dest->error) return;
+
+  dest->error = lodepng_info_copy(&dest->info_png, &source->info_png);
+}
+
+#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / PNG Encoder                                                            / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*
+Appends a new chunk to the out vector with lodepng_chunk_create.
+chunkName must be string of 4 characters, data holds length bytes of chunk content.
+Returns 0 on success, else the error from lodepng_chunk_create.
+*/
+static unsigned addChunk(ucvector* out, const char* chunkName, const unsigned char* data, size_t length) {
+  const unsigned chunkLength = (unsigned)length;
+
+  CERROR_TRY_RETURN(lodepng_chunk_create(&out->data, &out->size, chunkLength, chunkName, data));
+
+  /*fix the allocsize again: data and size were updated directly, so bring allocsize back in sync*/
+  out->allocsize = out->size;
+  return 0;
+}
+
+/*Appends the 8 bytes PNG signature, aka the magic bytes, to out.*/
+static void writeSignature(ucvector* out) {
+  static const unsigned char signature[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+  size_t n;
+
+  for(n = 0; n != sizeof(signature); ++n) {
+    ucvector_push_back(out, signature[n]);
+  }
+}
+
+/*
+Appends the IHDR chunk to out: 13 bytes holding width, height, bit depth, color type,
+compression method (0), filter method (0) and interlace method.
+Returns the result of addChunk.
+*/
+static unsigned addChunk_IHDR(ucvector* out, unsigned w, unsigned h,
+                              LodePNGColorType colortype, unsigned bitdepth, unsigned interlace_method) {
+  unsigned char header[13];
+  unsigned char* field = header;
+
+  lodepng_set32bitInt(field, w); /*width*/
+  field += 4;
+  lodepng_set32bitInt(field, h); /*height*/
+  field += 4;
+
+  field[0] = (unsigned char)bitdepth; /*bit depth*/
+  field[1] = (unsigned char)colortype; /*color type*/
+  field[2] = 0; /*compression method*/
+  field[3] = 0; /*filter method*/
+  field[4] = interlace_method; /*interlace method*/
+
+  return addChunk(out, "IHDR", header, sizeof(header));
+}
+
+/*
+Appends the PLTE chunk to out. info->palette holds 4 bytes per entry; only the
+first three of each entry are written, the alpha byte is left out.
+Returns the result of addChunk.
+*/
+static unsigned addChunk_PLTE(ucvector* out, const LodePNGColorMode* info) {
+  unsigned result;
+  size_t entry;
+  ucvector rgb;
+
+  ucvector_init(&rgb);
+  for(entry = 0; entry != info->palettesize; ++entry) {
+    const unsigned char* color = &info->palette[4 * entry];
+    /*add all channels except alpha channel*/
+    ucvector_push_back(&rgb, color[0]);
+    ucvector_push_back(&rgb, color[1]);
+    ucvector_push_back(&rgb, color[2]);
+  }
+
+  result = addChunk(out, "PLTE", rgb.data, rgb.size);
+  ucvector_cleanup(&rgb);
+  return result;
+}
+
+/*
+Appends the tRNS chunk to out. For LCT_PALETTE it holds the alpha value of each palette
+entry, for LCT_GREY and LCT_RGB the color key (only if info->key_defined) as 16-bit
+values, high byte first. For any other color type the chunk content is empty.
+Returns the result of addChunk.
+*/
+static unsigned addChunk_tRNS(ucvector* out, const LodePNGColorMode* info) {
+  unsigned result;
+  size_t n;
+  ucvector alpha;
+
+  ucvector_init(&alpha);
+
+  switch(info->colortype) {
+    case LCT_PALETTE: {
+      size_t count = info->palettesize;
+      /*the tail of palette values that all have 255 as alpha, does not have to be encoded*/
+      while(count != 0 && info->palette[4 * (count - 1) + 3] == 255) --count;
+      /*add only alpha channel*/
+      for(n = 0; n != count; ++n) ucvector_push_back(&alpha, info->palette[4 * n + 3]);
+      break;
+    }
+    case LCT_GREY:
+      if(info->key_defined) {
+        ucvector_push_back(&alpha, (unsigned char)(info->key_r >> 8));
+        ucvector_push_back(&alpha, (unsigned char)(info->key_r & 255));
+      }
+      break;
+    case LCT_RGB:
+      if(info->key_defined) {
+        ucvector_push_back(&alpha, (unsigned char)(info->key_r >> 8));
+        ucvector_push_back(&alpha, (unsigned char)(info->key_r & 255));
+        ucvector_push_back(&alpha, (unsigned char)(info->key_g >> 8));
+        ucvector_push_back(&alpha, (unsigned char)(info->key_g & 255));
+        ucvector_push_back(&alpha, (unsigned char)(info->key_b >> 8));
+        ucvector_push_back(&alpha, (unsigned char)(info->key_b & 255));
+      }
+      break;
+    default:
+      break; /*nothing to add for the other color types*/
+  }
+
+  result = addChunk(out, "tRNS", alpha.data, alpha.size);
+  ucvector_cleanup(&alpha);
+  return result;
+}
+
+/*
+Compresses datasize bytes of data with zlib_compress and appends the result to out as
+one IDAT chunk. Returns 0 on success, else the error of the step that failed.
+*/
+static unsigned addChunk_IDAT(ucvector* out, const unsigned char* data, size_t datasize,
+                              LodePNGCompressSettings* zlibsettings) {
+  unsigned status;
+  ucvector packed;
+
+  ucvector_init(&packed);
+
+  /*compress with the Zlib compressor*/
+  status = zlib_compress(&packed.data, &packed.size, data, datasize, zlibsettings);
+  if(status == 0) {
+    status = addChunk(out, "IDAT", packed.data, packed.size);
+  }
+
+  ucvector_cleanup(&packed);
+  return status;
+}
+
+/*Appends the IEND chunk, which has no content, to out. Returns the result of addChunk.*/
+static unsigned addChunk_IEND(ucvector* out) {
+  return addChunk(out, "IEND", 0, 0);
+}
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+
+/*
+Appends a tEXt chunk to out: the keyword, a 0 byte, then the text.
+keyword and textstring are 0-terminated strings; the keyword must be 1 to 79 characters long.
+Returns 0 on success, 89 for an invalid keyword size, else the error of addChunk.
+*/
+static unsigned addChunk_tEXt(ucvector* out, const char* keyword, const char* textstring) {
+  unsigned error = 0;
+  size_t i;
+  ucvector text;
+  ucvector_init(&text);
+  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)keyword[i]);
+  if(i < 1 || i > 79) {
+    /*release the keyword bytes pushed so far, they would leak otherwise*/
+    ucvector_cleanup(&text);
+    return 89; /*error: invalid keyword size*/
+  }
+  ucvector_push_back(&text, 0); /*0 termination char*/
+  for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)textstring[i]);
+  error = addChunk(out, "tEXt", text.data, text.size);
+  ucvector_cleanup(&text);
+
+  return error;
+}
+
+/*
+Appends a zTXt chunk to out: the keyword, a 0 byte, the compression method (0) and the
+text compressed with zlib_compress.
+keyword and textstring are 0-terminated strings; the keyword must be 1 to 79 characters long.
+Returns 0 on success, 89 for an invalid keyword size, else the error of the step that failed.
+*/
+static unsigned addChunk_zTXt(ucvector* out, const char* keyword, const char* textstring,
+                              LodePNGCompressSettings* zlibsettings) {
+  unsigned error = 0;
+  ucvector data, compressed;
+  size_t i, textsize = lodepng_strlen(textstring);
+
+  ucvector_init(&data);
+  ucvector_init(&compressed);
+  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]);
+  if(i < 1 || i > 79) {
+    /*release the keyword bytes pushed so far, they would leak otherwise*/
+    ucvector_cleanup(&compressed);
+    ucvector_cleanup(&data);
+    return 89; /*error: invalid keyword size*/
+  }
+  ucvector_push_back(&data, 0); /*0 termination char*/
+  ucvector_push_back(&data, 0); /*compression method: 0*/
+
+  error = zlib_compress(&compressed.data, &compressed.size,
+                        (const unsigned char*)textstring, textsize, zlibsettings);
+  if(!error) {
+    for(i = 0; i != compressed.size; ++i) ucvector_push_back(&data, compressed.data[i]);
+    error = addChunk(out, "zTXt", data.data, data.size);
+  }
+
+  ucvector_cleanup(&compressed);
+  ucvector_cleanup(&data);
+  return error;
+}
+
+/*
+Appends an iTXt chunk to out: keyword, 0 byte, compression flag, compression method (0),
+language tag, 0 byte, translated keyword, 0 byte, then the text. The text is compressed
+with zlib_compress if compressed is non-zero, else stored as is.
+All strings are 0-terminated; the keyword must be 1 to 79 characters long.
+Returns 0 on success, 89 for an invalid keyword size, else the error of the step that failed.
+*/
+static unsigned addChunk_iTXt(ucvector* out, unsigned compressed, const char* keyword, const char* langtag,
+                              const char* transkey, const char* textstring, LodePNGCompressSettings* zlibsettings) {
+  unsigned error = 0;
+  ucvector data;
+  size_t i, textsize = lodepng_strlen(textstring);
+
+  ucvector_init(&data);
+
+  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]);
+  if(i < 1 || i > 79) {
+    /*release the keyword bytes pushed so far, they would leak otherwise*/
+    ucvector_cleanup(&data);
+    return 89; /*error: invalid keyword size*/
+  }
+  ucvector_push_back(&data, 0); /*null termination char*/
+  ucvector_push_back(&data, compressed ? 1 : 0); /*compression flag*/
+  ucvector_push_back(&data, 0); /*compression method*/
+  for(i = 0; langtag[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)langtag[i]);
+  ucvector_push_back(&data, 0); /*null termination char*/
+  for(i = 0; transkey[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)transkey[i]);
+  ucvector_push_back(&data, 0); /*null termination char*/
+
+  if(compressed) {
+    ucvector compressed_data;
+    ucvector_init(&compressed_data);
+    error = zlib_compress(&compressed_data.data, &compressed_data.size,
+                          (const unsigned char*)textstring, textsize, zlibsettings);
+    if(!error) {
+      for(i = 0; i != compressed_data.size; ++i) ucvector_push_back(&data, compressed_data.data[i]);
+    }
+    ucvector_cleanup(&compressed_data);
+  } else /*not compressed*/ {
+    for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)textstring[i]);
+  }
+
+  if(!error) error = addChunk(out, "iTXt", data.data, data.size);
+  ucvector_cleanup(&data);
+  return error;
+}
+
+/*
+Appends the bKGD chunk to out. Its content depends on the color type of the PNG:
+2 bytes (background_r) for the grey types, 6 bytes (background_r, _g, _b) for the RGB
+types, 1 byte (low byte of background_r, the palette index) for palette.
+Returns the result of addChunk.
+*/
+static unsigned addChunk_bKGD(ucvector* out, const LodePNGInfo* info) {
+  unsigned char bytes[6];
+  size_t used = 0;
+
+  switch(info->color.colortype) {
+    case LCT_GREY:
+    case LCT_GREY_ALPHA:
+      bytes[0] = (unsigned char)(info->background_r >> 8);
+      bytes[1] = (unsigned char)(info->background_r & 255);
+      used = 2;
+      break;
+    case LCT_RGB:
+    case LCT_RGBA:
+      bytes[0] = (unsigned char)(info->background_r >> 8);
+      bytes[1] = (unsigned char)(info->background_r & 255);
+      bytes[2] = (unsigned char)(info->background_g >> 8);
+      bytes[3] = (unsigned char)(info->background_g & 255);
+      bytes[4] = (unsigned char)(info->background_b >> 8);
+      bytes[5] = (unsigned char)(info->background_b & 255);
+      used = 6;
+      break;
+    case LCT_PALETTE:
+      bytes[0] = (unsigned char)(info->background_r & 255); /*palette index*/
+      used = 1;
+      break;
+    default:
+      break; /*any other value: chunk without content*/
+  }
+
+  return addChunk(out, "bKGD", bytes, used);
+}
+
+/*
+Appends the tIME chunk to out: 7 bytes holding the year (2 bytes, high byte first),
+month, day, hour, minute and second of time. Returns the result of addChunk.
+*/
+static unsigned addChunk_tIME(ucvector* out, const LodePNGTime* time) {
+  unsigned char stamp[7];
+
+  /*the year takes two bytes*/
+  stamp[0] = (unsigned char)(time->year >> 8);
+  stamp[1] = (unsigned char)(time->year & 255);
+
+  /*all the other fields take one byte each*/
+  stamp[2] = (unsigned char)time->month;
+  stamp[3] = (unsigned char)time->day;
+  stamp[4] = (unsigned char)time->hour;
+  stamp[5] = (unsigned char)time->minute;
+  stamp[6] = (unsigned char)time->second;
+
+  return addChunk(out, "tIME", stamp, sizeof(stamp));
+}
+
+/*
+Appends the pHYs chunk to out: 9 bytes holding info->phys_x and info->phys_y as 32-bit
+integers followed by info->phys_unit. Returns the result of addChunk.
+*/
+static unsigned addChunk_pHYs(ucvector* out, const LodePNGInfo* info) {
+  unsigned char phys[9];
+
+  lodepng_set32bitInt(&phys[0], info->phys_x);
+  lodepng_set32bitInt(&phys[4], info->phys_y);
+  phys[8] = info->phys_unit;
+
+  return addChunk(out, "pHYs", phys, sizeof(phys));
+}
+
+/*Appends the gAMA chunk to out: info->gama_gamma as a 32-bit integer. Returns the result of addChunk.*/
+static unsigned addChunk_gAMA(ucvector* out, const LodePNGInfo* info) {
+  unsigned char gamma[4];
+
+  lodepng_set32bitInt(&gamma[0], info->gama_gamma);
+
+  return addChunk(out, "gAMA", gamma, sizeof(gamma));
+}
+
+/*
+Appends the cHRM chunk to out: the x and y values of white, red, green and blue from
+info, in that order, as eight 32-bit integers. Returns the result of addChunk.
+*/
+static unsigned addChunk_cHRM(ucvector* out, const LodePNGInfo* info) {
+  unsigned char chrm[32];
+  unsigned char* white = chrm;
+  unsigned char* red = chrm + 8;
+  unsigned char* green = chrm + 16;
+  unsigned char* blue = chrm + 24;
+
+  /*each color takes 8 bytes: x first, then y*/
+  lodepng_set32bitInt(white, info->chrm_white_x);
+  lodepng_set32bitInt(white + 4, info->chrm_white_y);
+  lodepng_set32bitInt(red, info->chrm_red_x);
+  lodepng_set32bitInt(red + 4, info->chrm_red_y);
+  lodepng_set32bitInt(green, info->chrm_green_x);
+  lodepng_set32bitInt(green + 4, info->chrm_green_y);
+  lodepng_set32bitInt(blue, info->chrm_blue_x);
+  lodepng_set32bitInt(blue + 4, info->chrm_blue_y);
+
+  return addChunk(out, "cHRM", chrm, sizeof(chrm));
+}
+
+/*Appends the sRGB chunk to out: the single byte info->srgb_intent. Returns the result of addChunk.*/
+static unsigned addChunk_sRGB(ucvector* out, const LodePNGInfo* info) {
+  unsigned char intent[1];
+
+  intent[0] = info->srgb_intent;
+
+  return addChunk(out, "sRGB", intent, 1);
+}
+
+/*
+Appends the iCCP chunk to out: info->iccp_name, a 0 byte, the compression method (0) and
+the info->iccp_profile_size bytes of info->iccp_profile compressed with zlib_compress.
+The name is a 0-terminated string that must be 1 to 79 characters long.
+Returns 0 on success, 89 for an invalid name size, else the error of the step that failed.
+*/
+static unsigned addChunk_iCCP(ucvector* out, const LodePNGInfo* info, LodePNGCompressSettings* zlibsettings) {
+  unsigned error = 0;
+  ucvector data, compressed;
+  size_t i;
+
+  ucvector_init(&data);
+  ucvector_init(&compressed);
+  for(i = 0; info->iccp_name[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)info->iccp_name[i]);
+  if(i < 1 || i > 79) {
+    /*release the name bytes pushed so far, they would leak otherwise*/
+    ucvector_cleanup(&compressed);
+    ucvector_cleanup(&data);
+    return 89; /*error: invalid keyword size*/
+  }
+  ucvector_push_back(&data, 0); /*0 termination char*/
+  ucvector_push_back(&data, 0); /*compression method: 0*/
+
+  error = zlib_compress(&compressed.data, &compressed.size,
+                        info->iccp_profile, info->iccp_profile_size, zlibsettings);
+  if(!error) {
+    for(i = 0; i != compressed.size; ++i) ucvector_push_back(&data, compressed.data[i]);
+    error = addChunk(out, "iCCP", data.data, data.size);
+  }
+
+  ucvector_cleanup(&compressed);
+  ucvector_cleanup(&data);
+  return error;
+}
+
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*
+Applies one PNG filter to a scanline.
+out: receives length filtered bytes.
+scanline: the length unfiltered bytes of the current row.
+prevline: the unfiltered bytes of the previous row, or 0 if there is none.
+bytewidth: distance in bytes to the byte that the Sub, Average and Paeth filters subtract.
+filterType: 0 None, 1 Sub, 2 Up, 3 Average, 4 Paeth; any other value leaves out untouched.
+*/
+static void filterScanline(unsigned char* out, const unsigned char* scanline, const unsigned char* prevline,
+                           size_t length, size_t bytewidth, unsigned char filterType) {
+  size_t k;
+
+  if(filterType > 4) return; /*nonexistent filter type given*/
+
+  switch(filterType) {
+    case 0: /*None*/
+      for(k = 0; k != length; ++k) out[k] = scanline[k];
+      break;
+
+    case 1: /*Sub*/
+      /*the first bytewidth bytes have no byte to their left*/
+      for(k = 0; k != bytewidth; ++k) out[k] = scanline[k];
+      for(; k < length; ++k) out[k] = scanline[k] - scanline[k - bytewidth];
+      break;
+
+    case 2: /*Up*/
+      if(!prevline) {
+        for(k = 0; k != length; ++k) out[k] = scanline[k];
+      } else {
+        for(k = 0; k != length; ++k) out[k] = scanline[k] - prevline[k];
+      }
+      break;
+
+    case 3: /*Average*/
+      if(!prevline) {
+        for(k = 0; k != bytewidth; ++k) out[k] = scanline[k];
+        for(; k < length; ++k) out[k] = scanline[k] - (scanline[k - bytewidth] >> 1);
+      } else {
+        for(k = 0; k != bytewidth; ++k) out[k] = scanline[k] - (prevline[k] >> 1);
+        for(; k < length; ++k) out[k] = scanline[k] - ((scanline[k - bytewidth] + prevline[k]) >> 1);
+      }
+      break;
+
+    default: /*4: Paeth*/
+      if(!prevline) {
+        for(k = 0; k != bytewidth; ++k) out[k] = scanline[k];
+        /*paethPredictor(scanline[k - bytewidth], 0, 0) is always scanline[k - bytewidth]*/
+        for(; k < length; ++k) out[k] = (scanline[k] - scanline[k - bytewidth]);
+      } else {
+        /*paethPredictor(0, prevline[k], 0) is always prevline[k]*/
+        for(k = 0; k != bytewidth; ++k) out[k] = (scanline[k] - prevline[k]);
+        for(; k < length; ++k) {
+          out[k] = (scanline[k] - paethPredictor(scanline[k - bytewidth], prevline[k], prevline[k - bytewidth]));
+        }
+      }
+      break;
+  }
+}
+
+/* integer binary logarithm: position of the highest set bit of i, 0 if i is 0 or 1 */
+static size_t ilog2(size_t i) {
+  size_t bits = 0;
+  /* every halving that still leaves a non-zero value accounts for one bit */
+  for(i >>= 1; i != 0; i >>= 1) ++bits;
+  return bits;
+}
+
+/* integer approximation for i * log2(i), helper function for LFS_ENTROPY.
+Returns 0 for i == 0. */
+static size_t ilog2i(size_t i) {
+  size_t l;
+  if(i == 0) return 0;
+  l = ilog2(i);
+  /* approximate i*log2(i): l is integer logarithm, ((i - (1 << l)) << 1)
+  linearly approximates the missing fractional part multiplied by i.
+  The 1 that gets shifted is a size_t: l can be as large as the bit width of
+  size_t minus one, and shifting an unsigned int that far is undefined */
+  return i * l + ((i - ((size_t)1 << l)) << 1u);
+}
+
+static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h,
+                       const LodePNGColorMode* color, const LodePNGEncoderSettings* settings) {
+  /*
+  For PNG filter method 0
+  out must be a buffer with as size: h + (w * h * bpp + 7u) / 8u, because there are
+  the scanlines with 1 extra byte per scanline
+  */
+
+  unsigned bpp = lodepng_get_bpp(color);
+  /*the width of a scanline in bytes, not including the filter type*/
+  size_t linebytes = lodepng_get_raw_size_idat(w, 1, bpp) - 1u;
+
+  /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/
+  size_t bytewidth = (bpp + 7u) / 8u;
+  const unsigned char* prevline = 0;
+  unsigned x, y;
+  unsigned error = 0;
+  LodePNGFilterStrategy strategy = settings->filter_strategy;
+
+  /*
+  There is a heuristic called the minimum sum of absolute differences heuristic, suggested by the PNG standard:
+   *  If the image type is Palette, or the bit depth is smaller than 8, then do not filter the image (i.e.
+      use fixed filtering, with the filter None).
+   * (The other case) If the image type is Grayscale or RGB (with or without Alpha), and the bit depth is
+     not smaller than 8, then use adaptive filtering heuristic as follows: independently for each row, apply
+     all five filters and select the filter that produces the smallest sum of absolute values per row.
+  This heuristic is used if filter strategy is LFS_MINSUM and filter_palette_zero is true.
+
+  If filter_palette_zero is true and filter_strategy is not LFS_MINSUM, the above heuristic is followed,
+  but for "the other case", whatever strategy filter_strategy is set to instead of the minimum sum
+  heuristic is used.
+  */
+  if(settings->filter_palette_zero &&
+     (color->colortype == LCT_PALETTE || color->bitdepth < 8)) strategy = LFS_ZERO;
+
+  if(bpp == 0) return 31; /*error: invalid color type*/
+
+  if(strategy >= LFS_ZERO && strategy <= LFS_FOUR) {
+    unsigned char type = (unsigned char)strategy;
+    for(y = 0; y != h; ++y) {
+      size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
+      size_t inindex = linebytes * y;
+      out[outindex] = type; /*filter type byte*/
+      filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, type);
+      prevline = &in[inindex];
+    }
+  } else if(strategy == LFS_MINSUM) {
+    /*adaptive filtering*/
+    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
+    size_t smallest = 0;
+    unsigned char type, bestType = 0;
+
+    for(type = 0; type != 5; ++type) {
+      attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
+      if(!attempt[type]) error = 83; /*alloc fail*/
+    }
+
+    if(!error) {
+      for(y = 0; y != h; ++y) {
+        /*try the 5 filter types*/
+        for(type = 0; type != 5; ++type) {
+          size_t sum = 0;
+          filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
+
+          /*calculate the sum of the result*/
+          if(type == 0) {
+            for(x = 0; x != linebytes; ++x) sum += (unsigned char)(attempt[type][x]);
+          } else {
+            for(x = 0; x != linebytes; ++x) {
+              /*For differences, each byte should be treated as signed, values above 127 are negative
+              (converted to signed char). Filtertype 0 isn't a difference though, so use unsigned there.
+              This means filtertype 0 is almost never chosen, but that is justified.*/
+              unsigned char s = attempt[type][x];
+              sum += s < 128 ? s : (255U - s);
+            }
+          }
+
+          /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/
+          if(type == 0 || sum < smallest) {
+            bestType = type;
+            smallest = sum;
+          }
+        }
+
+        prevline = &in[y * linebytes];
+
+        /*now fill the out values*/
+        out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
+        for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
+      }
+    }
+
+    for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
+  } else if(strategy == LFS_ENTROPY) {
+    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
+    size_t bestSum = 0;
+    unsigned type, bestType = 0;
+    unsigned count[256];
+
+    for(type = 0; type != 5; ++type) {
+      attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
+      if(!attempt[type]) error = 83; /*alloc fail*/
+    }
+
+    if(!error) {
+      for(y = 0; y != h; ++y) {
+        /*try the 5 filter types*/
+        for(type = 0; type != 5; ++type) {
+          size_t sum = 0;
+          filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
+          lodepng_memset(count, 0, 256 * sizeof(*count));
+          for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]];
+          ++count[type]; /*the filter type itself is part of the scanline*/
+          for(x = 0; x != 256; ++x) {
+            sum += ilog2i(count[x]);
+          }
+          /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/
+          if(type == 0 || sum > bestSum) {
+            bestType = type;
+            bestSum = sum;
+          }
+        }
+
+        prevline = &in[y * linebytes];
+
+        /*now fill the out values*/
+        out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
+        for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
+      }
+    }
+
+    for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
+  } else if(strategy == LFS_PREDEFINED) {
+    for(y = 0; y != h; ++y) {
+      size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
+      size_t inindex = linebytes * y;
+      unsigned char type = settings->predefined_filters[y];
+      out[outindex] = type; /*filter type byte*/
+      filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, type);
+      prevline = &in[inindex];
+    }
+  } else if(strategy == LFS_BRUTE_FORCE) {
+    /*brute force filter chooser.
+    deflate the scanline after every filter attempt to see which one deflates best.
+    This is very slow and gives only slightly smaller, sometimes even larger, result*/
+    size_t size[5];
+    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
+    size_t smallest = 0;
+    unsigned type = 0, bestType = 0;
+    unsigned char* dummy;
+    LodePNGCompressSettings zlibsettings;
+    lodepng_memcpy(&zlibsettings, &settings->zlibsettings, sizeof(LodePNGCompressSettings));
+    /*use fixed tree on the attempts so that the tree is not adapted to the filtertype on purpose,
+    to simulate the true case where the tree is the same for the whole image. Sometimes it gives
+    better result with dynamic tree anyway. Using the fixed tree sometimes gives worse, but in rare
+    cases better compression. It does make this a bit less slow, so it's worth doing this.*/
+    zlibsettings.btype = 1;
+    /*a custom encoder likely doesn't read the btype setting and is optimized for complete PNG
+    images only, so disable it*/
+    zlibsettings.custom_zlib = 0;
+    zlibsettings.custom_deflate = 0;
+    for(type = 0; type != 5; ++type) {
+      attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
+      if(!attempt[type]) error = 83; /*alloc fail*/
+    }
+    if(!error) {
+      for(y = 0; y != h; ++y) /*try the 5 filter types*/ {
+        for(type = 0; type != 5; ++type) {
+          unsigned testsize = (unsigned)linebytes;
+          /*if(testsize > 8) testsize /= 8;*/ /*it already works good enough by testing a part of the row*/
+
+          filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
+          size[type] = 0;
+          dummy = 0;
+          zlib_compress(&dummy, &size[type], attempt[type], testsize, &zlibsettings);
+          lodepng_free(dummy);
+          /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/
+          if(type == 0 || size[type] < smallest) {
+            bestType = type;
+            smallest = size[type];
+          }
+        }
+        prevline = &in[y * linebytes];
+        out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
+        for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
+      }
+    }
+    for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
+  }
+  else return 88; /* unknown filter strategy */
+
+  return error;
+}
+
+static void addPaddingBits(unsigned char* out, const unsigned char* in,
+                           size_t olinebits, size_t ilinebits, unsigned h) {
+  /*
+  Counterpart of removePaddingBits: for each of the h scanlines, ilinebits bits are
+  read from in and written to out, followed by (olinebits - ilinebits) zero bits.
+  olinebits must be >= ilinebits.
+  */
+  size_t padbits = olinebits - ilinebits;
+  size_t outpos = 0, inpos = 0; /*bit positions in out and in, handed to the bit helpers by address*/
+  unsigned row;
+  for(row = 0; row < h; ++row) {
+    size_t remaining;
+    for(remaining = ilinebits; remaining != 0; --remaining) {
+      unsigned char bit = readBitFromReversedStream(&inpos, in);
+      setBitOfReversedStream(&outpos, out, bit);
+    }
+    /*the padding bits are explicitly written as 0 instead of being skipped, so that
+    valgrind does not report "Use of uninitialised value" on the output buffer*/
+    for(remaining = padbits; remaining != 0; --remaining) setBitOfReversedStream(&outpos, out, 0);
+  }
+}
+
+/*
+Reorders the pixels of a non-interlaced image into the seven Adam7 passes.
+in: non-interlaced image of w * h pixels, i.e. w * h * bpp bits, without any padding bits
+out: the same pixels grouped per pass ("reduced image"). Inside a pass the scanlines
+ follow each other without padding bits; each pass begins at the byte offset
+ passstart[pass] obtained from Adam7_getpassvalues, so out is possibly bigger than in
+bpp: bits per pixel
+NOTE: the remarks about padding bits are only relevant if bpp < 8
+*/
+static void Adam7_interlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) {
+  unsigned passw[7], passh[7];
+  size_t filter_passstart[8], padded_passstart[8], passstart[8];
+  unsigned pass;
+
+  Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
+
+  for(pass = 0; pass != 7; ++pass) {
+    unsigned col, row, k;
+    if(bpp >= 8) {
+      /*pixels occupy whole bytes: copy them byte by byte*/
+      size_t bytewidth = bpp / 8u;
+      for(row = 0; row < passh[pass]; ++row) {
+        for(col = 0; col < passw[pass]; ++col) {
+          size_t src = ((ADAM7_IY[pass] + row * ADAM7_DY[pass]) * w + ADAM7_IX[pass] + col * ADAM7_DX[pass]) * bytewidth;
+          size_t dst = passstart[pass] + (row * passw[pass] + col) * bytewidth;
+          for(k = 0; k < bytewidth; ++k) out[dst + k] = in[src + k];
+        }
+      }
+    } else {
+      /*bpp < 8: pixels are smaller than a byte, so work with bit positions instead*/
+      unsigned passlinebits = bpp * passw[pass]; /*bits per scanline of this pass*/
+      unsigned fulllinebits = bpp * w; /*bits per scanline of the full input image*/
+      for(row = 0; row < passh[pass]; ++row) {
+        for(col = 0; col < passw[pass]; ++col) {
+          size_t srcbit = (ADAM7_IY[pass] + row * ADAM7_DY[pass]) * fulllinebits + (ADAM7_IX[pass] + col * ADAM7_DX[pass]) * bpp;
+          size_t dstbit = (8 * passstart[pass]) + (row * passlinebits + col * bpp);
+          for(k = 0; k < bpp; ++k) {
+            unsigned char bit = readBitFromReversedStream(&srcbit, in);
+            setBitOfReversedStream(&dstbit, out, bit);
+          }
+        }
+      }
+    }
+  }
+}
+
+/*
+Converts the plain 2D image, already in the PNG's color type, into the filtered,
+padded and (if requested) interlaced data that becomes the uncompressed IDAT content.
+out: receives a newly allocated buffer; ownership passes to the caller, who frees it
+     with lodepng_free (also after an error, the pointer may be non-null)
+outsize: receives the size of that buffer in bytes
+in: must contain the full image
+return value is the error code: 0 on success, 83 on allocation failure, or whatever
+filter() reports
+*/
+static unsigned preProcessScanlines(unsigned char** out, size_t* outsize, const unsigned char* in,
+                                    unsigned w, unsigned h,
+                                    const LodePNGInfo* info_png, const LodePNGEncoderSettings* settings) {
+  /*
+  Steps:
+  *) if no Adam7: 1) add padding bits (= possible extra bits per scanline if bpp < 8) 2) filter
+  *) if adam7: 1) Adam7_interlace 2) 7x add padding bits 3) 7x filter
+  */
+  unsigned bpp = lodepng_get_bpp(&info_png->color);
+  unsigned error = 0;
+
+  if(info_png->interlace_method == 0) {
+    /*All sizes are computed in size_t: done in unsigned, w * bpp or h * linebytes can
+    wrap around for very large images, which would make the buffers too small.*/
+    size_t linebits = (size_t)w * bpp;
+    size_t linebytes = (linebits + 7u) / 8u;
+
+    *outsize = (size_t)h + ((size_t)h * linebytes); /*image size plus an extra byte per scanline + possible padding bits*/
+    *out = (unsigned char*)lodepng_malloc(*outsize);
+    if(!(*out) && (*outsize)) error = 83; /*alloc fail*/
+
+    if(!error) {
+      /*non multiple of 8 bits per scanline, padding bits needed per scanline*/
+      if(bpp < 8 && linebits != linebytes * 8u) {
+        size_t paddedsize = (size_t)h * linebytes;
+        unsigned char* padded = (unsigned char*)lodepng_malloc(paddedsize);
+        /*a null result for a 0-byte request is not a failure*/
+        if(!padded && paddedsize) error = 83; /*alloc fail*/
+        if(!error) {
+          addPaddingBits(padded, in, linebytes * 8u, linebits, h);
+          error = filter(*out, padded, w, h, &info_png->color, settings);
+        }
+        lodepng_free(padded);
+      } else {
+        /*we can immediately filter into the out buffer, no other steps needed*/
+        error = filter(*out, in, w, h, &info_png->color, settings);
+      }
+    }
+  } else /*interlace_method is 1 (Adam7)*/ {
+    unsigned passw[7], passh[7];
+    size_t filter_passstart[8], padded_passstart[8], passstart[8];
+    unsigned char* adam7;
+
+    Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
+
+    *outsize = filter_passstart[7]; /*image size plus an extra byte per scanline + possible padding bits*/
+    *out = (unsigned char*)lodepng_malloc(*outsize);
+    /*same zero-size rule as for the non-interlaced buffer and the adam7 buffer below*/
+    if(!(*out) && (*outsize)) error = 83; /*alloc fail*/
+
+    adam7 = (unsigned char*)lodepng_malloc(passstart[7]);
+    if(!adam7 && passstart[7]) error = 83; /*alloc fail*/
+
+    if(!error) {
+      unsigned i;
+
+      Adam7_interlace(adam7, in, w, h, bpp);
+      for(i = 0; i != 7; ++i) {
+        if(bpp < 8) {
+          size_t passlinebits = (size_t)passw[i] * bpp;
+          size_t paddedsize = padded_passstart[i + 1] - padded_passstart[i];
+          unsigned char* padded = (unsigned char*)lodepng_malloc(paddedsize);
+          if(!padded && paddedsize) ERROR_BREAK(83); /*alloc fail*/
+          addPaddingBits(padded, &adam7[passstart[i]],
+                         ((passlinebits + 7u) / 8u) * 8u, passlinebits, passh[i]);
+          error = filter(&(*out)[filter_passstart[i]], padded,
+                         passw[i], passh[i], &info_png->color, settings);
+          lodepng_free(padded);
+        } else {
+          error = filter(&(*out)[filter_passstart[i]], &adam7[padded_passstart[i]],
+                         passw[i], passh[i], &info_png->color, settings);
+        }
+
+        if(error) break;
+      }
+    }
+
+    lodepng_free(adam7);
+  }
+
+  return error;
+}
+
+/*
+palette must have 4 * palettesize bytes allocated, and given in format RGBARGBARGBARGBA...
+returns 0 if the palette is opaque (every alpha is 255; also for an empty palette),
+returns 1 if exactly one entry has alpha 0, all other entries have alpha 255 and none of
+          those opaque entries has the same RGB as the transparent one ==> color key
+returns 2 in all other cases (semi-translucent alpha values, more than one fully
+          transparent entry, or an opaque entry sharing the key's RGB).
+*/
+static unsigned getPaletteTranslucency(const unsigned char* palette, size_t palettesize) {
+  size_t i;
+  size_t keyindex = palettesize; /*index of the alpha 0 entry; palettesize means "none found"*/
+
+  /*first pass: look at alpha only. At most one entry may have alpha 0, the rest must be 255*/
+  for(i = 0; i != palettesize; ++i) {
+    unsigned a = palette[4 * i + 3];
+    if(a == 255) continue;
+    if(a != 0 || keyindex != palettesize) return 2;
+    keyindex = i;
+  }
+  if(keyindex == palettesize) return 0;
+
+  /*second pass: when key, no opaque RGB may have key's RGB. The key entry itself is
+  skipped here; testing it like the other entries would always yield 2 and make the
+  color key result unreachable.*/
+  for(i = 0; i != palettesize; ++i) {
+    if(i == keyindex) continue;
+    if(palette[4 * i + 0] == palette[4 * keyindex + 0] &&
+       palette[4 * i + 1] == palette[4 * keyindex + 1] &&
+       palette[4 * i + 2] == palette[4 * keyindex + 2]) return 2;
+  }
+  return 1;
+}
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+/*Appends the chunks stored back to back in data (datasize bytes in total) to out,
+one at a time. Returns 0 on success, or leaves early through CERROR_TRY_RETURN when
+appending a chunk fails.*/
+static unsigned addUnknownChunks(ucvector* out, unsigned char* data, size_t datasize) {
+  unsigned char* end = data + datasize;
+  unsigned char* chunk;
+  for(chunk = data; (size_t)(chunk - data) < datasize; chunk = lodepng_chunk_next(chunk, end)) {
+    CERROR_TRY_RETURN(lodepng_chunk_append(&out->data, &out->size, chunk));
+    /*the append works on data and size directly, so bring allocsize back in line with size*/
+    out->allocsize = out->size;
+  }
+  return 0;
+}
+
+static unsigned isGrayICCProfile(const unsigned char* profile, unsigned size) {
+  /*
+  Returns 1 if bytes 16-19 of the profile are "GRAY", 0 otherwise (including when the
+  profile is shorter than 20 bytes). An RGB profile has "RGB " in those bytes instead.
+  Nothing else of the ICC profile is parsed or validated here. The check is needed
+  only because the PNG specification requires using a non-gray color model if there
+  is an ICC profile with "RGB " (sadly limiting compression opportunities if the
+  input data is grayscale RGB data), and requires using a gray color model if it is
+  "GRAY".
+  */
+  const unsigned char* tag;
+  if(size < 20) return 0;
+  tag = profile + 16;
+  if(tag[0] != 'G') return 0;
+  if(tag[1] != 'R') return 0;
+  if(tag[2] != 'A') return 0;
+  return tag[3] == 'Y';
+}
+
+static unsigned isRGBICCProfile(const unsigned char* profile, unsigned size) {
+  /*Returns 1 if bytes 16-19 of the profile are "RGB " (with trailing space), 0 otherwise,
+  including when the profile is shorter than 20 bytes. No other validation is done.*/
+  const unsigned char* tag;
+  if(size < 20) return 0;
+  tag = profile + 16;
+  if(tag[0] != 'R') return 0;
+  if(tag[1] != 'G') return 0;
+  if(tag[2] != 'B') return 0;
+  return tag[3] == ' ';
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*
+Encodes the raw image (w x h pixels, in the color format of state->info_raw) as a PNG.
+out, outsize: receive the buffer built by this function and its size. They are set from
+  the internal output vector on every path, also when an error is returned, so the
+  caller frees *out in all cases.
+state: supplies the encoder settings and the PNG info; state->error receives the result.
+Returns state->error: 0 on success, otherwise the code of the first problem found, e.g.
+  68 (palette size), 61 (btype), 71 (interlace mode), 100/101/102 (ICC profile versus
+  color model), 104 (background conversion), 83 (alloc fail), 66/67 (text key length).
+*/
+unsigned lodepng_encode(unsigned char** out, size_t* outsize,
+                        const unsigned char* image, unsigned w, unsigned h,
+                        LodePNGState* state) {
+  unsigned char* data = 0; /*uncompressed version of the IDAT chunk data*/
+  size_t datasize = 0;
+  ucvector outv;
+  LodePNGInfo info;
+  const LodePNGInfo* info_png = &state->info_png;
+
+  ucvector_init(&outv);
+  lodepng_info_init(&info);
+
+  /*provide some proper output values if error will happen*/
+  *out = 0;
+  *outsize = 0;
+  state->error = 0;
+
+  /*check input values validity*/
+  if((info_png->color.colortype == LCT_PALETTE || state->encoder.force_palette)
+      && (info_png->color.palettesize == 0 || info_png->color.palettesize > 256)) {
+    state->error = 68; /*invalid palette size, it is only allowed to be 1-256*/
+    goto cleanup;
+  }
+  if(state->encoder.zlibsettings.btype > 2) {
+    state->error = 61; /*error: nonexistent btype*/
+    goto cleanup;
+  }
+  if(info_png->interlace_method > 1) {
+    state->error = 71; /*error: nonexistent interlace mode*/
+    goto cleanup;
+  }
+  state->error = checkColorValidity(info_png->color.colortype, info_png->color.bitdepth);
+  if(state->error) goto cleanup; /*error: nonexistent color type given*/
+  state->error = checkColorValidity(state->info_raw.colortype, state->info_raw.bitdepth);
+  if(state->error) goto cleanup; /*error: nonexistent color type given*/
+
+  /* color convert and compute scanline filter types */
+  lodepng_info_copy(&info, &state->info_png);
+  if(state->encoder.auto_convert) {
+    LodePNGColorStats stats;
+    lodepng_color_stats_init(&stats);
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+    if(info_png->iccp_defined &&
+        isGrayICCProfile(info_png->iccp_profile, info_png->iccp_profile_size)) {
+      /*the PNG specification does not allow to use palette with a GRAY ICC profile, even
+      if the palette has only gray colors, so disallow it.*/
+      stats.allow_palette = 0;
+    }
+    if(info_png->iccp_defined &&
+        isRGBICCProfile(info_png->iccp_profile, info_png->iccp_profile_size)) {
+      /*the PNG specification does not allow to use grayscale color with RGB ICC profile, so disallow gray.*/
+      stats.allow_greyscale = 0;
+    }
+#endif /* LODEPNG_COMPILE_ANCILLARY_CHUNKS */
+    state->error = lodepng_compute_color_stats(&stats, image, w, h, &state->info_raw);
+    if(state->error) goto cleanup;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+    if(info_png->background_defined) {
+      /*the background chunk's color must be taken into account as well*/
+      unsigned r = 0, g = 0, b = 0;
+      LodePNGColorMode mode16 = lodepng_color_mode_make(LCT_RGB, 16);
+      lodepng_convert_rgb(&r, &g, &b, info_png->background_r, info_png->background_g, info_png->background_b, &mode16, &info_png->color);
+      state->error = lodepng_color_stats_add(&stats, r, g, b, 65535);
+      if(state->error) goto cleanup;
+    }
+#endif /* LODEPNG_COMPILE_ANCILLARY_CHUNKS */
+    state->error = auto_choose_color(&info.color, &state->info_raw, &stats);
+    if(state->error) goto cleanup;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+    /*also convert the background chunk*/
+    if(info_png->background_defined) {
+      if(lodepng_convert_rgb(&info.background_r, &info.background_g, &info.background_b,
+          info_png->background_r, info_png->background_g, info_png->background_b, &info.color, &info_png->color)) {
+        state->error = 104;
+        goto cleanup;
+      }
+    }
+#endif /* LODEPNG_COMPILE_ANCILLARY_CHUNKS */
+  }
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  if(info_png->iccp_defined) {
+    unsigned gray_icc = isGrayICCProfile(info_png->iccp_profile, info_png->iccp_profile_size);
+    unsigned rgb_icc = isRGBICCProfile(info_png->iccp_profile, info_png->iccp_profile_size);
+    unsigned gray_png = info.color.colortype == LCT_GREY || info.color.colortype == LCT_GREY_ALPHA;
+    if(!gray_icc && !rgb_icc) {
+      state->error = 100; /* Disallowed profile color type for PNG */
+      goto cleanup;
+    }
+    if(gray_icc != gray_png) {
+      /*Not allowed to use RGB/RGBA/palette with GRAY ICC profile or vice versa,
+      or in case of auto_convert, it wasn't possible to find appropriate model*/
+      state->error = state->encoder.auto_convert ? 102 : 101;
+      goto cleanup;
+    }
+  }
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+  /*Build the IDAT payload. The result of preProcessScanlines must be stored in
+  state->error: it reports allocation failures and filter errors, and when it is
+  ignored the chunk writing below would run on missing or incomplete data.*/
+  if(!lodepng_color_mode_equal(&state->info_raw, &info.color)) {
+    unsigned char* converted;
+    size_t size = ((size_t)w * (size_t)h * (size_t)lodepng_get_bpp(&info.color) + 7u) / 8u;
+
+    converted = (unsigned char*)lodepng_malloc(size);
+    if(!converted && size) state->error = 83; /*alloc fail*/
+    if(!state->error) {
+      state->error = lodepng_convert(converted, image, &info.color, &state->info_raw, w, h);
+    }
+    if(!state->error) {
+      state->error = preProcessScanlines(&data, &datasize, converted, w, h, &info, &state->encoder);
+    }
+    lodepng_free(converted);
+    if(state->error) goto cleanup;
+  } else {
+    state->error = preProcessScanlines(&data, &datasize, image, w, h, &info, &state->encoder);
+    if(state->error) goto cleanup;
+  }
+
+  /* output all PNG chunks */ {
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+    size_t i;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    /*write signature and chunks*/
+    writeSignature(&outv);
+    /*IHDR*/
+    addChunk_IHDR(&outv, w, h, info.color.colortype, info.color.bitdepth, info.interlace_method);
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+    /*unknown chunks between IHDR and PLTE*/
+    if(info.unknown_chunks_data[0]) {
+      state->error = addUnknownChunks(&outv, info.unknown_chunks_data[0], info.unknown_chunks_size[0]);
+      if(state->error) goto cleanup;
+    }
+    /*color profile chunks must come before PLTE */
+    if(info.iccp_defined) addChunk_iCCP(&outv, &info, &state->encoder.zlibsettings);
+    if(info.srgb_defined) addChunk_sRGB(&outv, &info);
+    if(info.gama_defined) addChunk_gAMA(&outv, &info);
+    if(info.chrm_defined) addChunk_cHRM(&outv, &info);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    /*PLTE*/
+    if(info.color.colortype == LCT_PALETTE) {
+      addChunk_PLTE(&outv, &info.color);
+    }
+    if(state->encoder.force_palette && (info.color.colortype == LCT_RGB || info.color.colortype == LCT_RGBA)) {
+      addChunk_PLTE(&outv, &info.color);
+    }
+    /*tRNS*/
+    if(info.color.colortype == LCT_PALETTE && getPaletteTranslucency(info.color.palette, info.color.palettesize) != 0) {
+      addChunk_tRNS(&outv, &info.color);
+    }
+    if((info.color.colortype == LCT_GREY || info.color.colortype == LCT_RGB) && info.color.key_defined) {
+      addChunk_tRNS(&outv, &info.color);
+    }
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+    /*bKGD (must come between PLTE and the IDAT chunks)*/
+    if(info.background_defined) {
+      state->error = addChunk_bKGD(&outv, &info);
+      if(state->error) goto cleanup;
+    }
+    /*pHYs (must come before the IDAT chunks)*/
+    if(info.phys_defined) addChunk_pHYs(&outv, &info);
+
+    /*unknown chunks between PLTE and IDAT*/
+    if(info.unknown_chunks_data[1]) {
+      state->error = addUnknownChunks(&outv, info.unknown_chunks_data[1], info.unknown_chunks_size[1]);
+      if(state->error) goto cleanup;
+    }
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    /*IDAT (multiple IDAT chunks must be consecutive)*/
+    state->error = addChunk_IDAT(&outv, data, datasize, &state->encoder.zlibsettings);
+    if(state->error) goto cleanup;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+    /*tIME*/
+    if(info.time_defined) addChunk_tIME(&outv, &info.time);
+    /*tEXt and/or zTXt*/
+    for(i = 0; i != info.text_num; ++i) {
+      if(lodepng_strlen(info.text_keys[i]) > 79) {
+        state->error = 66; /*text chunk too large*/
+        goto cleanup;
+      }
+      if(lodepng_strlen(info.text_keys[i]) < 1) {
+        state->error = 67; /*text chunk too small*/
+        goto cleanup;
+      }
+      if(state->encoder.text_compression) {
+        addChunk_zTXt(&outv, info.text_keys[i], info.text_strings[i], &state->encoder.zlibsettings);
+      } else {
+        addChunk_tEXt(&outv, info.text_keys[i], info.text_strings[i]);
+      }
+    }
+    /*LodePNG version id in text chunk*/
+    if(state->encoder.add_id) {
+      unsigned already_added_id_text = 0;
+      for(i = 0; i != info.text_num; ++i) {
+        const char* k = info.text_keys[i];
+        /* Could use strcmp, but we're not calling or reimplementing this C library function for this use only */
+        if(k[0] == 'L' && k[1] == 'o' && k[2] == 'd' && k[3] == 'e' &&
+           k[4] == 'P' && k[5] == 'N' && k[6] == 'G' && k[7] == '\0') {
+          already_added_id_text = 1;
+          break;
+        }
+      }
+      if(already_added_id_text == 0) {
+        addChunk_tEXt(&outv, "LodePNG", LODEPNG_VERSION_STRING); /*it's shorter as tEXt than as zTXt chunk*/
+      }
+    }
+    /*iTXt*/
+    for(i = 0; i != info.itext_num; ++i) {
+      if(lodepng_strlen(info.itext_keys[i]) > 79) {
+        state->error = 66; /*text chunk too large*/
+        goto cleanup;
+      }
+      if(lodepng_strlen(info.itext_keys[i]) < 1) {
+        state->error = 67; /*text chunk too small*/
+        goto cleanup;
+      }
+      addChunk_iTXt(&outv, state->encoder.text_compression,
+                    info.itext_keys[i], info.itext_langtags[i], info.itext_transkeys[i], info.itext_strings[i],
+                    &state->encoder.zlibsettings);
+    }
+
+    /*unknown chunks between IDAT and IEND*/
+    if(info.unknown_chunks_data[2]) {
+      state->error = addUnknownChunks(&outv, info.unknown_chunks_data[2], info.unknown_chunks_size[2]);
+      if(state->error) goto cleanup;
+    }
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    addChunk_IEND(&outv);
+  }
+
+cleanup:
+  lodepng_info_cleanup(&info);
+  lodepng_free(data);
+
+  /*instead of cleaning the vector up, give it to the output*/
+  *out = outv.data;
+  *outsize = outv.size;
+
+  return state->error;
+}
+
+/*Encodes image to a PNG in memory with a temporary, default-initialised state. The given
+colortype and bitdepth are used both for the raw input and for the PNG color settings.
+Returns the error code of the encode call.*/
+unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize, const unsigned char* image,
+                               unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth) {
+  LodePNGState state;
+  unsigned result;
+
+  lodepng_state_init(&state);
+  state.info_png.color.colortype = colortype;
+  state.info_png.color.bitdepth = bitdepth;
+  state.info_raw.colortype = colortype;
+  state.info_raw.bitdepth = bitdepth;
+
+  /*lodepng_encode returns state.error, so its return value is the code to report*/
+  result = lodepng_encode(out, outsize, image, w, h, &state);
+  lodepng_state_cleanup(&state);
+  return result;
+}
+
+/*Convenience wrapper around lodepng_encode_memory using LCT_RGBA with bit depth 8.*/
+unsigned lodepng_encode32(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h) {
+  unsigned error = lodepng_encode_memory(out, outsize, image, w, h, LCT_RGBA, 8);
+  return error;
+}
+
+/*Convenience wrapper around lodepng_encode_memory using LCT_RGB with bit depth 8.*/
+unsigned lodepng_encode24(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h) {
+  unsigned error = lodepng_encode_memory(out, outsize, image, w, h, LCT_RGB, 8);
+  return error;
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+/*Encodes image with lodepng_encode_memory and, if that succeeded, writes the result to
+filename with lodepng_save_file. The temporary buffer is freed in both cases. Returns
+the error code of the step that failed, or 0.*/
+unsigned lodepng_encode_file(const char* filename, const unsigned char* image, unsigned w, unsigned h,
+                             LodePNGColorType colortype, unsigned bitdepth) {
+  unsigned char* buffer;
+  size_t buffersize;
+  unsigned error;
+
+  error = lodepng_encode_memory(&buffer, &buffersize, image, w, h, colortype, bitdepth);
+  if(error == 0) {
+    error = lodepng_save_file(buffer, buffersize, filename);
+  }
+  lodepng_free(buffer);
+  return error;
+}
+
+/*Convenience wrapper around lodepng_encode_file using LCT_RGBA with bit depth 8.*/
+unsigned lodepng_encode32_file(const char* filename, const unsigned char* image, unsigned w, unsigned h) {
+  unsigned error = lodepng_encode_file(filename, image, w, h, LCT_RGBA, 8);
+  return error;
+}
+
+/*Convenience wrapper around lodepng_encode_file using LCT_RGB with bit depth 8.*/
+unsigned lodepng_encode24_file(const char* filename, const unsigned char* image, unsigned w, unsigned h) {
+  unsigned error = lodepng_encode_file(filename, image, w, h, LCT_RGB, 8);
+  return error;
+}
+#endif /*LODEPNG_COMPILE_DISK*/
+
+/*Fills settings with the default encoder configuration.*/
+void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings) {
+  /*compression defaults come from the zlib settings initialiser*/
+  lodepng_compress_settings_init(&settings->zlibsettings);
+
+  /*color handling: automatic conversion on, palette not forced*/
+  settings->auto_convert = 1;
+  settings->force_palette = 0;
+
+  /*scanline filtering: minimum sum strategy, with the palette/low bit depth rule
+  (filter_palette_zero) enabled and no predefined filter list*/
+  settings->filter_strategy = LFS_MINSUM;
+  settings->filter_palette_zero = 1;
+  settings->predefined_filters = 0;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  /*text chunks: compressed by default, no encoder id chunk*/
+  settings->text_compression = 1;
+  settings->add_id = 0;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+}
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+#ifdef LODEPNG_COMPILE_ERROR_TEXT
+/*
+This returns the description of a numerical error code in English. This is also
+the documentation of all the error codes.
+*/
+const char* lodepng_error_text(unsigned code) {
+  switch(code) {
+    case 0: return "no error, everything went ok";
+    case 1: return "nothing done yet"; /*the Encoder/Decoder has done nothing yet, error checking makes no sense yet*/
+    case 10: return "end of input memory reached without huffman end code"; /*while huffman decoding*/
+    case 11: return "error in code tree made it jump outside of huffman tree"; /*while huffman decoding*/
+    case 13: return "problem while processing dynamic deflate block";
+    case 14: return "problem while processing dynamic deflate block";
+    case 15: return "problem while processing dynamic deflate block";
+    /*this error could happen if there are only 0 or 1 symbols present in the huffman code:*/
+    case 16: return "nonexistent code while processing dynamic deflate block";
+    case 17: return "end of out buffer memory reached while inflating";
+    case 18: return "invalid distance code while inflating";
+    case 19: return "end of out buffer memory reached while inflating";
+    case 20: return "invalid deflate block BTYPE encountered while decoding";
+    case 21: return "NLEN is not ones complement of LEN in a deflate block";
+
+    /*end of out buffer memory reached while inflating:
+    This can happen if the inflated deflate data is longer than the amount of bytes required to fill up
+    all the pixels of the image, given the color depth and image dimensions. Something that doesn't
+    happen in a normal, well encoded, PNG image.*/
+    case 22: return "end of out buffer memory reached while inflating";
+    case 23: return "end of in buffer memory reached while inflating";
+    case 24: return "invalid FCHECK in zlib header";
+    case 25: return "invalid compression method in zlib header";
+    case 26: return "FDICT encountered in zlib header while it's not used for PNG";
+    case 27: return "PNG file is smaller than a PNG header";
+    /*Checks the magic file header, the first 8 bytes of the PNG file*/
+    case 28: return "incorrect PNG signature, it's no PNG or corrupted";
+    case 29: return "first chunk is not the header chunk";
+    case 30: return "chunk length too large, chunk broken off at end of file";
+    case 31: return "illegal PNG color type or bpp";
+    case 32: return "illegal PNG compression method";
+    case 33: return "illegal PNG filter method";
+    case 34: return "illegal PNG interlace method";
+    case 35: return "chunk length of a chunk is too large or the chunk too small";
+    case 36: return "illegal PNG filter type encountered";
+    case 37: return "illegal bit depth for this color type given";
+    case 38: return "the palette is too small or too big"; /*0, or more than 256 colors*/
+    case 39: return "tRNS chunk before PLTE or has more entries than palette size";
+    case 40: return "tRNS chunk has wrong size for grayscale image";
+    case 41: return "tRNS chunk has wrong size for RGB image";
+    case 42: return "tRNS chunk appeared while it was not allowed for this color type";
+    case 43: return "bKGD chunk has wrong size for palette image";
+    case 44: return "bKGD chunk has wrong size for grayscale image";
+    case 45: return "bKGD chunk has wrong size for RGB image";
+    case 48: return "empty input buffer given to decoder. Maybe caused by non-existing file?";
+    case 49: return "jumped past memory while generating dynamic huffman tree";
+    case 50: return "jumped past memory while generating dynamic huffman tree";
+    case 51: return "jumped past memory while inflating huffman block";
+    case 52: return "jumped past memory while inflating";
+    case 53: return "size of zlib data too small";
+    case 54: return "repeat symbol in tree while there was no value symbol yet";
+    /*jumped past tree while generating huffman tree, this could be when the
+    tree will have more leaves than symbols after generating it out of the
+    given lengths. They call this an oversubscribed dynamic bit lengths tree in zlib.*/
+    case 55: return "jumped past tree while generating huffman tree";
+    case 56: return "given output image colortype or bitdepth not supported for color conversion";
+    case 57: return "invalid CRC encountered (checking CRC can be disabled)";
+    case 58: return "invalid ADLER32 encountered (checking ADLER32 can be disabled)";
+    case 59: return "requested color conversion not supported";
+    case 60: return "invalid window size given in the settings of the encoder (must be 0-32768)";
+    case 61: return "invalid BTYPE given in the settings of the encoder (only 0, 1 and 2 are allowed)";
+    /*LodePNG leaves the choice of RGB to grayscale conversion formula to the user.*/
+    case 62: return "conversion from color to grayscale not supported";
+    /*(2^31-1)*/
+    case 63: return "length of a chunk too long, max allowed for PNG is 2147483647 bytes per chunk";
+    /*this would result in the inability of a deflated block to ever contain an end code. It must be at least 1.*/
+    case 64: return "the length of the END symbol 256 in the Huffman tree is 0";
+    case 66: return "the length of a text chunk keyword given to the encoder is longer than the maximum of 79 bytes";
+    case 67: return "the length of a text chunk keyword given to the encoder is smaller than the minimum of 1 byte";
+    case 68: return "tried to encode a PLTE chunk with a palette that has less than 1 or more than 256 colors";
+    case 69: return "unknown chunk type with 'critical' flag encountered by the decoder";
+    case 71: return "nonexistent interlace mode given to encoder (must be 0 or 1)";
+    case 72: return "while decoding, nonexistent compression method encountering in zTXt or iTXt chunk (it must be 0)";
+    case 73: return "invalid tIME chunk size";
+    case 74: return "invalid pHYs chunk size";
+    /*length could be wrong, or data chopped off*/
+    case 75: return "no null termination char found while decoding text chunk";
+    case 76: return "iTXt chunk too short to contain required bytes";
+    case 77: return "integer overflow in buffer size";
+    case 78: return "failed to open file for reading"; /*file doesn't exist or couldn't be opened for reading*/
+    case 79: return "failed to open file for writing";
+    case 80: return "tried creating a tree of 0 symbols";
+    case 81: return "lazy matching at pos 0 is impossible";
+    case 82: return "color conversion to palette requested while a color isn't in palette, or index out of bounds";
+    case 83: return "memory allocation failed";
+    case 84: return "given image too small to contain all pixels to be encoded";
+    case 86: return "impossible offset in lz77 encoding (internal bug)";
+    case 87: return "must provide custom zlib function pointer if LODEPNG_COMPILE_ZLIB is not defined";
+    case 88: return "invalid filter strategy given for LodePNGEncoderSettings.filter_strategy";
+    case 89: return "text chunk keyword too short or long: must have size 1-79";
+    /*the windowsize in the LodePNGCompressSettings. Requiring POT(==> & instead of %) makes encoding 12% faster.*/
+    case 90: return "windowsize must be a power of two";
+    case 91: return "invalid decompressed idat size";
+    case 92: return "integer overflow due to too many pixels";
+    case 93: return "zero width or height is invalid";
+    case 94: return "header chunk must have a size of 13 bytes";
+    case 95: return "integer overflow with combined idat chunk size";
+    case 96: return "invalid gAMA chunk size";
+    case 97: return "invalid cHRM chunk size";
+    case 98: return "invalid sRGB chunk size";
+    case 99: return "invalid sRGB rendering intent";
+    case 100: return "invalid ICC profile color type, the PNG specification only allows RGB or GRAY";
+    case 101: return "PNG specification does not allow RGB ICC profile on gray color types and vice versa";
+    case 102: return "not allowed to set grayscale ICC profile with colored pixels by PNG specification";
+    case 103: return "invalid palette index in bKGD chunk. Maybe it came before PLTE chunk?";
+    case 104: return "invalid bKGD color while encoding (e.g. palette index out of range)";
+    case 105: return "integer overflow of bitsize";
+    case 106: return "PNG file must have PLTE chunk if color type is palette";
+    case 107: return "color convert from palette mode requested without setting the palette data in it";
+    case 108: return "tried to add more than 256 values to a palette";
+  }
+  return "unknown error code";
+}
+#endif /*LODEPNG_COMPILE_ERROR_TEXT*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // C++ Wrapper                                                          // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_CPP
+namespace lodepng {
+
+#ifdef LODEPNG_COMPILE_DISK
+unsigned load_file(std::vector<unsigned char>& buffer, const std::string& filename) {
+  const long nbytes = lodepng_filesize(filename.c_str());
+  if(nbytes < 0) return 78; /*error 78: failed to open file for reading*/
+  buffer.resize((size_t)nbytes);
+  return nbytes != 0 ? lodepng_buffer_file(&buffer[0], (size_t)nbytes, filename.c_str()) : 0;
+}
+
+/*Writes the buffer to the file, overwriting it (no append). An empty buffer is passed as a null pointer.*/
+unsigned save_file(const std::vector<unsigned char>& buffer, const std::string& filename) {
+  return lodepng_save_file(buffer.empty() ? 0 : &buffer[0], buffer.size(), filename.c_str());
+}
+#endif /* LODEPNG_COMPILE_DISK */
+
+#ifdef LODEPNG_COMPILE_ZLIB
+#ifdef LODEPNG_COMPILE_DECODER
+unsigned decompress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
+                    const LodePNGDecompressSettings& settings) {
+  size_t rawsize = 0;
+  unsigned char* raw = 0;
+  const unsigned status = zlib_decompress(&raw, &rawsize, in, insize, &settings);
+  if(raw != 0) { /*append whatever was produced, then release the C buffer*/
+    out.insert(out.end(), raw, raw + rawsize);
+    lodepng_free(raw);
+  }
+  return status;
+}
+
+unsigned decompress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
+                    const LodePNGDecompressSettings& settings) {
+  return decompress(out, in.empty() ? 0 : &in[0], in.size(), settings); /*null pointer for an empty vector*/
+}
+#endif /* LODEPNG_COMPILE_DECODER */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+unsigned compress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
+                  const LodePNGCompressSettings& settings) {
+  size_t packedsize = 0;
+  unsigned char* packed = 0;
+  const unsigned status = zlib_compress(&packed, &packedsize, in, insize, &settings);
+  if(packed != 0) { /*append whatever was produced, then release the C buffer*/
+    out.insert(out.end(), packed, packed + packedsize);
+    lodepng_free(packed);
+  }
+  return status;
+}
+
+unsigned compress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
+                  const LodePNGCompressSettings& settings) {
+  return compress(out, in.empty() ? 0 : &in[0], in.size(), settings); /*null pointer for an empty vector*/
+}
+#endif /* LODEPNG_COMPILE_ENCODER */
+#endif /* LODEPNG_COMPILE_ZLIB */
+
+
+#ifdef LODEPNG_COMPILE_PNG
+
+State::State() {
+  lodepng_state_init(this); /*delegate to the C initializer*/
+}
+
+State::State(const State& other) {
+  lodepng_state_init(this); /*initialize first, then copy from other*/
+  lodepng_state_copy(this, &other);
+}
+
+State::~State() {
+  lodepng_state_cleanup(this); /*delegate to the C cleanup function*/
+}
+
+State& State::operator=(const State& other) {
+  if(this != &other) lodepng_state_copy(this, &other); /*guard: the copy presumably cleans up dest before reading source*/
+  return *this;
+}
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h, const unsigned char* in,
+                size_t insize, LodePNGColorType colortype, unsigned bitdepth) {
+  unsigned char* buffer = 0; /*stays null if the decoder never hands back a buffer*/
+  unsigned error = lodepng_decode_memory(&buffer, &w, &h, in, insize, colortype, bitdepth);
+  if(buffer && !error) {
+    State state; /*only used to describe the raw format for the size computation*/
+    state.info_raw.colortype = colortype;
+    state.info_raw.bitdepth = bitdepth;
+    size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw);
+    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
+  }
+  lodepng_free(buffer); /*free on the error path too, matching the State-based decode overload*/
+  return error;
+}
+
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                const std::vector<unsigned char>& in, LodePNGColorType colortype, unsigned bitdepth) {
+  return decode(out, w, h, in.empty() ? 0 : &in[0], in.size(), colortype, bitdepth); /*insize is size_t: no narrowing cast*/
+}
+
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                State& state,
+                const unsigned char* in, size_t insize) {
+  unsigned char* pixels = NULL;
+  const unsigned status = lodepng_decode(&pixels, &w, &h, &state, in, insize);
+  if(status == 0 && pixels != NULL) { /*append the decoded pixels only on success*/
+    const size_t npixelbytes = lodepng_get_raw_size(w, h, &state.info_raw);
+    out.insert(out.end(), pixels, pixels + npixelbytes);
+  }
+  lodepng_free(pixels);
+  return status;
+}
+
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                State& state,
+                const std::vector<unsigned char>& in) {
+  return decode(out, w, h, state, in.empty() ? 0 : &in[0], in.size()); /*null pointer for an empty vector*/
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h, const std::string& filename,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  /* give the outputs defined values before anything can fail */
+  w = 0;
+  h = 0;
+  std::vector<unsigned char> png;
+  const unsigned load_error = load_file(png, filename);
+  return load_error ? load_error : decode(out, w, h, png, colortype, bitdepth);
+}
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_DECODER */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+unsigned encode(std::vector<unsigned char>& out, const unsigned char* in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  unsigned char* buffer = 0; /*initialized like the zlib wrappers so the check below never reads garbage*/
+  size_t buffersize = 0;
+  unsigned error = lodepng_encode_memory(&buffer, &buffersize, in, w, h, colortype, bitdepth);
+  if(buffer) {
+    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
+    lodepng_free(buffer);
+  }
+  return error;
+}
+
+unsigned encode(std::vector<unsigned char>& out,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  if(in.size() < lodepng_get_raw_size_lct(w, h, colortype, bitdepth)) return 84; /*input holds too few bytes*/
+  return encode(out, in.empty() ? 0 : &in[0], w, h, colortype, bitdepth);
+}
+
+unsigned encode(std::vector<unsigned char>& out,
+                const unsigned char* in, unsigned w, unsigned h,
+                State& state) {
+  unsigned char* buffer = 0; /*initialized like the zlib wrappers so the check below never reads garbage*/
+  size_t buffersize = 0;
+  unsigned error = lodepng_encode(&buffer, &buffersize, in, w, h, &state);
+  if(buffer) {
+    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
+    lodepng_free(buffer);
+  }
+  return error;
+}
+
+unsigned encode(std::vector<unsigned char>& out,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                State& state) {
+  if(in.size() < lodepng_get_raw_size(w, h, &state.info_raw)) return 84; /*input holds too few bytes*/
+  return encode(out, in.empty() ? 0 : &in[0], w, h, state);
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+unsigned encode(const std::string& filename,
+                const unsigned char* in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  std::vector<unsigned char> png;
+  const unsigned encode_error = encode(png, in, w, h, colortype, bitdepth);
+  /*only touch the file once encoding has succeeded*/
+  return encode_error ? encode_error : save_file(png, filename);
+}
+
+unsigned encode(const std::string& filename,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  if(in.size() < lodepng_get_raw_size_lct(w, h, colortype, bitdepth)) return 84; /*input holds too few bytes*/
+  return encode(filename, in.empty() ? 0 : &in[0], w, h, colortype, bitdepth);
+}
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_ENCODER */
+#endif /* LODEPNG_COMPILE_PNG */
+} /* namespace lodepng */
+#endif /*LODEPNG_COMPILE_CPP*/
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lodepng/lodepng.h b/codec/L2/demos/jxlEnc/third_partys/third_party/lodepng/lodepng.h
new file mode 100644
index 0000000000..6ce3976245
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lodepng/lodepng.h
@@ -0,0 +1,1945 @@
+/*
+LodePNG version 20200215
+
+Copyright (c) 2005-2020 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+#ifndef LODEPNG_H
+#define LODEPNG_H
+
+#include <string.h> /*for size_t*/
+
+extern const char* LODEPNG_VERSION_STRING;
+
+/*
+The following #defines are used to create code sections. They can be disabled
+to disable code sections, which can give faster compile time and smaller binary.
+The "NO_COMPILE" defines are designed to be used to pass as defines to the
+compiler command to disable them without modifying this header, e.g.
+-DLODEPNG_NO_COMPILE_ZLIB for gcc.
+In addition to those below, you can also define LODEPNG_NO_COMPILE_CRC to
+allow implementing a custom lodepng_crc32.
+*/
+/*deflate & zlib. If disabled, you must specify alternative zlib functions in
+the custom_zlib field of the compress and decompress settings*/
+#ifndef LODEPNG_NO_COMPILE_ZLIB
+#define LODEPNG_COMPILE_ZLIB
+#endif
+
+/*png encoder and png decoder*/
+#ifndef LODEPNG_NO_COMPILE_PNG
+#define LODEPNG_COMPILE_PNG
+#endif
+
+/*deflate&zlib decoder and png decoder*/
+#ifndef LODEPNG_NO_COMPILE_DECODER
+#define LODEPNG_COMPILE_DECODER
+#endif
+
+/*deflate&zlib encoder and png encoder*/
+#ifndef LODEPNG_NO_COMPILE_ENCODER
+#define LODEPNG_COMPILE_ENCODER
+#endif
+
+/*the optional built in harddisk file loading and saving functions*/
+#ifndef LODEPNG_NO_COMPILE_DISK
+#define LODEPNG_COMPILE_DISK
+#endif
+
+/*support for chunks other than IHDR, IDAT, PLTE, tRNS, IEND: ancillary and unknown chunks*/
+#ifndef LODEPNG_NO_COMPILE_ANCILLARY_CHUNKS
+#define LODEPNG_COMPILE_ANCILLARY_CHUNKS
+#endif
+
+/*ability to convert error numerical codes to English text string*/
+#ifndef LODEPNG_NO_COMPILE_ERROR_TEXT
+#define LODEPNG_COMPILE_ERROR_TEXT
+#endif
+
+/*Compile the default allocators (C's free, malloc and realloc). If you disable this,
+you can define the functions lodepng_free, lodepng_malloc and lodepng_realloc in your
+source files with custom allocators.*/
+#ifndef LODEPNG_NO_COMPILE_ALLOCATORS
+#define LODEPNG_COMPILE_ALLOCATORS
+#endif
+
+/*compile the C++ version (you can disable the C++ wrapper here even when compiling for C++)*/
+#ifdef __cplusplus
+#ifndef LODEPNG_NO_COMPILE_CPP
+#define LODEPNG_COMPILE_CPP
+#endif
+#endif
+
+#ifdef LODEPNG_COMPILE_CPP
+#include <vector>
+#include <string>
+#endif /*LODEPNG_COMPILE_CPP*/
+
+#ifdef LODEPNG_COMPILE_PNG
+/*The PNG color types (also used for raw image). Each entry lists the bit depths valid for it.*/
+typedef enum LodePNGColorType {
+  LCT_GREY = 0, /*grayscale: 1,2,4,8,16 bit*/
+  LCT_RGB = 2, /*RGB: 8,16 bit*/
+  LCT_PALETTE = 3, /*palette: 1,2,4,8 bit*/
+  LCT_GREY_ALPHA = 4, /*grayscale with alpha: 8,16 bit*/
+  LCT_RGBA = 6, /*RGB with alpha: 8,16 bit*/
+  /*LCT_MAX_OCTET_VALUE lets the compiler allow this enum to represent any invalid
+  byte value from 0 to 255 that could be present in an invalid PNG file header. Do
+  not use, compare with or set the name LCT_MAX_OCTET_VALUE; instead either use
+  the valid color type names above, or numeric values like 1 or 7 when checking for
+  particular disallowed color type byte values, or cast to integer to print it.*/
+  LCT_MAX_OCTET_VALUE = 255
+} LodePNGColorType;
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*
+Converts PNG data in memory to raw pixel data.
+out: Output parameter. Pointer to buffer that will contain the raw pixel data.
+     After decoding, its size is w * h * (bytes per pixel) bytes larger than
+     initially. Bytes per pixel depends on colortype and bitdepth.
+     Must be freed after usage with free(*out).
+     Note: for 16-bit per channel colors, uses big endian format like PNG does.
+w: Output parameter. Pointer to width of pixel data.
+h: Output parameter. Pointer to height of pixel data.
+in: Memory buffer with the PNG file.
+insize: size of the in buffer.
+colortype: the desired color type for the raw output image. See explanation on PNG color types.
+bitdepth: the desired bit depth for the raw output image. See explanation on PNG color types.
+Return value: LodePNG error code (0 means no error).
+*/
+unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h,
+                               const unsigned char* in, size_t insize,
+                               LodePNGColorType colortype, unsigned bitdepth);
+
+/*Same as lodepng_decode_memory, but always decodes to 32-bit RGBA raw image*/
+unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h,
+                          const unsigned char* in, size_t insize);
+
+/*Same as lodepng_decode_memory, but always decodes to 24-bit RGB raw image*/
+unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h,
+                          const unsigned char* in, size_t insize);
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Load PNG from disk, from file with given name.
+Same as the other decode functions, but instead takes a filename as input.
+*/
+unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h,
+                             const char* filename,
+                             LodePNGColorType colortype, unsigned bitdepth);
+
+/*Same as lodepng_decode_file, but always decodes to 32-bit RGBA raw image.*/
+unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h,
+                               const char* filename);
+
+/*Same as lodepng_decode_file, but always decodes to 24-bit RGB raw image.*/
+unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h,
+                               const char* filename);
+#endif /*LODEPNG_COMPILE_DISK*/
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*
+Converts raw pixel data into a PNG image in memory. The colortype and bitdepth
+  of the output PNG image cannot be chosen, they are automatically determined
+  by the colortype, bitdepth and content of the input pixel data.
+  Note: for 16-bit per channel colors, needs big endian format like PNG does.
+out: Output parameter. Pointer to buffer that will contain the PNG image data.
+     Must be freed after usage with free(*out).
+outsize: Output parameter. Pointer to the size in bytes of the out buffer.
+image: The raw pixel data to encode. The size of this buffer should be
+       w * h * (bytes per pixel), bytes per pixel depends on colortype and bitdepth.
+w: width of the raw pixel data in pixels.
+h: height of the raw pixel data in pixels.
+colortype: the color type of the raw input image. See explanation on PNG color types.
+bitdepth: the bit depth of the raw input image. See explanation on PNG color types.
+Return value: LodePNG error code (0 means no error).
+*/
+unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize,
+                               const unsigned char* image, unsigned w, unsigned h,
+                               LodePNGColorType colortype, unsigned bitdepth);
+
+/*Same as lodepng_encode_memory, but always encodes from 32-bit RGBA raw image.*/
+unsigned lodepng_encode32(unsigned char** out, size_t* outsize,
+                          const unsigned char* image, unsigned w, unsigned h);
+
+/*Same as lodepng_encode_memory, but always encodes from 24-bit RGB raw image.*/
+unsigned lodepng_encode24(unsigned char** out, size_t* outsize,
+                          const unsigned char* image, unsigned w, unsigned h);
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Converts raw pixel data into a PNG file on disk.
+Same as the other encode functions, but instead takes a filename as output.
+NOTE: This overwrites existing files without warning!
+*/
+unsigned lodepng_encode_file(const char* filename,
+                             const unsigned char* image, unsigned w, unsigned h,
+                             LodePNGColorType colortype, unsigned bitdepth);
+
+/*Same as lodepng_encode_file, but always encodes from 32-bit RGBA raw image.*/
+unsigned lodepng_encode32_file(const char* filename,
+                               const unsigned char* image, unsigned w, unsigned h);
+
+/*Same as lodepng_encode_file, but always encodes from 24-bit RGB raw image.*/
+unsigned lodepng_encode24_file(const char* filename,
+                               const unsigned char* image, unsigned w, unsigned h);
+#endif /*LODEPNG_COMPILE_DISK*/
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+
+#ifdef LODEPNG_COMPILE_CPP
+namespace lodepng {
+#ifdef LODEPNG_COMPILE_DECODER
+/*Same as lodepng_decode_memory, but decodes to an std::vector. The colortype
+is the format to output the pixels to. Default is RGBA 8-bit per channel.*/
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                const unsigned char* in, size_t insize,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                const std::vector<unsigned char>& in,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Converts PNG file from disk to raw pixel data in memory.
+Same as the other decode functions, but instead takes a filename as input.
+*/
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                const std::string& filename,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_DECODER */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Same as lodepng_encode_memory, but encodes to an std::vector. colortype
+is that of the raw input data. The output PNG color type will be auto chosen.*/
+unsigned encode(std::vector<unsigned char>& out,
+                const unsigned char* in, unsigned w, unsigned h,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+unsigned encode(std::vector<unsigned char>& out,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Converts raw pixel data (32-bit RGBA unless colortype/bitdepth say otherwise) into a PNG file on disk.
+Same as the other encode functions, but instead takes a filename as output.
+NOTE: This overwrites existing files without warning!
+*/
+unsigned encode(const std::string& filename,
+                const unsigned char* in, unsigned w, unsigned h,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+unsigned encode(const std::string& filename,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_ENCODER */
+} /* namespace lodepng */
+#endif /*LODEPNG_COMPILE_CPP*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+#ifdef LODEPNG_COMPILE_ERROR_TEXT
+/*Returns an English description of the numerical error code.*/
+const char* lodepng_error_text(unsigned code);
+#endif /*LODEPNG_COMPILE_ERROR_TEXT*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*Settings for zlib decompression. The typedef comes first so the callback signatures can name the struct.*/
+typedef struct LodePNGDecompressSettings LodePNGDecompressSettings;
+struct LodePNGDecompressSettings {
+  /* Check LodePNGDecoderSettings for more ignorable errors such as ignore_crc */
+  unsigned ignore_adler32; /*if 1, continue and don't give an error message if the Adler32 checksum is corrupted*/
+  unsigned ignore_nlen; /*ignore complement of len checksum in uncompressed blocks*/
+
+  /*use custom zlib decoder instead of built in one (default: null)*/
+  unsigned (*custom_zlib)(unsigned char**, size_t*,
+                          const unsigned char*, size_t,
+                          const LodePNGDecompressSettings*);
+  /*use custom deflate decoder instead of built in one (default: null).
+  If custom_zlib is not null, custom_inflate is ignored (the zlib format uses deflate)*/
+  unsigned (*custom_inflate)(unsigned char**, size_t*,
+                             const unsigned char*, size_t,
+                             const LodePNGDecompressSettings*);
+
+  const void* custom_context; /*optional custom settings for custom functions*/
+};
+
+extern const LodePNGDecompressSettings lodepng_default_decompress_settings;
+void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*
+Settings for zlib compression. Tweaking these settings tweaks the balance
+between speed and compression ratio.
+*/
+typedef struct LodePNGCompressSettings LodePNGCompressSettings;
+struct LodePNGCompressSettings /*deflate = compress*/ {
+  /*LZ77 related settings*/
+  unsigned btype; /*the block type for LZ (0, 1 or 2, see zlib standard; error 61 allows only these). Should be 2 for proper compression.*/
+  unsigned use_lz77; /*whether or not to use LZ77. Should be 1 for proper compression.*/
+  unsigned windowsize; /*must be a power of two <= 32768. higher compresses more but is slower. Default value: 2048.*/
+  unsigned minmatch; /*minimum lz77 length. 3 is normally best, 6 can be better for some PNGs. Default: 0 (NOTE(review): verify against lodepng_compress_settings_init)*/
+  unsigned nicematch; /*stop searching if >= this length found. Set to 258 for best compression. Default: 128*/
+  unsigned lazymatching; /*use lazy matching: better compression but a bit slower. Default: true*/
+
+  /*use custom zlib encoder instead of built in one (default: null)*/
+  unsigned (*custom_zlib)(unsigned char**, size_t*,
+                          const unsigned char*, size_t,
+                          const LodePNGCompressSettings*);
+  /*use custom deflate encoder instead of built in one (default: null).
+  If custom_zlib is used, custom_deflate is ignored since only the built in
+  zlib function will call custom_deflate*/
+  unsigned (*custom_deflate)(unsigned char**, size_t*,
+                             const unsigned char*, size_t,
+                             const LodePNGCompressSettings*);
+
+  const void* custom_context; /*optional custom settings for custom functions*/
+};
+
+extern const LodePNGCompressSettings lodepng_default_compress_settings;
+void lodepng_compress_settings_init(LodePNGCompressSettings* settings);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#ifdef LODEPNG_COMPILE_PNG
+/*
+Color mode of an image. Contains all information required to decode the pixel
+bits to RGBA colors. This information is the same as used in the PNG file
+format, and is used both for PNG and raw image data in LodePNG.
+*/
+typedef struct LodePNGColorMode {
+  /*header (IHDR)*/
+  LodePNGColorType colortype; /*color type, see PNG standard or documentation further in this header file*/
+  unsigned bitdepth;  /*bits per sample, see PNG standard or documentation further in this header file*/
+
+  /*
+  palette (PLTE and tRNS)
+
+  Dynamically allocated with the colors of the palette, including alpha.
+  This field must not be allocated directly: use lodepng_color_mode_init first,
+  then lodepng_palette_add per color to correctly initialize it (to ensure size
+  of exactly 1024 bytes).
+
+  The alpha channels must be set as well, set them to 255 for opaque images.
+
+  When decoding, by default you can ignore this palette, since LodePNG already
+  fills the palette colors in the pixels of the raw RGBA output.
+
+  The palette is only supported for color type 3.
+  */
+  unsigned char* palette; /*palette in RGBARGBA... order. Must be either 0, or when allocated must have 1024 bytes*/
+  size_t palettesize; /*palette size in number of colors (amount of used bytes is 4 * palettesize)*/
+
+  /*
+  transparent color key (tRNS)
+
+  This color uses the same bit depth as the bitdepth value in this struct, which can be 1-bit to 16-bit.
+  For grayscale PNGs, r, g and b will all three be set to the same value.
+
+  When decoding, by default you can ignore this information, since LodePNG sets
+  pixels with this key to transparent already in the raw RGBA output.
+
+  The color key is only supported for color types 0 and 2.
+  */
+  unsigned key_defined; /*is a transparent color key given? 0 = false, 1 = true*/
+  unsigned key_r;       /*red/grayscale component of color key*/
+  unsigned key_g;       /*green component of color key*/
+  unsigned key_b;       /*blue component of color key*/
+} LodePNGColorMode;
+
+/*init, cleanup and copy functions to use with this struct*/
+void lodepng_color_mode_init(LodePNGColorMode* info);
+void lodepng_color_mode_cleanup(LodePNGColorMode* info);
+/*return value is error code (0 means no error)*/
+unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source);
+/* Makes a temporary LodePNGColorMode that does not need cleanup (no palette) */
+LodePNGColorMode lodepng_color_mode_make(LodePNGColorType colortype, unsigned bitdepth);
+
+void lodepng_palette_clear(LodePNGColorMode* info);
+/*add 1 color to the palette*/
+unsigned lodepng_palette_add(LodePNGColorMode* info,
+                             unsigned char r, unsigned char g, unsigned char b, unsigned char a);
+
+/*get the total amount of bits per pixel, based on colortype and bitdepth in the struct*/
+unsigned lodepng_get_bpp(const LodePNGColorMode* info);
+/*get the amount of color channels used, based on colortype in the struct.
+If a palette is used, it counts as 1 channel.*/
+unsigned lodepng_get_channels(const LodePNGColorMode* info);
+/*is it a grayscale type? (only colortype 0 or 4)*/
+unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info);
+/*has it got an alpha channel? (only colortype 4 or 6)*/
+unsigned lodepng_is_alpha_type(const LodePNGColorMode* info);
+/*has it got a palette? (only colortype 3)*/
+unsigned lodepng_is_palette_type(const LodePNGColorMode* info);
+/*only returns true if there is a palette and there is a value in the palette with alpha < 255.
+Loops through the palette to check this.*/
+unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info);
+/*
+Check if the given color info indicates the possibility of having non-opaque pixels in the PNG image.
+Returns true if the image can have translucent or invisible pixels (it can still be opaque if it doesn't use such pixels).
+Returns false if the image can only have opaque pixels.
+In detail, it returns true only if it's a color type with alpha, or has a palette with non-opaque values,
+or if "key_defined" is true.
+*/
+unsigned lodepng_can_have_alpha(const LodePNGColorMode* info);
+/*Returns the byte size of a raw image buffer with given width, height and color mode*/
+size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color);
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+/*The information of a time chunk (tIME) in PNG.*/
+typedef struct LodePNGTime {
+  unsigned year;    /*2 bytes used (0-65535)*/
+  unsigned month;   /*1-12*/
+  unsigned day;     /*1-31*/
+  unsigned hour;    /*0-23*/
+  unsigned minute;  /*0-59*/
+  unsigned second;  /*0-60 (to allow for leap seconds)*/
+} LodePNGTime;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*Information about the PNG image, except pixels, width and height.*/
+typedef struct LodePNGInfo {
+  /*header (IHDR), palette (PLTE) and transparency (tRNS) chunks*/
+  unsigned compression_method;/*compression method of the original file. Always 0.*/
+  unsigned filter_method;     /*filter method of the original file*/
+  unsigned interlace_method;  /*interlace method of the original file: 0=none, 1=Adam7*/
+  LodePNGColorMode color;     /*color type and bits, palette and transparency of the PNG file*/
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  /*
+  Suggested background color chunk (bKGD)
+
+  This uses the same color mode and bit depth as the PNG (except no alpha channel),
+  with values truncated to the bit depth in the unsigned integer.
+
+  For grayscale and palette PNGs, the value is stored in background_r. The values
+  in background_g and background_b are then unused.
+
+  So when decoding, you may get these in a different color mode than the one you requested
+  for the raw pixels.
+
+  When encoding with auto_convert, you must use the color model defined in info_png.color for
+  these values. The encoder normally ignores info_png.color when auto_convert is on, but will
+  use it to interpret these values (and convert copies of them to its chosen color model).
+
+  When encoding, avoid setting this to an expensive color, such as a non-gray value
+  when the image is gray, or the compression will be worse since it will be forced to
+  write the PNG with a more expensive color mode (when auto_convert is on).
+
+  The decoder does not use this background color to edit the color of pixels. This is a
+  completely optional metadata feature.
+  */
+  unsigned background_defined; /*is a suggested background color given?*/
+  unsigned background_r;       /*red/gray/palette component of suggested background color*/
+  unsigned background_g;       /*green component of suggested background color*/
+  unsigned background_b;       /*blue component of suggested background color*/
+
+  /*
+  non-international text chunks (tEXt and zTXt)
+
+  The char** arrays each contain num strings. The actual messages are in
+  text_strings, while text_keys are keywords that give a short description what
+  the actual text represents, e.g. Title, Author, Description, or anything else.
+
+  All the string fields below including keys, names and language tags are null terminated.
+  The PNG specification uses null characters for the keys, names and tags, and forbids null
+  characters to appear in the main text which is why we can use null termination everywhere here.
+
+  A keyword is minimum 1 character and maximum 79 characters long. It's
+  discouraged to use a single line length longer than 79 characters for texts.
+
+  Don't allocate these text buffers yourself. Use the init/cleanup functions
+  correctly and use lodepng_add_text and lodepng_clear_text.
+  */
+  size_t text_num; /*the amount of texts in these char** buffers (there may be more texts in itext)*/
+  char** text_keys; /*the keyword of a text chunk (e.g. "Comment")*/
+  char** text_strings; /*the actual text*/
+
+  /*
+  international text chunks (iTXt)
+  Similar to the non-international text chunks, but with additional strings
+  "langtags" and "transkeys".
+  */
+  size_t itext_num; /*the amount of international texts in this PNG*/
+  char** itext_keys; /*the English keyword of the text chunk (e.g. "Comment")*/
+  char** itext_langtags; /*language tag for this text's language, ISO/IEC 646 string, e.g. ISO 639 language tag*/
+  char** itext_transkeys; /*keyword translated to the international language - UTF-8 string*/
+  char** itext_strings; /*the actual international text - UTF-8 string*/
+
+  /*time chunk (tIME)*/
+  unsigned time_defined; /*set to 1 to make the encoder generate a tIME chunk*/
+  LodePNGTime time;
+
+  /*phys chunk (pHYs)*/
+  unsigned phys_defined; /*if 0, there is no pHYs chunk and the values below are undefined; if 1, there is one*/
+  unsigned phys_x; /*pixels per unit in x direction*/
+  unsigned phys_y; /*pixels per unit in y direction*/
+  unsigned phys_unit; /*may be 0 (unknown unit) or 1 (metre)*/
+
+  /*
+  Color profile related chunks: gAMA, cHRM, sRGB, iCCP
+
+  LodePNG does not apply any color conversions on pixels in the encoder or decoder and does not interpret these color
+  profile values. It merely passes on the information. If you wish to use color profiles and convert colors, please
+  use these values with a color management library.
+
+  See the PNG, ICC and sRGB specifications for more information about the meaning of these values.
+  */
+
+  /* gAMA chunk: optional, overridden by sRGB or iCCP if those are present. */
+  unsigned gama_defined; /* Whether a gAMA chunk is present (0 = not present, 1 = present). */
+  unsigned gama_gamma;   /* Gamma exponent times 100000 */
+
+  /* cHRM chunk: optional, overridden by sRGB or iCCP if those are present. */
+  unsigned chrm_defined; /* Whether a cHRM chunk is present (0 = not present, 1 = present). */
+  unsigned chrm_white_x; /* White Point x times 100000 */
+  unsigned chrm_white_y; /* White Point y times 100000 */
+  unsigned chrm_red_x;   /* Red x times 100000 */
+  unsigned chrm_red_y;   /* Red y times 100000 */
+  unsigned chrm_green_x; /* Green x times 100000 */
+  unsigned chrm_green_y; /* Green y times 100000 */
+  unsigned chrm_blue_x;  /* Blue x times 100000 */
+  unsigned chrm_blue_y;  /* Blue y times 100000 */
+
+  /*
+  sRGB chunk: optional. May not appear at the same time as iCCP.
+  If gAMA is also present gAMA must contain value 45455.
+  If cHRM is also present cHRM must contain respectively 31270,32900,64000,33000,30000,60000,15000,6000.
+  */
+  unsigned srgb_defined; /* Whether an sRGB chunk is present (0 = not present, 1 = present). */
+  unsigned srgb_intent;  /* Rendering intent: 0=perceptual, 1=rel. colorimetric, 2=saturation, 3=abs. colorimetric */
+
+  /*
+  iCCP chunk: optional. May not appear at the same time as sRGB.
+
+  LodePNG does not parse or use the ICC profile (except its color space header field for an edge case). A
+  separate library that handles the ICC data format (not included in LodePNG) is needed to use it for color
+  management and conversions.
+
+  For encoding, if iCCP is present, gAMA and cHRM are recommended to be added as well with values that match the ICC
+  profile as closely as possible, if you wish to do this you should provide the correct values for gAMA and cHRM and
+  enable their '_defined' flags since LodePNG will not automatically compute them from the ICC profile.
+
+  For encoding, the ICC profile is required by the PNG specification to be an "RGB" profile for non-gray
+  PNG color types and a "GRAY" profile for gray PNG color types. If you disable auto_convert, you must ensure
+  the ICC profile type matches your requested color type, else the encoder gives an error. If auto_convert is
+  enabled (the default), and the ICC profile is not a good match for the pixel data, this will result in an encoder
+  error if the pixel data has non-gray pixels for a GRAY profile, or a silent less-optimal compression of the pixel
+  data if the pixels could be encoded as grayscale but the ICC profile is RGB.
+
+  To avoid this do not set an ICC profile in the image unless there is a good reason for it, and when doing so
+  make sure you compute it carefully to avoid the above problems.
+  */
+  unsigned iccp_defined;      /* Whether an iCCP chunk is present (0 = not present, 1 = present). */
+  char* iccp_name;            /* Null terminated string with profile name, 1-79 bytes */
+  /*
+  The ICC profile in iccp_profile_size bytes.
+  Don't allocate this buffer yourself. Use the init/cleanup functions
+  correctly and use lodepng_set_icc and lodepng_clear_icc.
+  */
+  unsigned char* iccp_profile;
+  unsigned iccp_profile_size; /* The size of iccp_profile in bytes */
+
+  /* End of color profile related chunks */
+
+
+  /*
+  unknown chunks: chunks not known by LodePNG, passed on byte for byte.
+
+  There are 3 buffers, one for each position in the PNG where unknown chunks can appear.
+  Each buffer contains all unknown chunks for that position consecutively.
+  The 3 positions are:
+  0: between IHDR and PLTE, 1: between PLTE and IDAT, 2: between IDAT and IEND.
+
+  For encoding, do not store critical chunks or known chunks that are enabled with a "_defined" flag
+  above in here, since the encoder will blindly follow this and could then encode an invalid PNG file
+  (such as one with two IHDR chunks or the disallowed combination of sRGB with iCCP). But do use
+  this if you wish to store an ancillary chunk that is not supported by LodePNG (such as sPLT or hIST),
+  or any non-standard PNG chunk.
+
+  Do not allocate or traverse this data yourself. Use the chunk traversing functions declared
+  later, such as lodepng_chunk_next and lodepng_chunk_append, to read/write this struct.
+  */
+  unsigned char* unknown_chunks_data[3];
+  size_t unknown_chunks_size[3]; /*size in bytes of the unknown chunks, given for protection*/
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+} LodePNGInfo;
+
+/*init, cleanup and copy functions to use with this struct*/
+void lodepng_info_init(LodePNGInfo* info);
+void lodepng_info_cleanup(LodePNGInfo* info);
+/*return value is error code (0 means no error)*/
+unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source);
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str); /*push back both texts at once*/
+void lodepng_clear_text(LodePNGInfo* info); /*use this to clear the texts again after you filled them in*/
+
+unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag,
+                           const char* transkey, const char* str); /*push back the 4 texts of 1 chunk at once*/
+void lodepng_clear_itext(LodePNGInfo* info); /*use this to clear the itexts again after you filled them in*/
+
+/*replaces if exists*/
+unsigned lodepng_set_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size);
+void lodepng_clear_icc(LodePNGInfo* info); /*use this to clear the ICC profile again after you set it*/
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*
+Converts raw buffer from one color type to another color type, based on
+LodePNGColorMode structs to describe the input and output color type.
+See the reference manual at the end of this header file to see which color conversions are supported.
+The out buffer must have size (w * h * bpp + 7) / 8, where bpp is the bits per pixel
+of the output color type (lodepng_get_bpp).
+For < 8 bpp images, there should not be padding bits at the end of scanlines.
+For 16-bit per channel colors, uses big endian format like PNG does.
+Return value is the LodePNG error code: 0 if all went ok, or an error if the
+conversion isn't supported.
+*/
+unsigned lodepng_convert(unsigned char* out, const unsigned char* in,
+                         const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in,
+                         unsigned w, unsigned h);
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*
+Settings for the decoder. This contains settings for the PNG and the Zlib
+decoder, but not the Info settings from the Info structs.
+*/
+typedef struct LodePNGDecoderSettings {
+  LodePNGDecompressSettings zlibsettings; /*in here is the setting to ignore Adler32 checksums*/
+
+  /* Check LodePNGDecompressSettings for more ignorable errors such as ignore_adler32 */
+  unsigned ignore_crc; /*ignore CRC checksums*/
+  unsigned ignore_critical; /*ignore unknown critical chunks*/
+  unsigned ignore_end; /*ignore issues at end of file if possible (missing IEND chunk, too large chunk, ...)*/
+  /* TODO: make a system involving warnings with levels and a strict mode instead. Other potentially recoverable
+     errors: srgb rendering intent value, size of content of ancillary chunks, more than 79 characters for some
+     strings, placement/combination rules for ancillary chunks, crc of unknown chunks, allowed characters
+     in string keys, etc... */
+
+  unsigned color_convert; /*whether to convert the PNG to the color type you want. Default: yes*/
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  unsigned read_text_chunks; /*if false but remember_unknown_chunks is true, they're stored in the unknown chunks*/
+  /*store all bytes from unknown chunks in the LodePNGInfo (off by default, useful for a png editor)*/
+  unsigned remember_unknown_chunks;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+} LodePNGDecoderSettings;
+
+void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Strategy the encoder uses to choose the PNG filter type of each scanline.*/
+typedef enum LodePNGFilterStrategy {
+  /*every filter at zero*/
+  LFS_ZERO = 0,
+  /*every filter at 1, 2, 3 or 4 (paeth), unlike LFS_ZERO not a good choice, but for testing*/
+  LFS_ONE = 1,
+  LFS_TWO = 2,
+  LFS_THREE = 3,
+  LFS_FOUR = 4,
+  /*Use filter that gives minimum sum, as described in the official PNG filter heuristic.*/
+  LFS_MINSUM,
+  /*Use the filter type that gives smallest Shannon entropy for this scanline. Depending
+  on the image, this is better or worse than minsum.*/
+  LFS_ENTROPY,
+  /*
+  Brute-force-search PNG filters by compressing each filter for each scanline.
+  Experimental, very slow, and only rarely gives better compression than MINSUM.
+  */
+  LFS_BRUTE_FORCE,
+  /*use predefined_filters buffer: you specify the filter type for each scanline*/
+  LFS_PREDEFINED
+} LodePNGFilterStrategy;
+
+/*Gives characteristics about the integer RGBA colors of the image (count, alpha channel usage, bit depth, ...),
+which helps decide which color model to use for encoding.
+Used internally by default if "auto_convert" is enabled. Public because it's useful for custom algorithms.*/
+typedef struct LodePNGColorStats {
+  unsigned colored; /*not grayscale*/
+  unsigned key; /*image is not opaque and color key is possible instead of full alpha*/
+  unsigned short key_r; /*key values, always as 16-bit, in 8-bit case the byte is duplicated, e.g. 65535 means 255*/
+  unsigned short key_g;
+  unsigned short key_b;
+  unsigned alpha; /*image is not opaque and alpha channel or alpha palette required*/
+  unsigned numcolors; /*amount of colors, up to 257. Not valid if bits == 16 or allow_palette is disabled.*/
+  unsigned char palette[1024]; /*Remembers up to the first 256 RGBA colors, in no particular order, only valid when numcolors is valid*/
+  unsigned bits; /*bits per channel (not for palette). 1,2 or 4 for grayscale only. 16 if 16-bit per channel required.*/
+  size_t numpixels;
+
+  /*user settings for computing/using the stats*/
+  unsigned allow_palette; /*default 1. if 0, disallow choosing palette colortype in auto_choose_color, and don't count numcolors*/
+  unsigned allow_greyscale; /*default 1. if 0, choose RGB or RGBA even if the image only has gray colors*/
+} LodePNGColorStats;
+
+void lodepng_color_stats_init(LodePNGColorStats* stats);
+
+/*Get a LodePNGColorStats of the image. The stats must already have been inited.
+Returns error code (e.g. alloc fail) or 0 if ok.*/
+unsigned lodepng_compute_color_stats(LodePNGColorStats* stats,
+                                     const unsigned char* image, unsigned w, unsigned h,
+                                     const LodePNGColorMode* mode_in);
+
+/*Settings for the encoder.*/
+typedef struct LodePNGEncoderSettings {
+  LodePNGCompressSettings zlibsettings; /*settings for the zlib encoder, such as window size, ...*/
+
+  unsigned auto_convert; /*automatically choose output PNG color type. Default: true*/
+
+  /*If true, follows the official PNG heuristic: if the PNG uses a palette or lower than
+  8 bit depth, set all filters to zero. Otherwise use the filter_strategy. Note that to
+  completely follow the official PNG heuristic, filter_palette_zero must be true and
+  filter_strategy must be LFS_MINSUM*/
+  unsigned filter_palette_zero;
+  /*Which filter strategy to use when not using zeroes due to filter_palette_zero.
+  Set filter_palette_zero to 0 to ensure always using your chosen strategy. Default: LFS_MINSUM*/
+  LodePNGFilterStrategy filter_strategy;
+  /*used if filter_strategy is LFS_PREDEFINED. In that case, this must point to a buffer with
+  the same length as the amount of scanlines in the image, and each value must be a PNG filter type (0-4). You
+  have to cleanup this buffer, LodePNG will never free it. Don't forget that filter_palette_zero
+  must be set to 0 to ensure this is also used on palette or low bitdepth images.*/
+  const unsigned char* predefined_filters;
+
+  /*force creating a PLTE chunk if colortype is 2 or 6 (= a suggested palette).
+  If colortype is 3, PLTE is _always_ created.*/
+  unsigned force_palette;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  /*add LodePNG identifier and version as a text chunk, for debugging*/
+  unsigned add_id;
+  /*encode text chunks as zTXt chunks instead of tEXt chunks, and use compression in iTXt chunks*/
+  unsigned text_compression;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+} LodePNGEncoderSettings;
+
+void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+
+#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER)
+/*The settings, state and information for extended encoding and decoding.*/
+typedef struct LodePNGState {
+#ifdef LODEPNG_COMPILE_DECODER
+  LodePNGDecoderSettings decoder; /*the decoding settings*/
+#endif /*LODEPNG_COMPILE_DECODER*/
+#ifdef LODEPNG_COMPILE_ENCODER
+  LodePNGEncoderSettings encoder; /*the encoding settings*/
+#endif /*LODEPNG_COMPILE_ENCODER*/
+  LodePNGColorMode info_raw; /*specifies the format in which you would like to get the raw pixel buffer*/
+  LodePNGInfo info_png; /*info of the PNG image obtained after decoding*/
+  unsigned error;
+} LodePNGState;
+
+/*init, cleanup and copy functions to use with this struct*/
+void lodepng_state_init(LodePNGState* state);
+void lodepng_state_cleanup(LodePNGState* state);
+void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source);
+#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*
+Same as lodepng_decode_memory, but uses a LodePNGState to allow custom settings and
+getting much more information about the PNG image and color mode.
+*/
+unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h,
+                        LodePNGState* state,
+                        const unsigned char* in, size_t insize);
+
+/*
+Read the PNG header, but not the actual data. This returns only the information
+that is in the IHDR chunk of the PNG, such as width, height and color type. The
+information is placed in the info_png field of the LodePNGState.
+*/
+unsigned lodepng_inspect(unsigned* w, unsigned* h,
+                         LodePNGState* state,
+                         const unsigned char* in, size_t insize);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+/*
+Reads one metadata chunk (other than IHDR) of the PNG file and outputs what it
+read in the state. Returns error code on failure.
+Use lodepng_inspect first with a new state, then e.g. lodepng_chunk_find_const
+to find the desired chunk type, and if non null use lodepng_inspect_chunk (with
+chunk_pointer - start_of_file as pos).
+Supports most metadata chunks from the PNG standard (gAMA, bKGD, tEXt, ...).
+Ignores unsupported, unknown, non-metadata or IHDR chunks (without error).
+Requirements: &in[pos] must point to start of a chunk, must use regular
+lodepng_inspect first since format of most other chunks depends on IHDR, and if
+there is a PLTE chunk, that one must be inspected before tRNS or bKGD.
+*/
+unsigned lodepng_inspect_chunk(LodePNGState* state, size_t pos,
+                               const unsigned char* in, size_t insize);
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*This function allocates the out buffer with standard malloc and stores the size in *outsize.*/
+unsigned lodepng_encode(unsigned char** out, size_t* outsize,
+                        const unsigned char* image, unsigned w, unsigned h,
+                        LodePNGState* state);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/*
+The lodepng_chunk functions are normally not needed, except to traverse the
+unknown chunks stored in the LodePNGInfo struct, or add new ones to it.
+It also allows traversing the chunks of an encoded PNG file yourself.
+
+The chunk pointer always points to the beginning of the chunk itself, that is
+the first byte of the 4 length bytes.
+
+In the PNG file format, chunks have the following format:
+-4 bytes length: length of the data of the chunk in bytes (chunk itself is 12 bytes longer)
+-4 bytes chunk type (ASCII a-z,A-Z only, see below)
+-length bytes of data (may be 0 bytes if length was 0)
+-4 bytes of CRC, computed on chunk name + data
+
+The first chunk starts at the 8th byte of the PNG file, the entire rest of the file
+consists of concatenated chunks with the above format.
+
+PNG standard chunk ASCII naming conventions:
+-First byte: uppercase = critical, lowercase = ancillary
+-Second byte: uppercase = public, lowercase = private
+-Third byte: must be uppercase
+-Fourth byte: uppercase = unsafe to copy, lowercase = safe to copy
+*/
+
+/*
+Gets the length of the data of the chunk. Total chunk length has 12 bytes more.
+There must be at least 4 bytes to read from. If the result value is too large,
+it may be corrupt data.
+*/
+unsigned lodepng_chunk_length(const unsigned char* chunk);
+
+/*puts the 4-byte type in null terminated string*/
+void lodepng_chunk_type(char type[5], const unsigned char* chunk);
+
+/*check if the type is the given type*/
+unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type);
+
+/*0: it's one of the critical chunk types, 1: it's an ancillary chunk (see PNG standard)*/
+unsigned char lodepng_chunk_ancillary(const unsigned char* chunk);
+
+/*0: public, 1: private (see PNG standard)*/
+unsigned char lodepng_chunk_private(const unsigned char* chunk);
+
+/*0: the chunk is unsafe to copy, 1: the chunk is safe to copy (see PNG standard)*/
+unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk);
+
+/*get pointer to the data of the chunk, where the input points to the header of the chunk*/
+unsigned char* lodepng_chunk_data(unsigned char* chunk);
+const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk);
+
+/*returns 0 if the crc is correct, 1 if it's incorrect (0 for OK as usual!)*/
+unsigned lodepng_chunk_check_crc(const unsigned char* chunk);
+
+/*generates the correct CRC from the data and puts it in the last 4 bytes of the chunk*/
+void lodepng_chunk_generate_crc(unsigned char* chunk);
+
+/*
+Iterate to next chunks, allows iterating through all chunks of the PNG file.
+Input must be at the beginning of a chunk (result of a previous lodepng_chunk_next call,
+or the 8th byte of a PNG file which always has the first chunk), or alternatively may
+point to the first byte of the PNG file (which is not a chunk but the magic header, the
+function will then skip over it and return the first real chunk).
+Will output pointer to the start of the next chunk, or at or beyond end of the file if there
+is no more chunk after this or possibly if the chunk is corrupt.
+Start this process at the 8th byte of the PNG file.
+In a non-corrupt PNG file, the last chunk should have name "IEND".
+*/
+unsigned char* lodepng_chunk_next(unsigned char* chunk, unsigned char* end);
+const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk, const unsigned char* end);
+
+/*Finds the first chunk with the given type in the range [chunk, end), or returns NULL if not found.*/
+unsigned char* lodepng_chunk_find(unsigned char* chunk, unsigned char* end, const char type[5]);
+const unsigned char* lodepng_chunk_find_const(const unsigned char* chunk, const unsigned char* end, const char type[5]);
+
+/*
+Appends chunk to the data in out. The given chunk should already have its chunk header.
+The out variable and outlength are updated to reflect the new reallocated buffer.
+Returns error code (0 if it went ok)
+*/
+unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk);
+
+/*
+Appends new chunk to out. The chunk to append is given by giving its length, type
+and data separately. The type is a 4-letter string.
+The out variable and outlength are updated to reflect the new reallocated buffer.
+Returns error code (0 if it went ok)
+*/
+unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
+                              const char* type, const unsigned char* data);
+
+
+/*Calculate CRC32 of buffer*/
+unsigned lodepng_crc32(const unsigned char* buf, size_t len);
+#endif /*LODEPNG_COMPILE_PNG*/
+
+
+#ifdef LODEPNG_COMPILE_ZLIB
+/*
+This zlib part can be used independently to zlib compress and decompress a
+buffer. It cannot be used to create gzip files however, and it only supports the
+part of zlib that is required for PNG, it does not support dictionaries.
+*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*Inflate a buffer. Inflate is the decompression step of deflate. Out buffer must be freed after use.*/
+unsigned lodepng_inflate(unsigned char** out, size_t* outsize,
+                         const unsigned char* in, size_t insize,
+                         const LodePNGDecompressSettings* settings);
+
+/*
+Decompresses Zlib data. Reallocates the out buffer and appends the data. The
+data must be according to the zlib specification.
+Either, *out must be NULL and *outsize must be 0, or, *out must be a valid
+buffer and *outsize its size in bytes. out must be freed by user after usage.
+*/
+unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize,
+                                 const unsigned char* in, size_t insize,
+                                 const LodePNGDecompressSettings* settings);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*
+Compresses data with Zlib. Reallocates the out buffer and appends the data.
+Zlib adds a small header and trailer around the deflate data.
+The data is output in the format of the zlib specification.
+Either, *out must be NULL and *outsize must be 0, or, *out must be a valid
+buffer and *outsize its size in bytes. out must be freed by user after usage.
+*/
+unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize,
+                               const unsigned char* in, size_t insize,
+                               const LodePNGCompressSettings* settings);
+
+/*
+Find length-limited Huffman code for given frequencies. This function is in the
+public interface only for tests, it's used internally by lodepng_deflate.
+*/
+unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies,
+                                      size_t numcodes, unsigned maxbitlen);
+
+/*Compress a buffer with deflate. See RFC 1951. Out buffer must be freed after use.*/
+unsigned lodepng_deflate(unsigned char** out, size_t* outsize,
+                         const unsigned char* in, size_t insize,
+                         const LodePNGCompressSettings* settings);
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+#endif /*LODEPNG_COMPILE_ZLIB*/
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Load a file from disk into buffer. The function allocates the out buffer, and
+after usage you should free it.
+out: output parameter, contains pointer to loaded buffer.
+outsize: output parameter, size of the allocated out buffer
+filename: the path to the file to load
+return value: error code (0 means ok)
+*/
+unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename);
+
+/*
+Save a file from buffer to disk. Warning, if it exists, this function overwrites
+the file without warning!
+buffer: the buffer to write
+buffersize: size of the buffer to write
+filename: the path to the file to save to
+return value: error code (0 means ok)
+*/
+unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename);
+#endif /*LODEPNG_COMPILE_DISK*/
+
+#ifdef LODEPNG_COMPILE_CPP
+/* The LodePNG C++ wrapper uses std::vectors instead of manually allocated memory buffers. */
+namespace lodepng {
+#ifdef LODEPNG_COMPILE_PNG
+class State : public LodePNGState {
+  public:
+    State();
+    State(const State& other);
+    ~State();
+    State& operator=(const State& other);
+};
+
+#ifdef LODEPNG_COMPILE_DECODER
+/* Same as other lodepng::decode, but using a State for more settings and information. */
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                State& state,
+                const unsigned char* in, size_t insize);
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                State& state,
+                const std::vector<unsigned char>& in);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/* Same as other lodepng::encode, but using a State for more settings and information. */
+unsigned encode(std::vector<unsigned char>& out,
+                const unsigned char* in, unsigned w, unsigned h,
+                State& state);
+unsigned encode(std::vector<unsigned char>& out,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                State& state);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Load a file from disk into an std::vector.
+return value: error code (0 means ok)
+*/
+unsigned load_file(std::vector<unsigned char>& buffer, const std::string& filename);
+
+/*
+Save the binary data in an std::vector to a file on disk. The file is overwritten
+without warning.
+*/
+unsigned save_file(const std::vector<unsigned char>& buffer, const std::string& filename);
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_PNG */
+
+#ifdef LODEPNG_COMPILE_ZLIB
+#ifdef LODEPNG_COMPILE_DECODER
+/* Zlib-decompress an unsigned char buffer */
+unsigned decompress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
+                    const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings);
+
+/* Zlib-decompress an std::vector */
+unsigned decompress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
+                    const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings);
+#endif /* LODEPNG_COMPILE_DECODER */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/* Zlib-compress an unsigned char buffer */
+unsigned compress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
+                  const LodePNGCompressSettings& settings = lodepng_default_compress_settings);
+
+/* Zlib-compress an std::vector */
+unsigned compress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
+                  const LodePNGCompressSettings& settings = lodepng_default_compress_settings);
+#endif /* LODEPNG_COMPILE_ENCODER */
+#endif /* LODEPNG_COMPILE_ZLIB */
+} /* namespace lodepng */
+#endif /*LODEPNG_COMPILE_CPP*/
+
+/*
+TODO:
+[.] test if there are no memory leaks or security exploits - done a lot but needs to be checked often
+[.] check compatibility with various compilers  - done but needs to be redone for every newer version
+[X] converting color to 16-bit per channel types
+[X] support color profile chunk types (but never let them touch RGB values by default)
+[ ] support all public PNG chunk types (almost done except sBIT, sPLT and hIST)
+[ ] make sure encoder generates no chunks with size > (2^31)-1
+[ ] partial decoding (stream processing)
+[X] let the "isFullyOpaque" function check color keys and transparent palettes too
+[X] better name for the variables "codes", "codesD", "codelengthcodes", "clcl" and "lldl"
+[ ] allow treating some errors like warnings, when image is recoverable (e.g. 69, 57, 58)
+[ ] make warnings like: oob palette, checksum fail, data after iend, wrong/unknown crit chunk, no null terminator in text, ...
+[ ] error messages with line numbers (and version)
+[ ] errors in state instead of as return code?
+[ ] new errors/warnings like suspiciously big decompressed ztxt or iccp chunk
+[ ] let the C++ wrapper catch exceptions coming from the standard library and return LodePNG error codes
+[ ] allow user to provide custom color conversion functions, e.g. for premultiplied alpha, padding bits or not, ...
+[ ] allow user to give data (void*) to custom allocator
+[ ] provide alternatives for C library functions not present on some platforms (memcpy, ...)
+[ ] rename "grey" to "gray" everywhere since "color" also uses US spelling (keep "grey" copies for backwards compatibility)
+*/
+
+#endif /*LODEPNG_H inclusion guard*/
+
+/*
+LodePNG Documentation
+---------------------
+
+0. table of contents
+--------------------
+
+  1. about
+   1.1. supported features
+   1.2. features not supported
+  2. C and C++ version
+  3. security
+  4. decoding
+  5. encoding
+  6. color conversions
+    6.1. PNG color types
+    6.2. color conversions
+    6.3. padding bits
+    6.4. A note about 16-bits per channel and endianness
+  7. error values
+  8. chunks and PNG editing
+  9. compiler support
+  10. examples
+   10.1. decoder C++ example
+   10.2. decoder C example
+  11. state settings reference
+  12. changes
+  13. contact information
+
+
+1. about
+--------
+
+PNG is a file format to store raster images losslessly with good compression,
+supporting different color types and alpha channel.
+
+LodePNG is a PNG codec according to the Portable Network Graphics (PNG)
+Specification (Second Edition) - W3C Recommendation 10 November 2003.
+
+The specifications used are:
+
+*) Portable Network Graphics (PNG) Specification (Second Edition):
+     http://www.w3.org/TR/2003/REC-PNG-20031110
+*) RFC 1950 ZLIB Compressed Data Format version 3.3:
+     http://www.gzip.org/zlib/rfc-zlib.html
+*) RFC 1951 DEFLATE Compressed Data Format Specification ver 1.3:
+     http://www.gzip.org/zlib/rfc-deflate.html
+
+The most recent version of LodePNG can currently be found at
+http://lodev.org/lodepng/
+
+LodePNG works both in C (ISO C90) and C++, with a C++ wrapper that adds
+extra functionality.
+
+LodePNG consists of two files:
+-lodepng.h: the header file for both C and C++
+-lodepng.c(pp): give it the name lodepng.c or lodepng.cpp (or .cc) depending on your usage
+
+If you want to start using LodePNG right away without reading this doc, get the
+examples from the LodePNG website to see how to use it in code, or check the
+smaller examples in chapter 10 here.
+
+LodePNG is simple but only supports the basic requirements. To achieve
+simplicity, the following design choices were made: There are no dependencies
+on any external library. There are functions to decode and encode a PNG with
+a single function call, and extended versions of these functions taking a
+LodePNGState struct allowing to specify or get more information. By default
+the colors of the raw image are always RGB or RGBA, no matter what color type
+the PNG file uses. To read and write files, there are simple functions to
+convert the files to/from buffers in memory.
+
+This all makes LodePNG suitable for loading textures in games, demos and small
+programs, ... It's less suitable for full fledged image editors, loading PNGs
+over network (it requires all the image data to be available before decoding can
+begin), life-critical systems, ...
+
+1.1. supported features
+-----------------------
+
+The following features are supported:
+
+*) decoding of PNGs with any color type, bit depth and interlace mode, to a 24- or 32-bit color raw image,
+   or the same color type as the PNG
+*) encoding of PNGs, from any raw image to 24- or 32-bit color, or the same color type as the raw image
+*) Adam7 interlace and deinterlace for any color type
+*) loading the image from harddisk or decoding it from a buffer from other sources than harddisk
+*) support for alpha channels, including RGBA color model, translucent palettes and color keying
+*) zlib decompression (inflate)
+*) zlib compression (deflate)
+*) CRC32 and ADLER32 checksums
+*) colorimetric color profile conversions: currently experimentally available in lodepng_util.cpp only,
+   plus alternatively ability to pass on chroma/gamma/ICC profile information to other color management system.
+*) handling of unknown chunks, allowing making a PNG editor that stores custom and unknown chunks.
+*) the following chunks are supported by both encoder and decoder:
+    IHDR: header information
+    PLTE: color palette
+    IDAT: pixel data
+    IEND: the final chunk
+    tRNS: transparency for palettized images
+    tEXt: textual information
+    zTXt: compressed textual information
+    iTXt: international textual information
+    bKGD: suggested background color
+    pHYs: physical dimensions
+    tIME: modification time
+    cHRM: RGB chromaticities
+    gAMA: RGB gamma correction
+    iCCP: ICC color profile
+    sRGB: rendering intent
+
+1.2. features not supported
+---------------------------
+
+The following features are _not_ supported:
+
+*) some features needed to make a conformant PNG-Editor might be still missing.
+*) partial loading/stream processing. All data must be available and is processed in one call.
+*) The following public chunks are not (yet) supported but treated as unknown chunks by LodePNG:
+    sBIT
+    hIST
+    sPLT
+
+
+2. C and C++ version
+--------------------
+
+The C version uses buffers allocated with alloc that you need to free()
+yourself. You need to use init and cleanup functions for each struct whenever
+using a struct from the C version to avoid exploits and memory leaks.
+
+The C++ version has extra functions with std::vectors in the interface and the
+lodepng::State class which is a LodePNGState with constructor and destructor.
+
+These files work without modification for both C and C++ compilers because all
+the additional C++ code is in "#ifdef __cplusplus" blocks that make C-compilers
+ignore it, and the C code is made to compile both with strict ISO C90 and C++.
+
+To use the C++ version, you need to rename the source file to lodepng.cpp
+(instead of lodepng.c), and compile it with a C++ compiler.
+
+To use the C version, you need to rename the source file to lodepng.c (instead
+of lodepng.cpp), and compile it with a C compiler.
+
+
+3. Security
+-----------
+
+Even if carefully designed, it's always possible that LodePNG contains
+exploits. If you discover one, please let me know, and it will be fixed.
+
+When using LodePNG, care has to be taken with the C version of LodePNG, as well
+as the C-style structs when working with C++. The following conventions are used
+for all C-style structs:
+
+-if a struct has a corresponding init function, always call the init function when making a new one
+-if a struct has a corresponding cleanup function, call it before the struct disappears to avoid memory leaks
+-if a struct has a corresponding copy function, use the copy function instead of "=".
+ The destination must also be inited already.
+
+
+4. Decoding
+-----------
+
+Decoding converts a PNG compressed image to a raw pixel buffer.
+
+Most documentation on using the decoder is at its declarations in the header
+above. For C, simple decoding can be done with functions such as
+lodepng_decode32, and more advanced decoding can be done with the struct
+LodePNGState and lodepng_decode. For C++, all decoding can be done with the
+various lodepng::decode functions, and lodepng::State can be used for advanced
+features.
+
+When using the LodePNGState, it uses the following fields for decoding:
+*) LodePNGInfo info_png: it stores extra information about the PNG (the input) in here
+*) LodePNGColorMode info_raw: here you can say what color mode of the raw image (the output) you want to get
+*) LodePNGDecoderSettings decoder: you can specify a few extra settings for the decoder to use
+
+LodePNGInfo info_png
+--------------------
+
+After decoding, this contains extra information of the PNG image, except the actual
+pixels, width and height because these are already gotten directly from the decoder
+functions.
+
+It contains for example the original color type of the PNG image, text comments,
+suggested background color, etc... More details about the LodePNGInfo struct are
+at its declaration documentation.
+
+LodePNGColorMode info_raw
+-------------------------
+
+When decoding, here you can specify which color type you want
+the resulting raw image to be. If this is different from the colortype of the
+PNG, then the decoder will automatically convert the result. This conversion
+always works, except if you want it to convert a color PNG to grayscale or to
+a palette with missing colors.
+
+By default, 32-bit color is used for the result.
+
+LodePNGDecoderSettings decoder
+------------------------------
+
+The settings can be used to ignore the errors created by invalid CRC and Adler32
+checksums, and to disable the decoding of tEXt chunks.
+
+There's also a setting color_convert, true by default. If false, no conversion
+is done, the resulting data will be as it was in the PNG (after decompression)
+and you'll have to puzzle the colors of the pixels together yourself using the
+color type information in the LodePNGInfo.
+
+
+5. Encoding
+-----------
+
+Encoding converts a raw pixel buffer to a PNG compressed image.
+
+Most documentation on using the encoder is at its declarations in the header
+above. For C, simple encoding can be done with functions such as
+lodepng_encode32, and more advanced encoding can be done with the struct
+LodePNGState and lodepng_encode. For C++, all encoding can be done with the
+various lodepng::encode functions, and lodepng::State can be used for advanced
+features.
+
+Like the decoder, the encoder can also give errors. However it gives fewer errors
+since the encoder input is trusted, the decoder input (a PNG image that could
+be forged by anyone) is not trusted.
+
+When using the LodePNGState, it uses the following fields for encoding:
+*) LodePNGInfo info_png: here you specify how you want the PNG (the output) to be.
+*) LodePNGColorMode info_raw: here you say what color type the raw image (the input) has
+*) LodePNGEncoderSettings encoder: you can specify a few settings for the encoder to use
+
+LodePNGInfo info_png
+--------------------
+
+When encoding, you use this the opposite way as when decoding: for encoding,
+you fill in the values you want the PNG to have before encoding. By default it's
+not needed to specify a color type for the PNG since it's automatically chosen,
+but it's possible to choose it yourself given the right settings.
+
+The encoder will not always exactly match the LodePNGInfo struct you give,
+it tries as close as possible. Some things are ignored by the encoder. The
+encoder uses, for example, the following settings from it when applicable:
+colortype and bitdepth, text chunks, time chunk, the color key, the palette, the
+background color, the interlace method, unknown chunks, ...
+
+When encoding to a PNG with colortype 3, the encoder will generate a PLTE chunk.
+If the palette contains any colors for which the alpha channel is not 255 (so
+there are translucent colors in the palette), it'll add a tRNS chunk.
+
+LodePNGColorMode info_raw
+-------------------------
+
+You specify the color type of the raw image that you give to the input here,
+including a possible transparent color key and palette you happen to be using in
+your raw image data.
+
+By default, 32-bit color is assumed, meaning your input has to be in RGBA
+format with 4 bytes (unsigned chars) per pixel.
+
+LodePNGEncoderSettings encoder
+------------------------------
+
+The following settings are supported (some are in sub-structs):
+*) auto_convert: when this option is enabled, the encoder will
+automatically choose the smallest possible color mode (including color key) that
+can encode the colors of all pixels without information loss.
+*) btype: the block type for LZ77. 0 = uncompressed, 1 = fixed huffman tree,
+   2 = dynamic huffman tree (best compression). Should be 2 for proper
+   compression.
+*) use_lz77: whether or not to use LZ77 for compressed block types. Should be
+   true for proper compression.
+*) windowsize: the window size used by the LZ77 encoder (1 - 32768). Has value
+   2048 by default, but can be set to 32768 for better, but slow, compression.
+*) force_palette: if colortype is 2 or 6, you can make the encoder write a PLTE
+   chunk if force_palette is true. This can be used as a suggested palette to convert
+   to by viewers that don't support more than 256 colors (if those still exist)
+*) add_id: add text chunk "Encoder: LodePNG <version>" to the image.
+*) text_compression: default 1. If 1, it'll store texts as zTXt instead of tEXt chunks.
+  zTXt chunks use zlib compression on the text. This gives a smaller result on
+  large texts but a larger result on small texts (such as a single program name).
+  It's all tEXt or all zTXt though, there's no separate setting per text yet.
+
+
+6. color conversions
+--------------------
+
+An important thing to note about LodePNG, is that the color type of the PNG, and
+the color type of the raw image, are completely independent. By default, when
+you decode a PNG, you get the result as a raw image in the color type you want,
+no matter whether the PNG was encoded with a palette, grayscale or RGBA color.
+And if you encode an image, by default LodePNG will automatically choose the PNG
+color type that gives good compression based on the values of colors and amount
+of colors in the image. It can be configured to let you control it instead as
+well, though.
+
+To be able to do this, LodePNG does conversions from one color mode to another.
+It can convert from almost any color type to any other color type, except the
+following conversions: RGB to grayscale is not supported, and converting to a
+palette when the palette doesn't have a required color is not supported. This is
+not supported on purpose: this is information loss which requires a color
+reduction algorithm that is beyond the scope of a PNG encoder (yes, RGB to gray
+is easy, but there are multiple ways if you want to give some channels more
+weight).
+
+By default, when decoding, you get the raw image in 32-bit RGBA or 24-bit RGB
+color, no matter what color type the PNG has. And by default when encoding,
+LodePNG automatically picks the best color model for the output PNG, and expects
+the input image to be 32-bit RGBA or 24-bit RGB. So, unless you want to control
+the color format of the images yourself, you can skip this chapter.
+
+6.1. PNG color types
+--------------------
+
+A PNG image can have many color types, ranging from 1-bit color to 64-bit color,
+as well as palettized color modes. After the zlib decompression and unfiltering
+in the PNG image is done, the raw pixel data will have that color type and thus
+a certain amount of bits per pixel. If you want the output raw image after
+decoding to have another color type, a conversion is done by LodePNG.
+
+The PNG specification gives the following color types:
+
+0: grayscale, bit depths 1, 2, 4, 8, 16
+2: RGB, bit depths 8 and 16
+3: palette, bit depths 1, 2, 4 and 8
+4: grayscale with alpha, bit depths 8 and 16
+6: RGBA, bit depths 8 and 16
+
+Bit depth is the amount of bits per pixel per color channel. So the total amount
+of bits per pixel is: amount of channels * bitdepth.
+
+6.2. color conversions
+----------------------
+
+As explained in the sections about the encoder and decoder, you can specify
+color types and bit depths in info_png and info_raw to change the default
+behaviour.
+
+If, when decoding, you want the raw image to be something else than the default,
+you need to set the color type and bit depth you want in the LodePNGColorMode,
+or the parameters colortype and bitdepth of the simple decoding function.
+
+If, when encoding, you use another color type than the default in the raw input
+image, you need to specify its color type and bit depth in the LodePNGColorMode
+of the raw image, or use the parameters colortype and bitdepth of the simple
+encoding function.
+
+If, when encoding, you don't want LodePNG to choose the output PNG color type
+but control it yourself, you need to set auto_convert in the encoder settings
+to false, and specify the color type you want in the LodePNGInfo of the
+encoder (including palette: it can generate a palette if auto_convert is true,
+otherwise not).
+
+If the input and output color type differ (whether user chosen or auto chosen),
+LodePNG will do a color conversion, which follows the rules below, and may
+sometimes result in an error.
+
+To avoid some confusion:
+-the decoder converts from PNG to raw image
+-the encoder converts from raw image to PNG
+-the colortype and bitdepth in LodePNGColorMode info_raw, are those of the raw image
+-the colortype and bitdepth in the color field of LodePNGInfo info_png, are those of the PNG
+-when encoding, the color type in LodePNGInfo is ignored if auto_convert
+ is enabled, it is automatically generated instead
+-when decoding, the color type in LodePNGInfo is set by the decoder to that of the original
+ PNG image, but it can be ignored since the raw image has the color type you requested instead
+-if the color type of the LodePNGColorMode and PNG image aren't the same, a conversion
+ between the color types is done if the color types are supported. If it is not
+ supported, an error is returned. If the types are the same, no conversion is done.
+-even though some conversions aren't supported, LodePNG supports loading PNGs from any
+ colortype and saving PNGs to any colortype, sometimes it just requires preparing
+ the raw image correctly before encoding.
+-both encoder and decoder use the same color converter.
+
+The function lodepng_convert does the color conversion. It is available in the
+interface but normally isn't needed since the encoder and decoder already call
+it.
+
+Non supported color conversions:
+-color to grayscale when non-gray pixels are present: no error is thrown, but
+the result will look ugly because only the red channel is taken (it assumes all
+three channels are the same in this case so ignores green and blue). The reason
+no error is given is to allow converting from three-channel grayscale images to
+one-channel even if there are numerical imprecisions.
+-anything to palette when the palette does not have an exact match for a from-color
+in it: in this case an error is thrown
+
+Supported color conversions:
+-anything to 8-bit RGB, 8-bit RGBA, 16-bit RGB, 16-bit RGBA
+-any gray or gray+alpha, to gray or gray+alpha
+-anything to a palette, as long as the palette has the requested colors in it
+-removing alpha channel
+-higher to smaller bitdepth, and vice versa
+
+If you want no color conversion to be done (e.g. for speed or control):
+-In the encoder, you can make it save a PNG with any color type by giving the
+raw color mode and LodePNGInfo the same color mode, and setting auto_convert to
+false.
+-In the decoder, you can make it store the pixel data in the same color type
+as the PNG has, by setting the color_convert setting to false. Settings in
+info_raw are then ignored.
+
+6.3. padding bits
+-----------------
+
+In the PNG file format, if a less than 8-bit per pixel color type is used and the scanlines
+have a bit amount that isn't a multiple of 8, then padding bits are used so that each
+scanline starts at a fresh byte. But that is NOT true for the LodePNG raw input and output.
+The raw input image you give to the encoder, and the raw output image you get from the decoder
+will NOT have these padding bits, e.g. in the case of a 1-bit image with a width
+of 7 pixels, the first pixel of the second scanline will be the 8th bit of the first byte,
+not the first bit of a new byte.
+
+6.4. A note about 16-bits per channel and endianness
+----------------------------------------------------
+
+LodePNG uses unsigned char arrays for 16-bit per channel colors too, just like
+for any other color format. The 16-bit values are stored in big endian (most
+significant byte first) in these arrays. This is the opposite order of the
+little endian used by x86 CPUs.
+
+LodePNG always uses big endian because the PNG file format does so internally.
+Conversions to formats other than those PNG uses internally are not supported by
+LodePNG on purpose, there are myriads of formats, including endianness of 16-bit
+colors, the order in which you store R, G, B and A, and so on. Supporting and
+converting to/from all that is outside the scope of LodePNG.
+
+This may mean that, depending on your use case, you may want to convert the big
+endian output of LodePNG to little endian with a for loop. This is certainly not
+always needed, many applications and libraries support big endian 16-bit colors
+anyway, but it means you cannot simply cast the unsigned char* buffer to an
+unsigned short* buffer on x86 CPUs.
+
+
+7. error values
+---------------
+
+All functions in LodePNG that return an error code, return 0 if everything went
+OK, or a non-zero code if there was an error.
+
+The meaning of the LodePNG error values can be retrieved with the function
+lodepng_error_text: given the numerical error code, it returns a description
+of the error in English as a string.
+
+Check the implementation of lodepng_error_text to see the meaning of each code.
+
+
+8. chunks and PNG editing
+-------------------------
+
+If you want to add extra chunks to a PNG you encode, or use LodePNG for a PNG
+editor that should follow the rules about handling of unknown chunks, or if your
+program is able to read other types of chunks than the ones handled by LodePNG,
+then that's possible with the chunk functions of LodePNG.
+
+A PNG chunk has the following layout:
+
+4 bytes length
+4 bytes type name
+length bytes data
+4 bytes CRC
+
+8.1. iterating through chunks
+-----------------------------
+
+If you have a buffer containing the PNG image data, then the first chunk (the
+IHDR chunk) starts at byte number 8 of that buffer. The first 8 bytes are the
+signature of the PNG and are not part of a chunk. But if you start at byte 8
+then you have a chunk, and can check the following things of it.
+
+NOTE: none of these functions check for memory buffer boundaries. To avoid
+exploits, always make sure the buffer contains all the data of the chunks.
+When using lodepng_chunk_next, make sure the returned value is within the
+allocated memory.
+
+unsigned lodepng_chunk_length(const unsigned char* chunk):
+
+Get the length of the chunk's data. The total chunk length is this length + 12.
+
+void lodepng_chunk_type(char type[5], const unsigned char* chunk):
+unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type):
+
+Get the type of the chunk or compare if it's a certain type
+
+unsigned char lodepng_chunk_critical(const unsigned char* chunk):
+unsigned char lodepng_chunk_private(const unsigned char* chunk):
+unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk):
+
+Check if the chunk is critical in the PNG standard (only IHDR, PLTE, IDAT and IEND are).
+Check if the chunk is private (public chunks are part of the standard, private ones not).
+Check if the chunk is safe to copy. If it's not, then, when modifying data in a critical
+chunk, unsafe to copy chunks of the old image may NOT be saved in the new one if your
+program doesn't handle that type of unknown chunk.
+
+unsigned char* lodepng_chunk_data(unsigned char* chunk):
+const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk):
+
+Get a pointer to the start of the data of the chunk.
+
+unsigned lodepng_chunk_check_crc(const unsigned char* chunk):
+void lodepng_chunk_generate_crc(unsigned char* chunk):
+
+Check if the crc is correct or generate a correct one.
+
+unsigned char* lodepng_chunk_next(unsigned char* chunk):
+const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk):
+
+Iterate to the next chunk. This works if you have a buffer with consecutive chunks. Note that these
+functions do no boundary checking of the allocated data whatsoever, so make sure there is enough
+data available in the buffer to be able to go to the next chunk.
+
+unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk):
+unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
+                              const char* type, const unsigned char* data):
+
+These functions are used to create new chunks that are appended to the data in *out that has
+length *outlength. The append function appends an existing chunk to the new data. The create
+function creates a new chunk with the given parameters and appends it. Type is the 4-letter
+name of the chunk.
+
+8.2. chunks in info_png
+-----------------------
+
+The LodePNGInfo struct contains fields with the unknown chunk in it. It has 3
+buffers (each with size) to contain 3 types of unknown chunks:
+the ones that come before the PLTE chunk, the ones that come between the PLTE
+and the IDAT chunks, and the ones that come after the IDAT chunks.
+It's necessary to make the distinction between these 3 cases because the PNG
+standard forces to keep the ordering of unknown chunks compared to the critical
+chunks, but does not force any other ordering rules.
+
+info_png.unknown_chunks_data[0] is the chunks before PLTE
+info_png.unknown_chunks_data[1] is the chunks after PLTE, before IDAT
+info_png.unknown_chunks_data[2] is the chunks after IDAT
+
+The chunks in these 3 buffers can be iterated through and read by using the same
+way described in the previous subchapter.
+
+When using the decoder to decode a PNG, you can make it store all unknown chunks
+if you set the option settings.remember_unknown_chunks to 1. By default, this
+option is off (0).
+
+The encoder will always encode unknown chunks that are stored in the info_png.
+If you need it to add a particular chunk that isn't known by LodePNG, you can
+use lodepng_chunk_append or lodepng_chunk_create to add it to the chunk data in
+info_png.unknown_chunks_data[x].
+
+Chunks that are known by LodePNG should not be added in that way. E.g. to make
+LodePNG add a bKGD chunk, set background_defined to true and add the correct
+parameters there instead.
+
+
+9. compiler support
+-------------------
+
+No libraries other than the current standard C library are needed to compile
+LodePNG. For the C++ version, only the standard C++ library is needed on top.
+Add the files lodepng.c(pp) and lodepng.h to your project, include
+lodepng.h where needed, and your program can read/write PNG files.
+
+It is compatible with C90 and up, and C++03 and up.
+
+If performance is important, use optimization when compiling! For both the
+encoder and decoder, this makes a large difference.
+
+Make sure that LodePNG is compiled with the same compiler of the same version
+and with the same settings as the rest of the program, or the interfaces with
+std::vectors and std::strings in C++ can be incompatible.
+
+CHAR_BIT must be 8 or higher, because LodePNG uses unsigned chars for octets.
+
+*) gcc and g++
+
+LodePNG is developed in gcc so this compiler is natively supported. It gives no
+warnings with compiler options "-Wall -Wextra -pedantic -ansi", with gcc and g++
+version 4.7.1 on Linux, 32-bit and 64-bit.
+
+*) Clang
+
+Fully supported and warning-free.
+
+*) Mingw
+
+The Mingw compiler (a port of gcc for Windows) should be fully supported by
+LodePNG.
+
+*) Visual Studio and Visual C++ Express Edition
+
+LodePNG should be warning-free with warning level W4. Two warnings were disabled
+with pragmas though: warning 4244 about implicit conversions, and warning 4996
+where it wants to use a non-standard function fopen_s instead of the standard C
+fopen.
+
+Visual Studio may want "stdafx.h" files to be included in each source file and
+give an error "unexpected end of file while looking for precompiled header".
+This is not standard C++ and will not be added to the stock LodePNG. You can
+disable it for lodepng.cpp only by right clicking it, Properties, C/C++,
+Precompiled Headers, and set it to Not Using Precompiled Headers there.
+
+NOTE: Modern versions of VS should be fully supported, but old versions, e.g.
+VS6, are not guaranteed to work.
+
+*) Compilers on Macintosh
+
+LodePNG has been reported to work both with gcc and LLVM for Macintosh, both for
+C and C++.
+
+*) Other Compilers
+
+If you encounter problems on any compilers, feel free to let me know and I may
+try to fix it if the compiler is modern and standards compliant.
+
+
+10. examples
+------------
+
+This decoder example shows the most basic usage of LodePNG. More complex
+examples can be found on the LodePNG website.
+
+10.1. decoder C++ example
+-------------------------
+
+#include "lodepng.h"
+#include <iostream>
+
+int main(int argc, char *argv[]) {
+  const char* filename = argc > 1 ? argv[1] : "test.png";
+
+  //load and decode
+  std::vector<unsigned char> image;
+  unsigned width, height;
+  unsigned error = lodepng::decode(image, width, height, filename);
+
+  //if there's an error, display it
+  if(error) std::cout << "decoder error " << error << ": " << lodepng_error_text(error) << std::endl;
+
+  //the pixels are now in the vector "image", 4 bytes per pixel, ordered RGBARGBA..., use it as texture, draw it, ...
+}
+
+10.2. decoder C example
+-----------------------
+
+#include "lodepng.h"
+
+int main(int argc, char *argv[]) {
+  unsigned error;
+  unsigned char* image;
+  unsigned width, height;
+  const char* filename = argc > 1 ? argv[1] : "test.png";
+
+  error = lodepng_decode32_file(&image, &width, &height, filename);
+
+  if(error) printf("decoder error %u: %s\n", error, lodepng_error_text(error));
+
+  / * use image here * /
+
+  free(image);
+  return 0;
+}
+
+11. state settings reference
+----------------------------
+
+A quick reference of some settings to set on the LodePNGState
+
+For decoding:
+
+state.decoder.zlibsettings.ignore_adler32: ignore ADLER32 checksums
+state.decoder.zlibsettings.custom_...: use custom inflate function
+state.decoder.ignore_crc: ignore CRC checksums
+state.decoder.ignore_critical: ignore unknown critical chunks
+state.decoder.ignore_end: ignore missing IEND chunk. May fail if this corruption causes other errors
+state.decoder.color_convert: convert internal PNG color to chosen one
+state.decoder.read_text_chunks: whether to read in text metadata chunks
+state.decoder.remember_unknown_chunks: whether to read in unknown chunks
+state.info_raw.colortype: desired color type for decoded image
+state.info_raw.bitdepth: desired bit depth for decoded image
+state.info_raw....: more color settings, see struct LodePNGColorMode
+state.info_png....: no settings for decoder but output, see struct LodePNGInfo
+
+For encoding:
+
+state.encoder.zlibsettings.btype: disable compression by setting it to 0
+state.encoder.zlibsettings.use_lz77: use LZ77 in compression
+state.encoder.zlibsettings.windowsize: tweak LZ77 windowsize
+state.encoder.zlibsettings.minmatch: tweak min LZ77 length to match
+state.encoder.zlibsettings.nicematch: tweak LZ77 match where to stop searching
+state.encoder.zlibsettings.lazymatching: try one more LZ77 matching
+state.encoder.zlibsettings.custom_...: use custom deflate function
+state.encoder.auto_convert: choose optimal PNG color type, if 0 uses info_png
+state.encoder.filter_palette_zero: PNG filter strategy for palette
+state.encoder.filter_strategy: PNG filter strategy to encode with
+state.encoder.force_palette: add palette even if not encoding to one
+state.encoder.add_id: add LodePNG identifier and version as a text chunk
+state.encoder.text_compression: use compressed text chunks for metadata
+state.info_raw.colortype: color type of raw input image you provide
+state.info_raw.bitdepth: bit depth of raw input image you provide
+state.info_raw: more color settings, see struct LodePNGColorMode
+state.info_png.color.colortype: desired color type if auto_convert is false
+state.info_png.color.bitdepth: desired bit depth if auto_convert is false
+state.info_png.color....: more color settings, see struct LodePNGColorMode
+state.info_png....: more PNG related settings, see struct LodePNGInfo
+
+
+12. changes
+-----------
+
+The version number of LodePNG is the date of the change given in the format
+yyyymmdd.
+
+Some changes aren't backwards compatible. Those are indicated with a (!)
+symbol.
+
+Not all changes are listed here, the commit history in github lists more:
+https://github.com/lvandeve/lodepng
+
+*) 12 jan 2020: (!) added 'end' argument to lodepng_chunk_next to allow correct
+   overflow checks.
+*) 14 aug 2019: around 25% faster decoding thanks to huffman lookup tables.
+*) 15 jun 2019: (!) auto_choose_color API changed (for bugfix: don't use palette
+   if gray ICC profile) and non-ICC LodePNGColorProfile renamed to
+   LodePNGColorStats.
+*) 30 dec 2018: code style changes only: removed newlines before opening braces.
+*) 10 sep 2018: added way to inspect metadata chunks without full decoding.
+*) 19 aug 2018: (!) fixed color mode bKGD is encoded with and made it use
+   palette index in case of palette.
+*) 10 aug 2018: (!) added support for gAMA, cHRM, sRGB and iCCP chunks. This
+   change is backwards compatible unless you relied on unknown_chunks for those.
+*) 11 jun 2018: less restrictive check for pixel size integer overflow
+*) 14 jan 2018: allow optionally ignoring a few more recoverable errors
+*) 17 sep 2017: fix memory leak for some encoder input error cases
+*) 27 nov 2016: grey+alpha auto color model detection bugfix
+*) 18 apr 2016: Changed qsort to custom stable sort (for platforms w/o qsort).
+*) 09 apr 2016: Fixed colorkey usage detection, and better file loading (within
+   the limits of pure C90).
+*) 08 dec 2015: Made load_file function return error if file can't be opened.
+*) 24 okt 2015: Bugfix with decoding to palette output.
+*) 18 apr 2015: Boundary PM instead of just package-merge for faster encoding.
+*) 24 aug 2014: Moved to github
+*) 23 aug 2014: Reduced needless memory usage of decoder.
+*) 28 jun 2014: Removed fix_png setting, always support palette OOB for
+    simplicity. Made ColorProfile public.
+*) 09 jun 2014: Faster encoder by fixing hash bug and more zeros optimization.
+*) 22 dec 2013: Power of two windowsize required for optimization.
+*) 15 apr 2013: Fixed bug with LAC_ALPHA and color key.
+*) 25 mar 2013: Added an optional feature to ignore some PNG errors (fix_png).
+*) 11 mar 2013: (!) Bugfix with custom free. Changed from "my" to "lodepng_"
+    prefix for the custom allocators and made it possible with a new #define to
+    use custom ones in your project without needing to change lodepng's code.
+*) 28 jan 2013: Bugfix with color key.
+*) 27 okt 2012: Tweaks in text chunk keyword length error handling.
+*) 8 okt 2012: (!) Added new filter strategy (entropy) and new auto color mode.
+    (no palette). Better deflate tree encoding. New compression tweak settings.
+    Faster color conversions while decoding. Some internal cleanups.
+*) 23 sep 2012: Reduced warnings in Visual Studio a little bit.
+*) 1 sep 2012: (!) Removed #define's for giving custom (de)compression functions
+    and made it work with function pointers instead.
+*) 23 jun 2012: Added more filter strategies. Made it easier to use custom alloc
+    and free functions and toggle #defines from compiler flags. Small fixes.
+*) 6 may 2012: (!) Made plugging in custom zlib/deflate functions more flexible.
+*) 22 apr 2012: (!) Made interface more consistent, renaming a lot. Removed
+    redundant C++ codec classes. Reduced amount of structs. Everything changed,
+    but it is cleaner now imho and functionality remains the same. Also fixed
+    several bugs and shrunk the implementation code. Made new samples.
+*) 6 nov 2011: (!) By default, the encoder now automatically chooses the best
+    PNG color model and bit depth, based on the amount and type of colors of the
+    raw image. For this, autoLeaveOutAlphaChannel replaced by auto_choose_color.
+*) 9 okt 2011: simpler hash chain implementation for the encoder.
+*) 8 sep 2011: lz77 encoder lazy matching instead of greedy matching.
+*) 23 aug 2011: tweaked the zlib compression parameters after benchmarking.
+    A bug with the PNG filtertype heuristic was fixed, so that it chooses much
+    better ones (it's quite significant). A setting to do an experimental, slow,
+    brute force search for PNG filter types is added.
+*) 17 aug 2011: (!) changed some C zlib related function names.
+*) 16 aug 2011: made the code less wide (max 120 characters per line).
+*) 17 apr 2011: code cleanup. Bugfixes. Convert low to 16-bit per sample colors.
+*) 21 feb 2011: fixed compiling for C90. Fixed compiling with sections disabled.
+*) 11 dec 2010: encoding is made faster, based on suggestion by Peter Eastman
+    to optimize long sequences of zeros.
+*) 13 nov 2010: added LodePNG_InfoColor_hasPaletteAlpha and
+    LodePNG_InfoColor_canHaveAlpha functions for convenience.
+*) 7 nov 2010: added LodePNG_error_text function to get error code description.
+*) 30 okt 2010: made decoding slightly faster
+*) 26 okt 2010: (!) changed some C function and struct names (more consistent).
+     Reorganized the documentation and the declaration order in the header.
+*) 08 aug 2010: only changed some comments and external samples.
+*) 05 jul 2010: fixed bug thanks to warnings in the new gcc version.
+*) 14 mar 2010: fixed bug where too much memory was allocated for char buffers.
+*) 02 sep 2008: fixed bug where it could create empty tree that linux apps could
+    read by ignoring the problem but windows apps couldn't.
+*) 06 jun 2008: added more error checks for out of memory cases.
+*) 26 apr 2008: added a few more checks here and there to ensure more safety.
+*) 06 mar 2008: crash with encoding of strings fixed
+*) 02 feb 2008: support for international text chunks added (iTXt)
+*) 23 jan 2008: small cleanups, and #defines to divide code in sections
+*) 20 jan 2008: support for unknown chunks allowing using LodePNG for an editor.
+*) 18 jan 2008: support for tIME and pHYs chunks added to encoder and decoder.
+*) 17 jan 2008: ability to encode and decode compressed zTXt chunks added
+    Also various fixes, such as in the deflate and the padding bits code.
+*) 13 jan 2008: Added ability to encode Adam7-interlaced images. Improved
+    filtering code of encoder.
+*) 07 jan 2008: (!) changed LodePNG to use ISO C90 instead of C++. A
+    C++ wrapper around this provides an interface almost identical to before.
+    Having LodePNG be pure ISO C90 makes it more portable. The C and C++ code
+    are together in these files but it works both for C and C++ compilers.
+*) 29 dec 2007: (!) changed most integer types to unsigned int + other tweaks
+*) 30 aug 2007: bug fixed which makes this Borland C++ compatible
+*) 09 aug 2007: some VS2005 warnings removed again
+*) 21 jul 2007: deflate code placed in new namespace separate from zlib code
+*) 08 jun 2007: fixed bug with 2- and 4-bit color, and small interlaced images
+*) 04 jun 2007: improved support for Visual Studio 2005: crash with accessing
+    invalid std::vector element [0] fixed, and level 3 and 4 warnings removed
+*) 02 jun 2007: made the encoder add a tag with version by default
+*) 27 may 2007: zlib and png code separated (but still in the same file),
+    simple encoder/decoder functions added for more simple usage cases
+*) 19 may 2007: minor fixes, some code cleaning, new error added (error 69),
+    moved some examples from here to lodepng_examples.cpp
+*) 12 may 2007: palette decoding bug fixed
+*) 24 apr 2007: changed the license from BSD to the zlib license
+*) 11 mar 2007: very simple addition: ability to encode bKGD chunks.
+*) 04 mar 2007: (!) tEXt chunk related fixes, and support for encoding
+    palettized PNG images. Plus little interface change with palette and texts.
+*) 03 mar 2007: Made it encode dynamic Huffman shorter with repeat codes.
+    Fixed a bug where the end code of a block had length 0 in the Huffman tree.
+*) 26 feb 2007: Huffman compression with dynamic trees (BTYPE 2) now implemented
+    and supported by the encoder, resulting in smaller PNGs at the output.
+*) 27 jan 2007: Made the Adler-32 test faster so that a timewaste is gone.
+*) 24 jan 2007: gave encoder an error interface. Added color conversion from any
+    greyscale type to 8-bit greyscale with or without alpha.
+*) 21 jan 2007: (!) Totally changed the interface. It allows more color types
+    to convert to and is more uniform. See the manual for how it works now.
+*) 07 jan 2007: Some cleanup & fixes, and a few changes over the last days:
+    encode/decode custom tEXt chunks, separate classes for zlib & deflate, and
+    at last made the decoder give errors for incorrect Adler32 or Crc.
+*) 01 jan 2007: Fixed bug with encoding PNGs with less than 8 bits per channel.
+*) 29 dec 2006: Added support for encoding images without alpha channel, and
+    cleaned out code as well as making certain parts faster.
+*) 28 dec 2006: Added "Settings" to the encoder.
+*) 26 dec 2006: The encoder now does LZ77 encoding and produces much smaller files now.
+    Removed some code duplication in the decoder. Fixed little bug in an example.
+*) 09 dec 2006: (!) Placed output parameters of public functions as first parameter.
+    Fixed a bug of the decoder with 16-bit per color.
+*) 15 okt 2006: Changed documentation structure
+*) 09 okt 2006: Encoder class added. It encodes a valid PNG image from the
+    given image buffer, however for now it's not compressed.
+*) 08 sep 2006: (!) Changed to interface with a Decoder class
+*) 30 jul 2006: (!) LodePNG_InfoPng , width and height are now retrieved in different
+    way. Renamed decodePNG to decodePNGGeneric.
+*) 29 jul 2006: (!) Changed the interface: image info is now returned as a
+    struct of type LodePNG::LodePNG_Info, instead of a vector, which was a bit clumsy.
+*) 28 jul 2006: Cleaned the code and added new error checks.
+    Corrected terminology "deflate" into "inflate".
+*) 23 jun 2006: Added SDL example in the documentation in the header, this
+    example allows easy debugging by displaying the PNG and its transparency.
+*) 22 jun 2006: (!) Changed way to obtain error value. Added
+    loadFile function for convenience. Made decodePNG32 faster.
+*) 21 jun 2006: (!) Changed type of info vector to unsigned.
+    Changed position of palette in info vector. Fixed an important bug that
+    happened on PNGs with an uncompressed block.
+*) 16 jun 2006: Internally changed unsigned into unsigned where
+    needed, and performed some optimizations.
+*) 07 jun 2006: (!) Renamed functions to decodePNG and placed them
+    in LodePNG namespace. Changed the order of the parameters. Rewrote the
+    documentation in the header. Renamed files to lodepng.cpp and lodepng.h
+*) 22 apr 2006: Optimized and improved some code
+*) 07 sep 2005: (!) Changed to std::vector interface
+*) 12 aug 2005: Initial release (C++, decoder only)
+
+
+13. contact information
+-----------------------
+
+Feel free to contact me with suggestions, problems, comments, ... concerning
+LodePNG. If you encounter a PNG image that doesn't work properly with this
+decoder, feel free to send it and I'll use it to find and fix the problem.
+
+My email address is (puzzle the account and domain together with an @ symbol):
+Domain: gmail dot com.
+Account: lode dot vandevenne.
+
+
+Copyright (c) 2005-2020 Lode Vandevenne
+*/
diff --git a/codec/L2/demos/jxlEnc/third_partys/third_party/lodepng/lodepng_util.h b/codec/L2/demos/jxlEnc/third_partys/third_party/lodepng/lodepng_util.h
new file mode 100644
index 0000000000..97fd804c36
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/third_party/lodepng/lodepng_util.h
@@ -0,0 +1,290 @@
+/*
+LodePNG Utils
+
+Copyright (c) 2005-2020 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+/*
+Extra C++ utilities for LodePNG, for convenience.
+Not part of the stable API of lodepng, more loose separate utils.
+*/
+
+#ifndef LODEPNG_UTIL_H
+#define LODEPNG_UTIL_H
+
+#include <string>
+#include <vector>
+#include "lodepng.h"
+
+namespace lodepng {
+
+/*
+Returns info from the header of the PNG by value, purely for convenience.
+Does NOT check for errors. Returns bogus info if the PNG has an error.
+Does not require cleanup of allocated memory because no palette or text chunk
+info is in the LodePNGInfo object after checking only the header of the PNG.
+*/
+LodePNGInfo getPNGHeaderInfo(const std::vector<unsigned char>& png);
+
+/*
+Get the names and sizes of all chunks in the PNG file.
+Returns 0 if ok, non-0 if error happened.
+*/
+unsigned getChunkInfo(std::vector<std::string>& names, std::vector<size_t>& sizes,
+                      const std::vector<unsigned char>& png);
+
+/*
+Returns the names and full chunks (including the name and everything else that
+makes up the chunk) for all chunks except IHDR, PLTE, IDAT and IEND.
+It separates the chunks into 3 separate lists, representing the chunks between
+certain critical chunks: 0: IHDR-PLTE, 1: PLTE-IDAT, 2: IDAT-IEND
+Returns 0 if ok, non-0 if error happened.
+*/
+unsigned getChunks(std::vector<std::string> names[3],
+                   std::vector<std::vector<unsigned char> > chunks[3],
+                   const std::vector<unsigned char>& png);
+
+/*
+Inserts chunks into the given png file. The chunks must be fully encoded,
+including length, type, content and CRC.
+The array index determines where it goes:
+0: between IHDR and PLTE, 1: between PLTE and IDAT, 2: between IDAT and IEND.
+They're appended at the end of those locations within the PNG.
+Returns 0 if ok, non-0 if error happened.
+*/
+unsigned insertChunks(std::vector<unsigned char>& png,
+                      const std::vector<std::vector<unsigned char> > chunks[3]);
+
+/*
+Get the filtertypes of each scanline in this PNG file.
+Returns 0 if ok, 1 if PNG decoding error happened.
+
+For a non-interlaced PNG, it returns one filtertype per scanline, in order.
+
+For interlaced PNGs, it returns a result as if it's not interlaced. It returns
+one filtertype per scanline, in order. The values match pass 6 and 7 of the
+Adam7 interlacing, alternating between the two, so that the values correspond
+the most to their scanlines.
+*/
+unsigned getFilterTypes(std::vector<unsigned char>& filterTypes, const std::vector<unsigned char>& png);
+
+/*
+Get the filtertypes of each scanline in every interlace pass of this PNG file.
+Returns 0 if ok, 1 if PNG decoding error happened.
+
+For a non-interlaced PNG, it returns one filtertype per scanline, in order, in
+a single std::vector in filterTypes.
+
+For an interlaced PNG, it returns 7 std::vectors in filterTypes, one for each
+Adam7 pass. The amount of values per pass can be calculated as follows, where
+w and h are the size of the image and all divisions are integer divisions:
+pass 1: (h + 7) / 8
+pass 2: w <= 4 ? 0 : (h + 7) / 8
+pass 3: h <= 4 ? 0 : (h + 7) / 8
+pass 4: w <= 2 ? 0 : (h + 3) / 4
+pass 5: h <= 2 ? 0 : (h + 3) / 4
+pass 6: w <= 1 ? 0 : (h + 1) / 2
+pass 7: h <= 1 ? 0 : (h + 1) / 2
+*/
+unsigned getFilterTypesInterlaced(std::vector<std::vector<unsigned char> >& filterTypes,
+                                  const std::vector<unsigned char>& png);
+
+/*
+Returns the value of the i-th pixel in an image with 1, 2, 4 or 8-bit color.
+E.g. if bits is 4 and i is 5, it returns the 5th nibble (4-bit group), which
+is the second half of the 3rd byte, in big endian (PNG's endian order).
+*/
+int getPaletteValue(const unsigned char* data, size_t i, int bits);
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+
+/* Similar to convertRGBModel, but the 'to' model is sRGB. The pixel format
+of in and out must be the same and is given by state_in->info_raw. An
+error may occur if the pixel format cannot contain the new colors (e.g. palette) */
+unsigned convertToSrgb(unsigned char* out, const unsigned char* in,
+                       unsigned w, unsigned h,
+                       const LodePNGState* state_in);
+
+/* Similar to convertRGBModel, but the 'from' model is sRGB. The pixel format
+of in and out must be the same and is given by state_out->info_raw. An
+error may occur if the pixel format cannot contain the new colors (e.g. palette) */
+unsigned convertFromSrgb(unsigned char* out, const unsigned char* in,
+                         unsigned w, unsigned h,
+                         const LodePNGState* state_out);
+
+/*
+Converts from one RGB model to another RGB model.
+Similar to calling convertToXYZ followed by convertFromXYZ, but may be
+more efficient and more precise (e.g. no computation needed when both models
+are the same). See their documentation for more info.
+
+Parameters:
+
+*) out: output pixel data
+*) in: input pixel data
+*) w, h: image size
+*) state_out: output RGB color model in state_out->info_png and byte format in state_out->info_raw.
+*) state_in: input RGB color model in state_in->info_png and byte format in state_in->info_raw
+*) return value: 0 if ok, positive value if error
+*) rendering_intent: 1 for relative, 3 for absolute, should be relative for standard behavior.
+   See description at convertFromXYZ.
+*/
+unsigned convertRGBModel(unsigned char* out, const unsigned char* in,
+                         unsigned w, unsigned h,
+                         const LodePNGState* state_out,
+                         const LodePNGState* state_in,
+                         unsigned rendering_intent);
+
+/*
+Converts the RGB color to the absolute XYZ color space given the RGB color profile
+chunks in the PNG info.
+
+Color space here refers to the different possible RGB spaces with different
+possible chromaticities or whitepoint and XYZ color from colorimetry, not the
+LodePNGColorType that describes the byte based encoding.
+
+You need this function only if the PNG could contain data in an arbitrary RGB
+color space and you wish to output to a display or format that does not provide
+color management for you (so you need to convert rather than pass on the profile
+to it) but expects a certain RGB format (e.g. sRGB). See the background info below.
+
+Supports the gAMA, cHRM, sRGB and iCCP colorimetry chunks. If no colorimetry chunks are present
+(that is, in state->info_png, the fields gama_defined, chrm_defined, srgb_defined and
+iccp_defined are all 0), it assumes the format is sRGB.
+For more information, see the chunk specifications in the PNG specification.
+
+Some background:
+
+A PNG image contains RGB data inside, but this data may use a specific RGB model (by default sRGB but
+different if colorimetry chunks are given).
+The computer display and/or operating system can have another RGB model (typically sRGB, or wider gamut
+or HDR formats).
+
+The PNG chunks describe what format the data inside has, not the format of the display. To correctly
+display a PNG image on a display, a conversion is needed from the PNG model to the display model if their
+models differ. Some options to achieve that are:
+*) If your use case already supports color management on its own, you can give it the RGB values straight from
+   the PNG image and give it the information from the cHRM, gAMA, sRGB and iCCP chunks (which you can find
+   in the LodePNGInfo), and the color management should then handle it correctly for you. You don't need
+   this function here in that case.
+*) If your use case does not support color management, you may instead want to give it the RGB values in a
+   consistent color model, such as sRGB, but the PNG does not necessarily have it in this desired model.
+   In that case, use the function below (or a similar one from a CMS library if you prefer) to convert it to
+   the absolute color space XYZ, and then you can convert it to the target RGB with the counterpart convertFromXYZ
+   further below.
+
+Parameters:
+
+*) out: 4 floats per pixel, X,Y,Z,alpha color format, in range 0-1 (normally, not clipped if beyond), must
+   be allocated to have 4 * w * h floats available.
+*) whitepoint: output argument, the whitepoint the original RGB data used, given in absolute XYZ. Needed for
+   relative rendering intents: give these values to counterpart function convertFromXYZ.
+*) in: input RGB color, in byte format given by state->info_raw and RGB color profile given by state->info_png
+*) w, h: image size
+*) state (when using a LodePNG decode function that takes a LodePNGState parameter, can directly use that one):
+   state->info_png: PNG info with possibly an RGB color model in cHRM,gAMA and/or sRGB chunks
+   state->info_raw: byte format of in (amount of channels, bit depth)
+*) return value: 0 if ok, positive value if error
+*/
+unsigned convertToXYZ(float* out, float whitepoint[3],
+                      const unsigned char* in, unsigned w, unsigned h,
+                      const LodePNGState* state);
+
+/*
+Same as convertToXYZ but takes floating point input. Slower.
+The main black..white range is 0..1. Does not clip values that are outside that range.
+*/
+unsigned convertToXYZFloat(float* out, float whitepoint[3], const float* in,
+                           unsigned w, unsigned h, const LodePNGState* state);
+
+/*
+Converts XYZ to RGB in the RGB color model given by state->info_png and byte format given by state->info_raw.
+If state->info_png has no colorimetry chunks, converts to sRGB.
+Parameters:
+*) out: output color in byte format given by state->info_raw and RGB color profile given
+   by state->info_png. Must have enough bytes allocated to contain pixels in the given byte format.
+*) in: 4 floats per pixel, X,Y,Z,alpha color format, in range 0-1 (normally).
+*) whitepoint: input argument, the original whitepoint in absolute XYZ that the pixel data
+   in "in" had back when it was in a previous RGB space. Needed to preserve the whitepoint
+   in the new target RGB space for relative rendering intent.
+*) rendering_intent: the desired rendering intent, with numeric meaning matching the
+   values used by ICC: 0=perceptual, 1=relative, 2=saturation, 3=absolute.
+   Should be 1 for normal use cases, it adapts white to match that of different RGB
+   models which is the best practice. Using 3 may change the color of white and may
+   turn grayscale into colors of a certain tone. Using 0 and 2 will have the same
+   effect as 1 because using those requires more data than the matrix-based RGB profiles
+   supported here have.
+*) w, h: image size
+*) state:
+   state->info_png: PNG info with possibly an RGB color profile in cHRM,gAMA and/or sRGB chunks
+   state->info_raw: byte format of out (amount of channels, bit depth)
+*) return value: 0 if ok, positive value if error
+*/
+unsigned convertFromXYZ(unsigned char* out, const float* in, unsigned w, unsigned h,
+                        const LodePNGState* state,
+                        const float whitepoint[3], unsigned rendering_intent);
+
+/*
+Same as convertFromXYZ but outputs the RGB colors in floating point.
+The main black..white range is 0..1. Does not clip values that are outside that range.
+*/
+unsigned convertFromXYZFloat(float* out, const float* in, unsigned w, unsigned h,
+                             const LodePNGState* state,
+                             const float whitepoint[3], unsigned rendering_intent);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*
+The information for extractZlibInfo.
+*/
+struct ZlibBlockInfo {
+  int btype; //block type (0-2)
+  size_t compressedbits; //size of compressed block in bits
+  size_t uncompressedbytes; //size of uncompressed block in bytes
+
+  // only filled in for block type 2
+  size_t treebits; //encoded tree size in bits
+  int hlit; //the HLIT value that was filled in for this tree
+  int hdist; //the HDIST value that was filled in for this tree
+  int hclen; //the HCLEN value that was filled in for this tree
+  std::vector<int> clcl; //19 code length code lengths (compressed tree's tree)
+  std::vector<int> treecodes; //N tree codes, with values 0-18. Values 17 or 18 are followed by the repetition value.
+  std::vector<int> litlenlengths; //288 code lengths for lit/len symbols
+  std::vector<int> distlengths; //32 code lengths for dist symbols
+
+  // only filled in for block types 1 or 2
+  std::vector<int> lz77_lcode; //LZ77 codes. 0-255: literals. 256: end symbol. 257-285: length code of length/dist pairs
+  // the next vectors have the same size as lz77_lcode, but an element only has meaningful value if lz77_lcode contains a length code.
+  std::vector<int> lz77_dcode; //presumably the distance code of the length/dist pair -- TODO confirm in lodepng_util.cpp
+  std::vector<int> lz77_lbits; //presumably the extra bits that follow the length code -- TODO confirm
+  std::vector<int> lz77_dbits; //presumably the extra bits that follow the distance code -- TODO confirm
+  std::vector<int> lz77_lvalue; //presumably the resulting match length -- TODO confirm
+  std::vector<int> lz77_dvalue; //presumably the resulting match distance -- TODO confirm
+  size_t numlit; //number of lit codes in this block
+  size_t numlen; //number of len codes in this block
+};
+
+//Extracts all info needed from a PNG file to reconstruct the zlib compression exactly.
+void extractZlibInfo(std::vector<ZlibBlockInfo>& zlibinfo, const std::vector<unsigned char>& in);
+
+} // namespace lodepng
+
+#endif /*LODEPNG_UTIL_H inclusion guard*/
diff --git a/codec/L2/demos/jxlEnc/third_partys/tools/args.h b/codec/L2/demos/jxlEnc/third_partys/tools/args.h
new file mode 100644
index 0000000000..13f2f5259b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/tools/args.h
@@ -0,0 +1,158 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_ARGS_H_
+#define TOOLS_ARGS_H_
+
+// Helpers for parsing command line arguments. No include guard needed.
+
+#include <stdio.h>
+#include <string.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"  // DecoderHints
+#include "lib/jxl/gaborish.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jpegxl {
+namespace tools {
+
+static inline bool ParseOverride(const char* arg, jxl::Override* out) {  // Maps "1" to kOn and "0" to kOff.
+  const std::string s_arg(arg);
+  if (s_arg == "1") {
+    *out = jxl::Override::kOn;
+    return true;
+  }
+  if (s_arg == "0") {
+    *out = jxl::Override::kOff;
+    return true;
+  }
+  fprintf(stderr, "Invalid flag, %s must be 0 or 1\n", arg);  // any other text: report it; *out is left untouched
+  return JXL_FAILURE("Args");
+}
+
+static inline bool ParseUnsigned(const char* arg, size_t* out) {  // Parses the whole of |arg| as an unsigned integer.
+  char* end;
+  *out = static_cast<size_t>(strtoull(arg, &end, 0));  // base 0: decimal, 0x hex or leading-0 octal; written even on failure
+  if (end == arg || end[0] != '\0') {  // nothing converted (e.g. "") or trailing characters
+    fprintf(stderr, "Unable to interpret as unsigned integer: %s.\n", arg);
+    return JXL_FAILURE("Args");
+  }
+  return true;
+}
+
+static inline bool ParseUint32(const char* arg, uint32_t* out) {  // ParseUnsigned for a 32-bit destination.
+  size_t value = 0;
+  const bool ret = ParseUnsigned(arg, &value) && static_cast<uint32_t>(value) == value;  // fail instead of truncating
+  if (ret) *out = static_cast<uint32_t>(value);  // *out is only written on success
+  return ret;
+}
+
+static inline bool ParseSigned(const char* arg, int* out) {  // Parses the whole of |arg| as a signed integer.
+  char* end;
+  *out = static_cast<int>(strtol(arg, &end, 0));  // base 0: decimal, 0x hex or leading-0 octal; written even on failure
+  if (end == arg || end[0] != '\0') {  // nothing converted (e.g. "") or trailing characters
+    fprintf(stderr, "Unable to interpret as signed integer: %s.\n", arg);
+    return JXL_FAILURE("Args");
+  }
+  return true;
+}
+
+static inline bool ParseFloat(const char* arg, float* out) {  // Parses the whole of |arg| as a float.
+  char* end;
+  *out = static_cast<float>(strtod(arg, &end));  // parsed as double, then narrowed; written even on failure
+  if (end == arg || end[0] != '\0') {  // nothing converted (e.g. "") or trailing characters
+    fprintf(stderr, "Unable to interpret as float: %s.\n", arg);
+    return JXL_FAILURE("Args");
+  }
+  return true;
+}
+
+static inline bool ParseFloatPair(const char* arg,
+                                  std::pair<float, float>* out) {  // Accepts "a,b" or a single "a".
+  int parsed = sscanf(arg, "%f,%f", &out->first, &out->second);  // number of floats converted (or EOF)
+  if (parsed == 1) {
+    out->second = out->first;  // single value: use it for both elements
+  } else if (parsed != 2) {
+    fprintf(stderr,
+            "Unable to interpret as float pair separated by a comma: %s.\n",
+            arg);
+    return JXL_FAILURE("Args");
+  }
+  return true;
+}
+
+static inline bool ParseDouble(const char* arg, double* out) {  // Parses the whole of |arg| as a double.
+  char* end;
+  *out = static_cast<double>(strtod(arg, &end));  // written even on failure
+  if (end == arg || end[0] != '\0') {  // nothing converted (e.g. "") or trailing characters
+    fprintf(stderr, "Unable to interpret as double: %s.\n", arg);
+    return JXL_FAILURE("Args");
+  }
+  return true;
+}
+
+static inline bool ParseAndAppendKeyValue(const char* arg,
+                                          jxl::DecoderHints* out) {  // Splits "key=value" and adds the pair to |out|.
+  const char* eq = strchr(arg, '=');  // the first '=' separates key from value
+  if (!eq) {
+    fprintf(stderr, "Expected argument as 'key=value' but received '%s'\n",
+            arg);
+    return false;
+  }
+  std::string key(arg, eq);  // text before the '='
+  out->Add(key, std::string(eq + 1));  // value is everything after it (may be empty)
+  return true;
+}
+
+static inline bool ParsePredictor(const char* arg, jxl::Predictor* out) {  // Parses a numeric modular predictor id.
+  char* end;
+  size_t p = static_cast<size_t>(strtoull(arg, &end, 0));
+  if (end == arg || end[0] != '\0') {  // reject "" (previously read as predictor 0) and trailing characters
+    fprintf(stderr, "Invalid predictor: %s.\n", arg);
+    return JXL_FAILURE("Args");
+  }
+  if (p >= jxl::kNumModularPredictors) {
+    fprintf(stderr, "Invalid predictor value %zu, must be less than %zu.\n", p,
+            jxl::kNumModularPredictors);
+    return JXL_FAILURE("Args");
+  }
+  *out = static_cast<jxl::Predictor>(p);  // only written once the value has been validated
+  return true;
+}
+
+static inline bool ParseString(const char* arg, std::string* out) {  // Copies |arg| into |*out|; always returns true.
+  out->assign(arg);
+  return true;
+}
+
+static inline bool ParseCString(const char* arg, const char** out) {  // Stores the pointer only (no copy); always returns true.
+  *out = arg;
+  return true;
+}
+
+static inline bool SetBooleanTrue(bool* out) {  // Sets |*out| to true; always returns true.
+  *out = true;
+  return true;
+}
+
+static inline bool SetBooleanFalse(bool* out) {  // Sets |*out| to false; always returns true (the call itself succeeded).
+  *out = false;
+  return true;
+}
+
+static inline bool IncrementUnsigned(size_t* out) {  // Adds one to |*out|; always returns true.
+  (*out)++;
+  return true;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_ARGS_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/tools/box/box.cc b/codec/L2/demos/jxlEnc/third_partys/tools/box/box.cc
new file mode 100644
index 0000000000..90ee0ab17e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/tools/box/box.cc
@@ -0,0 +1,334 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/box/box.h"
+
+#include "lib/jxl/base/byte_order.h"  // for GetMaximumBrunsliEncodedSize
+#include "lib/jxl/jpeg/dec_jpeg_data.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jpegxl {
+namespace tools {
+
+namespace {
+// Returns true when a + b exceeds size. The sum is never formed, so the
+// answer is also correct for operands whose sum would wrap around.
+bool OutOfBounds(size_t a, size_t b, size_t size) {
+  // b alone already overshoots, whatever a is.
+  if (b > size) return true;
+  // Here size - b cannot underflow; a + b > size  <=>  a > size - b.
+  return a > size - b;
+}
+}  // namespace
+
+// Reads one BMFF box header from *next_in (at most *available_in bytes) into
+// `box`, then advances *next_in and shrinks *available_in by the header
+// length so that they describe the box payload.
+// When the header states a size, data_size_given is true and data_size holds
+// the payload size. A stated size of 0 means the box runs to the end of the
+// container file; that is reported as data_size_given == false and must be
+// resolved by the caller.
+// Fails when the input is shorter than the header or when the stated size is
+// smaller than the header itself; the in/out pointers are not advanced then.
+jxl::Status ParseBoxHeader(const uint8_t** next_in, size_t* available_in,
+                           Box* box) {
+  const uint8_t* const bytes = *next_in;
+  const size_t limit = *available_in;
+  size_t cursor = 0;
+
+  // Compact header: 32-bit big-endian total size, then the 4-byte type.
+  if (OutOfBounds(cursor, 8, limit)) return JXL_FAILURE("out of bounds");
+  uint64_t total_size = LoadBE32(bytes + cursor);
+  memcpy(box->type, bytes + cursor + 4, 4);
+  cursor += 8;
+
+  // A size field of 1 announces that the real size follows as 64 bits.
+  if (total_size == 1) {
+    if (OutOfBounds(cursor, 8, limit)) return JXL_FAILURE("out of bounds");
+    total_size = LoadBE64(bytes + cursor);
+    cursor += 8;
+  }
+
+  // "uuid" boxes carry a 16-byte extended type after the size fields.
+  if (memcmp("uuid", box->type, 4) == 0) {
+    if (OutOfBounds(cursor, 16, limit)) return JXL_FAILURE("out of bounds");
+    memcpy(box->extended_type, bytes + cursor, 16);
+    cursor += 16;
+  }
+
+  // Parsing started at offset 0, so the header length equals the cursor.
+  const size_t header_len = cursor;
+
+  if (total_size == 0) {
+    // Size not stated: the box extends to the end of the file, which may lie
+    // beyond the bytes available here.
+    box->data_size_given = false;
+    box->data_size = 0;
+  } else if (total_size < header_len) {
+    return JXL_FAILURE("invalid box size");
+  } else {
+    // The stated size counts the header too; keep only the payload part.
+    box->data_size_given = true;
+    box->data_size = total_size - header_len;
+  }
+
+  // Hand the payload position back. Any structure inside the payload (for
+  // example a full-box prefix) is the caller's business.
+  *next_in += cursor;
+  *available_in -= cursor;
+  return true;
+}
+
+// Appends the serialized header of `box` to `out`: a 32-bit big-endian size,
+// the 4-byte type, then (only if needed) a 64-bit size and, for "uuid"
+// boxes, the 16-byte extended type.
+// The size written is the total box size, header included. If
+// box.data_size_given is false a size of 0 is written. If the total does not
+// fit in 32 bits, the 32-bit field is set to 1 and the total goes into the
+// 64-bit field. Always returns true.
+jxl::Status AppendBoxHeader(const Box& box, jxl::PaddedBytes* out) {
+  bool use_extended = !memcmp("uuid", box.type, 4);
+
+  uint64_t box_size = 0;
+  bool large_size = false;
+  if (box.data_size_given) {
+    box_size = box.data_size + 8 + (use_extended ? 16 : 0);
+    if (box_size >= 0x100000000ull) {
+      large_size = true;
+      // The 64-bit size field is itself part of the header and must be
+      // counted in the total; ParseBoxHeader subtracts it again when
+      // computing data_size, so omitting it loses 8 bytes on a round trip.
+      box_size += 8;
+    }
+  }
+
+  // Each field is written by growing `out` and storing into the new tail:
+  // &out->back() - n + 1 is the address of the first of the last n bytes.
+  out->resize(out->size() + 4);
+  StoreBE32(large_size ? 1 : box_size, &out->back() - 4 + 1);
+
+  out->resize(out->size() + 4);
+  memcpy(&out->back() - 4 + 1, box.type, 4);
+
+  if (large_size) {
+    out->resize(out->size() + 8);
+    StoreBE64(box_size, &out->back() - 8 + 1);
+  }
+
+  if (use_extended) {
+    out->resize(out->size() + 16);
+    memcpy(&out->back() - 16 + 1, box.extended_type, 16);
+  }
+
+  return true;
+}
+
+// Returns true when `data` begins with the 12-byte JPEG XL signature box:
+// size 0x0000000c, type "JXL ", payload 0d 0a 87 0a. Inputs shorter than
+// 12 bytes never match.
+bool IsContainerHeader(const uint8_t* data, size_t size) {
+  static const uint8_t kSignatureBox[12] = {0x00, 0x00, 0x00, 0x0c, 'J',  'X',
+                                            'L',  ' ',  0x0d, 0x0a, 0x87, 0x0a};
+  return size >= sizeof(kSignatureBox) &&
+         memcmp(kSignatureBox, data, sizeof(kSignatureBox)) == 0;
+}
+
+// Walks every top-level box of a complete in-memory JPEG XL container and
+// records where the recognized payloads live. `container` is reset first;
+// afterwards its pointers refer into `data` (nothing is copied).
+// The first box must be the "JXL " signature box and the second the "ftyp"
+// box with the expected contents. Unrecognized boxes are skipped. Fails on a
+// malformed header, a box that claims more bytes than remain, a bad
+// signature/ftyp box, a "jxli" box after the codestream, or a bad Exif box.
+jxl::Status DecodeJpegXlContainerOneShot(const uint8_t* data, size_t size,
+                                         JpegXlContainer* container) {
+  // Forget whatever a previous decode left behind.
+  container->exif = nullptr;
+  container->exif_size = 0;
+  container->exfc = nullptr;
+  container->exfc_size = 0;
+  container->xml.clear();
+  container->xmlc.clear();
+  container->jumb = nullptr;
+  container->jumb_size = 0;
+  container->codestream = nullptr;
+  container->codestream_size = 0;
+  container->jpeg_reconstruction = nullptr;
+  container->jpeg_reconstruction_size = 0;
+
+  const uint8_t* cursor = data;
+  size_t remaining = size;
+
+  for (size_t box_index = 0; remaining != 0; box_index++) {
+    Box box;
+    if (!ParseBoxHeader(&cursor, &remaining, &box)) {
+      return JXL_FAILURE("Invalid box header");
+    }
+    if (box.data_size > remaining) {
+      return JXL_FAILURE("Unexpected end of file");
+    }
+    // A box without a stated size runs to the end of the input.
+    const size_t payload_size = box.data_size_given ? box.data_size : remaining;
+
+    const auto has_type = [&box](const char* fourcc) {
+      return memcmp(fourcc, box.type, 4) == 0;
+    };
+
+    if (box_index == 0) {
+      // TODO(lode): leave out magic signature box?
+      // The first box must be the signature box with its 4 magic bytes.
+      if (!has_type("JXL ")) {
+        return JXL_FAILURE("Invalid magic signature");
+      }
+      if (box.data_size != 4) return JXL_FAILURE("Invalid magic signature");
+      const bool magic_ok = cursor[0] == 0xd && cursor[1] == 0xa &&
+                            cursor[2] == 0x87 && cursor[3] == 0xa;
+      if (!magic_ok) {
+        return JXL_FAILURE("Invalid magic signature");
+      }
+    } else if (box_index == 1) {
+      // The second box must be ftyp with exactly these 12 payload bytes.
+      if (!has_type("ftyp")) {
+        return JXL_FAILURE("Invalid ftyp");
+      }
+      if (box.data_size != 12) return JXL_FAILURE("Invalid ftyp");
+      const char* expected = "jxl \0\0\0\0jxl ";
+      if (memcmp(expected, cursor, 12) != 0) return JXL_FAILURE("Invalid ftyp");
+    } else if (has_type("jxli")) {
+      // TODO(lode): parse JXL frame index box
+      if (container->codestream) {
+        return JXL_FAILURE("frame index must come before codestream");
+      }
+    } else if (has_type("jxlc")) {
+      container->codestream = cursor;
+      container->codestream_size = payload_size;
+    } else if (has_type("Exif")) {
+      // The payload starts with a 32-bit offset to the TIFF header; expose
+      // only the bytes from that header onwards.
+      if (payload_size < 4) return JXL_FAILURE("Invalid Exif");
+      const uint32_t tiff_offset = LoadBE32(cursor);
+      if (tiff_offset > payload_size - 4)
+        return JXL_FAILURE("Invalid Exif tiff header offset");
+      container->exif = cursor + 4 + tiff_offset;
+      container->exif_size = payload_size - 4 - tiff_offset;
+    } else if (has_type("Exfc")) {
+      container->exfc = cursor;
+      container->exfc_size = payload_size;
+    } else if (has_type("xml ")) {
+      container->xml.emplace_back(cursor, payload_size);
+    } else if (has_type("xmlc")) {
+      container->xmlc.emplace_back(cursor, payload_size);
+    } else if (has_type("jumb")) {
+      container->jumb = cursor;
+      container->jumb_size = payload_size;
+    } else if (has_type("jbrd")) {
+      container->jpeg_reconstruction = cursor;
+      container->jpeg_reconstruction_size = payload_size;
+    }
+    // Any other box type is skipped: it is not recognized here but may be
+    // meaningful to other software.
+
+    cursor += payload_size;
+    remaining -= payload_size;
+  }
+
+  return true;
+}
+
+// Appends one complete box to `out`: a header for the 4-character `type`
+// followed by `data_size` bytes from `data`. With `exif` set, four zero
+// bytes (a TIFF header offset of 0) are written between header and data and
+// are counted in the box's size.
+static jxl::Status AppendBoxAndData(const char type[4], const uint8_t* data,
+                                    size_t data_size, jxl::PaddedBytes* out,
+                                    bool exif = false) {
+  const size_t prefix_size = exif ? 4 : 0;
+
+  Box header;
+  header.data_size_given = true;
+  header.data_size = data_size + prefix_size;
+  memcpy(header.type, type, 4);
+  JXL_RETURN_IF_ERROR(AppendBoxHeader(header, out));
+
+  // For Exif the offset field is always written as 0.
+  for (size_t i = 0; i < prefix_size; i++) out->push_back(0);
+  out->append(data, data + data_size);
+  return true;
+}
+
+// Serializes `container` to `out` as a JPEG XL container file: the fixed
+// signature and ftyp boxes first, then whichever of Exif, Exfc, xml, xmlc and
+// jbrd are present (in that order), the mandatory jxlc codestream box, and
+// finally jumb if present.
+// Fails if there is no codestream; boxes appended before that point remain
+// in `out`.
+jxl::Status EncodeJpegXlContainerOneShot(const JpegXlContainer& container,
+                                         jxl::PaddedBytes* out) {
+  // Signature box (12 bytes) immediately followed by the ftyp box (20 bytes).
+  const unsigned char kFileStart[] = {
+      0,   0,   0,    0xc, 'J', 'X', 'L', ' ',  // size 12, type "JXL "
+      0xd, 0xa, 0x87, 0xa,                      // signature payload
+      0,   0,   0,    0x14,                     // size 20
+      'f', 't', 'y',  'p',                      // type "ftyp"
+      'j', 'x', 'l',  ' ',                      // ftyp payload, 12 bytes
+      0,   0,   0,    0,   'j', 'x', 'l', ' '};
+  out->append(kFileStart, kFileStart + sizeof(kFileStart));
+
+  if (container.exif) {
+    JXL_RETURN_IF_ERROR(AppendBoxAndData("Exif", container.exif,
+                                         container.exif_size, out, true));
+  }
+
+  if (container.exfc) {
+    JXL_RETURN_IF_ERROR(
+        AppendBoxAndData("Exfc", container.exfc, container.exfc_size, out));
+  }
+
+  for (const auto& entry : container.xml) {
+    JXL_RETURN_IF_ERROR(
+        AppendBoxAndData("xml ", entry.first, entry.second, out));
+  }
+
+  for (const auto& entry : container.xmlc) {
+    JXL_RETURN_IF_ERROR(
+        AppendBoxAndData("xmlc", entry.first, entry.second, out));
+  }
+
+  if (container.jpeg_reconstruction) {
+    JXL_RETURN_IF_ERROR(AppendBoxAndData("jbrd", container.jpeg_reconstruction,
+                                         container.jpeg_reconstruction_size,
+                                         out));
+  }
+
+  if (!container.codestream) {
+    return JXL_FAILURE("must have primary image frame");
+  }
+  JXL_RETURN_IF_ERROR(AppendBoxAndData("jxlc", container.codestream,
+                                       container.codestream_size, out));
+
+  if (container.jumb) {
+    JXL_RETURN_IF_ERROR(
+        AppendBoxAndData("jumb", container.jumb, container.jumb_size, out));
+  }
+
+  return true;
+}
+
+// TODO(veluca): the format defined here encode some things multiple times. Fix
+// that.
+//
+// Decodes `container` into `io` with keep_dct enabled, after decoding the
+// container's JPEG reconstruction box into io->Main().jpeg_data.
+// Exif / XMP app markers found in the reconstruction data are filled with
+// the bytes of the container's Exif box and first xml box; their sizes must
+// match what the reconstruction data expects (payload + 9 bytes for Exif,
+// + 32 bytes for XMP).
+// Fails if there is no reconstruction box, if a required Exif/XMP payload is
+// missing, duplicated or of the wrong size, or if decoding itself fails.
+jxl::Status DecodeJpegXlToJpeg(jxl::DecompressParams params,
+                               const JpegXlContainer& container,
+                               jxl::CodecInOut* io, jxl::ThreadPool* pool) {
+  params.keep_dct = true;
+  if (container.jpeg_reconstruction == nullptr) {
+    return JXL_FAILURE(
+        "Cannot decode to JPEG without a JPEG reconstruction box");
+  }
+
+  io->Main().jpeg_data = jxl::make_unique<jxl::jpeg::JPEGData>();
+
+  JXL_RETURN_IF_ERROR(DecodeJPEGData(
+      jxl::Span<const uint8_t>(container.jpeg_reconstruction,
+                               container.jpeg_reconstruction_size),
+      io->Main().jpeg_data.get()));
+
+  auto& jpeg_data = io->Main().jpeg_data;
+  bool have_exif = false, have_xmp = false;
+  for (size_t i = 0; i < jpeg_data->app_data.size(); i++) {
+    if (jpeg_data->app_marker_type[i] == jxl::jpeg::AppMarkerType::kExif) {
+      if (have_exif)
+        return JXL_FAILURE("Unexpected: more than one Exif box required?");
+      if (jpeg_data->app_data[i].size() != container.exif_size + 9) {
+        return JXL_FAILURE(
+            "Exif box size does not match JPEG reconstruction data");
+      }
+      have_exif = true;
+      // The payload goes after the first 3 + 6 bytes of the app segment.
+      memcpy(&jpeg_data->app_data[i][3 + 6], container.exif,
+             container.exif_size);
+    }
+    if (jpeg_data->app_marker_type[i] == jxl::jpeg::AppMarkerType::kXMP) {
+      if (have_xmp)
+        return JXL_FAILURE("Unexpected: more than one XMP box required?");
+      // Indexing xml[0] on an empty vector is undefined behaviour, so a
+      // container without any xml box must be rejected explicitly.
+      if (container.xml.empty()) {
+        return JXL_FAILURE(
+            "JPEG reconstruction data requires an XMP box but the container "
+            "has none");
+      }
+      if (jpeg_data->app_data[i].size() != container.xml[0].second + 32) {
+        return JXL_FAILURE(
+            "XMP box size does not match JPEG reconstruction data");
+      }
+      have_xmp = true;
+      // The payload goes after the first 3 + 29 bytes of the app segment.
+      memcpy(&jpeg_data->app_data[i][3 + 29], container.xml[0].first,
+             container.xml[0].second);
+    }
+  }
+
+  JXL_RETURN_IF_ERROR(DecodeFile(
+      params,
+      jxl::Span<const uint8_t>(container.codestream, container.codestream_size),
+      io, pool));
+  return true;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/tools/box/box.h b/codec/L2/demos/jxlEnc/third_partys/tools/box/box.h
new file mode 100644
index 0000000000..dfbd88a2c3
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/tools/box/box.h
@@ -0,0 +1,122 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tools for reading from / writing to ISOBMFF format for JPEG XL.
+
+#ifndef TOOLS_BOX_BOX_H_
+#define TOOLS_BOX_BOX_H_
+
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/dec_file.h"
+#include "lib/jxl/enc_file.h"
+
+namespace jpegxl {
+namespace tools {
+
+// A top-level box in the box format: the parsed form of a box header.
+// ParseBoxHeader fills one in from serialized bytes and AppendBoxHeader
+// serializes one. The fields have no default initializers.
+struct Box {
+  // The type of the box: four characters, not NUL-terminated.
+  // If "uuid", use extended_type instead
+  char type[4];
+
+  // The extended_type is only used when type == "uuid".
+  // Extended types are not used in JXL. However, the box format itself
+  // supports this so they are handled correctly.
+  char extended_type[16];
+
+  // Size of the data, excluding box header. The box ends, and next box
+  // begins, at data + data_size. Must not be used if data_size_given is
+  // false.
+  uint64_t data_size;
+
+  // If the size is not given, the data extends to the end of the file.
+  // If this field is false, the data_size field must not be used.
+  bool data_size_given;
+};
+
+// Parses the header of a BMFF box. Returns the result in a Box struct.
+// Updates next_in and available_in to point at the data in the box, directly
+// after the header.
+// Sets the data_size if known, or must be handled by the caller and runs until
+// the end of the container file if not known.
+// NOTE: a box header occupies 8 to 32 bytes (8 basic, plus 8 for a 64-bit
+// size, plus 16 for a "uuid" extended type); parsing fails if available_in
+// does not cover the whole header.
+jxl::Status ParseBoxHeader(const uint8_t** next_in, size_t* available_in,
+                           Box* box);
+
+// TODO(lode): streaming C API
+jxl::Status AppendBoxHeader(const Box& box, jxl::PaddedBytes* out);
+
+// Non-owning view of the payloads of a JPEG XL container file.
+// NOTE: after DecodeJpegXlContainerOneShot, the exif etc. pointers point to
+// regions within the input data passed to that function, so that data must
+// outlive this struct. Each pointer/size pair describes one box payload.
+struct JpegXlContainer {
+  // Exif metadata, or null if not present in the container.
+  // The exif data has the format of 'Exif block' as defined in
+  // ISO/IEC23008-12:2017 Clause A.2.1
+  // Only the actual Exif data (starting with the tiff header MM or II) is
+  // stored here, without the 4-byte tiff header offset field; when encoding,
+  // that offset is written as 0.
+  // TODO(lode): support the theoretical case of multiple exif boxes
+  const uint8_t* exif = nullptr;  // Not owned
+  size_t exif_size = 0;
+
+  // Brotli-compressed exif metadata, if present. The data points to the brotli
+  // compressed stream, it is not decompressed here.
+  const uint8_t* exfc = nullptr;  // Not owned
+  size_t exfc_size = 0;
+
+  // XML boxes for XMP. There may be multiple XML boxes.
+  // Each entry points to XML location and provides size.
+  // The memory is not owned.
+  // TODO(lode): for C API, cannot use std::vector.
+  std::vector<std::pair<const uint8_t*, size_t>> xml;
+
+  // Brotli-compressed xml boxes. The bytes are given in brotli-compressed form
+  // and are not decompressed here. Same (pointer, size) layout as `xml`.
+  std::vector<std::pair<const uint8_t*, size_t>> xmlc;
+
+  // JUMBF superbox data, or null if not present in the container.
+  // The parsing of the nested boxes inside is not handled here.
+  const uint8_t* jumb = nullptr;  // Not owned
+  size_t jumb_size = 0;
+
+  // TODO(lode): add frame index data
+
+  // JPEG reconstruction data ("jbrd" box payload), or null if not present in
+  // the container.
+  const uint8_t* jpeg_reconstruction = nullptr;  // Not owned
+  size_t jpeg_reconstruction_size = 0;
+
+  // The main JPEG XL codestream, of which there must be 1 in the container.
+  // TODO(lode): support split codestream: there may be multiple jxlp boxes.
+  const uint8_t* codestream = nullptr;  // Not owned
+  size_t codestream_size = 0;
+};
+
+// Returns whether `data` starts with a container header; definitely returns
+// false if `size` is less than 12 bytes.
+bool IsContainerHeader(const uint8_t* data, size_t size);
+
+// NOTE: the input data must remain valid as long as `container` is used,
+// because its exif etc. pointers point to that data.
+jxl::Status DecodeJpegXlContainerOneShot(const uint8_t* data, size_t size,
+                                         JpegXlContainer* container);
+
+// TODO(lode): streaming C API
+jxl::Status EncodeJpegXlContainerOneShot(const JpegXlContainer& container,
+                                         jxl::PaddedBytes* out);
+
+// TODO(veluca): this doesn't really belong here.
+jxl::Status DecodeJpegXlToJpeg(jxl::DecompressParams params,
+                               const JpegXlContainer& container,
+                               jxl::CodecInOut* io,
+                               jxl::ThreadPool* pool = nullptr);
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_BOX_BOX_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/tools/cjxl.cc b/codec/L2/demos/jxlEnc/third_partys/tools/cjxl.cc
new file mode 100644
index 0000000000..7736f75368
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/tools/cjxl.cc
@@ -0,0 +1,855 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/cjxl.h"
+
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/codec.h"
+#if JPEGXL_ENABLE_JPEG
+#include "lib/extras/codec_jpg.h"
+#endif
+
+#include "lib/extras/time.h"
+#include "lib/jxl/aux_out.h"
+#include "lib/jxl/base/cache_aligned.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/base/thread_pool_internal.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "tools/args.h"
+#include "tools/box/box.h"
+#include "tools/cpu/cpu.h"
+#include "tools/speed_stats.h"
+
+namespace jpegxl {
+namespace tools {
+namespace {
+
+// Adapter that gives jxl::ParseSpeedTier the plain-bool parser signature;
+// returns exactly what the wrapped parser reports.
+static inline bool ParseSpeedTier(const char* arg, jxl::SpeedTier* out) {
+  if (jxl::ParseSpeedTier(arg, out)) return true;
+  return false;
+}
+// Parses `arg` as an unsigned number and converts it to a
+// jxl::ColorTransform. Only the values 0, 1 and 2 are accepted; on any
+// failure *out is left untouched and false is returned.
+static inline bool ParseColorTransform(const char* arg,
+                                       jxl::ColorTransform* out) {
+  size_t raw = 0;
+  if (!ParseUnsigned(arg, &raw)) return false;
+  if (raw > 2) return false;
+  *out = jxl::ColorTransform(raw);
+  return true;
+}
+// Parses `arg` as a float into *out and accepts it only when strictly
+// positive. *out holds whatever ParseFloat stored even when false is
+// returned.
+static inline bool ParseIntensityTarget(const char* arg, float* out) {
+  if (!ParseFloat(arg, out)) return false;
+  return *out > 0;
+}
+// Parses an argument of the form "ISO<number>" (e.g. "ISO3200"): the text
+// must start with "ISO" and the remainder must parse as a strictly positive
+// float, which is stored in *out.
+static inline bool ParsePhotonNoiseParameter(const char* arg, float* out) {
+  if (strncmp(arg, "ISO", 3) != 0) return false;
+  if (!ParseFloat(arg + 3, out)) return false;
+  return *out > 0;
+}
+
+// Gives a first-guess Butteraugli distance for a bits-per-pixel target,
+// using a fixed power law in bpp. The image's entropy is not taken into
+// account; the result is only a starting point for a search.
+static double ApproximateDistanceForBPP(double bpp) {
+  const double scale = 1.704;
+  const double exponent = -0.804;
+  return scale * pow(bpp, exponent);
+}
+
+// Loads the image file `filename_heatmap` and moves plane 0 of its color
+// image into *out_map. Fails if the file cannot be loaded.
+jxl::Status LoadSaliencyMap(const std::string& filename_heatmap,
+                            jxl::ThreadPool* pool, jxl::ImageF* out_map) {
+  jxl::CodecInOut heatmap_io;
+  if (SetFromFile(filename_heatmap, &heatmap_io, pool)) {
+    // heatmap_io is discarded on return, so its plane can be moved out.
+    *out_map = std::move(heatmap_io.Main().color()->Plane(0));
+    return true;
+  }
+  return JXL_FAILURE("Could not load heatmap.");
+}
+
+// Search algorithm for modular mode instead of Butteraugli distance.
+//
+// Compresses the input up to 10 times at different modular quality values,
+// looking for the quality whose output size is closest to `target_size`
+// bytes, and stores it in args->params.quality_pair. The search ends early
+// once a result is within 1% of the target, when quality 100 still comes out
+// below the target, or when compression fails. On exit
+// args->params.target_bitrate and target_size are reset to 0.
+// If the input image cannot be loaded, a message is printed and *args is
+// left unchanged. `pixels` is the pixel count used to convert sizes to bpp.
+void SetModularQualityForBitrate(jxl::ThreadPoolInternal* pool,
+                                 const size_t pixels, const double target_size,
+                                 CompressArgs* args) {
+  JXL_ASSERT(args->params.modular_mode);
+
+  CompressArgs s = *args;  // Args for search.
+  // Initial guess: linear in the target bits per pixel, capped at 100.
+  float quality = -100 + target_size * 8.0 / pixels * 50;
+  if (quality > 100.f) quality = 100.f;
+  // The trial encodes must use the quality directly, not a size target.
+  s.params.target_size = 0;
+  s.params.target_bitrate = 0;
+  double best_loss = 1e99;
+  float best_quality = quality;
+  // Bracket around the target: the highest quality seen that undershoots
+  // (best_below) and the lowest that overshoots (best_above), with the sizes
+  // they produced. The initial values are sentinels outside the usual range.
+  float best_below = -10000.f;
+  float best_below_size = 0;
+  float best_above = 200.f;
+  float best_above_size = pixels * 15.f;
+
+  jxl::CodecInOut io;
+  double decode_mps = 0;
+
+  if (!LoadAll(*args, pool, &io, &decode_mps)) {
+    printf("couldn't load image\n");
+    return;
+  }
+
+  for (int i = 0; i < 10; ++i) {
+    s.params.quality_pair = std::make_pair(quality, quality);
+    jxl::PaddedBytes candidate;
+    bool ok =
+        CompressJxl(io, decode_mps, pool, s, &candidate, /*print_stats=*/false);
+    if (!ok) {
+      // Report the quality that will actually be used from here on (the best
+      // one found so far), not the one whose trial encode just failed.
+      printf(
+          "Compression error occurred during the search for best size."
+          " Trying with quality %.1f\n",
+          best_quality);
+      break;
+    }
+    printf("Quality %.2f yields %6zu bytes, %.3f bpp.\n", quality,
+           candidate.size(), candidate.size() * 8.0 / pixels);
+    const double ratio = static_cast<double>(candidate.size()) / target_size;
+    const double loss = std::abs(1.0 - ratio);
+    if (best_loss > loss) {
+      best_quality = quality;
+      best_loss = loss;
+      if (loss < 0.01f) break;
+    }
+    if (quality == 100.f && ratio < 1.f) break;  // can't spend more bits
+    if (ratio > 1.f && quality < best_above) {
+      best_above = quality;
+      best_above_size = candidate.size();
+    }
+    if (ratio < 1.f && quality > best_below) {
+      best_below = quality;
+      best_below_size = candidate.size();
+    }
+    // Position of the target between the two bracket sizes, as a fraction.
+    float t =
+        (target_size - best_below_size) / (best_above_size - best_below_size);
+    if (best_above > 100.f && ratio < 1.f) {
+      // Nothing has overshot yet: move halfway towards 105.
+      quality = (quality + 105) / 2;
+    } else if (best_above - best_below > 1000 && ratio > 1.f) {
+      // Nothing has undershot yet: take a large step down.
+      quality -= 1000;
+    } else {
+      // Both sides known: interpolate linearly between them.
+      quality = best_above * t + best_below * (1.f - t);
+    }
+    if (quality >= 100.f) quality = 100.f;
+  }
+  args->params.quality_pair = std::make_pair(best_quality, best_quality);
+  args->params.target_bitrate = 0;
+  args->params.target_size = 0;
+}
+
+// Turns a target file size or target bitrate in *args into concrete encoder
+// settings by trial compression.
+// In modular mode the work is delegated to SetModularQualityForBitrate.
+// Otherwise up to 7 encodes are run, scaling the Butteraugli distance by the
+// size ratio of each result, and the distance that came closest to the
+// target is written to args->params.butteraugli_distance, with
+// target_bitrate and target_size reset to 0. If the input cannot be loaded,
+// a message is printed and *args is left unchanged.
+void SetParametersForSizeOrBitrate(jxl::ThreadPoolInternal* pool,
+                                   const size_t pixels, CompressArgs* args) {
+  CompressArgs search = *args;  // Working copy used for the trial encodes.
+
+  // A byte target is first expressed as bits per pixel.
+  if (search.params.target_size > 0) {
+    search.params.target_bitrate = search.params.target_size * 8.0 / pixels;
+    search.params.target_size = 0;
+  }
+  const double target_size = search.params.target_bitrate * (1 / 8.) * pixels;
+
+  if (args->params.modular_mode) {
+    SetModularQualityForBitrate(pool, pixels, target_size, args);
+    return;
+  }
+
+  double distance = ApproximateDistanceForBPP(search.params.target_bitrate);
+  search.params.target_bitrate = 0;
+
+  jxl::CodecInOut io;
+  double decode_mps = 0;
+  if (!LoadAll(*args, pool, &io, &decode_mps)) {
+    search.params.butteraugli_distance = static_cast<float>(distance);
+    printf("couldn't load image\n");
+    return;
+  }
+
+  double closest_distance = 1.0;
+  double lowest_loss = 1e99;
+  for (int attempt = 0; attempt < 7; ++attempt) {
+    search.params.butteraugli_distance = static_cast<float>(distance);
+    jxl::PaddedBytes candidate;
+    const bool ok = CompressJxl(io, decode_mps, pool, search, &candidate,
+                                /*print_stats=*/false);
+    if (!ok) {
+      printf(
+          "Compression error occurred during the search for best size. "
+          "Trying with butteraugli distance %.15g\n",
+          closest_distance);
+      break;
+    }
+    printf("Butteraugli distance %.3f yields %6zu bytes, %.3f bpp.\n",
+           distance, candidate.size(), candidate.size() * 8.0 / pixels);
+
+    // Loss is the size ratio or its inverse, whichever is larger, so both
+    // overshoot and undershoot count as >= 1.
+    const double ratio = static_cast<double>(candidate.size()) / target_size;
+    const double loss = std::max(ratio, 1.0 / std::max(ratio, 1e-30));
+    if (lowest_loss > loss) {
+      closest_distance = distance;
+      lowest_loss = loss;
+    }
+
+    // Next guess: scale by the size ratio, kept within [0.01, 16].
+    distance *= ratio;
+    if (distance < 0.01) {
+      distance = 0.01;
+    } else if (distance >= 16.0) {
+      distance = 16.0;
+    }
+  }
+
+  args->params.butteraugli_distance = static_cast<float>(closest_distance);
+  args->params.target_bitrate = 0;
+  args->params.target_size = 0;
+}
+
+// Names the encoding mode selected by `args`: "JPEG" when jpeg_transcode is
+// set, else "Modular" when params.modular_mode is set, else "VarDCT".
+const char* ModeFromArgs(const CompressArgs& args) {
+  return args.jpeg_transcode        ? "JPEG"
+         : args.params.modular_mode ? "Modular"
+                                    : "VarDCT";
+}
+
+// Describes the quality setting in `args` as short text:
+//   "lossless transcode"  when jpeg_transcode is set,
+//   "lossless"            in modular mode with both qualities equal to 100,
+//   "Q<a>,<b>" or "Q<a>"  in modular mode otherwise (two values only when
+//                         the pair differs),
+//   "d<distance>"         in all other cases (Butteraugli distance).
+std::string QualityFromArgs(const CompressArgs& args) {
+  if (args.jpeg_transcode) return "lossless transcode";
+
+  char text[100];
+  if (!args.params.modular_mode) {
+    snprintf(text, sizeof(text), "d%.3f", args.params.butteraugli_distance);
+    return text;
+  }
+
+  const auto& q = args.params.quality_pair;
+  if (q.first == 100 && q.second == 100) return "lossless";
+  if (q.first != q.second) {
+    snprintf(text, sizeof(text), "Q%.2f,%.2f", q.first, q.second);
+  } else {
+    snprintf(text, sizeof(text), "Q%.2f", q.first);
+  }
+  return text;
+}
+
+// Prints a two-line summary to stderr: the input dimensions with the decode
+// speed, then the encoding configuration (container use, mode, quality,
+// speed tier). When the container is used, the metadata going into it (JPEG
+// reconstruction data, Exif, XMP, JUMBF sizes) is listed too. The summary
+// ends with the pool's worker thread count.
+void PrintMode(jxl::ThreadPoolInternal* pool, const jxl::CodecInOut& io,
+               const double decode_mps, const CompressArgs& args) {
+  const std::string quality = QualityFromArgs(args);
+  const char* const container_tag = args.use_container ? "Container | " : "";
+  fprintf(stderr,
+          "Read %zux%zu image, %.1f MP/s\n"
+          "Encoding [%s%s, %s, %s",
+          io.xsize(), io.ysize(), decode_mps, container_tag,
+          ModeFromArgs(args), quality.c_str(),
+          SpeedTierName(args.params.speed_tier));
+
+  if (args.use_container) {
+    if (args.jpeg_transcode) {
+      fprintf(stderr, " | JPEG reconstruction data");
+    }
+    if (!io.blobs.exif.empty()) {
+      fprintf(stderr, " | %zu-byte Exif", io.blobs.exif.size());
+    }
+    if (!io.blobs.xmp.empty()) {
+      fprintf(stderr, " | %zu-byte XMP", io.blobs.xmp.size());
+    }
+    if (!io.blobs.jumbf.empty()) {
+      fprintf(stderr, " | %zu-byte JUMBF", io.blobs.jumbf.size());
+    }
+  }
+
+  fprintf(stderr, "], %zu threads.\n", pool->NumWorkerThreads());
+}
+
+}  // namespace
+
+void CompressArgs::AddCommandLineOptions(CommandLineParser* cmdline) {
+  // Positional arguments.
+  cmdline->AddPositionalOption("INPUT", /* required = */ true,
+                               "the input can be PNG"
+#if JPEGXL_ENABLE_APNG
+                               ", APNG"
+#endif
+#if JPEGXL_ENABLE_GIF
+                               ", GIF"
+#endif
+#if JPEGXL_ENABLE_JPEG
+                               ", JPEG"
+#endif
+#if JPEGXL_ENABLE_EXR
+                               ", EXR"
+#endif
+                               ", PPM, PFM, or PGX",
+                               &file_in);
+  cmdline->AddPositionalOption(
+      "OUTPUT", /* required = */ true,
+      "the compressed JXL output file (can be omitted for benchmarking)",
+      &file_out);
+
+  // Flags.
+  // TODO(lode): also add options to add exif/xmp/other metadata in the
+  // container.
+  // TODO(lode): decide on good name for this flag: box, container, bmff, ...
+  cmdline->AddOptionFlag(
+      '\0', "container",
+      "Always encode using container format (default: only if needed)",
+      &use_container, &SetBooleanTrue, 1);
+
+  cmdline->AddOptionFlag('\0', "strip",
+                         "Do not encode using container format (strips "
+                         "Exif/XMP/JPEG bitstream reconstruction data)",
+                         &no_container, &SetBooleanTrue, 2);
+
+  // Target distance/size/bpp
+  opt_distance_id = cmdline->AddOptionValue(
+      'd', "distance", "maxError",
+      ("Max. butteraugli distance, lower = higher quality. Range: 0 .. 15.\n"
+       "    0.0 = mathematically lossless. Default for already-lossy input "
+       "(JPEG/GIF).\n"
+       "    1.0 = visually lossless. Default for other input.\n"
+       "    Recommended range: 0.5 .. 3.0."),
+      &params.butteraugli_distance, &ParseFloat);
+  opt_target_size_id = cmdline->AddOptionValue(
+      '\0', "target_size", "N",
+      ("Aim at file size of N bytes.\n"
+       "    Compresses to 1 % of the target size in ideal conditions.\n"
+       "    Runs the same algorithm as --target_bpp"),
+      &params.target_size, &ParseUnsigned, 1);
+  opt_target_bpp_id = cmdline->AddOptionValue(
+      '\0', "target_bpp", "BPP",
+      ("Aim at file size that has N bits per pixel.\n"
+       "    Compresses to 1 % of the target BPP in ideal conditions."),
+      &params.target_bitrate, &ParseFloat, 1);
+
+  // High-level options
+  opt_quality_id = cmdline->AddOptionValue(
+      'q', "quality", "QUALITY",
+      "Quality setting (is remapped to --distance). Range: -inf .. 100.\n"
+      "    100 = mathematically lossless. Default for already-lossy input "
+      "(JPEG/GIF).\n    Positive quality values roughly match libjpeg quality.",
+      &quality, &ParseFloat);
+
+  cmdline->AddOptionValue(
+      'e', "effort", "EFFORT",
+      "Encoder effort setting. Range: 1 .. 9.\n"
+      "    Default: 7. Higher number is more effort (slower).",
+      &params.speed_tier, &ParseSpeedTier);
+
+  cmdline->AddOptionValue(
+      's', "speed", "ANIMAL",
+      "Deprecated synonym for --effort. Valid values are:\n"
+      "    lightning (1), thunder, falcon, cheetah, hare, wombat, squirrel, "
+      "kitten, tortoise (9)\n"
+      "    Default: squirrel. Values are in order from faster to slower.\n",
+      &params.speed_tier, &ParseSpeedTier, 2);
+
+  cmdline->AddOptionValue('\0', "faster_decoding", "AMOUNT",
+                          "Favour higher decoding speed. 0 = default, higher "
+                          "values give higher speed at the expense of quality",
+                          &params.decoding_speed_tier, &ParseUnsigned, 2);
+
+  cmdline->AddOptionFlag('p', "progressive",
+                         "Enable progressive/responsive decoding.",
+                         &progressive, &SetBooleanTrue);
+
+  cmdline->AddOptionFlag('\0', "premultiply",
+                         "Force premultiplied (associated) alpha.",
+                         &force_premultiplied, &SetBooleanTrue, 1);
+  cmdline->AddOptionValue('\0', "keep_invisible", "0|1",
+                          "force disable/enable preserving color of invisible "
+                          "pixels (default: 1 if lossless, 0 if lossy).",
+                          &params.keep_invisible, &ParseOverride, 1);
+
+  cmdline->AddOptionFlag('\0', "centerfirst",
+                         "Put center groups first in the compressed file.",
+                         &params.centerfirst, &SetBooleanTrue, 1);
+
+  cmdline->AddOptionValue('\0', "center_x", "0..XSIZE",
+                          "Put center groups first in the compressed file.",
+                          &params.center_x, &ParseUnsigned, 1);
+  cmdline->AddOptionValue('\0', "center_y", "0..YSIZE",
+                          "Put center groups first in the compressed file.",
+                          &params.center_y, &ParseUnsigned, 1);
+
+  // Flags.
+  cmdline->AddOptionFlag('\0', "progressive_ac",
+                         "Use the progressive mode for AC.",
+                         &params.progressive_mode, &SetBooleanTrue, 1);
+  cmdline->AddOptionFlag('\0', "qprogressive_ac",
+                         "Use the progressive mode for AC.",
+                         &params.qprogressive_mode, &SetBooleanTrue, 1);
+  cmdline->AddOptionValue('\0', "progressive_dc", "num_dc_frames",
+                          "Use progressive mode for DC.",
+                          &params.progressive_dc, &ParseSigned, 1);
+  cmdline->AddOptionFlag('m', "modular",
+                         "Use the modular mode (lossy / lossless).",
+                         &params.modular_mode, &SetBooleanTrue, 1);
+  cmdline->AddOptionFlag('\0', "use_new_heuristics",
+                         "use new and not yet ready encoder heuristics",
+                         &params.use_new_heuristics, &SetBooleanTrue, 2);
+
+  // JPEG modes: parallel Brunsli, pixels to JPEG, or JPEG to Brunsli
+  cmdline->AddOptionFlag('j', "jpeg_transcode",
+                         "Do lossy transcode of input JPEG file (decode to "
+                         "pixels instead of doing lossless transcode).",
+                         &jpeg_transcode, &SetBooleanFalse, 1);
+
+  opt_num_threads_id = cmdline->AddOptionValue(
+      '\0', "num_threads", "N", "number of worker threads (zero = none).",
+      &num_threads, &ParseUnsigned, 1);
+  cmdline->AddOptionValue('\0', "num_reps", "N", "how many times to compress.",
+                          &num_reps, &ParseUnsigned, 1);
+
+  cmdline->AddOptionValue('\0', "noise", "0|1",
+                          "force disable/enable noise generation.",
+                          &params.noise, &ParseOverride, 1);
+  cmdline->AddOptionValue(
+      '\0', "photon_noise", "ISO3200",
+      "Set the noise to approximately what it would be at a given nominal "
+      "exposure on a 35mm camera. For formats other than 35mm, or when the "
+      "whole sensor was not used, you can multiply the ISO value by the "
+      "equivalence ratio squared, for example by 2.25 for an APS-C camera.",
+      &params.photon_noise_iso, &ParsePhotonNoiseParameter, 0);
+  cmdline->AddOptionValue('\0', "dots", "0|1",
+                          "force disable/enable dots generation.", &params.dots,
+                          &ParseOverride, 1);
+  cmdline->AddOptionValue('\0', "patches", "0|1",
+                          "force disable/enable patches generation.",
+                          &params.patches, &ParseOverride, 1);
+  cmdline->AddOptionValue('\0', "resampling", "1|2|4|8",
+                          "Subsample all color channels by this factor.",
+                          &params.resampling, &ParseUnsigned, 1);
+  cmdline->AddOptionValue(
+      '\0', "ec_resampling", "1|2|4|8",
+      "Subsample all extra channels by this factor. If this value is smaller "
+      "than the resampling of color channels, it will be increased to match.",
+      &params.ec_resampling, &ParseUnsigned, 2);
+  cmdline->AddOptionFlag('\0', "already_downsampled",
+                         "Do not downsample the given input before encoding, "
+                         "but still signal that the decoder should upsample.",
+                         &params.already_downsampled, &SetBooleanTrue, 2);
+
+  cmdline->AddOptionValue(
+      '\0', "epf", "-1..3",
+      "Edge preserving filter level (-1 = choose based on quality, default)",
+      &params.epf, &ParseSigned, 1);
+
+  cmdline->AddOptionValue('\0', "gaborish", "0|1",
+                          "force disable/enable gaborish.", &params.gaborish,
+                          &ParseOverride, 1);
+
+  opt_intensity_target_id = cmdline->AddOptionValue(
+      '\0', "intensity_target", "N",
+      ("Intensity target of monitor in nits, higher\n"
+       "   results in higher quality image. Must be strictly positive.\n"
+       "   Default is 255 for standard images, 4000 for input images known to\n"
+       "   to have PQ or HLG transfer function."),
+      &intensity_target, &ParseIntensityTarget, 1);
+
+  cmdline->AddOptionValue('\0', "saliency_num_progressive_steps", "N", nullptr,
+                          &params.saliency_num_progressive_steps,
+                          &ParseUnsigned, 2);
+  cmdline->AddOptionValue('\0', "saliency_map_filename", "STRING", nullptr,
+                          &saliency_map_filename, &ParseString, 2);
+  cmdline->AddOptionValue('\0', "saliency_threshold", "0..1", nullptr,
+                          &params.saliency_threshold, &ParseFloat, 2);
+  
+  cmdline->AddOptionValue('\0', "xclbin", "STRING", nullptr,
+                          &file_xclbin, &ParseString, 2);
+
+  cmdline->AddOptionValue(
+      'x', "dec-hints", "key=value",
+      "color_space indicates the ColorEncoding, see Description();\n"
+      "icc_pathname refers to a binary file containing an ICC profile.",
+      &dec_hints, &ParseAndAppendKeyValue, 1);
+
+  cmdline->AddOptionValue(
+      '\0', "override_bitdepth", "0=use from image, 1-32=override",
+      "If nonzero, store the given bit depth in the JPEG XL file metadata"
+      " (1-32), instead of using the bit depth from the original input"
+      " image.",
+      &override_bitdepth, &ParseUnsigned, 2);
+
+  opt_color_id = cmdline->AddOptionValue(
+      'c', "colortransform", "0..2", "0=XYB, 1=None, 2=YCbCr",
+      &params.color_transform, &ParseColorTransform, 2);
+
+  // modular mode options
+  cmdline->AddOptionValue(
+      'Q', "mquality", "luma_q[,chroma_q]",
+      "[modular encoding] lossy 'quality' (100=lossless, lower is more lossy)",
+      &params.quality_pair, &ParseFloatPair, 1);
+
+  cmdline->AddOptionValue(
+      'I', "iterations", "F",
+      "[modular encoding] fraction of pixels used to learn MA trees "
+      "(default=0.5, try 0 for no MA and fast decode)",
+      &params.options.nb_repeats, &ParseFloat, 2);
+
+  cmdline->AddOptionValue(
+      'C', "colorspace", "K",
+      ("[modular encoding] color transform: 0=RGB, 1=YCoCg, "
+       "2-37=RCT (default: try several, depending on speed)"),
+      &params.colorspace, &ParseSigned, 1);
+
+  opt_m_group_size_id = cmdline->AddOptionValue(
+      'g', "group-size", "K",
+      ("[modular encoding] set group size to 128 << K "
+       "(default: 1 or 2)"),
+      &params.modular_group_size_shift, &ParseUnsigned, 1);
+
+  cmdline->AddOptionValue(
+      'P', "predictor", "K",
+      "[modular encoding] predictor(s) to use: 0=zero, "
+      "1=left, 2=top, 3=avg0, 4=select, 5=gradient, 6=weighted, "
+      "7=topright, 8=topleft, 9=leftleft, 10=avg1, 11=avg2, 12=avg3, "
+      "13=toptop predictive average "
+      "14=mix 5 and 6, 15=mix everything. Default 14, at slowest speed "
+      "default 15",
+      &params.options.predictor, &ParsePredictor, 1);
+
+  cmdline->AddOptionValue(
+      'E', "extra-properties", "K",
+      "[modular encoding] number of extra MA tree properties to use",
+      &params.options.max_properties, &ParseSigned, 2);
+
+  cmdline->AddOptionValue('\0', "palette", "K",
+                          "[modular encoding] use a palette if image has at "
+                          "most K colors (default: 1024)",
+                          &params.palette_colors, &ParseSigned, 1);
+
+  cmdline->AddOptionFlag(
+      '\0', "lossy-palette",
+      "[modular encoding] quantize to a palette that has fewer entries than "
+      "would be necessary for perfect preservation; for the time being, it is "
+      "recommended to set --palette=0 with this option to use the default "
+      "palette only",
+      &params.lossy_palette, &SetBooleanTrue, 1);
+
+  cmdline->AddOptionValue(
+      'X', "pre-compact", "PERCENT",
+      ("[modular encoding] compact channels (globally) if ratio "
+       "used/range is below this (default: 80%)"),
+      &params.channel_colors_pre_transform_percent, &ParseFloat, 2);
+
+  cmdline->AddOptionValue(
+      'Y', "post-compact", "PERCENT",
+      ("[modular encoding] compact channels (per-group) if ratio "
+       "used/range is below this (default: 80%)"),
+      &params.channel_colors_percent, &ParseFloat, 2);
+
+  cmdline->AddOptionValue('R', "responsive", "K",
+                          "[modular encoding] do Squeeze transform, 0=false, "
+                          "1=true (default: true if lossy, false if lossless)",
+                          &params.responsive, &ParseSigned, 1);
+
+  cmdline->AddOptionFlag('V', "version", "Print version number and exit",
+                         &version, &SetBooleanTrue, 1);
+  cmdline->AddOptionFlag('\0', "quiet", "Be more silent", &quiet,
+                         &SetBooleanTrue, 1);
+  cmdline->AddOptionValue('\0', "print_profile", "0|1",
+                          "Print timing information before exiting",
+                          &print_profile, &ParseOverride, 1);
+
+  cmdline->AddOptionFlag(
+      'v', "verbose",
+      "Verbose output; can be repeated, also applies to help (!).",
+      &params.verbose, &SetBooleanTrue);
+}
+
+// Post-processes the parsed command line and checks that the options are
+// compatible with each other.
+//
+// Copies the file names into `params`, derives the encoder settings implied
+// by the quality / distance / progressive options, range-checks numeric
+// options and, when num_threads was not passed, computes a default from the
+// detected processor topology.
+//
+// Returns true on success. On failure the reason is printed to stderr and
+// false is returned.
+jxl::Status CompressArgs::ValidateArgs(const CommandLineParser& cmdline) {
+  params.file_in = file_in;
+  params.file_out = file_out;
+
+  // params.file_in is a copy of file_in, so this single check covers both.
+  if (file_in == nullptr) {
+    fprintf(stderr, "Missing INPUT filename.\n");
+    return false;
+  }
+
+  bool got_distance = cmdline.GetOption(opt_distance_id)->matched();
+  bool got_target_size = cmdline.GetOption(opt_target_size_id)->matched();
+  bool got_target_bpp = cmdline.GetOption(opt_target_bpp_id)->matched();
+  bool got_quality = cmdline.GetOption(opt_quality_id)->matched();
+  bool got_intensity_target =
+      cmdline.GetOption(opt_intensity_target_id)->matched();
+
+  if (got_quality) {
+    default_settings = false;
+    // Any quality below 100 is lossy, which rules out JPEG transcoding.
+    if (quality < 100) jpeg_transcode = false;
+    // Quality settings roughly match libjpeg qualities.
+    if (quality < 7 || quality == 100 || params.modular_mode) {
+      if (jpeg_transcode == false) params.modular_mode = true;
+      // Internal modular quality to roughly match VarDCT size.
+      if (quality < 7) {
+        params.quality_pair.first = params.quality_pair.second =
+            std::min(35 + (quality - 7) * 3.0f, 100.0f);
+      } else {
+        params.quality_pair.first = params.quality_pair.second =
+            std::min(35 + (quality - 7) * 65.f / 93.f, 100.0f);
+      }
+    } else {
+      // Otherwise map the quality onto a butteraugli distance: linear for
+      // quality >= 30, exponential below that.
+      if (quality >= 30) {
+        params.butteraugli_distance = 0.1 + (100 - quality) * 0.09;
+      } else {
+        params.butteraugli_distance =
+            6.4 + pow(2.5, (30 - quality) / 5.0f) / 6.25f;
+      }
+    }
+  }
+  if (params.resampling > 1 && !params.already_downsampled)
+    jpeg_transcode = false;
+
+  if (progressive) {
+    params.qprogressive_mode = true;
+    params.responsive = 1;
+    default_settings = false;
+  }
+  if (got_target_size || got_target_bpp || got_intensity_target) {
+    default_settings = false;
+  }
+
+  if (params.progressive_dc < -1 || params.progressive_dc > 2) {
+    fprintf(stderr, "Invalid/out of range progressive_dc (%d), try -1 to 2.\n",
+            params.progressive_dc);
+    return false;
+  }
+
+  if (got_distance) {
+    constexpr float butteraugli_min_dist = 0.1f;
+    constexpr float butteraugli_max_dist = 15.0f;
+    // Written as a negated range test so that NaN is rejected as well.
+    if (!(0 <= params.butteraugli_distance &&
+          params.butteraugli_distance <= butteraugli_max_dist)) {
+      fprintf(stderr, "Invalid/out of range distance, try 0 to %g.\n",
+              butteraugli_max_dist);
+      return false;
+    }
+    if (params.butteraugli_distance > 0) jpeg_transcode = false;
+    if (params.butteraugli_distance == 0) {
+      // Use modular for lossless.
+      if (jpeg_transcode == false) params.modular_mode = true;
+    } else if (params.butteraugli_distance < butteraugli_min_dist) {
+      // Nonzero distances are clamped up to the smallest supported value.
+      params.butteraugli_distance = butteraugli_min_dist;
+    }
+    default_settings = false;
+  }
+
+  // The bools are summed as integers to count how many targets were given.
+  if (got_target_bpp + got_target_size + got_distance + got_quality > 1) {
+    fprintf(stderr,
+            "You can specify only one of '--distance', '-q', "
+            "'--target_bpp' and '--target_size'. They are all different ways"
+            " to specify the image quality. When in doubt, use --distance."
+            " It gives the most visually consistent results.\n");
+    return false;
+  }
+
+  if (!saliency_map_filename.empty()) {
+    if (!params.progressive_mode) {
+      // Not fatal: the saliency map is dropped and encoding continues.
+      saliency_map_filename.clear();
+      fprintf(stderr,
+              "Warning: Specifying --saliency_map_filename only makes sense "
+              "for --progressive_ac mode.\n");
+    }
+  }
+
+  if (!cmdline.GetOption(opt_color_id)->matched()) {
+    // default to RGB for lossless modular
+    if (params.modular_mode) {
+      if (params.quality_pair.first != 100 ||
+          params.quality_pair.second != 100) {
+        params.color_transform = jxl::ColorTransform::kXYB;
+      } else {
+        params.color_transform = jxl::ColorTransform::kNone;
+      }
+    }
+  }
+
+  if (override_bitdepth > 32) {
+    fprintf(stderr, "override_bitdepth must be <= 32\n");
+    return false;
+  }
+
+  // The --epf option is documented as -1..3, where -1 means "choose based on
+  // quality"; reject anything outside that range, on either side.
+  if (params.epf < -1 || params.epf > 3) {
+    fprintf(stderr, "--epf must be in the -1..3 range\n");
+    return false;
+  }
+
+  // User didn't override num_threads, so we have to compute a default, which
+  // might fail, so only do so when necessary. Don't just check num_threads != 0
+  // because the user may have set it to that.
+  if (!cmdline.GetOption(opt_num_threads_id)->matched()) {
+    cpu::ProcessorTopology topology;
+    if (!cpu::DetectProcessorTopology(&topology)) {
+      // We have seen sporadic failures caused by setaffinity_np.
+      fprintf(stderr,
+              "Failed to choose default num_threads; you can avoid this "
+              "error by specifying a --num_threads N argument.\n");
+      return false;
+    }
+    num_threads = topology.packages * topology.cores_per_package;
+  }
+
+  return true;
+}
+
+// Second validation pass, run once the input image is available in `io`.
+//
+// Re-runs ValidateArgs, then chooses the default modular group size from the
+// image dimensions, decides whether the output needs a container, and rejects
+// option combinations that lossless JPEG transcoding cannot honor. Returns
+// false (after printing the reason to stderr) when validation fails.
+jxl::Status CompressArgs::ValidateArgsAfterLoad(
+    const CommandLineParser& cmdline, const jxl::CodecInOut& io) {
+  if (!ValidateArgs(cmdline)) return false;
+
+  const bool group_size_given =
+      cmdline.GetOption(opt_m_group_size_id)->matched();
+  if (params.modular_mode && !group_size_given) {
+    // No explicit group size: use shift 2 (128 << 2 = 512) for images where
+    // 256 would be silly, i.e. thin in either dimension or small in both.
+    constexpr size_t kThinLimit = 256 + 64;
+    constexpr size_t kSmallLimit = 256 + 128;
+    const size_t width = io.xsize();
+    const size_t height = io.ysize();
+    const bool is_thin = width < kThinLimit || height < kThinLimit;
+    const bool is_small = width < kSmallLimit && height < kSmallLimit;
+    if (is_thin || is_small) params.modular_group_size_shift = 2;
+  }
+
+  // Any metadata blob, or a JPEG transcode, turns the container on ...
+  const bool has_metadata = !io.blobs.exif.empty() || !io.blobs.xmp.empty() ||
+                            !io.blobs.jumbf.empty() || !io.blobs.iptc.empty();
+  if (has_metadata || jpeg_transcode) use_container = true;
+  // ... but no_container always has the final word.
+  if (no_container) use_container = false;
+
+  // The remaining checks only concern lossless JPEG transcoding.
+  if (!jpeg_transcode) return true;
+
+  if (params.modular_mode) {
+    fprintf(stderr,
+            "Error: cannot do lossless JPEG transcode in modular mode.\n");
+    return false;
+  }
+  const bool any_progressive = params.progressive_mode ||
+                               params.qprogressive_mode ||
+                               params.progressive_dc > 0;
+  if (any_progressive) {
+    fprintf(stderr,
+            "Error: progressive lossless JPEG transcode is not yet "
+            "implemented.\n");
+    return false;
+  }
+  return true;
+}
+
+// Loads the input image named by args.params.file_in into `io` and applies
+// the load-time options from `args` (bit depth override, premultiplied alpha,
+// saliency map).
+//
+// args:       in/out; jpeg_transcode is cleared when the input is not a JPEG,
+//             and several fields of args.params are adjusted to the input.
+// pool:       passed on to LoadSaliencyMap.
+// io:         receives the decoded image and its metadata.
+// decode_mps: receives pixels * frames * 1E-6 divided by the elapsed time of
+//             this function.
+//
+// Returns false, after printing the reason to stderr, when the image or the
+// saliency map cannot be read.
+jxl::Status LoadAll(CompressArgs& args, jxl::ThreadPoolInternal* pool,
+                    jxl::CodecInOut* io, double* decode_mps) {
+  const double t0 = jxl::Now();
+
+  io->target_nits = args.intensity_target;
+  io->dec_hints = args.dec_hints;
+  // For a (possible) JPEG transcode, ask for quantized coefficients instead
+  // of decoded pixels.
+  io->dec_target = (args.jpeg_transcode ? jxl::DecodeTarget::kQuantizedCoeffs
+                                        : jxl::DecodeTarget::kPixels);
+  jxl::Codec input_codec;
+  if (!SetFromFile(args.params.file_in, io, nullptr, &input_codec)) {
+    fprintf(stderr, "Failed to read image %s.\n", args.params.file_in);
+    return false;
+  }
+  if (input_codec != jxl::Codec::kJPG) args.jpeg_transcode = false;
+  if (args.jpeg_transcode) args.params.butteraugli_distance = 0;
+
+  // GIF input with otherwise default settings is encoded with modular mode at
+  // quality 100.
+  if (input_codec == jxl::Codec::kGIF && args.default_settings) {
+    args.params.modular_mode = true;
+    args.params.quality_pair.first = args.params.quality_pair.second = 100;
+  }
+  if (args.params.modular_mode && args.params.quality_pair.first < 100) {
+    if (io->metadata.m.bit_depth.floating_point_sample) {
+      // for lossy modular, pretend pfm/exr is integer data
+      io->metadata.m.SetUintSamples(12);
+    }
+  }
+  if (args.override_bitdepth != 0) {
+    if (args.override_bitdepth == 32) {
+      io->metadata.m.SetFloat32Samples();
+    } else {
+      io->metadata.m.SetUintSamples(args.override_bitdepth);
+    }
+  }
+  if (args.force_premultiplied) {
+    io->PremultiplyAlpha();
+  }
+
+  if (!args.saliency_map_filename.empty()) {
+    // args.params only stores a pointer to the map, and args is consumed by
+    // the caller after this function has returned. The image therefore needs
+    // static storage duration: an automatic local would leave
+    // args.params.saliency_map dangling.
+    static jxl::ImageF saliency_map;
+    if (!LoadSaliencyMap(args.saliency_map_filename, pool, &saliency_map)) {
+      fprintf(stderr, "Failed to read saliency map %s.\n",
+              args.saliency_map_filename.c_str());
+      return false;
+    }
+    args.params.saliency_map = &saliency_map;
+  }
+
+  const double t1 = jxl::Now();
+  const size_t pixels = io->xsize() * io->ysize();
+  *decode_mps = pixels * io->frames.size() * 1E-6 / (t1 - t0);
+
+  return true;
+}
+
+// Encodes the image already loaded in `io` into `compressed`.
+//
+// The encode is repeated args.num_reps times, each run being timed. When
+// `print_stats` is set, the mode is printed before encoding and the
+// compressed size, bits per pixel and timing statistics afterwards (all on
+// stderr). Returns false if any encode run fails. `pool` must be non-null.
+jxl::Status CompressJxl(jxl::CodecInOut& io, double decode_mps,
+                        jxl::ThreadPoolInternal* pool, CompressArgs& args,
+                        jxl::PaddedBytes* compressed, bool print_stats) {
+  JXL_CHECK(pool);
+
+  const size_t num_pixels = io.xsize() * io.ysize();
+
+  // A target size or bitrate needs a slow iterative search for the encoder
+  // parameters that reach it.
+  const bool has_size_target =
+      args.params.target_size > 0 || args.params.target_bitrate > 0;
+  if (has_size_target) {
+    SetParametersForSizeOrBitrate(pool, num_pixels, &args);
+  }
+
+  if (print_stats) {
+    PrintMode(pool, io, decode_mps, args);
+  }
+
+  // State shared by all repetitions of the final/actual compression run.
+  jxl::AuxOut aux_out;
+  if (args.inspector_image3f) {
+    aux_out.SetInspectorImage3F(args.inspector_image3f);
+  }
+  SpeedStats stats;
+  jxl::PassesEncoderState passes_encoder_state;
+  if (args.params.use_new_heuristics) {
+    passes_encoder_state.heuristics =
+        jxl::make_unique<jxl::FastEncoderHeuristics>();
+  }
+  std::string xclbin_path = args.file_xclbin;
+
+  // More than one repetition is only useful for benchmarking.
+  for (size_t rep = 0; rep < args.num_reps; ++rep) {
+    const double start = jxl::Now();
+    if (io.Main().IsJPEG()) {
+      // TODO(lode): automate this in the encoder. The encoder must in the
+      // beginning choose to either do all in xyb, or all in non-xyb, write
+      // that in the xyb_encoded header flag, and persistently keep that state
+      // to check if every frame uses an allowed color transform.
+      args.params.color_transform = io.Main().color_transform;
+    }
+    if (!EncodeFile(args.params, &io, &passes_encoder_state, compressed,
+                    &aux_out, pool, xclbin_path)) {
+      fprintf(stderr, "Failed to compress to %s.\n", ModeFromArgs(args));
+      return false;
+    }
+    const double stop = jxl::Now();
+    stats.NotifyElapsed(stop - start);
+    stats.SetImageSize(io.xsize(), io.ysize());
+  }
+
+  if (!print_stats) return true;
+
+  // Bits per pixel over the whole file; the report divides by frame count.
+  const double total_bpp =
+      static_cast<double>(compressed->size() * jxl::kBitsPerByte) / num_pixels;
+  fprintf(stderr, "Compressed to %zu bytes (%.3f bpp%s).\n",
+          compressed->size(), total_bpp / io.frames.size(),
+          io.frames.size() == 1 ? "" : "/frame");
+  JXL_CHECK(stats.Print(args.num_threads));
+  if (args.params.verbose) {
+    aux_out.Print(1);
+  }
+
+  return true;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/tools/cjxl.h b/codec/L2/demos/jxlEnc/third_partys/tools/cjxl.h
new file mode 100644
index 0000000000..ba3d7978a2
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/tools/cjxl.h
@@ -0,0 +1,105 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_CJXL_H_
+#define TOOLS_CJXL_H_
+
+#include <stddef.h>
+
+#include <string>
+#include <utility>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/base/thread_pool_internal.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/jxl_inspection.h"
+#include "tools/cmdline.h"
+
+namespace jpegxl {
+namespace tools {
+
+// All command line state of the cjxl encoder tool: the values the options
+// parse into, plus the ids needed to ask the parser which options were
+// actually passed.
+struct CompressArgs {
+  // Stores the inspector callback; it will get passed on to AuxOut.
+  void SetInspectorImage3F(const jxl::InspectorImage3F& inspector) {
+    inspector_image3f = inspector;
+  }
+
+  // Add all the command line options to the CommandLineParser. Note that the
+  // options are tied to the instance that this was called on.
+  void AddCommandLineOptions(CommandLineParser* cmdline);
+
+  // Post-processes and validates the passed arguments, checking whether all
+  // passed options are compatible. Returns whether the validation was
+  // successful.
+  jxl::Status ValidateArgs(const CommandLineParser& cmdline);
+
+  // Validates the arguments again, having loaded the input so sensible defaults
+  // can be chosen based on e.g. dimensions.
+  jxl::Status ValidateArgsAfterLoad(const CommandLineParser& cmdline,
+                                    const jxl::CodecInOut& io);
+
+  // Common flags.
+  bool version = false;
+  bool use_container = false;
+  bool no_container = false;
+  bool quiet = false;
+
+  const char* file_in = nullptr;
+  const char* file_out = nullptr;
+  jxl::Override print_profile = jxl::Override::kDefault;
+
+  // JXL flags
+  jxl::DecoderHints dec_hints;
+  // 0 = use the bit depth from the image, otherwise the depth to signal.
+  size_t override_bitdepth = 0;
+  jxl::CompressParams params;
+  // Number of worker threads. Initialized so that the value is never
+  // indeterminate; ValidateArgs replaces it with a computed default when the
+  // option was not passed.
+  size_t num_threads = 0;
+  size_t num_reps = 1;
+  float intensity_target = 0;
+
+  // Filename for the user provided saliency-map.
+  std::string saliency_map_filename;
+  // Value of the --xclbin option.
+  std::string file_xclbin;
+
+  // Whether to perform lossless transcoding with kVarDCT or kJPEG encoding.
+  // If true, attempts to load JPEG coefficients instead of pixels.
+  // Reset to false if input image is not a JPEG.
+  bool jpeg_transcode = true;
+
+  float quality = -1001.f;  // Default to lossless if input is already lossy,
+                            // or to VarDCT otherwise.
+  bool progressive = false;
+  // Cleared by ValidateArgs as soon as a quality/size/progressive related
+  // option is passed.
+  bool default_settings = true;
+  bool force_premultiplied = false;
+
+  // Will get passed on to AuxOut.
+  jxl::InspectorImage3F inspector_image3f;
+
+  // References (ids) of specific options to check if they were matched.
+  // OptionId is unsigned, so the -1 initializer is its largest value.
+  CommandLineParser::OptionId opt_num_threads_id = -1;
+  CommandLineParser::OptionId opt_distance_id = -1;
+  CommandLineParser::OptionId opt_target_size_id = -1;
+  CommandLineParser::OptionId opt_target_bpp_id = -1;
+  CommandLineParser::OptionId opt_quality_id = -1;
+  CommandLineParser::OptionId opt_near_lossless_id = -1;
+  CommandLineParser::OptionId opt_intensity_target_id = -1;
+  CommandLineParser::OptionId opt_color_id = -1;
+  CommandLineParser::OptionId opt_m_group_size_id = -1;
+};
+
+// Loads the input image named by args.params.file_in into io, adjusting args
+// to the input (e.g. jpeg_transcode is reset when the input is not a JPEG).
+// decode_mps receives the measured load speed in megapixels per second.
+jxl::Status LoadAll(CompressArgs& args, jxl::ThreadPoolInternal* pool,
+                    jxl::CodecInOut* io, double* decode_mps);
+
+// The input image must already have been loaded into io using LoadAll.
+// Writes the encoded result to compressed; print_stats controls whether the
+// mode, size and timing report is printed.
+jxl::Status CompressJxl(jxl::CodecInOut& io, double decode_mps,
+                        jxl::ThreadPoolInternal* pool, CompressArgs& args,
+                        jxl::PaddedBytes* compressed, bool print_stats = true);
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_CJXL_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/tools/cjxl_main.cc b/codec/L2/demos/jxlEnc/third_partys/tools/cjxl_main.cc
new file mode 100644
index 0000000000..0e0f325ac9
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/tools/cjxl_main.cc
@@ -0,0 +1,134 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include "jxl/encode.h"
+#include "lib/jxl/base/file_io.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+#include "tools/box/box.h"
+#include "tools/cjxl.h"
+#include "tools/codec_config.h"
+
+namespace jpegxl {
+namespace tools {
+
+// Implements the cjxl command line tool: parses and validates the arguments,
+// loads the input image, compresses it, optionally wraps the codestream in a
+// container and writes the result to args.file_out.
+//
+// Returns the process exit code: 0 on success (including --version and help
+// requests), 1 on any failure.
+int CompressJpegXlMain(int argc, const char* argv[]) {
+  CommandLineParser cmdline;
+  CompressArgs args;
+  args.AddCommandLineOptions(&cmdline);
+
+  if (!cmdline.Parse(argc, argv)) {
+    // Parse already printed the actual error cause.
+    fprintf(stderr, "Use '%s -h' for more information\n", argv[0]);
+    return 1;
+  }
+
+  if (args.version) {
+    fprintf(stdout, "cjxl %s\n",
+            CodecConfigString(JxlEncoderVersion()).c_str());
+    fprintf(stdout, "Copyright (c) the JPEG XL Project\n");
+    return 0;
+  }
+
+  if (!args.quiet) {
+    fprintf(stderr, "JPEG XL encoder %s\n",
+            CodecConfigString(JxlEncoderVersion()).c_str());
+  }
+
+  if (cmdline.HelpFlagPassed()) {
+    cmdline.PrintHelp();
+    return 0;
+  }
+
+  if (!args.ValidateArgs(cmdline)) {
+    // ValidateArgs already printed the actual error cause.
+    fprintf(stderr, "Use '%s -h' for more information\n", argv[0]);
+    return 1;
+  }
+
+  jxl::PaddedBytes compressed;
+
+  jxl::ThreadPoolInternal pool(args.num_threads);
+  jxl::CodecInOut io;
+  double decode_mps = 0;
+  // Checked explicitly rather than with JXL_RETURN_IF_ERROR: this function
+  // returns an exit code, and a failed status returned from here would turn
+  // into 0, i.e. success. LoadAll already printed the actual error cause.
+  if (!LoadAll(args, &pool, &io, &decode_mps)) {
+    return 1;
+  }
+
+  // need to validate again because now we know the input
+  if (!args.ValidateArgsAfterLoad(cmdline, io)) {
+    fprintf(stderr, "Use '%s -h' for more information\n", argv[0]);
+    return 1;
+  }
+  if (!args.file_out && !args.quiet) {
+    fprintf(stderr,
+            "No output file specified.\n"
+            "Encoding will be performed, but the result will be discarded.\n");
+  }
+  if (!CompressJxl(io, decode_mps, &pool, args, &compressed, !args.quiet)) {
+    return 1;
+  }
+
+  if (args.use_container) {
+    // Wrap the codestream, the metadata blobs and (for JPEG input) the JPEG
+    // reconstruction data into a container file.
+    JpegXlContainer container;
+    container.codestream = compressed.data();
+    container.codestream_size = compressed.size();
+    if (!io.blobs.exif.empty()) {
+      container.exif = io.blobs.exif.data();
+      container.exif_size = io.blobs.exif.size();
+    }
+    auto append_xml = [&container](const jxl::PaddedBytes& bytes) {
+      if (bytes.empty()) return;
+      container.xml.emplace_back(bytes.data(), bytes.size());
+    };
+    append_xml(io.blobs.xmp);
+    if (!io.blobs.jumbf.empty()) {
+      container.jumb = io.blobs.jumbf.data();
+      container.jumb_size = io.blobs.jumbf.size();
+    }
+    // Declared outside the if: the container only keeps a pointer into it.
+    jxl::PaddedBytes jpeg_data;
+    if (io.Main().IsJPEG()) {
+      jxl::jpeg::JPEGData data_in = *io.Main().jpeg_data;
+      // Explicit check for the same exit-code reason as for LoadAll above.
+      if (!EncodeJPEGData(data_in, &jpeg_data)) {
+        fprintf(stderr, "Failed to encode JPEG reconstruction data\n");
+        return 1;
+      }
+      container.jpeg_reconstruction = jpeg_data.data();
+      container.jpeg_reconstruction_size = jpeg_data.size();
+    }
+    jxl::PaddedBytes container_file;
+    if (!EncodeJpegXlContainerOneShot(container, &container_file)) {
+      fprintf(stderr, "Failed to encode container format\n");
+      return 1;
+    }
+    // From here on `compressed` holds the complete container file.
+    compressed.swap(container_file);
+    if (!args.quiet) {
+      const size_t pixels = io.xsize() * io.ysize();
+      const double bpp =
+          static_cast<double>(compressed.size() * jxl::kBitsPerByte) / pixels;
+      fprintf(stderr, "Including container: %llu bytes (%.3f bpp%s).\n",
+              static_cast<long long unsigned>(compressed.size()),
+              bpp / io.frames.size(), io.frames.size() == 1 ? "" : "/frame");
+    }
+  }
+  if (args.file_out) {
+    if (!jxl::WriteFile(compressed, args.file_out)) {
+      fprintf(stderr, "Failed to write to \"%s\"\n", args.file_out);
+      return 1;
+    }
+  }
+
+  if (args.print_profile == jxl::Override::kOn) {
+    PROFILER_PRINT_RESULTS();
+  }
+  if (!args.quiet && cmdline.verbosity > 0) {
+    jxl::CacheAligned::PrintStats();
+  }
+  return 0;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
+
+// Process entry point: the exit status is whatever CompressJpegXlMain returns.
+int main(int argc, const char** argv) {
+  const int exit_code = jpegxl::tools::CompressJpegXlMain(argc, argv);
+  return exit_code;
+}
diff --git a/codec/L2/demos/jxlEnc/third_partys/tools/cmdline.cc b/codec/L2/demos/jxlEnc/third_partys/tools/cmdline.cc
new file mode 100644
index 0000000000..46b95d7ad7
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/tools/cmdline.cc
@@ -0,0 +1,82 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/cmdline.h"
+
+#include <memory>
+#include <string>
+
+namespace jpegxl {
+namespace tools {
+
+// Prints the usage line followed by one entry per registered option whose
+// verbosity level does not exceed the current verbosity.
+void CommandLineParser::PrintHelp() const {
+  // Help goes to stdout rather than stderr so that it can easily be grepped.
+  FILE* const out = stdout;
+  fprintf(out, "Usage: %s", program_name_ ? program_name_ : "command");
+
+  // Usage line: positional arguments only, optional ones in brackets.
+  for (const auto& opt : options_) {
+    if (!opt->positional()) continue;
+    if (opt->verbosity_level() > verbosity) continue;
+    const bool is_required = opt->required();
+    const std::string flags = opt->help_flags();
+    fprintf(out, " %s%s%s", is_required ? "" : "[", flags.c_str(),
+            is_required ? "" : "]");
+  }
+  fprintf(out, " [OPTIONS...]\n");
+
+  // Detailed list; remember whether anything was hidden by the verbosity
+  // filter so that the closing entry can point the user at -v.
+  bool hidden_any = false;
+  for (const auto& opt : options_) {
+    if (opt->verbosity_level() > verbosity) {
+      hidden_any = true;
+      continue;
+    }
+    fprintf(out, " %s\n", opt->help_flags().c_str());
+    if (const char* text = opt->help_text()) {
+      fprintf(out, "    %s\n", text);
+    }
+  }
+  fprintf(out, " -h, --help\n    Prints this help message%s.\n",
+          (hidden_any ? " (use -v to see more options)" : ""));
+}
+
+// Parses argv against the registered options.
+//
+// "-h" / "--help" only set the help flag. "-v" / "--verbose" raise the
+// verbosity and are then still handed to the registered options like any
+// other argument. Returns false, after printing the reason to stderr, when an
+// argument matches no option or the matching option fails to parse it.
+bool CommandLineParser::Parse(int argc, const char* argv[]) {
+  if (argc) program_name_ = argv[0];
+
+  // argv[0] is the program name, so the arguments start at index 1. The
+  // index is advanced either by the help branch or by the option's Parse.
+  for (int i = 1; i < argc;) {
+    const char* const arg = argv[i];
+
+    if (strcmp("-h", arg) == 0 || strcmp("--help", arg) == 0) {
+      help_ = true;
+      ++i;
+      continue;
+    }
+    if (strcmp("-v", arg) == 0 || strcmp("--verbose", arg) == 0) {
+      ++verbosity;
+    }
+
+    // The first registered option that matches the argument handles it.
+    auto it = options_.begin();
+    while (it != options_.end() && !(*it)->Match(arg)) ++it;
+
+    if (it == options_.end()) {
+      // No option matched argv[i].
+      fprintf(stderr, "Unknown argument: %s\n", arg);
+      return false;
+    }
+    // Parsing advances the value i on success.
+    if (!(*it)->Parse(argc, argv, &i)) {
+      fprintf(stderr, "Error parsing flag %s\n", arg);
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/tools/cmdline.h b/codec/L2/demos/jxlEnc/third_partys/tools/cmdline.h
new file mode 100644
index 0000000000..fc9c6453bb
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/tools/cmdline.h
@@ -0,0 +1,321 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_CMDLINE_H_
+#define TOOLS_CMDLINE_H_
+
+#include <stdio.h>
+#include <string.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+
+namespace jpegxl {
+namespace tools {
+
+class CommandLineParser {
+ public:
+  typedef size_t OptionId;
+
+  // Abstract interface implemented by every kind of command line option.
+  class CmdOptionInterface {
+   public:
+    CmdOptionInterface() = default;
+    virtual ~CmdOptionInterface() = default;
+
+    // Return the option name or its available flags, as printed in the help.
+    virtual std::string help_flags() const = 0;
+
+    // Return the help string if any, or nullptr if no help string.
+    virtual const char* help_text() const = 0;
+
+    // Return the minimum parser verbosity at which the help lists this option.
+    virtual int verbosity_level() const = 0;
+
+    // Return whether the option was passed.
+    virtual bool matched() const = 0;
+
+    // Returns whether this option matches the passed command line argument.
+    virtual bool Match(const char* arg) const = 0;
+
+    // Parses the option. On entry argv[*i] is the argument that Match()
+    // accepted; on success *i is advanced past everything that was consumed.
+    virtual bool Parse(int argc, const char* argv[], int* i) = 0;
+
+    // Returns whether the option is positional, and therefore will be shown
+    // in the first command line representation of the help output.
+    virtual bool positional() const = 0;
+
+    // Returns whether the option should be displayed as required in the help
+    // output. No effect on validation.
+    virtual bool required() const = 0;
+  };
+
+  // Registers a positional argument whose matched string is stored in
+  // `*storage`, and returns its id (the option's index in options_).
+  // `required` does not affect parsing: it only controls whether the help
+  // output shows the argument as mandatory or optional.
+  OptionId AddPositionalOption(const char* name, bool required,
+                               const char* help_text, const char** storage,
+                               int verbosity_level = 0) {
+    const OptionId id = options_.size();
+    options_.emplace_back(new CmdOptionPositional(name, help_text, storage,
+                                                  verbosity_level, required));
+    return id;
+  }
+
+  // Registers a flag that takes a value, accepted as '-s <value>',
+  // '--long <value>' or '--long=<value>'. In every form `parser` is called
+  // with a pointer to the start of '<value>' and with `storage`.
+  // Returns the id of the new option (its index in options_).
+  template <typename T>
+  OptionId AddOptionValue(char short_name, const char* long_name,
+                          const char* metavar, const char* help_text,
+                          T* storage, bool(parser)(const char*, T*),
+                          int verbosity_level = 0) {
+    const OptionId id = options_.size();
+    options_.emplace_back(new CmdOptionFlag<T>(short_name, long_name, metavar,
+                                               help_text, storage, parser,
+                                               verbosity_level));
+    return id;
+  }
+
+  // Registers a value-less flag handled by `parser(storage)`; returns its id.
+  template <typename T>
+  OptionId AddOptionFlag(char short_name, const char* long_name,
+                         const char* help_text, T* storage, bool(parser)(T*),
+                         int verbosity_level = 0) {
+    const OptionId id = options_.size();
+    options_.emplace_back(new CmdOptionFlag<T>(
+        short_name, long_name, help_text, storage, parser, verbosity_level));
+    return id;
+  }
+
+  const CmdOptionInterface* GetOption(OptionId id) const {
+    JXL_ASSERT(options_.size() > id);  // ids are indices into options_
+    return (options_.cbegin() + id)->get();  // non-owning pointer
+  }
+
+  // Print the help message to stdout.
+  void PrintHelp() const;
+
+  // Whether a help flag was specified
+  bool HelpFlagPassed() const { return help_; }
+
+  int verbosity = 0;
+
+  // Parse the command line.
+  bool Parse(int argc, const char* argv[]);
+
+  // Return the remaining positional args
+  std::vector<const char*> PositionalArgs() const;
+
+ private:
+  // Option that takes one bare (non-flag) argument and stores a pointer to it.
+  class CmdOptionPositional : public CmdOptionInterface {
+   public:
+    CmdOptionPositional(const char* name, const char* help_text,
+                        const char** storage, int verbosity_level,
+                        bool required)
+        : name_(name),
+          help_text_(help_text),
+          storage_(storage),
+          verbosity_level_(verbosity_level),
+          required_(required) {}
+
+    std::string help_flags() const override { return std::string(name_); }
+    const char* help_text() const override { return help_text_; }
+    int verbosity_level() const override { return verbosity_level_; }
+    bool matched() const override { return matched_; }
+
+    // Accepts at most one argument, and never one starting with '-'. "-foo"
+    // therefore cannot be passed positionally, but in exchange a mistyped
+    // flag is not silently taken as a positional value.
+    bool Match(const char* arg) const override {
+      return !(matched_ || arg[0] == '-');
+    }
+
+    bool Parse(const int argc, const char* argv[], int* i) override {
+      const int index = (*i)++;  // consume exactly one argument
+      *storage_ = argv[index];
+      matched_ = true;
+      return true;
+    }
+
+    bool positional() const override { return true; }
+
+    bool required() const override { return required_; }
+
+   private:
+    const char* name_;
+    const char* help_text_;
+    const char** storage_;  // receives the matched argv entry
+    const int verbosity_level_;
+    const bool required_;  // help display only
+
+    bool matched_{false};  // set once an argument has been consumed
+  };
+
+  // A class for handling an option flag like '-v' or '--foo=bar'.
+  template <typename T>
+  class CmdOptionFlag : public CmdOptionInterface {
+   public:
+    // Value-less flag such as '-v' or '--long'. The null metavar_ marks it as
+    // taking no value, so Parse() rejects '--long=...'.
+    CmdOptionFlag(char short_name, const char* long_name, const char* help_text,
+                  T* storage, bool(parser)(T*), int verbosity_level)
+        : short_name_(short_name),
+          long_name_(long_name),
+          long_name_len_(long_name == nullptr ? 0 : strlen(long_name)),
+          metavar_(nullptr),
+          help_text_(help_text),
+          storage_(storage),
+          verbosity_level_(verbosity_level) {
+      parser_.parser_no_value_ = parser;  // union member used when no value
+    }
+
+    // Flag that takes a value; a null `metavar` is stored as "" (non-null).
+    CmdOptionFlag(char short_name, const char* long_name, const char* metavar,
+                  const char* help_text, T* storage,
+                  bool(parser)(const char* arg, T*), int verbosity_level)
+        : short_name_(short_name),
+          long_name_(long_name),
+          long_name_len_(long_name == nullptr ? 0 : strlen(long_name)),
+          metavar_(metavar == nullptr ? "" : metavar),
+          help_text_(help_text),
+          storage_(storage),
+          verbosity_level_(verbosity_level) {
+      parser_.parser_with_arg_ = parser;  // union member used with a value
+    }
+
+    std::string help_flags() const override {
+      std::string text;  // e.g. "-s N, --long=N"; unset parts are omitted
+      if (short_name_ != 0) {
+        text.append(1, '-').append(1, short_name_);
+        if (metavar_ != nullptr) text.append(" ").append(metavar_);
+        if (long_name_ != nullptr) text.append(", ");
+      }
+      if (long_name_ != nullptr) {
+        text.append("--").append(long_name_);
+        if (metavar_ != nullptr) text.append("=").append(metavar_);
+      }
+      return text;
+    }
+    const char* help_text() const override { return help_text_; }
+    int verbosity_level() const override { return verbosity_level_; }
+    bool matched() const override { return matched_; }
+
+    bool Match(const char* arg) const override {
+      return MatchShort(arg) ? true : MatchLong(arg);  // short form, then long
+    }
+
+    bool Parse(const int argc, const char* argv[], int* i) override {
+      // Consumes the flag at argv[*i] (plus its value, if it takes one) and
+      // advances *i past it. Returns false on misuse or parser failure.
+      matched_ = true;
+      if (MatchLong(argv[*i])) {
+        const char* arg = argv[*i] + 2 + long_name_len_;
+        if (arg[0] == '=') {
+          if (!metavar_) {
+            // long_name_ is non-null here because MatchLong() succeeded.
+            fprintf(stderr, "--%s didn't expect any argument passed to it.\n",
+                    long_name_);
+            return false;
+          }
+          // Passed '--long_name=...': the value follows the '='.
+          (*i)++;
+          return (*parser_.parser_with_arg_)(arg + 1, storage_);
+        }
+      }
+      // In any other case, it passed a -s or --long_name
+      (*i)++;
+      if (metavar_) {
+        if (argc <= *i) {
+          fprintf(stderr, "%s expected an argument but none passed.\n",
+                  argv[*i - 1]);  // the flag as typed, dashes included
+          return false;
+        }
+        return (*parser_.parser_with_arg_)(argv[(*i)++], storage_);
+      } else {
+        return (*parser_.parser_no_value_)(storage_);
+      }
+    }
+
+    bool positional() const override { return false; }
+
+    bool required() const override {
+      // Only used for help display of positional arguments.
+      return false;
+    }
+
+   private:
+    // True only for exactly "-X", where X is this option's short name.
+    bool MatchShort(const char* arg) const {
+      const bool usable = short_name_ != 0 && arg[0] == '-';
+      return usable && arg[1] == short_name_ && arg[2] == '\0';
+    }
+
+    // True for "--long_name" and "--long_name=<anything>", where long_name is
+    // this option's long name; false when the option has no long name.
+    bool MatchLong(const char* arg) const {
+      if (long_name_ == nullptr || arg[0] != '-' || arg[1] != '-') return false;
+      const char* const name = arg + 2;  // text after the leading "--"
+      if (strncmp(long_name_, name, long_name_len_) != 0) return false;
+      // Whatever follows the name must be the end of string or an '='.
+      const char next = name[long_name_len_];
+      return next == '\0' || next == '=';
+    }
+
+    // A short option passed as '-X' where X is the char. A value of 0 means
+    // no short option.
+    const char short_name_;
+
+    // A long option name passed as '--long' where 'long' is the name of the
+    // option.
+    const char* long_name_;
+    size_t long_name_len_;
+
+    // The text to display when referring to the value passed to this flag, for
+    // example "N" in the flag '--value N'. If null, this flag accepts no value
+    // and therefore no value must be passed.
+    const char* metavar_;
+
+    // The help string for this flag.
+    const char* help_text_;
+
+    // The pointer to the storage of this flag used when parsing.
+    T* storage_;
+
+    // At which verbosity level do we show this option?
+    int verbosity_level_;
+
+    // The function to use to parse the value when matched. The function used is
+    // parser_with_arg_ when metavar_ is not null (and the value string will be
+    // used) or parser_no_value_ when metavar_ is null.
+    union {
+      bool (*parser_with_arg_)(const char*, T*);
+      bool (*parser_no_value_)(T*);
+    } parser_;
+
+    // Whether this flag was matched.
+    bool matched_{false};
+  };
+
+  const char* program_name_{nullptr};
+
+  std::vector<std::unique_ptr<CmdOptionInterface>> options_;
+
+  // If true, help argument was given, so print help to stdout rather than
+  // stderr.
+  bool help_ = false;
+};
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_CMDLINE_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/tools/codec_config.cc b/codec/L2/demos/jxlEnc/third_partys/tools/codec_config.cc
new file mode 100644
index 0000000000..b58c62af7b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/tools/codec_config.cc
@@ -0,0 +1,57 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/codec_config.h"
+
+#include <hwy/targets.h>
+
+#include "lib/jxl/base/status.h"
+
+#define JPEGXL_VERSION "61fb7ee"
+namespace jpegxl {
+namespace tools {
+
+std::string CodecConfigString(uint32_t lib_version) {
+  // Builds "<version> <revision> <sanitizer> [<targets>]"; a non-zero
+  // lib_version is decoded as major * 1000000 + minor * 1000 + patch.
+  std::string config;
+  if (lib_version != 0) {
+    char version_str[20];
+    snprintf(version_str, sizeof(version_str), "v%u.%u.%u ",  // unsigned args
+             lib_version / 1000000, (lib_version / 1000) % 1000,
+             lib_version % 1000);
+    config += version_str;
+  }
+  std::string version = JPEGXL_VERSION;
+  if (version != "(unknown)") {
+    config += version + ' ';
+  }
+
+#if defined(ADDRESS_SANITIZER)
+  config += " asan ";
+#elif defined(MEMORY_SANITIZER)
+  config += " msan ";
+#elif defined(THREAD_SANITIZER)
+  config += " tsan ";
+#else
+#endif
+  // Bracketed, comma-separated names from hwy::SupportedAndGeneratedTargets().
+  bool saw_target = false;
+  config += "[";
+  for (const uint32_t target : hwy::SupportedAndGeneratedTargets()) {
+    config += hwy::TargetName(target);
+    config += ',';
+    saw_target = true;
+  }
+  JXL_ASSERT(saw_target);
+  (void)saw_target;
+  config.resize(config.size() - 1);  // remove trailing comma
+  config += "]";
+
+  return config;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/tools/codec_config.h b/codec/L2/demos/jxlEnc/third_partys/tools/codec_config.h
new file mode 100644
index 0000000000..729c96d4a8
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/tools/codec_config.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_CODEC_CONFIG_H_
+#define TOOLS_CODEC_CONFIG_H_
+
+#include <string>
+
+namespace jpegxl {
+namespace tools {
+
+// Returns a short string describing the codec version (if known) and build
+// settings such as sanitizers and SIMD targets. Used in the benchmark and
+// command-line tools.
+std::string CodecConfigString(uint32_t lib_version);
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_CODEC_CONFIG_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/tools/cpu/cpu.cc b/codec/L2/demos/jxlEnc/third_partys/tools/cpu/cpu.cc
new file mode 100644
index 0000000000..24cc097a4b
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/tools/cpu/cpu.cc
@@ -0,0 +1,420 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/cpu/cpu.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "lib/jxl/base/arch_macros.h"  // for JXL_ARCH_*
+
+#if JXL_ARCH_X64
+#include <xmmintrin.h>
+#if !JXL_COMPILER_MSVC
+#include <cpuid.h>
+#endif
+#endif
+
+#if JXL_ARCH_PPC
+#include <sys/platform/ppc.h>  // __ppc_get_timebase_freq
+#endif
+
+#if JXL_ARCH_ARM
+#include <unistd.h>  // sysconf
+#endif
+
+#include <string.h>  // memcpy
+
+#include <set>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "tools/cpu/os_specific.h"
+
+using jxl::CeilLog2Nonzero;
+using jxl::Debug;
+
+namespace jpegxl {
+namespace tools {
+namespace cpu {
+namespace {
+
+#if JXL_ARCH_X64
+
+// For Cpuid.
+#pragma pack(push, 1)
+struct Regs {
+  uint32_t a;  // eax as returned by Cpuid()
+  uint32_t b;  // ebx
+  uint32_t c;  // ecx
+  uint32_t d;  // edx
+};
+#pragma pack(pop)
+
+// Executes CPUID with eax=level and ecx=count and stores the resulting
+// eax/ebx/ecx/edx in `r`. The caller must ensure `level` <= the max supported.
+void Cpuid(const uint32_t level, const uint32_t count, Regs* JXL_RESTRICT r) {
+#if JXL_COMPILER_MSVC
+  int regs[4];  // NOTE(review): intrinsic fills eax, ebx, ecx, edx - confirm
+  __cpuidex(regs, level, count);
+  r->a = regs[0];
+  r->b = regs[1];
+  r->c = regs[2];
+  r->d = regs[3];
+#else
+  // WARNING: avoid using __cpuid_count, which is broken: it lacks volatile and
+  // clobber "memory", so the compiler caches CPUID results, not realizing that
+  // CPUID:1b (APIC ID) changes across calls to SetThreadAffinity.
+  __asm__ __volatile__(
+      "xchgq %%rbx,%q1\n"  // park the caller's rbx in operand 1
+      "cpuid\n"
+      "xchgq %%rbx,%q1"  // operand 1 now holds ebx's result; rbx is restored
+      : "=a"(r->a), "=r"(r->b), "=c"(r->c), "=d"(r->d)
+      : "0"(level), "2"(count)  // inputs share the eax and ecx operands
+      : "memory");
+#endif
+}
+
+class Info {
+ public:
+  Info() {
+    Regs leaf0;
+    Cpuid(0, 0, &leaf0);
+    max_func_ = leaf0.a;
+
+    // The 12-byte vendor string is spread over ebx, edx, ecx, in that order.
+    const uint32_t vendor_words[3] = {leaf0.b, leaf0.d, leaf0.c};
+    char vendor[sizeof(vendor_words) + 1];
+    memcpy(vendor, vendor_words, sizeof(vendor_words));
+    vendor[sizeof(vendor_words)] = '\0';
+    intel_ = strcmp(vendor, "GenuineIntel") == 0;
+    amd_ = strcmp(vendor, "AuthenticAMD") == 0;
+
+    Regs ext0;
+    Cpuid(0x80000000u, 0, &ext0);
+    max_ext_func_ = ext0.a;
+  }
+
+  uint32_t MaxFunc() const { return max_func_; }
+  uint32_t MaxExtFunc() const { return max_ext_func_; }
+  bool Intel() const { return intel_; }
+  bool AMD() const { return amd_; }
+
+  std::string BrandString() const {
+    // The brand string needs extended leaves 0x80000002..0x80000004.
+    if (max_ext_func_ < 0x80000004U) return std::string();
+    char text[3 * sizeof(Regs) + 1];  // 16 bytes per leaf plus terminator
+    char* out = text;
+    for (uint32_t leaf = 0x80000002U; leaf <= 0x80000004U; ++leaf) {
+      Regs r;
+      Cpuid(leaf, 0, &r);
+      memcpy(out, &r, sizeof(r));
+      out += sizeof(r);
+    }
+    *out = '\0';
+    return text;
+  }
+
+ private:
+  uint32_t max_func_;      // eax of CPUID leaf 0
+  uint32_t max_ext_func_;  // eax of CPUID leaf 0x80000000
+  bool intel_;
+  bool amd_;
+};
+
+// Detects number of packages/cores/logical processors (HT/SMT).
+class X64_Topology {
+  enum ApicType {
+    kCpuid1_8Bit,   // initial APIC ID
+    kCpuidB_32Bit,  // x2APIC ID
+    kCpuid1E_32Bit  // AMD extended APIC ID
+  };
+
+ public:
+  // Pins this thread to each available CPU in turn, reads its APIC ID and
+  // counts distinct field values; false if a CPU cannot be pinned.
+  static jxl::Status Detect(ProcessorTopology* topology) {
+    const Info info;
+    if (DetectLegacyAMD(info, topology)) return true;
+    const ApicType type = DetectApicType(info);
+    uint32_t core_bits;
+    uint32_t logical_bits;
+    DetectFieldWidths(info, type, &core_bits, &logical_bits);
+    uint32_t total_bits = 0;
+    // Order matters: each Field starts at the bit where the previous one ends.
+    Field logical(logical_bits, &total_bits);
+    Field core(core_bits, &total_bits);
+    Field package(8, &total_bits);
+
+    // Query ProcessorId on each (accessible) logical processor:
+    ThreadAffinity* original_affinity = GetThreadAffinity();
+    for (int cpu : AvailableCPUs()) {
+      if (!PinThreadToCPU(cpu)) {
+        // Earlier iterations may have pinned this thread already, so undo
+        // that before bailing out; failure here is only worth a debug note.
+        if (!SetThreadAffinity(original_affinity)) {
+          Debug("Failed to restore thread affinity\n");
+        }
+        free(original_affinity);
+        return false;
+      }
+      const uint32_t id = ProcessorId(type);
+      logical.AddValue(id);
+      core.AddValue(id);
+      package.AddValue(id);
+    }
+    JXL_CHECK(SetThreadAffinity(original_affinity));
+    free(original_affinity);
+    topology->logical_per_core = logical.NumValues();
+    topology->cores_per_package = core.NumValues();
+    topology->packages = package.NumValues();
+    return true;
+  }
+
+ private:
+  // Returns true, after filling `topology` with 1/1/1, for old AMD CPUs.
+  static jxl::Status DetectLegacyAMD(const Info& info,
+                                     ProcessorTopology* topology) {
+    if (!info.AMD()) return false;
+
+    Regs leaf1;
+    Cpuid(1, 0, &leaf1);
+    const bool hyperthreads = (leaf1.d & (1U << 28)) != 0;
+    if (hyperthreads) {
+      // cpuid:8_1.c bit 2 is "legacy multicore" but it is still set in
+      // Threadripper 3, so we do not learn anything from it. Use the
+      // "extended" method like Intel: variable-width fields in APIC ID.
+      return false;
+    }
+
+    // "hyperthreads" not set: a single logical (no HT nor multicore).
+    topology->logical_per_core = 1;
+    topology->cores_per_package = 1;
+    topology->packages = 1;
+    return true;
+  }
+
+  static ApicType DetectApicType(const Info& info) {
+    // Prefer CPUID:B when the leaf exists and CPUID:1 ecx bit 21 is set.
+    Regs leaf1;
+    Cpuid(1, 0, &leaf1);
+    const bool ecx_bit21 = (leaf1.c & (1u << 21)) != 0;
+    if (info.MaxFunc() >= 0xB && ecx_bit21) return kCpuidB_32Bit;
+
+    // Otherwise, AMD CPUs advertising topology extensions use CPUID:8000001E.
+    if (info.AMD() && info.MaxExtFunc() >= 0x8000001E) {
+      Regs ext1;
+      Cpuid(0x80000001u, 0, &ext1);
+      if ((ext1.c & (1u << 22)) != 0) return kCpuid1E_32Bit;
+    }
+
+    return kCpuid1_8Bit;  // fall back to the 8-bit ID from CPUID:1
+  }
+
+  // Derives field widths from vendor CPUID leaves; both stay 0 if undetected.
+  // `core_bits`: APIC ID bits identifying the core (per package), so that
+  // #active cores <= (1 << core_bits). `logical_bits`: same, per-core threads.
+  static void DetectFieldWidths_Extended(const Info& info, const ApicType type,
+                                         uint32_t* JXL_RESTRICT core_bits,
+                                         uint32_t* JXL_RESTRICT logical_bits) {
+    *core_bits = 0;
+    *logical_bits = 0;
+
+    Regs r;
+    Cpuid(1, 0, &r);
+    const uint32_t logical_per_package = (r.b >> 16) & 0xFF;  // ebx[23:16]
+
+    if (info.Intel() && info.MaxFunc() >= 4) {
+      const bool hyperthreading_support = (r.d & (1U << 28)) != 0;
+
+      Cpuid(4, 0, &r);  // overwrites r; the CPUID:1 values were saved above
+      *core_bits = static_cast<uint32_t>(CeilLog2Nonzero((r.a >> 26) + 1));
+
+      if (hyperthreading_support) {
+        const uint32_t logical_per_core = logical_per_package >> *core_bits;
+        if (logical_per_core != 0) {  // otherwise logical_bits stays 0
+          *logical_bits =
+              static_cast<uint32_t>(CeilLog2Nonzero(logical_per_core));
+        }
+      }
+    }
+
+    if (info.AMD()) {
+      if (info.MaxExtFunc() >= 0x80000008u) {
+        Cpuid(0x80000008u, 0, &r);
+        // AMD 54945 Rev 3.03 documents this as total _threads_ per package;
+        // previously, this was listed as the number of _cores_.
+        uint32_t thread_bits = (r.c >> 12) & 0xF;
+        if (thread_bits == 0) {  // Invalid, ignore
+          const uint32_t num_threads = (r.c & 0xFF) + 1;  // ecx[7:0] + 1
+          thread_bits = static_cast<uint32_t>(CeilLog2Nonzero(num_threads));
+        }
+
+        if (type == kCpuid1E_32Bit) {
+          Cpuid(0x8000001Eu, 0, &r);
+          const uint32_t threads_per_core = ((r.b >> 8) & 0xFF) + 1;
+          *logical_bits =
+              static_cast<uint32_t>(CeilLog2Nonzero(threads_per_core));
+          *core_bits = thread_bits - *logical_bits;  // unsigned; no underflow check
+        } else {
+          // There does not seem to be another way to detect SMT, so
+          // assume it is not available.
+          *core_bits = thread_bits;
+          *logical_bits = 0;
+        }
+      } else {
+        // Old AMD => did not support SMT/HT yet.
+        *core_bits =
+            static_cast<uint32_t>(CeilLog2Nonzero(logical_per_package));
+        *logical_bits = 0;
+      }
+    }
+  }
+
+  // Reads field widths from CPUID:B sublevels; true only if both were found.
+  static bool DetectFieldWidths_B(const Info& info,
+                                  uint32_t* JXL_RESTRICT core_bits,
+                                  uint32_t* JXL_RESTRICT logical_bits) {
+    if (info.MaxFunc() < 0xB) return false;
+
+    bool got_smt = false;
+    bool got_core = false;
+
+    // Prevent spurious uninitialized-variable error
+    *core_bits = *logical_bits = 0;
+
+    for (uint32_t level = 0; level < 16; ++level) {  // at most 16 sublevels
+      Regs r;
+      Cpuid(0xB, level, &r);
+
+      // We finished all levels if this one has no enabled logical
+      // processors.
+      if ((r.b & 0xFFFF) == 0) break;
+
+      JXL_ASSERT(level == (r.c & 0xFF));  // Sanity check: should match input
+
+      const uint32_t level_type = (r.c >> 8) & 0xFF;  // ecx[15:8]
+      const uint32_t level_bits = r.a & 0x1F;         // eax[4:0]
+
+      switch (level_type) {
+        case 0:
+          Debug("Invalid CPUID level %u despite enabled>0", level);
+          break;
+
+        case 1:  // SMT
+          *logical_bits = level_bits;
+          got_smt = true;
+          break;
+
+        case 2:  // core
+          *core_bits = level_bits;
+          got_core = true;
+          break;
+
+        default:
+          Debug("Ignoring CPUID:B level %u type %u (%u bits)\n", level,
+                level_type, level_bits);
+          break;
+      }
+    }
+
+    if (got_core && got_smt) {
+      // Core is actually all logical within a package, so subtract now that
+      // we also know logical_bits.
+      JXL_ASSERT(*core_bits >= *logical_bits);
+      *core_bits -= *logical_bits;
+      return true;
+    }
+
+    // CPUID:B was incomplete; outputs may hold one of the two widths.
+    return false;
+  }
+
+  // Computes the APIC ID field widths by querying only the current processor,
+  // which is assumed to be representative of all others.
+  static void DetectFieldWidths(const Info& info, const ApicType type,
+                                uint32_t* JXL_RESTRICT core_bits,
+                                uint32_t* JXL_RESTRICT logical_bits) {
+    // CPUID:B is preferred on Intel; it is not available on AMD as of TR3.
+    const bool used_cpuid_b =
+        type == kCpuidB_32Bit &&
+        DetectFieldWidths_B(info, core_bits, logical_bits);
+    if (used_cpuid_b) return;
+
+    // CPUID:B not available or failed: use the vendor-specific leaves.
+    DetectFieldWidths_Extended(info, type, core_bits, logical_bits);
+  }
+
+  // A bit field of `bits` bits inside an APIC ID, starting at the bit offset
+  // `*total_bits` had at construction. Tracks the distinct values seen.
+  class Field {
+   public:
+    Field(const uint32_t bits, uint32_t* JXL_RESTRICT total_bits)
+        : mask_((1U << bits) - 1U), shift_(*total_bits) {
+      *total_bits = shift_ + bits;  // the next field starts right above
+    }
+
+    void AddValue(const uint32_t id) { values_.insert(mask_ & (id >> shift_)); }
+
+    size_t NumValues() const { return values_.size(); }
+
+   private:
+    const uint32_t mask_;   // zero for zero-width fields
+    const uint32_t shift_;  // bit offset of this field within the id
+    std::set<uint32_t> values_;
+  };
+
+  // Returns the APIC ID of the logical processor this thread is running on,
+  // read from the CPUID leaf that `type` selects.
+  static uint32_t ProcessorId(const ApicType type) {
+    Regs r;
+
+    if (type == kCpuidB_32Bit) {
+      Cpuid(0xB, 0, &r);
+      JXL_ASSERT(r.b != 0);
+      // Note: whether or not x2APIC is actually supported and enabled, its
+      // lower 8 bits match the initial APIC ID (CPUID:1.b).
+      return r.d;
+    }
+    if (type == kCpuid1_8Bit) {
+      Cpuid(1, 0, &r);
+      return r.b >> 24;  // bits 31..24 of ebx
+    }
+    if (type == kCpuid1E_32Bit) {
+      Cpuid(0x8000001E, 0, &r);
+      return r.a;
+    }
+
+    // Unreachable for valid ApicType values.
+    return 0;
+  }
+};
+
+#endif  // JXL_ARCH_*
+
+}  // namespace
+
+jxl::Status DetectProcessorTopology(ProcessorTopology* pt) {
+  if (GetProcessorTopologyFromOS(pt)) return true;  // prefer the OS's answer
+#if JXL_ARCH_X64
+  if (X64_Topology::Detect(pt)) return true;  // else enumerate via CPUID
+#elif JXL_ARCH_ARM
+  // TODO(deymo): Actually look up the CPU topology and model the big/small
+  // core split.
+  pt->logical_per_core = 1;
+  pt->cores_per_package = sysconf(_SC_NPROCESSORS_ONLN);  // all in 1 package
+  pt->packages = 1;
+  return true;
+#endif
+  return JXL_FAILURE("Unable to detect processor topology");
+}
+
+}  // namespace cpu
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/tools/cpu/cpu.h b/codec/L2/demos/jxlEnc/third_partys/tools/cpu/cpu.h
new file mode 100644
index 0000000000..4d2fb54675
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/tools/cpu/cpu.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_CPU_CPU_H_
+#define TOOLS_CPU_CPU_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/status.h"
+
+namespace jpegxl {
+namespace tools {
+namespace cpu {
+
+struct ProcessorTopology {
+  size_t logical_per_core = 1;   // logical processors (HT/SMT) per core
+  size_t cores_per_package = 1;  // cores per package
+  size_t packages = 1;           // number of packages
+};
+
+// Relatively expensive, preferably only call once.
+jxl::Status DetectProcessorTopology(ProcessorTopology* pt);
+
+}  // namespace cpu
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_CPU_CPU_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/tools/cpu/os_specific.cc b/codec/L2/demos/jxlEnc/third_partys/tools/cpu/os_specific.cc
new file mode 100644
index 0000000000..d87e3d72fd
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/tools/cpu/os_specific.cc
@@ -0,0 +1,373 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/cpu/os_specific.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <ctime>
+#include <numeric>
+#include <random>
+
+#include "lib/jxl/base/os_macros.h"  // for JXL_OS_*
+#include "tools/cpu/cpu.h"           // ProcessorTopology
+
+#if JXL_OS_WIN
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif  // NOMINMAX
+#include <windows.h>
+#endif  // JXL_OS_WIN
+
+#if JXL_OS_LINUX
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif  // _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#endif  // JXL_OS_LINUX
+
+#if JXL_OS_MAC
+#include <mach/mach.h>
+#include <mach/mach_time.h>
+#include <mach/thread_act.h>
+#include <mach/thread_policy.h>
+#include <sys/sysctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#endif  // JXL_OS_MAC
+
+#if JXL_OS_FREEBSD
+#include <sys/cpuset.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#endif  // JXL_OS_FREEBSD
+
+#if JXL_OS_HAIKU
+#include <OS.h>
+#endif  // JXL_OS_HAIKU
+
+using jxl::Status;
+
+namespace jpegxl {
+namespace tools {
+namespace cpu {
+
+// Emulate Linux type (cpu_set_t) + interface on other platforms
+
+#if JXL_OS_FREEBSD
+using cpu_set_t = cpuset_t;
+#elif JXL_OS_WIN || JXL_OS_MAC || JXL_OS_HAIKU
+using cpu_set_t = uint64_t;
+
+static inline void CPU_ZERO(cpu_set_t* set) { set[0] = cpu_set_t{0}; }
+
+static inline int CPU_ISSET(int cpu, const cpu_set_t* set) {
+  return static_cast<int>((*set >> cpu) & 1ULL);  // 1 if bit `cpu` is set
+}
+
+static inline void CPU_SET(int cpu, cpu_set_t* set) { set[0] |= 1ULL << cpu; }
+#endif
+
+struct ThreadAffinity {
+  cpu_set_t set;  // native or emulated CPU set, depending on the platform
+};
+
+#if JXL_OS_MAC
+namespace {
+Status GetSystemValue(const char* name, size_t* value) {
+  int64_t raw = 0;  // zero-initialized receive buffer for the sysctl value
+  size_t raw_size = sizeof(raw);
+  if (sysctlbyname(name, &raw, &raw_size, nullptr, 0) != 0)
+    return JXL_FAILURE("sysctl packages failed");
+  JXL_ASSERT(raw >= 0);
+  *value = static_cast<size_t>(raw);
+  return true;
+}
+
+// Returns a mask whose lowest N bits are set, N being the logical processor
+// count reported by sysctl (capped at the 64 bits cpu_set_t holds here).
+cpu_set_t SetOfAllLogicalProcessors() {
+  size_t count;
+  if (!GetSystemValue("machdep.cpu.thread_count", &count)) {
+    return 1;  // On failure, assume there is at least one logical processor.
+  }
+
+  if (count > 64) {
+    printf("Warning: more than 64 logical processors, update cpu_set_t");
+  }
+  // Shifting a 64-bit value by 64 or more is undefined, so handle it apart.
+  return count >= 64 ? ~0ull : (1ull << count) - 1;
+}
+}  // namespace
+
+#elif JXL_OS_HAIKU
+
+namespace {
+cpu_set_t SetOfAllLogicalProcessors() {
+  // Mask with one low bit set per CPU reported by get_system_info().
+  system_info info;
+  get_system_info(&info);
+  const auto cpus = info.cpu_count;
+
+  if (cpus > 64) {
+    printf("Warning: more than 64 logical processors, update cpu_set_t");
+  }
+  // Shifting a 64-bit value by 64 or more is undefined, so handle it apart.
+  return cpus >= 64 ? ~0ull : (1ull << cpus) - 1;
+}
+}  // namespace
+
+#endif
+
+Status GetProcessorTopologyFromOS(ProcessorTopology* pt) {
+  // Fills `pt` from OS queries; returns false where that is not implemented.
+#if JXL_OS_MAC
+  size_t packages, cores, logical;  // totals, not per package/core!
+  JXL_RETURN_IF_ERROR(GetSystemValue("hw.packages", &packages));
+  JXL_RETURN_IF_ERROR(GetSystemValue("machdep.cpu.core_count", &cores));
+  JXL_RETURN_IF_ERROR(GetSystemValue("machdep.cpu.thread_count", &logical));
+  // Used as divisors below: fail (leaving `pt` untouched) rather than crash.
+  if (packages == 0 || cores == 0) {
+    return JXL_FAILURE("sysctl reported zero packages or cores");
+  }
+  pt->packages = packages;
+  pt->cores_per_package = cores / packages;
+  pt->logical_per_core = logical / cores;
+  return true;
+#elif JXL_OS_HAIKU
+  system_info info;
+  get_system_info(&info);
+  pt->packages = 1;
+  pt->cores_per_package = info.cpu_count;
+  pt->logical_per_core = 1;
+  return true;
+#else
+  // Not needed on X64 if the affinity APIs work (DetectProcessorTopology will
+  // succeed)
+  return false;
+#endif
+}
+
+ThreadAffinity* GetThreadAffinity() {  // result is malloc()ed; caller free()s
+  auto* affinity = static_cast<ThreadAffinity*>(malloc(sizeof(ThreadAffinity)));
+  JXL_CHECK(affinity != nullptr);  // fail loudly instead of writing to null
+#if JXL_OS_WIN
+  DWORD_PTR process_affinity, system_affinity;
+  const BOOL ok = GetProcessAffinityMask(GetCurrentProcess(), &process_affinity,
+                                         &system_affinity);
+  JXL_CHECK(ok);
+  affinity->set = process_affinity;
+#elif JXL_OS_LINUX
+  CPU_ZERO(&affinity->set);
+  const int err = sched_getaffinity(0, sizeof(cpu_set_t), &affinity->set);
+  JXL_CHECK(err == 0);
+#elif JXL_OS_FREEBSD
+  const pid_t pid = getpid();  // current thread
+  const int err = cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, pid,
+                                     sizeof(cpuset_t), &affinity->set);
+  JXL_CHECK(err == 0);
+#elif JXL_OS_MAC || JXL_OS_HAIKU
+  static cpu_set_t all = SetOfAllLogicalProcessors();  // computed only once
+  affinity->set = all;
+#endif
+  return affinity;
+}
+
+namespace {
+
+ThreadAffinity* OriginalThreadAffinity() {
+  static ThreadAffinity* const initial = GetThreadAffinity();  // captured once
+  return initial;
+}
+
+}  // namespace
+
+// Applies `affinity` through the platform API. Fails if the OS call fails
+// and always on platforms without pinning (macOS, Haiku, unknown).
+Status SetThreadAffinity(ThreadAffinity* affinity) {
+  // Capture the original affinity before anything below can change it.
+  const ThreadAffinity* const original = OriginalThreadAffinity();
+  JXL_CHECK(original != nullptr);
+#if JXL_OS_WIN
+  const DWORD_PTR previous_mask =
+      SetThreadAffinityMask(GetCurrentThread(), affinity->set);
+  if (previous_mask == 0) return JXL_FAILURE("SetThreadAffinityMask failed");
+  return true;
+#elif JXL_OS_LINUX
+  const int rc = sched_setaffinity(0, sizeof(cpu_set_t), &affinity->set);
+  if (rc != 0) return JXL_FAILURE("sched_setaffinity failed");
+  return true;
+#elif JXL_OS_FREEBSD
+  const int rc = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, getpid(),
+                                    sizeof(cpuset_t), &affinity->set);
+  if (rc != 0) return JXL_FAILURE("cpuset_setaffinity failed");
+  return true;
+#elif JXL_OS_MAC
+  // As of 2019-03 we are unaware of a way to reliably restrict a thread to
+  // core(s); THREAD_AFFINITY_POLICY is only a hint.
+  (void)affinity;
+  return false;
+#elif JXL_OS_HAIKU
+  // As of 2020-06 Haiku does not support pinning threads to cores.
+  (void)affinity;
+  return false;
+#else
+  printf("Don't know how to SetThreadAffinity on this platform.\n");
+  return false;
+#endif
+}
+
+// Returns the indices of CPUs set in the original affinity, ascending.
+std::vector<int> AvailableCPUs() {
+  std::vector<int> available;
+  available.reserve(128);
+#if JXL_OS_WIN || JXL_OS_LINUX || JXL_OS_FREEBSD || JXL_OS_MAC || JXL_OS_HAIKU
+  const ThreadAffinity* const original = OriginalThreadAffinity();
+  const int num_bits = static_cast<int>(sizeof(cpu_set_t)) * 8;
+  for (int bit = 0; bit < num_bits; ++bit) {
+    if (CPU_ISSET(bit, &original->set)) available.push_back(bit);
+  }
+#else
+  available.push_back(0);  // unknown platform: report a single CPU
+#endif
+  return available;
+}
+
+// Builds a single-CPU mask for `cpu` and hands it to SetThreadAffinity.
+// Always fails if JXL_DISABLE_PINNING is defined or the platform is unknown.
+Status PinThreadToCPU(const int cpu) {
+#if defined(JXL_DISABLE_PINNING)
+  return false;
+#elif JXL_OS_WIN || JXL_OS_LINUX || JXL_OS_FREEBSD || JXL_OS_MAC || JXL_OS_HAIKU
+  ThreadAffinity single_cpu;
+  CPU_ZERO(&single_cpu.set);
+  CPU_SET(cpu, &single_cpu.set);
+  return SetThreadAffinity(&single_cpu);
+#else
+  return false;
+#endif
+}
+
+// Pins the thread to a randomly chosen available CPU, skipping the first two.
+// Fails (instead of aborting) when two or fewer CPUs are available.
+Status PinThreadToRandomCPU() {
+  std::vector<int> cpus = AvailableCPUs();
+
+  // Remove first two CPUs because interrupts are often pinned to them.
+  if (cpus.size() <= 2) return JXL_FAILURE("Need > 2 CPUs to pin randomly");
+  cpus.erase(cpus.begin(), cpus.begin() + 2);
+
+  // Random choice to prevent burning up the same core.
+  std::random_device device;
+  std::ranlux48 generator(device());
+  std::shuffle(cpus.begin(), cpus.end(), generator);
+  return PinThreadToCPU(cpus.front());
+}
+
+namespace {
+
+// Asks the OS for the total physical memory [MiB]; returns 0 if unknown.
+size_t DetectTotalMemoryMiB() {
+#if JXL_OS_LINUX || JXL_OS_FREEBSD || JXL_OS_MAC
+  const long page_size = sysconf(_SC_PAGESIZE);
+  const long num_pages = sysconf(_SC_PHYS_PAGES);
+  if (page_size == -1 || num_pages == -1) {
+    JXL_WARNING("Failed to detect page size (%ld) and/or num pages (%ld)",
+                page_size, num_pages);
+    return 0;
+  }
+  JXL_ASSERT(page_size > 0 && num_pages > 0);
+  const uint64_t pages = static_cast<uint64_t>(num_pages);
+  return (pages * static_cast<uint64_t>(page_size)) >> 20;
+#elif JXL_OS_WIN
+  MEMORYSTATUSEX status;
+  status.dwLength = sizeof(status);
+  if (!GlobalMemoryStatusEx(&status)) {
+    JXL_WARNING("Failed to get memory status");
+    return 0;
+  }
+  // ullTotalPhys excludes nonpaged pool reserved during boot; round up to
+  // whole MiB to improve the estimate.
+  const uint64_t total_bytes = status.ullTotalPhys;
+  return (total_bytes + (1U << 20) - 1) >> 20;
+#elif JXL_OS_HAIKU
+  system_info info;
+  get_system_info(&info);
+  return (info.max_pages * B_PAGE_SIZE) >> 20;
+#else
+  JXL_WARNING("Implement DetectTotalMemoryMiB for this platform");
+  return 0;
+#endif
+}
+
+}  // namespace
+
+size_t TotalMemoryMiB() {
+  static const size_t cached_mib = DetectTotalMemoryMiB();  // detected once
+  return cached_mib;
+}
+
+/*
+Status RunCommand(const std::vector<std::string>& args) {
+#if _POSIX_VERSION >= 200112L
+  // Avoid system(), but do not try to be over-zealous about not passing along
+  // some special resources further (such as: inherited-not-marked-FD_CLOEXEC
+  // file descriptors).
+  std::vector<const char*> c_args;
+  c_args.reserve(args.size() + 1);
+  for (size_t i = 0; i < args.size(); ++i) {
+    c_args.push_back(args[i].c_str());
+  }
+  c_args.push_back(nullptr);
+  const pid_t pid = fork();
+  if (pid == -1)  // fork() failed.
+    return false;
+  if (pid != 0) {  // Parent process.
+    int ret_status;
+    if (pid != waitpid(pid, &ret_status, 0)) {
+      return false;  // waitpid() error.
+    }
+    return ret_status == 0;
+  } else {  // Child process.
+    execvp(c_args[0],
+           // Address benign-but-annoying execvp() signature weirdness.
+           const_cast<char* const*>(c_args.data()));
+    JXL_ABORT("Failed to run command.\n");
+  }
+#elif JXL_OS_WIN
+  // Synthesize a string for system(). And warn about it.
+  // TODO(user): Fix this - research the safe way to run a command on Windows.
+  // Likely, the solution is along these lines:
+  // docs.microsoft.com/en-us/windows/desktop/ProcThread/creating-processes
+  std::ostringstream cmd;
+  std::copy(args.begin(), args.end(),
+            std::ostream_iterator<std::string>(cmd, " "));
+  printf(stderr, "Warning: Using system() on string: %s\n", cmd.str.c_str());
+  int ret = system(cmd.str.c_str());
+  if (errno != ENOENT &&  // Windows: Command interpreter not found.
+      ret == 0) {
+    return true;
+  }
+  return false;
+#else
+#error Neither a POSIX-1.2001 nor a Windows System.
+#endif
+}
+*/
+
+}  // namespace cpu
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/tools/cpu/os_specific.h b/codec/L2/demos/jxlEnc/third_partys/tools/cpu/os_specific.h
new file mode 100644
index 0000000000..ea7b1ecaf0
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/tools/cpu/os_specific.h
@@ -0,0 +1,62 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_CPU_OS_SPECIFIC_H_
+#define TOOLS_CPU_OS_SPECIFIC_H_
+
+// OS-specific function to query the processor topology and thread affinity.
+
+#include <stddef.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+
+namespace jpegxl {
+namespace tools {
+namespace cpu {
+
+struct ProcessorTopology;
+
+// Called by arch_specific. Returns false if `pt` remains unchanged. Only
+// implemented on macOS and Haiku.
+jxl::Status GetProcessorTopologyFromOS(ProcessorTopology* pt);
+
+// Returns logical processor numbers in [0, N), where N is the number of bits in
+// the thread's initial affinity (unaffected by any SetThreadAffinity).
+std::vector<int> AvailableCPUs();
+
+// Opaque.
+struct ThreadAffinity;
+
+// Returns current affinity; useful for restoring the original value.
+// Caller must free() the pointer - dynamic allocation is required because
+// ThreadAffinity is an incomplete type.
+ThreadAffinity* GetThreadAffinity();
+
+// Restores a previous affinity returned by GetThreadAffinity.
+jxl::Status SetThreadAffinity(ThreadAffinity* affinity);
+
+// Ensures the thread is running on the specified cpu, and no others.
+// Useful for reducing nanobenchmark variability (fewer context switches).
+// Calls SetThreadAffinity.
+jxl::Status PinThreadToCPU(int cpu);
+
+// Random choice of CPU avoids overloading any one core. Calls PinThreadToCPU.
+jxl::Status PinThreadToRandomCPU();
+
+// Returns total physical memory size [MiB], or 0 if unknown. This function
+// returns a cached value initialized on the first call.
+size_t TotalMemoryMiB();
+
+// Executes a command in a subprocess.
+// Status RunCommand(const std::vector<std::string>& args);
+
+}  // namespace cpu
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_CPU_OS_SPECIFIC_H_
diff --git a/codec/L2/demos/jxlEnc/third_partys/tools/speed_stats.cc b/codec/L2/demos/jxlEnc/third_partys/tools/speed_stats.cc
new file mode 100644
index 0000000000..2aea8a0d5e
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/tools/speed_stats.cc
@@ -0,0 +1,107 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "tools/speed_stats.h"
+
+#include <math.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <string>
+
+#include "lib/jxl/base/robust_statistics.h"
+
+namespace jpegxl {
+namespace tools {
+
+void SpeedStats::NotifyElapsed(double elapsed_seconds) {
+  JXL_ASSERT(elapsed_seconds > 0.0);  // timings are later used as divisors
+  elapsed_.push_back(elapsed_seconds);
+}
+
+// Summarizes the timings into `s`; fails if NotifyElapsed was never called.
+jxl::Status SpeedStats::GetSummary(SpeedStats::Summary* s) {
+  if (elapsed_.empty()) return JXL_FAILURE("Didn't call NotifyElapsed");
+
+  s->min = *std::min_element(elapsed_.begin(), elapsed_.end());
+  s->max = *std::max_element(elapsed_.begin(), elapsed_.end());
+
+  // Single rep
+  if (elapsed_.size() == 1) {
+    s->central_tendency = elapsed_[0];
+    s->variability = 0.0;
+    s->type = "";
+    return true;
+  }
+
+  // Two: skip first (noisier)
+  if (elapsed_.size() == 2) {
+    s->central_tendency = elapsed_[1];
+    s->variability = 0.0;
+    s->type = " second:";
+    return true;
+  }
+
+  // Prefer geomean unless numerically unreliable (too many reps)
+  if (std::pow(elapsed_[0], elapsed_.size()) < 1E100) {
+    // Sum logs: a raw product of many sub-second timings underflows to zero.
+    double log_sum = 0.0;
+    for (size_t i = 1; i < elapsed_.size(); ++i)
+      log_sum += std::log(elapsed_[i]);
+    s->central_tendency = std::exp(log_sum / (elapsed_.size() - 1));
+    s->variability = 0.0;
+    s->type = " geomean:";
+    return true;
+  }
+
+  // Else: mode
+  std::sort(elapsed_.begin(), elapsed_.end());
+  s->central_tendency = jxl::HalfSampleMode()(elapsed_.data(), elapsed_.size());
+  s->variability = jxl::MedianAbsoluteDeviation(elapsed_, s->central_tendency);
+  s->type = "mode: ";
+  return true;
+}
+
+namespace {
+
+// Formats `value`/elapsed as a rate string; empty if `value` is zero.
+std::string SummaryStat(double value, const char* unit,
+                        const SpeedStats::Summary& s) {
+  if (value == 0.) return "";
+  // Rates are inversely related to elapsed time, so s.max yields the lowest.
+  const double typical_rate = value / s.central_tendency;
+  const double lowest_rate = value / s.max;
+  const double highest_rate = value / s.min;
+
+  char stat_str[100] = {'\0'};
+  int ret = snprintf(stat_str, sizeof(stat_str), ",%s %.2f %s/s [%.2f, %.2f]",
+                     s.type, typical_rate, unit, lowest_rate, highest_rate);
+  (void)ret;  // ret is unused when JXL_ASSERT is disabled.
+  JXL_ASSERT(ret < static_cast<int>(sizeof(stat_str)));
+  return stat_str;
+}
+
+}  // namespace
+
+// Prints size, MP/s and MB/s summaries, rep and thread counts to stderr.
+jxl::Status SpeedStats::Print(size_t worker_threads) {
+  Summary s;
+  JXL_RETURN_IF_ERROR(GetSummary(&s));
+  const std::string mp_per_s = SummaryStat(xsize_ * ysize_ * 1e-6, "MP", s);
+  const std::string mb_per_s = SummaryStat(file_size_ * 1e-6, "MB", s);
+
+  char variability[20] = {'\0'};
+  if (s.variability != 0.0) {
+    snprintf(variability, sizeof(variability), " (var %.2f)", s.variability);
+  }
+  fprintf(stderr, "%zu x %zu%s%s%s, %zu reps, %zu threads.\n", xsize_, ysize_,
+          mp_per_s.c_str(), mb_per_s.c_str(), variability, elapsed_.size(),
+          worker_threads);
+  return true;
+}
+
+}  // namespace tools
+}  // namespace jpegxl
diff --git a/codec/L2/demos/jxlEnc/third_partys/tools/speed_stats.h b/codec/L2/demos/jxlEnc/third_partys/tools/speed_stats.h
new file mode 100644
index 0000000000..eec8a58586
--- /dev/null
+++ b/codec/L2/demos/jxlEnc/third_partys/tools/speed_stats.h
@@ -0,0 +1,63 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef TOOLS_SPEED_STATS_H_
+#define TOOLS_SPEED_STATS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+
+namespace jpegxl {
+namespace tools {
+
+class SpeedStats {
+ public:
+  void NotifyElapsed(double elapsed_seconds);  // records one repetition
+
+  struct Summary {
+    // How central_tendency was computed - depends on number of reps.
+    const char* type;
+
+    // Elapsed time in seconds; variability is 0 unless the mode was used.
+    double central_tendency;
+    double min;
+    double max;
+    double variability;
+  };
+
+  // Fails if NotifyElapsed was never called. Non-const, may sort elapsed_.
+  jxl::Status GetSummary(Summary* summary);
+
+  // Sets the image size to allow computing MP/s values.
+  void SetImageSize(size_t xsize, size_t ysize) {
+    xsize_ = xsize;
+    ysize_ = ysize;
+  }
+
+  // Sets the file size to allow computing MB/s values.
+  void SetFileSize(size_t file_size) { file_size_ = file_size; }
+
+  // Calls GetSummary and prints megapixels/sec. SetImageSize() must be called
+  // once before this can be used.
+  jxl::Status Print(size_t worker_threads);
+
+ private:
+  std::vector<double> elapsed_;  // seconds, one entry per NotifyElapsed call
+  size_t xsize_ = 0;
+  size_t ysize_ = 0;
+
+  // Size of the source binary file, meaningful when decoding a recompressed
+  // JPEG.
+  size_t file_size_ = 0;
+};
+
+}  // namespace tools
+}  // namespace jpegxl
+
+#endif  // TOOLS_SPEED_STATS_H_
diff --git a/codec/L2/demos/leptonEnc/Makefile b/codec/L2/demos/leptonEnc/Makefile
new file mode 100644
index 0000000000..0898eb0898
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/Makefile
@@ -0,0 +1,331 @@
+# Copyright 2019-2022 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# vitis makefile-generator v2.0.6
+
+############################## Help Section ##############################
+.PHONY: help
+
+help::
+	$(ECHO) "Makefile Usage:"
+	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to generate the design for specified Target and Shell."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to run application in emulation."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make xclbin TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to build xclbin application."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make host TARGET=<hw/hw_emu/sw_emu/>"
+	$(ECHO) "      Command to build host application."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
+	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) ""
+	$(ECHO) "  make clean "
+	$(ECHO) "      Command to remove the generated non-hardware files."
+	$(ECHO) ""
+	$(ECHO) "  make cleanall"
+	$(ECHO) "      Command to remove all the generated files."
+	$(ECHO) ""
+
+############################## Setting up Project Variables ##############################
+
+MK_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+XF_PROJ_ROOT ?= $(shell bash -c 'export MK_PATH=$(MK_PATH); echo $${MK_PATH%/L2/*}')
+CUR_DIR := $(patsubst %/,%,$(dir $(MK_PATH)))
+XFLIB_DIR = $(XF_PROJ_ROOT)
+
+# setting default values
+TARGET ?= sw_emu
+HOST_ARCH ?= x86
+
+#setting PLATFORM
+ifeq ($(PLATFORM),)
+PLATFORM := $(DEVICE)
+endif
+ifeq ($(PLATFORM),)
+PLATFORM := xilinx_u200_gen3x16_xdma_2_202110_1
+endif
+
+# #################### Checking if PLATFORM in allowlist ############################
+PLATFORM_ALLOWLIST +=  u200
+PLATFORM_BLOCKLIST +=  zc
+
+include ./utils.mk
+TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
+TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
+BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
+BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
+EMCONFIG := $(BUILD_DIR)/emconfig.json
+XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
+export XCL_BINDIR = $(XCLBIN_DIR)
+
+EXE_FILE_DEPS :=
+BINARY_CONTAINERS_DEPS :=
+RUN_DEPS :=
+
+# get global setting
+ifeq ($(HOST_ARCH), x86)
+CXXFLAGS += -fmessage-length=0 -I$(CUR_DIR)/src/ -I$(XILINX_XRT)/include -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
+LDFLAGS += -pthread -L$(XILINX_XRT)/lib -L$(XILINX_HLS)/lnx64/tools/fpo_v7_0  -Wl,--as-needed -lOpenCL -lxrt_coreutil -lgmp -lmpfr -lIp_floating_point_v7_0_bitacc_cmodel 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
+VPP_LDFLAGS += --optimize 2 -R 2 
+else ifeq ($(HOST_ARCH), aarch64)
+CXXFLAGS += -I$(CUR_DIR)/src/ -fmessage-length=0 --sysroot=$(SYSROOT)  -I$(SYSROOT)/usr/include/xrt -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
+LDFLAGS += -pthread -L$(SYSROOT)/usr/lib -L$(XILINX_VITIS_AIETOOLS)/lib/aarch64.o -Wl,--as-needed -lxilinxopencl -lxrt_coreutil 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
+VPP_LDFLAGS += --optimize 2 -R 2 
+endif
+CXXFLAGS += $(EXTRA_CXXFLAGS)
+VPP_FLAGS += $(EXTRA_VPP_FLAGS)
+
+########################## Setting up Host Variables ##########################
+ifeq ($(TARGET),sw_emu)
+CXXFLAGS += -D SW_EMU_TEST
+endif
+ifeq ($(TARGET),hw_emu)
+CXXFLAGS += -D HW_EMU_TEST
+endif
+
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
+#Include Required Host Source Files
+HOST_SRCS += $(XFLIB_DIR)/L2/demos/leptonEnc/host/lepton/simple_encoder.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/lepton/bitops.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/lepton/fork_serve.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/lepton/thread_handoff.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/lepton/socket_serve.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/lepton/validation.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/lepton/recoder.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/lepton/idct.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/lepton/jpgcoder.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/lepton/jpgcoder_hls.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/lepton/uncompressed_components.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/lepton/lepton_codec.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/lepton/vp8_decoder.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/lepton/simple_decoder.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/lepton/vp8_encoder.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/io/ZlibCompression.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/io/Seccomp.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/io/MemReadWriter.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/io/ioutil.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/io/Zlib0.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/vp8/util/generic_worker.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/vp8/util/memory.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/vp8/util/billing.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/vp8/util/debug.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/vp8/model/JpegArithmeticCoder.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/vp8/model/model.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/vp8/model/numeric.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/vp8/encoder/encoder.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/vp8/decoder/decoder.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/vp8/encoder/boolwriter.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/vp8/decoder/boolreader.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/io/MemMgrAllocator.cc $(XFLIB_DIR)/L2/demos/leptonEnc/host/other/loop_stt.cc $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
+CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include/hw/jpegDec -I $(XFLIB_DIR)/L2/include/hw/leptonEnc/jpegDec -I $(XFLIB_DIR)/L2/include/hw/leptonEnc/lepton -I $(XFLIB_DIR)/L2/demos/leptonEnc/kernel -I $(XFLIB_DIR)/L2/demos/leptonEnc/host/lepton -I $(XFLIB_DIR)/L2/demos/leptonEnc/host/other -I $(XFLIB_DIR)/L2/demos/leptonEnc/host/vp8/util -I $(XFLIB_DIR)/L2/demos/leptonEnc/host/vp8/model -I $(XFLIB_DIR)/L2/demos/leptonEnc/host/vp8/encoder -I $(XFLIB_DIR)/L2/demos/leptonEnc/host/vp8/decoder -I $(XFLIB_DIR)/../utils/L1/include/xf_utils_hw -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
+CXXFLAGS += -std=c++14 -fPIC -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label -pthread -L/usr/lib64/ -lcrypto -lz -msse4.2 -DUSE_STANDARD_MEMORY_ALLOCATORS -DUSE_SYSTEM_DEPENDENCIES -DUSE_SYSTEM_LIBRARIES -DHIGH_MEMORY
+
+EXE_NAME := host.exe
+EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
+EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
+
+HOST_ARGS :=  -xclbin $(BUILD_DIR)/lepEnc.xclbin images/
+ifneq ($(HOST_ARCH), x86)
+PKG_HOST_ARGS = $(foreach args,$(HOST_ARGS),$(subst $(dir $(patsubst %/,%,$(args))),,$(args)))
+endif
+
+########################## Kernel compiler global settings ##########################
+ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
+VPP_FLAGS +=   --config $(CUR_DIR)/conn_u200.cfg
+VPP_FLAGS +=  -I $(XFLIB_DIR)/L2/demos/leptonEnc/kernel -I $(XFLIB_DIR)/L2/include/hw/leptonEnc/jpegDec -I $(XFLIB_DIR)/L2/include/hw/leptonEnc/lepton -I $(XFLIB_DIR)/../utils/L1/include/xf_utils_hw -I $(XFLIB_DIR)/../utils/L1/include
+
+else 
+VPP_FLAGS +=  -I $(XFLIB_DIR)/L2/demos/leptonEnc/kernel -I $(XFLIB_DIR)/L2/include/hw/leptonEnc/jpegDec -I $(XFLIB_DIR)/L2/include/hw/leptonEnc/lepton -I $(XFLIB_DIR)/../utils/L1/include/xf_utils_hw -I $(XFLIB_DIR)/../utils/L1/include
+
+endif
+
+######################### binary container global settings ##########################
+VPP_FLAGS_lepEnc +=  -D KERNEL_NAME=lepEnc
+VPP_FLAGS_lepEnc += --hls.clock 300000000:lepEnc
+ifneq ($(HOST_ARCH), x86)
+VPP_LDFLAGS_lepEnc += --clock.defaultFreqHz 300000000
+else
+VPP_LDFLAGS_lepEnc += --kernel_frequency 300
+endif
+
+ifeq ($(HOST_ARCH), x86)
+BINARY_CONTAINERS += $(BUILD_DIR)/lepEnc.xclbin
+else
+BINARY_CONTAINERS += $(BUILD_DIR)/lepEnc_pkg.$(LINK_TARGET_FMT)
+BINARY_CONTAINERS_PKG += $(BUILD_DIR)/lepEnc.xclbin
+endif
+
+# ################ Setting Rules for Binary Containers (Building Kernels) ################
+$(TEMP_DIR)/lepEnc.xo: $(XFLIB_DIR)/L2/demos/leptonEnc/kernel/multi_cu.cpp $(XFLIB_DIR)/L2/demos/leptonEnc/kernel/XModified.cpp $(XFLIB_DIR)/L2/demos/leptonEnc/kernel/XAcc_model.cpp $(XFLIB_DIR)/L2/demos/leptonEnc/kernel/XAcc_jpegdecoder.cpp $(XFLIB_DIR)/L2/demos/leptonEnc/kernel/XAcc_jfifparser.cpp $(XFLIB_DIR)/L2/demos/leptonEnc/kernel/XAcc_edges.cpp $(XFLIB_DIR)/L2/demos/leptonEnc/kernel/XAcc_dc.cpp $(XFLIB_DIR)/L2/demos/leptonEnc/kernel/XAcc_common.cpp $(XFLIB_DIR)/L2/demos/leptonEnc/kernel/XAcc_arith.cpp $(XFLIB_DIR)/L2/demos/leptonEnc/kernel/XAcc_77.cpp $(XFLIB_DIR)/L2/demos/leptonEnc/kernel/multi_cu.cpp $(XFLIB_DIR)/L2/demos/leptonEnc/kernel/jpeg_dec_lepton_enc.cpp 
+	$(ECHO) "Compiling Kernel: lepEnc"
+	mkdir -p $(TEMP_DIR)
+	$(VPP) -c $(VPP_FLAGS_lepEnc) $(VPP_FLAGS) -k lepEnc -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
+BINARY_CONTAINER_lepEnc_OBJS += $(TEMP_DIR)/lepEnc.xo
+BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_lepEnc_OBJS)
+$(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
+	mkdir -p $(BUILD_DIR)
+	$(VPP) -l $(VPP_FLAGS) --temp_dir $(TEMP_DIR) --report_dir $(BUILD_REPORT_DIR)/lepEnc $(VPP_LDFLAGS)  $(VPP_LDFLAGS_lepEnc) $(AIE_LDFLAGS)   -o $@ $^
+
+############################## Setting Rules for Host (Building Host Executable) ##############################
+ifeq ($(HOST_ARCH), x86)
+$(EXE_FILE): $(EXE_FILE_DEPS) |  check_xrt
+	mkdir -p $(BUILD_DIR)
+	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
+
+else
+$(EXE_FILE): $(EXE_FILE_DEPS) |  check_sysroot
+	mkdir -p $(BUILD_DIR)
+	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
+
+endif
+
+$(EMCONFIG):
+	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
+############################## Preparing sdcard folder ##############################
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE := $(SYSROOT)/../../uImage
+else
+K_IMAGE := $(SYSROOT)/../../Image
+endif
+RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
+$(RUN_SCRIPT):  # the output directory may not exist yet when this rule runs first
+	mkdir -p $(BUILD_DIR)
+	@echo 'export LD_LIBRARY_PATH=/mnt:/tmp:$(LIBRARY_PATH)' > $(RUN_SCRIPT)
+ifneq ($(filter sw_emu hw_emu, $(TARGET)),)
+	@echo 'export XCL_EMULATION_MODE=$(TARGET)' >> $(RUN_SCRIPT)
+endif
+	@echo 'export XILINX_VITIS=/mnt' >> $(RUN_SCRIPT)
+	@echo 'export XILINX_XRT=/usr' >> $(RUN_SCRIPT)
+	@echo 'if [ -f platform_desc.txt  ]; then' >> $(RUN_SCRIPT)
+	@echo '        cp platform_desc.txt /etc/xocl.txt' >> $(RUN_SCRIPT)
+	@echo 'fi' >> $(RUN_SCRIPT)
+	@echo './$(EXE_NAME) $(PKG_HOST_ARGS)' >> $(RUN_SCRIPT)
+	@echo 'return_code=$$?' >> $(RUN_SCRIPT)
+	@echo 'if [ $$return_code -ne 0 ]; then' >> $(RUN_SCRIPT)
+	@echo '        echo "ERROR: Embedded host run failed, RC=$$return_code"' >> $(RUN_SCRIPT)
+	@echo 'else' >> $(RUN_SCRIPT)
+	@echo '        echo "INFO: TEST PASSED, RC=0"' >> $(RUN_SCRIPT)
+	@echo 'fi' >> $(RUN_SCRIPT)
+	@echo 'echo "INFO: Embedded host run completed."' >> $(RUN_SCRIPT)
+	@echo 'exit $$return_code' >> $(RUN_SCRIPT)
+DATA_FILE := 
+DATA_DIR := $(CUR_DIR)/images 
+SD_FILES += $(RUN_SCRIPT)
+SD_FILES += $(EXE_FILE)
+SD_FILES += $(EMCONFIG)
+SD_FILES += xrt.ini
+SD_FILES += $(DATA_FILE)# where define DATAFILE in json
+SD_FILES_WITH_PREFIX = $(foreach sd_file,$(SD_FILES), $(if $(filter $(sd_file),$(wildcard $(sd_file))), --package.sd_file $(sd_file)))
+SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
+PACKAGE_FILES := $(BINARY_CONTAINERS)
+PACKAGE_FILES += $(AIE_CONTAINER)
+SD_CARD := $(CUR_DIR)/package_$(TARGET)
+vck190_dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+	@echo "Generating sd_card folder...."
+	mkdir -p $(SD_CARD)
+	chmod a+rx $(BUILD_DIR)/run_script.sh
+ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	@echo "### ***** sd_card generation done! ***** ###"
+vck190_dfx_hw := true
+endif
+endif
+ifeq ($(vck190_dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+	@echo "### ***** sd_card generation done! ***** ###"
+endif
+
+.PHONY: sd_card
+sd_card: $(SD_CARD)
+endif
+############################## Setting Essential Checks and Building Rules ##############################
+RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
+RUN_DEPS += $(SD_CARD)
+
+.PHONY: mkflag all run
+mkflag:
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do echo $$var >> $(BUILD_DIR)/makefile_args.txt; done
+all: check_device  check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
+#hw_emu
+ifneq (,$(filter hw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	XCL_EMULATION_MODE=$(TARGET) $(EXE_FILE) $(HOST_ARGS)
+	
+else
+	@echo $(RUN_DEPS)
+	$(SD_CARD)/launch_$(TARGET).sh -no-reboot -run-app $(notdir $(RUN_SCRIPT)) 
+	grep "TEST PASSED, RC=0" $(SD_CARD)/qemu_output.log || exit 1
+	
+endif
+endif
+#sw_emu
+ifneq (,$(filter sw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	XCL_EMULATION_MODE=$(TARGET) $(EXE_FILE) $(HOST_ARGS) 
+	
+else
+	@echo $(RUN_DEPS)
+	$(SD_CARD)/launch_$(TARGET).sh -no-reboot -run-app $(notdir $(RUN_SCRIPT)) 
+	grep "TEST PASSED, RC=0" $(SD_CARD)/qemu_output.log || exit 1
+	
+endif
+endif
+#hw
+ifeq ($(TARGET), hw)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(HOST_ARGS)
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(HOST_ARGS)
+	
+else
+	$(ECHO) "Please copy the content of sd_card folder and data to an SD Card and run on the board"
+endif
+endif
+
+############################## Setting Targets ##############################
+# cleanh/cleank are commands too: declare them phony like the other clean targets.
+.PHONY: clean cleanall cleanh cleank emconfig
+emconfig: $(EMCONFIG)
+
+.PHONY: host
+ifeq ($(HOST_ARCH), x86)
+host:  check_xrt $(EXE_FILE)
+else
+host:  check_sysroot $(EXE_FILE)
+endif
+
+.PHONY: xclbin
+ifeq ($(HOST_ARCH), x86)
+xclbin:  check_vpp check_xrt $(BINARY_CONTAINERS) 
+else
+xclbin:  check_vpp check_sysroot $(BINARY_CONTAINERS) 
+endif
+
+############################## Cleaning Rules ##############################
+cleanh:
+	-$(RMDIR) $(EXE_FILE) vitis_* TempConfig system_estimate.xtxt *.rpt .run/
+	-$(RMDIR) src/*.ll _xocc_* .Xil dltmp* xmltmp* *.log *.jou *.wcfg *.wdb sample_link.ini sample_compile.ini obj*  bin* *.csv *.jpg *.jpeg *.png
+# cleank: kernel/emulation outputs. "pl* start_simulation.sh" replaces a misplaced space that made rm target a file named "sh".
+cleank:
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl* start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*
+# cleanall: remove all the generated files (runs cleanh and cleank first).
+cleanall: cleanh cleank
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
+	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
+# clean: remove the generated non-hardware files (same as cleanh).
+clean: cleanh
\ No newline at end of file
diff --git a/codec/L2/demos/leptonEnc/conn_u200.cfg b/codec/L2/demos/leptonEnc/conn_u200.cfg
new file mode 100644
index 0000000000..556034e5d4
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/conn_u200.cfg
@@ -0,0 +1,31 @@
+[connectivity]
+nk=lepEnc:1:lepEnc_0
+#nk=lepEnc:7:lepEnc_0.lepEnc_1.lepEnc_2.lepEnc_3.lepEnc_4.lepEnc_5.lepEnc_6
+sp=lepEnc_0.datainDDR:DDR[0]
+sp=lepEnc_0.arithInfo:DDR[0]
+sp=lepEnc_0.res:DDR[0]
+#sp=lepEnc_1.datainDDR:DDR[0]
+#sp=lepEnc_1.arithInfo:DDR[0]
+#sp=lepEnc_1.res:DDR[0]
+#sp=lepEnc_2.datainDDR:DDR[0]
+#sp=lepEnc_2.arithInfo:DDR[0]
+#sp=lepEnc_2.res:DDR[0]
+#sp=lepEnc_3.datainDDR:DDR[1]
+#sp=lepEnc_3.arithInfo:DDR[1]
+#sp=lepEnc_3.res:DDR[1]
+#sp=lepEnc_4.datainDDR:DDR[2]
+#sp=lepEnc_4.arithInfo:DDR[2]
+#sp=lepEnc_4.res:DDR[2]
+#sp=lepEnc_5.datainDDR:DDR[2]
+#sp=lepEnc_5.arithInfo:DDR[2]
+#sp=lepEnc_5.res:DDR[2]
+#sp=lepEnc_6.datainDDR:DDR[2]
+#sp=lepEnc_6.arithInfo:DDR[2]
+#sp=lepEnc_6.res:DDR[2]
+slr=lepEnc_0:SLR0
+#slr=lepEnc_1:SLR0
+#slr=lepEnc_2:SLR0
+#slr=lepEnc_3:SLR1
+#slr=lepEnc_4:SLR2
+#slr=lepEnc_5:SLR2
+#slr=lepEnc_6:SLR2
diff --git a/codec/L2/demos/leptonEnc/description.json b/codec/L2/demos/leptonEnc/description.json
new file mode 100644
index 0000000000..aa7764f244
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/description.json
@@ -0,0 +1,157 @@
+{
+    "gui": false, 
+    "name": "Xilinx Lepton Demo", 
+    "description": "", 
+    "flow": "vitis", 
+    "platform_allowlist": [
+        "u200"
+    ], 
+    "platform_blocklist": [
+        "zc"
+    ], 
+    "platform_properties": {
+        "u200": {
+            "v++": {
+                "compiler": {
+                    "clflags": [
+                        "--config PROJECT/conn_u200.cfg"
+                    ]
+                }
+            }
+        }
+    }, 
+    "data": [
+        "./images"
+    ], 
+    "launch": [
+        {
+            "cmd_args": " -xclbin BUILD/lepEnc.xclbin images/", 
+            "name": "generic launch for all flows"
+        }
+    ], 
+    "host": {
+        "host_exe": "host.exe", 
+        "compiler": {
+            "sources": [
+                "LIB_DIR/L2/demos/leptonEnc/host/lepton/simple_encoder.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/lepton/bitops.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/lepton/fork_serve.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/lepton/thread_handoff.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/lepton/socket_serve.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/lepton/validation.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/lepton/recoder.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/lepton/idct.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/lepton/jpgcoder.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/lepton/jpgcoder_hls.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/lepton/uncompressed_components.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/lepton/lepton_codec.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/lepton/vp8_decoder.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/lepton/simple_decoder.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/lepton/vp8_encoder.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/io/ZlibCompression.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/io/Seccomp.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/io/MemReadWriter.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/io/ioutil.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/io/Zlib0.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/vp8/util/generic_worker.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/vp8/util/memory.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/vp8/util/billing.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/vp8/util/debug.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/vp8/model/JpegArithmeticCoder.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/vp8/model/model.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/vp8/model/numeric.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/vp8/encoder/encoder.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/vp8/decoder/decoder.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/vp8/encoder/boolwriter.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/vp8/decoder/boolreader.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/io/MemMgrAllocator.cc",
+                "LIB_DIR/L2/demos/leptonEnc/host/other/loop_stt.cc",
+                "LIB_DIR/ext/xcl2/xcl2.cpp"
+            ], 
+            "includepaths": [
+                "LIB_DIR/L2/include/hw/jpegDec",
+                "LIB_DIR/L2/include/hw/leptonEnc/jpegDec",
+                "LIB_DIR/L2/include/hw/leptonEnc/lepton",
+                "LIB_DIR/L2/demos/leptonEnc/kernel",
+                "LIB_DIR/L2/demos/leptonEnc/host/lepton",
+                "LIB_DIR/L2/demos/leptonEnc/host/other",
+                "LIB_DIR/L2/demos/leptonEnc/host/vp8/util",
+                "LIB_DIR/L2/demos/leptonEnc/host/vp8/model",
+                "LIB_DIR/L2/demos/leptonEnc/host/vp8/encoder",
+                "LIB_DIR/L2/demos/leptonEnc/host/vp8/decoder",
+                "LIB_DIR/../utils/L1/include/xf_utils_hw",
+                "LIB_DIR/../utils/L1/include",
+                "LIB_DIR/ext/xcl2"
+            ], 
+            "options": "-std=c++14 -fPIC -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label -pthread -L/usr/lib64/ -lcrypto -lz -msse4.2 -DUSE_STANDARD_MEMORY_ALLOCATORS -DUSE_SYSTEM_DEPENDENCIES -DUSE_SYSTEM_LIBRARIES -DHIGH_MEMORY"
+        }
+    }, 
+    "v++": {
+        "compiler": {
+            "includepaths": [
+                "LIB_DIR/L2/demos/leptonEnc/kernel",
+                "LIB_DIR/L2/include/hw/leptonEnc/jpegDec",
+                "LIB_DIR/L2/include/hw/leptonEnc/lepton",
+                "LIB_DIR/../utils/L1/include/xf_utils_hw",
+                "LIB_DIR/../utils/L1/include"
+            ]
+        } 
+    }, 
+    "containers": [
+        {
+            "name": "lepEnc",
+            "accelerators": [
+                {
+                    "location": "LIB_DIR/L2/demos/leptonEnc/kernel/multi_cu.cpp",
+                    "files":[
+                        "LIB_DIR/L2/demos/leptonEnc/kernel/jpeg_dec_lepton_enc.cpp",
+                        "LIB_DIR/L2/demos/leptonEnc/kernel/multi_cu.cpp",
+                        "LIB_DIR/L2/demos/leptonEnc/kernel/XAcc_77.cpp",
+                        "LIB_DIR/L2/demos/leptonEnc/kernel/XAcc_arith.cpp",
+                        "LIB_DIR/L2/demos/leptonEnc/kernel/XAcc_common.cpp",
+                        "LIB_DIR/L2/demos/leptonEnc/kernel/XAcc_dc.cpp",
+                        "LIB_DIR/L2/demos/leptonEnc/kernel/XAcc_edges.cpp",
+                        "LIB_DIR/L2/demos/leptonEnc/kernel/XAcc_jfifparser.cpp",
+                        "LIB_DIR/L2/demos/leptonEnc/kernel/XAcc_jpegdecoder.cpp",
+                        "LIB_DIR/L2/demos/leptonEnc/kernel/XAcc_model.cpp",
+                        "LIB_DIR/L2/demos/leptonEnc/kernel/XModified.cpp"
+                    ],
+                    "frequency": 300.0,
+                    "clflags": " -D KERNEL_NAME=lepEnc",
+                    "name": "lepEnc"
+                } 
+            ],
+            "frequency": 300,
+            "name": "lepEnc"
+        }
+    ], 
+    "testinfo": {
+        "disable": false, 
+        "jobs": [
+            {
+                "index": 0, 
+                "dependency": [], 
+                "env": "", 
+                "cmd": "", 
+                "max_memory_MB": {
+                    "vitis_hw_build": 409600, 
+                    "vitis_hw_emu": 286720, 
+                    "vitis_sw_emu": 102400, 
+                    "vitis_hw_run": 102400
+                }, 
+                "max_time_min": {
+                    "vitis_hw_build": 800, 
+                    "vitis_hw_emu": 300, 
+                    "vitis_sw_emu": 60, 
+                    "vitis_hw_run": 10
+                }
+            }
+        ], 
+        "targets": [
+            "vitis_sw_emu", 
+            "vitis_hw_emu", 
+            "vitis_hw"
+        ], 
+        "category": "canary"
+    }
+}
diff --git a/codec/L2/demos/leptonEnc/host/io/Allocator.hh b/codec/L2/demos/leptonEnc/host/io/Allocator.hh
new file mode 100644
index 0000000000..fabd5a06b3
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/Allocator.hh
@@ -0,0 +1,148 @@
+/*  Sirikata Jpeg Memory Allocator -- Texture Transfer management system
+ *  main.cpp
+ *
+ *  Copyright (c) 2015, Daniel Reiter Horn
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are
+ *  met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _SIRIKATA_JPEG_ARHC_ALLOCATOR_HPP_
+#define _SIRIKATA_JPEG_ARHC_ALLOCATOR_HPP_
+#include <stdlib.h>
+#include "DecoderPlatform.hh"
+namespace Sirikata {
+
+template <class T>
+class JpegAllocator {
+    typedef std::true_type propagate_on_container_move_assignment;
+    typedef std::true_type propagate_on_container_swap;
+    template <class U>
+    friend class JpegAllocator;
+    typedef void*(CustomAllocate)(void* opaque, size_t nmemb, size_t size);
+    typedef void(CustomDeallocate)(void* opaque, void* ptr);
+    // the required functions for lzham (note the requirement of opaque ptr at the end)
+    typedef void*(CustomReallocate)(void* ptr, size_t size, size_t* actualSize, unsigned int movable, void* opaque);
+    typedef size_t(CustomMsize)(void* ptr, void* opaque);
+    CustomAllocate* custom_allocate;
+    CustomDeallocate* custom_deallocate;
+    CustomReallocate* custom_reallocate;
+    CustomMsize* custom_msize;
+    void* opaque;
+    static void* malloc_wrapper(void*, size_t nmemb, size_t size) { return custom_malloc(nmemb * size); }
+    static void free_wrapper(void*, void* ptr) { custom_free(ptr); }
+
+   public:
+    template <class U>
+    bool operator==(const JpegAllocator<U>& other) const {
+        return custom_allocate == other.custom_allocate && custom_deallocate == other.custom_deallocate &&
+               opaque == other.opaque;
+    }
+    template <class U>
+    bool operator!=(const JpegAllocator<U>& other) const {
+        return !((*this) == other);
+    }
+    typedef size_t size_type;
+    typedef ptrdiff_t difference_type;
+    typedef T* pointer;
+    typedef const T* const_pointer;
+    typedef T& reference;
+    typedef const T& const_reference;
+    typedef T value_type;
+    CustomAllocate* get_custom_allocate() const { return custom_allocate; }
+    CustomDeallocate* get_custom_deallocate() const { return custom_deallocate; }
+    CustomReallocate* get_custom_reallocate() const { return custom_reallocate; }
+    CustomMsize* get_custom_msize() const { return custom_msize; }
+    void* get_custom_state() const { return opaque; }
+    /// starts up with malloc/free implementation
+    JpegAllocator() throw() {
+        custom_allocate = &malloc_wrapper;
+        custom_deallocate = &free_wrapper;
+        custom_reallocate = NULL;
+        custom_msize = NULL;
+        opaque = NULL;
+    }
+    template <class U>
+    struct rebind {
+        typedef JpegAllocator<U> other;
+    };
+    JpegAllocator(const JpegAllocator& other) throw() {
+        custom_allocate = other.custom_allocate;
+        custom_deallocate = other.custom_deallocate;
+        custom_reallocate = other.custom_reallocate;
+        custom_msize = other.custom_msize;
+        opaque = other.opaque;
+    }
+    template <typename U>
+    JpegAllocator(const JpegAllocator<U>& other) throw() {
+        custom_allocate = other.custom_allocate;
+        custom_deallocate = other.custom_deallocate;
+        custom_reallocate = other.custom_reallocate;
+        custom_msize = other.custom_msize;
+        opaque = other.opaque;
+    }
+    ~JpegAllocator() throw() {}
+
+    // this sets up the memory subsystem with the arg for this and all copied allocators
+    void setup_memory_subsystem(size_t arg,
+                                unsigned char alignment,
+                                void*(custom_init)(size_t prealloc_size, unsigned char alignment),
+                                CustomAllocate* custom_allocate,
+                                CustomDeallocate* custom_deallocate,
+                                CustomReallocate* custom_reallocate,
+                                CustomMsize* custom_msize) {
+        this->opaque = custom_init(arg, alignment);
+        this->custom_allocate = custom_allocate;
+        this->custom_deallocate = custom_deallocate;
+        this->custom_reallocate = custom_reallocate;
+        this->custom_msize = custom_msize;
+    }
+    // this tears down all users of this memory subsystem
+    void teardown_memory_subsystem(void (*custom_deinit)(void* opaque)) {
+        (*custom_deinit)(opaque);
+        opaque = NULL;
+    }
+
+    pointer allocate(size_type s, void const* = 0) {
+        if (0 == s) return NULL;
+        const bool overflows = s > (size_type)-1 / sizeof(T); // s * sizeof(T) would wrap size_t and under-allocate
+        pointer temp = overflows ? (pointer)NULL : (pointer)(*custom_allocate)(opaque, 1, s * sizeof(T));
+        if (temp == NULL) { // allocator failure and size overflow are reported the same way
+#ifdef __EXCEPTIONS
+            throw std::bad_alloc();
+#else
+            custom_exit(ExitCode::TOO_MUCH_MEMORY_NEEDED);
+#endif
+        }
+        return temp;
+    }
+
+    void deallocate(pointer p, size_type) { (*custom_deallocate)(opaque, p); }
+
+    size_type max_size() const throw() { return 0xffffffff / sizeof(T); }
+
+    void construct(pointer p, const T& val) { new ((void*)p) T(val); }
+
+    void destroy(pointer p) { p->~T(); }
+};
+}
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/io/BoundedMemWriter.hh b/codec/L2/demos/leptonEnc/host/io/BoundedMemWriter.hh
new file mode 100644
index 0000000000..29c82008e1
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/BoundedMemWriter.hh
@@ -0,0 +1,53 @@
+#ifndef SIRIKATA_JPEG_ARHC_BOUNDED_MEM_WRITER_HH_
+#define SIRIKATA_JPEG_ARHC_BOUNDED_MEM_WRITER_HH_
+#include "Reader.hh"
+#include "MuxReader.hh"
+namespace Sirikata {
+// Writer that keeps at most get_bound() bytes in memory; bytes offered beyond the bound are dropped and reported as EOF.
+class SIRIKATA_EXPORT BoundedMemWriter : public Sirikata::DecoderWriter {
+    MuxReader::ResizableByteBuffer mBuffer;
+    size_t mWriteCursor;              // bytes stored in mBuffer so far
+    size_t mNumBytesAttemptedToWrite; // bytes offered to Write(), whether stored or dropped
+
+   public:
+    BoundedMemWriter(const JpegAllocator<uint8_t>& alloc = JpegAllocator<uint8_t>())
+        : mBuffer(alloc), mWriteCursor(0), mNumBytesAttemptedToWrite(0) {}
+    size_t get_bound() const { return mBuffer.size(); }
+    // Resizes the backing buffer; the cursor is clamped so it never points past the new bound.
+    void set_bound(size_t bound) {
+        mBuffer.resize(bound);
+        mWriteCursor = std::min(bound, mWriteCursor);
+    }
+    // Rewinds the cursor and the attempted-bytes counter; the bound and the buffer contents are left alone.
+    void Reset() {
+        mWriteCursor = 0;
+        mNumBytesAttemptedToWrite = 0;
+    }
+    void Close() { Reset(); }
+    // Stores as much of data as still fits; returns the number of bytes stored and errEOF() if any byte was dropped.
+    virtual std::pair<Sirikata::uint32, Sirikata::JpegError> Write(const Sirikata::uint8* data, unsigned int size) {
+        mNumBytesAttemptedToWrite += size;
+        unsigned int bounded_size = 0;
+        if (mBuffer.size() > mWriteCursor) {
+            bounded_size = (unsigned int)std::min((size_t)size, mBuffer.size() - mWriteCursor);
+            memcpy(&mBuffer[mWriteCursor], data, bounded_size);
+        }
+        Sirikata::JpegError err = Sirikata::JpegError::nil();
+        if (bounded_size != size) {
+            err = Sirikata::JpegError::errEOF();
+        }
+        mWriteCursor += bounded_size;
+        return std::pair<Sirikata::uint32, Sirikata::JpegError>(bounded_size, err);
+    }
+    MuxReader::ResizableByteBuffer& buffer() { return mBuffer; }
+    size_t bytes_written() const { return mWriteCursor; }
+    const MuxReader::ResizableByteBuffer& buffer() const { return mBuffer; }
+    // Both bound checks compare against the bytes offered to Write(), not the bytes actually stored.
+    bool has_exceeded_bound() const { return mBuffer.size() < mNumBytesAttemptedToWrite; }
+    bool has_reached_bound() const { return mBuffer.size() <= mNumBytesAttemptedToWrite; }
+    // Convenience wrappers that discard the result of Write().
+    void write(const void* data, unsigned int size) { Write((const Sirikata::uint8*)data, size); }
+    void write_byte(Sirikata::uint8 data) { Write(&data, 1); }
+};
+}
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/io/BufferedIO.hh b/codec/L2/demos/leptonEnc/host/io/BufferedIO.hh
new file mode 100644
index 0000000000..e24237c933
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/BufferedIO.hh
@@ -0,0 +1,102 @@
+#include "Reader.hh"
+
+namespace Sirikata {
+
+template <uint32_t bufferSize>
+class BufferedReader : public DecoderReader {
+    uint8_t* mOffset;
+    DecoderReader* mBase;
+    uint8_t mBuffer[bufferSize];
+    uint8_t* end() { return mBuffer + bufferSize; }
+
+   public:
+    BufferedReader(DecoderReader* base) {
+        mBase = base;
+        mOffset = end();
+    }
+    void init(DecoderReader* base) { mBase = base; }
+    std::pair<uint32, JpegError> Read(uint8* data, unsigned int size) { // serves reads from mBuffer, refilling from mBase
+        uint32_t remaining = end() - mOffset; // unread bytes always sit at the tail of mBuffer
+        if (!remaining) {
+            if (size >= (bufferSize >> 1)) {
+                return mBase->Read(data, size); // buffering won't help much here
+            }
+            std::pair<uint32, JpegError> hasRead = mBase->Read(mBuffer, bufferSize); // refill from the front
+            if (!hasRead.first) {
+                return hasRead; // nothing read: hand the base reader's result (and error) straight back
+            }
+            mOffset = end() - hasRead.first;
+            if (hasRead.first < bufferSize) {
+                memmove(mOffset, mBuffer, hasRead.first); // short read: shift it to the tail so end() - mOffset stays valid
+            }
+            remaining = hasRead.first;
+            if (hasRead.second) {
+                hasRead.first = 0; // report the error with 0 bytes; the bytes just read stay buffered at mOffset
+                return hasRead;
+            }
+        }
+        uint32_t toRead = std::min(size, remaining); // may be fewer bytes than the caller asked for
+        memcpy(data, mOffset, toRead);
+        mOffset += toRead;
+        return std::pair<uint32, JpegError>(toRead, JpegError::nil());
+    }
+    ~BufferedReader() {}
+};
+template <uint32_t bufferSize>
+class BufferedWriter {
+    uint8_t* mOffset;
+    uint8_t mBuffer[bufferSize];
+    DecoderWriter* mBase;
+    // writers are guaranteed to consume full data or error
+    std::pair<uint32, JpegError> WriteFull(const uint8* data, unsigned int size) { return mBase->Write(data, size); }
+
+   public:
+    BufferedWriter(DecoderWriter* base) {
+        mBase = base;
+        mOffset = mBuffer;
+    }
+    void init(DecoderWriter* base) { mBase = base; }
+    std::pair<uint32, JpegError> Write(const uint8* data, unsigned int size) {
+        if (size > bufferSize) {
+            std::pair<uint32, JpegError> retval = WriteFull(mBuffer, mOffset - mBuffer);
+            mOffset = mBuffer;
+            if (retval.second != JpegError::nil()) {
+                return std::pair<uint32, JpegError>(0, retval.second);
+            }
+            return WriteFull(data, size);
+        }
+        uint32 origSize = size;
+        uint32 bytesLeft = bufferSize - (mOffset - mBuffer);
+        uint32 toWrite = std::min(size, bytesLeft);
+        memcpy(mOffset, data, toWrite);
+        mOffset += toWrite;
+        if (toWrite == bytesLeft) {
+            std::pair<uint32, JpegError> retval = WriteFull(mBuffer, bufferSize);
+            if (retval.second != JpegError::nil()) {
+                if (retval.first > bytesLeft) {
+                    retval.first = retval.first - bytesLeft;
+                    return retval;
+                }
+                retval.first = 0;
+                return retval;
+            }
+            mOffset = mBuffer;
+        }
+        data += toWrite;
+        size -= toWrite;
+        if (size) {
+            memcpy(mOffset, data, size);
+            mOffset += size;
+        }
+        return std::pair<uint32, JpegError>(origSize, JpegError::nil());
+    }
+    virtual void Close() {
+        if (mOffset != mBuffer) {
+            std::pair<uint32, JpegError> retval = WriteFull(mBuffer, mOffset - mBuffer);
+            (void)retval;
+            mOffset = mBuffer;
+        }
+    }
+    virtual ~BufferedWriter() { Close(); }
+};
+}
diff --git a/codec/L2/demos/leptonEnc/host/io/DecoderPlatform.hh b/codec/L2/demos/leptonEnc/host/io/DecoderPlatform.hh
new file mode 100644
index 0000000000..3a7e34feec
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/DecoderPlatform.hh
@@ -0,0 +1,74 @@
+/*  Sirikata Jpeg Reader -- Texture Transfer management system
+ *  main.cpp
+ *
+ *  Copyright (c) 2015, Daniel Reiter Horn
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are
+ *  met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#if defined(__linux) || defined(__APPLE__) || defined(BSD)
+#define SIRIKATA_FUNCTION_EXPORT __attribute__((visibility("default")))
+#define SIRIKATA_EXPORT __attribute__((visibility("default")))
+#define SIRIKATA_PLUGIN_EXPORT __attribute__((visibility("default")))
+#else // every other platform: the export markers expand to nothing
+#define SIRIKATA_FUNCTION_EXPORT
+#define SIRIKATA_EXPORT
+#define SIRIKATA_PLUGIN_EXPORT
+#define __builtin_expect(x, y) (x) // hint is dropped; parenthesized so e.g. !__builtin_expect(a || b, 1) stays !(a || b)
+#endif
+#include <stdint.h>
+#include <stddef.h>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include "../vp8/util/memory.hh"
+#define USE_MMAP
+namespace Sirikata {
+
+typedef int64_t int64;
+typedef uint64_t uint64;
+typedef int32_t int32;
+typedef uint32_t uint32;
+typedef uint16_t uint16;
+typedef int16_t int16;
+typedef uint8_t uint8;
+typedef uint8_t byte;
+typedef int8_t int8;
+}
+#ifndef _DECODER_PLATFORM_HH_
+#define _DECODER_PLATFORM_HH_
+#ifdef _WIN32
+#include <io.h>
+inline int write(int fd, const void* data, unsigned int length) {
+    return _write(fd, data, length);
+}
+inline int read(int fd, void* data, unsigned int length) {
+    return _read(fd, data, length);
+}
+inline int close(int fd) {
+    return _close(fd);
+}
+typedef int ssize_t;
+#endif
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/io/Error.hh b/codec/L2/demos/leptonEnc/host/io/Error.hh
new file mode 100644
index 0000000000..c31970e269
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/Error.hh
@@ -0,0 +1,61 @@
+/*  Sirikata Jpeg Texture Transfer -- Texture Transfer management system
+ *  main.cpp
+ *
+ *  Copyright (c) 2015, Daniel Reiter Horn
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are
+ *  met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _SIRIKATA_JPEG_ARHC_ERROR_HPP_
+#define _SIRIKATA_JPEG_ARHC_ERROR_HPP_
+
+namespace Sirikata {
+
+class JpegError {
+    explicit JpegError(const char*) : mWhat(ERR_MISC) {}
+
+   public:
+    enum ErrorMessage { NO_ERROR, ERR_EOF, ERR_FF00, ERR_SHORT_HUFFMAN, ERR_MISC } mWhat;
+    static JpegError MakeFromStringLiteralOnlyCallFromMacro(const char*) { return JpegError(ERR_MISC, ERR_MISC); }
+    explicit JpegError(ErrorMessage err, ErrorMessage) : mWhat(err) {}
+    JpegError() : mWhat() { // uses default allocator--but it won't allocate, so that's ok
+        mWhat = NO_ERROR;
+    }
+    const char* what() const {
+        if (mWhat == NO_ERROR) return "";
+        if (mWhat == ERR_EOF) return "EOF";
+        if (mWhat == ERR_FF00) return "MissingFF00";
+        if (mWhat == ERR_SHORT_HUFFMAN) return "ShortHuffman";
+        return "MiscError";
+    }
+    operator bool() { return mWhat != NO_ERROR; }
+    static JpegError nil() { return JpegError(); }
+    static JpegError errEOF() { return JpegError(ERR_EOF, ERR_EOF); }
+    static JpegError errMissingFF00() { return JpegError(ERR_FF00, ERR_FF00); }
+    static JpegError errShortHuffmanData() { return JpegError(ERR_SHORT_HUFFMAN, ERR_SHORT_HUFFMAN); }
+};
+#define MakeJpegError(s) JpegError::MakeFromStringLiteralOnlyCallFromMacro("" s)
+#define JpegErrorUnsupportedError(s) MakeJpegError("unsupported JPEG feature: " s)
+#define JpegErrorFormatError(s) MakeJpegError("unsupported JPEG feature: " s)
+}
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/io/MemMgrAllocator.cc b/codec/L2/demos/leptonEnc/host/io/MemMgrAllocator.cc
new file mode 100644
index 0000000000..6fcae742b0
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/MemMgrAllocator.cc
@@ -0,0 +1,485 @@
+//----------------------------------------------------------------
+// Statically-allocated memory manager
+//
+// by Eli Bendersky (eliben@gmail.com)
+//
+// This code is in the public domain.
+
+/*  Sirikata Memory Management system
+ *
+ *
+ *  Copyright (c) 2015 Eli Bendersky, Daniel Reiter Horn
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are
+ *  met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _WIN32
+#include <sys/mman.h>
+#include <unistd.h>
+#endif
+#include <algorithm>
+#include <cstdint>
+#include "DecoderPlatform.hh"
+#include "MemMgrAllocator.hh"
+#if (defined(__APPLE__) || __cplusplus <= 199711L) && !defined(_WIN32)
+#define THREAD_LOCAL_STORAGE __thread
+#else
+#include <atomic>
+#define THREAD_LOCAL_STORAGE thread_local
+#endif
+
+namespace Sirikata {
+using std::size_t;
+union mem_header_union {
+    typedef char Align[16];
+    struct {
+        // Pointer to the next block in the free list
+        //
+        union mem_header_union* next;
+
+        // Size of the block (in quantas of sizeof(mem_header_t))
+        //
+        size_t size;
+    } s;
+
+    // Used to align headers in memory to a boundary
+    //
+    Align align_dummy;
+};
+
+typedef union mem_header_union mem_header_t;
+size_t min_pool_alloc_quantas = 256;
+
+struct MemMgrState {
+    mem_header_t base; // Initial empty list
+    // Start of free list
+    //
+    mem_header_t* freep;
+    // NOTE(review): presumably the offset of the first unused byte in pool -- confirm against the allocation code
+    //
+    size_t pool_free_pos;
+    // Pool for new allocations; setup_memmgr() points it into the block obtained by memmgr_init()
+    //
+    uint8_t* pool;
+    size_t pool_size; // size of pool in bytes, as passed to setup_memmgr()
+    size_t total_ever_allocated;
+    bool used_calloc; // memmgr_destroy() reads this on memmgrs[0] to choose free() over munmap()
+};
+size_t memmgr_num_memmgrs = 0;
+MemMgrState* memmgrs = NULL;
+size_t memmgr_bytes_allocated = 0;
+#if __cplusplus <= 199711L && !(defined(_WIN32))
+AtomicValue<size_t> bytes_currently_used(0);
+AtomicValue<size_t> bytes_ever_allocated(0);
+#else
+std::atomic<size_t> bytes_currently_used(0);
+std::atomic<size_t> bytes_ever_allocated(0);
+#endif
+THREAD_LOCAL_STORAGE int memmgr_thread_id_plus_one = 0;
+#if __cplusplus <= 199711L && !defined(_WIN32)
+AtomicValue<int> memmgr_allocated_threads((0));
+#else
+std::atomic<int> memmgr_allocated_threads((0));
+#endif
+MemMgrState& get_local_memmgr() { // returns the calling thread's allocator state
+    int id = memmgr_thread_id_plus_one;
+    if (!id) {
+        memmgr_thread_id_plus_one = id = ++memmgr_allocated_threads;
+        // First call on this thread (the thread-local id is still 0): the next value of the shared
+        // counter becomes this thread's 1-based slot and is cached, so later calls skip this branch.
+        // A slot beyond the memmgr_num_memmgrs states configured by memmgr_init() is fatal.
+        if (id > (int)memmgr_num_memmgrs) {
+            assert(false &&
+                   "Too many threads have requested access to memory-managers:"
+                   "init with higher thread count");
+            custom_exit(ExitCode::ASSERTION_FAILURE);
+        }
+    }
+    return memmgrs[id - 1]; // ids are 1-based, the array is 0-based
+}
+/// Releases the shared pool block and resets the thread-slot counter; caution: need to call this once per thread
+void memmgr_destroy() {
+    memmgr_thread_id_plus_one = 0; // only clears this thread
+    if (memmgrs) {
+#if defined(USE_MMAP) && defined(__linux) // only linux guarantees all zeros
+        if (!memmgrs->used_calloc) {
+            munmap(memmgrs, memmgr_bytes_allocated);
+        } else
+#endif
+        {
+            free(memmgrs);
+        }
+    }
+    // The block is gone: clear the globals describing it, then drain the thread-slot counter to zero.
+    memmgr_bytes_allocated = 0;
+    memmgr_num_memmgrs = 0;
+    memmgrs = NULL;
+    int last = 0;
+    if (memmgr_allocated_threads.load()) {
+        while ((last = --memmgr_allocated_threads) > 0) { // there needed to be at least one
+        }
+        while (last < 0) {
+            last = ++memmgr_allocated_threads; // this shouldn't hit; re-read the counter so the loop can terminate
+        }
+    }
+}
+void setup_memmgr(MemMgrState& memmgr, uint8_t* data, size_t size) {
+    memset(&memmgr, 0, sizeof(MemMgrState));
+    memmgr.base.s.next = 0;
+    memmgr.base.s.size = 0;
+    memmgr.freep = 0;
+    memmgr.pool_free_pos = 0;
+    memmgr.pool = data;
+    memmgr.pool_size = size;
+}
+/// Obtains one block (mmap on Linux, calloc otherwise) holding a MemMgrState and a pool for the main thread and each worker.
+void memmgr_init(size_t main_thread_pool_size,
+                 size_t worker_thread_pool_size,
+                 size_t num_workers,
+                 size_t x_min_pool_alloc_quantas,
+                 bool needs_huge_pages) {
+#ifdef __APPLE__
+    // in apple, the thread_local storage winds up different when destroying the thread
+    num_workers *= 2;
+#endif
+    min_pool_alloc_quantas = x_min_pool_alloc_quantas;
+    memmgr_num_memmgrs = num_workers + 1;
+    size_t pool_overhead_size = sizeof(MemMgrState) * (1 + num_workers);
+    size_t total_size = pool_overhead_size + main_thread_pool_size + worker_thread_pool_size * num_workers;
+    uint8_t* data = NULL;
+    bool used_calloc = false;
+#if defined(USE_MMAP) && defined(__linux) // only linux guarantees all zeros
+    if (needs_huge_pages) {
+        data =
+            (uint8_t*)mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+        if (data == MAP_FAILED) {
+            const char* error = "Huge pages unsupported: falling back to ordinary pages\n";
+            int ret = write(2, error, strlen(error));
+            (void)ret;
+        }
+    }
+    if (data == MAP_FAILED || !needs_huge_pages) {
+        data = (uint8_t*)mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        if (data == MAP_FAILED) {
+            perror("mmap");
+            data = NULL;
+        }
+    }
+#endif
+    if (!data) {
+        used_calloc = true;
+        data = (uint8_t*)calloc(total_size, 1);
+    }
+    if (!data) {
+        fprintf(stderr, "Insufficient memory: unable to mmap or calloc %lu bytes\n", (unsigned long)total_size);
+        fflush(stderr);
+        exit(37);
+    }
+    memmgrs = (MemMgrState*)data;
+    memmgr_bytes_allocated = total_size;
+    data += pool_overhead_size;
+    setup_memmgr(memmgrs[0], data, main_thread_pool_size);
+    data += main_thread_pool_size;
+    for (int i = 0; i < (int)num_workers; ++i) {
+        setup_memmgr(memmgrs[i + 1], data, worker_thread_pool_size);
+        data += worker_thread_pool_size;
+    }
+    always_assert((size_t)(data - (uint8_t*)memmgrs) == total_size);
+    // Record the allocation method only now: setup_memmgr() memset()s each state, which would wipe an earlier store.
+    memmgrs->used_calloc = used_calloc;
+    MemMgrState& main_thread_state = get_local_memmgr();
+    (void)main_thread_state;
+    always_assert(main_thread_state.pool_size == main_thread_pool_size);
+}
+// Reports the global byte counter that get_mem_from_pool increases each time
+// it carves a fresh chunk out of a pool.
+size_t memmgr_size_allocated() {
+    const size_t carved_bytes = bytes_currently_used.load();
+    return carved_bytes;
+}
+// Returns how many bytes of the pool obtained from get_local_memmgr() have
+// not been carved yet (pool_size minus pool_free_pos). Also writes a trace
+// line to stderr on every call.
+size_t memmgr_size_left() {
+    fprintf(stderr, "call in memmgr_size_left");
+    MemMgrState& local_state = get_local_memmgr();
+    const size_t uncarved = local_state.pool_size - local_state.pool_free_pos;
+    return uncarved;
+}
+// Reports the global running total that memmgr_alloc adds to on every call
+// (request sizes rounded up to whole quantas, never less than the minimum chunk).
+size_t memmgr_total_size_ever_allocated() {
+    const size_t lifetime_bytes = bytes_ever_allocated.load();
+    return lifetime_bytes;
+}
+
+// Prints the layout of the pool obtained from get_local_memmgr() and its free
+// list to stdout. The dump is compiled in only when DEBUG_MEMMGR_SUPPORT_STATS
+// is defined; the stderr trace line and the state lookup always run.
+void memmgr_print_stats() {
+    fprintf(stderr, "call in memmgr_print_stats");
+    MemMgrState& memmgr = get_local_memmgr();
+    (void)memmgr;
+#ifdef DEBUG_MEMMGR_SUPPORT_STATS
+    printf("------ Memory manager stats ------\n\n");
+    printf("Memmgr.Pool: free_pos = %lu (%lu uint8_ts left)\n\n", memmgr.pool_free_pos,
+           memmgr.pool_size - memmgr.pool_free_pos);
+
+    // Walk everything carved from the pool so far, hopping by each header's size.
+    mem_header_t* const carved_end = (mem_header_t*)(memmgr.pool + memmgr.pool_free_pos);
+    for (mem_header_t* blk = (mem_header_t*)memmgr.pool; blk < carved_end; blk += blk->s.size) {
+        printf("  * Addr: 0x%8p; Size: %8lu\n", blk, blk->s.size);
+    }
+
+    printf("\nFree list:\n\n");
+
+    if (!memmgr.freep) {
+        printf("Empty\n");
+    } else {
+        // The free list is circular: stop once we are back at the starting node.
+        mem_header_t* node = memmgr.freep;
+        do {
+            printf("  * Addr: 0x%8p; Size: %8lu; Next: 0x%8p\n", node, node->s.size, node->s.next);
+            node = node->s.next;
+        } while (node != memmgr.freep);
+    }
+
+    printf("\n");
+#endif // DEBUG_MEMMGR_SUPPORT_STATS
+}
+
+// Carves a fresh chunk of at least `nquantas` header-sized units from the
+// untouched tail of memmgr's pool and passes it to memmgr_free so that it
+// joins the free list.
+// On success *blessed_zero points at the new chunk's header and the current
+// memmgr.freep is returned. When the pool cannot fit the request,
+// *blessed_zero is set to NULL and 0 is returned.
+static mem_header_t* get_mem_from_pool(MemMgrState& memmgr, size_t nquantas, mem_header_t** blessed_zero) {
+    // Never carve less than the configured minimum chunk.
+    if (nquantas < min_pool_alloc_quantas) nquantas = min_pool_alloc_quantas;
+    const size_t chunk_bytes = nquantas * sizeof(mem_header_t);
+    // fprintf(stderr, "+%ld\n", chunk_bytes);
+
+    // Guard: not enough untouched pool left for this chunk.
+    if (memmgr.pool_free_pos + chunk_bytes > memmgr.pool_size) {
+        *blessed_zero = NULL;
+        return 0;
+    }
+
+    mem_header_t* fresh = (mem_header_t*)(memmgr.pool + memmgr.pool_free_pos);
+    fresh->s.size = nquantas;
+    // Freeing the chunk is what links it into the free list.
+    memmgr_free((void*)(fresh + 1));
+    memmgr.pool_free_pos += chunk_bytes;
+    bytes_currently_used += chunk_bytes;
+    *blessed_zero = fresh;
+    return memmgr.freep;
+}
+
+namespace {
+// Returns true when every one of the `size` bytes starting at `data` is zero
+// (and therefore also when size is 0).
+bool is_zero(const void* data, size_t size) {
+    const unsigned char* cursor = (const unsigned char*)data;
+    const unsigned char* const stop = cursor + size;
+    for (; cursor != stop; ++cursor) {
+        if (*cursor != 0) {
+            return false;
+        }
+    }
+    return true;
+}
+}
+// Allocates room for `nuint8_ts` bytes from the pool returned by
+// get_local_memmgr() and returns a pointer to it, or 0 when the pool cannot
+// satisfy the request (with MEMMGR_EXIT_OOM defined, custom_exit is called first).
+// Recycled blocks are memset to zero before being returned; a block that is
+// exactly the chunk just carved from the pool is only asserted to be zero.
+//
+// Allocations are done in 'quantas' of header size.
+// The search for a free block of adequate size begins at the point 'memmgr.freep'
+// where the last block was found.
+// If a too-big block is found, it is split and the tail is returned (this
+// way the header of the original needs only to have its size adjusted).
+// The pointer returned to the user points to the free space within the block,
+// which begins one quanta after the header.
+//
+void* memmgr_alloc(size_t nuint8_ts) {
+    MemMgrState& memmgr = get_local_memmgr();
+    // Header of the chunk carved from the pool during this call by
+    // get_mem_from_pool; stays NULL if no chunk had to be carved.
+    mem_header_t* blessed_zero = NULL;
+    mem_header_t* p;
+    mem_header_t* prevp;
+
+    // Calculate how many quantas are required: we need enough to house all
+    // the requested bytes, plus the header. The -1 and +1 are there to make sure
+    // that if nuint8_ts is a multiple of the quanta size, we don't allocate too much
+    //
+    size_t nquantas = (nuint8_ts + sizeof(mem_header_t) - 1) / sizeof(mem_header_t) + 1;
+    // Statistics only: per-state and global running totals of requested bytes,
+    // rounded up to whole quantas and to the minimum chunk size.
+    memmgr.total_ever_allocated += std::max(nquantas, min_pool_alloc_quantas) * sizeof(mem_header_t);
+    bytes_ever_allocated += std::max(nquantas, min_pool_alloc_quantas) * sizeof(mem_header_t);
+    // fprintf(stderr, "A %ld\n", std::max(nquantas, min_pool_alloc_quantas) * sizeof(mem_header_t));
+    // First alloc call, and no free list yet ? Use 'base' for an initial
+    // degenerate block of size 0, which points to itself
+    //
+    if ((prevp = memmgr.freep) == 0) {
+        memmgr.base.s.next = memmgr.freep = prevp = &memmgr.base;
+        memmgr.base.s.size = 0;
+    }
+
+    // Walk the circular free list; the loop only exits through a return.
+    for (p = prevp->s.next;; prevp = p, p = p->s.next) {
+        // big enough ?
+        if (p->s.size >= nquantas) {
+            // exactly ?
+            if (p->s.size == nquantas) {
+                // just eliminate this block from the free list by pointing
+                // its prev's next to its next
+                //
+                prevp->s.next = p->s.next;
+            } else // too big
+            {
+                // Shrink the free block in place and hand out its tail:
+                // p ends up pointing at the header of the tail piece.
+                p->s.size -= nquantas;
+                p += p->s.size;
+                p->s.size = nquantas;
+            }
+
+            // Resume the next search from the block preceding this one.
+            memmgr.freep = prevp;
+            if (blessed_zero == p) {
+                // The block is exactly the chunk just carved from the pool, so
+                // it is expected to be zero already and the memset is skipped.
+                assert(is_zero(p + 1, nuint8_ts) && "The item returned from the new pool must be zero");
+                return p + 1;
+            } else {
+#ifndef _WIN32
+                // References is_zero so it still counts as used when the
+                // assert above is compiled out.
+                (void)is_zero;
+#endif
+                return memset((p + 1), 0, nuint8_ts); // this makes sure we always return zero'd data
+            }
+        }
+        // Reached end of free list ?
+        // Try to allocate the block from the memmgr.pool. If that succeeds,
+        // get_mem_from_pool adds the new block to the free list and
+        // it will be found in the following iterations. If the call
+        // to get_mem_from_pool doesn't succeed, we've run out of
+        // memory
+        //
+        else if (p == memmgr.freep) {
+            if ((p = get_mem_from_pool(memmgr, nquantas, &blessed_zero)) == 0) {
+#ifdef DEBUG_MEMMGR_FATAL
+                printf("!! Memory allocation failed !!\n");
+#endif
+#ifdef MEMMGR_EXIT_OOM
+                custom_exit(ExitCode::TOO_MUCH_MEMORY_NEEDED);
+#endif
+
+                return 0;
+            }
+        }
+    }
+}
+
+// Returns the block whose user pointer is `ap` to the free list of the state
+// obtained from get_local_memmgr(). Pointers outside that state's pool
+// (including NULL) are ignored.
+//
+// Scans the free list, starting at memmgr.freep, looking for the place to insert the
+// free block. This is either between two existing blocks or at the end of the
+// list. In any case, if the block being freed is adjacent to either neighbor,
+// the adjacent blocks are combined.
+//
+void memmgr_free(void* ap) {
+    MemMgrState& memmgr = get_local_memmgr();
+    // Reject anything that does not lie inside this state's pool.
+    if ((uint8_t*)ap >= memmgr.pool + memmgr.pool_size || (uint8_t*)ap < memmgr.pool) {
+// illegal address or on another thread.
+#ifdef DEBUG_MEMMGR_FATAL
+        fprintf(stderr, "Memory freed on another thread than it was allocated on\n");
+#endif
+        return;
+    }
+    mem_header_t* block;
+    mem_header_t* p;
+
+    // acquire pointer to block header
+    block = ((mem_header_t*)ap) - 1;
+
+    // Find the correct place to place the block in (the free list is sorted by
+    // address, increasing order)
+    //
+    for (p = memmgr.freep; !(block > p && block < p->s.next); p = p->s.next) {
+        // Since the free list is circular, there is one link where a
+        // higher-addressed block points to a lower-addressed block.
+        // This condition checks if the block should be actually
+        // inserted between them
+        //
+        if (p >= p->s.next && (block > p || block < p->s.next)) break;
+    }
+
+    // Try to combine with the higher neighbor
+    //
+    if (block + block->s.size == p->s.next) {
+        block->s.size += p->s.next->s.size;
+        block->s.next = p->s.next->s.next;
+    } else {
+        block->s.next = p->s.next;
+    }
+
+    // Try to combine with the lower neighbor
+    //
+    if (p + p->s.size == block) {
+        p->s.size += block->s.size;
+        p->s.next = block->s.next;
+    } else {
+        p->s.next = block;
+    }
+
+    // Start the next search at the node just before the freed block.
+    memmgr.freep = p;
+}
+
+// calloc-shaped entry point: allocates room for `nmemb` elements of `size`
+// bytes each through memmgr_alloc. `opaque` is not used.
+// Returns NULL when nmemb * size does not fit in size_t, instead of letting
+// the product wrap around and handing back a block that is too small.
+void* MemMgrAllocatorMalloc(void* opaque, size_t nmemb, size_t size) {
+    (void)opaque;
+    if (size != 0 && nmemb > static_cast<size_t>(-1) / size) {
+        return NULL;
+    }
+    return memmgr_alloc(nmemb * size);
+}
+// free-shaped entry point: forwards `ptr` to memmgr_free. `opaque` is not used.
+void MemMgrAllocatorFree(void* opaque, void* ptr) {
+    void* const block = ptr;
+    memmgr_free(block);
+}
+// Initializes the memory manager through memmgr_init (with a minimum chunk of
+// 256 quantas), writes a trace line to stderr and returns a 1-byte allocation.
+// `alignment` is accepted but not used.
+void* MemMgrAllocatorInit(
+    size_t prealloc_size, size_t worker_size, size_t num_workers, unsigned char alignment, bool needs_huge_pages) {
+    const size_t min_chunk_quantas = 256;
+    memmgr_init(prealloc_size, worker_size, num_workers, min_chunk_quantas, needs_huge_pages);
+    fprintf(stderr, "call in MemMgrAllocatorInit\n");
+    void* const first_block = memmgr_alloc(1);
+    return first_block;
+}
+// Frees the block passed as `opaque` and then tears the memory manager down
+// with memmgr_destroy.
+void MemMgrAllocatorDestroy(void* opaque) {
+    void* const handle = opaque;
+    memmgr_free(handle);
+    memmgr_destroy();
+}
+// Returns the size in bytes recorded in the header that sits directly in
+// front of `ptr`: the header's quanta count times the header size. The count
+// is the one memmgr_alloc stored, which includes the header quantum itself.
+// `opaque` is not used.
+size_t MemMgrAllocatorMsize(void* ptr, void* opaque) {
+    const mem_header_t* header = ((mem_header_t*)ptr) - 1;
+    const size_t quanta_count = header->s.size;
+    return quanta_count * sizeof(mem_header_t);
+}
+// realloc-shaped resize for blocks handed out by memmgr_alloc.
+//   ptr      - existing block, or NULL (then this is a plain allocation)
+//   amount   - requested size in bytes; 0 frees ptr and returns NULL
+//   ret_size - receives the usable capacity in bytes of the returned block,
+//              or 0 when a required allocation fails; it is left untouched
+//              when amount is 0 or when a non-movable block is too small
+//   movable  - when 0 the block may not be relocated, so NULL is returned if
+//              it cannot hold `amount` bytes in place
+//   opaque   - not used
+// Returns the block to use from now on, or NULL on failure; on failure the
+// original block stays allocated.
+void* MemMgrAllocatorRealloc(void* ptr, size_t amount, size_t* ret_size, unsigned int movable, void* opaque) {
+    (void)opaque;
+    if (amount == 0) {
+        memmgr_free(ptr);
+        return NULL;
+    }
+    // memmgr_alloc counts the header itself in s.size and returns the address
+    // one header past it, so the bytes a caller may touch are one header less
+    // than s.size * sizeof(mem_header_t). Using the larger figure would keep a
+    // block that is too small and copy past its end.
+    size_t old_capacity = 0;
+    if (ptr) {
+        const mem_header_t* old_block = ((const mem_header_t*)ptr) - 1;
+        old_capacity = (old_block->s.size - 1) * sizeof(mem_header_t);
+        if (old_capacity >= amount) {
+            *ret_size = old_capacity;
+            return ptr;
+        }
+        if (!movable) {
+            return NULL;
+        }
+    }
+
+    void* retval = memmgr_alloc(amount);
+    if (!retval) {
+        // Pool exhausted: report failure instead of reading the header in
+        // front of a null pointer. The old block is left as it was.
+        *ret_size = 0;
+        return NULL;
+    }
+    const mem_header_t* new_block = ((const mem_header_t*)retval) - 1;
+    *ret_size = (new_block->s.size - 1) * sizeof(mem_header_t);
+    if (ptr) {
+        memcpy(retval, ptr, std::min(amount, old_capacity));
+        memmgr_free(ptr);
+    }
+    return retval;
+}
+}
diff --git a/codec/L2/demos/leptonEnc/host/io/MemMgrAllocator.hh b/codec/L2/demos/leptonEnc/host/io/MemMgrAllocator.hh
new file mode 100644
index 0000000000..37e59c15b5
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/MemMgrAllocator.hh
@@ -0,0 +1,82 @@
+//----------------------------------------------------------------
+// Statically-allocated memory manager
+//
+// by Eli Bendersky (eliben@gmail.com)
+//
+// This code is in the public domain.
+
+/*  Sirikata Memory Management system
+ *
+ *
+ *  Copyright (c) 2015 Eli Bendersky, Daniel Reiter Horn
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are
+ *  met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _MEM_MGR_ALLOCATOR_HH_
+#define _MEM_MGR_ALLOCATOR_HH_
+
+#include "DecoderPlatform.hh"
+namespace Sirikata {
+// Initialize the memory manager: sets up one pool of main_thread_size bytes
+// plus num_workers pools of worker_thread_size bytes each.
+// min_pool_alloc_quantas is the smallest chunk, in header-sized units, that is
+// carved from a pool at once; needs_huge_pages asks for huge pages where the
+// build supports it.
+// NOTE(review): the definition replaces process-wide state, so this looks
+// like a call to make once before any thread allocates rather than once per
+// thread -- confirm against the callers.
+//
+SIRIKATA_FUNCTION_EXPORT void memmgr_init(size_t main_thread_size,
+                                          size_t worker_thread_size,
+                                          size_t num_workers,
+                                          size_t min_pool_alloc_quantas = 256,
+                                          bool needs_huge_pages = false);
+
+// Uninitialize the memory manager. This function should be called
+// exactly once per thread that exits
+SIRIKATA_FUNCTION_EXPORT void memmgr_destroy();
+
+// 'malloc' clone: returns nbytes of zeroed memory from the calling thread's
+// pool, or 0 when that pool is exhausted
+//
+SIRIKATA_FUNCTION_EXPORT void* memmgr_alloc(size_t nbytes);
+
+// 'free' clone: pointers that do not lie inside the calling thread's pool are
+// ignored
+//
+SIRIKATA_FUNCTION_EXPORT void memmgr_free(void* ap);
+
+// Prints statistics about the current state of the memory
+// manager (the dump itself is only compiled in with DEBUG_MEMMGR_SUPPORT_STATS)
+//
+SIRIKATA_FUNCTION_EXPORT void memmgr_print_stats();
+// Bytes carved out of the pools so far.
+SIRIKATA_FUNCTION_EXPORT size_t memmgr_size_allocated();
+// Running total of bytes requested through memmgr_alloc, after rounding.
+SIRIKATA_FUNCTION_EXPORT size_t memmgr_total_size_ever_allocated();
+// Bytes of the calling thread's pool that have not been carved yet.
+SIRIKATA_FUNCTION_EXPORT size_t memmgr_size_left();
+}
+namespace Sirikata {
+// Callback-shaped wrappers around the memmgr_* functions. The `opaque`
+// argument is part of the callback signature; apart from
+// MemMgrAllocatorDestroy, which frees it, the wrappers do not use it.
+
+// calloc-shaped allocation of nmemb elements of `size` bytes each.
+SIRIKATA_FUNCTION_EXPORT void* MemMgrAllocatorMalloc(void* opaque, size_t nmemb, size_t size);
+// free-shaped release of a block returned by the functions in this group.
+SIRIKATA_FUNCTION_EXPORT void MemMgrAllocatorFree(void* opaque, void* ptr);
+// Initializes the memory manager and returns a small allocation.
+// needs_huge_pages is the fifth parameter of the out-of-line definition; it
+// was missing from this declaration, which therefore named a function that is
+// never defined. It defaults to false so four-argument calls keep compiling.
+SIRIKATA_FUNCTION_EXPORT void* MemMgrAllocatorInit(size_t prealloc_size,
+                                                   size_t worker_size,
+                                                   size_t num_workers,
+                                                   unsigned char alignment,
+                                                   bool needs_huge_pages = false);
+// Frees `opaque` and tears the memory manager down.
+SIRIKATA_FUNCTION_EXPORT void MemMgrAllocatorDestroy(void* opaque);
+// realloc-shaped resize; *actualSize receives the size of the returned block.
+// When `movable` is 0 the block is never relocated.
+SIRIKATA_FUNCTION_EXPORT void* MemMgrAllocatorRealloc(
+    void* ptr, size_t size, size_t* actualSize, unsigned int movable, void* opaque);
+// Size in bytes recorded in the header in front of `ptr`.
+SIRIKATA_FUNCTION_EXPORT size_t MemMgrAllocatorMsize(void* ptr, void* opaque);
+}
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/io/MemReadWriter.cc b/codec/L2/demos/leptonEnc/host/io/MemReadWriter.cc
new file mode 100644
index 0000000000..4a63abd12b
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/MemReadWriter.cc
@@ -0,0 +1,32 @@
+#include "MemReadWriter.hh"
+namespace Sirikata {
+// Inserts `size` bytes from `data` into the buffer at the write cursor and
+// advances the cursor by `size`. Always reports `size` bytes written together
+// with a default-constructed JpegError.
+std::pair<Sirikata::uint32, Sirikata::JpegError> MemReadWriter::Write(const Sirikata::uint8* data, unsigned int size) {
+    using namespace Sirikata;
+    mBuffer.insert(mBuffer.begin() + mWriteCursor, data, data + size);
+    mWriteCursor += size;
+    return std::pair<Sirikata::uint32, JpegError>(size, JpegError());
+}
+// Copies up to `size` bytes from the buffer, starting at the read cursor,
+// into `data` and advances the cursor by the number of bytes copied.
+// Returns that number; when nothing could be copied the error is
+// JpegError::errEOF(), otherwise it is a default-constructed JpegError.
+std::pair<Sirikata::uint32, Sirikata::JpegError> MemReadWriter::Read(Sirikata::uint8* data, unsigned int size) {
+    using namespace Sirikata;
+    // SwapIn/CopyIn accept an arbitrary offset, so the cursor can sit past the
+    // end of the buffer. Treat that as "nothing left" instead of letting the
+    // unsigned subtraction wrap around into a huge byte count.
+    size_t bytesLeft = 0;
+    if (mReadCursor < mBuffer.size()) {
+        bytesLeft = mBuffer.size() - mReadCursor;
+    }
+    size_t actualBytesRead = size;
+    if (bytesLeft < size) {
+        actualBytesRead = bytesLeft;
+    }
+    if (actualBytesRead > 0) {
+        memcpy(data, &mBuffer[mReadCursor], actualBytesRead);
+    }
+    mReadCursor += actualBytesRead;
+    JpegError err = JpegError();
+    if (actualBytesRead == 0) {
+        err = JpegError::errEOF();
+    }
+
+    //	The size_t -> Sirikata::uint32 cast is safe because actualBytesRead never exceeds `size`
+    std::pair<Sirikata::uint32, JpegError> retval(static_cast<Sirikata::uint32>(actualBytesRead), err);
+    return retval;
+}
+}
diff --git a/codec/L2/demos/leptonEnc/host/io/MemReadWriter.hh b/codec/L2/demos/leptonEnc/host/io/MemReadWriter.hh
new file mode 100644
index 0000000000..a78044dd2e
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/MemReadWriter.hh
@@ -0,0 +1,32 @@
+// Include guard: without it a second inclusion in the same translation unit
+// redefines MemReadWriter.
+#ifndef _SIRIKATA_MEM_READ_WRITER_HPP_
+#define _SIRIKATA_MEM_READ_WRITER_HPP_
+#include "Reader.hh"
+namespace Sirikata {
+// In-memory byte stream that implements both DecoderWriter and DecoderReader
+// on top of a single vector, with independent read and write cursors.
+class SIRIKATA_EXPORT MemReadWriter : public Sirikata::DecoderWriter, public Sirikata::DecoderReader {
+    std::vector<Sirikata::uint8, JpegAllocator<uint8_t> > mBuffer;
+    size_t mReadCursor;  // index of the next byte Read() returns
+    size_t mWriteCursor; // index at which Write() inserts
+
+   public:
+    // Starts with an empty buffer that allocates through `alloc`.
+    MemReadWriter(const JpegAllocator<uint8_t>& alloc) : mBuffer(alloc) {
+        mReadCursor = 0;
+        mWriteCursor = 0;
+    }
+    // Rewinds both cursors; the buffered bytes are kept.
+    void Close() {
+        mReadCursor = 0;
+        mWriteCursor = 0;
+    }
+    // Exchanges contents with `buffer` (which receives the previous contents).
+    // Reading resumes at `offset`, writing at the end of the new contents.
+    void SwapIn(std::vector<Sirikata::uint8, JpegAllocator<uint8_t> >& buffer, size_t offset) {
+        mReadCursor = offset;
+        mWriteCursor = buffer.size();
+        buffer.swap(mBuffer);
+    }
+    // Like SwapIn, but copies `buffer` and leaves it unchanged.
+    void CopyIn(const std::vector<Sirikata::uint8, JpegAllocator<uint8_t> >& buffer, size_t offset) {
+        mReadCursor = offset;
+        mWriteCursor = buffer.size();
+        mBuffer = buffer;
+    }
+    virtual std::pair<Sirikata::uint32, Sirikata::JpegError> Write(const Sirikata::uint8* data, unsigned int size);
+    virtual std::pair<Sirikata::uint32, Sirikata::JpegError> Read(Sirikata::uint8* data, unsigned int size);
+    // Direct access to the underlying storage.
+    std::vector<Sirikata::uint8, JpegAllocator<uint8_t> >& buffer() { return mBuffer; }
+    const std::vector<Sirikata::uint8, JpegAllocator<uint8_t> >& buffer() const { return mBuffer; }
+};
+}
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/io/MuxReader.hh b/codec/L2/demos/leptonEnc/host/io/MuxReader.hh
new file mode 100644
index 0000000000..be7af1db4c
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/MuxReader.hh
@@ -0,0 +1,422 @@
+/*  Sirikata Jpeg Reader -- Texture Transfer management system
+ *  MuxReader.hh
+ *
+ *  Copyright (c) 2015, Daniel Reiter Horn
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are
+ *  met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _SIRIKATA_MUX_READER_HPP_
+#define _SIRIKATA_MUX_READER_HPP_
+#include <assert.h>
+#include <algorithm>
+#include "Allocator.hh"
+#include "Error.hh"
+#include "Reader.hh"
+namespace Sirikata {
+
+// Demultiplexes up to MAX_STREAM_ID logical byte streams from one underlying
+// DecoderReader.
+//
+// The input is a sequence of frames, as parsed by fillBufferOnce():
+//   * 3 header bytes are read first. The low 4 bits of byte 0 select the
+//     stream, bits 4-5 are the frame flags.
+//   * flags == 0: variable-length frame. Bytes 1 and 2 hold (length - 1) as a
+//     little-endian 16-bit value, and `length` payload bytes follow.
+//   * flags 1..3: fixed-length frame of 4096, 16384 or 65536 payload bytes.
+//     Only byte 0 is a header here; bytes 1 and 2 are already the first two
+//     payload bytes.
+class SIRIKATA_EXPORT MuxReader {
+   public:
+    // Minimal growable byte array that allocates through a JpegAllocator.
+    // NOTE(review): storage is handed back with mAlloc.destroy(); confirm that
+    // JpegAllocator::destroy really releases the memory.
+    class SIRIKATA_EXPORT ResizableByteBuffer {
+       public:
+        uint8_t* mBegin;  // start of the storage, NULL while nothing is allocated
+        size_t mSize;     // number of bytes in use
+        size_t mReserved; // number of bytes allocated
+        JpegAllocator<uint8_t> mAlloc;
+        // Copying is declared but no definition is visible in this header.
+        ResizableByteBuffer(const ResizableByteBuffer& other);
+        ResizableByteBuffer& operator=(const ResizableByteBuffer& other);
+
+       public:
+        typedef uint8_t* iterator;
+        typedef const uint8_t* const_iterator;
+        // Creates an empty buffer with no storage.
+        ResizableByteBuffer(const JpegAllocator<uint8_t>& alloc = JpegAllocator<uint8_t>()) : mAlloc(alloc) {
+            mBegin = NULL;
+            mSize = 0;
+            mReserved = 0;
+        }
+        // Exchanges storage, sizes and allocators with `other`.
+        void swap(ResizableByteBuffer& other) {
+            std::swap(mBegin, other.mBegin);
+            std::swap(mSize, other.mSize);
+            std::swap(mReserved, other.mReserved);
+            std::swap(mAlloc, other.mAlloc);
+        }
+        // Element access; the bound is checked only by assert.
+        uint8_t& operator[](const size_t offset) {
+            assert(offset < mSize);
+            return mBegin[offset];
+        }
+        uint8_t operator[](const size_t offset) const {
+            assert(offset < mSize);
+            return mBegin[offset];
+        }
+        uint8_t* data() { return mBegin; }
+        uint8_t* begin() { return mBegin; }
+        uint8_t* end() { return mBegin + mSize; }
+        const uint8_t* data() const { return mBegin; }
+        size_t size() const { return mSize; }
+        JpegAllocator<uint8_t> get_allocator() { return mAlloc; }
+        // Only allowed while no storage has been reserved (checked by assert).
+        void set_allocator(const JpegAllocator<uint8_t>& new_alloc) {
+            assert(mReserved == 0);
+            mAlloc = new_alloc;
+        }
+        size_t how_much_reserved() const { return mReserved; }
+        // Grows the storage to exactly new_reserved bytes if that is more than
+        // the current reservation; existing bytes are copied over.
+        void reserve(size_t new_reserved) {
+            if (new_reserved > mReserved) {
+                mReserved = new_reserved;
+                uint8_t* new_begin = (uint8_t*)mAlloc.allocate(mReserved);
+                if (mBegin != NULL) {
+                    memcpy(new_begin, mBegin, mSize);
+                    mAlloc.destroy(mBegin);
+                }
+                mBegin = new_begin;
+            }
+        }
+        // Sets the size to new_size. When the reservation is too small it is
+        // doubled, or raised to new_size if doubling is not enough. Bytes
+        // added by growing are not initialized here.
+        void resize(size_t new_size) {
+            if (mReserved < new_size) {
+                mReserved *= 2;
+                if (mReserved < new_size) {
+                    mReserved = new_size;
+                }
+                uint8_t* new_begin = (uint8_t*)mAlloc.allocate(mReserved);
+                if (mBegin != NULL) {
+                    memcpy(new_begin, mBegin, mSize);
+                    mAlloc.destroy(mBegin);
+                }
+                mBegin = new_begin;
+            }
+            assert(mSize <= mReserved);
+            mSize = new_size;
+        }
+        ~ResizableByteBuffer() {
+            if (mBegin) {
+                mAlloc.destroy(mBegin);
+            }
+        }
+    };
+
+   private:
+    typedef Sirikata::DecoderReader Reader;
+    Reader* mReader; // underlying source; may be NULL until init() is called
+    // Keeps calling r->Read until exactly `len` bytes have been stored in
+    // `buffer`. A Read that returns 0 bytes ends the loop and its error is
+    // returned; otherwise the result is JpegError::nil().
+    static JpegError ReadFull(Reader* r, uint8_t* buffer, uint32_t len) {
+        while (len != 0) {
+            std::pair<uint32, JpegError> ret = r->Read(buffer, len);
+            if (ret.first == 0) {
+                assert(ret.second != JpegError::nil() && "Read of 0 bytes == error");
+                return ret.second; // must have error
+            }
+            buffer += ret.first;
+            len -= ret.first;
+        }
+        return JpegError::nil();
+    }
+
+    // Reads one frame from mReader and appends its payload to the buffer of
+    // the stream named in the frame header. `incomingBuffer` is not used.
+    // Returns the error of the failing read, or JpegError::nil().
+    JpegError fillBufferOnce(ResizableByteBuffer& incomingBuffer) {
+        uint8_t header[4] = {0, 0, 0};
+        JpegError err = ReadFull(mReader, header, 3);
+        if (err != JpegError::nil()) {
+            return err;
+        }
+        uint8_t stream_id = 0xf & header[0];
+        assert(stream_id < MAX_STREAM_ID && "Stream Id Must be within range");
+        if (stream_id >= MAX_STREAM_ID) {
+            return JpegError::errMissingFF00();
+        }
+        ResizableByteBuffer* buffer = &mBuffer[stream_id];
+        uint8_t flags = (header[0] >> 4) & 3;
+        size_t offset = buffer->size();
+        uint32_t len;
+        if (flags == 0) {
+            // Variable-length frame: header bytes 1 and 2 encode (length - 1).
+            len = header[2];
+            len *= 0x100;
+            len += header[1] + 1;
+            buffer->resize(offset + len);
+        } else {
+            // Fixed-length frame: 4096, 16384 or 65536 payload bytes, the
+            // first two of which were already consumed as header[1..2].
+            len = (1024 << (2 * flags));
+            buffer->resize(offset + len);
+            (*buffer)[offset] = header[1];
+            (*buffer)[offset + 1] = header[2];
+            len -= 2;
+            offset += 2;
+        }
+        JpegError ret = ReadFull(mReader, buffer->data() + offset, len);
+        if (ret == JpegError::nil()) {
+            // Count the bytes of this frame that were framing, not payload.
+            if (flags == 0) {
+                mOverhead += 3;
+            } else {
+                mOverhead += 1;
+            }
+        }
+        return ret;
+    }
+
+   public:
+    enum { MAX_STREAM_ID = 16 };
+    ResizableByteBuffer mBuffer[MAX_STREAM_ID]; // buffered payload per stream
+    uint32_t mOffset[MAX_STREAM_ID];            // bytes of mBuffer[i] already consumed by Read()
+    bool eof;                                   // set to false by the constructor; no visible code sets it to true
+    size_t mOverhead;                           // framing bytes seen so far
+    // Installs `alloc` on every stream buffer and pre-reserves
+    // stream_hint_reserve_size bytes for the first num_stream_hint streams.
+    MuxReader(const JpegAllocator<uint8_t>& alloc,
+              int num_stream_hint = 4,
+              int stream_hint_reserve_size = 65536,
+              Reader* reader = NULL)
+        : mReader(reader) {
+        eof = false;
+        for (int i = 0; i < MAX_STREAM_ID; ++i) { // assign a better allocator
+            mBuffer[i].set_allocator(alloc);
+            if (i < num_stream_hint) {
+                mBuffer[i].reserve(stream_hint_reserve_size); // prime some of the vectors
+            }
+            mOffset[i] = 0;
+        }
+        mOverhead = 0;
+    }
+    // Sets (or replaces) the underlying reader.
+    void init(Reader* reader) { mReader = reader; }
+    // Reads frames until a whole round of MAX_STREAM_ID attempts fails, then
+    // stores for each stream i the range of its unconsumed bytes in ret[i].
+    // `ret` must have room for MAX_STREAM_ID entries.
+    void fillBufferEntirely(std::pair<ResizableByteBuffer::const_iterator, ResizableByteBuffer::const_iterator>* ret) {
+        bool all_error = false;
+        ResizableByteBuffer ib;
+        while (!all_error) {
+            all_error = true;
+            for (int i = 0; i < MAX_STREAM_ID; ++i) {
+                if (fillBufferOnce(ib) == JpegError::nil()) {
+                    all_error = false;
+                }
+            }
+        }
+        for (int i = 0; i < MAX_STREAM_ID; ++i) {
+            ret[i].first = mBuffer[i].begin() + mOffset[i];
+            ret[i].second = mBuffer[i].end();
+        }
+    }
+    // Refills the given stream, which must be fully consumed (checked by
+    // assert): its old contents are dropped and frames are read until that
+    // stream has data again. Frames for other streams read along the way are
+    // appended to their own buffers. Returns the first read error, if any.
+    JpegError fillBufferUntil(uint8_t desired_stream_id) {
+        if (eof) {
+            return JpegError::errEOF();
+        }
+        assert(mOffset[desired_stream_id] == mBuffer[desired_stream_id].size());
+        mOffset[desired_stream_id] = 0;
+        // Swap the consumed storage out; it is released when incomingBuffer
+        // goes out of scope, and the stream starts again from an empty buffer.
+        ResizableByteBuffer incomingBuffer(mBuffer[desired_stream_id].get_allocator());
+        incomingBuffer.swap(mBuffer[desired_stream_id]);
+        do {
+            JpegError err = JpegError::nil();
+            if ((err = fillBufferOnce(incomingBuffer)) != JpegError::nil()) {
+                return err;
+            }
+        } while (mOffset[desired_stream_id] == mBuffer[desired_stream_id].size());
+        return JpegError::nil();
+    }
+    // Copies up to `size` buffered bytes of `stream_id` into `data`, refilling
+    // from the underlying reader first when the stream is drained. Returns the
+    // number of bytes copied and the refill error (0 bytes on error).
+    std::pair<uint32, JpegError> Read(uint8_t stream_id, uint8* data, unsigned int size) {
+        assert(stream_id < MAX_STREAM_ID && "Invalid stream Id; must be less than 16");
+        std::pair<uint32, JpegError> retval(0, JpegError::nil());
+        bool bytes_available = mOffset[stream_id] != mBuffer[stream_id].size();
+        if (bytes_available || (retval.second = fillBufferUntil(stream_id)) == JpegError::nil()) {
+            retval.first = std::min((uint32_t)mBuffer[stream_id].size() - mOffset[stream_id], size);
+            std::memcpy(data, &mBuffer[stream_id][mOffset[stream_id]], retval.first);
+            mOffset[stream_id] += retval.first;
+        }
+        return retval;
+    }
+    size_t getOverhead() const { return mOverhead; }
+    ~MuxReader() {}
+};
+
+// Multiplexes up to MAX_STREAM_ID logical byte streams onto one DecoderWriter,
+// producing the frames that MuxReader parses.
+//
+// Each stream buffer keeps MIN_OFFSET spare bytes in front of its pending
+// data; the frame header is written into those bytes right before the data
+// is handed to the underlying writer, so header and payload go out in one
+// Write call. Two frame kinds are produced:
+//   * variable-length (flushFull): 3 header bytes -- stream id, then
+//     (length - 1) as a little-endian 16-bit value -- and up to 65536 payload bytes
+//   * fixed-length (flushPartial): 1 header byte -- stream id with the size
+//     code in bits 4-5 -- and exactly 4096, 16384 or 65536 payload bytes
+class SIRIKATA_EXPORT MuxWriter {
+    typedef Sirikata::DecoderWriter Writer;
+    Writer* mWriter;
+    size_t mOverhead; // header bytes emitted so far
+
+   public:
+    enum { MAX_STREAM_ID = MuxReader::MAX_STREAM_ID };
+    enum { MIN_OFFSET = 3 };
+    enum { MAX_BUFFER_LAG = 65537 };
+    std::vector<uint8_t, JpegAllocator<uint8_t> > mBuffer[MAX_STREAM_ID];
+    uint32_t mOffset[MAX_STREAM_ID];       // index of the first unflushed byte in mBuffer[i]
+    uint32_t mFlushed[MAX_STREAM_ID];      // payload bytes flushed so far for stream i
+    uint32_t mTotalWritten;                // payload bytes flushed so far over all streams
+    uint32_t mLowWaterMark[MAX_STREAM_ID]; // value of mTotalWritten up to which stream i is considered caught up
+    MuxWriter(Writer* writer, const JpegAllocator<uint8_t>& alloc) : mWriter(writer) {
+        mOverhead = 0;
+        for (uint8_t i = 0; i < MAX_STREAM_ID; ++i) { // assign a better allocator
+            mBuffer[i] = std::vector<uint8_t, JpegAllocator<uint8_t> >(alloc);
+            mOffset[i] = 0;
+            mFlushed[i] = 0;
+            mLowWaterMark[i] = 0;
+        }
+        mTotalWritten = 0;
+    }
+    // Number of pending bytes at which Write() triggers a flush for a stream
+    // that has flushed `flushed` bytes so far: 4096 below 4096 flushed bytes,
+    // 16384 below 16384, and 65536 from then on.
+    uint32_t highWaterMark(uint32_t flushed) {
+        if (flushed & 0xffffc000) {
+            return 65536;
+        }
+        if (flushed & 0xfffff000) {
+            return 16384;
+        }
+        return 4096;
+    }
+
+    // Writes all `toBeFlushed` pending bytes of the stream as variable-length
+    // frames of at most 65536 payload bytes each, then resets the buffer to
+    // just its MIN_OFFSET header space (also when a write failed).
+    // Returns the error of the last write attempted.
+    JpegError flushFull(uint8_t stream_id, uint32_t toBeFlushed) {
+        if (toBeFlushed == 0) {
+            return JpegError::nil();
+        }
+        assert(toBeFlushed + mOffset[stream_id] == mBuffer[stream_id].size());
+        std::pair<uint32_t, JpegError> retval(0, JpegError::nil());
+        do {
+            uint32_t offset = mOffset[stream_id];
+            assert(offset >= MIN_OFFSET);
+            uint32_t toWrite = std::min(toBeFlushed, (uint32_t)65536U);
+            // The header overwrites the 3 bytes in front of the payload; those
+            // are either the spare prefix or bytes that were already flushed.
+            mBuffer[stream_id][offset - MIN_OFFSET] = stream_id;
+            mBuffer[stream_id][offset - MIN_OFFSET + 1] = ((toWrite - 1) & 0xff);
+            mBuffer[stream_id][offset - MIN_OFFSET + 2] = (((toWrite - 1) >> 8) & 0xff);
+            mOverhead += 3;
+            retval = mWriter->Write(&mBuffer[stream_id][offset - MIN_OFFSET], toWrite + MIN_OFFSET);
+            assert((retval.first == toWrite + MIN_OFFSET || retval.second != JpegError::nil()) &&
+                   "Writers must write full");
+            if (retval.second == JpegError::nil()) {
+                mTotalWritten += toWrite;
+                mFlushed[stream_id] += toWrite;
+                mOffset[stream_id] += toWrite;
+                toBeFlushed -= toWrite;
+            } else {
+                break;
+            }
+        } while (toBeFlushed > 0);
+        mOffset[stream_id] = MIN_OFFSET;
+        mBuffer[stream_id].resize(MIN_OFFSET);
+        mLowWaterMark[stream_id] = mTotalWritten;
+        return retval.second;
+    }
+
+    // Writes as many whole fixed-length frames as fit into the `toBeFlushed`
+    // pending bytes and keeps the remainder buffered. The frame size is chosen
+    // from the pending amount; amounts that would need more than two frames
+    // of the chosen size are passed to flushFull instead.
+    JpegError flushPartial(uint8_t stream_id, uint32_t toBeFlushed) {
+        uint8_t code = stream_id;
+        uint32_t len = 0;
+        if (toBeFlushed < 4096) {
+            assert(false && "We shouldn't reach this");
+            return flushFull(stream_id, toBeFlushed);
+        }
+
+        if (toBeFlushed < 16384) {
+            if (toBeFlushed > 8192) {
+                return flushFull(stream_id, toBeFlushed);
+            }
+            len = 4096;
+            code |= (1 << 4);
+        } else if (toBeFlushed < 65536) {
+            if (toBeFlushed > 32768) {
+                return flushFull(stream_id, toBeFlushed);
+            }
+            len = 16384;
+            code |= (2 << 4);
+        } else {
+            if (toBeFlushed > 131072) {
+                return flushFull(stream_id, toBeFlushed);
+            }
+            len = 65536;
+            code |= (3 << 4);
+        }
+        std::pair<uint32_t, JpegError> retval(0, JpegError::nil());
+        for (uint32_t toWrite = 0; toWrite + len <= toBeFlushed; toWrite += len) {
+            uint32_t offset = mOffset[stream_id];
+            if (offset == mBuffer[stream_id].size()) continue;
+            assert(offset >= MIN_OFFSET);
+            // One-byte header placed directly in front of the payload.
+            mBuffer[stream_id][offset - 1] = code;
+            mOverhead += 1;
+            retval = mWriter->Write(&mBuffer[stream_id][offset - 1], len + 1);
+            if (retval.first != len + 1) {
+                return retval.second;
+            }
+            mTotalWritten += len;
+            mFlushed[stream_id] += len;
+            mOffset[stream_id] += len;
+            if (mOffset[stream_id] > 65539) {
+                // Compact: move the unflushed tail back to just after the
+                // header space so the buffer does not keep growing.
+                for (std::vector<uint8_t, JpegAllocator<uint8_t> >::iterator
+                         src = mBuffer[stream_id].begin() + mOffset[stream_id],
+                         dst = mBuffer[stream_id].begin() + MIN_OFFSET, ed = mBuffer[stream_id].end();
+                     src != ed; ++src, ++dst) {
+                    *dst = *src;
+                }
+                mBuffer[stream_id].resize(MIN_OFFSET + mBuffer[stream_id].size() - mOffset[stream_id]);
+                mOffset[stream_id] = MIN_OFFSET;
+            }
+        }
+        uint32_t delta = mBuffer[stream_id].size() - mOffset[stream_id];
+        if (delta > mTotalWritten) {
+            mLowWaterMark[stream_id] = 0;
+        } else {
+            // we're already delta behind the ground truth
+            mLowWaterMark[stream_id] = mTotalWritten - delta;
+        }
+        return retval.second;
+    }
+    // Flushes `stream_id` with flushPartial after first giving every other
+    // stream with pending data a chance to flush: streams with fewer than 4096
+    // pending bytes are flushed only when they lag more than MAX_BUFFER_LAG
+    // bytes behind, larger ones always.
+    // Every stream is still attempted after a failure, and the first error
+    // encountered is returned; previously a failure on another stream was
+    // overwritten by the result of the final flushPartial and lost.
+    JpegError flush(uint8_t stream_id) {
+        JpegError firstError = JpegError::nil();
+        for (uint8_t i = 0; i < MAX_STREAM_ID; ++i) {
+            uint32_t toBeFlushed = mBuffer[i].size() - mOffset[i];
+            if (i == stream_id || !toBeFlushed) {
+                continue;
+            }
+            JpegError result = JpegError::nil();
+            bool isUrgent = mTotalWritten - mLowWaterMark[i] > MAX_BUFFER_LAG;
+            if (toBeFlushed < 4096) {
+                if (isUrgent) {
+                    // we need to flush what we have
+                    result = flushFull(i, toBeFlushed);
+                    assert(mTotalWritten == mLowWaterMark[i]);
+                }
+            } else {
+                if (isUrgent && toBeFlushed < 16384) {
+                    result = flushFull(i, toBeFlushed);
+                } else {
+                    result = flushPartial(i, toBeFlushed);
+                }
+            }
+            if (firstError == JpegError::nil()) {
+                firstError = result;
+            }
+        }
+        uint32_t toBeFlushed = mBuffer[stream_id].size() - mOffset[stream_id];
+        JpegError ownResult = flushPartial(stream_id, toBeFlushed);
+        if (firstError == JpegError::nil()) {
+            firstError = ownResult;
+        }
+        return firstError;
+    }
+    // Appends `size` bytes to the stream's buffer and flushes once the pending
+    // amount reaches the stream's high-water mark. Always reports `size` bytes
+    // accepted; the error is the result of the flush, if one ran.
+    std::pair<uint32, JpegError> Write(uint8_t stream_id, const uint8* data, unsigned int size) {
+        std::pair<uint32, JpegError> retval(size, JpegError::nil());
+        size_t bufferSize = mBuffer[stream_id].size();
+        if (bufferSize == 0) {
+            // First use of this stream: set aside the header space.
+            mBuffer[stream_id].reserve(16387);
+            mBuffer[stream_id].resize(MIN_OFFSET);
+            mOffset[stream_id] = MIN_OFFSET;
+            bufferSize = MIN_OFFSET;
+        }
+        mBuffer[stream_id].insert(mBuffer[stream_id].end(), data, data + size);
+        bufferSize += size;
+        uint32_t hwm = highWaterMark(mFlushed[stream_id]);
+        if (bufferSize >= mOffset[stream_id] + hwm) {
+            retval.second = flush(stream_id);
+        }
+        return retval;
+    }
+    // Flushes whatever is still pending on every stream. Write errors are not
+    // reported because the function returns void.
+    void Close() {
+        for (uint8_t i = 0; i < MAX_STREAM_ID; ++i) {
+            if (mOffset[i] != mBuffer[i].size()) {
+                assert(mBuffer[i].size() - mOffset[i] < 65536);
+                flushFull(i, mBuffer[i].size() - mOffset[i]);
+            }
+        }
+    }
+    size_t getOverhead() const { return mOverhead; }
+    ~MuxWriter() {}
+};
+}
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/io/Reader.hh b/codec/L2/demos/leptonEnc/host/io/Reader.hh
new file mode 100644
index 0000000000..d501ca0748
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/Reader.hh
@@ -0,0 +1,49 @@
+/*  Sirikata Jpeg Reader -- Texture Transfer management system
+ *  main.cpp
+ *
+ *  Copyright (c) 2015, Daniel Reiter Horn
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are
+ *  met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _SIRIKATA_READER_HPP_
+#define _SIRIKATA_READER_HPP_
+
+#include "Allocator.hh"
+#include "Error.hh"
+namespace Sirikata {
+class SIRIKATA_EXPORT DecoderReader {  // abstract byte source
+   public:
+    virtual std::pair<uint32, JpegError> Read(uint8* data, unsigned int size) = 0;  // fills `data` with up to `size` bytes; returns (byte count, status)
+    virtual ~DecoderReader() {}  // virtual so implementations can be destroyed through this interface
+};
+class SIRIKATA_EXPORT DecoderWriter {  // abstract byte sink
+   public:
+    virtual std::pair<uint32, JpegError> Write(const uint8* data, unsigned int size) = 0;  // consumes up to `size` bytes; returns (byte count, status)
+    virtual void Close() = 0;  // finishes the stream; implementations decide what is flushed
+    virtual ~DecoderWriter() {}  // virtual so implementations can be destroyed through this interface
+};
+
+SIRIKATA_FUNCTION_EXPORT JpegError Copy(DecoderReader& r, DecoderWriter& w, const JpegAllocator<uint8>& alloc);
+}
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/io/Seccomp.cc b/codec/L2/demos/leptonEnc/host/io/Seccomp.cc
new file mode 100644
index 0000000000..90812dac7e
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/Seccomp.cc
@@ -0,0 +1,108 @@
+#include <stdio.h>
+#ifdef __linux
+#include <sys/wait.h>
+//#include <linux/seccomp.h>
+
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include "seccomp-bpf.hh"
+#include <linux/audit.h>
+
+#if defined(__i386__)
+#define ARCH_NR AUDIT_ARCH_I386
+#elif defined(__x86_64__)
+#define ARCH_NR AUDIT_ARCH_X86_64
+#elif defined(__arm__)
+/*
+ * <linux/audit.h> includes <linux/elf-em.h>, which does not define EM_ARM.
+ * <linux/elf.h> only includes <asm/elf.h> if we're in the kernel.
+ */
+#ifndef EM_ARM
+#define EM_ARM 40
+#endif
+#define ARCH_NR AUDIT_ARCH_ARM
+#elif defined(__hppa__)
+#define ARCH_NR AUDIT_ARCH_PARISC
+#elif defined(__ia64__)
+#define ARCH_NR AUDIT_ARCH_IA64
+#elif defined(__mips__)
+#if defined(__mips64)
+#if defined(__MIPSEB__)
+#define ARCH_NR AUDIT_ARCH_MIPS64
+#else
+#define ARCH_NR AUDIT_ARCH_MIPSEL64
+#endif
+#else
+#if defined(__MIPSEB__)
+#define ARCH_NR AUDIT_ARCH_MIPS
+#else
+#define ARCH_NR AUDIT_ARCH_MIPSEL
+#endif
+#endif
+#elif defined(__powerpc64__)
+#define ARCH_NR AUDIT_ARCH_PPC64
+#elif defined(__powerpc__)
+#define ARCH_NR AUDIT_ARCH_PPC
+#elif defined(__s390x__)
+#define ARCH_NR AUDIT_ARCH_S390X
+#elif defined(__s390__)
+#define ARCH_NR AUDIT_ARCH_S390
+#elif defined(__sparc__)
+#if defined(__arch64__)
+#define AUDIT_ARCH_SPARC64
+#else
+#define AUDIT_ARCH_SPARC
+#endif
+#else
+#error "AUDIT_ARCH value unavailable"
+#endif
+
+#endif
+namespace Sirikata {
+bool installStrictSyscallFilter(bool verbose) {  // NOTE(review): on Linux nothing is installed yet true is returned
+#ifdef __linux
+    struct sock_filter filter[] = {  // allow-list program; only built, never handed to the kernel below
+        /* Validate architecture. */
+        VALIDATE_ARCHITECTURE,
+        /* Grab the system call number. */
+        EXAMINE_SYSCALL,
+        /* List allowed syscalls. */
+        ALLOW_SYSCALL(rt_sigreturn),
+#ifdef __NR_sigreturn
+        ALLOW_SYSCALL(sigreturn),
+#endif
+        ALLOW_SYSCALL(exit), ALLOW_SYSCALL(read), ALLOW_SYSCALL(write), KILL_PROCESS,
+    };
+    struct sock_fprog prog;  // descriptor for the filter above; unused while the prctl block stays commented out
+    prog.len = (unsigned short)(sizeof(filter) / sizeof(filter[0]));
+    prog.filter = filter;
+    /* if (prctl(PR_SET_SECCOMP,SECCOMP_MODE_STRICT)) {
+         if (verbose) {
+             perror("prctl(SECCOMP)");
+         }
+         if (errno == EINVAL && verbose) {
+             fprintf(stderr, "SECCOMP_MODE_STRICT is not available.\n%s",
+                 "Trying to set a filter to emulate strict mode\n");
+         }
+         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+             if (verbose) {
+                 perror("prctl(NO_NEW_PRIVS)");
+             }
+             exit(1);
+         }
+         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
+             if (verbose) {
+                 perror("prctl(SECCOMP)");
+             }
+             exit(1);
+         }
+     }*/
+    return true;  // reached without any prctl() call having been made
+#else
+    if (verbose) {  // non-Linux builds only report (optionally) and fail
+        fprintf(stderr, "SECCOMP not supported on this OS (linux only)\n");
+    }
+    return false;
+#endif
+}
+}
diff --git a/codec/L2/demos/leptonEnc/host/io/Seccomp.hh b/codec/L2/demos/leptonEnc/host/io/Seccomp.hh
new file mode 100644
index 0000000000..65f1bfa939
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/Seccomp.hh
@@ -0,0 +1,3 @@
+namespace Sirikata {  // NOTE(review): this header has no include guard
+bool installStrictSyscallFilter(bool verbose);  // defined in Seccomp.cc; `verbose` enables stderr diagnostics
+}
diff --git a/codec/L2/demos/leptonEnc/host/io/Zlib0.cc b/codec/L2/demos/leptonEnc/host/io/Zlib0.cc
new file mode 100644
index 0000000000..8a14533ca1
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/Zlib0.cc
@@ -0,0 +1,257 @@
+/*  Sirikata Jpeg Texture Transfer -- Zlib implementation
+ *  Zlib0.cpp
+ *
+ *  Copyright (c) 2015, Daniel Reiter Horn
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are
+ *  met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "../../vp8/util/memory.hh"
+#include <assert.h>
+#include <cstring>
+
+#include "Zlib0.hh"
+namespace Sirikata {
+uint32_t adler32(uint32_t adler, const uint8_t* buf, uint32_t len);
+
+Zlib0Writer::Zlib0Writer(DecoderWriter* stream, int level)
+    : mBase(stream),
+      mFileSize(0),  // real size must be supplied later through setFullFileSize()
+      mWritten(0),
+      mAdler32(adler32(0, NULL, 0)),  // NULL buffer requests the initial checksum value
+      mClosed(false),
+      mBilledBytesLeft(0) {
+    assert(level == 0 && "Only support stored/raw/literal zlib");
+}
+
+std::pair<uint32, JpegError> Zlib0Writer::Write(const uint8* data, unsigned int size) {
+    always_assert(mWritten + size <= mFileSize);  // callers may never exceed the size declared via setFullFileSize()
+    mAdler32 = adler32(mAdler32, data, size);  // checksum is folded in before the closed/EOF check below
+    if (mClosed || mWritten == mFileSize) {
+        return std::pair<uint32, JpegError>(0, JpegError::errEOF());
+    }
+    if (mWritten == 0) {  // nothing forwarded yet: emit the 2-byte zlib header first
+        std::pair<uint32, JpegError> retval = writeHeader();
+        if (retval.second != JpegError::nil()) {
+            retval.first = 0;  // header bytes do not count as payload
+            return retval;
+        }
+    }
+    std::pair<uint32, JpegError> retval(0, JpegError::nil());
+    if (mBilledBytesLeft) { // we've already said we'll write these bytes...lets actually write them
+        unsigned int toWrite = std::min(size, (unsigned int)mBilledBytesLeft);
+        retval = mBase->Write(data, toWrite);
+        if (retval.second != JpegError::nil()) {
+            mWritten += retval.first;  // NOTE(review): mBilledBytesLeft is not reduced on this error path
+            return retval;
+        }
+        mBilledBytesLeft -= toWrite;
+        mWritten += toWrite;
+        size -= toWrite;
+        data += toWrite;
+    }
+    while (size) {  // each pass opens a new stored block for the remaining input
+        uint8_t buffer[(1 << 16) + 1 + 4];  // stack scratch: 5-byte block header followed by the payload
+        const uint16_t max_size = 65535;  // largest length a 16-bit LEN field can describe
+        if (mWritten + max_size >= mFileSize) {
+            buffer[0] = 0x1; // last block
+            mBilledBytesLeft = mFileSize - mWritten;
+        } else {
+            buffer[0] = 0x0;  // not the final block
+            mBilledBytesLeft = max_size;
+        }
+        buffer[1] = mBilledBytesLeft & 0xff;  // LEN, low byte first
+        buffer[2] = (mBilledBytesLeft >> 8) & 0xff;
+        buffer[3] = (~buffer[1]) & 0xff;  // one's complement of LEN
+        buffer[4] = (~buffer[2]) & 0xff;
+        uint32_t toSend = 5;
+        uint32_t toWrite = std::min((unsigned int)mBilledBytesLeft, size);  // may be less than the billed length; the rest arrives in later calls
+        std::memcpy(buffer + toSend, data, toWrite);
+        toSend += toWrite;
+        std::pair<uint32, JpegError> retval2 = mBase->Write(buffer, toSend);
+        if (retval2.second != JpegError::nil()) {
+            if (retval2.first > toSend - toWrite) {  // credit only bytes beyond the 5-byte block header
+                retval.first += retval2.first - (toSend - toWrite);
+                mWritten += retval2.first - (toSend - toWrite);
+            }
+            retval.second = retval2.second;
+            return retval;
+        }
+        mWritten += toWrite;
+        mBilledBytesLeft -= toWrite;  // what remains is owed to the block just announced
+        size -= toWrite;
+        data += toWrite;
+        retval.first += toWrite;
+    }
+    return retval;
+}
+
+Zlib0Writer::~Zlib0Writer() {
+    // Emit the Adler-32 trailer if the owner never called Close() (or Close() failed).
+    if (mClosed) return;
+    Close();
+}
+const unsigned int desired_checksum = 31;  // the 16-bit header value must be a multiple of 31
+static const uint8_t zlibHeader[2] = {0x78, (desired_checksum - (0x78 << 8) % desired_checksum)};  // evaluates to {0x78, 0x01}
+std::pair<uint32_t, JpegError> Zlib0Writer::writeHeader() {  // forwards the two header bytes to the base writer
+    return mBase->Write(zlibHeader, sizeof(zlibHeader));
+}
+/// writes the adler32 sum
+void Zlib0Writer::Close() {
+    always_assert(mWritten == mFileSize && "Must have written as much as promised");  // also fires when reached via the destructor
+    uint8_t adler[4] = {static_cast<uint8_t>((mAdler32 >> 24) & 0xff), static_cast<uint8_t>((mAdler32 >> 16) & 0xff),
+                        static_cast<uint8_t>((mAdler32 >> 8) & 0xff), static_cast<uint8_t>(mAdler32 & 0xff)};  // most significant byte first
+    std::pair<uint32, JpegError> retval = mBase->Write(adler, 4);
+    if (retval.second != JpegError::nil()) {
+        return;  // mClosed stays false, so the destructor will attempt Close() again
+    }
+    mClosed = true;
+    mBase->Close();  // closing propagates to the wrapped writer
+}
+size_t Zlib0Writer::getCompressedSize(size_t originalSize) {
+    size_t fullSize = sizeof(zlibHeader);  // 2-byte zlib header
+    size_t numPackets = originalSize / 65535 + (originalSize % 65535 ? 1 : 0);  // ceil: one stored block per started 65535 bytes (was a mis-parsed `/=`)
+    fullSize += numPackets * 5;  // 5 framing bytes (final flag, LEN, ~LEN) per stored block
+    fullSize += 4;  // adler32
+    return fullSize;  // NOTE(review): payload bytes themselves are not added here; confirm callers expect that
+}
+
+#define BASE 65521UL /* largest prime smaller than 65536 */
+#define NMAX 5552
+/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+
+#define DO1(buf, i)        \
+    {                      \
+        adler += (buf)[i]; \
+        sum2 += adler;     \
+    }
+#define DO2(buf, i) \
+    DO1(buf, i);    \
+    DO1(buf, i + 1);
+#define DO4(buf, i) \
+    DO2(buf, i);    \
+    DO2(buf, i + 2);
+#define DO8(buf, i) \
+    DO4(buf, i);    \
+    DO4(buf, i + 4);
+#define DO16(buf) \
+    DO8(buf, 0);  \
+    DO8(buf, 8);
+
+/* use NO_DIVIDE if your processor does not do division in hardware */
+#ifdef NO_DIVIDE
+#define MOD(a)                                    \
+    do {                                          \
+        if (a >= (BASE << 16)) a -= (BASE << 16); \
+        if (a >= (BASE << 15)) a -= (BASE << 15); \
+        if (a >= (BASE << 14)) a -= (BASE << 14); \
+        if (a >= (BASE << 13)) a -= (BASE << 13); \
+        if (a >= (BASE << 12)) a -= (BASE << 12); \
+        if (a >= (BASE << 11)) a -= (BASE << 11); \
+        if (a >= (BASE << 10)) a -= (BASE << 10); \
+        if (a >= (BASE << 9)) a -= (BASE << 9);   \
+        if (a >= (BASE << 8)) a -= (BASE << 8);   \
+        if (a >= (BASE << 7)) a -= (BASE << 7);   \
+        if (a >= (BASE << 6)) a -= (BASE << 6);   \
+        if (a >= (BASE << 5)) a -= (BASE << 5);   \
+        if (a >= (BASE << 4)) a -= (BASE << 4);   \
+        if (a >= (BASE << 3)) a -= (BASE << 3);   \
+        if (a >= (BASE << 2)) a -= (BASE << 2);   \
+        if (a >= (BASE << 1)) a -= (BASE << 1);   \
+        if (a >= BASE) a -= BASE;                 \
+    } while (0)
+#define MOD4(a)                                 \
+    do {                                        \
+        if (a >= (BASE << 4)) a -= (BASE << 4); \
+        if (a >= (BASE << 3)) a -= (BASE << 3); \
+        if (a >= (BASE << 2)) a -= (BASE << 2); \
+        if (a >= (BASE << 1)) a -= (BASE << 1); \
+        if (a >= BASE) a -= BASE;               \
+    } while (0)
+#else
+#define MOD(a) a %= BASE
+#define MOD4(a) a %= BASE
+#endif
+
+uint32_t adler32(uint32_t adler, const uint8_t* buf, uint32_t len) {  // folds `len` bytes of `buf` into the running checksum `adler`
+    unsigned long sum2;  // upper 16-bit component of the checksum
+    unsigned n;
+
+    /* split Adler-32 into component sums */
+    sum2 = (adler >> 16) & 0xffff;
+    adler &= 0xffff;
+
+    /* in case user likes doing a byte at a time, keep it fast */
+    if (len == 1) {  // note: buf is dereferenced before the NULL check below
+        adler += buf[0];
+        if (adler >= BASE) adler -= BASE;
+        sum2 += adler;
+        if (sum2 >= BASE) sum2 -= BASE;
+        return adler | (sum2 << 16);
+    }
+
+    /* initial Adler-32 value (deferred check for len == 1 speed) */
+    if (buf == NULL) return 1L;
+
+    /* in case short lengths are provided, keep it somewhat fast */
+    if (len < 16) {  // also covers len == 0, which leaves the sums unchanged
+        while (len--) {
+            adler += *buf++;
+            sum2 += adler;
+        }
+        if (adler >= BASE) adler -= BASE;
+        MOD4(sum2); /* only added so many BASE's */
+        return adler | (sum2 << 16);
+    }
+
+    /* do length NMAX blocks -- requires just one modulo operation */
+    while (len >= NMAX) {
+        len -= NMAX;
+        n = NMAX / 16; /* NMAX is divisible by 16 */
+        do {
+            DO16(buf); /* 16 sums unrolled */
+            buf += 16;
+        } while (--n);
+        MOD(adler);
+        MOD(sum2);
+    }
+
+    /* do remaining bytes (less than NMAX, still just one modulo) */
+    if (len) { /* avoid modulos if none remaining */
+        while (len >= 16) {
+            len -= 16;
+            DO16(buf);
+            buf += 16;
+        }
+        while (len--) {
+            adler += *buf++;
+            sum2 += adler;
+        }
+        MOD(adler);
+        MOD(sum2);
+    }
+
+    /* return recombined sums */
+    return adler | (sum2 << 16);  // low half: byte sum, high half: sum of running sums
+}
+}
diff --git a/codec/L2/demos/leptonEnc/host/io/Zlib0.hh b/codec/L2/demos/leptonEnc/host/io/Zlib0.hh
new file mode 100644
index 0000000000..89c191d883
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/Zlib0.hh
@@ -0,0 +1,54 @@
+/*  Sirikata Jpeg Texture Transfer -- Zlib implementation
+ *  Zlib0.hpp
+ *
+ *  Copyright (c) 2015, Daniel Reiter Horn
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are
+ *  met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "Reader.hh"
+namespace Sirikata {
+/**
+ * Writes a zlib compression stream given an input
+ * Currently only supports nop mode
+ */
+class SIRIKATA_EXPORT Zlib0Writer : public DecoderWriter {
+    DecoderWriter* mBase;  // downstream writer that receives the framed stream
+    // currently the system only works for a preconceived filesize
+    size_t mFileSize;
+    size_t mWritten;  // payload bytes forwarded so far
+    uint32_t mAdler32; // adler32 sum
+    bool mClosed;  // set once Close() has written the checksum
+    uint16_t mBilledBytesLeft; // how many bytes are left in this block
+    std::pair<uint32, JpegError> writeHeader();  // sends the 2-byte zlib header to mBase
+
+   public:
+    Zlib0Writer(DecoderWriter* stream, int level);  // `level` must be 0 (asserted in Zlib0.cc)
+    void setFullFileSize(size_t size) { mFileSize = size; }  // call before Write(): Write() asserts against this size
+    virtual std::pair<uint32, JpegError> Write(const uint8* data, unsigned int size);
+    virtual ~Zlib0Writer();
+    /// writes the adler32 sum
+    virtual void Close();
+    static size_t getCompressedSize(size_t originalSize);  // header + per-block framing + adler32 byte count for `originalSize`
+};
+}
diff --git a/codec/L2/demos/leptonEnc/host/io/ZlibCompression.cc b/codec/L2/demos/leptonEnc/host/io/ZlibCompression.cc
new file mode 100644
index 0000000000..ad868ce5c3
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/ZlibCompression.cc
@@ -0,0 +1,286 @@
+/*  Sirikata Jpeg Texture Transfer -- Texture Transfer management system
+ *  ZlibCompression.cpp
+ *
+ *  Copyright (c) 2015, Daniel Reiter Horn
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are
+ *  met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ZlibCompression.hh"
+
+namespace Sirikata {
+JpegAllocator<uint8_t> alloc;
+
+void* custom_zallocator(void* opaque2, unsigned int nmemb, unsigned int size) {  // zalloc hook handed to zlib
+    const JpegAllocator<uint8_t>* allocator = static_cast<const JpegAllocator<uint8_t>*>(opaque2);
+    return allocator->get_custom_allocate()(allocator->get_custom_state(), nmemb, size);
+}
+void custom_zdeallocator(void* opaque2, void* data) {  // zfree hook handed to zlib
+    const JpegAllocator<uint8_t>* allocator = static_cast<const JpegAllocator<uint8_t>*>(opaque2);
+    allocator->get_custom_deallocate()(allocator->get_custom_state(), data);
+}
+std::vector<uint8_t, JpegAllocator<uint8_t> > ZlibDecoderCompressionWriter::Compress(
+    const uint8_t* buffer, size_t size, const JpegAllocator<uint8_t>& alloc) {
+    z_stream strm;  // one-shot level-9 deflate of `buffer`; the returned vector is built with `alloc`
+    memset(&strm, 0, sizeof(z_stream));
+    JpegAllocator<uint8_t> local_alloc;  // zlib's internal state goes through this default-constructed allocator
+    strm.zalloc = &custom_zallocator;
+    strm.zfree = &custom_zdeallocator;
+    strm.opaque = &local_alloc;
+    strm.next_in = (Bytef*)buffer;
+    int ret = deflateInit(&strm, 9);
+    if (ret != Z_OK) {
+        assert(false && "LZMA Incorrectly installed");
+        exit(1); // lzma not installed properly
+    }
+    strm.avail_in = size;
+    std::vector<uint8_t, JpegAllocator<uint8_t> > retval(alloc);
+    retval.resize(compressBound(size));
+    strm.next_out = retval.data();
+    strm.avail_out = retval.size();
+    ret = deflate(&strm, Z_NO_FLUSH);
+    while ((ret == Z_OK || ret == Z_BUF_ERROR) && strm.avail_out != 0) {
+        ret = deflate(&strm, Z_FINISH);  // retry only while zlib still has output room to progress
+    }
+    if (ret != Z_STREAM_END) {  // hard error or output exhausted: the previous loop spun forever here
+        assert(false && "zlib deflate did not reach Z_STREAM_END");
+        exit(1);
+    }
+    retval.resize(retval.size() - strm.avail_out);  // trim to the bytes actually produced
+    deflateEnd(&strm);  // exactly once (was called twice on the success path)
+    return retval;
+}
+std::pair<std::vector<uint8_t, JpegAllocator<uint8_t> >, JpegError> ZlibDecoderDecompressionReader::Decompress(
+    const uint8_t* buffer, size_t size, const JpegAllocator<uint8_t>& alloc) {
+    z_stream strm;  // one-shot inflate of `buffer`; the output vector uses `alloc` and doubles on demand
+    memset(&strm, 0, sizeof(z_stream));
+    JpegAllocator<uint8_t> local_alloc;
+    strm.zalloc = &custom_zallocator;
+    strm.zfree = &custom_zdeallocator;
+    strm.opaque = &local_alloc;
+    std::pair<std::vector<uint8_t, JpegAllocator<uint8_t> >, JpegError> retval(
+        std::vector<uint8_t, JpegAllocator<uint8_t> >(alloc), JpegError::nil());
+    retval.first.resize(size * 2);
+    size_t retval_size = 0;  // output bytes already accounted for in retval.first
+    int ret = inflateInit(&strm);
+    if (ret != Z_OK) {
+        retval.second = JpegError::errShortHuffmanData();
+        retval.first.clear();
+    } else {
+        size_t avail_bytes = retval.first.size();  // output room offered to the current inflate() call
+        strm.next_in = (Bytef*)buffer;
+        strm.avail_in = size;
+        strm.next_out = retval.first.data();
+        strm.avail_out = avail_bytes;
+        while (true) {
+            ret = inflate(&strm, strm.avail_in == 0 ? Z_FINISH : Z_NO_FLUSH);
+            if (ret == Z_STREAM_END) {
+                retval_size += avail_bytes - strm.avail_out;
+                if (strm.avail_in != 0) {
+                    retval.second = JpegError::errShortHuffmanData();  // unread input left after the stream end
+                } else {
+                    retval.first.resize(retval_size);
+                }
+                break;
+            }
+            // Z_BUF_ERROR with all offered output consumed only means "needs more room", not corruption.
+            if (ret != Z_OK && !(ret == Z_BUF_ERROR && strm.avail_out == 0 && avail_bytes != 0)) {
+                retval.second = JpegError::errShortHuffmanData();
+                break;
+            }
+            if (strm.avail_out == 0) {
+                retval_size += avail_bytes - strm.avail_out;
+                retval.first.resize(retval.first.size() * 2);
+                avail_bytes = retval.first.size() - retval_size;
+                strm.next_out = retval.first.data() + retval_size;  // resize may have moved the storage
+                strm.avail_out = avail_bytes;
+            }
+        }
+        inflateEnd(&strm);  // previously skipped on the error paths, leaking the inflate state
+    }
+    return retval;
+}
+ZlibDecoderDecompressionReader::ZlibDecoderDecompressionReader(DecoderReader* r,
+                                                               bool concatenated,  // not referenced in the body below
+                                                               const JpegAllocator<uint8_t>& alloc)
+    : mAlloc(alloc) {
+    mClosed = false;
+    mStreamEndEncountered = false;
+    mBase = r;  // compressed bytes are pulled from here by Read()
+    z_stream tmp;
+    memset(&tmp, 0, sizeof(z_stream));
+    mStream = tmp;  // start from an all-zero z_stream
+    mStream.zalloc = &custom_zallocator;
+    mStream.zfree = &custom_zdeallocator;
+    mStream.opaque = &mAlloc;  // the member copy, so the pointer stays valid for this object's lifetime
+
+    int ret = inflateInit(&mStream);
+    mStream.next_in = NULL;  // no input yet; Read() refills from mBase when avail_in is 0
+    mStream.avail_in = 0;
+    if (ret != Z_OK) {  // failures are only reported through assert(), i.e. silently ignored under NDEBUG
+        switch (ret) {
+            case Z_MEM_ERROR:
+                assert(ret == Z_OK && "the stream decoder had insufficient memory");
+                break;
+            default:
+                assert(ret == Z_OK && "the stream decoder was not initialized properly");
+        }
+    }
+}
+
+std::pair<uint32, JpegError> ZlibDecoderDecompressionReader::Read(uint8* data, unsigned int size) {
+    mStream.next_out = data;  // inflate straight into the caller's buffer
+    mStream.avail_out = size;
+    while (true) {
+        JpegError err = JpegError::nil();  // only consumed by the commented-out return expression below
+        int action = Z_NO_FLUSH;
+        if (mStream.avail_in == 0) {  // staged input exhausted: refill from the base reader
+            mStream.next_in = mReadBuffer;
+            std::pair<uint32, JpegError> bytesRead = mBase->Read(mReadBuffer, sizeof(mReadBuffer));
+            mStream.avail_in = bytesRead.first;
+            err = bytesRead.second;
+            if (bytesRead.first == 0) {
+                action = Z_FINISH;  // base produced nothing: ask zlib to finish with what it has
+            }
+        }
+        int ret = inflate(&mStream, action);
+        if (mStream.avail_out == 0 || ret == Z_STREAM_END) {  // caller's buffer full, or stream complete
+            if (ret == Z_STREAM_END) {
+                mStreamEndEncountered = true;
+            }
+            unsigned int write_size = size - mStream.avail_out;
+            return std::pair<uint32, JpegError>(write_size, JpegError::nil());  // always nil here, even if the base read reported an error
+            /*                                                (ret == LZMA_STREAM_END
+                                                             || (ret == LZMA_OK &&write_size > 0))
+                                                             ? JpegError::nil() : err);*/
+        }
+        if (ret != Z_OK) {  // NOTE(review): messages below still mention XZ/LZMA although this is the zlib path
+            switch (ret) {
+                case Z_STREAM_ERROR:
+                    return std::pair<uint32, JpegError>(0, MakeJpegError("Invalid XZ magic number"));
+                case Z_DATA_ERROR:
+                case Z_BUF_ERROR:
+                    return std::pair<uint32, JpegError>(size - mStream.avail_out, MakeJpegError("Corrupt xz file"));
+                case Z_MEM_ERROR:
+                    assert(false && "Memory allocation failed");
+                    break;  // under NDEBUG this falls back into the while loop and retries
+                default:
+                    assert(false && "Unknown LZMA error code");
+            }
+        }
+    }
+    return std::pair<uint32, JpegError>(0, MakeJpegError("Unreachable"));
+}
+void ZlibDecoderDecompressionReader::Close() {
+    // Safe to call repeatedly: zlib's inflate state is released on the first call only.
+    if (mClosed) return;
+    mClosed = true;
+    inflateEnd(&mStream);
+}
+ZlibDecoderDecompressionReader::~ZlibDecoderDecompressionReader() {
+    Close();
+}
+
+ZlibDecoderCompressionWriter::ZlibDecoderCompressionWriter(DecoderWriter* w,
+                                                           uint8_t compression_level,  // passed straight to deflateInit
+                                                           const JpegAllocator<uint8_t>& alloc)
+    : mAlloc(alloc) {
+    mClosed = false;
+    mBase = w;  // compressed output is forwarded here
+    z_stream tmp;
+    memset(&tmp, 0, sizeof(z_stream));
+    mStream = tmp;  // all-zero start, so next_out/avail_out are 0 until Write() sets them
+    mStream.zalloc = &custom_zallocator;
+    mStream.zfree = &custom_zdeallocator;
+    mStream.opaque = &mAlloc;  // the member copy, so the pointer stays valid for this object's lifetime
+    int ret = deflateInit(&mStream, compression_level);
+    mStream.next_in = NULL;
+    mStream.avail_in = 0;
+    if (ret != Z_OK) {  // failures are only reported through assert(), i.e. silently ignored under NDEBUG
+        switch (ret) {
+            case Z_MEM_ERROR:
+                assert(ret == Z_OK && "the stream decoder had insufficient memory");
+                break;
+            case Z_STREAM_ERROR:
+                assert(ret == Z_OK && "Specified integrity check but not supported");  // no break: falls through to default
+            default:
+                assert(ret == Z_OK && "the stream decoder was not initialized properly");
+        }
+    }
+}
+
+void ZlibDecoderCompressionWriter::Close() {
+    assert(!mClosed);
+    mClosed = true;
+    // Write() is the only other place that points zlib at mWriteBuffer; if it never ran,
+    // next_out/avail_out are still zero and the whole uninitialized buffer would be flushed.
+    mStream.next_out = mWriteBuffer;
+    mStream.avail_out = sizeof(mWriteBuffer);
+    int ret;
+    do {
+        ret = deflate(&mStream, Z_FINISH);
+        size_t write_size = sizeof(mWriteBuffer) - mStream.avail_out;
+        if (write_size > 0 && (mStream.avail_out == 0 || ret == Z_STREAM_END)) {
+            if (mBase->Write(mWriteBuffer, write_size).second != JpegError::nil()) {
+                break;  // downstream failed: stop, but still release zlib's state below
+            }
+            mStream.avail_out = sizeof(mWriteBuffer);
+            mStream.next_out = mWriteBuffer;
+        }
+    } while (ret == Z_OK);  // any other code is completion or an error that retrying cannot fix
+    deflateEnd(&mStream);  // was never called before, leaking the deflate state
+}
+
+std::pair<uint32, JpegError> ZlibDecoderCompressionWriter::Write(const uint8* data, unsigned int size) {
+    mStream.next_out = mWriteBuffer;  // every call starts with an empty staging buffer
+    mStream.avail_out = sizeof(mWriteBuffer);
+    mStream.avail_in = size;
+    mStream.next_in = (Bytef*)data;
+    std::pair<uint32, JpegError> retval(0, JpegError::nil());  // .first counts bytes accepted by mBase, not input bytes
+    while (mStream.avail_in > 0) {  // NOTE(review): a deflate() error that consumes nothing would spin here
+        int ret = deflate(&mStream, Z_NO_FLUSH);
+        if (mStream.avail_in == 0 || mStream.avail_out == 0 || ret == Z_STREAM_END) {
+            size_t write_size = sizeof(mWriteBuffer) - mStream.avail_out;
+            if (write_size > 0) {  // forward whatever zlib produced so far
+                std::pair<uint32, JpegError> r = mBase->Write(mWriteBuffer, write_size);
+                mStream.avail_out = sizeof(mWriteBuffer);  // staging buffer is reset before the error check
+                mStream.next_out = mWriteBuffer;
+                retval.first += r.first;
+                if (r.second != JpegError::nil()) {
+                    retval.second = r.second;
+                    return retval;
+                }
+            }
+        }
+    }
+    return retval;
+}
+
+ZlibDecoderCompressionWriter::~ZlibDecoderCompressionWriter() {
+    if (!mClosed) {
+        Close();
+    }
+    assert(mClosed);
+}
+}
diff --git a/codec/L2/demos/leptonEnc/host/io/ZlibCompression.hh b/codec/L2/demos/leptonEnc/host/io/ZlibCompression.hh
new file mode 100644
index 0000000000..954f1c3ec5
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/ZlibCompression.hh
@@ -0,0 +1,72 @@
+/*  Sirikata Jpeg Texture Transfer -- Texture Transfer management system
+ *  ZlibCompression.hpp
+ *
+ *  Copyright (c) 2015, Daniel Reiter Horn
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are
+ *  met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "Reader.hh"
+#ifdef USE_SYSTEM_DEPENDENCIES
+#include <zlib.h>
+#else
+#include "../../dependencies/zlib/zlib.h"
+#endif
+namespace Sirikata {
+class SIRIKATA_EXPORT ZlibDecoderDecompressionReader : public DecoderReader {  // inflating adapter over another DecoderReader
+   protected:
+    JpegAllocator<uint8_t> mAlloc;  // registered with zlib as the allocation context
+    unsigned char mReadBuffer[4096];  // staging area for compressed bytes pulled from mBase
+    z_stream mStream;
+    DecoderReader* mBase;  // source of compressed bytes
+    bool mClosed;  // true once inflateEnd() has run
+    bool mStreamEndEncountered;  // set when inflate() reports Z_STREAM_END
+
+   public:
+    static std::pair<std::vector<uint8_t, JpegAllocator<uint8_t> >, JpegError> Decompress(
+        const uint8_t* buffer, size_t size, const JpegAllocator<uint8_t>& alloc);  // one-shot, whole-buffer variant
+    ZlibDecoderDecompressionReader(DecoderReader* r, bool concatenated, const JpegAllocator<uint8_t>& alloc);
+    virtual std::pair<uint32, JpegError> Read(uint8* data, unsigned int size);
+    virtual ~ZlibDecoderDecompressionReader();
+    void Close();  // releases the inflate state; repeated calls are harmless
+};
+
+class SIRIKATA_EXPORT ZlibDecoderCompressionWriter : public DecoderWriter {  // deflating adapter over another DecoderWriter
+    JpegAllocator<uint8_t> mAlloc;  // registered with zlib as the allocation context
+    unsigned char mWriteBuffer[4096];  // staging area for compressed bytes before they go to mBase
+    z_stream mStream;
+    DecoderWriter* mBase;  // destination for compressed bytes
+    bool mClosed;  // set by Close(); the destructor calls Close() when still false
+
+   public:
+    static std::vector<uint8_t, JpegAllocator<uint8_t> > Compress(const uint8_t* buffer,
+                                                                  size_t size,
+                                                                  const JpegAllocator<uint8_t>& alloc);
+    // compression level should be a value: 1 through 9
+    ZlibDecoderCompressionWriter(DecoderWriter* w, uint8_t compression_level, const JpegAllocator<uint8_t>& alloc);
+    virtual std::pair<uint32, JpegError> Write(const uint8* data, unsigned int size);
+    virtual ~ZlibDecoderCompressionWriter();
+    virtual void Close();  // finishes the deflate stream and forwards the remaining output
+};
+}
diff --git a/codec/L2/demos/leptonEnc/host/io/ioutil.cc b/codec/L2/demos/leptonEnc/host/io/ioutil.cc
new file mode 100644
index 0000000000..89ce0e1779
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/ioutil.cc
@@ -0,0 +1,241 @@
+#include "../../vp8/util/memory.hh"
+#include <string.h>
+#ifdef _WIN32
+#include <io.h>
+#include <fcntl.h>
+#include <thread>
+#define S_IWUSR 0
+#define S_IRUSR 0
+#else
+#include <sys/select.h>
+#endif
+#include "Reader.hh"
+#include "ioutil.hh"
+#ifdef _WIN32
+#include <Windows.h>
+#include <tchar.h>
+#endif
+namespace IOUtil {
+/*
+FileReader * OpenFileOrPipe(const char * filename, int is_pipe, int max_file_size) {
+    int fp = 0;
+    if (!is_pipe) {
+        fp = open(filename, O_RDONLY);
+    }
+    if (fp >= 0) {
+        return new FileReader(fp, max_file_size);
+    }
+    return NULL;
+}
+*/ /*
+ FileWriter * OpenWriteFileOrPipe(const char * filename, int is_pipe) {
+     int fp = 1;
+     if (!is_pipe) {
+         fp = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR | S_IRUSR);
+     }
+     if (fp >= 0) {
+         return new FileWriter(fp, !g_use_seccomp);
+     }
+     return NULL;
+ }
+ */
+
+FileReader* BindFdToReader(int fd, unsigned int max_file_size, bool is_socket) {
+    // Negative descriptors are invalid: return NULL rather than wrapping one.
+    if (fd < 0) return NULL;
+    // Allocated with new; this function keeps no reference to the returned reader.
+    return new FileReader(fd, max_file_size, is_socket);
+}
+FileWriter* BindFdToWriter(int fd, bool is_socket) {
+    // Negative descriptors are invalid: return NULL rather than wrapping one.
+    if (fd < 0) return NULL;
+    // The writer closes fd in Close() only when seccomp is not in use (!g_use_seccomp).
+    return new FileWriter(fd, !g_use_seccomp, is_socket);
+}
+void send_all_and_close(int fd, const uint8_t* data, size_t data_size) {
+    // Keep calling write() until the whole buffer has been handed to fd, then close fd.
+    const uint8_t* cursor = data;
+    for (size_t remaining = data_size; remaining > 0;) {
+        auto sent = write(fd, cursor, remaining);
+        if (sent > 0) {  // partial or full write: step past the accepted bytes
+            cursor += sent;
+            remaining -= sent;
+        } else if (sent == 0) {
+            break;  // nothing accepted and no error reported: stop sending
+        } else if (errno != EINTR) {
+            auto local_errno = errno;  // capture before fprintf can disturb errno
+            fprintf(stderr, "Send err %d\n", local_errno);
+            custom_exit(ExitCode::SHORT_READ);
+        }  // sent < 0 with EINTR: interrupted by a signal, simply retry
+    }
+    // close() itself may be interrupted; retry until it is not.
+    while (close(fd) == -1 && errno == EINTR) {
+    }
+}
+void discard_stderr(int fd) {
+    char chunk[4097];  // 4096 payload bytes plus one slot for the terminating NUL
+    chunk[sizeof(chunk) - 1] = '\0';
+    for (;;) {
+        auto got = read(fd, chunk, sizeof(chunk) - 1);
+        if (got > 0) {
+            chunk[got] = '\0';  // terminate so the chunk can be printed with %s
+            fprintf(stderr, "%s", chunk);  // despite the name, the data is forwarded to our own stderr
+            continue;
+        }
+        if (got == 0 || errno != EINTR) {
+            break;  // end of stream or a real error: stop forwarding
+        }  // otherwise read() was interrupted by a signal: try again
+    }
+}
+SubprocessConnection start_subprocess(int argc, const char** argv, bool pipe_stderr) {
+    // Launches argv[0] with stdin/stdout (and stderr when pipe_stderr is set) connected to fresh pipes and
+    // returns the parent's ends; pipe_stderr in the result is -1 when stderr was not piped. Setup failures exit.
+    SubprocessConnection retval;
+    memset(&retval, 0, sizeof(retval));
+#ifdef _WIN32
+    SECURITY_ATTRIBUTES saAttr;
+    saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
+    saAttr.bInheritHandle = TRUE;
+    saAttr.lpSecurityDescriptor = NULL;
+    HANDLE hChildStd_IN_Rd;
+    HANDLE hChildStd_IN_Wr;
+
+    HANDLE hChildStd_OUT_Rd;
+    HANDLE hChildStd_OUT_Wr;
+
+    HANDLE hChildStd_ERR_Rd;
+    HANDLE hChildStd_ERR_Wr;
+    bool simpler = true;
+    if (!CreatePipe(&hChildStd_OUT_Rd, &hChildStd_OUT_Wr, &saAttr, 0)) {
+        custom_exit(ExitCode::OS_ERROR);
+    }
+    if (!SetHandleInformation(hChildStd_OUT_Rd, HANDLE_FLAG_INHERIT, 0)) {
+        custom_exit(ExitCode::OS_ERROR);
+    }
+    if (pipe_stderr || !simpler) {
+        if (!CreatePipe(&hChildStd_ERR_Rd, &hChildStd_ERR_Wr, &saAttr, 0)) {
+            custom_exit(ExitCode::OS_ERROR);
+        }
+        if (!SetHandleInformation(hChildStd_ERR_Rd, HANDLE_FLAG_INHERIT, 0)) {
+            custom_exit(ExitCode::OS_ERROR);
+        }
+    }
+    if (!CreatePipe(&hChildStd_IN_Rd, &hChildStd_IN_Wr, &saAttr, 0)) {
+        custom_exit(ExitCode::OS_ERROR);
+    }
+    if (!SetHandleInformation(hChildStd_IN_Wr, HANDLE_FLAG_INHERIT, 0)) {
+        custom_exit(ExitCode::OS_ERROR);
+    }
+    PROCESS_INFORMATION piProcInfo;
+    STARTUPINFO siStartInfo;
+    memset(&siStartInfo, 0, sizeof(siStartInfo));
+    memset(&piProcInfo, 0, sizeof(piProcInfo));
+    siStartInfo.cb = sizeof(STARTUPINFO);
+
+    if (pipe_stderr || !simpler) {
+        siStartInfo.hStdError = hChildStd_ERR_Wr;
+    } else {
+        siStartInfo.hStdError = GetStdHandle(STD_ERROR_HANDLE);
+    }
+    siStartInfo.hStdOutput = hChildStd_OUT_Wr;
+    siStartInfo.hStdInput = hChildStd_IN_Rd;
+    siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
+    std::vector<char> command_line;
+    const char* exe_shorthand = "lepton.exe";
+    command_line.insert(command_line.end(), exe_shorthand, exe_shorthand + strlen(exe_shorthand));
+    for (int i = 1; i < argc; ++i) {
+        command_line.push_back(' ');
+        command_line.insert(command_line.end(), argv[i], argv[i] + strlen(argv[i]));
+    }
+    command_line.push_back('\0');
+    if (!CreateProcess(argv[0], &command_line[0],
+                       NULL, // process security attributes
+                       NULL, // primary thread security attributes,
+                       TRUE, // handles inherited,
+                       0,    // flags,
+                       NULL, // use parent environment,
+                       NULL, // use current dir,
+                       &siStartInfo, &piProcInfo)) {
+        fprintf(stderr, "Failed To start subprocess with command line %s\n", &command_line[0]);
+        custom_exit(ExitCode::OS_ERROR);
+    }
+    CloseHandle(piProcInfo.hProcess);
+    CloseHandle(piProcInfo.hThread);
+
+    if (pipe_stderr || !simpler) {
+        CloseHandle(hChildStd_ERR_Wr);
+        while ((retval.pipe_stderr = _open_osfhandle((intptr_t)hChildStd_ERR_Rd, O_APPEND | O_RDONLY)) == -1 &&
+               errno == EINTR) {
+        }
+    } else {
+        retval.pipe_stderr = -1;
+    }
+    if (simpler == false && !pipe_stderr) {
+        std::thread stderr_drain(&discard_stderr, retval.pipe_stderr);  // must not be named like the function it runs
+        stderr_drain.detach();
+        retval.pipe_stderr = -1;
+    }
+    CloseHandle(hChildStd_OUT_Wr);
+    while ((retval.pipe_stdout = _open_osfhandle((intptr_t)hChildStd_OUT_Rd, O_APPEND | O_RDONLY)) == -1 &&
+           errno == EINTR) {
+    }
+    CloseHandle(hChildStd_IN_Rd);
+    while ((retval.pipe_stdin = _open_osfhandle((intptr_t)hChildStd_IN_Wr, O_APPEND | O_WRONLY)) == -1 &&
+           errno == EINTR) {
+    }
+#else
+    int stdin_pipes[2] = {-1, -1};
+    int stdout_pipes[2] = {-1, -1};
+    int stderr_pipes[2] = {-1, -1};
+    while (pipe(stdin_pipes) < 0) {  // only EINTR is retried; any other pipe() failure is fatal
+        if (errno != EINTR) custom_exit(ExitCode::OS_ERROR);
+    }
+    while (pipe(stdout_pipes) < 0) {
+        if (errno != EINTR) custom_exit(ExitCode::OS_ERROR);
+    }
+    if (pipe_stderr) {
+        while (pipe(stderr_pipes) < 0) {
+            if (errno != EINTR) custom_exit(ExitCode::OS_ERROR);
+        }
+    }
+    if ((retval.sub_pid = fork()) == 0) {  // child: drop the parent's pipe ends, then rewire fds 0/1/2
+        while (close(stdin_pipes[1]) == -1 && errno == EINTR) {
+        }
+        while (close(stdout_pipes[0]) == -1 && errno == EINTR) {
+        }
+        if (pipe_stderr) {
+            while (close(stderr_pipes[0]) == -1 && errno == EINTR) {
+            }
+        }
+        // dup2() closes the target descriptor itself, so no separate close(0/1/2) is needed first.
+        while (dup2(stdin_pipes[0], 0) == -1 && errno == EINTR) {
+        }
+        while (dup2(stdout_pipes[1], 1) == -1 && errno == EINTR) {
+        }
+        if (pipe_stderr) {
+            while (dup2(stderr_pipes[1], 2) == -1 && errno == EINTR) {
+            }
+        }
+        std::vector<char*> args(argc + 1);  // value-initialised, so args[argc] is already the NULL terminator
+        for (int i = 0; i < argc; ++i) {
+            args[i] = (char*)argv[i];
+        }
+        execvp(args[0], &args[0]);
+        _exit(127);  // execvp() returns only on failure; never fall through into the parent's code path
+    }
+    if (retval.sub_pid < 0) custom_exit(ExitCode::OS_ERROR);  // fork() failed: there is no child to talk to
+    while (close(stdin_pipes[0]) == -1 && errno == EINTR) {
+    }
+    while (close(stdout_pipes[1]) == -1 && errno == EINTR) {
+    }
+    if (pipe_stderr) {
+        while (close(stderr_pipes[1]) == -1 && errno == EINTR) {
+        }
+    }
+    retval.pipe_stdin = stdin_pipes[1];
+    retval.pipe_stdout = stdout_pipes[0];
+    retval.pipe_stderr = stderr_pipes[0];
+#endif
+    return retval;
+}
+}
diff --git a/codec/L2/demos/leptonEnc/host/io/ioutil.hh b/codec/L2/demos/leptonEnc/host/io/ioutil.hh
new file mode 100644
index 0000000000..8302cb0d1c
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/ioutil.hh
@@ -0,0 +1,132 @@
+#ifndef _SIRIKIATA_IO_UTIL_HH_
+#define _SIRIKIATA_IO_UTIL_HH_
+#ifndef _WIN32
+#include <unistd.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#endif
+#include "../vp8/util/nd_array.hh"
+#include "MuxReader.hh"
+namespace Sirikata {
+class DecoderReader;
+class DecoderWriter;
+}
+namespace IOUtil {
+//#ifdef _WIN32
+//    typedef void* HANDLE_or_fd;
+//#else
+typedef int HANDLE_or_fd;
+//#endif
+
+inline Sirikata::uint32 ReadFull(Sirikata::DecoderReader* reader, void* vdata, Sirikata::uint32 size) {
+    // Calls reader->Read() until size bytes arrived, an error was reported, or a read delivered 0 bytes.
+    unsigned char* const out = reinterpret_cast<unsigned char*>(vdata);
+    Sirikata::uint32 filled = 0;
+    while (filled < size) {
+        std::pair<Sirikata::uint32, Sirikata::JpegError> step = reader->Read(out + filled, size - filled);
+        filled += step.first;
+        if (step.second != Sirikata::JpegError::nil() || step.first == 0) {
+            break;  // hand the caller whatever was gathered so far
+        }
+    }
+    return filled;  // number of bytes stored in vdata; less than size on error or end of input
+}
+
+class FileReader : public Sirikata::DecoderReader {  // DecoderReader that pulls bytes from a file descriptor via read()
+    int fp;               // descriptor to read from; this class contains no close() call for it
+    uint32_t total_read;  // bytes delivered by Read() plus any added via mark_some_bytes_already_read()
+    uint32_t max_read;    // cap on total_read; 0 means no cap
+    bool is_fd_socket;
+
+   public:
+    FileReader(int ff, int max_read_allowed, bool is_socket) {
+        fp = ff;
+        this->is_fd_socket = is_socket;
+        total_read = 0;
+        max_read = max_read_allowed;
+    }
+    bool is_socket() const { return is_fd_socket; }
+    std::pair<Sirikata::uint32, Sirikata::JpegError> Read(Sirikata::uint8* data, unsigned int size) {
+        if (max_read && total_read + size > max_read) {
+            size = max_read - total_read;  // clamp the request to what the cap still allows
+            if (size == 0) {
+                return std::pair<Sirikata::uint32, Sirikata::JpegError>(0, Sirikata::JpegError::errEOF());
+            }
+        }
+        using namespace Sirikata;
+        do {
+            signed long nread = read(fp, data, size);
+            if (nread <= 0) {
+                if (nread < 0 && errno == EINTR) {  // errno is only meaningful when read() failed; 0 is plain EOF
+                    continue;
+                }
+                return std::pair<Sirikata::uint32, JpegError>(0, MakeJpegError("Short read"));
+            }
+            total_read += nread;
+            return std::pair<Sirikata::uint32, JpegError>(nread, JpegError::nil());
+        } while (true); // while not EINTR
+    }
+    unsigned int bound() const { return max_read; }
+    size_t length() { return total_read; }
+    size_t getsize() { return total_read; }
+    int get_fd() const { return fp; }
+    void mark_some_bytes_already_read(uint32_t num_bytes) { total_read += num_bytes; }  // counts toward max_read
+};
+class FileWriter : public Sirikata::DecoderWriter {  // DecoderWriter that pushes bytes to a file descriptor via write()
+    int fp;             // descriptor to write to; set to -1 by Close()
+    int total_written;  // sum of the sizes of all fully completed Write() calls
+    bool close_stream;  // when false, Close() forgets the descriptor without closing it
+    bool is_fd_socket;
+
+   public:
+    FileWriter(int ff, bool do_close_stream, bool is_fd_socket) {
+        this->is_fd_socket = is_fd_socket;
+        fp = ff;
+        total_written = 0;
+        close_stream = do_close_stream;
+    }
+    bool is_socket() const { return is_fd_socket; }
+    void Close() {
+        if (close_stream) {
+            close(fp); // not always useful (eg during SECCOMP)
+        }
+        fp = -1;
+    }
+    std::pair<Sirikata::uint32, Sirikata::JpegError> Write(const Sirikata::uint8* data, unsigned int size) {
+        using namespace Sirikata;
+        size_t data_written = 0;
+        while (data_written < size) {  // write() may accept fewer bytes than asked for; loop until all are out
+            signed long nwritten = write(fp, data + data_written, size - data_written);
+            if (nwritten <= 0) {
+                if (nwritten < 0 && errno == EINTR) {  // errno is only meaningful when write() failed
+                    continue;
+                }
+                // The size_t -> Sirikata::uint32 cast is safe because sizeof(size) is <= sizeof(Sirikata::uint32)
+                return std::pair<Sirikata::uint32, JpegError>(static_cast<Sirikata::uint32>(data_written),
+                                                              JpegError::errShortHuffmanData());
+            }
+            data_written += nwritten;
+        }
+        total_written += size;
+        return std::pair<Sirikata::uint32, JpegError>(size, JpegError::nil());
+    }
+    size_t getsize() { return total_written; }
+    int get_fd() const { return fp; }
+};
+
+// SIRIKATA_FUNCTION_EXPORT FileReader * OpenFileOrPipe(const char * filename, int is_pipe, int max_size_read);
+// SIRIKATA_FUNCTION_EXPORT FileWriter * OpenWriteFileOrPipe(const char * filename, int is_pipe);
+
+SIRIKATA_FUNCTION_EXPORT FileReader* BindFdToReader(int fd, uint32_t max_size_read, bool is_socket);
+SIRIKATA_FUNCTION_EXPORT FileWriter* BindFdToWriter(int fd, bool is_socket);
+
+struct SubprocessConnection {  // what start_subprocess() hands back to the parent
+    HANDLE_or_fd pipe_stdin;   // parent's write end of the pipe feeding the child's stdin
+    HANDLE_or_fd pipe_stdout;  // parent's read end of the child's stdout
+    HANDLE_or_fd pipe_stderr;  // parent's read end of the child's stderr, or -1 when stderr was not piped
+    int sub_pid;               // fork() result on POSIX; left at 0 by the Windows branch of start_subprocess()
+};
+SubprocessConnection start_subprocess(int argc, const char** argv, bool pipe_stder);
+}
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/io/seccomp-bpf.hh b/codec/L2/demos/leptonEnc/host/io/seccomp-bpf.hh
new file mode 100644
index 0000000000..a532f991ac
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/io/seccomp-bpf.hh
@@ -0,0 +1,77 @@
+/*
+ * seccomp example for x86 (32-bit and 64-bit) with BPF macros
+ *
+ * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org>
+ * Authors:
+ *  Will Drewry <wad@chromium.org>
+ *  Kees Cook <keescook@chromium.org>
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#ifndef _SECCOMP_BPF_H_
+#define _SECCOMP_BPF_H_
+
+#define _GNU_SOURCE 1
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/prctl.h>
+#ifndef PR_SET_NO_NEW_PRIVS
+#define PR_SET_NO_NEW_PRIVS 38
+#endif
+
+#include <linux/unistd.h>
+#include <linux/audit.h>
+#include <linux/filter.h>
+#ifdef HAVE_LINUX_SECCOMP_H
+//# include <linux/seccomp.h>
+#endif
+#ifndef SECCOMP_MODE_FILTER
+#define SECCOMP_MODE_FILTER 2         /* uses user-supplied filter. */
+#define SECCOMP_RET_KILL 0x00000000U  /* kill the task immediately */
+#define SECCOMP_RET_TRAP 0x00030000U  /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
+struct seccomp_data {
+    int nr;
+    __u32 arch;
+    __u64 instruction_pointer;
+    __u64 args[6];
+};
+#endif
+#ifndef SYS_SECCOMP
+#define SYS_SECCOMP 1
+#endif
+
+#define syscall_nr (offsetof(struct seccomp_data, nr))
+#define arch_nr (offsetof(struct seccomp_data, arch))
+
+#if defined(__i386__)
+#define REG_SYSCALL REG_EAX /* ucontext register index holding the syscall number on i386 */
+#define ARCH_NR AUDIT_ARCH_I386 /* value VALIDATE_ARCHITECTURE compares seccomp_data.arch against */
+#elif defined(__x86_64__)
+#define REG_SYSCALL REG_RAX /* ucontext register index holding the syscall number on x86-64 */
+#define ARCH_NR AUDIT_ARCH_X86_64 /* value VALIDATE_ARCHITECTURE compares seccomp_data.arch against */
+#else
+#warning "Platform does not support seccomp filter yet"
+#define REG_SYSCALL 0 /* placeholder so the header still compiles on other platforms */
+#define ARCH_NR 0 /* placeholder so the header still compiles on other platforms */
+#endif
+/* Filter prologue: load seccomp_data.arch; if it equals ARCH_NR skip the next instruction, else return KILL. */
+#define VALIDATE_ARCHITECTURE                   \
+    BPF_STMT(BPF_LD + BPF_W + BPF_ABS, arch_nr) \
+    , BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARCH_NR, 1, 0), BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_KILL)
+/* Load seccomp_data.nr (the syscall number) so the ALLOW_SYSCALL entries that follow can test it. */
+#define EXAMINE_SYSCALL BPF_STMT(BPF_LD + BPF_W + BPF_ABS, syscall_nr)
+/* If the loaded number equals __NR_<name> return ALLOW; otherwise jump over that return to the next entry. */
+#define ALLOW_SYSCALL(name) \
+    BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_##name, 0, 1), BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW)
+/* Unconditional return of SECCOMP_RET_KILL; usable as the final entry of a filter. */
+#define KILL_PROCESS BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_KILL)
+
+#endif /* _SECCOMP_BPF_H_ */
diff --git a/codec/L2/demos/leptonEnc/host/lepton/.gitignore b/codec/L2/demos/leptonEnc/host/lepton/.gitignore
new file mode 100644
index 0000000000..122c92fd53
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/.gitignore
@@ -0,0 +1,10 @@
+/lepton
+/print-model
+/test_[^hcdi]*
+/test_invariants
+/test_iphone
+/test_iphone_ujg
+/test_hq*
+/test-suite.log
+/test_custom_table.sh.log
+/test_custom_table.sh.trs
diff --git a/codec/L2/demos/leptonEnc/host/lepton/base_coders.hh b/codec/L2/demos/leptonEnc/host/lepton/base_coders.hh
new file mode 100644
index 0000000000..1e3db2fe06
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/base_coders.hh
@@ -0,0 +1,64 @@
+#ifndef _BASE_CODERS_
+#define _BASE_CODERS_
+#include "../vp8/util/nd_array.hh"
+#include "../vp8/util/generic_worker.hh"
+#include "../vp8/util/block_based_image.hh"
+#include "thread_handoff.hh"
+struct GenericWorker;
+enum CodingReturnValue {  // result type of BaseDecoder::decode_chunk and BaseEncoder::encode_chunk
+    CODING_ERROR,
+    CODING_DONE,
+    CODING_PARTIAL, // run it again
+};
+class UncompressedComponents;
+
+namespace Sirikata {
+class SwitchableXZBase;
+class DecoderCompressionWriter;
+class DecoderReader;
+class DecoderWriter;
+template <class T>
+class SwitchableDecompressionReader;
+template <class T>
+class SwitchableCompressionWriter;
+}
+namespace IOUtil {
+class FileWriter;
+}
+class BaseDecoder {  // abstract interface: every member except the destructor is pure virtual
+   public:
+    virtual ~BaseDecoder() {}  // virtual so implementations can be destroyed through a BaseDecoder*
+    virtual void initialize(Sirikata::DecoderReader* input,
+                            const std::vector<ThreadHandoff>& thread_transition_info) = 0;
+    virtual CodingReturnValue decode_chunk(UncompressedComponents* dst) = 0;
+    virtual void registerWorkers(GenericWorker* workers, unsigned int num_workers) = 0;
+    virtual GenericWorker* getWorker(unsigned int i) = 0;
+    virtual std::vector<ThreadHandoff> initialize_baseline_decoder(
+        const UncompressedComponents* const colldata,
+        Sirikata::Array1d<BlockBasedImagePerChannel<true>, MAX_NUM_THREADS>& framebuffer) = 0;
+    virtual void decode_row(int thread_state_id,
+                            BlockBasedImagePerChannel<true>& image_data, // FIXME: set image_data to true
+                            Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> component_size_in_blocks,
+                            int component,
+                            int curr_y) = 0;  // note: component_size_in_blocks is taken by value
+    virtual size_t get_model_memory_usage() const = 0;
+    virtual size_t get_model_worker_memory_usage() const = 0;
+    virtual void clear_thread_state(int thread_id,
+                                    int target_thread_state,
+                                    BlockBasedImagePerChannel<true>& framebuffer) = 0;
+};
+
+class BaseEncoder {  // abstract interface: every member except the destructor is pure virtual
+   public:
+    virtual ~BaseEncoder() {}  // virtual so implementations can be destroyed through a BaseEncoder*
+    virtual void registerWorkers(GenericWorker* workers, unsigned int num_workers) = 0;
+
+    virtual CodingReturnValue encode_chunk(const UncompressedComponents* input,
+                                           IOUtil::FileWriter*,
+                                           const ThreadHandoff* selected_splits,
+                                           unsigned int num_selected_splits) = 0;  // selected_splits: array of num_selected_splits entries (presumably - confirm)
+    virtual size_t get_decode_model_memory_usage() const = 0;
+    virtual size_t get_decode_model_worker_memory_usage() const = 0;
+};
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/lepton/bitops.cc b/codec/L2/demos/leptonEnc/host/lepton/bitops.cc
new file mode 100644
index 0000000000..9612ea86e2
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/bitops.cc
@@ -0,0 +1,438 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/**
+Copyright (c) 2006...2016, Matthias Stirner and HTW Aalen University
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ **/
+
+/*
+This file contains special classes for bitwise
+reading and writing of arrays
+*/
+#include "../../vp8/util/memory.hh"
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <algorithm>
+#include <assert.h>
+#include "bitops.hh"
+
+#define BUFFER_SIZE (1024 * 1024) /* parenthesised so it stays one value inside larger expressions */
+/* -----------------------------------------------
+        constructor for abitreader class: reads from `array` (`size` bytes) in place, no copy is made
+        ----------------------------------------------- */
+
+abitreader::abitreader(unsigned char* array, int size) {
+    cbyte2 = 0;
+    cbit2 = 0;
+    data2 = array;  // borrowed pointer: the (empty) destructor never frees it
+    eof = false;
+    lbyte = size;
+}
+
+/* -----------------------------------------------
+        destructor for abitreader class (nothing to release: the input array is not owned)
+        ----------------------------------------------- */
+
+abitreader::~abitreader(void) {}
+
+/* -----------------------------------------------
+        constructor for abitwriter class: allocates the initial output buffer of `size` bytes
+        ----------------------------------------------- */
+
+abitwriter::abitwriter(int size, int max_file_size) {
+    size_bound = max_file_size;  // 0 leaves size_bound at 0
+    if (size_bound) {
+        size_bound += 8; // 64 bits of padding on the end
+    }
+    fillbit = 1;
+    adds = 65536;  // default buffer size, used below when size <= 0
+    cbyte2 = 0;
+    cbit2 = 64;
+    buf = 0;
+
+    error = false;
+    fmem = true;  // the destructor frees data2 while this stays true
+    dsize = (size > 0) ? size : adds;
+    data2 = (unsigned char*)custom_calloc(dsize);
+    if (data2 == NULL) {
+        error = true;
+        custom_exit(ExitCode::MALLOCED_NULL);
+        return;
+    }
+    // no explicit clearing loop: custom_calloc presumably returns zeroed memory - confirm
+}
+
+/* -----------------------------------------------
+        destructor for abitwriter class
+        ----------------------------------------------- */
+
+abitwriter::~abitwriter(void) {
+    // free memory if pointer was not given out
+    if (fmem) custom_free(data2);
+}
+
+void aligned_dealloc(unsigned char* data) {  // frees a pointer obtained from aligned_alloc(); NULL is ignored
+    if (!data) return;
+    data -= data[-1];  // the byte just before the buffer stores how far aligned_alloc() advanced the raw pointer
+    custom_free(data);
+}
+unsigned char* aligned_alloc(size_t dsize) {
+    // Returns a 16-byte aligned buffer of at least dsize bytes (NULL if custom_malloc fails).
+    // Release it with aligned_dealloc(), never with custom_free() directly.
+    unsigned char* data = (unsigned char*)custom_malloc(dsize + 16);
+    if (data) {
+        // Distance to the next 16-byte boundary. It is a full 16 when the raw pointer is
+        // already aligned, so there is always at least one byte in front of the result.
+        // Advancing by the remainder itself would only be aligned for remainders 0 and 8.
+        size_t pad = 16 - ((size_t)(data - 0) & 0xf);
+        data += pad;
+        data[-1] = (unsigned char)pad;  // aligned_dealloc() reads this to recover the raw pointer
+    }
+    return data;
+}
+/* -----------------------------------------------
+        constructor for abytewriter class: allocates the initial buffer with aligned_alloc()
+        ----------------------------------------------- */
+
+abytewriter::abytewriter(int size) {
+    adds = 65536;  // default buffer size, used below when size <= 0
+    cbyte = 0;
+
+    error = false;
+    fmem = true;  // the destructor frees data while this stays true
+
+    dsize = (size > 0) ? size : adds;
+    data = aligned_alloc(dsize);
+    if (data == NULL) {
+        error = true;
+        custom_exit(ExitCode::MALLOCED_NULL);
+        return;
+    }
+}
+
+/* -----------------------------------------------
+        destructor for abytewriter class
+        ----------------------------------------------- */
+
+abytewriter::~abytewriter(void) {
+    // free data unless getptr_aligned() handed the buffer out (fmem == false)
+    if (fmem && data) aligned_dealloc(data);
+}
+
+/* -----------------------------------------------
+        writes 1 byte to abytewriter, doubling the buffer when it is nearly full
+        ----------------------------------------------- */
+
+void abytewriter::write(unsigned char byte) {
+    // safety check for error
+    if (error) return;
+
+    // test if pointer beyond flush threshold
+    if (cbyte >= (dsize - 2)) {
+        // Check the new allocation before touching it: memcpy into NULL would crash
+        // before the out-of-memory exit path could run.
+        unsigned char* newData = data ? aligned_alloc(dsize * 2) : NULL;
+        if (newData == NULL) {
+            error = true;
+            custom_exit(ExitCode::MALLOCED_NULL);
+            return;
+        }
+        memcpy(newData, data, dsize);
+        dsize *= 2;
+        aligned_dealloc(data);
+        data = newData;
+    }
+
+    // write data
+    data[cbyte++] = byte;
+}
+
+/* -----------------------------------------------
+        writes n bytes to abytewriter, doubling the buffer until they fit
+        ----------------------------------------------- */
+
+void abytewriter::write_n(unsigned char* byte, int n) {
+    // safety check for error
+    if (error) return;
+
+    // make sure that pointer doesn't get beyond flush threshold
+    while ((cbyte + n) >= (dsize - 2)) {
+        unsigned char* newData = aligned_alloc(dsize * 2);
+        if (newData == NULL) {  // must be checked before memcpy dereferences it
+            error = true;
+            custom_exit(ExitCode::MALLOCED_NULL);
+            return;
+        }
+        memcpy(newData, data, dsize);
+        dsize *= 2;
+        aligned_dealloc(data);
+        data = newData;
+    }
+
+    // copy data from array
+    while (n-- > 0) data[cbyte++] = *(byte++);
+}
+
+/* -----------------------------------------------
+        gets data array from abytewriter; the caller must release it with aligned_dealloc()
+        ----------------------------------------------- */
+
+unsigned char* abytewriter::getptr_aligned(void) {
+    // forbid freeing memory: from now on the destructor leaves the buffer alone
+    fmem = false;
+    return data;
+}
+
+/* -----------------------------------------------
+        peeks into data array from abytewriter (the writer keeps ownership)
+        ----------------------------------------------- */
+
+unsigned char* abytewriter::peekptr_aligned(void) {
+    return data;
+}
+
+/* -----------------------------------------------
+        gets number of bytes written so far to abytewriter
+        ----------------------------------------------- */
+
+int abytewriter::getpos(void) {
+    return cbyte;
+}
+
+/* -----------------------------------------------
+        reset without realloc (buffer and capacity are kept)
+        ----------------------------------------------- */
+
+void abytewriter::reset(void) {
+    // set position of current byte
+    cbyte = 0;
+}
+
+/* -----------------------------------------------
+        constructor for abytereader class: reads from `array` (`size` bytes) in place
+        ----------------------------------------------- */
+
+abytereader::abytereader(unsigned char* array, int size) {
+    cbyte = 0;
+    eof = false;
+
+    data = array;  // borrowed pointer: the (empty) destructor never frees it
+    lbyte = size;
+
+    if ((data == NULL) || (lbyte == 0)) eof = true;  // nothing to read: start out at end-of-data
+}
+
+/* -----------------------------------------------
+        destructor for abytereader class
+        ----------------------------------------------- */
+
+abytereader::~abytereader(void) {}
+
+/* -----------------------------------------------
+        reads 1 byte from abytereader; returns 1 on success, 0 at end of data
+        ----------------------------------------------- */
+
+int abytereader::read(unsigned char* byte) {
+    if (cbyte < lbyte) {
+        *byte = data[cbyte++];
+        return 1;
+    }
+    // nothing left: pin the cursor at the end and latch eof
+    cbyte = lbyte;
+    eof = true;
+    return 0;
+}
+
+/* -----------------------------------------------
+        reads up to n bytes from abytereader; returns how many were copied
+        ----------------------------------------------- */
+
+int abytereader::read_n(unsigned char* byte, int n) {
+    const int available = lbyte - cbyte;
+    const bool short_read = available < n;  // fewer bytes left than requested
+    const int count = short_read ? available : n;
+    for (int k = 0; k < count; ++k) {
+        byte[k] = data[cbyte + k];
+    }
+    if (short_read) {
+        cbyte = lbyte;  // everything was consumed
+        eof = true;
+    } else {
+        cbyte += n;
+    }
+    return count;
+}
+
+/* -----------------------------------------------
+        go to position in data
+        ----------------------------------------------- */
+
+void abytereader::seek(int pos) {
+    // Positions at or past the end are clamped to lbyte and latch eof;
+    // any other position is taken as-is and clears eof.
+    // NOTE(review): a negative pos is stored unchanged, so callers must
+    // not pass one.
+    const bool past_end = pos >= lbyte;
+    cbyte = past_end ? lbyte : pos;
+    eof = past_end;
+}
+
+/* -----------------------------------------------
+        gets total size of the data (the `size` given to the constructor)
+        ----------------------------------------------- */
+
+int abytereader::getsize(void) {
+    return lbyte;
+}
+
+/* -----------------------------------------------
+        gets current position from abytereader (index of the next byte to read)
+        ----------------------------------------------- */
+
+int abytereader::getpos(void) {
+    return cbyte;
+}
+
+bounded_iostream::bounded_iostream(Sirikata::DecoderWriter* w,
+                                   const std::function<void(Sirikata::DecoderWriter*, size_t)>& size_callback,
+                                   const Sirikata::JpegAllocator<uint8_t>& alloc)  // alloc is not used in this body
+    : parent(w), err(Sirikata::JpegError::nil()) {
+    this->size_callback = size_callback;
+    buffer_position = 0;
+    byte_position = 0;
+    num_bytes_attempted_to_write = 0;
+    set_bound(0);  // 0 means "no bound"; the flush() inside is a no-op because buffer_position is 0
+}
+void bounded_iostream::call_size_callback(size_t size) {
+    size_callback(parent, size);  // forwards to the std::function supplied at construction
+}
+bool bounded_iostream::chkerr() {
+    return err != Sirikata::JpegError::nil();  // true once a short write has been recorded
+}
+
+void bounded_iostream::set_bound(size_t bound) {
+    flush();  // pending bytes are written under the old bound before the new one applies
+    byte_bound = bound;
+}
+void bounded_iostream::flush() {
+    if (buffer_position) {
+        write_no_buffer(buffer, buffer_position);  // return value ignored; failures are visible through chkerr()
+        buffer_position = 0;
+    }
+}
+void bounded_iostream::close() {
+    flush();
+    parent->Close();
+}
+
+unsigned int bounded_iostream::write_no_buffer(const void* from, size_t bytes_to_write) {
+    // Unbuffered write to parent; byte_bound == 0 means "no bound". A short write is recorded in err.
+    std::pair<unsigned int, Sirikata::JpegError> retval;
+    if (byte_bound != 0 && byte_position + bytes_to_write > byte_bound) {
+        size_t real_bytes_to_write = byte_bound - byte_position;  // NOTE(review): wraps if byte_position > byte_bound
+        byte_position += real_bytes_to_write;  // NOTE(review): advanced before Write(), so a short write is over-counted
+        retval = parent->Write(reinterpret_cast<const unsigned char*>(from), real_bytes_to_write);
+        if (retval.first < real_bytes_to_write) {
+            err = retval.second;
+            return retval.first;
+        }
+        return bytes_to_write; // pretend we wrote it all
+    }
+    size_t total = bytes_to_write;
+    retval = parent->Write(reinterpret_cast<const unsigned char*>(from), total);
+    unsigned int written = retval.first;
+    byte_position += written;  // this path counts only what parent actually accepted
+    if (written < total) {
+        err = retval.second;
+        return written;
+    }
+    return bytes_to_write;
+}
+
+unsigned int bounded_iostream::getsize() {
+    return byte_position;  // bytes accounted for by write_no_buffer(); still-buffered bytes are not included
+}
+
+bounded_iostream::~bounded_iostream() {}  // does not flush or close parent
+
+ibytestreamcopier::ibytestreamcopier(Sirikata::DecoderReader* p,
+                                     unsigned int byte_offset,
+                                     unsigned int max_file_size,
+                                     const Sirikata::JpegAllocator<uint8_t>& alloc)
+    : ibytestream(p, byte_offset, alloc), side_channel(alloc) {
+    if (max_file_size) {
+        side_channel.reserve(max_file_size);  // pre-size the copy buffer when a size limit is known
+    }
+}
+bool ibytestreamcopier::read_byte(unsigned char* output) {
+    bool retval = ibytestream::read_byte(output);
+    if (retval) {
+        side_channel.push_back(*output);  // mirror every byte that was actually read
+    }
+    return retval;
+}
+
+unsigned int ibytestreamcopier::read(unsigned char* output, unsigned int size) {
+    unsigned int retval = ibytestream::read(output, size);  // for size == 1 the base class calls read_byte()
+    if (retval > 0) {  // NOTE(review): if read_byte is virtual, a size == 1 read is mirrored twice - check bitops.hh
+        side_channel.insert(side_channel.end(), output, output + retval);
+    }
+    return retval;
+}
+ibytestream::ibytestream(Sirikata::DecoderReader* p,
+                         unsigned int byte_offset,
+                         const Sirikata::JpegAllocator<uint8_t>& alloc)  // alloc is not used in this body
+    : parent(p) {
+    bytes_read = byte_offset;  // the counter starts at byte_offset; last_read is not initialised in this body
+}
+
+unsigned int ibytestream::read(unsigned char* output, unsigned int size) {  // returns bytes stored in output
+    assert(size);
+    if (size == 1) {
+        return read_byte(output) ? 1 : 0;
+    }
+    int retval = IOUtil::ReadFull(parent, output, size);  // may be less than size on error or end of input
+    bytes_read += retval;
+    static_assert(sizeof(last_read) == 2, "Last read must hold full jpeg huffman");
+    if (retval >= 2) {
+        memcpy(last_read, output + retval - sizeof(last_read), sizeof(last_read));  // last two bytes actually read
+    } else if (retval) {
+        last_read[0] = last_read[1];  // exactly one new byte: slide the 2-byte history window
+        last_read[1] = *output;
+    }
+    return retval;
+}
+
+bool ibytestream::read_byte(unsigned char* output) {
+    const unsigned int got = parent->Read(output, 1).first;
+    if (got == 0) {
+        return false;  // nothing delivered: last_read and bytes_read stay untouched
+    }
+    last_read[0] = last_read[1];  // slide the 2-byte history window
+    last_read[1] = *output;
+    bytes_read += 1;
+    return true;
+}
diff --git a/codec/L2/demos/leptonEnc/host/lepton/bitops.hh b/codec/L2/demos/leptonEnc/host/lepton/bitops.hh
new file mode 100644
index 0000000000..98c3eb941a
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/bitops.hh
@@ -0,0 +1,488 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/**
+Copyright (c) 2006...2016, Matthias Stirner and HTW Aalen University
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ **/
+
+#include <assert.h>
+#include <cstring>
+#define RBITS(c, n) ((c) & (0xFF >> (8 - (n)))) // lowest n bits of c (n <= 8); arguments are parenthesised so compound expressions work
+#define LBITS(c, n) ((c) >> (8 - (n))) // c shifted right by 8 - n
+#define MBITS(c, l, r) (RBITS(c, l) >> (r)) // lowest l bits of c with the bottom r bits dropped
+#define RBITS16(c, n) ((c) & (0xFFFFFFFF >> (16 - (n)))) // NOTE(review): 32-bit mask shifted by 16 - n keeps 16 + n bits, not n; left as-is, confirm intent
+#define LBITS16(c, n) ((c) >> (16 - (n)))
+#define MBITS16(c, l, r) (RBITS16(c, l) >> (r))
+#define RBITS32(c, n) ((c) & (0xFFFFFFFF >> (32 - (n))))
+#define LBITS32(c, n) ((c) >> (32 - (n)))
+#define MBITS32(c, l, r) (RBITS32(c, l) >> (r))
+
+#define RBITS64(c, n) ((n) == 0 ? 0ULL : ((c) & (0xFFFFFFFFFFFFFFFFULL >> (64 - (n))))) // n == 0 is special-cased to avoid a shift by 64
+#define LBITS64(c, n) ((c) >> (64 - (n)))
+#define MBITS64(c, l, r) (((r) >= 64) ? 0 : (RBITS64(c, l) >> (r)))
+
+#define BITN(c, n) (((c) >> (n)) & 0x1) // bit n of c
+#define FDIV2(v, p) (((v) < 0) ? -((-(v)) >> (p)) : ((v) >> (p))) // shifts the magnitude, so negatives round toward zero
+
+#define BTST_BUFF (1024 * 1024) // parenthesised so expressions such as x / BTST_BUFF evaluate as intended
+
+#include <stdio.h>
+#include <functional>
+#include "../io/Reader.hh"
+#include "../io/ioutil.hh"
+#include "../vp8/util/vpx_config.hh"
+
+/* -----------------------------------------------
+        class to write arrays bitwise
+        ----------------------------------------------- */
+
+class abitwriter { // Bit-level writer: collects bits in a 64-bit word and spills whole bytes into a growable byte array.
+    unsigned char* data2; // output byte array; flushed bytes are copied to data2 + cbyte2
+
+   public:
+    uint64_t buf; // staging word, filled from the most significant bit downward
+    int dsize; // current allocated size of data2 in bytes
+    int adds; // growth step for data2; doubled on growth while it is below 4096 * 1024
+    int cbyte2; // number of bytes already flushed into data2
+    int cbit2; // number of still-unused bits in buf (64 means buf is empty)
+    bool fmem; // cleared by getptr(); NOTE(review): presumably tells the destructor whether to free data2 - confirm in the .cc
+    int size_bound; // byte limit: once cbyte2 >= size_bound the flush routines do nothing (see bound_reached)
+
+   public:
+    void debug() const;
+
+    abitwriter(int size, int size_bound);
+    ~abitwriter(void);
+
+    unsigned char* partial_bytewise_flush() { // flushes only the completed bytes of buf; a trailing partial byte stays in buf
+        if (__builtin_expect(bound_reached(), 0)) {
+            return data2;
+        }
+        int partial_byte_bits = (64 - cbit2) & 7; // used bits that do not yet form a whole byte
+        uint64_t xbuf = htobe64(buf); // big-endian byte order so the most significant byte is copied first
+        uint32_t bytes_to_write = (64 - (cbit2 + partial_byte_bits)) / 8;
+        uint32_t bits_to_write = (bytes_to_write << 3);
+        memcpy(data2 + cbyte2, &xbuf, bytes_to_write);
+        cbyte2 += bytes_to_write;
+        if (bits_to_write > 63) { // a shift by 64 would be undefined, so clear instead
+            buf = 0;
+        } else {
+            buf <<= bits_to_write; // move the leftover partial byte to the top of buf
+        }
+        cbit2 += bits_to_write;
+        return data2;
+    }
+    void flush_no_pad() { // flushes all used bits of buf; requires the used bit count to be a multiple of 8
+        if (__builtin_expect(bound_reached(), 0)) {
+            return;
+        }
+        always_assert(((64 - cbit2) & 7) == 0);
+        buf = htobe64(buf);
+        uint32_t bytes_to_write = (64 - cbit2) / 8;
+        memcpy(data2 + cbyte2, &buf, bytes_to_write);
+        cbyte2 += bytes_to_write;
+        buf = 0;
+        // assert(cbyte +1 == cbyte2 || cbyte == cbyte2 || cbyte == cbyte2 + 1 || cbyte == cbyte2 + 2 || cbyte == cbyte2
+        // + 3);
+        // assert(memcmp(data2, data, cbyte2) == 0);
+
+        cbit2 = 64; // buf is empty again
+    }
+    /* -----------------------------------------------
+     writes the low nbits bits of val to abitwriter
+     ----------------------------------------------- */
+
+    void write(unsigned int val, int nbits) {
+        int nbits2 = nbits;
+        unsigned int val2 = val;
+        assert(nbits <= 64);
+        if (__builtin_expect(cbyte2 > (dsize - 16), false)) { // fewer than 16 spare bytes left: grow data2 first
+            if (bound_reached()) {
+                return; // past the size bound: the bits are silently dropped
+            }
+            if (adds < 4096 * 1024) {
+                adds <<= 1;
+            }
+            int new_size = dsize + adds;
+            unsigned char* tmp = (unsigned char*)custom_malloc(new_size);
+            if (tmp == NULL) {
+                error = true;
+                custom_exit(ExitCode::MALLOCED_NULL);
+                return;
+            }
+            memset(tmp + dsize, 0, adds); // zero only the newly added tail
+            memcpy(tmp, data2, dsize);
+            custom_free(data2);
+            data2 = tmp;
+            dsize = new_size;
+        }
+
+        // write data
+        if (nbits2 >= cbit2) { // value fills buf completely: store the top cbit2 bits, then flush
+            buf |= MBITS64(val2, nbits2, (nbits2 - cbit2));
+            nbits2 -= cbit2;
+            cbit2 = 0;
+            flush_no_pad();
+        }
+        if (nbits2 > 0) { // remaining low bits go to the top of the free space in buf
+            uint64_t tmp = (RBITS64(val2, nbits2));
+            if (__builtin_expect(cbit2 < nbits2, 0)) { // reachable when flush_no_pad returned early and left cbit2 at 0
+                cbit2 = 0;
+            } else {
+                tmp <<= cbit2 - nbits2;
+                buf |= tmp;
+                cbit2 -= nbits2;
+            }
+        }
+    }
+    void pad(unsigned char fillbit) { // pads to a byte boundary using successive bits of fillbit (lowest bit first), then flushes
+        int offset = 1;
+        while ((cbit2 & 7) && cbyte2 < size_bound) {
+            write((fillbit & offset) ? 1 : 0, 1);
+            offset <<= 1;
+        }
+        flush_no_pad();
+    }
+    unsigned char* getptr(void) { // pads with the member fillbit, flushes, clears fmem and returns the byte array
+        // data is padded here
+        pad(fillbit);
+        flush_no_pad();
+        // forbid freeing memory
+        fmem = false;
+        // realloc data
+        return data2;
+    }
+    const unsigned char* peekptr(void) { // flushes (asserting byte alignment) and returns the array without changing fmem
+        flush_no_pad();
+        return data2;
+    }
+    uint8_t get_num_overhang_bits() { return 64 - cbit2; } // number of bits currently held in buf
+    bool bound_reached() const { return cbyte2 >= size_bound; }
+    uint8_t get_overhang_byte() const { // top byte of buf; the assert requires fewer than 8 pending bits
+        assert(cbit2 > 56);
+        uint64_t retval = buf;
+        retval >>= 56;
+        return (uint8_t)retval;
+    }
+    void reset_from_overhang_byte_and_num_bits(uint8_t overhang_byte, uint8_t num_bits) { // restart with a partial byte preloaded
+        memset(data2, 0, cbyte2);
+        if (size_bound) {
+            size_bound -= cbyte2; // the bound is reduced by the bytes already produced
+        }
+        cbyte2 = 0;
+        buf = 0;
+        buf = overhang_byte;
+        buf <<= 56; // place the overhang byte at the top of buf
+        cbit2 = 64 - num_bits;
+    }
+    void reset() { // discards flushed bytes; asserts that no bits are pending (or the bound was hit)
+        assert(no_remainder());
+        reset_crystallized_bytes();
+    }
+    void reset_crystallized_bytes() { // zeroes the flushed bytes and rewinds cbyte2; buf and cbit2 are left untouched
+        memset(data2, 0, cbyte2);
+        if (size_bound) {
+            size_bound -= cbyte2;
+        }
+        cbyte2 = 0;
+    }
+    int getpos(void) const { return cbyte2; } // bytes flushed so far
+    bool no_remainder() const { return cbit2 == 64 || bound_reached(); }
+    bool error; // set by write() when growing data2 fails
+    unsigned char fillbit; // padding pattern that getptr() passes to pad()
+};
+
+/* -----------------------------------------------
+        class to read arrays bitwise
+        ----------------------------------------------- */
+
+class abitreader { // Bit-level reader over a byte array; buf holds cbit2 not-yet-consumed bits, right-aligned.
+   public:
+    abitreader(unsigned char* array, int size);
+    ~abitreader(void);
+    std::pair<uint8_t, uint8_t> overhang() { // returns {rem, bits} with rem = (64 - cbit2) & 7
+        uint64_t selected_byte = htobe64(buf);
+        uint8_t rem = (uint8_t)((64 - cbit2) & 7);
+        if (rem != 64) { // rem is at most 7, so this test always passes
+            uint8_t shift_level = (64 - cbit2) - rem;
+            if (shift_level < 64) {
+                selected_byte >>= shift_level;
+            } else {
+                selected_byte = 0;
+            }
+        }
+        uint8_t selected_bits = (uint8_t)selected_byte;
+        selected_bits &= (((1 << rem) - 1) << (8 - rem)); // keep only the top rem bits of the byte
+        return {rem, selected_bits};
+    }
+    unsigned int read(int nbits) { // returns the next nbits bits, most significant first; 0 when at eof or nbits == 0
+        if (__builtin_expect(eof || !nbits, 0)) {
+            return 0;
+        }
+        unsigned int bits_read = 0;
+        unsigned int retval2 = 0;
+        if (__builtin_expect(nbits >= cbit2, 0)) { // request drains buf: take what is left, refill, then take the rest
+            bits_read = cbit2;
+            retval2 = (RBITS64(buf, cbit2) << (nbits - bits_read)) & ((1ULL << nbits) - 1); // 64-bit mask: a plain int shift is undefined for nbits >= 31
+            int cur_nbits = nbits - bits_read; // bits still owed after draining buf
+            buf >>= bits_read;
+            cbit2 -= bits_read;
+            if (cbyte2 == lbyte && cbit2 == 0) {
+                eof = true;
+                return retval2;
+            }
+            if (__builtin_expect(lbyte - cbyte2 < (int)sizeof(buf), 0)) { // fewer than 8 bytes remain: partial refill
+                int new_bytes = std::min((int)sizeof(buf), lbyte - cbyte2);
+                memcpy(&buf, &data2[cbyte2], new_bytes);
+                buf = htobe64(buf);
+                buf >>= (sizeof(buf) - new_bytes) * 8; // right-align the bytes that were actually loaded
+                cbyte2 += new_bytes;
+                cbit2 += new_bytes * 8;
+            } else {
+                memcpy(&buf, &data2[cbyte2], sizeof(buf));
+                buf = htobe64(buf);
+                cbyte2 += sizeof(buf);
+                cbit2 += sizeof(buf) * 8;
+            }
+            if (cbyte2 == lbyte && cbit2 == 0) {
+                eof = true;
+            }
+            if (cur_nbits) {
+                if (cur_nbits <= cbit2) {
+                    retval2 |= MBITS64(buf, cbit2, (cbit2 - cur_nbits));
+                    cbit2 -= cur_nbits;
+                } else { // input ran out before the request was satisfied: return what is left
+                    retval2 |= buf;
+                    buf = 0;
+                    cbit2 = 0;
+                }
+            }
+        } else { // common case: enough bits are already buffered
+            retval2 = MBITS64(buf, cbit2, (cbit2 - nbits));
+            cbit2 -= nbits;
+        }
+        return retval2;
+    }
+    bool remainder() { // true when cbit2 is not a multiple of 8; the computed count is collapsed by the bool return type
+        if (cbit2 & 7) {
+            return 8 - (cbit2 & 7);
+        }
+        return 0;
+    }
+    unsigned char unpad(unsigned char fillbit) { // consumes bits up to the next byte boundary and returns them packed, first bit lowest
+        if ((cbit2 & 7) == 0 || eof)
+            return fillbit; // already aligned or exhausted: hand the caller's value back
+        else {
+            char last_bit = read(1);
+            fillbit = last_bit;
+            int offset = 1;
+            while (cbit2 & 7) {
+                last_bit = read(1);
+                fillbit |= (last_bit << offset);
+                ++offset;
+            }
+            while (offset < 7) { // replicate the final pad bit into the remaining positions below bit 7
+                fillbit |= (last_bit << offset);
+                ++offset;
+            }
+        }
+        return fillbit;
+    }
+    int getpos(void) { return cbyte2 - 7 + ((64 - cbit2) >> 3); } // byte position derived from cbyte2 and the bits consumed from buf
+    uint64_t debug_peek(void) { // reads ahead on a copy of this reader, so the reader itself is not advanced
+        uint64_t retval = 0;
+        abitreader tmp(*this);
+        bool had_remainder = false;
+        while (tmp.remainder()) { // remainder() yields a bool, so each pass reads a single bit
+            had_remainder = true;
+            retval = tmp.read(tmp.remainder());
+        }
+        for (int i = 0; i < (had_remainder ? 7 : 8); ++i) {
+            uint8_t a = tmp.read(8);
+            retval |= a;
+            retval <<= 8;
+        }
+        return retval;
+    }
+    bool eof; // set by read() once every input byte and every buffered bit has been consumed
+
+   private:
+    unsigned char* data2; // input byte array
+    int cbyte2; // index of the next byte of data2 to load into buf
+    int cbit2; // number of unread bits currently in buf
+    uint64_t buf; // unread bits, right-aligned
+    int lbyte; // index one past the last input byte (read() compares cbyte2 against it)
+};
+
+/* -----------------------------------------------
+        class to write arrays bytewise
+        ----------------------------------------------- */
+extern void aligned_dealloc(unsigned char*);
+extern unsigned char* aligned_alloc(size_t);
+
+class abytewriter { // Byte-level writer; only declarations are visible here, the definitions live elsewhere.
+   public:
+    abytewriter(int size);
+    ~abytewriter(void);
+    void write(unsigned char byte);
+    void write_n(unsigned char* byte, int n); // NOTE(review): presumably writes n bytes starting at byte - confirm in the .cc
+    unsigned char* getptr_aligned(void);
+    unsigned char* peekptr_aligned(void);
+    int getpos(void);
+    void reset(void);
+    bool error; // NOTE(review): presumably set when an allocation fails - confirm in the .cc
+
+   private:
+    unsigned char* data;
+    int dsize;
+    int adds;
+    int cbyte;
+    bool fmem;
+};
+
+/* -----------------------------------------------
+        class to read arrays bytewise
+        ----------------------------------------------- */
+
+class abytereader { // Byte-level reader over a caller-supplied array; only declarations are visible here.
+   public:
+    abytereader(unsigned char* array, int size);
+    ~abytereader(void);
+    int read(unsigned char* byte);
+    int read_n(unsigned char* byte, int n); // NOTE(review): presumably reads up to n bytes into byte - confirm in the .cc
+    void seek(int pos);
+    int getsize(void);
+    int getpos(void);
+    bool eof; // NOTE(review): presumably set once the end of the array is reached - confirm in the .cc
+
+   private:
+    unsigned char* data;
+    int lbyte;
+    int cbyte;
+};
+
+/* -----------------------------------------------
+        class for input and output from file or memory
+        ----------------------------------------------- */
+
+class ibytestream { // Byte reader on top of a Sirikata::DecoderReader that counts bytes and remembers the last two read.
+    Sirikata::DecoderReader* parent; // underlying reader; not owned as far as this header shows
+    unsigned int bytes_read; // starts at the constructor's byte offset and grows with every byte read
+
+   public:
+    unsigned char get_last_read() const { return last_read[1]; } // most recently read byte
+    unsigned char get_penultimate_read() const { return last_read[0]; } // the byte read before that
+    ibytestream(Sirikata::DecoderReader* p,
+                unsigned int starting_byte_offset,
+                const Sirikata::JpegAllocator<uint8_t>& alloc);
+    unsigned int getsize() const { return bytes_read; } // starting offset plus bytes read so far
+    bool read_byte(unsigned char* output); // reads one byte; false when nothing could be read
+    unsigned int read(unsigned char* output, unsigned int size); // reads up to size bytes; returns the count read
+    // the biggest allowed huffman code (that may get damaged by truncation)
+    unsigned char last_read[2]; // [0] = penultimate byte, [1] = last byte
+};
+class ibytestreamcopier : ibytestream { // since we don't use virtual methods... must reimplement (base is inherited privately)
+    std::vector<uint8_t, Sirikata::JpegAllocator<uint8_t> > side_channel; // copy of every byte read through this object
+
+   public:
+    ibytestreamcopier(Sirikata::DecoderReader* p,
+                      unsigned int starting_byte_offset,
+                      unsigned int maximum_file_size,
+                      const Sirikata::JpegAllocator<uint8_t>& alloc);
+    unsigned int getsize() const { return ibytestream::getsize(); } // forwards to the base counter
+    unsigned int get_last_read() const { return ibytestream::get_last_read(); } // widened to unsigned int here
+    unsigned int get_penultimate_read() const { return ibytestream::get_penultimate_read(); }
+
+    bool read_byte(unsigned char* output); // base read_byte plus a copy into side_channel
+    unsigned int read(unsigned char* output, unsigned int size); // base read plus a copy of the bytes delivered
+
+    const std::vector<uint8_t, Sirikata::JpegAllocator<uint8_t> >& get_read_data() const { return side_channel; }
+    std::vector<uint8_t, Sirikata::JpegAllocator<uint8_t> >& mutate_read_data() { return side_channel; } // mutable access to the copy
+};
+
+class bounded_iostream { // Buffered writer in front of a Sirikata::DecoderWriter with an optional byte bound.
+    enum { buffer_size = 65536 };
+    uint8_t buffer[buffer_size]; // staging buffer for small writes
+    uint32_t buffer_position; // number of bytes currently staged in buffer
+    Sirikata::DecoderWriter* parent;
+    unsigned int byte_bound; // 0 means no bound (see set_bound)
+    unsigned int byte_position; // NOTE(review): presumably bytes already handed to parent by write_no_buffer - confirm in the .cc
+    unsigned int num_bytes_attempted_to_write; // every byte passed to write()/write_byte(), whether or not it was kept
+    Sirikata::JpegError err;
+    std::function<void(Sirikata::DecoderWriter*, size_t)> size_callback;
+    unsigned int write_no_buffer(const void* from, size_t bytes_to_write); // unbuffered path; defined elsewhere
+
+   public:
+    bounded_iostream(Sirikata::DecoderWriter* parent,
+                     const std::function<void(Sirikata::DecoderWriter*, size_t)>& size_callback,
+                     const Sirikata::JpegAllocator<uint8_t>& alloc);
+    ~bounded_iostream(void);
+    void call_size_callback(size_t size);
+    bool chkerr();
+    unsigned int getsize();
+    unsigned int bytes_written() const { // NOTE(review): with byte_bound == 0 the min() is 0, so staged bytes are not counted - confirm intent
+        return std::max(byte_position, std::min(byte_position + buffer_position, byte_bound));
+    }
+    void set_bound(size_t bound); // bound of zero = fine
+    size_t get_bound() const { return byte_bound; }
+    bool has_reached_bound() const { return byte_bound && byte_position + buffer_position >= byte_bound; }
+    bool has_exceeded_bound() const { return byte_bound && num_bytes_attempted_to_write > byte_bound; }
+    unsigned int write_byte(uint8_t byte) { // stages one byte and flushes when the buffer fills; always returns 1
+        ++num_bytes_attempted_to_write;
+        assert(buffer_position < buffer_size && "Full buffer wasn't flushed");
+        buffer[buffer_position++] = byte;
+        if (__builtin_expect(buffer_position == buffer_size, 0)) {
+            buffer_position = 0; // reset before handing the full buffer to write_no_buffer
+            write_no_buffer(buffer, buffer_size);
+        }
+        return 1;
+    }
+    unsigned int write(const void* from, unsigned int nbytes) { // stages small writes; large overflowing writes bypass the buffer
+        num_bytes_attempted_to_write += nbytes;
+        size_t bytes_to_write = nbytes;
+        if (__builtin_expect(nbytes + buffer_position > buffer_size, 0)) { // would overflow the staging buffer
+            if (buffer_position) {
+                write_no_buffer(buffer, buffer_position); // flush what is staged first to preserve ordering
+                buffer_position = 0;
+            }
+            if (bytes_to_write < 64) { // small write: stage it in the now-empty buffer
+                memcpy(buffer + buffer_position, from, bytes_to_write);
+                buffer_position += bytes_to_write;
+            } else {
+                return write_no_buffer(from, bytes_to_write); // large write goes straight through; its result is returned
+            }
+        } else {
+            memcpy(buffer + buffer_position, from, bytes_to_write);
+            buffer_position += bytes_to_write;
+            if (__builtin_expect(buffer_position == buffer_size, 0)) {
+                buffer_position = 0;
+                write_no_buffer(buffer, buffer_size);
+            }
+        }
+        return bytes_to_write; // buffered paths report the full request as written
+    }
+    void flush();
+    void close();
+};
diff --git a/codec/L2/demos/leptonEnc/host/lepton/component_info.hh b/codec/L2/demos/leptonEnc/host/lepton/component_info.hh
new file mode 100644
index 0000000000..207fc2f337
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/component_info.hh
@@ -0,0 +1,21 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+#ifndef _COMPONENT_INFO_HH_
+#define _COMPONENT_INFO_HH_
+struct componentInfo { // per-component JPEG parameters: tables, sampling factors and block counts
+    unsigned short* qtable; // quantization table
+    int huffdc;             // number of the huffman table (DC)
+    int huffac;             // number of the huffman table (AC)
+    int sfv;                // sample factor vertical
+    int sfh;                // sample factor horizontal
+    int mbs;                // blocks in mcu
+    int bcv;                // block count vertical (interleaved)
+    int bch;                // block count horizontal (interleaved)
+    int bc;                 // block count (all) (interleaved)
+    int ncv;                // block count vertical (non interleaved)
+    int nch;                // block count horizontal (non interleaved)
+    int nc;                 // block count (all) (non interleaved)
+    int sid;                // statistical identity
+    int jid;                // jpeg internal id
+};
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/lepton/evaluate_int_encodings.py b/codec/L2/demos/leptonEnc/host/lepton/evaluate_int_encodings.py
new file mode 100644
index 0000000000..72a4e71dcc
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/evaluate_int_encodings.py
@@ -0,0 +1,157 @@
+import sys
+from collections import defaultdict
+def load_histogram(fn):
+    """Parse 'category [count] value' lines of fn into {category: {value: total count}}; count defaults to 1."""
+    ret = defaultdict(dict)
+    with open(fn) as f:
+        for line in f:
+            fields = line.split()
+            if len(fields) == 3:
+                cat, a, b = fields
+            else:
+                cat, b = fields  # two-field form; any other width raises ValueError
+                a = 1
+            key = int(b)
+            ret[cat][key] = ret[cat].get(key, 0) + int(a)
+    return ret
+
+def make_unary_sign_cost():
+    """Build the cost table for every integer in [-1025, 1024].
+
+    Zero costs 1, a negative value v costs 2*|v| and a positive value v costs 2*v + 1.
+    """
+    def cost(v):
+        if v == 0:
+            return 1
+        return -v * 2 if v < 0 else v * 2 + 1
+    return {v: cost(v) for v in range(-1025, 1025)}
+
+def make_unary_cost():
+    """Build the cost table for every integer in [-1025, 1024].
+
+    Zero costs 1; any other value v costs |v| + 2.
+    """
+    costs = {}
+    for value in range(-1025, 1025):
+        magnitude = -value if value < 0 else value
+        costs[value] = magnitude + 2 if value else 1
+    return costs
+
+def log2(i):
+    """Return floor(log2(i)) for a positive integer i."""
+    assert i > 0
+    halvings, i = 0, i // 2
+    while i:
+        halvings += 1
+        i //= 2
+    return halvings
+def log2_length(i):
+    """Return log2(|i|) + 1, the number of bits in |i|; zero has length 0."""
+    if i == 0:
+        return 0
+    magnitude = -i if i < 0 else i  # work on the absolute value
+    return log2(magnitude) + 1
+
+def make_unary_trunc_cost(n):
+    """Cost table for unary coding truncated at n.
+
+    |v| <= n costs 1 for zero and |v| + 2 otherwise; every other v in [-1025, 1024] costs
+    n + 2 + L + max(L - 1, 0), where L = log2_length(|v| - n). Unknown keys read as 0.
+    """
+    costs = defaultdict(lambda: 0)
+    for v in range(-1025, 1025):
+        length = log2_length(abs(v) - n)
+        extra = length - 1 if length else 0
+        costs[v] = length + 2 + extra + n
+    # Values within the cutoff override the generic formula above.
+    for v in range(-n, n + 1):
+        costs[v] = abs(v) + 2 if v else 1
+    return costs
+
+unary_sign_cost = make_unary_sign_cost()
+unary_cost = make_unary_cost()
+unary_trunc_cost = []
+for i in range(20):
+    unary_trunc_cost.append(make_unary_trunc_cost(i))
+unary_exponent_cost = {0 : 1, # 1 exp
+              1 : 3, # 2 exp, 1 sign
+              2 : 5, # 3 exp, 1 sign, 1 rem
+              3 : 7, # 4 exp, 1 sign, 2 rem
+              4 : 9, # 5 exp, 1 sign
+              5 : 11,# ...
+              6 : 13,
+              7 : 15,
+              8 : 17,
+              9 : 19,
+              10 : 20}
+
+unary_one_case_exponent_cost = {0 : 1, # 1 exp
+              1 : 2, # 2 exp, 1 sign
+              2 : 6, # 3 exp, 1 sign, 1 rem
+              3 : 8, # 4 exp, 1 sign, 2 rem
+              4 : 10, # 5 exp, 1 sign
+              5 : 12,# ...
+              6 : 14,
+              7 : 16,
+              8 : 18,
+              9 : 20,
+              10 : 21,
+              -1 : 3, # 2 exp, 1 sign
+              -2 : 6, # 3 exp, 1 sign, 1 rem
+              -3 : 8, # 4 exp, 1 sign, 2 rem
+              -4 : 10, # 5 exp, 1 sign
+              -5 : 12,# ...
+              -6 : 14,
+              -7 : 16,
+              -8 : 18,
+              -9 : 20,
+              -10 : 21}
+
+binary_cost = {0 : 2, # 2 exp
+               1 : 5, # 4 exp, 1 sign
+               2 : 6, # 4 exp, 1 sign, 1 rem
+               3 : 7, # 4 exp, 1 sign, 2 rem
+               4 : 8, # ...
+              5 : 9,
+              6 : 10,
+              7 : 11,
+              8 : 12,
+              9 : 13,
+              10 : 14}
+def eval_binary_cost(h):
+    """Cost of coding every sample in histogram h with one fixed width.
+
+    The width is log2_length of the largest key (never below 0); the result is that
+    width times the total sample count.
+    """
+    largest = 0
+    for key in h:
+        largest = key if key > largest else largest
+    return sum(h[key] for key in h) * log2_length(largest)
+
+def eval_cost(h, c, dolog=False):
+    """Total cost of histogram h under cost table c.
+
+    With dolog set, keys are looked up by log2_length(key); a negative key then uses
+    c[-length] when the table has such an entry, which lets tables special-case the sign.
+    """
+    total = 0
+    for value, occurrences in h.items():
+        index = log2_length(value) if dolog else value
+        use_negative = value < 0 and index > 0 and -index in c
+        total += occurrences * c[-index if use_negative else index]
+    return total
+for arg in sys.argv[1:]:
+    hists = load_histogram(arg)
+    # Single-argument print(...) and dict.items() behave the same on Python 2 and Python 3.
+    for name, count in hists.items():
+        label = arg + '.' + name
+        print('%s pure_bin_cost %s' % (label, eval_binary_cost(count)))
+        print('%s unary_exp_cost %s' % (label, eval_cost(count, unary_exponent_cost, dolog=True)))
+        print('%s unary0exp_cost %s' % (label, eval_cost(count, unary_one_case_exponent_cost, dolog=True)))
+        print('%s binaryexp_cost %s' % (label, eval_cost(count, binary_cost, dolog=True)))
+        print('%s unary_cost %s' % (label, eval_cost(count, unary_cost)))
+        for i in range(len(unary_trunc_cost)):
+            # zero-padded two-digit index, e.g. untrunc07
+            print('%s untrunc%02d %s' % (label, i, eval_cost(count, unary_trunc_cost[i])))
+
diff --git a/codec/L2/demos/leptonEnc/host/lepton/find-big b/codec/L2/demos/leptonEnc/host/lepton/find-big
new file mode 100644
index 0000000000..83dbfde102
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/find-big
@@ -0,0 +1,21 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+my @totals; # one { total, false, true } accumulator per pair position on a line
+
+while (<>) {
+  my ( @counts ) = m{\( (\d+ , \d+) \)}g; # every "( F , T )" pair on the line, in order
+  for ( my $index = 0; $index <= $#counts; $index++ ) { # $#counts is the last index, so <= visits every pair
+    my ( $false, $true ) = split / , /, $counts[ $index ];
+    $totals[ $index ]{ 'total' } += $false + $true;
+    $totals[ $index ]{ 'false' } += $false;
+    $totals[ $index ]{ 'true' } += $true;
+  }
+}
+
+for ( my $index = 0; $index <= $#totals; $index++ ) { # <= so the final position is reported too
+  my $totalref = $totals[ $index ];
+  my $implied_prob = int( 256 * ($totalref->{ false } + 1) / ($totalref->{ false } + $totalref->{ true } + 2) ); # add-one smoothed share of 'false', scaled by 256
+  print "token $index => [ total = $totalref->{ total }, false = $totalref->{ false }, true = $totalref->{ true }, prob = $implied_prob ]\n";
+}
diff --git a/codec/L2/demos/leptonEnc/host/lepton/fork_serve.cc b/codec/L2/demos/leptonEnc/host/lepton/fork_serve.cc
new file mode 100644
index 0000000000..9c25df43f0
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/fork_serve.cc
@@ -0,0 +1,129 @@
+#include "../../vp8/util/memory.hh"
+#ifndef _WIN32
+
+#include <sys/types.h>
+#include <signal.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <algorithm>
+#if defined(__APPLE__) || defined(BSD)
+#include <sys/wait.h>
+#else
+#include <wait.h>
+#endif
+#include <errno.h>
+#include "jpgcoder.hh"
+#include "../io/ioutil.hh"
+static char hex_nibble(uint8_t val) {
+    // Map 0-9 onto '0'-'9' and 10 and above onto 'a' onward (lowercase hex for 10-15).
+    return val < 10 ? val + '0' : val - 10 + 'a';
+}
+
+static const char last_prefix[] = "/tmp/";
+static const char last_postfix[2][7] = {".iport", ".oport"};
+static char last_pipes[sizeof(last_postfix) / sizeof(last_postfix[0])][128] = {};
+
+static void name_cur_pipes(FILE* dev_random) { // fills last_pipes[] with last_prefix + dashed hex of 16 random bytes + postfix
+    char random_data[16] = {0};
+    auto retval = fread(random_data, 1, sizeof(random_data), dev_random);
+    (void)retval; // dev random should yield reasonable results
+    for (size_t pipe_id = 0; pipe_id < sizeof(last_postfix) / sizeof(last_postfix[0]); ++pipe_id) {
+        memcpy(last_pipes[pipe_id], last_prefix, strlen(last_prefix));
+        size_t offset = strlen(last_prefix);
+        for (size_t i = 0; i < sizeof(random_data); ++i) {
+            always_assert(offset + 3 < sizeof(last_pipes[pipe_id])); // room for two hex digits plus an optional dash in this row
+            uint8_t hex = random_data[i];
+            last_pipes[pipe_id][offset] = hex_nibble(hex >> 4);
+            last_pipes[pipe_id][offset + 1] = hex_nibble(hex & 0xf);
+            offset += 2;
+            if (i == 4 || i == 6 || i == 8 || i == 14) { // dash separators after these byte positions
+                last_pipes[pipe_id][offset] = '-';
+                ++offset;
+            }
+        }
+        memcpy(last_pipes[pipe_id] + offset, last_postfix[pipe_id], sizeof(last_postfix[pipe_id])); // sizeof includes the terminating NUL
+    }
+}
+
+static void exit_on_stdin(pid_t child) { // child is a fork() result: 0 = forked child, > 0 = parent, < 0 = fork failed
+    if (!child) {
+        fclose(stdin); // the forked child does not read stdin; it returns to the caller
+        return;
+    }
+    fclose(stdout);
+    getc(stdin); // block until stdin delivers a byte or reaches end of file
+    if (child > 0) kill(child, SIGQUIT); // never signal pid -1 (failed fork): that would hit every process we may signal
+    sleep(1); // 1 second to clean up its temp pipes
+    if (child > 0) kill(child, SIGKILL);
+    fclose(stderr);
+    custom_exit(ExitCode::SUCCESS);
+}
+
+static void cleanup_pipes(int) { // signal handler: remove any FIFO paths named so far, then exit
+    const size_t pipe_count = sizeof(last_pipes) / sizeof(last_pipes[0]);
+    for (size_t slot = 0; slot != pipe_count; ++slot) {
+        const char* path = last_pipes[slot];
+        if (*path != '\0') unlink(path); // an empty name means no pipes were named yet
+    }
+    custom_exit(ExitCode::EARLY_EXIT);
+}
+void fork_serve() { // forks once, then the child loops forever: publish a fresh FIFO pair, wait for a client, fork a worker
+    exit_on_stdin(fork()); // the parent stays inside exit_on_stdin (it ends in custom_exit); the child continues below
+    signal(SIGINT, &cleanup_pipes);
+    signal(SIGQUIT, &cleanup_pipes);
+    signal(SIGTERM, &cleanup_pipes);
+    FILE* dev_random = fopen("/dev/urandom", "rb"); // NOTE(review): result is not checked; a NULL handle reaches fread in name_cur_pipes
+    while (true) {
+        name_cur_pipes(dev_random); // writes two fresh random paths into last_pipes
+        char cur_pipes[sizeof(last_pipes) / sizeof(last_pipes[0])][sizeof(last_pipes[0])];
+        memcpy(cur_pipes, last_pipes, sizeof(cur_pipes)); // local copy of the names used for open/unlink below
+        if (mkfifo(last_pipes[0], S_IWUSR | S_IRUSR) == -1) { // FIFOs are readable and writable by the owner only
+            perror("pipe");
+        }
+        if (mkfifo(last_pipes[1], S_IWUSR | S_IRUSR) == -1) {
+            perror("pipe");
+        }
+        fprintf(stdout, "%s\n%s\n", last_pipes[0], last_pipes[1]); // announce both paths on stdout, one per line
+        if (fflush(stdout) != 0) {
+            perror("sync");
+        }
+        int reader_pipe = -1;
+        do {
+            reader_pipe = open(cur_pipes[0], O_RDONLY); // retried only when interrupted by a signal
+        } while (reader_pipe < 0 && errno == EINTR);
+        int writer_pipe = -1;
+        do {
+            writer_pipe = open(cur_pipes[1], O_WRONLY);
+        } while (writer_pipe < 0 && errno == EINTR);
+        unlink(cur_pipes[0]); // both ends are open (or failed); the paths are no longer needed
+        unlink(cur_pipes[1]);
+        pid_t serve_file = fork(); // NOTE(review): a failed fork (-1) takes the parent branch and just closes the descriptors
+        if (serve_file == 0) {
+            while (close(1) < 0 && errno == EINTR) { // close stdout
+            }
+            // leave stderr open for complaints
+            IOUtil::FileReader reader(reader_pipe, 0, false);
+            IOUtil::FileWriter writer(writer_pipe, false, false);
+            //            process_file(&reader, &writer, 0, false);
+            custom_exit(ExitCode::SUCCESS); // with process_file commented out, the worker exits without touching the pipes
+        } else {
+            int err = -1;
+            do {
+                err = close(reader_pipe); // the parent drops its copies; the worker holds its own
+            } while (err < 0 && errno == EINTR);
+            do {
+                err = close(writer_pipe);
+            } while (err < 0 && errno == EINTR);
+        }
+        {
+            int status;
+            while (waitpid(-1, &status, WNOHANG) > 0) { // reap any finished workers without blocking
+            }
+        }
+    }
+}
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/lepton/fork_serve.hh b/codec/L2/demos/leptonEnc/host/lepton/fork_serve.hh
new file mode 100644
index 0000000000..66324cfe04
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/fork_serve.hh
@@ -0,0 +1,3 @@
+#ifndef _WIN32
+void fork_serve();
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/lepton/htables.hh b/codec/L2/demos/leptonEnc/host/lepton/htables.hh
new file mode 100644
index 0000000000..9b293deaba
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/htables.hh
@@ -0,0 +1,144 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/**
+Copyright (c) 2006...2016, Matthias Stirner and HTW Aalen University
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ **/
+
+/* -----------------------------------------------
+        compression helper tables
+        ----------------------------------------------- */
+
+// Per-frequency maxima for the 64 coefficient positions, listed in zigzag order.
+const unsigned short int freqmax[] = {1024, 931,  932,  985, 858, 985,  968, 884,  884,  967,  1020, 841,  871,
+                                      840,  1020, 968,  932, 875, 876,  932, 969,  1020, 838,  985,  844,  985,
+                                      838,  1020, 1020, 854, 878, 967,  967, 878,  854,  1020, 854,  871,  886,
+                                      1020, 886,  871,  854, 854, 870,  969, 969,  870,  854,  838,  1010, 838,
+                                      1020, 837,  1020, 969, 969, 1020, 838, 1020, 838,  1020, 1020, 838};
+/*
+const unsigned short int freqmax[] =
+{
+        1024,  924,  924,  942,  838,  942,  924,  854,
+         854,  924, 1020,  837,  871,  838, 1020,  924,
+         924,  854,  854,  924,  924,  942,  838,  942,
+         837,  942,  838,  942,  924,  854,  854,  924,
+         924,  854,  854,  924,  838,  871,  838, 1020,
+         837,  871,  838,  854,  854,  924,  924,  854,
+         854,  838,  942,  838,  942,  837,  924,  854,
+         854,  924,  838,  871,  838,  854,  854,  838
+};
+*/
+
+// Per-frequency maxima (64 entries) for the IJG float DCT. NOTE(review): looks row-major, not zigzag - confirm.
+const unsigned short int freqmax_float[] = {
+    1024, 924, 942, 924, 1020, 924, 942, 924, 924, 837, 854,  837, 924, 837, 854,  837, 942, 854, 871, 854, 942, 854,
+    871,  854, 924, 837, 854,  837, 924, 837, 854, 837, 1020, 924, 942, 924, 1020, 924, 942, 924, 924, 837, 854, 837,
+    924,  837, 854, 837, 942,  854, 871, 854, 942, 854, 871,  854, 924, 837, 854,  837, 924, 837, 854, 837};
+
+// Per-frequency maxima (64 entries) for the IJG integer DCT. NOTE(review): looks row-major, not zigzag - confirm.
+const unsigned short int freqmax_int[] = {
+    1024, 924, 942, 924, 1020, 924, 942, 924, 924, 838, 854,  838, 924, 838, 854,  838, 942, 854, 871, 854, 942, 854,
+    871,  854, 924, 837, 854,  837, 924, 837, 854, 837, 1020, 924, 942, 924, 1020, 924, 942, 924, 924, 838, 854, 838,
+    924,  838, 854, 838, 942,  854, 871, 854, 942, 854, 871,  854, 924, 838, 854,  838, 924, 838, 854, 838};
+
+// Per-frequency maxima (64 entries) for the IJG fast DCT. NOTE(review): looks row-major, not zigzag - confirm.
+const unsigned short int freqmax_fast[] = {1024, 931, 985,  968,  1020, 968, 1020, 1020, 932,  858,  884,  840, 932,
+                                           812,  854, 854,  985,  884,  849, 875,  985,  878,  821,  821,  967, 841,
+                                           876,  844, 967,  886,  870,  726, 1020, 932,  985,  967,  1020, 969, 1020,
+                                           1020, 969, 812,  878,  886,  969, 829,  969,  727,  1020, 854,  821, 870,
+                                           1010, 969, 1020, 1020, 1020, 854, 821,  725,  1020, 727,  1020, 510};
+
+// Per-frequency maxima (64 entries), "IJG DCT max". NOTE(review): presumably the element-wise max of the float/int/fast tables - confirm.
+const unsigned short int freqmax_ijg[] = {1024, 931, 985,  968,  1020, 968, 1020, 1020, 932,  858,  884,  840, 932,
+                                          838,  854, 854,  985,  884,  871, 875,  985,  878,  871,  854,  967, 841,
+                                          876,  844, 967,  886,  870,  837, 1020, 932,  985,  967,  1020, 969, 1020,
+                                          1020, 969, 838,  878,  886,  969, 838,  969,  838,  1020, 854,  871, 870,
+                                          1010, 969, 1020, 1020, 1020, 854, 854,  838,  1020, 838,  1020, 838};
+
+// Standard scan = zigzag scan: the identity permutation 0..63. NOTE(review): non-const array defined in a header - verify only one .cc includes it.
+unsigned char stdscan[] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
+                           22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
+                           44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
+
+// Zagzig scan, usable instead of the zigzag scan: the zigzag indices with each diagonal run (lengths 1,2,..,8,..,2,1) reversed.
+unsigned char zagscan[] = {0,  2,  1,  5,  4,  3,  9,  8,  7,  6,  14, 13, 12, 11, 10, 20, 19, 18, 17, 16, 15, 27,
+                           26, 25, 24, 23, 22, 21, 35, 34, 33, 32, 31, 30, 29, 28, 42, 41, 40, 39, 38, 37, 36, 48,
+                           47, 46, 45, 44, 43, 53, 52, 51, 50, 49, 57, 56, 55, 54, 60, 59, 58, 62, 61, 63};
+
+// Zigzag scan reverse conversion table: entry i is the row-major (8 per row) position of the i-th zigzag coefficient.
+const int jpeg_natural_order[] = {0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,
+                                  12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6,  7,  14, 21, 28,
+                                  35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+                                  58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63};
+
+// Scans for each frequency, as an 8x8 row-major grid: 'm' on the main diagonal, 'v' above it, 'h' below it.
+const char freqalign[] = {'m', 'v', 'v', 'v', 'v', 'v', 'v', 'v', 'h', 'm', 'v', 'v', 'v', 'v', 'v', 'v',
+                          'h', 'h', 'm', 'v', 'v', 'v', 'v', 'v', 'h', 'h', 'h', 'm', 'v', 'v', 'v', 'v',
+                          'h', 'h', 'h', 'h', 'm', 'v', 'v', 'v', 'h', 'h', 'h', 'h', 'h', 'm', 'v', 'v',
+                          'h', 'h', 'h', 'h', 'h', 'h', 'm', 'v', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'm'};
+
+// Chengjie Tu subband classification: class id (0..6, legend in the right-hand comments) for each of the 64 positions, 8 per row.
+const int ctxclass[] = {0, 1, 3, 3, 3, 6, 6, 6, // 0 -> DC (DC subband)
+                        2, 5, 5, 5, 6, 6, 6, 6, // 1 -> PV (principal vertical)
+                        4, 5, 5, 5, 6, 6, 6, 6, // 2 -> PH (principal horizontal)
+                        4, 5, 5, 6, 6, 6, 6, 6, // 3 -> LV (low-frequency vertical)
+                        4, 6, 6, 6, 6, 6, 6, 6, // 4 -> LH (low-frequency horizontal)
+                        6, 6, 6, 6, 6, 6, 6, 6, // 5 -> LD (low-frequency diagonal)
+                        6, 6, 6, 6, 6, 6, 6, 6, // 6 -> HP (high-pass)
+                        6, 6, 6, 6, 6, 6, 6, 6};
+
+// Standard huffman tables, found in JPEG specification, Chapter K.3. Each row: 16 per-length code counts, then the symbol values; the unused tail is zero-filled.
+const unsigned char std_huff_tables[4][272] = {
+    {// standard luma dc table (0/0)
+     0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+     0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B},
+    {// standard chroma dc table (0/1)
+     0x00, 0x03, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00,
+     0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B},
+    {// standard luma ac table (1/0)
+     0x00, 0x02, 0x01, 0x03, 0x03, 0x02, 0x04, 0x03, 0x05, 0x05, 0x04, 0x04, 0x00, 0x00, 0x01, 0x7D, 0x01, 0x02,
+     0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32,
+     0x81, 0x91, 0xA1, 0x08, 0x23, 0x42, 0xB1, 0xC1, 0x15, 0x52, 0xD1, 0xF0, 0x24, 0x33, 0x62, 0x72, 0x82, 0x09,
+     0x0A, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+     0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x63,
+     0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x83, 0x84, 0x85,
+     0x86, 0x87, 0x88, 0x89, 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5,
+     0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, 0xC4, 0xC5,
+     0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xE1, 0xE2, 0xE3, 0xE4,
+     0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA},
+    {// standard chroma ac table (1/1)
+     0x00, 0x02, 0x01, 0x02, 0x04, 0x04, 0x03, 0x04, 0x07, 0x05, 0x04, 0x04, 0x00, 0x01, 0x02, 0x77, 0x00, 0x01,
+     0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, 0x13, 0x22, 0x32, 0x81,
+     0x08, 0x14, 0x42, 0x91, 0xA1, 0xB1, 0xC1, 0x09, 0x23, 0x33, 0x52, 0xF0, 0x15, 0x62, 0x72, 0xD1, 0x0A, 0x16,
+     0x24, 0x34, 0xE1, 0x25, 0xF1, 0x17, 0x18, 0x19, 0x1A, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x35, 0x36, 0x37, 0x38,
+     0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A,
+     0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x82, 0x83,
+     0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3,
+     0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3,
+     0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xE2, 0xE3,
+     0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA}};
+
+// Lengths of the standard huffman tables: number of bytes actually initialized in each row of std_huff_tables.
+const unsigned char std_huff_lengths[4] = {28, 28, 178, 178};
diff --git a/codec/L2/demos/leptonEnc/host/lepton/idct.cc b/codec/L2/demos/leptonEnc/host/lepton/idct.cc
new file mode 100644
index 0000000000..5ac88d57f7
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/idct.cc
@@ -0,0 +1,593 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+#include <immintrin.h>
+#include <tmmintrin.h>
+#include "../vp8/util/aligned_block.hh"
+#include "../vp8/util/mm_mullo_epi32.hh"
+
+namespace idct_local { // fixed-point constants pulled in by the IDCT implementations in this file
+enum {
+    w1 = 2841, // 2048*sqrt(2)*cos(1*pi/16)
+    w2 = 2676, // 2048*sqrt(2)*cos(2*pi/16)
+    w3 = 2408, // 2048*sqrt(2)*cos(3*pi/16)
+    w5 = 1609, // 2048*sqrt(2)*cos(5*pi/16)
+    w6 = 1108, // 2048*sqrt(2)*cos(6*pi/16)
+    w7 = 565,  // 2048*sqrt(2)*cos(7*pi/16)
+    // Precomputed sums/differences of the weights above, used as multipliers in the butterfly stages.
+    w1pw7 = w1 + w7,
+    w1mw7 = w1 - w7,
+    w2pw6 = w2 + w6,
+    w2mw6 = w2 - w6,
+    w3pw5 = w3 + w5,
+    w3mw5 = w3 - w5,
+    // Multiplier for the 1/sqrt(2) step; every use is followed by "+ 128) >> 8" to undo the 256 scale.
+    r2 = 181 // 256/sqrt(2)
+};
+}
+
+#if (!defined(__SSE2__)) && !(_M_IX86_FP >= 1)
+static void idct_scalar(const AlignedBlock& block, const uint16_t q[64], int16_t outp[64], bool ignore_dc) {
+    int32_t intermed[64]; // output of the horizontal pass, input of the vertical pass
+    using namespace idct_local;
+    // Scalar 8x8 inverse DCT. Pass 1 (horizontal): dequantize each row (coefficient * q) and transform it into intermed.
+    for (int y = 0; y < 8; ++y) { // ignore_dc only zeroes the DC term (row 0, column 0), see x0 below
+        int y8 = y * 8; // raster index of the first coefficient in row y
+        int32_t x0 = (((ignore_dc && y == 0) ? 0 : (block.coefficients_raster(y8 + 0) * q[y8 + 0]) << 11)) + 128;
+        int32_t x1 = (block.coefficients_raster(y8 + 4) * q[y8 + 4]) << 11;
+        int32_t x2 = block.coefficients_raster(y8 + 6) * q[y8 + 6];
+        int32_t x3 = block.coefficients_raster(y8 + 2) * q[y8 + 2];
+        int32_t x4 = block.coefficients_raster(y8 + 1) * q[y8 + 1];
+        int32_t x5 = block.coefficients_raster(y8 + 7) * q[y8 + 7];
+        int32_t x6 = block.coefficients_raster(y8 + 5) * q[y8 + 5];
+        int32_t x7 = block.coefficients_raster(y8 + 3) * q[y8 + 3];
+        // If all the AC components are zero, then the IDCT is trivial.
+        if (x1 == 0 && x2 == 0 && x3 == 0 && x4 == 0 && x5 == 0 && x6 == 0 && x7 == 0) {
+            int32_t dc = (x0 - 128) >> 8; // coefficients[0] << 3
+            intermed[y8 + 0] = dc;
+            intermed[y8 + 1] = dc;
+            intermed[y8 + 2] = dc;
+            intermed[y8 + 3] = dc;
+            intermed[y8 + 4] = dc;
+            intermed[y8 + 5] = dc;
+            intermed[y8 + 6] = dc;
+            intermed[y8 + 7] = dc;
+            continue;
+        }
+
+        // Prescale: already applied in the loads above (x0 and x1 carry the << 11, x0 also the + 128 rounding bias).
+
+        // Stage 1.
+        int32_t x8 = w7 * (x4 + x5);
+        x4 = x8 + w1mw7 * x4;
+        x5 = x8 - w1pw7 * x5;
+        x8 = w3 * (x6 + x7);
+        x6 = x8 - w3mw5 * x6;
+        x7 = x8 - w3pw5 * x7;
+
+        // Stage 2.
+        x8 = x0 + x1;
+        x0 -= x1;
+        x1 = w6 * (x3 + x2);
+        x2 = x1 - w2pw6 * x2;
+        x3 = x1 + w2mw6 * x3;
+        x1 = x4 + x6;
+        x4 -= x6;
+        x6 = x5 + x7;
+        x5 -= x7;
+
+        // Stage 3.
+        x7 = x8 + x3;
+        x8 -= x3;
+        x3 = x0 + x2;
+        x0 -= x2;
+        x2 = (r2 * (x4 + x5) + 128) >> 8;
+        x4 = (r2 * (x4 - x5) + 128) >> 8;
+
+        // Stage 4.
+        intermed[y8 + 0] = (x7 + x1) >> 8;
+        intermed[y8 + 1] = (x3 + x2) >> 8;
+        intermed[y8 + 2] = (x0 + x4) >> 8;
+        intermed[y8 + 3] = (x8 + x6) >> 8;
+        intermed[y8 + 4] = (x8 - x6) >> 8;
+        intermed[y8 + 5] = (x0 - x4) >> 8;
+        intermed[y8 + 6] = (x3 - x2) >> 8;
+        intermed[y8 + 7] = (x7 - x1) >> 8;
+    }
+
+    // Pass 2 (vertical): 1-D IDCT down each column x of intermed; results are written to outp in raster order.
+    for (int32_t x = 0; x < 8; ++x) {
+        // Similar to the horizontal 1-D IDCT case, if all the AC components are zero, then the IDCT is trivial.
+        // However, after performing the horizontal 1-D IDCT, there are typically non-zero AC components, so
+        // we do not bother to check for the all-zero case.
+
+        // Prescale.
+        int32_t y0 = (intermed[8 * 0 + x] << 8) + 8192;
+        int32_t y1 = intermed[8 * 4 + x] << 8;
+        int32_t y2 = intermed[8 * 6 + x];
+        int32_t y3 = intermed[8 * 2 + x];
+        int32_t y4 = intermed[8 * 1 + x];
+        int32_t y5 = intermed[8 * 7 + x];
+        int32_t y6 = intermed[8 * 5 + x];
+        int32_t y7 = intermed[8 * 3 + x];
+
+        // Stage 1.
+        int32_t y8 = w7 * (y4 + y5) + 4;
+        y4 = (y8 + w1mw7 * y4) >> 3;
+        y5 = (y8 - w1pw7 * y5) >> 3;
+        y8 = w3 * (y6 + y7) + 4;
+        y6 = (y8 - w3mw5 * y6) >> 3;
+        y7 = (y8 - w3pw5 * y7) >> 3;
+
+        // Stage 2.
+        y8 = y0 + y1;
+        y0 -= y1;
+        y1 = w6 * (y3 + y2) + 4;
+        y2 = (y1 - w2pw6 * y2) >> 3;
+        y3 = (y1 + w2mw6 * y3) >> 3;
+        y1 = y4 + y6;
+        y4 -= y6;
+        y6 = y5 + y7;
+        y5 -= y7;
+
+        // Stage 3.
+        y7 = y8 + y3;
+        y8 -= y3;
+        y3 = y0 + y2;
+        y0 -= y2;
+        y2 = (r2 * (y4 + y5) + 128) >> 8;
+        y4 = (r2 * (y4 - y5) + 128) >> 8;
+
+        // Stage 4.
+        outp[8 * 0 + x] = (y7 + y1) >> 11;
+        outp[8 * 1 + x] = (y3 + y2) >> 11;
+        outp[8 * 2 + x] = (y0 + y4) >> 11;
+        outp[8 * 3 + x] = (y8 + y6) >> 11;
+        outp[8 * 4 + x] = (y8 - y6) >> 11;
+        outp[8 * 5 + x] = (y0 - y4) >> 11;
+        outp[8 * 6 + x] = (y3 - y2) >> 11;
+        outp[8 * 7 + x] = (y7 - y1) >> 11;
+    }
+    for (int i = 0; i < 64; ++i) { // no-op: the only statement in the body is commented out
+        // outp[i]>>=3;
+    }
+}
+#else /* At least SSE2 is available { */
+
+template <int which_vec, int offset, int stride>
+__m128i vget_raster(const AlignedBlock& block) {
+    // Gather four coefficients spaced `stride` apart; lane 0 holds raster index which_vec + offset.
+    const int first = which_vec + offset;
+    return _mm_set_epi32(block.coefficients_raster(first + 3 * stride), block.coefficients_raster(first + 2 * stride),
+                         block.coefficients_raster(first + 1 * stride), block.coefficients_raster(first));
+}
+template <int offset, int stride>
+__m128i vquantize(int which_vec, __m128i vec, const uint16_t q[64]) {
+    const uint16_t* qfirst = q + which_vec + offset; // lane 0's quantizer; lane k uses qfirst[k * stride]
+    return _mm_mullo_epi32(vec, _mm_set_epi32(qfirst[3 * stride], qfirst[2 * stride], qfirst[1 * stride], qfirst[0]));
+}
+
+static __m128i epi32l_to_epi16(__m128i lowvec) { // low 16 bits of each int32 lane, packed into the low 64 bits
+    const __m128i low_halves = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 0xd, 0xc, 0x9, 0x8, 0x5, 0x4, 0x1, 0x0);
+    return _mm_shuffle_epi8(lowvec, low_halves); // a -1 selector byte zeroes the matching output byte
+}
+
+#define TRANSPOSE_128i(row0, row1, row2, row3, ocol0, ocol1, ocol2, ocol3)      \
+    do { /* 4x4 transpose of 32-bit lanes: lane k of ocolN = lane N of rowk */  \
+        const __m128i xpose_lo01 = _mm_unpacklo_epi32(row0, row1);              \
+        const __m128i xpose_hi01 = _mm_unpackhi_epi32(row0, row1);              \
+        const __m128i xpose_lo23 = _mm_unpacklo_epi32(row2, row3);              \
+        const __m128i xpose_hi23 = _mm_unpackhi_epi32(row2, row3);              \
+        ocol0 = _mm_unpacklo_epi64(xpose_lo01, xpose_lo23);                     \
+        ocol1 = _mm_unpackhi_epi64(xpose_lo01, xpose_lo23);                     \
+        ocol2 = _mm_unpacklo_epi64(xpose_hi01, xpose_hi23);                     \
+        ocol3 = _mm_unpackhi_epi64(xpose_hi01, xpose_hi23);                     \
+    } while (0)
+
+static void idct_sse(const AlignedBlock& block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) {
+    // SSE 8x8 inverse DCT: dequantizes block with q, runs a row pass into vintermed and a column pass into voutp.
+    // alignas(16) provides the alignment that the _mm_store_si128/_mm_load_si128 accesses to this buffer require.
+    alignas(16) int32_t vintermed[64];
+    using namespace idct_local;
+    // Horizontal 1-D IDCT, four rows per iteration (one row per lane); yvec is the raster index of the first row.
+    for (int yvec = 0; yvec < 64; yvec += 32) {
+        __m128i xv0, xv1, xv2, xv3, xv4, xv5, xv6, xv7, xv8;
+        if (yvec == 0) {
+            xv0 = vget_raster<0, 0, 8>(block);
+            xv1 = vget_raster<0, 4, 8>(block);
+            xv2 = vget_raster<0, 6, 8>(block);
+            xv3 = vget_raster<0, 2, 8>(block);
+            xv4 = vget_raster<0, 1, 8>(block);
+            xv5 = vget_raster<0, 7, 8>(block);
+            xv6 = vget_raster<0, 5, 8>(block);
+            xv7 = vget_raster<0, 3, 8>(block);
+            if (__builtin_expect(ignore_dc, true)) { // drop the DC term: lane 0 of xv0 is coefficient 0
+#ifdef __SSE4_1__
+                xv0 = _mm_insert_epi32(xv0, 0, 0);
+#else
+                // See http://stackoverflow.com/questions/38384520/is-there-a-sse2-equivalent-for-mm-insert-epi32
+                xv0 = _mm_and_si128(xv0, _mm_set_epi32(-1, -1, -1, 0));
+#endif
+            }
+        } else {
+            xv0 = vget_raster<32, 0, 8>(block);
+            xv1 = vget_raster<32, 4, 8>(block);
+            xv2 = vget_raster<32, 6, 8>(block);
+            xv3 = vget_raster<32, 2, 8>(block);
+            xv4 = vget_raster<32, 1, 8>(block);
+            xv5 = vget_raster<32, 7, 8>(block);
+            xv6 = vget_raster<32, 5, 8>(block);
+            xv7 = vget_raster<32, 3, 8>(block);
+        }
+        xv0 = _mm_add_epi32(_mm_slli_epi32(vquantize<0, 8>(yvec, xv0, q), 11), _mm_set1_epi32(128));
+
+        xv1 = _mm_slli_epi32(vquantize<4, 8>(yvec, xv1, q), 11);
+        xv2 = vquantize<6, 8>(yvec, xv2, q);
+        xv3 = vquantize<2, 8>(yvec, xv3, q);
+        xv4 = vquantize<1, 8>(yvec, xv4, q);
+        xv5 = vquantize<7, 8>(yvec, xv5, q);
+        xv6 = vquantize<5, 8>(yvec, xv6, q);
+        xv7 = vquantize<3, 8>(yvec, xv7, q);
+        // Stage 1.
+        xv8 = _mm_mullo_epi32(_mm_set1_epi32(w7), _mm_add_epi32(xv4, xv5));
+        xv4 = _mm_add_epi32(xv8, _mm_mullo_epi32(_mm_set1_epi32(w1mw7), xv4));
+        xv5 = _mm_sub_epi32(xv8, _mm_mullo_epi32(_mm_set1_epi32(w1pw7), xv5));
+
+        xv8 = _mm_mullo_epi32(_mm_set1_epi32(w3), _mm_add_epi32(xv6, xv7));
+        xv6 = _mm_sub_epi32(xv8, _mm_mullo_epi32(_mm_set1_epi32(w3mw5), xv6));
+        xv7 = _mm_sub_epi32(xv8, _mm_mullo_epi32(_mm_set1_epi32(w3pw5), xv7));
+        // Stage 2.
+        xv8 = _mm_add_epi32(xv0, xv1);
+        xv0 = _mm_sub_epi32(xv0, xv1);
+        xv1 = _mm_mullo_epi32(_mm_set1_epi32(w6), _mm_add_epi32(xv3, xv2));
+        xv2 = _mm_sub_epi32(xv1, _mm_mullo_epi32(_mm_set1_epi32(w2pw6), xv2));
+        xv3 = _mm_add_epi32(xv1, _mm_mullo_epi32(_mm_set1_epi32(w2mw6), xv3));
+        xv1 = _mm_add_epi32(xv4, xv6);
+        xv4 = _mm_sub_epi32(xv4, xv6);
+        xv6 = _mm_add_epi32(xv5, xv7);
+        xv5 = _mm_sub_epi32(xv5, xv7);
+
+        // Stage 3.
+        xv7 = _mm_add_epi32(xv8, xv3);
+        xv8 = _mm_sub_epi32(xv8, xv3);
+        xv3 = _mm_add_epi32(xv0, xv2);
+        xv0 = _mm_sub_epi32(xv0, xv2);
+        xv2 = _mm_srai_epi32(
+            _mm_add_epi32(_mm_mullo_epi32(_mm_set1_epi32(r2), _mm_add_epi32(xv4, xv5)), _mm_set1_epi32(128)), 8);
+        xv4 = _mm_srai_epi32(
+            _mm_add_epi32(_mm_mullo_epi32(_mm_set1_epi32(r2), _mm_sub_epi32(xv4, xv5)), _mm_set1_epi32(128)), 8);
+        // Stage 4: output columns 0..3 on the first trip through the loop below, columns 4..7 on the second.
+        int index = 0; // first output column handled by the current trip (0, then 4)
+        for (__m128i
+                 row0 = _mm_srai_epi32(_mm_add_epi32(xv7, xv1), 8),
+                 row1 = _mm_srai_epi32(_mm_add_epi32(xv3, xv2), 8), row2 = _mm_srai_epi32(_mm_add_epi32(xv0, xv4), 8),
+                 row3 = _mm_srai_epi32(_mm_add_epi32(xv8, xv6), 8);
+             true; // will break if index == 4 at the end of this loop
+             index += 4, row0 = _mm_srai_epi32(_mm_sub_epi32(xv8, xv6), 8),
+                 row1 = _mm_srai_epi32(_mm_sub_epi32(xv0, xv4), 8), row2 = _mm_srai_epi32(_mm_sub_epi32(xv3, xv2), 8),
+                 row3 = _mm_srai_epi32(_mm_sub_epi32(xv7, xv1), 8)) {
+            __m128i col0, col1, col2, col3;
+            TRANSPOSE_128i(row0, row1, row2, row3, col0, col1, col2, col3);
+
+            _mm_store_si128((__m128i*)(vintermed + index + yvec), col0);
+            _mm_store_si128((__m128i*)(vintermed + index + 8 + yvec), col1);
+            _mm_store_si128((__m128i*)(vintermed + index + 16 + yvec), col2);
+            _mm_store_si128((__m128i*)(vintermed + index + 24 + yvec), col3);
+            if (index == 4) {
+                break; // only iterate twice
+            }
+        }
+    }
+    // Vertical 1-D IDCT, four columns per iteration (one column per lane); xvec is the first column of the group.
+    for (uint8_t xvec = 0; xvec < 8; xvec += 4) {
+        __m128i yv0, yv1, yv2, yv3, yv4, yv5, yv6, yv7, yv8;
+        yv0 =
+            _mm_add_epi32(_mm_slli_epi32(_mm_load_si128((const __m128i*)(vintermed + xvec)), 8), _mm_set1_epi32(8192));
+        yv1 = _mm_slli_epi32(_mm_load_si128((const __m128i*)(vintermed + 8 * 4 + xvec)), 8);
+        yv2 = _mm_load_si128((const __m128i*)(vintermed + 8 * 6 + xvec));
+        yv3 = _mm_load_si128((const __m128i*)(vintermed + 8 * 2 + xvec));
+        yv4 = _mm_load_si128((const __m128i*)(vintermed + 8 * 1 + xvec));
+        yv5 = _mm_load_si128((const __m128i*)(vintermed + 8 * 7 + xvec));
+        yv6 = _mm_load_si128((const __m128i*)(vintermed + 8 * 5 + xvec));
+        yv7 = _mm_load_si128((const __m128i*)(vintermed + 8 * 3 + xvec));
+        // Stage 1.
+        yv8 = _mm_add_epi32(_mm_mullo_epi32(_mm_add_epi32(yv4, yv5), _mm_set1_epi32(w7)), _mm_set1_epi32(4));
+        yv4 = _mm_srai_epi32(_mm_add_epi32(yv8, _mm_mullo_epi32(_mm_set1_epi32(w1mw7), yv4)), 3);
+        yv5 = _mm_srai_epi32(_mm_sub_epi32(yv8, _mm_mullo_epi32(_mm_set1_epi32(w1pw7), yv5)), 3);
+        yv8 = _mm_add_epi32(_mm_mullo_epi32(_mm_set1_epi32(w3), _mm_add_epi32(yv6, yv7)), _mm_set1_epi32(4));
+        yv6 = _mm_srai_epi32(_mm_sub_epi32(yv8, _mm_mullo_epi32(_mm_set1_epi32(w3mw5), yv6)), 3);
+        yv7 = _mm_srai_epi32(_mm_sub_epi32(yv8, _mm_mullo_epi32(_mm_set1_epi32(w3pw5), yv7)), 3);
+        // Stage 2.
+        yv8 = _mm_add_epi32(yv0, yv1);
+        yv0 = _mm_sub_epi32(yv0, yv1);
+        yv1 = _mm_add_epi32(_mm_mullo_epi32(_mm_set1_epi32(w6), _mm_add_epi32(yv3, yv2)), _mm_set1_epi32(4));
+        yv2 = _mm_srai_epi32(_mm_sub_epi32(yv1, _mm_mullo_epi32(_mm_set1_epi32(w2pw6), yv2)), 3);
+        yv3 = _mm_srai_epi32(_mm_add_epi32(yv1, _mm_mullo_epi32(_mm_set1_epi32(w2mw6), yv3)), 3);
+        yv1 = _mm_add_epi32(yv4, yv6);
+        yv4 = _mm_sub_epi32(yv4, yv6);
+        yv6 = _mm_add_epi32(yv5, yv7);
+        yv5 = _mm_sub_epi32(yv5, yv7);
+
+        // Stage 3.
+        yv7 = _mm_add_epi32(yv8, yv3);
+        yv8 = _mm_sub_epi32(yv8, yv3);
+        yv3 = _mm_add_epi32(yv0, yv2);
+        yv0 = _mm_sub_epi32(yv0, yv2);
+        yv2 = _mm_srai_epi32(
+            _mm_add_epi32(_mm_mullo_epi32(_mm_set1_epi32(r2), _mm_add_epi32(yv4, yv5)), _mm_set1_epi32(128)), 8);
+        yv4 = _mm_srai_epi32(
+            _mm_add_epi32(_mm_mullo_epi32(_mm_set1_epi32(r2), _mm_sub_epi32(yv4, yv5)), _mm_set1_epi32(128)), 8);
+        // Stage 4: rowN holds output row N for columns xvec..xvec+3; each is narrowed to int16 and stored to voutp.
+        __m128i row0 = _mm_srai_epi32(_mm_add_epi32(yv7, yv1), 11);
+        __m128i row1 = _mm_srai_epi32(_mm_add_epi32(yv3, yv2), 11);
+        __m128i row2 = _mm_srai_epi32(_mm_add_epi32(yv0, yv4), 11);
+        __m128i row3 = _mm_srai_epi32(_mm_add_epi32(yv8, yv6), 11);
+        __m128i row4 = _mm_srai_epi32(_mm_sub_epi32(yv8, yv6), 11);
+        __m128i row5 = _mm_srai_epi32(_mm_sub_epi32(yv0, yv4), 11);
+        __m128i row6 = _mm_srai_epi32(_mm_sub_epi32(yv3, yv2), 11);
+        __m128i row7 = _mm_srai_epi32(_mm_sub_epi32(yv7, yv1), 11);
+        _mm_storel_epi64((__m128i*)(char*)(voutp + xvec), epi32l_to_epi16(row0));
+        _mm_storel_epi64((__m128i*)(char*)(voutp + 8 + xvec), epi32l_to_epi16(row1));
+        _mm_storel_epi64((__m128i*)(char*)(voutp + 2 * 8 + xvec), epi32l_to_epi16(row2));
+        _mm_storel_epi64((__m128i*)(char*)(voutp + 3 * 8 + xvec), epi32l_to_epi16(row3));
+        _mm_storel_epi64((__m128i*)(char*)(voutp + 4 * 8 + xvec), epi32l_to_epi16(row4));
+        _mm_storel_epi64((__m128i*)(char*)(voutp + 5 * 8 + xvec), epi32l_to_epi16(row5));
+        _mm_storel_epi64((__m128i*)(char*)(voutp + 6 * 8 + xvec), epi32l_to_epi16(row6));
+        _mm_storel_epi64((__m128i*)(char*)(voutp + 7 * 8 + xvec), epi32l_to_epi16(row7));
+    }
+}
+
+#define vget_raster256(offset, stride, block) /* lane i = (block).coefficients_raster(i * stride + offset), i = 0..7 */        \
+    _mm256_set_epi32((block).coefficients_raster(7 * (stride) + (offset)), (block).coefficients_raster(6 * (stride) + (offset)), \
+                     (block).coefficients_raster(5 * (stride) + (offset)), (block).coefficients_raster(4 * (stride) + (offset)), \
+                     (block).coefficients_raster(3 * (stride) + (offset)), (block).coefficients_raster(2 * (stride) + (offset)), \
+                     (block).coefficients_raster(1 * (stride) + (offset)), (block).coefficients_raster(offset))
+
+#define vquantize256(offset, stride, vec, q) /* multiplies lane i of vec by (q)[i * stride + offset], i = 0..7 */                    \
+    _mm256_mullo_epi32(vec, _mm256_set_epi32((q)[7 * (stride) + (offset)], (q)[6 * (stride) + (offset)], (q)[5 * (stride) + (offset)], \
+                                             (q)[4 * (stride) + (offset)], (q)[3 * (stride) + (offset)], (q)[2 * (stride) + (offset)], \
+                                             (q)[1 * (stride) + (offset)], (q)[offset]))
+
+#define m256_set_m128i(a, b) _mm256_insertf128_si256(_mm256_castsi128_si256(b), a, 1) // a -> upper 128 bits, b -> lower
+#define m256_to_epi16(vec) /* low 16 bits of each of the 8 int32 lanes -> one __m128i; vec is expanded twice */ \
+    _mm_or_si128(                                                                                               \
+        _mm_shuffle_epi8(_mm256_extractf128_si256(vec, 0),                                                      \
+                         _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 0xd, 0xc, 0x9, 0x8, 0x5, 0x4, 0x1, 0x0)), \
+        _mm_shuffle_epi8(_mm256_extractf128_si256(vec, 1),                                                      \
+                         _mm_set_epi8(0xd, 0xc, 0x9, 0x8, 0x5, 0x4, 0x1, 0x0, -1, -1, -1, -1, -1, -1, -1, -1)))
+/*
+__m128i m256_to_epi16(__m256i vec) {
+    __m128i lo = _mm256_extractf128_si256(vec, 0);
+    __m128i hi = _mm256_extractf128_si256(vec, 1);
+    __m128i lopacked = _mm_shuffle_epi8(lo, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1,
+                                                         0xd, 0xc, 0x9, 0x8, 0x5, 0x4, 0x1, 0x0));
+    __m128i hipacked = _mm_shuffle_epi8(hi, _mm_set_epi8(0xd, 0xc, 0x9, 0x8, 0x5, 0x4, 0x1, 0x0,
+                                                         -1, -1, -1, -1, -1, -1, -1, -1));
+    return _mm_or_si128(lopacked, hipacked);
+
+    }*/
+#ifdef __AVX2__
+static void idct_avx(const AlignedBlock& block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) {
+    // align intermediate storage to 16 bytes
+    using namespace idct_local;
+    // Horizontal 1-D IDCT.
+    __m256i col0, col1, col2, col3, col4, col5, col6, col7;
+    {
+        __m256i xv0, xv1, xv2, xv3, xv4, xv5, xv6, xv7, xv8;
+        xv0 = vget_raster256(0, 8, block);
+        xv1 = vget_raster256(4, 8, block);
+        xv2 = vget_raster256(6, 8, block);
+        xv3 = vget_raster256(2, 8, block);
+        xv4 = vget_raster256(1, 8, block);
+        xv5 = vget_raster256(7, 8, block);
+        xv6 = vget_raster256(5, 8, block);
+        xv7 = vget_raster256(3, 8, block);
+        if (__builtin_expect(ignore_dc, true)) {
+#ifdef _WIN32
+            __m128i zero_first = _mm256_extractf128_si256(xv0, 0);
+            xv0 = _mm256_insertf128_si256(xv0, _mm_insert_epi32(zero_first, 0, 0), 0);
+#else
+            xv0 = _mm256_insert_epi32(xv0, 0, 0);
+#endif
+        }
+        xv0 = _mm256_add_epi32(_mm256_slli_epi32(vquantize256(0, 8, xv0, q), 11), _mm256_set1_epi32(128));
+
+        xv1 = _mm256_slli_epi32(vquantize256(4, 8, xv1, q), 11);
+        xv2 = vquantize256(6, 8, xv2, q);
+        xv3 = vquantize256(2, 8, xv3, q);
+        xv4 = vquantize256(1, 8, xv4, q);
+        xv5 = vquantize256(7, 8, xv5, q);
+        xv6 = vquantize256(5, 8, xv6, q);
+        xv7 = vquantize256(3, 8, xv7, q);
+        // Stage 1.
+        xv8 = _mm256_mullo_epi32(_mm256_set1_epi32(w7), _mm256_add_epi32(xv4, xv5));
+        xv4 = _mm256_add_epi32(xv8, _mm256_mullo_epi32(_mm256_set1_epi32(w1mw7), xv4));
+        xv5 = _mm256_sub_epi32(xv8, _mm256_mullo_epi32(_mm256_set1_epi32(w1pw7), xv5));
+
+        xv8 = _mm256_mullo_epi32(_mm256_set1_epi32(w3), _mm256_add_epi32(xv6, xv7));
+        xv6 = _mm256_sub_epi32(xv8, _mm256_mullo_epi32(_mm256_set1_epi32(w3mw5), xv6));
+        xv7 = _mm256_sub_epi32(xv8, _mm256_mullo_epi32(_mm256_set1_epi32(w3pw5), xv7));
+
+        xv8 = _mm256_add_epi32(xv0, xv1);
+        xv0 = _mm256_sub_epi32(xv0, xv1);
+        xv1 = _mm256_mullo_epi32(_mm256_set1_epi32(w6), _mm256_add_epi32(xv3, xv2));
+        xv2 = _mm256_sub_epi32(xv1, _mm256_mullo_epi32(_mm256_set1_epi32(w2pw6), xv2));
+        xv3 = _mm256_add_epi32(xv1, _mm256_mullo_epi32(_mm256_set1_epi32(w2mw6), xv3));
+        xv1 = _mm256_add_epi32(xv4, xv6);
+        xv4 = _mm256_sub_epi32(xv4, xv6);
+        xv6 = _mm256_add_epi32(xv5, xv7);
+        xv5 = _mm256_sub_epi32(xv5, xv7);
+
+        // Stage 3.
+        xv7 = _mm256_add_epi32(xv8, xv3);
+        xv8 = _mm256_sub_epi32(xv8, xv3);
+        xv3 = _mm256_add_epi32(xv0, xv2);
+        xv0 = _mm256_sub_epi32(xv0, xv2);
+        xv2 = _mm256_srai_epi32(_mm256_add_epi32(_mm256_mullo_epi32(_mm256_set1_epi32(r2), _mm256_add_epi32(xv4, xv5)),
+                                                 _mm256_set1_epi32(128)),
+                                8);
+        xv4 = _mm256_srai_epi32(_mm256_add_epi32(_mm256_mullo_epi32(_mm256_set1_epi32(r2), _mm256_sub_epi32(xv4, xv5)),
+                                                 _mm256_set1_epi32(128)),
+                                8);
+        // Stage 4.
+        __m256i row0 = _mm256_srai_epi32(_mm256_add_epi32(xv7, xv1), 8),
+                row1 = _mm256_srai_epi32(_mm256_add_epi32(xv3, xv2), 8),
+                row2 = _mm256_srai_epi32(_mm256_add_epi32(xv0, xv4), 8),
+                row3 = _mm256_srai_epi32(_mm256_add_epi32(xv8, xv6), 8),
+                row4 = _mm256_srai_epi32(_mm256_sub_epi32(xv8, xv6), 8),
+                row5 = _mm256_srai_epi32(_mm256_sub_epi32(xv0, xv4), 8),
+                row6 = _mm256_srai_epi32(_mm256_sub_epi32(xv3, xv2), 8),
+                row7 = _mm256_srai_epi32(_mm256_sub_epi32(xv7, xv1), 8);
+        __m128i row0lo = _mm256_extractf128_si256(row0, 0);
+        __m128i row1lo = _mm256_extractf128_si256(row1, 0);
+        __m128i row2lo = _mm256_extractf128_si256(row2, 0);
+        __m128i row3lo = _mm256_extractf128_si256(row3, 0);
+        __m128i row4lo = _mm256_extractf128_si256(row4, 0);
+        __m128i row5lo = _mm256_extractf128_si256(row5, 0);
+        __m128i row6lo = _mm256_extractf128_si256(row6, 0);
+        __m128i row7lo = _mm256_extractf128_si256(row7, 0);
+        __m128i col0lo, col1lo, col2lo, col3lo;
+        __m128i col0hi, col1hi, col2hi, col3hi;
+        TRANSPOSE_128i(row0lo, row1lo, row2lo, row3lo, col0lo, col1lo, col2lo, col3lo);
+        TRANSPOSE_128i(row4lo, row5lo, row6lo, row7lo, col0hi, col1hi, col2hi, col3hi);
+        col0 = m256_set_m128i(col0hi, col0lo);
+        col1 = m256_set_m128i(col1hi, col1lo);
+        col2 = m256_set_m128i(col2hi, col2lo);
+        col3 = m256_set_m128i(col3hi, col3lo);
+        __m128i row0hi = _mm256_extractf128_si256(row0, 1);
+        __m128i row1hi = _mm256_extractf128_si256(row1, 1);
+        __m128i row2hi = _mm256_extractf128_si256(row2, 1);
+        __m128i row3hi = _mm256_extractf128_si256(row3, 1);
+        __m128i row4hi = _mm256_extractf128_si256(row4, 1);
+        __m128i row5hi = _mm256_extractf128_si256(row5, 1);
+        __m128i row6hi = _mm256_extractf128_si256(row6, 1);
+        __m128i row7hi = _mm256_extractf128_si256(row7, 1);
+        __m128i col4lo, col5lo, col6lo, col7lo;
+        __m128i col4hi, col5hi, col6hi, col7hi;
+        TRANSPOSE_128i(row0hi, row1hi, row2hi, row3hi, col4lo, col5lo, col6lo, col7lo);
+        TRANSPOSE_128i(row4hi, row5hi, row6hi, row7hi, col4hi, col5hi, col6hi, col7hi);
+        col4 = m256_set_m128i(col4hi, col4lo);
+        col5 = m256_set_m128i(col5hi, col5lo);
+        col6 = m256_set_m128i(col6hi, col6lo);
+        col7 = m256_set_m128i(col7hi, col7lo);
+
+        /*
+                __m256i intermed0 = _mm256_unpacklo_epi32(row0, row1);
+                __m256i intermed2 = _mm256_unpacklo_epi32(row2, row3);
+                __m256i intermed4 = _mm256_unpacklo_epi32(row4, row5);
+                __m256i intermed6 = _mm256_unpacklo_epi32(row6, row7);
+
+                __m256i intermed1 = _mm256_unpackhi_epi32(row0, row1);
+                __m256i intermed3 = _mm256_unpackhi_epi32(row2, row3);
+                __m256i intermed5 = _mm256_unpackhi_epi32(row4, row5);
+                __m256i intermed7 = _mm256_unpackhi_epi32(row6, row7);
+
+                __m256i nearcol0 = _mm256_shuffle_epi32(row0, row2, _MM_SHUFFLE(1,0,1,0));
+                __m256i nearcol1 = _mm256_shuffle_epi32(row0, row2, _MM_SHUFFLE(3,2,3,2));
+                __m256i nearcol2 = _mm256_shuffle_epi32(row1, row3, _MM_SHUFFLE(1,0,1,0));
+                __m256i nearcol3 = _mm256_shuffle_epi32(row1, row3, _MM_SHUFFLE(3,2,3,2));
+
+                __m256i nearcol4 = _mm256_shuffle_epi32(row4, row6, _MM_SHUFFLE(1,0,1,0));
+                __m256i nearcol5 = _mm256_shuffle_epi32(row4, row6, _MM_SHUFFLE(3,2,3,2));
+                __m256i nearcol6 = _mm256_shuffle_epi32(row5, row7, _MM_SHUFFLE(1,0,1,0));
+                __m256i nearcol7 = _mm256_shuffle_epi32(row5, row7, _MM_SHUFFLE(3,2,3,2));
+
+
+
+                col0 = _mm256_permute2x128_si256(intermed0, intermed4, 0x20);
+                col1 = _mm256_permute2x128_si256(intermed1, intermed5, 0x20);
+                col2 = _mm256_permute2x128_si256(intermed2, intermed6, 0x20);
+                col3 = _mm256_permute2x128_si256(intermed3, intermed7, 0x20);
+                col4 = _mm256_permute2x128_si256(intermed0, intermed4, 0x31);
+                col5 = _mm256_permute2x128_si256(intermed1, intermed5, 0x31);
+                col6 = _mm256_permute2x128_si256(intermed2, intermed6, 0x31);
+                col7 = _mm256_permute2x128_si256(intermed3, intermed7, 0x31);
+        */
+    }
+    // Vertical 1-D IDCT.
+    {
+        __m256i yv0, yv1, yv2, yv3, yv4, yv5, yv6, yv7, yv8;
+        yv0 = _mm256_add_epi32(_mm256_slli_epi32(col0, 8), _mm256_set1_epi32(8192));
+        yv1 = _mm256_slli_epi32(col4, 8);
+        yv2 = col6;
+        yv3 = col2;
+        yv4 = col1;
+        yv5 = col7;
+        yv6 = col5;
+        yv7 = col3;
+        // Stage 1.
+        yv8 = _mm256_add_epi32(_mm256_mullo_epi32(_mm256_add_epi32(yv4, yv5), _mm256_set1_epi32(w7)),
+                               _mm256_set1_epi32(4));
+        yv4 = _mm256_srai_epi32(_mm256_add_epi32(yv8, _mm256_mullo_epi32(_mm256_set1_epi32(w1mw7), yv4)), 3);
+        yv5 = _mm256_srai_epi32(_mm256_sub_epi32(yv8, _mm256_mullo_epi32(_mm256_set1_epi32(w1pw7), yv5)), 3);
+        yv8 = _mm256_add_epi32(_mm256_mullo_epi32(_mm256_set1_epi32(w3), _mm256_add_epi32(yv6, yv7)),
+                               _mm256_set1_epi32(4));
+        yv6 = _mm256_srai_epi32(_mm256_sub_epi32(yv8, _mm256_mullo_epi32(_mm256_set1_epi32(w3mw5), yv6)), 3);
+        yv7 = _mm256_srai_epi32(_mm256_sub_epi32(yv8, _mm256_mullo_epi32(_mm256_set1_epi32(w3pw5), yv7)), 3);
+        // Stage 2.
+        yv8 = _mm256_add_epi32(yv0, yv1);
+        yv0 = _mm256_sub_epi32(yv0, yv1);
+        yv1 = _mm256_add_epi32(_mm256_mullo_epi32(_mm256_set1_epi32(w6), _mm256_add_epi32(yv3, yv2)),
+                               _mm256_set1_epi32(4));
+        yv2 = _mm256_srai_epi32(_mm256_sub_epi32(yv1, _mm256_mullo_epi32(_mm256_set1_epi32(w2pw6), yv2)), 3);
+        yv3 = _mm256_srai_epi32(_mm256_add_epi32(yv1, _mm256_mullo_epi32(_mm256_set1_epi32(w2mw6), yv3)), 3);
+        yv1 = _mm256_add_epi32(yv4, yv6);
+        yv4 = _mm256_sub_epi32(yv4, yv6);
+        yv6 = _mm256_add_epi32(yv5, yv7);
+        yv5 = _mm256_sub_epi32(yv5, yv7);
+
+        // Stage 3.
+        yv7 = _mm256_add_epi32(yv8, yv3);
+        yv8 = _mm256_sub_epi32(yv8, yv3);
+        yv3 = _mm256_add_epi32(yv0, yv2);
+        yv0 = _mm256_sub_epi32(yv0, yv2);
+        yv2 = _mm256_srai_epi32(_mm256_add_epi32(_mm256_mullo_epi32(_mm256_set1_epi32(r2), _mm256_add_epi32(yv4, yv5)),
+                                                 _mm256_set1_epi32(128)),
+                                8);
+        yv4 = _mm256_srai_epi32(_mm256_add_epi32(_mm256_mullo_epi32(_mm256_set1_epi32(r2), _mm256_sub_epi32(yv4, yv5)),
+                                                 _mm256_set1_epi32(128)),
+                                8);
+        __m256i row0 = _mm256_srai_epi32(_mm256_add_epi32(yv7, yv1), 11);
+        __m256i row1 = _mm256_srai_epi32(_mm256_add_epi32(yv3, yv2), 11);
+        __m256i row2 = _mm256_srai_epi32(_mm256_add_epi32(yv0, yv4), 11);
+        __m256i row3 = _mm256_srai_epi32(_mm256_add_epi32(yv8, yv6), 11);
+        __m256i row4 = _mm256_srai_epi32(_mm256_sub_epi32(yv8, yv6), 11);
+        __m256i row5 = _mm256_srai_epi32(_mm256_sub_epi32(yv0, yv4), 11);
+        __m256i row6 = _mm256_srai_epi32(_mm256_sub_epi32(yv3, yv2), 11);
+        __m256i row7 = _mm256_srai_epi32(_mm256_sub_epi32(yv7, yv1), 11);
+        _mm_store_si128((__m128i*)(char*)(voutp), m256_to_epi16(row0));
+        _mm_store_si128((__m128i*)(char*)(voutp + 8), m256_to_epi16(row1));
+        _mm_store_si128((__m128i*)(char*)(voutp + 2 * 8), m256_to_epi16(row2));
+        _mm_store_si128((__m128i*)(char*)(voutp + 3 * 8), m256_to_epi16(row3));
+        _mm_store_si128((__m128i*)(char*)(voutp + 4 * 8), m256_to_epi16(row4));
+        _mm_store_si128((__m128i*)(char*)(voutp + 5 * 8), m256_to_epi16(row5));
+        _mm_store_si128((__m128i*)(char*)(voutp + 6 * 8), m256_to_epi16(row6));
+        _mm_store_si128((__m128i*)(char*)(voutp + 7 * 8), m256_to_epi16(row7));
+#ifndef NDEBUG
+
+        static bool nevermore = false;
+        if (!nevermore) {
+            Sirikata::AlignedArray1d<int16_t, 64> test_case;
+            idct_sse(block, q, test_case.begin(), ignore_dc);
+            if (memcmp(test_case.begin(), voutp, 64 * sizeof(int16_t)) != 0) {
+                nevermore = true;
+                idct_sse(block, q, test_case.begin(), ignore_dc);
+                idct_avx(block, q, test_case.begin(), ignore_dc);
+                assert(false);
+            }
+        }
+#endif
+    }
+}
+#endif
+#endif /* } SSE2 or higher is available */
+
+// Public IDCT entry point: forwards to the implementation chosen by the compile-time
+// SIMD macros (__AVX2__ first, then __SSE2__ / _M_IX86_FP, otherwise the scalar code).
+void idct(const AlignedBlock& block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) {
+#if defined(__AVX2__)
+    idct_avx(block, q, voutp, ignore_dc);
+#elif defined(__SSE2__) || (_M_IX86_FP >= 1)
+    idct_sse(block, q, voutp, ignore_dc);
+#else
+    idct_scalar(block, q, voutp, ignore_dc);
+#endif
+}
diff --git a/codec/L2/demos/leptonEnc/host/lepton/idct.hh b/codec/L2/demos/leptonEnc/host/lepton/idct.hh
new file mode 100644
index 0000000000..1de87264b1
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/idct.hh
@@ -0,0 +1,2 @@
+class AlignedBlock;
+void idct(const AlignedBlock& block, const uint16_t quantization[64], int16_t outp[64], bool ignore_dc);
diff --git a/codec/L2/demos/leptonEnc/host/lepton/jpgcoder.cc b/codec/L2/demos/leptonEnc/host/lepton/jpgcoder.cc
new file mode 100644
index 0000000000..d8ad9fe93c
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/jpgcoder.cc
@@ -0,0 +1,5794 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+/**
+Copyright (c) 2006...2016, Matthias Stirner and HTW Aalen University
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ **/
+#include "loop_stt.h"
+extern LoopNodeFactory g_loops;
+#include "../vp8/util/memory.hh"
+#include "../vp8/util/debug.hh"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sstream>
+#include <math.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <ctime>
+#include <memory>
+#include <atomic>
+#include <signal.h>
+#ifndef _WIN32
+#include <sys/time.h>
+#include <sys/types.h>
+#include <dirent.h>
+
+#include <unistd.h>
+#else
+#include <io.h>
+#endif
+#ifdef __linux
+//#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+
+#endif
+#include <emmintrin.h>
+#include "jpgcoder_hls.hh"
+#include "jpgcoder.hh"
+#include "recoder.hh"
+#include "bitops.hh"
+#include "htables.hh"
+#include "component_info.hh"
+#include "uncompressed_components.hh"
+#include "vp8_decoder.hh"
+#include "vp8_encoder.hh"
+#include "simple_decoder.hh"
+#include "simple_encoder.hh"
+#include "fork_serve.hh"
+#include "socket_serve.hh"
+#include "validation.hh"
+#include "../io/ZlibCompression.hh"
+#include "../io/MemReadWriter.hh"
+#include "../io/BufferedIO.hh"
+#include "../io/Zlib0.hh"
+#include "../io/Seccomp.hh"
+#include <immintrin.h>
+#include <iostream>
+int g_argc = 0;
+const char** g_argv = NULL;
+#ifndef GIT_REVISION
+#include "version.hh"
+#ifndef GIT_REVISION
+#define GIT_REVISION "unknown"
+#endif
+#endif
+bool fast_exit = true;
+#ifdef SKIP_VALIDATION
+bool g_skip_validation = true;
+#else
+bool g_skip_validation = false;
+#endif
+#define QUANT(cmp, bpos) (cmpnfo[cmp].qtable[bpos])
+#define MAX_V(cmp, bpos) ((freqmax[bpos] + QUANT(cmp, bpos) - 1) / QUANT(cmp, bpos))
+// Helper macros. Every parameter is parenthesized so compound arguments (e.g. ABS(a - b)) expand correctly.
+#define ENVLI(s, v) (((v) > 0) ? (v) : ((v)-1) + (1 << (s)))
+#define DEVLI(s, n) ((s) == 0 ? (n) : (((n) >= (1 << ((s)-1))) ? (n) : (n) + 1 - (1 << (s))))
+#define E_ENVLI(s, v) ((v) - (1 << (s)))
+#define E_DEVLI(s, n) ((n) + (1 << (s)))
+// DCT helpers built on cos()/sqrt() from <math.h>.
+#define COS_DCT(l, s, n) (cos(((2 * (l) + 1) * (s)*M_PI) / (2 * (n))))
+#define C_DCT(n) (((n) == 0) ? (1) : (sqrt(2)))
+#define DCT_SCALE sqrt(8)
+// Generic arithmetic helpers. Arguments may still be evaluated more than once: avoid side effects.
+#define ABS(v1) (((v1) < 0) ? -(v1) : (v1))
+#define ABSDIFF(v1, v2) (((v1) > (v2)) ? ((v1) - (v2)) : ((v2) - (v1)))
+#define IPOS(w, v, h) (((v) * (w)) + (h))
+#define NPOS(n1, n2, p) ((((p) / (n1)) * (n2)) + ((p) % (n1)))
+#define ROUND_F(v1) (((v1) < 0) ? (int)((v1)-0.5) : (int)((v1) + 0.5))
+#define B_SHORT(v1, v2) ((((int)(v1)) << 8) + ((int)(v2)))
+#define CLAMPED(l, h, v) (((v) < (l)) ? (l) : ((v) > (h)) ? (h) : (v))
+
+#define MEM_ERRMSG "out of memory error"
+#define FRD_ERRMSG "could not read file / file not found: %s"
+#define FWR_ERRMSG "could not write file / file write-protected: %s"
+size_t local_atoi(const char* data);
+namespace TimingHarness {
+// Per-thread, per-stage timestamps as produced by get_time_us(); print_results() skips zero entries.
+Sirikata::Array1d<Sirikata::Array1d<uint64_t, NUM_STAGES>, MAX_NUM_THREADS> timing = {{{{0}}}};
+// Returns the gettimeofday() wall-clock time in microseconds. Returns 0 on Windows builds and,
+// unless `force` is true, whenever g_use_seccomp is set.
+uint64_t get_time_us(bool force) {
+#ifndef _WIN32
+    // FIXME
+    if (force || !g_use_seccomp) {
+        struct timeval val = {0, 0};
+        gettimeofday(&val, NULL);
+        uint64_t now_us = val.tv_sec;
+        return now_us * 1000000 + val.tv_usec;
+    }
+#endif
+    return 0;
+}
+const char* stage_names[] = {FOREACH_TIMING_STAGE(GENERATE_TIMING_STRING) "EOF"};
+// Prints every recorded stage time to stderr, in seconds relative to the earliest one (%u: j is unsigned).
+void print_results() {
+    if (g_use_seccomp) return;
+    // Start from "now" so that any recorded (non-zero) timestamp can only lower the baseline.
+    uint64_t earliest_time = get_time_us();
+    for (int i = 0; i < NUM_STAGES; ++i) {
+        for (unsigned int j = 0; j < MAX_NUM_THREADS && j < NUM_THREADS; ++j) {
+            if (timing[j][i] && timing[j][i] < earliest_time) {
+                earliest_time = timing[j][i];
+            }
+        }
+    }
+    for (int i = 0; i < NUM_STAGES; ++i) {
+        for (unsigned int j = 0; j < MAX_NUM_THREADS && j < NUM_THREADS; ++j) {
+            if (timing[j][i]) {
+                fprintf(stderr, "%s\t(%u)\t%f\n", stage_names[i], j, (timing[j][i] - earliest_time) * 0.000001);
+            }
+        }
+    }
+}
+}
+/* -----------------------------------------------
+    struct & enum declarations
+    ----------------------------------------------- */
+enum { JPG_READ_BUFFER_SIZE = 1024 * 256, ABIT_WRITER_PRELOAD = 4096 * 1024 + 1024 };
+
+enum ACTION { comp = 1, forkserve = 2, socketserve = 3, info = 4 };
+
+enum F_TYPE { JPEG = 0, UJG = 1, LEPTON = 2, UNK = 3 };
+
+namespace {
+// Assembles a 32-bit value from four bytes stored least-significant byte first.
+// Reads exactly buffer[0]..buffer[3].
+uint32_t LEtoUint32(const uint8_t* buffer) {
+    const uint32_t b0 = buffer[0];
+    const uint32_t b1 = buffer[1];
+    const uint32_t b2 = buffer[2];
+    const uint32_t b3 = buffer[3];
+    return (b3 << 24) | (b2 << 16) | (b1 << 8) | b0;
+}
+
+// Stores `value` into retval[0]..retval[3], least-significant byte first.
+// This is the inverse of LEtoUint32.
+void uint32toLE(uint32_t value, uint8_t* retval) {
+    for (int idx = 0; idx < 4; ++idx) {
+        retval[idx] = uint8_t((value >> (8 * idx)) & 0xff);
+    }
+}
+}
+/* -----------------------------------------------
+    function declarations: main interface
+    ----------------------------------------------- */
+
+// returns the max size of the input file
+int initialize_options(int argc, const char* const* argv);
+void execute(const std::function<bool()>&);
+void show_help(void);
+
+/* -----------------------------------------------
+    function declarations: main functions
+    ----------------------------------------------- */
+
+bool check_file(IOUtil::FileWriter* ujg_out,
+                int fd_in,
+                int fd_out,
+                uint32_t max_file_size,
+                bool force_zlib0,
+                Sirikata::Array1d<uint8_t, 2> two_byte_header,
+                bool is_socket);
+
+template <class stream_reader>
+bool read_jpeg(std::vector<std::pair<uint32_t, uint32_t> >* huff_input_offset, stream_reader* jpg_str_in);
+bool read_jpeg_wrapper(std::vector<std::pair<uint32_t, uint32_t> >* huff_input_offset, ibytestream* jpg_str_in) {
+    return read_jpeg(huff_input_offset, jpg_str_in); // non-template entry point: read_jpeg<ibytestream>
+}
+
+bool read_jpeg_and_copy_to_side_channel(std::vector<std::pair<uint32_t, uint32_t> >* huff_input_offset,
+                                        ibytestreamcopier* jpg_str_in) {
+    return read_jpeg(huff_input_offset, jpg_str_in); // non-template entry point: read_jpeg<ibytestreamcopier>
+}
+
+struct MergeJpegProgress;
+bool decode_jpeg(const std::vector<std::pair<uint32_t, uint32_t> >& huff_input_offset,
+                 std::vector<ThreadHandoff>* row_thread_handoffs);
+bool recode_jpeg(void);
+
+bool adapt_icos(void);
+bool check_value_range(void);
+bool write_ujpg_2(int size,
+                  IOUtil::FileWriter* ujg_out,
+                  struct_arith& arith,
+                  uint8_t* res,
+                  uint8_t* origin,
+                  int hdrsK,
+                  uint32_t rst,
+                  ThreadHandoff& myHand);
+bool write_ujpg(std::vector<ThreadHandoff> row_thread_handoffs,
+                std::vector<uint8_t, Sirikata::JpegAllocator<uint8_t> >* jpeg_file_raw_bytes,
+                struct_arith& arith,
+                uint8_t* res);
+bool read_ujpg(void);
+unsigned char read_fixed_ujpg_header(void);
+bool reset_buffers(void);
+void process_file_2();
+// ------------------------------------------------------------
+// ------------------------------------------------------------
+// ------------------------------------------------------------
+
+// ------------------------------------------------------------
+
+#include <new>
+#include <cstdlib>
+
+// template <typename T>
+// T* aligned_alloc(std::size_t num) {
+//    void* ptr = nullptr;
+//    if (posix_memalign(&ptr, 4096, num * sizeof(T))) throw std::bad_alloc();
+//    return reinterpret_cast<T*>(ptr);
+//}
+
+// ------------------------------------------------------------
+
+// Load the data file (.txt, .bin, .jpg ...) into a buffer from aligned_alloc(); `size` gets the byte count read.
+// Returns 0 on success, -1 if the file cannot be opened or its length cannot be determined.
+template <typename T>
+int load_dat(T*& data, const std::string& name, int& size) {
+    FILE* f = fopen(name.c_str(), "rb");
+    std::cout << "WARNING: " << name << " will be opened for binary read." << std::endl;
+    if (!f) {
+        std::cerr << "ERROR: " << name << " cannot be opened for binary read." << std::endl;
+        return -1;
+    }
+    long len = (fseek(f, 0, SEEK_END) == 0) ? ftell(f) : -1L; // ftell() yields -1 on error
+    if (len < 0 || fseek(f, 0, SEEK_SET) != 0) {
+        std::cerr << "ERROR: size of " << name << " cannot be determined." << std::endl;
+        fclose(f); // do not leak the handle on the error path
+        return -1;
+    }
+    data = (T*)aligned_alloc((uint64_t)len);
+    size = fread(data, sizeof(char), (uint64_t)len, f);
+    fclose(f);
+    std::cout << size << " entries read from " << name << std::endl;
+    return 0;
+}
+
+//#include "XAcc_jpegdecoder.h"
+//#include "XAcc_jfifparser.h"
+//#include "XAcc_common.h"
+// void JPEGD_LeptonE_kernel(
+//        ap_uint<AXI_WIDTH>* datainDDR,
+//        int size,
+//        struct_arith& arith,
+//        uint8_t* res
+//);
+std::string xclbin_path;
+
+// bool hls_decode_jpeg_kernel(int filecnt,
+//                            std::vector<uint8_t*> datatoDDR,
+//                            std::vector<int> jpgSize,
+//                            std::vector<struct_arith>& arith,
+//                            std::vector<uint8_t*> res,
+//                            std::vector<uint32_t>& left,
+//                            std::vector<uint32_t>& rst);
+// ------------------------------------------------------------
+
+/* -----------------------------------------------
+    function declarations: jpeg-specific
+    ----------------------------------------------- */
+bool is_jpeg_header(Sirikata::Array1d<uint8_t, 2> header) {
+    return header[0] == 0xFF && header[1] == 0xD8; // FF D8 is the JPEG start-of-image (SOI) marker
+}
+
+// baseline single threaded decoding need only two rows of the image in memory
+bool setup_imginfo_jpg(bool only_allocate_two_image_rows);
+bool parse_jfif_jpg(unsigned char type, unsigned int len, unsigned char* segment);
+bool rebuild_header_jpg(void);
+
+int decode_block_seq(abitreader* huffr, huffTree* dctree, huffTree* actree, short* block);
+int encode_block_seq(abitwriter* huffw, huffCodes* dctbl, huffCodes* actbl, short* block);
+
+int decode_dc_prg_fs(abitreader* huffr, huffTree* dctree, short* block);
+int encode_dc_prg_fs(abitwriter* huffw, huffCodes* dctbl, short* block);
+int decode_ac_prg_fs(abitreader* huffr, huffTree* actree, short* block, unsigned int* eobrun, int from, int to);
+int encode_ac_prg_fs(abitwriter* huffw, huffCodes* actbl, short* block, unsigned int* eobrun, int from, int to);
+
+int decode_dc_prg_sa(abitreader* huffr, short* block);
+int encode_dc_prg_sa(abitwriter* huffw, short* block);
+int decode_ac_prg_sa(abitreader* huffr, huffTree* actree, short* block, unsigned int* eobrun, int from, int to);
+int encode_ac_prg_sa(
+    abitwriter* huffw, abytewriter* storw, huffCodes* actbl, short* block, unsigned int* eobrun, int from, int to);
+
+int decode_eobrun_sa(abitreader* huffr, short* block, unsigned int* eobrun, int from, int to);
+int encode_eobrun(abitwriter* huffw, huffCodes* actbl, unsigned int* eobrun);
+int encode_crbits(abitwriter* huffw, abytewriter* storw);
+
+int next_huffcode(abitreader* huffw, huffTree* ctree, Billing min_bill, Billing max_bill);
+int next_mcupos(int* mcu, int* cmp, int* csc, int* sub, int* dpos, int* rstw, int cs_cmpc);
+int next_mcuposn(int* cmp, int* dpos, int* rstw);
+int skip_eobrun(int* cmp, int* dpos, int* rstw, unsigned int* eobrun);
+
+bool build_huffcodes(unsigned char* clen, unsigned char* cval, huffCodes* hc, huffTree* ht);
+
+/* -----------------------------------------------
+    function declarations: developers functions
+    ----------------------------------------------- */
+
+// these are developers functions, they are not needed
+// in any way to compress jpg or decompress ujg
+bool write_hdr(void);
+bool write_huf(void);
+bool write_info(void);
+clock_t pre_byte = 0;
+clock_t post_byte = 0;
+clock_t read_done = 0;
+clock_t overall_start = 0;
+
+/* -----------------------------------------------
+    global variables: data storage
+    ----------------------------------------------- */
+
+size_t g_decompression_memory_bound = 0;
+Sirikata::Array1d<Sirikata::Array1d<unsigned short, 64>, 4> qtables; // quantization tables
+Sirikata::Array1d<Sirikata::Array1d<huffCodes, 4>, 2> hcodes;        // huffman codes
+Sirikata::Array1d<Sirikata::Array1d<huffTree, 4>, 2> htrees;         // huffman decoding trees
+Sirikata::Array1d<Sirikata::Array1d<unsigned char, 4>, 2> htset;     // 1 if huffman table is set
+unsigned char* grbgdata = NULL;                                      // garbage data
+unsigned char* hdrdata = NULL;                                       // header data
+unsigned char* huffdata = NULL;                                      // huffman coded data
+int hufs = 0;                                                        // size of huffman data
+int hdrs = 0;                                                        // size of header
+int zlib_hdrs = 0;                                                   // size of compressed header
+size_t total_framebuffer_allocated = 0;                              // framebuffer allocated
+int grbs = 0;                                                        // size of garbage
+int prefix_grbs = 0;                                                 // size of prefix;
+unsigned char* prefix_grbgdata = NULL; // if prefix_grb is specified, header is not prepended
+
+std::vector<unsigned int> rstp;     // restart markers positions in huffdata
+std::vector<unsigned int> scnp;     // scan start positions in huffdata
+int rstc = 0;                       // count of restart markers
+int scnc = 0;                       // count of scans
+int rsti = 0;                       // restart interval
+char padbit = -1;                   // padbit (for huffman coding)
+std::vector<unsigned char> rst_err; // number of wrong-set RST markers per scan
+std::vector<unsigned int> rst_cnt;
+bool rst_cnt_set = false;
+int max_file_size = 0; // support for truncated jpegs 0 means full jpeg
+size_t start_byte = 0; // support for producing a slice of jpeg
+size_t max_encode_threads =
+#ifdef DEFAULT_SINGLE_THREAD
+    1
+#else
+    MAX_NUM_THREADS
+#endif
+    ;
+UncompressedComponents colldata; // baseline sorted DCT coefficients
+
+/* -----------------------------------------------
+    global variables: info about image
+    ----------------------------------------------- */
+
+// separate info for each color component
+Sirikata::Array1d<componentInfo, 4> cmpnfo;
+
+int cmpc = 0;      // component count
+int imgwidth = 0;  // width of image
+int imgheight = 0; // height of image
+
+int sfhm = 0;          // max horizontal sample factor
+int sfvm = 0;          // max vertical sample factor
+int mcuv = 0;          // mcus per line
+unsigned int mcuh = 0; // mcus per column
+int mcuc = 0;          // count of mcus
+bool early_eof_encountered = false; // set to true by early_eof()
+
+int max_cmp = 0;      // the maximum component in a truncated image
+int max_bpos = 0;     // the maximum band in a truncated image
+int max_dpos[4] = {}; // the maximum dpos in a truncated image
+int max_sah = 0;      // the maximum bit in a truncated image
+
+void standard_eof(abytewriter* hdrw, abytewriter* huffw) {
+    // publish the header writer's buffer pointer & position in the globals hdrdata / hdrs
+    hdrdata = hdrw->getptr_aligned();
+    hdrs = hdrw->getpos();
+    // publish the huffman writer's buffer pointer & position in the globals huffdata / hufs
+    huffdata = huffw->getptr_aligned();
+    hufs = huffw->getpos();
+}
+
+void early_eof(abytewriter* hdrw, abytewriter* huffw) {
+    early_eof_encountered = true; // raise the global flag, then capture the buffers like a normal EOF
+    standard_eof(hdrw, huffw);
+}
+
+/* -----------------------------------------------
+    global variables: info about current scan
+    ----------------------------------------------- */
+
+int cs_cmpc = 0;                          // component count in current scan
+Sirikata::Array1d<int, 4> cs_cmp = {{0}}; // component numbers  in current scan
+int cs_from = 0;                          // begin - band of current scan ( inclusive )
+int cs_to = 0;                            // end - band of current scan ( inclusive )
+int cs_sah = 0;                           // successive approximation bit pos high
+int cs_sal = 0;                           // successive approximation bit pos low
+void kill_workers(void* workers, uint64_t num_workers);
+
+// Allocates an array of `num_workers` GenericWorker objects, stamps TS_THREAD_STARTED and registers
+// kill_workers for the array via custom_atexit. Returns NULL without allocating when NUM_THREADS < 2.
+GenericWorker* get_worker_threads(unsigned int num_workers) {
+    always_assert(num_workers + 1 == NUM_THREADS);
+    if (NUM_THREADS < 2) return NULL;
+    GenericWorker* workers = new GenericWorker[num_workers];
+    TimingHarness::timing[0][TimingHarness::TS_THREAD_STARTED] = TimingHarness::get_time_us();
+    custom_atexit(&kill_workers, workers, num_workers);
+    return workers;
+}
+
+// Creates a VP8ComponentDecoder, stamps TS_MODEL_INIT, and on request registers NUM_THREADS - 1 workers.
+VP8ComponentDecoder* makeBoth(bool threaded, bool start_workers) {
+    VP8ComponentDecoder* decoder = new VP8ComponentDecoder(threaded);
+    TimingHarness::timing[0][TimingHarness::TS_MODEL_INIT] = TimingHarness::get_time_us();
+    if (!start_workers) return decoder;
+    decoder->registerWorkers(get_worker_threads(NUM_THREADS - 1), NUM_THREADS - 1);
+    return decoder;
+}
+
+// Creates a VP8ComponentEncoder, stamping TS_MODEL_INIT_BEGIN / TS_MODEL_INIT around construction.
+BaseEncoder* makeEncoder(bool threaded, bool start_workers) {
+    TimingHarness::timing[0][TimingHarness::TS_MODEL_INIT_BEGIN] = TimingHarness::get_time_us();
+    VP8ComponentEncoder* encoder = new VP8ComponentEncoder(threaded);
+    TimingHarness::timing[0][TimingHarness::TS_MODEL_INIT] = TimingHarness::get_time_us();
+    if (!start_workers) return encoder;
+    encoder->registerWorkers(get_worker_threads(NUM_THREADS - 1), NUM_THREADS - 1);
+    return encoder;
+}
+BaseDecoder* makeDecoder(bool threaded, bool start_workers) {
+    return makeBoth(threaded, start_workers); // the VP8ComponentDecoder from makeBoth, returned as BaseDecoder*
+}
+/* -----------------------------------------------
+    global variables: info about files
+    ----------------------------------------------- */
+int jpgfilesize;           // size of JPEG file
+int ujgfilesize;           // size of UJG file
+int jpegtype = 0;          // type of JPEG coding: 0->unknown, 1->sequential, 2->progressive
+F_TYPE filetype;           // type of current file
+F_TYPE ofiletype = LEPTON; // desired type of output file
+bool g_do_preload = false;
+std::unique_ptr<BaseEncoder> g_encoder;
+BaseDecoder* g_decoder = NULL;
+std::unique_ptr<BaseDecoder> g_reference_to_free;
+ServiceInfo g_socketserve_info;
+bool g_threaded = true;
+// this overrides the progressive bit in the header so that legacy progressive files may be decoded
+bool g_force_progressive = false;
+bool g_allow_progressive =
+#ifdef DEFAULT_ALLOW_PROGRESSIVE
+    true
+#else
+    false
+#endif
+    ;
+bool g_unkillable = false;
+uint64_t g_time_bound_ms = 0;
+int g_inject_syscall_test = 0;
+bool g_force_zlib0_out = false;
+
+Sirikata::DecoderReader* str_in = NULL; // input stream
+bounded_iostream* str_out = NULL;       // output stream
+// output stream
+// IOUtil::FileWriter * ujg_out = NULL;
+IOUtil::FileReader* ujg_base_in = NULL;
+
+const char** filelist = NULL; // list of files to process
+int file_cnt = 0;             // count of files in list (1 for input only)
+int file_no = 0;              // number of current file
+
+/* -----------------------------------------------
+    global variables: messages
+    ----------------------------------------------- */
+
+std::string errormessage;
+std::atomic<int> errorlevel(0);
+// meaning of errorlevel:
+// -1 -> wrong input
+// 0 -> no error
+// 1 -> warning
+// 2 -> fatal error
+
+/* -----------------------------------------------
+    global variables: settings
+    ----------------------------------------------- */
+
+int verbosity = 0;      // level of verbosity
+bool overwrite = false; // overwrite files yes / no
+int err_tresh = 1;      // error threshold ( proceed on warnings yes (2) / no (1) )
+bool disc_meta = false; // discard meta-info yes / no
+
+bool developer = false; // allow developers functions yes/no
+ACTION action = comp;   // what to do with JPEG/UJG files
+
+FILE* msgout = stderr; // stream for output of messages
+bool pipe_on = false;  // use stdin/stdout instead of filelist
+
+void gen_nop() {} // intentionally empty no-argument callback
+void sig_nop(int) {} // intentionally empty; ignores its int (presumably a signal number - confirm at call sites)
+/* -----------------------------------------------
+    global variables: info about program
+    ----------------------------------------------- */
+
+unsigned char ujgversion = 1;
+uint8_t get_current_file_lepton_version() {
+    return ujgversion; // current value of the global ujgversion
+}
+static const char* appname = "lepton";
+static const unsigned char ujg_header[] = {'U', 'J'};
+static const unsigned char lepton_header[] = {0xcf, 0x84};  // the tau symbol for a tau lepton in utf-8
+static const unsigned char zlepton_header[] = {0xce, 0xb6}; // the zeta symbol for a zlib compressed lepton
+
+FILE* timing_log = NULL;
+char current_operation = '\0';
+#ifdef _WIN32
+clock_t current_operation_begin = 0;
+clock_t current_operation_first_byte = 0;
+clock_t current_operation_end = 0;
+#else
+struct timeval current_operation_begin = {0, 0};
+struct timeval current_operation_first_byte = {0, 0};
+struct timeval current_operation_end = {0, 0};
+#endif
+
+void timing_operation_start(char operation) { // records the start time of `operation` and clears the other marks
+#ifndef _WIN32 // the entire body is compiled out on Windows builds
+    if (g_use_seccomp) {
+        return; // nothing is recorded when g_use_seccomp is set
+    }
+    current_operation = operation;
+#ifdef _WIN32 // NOTE(review): never compiled - this sits inside the enclosing #ifndef _WIN32
+    current_operation_begin = clock();
+    current_operation_first_byte = 0;
+    current_operation_end = 0;
+#else
+    gettimeofday(&current_operation_begin, NULL);
+    memset(&current_operation_first_byte, 0, sizeof(current_operation_first_byte)); // all-zero = not stamped yet
+    memset(&current_operation_end, 0, sizeof(current_operation_end));
+#endif
+    fprintf(stderr, "START ACHIEVED %ld %ld\n", (long)current_operation_begin.tv_sec,
+            (long)current_operation_begin.tv_usec);
+#endif
+}
+
+void timing_operation_first_byte(char operation) { // stamps the first-byte time of the current operation once
+#ifndef _WIN32 // the entire body is compiled out on Windows builds
+    if (g_use_seccomp) {
+        return; // nothing is recorded when g_use_seccomp is set
+    }
+    assert(current_operation == operation); // must match the operation passed to timing_operation_start
+#ifdef _WIN32 // NOTE(review): never compiled - this sits inside the enclosing #ifndef _WIN32
+    if (current_operation_first_byte == 0) {
+        current_operation_first_byte = clock();
+    }
+#else
+    if (current_operation_first_byte.tv_sec == 0 && current_operation_first_byte.tv_usec == 0) {
+        gettimeofday(&current_operation_first_byte, NULL);
+        fprintf(stderr, "FIRST BYTE ACHIEVED %ld %ld\n", (long)current_operation_first_byte.tv_sec,
+                (long)current_operation_first_byte.tv_usec);
+    }
+    // an already-stamped (non-zero) first-byte time is left untouched by later calls
+#endif
+#endif
+}
+
+// Marks the end of the current timed operation and logs its durations.
+//
+// operation: must equal the tag passed to timing_operation_start() (asserted).
+//
+// When timing_log is non-NULL, one line "<tag> <begin-to-first-byte> <begin-to-end>"
+// (seconds) is written and flushed. If no first-byte stamp was recorded, the
+// total duration is reported for both values. All three stamps are cleared
+// afterwards. Does nothing when g_use_seccomp is set. On Windows the body is
+// compiled out (the former inner `#ifdef _WIN32` branch was unreachable inside
+// `#ifndef _WIN32` and has been removed).
+void timing_operation_complete(char operation) {
+#ifndef _WIN32
+    if (g_use_seccomp) {
+        return;
+    }
+    assert(current_operation == operation);
+    gettimeofday(&current_operation_end, NULL);
+    if (timing_log) {
+        double begin = current_operation_begin.tv_sec + (double)current_operation_begin.tv_usec / 1000000.;
+        double end = current_operation_end.tv_sec + (double)current_operation_end.tv_usec / 1000000.;
+        double first_byte =
+            current_operation_first_byte.tv_sec + (double)current_operation_first_byte.tv_usec / 1000000.;
+        double begin_to_end = end - begin;
+        double begin_to_first_byte = begin_to_end;
+        if (current_operation_first_byte.tv_sec != 0) { // if we were successful
+            begin_to_first_byte = first_byte - begin;
+        }
+        fprintf(timing_log, "%c %f %f\n", current_operation, begin_to_first_byte, begin_to_end);
+        fflush(timing_log);
+    }
+    // Reset all stamps so the next operation starts from a clean state.
+    memset(&current_operation_end, 0, sizeof(current_operation_end));
+    memset(&current_operation_begin, 0, sizeof(current_operation_begin));
+    memset(&current_operation_first_byte, 0, sizeof(current_operation_first_byte));
+#else
+    (void)operation; // timing is not collected on Windows
+#endif
+}
+
+// Parses a memory-size argument such as "4096", "16K" or "4M".
+//
+// data: NUL-terminated text made of decimal digits, optionally followed by a
+//       'K' (x1024) or 'M' (x1024*1024) suffix. Parsing stops at the suffix;
+//       anything after it is ignored.
+// Returns the size in bytes; an empty string yields 0.
+//
+// Prints a message to stderr and calls exit(1) when the text contains any
+// other character, has more than 16 digits, or when the value (including the
+// suffix scaling) does not fit in size_t. The last case used to wrap around
+// silently.
+size_t local_atoi(const char* data) {
+    const char* odata = data;
+    const size_t size_max = static_cast<size_t>(-1);
+    size_t retval = 0;
+    int counter = 0;
+    for (; *data; ++data) {
+        const char c = *data;
+        if (c >= '0' && c <= '9') {
+            const size_t digit = (size_t)(c - '0');
+            // Reject over-long numbers and anything that would overflow retval * 10 + digit.
+            if (++counter > 16 || retval > (size_max - digit) / 10) {
+                fprintf(stderr, "Could not allocate so much memory %s\n", odata);
+                exit(1);
+            }
+            retval = retval * 10 + digit;
+        } else if (c == 'M' || c == 'K') {
+            const size_t scale = (c == 'M') ? 1024 * 1024 : 1024;
+            if (retval > size_max / scale) {
+                fprintf(stderr, "Could not allocate so much memory %s\n", odata);
+                exit(1);
+            }
+            retval *= scale;
+            break;
+        } else {
+            fprintf(stderr, "Could not allocate alphanumeric memory %s\n", odata);
+            exit(1);
+        }
+    }
+    return retval;
+}
+// Reports whether the NUL-terminated string `a` begins with the string `b`.
+// An empty `b` is a prefix of every string.
+bool starts_with(const char* a, const char* b) {
+    const size_t prefix_len = strlen(b);
+    return strncmp(a, b, prefix_len) == 0;
+}
+// Interprets one command-line argument that affects memory / CPU setup.
+//
+// arg:              the argument text (NUL-terminated).
+// mem_init:         set to the parsed size when arg is "-memory=<size>".
+// thread_mem_init:  set to the parsed size when arg is "-threadmemory=<size>".
+// needs_huge_pages: set to true when arg is "-hugepages".
+// avx2upgrade:      set to true when arg is "-avx2upgrade".
+//
+// "-h", "-help" and "--help" print the help text and exit(0). The help check
+// is an exact match: the previous substring search also fired on any argument
+// that merely contained "-help", such as a file path. Sizes are parsed by
+// local_atoi(), which exits on malformed input. Every other argument,
+// including "-xclbin", is left untouched here.
+void compute_thread_mem(
+    const char* arg, size_t* mem_init, size_t* thread_mem_init, bool* needs_huge_pages, bool* avx2upgrade) {
+    if (strcmp(arg, "-hugepages") == 0) {
+        *needs_huge_pages = true;
+    }
+    if (strcmp(arg, "-avx2upgrade") == 0) {
+        *avx2upgrade = true;
+    }
+    if (strcmp(arg, "-h") == 0 || strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0) {
+        show_help();
+        exit(0);
+    }
+    const char mem_arg_name[] = "-memory=";
+    const char thread_mem_arg_name[] = "-threadmemory=";
+    if (starts_with(arg, mem_arg_name)) {
+        *mem_init = local_atoi(arg + strlen(mem_arg_name));
+    } else if (starts_with(arg, thread_mem_arg_name)) {
+        *thread_mem_init = local_atoi(arg + strlen(thread_mem_arg_name));
+    }
+}
+/* -----------------------------------------------
+    main-function
+    ----------------------------------------------- */
+
+// Program entry point: parses the command line, runs process_file_2() once and
+// prints a summary. Returns 0 when no error was counted, 1 otherwise.
+int main(int argc, char** argv) {
+    // Publish the raw command line through the g_argc / g_argv globals.
+    g_argc = argc;
+    g_argv = (const char**)argv;
+    TimingHarness::timing[0][TimingHarness::TS_MAIN] = TimingHarness::get_time_us(true);
+
+    clock_t t_begin = 0;
+    clock_t t_end = 1;
+
+    int failed_files = 0;
+    int warned_files = 0;
+
+    int total_jpg_bytes = 0;
+    int total_ujg_bytes = 0;
+
+    errorlevel.store(0);
+
+    // read options from command line
+    initialize_options(argc, argv);
+
+    reset_buffers();
+
+    // process file(s) - this is the main function routine
+    t_begin = clock();
+
+    process_file_2();
+    if (errorlevel.load() >= err_tresh) {
+        ++failed_files;
+    }
+    if (errorlevel.load() == 1) {
+        ++warned_files;
+    }
+    if (errorlevel.load() < err_tresh) {
+        // only successful runs contribute to the size totals
+        total_jpg_bytes += jpgfilesize;
+        total_ujg_bytes += ujgfilesize;
+    }
+    if (!g_use_seccomp) {
+        t_end = clock();
+    }
+
+    const bool serving = (action == socketserve) || (action == forkserve);
+    if (!serving) {
+        // show statistics
+        fprintf(msgout, "\n\n-> %i file(s) processed, %i error(s), %i warning(s)\n", file_cnt, failed_files,
+                warned_files);
+    }
+
+    if ((file_cnt > failed_files) && (verbosity > 0) && (action == comp)) {
+        const int elapsed_ms = (int)((double)((t_end - t_begin) * 1000) / CLOCKS_PER_SEC);
+
+        int bytes_per_ms = total_jpg_bytes;
+        if (elapsed_ms > 0) {
+            bytes_per_ms = total_jpg_bytes / elapsed_ms;
+        }
+
+        float ratio = 0;
+        if (total_jpg_bytes > 0) {
+            ratio = 100.0 * total_ujg_bytes / total_jpg_bytes;
+        }
+
+        fprintf(msgout, " --------------------------------- \n");
+        fprintf(msgout, " time taken        : %8i msec\n", elapsed_ms);
+        fprintf(msgout, " avrg. byte per ms : %8i byte\n", bytes_per_ms);
+        fprintf(msgout, " avrg. comp. ratio : %8.2f %%\n", ratio);
+        fprintf(msgout, " --------------------------------- \n");
+    }
+
+    if (failed_files == 0) {
+        return 0;
+    }
+    return 1;
+}
+
+/* ----------------------- Begin of main interface functions -------------------------- */
+
+/* -----------------------------------------------
+    reads in commandline arguments
+    ----------------------------------------------- */
+// Writable "-" string. NOTE(review): presumably the stdin/stdout placeholder name
+// (open_fdin/open_fdout treat "-" specially) -- confirm where it is used.
+char g_dash[] = "-";
+// Splits the command line into the xclbin path and the list of input files.
+//
+// argc / argv: the program's command line; argv[0] is skipped.
+//
+// "-xclbin <path>" stores <path> in the global xclbin_path; every other
+// argument is appended, in order, to the zero-initialised global `filelist`
+// (allocated here with room for argc entries).
+//
+// Always returns 0. Exits with status 1 if "-xclbin" is the last argument.
+//
+// The value of "-xclbin" used to be fetched with an index relative to the
+// original argv from an already-advanced pointer. That was only correct when
+// the flag was the first argument and read out of bounds otherwise. It is now
+// always the argument following the flag.
+int initialize_options(int argc, const char* const* argv) {
+    // get memory for filelist & preset with NULL
+    filelist = (const char**)custom_calloc(argc * sizeof(char*));
+
+    const char** next_file = filelist;
+    for (int i = 1; i < argc; ++i) {
+        if (strcmp(argv[i], "-xclbin") == 0) {
+            if (i + 1 >= argc) {
+                fprintf(stderr, "-xclbin requires a path argument\n");
+                exit(1);
+            }
+            xclbin_path = argv[i + 1];
+            fprintf(stderr, "xclbin_path is %s \n", argv[i + 1]);
+            if (i + 2 < argc) {
+                // debug aid: show the argument that follows the xclbin path, if any
+                fprintf(stderr, "test_path is %s \n", argv[i + 2]);
+            }
+            ++i; // the path has been consumed as well
+        } else {
+            *(next_file++) = argv[i];
+        }
+    }
+
+    return 0;
+}
+// Estimates how many bytes are needed to decompress the current file.
+//
+// Returns 0 when either the input or the output type is UJG. For a JPEG input
+// the result is a sum of buffer-size terms built below; otherwise it is derived
+// from the memory manager's current allocation total, adjusted by the frame,
+// streaming and single-threaded terms.
+size_t decompression_memory_bound() {
+    if (ofiletype == UJG || filetype == UJG) {
+        return 0;
+    }
+    size_t cumulative_buffer_size = 0;
+    size_t streaming_buffer_size = 0;
+    size_t current_run_size = 0;
+    // Accumulate, per colour component, the frame buffer size and the streaming buffer size.
+    for (int i = 0; i < colldata.get_num_components(); ++i) {
+        size_t streaming_size = colldata.block_width(i) * 2 * NUM_THREADS * 64 * sizeof(uint16_t);
+        size_t frame_buffer_size = colldata.component_size_allocated(i);
+        if (cs_cmpc != colldata.get_num_components() || jpegtype != 1) {
+            // in this case the streaming size is taken to be the full allocated component size
+            streaming_size = frame_buffer_size;
+        } else if (filetype != JPEG) {
+            if (!g_threaded) {
+                frame_buffer_size = colldata.block_width(i) * 2 * 64 * sizeof(uint16_t);
+
+            } else {
+                frame_buffer_size = streaming_size;
+            }
+        }
+        cumulative_buffer_size += frame_buffer_size;
+        streaming_buffer_size += streaming_size;
+    }
+    current_run_size = cumulative_buffer_size;
+
+    // NOTE(review): bit_writer_augmentation is computed here but is not part of the returned value.
+    size_t bit_writer_augmentation = 0;
+    if (g_allow_progressive) {
+        // OR-ing all right shifts of (size - 1) and adding 1 rounds the size up to a power of two
+        for (size_t cur_size = jpgfilesize - 1; cur_size; cur_size >>= 1) {
+            bit_writer_augmentation |= cur_size;
+        }
+        bit_writer_augmentation += 1; // this is used to compute the buffer size of the abit_writer for writing
+    }
+    // Same power-of-two rounding, applied to hdrs.
+    size_t garbage_augmentation = 0;
+    for (size_t cur_size = hdrs - 1; cur_size; cur_size >>= 1) {
+        garbage_augmentation |= cur_size;
+    }
+    garbage_augmentation += 1; // this is used to compute the buffer size of the abit_writer for writing
+    int non_preloaded_mux = 4096 * 1024 + 131072; // only 1 thread hence only one extra 131072
+    size_t decode_header_needed_size = hdrs + zlib_hdrs * 3;
+    if (zlib_hdrs && zlib_hdrs * 2 < hdrs) {
+        // add zlib_hdrs*2, *4, *8, ... for every doubling that is still below hdrs
+        size_t doubled = zlib_hdrs * 2;
+        do {
+            decode_header_needed_size += doubled;
+            doubled *= 2;
+        } while (doubled < (size_t)hdrs);
+    }
+    size_t single_threaded_model_bonus = 0;
+    size_t single_threaded_buffer_bonus =
+        0; // the threads have to save their output to 3/4 of the jpeg before writing it
+    if (g_decoder) {
+        single_threaded_model_bonus += g_decoder->get_model_worker_memory_usage();
+    } else if (g_encoder) {
+        single_threaded_model_bonus += g_encoder->get_decode_model_worker_memory_usage();
+    }
+    if (filetype != JPEG && !g_threaded) {
+        single_threaded_buffer_bonus += jpgfilesize;
+    }
+    size_t abit_writer = 0;
+    if (g_allow_progressive) {
+        if (zlib_hdrs * 3 < ABIT_WRITER_PRELOAD * 2 + 64) {
+            if (zlib_hdrs * 3 < ABIT_WRITER_PRELOAD + 64) {
+                abit_writer += ABIT_WRITER_PRELOAD * 2 + 64; // these can't be reused memory
+            } else {
+                abit_writer += ABIT_WRITER_PRELOAD + 64; // these can't be reused
+            }
+        }
+    } else {
+        abit_writer += 65536 + 64;
+    }
+    if (g_allow_progressive && jpgfilesize > ABIT_WRITER_PRELOAD) {
+        // we currently buffer the whole jpeg in memory while streaming out
+        abit_writer += 3 * jpgfilesize;
+    }
+    // Default (non-JPEG input): start from what the memory manager has allocated and
+    // swap the frame buffers for streaming buffers.
+    size_t total = Sirikata::memmgr_size_allocated();
+    size_t decom_memory_bound =
+        total - current_run_size + streaming_buffer_size - single_threaded_model_bonus + single_threaded_buffer_bonus;
+    if (filetype == JPEG) {
+        // JPEG input: build the bound from the individual buffer estimates instead.
+        decom_memory_bound = streaming_buffer_size + abit_writer + jpgfilesize + sizeof(ProbabilityTablesBase) +
+                             garbage_augmentation + decode_header_needed_size + non_preloaded_mux;
+    }
+    return decom_memory_bound;
+}
+
+// Exits with TOO_MUCH_MEMORY_NEEDED when a memory limit is configured
+// (g_decompression_memory_bound != 0) and the estimated requirement exceeds it.
+void check_decompression_memory_bound_ok() {
+    const bool limit_exceeded =
+        g_decompression_memory_bound && (decompression_memory_bound() > g_decompression_memory_bound);
+    if (limit_exceeded) {
+        custom_exit(ExitCode::TOO_MUCH_MEMORY_NEEDED);
+    }
+}
+/* -----------------------------------------------
+    processes one file
+    ----------------------------------------------- */
+// Runs a no-op job on every worker that has never queued any work and waits
+// for it to finish.
+//
+// workers:     array of GenericWorker passed as an untyped pointer; NULL is
+//              accepted and ignored.
+// num_workers: number of entries in that array.
+void kill_workers(void* workers, uint64_t num_workers) {
+    GenericWorker* const pool = (GenericWorker*)workers;
+    if (!pool) {
+        return;
+    }
+    for (uint64_t idx = 0; idx < num_workers; ++idx) {
+        GenericWorker& worker = pool[idx];
+        if (worker.has_ever_queued_work()) {
+            continue; // workers that already ran something are left alone
+        }
+        worker.work = &gen_nop;
+        worker.activate_work();
+        worker.main_wait_for_done();
+    }
+}
+// Issues a getcwd() call and reports the outcome through `value`:
+// -1 is stored before the call, then 1 if getcwd() succeeded or 2 if it failed.
+// Compiled to an empty function on Windows.
+void test_syscall_injection(std::atomic<int>* value) {
+#ifndef _WIN32
+    char cwd[128 + 1];
+    cwd[sizeof(cwd) - 1] = 0;
+    value->store(-1);
+    if (getcwd(cwd, sizeof(cwd) - 1)) {
+        value->store(1);
+    } else {
+        value->store(2);
+    }
+#endif
+}
+// Runs recode_baseline_jpeg() on str_out and updates the global bookkeeping.
+//
+// On failure errorlevel is set to 2 and false is returned. On success the
+// global file sizes are refreshed, timing information is printed to stderr
+// (non-Windows builds, and only when g_use_seccomp is off), the last restart
+// position is stored, and true is returned.
+bool recode_baseline_jpeg_wrapper() {
+    const bool ok = recode_baseline_jpeg(str_out, max_file_size);
+    if (!ok) {
+        errorlevel.store(2);
+        return ok;
+    }
+
+    // get filesize
+    jpgfilesize = str_out->getsize();
+    if (!ujg_base_in) {
+        ujgfilesize = 4096 * 1024; // no input stream object: fall back to this fixed value
+    } else {
+        ujgfilesize = ujg_base_in->getsize();
+    }
+
+#ifndef _WIN32
+    if (!g_use_seccomp) {
+        clock_t final = clock();
+        struct timeval now = {0, 0};
+        gettimeofday(&now, NULL);
+
+        const double t_begin = current_operation_begin.tv_sec + (double)current_operation_begin.tv_usec / 1000000.;
+        const double t_end = now.tv_sec + (double)now.tv_usec / 1000000.;
+        const double t_first_byte =
+            current_operation_first_byte.tv_sec + (double)current_operation_first_byte.tv_usec / 1000000.;
+
+        const double total_secs = t_end - t_begin;
+        // without a first-byte stamp, report the total duration instead
+        double first_byte_secs = total_secs;
+        if (current_operation_first_byte.tv_sec != 0) {
+            first_byte_secs = t_first_byte - t_begin;
+        }
+
+        fprintf(stderr, "TIMING (new method): %f to first byte %f total\n", first_byte_secs, total_secs);
+        (void)final;
+        fprintf(stderr, "Read took: %f\n", (read_done - overall_start) / (double)CLOCKS_PER_SEC);
+    }
+#endif
+
+    // store last scan & restart positions
+    if (!rstp.empty()) {
+        rstp.at(rstc) = hufs;
+    }
+
+    return ok;
+}
+
+// Obtains the input file descriptor and reads the 2-byte file signature.
+//
+// ifilename: path of the input file, or "-" for stdin; ignored when `reader`
+//            is given.
+// reader:    optional pre-opened source; when non-NULL its descriptor and
+//            socket flag are used.
+// header:    receives the first two bytes read from the descriptor.
+// is_socket: set to reader->is_socket() when a reader is given, otherwise false.
+//
+// Returns the descriptor, or -1 if the file could not be opened (a message is
+// written to fd 2 and no read is attempted). A read error is reported on fd 2
+// but the descriptor is still returned.
+//
+// Fixes over the previous version: the open-failure message no longer claims
+// the file was opened "for writing"; read() is no longer called on fd -1; and
+// a short first read is continued at the correct offset instead of always
+// writing into header[1].
+int open_fdin(const char* ifilename,
+              IOUtil::FileReader* reader,
+              Sirikata::Array1d<uint8_t, 2>& header,
+              bool* is_socket) {
+    int fdin = -1;
+    if (reader != NULL) {
+        *is_socket = reader->is_socket();
+        fdin = reader->get_fd();
+    } else if (strcmp(ifilename, "-") == 0) {
+        fdin = 0;
+        *is_socket = false;
+    } else {
+        *is_socket = false;
+        do {
+            fdin = open(ifilename, O_RDONLY
+#ifdef _WIN32
+                                       | O_BINARY
+#endif
+                        );
+        } while (fdin == -1 && errno == EINTR);
+        if (fdin == -1) {
+            const char* errormessage = "Input file unable to be opened for reading:";
+            while (write(2, errormessage, strlen(errormessage)) == -1 && errno == EINTR) {
+            }
+            while (write(2, ifilename, strlen(ifilename)) == -1 && errno == EINTR) {
+            }
+            while (write(2, "\n", 1) == -1 && errno == EINTR) {
+            }
+            return fdin; // nothing to read from
+        }
+    }
+    // Read until both signature bytes are in, retrying on EINTR and stopping at EOF or error.
+    size_t have = 0;
+    ssize_t data_read = 0;
+    while (have < 2) {
+        data_read = read(fdin, &header[have], 2 - have);
+        if (data_read == -1 && errno == EINTR) {
+            continue;
+        }
+        if (data_read <= 0) {
+            break;
+        }
+        have += (size_t)data_read;
+    }
+    if (data_read < 0) {
+        const char* fail = "Failed to read 2 byte header\n";
+        while (write(2, fail, strlen(fail)) == -1 && errno == EINTR) {
+        }
+    }
+    return fdin;
+}
+
+// Returns `filename` with as many '_' characters appended as are needed for
+// the result to name a file that cannot be opened for reading, i.e. one that
+// does not appear to exist yet.
+std::string uniq_filename(std::string filename) {
+    for (;;) {
+        FILE* probe = fopen(filename.c_str(), "rb");
+        if (probe == NULL) {
+            return filename;
+        }
+        fclose(probe);
+        filename += "_";
+    }
+}
+
+// Builds an output name by replacing the extension of `filename` with `ext`
+// (or appending `ext` when the last path component has no '.'), then makes it
+// unique via uniq_filename().
+std::string postfix_uniq(const std::string& filename, const char* ext) {
+    const std::string::size_type cut = filename.find_last_of("./\\");
+    // Only a '.' found after the last path separator counts as an extension.
+    const bool has_extension = (cut != std::string::npos) && (filename[cut] == '.');
+    const std::string stem = has_extension ? filename.substr(0, cut) : filename;
+    return uniq_filename(stem + ext);
+}
+
+// Obtains the output file descriptor for the current input file.
+//
+// ifilename:               input path; "-" selects stdout (fd 1).
+// writer:                  optional pre-opened sink; when non-NULL its descriptor
+//                          and socket flag are returned directly.
+// fileid:                  first two bytes of the input, used to pick the
+//                          output extension.
+// force_compressed_output: for UJG/lepton inputs, forces the ".jpg.z" extension.
+// is_socket:               set from the writer, otherwise false.
+//
+// Returns the descriptor. If the output file cannot be opened, a message is
+// written to fd 2 and custom_exit(FILE_NOT_FOUND) is called.
+int open_fdout(const char* ifilename,
+               IOUtil::FileWriter* writer,
+               Sirikata::Array1d<uint8_t, 2> fileid,
+               bool force_compressed_output,
+               bool* is_socket) {
+    if (writer != NULL) {
+        *is_socket = writer->is_socket();
+        return writer->get_fd();
+    }
+    *is_socket = false;
+    if (strcmp(ifilename, "-") == 0) {
+        return 1;
+    }
+    int retval = -1;
+    std::string ofilename;
+    // check file id, determine filetype
+    // NOTE(review): ofilename is still empty here, so `ofilename != ifilename` is true for any
+    // non-empty ifilename; the next filelist entry is used whenever one exists -- confirm intent.
+    if (file_no + 1 < file_cnt && ofilename != ifilename) {
+        ofilename = filelist[file_no + 1];
+    } else if (is_jpeg_header(fileid)) {
+        ofilename = postfix_uniq(ifilename, (ofiletype == UJG ? ".ujg" : ".lep"));
+    } else if (((fileid[0] == ujg_header[0]) && (fileid[1] == ujg_header[1])) ||
+               ((fileid[0] == lepton_header[0]) && (fileid[1] == lepton_header[1])) ||
+               ((fileid[0] == zlepton_header[0]) && (fileid[1] == zlepton_header[1]))) {
+        if ((fileid[0] == zlepton_header[0] && fileid[1] == zlepton_header[1]) || force_compressed_output) {
+            ofilename = postfix_uniq(ifilename, ".jpg.z");
+        } else {
+            ofilename = postfix_uniq(ifilename, ".jpg");
+        }
+    }
+    // If none of the branches matched, ofilename stays empty and is passed to open() as-is.
+    // Create/truncate the output file; on non-Windows builds the mode is owner read+write.
+    do {
+        retval = open(ofilename.c_str(), O_WRONLY | O_CREAT | O_TRUNC
+#ifdef _WIN32
+                                             | O_BINARY
+#endif
+                      ,
+                      0
+#ifdef _WIN32
+//| S_IREAD| S_IWRITE
+#else
+                          | S_IWUSR | S_IRUSR
+#endif
+                      );
+    } while (retval == -1 && errno == EINTR);
+    if (retval == -1) {
+        const char* errormessage = "Output file unable to be opened for writing:";
+        while (write(2, errormessage, strlen(errormessage)) == -1 && errno == EINTR) {
+        }
+        while (write(2, ofilename.c_str(), ofilename.length()) == -1 && errno == EINTR) {
+        }
+        while (write(2, "\n", 1) == -1 && errno == EINTR) {
+        }
+        custom_exit(ExitCode::FILE_NOT_FOUND);
+    }
+    return retval;
+}
+
+/*void process_file(IOUtil::FileReader* reader,
+                  IOUtil::FileWriter *writer,
+                  int max_file_size,
+                  bool force_zlib0)
+{
+    clock_t begin = 0, end = 1;
+    const char* actionmsg  = NULL;
+    const char* errtypemsg = NULL;
+    int speed, bpms;
+    float cr;
+
+
+    if (g_inject_syscall_test == 2) {
+        unsigned int num_workers = std::max(NUM_THREADS - 1, 1U);
+        GenericWorker* generic_workers = get_worker_threads(num_workers);
+        if (g_inject_syscall_test == 2) {
+            for (size_t i = 0; i < num_workers; ++i) {
+                std::atomic<int> value;
+                value.store(0);
+                generic_workers[i].work = std::bind(&test_syscall_injection, &value);
+                generic_workers[i].activate_work();
+                generic_workers[i].join_via_syscall();
+                if (value.load() < 1) {
+                    abort(); // this should exit_group
+                }
+            }
+            g_threaded = false;
+        }
+    }
+    // main function routine
+    errorlevel.store(0);
+    jpgfilesize = 0;
+    ujgfilesize = 0;
+
+    Sirikata::Array1d<uint8_t, 2> header = {{0, 0}};
+    const char * ifilename = filelist[file_no];
+    bool is_socket = false;
+    int fdin = open_fdin(ifilename, reader, header, &is_socket);
+    int fdout = -1;
+
+    //--------------------------------------------------------
+    //add by zyl
+
+
+
+    std::string in_dir = "./"; // no use by now
+    std::string JPEGFile = ifilename;
+    uint64_t size;
+    uint8_t *datatoDDR;
+       int err = load_dat<uint8_t>(datatoDDR, JPEGFile, in_dir, size);
+       if (err){
+           printf("Alloc buf failed!\n");
+       }
+
+
+
+    //--------------------------------------------------------
+
+
+    if (is_jpeg_header(header) && !g_skip_validation) {
+        //fprintf(stderr, "ENTERED VALIDATION...\n");
+        std::cout<<"###########################"<<std::endl;
+        ExitCode validation_exit_code = ExitCode::SUCCESS;
+        Sirikata::MuxReader::ResizableByteBuffer lepton_data;
+        switch (validateAndCompress(&fdin, &fdout, header, start_byte, max_file_size,
+                                    &validation_exit_code,
+                                    &lepton_data,
+                                    g_argc,
+                                    g_argv,
+                                    is_socket)) {
+          case ValidationContinuation::CONTINUE_AS_JPEG:
+            //fprintf(stderr, "CONTINUE AS JPEG...\n");
+            is_socket = false;
+            break;
+          case ValidationContinuation::CONTINUE_AS_LEPTON:
+            is_socket = false;
+            g_force_zlib0_out = false;
+            force_zlib0 = false;
+            if (ofiletype ==  UJG) {
+                filetype = UJG;
+                header[0] = ujg_header[0];
+                header[1] = ujg_header[1];
+            } else {
+                filetype = LEPTON;
+                header[0] = lepton_header[0];
+                header[1] = lepton_header[1];
+            }
+            //fprintf(stderr, "CONTINUE AS LEPTON...\n");
+            break;
+          case ValidationContinuation::ROUNDTRIP_OK:
+            fdout = open_fdout(ifilename, writer, header, g_force_zlib0_out || force_zlib0, &is_socket);
+            for (size_t data_sent = 0; data_sent < lepton_data.size();) {
+                ssize_t sent = write(fdout,
+                                     lepton_data.data() + data_sent,
+                                     lepton_data.size() - data_sent);
+                if (sent < 0 && errno == EINTR){
+                    continue;
+                }
+                if (sent <= 0) {
+                    custom_exit(ExitCode::SHORT_READ);
+                }
+                data_sent += sent;
+            }
+            //fprintf(stderr, "OK...\n");
+            custom_exit(ExitCode::SUCCESS);
+          case ValidationContinuation::BAD:
+          default:
+            always_assert(validation_exit_code != ExitCode::SUCCESS);
+            custom_exit(validation_exit_code);
+        }
+    } else {
+        std::cout<<"%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"<<std::endl;
+        fdout = open_fdout(ifilename, writer, header, g_force_zlib0_out || force_zlib0, &is_socket);
+    }
+    // check input file and determine filetype
+    check_file(fdin, fdout, max_file_size, force_zlib0, header, is_socket);
+    begin = clock();
+    if ( filetype == JPEG )
+    {
+
+
+        if (ofiletype == LEPTON) {
+            if (!g_encoder) {
+                g_encoder.reset(makeEncoder(g_threaded, g_threaded));
+                TimingHarness::timing[0][TimingHarness::TS_MODEL_INIT] = TimingHarness::get_time_us();
+                g_decoder = NULL;
+            } else if (g_threaded && (action == socketserve || action == forkserve)) {
+                g_encoder->registerWorkers(get_worker_threads(NUM_THREADS - 1), NUM_THREADS  - 1);
+            }
+        }else if (ofiletype == UJG) {
+            g_encoder.reset(new SimpleComponentEncoder);
+            g_decoder = NULL;
+        }
+    } else if (filetype == LEPTON) {
+        NUM_THREADS = read_fixed_ujpg_header();
+        if (!g_decoder) {
+            g_decoder = makeDecoder(g_threaded, g_threaded);
+            TimingHarness::timing[0][TimingHarness::TS_MODEL_INIT] = TimingHarness::get_time_us();
+            g_reference_to_free.reset(g_decoder);
+        } else if (NUM_THREADS > 1 && g_threaded && (action == socketserve || action == forkserve)) {
+            g_decoder->registerWorkers(get_worker_threads(NUM_THREADS - 1), NUM_THREADS - 1);
+        }
+    }else if (filetype == UJG) {
+        (void)read_fixed_ujpg_header();
+        g_decoder = new SimpleComponentDecoder;
+        g_reference_to_free.reset(g_decoder);
+    }
+#ifndef _WIN32
+    //FIXME
+    if (g_time_bound_ms) {
+        struct itimerval bound;
+        bound.it_value.tv_sec = g_time_bound_ms / 1000;
+        bound.it_value.tv_usec = (g_time_bound_ms % 1000) * 1000;
+        bound.it_interval.tv_sec = 0;
+        bound.it_interval.tv_usec = 0;
+        int ret = setitimer(ITIMER_REAL, &bound, NULL);
+
+        assert(ret == 0 && "Timer must be able to be set");
+        if (ret != 0) {
+            exit((int)ExitCode::OS_ERROR);
+        }
+    }
+#endif
+    if (g_unkillable) { // only set this after the time bound has been set
+        if (!g_time_bound_ms) {
+            fprintf(stderr, "Only allowed to set unkillable for items with a time bound\n");
+            exit(1);
+        }
+        signal(SIGTERM, &sig_nop);
+#ifndef _WIN32
+        signal(SIGQUIT, &sig_nop);
+#endif
+    }
+
+    if (g_use_seccomp) {
+        Sirikata::installStrictSyscallFilter(true);
+    }
+#ifndef _WIN32
+    if (g_inject_syscall_test == 1) {
+        char buf[128 + 1];
+        buf[sizeof(buf) - 1] = 0;
+        char * ret = getcwd(buf, sizeof(buf) - 1);
+        (void)ret;
+    }
+#endif
+    // get specific action message
+    if ( filetype == UNK ) {
+        actionmsg = "unknown filetype";
+    } else if (action == info) {
+        actionmsg = "Parsing";
+    } else if ( filetype == JPEG ) {
+        actionmsg = "Writing to LEPTON\n";
+    } else {
+        actionmsg = "Decompressing to JPEG\n";
+    }
+
+    if ( verbosity > 0 ) {
+        while (write(2, actionmsg , strlen(actionmsg)) < 0 && errno == EINTR) {}
+    }
+
+
+    std::vector<std::pair<uint32_t, uint32_t> > huff_input_offset;
+    if ( filetype == JPEG )
+    {
+        switch ( action )
+        {
+            case comp:
+            case forkserve:
+            case socketserve:
+                timing_operation_start( 'c' );
+                TimingHarness::timing[0][TimingHarness::TS_READ_STARTED] = TimingHarness::get_time_us();
+                {
+                    std::vector<uint8_t,
+                                Sirikata::JpegAllocator<uint8_t> > jpeg_file_raw_bytes;
+                    unsigned int jpg_ident_offset = 2;
+                    if (start_byte == 0) {
+                        ibytestream str_jpg_in(str_in,
+                                               jpg_ident_offset,
+                                               Sirikata::JpegAllocator<uint8_t>());
+
+                        //execute(std::bind(&read_jpeg_wrapper, &huff_input_offset, &str_jpg_in));
+                        read_jpeg_wrapper( &huff_input_offset, &str_jpg_in);
+
+                    } else {
+                        ibytestreamcopier str_jpg_in(str_in,
+                                                     jpg_ident_offset,
+                                                     max_file_size,
+                                                     Sirikata::JpegAllocator<uint8_t>());
+                        str_jpg_in.mutate_read_data().push_back(0xff);
+                        str_jpg_in.mutate_read_data().push_back(0xd8);
+                       // execute(std::bind(&read_jpeg_and_copy_to_side_channel,
+                        read_jpeg_and_copy_to_side_channel(
+                                          &huff_input_offset, &str_jpg_in);
+                        jpeg_file_raw_bytes.swap(str_jpg_in.mutate_read_data());
+                    }
+                    TimingHarness::timing[0][TimingHarness::TS_JPEG_DECODE_STARTED] =
+                        TimingHarness::timing[0][TimingHarness::TS_READ_FINISHED] = TimingHarness::get_time_us();
+                    std::vector<ThreadHandoff> luma_row_offsets;
+                    //execute(std::bind(&decode_jpeg, huff_input_offset, &luma_row_offsets));
+                    //decode_jpeg( huff_input_offset, &luma_row_offsets);
+                    uint8_t* res = (uint8_t*)malloc(MAX_NUM_PIX);
+                    struct_arith arith;
+                    hls_decode_jpeg_kernel((ap_uint<AXI_WIDTH>*)datatoDDR, (int)size, arith, res);
+                    TimingHarness::timing[0][TimingHarness::TS_JPEG_DECODE_FINISHED]
+                        = TimingHarness::get_time_us();
+                    //execute( check_value_range );
+//to test cosim
+    fprintf( stderr, "=========== arith print ==========\n" );
+        fprintf( stderr, "count = %d\n" , arith.count);
+        fprintf( stderr, "value = %d\n" , arith.value);
+        fprintf( stderr, "pre_byte = %d\n" , arith.pre_byte);
+        fprintf( stderr, "run = %d\n" , arith.run);
+        fprintf( stderr, "pos = %d\n" , arith.pos);
+        fprintf( stderr, "range = %d\n" , arith.range);
+        fprintf( stderr, "isFirst = %d\n" , arith.isFirst);
+        //  for(int pos=0; pos<arith.pos; pos++){
+        //      fprintf( stderr, " %.4x\n" , *(res+pos));
+        //  }
+        fprintf( stderr, "============ end print ==========\n" );
+
+                   // execute(std::bind(&write_ujpg,    std::move(luma_row_offsets), jpeg_file_raw_bytes.empty() ? NULL
+: &jpeg_file_raw_bytes));
+                    std::pair<uint32_t, uint32_t> tmp_pair = huff_input_offset.front();
+                    int tmp_begin = tmp_pair.second;
+                    ThreadHandoff myHandoff;
+                    myHandoff.luma_y_start = 0;
+                    myHandoff.luma_y_end   = 1;
+                    myHandoff.segment_size = size - tmp_begin -2;
+                    myHandoff.overhang_byte = 0;
+                    myHandoff.num_overhang_bits = 0;
+                    myHandoff.last_dc[0] = 0;
+                    myHandoff.last_dc[1] = 0;
+                    myHandoff.last_dc[2] = 0;
+                    write_ujpg_2(ujg_out, arith, res , myHandoff);
+                    //write_ujpg(luma_row_offsets,jpeg_file_raw_bytes.empty() ? NULL : &jpeg_file_raw_bytes, arith,
+res);
+                    //free(datatoDDR);
+                }
+                timing_operation_complete( 'c' );
+                break;
+
+            case info:
+                {
+                    unsigned int jpg_ident_offset = 2;
+                    ibytestream str_jpg_in(str_in, jpg_ident_offset, Sirikata::JpegAllocator<uint8_t>());
+                    execute(std::bind(read_jpeg_wrapper, &huff_input_offset, &str_jpg_in));
+                }
+                execute( write_info );
+                break;
+        }
+    }
+    else if ( filetype == UJG || filetype == LEPTON)
+    {
+        switch ( action )
+        {
+            case comp:
+            case forkserve:
+            case socketserve:
+                if (!g_use_seccomp) {
+                    overall_start = clock();
+                }
+                timing_operation_start( 'd' );
+                TimingHarness::timing[0][TimingHarness::TS_READ_STARTED] = TimingHarness::get_time_us();
+                execute( read_ujpg ); // replace with decompression function!
+                TimingHarness::timing[0][TimingHarness::TS_READ_FINISHED] = TimingHarness::get_time_us();
+                if (!g_use_seccomp) {
+                    read_done = clock();
+                }
+                TimingHarness::timing[0][TimingHarness::TS_JPEG_RECODE_STARTED] = TimingHarness::get_time_us();
+                if (filetype != UJG && !g_allow_progressive) {
+                    execute(recode_baseline_jpeg_wrapper);
+                } else {
+                    execute(recode_jpeg);
+                }
+                timing_operation_complete( 'd' );
+                TimingHarness::timing[0][TimingHarness::TS_JPEG_RECODE_FINISHED] = TimingHarness::get_time_us();
+
+                str_out->close();
+                break;
+            case info:
+                execute( read_ujpg );
+                execute( write_info );
+                break;
+        }
+    }
+    if (!fast_exit) {
+        // close iostreams
+        if ( str_in  != NULL ) delete( str_in  ); str_in  = NULL;
+        if ( str_out != NULL ) delete( str_out ); str_out = NULL;
+//        if ( ujg_out != NULL ) delete( ujg_out ); ujg_out = NULL;
+        // delete if broken or if output not needed
+        if ((!pipe_on) && ((errorlevel.load() >= err_tresh)
+                           || (action != comp && action != forkserve && action != socketserve))) {
+            // FIXME: can't delete broken output--it's gone already
+        }
+    }
+    TimingHarness::timing[0][TimingHarness::TS_DONE] = TimingHarness::get_time_us();
+    TimingHarness::print_results();
+    //if (!g_use_seccomp) {
+        end = clock();
+    //}
+    {
+        size_t bound = decompression_memory_bound();
+        char bound_out[] = "XXXXXXXXXX bytes needed to decompress this file\n";
+        bound_out[0] = '0' + (bound / 1000000000)%10;
+        bound_out[1] = '0' + (bound / 100000000)%10;
+        bound_out[2] = '0' + (bound / 10000000)%10;
+        bound_out[3] = '0' + (bound / 1000000)%10;
+        bound_out[4] = '0' + (bound / 100000)%10;
+        bound_out[5] = '0' + (bound / 10000)%10;
+        bound_out[6] = '0' + (bound / 1000)%10;
+        bound_out[7] = '0' + (bound / 100)%10;
+        bound_out[8] = '0' + (bound / 10)%10;
+        bound_out[9] = '0' + (bound / 1)%10;
+        const char * to_write = bound_out;
+        while(to_write[0] == '0') {
+            ++to_write;
+        }
+        while(write(2, to_write, strlen(to_write)) < 0 && errno == EINTR) {
+        }
+    }
+    print_bill(2);
+    // speed and compression ratio calculation
+    printf("%d, %d, \n", begin, end);
+    speed = (int) ( (double) (( end - begin ) * 1000) / CLOCKS_PER_SEC );
+    bpms  = ( speed > 0 ) ? ( jpgfilesize / speed ) : jpgfilesize;
+    cr    = ( jpgfilesize > 0 ) ? ( 100.0 * ujgfilesize / jpgfilesize ) : 0;
+
+    switch ( verbosity )
+    {
+        case 0:
+          if ( errorlevel.load() < err_tresh ) {
+                if (action == comp ) {
+                    fprintf(stderr, "%d %d\n",(int)ujgfilesize, (int)jpgfilesize);
+                    char percentage_report[]=" XX.XX%\n";
+                    double pct = cr + .005;
+                    percentage_report[0] = '0' + (int)(pct / 100) % 10;
+                    percentage_report[1] = '0' + (int)(pct / 10) % 10;
+                    percentage_report[2] = '0' + (int)(pct) % 10;
+                    percentage_report[4] = '0' + (int)(pct * 10) % 10;
+                    percentage_report[5] = '0' + (int)(pct * 100) % 10;
+                    char * output = percentage_report;
+                    if (cr < 100) {
+                        ++output;
+                    }
+                    while (write(2, output, strlen(output)) < 0 && errno == EINTR) {
+                    }
+                }
+                else {
+                    fprintf( msgout,  "DONE\n" );
+                }
+            }
+            break;
+
+        case 1:
+          if ( errorlevel.load() < err_tresh ) fprintf( msgout,  "DONE\n" );
+            else fprintf( msgout,  "ERROR\n" );
+            break;
+
+        case 2:
+            fprintf( msgout,  "\n----------------------------------------\n" );
+            if ( errorlevel.load() < err_tresh ) fprintf( msgout,  "-> %s OK\n", actionmsg );
+            break;
+    }
+
+    switch ( errorlevel.load() )
+    {
+        case 0:
+            errtypemsg = "none";
+            break;
+
+        case 1:
+            if ( errorlevel.load() < err_tresh )
+                errtypemsg = "warning (ignored)";
+            else
+                errtypemsg = "warning (skipped file)";
+            break;
+
+        case 2:
+            errtypemsg = "fatal error";
+            break;
+    }
+
+    if ( errorlevel.load() > 0 )
+    {
+        if (false && action != socketserve && action != forkserve) {
+            fprintf( stderr, " %s:\n", errtypemsg  );
+            fprintf( stderr, " %s\n", errormessage.c_str() );
+            if ( verbosity > 1 )
+                fprintf( stderr, " (in file \"%s\")\n", filelist[ file_no ] );
+        }
+    }
+  //  if ( (verbosity > 0) && (errorlevel.load() < err_tresh) )
+    if ( action == comp )
+    {
+        fprintf( msgout,  " time taken  : %7i msec\n", speed );
+        fprintf( msgout,  " byte per ms : %7i byte\n", bpms );
+        fprintf( msgout,  " comp. ratio : %7.2f %%\n", cr );
+    }
+
+    if ( ( verbosity > 1 ) && ( action == comp ) )
+        fprintf( msgout,  "\n" );
+    LeptonDebug::dumpDebugData();
+    if (errorlevel.load()) {
+        custom_exit(ExitCode::UNSUPPORTED_JPEG); // custom exit will delete generic_workers
+    } else {
+      //  custom_exit(ExitCode::SUCCESS);
+    }
+    // reset buffers
+    //reset_buffers();
+}*/
+
+/**
+ * Batch variant of the file-processing routine: treats filelist[0] as a
+ * directory, loads every entry found in it, runs hls_decode_jpeg_kernel()
+ * on the whole batch and writes one "./lep/<name>.lep" output per entry via
+ * write_ujpg_2(). Afterwards it prints timing, memory-bound and ratio
+ * statistics, and calls custom_exit() when errorlevel is non-zero.
+ *
+ * Takes no parameters and returns nothing; all inputs and results travel
+ * through file-level state (filelist, action, verbosity, errorlevel, ...).
+ *
+ * Fixes relative to the previous revision:
+ *  - the input directory handle is checked before readdir() and closed;
+ *  - the name lists grow on demand instead of being capped at 1000 entries;
+ *  - the probe handle for "./lep" is closed instead of leaked;
+ *  - failures of open() on an output file are reported;
+ *  - the diagnostic dump of arith[0] is skipped for an empty batch;
+ *  - every output writer is released (previously only indices 0 and 1 were
+ *    touched, which was out of bounds for fewer than two files);
+ *  - clock_t values are printed with a matching format specifier.
+ */
+void process_file_2() {
+    clock_t begin = 0, end = 1;
+    const char* actionmsg = NULL;
+    const char* errtypemsg = NULL;
+    int speed, bpms;
+    float cr;
+
+    // main function routine
+    errorlevel.store(0);
+    jpgfilesize = 0;
+    ujgfilesize = 0;
+
+    const char* ifilename = filelist[0];
+    int filecnt = 0;
+
+    //--------------------------------------------------------
+    // add by zyl
+
+    // Names grow with the directory contents; no fixed upper bound.
+    std::vector<std::string> JPEGFile_name;
+    std::vector<std::string> JPEGFile_dir;
+
+    struct dirent* ent = nullptr;
+    DIR* benchDir = (ifilename != NULL) ? opendir(ifilename) : nullptr;
+    if (benchDir == nullptr) {
+        // readdir() on a null handle would crash, so bail out explicitly.
+        fprintf(stderr, "ERROR: cannot open input directory \"%s\"\n", ifilename ? ifilename : "(null)");
+        custom_exit(ExitCode::FILE_NOT_FOUND);
+        return;
+    }
+    while ((ent = readdir(benchDir)) != nullptr) {
+        if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
+        JPEGFile_dir.push_back(ifilename);
+        JPEGFile_name.push_back(ent->d_name);
+        filecnt++;
+    }
+    closedir(benchDir);
+
+    // Create the output directory if it cannot be opened; close the probe handle otherwise.
+    DIR* lepDir = opendir("./lep");
+    if (lepDir == nullptr) {
+        if (system("mkdir -p ./lep/") == -1) std::cout << "ERROR: create dir!" << std::endl;
+    } else {
+        closedir(lepDir);
+    }
+
+    std::vector<IOUtil::FileWriter*> ujg_out(filecnt);
+    std::vector<int> size(filecnt);
+    std::vector<uint8_t*> datatoDDR(filecnt);
+    std::vector<int> fdout(filecnt);
+
+    for (int i = 0; i < filecnt; i++) {
+        std::string tmp = JPEGFile_dir[i] + "/" + JPEGFile_name[i];
+        int err = load_dat<uint8_t>(datatoDDR[i], tmp, size[i]);
+        if (err) {
+            printf("Alloc buf failed!\n");
+        }
+        std::string ofile = "./lep/" + JPEGFile_name[i] + ".lep";
+        fdout[i] = open(ofile.c_str(), O_WRONLY | O_CREAT | O_TRUNC, S_IWUSR | S_IRUSR);
+        if (fdout[i] < 0) {
+            fprintf(stderr, "ERROR: cannot open output file \"%s\"\n", ofile.c_str());
+        }
+        ujg_out[i] = IOUtil::BindFdToWriter(fdout[i], false);
+    }
+
+    filetype = JPEG;
+    begin = clock();
+    if (filetype == JPEG) {
+        if (ofiletype == LEPTON) {
+            if (!g_encoder) {
+                g_encoder.reset(makeEncoder(g_threaded, g_threaded));
+                TimingHarness::timing[0][TimingHarness::TS_MODEL_INIT] = TimingHarness::get_time_us();
+                g_decoder = NULL;
+            } else if (g_threaded && (action == socketserve || action == forkserve)) {
+                g_encoder->registerWorkers(get_worker_threads(NUM_THREADS - 1), NUM_THREADS - 1);
+            }
+        } else {
+            custom_exit(ExitCode::UNSUPPORTED_JPEG);
+        }
+    } else {
+        custom_exit(ExitCode::UNSUPPORTED_JPEG);
+    }
+#ifndef _WIN32
+    // FIXME
+    if (g_time_bound_ms) {
+        struct itimerval bound;
+        bound.it_value.tv_sec = g_time_bound_ms / 1000;
+        bound.it_value.tv_usec = (g_time_bound_ms % 1000) * 1000;
+        bound.it_interval.tv_sec = 0;
+        bound.it_interval.tv_usec = 0;
+        int ret = setitimer(ITIMER_REAL, &bound, NULL);
+
+        assert(ret == 0 && "Timer must be able to be set");
+        if (ret != 0) {
+            exit((int)ExitCode::OS_ERROR);
+        }
+    }
+#endif
+    if (g_unkillable) { // only set this after the time bound has been set
+        if (!g_time_bound_ms) {
+            fprintf(stderr, "Only allowed to set unkillable for items with a time bound\n");
+            exit(1);
+        }
+        signal(SIGTERM, &sig_nop);
+#ifndef _WIN32
+        signal(SIGQUIT, &sig_nop);
+#endif
+    }
+
+    if (g_use_seccomp) {
+        Sirikata::installStrictSyscallFilter(true);
+    }
+#ifndef _WIN32
+    if (g_inject_syscall_test == 1) {
+        char buf[128 + 1];
+        buf[sizeof(buf) - 1] = 0;
+        char* ret = getcwd(buf, sizeof(buf) - 1);
+        (void)ret;
+    }
+#endif
+    // get specific action message
+
+    if (filetype == JPEG) {
+        actionmsg = "Writing to LEPTON\n";
+    }
+
+    if (verbosity > 0) {
+        // write(2) directly, retrying when interrupted by a signal
+        while (write(2, actionmsg, strlen(actionmsg)) < 0 && errno == EINTR) {
+        }
+    }
+
+    std::vector<std::pair<uint32_t, uint32_t> > huff_input_offset;
+    if (filetype == JPEG) {
+        switch (action) {
+            case comp:
+            case forkserve:
+            case socketserve:
+                timing_operation_start('c');
+                TimingHarness::timing[0][TimingHarness::TS_READ_STARTED] = TimingHarness::get_time_us();
+                {
+                    TimingHarness::timing[0][TimingHarness::TS_JPEG_DECODE_STARTED] =
+                        TimingHarness::timing[0][TimingHarness::TS_READ_FINISHED] = TimingHarness::get_time_us();
+                    // Per-file kernel outputs, indexed like datatoDDR / size.
+                    std::vector<uint8_t*> res(filecnt);
+                    std::vector<uint32_t> left(filecnt);
+                    std::vector<uint32_t> rst(filecnt);
+                    std::vector<struct_arith> arith(filecnt);
+
+                    for (int i = 0; i < filecnt; i++) {
+                        res[i] = (uint8_t*)malloc(MAX_NUM_PIX);
+                    }
+
+                    // One kernel invocation handles the whole batch.
+                    hls_decode_jpeg_kernel(xclbin_path, filecnt, datatoDDR, size, arith, res, left, rst);
+                    TimingHarness::timing[0][TimingHarness::TS_JPEG_DECODE_FINISHED] = TimingHarness::get_time_us();
+                    // to test cosim
+                    fprintf(stderr, "=========== arith print ==========\n");
+                    if (filecnt > 0) {
+                        // Only the first file's state is dumped; guard against an empty batch.
+                        fprintf(stderr, "count = %d\n", arith[0].count);
+                        fprintf(stderr, "value = %d\n", arith[0].value);
+                        fprintf(stderr, "pre_byte = %d\n", arith[0].pre_byte);
+                        fprintf(stderr, "run = %d\n", arith[0].run);
+                        fprintf(stderr, "pos = %d\n", arith[0].pos);
+                        fprintf(stderr, "range = %d\n", arith[0].range);
+                        fprintf(stderr, "isFirst = %d\n", arith[0].isFirst);
+                    }
+                    fprintf(stderr, "============ end print ==========\n");
+
+                    std::vector<ThreadHandoff> myHandoff(filecnt);
+                    for (int i = 0; i < filecnt; i++) {
+                        myHandoff[i].luma_y_start = 0;
+                        myHandoff[i].luma_y_end = 1;
+                        myHandoff[i].segment_size = left[i] - 2;
+                        myHandoff[i].overhang_byte = 0;
+                        myHandoff[i].num_overhang_bits = 0;
+                        myHandoff[i].last_dc[0] = 0;
+                        myHandoff[i].last_dc[1] = 0;
+                        myHandoff[i].last_dc[2] = 0;
+                        write_ujpg_2(size[i], ujg_out[i], arith[i], res[i], datatoDDR[i], size[i] - left[i] - 2, rst[i],
+                                     myHandoff[i]);
+                    }
+                    // NOTE(review): res[i] and datatoDDR[i] are not released here; whether
+                    // write_ujpg_2 keeps references to them is not visible from this function.
+                }
+                timing_operation_complete('c');
+                break;
+
+            case info: {
+                unsigned int jpg_ident_offset = 2;
+                ibytestream str_jpg_in(str_in, jpg_ident_offset, Sirikata::JpegAllocator<uint8_t>());
+                execute(std::bind(read_jpeg_wrapper, &huff_input_offset, &str_jpg_in));
+            }
+                execute(write_info);
+                break;
+        }
+    }
+
+    if (!fast_exit) {
+        // close iostreams
+        if (str_in != NULL) delete (str_in);
+        str_in = NULL;
+        if (str_out != NULL) delete (str_out);
+        str_out = NULL;
+        // Release every per-file writer, however many files were found.
+        for (size_t i = 0; i < ujg_out.size(); i++) {
+            if (ujg_out[i] != NULL) delete (ujg_out[i]);
+            ujg_out[i] = NULL;
+        }
+        // delete if broken or if output not needed
+        if ((!pipe_on) &&
+            ((errorlevel.load() >= err_tresh) || (action != comp && action != forkserve && action != socketserve))) {
+            // FIXME: can't delete broken output--it's gone already
+        }
+    }
+    TimingHarness::timing[0][TimingHarness::TS_DONE] = TimingHarness::get_time_us();
+    TimingHarness::print_results();
+    // if (!g_use_seccomp) {
+    end = clock();
+    //}
+    {
+        // Format the memory bound into a fixed 10-digit field without printf,
+        // then skip leading zeros before writing it to fd 2.
+        size_t bound = decompression_memory_bound();
+        char bound_out[] = "XXXXXXXXXX bytes needed to decompress this file\n";
+        bound_out[0] = '0' + (bound / 1000000000) % 10;
+        bound_out[1] = '0' + (bound / 100000000) % 10;
+        bound_out[2] = '0' + (bound / 10000000) % 10;
+        bound_out[3] = '0' + (bound / 1000000) % 10;
+        bound_out[4] = '0' + (bound / 100000) % 10;
+        bound_out[5] = '0' + (bound / 10000) % 10;
+        bound_out[6] = '0' + (bound / 1000) % 10;
+        bound_out[7] = '0' + (bound / 100) % 10;
+        bound_out[8] = '0' + (bound / 10) % 10;
+        bound_out[9] = '0' + (bound / 1) % 10;
+        const char* to_write = bound_out;
+        while (to_write[0] == '0') {
+            ++to_write;
+        }
+        while (write(2, to_write, strlen(to_write)) < 0 && errno == EINTR) {
+        }
+    }
+    print_bill(2);
+    // speed and compression ratio calculation
+    // clock_t is not guaranteed to be int-sized; cast explicitly for printf.
+    printf("%ld, %ld, \n", (long)begin, (long)end);
+    speed = (int)((double)((end - begin) * 1000) / CLOCKS_PER_SEC);
+    bpms = (speed > 0) ? (jpgfilesize / speed) : jpgfilesize;
+    cr = (jpgfilesize > 0) ? (100.0 * ujgfilesize / jpgfilesize) : 0;
+    switch (verbosity) {
+        case 0:
+            if (errorlevel.load() < err_tresh) {
+                if (action == comp) {
+                    fprintf(stderr, "%d %d\n", (int)ujgfilesize, (int)jpgfilesize);
+                    // Hand-formatted "XXX.XX%" report; the leading digit is dropped below 100%.
+                    char percentage_report[] = " XX.XX%\n";
+                    double pct = cr + .005;
+                    percentage_report[0] = '0' + (int)(pct / 100) % 10;
+                    percentage_report[1] = '0' + (int)(pct / 10) % 10;
+                    percentage_report[2] = '0' + (int)(pct) % 10;
+                    percentage_report[4] = '0' + (int)(pct * 10) % 10;
+                    percentage_report[5] = '0' + (int)(pct * 100) % 10;
+                    char* output = percentage_report;
+                    if (cr < 100) {
+                        ++output;
+                    }
+                    while (write(2, output, strlen(output)) < 0 && errno == EINTR) {
+                    }
+                } else {
+                    fprintf(msgout, "DONE\n");
+                }
+            }
+            break;
+
+        case 1:
+            if (errorlevel.load() < err_tresh)
+                fprintf(msgout, "DONE\n");
+            else
+                fprintf(msgout, "ERROR\n");
+            break;
+
+        case 2:
+            fprintf(msgout, "\n----------------------------------------\n");
+            if (errorlevel.load() < err_tresh) fprintf(msgout, "-> %s OK\n", actionmsg);
+            break;
+    }
+
+    switch (errorlevel.load()) {
+        case 0:
+            errtypemsg = "none";
+            break;
+
+        case 1:
+            if (errorlevel.load() < err_tresh)
+                errtypemsg = "warning (ignored)";
+            else
+                errtypemsg = "warning (skipped file)";
+            break;
+
+        case 2:
+            errtypemsg = "fatal error";
+            break;
+    }
+
+    if (errorlevel.load() > 0) {
+        // Deliberately disabled by the leading `false`; kept for reference.
+        if (false && action != socketserve && action != forkserve) {
+            fprintf(stderr, " %s:\n", errtypemsg);
+            fprintf(stderr, " %s\n", errormessage.c_str());
+            if (verbosity > 1) fprintf(stderr, " (in file \"%s\")\n", filelist[file_no]);
+        }
+    }
+    //  if ( (verbosity > 0) && (errorlevel.load() < err_tresh) )
+    if (action == comp) {
+        fprintf(msgout, " time taken  : %7i msec\n", speed);
+        fprintf(msgout, " byte per ms : %7i byte\n", bpms);
+        fprintf(msgout, " comp. ratio : %7.2f %%\n", cr);
+    }
+
+    if ((verbosity > 1) && (action == comp)) fprintf(msgout, "\n");
+    LeptonDebug::dumpDebugData();
+    if (errorlevel.load()) {
+        custom_exit(ExitCode::UNSUPPORTED_JPEG); // custom exit will delete generic_workers
+    } else {
+        //  custom_exit(ExitCode::SUCCESS);
+    }
+    // reset buffers
+    // reset_buffers();
+}
+
+/* -----------------------------------------------
+    main-function execution routine
+    ----------------------------------------------- */
+
+// Runs one processing step unless errorlevel has already reached err_tresh.
+// With verbosity 2 the step's elapsed time is printed to msgout on success
+// (only when seccomp is off, since clock() is skipped otherwise), or
+// "ERROR\n" is written to fd 2 on failure.
+void execute(const std::function<bool()>& function) {
+    // A previous step already failed hard enough: do nothing.
+    if (errorlevel.load() >= err_tresh) {
+        return;
+    }
+
+    clock_t started = 0;
+    clock_t finished = 0;
+
+    if (!g_use_seccomp) {
+        started = clock();
+    }
+    const bool ok = function();
+    if (!g_use_seccomp) {
+        finished = clock();
+    }
+
+    // Status output is produced at verbosity level 2 only.
+    if (verbosity != 2) {
+        return;
+    }
+
+    if (ok) {
+        if (!g_use_seccomp) {
+            const int elapsed_ms = (int)((double)((finished - started) * 1000) / CLOCKS_PER_SEC);
+            fprintf(msgout, "%6ims", elapsed_ms);
+        }
+        return;
+    }
+
+    // Raw write to stderr, retried while interrupted by a signal.
+    while (write(2, "ERROR\n", strlen("ERROR\n")) < 0 && errno == EINTR) {
+    }
+}
+
+/* -----------------------------------------------
+    shows help in case of wrong input
+    ----------------------------------------------- */
+
+// Prints the command-line usage summary to msgout: one usage line built from
+// appname, followed by one line per supported switch.
+void show_help(void) {
+    fprintf(msgout, "Usage: %s [switches] -in input_file [output_file]", appname);
+    fprintf(msgout, "\n");
+    fprintf(msgout, "\n");
+    fprintf(msgout, " [-version]       Version of lepton codec\n");
+    fprintf(msgout, " [-revision]      Source revision of lepton binary\n");
+    fprintf(msgout, " [-unjailed]      Do not jail this process (use only with trusted data)\n");
+    fprintf(msgout, " [-singlethread]  Do not clone threads to operate on the input file\n");
+    fprintf(msgout, " [-maxchildren]   Max codes to ever spawn at the same time in socket mode\n");
+    fprintf(msgout, " [-preload]       Preload decoding code\n");
+    fprintf(msgout, " [-unkillable]    Ignore SIGTERM and SIGQUIT after alarm timer is set\n");
+    fprintf(msgout, " [-allowprogressive] Allow progressive jpegs through the compressor\n");
+    fprintf(msgout, " [-fork]          Serve requests on a series of pipes [deprecated]\n");
+    fprintf(msgout, " [-zlib0]         Instead of a jpg, return a zlib-compressed jpeg\n");
+    fprintf(msgout, " [-timebound=<>ms]For -socket, enforce a timeout since first byte received\n");
+    fprintf(msgout, " [-trunc=<>]      Truncate input file to N bytes and do not read further\n");
+    fprintf(msgout, " [-memory=<>M]    Upper bound on the amount of memory allocated by main\n");
+    fprintf(msgout, " [-threadmemory=<>M] Bound on the amount of memory allocated by threads\n");
+    fprintf(msgout, " [-hugepages]     Allocate from the hugepages on the system\n");
+    fprintf(msgout, " [-avx2upgrade]   Try to exec <binaryname>-avx if avx is available\n");
+    fprintf(msgout, " [-injectsyscall={1..4}]  Inject a \"chdir\" syscall & check SECCOMP crashes\n");
+    fprintf(msgout, " [-socket]        Serve requests on a Unix Domain Socket\n");
+    fprintf(msgout, " [-socket=<name>] Path to socket (otherwise random path used and printed)\n");
+    fprintf(msgout, " [-listen]        Serve requests on a TCP socket on port 2402\n");
+    fprintf(msgout, " [-listen=<port>] Serve requests on a TCP socket on port <port>\n");
+    // Typo fix: this line previously read "oi port 2403".
+    fprintf(msgout, " [-zliblisten]        Serve requests on a TCP socket on port 2403\n");
+    fprintf(msgout, " [-zliblisten=<port>] Serve requests on a TCP socket on port <port>\n");
+    fprintf(msgout, " [-recodememory=<>M] Check that a singlethreaded recode only uses <>M mem\n");
+}
+
+/* ----------------------- End of main interface functions -------------------------- */
+
+/* ----------------------- Begin of main functions -------------------------- */
+
+// Size callback that intentionally does nothing; both arguments are ignored.
+void nop(Sirikata::DecoderWriter* w, size_t) {
+    (void)w;
+}
+// Size callback for zlib0 output: downcasts `w` to Sirikata::Zlib0Writer and
+// forwards `size` to setFullFileSize(). The cast is unchecked, so `w` must
+// really be a Zlib0Writer.
+void static_cast_to_zlib_and_call(Sirikata::DecoderWriter* w, size_t size) {
+    Sirikata::Zlib0Writer* zlib_writer = static_cast<Sirikata::Zlib0Writer*>(w);
+    zlib_writer->setFullFileSize(size);
+}
+/* -----------------------------------------------
+    check file and determine filetype
+    ----------------------------------------------- */
+// Reads the fixed 22-byte header from str_in and applies it to file-level state:
+//   byte 0       version, stored into ujgversion (1, 2 or the current ujgversion accepted)
+//   byte 1       flag byte; 'Z' or a low bit matching 'Y' clears g_allow_progressive
+//                unless g_force_progressive is set
+//   byte 2       thread-count hint, which can only lower NUM_THREADS
+//   bytes 18-21  little-endian size stored into max_file_size
+// Exits through custom_exit() on a short read or an unsupported version.
+// Returns NUM_THREADS after the hint has been applied.
+unsigned char read_fixed_ujpg_header() {
+    Sirikata::Array1d<unsigned char, 22> header;
+    header.memset(0);
+
+    if (IOUtil::ReadFull(str_in, header.begin(), 22) != 22) {
+        custom_exit(ExitCode::SHORT_READ);
+    }
+
+    // check version number
+    const unsigned char file_version = header[0];
+    const bool version_known = (file_version == 1) || (file_version == 2) || (file_version == ujgversion);
+    if (!version_known) {
+        // let us roll out a new version gently
+        fprintf(stderr, "incompatible file, use %s v%i.%i", appname, file_version / 10, file_version % 10);
+        custom_exit(ExitCode::VERSION_UNSUPPORTED);
+    }
+    ujgversion = file_version;
+
+    // Only 'X', 'Y' and 'Z' are expected in the flag byte; anything else is reported on fd 2.
+    const unsigned char flag = header[1];
+    if (flag != 'X' && flag != 'Z' && flag != 'Y') {
+        char err[] = "?: Unknown Item in header instead of Z";
+        err[0] = flag;
+        while (write(2, err, sizeof(err) - 1) < 0 && errno == EINTR) {
+        }
+    }
+
+    const bool disables_progressive = (flag == 'Z') || ((flag & 1) == ('Y' & 1));
+    if (disables_progressive && !g_force_progressive) {
+        g_allow_progressive = false;
+    }
+
+    unsigned char num_threads_hint = header[2];
+    always_assert(num_threads_hint != 0);
+    if (num_threads_hint != 0 && num_threads_hint < NUM_THREADS) {
+        NUM_THREADS = num_threads_hint;
+    }
+
+    // full size of the original file
+    Sirikata::Array1d<unsigned char, 4>::Slice size_field = header.slice<18, 22>();
+    max_file_size = LEtoUint32(size_field.begin());
+    return NUM_THREADS;
+}
+
+// Binds fd_in / fd_out to reader / writer objects and classifies the input by
+// its two-byte `fileid`:
+//   - JPEG header: str_in becomes a buffered reader, filetype = JPEG and
+//     NUM_THREADS is capped at max_encode_threads;
+//   - ujg / lepton / zlepton header: str_in is the raw reader, filetype is UJG
+//     or LEPTON and str_out is created (wrapped in a Zlib0Writer when zlib0
+//     output is requested by the header, g_force_zlib0_out or force_zlib0);
+//   - anything else: filetype = UNK, errorlevel = 2.
+// Returns true on success, false when the type is unknown or str_out reports
+// an error.
+//
+// NOTE(review): `ujg_out` is received by value, so the assignment in the JPEG
+// branch never reaches the caller -- confirm whether that is intended.
+// NOTE(review): the "^^^^^^^^" marker goes to stdout; verify it cannot mix
+// with payload data when output is piped.
+bool check_file(IOUtil::FileWriter* ujg_out,
+                int fd_in,
+                int fd_out,
+                uint32_t max_file_size,
+                bool force_zlib0,
+                Sirikata::Array1d<uint8_t, 2> fileid,
+                bool is_socket) {
+    IOUtil::FileReader* fd_reader = IOUtil::BindFdToReader(fd_in, max_file_size, is_socket);
+    if (fd_reader == NULL) {
+        custom_exit(ExitCode::FILE_NOT_FOUND);
+    }
+    // The identifier bytes were consumed by the caller before this point.
+    fd_reader->mark_some_bytes_already_read((uint32_t)fileid.size());
+    if (is_socket) {
+        assert(fd_in == fd_out);
+    }
+    IOUtil::FileWriter* fd_writer = IOUtil::BindFdToWriter(fd_out, is_socket);
+    ujg_base_in = fd_reader;
+
+    // check file id, determine filetype
+    if (is_jpeg_header(fileid)) {
+        // file is JPEG
+        str_in = new Sirikata::BufferedReader<JPG_READ_BUFFER_SIZE>(fd_reader);
+        filetype = JPEG;
+        NUM_THREADS = std::min(NUM_THREADS, (unsigned int)max_encode_threads);
+        // open output stream, check for errors
+        std::cout << "^^^^^^^^" << std::endl;
+        ujg_out = fd_writer;
+        return true;
+    }
+
+    const bool is_ujg = (fileid[0] == ujg_header[0]) && (fileid[1] == ujg_header[1]);
+    const bool is_lepton = (fileid[0] == lepton_header[0]) && (fileid[1] == lepton_header[1]);
+    const bool is_zlepton = (fileid[0] == zlepton_header[0]) && (fileid[1] == zlepton_header[1]);
+
+    if (!(is_ujg || is_lepton || is_zlepton)) {
+        // file is neither
+        filetype = UNK;
+        fprintf(stderr, "filetype of file \"%s\" is unknown", filelist[file_no]);
+        errorlevel.store(2);
+        return false;
+    }
+
+    // file is UJG (or one of the LEPTON flavours)
+    str_in = fd_reader;
+    const bool wants_zlib0 = is_zlepton || g_force_zlib0_out || force_zlib0;
+    filetype = is_ujg ? UJG : LEPTON;
+
+    std::function<void(Sirikata::DecoderWriter*, size_t file_size)> on_size_known = &nop;
+    Sirikata::DecoderWriter* sink = fd_writer;
+    if (wants_zlib0) {
+        Sirikata::Zlib0Writer* zlib_sink = new Sirikata::Zlib0Writer(fd_writer, 0);
+        on_size_known = &static_cast_to_zlib_and_call;
+        sink = zlib_sink;
+    }
+
+    str_out = new bounded_iostream(sink, on_size_known, Sirikata::JpegAllocator<uint8_t>());
+    if (str_out->chkerr()) {
+        fprintf(stderr, FWR_ERRMSG, filelist[file_no]);
+        errorlevel.store(2);
+        return false;
+    }
+
+    return true;
+}
+
+// Decides whether a header segment must be kept for a block other than the first.
+// Segments that are too short or do not start with 0xFF are kept because they
+// cannot be interpreted. DHT, DQT, DRI, SOS and SOF0-2 segments are kept; every
+// other recognised marker is dropped. SOI/EOI are not expected here and trip
+// an assert in debug builds.
+bool is_needed_for_second_block(const std::vector<unsigned char>& segment) {
+    const bool looks_like_marker = segment.size() > 2 && segment[0] == 0xff;
+    if (!looks_like_marker) {
+        return true; // don't understand this type of header
+    }
+
+    const unsigned char marker = segment[1];
+    if (marker == 0xD8 || marker == 0xD9) {
+        assert(false && "This should be filtered out by the previous loop");
+        return true;
+    }
+
+    const bool required = marker == 0xC4     // DHT segment
+                          || marker == 0xDB  // DQT segment
+                          || marker == 0xDD  // DRI segment
+                          || marker == 0xDA  // Start of scan
+                          || marker == 0xC0 || marker == 0xC1 || marker == 0xC2;
+    return required;
+}
+/* -----------------------------------------------
+    Read in header & image data
+    ----------------------------------------------- */
+unsigned char EOI[2] = {0xFF, 0xD9}; // EOI segment: the two marker bytes that read_jpeg compares against
+// Reads a JPEG stream from `jpg_in` and splits it into three byte sinks:
+//   - marker segments go to a header writer (hdrw),
+//   - the data following each SOS marker (0xDA) goes to a huffman writer (huffw),
+//   - the last two bytes read plus everything after them go to a garbage writer (grbgw).
+// While reading scan data, one (huffw position, jpg_in size) pair is appended to
+// `huff_input_offsets` per iteration of the scan loop, and the file-level
+// counters scnc, rst_cnt and rst_err are updated.
+// Returns true on success. Returns false (with errorlevel set to 2) on a
+// marker size mismatch or when hdrs / hufs is still zero after the loop, and
+// also false when setup_imginfo_jpg() fails.
+// NOTE(review): hdrs and hufs are only zeroed in this function; presumably
+// early_eof()/standard_eof() fill them in and take over hdrw/huffw -- confirm,
+// because the writers are not deleted on most exit paths here.
+template <class input_byte_stream>
+bool read_jpeg(std::vector<std::pair<uint32_t, uint32_t> >* huff_input_offsets, input_byte_stream* jpg_in) {
+    std::vector<unsigned char> segment(1024); // storage for current segment
+    unsigned char type = 0x00;                // type of current marker segment
+    unsigned int len = 0;                     // length of current marker segment
+    unsigned int crst = 0;                    // current rst marker counter
+    unsigned int cpos = 0;                    // rst marker counter
+    unsigned char tmp;
+
+    abytewriter* huffw;
+    abytewriter* hdrw;
+    abytewriter* grbgw;
+
+    // preset count of scans
+    scnc = 0;
+    // start headerwriter
+    hdrw = new abytewriter(4096);
+    hdrs = 0; // size of header data, start with 0
+
+    // start huffman writer
+    huffw = new abytewriter(0);
+    hufs = 0; // size of image data, start with 0
+
+    // JPEG reader loop
+    while (true) {
+        if (type == 0xDA) { // if last marker was sos
+            // switch to huffman data reading mode
+            cpos = 0;
+            crst = 0;
+            while (true) {
+                // record where this stretch of scan data starts in both output and input
+                huff_input_offsets->push_back(std::pair<uint32_t, uint32_t>(huffw->getpos(), jpg_in->getsize()));
+                // read byte from imagedata
+                if (jpg_in->read_byte(&tmp) == false) {
+                    early_eof(hdrw, huffw);
+                    fprintf(stderr, "Early EOF\n");
+                    break;
+                }
+                // non-0xFF loop
+                if (tmp != 0xFF) {
+                    crst = 0;
+                    while (tmp != 0xFF) {
+                        huffw->write(tmp);
+                        if (jpg_in->read_byte(&tmp) == false) {
+                            early_eof(hdrw, huffw);
+                            break;
+                        }
+                    }
+                }
+
+                // treatment of 0xFF
+                if (tmp == 0xFF) {
+                    if (jpg_in->read_byte(&tmp) == false) {
+                        early_eof(hdrw, huffw);
+                        break; // read next byte & check
+                    }
+                    if (tmp == 0x00) {
+                        crst = 0;
+                        // no zeroes needed -> ignore 0x00. write 0xFF
+                        huffw->write(0xFF);
+                        write_byte_bill(Billing::DELIMITERS, false, 1);
+                    } else if (tmp == 0xD0 + (cpos & 7)) { // restart marker
+                        // only the expected marker in the 0xD0..0xD7 cycle is accepted here;
+                        // the marker bytes themselves are not written to huffw
+                        // increment rst counters
+                        write_byte_bill(Billing::DELIMITERS, false, 2);
+                        cpos++;
+                        crst++;
+                        // grow the per-scan counter list up to the current scan index
+                        while (rst_cnt.size() <= (size_t)scnc) {
+                            rst_cnt.push_back(0);
+                        }
+                        ++rst_cnt.at(scnc);
+                    } else { // in all other cases leave it to the header parser routines
+                        // store number of falsely set rst markers
+                        if ((int)rst_err.size() < scnc) {
+                            rst_err.insert(rst_err.end(), scnc - rst_err.size(), 0);
+                        }
+                        rst_err.push_back(crst);
+                        // end of current scan
+                        scnc++;
+                        always_assert(rst_err.size() == (size_t)scnc && "All reset errors must be accounted for");
+                        // on with the header parser routines
+                        segment[0] = 0xFF;
+                        segment[1] = tmp;
+                        break;
+                    }
+                } else {
+                    // otherwise this means end-of-file, so break out
+                    break;
+                }
+            }
+        } else {
+            // read in next marker
+            if (jpg_in->read(segment.data(), 2) != 2) break;
+            if (segment[0] != 0xFF) {
+                // ugly fix for incorrect marker segment sizes
+                fprintf(stderr, "size mismatch in marker segment FF %2X", type);
+                errorlevel.store(2);
+                if (type == 0xFE) { //  if last marker was COM try again
+                    if (jpg_in->read(segment.data(), 1) != 1) break;
+                    // finding 0xFF one byte later downgrades the error to level 1
+                    if (segment[0] == 0xFF) errorlevel.store(1);
+                }
+                if (errorlevel.load() == 2) {
+                    delete (hdrw);
+                    delete (huffw);
+                    return false;
+                }
+            }
+        }
+
+        // read segment type
+        type = segment[1];
+
+        // if EOI is encountered make a quick exit
+        if (type == EOI[1]) {
+            standard_eof(hdrw, huffw);
+            // everything is done here now
+            break;
+        }
+
+        // read in next segments' length and check it
+        if (jpg_in->read(segment.data() + 2, 2) != 2) break;
+        // total length = 2 marker bytes + the length field value
+        len = 2 + B_SHORT(segment[2], segment[3]);
+        if (len < 4) break;
+
+        // realloc segment data if needed
+        segment.resize(len);
+
+        // read rest of segment, store back in header writer
+        if (jpg_in->read((segment.data() + 4), (len - 4)) != (unsigned short)(len - 4)) break;
+        // with a non-zero start_byte only the segments selected by is_needed_for_second_block are kept
+        if (start_byte == 0 || is_needed_for_second_block(segment)) {
+            hdrw->write_n(segment.data(), len);
+        }
+    }
+    // JPEG reader loop end
+
+    // free writers
+    // delete ( hdrw );
+    // delete ( huffw );
+
+    // check if everything went OK
+    if (hdrs == 0) {
+        fprintf(stderr, "unexpected end of data encountered in header");
+        errorlevel.store(2);
+        return false;
+    }
+    if (hufs == 0) {
+        fprintf(stderr, "unexpected end of data encountered in huffman");
+        errorlevel.store(2);
+        return false;
+    }
+
+    // store garbage at EOI
+    grbgw = new abytewriter(1024);
+    unsigned char grb0 = jpg_in->get_penultimate_read();
+    unsigned char grb1 = jpg_in->get_last_read();
+    grbgw->write(grb0); // should be 0xff (except if truncated)
+    grbgw->write(grb1); // should be d9 (except if truncated)
+    // drain whatever is left in the input into the garbage writer
+    while (true) {
+        len = jpg_in->read(segment.data(), segment.size());
+        if (len == 0) break;
+        grbgw->write_n(segment.data(), len);
+    }
+    grbgdata = grbgw->getptr_aligned();
+    grbs = grbgw->getpos();
+    // delete ( grbgw );
+    // a trailer that is exactly the EOI marker carries no information: drop it
+    if (grbs == sizeof(EOI) && 0 == memcmp(grbgdata, EOI, sizeof(EOI))) {
+        grbs = 0;
+        aligned_dealloc(grbgdata);
+        grbgdata = NULL;
+    }
+
+    // get filesize
+    jpgfilesize = jpg_in->getsize();
+
+    // parse header for image info
+    if (!setup_imginfo_jpg(false)) {
+        return false;
+    }
+
+    return true;
+}
+
+enum MergeJpegStreamingStatus {
+    STREAMING_ERROR     = 0, // merge_jpeg_streaming: the output stream reported a write error
+    STREAMING_SUCCESS   = 1, // merge_jpeg_streaming: ran to the end, output flushed
+    STREAMING_NEED_DATA = 2, // merge_jpeg_streaming: stopped early, more huffman data required (only when !flush)
+    STREAMING_DISABLED  = 3, // not returned by merge_jpeg_streaming; NOTE(review): presumably set by callers - confirm
+};
+// True if any of the 16 bytes starting at local_huff_data equals 0xFF.
+// The SSE2 path uses an aligned load (_mm_load_si128): local_huff_data must be 16-byte aligned.
+bool aligned_memchr16ff(const unsigned char* local_huff_data) {
+#if defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+    const __m128i is_ff = _mm_cmpeq_epi8(_mm_load_si128((__m128i const*)local_huff_data), _mm_set1_epi8(-1));
+    const bool found = _mm_movemask_epi8(is_ff) != 0; // one mask bit per byte lane that matched 0xFF
+    assert(found == (memchr(local_huff_data, 0xff, 16) != NULL)); // debug-only cross-check against memchr
+    return found;
+#else
+    return memchr(local_huff_data, 0xff, 16) != NULL; // scalar fallback for targets without SSE2
+#endif
+}
+// Convert one hex digit ('0'-'9', 'a'-'f', 'A'-'F') to its value 0..15.
+// Any other character is not rejected; it simply yields val - '0' (converted to unsigned char).
+unsigned char hex_to_nibble(char val) {
+    const bool is_upper = ('A' <= val && val <= 'F');
+    const bool is_lower = ('a' <= val && val <= 'f');
+    if (is_upper) return val - 'A' + 10;
+    if (is_lower) return val - 'a' + 10;
+    return val - '0';
+}
+unsigned char hex_pair_to_byte(char big, char little) {
+    return (hex_to_nibble(big) << 4) + hex_to_nibble(little); // `big` is the high-nibble digit, `little` the low one
+}
+// Decode hex digit pairs from the NUL-terminated `input` into `output`; true iff output_size bytes were produced.
+bool hex_to_bin(unsigned char* output, const char* input, size_t output_size) {
+    size_t written = 0;
+    for (const char* p = input; written < output_size && p[0] && p[1]; p += 2)
+        output[written++] = hex_pair_to_byte(p[0], p[1]);
+    return written == output_size;
+}
+// Decide whether another restart marker may be written for `scan` (1-based: rst_cnt is indexed by scan - 1).
+// False when rstp is empty; otherwise true when rst_cnt_set is false; otherwise the scan must have an
+// rst_cnt entry and num_rst_markers_this_scan must still be below it.
+bool rst_cnt_ok(int scan, unsigned int num_rst_markers_this_scan) {
+    if (rstp.empty()) return false;
+    if (!rst_cnt_set) return true;
+    const size_t scan_idx = (size_t)scan - 1;
+    return scan_idx < rst_cnt.size() && num_rst_markers_this_scan < rst_cnt.at(scan_idx);
+}
+
+// Fill a ThreadHandoff describing the reader's current position.
+//   reader             supplies getpos() and overhang()
+//   huff_input_offsets (first, second) pairs; reader->getpos() is translated through the pair found by
+//                      std::lower_bound, so the vector must be sorted ascending
+//   mcu_y, luma_mul    give luma_y_start = luma_mul * mcu_y and luma_y_end = luma_mul * (mcu_y + 1)
+//   lastdc             last DC value per component, copied into last_dc
+// Note: segment_size receives the translated position, not a length; the caller needs to take the
+// difference between the chosen handoffs to compute the actual segment size.
+ThreadHandoff crystallize_thread_handoff(abitreader* reader,
+                                         const std::vector<std::pair<uint32_t, uint32_t> >& huff_input_offsets,
+                                         int mcu_y,
+                                         int lastdc[4],
+                                         int luma_mul) {
+    const uint32_t cur_pos = reader->getpos();
+    // Locate the pair just before the lower bound of (cur_pos, cur_pos), if there is one.
+    auto entry = std::lower_bound(huff_input_offsets.begin(), huff_input_offsets.end(),
+                                  std::pair<uint32_t, uint32_t>(cur_pos, cur_pos));
+    if (entry != huff_input_offsets.begin()) {
+        --entry;
+    }
+
+    // Translate cur_pos relative to that pair; an empty table leaves the result at 0.
+    uint32_t translated = 0;
+    if (entry != huff_input_offsets.end()) {
+        translated = entry->second + (cur_pos - entry->first);
+    }
+
+    ThreadHandoff handoff = ThreadHandoff::zero();
+    handoff.segment_size = translated;
+    handoff.luma_y_start = luma_mul * mcu_y;
+    handoff.luma_y_end = luma_mul * (mcu_y + 1);
+
+    // Copy at most four DC values, never more than last_dc can hold.
+    const size_t dc_slots = sizeof(handoff.last_dc) / sizeof(handoff.last_dc[0]);
+    for (unsigned int c = 0; c < 4 && c < dc_slots; ++c) {
+        handoff.last_dc[c] = lastdc[c];
+    }
+
+    std::tie(handoff.num_overhang_bits, handoff.overhang_byte) = reader->overhang();
+    return handoff;
+}
+
+MergeJpegStreamingStatus merge_jpeg_streaming(MergeJpegProgress* stored_progress, // resumable state (hpos/ipos/scan/...)
+                                              const unsigned char* local_huff_data, // huffman bytes, indexed by ipos
+                                              unsigned int max_byte_coded, // only indices below this are consumed
+                                              bool flush) { // when true, STREAMING_NEED_DATA is never returned
+    MergeJpegProgress progress(stored_progress); // NOTE(review): presumably loads/saves *stored_progress - confirm
+    unsigned char SOI[2] = {0xFF, 0xD8}; // SOI segment
+    // Rebuilds the JPEG byte stream on str_out: SOI, header segments up to each SOS, the scan's huffman data
+    // (0xFF stuffed with 0x00, RST markers re-inserted), then the stored garbage after the scan loop ends.
+    unsigned char type = 0x00; // type of current marker segment
+
+    if (progress.ipos == 0 && progress.hpos == 0 && progress.scan == 1 && progress.within_scan == false) {
+        str_out->set_bound(max_file_size - grbs); // keep grbs bytes of the limit free for the trailing garbage
+
+        // write SOI (only while progress is still in its initial state)
+        str_out->write(SOI, 2);
+    }
+
+    // JPEG writing loop: one pass per scan, unless it returns early to ask for more data
+    while (true) {
+        if (!progress.within_scan) {
+            progress.within_scan = true;
+            // store current header position
+            unsigned int tmp; // temporary storage variable
+            tmp = progress.hpos;
+
+            // advance hpos past every header segment up to and including the next SOS (0xDA)
+            for (type = 0x00; type != 0xDA;) {
+                if ((int)progress.hpos >= hdrs) break; // NOTE(review): hpos + 1..3 are read below without a check
+                type = hdrdata[progress.hpos + 1];
+                int len = 2 + B_SHORT(hdrdata[progress.hpos + 2], hdrdata[progress.hpos + 3]);
+                progress.hpos += len;
+            }
+            // write the header bytes that were just skipped over
+            str_out->write(hdrdata + tmp, (progress.hpos - tmp));
+            if ((!g_use_seccomp) && post_byte == 0) {
+                post_byte = clock(); // set once, the first time header bytes are written
+            }
+
+            // get out if last marker segment type was not SOS
+            if (type != 0xDA) break;
+
+            // (re)set corrected rst pos
+            progress.cpos = 0;
+            progress.ipos = scnp.at(progress.scan - 1); // start offset of this scan in local_huff_data
+        }
+        if ((int)progress.scan >
+            scnc + 1) { // don't want to go beyond our known number of scans (FIXME: danielrh@ is this > or >= )
+            break;
+        }
+        if (progress.ipos < max_byte_coded) {
+            timing_operation_first_byte('d');
+        }
+        // write & expand huffman coded image data
+        unsigned int progress_ipos = progress.ipos; // local cursor, stored back into progress.ipos after the loops
+        unsigned int progress_scan = scnp.at(progress.scan); // end offset of this scan; 0 disables the end check
+        unsigned int rstp_progress_rpos = rstp.empty() ? INT_MAX : rstp[progress.rpos]; // NOTE(review): unchecked []
+        const unsigned char mrk = 0xFF; // marker start
+        const unsigned char stv = 0x00; // 0xFF stuff value
+        for (; progress_ipos & 0xf; progress_ipos++) { // phase 1: bytewise until progress_ipos is a multiple of 16
+            if (__builtin_expect(
+                    !(progress_ipos < max_byte_coded && (progress_scan == 0 || progress_ipos < progress_scan)), 0)) {
+                break;
+            }
+            uint8_t byte_to_write = local_huff_data[progress_ipos];
+            str_out->write_byte(byte_to_write);
+            // check current byte, stuff if needed
+            if (__builtin_expect(byte_to_write == 0xFF, 0)) str_out->write_byte(stv);
+            // insert restart markers if needed
+            if (__builtin_expect(progress_ipos == rstp_progress_rpos, 0)) {
+                if (rst_cnt_ok(progress.scan, progress.num_rst_markers_this_scan)) {
+                    const unsigned char rst = 0xD0 + (progress.cpos & 7); // 0xD0..0xD7, cycling with cpos
+                    str_out->write_byte(mrk);
+                    str_out->write_byte(rst);
+                    progress.rpos++;
+                    progress.cpos++;
+                    rstp_progress_rpos = rstp.at(progress.rpos);
+                    ++progress.num_rst_markers_this_scan;
+                }
+            }
+        }
+
+        while (true) { // phase 2: 16 bytes per pass; NOTE(review): relies on local_huff_data being 16-byte aligned
+            if (__builtin_expect(!(progress_ipos + 15 < max_byte_coded &&
+                                   (progress_scan == 0 || progress_ipos + 15 < progress_scan)),
+                                 0)) {
+                break;
+            }
+            if (__builtin_expect(aligned_memchr16ff(local_huff_data + progress_ipos) ||
+                                     (progress_ipos <= rstp_progress_rpos && progress_ipos + 15 >= rstp_progress_rpos),
+                                 0)) {
+                // slow path: the block holds a 0xFF byte or the next restart position, so copy it byte by byte
+                for (int veci = 0; veci < 16; ++veci, ++progress_ipos) {
+                    if (__builtin_expect(progress_ipos == rstp_progress_rpos, 0)) {
+                        uint8_t byte_to_write = local_huff_data[progress_ipos];
+                        str_out->write_byte(byte_to_write);
+                        // check current byte, stuff if needed
+                        if (__builtin_expect(byte_to_write == 0xFF, 0)) {
+                            str_out->write_byte(stv);
+                        }
+                        if (rst_cnt_ok(progress.scan, progress.num_rst_markers_this_scan)) {
+                            const unsigned char rst = 0xD0 + (progress.cpos & 7); // 0xD0..0xD7, cycling with cpos
+                            str_out->write_byte(mrk);
+                            str_out->write_byte(rst);
+                            progress.rpos++;
+                            progress.cpos++;
+                            rstp_progress_rpos = rstp.at(progress.rpos);
+                            ++progress.num_rst_markers_this_scan;
+                        }
+                    } else {
+                        uint8_t byte_to_write = local_huff_data[progress_ipos];
+                        str_out->write_byte(byte_to_write);
+                        // check current byte, stuff if needed
+                        if (__builtin_expect(byte_to_write == 0xFF, 0)) {
+                            str_out->write_byte(stv);
+                        }
+                    }
+                }
+            } else { // fast path: no 0xFF and no restart position in these 16 bytes, copy them verbatim
+                str_out->write(local_huff_data + progress_ipos, 16);
+                progress_ipos += 16;
+            }
+        }
+        for (;; progress_ipos++) { // phase 3: bytewise tail, same per-byte handling as phase 1
+            if (__builtin_expect(
+                    !(progress_ipos < max_byte_coded && (progress_scan == 0 || progress_ipos < progress_scan)), 0)) {
+                break;
+            }
+            uint8_t byte_to_write = local_huff_data[progress_ipos];
+            str_out->write_byte(byte_to_write);
+            // check current byte, stuff if needed
+            if (__builtin_expect(byte_to_write == 0xFF, 0)) str_out->write_byte(stv);
+            // insert restart markers if needed
+            if (__builtin_expect(progress_ipos == rstp_progress_rpos, 0)) {
+                if (rst_cnt_ok(progress.scan, progress.num_rst_markers_this_scan)) {
+                    const unsigned char rst = 0xD0 + (progress.cpos & 7); // 0xD0..0xD7, cycling with cpos
+                    str_out->write_byte(mrk);
+                    str_out->write_byte(rst);
+                    progress.rpos++;
+                    progress.cpos++;
+                    rstp_progress_rpos = rstp.at(progress.rpos);
+                    ++progress.num_rst_markers_this_scan;
+                }
+            }
+        }
+        progress.ipos = progress_ipos;
+        if (scnp.at(progress.scan) == 0 && !flush) { // scan end offset still 0: ask the caller for more data
+            return STREAMING_NEED_DATA;
+        }
+        if (progress.ipos >= max_byte_coded && progress.ipos != scnp.at(progress.scan) && !flush) {
+            return STREAMING_NEED_DATA; // ran out of available bytes before reaching the end of this scan
+        }
+        // insert false rst markers at end if needed
+        if (progress.scan - 1 < rst_err.size()) {
+            while (rst_err.at(progress.scan - 1) > 0) {
+                const unsigned char rst = 0xD0 + (progress.cpos & 7);
+                str_out->write_byte(mrk);
+                str_out->write_byte(rst);
+                progress.cpos++;
+                rst_err.at(progress.scan - 1)--; // counts down the stored rst_err entry itself
+            }
+        }
+        progress.num_rst_markers_this_scan = 0;
+        progress.within_scan = false;
+        // proceed with next scan
+        progress.scan++;
+        if (str_out->has_reached_bound()) { // output bound reached: stop emitting further scans
+            check_decompression_memory_bound_ok();
+            break;
+        }
+    }
+
+    // write EOI (now EOI is stored in garbage of at least 2 bytes)
+    // this guarantees that we can stop the write in time.
+    // if it used too much memory
+    // str_out->write( EOI, 1, 2 );
+    str_out->set_bound(max_file_size); // lift the bound back to the full limit before writing the garbage
+    check_decompression_memory_bound_ok();
+    // write garbage if needed
+    if (grbs > 0) str_out->write(grbgdata, grbs);
+    check_decompression_memory_bound_ok();
+    str_out->flush();
+
+    // error message if write error
+    if (str_out->chkerr()) {
+        fprintf(stderr, "write error, possibly drive is full");
+        errorlevel.store(2);
+        return STREAMING_ERROR;
+    }
+    // record the size of the stream written to str_out
+
+    jpgfilesize = str_out->getsize();
+    // record the size of ujg_base_in, or 4 MiB (4096 * 1024) when there is no ujg_base_in
+    if (ujg_base_in) {
+        ujgfilesize = ujg_base_in->getsize();
+    } else {
+        ujgfilesize = 4096 * 1024;
+    }
+#ifndef _WIN32
+    // FIXME (original marker, no details given): timing report printed to stderr unless g_use_seccomp is set
+    if (!g_use_seccomp) {
+        clock_t final = clock();
+        struct timeval fin = {0, 0};
+        gettimeofday(&fin, NULL);
+        double begin = current_operation_begin.tv_sec + (double)current_operation_begin.tv_usec / 1000000.;
+        double end = fin.tv_sec + (double)fin.tv_usec / 1000000.;
+        double first_byte =
+            current_operation_first_byte.tv_sec + (double)current_operation_first_byte.tv_usec / 1000000.;
+        double begin_to_end = end - begin;
+        double begin_to_first_byte = begin_to_end; // fallback when no first-byte time was recorded
+        if (current_operation_first_byte.tv_sec != 0) { // if we were successful
+            begin_to_first_byte = first_byte - begin;
+        }
+
+        fprintf(stderr, "TIMING (new method): %f to first byte %f total\n", begin_to_first_byte, begin_to_end);
+        (void)final; // only used by the disabled report below
+        /*
+                fprintf(stderr, "TIMING (recode): %f to first byte %f total\n",
+                        (double)(post_byte - pre_byte)/(double)CLOCKS_PER_SEC,
+                        (final - pre_byte)/(double)CLOCKS_PER_SEC);
+                fprintf(stderr, "TIMING(overall): %f to first byte %f total\n",
+                        (post_byte - overall_start)/(double)CLOCKS_PER_SEC,
+                        (final - overall_start)/(double)CLOCKS_PER_SEC);
+        */
+        fprintf(stderr, "Read took: %f\n", (read_done - overall_start) / (double)CLOCKS_PER_SEC);
+    }
+#endif
+    return STREAMING_SUCCESS;
+}
+
+/* -----------------------------------------------
+    JPEG decoding routine
+    ----------------------------------------------- */
+
+bool decode_jpeg(const std::vector<std::pair<uint32_t, uint32_t> >& huff_input_offsets,
+                 std::vector<ThreadHandoff>* luma_row_offset_return) {
+    abitreader* huffr; // bitwise reader for image data
+
+    unsigned char type = 0x00; // type of current marker segment
+    unsigned int len = 0;      // length of current marker segment
+    unsigned int hpos = 0;     // current position in header
+
+    int lastdc[4] = {0, 0, 0, 0};                   // last dc for each component
+    Sirikata::Aligned256Array1d<int16_t, 64> block; // store block for coeffs
+    int peobrun;                                    // previous eobrun
+    unsigned int eobrun;                            // run of eobs
+    int rstw;                                       // restart wait counter
+
+    int cmp, bpos, dpos;
+    int mcu = 0, sub, csc;
+    int eob, sta;
+    bool is_baseline = true;
+    max_cmp = 0;                           // the maximum component in a truncated image
+    max_bpos = 0;                          // the maximum band in a truncated image
+    memset(max_dpos, 0, sizeof(max_dpos)); // the maximum dpos in a truncated image
+    max_sah = 0;                           // the maximum bit in a truncated image
+
+    // open huffman coded image data for input in abitreader
+    huffr = new abitreader(huffdata, hufs);
+    // preset count of scans
+    scnc = 0;
+
+    // JPEG decompression loop
+    //  g_loops.START("LOOP:decompression", WHILE);//g_loops.END();
+    while (true) {
+        // seek till start-of-scan, parse only DHT, DRI and SOS
+        for (type = 0x00; type != 0xDA;) {
+            if ((int)hpos >= hdrs) break;
+            type = hdrdata[hpos + 1];
+            len = 2 + B_SHORT(hdrdata[hpos + 2], hdrdata[hpos + 3]);
+            if ((type == 0xC4) || (type == 0xDA) || (type == 0xDD)) {
+                if (!parse_jfif_jpg(type, len, &(hdrdata[hpos]))) {
+                    delete huffr;
+                    return false;
+                }
+            }
+            hpos += len;
+        }
+
+        // get out if last marker segment type was not SOS
+        if (type != 0xDA) break;
+
+        // check if huffman tables are available
+        for (csc = 0; csc < cs_cmpc; csc++) {
+            cmp = cs_cmp[csc];
+            if (((cs_sal == 0) && (htset[0][cmpnfo[cmp].huffdc] == 0)) ||
+                ((cs_sah > 0) && (htset[1][cmpnfo[cmp].huffac] == 0))) {
+                fprintf(stderr, "huffman table missing in scan%i", scnc);
+                delete huffr;
+                errorlevel.store(2);
+                return false;
+            }
+        }
+
+        // intial variables set for decoding
+        cmp = cs_cmp[0];
+        csc = 0;
+        mcu = 0;
+        sub = 0;
+        dpos = 0;
+        if (!huffr->eof) {
+            max_bpos = std::max(max_bpos, cs_to);
+            // FIXME: not sure why only first bit of cs_sah is examined but 4 bits of it are stored
+            max_sah = std::max(max_sah, std::max(cs_sal, cs_sah));
+            for (int i = 0; i < cs_cmpc; ++i) {
+                max_cmp = std::max(max_cmp, cs_cmp[i]);
+            }
+        }
+        /*
+                // startup
+                luma_row_offset_return->push_back(crystallize_thread_handoff(huffr,
+                                                                             huff_input_offsets,
+                                                                             mcu / mcuh,
+                                                                             lastdc,
+                                                                             cmpnfo[0].bcv / mcuv));
+        */
+        bool do_handoff_print = true;
+        // JPEG imagedata decoding routines
+        while (true) {
+            // (re)set last DCs for diff coding
+            lastdc[0] = 0;
+            lastdc[1] = 0;
+            lastdc[2] = 0;
+            lastdc[3] = 0;
+
+            // (re)set status
+            sta = 0;
+
+            // (re)set eobrun
+            eobrun = 0;
+            peobrun = 0;
+
+            // (re)set rst wait counter
+            rstw = rsti;
+            if (cs_cmpc != colldata.get_num_components()) {
+                if (!g_allow_progressive) {
+                    custom_exit(ExitCode::PROGRESSIVE_UNSUPPORTED);
+                } else {
+                    is_baseline = false;
+                }
+            }
+
+            if (jpegtype != 1) {
+                if (!g_allow_progressive) {
+                    custom_exit(ExitCode::PROGRESSIVE_UNSUPPORTED);
+                } else {
+                    is_baseline = false;
+                }
+            }
+            // decoding for interleaved data
+            if (cs_cmpc > 1) {
+                if (jpegtype == 1) {
+                    //                            std::cout<<"************"<<std::endl;
+                    // ---> sequential interleaved decoding <---
+                    while (sta == 0) {
+                        if (do_handoff_print) {
+                            luma_row_offset_return->push_back(crystallize_thread_handoff(
+                                huffr, huff_input_offsets, mcu / mcuh, lastdc, cmpnfo[0].bcv / mcuv));
+                            do_handoff_print = false;
+                        }
+
+                        if (!huffr->eof) {
+                            max_dpos[cmp] = std::max(dpos, max_dpos[cmp]); // record the max block read
+                        }
+                        // decode block
+                        eob = decode_block_seq(huffr, &(htrees[0][cmpnfo[cmp].huffdc]),
+                                               &(htrees[1][cmpnfo[cmp].huffac]), block.begin());
+                        if (eob > 1 && !block[eob - 1]) {
+                            fprintf(stderr, "cannot encode image with eob after last 0");
+                            errorlevel.store(1);
+                        }
+
+                        // fix dc
+                        block[0] += lastdc[cmp];
+                        lastdc[cmp] = block[0];
+
+                        AlignedBlock& aligned_block = colldata.mutable_block((BlockType)cmp, dpos);
+
+                        // copy to colldata
+                        //                        for ( bpos = 0; bpos < eob; bpos++ ) {
+                        //                            aligned_block.mutable_coefficients_zigzag(bpos) = block[ bpos ];
+                        //                            aligned_block.coef.at(bpos) = block[ bpos ];
+                        //                        }
+                        // check for errors, proceed if no error encountered
+                        int old_mcu = mcu;
+                        if (eob < 0)
+                            sta = -1;
+                        else
+                            sta = next_mcupos(&mcu, &cmp, &csc, &sub, &dpos, &rstw, cs_cmpc);
+                        if (mcu % mcuh == 0 && old_mcu != mcu) {
+                            do_handoff_print = true;
+                            // fprintf(stderr, "ROW %d\n", (int)row_handoff.size());
+                        }
+                        if (huffr->eof) {
+                            sta = 2;
+                            break;
+                        }
+                    }
+                } else if (cs_sah == 0) {
+                    //                            std::cout<<"^^^^^^^^^^^"<<std::endl;
+                    // ---> progressive interleaved DC decoding <---
+                    // ---> succesive approximation first stage <---
+                    while (sta == 0) {
+                        if (do_handoff_print) {
+                            luma_row_offset_return->push_back(crystallize_thread_handoff(
+                                huffr, huff_input_offsets, mcu / mcuh, lastdc, cmpnfo[0].bcv / mcuv));
+                            do_handoff_print = false;
+                        }
+                        if (!huffr->eof)
+                            max_dpos[cmp] = std::max(dpos, max_dpos[cmp]); // record the max block serialized
+                        sta = decode_dc_prg_fs(huffr, &(htrees[0][cmpnfo[cmp].huffdc]), block.begin());
+
+                        // fix dc for diff coding
+                        colldata.set((BlockType)cmp, 0, dpos) = block[0] + lastdc[cmp];
+
+                        uint16_t u_last_dc = lastdc[cmp] = colldata.set((BlockType)cmp, 0, dpos);
+                        u_last_dc <<= cs_sal; // lastdc might be negative--this avoids UB
+                        // bitshift for succesive approximation
+                        colldata.set((BlockType)cmp, 0, dpos) = u_last_dc;
+
+                        // next mcupos if no error happened
+                        int old_mcu = mcu;
+                        if (sta != -1) {
+                            sta = next_mcupos(&mcu, &cmp, &csc, &sub, &dpos, &rstw, cs_cmpc);
+                        }
+                        if (mcu % mcuh == 0 && old_mcu != mcu) {
+                            do_handoff_print = true;
+                            // fprintf(stderr, "ROW %d\n", (int)row_handoff.size());
+                        }
+                        if (huffr->eof) {
+                            sta = 2;
+                            break;
+                        }
+                    }
+                } else {
+                    //                            std::cout<<"%%%%%%%%%%"<<std::endl;
+                    // ---> progressive interleaved DC decoding <---
+                    // ---> succesive approximation later stage <---
+                    while (sta == 0) {
+                        if (!huffr->eof)
+                            max_dpos[cmp] = std::max(dpos, max_dpos[cmp]); // record the max block serialized
+                        // decode next bit
+                        sta = decode_dc_prg_sa(huffr, block.begin());
+
+                        // shift in next bit
+                        colldata.set((BlockType)cmp, 0, dpos) += block[0] << cs_sal;
+
+                        // next mcupos if no error happened
+                        if (sta != -1) sta = next_mcupos(&mcu, &cmp, &csc, &sub, &dpos, &rstw, cs_cmpc);
+                        if (huffr->eof) {
+                            sta = 2;
+                            break;
+                        }
+                    }
+                }
+            } else // decoding for non interleaved data
+            {
+                if (jpegtype == 1) {
+                    //                            std::cout<<"&&&&&&&&&&&&&&"<<std::endl;
+                    int vmul = cmpnfo[0].bcv / mcuv;
+                    int hmul = cmpnfo[0].bch / mcuh;
+                    // ---> sequential non interleaved decoding <---
+                    while (sta == 0) {
+                        if (do_handoff_print) {
+                            luma_row_offset_return->push_back(crystallize_thread_handoff(huffr, huff_input_offsets,
+                                                                                         (dpos / (hmul * vmul)) / mcuh,
+                                                                                         lastdc, cmpnfo[0].bcv / mcuv));
+                            do_handoff_print = false;
+                        }
+                        if (!huffr->eof)
+                            max_dpos[cmp] = std::max(dpos, max_dpos[cmp]); // record the max block serialized
+                        // decode block
+                        eob = decode_block_seq(huffr, &(htrees[0][cmpnfo[cmp].huffdc]),
+                                               &(htrees[1][cmpnfo[cmp].huffac]), block.begin());
+                        if (eob > 1 && !block[eob - 1]) {
+                            fprintf(stderr, "cannot encode image with eob after last 0");
+                            errorlevel.store(1);
+                        }
+                        // fix dc
+                        block[0] += lastdc[cmp];
+                        lastdc[cmp] = block[0];
+
+                        // copy to colldata
+                        AlignedBlock& aligned_block = colldata.mutable_block((BlockType)cmp, dpos);
+                        for (bpos = 0; bpos < eob; bpos++) {
+                            //                            aligned_block.mutable_coefficients_zigzag(bpos) = block[ bpos
+                            //                            ];
+                            aligned_block.coef.at(bpos) = block[bpos];
+                        }
+
+                        // check for errors, proceed if no error encountered
+                        if (eob < 0)
+                            sta = -1;
+                        else
+                            sta = next_mcuposn(&cmp, &dpos, &rstw);
+                        mcu = dpos / (hmul * vmul);
+                        if (cmp == 0 && (mcu % mcuh == 0) && (dpos % (hmul * vmul) == 0)) {
+                            do_handoff_print = true;
+                        }
+                        if (huffr->eof) {
+                            sta = 2;
+                            break;
+                        }
+                    }
+                } else if (cs_to == 0) {
+                    if (cs_sah == 0) {
+                        // ---> progressive non interleaved DC decoding <---
+                        // ---> succesive approximation first stage <---
+                        while (sta == 0) {
+                            //                            std::cout<<"!!!!!!!!!!!!!!"<<std::endl;
+                            if (do_handoff_print) {
+                                luma_row_offset_return->push_back(crystallize_thread_handoff(
+                                    huffr, huff_input_offsets, dpos / cmpnfo[cmp].bch, lastdc, cmpnfo[0].bcv / mcuv));
+                                do_handoff_print = false;
+                            }
+
+                            if (!huffr->eof)
+                                max_dpos[cmp] = std::max(dpos, max_dpos[cmp]); // record the max block serialized
+                            sta = decode_dc_prg_fs(huffr, &(htrees[0][cmpnfo[cmp].huffdc]), block.begin());
+
+                            // fix dc for diff coding
+                            colldata.set((BlockType)cmp, 0, dpos) = block[0] + lastdc[cmp];
+                            lastdc[cmp] = colldata.set((BlockType)cmp, 0, dpos);
+
+                            // bitshift for succesive approximation
+                            colldata.set((BlockType)cmp, 0, dpos) <<= cs_sal;
+
+                            // check for errors, increment dpos otherwise
+                            if (sta != -1) sta = next_mcuposn(&cmp, &dpos, &rstw);
+                            if (cmp == 0 && dpos % cmpnfo[cmp].bch == 0) {
+                                do_handoff_print = true;
+                            }
+                            if (huffr->eof) {
+                                sta = 2;
+                                break;
+                            }
+                        }
+                    } else {
+                        // ---> progressive non interleaved DC decoding <---
+                        // ---> succesive approximation later stage <---
+                        //                            std::cout<<"#######"<<std::endl;
+                        while (sta == 0) {
+                            if (!huffr->eof)
+                                max_dpos[cmp] = std::max(dpos, max_dpos[cmp]); // record the max block serialized
+                            // decode next bit
+                            sta = decode_dc_prg_sa(huffr, block.begin());
+
+                            // shift in next bit
+                            colldata.set((BlockType)cmp, 0, dpos) += block[0] << cs_sal;
+
+                            // check for errors, increment dpos otherwise
+                            if (sta != -1) sta = next_mcuposn(&cmp, &dpos, &rstw);
+                            if (huffr->eof) {
+                                sta = 2;
+                                break;
+                            }
+                        }
+                    }
+                } else {
+                    if (cs_sah == 0) {
+                        //                            std::cout<<"@@@@@@"<<std::endl;
+                        // ---> progressive non interleaved AC decoding <---
+                        // ---> succesive approximation first stage <---
+                        while (sta == 0) {
+                            if (!huffr->eof)
+                                max_dpos[cmp] = std::max(dpos, max_dpos[cmp]); // record the max block serialized
+                            // decode block
+                            eob = decode_ac_prg_fs(huffr, &(htrees[1][cmpnfo[cmp].huffac]), block.begin(), &eobrun,
+                                                   cs_from, cs_to);
+
+                            // check for non optimal coding
+                            if ((eob == cs_from) && (eobrun > 0) && (peobrun > 0) &&
+                                (peobrun < hcodes[1][cmpnfo[cmp].huffac].max_eobrun - 1)) {
+                                fprintf(stderr, "reconstruction of non optimal coding not supported");
+                                errorlevel.store(1);
+                            }
+                            AlignedBlock& aligned_block = colldata.mutable_block((BlockType)cmp, dpos);
+                            // copy to colldata
+                            for (bpos = cs_from; bpos < eob; bpos++) {
+                                uint16_t block_bpos = block[bpos];
+                                block_bpos <<= cs_sal; // prevents UB since block_bpos could be negative
+                                //                                aligned_block.mutable_coefficients_zigzag(bpos) =
+                                //                                block_bpos;
+                                aligned_block.coef.at(bpos) = block[bpos];
+                            }
+                            // check for errors
+                            if (eob < 0)
+                                sta = -1;
+                            else
+                                sta = skip_eobrun(&cmp, &dpos, &rstw, &eobrun);
+
+                            // proceed only if no error encountered
+                            if (sta == 0) sta = next_mcuposn(&cmp, &dpos, &rstw);
+                            if (huffr->eof) {
+                                sta = 2;
+                                break;
+                            }
+                        }
+                    } else {
+                        //                            std::cout<<"$$$$$$$$$$"<<std::endl;
+                        // ---> progressive non interleaved AC decoding <---
+                        // ---> succesive approximation later stage <---
+                        while (sta == 0) {
+                            // copy from colldata
+                            AlignedBlock& aligned_block = colldata.mutable_block((BlockType)cmp, dpos);
+                            for (bpos = cs_from; bpos <= cs_to; bpos++) {
+                                block[bpos] = aligned_block.coefficients_zigzag(bpos);
+                            }
+                            if (eobrun == 0) {
+                                if (!huffr->eof)
+                                    max_dpos[cmp] = std::max(dpos, max_dpos[cmp]); // record the max block serialized
+                                // decode block (long routine)
+                                eob = decode_ac_prg_sa(huffr, &(htrees[1][cmpnfo[cmp].huffac]), block.begin(), &eobrun,
+                                                       cs_from, cs_to);
+
+                                // check for non optimal coding
+                                if ((eob == cs_from) && (eobrun > 0) && (peobrun > 0) &&
+                                    (peobrun < hcodes[1][cmpnfo[cmp].huffac].max_eobrun - 1)) {
+                                    fprintf(stderr, "reconstruction of non optimal coding not supported");
+                                    errorlevel.store(1);
+                                }
+
+                                // store eobrun
+                                peobrun = eobrun;
+                            } else {
+                                if (!huffr->eof)
+                                    max_dpos[cmp] = std::max(dpos, max_dpos[cmp]); // record the max block serialized
+                                // decode block (short routine)
+                                eob = decode_eobrun_sa(huffr, block.begin(), &eobrun, cs_from, cs_to);
+                                if (eob > 1 && !block[eob - 1]) {
+                                    fprintf(stderr, "cannot encode image with eob after last 0");
+                                    errorlevel.store(1);
+                                }
+                            }
+
+                            // copy back to colldata
+                            for (bpos = cs_from; bpos <= cs_to; bpos++) {
+                                uint16_t block_bpos = block[bpos];
+                                block_bpos <<= cs_sal;
+                                aligned_block.mutable_coefficients_zigzag(bpos) += block_bpos;
+                            }
+                            // proceed only if no error encountered
+                            if (eob < 0)
+                                sta = -1;
+                            else
+                                sta = next_mcuposn(&cmp, &dpos, &rstw);
+                            if (huffr->eof) {
+                                sta = 2;
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+            // unpad huffman reader / check padbit
+            if (padbit != -1) {
+                if (padbit != huffr->unpad(padbit)) {
+                    fprintf(stderr, "inconsistent use of padbits");
+                    padbit = 1;
+                    errorlevel.store(1);
+                }
+            } else {
+                padbit = huffr->unpad(padbit);
+            }
+            // evaluate status
+            if (sta == -1) { // status -1 means error
+                fprintf(stderr, "decode error in scan%i / mcu%i", scnc, (cs_cmpc > 1) ? mcu : dpos);
+                delete huffr;
+                errorlevel.store(2);
+                return false;
+            } else if (sta == 2) { // status 2/3 means done
+                scnc++;            // increment scan counter
+                break;             // leave decoding loop, everything is done here
+            }
+            // else if ( sta == 1 ); // status 1 means restart - so stay in the loop
+        }
+    }
+    // g_loops.END();//g_loops.START("LOOP:decompression", WHILE);//
+    if (early_eof_encountered) {
+        colldata.set_truncation_bounds(max_cmp, max_bpos, max_dpos, max_sah);
+    }
+
+    luma_row_offset_return->push_back(
+        crystallize_thread_handoff(huffr, huff_input_offsets, mcu / mcuh, lastdc, cmpnfo[0].bcv / mcuv));
+
+    // check for unneeded data
+    if (!huffr->eof) {
+        fprintf(stderr, "unneeded data found after coded image data");
+        errorlevel.store(1);
+    }
+
+    // clean up
+    // delete( huffr );
+
+    if (is_baseline) {
+        g_allow_progressive = false;
+    }
+    return true;
+}
+
+/* -----------------------------------------------
+    JPEG encoding routine
+    ----------------------------------------------- */
+
+// Re-encodes the coefficients held in `colldata` into Huffman-coded JPEG scan
+// data. The header bytes in `hdrdata` are walked segment by segment; DHT, DRI
+// and SOS segments are handed to parse_jfif_jpg(), and one scan is encoded for
+// every SOS found. Finished bytes are pushed out through
+// merge_jpeg_streaming() as encoding proceeds.
+//
+// Scan start offsets are recorded in `scnp`, restart offsets in `rstp`, and
+// the resulting buffer/length are published through `huffdata` / `hufs`.
+//
+// Returns false when a header segment fails to parse, when a block cannot be
+// encoded, or when the bit writer reports an error (errorlevel is set to 2 in
+// the latter two cases); returns true otherwise.
+bool recode_jpeg(void) {
+    if (!g_use_seccomp) {
+        pre_byte = clock();
+    }
+    abitwriter* huffw;  // bitwise writer for image data
+    abytewriter* storw; // bytewise writer for storage of correction bits
+
+    unsigned char type = 0x00; // type of current marker segment
+    unsigned int len = 0;      // length of current marker segment
+    unsigned int hpos = 0;     // current position in header
+
+    int lastdc[4];                                  // last dc for each component
+    Sirikata::Aligned256Array1d<int16_t, 64> block; // store block for coeffs
+    unsigned int eobrun;                            // run of eobs
+    int rstw;                                       // restart wait counter
+
+    int cmp, bpos, dpos;
+    int mcu, sub, csc;
+    int eob, sta;
+    int tmp;
+
+    // open huffman coded image data in abitwriter
+    huffw = new abitwriter(ABIT_WRITER_PRELOAD, max_file_size);
+    huffw->fillbit = padbit;
+
+    // init storage writer
+    storw = new abytewriter(ABIT_WRITER_PRELOAD);
+
+    // preset count of scans and restarts
+    scnc = 0;
+    rstc = 0;
+    MergeJpegProgress streaming_progress;
+
+    // per-scan encoding loop: one iteration per SOS segment in the header
+    g_loops.START("while:JPEG decompression", WHILE);
+    while (true) {
+        g_loops.CNT(); // g_loops.END();
+        // seek till start-of-scan, parse only DHT, DRI and SOS
+        for (type = 0x00; type != 0xDA;) {
+            if ((int)hpos >= hdrs) break;
+            type = hdrdata[hpos + 1];
+            len = 2 + B_SHORT(hdrdata[hpos + 2], hdrdata[hpos + 3]);
+            if ((type == 0xC4) || (type == 0xDA) || (type == 0xDD)) {
+                if (!parse_jfif_jpg(type, len, &(hdrdata[hpos]))) {
+                    delete huffw;
+                    delete storw;
+                    return false;
+                }
+                // pre-size the position arrays by the largest vertical block count of any component
+                int max_scan = 0;
+                for (int i = 0; i < cmpc; ++i) {
+                    max_scan = std::max(max_scan, cmpnfo[i].bcv);
+                }
+                rstp.reserve(max_scan);
+                scnp.reserve(max_scan);
+                hpos += len;
+            } else {
+                hpos += len;
+                continue;
+            }
+        }
+
+        // get out if last marker segment type was not SOS
+        if (type != 0xDA) break;
+
+        // (re)alloc scan positions array
+        while ((int)scnp.size() < scnc + 2) {
+            scnp.push_back(0);
+        }
+
+        // (re)alloc restart marker positions array if needed
+        if (rsti > 0) {
+            tmp = rstc + ((cs_cmpc > 1) ? (mcuc / rsti) : (cmpnfo[cs_cmp[0]].bc / rsti));
+            while ((int)rstp.size() <= tmp) {
+                rstp.push_back((unsigned int)-1);
+            }
+        }
+
+        // initial variables set for encoding
+        cmp = cs_cmp[0];
+        csc = 0;
+        mcu = 0;
+        sub = 0;
+        dpos = 0;
+
+        // store scan position
+        scnp.at(scnc) = huffw->getpos();
+        scnp.at(scnc + 1) = 0; // danielrh@ avoid uninitialized memory when doing progressive writeout
+        bool first_pass = true;
+        // JPEG imagedata encoding routines
+        // Each pass of this loop encodes up to the next restart boundary (sta == 1)
+        // or to the end of the scan (sta == 2).
+        while (true) {
+            // (re)set last DCs for diff coding
+            lastdc[0] = 0;
+            lastdc[1] = 0;
+            lastdc[2] = 0;
+            lastdc[3] = 0;
+
+            // (re)set status
+            sta = 0;
+
+            // (re)set eobrun
+            eobrun = 0;
+
+            // (re)set rst wait counter
+            rstw = rsti;
+            // a scan that does not cover all components, or a non-sequential
+            // jpegtype, is only accepted when progressive support is enabled
+            if (cs_cmpc != colldata.get_num_components() && !g_allow_progressive) {
+                custom_exit(ExitCode::PROGRESSIVE_UNSUPPORTED);
+            }
+            if (jpegtype != 1 && !g_allow_progressive) {
+                custom_exit(ExitCode::PROGRESSIVE_UNSUPPORTED);
+            }
+            if ((jpegtype != 1 || cs_cmpc != colldata.get_num_components()) && colldata.is_memory_optimized(0) &&
+                first_pass) {
+                colldata.init(cmpnfo, cmpc, mcuh, mcuv, false);
+            }
+            first_pass = false;
+            // encoding for interleaved data
+            if (cs_cmpc > 1) {
+                if (jpegtype == 1) {
+                    // ---> sequential interleaved encoding <---
+                    while (sta == 0) {
+                        // copy from colldata
+                        const AlignedBlock& aligned_block = colldata.block((BlockType)cmp, dpos);
+                        // fprintf(stderr, "Reading from cmp(%d) dpos %d\n", cmp, dpos);
+                        for (bpos = 0; bpos < 64; bpos++) {
+                            block[bpos] = aligned_block.coefficients_zigzag(bpos);
+                        }
+                        int16_t dc = block[0];
+                        // diff coding for dc
+                        block[0] -= lastdc[cmp];
+                        lastdc[cmp] = dc;
+
+                        // encode block
+                        eob = encode_block_seq(huffw, &(hcodes[0][cmpnfo[cmp].huffdc]),
+                                               &(hcodes[1][cmpnfo[cmp].huffac]), block.begin());
+
+                        // check for errors, proceed if no error encountered
+                        if (eob < 0)
+                            sta = -1;
+                        else
+                            sta = next_mcupos(&mcu, &cmp, &csc, &sub, &dpos, &rstw, cs_cmpc);
+                        if (sta == 0 && huffw->no_remainder()) {
+                            merge_jpeg_streaming(&streaming_progress, huffw->peekptr(), huffw->getpos(), false);
+                        }
+                        if (str_out->has_exceeded_bound()) {
+                            sta = 2;
+                        }
+                    }
+                } else if (cs_sah == 0) {
+                    // ---> progressive interleaved DC encoding <---
+                    // ---> successive approximation first stage <---
+                    while (sta == 0) {
+                        // diff coding & bitshifting for dc
+                        tmp = colldata.at((BlockType)cmp, 0, dpos) >> cs_sal;
+                        block[0] = tmp - lastdc[cmp];
+                        lastdc[cmp] = tmp;
+
+                        // encode dc
+                        sta = encode_dc_prg_fs(huffw, &(hcodes[0][cmpnfo[cmp].huffdc]), block.begin());
+
+                        // next mcupos if no error happened
+                        if (sta != -1) sta = next_mcupos(&mcu, &cmp, &csc, &sub, &dpos, &rstw, cs_cmpc);
+                        if (sta == 0 && huffw->no_remainder()) {
+                            merge_jpeg_streaming(&streaming_progress, huffw->peekptr(), huffw->getpos(), false);
+                        }
+                        if (str_out->has_exceeded_bound()) {
+                            sta = 2;
+                        }
+                    }
+                } else {
+                    // ---> progressive interleaved DC encoding <---
+                    // ---> successive approximation later stage <---
+                    while (sta == 0) {
+                        // fetch bit from current bitplane
+                        block[0] = BITN(colldata.at((BlockType)cmp, 0, dpos), cs_sal);
+
+                        // encode dc correction bit
+                        sta = encode_dc_prg_sa(huffw, block.begin());
+
+                        // next mcupos if no error happened
+                        if (sta != -1) sta = next_mcupos(&mcu, &cmp, &csc, &sub, &dpos, &rstw, cs_cmpc);
+                        if (sta == 0 && huffw->no_remainder()) {
+                            merge_jpeg_streaming(&streaming_progress, huffw->peekptr(), huffw->getpos(), false);
+                        }
+                        if (str_out->has_exceeded_bound()) {
+                            sta = 2;
+                        }
+                    }
+                }
+            } else // encoding for non interleaved data
+            {
+                if (jpegtype == 1) {
+                    // ---> sequential non interleaved encoding <---
+                    while (sta == 0) {
+                        const AlignedBlock& aligned_block = colldata.block((BlockType)cmp, dpos);
+                        // copy from colldata
+                        int16_t dc = block[0] = aligned_block.dc();
+                        for (bpos = 1; bpos < 64; bpos++) block[bpos] = aligned_block.coefficients_zigzag(bpos);
+
+                        // diff coding for dc
+                        block[0] -= lastdc[cmp];
+                        lastdc[cmp] = dc;
+
+                        // encode block
+                        eob = encode_block_seq(huffw, &(hcodes[0][cmpnfo[cmp].huffdc]),
+                                               &(hcodes[1][cmpnfo[cmp].huffac]), block.begin());
+
+                        // check for errors, proceed if no error encountered
+                        if (eob < 0)
+                            sta = -1;
+                        else
+                            sta = next_mcuposn(&cmp, &dpos, &rstw);
+                        if (sta == 0 && huffw->no_remainder()) {
+                            merge_jpeg_streaming(&streaming_progress, huffw->peekptr(), huffw->getpos(), false);
+                        }
+                        if (str_out->has_exceeded_bound()) {
+                            sta = 2;
+                        }
+                    }
+                } else if (cs_to == 0) {
+                    if (cs_sah == 0) {
+                        // ---> progressive non interleaved DC encoding <---
+                        // ---> successive approximation first stage <---
+                        while (sta == 0) {
+                            // diff coding & bitshifting for dc
+                            tmp = colldata.at((BlockType)cmp, 0, dpos) >> cs_sal;
+                            block[0] = tmp - lastdc[cmp];
+                            lastdc[cmp] = tmp;
+
+                            // encode dc
+                            sta = encode_dc_prg_fs(huffw, &(hcodes[0][cmpnfo[cmp].huffdc]), block.begin());
+
+                            // check for errors, increment dpos otherwise
+                            if (sta != -1) sta = next_mcuposn(&cmp, &dpos, &rstw);
+                            if (sta == 0 && huffw->no_remainder()) {
+                                merge_jpeg_streaming(&streaming_progress, huffw->peekptr(), huffw->getpos(), false);
+                            }
+                            if (str_out->has_exceeded_bound()) {
+                                sta = 2;
+                            }
+                        }
+                    } else {
+                        // ---> progressive non interleaved DC encoding <---
+                        // ---> successive approximation later stage <---
+                        while (sta == 0) {
+                            // fetch bit from current bitplane
+                            block[0] = BITN(colldata.at((BlockType)cmp, 0, dpos), cs_sal);
+
+                            // encode dc correction bit
+                            sta = encode_dc_prg_sa(huffw, block.begin());
+
+                            // next mcupos if no error happened
+                            if (sta != -1) sta = next_mcuposn(&cmp, &dpos, &rstw);
+                        }
+                        // NOTE(review): unlike every sibling branch, these two checks sit
+                        // after the loop rather than inside it. The loop only exits with
+                        // sta != 0, so the streaming call below can never fire here, and
+                        // the output bound is tested once per pass instead of per block.
+                        // Left as-is to preserve behavior - confirm whether this is intended.
+                        if (sta == 0 && huffw->no_remainder()) {
+                            merge_jpeg_streaming(&streaming_progress, huffw->peekptr(), huffw->getpos(), false);
+                        }
+                        if (str_out->has_exceeded_bound()) {
+                            sta = 2;
+                        }
+                    }
+                } else {
+                    if (cs_sah == 0) {
+                        // ---> progressive non interleaved AC encoding <---
+                        // ---> successive approximation first stage <---
+                        while (sta == 0) {
+                            const AlignedBlock& aligned_block = colldata.block((BlockType)cmp, dpos);
+                            // copy from colldata
+                            for (bpos = cs_from; bpos <= cs_to; bpos++) {
+                                block[bpos] = FDIV2(aligned_block.coefficients_zigzag(bpos), cs_sal);
+                            }
+                            // encode block
+                            eob = encode_ac_prg_fs(huffw, &(hcodes[1][cmpnfo[cmp].huffac]), block.begin(), &eobrun,
+                                                   cs_from, cs_to);
+
+                            // check for errors, proceed if no error encountered
+                            if (eob < 0)
+                                sta = -1;
+                            else
+                                sta = next_mcuposn(&cmp, &dpos, &rstw);
+                            if (sta == 0 && huffw->no_remainder()) {
+                                merge_jpeg_streaming(&streaming_progress, huffw->peekptr(), huffw->getpos(), false);
+                            }
+                            if (str_out->has_exceeded_bound()) {
+                                sta = 2;
+                            }
+                        }
+
+                        // encode remaining eobrun
+                        encode_eobrun(huffw, &(hcodes[1][cmpnfo[cmp].huffac]), &eobrun);
+
+                    } else {
+                        // ---> progressive non interleaved AC encoding <---
+                        // ---> successive approximation later stage <---
+                        while (sta == 0) {
+                            const AlignedBlock& aligned_block = colldata.block((BlockType)cmp, dpos);
+                            // copy from colldata
+                            for (bpos = cs_from; bpos <= cs_to; bpos++) {
+                                block[bpos] = FDIV2(aligned_block.coefficients_zigzag(bpos), cs_sal);
+                            }
+                            // encode block
+                            eob = encode_ac_prg_sa(huffw, storw, &(hcodes[1][cmpnfo[cmp].huffac]), block.begin(),
+                                                   &eobrun, cs_from, cs_to);
+
+                            // check for errors, proceed if no error encountered
+                            if (eob < 0)
+                                sta = -1;
+                            else
+                                sta = next_mcuposn(&cmp, &dpos, &rstw);
+                            if (sta == 0 && huffw->no_remainder()) {
+                                merge_jpeg_streaming(&streaming_progress, huffw->peekptr(), huffw->getpos(), false);
+                            }
+                            if (str_out->has_exceeded_bound()) {
+                                sta = 2;
+                            }
+                        }
+
+                        // encode remaining eobrun
+                        encode_eobrun(huffw, &(hcodes[1][cmpnfo[cmp].huffac]), &eobrun);
+
+                        // encode remaining correction bits
+                        encode_crbits(huffw, storw);
+                    }
+                }
+            }
+
+            // pad huffman writer
+            huffw->pad(padbit);
+
+            // evaluate status
+            if (sta == -1) { // status -1 means error
+                fprintf(stderr, "encode error in scan%i / mcu%i", scnc, (cs_cmpc > 1) ? mcu : dpos);
+                delete huffw;
+                delete storw; // release the correction-bit writer too, as the header-parse failure path does
+                errorlevel.store(2);
+                return false;
+            } else if (sta == 2) { // status 2 means done
+                scnc++;            // increment scan counter
+                break;             // leave encoding loop, this scan is done
+            } else if (sta == 1) { // status 1 means restart
+                if (rsti > 0)      // store rstp & stay in the loop
+                    rstp.at(rstc++) = huffw->getpos() - 1;
+            }
+            huffw->flush_no_pad();
+            assert(huffw->no_remainder() && "this should have been padded");
+            if (huffw->no_remainder()) {
+                merge_jpeg_streaming(&streaming_progress, huffw->peekptr(), huffw->getpos(), false);
+            }
+        }
+    }
+    g_loops.END(); // end of the per-scan encoding loop
+
+    // safety check for error in huffwriter
+    if (huffw->error) {
+        delete huffw;
+        delete storw; // release the correction-bit writer too, as the header-parse failure path does
+        fprintf(stderr, MEM_ERRMSG);
+        errorlevel.store(2);
+        return false;
+    }
+
+    // get data into huffdata
+    huffdata = huffw->getptr();
+    hufs = huffw->getpos();
+    always_assert(huffw->no_remainder() && "this should have been padded");
+    // final streaming call over the complete buffer (last argument true)
+    merge_jpeg_streaming(&streaming_progress, huffdata, hufs, true);
+    if (!fast_exit) {
+        delete huffw;
+
+        // remove storage writer
+        delete storw;
+    }
+    // store last scan & restart positions
+    scnp.at(scnc) = hufs;
+    if (!rstp.empty()) rstp.at(rstc) = hufs;
+
+    return true;
+}
+
+/* -----------------------------------------------
+    checks range of values, error if out of bounds
+    ----------------------------------------------- */
+
+// Scans every stored coefficient of up to four components (blocks 0 through
+// max_dpos[cmp]) and compares its magnitude against MAX_V for that frequency.
+// Without an early EOF, the first violation is reported, errorlevel is set to
+// 2 and false is returned. With an early EOF, offending coefficients are
+// zeroed instead and the function returns true.
+bool check_value_range(void) {
+    // location of the most recent coefficient that had to be zeroed
+    bool zeroed_any = false;
+    int last_cmp = 0;
+    int last_bpos = 0;
+    int last_dpos = 0;
+
+    for (int cmp = 0; cmp < cmpc && cmp < 4; cmp++) {
+        // magnitude limits, re-indexed from zigzag order to aligned order
+        int limit[64];
+        for (int bpos = 0; bpos < 64; bpos++) {
+            limit[zigzag_to_aligned.at(bpos)] = MAX_V(cmp, bpos);
+        }
+
+        for (int dpos = 0; dpos < cmpnfo[cmp].bc && dpos <= max_dpos[cmp]; dpos++) {
+            const int16_t* coefs = colldata.block_nosync((BlockType)cmp, dpos).raw_data();
+            for (int idx = 0; idx < 64; ++idx) {
+                const int16_t value = coefs[idx];
+                // in range: nothing to do for this coefficient
+                if (value <= limit[idx] && value >= -limit[idx]) {
+                    continue;
+                }
+
+                const int bpos = aligned_to_zigzag.at(idx);
+                if (!early_eof_encountered) {
+                    // out of range should never happen with unmodified JPEGs
+                    fprintf(stderr, "value out of range error: cmp%i, frq%i, val %i, max %i", cmp, bpos,
+                            colldata.at_nosync((BlockType)cmp, bpos, dpos), limit[idx]);
+                    errorlevel.store(2);
+                    return false;
+                }
+
+                // truncated input: remember the spot and clear the bad value
+                last_cmp = cmp;
+                last_bpos = bpos;
+                last_dpos = dpos;
+                colldata.set((BlockType)last_cmp, last_bpos, last_dpos) = 0;
+                zeroed_any = true;
+            }
+        }
+    }
+
+    // the last offending coefficient is cleared once more after the scan
+    if (zeroed_any) {
+        colldata.set((BlockType)last_cmp, last_bpos, last_dpos) = 0;
+    }
+    return true;
+}
+
+// Comparison functor: orders ThreadHandoff objects by ascending segment_size.
+class ThreadHandoffSegmentCompare {
+   public:
+    // Returns true when `lhs` has a strictly smaller segment_size than `rhs`.
+    bool operator()(const ThreadHandoff& lhs, const ThreadHandoff& rhs) const {
+        const bool lhs_is_smaller = lhs.segment_size < rhs.segment_size;
+        return lhs_is_smaller;
+    }
+};
+
+// for debug
+#if 0
+// Debug helper (compiled out by the surrounding #if 0).
+// Writes `size` bytes starting at `pc` to the file named `fname`, then prints
+// a newline to stderr and the contents of `buff` to stdout. With the hex-dump
+// loop below commented out, `buff` only ever holds an empty string.
+void tm_fun_deb(unsigned char* pc, int size,  char* fname)
+{
+    FILE* fp = fopen(fname, "wb");
+    // fopen returns NULL on failure; bail out instead of handing NULL to fwrite/fclose
+    if (fp == NULL) {
+        fprintf(stderr, "tm_fun_deb: cannot open %s for writing\n", fname);
+        return;
+    }
+    unsigned char buff[101];
+    unsigned char* pb=buff;
+    fwrite((void*)pc, 1, size, fp);
+    fclose(fp);
+//    for(int i=size-100; i< size; i++){
+//        fprintf(stderr, "%2x ", pc[i]);
+//        *pb = pc[i]; pb++;
+//    }
+    *pb=0; // terminate buff; pb still points at buff[0] while the loop above is disabled
+    fprintf(stderr,"\n");
+    printf("%s \n", buff);
+}
+#endif
+/* -----------------------------------------------
+    write uncompressed JPEG file
+    ----------------------------------------------- */
+// extern decOutput glb_lepp;
+bool write_ujpg_2(int filesize,
+                  IOUtil::FileWriter* ujg_out,
+                  struct_arith& arith,
+                  uint8_t* res,
+                  uint8_t* origin,
+                  int hdrsK,
+                  uint32_t rst,
+                  ThreadHandoff& myHand) {
+    unsigned char ujpg_mrk[64];
+    bool has_lepton_entropy_coding = (ofiletype == LEPTON || filetype == LEPTON);
+    Sirikata::JpegError err = Sirikata::JpegError::nil();
+    err = ujg_out->Write(lepton_header, 2).second;
+    // store version number
+    ujpg_mrk[0] = ujgversion;
+    ujg_out->Write(ujpg_mrk, 1);
+    Sirikata::MemReadWriter mrw((Sirikata::JpegAllocator<uint8_t>()));
+
+    NUM_THREADS = 1;
+    std::vector<ThreadHandoff> selected_splits(NUM_THREADS);
+    std::vector<int> split_indices(NUM_THREADS);
+    split_indices[NUM_THREADS - 1] = 80; // row_thread_handoffs.size() - 1;//153, 432, 80
+    size_t last_split_index = 0;
+
+    int debug_size_mrw = mrw.buffer().size(); //
+    unsigned char* debug_data_mrw = mrw.buffer().data();
+    // always_assert(start_byte||!selected_splits[0].luma_y_start);
+    // write header to file
+    // marker: "HDR" + [size of header]
+    unsigned char hdr_mrk[] = {'H', 'D', 'R'};
+    err = mrw.Write(hdr_mrk, sizeof(hdr_mrk)).second;
+    uint32toLE(hdrsK, ujpg_mrk);
+    err = mrw.Write(ujpg_mrk, 4).second;
+    // data: data from header
+    mrw.Write(origin + 2, hdrsK);
+
+    // beginning here: recovery information (needed for exact JPEG recovery)
+    // marker: "P0D"
+    unsigned char pad_mrk[] = {'P', '0', 'D'}; // 50 30 44 7f
+    err = mrw.Write(pad_mrk, sizeof(pad_mrk)).second;
+    // data: padbit
+    padbit = 127;
+    err = mrw.Write((unsigned char*)&padbit, 1).second;
+
+    // write luma splits
+    unsigned char luma_mrk[1] = {'H'}; // 48 48
+    err = mrw.Write(luma_mrk, sizeof(luma_mrk)).second;
+    // data: serialized luma splits
+    // auto serialized_splits = ThreadHandoff::serialize(&selected_splits[0], selected_splits.size());
+    auto serialized_splits = ThreadHandoff::serialize(&myHand, 1);
+    err = mrw.Write(&serialized_splits[0], serialized_splits.size()).second;
+
+    // unsigned char* pb=serialized_splits[0];
+    //    fprintf(stderr, "serialized_splits: ");
+    //    for(int i=0; i< serialized_splits.size(); i++){
+    //        fprintf(stderr, "%2x ", serialized_splits[i]);
+    //    }
+    /*    if (!rst_cnt.empty()) {
+            unsigned char frs_mrk[] = {'C', 'R', 'S'};
+            err = mrw.Write( frs_mrk, 3 ).second;
+            uint32toLE((uint32_t)rst_cnt.size(), ujpg_mrk);
+            err = mrw.Write( ujpg_mrk, 4).second;
+            for (size_t i = 0; i < rst_cnt.size(); ++i) {
+                uint32toLE((uint32_t)rst_cnt.at(i), ujpg_mrk);
+                err = mrw.Write( ujpg_mrk, 4).second;
+            }
+        }*/
+
+    if (rst != 0) {
+        unsigned char frs_mrk[] = {'C', 'R', 'S'};
+        err = mrw.Write(frs_mrk, 3).second;
+        uint32toLE(1, ujpg_mrk);
+        err = mrw.Write(ujpg_mrk, 4).second;
+        uint32toLE((uint32_t)rst, ujpg_mrk);
+        err = mrw.Write(ujpg_mrk, 4).second;
+    }
+
+    // write number of false set RST markers per scan (if available) to file
+    /*    if (!rst_err.empty()) {
+            // marker: "FRS" + [number of scans]
+            unsigned char frs_mrk[] = {'F', 'R', 'S'};
+            err = mrw.Write( frs_mrk, 3 ).second;
+            uint32toLE((uint32_t)rst_err.size(), ujpg_mrk);
+            err = mrw.Write( ujpg_mrk, 4).second;
+            // data: numbers of false set markers
+            err = mrw.Write( rst_err.data(), rst_err.size() ).second;
+        }*/
+
+    std::vector<unsigned char> rst_err_tmp;
+    rst_err_tmp.clear();
+    rst_err_tmp.push_back(0);
+    if (!rst_err_tmp.empty()) {
+        // marker: "FRS" + [number of scans]
+        unsigned char frs_mrk[] = {'F', 'R', 'S'};
+        err = mrw.Write(frs_mrk, 3).second;
+        uint32toLE((uint32_t)rst_err_tmp.size(), ujpg_mrk);
+        err = mrw.Write(ujpg_mrk, 4).second;
+        // data: numbers of false set markers
+        err = mrw.Write(rst_err_tmp.data(), rst_err_tmp.size()).second;
+    }
+
+    if (early_eof_encountered) {
+        unsigned char early_eof[] = {'E', 'E', 'E'};
+        err = mrw.Write(early_eof, sizeof(early_eof)).second;
+        uint32toLE(max_cmp, ujpg_mrk);
+        uint32toLE(max_bpos, ujpg_mrk + 4);
+        uint32toLE(max_sah, ujpg_mrk + 8);
+        uint32toLE(max_dpos[0], ujpg_mrk + 12);
+        uint32toLE(max_dpos[1], ujpg_mrk + 16);
+        uint32toLE(max_dpos[2], ujpg_mrk + 20);
+        uint32toLE(max_dpos[3], ujpg_mrk + 24);
+        err = mrw.Write(ujpg_mrk, 28).second;
+    }
+    // write prefix garbage (prefix_grbgdata, if any) to file
+    if (prefix_grbs > 0 || prefix_grbgdata != NULL) {
+        // marker: "PGR" + [size of prefix garbage]
+        unsigned char grb_mrk[] = {'P', 'G', 'R'};
+        err = mrw.Write(grb_mrk, sizeof(grb_mrk)).second;
+        uint32toLE(prefix_grbs, ujpg_mrk);
+        err = mrw.Write(ujpg_mrk, 4).second;
+        // data: garbage data
+        err = mrw.Write(prefix_grbgdata, prefix_grbs).second;
+    }
+    // write garbage (data including and after EOI) (if any) to file
+    /*    if ( grbs > 0 ) {
+            // marker: "GRB" + [size of garbage]
+            unsigned char grb_mrk[] = {'G', 'R', 'B'};
+            err = mrw.Write( grb_mrk, sizeof(grb_mrk) ).second;
+            uint32toLE(grbs, ujpg_mrk);
+            err = mrw.Write( ujpg_mrk, 4 ).second;
+            // data: garbage data
+            err = mrw.Write( grbgdata, grbs ).second;
+        }*/
+    if (mrw.buffer().size() > 1024 * 1024) {
+        // custom_exit(ExitCode::HEADER_TOO_LARGE);
+    }
+    /*    debug_data_mrw = mrw.buffer().data();
+        debug_size_mrw = mrw.buffer().size();//7871
+        tm_fun_deb(debug_data_mrw, debug_size_mrw, "head_org.txt");*/
+
+    std::vector<uint8_t, Sirikata::JpegAllocator<uint8_t> > compressed_header;
+
+    compressed_header = Sirikata::ZlibDecoderCompressionWriter::Compress(mrw.buffer().data(), mrw.buffer().size(),
+                                                                         Sirikata::JpegAllocator<uint8_t>());
+    // debug_data_mrw = compressed_header.data();
+    // debug_size_mrw = compressed_header.size();
+    // tm_fun_deb(debug_data_mrw, debug_size_mrw, "head_cmp.txt");
+    write_byte_bill(Billing::HEADER, false, 2 + hdrs + prefix_grbs + grbs);
+    static_assert(MAX_NUM_THREADS <= 255, "We only have a single byte for num threads");
+    always_assert(NUM_THREADS <= 255);
+
+    unsigned char zed[] = {'\0'};
+    if (start_byte != 0) {
+        zed[0] = (unsigned char)'Y';
+    } else if (g_allow_progressive) {
+        zed[0] = (unsigned char)'X';
+    } else {
+        zed[0] = (unsigned char)'Z';
+    }
+    err = ujg_out->Write(zed, sizeof(zed)).second;
+    unsigned char num_threads[] = {(unsigned char)NUM_THREADS};
+    err = ujg_out->Write(num_threads, sizeof(num_threads)).second;
+    unsigned char zero3[3] = {};
+    err = ujg_out->Write(zero3, sizeof(zero3)).second;
+    unsigned char git_revision[12] = {0}; // we only have 12 chars in the header for this
+    hex_to_bin(git_revision, GIT_REVISION, sizeof(git_revision));
+    err = ujg_out->Write(git_revision, sizeof(git_revision)).second;
+    uint32toLE(filesize, ujpg_mrk);
+    jpgfilesize = filesize;
+    err = ujg_out->Write(ujpg_mrk, 4).second;
+    write_byte_bill(Billing::HEADER, true, 24);
+    uint32toLE((uint32_t)compressed_header.size(), ujpg_mrk);
+    err = ujg_out->Write(ujpg_mrk, 4).second;
+    write_byte_bill(Billing::HEADER, true, 4);
+
+    // debug_data_mrw = compressed_header.data();
+    // debug_size_mrw = compressed_header.size();
+    // tm_fun_deb(debug_data_mrw, debug_size_mrw);
+
+    auto err2 = ujg_out->Write(compressed_header.data(), compressed_header.size());
+
+    write_byte_bill(Billing::HEADER, true, compressed_header.size());
+
+    zlib_hdrs = compressed_header.size();
+    if (err != Sirikata::JpegError::nil() || err2.second != Sirikata::JpegError::nil()) {
+        fprintf(stderr, "write error, possibly drive is full");
+        errorlevel.store(2);
+        return false;
+    }
+
+    unsigned char cmp_mrk[] = {'C', 'M', 'P'};
+    err = ujg_out->Write(cmp_mrk, sizeof(cmp_mrk)).second;
+
+    write_byte_bill(Billing::HEADER, true, 3);
+    clock_t begin = 0, end = 1;
+    begin = clock();
+    //    g_loops.START("LOOP:write_ujpg,while,", WHILE);;//g_loops.END();
+    VP8ComponentEncoder* pEnc = (VP8ComponentEncoder*)(&g_encoder);
+    while (pEnc->vp8_full_encoder(&colldata, ujg_out, &selected_splits[0], selected_splits.size(), arith, res) ==
+           CODING_PARTIAL) {
+        //    	g_loops.CNT();
+    }
+    //    g_loops.END();
+
+    end = clock();
+    printf("%d, %d, %d \n", begin, end, end - begin);
+
+    //    g_loops.PrintTree();
+    // errormessage if write error
+    if (err != Sirikata::JpegError::nil()) {
+        fprintf(stderr, "write error, possibly drive is full");
+        errorlevel.store(2);
+        return false;
+    }
+
+    // get filesize, if avail
+    if (ujg_out) {
+        ujgfilesize = ujg_out->getsize();
+    }
+
+    return true;
+}
+/*bool write_ujpg(std::vector<ThreadHandoff> row_thread_handoffs,
+                std::vector<uint8_t, Sirikata::JpegAllocator<uint8_t> >*jpeg_file_raw_bytes,
+                                struct_arith& arith, uint8_t* res
+)
+{
+    unsigned char ujpg_mrk[ 64 ];
+    bool has_lepton_entropy_coding = (ofiletype == LEPTON || filetype == LEPTON );
+    Sirikata::JpegError err = Sirikata::JpegError::nil();
+
+    err = ujg_out->Write( lepton_header, 2 ).second;
+
+    // store version number
+    ujpg_mrk[ 0 ] = ujgversion;
+    ujg_out->Write( ujpg_mrk, 1 );
+    Sirikata::MemReadWriter mrw((Sirikata::JpegAllocator<uint8_t>()));
+    //uint32_t framebuffer_byte_size = row_thread_handoffs.back().segment_size -
+row_thread_handoffs.front().segment_size;
+    //uint32_t num_rows = row_thread_handoffs.size();
+    NUM_THREADS = 1;//std::min(NUM_THREADS, (unsigned int)max_encode_threads);
+    std::vector<ThreadHandoff> selected_splits(NUM_THREADS);
+    std::vector<int> split_indices(NUM_THREADS);
+    split_indices[NUM_THREADS - 1] = row_thread_handoffs.size() - 1;//153, 432
+    size_t last_split_index = 0;
+    for (size_t i = 0; i < selected_splits.size(); ++i) {
+        size_t beginning_of_range = last_split_index;
+        size_t end_of_range = split_indices[i];
+        //fprintf(stderr, "Beginning %ld end %ld\n", beginning_of_range, end_of_range);
+        last_split_index = end_of_range;
+        always_assert( end_of_range < row_thread_handoffs.size() );
+        selected_splits[i] = row_thread_handoffs[ end_of_range ] - row_thread_handoffs[ beginning_of_range ];
+        if (i + 1 == selected_splits.size() && row_thread_handoffs[ end_of_range ].num_overhang_bits) {
+            ++selected_splits[i].segment_size; // need room for that last byte to hold the overhang byte
+        }
+
+    }
+
+
+    always_assert(start_byte||!selected_splits[0].luma_y_start);
+    // write header to file
+    // marker: "HDR" + [size of header]
+    unsigned char hdr_mrk[] = {'H', 'D', 'R'};
+    err = mrw.Write( hdr_mrk, sizeof(hdr_mrk) ).second;
+    uint32toLE(hdrs, ujpg_mrk);
+    err = mrw.Write( ujpg_mrk, 4).second;
+    // data: data from header
+    mrw.Write( hdrdata, hdrs );
+    // beginning here: recovery information (needed for exact JPEG recovery)
+
+    // marker: P0D"
+    unsigned char pad_mrk[] = {'P', '0', 'D'};
+    err = mrw.Write( pad_mrk, sizeof(pad_mrk) ).second;
+    // data: padbit
+    err = mrw.Write( (unsigned char*) &padbit, 1 ).second;
+
+    // write luma splits
+    unsigned char luma_mrk[1] = {'H'};
+    err = mrw.Write( luma_mrk, sizeof(luma_mrk) ).second;
+    // data: serialized luma splits
+    auto serialized_splits = ThreadHandoff::serialize(&selected_splits[0], selected_splits.size());
+    err = mrw.Write(&serialized_splits[0], serialized_splits.size()).second;
+
+    if (!rst_cnt.empty()) {
+        unsigned char frs_mrk[] = {'C', 'R', 'S'};
+        err = mrw.Write( frs_mrk, 3 ).second;
+        uint32toLE((uint32_t)rst_cnt.size(), ujpg_mrk);
+        err = mrw.Write( ujpg_mrk, 4).second;
+        for (size_t i = 0; i < rst_cnt.size(); ++i) {
+            uint32toLE((uint32_t)rst_cnt.at(i), ujpg_mrk);
+            err = mrw.Write( ujpg_mrk, 4).second;
+        }
+    }
+    // write number of false set RST markers per scan (if available) to file
+    if (!rst_err.empty()) {
+        // marker: "FRS" + [number of scans]
+        unsigned char frs_mrk[] = {'F', 'R', 'S'};
+        err = mrw.Write( frs_mrk, 3 ).second;
+        uint32toLE((uint32_t)rst_err.size(), ujpg_mrk);
+        err = mrw.Write( ujpg_mrk, 4).second;
+        // data: numbers of false set markers
+        err = mrw.Write( rst_err.data(), rst_err.size() ).second;
+    }
+    if (early_eof_encountered) {
+        unsigned char early_eof[] = {'E', 'E', 'E'};
+        err = mrw.Write( early_eof, sizeof(early_eof) ).second;
+        uint32toLE(max_cmp, ujpg_mrk);
+        uint32toLE(max_bpos, ujpg_mrk + 4);
+        uint32toLE(max_sah, ujpg_mrk + 8);
+        uint32toLE(max_dpos[0], ujpg_mrk + 12);
+        uint32toLE(max_dpos[1], ujpg_mrk + 16);
+        uint32toLE(max_dpos[2], ujpg_mrk + 20);
+        uint32toLE(max_dpos[3], ujpg_mrk + 24);
+        err = mrw.Write(ujpg_mrk, 28).second;
+    }
+    // write garbage (data including and after EOI) (if any) to file
+    if ( prefix_grbs > 0 || prefix_grbgdata != NULL) {
+        // marker: "GRB" + [size of garbage]
+        unsigned char grb_mrk[] = {'P', 'G', 'R'};
+        err = mrw.Write( grb_mrk, sizeof(grb_mrk) ).second;
+        uint32toLE(prefix_grbs, ujpg_mrk);
+        err = mrw.Write( ujpg_mrk, 4 ).second;
+        // data: garbage data
+        err = mrw.Write( prefix_grbgdata, prefix_grbs ).second;
+    }
+    // write garbage (data including and after EOI) (if any) to file
+    if ( grbs > 0 ) {
+        // marker: "GRB" + [size of garbage]
+        unsigned char grb_mrk[] = {'G', 'R', 'B'};
+        err = mrw.Write( grb_mrk, sizeof(grb_mrk) ).second;
+        uint32toLE(grbs, ujpg_mrk);
+        err = mrw.Write( ujpg_mrk, 4 ).second;
+        // data: garbage data
+        err = mrw.Write( grbgdata, grbs ).second;
+    }
+    if (mrw.buffer().size() > 1024 * 1024) {
+        //custom_exit(ExitCode::HEADER_TOO_LARGE);
+    }
+    std::vector<uint8_t, Sirikata::JpegAllocator<uint8_t> > compressed_header;
+
+    compressed_header = Sirikata::ZlibDecoderCompressionWriter::Compress(mrw.buffer().data(),
+                                                             mrw.buffer().size(),
+                                                             Sirikata::JpegAllocator<uint8_t>());
+
+    write_byte_bill(Billing::HEADER, false, 2 + hdrs + prefix_grbs + grbs);
+    static_assert(MAX_NUM_THREADS <= 255, "We only have a single byte for num threads");
+    always_assert(NUM_THREADS <= 255);
+    unsigned char zed[] = {'\0'};
+    if (start_byte != 0) {
+        zed[0] = (unsigned char)'Y';
+    } else if (g_allow_progressive) {
+        zed[0] = (unsigned char)'X';
+    } else {
+        zed[0] = (unsigned char)'Z';
+    }
+    err =  ujg_out->Write(zed, sizeof(zed)).second;
+    unsigned char num_threads[] = {(unsigned char)NUM_THREADS};
+    err =  ujg_out->Write(num_threads, sizeof(num_threads)).second;
+    unsigned char zero3[3] = {};
+    err =  ujg_out->Write(zero3, sizeof(zero3)).second;
+    unsigned char git_revision[12] = {0}; // we only have 12 chars in the header for this
+    hex_to_bin(git_revision, GIT_REVISION, sizeof(git_revision));
+    err = ujg_out->Write(git_revision, sizeof(git_revision) ).second;
+    uint32toLE(jpgfilesize - start_byte, ujpg_mrk);
+    err = ujg_out->Write( ujpg_mrk, 4).second;
+    write_byte_bill(Billing::HEADER, true, 24);
+    uint32toLE((uint32_t)compressed_header.size(), ujpg_mrk);
+    err = ujg_out->Write( ujpg_mrk, 4).second;
+    write_byte_bill(Billing::HEADER, true, 4);
+    auto err2 = ujg_out->Write(compressed_header.data(),
+                               compressed_header.size());
+    write_byte_bill(Billing::HEADER, true, compressed_header.size());
+    zlib_hdrs = compressed_header.size();
+    if (err != Sirikata::JpegError::nil() || err2.second != Sirikata::JpegError::nil()) {
+        fprintf( stderr, "write error, possibly drive is full" );
+        errorlevel.store(2);
+        return false;
+    }
+    unsigned char cmp_mrk[] = {'C', 'M', 'P'};
+    err = ujg_out->Write( cmp_mrk, sizeof(cmp_mrk) ).second;
+    write_byte_bill(Billing::HEADER, true, 3);
+        clock_t begin = 0, end = 1;
+        begin = clock();
+
+    g_loops.START("LOOP:write_ujpg,while,", WHILE);;//g_loops.END();
+    VP8ComponentEncoder* pEnc = (VP8ComponentEncoder*)(&g_encoder);
+    while (pEnc->vp8_full_encoder(&colldata, ujg_out,
+                                   &selected_splits[0], selected_splits.size(),
+                                                                   arith, res) == CODING_PARTIAL) {
+        g_loops.CNT();
+    }
+    g_loops.END();
+
+    end = clock();
+    printf("%d, %d, %d \n", begin, end, end-begin);
+
+
+    g_loops.PrintTree();
+    // errormessage if write error
+    if ( err != Sirikata::JpegError::nil() ) {
+        fprintf( stderr, "write error, possibly drive is full" );
+        errorlevel.store(2);
+        return false;
+    }
+
+    // get filesize, if avail
+    if (ujg_out) {
+        ujgfilesize = ujg_out->getsize();
+    }
+
+
+    return true;
+}
+*/
+/*bool write_ujpg_org(std::vector<ThreadHandoff> row_thread_handoffs,
+                std::vector<uint8_t, Sirikata::JpegAllocator<uint8_t> >*jpeg_file_raw_bytes)
+{
+    unsigned char ujpg_mrk[ 64 ];
+    bool has_lepton_entropy_coding = (ofiletype == LEPTON || filetype == LEPTON );
+    Sirikata::JpegError err = Sirikata::JpegError::nil();
+
+    if (!has_lepton_entropy_coding) {
+        // UJG-Header
+        err = ujg_out->Write( ujg_header, 2 ).second;
+    } else {
+        // lepton-Header
+        err = ujg_out->Write( lepton_header, 2 ).second;
+    }
+    // store version number
+    ujpg_mrk[ 0 ] = ujgversion;
+    ujg_out->Write( ujpg_mrk, 1 );
+
+    // discard meta information from header if needed
+    if ( disc_meta )
+        if ( !rebuild_header_jpg() )
+            return false;
+    if (start_byte) {
+        std::vector<ThreadHandoff> local_row_thread_handoffs;
+        for (std::vector<ThreadHandoff>::iterator i = row_thread_handoffs.begin(),
+                 ie = row_thread_handoffs.end(); i != ie; ++i) {
+            auto j = i;
+            ++j;
+            if ((j == ie || i->segment_size >= start_byte)
+                && (max_file_size == 0 || i->segment_size <= max_file_size + start_byte)) {
+                local_row_thread_handoffs.push_back(*i);
+                //fprintf(stderr, "OK: %d (%d %d)\n", i->segment_size, i->luma_y_start, i->luma_y_end);
+            } else {
+                //fprintf(stderr, "XX: %d (%d %d)\n", i->segment_size, i->luma_y_start, i->luma_y_end);
+            }
+        }
+        row_thread_handoffs.swap(local_row_thread_handoffs);
+    }
+    if (start_byte) {
+        always_assert(jpeg_file_raw_bytes);
+    }
+    if (start_byte && jpeg_file_raw_bytes && !row_thread_handoffs.empty()) {
+        if (row_thread_handoffs[0].segment_size >= start_byte) {
+            prefix_grbs = row_thread_handoffs[0].segment_size - start_byte;
+            if (row_thread_handoffs.size() > 1) {
+                if (prefix_grbs) {
+                    --prefix_grbs; //FIXME why is this ?!
+                }
+            }
+        } else {
+            prefix_grbs = 0;
+            custom_exit(ExitCode::ONLY_GARBAGE_NO_JPEG);
+        }
+        if (prefix_grbs > 0) {
+            prefix_grbgdata = aligned_alloc(prefix_grbs);
+            always_assert(jpeg_file_raw_bytes->size() >= (size_t)prefix_grbs + start_byte);
+            memcpy(prefix_grbgdata,
+                   &(*jpeg_file_raw_bytes)[start_byte],
+                   std::min((size_t)prefix_grbs,
+                            jpeg_file_raw_bytes->size() - start_byte));
+        } else {
+            prefix_grbgdata = aligned_alloc(1); // so it's nonnull
+        }
+    }
+    Sirikata::MemReadWriter mrw((Sirikata::JpegAllocator<uint8_t>()));
+#if 0
+    for (uint32_t i = 0; i < row_thread_handoffs.size() ; ++ i) {
+        fprintf(stderr,
+                "Row [%d - %d], %d size %d overhang byte %d num overhang bits %d  dc %d %d %d\n",
+                (int)row_thread_handoffs[i].luma_y_start,
+                (int)row_thread_handoffs[i].luma_y_end,
+                (int)i,
+                (int)row_thread_handoffs[i].segment_size,
+                (int)row_thread_handoffs[i].overhang_byte,
+                (int)row_thread_handoffs[i].num_overhang_bits,
+                (int)row_thread_handoffs[i].last_dc[0],
+                (int)row_thread_handoffs[i].last_dc[1],
+                (int)row_thread_handoffs[i].last_dc[2]);
+    }
+#endif
+    uint32_t framebuffer_byte_size = row_thread_handoffs.back().segment_size - row_thread_handoffs.front().segment_size;
+    uint32_t num_rows = row_thread_handoffs.size();
+    NUM_THREADS = std::min(NUM_THREADS, (unsigned int)max_encode_threads);
+    if (num_rows / 2 < NUM_THREADS) {
+        NUM_THREADS = std::max(num_rows / 2, 1U);
+    }
+    if (framebuffer_byte_size < 125000) {
+        NUM_THREADS = 1;
+    } else if (framebuffer_byte_size < 250000) {
+        NUM_THREADS = std::min(2U, (unsigned int)NUM_THREADS);
+    } else if (framebuffer_byte_size < 500000) {
+        NUM_THREADS = std::min(4U, (unsigned int)NUM_THREADS);
+    }
+    //fprintf(stderr, "Byte size %d num_rows %d Using num threads %u\n", framebuffer_byte_size, num_rows, NUM_THREADS);
+    std::vector<ThreadHandoff> selected_splits(NUM_THREADS);
+    std::vector<int> split_indices(NUM_THREADS);
+    for (uint32_t i = 0; i < NUM_THREADS - 1 ; ++ i) {
+        ThreadHandoff desired_handoff = row_thread_handoffs.back();
+        if(max_file_size && max_file_size + start_byte < desired_handoff.segment_size) {
+            desired_handoff.segment_size += row_thread_handoffs.front().segment_size;
+        }
+        desired_handoff.segment_size -= row_thread_handoffs.front().segment_size;
+
+        desired_handoff.segment_size *= (i + 1);
+        desired_handoff.segment_size /= NUM_THREADS;
+        desired_handoff.segment_size += row_thread_handoffs.front().segment_size;
+        auto split = std::lower_bound(row_thread_handoffs.begin() + 1, row_thread_handoffs.end(),
+                                      desired_handoff,
+                                      ThreadHandoffSegmentCompare());
+        if (split == row_thread_handoffs.begin() && split != row_thread_handoffs.end()) {
+            //++split;
+        } else if (split != row_thread_handoffs.begin() + 1) {
+            --split;
+        }
+        split_indices[i] = split - row_thread_handoffs.begin();
+    }
+    for (uint32_t index = 0; index < NUM_THREADS - 1 ; ++ index) {
+        if (split_indices[index] == split_indices[index + 1]) {
+            for (uint32_t i = 0; i < NUM_THREADS - 1 ; ++ i) {
+                split_indices[i] = (i + 1) * row_thread_handoffs.size() / NUM_THREADS;
+            }
+            break;
+        }
+    }
+    split_indices[NUM_THREADS - 1] = row_thread_handoffs.size() - 1;
+    size_t last_split_index = 0;
+    for (size_t i = 0; i < selected_splits.size(); ++i) {
+        size_t beginning_of_range = last_split_index;
+        size_t end_of_range = split_indices[i];
+        //fprintf(stderr, "Beginning %ld end %ld\n", beginning_of_range, end_of_range);
+        last_split_index = end_of_range;
+        always_assert( end_of_range < row_thread_handoffs.size() );
+        selected_splits[i] = row_thread_handoffs[ end_of_range ] - row_thread_handoffs[ beginning_of_range ];
+        if (i + 1 == selected_splits.size() && row_thread_handoffs[ end_of_range ].num_overhang_bits) {
+            ++selected_splits[i].segment_size; // need room for that last byte to hold the overhang byte
+        }
+#if 0
+        fprintf(stderr, "%d->%d) %d - %d {%ld}\n", selected_splits[i].luma_y_start,
+                selected_splits[i].luma_y_end,
+                row_thread_handoffs[ beginning_of_range ].segment_size,
+                row_thread_handoffs[ end_of_range ].segment_size, row_thread_handoffs.size());
+#endif*/
+/*
+        if (i + 1 == selected_splits.size()) {
+            int tmp = selected_splits[i].segment_size;
+            selected_splits[i].segment_size = jpgfilesize - row_thread_handoffs[ beginning_of_range ].segment_size;
+            fprintf(stderr, "Split size was %x and is %x - %x = %x\n", tmp, jpgfilesize, row_thread_handoffs[ beginning_of_range ].segment_size, selected_splits[i].segment_size);
+        }
+*/ /*
+     }
+ #if 0
+     for (uint32_t i = 0; i < selected_splits.size() ; ++ i) {
+         fprintf(stderr,
+                 "Row [%d - %d] %d size %d overhang byte %d num overhang bits %d  dc %d %d %d\n",
+                 (int)selected_splits[i].luma_y_start,
+                 (int)selected_splits[i].luma_y_end,
+
+                 (int)i,
+                 (int)selected_splits[i].segment_size,
+                 (int)selected_splits[i].overhang_byte,
+                 (int)selected_splits[i].num_overhang_bits,
+                 (int)selected_splits[i].last_dc[0],
+                 (int)selected_splits[i].last_dc[1],
+                 (int)selected_splits[i].last_dc[2]);
+     }
+ #endif
+
+     always_assert(start_byte||!selected_splits[0].luma_y_start);
+     // write header to file
+     // marker: "HDR" + [size of header]
+     unsigned char hdr_mrk[] = {'H', 'D', 'R'};
+     err = mrw.Write( hdr_mrk, sizeof(hdr_mrk) ).second;
+     uint32toLE(hdrs, ujpg_mrk);
+     err = mrw.Write( ujpg_mrk, 4).second;
+     // data: data from header
+     mrw.Write( hdrdata, hdrs );
+     // beginning here: recovery information (needed for exact JPEG recovery)
+
+     // marker: P0D"
+     unsigned char pad_mrk[] = {'P', '0', 'D'};
+     err = mrw.Write( pad_mrk, sizeof(pad_mrk) ).second;
+     // data: padbit
+     err = mrw.Write( (unsigned char*) &padbit, 1 ).second;
+
+     // write luma splits
+     unsigned char luma_mrk[1] = {'H'};
+     err = mrw.Write( luma_mrk, sizeof(luma_mrk) ).second;
+     // data: serialized luma splits
+     auto serialized_splits = ThreadHandoff::serialize(&selected_splits[0], selected_splits.size());
+     err = mrw.Write(&serialized_splits[0], serialized_splits.size()).second;
+
+     if (!rst_cnt.empty()) {
+         unsigned char frs_mrk[] = {'C', 'R', 'S'};
+         err = mrw.Write( frs_mrk, 3 ).second;
+         uint32toLE((uint32_t)rst_cnt.size(), ujpg_mrk);
+         err = mrw.Write( ujpg_mrk, 4).second;
+         for (size_t i = 0; i < rst_cnt.size(); ++i) {
+             uint32toLE((uint32_t)rst_cnt.at(i), ujpg_mrk);
+             err = mrw.Write( ujpg_mrk, 4).second;
+         }
+     }
+     // write number of false set RST markers per scan (if available) to file
+     if (!rst_err.empty()) {
+         // marker: "FRS" + [number of scans]
+         unsigned char frs_mrk[] = {'F', 'R', 'S'};
+         err = mrw.Write( frs_mrk, 3 ).second;
+         uint32toLE((uint32_t)rst_err.size(), ujpg_mrk);
+         err = mrw.Write( ujpg_mrk, 4).second;
+         // data: numbers of false set markers
+         err = mrw.Write( rst_err.data(), rst_err.size() ).second;
+     }
+     if (early_eof_encountered) {
+         unsigned char early_eof[] = {'E', 'E', 'E'};
+         err = mrw.Write( early_eof, sizeof(early_eof) ).second;
+         uint32toLE(max_cmp, ujpg_mrk);
+         uint32toLE(max_bpos, ujpg_mrk + 4);
+         uint32toLE(max_sah, ujpg_mrk + 8);
+         uint32toLE(max_dpos[0], ujpg_mrk + 12);
+         uint32toLE(max_dpos[1], ujpg_mrk + 16);
+         uint32toLE(max_dpos[2], ujpg_mrk + 20);
+         uint32toLE(max_dpos[3], ujpg_mrk + 24);
+         err = mrw.Write(ujpg_mrk, 28).second;
+     }
+     // write garbage (data including and after EOI) (if any) to file
+     if ( prefix_grbs > 0 || prefix_grbgdata != NULL) {
+         // marker: "GRB" + [size of garbage]
+         unsigned char grb_mrk[] = {'P', 'G', 'R'};
+         err = mrw.Write( grb_mrk, sizeof(grb_mrk) ).second;
+         uint32toLE(prefix_grbs, ujpg_mrk);
+         err = mrw.Write( ujpg_mrk, 4 ).second;
+         // data: garbage data
+         err = mrw.Write( prefix_grbgdata, prefix_grbs ).second;
+     }
+     // write garbage (data including and after EOI) (if any) to file
+     if ( grbs > 0 ) {
+         // marker: "GRB" + [size of garbage]
+         unsigned char grb_mrk[] = {'G', 'R', 'B'};
+         err = mrw.Write( grb_mrk, sizeof(grb_mrk) ).second;
+         uint32toLE(grbs, ujpg_mrk);
+         err = mrw.Write( ujpg_mrk, 4 ).second;
+         // data: garbage data
+         err = mrw.Write( grbgdata, grbs ).second;
+     }
+     if (mrw.buffer().size() > 1024 * 1024) {
+         //custom_exit(ExitCode::HEADER_TOO_LARGE);
+     }
+     std::vector<uint8_t, Sirikata::JpegAllocator<uint8_t> > compressed_header;
+     compressed_header =
+             Sirikata::ZlibDecoderCompressionWriter::Compress(mrw.buffer().data(),
+                                                              mrw.buffer().size(),
+                                                              Sirikata::JpegAllocator<uint8_t>());
+
+     write_byte_bill(Billing::HEADER, false, 2 + hdrs + prefix_grbs + grbs);
+     static_assert(MAX_NUM_THREADS <= 255, "We only have a single byte for num threads");
+     always_assert(NUM_THREADS <= 255);
+     unsigned char zed[] = {'\0'};
+     if (start_byte != 0) {
+         zed[0] = (unsigned char)'Y';
+     } else if (g_allow_progressive) {
+         zed[0] = (unsigned char)'X';
+     } else {
+         zed[0] = (unsigned char)'Z';
+     }
+     err =  ujg_out->Write(zed, sizeof(zed)).second;
+     unsigned char num_threads[] = {(unsigned char)NUM_THREADS};
+     err =  ujg_out->Write(num_threads, sizeof(num_threads)).second;
+     unsigned char zero3[3] = {};
+     err =  ujg_out->Write(zero3, sizeof(zero3)).second;
+     unsigned char git_revision[12] = {0}; // we only have 12 chars in the header for this
+     hex_to_bin(git_revision, GIT_REVISION, sizeof(git_revision));
+     err = ujg_out->Write(git_revision, sizeof(git_revision) ).second;
+     uint32toLE(jpgfilesize - start_byte, ujpg_mrk);
+     err = ujg_out->Write( ujpg_mrk, 4).second;
+     write_byte_bill(Billing::HEADER, true, 24);
+     uint32toLE((uint32_t)compressed_header.size(), ujpg_mrk);
+     err = ujg_out->Write( ujpg_mrk, 4).second;
+     write_byte_bill(Billing::HEADER, true, 4);
+     auto err2 = ujg_out->Write(compressed_header.data(),
+                                compressed_header.size());
+     write_byte_bill(Billing::HEADER, true, compressed_header.size());
+     zlib_hdrs = compressed_header.size();
+     if (err != Sirikata::JpegError::nil() || err2.second != Sirikata::JpegError::nil()) {
+         fprintf( stderr, "write error, possibly drive is full" );
+         errorlevel.store(2);
+         return false;
+     }
+     unsigned char cmp_mrk[] = {'C', 'M', 'P'};
+     err = ujg_out->Write( cmp_mrk, sizeof(cmp_mrk) ).second;
+     write_byte_bill(Billing::HEADER, true, 3);
+         clock_t begin = 0, end = 1;
+         begin = clock();
+
+     g_loops.START("LOOP:write_ujpg,while,", WHILE);;//g_loops.END();
+     while (g_encoder->encode_chunk(&colldata, ujg_out,
+                                    &selected_splits[0], selected_splits.size()) == CODING_PARTIAL) {
+         g_loops.CNT();
+     }
+     g_loops.END();
+
+     end = clock();
+     printf("%d, %d, %d \n", begin, end, end-begin);
+
+
+     g_loops.PrintTree();
+     // errormessage if write error
+     if ( err != Sirikata::JpegError::nil() ) {
+         fprintf( stderr, "write error, possibly drive is full" );
+         errorlevel.store(2);
+         return false;
+     }
+
+     // get filesize, if avail
+     if (ujg_out) {
+         ujgfilesize = ujg_out->getsize();
+     }
+
+
+     return true;
+ }*/
+/* -----------------------------------------------
+    read uncompressed JPEG file
+    ----------------------------------------------- */
+namespace { // do-nothing memory hooks; read_ujpg hands them to setup_memory_subsystem on non-Windows builds
+void mem_nop(void*, void*) { /* (opaque, ptr): intentionally empty */ }
+void* mem_init_nop(size_t, uint8_t) { // (prealloc_size, align): both ignored
+    return nullptr;
+}
+void* mem_realloc_nop(void*, size_t, size_t*, unsigned int, void*) { // (ptr, size, actualSize, movable, opaque)
+    return nullptr; // every argument is ignored and *actualSize is left untouched
+}
+} // namespace
+bool read_ujpg(void) {
+    using namespace IOUtil;
+    using namespace Sirikata;
+    //    colldata.start_decoder_worker_thread(std::bind(&simple_decoder, &colldata, str_in));
+    unsigned char ujpg_mrk[64];
+    // this is where we will enable seccomp, before reading user data
+    write_byte_bill(Billing::HEADER, true, 24); // for the fixed header
+
+    str_out->call_size_callback(max_file_size);
+    uint32_t compressed_header_size = 0;
+    if (ReadFull(str_in, ujpg_mrk, 4) != 4) {
+        custom_exit(ExitCode::SHORT_READ);
+    }
+    write_byte_bill(Billing::HEADER, true, 4);
+
+    compressed_header_size = LEtoUint32(ujpg_mrk);
+    if (compressed_header_size > 128 * 1024 * 1024 || max_file_size > 128 * 1024 * 1024) {
+        always_assert(false && "Only support images < 128 megs");
+        return false; // bool too big
+    }
+    std::vector<uint8_t, JpegAllocator<uint8_t> > compressed_header_buffer(compressed_header_size);
+    IOUtil::ReadFull(str_in, compressed_header_buffer.data(), compressed_header_buffer.size());
+    MemReadWriter header_reader((JpegAllocator<uint8_t>()));
+    {
+        JpegAllocator<uint8_t> no_free_allocator;
+#ifndef _WIN32
+        no_free_allocator.setup_memory_subsystem(32 * 1024 * 1024, 16, &mem_init_nop, &MemMgrAllocatorMalloc, &mem_nop,
+                                                 &mem_realloc_nop, &MemMgrAllocatorMsize);
+#endif
+        std::pair<std::vector<uint8_t, Sirikata::JpegAllocator<uint8_t> >, JpegError> uncompressed_header_buffer(
+            ZlibDecoderDecompressionReader::Decompress(compressed_header_buffer.data(), compressed_header_buffer.size(),
+                                                       no_free_allocator));
+        if (uncompressed_header_buffer.second) {
+            always_assert(false && "Data not properly zlib coded");
+            return false;
+        }
+        zlib_hdrs = compressed_header_buffer.size();
+        header_reader.SwapIn(uncompressed_header_buffer.first, 0);
+    }
+    grbs = sizeof(EOI);
+    grbgdata = EOI; // if we don't have any garbage, assume FFD9 EOI
+    // read header from file
+    ReadFull(&header_reader, ujpg_mrk, 3);
+    // check marker
+    if (memcmp(ujpg_mrk, "HDR", 3) == 0) {
+        // read size of header, alloc memory
+        ReadFull(&header_reader, ujpg_mrk, 4);
+        hdrs = LEtoUint32(ujpg_mrk);
+        hdrdata = (unsigned char*)aligned_alloc(hdrs);
+        if (hdrdata == NULL) {
+            fprintf(stderr, MEM_ERRMSG);
+            errorlevel.store(2);
+            return false;
+        }
+        // read hdrdata
+        ReadFull(&header_reader, hdrdata, hdrs);
+    } else {
+        fprintf(stderr, "HDR marker not found");
+        errorlevel.store(2);
+        return false;
+    }
+    bool memory_optimized_image = (filetype != UJG) && !g_allow_progressive;
+    // parse header for image-info
+    if (!setup_imginfo_jpg(memory_optimized_image)) return false;
+
+    // beginning here: recovery information (needed for exact JPEG recovery)
+
+    // read padbit information from file
+    ReadFull(&header_reader, ujpg_mrk, 3);
+    // check marker
+    if (memcmp(ujpg_mrk, "P0D", 3) == 0) {
+        // This is a more nuanced pad byte that can have different values per bit
+        header_reader.Read(reinterpret_cast<unsigned char*>(&padbit), 1);
+    } else if (memcmp(ujpg_mrk, "PAD", 3) == 0) {
+        // this is a single pad bit that is implied to have all the same values
+        header_reader.Read(reinterpret_cast<unsigned char*>(&padbit), 1);
+        if (!(padbit == 0 || padbit == 1 || padbit == -1)) {
+            while (write(2, "Legacy Padbit must be 0, 1 or -1\n", strlen("Legacy Padbit must be 0, 1 or -1\n")) < 0 &&
+                   errno == EINTR) {
+            }
+            custom_exit(ExitCode::STREAM_INCONSISTENT);
+        }
+        if (padbit == 1) {
+            padbit = 0x7f; // all 6 bits set
+        }
+    } else {
+        fprintf(stderr, "PAD marker not found");
+        errorlevel.store(2);
+        return false;
+    }
+    std::vector<ThreadHandoff> thread_handoff;
+    // read further recovery information if any
+    while (ReadFull(&header_reader, ujpg_mrk, 3) == 3) {
+        // check marker
+        if (memcmp(ujpg_mrk, "CRS", 3) == 0) {
+            rst_cnt_set = true;
+            ReadFull(&header_reader, ujpg_mrk, 4);
+            rst_cnt.resize(LEtoUint32(ujpg_mrk));
+            for (size_t i = 0; i < rst_cnt.size(); ++i) {
+                ReadFull(&header_reader, ujpg_mrk, 4);
+                rst_cnt.at(i) = LEtoUint32(ujpg_mrk);
+            }
+        } else if (memcmp(ujpg_mrk, "HHX", 2) == 0) { // only look at first two bytes
+            size_t to_alloc = ThreadHandoff::get_remaining_data_size_from_two_bytes(ujpg_mrk + 1) + 2;
+            if (to_alloc) {
+                std::vector<unsigned char> data(to_alloc);
+                data[0] = ujpg_mrk[1];
+                data[1] = ujpg_mrk[2];
+                ReadFull(&header_reader, &data[2], to_alloc - 2);
+                thread_handoff = ThreadHandoff::deserialize(&data[0], to_alloc);
+            }
+        } else if (memcmp(ujpg_mrk, "FRS", 3) == 0) {
+            // read number of false set RST markers per scan from file
+            ReadFull(&header_reader, ujpg_mrk, 4);
+            scnc = LEtoUint32(ujpg_mrk);
+
+            rst_err.insert(rst_err.end(), scnc - rst_err.size(), 0);
+            // read data
+            ReadFull(&header_reader, rst_err.data(), scnc);
+        } else if (memcmp(ujpg_mrk, "GRB", 3) == 0) {
+            // read garbage (data after end of JPG) from file
+            ReadFull(&header_reader, ujpg_mrk, 4);
+            grbs = LEtoUint32(ujpg_mrk);
+            grbgdata = aligned_alloc(grbs);
+            if (grbgdata == NULL) {
+                fprintf(stderr, MEM_ERRMSG);
+                errorlevel.store(2);
+                return false;
+            }
+            // read garbage data
+            ReadFull(&header_reader, grbgdata, grbs);
+        } else if (memcmp(ujpg_mrk, "PGR", 3) == 0) {
+            // read prefix garbage (data before beginning of JPG) from file
+            ReadFull(&header_reader, ujpg_mrk, 4);
+            prefix_grbs = LEtoUint32(ujpg_mrk);
+            prefix_grbgdata = aligned_alloc(prefix_grbs);
+            if (prefix_grbgdata == NULL) {
+                fprintf(stderr, MEM_ERRMSG);
+                errorlevel.store(2);
+                return false;
+            }
+            // read garbage data
+            ReadFull(&header_reader, prefix_grbgdata, prefix_grbs);
+        } else if (memcmp(ujpg_mrk, "SIZ", 3) == 0) {
+            // full size of the original file
+            ReadFull(&header_reader, ujpg_mrk, 4);
+            max_file_size = LEtoUint32(ujpg_mrk);
+        } else if (memcmp(ujpg_mrk, "EEE", 3) == 0) {
+            ReadFull(&header_reader, ujpg_mrk, 28);
+            max_cmp = LEtoUint32(ujpg_mrk);
+            max_bpos = LEtoUint32(ujpg_mrk + 4);
+            max_sah = LEtoUint32(ujpg_mrk + 8);
+            max_dpos[0] = LEtoUint32(ujpg_mrk + 12);
+            max_dpos[1] = LEtoUint32(ujpg_mrk + 16);
+            max_dpos[2] = LEtoUint32(ujpg_mrk + 20);
+            max_dpos[3] = LEtoUint32(ujpg_mrk + 24);
+            early_eof_encountered = true;
+            colldata.set_truncation_bounds(max_cmp, max_bpos, max_dpos, max_sah);
+        } else {
+            if (memcmp(ujpg_mrk, "CMP", 3) == 0) {
+                break;
+            } else {
+                fprintf(stderr, "unknown data found");
+                errorlevel.store(2);
+            }
+            return false;
+        }
+    }
+    write_byte_bill(Billing::HEADER, false, 2 + hdrs + prefix_grbs + grbs);
+    write_byte_bill(Billing::HEADER, true, compressed_header_buffer.size());
+
+    ReadFull(str_in, ujpg_mrk, 3);
+    write_byte_bill(Billing::HEADER, true, 3);
+
+    write_byte_bill(Billing::DELIMITERS, true, 4 * NUM_THREADS); // trailing vpx_encode bits
+    write_byte_bill(Billing::HEADER, true, 4);                   // trailing size
+
+    if (memcmp(ujpg_mrk, "CMP", 3) != 0) {
+        always_assert(false && "CMP must be present (uncompressed) in the file");
+        return false; // not a JPG
+    }
+    colldata.signal_worker_should_begin();
+    g_decoder->initialize(str_in, thread_handoff);
+    colldata.start_decoder(g_decoder);
+    return true;
+}
+
+/* -----------------------------------------------
+    set each variable to its initial value
+    ----------------------------------------------- */
+
+bool reset_buffers(void) {
+    // Releases the header / huffman / garbage buffers and puts every piece of per-image
+    // state touched below back to its initial value. Always returns true.
+
+    // -- release buffers --
+
+    if (hdrdata != NULL) {
+        aligned_dealloc(hdrdata);
+    }
+    if (huffdata != NULL) {
+        aligned_dealloc(huffdata);
+    }
+    // grbgdata is only released when it does not point at EOI
+    if (grbgdata != NULL && grbgdata != EOI) {
+        aligned_dealloc(grbgdata);
+    }
+    rst_err.clear();
+    rstp.resize(0);
+    scnp.resize(0);
+    hdrdata = NULL;
+    huffdata = NULL;
+    grbgdata = NULL;
+
+    // drop the image arrays
+    colldata.reset();
+
+    // -- restore defaults --
+
+    // component info: -1 marks "not yet parsed", no quantization table attached
+    for (int slot = 0; slot < 4; slot++) {
+        auto& info = cmpnfo[slot];
+        info.sfv = -1;
+        info.sfh = -1;
+        info.mbs = -1;
+        info.bcv = -1;
+        info.bch = -1;
+        info.bc = -1;
+        info.ncv = -1;
+        info.nch = -1;
+        info.nc = -1;
+        info.sid = -1;
+        info.jid = -1;
+        info.qtable = NULL;
+        info.huffdc = -1;
+        info.huffac = -1;
+    }
+
+    // image dimensions and component count
+    imgwidth = 0;
+    imgheight = 0;
+    cmpc = 0;
+
+    // mcu layout and restart interval
+    sfhm = 0;
+    sfvm = 0;
+    mcuc = 0;
+    mcuh = 0;
+    mcuv = 0;
+    rsti = 0;
+    max_file_size = 0; // this file isn't truncated
+
+    // huffman-table-present flags and quantization tables
+    for (int table = 0; table < 4; table++) {
+        htset[0][table] = 0;
+        htset[1][table] = 0;
+        for (int coef = 0; coef < 64; coef++) {
+            qtables[table][coef] = 0;
+        }
+    }
+
+    // coding type not known yet
+    jpegtype = 0;
+
+    // padbit not known yet
+    padbit = -1;
+
+    return true;
+}
+
+/* ----------------------- End of main functions -------------------------- */
+
+/* ----------------------- Begin of JPEG specific functions -------------------------- */
+
+/* -----------------------------------------------
+    Parses header for imageinfo
+    ----------------------------------------------- */
+bool setup_imginfo_jpg(bool only_allocate_two_image_rows) {
+    // Walks the marker segments stored in hdrdata, hands every segment except SOS / DHT / DRI
+    // to parse_jfif_jpg(), then derives the MCU and per-component block geometry and
+    // initialises colldata. Returns false when a segment is rejected or the header is incomplete.
+    unsigned int offset = 0; // start of the current segment inside hdrdata
+
+    while ((int)offset < hdrs) {
+        const unsigned char marker = hdrdata[offset + 1];
+        const unsigned int seg_len = 2 + B_SHORT(hdrdata[offset + 2], hdrdata[offset + 3]);
+        const bool skipped = (marker == 0xDA) || (marker == 0xC4) || (marker == 0xDD);
+        if (!skipped) {
+            if (!parse_jfif_jpg(marker, seg_len, &(hdrdata[offset]))) {
+                return false;
+            }
+        }
+        offset += seg_len;
+    }
+
+    // a usable header must have declared at least one component ...
+    if (cmpc == 0) {
+        fprintf(stderr, "header contains incomplete information");
+        errorlevel.store(2);
+        return false;
+    }
+    // ... and every component needs sampling factors, a non-empty quantization table and a coding type
+    for (int idx = 0; idx < cmpc; idx++) {
+        const bool incomplete = (cmpnfo[idx].sfv == 0) || (cmpnfo[idx].sfh == 0) || (cmpnfo[idx].qtable == NULL) ||
+                                (cmpnfo[idx].qtable[0] == 0) || (jpegtype == 0);
+        if (incomplete) {
+            fprintf(stderr, "header information is incomplete");
+            errorlevel.store(2);
+            return false;
+        }
+    }
+
+    // largest sampling factors over all components
+    for (int idx = 0; idx < cmpc; idx++) {
+        if (cmpnfo[idx].sfh > sfhm) {
+            sfhm = cmpnfo[idx].sfh;
+        }
+        if (cmpnfo[idx].sfv > sfvm) {
+            sfvm = cmpnfo[idx].sfv;
+        }
+    }
+
+    // mcu grid
+    mcuv = (int)ceil((float)imgheight / (float)(8 * sfhm));
+    mcuh = (int)ceil((float)imgwidth / (float)(8 * sfvm));
+    mcuc = mcuv * mcuh;
+
+    // per-component block counts; track pixel extents of component 0 and of the largest other component
+    int maxLumaWidth = 0;
+    int maxLumaHeight = 0;
+    int maxChromaWidth = 0;
+    int maxChromaHeight = 0;
+    for (int idx = 0; idx < cmpc; idx++) {
+        auto& info = cmpnfo[idx];
+        info.mbs = info.sfv * info.sfh;
+        info.bcv = mcuv * info.sfh;
+        info.bch = mcuh * info.sfv;
+        info.bc = info.bcv * info.bch;
+        info.ncv = (int)ceil((float)imgheight * ((float)info.sfh / (8.0 * sfhm)));
+        info.nch = (int)ceil((float)imgwidth * ((float)info.sfv / (8.0 * sfvm)));
+        info.nc = info.ncv * info.nch;
+
+        const int pixel_width = info.bch * 8;
+        const int pixel_height = info.bcv * 8;
+        if (idx == 0) {
+            maxLumaWidth = pixel_width;
+            maxLumaHeight = pixel_height;
+            continue;
+        }
+        if (maxChromaWidth < pixel_width) {
+            maxChromaWidth = pixel_width;
+        }
+        if (maxChromaHeight < pixel_height) {
+            maxChromaHeight = pixel_height;
+        }
+    }
+    LeptonDebug::setupDebugData(maxLumaWidth, maxLumaHeight, maxChromaWidth, maxChromaHeight);
+
+    // statistical ids: one per component for up to three components, otherwise all share id 0
+    const bool separate_ids = (cmpc <= 3);
+    for (int idx = 0; idx < cmpc; idx++) {
+        cmpnfo[idx].sid = separate_ids ? idx : 0;
+    }
+
+    // allocate the coefficient storage and record how much memory that took
+    const size_t before_init = Sirikata::memmgr_size_allocated();
+    colldata.init(cmpnfo, cmpc, mcuh, mcuv, jpegtype == 1 && only_allocate_two_image_rows);
+    const size_t after_init = Sirikata::memmgr_size_allocated();
+    total_framebuffer_allocated = after_init - before_init;
+    return true;
+}
+
+/* -----------------------------------------------
+    Parse routines for JFIF segments
+    ----------------------------------------------- */
+bool parse_jfif_jpg(unsigned char type, unsigned int len, unsigned char* segment) {
+    // Interprets a single JFIF marker segment and updates the decoder's global state.
+    //   type    - marker code (callers in this file pass the byte that follows 0xFF)
+    //   len     - total segment length (callers in this file pass 2 + the 16 bit length field)
+    //   segment - first byte of the segment; the payload starts at offset 4
+    // Returns true when the segment was accepted or can be ignored (unknown markers only raise
+    // errorlevel to 1), false when the segment is invalid or describes an unsupported coding
+    // process. Unsupported sampling factors terminate through custom_exit().
+    unsigned int hpos = 4; // current position in segment, start after segment header
+    int lval, rval;        // temporary variables
+    int skip;
+    int cmp;
+    int i;
+
+    switch (type) {
+        case 0xC4: // DHT segment
+            // build huffman trees & codes
+            while (hpos < len) {
+                lval = LBITS(segment[hpos], 4);
+                rval = RBITS(segment[hpos], 4);
+                if (((lval < 0) || (lval >= 2)) || ((rval < 0) || (rval >= 4))) break;
+
+                hpos++;
+                // build huffman codes & trees
+                if (!build_huffcodes(&(segment[hpos + 0]), &(segment[hpos + 16]), &(hcodes[lval][rval]),
+                                     &(htrees[lval][rval]))) {
+                    errorlevel.store(2);
+                    return false;
+                }
+                htset[lval][rval] = 1;
+
+                // 16 code-length counts followed by one byte per defined code
+                skip = 16;
+                for (i = 0; i < 16; i++) skip += (int)segment[hpos + i];
+                hpos += skip;
+            }
+
+            if (hpos != len) {
+                // if we get here, something went wrong
+                fprintf(stderr, "size mismatch in dht marker");
+                errorlevel.store(2);
+                return false;
+            }
+            return true;
+
+        case 0xDB: // DQT segment
+            // copy quantization tables to internal memory
+            while (hpos < len) {
+                lval = LBITS(segment[hpos], 4);
+                rval = RBITS(segment[hpos], 4);
+                if ((lval < 0) || (lval >= 2)) break;
+                if ((rval < 0) || (rval >= 4)) break;
+                hpos++;
+                if (lval == 0) { // 8 bit precision
+                    for (i = 0; i < 64; i++) {
+                        qtables[rval][i] = (unsigned short)segment[hpos + i];
+                        if (qtables[rval][i] == 0) break;
+                    }
+                    hpos += 64;
+                } else { // 16 bit precision
+                    for (i = 0; i < 64; i++) {
+                        qtables[rval][i] = B_SHORT(segment[hpos + (2 * i)], segment[hpos + (2 * i) + 1]);
+                        if (qtables[rval][i] == 0) break;
+                    }
+                    hpos += 128;
+                }
+            }
+
+            if (hpos != len) {
+                // if we get here, something went wrong
+                fprintf(stderr, "size mismatch in dqt marker");
+                errorlevel.store(2);
+                return false;
+            }
+            return true;
+
+        case 0xDD: // DRI segment
+            // define restart interval
+            rsti = B_SHORT(segment[hpos], segment[hpos + 1]);
+            return true;
+
+        case 0xDA: // SOS segment
+            // prepare next scan
+            cs_cmpc = segment[hpos];
+            if (cs_cmpc > cmpc) {
+                fprintf(stderr, "%i components in scan, only %i are allowed", cs_cmpc, cmpc);
+                errorlevel.store(2);
+                return false;
+            }
+            hpos++;
+            for (i = 0; i < cs_cmpc; i++) {
+                // look up the frame component with this id; the bound is tested first so that
+                // cmpnfo[] is never read at index cmpc when no component matches
+                for (cmp = 0; (cmp < cmpc) && (segment[hpos] != cmpnfo[cmp].jid); cmp++)
+                    ;
+                if (cmp == cmpc) {
+                    fprintf(stderr, "component id mismatch in start-of-scan");
+                    errorlevel.store(2);
+                    return false;
+                }
+                cs_cmp[i] = cmp;
+                cmpnfo[cmp].huffdc = LBITS(segment[hpos + 1], 4);
+                cmpnfo[cmp].huffac = RBITS(segment[hpos + 1], 4);
+                if ((cmpnfo[cmp].huffdc < 0) || (cmpnfo[cmp].huffdc >= 4) || (cmpnfo[cmp].huffac < 0) ||
+                    (cmpnfo[cmp].huffac >= 4)) {
+                    fprintf(stderr, "huffman table number mismatch");
+                    errorlevel.store(2);
+                    return false;
+                }
+                hpos += 2;
+            }
+            cs_from = segment[hpos + 0];
+            cs_to = segment[hpos + 1];
+            cs_sah = LBITS(segment[hpos + 2], 4);
+            cs_sal = RBITS(segment[hpos + 2], 4);
+            // check for errors
+            if ((cs_from > cs_to) || (cs_from > 63) || (cs_to > 63)) {
+                fprintf(stderr, "spectral selection parameter out of range");
+                errorlevel.store(2);
+                return false;
+            }
+            if ((cs_sah >= 12) || (cs_sal >= 12)) {
+                fprintf(stderr, "successive approximation parameter out of range");
+                errorlevel.store(2);
+                return false;
+            }
+            return true;
+
+        case 0xC0: // SOF0 segment
+        // coding process: baseline DCT
+
+        case 0xC1: // SOF1 segment
+        // coding process: extended sequential DCT
+
+        case 0xC2: // SOF2 segment
+            // coding process: progressive DCT
+
+            // set JPEG coding type
+            if (type == 0xC2)
+                jpegtype = 2;
+            else
+                jpegtype = 1;
+
+            // check data precision, only 8 bit is allowed
+            lval = segment[hpos];
+            if (lval != 8) {
+                fprintf(stderr, "%i bit data precision is not supported", lval);
+                errorlevel.store(2);
+                return false;
+            }
+
+            // image size, height & component count
+            imgheight = B_SHORT(segment[hpos + 1], segment[hpos + 2]);
+            imgwidth = B_SHORT(segment[hpos + 3], segment[hpos + 4]);
+            cmpc = segment[hpos + 5];
+            if (cmpc > 4) {
+                // report the count found in the file, then clamp the stored value
+                fprintf(stderr, "image has %i components, max 4 are supported", cmpc);
+                cmpc = 4;
+                errorlevel.store(2);
+                return false;
+            }
+            hpos += 6;
+            // components contained in image
+            for (cmp = 0; cmp < cmpc; cmp++) {
+                cmpnfo[cmp].jid = segment[hpos];
+                cmpnfo[cmp].sfv = LBITS(segment[hpos + 1], 4);
+                cmpnfo[cmp].sfh = RBITS(segment[hpos + 1], 4);
+                if (cmpnfo[cmp].sfv > 4 || cmpnfo[cmp].sfh > 4) {
+                    custom_exit(ExitCode::SAMPLING_BEYOND_FOUR_UNSUPPORTED);
+                }
+#ifndef ALLOW_3_OR_4_SCALING_FACTOR
+                if (cmpnfo[cmp].sfv > 2 || cmpnfo[cmp].sfh > 2) {
+                    custom_exit(ExitCode::SAMPLING_BEYOND_TWO_UNSUPPORTED);
+                }
+#endif
+                // the table selector must index an existing quantization table
+                uint32_t quantization_table_value = segment[hpos + 2];
+                if (quantization_table_value >= qtables.size()) {
+                    errorlevel.store(2);
+                    return false;
+                }
+                cmpnfo[cmp].qtable = qtables[quantization_table_value].begin();
+                hpos += 3;
+            }
+
+            return true;
+
+        case 0xC3: // SOF3 segment
+            // coding process: lossless sequential
+            fprintf(stderr, "sof3 marker found, image is coded lossless");
+            errorlevel.store(2);
+            return false;
+
+        case 0xC5: // SOF5 segment
+            // coding process: differential sequential DCT
+            fprintf(stderr, "sof5 marker found, image is coded diff. sequential");
+            errorlevel.store(2);
+            return false;
+
+        case 0xC6: // SOF6 segment
+            // coding process: differential progressive DCT
+            fprintf(stderr, "sof6 marker found, image is coded diff. progressive");
+            errorlevel.store(2);
+            return false;
+
+        case 0xC7: // SOF7 segment
+            // coding process: differential lossless
+            fprintf(stderr, "sof7 marker found, image is coded diff. lossless");
+            errorlevel.store(2);
+            return false;
+
+        case 0xC9: // SOF9 segment
+            // coding process: arithmetic extended sequential DCT
+            fprintf(stderr, "sof9 marker found, image is coded arithm. sequential");
+            errorlevel.store(2);
+            return false;
+
+        case 0xCA: // SOF10 segment
+            // coding process: arithmetic progressive DCT
+            fprintf(stderr, "sof10 marker found, image is coded arithm. progressive");
+            errorlevel.store(2);
+            return false;
+
+        case 0xCB: // SOF11 segment
+            // coding process: arithmetic lossless
+            fprintf(stderr, "sof11 marker found, image is coded arithm. lossless");
+            errorlevel.store(2);
+            return false;
+
+        case 0xCD: // SOF13 segment
+            // coding process: arithmetic differential sequential DCT
+            fprintf(stderr, "sof13 marker found, image is coded arithm. diff. sequential");
+            errorlevel.store(2);
+            return false;
+
+        case 0xCE: // SOF14 segment
+            // coding process: arithmetic differential progressive DCT
+            fprintf(stderr, "sof14 marker found, image is coded arithm. diff. progressive");
+            errorlevel.store(2);
+            return false;
+
+        case 0xCF: // SOF15 segment
+            // coding process: arithmetic differential lossless
+            fprintf(stderr, "sof15 marker found, image is coded arithm. diff. lossless");
+            errorlevel.store(2);
+            return false;
+
+        case 0xE0: // APP0 segment
+        case 0xE1: // APP1 segment
+        case 0xE2: // APP2 segment
+        case 0xE3: // APP3 segment
+        case 0xE4: // APP4 segment
+        case 0xE5: // APP5 segment
+        case 0xE6: // APP6 segment
+        case 0xE7: // APP7 segment
+        case 0xE8: // APP8 segment
+        case 0xE9: // APP9 segment
+        case 0xEA: // APP10 segment
+        case 0xEB: // APP11 segment
+        case 0xEC: // APP12 segment
+        case 0xED: // APP13 segment
+        case 0xEE: // APP14 segment
+        case 0xEF: // APP15 segment
+        case 0xFE: // COM segment
+            // do nothing - return true
+            return true;
+
+        case 0xD0: // RST0 segment
+        case 0xD1: // RST1 segment
+        case 0xD2: // RST2 segment
+        case 0xD3: // RST3 segment
+        case 0xD4: // RST4 segment
+        case 0xD5: // RST5 segment
+        case 0xD6: // RST6 segment
+        case 0xD7: // RST7 segment
+            // return errormessage - RST is out of place here
+            fprintf(stderr, "rst marker found out of place");
+            errorlevel.store(2);
+            return false;
+
+        case 0xD8: // SOI segment
+            // return errormessage - start-of-image is out of place here
+            fprintf(stderr, "soi marker found out of place");
+            errorlevel.store(2);
+            return false;
+
+        case 0xD9: // EOI segment
+            // return errormessage - end-of-image is out of place here
+            fprintf(stderr, "eoi marker found out of place");
+            errorlevel.store(2);
+            return false;
+
+        default: // unknown marker segment
+            // return warning
+            fprintf(stderr, "unknown marker found: FF %2X", type);
+            errorlevel.store(1);
+            return true;
+    }
+}
+
+/* -----------------------------------------------
+    JFIF header rebuilding routine
+    ----------------------------------------------- */
+bool rebuild_header_jpg(void) {
+    // Builds a reduced copy of the header that keeps only the SOS, DHT, DQT, SOF0/1/2 and DRI
+    // segments, then installs it as hdrdata / hdrs. Always returns true.
+    abytewriter* kept = new abytewriter(4096);
+
+    // visit each segment in turn, copying the ones worth keeping
+    for (unsigned int offset = 0; (int)offset < hdrs;) {
+        const unsigned char marker = hdrdata[offset + 1];
+        const unsigned int seg_len = 2 + B_SHORT(hdrdata[offset + 2], hdrdata[offset + 3]);
+        switch (marker) {
+            case 0xDA: // SOS
+            case 0xC4: // DHT
+            case 0xDB: // DQT
+            case 0xC0: // SOF0
+            case 0xC1: // SOF1
+            case 0xC2: // SOF2
+            case 0xDD: // DRI
+                kept->write_n(&(hdrdata[offset]), seg_len);
+                break;
+            default:
+                // any other segment is meta info and gets dropped
+                break;
+        }
+        offset += seg_len;
+    }
+
+    // swap the old header for the reduced one
+    custom_free(hdrdata);
+    hdrdata = kept->getptr_aligned();
+    hdrs = kept->getpos();
+    delete kept;
+
+    return true;
+}
+
+/* -----------------------------------------------
+    sequential block decoding routine
+    ----------------------------------------------- */
+int decode_block_seq(abitreader* huffr, huffTree* dctree, huffTree* actree, short* block) {
+    // Decodes one sequentially coded 8x8 block from huffr into block[0..63].
+    // Returns the end-of-block position (64 when no EOB code was seen) or -1 when a
+    // huffman code could not be decoded.
+    unsigned short bits; // raw value bits following a huffman code
+    unsigned char size;  // number of value bits
+    unsigned char run;   // zero run preceding an AC coefficient
+    int eob = 64;
+    int code;
+
+    // DC coefficient
+    code = next_huffcode(huffr, dctree, Billing::EXP0_DC, Billing::EXPN_DC);
+    if (code < 0) {
+        return -1; // return error
+    }
+    size = (unsigned char)code;
+    bits = huffr->read(size);
+    if (size) {
+        write_bit_bill(Billing::RES_DC, false, size - 1);
+        write_bit_bill(Billing::SIGN_DC, false, 1);
+    }
+    block[0] = DEVLI(size, bits);
+
+    // AC coefficients
+    int bpos = 1;
+    while (bpos < 64) {
+        const bool edge = is_edge(bpos);
+        code = next_huffcode(huffr, actree, edge ? Billing::BITMAP_EDGE : Billing::BITMAP_7x7,
+                             edge ? Billing::EXPN_EDGE : Billing::EXPN_7x7);
+        if (code < 0) {
+            return -1; // return error
+        }
+        if (code == 0) { // EOB - coefficients past this point are left untouched
+            eob = bpos;
+            break;
+        }
+
+        // run/size pair
+        run = LBITS(code, 4);
+        size = RBITS(code, 4);
+        bits = huffr->read(size);
+        if (size) {
+            write_bit_bill(edge ? Billing::RES_EDGE : Billing::RES_7x7, false, size - 1);
+            write_bit_bill(edge ? Billing::SIGN_EDGE : Billing::SIGN_7x7, false, 1);
+        }
+        if ((run + bpos) >= 64) {
+            // the run would leave the block: only tolerated once the reader hit end of file
+            always_assert(huffr->eof && "If 0run is longer than the block must be truncated");
+            for (; bpos < 64; ++bpos) {
+                block[bpos] = 0;
+            }
+            block[63] = 1; // set the value to something matching the EOB
+            break;
+        }
+        for (; run > 0; run--) { // write zeroes
+            block[bpos++] = 0;
+        }
+        block[bpos++] = (short)DEVLI(size, bits); // decode cvli
+    }
+
+    // return position of eob
+    return eob;
+}
+
+/* -----------------------------------------------
+    progressive DC decoding routine
+    ----------------------------------------------- */
+int decode_dc_prg_fs(abitreader* huffr, huffTree* dctree, short* block) {
+    // Decodes the DC coefficient of a progressive first scan into block[0].
+    // Returns 0 on success, -1 when the huffman code could not be decoded.
+    const int code = next_huffcode(huffr, dctree, Billing::EXP0_DC, Billing::EXPN_DC);
+    if (code < 0) {
+        return -1; // return error
+    }
+
+    // the code is the bit count of the value that follows
+    unsigned char size = (unsigned char)code;
+    unsigned short bits = huffr->read(size);
+    block[0] = DEVLI(size, bits);
+
+    return 0;
+}
+
+/* -----------------------------------------------
+    progressive DC encoding routine
+    ----------------------------------------------- */
+int encode_dc_prg_fs(abitwriter* huffw, huffCodes* dctbl, short* block) {
+    // Writes the DC coefficient block[0] of a progressive first scan: the huffman code for
+    // its bit length followed by the value bits. Always returns 0.
+    int value = block[0];
+    unsigned char size = uint16bit_length(ABS(value));
+    unsigned short bits = ENVLI(size, value);
+
+    huffw->write(dctbl->cval[size], dctbl->clen[size]);
+    huffw->write(bits, size);
+
+    return 0;
+}
+
+/* -----------------------------------------------
+    progressive AC decoding routine
+    ----------------------------------------------- */
+int decode_ac_prg_fs(abitreader* huffr, huffTree* actree, short* block, unsigned int* eobrun, int from, int to) {
+    // Decodes the AC coefficients block[from..to] of a progressive first scan.
+    //   eobrun - in/out count of blocks still covered by a pending end-of-band run
+    // Returns the end-of-band position (to + 1 when the band was filled completely,
+    // `from` when the block is covered by a pending run), or -1 on a decoding error.
+    unsigned short n;
+    unsigned char s;
+    unsigned char z;
+    int eob = to + 1;
+    int bpos;
+    int hc;
+    int l;
+    int r;
+
+    // a pending eobrun means the whole band of this block is zero
+    if ((*eobrun) > 0) {
+        // advance bpos on every pass; without the increment this loop never terminates
+        for (bpos = from; bpos <= to; bpos++) block[bpos] = 0;
+        (*eobrun)--;
+        return from;
+    }
+
+    // decode ac
+    for (bpos = from; bpos <= to;) {
+        // decode next
+        hc = next_huffcode(huffr, actree, is_edge(bpos) ? Billing::BITMAP_EDGE : Billing::BITMAP_7x7,
+                           is_edge(bpos) ? Billing::EXPN_EDGE : Billing::EXPN_7x7);
+        if (hc < 0) return -1;
+        l = LBITS(hc, 4);
+        r = RBITS(hc, 4);
+        // analyse code
+        if ((l == 15) || (r > 0)) { // decode run/level combination
+            z = l;
+            s = r;
+            n = huffr->read(s);
+            if ((z + bpos) > to) return -1; // run is too long
+            while (z > 0) {                 // write zeroes
+                block[bpos++] = 0;
+                z--;
+            }
+            block[bpos++] = (short)DEVLI(s, n); // decode cvli
+        } else {                                // decode eobrun
+            eob = bpos;
+            s = l;
+            n = huffr->read(s);
+            (*eobrun) = E_DEVLI(s, n);
+            // coefficients from bpos to `to` are deliberately left untouched here
+            (*eobrun)--; // decrement eobrun ( for this one )
+            break;
+        }
+    }
+
+    // return position of eob
+    return eob;
+}
+
+/* -----------------------------------------------
+    progressive AC encoding routine
+    ----------------------------------------------- */
+int encode_ac_prg_fs(abitwriter* huffw, huffCodes* actbl, short* block, unsigned int* eobrun, int from, int to) {
+    // Writes the AC coefficients block[from..to] of a progressive first scan.
+    //   eobrun - in/out count of pending end-of-band blocks; flushed before any nonzero
+    //            coefficient and when it reaches actbl->max_eobrun
+    // Returns the position just past the last nonzero coefficient of the band.
+    unsigned short bits;
+    unsigned char size;
+    unsigned char zeros = 0; // zero coefficients seen since the last nonzero one
+    int code;
+    int value;
+
+    for (int bpos = from; bpos <= to; bpos++) {
+        value = block[bpos];
+        if (value == 0) { // increment zero counter
+            zeros++;
+            continue;
+        }
+
+        // a nonzero coefficient ends any pending eobrun
+        encode_eobrun(huffw, actbl, eobrun);
+        // runs of 16 zeroes each get their own 0xF0 code
+        for (; zeros >= 16; zeros -= 16) {
+            huffw->write(actbl->cval[0xF0], actbl->clen[0xF0]);
+        }
+        // vli encode
+        size = nonzero_bit_length(ABS(value));
+        bits = ENVLI(size, value);
+        code = ((zeros << 4) + size);
+        // write to huffman writer
+        huffw->write(actbl->cval[code], actbl->clen[code]);
+        huffw->write(bits, size);
+        // reset zeroes
+        zeros = 0;
+    }
+
+    // no trailing zeroes: the band is completely filled
+    if (zeros == 0) {
+        return 1 + to;
+    }
+
+    // trailing zeroes extend the eobrun; flush it once it reaches the table's maximum
+    (*eobrun)++;
+    if ((*eobrun) == actbl->max_eobrun) {
+        encode_eobrun(huffw, actbl, eobrun);
+    }
+    return 1 + to - zeros;
+}
+
+/* -----------------------------------------------
+    progressive DC SA decoding routine
+    ----------------------------------------------- */
+int decode_dc_prg_sa(abitreader* huffr, short* block) {
+    // Reads one bit from huffr and stores it as the DC entry of block. Always returns 0.
+    *block = huffr->read(1);
+
+    return 0;
+}
+
+/* -----------------------------------------------
+    progressive DC SA encoding routine
+    ----------------------------------------------- */
+int encode_dc_prg_sa(abitwriter* huffw, short* block) {
+    // Writes the DC entry of block to huffw as a single bit. Always returns 0.
+    huffw->write(*block, 1);
+
+    return 0;
+}
+
+/* -----------------------------------------------
+    progressive AC SA decoding routine
+    ----------------------------------------------- */
+int decode_ac_prg_sa(abitreader* huffr, huffTree* actree, short* block, unsigned int* eobrun, int from, int to) {
+    // Decodes one successive approximation pass over the AC coefficients block[from..to].
+    //   eobrun - in/out count of blocks covered by a pending end-of-band run
+    // Entries that are already nonzero are overwritten with a correction bit carrying the
+    // entry's sign; entries that become nonzero in this pass are set to +1 or -1.
+    // Returns the end-of-band position (`to` when no eobrun code was decoded) or -1 on error.
+    unsigned short bit;
+    unsigned char size;
+    signed char skip;  // zero entries still to pass before the new value is placed
+    signed char value; // newly nonzero value: -1, 0 or +1
+    int bpos = from;
+    int eob = to;
+    int code;
+    int hi;
+    int lo;
+
+    // decode AC successive approximation bits
+    if ((*eobrun) == 0) {
+        while (bpos <= to) {
+            // decode next
+            code = next_huffcode(huffr, actree, is_edge(bpos) ? Billing::BITMAP_EDGE : Billing::BITMAP_7x7,
+                                 is_edge(bpos) ? Billing::EXPN_EDGE : Billing::EXPN_7x7);
+            if (code < 0) {
+                return -1;
+            }
+            hi = LBITS(code, 4);
+            lo = RBITS(code, 4);
+
+            if ((hi != 15) && (lo <= 0)) { // decode eobrun
+                eob = bpos;
+                size = hi;
+                bit = huffr->read(size);
+                (*eobrun) = E_DEVLI(size, bit);
+                break;
+            }
+
+            // decode run/level combination
+            skip = hi;
+            size = lo;
+            switch (size) {
+                case 0:
+                    value = 0;
+                    break;
+                case 1:
+                    bit = huffr->read(1);
+                    value = (bit == 0) ? -1 : 1; // fast decode vli
+                    break;
+                default:
+                    return -1; // decoding error
+            }
+
+            // pass zeroes / place the value / read correction bits
+            bool placed = false;
+            while (!placed) {
+                if (block[bpos] != 0) { // read correction bit
+                    bit = huffr->read(1);
+                    block[bpos] = (block[bpos] > 0) ? bit : -bit;
+                } else if (skip > 0) { // pass a zero
+                    skip--;
+                } else { // place the value
+                    block[bpos++] = value;
+                    placed = true;
+                    continue;
+                }
+                // error check: running past the end of the band is a decoding error
+                const bool at_end = (bpos >= to);
+                bpos++;
+                if (at_end) {
+                    return -1;
+                }
+            }
+        }
+    }
+
+    // read after eob correction bits
+    if ((*eobrun) > 0) {
+        while (bpos <= to) {
+            if (block[bpos] != 0) {
+                bit = huffr->read(1);
+                block[bpos] = (block[bpos] > 0) ? bit : -bit;
+            }
+            bpos++;
+        }
+        // decrement eobrun
+        (*eobrun)--;
+    }
+
+    // return eob
+    return eob;
+}
+
+/* -----------------------------------------------
+    progressive AC SA encoding routine
+    ----------------------------------------------- */
+int encode_ac_prg_sa(
+    abitwriter* huffw, abytewriter* storw, huffCodes* actbl, short* block, unsigned int* eobrun, int from, int to) {
+    // Encodes one successive approximation pass over the AC coefficients block[from..to].
+    //   storw  - receives the correction bits, which encode_crbits() is called to emit to huffw
+    //   eobrun - in/out count of pending end-of-band blocks
+    // Returns the position just past the last entry equal to +1 or -1, or `from` when the
+    // band contains no such entry.
+    unsigned short bits;
+    unsigned char size;
+    unsigned char zeros = 0;
+    int eob = from;
+    int bpos;
+    int code;
+    int value;
+
+    // scan backwards for the last newly nonzero coefficient (+1 / -1) to locate the eob
+    bpos = to;
+    while (bpos >= from) {
+        if ((block[bpos] == 1) || (block[bpos] == -1)) {
+            eob = bpos + 1;
+            break;
+        }
+        bpos--;
+    }
+
+    // a block with new coefficients ends any pending eobrun
+    if ((eob > from) && ((*eobrun) > 0)) {
+        encode_eobrun(huffw, actbl, eobrun);
+        encode_crbits(huffw, storw);
+    }
+
+    // encode AC up to the eob
+    for (bpos = from; bpos < eob; bpos++) {
+        value = block[bpos];
+        switch (value) {
+            case 0:
+                // count the zero; sixteen of them are written as one 0xF0 code
+                zeros++;
+                if (zeros == 16) {
+                    huffw->write(actbl->cval[0xF0], actbl->clen[0xF0]);
+                    encode_crbits(huffw, storw);
+                    zeros = 0;
+                }
+                break;
+            case 1:
+            case -1:
+                // vli encode
+                size = nonzero_bit_length(ABS(value));
+                bits = ENVLI(size, value);
+                code = ((zeros << 4) + size);
+                // write to huffman writer
+                huffw->write(actbl->cval[code], actbl->clen[code]);
+                huffw->write(bits, size);
+                // write correction bits
+                encode_crbits(huffw, storw);
+                // reset zeroes
+                zeros = 0;
+                break;
+            default:
+                // store correction bit
+                bits = block[bpos] & 0x1;
+                storw->write(bits);
+                break;
+        }
+    }
+
+    // past the eob only correction bits remain
+    while (bpos <= to) {
+        if (block[bpos] != 0) {
+            bits = block[bpos] & 0x1;
+            storw->write(bits);
+        }
+        bpos++;
+    }
+
+    // an eob inside the band extends the eobrun; flush it once it reaches the table's maximum
+    if (eob <= to) {
+        (*eobrun)++;
+        if ((*eobrun) == actbl->max_eobrun) {
+            encode_eobrun(huffw, actbl, eobrun);
+            encode_crbits(huffw, storw);
+        }
+    }
+
+    // return eob
+    return eob;
+}
+
+/* -----------------------------------------------
+    run of EOB decoding routine for successive approximation (SA)
+    ----------------------------------------------- */
+int decode_eobrun_sa(abitreader* huffr, short* block, unsigned int* eobrun, int from, int to) {
+    // Every coefficient of block[from..to] that is already nonzero gets one bit read from huffr;
+    // zero coefficients are left untouched and consume no input.
+    for (int idx = from; idx <= to; ++idx) {
+        short& coef = block[idx];
+        if (coef == 0) continue;
+        const unsigned short bit = huffr->read(1);
+        // the sign already stored is kept, the bit just read becomes the value
+        coef = (coef > 0) ? bit : -bit;
+    }
+
+    // one block of the pending run has been consumed
+    *eobrun -= 1;
+
+    // this routine always reports 0
+    return 0;
+}
+
+/* -----------------------------------------------
+    writes a pending run of EOB to the huffman writer and clears it
+    ----------------------------------------------- */
+int encode_eobrun(abitwriter* huffw, huffCodes* actbl, unsigned int* eobrun) {
+    // no pending run -> nothing to emit
+    if ((*eobrun) == 0) return 0;
+
+    // a run above the table maximum is written in pieces: code 0xE0 plus 14 extra bits each
+    while ((*eobrun) > actbl->max_eobrun) {
+        huffw->write(actbl->cval[0xE0], actbl->clen[0xE0]);
+        huffw->write(E_ENVLI(14, 32767), 14);
+        (*eobrun) -= actbl->max_eobrun;
+    }
+
+    // category = bit length of the remaining run minus one (never taken below zero)
+    unsigned int category = uint16bit_length((*eobrun));
+    assert(category && "actbl->max_eobrun needs to be > 0");
+    if (category != 0) --category;
+    // symbol = category in the high nibble, followed by 'category' extra bits
+    const int symbol = (category << 4);
+    const unsigned short extra = E_ENVLI(category, (*eobrun));
+    huffw->write(actbl->cval[symbol], actbl->clen[symbol]);
+    huffw->write(extra, category);
+    (*eobrun) = 0; // the whole run has been written
+    return 0;
+}
+
+/* -----------------------------------------------
+    flushes buffered correction bits to the huffman writer
+    ----------------------------------------------- */
+int encode_crbits(abitwriter* huffw, abytewriter* storw) {
+    // each byte buffered in storw is written out as one bit
+    const int pending = storw->getpos();
+
+    if (pending != 0) {
+        // replay the buffered bytes, in order, as single bits
+        unsigned char* const bits = storw->peekptr_aligned();
+        int k = 0;
+        while (k < pending) {
+            huffw->write(bits[k], 1);
+            ++k;
+        }
+
+        // everything has been written; empty the buffer
+        storw->reset();
+    }
+    return 0;
+}
+
+/* -----------------------------------------------
+    reads bits and walks the huffman tree; returns the next code
+    ----------------------------------------------- */
+int next_huffcode(abitreader* huffw, huffTree* ctree, Billing min_bill, Billing max_bill) {
+    int node = 0; // the walk starts at tree index 0
+    do {
+#ifndef NDEBUG
+        write_bit_bill(min_bill, false, 1); // debug builds bill every bit that is about to be read
+        if (min_bill != max_bill) min_bill = (Billing)((int)min_bill + 1);
+#endif
+        if (huffw->read(1) == 1) { // bit 1 follows the r link, anything else the l link
+            node = ctree->r[node];
+        } else {
+            node = ctree->l[node];
+        }
+    } while ((node != 0) && (node < 256));
+    // a link value of 0 ends the walk early (result -256); values >= 256 yield value - 256
+    return (node - 256);
+}
+
+/* -----------------------------------------------
+    advances *dpos to the next block (non interleaved); returns 2, 1 or 0
+    ----------------------------------------------- */
+int next_mcuposn(int* cmp, int* dpos, int* rstw) {
+    const auto& info = cmpnfo[(*cmp)];
+
+    // step to the next block
+    ++(*dpos);
+
+    // horizontal fix for non interleaved mcu: when (*dpos % bch) hits nch, skip ahead by (bch - nch)
+    if ((info.bch != info.nch) && ((*dpos) % info.bch == info.nch)) {
+        (*dpos) += (info.bch - info.nch);
+    }
+
+    // vertical fix for non interleaved mcu: when (*dpos / bch) hits ncv, jump straight to bc
+    if ((info.bcv != info.ncv) && ((*dpos) / info.bch == info.ncv)) {
+        (*dpos) = info.bc;
+    }
+
+    // 2: position has reached or passed bc
+    if ((*dpos) >= info.bc) return 2;
+    // 1: rsti is in use and the rst wait counter, decremented here, has reached zero
+    if ((rsti > 0) && (--(*rstw) == 0)) return 1;
+    return 0;
+}
+
+/* -----------------------------------------------
+    skips the eobrun, calculates next position; returns -1 (error), 2, 1 or 0
+    ----------------------------------------------- */
+int skip_eobrun(int* cmp, int* dpos, int* rstw, unsigned int* eobrun) {
+    if ((*eobrun) > 0) // nothing is touched and 0 is returned when there is no pending run
+    {
+        // compare rst wait counter if needed: a run larger than the counter is an error (-1)
+        if (rsti > 0) {
+            if ((int)(*eobrun) > (*rstw))
+                return -1;
+            else
+                (*rstw) -= (*eobrun);
+        }
+
+        // fix for non interleaved mcu - horizontal: adds (bch - nch) once per nch blocks counted from the row offset
+        if (cmpnfo[(*cmp)].bch != cmpnfo[(*cmp)].nch) {
+            (*dpos) += ((((*dpos) % cmpnfo[(*cmp)].bch) + (*eobrun)) / cmpnfo[(*cmp)].nch) *
+                       (cmpnfo[(*cmp)].bch - cmpnfo[(*cmp)].nch);
+        }
+
+        // fix for non interleaved mcu - vertical: from row ncv on, adds (bcv - ncv) * bch
+        if (cmpnfo[(*cmp)].bcv != cmpnfo[(*cmp)].ncv) {
+            if ((*dpos) / cmpnfo[(*cmp)].bch >= cmpnfo[(*cmp)].ncv)
+                (*dpos) += (cmpnfo[(*cmp)].bcv - cmpnfo[(*cmp)].ncv) * cmpnfo[(*cmp)].bch;
+        }
+
+        // skip blocks: the run length itself is added after both fixes
+        (*dpos) += (*eobrun);
+
+        // reset eobrun
+        (*eobrun) = 0;
+
+        // check position: exactly bc -> 2, beyond bc -> -1, rst wait counter used up -> 1
+        if ((*dpos) == cmpnfo[(*cmp)].bc)
+            return 2;
+        else if ((*dpos) > cmpnfo[(*cmp)].bc)
+            return -1;
+        else if (rsti > 0)
+            if ((*rstw) == 0) return 1;
+    }
+
+    return 0;
+}
+
+/* -----------------------------------------------
+    creates huffman-codes (hc) & -trees (ht) from dht-data (clen, cval); false only for a JPEG whose tree overflows
+    ----------------------------------------------- */
+bool build_huffcodes(unsigned char* clen, unsigned char* cval, huffCodes* hc, huffTree* ht) {
+    int nextfree; // next unused tree node index
+    int code;     // code value assigned to the next symbol
+    int node;     // tree node currently being visited
+    int i, j, k;  // k counts the symbols consumed from cval
+
+    // fill with zeroes -- NOTE(review): sizes assume 256 short-sized entries per table; confirm against the structs
+    memset(hc->clen, 0, 256 * sizeof(short));
+    memset(hc->cval, 0, 256 * sizeof(short));
+    memset(ht->l, 0, 256 * sizeof(short));
+    memset(ht->r, 0, 256 * sizeof(short));
+
+    // 1st part -> build huffman codes
+
+    // creating huffman-codes
+    k = 0;
+    code = 0;
+
+    // symbol-value of code is its position in the table; clen[i] symbols get codes of length i + 1
+    for (i = 0; i < 16; i++) {
+        for (j = 0; j < (int)clen[i & 0xff]; j++) {
+            hc->clen[(int)cval[k & 0xff] & 0xff] = 1 + i;
+            hc->cval[(int)cval[k & 0xff] & 0xff] = code;
+
+            k++;
+            code++;
+        }
+        code = code << 1; // moving on to the next code length appends one bit
+    }
+
+    // find out eobrun max value: the highest i in 14..0 whose symbol (i << 4) has a code gives (2 << i) - 1
+    hc->max_eobrun = 0;
+    for (i = 14; i >= 0; i--) {
+        if (hc->clen[(i << 4) & 255] > 0) {
+            hc->max_eobrun = (2 << i) - 1;
+            break;
+        }
+    }
+
+    // 2nd -> part use codes to build the coding tree
+
+    // initial value for next free place (index 0 is the root, and a stored 0 means "no link yet")
+    nextfree = 1;
+    const char* huffman_no_space = "Huffman table out of space\n";
+    // work through every code creating links between the nodes (represented through ints)
+    for (i = 0; i < 256; i++) {
+        // (re)set current node
+        node = 0;
+        // go through each code & store path: every bit except the lowest one, highest bit first
+        for (j = hc->clen[i] - 1; j > 0; j--) {
+            if (node <= 0xff) {
+                if (BITN(hc->cval[i], j) == 1) {
+                    if (ht->r[node] == 0) {
+                        ht->r[node] = nextfree++;
+                    }
+                    node = ht->r[node];
+                } else {
+                    if (ht->l[node] == 0) {
+                        ht->l[node] = nextfree++;
+                    }
+                    node = ht->l[node];
+                }
+            } else { // node index beyond 0xff: report on fd 2, retrying the write while it fails with EINTR
+                while (write(2, huffman_no_space, strlen(huffman_no_space)) == -1 && errno == EINTR) {
+                }
+                if (filetype == JPEG) {
+                    return false;
+                }
+            }
+        }
+        if (node <= 0xff) {
+            // last link is number of targetvalue + 256 (only for symbols that actually have a code)
+            if (hc->clen[i] > 0) {
+                if (BITN(hc->cval[i], 0) == 1) {
+                    ht->r[node] = i + 256;
+                } else {
+                    ht->l[node] = i + 256;
+                }
+            }
+        } else { // same out-of-space report as inside the path loop
+            while (write(2, huffman_no_space, strlen(huffman_no_space)) == -1 && errno == EINTR) {
+            }
+            if (filetype == JPEG) {
+                return false; // we accept any .lep file that was encoded this way
+            }
+        }
+    }
+    return true;
+}
+
+/* ----------------------- End of JPEG specific functions -------------------------- */
+
+/* ----------------------- Begin of developers functions -------------------------- */
+
+/* -----------------------------------------------
+    Writes info about the image, its header segments and its components to stdout
+    ----------------------------------------------- */
+bool write_info(void) {
+    FILE* fp;
+    const char* fn = "stdout"; // name used in the error message only
+
+    unsigned char type = 0x00; // type of current marker segment
+    unsigned int len = 0;      // length of current marker segment
+    unsigned int hpos = 0;     // position in header
+
+    int cmp, bpos;
+    int i;
+
+    // output goes to stdout; on a NULL stream errorlevel is set to 2 and false is returned
+    fp = stdout;
+    if (fp == NULL) {
+        fprintf(stderr, FWR_ERRMSG, fn);
+        errorlevel.store(2);
+        return false;
+    }
+
+    // info about image
+    fprintf(fp, "<Infofile for JPEG image:>\n\n\n");
+    fprintf(fp, "coding process: %s\n", (jpegtype == 1) ? "sequential" : "progressive");
+    // fprintf( fp, "no of scans: %i\n", scnc );
+    fprintf(fp, "imageheight: %i / imagewidth: %i\n", imgheight, imgwidth);
+    fprintf(fp, "component count: %i\n", cmpc);
+    fprintf(fp, "mcu count: %i/%i/%i (all/v/h)\n\n", mcuc, mcuv, mcuh);
+
+    // info about header
+    fprintf(fp, "\nfile header structure:\n");
+    fprintf(fp, " type  length   hpos\n");
+    // header parser loop -- NOTE(review): reads hdrdata[hpos + 3] while only checking hpos < hdrs
+    for (hpos = 0; (int)hpos < hdrs; hpos += len) {
+        type = hdrdata[hpos + 1];
+        len = 2 + B_SHORT(hdrdata[hpos + 2], hdrdata[hpos + 3]);
+        fprintf(fp, " FF%2X  %6u %6u\n", type, len, hpos); // len and hpos are unsigned, hence %u
+    }
+    fprintf(fp, " _END       0 %6u\n", hpos);
+    fprintf(fp, "\n");
+
+    // info about components
+    for (cmp = 0; cmp < cmpc; cmp++) {
+        fprintf(fp, "\n");
+        fprintf(fp, "component number %i ->\n", cmp);
+        fprintf(fp, "sample factors: %i/%i (v/h)\n", cmpnfo[cmp].sfv, cmpnfo[cmp].sfh);
+        fprintf(fp, "blocks per mcu: %i\n", cmpnfo[cmp].mbs);
+        fprintf(fp, "block count (mcu): %i/%i/%i (all/v/h)\n", cmpnfo[cmp].bc, cmpnfo[cmp].bcv, cmpnfo[cmp].bch);
+        fprintf(fp, "block count (sng): %i/%i/%i (all/v/h)\n", cmpnfo[cmp].nc, cmpnfo[cmp].ncv, cmpnfo[cmp].nch);
+        fprintf(fp, "quantiser table ->");
+        for (i = 0; i < 64; i++) { // 64 entries, eight per output line, visited through the zigzag table
+            bpos = zigzag[i];
+            if ((i % 8) == 0) fprintf(fp, "\n");
+            fprintf(fp, "%4i, ", QUANT(cmp, bpos));
+        }
+        fprintf(fp, "\n");
+        fprintf(fp, "maximum values ->");
+        for (i = 0; i < 64; i++) { // same layout as the quantiser table above
+            bpos = zigzag[i];
+            if ((i % 8) == 0) fprintf(fp, "\n");
+            fprintf(fp, "%4i, ", MAX_V(cmp, bpos));
+        }
+        fprintf(fp, "\n\n");
+    }
+
+    fflush(fp); // fp is stdout: flush it, but never close the process-wide stream
+
+    return true;
+}
+#if 0
+void Sim_DDr_InitImage(WD_AXI* axi_coeff, const UncompressedComponents * const colldata,
+		//Sirikata::Array1d<std::vector<NeighborSummary>, (uint32_t)ColorChannel::NumBlockTypes> num_nonzeros[1],
+		uint16_t        axi_width               [MAX_NUM_COLOR],//colldata->block_width(i);
+		uint16_t        axi_height              [MAX_NUM_COLOR],//colldata->block_width(i);
+		uint8_t         axi_map_row2cmp         [4], //     AXI                   2,1,0,0 2,1,0
+		uint16_t        axi_mcuv,
+		uint8_t         axi_num_cmp_mcu
+		);
+void process_row_range3(
+		WD_AXI*         axi_coeff,
+		decOutput&    lepp,
+        struct_arith&   arith_enc,
+		uint8_t*        res
+       );
+#endif
+/* ----------------------- End of developers functions -------------------------- */
+
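+/* NOTE: disabled earlier copy of tvdiff() and hls_decode_jpeg_kernel(); jpgcoder_hls.cc defines both functions.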
+#ifndef HLS_TEST
+#include "xcl2.hpp"
+//#include "xhpp_context.hpp"
+//#include "xhpp_taskkernel.hpp"
+//#include "xhpp_tasktransfer.hpp"
+//#include "xhpp_bufferhost.hpp"
+//#include "xhpp_graph.hpp"
+//#include "xhpp_scheduler.hpp"
+#endif
+
+int tvdiff(struct timeval* tv0, struct timeval* tv1) {
+    return (tv1->tv_sec - tv0->tv_sec) * 1000000 + (tv1->tv_usec - tv0->tv_usec);
+}
+
+bool hls_decode_jpeg_kernel(int filecnt,
+                            std::vector<uint8_t*> datatoDDR,
+                            std::vector<int> jpgSize,
+                            std::vector<struct_arith>& arith,
+                            std::vector<uint8_t*> res,
+                            std::vector<uint32_t>& left,
+                            std::vector<uint32_t>& rst) {
+#ifdef HLS_TEST
+    for (int i = 0; i < filecnt; i++) {
+        int arith_info[9];
+        jpegDecLeptonEncKernel_0((ap_uint<AXI_WIDTH>*)datatoDDR[i], // uint16_t* datatoDDR,
+                                 (int)jpgSize[i],                   // int size,
+                                 arith_info,
+                                 res[i] // uint8_t* res
+                                 );
+        arith[i].count = arith_info[0];
+        arith[i].value = (uint32_t)arith_info[1];
+        arith[i].pre_byte = (uint8_t)arith_info[2];
+        arith[i].run = (uint16_t)arith_info[3];
+        arith[i].pos = (uint32_t)arith_info[4];
+        arith[i].range = (uint8_t)arith_info[5];
+        arith[i].isFirst = (bool)arith_info[6];
+        left[i] = (uint32_t)arith_info[7];
+        rst[i] = (uint32_t)arith_info[8];
+    }
+#else
+    // platform related operations
+    std::vector<cl::Device> devices = xcl::get_xil_devices();
+    cl::Device device = devices[0];
+
+    // Creating Context and Command Queue for selected Device
+    cl::Context context(device);
+    cl::CommandQueue q(context, device, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
+    std::string devName = device.getInfo<CL_DEVICE_NAME>();
+    printf("Found Device=%s\n", devName.c_str());
+
+    cl::Program::Binaries xclBins = xcl::import_binary_file(xclbin_path);
+    devices.resize(1);
+    cl::Program program(context, devices, xclBins);
+    int knum = 7;
+    std::vector<cl::Kernel> lepEncKernel(knum);
+    for (int i = 0; i < knum; i++) {
+        // lepEncKernel[i] = cl::Kernel(program, ("lepEnc:{lepEnc_" + std::to_string(i) + "}").c_str());
+        lepEncKernel[i] = cl::Kernel(program, ("lepEnc:{lepEnc_" + std::to_string(i) + "}").c_str());
+    }
+
+    std::cout << "kernel has been created" << std::endl;
+
+    std::vector<uint8_t*> datatoDDR_d(knum);
+    std::vector<int> jpgSize_d(knum);
+    std::vector<int*> arith_info_d(knum);
+    std::vector<uint8_t*> res_d(knum);
+
+    std::vector<cl_mem_ext_ptr_t> mext_datatoDDR(knum);
+    std::vector<cl_mem_ext_ptr_t> mext_arith_info(knum);
+    std::vector<cl_mem_ext_ptr_t> mext_res(knum);
+
+    std::vector<cl::Buffer> datatoDDR_buf(knum);
+    std::vector<cl::Buffer> arith_info_buf(knum);
+    std::vector<cl::Buffer> res_buf(knum);
+
+    uint64_t maxJpgSize = 0;
+    for (int i = 0; i < filecnt; i++) {
+        if (jpgSize[i] > maxJpgSize) maxJpgSize = jpgSize[i];
+    }
+
+    for (int i = 0; i < knum; i++) {
+        datatoDDR_d[i] = aligned_alloc<uint8_t>(maxJpgSize);
+        res_d[i] = aligned_alloc<uint8_t>(maxJpgSize);
+        arith_info_d[i] = aligned_alloc<int>(9);
+
+        if (i < 3) {
+            mext_datatoDDR[i] = {(unsigned int)(0) | XCL_MEM_TOPOLOGY, datatoDDR_d[i]};
+            mext_arith_info[i] = {(unsigned int)(0) | XCL_MEM_TOPOLOGY, arith_info_d[i]};
+            mext_res[i] = {(unsigned int)(0) | XCL_MEM_TOPOLOGY, res_d[i]};
+        } else if (i == 3) {
+            mext_datatoDDR[i] = {(unsigned int)(1) | XCL_MEM_TOPOLOGY, datatoDDR_d[i]};
+            mext_arith_info[i] = {(unsigned int)(1) | XCL_MEM_TOPOLOGY, arith_info_d[i]};
+            mext_res[i] = {(unsigned int)(1) | XCL_MEM_TOPOLOGY, res_d[i]};
+
+        } else {
+            mext_datatoDDR[i] = {(unsigned int)(2) | XCL_MEM_TOPOLOGY, datatoDDR_d[i]};
+            mext_arith_info[i] = {(unsigned int)(2) | XCL_MEM_TOPOLOGY, arith_info_d[i]};
+            mext_res[i] = {(unsigned int)(2) | XCL_MEM_TOPOLOGY, res_d[i]};
+        }
+
+        datatoDDR_buf[i] = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
+                                      (size_t)(maxJpgSize), &mext_datatoDDR[i]);
+
+        res_buf[i] = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
+                                (size_t)(maxJpgSize), &mext_res[i]);
+
+        arith_info_buf[i] = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
+                                       (size_t)(9 * sizeof(int)), &mext_arith_info[i]);
+    }
+
+    int cur_k = 0;
+    for (int i = 0; i < filecnt; i++) {
+        jpgSize_d[cur_k] = jpgSize[i];
+        memcpy(datatoDDR_d[cur_k], datatoDDR[i], jpgSize_d[cur_k]);
+
+        std::vector<cl::Memory> ib;
+        ib.push_back(datatoDDR_buf[cur_k]);
+
+        int j = 0;
+        lepEncKernel[cur_k].setArg(j++, datatoDDR_buf[cur_k]);
+        lepEncKernel[cur_k].setArg(j++, jpgSize_d[cur_k]);
+        lepEncKernel[cur_k].setArg(j++, arith_info_buf[cur_k]);
+        lepEncKernel[cur_k].setArg(j++, res_buf[cur_k]);
+
+        std::vector<cl::Memory> ob;
+        ob.push_back(arith_info_buf[cur_k]);
+        ob.push_back(res_buf[cur_k]);
+
+        std::vector<cl::Event> write_event;
+        std::vector<cl::Event> kernel_event;
+        std::vector<cl::Event> read_event;
+        write_event.resize(1);
+        kernel_event.resize(1);
+        read_event.resize(1);
+
+        q.enqueueMigrateMemObjects(ib, 0, nullptr, &write_event[0]);
+        q.enqueueTask(lepEncKernel[cur_k], &write_event, &kernel_event[0]);
+        q.enqueueMigrateMemObjects(ob, CL_MIGRATE_MEM_OBJECT_HOST, &kernel_event, &read_event[0]);
+        std::cout << "host kernel call" << std::endl;
+        q.finish();
+        std::cout << "host kernel end" << std::endl;
+        arith[i].count = arith_info_d[cur_k][0];
+        arith[i].value = (uint32_t)arith_info_d[cur_k][1];
+        arith[i].pre_byte = (uint8_t)arith_info_d[cur_k][2];
+        arith[i].run = (uint16_t)arith_info_d[cur_k][3];
+        arith[i].pos = (uint32_t)arith_info_d[cur_k][4];
+        arith[i].range = (uint8_t)arith_info_d[cur_k][5];
+        arith[i].isFirst = (bool)arith_info_d[cur_k][6];
+        left[i] = (uint32_t)arith_info_d[cur_k][7];
+        rst[i] = (uint32_t)arith_info_d[cur_k][8];
+        memcpy(res[i], res_d[cur_k], arith[i].pos);
+        std::cout << "kernel " << cur_k << " finish" << std::endl;
+        if (cur_k != 6) {
+            cur_k++;
+        } else {
+            cur_k = 0;
+        }
+    }
+
+    fprintf(stderr, "=========== arith print ==========\n");
+    fprintf(stderr, "count = %d\n", arith[0].count);
+    fprintf(stderr, "value = %d\n", arith[0].value);
+    fprintf(stderr, "pre_byte = %d\n", arith[0].pre_byte);
+    fprintf(stderr, "run = %d\n", arith[0].run);
+    fprintf(stderr, "pos = %d\n", arith[0].pos);
+    fprintf(stderr, "range = %d\n", arith[0].range);
+    fprintf(stderr, "isFirst = %d\n", arith[0].isFirst);
+    fprintf(stderr, "============ end print ==========\n");
+#endif
+    return true;
+}
+*/
+/* ----------------------- End of file -------------------------- */
diff --git a/codec/L2/demos/leptonEnc/host/lepton/jpgcoder.hh b/codec/L2/demos/leptonEnc/host/lepton/jpgcoder.hh
new file mode 100644
index 0000000000..927345886a
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/jpgcoder.hh
@@ -0,0 +1,64 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+#ifndef _JPGCODER_HH_
+#define _JPGCODER_HH_
+#include <atomic>
+#include <functional>
+#include "../vp8/util/nd_array.hh"
+#include "../vp8/util/options.hh"
+#include "../io/Reader.hh"
+#include "XAcc_jpegdecoder.hpp"
+#include "XAcc_jfifparser.hpp"
+#include "XAcc_common.hpp"
+//#include "jpeg_dec_lepton_enc_kernel.hpp"
+#include "multi_cu.hpp"
+// extern int cmpc;
+extern uint8_t get_current_file_lepton_version();
+extern std::atomic<int> errorlevel;
+extern std::string errormessage;
+extern uint64_t g_time_bound_ms;
+// bool hls_decode_jpeg_kernel(
+//		ap_uint<AXI_WIDTH>* datatoDDR,
+//		int size,
+//		struct_arith& arith,
+//		uint8_t* res
+//);
+
+namespace IOUtil {
+class FileReader;
+class FileWriter;
+}
+void gen_nop();
+void process_file(IOUtil::FileReader* reader, IOUtil::FileWriter* writer, int file_input_length, bool force_zlib0);
+void check_decompression_memory_bound_ok();
+namespace TimingHarness { // timing-stage identifiers, the timing table and its helpers
+#define FOREACH_TIMING_STAGE(CB)     \
+    CB(TS_MAIN)                      \
+    CB(TS_MODEL_INIT_BEGIN)          \
+    CB(TS_MODEL_INIT)                \
+    CB(TS_ACCEPT)                    \
+    CB(TS_THREAD_STARTED)            \
+    CB(TS_READ_STARTED)              \
+    CB(TS_READ_FINISHED)             \
+    CB(TS_JPEG_DECODE_STARTED)       \
+    CB(TS_JPEG_DECODE_FINISHED)      \
+    CB(TS_STREAM_MULTIPLEX_STARTED)  \
+    CB(TS_STREAM_MULTIPLEX_FINISHED) \
+    CB(TS_THREAD_WAIT_STARTED)       \
+    CB(TS_THREAD_WAIT_FINISHED)      \
+    CB(TS_ARITH_STARTED)             \
+    CB(TS_ARITH_FINISHED)            \
+    CB(TS_JPEG_RECODE_STARTED)       \
+    CB(TS_JPEG_RECODE_FINISHED)      \
+    CB(TS_STREAM_FLUSH_STARTED)      \
+    CB(TS_STREAM_FLUSH_FINISHED)     \
+    CB(TS_DONE)
+#define MAKE_TIMING_STAGE_ENUM(VALUE) VALUE,  // callback for FOREACH_TIMING_STAGE: stage name as an enumerator
+#define GENERATE_TIMING_STRING(VALUE) #VALUE, // callback for FOREACH_TIMING_STAGE: stage name as a string literal
+enum TimingStages_ { // one enumerator per stage listed above; NUM_STAGES follows the last stage
+    FOREACH_TIMING_STAGE(MAKE_TIMING_STAGE_ENUM) NUM_STAGES,
+};
+extern Sirikata::Array1d<Sirikata::Array1d<uint64_t, NUM_STAGES>, MAX_NUM_THREADS> timing; // presumably [thread][stage]
+extern uint64_t get_time_us(bool force = false);
+void print_results();
+} // namespace TimingHarness (the three macros above are preprocessor names and are not scoped by it)
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/lepton/jpgcoder_hls.cc b/codec/L2/demos/leptonEnc/host/lepton/jpgcoder_hls.cc
new file mode 100644
index 0000000000..dea6364cec
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/jpgcoder_hls.cc
@@ -0,0 +1,412 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "loop_stt.h"
+#include "../vp8/util/memory.hh"
+#include "../vp8/util/debug.hh"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sstream>
+#include <math.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <ctime>
+#include <memory>
+#include <atomic>
+#include <signal.h>
+#ifndef _WIN32
+#include <sys/time.h>
+#include <sys/types.h>
+#include <dirent.h>
+
+#include <unistd.h>
+#else
+#include <io.h>
+#endif
+#ifdef __linux
+//#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+
+#endif
+#include <emmintrin.h>
+#include "jpgcoder_hls.hh"
+#include "jpgcoder.hh"
+//#include "recoder.hh"
+//#include "bitops.hh"
+//#include "component_info.hh"
+//#include "uncompressed_components.hh"
+//#include "vp8_decoder.hh"
+//#include "vp8_encoder.hh"
+//#include "simple_decoder.hh"
+//#include "simple_encoder.hh"
+//#include "fork_serve.hh"
+//#include "socket_serve.hh"
+//#include "validation.hh"
+//#include "../io/ZlibCompression.hh"
+//#include "../io/MemReadWriter.hh"
+//#include "../io/BufferedIO.hh"
+//#include "../io/Zlib0.hh"
+//#include "../io/Seccomp.hh"
+#ifndef HLS_TEST
+#include "xcl2.hpp"
+//#include "xhpp_context.hpp"
+//#include "xhpp_taskkernel.hpp"
+//#include "xhpp_tasktransfer.hpp"
+//#include "xhpp_bufferhost.hpp"
+//#include "xhpp_graph.hpp"
+//#include "xhpp_scheduler.hpp"
+#endif
+
+int tvdiff(struct timeval* tv0, struct timeval* tv1) { // microseconds elapsed from *tv0 to *tv1, narrowed to int
+    return (int)((long long)(tv1->tv_sec - tv0->tv_sec) * 1000000LL + (long long)(tv1->tv_usec - tv0->tv_usec));
+}
+
+bool hls_decode_jpeg_kernel(std::string xclbin_path,
+                            int filecnt,
+                            std::vector<uint8_t*> datatoDDR,
+                            std::vector<int> jpgSize,
+                            std::vector<struct_arith>& arith,
+                            std::vector<uint8_t*> res,
+                            std::vector<uint32_t>& left,
+                            std::vector<uint32_t>& rst) {
+#ifdef HLS_TEST
+    for (int i = 0; i < filecnt; i++) {
+        int arith_info[9];
+        jpegDecLeptonEncKernel_0((ap_uint<AXI_WIDTH>*)datatoDDR[i], // uint16_t* datatoDDR,
+                                 (int)jpgSize[i],                   // int size,
+                                 arith_info,
+                                 res[i] // uint8_t* res
+                                 );
+        arith[i].count = arith_info[0];
+        arith[i].value = (uint32_t)arith_info[1];
+        arith[i].pre_byte = (uint8_t)arith_info[2];
+        arith[i].run = (uint16_t)arith_info[3];
+        arith[i].pos = (uint32_t)arith_info[4];
+        arith[i].range = (uint8_t)arith_info[5];
+        arith[i].isFirst = (bool)arith_info[6];
+        left[i] = (uint32_t)arith_info[7];
+        rst[i] = (uint32_t)arith_info[8];
+    }
+#else
+    // platform related operations
+    std::vector<cl::Device> devices = xcl::get_xil_devices();
+    cl::Device device = devices[0];
+
+    // Creating Context and Command Queue for selected Device
+    cl::Context context(device);
+    cl::CommandQueue q(context, device, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
+    std::string devName = device.getInfo<CL_DEVICE_NAME>();
+    printf("Found Device=%s\n", devName.c_str());
+
+    cl::Program::Binaries xclBins = xcl::import_binary_file(xclbin_path);
+    devices.resize(1);
+    cl::Program program(context, devices, xclBins);
+    // knum used to define kernel number
+    int knum = 1;
+    std::vector<cl::Kernel> lepEncKernel(knum);
+    for (int i = 0; i < knum; i++) {
+        // lepEncKernel[i] = cl::Kernel(program, ("lepEnc:{lepEnc_" + std::to_string(i) + "}").c_str());
+        lepEncKernel[i] = cl::Kernel(program, ("lepEnc:{lepEnc_" + std::to_string(i) + "}").c_str());
+    }
+
+    std::cout << "kernel has been created" << std::endl;
+
+    std::vector<uint8_t*> datatoDDR_d(knum);
+    std::vector<int> jpgSize_d(knum);
+    std::vector<int*> arith_info_d(knum);
+    std::vector<uint8_t*> res_d(knum);
+
+    std::vector<cl_mem_ext_ptr_t> mext_datatoDDR(knum);
+    std::vector<cl_mem_ext_ptr_t> mext_arith_info(knum);
+    std::vector<cl_mem_ext_ptr_t> mext_res(knum);
+
+    std::vector<cl::Buffer> datatoDDR_buf(knum);
+    std::vector<cl::Buffer> arith_info_buf(knum);
+    std::vector<cl::Buffer> res_buf(knum);
+
+    uint64_t maxJpgSize = 0;
+    for (int i = 0; i < filecnt; i++) {
+        if (jpgSize[i] > maxJpgSize) maxJpgSize = jpgSize[i];
+    }
+
+    for (int i = 0; i < knum; i++) {
+        datatoDDR_d[i] = aligned_alloc<uint8_t>(maxJpgSize);
+        res_d[i] = aligned_alloc<uint8_t>(maxJpgSize);
+        arith_info_d[i] = aligned_alloc<int>(9);
+
+        if (i < 3) {
+            mext_datatoDDR[i] = {(unsigned int)(0) | XCL_MEM_TOPOLOGY, datatoDDR_d[i]};
+            mext_arith_info[i] = {(unsigned int)(0) | XCL_MEM_TOPOLOGY, arith_info_d[i]};
+            mext_res[i] = {(unsigned int)(0) | XCL_MEM_TOPOLOGY, res_d[i]};
+        } else if (i == 3) {
+            mext_datatoDDR[i] = {(unsigned int)(1) | XCL_MEM_TOPOLOGY, datatoDDR_d[i]};
+            mext_arith_info[i] = {(unsigned int)(1) | XCL_MEM_TOPOLOGY, arith_info_d[i]};
+            mext_res[i] = {(unsigned int)(1) | XCL_MEM_TOPOLOGY, res_d[i]};
+
+        } else {
+            mext_datatoDDR[i] = {(unsigned int)(2) | XCL_MEM_TOPOLOGY, datatoDDR_d[i]};
+            mext_arith_info[i] = {(unsigned int)(2) | XCL_MEM_TOPOLOGY, arith_info_d[i]};
+            mext_res[i] = {(unsigned int)(2) | XCL_MEM_TOPOLOGY, res_d[i]};
+        }
+
+        datatoDDR_buf[i] = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
+                                      (size_t)(maxJpgSize), &mext_datatoDDR[i]);
+
+        res_buf[i] = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
+                                (size_t)(maxJpgSize), &mext_res[i]);
+
+        arith_info_buf[i] = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
+                                       (size_t)(9 * sizeof(int)), &mext_arith_info[i]);
+    }
+
+    int cur_k = 0;
+    for (int i = 0; i < filecnt; i++) {
+        jpgSize_d[cur_k] = jpgSize[i];
+        memcpy(datatoDDR_d[cur_k], datatoDDR[i], jpgSize_d[cur_k]);
+
+        std::vector<cl::Memory> ib;
+        ib.push_back(datatoDDR_buf[cur_k]);
+
+        int j = 0;
+        lepEncKernel[cur_k].setArg(j++, datatoDDR_buf[cur_k]);
+        lepEncKernel[cur_k].setArg(j++, jpgSize_d[cur_k]);
+        lepEncKernel[cur_k].setArg(j++, arith_info_buf[cur_k]);
+        lepEncKernel[cur_k].setArg(j++, res_buf[cur_k]);
+
+        std::vector<cl::Memory> ob;
+        ob.push_back(arith_info_buf[cur_k]);
+        ob.push_back(res_buf[cur_k]);
+
+        std::vector<cl::Event> write_event;
+        std::vector<cl::Event> kernel_event;
+        std::vector<cl::Event> read_event;
+        write_event.resize(1);
+        kernel_event.resize(1);
+        read_event.resize(1);
+
+        q.enqueueMigrateMemObjects(ib, 0, nullptr, &write_event[0]);
+        q.enqueueTask(lepEncKernel[cur_k], &write_event, &kernel_event[0]);
+        q.enqueueMigrateMemObjects(ob, CL_MIGRATE_MEM_OBJECT_HOST, &kernel_event, &read_event[0]);
+        std::cout << "host kernel call" << std::endl;
+        q.finish();
+        std::cout << "host kernel end" << std::endl;
+        arith[i].count = arith_info_d[cur_k][0];
+        arith[i].value = (uint32_t)arith_info_d[cur_k][1];
+        arith[i].pre_byte = (uint8_t)arith_info_d[cur_k][2];
+        arith[i].run = (uint16_t)arith_info_d[cur_k][3];
+        arith[i].pos = (uint32_t)arith_info_d[cur_k][4];
+        arith[i].range = (uint8_t)arith_info_d[cur_k][5];
+        arith[i].isFirst = (bool)arith_info_d[cur_k][6];
+        left[i] = (uint32_t)arith_info_d[cur_k][7];
+        rst[i] = (uint32_t)arith_info_d[cur_k][8];
+        memcpy(res[i], res_d[cur_k], arith[i].pos);
+        std::cout << "kernel " << cur_k << " finish" << std::endl;
+        //        if (cur_k != 6) {
+        //            cur_k++;
+        //        } else {
+        //            cur_k = 0;
+        //        }
+    }
+
+    /*    int knum = 7;
+        xhpp::context cst("xilinx_u200_xdma_201830_2", xclbin_path, xhpp::pipeline);
+        cst.create();
+        // init vbuffers, used in graph
+        std::vector<xhpp::vbuffer::host<uint16_t> > hostb_in(knum, &cst);
+        std::vector<xhpp::vbuffer::host<uint8_t> > hostb_out(knum, &cst);
+        std::vector<xhpp::vbuffer::host<int> > hostb_s1(knum, &cst);
+
+        std::vector<xhpp::vbuffer::device<uint16_t> > devb_in(knum, &cst);
+        std::vector<xhpp::vbuffer::device<uint8_t> > devb_out(knum, &cst);
+        std::vector<xhpp::vbuffer::device<int> > devb_s1(knum, &cst);
+
+        uint64_t maxJpgSize = 0;
+        for (int i = 0; i < filecnt; i++) {
+            if (jpgSize[i] > maxJpgSize) maxJpgSize = jpgSize[i];
+        }
+        // int n_size = 1000000;//1MB for android and small.jpg
+
+        for (int i = 0; i < knum; i++) {
+            hostb_in[i].setsize(maxJpgSize);
+            hostb_out[i].setsize(maxJpgSize);
+            hostb_s1[i].setsize(9);
+
+            devb_in[i].setsize(maxJpgSize);
+            devb_out[i].setsize(maxJpgSize);
+            devb_s1[i].setsize(9);
+        }
+
+        // init tasks
+        std::vector<xhpp::task::data_transfer> tsk1;
+        for (int i = 0; i < knum; i++) {
+            xhpp::task::data_transfer tmp(&cst, xhpp::host2dev);
+            tsk1.push_back(tmp);
+        }
+
+        // xhpp::task::data_transfer tsk2(&cst, xhpp::host2dev);
+        std::vector<xhpp::task::dev_func> tsk2(knum, &cst);
+        std::vector<xhpp::task::data_transfer> tsk3;
+        for (int i = 0; i < knum; i++) {
+            xhpp::task::data_transfer tmp(&cst, xhpp::dev2host);
+            tsk3.push_back(tmp);
+        }
+        std::vector<xhpp::task::data_transfer> tsk4;
+        for (int i = 0; i < knum; i++) {
+            xhpp::task::data_transfer tmp(&cst, xhpp::dev2host);
+            tsk4.push_back(tmp);
+        }
+
+        for (int i = 0; i < knum; i++) {
+            tsk1[i].setparam(&(hostb_in[i]), &devb_in[i]); // host2dev
+            // tsk2.setparam(&hb_b, &db_b);   //host2dev
+
+            int vadd_banks[3] = {0, 0, 0};
+            if (i < 3) {
+                vadd_banks[0] = 0;
+                vadd_banks[1] = 0;
+                vadd_banks[2] = 0;
+            } else if (i == 3) {
+                vadd_banks[0] = 1;
+                vadd_banks[1] = 1;
+                vadd_banks[2] = 1;
+            } else {
+                vadd_banks[0] = 2;
+                vadd_banks[1] = 2;
+                vadd_banks[2] = 2;
+            }
+            // int vadd_banks2[3]={2,2,3};
+            // tsk2.addcu("JPEGD_LeptonE_kernel", 3, vadd_banks);
+            std::string name = "lepEnc:{lepEnc_" + std::to_string(i) + "}";
+            tsk2[i].addcu(name, 3, vadd_banks);
+            // tsk3.addcu("JPEGD_LeptonE_kernel2", 3, vadd_banks2);
+            int sizeint = 0;
+            tsk2[i].setparam(devb_in[i], sizeint,
+                             devb_s1[i], // arith.count,
+                             devb_out[i]);
+            tsk3[i].setparam(&hostb_out[i], &devb_out[i]);
+            tsk4[i].setparam(&hostb_s1[i], &devb_s1[i]);
+        }
+
+        std::vector<xhpp::graph> gr(knum);
+        for (int i = 0; i < knum; i++) {
+            std::string name1 = std::to_string(i * 10 + 1);
+            std::string name2 = std::to_string(i * 10 + 2);
+            std::string name3 = std::to_string(i * 10 + 3);
+            std::string name4 = std::to_string(i * 10 + 4);
+
+            gr[i].addnode(&(tsk1[i]), name1, xhpp::start);
+            gr[i].addnode(&(tsk2[i]), name2);
+            gr[i].addnode(&(tsk3[i]), name3, xhpp::end);
+            gr[i].addnode(&(tsk4[i]), name4, xhpp::end);
+
+            gr[i].addedge(name1, name2);
+            gr[i].addedge(name2, name3);
+            gr[i].addedge(name3, name4);
+
+            gr[i].setup();
+        }
+
+        std::vector<xhpp::engine> sch;
+        for (int i = 0; i < knum; i++) {
+            xhpp::engine tmp(&cst, &(gr[i]));
+            sch.push_back(tmp);
+            sch.back().setup();
+        }
+
+        //----dump setup----
+
+        //---------------------
+        // assign data of topapi, use buffer in the schedule
+        // The value of the vbuffer is overwritten by the buffer input
+        // todo improve the speed!
+        // int loop_size = 1;
+        xhpp::buffer::host<uint16_t> hdata_in[filecnt](&cst);
+        xhpp::buffer::host<uint8_t> hdata_out[filecnt](&cst);
+        xhpp::buffer::host<int> hdata_s1[filecnt](&cst);
+
+        //	  a.allocate(jpgSize);
+        //	  for(int i = 0; i<size; i++)
+        //		  a[i] = datatoDDR[i];
+        for (int i = 0; i < filecnt; i++) {
+            hdata_in[i].allocate(maxJpgSize);
+            hdata_out[i].allocate(maxJpgSize);
+            hdata_s1[i].allocate(9);
+            int buf_size;
+            if (jpgSize[i] & 1 == 1) {
+                buf_size = (jpgSize[i] + 1) / 2;
+            } else {
+                buf_size = jpgSize[i] / 2;
+            }
+            for (int j = 0; j < buf_size; j++) {
+                ap_uint<AXI_WIDTH>* tmp = (ap_uint<AXI_WIDTH>*)datatoDDR[i];
+                hdata_in[i][j] = tmp[j];
+                hdata_out[i][j] = 0;
+            }
+        }
+
+        // launch top api
+        //	  for (int i=0; i < loop_size; i++){
+        //	    sch.run("input", &tsk1, &(hdata_in[i]), 0, //0 refers to the 1st param of task1.
+        //	            "output",&tsk3, &(hdata_out[i]), 0,
+        //				"output",&tsk4, &(hdata_s1[i]), 0);
+        //	  }
+        cst.wait();
+        struct timeval tv_r_s, tv_r_e;
+        gettimeofday(&tv_r_s, 0);
+        for (int j = 0; j < filecnt; j++) {
+            int i = j % knum;
+            sch[i].run("input", &(tsk1[i]), &(hdata_in[j]), 0, // 0 refers to the 1st param of task1.
+                       "input", &(tsk2[i]), jpgSize[j], 1, "output", &(tsk3[i]), &(hdata_out[j]),
+                       0, // 0 refers to the 1st param of task2
+                       "output", &(tsk4[i]), &(hdata_s1[j]), 0);
+        }
+        cst.wait();
+        gettimeofday(&tv_r_e, 0);
+        std::cout << "End to End time: " << std::dec << tvdiff(&tv_r_s, &tv_r_e) / 1000 << " ms" << std::endl;
+
+        //--------output data---------
+        for (int s = 0; s < filecnt; s++) {
+            arith[s].count = hdata_s1[s][0];
+            arith[s].value = (uint32_t)hdata_s1[s][1];
+            arith[s].pre_byte = (uint8_t)hdata_s1[s][2];
+            arith[s].run = (uint16_t)hdata_s1[s][3];
+            arith[s].pos = (uint32_t)hdata_s1[s][4];
+            arith[s].range = (uint8_t)hdata_s1[s][5];
+            arith[s].isFirst = (bool)hdata_s1[s][6];
+            left[s] = (uint32_t)hdata_s1[s][7];
+            rst[s] = (uint32_t)hdata_s1[s][8];
+
+            for (int j = 0; j < arith[s].pos; j++) {
+                *(res[s] + j) = hdata_out[s][j];
+            }
+        }*/
+
+    fprintf(stderr, "=========== arith print ==========\n");
+    fprintf(stderr, "count = %d\n", arith[0].count);
+    fprintf(stderr, "value = %d\n", arith[0].value);
+    fprintf(stderr, "pre_byte = %d\n", arith[0].pre_byte);
+    fprintf(stderr, "run = %d\n", arith[0].run);
+    fprintf(stderr, "pos = %d\n", arith[0].pos);
+    fprintf(stderr, "range = %d\n", arith[0].range);
+    fprintf(stderr, "isFirst = %d\n", arith[0].isFirst);
+    //  for(int pos=0; pos<arith.pos; pos++){
+    //  //      fprintf( stderr, " %.4x\n" , *(res+pos));
+    //  //  }
+    fprintf(stderr, "============ end print ==========\n");
+#endif
+    return true;
+}
diff --git a/codec/L2/demos/leptonEnc/host/lepton/jpgcoder_hls.hh b/codec/L2/demos/leptonEnc/host/lepton/jpgcoder_hls.hh
new file mode 100644
index 0000000000..460740e9aa
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/jpgcoder_hls.hh
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+#ifndef _JPGCODER__HLS_HH_
+#define _JPGCODER__HLS_HH_
+#include "XAcc_jpegdecoder.hpp"
+#include "XAcc_jfifparser.hpp"
+#include "XAcc_common.hpp"
+#include "multi_cu.hpp"
+#include <string>
+/**
+ * @brief Allocate uninitialized storage for `num` objects of type T, aligned to 4096 bytes.
+ *
+ * The memory is obtained from posix_memalign(), so it has to be released with
+ * free() rather than delete. No constructors are run on the returned storage.
+ *
+ * @tparam T   element type
+ * @param num  number of elements to reserve space for
+ * @return pointer to the aligned storage
+ * @throws std::bad_alloc if the byte count does not fit in std::size_t or if
+ *         posix_memalign() reports a failure
+ */
+template <typename T>
+T* aligned_alloc(std::size_t num) {
+    // Guard the multiplication below: a wrapped byte count would otherwise
+    // succeed and hand back a buffer far smaller than the caller asked for.
+    if (num > static_cast<std::size_t>(-1) / sizeof(T)) throw std::bad_alloc();
+    void* ptr = nullptr;
+    if (posix_memalign(&ptr, 4096, num * sizeof(T))) throw std::bad_alloc();
+    return reinterpret_cast<T*>(ptr);
+}
+// Batch overload: takes the path of the xclbin plus per-file vectors for
+// `filecnt` input files.
+// `datatoDDR`, `jpgSize` and `res` are passed by value (the vectors are copied,
+// the buffers their elements point to are not); `arith`, `left` and `rst` are
+// passed by non-const reference.
+// NOTE(review): `arith`, `left`, `rst` and the buffers behind `res` look like
+// per-file outputs, and despite the "decode" name this header belongs to the
+// leptonEnc demo -- confirm both against the definition.
+bool hls_decode_jpeg_kernel(std::string xclbin_path,
+                            int filecnt,
+                            std::vector<uint8_t*> datatoDDR,
+                            std::vector<int> jpgSize,
+                            std::vector<struct_arith>& arith,
+                            std::vector<uint8_t*> res,
+                            std::vector<uint32_t>& left,
+                            std::vector<uint32_t>& rst);
+// Single-buffer overload working on one input buffer of `size` and one
+// struct_arith / result buffer pair.
+// NOTE(review): the unit of `size` (bytes vs. AXI words) is not visible here.
+bool hls_decode_jpeg_kernel(ap_uint<AXI_WIDTH>* datatoDDR, int size, struct_arith& arith, uint8_t* res);
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/lepton/lepton_codec.cc b/codec/L2/demos/leptonEnc/host/lepton/lepton_codec.cc
new file mode 100644
index 0000000000..bb3e7fbeb7
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/lepton_codec.cc
@@ -0,0 +1,207 @@
+#include "lepton_codec.hh"
+#include "uncompressed_components.hh"
+#include "../vp8/decoder/decoder.hh"
+
+/*
+ * Decode one row of blocks for the colour channel middle_model.COLOR.
+ *
+ * The first block of the row is parsed with left_model, every interior block
+ * with middle_model, and the last block (only when the row is wider than one
+ * block) with right_model. In each step parse_tokens() is given a local copy of
+ * the stored context, while next() is handed the stored context itself.
+ *
+ * For the first and interior blocks the function returns early as soon as the
+ * offset reported by next() reaches component_size_in_block.
+ */
+template <class Left, class Middle, class Right, bool force_memory_optimization>
+void LeptonCodec::ThreadState::decode_row(Left& left_model,
+                                          Middle& middle_model,
+                                          Right& right_model,
+                                          int curr_y,
+                                          BlockBasedImagePerChannel<force_memory_optimization>& image_data,
+                                          int component_size_in_block) {
+    const uint32_t blocks_in_row = image_data[(int)middle_model.COLOR]->block_width();
+
+    // First block of the row.
+    if (blocks_in_row > 0) {
+        BlockContext block_ctx = context_.at((int)middle_model.COLOR);
+        parse_tokens(block_ctx, bool_decoder_, left_model, model_); // FIXME
+        const int next_offset =
+            image_data[middle_model.COLOR]->next(context_.at((int)middle_model.COLOR), true, curr_y);
+        if (next_offset >= component_size_in_block) {
+            return;
+        }
+    }
+
+    // Interior blocks: everything except the first and the last block, i.e.
+    // blocks_in_row - 2 of them once the row is at least three blocks wide.
+    for (uint32_t interior_left = (blocks_in_row > 2) ? blocks_in_row - 2 : 0; interior_left != 0;
+         --interior_left) {
+        BlockContext block_ctx = context_.at((int)middle_model.COLOR);
+        parse_tokens(block_ctx, bool_decoder_, middle_model, model_); // FIXME
+        const int next_offset =
+            image_data[middle_model.COLOR]->next(context_.at((int)middle_model.COLOR), true, curr_y);
+        if (next_offset >= component_size_in_block) {
+            return;
+        }
+    }
+
+    // Last block of the row; next() is told (false) that nothing follows.
+    if (blocks_in_row > 1) {
+        BlockContext block_ctx = context_.at((int)middle_model.COLOR);
+        parse_tokens(block_ctx, bool_decoder_, right_model, model_);
+        image_data[middle_model.COLOR]->next(context_.at((int)middle_model.COLOR), false, curr_y);
+    }
+}
+// Helper macros for building one ProbabilityTables object per colour channel.
+//
+//   ProbabilityTablesTuple(left, above, right)
+//       expands to a comma-separated list of ProbabilityTables<...> types, one
+//       per channel, meant to be used as the template arguments of std::tuple.
+//       The first template argument of every entry is left && above && right.
+//   EACH_BLOCK_TYPE(left, above, right)
+//       expands to the matching comma-separated list of constructor calls, one
+//       per BlockType, each receiving (BlockType, left, above, right).
+//
+// With ALLOW_FOUR_COLORS defined both lists get a fourth (Ck) entry; otherwise
+// they cover Y, Cb and Cr only.
+#ifdef ALLOW_FOUR_COLORS
+#define ProbabilityTablesTuple(left, above, right)                      \
+    ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR0>,     \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR1>, \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR2>, \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR3>
+#define EACH_BLOCK_TYPE(left, above, right)                                                                \
+    ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR0>(BlockType::Y, left, above, right),      \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR1>(BlockType::Cb, left, above, right), \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR2>(BlockType::Cr, left, above, right), \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR3>(BlockType::Ck, left, above, right)
+#else
+// Three-channel variants (Y, Cb, Cr).
+#define ProbabilityTablesTuple(left, above, right)                      \
+    ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR0>,     \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR1>, \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR2>
+#define EACH_BLOCK_TYPE(left, above, right)                                                                \
+    ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR0>(BlockType::Y, left, above, right),      \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR1>(BlockType::Cb, left, above, right), \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR2>(BlockType::Cr, left, above, right)
+#endif
+
+// Non-template entry point: forwards its arguments unchanged to the templated
+// four-argument decode_row() for a BlockBasedImagePerChannel<true> image.
+void LeptonCodec::ThreadState::decode_row_wrapper(
+    BlockBasedImagePerChannel<true>& image_data,
+    Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> component_size_in_blocks,
+    int component,
+    int curr_y) {
+    decode_row(image_data, component_size_in_blocks, component, curr_y);
+}
+/*
+ * Decode row curr_y of the given component, choosing the probability-table
+ * variants that match the row's position in the image.
+ *
+ * Six tuples of ProbabilityTables are built on every call, one per
+ * (left, above, right) combination used below. The stored context for the
+ * component is then positioned on curr_y via off_y(), and the three-model
+ * decode_row() overload is invoked with:
+ *   - corner / top / top                 for the first row seen for this component
+ *                                        (is_top_row_ is cleared as a side effect),
+ *   - midleft / middle / midright        for later rows wider than one block,
+ *   - width_one (all three positions)    for later rows exactly one block wide.
+ * The switch on component only selects the tuple element of the right type.
+ *
+ * component_size_in_blocks is taken by value; only the entry for `component`
+ * is read.
+ */
+template <bool force_memory_optimization>
+void LeptonCodec::ThreadState::decode_row(
+    BlockBasedImagePerChannel<force_memory_optimization>& image_data,
+    Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> component_size_in_blocks,
+    int component,
+    int curr_y) {
+    using std::tuple;
+    // Macro arguments are (left, above, right); see ProbabilityTablesTuple / EACH_BLOCK_TYPE.
+    tuple<ProbabilityTablesTuple(false, false, false)> corner(EACH_BLOCK_TYPE(false, false, false));
+    tuple<ProbabilityTablesTuple(true, false, false)> top(EACH_BLOCK_TYPE(true, false, false));
+    tuple<ProbabilityTablesTuple(false, true, true)> midleft(EACH_BLOCK_TYPE(false, true, true));
+    tuple<ProbabilityTablesTuple(true, true, true)> middle(EACH_BLOCK_TYPE(true, true, true));
+    tuple<ProbabilityTablesTuple(true, true, false)> midright(EACH_BLOCK_TYPE(true, true, false));
+    tuple<ProbabilityTablesTuple(false, true, false)> width_one(EACH_BLOCK_TYPE(false, true, false));
+    // Point this component's context at the start of row curr_y.
+    context_.at(component) = image_data[component]->off_y(curr_y, num_nonzeros_.at(component).begin());
+
+    int block_width = image_data[component]->block_width();
+    if (is_top_row_.at(component)) {
+        // First row for this component: no row above, so use corner/top tables.
+        is_top_row_.at(component) = false;
+        switch ((BlockType)component) {
+            case BlockType::Y:
+                decode_row(std::get<(int)BlockType::Y>(corner), std::get<(int)BlockType::Y>(top),
+                           std::get<(int)BlockType::Y>(top), curr_y, image_data, component_size_in_blocks[component]);
+                break;
+            case BlockType::Cb:
+                decode_row(std::get<(int)BlockType::Cb>(corner), std::get<(int)BlockType::Cb>(top),
+                           std::get<(int)BlockType::Cb>(top), curr_y, image_data, component_size_in_blocks[component]);
+
+                break;
+            case BlockType::Cr:
+                decode_row(std::get<(int)BlockType::Cr>(corner), std::get<(int)BlockType::Cr>(top),
+                           std::get<(int)BlockType::Cr>(top), curr_y, image_data, component_size_in_blocks[component]);
+
+                break;
+#ifdef ALLOW_FOUR_COLORS
+            case BlockType::Ck:
+                decode_row(std::get<(int)BlockType::Ck>(corner), std::get<(int)BlockType::Ck>(top),
+                           std::get<(int)BlockType::Ck>(top), curr_y, image_data, component_size_in_blocks[component]);
+
+                break;
+#endif
+        }
+    } else if (block_width > 1) {
+        // Later row, more than one block wide: distinct left / middle / right tables.
+        assert(curr_y); // just a sanity check that the zeroth row took the first branch
+        switch ((BlockType)component) {
+            case BlockType::Y:
+                decode_row(std::get<(int)BlockType::Y>(midleft), std::get<(int)BlockType::Y>(middle),
+                           std::get<(int)BlockType::Y>(midright), curr_y, image_data,
+                           component_size_in_blocks[component]);
+
+                break;
+            case BlockType::Cb:
+                decode_row(std::get<(int)BlockType::Cb>(midleft), std::get<(int)BlockType::Cb>(middle),
+                           std::get<(int)BlockType::Cb>(midright), curr_y, image_data,
+                           component_size_in_blocks[component]);
+
+                break;
+            case BlockType::Cr:
+                decode_row(std::get<(int)BlockType::Cr>(midleft), std::get<(int)BlockType::Cr>(middle),
+                           std::get<(int)BlockType::Cr>(midright), curr_y, image_data,
+                           component_size_in_blocks[component]);
+
+                break;
+#ifdef ALLOW_FOUR_COLORS
+            case BlockType::Ck:
+                decode_row(std::get<(int)BlockType::Ck>(midleft), std::get<(int)BlockType::Ck>(middle),
+                           std::get<(int)BlockType::Ck>(midright), curr_y, image_data,
+                           component_size_in_blocks[component]);
+
+                break;
+#endif
+        }
+    } else {
+        // Later row, exactly one block wide: the same table serves all three positions.
+        assert(curr_y); // just a sanity check that the zeroth row took the first branch
+        assert(block_width == 1);
+        switch ((BlockType)component) {
+            case BlockType::Y:
+                decode_row(std::get<(int)BlockType::Y>(width_one), std::get<(int)BlockType::Y>(width_one),
+                           std::get<(int)BlockType::Y>(width_one), curr_y, image_data,
+                           component_size_in_blocks[component]);
+
+                break;
+            case BlockType::Cb:
+                decode_row(std::get<(int)BlockType::Cb>(width_one), std::get<(int)BlockType::Cb>(width_one),
+                           std::get<(int)BlockType::Cb>(width_one), curr_y, image_data,
+                           component_size_in_blocks[component]);
+
+                break;
+            case BlockType::Cr:
+                decode_row(std::get<(int)BlockType::Cr>(width_one), std::get<(int)BlockType::Cr>(width_one),
+                           std::get<(int)BlockType::Cr>(width_one), curr_y, image_data,
+                           component_size_in_blocks[component]);
+
+                break;
+#ifdef ALLOW_FOUR_COLORS
+            case BlockType::Ck:
+                decode_row(std::get<(int)BlockType::Ck>(width_one), std::get<(int)BlockType::Ck>(width_one),
+                           std::get<(int)BlockType::Ck>(width_one), curr_y, image_data,
+                           component_size_in_blocks[component]);
+
+                break;
+#endif
+        }
+    }
+}
+
+/*
+ * Decode at most one row on behalf of thread `thread_id`.
+ *
+ * Starting at decode_index_ (which is advanced for every row inspected), rows
+ * are looked up with row_spec_from_index() until one is found that is neither
+ * marked skip nor below this thread's first luma row. That row is decoded and
+ * CODING_PARTIAL is returned. CODING_DONE is returned instead when the row
+ * spec reports done, or when the row lies at or beyond this thread's upper
+ * luma bound and this is not the last thread.
+ *
+ * Thread 0 additionally reports progress for the decoded component through
+ * colldata->worker_update_cmp_progress().
+ */
+CodingReturnValue LeptonCodec::ThreadState::vp8_decode_thread(unsigned int thread_id,
+                                                              UncompressedComponents* const colldata) {
+    Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> component_size_in_blocks;
+    BlockBasedImagePerChannel<false> image_data;
+    for (int cmp = 0; cmp < colldata->get_num_components(); ++cmp) {
+        component_size_in_blocks[cmp] = colldata->component_size_in_blocks(cmp);
+        image_data[cmp] = &colldata->full_component_write((BlockType)cmp);
+    }
+    Sirikata::Array1d<uint32_t, (size_t)ColorChannel::NumBlockTypes> max_coded_heights =
+        colldata->get_max_coded_heights();
+    /* deserialize each block in planar order */
+
+    assert(luma_splits_.size() == 2); // not ready to do multiple work items on a thread yet
+    const int first_luma_y = luma_splits_[0];
+    const int end_luma_y = luma_splits_[1];
+    for (;;) {
+        const RowSpec row =
+            row_spec_from_index(decode_index_++, image_data, colldata->get_mcu_count_vertical(), max_coded_heights);
+        // Nothing left to decode, or the row belongs to a later thread.
+        if (row.done || (row.luma_y >= end_luma_y && thread_id + 1 != NUM_THREADS)) {
+            return CODING_DONE;
+        }
+        // Rows flagged as skip, or owned by an earlier thread, are passed over.
+        if (row.skip || row.luma_y < first_luma_y) {
+            continue;
+        }
+        decode_row(image_data, component_size_in_blocks, row.component, row.curr_y);
+        if (thread_id == 0) {
+            colldata->worker_update_cmp_progress((BlockType)row.component, image_data[row.component]->block_width());
+        }
+        return CODING_PARTIAL;
+    }
+}
diff --git a/codec/L2/demos/leptonEnc/host/lepton/lepton_codec.hh b/codec/L2/demos/leptonEnc/host/lepton/lepton_codec.hh
new file mode 100644
index 0000000000..142289905b
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/lepton_codec.hh
@@ -0,0 +1,194 @@
+#ifndef _LEPTON_CODEC_HH_
+#define _LEPTON_CODEC_HH_
+#include "jpgcoder.hh"
+#include "model.hh"
+#include "bool_decoder.hh"
+#include "base_coders.hh"
+class UncompressedComponents;
+
+/*
+ * Base class holding the per-thread decoding state of the Lepton codec plus
+ * the helper that maps a linear row index to a (component, row) pair.
+ *
+ * The class owns the ThreadState objects stored in thread_state_: they are
+ * allocated lazily in reset_thread_model_state() and released in the
+ * destructor.
+ * NOTE(review): no copy constructor / assignment operator is declared here, so
+ * an implicit copy would presumably share those raw pointers and delete them
+ * twice -- confirm that instances are never copied.
+ */
+class LeptonCodec {
+   protected:
+    // Everything one decoding thread needs while walking its share of rows.
+    struct ThreadState {
+        ProbabilityTablesBase model_;
+        BoolDecoder bool_decoder_;
+        // the splits this thread is concerned with...always 1 more than the number of work items
+        std::vector<int> luma_splits_;
+        // per channel: true until the first row of that channel has been decoded
+        Sirikata::Array1d<bool, (size_t)ColorChannel::NumBlockTypes> is_top_row_;
+        // per channel: block context positioned on the row currently being decoded
+        Sirikata::Array1d<BlockContext, (size_t)ColorChannel::NumBlockTypes> context_;
+        // the last 2 rows of the image for each channel
+        Sirikata::Array1d<std::vector<NeighborSummary>, (size_t)ColorChannel::NumBlockTypes> num_nonzeros_;
+        // index of the next row to request from row_spec_from_index()
+        // NOTE(review): not initialized in this struct; presumably set by the owner -- confirm.
+        uint32_t decode_index_;
+        bool is_valid_range_;
+        // Decode one row using separate models for the first, interior and last block.
+        template <class Left, class Middle, class Right, bool should_force_memory_optimization>
+        void decode_row(Left& left_model,
+                        Middle& middle_model,
+                        Right& right_model,
+                        int curr_y,
+                        BlockBasedImagePerChannel<should_force_memory_optimization>& image_data,
+                        int component_size_in_block);
+        // Decode row curr_y of `component`, selecting the models from the row position.
+        template <bool force_memory_optimization>
+        void decode_row(BlockBasedImagePerChannel<force_memory_optimization>& image_data,
+                        Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> component_size_in_block,
+                        int component,
+                        int curr_y);
+
+        // Decode the next pending row for this thread; see the definition for the return protocol.
+        CodingReturnValue vp8_decode_thread(unsigned int thread_id, UncompressedComponents* const colldata);
+
+       private:
+        // Non-template forwarder to the four-argument decode_row() for <true> images.
+        void decode_row_wrapper(
+            BlockBasedImagePerChannel<true>& image_data,
+            Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> component_size_in_blocks,
+            int component,
+            int curr_y);
+    };
+    // Greatest common divisor by repeated remainder (Euclid); gcd(a, 0) == a.
+    static uint32_t gcd(uint32_t a, uint32_t b) {
+        while (b) {
+            uint32_t tmp = a % b;
+            a = b;
+            b = tmp;
+        }
+        return a;
+    }
+
+   public:
+    // Result of row_spec_from_index(): which row of which component a linear
+    // decode index refers to, plus bookkeeping about its MCU row.
+    struct RowSpec {
+        int min_row_luma_y;            // first luma row of the MCU row
+        int next_row_luma_y;           // first luma row of the following MCU row
+        int luma_y;                    // curr_y when component == 0, otherwise min_row_luma_y
+        int component;                 // component index the row belongs to
+        int curr_y;                    // row index within that component
+        int mcu_row_index;             // index of the MCU row
+        bool last_row_to_complete_mcu; // true for the last row of component 0 in its MCU row
+        bool skip;                     // row lies at or beyond max_coded_heights for its component
+        bool done;                     // set together with skip when no checked component has rows left
+    };
+    /*
+     * Translate a linear decode_index into a RowSpec.
+     *
+     * Each component i contributes component_multiple[i] = height_i / mcuv rows
+     * to every MCU row (height 0 for components whose image_data entry is null).
+     * Within one MCU row the lowest indices are assigned to the highest-numbered
+     * component, since the search below runs from num_cmp - 1 down to 0.
+     *
+     * Both mcuv and the sum of all component multiples are used as divisors, so
+     * callers must make sure neither is zero.
+     */
+    template <class BlockBasedImagePerChannels>
+    static RowSpec row_spec_from_index(
+        uint32_t decode_index,
+        const BlockBasedImagePerChannels& image_data,
+        int mcuv, // number of mcus
+        Sirikata::Array1d<uint32_t, (size_t)ColorChannel::NumBlockTypes> max_coded_heights) {
+        uint32_t num_cmp = (uint32_t)ColorChannel::NumBlockTypes;
+        uint32_t heights[(uint32_t)ColorChannel::NumBlockTypes] = {0};
+        uint32_t component_multiple[(uint32_t)ColorChannel::NumBlockTypes] = {0};
+        uint32_t mcu_multiple = 0; // total rows (over all components) per MCU row
+        for (uint32_t i = 0; i < num_cmp; ++i) {
+            heights[i] = image_data[i] ? image_data[i]->original_height() : 0;
+            component_multiple[i] = heights[i] / mcuv;
+            mcu_multiple += component_multiple[i];
+        }
+        uint32_t mcu_row = decode_index / mcu_multiple;
+        RowSpec retval = {0, 0, 0, 0, 0, 0, false, false, false};
+        retval.skip = false;
+        retval.done = false;
+        retval.mcu_row_index = mcu_row;
+        // position of decode_index inside its MCU row
+        uint32_t place_within_scan = decode_index - mcu_row * mcu_multiple;
+        retval.component = num_cmp;
+        retval.min_row_luma_y = (mcu_row)*component_multiple[0];
+        retval.next_row_luma_y = retval.min_row_luma_y + component_multiple[0];
+        retval.luma_y = retval.min_row_luma_y;
+        // Walk the components from last to first, consuming each one's share of
+        // the MCU row until place_within_scan falls inside a component.
+        for (uint32_t i = num_cmp - 1; true; --i) {
+            if (place_within_scan < component_multiple[i]) {
+                retval.component = i;
+                retval.curr_y = mcu_row * component_multiple[i] + place_within_scan;
+                retval.last_row_to_complete_mcu = (place_within_scan + 1 == component_multiple[i] && i == 0);
+                if (retval.curr_y >= int(max_coded_heights[i])) {
+                    retval.skip = true;
+                    retval.done = true; // assume true, but if we find something that needs coding, set false
+                    // NOTE(review): the bound num_cmp - 1 leaves the last component unchecked -- confirm intent.
+                    for (uint32_t j = 0; j < num_cmp - 1; ++j) {
+                        if (mcu_row * component_multiple[j] < max_coded_heights[j]) {
+                            retval.done = false; // we want to make sure to write out any partial rows,
+                            // so set done only when all items in this mcu are really skips
+                            // i.e. round down
+                        }
+                    }
+                }
+                if (i == 0) {
+                    retval.luma_y = retval.curr_y;
+                }
+                break;
+            } else {
+                place_within_scan -= component_multiple[i];
+            }
+            // Reaching component 0 without a match should be impossible; bail
+            // out here so that --i never wraps around.
+            if (i == 0) {
+                assert(false);
+                retval.skip = true;
+                retval.done = true;
+                break;
+            }
+        }
+        return retval;
+    }
+
+   protected:
+    bool do_threading_;
+    GenericWorker* spin_workers_;
+    unsigned int num_registered_workers_;
+    // Owned; entries are null until reset_thread_model_state() allocates them.
+    Sirikata::Array1d<ThreadState*, MAX_NUM_THREADS> thread_state_;
+
+    // Allocate the ThreadState for thread_id on first use and reset its model
+    // tables via set_tables_identity(); timestamps are recorded before and after.
+    void reset_thread_model_state(int thread_id) {
+        TimingHarness::timing[thread_id][TimingHarness::TS_MODEL_INIT_BEGIN] = TimingHarness::get_time_us();
+
+        if (!thread_state_[thread_id]) {
+            thread_state_[thread_id] = new ThreadState;
+        }
+        thread_state_[thread_id]->model_.model().set_tables_identity();
+        TimingHarness::timing[thread_id][TimingHarness::TS_MODEL_INIT] = TimingHarness::get_time_us();
+    }
+    // Remember the worker pool. Requires num_workers + 1 == NUM_THREADS and
+    // num_workers < MAX_NUM_THREADS. The loop below currently does nothing
+    // because its body is commented out.
+    void registerWorkers(GenericWorker* workers, unsigned int num_workers) {
+        always_assert(num_workers < MAX_NUM_THREADS);
+        always_assert(num_workers + 1 == NUM_THREADS);
+        num_registered_workers_ = num_workers;
+        spin_workers_ = workers;
+        for (unsigned int i = 0; i < num_workers + 1; ++i) {
+            if (!thread_state_[i]) {
+                // thread_state_[i] = new ThreadState;
+                // thread_state_[i]->model_.model().set_tables_identity();
+                // thread_state_[i]->model_.load_probability_tables();
+            }
+        }
+    }
+    // sizeof(ProbabilityTablesBase) summed over every allocated thread state
+    // except slot 0.
+    size_t model_worker_memory_used() const {
+        size_t retval = 0;
+        for (size_t i = 1; i < thread_state_.size(); ++i) {
+            if (thread_state_[i]) {
+                retval += sizeof(ProbabilityTablesBase);
+            }
+        }
+        return retval;
+    }
+
+    // sizeof(ProbabilityTablesBase) summed over every allocated thread state,
+    // slot 0 included.
+    size_t model_memory_used() const {
+        size_t retval = 0;
+        for (size_t i = 0; i < thread_state_.size(); ++i) {
+            if (thread_state_[i]) {
+                retval += sizeof(ProbabilityTablesBase);
+            }
+        }
+        return retval;
+    }
+    // Record the threading mode and clear thread_state_ to all-null. No
+    // ThreadState is allocated here: the body of the loop is commented out.
+    LeptonCodec(bool do_threading) {
+        num_registered_workers_ = 0; // need to wait
+        do_threading_ = do_threading;
+        unsigned int num_threads = 1;
+        if (do_threading) {
+            num_threads = NUM_THREADS;
+        }
+        thread_state_.memset(0);
+        always_assert(num_threads <= MAX_NUM_THREADS);
+
+        for (unsigned int i = 0; i < num_threads; ++i) {
+            // thread_state_[i] = new ThreadState;
+            // thread_state_[i]->model_.model().set_tables_identity();
+            // thread_state_[i]->model_.load_probability_tables();
+        }
+    }
+    // Release every ThreadState that was allocated.
+    ~LeptonCodec() {
+        for (unsigned int i = 0; i < thread_state_.size(); ++i) {
+            if (thread_state_[i]) {
+                delete thread_state_[i];
+            }
+        }
+    }
+};
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/lepton/print-model.cc b/codec/L2/demos/leptonEnc/host/lepton/print-model.cc
new file mode 100644
index 0000000000..babc6abae1
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/print-model.cc
@@ -0,0 +1,51 @@
+#include "../../vp8/util/memory.hh"
+#include <cstdlib>
+#include <iostream>
+/*
+#include "model.hh"
+bool g_threaded = true;
+using namespace std;
+
+int main(int argc, char* argv[]) {
+    Sirikata::memmgr_init(768 * 1024 * 1024, 64 * 1024 * 1024, 3, 256);
+
+    if (argc <= 0) {
+        abort();
+    }
+    Model::PrintabilitySpecification spec;
+    spec.printability_bitmask = Model::CLOSE_TO_50 | Model::CLOSE_TO_ONE_ANOTHER;
+    spec.tolerance = .25;
+    spec.min_samples = 25;
+    for (int i = 1; i < argc; ++i) {
+        if (strstr(argv[i], "-t") == argv[i] || strstr(argv[i], "-s") == argv[i] || strcmp(argv[i], "-ok") == 0) {
+            double arg = atof(argv[i] + 2);
+            if (argv[i][1] == 't') {
+                spec.tolerance = arg;
+            } else if (argv[i][1] == 'o') {
+                spec.printability_bitmask = Model::PRINTABLE_OK;
+            } else {
+                spec.min_samples = (int64_t)arg;
+            }
+            for (int j = i; j + 1 < argc; ++j) {
+                argv[j] = argv[j + 1];
+            }
+            --argc;
+            --i;
+        }
+    }
+    if (argc != 2 && argc != 3) {
+        cerr << "Usage: " << argv[0] << " FILENAME" << endl;
+    }
+
+    Model model_tables;
+    load_model(model_tables, argv[1]);
+    if (argc > 2) {
+        Model orig_tables;
+        load_model(orig_tables, argv[2]);
+        model_tables.debug_print(&orig_tables, spec);
+    } else {
+        model_tables.debug_print(nullptr, spec);
+    }
+    return EXIT_SUCCESS;
+}
+*/
diff --git a/codec/L2/demos/leptonEnc/host/lepton/recoder.cc b/codec/L2/demos/leptonEnc/host/lepton/recoder.cc
new file mode 100644
index 0000000000..7a4202585c
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/recoder.cc
@@ -0,0 +1,746 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+#include <time.h>
+#include <stdio.h>
+#include <iostream>
+
+#include "uncompressed_components.hh"
+#include "recoder.hh"
+#include "bitops.hh"
+#include "lepton_codec.hh"
+#include "../io/BoundedMemWriter.hh"
+#include "../vp8/util/memory.hh"
+#define ENVLI(s, v) ((v > 0) ? v : (v - 1) + (1 << s))
+
+int next_mcuposn(int* cmp, int* dpos, int* rstw);
+extern BaseDecoder* g_decoder;
+extern UncompressedComponents colldata; // baseline sorted DCT coefficients
+
+extern char padbit;
+
+extern Sirikata::Array1d<int, 4> cs_cmp; // component numbers  in current scan
+extern Sirikata::Array1d<componentInfo, 4> cmpnfo;
+
+extern int grbs;                                                            // size of garbage
+extern int hdrs;                                                            // size of header
+extern Sirikata::Array1d<Sirikata::Array1d<unsigned short, 64>, 4> qtables; // quantization tables
+extern Sirikata::Array1d<Sirikata::Array1d<huffCodes, 4>, 2> hcodes;        // huffman codes
+extern Sirikata::Array1d<Sirikata::Array1d<huffTree, 4>, 2> htrees;         // huffman decoding trees
+extern Sirikata::Array1d<Sirikata::Array1d<unsigned char, 4>, 2> htset;     // 1 if huffman table is set
+
+extern unsigned char* grbgdata; // garbage data
+extern unsigned char* hdrdata;  // header data
+extern int rsti;
+extern int mcuv;                           // mcus per line
+extern unsigned int mcuh;                  // mcus per column
+extern int mcuc;                           // count of mcus
+extern std::vector<unsigned char> rst_err; // number of wrong-set RST markers per scan
+extern bool rst_cnt_set;
+extern std::vector<unsigned int> rst_cnt;
+extern int prefix_grbs;                // size of prefix garbage
+extern unsigned char* prefix_grbgdata; // the actual prefix garbage: if present, hdrdata not serialized
+
+void check_decompression_memory_bound_ok();
+
+bool parse_jfif_jpg(unsigned char type, unsigned int len, unsigned char* segment);
+#define B_SHORT(v1, v2) ((((int)v1) << 8) + ((int)v2))
+
+int find_aligned_end_64_scalar(const int16_t* block) {
+    // Scan backwards for the last nonzero coefficient; slot 0 is never tested, so 0 is the floor.
+    int last = 63;
+    for (; last > 0 && block[last] == 0; --last) {
+    }
+    return last;
+}
+int find_aligned_end_64_sse41(const int16_t* block) { // index of the last nonzero coefficient in a 64-entry block (0 if there is none past slot 0)
+    unsigned int mask = 0;
+    int iter;
+    for (iter = 56; iter >= 0; iter -= 8) { // walk the block backwards, 8 coefficients (128 bits) per step
+        __m128i row = _mm_load_si128((__m128i*)(block + iter)); // aligned load: block must be 16-byte aligned
+        __m128i row_cmp = _mm_cmpeq_epi16(row, _mm_setzero_si128()); // 0xFFFF in every lane whose coefficient is zero
+        mask = _mm_movemask_epi8(row_cmp); // one bit per byte, i.e. two bits per coefficient
+        if (mask != 0xffff) { // this row holds at least one nonzero coefficient
+            break;
+        }
+    }
+    if (mask == 0xffff) { // every row compared equal to zero
+        assert(find_aligned_end_64_scalar(block) == 0);
+        return 0;
+    }
+    unsigned int bitpos = 32 - __builtin_clz((~mask) & 0xffff); // 1-based position of the highest clear mask bit
+    int retval = iter + ((bitpos >> 1) - 1); // two mask bits per coefficient: halve to get the lane, then add the row base
+
+    assert(retval == find_aligned_end_64_scalar(block)); // cross-check against the scalar version when asserts are enabled
+    return retval;
+}
+#ifdef __AVX2__
+int find_aligned_end_64(const int16_t* block) { // AVX2 variant: index of the last nonzero coefficient, 16 coefficients per step
+    uint32_t mask = 0;
+    int iter;
+    for (iter = 48; iter >= 0; iter -= 16) { // walk the block backwards, 16 coefficients (256 bits) per step
+        __m256i row = _mm256_load_si256((const __m256i*)(const char*)(block + iter)); // aligned load: block must be 32-byte aligned
+        __m256i row_cmp = _mm256_cmpeq_epi16(row, _mm256_setzero_si256()); // 0xFFFF in every lane whose coefficient is zero
+        mask = _mm256_movemask_epi8(row_cmp); // one bit per byte, i.e. two bits per coefficient
+        if (mask != 0xffffffffU) { // this row holds at least one nonzero coefficient
+            break;
+        }
+    }
+    if (mask == 0xffffffffU) { // every row compared equal to zero
+        assert(find_aligned_end_64_scalar(block) == 0);
+        return 0;
+    }
+    unsigned int bitpos = 32 - __builtin_clz((~mask) & 0xffffffffU); // 1-based position of the highest clear mask bit
+    int retval = iter + ((bitpos >> 1) - 1); // two mask bits per coefficient: halve to get the lane, then add the row base
+
+    assert(retval == find_aligned_end_64_scalar(block)); // cross-check against the scalar version when asserts are enabled
+    return retval;
+}
+#else
+int find_aligned_end_64(const int16_t* block) { // without AVX2, defer to the 128-bit implementation
+    return find_aligned_end_64_sse41(block);
+}
+#endif
+
+static bool aligned_memchr16ff(const unsigned char* local_huff_data) {
+#if defined(__i386__)
+    return memchr(local_huff_data, 0xff, 16) != NULL; // plain libc scan on 32-bit x86
+#else
+    // Compare all 16 bytes against 0xFF at once; any set mask bit marks a match.
+    const __m128i chunk = _mm_load_si128((__m128i const*)local_huff_data);
+    const __m128i hits = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(-1));
+    const bool found = _mm_movemask_epi8(hits) != 0;
+    assert(found == (memchr(local_huff_data, 0xff, 16) != NULL));
+    return found;
+#endif
+}
+
+/**
+ * Writes max_byte_coded bytes of byte-aligned huffman data from local_huff_data to str_out,
+ * inserting a 0x00 stuff byte after every 0xff byte found in the data
+ */
+template <class OutputWriter>
+void escape_0xff_huffman_and_write(OutputWriter* str_out,
+                                   const unsigned char* local_huff_data,
+                                   unsigned int max_byte_coded) {
+    unsigned int progress_ipos = 0;
+    // write a single scan
+    {
+        // write & expand huffman coded image data
+        const unsigned char stv = 0x00; // 0xFF stuff value
+        for (; progress_ipos & 0xf; progress_ipos++) { // bytewise until the offset is a multiple of 16; never entered from offset 0
+            if (__builtin_expect(!(progress_ipos < max_byte_coded), 0)) {
+                break;
+            }
+            uint8_t byte_to_write = local_huff_data[progress_ipos];
+            str_out->write_byte(byte_to_write);
+            // check current byte, stuff if needed
+            if (__builtin_expect(byte_to_write == 0xFF, 0)) {
+                str_out->write_byte(stv);
+                write_byte_bill(Billing::DELIMITERS, false, 1);
+            }
+        }
+
+        while (true) { // bulk phase: consume 16 bytes per iteration while at least 16 remain
+            if (__builtin_expect(!(progress_ipos + 15 < max_byte_coded), 0)) {
+                break;
+            }
+            if (__builtin_expect(aligned_memchr16ff(local_huff_data + progress_ipos), 0)) {
+                // this 16-byte chunk contains a 0xff: copy it bytewise so each 0xff gets stuffed
+                for (int veci = 0; veci < 16; ++veci, ++progress_ipos) {
+                    uint8_t byte_to_write = local_huff_data[progress_ipos];
+                    str_out->write_byte(byte_to_write);
+                    // check current byte, stuff if needed
+                    if (__builtin_expect(byte_to_write == 0xFF, 0)) {
+                        write_byte_bill(Billing::DELIMITERS, false, 1);
+                        str_out->write_byte(stv);
+                    }
+                }
+            } else { // no 0xff in the chunk: nothing to stuff, write all 16 bytes at once
+                str_out->write(local_huff_data + progress_ipos, 16);
+                progress_ipos += 16;
+            }
+        }
+        for (;; progress_ipos++) { // tail: the remaining bytes (fewer than 16), bytewise
+            if (__builtin_expect(!(progress_ipos < max_byte_coded), 0)) {
+                break;
+            }
+            uint8_t byte_to_write = local_huff_data[progress_ipos];
+            str_out->write_byte(byte_to_write);
+            // check current byte, stuff if needed
+            if (__builtin_expect(byte_to_write == 0xFF, 0)) {
+                write_byte_bill(Billing::DELIMITERS, false, 1);
+                str_out->write_byte(stv);
+            }
+        }
+    }
+}
+
+/* -----------------------------------------------
+    advances to the next block; returns 0 to continue, 1 when the restart counter hits zero, 2 once mcu reaches mcuc
+    ----------------------------------------------- */
+int next_mcupos(int* mcu, int* cmp, int* csc, int* sub, int* dpos, int* rstw, int cs_cmpc) {
+    int sta = 0; // status
+    unsigned int local_mcuh = mcuh;
+    unsigned int local_mcu = *mcu;
+    unsigned int local_cmp = *cmp;
+    unsigned int local_sub;
+    // increment all counts where needed
+    if ((local_sub = ++(*sub)) >= (unsigned int)cmpnfo[local_cmp].mbs) { // sub counter reached this component's mbs: wrap it
+        local_sub = (*sub) = 0;
+
+        if ((++(*csc)) >= cs_cmpc) { // component counter reached cs_cmpc: wrap to the first scan component, next MCU
+            (*csc) = 0;
+            local_cmp = (*cmp) = cs_cmp[0];
+            local_mcu = ++(*mcu);
+            if (local_mcu >= (unsigned int)mcuc) {
+                sta = 2;
+            } else if (rsti > 0) { // restart intervals in use: count this MCU down
+                if (--(*rstw) == 0) {
+                    sta = 1;
+                }
+            }
+        } else {
+            local_cmp = (*cmp) = cs_cmp[(*csc)];
+        }
+    }
+    unsigned int sfh = cmpnfo[local_cmp].sfh;
+    unsigned int sfv = cmpnfo[local_cmp].sfv;
+    // get correct position in image ( x & y )
+    if (sfh > 1) { // to fix mcu order
+        unsigned int mcu_o_mcuh = local_mcu / local_mcuh;
+        unsigned int sub_o_sfv = local_sub / sfv;
+        unsigned int mcu_mod_mcuh = local_mcu - mcu_o_mcuh * local_mcuh; // local_mcu % local_mcuh without a second division
+        unsigned int sub_mod_sfv = local_sub - sub_o_sfv * sfv;          // local_sub % sfv without a second division
+        unsigned int local_dpos = mcu_o_mcuh * sfh + sub_o_sfv;
+        local_dpos *= cmpnfo[local_cmp].bch;
+        local_dpos += mcu_mod_mcuh * sfv + sub_mod_sfv;
+        *dpos = local_dpos;
+    } else if (sfv > 1) {
+        // simple calculation to speed up things if simple fixing is enough
+        (*dpos) = local_mcu * cmpnfo[local_cmp].mbs + local_sub;
+    } else {
+        // no calculations needed without subsampling
+        (*dpos) = (*mcu);
+    }
+
+    return sta;
+}
+
+// -----------------------------------------------
+//    sequential block encoding routine; returns the index of the last nonzero coefficient plus one
+// -----------------------------------------------
+int encode_block_seq(abitwriter* huffw, huffCodes* dctbl, huffCodes* actbl, short* block) {
+    unsigned short n;
+    unsigned char s;
+    int bpos;
+    int hc;
+    short tmp;
+
+    // encode DC
+    tmp = block[0];
+    s = uint16bit_length(tmp > 0 ? tmp : -tmp);
+    n = ENVLI(s, tmp);
+    huffw->write(dctbl->cval[s], dctbl->clen[s]); // huffman code for the bit-length category s
+    write_multi_bit_bill(dctbl->clen[s], false, Billing::EXP0_DC, Billing::EXPN_DC);
+    huffw->write(n, s); // followed by the s value bits produced by ENVLI
+    if (s) {
+        write_bit_bill(Billing::RES_DC, false, s - 1);
+        write_bit_bill(Billing::SIGN_DC, false, 1);
+    }
+
+    signed z = -1; // count of zero coefficients since the last nonzero one; reset to 0 right below
+    // encode AC
+    z = 0;
+    int end = find_aligned_end_64(block); // coefficients after this index are all zero
+    for (bpos = 1; bpos <= end; bpos++) {
+        // count zero coefficients until a nonzero one is encountered
+        tmp = block[bpos];
+        if (tmp == 0) {
+            ++z;
+            continue;
+        }
+        // vli encode
+        s = nonzero_bit_length(tmp > 0 ? tmp : -tmp);
+        n = ENVLI(s, tmp);
+        hc = (((z & 0xf) << 4) + s); // symbol: run length modulo 16 in the high nibble, bit length in the low nibble
+        if (__builtin_expect(z & 0xf0, 0)) {
+            // the run is 16 or longer: emit one 0xF0 symbol per 16 zeroes
+            do {
+                huffw->write(actbl->cval[0xF0], actbl->clen[0xF0]);
+                write_multi_bit_bill(actbl->clen[0xF0], false,
+                                     is_edge(bpos) ? Billing::BITMAP_EDGE : Billing::BITMAP_7x7, // this is pure bitmap
+                                     is_edge(bpos) ? Billing::BITMAP_EDGE : Billing::BITMAP_7x7);
+                z -= 16;
+            } while (z & 0xf0);
+        }
+        // write to huffman writer
+        huffw->write(actbl->cval[hc], actbl->clen[hc]);
+        write_multi_bit_bill(actbl->clen[hc], false,
+                             is_edge(bpos) ? Billing::BITMAP_EDGE : Billing::BITMAP_7x7, // this is pure bitmap
+                             is_edge(bpos) ? Billing::EXPN_EDGE : Billing::EXPN_7x7);
+        huffw->write(n, s);
+        if (s) {
+            write_bit_bill(is_edge(bpos) ? Billing::RES_EDGE : Billing::RES_7x7, false, s - 1);
+            write_bit_bill(is_edge(bpos) ? Billing::SIGN_EDGE : Billing::SIGN_7x7, false, 1);
+        }
+
+        // reset zeroes
+        z = 0;
+    }
+    // write eob if needed
+    if (end != 63) { // trailing zeroes exist: the 0x00 symbol stands in for all of them
+        huffw->write(actbl->cval[0x00], actbl->clen[0x00]);
+        write_eob_bill(end, false, actbl->clen[0x00]);
+    }
+
+    return end + 1;
+}
+
+template <class OutputWriter>
+bool recode_one_mcu_row(abitwriter* huffw, // huffman-encodes blocks from framebuffer starting at mcu; false only on an encode error
+                        int mcu,
+                        OutputWriter* str_out,
+                        Sirikata::Array1d<int16_t, (size_t)ColorChannel::NumBlockTypes>& lastdc,
+                        const BlockBasedImagePerChannel<true> framebuffer) {
+    int cmp = cs_cmp[0];
+    int csc = 0, sub = 0;
+    int mcumul = cmpnfo[cmp].sfv * cmpnfo[cmp].sfh;
+    int dpos = mcu * mcumul;
+    int rstw = rsti ? rsti - mcu % rsti : 0; // MCUs left before the next restart marker; 0 when rsti is 0
+    unsigned int cumulative_reset_markers = rstw ? mcu / rsti : 0; // number of whole restart intervals before mcu
+    unsigned char cmpc = 0;
+    for (; cmpc < framebuffer.size() && framebuffer[cmpc] != NULL; ++cmpc) { // count the leading non-NULL channels
+    }
+    Sirikata::Aligned256Array1d<int16_t, 64> block; // store block for coeffs
+    bool end_of_row = false;
+    // JPEG imagedata encoding routines
+    while (!end_of_row) {
+        // (re)set status
+        int sta = 0;
+
+        // ---> sequential interleaved encoding <---
+        while (sta == 0) {
+            // copy the block's zigzag-ordered coefficients out of the framebuffer
+            const AlignedBlock& aligned_block = framebuffer[cmp]->raster(dpos);
+            // fprintf(stderr, "Reading from cmp(%d) dpos %d\n", cmp, dpos);
+            for (int bpos = 0; bpos < 64; bpos++) {
+                block[bpos] = aligned_block.coefficients_zigzag(bpos);
+            }
+
+            int16_t dc = block[0];
+            // diff coding for dc
+            block[0] -= lastdc[cmp];
+            lastdc[cmp] = dc;
+
+            // encode block
+            int eob = encode_block_seq(huffw, &(hcodes[0][cmpnfo[cmp].huffdc]), &(hcodes[1][cmpnfo[cmp].huffac]),
+                                       block.begin());
+            int old_mcu = mcu;
+            // check for errors, proceed if no error encountered
+            if (eob < 0)
+                sta = -1;
+            else if (__builtin_expect(framebuffer.size() == 1 || framebuffer[1] == NULL, 0)) { // only one channel present
+                sta = next_mcuposn(&cmp, &dpos, &rstw);
+                mcu = dpos / mcumul;
+            } else {
+                sta = next_mcupos(&mcu, &cmp, &csc, &sub, &dpos, &rstw, cmpc); // we can pass in cmpc instead of CMPC
+            }
+            if (sta == 0 && huffw->no_remainder()) { // writer is on a byte boundary: flush what it holds
+                escape_0xff_huffman_and_write(str_out, huffw->peekptr(), huffw->getpos());
+                huffw->reset();
+            }
+            if (str_out->has_exceeded_bound()) { // output bound hit: treat as done
+                sta = 2;
+            }
+            if (old_mcu != mcu && mcu % mcuh == 0) { // mcu advanced onto a multiple of mcuh: this row is finished
+                end_of_row = true;
+                if (sta == 0) {
+                    return true; // returns without padding, leaving any partial byte in huffw
+                }
+            }
+        }
+
+        // pad huffman writer
+        huffw->pad(padbit);
+        if (huffw->no_remainder()) {
+            escape_0xff_huffman_and_write(str_out, huffw->peekptr(), huffw->getpos());
+            huffw->reset();
+        }
+        // evaluate status
+        if (sta == -1) { // status -1 means error
+            delete huffw; // NOTE(review): frees the caller-supplied writer -- confirm the caller never touches it afterwards
+            return false;
+        } else if (sta == 2) { // status 2 means done
+            break;             // leave encoding loop, everything is done here
+        } else if (sta == 1) { // status 1 means restart
+            if (rsti > 0) {
+                if (rst_cnt.empty() || (!rst_cnt_set) || cumulative_reset_markers < rst_cnt[0]) {
+                    const unsigned char rst = 0xD0 + (cumulative_reset_markers & 7); // marker codes cycle 0xD0..0xD7
+                    str_out->write_byte(0xFF);
+                    str_out->write_byte(rst);
+                    cumulative_reset_markers++;
+                    write_byte_bill(Billing::DELIMITERS, false, 2);
+                }
+                // (re)set rst wait counter
+                rstw = rsti;
+                // (re)set last DCs for diff coding
+                lastdc.memset(0);
+            }
+        }
+        always_assert(huffw->no_remainder() && "this should have been padded");
+    }
+    return true;
+}
+
+unsigned int handle_initial_segments(bounded_iostream* const str_out) {
+    // Walks the marker segments in hdrdata up to and including SOS, writes them out, and returns the
+    // offset just past SOS; returns (unsigned int)-1 on malformed headers or a failed segment parse.
+    unsigned int byte_position = 0;
+
+    while (true) {
+        /* step 1: have we exhausted the headers without reaching the scan? */
+        if (static_cast<int>(byte_position + 3) >= hdrs) {
+            std::cerr << "overran headers\n";
+            return -1;
+        }
+
+        /* step 2: verify we are at the start of a segment header */
+        if (hdrdata[byte_position] != 0xff) {
+            std::cerr << "not start of segment\n";
+            return -1;
+        }
+
+        /* step 3: get info about the segment; it must lie entirely inside hdrdata */
+        const unsigned char type = hdrdata[byte_position + 1];
+        const unsigned int len = 2 + B_SHORT(hdrdata[byte_position + 2], hdrdata[byte_position + 3]);
+        if (len > static_cast<unsigned int>(hdrs) - byte_position) { // step 1 guarantees hdrs > byte_position
+            std::cerr << "overran headers\n";
+            return -1;
+        }
+
+        /* step 4: if it's a DHT (0xC4), DRI (0xDD), or SOS (0xDA), parse to mutable globals */
+        if (type == 0xC4 || type == 0xDD || type == 0xDA) {
+            if (!parse_jfif_jpg(type, len, hdrdata + byte_position)) {
+                return -1;
+            }
+        }
+
+        /* step 5: we parsed the header -- accumulate byte position */
+        byte_position += len;
+
+        /* step 6: if it's an SOS (start of scan), emit the headers and return the byte position */
+        if (type == 0xDA) {
+            if (prefix_grbgdata) {
+                str_out->write(prefix_grbgdata, prefix_grbs);
+            } else {
+                unsigned char SOI[2] = {0xFF, 0xD8}; // SOI segment
+                str_out->write(SOI, 2);
+                str_out->write(hdrdata, byte_position);
+            }
+            return byte_position; /* ready for the scan */
+        }
+    }
+}
+
+void abitwriter::debug() const {
+    // One-line dump of the writer's state to stderr; buf is printed in hex.
+    std::cerr << "abitwriter: no_remainder=" << no_remainder();
+    std::cerr << ", getpos=" << getpos() << ", bits=" << cbit2;
+    std::cerr << ", buf=" << std::hex << buf << std::dec << "\n";
+}
+
+// recodes the rows inside thread_handoff's luma range to stream_out; the returned handoff carries the
+// overhang byte, overhang bit count and last DC values left over after the last MCU row that was written
+template <class BoundedWriter>
+ThreadHandoff recode_row_range(
+    BoundedWriter* stream_out,
+    BlockBasedImagePerChannel<true>& framebuffer,
+    int mcuv,
+    const ThreadHandoff& thread_handoff,
+    Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> max_coded_heights,
+    Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> component_size_in_blocks,
+    int physical_thread_id,
+    int logical_thread_id, // only referenced by the commented-out debug print below
+    abitwriter* huffw) {
+    ThreadHandoff retval = thread_handoff;
+
+    huffw->fillbit = padbit;
+    huffw->reset_from_overhang_byte_and_num_bits(retval.overhang_byte, retval.num_overhang_bits); // resume from the incoming partial byte
+    int decode_index = 0;
+    while (true) {
+        LeptonCodec::RowSpec cur_row =
+            LeptonCodec::row_spec_from_index(decode_index++, framebuffer, mcuv, max_coded_heights);
+        /*
+        fprintf(stderr, "%d] (%d) %d - %d  %d[%d]  [%d %d %d]\n",
+                decode_index,
+                logical_thread_id,
+                thread_handoff.luma_y_start,
+                thread_handoff.luma_y_end,
+                retval.overhang_byte,
+                retval.num_overhang_bits,
+                retval.last_dc[0],
+                retval.last_dc[1],
+                retval.last_dc[2]);
+        */
+        if (cur_row.done) {
+            break;
+        }
+        if (cur_row.skip) {
+            continue;
+        }
+        if (cur_row.min_row_luma_y < thread_handoff.luma_y_start) { // row lies before this handoff's range
+            continue;
+        }
+        if (cur_row.next_row_luma_y > thread_handoff.luma_y_end) {
+            break; // we're done here
+        }
+        g_decoder->decode_row(physical_thread_id, framebuffer, component_size_in_blocks, cur_row.component,
+                              cur_row.curr_y);
+        if (cur_row.last_row_to_complete_mcu) { // only recode once the flag says the MCU row is complete
+            if (!recode_one_mcu_row(huffw, cur_row.mcu_row_index * mcuh, stream_out, retval.last_dc, framebuffer)) {
+                custom_exit(ExitCode::CODING_ERROR);
+            }
+            const unsigned char* flushed_data = huffw->partial_bytewise_flush();
+            escape_0xff_huffman_and_write(stream_out, flushed_data, huffw->getpos());
+            huffw->reset_crystallized_bytes();
+            if (!huffw->bound_reached()) { // record the leftover partial byte for the next segment
+                retval.num_overhang_bits = huffw->get_num_overhang_bits();
+                retval.overhang_byte = huffw->get_overhang_byte();
+            } else {
+                retval.num_overhang_bits = 0;
+                retval.overhang_byte = 0;
+            }
+            if (huffw->error) {
+                custom_exit(ExitCode::CODING_ERROR);
+            }
+        }
+    }
+    return retval;
+}
+
+std::pair<int, int> logical_thread_range_from_physical_thread_id(int physical_thread_id, int num_logical_threads) {
+    const int num_physical_threads = g_threaded ? NUM_THREADS : 1;
+    if (num_logical_threads < num_physical_threads) {
+        // Fewer logical than physical threads: map them one-to-one (clamped), which is an
+        // optimization so the reset logic does not have to be called as often.
+        return std::pair<int, int>(std::min(physical_thread_id, num_logical_threads),
+                                   std::min(physical_thread_id + 1, num_logical_threads));
+    }
+    // Otherwise hand each physical thread a contiguous, proportionally sized half-open range.
+    const int first = (physical_thread_id * num_logical_threads) / num_physical_threads;
+    const int next_first = ((physical_thread_id + 1) * num_logical_threads) / num_physical_threads;
+    return std::pair<int, int>(first, std::min(next_first, num_logical_threads));
+}
+template <class BoundedWriter>
+void recode_physical_thread(BoundedWriter* stream_out,
+                            BlockBasedImagePerChannel<true>& framebuffer,
+                            int mcuv,
+                            const std::vector<ThreadHandoff>& thread_handoffs,
+                            Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> max_coded_heights,
+                            Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> component_size_in_blocks,
+                            int physical_thread_id,
+                            abitwriter* huffw) {
+    // Recodes, in order, every logical thread segment mapped to physical_thread_id into stream_out,
+    // carrying the overhang bits and last DC values from one segment to the next.
+    int num_logical_threads = thread_handoffs.size();
+    int logical_thread_start, logical_thread_end;
+    std::tie(logical_thread_start, logical_thread_end) =
+        logical_thread_range_from_physical_thread_id(physical_thread_id, num_logical_threads);
+    if (logical_thread_start >= logical_thread_end) {
+        return; // empty range: logical_thread_start may equal thread_handoffs.size(), so it must not be indexed
+    }
+    ThreadHandoff th = thread_handoffs[logical_thread_start];
+    size_t original_bound = stream_out->get_bound();
+    bool changed_bounds = false;
+    for (int logical_thread_id = logical_thread_start; logical_thread_id < logical_thread_end; ++logical_thread_id) {
+        TimingHarness::timing[logical_thread_id % MAX_NUM_THREADS][TimingHarness::TS_ARITH_STARTED] =
+            TimingHarness::get_time_us();
+        if (thread_handoffs[logical_thread_id].is_legacy_mode()) {
+            if (logical_thread_id == logical_thread_start) {
+                th.num_overhang_bits = 0; // clean start
+            }
+            ThreadHandoff tmp = thread_handoffs[logical_thread_id];
+            tmp.overhang_byte = th.overhang_byte;
+            tmp.num_overhang_bits = th.num_overhang_bits;
+            memcpy(tmp.last_dc.begin(), th.last_dc.begin(), sizeof(th.last_dc));
+            th = tmp; // copy the dynamic data in
+        } else {
+            assert(memcmp(thread_handoffs[logical_thread_id].last_dc.begin(), th.last_dc.begin(), sizeof(th.last_dc)) ==
+                   0);
+            assert(th.overhang_byte == thread_handoffs[logical_thread_id].overhang_byte);
+            assert(th.num_overhang_bits == thread_handoffs[logical_thread_id].num_overhang_bits);
+            th = thread_handoffs[logical_thread_id];
+            // in the v1 encoding, the first thread's output is unbounded in size but following threads are
+            // bound to their segment_size; in the future (v2 and beyond) all threads are bound by it
+            bool legacy_truncation_mode = get_current_file_lepton_version() == 1;
+            bool worker_thread_and_many_to_one_mapping =
+                logical_thread_end - logical_thread_start != 1 && logical_thread_id != 0;
+            if (worker_thread_and_many_to_one_mapping || !legacy_truncation_mode) {
+                size_t new_bound = stream_out->bytes_written() + thread_handoffs[logical_thread_id].segment_size;
+                if (new_bound < original_bound) {
+                    stream_out->set_bound(new_bound);
+                    changed_bounds = true;
+                } else if (stream_out->get_bound() != original_bound) {
+                    stream_out->set_bound(original_bound);
+                }
+            }
+        }
+        g_decoder->clear_thread_state(logical_thread_id, physical_thread_id, framebuffer);
+        ThreadHandoff outth = recode_row_range(stream_out, framebuffer, mcuv, th, max_coded_heights,
+                                               component_size_in_blocks, physical_thread_id, logical_thread_id, huffw);
+        if (logical_thread_id + 1 < num_logical_threads && !thread_handoffs[logical_thread_id + 1].is_legacy_mode()) {
+            // make sure we computed the same item that was stored
+            always_assert(outth.num_overhang_bits == thread_handoffs[logical_thread_id + 1].num_overhang_bits);
+            always_assert(outth.overhang_byte == thread_handoffs[logical_thread_id + 1].overhang_byte);
+            always_assert(memcmp(outth.last_dc.begin(), thread_handoffs[logical_thread_id + 1].last_dc.begin(),
+                                 sizeof(outth.last_dc)) == 0);
+            if (physical_thread_id > 0 && stream_out->bytes_written()) { // if 0 are written the bound is not tight
+                always_assert(stream_out->get_bound() == stream_out->bytes_written());
+            }
+        }
+        th = outth;
+        TimingHarness::timing[logical_thread_id % MAX_NUM_THREADS][TimingHarness::TS_ARITH_FINISHED] =
+            TimingHarness::get_time_us();
+    }
+    if (changed_bounds) {
+        stream_out->set_bound(original_bound);
+    }
+}
+void recode_physical_thread_wrapper( // non-template forwarder to recode_physical_thread for Sirikata::BoundedMemWriter output
+    Sirikata::BoundedMemWriter* stream_out,
+    BlockBasedImagePerChannel<true>& framebuffer,
+    int mcuv,
+    const std::vector<ThreadHandoff>& thread_handoffs,
+    Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> max_coded_heights,
+    Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> component_size_in_blocks,
+    int physical_thread_id,
+    abitwriter* huffw) {
+    recode_physical_thread(stream_out, framebuffer, mcuv, thread_handoffs, max_coded_heights, component_size_in_blocks,
+                           physical_thread_id, huffw); // every argument is passed through unchanged
+}
+/* -----------------------------------------------
+    JPEG encoding routine: writes headers, recoded scan data, trailing header bytes and garbage to str_out
+    ----------------------------------------------- */
+bool recode_baseline_jpeg(bounded_iostream* str_out, int max_file_size) {
+    unsigned int local_bound = max_file_size - grbs; // hold back room for the garbage data appended at the end
+    str_out->set_bound(local_bound);
+
+    /* step 1: handle the initial segments */
+    unsigned int byte_position = handle_initial_segments(str_out);
+    if (byte_position == static_cast<unsigned int>(-1)) { // handle_initial_segments reports failure as -1
+        return false;
+    }
+    /* step 2: setup multithreaded decoder with framebuffer for each */
+    Sirikata::Array1d<uint32_t, (size_t)ColorChannel::NumBlockTypes> max_coded_heights =
+        colldata.get_max_coded_heights();
+    Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> component_size_in_blocks =
+        colldata.get_component_size_in_blocks();
+    int mcu_count_vertical = colldata.get_mcu_count_vertical();
+    Sirikata::Array1d<BlockBasedImagePerChannel<true>, MAX_NUM_THREADS> framebuffer;
+
+    for (size_t thread_id = 0; thread_id < NUM_THREADS; ++thread_id) {
+        for (int cmp = 0; cmp < colldata.get_num_components(); ++cmp) {
+            framebuffer[thread_id][cmp] = new BlockBasedImageBase<true>; // not deleted anywhere in this function
+            colldata.allocate_channel_framebuffer(cmp, framebuffer[thread_id][cmp], true);
+        }
+        if (!g_threaded) { // single-threaded: only framebuffer[0] is populated
+            break;
+        }
+    }
+    std::vector<ThreadHandoff> luma_bounds = g_decoder->initialize_baseline_decoder(&colldata, framebuffer);
+
+    if (luma_bounds.size() && luma_bounds[0].is_legacy_mode()) { // legacy-mode handoffs are recoded on one thread
+        g_threaded = false;
+    }
+    /* step 3: decode the scan, row by row */
+    std::tuple<uint8_t, uint8_t, Sirikata::Array1d<int16_t, (size_t)ColorChannel::NumBlockTypes> >
+        overhang_byte_and_bit_count;
+    std::get<0>(overhang_byte_and_bit_count) = 0;
+    std::get<1>(overhang_byte_and_bit_count) = 0;
+    std::get<2>(overhang_byte_and_bit_count).memset(0);
+    Sirikata::JpegAllocator<uint8_t> alloc;
+    Sirikata::Array1d<Sirikata::BoundedMemWriter, MAX_NUM_THREADS - 1> local_buffers; // output of physical threads 1 and up
+    Sirikata::Array1d<abitwriter*, MAX_NUM_THREADS> huffws;
+    huffws.memset(0);
+    for (size_t i = 0; i < NUM_THREADS; ++i) {
+        huffws[i] = new abitwriter(65536, max_file_size); // not deleted anywhere in this function
+    }
+
+    if (g_threaded) {
+        for (unsigned int physical_thread_id = 1; physical_thread_id < (g_threaded ? NUM_THREADS : 1);
+             ++physical_thread_id) {
+            int work_size = 0;
+            int logical_thread_start, logical_thread_end;
+            std::tie(logical_thread_start, logical_thread_end) =
+                logical_thread_range_from_physical_thread_id(physical_thread_id, luma_bounds.size());
+
+            for (int logical_thread_id = logical_thread_start; logical_thread_id < logical_thread_end;
+                 ++logical_thread_id) {
+                work_size += luma_bounds[logical_thread_id].segment_size;
+            }
+            if (!work_size) { // no segment sizes available: fall back to the whole-file limit
+                work_size = max_file_size;
+            }
+            local_buffers[physical_thread_id - 1].set_bound(work_size);
+            auto work_fn =
+                std::bind(&recode_physical_thread_wrapper, &local_buffers[physical_thread_id - 1],
+                          framebuffer[physical_thread_id], mcu_count_vertical, luma_bounds, max_coded_heights,
+                          component_size_in_blocks, physical_thread_id, huffws[physical_thread_id]);
+            g_decoder->getWorker(physical_thread_id - 1)->work = work_fn;
+            g_decoder->getWorker(physical_thread_id - 1)->activate_work();
+        }
+    }
+    recode_physical_thread(str_out, framebuffer[0], mcu_count_vertical, luma_bounds, max_coded_heights,
+                           component_size_in_blocks, 0, huffws[0]); // physical thread 0 writes straight to str_out
+    TimingHarness::timing[0][TimingHarness::TS_JPEG_RECODE_STARTED] = TimingHarness::get_time_us();
+    for (unsigned int physical_thread_id = 1; physical_thread_id < (g_threaded ? NUM_THREADS : 1);
+         ++physical_thread_id) {
+        TimingHarness::timing[physical_thread_id][TimingHarness::TS_THREAD_WAIT_STARTED] = TimingHarness::get_time_us();
+
+        g_decoder->getWorker(physical_thread_id - 1)->main_wait_for_done();
+        TimingHarness::timing[physical_thread_id][TimingHarness::TS_THREAD_WAIT_FINISHED] =
+            TimingHarness::timing[physical_thread_id][TimingHarness::TS_JPEG_RECODE_STARTED] =
+                TimingHarness::get_time_us();
+        size_t bytes_to_copy = local_buffers[physical_thread_id - 1].bytes_written();
+        if (bytes_to_copy) { // append this worker's buffered output in thread order
+            local_bound -= bytes_to_copy; // local_bound is not read again after this point
+            str_out->write(&local_buffers[physical_thread_id - 1].buffer()[0], bytes_to_copy);
+        }
+        TimingHarness::timing[physical_thread_id][TimingHarness::TS_JPEG_RECODE_FINISHED] =
+            TimingHarness::get_time_us();
+    }
+    if (!rst_err.empty()) { // emit rst_err[0] extra restart markers, continuing the 0xD0..0xD7 cycle
+        unsigned int cumulative_reset_markers = rsti ? (mcuh * mcuv - 1) / rsti : 0;
+        for (unsigned char i = 0; i < rst_err[0]; ++i) {
+            const unsigned char mrk = 0xFF;
+            const unsigned char rst = 0xD0 + ((cumulative_reset_markers + i) & 7);
+            str_out->write_byte(mrk);
+            str_out->write_byte(rst);
+        }
+    }
+
+    /* step 4: blit any trailing data */
+    if (!str_out->has_reached_bound()) {
+        str_out->write(hdrdata + byte_position, hdrs - byte_position); // header bytes that follow the SOS segment
+    }
+
+    check_decompression_memory_bound_ok();
+
+    // write EOI (now EOI is stored in garbage of at least 2 bytes)
+    // this guarantees that we can stop the write in time.
+    // if it used too much memory
+    // str_out->write( EOI, 1, 2 );
+    str_out->set_bound(max_file_size); // lift the bound to the full size so the garbage data fits
+    check_decompression_memory_bound_ok();
+    // write garbage if needed
+    if (grbs > 0) str_out->write(grbgdata, grbs);
+    check_decompression_memory_bound_ok();
+    str_out->flush();
+    TimingHarness::timing[0][TimingHarness::TS_JPEG_RECODE_FINISHED] = TimingHarness::get_time_us();
+
+    // error message if the stream reports a write error
+    if (str_out->chkerr()) {
+        fprintf(stderr, "write error, possibly drive is full");
+        return false;
+    }
+    return true;
+}
diff --git a/codec/L2/demos/leptonEnc/host/lepton/recoder.hh b/codec/L2/demos/leptonEnc/host/lepton/recoder.hh
new file mode 100644
index 0000000000..c7a47f51e5
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/recoder.hh
@@ -0,0 +1,52 @@
+struct huffCodes { // Huffman code table; NOTE(review): field meanings are inferred from the names only — confirm
+    unsigned short cval[256]; // presumably the code bits, one entry per 8-bit index
+    unsigned short clen[256]; // presumably the code length, one entry per 8-bit index
+    unsigned short max_eobrun; // presumably the largest end-of-band run these codes can express
+};
+
+struct huffTree { // Huffman decoding tree; NOTE(review): semantics are inferred from the names only — confirm
+    unsigned short l[256]; // presumably the left-child link for each of the 256 node slots
+    unsigned short r[256]; // presumably the right-child link for each of the 256 node slots
+};
+
+// Progress cursor with scoped nesting: a child built from a parent pointer copies its state back on destruction.
+struct MergeJpegProgress {
+    unsigned int hpos;  // current position in header
+    unsigned int ipos;  // current position in imagedata
+    unsigned int rpos;  // current restart marker position
+    unsigned int cpos;  // in scan corrected rst marker position
+    unsigned int scan;  // number of current scan
+    unsigned char type; // type of current marker segment
+    unsigned int num_rst_markers_this_scan;
+    bool within_scan;
+    MergeJpegProgress* parent; // object to write back into on destruction, or NULL
+    // Root progress: every position at zero, first scan, no marker type yet, outside a scan.
+    MergeJpegProgress()
+        : hpos(0),
+          ipos(0),
+          rpos(0),
+          cpos(0),
+          scan(1),
+          type(0x00),
+          num_rst_markers_this_scan(0),
+          within_scan(false),
+          parent(NULL) {}
+    // Nested progress: starts as a byte-wise snapshot of *par and remembers par.
+    MergeJpegProgress(MergeJpegProgress* par) {
+        memcpy(this, par, sizeof(*this));
+        parent = par;
+    }
+    // Commits this state into the parent while leaving the parent's own parent link untouched.
+    ~MergeJpegProgress() {
+        if (parent == NULL) return;
+        MergeJpegProgress* const grandparent = parent->parent;
+        memcpy(parent, this, sizeof(*this));
+        parent->parent = grandparent;
+    }
+
+   private:
+    MergeJpegProgress(const MergeJpegProgress& other);            // disallow copy construction
+    MergeJpegProgress& operator=(const MergeJpegProgress& other); // disallow gets
+};
+class bounded_iostream;
+bool recode_baseline_jpeg(bounded_iostream* str_out, int max_file_size);
diff --git a/codec/L2/demos/leptonEnc/host/lepton/simple_decoder.cc b/codec/L2/demos/leptonEnc/host/lepton/simple_decoder.cc
new file mode 100644
index 0000000000..39edbd9173
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/simple_decoder.cc
@@ -0,0 +1,92 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+#include "bitops.hh"
+#include "component_info.hh"
+#include "uncompressed_components.hh"
+#include "jpgcoder.hh"
+#include "simple_decoder.hh"
+
+#include <algorithm>
+SimpleComponentDecoder::SimpleComponentDecoder() { // starts with no reader and all progress state cleared
+    const unsigned int slots = sizeof(cur_read_batch) / sizeof(cur_read_batch[0]);
+    for (unsigned int slot = 0; slot != slots; ++slot) {
+        started_scan[slot] = false;
+        target[slot] = cur_read_batch[slot] = 0;
+    }
+    batch_size = 0;
+    str_in = NULL;
+}
+void SimpleComponentDecoder::initialize(Sirikata::DecoderReader* i, // keeps the reader, copies the handoffs
+                                        const std::vector<ThreadHandoff>& thread_transition_info) {
+    thread_handoffs_ = thread_transition_info;
+    str_in = i;
+}
+
+void SimpleComponentDecoder::decode_row( // not implemented for this decoder: every call ends in custom_exit
+    int thread_state_id,
+    BlockBasedImagePerChannel<true>& image_data, // FIXME: set image_data to true
+    Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> component_size_in_blocks,
+    int component,
+    int curr_y) {
+    custom_exit(ExitCode::ASSERTION_FAILURE); // none of the arguments is looked at
+}
+// Picks the next component: Y, unless a later unfinished one has a strictly smaller done/target ratio.
+BlockType bt_get_cmp(int cur_read_batch[3], int target[3]) {
+    BlockType least_done = BlockType::Y;
+    double least_ratio = cur_read_batch[(int)least_done] / (double)target[(int)least_done];
+    for (unsigned int candidate = 1; candidate < 3; ++candidate) {
+        if (!target[(int)least_done] || cur_read_batch[candidate] == target[candidate]) continue; // skip
+        const double ratio = cur_read_batch[candidate] / (double)target[candidate];
+        if (ratio < least_ratio) {
+            least_ratio = ratio;
+            least_done = (BlockType)candidate;
+        }
+    }
+    return least_done;
+}
+
+// Reads at most one batch of raw blocks into colldata; returns CODING_DONE, CODING_PARTIAL or CODING_ERROR.
+CodingReturnValue SimpleComponentDecoder::decode_chunk(UncompressedComponents* colldata) {
+    colldata->worker_update_coefficient_position_progress(64); // we are optimizing for baseline only atm
+    colldata->worker_update_bit_progress(16);                  // we are optimizing for baseline only atm
+    char zero[sizeof(target)] = {0};
+    if (memcmp(target, zero, sizeof(target)) == 0) { // no targets yet: read the 4-byte batch-size header
+        unsigned char bs[4] = {0};
+        const bool header_ok = IOUtil::ReadFull(str_in, bs, sizeof(bs)) == sizeof(bs);
+        batch_size = bs[0] | (bs[1] << 8) | (bs[2] << 16) | ((unsigned int)bs[3] << 24); // little-endian
+        if (!header_ok || batch_size == 0) { // a zero batch size could never make progress below
+            errormessage = "Missing or invalid batch size";
+            errorlevel = 2;
+            return CODING_ERROR;
+        }
+        for (unsigned int cmp = 0; cmp < 3; ++cmp) {
+            target[cmp] = colldata->component_size_in_blocks(cmp);
+        }
+    }
+    BlockType cmp = bt_get_cmp(cur_read_batch, target);
+    if ((size_t)cmp == sizeof(cur_read_batch) / sizeof(cur_read_batch[0]) ||
+        cur_read_batch[(size_t)cmp] == target[(size_t)cmp]) {
+        return CODING_DONE;
+    }
+    // read coefficient data from file
+    BlockBasedImage& start = colldata->full_component_write(cmp);
+    if (cur_read_batch[(int)cmp] < target[(int)cmp]) {
+        // Take the minimum as unsigned so a batch size above INT_MAX cannot turn into a negative count.
+        const unsigned int remaining = (unsigned int)(target[(int)cmp] - cur_read_batch[(int)cmp]);
+        const int cur_read_size = (int)std::min(batch_size, remaining);
+        for (int i = 0; i < cur_read_size; ++i) {
+            size_t retval = IOUtil::ReadFull(str_in, &start.raster(cur_read_batch[(int)cmp] + i), sizeof(short) * 64);
+            if (retval != sizeof(short) * 64) {
+                errormessage = "Unexpected end of file blocks";
+                errorlevel = 2;
+                return CODING_ERROR;
+            }
+        }
+        cur_read_batch[(int)cmp] += cur_read_size;
+        colldata->worker_update_cmp_progress(cmp, cur_read_size);
+        return CODING_PARTIAL;
+    }
+    assert(false && "UNREACHABLE");
+    return CODING_PARTIAL;
+}
+SimpleComponentDecoder::~SimpleComponentDecoder() {} // empty: no explicit cleanup (str_in is not deleted here)
diff --git a/codec/L2/demos/leptonEnc/host/lepton/simple_decoder.hh b/codec/L2/demos/leptonEnc/host/lepton/simple_decoder.hh
new file mode 100644
index 0000000000..be2c0de0f3
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/simple_decoder.hh
@@ -0,0 +1,39 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+#include "base_coders.hh"
+namespace Sirikata {
+class DecoderReader;
+}
+
+class SimpleComponentDecoder : public BaseDecoder { // decoder that reads raw coefficient blocks in batches
+    bool started_scan[4]; // cleared by the constructor
+    int cur_read_batch[4]; // blocks read so far, per component slot
+    int target[4]; // blocks to read per component slot; all zero until the first decode_chunk() call
+    Sirikata::DecoderReader* str_in; // input reader handed to initialize(); NULL before that
+    std::vector<ThreadHandoff> thread_handoffs_; // copy of the handoff table handed to initialize()
+    unsigned int batch_size; // batch size read from the start of the stream by decode_chunk()
+
+   public:
+    SimpleComponentDecoder();
+    ~SimpleComponentDecoder();
+    virtual void initialize(Sirikata::DecoderReader* input, const std::vector<ThreadHandoff>& thread_transition_info);
+    // decode_chunk reads at most one batch per call and reports done/partial/error through its return value
+    CodingReturnValue decode_chunk(UncompressedComponents* colldata);
+    virtual void registerWorkers(GenericWorker*, unsigned int num_workers) {} // no-op
+    GenericWorker* getWorker(unsigned int i) { return NULL; } // this decoder exposes no workers
+    std::vector<ThreadHandoff> initialize_baseline_decoder( // ignores both arguments
+        const UncompressedComponents* const colldata,
+        Sirikata::Array1d<BlockBasedImagePerChannel<true>, MAX_NUM_THREADS>& framebuffer) {
+        return thread_handoffs_; // returns a copy of the table stored by initialize()
+    }
+    void decode_row(int thread_state_id,
+                    BlockBasedImagePerChannel<true>& image_data, // FIXME: set image_data to true
+                    Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> component_size_in_blocks,
+                    int component,
+                    int curr_y);
+    // the remaining members are stubs: nothing to clear and zero reported memory usage
+    virtual void clear_thread_state(int thread_id,
+                                    int target_thread_state,
+                                    BlockBasedImagePerChannel<true>& framebuffer) {}
+    size_t get_model_memory_usage() const { return 0; }
+    size_t get_model_worker_memory_usage() const { return 0; }
+};
diff --git a/codec/L2/demos/leptonEnc/host/lepton/simple_encoder.cc b/codec/L2/demos/leptonEnc/host/lepton/simple_encoder.cc
new file mode 100644
index 0000000000..9b3feb5293
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/simple_encoder.cc
@@ -0,0 +1,52 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+#include <string.h>
+#include "bitops.hh"
+#include "component_info.hh"
+#include "uncompressed_components.hh"
+#include "jpgcoder.hh"
+#include "simple_encoder.hh"
+#include "../io/ZlibCompression.hh"
+#include <algorithm>
+
+BlockType bt_get_cmp(int cur_read_batch[3], int target[3]);
+SimpleComponentEncoder::SimpleComponentEncoder() { // all per-slot targets and progress counters start at zero
+    for (size_t slot = 0; slot < sizeof(target) / sizeof(target[0]); ++slot) target[slot] = 0;
+    for (size_t slot = 0; slot < sizeof(cur_read_batch) / sizeof(cur_read_batch[0]); ++slot) cur_read_batch[slot] = 0;
+}
+// First call: writes batch_size as 4 little-endian bytes and loads the per-component block counts.
+// Every call: writes at most one batch of raw blocks (sizeof(short) * 64 bytes each) for one component.
+CodingReturnValue SimpleComponentEncoder::encode_chunk(const UncompressedComponents* colldata,
+                                                       IOUtil::FileWriter* str_out,
+                                                       const ThreadHandoff* selected_splits,
+                                                       unsigned int num_selected_splits) {
+    const unsigned int batch_size = 1600;
+    const unsigned int num_slots = sizeof(cur_read_batch) / sizeof(cur_read_batch[0]);
+    bool targets_unset = true; // an all-zero target table means the header is still to be written
+    for (unsigned int slot = 0; slot < sizeof(target) / sizeof(target[0]); ++slot) if (target[slot] != 0) targets_unset = false;
+    if (targets_unset) {
+        unsigned char header[4];
+        for (unsigned int byte = 0; byte < 4; ++byte) header[byte] = (unsigned char)((batch_size >> (8 * byte)) & 0xff);
+        str_out->Write(header, sizeof(header));
+        for (unsigned int cmp = 0; cmp < 4; ++cmp) {
+            target[cmp] = colldata->component_size_in_blocks(cmp);
+        }
+    }
+    const unsigned int cmp = (unsigned int)bt_get_cmp(cur_read_batch, target);
+    if (cmp == num_slots || cur_read_batch[cmp] == target[cmp]) {
+        return CODING_DONE;
+    }
+    const BlockBasedImage& start = colldata->full_component_nosync(cmp);
+    if (cur_read_batch[cmp] >= target[cmp]) {
+        assert(false && "UNREACHABLE");
+        return CODING_PARTIAL;
+    }
+    const int first = cur_read_batch[cmp];
+    const int cur_write_size = std::min((int)batch_size, target[cmp] - first);
+    for (int block = first; block < first + cur_write_size; ++block) {
+        str_out->Write(reinterpret_cast<const unsigned char*>(start.raster(block).raw_data()), sizeof(short) * 64);
+    }
+    cur_read_batch[cmp] = first + cur_write_size;
+    return CODING_PARTIAL;
+}
+
+SimpleComponentEncoder::~SimpleComponentEncoder() {} // empty: the encoder performs no explicit cleanup
diff --git a/codec/L2/demos/leptonEnc/host/lepton/simple_encoder.hh b/codec/L2/demos/leptonEnc/host/lepton/simple_encoder.hh
new file mode 100644
index 0000000000..7b6df4a981
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/simple_encoder.hh
@@ -0,0 +1,18 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+#include "base_coders.hh"
+class SimpleComponentEncoder : public BaseEncoder { // encoder that writes raw coefficient blocks in batches
+    int cur_read_batch[4]; // blocks already written, per component slot
+    int target[4]; // blocks to write per component slot; all zero until the first encode_chunk() call
+
+   public:
+    SimpleComponentEncoder();
+    CodingReturnValue encode_chunk(const UncompressedComponents* input, // writes at most one batch per call
+                                   IOUtil::FileWriter*,
+                                   const ThreadHandoff* selected_splits, // not used by the implementation
+                                   unsigned int num_selected_splits);    // not used by the implementation
+    // the remaining members are stubs: no workers to register and zero reported memory usage
+    virtual void registerWorkers(GenericWorker*, unsigned int num_workers) {}
+    ~SimpleComponentEncoder();
+    size_t get_decode_model_memory_usage() const { return 0; }
+    size_t get_decode_model_worker_memory_usage() const { return 0; }
+};
diff --git a/codec/L2/demos/leptonEnc/host/lepton/socket_serve.cc b/codec/L2/demos/leptonEnc/host/lepton/socket_serve.cc
new file mode 100644
index 0000000000..1048766e26
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/socket_serve.cc
@@ -0,0 +1,366 @@
+#ifndef _WIN32
+#include <sys/types.h>
+#include <signal.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <algorithm>
+#include <netinet/in.h>
+#include <sys/time.h>
+#if defined(__APPLE__) || defined(BSD)
+#include <sys/wait.h>
+#else
+#include <sys/signalfd.h>
+#include <wait.h>
+#endif
+#include <poll.h>
+#include <errno.h>
+#include "../io/Reader.hh"
+#include "socket_serve.hh"
+#include "../../vp8/util/memory.hh"
+#include <set>
+// Lowercase hex digit for a nibble: 0-9 give '0'-'9', larger values give 'a' + (val - 10).
+static char hex_nibble(uint8_t val) {
+    return val < 10 ? val + '0' : val - 10 + 'a';
+}
+
+static const char last_prefix[] = "/tmp/"; // directory prefix of generated socket paths
+static const char last_postfix[] = ".uport"; // suffix of the plain socket path
+static const char zlast_postfix[] = ".z0"; // suffix of the socket whose connections are served with force_zlib set
+// Path buffers below are sized from sockaddr_un::sun_path.
+static char socket_name[sizeof((struct sockaddr_un*)0)->sun_path] = {};
+static char zsocket_name[sizeof((struct sockaddr_un*)0)->sun_path] = {};
+static const char lock_ext[] = ".lock"; // appended to the socket path stem to name the lock file
+bool random_name = false; // set by name_socket(); cleanup_socket() only unlinks the lock file when this is true
+static char socket_lock[sizeof((struct sockaddr_un*)0)->sun_path + sizeof(lock_ext)]; // lock file path
+int lock_file = -1; // descriptor of the open lock file, -1 while there is none
+// Cleared in forked children so that cleanup_socket() never unlinks the server's files from a child.
+bool is_parent_process = true;
+
+// Builds random socket paths: "/tmp/" + hex of 16 bytes read from dev_random (with a few dashes) + suffix,
+// filling socket_name (".uport"), zsocket_name (".z0") and socket_lock (".lock").
+static void name_socket(FILE* dev_random) {
+    random_name = true;
+    char random_data[16] = {0};
+    auto retval = fread(random_data, 1, sizeof(random_data), dev_random);
+    (void)retval; // dev random should yield reasonable results
+    const size_t prefix_len = strlen(last_prefix);
+    memcpy(socket_name, last_prefix, prefix_len);
+    memcpy(zsocket_name, last_prefix, prefix_len);
+    size_t offset = prefix_len;
+    for (size_t i = 0; i < sizeof(random_data); ++i) {
+        always_assert(offset + 3 + sizeof(last_postfix) < sizeof(socket_name));
+        always_assert(offset + 3 + sizeof(zlast_postfix) < sizeof(zsocket_name));
+        const uint8_t byte = random_data[i];
+        const char digits[3] = {hex_nibble(byte >> 4), hex_nibble(byte & 0xf), '-'};
+        // two hex digits always; the trailing dash only after bytes 4, 6, 8 and 14
+        const size_t emit = (i == 4 || i == 6 || i == 8 || i == 14) ? 3 : 2;
+        for (size_t d = 0; d < emit; ++d, ++offset) {
+            socket_name[offset] = digits[d];
+            zsocket_name[offset] = digits[d];
+        }
+    }
+
+    always_assert(offset + sizeof(last_postfix) < sizeof(socket_name));
+    always_assert(offset + sizeof(zlast_postfix) < sizeof(zsocket_name));
+    always_assert(offset + sizeof(lock_ext) < sizeof(socket_lock));
+    memcpy(socket_name + offset, last_postfix, sizeof(last_postfix)); // sizeof copies the NUL too
+    memcpy(zsocket_name + offset, zlast_postfix, sizeof(zlast_postfix));
+    memcpy(socket_lock, socket_name, offset);
+    memcpy(socket_lock + offset, lock_ext, sizeof(lock_ext));
+}
+
+// Exit path used as signal handler: the server process removes its socket files (and the lock file
+// when the name was randomly generated) and exits with 0; any other process exits via custom_exit.
+static void cleanup_socket(int) {
+    if (!is_parent_process) {
+        custom_exit(ExitCode::SUCCESS);
+        return;
+    }
+    unlink(socket_name);
+    unlink(zsocket_name);
+    if (random_name && socket_lock[0]) unlink(socket_lock);
+    exit(0);
+}
+
+static void nop(int) {} // does nothing; installed as the SIGCHLD handler by socket_serve()
+// Forks a worker for an accepted connection. The child serves the connection through work() and
+// exits; the caller closes its copy of the descriptor and gets fork()'s return value back.
+pid_t accept_new_connection(int active_connection,
+                            const SocketServeWorkFunction& work,
+                            uint32_t global_max_length,
+                            int lock_fd,
+                            bool force_zlib) {
+    const pid_t serve_file = fork();
+    if (serve_file != 0) {
+        // not the child: close the Unix Domain Socket handle held by this process
+        while (close(active_connection) < 0 && errno == EINTR) {
+        }
+        return serve_file;
+    }
+    is_parent_process = false;
+    while (close(1) < 0 && errno == EINTR) { // close stdout
+    }
+    // close socket lock so future servers may reacquire the lock
+    while (lock_fd >= 0 && close(lock_fd) < 0 && errno == EINTR) {
+    }
+    IOUtil::FileReader reader(active_connection, global_max_length, true);
+    IOUtil::FileWriter writer(active_connection, false, true);
+    work(&reader, &writer, global_max_length, force_zlib);
+    custom_exit(ExitCode::SUCCESS);
+    return serve_file;
+}
+// waitpid() options for the reaping loop: 0 (blocking wait) once a non-zero max_children
+// limit has been reached, WNOHANG in every other case.
+int should_wait_bitmask(size_t children_size, uint32_t max_children) {
+    const bool limit_reached = max_children != 0 && children_size >= max_children;
+    return limit_reached ? 0 : WNOHANG;
+}
+
+// Blocks SIGCHLD and returns a signalfd that reports it; Apple/BSD builds just return -1.
+int make_sigchld_fd() {
+#if defined(__APPLE__) || defined(BSD)
+    return -1;
+#else
+    sigset_t child_signals;
+    int err = sigemptyset(&child_signals);
+    always_assert(err == 0);
+    err = sigaddset(&child_signals, SIGCHLD);
+    always_assert(err == 0);
+    // the signalfd will only receive SIG_BLOCK'd signals
+    err = sigprocmask(SIG_BLOCK, &child_signals, NULL);
+    always_assert(err == 0);
+    const int fd = signalfd(-1, &child_signals, 0);
+    always_assert(fd != -1);
+    return fd;
+#endif
+}
+// Records the child count, saturated at 0xff, as a single byte at offset 0 of the lock file.
+// Does nothing when no lock file is open; lseek/write are retried on EINTR, other errors are ignored.
+void write_num_children(size_t num_children) {
+    if (lock_file == -1) return;
+    // lets just keep a byte of state about the number of children
+    const uint8_t num_children_byte = (uint8_t)(num_children > 0xff ? 0xff : num_children);
+    int err;
+    do {
+        err = lseek(lock_file, 0, SEEK_SET);
+    } while (err < 0 && errno == EINTR);
+    do {
+        err = write(lock_file, &num_children_byte, sizeof(num_children_byte));
+    } while (err < 0 && errno == EINTR);
+}
+void serving_loop(int unix_domain_socket_server, // polls the listeners forever and forks one child per connection
+                  int unix_domain_socket_server_zlib,
+                  int tcp_socket_server,
+                  int tcp_socket_server_zlib,
+                  const SocketServeWorkFunction& work,
+                  uint32_t global_max_length,
+                  uint32_t max_children, // 0 means no limit (see should_wait_bitmask)
+                  bool do_cleanup_socket, // not referenced anywhere in this function
+                  int lock_fd) {
+    int sigchild_fd = make_sigchld_fd();
+    // fds[] gets the SIGCHLD signalfd first (when there is one), then every listener that is not -1
+    int num_fds = 0;
+    struct pollfd fds[5];
+    if (sigchild_fd != -1) {
+        fds[0].fd = sigchild_fd;
+        fds[0].events = POLLIN | POLLERR | POLLHUP; // overwritten with plain POLLIN by the loop below
+        num_fds += 1;
+    }
+    if (unix_domain_socket_server_zlib != -1) {
+        fds[num_fds].fd = unix_domain_socket_server_zlib;
+        ++num_fds;
+    }
+    if (tcp_socket_server_zlib != -1) {
+        fds[num_fds].fd = tcp_socket_server_zlib;
+        ++num_fds;
+    }
+    if (unix_domain_socket_server != -1) {
+        fds[num_fds].fd = unix_domain_socket_server;
+        ++num_fds;
+    }
+    if (tcp_socket_server != -1) {
+        fds[num_fds].fd = tcp_socket_server;
+        ++num_fds;
+    }
+    for (int i = 0; i < num_fds; ++i) { // every polled descriptor becomes non-blocking and is watched for POLLIN
+        int err = fcntl(fds[i].fd, F_SETFL, O_NONBLOCK);
+        always_assert(err == 0);
+        fds[i].events = POLLIN;
+    }
+    std::set<pid_t> children; // pids returned by accept_new_connection() and not yet reaped
+    int status;
+    while (true) {
+        write_num_children(children.size());
+        for (pid_t term_pid = 0; // reap finished children; waitpid blocks only once max_children is reached
+             (term_pid = waitpid(-1, &status, should_wait_bitmask(children.size(), max_children))) > 0;) {
+            std::set<pid_t>::iterator where = children.find(term_pid);
+            if (where != children.end()) {
+                children.erase(where);
+            } else {
+                fprintf(stderr, "Pid %d not found as child of this\n", term_pid);
+                assert(false && "pid msut be in child\n");
+            }
+            if (WIFEXITED(status)) {
+                fprintf(stderr, "Child %d exited with code %d\n", term_pid, WEXITSTATUS(status));
+            } else if (WIFSIGNALED(status)) {
+                fprintf(stderr, "Child %d exited with signal %d\n", term_pid, WTERMSIG(status));
+            } else {
+                fprintf(stderr, "Child %d exited with another cause: %d\n", term_pid, status);
+            }
+            fflush(stderr);
+            write_num_children(children.size());
+        }
+        int ret = poll(fds, num_fds, sigchild_fd == -1 ? 60 : -1); // no timeout when a signalfd wakes us up
+        // need a timeout (60 ms here) in case a SIGCHLD was missed between the waitpid and the poll
+        if (ret == 0) { // no events ready, just timed out, check for missed SIGCHLD
+            continue;
+        }
+        if (ret < 0 && errno == EINTR) { // interrupted by a signal: go back and reap
+            continue;
+        }
+        for (int i = 0; i < num_fds; ++i) {
+            if (fds[i].revents & POLLIN) {
+                fds[i].revents = 0;
+                if (fds[i].fd == sigchild_fd) {
+#if !(defined(__APPLE__) || defined(BSD))
+                    struct signalfd_siginfo info; // drained and discarded; reaping happens at the loop top
+                    ssize_t ignore = read(fds[i].fd, &info, sizeof(info));
+                    (void)ignore;
+#endif
+                    continue; // we can't receive on this
+                }
+                struct sockaddr_un client;
+                socklen_t len = sizeof(client);
+                int active_connection = accept(fds[i].fd, (sockaddr*)&client, &len);
+                if (active_connection >= 0) {
+                    unsigned int flags = fcntl(active_connection, F_GETFL, 0);
+                    if (flags & O_NONBLOCK) {
+                        flags &= ~O_NONBLOCK;
+                        // inheritance of nonblocking flag not specified across systems
+                        fcntl(active_connection, F_SETFL, flags);
+                    }
+                    children.insert(accept_new_connection( // NOTE(review): a failed fork() (-1) is inserted too — confirm
+                        active_connection, work, global_max_length, lock_fd,
+                        fds[i].fd == unix_domain_socket_server_zlib || fds[i].fd == tcp_socket_server_zlib));
+                } else {
+                    if (errno != EINTR && errno != EWOULDBLOCK && errno != EAGAIN) {
+                        fprintf(stderr, "Error accepting connection: %s", strerror(errno));
+                        cleanup_socket(0); // any other accept() failure shuts the server down
+                    }
+                }
+            }
+        }
+    }
+}
+// Creates a TCP socket listening on `port` on all interfaces (INADDR_ANY) with SO_REUSEADDR requested.
+// Exits with COULD_NOT_BIND_PORT if bind() fails; socket()/listen() failures trip always_assert.
+int setup_tcp_socket(int port, int listen_backlog) {
+    int socket_fd = socket(AF_INET, SOCK_STREAM, 0);
+    always_assert(socket_fd >= 0); // socket() signals failure with -1; descriptor 0 is a valid result
+    struct sockaddr_in serv_addr;
+    memset(&serv_addr, 0, sizeof(struct sockaddr_in));
+    serv_addr.sin_family = AF_INET;
+    serv_addr.sin_addr.s_addr = INADDR_ANY;
+    serv_addr.sin_port = htons(port);
+    int optval = 1;
+    setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));
+    if (bind(socket_fd, (struct sockaddr*)&serv_addr, sizeof(serv_addr)) < 0) {
+        custom_exit(ExitCode::COULD_NOT_BIND_PORT);
+    }
+    int err = listen(socket_fd, listen_backlog);
+    always_assert(err == 0);
+    return socket_fd;
+}
+int setup_socket(const char* file_name, int listen_backlog) { // binds and listens on a Unix-domain socket path
+    int socket_fd = socket(PF_UNIX, SOCK_STREAM, 0);
+    always_assert(socket_fd >= 0); // socket() signals failure with -1; descriptor 0 is a valid result
+    struct sockaddr_un address;
+    memset(&address, 0, sizeof(struct sockaddr_un));
+    address.sun_family = AF_UNIX;
+    // copy at most size - 1 bytes so the zero-filled sun_path always keeps its terminating NUL
+    memcpy(address.sun_path, file_name, std::min(strlen(file_name), sizeof(address.sun_path) - 1));
+    int err = bind(socket_fd, (struct sockaddr*)&address, sizeof(address));
+    always_assert(err == 0);
+    err = listen(socket_fd, listen_backlog);
+    int ret = chmod(file_name, 0666); // best effort: the result is deliberately ignored
+    (void)ret;
+    always_assert(err == 0);
+    return socket_fd;
+}
+// Server entry point: chooses socket/lock paths, installs signal handlers, opens the listeners, runs serving_loop().
+void socket_serve(const SocketServeWorkFunction& work_fn, uint32_t global_max_length, const ServiceInfo& service_info) {
+    bool do_cleanup_socket = true;
+    int lock_fd = -1;
+    if (service_info.uds != NULL) {
+        do_cleanup_socket = false;
+        size_t len = strlen(service_info.uds);
+        if (len + sizeof(zlast_postfix) <= sizeof(zsocket_name)) { // room for the path, ".z0" and the NUL
+            memcpy(socket_name, service_info.uds, len);
+            socket_name[len] = '\0';
+        } else {
+            fprintf(stderr, "Path too long for %s\n", service_info.uds);
+            always_assert(false && "input file name too long\n");
+        }
+        memcpy(socket_lock, socket_name, sizeof(socket_name));
+        memcpy(zsocket_name, socket_name, sizeof(socket_name));
+        memcpy(socket_lock + strlen(socket_lock), lock_ext, sizeof(lock_ext));
+        memcpy(zsocket_name + strlen(zsocket_name), zlast_postfix, sizeof(zlast_postfix));
+        do {
+            lock_file = open(socket_lock, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR);
+        } while (lock_file < 0 && errno == EINTR);
+        if (lock_file >= 0) {
+            lock_fd = lock_file;
+            int err = 0;
+            do {
+                err = ::flock(lock_file, LOCK_EX | LOCK_NB);
+            } while (err < 0 && errno == EINTR);
+            if (err == 0) {
+                do {
+                    err = remove(socket_name);
+                } while (err < 0 && errno == EINTR);
+                do {
+                    err = remove(zsocket_name);
+                } while (err < 0 && errno == EINTR);
+                signal(SIGINT, &cleanup_socket);
+                // if we have the lock we can clean it up
+                signal(SIGQUIT, &cleanup_socket);
+                signal(SIGTERM, &cleanup_socket);
+                do_cleanup_socket = true;
+            }
+        }
+    } else {
+        FILE* dev_random = fopen("/dev/urandom", "rb");
+        always_assert(dev_random != NULL && "could not open /dev/urandom"); // name_socket() reads it unconditionally
+        name_socket(dev_random);
+        fclose(dev_random);
+        do {
+            lock_file = open(socket_lock, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR);
+        } while (lock_file < 0 && errno == EINTR);
+        signal(SIGINT, &cleanup_socket);
+        signal(SIGQUIT, &cleanup_socket);
+        signal(SIGTERM, &cleanup_socket);
+    }
+    signal(SIGCHLD, &nop);
+    int socket_fd = -1;
+    int zsocket_fd = -1;
+    int socket_tcp = -1;
+    int zsocket_tcp = -1;
+    if (service_info.listen_uds) {
+        socket_fd = setup_socket(socket_name, service_info.listen_backlog);
+        zsocket_fd = setup_socket(zsocket_name, service_info.listen_backlog);
+    }
+    if (service_info.listen_tcp) {
+        socket_tcp = setup_tcp_socket(service_info.port, service_info.listen_backlog);
+        zsocket_tcp = setup_tcp_socket(service_info.zlib_port, service_info.listen_backlog);
+    }
+    fprintf(stdout, "%s\n", socket_name);
+    fflush(stdout);
+    serving_loop(socket_fd, zsocket_fd, socket_tcp, zsocket_tcp, work_fn, global_max_length, service_info.max_children,
+                 do_cleanup_socket, lock_fd);
+}
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/lepton/socket_serve.hh b/codec/L2/demos/leptonEnc/host/lepton/socket_serve.hh
new file mode 100644
index 0000000000..7824a125b8
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/socket_serve.hh
@@ -0,0 +1,32 @@
+#include <functional>
+#include "../io/ioutil.hh"
+
+// Listener configuration for socket_serve(); the constructor supplies the defaults noted below.
+struct ServiceInfo {
+    bool listen_tcp;    // default: false
+    int port;           // default: 2402
+    int zlib_port;      // default: 2403
+    bool listen_uds;    // default: true
+    int listen_backlog; // default: 16
+    int max_children;   // default: 0
+    const char* uds;    // default: NULL
+
+    ServiceInfo()
+        : listen_tcp(false),
+          port(2402),
+          zlib_port(2403),
+          listen_uds(true),
+          listen_backlog(16),
+          max_children(0),
+          uds(NULL) {}
+};
+
+typedef std::function<void(IOUtil::FileReader*, // data to work upon
+                           IOUtil::FileWriter*, // returned data
+                           uint32_t,            // max_file_length
+                           bool                 // force_zlib
+                           )>
+    SocketServeWorkFunction;
+#ifndef _WIN32
+void socket_serve(const SocketServeWorkFunction& work_fn, uint32_t max_file_length, const ServiceInfo& service_info);
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/lepton/test_custom_table.sh b/codec/L2/demos/leptonEnc/host/lepton/test_custom_table.sh
new file mode 100644
index 0000000000..a0863d5cc3
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/test_custom_table.sh
@@ -0,0 +1,47 @@
+#!/bin/sh
+# Round-trip check for lepton using a compression model saved during the run.
+# usage: test_custom_table.sh [input.jpg [training.jpg ...]]
+# Exits 0 (temp files removed) when the final output equals the input; otherwise
+# prints the temp file paths, leaves them in place and exits 1.
+export INPUT_TO_TEST="$(dirname "$0")/../../images/androidprogressive.jpg"
+if [ $# -eq 0 ]; then
+    echo "Using default file $INPUT_TO_TEST"
+else
+    export INPUT_TO_TEST="$1"
+fi
+export LEPTON_COMPRESSION_MODEL_OUT="$(mktemp /tmp/temp.XXXXXX)"
+export TEST_MODEL="$(mktemp /tmp/temp.XXXXXX)"
+export COMPRESSED_LEPTON="$(mktemp /tmp/temp.XXXXXX)"
+export ORIGINAL="$(mktemp /tmp/temp.XXXXXX)"
+# With fewer than two arguments the input itself is the file run to produce the model.
+if [ $# -lt 2 ]; then
+    ./lepton -allowprogressive - < "$INPUT_TO_TEST" > "$COMPRESSED_LEPTON"
+    cp "$LEPTON_COMPRESSION_MODEL_OUT" "$TEST_MODEL"
+else
+    for test_item in "$@"; do
+        if [ "$test_item" != "$INPUT_TO_TEST" ]; then
+            ./lepton -allowprogressive - < "$test_item" > "$COMPRESSED_LEPTON"
+            cp "$LEPTON_COMPRESSION_MODEL_OUT" "$TEST_MODEL"
+            export LEPTON_COMPRESSION_MODEL="$TEST_MODEL"
+        else
+            echo "Ignoring $test_item when training model"
+        fi
+    done
+fi
+# Convert the input with the saved model, convert the result back, then compare.
+LEPTON_COMPRESSION_MODEL="$TEST_MODEL" ./lepton -decode -allowprogressive - < "$INPUT_TO_TEST" > "$COMPRESSED_LEPTON"
+LEPTON_COMPRESSION_MODEL="$TEST_MODEL" ./lepton -recode -allowprogressive - < "$COMPRESSED_LEPTON" > "$ORIGINAL"
+if diff -q "$ORIGINAL" "$INPUT_TO_TEST" ; then
+    rm -- "$LEPTON_COMPRESSION_MODEL_OUT"
+    rm -- "$TEST_MODEL"
+    rm -- "$COMPRESSED_LEPTON"
+    rm -- "$ORIGINAL"
+    unset LEPTON_COMPRESSION_MODEL_OUT TEST_MODEL COMPRESSED_LEPTON ORIGINAL
+    exit 0
+fi
+echo compression_model "$LEPTON_COMPRESSION_MODEL_OUT"
+echo test_model "$TEST_MODEL"
+echo compressed_lepton "$COMPRESSED_LEPTON"
+echo roundtrip "$ORIGINAL"
+unset LEPTON_COMPRESSION_MODEL_OUT TEST_MODEL COMPRESSED_LEPTON ORIGINAL
+exit 1
diff --git a/codec/L2/demos/leptonEnc/host/lepton/thread_handoff.cc b/codec/L2/demos/leptonEnc/host/lepton/thread_handoff.cc
new file mode 100644
index 0000000000..fa76acbb36
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/thread_handoff.cc
@@ -0,0 +1,107 @@
+#include "thread_handoff.hh"
+#include "../vp8/util/memory.hh"
+
+// Parses 'H', a thread-count byte and BYTES_PER_HANDOFF bytes per thread; bad magic or short input ends in custom_exit.
+std::vector<ThreadHandoff> ThreadHandoff::deserialize(const unsigned char* data, size_t max_size) {
+    if (max_size < 2 || data[0] != 'H') {
+        custom_exit(ExitCode::VERSION_UNSUPPORTED);
+    }
+    const size_t num_threads = data[1];
+    data += 2;
+    max_size -= 2;
+    if (max_size < (size_t)BYTES_PER_HANDOFF * num_threads) { // size_t compare: an int cast could go negative
+        custom_exit(ExitCode::VERSION_UNSUPPORTED);
+    }
+    std::vector<ThreadHandoff> retval;
+    retval.reserve(num_threads);
+    for (size_t thread = 0; thread < num_threads; ++thread) {
+        ThreadHandoff th = ThreadHandoff::zero();
+        th.luma_y_start = data[0] + data[1] * 0x100;
+        th.segment_size = data[2] + data[3] * 0x100U + data[4] * 0x10000UL + data[5] * 0x1000000UL;
+        th.overhang_byte = data[6];
+        th.num_overhang_bits = data[7];
+        int biggest_value = 7; // highest byte offset consumed, checked against BYTES_PER_HANDOFF below
+        for (size_t channel = 0; channel < 4; ++channel) {
+            biggest_value = 9 + 2 * channel;
+            int32_t dc = data[8 + 2 * channel] + data[biggest_value] * 0x100;
+            if (dc >= 32768) { // reinterpret the 16-bit value as signed
+                dc -= 65536;
+            }
+            if (channel < sizeof(th.last_dc) / sizeof(th.last_dc[0])) { // 4 values are stored even for fewer channels
+                th.last_dc[channel] = dc;
+            }
+        }
+        assert(BYTES_PER_HANDOFF == biggest_value + 1);
+        retval.push_back(th);
+        data += BYTES_PER_HANDOFF;
+    }
+    for (size_t i = 1; i < retval.size(); ++i) {
+        retval[i - 1].luma_y_end = retval[i].luma_y_start;
+    }
+    return retval;
+}
+// Payload size announced by a two-byte header: input[1] entries of BYTES_PER_HANDOFF bytes; input[0] must be 'H'.
+size_t ThreadHandoff::get_remaining_data_size_from_two_bytes(unsigned char input[2]) {
+    if (input[0] != 'H') custom_exit(ExitCode::VERSION_UNSUPPORTED);
+    const size_t num_entries = input[1];
+    return num_entries * ThreadHandoff::BYTES_PER_HANDOFF;
+}
+// Emits 'H', the thread count, then per thread: little-endian fields followed by four 16-bit DC slots.
+std::vector<unsigned char> ThreadHandoff::serialize(const ThreadHandoff* data, unsigned int num_threads) {
+    always_assert(num_threads == NUM_THREADS);
+    std::vector<unsigned char> retval;
+    retval.reserve(NUM_THREADS * BYTES_PER_HANDOFF + 2);
+    retval.push_back('H');
+    retval.push_back(num_threads);
+    for (const ThreadHandoff* cur = data; cur != data + num_threads; ++cur) {
+        ThreadHandoff th = *cur;
+        for (unsigned int shift = 0; shift < 16; shift += 8) {
+            retval.push_back((th.luma_y_start >> shift) & 255);
+        }
+        for (unsigned int shift = 0; shift < 32; shift += 8) {
+            retval.push_back((th.segment_size >> shift) & 255);
+        }
+        retval.push_back(th.overhang_byte);
+        retval.push_back(th.num_overhang_bits);
+        const unsigned int stored = sizeof(th.last_dc) / sizeof(th.last_dc[0]);
+        for (unsigned int slot = 0; slot < stored; ++slot) {
+            const uint16_t dc = th.last_dc[slot]; // this will cast to unsigned
+            retval.push_back(dc & 255);
+            retval.push_back((dc >> 8) & 255);
+        }
+        for (unsigned int slot = stored; slot < 4; ++slot) { // pad so four DC slots are always written
+            retval.push_back(0);
+            retval.push_back(0);
+        }
+    }
+    return retval;
+}
+std::vector<ThreadHandoff> ThreadHandoff::make_rand(int num) { // builds `num` handoffs filled from rand()
+    std::vector<ThreadHandoff> retval(num);
+    for (int i = 0; i < num; ++i) {
+        retval[i].luma_y_start = rand() & 65535;
+        retval[i].segment_size = rand();
+        retval[i].overhang_byte = rand() & 255;
+        retval[i].num_overhang_bits = rand() & 7;
+        for (uint32_t j = 0; j < (uint32_t)ColorChannel::NumBlockTypes; ++j) {
+            retval[i].last_dc[j] = rand() & 32767;
+            if (rand() < RAND_MAX / 2) { // negate roughly half of the values
+                retval[i].last_dc[j] = -retval[i].last_dc[j];
+            }
+        }
+    }
+    if (!retval.empty()) retval.back().luma_y_end = 0; // num == 0 has no last element to terminate
+    for (size_t i = 1; i < retval.size(); ++i) {
+        retval[i - 1].luma_y_end = retval[i].luma_y_start; // each entry ends where the next one starts
+    }
+    return retval;
+}
+/* Builds the handoff describing a range: a copy of `other` whose luma_y_end is
+   this object's luma_y_start and whose segment_size is this object's
+   segment_size minus other's. */
+ThreadHandoff ThreadHandoff::operator-(const ThreadHandoff& other) const {
+    ThreadHandoff range(other);
+    range.segment_size = segment_size - other.segment_size;
+    range.luma_y_end = luma_y_start;
+    return range;
+}
diff --git a/codec/L2/demos/leptonEnc/host/lepton/thread_handoff.hh b/codec/L2/demos/leptonEnc/host/lepton/thread_handoff.hh
new file mode 100644
index 0000000000..7b9055646f
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/thread_handoff.hh
@@ -0,0 +1,40 @@
+#ifndef THREAD_HANDOFF_HH_
+#define THREAD_HANDOFF_HH_
+#include <vector>
+#include "../vp8/util/options.hh"
+#include "../vp8/util/aligned_block.hh"
+#include "../vp8/util/nd_array.hh"
+
+class ThreadHandoff { // per-thread starting state; converted to/from bytes by serialize()/deserialize()
+   public:
+    uint16_t luma_y_start; // start of the luma range covered by this handoff
+    uint16_t luma_y_end; // end of that range; not part of the serialized form (see BYTES_PER_HANDOFF)
+    uint32_t segment_size;
+    uint8_t overhang_byte;
+    uint8_t num_overhang_bits; // LEGACY_OVERHANG_BITS here marks a legacy handoff (see is_legacy_mode)
+    Sirikata::Array1d<int16_t, (uint32_t)ColorChannel::NumBlockTypes> last_dc; // one value per block type
+    enum {
+        BYTES_PER_HANDOFF = (16 /* luma end is implicit*/ + 32 + 16 * 4 + 8 * 2) / 8, // 128 bits = 16 bytes
+        // num_overhang_bits is set to this for legacy formats which must be decoded single threaded
+        LEGACY_OVERHANG_BITS = 0xff
+    };
+    static ThreadHandoff zero() { // returns a handoff whose bytes are all zero
+        ThreadHandoff ret;
+        memset(&ret, 0, sizeof(ret));
+        return ret;
+    }
+    bool is_legacy_mode() const { // legacy mode doesn't have access to handoff data
+        return num_overhang_bits == LEGACY_OVERHANG_BITS;
+    }
+    static size_t get_remaining_data_size_from_two_bytes(unsigned char input[2]);
+    static std::vector<ThreadHandoff> deserialize(const unsigned char* data, size_t max_size);
+    static std::vector<unsigned char> serialize(const ThreadHandoff* data, unsigned int num_threads);
+    static std::vector<ThreadHandoff> make_rand(int num_items); // handoffs filled from rand()
+
+    /* combine two ThreadHandoff objects into a range, starting with the initialization
+       of the thread represented by the first object, and continuing until the end
+       of the second object */
+    ThreadHandoff operator-(const ThreadHandoff& other) const;
+};
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/lepton/uncompressed_components.cc b/codec/L2/demos/leptonEnc/host/lepton/uncompressed_components.cc
new file mode 100644
index 0000000000..a08ea5a34e
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/uncompressed_components.cc
@@ -0,0 +1,35 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+#include "../../vp8/util/memory.hh"
+#include <thread>
+#include "uncompressed_components.hh"
+#include "component_info.hh"
+
+int UncompressedComponents::max_number_of_blocks = 0;
+
+// Greatest common divisor by Euclid's algorithm; gcd(a, 0) == a, so gcd(0, 0) == 0.
+int gcd(int a, int b) {
+    while (b != 0) {
+        const int remainder = a % b;
+        a = b;
+        b = remainder;
+    }
+    return a;
+}
+// Least common multiple; 0 if either argument is 0. Divides before multiplying so a * b cannot overflow first.
+int lcm(int a, int b) { return (a == 0 || b == 0) ? 0 : a / gcd(a, b) * b; }
+// Minimum vertical multiple for component 0; thin wrapper over min_vertical_cmp_multiple.
+int UncompressedComponents::min_vertical_luma_multiple() const {
+    return min_vertical_cmp_multiple(0);
+}
+// Minimum vertical multiple for the component stored at header_[cmp].
+int UncompressedComponents::min_vertical_cmp_multiple(int cmp) const {
+    const ExtendedComponentInfo& component = header_[cmp];
+    return min_vertical_extcmp_multiple(&component);
+}
+// Returns cmpinfo->info_.bcv divided by mcuv_ (integer division).
+// An earlier formulation divided by the gcd of every component's bcv instead; that variant is not used.
+// NOTE(review): mcuv_ is 0 until init() runs, so an earlier call would divide by zero - confirm callers.
+int UncompressedComponents::min_vertical_extcmp_multiple(const ExtendedComponentInfo* cmpinfo) const {
+    const int block_rows = cmpinfo->info_.bcv;
+    return block_rows / mcuv_;
+}
diff --git a/codec/L2/demos/leptonEnc/host/lepton/uncompressed_components.hh b/codec/L2/demos/leptonEnc/host/lepton/uncompressed_components.hh
new file mode 100644
index 0000000000..a7ed76dc91
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/uncompressed_components.hh
@@ -0,0 +1,246 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+#include <atomic>
+#include <functional>
+#include <algorithm>
+#include <string.h>
+#include <assert.h>
+#include <thread>
+#ifdef _WIN32
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+#include "base_coders.hh"
+#include "component_info.hh"
+#include "../vp8/model/color_context.hh"
+#include "../vp8/util/block_based_image.hh"
+//#include "XAcc_jpegdecoder.h"
+#include "XAcc_common.hpp"
+struct componentInfo;
+
+class Block;
+
+class UncompressedComponents { // per-component block storage plus the progress counters readers wait on
+    typedef int CounterType; // progress counters are plain ints
+    class ExtendedComponentInfo { // everything tracked for one colour component
+        ExtendedComponentInfo(const ExtendedComponentInfo&);           // not implemented
+        ExtendedComponentInfo operator=(const ExtendedComponentInfo&); // not implemented
+       public:
+        BlockBasedImage component_; // block storage, accessed through raster(dpos)
+        CounterType dpos_block_progress_; // compared against dpos in wait_for_worker_on_dpos
+        componentInfo info_; // copied from the caller's componentInfo in init()
+        int trunc_bcv_; // the number of vertical components in this (truncated) image
+        int trunc_bc_; // block count after truncation; see set_block_count_dpos
+        ExtendedComponentInfo() : dpos_block_progress_(0), trunc_bcv_(0), trunc_bc_(0) {}
+    };
+    int cmpc_; // the number of components
+    int mcuh_; // stored by init(); returned by get_mcu_count_horizontal()
+    int mcuv_; // stored by init(); divisor in min_vertical_extcmp_multiple()
+    typedef Sirikata::Array1d<ExtendedComponentInfo, 4> ExtendedInfo;
+    ExtendedInfo header_; // four slots; only the first cmpc_ are filled in by init()
+
+    CounterType coefficient_position_progress_; // polled by wait_for_worker_on_bpos
+    CounterType bit_progress_; // polled by wait_for_worker_on_bit
+    CounterType worker_start_read_signal_; // incremented by signal_worker_should_begin
+    int reserved_; // don't want to change memory layout
+    BaseDecoder* decoder_; // set by start_decoder(); not owned or deleted by this class
+    UncompressedComponents(const UncompressedComponents&);            // not implemented
+    UncompressedComponents& operator=(const UncompressedComponents&); // not implemented
+    int bch_(int component) const { return header_[component].info_.bch; } // untruncated value from info_
+    int bcv_(int component) const { return header_[component].trunc_bcv_; } // truncated value
+
+   public:
+    UncompressedComponents() : coefficient_position_progress_(0), bit_progress_(0), worker_start_read_signal_(0) {
+        decoder_ = NULL;
+        reserved_ = 0;
+        mcuh_ = 0;
+        mcuv_ = 0;
+        cmpc_ = 0;
+    }
+    unsigned short* get_quantization_tables(BlockType component) const { return header_[(int)component].info_.qtable; }
+    Sirikata::Array1d<uint32_t, (size_t)ColorChannel::NumBlockTypes> get_max_coded_heights() const {
+        Sirikata::Array1d<uint32_t, (size_t)ColorChannel::NumBlockTypes> retval;
+        retval.memset(0); // slots past cmpc_ stay 0
+        for (int i = 0; i < cmpc_ && i < (int)ColorChannel::NumBlockTypes; ++i) {
+            retval[i] = header_[i].trunc_bcv_;
+        }
+        return retval;
+    }
+    int get_mcu_count_vertical() const { return mcuv_; }
+    int get_mcu_count_horizontal() const { return mcuh_; }
+    bool is_memory_optimized(int cmp) const { return header_[cmp].component_.is_memory_optimized(); }
+    int get_num_components() const { return cmpc_; }
+
+    void worker_update_bit_progress(int add_bit_progress) { bit_progress_ += add_bit_progress; }
+    void worker_update_coefficient_position_progress(int add_coefficient_position_progress) {
+        coefficient_position_progress_ += add_coefficient_position_progress;
+    }
+    void worker_update_cmp_progress(BlockType cmp, int add_bit_progress) {
+        header_[(int)cmp].dpos_block_progress_ += add_bit_progress;
+    }
+    void worker_mark_cmp_finished(BlockType cmp) { // jumps the component's progress to its full block count
+        CounterType dpos_block_progress_ = header_[(int)cmp].trunc_bc_; // local; same name as the member it feeds
+        header_[(int)cmp].dpos_block_progress_ = dpos_block_progress_;
+    }
+    void start_decoder(BaseDecoder* decoder) { decoder_ = decoder; }
+    CodingReturnValue do_more_work() { return decoder_->decode_chunk(this); } // requires start_decoder() first
+    template <bool force_memory_optimized>
+    void allocate_channel_framebuffer(int desired_cmp,
+                                      BlockBasedImageBase<force_memory_optimized>* framebuffer,
+                                      bool memory_optimized = force_memory_optimized) const {
+        uint64_t total_req_blocks = 0; // sum of bcv * bch over the active components
+        for (int cmp = 0; cmp < (int)header_.size() && cmp < cmpc_; cmp++) {
+            total_req_blocks += header_[cmp].info_.bcv * header_[cmp].info_.bch;
+        }
+        for (int cmp = 0; cmp < (int)header_.size() && cmp < cmpc_; cmp++) {
+            int bc_allocated = header_[cmp].info_.bc;
+            int64_t max_cmp_bc = max_number_of_blocks; // this component's proportional share of the global cap
+            max_cmp_bc *= header_[cmp].info_.bcv;
+            max_cmp_bc *= header_[cmp].info_.bch;
+            max_cmp_bc /= total_req_blocks; // NOTE(review): zero divisor if every bcv * bch is 0 - confirm callers
+            if (bc_allocated > max_cmp_bc) {
+                bc_allocated = max_cmp_bc - (max_cmp_bc % header_[cmp].info_.bch); // round down to a multiple of bch
+            }
+            if (cmp == desired_cmp) { // only the requested component is initialised
+                framebuffer->init(header_[cmp].info_.bch, header_[cmp].info_.bcv, bc_allocated, memory_optimized);
+                break;
+            }
+        }
+    }
+    void init(Sirikata::Array1d<componentInfo, ExtendedInfo::size0> cmpinfo,
+              int cmpc,
+              int mcuh,
+              int mcuv,
+              bool memory_optimized_image) {
+        mcuh_ = mcuh;
+        mcuv_ = mcuv;
+        if (cmpc > (int)ColorChannel::NumBlockTypes) {
+            cmpc = (int)ColorChannel::NumBlockTypes;
+            // cmpc is clamped above, so the assert below always holds; this path relies on custom_exit()
+            const char* errmsg = "We only support 3 color channels or fewer\n";
+            int err = write(2, errmsg, strlen(errmsg)); // file descriptor 2
+            (void)err;
+            assert(cmpc <= (int)ColorChannel::NumBlockTypes && "We only support 3 color channels or less");
+            custom_exit(ExitCode::UNSUPPORTED_4_COLORS);
+        }
+        cmpc_ = cmpc;
+        for (int cmp = 0; cmp < cmpc; cmp++) {
+            header_[cmp].info_ = cmpinfo[cmp];
+            header_[cmp].trunc_bcv_ = cmpinfo[cmp].bcv; // starts untruncated
+            header_[cmp].trunc_bc_ = cmpinfo[cmp].bc;
+        }
+        if (!memory_optimized_image) { // framebuffers are allocated here only in this mode
+            for (int cmp = 0; cmp < (int)sizeof(header_) / (int)sizeof(header_[0]) && cmp < cmpc; cmp++) {
+                allocate_channel_framebuffer(cmp, &this->header_[cmp].component_, memory_optimized_image);
+            }
+        }
+    }
+    void set_block_count_dpos(ExtendedComponentInfo* ci, int trunc_bc) {
+        always_assert(ci->info_.bcv == ci->info_.bc / ci->info_.bch + (ci->info_.bc % ci->info_.bch ? 1 : 0));
+        int vertical_scanlines = std::min(trunc_bc / ci->info_.bch + (trunc_bc % ci->info_.bch ? 1 : 0), ci->info_.bcv);
+        int ratio = min_vertical_extcmp_multiple(ci); // NOTE(review): 0 when bcv < mcuv_, making the % below divide by 0
+        while (vertical_scanlines % ratio != 0 && vertical_scanlines + 1 <= ci->info_.bcv) { // round up, capped at bcv
+            ++vertical_scanlines;
+        }
+        always_assert(vertical_scanlines <= ci->info_.bcv);
+        ci->trunc_bcv_ = vertical_scanlines;
+        ci->trunc_bc_ = trunc_bc;
+    }
+    void set_truncation_bounds(int /*max_cmp*/,
+                               int /*max_bpos*/,
+                               int max_dpos[sizeof(header_) / sizeof(header_[0])],
+                               int /*max_sah*/) {
+        for (int i = 0; i < cmpc_; ++i) {
+            set_block_count_dpos(&header_[i], max_dpos[i] + 1); // max_dpos[i] + 1 becomes the block count
+        }
+    }
+    void wait_for_worker_on_bit(int bit) {
+        while (bit >= (bit_progress_ += 0)) { // "+= 0" only reads the counter
+            CodingReturnValue retval = do_more_work();
+            if (retval == CODING_ERROR) {
+                assert(false && "Incorrectly coded item");
+                custom_exit(ExitCode::CODING_ERROR);
+            }
+            // fprintf(stderr, "Waiting for bit %d > %d\n", bit, bit_progress_ += 0);
+        }
+    }
+    void wait_for_worker_on_bpos(int bpos) {
+        while (bpos >= (coefficient_position_progress_ += 0)) { // "+= 0" only reads the counter
+            CodingReturnValue retval = do_more_work();
+            if (retval == CODING_ERROR) {
+                assert(false && "Incorrectly coded item");
+                custom_exit(ExitCode::CODING_ERROR);
+            }
+            // fprintf(stderr, "Waiting for coefficient_position %d > %d\n", bpos, coefficient_position_progress_ += 0);
+        }
+    }
+    void wait_for_worker_on_dpos(int cmp, int dpos) {
+        dpos = std::min(dpos, header_[cmp].trunc_bc_ - 1); // never wait past the last (truncated) block
+        while (dpos >= (header_[cmp].dpos_block_progress_ += 0)) {
+            CodingReturnValue retval = do_more_work();
+            if (retval == CODING_ERROR) {
+                assert(false && "Incorrectly coded item");
+                custom_exit(ExitCode::CODING_ERROR);
+            }
+        }
+    }
+    void signal_worker_should_begin() {
+        // std::atomic_thread_fence(std::memory_order_release);
+        worker_start_read_signal_++;
+    }
+    unsigned int component_size_allocated(int cmp) const { return header_[cmp].component_.bytes_allocated(); }
+    Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> get_component_size_in_blocks() const {
+        Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> retval;
+        retval.memset(0); // slots past cmpc_ stay 0
+        for (int cmp = 0; cmp < cmpc_; ++cmp) {
+            retval[cmp] = header_[cmp].trunc_bc_;
+        }
+        return retval;
+    }
+    unsigned int component_size_in_blocks(int cmp) const { return header_[cmp].trunc_bc_; }
+    BlockBasedImage& full_component_write(BlockType cmp) { return header_[(int)cmp].component_; }
+    const BlockBasedImage& full_component_nosync(int cmp) const { return header_[cmp].component_; }
+    const AlignedBlock& block(BlockType cmp, int dpos) { // waits for dpos first; the *_nosync variants do not
+        wait_for_worker_on_dpos((int)cmp, dpos);
+        return header_[(int)cmp].component_.raster(dpos);
+    }
+    const AlignedBlock& block_nosync(BlockType cmp, int dpos) const {
+        return header_[(int)cmp].component_.raster(dpos);
+    }
+    signed short at_nosync(BlockType cmp, int bpos, int dpos) const {
+        return header_[(int)cmp].component_.raster(dpos).coefficients_zigzag(bpos);
+    }
+    // return the minimum luma multiple for full mcu splits in luma
+    int min_vertical_luma_multiple() const;
+    int min_vertical_cmp_multiple(int cmp) const;
+    int min_vertical_extcmp_multiple(const ExtendedComponentInfo* info) const;
+    int block_height(const int cmp) const { return bcv_(cmp); } // truncated value (trunc_bcv_)
+
+    int block_width(const int cmp) const { return bch_(cmp); } // untruncated value (info_.bch)
+
+    int block_width(const BlockType cmp) const { return bch_((int)cmp); }
+
+    void reset() { bit_progress_ -= bit_progress_; } // leaves bit_progress_ at 0; nothing else is touched
+    ~UncompressedComponents() { reset(); }
+    static int max_number_of_blocks; // cap used by allocate_channel_framebuffer
+
+    // the following functions are progressive-only functions (recode_jpeg)
+    // or decode-only functions (decode_jpeg, check_value_range)
+    // these are the only functions able to access the components
+    friend bool decode_jpeg(const std::vector<std::pair<uint32_t, uint32_t> >& huff_byte_offsets,
+                            std::vector<ThreadHandoff>* luma_row_offset_return);
+    friend bool recode_jpeg(void);
+    friend bool check_value_range(void);
+    friend bool hls_decode_jpeg_kernel(ap_uint<AXI_WIDTH>* datatoDDR, int size, struct_arith& arith, uint8_t* res);
+    AlignedBlock& mutable_block(BlockType cmp, int dpos) { return header_[(int)cmp].component_.raster(dpos); }
+
+   private:
+    signed short at(BlockType cmp, int bpos, int dpos) { // waits for dpos, then reads one coefficient
+        wait_for_worker_on_dpos((int)cmp, dpos);
+        return header_[(int)cmp].component_.raster(dpos).coefficients_zigzag(bpos);
+    }
+    signed short& set(BlockType cmp, int bpos, int dpos) { // writable reference, no waiting
+        return header_[(int)cmp].component_.raster(dpos).mutable_coefficients_zigzag(bpos);
+    }
+};
diff --git a/codec/L2/demos/leptonEnc/host/lepton/validation.cc b/codec/L2/demos/leptonEnc/host/lepton/validation.cc
new file mode 100644
index 0000000000..8ec93ff3a6
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/validation.cc
@@ -0,0 +1,134 @@
+#include "../vp8/util/memory.hh"
+#ifdef _WIN32
+#include <io.h>
+#else
+#include <sys/wait.h>
+#include <unistd.h>
+#endif
+#include <signal.h>
+#include "../vp8/util/nd_array.hh"
+#include "../io/MuxReader.hh"
+#include "../io/ioutil.hh"
+#include "validation.hh"
+// Forks an encode child (which returns CONTINUE_AS_JPEG) and a decode child (which returns
+// CONTINUE_AS_LEPTON), pointing *reader / *writer at fresh pipes in each child. The parent waits for both,
+// mirrors a non-zero exit status or terminating signal, and otherwise returns ROUNDTRIP_OK.
+ValidationContinuation validateAndCompress(int* reader,
+                                           int* writer,
+                                           Sirikata::Array1d<uint8_t, 2> header,
+                                           size_t start_byte,
+                                           size_t end_byte,
+                                           ExitCode* validation_exit_code,
+                                           Sirikata::MuxReader::ResizableByteBuffer* lepton_data,
+                                           int argc,
+                                           const char** argv,
+                                           bool is_socket) {
+#ifdef _WIN32
+    std::vector<const char*> args;
+    args.push_back(argv[0]);
+    args.push_back("-skiproundtrip");
+    for (int i = 1; i < argc; ++i) { // forward options, except a bare "-" and the flags filtered below
+        if (argv[i][0] == '-' && strcmp(argv[i], "-") && strstr(argv[i], "-validat") != argv[i] &&
+            strstr(argv[i], "-verif") != argv[i] && strstr(argv[i], "-socket") != argv[i] &&
+            strstr(argv[i], "-fork") != argv[i] && strstr(argv[i], "-listen") != argv[i] &&
+            strstr(argv[i], "-roundtrip") != argv[i]) {
+            args.push_back(argv[i]);
+        }
+    }
+    args.push_back("-"); // read from stdin, write to stdout
+    auto encode_pipes = IOUtil::start_subprocess(args.size(), &args[0], false);
+    lepton_data->reserve(4096 * 1024);
+#else
+    int jpeg_input_pipes[2] = {-1, -1};
+    int lepton_output_pipes[2] = {-1, -1};
+    int lepton_roundtrip_send[2] = {-1, -1};
+    int jpeg_roundtrip_recv[2] = {-1, -1};
+    // NOTE(review): only EINTR is retried; any other pipe()/fork() failure goes undetected here.
+    while (pipe(jpeg_input_pipes) < 0 && errno == EINTR) {
+    }
+    while (pipe(lepton_output_pipes) < 0 && errno == EINTR) {
+    }
+    pid_t encode_pid;
+    pid_t decode_pid;
+    if ((encode_pid = fork()) == 0) { // could also fork/exec here
+        // not yet open -- we will exit before accessed while(close(*fwriter) < 0 && errno == EINTR){}
+        if (*writer != -1 && *writer != *reader) {
+            while (close(*writer) < 0 && errno == EINTR) {
+            }
+        }
+        while (close(*reader) < 0 && errno == EINTR) {
+        }
+        *reader = jpeg_input_pipes[0];
+        *writer = lepton_output_pipes[1];
+        while (close(jpeg_input_pipes[1]) < 0 && errno == EINTR) {
+        }
+        while (close(lepton_output_pipes[0]) < 0 && errno == EINTR) {
+        }
+        return ValidationContinuation::CONTINUE_AS_JPEG;
+    }
+    while (close(jpeg_input_pipes[0]) < 0 && errno == EINTR) {
+    }
+    while (close(lepton_output_pipes[1]) < 0 && errno == EINTR) {
+    }
+
+    while (pipe(lepton_roundtrip_send) < 0 && errno == EINTR) {
+    }
+    while (pipe(jpeg_roundtrip_recv) < 0 && errno == EINTR) {
+    }
+    // we wanna fork the decode here before we allocate 4096 * 1024 bytes here
+    if ((decode_pid = fork()) == 0) { // could also fork/exec here
+        if (*writer != -1 && *writer != *reader) {
+            while (close(*writer) < 0 && errno == EINTR) {
+            }
+        }
+
+        while (close(*reader) < 0 && errno == EINTR) {
+        }
+        // not yet open -- we will exit before accessed while(close(*fwriter) < 0 && errno == EINTR){}
+        while (close(jpeg_input_pipes[1]) < 0 && errno == EINTR) {
+        }
+        while (close(lepton_output_pipes[0]) < 0 && errno == EINTR) {
+        }
+
+        *reader = lepton_roundtrip_send[0];
+        *writer = jpeg_roundtrip_recv[1];
+        while (close(lepton_roundtrip_send[1]) < 0 && errno == EINTR) {
+        }
+        while (close(jpeg_roundtrip_recv[0]) < 0 && errno == EINTR) {
+        }
+
+        return ValidationContinuation::CONTINUE_AS_LEPTON;
+    }
+    while (close(lepton_roundtrip_send[0]) < 0 && errno == EINTR) {
+    }
+    while (close(jpeg_roundtrip_recv[1]) < 0 && errno == EINTR) {
+    }
+
+    lepton_data->reserve(4096 * 1024);
+    int status = 0;
+    while (waitpid(encode_pid, &status, 0) < 0 && errno == EINTR) {
+    } // wait on encode
+    if (WIFEXITED(status)) {
+        int exit_code = WEXITSTATUS(status);
+        if (exit_code != 0) {
+            exit(exit_code);
+        }
+    } else if (WIFSIGNALED(status)) {
+        raise(WTERMSIG(status));
+    }
+
+    status = 0;
+    while (waitpid(decode_pid, &status, 0) < 0 && errno == EINTR) {
+    } // wait on decode
+    if (WIFEXITED(status)) {
+        int exit_code = WEXITSTATUS(status);
+        if (exit_code != 0) {
+            exit(exit_code);
+        }
+    } else if (WIFSIGNALED(status)) {
+        raise(WTERMSIG(status));
+    }
+#endif
+    *validation_exit_code = ExitCode::SUCCESS;
+    return ValidationContinuation::ROUNDTRIP_OK;
+}
diff --git a/codec/L2/demos/leptonEnc/host/lepton/validation.hh b/codec/L2/demos/leptonEnc/host/lepton/validation.hh
new file mode 100644
index 0000000000..ef17a7750e
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/validation.hh
@@ -0,0 +1,17 @@
+enum class ValidationContinuation { // return value of validateAndCompress
+    ROUNDTRIP_OK, // returned to the calling process after the children have been waited on
+    BAD, // NOTE(review): not returned by the validateAndCompress definition in validation.cc
+    CONTINUE_AS_JPEG, // returned inside the forked encode child
+    CONTINUE_AS_LEPTON, // returned inside the forked decode child
+};
+
+ValidationContinuation validateAndCompress(int* reader, // in/out: replaced by a pipe end in each forked child
+                                           int* writer, // in/out: replaced by a pipe end in each forked child
+                                           Sirikata::Array1d<uint8_t, 2> header,
+                                           size_t start_byte,
+                                           size_t end_byte,
+                                           ExitCode* validation_exit_code, // set to SUCCESS on the ROUNDTRIP_OK path
+                                           Sirikata::MuxReader::ResizableByteBuffer* output, // lepton_data in the .cc
+                                           int argc,
+                                           const char** argv, // forwarded to the subprocess in the _WIN32 build
+                                           bool is_socket);
diff --git a/codec/L2/demos/leptonEnc/host/lepton/vp8_decoder.cc b/codec/L2/demos/leptonEnc/host/lepton/vp8_decoder.cc
new file mode 100644
index 0000000000..9f8b96df79
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/vp8_decoder.cc
@@ -0,0 +1,244 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+#include <algorithm>
+#include <cassert>
+#include <iostream>
+#include <tuple>
+
+#include "bitops.hh"
+#include "component_info.hh"
+#include "uncompressed_components.hh"
+#include "jpgcoder.hh"
+#include "vp8_decoder.hh"
+
+#include "../io/Reader.hh"
+#include "../vp8/decoder/decoder.hh"
+using namespace std;
+
+void VP8ComponentDecoder::initialize(Sirikata::DecoderReader* input, const std::vector<ThreadHandoff>& thread_handoff) {
+    thread_handoff_ = thread_handoff; // copied; may be empty (initialize_decoder_state then reads it from str_in)
+    str_in = input;                   // kept for direct reads in initialize_decoder_state
+    mux_reader_.init(input);          // the multiplexed reader is bound to the same input
+}
+void VP8ComponentDecoder::decode_row( // forwards one row to the ThreadState picked by target_thread_id
+    int target_thread_id,
+    BlockBasedImagePerChannel<true>& image_data, // FIXME (from the original): set image_data to true
+    Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> component_size_in_blocks,
+    int component, int curr_y) {
+    auto& worker = *thread_state_[target_thread_id];
+    worker.decode_row(image_data, component_size_in_blocks, component, curr_y);
+}
+
+// Builds the decoder on top of VP8ComponentEncoder and constructs the multiplexed stream reader.
+VP8ComponentDecoder::VP8ComponentDecoder(bool do_threading)
+    : VP8ComponentEncoder(do_threading),
+      mux_reader_(Sirikata::JpegAllocator<uint8_t>(), 8, 4096 * 1024 / MAX_NUM_THREADS + 131072) {
+    // With real threads the virtual thread id stays at -1; otherwise it starts at 0 and decode_chunk
+    // advances it while running each thread's segment in turn on the calling thread.
+    const int initial_virtual_id = do_threading ? -1 : 0;
+    virtual_thread_id_ = initial_virtual_id;
+}
+
+VP8ComponentDecoder::~VP8ComponentDecoder() {} // empty body: no explicit teardown is performed here
+
+#ifdef ALLOW_FOUR_COLORS // four-channel build: adds the TEMPLATE_ARG_COLOR3 / BlockType::Ck entries
+#define ProbabilityTablesTuple(left, above, right)                  \
+    ProbabilityTables<left, above, right, TEMPLATE_ARG_COLOR0>,     \
+        ProbabilityTables<left, above, right, TEMPLATE_ARG_COLOR1>, \
+        ProbabilityTables<left, above, right, TEMPLATE_ARG_COLOR2>, \
+        ProbabilityTables<left, above, right, TEMPLATE_ARG_COLOR3>
+#define EACH_BLOCK_TYPE(left, above, right) BlockType::Y, BlockType::Cb, BlockType::Cr, BlockType::Ck
+#else // NOTE(review): EACH_BLOCK_TYPE is function-like above but object-like below - confirm which is intended
+#define ProbabilityTablesTuple(left, above, right)                  \
+    ProbabilityTables<left, above, right, TEMPLATE_ARG_COLOR0>,     \
+        ProbabilityTables<left, above, right, TEMPLATE_ARG_COLOR1>, \
+        ProbabilityTables<left, above, right, TEMPLATE_ARG_COLOR2>
+#define EACH_BLOCK_TYPE BlockType::Y, BlockType::Cb, BlockType::Cr
+#endif // ALLOW_FOUR_COLORS
+
+// Re-runs initialize_thread_id so `target_thread_state` starts over on logical thread `thread_id`.
+void VP8ComponentDecoder::clear_thread_state(int thread_id, int target_thread_state,
+                                             BlockBasedImagePerChannel<true>& framebuffer) {
+    this->initialize_thread_id<true>(thread_id, target_thread_state, framebuffer);
+}
+// Worker entry point: calls vp8_decode_thread until it stops returning CODING_PARTIAL, timestamping both ends.
+void VP8ComponentDecoder::worker_thread(ThreadState* ts, int thread_id, UncompressedComponents* const colldata) {
+    TimingHarness::timing[thread_id][TimingHarness::TS_ARITH_STARTED] = TimingHarness::get_time_us();
+    while (ts->vp8_decode_thread(thread_id, colldata) == CODING_PARTIAL) continue;
+    TimingHarness::timing[thread_id][TimingHarness::TS_ARITH_FINISHED] = TimingHarness::get_time_us();
+}
+// Prepares thread_state_[target_thread_state] to decode logical thread `thread_id`: resets the model,
+// rebinds the per-channel contexts to `framebuffer`, attaches stream `thread_id` to the bool decoder and
+// loads the luma row range from thread_handoff_.
+template <bool force_memory_optimized>
+void VP8ComponentDecoder::initialize_thread_id(int thread_id,
+                                               int target_thread_state,
+                                               BlockBasedImagePerChannel<force_memory_optimized>& framebuffer) {
+    TimingHarness::timing[thread_id % NUM_THREADS][TimingHarness::TS_STREAM_MULTIPLEX_STARTED] =
+        TimingHarness::get_time_us();
+    reset_thread_model_state(target_thread_state); // unconditional, even when thread_id == target_thread_state
+    auto& state = *thread_state_[target_thread_state]; // bound only after the reset above
+    state.decode_index_ = 0;
+    for (unsigned int channel = 0; channel < framebuffer.size(); ++channel) {
+        if (framebuffer[channel] != NULL) { // channels without a framebuffer are left untouched
+            state.is_top_row_.at(channel) = true;
+            state.num_nonzeros_.at(channel).resize(framebuffer[channel]->block_width() << 1);
+            state.context_.at(channel) = framebuffer[channel]->begin(state.num_nonzeros_.at(channel).begin());
+        }
+    }
+    /* initialize the bool decoder */
+    const size_t index = (size_t)thread_id; // a negative thread_id becomes huge and fails the assert below
+    always_assert(index < streams_.size());
+    state.bool_decoder_.init(streams_[index].first != streams_[index].second ? &*streams_[index].first : nullptr,
+                             streams_[index].second - streams_[index].first);
+    state.is_valid_range_ = false;
+    state.luma_splits_.resize(2);
+    if (index < thread_handoff_.size()) {
+        state.luma_splits_[0] = thread_handoff_[index].luma_y_start;
+        state.luma_splits_[1] = thread_handoff_[index].luma_y_end;
+    } else {
+        // Threads beyond the handoff table get an empty range that starts and ends at the table's last end.
+        always_assert(!thread_handoff_.empty()); // back() on an empty vector is undefined behaviour
+        state.luma_splits_[0] = thread_handoff_.back().luma_y_end;
+        state.luma_splits_[1] = thread_handoff_.back().luma_y_end;
+    }
+    TimingHarness::timing[thread_id % NUM_THREADS][TimingHarness::TS_STREAM_MULTIPLEX_FINISHED] =
+        TimingHarness::get_time_us();
+}
+std::vector<ThreadHandoff> VP8ComponentDecoder::initialize_baseline_decoder( // non-template entry point
+    const UncompressedComponents* const colldata,
+    Sirikata::Array1d<BlockBasedImagePerChannel<true>, MAX_NUM_THREADS>& framebuffer) {
+    return this->initialize_decoder_state<true>(colldata, framebuffer);
+}
+// Installs the quantization tables, makes sure thread_handoff_ is populated (reading the legacy
+// thread-count byte and luma split table from str_in when initialize() supplied none), buffers every
+// multiplexed stream into streams_, and returns the handoff table. An empty result means the count
+// byte could not be read or was zero; decode_chunk reports that as CODING_ERROR.
+template <bool force_memory_optimized>
+std::vector<ThreadHandoff> VP8ComponentDecoder::initialize_decoder_state(
+    const UncompressedComponents* const colldata,
+    Sirikata::Array1d<BlockBasedImagePerChannel<force_memory_optimized>, MAX_NUM_THREADS>& framebuffer) {
+    if (colldata->get_num_components() > (int)BlockType::Y) {
+        ProbabilityTablesBase::set_quantization_table(BlockType::Y, colldata->get_quantization_tables(BlockType::Y));
+    }
+    if (colldata->get_num_components() > (int)BlockType::Cb) {
+        ProbabilityTablesBase::set_quantization_table(BlockType::Cb, colldata->get_quantization_tables(BlockType::Cb));
+    }
+    if (colldata->get_num_components() > (int)BlockType::Cr) {
+        ProbabilityTablesBase::set_quantization_table(BlockType::Cr, colldata->get_quantization_tables(BlockType::Cr));
+    }
+#ifdef ALLOW_FOUR_COLORS
+    if (colldata->get_num_components() > (int)BlockType::Ck) {
+        ProbabilityTablesBase::set_quantization_table(BlockType::Ck, colldata->get_quantization_tables(BlockType::Ck));
+    }
+#endif
+    if (thread_handoff_.empty()) {
+        /* legacy layout: one byte holding the thread count, then (count - 1) 16-bit luma split rows */
+        unsigned char mark{};
+        const bool ok = str_in->Read(&mark, 1).second == Sirikata::JpegError::nil();
+        if (!ok || mark == 0) { // mark == 0 would turn `mark - 1` below into an enormous vector size
+            return std::vector<ThreadHandoff>();
+        }
+        ThreadHandoff th;
+        memset(&th, 0, sizeof(th));
+        th.num_overhang_bits = ThreadHandoff::LEGACY_OVERHANG_BITS; // to make sure we don't use this value
+        th.luma_y_end = colldata->block_height(0);
+        thread_handoff_.insert(thread_handoff_.end(), mark, th);
+
+        std::vector<uint16_t> luma_splits_tmp(mark - 1);
+        // NOTE(review): the ReadFull result is ignored, so a short read leaves zero-valued splits - confirm.
+        IOUtil::ReadFull(str_in, luma_splits_tmp.data(), sizeof(uint16_t) * (mark - 1));
+        int sfv_lcm = colldata->min_vertical_luma_multiple();
+        for (int i = 0; i + 1 < mark; ++i) {
+            thread_handoff_[i].luma_y_end = htole16(luma_splits_tmp[i]);
+            if (thread_handoff_[i].luma_y_end % sfv_lcm) { // every split must be a multiple of sfv_lcm
+                fprintf(stderr, "File Split %d = %d (remainder %d)\n", i, thread_handoff_[i].luma_y_end, sfv_lcm);
+                custom_exit(ExitCode::THREADING_PARTIAL_MCU);
+            }
+        }
+        for (int i = 1; i < mark; ++i) { // each thread starts where the previous one ends
+            thread_handoff_[i].luma_y_start = thread_handoff_[i - 1].luma_y_end;
+        }
+    }
+    /* read entire chunk into memory */
+    mux_reader_.fillBufferEntirely(streams_.begin());
+    write_byte_bill(Billing::DELIMITERS, true, mux_reader_.getOverhead());
+    // Per-thread state is not initialised here; decode_chunk calls initialize_thread_id itself.
+    if (thread_handoff_.size()) {
+        thread_handoff_.back().luma_y_end = colldata->block_height(0); // last thread runs to the final row
+    }
+    return thread_handoff_;
+}
+
+// Runs (or resumes) decoding. The first call wires up the framebuffers, loads the handoff table and
+// initialises per-thread state; thread 0 is always decoded on the calling thread. Returns CODING_PARTIAL
+// whenever vp8_decode_thread does, CODING_ERROR for an unusable thread count, and CODING_DONE at the end.
+CodingReturnValue VP8ComponentDecoder::decode_chunk(UncompressedComponents* const colldata) {
+    if (thread_state_[0] == nullptr || thread_state_[0]->context_[0].isNil()) {
+        /* first call */
+        BlockBasedImagePerChannel<false> framebuffer;
+        framebuffer.memset(0);
+        for (size_t i = 0; i < framebuffer.size() && int(i) < colldata->get_num_components(); ++i) {
+            framebuffer[i] = &colldata->full_component_write((BlockType)i);
+        }
+        Sirikata::Array1d<BlockBasedImagePerChannel<false>, MAX_NUM_THREADS> all_framebuffers;
+        for (size_t i = 0; i < all_framebuffers.size(); ++i) {
+            all_framebuffers[i] = framebuffer;
+        }
+        size_t num_threads_needed = initialize_decoder_state(colldata, all_framebuffers).size();
+        if (num_threads_needed > NUM_THREADS || num_threads_needed == 0) { // reject before touching thread state
+            return CODING_ERROR;
+        }
+        for (size_t i = 0; i < num_threads_needed; ++i) {
+            initialize_thread_id(i, i, framebuffer);
+            if (!do_threading_) { // single-threaded: only state 0 is set up now, the rest is done lazily below
+                break;
+            }
+        }
+        if (do_threading_) {
+            for (unsigned int thread_id = 1; thread_id < NUM_THREADS; ++thread_id) {
+                spin_workers_[thread_id - 1].work =
+                    std::bind(worker_thread, thread_state_[thread_id], thread_id, colldata);
+                spin_workers_[thread_id - 1].activate_work();
+            }
+        }
+    }
+    TimingHarness::timing[0][TimingHarness::TS_ARITH_STARTED] = TimingHarness::get_time_us();
+    CodingReturnValue ret = thread_state_[0]->vp8_decode_thread(0, colldata);
+    if (ret == CODING_PARTIAL) {
+        return ret;
+    }
+    TimingHarness::timing[0][TimingHarness::TS_ARITH_FINISHED] = TimingHarness::get_time_us();
+    if (do_threading_) {
+        for (unsigned int thread_id = 1; thread_id < NUM_THREADS; ++thread_id) {
+            TimingHarness::timing[thread_id][TimingHarness::TS_THREAD_WAIT_STARTED] = TimingHarness::get_time_us();
+            spin_workers_[thread_id - 1].main_wait_for_done();
+            TimingHarness::timing[thread_id][TimingHarness::TS_THREAD_WAIT_FINISHED] = TimingHarness::get_time_us();
+        }
+        // every worker has been waited on at this point
+    } else {
+        // no real threads: run the remaining logical threads one after another, reusing thread state 0
+        virtual_thread_id_ += 1;
+        for (unsigned int thread_id = virtual_thread_id_; thread_id < NUM_THREADS; ++thread_id, ++virtual_thread_id_) {
+            BlockBasedImagePerChannel<false> framebuffer;
+            framebuffer.memset(0);
+            for (size_t i = 0; i < framebuffer.size() && int(i) < colldata->get_num_components(); ++i) {
+                framebuffer[i] = &colldata->full_component_write((BlockType)i);
+            }
+
+            initialize_thread_id(thread_id, 0, framebuffer);
+            TimingHarness::timing[thread_id][TimingHarness::TS_ARITH_STARTED] = TimingHarness::get_time_us();
+            if ((ret = thread_state_[0]->vp8_decode_thread(0, colldata)) == CODING_PARTIAL) {
+                return ret;
+            }
+            TimingHarness::timing[thread_id][TimingHarness::TS_ARITH_FINISHED] = TimingHarness::get_time_us();
+        }
+    }
+    TimingHarness::timing[0][TimingHarness::TS_JPEG_RECODE_STARTED] = TimingHarness::get_time_us();
+    for (int component = 0; component < colldata->get_num_components(); ++component) {
+        colldata->worker_mark_cmp_finished((BlockType)component);
+    }
+    colldata->worker_update_coefficient_position_progress(64);
+    colldata->worker_update_bit_progress(16);
+    return CODING_DONE;
+}
diff --git a/codec/L2/demos/leptonEnc/host/lepton/vp8_decoder.hh b/codec/L2/demos/leptonEnc/host/lepton/vp8_decoder.hh
new file mode 100644
index 0000000000..15d677601f
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/vp8_decoder.hh
@@ -0,0 +1,64 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+#include <array>
+#include "base_coders.hh"
+#include "lepton_codec.hh"
+#include "../../io/MuxReader.hh"
+#include "aligned_block.hh"
+#include "bool_decoder.hh"
+#include "vp8_encoder.hh"
+
+class VP8ComponentDecoder : public BaseDecoder, public VP8ComponentEncoder {
+    Sirikata::DecoderReader* str_in{}; // input reader pointer, value-initialized to null
+    // const std::vector<uint8_t, Sirikata::JpegAllocator<uint8_t> > *file_;
+    Sirikata::MuxReader mux_reader_;
+    std::vector<ThreadHandoff> thread_handoff_;
+    Sirikata::Array1d<std::pair<Sirikata::MuxReader::ResizableByteBuffer::const_iterator,
+                                Sirikata::MuxReader::ResizableByteBuffer::const_iterator>,
+                      Sirikata::MuxReader::MAX_STREAM_ID>
+        streams_; // MAX_STREAM_ID (iterator, iterator) pairs into mux byte buffers
+    // Not copyable: copy construction and copy assignment are both deleted.
+    VP8ComponentDecoder(const VP8ComponentDecoder&) = delete;
+    VP8ComponentDecoder& operator=(const VP8ComponentDecoder&) = delete;
+    static void worker_thread(ThreadState*, int thread_id, UncompressedComponents* const colldata);
+    template <bool force_memory_optimized>
+    void initialize_thread_id(int thread_id,
+                              int target_thread_state,
+                              BlockBasedImagePerChannel<force_memory_optimized>& framebuffer);
+    // NOTE(review): no initializer in this header; presumably set by the constructor -- confirm.
+    int virtual_thread_id_;
+
+   public:
+    VP8ComponentDecoder(bool do_threading);
+    // Reads the threading information and uses mux_reader_ to create the streams_.
+    // Returns the bound of each thread's max_luma (non-inclusive) responsibility in the file.
+    template <bool force_memory_optimized>
+    std::vector<ThreadHandoff> initialize_decoder_state(
+        const UncompressedComponents* const colldata,
+        // quantization_tables
+        Sirikata::Array1d<BlockBasedImagePerChannel<force_memory_optimized>,
+                          MAX_NUM_THREADS>& framebuffer); // framebuffer
+    virtual std::vector<ThreadHandoff> initialize_baseline_decoder(
+        const UncompressedComponents* const colldata,
+        Sirikata::Array1d<BlockBasedImagePerChannel<true>, MAX_NUM_THREADS>& framebuffer);
+    void registerWorkers(GenericWorker* workers, unsigned int num_workers) { // forwards to the encoder base
+        this->VP8ComponentEncoder::registerWorkers(workers, num_workers);
+    }
+    GenericWorker* getWorker(unsigned int i) { // asserts i < num_registered_workers_, then &spin_workers_[i]
+        always_assert(i < num_registered_workers_);
+        return &spin_workers_[i];
+    }
+    size_t get_model_memory_usage() const { return model_memory_used(); }
+    size_t get_model_worker_memory_usage() const { return model_worker_memory_used(); }
+    ~VP8ComponentDecoder();
+    void initialize(Sirikata::DecoderReader* input, const std::vector<ThreadHandoff>& thread_transition_info);
+    // Necessary to implement the BaseDecoder interface. Thin wrapper around vp8_decoder.
+    virtual CodingReturnValue decode_chunk(UncompressedComponents* dst);
+    virtual void decode_row(int target_thread_id,
+                            BlockBasedImagePerChannel<true>& image_data, // FIXME: set image_data to true
+                            Sirikata::Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> component_size_in_blocks,
+                            int component,
+                            int curr_y);
+    virtual void clear_thread_state(int thread_id,
+                                    int target_thread_state,
+                                    BlockBasedImagePerChannel<true>& framebuffer);
+};
diff --git a/codec/L2/demos/leptonEnc/host/lepton/vp8_encoder.cc b/codec/L2/demos/leptonEnc/host/lepton/vp8_encoder.cc
new file mode 100644
index 0000000000..b2f8e82406
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/vp8_encoder.cc
@@ -0,0 +1,1632 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+#include "../../vp8/util/memory.hh"
+#include <string>
+#include <cassert>
+#include <iostream>
+#include <fstream>
+#ifdef _WIN32
+#include <fcntl.h>
+#endif
+#include "bitops.hh"
+#include "component_info.hh"
+#include "uncompressed_components.hh"
+#include "jpgcoder.hh"
+#include "vp8_encoder.hh"
+
+#include "bool_encoder.hh"
+#include "model.hh"
+#include "numeric.hh"
+
+#include "../vp8/model/model.hh"
+#include "../vp8/encoder/encoder.hh"
+#include "../io/MuxReader.hh"
+#include "loop_stt.h"
+
+#include <ap_int.h>
+#include <hls_stream.h>
+
+//#include "../lepton/XModified.h"
+extern LoopNodeFactory g_loops;
+// void process_row_range3(
+//		WD_AXI*         axi_coeff,
+//		LeptonInput&    lepp,
+//      struct_arith&   arith_enc,
+//		uint8_t*        res
+//       );
+using namespace std;
+typedef Sirikata::MuxReader::ResizableByteBuffer ResizableByteBuffer;
+// Debug dump of the annotation table gctx->p to fp: one line per entry whose value is not -1
+// and whose context is not ZDSTSCAN. Without ANNOTATION_ENABLED the body is compiled out.
+void printContext(FILE* fp) {
+#ifdef ANNOTATION_ENABLED
+    for (int cm = 0; cm < 3; ++cm) {
+        for (int y = 0; y < Context::H / 8; ++y) {
+            for (int x = 0; x < Context::W / 8; ++x) {
+                for (int by = 0; by < 8; ++by) {
+                    for (int bx = 0; bx < 8; ++bx) {
+                        for (int ctx = 0; ctx < NUMCONTEXT; ++ctx) {
+                            for (int dim = 0; dim < 3; ++dim) {
+                                const int val = gctx->p[cm][y][x][by][bx][ctx][dim];
+                                const char* nam = "UNKNOWN";
+                                switch (ctx) {
+                                    case ZDSTSCAN:
+                                        nam = "ZDSTSCAN";
+                                        break;
+                                    case ZEROS7x7:
+                                        nam = "ZEROS7x7";
+                                        break;
+                                    case EXPDC:
+                                        nam = "EXPDC";
+                                        break;
+                                    case RESDC:
+                                        nam = "RESDC";
+                                        break;
+                                    case SIGNDC:
+                                        nam = "SIGNDC";
+                                        break;
+                                    case EXP7x7:
+                                        nam = "EXP7x7";
+                                        break;
+                                    case RES7x7:
+                                        nam = "RES7x7";
+                                        break;
+                                    case SIGN7x7:
+                                        nam = "SIGN7x7";
+                                        break;
+                                    case ZEROS1x8:
+                                        nam = "ZEROS1x8";
+                                        break;
+                                    case ZEROS8x1:
+                                        nam = "ZEROS8x1";
+                                        break;
+                                    case EXP8:
+                                        nam = "EXP8";
+                                        break;
+                                    case THRESH8:
+                                        nam = "THRESH8";
+                                        break;
+                                    case RES8:
+                                        nam = "RES8";
+                                        break;
+                                    case SIGN8:
+                                        nam = "SIGN8"; // was a mangled literal with a stray #include spliced in
+                                        break;
+                                    default: break; // keep "UNKNOWN"
+                                }
+                                if (val != -1 && ctx != ZDSTSCAN) {
+                                    fprintf(fp, "col[%02d] y[%02d]x[%02d] by[%02d]x[%02d] [%s][%d] = %d\n", cm, y, x,
+                                            by, bx, nam, dim, val);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+#endif
+}
+
+VP8ComponentEncoder::VP8ComponentEncoder(bool do_threading) : LeptonCodec(do_threading) {} // forwards the flag
+
+CodingReturnValue VP8ComponentEncoder::encode_chunk(const UncompressedComponents* input,
+                                                    IOUtil::FileWriter* output,
+                                                    const ThreadHandoff* selected_splits,
+                                                    unsigned int num_selected_splits) {
+    // Thin wrapper: runs vp8_full_encoder() inside a "PROC:encode_chunk" g_loops profiling scope.
+    g_loops.START("PROC:encode_chunk", PROC);
+    g_loops.CNT();
+    // Hold the result so END() actually runs; it used to sit after the `return`, where it was
+    // unreachable and left the scope opened by START() unclosed.
+    const CodingReturnValue ret = vp8_full_encoder(input, output, selected_splits, num_selected_splits);
+    g_loops.END();
+    return ret;
+}
+
+// Serializes one row (curr_y) of the component middle_model.COLOR: the first block with left_model,
+// interior blocks with middle_model and, when block_width > 1, the last block with right_model.
+// Returns early once next() yields an offset at or beyond component_size_in_blocks().
+template <class Left, class Middle, class Right>
+void VP8ComponentEncoder::process_row(
+    ProbabilityTablesBase& pt, Left& left_model, Middle& middle_model, Right& right_model, int curr_y,
+    const UncompressedComponents* const colldata,
+    Sirikata::Array1d<ConstBlockContext, (uint32_t)ColorChannel::NumBlockTypes>& context,
+    BoolEncoder& bool_encoder) {
+    uint32_t block_width = colldata->full_component_nosync((int)middle_model.COLOR).block_width();
+    g_loops.START("BRCH:process_row, bw>0", BRCH); // opened outside the branch so END() below always pairs up
+    if (block_width > 0) {
+        g_loops.CNT();
+        ConstBlockContext state = context.at((int)middle_model.COLOR);
+        const AlignedBlock& block = state.here();
+#ifdef ANNOTATION_ENABLED
+        gctx->cur_cmp = middle_model.COLOR; // for debug purposes only, not to be used in production
+        gctx->cur_jpeg_x = 0;
+        gctx->cur_jpeg_y = curr_y;
+#endif
+        state.num_nonzeros_here->set_num_nonzeros(block.recalculate_coded_length());
+        serialize_tokens(state, bool_encoder, left_model, pt);
+        uint32_t offset = colldata->full_component_nosync((int)middle_model.COLOR).next(state, true, curr_y);
+        context.at((int)middle_model.COLOR) = state;
+        if (offset >= colldata->component_size_in_blocks(middle_model.COLOR)) {
+            g_loops.END(); // close the BRCH scope before leaving early
+            return;
+        }
+    }
+    g_loops.END();
+    g_loops.START("LOOP:process_row, jpeg_x:1 to bw-2", FOR);
+    for (unsigned int jpeg_x = 1; jpeg_x + 1 < block_width; jpeg_x++) {
+        g_loops.CNT();
+        ConstBlockContext state = context.at((int)middle_model.COLOR);
+        const AlignedBlock& block = state.here();
+#ifdef ANNOTATION_ENABLED
+        gctx->cur_cmp = middle_model.COLOR; // for debug purposes only, not to be used in production
+        gctx->cur_jpeg_x = jpeg_x;
+        gctx->cur_jpeg_y = curr_y;
+#endif
+        state.num_nonzeros_here->set_num_nonzeros(block.recalculate_coded_length()); // FIXME set edge pixels too
+        serialize_tokens(state, bool_encoder, middle_model, pt);
+        uint32_t offset = colldata->full_component_nosync((int)middle_model.COLOR).next(state, true, curr_y);
+        context.at((int)middle_model.COLOR) = state;
+        if (offset >= colldata->component_size_in_blocks(middle_model.COLOR)) {
+            g_loops.END(); // close the FOR scope before leaving early
+            return;
+        }
+    }
+    g_loops.END();
+    g_loops.START("BRCH:process_row, jpeg_x==bw-1", BRCH);
+    if (block_width > 1) {
+        g_loops.CNT();
+        ConstBlockContext state = context.at((int)middle_model.COLOR);
+        const AlignedBlock& block = state.here();
+#ifdef ANNOTATION_ENABLED
+        gctx->cur_cmp = middle_model.COLOR; // for debug purposes only, not to be used in production
+        gctx->cur_jpeg_x = block_width - 1;
+        gctx->cur_jpeg_y = curr_y;
+#endif
+        state.num_nonzeros_here->set_num_nonzeros(block.recalculate_coded_length());
+        serialize_tokens(state, bool_encoder, right_model, pt);
+        colldata->full_component_nosync((int)middle_model.COLOR).next(state, false, curr_y);
+        context.at((int)middle_model.COLOR) = state;
+    }
+    g_loops.END();
+}
+uint32_t aligned_block_cost(const AlignedBlock& block) { // integer cost estimate over the 64 values of `block`
+#ifdef __SSE2__ /* SSE2 or higher instruction set available { */
+    const __m128i zero = _mm_setzero_si128();
+    __m128i v_cost;
+    for (int i = 0; i < 64; i += 8) { // eight 16-bit lanes per aligned 128-bit load
+        __m128i val = _mm_abs_epi16(_mm_load_si128((const __m128i*)(const char*)(block.raw_data() + i))); // NOTE(review): _mm_abs_epi16 is SSSE3, guard is __SSE2__
+        v_cost = _mm_set1_epi16(0); // NOTE(review): reset every pass, so only the last group of 8 reaches the return
+#ifndef __SSE4_1__
+        while (_mm_movemask_epi8(_mm_cmpeq_epi32(val, zero)) != 0xFFFF) // loop until every lane is zero
+#else
+        while (!_mm_test_all_zeros(val, val)) // loop until every lane is zero
+#endif
+        {
+            __m128i mask = _mm_cmpgt_epi16(val, zero); // all-ones in lanes that are still > 0
+            v_cost = _mm_add_epi16(v_cost, _mm_and_si128(mask, _mm_set1_epi16(2))); // +2 per still-non-zero lane
+            val = _mm_srli_epi16(val, 1); // drop one bit from every lane
+        }
+        v_cost = _mm_add_epi16(v_cost, _mm_srli_si128(v_cost, 8)); // horizontal add: fold upper 8 bytes onto lower
+        v_cost = _mm_add_epi16(v_cost, _mm_srli_si128(v_cost, 4)); // ...then fold by 4 bytes
+        v_cost = _mm_add_epi16(v_cost, _mm_srli_si128(v_cost, 2)); // ...then by 2; lane 0 now holds the group sum
+    }
+    return 16 + _mm_extract_epi16(v_cost, 0); // constant 16 plus lane 0 of the final v_cost
+#else  /* } No SSE2 instructions { */
+    uint32_t scost = 0;
+    for (int i = 0; i < 64; ++i) {
+        scost += 1 + 2 * uint16bit_length(abs(block.raw_data()[i])); // accumulates over all 64 values
+    }
+    return scost; // NOTE(review): sums all 64 entries, unlike the SIMD path above -- confirm which is intended
+#endif /* } */
+}
+
+#ifdef ALLOW_FOUR_COLORS // four entries per macro: COLOR0..COLOR3 / Y, Cb, Cr, Ck
+#define ProbabilityTablesTuple(left, above, right)                      \
+    ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR0>,     \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR1>, \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR2>, \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR3>
+#define EACH_BLOCK_TYPE(left, above, right)                                                                \
+    ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR0>(BlockType::Y, left, above, right),      \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR1>(BlockType::Cb, left, above, right), \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR2>(BlockType::Cr, left, above, right), \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR3>(BlockType::Ck, left, above, right)
+#else // three entries per macro: COLOR0..COLOR2 / Y, Cb, Cr
+#define ProbabilityTablesTuple(left, above, right)                      \
+    ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR0>,     \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR1>, \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR2>
+#define EACH_BLOCK_TYPE(left, above, right)                                                                \
+    ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR0>(BlockType::Y, left, above, right),      \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR1>(BlockType::Cb, left, above, right), \
+        ProbabilityTables<left && above && right, TEMPLATE_ARG_COLOR2>(BlockType::Cr, left, above, right)
+#endif // ALLOW_FOUR_COLORS
+// Six global table tuples: ProbabilityTablesTuple lists the element types, EACH_BLOCK_TYPE their initializers.
+tuple<ProbabilityTablesTuple(false, false, false)> corner(EACH_BLOCK_TYPE(false, false, false));
+tuple<ProbabilityTablesTuple(true, false, false)> top(EACH_BLOCK_TYPE(true, false, false));
+tuple<ProbabilityTablesTuple(false, true, true)> midleft(EACH_BLOCK_TYPE(false, true, true));
+tuple<ProbabilityTablesTuple(true, true, true)> middle(EACH_BLOCK_TYPE(true, true, true));
+tuple<ProbabilityTablesTuple(true, true, false)> midright(EACH_BLOCK_TYPE(true, true, false));
+tuple<ProbabilityTablesTuple(false, true, false)> width_one(EACH_BLOCK_TYPE(false, true, false));
+// Enumerators follow the declaration order of the six tuples above (presumably used to index them -- confirm).
+enum IDX_PTB { COR = 0, TOP, MLF, MMD, MRT, ONE }; // Xilinx
+/*
+void VP8ComponentEncoder::process_row_range(unsigned int thread_id,
+                                            const UncompressedComponents * const colldata,
+                                            int min_y,
+                                            int max_y,
+                                            ResizableByteBuffer *stream,
+                                            BoolEncoder *bool_encoder,
+                                            Sirikata::Array1d<std::vector<NeighborSummary>,
+                                                              (uint32_t)ColorChannel::NumBlockTypes
+                                                              > *num_nonzeros) {
+
+    TimingHarness::timing[thread_id][TimingHarness::TS_ARITH_STARTED] = TimingHarness::get_time_us();
+    using namespace Sirikata;
+    Array1d<ConstBlockContext, (uint32_t)ColorChannel::NumBlockTypes> context;
+    for (size_t i = 0; i < context.size(); ++i) {
+        context[i] = colldata->full_component_nosync(i).begin(num_nonzeros->at(i).begin());
+    }
+    uint8_t is_top_row[(uint32_t)ColorChannel::NumBlockTypes];
+    memset(is_top_row, true, sizeof(is_top_row));
+    ProbabilityTablesBase *model = nullptr;
+    if (do_threading_) {
+        reset_thread_model_state(thread_id);
+        model = &thread_state_[thread_id]->model_;
+    } else {
+        reset_thread_model_state(0);
+        model = &thread_state_[0]->model_;
+    }
+    KBlockBasedImagePerChannel<false> image_data;
+    for (int i = 0; i < colldata->get_num_components(); ++i) {
+        image_data[i] = &colldata->full_component_nosync((int)i);
+    }
+    uint32_t encode_index = 0;
+    Array1d<uint32_t, (uint32_t)ColorChannel::NumBlockTypes> max_coded_heights = colldata->get_max_coded_heights();
+    g_loops.START("LOOP:process_row_range, while", WHILE);
+    while(true) {
+        RowSpec cur_row = row_spec_from_index(encode_index++,
+                                              image_data,
+                                              colldata->get_mcu_count_vertical(),
+                                              max_coded_heights);
+        if(cur_row.done) {
+            break;
+        }
+        if (cur_row.luma_y >= max_y && thread_id + 1 != NUM_THREADS) {
+            break;
+        }
+        if (cur_row.skip) {
+            continue;
+        }
+        if (cur_row.luma_y < min_y) {
+            continue;
+        }
+        g_loops.CNT();
+        context[cur_row.component]
+            = image_data.at(cur_row.component)->off_y(cur_row.curr_y,
+                                                      num_nonzeros->at(cur_row.component).begin());
+        // DEBUG only fprintf(stderr, "Thread %d min_y %d - max_y %d cmp[%d] y = %d\n", thread_id, min_y, max_y,
+(int)component, curr_y);
+        int block_width = image_data.at(cur_row.component)->block_width();
+        if (is_top_row[cur_row.component]) {
+                g_loops.START("Brch:process_row_range,while, top_row", BRCH);g_loops.CNT();//g_loops.END();
+            is_top_row[cur_row.component] = false;
+            switch((BlockType)cur_row.component) {
+                case BlockType::Y:
+//                	g_loops.START("BRCH:process_row_range, while, top_row, Y",BRCH);g_loops.CNT();
+                        STTBRCH("BRCH:process_row_range, while, top_row, Y");STTCNT;
+                    process_row2(*model,
+                                (BlockType)cur_row.component,//BlockType::Y,
+                                                        block_width,
+                                                        true,
+                                                        false,
+                           // std::get<(int)BlockType::Y>(corner),
+                          //  std::get<(int)BlockType::Y>(top),
+                           // std::get<(int)BlockType::Y>(top),
+                            cur_row.curr_y,
+                            colldata,
+                            context,
+                            *bool_encoder);g_loops.END();
+                    break;
+                case BlockType::Cb:
+                        g_loops.START("BRCH:process_row_range, while, top_row, Cb",BRCH); g_loops.CNT();
+                    process_row2(*model,
+                                (BlockType)cur_row.component,//BlockType::Cb,
+                                                        block_width,
+                                                        true,
+                                                        false,
+                          //  std::get<(int)BlockType::Cb>(corner),
+                           // std::get<(int)BlockType::Cb>(top),
+                           // std::get<(int)BlockType::Cb>(top),
+                            cur_row.curr_y,
+                            colldata,
+                            context,
+                            *bool_encoder);g_loops.END();
+                    break;
+                case BlockType::Cr:
+                        g_loops.START("BRCH:process_row_range, while, top_row, Cr",BRCH);g_loops.CNT();
+                    process_row2(*model,
+                                (BlockType)cur_row.component,//BlockType::Cr,
+                                                        block_width,
+                                                        true,
+                                                        false,
+                         //   std::get<(int)BlockType::Cr>(corner),
+                         //   std::get<(int)BlockType::Cr>(top),
+                         //   std::get<(int)BlockType::Cr>(top),
+                            cur_row.curr_y,
+                            colldata,
+                            context,
+                            *bool_encoder);g_loops.END();
+                    break;
+#ifdef ALLOW_FOUR_COLORS
+                case BlockType::Ck:
+                    process_row(*model,
+                            std::get<(int)BlockType::Ck>(corner),
+                            std::get<(int)BlockType::Ck>(top),
+                            std::get<(int)BlockType::Ck>(top),
+                            cur_row.curr_y,
+                            colldata,
+                            context,
+                            *bool_encoder);
+                    break;
+#endif
+                    }
+             g_loops.END();//g_loops.START("process_row_top");g_loops.CNT();//
+        } else if (block_width > 1) {
+                g_loops.START("BRCH:process_row_range, while, !top&bw>1",BRCH);g_loops.CNT();//g_loops.END();//
+            switch((BlockType)cur_row.component) {
+                case BlockType::Y:
+                        g_loops.START("BRCH:process_row_range, while, !top&bw>1,
+Y",BRCH);g_loops.CNT();//g_loops.END();//
+                    process_row2(*model,
+                                (BlockType)cur_row.component,//BlockType::Y,
+                                                        block_width,
+                                                        false,
+                                                        false,
+                          //  std::get<(int)BlockType::Y>(midleft),
+                         //   std::get<(int)BlockType::Y>(middle),
+                         //   std::get<(int)BlockType::Y>(midright),
+                            cur_row.curr_y,
+                            colldata,
+                            context,
+                            *bool_encoder);
+                    g_loops.END();
+                    break;
+                case BlockType::Cb:
+                        g_loops.START("BRCH:process_row_range, while, !top&bw>1,
+Cb",BRCH);g_loops.CNT();//g_loops.END();//
+                    process_row2(*model,
+                                (BlockType)cur_row.component,//BlockType::Cb,
+                                                        block_width,
+                                                        false,
+                                                        false,
+                          //  std::get<(int)BlockType::Cb>(midleft),
+                         //   std::get<(int)BlockType::Cb>(middle),
+                          //  std::get<(int)BlockType::Cb>(midright),
+                            cur_row.curr_y,
+                            colldata,
+                            context,
+                            *bool_encoder);
+                    g_loops.END();
+                    break;
+                case BlockType::Cr:
+                        g_loops.START("BRCH:process_row_range,while,!top&bw>1,Cr",BRCH);g_loops.CNT();//g_loops.END();//
+                    process_row2(*model,
+                                (BlockType)cur_row.component,//BlockType::Cr,
+                                                        block_width,
+                                                        false,
+                                                        false,
+                         //   std::get<(int)BlockType::Cr>(midleft),
+                          //  std::get<(int)BlockType::Cr>(middle),
+                           // std::get<(int)BlockType::Cr>(midright),
+                            cur_row.curr_y,
+                            colldata,
+                            context,
+                            *bool_encoder);
+                    g_loops.END();
+                    break;
+#ifdef ALLOW_FOUR_COLORS
+                case BlockType::Ck:
+                    process_row(*model,
+                            std::get<(int)BlockType::Ck>(midleft),
+                            std::get<(int)BlockType::Ck>(middle),
+                            std::get<(int)BlockType::Ck>(midright),
+                            cur_row.curr_y,
+                            colldata,
+                            context,
+                            *bool_encoder);
+                    break;
+#endif
+            }
+             g_loops.END();//g_loops.START("process_row_top_!top&bw>1");g_loops.CNT();//
+        } else {
+                g_loops.START("BRCH:process_row_range, while, !top&bw==1", BRCH);g_loops.CNT();//g_loops.END();//
+            always_assert(block_width == 1);
+            switch((BlockType)cur_row.component) {
+                case BlockType::Y:
+                    process_row2(*model,
+                                (BlockType)cur_row.component,//BlockType::Y,
+                                                        block_width,
+                                                        false,
+                                                        true,//Only One
+                          //  std::get<(int)BlockType::Y>(width_one),
+                          //  std::get<(int)BlockType::Y>(width_one),
+                          //  std::get<(int)BlockType::Y>(width_one),
+                            cur_row.curr_y,
+                            colldata,
+                            context,
+                            *bool_encoder);
+                    break;
+                case BlockType::Cb:
+                    process_row2(*model,
+                                (BlockType)cur_row.component,//BlockType::Cb,
+                                                        block_width,
+                                                        false,
+                                                        true,//Only One
+                         //   std::get<(int)BlockType::Cb>(width_one),
+                         //   std::get<(int)BlockType::Cb>(width_one),
+                         //   std::get<(int)BlockType::Cb>(width_one),
+                            cur_row.curr_y,
+                            colldata,
+                            context,
+                            *bool_encoder);
+                break;
+                case BlockType::Cr:
+                    process_row2(*model,
+                                (BlockType)cur_row.component,//BlockType::Cr,
+                                                        block_width,
+                                                        false,
+                                                        true,//Only One
+                         //   std::get<(int)BlockType::Cr>(width_one),
+                         //   std::get<(int)BlockType::Cr>(width_one),
+                         //   std::get<(int)BlockType::Cr>(width_one),
+                            cur_row.curr_y,
+                            colldata,
+                            context,
+                            *bool_encoder);
+                    break;
+#ifdef ALLOW_FOUR_COLORS
+                case BlockType::Ck:
+                    process_row(*model,
+                            std::get<(int)BlockType::Ck>(width_one),
+                            std::get<(int)BlockType::Ck>(width_one),
+                            std::get<(int)BlockType::Ck>(width_one),
+                            cur_row.curr_y,
+                            colldata,
+                            context,
+                            *bool_encoder);
+                    break;
+#endif
+            }
+             g_loops.END();//g_loops.START("process_row_top_width_one");g_loops.CNT();//
+        }
+    }g_loops.END();//("process_row_range_while");
+
+    RowSpec test = row_spec_from_index(encode_index,
+                                       image_data,
+                                       colldata->get_mcu_count_vertical(),
+                                       max_coded_heights);
+
+    if (thread_id == NUM_THREADS - 1 && (test.skip == false || test.done == false)) {
+        fprintf(stderr, "Row spec test: cmp %d luma %d item %d skip %d done %d\n",
+                test.component, test.luma_y, test.curr_y, test.skip, test.done);
+        custom_exit(ExitCode::ASSERTION_FAILURE);
+    }
+    bool_encoder->finish(*stream);
+    TimingHarness::timing[thread_id][TimingHarness::TS_ARITH_FINISHED] = TimingHarness::get_time_us();
+}*/
+
+// Opens the file named by $LEPTON_COMPRESSION_MODEL_OUT write-only (create + truncate).
+// Returns whatever open() returns, or -1 when the variable is not set.
+int load_model_file_fd_output() {
+    const char* const model_path = getenv("LEPTON_COMPRESSION_MODEL_OUT");
+    if (model_path == nullptr) return -1;
+#ifdef _WIN32
+    return open(model_path, O_CREAT | O_TRUNC | O_WRONLY, 0);
+#else
+    return open(model_path, O_CREAT | O_TRUNC | O_WRONLY, 0 | S_IWUSR | S_IRUSR);
+#endif
+}
+int model_file_fd = load_model_file_fd_output(); // evaluated once, during static initialization
+CodingReturnValue VP8ComponentEncoder::vp8_full_encoder(const UncompressedComponents* const colldata,
+                                                        IOUtil::FileWriter* str_out,
+                                                        const ThreadHandoff* selected_splits,
+                                                        unsigned int num_selected_splits) {
+    /* Encodes colldata with a single BoolEncoder, muxes its bytes into str_out as stream 0, then appends a size trailer. */
+    using namespace Sirikata;
+    /* selected_splits and num_selected_splits are not read in this body; every path returns CODING_DONE. */
+    // Stage 1: process_row_range2 is handed index 0, colldata, and bool_encoder_0 together with its output_ buffer.
+    BoolEncoder bool_encoder_0;
+    // bool_encoder_0.output_.resize(MAX_NUM_PIX);
+    fprintf(stderr, "Enter a empty process_row_range\n");
+    process_row_range2(0, colldata,
+                       &(bool_encoder_0.output_), // stream[0],
+                       &bool_encoder_0);          // bool_encoder[0]);
+    fprintf(stderr, "Done: a empty process_row_range\n");
+    static_assert(MAX_NUM_THREADS * SIMD_WIDTH <= MuxReader::MAX_STREAM_ID,
+                  "Need to have enough mux streams for all threads and simd width");
+    // Stage 2: with threading enabled, wait on workers 1..NUM_THREADS-1 and timestamp each wait.
+    if (do_threading_) {
+        for (unsigned int thread_id = 1; thread_id < NUM_THREADS; ++thread_id) {
+            TimingHarness::timing[thread_id][TimingHarness::TS_THREAD_WAIT_STARTED] = TimingHarness::get_time_us();
+            spin_workers_[thread_id - 1].main_wait_for_done();
+            TimingHarness::timing[thread_id][TimingHarness::TS_THREAD_WAIT_FINISHED] = TimingHarness::get_time_us();
+        }
+    }
+    TimingHarness::timing[0][TimingHarness::TS_STREAM_MULTIPLEX_STARTED] = TimingHarness::get_time_us();
+    // Stage 3: mux the encoder output; the chunk at offset 0 is capped at 256 bytes, at offset 256 at 4096, else 65536.
+    Sirikata::MuxWriter mux_writer(str_out, JpegAllocator<uint8_t>());
+    size_t stream_data_offset[MuxReader::MAX_STREAM_ID] = {0};
+    bool any_written = true;
+    while (any_written) { // NOTE(review): assumes Write() always reports progress; a zero count would spin forever
+        any_written = false;
+        // for (int i = 0; i < MuxReader::MAX_STREAM_ID; ++i) {
+        for (int i = 0; i < 1; ++i) { // only stream 0 is visited in this variant
+            if (bool_encoder_0.output_.size() > stream_data_offset[i]) {
+                any_written = true;
+                size_t max_written = 65536;
+                if (stream_data_offset[i] == 0) {
+                    max_written = 256;
+                } else if (stream_data_offset[i] == 256) {
+                    max_written = 4096;
+                }
+                auto to_write = std::min(max_written, bool_encoder_0.output_.size() - stream_data_offset[i]);
+                stream_data_offset[i] += // advance by the count Write() returns in .first
+                    mux_writer.Write(i, &(bool_encoder_0.output_)[stream_data_offset[i]], to_write).first;
+            }
+        }
+    }
+    mux_writer.Close();
+    write_byte_bill(Billing::DELIMITERS, true, mux_writer.getOverhead());
+    // we can probably exit(0) here
+    TimingHarness::timing[0][TimingHarness::TS_STREAM_MULTIPLEX_FINISHED] =
+        TimingHarness::timing[0][TimingHarness::TS_STREAM_FLUSH_STARTED] = TimingHarness::get_time_us();
+    check_decompression_memory_bound_ok(); // this has to happen before last
+    // bytes are written
+    /* Stage 4: append the final output size (these 4 bytes included) as a little-endian uint32_t */
+    {
+        uint32_t out_file_size = str_out->getsize() + 4; // gotta include the final uint32_t
+        uint32_t file_size = out_file_size;
+        uint8_t out_buffer[sizeof(out_file_size)] = {};
+        for (uint8_t i = 0; i < sizeof(out_file_size); ++i) {
+            out_buffer[i] = out_file_size & 0xff;
+            out_file_size >>= 8;
+        }
+        str_out->Write(out_buffer, sizeof(out_file_size));
+        write_byte_bill(Billing::HEADER, true, sizeof(out_file_size));
+        (void)file_size;
+        always_assert(str_out->getsize() == file_size);
+    }
+    // Stage 5: when model_file_fd is valid, announce on fd 2 and call optimize()/serialize() on the Y entry of middle.
+    if (model_file_fd >= 0) {
+        const char* msg = "Writing new compression model...\n";
+        while (write(2, msg, strlen(msg)) < 0 && errno == EINTR) { // retry only when interrupted
+        }
+
+        std::get<(int)BlockType::Y>(middle).optimize(thread_state_[0]->model_);
+        std::get<(int)BlockType::Y>(middle).serialize(thread_state_[0]->model_, model_file_fd);
+    }
+#ifdef ANNOTATION_ENABLED
+    // Hand /tmp/lepton.ctx to printContext; skipped when fopen fails so NULL never reaches printContext/fclose.
+    if (FILE* fp = fopen("/tmp/lepton.ctx", "w")) {
+        printContext(fp);
+        fclose(fp);
+    }
+#endif
+    TimingHarness::timing[0][TimingHarness::TS_STREAM_FLUSH_FINISHED] = TimingHarness::get_time_us();
+    return CODING_DONE;
+}
+
+CodingReturnValue VP8ComponentEncoder::vp8_full_encoder(const UncompressedComponents* const colldata,
+                                                        IOUtil::FileWriter* str_out,
+                                                        const ThreadHandoff* selected_splits,
+                                                        unsigned int num_selected_splits,
+                                                        struct_arith& arith,
+                                                        uint8_t* res) {
+    /* Calls vpx_stop_encode on a vpx_writer rebuilt from arith and res, then muxes res into str_out as stream 0. */
+    using namespace Sirikata;
+    /* colldata (outside the disabled call), selected_splits and num_selected_splits are not read; returns CODING_DONE. */
+    // Stage 1: copy the coder state out of arith, stop the encode, and expose res through bool_encoder_0.output_.
+    BoolEncoder bool_encoder_0;
+    // bool_encoder_0.output_.resize(MAX_NUM_PIX);
+    fprintf(stderr, "Enter a empty process_row_range\n");
+    /*process_row_range2(0,
+                      colldata,
+                      &(bool_encoder_0.output_),//stream[0],
+                      &bool_encoder_0);//bool_encoder[0]);*/
+    vpx_writer boolwriter;
+    boolwriter.buffer = res;
+    boolwriter.lowvalue = arith.value;
+    boolwriter.range = arith.range;
+    boolwriter.count = arith.count;
+    boolwriter.pos = arith.pos;
+    boolwriter.run = arith.run;
+    boolwriter.isFirst = arith.isFirst;
+    vpx_stop_encode(&boolwriter);
+    uint32_t pos = boolwriter.pos; // pos read back after vpx_stop_encode is used as output_'s size
+    bool_encoder_0.output_.mSize = pos;
+    bool_encoder_0.output_.mReserved = MAX_NUM_PIX; // NOTE(review): presumes res holds MAX_NUM_PIX bytes; confirm with caller
+    bool_encoder_0.output_.mBegin = res; // NOTE(review): output_ now aliases caller-owned res; confirm it is not freed on destruction
+    fprintf(stderr, "Done: a empty process_row_range\n");
+    // Compile-time check that the mux has enough stream ids for every thread and SIMD lane.
+    static_assert(MAX_NUM_THREADS * SIMD_WIDTH <= MuxReader::MAX_STREAM_ID,
+                  "Need to have enough mux streams for all threads and simd width");
+    // Stage 2: with threading enabled, wait on workers 1..NUM_THREADS-1 and timestamp each wait.
+    if (do_threading_) {
+        for (unsigned int thread_id = 1; thread_id < NUM_THREADS; ++thread_id) {
+            TimingHarness::timing[thread_id][TimingHarness::TS_THREAD_WAIT_STARTED] = TimingHarness::get_time_us();
+            spin_workers_[thread_id - 1].main_wait_for_done();
+            TimingHarness::timing[thread_id][TimingHarness::TS_THREAD_WAIT_FINISHED] = TimingHarness::get_time_us();
+        }
+    }
+    TimingHarness::timing[0][TimingHarness::TS_STREAM_MULTIPLEX_STARTED] = TimingHarness::get_time_us();
+    // Stage 3: mux the encoder output; the chunk at offset 0 is capped at 256 bytes, at offset 256 at 4096, else 65536.
+    Sirikata::MuxWriter mux_writer(str_out, JpegAllocator<uint8_t>());
+    size_t stream_data_offset[MuxReader::MAX_STREAM_ID] = {0};
+    bool any_written = true;
+    while (any_written) { // NOTE(review): assumes Write() always reports progress; a zero count would spin forever
+        any_written = false;
+        // for (int i = 0; i < MuxReader::MAX_STREAM_ID; ++i) {
+        for (int i = 0; i < 1; ++i) { // only stream 0 is visited in this variant
+            if (bool_encoder_0.output_.size() > stream_data_offset[i]) {
+                any_written = true;
+                size_t max_written = 65536;
+                if (stream_data_offset[i] == 0) {
+                    max_written = 256;
+                } else if (stream_data_offset[i] == 256) {
+                    max_written = 4096;
+                }
+                auto to_write = std::min(max_written, bool_encoder_0.output_.size() - stream_data_offset[i]);
+                stream_data_offset[i] += // advance by the count Write() returns in .first
+                    mux_writer.Write(i, &(bool_encoder_0.output_)[stream_data_offset[i]], to_write).first;
+            }
+        }
+    }
+    mux_writer.Close();
+    write_byte_bill(Billing::DELIMITERS, true, mux_writer.getOverhead());
+    // we can probably exit(0) here
+    TimingHarness::timing[0][TimingHarness::TS_STREAM_MULTIPLEX_FINISHED] =
+        TimingHarness::timing[0][TimingHarness::TS_STREAM_FLUSH_STARTED] = TimingHarness::get_time_us();
+    check_decompression_memory_bound_ok(); // this has to happen before last
+    // bytes are written
+    /* Stage 4: append the final output size (these 4 bytes included) as a little-endian uint32_t */
+    {
+        uint32_t out_file_size = str_out->getsize() + 4; // gotta include the final uint32_t
+        uint32_t file_size = out_file_size;
+        uint8_t out_buffer[sizeof(out_file_size)] = {};
+        for (uint8_t i = 0; i < sizeof(out_file_size); ++i) {
+            out_buffer[i] = out_file_size & 0xff;
+            out_file_size >>= 8;
+        }
+        str_out->Write(out_buffer, sizeof(out_file_size));
+        write_byte_bill(Billing::HEADER, true, sizeof(out_file_size));
+        (void)file_size;
+        always_assert(str_out->getsize() == file_size);
+    }
+    // Stage 5: when model_file_fd is valid, announce on fd 2 and call optimize()/serialize() on the Y entry of middle.
+    if (model_file_fd >= 0) {
+        const char* msg = "Writing new compression model...\n";
+        while (write(2, msg, strlen(msg)) < 0 && errno == EINTR) { // retry only when interrupted
+        }
+
+        std::get<(int)BlockType::Y>(middle).optimize(thread_state_[0]->model_);
+        std::get<(int)BlockType::Y>(middle).serialize(thread_state_[0]->model_, model_file_fd);
+    }
+#ifdef ANNOTATION_ENABLED
+    // Hand /tmp/lepton.ctx to printContext; skipped when fopen fails so NULL never reaches printContext/fclose.
+    if (FILE* fp = fopen("/tmp/lepton.ctx", "w")) {
+        printContext(fp);
+        fclose(fp);
+    }
+#endif
+    TimingHarness::timing[0][TimingHarness::TS_STREAM_FLUSH_FINISHED] = TimingHarness::get_time_us();
+    return CODING_DONE;
+}
+
+////////////////////////////////////////////////////////////////////////////////////
+
+// template<class Left, class Middle, class Right>
+/*void VP8ComponentEncoder:: process_row2(ProbabilityTablesBase &pt,
+                                                                                BlockType color,
+                                                                                int block_width,
+                                                                                bool isTopRow,
+                                                                                bool isOnlyOne,
+                                      //Left & left_model,
+                                      //Middle& middle_model,
+                                      //Right& right_model,
+                                      int curr_y,
+                                      const UncompressedComponents * const colldata,
+                                      Sirikata::Array1d<ConstBlockContext,
+                                              (uint32_t)ColorChannel::NumBlockTypes> &context,
+                                      BoolEncoder &bool_encoder){
+
+    g_loops.START("LOOP:process_row, jpeg_x:=0 to BW", FOR);
+    for ( unsigned int jpeg_x = 0; jpeg_x + 0 < block_width; jpeg_x++ ) {
+        g_loops.CNT();
+        ConstBlockContext state = context.at((int)color);
+        const AlignedBlock &block = state.here();
+        state.num_nonzeros_here->set_num_nonzeros(block.recalculate_coded_length()); //FIXME set edge pixels too
+        bool left        = (jpeg_x==0)             ? false : (block_width>1);
+        bool above       = !isTopRow;
+        bool above_right = isTopRow                ? false : (jpeg_x < block_width);
+        bool has_left    = (jpeg_x+1==block_width) ? false : true;
+        serialize_tokens(
+                        color,
+                                left,
+                                above,
+                                above_right,
+                                state,
+                bool_encoder,
+                pt);
+        uint32_t offset = colldata->full_component_nosync((int)color).next(state, has_left, curr_y);
+        context.at((int)color) = state;
+
+    }g_loops.END();
+
+}*/
+
+void next2(ConstBlockContext& it, bool has_left, int width_, int cur_y) { // has_left, width_, cur_y: used only by the disabled code below
+    it.cur += 1; // the only live effect: step the context's current-block cursor forward by one
+    /* if (cur_y==0) {
+         it.above = it.cur + width_;
+
+     } else {
+         it.above = it.cur - width_;
+     }
+     ++it.num_nonzeros_here;
+     ++it.num_nonzeros_above;
+     if (!has_left) {
+         bool cur_row_first = (it.num_nonzeros_here < it.num_nonzeros_above);
+         if (cur_row_first) {
+             it.num_nonzeros_above -= width_;
+             it.num_nonzeros_above -= width_;
+         } else {
+             it.num_nonzeros_here -= width_;
+             it.num_nonzeros_here -= width_;
+         }
+     }*/
+}
+
+void pre_scan_tmp(ConstBlockContext context, uint8_t* num_nonzeros_7x7, uint8_t* eob_x, uint8_t* eob_y) {
+    // Scans the 49 coefficients stored from AlignedBlock::AC_7x7_INDEX onward in the block at context.here().
+    // Out: *num_nonzeros_7x7 = nonzero coefficients with x and y index both > 0; *eob_x / *eob_y = largest
+    //      x / y index (decoded from unzigzag49) at which a nonzero coefficient was seen, or 0 if none.
+    *num_nonzeros_7x7 = 0; // recounted here rather than taken from context.num_nonzeros_here->num_nonzeros()
+    *eob_x = 0;
+    *eob_y = 0;
+    // Always a full scan: the counted early exit (zz < 49 && remaining nonzeros) is disabled in this variant.
+    for (unsigned int zz = 0; zz < 49; ++zz) {
+        unsigned int coord = unzigzag49[zz];
+        unsigned int b_x = (coord & 7); // low three bits: x index
+        unsigned int b_y = coord >> 3;  // remaining bits: y index
+        int16_t coef = context.here().coef.at(zz + AlignedBlock::AC_7x7_INDEX);
+        if (coef != 0) {
+            if (b_x > 0 && b_y > 0) {
+                (*num_nonzeros_7x7)++;
+            }
+            // Track the furthest x and y positions that hold a nonzero coefficient.
+            if (b_x > *eob_x) *eob_x = b_x;
+            if (b_y > *eob_y) *eob_y = b_y;
+        }
+    }
+}
+void pre_scan_tmp(AlignedBlock* context, uint8_t* num_nonzeros_7x7, uint8_t* eob_x, uint8_t* eob_y) {
+    // Scans the 49 coefficients stored from AlignedBlock::AC_7x7_INDEX onward in the block context points to.
+    // Out: *num_nonzeros_7x7 = nonzero coefficients with x and y index both > 0; *eob_x / *eob_y = largest
+    //      x / y index (decoded from unzigzag49) at which a nonzero coefficient was seen, or 0 if none.
+    *num_nonzeros_7x7 = 0; // recounted here; no stored count is consulted
+    *eob_x = 0;
+    *eob_y = 0;
+    // Always a full scan: the counted early exit (zz < 49 && remaining nonzeros) is disabled in this variant.
+    for (unsigned int zz = 0; zz < 49; ++zz) {
+        unsigned int coord = unzigzag49[zz];
+        unsigned int b_x = (coord & 7); // low three bits: x index
+        unsigned int b_y = coord >> 3;  // remaining bits: y index
+        int16_t coef = context->coef.at(zz + AlignedBlock::AC_7x7_INDEX);
+        if (coef != 0) {
+            if (b_x > 0 && b_y > 0) {
+                (*num_nonzeros_7x7)++;
+            }
+            // Track the furthest x and y positions that hold a nonzero coefficient.
+            if (b_x > *eob_x) *eob_x = b_x;
+            if (b_y > *eob_y) *eob_y = b_y;
+        }
+    }
+}
+
+void pre_scan_tmp(int16_t* context, uint8_t* num_nonzeros_7x7, uint8_t* eob_x, uint8_t* eob_y) {
+    // Scans the 49 coefficients context[AlignedBlock::AC_7x7_INDEX .. AlignedBlock::AC_7x7_INDEX + 48].
+    // Out: *num_nonzeros_7x7 = nonzero coefficients with x and y index both > 0; *eob_x / *eob_y = largest
+    //      x / y index (decoded from unzigzag49) at which a nonzero coefficient was seen, or 0 if none.
+    *num_nonzeros_7x7 = 0; // recounted here; no stored count is consulted
+    *eob_x = 0;
+    *eob_y = 0;
+    // Always a full scan: the counted early exit (zz < 49 && remaining nonzeros) is disabled in this variant.
+    for (unsigned int zz = 0; zz < 49; ++zz) {
+        unsigned int coord = unzigzag49[zz];
+        unsigned int b_x = (coord & 7); // low three bits: x index
+        unsigned int b_y = coord >> 3;  // remaining bits: y index
+        int16_t coef = context[zz + AlignedBlock::AC_7x7_INDEX];
+        if (coef != 0) {
+            if (b_x > 0 && b_y > 0) {
+                (*num_nonzeros_7x7)++;
+            }
+            // Track the furthest x and y positions that hold a nonzero coefficient.
+            if (b_x > *eob_x) *eob_x = b_x;
+            if (b_y > *eob_y) *eob_y = b_y;
+        }
+    }
+}
+void pre_scan_tmp_77(ConstBlockContext context, uint8_t* num_nonzeros_7x7, uint8_t* eob_x, uint8_t* eob_y) {
+    // Reports the stored nonzero count and the largest x / y index at which a 7x7 coefficient is nonzero.
+    *num_nonzeros_7x7 = context.num_nonzeros_here->num_nonzeros();
+    *eob_x = 0;
+    *eob_y = 0;
+    // The count is never decremented while scanning, so it only decides whether the 49-entry scan runs at all.
+    const bool scan_needed = (*num_nonzeros_7x7 != 0);
+    for (unsigned int idx = 0; scan_needed && idx < 49; ++idx) {
+        const unsigned int packed = unzigzag49[idx];
+        const unsigned int x_pos = packed % 8;
+        const unsigned int y_pos = packed / 8;
+        const int16_t value = context.here().coef.at(idx + AlignedBlock::AC_7x7_INDEX);
+        if (value == 0) continue;
+        if (*eob_x < x_pos) *eob_x = x_pos;
+        if (*eob_y < y_pos) *eob_y = y_pos;
+    }
+}
+void tmp_cp_AlignedBlock(hls_AlignedBlock& des, const AlignedBlock& src) {
+    // Element-wise copy of all 64 coefficients from src.coef (read via at()) into des.coef.
+    for (int idx = 0; idx != 64; ++idx)
+        des.coef[idx] = src.coef.at(idx);
+}
+void tmp_cp_AlignedBlock(int16_t des[64], AlignedBlock* src) {
+    // Fill the plain array des with the 64 coefficients of the block src points to.
+    for (int idx = 0; idx != 64; ++idx)
+        des[idx] = src->coef.at(idx);
+}
+void tmp_cp_AlignedBlock(int16_t des[64], const AlignedBlock& src) {
+    // Fill the plain array des with the 64 coefficients of the referenced block.
+    for (int idx = 0; idx != 64; ++idx)
+        des[idx] = src.coef.at(idx);
+}
+void tmp_cp_Block(int16_t des[64], int16_t src[64]) {
+    // Copy the 64 coefficients of src into des, in ascending index order.
+    for (int idx = 0; idx != 64; ++idx)
+        des[idx] = src[idx];
+}
+
+void tmp_cp_AlignedBlock(hls_AlignedBlock& des, hls_AlignedBlock& src) {
+    // Element-wise copy of all 64 coefficients from src.coef into des.coef.
+    for (int idx = 0; idx != 64; ++idx)
+        des.coef[idx] = src.coef[idx];
+}
+
+#if 0
+//hls::stream<tmp_struct> str_77;
+//hls::stream<tmp_struct> str_edges;
+//hls::stream<tmp_struct> str_dc;
+void tmp_cp_AXI(int16_t des[64], WD_AXI* src)
+{
+	// Reads the memory at src as a flat run of int16_t values and copies the first 64 of them into des.
+	// (Disabled alternative: des[i] = src[i/NUM_COEF_AXI].data[i%NUM_COEF_AXI];)
+	const int16_t* flat = (int16_t*)src;
+	for (int k = 0; k != 64; ++k)
+		des[k] = flat[k];
+}
+void Sim_DDr_InitImage(WD_AXI* axi_coeff, const UncompressedComponents * const colldata,
+		//Sirikata::Array1d<std::vector<NeighborSummary>, (uint32_t)ColorChannel::NumBlockTypes> num_nonzeros[1],
+		uint16_t        axi_width               [MAX_NUM_COLOR],//colldata->block_width(i);
+		uint16_t        axi_height              [MAX_NUM_COLOR],//colldata->block_width(i);
+		uint8_t         axi_map_row2cmp         [4], //     AXI                   2,1,0,0 2,1,0
+		uint16_t        axi_mcuv,
+		uint8_t         axi_num_cmp_mcu
+		)
+{
+    // Packs coefficient blocks from colldata into axi_coeff: for each of axi_mcuv passes and each of the first
+    // axi_num_cmp_mcu entries of axi_map_row2cmp, axi_width[id] blocks are copied (axi_height is not read).
+    AlignedBlock* state3[MAX_NUM_COLOR] = { // per-component read cursor; NOTE(review): presumes MAX_NUM_COLOR >= 3
+        colldata->full_component_nosync(0).image_,
+        colldata->full_component_nosync(1).image_,
+        colldata->full_component_nosync(2).image_
+    };
+    WD_AXI* p_axi = axi_coeff; // write cursor into the AXI buffer
+    for (int i_mcuv = 0; i_mcuv < axi_mcuv; i_mcuv++) {
+        for (int idx_cmp = 0; idx_cmp < axi_num_cmp_mcu; idx_cmp++) {
+            // Component index for this slot; it selects both the width and the read cursor.
+            uint8_t id_cmp = axi_map_row2cmp[idx_cmp];
+            uint16_t block_width = axi_width[id_cmp];
+            for (int jpeg_x = 0; jpeg_x < block_width; jpeg_x++) {
+                int16_t* pc = (int16_t*)p_axi; // the AXI words are filled as a flat int16_t array
+                tmp_cp_AlignedBlock(pc, state3[id_cmp]);
+                p_axi += (64 / NUM_COEF_AXI); // each copied block advances the cursor by 64/NUM_COEF_AXI words
+                state3[id_cmp]++;
+            }
+        }
+    }
+}
+
+
+
+void kernel_run(
+    //input
+    WD_AXI          axi_coeff               [MAX_COEF_AXI],
+    uint16_t        axi_width               [MAX_NUM_COLOR],//colldata->block_width(i);
+    uint16_t        axi_height              [MAX_NUM_COLOR],//colldata->block_width(i);
+    uint8_t         axi_map_row2cmp         [4], //     AXI                   2,1,0,0 2,1,0
+    uint8_t         min_nois_thld_x         [MAX_NUM_COLOR][64],
+    uint8_t         min_nois_thld_y         [MAX_NUM_COLOR][64],
+    uint8_t         q_tables                [MAX_NUM_COLOR][8][8],//[64],
+    int32_t         idct_q_table_x          [MAX_NUM_COLOR][8][8],
+    int32_t         idct_q_table_y          [MAX_NUM_COLOR][8][8],
+    int32_t         idct_q_table_l          [MAX_NUM_COLOR][8][8],
+
+    uint16_t        axi_mcuv,
+    uint8_t         axi_num_cmp_mcu,
+    uint8_t         axi_num_cmp,
+    //tmp output
+    uint8_t         axi_res                 [MAX_NUM_PIX],
+    struct_arith    &axi_arith
+){
+
+    bool is_top_row[MAX_NUM_COLOR]= {true, true, true};
+    uint16_t cur_y_cmp[MAX_NUM_COLOR] = {0,0,0};
+
+    uint8_t array_num_nonzeros_7x7_above[MAX_NUM_COLOR][MAX_NUM_BLOCK88_W];
+    int16_t array_coef_above_77         [MAX_NUM_COLOR][MAX_NUM_BLOCK88_W][64];
+    int16_t array_coef_above_edges      [MAX_NUM_COLOR][MAX_NUM_BLOCK88_W][64];
+    uint16_t array_edge_above           [MAX_NUM_COLOR][MAX_NUM_BLOCK88_W][8];//2 uram used;
+
+    WD_AXI*  pcoef = axi_coeff;
+
+    hls::stream<bool>    strm_bit;
+    hls::stream<uint8_t> strm_prob;
+    hls::stream<bool>    strm_e;
+    hls::stream<uint8_t> strm_tab_dbg;
+    unsigned char    range = 128;//boolwriter.range;
+    int              count = -24;//boolwriter.count;
+    unsigned int     value = 0;//boolwriter.lowvalue;
+    unsigned char pre_byte = 0;//boolwriter.pre_byte;
+    unsigned short     run = 0;//boolwriter.run;
+    bool           isFirst = 1;//boolwriter.isFirst;
+    unsigned int       pos = 0;//boolwriter.pos;
+    unsigned int      pos2 = 0;
+    
+    for( int i_mcuv = 0; i_mcuv < axi_mcuv; i_mcuv++){
+        if(i_mcuv==axi_mcuv-1)
+                    i_mcuv=axi_mcuv-1;
+        for(int idx_cmp = 0; idx_cmp < axi_num_cmp_mcu ; idx_cmp++){
+            uint8_t id_cmp        = axi_map_row2cmp[idx_cmp];
+            uint16_t block_width  = axi_width [id_cmp];
+            uint16_t block_height = axi_height[id_cmp];
+            uint16_t cur_y        = cur_y_cmp [id_cmp];
+            bool is_top_row_cmp   = is_top_row[id_cmp];
+
+            hls::stream<coeff_64> str_tmp_coeff_here_edges;
+            hls::stream<coeff_64> str_tmp_coeff_here_dc;
+
+            ///////////////////////////////////////////////////////////////////////////
+            // PRE   //////////////////////////////////////////////////////////////////
+    		hls::stream<ap_int<11> >  coef[8];
+
+            hls::stream<ap_int<11> > strm_coef[8];
+            hls::stream<ap_int<11> > strm_coef_7x7("coef_7x7");
+            hls::stream<ap_int<11> > strm_coef_lft("coef_lft");
+            hls::stream<ap_int<11> > strm_coef_abv("coef_abv");
+            hls::stream<ap_int<11> > strm_coef_abv_lft("coef_abv_lft");
+            hls::stream<ap_int<11> > strm_coef_h[8];
+#pragma HLS stream depth = 64 variable = strm_coef_h
+            hls::stream<ap_int<11> > strm_coef_above_h[8];
+#pragma HLS stream depth = 64 variable = strm_coef_above_h
+            hls::stream<bool>        strm_has_left_h;
+#pragma HLS stream depth = 64 variable = strm_has_left_h
+            hls::stream<bool>        strm_coef_e_h;
+#pragma HLS stream depth = 64 variable = strm_coef_e_h
+
+            hls::stream<ap_int<11> > strm_coef_v[8];
+#pragma HLS stream depth = 64 variable = strm_coef_v
+            hls::stream<ap_int<11> > strm_coef_left_v[8];
+#pragma HLS stream depth = 64 variable = strm_coef_left_v
+            hls::stream<bool>        strm_has_left_v;
+#pragma HLS stream depth = 64 variable = strm_has_left_v
+            hls::stream<bool>        strm_coef_e_v;
+#pragma HLS stream depth = 64 variable = strm_coef_e_v
+            hls::stream<ap_uint<6> > strm_non_zero_cnt("non_zero_cnt");
+            hls::stream<ap_uint<6> > strm_non_zero_cnt_lft("non_zero_cnt_lft");
+            hls::stream<ap_uint<6> > strm_non_zero_cnt_abv("non_zero_cnt_abv");
+            hls::stream<ap_uint<6> > strm_non_zero_7x7("non_zero_7x7");
+            hls::stream<ap_uint<6> > strm_non_zero_h("non_zero_h");
+            hls::stream<ap_uint<6> > strm_non_zero_v("non_zero_v");
+//                hls::stream<ap_uint<3> > strm_coef_cnt_h("coef_cnt_h");
+            hls::stream<ap_uint<3> > strm_coef_cnt_exp_h;
+            hls::stream<ap_uint<3> > strm_coef_cnt_sign_h;
+            hls::stream<ap_uint<3> > strm_coef_cnt_nois_h;
+//                hls::stream<ap_uint<3> > strm_coef_cnt_v("coef_cnt_v");
+            hls::stream<ap_uint<3> > strm_coef_cnt_exp_v;
+            hls::stream<ap_uint<3> > strm_coef_cnt_sign_v;
+            hls::stream<ap_uint<3> > strm_coef_cnt_nois_v;
+            hls::stream<ap_uint<3> > strm_coef_cnt_h_len("coef_cnt_h_len");
+            hls::stream<ap_uint<3> > strm_coef_cnt_v_len("coef_cnt_v_len");
+            hls::stream<ap_uint<3> > strm_lane_h;
+            hls::stream<ap_uint<3> > strm_lane_v;
+
+            hls::stream<ap_uint<3> > strm_eob_x("eob_x");
+            hls::stream<ap_uint<3> > strm_eob_y("eob_y");
+
+            // For DC
+            hls::stream< coef_t> str_rast8[8];
+            hls::stream< coef_t> str_dc1;
+            hls::stream< coef_t> str_dc2;
+
+            //FROM DDR
+            //===================================================================
+            for ( int jpeg_x = 0; jpeg_x + 0 < block_width; jpeg_x++ ) {
+                // Read from DDR
+                int16_t coef_here[64];
+                tmp_cp_AXI(coef_here, pcoef);
+                pcoef += STRIP_COEFF_AXI;
+
+                // For 77
+                for(int i=0;i<8;i++){
+                    for(int j=0;j<8;j++){
+                    	coef[j].write(coef_here[8*i+j]);
+                //        std::cout<<coef_here[8*i+j]<<" ";
+                    }
+                //    std::cout<<std::endl;
+                }
+                //std::cout<<std::endl;
+
+            }
+            //==================================================================
+
+            ap_uint<32> cnt=0;
+            ap_int<11> coef_reg[8];
+#pragma HLS array_partition variable = coef_reg complete dim = 0
+
+            while(cnt<block_width*8){
+#pragma HLS pipeline II = 1
+
+            	coef_reg[0]=coef[0].read();
+            	coef_reg[1]=coef[1].read();
+            	coef_reg[2]=coef[2].read();
+            	coef_reg[3]=coef[3].read();
+            	coef_reg[4]=coef[4].read();
+            	coef_reg[5]=coef[5].read();
+            	coef_reg[6]=coef[6].read();
+            	coef_reg[7]=coef[7].read();
+
+            	strm_coef[0].write(coef_reg[0]);
+            	strm_coef[1].write(coef_reg[1]);
+            	strm_coef[2].write(coef_reg[2]);
+            	strm_coef[3].write(coef_reg[3]);
+            	strm_coef[4].write(coef_reg[4]);
+            	strm_coef[5].write(coef_reg[5]);
+            	strm_coef[6].write(coef_reg[6]);
+            	strm_coef[7].write(coef_reg[7]);
+
+            	str_rast8[0].write(coef_reg[0]);
+            	str_rast8[1].write(coef_reg[1]);
+            	str_rast8[2].write(coef_reg[2]);
+            	str_rast8[3].write(coef_reg[3]);
+            	str_rast8[4].write(coef_reg[4]);
+            	str_rast8[5].write(coef_reg[5]);
+            	str_rast8[6].write(coef_reg[6]);
+            	str_rast8[7].write(coef_reg[7]);
+
+            	if(cnt(2,0)==0){
+            		str_dc1.write(coef_reg[0]);
+            	}
+        		cnt++;
+            }
+
+            preprocess(
+                block_width,
+                id_cmp,
+                is_top_row_cmp,
+                strm_coef,
+                strm_coef_7x7,
+                strm_coef_lft,
+                strm_coef_abv,
+                strm_coef_abv_lft,
+
+                strm_coef_h,
+                strm_coef_above_h,
+                strm_has_left_h,
+                strm_coef_e_h,
+                strm_coef_v,
+                strm_coef_left_v,
+                strm_has_left_v,
+                strm_coef_e_v,
+                strm_non_zero_cnt,
+                strm_non_zero_cnt_lft,
+                strm_non_zero_cnt_abv,
+                strm_non_zero_7x7,
+                strm_non_zero_h,
+                strm_coef_cnt_h_len,
+                strm_lane_h,
+                strm_coef_cnt_v_len,
+                strm_lane_v,
+                strm_eob_x,
+                strm_eob_y
+			);
+            /// PRE ////////////////////////////////////////////////////////////////////
+            ///////////////////////////////////////////////////////////////////////////
+
+
+            //////////////////////////////////////////////////////////////////////
+            //77//////////////////////////////////////////////////////////////////
+
+        	hls::stream<ap_uint<4>  > strm_sel_tab_77;
+        	hls::stream<bool>		  strm_cur_bit_77;
+        	hls::stream<bool>		  strm_e_77;
+        	hls::stream<ap_uint<16> > strm_addr1_77;
+        	hls::stream<ap_uint<16> > strm_addr2_77;
+        	hls::stream<ap_uint<16> > strm_addr3_77;
+        	hls::stream<ap_uint<16> > strm_addr4_77;
+
+            hls_serialize_tokens_77(
+                block_width,
+                !is_top_row_cmp,
+                
+                strm_non_zero_7x7,
+                strm_coef_7x7,
+                strm_coef_abv,
+                strm_coef_lft,
+                strm_coef_abv_lft,
+
+                strm_non_zero_cnt,
+                strm_non_zero_cnt_abv,
+                strm_non_zero_cnt_lft,
+        
+				strm_sel_tab_77,
+				strm_cur_bit_77,
+				strm_e_77,
+				strm_addr1_77,
+				strm_addr2_77,
+				strm_addr3_77,
+				strm_addr4_77
+            );
+                
+
+
+            //77  /////////////////////////////////////////////////////////////////////
+            ///////////////////////////////////////////////////////////////////////////
+
+
+            ////////////////////////////////////////////////////////////////////////
+            //EDGES/////////////////////////////////////////////////////////////////
+
+            hls::stream<bool> strm_cur_bit_h("edge h");
+            hls::stream<ap_uint<6> > strm_nz_77_h;
+            hls::stream<ap_uint<3> > strm_so_far_h;
+
+            hls::stream<ap_uint<4> > strm_length_exp_h("h_length_exp");
+            hls::stream<ap_uint<4> > strm_length_sign_h("h_length_sign");
+            hls::stream<ap_uint<4> > strm_length_nois_h("h_length_nois");
+
+            hls::stream<bool> strm_cur_bit_exp_h("edge h exp");
+            hls::stream<ap_uint<3> > strm_num_nonzero_bin_h;
+            hls::stream<ap_uint<4> > strm_best_prior_exp_h("h_bsr_exp");
+
+            hls::stream<bool> strm_cur_bit_sign_h("edge h sign");
+            hls::stream<ap_uint<2> > strm_tri_sign_h;
+            hls::stream<ap_uint<4> > strm_best_prior_sign_h("h_bsr_sign");
+
+            hls::stream<bool> strm_cur_bit_nois_h("edge h nois");
+            hls::stream<ap_uint<8> > strm_ctx_nois_h;
+            hls::stream<ap_uint<8> > strm_min_nois_h;
+            hls::stream<ap_uint<8> > strm_so_far_nois_h;
+            hls::stream<ap_uint<6> > strm_coord_nois_h;
+
+            hls::stream<bool> strm_cur_bit_v("edge v");
+            hls::stream<ap_uint<6> > strm_nz_77_v;
+            hls::stream<ap_uint<3> > strm_so_far_v;
+
+            hls::stream<ap_uint<4> > strm_length_exp_v("v_length_exp");
+            hls::stream<ap_uint<4> > strm_length_sign_v("v_length_sign");
+            hls::stream<ap_uint<4> > strm_length_nois_v("v_length_nois");
+
+            hls::stream<bool> strm_cur_bit_exp_v("edge v exp");
+            hls::stream<ap_uint<3> > strm_num_nonzero_bin_v;
+            hls::stream<ap_uint<4> > strm_best_prior_exp_v("v_bsr_exp");
+
+            hls::stream<bool> strm_cur_bit_sign_v("edge v_sign");
+            hls::stream<ap_uint<2> > strm_tri_sign_v;
+            hls::stream<ap_uint<4> > strm_best_prior_sign_v("v_bsr_sign");
+
+            hls::stream<bool> strm_cur_bit_nois_v("edge v_nois");
+            hls::stream<ap_uint<8> > strm_ctx_nois_v;
+            hls::stream<ap_uint<8> > strm_min_nois_v;
+            hls::stream<ap_uint<8> > strm_so_far_nois_v;
+            hls::stream<ap_uint<6> > strm_coord_nois_v;
+
+        	hls::stream<ap_uint<4>  > strm_sel_tab_edge("sel_tab");
+        	hls::stream<bool>		  strm_cur_bit_edge;
+        	hls::stream<bool>		  strm_e_edge;
+        	hls::stream<ap_uint<16> > strm_addr1_edge("addr1");
+        	hls::stream<ap_uint<16> > strm_addr2_edge;
+        	hls::stream<ap_uint<16> > strm_addr3_edge;
+        	hls::stream<ap_uint<16> > strm_addr4_edge;
+
+
+            int16_t coef_left_edges[64];
+            bool left        = false;
+            bool above       = !is_top_row_cmp;
+            bool above_right = false;
+
+            //serializing
+            hls_serialize_tokens_edges(
+                block_width,
+                id_cmp!=0,
+                min_nois_thld_x,
+                min_nois_thld_y,
+                left,
+                !is_top_row_cmp,
+                above_right,
+
+                strm_non_zero_h,
+                strm_coef_cnt_h_len,
+                strm_lane_h,
+
+                strm_coef_cnt_v_len,
+                strm_lane_v,
+
+                strm_eob_x,
+                strm_eob_y,
+
+                idct_q_table_x,
+                idct_q_table_y,
+
+                strm_coef_h,
+                strm_coef_above_h,
+                strm_has_left_h,
+                strm_coef_e_h,
+                strm_coef_v,
+                strm_coef_left_v,
+                strm_has_left_v,
+                strm_coef_e_v,
+
+				strm_sel_tab_edge,
+				strm_cur_bit_edge,
+				strm_e_edge,
+				strm_addr1_edge,
+				strm_addr2_edge,
+				strm_addr3_edge,
+				strm_addr4_edge
+            );
+
+
+
+            //EDGES  //////////////////////////////////////////////////////////////////
+            ///////////////////////////////////////////////////////////////////////////
+
+            ///////////////////////////////////////////////////////////////////////////
+            //DC  /////////////////////////////////////////////////////////////////////
+
+                uint8_t q0 = q_tables[id_cmp][0][0];
+                hls::stream<uint8_t> strm_length_dc_exp("dc_length_exp");
+                hls::stream<uint8_t> strm_length_dc_sign("dc_length_sign");
+                hls::stream<uint8_t> strm_length_dc_nois("dc_length_nois");
+
+                hls::stream<bool> strm_dc_cur_bit_exp;
+                hls::stream<ap_uint<4> > strm_dc_addr_1_exp;
+                hls::stream<ap_uint<5> > strm_dc_addr_2_exp;
+
+                hls::stream<bool> strm_dc_cur_bit_sign;
+                hls::stream<ap_uint<4> > strm_dc_addr_0_sign;
+                hls::stream<ap_uint<5> > strm_dc_addr_1_sign;
+
+                hls::stream<bool> strm_dc_cur_bit_nois;
+                hls::stream<ap_uint<5> > strm_dc_addr_0_nois;
+
+            	hls::stream<ap_uint<4>  > strm_sel_tab_dc;
+            	hls::stream<bool>		  strm_cur_bit_dc;
+            	hls::stream<bool>		  strm_e_dc;
+            	hls::stream<ap_uint<16> > strm_addr1_dc;
+            	hls::stream<ap_uint<16> > strm_addr2_dc;
+            	hls::stream<ap_uint<16> > strm_addr3_dc;
+
+                hls_serialize_tokens_dc(
+                    !is_top_row_cmp,
+                    id_cmp,
+                    block_width,
+                    q_tables,
+                    q0,
+
+                    str_rast8,
+                    str_dc1,
+
+					strm_sel_tab_dc,
+					strm_cur_bit_dc,
+					strm_e_dc,
+					strm_addr1_dc,
+					strm_addr2_dc,
+					strm_addr3_dc
+
+                );
+            //DC  /////////////////////////////////////////////////////////////////////
+            ///////////////////////////////////////////////////////////////////////////
+
+            hls::stream<bool>    strm_num_nonzeros_counts_7x7_bit;
+            hls::stream<uint8_t> strm_num_nonzeros_counts_7x7_prob;
+            hls::stream<bool>    strm_num_nonzeros_counts_7x7_e;
+
+            hls::stream<bool>    strm_exponent_counts_bit;
+            hls::stream<uint8_t> strm_exponent_counts_prob;
+            hls::stream<bool>    strm_exponent_counts_e;
+
+            hls::stream<bool>    strm_sign_counts_bit;
+            hls::stream<uint8_t> strm_sign_counts_prob;
+            hls::stream<bool>    strm_sign_counts_e;
+
+            hls::stream<bool>    strm_residual_noise_counts_bit;
+            hls::stream<uint8_t> strm_residual_noise_counts_prob;
+            hls::stream<bool>    strm_residual_noise_counts_e;
+            hls::stream<bool>    strm_block_e;
+
+            hls::stream<bool>    strm_num_nonzeros_counts_8x1_bit;
+            hls::stream<uint8_t> strm_num_nonzeros_counts_8x1_prob;
+            hls::stream<bool>    strm_num_nonzeros_counts_8x1_e;
+        
+            hls::stream<bool>    strm_num_nonzeros_counts_1x8_bit;
+            hls::stream<uint8_t> strm_num_nonzeros_counts_1x8_prob;
+            hls::stream<bool>    strm_num_nonzeros_counts_1x8_e;
+
+            hls::stream<bool>    strm_exponent_counts_x_bit;
+            hls::stream<uint8_t> strm_exponent_counts_x_prob;
+            hls::stream<bool>    strm_exponent_counts_x_e;
+
+            hls::stream<bool>    strm_exponent_counts_dc_bit;
+            hls::stream<uint8_t> strm_exponent_counts_dc_prob;
+            hls::stream<bool>    strm_exponent_counts_dc_e;
+
+            hls::stream<bool>    strm_residual_noise_counts_dc_bit;
+            hls::stream<uint8_t> strm_residual_noise_counts_dc_prob;
+            hls::stream<bool>    strm_residual_noise_counts_dc_e;
+
+            hls::stream<bool>    strm_7x7_bit;
+            hls::stream<uint8_t> strm_7x7_prob;
+            hls::stream<bool>    strm_7x7_e;
+
+            hls::stream<bool>    strm_edge_bit;
+            hls::stream<uint8_t> strm_edge_prob;
+            hls::stream<bool>    strm_edge_e;
+
+            hls::stream<bool>    strm_dc_bit;
+            hls::stream<uint8_t> strm_dc_prob;
+            hls::stream<bool>    strm_dc_e;
+/*
+            hls::stream<bool>    strm_bit;
+            hls::stream<uint8_t> strm_prob;
+            hls::stream<bool>    strm_e;
+            hls::stream<uint8_t> strm_tab_dbg;
+            */
+
+
+            ap_uint<1> ap_color = hls_color_index((int)id_cmp);  
+
+		    hls::stream<bool>    strm_bit_77;
+		    hls::stream<uint8_t> strm_prob_77;
+		    hls::stream<bool>    strm_e_7x7;
+		    hls::stream<uint8_t> strm_tab_dbg_77;
+
+			hls::stream<ap_uint<4>  > strm_sel_tab;
+			hls::stream<bool>		  strm_cur_bit("res_bit");
+			hls::stream<bool>		  strm_e_in("res_e");
+			hls::stream<ap_uint<16> > strm_addr1;
+			hls::stream<ap_uint<16> > strm_addr2;
+			hls::stream<ap_uint<16> > strm_addr3;
+			hls::stream<ap_uint<16> > strm_addr4;
+
+			collect(
+                block_width,
+
+				strm_sel_tab_77,
+				strm_cur_bit_77,
+				strm_e_77,
+				strm_addr1_77,
+				strm_addr2_77,
+				strm_addr3_77,
+				strm_addr4_77,
+
+				strm_sel_tab_edge,
+				strm_cur_bit_edge,
+				strm_e_edge,
+				strm_addr1_edge,
+				strm_addr2_edge,
+				strm_addr3_edge,
+				strm_addr4_edge,
+
+				strm_sel_tab_dc,
+				strm_cur_bit_dc,
+				strm_e_dc,
+				strm_addr1_dc,
+				strm_addr2_dc,
+				strm_addr3_dc,
+
+				strm_sel_tab,
+				strm_cur_bit,
+				strm_e_in,
+				strm_addr1,
+				strm_addr2,
+				strm_addr3,
+				strm_addr4
+
+            );
+
+
+
+			probability_look_up(
+				id_cmp!=0,
+
+				strm_sel_tab,
+				strm_cur_bit,
+				strm_e_in,
+				strm_addr1,
+				strm_addr2,
+				strm_addr3,
+				strm_addr4,
+
+				strm_bit,
+				strm_prob,
+				strm_e,
+				strm_tab_dbg
+			);
+            
+            hls::stream< bool >          strm_pos_o_e;
+            hls::stream< unsigned char > strm_pos_o_byte;
+            
+            vpx_enc_syn(
+                //Iteration for variable
+                &range,//,unsigned char*        range,
+                &count,//,int*                  cnt,
+                &value,//,unsigned int*         value,
+                &pre_byte,//,unsigned char*        pre_byte,
+                &run,//,unsigned short*       run,
+                &isFirst,//,bool*                 br_isFirst,
+                &pos,//,unsigned int*         pos ,
+                //input
+                strm_bit,//,hls::stream<bool>&    strm_bit,
+                strm_prob,//,hls::stream<uint8_t>& strm_prob,
+                strm_e,//,hls::stream<bool>&    strm_e_range,
+                strm_tab_dbg,//,hls::stream<uint8_t>& strm_tab_dbg,
+                //output
+                strm_pos_o_e,//,hls::stream<bool>&    strm_pos_o_e,
+                strm_pos_o_byte//,hls::stream< unsigned char >&strm_pos_o_byte
+            );
+        
+            //===================================================
+            //To DDR
+            bool     e_byte        = strm_pos_o_e.read();
+            while ( !e_byte){
+                    e_byte        = strm_pos_o_e.read();
+                    unsigned char byte = strm_pos_o_byte.read();
+                    axi_res[pos2++]   = byte;
+            }
+            //===================================================
+
+            is_top_row[id_cmp] = false;
+            //Updating current y points for all components
+            cur_y_cmp[id_cmp]++;
+            if(cur_y_cmp[0]>=axi_height[0])
+                break;
+        }//for(int idx_cmp = 0; idx_cmp < axi_num_cmp_mcu ; idx_cmp++)
+
+
+
+    }//("process_row_range_while");for( int i_mcuv = 0; i_mcuv < axi_mcuv; i_mcuv++)
+
+    axi_res[pos++] = pre_byte;
+	for(; run > 0; run--)
+		axi_res[pos++] = 0xff;
+
+	axi_arith.count=count;
+	axi_arith.value=value;
+	axi_arith.pre_byte = pre_byte;
+	axi_arith.run = run;
+	axi_arith.pos = pos;
+	axi_arith.range = range;
+	axi_arith.isFirst = isFirst;
+
+    ///////////////////////////////////////////////////////////////////////////
+    //AC  ////////////////////////////////////////////////////////////////////
+
+}
+#endif
+
+#if 0
+/**
+ * Kernel wrapper (currently compiled out by the surrounding `#if 0`).
+ *
+ * Attaches the ARRAY_PARTITION / RESOURCE pragmas to the count tables named
+ * below, calls init_hlsmodel(), and then forwards every argument unchanged to
+ * kernel_run().
+ *
+ * The tables named in the pragmas are not parameters of this function; they
+ * are presumably file-scope arrays declared elsewhere - TODO confirm.
+ *
+ * @param axi_coeff        coefficient buffer, forwarded to kernel_run()
+ * @param axi_width        per-colour width table, forwarded to kernel_run()
+ * @param axi_height       per-colour height table, forwarded to kernel_run()
+ * @param axi_map_row2cmp  row-to-component map, forwarded to kernel_run()
+ * @param min_nois_thld_x  forwarded to kernel_run()
+ * @param min_nois_thld_y  forwarded to kernel_run()
+ * @param q_tables         forwarded to kernel_run()
+ * @param idct_q_table_x   forwarded to kernel_run()
+ * @param idct_q_table_y   forwarded to kernel_run()
+ * @param idct_q_table_l   forwarded to kernel_run()
+ * @param axi_mcuv         forwarded to kernel_run()
+ * @param axi_num_cmp_mcu  forwarded to kernel_run()
+ * @param axi_num_cmp      forwarded to kernel_run()
+ * @param axi_res          temporary output buffer, forwarded to kernel_run()
+ * @param axi_arith        arithmetic-coder state, forwarded to kernel_run()
+ */
+void tmp_Kernel_1(
+    // input
+    WD_AXI axi_coeff[MAX_COEF_AXI],
+    uint16_t axi_width[MAX_NUM_COLOR], // colldata->block_width(i);
+    // NOTE(review): the original comment here also said block_width(i);
+    // presumably the height accessor was meant - confirm against the caller.
+    uint16_t axi_height[MAX_NUM_COLOR],
+    uint8_t axi_map_row2cmp[4], //     AXI                   2,1,0,0 2,1,0
+    uint8_t min_nois_thld_x[MAX_NUM_COLOR][64],
+    uint8_t min_nois_thld_y[MAX_NUM_COLOR][64],
+    uint8_t q_tables[MAX_NUM_COLOR][8][8], //[64],
+    int32_t idct_q_table_x[MAX_NUM_COLOR][8][8],
+    int32_t idct_q_table_y[MAX_NUM_COLOR][8][8],
+    int32_t idct_q_table_l[MAX_NUM_COLOR][8][8],
+
+    uint16_t axi_mcuv,
+    uint8_t axi_num_cmp_mcu,
+    uint8_t axi_num_cmp,
+    // tmp output
+    uint8_t axi_res[MAX_NUM_PIX],
+    struct_arith& axi_arith) {
+#pragma HLS ARRAY_PARTITION variable = num_nonzeros_counts_7x7 block factor = 3 dim = 3
+
+// residual_noise_counts used to be partitioned twice with an identical pragma;
+// a single directive is kept.
+#pragma HLS ARRAY_PARTITION variable = residual_noise_counts block factor = 4 dim = 2
+
+#pragma HLS ARRAY_PARTITION variable = exponent_counts complete dim = 2
+#pragma HLS RESOURCE variable = exponent_counts core = XPM_MEMORY uram
+
+#pragma HLS ARRAY_PARTITION variable = residual_threshold_counts complete dim = 4
+#pragma HLS RESOURCE variable = residual_threshold_counts core = XPM_MEMORY uram
+
+#pragma HLS ARRAY_PARTITION variable = exponent_counts_x complete dim = 4
+#pragma HLS RESOURCE variable = exponent_counts_x core = XPM_MEMORY uram
+    // The tables have to be initialised here. It will take tens of us.
+    //////////////////////////////////////////////////
+    ///// Kernel Begin
+    //////////////////////////////////////////////////
+    init_hlsmodel();
+    kernel_run(
+        // input
+        axi_coeff, axi_width, axi_height, axi_map_row2cmp,
+        min_nois_thld_x, min_nois_thld_y,
+        q_tables, idct_q_table_x, idct_q_table_y, idct_q_table_l,
+        axi_mcuv, axi_num_cmp_mcu, axi_num_cmp,
+        // tmp output
+        axi_res, axi_arith);
+    //////////////////////////////////////////////////
+    ///// Kernel END
+    //////////////////////////////////////////////////
+}
+#endif
+//#include "XAcc_jpegdecoder.hpp"
+/**
+ * Empty definition: the body is `{}`, so no argument is read and nothing is
+ * written through `stream` or `bool_encoder`.
+ * NOTE(review): presumably kept only so the member declared in vp8_encoder.hh
+ * still has a definition - confirm before removing.
+ */
+void VP8ComponentEncoder::process_row_range2(unsigned int thread_id,
+                                             const UncompressedComponents* const colldata,
+                                             ResizableByteBuffer* stream,
+                                             BoolEncoder* bool_encoder) {}
+#if 0
+/**
+ * Unpacks a LeptonInput and hands its fields to tmp_Kernel_1()
+ * (currently compiled out by the surrounding `#if 0`).
+ *
+ * @param axi_coeff  coefficient buffer, passed through as the first argument
+ * @param lepp       source of the geometry, threshold and quantisation fields
+ * @param axi_arith  arithmetic-coder state, passed through as the last argument
+ * @param axi_res    output byte buffer, passed through before axi_arith
+ */
+void process_row_range3(WD_AXI* axi_coeff, LeptonInput& lepp, struct_arith& axi_arith, uint8_t* axi_res) {
+    tmp_Kernel_1(axi_coeff,
+                 // geometry tables and the row-to-component map
+                 lepp.axi_width, lepp.axi_height, lepp.axi_map_row2cmp,
+                 // noise thresholds
+                 lepp.min_nois_thld_x, lepp.min_nois_thld_y,
+                 // quantisation table followed by the three idct_q tables
+                 lepp.q_tables, lepp.idct_q_table_x, lepp.idct_q_table_y, lepp.idct_q_table_l,
+                 // scalar counts
+                 lepp.axi_mcuv, lepp.axi_num_cmp_mcu, lepp.axi_num_cmp,
+                 // outputs
+                 axi_res, axi_arith);
+}
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/lepton/vp8_encoder.hh b/codec/L2/demos/leptonEnc/host/lepton/vp8_encoder.hh
new file mode 100644
index 0000000000..f808417dfa
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/lepton/vp8_encoder.hh
@@ -0,0 +1,76 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+#ifndef _VP8_COMPONENT_ENCODER_HH_
+#define _VP8_COMPONENT_ENCODER_HH_
+#include "base_coders.hh"
+#include "lepton_codec.hh"
+#include "model.hh"
+#include "../io/MuxReader.hh"
+#include "XAcc_77.hpp"
+#include "XAcc_edges.hpp"
+#include "XAcc_dc.hpp"
+#include "XAcc_model.hpp"
+#include "XAcc_arith.hpp"
+#include "XAcc_common.hpp"
+class BoolEncoder;
+// Encoder class deriving from LeptonCodec (protected) and BaseEncoder (public).
+// Everything declared before `public:` is private (class default access).
+class VP8ComponentEncoder : protected LeptonCodec, public BaseEncoder {
+    // Static member template parameterised on three model types
+    // (Left / Middle / Right); takes the row index `curr_y`, the coefficient
+    // data, a per-block-type context array and the BoolEncoder to use.
+    template <class Left, class Middle, class Right>
+    static void process_row(ProbabilityTablesBase& pt,
+                            Left& left_model,
+                            Middle& middle_model,
+                            Right& right_model,
+                            int curr_y,
+                            const UncompressedComponents* const colldata,
+                            Sirikata::Array1d<ConstBlockContext, (uint32_t)ColorChannel::NumBlockTypes>& context,
+                            BoolEncoder& bool_encoder);
+    // Row-range worker taking an explicit [min_y, max_y] pair plus a
+    // per-block-type array of NeighborSummary vectors.
+    // NOTE(review): whether max_y is inclusive cannot be told from this header.
+    void process_row_range(
+        unsigned int thread_id,
+        const UncompressedComponents* const colldata,
+        int min_y,
+        int max_y,
+        Sirikata::MuxReader::ResizableByteBuffer* stream,
+        BoolEncoder* bool_encoder,
+        Sirikata::Array1d<std::vector<NeighborSummary>, (uint32_t)ColorChannel::NumBlockTypes>* num_nonzeros);
+    // Variant without the y range and num_nonzeros arguments.
+    void process_row_range2(unsigned int thread_id,
+                            const UncompressedComponents* const colldata,
+                            Sirikata::MuxReader::ResizableByteBuffer* stream,
+                            BoolEncoder* bool_encoder);
+    // Non-template counterpart of process_row: the three model parameters are
+    // commented out and replaced by `color`, `block_width`, `isTopRow` and
+    // `isOnlyOne`.
+    //  template<class Left, class Middle, class Right>
+    static void process_row2(ProbabilityTablesBase& pt,
+                             BlockType color,
+                             int block_width,
+                             bool isTopRow,
+                             bool isOnlyOne,
+                             // Left & left_model,
+                             // Middle& middle_model,
+                             // Right& right_model,
+                             int curr_y,
+                             const UncompressedComponents* const colldata,
+                             Sirikata::Array1d<ConstBlockContext, (uint32_t)ColorChannel::NumBlockTypes>& context,
+                             BoolEncoder& bool_encoder);
+
+   public:
+    VP8ComponentEncoder(bool do_threading);
+    // Asserts that num_workers + 1 equals NUM_THREADS, then forwards to
+    // LeptonCodec::registerWorkers.
+    void registerWorkers(GenericWorker* workers, unsigned int num_workers) {
+        always_assert(num_workers + 1 == NUM_THREADS);
+        this->LeptonCodec::registerWorkers(workers, num_workers);
+    }
+
+    // Four-argument overload.
+    CodingReturnValue vp8_full_encoder(const UncompressedComponents* const colldata,
+                                       IOUtil::FileWriter*,
+                                       const ThreadHandoff* selected_splits,
+                                       unsigned int num_selected_splits);
+    // Overload that additionally takes a struct_arith reference and a byte
+    // buffer `res`.
+    // NOTE(review): presumably these carry the kernel's arithmetic-coder state
+    // and output bytes - confirm against the definition.
+    CodingReturnValue vp8_full_encoder(const UncompressedComponents* const colldata,
+                                       IOUtil::FileWriter* str_out,
+                                       const ThreadHandoff* selected_splits,
+                                       unsigned int num_selected_splits,
+                                       struct_arith& arith,
+                                       uint8_t* res);
+
+    CodingReturnValue encode_chunk(const UncompressedComponents* input,
+                                   IOUtil::FileWriter*,
+                                   const ThreadHandoff* selected_splits,
+                                   unsigned int num_selected_splits);
+    // Both accessors simply return the value of the corresponding
+    // model_*_memory_used() call.
+    size_t get_decode_model_memory_usage() const { return model_memory_used(); }
+    size_t get_decode_model_worker_memory_usage() const { return model_worker_memory_used(); }
+};
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/other/loop_stt.cc b/codec/L2/demos/leptonEnc/host/other/loop_stt.cc
new file mode 100644
index 0000000000..2598603c34
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/other/loop_stt.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loop_stt.h"
+
+/*
+LoopNode::LoopNode(){
+                name[0]      = 0;
+                this->id     = 0;
+                AccessTimes  = 0;
+                InitCnt();
+                //TotalCount   = 0;
+                //AverageCount = 0;
+                //MaxCount     = 0;
+                //MinCoun      = 0;
+                num_son      = 0;
+                level        = 0;
+                this->p_father  = NULL;
+                this->hasFather = false;
+};
+LoopNode::LoopNode(char* nm){
+
+                strcpy(this->name, nm);
+                this->id     = 0;
+                AccessTimes  = 0;
+                InitCnt();
+                //TotalCount   = 0;
+                //AverageCount = 0;
+                //MaxCount     = 0;
+                //MinCoun      = 0;
+                num_son      = 0;
+                level        = 0;
+                this->p_father  = NULL;
+                this->hasFather = false;
+};
+*/
+/**
+ * Builds a node of type ANY.
+ *
+ * @param nm     label for the node; copied into the fixed-size `name` buffer
+ *               and truncated if it does not fit (a null pointer yields "")
+ * @param pf     father node, or 0 for a root; passed to AddFather()
+ * @param id_in  value stored in `id`
+ */
+LoopNode::LoopNode(char* nm, LoopNode* pf, int id_in) {
+    // Bounded copy: strcpy() would write past `name` for labels that are
+    // longer than the buffer.
+    snprintf(this->name, sizeof(this->name), "%s", (nm != 0) ? nm : "");
+    this->id = id_in;
+    // `type` must be set before AddFather(), which uses it to build `fname`.
+    this->type = ANY;
+    AccessTimes = 0;
+    InitCnt();
+    TotalCount = 0;
+    CurrentCount = 0; // previously left uninitialised
+    AverageCount = 0;
+    MaxCount = 0;
+    MinCount = 0x7fffffff;
+    num_son = 0;
+    level = 0;
+    this->p_father = NULL;
+    this->hasFather = false;
+    this->AddFather(pf);
+}
+
+/**
+ * Builds a node of the given type.
+ *
+ * @param nm     label for the node; copied into the fixed-size `name` buffer
+ *               and truncated if it does not fit (a null pointer yields "")
+ * @param pf     father node, or 0 for a root; passed to AddFather()
+ * @param id_in  value stored in `id`
+ * @param tp     value stored in `type`
+ */
+LoopNode::LoopNode(char* nm, LoopNode* pf, int id_in, NType tp) {
+    // Bounded copy: strcpy() would write past `name` for labels that are
+    // longer than the buffer.
+    snprintf(this->name, sizeof(this->name), "%s", (nm != 0) ? nm : "");
+    this->id = id_in;
+    // `type` must be set before AddFather(), which uses it to build `fname`.
+    this->type = tp;
+    AccessTimes = 0;
+    InitCnt();
+    TotalCount = 0;
+    CurrentCount = 0; // previously left uninitialised
+    AverageCount = 0;
+    MaxCount = 0;
+    MinCount = 0x7fffffff;
+    num_son = 0;
+    level = 0;
+    this->p_father = NULL;
+    this->hasFather = false;
+    this->AddFather(pf);
+}
+
+/**
+ * Appends `pson` to this node's `p_sons` array and increments `num_son`.
+ *
+ * A null `pson`, or a call made when the array is already full, is reported
+ * on stderr and otherwise ignored, so the fixed-size array is never written
+ * out of bounds and FindSon() never meets a null entry.
+ */
+void LoopNode::AddSon(LoopNode* pson) {
+    const int capacity = (int)(sizeof(this->p_sons) / sizeof(this->p_sons[0]));
+    if (pson == 0) {
+        fprintf(stderr, "LoopNode::AddSon: ignoring null son for node '%s'\n", this->name);
+        return;
+    }
+    if (this->num_son < 0 || this->num_son >= capacity) {
+        fprintf(stderr, "LoopNode::AddSon: node '%s' already has %d sons, cannot add '%s'\n", this->name,
+                this->num_son, pson->name);
+        return;
+    }
+    this->p_sons[this->num_son] = pson;
+    this->num_son++;
+}
+
+/**
+ * Records `pf` as this node's father and derives `level`, `id_son` and
+ * `fname` from it.
+ *
+ * With a father: level is the father's level + 1, id_son is the father's
+ * current `num_son`, and fname is "<father fname>-<prefix><id_son>".
+ * Without one (pf == 0): level and id_son are 0 and fname is "<prefix>0".
+ * The prefix is Prefix_str[type], so `type` must already be set.
+ *
+ * `hasFather` now reflects whether a father was actually supplied; it used to
+ * be set to true even for pf == 0.
+ */
+void LoopNode::AddFather(LoopNode* pf) {
+    this->p_father = pf;
+    this->hasFather = (pf != 0);
+    if (pf != 0) {
+        this->level = pf->level + 1;
+        this->id_son = pf->num_son;
+        // snprintf: the father's fname can already fill most of the buffer,
+        // so an unbounded sprintf could overflow `fname`.
+        snprintf(fname, sizeof(fname), "%s-%s%d", pf->fname, Prefix_str[this->type], this->id_son);
+    } else {
+        this->level = 0;
+        this->id_son = 0;
+        snprintf(fname, sizeof(fname), "%s%d", Prefix_str[this->type], this->id_son);
+    }
+}
+
+LoopNodeFactory g_loops;
diff --git a/codec/L2/demos/leptonEnc/host/other/loop_stt.h b/codec/L2/demos/leptonEnc/host/other/loop_stt.h
new file mode 100644
index 0000000000..df057afd2c
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/other/loop_stt.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// loop_stt.h
+#ifndef _LOOP_STT_H_
+#define _LOOP_STT_H_
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Number of entries in LoopNode::OnceCount.
+#define MAX_ACCESS (65536)
+// Defined empty; not tested anywhere in this header.
+// NOTE(review): presumably an "enable statistics" switch - confirm at use sites.
+#define ENSTT
+// Instrumentation shorthands. Each expands to a call on the object `g_loops`
+// (defined in loop_stt.cc). STTSTART/FOR/WHILE/PROC/BRCH enter a node named
+// `a` with the matching NType, STTCNT bumps the current node's counter and
+// STTEND leaves the current node.
+// NOTE(review): this header does not declare `g_loops`; a translation unit
+// using these macros needs its own `extern LoopNodeFactory g_loops;` - confirm.
+#define STTSTART(a) g_loops.START(a, ANY)
+#define STTFOR(a) g_loops.START(a, FOR)
+#define STTWHILE(a) g_loops.START(a, WHILE)
+#define STTPROC(a) g_loops.START(a, PROC)
+#define STTBRCH(a) g_loops.START(a, BRCH)
+#define STTCNT g_loops.CNT()
+#define STTEND g_loops.END()
+
+//#define STTCNT  g_loops.CntCur();
+//#define STTEND  g_loops.UpLevel();
+// Node kinds; the numeric value is used as an index into Prefix / Prefix_str.
+enum NType { ANY = 0, FOR, WHILE, PROC, BRCH };
+// One-letter tag per NType, as characters and as strings. Prefix_str is what
+// LoopNode uses when building `fname` and when printing.
+static char Prefix[5] = {'A', 'L', 'l', 'P', 'B'};
+static char* Prefix_str[5] = {"A", "L", "l", "P", "B"};
+/**
+ * One node of the loop-statistics tree maintained by LoopNodeFactory.
+ *
+ * A node stores a label, its position in the tree (father, sons, level) and
+ * running counters: how often it was entered (AccessTimes), the count of the
+ * current visit (CurrentCount) and the total / max / min over finished visits.
+ */
+class LoopNode {
+   public:
+    char name[256];  // label given to the constructor
+    char fname[256]; // path-style name built by AddFather()
+    int id;          // value passed to the constructor as id_in
+    int id_son;      // father's num_son at the time AddFather() ran (0 for a root)
+    NType type;
+    //
+    int AccessTimes; // Can be too big to be stored by static array OnceCount
+    int OnceCount[MAX_ACCESS];
+    int TotalCount;
+    int CurrentCount;
+    int AverageCount;
+    int MaxCount;
+    int MinCount;
+    //
+    int level;
+    bool hasFather;
+    LoopNode* p_father;
+    int num_son;
+    LoopNode* p_sons[512];
+
+   public:
+    // LoopNode();
+    // LoopNode( char* name);
+    LoopNode(char* nm, LoopNode* pf, int id_in);
+    LoopNode(char* nm, LoopNode* pf, int id_in, NType tp);
+
+    // Zeroes every entry of OnceCount.
+    void InitCnt() { memset(OnceCount, 0, sizeof(OnceCount)); }
+
+    void AddSon(LoopNode* pson);
+    void AddFather(LoopNode* pf);
+
+    // Post-increments CurrentCount and returns the value it had before.
+    int CntNode() {
+        // TotalCount++;
+        return CurrentCount++;
+        // return (this->OnceCount[this->AccessTimes-1]++);
+    }
+
+    // Returns the first son whose name equals `nm`, or 0 when there is none.
+    LoopNode* FindSon(char* nm) {
+        for (int i = 0; i < this->num_son; i++) {
+            if (strcmp(this->p_sons[i]->name, nm) == 0) return this->p_sons[i];
+        }
+        return 0;
+    } // FindSon
+
+    int GetTotalCount() { return TotalCount; }
+    // Integer average of TotalCount over AccessTimes; 0 for a node that was
+    // never entered (this used to divide by zero).
+    int GetAverageCount() { return (AccessTimes != 0) ? GetTotalCount() / AccessTimes : 0; }
+    int GetMaxCount() { return MaxCount; }
+    int GetMinCount() { return MinCount; }
+
+    // Folds CurrentCount into TotalCount / MaxCount / MinCount and returns the
+    // new TotalCount.
+    int UpdateRecord() {
+        TotalCount += CurrentCount;
+        if (this->MaxCount < CurrentCount) MaxCount = CurrentCount;
+        if (MinCount > CurrentCount) MinCount = CurrentCount;
+        return TotalCount;
+    }
+
+    // Prints one "[index, OnceCount[index]]" pair per recorded access followed
+    // by a newline. The index is capped at MAX_ACCESS so the array is never
+    // read out of bounds. (The previous loops printed OnceCount[AccessTimes]
+    // on every iteration instead of OnceCount[i].)
+    void PrintOnceCounts() {
+        const int shown = (AccessTimes < MAX_ACCESS) ? AccessTimes : MAX_ACCESS;
+        for (int i = 0; i < shown; i++) printf("[%d, %d]", i, OnceCount[i]);
+        printf("\n");
+    }
+
+    // Prints the node indented by `level` spaces: first the name, then the
+    // counters; with isMore the per-access table follows.
+    void PrintSelf3(bool isMore) {
+        // "%*s" with an empty string emits exactly `indent` spaces, which
+        // replaces the fixed 128-byte scratch buffer that deep trees overflowed.
+        const int indent = (level > 0) ? level : 0;
+        printf("%*s NAME=%s; ", indent, "", this->name);
+        printf("%*s L=%d, T=%d, Cnt=%d, Ave=%d, Max=%d, Min=%d \n", indent, "", level, this->AccessTimes,
+               this->GetTotalCount(), this->GetAverageCount(), this->GetMaxCount(), this->GetMinCount());
+        if (isMore) PrintOnceCounts();
+    } // PrintSelf
+
+    // Prints fname padded with three spaces per level between this node and
+    // `depth`, then the counters and the name; with isMore the per-access
+    // table follows.
+    void PrintSelf(bool isMore, int depth) {
+        const int missing_levels = (depth > level) ? (depth - level) : 0;
+        printf("%s%*s", this->fname, 3 * missing_levels, "");
+        printf(" , %s, Lvl=%d, Accss=%9d, AllCnt=%9d, Ave=%4d, Max=%4d, Min=%4d,", Prefix_str[this->type], level,
+               this->AccessTimes, this->GetTotalCount(), this->GetAverageCount(), this->GetMaxCount(),
+               this->GetMinCount());
+        printf(" NAME=%s \n", this->name);
+        if (isMore) PrintOnceCounts();
+    } // PrintSelf2
+};
+
+/**
+ * Owner of the loop-statistics tree and of the cursor into it.
+ *
+ * START()/DownLevel() enters a node (creating it on first use), CNT()/CntCur()
+ * bumps the counter of the node currently open, and END()/UpLevel() closes it
+ * and moves the cursor back to its father. `depth` is the number of nodes
+ * currently open; `p_cur` is the innermost open node, or 0 when none is open.
+ *
+ * Nodes are allocated with `new` and are not freed by this class.
+ */
+class LoopNodeFactory {
+   public:
+    int num_node;  // nodes created so far; also used as the next node id
+    int depth;     // nodes currently open
+    int depth_max; // largest value `depth` has reached
+    LoopNode* p_top;
+    LoopNode* p_cur;
+    LoopNodeFactory() {
+        num_node = 0;
+        depth = 0;
+        depth_max = 0;
+        p_top = 0;
+        p_cur = 0;
+    }
+
+    // Enters the node called `nm` below the current node and returns it.
+    // The very first call creates the root. When no node is open any more
+    // (the root was closed by END()), a call with the root's name re-enters
+    // the root; any other name is looked up / created as a son of the root,
+    // because the tree has a single root. Both cases used to dereference a
+    // null `p_cur`.
+    LoopNode* DownLevel(char* nm, NType tp) {
+        if (num_node == 0) {
+            p_top = new LoopNode(nm, 0, num_node, tp);
+            p_cur = p_top;
+            num_node++;
+        } else if (p_cur == 0 && p_top != 0 && strcmp(p_top->name, nm) == 0) {
+            p_cur = p_top;
+        } else {
+            LoopNode* parent = (p_cur != 0) ? p_cur : p_top;
+            LoopNode* aSon = parent->FindSon(nm);
+            if (aSon == 0) {
+                aSon = new LoopNode(nm, parent, num_node, tp);
+                parent->AddSon(aSon);
+                num_node++;
+            }
+            p_cur = aSon;
+        }
+        p_cur->AccessTimes++;
+        p_cur->CurrentCount = 0;
+        this->depth++;
+        if (depth_max < depth) depth_max = depth;
+        return p_cur;
+    }
+    LoopNode* START(char* nm, NType tp) { return DownLevel(nm, tp); }
+
+    // Closes the current node: folds its CurrentCount into its totals and
+    // moves the cursor to its father. Returns the new current node, which is
+    // 0 once the outermost open node has been closed. An unmatched call (no
+    // node open) changes nothing and returns p_top.
+    LoopNode* UpLevel() {
+        if (p_cur == 0 || depth == 0) return p_top;
+        p_cur->UpdateRecord();
+        this->depth--;
+        p_cur = (depth == 0) ? 0 : p_cur->p_father;
+        return p_cur;
+    }
+    LoopNode* END() { return UpLevel(); }
+
+    // Bumps the current node's counter and returns its previous value;
+    // returns 0 when no node is open.
+    int CntCur() { return (p_cur != 0) ? p_cur->CntNode() : 0; }
+    int CNT() { return CntCur(); }
+
+    // Prints `p_f` and then, depth-first, all of its descendants.
+    void PrintTree(LoopNode* p_f, bool mode) {
+        if (p_f == 0) return; // empty tree
+        p_f->PrintSelf(mode, this->depth_max);
+        for (int i = 0; i < p_f->num_son; i++) PrintTree(p_f->p_sons[i], mode);
+    }
+    void PrintTree() { PrintTree(p_top, false); }
+};
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/other/version.hh b/codec/L2/demos/leptonEnc/host/other/version.hh
new file mode 100644
index 0000000000..6447b8cf66
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/other/version.hh
@@ -0,0 +1,18 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define GIT_REVISION ""
+#define SKIP_VALIDATION
diff --git a/codec/L2/demos/leptonEnc/host/vp8/decoder/VPX_AUTHORS b/codec/L2/demos/leptonEnc/host/vp8/decoder/VPX_AUTHORS
new file mode 100644
index 0000000000..883fdc1da8
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/decoder/VPX_AUTHORS
@@ -0,0 +1,131 @@
+Aaron Watry <awatry@gmail.com>
+Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
+Adam Xu <adam@xuyaowu.com>
+Adrian Grange <agrange@google.com>
+Aℓex Converse <aconverse@google.com>
+Ahmad Sharif <asharif@google.com>
+Alexander Voronov <avoronov@graphics.cs.msu.ru>
+Alexis Ballier <aballier@gentoo.org>
+Alok Ahuja <waveletcoeff@gmail.com>
+Alpha Lam <hclam@google.com>
+A.Mahfoodh <ab.mahfoodh@gmail.com>
+Ami Fischman <fischman@chromium.org>
+Andoni Morales Alastruey <ylatuya@gmail.com>
+Andres Mejia <mcitadel@gmail.com>
+Andrew Russell <anrussell@google.com>
+Angie Chiang <angiebird@google.com>
+Aron Rosenberg <arosenberg@logitech.com>
+Attila Nagy <attilanagy@google.com>
+Brion Vibber <bvibber@wikimedia.org>
+changjun.yang <changjun.yang@intel.com>
+Charles 'Buck' Krasic <ckrasic@google.com>
+chm <chm@rock-chips.com>
+Christian Duvivier <cduvivier@google.com>
+Daniel Kang <ddkang@google.com>
+Deb Mukherjee <debargha@google.com>
+Dim Temp <dimtemp0@gmail.com>
+Dmitry Kovalev <dkovalev@google.com>
+Dragan Mrdjan <dmrdjan@mips.com>
+Ed Baker <edward.baker@intel.com>
+Ehsan Akhgari <ehsan.akhgari@gmail.com>
+Erik Niemeyer <erik.a.niemeyer@intel.com>
+Fabio Pedretti <fabio.ped@libero.it>
+Frank Galligan <fgalligan@google.com>
+Fredrik Söderquist <fs@opera.com>
+Fritz Koenig <frkoenig@google.com>
+Gaute Strokkenes <gaute.strokkenes@broadcom.com>
+Geza Lore <gezalore@gmail.com>
+Ghislain MARY <ghislainmary2@gmail.com>
+Giuseppe Scrivano <gscrivano@gnu.org>
+Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
+Guillaume Martres <gmartres@google.com>
+Guillermo Ballester Valor <gbvalor@gmail.com>
+Hangyu Kuang <hkuang@google.com>
+Hanno Böck <hanno@hboeck.de>
+Henrik Lundin <hlundin@google.com>
+Hui Su <huisu@google.com>
+Ivan Maltz <ivanmaltz@google.com>
+Jacek Caban <cjacek@gmail.com>
+Jacky Chen <jackychen@google.com>
+James Berry <jamesberry@google.com>
+James Yu <james.yu@linaro.org>
+James Zern <jzern@google.com>
+Jan Gerber <j@mailb.org>
+Jan Kratochvil <jan.kratochvil@redhat.com>
+Janne Salonen <jsalonen@google.com>
+Jeff Faust <jfaust@google.com>
+Jeff Muizelaar <jmuizelaar@mozilla.com>
+Jeff Petkau <jpet@chromium.org>
+Jia Jia <jia.jia@linaro.org>
+Jim Bankoski <jimbankoski@google.com>
+Jingning Han <jingning@google.com>
+Joey Parrish <joeyparrish@google.com>
+Johann Koenig <johannkoenig@google.com>
+John Koleszar <jkoleszar@google.com>
+Johnny Klonaris <google@jawknee.com>
+John Stark <jhnstrk@gmail.com>
+Joshua Bleecher Snyder <josh@treelinelabs.com>
+Joshua Litt <joshualitt@google.com>
+Julia Robson <juliamrobson@gmail.com>
+Justin Clift <justin@salasaga.org>
+Justin Lebar <justin.lebar@gmail.com>
+KO Myung-Hun <komh@chollian.net>
+Lawrence Velázquez <larryv@macports.org>
+Lou Quillio <louquillio@google.com>
+Luca Barbato <lu_zero@gentoo.org>
+Makoto Kato <makoto.kt@gmail.com>
+Mans Rullgard <mans@mansr.com>
+Marco Paniconi <marpan@google.com>
+Mark Mentovai <mark@chromium.org>
+Martin Ettl <ettl.martin78@googlemail.com>
+Martin Storsjo <martin@martin.st>
+Matthew Heaney <matthewjheaney@chromium.org>
+Michael Kohler <michaelkohler@live.com>
+Mike Frysinger <vapier@chromium.org>
+Mike Hommey <mhommey@mozilla.com>
+Mikhal Shemer <mikhal@google.com>
+Minghai Shang <minghai@google.com>
+Morton Jonuschat <yabawock@gmail.com>
+Nico Weber <thakis@chromium.org>
+Parag Salasakar <img.mips1@gmail.com>
+Pascal Massimino <pascal.massimino@gmail.com>
+Patrik Westin <patrik.westin@gmail.com>
+Paul Wilkins <paulwilkins@google.com>
+Pavol Rusnak <stick@gk2.sk>
+Paweł Hajdan <phajdan@google.com>
+Pengchong Jin <pengchong@google.com>
+Peter de Rivaz <peter.derivaz@gmail.com>
+Philip Jägenstedt <philipj@opera.com>
+Priit Laes <plaes@plaes.org>
+Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
+Rafaël Carré <funman@videolan.org>
+Ralph Giles <giles@xiph.org>
+Rob Bradford <rob@linux.intel.com>
+Ronald S. Bultje <rsbultje@gmail.com>
+Rui Ueyama <ruiu@google.com>
+Sami Pietilä <samipietila@google.com>
+Scott Graham <scottmg@chromium.org>
+Scott LaVarnway <slavarnway@google.com>
+Sean McGovern <gseanmcg@gmail.com>
+Sergey Ulanov <sergeyu@chromium.org>
+Shimon Doodkin <helpmepro1@gmail.com>
+Shunyao Li <shunyaoli@google.com>
+Stefan Holmer <holmer@google.com>
+Suman Sunkara <sunkaras@google.com>
+Taekhyun Kim <takim@nvidia.com>
+Takanori MATSUURA <t.matsuu@gmail.com>
+Tamar Levy <tamar.levy@intel.com>
+Tao Bai <michaelbai@chromium.org>
+Tero Rintaluoma <teror@google.com>
+Thijs Vermeir <thijsvermeir@gmail.com>
+Tim Kopp <tkopp@google.com>
+Timothy B. Terriberry <tterribe@xiph.org>
+Tom Finegan <tomfinegan@google.com>
+Vignesh Venkatasubramanian <vigneshv@google.com>
+Yaowu Xu <yaowu@google.com>
+Yongzhe Wang <yongzhe@google.com>
+Yunqing Wang <yunqingwang@google.com>
+Zoe Liu <zoeliu@google.com>
+Google Inc.
+The Mozilla Foundation
+The Xiph.Org Foundation
diff --git a/codec/L2/demos/leptonEnc/host/vp8/decoder/bool_decoder.hh b/codec/L2/demos/leptonEnc/host/vp8/decoder/bool_decoder.hh
new file mode 100644
index 0000000000..fcd6439d6b
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/decoder/bool_decoder.hh
@@ -0,0 +1,56 @@
+#ifndef BOOL_DECODER_HH
+#define BOOL_DECODER_HH
+#include <vector>
+
+#include "model.hh"
+#include "../../io/Reader.hh"
+#include "JpegArithmeticCoder.hh"
+#include "vpx_bool_reader.hh"
+typedef int8_t TreeNode;
+
+class Branch;
+// DecoderReader that serves bytes out of a caller-supplied memory range.
+// The range is not copied: `buffer_` points at the next unread byte and
+// `size_` is the number of bytes left.
+class SliceReader : public Sirikata::DecoderReader {
+    const uint8_t* buffer_;
+    size_t size_;
+
+   public:
+    SliceReader(const uint8_t* sbuffer, size_t ssize) : buffer_(sbuffer), size_(ssize) {}
+
+    // Copies up to `value` bytes into `data` and advances past them.
+    // Returns the number of bytes copied together with JpegError::nil(), or
+    // 0 together with JpegError::errEOF() when nothing was copied.
+    std::pair<unsigned int, Sirikata::JpegError> Read(unsigned char* data, unsigned int value) {
+        // Never hand out more than what is left in the slice.
+        const unsigned int granted = (size_ < value) ? static_cast<unsigned int>(size_) : value;
+        std::pair<unsigned int, Sirikata::JpegError> outcome(granted, Sirikata::JpegError::nil());
+        if (granted == 0) {
+            outcome.second = Sirikata::JpegError::errEOF();
+            return outcome;
+        }
+        memcpy(data, buffer_, granted);
+        buffer_ += granted;
+        size_ -= granted;
+        return outcome;
+    }
+};
+// SliceReader that also owns a Sirikata::ArithmeticCoder (constructed with
+// `false`) and uses itself as the byte source for that coder.
+class JpegBoolDecoder : public SliceReader {
+    Sirikata::ArithmeticCoder jpeg_coder_;
+
+   public:
+    JpegBoolDecoder(const uint8_t* buffer, size_t size) : SliceReader(buffer, size), jpeg_coder_(false) {}
+    // Returns the result of arith_decode() called with this reader and a
+    // pointer to the branch's `probability_` member.
+    bool get(Branch& branch) { return jpeg_coder_.arith_decode(this, &branch.probability_); }
+};
+
+// BoolDecoder is a thin subclass whose base is chosen at compile time:
+// JpegBoolDecoder when JPEG_ENCODER is defined, VPXBoolReader otherwise.
+// Only the VPXBoolReader variant also has a default constructor.
+#ifdef JPEG_ENCODER
+// easier than a typedef so that we can forward declare this class elsewhere
+class BoolDecoder : public JpegBoolDecoder {
+   public:
+    BoolDecoder(const uint8_t* data, size_t size) : JpegBoolDecoder(data, size) {}
+};
+#else
+// easier than a typedef so that we can forward declare this class elsewhere
+class BoolDecoder : public VPXBoolReader {
+   public:
+    BoolDecoder(const uint8_t* data, size_t size) : VPXBoolReader(data, size) {}
+    BoolDecoder() {}
+};
+#endif
+
+#endif /* BOOL_DECODER_HH */
diff --git a/codec/L2/demos/leptonEnc/host/vp8/decoder/boolreader.cc b/codec/L2/demos/leptonEnc/host/vp8/decoder/boolreader.cc
new file mode 100644
index 0000000000..1dd0dc1d80
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/decoder/boolreader.cc
@@ -0,0 +1,45 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "../util/memory.hh"
+#include <stdlib.h>
+#include <string.h>
+//#include "./vpx_config.h"
+
+#include "boolreader.hh"
+
+//#include "vpx_dsp/prob.h"
+//#include "vpx_ports/mem.h"
+//#include "vpx_mem/vpx_mem.h"
+//#include "vpx_util/endian_inl.h"
+
+int r_bitcount = 0; // running index printed by vpx_read's DEBUG_ARICODER trace (declared extern in boolreader.hh)
+
+// Binds `r` to buffer[0, size) and reads the leading marker bit; returns 1 on a null buffer or a set marker bit.
+int vpx_reader_init(vpx_reader* r, const uint8_t* buffer, size_t size) {
+    if (!buffer && size) {
+        return 1;
+    }
+    r->buffer = buffer;
+    r->buffer_end = buffer + size;
+    r->value = 0;
+    r->range = 255;
+    r->count = -8; // nothing buffered yet: count tracks the bits held in `value` minus 8
+    vpx_reader_fill(r);
+    return vpx_read(r, 128, Billing::HEADER) ? 1 : 0; // marker bit
+}
+
+// Gives back one input byte per iteration while r->count stays above CHAR_BIT, then returns r->buffer.
+const uint8_t* vpx_reader_find_end(vpx_reader* r) {
+    // Only rewinds while count is below BD_VALUE_SIZE; larger counts are left untouched.
+    for (; r->count > CHAR_BIT && r->count < BD_VALUE_SIZE; r->count -= CHAR_BIT) {
+        --r->buffer;
+    }
+    return r->buffer;
+}
diff --git a/codec/L2/demos/leptonEnc/host/vp8/decoder/boolreader.hh b/codec/L2/demos/leptonEnc/host/vp8/decoder/boolreader.hh
new file mode 100644
index 0000000000..86eff70ac2
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/decoder/boolreader.hh
@@ -0,0 +1,273 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE banner below
+ *  An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the VPX_AUTHORS file in this directory
+ */
+/*
+Copyright (c) 2010, Google Inc. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
+following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of conditions and the following
+disclaimer.
+
+Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
+disclaimer in the documentation and/or other materials provided with the distribution.
+
+Neither the name of Google nor the names of its contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,
+INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef VPX_DSP_BITREADER_H_
+#define VPX_DSP_BITREADER_H_
+
+#include <stddef.h>
+#include <assert.h>
+#include <limits.h>
+#include <stdint.h>
+#include "vpx_config.hh"
+#include "billing.hh"
+#include "../model/numeric.hh"
+//#include "vpx_ports/mem.h"
+//#include "vpx/vp8dx.h"
+//#include "vpx/vpx_integer.h"
+//#include "vpx_dsp/prob.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef size_t BD_VALUE;
+
+#define BD_VALUE_SIZE ((int)sizeof(BD_VALUE) * CHAR_BIT)
+
+// This is meant to be a large, positive constant that can still be efficiently
+// loaded as an immediate (on platforms like ARM, for example).
+// Even relatively modest values like 100 would work fine.
+#define LOTS_OF_BITS 0x40000000
+
+typedef struct {
+    // Be careful when reordering this struct, it may impact the cache negatively.
+    BD_VALUE value;            // bit window; the coder compares its top byte against the split
+    unsigned int range;        // current interval width (initialised to 255 by vpx_reader_init)
+    int count;                 // bits held in `value` minus 8; negative means a refill is due
+    const uint8_t* buffer_end; // one past the last input byte
+    const uint8_t* buffer;     // next input byte to load
+    //  vpx_decrypt_cb decrypt_cb;
+    //  void *decrypt_state;
+    uint8_t clear_buffer[sizeof(BD_VALUE) + 1]; // NOTE(review): not referenced by the code visible in this header
+} vpx_reader;
+
+int vpx_reader_init(vpx_reader* r, const uint8_t* buffer, size_t size);
+
+static INLINE void vpx_reader_fill(vpx_reader* r) { // tops up r->value with as many whole input bytes as fit
+    const uint8_t* const buffer_end = r->buffer_end;
+    const uint8_t* buffer = r->buffer;
+    const uint8_t* buffer_start = buffer;
+    BD_VALUE value = r->value;
+    int count = r->count;
+    const size_t bytes_left = buffer_end - buffer;
+    const size_t bits_left = bytes_left * CHAR_BIT;
+    int shift = BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT); // bit position where the next input byte lands
+    // Fast path: more than a full BD_VALUE of input remains, so load sizeof(BD_VALUE) bytes in one go.
+    if (bits_left > BD_VALUE_SIZE) {
+        const int bits = (shift & 0xfffffff8) + CHAR_BIT; // whole bytes that fit, expressed in bits
+        BD_VALUE nv;
+        BD_VALUE big_endian_values;
+        memcpy(&big_endian_values, buffer, sizeof(BD_VALUE));
+        if (sizeof(BD_VALUE) == 8) { // byte-order conversion puts the first input byte in the most significant position
+            big_endian_values = htobe64(big_endian_values);
+        } else {
+            big_endian_values = htobe32(big_endian_values);
+        }
+        nv = big_endian_values >> (BD_VALUE_SIZE - bits); // keep only the top `bits` bits
+        count += bits;
+        buffer += (bits >> 3);
+        value = r->value | (nv << (shift & 0x7));
+    } else { // slow path near the end of the input: append one byte at a time
+        const int bits_over = (int)(shift + CHAR_BIT - bits_left); // bits wanted minus bits still available
+        int loop_end = 0;
+        if (bits_over >= 0) {
+            count += LOTS_OF_BITS; // marks that the end of the data has been reached
+            loop_end = bits_over;
+        }
+        // Copy whatever bytes remain; skipped only when nothing is left and the window cannot be filled.
+        if (bits_over < 0 || bits_left) {
+            while (shift >= loop_end) {
+                count += CHAR_BIT;
+                value |= (BD_VALUE)*buffer++ << shift;
+                shift -= CHAR_BIT;
+            }
+        }
+    }
+    // NOTE: Variable 'buffer' may not relate to 'r->buffer' after decryption,
+    // so we increase 'r->buffer' by the amount that 'buffer' moved, rather than
+    // assign 'buffer' to 'r->buffer'.
+    r->buffer += buffer - buffer_start;
+    r->value = value;
+    r->count = count;
+}
+
+const uint8_t* vpx_reader_find_end(vpx_reader* r);
+
+// Check if we have reached the end of the buffer.
+//
+// Variable 'count' stores the number of bits in the 'value' buffer, minus
+// 8. The top byte is part of the algorithm, and the remainder is buffered
+// to be shifted into it. So if count == 8, the top 16 bits of 'value' are
+// occupied, 8 for the algorithm and 8 in the buffer.
+//
+// When reading a byte from the user's buffer, count is filled with 8 and
+// one byte is filled into the value buffer. When we reach the end of the
+// data, count is additionally filled with LOTS_OF_BITS. So when
+// count == LOTS_OF_BITS - 1, the user's data has been exhausted.
+//
+// 1 if we have tried to decode bits after the end of stream was encountered.
+// 0 No error.
+#define vpx_reader_has_error(r) ((r)->count > BD_VALUE_SIZE && (r)->count < LOTS_OF_BITS)
+
+extern int r_bitcount;
+constexpr static uint8_t vpx_norm[256] = {
+    0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+/*
+inline unsigned int count_leading_zeros_uint8(uint8_t split) {
+    unsigned int shift = 0;
+    if (split < 128) {
+        shift = 1;
+    }
+    if (split < 64) {
+        shift = 2;
+    }
+    if (split < 32) {
+        shift = 3;
+    }
+    if (split < 16) {
+        shift = 4;
+    }
+    if (split < 8) {
+        shift = 5;
+    }
+    if (split < 4) {
+        shift = 6;
+    }
+    if (split == 1) {
+        shift = 7;
+    }
+    return shift;
+}
+    */
+// Number of leading zero bits in the byte `v`, i.e. how far a range must be shifted left for
+// its top bit (0x80) to be set again. Read directly from the vpx_norm table:
+//   v == 1        -> 7
+//   v in 2..3     -> 6
+//   v in 4..7     -> 5
+//   v in 8..15    -> 4
+//   v in 16..31   -> 3
+//   v in 32..63   -> 2
+//   v in 64..127  -> 1
+//   v in 128..255 -> 0
+// vpx_norm[0] is 0, so v == 0 returns 0 (not 8) and no precondition on v is enforced.
+//
+// The table lookup is the whole implementation. The __builtin_clz and shift-and-mask
+// variants that used to sit after the return statement were unreachable (including their
+// assert(v)), so keeping them only invited "unreachable code" warnings and confusion about
+// which version is live.
+#ifndef _WIN32
+__attribute__((always_inline))
+#endif
+inline uint8_t
+count_leading_zeros_uint8(uint8_t v) {
+    return vpx_norm[v];
+}
+
+// Slow path of vpx_read, taken when r->count is negative: refills the bit window from the
+// input and then decodes one bit against the caller's precomputed `split`. Returns the
+// decoded bit and stores the renormalised value, count and range back into `r`.
+inline bool vpx_reader_fill_and_read(vpx_reader* r, unsigned int split, Billing bill) {
+    // Lift the split into the top byte of the window so both can be compared directly.
+    const BD_VALUE split_in_window = (BD_VALUE)split << (BD_VALUE_SIZE - CHAR_BIT);
+    // The comparison below must see the refilled window, so fill before reading r->value.
+    vpx_reader_fill(r);
+
+    BD_VALUE window = r->value;
+    const bool decoded = !(window < split_in_window);
+    unsigned int new_range = split;
+    if (decoded) {
+        // A set bit selects the part of the interval above the split.
+        new_range = r->range - split;
+        window -= split_in_window;
+    }
+
+    // Renormalise by the number of leading zeros of the new range (see count_leading_zeros_uint8).
+    const unsigned int norm = count_leading_zeros_uint8(new_range);
+    write_bit_bill(bill, true, norm);
+    r->value = window << norm;
+    r->count = r->count - norm;
+    r->range = new_range << norm;
+
+    return decoded;
+}
+#ifndef _WIN32
+__attribute__((always_inline))
+#endif
+inline bool
+vpx_read(vpx_reader* r, int prob, Billing bill) { // decodes one bit; `prob` positions the split inside the current range
+    unsigned int split = (r->range * prob + (256 - prob)) >> CHAR_BIT;
+    BD_VALUE value = r->value;
+    int count = r->count;
+    BD_VALUE bigsplit = (BD_VALUE)split << (BD_VALUE_SIZE - CHAR_BIT); // split aligned with the top byte of `value`
+    bool bit = (value >= bigsplit);
+    unsigned int range;
+    if (bit) {
+        range = r->range - split;
+        value = value - bigsplit;
+    } else {
+        range = split;
+    }
+    if (__builtin_expect(r->count < 0, 0)) { // window exhausted (hinted as rare): the work above is discarded and redone
+        bit = vpx_reader_fill_and_read(r, split, bill);
+#ifdef DEBUG_ARICODER
+        fprintf(stderr, "R %d %d %d\n", r_bitcount++, prob, bit);
+#endif
+        return bit;
+    }
+    // Renormalise: shift range and value left by the leading-zero count of the new range.
+    unsigned int shift = count_leading_zeros_uint8(range);
+    range <<= shift;
+    value <<= shift;
+    count -= shift;
+    write_bit_bill(bill, true, shift);
+    r->value = value;
+    r->count = count;
+    r->range = range;
+#ifdef DEBUG_ARICODER
+    fprintf(stderr, "R %d %d %d\n", r_bitcount++, prob, bit);
+#endif
+
+    return bit;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VPX_DSP_BITREADER_H_
diff --git a/codec/L2/demos/leptonEnc/host/vp8/decoder/coefs.hh b/codec/L2/demos/leptonEnc/host/vp8/decoder/coefs.hh
new file mode 100644
index 0000000000..1e109ddc0d
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/decoder/coefs.hh
@@ -0,0 +1,72 @@
+#ifndef _COEFS_HH
+#define _COEFS_HH
+
+#include "bool_decoder.hh"
+#include "model.hh"
+#include <limits>
+class BoolEncoder;
+
+template <uint64_t x, uint64_t n = 32>
+struct static_log2 { // compile-time floor(log2(x)) for x >= 1, found by testing shift widths n, n/2, ..., 1 in turn
+    enum : uint64_t { c = ((x >> n) > 0) ? 1 : 0 }; // unnamed on purpose: two enums both named uint64_t would clash
+    enum : uint64_t { value = c * n + static_log2<(x >> (c * n)), n / 2>::value }; // c is 1 when x needs a shift by n
+};
+template <>
+struct static_log2<1, 0> { // recursion terminator: log2(1) == 0
+    enum : uint64_t { value = 0 };
+};
+template <int n>
+struct static_ceil_log2 { // compile-time ceil(log2(n)): one more than the floor unless n is an exact power of two
+    enum : uint64_t { value = (1 << static_log2<n>::value) < n ? static_log2<n>::value + 1 : static_log2<n>::value };
+};
+
+template <typename intt>
+intt log2(intt v) { // floor(log2(v)) via a binary search over bit masks; yields 0 for v == 0 as well as for v == 1
+    constexpr int loop_max = (int)(sizeof(intt) == 1 ? 2 : (sizeof(intt) == 2 ? 3 : (sizeof(intt) == 4 ? 4 : 5)));
+    const intt b[] = {
+        0x2, 0xC, 0xF0, (intt)0xFF00, (intt)0xFFFF0000U, std::numeric_limits<intt>::max() - (intt)0xFFFFFFFFU};
+    const intt S[] = {1, 2, 4, 8, 16, 32};
+    // No `register` here: the specifier was deprecated in C++11 and removed in C++17, where it fails to compile.
+    intt r = 0; // result of log2(v) will go here
+
+    for (signed int i = loop_max; i >= 0; i--) // start from the widest mask that fits in intt
+    {
+        if (v & b[i]) {
+            v >>= S[i];
+            r |= S[i];
+        }
+    }
+    return r;
+}
+
+// Leaves values below `highest_likely_value` unchanged; larger ones are compressed to a base offset plus log2(number).
+template <int bits, int highest_likely_value>
+int skew_log(int number) {
+    static_assert(static_ceil_log2<highest_likely_value>::value <= bits,
+                  "The highest likely number must be less than the number of bits provided");
+    if (!(number < highest_likely_value)) {
+        int magnitude; // log2 of `number` after truncating it to the narrowest width that `bits` permits
+        if (bits > 32) {
+            magnitude = log2<uint64_t>((uint64_t)number);
+        } else if (bits > 16) {
+            magnitude = log2<uint32_t>((uint32_t)number);
+        } else {
+            magnitude = (bits > 8) ? (int)log2<uint16_t>((uint16_t)number) : (int)log2<uint8_t>((uint8_t)number);
+        }
+        const int base = highest_likely_value - static_log2<highest_likely_value>::value;
+        return std::min(base + magnitude, (1 << bits));
+    }
+    return number;
+}
+
+template <unsigned int prev_coef_contexts = PREV_COEF_CONTEXTS>
+int combine_priors(int16_t a, int16_t b) { // packs skew_log(|a|) and skew_log(|b|) into one index below prev_coef_contexts
+    constexpr int half_bits = static_ceil_log2<prev_coef_contexts - 1>::value / 2; // bit budget for each of the two priors
+    const int max_likely_value = 6;
+    const int16_t low = skew_log<half_bits, max_likely_value>(abs(a));
+    const int16_t high = skew_log<half_bits, max_likely_value>(abs(b));
+    const unsigned int packed = low + (1U << half_bits) * high; // `high` occupies the bits above `low`
+    return std::min(packed, prev_coef_contexts - 1);
+}
+
+#endif /* _COEFS_HH */
diff --git a/codec/L2/demos/leptonEnc/host/vp8/decoder/decoder.cc b/codec/L2/demos/leptonEnc/host/vp8/decoder/decoder.cc
new file mode 100644
index 0000000000..48c076b1c7
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/decoder/decoder.cc
@@ -0,0 +1,293 @@
+#include "bool_decoder.hh"
+#include "boolreader.hh"
+#include "model.hh"
+#include "../../lepton/idct.hh"
+using namespace std;
+
+// Returns 0 for v == 0 and v - 3 for every other input; the result is narrowed to 8 bits,
+// so v == 1 and v == 2 wrap around to 254 and 255.
+uint8_t prefix_unremap(uint8_t v) {
+    const uint8_t shifted = (uint8_t)(v - 3);
+    return v ? shifted : (uint8_t)0;
+}
+#define LOG_DELTA_X_EDGE LogTable256[raster_to_aligned.kat<2>() - raster_to_aligned.kat<1>()]
+#define LOG_DELTA_Y_EDGE LogTable256[raster_to_aligned.kat<16>() - raster_to_aligned.kat<8>()]
+#ifdef _WIN32 // log_delta_*_edge: the left-shift decode_one_edge applies to the lane index when storing a coefficient
+#define log_delta_x_edge LOG_DELTA_X_EDGE
+#define log_delta_y_edge LOG_DELTA_Y_EDGE
+// (With _WIN32 defined the names stay plain macros, so the table lookup is spelled out at each use.)
+#else
+enum { // otherwise the same expressions become enumerators, which requires them to be constant expressions
+    log_delta_x_edge = LOG_DELTA_X_EDGE,
+    log_delta_y_edge = LOG_DELTA_Y_EDGE,
+};
+#endif
+
+template <bool all_neighbors_present, BlockType color, bool horizontal>
+void decode_one_edge(BlockContext mcontext, // decodes up to 7 edge coefficients: coords 1..7 if horizontal, else 8,16..56
+                     BoolDecoder& decoder,
+                     ProbabilityTables<all_neighbors_present, color>& probability_tables,
+                     uint8_t num_nonzeros_7x7,
+                     uint8_t est_eob,
+                     ProbabilityTablesBase& pt) {
+    ConstBlockContext context = mcontext.copy();
+    auto prob_edge_eob = horizontal ? probability_tables.x_nonzero_counts_8x1(pt, est_eob, num_nonzeros_7x7)
+                                    : probability_tables.y_nonzero_counts_1x8(pt, est_eob, num_nonzeros_7x7);
+    // Defaults below describe the horizontal edge; the vertical edge overrides them.
+    uint8_t aligned_block_offset = raster_to_aligned.at(1);
+    unsigned int log_edge_step = log_delta_x_edge;
+    uint8_t delta = 1;
+    uint8_t zig15offset = 0;
+    if (!horizontal) {
+        delta = 8;
+        log_edge_step = log_delta_y_edge;
+        zig15offset = 7;
+        aligned_block_offset = raster_to_aligned.at(8);
+    }
+    uint8_t num_nonzeros_edge = 0;
+    int16_t decoded_so_far = 0;
+    for (int i = 2; i >= 0; --i) { // 3-bit count of non-zero edge coefficients, most significant bit first
+        int cur_bit = decoder.get(prob_edge_eob.at(i, decoded_so_far), Billing::NZ_EDGE) ? 1 : 0;
+        num_nonzeros_edge |= (cur_bit << i);
+        decoded_so_far <<= 1;
+        decoded_so_far |= cur_bit;
+    }
+    if (num_nonzeros_edge > 7) { // cannot fire for a value assembled from 3 bits; kept as a consistency guard
+        custom_exit(ExitCode::STREAM_INCONSISTENT);
+    }
+    unsigned int coord = delta;
+    for (int lane = 0; lane < 7 && num_nonzeros_edge; ++lane, coord += delta, ++zig15offset) { // stops once all non-zeros are read
+        ProbabilityTablesBase::CoefficientContext prior = {0, 0, 0};
+        if (ProbabilityTablesBase::MICROVECTORIZE) {
+            if (horizontal) {
+                prior = probability_tables.update_coefficient_context8_horiz(coord, context, num_nonzeros_edge);
+            } else {
+                prior = probability_tables.update_coefficient_context8_vert(coord, context, num_nonzeros_edge);
+            }
+        } else {
+            prior = probability_tables.update_coefficient_context8(coord, context, num_nonzeros_edge);
+        }
+        auto exp_array = probability_tables.exponent_array_x(pt, coord, zig15offset, prior);
+        uint8_t length = 0;
+        bool nonzero = false;
+        auto* exp_branch = exp_array.begin();
+        for (; length != MAX_EXPONENT; ++length) { // unary-coded bit length: count 1-bits until a 0 or MAX_EXPONENT
+            bool cur_bit = decoder.get(*exp_branch++, (Billing)((int)Billing::BITMAP_EDGE + std::min((int)length, 4)));
+            if (!cur_bit) {
+                break;
+            }
+            nonzero = true;
+        }
+        int16_t coef = 0;
+        if (nonzero) {
+            uint8_t min_threshold = probability_tables.get_noise_threshold(coord);
+            auto& sign_prob = probability_tables.sign_array_8(pt, coord, prior);
+            bool neg = !decoder.get(sign_prob, Billing::SIGN_EDGE); // a decoded 0 means the coefficient is negative
+            coef = (1 << (length - 1)); // implicit leading 1 of the magnitude
+            --num_nonzeros_edge;
+            if (length > 1) {
+                int i = length - 2;
+                if (i >= min_threshold) { // bits at or above min_threshold use the context-tracking threshold model
+                    auto thresh_prob =
+                        probability_tables.residual_thresh_array(pt, coord, length, prior, min_threshold);
+                    uint16_t decoded_so_far = 1;
+                    for (; i >= min_threshold; --i) {
+                        int cur_bit = (decoder.get(thresh_prob.at(decoded_so_far), Billing::RES_EDGE) ? 1 : 0);
+                        coef |= (cur_bit << i);
+                        decoded_so_far <<= 1;
+                        if (cur_bit) {
+                            decoded_so_far |= 1;
+                        }
+                        // since we are not strict about rejecting jpegs with out of range coefs
+                        // we just make those less efficient by reusing the same probability bucket
+                        decoded_so_far = std::min(decoded_so_far, (uint16_t)(thresh_prob.size() - 1));
+                    }
+#ifdef ANNOTATION_ENABLED
+                    probability_tables.residual_thresh_array_annot_update(coord, decoded_so_far >> 2);
+#endif
+                }
+                auto res_prob = probability_tables.residual_noise_array_x(pt, coord, prior);
+                for (; i >= 0; --i) { // remaining low bits use one probability per bit position
+                    coef |= ((decoder.get(res_prob.at(i), Billing::RES_EDGE) ? 1 : 0) << i);
+                }
+            }
+            if (neg) {
+                coef = -coef;
+            }
+        }
+        mcontext.here().raw_data()[aligned_block_offset + (lane << log_edge_step)] = coef; // lanes past the early exit are not written here
+    }
+}
+
+template <bool all_neighbors_present, BlockType color>
+void decode_edge(BlockContext mcontext, // decodes both block edges: the horizontal one first, then the vertical one
+                 BoolDecoder& decoder,
+                 ProbabilityTables<all_neighbors_present, color>& probability_tables,
+                 uint8_t num_nonzeros_7x7,
+                 uint8_t eob_x, // forwarded as est_eob for the horizontal edge
+                 uint8_t eob_y, // forwarded as est_eob for the vertical edge
+                 ProbabilityTablesBase& pt) {
+    decode_one_edge<all_neighbors_present, color, true>(mcontext, decoder, probability_tables, num_nonzeros_7x7, eob_x,
+                                                        pt);
+    decode_one_edge<all_neighbors_present, color, false>(mcontext, decoder, probability_tables, num_nonzeros_7x7, eob_y,
+                                                         pt);
+}
+
+template <bool all_neighbors_present, BlockType color>
+void parse_tokens(BlockContext context, // decodes one block: the 7x7 interior, then both edges, then the DC term
+                  BoolDecoder& decoder,
+                  ProbabilityTables<all_neighbors_present, color>& probability_tables,
+                  ProbabilityTablesBase& pt) {
+    context.here().bzero();
+    auto num_nonzeros_prob = probability_tables.nonzero_counts_7x7(pt, context.copy());
+    uint8_t num_nonzeros_7x7 = 0;
+    int decoded_so_far = 0;
+    for (int index = 5; index >= 0; --index) { // 6-bit count of non-zero 7x7 coefficients, most significant bit first
+        int cur_bit = (decoder.get(num_nonzeros_prob.at(index, decoded_so_far), Billing::NZ_7x7) ? 1 : 0);
+        num_nonzeros_7x7 |= (cur_bit << index);
+        decoded_so_far <<= 1;
+        decoded_so_far |= cur_bit;
+    }
+    if (num_nonzeros_7x7 > 49) {
+        custom_exit(ExitCode::STREAM_INCONSISTENT); // this is a corrupt file: dont decode further
+    }
+    uint8_t eob_x = 0; // largest column index holding a non-zero 7x7 coefficient
+    uint8_t eob_y = 0; // largest row index holding a non-zero 7x7 coefficient
+    uint8_t num_nonzeros_left_7x7 = num_nonzeros_7x7;
+    Sirikata::AlignedArray1d<short, 8> avg;
+    for (unsigned int zz = 0; zz < 49 && num_nonzeros_left_7x7; ++zz) { // walk unzigzag49 order until every non-zero is read
+        unsigned int coord = unzigzag49[zz];
+        if ((zz & 7) == 0) { // every 8th position: refresh the batch of precomputed averages (OPTIMIZED_7x7 only)
+#ifdef OPTIMIZED_7x7
+            probability_tables.compute_aavrg_vec(zz, context.copy(), avg.begin());
+#endif
+        }
+        unsigned int b_x = (coord & 7);
+        unsigned int b_y = (coord >> 3);
+        assert((coord & 7) > 0 && (coord >> 3) > 0 && "this does the DC and the lower 7x7 AC");
+        {
+            ProbabilityTablesBase::CoefficientContext prior;
+
+#ifdef OPTIMIZED_7x7
+            prior = probability_tables.update_coefficient_context7x7_precomp(zz, avg[zz & 7], context.copy(),
+                                                                             num_nonzeros_left_7x7);
+#else
+            prior = probability_tables.update_coefficient_context7x7(coord, zz, context.copy(), num_nonzeros_left_7x7);
+#endif
+            auto exp_prob = probability_tables.exponent_array_7x7(pt, coord, zz, prior);
+            uint8_t length;
+            bool nonzero = false;
+            auto exp_branch = exp_prob.begin();
+            for (length = 0; length != MAX_EXPONENT; ++length) { // unary-coded bit length of the magnitude
+                bool cur_bit =
+                    decoder.get(*exp_branch++, (Billing)((unsigned int)Billing::BITMAP_7x7 + std::min((int)length, 4)));
+                if (!cur_bit) {
+                    break;
+                }
+                nonzero = true;
+            }
+            int16_t coef = 0;
+            bool neg = false;
+            if (nonzero) {
+                --num_nonzeros_left_7x7;
+                auto& sign_prob = probability_tables.sign_array_7x7(pt, coord, prior);
+                neg = !decoder.get(sign_prob, Billing::SIGN_7x7); // a decoded 0 means the coefficient is negative
+                eob_x = std::max(eob_x, (uint8_t)b_x);
+                eob_y = std::max(eob_y, (uint8_t)b_y);
+                coef = (1 << (length - 1)); // implicit leading 1 of the magnitude
+                if (length > 1) {
+                    auto res_prob = probability_tables.residual_noise_array_7x7(pt, coord, prior);
+                    for (int i = length - 2; i >= 0; --i) { // remaining magnitude bits, high to low
+                        coef |= ((decoder.get(res_prob.at(i), Billing::RES_7x7) ? 1 : 0) << i);
+                    }
+                }
+                if (neg) {
+                    coef = -coef;
+                }
+            }
+#ifdef OPTIMIZED_7x7
+            context.here().coef.at(zz + AlignedBlock::AC_7x7_INDEX) = coef;
+#else
+            // this should work in all cases but doesn't utilize that the zz is related
+            context.here().mutable_coefficients_raster(raster_to_aligned.at(coord)) = coef;
+#endif
+        }
+    }
+    decode_edge(context, decoder, probability_tables, num_nonzeros_7x7, eob_x, eob_y, pt);
+    Sirikata::AlignedArray1d<int16_t, 64> outp_sans_dc; // NOTE(review): only written via adv_predict_dc_pix below — confirm it is initialised otherwise
+    int uncertainty = 0;
+    int uncertainty2 = 0;
+    int predicted_dc;
+    if (advanced_dc_prediction) {
+        predicted_dc =
+            probability_tables.adv_predict_dc_pix(context.copy(), outp_sans_dc.begin(), &uncertainty, &uncertainty2);
+    } else {
+        predicted_dc = probability_tables.predict_dc_dct(context.copy());
+    }
+    { // dc
+        uint8_t length;
+        bool nonzero = false;
+        uint16_t len_abs_mxm = uint16bit_length(abs(uncertainty));
+        uint16_t len_abs_offset_to_closest_edge = uint16bit_length(abs(uncertainty2));
+        if (!advanced_dc_prediction) { // without advanced prediction both contexts come from the 7x7 prior instead
+            ProbabilityTablesBase::CoefficientContext prior;
+
+            prior = probability_tables.update_coefficient_context7x7(0, raster_to_aligned.at(0), context.copy(),
+                                                                     num_nonzeros_7x7);
+            len_abs_mxm = prior.bsr_best_prior;
+            len_abs_offset_to_closest_edge = prior.num_nonzeros_bin;
+        }
+        auto exp_prob = probability_tables.exponent_array_dc(pt, len_abs_mxm, len_abs_offset_to_closest_edge);
+        auto* exp_branch = exp_prob.begin();
+        for (length = 0; length < MAX_EXPONENT; ++length) { // unary-coded bit length of the DC delta
+            bool cur_bit = decoder.get(*exp_branch++, (Billing)((int)Billing::EXP0_DC + std::min((int)length, 4)));
+            if (!cur_bit) {
+                break;
+            }
+            nonzero = true;
+        }
+        int16_t coef = 0;
+        if (nonzero) {
+            auto& sign_prob = probability_tables.sign_array_dc(pt, uncertainty, uncertainty2);
+            bool neg = !decoder.get(sign_prob, Billing::SIGN_DC);
+            coef = (1 << (length - 1));
+            if (length > 1) {
+                auto res_prob = probability_tables.residual_array_dc(pt, len_abs_mxm, len_abs_offset_to_closest_edge);
+                for (int i = length - 2; i >= 0; --i) {
+                    coef |= ((decoder.get(res_prob.at(i), Billing::RES_DC) ? 1 : 0) << i);
+                }
+            }
+            if (neg) {
+                coef = -coef;
+            }
+        }
+        context.here().dc() = coef; // the decoded value is stored first and then rewritten from predicted_dc just below
+    }
+    context.here().dc() = probability_tables.adv_predict_or_unpredict_dc(context.here().dc(), true, predicted_dc);
+    context.num_nonzeros_here->set_num_nonzeros(num_nonzeros_7x7);
+    // Record this block's per-edge data, passing outp_sans_dc, the colour's quantization table and the final DC.
+    context.num_nonzeros_here->set_horizontal(
+        outp_sans_dc.begin(), ProbabilityTablesBase::quantization_table((int)color), context.here().dc());
+    context.num_nonzeros_here->set_vertical(outp_sans_dc.begin(), ProbabilityTablesBase::quantization_table((int)color),
+                                            context.here().dc());
+}
+#ifdef ALLOW_FOUR_COLORS // explicit instantiations for BlockType::Ck, emitted only when ALLOW_FOUR_COLORS is defined
+template void parse_tokens(BlockContext,
+                           BoolDecoder&,
+                           ProbabilityTables<false, BlockType::Ck>&,
+                           ProbabilityTablesBase&);
+template void parse_tokens(BlockContext, BoolDecoder&, ProbabilityTables<true, BlockType::Ck>&, ProbabilityTablesBase&);
+#endif
+
+template void parse_tokens(BlockContext, BoolDecoder&, ProbabilityTables<false, BlockType::Y>&, ProbabilityTablesBase&);
+template void parse_tokens(BlockContext,
+                           BoolDecoder&,
+                           ProbabilityTables<false, BlockType::Cb>&,
+                           ProbabilityTablesBase&);
+template void parse_tokens(BlockContext,
+                           BoolDecoder&,
+                           ProbabilityTables<false, BlockType::Cr>&,
+                           ProbabilityTablesBase&);
+template void parse_tokens(BlockContext, BoolDecoder&, ProbabilityTables<true, BlockType::Y>&, ProbabilityTablesBase&);
+template void parse_tokens(BlockContext, BoolDecoder&, ProbabilityTables<true, BlockType::Cb>&, ProbabilityTablesBase&);
+template void parse_tokens(BlockContext, BoolDecoder&, ProbabilityTables<true, BlockType::Cr>&, ProbabilityTablesBase&);
diff --git a/codec/L2/demos/leptonEnc/host/vp8/decoder/decoder.hh b/codec/L2/demos/leptonEnc/host/vp8/decoder/decoder.hh
new file mode 100644
index 0000000000..3c6da435ba
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/decoder/decoder.hh
@@ -0,0 +1,10 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+#ifndef DECODER_HH
+#define DECODER_HH
+template <bool all_neighbors_present, BlockType color> // declaration only: this header carries no definition
+void parse_tokens(BlockContext context,
+                  BoolDecoder& data,
+                  ProbabilityTables<all_neighbors_present, color>& probability_tables,
+                  ProbabilityTablesBase& pt); // decodes one block's coefficients from `data` into `context`
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/vp8/decoder/vpx_bool_reader.hh b/codec/L2/demos/leptonEnc/host/vp8/decoder/vpx_bool_reader.hh
new file mode 100644
index 0000000000..dbbb9fa155
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/decoder/vpx_bool_reader.hh
@@ -0,0 +1,39 @@
+#include "boolreader.hh"
+
+// C++ wrapper around a vpx_reader that decodes bits against adaptive Branch probabilities.
+class VPXBoolReader {
+    vpx_reader bit_reader;
+#ifdef DEBUG_ARICODER
+    bool any_read = false; // becomes true on the first get() of this reader
+#endif
+
+   public:
+    // Leaves bit_reader untouched; call init() before the first get().
+    VPXBoolReader() {}
+    // Constructs a reader that is immediately bound to buffer[0, size).
+    VPXBoolReader(const uint8_t* buffer, size_t size) { init(buffer, size); }
+    // (Re)binds the reader to buffer[0, size).
+    // The status returned by vpx_reader_init is not propagated to the caller.
+    void init(const uint8_t* buffer, size_t size) { vpx_reader_init(&bit_reader, buffer, size); }
+
+    // Decodes one bit using branch.prob() and then lets the branch record the outcome.
+    // `bill` is passed through to vpx_read.
+#ifndef _WIN32
+    __attribute__((always_inline))
+#endif
+    bool
+    get(Branch& branch, Billing bill = Billing::RESERVED) {
+#ifdef DEBUG_ARICODER
+        if (!any_read) {
+            // First read on this reader: start its trace indices at the next multiple of 500000000.
+            static int readers_started = 0;
+            r_bitcount = readers_started * 500000000;
+            ++readers_started;
+            any_read = true;
+        }
+#endif
+        const bool bit = vpx_read(&bit_reader, branch.prob(), bill);
+        branch.record_obs_and_update(bit);
+        return bit;
+    }
+};
diff --git a/codec/L2/demos/leptonEnc/host/vp8/encoder/VPX_AUTHORS b/codec/L2/demos/leptonEnc/host/vp8/encoder/VPX_AUTHORS
new file mode 100644
index 0000000000..883fdc1da8
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/encoder/VPX_AUTHORS
@@ -0,0 +1,131 @@
+Aaron Watry <awatry@gmail.com>
+Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
+Adam Xu <adam@xuyaowu.com>
+Adrian Grange <agrange@google.com>
+Aℓex Converse <aconverse@google.com>
+Ahmad Sharif <asharif@google.com>
+Alexander Voronov <avoronov@graphics.cs.msu.ru>
+Alexis Ballier <aballier@gentoo.org>
+Alok Ahuja <waveletcoeff@gmail.com>
+Alpha Lam <hclam@google.com>
+A.Mahfoodh <ab.mahfoodh@gmail.com>
+Ami Fischman <fischman@chromium.org>
+Andoni Morales Alastruey <ylatuya@gmail.com>
+Andres Mejia <mcitadel@gmail.com>
+Andrew Russell <anrussell@google.com>
+Angie Chiang <angiebird@google.com>
+Aron Rosenberg <arosenberg@logitech.com>
+Attila Nagy <attilanagy@google.com>
+Brion Vibber <bvibber@wikimedia.org>
+changjun.yang <changjun.yang@intel.com>
+Charles 'Buck' Krasic <ckrasic@google.com>
+chm <chm@rock-chips.com>
+Christian Duvivier <cduvivier@google.com>
+Daniel Kang <ddkang@google.com>
+Deb Mukherjee <debargha@google.com>
+Dim Temp <dimtemp0@gmail.com>
+Dmitry Kovalev <dkovalev@google.com>
+Dragan Mrdjan <dmrdjan@mips.com>
+Ed Baker <edward.baker@intel.com>
+Ehsan Akhgari <ehsan.akhgari@gmail.com>
+Erik Niemeyer <erik.a.niemeyer@intel.com>
+Fabio Pedretti <fabio.ped@libero.it>
+Frank Galligan <fgalligan@google.com>
+Fredrik Söderquist <fs@opera.com>
+Fritz Koenig <frkoenig@google.com>
+Gaute Strokkenes <gaute.strokkenes@broadcom.com>
+Geza Lore <gezalore@gmail.com>
+Ghislain MARY <ghislainmary2@gmail.com>
+Giuseppe Scrivano <gscrivano@gnu.org>
+Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
+Guillaume Martres <gmartres@google.com>
+Guillermo Ballester Valor <gbvalor@gmail.com>
+Hangyu Kuang <hkuang@google.com>
+Hanno Böck <hanno@hboeck.de>
+Henrik Lundin <hlundin@google.com>
+Hui Su <huisu@google.com>
+Ivan Maltz <ivanmaltz@google.com>
+Jacek Caban <cjacek@gmail.com>
+Jacky Chen <jackychen@google.com>
+James Berry <jamesberry@google.com>
+James Yu <james.yu@linaro.org>
+James Zern <jzern@google.com>
+Jan Gerber <j@mailb.org>
+Jan Kratochvil <jan.kratochvil@redhat.com>
+Janne Salonen <jsalonen@google.com>
+Jeff Faust <jfaust@google.com>
+Jeff Muizelaar <jmuizelaar@mozilla.com>
+Jeff Petkau <jpet@chromium.org>
+Jia Jia <jia.jia@linaro.org>
+Jim Bankoski <jimbankoski@google.com>
+Jingning Han <jingning@google.com>
+Joey Parrish <joeyparrish@google.com>
+Johann Koenig <johannkoenig@google.com>
+John Koleszar <jkoleszar@google.com>
+Johnny Klonaris <google@jawknee.com>
+John Stark <jhnstrk@gmail.com>
+Joshua Bleecher Snyder <josh@treelinelabs.com>
+Joshua Litt <joshualitt@google.com>
+Julia Robson <juliamrobson@gmail.com>
+Justin Clift <justin@salasaga.org>
+Justin Lebar <justin.lebar@gmail.com>
+KO Myung-Hun <komh@chollian.net>
+Lawrence Velázquez <larryv@macports.org>
+Lou Quillio <louquillio@google.com>
+Luca Barbato <lu_zero@gentoo.org>
+Makoto Kato <makoto.kt@gmail.com>
+Mans Rullgard <mans@mansr.com>
+Marco Paniconi <marpan@google.com>
+Mark Mentovai <mark@chromium.org>
+Martin Ettl <ettl.martin78@googlemail.com>
+Martin Storsjo <martin@martin.st>
+Matthew Heaney <matthewjheaney@chromium.org>
+Michael Kohler <michaelkohler@live.com>
+Mike Frysinger <vapier@chromium.org>
+Mike Hommey <mhommey@mozilla.com>
+Mikhal Shemer <mikhal@google.com>
+Minghai Shang <minghai@google.com>
+Morton Jonuschat <yabawock@gmail.com>
+Nico Weber <thakis@chromium.org>
+Parag Salasakar <img.mips1@gmail.com>
+Pascal Massimino <pascal.massimino@gmail.com>
+Patrik Westin <patrik.westin@gmail.com>
+Paul Wilkins <paulwilkins@google.com>
+Pavol Rusnak <stick@gk2.sk>
+Paweł Hajdan <phajdan@google.com>
+Pengchong Jin <pengchong@google.com>
+Peter de Rivaz <peter.derivaz@gmail.com>
+Philip Jägenstedt <philipj@opera.com>
+Priit Laes <plaes@plaes.org>
+Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
+Rafaël Carré <funman@videolan.org>
+Ralph Giles <giles@xiph.org>
+Rob Bradford <rob@linux.intel.com>
+Ronald S. Bultje <rsbultje@gmail.com>
+Rui Ueyama <ruiu@google.com>
+Sami Pietilä <samipietila@google.com>
+Scott Graham <scottmg@chromium.org>
+Scott LaVarnway <slavarnway@google.com>
+Sean McGovern <gseanmcg@gmail.com>
+Sergey Ulanov <sergeyu@chromium.org>
+Shimon Doodkin <helpmepro1@gmail.com>
+Shunyao Li <shunyaoli@google.com>
+Stefan Holmer <holmer@google.com>
+Suman Sunkara <sunkaras@google.com>
+Taekhyun Kim <takim@nvidia.com>
+Takanori MATSUURA <t.matsuu@gmail.com>
+Tamar Levy <tamar.levy@intel.com>
+Tao Bai <michaelbai@chromium.org>
+Tero Rintaluoma <teror@google.com>
+Thijs Vermeir <thijsvermeir@gmail.com>
+Tim Kopp <tkopp@google.com>
+Timothy B. Terriberry <tterribe@xiph.org>
+Tom Finegan <tomfinegan@google.com>
+Vignesh Venkatasubramanian <vigneshv@google.com>
+Yaowu Xu <yaowu@google.com>
+Yongzhe Wang <yongzhe@google.com>
+Yunqing Wang <yunqingwang@google.com>
+Zoe Liu <zoeliu@google.com>
+Google Inc.
+The Mozilla Foundation
+The Xiph.Org Foundation
diff --git a/codec/L2/demos/leptonEnc/host/vp8/encoder/bool_encoder.hh b/codec/L2/demos/leptonEnc/host/vp8/encoder/bool_encoder.hh
new file mode 100644
index 0000000000..3402264530
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/encoder/bool_encoder.hh
@@ -0,0 +1,33 @@
+#ifndef BOOL_ENCODER_HH
+#define BOOL_ENCODER_HH
+
+#include <vector>
+#include <iostream>
+
+#include <assert.h>
+#include "branch.hh"
+#include "../../io/MemReadWriter.hh"
+#include "JpegArithmeticCoder.hh"
+#include "vpx_bool_writer.hh"
+/* Routines taken from ISO/IEC 10918-1 : 1993(E) */
+
+class JpegBoolEncoder : public Sirikata::MemReadWriter { // bool encoder built on Sirikata::ArithmeticCoder
+    Sirikata::ArithmeticCoder jpeg_coder_; // constructed with `true` below
+   public:
+    JpegBoolEncoder(const Sirikata::JpegAllocator<unsigned char>& alloc = Sirikata::JpegAllocator<unsigned char>())
+        : MemReadWriter(alloc), jpeg_coder_(true) {}
+    void put(const bool value, Branch& branch) { // codes `value` with the probability stored in `branch`
+        jpeg_coder_.arith_encode(this, &branch.probability_, value); // `this` is handed to the coder
+    }
+    void finish(std::vector<uint8_t, Sirikata::JpegAllocator<unsigned char> >& retval) {
+        jpeg_coder_.finish_encode(this); // finish the stream first ...
+        retval.swap(buffer()); // ... then exchange buffer() with the caller's vector
+    }
+};
+
+#ifdef JPEG_ENCODER // compile-time choice of the coder behind the BoolEncoder name
+class BoolEncoder : public JpegBoolEncoder {};
+#else
+class BoolEncoder : public VPXBoolWriter {}; // used whenever JPEG_ENCODER is not defined
+#endif
+#endif /* BOOL_ENCODER_HH */
diff --git a/codec/L2/demos/leptonEnc/host/vp8/encoder/boolwriter.cc b/codec/L2/demos/leptonEnc/host/vp8/encoder/boolwriter.cc
new file mode 100644
index 0000000000..cadc3425fa
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/encoder/boolwriter.cc
@@ -0,0 +1,35 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "boolwriter.hh"
+
+int w_bitcount = 0;
+
+void vpx_start_encode(vpx_writer* br, uint8_t* source) {
+    br->buffer = source; // output bytes are stored in `source`, starting at index 0
+    br->pos = 0;
+    br->isFirst = true; // note: pre_byte is not assigned by this function
+    br->run = 0;
+    br->count = -24; // coder state: count -24, range 255, low value 0
+    br->range = 255;
+    br->lowvalue = 0;
+    vpx_write(br, 0, 128, Billing::HEADER); // first symbol: a 0 bit at probability 128, billed as HEADER
+}
+
+void vpx_stop_encode(vpx_writer* br) {
+    // Terminate the stream with 32 zero bits at probability 128, billed as DELIMITERS.
+    for (int pad = 0; pad < 32; ++pad) vpx_write(br, 0, 128, Billing::DELIMITERS);
+    // Ensure there's no ambiguous collision with any index marker bytes: if the top three bits of the
+    // last byte written are 110, one extra zero byte is appended.
+    const bool last_byte_is_110xxxxx = (br->buffer[br->pos - 1] & 0xe0) == 0xc0;
+    if (last_byte_is_110xxxxx) br->buffer[br->pos++] = 0;
+}
diff --git a/codec/L2/demos/leptonEnc/host/vp8/encoder/boolwriter.hh b/codec/L2/demos/leptonEnc/host/vp8/encoder/boolwriter.hh
new file mode 100644
index 0000000000..87585fbbb4
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/encoder/boolwriter.hh
@@ -0,0 +1,127 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE banner below
+ *  An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the VPX_AUTHORS file in this directory
+ */
+/*
+Copyright (c) 2010, Google Inc. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
+following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of conditions and the following
+disclaimer.
+
+Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
+disclaimer in the documentation and/or other materials provided with the distribution.
+
+Neither the name of Google nor the names of its contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,
+INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef VPX_DSP_BITWRITER_H_
+#define VPX_DSP_BITWRITER_H_
+
+#include <stdint.h>
+
+#include "vpx_config.hh"
+#include "billing.hh"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct vpx_writer { // state of one bool encoder writing bytes into `buffer`
+    unsigned int lowvalue; // running low value of the coder; 0 after vpx_start_encode
+    unsigned int range; // current interval width; 255 after vpx_start_encode
+    int count; // starts at -24; vpx_write emits a byte once it reaches 0 or more, then subtracts 8
+    unsigned int pos; // index in `buffer` where the next byte is stored
+    unsigned short run; // zeroed by vpx_start_encode; not read by vpx_write -- TODO confirm its user
+    bool isFirst; // set to true by vpx_start_encode; not read by vpx_write -- TODO confirm its user
+    unsigned char pre_byte; // assigned by neither vpx_start_encode nor vpx_write -- TODO confirm its user
+    uint8_t* buffer; // output bytes; the pointer passed to vpx_start_encode
+} vpx_writer;
+
+void vpx_start_encode(vpx_writer* bc, uint8_t* buffer); // resets *bc to write into `buffer` and emits the first bit
+void vpx_stop_encode(vpx_writer* bc); // writes 32 trailing zero bits; may append one extra 0 byte
+
+extern int w_bitcount; // incremented by the DEBUG_ARICODER trace in vpx_write
+
+static INLINE void vpx_write(vpx_writer* br, int bit, int probability, Billing bill) {
+    unsigned int split;
+    int count = br->count;
+    unsigned int range = br->range;
+    unsigned int lowvalue = br->lowvalue;
+    unsigned int shift;
+    // Encodes one `bit` into `br`; the coder state is worked on in the locals above and stored back at the end.
+#ifdef DEBUG_ARICODER
+    // if (w_bitcount < 1000) {
+    fprintf(stderr, "W %d %d %d\n", w_bitcount++, probability, bit);
+//}
+#endif
+    // Split the interval: `probability` is in 1/256 units and `split` is the width used when bit == 0.
+    split = 1 + (((range - 1) * probability) >> 8);
+
+    range = split;
+
+    if (bit) {
+        lowvalue += split; // bit == 1 takes the upper part of the interval
+        range = br->range - split;
+    }
+    static constexpr uint8_t vpx_norm[256] = { // vpx_norm[r]: left shift that brings r into 128..255 (entry 0 is 0)
+        0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+    shift = vpx_norm[range];
+
+    range <<= shift;
+    count += shift;
+    write_bit_bill(bill, true, shift); // defined elsewhere; presumably accounting only -- TODO confirm
+
+    if (count >= 0) { // enough bits accumulated: one byte is emitted below
+        int offset = shift - count;
+
+        if ((lowvalue << (offset - 1)) & 0x80000000) { // carry out of the byte about to be emitted
+            int x = br->pos - 1;
+            // Propagate the carry backwards: each trailing 0xff wraps to 0, then the byte before them is incremented.
+            while (x >= 0 && br->buffer[x] == 0xff) {
+                br->buffer[x] = 0;
+                x--;
+            }
+            // NOTE(review): x is -1 here if every byte written so far was 0xff; nothing in this function rules that out.
+            br->buffer[x] += 1;
+        }
+
+        br->buffer[br->pos++] = (lowvalue >> (24 - offset));
+        lowvalue <<= offset;
+        shift = count; // only the bits left over after the emitted byte remain to be shifted in
+        lowvalue &= 0xffffff;
+        count -= 8;
+    }
+
+    lowvalue <<= shift;
+    br->count = count;
+    br->lowvalue = lowvalue;
+    br->range = range;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VPX_DSP_BITWRITER_H_
diff --git a/codec/L2/demos/leptonEnc/host/vp8/encoder/encoder.cc b/codec/L2/demos/leptonEnc/host/vp8/encoder/encoder.cc
new file mode 100644
index 0000000000..3678808ffc
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/encoder/encoder.cc
@@ -0,0 +1,389 @@
+#include "../util/memory.hh"
+#include "bool_encoder.hh"
+#include "boolwriter.hh"
+#include "jpeg_meta.hh"
+#include "numeric.hh"
+#include "model.hh"
+#include "encoder.hh"
+#include <map>
+#include <fstream>
+#include "../../lepton/idct.hh"
+#include "../util/debug.hh"
+using namespace std;
+
+// Maps 0 to 0 and every other value v to v + 3, narrowed to uint8_t on return.
+uint8_t prefix_remap(uint8_t v) {
+    // The sum is formed as int; inputs above 252 therefore wrap modulo 256 when returned.
+    const int shifted = v + 3;
+    return v == 0 ? 0 : shifted;
+}
+#ifdef TRACK_HISTOGRAM
+// Coefficient histograms, filled only in TRACK_HISTOGRAM builds: 0 is center, 1 is dc, 2 is edge
+map<int, int> histogram[3];
+// The global `blah` prints every histogram as "<tag>\t<count>\t<value>" lines when it is destroyed; tag is 'c', 'd' or 'e'.
+struct Blah {
+    ~Blah() {
+        for (int typ = 0; typ < 3; ++typ)
+            for (const auto& bucket : histogram[typ])
+                printf("%c\t%d\t%d\n", 'c' + typ, bucket.second, bucket.first);
+    }
+} blah;
+#endif
+
+enum { // shift amounts encode_one_edge uses to step between edge coefficients in raw_data(); presumably LogTable256[d] is log2(d) -- TODO confirm
+    log_delta_x_edge = LogTable256[raster_to_aligned.kat<2>() - raster_to_aligned.kat<1>()], // aligned distance between raster 1 and 2
+    log_delta_y_edge = LogTable256[raster_to_aligned.kat<16>() - raster_to_aligned.kat<8>()] // aligned distance between raster 8 and 16
+};
+// Encodes the non-zero count of one edge (raster 1..7 if `horizontal`, else raster 8,16,..,56), then its coefficients.
+template <bool all_neighbors_present, BlockType color, bool horizontal>
+void encode_one_edge(ConstBlockContext context, // block being encoded is context.here()
+                     BoolEncoder& encoder, // receives every bit via put()
+                     ProbabilityTables<all_neighbors_present, color>& probability_tables,
+                     uint8_t num_nonzeros_7x7, // selects the probabilities for the edge count
+                     uint8_t est_eob, // selects the probabilities for the edge count
+                     ProbabilityTablesBase& pt) {
+    uint8_t num_nonzeros_edge;
+    const AlignedBlock& block = context.here();
+    // Count the non-zero coefficients on this edge; `!!` turns each coefficient into 0 or 1.
+    if (horizontal) {
+        num_nonzeros_edge = (!!block.coefficients_raster(1)) + (!!block.coefficients_raster(2)) +
+                            (!!block.coefficients_raster(3)) + (!!block.coefficients_raster(4)) +
+                            (!!block.coefficients_raster(5)) + (!!block.coefficients_raster(6)) +
+                            (!!block.coefficients_raster(7));
+    } else {
+        num_nonzeros_edge = (!!block.coefficients_raster(1 * 8)) + (!!block.coefficients_raster(2 * 8)) +
+                            (!!block.coefficients_raster(3 * 8)) + (!!block.coefficients_raster(4 * 8)) +
+                            (!!block.coefficients_raster(5 * 8)) + (!!block.coefficients_raster(6 * 8)) +
+                            (!!block.coefficients_raster(7 * 8));
+    }
+
+    auto prob_edge_eob = horizontal ? probability_tables.x_nonzero_counts_8x1(pt, est_eob, num_nonzeros_7x7)
+                                    : probability_tables.y_nonzero_counts_1x8(pt, est_eob, num_nonzeros_7x7);
+    // Defaults below describe the horizontal edge; the `!horizontal` branch switches them to the vertical one.
+    uint8_t aligned_block_offset = raster_to_aligned.at(1);
+    unsigned int log_edge_step = log_delta_x_edge;
+    uint8_t delta = 1;
+    uint8_t zig15offset = 0;
+    if (!horizontal) {
+        delta = 8;
+        log_edge_step = log_delta_y_edge;
+        zig15offset = 7;
+        aligned_block_offset = raster_to_aligned.at(8);
+    }
+    int16_t serialized_so_far = 0;
+    for (int i = 2; i >= 0; --i) { // the count (0..7) goes out as three bits, most significant first
+        int cur_bit = (num_nonzeros_edge & (1 << i)) ? 1 : 0;
+        encoder.put(cur_bit, prob_edge_eob.at(i, serialized_so_far), Billing::NZ_EDGE);
+        serialized_so_far <<= 1;
+        serialized_so_far |= cur_bit;
+    }
+    // Walk the edge; the loop stops early once every non-zero coefficient has been written.
+    unsigned int coord = delta;
+    for (int lane = 0; lane < 7 && num_nonzeros_edge; ++lane, coord += delta, ++zig15offset) {
+        ProbabilityTablesBase::CoefficientContext prior;
+        if (ProbabilityTablesBase::MICROVECTORIZE) {
+            if (horizontal) {
+                prior = probability_tables.update_coefficient_context8_horiz(coord, context, num_nonzeros_edge);
+            } else {
+                prior = probability_tables.update_coefficient_context8_vert(coord, context, num_nonzeros_edge);
+            }
+        } else {
+            prior = probability_tables.update_coefficient_context8(coord, context, num_nonzeros_edge);
+        }
+        auto exp_array = probability_tables.exponent_array_x(pt, coord, zig15offset, prior);
+        int16_t coef = block.raw_data()[aligned_block_offset + (lane << log_edge_step)];
+#ifdef TRACK_HISTOGRAM
+        ++histogram[2][coef];
+#endif
+        uint16_t abs_coef = abs(coef);
+        uint8_t length = bit_length(abs_coef);
+        for (unsigned int i = 0; i < MAX_EXPONENT; ++i) { // bit length in unary: 1 while i != length, 0 at i == length
+            bool cur_bit = (length != i);
+            encoder.put(cur_bit, exp_array.at(i), (Billing)((unsigned int)Billing::BITMAP_EDGE + std::min(i, 4U)));
+            if (!cur_bit) {
+                break;
+            }
+        }
+        if (length > MAX_EXPONENT) { // checked only after the exponent bits above have been written
+            custom_exit(ExitCode::COEFFICIENT_OUT_OF_RANGE);
+        }
+        if (coef) {
+            uint8_t min_threshold = probability_tables.get_noise_threshold(coord);
+            auto& sign_prob = probability_tables.sign_array_8(pt, coord, prior);
+            encoder.put(coef >= 0, sign_prob, Billing::SIGN_EDGE); // sign bit: 1 for non-negative
+            --num_nonzeros_edge;
+            if (length > 1) { // bits below the top set bit; the top bit is implied by `length`
+                int i = length - 2;
+                if (i >= min_threshold) {
+                    auto thresh_prob =
+                        probability_tables.residual_thresh_array(pt, coord, length, prior, min_threshold);
+                    uint16_t encoded_so_far = 1;
+                    for (; i >= min_threshold; --i) { // bits at index >= min_threshold: probability depends on the bits sent so far
+                        int cur_bit = (abs_coef & (1 << i)) ? 1 : 0;
+                        encoder.put(cur_bit, thresh_prob.at(encoded_so_far), Billing::RES_EDGE);
+                        encoded_so_far <<= 1;
+                        if (cur_bit) {
+                            encoded_so_far |= 1;
+                        }
+                        // since we are not strict about rejecting jpegs with out of range coefs
+                        // we just make those less efficient by reusing the same probability bucket
+                        encoded_so_far = std::min(encoded_so_far, (uint16_t)(thresh_prob.size() - 1));
+                    }
+#ifdef ANNOTATION_ENABLED
+                    probability_tables.residual_thresh_array_annot_update(coord, encoded_so_far >> 2);
+#endif
+                }
+                auto res_prob = probability_tables.residual_noise_array_x(pt, coord, prior);
+                for (; i >= 0; --i) { // remaining low bits: probability chosen by bit position only
+                    encoder.put((abs_coef & (1 << i)) ? 1 : 0, res_prob.at(i), Billing::RES_EDGE);
+                }
+            }
+        }
+    }
+}
+
+template <bool all_neighbors_present, BlockType color>
+void encode_edge(ConstBlockContext context,
+                 BoolEncoder& encoder, // both passes write to this encoder, horizontal first
+                 ProbabilityTables<all_neighbors_present, color>& probability_tables,
+                 uint8_t num_nonzeros_7x7,
+                 uint8_t eob_x, // est_eob of the horizontal pass
+                 uint8_t eob_y, // est_eob of the vertical pass
+                 ProbabilityTablesBase& pt) {
+    encode_one_edge<all_neighbors_present, color, /*horizontal=*/true>(
+        context, encoder, probability_tables, num_nonzeros_7x7, eob_x, pt);
+    encode_one_edge<all_neighbors_present, color, /*horizontal=*/false>(
+        context, encoder, probability_tables, num_nonzeros_7x7, eob_y, pt);
+}
+// used for debugging
+static int k_debug_block[(int)ColorChannel::NumBlockTypes]; // per color: next write offset into LeptonDebug::raw_YCbCr (see serialize_tokens)
+int total_error = 0; // this and the five counters below have external linkage and are not referenced elsewhere in this file
+int total_signed_error = 0;
+int amd_err = 0;
+int med_err = 0;
+int avg_err = 0;
+int ori_err = 0;
+// Writes one block to `encoder`: the 7x7 non-zero count, the 7x7 coefficients, both edges, and finally the predicted DC.
+template <bool all_neighbors_present, BlockType color>
+void serialize_tokens(ConstBlockContext context, // block being encoded is context.here()
+                      BoolEncoder& encoder, // receives every bit via put()
+                      ProbabilityTables<all_neighbors_present, color>& probability_tables,
+                      ProbabilityTablesBase& pt) {
+    auto num_nonzeros_prob = probability_tables.nonzero_counts_7x7(pt, context);
+    int serialized_so_far = 0;
+    uint8_t num_nonzeros_7x7 = context.num_nonzeros_here->num_nonzeros();
+#if 0 // disabled debug dump; `block` is not declared in this function
+    fprintf(stderr, "7\t%d\n", (int)block.num_nonzeros_7x7());
+    fprintf(stderr, "x\t%d\n", (int)block.num_nonzeros_x());
+    fprintf(stderr, "y\t%d\n", (int)block.num_nonzeros_y());
+#endif
+    for (int index = 5; index >= 0; --index) { // the 7x7 count goes out as six bits, most significant first
+        int cur_bit = (num_nonzeros_7x7 & (1 << index)) ? 1 : 0;
+        encoder.put(cur_bit, num_nonzeros_prob.at(index, serialized_so_far), Billing::NZ_7x7);
+        serialized_so_far <<= 1;
+        serialized_so_far |= cur_bit;
+    }
+    uint8_t eob_x = 0; // largest column index holding a non-zero 7x7 coefficient
+    uint8_t eob_y = 0; // largest row index holding a non-zero 7x7 coefficient
+    uint8_t num_nonzeros_left_7x7 = num_nonzeros_7x7;
+    // 7x7 part: visit positions in unzigzag49 order and stop once every non-zero coefficient has been written.
+    Sirikata::AlignedArray1d<short, 8> avg;
+    for (unsigned int zz = 0; zz < 49 && num_nonzeros_left_7x7; ++zz) {
+        if ((zz & 7) == 0) { // every eighth position: refresh `avg` (OPTIMIZED_7x7 builds only)
+#ifdef OPTIMIZED_7x7
+            probability_tables.compute_aavrg_vec(zz, context.copy(), avg.begin());
+#endif
+        }
+
+        unsigned int coord = unzigzag49[zz];
+        unsigned int b_x = (coord & 7);
+        unsigned int b_y = coord >> 3;
+        (void)b_x;
+        (void)b_y;
+        assert(b_x > 0 && b_y > 0 && "this does the DC and the lower 7x7 AC");
+        {
+            // this should work in all cases but doesn't utilize that the zz is related
+            int16_t coef;
+#ifdef OPTIMIZED_7x7
+            coef = context.here().coef.at(zz + AlignedBlock::AC_7x7_INDEX);
+#else
+            // this should work in all cases but doesn't utilize that the zz is related
+            coef = context.here().coefficients_raster(raster_to_aligned.at(coord)); // NOTE(review): encode_one_edge passes raster indices to coefficients_raster(); here the index is mapped first -- verify
+#endif
+            uint16_t abs_coef = abs(coef);
+#ifdef TRACK_HISTOGRAM
+            ++histogram[0][coef];
+#endif
+            ProbabilityTablesBase::CoefficientContext prior = {0, 0, 0};
+#ifdef OPTIMIZED_7x7
+            prior = probability_tables.update_coefficient_context7x7_precomp(zz, avg[zz & 7], context.copy(),
+                                                                             num_nonzeros_left_7x7);
+#else
+            prior = probability_tables.update_coefficient_context7x7(coord, zz, context.copy(), num_nonzeros_left_7x7);
+#endif
+            auto exp_prob = probability_tables.exponent_array_7x7(pt, coord, zz, prior);
+            uint8_t length = bit_length(abs_coef);
+            for (unsigned int i = 0; i < MAX_EXPONENT; ++i) { // bit length in unary: 1 while i != length, 0 at i == length
+                bool cur_bit = (length != i);
+
+                encoder.put(cur_bit, exp_prob.at(i), (Billing)((int)Billing::BITMAP_7x7 + std::min((int)i, 4)));
+                if (!cur_bit) {
+                    break;
+                }
+            }
+            if (length > MAX_EXPONENT) { // checked only after the exponent bits above have been written
+                custom_exit(ExitCode::COEFFICIENT_OUT_OF_RANGE);
+            }
+            if (length != 0) { // non-zero coefficient: sign bit (1 for non-negative), then bookkeeping
+                auto& sign_prob = probability_tables.sign_array_7x7(pt, coord, prior);
+                encoder.put(coef >= 0 ? 1 : 0, sign_prob, Billing::SIGN_7x7);
+                --num_nonzeros_left_7x7;
+                eob_x = std::max(eob_x, (uint8_t)b_x);
+                eob_y = std::max(eob_y, (uint8_t)b_y);
+            }
+            if (length > 1) { // bits below the top set bit; the top bit is implied by `length`
+                auto res_prob = probability_tables.residual_noise_array_7x7(pt, coord, prior);
+                assert((abs_coef & (1 << (length - 1))) && "Biggest bit must be set");
+                assert((abs_coef & (1 << (length))) == 0 && "Beyond Biggest bit must be zero");
+
+                for (int i = length - 2; i >= 0; --i) {
+                    encoder.put((abs_coef & (1 << i)), res_prob.at(i), Billing::RES_7x7);
+                }
+            }
+        }
+    }
+    encode_edge(context, encoder, probability_tables, num_nonzeros_7x7, eob_x, eob_y, pt);
+    // DC part: predict the DC value, then encode the difference produced by adv_predict_or_unpredict_dc.
+    Sirikata::AlignedArray1d<int16_t, 64> outp_sans_dc; // NOTE(review): only handed to adv_predict_dc_pix on the advanced path; verify its contents otherwise
+    int uncertainty = 0; // this is how far off our max estimate vs min estimate is
+    int uncertainty2 = 0;
+    int predicted_val;
+    if (advanced_dc_prediction) {
+        predicted_val =
+            probability_tables.adv_predict_dc_pix(context, outp_sans_dc.begin(), &uncertainty, &uncertainty2);
+    } else {
+        predicted_val = probability_tables.predict_dc_dct(context);
+    }
+    int adv_predicted_dc = probability_tables.adv_predict_or_unpredict_dc(context.here().dc(), false, predicted_val);
+    // Round-trip check: undoing the prediction must give back the stored DC, otherwise the run is aborted.
+    if (context.here().dc() != probability_tables.adv_predict_or_unpredict_dc(adv_predicted_dc, true, predicted_val)) {
+        custom_exit(ExitCode::COEFFICIENT_OUT_OF_RANGE); // value out of range
+    }
+    {
+        // do DC
+        int16_t coef = adv_predicted_dc;
+#ifdef TRACK_HISTOGRAM
+        ++histogram[1][coef];
+#endif
+        uint16_t abs_coef = abs(coef);
+        uint8_t length = bit_length(abs_coef);
+        uint16_t len_abs_mxm = uint16bit_length(abs(uncertainty));
+        uint16_t len_abs_offset_to_closest_edge = uint16bit_length(abs(uncertainty2));
+        if (!advanced_dc_prediction) { // non-advanced path: both context values come from the coefficient context instead
+            ProbabilityTablesBase::CoefficientContext prior;
+
+            prior = probability_tables.update_coefficient_context7x7(0, raster_to_aligned.at(0), context.copy(),
+                                                                     num_nonzeros_7x7);
+            len_abs_mxm = prior.bsr_best_prior;
+            len_abs_offset_to_closest_edge = prior.num_nonzeros_bin;
+        }
+
+        auto exp_prob = probability_tables.exponent_array_dc(pt, len_abs_mxm, len_abs_offset_to_closest_edge);
+        for (unsigned int i = 0; i < MAX_EXPONENT; ++i) { // bit length in unary, as for the AC coefficients
+            bool cur_bit = (length != i);
+            encoder.put(cur_bit, exp_prob.at(i), (Billing)((int)Billing::EXP0_DC + std::min(i, 4U)));
+            if (!cur_bit) {
+                break;
+            }
+        }
+        if (length > MAX_EXPONENT) {
+            custom_exit(ExitCode::COEFFICIENT_OUT_OF_RANGE);
+        }
+        if (length != 0) {
+            auto& sign_prob = probability_tables.sign_array_dc(pt, uncertainty,
+                                                               // nb: needs mxm
+                                                               // value, not abs
+                                                               uncertainty2);
+            encoder.put(coef >= 0 ? 1 : 0, sign_prob, Billing::SIGN_DC);
+        }
+        if (length > 1) {
+            auto res_prob = probability_tables.residual_array_dc(pt, len_abs_mxm, len_abs_offset_to_closest_edge);
+            assert((abs_coef & (1 << (length - 1))) && "Biggest bit must be set");
+            assert((abs_coef & (1 << (length))) == 0 && "Beyond Biggest bit must be zero");
+            for (int i = length - 2; i >= 0; --i) {
+                encoder.put((abs_coef & (1 << i)), res_prob.at(i), Billing::RES_DC);
+            }
+        }
+    }
+    { // record this block's edge data in num_nonzeros_here, from outp_sans_dc, the quantization table and the DC
+        int dc = context.here().dc();
+        context.num_nonzeros_here->set_horizontal(outp_sans_dc.begin(),
+                                                  ProbabilityTablesBase::quantization_table((int)color), dc);
+        context.num_nonzeros_here->set_vertical(outp_sans_dc.begin(),
+                                                ProbabilityTablesBase::quantization_table((int)color), dc);
+    }
+    // Debug image: only when not threaded and a raw_YCbCr buffer exists for this color.
+    if ((!g_threaded) && LeptonDebug::raw_YCbCr[(int)color]) {
+        int16_t outp[64];
+        idct(context.here(), ProbabilityTablesBase::quantization_table((int)color), outp, false);
+        for (int i = 0; i < 64; ++i) {
+            outp[i] >>= 3;
+        }
+
+        double delta = 0; // computed but only consumed by the commented-out fprintf calls below
+        for (int i = 0; i < 64; ++i) {
+            delta += outp[i] - outp_sans_dc[i];
+            // fprintf (stderr, "%d + %d = %d\n", outp_sans_dc[i], context.here().dc(), outp[i]);
+        }
+        delta /= 64;
+        // fprintf (stderr, "==== %f = %f =?= %d\n", delta, delta * 8, context.here().dc());
+
+        int debug_width = LeptonDebug::getDebugWidth((int)color);
+        int offset = k_debug_block[(int)color];
+        for (int y = 0; y < 8; ++y) {
+            for (int x = 0; x < 8; ++x) {
+                LeptonDebug::raw_YCbCr[(int)color][offset + y * debug_width + x] =
+                    std::max(std::min(outp[(y << 3) + x] + 128, 255), 0); // shift by 128 and clamp to 0..255
+            }
+        }
+        k_debug_block[(int)color] += 8; // next block starts 8 samples to the right
+        if (k_debug_block[(int)color] % debug_width == 0) { // end of a block row: skip the 7 remaining sample rows
+            k_debug_block[(int)color] += debug_width * 7;
+        }
+    }
+}
+#ifdef ALLOW_FOUR_COLORS // explicit instantiations for BlockType::Ck, only in ALLOW_FOUR_COLORS builds
+template void serialize_tokens(ConstBlockContext,
+                               BoolEncoder&,
+                               ProbabilityTables<false, BlockType::Ck>&,
+                               ProbabilityTablesBase&);
+template void serialize_tokens(ConstBlockContext,
+                               BoolEncoder&,
+                               ProbabilityTables<true, BlockType::Ck>&,
+                               ProbabilityTablesBase&);
+#endif
+// Explicit instantiations for Y, Cb and Cr, each with all_neighbors_present false and true.
+template void serialize_tokens(ConstBlockContext,
+                               BoolEncoder&,
+                               ProbabilityTables<false, BlockType::Y>&,
+                               ProbabilityTablesBase&);
+template void serialize_tokens(ConstBlockContext,
+                               BoolEncoder&,
+                               ProbabilityTables<false, BlockType::Cb>&,
+                               ProbabilityTablesBase&);
+template void serialize_tokens(ConstBlockContext,
+                               BoolEncoder&,
+                               ProbabilityTables<false, BlockType::Cr>&,
+                               ProbabilityTablesBase&);
+template void serialize_tokens(ConstBlockContext,
+                               BoolEncoder&,
+                               ProbabilityTables<true, BlockType::Y>&,
+                               ProbabilityTablesBase&);
+template void serialize_tokens(ConstBlockContext,
+                               BoolEncoder&,
+                               ProbabilityTables<true, BlockType::Cb>&,
+                               ProbabilityTablesBase&);
+template void serialize_tokens(ConstBlockContext,
+                               BoolEncoder&,
+                               ProbabilityTables<true, BlockType::Cr>&,
+                               ProbabilityTablesBase&);
diff --git a/codec/L2/demos/leptonEnc/host/vp8/encoder/encoder.hh b/codec/L2/demos/leptonEnc/host/vp8/encoder/encoder.hh
new file mode 100644
index 0000000000..b420cc7624
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/encoder/encoder.hh
@@ -0,0 +1,89 @@
+/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+#ifndef ENCODER_HH
+#define ENCODER_HH
+#include "model.hh"
+template <bool all_neighbors_present, BlockType color>
+void serialize_tokens(ConstBlockContext context,  // template form: neighbor flag and color are compile-time
+                      BoolEncoder& encoder,
+                      ProbabilityTables<all_neighbors_present, color>& probability_tables,
+                      ProbabilityTablesBase&);
+void serialize_tokens(BlockType color,  // runtime form: color and neighbor flags are ordinary arguments
+                      bool left_present,
+                      bool above_present,
+                      bool above_right_present,
+                      ConstBlockContext context,
+                      BoolEncoder& encoder,
+                      ProbabilityTablesBase& pt);
+// Overload that also receives num_nonzeros_7x7 and eob_x/eob_y from the caller.
+void serialize_tokens(BlockType color,
+                      bool left_present,
+                      bool above_present,
+                      bool above_right_present,
+                      uint8_t num_nonzeros_7x7,
+                      uint8_t eob_x,
+                      uint8_t eob_y,
+                      ConstBlockContext context,
+                      BoolEncoder& encoder,
+                      ProbabilityTablesBase& pt);
+// serialize_tokens_77: same parameter list as the serialize_tokens overload just above.
+void serialize_tokens_77(BlockType color,
+                         bool left_present,
+                         bool above_present,
+                         bool above_right_present,
+                         uint8_t num_nonzeros_7x7,
+                         uint8_t eob_x,
+                         uint8_t eob_y,
+                         ConstBlockContext context,
+                         BoolEncoder& encoder,
+                         ProbabilityTablesBase& pt);
+void serialize_tokens_77(ap_uint<1> color,  // overload taking raw coefficient arrays; no ConstBlockContext or tables
+                         bool left_present,
+                         bool above_present,
+                         bool above_right_present,
+                         uint8_t num_nonzeros_7x7,
+                         uint8_t num_nonzeros_above,
+                         uint8_t num_nonzeros_left,
+                         int16_t coef_here[64],  // [64] is informational only: array parameters decay to int16_t*
+                         int16_t coef_left[64],
+                         int16_t coef_above[64],
+                         int16_t coef_above_left[64],
+                         // ConstBlockContext context,
+                         BoolEncoder& encoder);
+// serialize_tokens_edges: takes the here/left/above coefficient arrays (no above_left array).
+void serialize_tokens_edges(BlockType color,
+                            bool left_present,
+                            bool above_present,
+                            bool above_right_present,
+                            uint8_t num_nonzeros_7x7,
+                            uint8_t eob_x,
+                            uint8_t eob_y,
+                            int16_t coef_here[64],
+                            int16_t coef_left[64],
+                            int16_t coef_above[64],
+                            // int16_t coef_above_left[64],
+                            // ConstBlockContext context,
+                            BoolEncoder& encoder);
+// serialize_tokens_dc, array form: dc, q0 and the 8-entry est_v/est_h arrays are passed directly.
+void serialize_tokens_dc(BlockType color,
+                         bool left_present,
+                         bool above_present,
+                         int16_t dc,
+                         uint16_t q0,
+                         int16_t est_v[8],
+                         int16_t est_h[8],
+                         // struct_ctx_edge* pctx_edge,
+                         BoolEncoder& encoder);
+void serialize_tokens_dc(BlockType color,  // stream form: dc and est_v/est_h are supplied as hls::stream arguments
+                         bool left_present,
+                         bool above_present,
+                         hls::stream<coef_t>& str_dc_in,
+                         uint16_t q0,
+                         // int16_t outp_sans_dc [64],
+                         // int16_t est_v[8],
+                         // int16_t est_h[8],
+                         hls::stream<pix_edge_t>& str_est_v,
+                         hls::stream<pix_edge_t>& str_est_h,
+                         // struct_ctx_edge* p_ctx_edge,
+                         // ConstBlockContext context,
+                         BoolEncoder& encoder);
+#endif /* ENCODER_HH */
diff --git a/codec/L2/demos/leptonEnc/host/vp8/encoder/vpx_bool_writer.hh b/codec/L2/demos/leptonEnc/host/vp8/encoder/vpx_bool_writer.hh
new file mode 100644
index 0000000000..17398cacab
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/encoder/vpx_bool_writer.hh
@@ -0,0 +1,172 @@
+#include "../util/options.hh"
+#include "boolwriter.hh"
+#include "../../io/MuxReader.hh"
+#include "../../other/loop_stt.h"
+#include "XModified.hpp"
+#include "XAcc_dc.hpp"
+extern LoopNodeFactory g_loops;
+class VPXBoolWriter {
+    // Host-side wrapper around vpx_writer. A bit is either encoded immediately with put(),
+    // or queued per category with put_77() / put_edges() / put_dc(), counted out with
+    // end_77() / end_edges() / end_dc(), and later replayed through put() by ColllectPut().
+    // The streams, counters, writer and buffer are all public members.
+    // NOTE(review): end_*() publish the running counters without resetting them; confirm
+    // that callers reset cnt_* themselves if more than one batch is queued per writer.
+   public:
+    hls::stream<tmp_struct> str_77;     // entries queued by put_77()
+    hls::stream<tmp_struct> str_edges;  // entries queued by put_edges()
+    hls::stream<tmp_struct> str_dc;     // entries queued by put_dc()
+    hls::stream<int> str_77_cnt;        // counts written by end_77()
+    hls::stream<int> str_edges_cnt;     // counts written by end_edges()
+    hls::stream<int> str_dc_cnt;        // counts written by end_dc()
+    // Entries queued so far per stream. Zero-initialised here because the constructor
+    // never assigns them and put_77()/put_edges()/put_dc() increment them straight away.
+    int cnt_77 = 0, cnt_edges = 0, cnt_dc = 0;
+    vpx_writer boolwriter;
+    Sirikata::MuxReader::ResizableByteBuffer output_;
+
+   private:
+#ifdef DEBUG_ARICODER
+    bool any_written;
+#endif
+    enum { MIN_SIZE = 1024 * 1024 };
+    enum { SIZE_CHECK = 0xfff00000 };  // bits 20..31, i.e. set for any 32-bit value >= MIN_SIZE
+
+   public:
+    VPXBoolWriter() {
+#ifdef DEBUG_ARICODER
+        any_written = false;
+#endif
+        static_assert(MIN_SIZE & SIZE_CHECK, "min size must be caught by the size check, so allocations happen after");
+        static_assert(((MIN_SIZE - 1) & SIZE_CHECK) == 0, "min size -1 must not be caught by the size check");
+    }
+    // Sizes output_ once, to a fixed 805306368 bytes (768 MiB).
+    // NOTE(review): vpx_start_encode is not called here, so boolwriter has to be set up
+    // by other code before the first put(); confirm against the caller.
+    void init() {
+#ifdef DEBUG_ARICODER
+        always_assert(!any_written);
+#endif
+        output_.resize(805306368);
+    }
+    // Encodes `value` with branch.prob(), grows output_ if needed, then updates `branch`.
+    void put(const bool value, Branch& branch, Billing bill) {
+        // Instrumentation through the global LoopNodeFactory: start, count, end.
+        g_loops.START("PROC:put of AC", PROC);
+        g_loops.CNT();
+        g_loops.END();
+#ifdef DEBUG_ARICODER
+        // Debug only: on this writer's first put, set w_bitcount to count * 500000000.
+        // `count` is a function-local static, so it advances once per writer instance.
+        if (!any_written) {
+            any_written = true;
+            static int count = 0;
+            w_bitcount = count * 500000000;
+            ++count;
+        }
+#endif
+        vpx_write(&boolwriter, value, branch.prob(), bill);
+        if (__builtin_expect(boolwriter.pos & SIZE_CHECK, false)) {
+            // Only reached once pos has a bit set in SIZE_CHECK (pos >= MIN_SIZE): double
+            // the buffer when fewer than 128 bytes of headroom remain.
+            if (boolwriter.pos + 128 > output_.size()) {
+                output_.resize(output_.size() * 2);
+                boolwriter.buffer = &output_[0]; // re-point the writer at the resized storage
+            }
+        }
+        branch.record_obs_and_update(value);
+    }
+    // hls_Branch overload: encodes with branch->prob2() and then updates the branch.
+    // Unlike the Branch& overloads it performs no buffer growth check, so it relies
+    // entirely on the capacity reserved by init().
+    void put(const bool value, hls_Branch* branch, Billing bill) {
+        // Instrumentation through the global LoopNodeFactory: start, count, end.
+        g_loops.START("PROC:put of AC", PROC);
+        g_loops.CNT();
+        g_loops.END();
+        vpx_write(&boolwriter, value, branch->prob2(), bill);
+        branch->record_obs_and_update(value);
+    }
+    // Queues one bit on str_77 for later replay by ColllectPut(); nothing is encoded here.
+    void put_77(const bool value, hls_Branch* branch, Billing bill) {
+        tmp_struct tt = {value, branch, (int)bill};
+        this->cnt_77++;
+        this->str_77.write(tt);
+    }
+    // Queues one bit on str_edges for later replay by ColllectPut().
+    void put_edges(const bool value, hls_Branch* branch, Billing bill) {
+        tmp_struct tt = {value, branch, (int)bill};
+        this->cnt_edges++;
+        this->str_edges.write(tt);
+    }
+    // Queues one bit on str_dc for later replay by ColllectPut().
+    void put_dc(const bool value, hls_Branch* branch, Billing bill) {
+        tmp_struct tt = {value, branch, (int)bill};
+        this->cnt_dc++;
+        this->str_dc.write(tt);
+    }
+    // Publishes the running cnt_77 on str_77_cnt and returns it.
+    int end_77() {
+        this->str_77_cnt.write(this->cnt_77);
+        return this->cnt_77;
+    }
+    // Publishes the running cnt_edges on str_edges_cnt and returns it.
+    int end_edges() {
+        this->str_edges_cnt.write(this->cnt_edges);
+        return this->cnt_edges;
+    }
+    // Publishes the running cnt_dc on str_dc_cnt and returns it.
+    int end_dc() {
+        this->str_dc_cnt.write(this->cnt_dc);
+        return this->cnt_dc;
+    }
+    // Drains the queues in the order 77, edges, dc: for each one, reads a single count
+    // and replays that many queued entries through put(bool, hls_Branch*, Billing).
+    // (The triple 'l' in the name is a misspelling kept so existing callers still link.)
+    void ColllectPut() {
+        // str_77 entries
+        int cnt = this->str_77_cnt.read();
+        for (int i = 0; i < cnt; i++) {
+            tmp_struct tt = this->str_77.read();
+            this->put(tt.value, tt.branch, (Billing)tt.bill);
+        }
+
+        // str_edges entries
+        cnt = this->str_edges_cnt.read();
+        for (int i = 0; i < cnt; i++) {
+            tmp_struct tt = this->str_edges.read();
+            this->put(tt.value, tt.branch, (Billing)tt.bill);
+        }
+
+        // str_dc entries
+        cnt = this->str_dc_cnt.read();
+        for (int i = 0; i < cnt; i++) {
+            tmp_struct tt = this->str_dc.read();
+            this->put(tt.value, tt.branch, (Billing)tt.bill);
+        }
+    }
+
+    // Encodes with branch.prob() and updates both `branch` and `branch2` with `value`.
+    void put(const bool value, Branch& branch, hls_Branch* branch2, Billing bill) {
+        vpx_write(&boolwriter, value, branch.prob(), bill);
+        if (__builtin_expect(boolwriter.pos & SIZE_CHECK, false)) {
+            // check if we're out of buffer space
+            if (boolwriter.pos + 128 > output_.size()) {
+                output_.resize(output_.size() * 2);
+                boolwriter.buffer = &output_[0]; // reset buffer
+            }
+        }
+        // p1/p2 are compared but the result is never used (the assignment to p2 is a dead
+        // store); presumably a breakpoint hook for spotting divergence between the models.
+        uint8_t p1 = branch.prob();
+        uint8_t p2 = branch2->prob2();
+        if (p1 != p2) p2 = p1;
+        branch.record_obs_and_update(value);
+        branch2->record_obs_and_update(value);
+    }
+    // Hands the output buffer to the caller by swapping it with `finish`.
+    // NOTE(review): vpx_stop_encode and the trim to boolwriter.pos are not performed here.
+    void finish(Sirikata::MuxReader::ResizableByteBuffer& finish) {
+        finish.swap(output_);
+    }
+};
diff --git a/codec/L2/demos/leptonEnc/host/vp8/model/JpegArithmeticCoder.cc b/codec/L2/demos/leptonEnc/host/vp8/model/JpegArithmeticCoder.cc
new file mode 100644
index 0000000000..a2f9092582
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/model/JpegArithmeticCoder.cc
@@ -0,0 +1,428 @@
+/*  Sirikata Jpeg Texture Transfer -- Texture Transfer management system
+ *  JpegArithmeticCoder.cc
+ *
+ *  Copyright (c) 2015, The Sirikata Authors
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are
+ *  met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// This file was adapted from libjpeg-turbo
+
+/*
+Most of libjpeg-turbo inherits the non-restrictive, BSD-style license used by
+libjpeg (see README.)  The TurboJPEG wrapper (both C and Java versions) and
+associated test programs bear a similar license, which is reproduced below:
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+- Neither the name of the libjpeg-turbo Project nor the names of its
+  contributors may be used to endorse or promote products derived from this
+  software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+*/
+#include <assert.h>
+#include "JpegArithmeticCoder.hh"
+
+/* The following #define specifies the packing of the four components
+ * into the compact INT32 representation.
+ * Note that this formula must match the actual arithmetic encoder
+ * and decoder implementation.  The implementation has to be changed
+ * if this formula is changed.
+ * The current organization is leaned on Markus Kuhn's JBIG
+ * implementation (jbig_tab.c).
+ */
+
+#define V(i, a, b, c, d) (((int32_t)(a) << 16) | ((int32_t)(c) << 8) | ((int32_t)(d) << 7) | (b))
+
+const int32_t jpeg_aritab[256] = {
+    /*
+     * V(Index, Qe_Value, Next_Index_LPS, Next_Index_MPS, Switch_MPS); Index only labels the row.
+     */
+    V(0, 0x5a1d, 1, 1, 1), V(1, 0x2586, 14, 2, 0), V(2, 0x1114, 16, 3, 0), V(3, 0x080b, 18, 4, 0),
+    V(4, 0x03d8, 20, 5, 0), V(5, 0x01da, 23, 6, 0), V(6, 0x00e5, 25, 7, 0), V(7, 0x006f, 28, 8, 0),
+    V(8, 0x0036, 30, 9, 0), V(9, 0x001a, 33, 10, 0), V(10, 0x000d, 35, 11, 0), V(11, 0x0006, 9, 12, 0),
+    V(12, 0x0003, 10, 13, 0), V(13, 0x0001, 12, 13, 0), V(14, 0x5a7f, 15, 15, 1), V(15, 0x3f25, 36, 16, 0),
+    V(16, 0x2cf2, 38, 17, 0), V(17, 0x207c, 39, 18, 0), V(18, 0x17b9, 40, 19, 0), V(19, 0x1182, 42, 20, 0),
+    V(20, 0x0cef, 43, 21, 0), V(21, 0x09a1, 45, 22, 0), V(22, 0x072f, 46, 23, 0), V(23, 0x055c, 48, 24, 0),
+    V(24, 0x0406, 49, 25, 0), V(25, 0x0303, 51, 26, 0), V(26, 0x0240, 52, 27, 0), V(27, 0x01b1, 54, 28, 0),
+    V(28, 0x0144, 56, 29, 0), V(29, 0x00f5, 57, 30, 0), V(30, 0x00b7, 59, 31, 0), V(31, 0x008a, 60, 32, 0),
+    V(32, 0x0068, 62, 33, 0), V(33, 0x004e, 63, 34, 0), V(34, 0x003b, 32, 35, 0), V(35, 0x002c, 33, 9, 0),
+    V(36, 0x5ae1, 37, 37, 1), V(37, 0x484c, 64, 38, 0), V(38, 0x3a0d, 65, 39, 0), V(39, 0x2ef1, 67, 40, 0),
+    V(40, 0x261f, 68, 41, 0), V(41, 0x1f33, 69, 42, 0), V(42, 0x19a8, 70, 43, 0), V(43, 0x1518, 72, 44, 0),
+    V(44, 0x1177, 73, 45, 0), V(45, 0x0e74, 74, 46, 0), V(46, 0x0bfb, 75, 47, 0), V(47, 0x09f8, 77, 48, 0),
+    V(48, 0x0861, 78, 49, 0), V(49, 0x0706, 79, 50, 0), V(50, 0x05cd, 48, 51, 0), V(51, 0x04de, 50, 52, 0),
+    V(52, 0x040f, 50, 53, 0), V(53, 0x0363, 51, 54, 0), V(54, 0x02d4, 52, 55, 0), V(55, 0x025c, 53, 56, 0),
+    V(56, 0x01f8, 54, 57, 0), V(57, 0x01a4, 55, 58, 0), V(58, 0x0160, 56, 59, 0), V(59, 0x0125, 57, 60, 0),
+    V(60, 0x00f6, 58, 61, 0), V(61, 0x00cb, 59, 62, 0), V(62, 0x00ab, 61, 63, 0), V(63, 0x008f, 61, 32, 0),
+    V(64, 0x5b12, 65, 65, 1), V(65, 0x4d04, 80, 66, 0), V(66, 0x412c, 81, 67, 0), V(67, 0x37d8, 82, 68, 0),
+    V(68, 0x2fe8, 83, 69, 0), V(69, 0x293c, 84, 70, 0), V(70, 0x2379, 86, 71, 0), V(71, 0x1edf, 87, 72, 0),
+    V(72, 0x1aa9, 87, 73, 0), V(73, 0x174e, 72, 74, 0), V(74, 0x1424, 72, 75, 0), V(75, 0x119c, 74, 76, 0),
+    V(76, 0x0f6b, 74, 77, 0), V(77, 0x0d51, 75, 78, 0), V(78, 0x0bb6, 77, 79, 0), V(79, 0x0a40, 77, 48, 0),
+    V(80, 0x5832, 80, 81, 1), V(81, 0x4d1c, 88, 82, 0), V(82, 0x438e, 89, 83, 0), V(83, 0x3bdd, 90, 84, 0),
+    V(84, 0x34ee, 91, 85, 0), V(85, 0x2eae, 92, 86, 0), V(86, 0x299a, 93, 87, 0), V(87, 0x2516, 86, 71, 0),
+    V(88, 0x5570, 88, 89, 1), V(89, 0x4ca9, 95, 90, 0), V(90, 0x44d9, 96, 91, 0), V(91, 0x3e22, 97, 92, 0),
+    V(92, 0x3824, 99, 93, 0), V(93, 0x32b4, 99, 94, 0), V(94, 0x2e17, 93, 86, 0), V(95, 0x56a8, 95, 96, 1),
+    V(96, 0x4f46, 101, 97, 0), V(97, 0x47e5, 102, 98, 0), V(98, 0x41cf, 103, 99, 0), V(99, 0x3c3d, 104, 100, 0),
+    V(100, 0x375e, 99, 93, 0), V(101, 0x5231, 105, 102, 0), V(102, 0x4c0f, 106, 103, 0), V(103, 0x4639, 107, 104, 0),
+    V(104, 0x415e, 103, 99, 0), V(105, 0x5627, 105, 106, 1), V(106, 0x50e7, 108, 107, 0), V(107, 0x4b85, 109, 103, 0),
+    V(108, 0x5597, 110, 109, 0), V(109, 0x504f, 111, 107, 0), V(110, 0x5a10, 110, 111, 1), V(111, 0x5522, 112, 109, 0),
+    V(112, 0x59eb, 112, 111, 1),
+    /*
+     * This last entry is used for fixed probability estimate of 0.5
+     * as recommended in Section 10.3 Table 5 of ITU-T Rec. T.851.
+     */
+    V(113, 0x5a1d, 113, 113, 0),
+    /* Rows 114..255 repeat one filler value to pad the table to 256 slots; lookups below use sv & 0x7F (0..127). */
+    V(114, 0x5b12, 65, 65, 1), V(115, 0x5b12, 65, 65, 1), V(116, 0x5b12, 65, 65, 1), V(117, 0x5b12, 65, 65, 1),
+    V(118, 0x5b12, 65, 65, 1), V(119, 0x5b12, 65, 65, 1), V(120, 0x5b12, 65, 65, 1), V(121, 0x5b12, 65, 65, 1),
+    V(122, 0x5b12, 65, 65, 1), V(123, 0x5b12, 65, 65, 1), V(124, 0x5b12, 65, 65, 1), V(125, 0x5b12, 65, 65, 1),
+    V(126, 0x5b12, 65, 65, 1), V(127, 0x5b12, 65, 65, 1), V(128, 0x5b12, 65, 65, 1), V(129, 0x5b12, 65, 65, 1),
+    V(130, 0x5b12, 65, 65, 1), V(131, 0x5b12, 65, 65, 1), V(132, 0x5b12, 65, 65, 1), V(133, 0x5b12, 65, 65, 1),
+    V(134, 0x5b12, 65, 65, 1), V(135, 0x5b12, 65, 65, 1), V(136, 0x5b12, 65, 65, 1), V(137, 0x5b12, 65, 65, 1),
+    V(138, 0x5b12, 65, 65, 1), V(139, 0x5b12, 65, 65, 1), V(140, 0x5b12, 65, 65, 1), V(141, 0x5b12, 65, 65, 1),
+    V(142, 0x5b12, 65, 65, 1), V(143, 0x5b12, 65, 65, 1), V(144, 0x5b12, 65, 65, 1), V(145, 0x5b12, 65, 65, 1),
+    V(146, 0x5b12, 65, 65, 1), V(147, 0x5b12, 65, 65, 1), V(148, 0x5b12, 65, 65, 1), V(149, 0x5b12, 65, 65, 1),
+    V(150, 0x5b12, 65, 65, 1), V(151, 0x5b12, 65, 65, 1), V(152, 0x5b12, 65, 65, 1), V(153, 0x5b12, 65, 65, 1),
+    V(154, 0x5b12, 65, 65, 1), V(155, 0x5b12, 65, 65, 1), V(156, 0x5b12, 65, 65, 1), V(157, 0x5b12, 65, 65, 1),
+    V(158, 0x5b12, 65, 65, 1), V(159, 0x5b12, 65, 65, 1), V(160, 0x5b12, 65, 65, 1),
+
+    V(161, 0x5b12, 65, 65, 1), V(162, 0x5b12, 65, 65, 1), V(163, 0x5b12, 65, 65, 1), V(164, 0x5b12, 65, 65, 1),
+    V(165, 0x5b12, 65, 65, 1), V(166, 0x5b12, 65, 65, 1), V(167, 0x5b12, 65, 65, 1), V(168, 0x5b12, 65, 65, 1),
+    V(169, 0x5b12, 65, 65, 1),
+
+    V(170, 0x5b12, 65, 65, 1), V(171, 0x5b12, 65, 65, 1), V(172, 0x5b12, 65, 65, 1), V(173, 0x5b12, 65, 65, 1),
+    V(174, 0x5b12, 65, 65, 1), V(175, 0x5b12, 65, 65, 1), V(176, 0x5b12, 65, 65, 1), V(177, 0x5b12, 65, 65, 1),
+    V(178, 0x5b12, 65, 65, 1), V(179, 0x5b12, 65, 65, 1), V(180, 0x5b12, 65, 65, 1), V(181, 0x5b12, 65, 65, 1),
+    V(182, 0x5b12, 65, 65, 1), V(183, 0x5b12, 65, 65, 1), V(184, 0x5b12, 65, 65, 1), V(185, 0x5b12, 65, 65, 1),
+    V(186, 0x5b12, 65, 65, 1), V(187, 0x5b12, 65, 65, 1), V(188, 0x5b12, 65, 65, 1), V(189, 0x5b12, 65, 65, 1),
+    V(190, 0x5b12, 65, 65, 1), V(191, 0x5b12, 65, 65, 1), V(192, 0x5b12, 65, 65, 1), V(193, 0x5b12, 65, 65, 1),
+    V(194, 0x5b12, 65, 65, 1), V(195, 0x5b12, 65, 65, 1), V(196, 0x5b12, 65, 65, 1), V(197, 0x5b12, 65, 65, 1),
+    V(198, 0x5b12, 65, 65, 1), V(199, 0x5b12, 65, 65, 1),
+
+    V(200, 0x5b12, 65, 65, 1), V(201, 0x5b12, 65, 65, 1), V(202, 0x5b12, 65, 65, 1), V(203, 0x5b12, 65, 65, 1),
+    V(204, 0x5b12, 65, 65, 1), V(205, 0x5b12, 65, 65, 1), V(206, 0x5b12, 65, 65, 1), V(207, 0x5b12, 65, 65, 1),
+    V(208, 0x5b12, 65, 65, 1), V(209, 0x5b12, 65, 65, 1), V(210, 0x5b12, 65, 65, 1), V(211, 0x5b12, 65, 65, 1),
+    V(212, 0x5b12, 65, 65, 1), V(213, 0x5b12, 65, 65, 1), V(214, 0x5b12, 65, 65, 1), V(215, 0x5b12, 65, 65, 1),
+    V(216, 0x5b12, 65, 65, 1), V(217, 0x5b12, 65, 65, 1), V(218, 0x5b12, 65, 65, 1), V(219, 0x5b12, 65, 65, 1),
+    V(220, 0x5b12, 65, 65, 1), V(221, 0x5b12, 65, 65, 1), V(222, 0x5b12, 65, 65, 1), V(223, 0x5b12, 65, 65, 1),
+    V(224, 0x5b12, 65, 65, 1), V(225, 0x5b12, 65, 65, 1), V(226, 0x5b12, 65, 65, 1), V(227, 0x5b12, 65, 65, 1),
+    V(228, 0x5b12, 65, 65, 1), V(229, 0x5b12, 65, 65, 1), V(230, 0x5b12, 65, 65, 1), V(231, 0x5b12, 65, 65, 1),
+    V(232, 0x5b12, 65, 65, 1), V(233, 0x5b12, 65, 65, 1), V(234, 0x5b12, 65, 65, 1), V(235, 0x5b12, 65, 65, 1),
+    V(236, 0x5b12, 65, 65, 1), V(237, 0x5b12, 65, 65, 1), V(238, 0x5b12, 65, 65, 1), V(239, 0x5b12, 65, 65, 1),
+    V(240, 0x5b12, 65, 65, 1), V(241, 0x5b12, 65, 65, 1), V(242, 0x5b12, 65, 65, 1), V(243, 0x5b12, 65, 65, 1),
+    V(244, 0x5b12, 65, 65, 1), V(245, 0x5b12, 65, 65, 1), V(246, 0x5b12, 65, 65, 1), V(247, 0x5b12, 65, 65, 1),
+    V(248, 0x5b12, 65, 65, 1), V(249, 0x5b12, 65, 65, 1), V(250, 0x5b12, 65, 65, 1), V(251, 0x5b12, 65, 65, 1),
+    V(252, 0x5b12, 65, 65, 1), V(253, 0x5b12, 65, 65, 1), V(254, 0x5b12, 65, 65, 1), V(255, 0x5b12, 65, 65, 1),
+
+};
+
+namespace Sirikata {
+static void emit_byte(int byte_data, DecoderWriter* cinfo) {  // writes byte_data, narrowed to 8 bits, as one byte
+    unsigned char octet = static_cast<uint8_t>(byte_data);
+    cinfo->Write(&octet, 1);
+}
+static int get_byte(DecoderReader* cinfo) {
+    // Reads one byte. On an empty or failed read it returns 0xff and 0xd9 on alternating
+    // calls instead; the alternation counter is function-static, so all callers share it.
+    static int num_bad = 0;
+    unsigned char octet = 0;
+    std::pair<int, JpegError> result = cinfo->Read(&octet, 1);
+    bool read_ok = result.first != 0 && !(result.second != JpegError::nil());
+    if (read_ok) {
+        return octet;
+    }
+    return (num_bad++ % 2 == 0) ? 0xff : 0xd9;
+}
+/*
+ * The core arithmetic encoding routine (common in JPEG and JBIG).
+ * This needs to go as fast as possible.
+ * Machine-dependent optimization facilities
+ * are not utilized in this portable implementation.
+ * However, this code should be fairly efficient and
+ * may be a good base for further optimizations anyway.
+ *
+ * Parameter 'val' to be encoded may be 0 or 1 (binary decision).
+ *
+ * Note: I've added full "Pacman" termination support to the
+ * byte output routines, which is equivalent to the optional
+ * Discard_final_zeros procedure (Figure D.15) in the spec.
+ * Thus, we always produce the shortest possible output
+ * stream compliant to the spec (no trailing zero bytes,
+ * except for FF stuffing).
+ *
+ * I've also introduced a new scheme for accessing
+ * the probability estimation state machine table,
+ * derived from Markus Kuhn's JBIG implementation.
+ */
+
+void ArithmeticCoder::arith_encode(DecoderWriter* cinfo, unsigned char* st, bool val) {
+    ArithmeticCoder* e = this;
+    unsigned char nl, nm; /* next state byte after coding an LPS / an MPS */
+    int32_t qe, temp;
+    int sv; /* state byte: bit 7 is compared with val as the MPS, bits 0..6 index jpeg_aritab */
+
+    /* Fetch values from our compact representation of Table D.2:
+     * Qe values and probability estimation state machine
+     */
+    sv = *st;
+    qe = jpeg_aritab[sv & 0x7F]; /* packed entry; reduced to Qe_Value by the two shifts below */
+    assert(qe != 0);
+    nl = qe & 0xFF; /* low byte: Next_Index_LPS + Switch_MPS (bit 7) */
+    qe >>= 8;
+    nm = qe & 0xFF; /* second byte: Next_Index_MPS */
+    qe >>= 8; /* => Qe_Value */
+
+    /* Encode & estimation procedures per sections D.1.4 & D.1.5 */
+    e->a -= qe;
+    assert(e->a > 0);
+    if ((int)val != (sv >> 7)) {
+        /* Encode the less probable symbol */
+        if (e->a >= qe) {
+            /* If the interval size (qe) for the less probable symbol (LPS)
+             * is larger than the interval size for the MPS, then exchange
+             * the two symbols for coding efficiency, otherwise code the LPS
+             * as usual: */
+            e->c += e->a;
+            e->a = qe;
+            assert(e->a > 0);
+        }
+        *st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */
+    } else {
+        /* Encode the more probable symbol */
+        if (e->a >= 0x8000L) return; /* A >= 0x8000 -> ready, no renormalization required */
+        if (e->a < qe) {
+            /* If the interval size (qe) for the less probable symbol (LPS)
+             * is larger than the interval size for the MPS, then exchange
+             * the two symbols for coding efficiency: */
+            e->c += e->a;
+            e->a = qe;
+        }
+        *st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */
+    }
+    assert(e->a > 0);
+    /* Renormalization & data output per section D.1.6 */
+    do {
+        e->a <<= 1;
+        e->c <<= 1;
+        if (--e->ct == 0) {
+            /* Another byte is ready for output */
+            temp = e->c >> 19;
+            if (temp > 0xFF) {
+                /* Handle overflow over all stacked 0xFF bytes */
+                if (e->buffer >= 0) {
+                    if (e->zc) do
+                            emit_byte(0x00, cinfo);
+                        while (--e->zc);
+                    emit_byte(e->buffer + 1, cinfo);
+                    if (e->buffer + 1 == 0xFF) emit_byte(0x00, cinfo);
+                }
+                e->zc += e->sc; /* carry-over converts stacked 0xFF bytes to 0x00 */
+                e->sc = 0;
+                /* Note: The 3 spacer bits in the C register guarantee
+                 * that the new buffer byte can't be 0xFF here
+                 * (see page 160 in the P&M JPEG book). */
+                e->buffer = temp & 0xFF; /* new output byte, might overflow later */
+            } else if (temp == 0xFF) {
+                ++e->sc; /* stack 0xFF byte (which might overflow later) */
+            } else {
+                /* Output all stacked 0xFF bytes, they will not overflow any more */
+                if (e->buffer == 0)
+                    ++e->zc;
+                else if (e->buffer >= 0) {
+                    if (e->zc) do
+                            emit_byte(0x00, cinfo);
+                        while (--e->zc);
+                    emit_byte(e->buffer, cinfo);
+                }
+                if (e->sc) {
+                    if (e->zc) do
+                            emit_byte(0x00, cinfo);
+                        while (--e->zc);
+                    do {
+                        emit_byte(0xFF, cinfo);
+                        emit_byte(0x00, cinfo);
+                    } while (--e->sc);
+                }
+                e->buffer = temp & 0xFF; /* new output byte (can still overflow) */
+            }
+            e->c &= 0x7FFFFL; /* keep only the 19 bits below the byte just taken */
+            e->ct += 8;
+        }
+    } while (e->a < 0x8000L);
+}
+
+void ArithmeticCoder::finish_encode(DecoderWriter* cinfo) {
+    ArithmeticCoder* e = this;
+    int32_t temp;
+
+    /* Section D.1.8: Termination of encoding */
+
+    /* Find the e->c in the coding interval with the largest
+     * number of trailing zero bits */
+    if ((temp = (e->a - 1 + e->c) & 0xFFFF0000L) < e->c)
+        e->c = temp + 0x8000L;
+    else
+        e->c = temp;
+    /* Send remaining bytes to output */
+    e->c <<= e->ct;
+    if (e->c & 0xF8000000L) { /* any of bits 27..31 set, i.e. above the byte at bits 19..26 */
+        /* One final overflow has to be handled */
+        if (e->buffer >= 0) {
+            if (e->zc) do
+                    emit_byte(0x00, cinfo);
+                while (--e->zc);
+            emit_byte(e->buffer + 1, cinfo);
+            if (e->buffer + 1 == 0xFF) emit_byte(0x00, cinfo);
+        }
+        e->zc += e->sc; /* carry-over converts stacked 0xFF bytes to 0x00 */
+        e->sc = 0;
+    } else {
+        /* No carry: flush the buffered byte, then any stacked 0xFF bytes (each followed by 0x00) */
+        if (e->buffer == 0)
+            ++e->zc;
+        else if (e->buffer >= 0) {
+            if (e->zc) do
+                    emit_byte(0x00, cinfo);
+                while (--e->zc);
+            emit_byte(e->buffer, cinfo);
+        }
+        if (e->sc) {
+            if (e->zc) do
+                    emit_byte(0x00, cinfo);
+                while (--e->zc);
+            do {
+                emit_byte(0xFF, cinfo);
+                emit_byte(0x00, cinfo);
+            } while (--e->sc);
+        }
+    }
+    /* Output final bytes only if they are not 0x00 */
+    if (e->c & 0x7FFF800L) { /* bits 11..26: the two candidate final bytes */
+        if (e->zc) /* output final pending zero bytes */
+            do
+                emit_byte(0x00, cinfo);
+            while (--e->zc);
+        emit_byte((e->c >> 19) & 0xFF, cinfo);
+        if (((e->c >> 19) & 0xFF) == 0xFF) emit_byte(0x00, cinfo);
+        if (e->c & 0x7F800L) { /* bits 11..18: second final byte, emitted only when non-zero */
+            emit_byte((e->c >> 11) & 0xFF, cinfo);
+            if (((e->c >> 11) & 0xFF) == 0xFF) emit_byte(0x00, cinfo);
+        }
+    }
+}
+bool ArithmeticCoder::arith_decode(DecoderReader* cinfo, unsigned char* st) {
+    ArithmeticCoder* e = this;
+    unsigned char nl, nm; /* next state byte after decoding an LPS / an MPS */
+    int32_t qe, temp;
+    int sv, data; /* sv: state byte (bit 7 = MPS value, bits 0..6 = table index); data: input byte */
+
+    /* Renormalization & data input per section D.2.6 */
+    while (e->a < 0x8000L) {
+        if (--e->ct < 0) {
+            /* Need to fetch next data byte */
+            if (e->unread_marker)
+                data = 0; /* stuff zero data */
+            else {
+                data = get_byte(cinfo); /* read next input byte */
+                if (data == 0xFF) {     /* zero stuff or marker code */
+                    do
+                        data = get_byte(cinfo);
+                    while (data == 0xFF); /* swallow extra 0xFF bytes */
+                    if (data == 0)
+                        data = 0xFF; /* discard stuffed zero byte */
+                    else {
+                        /* Note: Different from the Huffman decoder, hitting
+                         * a marker while processing the compressed data
+                         * segment is legal in arithmetic coding.
+                         * The convention is to supply zero data
+                         * then until decoding is complete.
+                         */
+                        e->unread_marker = data;
+                        data = 0;
+                    }
+                }
+            }
+            e->c = (e->c << 8) | data; /* insert data into C register */
+            if ((e->ct += 8) < 0)      /* update bit shift counter */
+                /* Need more initial bytes */
+                if (++e->ct == 0)   /* Got 2 initial bytes -> re-init A and exit loop */
+                    e->a = 0x8000L; /* => e->a = 0x10000L after loop exit */
+        }
+        e->a <<= 1;
+    }
+
+    /* Fetch values from our compact representation of Table D.2:
+     * Qe values and probability estimation state machine
+     */
+    sv = *st;
+    qe = jpeg_aritab[sv & 0x7F]; /* packed entry; reduced to Qe_Value by the two shifts below */
+    nl = qe & 0xFF; /* low byte: Next_Index_LPS + Switch_MPS (bit 7) */
+    qe >>= 8;
+    nm = qe & 0xFF; /* second byte: Next_Index_MPS */
+    qe >>= 8; /* => Qe_Value */
+
+    /* Decode & estimation procedures per sections D.2.4 & D.2.5 */
+    temp = e->a - qe;
+    e->a = temp;
+    temp <<= e->ct;
+    if (e->c >= temp) {
+        e->c -= temp;
+        /* Conditional LPS (less probable symbol) exchange */
+        if (e->a < qe) {
+            e->a = qe;
+            *st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */
+        } else {
+            e->a = qe;
+            *st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */
+            sv ^= 0x80;             /* Exchange LPS/MPS */
+        }
+    } else if (e->a < 0x8000L) {
+        /* Conditional MPS (more probable symbol) exchange */
+        if (e->a < qe) {
+            *st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */
+            sv ^= 0x80;             /* Exchange LPS/MPS */
+        } else {
+            *st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */
+        }
+    }
+
+    return sv >> 7; /* decoded decision: bit 7 of sv, flipped above when the LPS was decoded */
+}
+}
diff --git a/codec/L2/demos/leptonEnc/host/vp8/model/JpegArithmeticCoder.hh b/codec/L2/demos/leptonEnc/host/vp8/model/JpegArithmeticCoder.hh
new file mode 100644
index 0000000000..320515e7a3
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/model/JpegArithmeticCoder.hh
@@ -0,0 +1,101 @@
+/*  Sirikata Jpeg Texture Transfer -- Texture Transfer management system
+ *  JpegArithmeticCoder.hh
+ *
+ *  Copyright (c) 2015, The Sirikata Authors
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are
+ *  met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// This file was adapted from libjpeg-turbo
+
+/*
+ * j[cd]arith.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Developed 1997-2009 by Guido Vollbeding.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains portable arithmetic entropy encoding routines for JPEG
+ * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81).
+ *
+ * Both sequential and progressive modes are supported in this single module.
+ *
+ * Suspension is not currently supported in this module.
+ */
+#ifndef _JPEG_ARITHMETIC_CODER_HH_
+#define _JPEG_ARITHMETIC_CODER_HH_
+
+#include "../../io/Reader.hh"
+
+namespace Sirikata {
+class DecoderWriter;
+class DecoderReader;
+class SIRIKATA_EXPORT ArithmeticCoder {
+    int32_t c; // C register: base of the coding interval, layout as in sec D.1.3
+    int32_t a; // A register: normalized size of the coding interval
+    int ct;    // bit shift counter: # bits left in the bit-buffer part of C
+    // init: ct = -16  run: ct = 0...7  error: ct = -1
+    // for output, determines when a byte will be written
+
+    signed int buffer;
+    int sc;
+    int zc;
+
+    // deprecated state for reader
+    int unread_marker;
+
+   public:
+    // encoding: ct = 11, a = 0x10000; decoding: ct = -16, a = 0; other fields start identical.
+    ArithmeticCoder(bool encoding)
+        : c(0),
+          a(encoding ? 0x10000L : 0),
+          ct(encoding ? 11 : -16),
+          buffer(-1),
+          sc(0),
+          zc(0),
+          unread_marker(0) {}
+    void arith_encode(DecoderWriter* output, unsigned char* state, bool value);
+    void finish_encode(DecoderWriter* output);
+    bool arith_decode(DecoderReader* input, unsigned char* state);
+};
+class SIRIKATA_EXPORT ArithmeticWriter : ArithmeticCoder {
+    DecoderWriter* mBase; // sink handed to every encode call made through this wrapper
+
+   public:
+    ArithmeticWriter(DecoderWriter* writer) : ArithmeticCoder(true), mBase(writer) {}
+    void WriteBit(unsigned char* state, bool value) { this->arith_encode(mBase, state, value); }
+    void Finish() { this->finish_encode(mBase); }
+};
+
+class SIRIKATA_EXPORT ArithmeticReader : ArithmeticCoder {
+    DecoderReader* mBase; // source handed to every decode call made through this wrapper
+
+   public:
+    ArithmeticReader(DecoderReader* reader) : ArithmeticCoder(false), mBase(reader) {}
+    bool ReadBit(unsigned char* state) { return this->arith_decode(mBase, state); }
+};
+}
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/vp8/model/branch.hh b/codec/L2/demos/leptonEnc/host/vp8/model/branch.hh
new file mode 100644
index 0000000000..02fdc32bc6
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/model/branch.hh
@@ -0,0 +1,99 @@
+#ifndef _BRANCH_HH_
+#define _BRANCH_HH_
+#include "numeric.hh"
+#include <cmath>
+typedef uint8_t Probability;
+
+//#define VP8_ENCODER 1
+
+//#define JPEG_ENCODER
+// ^^^ if we want to try to use the JPEG spec arithmetic coder, uncomment above
+class Branch { // statistics for one binary decision: two 8-bit counters plus a cached probability
+   private:
+    uint8_t counts_[2];       // [0] = observations of false, [1] = observations of true
+    Probability probability_; // last value produced by optimize(); 128 in the identity state
+    friend class JpegBoolDecoder;
+    friend class JpegBoolEncoder;
+
+   public:
+    Probability prob() const { return probability_; }
+    void set_identity() { // neutral state: one count for each outcome, probability 128
+        counts_[0] = 1;
+        counts_[1] = 1;
+        probability_ = 128;
+    }
+    bool is_identity() const { return counts_[0] == 1 && counts_[1] == 1 && probability_ == 128; }
+    static Branch identity() { // convenience factory: a Branch already in the identity state
+        Branch retval;
+        retval.set_identity();
+        return retval;
+    }
+    uint32_t true_count() const { return counts_[1]; }
+    uint32_t false_count() const { return counts_[0]; }
+    struct ProbUpdate { // plain record; not referenced by any other member of this class
+        struct ProbOutcome {
+            uint8_t log_prob;
+        };
+        uint8_t prob;
+        ProbOutcome next[2]; // [0] = false outcome, [1] = true outcome (see accessors below)
+        uint8_t& log_prob_false() { return next[0].log_prob; }
+        uint8_t& log_prob_true() { return next[1].log_prob; }
+    };
+
+#ifndef _WIN32
+    __attribute__((always_inline))
+#endif
+    void
+    record_obs_and_update(bool obs) {
+        /*
+         * Count one observation of `obs` and refresh probability_.
+         * If the 8-bit counter wraps: when the other outcome's count is still 1,
+         * pin this count at 0xff and the probability at 0 (obs) or 255 (!obs);
+         * otherwise halve both pre-increment counts (rounding up), then set counts_[obs] = 129.
+         */
+        unsigned int fcount = counts_[0]; // snapshots taken before the increment below
+        unsigned int tcount = counts_[1];
+        bool overflow = (counts_[obs]++ == 0xff);
+        if (__builtin_expect(overflow, 0)) { // rare path: the counter was 0xff and has just wrapped to 0
+            bool neverseen = counts_[!obs] == 1;
+            if (neverseen) {
+                counts_[obs] = 0xff;
+                probability_ = obs ? 0 : 255;
+            } else {
+                counts_[0] = ((1 + (unsigned int)fcount) >> 1);
+                counts_[1] = ((1 + (unsigned int)tcount) >> 1);
+                counts_[obs] = 129; // overrides the halved value (128) just stored for this outcome
+                probability_ = optimize(counts_[0] + counts_[1]);
+            }
+        } else {
+            probability_ = optimize(fcount + tcount + 1); // +1 accounts for the observation just counted
+        }
+    }
+    void normalize() { // halve both counters, rounding up; probability_ is left untouched
+        counts_[0] = ((1 + (unsigned int)counts_[0]) >> 1);
+        counts_[1] = ((1 + (unsigned int)counts_[1]) >> 1);
+    }
+#ifndef _WIN32
+    __attribute__((always_inline))
+#endif
+    Probability
+    optimize(int sum) const { // probability derived from false_count() and `sum`; asserts both counts are non-zero
+        assert(false_count() && true_count());
+#if 0
+      const int prob = (false_count() << 8) / sum;
+#else
+        const int prob = fast_divide18bit_by_10bit(false_count() << 8, sum); // presumably equals the division above - TODO confirm
+#endif
+        assert(prob >= 0);
+        assert(prob <= 255);
+
+        return (Probability)prob;
+
+#ifdef JPEG_ENCODER
+#error needs to be updated
+#endif
+    }
+
+    Branch() {} // deliberately initializes nothing: counts_ and probability_ are unspecified until set
+};
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/vp8/model/color_context.hh b/codec/L2/demos/leptonEnc/host/vp8/model/color_context.hh
new file mode 100644
index 0000000000..e15561f834
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/model/color_context.hh
@@ -0,0 +1,29 @@
+#ifndef _COLOR_CONTEXT_HH_
+#define _COLOR_CONTEXT_HH_
+
+enum class BlockType { // component selector; enumerator values are 0, 1, 2 (and 3 for Ck)
+    Y,
+    Cb,
+    Cr
+#ifdef ALLOW_FOUR_COLORS
+    ,
+    Ck // fourth component, only present when ALLOW_FOUR_COLORS is defined
+#endif
+};
+
+class AlignedBlock;
+
+struct BlockColorContext { // per-block colour information; pointer members exist only with USE_COLOR_VALUES
+    uint8_t color; // 0 for Y 1 for Cb and 2 for Cr, 3 for (K in CMYK)
+#ifdef USE_COLOR_VALUES
+    const AlignedBlock* luminance[2][2]; // presumably the 2x2 luma blocks related to this block - TODO confirm
+    const AlignedBlock* chroma;
+#endif
+};
+struct BlockColorContextIndices { // index-based counterpart of BlockColorContext; empty unless USE_COLOR_VALUES is defined
+#ifdef USE_COLOR_VALUES
+    Optional<std::pair<int, int> > luminanceIndex[2][2]; // same 2x2 shape as BlockColorContext::luminance
+    Optional<std::pair<int, int> > chromaIndex;
+#endif
+};
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/vp8/model/jpeg_meta.hh b/codec/L2/demos/leptonEnc/host/vp8/model/jpeg_meta.hh
new file mode 100644
index 0000000000..a6c3362582
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/model/jpeg_meta.hh
@@ -0,0 +1,240 @@
+#ifndef _JPEG_META_HH
+#define _JPEG_META_HH
+
+#include <vector>
+#include <type_traits>
+#ifndef __APPLE__
+#ifndef BSD
+#ifndef _WIN32
+#include <endian.h>
+#endif
+#endif
+#endif
+static const unsigned char zigzag[] = {0,  1,  5,  6,  14, 15, 27, 28, 2,  4,  7,  13, 16, 26, 29, 42,
+                                       3,  8,  12, 17, 25, 30, 41, 43, 9,  11, 18, 24, 31, 40, 44, 53,
+                                       10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60,
+                                       21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63};
+static const unsigned char unzigzag[] = {0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,
+                                         12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6,  7,  14, 21, 28,
+                                         35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+                                         58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63};
+static const unsigned char unzigzag49[] = {9,  10, 17, 25, 18, 11, 12, 19, 26, 33, 41, 34, 27, 20, 13, 14, 21,
+                                           28, 35, 42, 49, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58,
+                                           59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63};
+
+// precalculated int base values for 8x8 dct scaled by 8192
+const int icos_base_8192_scaled[64] = {
+    8192,   8192,  8192,   8192,  8192, 8192,  8192,   8192,   11363, 9633,  6436,   2260,  -2260,
+    -6436,  -9633, -11363, 10703, 4433, -4433, -10703, -10703, -4433, 4433,  10703,  9633,  -2260,
+    -11363, -6436, 6436,   11363, 2260, -9633, 8192,   -8192,  -8192, 8192,  8192,   -8192, -8192,
+    8192,   6436,  -11363, 2260,  9633, -9633, -2260,  11363,  -6436, 4433,  -10703, 10703, -4433,
+    -4433,  10703, -10703, 4433,  2260, -6436, 9633,   -11363, 11363, -9633, 6436,   -2260,
+};
+
+const int icos_idct_linear_8192_scaled[64] = {
+    1024, 1420,  1338,  1204,  1024,  805,   554,   283,  1024, 1204,  554,   -283,  -1024, -1420, -1338, -805,
+    1024, 805,   -554,  -1420, -1024, 283,   1338,  1204, 1024, 283,   -1338, -805,  1024,  1204,  -554,  -1420,
+    1024, -283,  -1338, 805,   1024,  -1204, -554,  1420, 1024, -805,  -554,  1420,  -1024, -283,  1338,  -1204,
+    1024, -1204, 554,   283,   -1024, 1420,  -1338, 805,  1024, -1420, 1338,  -1204, 1024,  -805,  554,   -283,
+};
+
+constexpr unsigned char nonzero_to_bin[49][50] = {
+    {
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    },
+    {
+        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    },
+    {
+        0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    },
+    {
+        0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    },
+    {
+        0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
+        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    },
+    {
+        0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,
+        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    },
+    {
+        0, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+        5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+    },
+    {
+        0, 1, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+        6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    },
+    {
+        0, 1, 2, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    },
+    {
+        0, 1, 2, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+    }, // <-- cur
+    {
+        0, 1, 2, 3, 4, 5, 5, 6, 6,  6,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,
+        9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  6,  7,  7,  7,  7,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  10,
+        10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  7,  7,  8,  8,  8,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10,
+        10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  8,  8,  9,  9,  9,  9,  10, 10, 10, 10, 10, 11, 11, 11, 11, 11,
+        11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  9,  9,  10, 10, 10, 10, 11, 11, 11, 11, 11, 12, 12, 12, 12,
+        12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 12, 13, 13, 13,
+        13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 13, 14,
+        14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 14,
+        15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
+        15, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16,
+        16, 16, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 15, 15, 15, 16, 16, 16, 17, 17, 17,
+        17, 17, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 16, 16, 17, 17, 17, 17, 18, 18,
+        18, 18, 18, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 17, 17, 18, 18, 18, 18, 19,
+        19, 19, 19, 19, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 18, 18, 19, 19, 19, 19,
+        20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 19, 19, 20, 20, 20,
+        20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 20, 21, 21, 21,
+        21, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 21, 22, 22,
+        22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 22, 23,
+        23, 23, 23, 24, 24, 24, 24, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 26, 26, 27, 27, 27, 27, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 27, 27, 27, 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 27, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 27, 28, 29, 29, 30, 30, 30, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 27, 28, 29, 30, 31, 31, 31, 31, 32, 32, 32, 32, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 27, 28, 29, 30, 31, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 27, 28, 29, 30, 31, 32, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 34, 35, 35, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 37, 37,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 38, 38,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 38, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 39, 40, 40, 40, 40, 41, 41, 41, 41, 41,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 41, 41, 41, 42, 42, 42, 42, 42,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 42, 43, 43, 43, 43, 43,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 43, 44, 44, 44, 44, 44,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 45, 45, 45, 45,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 46, 46, 46,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 47, 47,
+    },
+    {
+        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 48,
+    },
+};
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/vp8/model/model.cc b/codec/L2/demos/leptonEnc/host/vp8/model/model.cc
new file mode 100644
index 0000000000..497621d83e
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/model/model.cc
@@ -0,0 +1,383 @@
+#include "../util/memory.hh"
+#include <assert.h>
+#ifdef _WIN32
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+#include <fstream>
+#include <iostream>
+
+#include <emmintrin.h>
+#include "model.hh"
+bool all_branches_identity(const Branch* start, const Branch* end) {
+    // Walk [start, end) and stop at the first branch that has left the identity state.
+    const Branch* cursor = start;
+    while (cursor != end && cursor->is_identity()) {
+        ++cursor;
+    }
+    return cursor == end; // the end is reached only when every branch was identity
+}
+void set_branch_range_identity(Branch* start, Branch* end) { // put every Branch in [start, end) into the identity state
+    if (__builtin_expect(end - start <= 64, 0)) { // short range (hinted as unlikely): plain scalar loop
+        for (; start != end; ++start) {
+            start->set_identity();
+        }
+        return;
+    }
+#if __AVX__ // 256-bit variant of the fill below
+    for (int i = 0; i < 32; ++i) { // seed the first 32 branches so r0..r2 can be loaded from them
+        start[i].set_identity();
+    }
+    for (int i = 1; i <= 32; ++i) { // and the last 32, which cover whatever the aligned stores leave out
+        end[-i].set_identity();
+    }
+    char* data = (char*)(void*)start;
+    __m256i r0 = _mm256_loadu_si256((const __m256i*)data);
+    __m256i r1 = _mm256_loadu_si256((const __m256i*)(data + 32));
+    __m256i r2 = _mm256_loadu_si256((const __m256i*)(data + 64));
+    size_t offset = data - (char*)0; // numeric address of the first byte
+    size_t align = 32 - (offset % 32); // distance to the next 32-byte boundary; 32 (not 0) if already aligned
+    char* dataend = (char*)end;
+    size_t offsetend = dataend - (char*)0;
+    __m256i* write_end = (__m256i*)(dataend - (offsetend % 32)); // end rounded down to a 32-byte boundary
+    __m256i* write_cursor = (__m256i*)(data + align);
+    switch (align % 3) { // pick the register in phase with the cursor; assumes sizeof(Branch) == 3 - TODO confirm
+        case 2:
+            _mm256_store_si256(write_cursor, r1);
+            write_cursor += 1; // intentional fall-through
+        case 1:
+            _mm256_store_si256(write_cursor, r2);
+            write_cursor += 1; // intentional fall-through
+        case 0:
+            break;
+    }
+    while (write_cursor + 2 < write_end) { // aligned stores, three registers per iteration
+        _mm256_store_si256(write_cursor, r0);
+        _mm256_store_si256(write_cursor + 1, r1);
+        _mm256_store_si256(write_cursor + 2, r2);
+        write_cursor += 3;
+    }
+// 128-bit variant: same scheme with 16-byte registers and 16 seeded branches at each end.
+#else
+    for (int i = 0; i < 16; ++i) { // seed the first 16 branches so r0..r2 can be loaded from them
+        start[i].set_identity();
+    }
+    for (int i = 1; i <= 16; ++i) { // and the last 16, which cover whatever the aligned stores leave out
+        end[-i].set_identity();
+    }
+    char* data = (char*)(void*)start;
+    __m128i r0 = _mm_loadu_si128((const __m128i*)data);
+    __m128i r1 = _mm_loadu_si128((const __m128i*)(data + 16));
+    __m128i r2 = _mm_loadu_si128((const __m128i*)(data + 32));
+    size_t offset = data - (char*)0; // numeric address of the first byte
+    size_t align = 16 - (offset % 16); // distance to the next 16-byte boundary; 16 (not 0) if already aligned
+    char* dataend = (char*)end;
+    size_t offsetend = dataend - (char*)0;
+    __m128i* write_end = (__m128i*)(dataend - (offsetend % 16)); // end rounded down to a 16-byte boundary
+    __m128i* write_cursor = (__m128i*)(data + align);
+    switch (align % 3) { // case labels differ from the AVX path because 16 % 3 != 32 % 3
+        case 1:
+            _mm_store_si128(write_cursor, r1);
+            write_cursor += 1; // intentional fall-through
+        case 2:
+            _mm_store_si128(write_cursor, r2);
+            write_cursor += 1; // intentional fall-through
+        case 0:
+            break;
+    }
+    while (write_cursor + 2 < write_end) { // aligned stores, three registers per iteration
+        _mm_store_si128(write_cursor, r0);
+        _mm_store_si128(write_cursor + 1, r1);
+        _mm_store_si128(write_cursor + 2, r2);
+        write_cursor += 3;
+    }
+#endif
+    assert(all_branches_identity(start, end)); // debug-only check of the whole range
+}
+
+#ifdef _WIN32
+__declspec(align(16))
+#endif
+    int32_t ProbabilityTablesBase::icos_idct_edge_8192_dequantized_x_[(int)ColorChannel::NumBlockTypes][64]
+#ifndef _WIN32
+    __attribute__((aligned(16)))
+#endif
+    = {{0}};
+
+#ifdef _WIN32
+__declspec(align(16))
+#endif
+    int32_t ProbabilityTablesBase::icos_idct_edge_8192_dequantized_y_[(int)ColorChannel::NumBlockTypes][64]
+#ifndef _WIN32
+    __attribute__((aligned(16)))
+#endif
+    = {{0}};
+#ifdef _WIN32
+__declspec(align(16))
+#endif
+    int32_t ProbabilityTablesBase::icos_idct_linear_8192_dequantized_[(int)ColorChannel::NumBlockTypes][64]
+#ifndef _WIN32
+    __attribute__((aligned(16)))
+#endif
+    = {{0}};
+#ifdef ANNOTATION_ENABLED
+Context* gctx = (Context*)memset(calloc(sizeof(Context), 1), 0xff, sizeof(Context));
+#endif
+
+#ifdef _WIN32
+__declspec(align(16))
+#endif
+    uint16_t ProbabilityTablesBase::quantization_table_[(int)ColorChannel::NumBlockTypes][64]
+#ifndef _WIN32
+    __attribute__((aligned(16)))
+#endif
+    ;
+#ifdef _WIN32
+__declspec(align(16))
+#endif
+    uint16_t ProbabilityTablesBase::freqmax_[(int)ColorChannel::NumBlockTypes][64]
+#ifndef _WIN32
+    __attribute__((aligned(16)))
+#endif
+    ;
+
+#ifdef _WIN32
+__declspec(align(16))
+#endif
+    uint8_t ProbabilityTablesBase::min_noise_threshold_[(int)ColorChannel::NumBlockTypes][64]
+#ifndef _WIN32
+    __attribute__((aligned(16)))
+#endif
+    ;
+
+#ifdef _WIN32
+__declspec(align(16))
+#endif
+    uint8_t ProbabilityTablesBase::bitlen_freqmax_[(int)ColorChannel::NumBlockTypes][64]
+#ifndef _WIN32
+    __attribute__((aligned(16)))
+#endif
+    ;
+int get_sum_median_8(int16_t* dc_estimates) {
+    // Trimmed sum over 16 estimates: four selection passes move the smallest
+    // remaining value to the front and the largest to the back (in place),
+    // then the 8 values left in the middle are added up and returned.
+    const int count = 16;
+    for (int lo = 0; lo < 4; ++lo) {
+        const int hi = count - 1 - lo;
+        if (dc_estimates[lo] > dc_estimates[hi]) {
+            std::swap(dc_estimates[lo], dc_estimates[hi]);
+        }
+        int smallest = dc_estimates[lo], smallest_at = lo;
+        int largest = dc_estimates[hi], largest_at = hi;
+        for (int k = lo + 1; k < hi; ++k) {
+            const int candidate = dc_estimates[k];
+            if (candidate > largest) {
+                largest_at = k;
+                largest = candidate;
+            }
+            if (candidate < smallest) {
+                smallest_at = k;
+                smallest = candidate;
+            }
+        }
+        dc_estimates[smallest_at] = dc_estimates[lo];
+        dc_estimates[largest_at] = dc_estimates[hi];
+        dc_estimates[lo] = smallest;
+        dc_estimates[hi] = largest;
+    }
+    int total = 0;
+    for (int k = 4; k < count - 4; ++k) total += dc_estimates[k];
+    return total;
+}
+void serialize_model(const Model& model, int output_fp) { // write the raw bytes of `model` to descriptor `output_fp`
+    size_t left_to_write = sizeof(model);
+    const char* data = reinterpret_cast<const char*>(&model);
+    while (left_to_write) {
+        long written; // must be signed: write()/_write() report failure as -1
+#ifdef _WIN32
+        written = _write(output_fp, data, (unsigned int)left_to_write);
+#else
+        written = write(output_fp, data, left_to_write);
+#endif
+        if (written <= 0) {
+            // Retry only a genuine EINTR failure; give up silently on hard errors or zero progress.
+            if (written < 0 && errno == EINTR) continue;
+            break;
+        }
+        left_to_write -= (size_t)written; // partial writes are resumed from where they stopped
+        data += written;
+    }
+}
+
+void optimize_model(Model& model) { // currently a no-op: the per-branch pass below is commented out
+    (void)model; // marks the parameter as intentionally unused
+    // model.forall( [&] ( Branch & x ) { x.optimize(); } );
+}
+
+bool filter(const Branch& a, const Branch* b) {
+#ifndef USE_COUNT_FREE_UPDATE
+    const bool unobserved = a.true_count() == 0 && a.false_count() == 0;
+    if (unobserved) return false;
+    if (!b) {
+        // No peer to compare with: demand more than 300 samples of each outcome.
+        // NOTE(review): Branch keeps its counts in uint8_t, so "> 300" can never hold - confirm intent.
+        return a.true_count() > 300 && a.false_count() > 300;
+    }
+    // With a peer: reject when the two probabilities are equal or differ by exactly one.
+    const int mine = a.prob(), theirs = b->prob();
+    if (mine + 1 == theirs || mine == theirs + 1 || mine == theirs) return false;
+#endif
+    return true;
+}
+template <class BranchArray>
+void print_helper(const BranchArray& ba,
+                  const BranchArray* other,
+                  const std::string& table_name,
+                  const std::vector<std::string>& names,
+                  std::vector<uint32_t>& values,
+                  Model::PrintabilitySpecification print_branch_bitmask) {
+    values.push_back(0); // this recursion level owns the last slot of the index path
+    for (size_t idx = 0; idx < ba.dimsize(); ++idx) {
+        values.back() = idx;
+        auto slice = ba.at(idx);
+        // `other` is not descended into: every nested level receives a null peer.
+        decltype(&slice) no_peer = nullptr;
+        print_helper(slice, no_peer, table_name, names, values, print_branch_bitmask);
+    }
+    values.pop_back();
+}
+
+// Decides whether one branch's statistics are printed, by testing a single
+// bit of spec.printability_bitmask:
+//  - fewer than spec.min_samples observations: PRINTABLE_INSIGNIFICANT;
+//  - ratio closer than spec.tolerance to its reference: CLOSE_TO_ONE_ANOTHER
+//    when `other` is set (reference = other_ratio), otherwise CLOSE_TO_50
+//    (reference = .5);
+//  - anything else: PRINTABLE_OK.
+// Returns true when the selected bit is present in the mask.
+bool is_printable(uint64_t true_count,
+                  uint64_t false_count,
+                  double true_false_ratio,
+                  double other_ratio,
+                  bool other,
+                  Model::PrintabilitySpecification spec) {
+    if (true_count + false_count >= spec.min_samples) {
+        // Absolute distance between the observed ratio and what it is compared against.
+        const double reference = other ? other_ratio : .5;
+        double delta = true_false_ratio - reference;
+        if (delta < 0) {
+            delta = -delta;
+        }
+        if (delta < spec.tolerance) {
+            if (other) {
+                return (Model::CLOSE_TO_ONE_ANOTHER & spec.printability_bitmask) ? true : false;
+            }
+            return (Model::CLOSE_TO_50 & spec.printability_bitmask) ? true : false;
+        }
+        return (Model::PRINTABLE_OK & spec.printability_bitmask) ? true : false;
+    }
+    // Below the sample threshold, regardless of `other`.
+    return (Model::PRINTABLE_INSIGNIFICANT & spec.printability_bitmask) ? true : false;
+}
+template <>
+void print_helper(const Branch& ba,
+                  const Branch* other,
+                  const std::string& table_name,
+                  const std::vector<std::string>& names,
+                  std::vector<uint32_t>& values,
+                  Model::PrintabilitySpecification print_branch_bitmask) {
+#ifndef USE_COUNT_FREE_UPDATE
+    double ratio = (ba.true_count() + 1) / (double)(ba.false_count() + ba.true_count() + 2);
+    // Leaf of the recursion: prints one line "table::name[idx]... = (true, false-1)" to std::cout.
+    double other_ratio = ratio;
+    if (other) {
+        other_ratio = (other->true_count() + 1) / (double)(other->false_count() + other->true_count() + 2);
+    }
+    // Branches with no true observations and at most one false observation are skipped.
+    if (ba.true_count() > 0 || ba.false_count() > 1) {
+        if (is_printable(ba.true_count(), ba.false_count(), ratio, other_ratio, !!other, print_branch_bitmask)) {
+            always_assert(names.size() == values.size());
+            std::cout << table_name << "::";
+            for (size_t i = 0; i < names.size(); ++i) {
+                std::cout << names[i] << '[' << values[i] << ']';
+            }
+            std::cout << " = (" << ba.true_count() << ", " << (ba.false_count() - 1) << ")";
+            if (other) { // the peer tuple now closes with ')' to match its opening '(' (it used to print '}')
+                std::cout << " = (" << other->true_count() << ", " << (other->false_count() - 1) << ")";
+            }
+            std::cout << std::endl;
+        }
+    }
+#endif
+}
+template <class BranchArray>
+void print_all(const BranchArray& ba,
+               const BranchArray* other_ba,
+               const std::string& table_name,
+               const std::vector<std::string>& names,
+               Model::PrintabilitySpecification spec) {
+    std::vector<uint32_t> index_path; // starts empty; print_helper pushes one slot per nesting level
+    print_helper(ba, other_ba, table_name, names, index_path, spec);
+}
+
+const Model& Model::debug_print(const Model* other, Model::PrintabilitySpecification spec) const { // returns *this
+#ifndef _WIN32 // the whole dump is compiled out on Windows
+    print_all(this->num_nonzeros_counts_7x7_, other ? &other->num_nonzeros_counts_7x7_ : nullptr, "NONZERO 7x7",
+              {"cmp", "nbr", "bit", "prevbits"}, spec);
+
+    print_all(this->num_nonzeros_counts_1x8_, other ? &other->num_nonzeros_counts_1x8_ : nullptr, "NONZERO_1x8",
+              {"cmp", "eobx", "num_nonzeros", "bit", "prevbits"}, spec);
+    print_all(this->num_nonzeros_counts_8x1_, other ? &other->num_nonzeros_counts_8x1_ : nullptr, "NONZERO_8x1",
+              {"cmp", "eobx", "num_nonzeros", "bit", "prevbits"}, spec);
+    print_all(this->exponent_counts_dc_, other ? &other->exponent_counts_dc_ : nullptr, "EXP_DC",
+              {"cmp", "num_nonzeros", "neigh_exp", "bit", "prevbits"}, spec);
+    print_all(this->exponent_counts_, other ? &other->exponent_counts_ : nullptr, "EXP7x7",
+              {"cmp", "coef", "num_nonzeros", "neigh_exp", "bit", "prevbits"}, spec);
+    print_all(this->exponent_counts_x_, other ? &other->exponent_counts_x_ : nullptr, "EXP_8x1",
+              {"cmp", "coef", "num_nonzeros", "neigh_exp", "bit", "prevbits"}, spec);
+    print_all(this->residual_noise_counts_, other ? &other->residual_noise_counts_ : nullptr, "NOISE",
+              {"cmp", "coef", "num_nonzeros", "bit"}, spec);
+    print_all(this->residual_threshold_counts_, other ? &other->residual_threshold_counts_ : nullptr, "THRESH8",
+              {"cmp", "max", "exp", "prevbits"}, spec);
+    print_all(this->sign_counts_, other ? &other->sign_counts_ : nullptr, "SIGN", {"cmp", "lakh", "exp"}, spec);
+#endif
+    return *this; // each print_all call above passes the table, its peer from `other` (or null), a label and index names
+}
+
+void normalize_model(Model& model) { // calls Branch::normalize() on every branch that Model::forall visits
+    model.forall([&](Branch& x) { x.normalize(); });
+}
+
+void ProbabilityTablesBase::load_probability_tables() { // does nothing unless LEPTON_COMPRESSION_MODEL is set
+    const char* model_name = getenv("LEPTON_COMPRESSION_MODEL"); // value is passed to load() below as the model name
+    if (model_name) {
+        const char* msg = "Using good probability tables!\n";
+        while (write(2, msg, strlen(msg)) < 0 && errno == EINTR) { // notice on fd 2, retried while interrupted
+        }
+        ProbabilityTables<true, BlockType::Y> model_tables(BlockType::Y, true, true, true);
+        model_tables.load(*this, model_name);
+        model_tables.normalize(*this);
+    }
+}
+
+void reset_model(Model& model) { // NOTE(review): Branch() sets no fields, so this is not a reset to identity - confirm
+    model.forall([&](Branch& x) { x = Branch(); });
+}
+
+void load_model(Model& model, const char* filename) {
+    FILE* fp = fopen(filename, "rb");
+    if (!fp) { // unreadable file: report "<filename> not found for input model" on fd 2 and carry on
+        while (write(2, filename, strlen(filename)) < 0 && errno == EINTR) {
+        }
+        const char* msg = " not found for input model\n";
+        while (write(2, msg, strlen(msg)) < 0 && errno == EINTR) {
+        }
+        return;
+    }
+    const size_t bytes_read = fread(&model, 1, sizeof(model), fp); // raw image read straight into `model`
+    fclose(fp);
+    (void)bytes_read;
+    always_assert(sizeof(model) == bytes_read && "unexpected model file size.");
+}
diff --git a/codec/L2/demos/leptonEnc/host/vp8/model/model.hh b/codec/L2/demos/leptonEnc/host/vp8/model/model.hh
new file mode 100644
index 0000000000..c9564eec3c
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/model/model.hh
@@ -0,0 +1,1912 @@
+#ifndef MODEL_HH
+#define MODEL_HH
+
+#include <vector>
+#include <memory>
+#include <tmmintrin.h>
+#include "../util/debug.hh"
+#include "../util/options.hh"
+#include "../util/nd_array.hh"
+#include "../../lepton/idct.hh"
+#include "numeric.hh"
+#include "branch.hh"
+#include "../util/aligned_block.hh"
+#include "../util/block_based_image.hh"
+#include "../util/mm_mullo_epi32.hh"
+
+class BoolEncoder;
+constexpr bool advanced_dc_prediction = true; // false;//true;
+// Compile-time constants; several of them are used below as array dimensions of the Model tables.
+enum TableParams : unsigned int {
+    MAX_EXPONENT = 11, // innermost dimension of the exponent count tables
+    BLOCK_TYPES = 2, // setting this to 3 gives us ~1% savings.. 2/3 from BLOCK_TYPES=2
+    NUM_NONZEROS_BINS = 10,
+    BSR_BEST_PRIOR_MAX = 11, // 1023 requires 11 bits to describe
+    band_divisor = 1,
+    COEF_BANDS = 64 / band_divisor, // 64 coefficient positions grouped band_divisor at a time
+    ENTROPY_NODES = 15,
+    NUM_NONZEROS_EOB_PRIORS = 66,
+    ZERO_OR_EOB = 3,
+    RESIDUAL_NOISE_FLOOR = 7,
+    COEF_BITS = MAX_EXPONENT - 1, // the last item of the length is always 1
+};
+int get_sum_median_8(int16_t* data16i);
+void set_branch_range_identity(Branch* start, Branch* end);
+
+// Apply set_branch_range_identity to the whole [begin, end) range of a branch array.
+template <class BranchArray>
+void set_branch_array_identity(BranchArray& branches) {
+    set_branch_range_identity(branches.begin(), branches.end());
+}
+// Aggregate of all Branch tables used by the coder. Each table is a fixed-size Sirikata::ArrayNd
+// of Branch; load_model() fills the struct with a raw fread of sizeof(Model) bytes.
+struct Model {
+    // Nonzero-count branches for the 7x7 region, indexed first by color index and nonzero bin
+    // (see ProbabilityTables::nonzero_counts_7x7).
+    typedef Sirikata::Array4d<Branch, BLOCK_TYPES, 26, 6, 32> NonzeroCounts7x7;
+    NonzeroCounts7x7 num_nonzeros_counts_7x7_;
+
+    // Nonzero-count branches for the two edges, indexed by color index, eob and (num_nonzeros + 3) / 7.
+    typedef Sirikata::Array5d<Branch, BLOCK_TYPES, 8, 8, 3, 4> NonzeroCounts1x8;
+    NonzeroCounts1x8 num_nonzeros_counts_1x8_;
+    NonzeroCounts1x8 num_nonzeros_counts_8x1_;
+
+    // Residual-bit branches, indexed by color index, band / band_divisor and nonzero bin.
+    typedef Sirikata::Array4d<Branch,
+                              BLOCK_TYPES,
+                              COEF_BANDS,
+                              (8 > NUM_NONZEROS_BINS ? 8 : (unsigned int)NUM_NONZEROS_BINS),
+                              COEF_BITS>
+        ResidualNoiseCounts;
+
+    ResidualNoiseCounts residual_noise_counts_;
+
+    // Residual-bit branches for the DC coefficient.
+    typedef Sirikata::Array2d<Branch, NUMERIC_LENGTH_MAX, COEF_BITS> ResidualNoiseCountsDc;
+
+    ResidualNoiseCountsDc residual_noise_counts_dc_;
+
+    typedef Sirikata::Array4d<Branch,
+                              BLOCK_TYPES,
+                              (1 << (1 + RESIDUAL_NOISE_FLOOR)),
+                              1 + RESIDUAL_NOISE_FLOOR,
+                              1 << RESIDUAL_NOISE_FLOOR>
+        ResidualThresholdCounts;
+
+    ResidualThresholdCounts residual_threshold_counts_;
+
+    // Exponent branches for the edge coefficients (third dimension: 15 positions).
+    typedef Sirikata::Array5d<Branch, BLOCK_TYPES, NUM_NONZEROS_BINS, 15, NUMERIC_LENGTH_MAX, MAX_EXPONENT>
+        ExponentCounts8;
+
+    // Exponent branches for the 7x7 coefficients (third dimension: 49 positions).
+    typedef Sirikata::Array5d<Branch, BLOCK_TYPES, NUM_NONZEROS_BINS, 49, NUMERIC_LENGTH_MAX, MAX_EXPONENT>
+        ExponentCounts7x7;
+
+    // Exponent branches for the DC coefficient; the first dimension is the larger of
+    // NUM_NONZEROS_BINS and NUMERIC_LENGTH_MAX.
+    typedef Sirikata::Array3d<Branch,
+                              ((unsigned int)NUM_NONZEROS_BINS <= (unsigned int)NUMERIC_LENGTH_MAX
+                                   ? (unsigned int)NUMERIC_LENGTH_MAX
+                                   : (unsigned int)NUM_NONZEROS_BINS),
+                              17 /*any 16 bit number should fit*/,
+                              MAX_EXPONENT>
+        ExponentCountsDC;
+
+    ExponentCounts7x7 exponent_counts_;
+    ExponentCounts8 exponent_counts_x_;
+    ExponentCountsDC exponent_counts_dc_;
+    // Run set_branch_array_identity over all ten tables of the model.
+    void set_tables_identity() {
+        set_branch_array_identity(num_nonzeros_counts_7x7_);
+        set_branch_array_identity(num_nonzeros_counts_1x8_);
+        set_branch_array_identity(num_nonzeros_counts_8x1_);
+        set_branch_array_identity(residual_noise_counts_);
+        set_branch_array_identity(residual_noise_counts_dc_);
+        set_branch_array_identity(residual_threshold_counts_);
+        set_branch_array_identity(exponent_counts_);
+        set_branch_array_identity(exponent_counts_x_);
+        set_branch_array_identity(exponent_counts_dc_);
+        set_branch_array_identity(sign_counts_);
+    }
+    // Sign branches, indexed first by color index.
+    typedef Sirikata::Array3d<Branch, BLOCK_TYPES, 4, NUMERIC_LENGTH_MAX> SignCounts;
+    SignCounts sign_counts_;
+
+    // Invoke `proc` on the branches of the tables listed below (via each table's foreach).
+    // NOTE(review): residual_noise_counts_dc_ is not visited here although set_tables_identity()
+    // covers it, so reset_model()/normalize_model() leave that table untouched - confirm this is
+    // intentional.
+    template <typename lambda>
+    void forall(const lambda& proc) {
+        num_nonzeros_counts_7x7_.foreach (proc);
+        num_nonzeros_counts_1x8_.foreach (proc);
+        num_nonzeros_counts_8x1_.foreach (proc);
+        exponent_counts_x_.foreach (proc);
+        exponent_counts_.foreach (proc);
+        exponent_counts_dc_.foreach (proc);
+
+        residual_noise_counts_.foreach (proc);
+        residual_threshold_counts_.foreach (proc);
+        sign_counts_.foreach (proc);
+    }
+    // Bit flags combined into PrintabilitySpecification::printability_bitmask.
+    enum Printability { PRINTABLE_INSIGNIFICANT = 1, PRINTABLE_OK = 2, CLOSE_TO_50 = 4, CLOSE_TO_ONE_ANOTHER = 8 };
+    // Options passed to debug_print().
+    struct PrintabilitySpecification {
+        uint64_t printability_bitmask;
+        double tolerance;
+        uint64_t min_samples;
+    };
+    // Debug dump of the tables; `other` may be null (the definition tests it before use).
+    // Returns *this.
+    const Model& debug_print(const Model* other, PrintabilitySpecification spec) const;
+};
+
+// Tags identifying the kind of context passed as the second argument of ANNOTATE_CTX.
+// NUMCONTEXT is last, so it equals the number of tags before it.
+enum ContextTypes {
+    ZDSTSCAN,
+    ZEROS7x7,
+    EXPDC,
+    RESDC,
+    SIGNDC,
+    EXP7x7,
+    RES7x7,
+    SIGN7x7,
+    ZEROS1x8,
+    ZEROS8x1,
+    EXP8,
+    THRESH8,
+    RES8,
+    SIGN8,
+    NUMCONTEXT
+};
+// Optional context-annotation instrumentation. It is disabled by `#if 0`, so the active definition
+// of ANNOTATE_CTX (in the #else branch) expands to nothing and its arguments are never compiled.
+#if 0
+struct Context {
+    enum {
+        H = 2448,
+        W = 3264
+    };
+    int cur_cmp;
+    int cur_jpeg_x;
+    int cur_jpeg_y;
+    ContextTypes annot;
+    int p[3][H/8][W/8][8][8][NUMCONTEXT][3];
+};
+extern Context *gctx;
+#define ANNOTATION_ENABLED
+#define ANNOTATE_CTX(bpos, annot_type, ctxnum, value) \
+    (gctx->annot = annot_type,                        \
+     gctx->p[gctx->cur_cmp][gctx->cur_jpeg_y][gctx->cur_jpeg_x][bpos / 8][bpos % 8][annot_type][ctxnum] = value)
+#else
+#define ANNOTATE_CTX(bpos, annot_type, ctxnum, value)
+#endif
+
+class Slice;
+void optimize_model(Model& model);
+void serialize_model(const Model& model, int output_fd);
+void reset_model(Model& model);
+void normalize_model(Model& model);
+void load_model(Model& model, const char* filename);
+// 16-byte alignment helpers. Exactly one of the two macros is non-empty: on _WIN32 the prefix macro
+// WINALIGN16 carries __declspec(align(16)); elsewhere the suffix macro UNIXALIGN16 carries
+// __attribute__((aligned(16))). Declarations use both, one before and one after the declarator.
+#ifdef _WIN32
+#define WINALIGN16 __declspec(align(16))
+#define UNIXALIGN16
+#else
+#define WINALIGN16
+#define UNIXALIGN16 __attribute__((aligned(16)))
+#endif
+// Owns the Model plus per-color static lookup tables that are derived from the quantization table
+// by set_quantization_table().
+class ProbabilityTablesBase {
+   protected:
+    Model model_;
+
+    // Per-color, 64-entry static tables; all are written by set_quantization_table().
+    static WINALIGN16 int32_t icos_idct_edge_8192_dequantized_x_[(int)ColorChannel::NumBlockTypes][64] UNIXALIGN16;
+
+    static WINALIGN16 int32_t icos_idct_edge_8192_dequantized_y_[(int)ColorChannel::NumBlockTypes][64] UNIXALIGN16;
+
+    static WINALIGN16 int32_t icos_idct_linear_8192_dequantized_[(int)ColorChannel::NumBlockTypes][64] UNIXALIGN16;
+
+    static WINALIGN16 uint16_t quantization_table_[(int)ColorChannel::NumBlockTypes][64] UNIXALIGN16;
+
+    static WINALIGN16 uint16_t freqmax_[(int)ColorChannel::NumBlockTypes][64] UNIXALIGN16;
+
+    static WINALIGN16 uint8_t bitlen_freqmax_[(int)ColorChannel::NumBlockTypes][64] UNIXALIGN16;
+
+    static WINALIGN16 uint8_t min_noise_threshold_[(int)ColorChannel::NumBlockTypes][64] UNIXALIGN16;
+
+   public:
+    Model& model() { return model_; }
+    // Loads a model file named by the LEPTON_COMPRESSION_MODEL environment variable, if set.
+    void load_probability_tables();
+    // Pointer to the 64 stored quantization values of `color`.
+    static uint16_t* quantization_table(uint8_t color) { return quantization_table_[color]; }
+
+    // Element accessors for the static tables; no bounds checking is performed.
+    static uint16_t quantization_table(uint8_t color, uint8_t coef) { return quantization_table_[color][coef]; }
+    static uint16_t freqmax(uint8_t color, uint8_t coef) { return freqmax_[color][coef]; }
+    static uint8_t bitlen_freqmax(uint8_t color, uint8_t coef) { return bitlen_freqmax_[color][coef]; }
+    static uint8_t min_noise_threshold(uint8_t color, uint8_t coef) { return min_noise_threshold_[color][coef]; }
+    // Install the quantization table for `color` and recompute every derived static table.
+    // `quantization_table` is read through the `zigzag` permutation. Entries are used as divisors
+    // below, so they must be non-zero.
+    static void set_quantization_table(BlockType color, const unsigned short quantization_table[64]) {
+        for (int i = 0; i < 64; ++i) {
+            quantization_table_[(int)color][i] = quantization_table[zigzag[i]];
+        }
+        // Pre-multiply the scaled cosine tables by the matching quantization steps.
+        for (int pixel_row = 0; pixel_row < 8; ++pixel_row) {
+            for (int i = 0; i < 8; ++i) {
+                icos_idct_linear_8192_dequantized((int)color)[pixel_row * 8 + i] =
+                    icos_idct_linear_8192_scaled[pixel_row * 8 + i] * quantization_table_[(int)color][i];
+                icos_idct_edge_8192_dequantized_x((int)color)[pixel_row * 8 + i] =
+                    icos_base_8192_scaled[i * 8] * quantization_table_[(int)color][i * 8 + pixel_row];
+                icos_idct_edge_8192_dequantized_y((int)color)[pixel_row * 8 + i] =
+                    icos_base_8192_scaled[i * 8] * quantization_table_[(int)color][pixel_row * 8 + i];
+            }
+        }
+        static const unsigned short int freqmax[] = {
+            1024, 931, 985, 968, 1020, 968, 1020, 1020, 932,  858, 884, 840, 932,  838, 854,  854,
+            985,  884, 871, 875, 985,  878, 871,  854,  967,  841, 876, 844, 967,  886, 870,  837,
+            1020, 932, 985, 967, 1020, 969, 1020, 1020, 969,  838, 878, 886, 969,  838, 969,  838,
+            1020, 854, 871, 870, 1010, 969, 1020, 1020, 1020, 854, 854, 838, 1020, 838, 1020, 838};
+        for (int coord = 0; coord < 64; ++coord) {
+            // Ceiling division of the per-coefficient maximum by its quantization step.
+            freqmax_[(int)color][coord] =
+                (freqmax[coord] + quantization_table_[(int)color][coord] - 1) / quantization_table_[(int)color][coord];
+            uint8_t max_len = uint16bit_length(freqmax_[(int)color][coord]);
+            bitlen_freqmax_[(int)color][coord] = max_len;
+            if (max_len > (int)RESIDUAL_NOISE_FLOOR) {
+                min_noise_threshold_[(int)color][coord] = max_len - RESIDUAL_NOISE_FLOOR;
+            } else {
+                // The table is static: clear the entry so a value computed for a previously
+                // installed quantization table cannot leak into this one.
+                min_noise_threshold_[(int)color][coord] = 0;
+            }
+        }
+    }
+    // Mutable row pointers into the derived static tables for `color`.
+    static int32_t* icos_idct_edge_8192_dequantized_x(int color) {
+        return icos_idct_edge_8192_dequantized_x_[(int)color];
+    }
+    static int32_t* icos_idct_edge_8192_dequantized_y(int color) {
+        return icos_idct_edge_8192_dequantized_y_[(int)color];
+    }
+    static int32_t* icos_idct_linear_8192_dequantized(int color) {
+        return icos_idct_linear_8192_dequantized_[(int)color];
+    }
+    // Per-coefficient context used to index the Model tables.
+    struct CoefficientContext {
+        int best_prior;           // lakhani or aavrg depending on coefficient number
+        uint8_t num_nonzeros_bin; // num_nonzeros mapped into a bin
+        uint8_t bsr_best_prior;
+    };
+    // Re-export the global VECTORIZE / MICROVECTORIZE settings as class-scope constants.
+    enum { VECTORIZE = ::VECTORIZE, MICROVECTORIZE = ::MICROVECTORIZE };
+};
+
+#define USE_TEMPLATIZED_COLOR
+#ifdef USE_TEMPLATIZED_COLOR
+#define TEMPLATE_ARG_COLOR0 BlockType::Y
+#define TEMPLATE_ARG_COLOR1 BlockType::Cb
+#define TEMPLATE_ARG_COLOR2 BlockType::Cr
+#define TEMPLATE_ARG_COLOR3 BlockType::Ck
+
+#else
+#define TEMPLATE_ARG_COLOR0 BlockType::Y
+#define TEMPLATE_ARG_COLOR1 BlockType::Y
+#define TEMPLATE_ARG_COLOR2 BlockType::Y
+#define TEMPLATE_ARG_COLOR3 BlockType::Y
+#endif
+template <bool all_present,
+          BlockType
+#ifdef USE_TEMPLATIZED_COLOR
+              color
+#else
+              deprecated_color
+#endif
+          >
+class ProbabilityTables {
+   public: // xilinx just for convenience
+    // private:
+    typedef ProbabilityTablesBase::CoefficientContext CoefficientContext;
+    const bool left_present;
+    const bool above_present;
+    const bool above_right_present;
+
+   public:
+#ifdef USE_TEMPLATIZED_COLOR
+    // The color is a template argument; expose it as an integer constant.
+    enum { COLOR = (int)color };
+    // Record which neighbors are present. Asserts that `all_present` matches the three flags and
+    // that the run-time `kcolor` equals the template color.
+    ProbabilityTables(BlockType kcolor, bool in_left_present, bool in_above_present, bool in_above_right_present)
+        : left_present(in_left_present), above_present(in_above_present), above_right_present(in_above_right_present) {
+        always_assert((left_present && above_present && above_right_present) == all_present);
+        always_assert(kcolor == color);
+    }
+#else
+    // The color is stored per instance; the template color argument must then be 0.
+    const BlockType COLOR;
+    ProbabilityTables(BlockType color, bool in_left_present, bool in_above_present, bool in_above_right_present)
+        : left_present(in_left_present),
+          above_present(in_above_present),
+          above_right_present(in_above_right_present),
+          COLOR(color) {
+        // Same neighbor check as the templatized branch (there is no `right_present` member).
+        always_assert((left_present && above_present && above_right_present) == all_present);
+        static_assert((int)deprecated_color == 0, "Using dynamic color");
+    }
+#endif
+    // Reset the model owned by `base` (delegates to reset_model).
+    void reset(ProbabilityTablesBase& base) {
+        Model& target = base.model();
+        reset_model(target);
+    }
+    // Load the model owned by `base` from `filename` (delegates to load_model).
+    void load(ProbabilityTablesBase& base, const char* filename) {
+        Model& target = base.model();
+        load_model(target, filename);
+    }
+    // Map COLOR to the first index of the per-color Model tables: with two block types, color 0
+    // maps to 0 and every other color to 1; otherwise the color is clamped to BLOCK_TYPES - 1.
+    int color_index() {
+        const int color_id = (int)COLOR;
+        if (BLOCK_TYPES != 2) {
+            return std::min((int)(BLOCK_TYPES - 1), color_id);
+        }
+        return color_id == 0 ? 0 : 1;
+    }
+    // Build the context for a 7x7 coefficient: the prior comes from compute_aavrg, the bin from
+    // num_nonzeros_left, and bsr_best_prior is the bit length of |prior| capped at 1023.
+    ProbabilityTablesBase::CoefficientContext update_coefficient_context7x7(int coord,
+                                                                            int aligned_zz,
+                                                                            const ConstBlockContext block,
+                                                                            uint8_t num_nonzeros_left) {
+        const int prior = compute_aavrg(coord, aligned_zz, block);
+        CoefficientContext ctx;
+        ctx.best_prior = prior;
+        ctx.num_nonzeros_bin = num_nonzeros_to_bin(num_nonzeros_left);
+        ctx.bsr_best_prior = bit_length(std::min(abs(prior), 1023));
+        return ctx;
+    }
+    // Same as update_coefficient_context7x7, but the caller supplies the already computed `aavrg`.
+    // Debug builds assert that it matches compute_aavrg for this position.
+    ProbabilityTablesBase::CoefficientContext update_coefficient_context7x7_precomp(int aligned_zz,
+                                                                                    int aavrg,
+                                                                                    const ConstBlockContext block,
+                                                                                    uint8_t num_nonzeros_left) {
+        CoefficientContext ctx;
+        assert(aavrg == compute_aavrg(aligned_to_raster.at(aligned_zz), aligned_zz, block));
+        ctx.best_prior = aavrg;
+        ctx.num_nonzeros_bin = num_nonzeros_to_bin(num_nonzeros_left);
+        ctx.bsr_best_prior = bit_length(std::min(abs(aavrg), 1023));
+        return ctx;
+    }
+    // Build the context for an edge coefficient. With MICROVECTORIZE the prior comes from the
+    // horizontal helper when (coefficient & 7) is non-zero and from the vertical helper otherwise;
+    // without it the generic compute_lak is used. num_nonzeros_x is stored as the bin unchanged.
+    ProbabilityTablesBase::CoefficientContext update_coefficient_context8(uint8_t coefficient,
+                                                                          const ConstBlockContext block,
+                                                                          uint8_t num_nonzeros_x) {
+        CoefficientContext ctx = {0, 0, 0};
+        if (!MICROVECTORIZE) {
+            ctx.best_prior = compute_lak(block, coefficient);
+        } else if (coefficient & 7) {
+            ctx.best_prior = compute_lak_horizontal(block, coefficient);
+        } else {
+            ctx.best_prior = compute_lak_vertical(block, coefficient);
+        }
+        ctx.num_nonzeros_bin = num_nonzeros_x;
+        ctx.bsr_best_prior = bit_length(std::min(abs(ctx.best_prior), 1023));
+        return ctx;
+    }
+    // Edge-coefficient context whose prior always comes from compute_lak_horizontal.
+    ProbabilityTablesBase::CoefficientContext update_coefficient_context8_horiz(uint8_t coefficient,
+                                                                                const ConstBlockContext block,
+                                                                                uint8_t num_nonzeros_x) {
+        CoefficientContext ctx = {0, 0, 0};
+        const int prior = compute_lak_horizontal(block, coefficient);
+        ctx.best_prior = prior;
+        ctx.num_nonzeros_bin = num_nonzeros_x;
+        ctx.bsr_best_prior = bit_length(std::min(abs(prior), 1023));
+        return ctx;
+    }
+    // Edge-coefficient context whose prior always comes from compute_lak_vertical.
+    ProbabilityTablesBase::CoefficientContext update_coefficient_context8_vert(uint8_t coefficient,
+                                                                               const ConstBlockContext block,
+                                                                               uint8_t num_nonzeros_x) {
+        CoefficientContext ctx = {0, 0, 0};
+        const int prior = compute_lak_vertical(block, coefficient);
+        ctx.best_prior = prior;
+        ctx.num_nonzeros_bin = num_nonzeros_x;
+        ctx.bsr_best_prior = bit_length(std::min(abs(prior), 1023));
+        return ctx;
+    }
+// Generates a member update_coefficient_context8_templ<N> for a fixed coefficient index N. Each
+// one mirrors update_coefficient_context8_horiz/_vert but takes its prior from compute_lak_templ<N>.
+// Instantiated below for N = 1..8 and for the multiples of 8 from 16 to 56.
+#define INSTANTIATE_TEMPLATE_METHOD(N)                                                                            \
+    ProbabilityTablesBase::CoefficientContext update_coefficient_context8_templ##N(const ConstBlockContext block, \
+                                                                                   uint8_t num_nonzeros_x) {      \
+        ProbabilityTablesBase::CoefficientContext retval = {0, 0, 0};                                             \
+        retval.best_prior = compute_lak_templ<N>(block);                                                          \
+        retval.num_nonzeros_bin = num_nonzeros_x;                                                                 \
+        retval.bsr_best_prior = bit_length(std::min(abs(retval.best_prior), 1023));                               \
+        return retval;                                                                                            \
+    }
+    INSTANTIATE_TEMPLATE_METHOD(1)
+    INSTANTIATE_TEMPLATE_METHOD(2)
+    INSTANTIATE_TEMPLATE_METHOD(3)
+    INSTANTIATE_TEMPLATE_METHOD(4)
+    INSTANTIATE_TEMPLATE_METHOD(5)
+    INSTANTIATE_TEMPLATE_METHOD(6)
+    INSTANTIATE_TEMPLATE_METHOD(7)
+    INSTANTIATE_TEMPLATE_METHOD(8)
+    INSTANTIATE_TEMPLATE_METHOD(16)
+    INSTANTIATE_TEMPLATE_METHOD(24)
+    INSTANTIATE_TEMPLATE_METHOD(32)
+    INSTANTIATE_TEMPLATE_METHOD(40)
+    INSTANTIATE_TEMPLATE_METHOD(48)
+    INSTANTIATE_TEMPLATE_METHOD(56)
+    // Pick the 7x7 nonzero-count branch slice for `block`. The context is built from the neighbors'
+    // nonzero counts: (above + left + 2) / 4 when both are usable, (n + 1) / 2 when only one is,
+    // and 0 when neither is. It is then mapped to a bin and combined with color_index().
+    Sirikata::Array2d<Branch, 6, 32>::Slice nonzero_counts_7x7(ProbabilityTablesBase& pt,
+                                                               const ConstBlockContext block) {
+        const bool use_above = all_present || above_present;
+        const bool use_left = all_present || left_present;
+
+        uint8_t above_count = 0;
+        if (use_above) {
+            above_count = block.nonzeros_above_7x7_unchecked();
+        }
+        uint8_t left_count = 0;
+        if (use_left) {
+            left_count = block.nonzeros_left_7x7_unchecked();
+        }
+
+        uint8_t neighbor_context = 0;
+        if (use_above && use_left) {
+            neighbor_context = (above_count + left_count + 2) / 4;
+        } else if (use_above) {
+            neighbor_context = (above_count + 1) / 2;
+        } else if (use_left) {
+            neighbor_context = (left_count + 1) / 2;
+        }
+        ANNOTATE_CTX(0, ZEROS7x7, 0, neighbor_context);
+        return pt.model().num_nonzeros_counts_7x7_.at(color_index(), num_nonzeros_to_bin(neighbor_context));
+    }
+    // Select the 8x1 edge nonzero-count slice, indexed by color_index(), eob_x and
+    // (num_nonzeros + 3) / 7.
+    Sirikata::Array2d<Branch, 3u, 4u>::Slice x_nonzero_counts_8x1(ProbabilityTablesBase& pt,
+                                                                  unsigned int eob_x,
+                                                                  unsigned int num_nonzeros) {
+        const unsigned int nonzeros_context = (num_nonzeros + 3) / 7;
+        ANNOTATE_CTX(0, ZEROS8x1, 0, nonzeros_context);
+        ANNOTATE_CTX(0, ZEROS8x1, 1, eob_x);
+        Model& model = pt.model();
+        return model.num_nonzeros_counts_8x1_.at(color_index(), eob_x, nonzeros_context);
+    }
+    // Select the 1x8 edge nonzero-count slice, indexed by color_index(), eob_x and
+    // (num_nonzeros + 3) / 7.
+    Sirikata::Array2d<Branch, 3u, 4u>::Slice y_nonzero_counts_1x8(ProbabilityTablesBase& pt,
+                                                                  unsigned int eob_x,
+                                                                  unsigned int num_nonzeros) {
+        const unsigned int nonzeros_context = (num_nonzeros + 3) / 7;
+        ANNOTATE_CTX(0, ZEROS1x8, 0, nonzeros_context);
+        ANNOTATE_CTX(0, ZEROS1x8, 1, eob_x);
+        Model& model = pt.model();
+        return model.num_nonzeros_counts_1x8_.at(color_index(), eob_x, nonzeros_context);
+    }
+    // Select the exponent branch row for an edge coefficient: indexes exponent_counts_x_ by
+    // color_index(), context.num_nonzeros_bin, zig15 and context.bsr_best_prior.
+    // `band` is used only by the (currently empty) ANNOTATE_CTX macro and by the debug assert.
+    // NOTE(review): `?:` binds looser than `==`, so the assert parses as
+    // `(band & 7) == 0 ? ((band >> 3) + 7) : (band - 1 == zig15)`. When (band & 7) == 0 it only
+    // checks that (band >> 3) + 7 is non-zero and never compares against zig15. Confirm the intended
+    // band -> zig15 mapping against the callers before tightening it.
+    // NOTE(review): `context.num_nonzeros` is not a field of CoefficientContext; the line compiles
+    // only because ANNOTATE_CTX expands to nothing.
+    Sirikata::Array1d<Branch, MAX_EXPONENT>::Slice exponent_array_x(ProbabilityTablesBase& pt,
+                                                                    int band,
+                                                                    int zig15,
+                                                                    CoefficientContext context) {
+        ANNOTATE_CTX(band, EXP8, 0, context.bsr_best_prior);
+        ANNOTATE_CTX(band, EXP8, 1, context.num_nonzeros);
+        assert((band & 7) == 0 ? ((band >> 3) + 7) : band - 1 == zig15);
+        return pt.model().exponent_counts_x_.at(color_index(), context.num_nonzeros_bin, zig15, context.bsr_best_prior);
+    }
+    // Select the exponent branch row for a 7x7 coefficient: indexes exponent_counts_ by
+    // color_index(), the context's nonzero bin, zig49 and the context's bsr_best_prior.
+    // `band` is consumed only by the annotation macro.
+    Sirikata::Array1d<Branch, MAX_EXPONENT>::Slice exponent_array_7x7(ProbabilityTablesBase& pt,
+                                                                      const unsigned int band,
+                                                                      const unsigned int zig49,
+                                                                      const CoefficientContext context) {
+        ANNOTATE_CTX(band, EXP7x7, 0, context.bsr_best_prior);
+        ANNOTATE_CTX(band, EXP7x7, 1, context.num_nonzeros_bin);
+        Model& model = pt.model();
+        return model.exponent_counts_.at(color_index(), context.num_nonzeros_bin, zig49, context.bsr_best_prior);
+    }
+    // Select the DC exponent branch row; both indices are clamped to the last valid entry of their
+    // dimension of ExponentCountsDC.
+    Sirikata::Array1d<Branch, MAX_EXPONENT>::Slice exponent_array_dc(ProbabilityTablesBase& pt,
+                                                                     uint16_t len_abs_mxm,
+                                                                     uint16_t len_abs_offset_to_closest_edge) {
+        const uint16_t last_mxm = (uint16_t)(Model::ExponentCountsDC::size0 - 1);
+        const uint16_t last_offset = (uint16_t)(Model::ExponentCountsDC::size1 - 1);
+        const uint16_t mxm_index = std::min(len_abs_mxm, last_mxm);
+        const uint16_t offset_index = std::min(len_abs_offset_to_closest_edge, last_offset);
+        return pt.model().exponent_counts_dc_.at(mxm_index, offset_index);
+    }
+    // Select the DC residual branch row, clamping len_abs_mxm to the last row of
+    // ResidualNoiseCountsDc. len_abs_offset_to_closest_edge is accepted but not used.
+    Sirikata::Array1d<Branch, COEF_BITS>::Slice residual_array_dc(ProbabilityTablesBase& pt,
+                                                                  uint16_t len_abs_mxm,
+                                                                  uint16_t len_abs_offset_to_closest_edge) {
+        const uint16_t last_row = (uint16_t)(Model::ResidualNoiseCountsDc::size0 - 1);
+        const uint16_t row = std::min(last_row, len_abs_mxm);
+        return pt.model().residual_noise_counts_dc_.at(row);
+    }
+    // Residual branch row for an edge coefficient; forwards to residual_noise_array_shared.
+    // NOTE(review): `num_nonzeros_x` in the annotation is not declared in this method; it compiles
+    // only because ANNOTATE_CTX currently expands to nothing.
+    Sirikata::Array1d<Branch, COEF_BITS>::Slice residual_noise_array_x(ProbabilityTablesBase& pt,
+                                                                       const unsigned int band,
+                                                                       const CoefficientContext context) {
+        ANNOTATE_CTX(band, RES8, 0, num_nonzeros_x);
+        return residual_noise_array_shared(pt, band, context);
+    }
+
+    // Common lookup into residual_noise_counts_, indexed by color_index(), band / band_divisor and
+    // the context's nonzero bin.
+    Sirikata::Array1d<Branch, COEF_BITS>::Slice residual_noise_array_shared(ProbabilityTablesBase& pt,
+                                                                            const unsigned int band,
+                                                                            const CoefficientContext context) {
+        Model& model = pt.model();
+        return model.residual_noise_counts_.at(color_index(), band / band_divisor, context.num_nonzeros_bin);
+    }
+    // Residual branch row for a 7x7 (or, when band == 0, DC) coefficient; forwards to
+    // residual_noise_array_shared. The band test only chooses which annotation tag is recorded.
+    // NOTE(review): `num_nonzeros` in the annotations is not declared in this method; it compiles
+    // only because ANNOTATE_CTX currently expands to nothing.
+    Sirikata::Array1d<Branch, COEF_BITS>::Slice residual_noise_array_7x7(ProbabilityTablesBase& pt,
+                                                                         const unsigned int band,
+                                                                         const CoefficientContext context) {
+        if (band == 0) {
+            ANNOTATE_CTX(0, RESDC, 0, num_nonzeros_to_bin(num_nonzeros));
+        } else {
+            ANNOTATE_CTX(band, RES7x7, 0, num_nonzeros_to_bin(num_nonzeros));
+        }
+        return residual_noise_array_shared(pt, band, context);
+    }
+    // Map a nonzero count to its bin through row NUM_NONZEROS_BINS - 1 of the nonzero_to_bin table.
+    // No range check is made on num_nonzeros here.
+    unsigned int num_nonzeros_to_bin(uint8_t num_nonzeros) {
+        return nonzero_to_bin[NUM_NONZEROS_BINS - 1][num_nonzeros];
+    }
+    // Weighted sum of raster coefficients 0..7 of `block` with entries pixel_row * 8 + 0..7 of the
+    // dequantized linear IDCT table for COLOR. Coefficient 0 is left out when ignore_first is set.
+    int idct_2d_8x1(const AlignedBlock& block, bool ignore_first, int pixel_row) {
+        const int32_t* row_weights =
+            ProbabilityTablesBase::icos_idct_linear_8192_dequantized((int)COLOR) + pixel_row * 8;
+        int sum = 0;
+        if (!ignore_first) {
+            sum = block.coefficients_raster(0) * row_weights[0];
+        }
+        for (int col = 1; col < 8; ++col) {
+            sum += block.coefficients_raster(col) * row_weights[col];
+        }
+        return sum;
+    }
+
+    // Weighted sum of raster coefficients 0, 8, ..., 56 of `block` with entries
+    // pixel_row * 8 + 0..7 of the dequantized linear IDCT table for COLOR. The first term uses
+    // block.dc() and is left out when ignore_first is set.
+    int idct_2d_1x8(const AlignedBlock& block, bool ignore_first, int pixel_row) {
+        const int32_t* row_weights =
+            ProbabilityTablesBase::icos_idct_linear_8192_dequantized((int)COLOR) + pixel_row * 8;
+        int sum = 0;
+        if (!ignore_first) {
+            sum = block.dc() * row_weights[0];
+        }
+        for (int row = 1; row < 8; ++row) {
+            sum += block.coefficients_raster(row * 8) * row_weights[row];
+        }
+        return sum;
+    }
+
+    // Predict the DC value from the usable neighbors. For each usable side, the neighbor's 1-D
+    // IDCT at row 7 minus the current block's DC-less 1-D IDCT at row 0 gives a delta. The deltas
+    // are scaled (x4 with both sides, x8 with one), clamped, divided by the DC quantization step
+    // and rounded away from zero in units of 8192.
+    int predict_dc_dct(const ConstBlockContext& context) {
+        const bool use_left = all_present || left_present;
+        const bool use_above = all_present || above_present;
+
+        int left_delta = 0;
+        if (use_left) {
+            const int neighbor_edge = idct_2d_8x1(context.left_unchecked(), false, 7);
+            const int own_edge = idct_2d_8x1(context.here(), true, 0);
+            left_delta = neighbor_edge - own_edge;
+        }
+        int above_delta = 0;
+        if (use_above) {
+            const int neighbor_edge = idct_2d_1x8(context.above_unchecked(), false, 7);
+            const int own_edge = idct_2d_1x8(context.here(), true, 0);
+            above_delta = neighbor_edge - own_edge;
+        }
+
+        int prediction = 0;
+        if (use_left && use_above) {
+            prediction = (left_delta + above_delta) * 4;
+        } else if (use_left) {
+            prediction = left_delta * 8;
+        } else if (use_above) {
+            prediction = above_delta * 8;
+        }
+
+        const int DCT_RSC = 8192;
+        prediction = std::max(-1024 * DCT_RSC, std::min(1016 * DCT_RSC, prediction));
+        prediction /= ProbabilityTablesBase::quantization_table((int)COLOR, 0);
+        const int half = DCT_RSC / 2;
+        const int round = (prediction < 0) ? -half : half;
+        return (prediction + round) / DCT_RSC;
+    }
+    // DC predictor from the neighbors' DC values: with left (a), above (b) and above-left (c) it
+    // returns min(a, b) if c >= max(a, b), max(a, b) if c <= min(a, b), else a + b - c. With one
+    // usable neighbor it returns that neighbor's DC, and 0 with none.
+    int predict_locoi_dc_deprecated(const ConstBlockContext& context) {
+        const bool use_left = all_present || left_present;
+        if (!use_left) {
+            if (above_present) {
+                return context.above_unchecked().dc();
+            }
+            return 0;
+        }
+
+        int a = context.left_unchecked().dc();
+        const bool use_above = all_present || above_present;
+        if (!use_above) {
+            return a;
+        }
+
+        int b = context.above_unchecked().dc();
+        int c = context.above_left_unchecked().dc();
+        const int hi = std::max(a, b);
+        const int lo = std::min(a, b);
+        if (c >= hi) {
+            return lo;
+        }
+        if (c <= lo) {
+            return hi;
+        }
+        return a + b - c;
+    }
+    // Add (recover_original) or subtract the predict_dc_dct prediction to/from the current block's
+    // DC, then wrap the result once into [-limit, limit], limit = 2^(1 + MAX_EXPONENT) - 1.
+    int predict_or_unpredict_dc(const ConstBlockContext& context, bool recover_original) {
+        const int limit = (1 << (1 + MAX_EXPONENT)) - 1;
+        const int wrap = 2 * limit + 1;
+        const int predicted = predict_dc_dct(context);
+        const int signed_prediction = recover_original ? predicted : -predicted;
+        int value = context.here().dc() + signed_prediction;
+        if (value < -limit) {
+            value += wrap;
+        }
+        if (value > limit) {
+            value -= wrap;
+        }
+        return value;
+    }
+// Per 16-bit lane: take |vec| with _mm_sign_epi16(vec, vec), shift it right logically by imm8, then
+// re-apply the sign of vec, so the shift rounds toward zero.
+// NOTE(review): the expansion ends in ';', so `x = shift_right_round_zero_epi16(v, n);` yields an
+// extra empty statement and the macro cannot be nested inside a larger expression.
+#define shift_right_round_zero_epi16(vec, imm8) (_mm_sign_epi16(_mm_srli_epi16(_mm_sign_epi16(vec, vec), imm8), vec));
+    int adv_predict_dc_pix(const ConstBlockContext& context,
+                           int16_t* pixels_sans_dc,
+                           int32_t* uncertainty_val,
+                           int32_t* uncertainty2_val) {
+        uint16_t* q = ProbabilityTablesBase::quantization_table((int)color);
+        idct(context.here(), q, pixels_sans_dc, true);
+
+        Sirikata::AlignedArray1d<int16_t, 16> dc_estimates;
+        dc_estimates.memset(0);
+        int32_t avgmed = 0;
+        if (all_present || left_present || above_present) {
+            if ((VECTORIZE || MICROVECTORIZE)) {
+                if (all_present || above_present) { // above goes first to prime the cache
+                    __m128i neighbor_above = _mm_loadu_si128(
+                        (const __m128i*)(const char*)context.neighbor_context_above_unchecked().horizontal_ptr());
+                    __m128i pixels_sans_dc_reg = _mm_loadu_si128((const __m128i*)(const char*)pixels_sans_dc);
+                    __m128i pixels2_sans_dc_reg = _mm_loadu_si128((const __m128i*)(const char*)(pixels_sans_dc + 8));
+                    __m128i pixels_delta = _mm_sub_epi16(pixels_sans_dc_reg, pixels2_sans_dc_reg);
+                    __m128i pixels_delta_div2 = shift_right_round_zero_epi16(pixels_delta, 1);
+                    __m128i pixels_sans_dc_recentered = _mm_add_epi16(pixels_sans_dc_reg, _mm_set1_epi16(1024));
+                    __m128i above_dc_estimate =
+                        _mm_sub_epi16(_mm_sub_epi16(neighbor_above, pixels_delta_div2), pixels_sans_dc_recentered);
+
+                    _mm_store_si128((__m128i*)(char*)(dc_estimates.begin() + ((all_present || left_present) ? 8 : 0)),
+                                    above_dc_estimate);
+                }
+                if (all_present || left_present) {
+                    const int16_t* horiz_data = context.neighbor_context_left_unchecked().vertical_ptr_except_7();
+                    __m128i neighbor_horiz = _mm_loadu_si128((const __m128i*)(const char*)horiz_data);
+                    // neighbor_horiz = _mm_insert_epi16(neighbor_horiz,
+                    // horiz_data[NeighborSummary::VERTICAL_LAST_PIXEL_OFFSET_FROM_FIRST_PIXEL], 7);
+                    __m128i pixels_sans_dc_reg =
+                        _mm_set_epi16(pixels_sans_dc[56], pixels_sans_dc[48], pixels_sans_dc[40], pixels_sans_dc[32],
+                                      pixels_sans_dc[24], pixels_sans_dc[16], pixels_sans_dc[8], pixels_sans_dc[0]);
+                    __m128i pixels_delta = _mm_sub_epi16(
+                        pixels_sans_dc_reg,
+                        _mm_set_epi16(pixels_sans_dc[57], pixels_sans_dc[49], pixels_sans_dc[41], pixels_sans_dc[33],
+                                      pixels_sans_dc[25], pixels_sans_dc[17], pixels_sans_dc[9], pixels_sans_dc[1]));
+
+                    __m128i pixels_delta_div2 = shift_right_round_zero_epi16(pixels_delta, 1);
+                    __m128i left_dc_estimate = _mm_sub_epi16(_mm_sub_epi16(neighbor_horiz, pixels_delta_div2),
+                                                             _mm_add_epi16(pixels_sans_dc_reg, _mm_set1_epi16(1024)));
+
+                    _mm_store_si128((__m128i*)(char*)dc_estimates.begin(), left_dc_estimate);
+                }
+            } else {
+                if (all_present || left_present) {
+                    for (int i = 0; i < 8; ++i) {
+                        int a = pixels_sans_dc[i << 3] + 1024;
+                        int pixel_delta = pixels_sans_dc[i << 3] - pixels_sans_dc[(i << 3) + 1];
+                        int b =
+                            context.neighbor_context_left_unchecked().vertical(i) - (pixel_delta / 2); // round to zero
+                        dc_estimates[i] = b - a;
+                    }
+                }
+                if (all_present || above_present) {
+                    for (int i = 0; i < 8; ++i) {
+                        int a = pixels_sans_dc[i] + 1024;
+                        int pixel_delta = pixels_sans_dc[i] - pixels_sans_dc[i + 8];
+                        int b = context.neighbor_context_above_unchecked().horizontal(i) -
+                                (pixel_delta / 2); // round to zero
+                        dc_estimates[i + ((all_present || left_present) ? 8 : 0)] = b - a;
+                    }
+                }
+            }
+            int32_t avg_h_v[2] = {0, 0};
+            int32_t min_dc = dc_estimates[0];
+            int32_t max_dc = dc_estimates[0];
+            size_t which_est = 0;
+            for (int vert = 0; vert != 2; ++vert) {
+                for (int i = 0; i < 8; ++which_est, ++i) {
+                    int16_t cur_est = dc_estimates[which_est];
+                    avg_h_v[vert] += cur_est;
+                    if (min_dc > cur_est) {
+                        min_dc = cur_est;
+                    }
+                    if (max_dc < cur_est) {
+                        max_dc = cur_est;
+                    }
+                }
+                if ((!all_present) && (above_present == false || left_present == false)) {
+                    avg_h_v[1] = avg_h_v[0];
+                    break;
+                }
+            }
+            int32_t overall_avg = (avg_h_v[0] + avg_h_v[1]) >> 1;
+            avgmed = overall_avg;
+            *uncertainty_val = (max_dc - min_dc) >> 3;
+            avg_h_v[0] -= avgmed;
+            avg_h_v[1] -= avgmed;
+            int32_t far_afield_value = avg_h_v[1];
+            if (abs(avg_h_v[0]) < abs(avg_h_v[1])) {
+                far_afield_value = avg_h_v[0];
+            }
+            *uncertainty2_val = (far_afield_value) >> 3;
+
+            if (false) { // this is to debug some of the differences
+                debug_print_deltas(context, dc_estimates.begin(), avgmed);
+            }
+        }
+        return ((avgmed / q[0] + 4) >> 3);
+    }
+    // Debug-only helper: compares several DC predictors against the block's actual DC value,
+    // adds each predictor's absolute error to the running LeptonDebug counters and prints the
+    // predictions together with the accumulated errors to stderr.
+    //   context      - block under inspection; context.here().dc() is the reference value
+    //   dc_estimates - per-edge DC estimates; 16 entries are read when both the left and above
+    //                  neighbors are usable, otherwise 8
+    //   avgmed       - averaged estimate supplied by the caller (not yet divided by q[0])
+    void debug_print_deltas(const ConstBlockContext& context, int16_t* dc_estimates, int avgmed) {
+        int actual_dc = context.here().dc();
+        uint16_t* q = ProbabilityTablesBase::quantization_table((int)color);
+        int len_est = ((all_present || (left_present && above_present)) ? 16 : 8);
+        int avg_estimated_dc = 0;
+        int dc_sum = 0;
+        for (int i = 0; i < len_est; ++i) {
+            dc_sum += dc_estimates[i];
+        }
+        avg_estimated_dc = dc_sum;
+        // With two edges the sum covers 16 estimates, so halve it to stay on the 8-estimate scale.
+        if (all_present || (left_present && above_present)) {
+            avg_estimated_dc >>= 1;
+        }
+
+        avg_estimated_dc = (avg_estimated_dc / q[0] + xIDCTSCALE / 2) >> 3;
+        // Median predictor: sort a private copy so the caller's array is left untouched.
+        int16_t dc_copy[16];
+        memcpy(dc_copy, dc_estimates, len_est * sizeof(int16_t));
+        std::sort(dc_copy, dc_copy + len_est);
+        int mmed = dc_copy[len_est / 2];
+        // NOTE(review): unlike scaled_avgmed on the next line, this value is not shifted right
+        // by 3 -- confirm whether the missing ">> 3" is intentional.
+        int scaled_med = (mmed / q[0] + 4);
+        int scaled_avgmed = (((avgmed / q[0]) + 4) >> 3);
+        using namespace LeptonDebug;
+        LeptonDebug::med_err += abs(scaled_med - actual_dc);
+        LeptonDebug::amd_err += abs(scaled_avgmed - actual_dc);
+        LeptonDebug::avg_err += abs(avg_estimated_dc - actual_dc);
+        int locoi_pred = predict_locoi_dc_deprecated(context);
+        int predicted_dc = predict_dc_dct(context);
+        LeptonDebug::ori_err += abs(predicted_dc - actual_dc);
+        LeptonDebug::loc_err += abs(locoi_pred - actual_dc);
+
+        // NOTE(review): this subtracts the first from the last entry of the UNSORTED input; if a
+        // max-minus-min spread was intended, the sorted dc_copy would be the array to use -- confirm.
+        fprintf(stderr, "MXM: %d\n", dc_estimates[len_est - 1] - dc_estimates[0]);
+        fprintf(stderr, "MED: %d (%d)\n", scaled_med, LeptonDebug::med_err);
+        fprintf(stderr, "AMD: %d (%d)\n", scaled_avgmed, LeptonDebug::amd_err);
+        fprintf(stderr, "AVG: %d (%d)\n", avg_estimated_dc, LeptonDebug::avg_err);
+        fprintf(stderr, "ORI: %d (%d)\n", predicted_dc, LeptonDebug::ori_err);
+        fprintf(stderr, "LOC: %d (%d)\n", locoi_pred, LeptonDebug::loc_err);
+        fprintf(stderr, "DC : %d\n", actual_dc);
+    }
+    // Applies (recover_original == true) or removes (false) a DC prediction.
+    //   saved_dc        - stored value: the residual when recovering, the true DC when predicting
+    //   recover_original - true adds predicted_val back, false subtracts it
+    //   predicted_val   - the prediction to add or subtract
+    // The result is wrapped once into the range [-2^(MAX_EXPONENT-1), 2^(MAX_EXPONENT-1)] by
+    // adding or subtracting the span 2^MAX_EXPONENT + 1.
+    int adv_predict_or_unpredict_dc(int16_t saved_dc, bool recover_original, int predicted_val) {
+        const int upper_bound = (1 << (MAX_EXPONENT - 1));
+        const int lower_bound = -upper_bound;
+        const int wrap_span = 2 * upper_bound + 1;
+        int result;
+        if (recover_original) {
+            result = saved_dc + predicted_val;
+        } else {
+            result = saved_dc - predicted_val;
+        }
+        if (result < lower_bound) {
+            result += wrap_span;
+        }
+        if (result > upper_bound) {
+            result -= wrap_span;
+        }
+        return result;
+    }
+    // Weighted neighbor-magnitude context for the DC coefficient.
+    // Delegates to compute_aavrg for raster coordinate 0, passing the aligned index that
+    // raster_to_aligned maps coordinate 0 to. Returns whatever compute_aavrg returns, widened to int.
+    // (A hand-written DC-specific computation used to follow the return statement; it could never
+    // execute and has been removed.)
+    int compute_aavrg_dc(ConstBlockContext context) {
+        return compute_aavrg(0, raster_to_aligned.at(0), context);
+    }
+    // Scalar neighbor-magnitude context for one coefficient.
+    //   coord      - raster index of the coefficient inside a block
+    //   aligned_zz - aligned index of the same coefficient (not used by this scalar version)
+    //   context    - gives access to the left, above and above-left neighbor blocks
+    // With a single usable neighbor the result is |neighbor|; with none it is 0. When both the
+    // left and above neighbors are usable the result is
+    // (13 * (|left| + |above|) + 6 * |above_left|) >> 5, evaluated in 16 bits and shifted as an
+    // unsigned value. The above-right neighbor is not consulted.
+    int16_t compute_aavrg(unsigned int coord, unsigned int aligned_zz, ConstBlockContext context) {
+        const bool use_left = all_present || left_present;
+        const bool use_above = all_present || above_present;
+        int16_t magnitude = 0;
+        if (use_left) {
+            magnitude += abs(context.left_unchecked().coefficients_raster(coord));
+        }
+        if (use_above) {
+            magnitude += abs(context.above_unchecked().coefficients_raster(coord));
+        }
+        if (!(use_left && use_above)) {
+            // Zero or one neighbor: no weighting is applied.
+            return magnitude;
+        }
+        constexpr unsigned int log_weight = 5;
+        magnitude *= 13;
+        magnitude += 6 * abs(context.above_left_unchecked().coefficients_raster(coord));
+        return ((uint16_t)magnitude) >> log_weight;
+    }
+#ifdef OPTIMIZED_7x7
+    // Self-check for the vectorized path: returns true when the eight 16-bit lanes of `retval`
+    // equal compute_aavrg evaluated for aligned indices aligned_zz .. aligned_zz + 7 (each mapped
+    // to its raster coordinate through aligned_to_raster).
+    bool aavrg_vec_matches(__m128i retval, unsigned int aligned_zz, ConstBlockContext context) {
+        short vectorized[8];
+        _mm_storeu_si128((__m128i*)(char*)vectorized, retval);
+        short scalar[8];
+        for (unsigned int lane = 0; lane < 8; ++lane) {
+            scalar[lane] = compute_aavrg(aligned_to_raster.at(aligned_zz + lane), aligned_zz + lane, context);
+        }
+        return (memcmp(vectorized, scalar, sizeof(scalar)) == 0);
+    }
+    // Convenience overload: computes the eight-lane context for aligned_zz and writes it to
+    // aligned_retval with an aligned 128-bit store, so aligned_retval must be 16-byte aligned
+    // and have room for eight shorts.
+    void compute_aavrg_vec(unsigned int aligned_zz, ConstBlockContext context, short* aligned_retval) {
+        const __m128i lanes = compute_aavrg_vec(aligned_zz, context);
+        _mm_store_si128((__m128i*)(char*)aligned_retval, lanes);
+    }
+// x_mm_loadu_si64(a): portable 64-bit load into an __m128i.
+// On clang/GCC it reads a uint64_t through `a` and broadcasts it to both 64-bit lanes with
+// _mm_set1_epi64x; elsewhere it maps straight to the _mm_loadu_si64 intrinsic.
+// NOTE(review): the two variants differ in the upper 64 bits (broadcast copy vs. whatever
+// _mm_loadu_si64 leaves there) -- callers should rely on the low 64 bits only; confirm.
+#if defined(__clang__) || defined(__GNUC__)
+#define x_mm_loadu_si64(a) _mm_set1_epi64x(*(uint64_t*)(char*)(a))
+#else
+#define x_mm_loadu_si64 _mm_loadu_si64
+#endif
+    // Vectorized counterpart of compute_aavrg: produces the neighbor-magnitude context for the
+    // eight consecutive coefficients starting at aligned index aligned_zz, one per 16-bit lane.
+    // Returns all zeros when no neighbor is usable, |left| or |above| alone when only that one
+    // neighbor is usable, and otherwise (13 * (|left| + |above|) + 6 * |above_left|) >> 5 per lane.
+    // The neighbor coefficients are read with aligned 128-bit loads, so &coef.at(aligned_zz) must
+    // be 16-byte aligned.
+    __m128i compute_aavrg_vec(unsigned int aligned_zz, ConstBlockContext context) {
+        if (all_present == false && left_present == false && above_present == false) {
+            return _mm_setzero_si128();
+        }
+        // `left` is deliberately left uninitialized here: every path that reaches the weighted sum
+        // below has the left neighbor usable and therefore assigns it first.
+        __m128i left;
+        if (all_present || left_present) {
+            left = _mm_abs_epi16(
+                _mm_load_si128((const __m128i*)(const char*)&context.left_unchecked().coef.at(aligned_zz)));
+            if ((!all_present) && !above_present) {
+                return left;
+            }
+        }
+        __m128i above = _mm_setzero_si128();
+        if (all_present || above_present) {
+            above = _mm_abs_epi16(
+                _mm_load_si128((const __m128i*)(const char*)&context.above_unchecked().coef.at(aligned_zz)));
+            if (all_present == false && !left_present) {
+                return above;
+            }
+        }
+        // From here on both the left and above neighbors are usable.
+        constexpr unsigned int log_weight = 5;
+        __m128i total = _mm_add_epi16(left, above);
+        total =
+            _mm_mullo_epi16(total, _mm_set1_epi16(13)); // approximate (a*2+b*2 + c)/5 as (a *13 + b * 13 + c * 6)/32
+        __m128i aboveleft = _mm_abs_epi16(
+            _mm_load_si128((const __m128i*)(const char*)&context.above_left_unchecked().coef.at(aligned_zz)));
+        total = _mm_add_epi16(total, _mm_mullo_epi16(aboveleft, _mm_set1_epi16(6)));
+        // Logical (unsigned) shift, matching the (uint16_t) cast used by the scalar compute_aavrg.
+        __m128i retval = _mm_srli_epi16(total, log_weight);
+        // Debug builds cross-check every lane against the scalar implementation.
+        assert(aavrg_vec_matches(retval, aligned_zz, context));
+        return retval;
+        // if (block.context().above_right.initialized()) {
+        // total += abs(block.context().above_right.get()->coefficients().at(0));
+        //}
+    }
+#endif
+    // Vector core of the edge-coefficient predictor.
+    //   coeffs_x_low/high - the current block's eight coefficients along the edge (lanes 0-3 / 4-7)
+    //   coeffs_a_low/high - the neighbor block's eight coefficients at the same positions
+    //   icos_deq          - eight 32-bit weights; read with aligned loads, so it must be
+    //                       16-byte aligned
+    // Computes sum_i icos_deq[i] * (s_i * a[i] - x[i]) with s_i = +1 for even i and -1 for odd i,
+    // then returns that sum divided by icos_deq[0] using integer (truncating) division.
+    // On _WIN32 the last vector argument is taken by reference instead of by value
+    // (presumably to work around by-value limits for aligned vector parameters -- TODO confirm).
+    static int32_t compute_lak_vec(__m128i coeffs_x_low,
+                                   __m128i coeffs_x_high,
+                                   __m128i coeffs_a_low,
+                                   __m128i
+#ifdef _WIN32
+                                       &
+#endif
+                                           indirect_coeffs_a_high,
+                                   const int32_t* icos_deq) {
+        // _mm_set_epi32 lists lanes from highest to lowest, so lane 0 gets +1, lane 1 gets -1, ...
+        __m128i sign_mask = _mm_set_epi32(-1, 1, -1, 1); // ((i & 1) ? -1 : 1)
+
+        // coeffs_x[i] = ((i & 1) ? -1 : 1) * coeffs_a[i] - coeffs_x[i];
+        coeffs_a_low = _mm_sign_epi32(coeffs_a_low, sign_mask);
+        __m128i coeffs_a_high = _mm_sign_epi32(indirect_coeffs_a_high, sign_mask);
+        coeffs_x_low = _mm_sub_epi32(coeffs_a_low, coeffs_x_low);
+        coeffs_x_high = _mm_sub_epi32(coeffs_a_high, coeffs_x_high);
+
+        __m128i icos_low = _mm_load_si128((const __m128i*)(const char*)icos_deq);
+        __m128i icos_high = _mm_load_si128((const __m128i*)(const char*)(icos_deq + 4));
+        // coeffs_x[i] *= icos[i]
+        __m128i deq_low = _mm_mullo_epi32(coeffs_x_low, icos_low);
+        __m128i deq_high = _mm_mullo_epi32(coeffs_x_high, icos_high);
+
+        // Horizontal add: fold 8 products to 4, then 4 to 2, then 2 to 1 in lane 0.
+        __m128i sum = _mm_add_epi32(deq_low, deq_high);
+        sum = _mm_add_epi32(sum, _mm_srli_si128(sum, 8));
+        sum = _mm_add_epi32(sum, _mm_srli_si128(sum, 4));
+        // coeffs_x[0] = sum(coeffs_x)
+        int32_t prediction = _mm_cvtsi128_si32(sum);
+        // if (prediction > 0) { <-- rounding hurts prediction perf and costs compute  this rounding didn't round the
+        // same way as the unvectorized one anyhow
+        //    prediction += icos_deq[0]/2;
+        //} else {
+        //    prediction -= icos_deq[0]/2; // round away from zero
+        //}
+        return prediction / icos_deq[0];
+    }
+// ITER(x_var, a_var, i, step): gathers four edge coefficients into each of two vectors.
+//   x_var <- context.here() coefficients at raster index band + step * (i .. i + 3), except that
+//            the first lane is forced to 0 when i == 0
+//   a_var <- `neighbor` coefficients at the same four raster indices
+// Lanes are in ascending index order (_mm_set_epi32 takes its arguments highest lane first).
+// The expansion refers to the names `context`, `band` and `neighbor`, which must be in scope
+// at the point of use. The macro is #undef'd inside compute_lak_vertical.
+#define ITER(x_var, a_var, i, step)                                                                                    \
+    (x_var = _mm_set_epi32(context.here().coefficients_raster(band + step * ((i) + 3)),                                \
+                           context.here().coefficients_raster(band + step * ((i) + 2)),                                \
+                           context.here().coefficients_raster(band + step * ((i) + 1)),                                \
+                           i == 0 ? 0 : context.here().coefficients_raster(band + step * (i))),                        \
+     a_var = _mm_set_epi32(                                                                                            \
+         neighbor.coefficients_raster(band + step * ((i) + 3)), neighbor.coefficients_raster(band + step * ((i) + 2)), \
+         neighbor.coefficients_raster(band + step * ((i) + 1)), neighbor.coefficients_raster(band + step * (i))))
+
+    // Edge-coefficient predictor with the band fixed at compile time.
+    // `band` must lie on the top row (band >> 3 == 0) or the left column ((band & 7) == 0); this
+    // is enforced by the static_assert. Top-row bands are predicted from the block above, walking
+    // raster indices in steps of 8; left-column bands are predicted from the block to the left in
+    // steps of 1. Returns 0 when the required neighbor is not usable, otherwise the result of
+    // compute_lak_vec with the matching weight table.
+    // Forced inline on non-Windows compilers.
+    template <int band>
+#ifndef _WIN32
+    __attribute__((always_inline))
+#endif
+    int32_t
+    compute_lak_templ(const ConstBlockContext& context) {
+        __m128i coeffs_x_low;
+        __m128i coeffs_x_high;
+        __m128i coeffs_a_low;
+        __m128i coeffs_a_high;
+        const int32_t* icos = nullptr;
+        static_assert((band & 7) == 0 || (band >> 3) == 0, "This function only works on edges");
+        if ((band >> 3) == 0) {
+            if (all_present == false && !above_present) {
+                return 0;
+            }
+            // ITER picks up `neighbor`, `context` and `band` by name.
+            const auto& neighbor = context.above_unchecked();
+            ITER(coeffs_x_low, coeffs_a_low, 0, 8);
+            ITER(coeffs_x_high, coeffs_a_high, 4, 8);
+            icos = ProbabilityTablesBase::icos_idct_edge_8192_dequantized_x((int)COLOR) + band * 8;
+        } else {
+            if (all_present == false && !left_present) {
+                return 0;
+            }
+            // ITER picks up `neighbor`, `context` and `band` by name.
+            const auto& neighbor = context.left_unchecked();
+            ITER(coeffs_x_low, coeffs_a_low, 0, 1);
+            ITER(coeffs_x_high, coeffs_a_high, 4, 1);
+            icos = ProbabilityTablesBase::icos_idct_edge_8192_dequantized_y((int)COLOR) + band;
+        }
+        return compute_lak_vec(coeffs_x_low, coeffs_x_high, coeffs_a_low, coeffs_a_high, icos);
+    }
+    // Vectorized edge predictor for a top-row band (band / 8 == 0, checked by assert).
+    // Uses the block above as the neighbor and walks raster indices band, band + 8, ..., band + 56.
+    // Returns 0 when the above neighbor is not usable; otherwise the value produced by
+    // compute_lak_vec with the "x" weight table offset by band * 8.
+    int32_t compute_lak_horizontal(const ConstBlockContext& context, unsigned int band) {
+        if (all_present == false && !above_present) {
+            return 0;
+        }
+        __m128i coeffs_x_low;
+        __m128i coeffs_x_high;
+        __m128i coeffs_a_low;
+        __m128i coeffs_a_high;
+        assert(band / 8 == 0 && "this function only works for the top edge");
+        // ITER picks up `neighbor`, `context` and `band` by name.
+        const auto& neighbor = context.above_unchecked();
+        ITER(coeffs_x_low, coeffs_a_low, 0, 8);
+        ITER(coeffs_x_high, coeffs_a_high, 4, 8);
+        const int32_t* icos = ProbabilityTablesBase::icos_idct_edge_8192_dequantized_x((int)COLOR) + band * 8;
+        return compute_lak_vec(coeffs_x_low, coeffs_x_high, coeffs_a_low, coeffs_a_high, icos);
+    }
+    // Vectorized edge predictor for a left-column band ((band & 7) == 0, checked by assert).
+    // Uses the block to the left as the neighbor and walks raster indices band .. band + 7.
+    // Returns 0 when the left neighbor is not usable; otherwise the value produced by
+    // compute_lak_vec with the "y" weight table offset by band.
+    int32_t compute_lak_vertical(const ConstBlockContext& context, unsigned int band) {
+        assert((band & 7) == 0 && "Must be used for veritcal");
+        if (all_present == false && !left_present) {
+            return 0;
+        }
+        __m128i coeffs_x_low;
+        __m128i coeffs_x_high;
+        __m128i coeffs_a_low;
+        __m128i coeffs_a_high;
+        // ITER picks up `neighbor`, `context` and `band` by name.
+        const auto& neighbor = context.left_unchecked();
+        ITER(coeffs_x_low, coeffs_a_low, 0, 1);
+        ITER(coeffs_x_high, coeffs_a_high, 4, 1);
+// Last use of the gather macro; retire it so it cannot leak into later code.
+#undef ITER
+        const int32_t* icos = ProbabilityTablesBase::icos_idct_edge_8192_dequantized_y((int)COLOR) + band;
+        return compute_lak_vec(coeffs_x_low, coeffs_x_high, coeffs_a_low, coeffs_a_high, icos);
+    }
+    // Scalar reference implementation of the edge-coefficient predictor.
+    //   band with (band & 7) != 0 : top-row coefficient, predicted from the block above by walking
+    //                               raster indices band, band + 8, ... (requires the above neighbor)
+    //   band with (band & 7) == 0 : left-column coefficient, predicted from the block to the left
+    //                               by walking band, band + 1, ... (requires left_present)
+    // Returns 0 when the required neighbor is missing. In debug builds the result is checked
+    // against the vectorized compute_lak_horizontal / compute_lak_vertical.
+    int32_t compute_lak(const ConstBlockContext& context, unsigned int band) {
+        int own_vals[8];
+        int nbr_vals[8];
+        const int32_t* weights = nullptr;
+        if (band & 7) {
+            if (!(all_present || above_present)) {
+                return 0;
+            }
+            // y == 0: we're the x
+            assert(band / 8 == 0); // this function only works for the edge
+            const auto& above = context.above_unchecked();
+            for (int k = 0; k < 8; ++k) {
+                uint8_t raster_idx = band + k * 8;
+                // The coefficient being predicted (k == 0) must not feed its own prediction.
+                own_vals[k] = (k == 0) ? 0 : context.here().coefficients_raster(raster_idx);
+                nbr_vals[k] = above.coefficients_raster(raster_idx);
+            }
+            weights = ProbabilityTablesBase::icos_idct_edge_8192_dequantized_x((int)COLOR) + band * 8;
+        } else {
+            if (!left_present) {
+                return 0;
+            }
+            // x == 0: we're the y
+            const auto& left = context.left_unchecked();
+            for (int k = 0; k < 8; ++k) {
+                uint8_t raster_idx = band + k;
+                own_vals[k] = (k == 0) ? 0 : context.here().coefficients_raster(raster_idx);
+                nbr_vals[k] = left.coefficients_raster(raster_idx);
+            }
+            weights = ProbabilityTablesBase::icos_idct_edge_8192_dequantized_y((int)COLOR) + band;
+        }
+        // rounding towards zero before adding nbr_vals[0] helps ratio slightly, but this is cheaper
+        int prediction = nbr_vals[0] * weights[0];
+        for (int k = 1; k < 8; ++k) {
+            // Odd terms add the neighbor value, even terms subtract it.
+            const int combined = (k & 1) ? (own_vals[k] + nbr_vals[k]) : (own_vals[k] - nbr_vals[k]);
+            prediction -= weights[k] * combined;
+        }
+        prediction /= weights[0];
+        assert(((band & 7) ? compute_lak_horizontal(context, band) : compute_lak_vertical(context, band)) ==
+                   prediction &&
+               "Vectorized version must match sequential version");
+        return prediction;
+    }
+    // Selects the branch slice used to code the thresholded residual bits of an edge coefficient.
+    // The slice is indexed by the color index, by |context.best_prior| >> min_threshold and by
+    // cur_exponent - min_threshold, each of the latter two clamped to the last valid index of its
+    // dimension of Model::ResidualThresholdCounts.
+    Sirikata::Array1d<Branch, (1 << RESIDUAL_NOISE_FLOOR)>::Slice residual_thresh_array(
+        ProbabilityTablesBase& pt,
+        const unsigned int band,
+        const uint8_t cur_exponent,
+        const CoefficientContext context,
+        int min_threshold) {
+        const uint16_t prior_magnitude = abs(context.best_prior);
+        const int prior_bucket = prior_magnitude >> min_threshold;
+        const int exponent_excess = cur_exponent - min_threshold;
+        ANNOTATE_CTX(band, THRESH8, 0, prior_bucket);
+        ANNOTATE_CTX(band, THRESH8, 2, exponent_excess);
+        return pt.model().residual_threshold_counts_.at(
+            color_index(), std::min(prior_bucket, (uint16_t)Model::ResidualThresholdCounts::size1 - 1),
+            std::min(exponent_excess, Model::ResidualThresholdCounts::size2 - 1));
+    }
+    // Annotation hook only: records cur_serialized_thresh_value under context slot 1 of THRESH8
+    // for `band` via ANNOTATE_CTX. The void casts keep both parameters "used" if the macro
+    // expands to nothing.
+    void residual_thresh_array_annot_update(const unsigned int band, uint16_t cur_serialized_thresh_value) {
+        (void)cur_serialized_thresh_value;
+        (void)band;
+        ANNOTATE_CTX(band, THRESH8, 1, cur_serialized_thresh_value);
+    }
+    // Symbolic names for the three sign classes of a value. The numbering (0 zero, 1 positive,
+    // 2 negative) is the same one sign_array_8 computes for its ctx1 index.
+    enum SignValue {
+        ZERO_SIGN = 0,
+        POSITIVE_SIGN = 1,
+        NEGATIVE_SIGN = 2,
+    };
+    // Branch used to code the sign of the DC residual.
+    // The third index depends only on offset_to_closest_edge: 1 when negative, 3 when zero,
+    // 2 when positive. avg_delta is accepted but not used.
+    Branch& sign_array_dc(ProbabilityTablesBase& pt, int avg_delta, int offset_to_closest_edge) {
+        ANNOTATE_CTX(0, SIGNDC, 0, 1);
+        int edge_class = 1;
+        if (offset_to_closest_edge == 0) {
+            edge_class = 3;
+        } else if (offset_to_closest_edge > 0) {
+            edge_class = 2;
+        }
+        return pt.model().sign_counts_.at(color_index(), 0, edge_class);
+    }
+    // Branch used to code the sign of a 7x7 (interior) coefficient. Every band shares the single
+    // entry (color index, 0, 0); `context` is accepted but does not influence the choice.
+    Branch& sign_array_7x7(ProbabilityTablesBase& pt, uint8_t band, CoefficientContext context) {
+        ANNOTATE_CTX(band, SIGN7x7, 0, 0);
+        auto& sign_table = pt.model().sign_counts_;
+        return sign_table.at(color_index(), 0, 0);
+    }
+    // Branch used to code the sign of an edge coefficient. Indexed by the sign class of
+    // context.best_prior (0 zero, 1 positive, 2 negative) and by context.bsr_best_prior.
+    Branch& sign_array_8(ProbabilityTablesBase& pt, uint8_t band, CoefficientContext context) {
+        const int16_t prior = context.best_prior;
+        const uint8_t prior_bits = context.bsr_best_prior;
+        uint8_t prior_sign = 0;
+        if (prior > 0) {
+            prior_sign = 1;
+        } else if (prior < 0) {
+            prior_sign = 2;
+        }
+        ANNOTATE_CTX(band, SIGN8, 0, prior_bits);
+        ANNOTATE_CTX(band, SIGN8, 1, prior_sign);
+        return pt.model().sign_counts_.at(color_index(), prior_sign, prior_bits);
+    }
+
+    // Looks up the minimum noise threshold for coefficient `coord` of this table's color.
+    uint8_t get_noise_threshold(int coord) {
+        return ProbabilityTablesBase::min_noise_threshold((int)COLOR, coord);
+    }
+    // Forwards the model held by `pt` to optimize_model.
+    void optimize(ProbabilityTablesBase& pt) {
+        optimize_model(pt.model());
+    }
+    // Forwards the model held by `pt` and the descriptor `output_fd` to serialize_model.
+    void serialize(ProbabilityTablesBase& pt, int output_fd) const {
+        serialize_model(pt.model(), output_fd);
+    }
+
+    // Scales the model's counts down (via normalize_model) so that newly observed data can
+    // override them more easily.
+    void normalize(ProbabilityTablesBase& pt) {
+        normalize_model(pt.model());
+    }
+};
+
+class ProbabilityTables2 {
+   public:
+    bool all_present;
+    BlockType color;
+    int COLOR;
+
+   private:
+    typedef ProbabilityTablesBase::CoefficientContext CoefficientContext;
+    const bool left_present;
+    const bool above_present;
+    const bool above_right_present;
+
+   public:
+    //#ifdef USE_TEMPLATIZED_COLOR
+    //   enum {
+    //      COLOR = (int)color
+    // };
+    // Creates a table view for blocks of color `kcolor`, given which neighbor blocks exist.
+    // all_present is derived: true only when the left, above and above-right neighbors are all
+    // present. COLOR holds the integer value of `color`. Members are initialized in their
+    // declaration order.
+    ProbabilityTables2(BlockType kcolor, bool in_left_present, bool in_above_present, bool in_above_right_present)
+        : all_present(in_left_present && in_above_present && in_above_right_present),
+          color(kcolor),
+          COLOR((int)kcolor),
+          left_present(in_left_present),
+          above_present(in_above_present),
+          above_right_present(in_above_right_present) {
+        // Sanity checks; both hold by construction.
+        always_assert((left_present && above_present && above_right_present) == all_present);
+        always_assert(kcolor == color);
+    } /*
+ #else
+     const BlockType COLOR;
+     ProbabilityTables(BlockType color,
+                       bool in_left_present,
+                       bool in_above_present,
+                       bool in_above_right_present)
+         : left_present(in_left_present),
+           above_present(in_above_present),
+           above_right_present(in_above_right_present),
+           COLOR(color) {
+         always_assert((left_present && right_present && above_right_present) == all_present);
+         static_assert((int)deprecated_color == 0, "Using dynamic color");
+     }
+ #endif*/
+    // Forwards the model held by `base` to reset_model.
+    void reset(ProbabilityTablesBase& base) {
+        reset_model(base.model());
+    }
+    // Forwards the model held by `base` and the path `filename` to load_model.
+    void load(ProbabilityTablesBase& base, const char* filename) {
+        load_model(base.model(), filename);
+    }
+    // Maps this table's COLOR onto an index in [0, BLOCK_TYPES - 1].
+    // With exactly two block types, color 0 maps to 0 and every other color to 1; otherwise the
+    // color value is clamped to BLOCK_TYPES - 1.
+    int color_index() {
+        const int color_value = (int)COLOR;
+        if (BLOCK_TYPES != 2) {
+            return std::min((int)(BLOCK_TYPES - 1), color_value);
+        }
+        return (color_value == 0) ? 0 : 1;
+    }
+    // Builds the coding context for a 7x7 (interior) coefficient.
+    //   best_prior       <- compute_aavrg(coord, block)
+    //   num_nonzeros_bin <- num_nonzeros_to_bin(num_nonzeros_left)
+    //   bsr_best_prior   <- bit_length of min(|best_prior|, 1023)
+    // aligned_zz is accepted but not used here.
+    ProbabilityTablesBase::CoefficientContext update_coefficient_context7x7(int coord,
+                                                                            int aligned_zz,
+                                                                            const ConstBlockContext block,
+                                                                            uint8_t num_nonzeros_left) {
+        CoefficientContext ctx;
+        ctx.best_prior = compute_aavrg(coord, block);
+        ctx.num_nonzeros_bin = num_nonzeros_to_bin(num_nonzeros_left);
+        const int capped_magnitude = std::min(abs(ctx.best_prior), 1023);
+        ctx.bsr_best_prior = bit_length(capped_magnitude);
+        return ctx;
+    }
+    // Builds the coding context for a 7x7 (interior) coefficient from a precomputed prior.
+    //   aligned_zz        - aligned index of the coefficient (kept for interface compatibility)
+    //   aavrg             - neighbor-magnitude prior already computed by the caller
+    //   block             - block context (kept for interface compatibility)
+    //   num_nonzeros_left - remaining nonzero count, binned via num_nonzeros_to_bin
+    // Returns a context with best_prior = aavrg, num_nonzeros_bin set from num_nonzeros_left and
+    // bsr_best_prior = bit_length of min(|best_prior|, 1023).
+    // The prior is trusted as given: a scalar recomputation that was performed here on every call
+    // and then discarded has been removed. To re-validate the caller, temporarily add
+    //   assert(aavrg == compute_aavrg(aligned_to_raster.at(aligned_zz), aligned_zz, block));
+    ProbabilityTablesBase::CoefficientContext update_coefficient_context7x7_precomp(int aligned_zz,
+                                                                                    int aavrg,
+                                                                                    const ConstBlockContext block,
+                                                                                    uint8_t num_nonzeros_left) {
+        (void)aligned_zz; // only needed by the optional validation described above
+        (void)block;
+        ProbabilityTablesBase::CoefficientContext retval;
+        retval.best_prior = aavrg;
+        retval.num_nonzeros_bin = num_nonzeros_to_bin(num_nonzeros_left);
+        retval.bsr_best_prior = bit_length(std::min(abs(retval.best_prior), 1023));
+        return retval;
+    }
+    // Builds the coding context for an edge coefficient (top row or left column).
+    // When MICROVECTORIZE is enabled the prior comes from the vectorized predictor matching the
+    // edge ((coefficient & 7) != 0 selects the horizontal one); otherwise from the scalar
+    // compute_lak. num_nonzeros_x is stored unchanged as num_nonzeros_bin and bsr_best_prior is
+    // bit_length of min(|best_prior|, 1023).
+    ProbabilityTablesBase::CoefficientContext update_coefficient_context8(uint8_t coefficient,
+                                                                          const ConstBlockContext block,
+                                                                          uint8_t num_nonzeros_x) {
+        CoefficientContext ctx = {0, 0, 0};
+        if (MICROVECTORIZE) {
+            if (coefficient & 7) {
+                ctx.best_prior = compute_lak_horizontal(block, coefficient);
+            } else {
+                ctx.best_prior = compute_lak_vertical(block, coefficient);
+            }
+        } else {
+            ctx.best_prior = compute_lak(block, coefficient);
+        }
+        ctx.num_nonzeros_bin = num_nonzeros_x;
+        const int capped_magnitude = std::min(abs(ctx.best_prior), 1023);
+        ctx.bsr_best_prior = bit_length(capped_magnitude);
+        return ctx;
+    }
+    // Coding context for a top-row edge coefficient: the prior always comes from
+    // compute_lak_horizontal. num_nonzeros_x is stored unchanged as num_nonzeros_bin and
+    // bsr_best_prior is bit_length of min(|best_prior|, 1023).
+    ProbabilityTablesBase::CoefficientContext update_coefficient_context8_horiz(uint8_t coefficient,
+                                                                                const ConstBlockContext block,
+                                                                                uint8_t num_nonzeros_x) {
+        CoefficientContext ctx = {0, 0, 0};
+        ctx.best_prior = compute_lak_horizontal(block, coefficient);
+        ctx.num_nonzeros_bin = num_nonzeros_x;
+        const int capped_magnitude = std::min(abs(ctx.best_prior), 1023);
+        ctx.bsr_best_prior = bit_length(capped_magnitude);
+        return ctx;
+    }
+    // Coding context for a left-column edge coefficient: the prior always comes from
+    // compute_lak_vertical. num_nonzeros_x is stored unchanged as num_nonzeros_bin and
+    // bsr_best_prior is bit_length of min(|best_prior|, 1023).
+    ProbabilityTablesBase::CoefficientContext update_coefficient_context8_vert(uint8_t coefficient,
+                                                                               const ConstBlockContext block,
+                                                                               uint8_t num_nonzeros_x) {
+        CoefficientContext ctx = {0, 0, 0};
+        ctx.best_prior = compute_lak_vertical(block, coefficient);
+        ctx.num_nonzeros_bin = num_nonzeros_x;
+        const int capped_magnitude = std::min(abs(ctx.best_prior), 1023);
+        ctx.bsr_best_prior = bit_length(capped_magnitude);
+        return ctx;
+    }
+// INSTANTIATE_TEMPLATE_METHOD(N) defines a member function update_coefficient_context8_templN
+// that builds the edge-coefficient context with the band fixed to N at compile time:
+// best_prior comes from compute_lak_templ<N>, num_nonzeros_x is stored unchanged as
+// num_nonzeros_bin and bsr_best_prior is bit_length of min(|best_prior|, 1023).
+// It is instantiated below for the top-row bands 1..7 and the left-column bands 8, 16, ..., 56.
+#define INSTANTIATE_TEMPLATE_METHOD(N)                                                                            \
+    ProbabilityTablesBase::CoefficientContext update_coefficient_context8_templ##N(const ConstBlockContext block, \
+                                                                                   uint8_t num_nonzeros_x) {      \
+        ProbabilityTablesBase::CoefficientContext retval = {0, 0, 0};                                             \
+        retval.best_prior = compute_lak_templ<N>(block);                                                          \
+        retval.num_nonzeros_bin = num_nonzeros_x;                                                                 \
+        retval.bsr_best_prior = bit_length(std::min(abs(retval.best_prior), 1023));                               \
+        return retval;                                                                                            \
+    }
+    // Top-row bands.
+    INSTANTIATE_TEMPLATE_METHOD(1)
+    INSTANTIATE_TEMPLATE_METHOD(2)
+    INSTANTIATE_TEMPLATE_METHOD(3)
+    INSTANTIATE_TEMPLATE_METHOD(4)
+    INSTANTIATE_TEMPLATE_METHOD(5)
+    INSTANTIATE_TEMPLATE_METHOD(6)
+    INSTANTIATE_TEMPLATE_METHOD(7)
+    // Left-column bands.
+    INSTANTIATE_TEMPLATE_METHOD(8)
+    INSTANTIATE_TEMPLATE_METHOD(16)
+    INSTANTIATE_TEMPLATE_METHOD(24)
+    INSTANTIATE_TEMPLATE_METHOD(32)
+    INSTANTIATE_TEMPLATE_METHOD(40)
+    INSTANTIATE_TEMPLATE_METHOD(48)
+    INSTANTIATE_TEMPLATE_METHOD(56)
+    // Selects the branch slice used to code the number of nonzero 7x7 coefficients of `block`.
+    // The context is derived from the neighbors' nonzero counts:
+    //   both left and above usable : (above + left + 2) / 4
+    //   only above usable          : (above + 1) / 2
+    //   only left usable           : (left + 1) / 2
+    //   neither                    : 0
+    // and is then binned with num_nonzeros_to_bin.
+    Sirikata::Array2d<Branch, 6, 32>::Slice nonzero_counts_7x7(ProbabilityTablesBase& pt,
+                                                               const ConstBlockContext block) {
+        const bool use_above = all_present || above_present;
+        const bool use_left = all_present || left_present;
+        uint8_t nz_above = 0;
+        uint8_t nz_left = 0;
+        if (use_above) {
+            nz_above = block.nonzeros_above_7x7_unchecked();
+        }
+        if (use_left) {
+            nz_left = block.nonzeros_left_7x7_unchecked();
+        }
+
+        uint8_t nz_context = 0;
+        if (use_above && use_left) {
+            nz_context = (nz_above + nz_left + 2) / 4;
+        } else if (use_above) {
+            nz_context = (nz_above + 1) / 2;
+        } else if (use_left) {
+            nz_context = (nz_left + 1) / 2;
+        }
+        ANNOTATE_CTX(0, ZEROS7x7, 0, nz_context);
+        return pt.model().num_nonzeros_counts_7x7_.at(color_index(), num_nonzeros_to_bin(nz_context));
+    }
+    // Same selection as the two-argument overload, additionally reporting the intermediate values:
+    //   *nz_ctx <- the neighbor-derived nonzero context
+    //   *add26  <- that context after num_nonzeros_to_bin
+    // Context rule: (above + left + 2) / 4 with both neighbors usable, (n + 1) / 2 with exactly
+    // one usable neighbor n, 0 with none.
+    Sirikata::Array2d<Branch, 6, 32>::Slice nonzero_counts_7x7(ProbabilityTablesBase& pt,
+                                                               const ConstBlockContext block,
+                                                               uint8_t* nz_ctx,
+                                                               uint8_t* add26) {
+        const bool use_above = all_present || above_present;
+        const bool use_left = all_present || left_present;
+        uint8_t nz_above = 0;
+        uint8_t nz_left = 0;
+        if (use_above) {
+            nz_above = block.nonzeros_above_7x7_unchecked();
+        }
+        if (use_left) {
+            nz_left = block.nonzeros_left_7x7_unchecked();
+        }
+
+        uint8_t nz_context = 0;
+        if (use_above && use_left) {
+            nz_context = (nz_above + nz_left + 2) / 4;
+        } else if (use_above) {
+            nz_context = (nz_above + 1) / 2;
+        } else if (use_left) {
+            nz_context = (nz_left + 1) / 2;
+        }
+        ANNOTATE_CTX(0, ZEROS7x7, 0, nz_context);
+        *nz_ctx = nz_context;
+        *add26 = num_nonzeros_to_bin(nz_context);
+        return pt.model().num_nonzeros_counts_7x7_.at(color_index(), num_nonzeros_to_bin(nz_context));
+    }
+    // Selects the branch slice for the 8x1 edge nonzero count, indexed by the color index, by
+    // eob_x and by the bucket (num_nonzeros + 3) / 7.
+    Sirikata::Array2d<Branch, 3u, 4u>::Slice x_nonzero_counts_8x1(ProbabilityTablesBase& pt,
+                                                                  unsigned int eob_x,
+                                                                  unsigned int num_nonzeros) {
+        const unsigned int nonzero_bucket = (num_nonzeros + 3) / 7;
+        ANNOTATE_CTX(0, ZEROS8x1, 0, nonzero_bucket);
+        ANNOTATE_CTX(0, ZEROS8x1, 1, eob_x);
+        return pt.model().num_nonzeros_counts_8x1_.at(color_index(), eob_x, nonzero_bucket);
+    }
+    // Selects the branch slice for the 1x8 edge nonzero count, indexed by the color index, by
+    // eob_x and by the bucket (num_nonzeros + 3) / 7.
+    Sirikata::Array2d<Branch, 3u, 4u>::Slice y_nonzero_counts_1x8(ProbabilityTablesBase& pt,
+                                                                  unsigned int eob_x,
+                                                                  unsigned int num_nonzeros) {
+        const unsigned int nonzero_bucket = (num_nonzeros + 3) / 7;
+        ANNOTATE_CTX(0, ZEROS1x8, 0, nonzero_bucket);
+        ANNOTATE_CTX(0, ZEROS1x8, 1, eob_x);
+        return pt.model().num_nonzeros_counts_1x8_.at(color_index(), eob_x, nonzero_bucket);
+    }
+    // Selects the branch slice used to code the exponent of an edge coefficient, indexed by the
+    // color index, context.num_nonzeros_bin, zig15 and context.bsr_best_prior.
+    //   band  - raster index of the edge coefficient (used for annotation and the sanity assert)
+    //   zig15 - position of the coefficient within the edge ordering, as supplied by the caller
+    Sirikata::Array1d<Branch, MAX_EXPONENT>::Slice exponent_array_x(ProbabilityTablesBase& pt,
+                                                                    int band,
+                                                                    int zig15,
+                                                                    CoefficientContext context) {
+        ANNOTATE_CTX(band, EXP8, 0, context.bsr_best_prior);
+        // NOTE(review): this names context.num_nonzeros while the lookup below (and
+        // exponent_array_7x7) use context.num_nonzeros_bin -- confirm the field exists / is intended.
+        ANNOTATE_CTX(band, EXP8, 1, context.num_nonzeros);
+        // NOTE(review): `==` binds tighter than `?:`, so this parses as
+        //   (band & 7) == 0 ? ((band >> 3) + 7) : (band - 1 == zig15)
+        // and for left-column bands only checks that (band >> 3) + 7 is nonzero. Adding the
+        // "obvious" parentheses would change which inputs trip the assert, so confirm the intended
+        // band -> zig15 mapping against the callers before touching it.
+        assert((band & 7) == 0 ? ((band >> 3) + 7) : band - 1 == zig15);
+        return pt.model().exponent_counts_x_.at(color_index(), context.num_nonzeros_bin, zig15, context.bsr_best_prior);
+    }
+    // Returns the exponent_counts_ slice selected by color_index(), the context's
+    // num_nonzeros_bin, zig49 and the context's bsr_best_prior. `band` is only used for
+    // annotation.
+    Sirikata::Array1d<Branch, MAX_EXPONENT>::Slice exponent_array_7x7(ProbabilityTablesBase& pt,
+                                                                      const unsigned int band,
+                                                                      const unsigned int zig49,
+                                                                      const CoefficientContext context) {
+        const auto prior_bits = context.bsr_best_prior;
+        const auto nonzeros_bin = context.num_nonzeros_bin;
+        ANNOTATE_CTX(band, EXP7x7, 0, prior_bits);
+        ANNOTATE_CTX(band, EXP7x7, 1, nonzeros_bin);
+        auto& counts = pt.model().exponent_counts_;
+        return counts.at(color_index(), nonzeros_bin, zig49, prior_bits);
+    }
+    // Returns the exponent_counts_dc_ slice for the DC coefficient. Both indices are clamped
+    // to the last valid entry of their dimension (size0 - 1 and size1 - 1 respectively).
+    Sirikata::Array1d<Branch, MAX_EXPONENT>::Slice exponent_array_dc(ProbabilityTablesBase& pt,
+                                                                     uint16_t len_abs_mxm,
+                                                                     uint16_t len_abs_offset_to_closest_edge) {
+        const uint16_t last_mxm = (uint16_t)(Model::ExponentCountsDC::size0 - 1);
+        const uint16_t last_edge = (uint16_t)(Model::ExponentCountsDC::size1 - 1);
+        const uint16_t mxm_index = std::min(len_abs_mxm, last_mxm);
+        const uint16_t edge_index = std::min(len_abs_offset_to_closest_edge, last_edge);
+        return pt.model().exponent_counts_dc_.at(mxm_index, edge_index);
+    }
+    // Returns the residual_noise_counts_dc_ slice for the DC coefficient, indexed by
+    // len_abs_mxm clamped to size0 - 1. len_abs_offset_to_closest_edge is accepted for
+    // signature symmetry with exponent_array_dc but is not used in the lookup.
+    Sirikata::Array1d<Branch, COEF_BITS>::Slice residual_array_dc(ProbabilityTablesBase& pt,
+                                                                  uint16_t len_abs_mxm,
+                                                                  uint16_t len_abs_offset_to_closest_edge) {
+        const uint16_t last_mxm = (uint16_t)(Model::ResidualNoiseCountsDc::size0 - 1);
+        const uint16_t mxm_index = std::min(last_mxm, len_abs_mxm);
+        return pt.model().residual_noise_counts_dc_.at(mxm_index);
+    }
+    // Residual-noise slice for an edge coefficient: annotates the context and then forwards
+    // to residual_noise_array_shared(pt, band, context).
+    Sirikata::Array1d<Branch, COEF_BITS>::Slice residual_noise_array_x(ProbabilityTablesBase& pt,
+                                                                       const unsigned int band,
+                                                                       const CoefficientContext context) {
+        // NOTE(review): num_nonzeros_x is not declared in this function; this only compiles if
+        // ANNOTATE_CTX discards its arguments or the name exists in an enclosing scope -- confirm.
+        ANNOTATE_CTX(band, RES8, 0, num_nonzeros_x);
+        return residual_noise_array_shared(pt, band, context);
+    }
+
+    // Common lookup behind the residual-noise accessors: returns the residual_noise_counts_
+    // slice indexed by color_index(), band / band_divisor and the context's num_nonzeros_bin.
+    Sirikata::Array1d<Branch, COEF_BITS>::Slice residual_noise_array_shared(ProbabilityTablesBase& pt,
+                                                                            const unsigned int band,
+                                                                            const CoefficientContext context) {
+        const auto band_group = band / band_divisor;
+        auto& counts = pt.model().residual_noise_counts_;
+        return counts.at(color_index(), band_group, context.num_nonzeros_bin);
+    }
+    // Residual-noise slice for a coefficient addressed by `band`: annotates the context
+    // (RESDC for band 0, RES7x7 otherwise) and forwards to residual_noise_array_shared().
+    Sirikata::Array1d<Branch, COEF_BITS>::Slice residual_noise_array_7x7(ProbabilityTablesBase& pt,
+                                                                         const unsigned int band,
+                                                                         const CoefficientContext context) {
+        if (band != 0) {
+            ANNOTATE_CTX(band, RES7x7, 0, num_nonzeros_to_bin(num_nonzeros));
+        } else {
+            ANNOTATE_CTX(0, RESDC, 0, num_nonzeros_to_bin(num_nonzeros));
+        }
+        return residual_noise_array_shared(pt, band, context);
+    }
+    // Maps a nonzero count to its bin using the last row (NUM_NONZEROS_BINS - 1) of the
+    // nonzero_to_bin lookup table.
+    unsigned int num_nonzeros_to_bin(uint8_t num_nonzeros) {
+        const auto& bin_row = nonzero_to_bin[NUM_NONZEROS_BINS - 1];
+        return bin_row[num_nonzeros];
+    }
+    // Dot product of raster coefficients 0..7 of `block` with entries
+    // [pixel_row * 8 + 0 .. pixel_row * 8 + 7] of the dequantized linear IDCT table for COLOR.
+    //   ignore_first - when true, coefficient 0 is left out of the sum.
+    // Returns the accumulated int sum.
+    int idct_2d_8x1(const AlignedBlock& block, bool ignore_first, int pixel_row) {
+        const auto& icos = ProbabilityTablesBase::icos_idct_linear_8192_dequantized((int)COLOR);
+        const int row_base = pixel_row * 8;
+        int sum = 0;
+        if (!ignore_first) {
+            sum = block.coefficients_raster(0) * icos[row_base + 0];
+        }
+        for (int k = 1; k < 8; ++k) {
+            sum += block.coefficients_raster(k) * icos[row_base + k];
+        }
+        return sum;
+    }
+
+    // Dot product of raster coefficients 0, 8, 16, ..., 56 of `block` (the first one read via
+    // block.dc()) with entries [pixel_row * 8 + 0 .. pixel_row * 8 + 7] of the dequantized
+    // linear IDCT table for COLOR.
+    //   ignore_first - when true, the dc() term is left out of the sum.
+    // Returns the accumulated int sum.
+    int idct_2d_1x8(const AlignedBlock& block, bool ignore_first, int pixel_row) {
+        const auto& icos = ProbabilityTablesBase::icos_idct_linear_8192_dequantized((int)COLOR);
+        const int row_base = pixel_row * 8;
+        int sum = 0;
+        if (!ignore_first) {
+            sum = block.dc() * icos[row_base + 0];
+        }
+        for (int k = 1; k < 8; ++k) {
+            sum += block.coefficients_raster(8 * k) * icos[row_base + k];
+        }
+        return sum;
+    }
+
+    // DCT-domain DC prediction from the available neighbours.
+    // For each present neighbour, the difference between the neighbour's line (pixel_row 7,
+    // first coefficient included) and the current block's line (pixel_row 0, first coefficient
+    // excluded) is taken; with both neighbours the two differences are summed and scaled by 4,
+    // with one neighbour that difference is scaled by 8. The result is clamped to
+    // [-1024 * 8192, 1016 * 8192], divided by quantization_table(COLOR, 0), and finally
+    // divided by 8192 with rounding away from zero.
+    int predict_dc_dct(const ConstBlockContext& context) {
+        const bool has_left = all_present || left_present;
+        const bool has_above = all_present || above_present;
+
+        int left_diff = 0;
+        if (has_left) {
+            const int neighbour_line = idct_2d_8x1(context.left_unchecked(), 0, 7);
+            const int own_line = idct_2d_8x1(context.here(), 1, 0);
+            left_diff = neighbour_line - own_line;
+        }
+        int above_diff = 0;
+        if (has_above) {
+            const int neighbour_line = idct_2d_1x8(context.above_unchecked(), 0, 7);
+            const int own_line = idct_2d_1x8(context.here(), 1, 0);
+            above_diff = neighbour_line - own_line;
+        }
+
+        int prediction = 0;
+        if (has_left && has_above) {
+            prediction = (left_diff + above_diff) * 4;
+        } else if (has_left) {
+            prediction = left_diff * 8;
+        } else if (has_above) {
+            prediction = above_diff * 8;
+        }
+
+        const int DCT_RSC = 8192;
+        const int upper_bound = 1016 * DCT_RSC;
+        const int lower_bound = -1024 * DCT_RSC;
+        if (prediction > upper_bound) {
+            prediction = upper_bound;
+        }
+        if (prediction < lower_bound) {
+            prediction = lower_bound;
+        }
+        prediction /= ProbabilityTablesBase::quantization_table((int)COLOR, 0);
+
+        // Half a step with the sign of the prediction, so the division rounds away from zero.
+        const int half_step = (prediction < 0) ? -(DCT_RSC / 2) : (DCT_RSC / 2);
+        return (prediction + half_step) / DCT_RSC;
+    }
+    // Gradient-style DC predictor over the left (a), above (b) and above-left (c) DC values.
+    //   - no neighbour:       0
+    //   - only one neighbour: that neighbour's DC
+    //   - both neighbours:    min(a, b) if c >= max(a, b); max(a, b) if c <= min(a, b);
+    //                         otherwise a + b - c.
+    int predict_locoi_dc_deprecated(const ConstBlockContext& context) {
+        const bool has_left = all_present || left_present;
+        const bool has_above = all_present || above_present;
+        if (!has_left) {
+            if (above_present) {
+                return context.above_unchecked().dc();
+            }
+            return 0;
+        }
+        const int a = context.left_unchecked().dc();
+        if (!has_above) {
+            return a;
+        }
+        const int b = context.above_unchecked().dc();
+        const int c = context.above_left_unchecked().dc();
+        const int larger = std::max(a, b);
+        const int smaller = std::min(a, b);
+        if (c >= larger) {
+            return smaller;
+        }
+        if (c <= smaller) {
+            return larger;
+        }
+        return a + b - c;
+    }
+    // Applies (recover_original == true) or removes (false) the predict_dc_dct() prediction
+    // to/from the current block's DC, then wraps the result once into
+    // [-max_value, max_value] where max_value = (1 << (1 + MAX_EXPONENT)) - 1 and the wrap
+    // distance is 2 * max_value + 1.
+    int predict_or_unpredict_dc(const ConstBlockContext& context, bool recover_original) {
+        const int max_value = (1 << (1 + MAX_EXPONENT)) - 1;
+        const int wrap_distance = 2 * max_value + 1;
+        // predict_locoi_dc_deprecated(block) was the earlier predictor used here.
+        const int prediction = predict_dc_dct(context);
+        const int signed_prediction = recover_original ? prediction : -prediction;
+        int result = context.here().dc() + signed_prediction;
+        if (result < -max_value) {
+            result += wrap_distance;
+        }
+        if (result > max_value) {
+            result -= wrap_distance;
+        }
+        return result;
+    }
+// Per-lane right shift of packed 16-bit values that rounds toward zero: _mm_sign_epi16(vec, vec)
+// is shifted right logically by imm8 and the result is then sign-adjusted by vec again.
+// `vec` is expanded three times, so pass an expression without side effects.
+// The expansion is a plain parenthesised expression (no trailing ';'), so it can be used inside
+// larger expressions as well as in `x = shift_right_round_zero_epi16(v, n);` statements.
+#define shift_right_round_zero_epi16(vec, imm8) (_mm_sign_epi16(_mm_srli_epi16(_mm_sign_epi16((vec), (vec)), (imm8)), (vec)))
+    // Pixel-domain DC prediction for the current block.
+    // Steps visible here:
+    //   1. idct() fills pixels_sans_dc from the current block (the trailing `true` presumably
+    //      asks idct() to leave the DC term out -- confirm against idct()).
+    //   2. For each available neighbour, eight DC estimates are formed as
+    //        neighbour_edge_pixel - (own_edge_pixel - own_next_pixel) / 2 - (own_edge_pixel + 1024)
+    //      Left estimates occupy dc_estimates[0..7]; above estimates occupy [8..15] when the
+    //      left neighbour is also used, otherwise [0..7].
+    //   3. The per-edge sums are combined into avgmed and two uncertainty measures.
+    // Parameters:
+    //   pixels_sans_dc   - output of idct(); indices up to 57 are read below.
+    //   uncertainty_val  - receives (max estimate - min estimate) >> 3.
+    //   uncertainty2_val - receives, >> 3, whichever per-edge sum deviates less (in magnitude)
+    //                      from avgmed.
+    // Neither uncertainty output is written when no neighbour is available.
+    // Returns (avgmed / q[0] + 4) >> 3, which is 0 when no neighbour is available.
+    int adv_predict_dc_pix(const ConstBlockContext& context,
+                           int16_t* pixels_sans_dc,
+                           int32_t* uncertainty_val,
+                           int32_t* uncertainty2_val) {
+        uint16_t* q = ProbabilityTablesBase::quantization_table((int)color);
+        idct(context.here(), q, pixels_sans_dc, true);
+
+        Sirikata::AlignedArray1d<int16_t, 16> dc_estimates;
+        dc_estimates.memset(0);
+        int32_t avgmed = 0;
+        if (all_present || left_present || above_present) {
+            if ((VECTORIZE || MICROVECTORIZE)) {
+                if (all_present || above_present) { // above goes first to prime the cache
+                    __m128i neighbor_above = _mm_loadu_si128(
+                        (const __m128i*)(const char*)context.neighbor_context_above_unchecked().horizontal_ptr());
+                    // First two rows of the current block (elements 0..7 and 8..15).
+                    __m128i pixels_sans_dc_reg = _mm_loadu_si128((const __m128i*)(const char*)pixels_sans_dc);
+                    __m128i pixels2_sans_dc_reg = _mm_loadu_si128((const __m128i*)(const char*)(pixels_sans_dc + 8));
+                    __m128i pixels_delta = _mm_sub_epi16(pixels_sans_dc_reg, pixels2_sans_dc_reg);
+                    __m128i pixels_delta_div2 = shift_right_round_zero_epi16(pixels_delta, 1);
+                    __m128i pixels_sans_dc_recentered = _mm_add_epi16(pixels_sans_dc_reg, _mm_set1_epi16(1024));
+                    __m128i above_dc_estimate =
+                        _mm_sub_epi16(_mm_sub_epi16(neighbor_above, pixels_delta_div2), pixels_sans_dc_recentered);
+
+                    // Above estimates go to the second half when the left half is also filled.
+                    _mm_store_si128((__m128i*)(char*)(dc_estimates.begin() + ((all_present || left_present) ? 8 : 0)),
+                                    above_dc_estimate);
+                }
+                if (all_present || left_present) {
+                    const int16_t* horiz_data = context.neighbor_context_left_unchecked().vertical_ptr_except_7();
+                    __m128i neighbor_horiz = _mm_loadu_si128((const __m128i*)(const char*)horiz_data);
+                    // neighbor_horiz = _mm_insert_epi16(neighbor_horiz,
+                    // horiz_data[NeighborSummary::VERTICAL_LAST_PIXEL_OFFSET_FROM_FIRST_PIXEL], 7);
+                    // Gather elements 0, 8, ..., 56 and 1, 9, ..., 57 of the current block.
+                    __m128i pixels_sans_dc_reg =
+                        _mm_set_epi16(pixels_sans_dc[56], pixels_sans_dc[48], pixels_sans_dc[40], pixels_sans_dc[32],
+                                      pixels_sans_dc[24], pixels_sans_dc[16], pixels_sans_dc[8], pixels_sans_dc[0]);
+                    __m128i pixels_delta = _mm_sub_epi16(
+                        pixels_sans_dc_reg,
+                        _mm_set_epi16(pixels_sans_dc[57], pixels_sans_dc[49], pixels_sans_dc[41], pixels_sans_dc[33],
+                                      pixels_sans_dc[25], pixels_sans_dc[17], pixels_sans_dc[9], pixels_sans_dc[1]));
+
+                    __m128i pixels_delta_div2 = shift_right_round_zero_epi16(pixels_delta, 1);
+                    __m128i left_dc_estimate = _mm_sub_epi16(_mm_sub_epi16(neighbor_horiz, pixels_delta_div2),
+                                                             _mm_add_epi16(pixels_sans_dc_reg, _mm_set1_epi16(1024)));
+
+                    _mm_store_si128((__m128i*)(char*)dc_estimates.begin(), left_dc_estimate);
+                }
+            } else {
+                // Scalar path: same estimates computed one pixel at a time.
+                if (all_present || left_present) {
+                    for (int i = 0; i < 8; ++i) {
+                        int a = pixels_sans_dc[i << 3] + 1024;
+                        int pixel_delta = pixels_sans_dc[i << 3] - pixels_sans_dc[(i << 3) + 1];
+                        int b =
+                            context.neighbor_context_left_unchecked().vertical(i) - (pixel_delta / 2); // round to zero
+                        dc_estimates[i] = b - a;
+                    }
+                }
+                if (all_present || above_present) {
+                    for (int i = 0; i < 8; ++i) {
+                        int a = pixels_sans_dc[i] + 1024;
+                        int pixel_delta = pixels_sans_dc[i] - pixels_sans_dc[i + 8];
+                        int b = context.neighbor_context_above_unchecked().horizontal(i) -
+                                (pixel_delta / 2); // round to zero
+                        dc_estimates[i + ((all_present || left_present) ? 8 : 0)] = b - a;
+                    }
+                }
+            }
+            // avg_h_v holds the SUM (not the mean) of the eight estimates of each half.
+            int32_t avg_h_v[2] = {0, 0};
+            int32_t min_dc = dc_estimates[0];
+            int32_t max_dc = dc_estimates[0];
+            size_t which_est = 0;
+            for (int vert = 0; vert != 2; ++vert) {
+                for (int i = 0; i < 8; ++which_est, ++i) {
+                    int16_t cur_est = dc_estimates[which_est];
+                    avg_h_v[vert] += cur_est;
+                    if (min_dc > cur_est) {
+                        min_dc = cur_est;
+                    }
+                    if (max_dc < cur_est) {
+                        max_dc = cur_est;
+                    }
+                }
+                // Only one neighbour contributed: reuse its sum for the second half and stop.
+                if ((!all_present) && (above_present == false || left_present == false)) {
+                    avg_h_v[1] = avg_h_v[0];
+                    break;
+                }
+            }
+            int32_t overall_avg = (avg_h_v[0] + avg_h_v[1]) >> 1;
+            avgmed = overall_avg;
+            *uncertainty_val = (max_dc - min_dc) >> 3;
+            avg_h_v[0] -= avgmed;
+            avg_h_v[1] -= avgmed;
+            // Keep the deviation with the smaller magnitude (ties keep avg_h_v[1]).
+            int32_t far_afield_value = avg_h_v[1];
+            if (abs(avg_h_v[0]) < abs(avg_h_v[1])) {
+                far_afield_value = avg_h_v[0];
+            }
+            *uncertainty2_val = (far_afield_value) >> 3;
+
+            if (false) { // this is to debug some of the differences
+                debug_print_deltas(context, dc_estimates.begin(), avgmed);
+            }
+        }
+        return ((avgmed / q[0] + 4) >> 3);
+    }
+    // DC prediction from precomputed per-pixel edge estimates (context-free variant of
+    // adv_predict_dc_pix).
+    // Parameters:
+    //   all_present / left_present / above_present - neighbour availability; the left
+    //       neighbour is used when (all_present || left_present), the above neighbour when
+    //       (all_present || above_present).
+    //   q0               - divisor applied to the combined estimate (must be non-zero).
+    //   est_v            - eight estimates from the left neighbour.
+    //   est_h            - eight estimates from the above neighbour.
+    //   uncertainty_val  - receives (max estimate - min estimate) >> 3.
+    //   uncertainty2_val - receives, >> 3, whichever per-edge sum deviates less (in magnitude)
+    //                      from the combined value.
+    // Neither uncertainty output is written when no neighbour is available.
+    // Returns (avgmed / q0 + 4) >> 3, which is 0 when no neighbour is available.
+    int adv_predict_dc_pix2(
+        // const ConstBlockContext&context,
+        bool all_present,
+        bool left_present,
+        bool above_present,
+        uint16_t q0,
+        int16_t est_v[8],
+        int16_t est_h[8],
+        int32_t* uncertainty_val,
+        int32_t* uncertainty2_val) {
+        const bool use_left = all_present || left_present;
+        const bool use_above = all_present || above_present;
+
+        int16_t dc_estimates[16];
+        for (int i = 0; i < 16; i++) dc_estimates[i] = 0;
+        int32_t avgmed = 0;
+        if (use_left || use_above) {
+            if (use_left) {
+                for (int i = 0; i < 8; ++i) {
+                    dc_estimates[i] = est_v[i]; // b - a;
+                }
+            }
+            if (use_above) {
+                // The above estimates must not overwrite the left ones whenever the left
+                // neighbour is in use, which includes the all_present case (this matches the
+                // offset computed in adv_predict_dc_pix).
+                const int above_offset = use_left ? 8 : 0;
+                for (int i = 0; i < 8; ++i) {
+                    dc_estimates[i + above_offset] = est_h[i]; // b - a;
+                }
+            }
+            // avg_h_v holds the SUM (not the mean) of the eight estimates of each half.
+            int32_t avg_h_v[2] = {0, 0};
+            int32_t min_dc = dc_estimates[0];
+            int32_t max_dc = dc_estimates[0];
+            size_t which_est = 0;
+            for (int vert = 0; vert != 2; ++vert) {
+                for (int i = 0; i < 8; ++which_est, ++i) {
+                    int16_t cur_est = dc_estimates[which_est];
+                    avg_h_v[vert] += cur_est;
+                    if (min_dc > cur_est) {
+                        min_dc = cur_est;
+                    }
+                    if (max_dc < cur_est) {
+                        max_dc = cur_est;
+                    }
+                }
+                // Only one neighbour contributed: reuse its sum for the second half and stop.
+                if ((!all_present) && (above_present == false || left_present == false)) {
+                    avg_h_v[1] = avg_h_v[0];
+                    break;
+                }
+            }
+            int32_t overall_avg = (avg_h_v[0] + avg_h_v[1]) >> 1;
+            avgmed = overall_avg;
+            *uncertainty_val = (max_dc - min_dc) >> 3;
+            avg_h_v[0] -= avgmed;
+            avg_h_v[1] -= avgmed;
+            // Keep the deviation with the smaller magnitude (ties keep avg_h_v[1]).
+            int32_t far_afield_value = avg_h_v[1];
+            if (abs(avg_h_v[0]) < abs(avg_h_v[1])) {
+                far_afield_value = avg_h_v[0];
+            }
+            *uncertainty2_val = (far_afield_value) >> 3;
+        }
+        return ((avgmed / q0 + 4) >> 3);
+    }
+    // Debug helper: compares several DC predictors against the block's actual DC, adds each
+    // predictor's absolute error to the matching LeptonDebug counter and prints the values
+    // to stderr.
+    //   dc_estimates - 16 estimates when both neighbours are in use, otherwise 8.
+    //   avgmed       - combined estimate computed by the caller.
+    void debug_print_deltas(const ConstBlockContext& context, int16_t* dc_estimates, int avgmed) {
+        int actual_dc = context.here().dc();
+        uint16_t* q = ProbabilityTablesBase::quantization_table((int)color);
+        int len_est = ((all_present || (left_present && above_present)) ? 16 : 8);
+        int avg_estimated_dc = 0;
+        int dc_sum = 0;
+        for (int i = 0; i < len_est; ++i) {
+            dc_sum += dc_estimates[i];
+        }
+        avg_estimated_dc = dc_sum;
+        // With 16 estimates the sum is halved so it is on the same scale as the 8-estimate case.
+        if (all_present || (left_present && above_present)) {
+            avg_estimated_dc >>= 1;
+        }
+
+        avg_estimated_dc = (avg_estimated_dc / q[0] + xIDCTSCALE / 2) >> 3;
+        // Median estimate, taken from a sorted copy so the caller's array is left untouched.
+        int16_t dc_copy[16];
+        memcpy(dc_copy, dc_estimates, len_est * sizeof(int16_t));
+        std::sort(dc_copy, dc_copy + len_est);
+        int mmed = dc_copy[len_est / 2];
+        // NOTE(review): unlike scaled_avgmed below, this value is not shifted right by 3 --
+        // confirm whether that is intentional.
+        int scaled_med = (mmed / q[0] + 4);
+        int scaled_avgmed = (((avgmed / q[0]) + 4) >> 3);
+        using namespace LeptonDebug;
+        LeptonDebug::med_err += abs(scaled_med - actual_dc);
+        LeptonDebug::amd_err += abs(scaled_avgmed - actual_dc);
+        LeptonDebug::avg_err += abs(avg_estimated_dc - actual_dc);
+        int locoi_pred = predict_locoi_dc_deprecated(context);
+        int predicted_dc = predict_dc_dct(context);
+        LeptonDebug::ori_err += abs(predicted_dc - actual_dc);
+        LeptonDebug::loc_err += abs(locoi_pred - actual_dc);
+
+        // NOTE(review): this subtracts the first from the last element of the UNSORTED input;
+        // if "MXM" is meant to be max minus min, the sorted dc_copy should be used -- confirm.
+        fprintf(stderr, "MXM: %d\n", dc_estimates[len_est - 1] - dc_estimates[0]);
+        fprintf(stderr, "MED: %d (%d)\n", scaled_med, LeptonDebug::med_err);
+        fprintf(stderr, "AMD: %d (%d)\n", scaled_avgmed, LeptonDebug::amd_err);
+        fprintf(stderr, "AVG: %d (%d)\n", avg_estimated_dc, LeptonDebug::avg_err);
+        fprintf(stderr, "ORI: %d (%d)\n", predicted_dc, LeptonDebug::ori_err);
+        fprintf(stderr, "LOC: %d (%d)\n", locoi_pred, LeptonDebug::loc_err);
+        fprintf(stderr, "DC : %d\n", actual_dc);
+    }
+    // Adds (recover_original == true) or subtracts (false) predicted_val to/from saved_dc and
+    // wraps the result once into [-max_value, max_value], where
+    // max_value = 1 << (MAX_EXPONENT - 1) and the wrap distance is 2 * max_value + 1.
+    int adv_predict_or_unpredict_dc(int16_t saved_dc, bool recover_original, int predicted_val) {
+        const int max_value = (1 << (MAX_EXPONENT - 1));
+        const int wrap_distance = 2 * max_value + 1;
+        const int signed_prediction = recover_original ? predicted_val : -predicted_val;
+        int result = saved_dc + signed_prediction;
+        if (result < -max_value) {
+            result += wrap_distance;
+        }
+        if (result > max_value) {
+            result -= wrap_distance;
+        }
+        return result;
+    }
+    // Weighted neighbour magnitude for the DC coefficient: forwards to
+    // compute_aavrg(0, raster_to_aligned.at(0), context).
+    // The 32-bit accumulation that used to follow the return statement could never execute
+    // and has been removed.
+    int compute_aavrg_dc(ConstBlockContext context) {
+        return compute_aavrg(0, raster_to_aligned.at(0), context);
+    }
+    // Weighted magnitude of the neighbours' coefficient at raster position `coord`:
+    //   - left and above in use: (13 * (|left| + |above|) + 6 * |above_left|) >> 5,
+    //     evaluated in 16-bit arithmetic;
+    //   - otherwise: the sum of |coefficient| over whichever neighbours are in use.
+    // aligned_zz does not take part in the computation; the parameter is kept so existing
+    // callers compile unchanged. The arithmetic lives in the two-argument overload so the
+    // two entry points cannot drift apart.
+    int16_t compute_aavrg(unsigned int coord, unsigned int aligned_zz, ConstBlockContext context) {
+        (void)aligned_zz;
+        return compute_aavrg(coord, context);
+    }
+    // Weighted magnitude of the neighbours' coefficient at raster position `coord`:
+    //   - left and above in use: (13 * (|left| + |above|) + 6 * |above_left|) >> 5,
+    //     with every intermediate narrowed to 16 bits;
+    //   - otherwise: the sum of |coefficient| over whichever neighbours are in use.
+    int16_t compute_aavrg(unsigned int coord, ConstBlockContext context) {
+        const bool use_left = all_present || left_present;
+        const bool use_above = all_present || above_present;
+        int16_t sum = 0;
+        if (use_left) {
+            sum = (int16_t)(sum + abs(context.left_unchecked().coefficients_raster(coord)));
+        }
+        if (use_above) {
+            sum = (int16_t)(sum + abs(context.above_unchecked().coefficients_raster(coord)));
+        }
+        if (!(all_present || (left_present && above_present))) {
+            return sum;
+        }
+        constexpr unsigned int log_weight = 5;
+        // Narrow after each step so the 16-bit wrap-around is preserved.
+        sum = (int16_t)(sum * 13);
+        sum = (int16_t)(sum + 6 * abs(context.above_left_unchecked().coefficients_raster(coord)));
+        return ((uint16_t)sum) >> log_weight;
+        // if (block.context().above_right.initialized()) {
+        // total += abs(block.context().above_right.get()->coefficients().at(0));
+        //}
+    }
+#ifdef OPTIMIZED_7x7
+    // Checks a vectorised compute_aavrg_vec() result against the scalar compute_aavrg():
+    // the eight 16-bit lanes of `retval` must equal the scalar values for aligned positions
+    // aligned_zz .. aligned_zz + 7. Returns true when all eight lanes match.
+    bool aavrg_vec_matches(__m128i retval, unsigned int aligned_zz, ConstBlockContext context) {
+        short ret[8];
+        _mm_storeu_si128((__m128i*)(char*)ret, retval);
+        short correct[8];
+        for (unsigned int lane = 0; lane < 8; ++lane) {
+            const unsigned int position = aligned_zz + lane;
+            correct[lane] = compute_aavrg(aligned_to_raster.at(position), position, context);
+        }
+        return (memcmp(ret, correct, sizeof(correct)) == 0);
+    }
+    // Convenience overload: computes the vector result and writes it to aligned_retval with
+    // _mm_store_si128 (the aligned store intrinsic).
+    void compute_aavrg_vec(unsigned int aligned_zz, ConstBlockContext context, short* aligned_retval) {
+        const __m128i averaged = compute_aavrg_vec(aligned_zz, context);
+        _mm_store_si128((__m128i*)(char*)aligned_retval, averaged);
+    }
+// x_mm_loadu_si64(a): 64-bit load helper. Under clang/GCC it is emulated by reading a
+// uint64_t through `a` and passing it to _mm_set1_epi64x; other compilers use the native
+// _mm_loadu_si64.
+// NOTE(review): _mm_set1_epi64x presumably fills both 64-bit halves while _mm_loadu_si64
+// presumably zeroes the upper half -- confirm callers only rely on the low 64 bits.
+#if defined(__clang__) || defined(__GNUC__)
+#define x_mm_loadu_si64(a) _mm_set1_epi64x(*(uint64_t*)(char*)(a))
+#else
+#define x_mm_loadu_si64 _mm_loadu_si64
+#endif
+    // Vectorised counterpart of compute_aavrg() for the eight 16-bit coefficients starting at
+    // aligned index aligned_zz of each neighbour's `coef` storage.
+    //   - no neighbour in use:      all-zero vector
+    //   - only left or only above:  |coef| of that neighbour
+    //   - both:                     (13 * (|left| + |above|) + 6 * |above_left|) >> 5 per lane
+    // The loads use _mm_load_si128 (the aligned-load intrinsic).
+    __m128i compute_aavrg_vec(unsigned int aligned_zz, ConstBlockContext context) {
+        if (all_present == false && left_present == false && above_present == false) {
+            return _mm_setzero_si128();
+        }
+        // `left` stays uninitialised only on the above-only path, which returns `above` below
+        // before `left` is ever read.
+        __m128i left;
+        if (all_present || left_present) {
+            left = _mm_abs_epi16(
+                _mm_load_si128((const __m128i*)(const char*)&context.left_unchecked().coef.at(aligned_zz)));
+            if ((!all_present) && !above_present) {
+                return left;
+            }
+        }
+        __m128i above = _mm_setzero_si128();
+        if (all_present || above_present) {
+            above = _mm_abs_epi16(
+                _mm_load_si128((const __m128i*)(const char*)&context.above_unchecked().coef.at(aligned_zz)));
+            if (all_present == false && !left_present) {
+                return above;
+            }
+        }
+        constexpr unsigned int log_weight = 5;
+        __m128i total = _mm_add_epi16(left, above);
+        total =
+            _mm_mullo_epi16(total, _mm_set1_epi16(13)); // approximate (a*2+b*2 + c)/5 as (a *13 + b * 13 + c * 6)/32
+        __m128i aboveleft = _mm_abs_epi16(
+            _mm_load_si128((const __m128i*)(const char*)&context.above_left_unchecked().coef.at(aligned_zz)));
+        total = _mm_add_epi16(total, _mm_mullo_epi16(aboveleft, _mm_set1_epi16(6)));
+        __m128i retval = _mm_srli_epi16(total, log_weight);
+        // Debug builds cross-check every lane against the scalar implementation.
+        assert(aavrg_vec_matches(retval, aligned_zz, context));
+        return retval;
+        // if (block.context().above_right.initialized()) {
+        // total += abs(block.context().above_right.get()->coefficients().at(0));
+        //}
+    }
+#endif
+    // Vector core of the edge predictor. With x = current-block coefficients (coeffs_x_low /
+    // coeffs_x_high, lanes 0..7) and a = neighbour coefficients (coeffs_a_low / high), computes
+    //     sum_i( (((i & 1) ? -1 : 1) * a[i] - x[i]) * icos_deq[i] ) / icos_deq[0]
+    // using 32-bit lanes.
+    //   icos_deq - eight int32 factors, read with _mm_load_si128 (the aligned-load intrinsic).
+    // On _WIN32 the fourth vector argument is taken by reference (see the #ifdef inside the
+    // parameter list); the reason is not visible here -- presumably a by-value alignment
+    // limitation of that toolchain.
+    static int32_t compute_lak_vec(__m128i coeffs_x_low,
+                                   __m128i coeffs_x_high,
+                                   __m128i coeffs_a_low,
+                                   __m128i
+#ifdef _WIN32
+                                       &
+#endif
+                                           indirect_coeffs_a_high,
+                                   const int32_t* icos_deq) {
+        __m128i sign_mask = _mm_set_epi32(-1, 1, -1, 1); // ((i & 1) ? -1 : 1)
+
+        // coeffs_x[i] = ((i & 1) ? -1 : 1) * coeffs_a[i] - coeffs_x[i];
+        coeffs_a_low = _mm_sign_epi32(coeffs_a_low, sign_mask);
+        __m128i coeffs_a_high = _mm_sign_epi32(indirect_coeffs_a_high, sign_mask);
+        coeffs_x_low = _mm_sub_epi32(coeffs_a_low, coeffs_x_low);
+        coeffs_x_high = _mm_sub_epi32(coeffs_a_high, coeffs_x_high);
+
+        __m128i icos_low = _mm_load_si128((const __m128i*)(const char*)icos_deq);
+        __m128i icos_high = _mm_load_si128((const __m128i*)(const char*)(icos_deq + 4));
+        // coeffs_x[i] *= icos[i]
+        __m128i deq_low = _mm_mullo_epi32(coeffs_x_low, icos_low);
+        __m128i deq_high = _mm_mullo_epi32(coeffs_x_high, icos_high);
+
+        // Horizontal add: fold the eight products down into lane 0.
+        __m128i sum = _mm_add_epi32(deq_low, deq_high);
+        sum = _mm_add_epi32(sum, _mm_srli_si128(sum, 8));
+        sum = _mm_add_epi32(sum, _mm_srli_si128(sum, 4));
+        // coeffs_x[0] = sum(coeffs_x)
+        int32_t prediction = _mm_cvtsi128_si32(sum);
+        // if (prediction > 0) { <-- rounding hurts prediction perf and costs compute  this rounding didn't round the
+        // same way as the unvectorized one anyhow
+        //    prediction += icos_deq[0]/2;
+        //} else {
+        //    prediction -= icos_deq[0]/2; // round away from zero
+        //}
+        return prediction / icos_deq[0];
+    }
+// ITER(x_var, a_var, i, step): loads four consecutive edge coefficients into two vectors.
+//   x_var <- current block, raster positions band + step * (i .. i + 3)
+//   a_var <- neighbour,     raster positions band + step * (i .. i + 3)
+// When i == 0 the last _mm_set_epi32 argument of x_var (position band itself) is forced to 0.
+// The expansion relies on `context`, `neighbor` and `band` being in scope at the call site.
+#define ITER(x_var, a_var, i, step)                                                                                    \
+    (x_var = _mm_set_epi32(context.here().coefficients_raster(band + step * ((i) + 3)),                                \
+                           context.here().coefficients_raster(band + step * ((i) + 2)),                                \
+                           context.here().coefficients_raster(band + step * ((i) + 1)),                                \
+                           i == 0 ? 0 : context.here().coefficients_raster(band + step * (i))),                        \
+     a_var = _mm_set_epi32(                                                                                            \
+         neighbor.coefficients_raster(band + step * ((i) + 3)), neighbor.coefficients_raster(band + step * ((i) + 2)), \
+         neighbor.coefficients_raster(band + step * ((i) + 1)), neighbor.coefficients_raster(band + step * (i))))
+
+    // Compile-time-band version of the edge predictor. `band` must lie on the first row
+    // (band >> 3 == 0) or the first column ((band & 7) == 0); this is enforced by the
+    // static_assert.
+    //   - first row:    uses the above neighbour with step 8 and the _x factor table
+    //                   offset by band * 8;
+    //   - otherwise:    uses the left neighbour with step 1 and the _y factor table
+    //                   offset by band.
+    // Returns 0 when the required neighbour is not available, otherwise the result of
+    // compute_lak_vec().
+    template <int band>
+#ifndef _WIN32
+    __attribute__((always_inline))
+#endif
+    int32_t
+    compute_lak_templ(const ConstBlockContext& context) {
+        __m128i coeffs_x_low;
+        __m128i coeffs_x_high;
+        __m128i coeffs_a_low;
+        __m128i coeffs_a_high;
+        const int32_t* icos = nullptr;
+        static_assert((band & 7) == 0 || (band >> 3) == 0, "This function only works on edges");
+        if ((band >> 3) == 0) {
+            if (all_present == false && !above_present) {
+                return 0;
+            }
+            // ITER reads `neighbor`, `context` and `band` from this scope.
+            const auto& neighbor = context.above_unchecked();
+            ITER(coeffs_x_low, coeffs_a_low, 0, 8);
+            ITER(coeffs_x_high, coeffs_a_high, 4, 8);
+            icos = ProbabilityTablesBase::icos_idct_edge_8192_dequantized_x((int)COLOR) + band * 8;
+        } else {
+            if (all_present == false && !left_present) {
+                return 0;
+            }
+            const auto& neighbor = context.left_unchecked();
+            ITER(coeffs_x_low, coeffs_a_low, 0, 1);
+            ITER(coeffs_x_high, coeffs_a_high, 4, 1);
+            icos = ProbabilityTablesBase::icos_idct_edge_8192_dequantized_y((int)COLOR) + band;
+        }
+        return compute_lak_vec(coeffs_x_low, coeffs_x_high, coeffs_a_low, coeffs_a_high, icos);
+    }
+    // Runtime-band edge predictor for the first row (band / 8 == 0, asserted). Uses the above
+    // neighbour with step 8 and the _x factor table offset by band * 8.
+    // Returns 0 when the above neighbour is not available.
+    int32_t compute_lak_horizontal(const ConstBlockContext& context, unsigned int band) {
+        if (all_present == false && !above_present) {
+            return 0;
+        }
+        __m128i coeffs_x_low;
+        __m128i coeffs_x_high;
+        __m128i coeffs_a_low;
+        __m128i coeffs_a_high;
+        assert(band / 8 == 0 && "this function only works for the top edge");
+        // ITER reads `neighbor`, `context` and `band` from this scope.
+        const auto& neighbor = context.above_unchecked();
+        ITER(coeffs_x_low, coeffs_a_low, 0, 8);
+        ITER(coeffs_x_high, coeffs_a_high, 4, 8);
+        const int32_t* icos = ProbabilityTablesBase::icos_idct_edge_8192_dequantized_x((int)COLOR) + band * 8;
+        return compute_lak_vec(coeffs_x_low, coeffs_x_high, coeffs_a_low, coeffs_a_high, icos);
+    }
+    // Runtime-band edge predictor for the first column ((band & 7) == 0, asserted). Uses the
+    // left neighbour with step 1 and the _y factor table offset by band.
+    // Returns 0 when the left neighbour is not available.
+    int32_t compute_lak_vertical(const ConstBlockContext& context, unsigned int band) {
+        assert((band & 7) == 0 && "Must be used for veritcal");
+        if (all_present == false && !left_present) {
+            return 0;
+        }
+        __m128i coeffs_x_low;
+        __m128i coeffs_x_high;
+        __m128i coeffs_a_low;
+        __m128i coeffs_a_high;
+        // ITER reads `neighbor`, `context` and `band` from this scope.
+        const auto& neighbor = context.left_unchecked();
+        ITER(coeffs_x_low, coeffs_a_low, 0, 1);
+        ITER(coeffs_x_high, coeffs_a_high, 4, 1);
+// Last use of ITER in this file section; the helper macro is retired here.
+#undef ITER
+        const int32_t* icos = ProbabilityTablesBase::icos_idct_edge_8192_dequantized_y((int)COLOR) + band;
+        return compute_lak_vec(coeffs_x_low, coeffs_x_high, coeffs_a_low, coeffs_a_high, icos);
+    }
+    // Scalar edge predictor for a coefficient on the first row or first column.
+    //   - (band & 7) != 0: first-row coefficient, predicted from the above neighbour using
+    //     positions band + 8 * i and the _x factor table offset by band * 8;
+    //   - (band & 7) == 0: first-column coefficient, predicted from the left neighbour using
+    //     positions band + i and the _y factor table offset by band.
+    // The result is
+    //     ( a[0] * c[0] - sum_{i=1..7} c[i] * (x[i] + s_i * a[i]) ) / c[0],  s_i = +1 for odd i, -1 for even i
+    // where x are the current block's coefficients, a the neighbour's and c the factors.
+    // Returns 0 when the required neighbour is not available.
+    int32_t compute_lak(const ConstBlockContext& context, unsigned int band) {
+        int coeffs_x[8];
+        int coeffs_a[8];
+        const int32_t* coef_idct = nullptr;
+        if ((band & 7) && (all_present || above_present)) {
+            // y == 0: we're the x
+            assert(band / 8 == 0); // this function only works for the edge
+            const auto& above = context.above_unchecked();
+            for (int i = 0; i < 8; ++i) {
+                uint8_t cur_coef = band + i * 8;
+                coeffs_x[i] = i ? context.here().coefficients_raster(cur_coef) : 0;
+                coeffs_a[i] = above.coefficients_raster(cur_coef);
+            }
+            coef_idct = ProbabilityTablesBase::icos_idct_edge_8192_dequantized_x((int)COLOR) + band * 8;
+        } else if ((band & 7) == 0 && (all_present || left_present)) {
+            // x == 0: we're the y
+            // all_present is honoured here as well, matching the first-row branch above and
+            // the availability test in compute_lak_vertical().
+            const auto& left = context.left_unchecked();
+            for (int i = 0; i < 8; ++i) {
+                uint8_t cur_coef = band + i;
+                coeffs_x[i] = i ? context.here().coefficients_raster(cur_coef) : 0;
+                coeffs_a[i] = left.coefficients_raster(cur_coef);
+            }
+            coef_idct = ProbabilityTablesBase::icos_idct_edge_8192_dequantized_y((int)COLOR) + band;
+        } else {
+            return 0;
+        }
+        int prediction =
+            coeffs_a[0] *
+            coef_idct[0]; // rounding towards zero before adding coeffs_a[0] helps ratio slightly, but this is cheaper
+        for (int i = 1; i < 8; ++i) {
+            int sign = (i & 1) ? 1 : -1;
+            prediction -= coef_idct[i] * (coeffs_x[i] + sign * coeffs_a[i]);
+        }
+        prediction /= coef_idct[0];
+        //        assert(((band & 7) ? compute_lak_horizontal(context,band): compute_lak_vertical(context,band)) ==
+        //        prediction
+        //               && "Vectorized version must match sequential version");
+        return prediction;
+    }
+    Sirikata::Array1d<Branch, (1 << RESIDUAL_NOISE_FLOOR)>::Slice residual_thresh_array(
+        ProbabilityTablesBase& pt,
+        const unsigned int band,
+        const uint8_t cur_exponent,
+        const CoefficientContext context,
+        int min_threshold) {
+        uint16_t ctx_abs = abs(context.best_prior);
+        ANNOTATE_CTX(band, THRESH8, 0, ctx_abs >> min_threshold);
+        ANNOTATE_CTX(band, THRESH8, 2, cur_exponent - min_threshold);
+        return pt.model().residual_threshold_counts_.at(
+            color_index(), std::min(ctx_abs >> min_threshold, (uint16_t)Model::ResidualThresholdCounts::size1 - 1),
+            std::min(cur_exponent - min_threshold, Model::ResidualThresholdCounts::size2 - 1));
+    }
+    void residual_thresh_array_annot_update(const unsigned int band, uint16_t cur_serialized_thresh_value) {
+        (void)band;
+        (void)cur_serialized_thresh_value;
+        ANNOTATE_CTX(band, THRESH8, 1, cur_serialized_thresh_value);
+    }
+    enum SignValue {
+        ZERO_SIGN = 0,
+        POSITIVE_SIGN = 1,
+        NEGATIVE_SIGN = 2,
+    };
+    Branch& sign_array_dc(ProbabilityTablesBase& pt, int avg_delta, int offset_to_closest_edge) { // avg_delta is not read
+        ANNOTATE_CTX(0, SIGNDC, 0, 1);
+        // Last index: 1 for a negative offset, 3 for an offset of exactly zero, 2 for a positive offset.
+        const int edge_slot = offset_to_closest_edge < 0 ? 1 : (offset_to_closest_edge == 0 ? 3 : 2);
+        return pt.model().sign_counts_.at(color_index(), 0, edge_slot); }
+    Branch& sign_array_7x7(ProbabilityTablesBase& pt, uint8_t band, CoefficientContext context) {
+        // ANNOTATE_CTX(band, SIGN7x7, 0, 0);
+        return pt.model().sign_counts_.at(color_index(), 0, 0);
+    }
+    Branch& sign_array_8(ProbabilityTablesBase& pt, uint8_t band, CoefficientContext context) {
+        int16_t val = context.best_prior;
+        uint8_t ctx0 = context.bsr_best_prior;
+        uint8_t ctx1 = (val == 0 ? 0 : (val > 0 ? 1 : 2));
+        ANNOTATE_CTX(band, SIGN8, 0, ctx0);
+        ANNOTATE_CTX(band, SIGN8, 1, ctx1);
+        // return pt.model().sign_counts_.at(color_index(), ctx1, ctx0);
+        return pt.model().sign_counts_.at(
+            color_index(), (context.best_prior == 0 ? 0 : (context.best_prior > 0 ? 1 : 2)), context.bsr_best_prior);
+    }
+
+    uint8_t get_noise_threshold(int coord) { return ProbabilityTablesBase::min_noise_threshold((int)COLOR, coord); }
+    void optimize(ProbabilityTablesBase& pt) { optimize_model(pt.model()); }
+    void serialize(ProbabilityTablesBase& pt, int output_fd) const { serialize_model(pt.model(), output_fd); }
+
+    // this reduces the counts to something easier to override by new data
+    void normalize(ProbabilityTablesBase& pt) { normalize_model(pt.model()); }
+};
+
+#endif /* DECODER_HH */
diff --git a/codec/L2/demos/leptonEnc/host/vp8/model/numeric.cc b/codec/L2/demos/leptonEnc/host/vp8/model/numeric.cc
new file mode 100644
index 0000000000..dd8ff0b84d
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/model/numeric.cc
@@ -0,0 +1 @@
+#include "numeric.hh"
diff --git a/codec/L2/demos/leptonEnc/host/vp8/model/numeric.hh b/codec/L2/demos/leptonEnc/host/vp8/model/numeric.hh
new file mode 100644
index 0000000000..5222713da0
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/model/numeric.hh
@@ -0,0 +1,304 @@
+#ifndef _VP8_MODEL_NUMERIC_HH_
+#define _VP8_MODEL_NUMERIC_HH_
+//#define DEBUGDECODE
+// for uint16_t
+#include <cstdint>
+// for pair
+#include <utility>
+// for std::min
+#include <algorithm>
+#include <assert.h>
+#include <immintrin.h>
+#include <tmmintrin.h>
+#include "../util/mm_mullo_epi32.hh"
+
+#ifdef _WIN32
+#include <intrin.h>
+#pragma intrinsic(_BitScanReverse)
+static uint32_t __inline __builtin_clz(uint32_t x) { // _WIN32-only stand-in for the GCC/Clang builtin of the same name
+    unsigned long r = 0;
+    _BitScanReverse(&r, x); // return value (whether any bit was found) is ignored
+    return 31 - r; // NOTE(review): result for x == 0 depends on what _BitScanReverse leaves in r — confirm callers never pass 0
+}
+static uint64_t __inline __builtin_clzl(uint64_t x) { // _WIN32-only 64-bit leading-zero count built from the 32-bit shim
+    uint64_t first_half = x;
+    first_half >>= 16; // two 16-bit shifts leave x >> 32, i.e. the upper 32 bits
+    first_half >>= 16;
+    if (first_half) {
+        return __builtin_clz(first_half); // a bit is set in the upper half: count within it
+    }
+    return 32 + __builtin_clz(x & 0xffffffffU); // upper half is all zero: 32 plus the count within the lower half
+}
+#endif
+
+static constexpr uint8_t LogTable16[16] = {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3};
+static constexpr char LogTable256[256] = {
+#define LT(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n
+    0,     0,     1,     1,     2,     2,     2,     2,     3,     3,     3,     3,     3,     3,     3,    3,
+    LT(4), LT(5), LT(5), LT(6), LT(6), LT(6), LT(6), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7)
+#undef LT
+};
+static constexpr uint8_t LenTable16[16] = {0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4};
+static constexpr char LenTable256[256] = {
+#define LT(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n
+    0,     1,     2,     2,     3,     3,     3,     3,     4,     4,     4,     4,     4,     4,     4,    4,
+    LT(5), LT(6), LT(6), LT(7), LT(7), LT(7), LT(7), LT(8), LT(8), LT(8), LT(8), LT(8), LT(8), LT(8), LT(8)
+#undef LT
+};
+
+#if 0
+inline constexpr uint8_t k16log2(uint16_t v) {
+    return (v & 0xfff0) ? (v & 0xff00) ? (v & 0xf000)
+    ? 12 + LogTable16[v >> 12]
+    : 8 + LogTable16[v>>8]
+    : 4 + LogTable16[v>>4]
+    : LogTable16[v];
+}
+inline constexpr uint8_t uint16bit_length(uint16_t v) {
+    return (v & 0xfff0) ? (v & 0xff00) ? (v & 0xf000)
+    ? 12 + LenTable16[v >> 12]
+    : 8 + LenTable16[v>>8]
+    : 4 + LenTable16[v>>4]
+    : LenTable16[v];
+}
+#else
+inline constexpr uint8_t k16log2(uint16_t v) {
+    return (v & 0xff00) ? 8 + LogTable256[v >> 8] : LogTable256[v];
+}
+inline constexpr uint8_t k16bit_length(uint16_t v) {
+    return (v & 0xff00) ? 8 + LenTable256[v >> 8] : LenTable256[v];
+}
+inline uint8_t uint16log2(uint16_t v) { // index of the highest set bit of v, from a leading-zero count on the widened value
+    return 31 - __builtin_clz((uint32_t)v); // no zero guard here (unlike uint16bit_length); the GCC/Clang builtin is undefined for 0
+}
+inline uint8_t nonzero_bit_length(uint16_t v) {
+    assert(v);
+    return 32 - __builtin_clz((uint32_t)v);
+}
+inline uint8_t uint16bit_length(uint16_t v) {
+    return v ? 32 - __builtin_clz((uint32_t)v) : 0;
+}
+#endif
+
+constexpr uint8_t log_max_numerator = 18;
+
+inline constexpr uint32_t computeDivisor(uint16_t d) { // multiplier stored per denominator in DivisorAndLog2Table / DivisorMultipliers
+    return ((((1 << k16bit_length(d)) - d) << log_max_numerator) / d) + 1; // divides by d, so d must be non-zero; both tables hard-code index 0 instead
+}
+#define COMPUTE_DIVISOR(off)                                                                                  \
+    computeDivisor(off), computeDivisor(off + 1), computeDivisor(off + 2), computeDivisor(off + 3),           \
+        computeDivisor(off + 4), computeDivisor(off + 5), computeDivisor(off + 6), computeDivisor(off + 7),   \
+        computeDivisor(off + 8), computeDivisor(off + 9), computeDivisor(off + 10), computeDivisor(off + 11), \
+        computeDivisor(off + 12), computeDivisor(off + 13), computeDivisor(off + 14), computeDivisor(off + 15)
+#define COMPUTE_DIVISOR_x100(off) \
+    COMPUTE_DIVISOR(off + 0x00)   \
+    , COMPUTE_DIVISOR(off + 0x10), COMPUTE_DIVISOR(off + 0x20), COMPUTE_DIVISOR(off + 0x30)
+
+#define COMPUTE_LOG2(off)                                                                                             \
+    k16log2(off), k16log2(off + 1), k16log2(off + 2), k16log2(off + 3), k16log2(off + 4), k16log2(off + 5),           \
+        k16log2(off + 6), k16log2(off + 7), k16log2(off + 8), k16log2(off + 9), k16log2(off + 10), k16log2(off + 11), \
+        k16log2(off + 12), k16log2(off + 13), k16log2(off + 14), k16log2(off + 15)
+
+#define COMPUTE_LOG2_x100(off) \
+    COMPUTE_LOG2(off + 0x00)   \
+    , COMPUTE_LOG2(off + 0x10), COMPUTE_LOG2(off + 0x20), COMPUTE_LOG2(off + 0x30)
+
+#define COMPUTE_DIVISOR_AND_LOG2(off)                                                                 \
+    {computeDivisor(off), k16log2(off)}, {computeDivisor(off + 1), k16log2(off + 1)},                 \
+        {computeDivisor(off + 2), k16log2(off + 2)}, {computeDivisor(off + 3), k16log2(off + 3)},     \
+        {computeDivisor(off + 4), k16log2(off + 4)}, {computeDivisor(off + 5), k16log2(off + 5)},     \
+        {computeDivisor(off + 6), k16log2(off + 6)}, {computeDivisor(off + 7), k16log2(off + 7)},     \
+        {computeDivisor(off + 8), k16log2(off + 8)}, {computeDivisor(off + 9), k16log2(off + 9)},     \
+        {computeDivisor(off + 10), k16log2(off + 10)}, {computeDivisor(off + 11), k16log2(off + 11)}, \
+        {computeDivisor(off + 12), k16log2(off + 12)}, {computeDivisor(off + 13), k16log2(off + 13)}, \
+        {computeDivisor(off + 14), k16log2(off + 14)}, {                                              \
+        computeDivisor(off + 15), k16log2(off + 15)                                                   \
+    }
+#define COMPUTE_DIVISOR_AND_LOG2_x100(off) \
+    COMPUTE_DIVISOR_AND_LOG2(off + 0x00)   \
+    , COMPUTE_DIVISOR_AND_LOG2(off + 0x10), COMPUTE_DIVISOR_AND_LOG2(off + 0x20), COMPUTE_DIVISOR_AND_LOG2(off + 0x30)
+
+struct DivisorLog2 { // element type of DivisorAndLog2Table
+    uint32_t divisor; // filled with computeDivisor(d) for table index d (0 for index 0)
+    uint8_t len; // filled with k16log2(d); the divide helpers use it as their final right-shift amount
+};
+static constexpr DivisorLog2 DivisorAndLog2Table[1026] = {{0, 0},
+                                                          {computeDivisor(1), k16log2(1)},
+                                                          {computeDivisor(2), k16log2(2)},
+                                                          {computeDivisor(3), k16log2(3)},
+                                                          {computeDivisor(4), k16log2(4)},
+                                                          {computeDivisor(5), k16log2(5)},
+                                                          {computeDivisor(6), k16log2(6)},
+                                                          {computeDivisor(7), k16log2(7)},
+                                                          {computeDivisor(8), k16log2(8)},
+                                                          {computeDivisor(9), k16log2(9)},
+                                                          {computeDivisor(10), k16log2(10)},
+                                                          {computeDivisor(11), k16log2(11)},
+                                                          {computeDivisor(12), k16log2(12)},
+                                                          {computeDivisor(13), k16log2(13)},
+                                                          {computeDivisor(14), k16log2(14)},
+                                                          {computeDivisor(15), k16log2(15)},
+                                                          COMPUTE_DIVISOR_AND_LOG2(0x10),
+                                                          COMPUTE_DIVISOR_AND_LOG2(0x20),
+                                                          COMPUTE_DIVISOR_AND_LOG2(0x30),
+                                                          COMPUTE_DIVISOR_AND_LOG2_x100(0x40),
+                                                          COMPUTE_DIVISOR_AND_LOG2_x100(0x80),
+                                                          COMPUTE_DIVISOR_AND_LOG2_x100(0xc0),
+                                                          COMPUTE_DIVISOR_AND_LOG2_x100(0x100),
+                                                          COMPUTE_DIVISOR_AND_LOG2_x100(0x140),
+                                                          COMPUTE_DIVISOR_AND_LOG2_x100(0x180),
+                                                          COMPUTE_DIVISOR_AND_LOG2_x100(0x1c0),
+                                                          COMPUTE_DIVISOR_AND_LOG2_x100(0x200),
+                                                          COMPUTE_DIVISOR_AND_LOG2_x100(0x240),
+                                                          COMPUTE_DIVISOR_AND_LOG2_x100(0x280),
+                                                          COMPUTE_DIVISOR_AND_LOG2_x100(0x2c0),
+                                                          COMPUTE_DIVISOR_AND_LOG2_x100(0x300),
+                                                          COMPUTE_DIVISOR_AND_LOG2_x100(0x340),
+                                                          COMPUTE_DIVISOR_AND_LOG2_x100(0x380),
+                                                          COMPUTE_DIVISOR_AND_LOG2_x100(0x3c0),
+                                                          {computeDivisor(0x400), k16log2(0x400)},
+                                                          {computeDivisor(0x401), k16log2(0x401)}};
+
+static constexpr uint32_t Log2Table[1026] = {
+    COMPUTE_LOG2_x100(0x00),  COMPUTE_LOG2_x100(0x40),  COMPUTE_LOG2_x100(0x80),
+    COMPUTE_LOG2_x100(0xc0),  COMPUTE_LOG2_x100(0x100), COMPUTE_LOG2_x100(0x140),
+    COMPUTE_LOG2_x100(0x180), COMPUTE_LOG2_x100(0x1c0), COMPUTE_LOG2_x100(0x200),
+    COMPUTE_LOG2_x100(0x240), COMPUTE_LOG2_x100(0x280), COMPUTE_LOG2_x100(0x2c0),
+    COMPUTE_LOG2_x100(0x300), COMPUTE_LOG2_x100(0x340), COMPUTE_LOG2_x100(0x380),
+    COMPUTE_LOG2_x100(0x3c0), k16log2(0x400),           k16log2(0x401)};
+
+static constexpr uint32_t DivisorMultipliers[1026] = {0,
+                                                      computeDivisor(1),
+                                                      computeDivisor(2),
+                                                      computeDivisor(3),
+                                                      computeDivisor(4),
+                                                      computeDivisor(5),
+                                                      computeDivisor(6),
+                                                      computeDivisor(7),
+                                                      computeDivisor(8),
+                                                      computeDivisor(9),
+                                                      computeDivisor(10),
+                                                      computeDivisor(11),
+                                                      computeDivisor(12),
+                                                      computeDivisor(13),
+                                                      computeDivisor(14),
+                                                      computeDivisor(15),
+                                                      COMPUTE_DIVISOR(0x10),
+                                                      COMPUTE_DIVISOR(0x20),
+                                                      COMPUTE_DIVISOR(0x30),
+                                                      COMPUTE_DIVISOR_x100(0x40),
+                                                      COMPUTE_DIVISOR_x100(0x80),
+                                                      COMPUTE_DIVISOR_x100(0xc0),
+                                                      COMPUTE_DIVISOR_x100(0x100),
+                                                      COMPUTE_DIVISOR_x100(0x140),
+                                                      COMPUTE_DIVISOR_x100(0x180),
+                                                      COMPUTE_DIVISOR_x100(0x1c0),
+                                                      COMPUTE_DIVISOR_x100(0x200),
+                                                      COMPUTE_DIVISOR_x100(0x240),
+                                                      COMPUTE_DIVISOR_x100(0x280),
+                                                      COMPUTE_DIVISOR_x100(0x2c0),
+                                                      COMPUTE_DIVISOR_x100(0x300),
+                                                      COMPUTE_DIVISOR_x100(0x340),
+                                                      COMPUTE_DIVISOR_x100(0x380),
+                                                      COMPUTE_DIVISOR_x100(0x3c0),
+                                                      computeDivisor(0x400),
+                                                      computeDivisor(0x401)};
+
+constexpr uint32_t fast_divide18bit_by_10bit(uint32_t num, uint16_t denom) {
+    return ((uint32_t)((DivisorAndLog2Table[denom].divisor * (uint64_t)num) >> log_max_numerator) +
+            ((uint32_t)(num - (((uint64_t)DivisorAndLog2Table[denom].divisor * (uint64_t)num) >> log_max_numerator)) >>
+             1)) >>
+           DivisorAndLog2Table[denom].len;
+}
+constexpr uint16_t fast_divide18bit_by_10bit2(uint16_t num, uint16_t denom) {
+    return ((uint32_t)((DivisorAndLog2Table[denom].divisor * (uint64_t)num) >> log_max_numerator) +
+            ((uint32_t)(num - (((uint64_t)DivisorAndLog2Table[denom].divisor * (uint64_t)num) >> log_max_numerator)) >>
+             1)) >>
+           DivisorAndLog2Table[denom].len;
+}
+constexpr uint32_t fast_divide16bit(uint32_t num, uint16_t denom) {
+    return ((uint32_t)((DivisorAndLog2Table[denom].divisor * (uint32_t)num) >> log_max_numerator) +
+            ((uint32_t)(num - (((uint32_t)DivisorAndLog2Table[denom].divisor * (uint32_t)num) >> log_max_numerator)) >>
+             1)) >>
+           DivisorAndLog2Table[denom].len;
+}
+template <uint16_t denom>
+constexpr uint32_t templ_divide16bit(uint32_t num) {
+    static_assert(denom < 1024, "Only works for denominators < 1024");
+    return ((uint32_t)((DivisorAndLog2Table[denom].divisor * (uint32_t)num) >> log_max_numerator) +
+            ((uint32_t)(num - (((uint32_t)DivisorAndLog2Table[denom].divisor * (uint32_t)num) >> log_max_numerator)) >>
+             1)) >>
+           DivisorAndLog2Table[denom].len;
+}
+
+template <uint16_t denom>
+__m128i divide16bit_vec_signed(__m128i num) { // runs the table-driven divide on the absolute value of each 32-bit lane, then reapplies num's sign
+    static_assert(denom < 1024, "Only works for denominators < 1024");
+    __m128i m = _mm_set1_epi32(DivisorAndLog2Table[denom].divisor); // same multiplier in every lane
+    __m128i abs_num = _mm_abs_epi32(num);
+    __m128i t = _mm_srli_epi32(_mm_mullo_epi32(m, abs_num), log_max_numerator); // presumably keeps only the low 32 bits of each product — confirm input range
+    __m128i n_minus_t = _mm_sub_epi32(abs_num, t);
+    __m128i t_plus_shr = _mm_add_epi32(t, _mm_srli_epi32(n_minus_t, 1));
+    __m128i retval = _mm_srli_epi32(t_plus_shr, DivisorAndLog2Table[denom].len);
+    return _mm_sign_epi32(retval, num); // negates lanes where num is negative and zeroes lanes where num is 0
+}
+template <uint16_t denom>
+__m128i divide16bit_vec(__m128i num) {
+    static_assert(denom < 1024, "Only works for denominators < 1024");
+    __m128i m = _mm_set1_epi32(DivisorAndLog2Table[denom].divisor);
+    __m128i t = _mm_srli_epi32(_mm_mullo_epi32(m, num), log_max_numerator);
+    __m128i n_minus_t = _mm_sub_epi32(num, t);
+    __m128i t_plus_shr = _mm_add_epi32(t, _mm_srli_epi32(n_minus_t, 1));
+    return _mm_srli_epi32(t_plus_shr, DivisorAndLog2Table[denom].len);
+}
+
+inline uint32_t slow_divide18bit_by_10bit(uint32_t num, uint16_t denom) { // multiply-and-shift division via DivisorAndLog2Table, written out step by step
+#if 0
+    uint64_t m = DivisorMultipliers[denom];
+    int log2d = k16log2(denom);
+    //assert(log2d==DivisorAndLog2Table[denom].len);
+#else
+    auto dl = DivisorAndLog2Table[denom]; // the table has 1026 entries, so denom must be at most 1025
+    uint64_t m = dl.divisor; // 64-bit so that m * num below is not truncated before the shift
+    uint8_t log2d = dl.len;
+#endif
+    uint32_t t = (m * num) >> log_max_numerator;
+    uint32_t n_minus_t = num - t;
+    uint32_t t_plus_shr = t + (n_minus_t >> 1);
+    // assert(uint16bit_length(denom) - 1 == log2d);
+    uint32_t retval = t_plus_shr >> (log2d);
+    // assert(num / denom == retval);
+    return retval; // intended to equal num / denom, per the disabled assert above
+}
+
+enum NumericConstants : uint8_t {
+    NUMERIC_LENGTH_MAX = 12,
+    NUMERIC_LENGTH_BITS = 4,
+};
+template <typename intt>
+intt local_log2(intt v) {
+    constexpr int loop_max = (int)(sizeof(intt) == 1 ? 2 : (sizeof(intt) == 2 ? 3 : (sizeof(intt) == 4 ? 4 : 5)));
+    constexpr intt b[] = {0x2, 0xC, 0xF0, (intt)0xFF00, (intt)0xFFFF0000U, (intt)0xFFFFFFFF00000000ULL};
+    constexpr intt S[] = {1, 2, 4, 8, 16, 32};
+    intt r = 0;                                // result of log2(v) will go here
+    for (signed int i = loop_max; i >= 0; i--) // unroll for speed...
+    {
+        if (v & b[i]) {
+            v >>= S[i];
+            r |= S[i];
+        }
+    }
+    return r;
+}
+
+template <typename intt>
+intt bit_length(intt v) { // number of bits needed to hold v (position of the highest set bit, plus one); 0 when v == 0
+    if (sizeof(intt) <= 4) {
+        return v ? 32 - __builtin_clz((uint32_t)v) : 0; // v is widened to 32 bits before counting leading zeros
+    } else {
+        return v ? 64 - __builtin_clzl((uint64_t)v) : 0; // NOTE(review): assumes __builtin_clzl takes a 64-bit operand — confirm for 32-bit targets
+    }
+    // Both branches above return; the former local_log2-based fallback here was unreachable and has been dropped.
+}
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/vp8/util/aligned_block.hh b/codec/L2/demos/leptonEnc/host/vp8/util/aligned_block.hh
new file mode 100644
index 0000000000..bbd822801a
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/util/aligned_block.hh
@@ -0,0 +1,124 @@
+#ifndef _ALIGNED_BLOCK_HH_
+#define _ALIGNED_BLOCK_HH_
+#include <assert.h>
+#include "nd_array.hh"
+#include "jpeg_meta.hh"
+#include "../model/color_context.hh"
+
+#define OPTIMIZED_7x7
+static constexpr Sirikata::Array1d<uint8_t, 64> jpeg_zigzag_to_raster = {
+    {0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,  12, 19, 26, 33, 40, 48,
+     41, 34, 27, 20, 13, 6,  7,  14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23,
+     30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63}};
+
+static constexpr Sirikata::Array1d<uint8_t, 64> raster_to_jpeg_zigzag = {
+    {0,  1,  5,  6,  14, 15, 27, 28, 2,  4,  7,  13, 16, 26, 29, 42, 3,  8,  12, 17, 25, 30,
+     41, 43, 9,  11, 18, 24, 31, 40, 44, 53, 10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38,
+     46, 51, 55, 60, 21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63}};
+
+#ifdef OPTIMIZED_7x7
+static constexpr Sirikata::Array1d<uint8_t, 64> aligned_to_raster = {{
+    9,  10, 17, 25, 18, 11, 12, 19, 26, 33, 41, 34, 27, 20, 13, 14, 21, 28, 35, 42, 49, 57, 50, 43, 36,
+    29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
+    0,                          // DC
+    1,  2,  3,  4,  5,  6,  7,  // 1x8
+    8,  16, 24, 32, 40, 48, 56, // 8x1
+}};
+
+static constexpr Sirikata::Array1d<uint8_t, 64> raster_to_aligned = {
+    {49, 50, 51, 52, 53, 54, 55, 56, 57, 0,  1,  5,  6,  14, 15, 27, 58, 2,  4,  7,  13, 16,
+     26, 28, 59, 3,  8,  12, 17, 25, 29, 38, 60, 9,  11, 18, 24, 30, 37, 39, 61, 10, 19, 23,
+     31, 36, 40, 45, 62, 20, 22, 32, 35, 41, 44, 46, 63, 21, 33, 34, 42, 43, 47, 48}};
+static constexpr Sirikata::Array1d<uint8_t, 64> zigzag_to_aligned = {
+    {49, 50, 57, 58, 0,  51, 52, 1,  2,  59, 60, 3,  4,  5,  53, 54, 6,  7,  8,  9,  61, 62,
+     10, 11, 12, 13, 14, 55, 56, 15, 16, 17, 18, 19, 20, 63, 21, 22, 23, 24, 25, 26, 27, 28,
+     29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48}};
+
+static constexpr Sirikata::Array1d<uint8_t, 64> aligned_to_zigzag = {
+    {4,  7,  8,  11, 12, 13, 16, 17, 18, 19, 22, 23, 24, 25, 26, 29, 30, 31, 32, 33, 34, 36,
+     37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
+     59, 60, 61, 62, 63, 0,  1,  5,  6,  14, 15, 27, 28, 2,  3,  9,  10, 20, 21, 35}};
+#else
+#define aligned_to_zigzag raster_to_jpeg_zigzag
+#define zigzag_to_aligned jpeg_zigzag_to_raster
+struct IdentityArray1d {
+    static uint8_t at(uint8_t a) { return a; }
+    template <int a>
+    static constexpr uint8_t kat() {
+        return a;
+    }
+};
+static IdentityArray1d raster_to_aligned;
+static IdentityArray1d aligned_to_raster;
+#endif
+
+class BoolEncoder;
+class BoolDecoder;
+struct BlockColorContext;
+
+enum class ColorChannel {
+    Y,
+    Cb,
+    Cr,
+#ifdef ALLOW_FOUR_COLORS
+    Ck,
+#endif
+    NumBlockTypes
+};
+
+class AlignedBlock { // 64 int16_t coefficients, reachable by raster or zigzag index through the remapping tables
+#ifdef OPTIMIZED_7x7
+   public:
+#endif
+    Sirikata::Array1d<int16_t, 64> coef = {{{}}}; // value-initialized storage; public only when OPTIMIZED_7x7 is defined
+    enum Index : uint8_t { // positions within coef
+#ifdef OPTIMIZED_7x7
+        AC_7x7_INDEX = 0,
+        AC_7x7_END = 49,
+        DC_INDEX = 49,
+        ROW_X_INDEX = 50,
+        ROW_X_END = 57,
+        ROW_Y_INDEX = 57,
+        ROW_Y_END = 64
+#else
+        // AC_7x7_INDEX = 9,
+        // AC_7x7_END = 63,
+        DC_INDEX = 0,
+// ROW_X_INDEX = 1,
+// ROW_X_END = 7,
+// ROW_Y_INDEX = 57,
+// ROW_Y_END = 64
+#endif
+    };
+   public:
+    AlignedBlock() {}
+    int16_t* raw_data() { return &coef.at(0); } // pointer to the first stored coefficient
+    const int16_t* raw_data() const { return &coef.at(0); }
+    uint8_t recalculate_coded_length() const { // despite the name, returns a count of non-zero coefficients, not a length
+        uint8_t num_nonzeros_7x7 = 0;
+        /* count the non-zero coefficients whose raster position has x > 0 and y > 0 */
+        for (unsigned int index = 0; index < 64; index++) {
+            unsigned int xy = jpeg_zigzag_to_raster.at(index);
+            unsigned int x = xy & 7; // column within the 8x8 block
+            unsigned int y = xy >> 3; // row within the 8x8 block
+            if (coef.at(raster_to_aligned.at(xy))) {
+                // coded_length_ = index + 1;
+                if (x > 0 && y > 0) {
+                    ++num_nonzeros_7x7;
+                }
+            }
+        }
+        return num_nonzeros_7x7;
+    }
+    void bzero() { coef.memset(0); }
+    int16_t& dc() { return coef.at(DC_INDEX); }
+    int16_t dc() const { return coef.at(DC_INDEX); }
+
+    int16_t& mutable_coefficients_raster(uint8_t index) { return coef.at(raster_to_aligned.at(index)); } // index is a raster position
+    int16_t coefficients_raster(uint8_t index) const { return coef.at(raster_to_aligned.at(index)); }
+
+    int16_t& mutable_coefficients_zigzag(uint8_t index) { return coef.at(zigzag_to_aligned.at(index)); } // index is a zigzag position
+    int16_t coefficients_zigzag(uint8_t index) const { return coef.at(zigzag_to_aligned.at(index)); }
+};
+
+#endif /* _ALIGNED_BLOCK_HH_ */
diff --git a/codec/L2/demos/leptonEnc/host/vp8/util/arithmetic_code.hh b/codec/L2/demos/leptonEnc/host/vp8/util/arithmetic_code.hh
new file mode 100644
index 0000000000..6c9760a43b
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/util/arithmetic_code.hh
@@ -0,0 +1,298 @@
+//
+// Generic arithmetic coding. Used both for recoded encoding/decoding
+//
+// Some notes on the data representations used by the encoder and decoder.
+// Uncompressed data:
+//   Symbols: b_1 ... b_n \in {0,1} .
+//   Probabilities: p_1 ... p_n \in [0,1], where p_i estimates the probability that b_i=1.
+// Compressed data:
+//   Arithmetic coding represents a compressed stream of symbols as an
+//   arbitrary-precision number C \in [0,1] .
+//   If the compressed digits in base M are c_k \in {0..M-1}, then
+//   C = \sum_{k=1}^K c_k M^{-k} .
+// Arithmetic coding uses the probabilities p_i to link the symbols b_i with
+// the compressed digits c_k:
+//   C_i = b_i ((1-p_i) + p_i C_{i+1}) + (1-b_i) (1-p_i) C_{i+1}
+//   C_i \in [0,1]
+//   C_1 = C = \sum_{k=1}^K c_k M^{-k}
+//   C_n is an arbitrary value in [0,1] (normally used to encode a stop bit).
+//
+
+#pragma once
+
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <iterator>
+#include <limits>
+
+template <typename FixedPoint = uint64_t, typename CompressedDigit = uint16_t, int MinRange = 0>
+struct arithmetic_code {
+   private:
+    static_assert(std::numeric_limits<FixedPoint>::is_exact, "integer types only");
+    static_assert(!std::numeric_limits<FixedPoint>::is_signed, "unsigned integer types only");
+
+    template <typename T>
+    static constexpr bool is_power_of_2(T x) {
+        static_assert(std::numeric_limits<T>::is_exact, "expected integer type");
+        return x == 0 ? false : ((x - 1) & x) == 0; // true when exactly one bit of x is set
+    }
+    template <typename Digit>
+    static constexpr FixedPoint digit_base_for() { // number of distinct values of Digit, as a FixedPoint
+        static_assert(std::numeric_limits<Digit>::is_exact, "integer types only");
+        static_assert(!std::numeric_limits<Digit>::is_signed, "unsigned integer types only");
+        static_assert(sizeof(FixedPoint) > sizeof(Digit), "digit must be smaller than fixed point");
+        static_assert(sizeof(FixedPoint) % sizeof(Digit) == 0, "digit must divide fixed point evenly");
+        static_assert(is_power_of_2(FixedPoint(std::numeric_limits<Digit>::max()) + 1), "expected power of 2");
+        return FixedPoint(std::numeric_limits<Digit>::max()) + 1; // max + 1, e.g. 0x10000 for a uint16_t Digit
+    }
+
+   public:
+    // The representation of 1.0 in fixed-point, e.g. 0x80000000 for uint32_t.
+    static constexpr FixedPoint fixed_one = std::numeric_limits<FixedPoint>::max() / 2 + 1;
+    // The base for compressed digit outputs, e.g. 0x10000 for uint16_t.
+    static constexpr FixedPoint digit_base = digit_base_for<CompressedDigit>();
+    // Renormalization threshold: put()/get() renormalize once range falls below this value.
+    // Defaults to (fixed_one / digit_base) / 16 when MinRange <= 0. There is a space-time tradeoff: less
+    // precision means poorer compression, but more precision causes overflow digits more often.
+    static constexpr FixedPoint min_range = MinRange > 0 ? MinRange : (fixed_one / digit_base) / 16;
+    // The maximum range to reach when normalizing.
+    static constexpr FixedPoint max_range = fixed_one;
+
+    static_assert(is_power_of_2(fixed_one), "expected power of 2");
+    static_assert(is_power_of_2(min_range), "expected power of 2");
+    static_assert((fixed_one / digit_base) * digit_base == fixed_one, "expected digit_base to divide fixed_one");
+    static_assert(min_range > 1, "min_range too small");
+    static_assert(min_range < fixed_one / digit_base, "min_range too large");
+
+    // The encoder object takes an output iterator (e.g. to vector or ostream) to
+    // emit compressed digits.
+    // In addition to uncompressed data and compressed digits, the intermediate state is:
+    //   Maximum R (any positive number, typically 2^k)
+    //   Lower and upper bounds x,y \in [0,R)
+    //   Range r = y-x \in [0,R)
+    // Representation invariant:
+    //   C = \sum_{k=1}^{K_i} c_k M^{-k} + (x_i + r_i C_i) M^{-K_i}/R_i
+    //   Base case: K_1 = 0, x_1 = 0, r_1 = R_1
+    // In the base case i=1, K_1=0: C=C_1 is represented as a series of future decisions b_i.
+    // In the final case i=n, K_n=K: C is represented as a string of compressed digits.
+    // The various encoding methods modify K, x, r, R while keeping C fixed.
+    template <typename OutputIterator, typename OutputDigit = typename std::iterator_traits<OutputIterator>::value_type>
+    class encoder {
+        static_assert(std::numeric_limits<OutputDigit>::is_exact, "integer types only");
+        static_assert(!std::numeric_limits<OutputDigit>::is_signed, "unsigned integer types only");
+        static_assert(sizeof(CompressedDigit) % sizeof(OutputDigit) == 0,
+                      "size of compressed digit must be a multiple of size of output digit");
+
+       public:
+        explicit encoder(OutputIterator out) : encoder(out, fixed_one) {} // starts with range = fixed_one
+        encoder(OutputIterator out, FixedPoint initial_range)
+            : bytes_emitted(0), out(out), low(0), range(initial_range) {}
+        ~encoder() {}
+        size_t get_bytes_emitted() const { return bytes_emitted; } // running total kept by emit_digit()
+        // Encode one symbol (nonzero means 1); probability_of_1(range) gives the slice of range for symbol 1.
+        // Returns the bytes emitted by this call. Symbol is int so put() overloads could take >2 symbols.
+        size_t put(int symbol, std::function<FixedPoint(FixedPoint)> probability_of_1) {
+            FixedPoint range_of_1 = probability_of_1(range);
+            FixedPoint range_of_0 = range - range_of_1;
+            if (symbol != 0) {
+                low += range_of_0; // symbol 1 takes the upper slice of the interval
+                range = range_of_1;
+            } else {
+                range = range_of_0; // symbol 0 keeps the lower slice; low is unchanged
+            }
+            if (range < min_range) {
+                if (range == 0) {
+                    assert(false && "Encoder error: emitted a zero-probability symbol.");
+                    abort(); // still fatal when asserts are compiled out
+                }
+                size_t emitted_before = get_bytes_emitted();
+                while (range < max_range / digit_base) {
+                    renormalize_and_emit_digit<CompressedDigit>();
+                }
+                return get_bytes_emitted() - emitted_before;
+            }
+            return 0; // range is still >= min_range, so nothing was emitted
+        }
+        // Terminate the stream: pick a stop value inside [low, low + range) and flush its digits.
+        void finish() {
+            // Find largest stop bit 2^k < range, and x such that 2^k divides x,
+            // 2^{k+1} doesn't divide x, and x is in [low, low+range).
+            for (FixedPoint stop_bit = (fixed_one >> 1); stop_bit > 0; stop_bit >>= 1) {
+                FixedPoint x = (low | stop_bit) & ~(stop_bit - 1);
+                if (stop_bit < range && low <= x && x < low + range) {
+                    low = x;
+                    break;
+                }
+            }
+
+            while (low != 0) { // flush what is left of low, one OutputDigit per iteration
+                range = 1;
+                renormalize_and_emit_digit<OutputDigit>();
+            }
+            range = 0; // mark complete
+        }
+
+       private:
+        template <typename Digit>
+        void renormalize_and_emit_digit() {
+            static constexpr FixedPoint base = digit_base_for<Digit>();
+            static constexpr FixedPoint most_significant_digit = fixed_one / base;
+            static_assert(is_power_of_2(most_significant_digit), "expected power of 2");
+
+            // Check for a carry bit, and cascade from lowest overflow digit to highest.
+            if (low >= fixed_one) {
+                for (int i = overflow.size() - 1; i >= 0; i--) {
+                    if (++overflow[i] != 0) break; // stop once a digit does not wrap around to 0
+                }
+                low -= fixed_one;
+            }
+            assert(low < fixed_one);
+
+            // Compare the minimum and maximum possible values of the top digit.
+            // If different, defer emitting the digit until we're sure we won't have to carry.
+            Digit digit = Digit(low / most_significant_digit);
+            if (digit != Digit((low + range - 1) / most_significant_digit)) {
+                assert(range < most_significant_digit);
+                overflow.push_back(digit);
+            } else {
+                for (CompressedDigit overflow_digit : overflow) {
+                    emit_digit(overflow_digit);
+                }
+                overflow.clear();
+                emit_digit(digit);
+            }
+
+            // Subtract away the emitted/overflowed digit and renormalize.
+            low -= digit * most_significant_digit;
+            low *= base;
+            range *= base;
+        }
+
+        // Emit one digit as sizeof(Digit) / sizeof(OutputDigit) OutputDigits, most significant
+        // first. Loop should be unrolled by the compiler.
+        template <typename Digit>
+        void emit_digit(Digit digit) {
+            for (int i = sizeof(Digit) - sizeof(OutputDigit); i >= 0; i -= sizeof(OutputDigit)) {
+                *out++ = OutputDigit(digit >> (8 * i));
+            }
+            bytes_emitted += sizeof(digit);
+        }
+        size_t bytes_emitted; // sum of sizeof(digit) over every emit_digit() call
+        // Output digits are emitted to this iterator as they are produced.
+        OutputIterator out;
+        // The lower bound x, initialized to 0. (When overflow.size() > 0, low is
+        // the fractional digits of x/R_0.)
+        FixedPoint low;
+        // The range r, which starts as fixed-point 1.0.
+        FixedPoint range;
+        // High digits of x. If overflow.size() = s, then R = R_0 M^s (where R_0 = fixed_one).
+        std::vector<CompressedDigit> overflow;
+    };
+
+    // The decoder object takes an input iterator (e.g. from vector or istream)
+    // to read compressed digits.
+    // In addition to uncompressed data and compressed digits, the intermediate state is:
+    //   TODO(ctl) document the state, representation invariant, and decoding transitions.
+    template <typename InputIterator, typename InputDigit = typename std::iterator_traits<InputIterator>::value_type>
+    class decoder {
+        static_assert(std::numeric_limits<InputDigit>::is_exact, "integer types only");
+        static_assert(!std::numeric_limits<InputDigit>::is_signed, "unsigned integer types only");
+        static_assert(sizeof(CompressedDigit) % sizeof(InputDigit) == 0,
+                      "size of compressed digit must be a multiple of size of input digit");
+
+       public:
+        explicit decoder(InputIterator in, InputIterator end = InputIterator()) : decoder(in, end, fixed_one) {}
+        decoder(InputIterator in, InputIterator end, FixedPoint initial_range) : in(in), end(end) {
+            // Initialize the decoder state by reading in bits until range ~ initial_range.
+            next_digit = consume_digit_aligned();
+            low = next_digit / digit_alignment;
+            range = digit_base / digit_alignment;
+            while (range < initial_range) {
+                renormalize_and_consume_digit();
+            }
+            assert(range == initial_range); // Should be true if we set digit_alignment correctly.
+        }
+        // Decode one symbol (0 or 1); probability_of_1(range) gives the slice of range assigned to symbol 1.
+        int get(std::function<FixedPoint(FixedPoint)> probability_of_1) {
+            FixedPoint range_of_1 = probability_of_1(range);
+            FixedPoint range_of_0 = range - range_of_1;
+            int symbol = (low >= range_of_0); // offsets at or above range_of_0 fall in the symbol-1 slice
+            if (symbol != 0) {
+                low -= range_of_0;
+                range = range_of_1;
+            } else {
+                range = range_of_0;
+            }
+            if (range < min_range) {
+                while (range < max_range / digit_base) {
+                    renormalize_and_consume_digit();
+                }
+            }
+            return symbol;
+        }
+
+       private:
+        static constexpr CompressedDigit digit_alignment = std::numeric_limits<FixedPoint>::max() / fixed_one + 1;
+        static_assert(is_power_of_2(digit_alignment), "");
+        static_assert((fixed_one / digit_base) * digit_alignment ==
+                          (std::numeric_limits<FixedPoint>::max() / digit_base) + 1,
+                      "expected fixed_one > max/digit_base");
+        static_assert(is_power_of_2(digit_base / digit_alignment), "expected digit_base > digit_alignment");
+
+        void renormalize_and_consume_digit() { // scale low and range by digit_base, shifting in one new digit
+            assert(low < fixed_one / digit_base);
+
+            CompressedDigit digit = consume_digit();
+            low = low * digit_base + digit;
+            range *= digit_base;
+        }
+
+        // Consume a CompressedDigit. Because our initialization is not
+        // digit-aligned, we have to bit-align the reads here.
+        CompressedDigit consume_digit() {
+            CompressedDigit in_digit = consume_digit_aligned();
+            CompressedDigit digit = ((next_digit * (digit_base / digit_alignment)) | (in_digit / digit_alignment));
+            next_digit = in_digit; // keep the new digit: its low bits feed the next call
+            return digit;
+        }
+
+        // Consume a CompressedDigit as one or more InputDigits. Loop should be
+        // unrolled by the compiler.
+        CompressedDigit consume_digit_aligned() {
+            CompressedDigit digit = 0;
+            for (int i = sizeof(CompressedDigit) - sizeof(InputDigit); i >= 0; i -= sizeof(InputDigit)) {
+                digit *= digit_base_for<InputDigit>();
+                if (in != end) { // once the input is exhausted, zero digits are supplied instead
+                    digit |= CompressedDigit(InputDigit(*in++));
+                }
+            }
+            return digit;
+        }
+
+        // Input digits are read from this iterator.
+        InputIterator in, end;
+        // The last digit read from the input - the lower bits are still to be used.
+        CompressedDigit next_digit;
+        // The offset z from the lower bound.
+        FixedPoint low;
+        // The range r, which is initialized to fixed-point 1.0.
+        FixedPoint range;
+    };
+};
+
+template <typename Coder = arithmetic_code<>, typename OutputContainer>
+typename Coder::template encoder<std::back_insert_iterator<OutputContainer>, typename OutputContainer::value_type>
+make_encoder(OutputContainer* container) {
+    // Build an encoder whose emitted digits are appended to *container.
+    using Sink = std::back_insert_iterator<OutputContainer>;
+    return typename Coder::template encoder<Sink, typename OutputContainer::value_type>(Sink(*container));
+}
+
+template <typename Coder = arithmetic_code<>, typename InputContainer>
+typename Coder::template decoder<typename InputContainer::const_iterator, typename InputContainer::value_type>
+make_decoder(const InputContainer& container) {
+    typedef decltype(std::begin(container)) Source; // the decoder reads the whole [begin, end) range
+    Source first = std::begin(container), last = std::end(container);
+    return typename Coder::template decoder<Source, typename InputContainer::value_type>(first, last);
+}
diff --git a/codec/L2/demos/leptonEnc/host/vp8/util/billing.cc b/codec/L2/demos/leptonEnc/host/vp8/util/billing.cc
new file mode 100644
index 0000000000..28ed9f0d5d
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/util/billing.cc
@@ -0,0 +1,182 @@
+#ifdef _WIN32
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+#include <errno.h>
+#include "billing.hh"
+#define BILLING_MAP_INIT(X) 0,
+
+Sirikata::Array1d<Sirikata::Array1d<std::atomic<uint32_t>, (uint32_t)Billing::NUM_BILLING_ELEMENTS>, 2> billing_map;
+// Write the NUL-terminated string to fd, resuming after short writes and EINTR.
+// Any other write() failure (or a zero-byte write) silently abandons the rest.
+void write_string(int fd, const char* write_ptr) {
+    size_t remaining = strlen(write_ptr);
+    while (remaining > 0) {
+        auto written = write(fd, write_ptr, remaining);
+        if (written > 0) {
+            write_ptr += written;
+            remaining -= (size_t)written;
+        } else if (written == 0 || errno != EINTR) {
+            return;
+        }
+    }
+}
+// Write number to fd in decimal. Digits are produced from the least-significant end
+// of a stack buffer using the unsigned magnitude, so the whole int64_t range
+// (negative values and values >= 10^10 included) is printed correctly.
+void write_number(int fd, int64_t number) {
+    char output[32];
+    char* write_ptr = output + sizeof(output) - 1;
+    *write_ptr = '\0';
+    uint64_t magnitude = number < 0 ? uint64_t(0) - uint64_t(number) : uint64_t(number);
+    do {
+        *--write_ptr = char('0' + magnitude % 10);
+        magnitude /= 10;
+    } while (magnitude != 0);
+    if (number < 0) {
+        *--write_ptr = '-';
+    }
+    write_string(fd, write_ptr);
+}
+
+// Write ratio as a percentage with exactly three fractional digits (truncated, zero-padded).
+void write_pct(int fd, double ratio) {
+    const int64_t milli = (int64_t)(ratio * 100000); // percent * 1000; assumes ratio >= 0
+    write_number(fd, milli / 1000);
+    const char frac[] = {'.', char('0' + milli / 100 % 10), char('0' + milli / 10 % 10), char('0' + milli % 10), '\0'};
+    write_string(fd, frac);
+}
+
+template <class T>
+void print_item(int fd, const char* name, const T& uncompressed, const T& compressed) {
+    const auto write_split = [fd](const T& amount) { // prints "<amount / 8>.<amount % 8>"
+        write_number(fd, amount / 8);
+        write_string(fd, ".");
+        write_number(fd, amount % 8);
+    };
+    write_string(fd, name);
+    write_string(fd, ": ");
+    write_split(uncompressed);
+    write_string(fd, " vs ");
+    write_split(compressed);
+    write_string(fd, " = ");
+    double ratio = 0; // stays 0 when the uncompressed amount is zero
+    if (uncompressed) {
+        ratio = compressed;
+        ratio /= uncompressed;
+    }
+    write_pct(fd, ratio);
+    write_string(fd, "%\n");
+}
+
+void fixup_bill() {
+    // Adjusts the uncompressed tally (billing_map[0]) in place; print_bill() calls this before reporting.
+    size_t edge_cost = billing_map[0][(int)Billing::BITMAP_EDGE].load();
+    edge_cost += billing_map[0][(int)Billing::EXP1_EDGE].load();
+    edge_cost += billing_map[0][(int)Billing::EXP2_EDGE].load();
+    edge_cost += billing_map[0][(int)Billing::EXP3_EDGE].load();
+    edge_cost += billing_map[0][(int)Billing::EXPN_EDGE].load();
+    edge_cost += billing_map[0][(int)Billing::SIGN_EDGE].load();
+    edge_cost += billing_map[0][(int)Billing::RES_EDGE].load();
+
+    size_t cost_7x7 = billing_map[0][(int)Billing::BITMAP_7x7].load();
+    cost_7x7 += billing_map[0][(int)Billing::EXP1_7x7].load();
+    cost_7x7 += billing_map[0][(int)Billing::EXP2_7x7].load();
+    cost_7x7 += billing_map[0][(int)Billing::EXP3_7x7].load();
+    cost_7x7 += billing_map[0][(int)Billing::EXPN_7x7].load();
+    cost_7x7 += billing_map[0][(int)Billing::SIGN_7x7].load();
+    cost_7x7 += billing_map[0][(int)Billing::RES_7x7].load();
+    // (disabled below) split the overall EOB cost between edge and 7x7 by the ratio of their other bits
+    (void)cost_7x7;
+    (void)edge_cost;
+    /*
+    size_t non_nonzero_cost = cost_7x7 + edge_cost;
+    size_t num_nonzero_cost = billing_map[0][(int)Billing::NZ_7x7].load()
+        + billing_map[0][(int)Billing::NZ_EDGE].load();
+    billing_map[0][(int)Billing::NZ_7x7] -= billing_map[0][(int)Billing::NZ_7x7].load();
+    billing_map[0][(int)Billing::NZ_EDGE] -= billing_map[0][(int)Billing::NZ_EDGE].load();
+    billing_map[0][(int)Billing::NZ_EDGE] += num_nonzero_cost * edge_cost / non_nonzero_cost;
+    billing_map[0][(int)Billing::NZ_7x7] += num_nonzero_cost * cost_7x7 / non_nonzero_cost;
+    */
+    // we also tally some of the bitmap cost to EOB cost, since the "not eob" idea gets
+    // partially paid for in the bitmap huffman code cost
+    /*
+    uint32_t bitmap = billing_map[0][(int)Billing::BITMAP_7x7];
+    billing_map[0][(int)Billing::BITMAP_7x7] -= bitmap/2;
+    billing_map[0][(int)Billing::NZ_7x7] += bitmap/2;
+
+    bitmap = billing_map[0][(int)Billing::BITMAP_EDGE];
+    billing_map[0][(int)Billing::BITMAP_EDGE] -= bitmap/2;
+    billing_map[0][(int)Billing::NZ_EDGE] += bitmap/2;
+    */
+    // not all signs are created equal in jpeg spec: this balances positive and negative by using the cost
+    // of unpredicted signs in lepton-encoded jpegs to get the 'right' cost in normal jpeg
+    const uint32_t sign_7x7_plain = billing_map[0][(int)Billing::SIGN_7x7]; // 0 would make the ratio inf/NaN
+    double sign_ratio = sign_7x7_plain ? billing_map[1][(int)Billing::SIGN_7x7] / (double)sign_7x7_plain : 1.0;
+    int delta_7x7 = billing_map[1][(int)Billing::SIGN_7x7] - billing_map[0][(int)Billing::SIGN_7x7];
+    billing_map[0][(int)Billing::SIGN_7x7] += delta_7x7;
+    billing_map[0][(int)Billing::EXP1_7x7] -= delta_7x7;
+    int delta_edge = sign_ratio * billing_map[0][(int)Billing::SIGN_EDGE] - billing_map[0][(int)Billing::SIGN_EDGE];
+    billing_map[0][(int)Billing::SIGN_EDGE] += delta_edge;
+    billing_map[0][(int)Billing::EXP1_EDGE] -= delta_edge;
+
+    int delta_dc = sign_ratio * billing_map[0][(int)Billing::SIGN_DC] - billing_map[0][(int)Billing::SIGN_DC];
+    billing_map[0][(int)Billing::SIGN_DC] += delta_dc;
+    billing_map[0][(int)Billing::EXP1_DC] -= delta_dc;
+}
+
+void print_bill(int fd) {
+#ifndef NDEBUG
+    // Debug builds only: dump every billing category that recorded any bits, then the
+    // per-group subtotals and the grand total. Row 0 of billing_map is the
+    // uncompressed tally and row 1 the compressed one.
+    static const Billing misc_items[] = {Billing::HEADER, Billing::DELIMITERS};
+    static const Billing edge_items[] = {Billing::NZ_EDGE,   Billing::BITMAP_EDGE, Billing::EXP1_EDGE,
+                                         Billing::EXP2_EDGE, Billing::EXP3_EDGE,   Billing::EXPN_EDGE,
+                                         Billing::SIGN_EDGE, Billing::RES_EDGE};
+    static const Billing items_7x7[] = {Billing::NZ_7x7,   Billing::BITMAP_7x7, Billing::EXP1_7x7,
+                                        Billing::EXP2_7x7, Billing::EXP3_7x7,   Billing::EXPN_7x7,
+                                        Billing::SIGN_7x7, Billing::RES_7x7};
+    static const Billing dc_items[] = {Billing::EXP0_DC, Billing::EXP1_DC, Billing::EXP2_DC, Billing::EXP3_DC,
+                                       Billing::EXPN_DC, Billing::SIGN_DC, Billing::RES_DC};
+
+    fixup_bill(); // we made some approximations in mapping the JPEG spec to the new billing items
+    write_string(fd, "::::BILL::::\n");
+    // One line per category with a nonzero entry in either tally, accumulated into the grand total.
+    size_t totals[2] = {0, 0};
+    for (int item = 0; item < (int)Billing::NUM_BILLING_ELEMENTS; ++item) {
+        if (!billing_map[0][item] && !billing_map[1][item]) {
+            continue;
+        }
+        totals[0] += billing_map[0][item];
+        totals[1] += billing_map[1][item];
+        print_item(fd, BillingString((Billing)item), billing_map[0][item], billing_map[1][item]);
+    }
+    // Subtotals for each group of categories, computed separately for both tallies.
+    size_t totals_other[2] = {0, 0};
+    size_t totals_edge[2] = {0, 0};
+    size_t totals7x7[2] = {0, 0};
+    size_t totals_dc[2] = {0, 0};
+    for (int comp = 0; comp < 2; ++comp) {
+        for (Billing item : misc_items) {
+            totals_other[comp] += billing_map[comp][(int)item].load();
+        }
+        for (Billing item : edge_items) {
+            totals_edge[comp] += billing_map[comp][(int)item].load();
+        }
+        for (Billing item : items_7x7) {
+            totals7x7[comp] += billing_map[comp][(int)item].load();
+        }
+        for (Billing item : dc_items) {
+            totals_dc[comp] += billing_map[comp][(int)item].load();
+        }
+    }
+    print_item(fd, "Overall 7x7", totals7x7[0], totals7x7[1]);
+    print_item(fd, "Overall Edge", totals_edge[0], totals_edge[1]);
+    print_item(fd, "Overall DC", totals_dc[0], totals_dc[1]);
+    print_item(fd, "Overall Misc", totals_other[0], totals_other[1]);
+    print_item(fd, "Total", totals[0], totals[1]);
+    write_string(fd, "::::::::::::\n");
+#endif
+}
diff --git a/codec/L2/demos/leptonEnc/host/vp8/util/billing.hh b/codec/L2/demos/leptonEnc/host/vp8/util/billing.hh
new file mode 100644
index 0000000000..8b0a149e98
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/util/billing.hh
@@ -0,0 +1,142 @@
+#include <atomic>
+#include "memory.hh"
+#include "nd_array.hh"
+#ifndef _BILLING_HH_
+#define _BILLING_HH_
+#define FOREACH_BILLING_TYPE(CB) \
+    CB(HEADER)                   \
+    CB(DELIMITERS)               \
+    CB(RESERVED)                 \
+    CB(NZ_7x7)                   \
+    CB(BITMAP_7x7)               \
+    CB(EXP1_7x7)                 \
+    CB(EXP2_7x7)                 \
+    CB(EXP3_7x7)                 \
+    CB(EXPN_7x7)                 \
+    CB(SIGN_7x7)                 \
+    CB(RES_7x7)                  \
+    CB(NZ_EDGE)                  \
+    CB(BITMAP_EDGE)              \
+    CB(EXP1_EDGE)                \
+    CB(EXP2_EDGE)                \
+    CB(EXP3_EDGE)                \
+    CB(EXPN_EDGE)                \
+    CB(SIGN_EDGE)                \
+    CB(RES_EDGE)                 \
+    CB(EXP0_DC)                  \
+    CB(EXP1_DC)                  \
+    CB(EXP2_DC)                  \
+    CB(EXP3_DC)                  \
+    CB(EXPN_DC)                  \
+    CB(SIGN_DC)                  \
+    CB(RES_DC)
+
+#define BILLING_ENUM_CB(Name) Name,
+
+enum class Billing { FOREACH_BILLING_TYPE(BILLING_ENUM_CB) NUM_BILLING_ELEMENTS };
+#undef BILLING_ENUM_CB
+#define BILLING_STRING_CB(Name) #Name,
+inline const char* BillingString(Billing bt) { // name of bt, or a numbered placeholder when bt is out of range
+    static const char* const string_data[] = {FOREACH_BILLING_TYPE(BILLING_STRING_CB) "UNREACHABLE"};
+    unsigned long long which = (unsigned long long)bt;
+    if (which < sizeof(string_data) / sizeof(string_data[0])) {
+        return string_data[which];
+    }
+    static char data[] = "XXXX_BILLING_DATA_BEYOND_BILLING_DATA_ARRAY"; // shared buffer, rewritten by each such call
+    data[0] = (which / 1000 % 10) + '0'; // % 10 keeps this a digit even when which >= 10000
+    data[1] = (which / 100 % 10) + '0';
+    data[2] = (which / 10 % 10) + '0';
+    data[3] = (which % 10) + '0';
+    return data;
+}
+extern Sirikata::Array1d<typename Sirikata::Array1d<std::atomic<uint32_t>, (uint32_t)Billing::NUM_BILLING_ELEMENTS>, 2>
+    billing_map;
+
+inline void write_bit_bill(Billing bt, bool is_compressed, uint32_t num_bits) {
+#ifndef NDEBUG
+    // Billing is compiled in only when NDEBUG is not defined; otherwise this is a no-op.
+    assert((uint32_t)bt < (uint32_t)Billing::NUM_BILLING_ELEMENTS);
+    if (num_bits == 0) {
+        return;
+    }
+    const uint32_t tally = is_compressed ? 1 : 0; // row 0 = uncompressed, row 1 = compressed
+    billing_map[tally][(uint32_t)bt] += num_bits;
+#endif
+}
+
+inline void write_multi_bit_bill(uint32_t num_bits, bool is_compressed, Billing start_range, Billing end_range) {
+#ifndef NDEBUG
+    assert((uint32_t)start_range < (uint32_t)Billing::NUM_BILLING_ELEMENTS);
+    assert((uint32_t)end_range < (uint32_t)Billing::NUM_BILLING_ELEMENTS);
+    const uint32_t first = (uint32_t)start_range, last = (uint32_t)end_range;
+    for (uint32_t bit = 0; bit < num_bits; ++bit) { // bit i is billed to first + i, saturating at last
+        ++billing_map[is_compressed ? 1 : 0][std::min(first + bit, last)]; // compiled only without NDEBUG
+    }
+#endif
+}
+inline void write_byte_bill(Billing bt, bool is_compressed, uint32_t num_bytes) {
+#ifndef NDEBUG
+    if (num_bytes != 0) { // bytes are billed as bits, eight per byte; a zero count is ignored
+        write_bit_bill(bt, is_compressed, num_bytes * 8);
+    }
+#endif
+}
+#undef BILLING_STRING_CB
+inline void write_eob_bill(int coefficient, bool encode, uint32_t num_bits) {
+#ifndef NDEBUG
+    // Debug-only accounting: bills num_bits to either NZ_EDGE or NZ_7x7. Two weights
+    // that grow with coefficient are computed, then the bucket is chosen from the
+    // running bit total so that each bucket is hit roughly in proportion to its weight.
+    // The 7x7 thresholds are tested in ascending order so the largest one exceeded
+    // wins; tested in descending order, the "> 3" case overwrote every larger
+    // result and the weight could never exceed 2.
+    const uint32_t num_edge_bits = coefficient > 10 ? 4 : coefficient > 2 ? 3 : coefficient > 0 ? 2 : 1;
+    uint32_t num_7x7_bits = 1;
+    if (coefficient > 3) {
+        num_7x7_bits = 2;
+    }
+    if (coefficient > 8) {
+        num_7x7_bits = 3;
+    }
+    if (coefficient > 12) {
+        num_7x7_bits = 4;
+    }
+    if (coefficient > 18) {
+        num_7x7_bits = 5;
+    }
+    if (coefficient > 30) {
+        num_7x7_bits = 6;
+    }
+    if (coefficient > 46) {
+        num_7x7_bits = 7;
+    }
+
+    // Everything billed so far to the selected tally (row 0 = uncompressed, row 1 = compressed).
+    uint32_t num_tot_bits = 0;
+    for (uint32_t i = 0; i < (uint32_t)Billing::NUM_BILLING_ELEMENTS; ++i) {
+        num_tot_bits += billing_map[encode ? 1 : 0][i];
+    }
+    // Deterministic stand-in for a random draw in [0, num_edge_bits + num_7x7_bits).
+    uint32_t rand_val = (num_tot_bits / 7) % (num_edge_bits + num_7x7_bits);
+    if (rand_val < num_edge_bits) {
+        write_bit_bill(Billing::NZ_EDGE, encode, num_bits);
+    } else {
+        write_bit_bill(Billing::NZ_7x7, encode, num_bits);
+    }
+#endif
+}
+
+void print_bill(int fd);
+
+inline bool is_edge(int bpos) {
+#ifdef NDEBUG
+    (void)bpos;
+    return false;
+#else
+    assert(bpos < 64);
+    static const uint64_t edge_mask = 0x81830C66FULL; // bits 0,1,2,3,5,6,9,10,14,15,20,21,27,28,35
+    return bpos >= 0 && bpos < 64 && ((edge_mask >> bpos) & 1) != 0;
+#endif
+}
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/vp8/util/block_based_image.hh b/codec/L2/demos/leptonEnc/host/vp8/util/block_based_image.hh
new file mode 100644
index 0000000000..e62570f08c
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/util/block_based_image.hh
@@ -0,0 +1,236 @@
+#ifndef _BLOCK_BASED_IMAGE_HH_
+#define _BLOCK_BASED_IMAGE_HH_
+#include "memory.hh"
+#include "aligned_block.hh"
+#include "block_context.hh"
+#include <map>
+extern bool g_allow_progressive;
+template <bool force_memory_optimization = false>
+class BlockBasedImageBase {
+   public: // Xilinx for CCS (Change Coding Style)
+    typedef AlignedBlock Block;
+    Block* image_;
+    uint32_t width_;
+    uint32_t nblocks_;
+    uint8_t* storage_;
+    uint32_t theoretical_component_height_;
+    // if true, this image only contains 2 rows during decode
+    bool memory_optimized_image_;
+    BlockBasedImageBase(const BlockBasedImageBase&) = delete;
+    BlockBasedImageBase& operator=(const BlockBasedImageBase&) = delete;
+
+   public:
+    BlockBasedImageBase() : memory_optimized_image_(force_memory_optimization) {
+        image_ = nullptr;
+        storage_ = nullptr;
+        width_ = 0;
+        nblocks_ = 0;
+        theoretical_component_height_ = 0;
+    }
+    bool is_memory_optimized() const { return force_memory_optimization || memory_optimized_image_; }
+    uint32_t block_width() const { return width_; }
+    size_t bytes_allocated() const { return 32 + nblocks_ * sizeof(Block); }
+    size_t blocks_allocated() const { return nblocks_; }
+    size_t original_height() const { return theoretical_component_height_; }
+    void init(uint32_t width, uint32_t height, uint32_t nblocks, bool memory_optimized_image) { // allocates the block store
+        theoretical_component_height_ = height;
+        if (force_memory_optimization) {
+            always_assert(memory_optimized_image && "MemoryOptimized must match template");
+        }
+        memory_optimized_image_ = force_memory_optimization || memory_optimized_image;
+        always_assert(nblocks <= width * height);
+        width_ = width;
+        if (force_memory_optimization || memory_optimized_image_) { // optimized images keep only a few rows of blocks
+#ifdef ALLOW_3_OR_4_SCALING_FACTOR
+            nblocks = width * 4;
+#else
+            nblocks = width * 2;
+#endif
+        }
+        nblocks_ = nblocks;
+        storage_ = (uint8_t*)custom_calloc(nblocks * sizeof(Block) + 31); // +31 leaves room to shift up to a 32-byte boundary
+        size_t offset = reinterpret_cast<size_t>(storage_); // address as an integer; subtracting a null pointer is undefined
+        if (offset & 31) { // needs alignment adjustment
+            image_ = (Block*)(storage_ + 32 - (offset & 31));
+        } else { // already aligned
+            image_ = (Block*)storage_;
+        }
+    }
+    BlockContext begin(std::vector<NeighborSummary>::iterator num_nonzeros_begin) {
+        return {image_, nullptr, num_nonzeros_begin, num_nonzeros_begin + width_};
+    }
+    ConstBlockContext begin(std::vector<NeighborSummary>::iterator num_nonzeros_begin) const {
+        return {image_, nullptr, num_nonzeros_begin, num_nonzeros_begin + width_};
+    }
+    BlockContext off_y(int y, std::vector<NeighborSummary>::iterator num_nonzeros_begin) {
+        if (force_memory_optimization || memory_optimized_image_) {
+#ifdef ALLOW_3_OR_4_SCALING_FACTOR
+            return {image_ + width_ * (y & 3), image_ + ((y + 3) & 3) * width_,
+                    (y & 1) ? num_nonzeros_begin + width_ : num_nonzeros_begin,
+                    (y & 1) ? num_nonzeros_begin : num_nonzeros_begin + width_};
+#else
+            return {(y & 1) ? image_ + width_ : image_, (y & 1) ? image_ : image_ + width_,
+                    (y & 1) ? num_nonzeros_begin + width_ : num_nonzeros_begin,
+                    (y & 1) ? num_nonzeros_begin : num_nonzeros_begin + width_};
+#endif
+        }
+        return {image_ + width_ * y, (y != 0) ? image_ + width_ * (y - 1) : nullptr,
+                (y & 1) ? num_nonzeros_begin + width_ : num_nonzeros_begin,
+                (y & 1) ? num_nonzeros_begin : num_nonzeros_begin + width_};
+    }
+    ConstBlockContext off_y(int y, std::vector<NeighborSummary>::iterator num_nonzeros_begin) const {
+        if (force_memory_optimization || memory_optimized_image_) {
+#ifdef ALLOW_3_OR_4_SCALING_FACTOR
+            return {image_ + width_ * (y & 3), image_ + ((y + 3) & 3) * width_,
+                    (y & 1) ? num_nonzeros_begin + width_ : num_nonzeros_begin,
+                    (y & 1) ? num_nonzeros_begin : num_nonzeros_begin + width_};
+#else
+            return {(y & 1) ? image_ + width_ : image_, (y & 1) ? image_ : image_ + width_,
+                    (y & 1) ? num_nonzeros_begin + width_ : num_nonzeros_begin,
+                    (y & 1) ? num_nonzeros_begin : num_nonzeros_begin + width_};
+#endif
+        }
+        return {image_ + width_ * y, (y != 0) ? image_ + width_ * (y - 1) : nullptr,
+                (y & 1) ? num_nonzeros_begin + width_ : num_nonzeros_begin,
+                (y & 1) ? num_nonzeros_begin : num_nonzeros_begin + width_};
+    }
+    template <class BlockContext>
+    uint32_t next(BlockContext& it, bool has_left, int component_y) const {
+        it.cur += 1;
+        ptrdiff_t offset = it.cur - image_;
+        uint32_t retval = offset;
+        if (force_memory_optimization || memory_optimized_image_) {
+#ifdef ALLOW_3_OR_4_SCALING_FACTOR
+            if (__builtin_expect(offset == (width_ << 2), 0)) {
+                retval = offset = 0;
+                it.cur = image_;
+            }
+            if (retval >= (width_ << 1)) {
+                retval -= (width_ << 1);
+            }
+            if (retval >= width_) {
+                retval -= width_;
+            }
+            retval += width_ * component_y;
+#else
+            if (__builtin_expect(offset == (width_ << 1), 0)) {
+                retval = offset = 0;
+                it.cur = image_;
+            }
+            if (retval >= width_) {
+                retval -= width_;
+            }
+            retval += width_ * component_y;
+#endif
+        }
+        if (__builtin_expect(offset < width_, 0)) {
+#ifdef ALLOW_3_OR_4_SCALING_FACTOR
+            it.above = it.cur + 3 * width_;
+#else
+            it.above = it.cur + width_;
+#endif
+        } else {
+            it.above = it.cur - width_;
+        }
+        ++it.num_nonzeros_here;
+        ++it.num_nonzeros_above;
+        if (!has_left) {
+            bool cur_row_first = (it.num_nonzeros_here < it.num_nonzeros_above);
+            if (cur_row_first) {
+                it.num_nonzeros_above -= width_;
+                it.num_nonzeros_above -= width_;
+            } else {
+                it.num_nonzeros_here -= width_;
+                it.num_nonzeros_here -= width_;
+            }
+        }
+        return retval;
+    }
+    AlignedBlock& at(uint32_t y, uint32_t x) {
+        uint32_t index;
+        if (force_memory_optimization || memory_optimized_image_) {
+#ifdef ALLOW_3_OR_4_SCALING_FACTOR
+            index = x + (y & 3) * width_;
+#else
+            index = (y & 1) ? width_ + x : x;
+#endif
+            if (__builtin_expect(x >= width_, 0)) {
+                custom_exit(ExitCode::OOM);
+            }
+        } else {
+            index = y * width_ + x;
+            if (__builtin_expect(index >= nblocks_, 0)) {
+                custom_exit(ExitCode::OOM);
+            }
+        }
+        return image_[index];
+    }
+    const AlignedBlock& at(uint32_t y, uint32_t x) const {
+        uint32_t index;
+        if (force_memory_optimization || memory_optimized_image_) {
+#ifdef ALLOW_3_OR_4_SCALING_FACTOR
+            index = x + (y & 3) * width_;
+#else
+            index = (y & 1) ? width_ + x : x;
+#endif
+            if (__builtin_expect(x >= width_, 0)) {
+                custom_exit(ExitCode::OOM);
+            }
+        } else {
+            index = y * width_ + x;
+            if (__builtin_expect(index >= nblocks_, 0)) {
+                custom_exit(ExitCode::OOM);
+            }
+        }
+        return image_[index];
+    }
+
+    AlignedBlock& raster(uint32_t offset) {
+        if (force_memory_optimization || memory_optimized_image_) {
+#ifdef ALLOW_3_OR_4_SCALING_FACTOR
+            offset = offset % (width_ << 2);
+#else
+            offset = offset % (width_ << 1);
+#endif
+            assert(offset <= nblocks_ && "we mod offset by width_: it is < nblocks_");
+        } else if (offset >= nblocks_) {
+            custom_exit(ExitCode::OOM);
+        }
+        return image_[offset];
+    }
+    const AlignedBlock& raster(uint32_t offset) const {
+        if (force_memory_optimization || memory_optimized_image_) {
+#ifdef ALLOW_3_OR_4_SCALING_FACTOR
+            offset = offset % (width_ << 2);
+#else
+            offset = offset % (width_ << 1);
+#endif
+            assert(offset <= nblocks_ && "we mod offset by width_: it is < nblocks_");
+        } else if (__builtin_expect(offset >= nblocks_, 0)) {
+            custom_exit(ExitCode::OOM);
+        }
+        return image_[offset];
+    }
+};
+class BlockBasedImage : public BlockBasedImageBase<false> {
+    BlockBasedImage(const BlockBasedImage&) = delete;
+    BlockBasedImage& operator=(const BlockBasedImage&) = delete;
+
+   public:
+    BlockBasedImage() {}
+};
+template <bool force_memory_optimization = false>
+class BlockBasedImagePerChannel
+    : public Sirikata::Array1d<BlockBasedImageBase<force_memory_optimization>*, (uint32_t)ColorChannel::NumBlockTypes> {
+   public:
+    BlockBasedImagePerChannel() { this->memset(0); }
+};
+
+template <bool force_memory_optimization = false>
+class KBlockBasedImagePerChannel : public Sirikata::Array1d<const BlockBasedImageBase<force_memory_optimization>*,
+                                                            (uint32_t)ColorChannel::NumBlockTypes> {
+   public:
+    KBlockBasedImagePerChannel() { this->memset(0); }
+};
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/vp8/util/block_context.hh b/codec/L2/demos/leptonEnc/host/vp8/util/block_context.hh
new file mode 100644
index 0000000000..fafb3a74fe
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/util/block_context.hh
@@ -0,0 +1,146 @@
+#ifndef _BLOCK_CONTEXT_HH_
+#define _BLOCK_CONTEXT_HH_
+#include "options.hh"
+#include "tmmintrin.h"
+enum { IDCTSCALE = 1, xIDCTSCALE = 8 };
+struct NeighborSummary {
+    enum { VERTICAL_LAST_PIXEL_OFFSET_FROM_FIRST_PIXEL = 14 };
+    int16_t edge_pixels[16];
+    uint8_t num_nonzeros_;
+    uint8_t num_nonzeros() const { return num_nonzeros_; }
+    void set_num_nonzeros(uint8_t nz) { num_nonzeros_ = nz; }
+    int16_t horizontal(int index) const { return edge_pixels[index + 8]; }
+    int16_t vertical(int index) const { return edge_pixels[index]; }
+    const int16_t* vertical_ptr_except_7() const { return &edge_pixels[0]; }
+    const int16_t* horizontal_ptr() const { return &edge_pixels[8]; }
+#define shift_right_round_zero_epi16(vec, imm8) (_mm_sign_epi16(_mm_srli_epi16(_mm_sign_epi16(vec, vec), imm8), vec));
+    void set_horizontal(int16_t* data_aligned, uint16_t* quantization_table, int16_t dc) {
+        if (VECTORIZE) {
+            __m128i cur_row = _mm_load_si128((const __m128i*)(data_aligned + 56));
+            __m128i prev_row = _mm_load_si128((const __m128i*)(data_aligned + 48));
+            __m128i delta = _mm_sub_epi16(cur_row, prev_row);
+            __m128i half_delta = shift_right_round_zero_epi16(delta, 1);
+            __m128i pred_row = _mm_add_epi16(_mm_add_epi16(cur_row, half_delta), _mm_set1_epi16(128 * xIDCTSCALE));
+            pred_row = _mm_add_epi16(pred_row, _mm_set1_epi16(quantization_table[0] * dc));
+            _mm_storeu_si128((__m128i*)&edge_pixels[8], pred_row);
+        } else {
+            for (int i = 0; i < 8; ++i) {
+                int delta = data_aligned[i + 56] - data_aligned[i + 48];
+                // if (i == 7) delta = 0;
+                edge_pixels[i + 8] = dc * quantization_table[0] + data_aligned[i + 56] + 128 * xIDCTSCALE + (delta / 2);
+            }
+        }
+    }
+    void set_vertical(int16_t* data, uint16_t* quantization_table, int16_t dc) {
+        if (VECTORIZE) {
+            __m128i cur_row =
+                _mm_set_epi16(data[63], data[55], data[47], data[39], data[31], data[23], data[15], data[7]);
+            __m128i prev_row =
+                _mm_set_epi16(data[62], data[54], data[46], data[38], data[30], data[22], data[14], data[6]);
+            __m128i delta = _mm_sub_epi16(cur_row, prev_row);
+            __m128i half_delta = shift_right_round_zero_epi16(delta, 1);
+            __m128i pred_row = _mm_add_epi16(_mm_add_epi16(cur_row, half_delta), _mm_set1_epi16(128 * xIDCTSCALE));
+            pred_row = _mm_add_epi16(pred_row, _mm_set1_epi16(quantization_table[0] * dc));
+            _mm_storeu_si128((__m128i*)&edge_pixels[0], pred_row);
+        } else {
+            for (int i = 0; i < 8; ++i) {
+                int delta = data[i * 8 + 7] - data[i * 8 + 6];
+                // if (i == 7) delta = 0;
+                edge_pixels[i] = dc * quantization_table[0] + data[i * 8 + 7] + 128 * xIDCTSCALE + (delta / 2);
+            }
+        }
+    }
+    void set_horizontal_dc_included(int* data) {
+        for (int i = 0; i < 8; ++i) {
+            int delta = data[i + 56] - data[i + 48];
+            // if (i == 7) delta = 0;
+            edge_pixels[i + 8] = data[i + 56] + delta / 2;
+        }
+    }
+    void set_vertical_dc_included(int* data) { // fills edge_pixels[0..6] from the last two columns of an 8x8 raster block
+        for (int i = 0; i < 7; ++i) { // NOTE(review): stops at 7 (edge_pixels[7] untouched) while sibling setters use 8 — confirm
+            int delta = data[i * 8 + 7] - data[i * 8 + 6];
+            // if (i == 7) delta = 0;
+            edge_pixels[i] = data[i * 8 + 7] + delta / 2; // last column plus half the step from the column before it
+        }
+    }
+};
+
+// in raytracing we usually find that having 32 bit offsets to pointers ends up being more
+// efficient in our datastructures, since array offseting instructions are so fast.
+template <class ABlock>
+struct MBlockContext {
+    ABlock* cur;
+    ABlock* above; // offset from cur; 0 for unavail
+    std::vector<NeighborSummary>::iterator num_nonzeros_here;
+    std::vector<NeighborSummary>::iterator num_nonzeros_above;
+    MBlockContext() {
+        std::memset(this, 0, sizeof(*this));
+        cur = nullptr;
+        above = nullptr;
+    }
+    MBlockContext(ABlock* cur,
+                  ABlock* above,
+                  std::vector<NeighborSummary>::iterator num_nonzeros_here,
+                  std::vector<NeighborSummary>::iterator num_nonzeros_above) {
+        std::memset(this, 0, sizeof(*this));
+        this->cur = cur;
+        this->above = above;
+        this->num_nonzeros_here = num_nonzeros_here;
+        this->num_nonzeros_above = num_nonzeros_above;
+    }
+    MBlockContext<const AlignedBlock> copy() const { return {cur, above, num_nonzeros_here, num_nonzeros_above}; }
+    constexpr const ABlock& here() const { return cur[0]; }
+    constexpr const ABlock& left_unchecked() const { return cur[-1]; }
+    constexpr const ABlock& above_unchecked() const { return above[0]; }
+    constexpr const ABlock& above_left_unchecked() const { return above[-1]; }
+    static MBlockContext nil() {
+        MBlockContext retval;
+        memset(&retval, 0, sizeof(retval));
+        retval.cur = nullptr;
+        retval.above = nullptr;
+        return retval;
+    }
+    bool isNil() { return cur == nullptr && above == nullptr; }
+    ABlock& here() { return cur[0]; }
+    ABlock& left_unchecked() { return cur[-1]; }
+    ABlock& above_unchecked() { return above[0]; }
+    ABlock& above_left_unchecked() { return above[-1]; }
+    bool num_nonzeros_check(uint8_t nz7x7, ABlock& block) const {
+        int nz = 0;
+        for (int i = 1; i < 8; ++i) {
+            for (int j = 1; j < 8; ++j) {
+                if (block.coefficients_raster(i * 8 + j)) {
+                    ++nz;
+                }
+            }
+        }
+        if (nz == nz7x7) {
+            return true;
+        }
+        return false;
+    }
+    uint8_t nonzeros_above_7x7_unchecked() const {
+        // too slow // assert(num_nonzeros_check(*num_nonzeros_above, above_unchecked()));
+        return num_nonzeros_above->num_nonzeros();
+    }
+    uint8_t nonzeros_left_7x7_unchecked() const {
+        std::vector<NeighborSummary>::iterator tmp = num_nonzeros_here;
+        --tmp;
+        // too slow // assert(num_nonzeros_check(*tmp, left_unchecked()));
+        return tmp->num_nonzeros();
+    }
+    const NeighborSummary& neighbor_context_above_unchecked() const {
+        // too slow // assert(num_nonzeros_check(*num_nonzeros_above, above_unchecked()));
+        return *num_nonzeros_above;
+    }
+    const NeighborSummary& neighbor_context_left_unchecked() const {
+        std::vector<NeighborSummary>::iterator tmp = num_nonzeros_here;
+        --tmp;
+        // too slow // assert(num_nonzeros_check(*tmp, left_unchecked()));
+        return *tmp;
+    }
+};
+typedef MBlockContext<AlignedBlock> BlockContext;
+typedef MBlockContext<const AlignedBlock> ConstBlockContext;
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/vp8/util/debug.cc b/codec/L2/demos/leptonEnc/host/vp8/util/debug.cc
new file mode 100644
index 0000000000..2fb4cd804d
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/util/debug.cc
@@ -0,0 +1,109 @@
+#include <sys/types.h>
+#ifdef _WIN32
+#include <io.h>
+#include <fcntl.h>
+#else
+#include <unistd.h>
+#include <sys/fcntl.h>
+#endif
+#include <errno.h>
+#include "debug.hh"
+#include "memory.hh"
+
+namespace LeptonDebug {
+int med_err;
+int amd_err;
+int avg_err;
+int ori_err;
+int loc_err;
+int luma_debug_width;
+int luma_debug_height;
+
+int chroma_debug_width;
+int chroma_debug_height;
+int getDebugWidth(int color) {
+    return color == 0 ? luma_debug_width : chroma_debug_width;
+}
+int getDebugHeight(int color) {
+    return color == 0 ? luma_debug_height : chroma_debug_height;
+}
+
+#if defined(DUMP_RAW_IMAGE)
+int load_raw_fd_output(const char* fname) {
+    return open(fname, O_CREAT | O_TRUNC | O_WRONLY, S_IWUSR | S_IRUSR);
+}
+char* serialize_unsigned_int(unsigned int value, char* output, bool term = true) { // writes value in decimal at output
+    unsigned int counter = value; // unsigned: an int copy goes negative above INT_MAX and can undercount the digits
+    char* end = output;
+    do { // first pass: advance end one slot per decimal digit (at least one, so 0 prints as "0")
+        ++end;
+        counter /= 10;
+    } while (counter);
+    if (term) {
+        *end = 0; // optional NUL terminator just past the last digit
+    }
+    char* retval = end; // returned pointer: one past the last digit written
+    do { // second pass: emit digits least-significant first, walking back toward output
+        *--end = '0' + value % 10;
+        value /= 10;
+    } while (value);
+    return retval;
+}
+static ptrdiff_t write_full(int fd, unsigned char* data, size_t size) {
+    size_t total_written = 0;
+    ptrdiff_t written = 0;
+    do {
+        written = write(fd, data + total_written, size - total_written);
+        if (written <= 0) {
+            if (errno == EINTR) {
+                continue;
+            }
+            return -1;
+        }
+        total_written += written;
+    } while (total_written < size);
+    return total_written;
+}
+void dumpDebugFile(int fd, int width, int height, unsigned char* data) {
+    char header[256] = "P5\n";
+    char* width_end = serialize_unsigned_int(width, header + 3);
+    *width_end = ' ';
+    ++width_end;
+    width_end = serialize_unsigned_int(height, width_end);
+    *width_end = ' ';
+    ++width_end;
+    width_end = serialize_unsigned_int(255, width_end);
+    *width_end = '\n';
+    ++width_end;
+    *width_end = '\0';
+    write_full(fd, (unsigned char*)header, width_end - header);
+    write_full(fd, data, width * height);
+}
+void dumpDebugData() {
+    dumpDebugFile(raw_decoded_fp_Y, luma_debug_width, luma_debug_height, raw_YCbCr[0]);
+    dumpDebugFile(raw_decoded_fp_Cb, chroma_debug_width, chroma_debug_height, raw_YCbCr[1]);
+    dumpDebugFile(raw_decoded_fp_Cr, chroma_debug_width, chroma_debug_height, raw_YCbCr[2]);
+}
+void setupDebugData(int lumaWidth, int lumaHeight, int chromaWidth, int chromaHeight) {
+    raw_YCbCr[0] = (unsigned char*)custom_calloc(lumaWidth * lumaHeight);
+    raw_YCbCr[1] = (unsigned char*)custom_calloc(chromaWidth * chromaHeight);
+    raw_YCbCr[2] = (unsigned char*)custom_calloc(chromaWidth * chromaHeight);
+    luma_debug_width = lumaWidth;
+    luma_debug_height = lumaHeight;
+    chroma_debug_width = chromaWidth;
+    chroma_debug_height = chromaHeight;
+}
+#else
+
+int load_raw_fd_output(const char* fname) {
+    return -1;
+}
+void dumpDebugData() {}
+void setupDebugData(int lumaWidth, int lumaHeight, int chromaWidth, int chromaHeight) {}
+
+#endif
+int raw_decoded_fp_Y = load_raw_fd_output("/tmp/raw_Y.pgm");
+int raw_decoded_fp_Cb = load_raw_fd_output("/tmp/raw_Cb.pgm");
+int raw_decoded_fp_Cr = load_raw_fd_output("/tmp/raw_Cr.pgm");
+unsigned char* raw_YCbCr[4] = {nullptr, nullptr, nullptr, nullptr};
+}
diff --git a/codec/L2/demos/leptonEnc/host/vp8/util/debug.hh b/codec/L2/demos/leptonEnc/host/vp8/util/debug.hh
new file mode 100644
index 0000000000..464efbcba2
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/util/debug.hh
@@ -0,0 +1,19 @@
+#ifndef _DEBUG_HH_
+#define _DEBUG_HH_
+extern bool g_threaded;
+namespace LeptonDebug {
+extern int raw_decoded_fp_Y;
+extern int raw_decoded_fp_Cb;
+extern int raw_decoded_fp_Cr;
+extern int med_err;
+extern int amd_err;
+extern int avg_err;
+extern int ori_err;
+extern int loc_err;
+extern unsigned char* raw_YCbCr[4];
+int getDebugWidth(int color);
+int getDebugHeight(int color);
+void dumpDebugData();
+void setupDebugData(int lumaWidth, int lumaHeight, int chromaWidth, int chromaHeight);
+}
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/vp8/util/generic_worker.cc b/codec/L2/demos/leptonEnc/host/vp8/util/generic_worker.cc
new file mode 100644
index 0000000000..198a43b39a
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/util/generic_worker.cc
@@ -0,0 +1,140 @@
+#include "memory.hh"
+#include <emmintrin.h>
+#include <assert.h>
+#ifdef _WIN32
+#include <io.h>
+#include <Windows.h>
+#include <fcntl.h>
+#else
+#include <unistd.h>
+#endif
+#include <errno.h>
+#ifdef __linux
+//#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+
+#endif
+#include <signal.h>
+#include "generic_worker.hh"
+#include "../../io/Seccomp.hh"
+
+const bool use_pipes = true;
+void GenericWorker::_generic_respond_to_main(uint8_t arg) {
+    work_done_++;
+    if (use_pipes) {
+        while (write(work_done_pipe[1], &arg, 1) < 0 && errno == EINTR) {
+        }
+    }
+}
+
+void GenericWorker::wait_for_work() {
+    bool sandbox_at_desired_level = true;
+    if (g_use_seccomp) {
+        Sirikata::installStrictSyscallFilter(true);
+    }
+    _generic_respond_to_main(0); // startup
+    char data = 0;
+    if (use_pipes) {
+        int err = 0;
+        while ((err = read(new_work_pipe[0], &data, 1)) < 0 && errno == EINTR) {
+        }
+        if (err <= 0) {
+            set_close_thread_handle(work_done_pipe[1]);
+            custom_terminate_this_thread(0);
+            return;
+        }
+    }
+    set_close_thread_handle(work_done_pipe[1]);
+    while (!new_work_exists_.load(std::memory_order_relaxed)) {
+        _mm_pause();
+    }
+    if (new_work_exists_.load()) { // enforce memory ordering
+        if (sandbox_at_desired_level) {
+            work();
+        }
+    } else {
+        always_assert(false); // invariant violated
+    }
+    _generic_respond_to_main(sandbox_at_desired_level ? 1 : 2);
+    reset_close_thread_handle();
+    custom_terminate_this_thread(0); // cleanly exit the thread with an allowed syscall
+}
+
+bool GenericWorker::is_done() {
+    if (work_done_.load(std::memory_order_relaxed) > 0) {
+        return work_done_.load() != 0; // enforce memory ordering
+    }
+    return false;
+}
+
+void GenericWorker::activate_work() {
+    ++new_work_exists_;
+    char data = 0;
+    while (write(new_work_pipe[1], &data, 1) < 0 && errno == EINTR) {
+    }
+}
+#ifdef _WIN32
+int make_pipe(int pipes[2]) {
+    HANDLE read_pipe, write_pipe;
+    if (CreatePipe(&read_pipe, &write_pipe, NULL, 65536)) {
+        pipes[0] = _open_osfhandle((intptr_t)read_pipe, O_RDONLY);
+        pipes[1] = _open_osfhandle((intptr_t)write_pipe, O_WRONLY);
+        return 0;
+    }
+    errno = EINVAL;
+    return -1;
+}
+#else
+int make_pipe(int pipes[2]) {
+    return pipe(pipes);
+}
+#endif
+Sirikata::Array1d<int, 2> GenericWorker::initiate_pipe() {
+    int pipes[2] = {-1, -1};
+    if (use_pipes) {
+        while (make_pipe(pipes) != 0 && errno == EINTR) {
+        }
+    }
+    Sirikata::Array1d<int, 2> retval;
+    retval.at(0) = pipes[0];
+    retval.at(1) = pipes[1];
+    return retval;
+}
+void GenericWorker::_generic_wait(uint8_t expected_arg) {
+    if (use_pipes) {
+        char data = 0;
+        while (read(work_done_pipe[0], &data, 1) < 0 && errno == EINTR) {
+        }
+        if (data != expected_arg) {
+            char err[] = "x: Worker thread out of memory.\n";
+            err[0] = '0' + expected_arg;
+            while (write(2, err, strlen(err)) < 0 && errno == EINTR) {
+            }
+            custom_exit(ExitCode::THREAD_PROTOCOL_ERROR);
+        }
+    }
+
+    while (!is_done()) {
+        _mm_pause();
+    }
+    work_done_.load(); // enforce memory ordering
+}
+void GenericWorker::_wait_for_child_to_begin() {
+    always_assert(!child_begun); // make sure this has work to do
+    _generic_wait(0);
+    --work_done_;
+    child_begun = true;
+}
+void GenericWorker::join_via_syscall() {
+#ifndef _WIN32
+    signal(SIGPIPE, SIG_IGN);
+#endif
+    while (close(work_done_pipe.at(0)) && errno == EINTR) {
+    }
+    child_.join();
+}
+void GenericWorker::main_wait_for_done() {
+    always_assert(new_work_exists_.load()); // make sure this has work to do
+    _generic_wait(1);
+}
diff --git a/codec/L2/demos/leptonEnc/host/vp8/util/generic_worker.hh b/codec/L2/demos/leptonEnc/host/vp8/util/generic_worker.hh
new file mode 100644
index 0000000000..ff0daf3713
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/util/generic_worker.hh
@@ -0,0 +1,36 @@
+#include <atomic>
+#include <functional>
+#include <thread>
+#include "nd_array.hh"
+#include "options.hh"
+struct GenericWorker {
+    bool child_begun;
+    std::atomic<int> new_work_exists_;
+    std::atomic<int> work_done_;
+    std::function<void()> work;
+    Sirikata::Array1d<int, 2> new_work_pipe;
+    Sirikata::Array1d<int, 2> work_done_pipe;
+    static Sirikata::Array1d<int, 2> initiate_pipe();
+    GenericWorker()
+        : child_begun(false),
+          new_work_exists_(0),
+          work_done_(0),
+          new_work_pipe(initiate_pipe()),
+          work_done_pipe(initiate_pipe()),
+          child_(std::bind(&GenericWorker::wait_for_work, this)) {
+        // need to make sure child sets up seccomp properly before proceeding
+        _wait_for_child_to_begin();
+    }
+    void activate_work();
+    bool is_done();
+    void main_wait_for_done();
+    void wait_for_work();
+    bool has_ever_queued_work() { return new_work_exists_.load() != 0; }
+    void join_via_syscall();
+
+   private:
+    std::thread child_; // this must come after other members, so items are initialized first
+    void _wait_for_child_to_begin();
+    void _generic_wait(uint8_t expected_response);
+    void _generic_respond_to_main(uint8_t arg);
+};
diff --git a/codec/L2/demos/leptonEnc/host/vp8/util/memory.cc b/codec/L2/demos/leptonEnc/host/vp8/util/memory.cc
new file mode 100644
index 0000000000..1383a279ef
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/util/memory.cc
@@ -0,0 +1,228 @@
+#include <immintrin.h>
+#include "options.hh"
+#include "memory.hh"
+#ifdef _WIN32
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+#include <errno.h>
+#ifdef __linux
+#include <sys/syscall.h>
+#endif
+#ifdef _WIN32
+#define USE_STANDARD_MEMORY_ALLOCATORS
+#endif
+#if defined(__APPLE__) || (__cplusplus <= 199711L && !defined(_WIN32))
+#define THREAD_LOCAL_STORAGE __thread
+#else
+#define THREAD_LOCAL_STORAGE thread_local
+#endif
+unsigned int NUM_THREADS = MAX_NUM_THREADS;
+const char* ExitString(ExitCode ec) {
+    FOREACH_EXIT_CODE(GENERATE_EXIT_CODE_RETURN)
+    static char data[] = "XXXX_EXIT_CODE_BEYOND_EXIT_CODE_ARRAY";
+    data[0] = ((int)ec / 1000) + '0';
+    data[1] = ((int)ec / 100 % 10) + '0';
+    data[2] = ((int)ec / 10 % 10) + '0';
+    data[3] = ((int)ec % 10) + '0';
+    return data;
+}
+extern "C" {
+void always_assert_exit(bool value, const char* expr, const char* file, int line) { // prints "Assert Failed: expr at (file:line)"
+    // if (!value) {   NOTE(review): guard is disabled, so value is ignored and every call exits — confirm callers test first
+    while (write(2, "Assert Failed: ", strlen("Assert Failed: ")) < 0 && errno == EINTR) { // retry writes interrupted by a signal
+    }
+    while (write(2, expr, strlen(expr)) < 0 && errno == EINTR) {
+    }
+    while (write(2, " at (", 5) < 0 && errno == EINTR) {
+    }
+    while (write(2, file, strlen(file)) < 0 && errno == EINTR) {
+    }
+    while (write(2, ":", 1) < 0 && errno == EINTR) {
+    }
+    fprintf(stderr, "%d)\n", line);
+    if (!g_use_seccomp) { // without seccomp, abort() is used instead of custom_exit
+        abort();
+    }
+    custom_exit(ExitCode::ASSERTION_FAILURE);
+    //}
+}
+void* custom_malloc(size_t size) {
+#ifdef USE_STANDARD_MEMORY_ALLOCATORS
+#ifdef _WIN32
+    return _aligned_malloc(size, 32);
+#else
+    void* ptr;
+    int retval = posix_memalign(&ptr, 32, size);
+    if (!g_use_seccomp) {
+        assert(retval == 0 && "posix_memalign returned non-zero");
+    }
+    if (retval != 0) {
+        custom_exit(ExitCode::MALLOCED_NULL);
+    }
+    return ptr;
+#endif
+#else
+    fprintf(stderr, "call in custom_calloc with not std mem alloc L72\n");
+    void* retval = Sirikata::memmgr_alloc(size);
+    if (retval == 0) { // did malloc succeed?
+        if (!g_use_seccomp) {
+            assert(false && "Out of memory error");
+        }
+        custom_exit(ExitCode::OOM); // ran out of memory
+    }
+    return retval;
+#endif
+}
+
+void* custom_realloc(void* old, size_t size) { // resizes old to size bytes; exits with OOM instead of returning NULL
+#ifdef USE_STANDARD_MEMORY_ALLOCATORS
+#ifdef _WIN32
+    return _aligned_realloc(old, size, 32);
+#else
+    void* unaligned_retval = realloc(old, size); // realloc gives no 32-byte alignment guarantee
+    if (unaligned_retval == NULL && size != 0) custom_exit(ExitCode::OOM); // realloc failed: nothing valid to copy from
+    void* retval = memcpy(custom_malloc(size), unaligned_retval, size); // move contents into a 32-byte aligned buffer
+    free(unaligned_retval);
+    return retval;
+#endif
+#else
+    size_t actual_size = 0;
+    void* retval = Sirikata::MemMgrAllocatorRealloc(old, size, &actual_size, true, NULL);
+    if (retval == 0) { // did malloc succeed?
+        if (!g_use_seccomp) {
+            assert(false && "Out of memory error");
+        }
+        custom_exit(ExitCode::OOM); // ran out of memory
+    }
+    return retval;
+#endif
+}
+void custom_free(void* ptr) {
+#ifdef USE_STANDARD_MEMORY_ALLOCATORS
+#ifdef _WIN32
+    _aligned_free(ptr);
+#else
+    free(ptr);
+#endif
+#else
+    Sirikata::memmgr_free(ptr);
+#endif
+}
+void* bzero32(void* aligned_32) {
+#if __AVX2__
+    _mm256_store_si256((__m256i*)aligned_32, _mm256_setzero_si256());
+#else
+    _mm_store_si128((__m128i*)aligned_32, _mm_setzero_si128());
+    _mm_store_si128(((__m128i*)aligned_32) + 1, _mm_setzero_si128());
+#endif
+    return aligned_32;
+}
+void* custom_calloc(size_t size) {
+#ifdef USE_STANDARD_MEMORY_ALLOCATORS
+#ifdef _WIN32
+    return _aligned_recalloc(bzero32(_aligned_malloc(32, 32)), 1, size, 32);
+#else
+    fprintf(stderr, "call in custom_calloc else\n");
+    return memset(custom_malloc(size), 0, size);
+#endif
+#else
+    fprintf(stderr, "call in custom_calloc with not std mem alloc\n");
+    void* retval = Sirikata::memmgr_alloc(size); // guaranteed to return 0'd memory
+    if (retval == 0) {                           // did malloc succeed?
+        if (!g_use_seccomp) {
+            assert(false && "Out of memory error");
+        }
+        custom_exit(ExitCode::OOM); // ran out of memory
+    }
+    return retval;
+#endif
+}
+}
+bool g_use_seccomp =
+#ifndef __linux
+    false
+#else
+    false // true
+#endif
+    ;
+void* operator new(size_t size) { // global replacement; dynamic exception specs are ill-formed since C++17
+    void* ptr = custom_malloc(size);
+    if (ptr == 0) { // did malloc succeed?
+        if (!g_use_seccomp) {
+            assert(false && "Out of memory error");
+        }
+        custom_exit(ExitCode::OOM); // ran out of memory
+    }
+    return ptr;
+}
+
+void* operator new[](size_t size) { // global replacement; dynamic exception specs are ill-formed since C++17
+    void* ptr = custom_malloc(size);
+    if (ptr == 0) { // did malloc succeed?
+        if (!g_use_seccomp) {
+            assert(false && "Out of memory error");
+        }
+        custom_exit(ExitCode::OOM); // ran out of memory
+    }
+    return ptr;
+}
+
+void operator delete(void* ptr) noexcept { // global replacement; noexcept supersedes the deprecated throw()
+    custom_free(ptr);
+}
+void operator delete[](void* ptr) noexcept { // global replacement; noexcept supersedes the deprecated throw()
+    custom_free(ptr);
+}
+THREAD_LOCAL_STORAGE int l_emergency_close_signal = -1;
+THREAD_LOCAL_STORAGE void (*atexit_f)(void*, uint64_t) = nullptr;
+THREAD_LOCAL_STORAGE void* atexit_arg0 = nullptr;
+THREAD_LOCAL_STORAGE uint64_t atexit_arg1 = 0;
+void custom_atexit(void (*atexit)(void*, uint64_t), void* arg0, uint64_t arg1) {
+    assert(!atexit_f);
+    atexit_f = atexit;
+    atexit_arg0 = arg0;
+    atexit_arg1 = arg1;
+}
+void close_thread_handle() {
+    if (l_emergency_close_signal != -1) {
+        const unsigned char close_data[1] = {255};
+        int handle = l_emergency_close_signal;
+        while (write(handle, close_data, 1) < 0 && errno == EINTR) {
+        }
+    }
+}
+void set_close_thread_handle(int handle) {
+    assert(l_emergency_close_signal == -1);
+    l_emergency_close_signal = handle;
+}
+void reset_close_thread_handle() {
+    l_emergency_close_signal = -1;
+}
+
+void custom_terminate_this_thread(uint8_t exit_code) { // signals the emergency-close handle (if set), then issues SYS_exit
+    close_thread_handle();
+#ifdef __linux
+    syscall(SYS_exit, exit_code); // raw exit syscall rather than exit()
+#endif
+} // NOTE(review): outside __linux this returns to the caller instead of terminating — confirm callers tolerate that
+void custom_exit(ExitCode exit_code) {
+    close_thread_handle();
+    if (atexit_f) {
+        (*atexit_f)(atexit_arg0, atexit_arg1);
+        atexit_f = nullptr;
+    }
+    if (exit_code != ExitCode::SUCCESS) {
+        while (write(2, ExitString(exit_code), strlen(ExitString(exit_code))) < 0 && errno == EINTR) {
+        }
+        while (write(2, "\n", 1) < 0 && errno == EINTR) {
+        }
+    }
+#ifdef __linux
+    syscall(SYS_exit, (int)exit_code);
+#else
+    exit((int)exit_code);
+#endif
+    abort();
+}
diff --git a/codec/L2/demos/leptonEnc/host/vp8/util/memory.hh b/codec/L2/demos/leptonEnc/host/vp8/util/memory.hh
new file mode 100644
index 0000000000..a043810074
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/util/memory.hh
@@ -0,0 +1,104 @@
+#ifndef _MEMORY_HH_
+#define _MEMORY_HH_
+#if defined(__cplusplus) || defined(c_plusplus)
+#include <new>
+#include <cstdlib>
+#include <assert.h>
+#include <cstdio>
+#include <cstring>
+#include "../../io/DecoderPlatform.hh"
+#include "../../io/MemMgrAllocator.hh"
+extern bool g_use_seccomp;
+
+#define FOREACH_EXIT_CODE(CB) /* X-macro table: expands CB(NAME, VALUE) once per exit code */ \
+    CB(SUCCESS, 0)                           \
+    CB(ASSERTION_FAILURE, 1)                 \
+    CB(CODING_ERROR, 2)                      \
+    CB(SHORT_READ, 3)                        \
+    CB(UNSUPPORTED_4_COLORS, 4)              \
+    CB(THREAD_PROTOCOL_ERROR, 5)             \
+    CB(COEFFICIENT_OUT_OF_RANGE, 6)          \
+    CB(STREAM_INCONSISTENT, 7)               \
+    CB(PROGRESSIVE_UNSUPPORTED, 8)           \
+    CB(FILE_NOT_FOUND, 9)                    \
+    CB(SAMPLING_BEYOND_TWO_UNSUPPORTED, 10)  \
+    CB(SAMPLING_BEYOND_FOUR_UNSUPPORTED, 11) \
+    CB(THREADING_PARTIAL_MCU, 12)            \
+    CB(VERSION_UNSUPPORTED, 13)              \
+    CB(ONLY_GARBAGE_NO_JPEG, 14)             \
+    CB(OS_ERROR, 33)                         \
+    CB(HEADER_TOO_LARGE, 34)                 \
+    CB(DIMENSIONS_TOO_LARGE, 35)             \
+    CB(MALLOCED_NULL, 36)                    \
+    CB(OOM, 37)                              \
+    CB(TOO_MUCH_MEMORY_NEEDED, 38)           \
+    CB(EARLY_EXIT, 40)                       \
+    CB(ROUNDTRIP_FAILURE, 41)                \
+    CB(UNSUPPORTED_JPEG, 42)                 \
+    CB(COULD_NOT_BIND_PORT, 127)
+
+#define MAKE_EXIT_CODE_ENUM(ITEM, VALUE) ITEM = VALUE, /* FOREACH_EXIT_CODE callback: emits one enumerator */
+#define GENERATE_EXIT_CODE_RETURN(ITEM, VALUE) /* callback: returns "ITEM" when a variable named ec equals it */ \
+    {                                          \
+        if ((ec) == ExitCode::ITEM) {          \
+            return #ITEM;                      \
+        }                                      \
+    }
+
+#if __cplusplus <= 199711L && !defined(_WIN32) // pre-C++11: a plain enum wrapped in a namespace so ExitCode::NAME still works
+namespace ExitCode {
+enum ExitCode_ {
+#else
+enum class ExitCode { // C++11 and later (or _WIN32): a real scoped enum
+#endif
+    FOREACH_EXIT_CODE(MAKE_EXIT_CODE_ENUM) // one enumerator per row of the exit-code table
+#if __cplusplus > 199711L || defined(_WIN32)
+};
+#else
+};
+}
+#endif
+
+#if __cplusplus > 199711L || defined(_WIN32) // same condition that selects the scoped-enum form of ExitCode
+#ifndef _WIN32
+[[noreturn]] // the attribute is omitted when _WIN32 is defined
+#endif
+    void
+    custom_exit(ExitCode exit_code);
+#else
+void custom_exit(ExitCode::ExitCode_ exit_code); // pre-C++11 spelling of the same parameter type
+#endif
+#define always_assert(EXPR) always_assert_outer((EXPR), #EXPR, __FILE__, __LINE__) // forwards the value plus its text and location
+void custom_terminate_this_thread(uint8_t exit_code);
+typedef void atexit_type(void*, uint64_t); // signature of the handler accepted by custom_atexit
+void custom_atexit(atexit_type* atexit, void* arg0, uint64_t arg1); // handler is later called as atexit(arg0, arg1)
+extern "C" {
+#else
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#endif
+#if __cplusplus > 199711L // attribute only for C++11 and later; a C compiler sees the plain declaration
+[[noreturn]]
+#endif
+    void
+    always_assert_exit(bool value, const char* expr, const char* file, int line); // NOTE(review): the C include branch above has no <stdbool.h> for bool
+
+inline void always_assert_outer(bool value, const char* expr, const char* file, int line) {
+    // The check is expected to hold, so the hint marks the success path as the likely one.
+    if (__builtin_expect(value, 1)) return;
+    always_assert_exit(value, expr, file, line); // reached only for a failed assertion
+}
+
+void* custom_malloc(size_t size); // allocation entry points declared here; their definitions are not in this header
+void* custom_realloc(void* old, size_t size);
+void custom_free(void* ptr);
+
+void* custom_calloc(size_t size); // takes a single size argument, unlike the two-argument standard calloc
+void set_close_thread_handle(int handle);
+void reset_close_thread_handle();
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+
+#endif
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/vp8/util/mm_mullo_epi32.hh b/codec/L2/demos/leptonEnc/host/vp8/util/mm_mullo_epi32.hh
new file mode 100644
index 0000000000..e82e71ab13
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/util/mm_mullo_epi32.hh
@@ -0,0 +1,47 @@
+/**
+    # $FreeBSD$
+        #       @(#)COPYRIGHT   8.2 (Berkeley) 3/21/94
+
+        The compilation of software known as the FreeBSD Ports Collection is
+        distributed under the following terms:
+
+        Copyright (C) 1994-2016 The FreeBSD Project. All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+        modification, are permitted provided that the following conditions
+        are met:
+        1. Redistributions of source code must retain the above copyright
+           notice, this list of conditions and the following disclaimer.
+        2. Redistributions in binary form must reproduce the above copyright
+           notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+        THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+        ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+        IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+        ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+        FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+        DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+        OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+        HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+        LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+        OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+        SUCH DAMAGE.
+*/
+
+#if defined(__SSE2__) && !defined(__SSE4_1__) && !defined(MM_MULLO_EPI32_H)
+#define MM_MULLO_EPI32_H
+
+#include <immintrin.h>
+// See:	http://stackoverflow.com/questions/10500766/sse-multiplication-of-4-32-bit-integers
+// and	https://software.intel.com/en-us/forums/intel-c-compiler/topic/288768
+static inline __m128i fallback_mm_mullo_epi32(const __m128i& a, const __m128i& b) {
+    const __m128i even = _mm_mul_epu32(a, b); /* products of lanes 0 and 2 */
+    const __m128i odd = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4)); /* lanes 1 and 3, shifted down first */
+    const __m128i even_lo = _mm_shuffle_epi32(even, _MM_SHUFFLE(0, 0, 2, 0)); /* gather the low 32 bits of each product */
+    const __m128i odd_lo = _mm_shuffle_epi32(odd, _MM_SHUFFLE(0, 0, 2, 0));
+    return _mm_unpacklo_epi32(even_lo, odd_lo); /* interleave back into lane order 0,1,2,3 */
+}
+#define _mm_mullo_epi32 fallback_mm_mullo_epi32
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/vp8/util/mmap.cc b/codec/L2/demos/leptonEnc/host/vp8/util/mmap.cc
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/codec/L2/demos/leptonEnc/host/vp8/util/nd_array.hh b/codec/L2/demos/leptonEnc/host/vp8/util/nd_array.hh
new file mode 100644
index 0000000000..3bd9d3d9df
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/util/nd_array.hh
@@ -0,0 +1,1060 @@
+/*  Sirikata Utilities -- Sirikata Array Utilities
+ *  ArrayNd.hpp
+ *
+ *  Copyright (c) 2009, Daniel Reiter Horn
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are
+ *  met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  * Neither the name of Sirikata nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SIRIKATA_ARRAY_ND_HPP_
+#define _SIRIKATA_ARRAY_ND_HPP_
+#include <assert.h>
+#include <cstddef>
+#include <cstring>
+#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus > 199711L || defined(_WIN32)
+#include <cstdint>
+#else
+#include <assert.h>
+#include <stdint.h>
+#define constexpr
+#define NOCONSTEXPR
+#endif
+#ifdef AVOID_ARRAY_BOUNDS_CHECKS
+#define ARRAY_BOUNDS_ASSERT(x) assert(x)
+#else
+#include "memory.hh"
+#define ARRAY_BOUNDS_ASSERT(x) always_assert(x)
+#endif
+
+namespace Sirikata {
+
+template <class T>
+class ReferenceType { // storage policy for slices: the array object holds a pointer to someone else's T::Array
+   public:
+    typedef typename T::Array BaseArrayType;
+    typedef typename T::Array* ArrayType; // type of the `data` member under this policy
+    static BaseArrayType& dereference(BaseArrayType* a) { return *a; } // follows the pointer to reach the real array
+    static const BaseArrayType& dereference(const BaseArrayType* a) { return *a; }
+};
+template <class T>
+class DirectType { // storage policy for arrays that hold their T::Array by value
+   public:
+    typedef typename T::Array BaseArrayType;
+    typedef typename T::Array ArrayType; // type of the `data` member under this policy
+    static BaseArrayType& dereference(BaseArrayType& a) { return a; } // identity: the member already is the array
+    static const BaseArrayType& dereference(const BaseArrayType& a) { return a; }
+};
+class RoundToPow2 { // tag type: asks RoundP2 to round array extents up
+   public:
+    enum { SHOULD_ROUND_POW2 = 1 };
+};
+class DontRoundPow2 { // tag type: RoundP2 leaves array extents exactly as given
+   public:
+    enum { SHOULD_ROUND_POW2 = 0 };
+};
+template <uint32_t v, class ShouldRound>
+class RoundP2 { // compile-time extent: v itself, or the next power of two above v when ShouldRound requests rounding
+   public:
+    enum RoundingResult {
+        s1 = v | (v >> 1), s2 = s1 | (s1 >> 2), s4 = s2 | (s2 >> 4), s8 = s4 | (s4 >> 8), // smear the top set bit downwards
+        smeared = s8 | (s8 >> 16), // every bit below the top set bit of v is now 1, so smeared + 1 is a power of two
+        value = (ShouldRound::SHOULD_ROUND_POW2 && (v & (v - 1)) ? (1 + smeared) : v) // exact powers of two (and 0) pass through
+    };
+    typedef char // compile-time guard: a negative array size is an error if rounding was requested but value is not a power of two
+        ARRAY_BOUNDS_ASSERT_power_of_two_constraint[(value & (value - 1)) == 0 || !ShouldRound::SHOULD_ROUND_POW2 ? 1
+                                                                                                                  : -1];
+};
+template <class T, uint32_t s0, class ShouldRoundPow2>
+struct ArrayBaseType1d { // names the raw C array type that backs Array1d
+    typedef T Array[RoundP2<(s0 ? s0 : 1), ShouldRoundPow2>::value]; // a zero extent is replaced by 1 before RoundP2 is applied
+};
+template <class T, uint32_t s0, uint32_t s1, class ShouldRoundPow2>
+struct ArrayBaseType2d { // names the raw 2-D C array type behind Array2d; each extent goes through RoundP2
+    typedef T Array[RoundP2<s0, ShouldRoundPow2>::value][RoundP2<s1, ShouldRoundPow2>::value];
+};
+template <class T, uint32_t s0, uint32_t s1, uint32_t s2, class ShouldRoundPow2>
+struct ArrayBaseType3d { // names the raw 3-D C array type behind Array3d; each extent goes through RoundP2
+    typedef T Array[RoundP2<s0, ShouldRoundPow2>::value][RoundP2<s1, ShouldRoundPow2>::value]
+                   [RoundP2<s2, ShouldRoundPow2>::value];
+};
+template <class T, uint32_t s0, uint32_t s1, uint32_t s2, uint32_t s3, class ShouldRoundPow2>
+struct ArrayBaseType4d { // names the raw 4-D C array type behind Array4d; each extent goes through RoundP2
+    typedef T Array[RoundP2<s0, ShouldRoundPow2>::value][RoundP2<s1, ShouldRoundPow2>::value]
+                   [RoundP2<s2, ShouldRoundPow2>::value][RoundP2<s3, ShouldRoundPow2>::value];
+};
+template <class T, uint32_t s0, uint32_t s1, uint32_t s2, uint32_t s3, uint32_t s4, class ShouldRoundPow2>
+struct ArrayBaseType5d { // names the raw 5-D C array type behind Array5d; each extent goes through RoundP2
+    typedef T Array[RoundP2<s0, ShouldRoundPow2>::value][RoundP2<s1, ShouldRoundPow2>::value]
+                   [RoundP2<s2, ShouldRoundPow2>::value][RoundP2<s3, ShouldRoundPow2>::value]
+                   [RoundP2<s4, ShouldRoundPow2>::value];
+};
+template <class T, uint32_t s0, uint32_t s1, uint32_t s2, uint32_t s3, uint32_t s4, uint32_t s5, class ShouldRoundPow2>
+struct ArrayBaseType6d { // names a raw 6-D C array type; each extent goes through RoundP2
+    typedef T Array[RoundP2<s0, ShouldRoundPow2>::value][RoundP2<s1, ShouldRoundPow2>::value]
+                   [RoundP2<s2, ShouldRoundPow2>::value][RoundP2<s3, ShouldRoundPow2>::value]
+                   [RoundP2<s4, ShouldRoundPow2>::value][RoundP2<s5, ShouldRoundPow2>::value];
+};
+template <class T,
+          uint32_t s0,
+          uint32_t s1,
+          uint32_t s2,
+          uint32_t s3,
+          uint32_t s4,
+          uint32_t s5,
+          uint32_t s6,
+          class ShouldRoundPow2>
+struct ArrayBaseType7d { // names a raw 7-D C array type; each extent goes through RoundP2
+    typedef T Array[RoundP2<s0, ShouldRoundPow2>::value][RoundP2<s1, ShouldRoundPow2>::value]
+                   [RoundP2<s2, ShouldRoundPow2>::value][RoundP2<s3, ShouldRoundPow2>::value]
+                   [RoundP2<s4, ShouldRoundPow2>::value][RoundP2<s5, ShouldRoundPow2>::value]
+                   [RoundP2<s6, ShouldRoundPow2>::value];
+};
+
+template <class T,
+          uint32_t s0,
+          class ShouldRoundPow2 = DontRoundPow2,
+          class IsReferenceType = DirectType<ArrayBaseType1d<T, s0, ShouldRoundPow2> > >
+struct Array1d { // bounds-checked 1-D array of s0 elements, stored by value or, for a Slice, reached through a pointer
+    typedef typename ArrayBaseType1d<T, s0, ShouldRoundPow2>::Array Array; // underlying C array type
+    typedef IsReferenceType IsReference; // storage policy: DirectType (by value) or ReferenceType (pointer)
+    typename IsReference::ArrayType data;
+    typedef Array1d<T, s0, ShouldRoundPow2, ReferenceType<ArrayBaseType1d<T, s0, ShouldRoundPow2> > > Slice; // pointer-backed view type
+    enum Sizes { size0 = s0 };
+    static constexpr uint32_t size() { return s0; }
+    static constexpr uint32_t dimsize() { return s0; }
+    static constexpr uint32_t dimension() { return 1; }
+    T& at(uint32_t i0) { // element access; the index check is performed by ARRAY_BOUNDS_ASSERT
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        return IsReference::dereference(data)[i0];
+    }
+    template <int index>
+    constexpr T kat() const { // element at a compile-time index, returned by value
+        static_assert(index < s0, "template argument must be within bound");
+        return ((const T*)data)[index]; // same element pointer as begin(), so pointer-backed slices index elements, not arrays
+    }
+    const T& at(uint32_t i0) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        return IsReference::dereference(data)[i0];
+    }
+    T& operator[](uint32_t i0) { // same checked access as at()
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        return IsReference::dereference(data)[i0];
+    }
+    const T& operator[](uint32_t i0) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        return IsReference::dereference(data)[i0];
+    }
+    template <class StartEnd> // range given as a type exposing START and END constants; the argument value is unused
+    typename Array1d<T, StartEnd::END - StartEnd::START, ShouldRoundPow2>::Slice slice(const StartEnd& range) {
+        return slice<StartEnd::START, StartEnd::END>();
+    }
+    template <class StartEnd>
+    typename Array1d<T, StartEnd::END - StartEnd::START, ShouldRoundPow2>::Slice slice(const StartEnd& range) const {
+        return slice<StartEnd::START, StartEnd::END>();
+    }
+    template <uint32_t kstart, uint32_t kend> // view of elements [kstart, kend); both limits are validated at compile time
+    typename Array1d<T, kend - kstart, ShouldRoundPow2>::Slice slice() {
+        uint8_t ARRAY_BOUNDS_ASSERT_slice_legal[kend > s0 ? -1 : 1]; // negative array size = compile error when kend > s0
+        uint8_t ARRAY_BOUNDS_ASSERT_slice_start_legal[kend < kstart ? -1 : 1]; // likewise when kend < kstart
+        (void)ARRAY_BOUNDS_ASSERT_slice_legal;
+        (void)ARRAY_BOUNDS_ASSERT_slice_start_legal;
+        const typename Array1d<T, kend - kstart, ShouldRoundPow2>::Slice retval = {
+            (typename Array1d<T, kend - kstart, ShouldRoundPow2>::Slice::IsReference::ArrayType) &
+            IsReference::dereference(data)[kstart]};
+        return retval;
+    }
+    template <uint32_t start, uint32_t end>
+    const typename Array1d<T, end - start, ShouldRoundPow2>::Slice slice() const {
+        uint8_t ARRAY_BOUNDS_ASSERT_slice_legal[end > s0 ? -1 : 1];
+        uint8_t ARRAY_BOUNDS_ASSERT_slice_start_legal[end < start ? -1 : 1];
+        (void)ARRAY_BOUNDS_ASSERT_slice_legal;
+        (void)ARRAY_BOUNDS_ASSERT_slice_start_legal;
+        const typename Array1d<T, end - start, ShouldRoundPow2>::Slice retval = {
+            (typename Array1d<T, end - start, ShouldRoundPow2>::Slice::IsReference::ArrayType) &
+            IsReference::dereference(data)[start]}; // the C-style cast also drops the const picked up in this const overload
+        return retval;
+    }
+
+    template <uint32_t new_size> // view of new_size elements beginning at a run-time start index
+    typename Array1d<T, new_size, ShouldRoundPow2>::Slice dynslice(uint32_t start) {
+        uint8_t ARRAY_BOUNDS_ASSERT_slice_size_legal[new_size > s0 ? -1 : 1]; // guarantees new_size <= s0 at compile time
+        (void)ARRAY_BOUNDS_ASSERT_slice_size_legal;
+        ARRAY_BOUNDS_ASSERT(start <= s0 - new_size && "slice must fit within original array"); // subtraction form cannot wrap
+        const typename Array1d<T, new_size, ShouldRoundPow2>::Slice retval = {
+            (typename Array1d<T, new_size, ShouldRoundPow2>::Slice::IsReference::ArrayType) &
+            IsReference::dereference(data)[start]};
+        return retval;
+    }
+
+    void memset(uint8_t val) { std::memset(data, val, sizeof(Array)); } // fills sizeof(Array) bytes, i.e. the whole underlying array
+    template <class F>
+    void foreach (const F& f) { // calls f on each of the s0 elements in index order
+        for (uint32_t i0 = 0; i0 < s0; ++i0) {
+            f(IsReference::dereference(data)[i0]);
+        }
+    }
+    template <class F>
+    void foreach (const F& f) const {
+        for (uint32_t i0 = 0; i0 < s0; ++i0) {
+            f(IsReference::dereference(data)[i0]);
+        }
+    }
+    T* begin() { return (T*)data; } // the cast yields the first element for both storage policies
+    const T* begin() const { return (const T*)data; }
+    T* end() { return (T*)data + s0; }
+    const T* end() const { return (const T*)data + s0; }
+};
+
+template <class T,
+          uint32_t s0,
+          uint32_t s1,
+          class ShouldRoundPow2 = DontRoundPow2,
+          class IsReferenceType = DirectType<ArrayBaseType2d<T, s0, s1, ShouldRoundPow2> > >
+struct Array2d { // bounds-checked s0 x s1 array, stored by value or, for a Slice, reached through a pointer
+    typedef typename ArrayBaseType2d<T, s0, s1, ShouldRoundPow2>::Array Array; // underlying C array type
+    typedef IsReferenceType IsReference; // storage policy: DirectType (by value) or ReferenceType (pointer)
+    typename IsReference::ArrayType data;
+    typedef Array2d<T, s0, s1, ShouldRoundPow2, ReferenceType<ArrayBaseType2d<T, s0, s1, ShouldRoundPow2> > > Slice;
+    enum Sizes { size0 = s0, size1 = s1 };
+    static constexpr Array1d<uint32_t, 2, ShouldRoundPow2> size() { // both extents, returned as a small array {s0, s1}
+#ifdef NOCONSTEXPR
+        Array1d<uint32_t, 2, ShouldRoundPow2> retval = {{s0, s1}};
+        return retval;
+#else
+        return {{s0, s1}};
+#endif
+    }
+    static constexpr uint32_t dimension() { return 2; } // constexpr to match Array1d::dimension() and dimsize() below
+    static constexpr uint32_t dimsize() { return s0; }
+    T& raster(uint32_t index) { // flat access through a pointer to element [0][0]; index is checked against s0 * s1
+        ARRAY_BOUNDS_ASSERT(index < s0 * s1);
+        return (&IsReference::dereference(data)[0][0])[index];
+    }
+    const T& raster(uint32_t index) const {
+        ARRAY_BOUNDS_ASSERT(index < s0 * s1);
+        return (&IsReference::dereference(data)[0][0])[index];
+    }
+    T& at(uint32_t i0, uint32_t i1) { // element access with both indices checked
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        return IsReference::dereference(data)[i0][i1];
+    }
+    const T& at(uint32_t i0, uint32_t i1) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        return IsReference::dereference(data)[i0][i1];
+    }
+    typename Array1d<T, s1, ShouldRoundPow2>::Slice at(uint32_t i0) { // 1-D view of row i0
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        typename Array1d<T, s1, ShouldRoundPow2>::Slice retval = {&IsReference::dereference(data)[i0]};
+        return retval;
+    }
+    const typename Array1d<T, s1, ShouldRoundPow2>::Slice at(uint32_t i0) const { // NOTE(review): no const-dropping cast, unlike Array1d::slice() const; confirm it builds for a const by-value array
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        const typename Array1d<T, s1, ShouldRoundPow2>::Slice retval = {&IsReference::dereference(data)[i0]};
+        return retval;
+    }
+    void memset(uint8_t val) { std::memset(data, val, sizeof(Array)); } // fills sizeof(Array) bytes, i.e. the whole underlying array
+    template <class F>
+    void foreach (const F& f) { // calls f on every element, last index varying fastest
+        for (uint32_t i0 = 0; i0 < s0; ++i0) {
+            for (uint32_t i1 = 0; i1 < s1; ++i1) {
+                f(IsReference::dereference(data)[i0][i1]);
+            }
+        }
+    }
+    template <class F>
+    void foreach (const F& f) const {
+        for (uint32_t i0 = 0; i0 < s0; ++i0) {
+            for (uint32_t i1 = 0; i1 < s1; ++i1) {
+                f(IsReference::dereference(data)[i0][i1]);
+            }
+        }
+    }
+    T* begin() { return (T*)data; } // the cast yields the first element for both storage policies
+    const T* begin() const { return (const T*)data; }
+    T* end() { return (T*)data + s0 * s1; }
+    const T* end() const { return (const T*)data + s0 * s1; }
+};
+
+template <class T,
+          uint32_t s0,
+          uint32_t s1,
+          uint32_t s2,
+          class ShouldRoundPow2 = DontRoundPow2,
+          class IsReferenceType = DirectType<ArrayBaseType3d<T, s0, s1, s2, ShouldRoundPow2> > >
+struct Array3d { // bounds-checked s0 x s1 x s2 array, stored by value or, for a Slice, reached through a pointer
+    typedef typename ArrayBaseType3d<T, s0, s1, s2, ShouldRoundPow2>::Array Array; // underlying C array type
+    typedef IsReferenceType IsReference; // storage policy: DirectType (by value) or ReferenceType (pointer)
+    typename IsReference::ArrayType data;
+    typedef Array3d<T, s0, s1, s2, ShouldRoundPow2, ReferenceType<ArrayBaseType3d<T, s0, s1, s2, ShouldRoundPow2> > >
+        Slice;
+
+    enum Sizes { size0 = s0, size1 = s1, size2 = s2 };
+    static constexpr Array1d<uint32_t, 3, DontRoundPow2> size() { // all extents, returned as a small array {s0, s1, s2}
+#ifdef NOCONSTEXPR
+        Array1d<uint32_t, 3, DontRoundPow2> retval = {{s0, s1, s2}};
+        return retval;
+#else
+        return {{s0, s1, s2}};
+#endif
+    }
+    static constexpr uint32_t dimension() { return 3; } // constexpr to match Array1d::dimension() and dimsize() below
+    static constexpr uint32_t dimsize() { return s0; }
+
+    T& at(uint32_t i0, uint32_t i1, uint32_t i2) { // element access with every index checked
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        return IsReference::dereference(data)[i0][i1][i2];
+    }
+    const T& at(uint32_t i0, uint32_t i1, uint32_t i2) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        return IsReference::dereference(data)[i0][i1][i2];
+    }
+    typename Array1d<T, s2, ShouldRoundPow2>::Slice at(uint32_t i0, uint32_t i1) { // 1-D view of the innermost row [i0][i1]
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        typename Array1d<T, s2, ShouldRoundPow2>::Slice retval = {&IsReference::dereference(data)[i0][i1]};
+        return retval;
+    }
+    const typename Array1d<T, s2, ShouldRoundPow2>::Slice at(uint32_t i0, uint32_t i1) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        const typename Array1d<T, s2, ShouldRoundPow2>::Slice retval = {&IsReference::dereference(data)[i0][i1]};
+        return retval;
+    }
+    typename Array2d<T, s1, s2, ShouldRoundPow2>::Slice at(uint32_t i0) { // 2-D view of plane i0
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        typename Array2d<T, s1, s2, ShouldRoundPow2>::Slice retval = {&IsReference::dereference(data)[i0]};
+        return retval;
+    }
+    const typename Array2d<T, s1, s2, ShouldRoundPow2>::Slice at(uint32_t i0) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        const typename Array2d<T, s1, s2, ShouldRoundPow2>::Slice retval = {
+            (typename Array2d<T, s1, s2, ShouldRoundPow2>::IsReference::ArrayType*)&IsReference::dereference(data)[i0]};
+        return retval;
+    }
+    void memset(uint8_t val) { std::memset(data, val, sizeof(Array)); } // std:: is required: a bare memset names this member
+    template <class F>
+    void foreach (const F& f) { // calls f on every element, last index varying fastest
+        for (uint32_t i0 = 0; i0 < s0; ++i0) {
+            for (uint32_t i1 = 0; i1 < s1; ++i1) {
+                for (uint32_t i2 = 0; i2 < s2; ++i2) {
+                    f(IsReference::dereference(data)[i0][i1][i2]);
+                }
+            }
+        }
+    }
+    template <class F>
+    void foreach (const F& f) const {
+        for (uint32_t i0 = 0; i0 < s0; ++i0) {
+            for (uint32_t i1 = 0; i1 < s1; ++i1) {
+                for (uint32_t i2 = 0; i2 < s2; ++i2) {
+                    f(IsReference::dereference(data)[i0][i1][i2]);
+                }
+            }
+        }
+    }
+    T* begin() { return (T*)data; } // the cast yields the first element for both storage policies
+    const T* begin() const { return (const T*)data; }
+    T* end() { return (T*)data + s0 * s1 * s2; }
+    const T* end() const { return (const T*)data + s0 * s1 * s2; }
+};
+
+template <class T,
+          uint32_t s0,
+          uint32_t s1,
+          uint32_t s2,
+          uint32_t s3,
+          class ShouldRoundPow2 = DontRoundPow2,
+          class IsReferenceType = DirectType<ArrayBaseType4d<T, s0, s1, s2, s3, ShouldRoundPow2> > >
+struct Array4d { // bounds-checked s0 x s1 x s2 x s3 array, stored by value or, for a Slice, reached through a pointer
+    typedef typename ArrayBaseType4d<T, s0, s1, s2, s3, ShouldRoundPow2>::Array Array; // underlying C array type
+    typedef Array4d<T,
+                    s0,
+                    s1,
+                    s2,
+                    s3,
+                    ShouldRoundPow2,
+                    ReferenceType<ArrayBaseType4d<T, s0, s1, s2, s3, ShouldRoundPow2> > >
+        Slice;
+    typedef IsReferenceType IsReference; // storage policy: DirectType (by value) or ReferenceType (pointer)
+    typename IsReference::ArrayType data;
+    enum Sizes { size0 = s0, size1 = s1, size2 = s2, size3 = s3 };
+
+    static constexpr Array1d<uint32_t, 4, ShouldRoundPow2> size() { // all extents, returned as a small array {s0, s1, s2, s3}
+#ifdef NOCONSTEXPR
+        Array1d<uint32_t, 4, ShouldRoundPow2> retval = {{s0, s1, s2, s3}};
+        return retval;
+#else
+        return {{s0, s1, s2, s3}};
+#endif
+    }
+    static constexpr uint32_t dimension() { return 4; } // constexpr to match Array1d::dimension() and dimsize() below
+    static constexpr uint32_t dimsize() { return s0; }
+
+    T& at(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3) { // element access with every index checked
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        ARRAY_BOUNDS_ASSERT(i3 < s3);
+        return IsReference::dereference(data)[i0][i1][i2][i3];
+    }
+    const T& at(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        ARRAY_BOUNDS_ASSERT(i3 < s3);
+        return IsReference::dereference(data)[i0][i1][i2][i3];
+    }
+    typename Array1d<T, s3, ShouldRoundPow2>::Slice at(uint32_t i0, uint32_t i1, uint32_t i2) { // 1-D view of row [i0][i1][i2]
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        typename Array1d<T, s3, ShouldRoundPow2>::Slice retval = {&IsReference::dereference(data)[i0][i1][i2]};
+        return retval;
+    }
+    const typename Array1d<T, s3, ShouldRoundPow2>::Slice at(uint32_t i0, uint32_t i1, uint32_t i2) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        const typename Array1d<T, s3, ShouldRoundPow2>::Slice retval = {&IsReference::dereference(data)[i0][i1][i2]};
+        return retval;
+    }
+    typename Array2d<T, s2, s3, ShouldRoundPow2>::Slice at(uint32_t i0, uint32_t i1) { // 2-D view of plane [i0][i1]
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        typename Array2d<T, s2, s3, ShouldRoundPow2>::Slice retval = {&IsReference::dereference(data)[i0][i1]};
+        return retval;
+    }
+    const typename Array2d<T, s2, s3, ShouldRoundPow2>::Slice at(uint32_t i0, uint32_t i1) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        const typename Array2d<T, s2, s3, ShouldRoundPow2>::Slice retval = {&IsReference::dereference(data)[i0][i1]};
+        return retval;
+    }
+
+    typename Array3d<T, s1, s2, s3, ShouldRoundPow2>::Slice at(uint32_t i0) { // 3-D view of sub-array i0
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        typename Array3d<T, s1, s2, s3, ShouldRoundPow2>::Slice retval = {&IsReference::dereference(data)[i0]};
+        return retval;
+    }
+    const typename Array3d<T, s1, s2, s3, ShouldRoundPow2>::Slice at(uint32_t i0) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        const typename Array3d<T, s1, s2, s3, ShouldRoundPow2>::Slice retval = {
+            (typename Array3d<T, s1, s2, s3, ShouldRoundPow2>::IsReference::ArrayType*)(&IsReference::dereference(
+                data)[i0])};
+        return retval;
+    }
+    void memset(uint8_t val) { std::memset(data, val, sizeof(Array)); } // std:: is required: a bare memset names this member
+    template <class F>
+    void foreach (const F& f) { // calls f on every element, last index varying fastest
+        for (uint32_t i0 = 0; i0 < s0; ++i0) {
+            for (uint32_t i1 = 0; i1 < s1; ++i1) {
+                for (uint32_t i2 = 0; i2 < s2; ++i2) {
+                    for (uint32_t i3 = 0; i3 < s3; ++i3) {
+                        f(IsReference::dereference(data)[i0][i1][i2][i3]);
+                    }
+                }
+            }
+        }
+    }
+    template <class F>
+    void foreach (const F& f) const {
+        for (uint32_t i0 = 0; i0 < s0; ++i0) {
+            for (uint32_t i1 = 0; i1 < s1; ++i1) {
+                for (uint32_t i2 = 0; i2 < s2; ++i2) {
+                    for (uint32_t i3 = 0; i3 < s3; ++i3) {
+                        f(IsReference::dereference(data)[i0][i1][i2][i3]);
+                    }
+                }
+            }
+        }
+    }
+    T* begin() { return (T*)data; } // the cast yields the first element for both storage policies
+    const T* begin() const { return (const T*)data; }
+    T* end() { return (T*)data + s0 * s1 * s2 * s3; }
+    const T* end() const { return (const T*)data + s0 * s1 * s2 * s3; }
+};
+
+template <class T,
+          uint32_t s0,
+          uint32_t s1,
+          uint32_t s2,
+          uint32_t s3,
+          uint32_t s4,
+          class ShouldRoundPow2 = DontRoundPow2,
+          class IsReferenceType = DirectType<ArrayBaseType5d<T, s0, s1, s2, s3, s4, ShouldRoundPow2> > >
+struct Array5d {
+    typedef typename ArrayBaseType5d<T, s0, s1, s2, s3, s4, ShouldRoundPow2>::Array Array;
+    typedef IsReferenceType IsReference;
+    typename IsReference::ArrayType data;
+
+    typedef Array5d<T,
+                    s0,
+                    s1,
+                    s2,
+                    s3,
+                    s4,
+                    ShouldRoundPow2,
+                    ReferenceType<ArrayBaseType5d<T, s0, s1, s2, s3, s4, ShouldRoundPow2> > >
+        Slice;
+    enum Sizes { size0 = s0, size1 = s1, size2 = s2, size3 = s3, size4 = s4 };
+    static constexpr Array1d<uint32_t, 5, DontRoundPow2> size() {
+#ifdef NOCONSTEXPR
+        Array1d<uint32_t, 5, DontRoundPow2> retval = {{s0, s1, s2, s3, s4}};
+        return retval;
+#else
+        return {{s0, s1, s2, s3, s4}};
+#endif
+    }
+    static uint32_t dimension() { return 5; }
+    static constexpr uint32_t dimsize() { return s0; }
+
+    T& at(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4) {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        ARRAY_BOUNDS_ASSERT(i3 < s3);
+        ARRAY_BOUNDS_ASSERT(i4 < s4);
+        return IsReference::dereference(data)[i0][i1][i2][i3][i4];
+    }
+    const T& at(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        ARRAY_BOUNDS_ASSERT(i3 < s3);
+        ARRAY_BOUNDS_ASSERT(i4 < s4);
+        return IsReference::dereference(data)[i0][i1][i2][i3][i4];
+    }
+    typename Array1d<T, s4, ShouldRoundPow2>::Slice at(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3) {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        ARRAY_BOUNDS_ASSERT(i3 < s3);
+        typename Array1d<T, s4, ShouldRoundPow2>::Slice retval = {&IsReference::dereference(data)[i0][i1][i2][i3]};
+        return retval;
+    }
+    const typename Array1d<T, s4, ShouldRoundPow2>::Slice at(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        ARRAY_BOUNDS_ASSERT(i3 < s3);
+        const typename Array1d<T, s4, ShouldRoundPow2>::Slice retval = {
+            &IsReference::dereference(data)[i0][i1][i2][i3]};
+        return retval;
+    }
+    typename Array2d<T, s3, s4, ShouldRoundPow2>::Slice at(uint32_t i0, uint32_t i1, uint32_t i2) {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        typename Array2d<T, s3, s4, ShouldRoundPow2>::Slice retval = {&IsReference::dereference(data)[i0][i1][i2]};
+        return retval;
+    }
+    const typename Array2d<T, s3, s4, ShouldRoundPow2>::Slice at(uint32_t i0, uint32_t i1, uint32_t i2) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        const typename Array2d<T, s3, s4, ShouldRoundPow2>::Slice retval = {
+            &IsReference::dereference(data)[i0][i1][i2]};
+        return retval;
+    }
+
+    typename Array4d<T, s1, s2, s3, s4, ShouldRoundPow2>::Slice at(uint32_t i0) {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        typename Array4d<T, s1, s2, s3, s4, ShouldRoundPow2>::Slice retval = {&IsReference::dereference(data)[i0]};
+        return retval;
+    }
+    const typename Array4d<T, s1, s2, s3, s4, ShouldRoundPow2>::Slice at(uint32_t i0) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);  // const overload of the single-index at()
+        const typename Array4d<T, s1, s2, s3, s4, ShouldRoundPow2>::Slice retval = {
+            (typename Sirikata::ReferenceType<  // explicit cast to the pointer type the 4-d Slice stores
+                typename Sirikata::ArrayBaseType4d<T, s1, s2, s3, s4, ShouldRoundPow2> >::ArrayType) &
+            IsReference::dereference(data)[i0]};
+        return retval;  // returned by value as a const Slice
+    }
+    void memset(uint8_t val) { std::memset(data, val, sizeof(Array)); }  // fills sizeof(Array) bytes; std:: is required, a bare memset names this member
+    template <class F>
+    void foreach (const F& f) {  // calls f on every element, with the last index (i4) varying fastest
+        for (uint32_t i0 = 0; i0 < s0; ++i0) {
+            for (uint32_t i1 = 0; i1 < s1; ++i1) {
+                for (uint32_t i2 = 0; i2 < s2; ++i2) {
+                    for (uint32_t i3 = 0; i3 < s3; ++i3) {
+                        for (uint32_t i4 = 0; i4 < s4; ++i4) {
+                            f(IsReference::dereference(data)[i0][i1][i2][i3][i4]);  // no bounds assert: loop limits are the extents
+                        }
+                    }
+                }
+            }
+        }
+    }
+    template <class F>
+    void foreach (const F& f) const {  // const overload: same visiting order, last index (i4) varying fastest
+        for (uint32_t i0 = 0; i0 < s0; ++i0) {
+            for (uint32_t i1 = 0; i1 < s1; ++i1) {
+                for (uint32_t i2 = 0; i2 < s2; ++i2) {
+                    for (uint32_t i3 = 0; i3 < s3; ++i3) {
+                        for (uint32_t i4 = 0; i4 < s4; ++i4) {
+                            f(IsReference::dereference(data)[i0][i1][i2][i3][i4]);  // f is invoked once per element
+                        }
+                    }
+                }
+            }
+        }
+    }
+    T* begin() { return (T*)data; }  // data viewed as a flat sequence of T
+    const T* begin() const { return (const T*)data; }
+    T* end() { return (T*)data + s0 * s1 * s2 * s3 * s4; }  // NOTE(review): assumes no padding between rows - confirm for RoundToPow2
+    const T* end() const { return (const T*)data + s0 * s1 * s2 * s3 * s4; }
+};
+
+// Fixed-size six-dimensional array of T with extents s0..s5. IsReferenceType is the storage
+// policy: DirectType by default, ReferenceType for the Slice typedef below.
+template <class T,
+          uint32_t s0,
+          uint32_t s1,
+          uint32_t s2,
+          uint32_t s3,
+          uint32_t s4,
+          uint32_t s5,
+          class ShouldRoundPow2 = DontRoundPow2,
+          class IsReferenceType = DirectType<ArrayBaseType6d<T, s0, s1, s2, s3, s4, s5, ShouldRoundPow2> > >
+struct Array6d {
+    typedef typename ArrayBaseType6d<T, s0, s1, s2, s3, s4, s5, ShouldRoundPow2>::Array Array;
+    typedef IsReferenceType IsReference;
+    typename IsReference::ArrayType data;
+    // Slice: this array type with the same extents but ReferenceType as the storage policy.
+    typedef Array6d<T, s0, s1, s2, s3, s4, s5, ShouldRoundPow2,
+                    ReferenceType<ArrayBaseType6d<T, s0, s1, s2, s3, s4, s5, ShouldRoundPow2> > >
+        Slice;
+
+    enum Sizes {
+        size0 = s0,
+        size1 = s1,
+        size2 = s2,
+        size3 = s3,
+        size4 = s4,
+        size5 = s5,
+    };
+    static constexpr Array1d<uint32_t, 6, DontRoundPow2> size() {
+#ifdef NOCONSTEXPR
+        Array1d<uint32_t, 6, DontRoundPow2> retval = {{s0, s1, s2, s3, s4, s5}};
+        return retval;
+#else
+        return {{s0, s1, s2, s3, s4, s5}};
+#endif
+    }
+
+    static uint32_t dimension() { return 6; }
+    static constexpr uint32_t dimsize() { return s0; }
+
+    // at(): every index is checked with ARRAY_BOUNDS_ASSERT. Six indices select an element; fewer
+    // indices return a Slice initialised with the address of the selected sub-array.
+    T& at(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4, uint32_t i5) {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        ARRAY_BOUNDS_ASSERT(i3 < s3);
+        ARRAY_BOUNDS_ASSERT(i4 < s4);
+        ARRAY_BOUNDS_ASSERT(i5 < s5);
+        return IsReference::dereference(data)[i0][i1][i2][i3][i4][i5];
+    }
+    const T& at(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4, uint32_t i5) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        ARRAY_BOUNDS_ASSERT(i3 < s3);
+        ARRAY_BOUNDS_ASSERT(i4 < s4);
+        ARRAY_BOUNDS_ASSERT(i5 < s5);
+        return IsReference::dereference(data)[i0][i1][i2][i3][i4][i5];
+    }
+    typename Array1d<T, s5, ShouldRoundPow2>::Slice at(
+        uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4) {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        ARRAY_BOUNDS_ASSERT(i3 < s3);
+        ARRAY_BOUNDS_ASSERT(i4 < s4);
+        typename Array1d<T, s5, ShouldRoundPow2>::Slice retval = {&IsReference::dereference(data)[i0][i1][i2][i3][i4]};
+        return retval;
+    }
+    const typename Array1d<T, s5, ShouldRoundPow2>::Slice at(
+        uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        ARRAY_BOUNDS_ASSERT(i3 < s3);
+        ARRAY_BOUNDS_ASSERT(i4 < s4);
+        const typename Array1d<T, s5, ShouldRoundPow2>::Slice retval = {
+            &IsReference::dereference(data)[i0][i1][i2][i3][i4]};
+        return retval;
+    }
+    typename Array2d<T, s4, s5, ShouldRoundPow2>::Slice at(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3) {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        ARRAY_BOUNDS_ASSERT(i3 < s3);
+        typename Array2d<T, s4, s5, ShouldRoundPow2>::Slice retval = {&IsReference::dereference(data)[i0][i1][i2][i3]};
+        return retval;
+    }
+    const typename Array2d<T, s4, s5, ShouldRoundPow2>::Slice at(uint32_t i0,
+                                                                 uint32_t i1,
+                                                                 uint32_t i2,
+                                                                 uint32_t i3) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        ARRAY_BOUNDS_ASSERT(i3 < s3);
+        const typename Array2d<T, s4, s5, ShouldRoundPow2>::Slice retval = {
+            &IsReference::dereference(data)[i0][i1][i2][i3]};
+        return retval;
+    }
+
+    typename Array5d<T, s1, s2, s3, s4, s5, ShouldRoundPow2>::Slice at(uint32_t i0) {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        typename Array5d<T, s1, s2, s3, s4, s5, ShouldRoundPow2>::Slice retval = {&IsReference::dereference(data)[i0]};
+        return retval;
+    }
+    const typename Array5d<T, s1, s2, s3, s4, s5, ShouldRoundPow2>::Slice at(uint32_t i0) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        const typename Array5d<T, s1, s2, s3, s4, s5, ShouldRoundPow2>::Slice retval = {
+            (typename Sirikata::ReferenceType<
+                typename Sirikata::ArrayBaseType5d<T, s1, s2, s3, s4, s5, ShouldRoundPow2> >::ArrayType) &
+            IsReference::dereference(data)[i0]};
+        return retval;
+    }
+    // Fills sizeof(Array) bytes with val. The call is qualified because an unqualified memset
+    // inside this class names this one-argument member, not the C library function.
+    void memset(uint8_t val) { std::memset(data, val, sizeof(Array)); }
+    template <class F>
+    void foreach (const F& f) {
+        for (uint32_t i0 = 0; i0 < s0; ++i0) {
+            for (uint32_t i1 = 0; i1 < s1; ++i1) {
+                for (uint32_t i2 = 0; i2 < s2; ++i2) {
+                    for (uint32_t i3 = 0; i3 < s3; ++i3) {
+                        for (uint32_t i4 = 0; i4 < s4; ++i4) {
+                            for (uint32_t i5 = 0; i5 < s5; ++i5) {
+                                f(IsReference::dereference(data)[i0][i1][i2][i3][i4][i5]);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    template <class F>
+    void foreach (const F& f) const {
+        for (uint32_t i0 = 0; i0 < s0; ++i0) {
+            for (uint32_t i1 = 0; i1 < s1; ++i1) {
+                for (uint32_t i2 = 0; i2 < s2; ++i2) {
+                    for (uint32_t i3 = 0; i3 < s3; ++i3) {
+                        for (uint32_t i4 = 0; i4 < s4; ++i4) {
+                            for (uint32_t i5 = 0; i5 < s5; ++i5) {
+                                f(IsReference::dereference(data)[i0][i1][i2][i3][i4][i5]);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    T* begin() { return (T*)data; }
+    const T* begin() const { return (const T*)data; }
+    T* end() { return (T*)data + s0 * s1 * s2 * s3 * s4 * s5; }
+    const T* end() const { return (const T*)data + s0 * s1 * s2 * s3 * s4 * s5; }
+};
+
+template <class T,
+          uint32_t s0,
+          uint32_t s1,
+          uint32_t s2,
+          uint32_t s3,
+          uint32_t s4,
+          uint32_t s5,
+          uint32_t s6,
+          class ShouldRoundPow2 = DontRoundPow2,
+          class IsReferenceType = DirectType<ArrayBaseType7d<T, s0, s1, s2, s3, s4, s5, s6, ShouldRoundPow2> > >
+struct Array7d {  // fixed-size seven-dimensional array of T with extents s0..s6
+    typedef typename ArrayBaseType7d<T, s0, s1, s2, s3, s4, s5, s6, ShouldRoundPow2>::Array Array;
+    typedef IsReferenceType IsReference;
+    typename IsReference::ArrayType data;
+    typedef Array7d<T,
+                    s0,
+                    s1,
+                    s2,
+                    s3,
+                    s4,
+                    s5,
+                    s6,
+                    ShouldRoundPow2,
+                    ReferenceType<ArrayBaseType7d<T, s0, s1, s2, s3, s4, s5, s6, ShouldRoundPow2> > >
+        Slice;  // same extents, with ReferenceType as the storage policy
+    enum Sizes {
+        size0 = s0,
+        size1 = s1,
+        size2 = s2,
+        size3 = s3,
+        size4 = s4,
+        size5 = s5,
+        size6 = s6,
+    };
+    static constexpr Array1d<uint32_t, 7, DontRoundPow2> size() {  // all seven extents, outermost first
+#ifdef NOCONSTEXPR
+        Array1d<uint32_t, 7, DontRoundPow2> retval = {{s0, s1, s2, s3, s4, s5, s6}};
+        return retval;
+#else
+        return {{s0, s1, s2, s3, s4, s5, s6}};
+#endif
+    }
+    static uint32_t dimension() { return 7; }  // number of dimensions
+    static constexpr uint32_t dimsize() { return s0; }  // extent of the outermost dimension
+    // at(): every index is checked with ARRAY_BOUNDS_ASSERT; seven indices select an element, fewer yield a Slice.
+    T& at(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4, uint32_t i5, uint32_t i6) {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        ARRAY_BOUNDS_ASSERT(i3 < s3);
+        ARRAY_BOUNDS_ASSERT(i4 < s4);
+        ARRAY_BOUNDS_ASSERT(i5 < s5);
+        ARRAY_BOUNDS_ASSERT(i6 < s6);
+        return IsReference::dereference(data)[i0][i1][i2][i3][i4][i5][i6];
+    }
+    const T& at(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4, uint32_t i5, uint32_t i6) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        ARRAY_BOUNDS_ASSERT(i3 < s3);
+        ARRAY_BOUNDS_ASSERT(i4 < s4);
+        ARRAY_BOUNDS_ASSERT(i5 < s5);
+        ARRAY_BOUNDS_ASSERT(i6 < s6);
+        return IsReference::dereference(data)[i0][i1][i2][i3][i4][i5][i6];
+    }
+    typename Array1d<T, s6, ShouldRoundPow2>::Slice at(
+        uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4, uint32_t i5) {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);  // six indices select one innermost row
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        ARRAY_BOUNDS_ASSERT(i3 < s3);
+        ARRAY_BOUNDS_ASSERT(i4 < s4);
+        ARRAY_BOUNDS_ASSERT(i5 < s5);
+        typename Array1d<T, s6, ShouldRoundPow2>::Slice retval = {
+            &IsReference::dereference(data)[i0][i1][i2][i3][i4][i5]};
+        return retval;
+    }
+    const typename Array1d<T, s6, ShouldRoundPow2>::Slice at(
+        uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4, uint32_t i5) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        ARRAY_BOUNDS_ASSERT(i3 < s3);
+        ARRAY_BOUNDS_ASSERT(i4 < s4);
+        ARRAY_BOUNDS_ASSERT(i5 < s5);
+        const typename Array1d<T, s6, ShouldRoundPow2>::Slice retval = {
+            &IsReference::dereference(data)[i0][i1][i2][i3][i4][i5]};
+        return retval;
+    }
+    typename Array2d<T, s5, s6, ShouldRoundPow2>::Slice at(
+        uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4) {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);  // five indices select one s5 x s6 plane
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        ARRAY_BOUNDS_ASSERT(i3 < s3);
+        ARRAY_BOUNDS_ASSERT(i4 < s4);
+        typename Array2d<T, s5, s6, ShouldRoundPow2>::Slice retval = {
+            &IsReference::dereference(data)[i0][i1][i2][i3][i4]};
+        return retval;
+    }
+    const typename Array2d<T, s5, s6, ShouldRoundPow2>::Slice at(
+        uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        ARRAY_BOUNDS_ASSERT(i1 < s1);
+        ARRAY_BOUNDS_ASSERT(i2 < s2);
+        ARRAY_BOUNDS_ASSERT(i3 < s3);
+        ARRAY_BOUNDS_ASSERT(i4 < s4);
+        const typename Array2d<T, s5, s6, ShouldRoundPow2>::Slice retval = {
+            &IsReference::dereference(data)[i0][i1][i2][i3][i4]};
+        return retval;
+    }
+    // A single index selects the 6-d sub-array at position i0 of the outermost dimension.
+    typename Array6d<T, s1, s2, s3, s4, s5, s6, ShouldRoundPow2>::Slice at(uint32_t i0) {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        typename Array6d<T, s1, s2, s3, s4, s5, s6, ShouldRoundPow2>::Slice retval = {
+            &IsReference::dereference(data)[i0]};
+        return retval;
+    }
+    const typename Array6d<T, s1, s2, s3, s4, s5, s6, ShouldRoundPow2>::Slice at(uint32_t i0) const {
+        ARRAY_BOUNDS_ASSERT(i0 < s0);
+        const typename Array6d<T, s1, s2, s3, s4, s5, s6, ShouldRoundPow2>::Slice retval = {
+            &IsReference::dereference(data)[i0]};
+        return retval;
+    }
+    void memset(uint8_t val) { std::memset(data, val, sizeof(Array)); }  // fills sizeof(Array) bytes with val
+    template <class F>
+    void foreach (const F& f) {  // calls f on every element, last index (i6) varying fastest
+        for (uint32_t i0 = 0; i0 < s0; ++i0) {
+            for (uint32_t i1 = 0; i1 < s1; ++i1) {
+                for (uint32_t i2 = 0; i2 < s2; ++i2) {
+                    for (uint32_t i3 = 0; i3 < s3; ++i3) {
+                        for (uint32_t i4 = 0; i4 < s4; ++i4) {
+                            for (uint32_t i5 = 0; i5 < s5; ++i5) {
+                                for (uint32_t i6 = 0; i6 < s6; ++i6) {
+                                    f(IsReference::dereference(data)[i0][i1][i2][i3][i4][i5][i6]);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    template <class F>
+    void foreach (const F& f) const {  // const overload: same visiting order
+        for (uint32_t i0 = 0; i0 < s0; ++i0) {
+            for (uint32_t i1 = 0; i1 < s1; ++i1) {
+                for (uint32_t i2 = 0; i2 < s2; ++i2) {
+                    for (uint32_t i3 = 0; i3 < s3; ++i3) {
+                        for (uint32_t i4 = 0; i4 < s4; ++i4) {
+                            for (uint32_t i5 = 0; i5 < s5; ++i5) {
+                                for (uint32_t i6 = 0; i6 < s6; ++i6) {
+                                    f(IsReference::dereference(data)[i0][i1][i2][i3][i4][i5][i6]);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    T* begin() { return (T*)data; }  // data viewed as a flat sequence of T
+    const T* begin() const { return (const T*)data; }
+    T* end() { return (T*)data + s0 * s1 * s2 * s3 * s4 * s5 * s6; }  // NOTE(review): assumes no padding between rows - confirm for RoundToPow2
+    const T* end() const { return (const T*)data + s0 * s1 * s2 * s3 * s4 * s5 * s6; }
+};
+
+// Storage-owning wrapper around a reference Slice: data is pointed at the first 16-byte
+// aligned address inside backingStore, and copies duplicate the elements, not the pointer.
+template <class Slice>
+struct AlignedArrayNd : public Slice {
+    uint8_t backingStore[sizeof(typename Slice::Array) + 15];  // 15 spare bytes leave room to align
+    AlignedArrayNd() { init(); }
+    AlignedArrayNd(const AlignedArrayNd& other) {
+        init();         // aim data at our own backing store first ...
+        *this = other;  // ... then copy the elements across
+    }
+    AlignedArrayNd& operator=(const AlignedArrayNd& other) {
+        // memcpy must not be given overlapping ranges, so self-assignment is skipped.
+        if (this != &other) {
+            memcpy(this->data, other.data, sizeof(typename Slice::Array));
+        }
+        return *this;
+    }
+
+   private:
+    void init() {
+        // Inspect the address as an integer; pointer arithmetic on NULL is undefined behaviour.
+        const size_t misalign = reinterpret_cast<uintptr_t>(backingStore) & 15;
+        this->data = (typename Slice::Array*)(backingStore + (misalign == 0 ? 0 : 16 - misalign));
+    }
+};
+
+// Storage-owning wrapper around a reference Slice: data is pointed at the first 32-byte
+// (256-bit) aligned address inside backingStore; copies duplicate the elements.
+template <class Slice>
+struct Aligned256ArrayNd : public Slice {
+    uint8_t backingStore[sizeof(typename Slice::Array) + 31];  // 31 spare bytes leave room to align
+    Aligned256ArrayNd() { init(); }
+    Aligned256ArrayNd(const Aligned256ArrayNd& other) {
+        init();         // aim data at our own backing store first ...
+        *this = other;  // ... then copy the elements across
+    }
+    Aligned256ArrayNd& operator=(const Aligned256ArrayNd& other) {
+        // memcpy must not be given overlapping ranges, so self-assignment is skipped.
+        if (this != &other) {
+            memcpy(this->data, other.data, sizeof(typename Slice::Array));
+        }
+        return *this;
+    }
+
+   private:
+    void init() {
+        // Inspect the address as an integer; pointer arithmetic on NULL is undefined behaviour.
+        const size_t misalign = reinterpret_cast<uintptr_t>(backingStore) & 31;
+        this->data = (typename Slice::Array*)(backingStore + (misalign == 0 ? 0 : 32 - misalign));
+    }
+};
+
+template <class T, uint32_t s0>  // AlignedArray{1..7}d: AlignedArrayNd over the Slice of ArrayNd built with RoundToPow2
+struct AlignedArray1d : AlignedArrayNd<typename Array1d<T, s0, RoundToPow2>::Slice> {};
+
+template <class T, uint32_t s0, uint32_t s1>
+struct AlignedArray2d : AlignedArrayNd<typename Array2d<T, s0, s1, RoundToPow2>::Slice> {};
+
+template <class T, uint32_t s0, uint32_t s1, uint32_t s2>
+struct AlignedArray3d : AlignedArrayNd<typename Array3d<T, s0, s1, s2, RoundToPow2>::Slice> {};
+
+template <class T, uint32_t s0, uint32_t s1, uint32_t s2, uint32_t s3>
+struct AlignedArray4d : AlignedArrayNd<typename Array4d<T, s0, s1, s2, s3, RoundToPow2>::Slice> {};
+
+template <class T, uint32_t s0, uint32_t s1, uint32_t s2, uint32_t s3, uint32_t s4>
+struct AlignedArray5d : AlignedArrayNd<typename Array5d<T, s0, s1, s2, s3, s4, RoundToPow2>::Slice> {};
+
+template <class T, uint32_t s0, uint32_t s1, uint32_t s2, uint32_t s3, uint32_t s4, uint32_t s5>
+struct AlignedArray6d : AlignedArrayNd<typename Array6d<T, s0, s1, s2, s3, s4, s5, RoundToPow2>::Slice> {};
+
+template <class T, uint32_t s0, uint32_t s1, uint32_t s2, uint32_t s3, uint32_t s4, uint32_t s5, uint32_t s6>
+struct AlignedArray7d : AlignedArrayNd<typename Array7d<T, s0, s1, s2, s3, s4, s5, s6, RoundToPow2>::Slice> {};
+
+template <class T, uint32_t s0>  // Aligned256Array{1..7}d: Aligned256ArrayNd over the Slice of ArrayNd built with RoundToPow2
+struct Aligned256Array1d : Aligned256ArrayNd<typename Array1d<T, s0, RoundToPow2>::Slice> {};
+
+template <class T, uint32_t s0, uint32_t s1>
+struct Aligned256Array2d : Aligned256ArrayNd<typename Array2d<T, s0, s1, RoundToPow2>::Slice> {};
+
+template <class T, uint32_t s0, uint32_t s1, uint32_t s2>
+struct Aligned256Array3d : Aligned256ArrayNd<typename Array3d<T, s0, s1, s2, RoundToPow2>::Slice> {};
+
+template <class T, uint32_t s0, uint32_t s1, uint32_t s2, uint32_t s3>
+struct Aligned256Array4d : Aligned256ArrayNd<typename Array4d<T, s0, s1, s2, s3, RoundToPow2>::Slice> {};
+
+template <class T, uint32_t s0, uint32_t s1, uint32_t s2, uint32_t s3, uint32_t s4>
+struct Aligned256Array5d : Aligned256ArrayNd<typename Array5d<T, s0, s1, s2, s3, s4, RoundToPow2>::Slice> {};
+
+template <class T, uint32_t s0, uint32_t s1, uint32_t s2, uint32_t s3, uint32_t s4, uint32_t s5>
+struct Aligned256Array6d : Aligned256ArrayNd<typename Array6d<T, s0, s1, s2, s3, s4, s5, RoundToPow2>::Slice> {};
+
+template <class T, uint32_t s0, uint32_t s1, uint32_t s2, uint32_t s3, uint32_t s4, uint32_t s5, uint32_t s6>
+struct Aligned256Array7d : Aligned256ArrayNd<typename Array7d<T, s0, s1, s2, s3, s4, s5, s6, RoundToPow2>::Slice> {};
+}
+
+#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus > 199711L
+#undef constexpr
+#undef NOCONSTEXPR
+#endif
+#endif //_SIRIKATA_ARRAY_ND_HPP_
diff --git a/codec/L2/demos/leptonEnc/host/vp8/util/options.hh b/codec/L2/demos/leptonEnc/host/vp8/util/options.hh
new file mode 100644
index 0000000000..8a4e0ca082
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/util/options.hh
@@ -0,0 +1,11 @@
+#ifndef _OPTIONS_HH_
+#define _OPTIONS_HH_
+
+enum {  // compile-time option constants (anonymous enum); NOTE(review): meanings below are inferred from names - confirm
+    VECTORIZE = 0,       // presumably disables the vectorized code paths
+    MICROVECTORIZE = 0,  // presumably disables the finer-grained vectorized paths
+    MAX_NUM_THREADS = 1, // the alternative value 8 is commented out
+    SIMD_WIDTH = 1
+};
+extern unsigned int NUM_THREADS;  // declaration only; the definition is not in this header
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/vp8/util/vpx_config.hh b/codec/L2/demos/leptonEnc/host/vp8/util/vpx_config.hh
new file mode 100644
index 0000000000..2413e0fcee
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/vp8/util/vpx_config.hh
@@ -0,0 +1,53 @@
+#ifndef VPX_CONFIG_H_
+#define VPX_CONFIG_H_
+
+//#define DEBUG_ARICODER
+#ifdef DEBUG_ARICODER
+#include <cstdio>
+#endif
+#define INLINE inline
+
+#ifdef _WIN32
+#include <intrin.h>
+// FIXME: this assumes windows platforms are little endian
+#define htobe64 _byteswap_uint64
+#define be64toh _byteswap_uint64
+#define htobe32 _byteswap_ulong
+#define be32toh _byteswap_ulong
+#define htobe16 _byteswap_ushort
+#define be16toh _byteswap_ushort
+#define htole64(x) (x)
+#define htole32(x) (x)
+#define htole16(x) (x)
+#define le64toh(x) (x)
+#define le32toh(x) (x)
+#define le16toh(x) (x)
+
+#else  // !_WIN32
+#ifdef __APPLE__
+#include <libkern/OSByteOrder.h>
+#define htobe64 OSSwapHostToBigInt64
+#define be64toh OSSwapBigToHostInt64
+#define htobe32 OSSwapHostToBigInt32
+#define be32toh OSSwapBigToHostInt32
+#define htobe16 OSSwapHostToBigInt16
+#define be16toh OSSwapBigToHostInt16
+
+#define htole64 OSSwapHostToLittleInt64
+#define le64toh OSSwapLittleToHostInt64
+#define htole32 OSSwapHostToLittleInt32
+#define le32toh OSSwapLittleToHostInt32
+#define htole16 OSSwapHostToLittleInt16
+#define le16toh OSSwapLittleToHostInt16
+#else  // neither _WIN32 nor __APPLE__: take the conversions from the system endian header
+#ifndef _BSD_SOURCE
+#define _BSD_SOURCE /* See feature_test_macros(7) */
+#endif
+#ifdef BSD
+#include <sys/endian.h>
+#else
+#include <endian.h>
+#endif
+#endif  // __APPLE__
+#endif  // _WIN32
+#endif  // VPX_CONFIG_H_ (the guard now covers the byte-order shims as well)
diff --git a/codec/L2/demos/leptonEnc/host/xhpp/xhpp.hpp b/codec/L2/demos/leptonEnc/host/xhpp/xhpp.hpp
new file mode 100644
index 0000000000..e0021e9241
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/xhpp/xhpp.hpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**********
+           Copyright (c) 2019, Xilinx, Inc.
+           All rights reserved.
+
+           TODO
+
+**********/
+
+#ifndef _XHPP_XILINX_
+#define _XHPP_XILINX_
+
+#include "xhpp_enums.hpp"
+#include "xhpp_error.hpp"
+#include "xhpp_context.hpp"
+#include "xhpp_event.hpp"
+
+#include "xhpp_bufferbase.hpp"
+#include "xhpp_bufferhost.hpp"
+#include "xhpp_bufferdevice.hpp"
+
+#include "xhpp_taskbase.hpp"
+// #include "xhpp_taskhost.hpp"
+#include "xhpp_taskkernel.hpp"
+#include "xhpp_tasktransfer.hpp"
+
+#include "xhpp_graph.hpp"
+#include "xhpp_scheduler.hpp"
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/xhpp/xhpp_bufferbase.hpp b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_bufferbase.hpp
new file mode 100644
index 0000000000..5ff18e68fc
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_bufferbase.hpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**********
+           Copyright (c) 2018, Xilinx, Inc.
+           All rights reserved.
+
+           TODO
+
+**********/
+
+#ifndef _XHPP_BASEBUFFER_
+#define _XHPP_BASEBUFFER_
+
+#include "xhpp_context.hpp"
+
+namespace xhpp {
+
+namespace buffer {
+
+//! base class of buffer objects: the allocate/release interface every buffer type implements
+class base {
+   protected:
+    xhpp::context* xctx = nullptr;  //!< context pointer handed to the constructor
+
+   public:
+    //! constructor: stores the context pointer, nothing is allocated
+    base(xhpp::context* ctx) : xctx(ctx) {}
+
+    //! virtual destructor, so that deleting a buffer through a base pointer is well defined
+    virtual ~base() = default;
+
+    //! buffer (body) allocation; the device buffer passes a DDR bank index as the argument
+    virtual int bodyallocate(const int = 0) = 0;
+
+    //! buffer (shadow) allocation; the device buffer takes the total slot count (body + shadows)
+    virtual int bodyshadowallocate(const unsigned int) = 0;
+
+    //! buffer (body) free
+    virtual int bodyrelease() = 0;
+
+    //! buffer (shadow) free
+    virtual int shadowrelease() = 0;
+
+    //! buffer (body and shadow) free
+    virtual int bodyshadowrelease() = 0;
+
+    //! starting/ending vbuffer, do not allocate
+    virtual bool startingendingallocate() = 0;
+};
+
+}; // end of namespace buffer
+
+}; // end of namespace xhpp
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/xhpp/xhpp_bufferdevice.hpp b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_bufferdevice.hpp
new file mode 100644
index 0000000000..649c961aa8
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_bufferdevice.hpp
@@ -0,0 +1,365 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**********
+           Copyright (c) 2018, Xilinx, Inc.
+           All rights reserved.
+
+           TODO
+
+**********/
+
+#ifndef _XHPP_DEVICEBUFFER_
+#define _XHPP_DEVICEBUFFER_
+
+// #define _XHPP_DEBUG_
+
+#include "xhpp_event.hpp"
+#include "xhpp_bufferbase.hpp"
+#include "xhpp_bufferhost.hpp"
+
+#include "CL/cl.h"
+#include "CL/cl_ext_xilinx.h"
+
+#include <iostream>
+
+namespace xhpp {
+
+namespace task { // pre-declaration
+template <typename _BUF_H, typename _BUF_D>
+class tran_impl;
+};
+namespace task {
+class dev_func;
+}
+
+namespace vbuffer {
+
+template <class T>
+class host;
+
+template <typename _BUF_H, typename _BUF_D>
+class tran_impl;
+
+//! virtual device buffer objects
+template <class T>
+class device : public buffer::base {
+    // friend class
+    friend class xhpp::vbuffer::host<T>;
+
+    template <typename _BUF_H, typename _BUF_D>
+    friend class xhpp::task::tran_impl;
+
+    friend class xhpp::task::dev_func;
+
+   private:
+    unsigned int xsize = 0;    //! buffer size
+    std::vector<cl_mem> xmems; //! cl_mem objs
+
+    bool _sizeset = false;
+    bool _bodyallocated = false;
+    bool _shadowallocated = false;
+    unsigned int _shadowsize = 0;
+
+    bool _vbuffer_startorend = false;
+    // xhpp::host_buffer<T>* hbmap;
+
+   public:
+    std::vector<int> bankinfo;
+
+    //! constructors: every variant starts with exactly one cl_mem slot, xmems[0], which is the
+    //! slot the body allocation uses
+    device(xhpp::context* ctx) : buffer::base(ctx), xmems(1) {}
+
+    //! vbuf_startorend is the flag later returned by startingendingallocate()
+    device(xhpp::context* ctx, bool vbuf_startorend)
+        : buffer::base(ctx), xmems(1), _vbuffer_startorend(vbuf_startorend) {}
+
+    //! stores an existing cl_mem of num elements in slot 0; _bodyallocated stays false, so
+    //! bodyrelease() will not release it. setsize() throws when num is 0.
+    device(xhpp::context* ctx, unsigned int num, cl_mem mem) : buffer::base(ctx), xmems(1, mem) {
+        setsize(num);
+    }
+
+    //! set buffer size
+    int setsize(unsigned int num) {  // records the element count; returns 0 on success
+        if (num == 0) {              // num is unsigned, so zero is the only value that can be rejected
+            throw xhpp::error("ERROR: xhpp::buffer::device::setsize(), input parameter should be larger than zero.\n");
+        }
+        _sizeset = true;
+        xsize = num;
+        return 0;
+    }
+
+    //    protected:
+
+    //! allocate buffer
+    int allocate(const unsigned int n, const int bank = 0) {
+        // Creates xmems[n] (xsize elements of T) with the DDR bank flag for `bank`, enqueues its
+        // migration to the device and calls xctx->wait(). Returns 0; every failure throws xhpp::error.
+        if (n >= xmems.size()) {
+            throw xhpp::error("ERROR: xhpp::buffer::device::allocate(), buffer index out of range.\n");
+        }
+        cl_mem_ext_ptr_t ptr;
+        switch (bank) {
+            case 0: ptr.flags = XCL_MEM_DDR_BANK0; break;
+            case 1: ptr.flags = XCL_MEM_DDR_BANK1; break;
+            case 2: ptr.flags = XCL_MEM_DDR_BANK2; break;
+            case 3: ptr.flags = XCL_MEM_DDR_BANK3; break;
+            default:
+                throw xhpp::error("ERROR: xhpp::buffer::device::allocate(), bank should be 0 ~ 3.\n");
+        }
+        ptr.obj = 0;
+        ptr.param = 0;
+
+        cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_EXT_PTR_XILINX;
+        cl_int err;
+        xmems[n] = clCreateBuffer(xctx->xcontext, flags, xsize * sizeof(T), &ptr, &err);
+        if (err != CL_SUCCESS) {
+            throw xhpp::error("ERROR: xhpp::buffer::device::allocate(), allocation error.\n");
+        }
+
+        //! let buffer be resident
+        err = clEnqueueMigrateMemObjects(xctx->xqueue, 1, &xmems[n], CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED, 0, NULL,
+                                         NULL);
+        if (err != CL_SUCCESS) {
+            throw xhpp::error("ERROR: xhpp::buffer::device::allocate(), allocation (being resident) error.\n");
+        }
+        xctx->wait();
+        return 0;
+    }
+
+    //! buffer (body) allocation
+    int bodyallocate(const int bank = 0) {
+        // The body lives in xmems[0]; once it has been allocated, further calls do nothing.
+        if (!_bodyallocated) {
+            allocate(0, bank);
+            _bodyallocated = true;
+        }
+        return 0;
+    }
+
+    //! buffer (body and shadow) allocation
+    //      int bodyshadowallocate(unsigned int nsize, const int* banks=NULL){
+    int bodyshadowallocate(unsigned int nsize) {
+        // Allocates the body (slot 0) and nsize - 1 shadows (slots 1 .. nsize - 1). Linear pattern:
+        // slot i uses bankinfo[i]; otherwise slot i uses bankinfo[i / 2]. No-op when nsize is 0.
+        if (nsize == 0) {
+            return 0;
+        }
+        const bool linear = (xctx->pattern == xhpp::linear);
+        // Highest bankinfo index read below; only bankinfo[0] is read once the shadows exist.
+        const size_t lastbank = _shadowallocated ? 0 : (linear ? nsize - 1 : (nsize - 1) / 2);
+        if (bankinfo.size() <= lastbank) {
+            throw xhpp::error("ERROR: xhpp::buffer::device::bodyshadowallocate(), bankinfo has too few entries.\n");
+        }
+        _shadowsize = nsize - 1;
+        xmems.resize(nsize);
+        bodyallocate(bankinfo[0]);        // body
+        if (_shadowallocated == false) {  // shadow
+            for (unsigned int i = 1; i < nsize; i++) {
+                const int bank = linear ? bankinfo[i] : bankinfo[i / 2];
+                allocate(i, bank);
+#ifdef _XHPP_DEBUG_
+                std::cout << "allocating bankinfo[" << i << "] " << bank << std::endl;
+#endif
+            }
+            _shadowallocated = true;
+        }
+        return 0;
+    }
+
+    //! returns the starting/ending flag given at construction; NOTE(review): callers presumably skip real allocation when true - confirm
+    bool startingendingallocate() { return _vbuffer_startorend; }
+
+    //! buffer (body) free
+    int bodyrelease() {
+        if (_bodyallocated) {  // the body lives in xmems[0]
+            clReleaseMemObject(xmems[0]);
+            _bodyallocated = false;
+        }
+        // With no shadows allocated either, nothing is left: reset the recorded size.
+        if (!_shadowallocated) {
+            xsize = 0;
+        }
+        return 0;
+    }
+
+    //! buffer (shadow) free
+    int shadowrelease() {
+        // Shadows occupy xmems[1] .. xmems[_shadowsize]; slot 0 is the body and is left alone.
+        if (_shadowallocated) {
+            for (unsigned int slot = 1; slot <= _shadowsize; ++slot) {
+                clReleaseMemObject(xmems[slot]);
+            }
+            _shadowallocated = false;
+        }
+        if (!_bodyallocated) {  // body already gone as well: reset the recorded size
+            xsize = 0;
+        }
+        return 0;
+    }
+
+    //! buffer (body and shadow) free
+    int bodyshadowrelease() {
+        bodyrelease();    // slot 0
+        shadowrelease();  // slots 1 .. _shadowsize
+        return 0;         // the results of the two calls are not inspected
+    }
+
+    //! non-blocking copy from host to device: enqueues a write of xsize elements of T from host
+    //! slot rcfrom into device slot rcto. waitevt supplies the OpenCL wait list and evt receives
+    //! the completion event. Returns 0; throws xhpp::error when the enqueue fails.
+    int copy_from_host(xhpp::vbuffer::host<T>* hbuf, xhpp::event* waitevt, xhpp::event* evt,
+                       const int rcfrom = 0, const int rcto = 0) {
+        const size_t nbytes = xsize * sizeof(T);
+        // CL_FALSE: the call returns once the transfer is queued, not once it has finished.
+        cl_int err = clEnqueueWriteBuffer(xctx->xqueue, xmems[rcto], CL_FALSE, 0, nbytes,
+                                          (hbuf->xmems)[rcfrom], waitevt->size(), waitevt->data(), evt->data());
+        if (CL_SUCCESS != err) {
+            throw xhpp::error(err,
+                              "ERROR: xhpp::buffer::device::copy_from_host(), non-blocking data transfer from host "
+                              "buffer to device buffer error.\n");
+        }
+#ifdef _XHPP_DEBUG_
+        std::cout << "buffer from_host \n";
+#endif
+        return 0;
+    }
+
+    //! Non-blocking copy from device to host.
+    //!
+    //! Enqueues a read of xsize elements from device buffer slot `rcfrom` into host buffer
+    //! slot `rcto` with CL_FALSE as the blocking flag, i.e. without waiting for completion.
+    //!
+    //! @param hbuf    destination host buffer
+    //! @param waitevt events handed to OpenCL as the wait list of the read
+    //! @param evt     its data() pointer is handed to OpenCL to receive the event of this read
+    //! @param rcfrom  index into this buffer's xmems (source)
+    //! @param rcto    index into hbuf->xmems (destination)
+    //! @return 0 on success; throws xhpp::error when the enqueue fails
+    int copy_to_host(xhpp::vbuffer::host<T>* hbuf,
+                     xhpp::event* waitevt,
+                     xhpp::event* evt,
+                     const int rcfrom = 0,
+                     const int rcto = 0) {
+        const auto nbytes = xsize * sizeof(T);
+        auto hostDst = (hbuf->xmems)[rcto];
+        const cl_int status = clEnqueueReadBuffer(xctx->xqueue, xmems[rcfrom], CL_FALSE, 0, nbytes, hostDst,
+                                                  waitevt->size(), waitevt->data(), evt->data());
+        if (status != CL_SUCCESS) {
+            throw xhpp::error(status,
+                              "ERROR: xhpp::buffer::device::copy_to_host(), non-blocking data transfer from device "
+                              "buffer to host buffer error.\n");
+        }
+#ifdef _XHPP_DEBUG_
+        std::cout << "Buffer to_host \n";
+#endif
+        return 0;
+    };
+
+    //! Blocking copy from host to device.
+    //!
+    //! Writes xsize elements from host buffer slot `rcfrom` into device buffer slot `rcto`
+    //! with CL_TRUE as the blocking flag and no wait list / output event.
+    //!
+    //! @return 0 on success; throws xhpp::error when the write fails
+    inline int copy_from_host(xhpp::vbuffer::host<T>* hbuf, const int rcfrom = 0, const int rcto = 0) {
+        const auto nbytes = xsize * sizeof(T);
+        auto hostSrc = (hbuf->xmems)[rcfrom];
+        const cl_int status =
+            clEnqueueWriteBuffer(xctx->xqueue, xmems[rcto], CL_TRUE, 0, nbytes, hostSrc, 0, NULL, NULL);
+        if (status != CL_SUCCESS) {
+            throw xhpp::error(status,
+                              "ERROR: xhpp::buffer::device::copy_from_host(), blocking data transfer from host buffer "
+                              "to device buffer error.\n");
+        }
+#ifdef _XHPP_DEBUG_
+        std::cout << "buffer from_host \n";
+#endif
+        return 0;
+    };
+
+    //! Blocking copy from device to host.
+    //!
+    //! Reads xsize elements from device buffer slot `rcfrom` into host buffer slot `rcto`
+    //! with CL_TRUE as the blocking flag and no wait list / output event.
+    //!
+    //! @return 0 on success; throws xhpp::error when the read fails
+    inline int copy_to_host(xhpp::vbuffer::host<T>* hbuf, const int rcfrom = 0, const int rcto = 0) {
+        const auto nbytes = xsize * sizeof(T);
+        auto hostDst = (hbuf->xmems)[rcto];
+        const cl_int status =
+            clEnqueueReadBuffer(xctx->xqueue, xmems[rcfrom], CL_TRUE, 0, nbytes, hostDst, 0, NULL, NULL);
+        if (status != CL_SUCCESS) {
+            throw xhpp::error(status,
+                              "ERROR: xhpp::buffer::device::copy_to_host(), blocking data transfer from device buffer "
+                              "to host buffer error.\n");
+        }
+#ifdef _XHPP_DEBUG_
+        std::cout << "Buffer to_host \n";
+#endif
+        return 0;
+    };
+
+    //! Blocking copy from device to device.
+    //!
+    //! Copies xsize elements from this buffer's body (xmems[0]) into the body of `dbuf`.
+    //! clEnqueueCopyBuffer has no blocking flag, so the call itself only enqueues the copy;
+    //! to honour the documented blocking contract this method waits on the event of that
+    //! single command before returning (the rest of the queue is not drained).
+    //!
+    //! @param dbuf destination device buffer
+    //! @return 0 on success
+    //! @throws xhpp::error when the enqueue or the wait fails
+    int copy_to_device(xhpp::vbuffer::device<T>* dbuf) {
+        cl_event done = NULL;
+        cl_int err =
+            clEnqueueCopyBuffer(xctx->xqueue, xmems[0], (dbuf->xmems)[0], 0, 0, xsize * sizeof(T), 0, NULL, &done);
+        if (CL_SUCCESS != err) {
+            throw xhpp::error(err,
+                              "ERROR: xhpp::buffer::device::copy_to_device(), blocking data transfer from device "
+                              "buffer to device buffer error.\n");
+        };
+
+        // block until this copy has completed, then drop our reference to its event
+        err = clWaitForEvents(1, &done);
+        clReleaseEvent(done);
+        if (CL_SUCCESS != err) {
+            throw xhpp::error(err,
+                              "ERROR: xhpp::buffer::device::copy_to_device(), waiting for blocking data transfer "
+                              "from device buffer to device buffer error.\n");
+        };
+        return 0;
+    };
+
+    //! Non-blocking copy from device to device.
+    //!
+    //! Enqueues a copy of xsize elements from this buffer's body (xmems[0]) into the body of
+    //! `dbuf` and returns without waiting.
+    //!
+    //! @param dbuf    destination device buffer
+    //! @param waitevt events handed to OpenCL as the wait list of the copy
+    //! @param evt     its data() pointer is handed to OpenCL to receive the event of this copy
+    //! @return 0 on success; throws xhpp::error when the enqueue fails
+    int copy_to_device(xhpp::vbuffer::device<T>* dbuf, xhpp::event* waitevt, xhpp::event* evt) {
+        const auto nbytes = xsize * sizeof(T);
+        const cl_int status = clEnqueueCopyBuffer(xctx->xqueue, xmems[0], (dbuf->xmems)[0], 0, 0, nbytes,
+                                                  waitevt->size(), waitevt->data(), evt->data());
+        if (status != CL_SUCCESS) {
+            throw xhpp::error(status,
+                              "ERROR: xhpp::buffer::device::copy_to_device(), non-blocking data transfer from device "
+                              "buffer to device buffer error.\n");
+        }
+        return 0;
+    };
+
+}; // end of class vbuffer::device
+}; // end of namespace vbuffer
+
+namespace buffer {
+
+template <class T>
+class host;
+
+//! Device buffer object: a thin front-end over vbuffer::device<T> that only deals with the
+//! body buffer and forwards every operation to the base class.
+template <class T>
+class device : public vbuffer::device<T> {
+    // shorthand for the base class; calls stay explicitly qualified with it
+    typedef vbuffer::device<T> vdevice_t;
+
+   public:
+    //! construct on a context without any buffer
+    device(xhpp::context* ctx) : vbuffer::device<T>(ctx) {}
+
+    //! construct on a context, forwarding `num` and an existing cl_mem to the base class
+    device(xhpp::context* ctx, unsigned int num, cl_mem mem) : vbuffer::device<T>(ctx, num, mem) {}
+
+    //! Set the element count to `num`, then allocate the body buffer with `bank`.
+    //! The result of the base-class allocation is not inspected; always returns 0.
+    int allocate(unsigned int num, const int bank = 0) {
+#ifdef _XHPP_DEBUG_
+        std::cout << "in buffer allocate" << std::endl;
+#endif
+        vdevice_t::setsize(num);
+        vdevice_t::bodyallocate(bank); // TODO
+        return 0;
+    }
+
+    //! Free the body buffer; always returns 0.
+    int release() {
+        vdevice_t::bodyrelease();
+        return 0;
+    }
+
+    //! blocking copy from host to device
+    int copy_from_host(xhpp::buffer::host<T>* hbuf) {
+        return vdevice_t::copy_from_host(hbuf);
+    }
+
+    //! blocking copy from device to host
+    int copy_to_host(xhpp::buffer::host<T>* hbuf) {
+        return vdevice_t::copy_to_host(hbuf);
+    }
+
+    //! copy from device to device, forwarded to the base-class overload without events
+    int copy_to_device(xhpp::buffer::device<T>* dbuf) {
+        return vdevice_t::copy_to_device(dbuf);
+    }
+
+    //! non-blocking copy from device to device
+    int copy_to_device(xhpp::buffer::device<T>* dbuf, xhpp::event* waitevt, xhpp::event* evt) {
+        return vdevice_t::copy_to_device(dbuf, waitevt, evt);
+    }
+};
+
+}; // end of namespace buffer
+}; // end of namespace xhpp
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/xhpp/xhpp_bufferhost.hpp b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_bufferhost.hpp
new file mode 100644
index 0000000000..917b5a45fc
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_bufferhost.hpp
@@ -0,0 +1,319 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**********
+           Copyright (c) 2018, Xilinx, Inc.
+           All rights reserved.
+
+           TODO
+
+**********/
+
+#ifndef _XHPP_HOSTBUFFER_
+#define _XHPP_HOSTBUFFER_
+
+#include "xhpp_error.hpp"
+#include "xhpp_context.hpp"
+#include "xhpp_bufferbase.hpp"
+#include "xhpp_bufferdevice.hpp"
+#include "xhpp_event.hpp"
+
+#include <stdlib.h>
+#include <iostream>
+#include <vector>
+
+namespace xhpp {
+
+namespace task { // pre-declaration
+template <typename _BUF_H, typename _BUF_D>
+class tran_impl;
+};
+
+namespace vbuffer {
+
+template <class T>
+class device;
+
+//! virtual host buffer objects
+//!
+//! Holds one "body" array in xmems[0] and, optionally, `_shadowsize` "shadow" arrays in
+//! xmems[1..], each sized for xsize elements of T. Arrays created by bodyallocate() and
+//! bodyshadowallocate() come from posix_memalign with 4096-byte alignment and are returned
+//! with free(). A pointer passed to the (ctx, num, data) constructor is only stored: it is
+//! never marked as allocated and therefore never freed by this class.
+template <class T>
+class host : public buffer::base {
+    // friend class
+    friend class xhpp::vbuffer::device<T>;
+
+    template <typename _BUF_H, typename _BUF_D>
+    friend class xhpp::task::tran_impl;
+
+   private:
+    unsigned int xsize = 0; //! number of elements in each array
+    std::vector<T*> xmems;  //! data: [0] is the body, [1.._shadowsize] are the shadows
+
+    bool _sizeset = false;
+    bool _bodyallocated = false;
+    bool _shadowallocated = false;
+    unsigned int _shadowsize = 0; //! number of shadow arrays
+
+   public:
+    //! constructors
+    host(xhpp::context* ctx) : buffer::base(ctx) { xmems.resize(1); };
+
+    //! wrap an existing array of `num` elements as the body (ownership stays with the caller)
+    host(xhpp::context* ctx, unsigned int num, T* data) : buffer::base(ctx) {
+        xmems.resize(1);
+        setsize(num);
+        xmems[0] = data;
+    };
+
+    //! set buffer size (number of elements); throws xhpp::error when `num` is zero
+    int setsize(unsigned int num) {
+        // `num` is unsigned, so zero is the only invalid value
+        if (num == 0) {
+            throw xhpp::error("ERROR: xhpp::buffer::host::setsize(), input parameter should be larger than zero.\n");
+        }
+        xsize = num;
+        _sizeset = true;
+        return 0;
+    };
+
+    //! buffer allocation (4K alignment)
+    //! `bank` is accepted for signature parity with the device buffer and is not used here.
+    //! Throws xhpp::error when posix_memalign fails; returns 0 otherwise.
+    int bodyallocate(const int bank = 0) {
+        void* ptr = nullptr;
+        if (posix_memalign(&ptr, 4096, xsize * sizeof(T)))
+            throw xhpp::error("ERROR: xhpp::buffer::host::bodyallocate(), allocation error.\n");
+        xmems[0] = reinterpret_cast<T*>(ptr);
+
+        _bodyallocated = true;
+        return 0;
+    };
+
+    //! buffer (body and shadow) allocation
+    //! Allocates the body plus `nsize - 1` shadow arrays; does nothing when `nsize` is zero.
+    //! Shadows are only allocated if they are not already marked as allocated.
+    int bodyshadowallocate(unsigned int nsize) {
+        if (nsize > 0) {
+            _shadowsize = nsize - 1;
+            xmems.resize(nsize);
+
+            bodyallocate(); // body
+
+            if (_shadowallocated == false) { // shadow
+                // unsigned index: `nsize` is unsigned, avoid a signed/unsigned comparison
+                for (unsigned int i = 1; i < nsize; i++) {
+                    void* ptr = nullptr;
+                    if (posix_memalign(&ptr, 4096, xsize * sizeof(T)))
+                        throw xhpp::error(
+                            "ERROR: xhpp::buffer::host::bodyshadowallocate(), shadow allocation error.\n");
+                    xmems[i] = reinterpret_cast<T*>(ptr);
+                };
+                _shadowallocated = true;
+            };
+        };
+        return 0;
+    };
+
+    //! host buffers are never treated as starting/ending vbuffers
+    bool startingendingallocate() { return false; }
+
+    //! buffer (body) free
+    int bodyrelease() {
+        if (_bodyallocated) {
+            free(xmems[0]);
+            xmems[0] = nullptr; // do not keep a dangling pointer around
+            _bodyallocated = false;
+        }
+
+        if (_shadowallocated == false) {
+            xsize = 0;
+        }
+        return 0;
+    };
+
+    //! buffer (shadow) free
+    int shadowrelease() {
+        if (_shadowallocated) {
+            // Free every shadow slot. Freeing only xmems[1] leaked the remaining shadows and
+            // read past the end of xmems when no shadow existed (nsize == 1).
+            for (unsigned int i = 1; i <= _shadowsize && i < xmems.size(); i++) {
+                free(xmems[i]);
+                xmems[i] = nullptr;
+            }
+            _shadowallocated = false;
+        };
+
+        if (_bodyallocated == false) {
+            xsize = 0;
+        }
+        return 0;
+    };
+
+    //! buffer (body and shadow) free
+    int bodyshadowrelease() {
+        bodyrelease();
+        shadowrelease();
+        return 0;
+    };
+
+    //! body buffer allocated or not
+    inline bool bodyallocated() { return _bodyallocated; };
+
+    //! shadow buffer allocated or not
+    inline bool shadowallocated() { return _shadowallocated; };
+
+    //! operator [] on the body; throws xhpp::error when `num` is out of range
+    inline T& operator[](const unsigned int num) {
+        if (num >= xsize) {
+            throw xhpp::error("ERROR: xhpp::buffer::host operator[], access out of range.\n");
+        };
+        return xmems[0][num];
+    };
+
+    //! return size of buffer
+    inline unsigned int size() { return xsize; };
+
+    //! return pointer of underlying array `rc` (0 is the body); `rc` is not range-checked
+    inline T* dataptr(const int rc = 0) { return xmems[rc]; };
+
+    //! set every element of array `rc` to `x`
+    inline int set(const T x, const int rc = 0) {
+        for (unsigned int i = 0; i < xsize; i++) {
+            xmems[rc][i] = x;
+        };
+        return 0;
+    };
+
+    //! copy xsize elements from `vecx` into array `rc`
+    inline int set(const T* vecx, const int rc = 0) {
+        memcpy(xmems[rc], vecx, xsize * sizeof(T));
+        return 0;
+    };
+
+    //! non-blocking copy from host to device
+    //! Enqueues a write of array `rcfrom` into slot `rcto` of `dbuf`; `waitevt` is the wait
+    //! list and evt->data() receives the event. Throws xhpp::error when the enqueue fails.
+    int copy_to_device(xhpp::vbuffer::device<T>* dbuf,
+                       xhpp::event* waitevt,
+                       xhpp::event* evt,
+                       const int rcfrom = 0,
+                       const int rcto = 0) {
+        cl_int err = clEnqueueWriteBuffer(xctx->xqueue, (dbuf->xmems)[rcto], CL_FALSE, 0, xsize * sizeof(T),
+                                          xmems[rcfrom], waitevt->size(), waitevt->data(), evt->data());
+        if (CL_SUCCESS != err) {
+            throw xhpp::error(err,
+                              "ERROR: xhpp::buffer::host::copy_to_device(), non-blocking data transfer from host "
+                              "buffer to device buffer error.\n");
+        };
+#ifdef _XHPP_DEBUG_
+        std::cout << "buffer from_host \n";
+#endif
+        return 0;
+    };
+
+    //! non-blocking copy from device to host
+    //! Enqueues a read of slot `rcfrom` of `dbuf` into array `rcto`; `waitevt` is the wait
+    //! list and evt->data() receives the event. Throws xhpp::error when the enqueue fails.
+    int copy_from_device(xhpp::vbuffer::device<T>* dbuf,
+                         xhpp::event* waitevt,
+                         xhpp::event* evt,
+                         const int rcfrom = 0,
+                         const int rcto = 0) {
+        cl_int err = clEnqueueReadBuffer(xctx->xqueue, (dbuf->xmems)[rcfrom], CL_FALSE, 0, xsize * sizeof(T),
+                                         xmems[rcto], waitevt->size(), waitevt->data(), evt->data());
+        if (CL_SUCCESS != err) {
+            throw xhpp::error(err,
+                              "ERROR: xhpp::buffer::host::copy_from_device(), non-blocking data transfer from device "
+                              "buffer to host buffer error.\n");
+        };
+#ifdef _XHPP_DEBUG_
+        std::cout << "Buffer to_host \n";
+#endif
+        return 0;
+    };
+
+    //! blocking copy from device to host
+    inline int copy_from_device(xhpp::vbuffer::device<T>* dbuf, const int rcfrom = 0, const int rcto = 0) {
+        cl_int err = clEnqueueReadBuffer(xctx->xqueue, (dbuf->xmems)[rcfrom], CL_TRUE, 0, xsize * sizeof(T),
+                                         xmems[rcto], 0, NULL, NULL);
+        if (CL_SUCCESS != err) {
+            throw xhpp::error(err,
+                              "ERROR: xhpp::buffer::host::copy_from_device(), blocking data transfer from device "
+                              "buffer to host buffer error.\n");
+        };
+#ifdef _XHPP_DEBUG_
+        // same trace text as the non-blocking device-to-host copy
+        std::cout << "Buffer to_host \n";
+#endif
+        return 0;
+    };
+
+    //! blocking copy from host to device
+    inline int copy_to_device(xhpp::vbuffer::device<T>* dbuf, const int rcfrom = 0, const int rcto = 0) {
+        cl_int err = clEnqueueWriteBuffer(xctx->xqueue, (dbuf->xmems)[rcto], CL_TRUE, 0, xsize * sizeof(T),
+                                          xmems[rcfrom], 0, NULL, NULL);
+        if (CL_SUCCESS != err) {
+            throw xhpp::error(err,
+                              "ERROR: xhpp::buffer::host::copy_to_device(), blocking data transfer from host buffer to "
+                              "device buffer error.\n");
+        };
+#ifdef _XHPP_DEBUG_
+        // same trace text as the non-blocking host-to-device copy
+        std::cout << "buffer from_host \n";
+#endif
+        return 0;
+    };
+
+}; // end of class vbuffer::host
+}; // end of namespace vbuffer
+
+namespace buffer {
+
+template <class T>
+class device;
+
+//! host buffer objects
+//!
+//! Thin front-end over vbuffer::host<T>: every member forwards to the base class.
+template <class T>
+class host : public vbuffer::host<T> {
+   public:
+    //! constructor
+    host(xhpp::context* ctx) : vbuffer::host<T>(ctx){};
+    //! wrap an existing array of `num` elements (see the base-class constructor)
+    host(xhpp::context* ctx, unsigned int num, T* data) : vbuffer::host<T>(ctx, num, data){};
+
+    //! buffer allocation (4K alignment) for `num` elements; always returns 0
+    int allocate(unsigned int num) {
+        vbuffer::host<T>::setsize(num);
+        vbuffer::host<T>::bodyallocate();
+        return 0;
+    };
+
+    //! buffer free (body and shadows); always returns 0
+    int release() {
+        vbuffer::host<T>::bodyrelease();
+        vbuffer::host<T>::shadowrelease();
+        return 0;
+    };
+
+    //! operator []
+    inline T& operator[](const unsigned int num) { return vbuffer::host<T>::operator[](num); }
+
+    //! return size of buffer
+    inline unsigned int size() { return vbuffer::host<T>::size(); };
+
+    //! pointer of underlying array `rc` (0, the default, is the body)
+    //! `rc` is forwarded to the base class; it used to be dropped, so any `rc` returned the body.
+    inline T* dataptr(const int rc = 0) { return vbuffer::host<T>::dataptr(rc); };
+
+    //! buffer allocated or not
+    inline bool allocated() { return vbuffer::host<T>::bodyallocated(); };
+
+    //! set value to all the elements
+    inline int set(const T x) { return vbuffer::host<T>::set(x); };
+
+    //! set values from an array holding at least size() elements
+    inline int set(const T* vecx) { return vbuffer::host<T>::set(vecx); };
+
+    //! blocking copy from device to host
+    int copy_from_device(xhpp::buffer::device<T>* dbuf) { return vbuffer::host<T>::copy_from_device(dbuf); };
+
+    //! blocking copy from host to device
+    int copy_to_device(xhpp::buffer::device<T>* dbuf) { return vbuffer::host<T>::copy_to_device(dbuf); };
+};
+
+}; // end of namespace buffer
+}; // end of namespace xhpp
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/xhpp/xhpp_context.hpp b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_context.hpp
new file mode 100644
index 0000000000..be42677aab
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_context.hpp
@@ -0,0 +1,380 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**********
+           Copyright (c) 2018, Xilinx, Inc.
+           All rights reserved.
+
+           TODO
+
+**********/
+
+#ifndef _XHPP_CONTEXT_
+#define _XHPP_CONTEXT_
+
+#include "CL/cl.h"
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <cstring>
+
+#include "xhpp_error.hpp"
+#include "xhpp_enums.hpp"
+
+// #define _XHPP_DEBUG_
+// #define _XHPP_INFO_
+
+namespace xhpp {
+
+// pre-declaration
+namespace vbuffer {
+template <class T>
+class device;
+
+template <class T>
+class host;
+}
+namespace task {
+class dev_func;
+}
+
+//! context object
+//!
+//! Bundles the OpenCL objects used by the xhpp buffers and tasks: the selected platform and
+//! device, the cl_context, an out-of-order command queue and the program built from an
+//! xclbin file. create() builds all of them, release() tears them down again.
+class context {
+    // friend class
+    template <class T>
+    friend class xhpp::vbuffer::device;
+
+    template <class T>
+    friend class xhpp::vbuffer::host;
+
+    friend class xhpp::task::dev_func;
+
+   private:
+    //! capacity of the fixed platform / device id arrays below
+    static constexpr unsigned int kMaxIds = 16;
+
+    unsigned int xplatformCount = 0;  //! number of platforms
+    unsigned int xplatformSelect = 0; //! platform selected
+    cl_platform_id xplatform[kMaxIds] = {0};
+
+    unsigned int xdeviceCount = 0;     //! number of devices
+    cl_device_id xdeviceSelect = NULL; //! device selected
+    cl_device_id xdevice[kMaxIds] = {0};
+
+    // public:
+    cl_context xcontext = NULL;     //! cl context object
+    cl_command_queue xqueue = NULL; //! cl queue object
+    cl_program xprogram = NULL;     //! cl program object
+
+    std::string xclbinname;  //! xclbin name
+    size_t xclbinsize = 0;   //! xclbin size
+    char* xclbin = nullptr;  //! xclbin content (new[]-allocated by loadxclbin)
+
+    std::string xsaname; //! xsa name
+
+    bool _contextcreated = false;
+
+   public:
+    Pattern pattern = xhpp::linear;
+
+    //! constructor
+    //! Stores the XSA name (with leading/trailing spaces removed), the xclbin path and the
+    //! running pattern. No OpenCL call is made until create().
+    context(const std::string XSA, const std::string xclbinName, Pattern patternin = xhpp::linear) {
+        xsaname = XSA;
+        xsaname.erase(0, xsaname.find_first_not_of(" ")); // remove space
+        xsaname.erase(xsaname.find_last_not_of(" ") + 1);
+        xclbinname = xclbinName;
+        pattern = patternin;
+    };
+
+    //! context create
+    //! Runs platform, device, context, queue, xclbin load and program creation in that order.
+    //! Each step throws xhpp::error on failure; returns 0 otherwise.
+    int create() {
+        createclplatform();
+        createcldevice();
+        createclcontext();
+        createclcmdqueue();
+        loadxclbin();
+        createclprogram();
+
+#ifdef _XHPP_INFO_
+        std::cout << "INFO: Context create.\n";
+#endif
+        _contextcreated = true;
+        return 0;
+    };
+
+    //! update pattern; always returns 0
+    int pattern_update(Pattern patternin) {
+        pattern = patternin;
+        return 0; // the function is declared int: falling off the end was undefined behaviour
+    }
+
+    //! context release (no-op unless create() completed); always returns 0
+    int release() {
+        if (_contextcreated == true) {
+            releaseclprogram();
+            delete[] xclbin; // allocated with new[] in loadxclbin()
+            xclbin = nullptr;
+            releaseclcmdqueue();
+            releaseclcontext();
+            releasecldevice();
+
+#ifdef _XHPP_INFO_
+            std::cout << "INFO: Context free.\n";
+#endif
+            _contextcreated = false;
+        };
+        return 0;
+    };
+
+    //! wait queue: blocks until every command enqueued on xqueue has finished
+    int wait() {
+        clFinish(xqueue);
+        return 0;
+    };
+
+    //! hw info
+    //! Prints every platform and its accelerator devices to std::cout. Uses local storage
+    //! only; the members of this context are not touched. Throws xhpp::error when an OpenCL
+    //! query fails or a platform reports no accelerator device.
+    int hwinfo() {
+        unsigned int platformCount = 0;
+        cl_platform_id platform[kMaxIds] = {0};
+        unsigned int deviceCount = 0;
+        cl_device_id device[kMaxIds];
+
+        cl_int err;
+
+        // platform and device
+        err = clGetPlatformIDs(0, 0, &platformCount);
+        if (err != CL_SUCCESS) {
+            throw xhpp::error(err, "ERROR: xhpp::context::hwinfo(), clGetPlatformIDs count error.\n");
+        };
+        err = clGetPlatformIDs(kMaxIds, platform, &platformCount);
+        if (err != CL_SUCCESS) {
+            throw xhpp::error(err, "ERROR: xhpp::context::hwinfo(), clGetPlatformIDs platform error.\n");
+        };
+
+        std::cout << "INFO: Number of platforms: " << platformCount << std::endl;
+
+        // platformCount is the number available, but only kMaxIds ids were stored
+        unsigned int platformListed = platformCount;
+        if (platformListed > kMaxIds) {
+            platformListed = kMaxIds;
+        }
+
+        for (unsigned int i = 0; i < platformListed; i++) {
+            // platform info; zero-filled so a failed query prints an empty name
+            char platformName[256] = {0};
+            err = clGetPlatformInfo(platform[i], CL_PLATFORM_NAME, 256, platformName, 0);
+
+            err = clGetDeviceIDs(platform[i], CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &deviceCount);
+            if ((err != CL_SUCCESS) || (deviceCount == 0)) {
+                throw xhpp::error(err, "ERROR: xhpp::context::hwinfo(), clGetDeviceIDs count error.\n");
+            };
+
+            std::cout << "      - Platform " << i << ": " << platformName << ", number of devices: " << deviceCount
+                      << std::endl;
+
+            // never ask for more ids than device[] can hold
+            if (deviceCount > kMaxIds) {
+                deviceCount = kMaxIds;
+            }
+            err = clGetDeviceIDs(platform[i], CL_DEVICE_TYPE_ACCELERATOR, deviceCount, device, NULL);
+            if (err != CL_SUCCESS) {
+                throw xhpp::error(err, "ERROR: xhpp::context::hwinfo(), clGetDeviceIDs device error.\n");
+            };
+
+            // devices in platform info
+            for (unsigned int idev = 0; idev < deviceCount; idev++) {
+                char namestr[256];
+                err = clGetDeviceInfo(device[idev], CL_DEVICE_NAME, 256, namestr, 0);
+                if (err != CL_SUCCESS) {
+                    throw xhpp::error(err, "ERROR: xhpp::context::hwinfo(), clGetDeviceInfo error.\n");
+                };
+                std::cout << "        * Device " << idev << ": " << namestr << std::endl;
+            };
+        };
+        return 0;
+    };
+
+   private:
+    //! cl platform create
+    //! Queries the platforms and selects the first one whose name is exactly "Xilinx".
+    //! Throws xhpp::error when the query fails or no such platform exists.
+    int createclplatform() {
+        // number of platforms
+        cl_int err;
+        err = clGetPlatformIDs(0, 0, &xplatformCount);
+        if ((err != CL_SUCCESS) || (xplatformCount == 0)) {
+            throw xhpp::error(err, "ERROR: xhpp::context::createclplatform(), clGetPlatformIDs count error.\n");
+        };
+
+        // get platforms
+        err = clGetPlatformIDs(kMaxIds, xplatform, &xplatformCount);
+        if (err != CL_SUCCESS) {
+            throw xhpp::error(err, "ERROR: xhpp::context::createclplatform(), clGetPlatformIDs platform error.\n");
+        };
+        // only kMaxIds ids were stored, even if more platforms are available
+        if (xplatformCount > kMaxIds) {
+            xplatformCount = kMaxIds;
+        }
+
+        // select platform
+        bool found = false;
+        for (unsigned int i = 0; i < xplatformCount; i++) {
+            char platformName[256] = {0};
+            err = clGetPlatformInfo(xplatform[i], CL_PLATFORM_NAME, 256, platformName, 0);
+            if ((err != CL_SUCCESS) || (strcmp(platformName, "Xilinx") != 0)) {
+                continue; // skip non-Xilinx (or unreadable) platform
+            }
+            xplatformSelect = i;
+            found = true;
+#ifdef _XHPP_INFO_
+            printf("INFO: Selected Platform: %s\n", platformName);
+#endif
+            break;
+        };
+
+        // This check used to sit inside the loop after a continue/break and was never reached,
+        // so a missing platform left xplatformSelect unset.
+        if (!found) {
+            throw xhpp::error("ERROR: No Xilinx platform found.\n");
+        };
+        return 0;
+    };
+
+    //! cl device create
+    //! Lists the accelerator devices of the selected platform and selects the first one whose
+    //! name (spaces trimmed) equals xsaname. Throws xhpp::error on query failure or no match.
+    int createcldevice() {
+        cl_int err;
+        err = clGetDeviceIDs(xplatform[xplatformSelect], CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &xdeviceCount);
+        if ((err != CL_SUCCESS) || (xdeviceCount == 0)) {
+            throw xhpp::error(err, "ERROR: xhpp::context::createcldevice(), clGetDeviceIDs count error.\n");
+        };
+
+        // never ask for more ids than xdevice[] can hold
+        if (xdeviceCount > kMaxIds) {
+            xdeviceCount = kMaxIds;
+        }
+        err = clGetDeviceIDs(xplatform[xplatformSelect], CL_DEVICE_TYPE_ACCELERATOR, xdeviceCount, xdevice, NULL);
+        if (err != CL_SUCCESS) {
+            throw xhpp::error(err, "ERROR: xhpp::context::createcldevice(), clGetDeviceIDs device error.\n");
+        };
+
+        unsigned int idev = 0;
+        for (; idev < xdeviceCount; idev++) {
+            char namechar[256];
+            err = clGetDeviceInfo(xdevice[idev], CL_DEVICE_NAME, 256, namechar, 0);
+            if (err != CL_SUCCESS) {
+                throw xhpp::error(err, "ERROR: xhpp::context::createcldevice(), clGetDeviceInfo error.\n");
+            };
+
+            std::string namestr(namechar);
+            namestr.erase(0, namestr.find_first_not_of(" ")); // remove white space in string
+            namestr.erase(namestr.find_last_not_of(" ") + 1);
+
+            // check xsa
+            if (xsaname == namestr) {
+                xdeviceSelect = xdevice[idev];
+                break;
+            };
+        };
+
+        // check required xsa found or not (the loop ran to the end without a match)
+        if (idev == xdeviceCount) {
+            throw xhpp::error(err, "ERROR: xhpp::context::createcldevice(), required XSA not found.\n");
+        };
+        return 0;
+    };
+
+    //! release cl device: calls clReleaseDevice on every listed device; always returns 0
+    int releasecldevice() {
+        for (unsigned int idev = 0; idev < xdeviceCount; idev++) {
+            clReleaseDevice(xdevice[idev]);
+        };
+        return 0;
+    };
+
+    //! cl context create for the selected device; throws xhpp::error on failure
+    int createclcontext() {
+        cl_int err;
+        xcontext = clCreateContext(NULL, 1, &xdeviceSelect, NULL, NULL, &err);
+        if (err != CL_SUCCESS) {
+            throw xhpp::error(err, "ERROR: xhpp::context::createclcontext(), clCreateContext error.\n");
+        };
+#ifdef _XHPP_INFO_
+        printf("INFO: cl Context created \n");
+#endif
+        return 0;
+    };
+
+    //! release cl context; throws xhpp::error on failure
+    int releaseclcontext() {
+        cl_int err = clReleaseContext(xcontext);
+        if (err != CL_SUCCESS) {
+            throw xhpp::error(err, "ERROR: xhpp::context::releaseclcontext(), clReleaseContext error.\n");
+        };
+        return 0;
+    };
+
+    //! cl command queue create (out-of-order execution enabled); throws xhpp::error on failure
+    int createclcmdqueue() {
+        cl_int err;
+        xqueue = clCreateCommandQueue(xcontext, xdeviceSelect, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+        if (err != CL_SUCCESS) {
+            throw xhpp::error(err, "ERROR: xhpp::context::createclcmdqueue(), clCreateCommandQueue error.\n");
+        };
+#ifdef _XHPP_INFO_
+        printf("INFO: cl Command Queue created \n");
+#endif
+        return 0;
+    };
+
+    //! release cl command queue; throws xhpp::error on failure
+    int releaseclcmdqueue() {
+        cl_int err = clReleaseCommandQueue(xqueue);
+        if (err != CL_SUCCESS) {
+            throw xhpp::error(err, "ERROR: xhpp::context::releaseclcmdqueue(), clReleaseCommandQueue error.\n");
+        };
+        return 0;
+    };
+
+    //! cl program create from the loaded xclbin image; throws xhpp::error on failure
+    int createclprogram() {
+        cl_int err;
+        xprogram = clCreateProgramWithBinary(xcontext, 1, &xdeviceSelect, &xclbinsize, (const unsigned char**)&xclbin,
+                                             0, &err);
+        if (err != CL_SUCCESS) {
+            throw xhpp::error(err, "ERROR: xhpp::context::createclprogram(), clCreateProgramWithBinary error.\n");
+        };
+#ifdef _XHPP_INFO_
+        printf("INFO: Program created \n");
+#endif
+        return 0;
+    };
+
+    //! release cl program; throws xhpp::error on failure
+    int releaseclprogram() {
+        int err = clReleaseProgram(xprogram);
+        if (err != CL_SUCCESS) {
+            // the message names this function (it previously said releasecldevice())
+            throw xhpp::error(err, "ERROR: xhpp::context::releaseclprogram(), clReleaseProgram error.\n");
+        };
+        return 0;
+    };
+
+    //! load xclbin
+    //! Reads the whole file named xclbinname into a new[]-allocated, zero-terminated buffer
+    //! (xclbin / xclbinsize). Throws xhpp::error when the file cannot be opened or read.
+    int loadxclbin() {
+        std::ifstream stream(xclbinname, std::ifstream::binary);
+        if (!stream) {
+            throw xhpp::error("ERROR: xhpp::context::loadxclbin(), find xclbin error.\n");
+        };
+
+        stream.seekg(0, stream.end);
+        const std::streamoff length = stream.tellg();
+        stream.seekg(0, stream.beg);
+        // tellg() reports failure as -1, which must not be turned into a huge size_t
+        if (length < 0) {
+            throw xhpp::error("ERROR: xhpp::context::loadxclbin(), read xclbin error.\n");
+        };
+        xclbinsize = static_cast<size_t>(length);
+
+        delete[] xclbin; // drop a buffer left over from an earlier load, if any
+        xclbin = new char[xclbinsize + 1];
+        stream.read(xclbin, xclbinsize);
+        if (!stream) {
+            // do not leak the buffer when the read fails
+            delete[] xclbin;
+            xclbin = nullptr;
+            throw xhpp::error("ERROR: xhpp::context::loadxclbin(), read xclbin error.\n");
+        };
+        stream.close();
+        xclbin[xclbinsize] = 0;
+
+#ifdef _XHPP_INFO_
+        printf("INFO: xclbin load \n");
+#endif
+        return 0;
+    };
+
+}; // end of class context
+}; // end of namespace
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/xhpp/xhpp_enums.hpp b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_enums.hpp
new file mode 100644
index 0000000000..b8034f7d2d
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_enums.hpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef _XHPP_ENUMS_H_
+#define _XHPP_ENUMS_H_
+
+namespace xhpp {
+
+//! graph running modes
+enum Pattern {
+    linear = 1,
+    pipeline = 2
+};
+
+//! data transfer mode enum
+enum DataTransMode {
+    host2dev = 1, //! host to device
+    dev2host = 2  //! device to host
+};
+
+} // end of namespace xhpp
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/xhpp/xhpp_error.hpp b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_error.hpp
new file mode 100644
index 0000000000..eff91db04c
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_error.hpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**********
+           Copyright (c) 2018, Xilinx, Inc.
+           All rights reserved.
+
+           TODO
+
+**********/
+
+#ifndef _XHPP_ERROR_
+#define _XHPP_ERROR_
+
+#include <stdexcept>
+#include <string>
+
+namespace xhpp {
+
+//! Exception type of the xhpp layer: a std::runtime_error that additionally stores a
+//! numeric error code next to the message.
+class error : public std::runtime_error {
+   private:
+    unsigned int err_code; //! error code
+
+   public:
+    //! build from an error code and an optional message (empty by default)
+    error(unsigned int ec, const std::string& what = "")
+        : std::runtime_error(what),
+          err_code(ec) {}
+
+    //! build from a message only; the stored error code is 0
+    error(const std::string& what)
+        : error(0u, what) {}
+};
+
+} // end of namespace xhpp
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/xhpp/xhpp_event.hpp b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_event.hpp
new file mode 100644
index 0000000000..dbb39d1a53
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_event.hpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef _XHPP_EVENT_
+#define _XHPP_EVENT_
+
+#include "CL/cl.h"
+#include <vector>
+
+namespace xhpp {
+//! Thin wrapper around a vector of OpenCL events.
+class event {
+   public:
+    //! underlying event storage
+    std::vector<cl_event> evtvec;
+
+    //! create an empty event list
+    event() {}
+
+    //! create a list holding n events
+    event(size_t n) : evtvec(n) {}
+
+    //! number of events held
+    size_t size() { return evtvec.size(); }
+
+    //! change the number of events held
+    void resize(size_t n) { evtvec.resize(n); }
+
+    //! pointer to the first event, or nullptr when the list is empty
+    cl_event* data() { return evtvec.empty() ? nullptr : evtvec.data(); }
+
+    //! access the n-th event (no bounds checking)
+    cl_event& operator[](size_t n) { return evtvec[n]; }
+};
+};
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/xhpp/xhpp_graph.hpp b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_graph.hpp
new file mode 100644
index 0000000000..fbcce2ba05
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_graph.hpp
@@ -0,0 +1,207 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**********
+           Copyright (c) 2018, Xilinx, Inc.
+           All rights reserved.
+
+           TODO
+
+**********/
+
+#ifndef _XHPP_GRAPH_
+#define _XHPP_GRAPH_
+
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include <fstream>
+
+#include "xhpp_taskbase.hpp"
+
+namespace xhpp {
+
+//! node type enum
+enum NodeTYPE { middle = 0, start = 1, end = 2 };
+
+//! Graph object
+class graph {
+    //  private:
+   public:
+    bool issetup = false;
+
+    //! number of nodes
+    unsigned int numnodes = 0;
+
+    //! number of edges
+    unsigned int numedges = 0;
+
+    //! node list (node, name, type)
+    std::vector<xhpp::task::base*> nodes;
+    std::vector<std::string> nodesname;
+    std::vector<NodeTYPE> nodestype;
+
+    //! edge list
+    std::vector<std::pair<unsigned int, unsigned int> > edges;
+
+    //! father event list
+    std::vector<std::vector<unsigned int> > fatherevtlists;
+
+    //! child event list
+    std::vector<std::vector<unsigned int> > childevtlists;
+
+   private:
+    //! Collect the source node index of every edge that ends at node nd
+    //! (i.e. the nodes that nd depends on).
+    std::vector<unsigned int> FatherEventList(const unsigned int nd) {
+        std::vector<unsigned int> parents;
+        for (const auto& edge : edges) {
+            if (edge.second != nd) continue;
+            parents.push_back(edge.first);
+        }
+        return parents;
+    }
+
+    //! Collect the destination node index of every edge that starts at node nd
+    //! (i.e. the nodes that depend on nd).
+    std::vector<unsigned int> ChildEventList(const unsigned int nd) {
+        std::vector<unsigned int> children;
+        for (const auto& edge : edges) {
+            if (edge.first != nd) continue;
+            children.push_back(edge.second);
+        }
+        return children;
+    }
+
+   public:
+    //! Append a node together with its name and type.
+    //! \param node task object, stored as xhpp::task::base*
+    //! \param name label that addedge() later uses to look the node up
+    //! \param nt   node type, xhpp::middle unless stated otherwise
+    //! \return always 0
+    template <class TASK>
+    int addnode(TASK* node, std::string name, NodeTYPE nt = xhpp::middle) {
+        nodes.push_back(node);
+        nodesname.push_back(name);
+        nodestype.push_back(nt);
+        ++numnodes;
+        return 0;
+    }
+
+    //! Connect two existing nodes, identified by name, with a directed edge node_s -> node_e.
+    //! \throws xhpp::error when either name was never registered through addnode()
+    //! \return always 0
+    int addedge(std::string node_s, std::string node_e) {
+        const auto first = nodesname.begin();
+        const auto last = nodesname.end();
+        const auto src = std::find(first, last, node_s);
+        const auto dst = std::find(first, last, node_e);
+
+        if (src == last || dst == last) {
+            throw xhpp::error("ERROR: xhpp::graph::addedge(), node does not existe.\n");
+        }
+        // edges are stored as (source index, destination index) into nodesname
+        edges.push_back(std::pair<unsigned int, unsigned int>(std::distance(first, src), std::distance(first, dst)));
+        numedges += 1;
+        return 0;
+    }
+
+    //! Build the per-node father/child lists from the current edge list.
+    //!
+    //! The lists are rebuilt from scratch on every call. Previously a second call appended
+    //! a duplicate set of lists after the first one.
+    //! \return always 0
+    int setup() {
+        fatherevtlists.clear();
+        childevtlists.clear();
+        fatherevtlists.reserve(numnodes);
+        childevtlists.reserve(numnodes);
+        for (unsigned int i = 0; i < numnodes; i++) {
+            fatherevtlists.push_back(FatherEventList(i));
+            childevtlists.push_back(ChildEventList(i));
+        }
+        issetup = true;
+        return 0;
+    }
+
+    //! Drop all nodes, edges and derived lists and mark the graph as not set up.
+    //!
+    //! Fixes two defects of the previous version: it had no return statement although it is
+    //! declared int, and it indexed fatherevtlists/childevtlists for every node even when
+    //! setup() had never filled them.
+    //! \return always 0
+    int release() {
+        nodes.clear();
+        nodesname.clear();
+        nodestype.clear();
+        edges.clear();
+        // clearing the outer vectors destroys the per-node lists as well
+        fatherevtlists.clear();
+        childevtlists.clear();
+        numnodes = 0;
+        numedges = 0;
+        issetup = false;
+        return 0;
+    }
+
+    //! number of nodes added through addnode()
+    unsigned int GetNumNodes() {
+        return numnodes;
+    }
+
+    //! number of edges added through addedge()
+    unsigned int GetNumEdges() {
+        return numedges;
+    }
+
+    //! Return the list of nodes that node nd depends on, as computed by setup().
+    //! \throws xhpp::error when setup() has not been called
+    std::vector<unsigned int> GetFatherEvent(const unsigned int nd) {
+        if (!issetup) throw xhpp::error("ERROR: xhpp::graph::GetFatherEvent(), graph is not setup.\n");
+        return fatherevtlists[nd];
+    }
+
+    //! Return the list of nodes that depend on node nd, as computed by setup().
+    //! \throws xhpp::error when setup() has not been called
+    std::vector<unsigned int> GetChildEvent(const unsigned int nd) {
+        if (!issetup) throw xhpp::error("ERROR: xhpp::graph::GetChildEvent(), graph is not setup.\n");
+        return childevtlists[nd];
+    }
+
+    //! Remove a node from the graph. Not implemented yet.
+    //! \return -1 to signal "not implemented" (the previous version had no return
+    //!         statement, which is undefined behaviour for a non-void function)
+    int removenode() {
+        // TODO
+        return -1;
+    }
+
+    //! Remove an edge from the graph. Not implemented yet.
+    //! \return -1 to signal "not implemented" (the previous version had no return
+    //!         statement, which is undefined behaviour for a non-void function)
+    int removeedge() {
+        // TODO
+        return -1;
+    }
+
+    //! Validate the graph. Not implemented yet.
+    //! \return -1 to signal "not implemented" (the previous version had no return
+    //!         statement, which is undefined behaviour for a non-void function)
+    int check() {
+        // TODO
+        return -1;
+    }
+
+    //! Debug hook; all diagnostics are currently commented out, so this is a no-op.
+    //! \return always 0 (the previous version had no return statement)
+    int debug() {
+        // std::cout << " ** DEBUG INFO ** " << std::endl;
+        // nodes[0]->run();
+        // nodes[1]->run();
+        // nodes[2]->run();
+        // std::cout << nodes.size() << std::endl;
+        // std::cout << edges[0].first << " " << edges[0].second << std::endl;
+        // std::cout << edges[1].first << " " << edges[1].second << std::endl;
+        return 0;
+    }
+
+    //! Write the edge list to the file "gr.dot" as a "digraph" and echo every edge on stdout.
+    void drawgraph() {
+        std::cout << "drawing the graph..." << std::endl;
+        std::ofstream dotfile("gr.dot");
+        dotfile << "digraph g { \n";
+        for (const auto& edge : edges) {
+            // edges hold indices into nodesname
+            const std::string& from = nodesname[edge.first];
+            const std::string& to = nodesname[edge.second];
+            std::cout << from << " -> " << to << std::endl;
+            dotfile << "    " << from << "->" << to << "\n";
+        }
+        dotfile << "}\n";
+    }
+
+}; // end of class graph
+}; // end of namesapce
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/xhpp/xhpp_scheduler.hpp b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_scheduler.hpp
new file mode 100644
index 0000000000..325321aad6
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_scheduler.hpp
@@ -0,0 +1,261 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**********
+           Copyright (c) 2019, Xilinx, Inc.
+           All rights reserved.
+
+           TODO
+
+**********/
+
+#ifndef _XHPP_SCHEDULER_
+#define _XHPP_SCHEDULER_
+
+#include "xhpp_event.hpp"
+#include "xhpp_graph.hpp"
+#include "xhpp_context.hpp"
+#include "xhpp_taskbase.hpp"
+#include "xhpp_enums.hpp"
+#include <string>
+#include <sys/time.h>
+
+namespace xhpp {
+
+//! scheduler
+class engine {
+   private:
+    //! list of event
+    std::vector<xhpp::event> evt;
+
+    //! graph
+    xhpp::graph* xgr;
+
+    //! context
+    xhpp::context* xctx;
+
+    //!
+    unsigned int NumSubmit = 0;
+
+    //! num of resource = body + shadowsize
+    unsigned int NumDepth = 1;
+
+    //! event to start the first host func task.
+    cl_event ent0;
+
+    //! global counter to indicate whether to use buffer or shadow buffer
+    int shadow_rc = 0;
+
+    //! to record the num of calling data_inout API.
+    int inout_rc = 0; // TODO: same as numsubmit? If so, delete one.
+
+   public:
+    // ------------------------------------------------------------
+
+    //! Elapsed time from *tv0 to *tv1 in microseconds, converted to int.
+    int tvdiff(struct timeval* tv0, struct timeval* tv1) {
+        const auto seconds = tv1->tv_sec - tv0->tv_sec;
+        const auto micros = tv1->tv_usec - tv0->tv_usec;
+        return seconds * 1000000 + micros;
+    }
+
+    // ------------------------------------------------------------
+
+    //! Bind the scheduler to a context and a graph; the event list starts with one
+    //! entry per graph node.
+    engine(xhpp::context* ctx, xhpp::graph* gr) : evt(gr->GetNumNodes()), xgr(gr), xctx(ctx) {}
+
+    //! Largest numofcu() value reported by any node of the graph (0 for an empty graph).
+    int getnumcu() {
+        int best = 0;
+        const unsigned int count = xgr->GetNumNodes();
+        for (unsigned int idx = 0; idx < count; ++idx) {
+            const int cus = (xgr->nodes)[idx]->numofcu();
+            if (cus > best) best = cus;
+        }
+        return best;
+    }
+
+    //! Set up the scheduler: choose the resource depth and allocate body/shadow
+    //! resources on every node.
+    //!
+    //! NumDepth is the maximum CU count in linear mode and twice that otherwise.
+    //! NOTE(review): with an empty graph getnumcu() yields 0, so NumDepth becomes 0 while
+    //! submitnode() and run() use NumDepth as a modulus - confirm callers never get here
+    //! with an empty graph.
+    //! \return always 0 (the previous version had no return statement although it is
+    //!         declared int, which is undefined behaviour)
+    int setup() {
+        int maxcu = getnumcu();
+        if ((xctx->pattern) == xhpp::linear) { // linear mode
+            std::cout << "INFO: Engine is setup with linear mode." << std::endl;
+            NumDepth = maxcu;
+            std::cout << "NumDepth = " << NumDepth << std::endl;
+            for (unsigned int i = 0; i < xgr->GetNumNodes(); i++) {
+                (xgr->nodes)[i]->setupbodyshadow(maxcu); // TODO: allocate only necessary buffers
+            }
+        } else { // pipeline mode
+            std::cout << "INFO: Engine is setup with pipeline mode." << std::endl;
+            NumDepth = 2 * maxcu;
+            std::cout << "NumDepth = " << NumDepth << std::endl;
+            for (unsigned int i = 0; i < xgr->GetNumNodes(); i++) {
+                (xgr->nodes)[i]->setupbodyshadow(NumDepth); // TODO: allocate only necessary buffers
+                std::cout << "passe " << i << std::endl;
+            }
+        }
+        return 0;
+    }
+
+    //! Submit one node of the graph for the current submission round (NumSubmit).
+    //!
+    //! The node waits on the output events of its father nodes from the current round and,
+    //! once NumSubmit >= NumDepth, on the output events of its child nodes from the round
+    //! NumDepth submissions earlier. The node's own output event is stored in
+    //! evt[NumSubmit][nd].
+    //! \param nd index of the node in the graph
+    //! \return always 0
+    int submitnode(unsigned int nd) {
+        std::vector<unsigned int> fatherevt = xgr->GetFatherEvent(nd); // father events
+        std::vector<unsigned int> childevt = xgr->GetChildEvent(nd);   // child events
+        xhpp::event waitevt(0);
+        // events that need to be waited on
+        int numfatherevt = fatherevt.size();
+        int numchildevt;
+        // during the first NumDepth rounds there is no earlier round to wait for
+        if (NumSubmit < NumDepth) {
+            numchildevt = 0;
+        } else {
+            numchildevt = childevt.size();
+        }
+        int numwaitevt = numfatherevt + numchildevt;
+
+        waitevt.resize(numwaitevt);
+
+        // father events come from the current round
+        for (int i = 0; i < numfatherevt; i++) {
+            waitevt[i] = evt[NumSubmit][fatherevt[i]];
+        };
+        // child events come from the round NumDepth submissions ago
+        for (int i = 0; i < numchildevt; i++) {
+            waitevt[i + numfatherevt] = evt[NumSubmit - NumDepth][childevt[i]];
+        };
+
+        // output event
+        xhpp::event outevt(1);
+
+        // start nodes always read index 0, end nodes always write index 0;
+        // every other index rotates through the NumDepth resources
+        int idxin, idxout;
+        int idxrun = 0;
+        int nt = (xgr->nodestype)[nd];
+        if (nt == xhpp::start) {
+            idxin = 0;
+            idxout = NumSubmit % NumDepth;
+        } else if (nt == xhpp::end) {
+            idxin = NumSubmit % NumDepth;
+            idxout = 0;
+        } else {
+            idxin = NumSubmit % NumDepth;
+            idxout = NumSubmit % NumDepth;
+        }
+        idxrun = NumSubmit % NumDepth;
+
+// submit
+// idxin, idxout are introduced to present the data input/output buffer index: the body buffer or shadow buffer.
+// idxrun refers to which kernel is used, the body or the shadow (and which shadow kernel.)
+// With _XHPP_TIMING_ defined the submit is repeated 6 times, each followed by a context wait,
+// and the average time per submit is printed.
+#ifdef _XHPP_TIMING_
+        struct timeval st_time, end_time;
+        xctx->wait();
+        gettimeofday(&st_time, 0);
+        for (int kk = 0; kk < 6; kk++) {
+#endif
+            (xgr->nodes)[nd]->submit(&waitevt, &outevt, idxin, idxout, idxrun); // idxrun = numsubmit??
+#ifdef _XHPP_TIMING_
+            xctx->wait();
+        }
+        gettimeofday(&end_time, 0);
+        int exec_time = tvdiff(&st_time, &end_time);
+        std::cout << "Execution time of task:  " << nd << " is " << exec_time / 6 << " us." << std::endl;
+#endif
+        // remember this node's output event so later nodes and rounds can wait on it
+        evt[NumSubmit][nd] = outevt[0]; // comment: pointer of pointer
+        return 0;
+    };
+
+    //! Submit the whole graph once by submitting every node in index order.
+    //!
+    //! Grows the event table to hold the current round, submits each node, and in linear
+    //! mode blocks on the context until the round has finished.
+    //! \return always 0 (the previous version had no return statement although it is
+    //!         declared int, which is undefined behaviour)
+    int submitgraph() {
+        // std::cout <<"------------- submitting graph -------------" << std::endl;
+        evt.resize(NumSubmit + 1);
+        evt[NumSubmit].resize(xgr->GetNumNodes());
+        for (unsigned int i = 0; i < xgr->GetNumNodes(); i++) {
+            submitnode(i);
+            // xctx->wait();
+            std::cout << "node: " << i << " passed" << std::endl;
+        }
+        if ((xctx->pattern) == xhpp::linear) { // linear mode
+            xctx->wait();
+        }
+        return 0;
+    }
+
+    //! For an interface task: bind an input parameter passed by pointer.
+    //! Forwards to node->updateparam() using the current inout_rc slot.
+    //! \return always 0 (the previous version had no return statement)
+    template <typename TASK, typename PARAM>
+    int data_input(TASK* node, PARAM* param, int order) {
+        node->updateparam(inout_rc, order, param);
+        return 0;
+    }
+    //! For an interface task: bind an input parameter passed by value.
+    //! Forwards to node->updateparam() using the current inout_rc slot.
+    //! \return always 0 (the previous version had no return statement)
+    template <typename TASK, typename PARAM>
+    int data_input(TASK* node, PARAM param, int order) {
+        node->updateparam(inout_rc, order, param);
+        return 0;
+    }
+    //! For an interface task: bind an output parameter passed by pointer.
+    //! Forwards to node->updateparam() using the current inout_rc slot.
+    //! \return always 0 (the previous version had no return statement)
+    template <typename TASK, typename PARAM>
+    int data_output(TASK* node, PARAM* param, int order) {
+        node->updateparam(inout_rc, order, param);
+        return 0;
+    }
+    //! For an interface task: bind an output parameter passed by value.
+    //! Forwards to node->updateparam() using the current inout_rc slot.
+    //! \return always 0 (the previous version had no return statement)
+    template <typename TASK, typename PARAM>
+    int data_output(TASK* node, PARAM param, int order) {
+        node->updateparam(inout_rc, order, param);
+        return 0;
+    }
+    //! Final step of a run() call chain (parameter passed by pointer): bind the last
+    //! parameter, submit the graph, then advance the submission counter and the
+    //! inout_rc slot (modulo NumDepth).
+    //! \param inorout "input" or "output"; any other value binds nothing
+    //! \return always 0
+    template <typename TASK, typename PARAM>
+    int run(std::string inorout, TASK* node, PARAM* param, int order) {
+        if (inorout == "input")
+            data_input(node, param, order);
+        else if (inorout == "output")
+            data_output(node, param, order);
+        submitgraph();
+        ++NumSubmit;
+        inout_rc = (inout_rc + 1) % NumDepth;
+        return 0;
+    }
+    //! Final step of a run() call chain (parameter passed by value): bind the last
+    //! parameter, submit the graph, then advance the submission counter and the
+    //! inout_rc slot (modulo NumDepth).
+    //! \param inorout "input" or "output"; any other value binds nothing
+    //! \return always 0
+    template <typename TASK, typename PARAM>
+    int run(std::string inorout, TASK* node, PARAM param, int order) {
+        if (inorout == "input")
+            data_input(node, param, order);
+        else if (inorout == "output")
+            data_output(node, param, order);
+        submitgraph();
+        ++NumSubmit;
+        inout_rc = (inout_rc + 1) % NumDepth;
+        return 0;
+    }
+    //! Top-level data input/output API (parameter passed by pointer).
+    //!
+    //! Binds one (direction, node, param, order) group and recurses on the remaining
+    //! arguments; the four-argument overloads end the recursion and submit the graph.
+    //! \return the result of the final run() overload (the previous version had no return
+    //!         statement although it is declared int, which is undefined behaviour)
+    template <typename TASK, typename PARAM, typename... Rest>
+    int run(std::string inorout, TASK* node, PARAM* param, int order, Rest... rest) {
+        if (inorout == "input") {
+            data_input(node, param, order);
+        } else if (inorout == "output") {
+            data_output(node, param, order);
+        }
+        return run(rest...);
+    }
+    //! Top-level data input/output API (parameter passed by value).
+    //!
+    //! Binds one (direction, node, param, order) group and recurses on the remaining
+    //! arguments; the four-argument overloads end the recursion and submit the graph.
+    //! \return the result of the final run() overload (the previous version had no return
+    //!         statement although it is declared int, which is undefined behaviour)
+    template <typename TASK, typename PARAM, typename... Rest>
+    int run(std::string inorout, TASK* node, PARAM param, int order, Rest... rest) {
+        if (inorout == "input") {
+            data_input(node, param, order);
+        } else if (inorout == "output") {
+            data_output(node, param, order);
+        }
+        return run(rest...);
+    }
+
+}; // end of class
+
+}; // end of namespace
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/xhpp/xhpp_taskbase.hpp b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_taskbase.hpp
new file mode 100644
index 0000000000..3180b90e23
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_taskbase.hpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**********
+           Copyright (c) 2018, Xilinx, Inc.
+           All rights reserved.
+
+           TODO
+
+**********/
+
+#ifndef _XHPP_BASETASK_
+#define _XHPP_BASETASK_
+
+#include "xhpp_event.hpp"
+#include "xhpp_context.hpp"
+#include "xhpp_enums.hpp"
+
+namespace xhpp {
+namespace task {
+
+//! Abstract base class of all task objects.
+//!
+//! Tasks are handled through base pointers, so the destructor is virtual: destroying a
+//! derived task through a base pointer with a non-virtual destructor is undefined behaviour.
+class base {
+   public:
+    //! context the task belongs to
+    xhpp::context* xctx;
+
+    //! constructor
+    base(xhpp::context* ctx) { xctx = ctx; }
+
+    //! destructor (virtual so derived tasks are destroyed correctly through base pointers)
+    virtual ~base() {}
+
+    //! number of CUs
+    virtual int numofcu() = 0;
+
+    //! setup body and shadow
+    virtual int setupbodyshadow(unsigned int) = 0;
+
+    //! release
+    virtual int release() = 0;
+
+    //! submit (non-blocking)
+    virtual int submit(
+        xhpp::event* waitevt, xhpp::event* outevt, const int rcin = 0, const int rcout = 0, const int rcrun = 0) = 0;
+
+    //! run (blocking submit)
+    virtual int run(const int rcin = 0, const int rcout = 0, const int rcrun = 0) = 0;
+};
+
+}; // end of namespace task
+}; // end of namespace xhpp
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/xhpp/xhpp_taskhost.hpp b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_taskhost.hpp
new file mode 100644
index 0000000000..fb51b41261
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_taskhost.hpp
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**********
+           Copyright (c) 2018, Xilinx, Inc.
+           All rights reserved.
+
+           TODO
+
+**********/
+
+#ifndef _XHPP_TASK_
+#define _XHPP_TASK_
+#include <iostream>
+#include <functional>
+#include <memory>
+#include <tuple>
+#include "taskbase.hpp"
+namespace xhpp {
+namespace task {
+//! Type-erased base of task_impl: lets a host task be run without knowing the
+//! concrete callable type.
+class task_impl_base {
+   public:
+    //! Virtual so that deleting a derived task_impl through a task_impl_base pointer
+    //! runs the derived destructor (the previous non-virtual destructor made that
+    //! undefined behaviour).
+    virtual ~task_impl_base() {}
+
+    //! run the stored task
+    virtual void task_impl_run() = 0;
+};
+
+//! Concrete task_impl_base that owns a callable and invokes it with no arguments.
+template <typename _Callable>
+class task_impl : public task_impl_base {
+   public:
+    //! the stored callable
+    _Callable task_impl_func;
+
+    //! take ownership of the callable by forwarding it into task_impl_func
+    task_impl(_Callable&& fn) : task_impl_func(std::forward<_Callable>(fn)) {}
+
+    //! invoke the stored callable
+    void task_impl_run() {
+        task_impl_func();
+    }
+};
+//! Task that runs a host-side callable from an OpenCL event callback.
+//!
+//! setup() stores the callable, submit() creates a user event and registers callback_run()
+//! on the first wait event, and run() invokes the callable and marks the user event complete.
+//! NOTE(review): submit(), run() and setupshadow() as declared here do not match the pure
+//! virtuals of the task base class in xhpp_taskbase.hpp - confirm which base header this
+//! file is meant to be built against.
+class host_func : public base {
+   private:
+    typedef std::shared_ptr<task_impl_base> task_shared_ptr;
+    task_shared_ptr tptr;       //!< type-erased callable installed by setup()
+    cl_int err = CL_SUCCESS;    //!< status of the last OpenCL call made by submit()
+
+    //! task pointer creation
+    template <typename _Callable>
+    std::shared_ptr<task_impl<_Callable> > taskptr(_Callable&& __f) {
+        return std::make_shared<task_impl<_Callable> >(std::forward<_Callable>(__f));
+    }
+    cl_event evt_new = nullptr; //!< user event created by submit(), completed by run()
+
+   public:
+    std::string type = "host_task";
+
+    //! number of CUs
+    int _numcu = 1;
+    int numofcu() { return _numcu; }
+
+    /*      unsigned int _shadowsize = 0;
+          //! update _shadowsize for kernel task
+          int updateshadowsize(const Pattern pattern){
+    //TODO
+              return _shadowsize;
+          }
+          */
+
+    //! constructor
+    host_func(xhpp::context* ctx) : base(ctx) {}
+    //! assign/copy host task tsk1==> tsk2
+    //...
+    //! Load a host function and its arguments into the task.
+    //!
+    //! Uses std::bind instead of std::__bind_simple: the latter is an internal libstdc++
+    //! helper that is not part of the C++ standard library. The bound arguments are stored
+    //! by value (decayed) and passed to the callable each time the task runs.
+    //! \return always 0
+    template <typename _Callable, typename... _Args>
+    int setup(_Callable&& __f, _Args&&... __args) {
+        this->tptr = taskptr(std::bind(std::forward<_Callable>(__f), std::forward<_Args>(__args)...));
+        std::cout << "assinged new host_task" << std::endl;
+        return 0;
+    }
+    //! Placeholder, does nothing.
+    //! \return always 0 (the previous version had no return statement)
+    int setarg() { return 0; }
+    //! Launch the task: run the stored callable, then mark the user event created by
+    //! submit() as complete.
+    //! \return always 0; a failure to complete the event is only reported on stdout
+    int run() {
+        std::cout << std::endl << "------running the callback task-------" << std::endl;
+        tptr->task_impl_run();
+        cl_int status = clSetUserEventStatus(evt_new, CL_COMPLETE);
+        if (status != CL_SUCCESS) {
+            std::cout << "set user event failed" << std::endl;
+        }
+        return 0;
+    }
+    //! release
+    int release() {
+        std::cout << "releasing the task" << std::endl;
+        return 0;
+    }
+    ~host_func() {}
+
+    //! OpenCL event callback trampoline: data is the host_func registered by submit().
+    static void callback_run(cl_event event, cl_int status, void* data) {
+        auto self = reinterpret_cast<host_func*>(data);
+        self->run();
+    }
+
+    //! Placeholder, does nothing.
+    //! \return always 0 (the previous version had no return statement)
+    int setupshadow(unsigned int) { return 0; }
+
+    /*      cl_event submit_e (xhpp::context* ctx, cl_event event){
+            cl_int err = clSetEventCallback(event, CL_COMPLETE, callback_run, this);
+            if(err == CL_SUCCESS){
+              std::cout << "task submitted successfully ..." << std::endl;
+            }
+            evt_new = clCreateUserEvent(ctx->xcontext, &err);
+            return evt_new;
+            }
+    */
+    //! Submit the host task: create a user event, publish it as evt[0], and register
+    //! callback_run() to fire when waitevt[0] completes.
+    //!
+    //! Unlike the previous version this returns early when the event lists are empty
+    //! (event::data() is then a null pointer) or when the user event cannot be created,
+    //! instead of dereferencing null / registering a callback for an invalid event.
+    //! \return 0 on success, 1 on any failure
+    int submit(xhpp::context* ctx, xhpp::event* waitevt, xhpp::event* evt, const int rcin, const int rcout) {
+        if (waitevt->size() == 0 || evt->size() == 0) {
+            std::cout << "host task needs one wait event and one output event" << std::endl;
+            return 1;
+        }
+        evt_new = clCreateUserEvent(ctx->xcontext, &err);
+        if (err != CL_SUCCESS) {
+            std::cout << "failed to create host task event" << std::endl;
+            return 1;
+        }
+        evt->data()[0] = evt_new;
+        err = clSetEventCallback(waitevt->data()[0], CL_COMPLETE, callback_run, this);
+        if (err == CL_SUCCESS) {
+            std::cout << "task submitted successfully ..." << std::endl;
+            return 0;
+        } else {
+            return 1;
+        }
+    }
+};
+
+} // task
+} // xhpp
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/xhpp/xhpp_taskkernel.hpp b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_taskkernel.hpp
new file mode 100644
index 0000000000..b5c9908311
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_taskkernel.hpp
@@ -0,0 +1,352 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**********
+           Copyright (c) 2018, Xilinx, Inc.
+           All rights reserved.
+
+           TODO
+
+**********/
+
+#ifndef _XHPP_KERNELTASK_
+#define _XHPP_KERNELTASK_
+
+#include <iostream>
+#include <string>
+#include <functional>
+#include <memory>
+#include <tuple>
+
+#include "xhpp_taskbase.hpp"
+#include "xhpp_context.hpp"
+#include "xhpp_bufferdevice.hpp"
+#include "xhpp_enums.hpp"
+
+// #define _XHPP_DEBUG_
+
+namespace xhpp {
+namespace task {
+
+//! kernel function class
+class dev_func : public base {
+   private:
+    // cl_kernel mkernel;
+    std::vector<cl_kernel> mkernels;
+
+    //! Kernel parameter list, filled by set_single_arg() and read by allocatesetclarg().
+    //! argsize and startendmark hold one entry per kernel argument; argvalue holds
+    //! _numcu * (_shadowsize + 1) consecutive entries per kernel argument.
+    class dev_func_arglist {
+       public:
+        std::vector<size_t> argsize; // sizeof(arg)
+        std::vector<void*> argvalue; // address of the value passed to clSetKernelArg
+        std::vector<xhpp::buffer::base*> devbuffer; // device buffers among the args, in the order they were set
+        std::vector<int> startendmark; // when the input arg is starting or ending device vbuffer, mark as 1
+    } arglist;
+
+    //! number of CUs registered through addcu()
+    int _numcu = 0;
+    //! \return the current CU count
+    int numofcu() {
+        return _numcu;
+    }
+
+    //! kernel CU name
+    std::vector<std::string> _cuname;
+    std::vector<int> _cubank;
+    //! kernel params: number of device buffer type
+    int db_n = 0; // vadd: db_n = 3
+
+    bool _cuadded = false;
+    bool _paramset = false;
+
+    bool _bodyallocated = false;
+    bool _bodyshadowallocated = false;
+
+    unsigned int _shadowsize = 0;
+
+    //! Refresh _shadowsize from the execution pattern and return it.
+    // Pipeline mode sets the shadow size to 1; any other pattern (linear mode) leaves the
+    // current value untouched, which is 0 unless an earlier call raised it.
+    int updateshadowsize(const Pattern pattern) {
+        _shadowsize = (pattern == pipeline) ? 1 : _shadowsize;
+        return _shadowsize;
+    }
+
+    //! device buffer type arg index
+    int argidx_db = 0;
+
+    //! Register one kernel argument of type T (any parameter that is not a device vbuffer).
+    //!
+    //! Only the address of value is stored, once per kernel instance, and it is read later
+    //! by allocatesetclarg(); the caller's object must therefore stay alive until then.
+    //! \return always 0 (the previous version had no return statement although it is
+    //!         declared int, which is undefined behaviour)
+    template <typename T>
+    int set_single_arg(T& value) {
+        // push to arg list
+        arglist.argsize.push_back(sizeof(T));
+        arglist.startendmark.push_back(0);
+        for (int i = 0; i < _numcu * (_shadowsize + 1); i++) {
+            arglist.argvalue.push_back((void*)(&value));
+        }
+        return 0;
+    }
+
+    //! Register one kernel argument that is a device vbuffer (passed to the kernel as cl_mem).
+    //!
+    //! Sizes value.xmems to one entry per kernel instance, records the address of each entry
+    //! as the argument value, remembers the buffer, and appends one bank id per CU to
+    //! value.bankinfo.
+    //! \return always 0
+    template <typename T>
+    int set_single_arg(xhpp::vbuffer::device<T>& value) {
+        const int nslots = _numcu * (_shadowsize + 1);
+
+        // push to arg list
+        arglist.argsize.push_back(sizeof(cl_mem));
+        arglist.startendmark.push_back(value._vbuffer_startorend == false ? 0 : 1);
+        value.xmems.resize(nslots);
+        for (int slot = 0; slot < nslots; slot++) {
+            arglist.argvalue.push_back((void*)(&(value.xmems[slot])));
+        }
+        arglist.devbuffer.push_back(&value);
+
+        // bankinfo is added to the devicebuffer: _cubank holds db_n entries per CU
+        for (int cu = 0; cu < _numcu; cu++) {
+            const int bank = _cubank[argidx_db + cu * db_n];
+            value.bankinfo.push_back(bank);
+#ifdef _XHPP_DEBUG_
+            std::cout << "bank info is " << bank << std::endl;
+#endif
+        }
+        argidx_db++;
+        return 0;
+    }
+
+    //! End of the recursion: register the last kernel argument.
+    //! \return always 0
+    template <typename T>
+    int setargs(T& last) {
+        set_single_arg(last);
+        return 0;
+    }
+
+    //! Register kernel arguments one by one, in the order given.
+    //! \return always 0
+    template <typename T, typename... U>
+    int setargs(T& head, U&... tail) {
+        set_single_arg(head);
+        setargs(tail...);
+        return 0;
+    }
+
+    //! Create the cl_kernel for CU ncu / instance nsh and set its arguments.
+    //!
+    //! The kernel is stored at index ncu * (_shadowsize + 1) + nsh of mkernels. Arguments
+    //! marked in startendmark are skipped here (their position in the argument list is
+    //! still consumed).
+    //! \throws xhpp::error when clCreateKernel or clSetKernelArg fails
+    //! \return always 0
+    int allocatesetclarg(int ncu, int nsh) {
+        const int slot = ncu * (_shadowsize + 1) + nsh;
+        std::cout << "n= " << slot << std::endl;
+
+        // create
+        cl_int status;
+        mkernels[slot] = clCreateKernel(xctx->xprogram, _cuname[ncu].c_str(), &status);
+        if (status != CL_SUCCESS) {
+            throw xhpp::error("ERROR: xhpp::task::dev_func::allocatesetclarg(), clCreateKernel error.\n");
+        }
+
+        // set args: argvalue holds `stride` entries per argument, one per kernel instance
+        const int nparam = arglist.argsize.size();
+        const int stride = _numcu * (_shadowsize + 1);
+        for (int s = 0; s < nparam; s++) {
+            if ((arglist.startendmark)[s] != 0) {
+                std::cout << "not setting arg for " << s << std::endl;
+                continue;
+            }
+            const size_t bytes = (arglist.argsize)[s];
+            void* valueptr = (arglist.argvalue)[s * stride + slot];
+            // the kernel argument index equals the position in the arg list
+            if (clSetKernelArg(mkernels[slot], s, bytes, valueptr) != CL_SUCCESS) {
+                throw xhpp::error("ERROR: xhpp::task::dev_func::allocatesetclarg(), clSetKernelArg error.\n");
+            }
+        }
+        std::cout << "---------" << std::endl;
+        return 0;
+    }
+
+   public:
+    //! Construct a kernel task bound to ctx (forwarded to the base class).
+    dev_func(xhpp::context* ctx) : base(ctx) {}
+
+    //! Destructor; does nothing itself (kernels and buffers are released by release()).
+    ~dev_func() {}
+
+    //! Register one compute unit.
+    //! \param cuname    kernel name later passed to clCreateKernel
+    //! \param param_num number of entries read from bank; also stored as db_n
+    //! \param bank      bank ids, appended to _cubank
+    //! \return always 0
+    int addcu(std::string cuname, int param_num, int* bank) {
+        ++_numcu;
+        _cuname.push_back(cuname);
+        db_n = param_num;
+        int k = 0;
+        while (k < param_num) {
+            _cubank.push_back(bank[k]);
+            ++k;
+        }
+        _cuadded = true;
+        return 0;
+    }
+
+    //! Record the kernel parameters.
+    //!
+    //! Refreshes the shadow size from the context's pattern, sizes the kernel table to
+    //! _numcu * (_shadowsize + 1) entries and registers every argument through setargs().
+    //! \return the value returned by setargs()
+    template <typename... _Args>
+    int setparam(_Args&... __args) {
+        updateshadowsize(xctx->pattern);
+        const size_t nkernels = _numcu * (_shadowsize + 1);
+        mkernels.resize(nkernels);
+        std::cout << "_shadowsize = " << _shadowsize << std::endl;
+        const int status = setargs(__args...); // set args
+        _paramset = true;
+        return status;
+    }
+
+    // //! setup the kernel
+    // template<typename... _Args>
+    // int setup(std::string krn_name, _Args & ...  __args){
+    //   krnname = krn_name;
+    //   int res = setparam(__args...);
+    //   return 0;
+    // };
+
+    //! Set up the body only: allocate every device buffer on bank _cubank[0] and create
+    //! kernel instance 0. Does nothing when the body is already allocated.
+    //! \return always 0
+    int setupbody() {
+        if (_bodyallocated) return 0;
+
+        // buffer
+        for (xhpp::buffer::base* buf : arglist.devbuffer) {
+            buf->bodyallocate(_cubank[0]);
+        }
+        // kernel obj
+        allocatesetclarg(0, 0);
+        _bodyallocated = true;
+        return 0;
+    }
+
+    //! Set up body and shadow resources: allocate _nsize instances of every device buffer
+    //! (except those that report startingendingallocate()) and create one kernel per CU and
+    //! per body/shadow instance. Does nothing when already allocated.
+    //! \return always 0
+    int setupbodyshadow(unsigned int _nsize) {
+        if (_bodyshadowallocated) return 0;
+
+        // buffer
+        for (xhpp::buffer::base* buf : arglist.devbuffer) {
+            if (buf->startingendingallocate()) {
+                std::cout << "not allocate buffer for starting or ending vbuffer" << std::endl;
+            } else {
+                buf->bodyshadowallocate(_nsize);
+            }
+        }
+        // kernel obj
+        // std::cout <<"_numcu and _shadowsize is " <<_numcu <<"," <<_shadowsize <<std::endl;
+        const int percu = _shadowsize + 1;
+        for (int cu = 0; cu < _numcu; cu++) {
+            for (int inst = 0; inst < percu; inst++) {
+                allocatesetclarg(cu, inst);
+            }
+        }
+        _bodyshadowallocated = true;
+        return 0;
+    }
+
+    //! Update a scalar kernel argument.
+    //! \param rcrun  index of the kernel instance in mkernels
+    //! \param nparam argument index passed to clSetKernelArg
+    //! \param param  new value; sizeof(T) bytes are passed to clSetKernelArg
+    //! \throws xhpp::error when clSetKernelArg fails
+    //! \return always 0 (the previous version had no return statement although it is
+    //!         declared int, which is undefined behaviour)
+    template <class T>
+    int updateparam(const int rcrun, const int nparam, T param) {
+        cl_int err = clSetKernelArg(mkernels[rcrun], nparam, sizeof(T), &param);
+        if (err != CL_SUCCESS) {
+            throw xhpp::error("ERROR: xhpp::task::dev_func::updateparam, clSetKernelArg error.\n");
+        }
+        return 0;
+    }
+
+    //! Update a kernel argument that is a virtual device buffer; its first cl_mem
+    //! (xmems[0]) is bound.
+    //!
+    //! param is a pointer, so its members must be reached with "->"; the previous
+    //! "param.xmems" could not compile once this template was instantiated.
+    //! \throws xhpp::error when clSetKernelArg fails
+    //! \return always 0 (the previous version had no return statement)
+    template <class T>
+    int updateparam(const int rcrun, const int nparam, xhpp::vbuffer::device<T>*& param) {
+        cl_int err = clSetKernelArg(mkernels[rcrun], nparam, sizeof(cl_mem), &(param->xmems[0]));
+        if (err != CL_SUCCESS) {
+            throw xhpp::error("ERROR: xhpp::task::dev_func::updateparam, clSetKernelArg error.\n");
+        }
+        return 0;
+    }
+
+    //! Update a kernel argument that is a device buffer; its first cl_mem (xmems[0]) is bound.
+    //! \throws xhpp::error when clSetKernelArg fails
+    //! \return always 0 (the previous version had no return statement although it is
+    //!         declared int, which is undefined behaviour)
+    template <class T>
+    int updateparam(const int rcrun, const int nparam, xhpp::buffer::device<T>*& param) {
+        cl_int err = clSetKernelArg(mkernels[rcrun], nparam, sizeof(cl_mem), &(param->xmems[0]));
+        if (err != CL_SUCCESS) {
+            throw xhpp::error("ERROR: xhpp::task::dev_func::updateparam, clSetKernelArg error.\n");
+        }
+        return 0;
+    }
+
+    //! update parameters, virtual host buffer error out
+    template <class T>
+    int updateparam(const int rcrun, const int nparam, xhpp::vbuffer::host<T>*& param) {
+        throw xhpp::error("ERROR: xhpp::task::dev_func::updateparam, input should not be virtual host buffer.\n");
+    }
+
+    //! update parameters, host buffer error out
+    template <class T>
+    int updateparam(const int rcrun, const int nparam, xhpp::buffer::host<T>*& param) {
+        throw xhpp::error("ERROR: xhpp::task::dev_func::updateparam, input should not be host buffer.\n");
+    }
+
+    //! Enqueue kernel instance rcrun without blocking.
+    //! \param waitevt events the kernel must wait for (may be empty)
+    //! \param outevt  receives the kernel's completion event in its first slot
+    //! \param rcin    unused here
+    //! \param rcout   unused here
+    //! \param rcrun   index of the kernel instance in mkernels
+    //! \throws xhpp::error when the enqueue fails
+    //! \return always 0
+    int submit(
+        xhpp::event* waitevt, xhpp::event* outevt, const int rcin = 0, const int rcout = 0, const int rcrun = 0) {
+        size_t global[] = {1, 1, 1};
+        size_t local[] = {1, 1, 1};
+        std::cout << "before kernel" << std::endl;
+        const cl_int status = clEnqueueNDRangeKernel(xctx->xqueue, mkernels[rcrun], 1, NULL, global, local,
+                                                     waitevt->size(), waitevt->data(), outevt->data());
+        std::cout << "kernel end" << std::endl;
+        if (status != CL_SUCCESS) {
+            throw xhpp::error("ERROR: xhpp::task::dev_func::submit(), kernel launch error.\n");
+        }
+        return 0;
+    }
+
+    //! Enqueue kernel instance rcrun and block on the context until it has finished.
+    //! rcin and rcout are unused here.
+    //! \throws xhpp::error when the enqueue fails
+    //! \return always 0
+    int run(const int rcin = 0, const int rcout = 0, const int rcrun = 0) {
+        size_t global[] = {1, 1, 1};
+        size_t local[] = {1, 1, 1};
+        const cl_int status =
+            clEnqueueNDRangeKernel(xctx->xqueue, mkernels[rcrun], 1, NULL, global, local, 0, NULL, NULL);
+        if (status != CL_SUCCESS) {
+            throw xhpp::error("ERROR: xhpp::task::dev_func::run(), kernel launch error.\n");
+        }
+        xctx->wait();
+        return 0;
+    }
+
+    //! Release the kernels and device buffers owned by this task and reset its CU state.
+    //!
+    //! Fixes over the previous version:
+    //!  - "_bodyallocated = true" was an assignment, not a comparison, so the body-only
+    //!    branch ran (and released mkernels[0]) even when nothing had been allocated;
+    //!  - _numcu was zeroed before the kernel loop, so only mkernels[0] was ever released
+    //!    and every other kernel leaked.
+    //! \return always 0
+    int release() {
+        if (_bodyshadowallocated == true) {
+            for (size_t s = 0; s < arglist.devbuffer.size(); s++) { // buffer
+                (arglist.devbuffer)[s]->bodyshadowrelease();
+            }
+            // setupbodyshadow() created one kernel per CU and per body/shadow instance
+            const int nkernels = _numcu * (_shadowsize + 1);
+            for (int i = 0; i < nkernels; i++) {
+                clReleaseKernel(mkernels[i]);
+            }
+        } else if (_bodyallocated == true) {
+            // setupbody() created kernel instance 0 only
+            clReleaseKernel(mkernels[0]);
+            for (size_t s = 0; s < arglist.devbuffer.size(); s++) { // buffer
+                (arglist.devbuffer)[s]->bodyrelease();
+            }
+        }
+        _bodyshadowallocated = false;
+        _bodyallocated = false;
+
+        // reset the CU bookkeeping only after the kernel count above has been used
+        _numcu = 0;
+        _cuname.resize(0);
+        _cuadded = false;
+        _paramset = false;
+        return 0;
+    }
+};
+
+}; // end of namespace task
+}; // end of namespace xhpp
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/host/xhpp/xhpp_tasktransfer.hpp b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_tasktransfer.hpp
new file mode 100644
index 0000000000..fcfac23dfc
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/host/xhpp/xhpp_tasktransfer.hpp
@@ -0,0 +1,266 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**********
+           Copyright (c) 2018, Xilinx, Inc.
+           All rights reserved.
+
+           TODO
+
+**********/
+
+#ifndef _XHPP_TASKTRANSFER_
+#define _XHPP_TASKTRANSFER_
+
+#include <iostream>
+#include <string>
+#include <functional>
+#include <memory>
+#include <tuple>
+
+#include "xhpp_event.hpp"
+#include "xhpp_context.hpp"
+#include "xhpp_bufferhost.hpp"
+#include "xhpp_bufferdevice.hpp"
+#include "xhpp_taskbase.hpp"
+#include "xhpp_enums.hpp"
+
+namespace xhpp {
+
+namespace task {
+
+//! base class to save data of setup(): abstract interface through which data_transfer drives a tran_impl
+class tran_impl_base {
+   public:
+    virtual ~tran_impl_base(){}; // virtual: derived objects are destroyed through a tran_impl_base pointer
+
+    virtual int setupbody() = 0; //! allocate the body buffers
+
+    virtual int setupbodyshadow(unsigned int) = 0; //! allocate the body and shadow buffers
+
+    virtual int releasebody() = 0;       //! release the body buffers
+    virtual int releasebodyshadow() = 0; //! release the body and shadow buffers
+
+    //! non-blocking data transfer
+    virtual int tran_impl_run(
+        const DataTransMode tmode, xhpp::event* waitevt, xhpp::event* outevt, const int rcin, const int rcout) = 0;
+
+    //! blocking data transfer
+    virtual int tran_impl_run(const DataTransMode tmode, const int rcin, const int rcout) = 0;
+
+    virtual int updateparam(const int n, void* param) = 0; //! replace parameter number n by param
+
+    virtual int release() = 0; //! release hook, called by data_transfer::release()
+
+    // virtual void watch() = 0;
+};
+
+//! tran_impl class to save data of setparam: keeps the host/device buffer pair behind tran_impl_base
+template <typename _BUF_H, typename _BUF_D>
+class tran_impl : public tran_impl_base {
+   private:
+    _BUF_H* hbptr; //! host buffer; replaced by updateparam(0, ...)
+    _BUF_D* dbptr; //! device buffer; replaced by updateparam(1, ...)
+
+    //! execute non-blocking data transfer; a tmode other than host2dev/dev2host does nothing
+    void tran_impl_func(_BUF_H* hbuf, _BUF_D* dbuf, const DataTransMode tmode,
+                        xhpp::event* waitevt, xhpp::event* outevt,
+                        const int rcin, const int rcout) {
+        if (tmode == host2dev) {
+            dbuf->copy_from_host(hbuf, waitevt, outevt, rcin, rcout);
+        } else if (tmode == dev2host) {
+            dbuf->copy_to_host(hbuf, waitevt, outevt, rcin, rcout);
+        };
+    };
+
+    //! execute blocking data transfer; a tmode other than host2dev/dev2host does nothing
+    // NOTE(review): copy_to_host() gets (rcout, rcin) here but (rcin, rcout) in the non-blocking overload - confirm.
+    void tran_impl_func(_BUF_H* hbuf, _BUF_D* dbuf, const DataTransMode tmode, const int rcin, const int rcout) {
+        if (tmode == host2dev) {
+            dbuf->copy_from_host(hbuf, rcin, rcout);
+        } else if (tmode == dev2host) {
+            dbuf->copy_to_host(hbuf, rcout, rcin);
+        };
+    };
+
+   public:
+    //! constructor: stores the two buffer pointers, nothing is allocated here
+    tran_impl(_BUF_H* _hbuf, _BUF_D* _dbuf) : hbptr(_hbuf), dbptr(_dbuf){};
+
+    //! destructor: the buffers are not released here
+    ~tran_impl(){};
+
+    //! setup/allocate (body) buffer
+    inline int setupbody() {
+        hbptr->bodyallocate();
+        dbptr->bodyallocate();
+        return 0;
+    };
+
+    //! setup/allocate (body and shadow) buffer
+    inline int setupbodyshadow(unsigned int nsize) {
+        hbptr->bodyshadowallocate(nsize);
+        dbptr->bodyshadowallocate(nsize);
+        return 0;
+    };
+
+    //! release (body) buffer
+    inline int releasebody() {
+        hbptr->bodyrelease();
+        dbptr->bodyrelease();
+        return 0;
+    };
+
+    //! release (body and shadow) buffer
+    inline int releasebodyshadow() {
+        hbptr->bodyshadowrelease();
+        dbptr->bodyshadowrelease();
+        return 0;
+    };
+
+    //! execute non-blocking data transfer; always returns 0
+    inline int tran_impl_run(
+        const DataTransMode tmode, xhpp::event* waitevt, xhpp::event* outevt, const int rcin, const int rcout) {
+        tran_impl_func(hbptr, dbptr, tmode, waitevt, outevt, rcin, rcout);
+        return 0; // a non-void function must return a value; data_transfer::submit() forwards it
+    };
+
+    //! execute blocking data transfer; always returns 0
+    inline int tran_impl_run(const DataTransMode tmode, const int rcin, const int rcout) {
+        tran_impl_func(hbptr, dbptr, tmode, rcin, rcout);
+        return 0;
+    };
+
+    //! update parameters: n == 0 replaces the host buffer, n == 1 the device buffer, otherwise throws xhpp::error
+    inline int updateparam(const int n, void* buf) {
+        if (n == 0) {
+            hbptr = (_BUF_H*)buf;
+        } else if (n == 1) {
+            dbptr = (_BUF_D*)buf;
+        } else {
+            throw xhpp::error("ERROR: xhpp::task::data_transfer::updateparam(), first parameter is out of range.\n");
+        };
+        return 0;
+    };
+
+    //! release: nothing to do for this class, returns 0
+    int release() { return 0; };
+
+}; // end of class
+
+//! data transfer task class: moves data between one host buffer and one device buffer in direction tmode
+class data_transfer : public base {
+   private:
+    DataTransMode tmode;     //! mode
+    tran_impl_base* tranptr; //! implementation object created by setparam(); nullptr while none exists
+
+    //! setup data transfer task: create the typed implementation object for the buffer pair
+    template <typename BUF_H, typename BUF_D>
+    inline int setup(BUF_H* hb, BUF_D* db) {
+        tranptr = new tran_impl<BUF_H, BUF_D>(hb, db);
+        return 0;
+    };
+
+   public:
+    //! constructor; tranptr starts as nullptr so that release() can tell whether setparam() was called
+    data_transfer(xhpp::context* ctx, DataTransMode mode) : base(ctx), tmode(mode), tranptr(nullptr){};
+
+    //! destructor: does not free anything, call release() explicitly
+    ~data_transfer(){};
+
+    //! number of CUs
+    int _numcu = 1;
+    int numofcu() { return _numcu; }
+
+    // TODO: event/dep is not added
+
+    //! set parameters of transfer task: hb is the host buffer, db the device buffer; returns 0
+    template <typename BUF_H, typename BUF_D>
+    inline int setparam(BUF_H* hb, BUF_D* db) {
+        return setup(hb, db);
+    };
+
+    //! update parameters, error out scalar: always throws xhpp::error (the return statement is never reached)
+    template <class T>
+    int updateparam(const int nparam, T& param, const int rcrun) {
+        throw xhpp::error("ERROR: xhpp::task::data_transfer::updateparam, input should not be scalar.\n");
+        return 1;
+    }
+
+    //! update virtual host buffer parameters (rcrun and nparam are ignored; the host buffer is parameter 0)
+    // For data transfer, the process is hb==>db[0]/db[1].
+    // updateparam() only updates the user-side parameter, i.e. it replaces hb by in[i]; no hb[0]/hb[1] is
+    // required, and the replacement needs no idxin/idxout/idxrun. The db[0]/db[1]/db[mcu] index is updated
+    // in the node submit part. PS: in reality, only host buffers are updated by the user.
+    template <class T>
+    int updateparam(const int rcrun, const int nparam, xhpp::vbuffer::host<T>* param) {
+        return tranptr->updateparam(0, param);
+    };
+
+    //! update host buffer parameters (rcrun and nparam are ignored)
+    template <class T>
+    int updateparam(const int rcrun, const int nparam, xhpp::buffer::host<T>* param) {
+        return tranptr->updateparam(0, param);
+    };
+
+    //! update virtual device buffer parameters (rcrun and nparam are ignored; the device buffer is parameter 1)
+    template <class T>
+    int updateparam(const int rcrun, const int nparam, xhpp::vbuffer::device<T>* param) {
+        return tranptr->updateparam(1, param);
+    };
+
+    //! update device buffer parameters (rcrun and nparam are ignored)
+    template <class T>
+    int updateparam(const int rcrun, const int nparam, xhpp::buffer::device<T>* param) {
+        return tranptr->updateparam(1, param);
+    };
+
+    //! setup body
+    int setupbody() { return tranptr->setupbody(); };
+
+    //! setup body and shadow
+    int setupbodyshadow(unsigned int nsize) { return tranptr->setupbodyshadow(nsize); };
+
+    //! release body
+    int releasebody() { return tranptr->releasebody(); };
+
+    //! release body and shadow
+    int releasebodyshadow() { return tranptr->releasebodyshadow(); };
+
+    //! submit non-blocking data transfer task (rcrun is unused)
+    int submit(
+        xhpp::event* waitevt, xhpp::event* outevt, const int rcin = 0, const int rcout = 0, const int rcrun = 0) {
+        return tranptr->tran_impl_run(tmode, waitevt, outevt, rcin, rcout); // TODO: check rcin/rcout with rcfrom/rcto
+    };
+
+    //! run blocking data transfer task (rcrun is unused)
+    int run(const int rcin = 0, const int rcout = 0, const int rcrun = 0) { // TODO: check rcin/rcout with rcfrom/rcto
+        return tranptr->tran_impl_run(tmode, rcin, rcout);
+    };
+
+    //! release task: destroy the implementation object; safe without setparam() and when called twice
+    int release() {
+        if (tranptr == nullptr) return 0; // setparam() was never called, or release() already ran
+        tranptr->release();
+        delete tranptr; // created with new in setup(); free() would be a mismatched deallocation
+        tranptr = nullptr;
+        return 0;
+    };
+}; // end of class
+
+}; // end of namespace task
+}; // end of namespace xhpp
+
+#endif
diff --git a/codec/L2/demos/leptonEnc/images/t0.jpg b/codec/L2/demos/leptonEnc/images/t0.jpg
new file mode 100644
index 0000000000..8d04e74c72
Binary files /dev/null and b/codec/L2/demos/leptonEnc/images/t0.jpg differ
diff --git a/codec/L2/demos/leptonEnc/kernel/XAcc_77.cpp b/codec/L2/demos/leptonEnc/kernel/XAcc_77.cpp
new file mode 100644
index 0000000000..99e5bbb45e
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/kernel/XAcc_77.cpp
@@ -0,0 +1,1824 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "XAcc_77.hpp"
+
+namespace xf {
+namespace codec {
+namespace details {
+
+// --- line_buf_write_h: len times, read one value from each of coef[0..6] and emit them as one 77-bit word ---
+void line_buf_write_h(ap_uint<32> len, hls::stream<ap_int<11> > coef[7], hls::stream<ap_uint<77> >& lb_write) {
+    ap_int<11> lane[7];
+#pragma HLS ARRAY_PARTITION variable = lane complete dim = 1
+
+    for (int n = 0; n < len; n++) {
+#pragma HLS pipeline II = 1
+        for (int k = 0; k < 7; k++) {
+#pragma HLS unroll
+            lane[k] = coef[k].read();
+        }
+
+        // concatenation of the seven 11-bit values, lane[6] listed first and lane[0] last
+        lb_write.write((lane[6], lane[5], lane[4], lane[3], lane[2], lane[1], lane[0]));
+    }
+}
+
+// --- line_buf_ctrl_h: line buffer of 77-bit words; replays the stored words while storing the new ones ---
+void line_buf_ctrl_h(ap_uint<32> len,     // number of words to process
+                     ap_uint<3> id_cmp,   // first index into the storage arrays (their first dimension is 3)
+                     bool is_top_row,     // when true, nothing is written to strm_read
+                     hls::stream<ap_uint<77> >& strm_write, // in: words to store, one read per iteration
+                     hls::stream<ap_uint<77> >& strm_read) { // out: previously stored words
+    static ap_uint<72> coef_abv_uram[3][1024]; // static: keeps bits 76..5 of each word across calls
+#pragma HLS bind_storage variable = coef_abv_uram type = RAM_2P impl = URAM
+    static ap_uint<5> coef_abv_bram[3][1024]; // static: keeps bits 4..0 of each word across calls
+
+    ap_uint<77> coef_abv_r;
+    ap_uint<77> coef_abv_w;
+
+    ap_uint<32> cnt = 0; // word index, also the second index into the storage arrays
+
+    while (cnt < len) {
+#pragma HLS pipeline II = 1
+        if (!is_top_row) {
+            // reassemble the stored word before this slot is overwritten below
+            coef_abv_r(76, 5) = coef_abv_uram[id_cmp][cnt];
+            coef_abv_r(4, 0) = coef_abv_bram[id_cmp][cnt];
+            strm_read.write(coef_abv_r);
+        }
+
+        coef_abv_w = strm_write.read();
+        coef_abv_uram[id_cmp][cnt] = coef_abv_w(76, 5);
+        coef_abv_bram[id_cmp][cnt] = coef_abv_w(4, 0);
+
+        cnt++;
+    }
+}
+
+// --- line_buf_ctrl_nz: line buffer of 6-bit counts; replays the stored counts (0 for the top row) and stores new ---
+void line_buf_ctrl_nz(ap_uint<32> len,
+                      ap_uint<3> id_cmp,
+                      bool is_top_row,
+                      hls::stream<ap_uint<6> >& strm_write,
+                      hls::stream<ap_uint<6> >& strm_read) {
+    // static: the stored counts persist from one call to the next; id_cmp selects one of the 3 rows
+    static ap_uint<6> nz_abv_ram[3][1024];
+    ap_uint<32> cnt = 0;
+
+    while (cnt < len) {
+#pragma HLS pipeline II = 1
+        if (!is_top_row) {
+            strm_read.write(nz_abv_ram[id_cmp][cnt]);
+        } else {
+            strm_read.write(0);
+        }
+
+        nz_abv_ram[id_cmp][cnt] = strm_write.read();
+        cnt++;
+    }
+}
+
+// --- line_buf_ctrl_77: line buffer of 77-bit words, 7 words per unit of len; replays stored words, stores new ---
+void line_buf_ctrl_77(ap_uint<32> len,     // the loop processes len * 7 words
+                      ap_uint<3> id_cmp,   // first index into the storage arrays (their first dimension is 3)
+                      bool is_top_row,     // when true, nothing is written to strm_read
+                      hls::stream<ap_uint<77> >& strm_write, // in: words to store, one read per iteration
+                      hls::stream<ap_uint<77> >& strm_read) { // out: previously stored words
+    static ap_uint<72> coef_abv_uram[3][7168]; // static: keeps bits 76..5 of each word across calls
+#pragma HLS bind_storage variable = coef_abv_uram type = RAM_2P impl = URAM
+    static ap_uint<5> coef_abv_bram[3][7168]; // static: keeps bits 4..0 of each word across calls
+
+    ap_uint<77> coef_abv_r;
+    ap_uint<77> coef_abv_w;
+
+    ap_uint<32> cnt = 0; // word index, also the second index into the storage arrays
+
+    while (cnt < len * 7) {
+#pragma HLS pipeline II = 1
+        if (!is_top_row) {
+            // reassemble the stored word before this slot is overwritten below
+            coef_abv_r(76, 5) = coef_abv_uram[id_cmp][cnt];
+            coef_abv_r(4, 0) = coef_abv_bram[id_cmp][cnt];
+            strm_read.write(coef_abv_r);
+        }
+
+        coef_abv_w = strm_write.read();
+        coef_abv_uram[id_cmp][cnt] = coef_abv_w(76, 5);
+        coef_abv_bram[id_cmp][cnt] = coef_abv_w(4, 0);
+
+        cnt++;
+    }
+}
+
+// --- line_buf_read_77: unpack 77-bit words into groups of 49 coefficients and write each group to two outputs ---
+void line_buf_read_77(ap_uint<32> len,
+                      bool is_top,
+                      // input: packed words; only read when is_top is false, otherwise 0 is used instead
+                      hls::stream<ap_uint<77> >& strm_read,
+                      // outputs: both arrays receive the same 49 values after every 7th word
+                      hls::stream<ap_int<11> > coef_abv[49],
+                      hls::stream<ap_int<11> > coef_abv_h[49]) {
+    ap_int<11> abv_buff[49];
+#pragma HLS array_partition variable = abv_buff complete dim = 0
+    ap_int<77> lb_read;
+
+    ap_uint<32> cnt = 0; // index (0..6) of the current word within its group of 7
+    ap_uint<32> i = 0;   // number of words processed so far; the loop runs len * 7 times
+
+    while (i < len * 7) {
+#pragma HLS pipeline II = 1
+        if (!is_top)
+            lb_read = strm_read.read();
+        else
+            lb_read = 0;
+
+        for (int j = 0; j < 7; j++) {
+#pragma HLS unroll
+            abv_buff[cnt * 7 + j] = lb_read(j * 11 + 10, j * 11); // field j occupies bits 11*j+10 .. 11*j
+        }
+
+        i++;
+        if (cnt != 6)
+            cnt++;
+        else {
+            // seventh word of the group: all 49 values are available, emit them and start a new group
+            for (int j = 0; j < 49; j++) {
+#pragma HLS unroll
+                coef_abv[j].write(abv_buff[j]);
+                coef_abv_h[j].write(abv_buff[j]);
+            }
+            cnt = 0;
+        }
+    }
+}
+
+// --- store_7x7: per block, gather 49 coefficients from coef[] and emit them with non-zero count and eob data ---
+void store_7x7(ap_uint<32> len,
+               hls::stream<ap_int<11> > coef[8],
+               // per-block outputs, written in the cycle that reads the 49th coefficient
+               hls::stream<ap_uint<6> >& non_zero_cnt,
+               hls::stream<ap_uint<3> >& eob_x,
+               hls::stream<ap_uint<3> >& eob_y,
+               hls::stream<ap_int<11> > coef_out[49],
+               hls::stream<ap_int<11> > coef_out_h[49],
+               hls::stream<ap_int<11> > coef_out_v[49],
+               // line-buffer outputs: one 77-bit word per loop iteration, one count per block
+               hls::stream<ap_uint<77> >& lb_write,
+               hls::stream<ap_uint<6> >& lb_nz_write) {
+    ap_int<11> reg_coef_7x7[8]; // the values read in the current cycle
+    ap_int<11> lb_w_coef[7];    // values already read but not yet packed into a 77-bit word
+
+    ap_int<11> coef_7x7_buff[49]; // the block being collected, in arrival order
+#pragma HLS array_partition variable = coef_7x7_buff complete dim = 0
+
+    ap_uint<3> reg_eob_x;
+    ap_uint<3> reg_eob_y;
+    ap_uint<1> non_zero_h[7] = {0, 0, 0, 0, 0, 0, 0}; // each entry is freshly assigned once per block before use
+    ap_uint<1> non_zero_v[7] = {0, 0, 0, 0, 0, 0, 0}; // each entry is freshly assigned once per block before use
+    ap_uint<6> reg_non_zero_cnt = 0;                  // non-zero coefficients seen so far in the current block
+    int block_cnt = 0;                                // completed blocks
+    int cnt = 0;                                      // read cycle within the current block, 0..6
+
+READ_8_COEF7x7_LOOP:
+    while (block_cnt < len) {
+#pragma HLS pipeline II = 1
+
+        if (cnt == 0) {
+            for (int j = 0; j < 8; j++) {
+#pragma HLS unroll
+                reg_coef_7x7[j] = coef[j].read();
+                coef_7x7_buff[8 * cnt + j] = reg_coef_7x7[j];
+                reg_non_zero_cnt = reg_non_zero_cnt + (reg_coef_7x7[j].or_reduce());
+            }
+            non_zero_h[0] = (reg_coef_7x7[0].or_reduce()) || (reg_coef_7x7[1].or_reduce()) ||
+                            (reg_coef_7x7[5].or_reduce()) || (reg_coef_7x7[6].or_reduce());
+            non_zero_h[1] =
+                (reg_coef_7x7[2].or_reduce()) || (reg_coef_7x7[4].or_reduce()) || (reg_coef_7x7[7].or_reduce());
+            non_zero_h[2] = (reg_coef_7x7[3].or_reduce());
+
+            non_zero_v[0] =
+                (reg_coef_7x7[0].or_reduce()) || (reg_coef_7x7[2].or_reduce()) || (reg_coef_7x7[3].or_reduce());
+            non_zero_v[1] = (reg_coef_7x7[1].or_reduce()) || (reg_coef_7x7[4].or_reduce());
+            non_zero_v[2] = (reg_coef_7x7[5].or_reduce()) || (reg_coef_7x7[7].or_reduce());
+            non_zero_v[3] = (reg_coef_7x7[6].or_reduce());
+
+            lb_write.write((reg_coef_7x7[6], reg_coef_7x7[5], reg_coef_7x7[4], reg_coef_7x7[3], reg_coef_7x7[2],
+                            reg_coef_7x7[1], reg_coef_7x7[0]));
+            lb_w_coef[0] = reg_coef_7x7[7]; // 8 read, 7 packed: carry the remaining value into the next word
+
+            cnt++;
+
+        }
+
+        else if (cnt == 1) {
+            for (int j = 0; j < 8; j++) {
+#pragma HLS unroll
+                reg_coef_7x7[j] = coef[j].read();
+                coef_7x7_buff[8 * cnt + j] = reg_coef_7x7[j];
+                reg_non_zero_cnt = reg_non_zero_cnt + (reg_coef_7x7[j].or_reduce());
+            }
+            non_zero_h[0] = non_zero_h[0] || (reg_coef_7x7[6].or_reduce()) || (reg_coef_7x7[7].or_reduce());
+            non_zero_h[1] = non_zero_h[1] || (reg_coef_7x7[5].or_reduce());
+            non_zero_h[2] = non_zero_h[2] || (reg_coef_7x7[0].or_reduce()) || (reg_coef_7x7[4].or_reduce());
+            non_zero_h[3] = (reg_coef_7x7[1].or_reduce()) || (reg_coef_7x7[3].or_reduce());
+            non_zero_h[4] = (reg_coef_7x7[2].or_reduce());
+
+            non_zero_v[0] = non_zero_v[0] || (reg_coef_7x7[1].or_reduce()) || (reg_coef_7x7[2].or_reduce());
+            non_zero_v[1] = non_zero_v[1] || (reg_coef_7x7[0].or_reduce()) || (reg_coef_7x7[3].or_reduce());
+            non_zero_v[2] = non_zero_v[2] || (reg_coef_7x7[4].or_reduce());
+            non_zero_v[3] = non_zero_v[3] || (reg_coef_7x7[5].or_reduce());
+            non_zero_v[4] = (reg_coef_7x7[6].or_reduce());
+            non_zero_v[5] = (reg_coef_7x7[7].or_reduce());
+
+            lb_write.write((reg_coef_7x7[5], reg_coef_7x7[4], reg_coef_7x7[3], reg_coef_7x7[2], reg_coef_7x7[1],
+                            reg_coef_7x7[0], lb_w_coef[0]));
+            lb_w_coef[0] = reg_coef_7x7[6];
+            lb_w_coef[1] = reg_coef_7x7[7];
+
+            cnt++;
+        }
+
+        else if (cnt == 2) {
+            for (int j = 0; j < 8; j++) {
+#pragma HLS unroll
+                reg_coef_7x7[j] = coef[j].read();
+                coef_7x7_buff[8 * cnt + j] = reg_coef_7x7[j];
+                reg_non_zero_cnt = reg_non_zero_cnt + (reg_coef_7x7[j].or_reduce());
+            }
+            non_zero_h[1] = non_zero_h[1] || (reg_coef_7x7[0].or_reduce());
+            non_zero_h[2] = non_zero_h[2] || (reg_coef_7x7[1].or_reduce());
+            non_zero_h[3] = non_zero_h[3] || (reg_coef_7x7[2].or_reduce());
+            non_zero_h[4] = non_zero_h[4] || (reg_coef_7x7[3].or_reduce()) || (reg_coef_7x7[7].or_reduce());
+            non_zero_h[5] = (reg_coef_7x7[4].or_reduce()) || (reg_coef_7x7[6].or_reduce());
+            non_zero_h[6] = (reg_coef_7x7[5].or_reduce());
+
+            non_zero_v[0] = non_zero_v[0] || (reg_coef_7x7[4].or_reduce()) || (reg_coef_7x7[5].or_reduce());
+            non_zero_v[1] = non_zero_v[1] || (reg_coef_7x7[3].or_reduce()) || (reg_coef_7x7[6].or_reduce());
+            non_zero_v[2] = non_zero_v[2] || (reg_coef_7x7[2].or_reduce()) || (reg_coef_7x7[7].or_reduce());
+            non_zero_v[3] = non_zero_v[3] || (reg_coef_7x7[1].or_reduce());
+            non_zero_v[4] = non_zero_v[4] || (reg_coef_7x7[0].or_reduce());
+
+            lb_write.write((reg_coef_7x7[4], reg_coef_7x7[3], reg_coef_7x7[2], reg_coef_7x7[1], reg_coef_7x7[0],
+                            lb_w_coef[1], lb_w_coef[0]));
+            lb_w_coef[0] = reg_coef_7x7[5];
+            lb_w_coef[1] = reg_coef_7x7[6];
+            lb_w_coef[2] = reg_coef_7x7[7];
+
+            cnt++;
+        }
+
+        else if (cnt == 3) {
+            for (int j = 0; j < 8; j++) {
+#pragma HLS unroll
+                reg_coef_7x7[j] = coef[j].read();
+                coef_7x7_buff[8 * cnt + j] = reg_coef_7x7[j];
+                reg_non_zero_cnt = reg_non_zero_cnt + (reg_coef_7x7[j].or_reduce());
+            }
+            non_zero_h[0] = non_zero_h[0] || (reg_coef_7x7[3].or_reduce());
+            non_zero_h[1] = non_zero_h[1] || (reg_coef_7x7[2].or_reduce()) || (reg_coef_7x7[4].or_reduce());
+            non_zero_h[2] = non_zero_h[2] || (reg_coef_7x7[1].or_reduce()) || (reg_coef_7x7[5].or_reduce());
+            non_zero_h[3] = non_zero_h[3] || (reg_coef_7x7[0].or_reduce()) || (reg_coef_7x7[6].or_reduce());
+            non_zero_h[4] = non_zero_h[4] || (reg_coef_7x7[7].or_reduce());
+
+            non_zero_v[3] = non_zero_v[3] || (reg_coef_7x7[0].or_reduce()) || (reg_coef_7x7[7].or_reduce());
+            non_zero_v[4] = non_zero_v[4] || (reg_coef_7x7[1].or_reduce()) || (reg_coef_7x7[6].or_reduce());
+            non_zero_v[5] = non_zero_v[5] || (reg_coef_7x7[2].or_reduce()) || (reg_coef_7x7[5].or_reduce());
+            non_zero_v[6] = (reg_coef_7x7[3].or_reduce()) || (reg_coef_7x7[4].or_reduce());
+
+            lb_write.write((reg_coef_7x7[3], reg_coef_7x7[2], reg_coef_7x7[1], reg_coef_7x7[0], lb_w_coef[2],
+                            lb_w_coef[1], lb_w_coef[0]));
+            lb_w_coef[0] = reg_coef_7x7[4];
+            lb_w_coef[1] = reg_coef_7x7[5];
+            lb_w_coef[2] = reg_coef_7x7[6];
+            lb_w_coef[3] = reg_coef_7x7[7];
+
+            cnt++;
+        }
+
+        else if (cnt == 4) {
+            for (int j = 0; j < 8; j++) {
+#pragma HLS unroll
+                reg_coef_7x7[j] = coef[j].read();
+                coef_7x7_buff[8 * cnt + j] = reg_coef_7x7[j];
+                reg_non_zero_cnt = reg_non_zero_cnt + (reg_coef_7x7[j].or_reduce());
+            }
+            non_zero_h[2] = non_zero_h[2] || (reg_coef_7x7[6].or_reduce());
+            non_zero_h[3] = non_zero_h[3] || (reg_coef_7x7[5].or_reduce()) || (reg_coef_7x7[7].or_reduce());
+            non_zero_h[4] = non_zero_h[4] || (reg_coef_7x7[4].or_reduce());
+            non_zero_h[5] = non_zero_h[5] || (reg_coef_7x7[0].or_reduce()) || (reg_coef_7x7[3].or_reduce());
+            non_zero_h[6] = non_zero_h[6] || (reg_coef_7x7[1].or_reduce()) || (reg_coef_7x7[2].or_reduce());
+
+            non_zero_v[1] = non_zero_v[1] || (reg_coef_7x7[1].or_reduce());
+            non_zero_v[2] = non_zero_v[2] || (reg_coef_7x7[0].or_reduce()) || (reg_coef_7x7[2].or_reduce());
+            non_zero_v[3] = non_zero_v[3] || (reg_coef_7x7[3].or_reduce());
+            non_zero_v[4] = non_zero_v[4] || (reg_coef_7x7[4].or_reduce());
+            non_zero_v[5] = non_zero_v[5] || (reg_coef_7x7[5].or_reduce());
+            non_zero_v[6] = non_zero_v[6] || (reg_coef_7x7[6].or_reduce()) || (reg_coef_7x7[7].or_reduce());
+
+            lb_write.write((reg_coef_7x7[2], reg_coef_7x7[1], reg_coef_7x7[0], lb_w_coef[3], lb_w_coef[2], lb_w_coef[1],
+                            lb_w_coef[0]));
+            lb_w_coef[0] = reg_coef_7x7[3];
+            lb_w_coef[1] = reg_coef_7x7[4];
+            lb_w_coef[2] = reg_coef_7x7[5];
+            lb_w_coef[3] = reg_coef_7x7[6];
+            lb_w_coef[4] = reg_coef_7x7[7];
+
+            cnt++;
+        }
+
+        else if (cnt == 5) {
+            for (int j = 0; j < 8; j++) {
+#pragma HLS unroll
+                reg_coef_7x7[j] = coef[j].read();
+                coef_7x7_buff[8 * cnt + j] = reg_coef_7x7[j];
+                reg_non_zero_cnt = reg_non_zero_cnt + (reg_coef_7x7[j].or_reduce());
+            }
+            non_zero_h[4] = non_zero_h[4] || (reg_coef_7x7[0].or_reduce()) || (reg_coef_7x7[5].or_reduce());
+            non_zero_h[5] = non_zero_h[5] || (reg_coef_7x7[1].or_reduce()) || (reg_coef_7x7[4].or_reduce()) ||
+                            (reg_coef_7x7[6].or_reduce());
+            non_zero_h[6] = non_zero_h[6] || (reg_coef_7x7[2].or_reduce()) || (reg_coef_7x7[3].or_reduce()) ||
+                            (reg_coef_7x7[7].or_reduce());
+
+            non_zero_v[3] = non_zero_v[3] || (reg_coef_7x7[2].or_reduce());
+            non_zero_v[4] = non_zero_v[4] || (reg_coef_7x7[1].or_reduce()) || (reg_coef_7x7[3].or_reduce());
+            non_zero_v[5] = non_zero_v[5] || (reg_coef_7x7[0].or_reduce()) || (reg_coef_7x7[4].or_reduce()) ||
+                            (reg_coef_7x7[7].or_reduce());
+            non_zero_v[6] = non_zero_v[6] || (reg_coef_7x7[5].or_reduce()) || (reg_coef_7x7[6].or_reduce());
+
+            lb_write.write((reg_coef_7x7[1], reg_coef_7x7[0], lb_w_coef[4], lb_w_coef[3], lb_w_coef[2], lb_w_coef[1],
+                            lb_w_coef[0]));
+            lb_w_coef[0] = reg_coef_7x7[2];
+            lb_w_coef[1] = reg_coef_7x7[3];
+            lb_w_coef[2] = reg_coef_7x7[4];
+            lb_w_coef[3] = reg_coef_7x7[5];
+            lb_w_coef[4] = reg_coef_7x7[6];
+            lb_w_coef[5] = reg_coef_7x7[7];
+
+            cnt++;
+        }
+
+        else if (cnt == 6) {
+            coef_7x7_buff[48] = coef[0].read(); // last cycle of a block: only coef[0] is read (49th value)
+
+            lb_write.write((coef_7x7_buff[48], lb_w_coef[5], lb_w_coef[4], lb_w_coef[3], lb_w_coef[2], lb_w_coef[1],
+                            lb_w_coef[0]));
+
+            non_zero_h[6] = non_zero_h[6] || (coef_7x7_buff[48].or_reduce());
+            non_zero_v[6] = non_zero_v[6] || (coef_7x7_buff[48].or_reduce());
+            if (coef_7x7_buff[48].or_reduce()) reg_non_zero_cnt = reg_non_zero_cnt + 1;
+
+            non_zero_cnt.write(reg_non_zero_cnt);
+            lb_nz_write.write(reg_non_zero_cnt);
+
+            /* Commented-out loop form of the non_zero_h / non_zero_v update (uses unzigzag_7x7, not defined here):
+                        for(int i=0;i<7;i++){
+            #pragma HLS unroll
+                            for(int j=0;j<7;j++){
+            #pragma HLS unroll
+                                non_zero_h[i]=non_zero_h[i]||(coef_7x7_buff[unzigzag_7x7[7*i+j]].or_reduce());
+                                non_zero_v[j]=non_zero_v[j]||(coef_7x7_buff[unzigzag_7x7[7*i+j]].or_reduce());
+                            }
+                        }*/
+
+            if (non_zero_h[6] == 1) // eob_y = 1 + highest index whose non_zero_h flag is set, 0 when none is set
+                reg_eob_y = 7;
+            else if (non_zero_h[5] == 1)
+                reg_eob_y = 6;
+            else if (non_zero_h[4] == 1)
+                reg_eob_y = 5;
+            else if (non_zero_h[3] == 1)
+                reg_eob_y = 4;
+            else if (non_zero_h[2] == 1)
+                reg_eob_y = 3;
+            else if (non_zero_h[1] == 1)
+                reg_eob_y = 2;
+            else if (non_zero_h[0] == 1)
+                reg_eob_y = 1;
+            else
+                reg_eob_y = 0;
+
+            if (non_zero_v[6] == 1) // eob_x = 1 + highest index whose non_zero_v flag is set, 0 when none is set
+                reg_eob_x = 7;
+            else if (non_zero_v[5] == 1)
+                reg_eob_x = 6;
+            else if (non_zero_v[4] == 1)
+                reg_eob_x = 5;
+            else if (non_zero_v[3] == 1)
+                reg_eob_x = 4;
+            else if (non_zero_v[2] == 1)
+                reg_eob_x = 3;
+            else if (non_zero_v[1] == 1)
+                reg_eob_x = 2;
+            else if (non_zero_v[0] == 1)
+                reg_eob_x = 1;
+            else
+                reg_eob_x = 0;
+
+            eob_x.write(reg_eob_x);
+            eob_y.write(reg_eob_y);
+            for (int i = 0; i < 49; i++) {
+#pragma HLS unroll
+                coef_out[i].write(coef_7x7_buff[i]);
+                coef_out_h[i].write(coef_7x7_buff[i]);
+                coef_out_v[i].write(coef_7x7_buff[i]);
+            }
+            block_cnt++;
+            cnt = 0;
+            reg_non_zero_cnt = 0; // start counting afresh for the next block
+        }
+    }
+}
+
+// ------------------------------------------------------------
+// forward_7x7: per block, stream out coefficients one per cycle until reg_non_zero non-zero ones were emitted
+void forward_7x7(ap_uint<32> len,
+                 hls::stream<ap_int<11> > coef[49],
+                 hls::stream<ap_int<11> > coef_above[49],
+                 hls::stream<ap_uint<6> >& non_zero_cnt_in,
+
+                 hls::stream<ap_int<11> >& coef_7x7,
+                 hls::stream<ap_int<11> >& coef_lft,
+                 hls::stream<ap_int<11> >& coef_abv,
+                 hls::stream<ap_int<11> >& coef_abv_lft,
+                 hls::stream<ap_uint<6> >& non_zero_cnt_out,
+                 hls::stream<ap_uint<6> >& non_zero_cnt_lft_out,
+                 hls::stream<ap_uint<6> >& non_zero_7x7_out,
+                 hls::stream<ap_uint<6> >& non_zero_h_out
+                 //    hls::stream<ap_uint<6> >& non_zero_v_out
+                 ) {
+    ap_uint<6> reg_non_zero = 0; // non-zero coefficients of the current block still to be emitted
+    ap_uint<6> nz_lft = 0;       // count of the previously loaded block (0 before the first block)
+    ap_int<11> coef_buff[49];
+#pragma HLS array_partition variable = coef_buff complete dim = 0
+    ap_int<11> coef_lft_buff[49];
+#pragma HLS array_partition variable = coef_lft_buff complete dim = 0
+    ap_int<11> coef_abv_buff[49];
+#pragma HLS array_partition variable = coef_abv_buff complete dim = 0
+    ap_int<11> coef_abv_left_buff[49];
+#pragma HLS array_partition variable = coef_abv_left_buff complete dim = 0
+
+    // The first block is loaded before the loop; later blocks are loaded in step 3 of the loop.
+    // 1. init
+    for (int i = 0; i < 49; i++) {
+#pragma HLS unroll
+        coef_lft_buff[i] = coef_buff[i]; // coef_buff is not assigned yet; unused because block 0 emits 0 as left
+        coef_buff[i] = coef[i].read();
+        coef_abv_left_buff[i] = coef_abv_buff[i]; // same remark as for coef_lft_buff
+        coef_abv_buff[i] = coef_above[i].read();
+    }
+    non_zero_cnt_lft_out.write(nz_lft);
+    reg_non_zero = non_zero_cnt_in.read();
+    non_zero_cnt_out.write(reg_non_zero);
+    non_zero_7x7_out.write(reg_non_zero);
+    non_zero_h_out.write(reg_non_zero);
+    nz_lft = reg_non_zero;
+
+    int j = 0; // index of the block being emitted
+    int i = 0; // index of the next coefficient to emit within the block
+
+PUSH_COEF7x7_LOOP:
+    while (j < len) {
+#pragma HLS pipeline II = 1
+
+        //        for(int i=0;reg_non_zero.or_reduce();i++){
+
+        //        	_XF_IMAGE_PRINT("Forw num = %d , reduce = %d, %d\n",\
+//        			(int)reg_non_zero, (int)reg_non_zero.or_reduce(), (int)coef_buff[i].or_reduce());
+
+        // 2. write out: one coefficient per iteration with its above / left / above-left counterparts
+        if (reg_non_zero.or_reduce()) {
+            coef_7x7.write(coef_buff[i]);
+            coef_abv.write(coef_abv_buff[i]);
+            if (j != 0) {
+                coef_lft.write(coef_lft_buff[i]);
+                coef_abv_lft.write(coef_abv_left_buff[i]);
+            } else {
+                coef_lft.write(0); // the first block has no left neighbour: emit 0 instead
+                coef_abv_lft.write(0);
+            }
+            if (coef_buff[i].or_reduce()) reg_non_zero--; // only non-zero coefficients count towards the total
+            i++;
+        } // loop0
+
+        // 3. update: the current block is finished (or had a count of 0); load the next block unless it was the last
+        if (!reg_non_zero.or_reduce()) {
+            i = 0;
+            if (j < len - 1) {
+                for (int i = 0; i < 49; i++) { // this i shadows the outer i, which stays 0
+#pragma HLS unroll
+                    coef_lft_buff[i] = coef_buff[i];
+                    coef_buff[i] = coef[i].read();
+                    coef_abv_left_buff[i] = coef_abv_buff[i];
+                    coef_abv_buff[i] = coef_above[i].read();
+                }
+                non_zero_cnt_lft_out.write(nz_lft);
+                reg_non_zero = non_zero_cnt_in.read();
+                non_zero_cnt_out.write(reg_non_zero);
+                non_zero_7x7_out.write(reg_non_zero);
+                non_zero_h_out.write(reg_non_zero);
+                nz_lft = reg_non_zero;
+            }
+            j++;
+        }
+    }
+}
+
+// ------------------------------------------------------------
+// Streams the horizontal-edge coefficients of `len` blocks, one lane per
+// pipeline iteration.
+//
+// For each block the function loads 49 "here" and 49 "above" values from
+// coef_77_here / coef_77_above and 7 edge coefficients from `coef`, and
+// publishes the number of non-zero edge coefficients on coef_cnt_h_len.
+// While that count is non-zero it emits lanes in order (cnt = 0, 1, ...):
+//   - lane_h        <- lane index cnt
+//   - coef_end      <- false
+//   - has_left      <- true for the first block (j == 0), false afterwards
+//   - coef_here_h[1..7] / coef_abov_h[1..7] <- seven entries of the here/above
+//     buffers selected by cnt (index tables below)
+//   - coef_here_h[0] <- edge coefficient cnt
+//   - coef_abov_h[0] <- bits [cnt*11+10 : cnt*11] of the line-buffer word
+// A block with no non-zero edge coefficient emits nothing. After the last
+// block a single `true` is written to coef_end.
+//
+// NOTE(review): has_left is true only for j == 0, which looks inverted
+// relative to its name - confirm the polarity against the consumer.
+//
+// len            number of blocks to process
+// is_top_row     when true, lb_read is never read and the line-buffer word is 0
+// coef           7 edge-coefficient input streams
+// coef_77_here   49 input streams for the current block
+// coef_77_above  49 input streams for the block above
+// coef_here_h    8 output streams (index 0 = edge coefficient)
+// coef_abov_h    8 output streams (index 0 = line-buffer slice)
+// has_left       per-lane flag output (see note above)
+// coef_end       per-lane `false`, then one final `true`
+// coef_cnt_h_len per-block count of non-zero edge coefficients
+// lane_h         per-lane index output
+// lb_read        77-bit line-buffer words, one per block unless is_top_row
+void push_h_edge(ap_uint<32> len,
+                 bool is_top_row,
+                 hls::stream<ap_int<11> > coef[7],
+                 hls::stream<ap_int<11> > coef_77_here[49],
+                 hls::stream<ap_int<11> > coef_77_above[49],
+                 hls::stream<ap_int<11> > coef_here_h[8],
+                 hls::stream<ap_int<11> > coef_abov_h[8],
+                 hls::stream<bool>& has_left,
+                 hls::stream<bool>& coef_end,
+
+                 hls::stream<ap_uint<3> >& coef_cnt_h_len,
+                 hls::stream<ap_uint<3> >& lane_h,
+                 hls::stream<ap_uint<77> >& lb_read) {
+    ap_int<11> coef_buff[49];
+#pragma HLS array_partition variable = coef_buff complete dim = 0
+    ap_int<11> coef_buff_abv[49];
+// Fixed: this pragma previously named coef_buff a second time, leaving
+// coef_buff_abv unpartitioned.
+#pragma HLS array_partition variable = coef_buff_abv complete dim = 0
+    ap_int<11> coef_h_buff[7];
+#pragma HLS ARRAY_PARTITION variable = coef_h_buff complete dim = 1
+    ap_int<11> coef_abv_h_buff[7];
+#pragma HLS ARRAY_PARTITION variable = coef_abv_h_buff complete dim = 1
+    ap_uint<3> reg_coef_cnt_h = 0;
+
+    ap_uint<77> lb_read_reg;
+
+    // 1.init: load the first block before entering the pipelined loop
+    for (int i = 0; i < 49; i++) {
+#pragma HLS unroll
+        coef_buff[i] = coef_77_here[i].read();
+        coef_buff_abv[i] = coef_77_above[i].read();
+    }
+
+    for (int i = 0; i < 7; i++) {
+#pragma HLS unroll
+        coef_h_buff[i] = coef[i].read();
+        // or_reduce() is 1 when the coefficient is non-zero
+        reg_coef_cnt_h = reg_coef_cnt_h + (coef_h_buff[i].or_reduce());
+    }
+    coef_cnt_h_len.write(reg_coef_cnt_h);
+
+    if (!is_top_row) {
+        lb_read_reg = lb_read.read();
+    } else {
+        lb_read_reg = 0;
+    }
+
+    // 2.loop
+    int cnt = 0; // lane index inside the current block
+    int j = 0;   // block index
+// The label now sits directly on the loop (as in push_v_edge) instead of on
+// the declaration of cnt.
+PUSH_HOR_EDGE_LOOP:
+    while (j < len) {
+#pragma HLS pipeline II = 1
+
+        if (reg_coef_cnt_h.or_reduce()) {
+            lane_h.write(cnt);
+            coef_end.write(false);
+
+            if (j == 0)
+                has_left.write(true);
+            else
+                has_left.write(false);
+
+            if (cnt == 0) {
+                coef_here_h[1].write(coef_buff[0]);
+                coef_here_h[2].write(coef_buff[2]);
+                coef_here_h[3].write(coef_buff[3]);
+                coef_here_h[4].write(coef_buff[9]);
+                coef_here_h[5].write(coef_buff[10]);
+                coef_here_h[6].write(coef_buff[20]);
+                coef_here_h[7].write(coef_buff[21]);
+
+                coef_abov_h[1].write(coef_buff_abv[0]);
+                coef_abov_h[2].write(coef_buff_abv[2]);
+                coef_abov_h[3].write(coef_buff_abv[3]);
+                coef_abov_h[4].write(coef_buff_abv[9]);
+                coef_abov_h[5].write(coef_buff_abv[10]);
+                coef_abov_h[6].write(coef_buff_abv[20]);
+                coef_abov_h[7].write(coef_buff_abv[21]);
+            } else if (cnt == 1) {
+                coef_here_h[1].write(coef_buff[1]);
+                coef_here_h[2].write(coef_buff[4]);
+                coef_here_h[3].write(coef_buff[8]);
+                coef_here_h[4].write(coef_buff[11]);
+                coef_here_h[5].write(coef_buff[19]);
+                coef_here_h[6].write(coef_buff[22]);
+                coef_here_h[7].write(coef_buff[33]);
+
+                coef_abov_h[1].write(coef_buff_abv[1]);
+                coef_abov_h[2].write(coef_buff_abv[4]);
+                coef_abov_h[3].write(coef_buff_abv[8]);
+                coef_abov_h[4].write(coef_buff_abv[11]);
+                coef_abov_h[5].write(coef_buff_abv[19]);
+                coef_abov_h[6].write(coef_buff_abv[22]);
+                coef_abov_h[7].write(coef_buff_abv[33]);
+            } else if (cnt == 2) {
+                coef_here_h[1].write(coef_buff[5]);
+                coef_here_h[2].write(coef_buff[7]);
+                coef_here_h[3].write(coef_buff[12]);
+                coef_here_h[4].write(coef_buff[18]);
+                coef_here_h[5].write(coef_buff[23]);
+                coef_here_h[6].write(coef_buff[32]);
+                coef_here_h[7].write(coef_buff[34]);
+
+                coef_abov_h[1].write(coef_buff_abv[5]);
+                coef_abov_h[2].write(coef_buff_abv[7]);
+                coef_abov_h[3].write(coef_buff_abv[12]);
+                coef_abov_h[4].write(coef_buff_abv[18]);
+                coef_abov_h[5].write(coef_buff_abv[23]);
+                coef_abov_h[6].write(coef_buff_abv[32]);
+                coef_abov_h[7].write(coef_buff_abv[34]);
+            } else if (cnt == 3) {
+                coef_here_h[1].write(coef_buff[6]);
+                coef_here_h[2].write(coef_buff[13]);
+                coef_here_h[3].write(coef_buff[17]);
+                coef_here_h[4].write(coef_buff[24]);
+                coef_here_h[5].write(coef_buff[31]);
+                coef_here_h[6].write(coef_buff[35]);
+                coef_here_h[7].write(coef_buff[42]);
+
+                coef_abov_h[1].write(coef_buff_abv[6]);
+                coef_abov_h[2].write(coef_buff_abv[13]);
+                coef_abov_h[3].write(coef_buff_abv[17]);
+                coef_abov_h[4].write(coef_buff_abv[24]);
+                coef_abov_h[5].write(coef_buff_abv[31]);
+                coef_abov_h[6].write(coef_buff_abv[35]);
+                coef_abov_h[7].write(coef_buff_abv[42]);
+            } else if (cnt == 4) {
+                coef_here_h[1].write(coef_buff[14]);
+                coef_here_h[2].write(coef_buff[16]);
+                coef_here_h[3].write(coef_buff[25]);
+                coef_here_h[4].write(coef_buff[30]);
+                coef_here_h[5].write(coef_buff[36]);
+                coef_here_h[6].write(coef_buff[41]);
+                coef_here_h[7].write(coef_buff[43]);
+
+                coef_abov_h[1].write(coef_buff_abv[14]);
+                coef_abov_h[2].write(coef_buff_abv[16]);
+                coef_abov_h[3].write(coef_buff_abv[25]);
+                coef_abov_h[4].write(coef_buff_abv[30]);
+                coef_abov_h[5].write(coef_buff_abv[36]);
+                coef_abov_h[6].write(coef_buff_abv[41]);
+                coef_abov_h[7].write(coef_buff_abv[43]);
+            } else if (cnt == 5) {
+                coef_here_h[1].write(coef_buff[15]);
+                coef_here_h[2].write(coef_buff[26]);
+                coef_here_h[3].write(coef_buff[29]);
+                coef_here_h[4].write(coef_buff[37]);
+                coef_here_h[5].write(coef_buff[40]);
+                coef_here_h[6].write(coef_buff[44]);
+                coef_here_h[7].write(coef_buff[47]);
+
+                coef_abov_h[1].write(coef_buff_abv[15]);
+                coef_abov_h[2].write(coef_buff_abv[26]);
+                coef_abov_h[3].write(coef_buff_abv[29]);
+                coef_abov_h[4].write(coef_buff_abv[37]);
+                coef_abov_h[5].write(coef_buff_abv[40]);
+                coef_abov_h[6].write(coef_buff_abv[44]);
+                coef_abov_h[7].write(coef_buff_abv[47]);
+            } else if (cnt == 6) {
+                coef_here_h[1].write(coef_buff[27]);
+                coef_here_h[2].write(coef_buff[28]);
+                coef_here_h[3].write(coef_buff[38]);
+                coef_here_h[4].write(coef_buff[39]);
+                coef_here_h[5].write(coef_buff[45]);
+                coef_here_h[6].write(coef_buff[46]);
+                coef_here_h[7].write(coef_buff[48]);
+
+                coef_abov_h[1].write(coef_buff_abv[27]);
+                coef_abov_h[2].write(coef_buff_abv[28]);
+                coef_abov_h[3].write(coef_buff_abv[38]);
+                coef_abov_h[4].write(coef_buff_abv[39]);
+                coef_abov_h[5].write(coef_buff_abv[45]);
+                coef_abov_h[6].write(coef_buff_abv[46]);
+                coef_abov_h[7].write(coef_buff_abv[48]);
+            }
+            // 3.write out the edge coefficient and the matching 11-bit slice
+            //   of the line-buffer word
+            coef_here_h[0].write(coef_h_buff[cnt]);
+            coef_abov_h[0].write(lb_read_reg(cnt * 11 + 10, cnt * 11));
+            // only non-zero edge coefficients consume the remaining count
+            if (coef_h_buff[cnt].or_reduce()) reg_coef_cnt_h--;
+            cnt++;
+
+        } // loop0
+
+        // 4.update: the block is finished once no non-zero edge coefficient
+        //   is left; this may happen in the same iteration as the last lane
+        if (!reg_coef_cnt_h.or_reduce()) {
+            if (j < len - 1) { // read the next block, except after the last one
+
+                for (int i = 0; i < 49; i++) {
+#pragma HLS unroll
+                    coef_buff[i] = coef_77_here[i].read();
+                    coef_buff_abv[i] = coef_77_above[i].read();
+                }
+                for (int i = 0; i < 7; i++) {
+#pragma HLS unroll
+                    coef_h_buff[i] = coef[i].read();
+                    reg_coef_cnt_h = reg_coef_cnt_h + (coef_h_buff[i].or_reduce());
+                }
+                coef_cnt_h_len.write(reg_coef_cnt_h);
+                if (!is_top_row) {
+                    lb_read_reg = lb_read.read();
+                } else {
+                    lb_read_reg = 0;
+                }
+            }
+
+            cnt = 0;
+            j++;
+        } // loop1
+    }
+    coef_end.write(true);
+}
+
+// ------------------------------------------------------------
+// Streams the vertical-edge coefficients of `len` blocks, one lane per
+// pipeline iteration.
+//
+// For each block the function loads 49 values from coef_77_here and 7 edge
+// coefficients from `coef`, keeping the previous block's values as the "left"
+// copies, and publishes the number of non-zero edge coefficients on
+// coef_cnt_v_len. While that count is non-zero it emits lanes in order
+// (cnt = 0, 1, ...):
+//   - coef_end      <- false
+//   - lane_v        <- lane index cnt
+//   - has_left      <- true for the first block (j == 0), false afterwards
+//   - coef_here_v[1..7] / coef_left_v[1..7] <- seven entries of the
+//     current/left buffers selected by cnt (index tables below)
+//   - coef_here_v[0] / coef_left_v[0] <- current/left edge coefficient cnt
+// A block with no non-zero edge coefficient emits nothing. After the last
+// block a single `true` is written to coef_end.
+//
+// NOTE(review): has_left is true only for j == 0, which looks inverted
+// relative to its name - confirm the polarity against the consumer.
+//
+// len            number of blocks to process
+// coef           7 edge-coefficient input streams
+// coef_77_here   49 input streams for the current block
+// coef_here_v    8 output streams for the current block (index 0 = edge coef)
+// coef_left_v    8 output streams for the previous block (index 0 = edge coef)
+// has_left       per-lane flag output (see note above)
+// coef_end       per-lane `false`, then one final `true`
+// coef_cnt_v_len per-block count of non-zero edge coefficients
+// lane_v         per-lane index output
+void push_v_edge(ap_uint<32> len,
+                 hls::stream<ap_int<11> > coef[7],
+                 hls::stream<ap_int<11> > coef_77_here[49],
+
+                 hls::stream<ap_int<11> > coef_here_v[8],
+                 hls::stream<ap_int<11> > coef_left_v[8],
+                 hls::stream<bool>& has_left,
+                 hls::stream<bool>& coef_end,
+
+                 hls::stream<ap_uint<3> >& coef_cnt_v_len,
+                 hls::stream<ap_uint<3> >& lane_v) {
+    ap_int<11> coef_v_buff[7];
+#pragma HLS ARRAY_PARTITION variable = coef_v_buff complete dim = 1
+    ap_int<11> coef_v_buff_lft[7];
+#pragma HLS ARRAY_PARTITION variable = coef_v_buff_lft complete dim = 1
+    ap_int<11> coef_buff[49];
+#pragma HLS array_partition variable = coef_buff complete dim = 0
+    ap_int<11> coef_buff_lft[49];
+// Fixed: this pragma previously named coef_buff a second time, leaving
+// coef_buff_lft unpartitioned.
+#pragma HLS array_partition variable = coef_buff_lft complete dim = 0
+
+    ap_uint<3> reg_coef_cnt_v = 0;
+
+    // 1.init: load the first block. There is no previous block yet, so the
+    //   "left" copies start at zero instead of copying uninitialised buffers.
+    for (int i = 0; i < 49; i++) {
+#pragma HLS unroll
+        coef_buff_lft[i] = 0;
+        coef_buff[i] = coef_77_here[i].read();
+    }
+
+    for (int i = 0; i < 7; i++) {
+#pragma HLS unroll
+        coef_v_buff_lft[i] = 0;
+        coef_v_buff[i] = coef[i].read();
+        // or_reduce() is 1 when the coefficient is non-zero
+        reg_coef_cnt_v = reg_coef_cnt_v + (coef_v_buff[i].or_reduce());
+    }
+    coef_cnt_v_len.write(reg_coef_cnt_v);
+
+    // 2.loop
+    int j = 0;   // block index
+    int cnt = 0; // lane index inside the current block
+PUSH_VER_EDGE_LOOP:
+    while (j < len) {
+#pragma HLS pipeline II = 1
+
+        if (reg_coef_cnt_v.or_reduce()) {
+            coef_end.write(false);
+            lane_v.write(cnt);
+
+            if (j == 0)
+                has_left.write(true);
+            else
+                has_left.write(false);
+
+            if (cnt == 0) {
+                coef_here_v[1].write(coef_buff[0]);
+                coef_here_v[2].write(coef_buff[1]);
+                coef_here_v[3].write(coef_buff[5]);
+                coef_here_v[4].write(coef_buff[6]);
+                coef_here_v[5].write(coef_buff[14]);
+                coef_here_v[6].write(coef_buff[15]);
+                coef_here_v[7].write(coef_buff[27]);
+
+                coef_left_v[1].write(coef_buff_lft[0]);
+                coef_left_v[2].write(coef_buff_lft[1]);
+                coef_left_v[3].write(coef_buff_lft[5]);
+                coef_left_v[4].write(coef_buff_lft[6]);
+                coef_left_v[5].write(coef_buff_lft[14]);
+                coef_left_v[6].write(coef_buff_lft[15]);
+                coef_left_v[7].write(coef_buff_lft[27]);
+            } else if (cnt == 1) {
+                coef_here_v[1].write(coef_buff[2]);
+                coef_here_v[2].write(coef_buff[4]);
+                coef_here_v[3].write(coef_buff[7]);
+                coef_here_v[4].write(coef_buff[13]);
+                coef_here_v[5].write(coef_buff[16]);
+                coef_here_v[6].write(coef_buff[26]);
+                coef_here_v[7].write(coef_buff[28]);
+
+                coef_left_v[1].write(coef_buff_lft[2]);
+                coef_left_v[2].write(coef_buff_lft[4]);
+                coef_left_v[3].write(coef_buff_lft[7]);
+                coef_left_v[4].write(coef_buff_lft[13]);
+                coef_left_v[5].write(coef_buff_lft[16]);
+                coef_left_v[6].write(coef_buff_lft[26]);
+                coef_left_v[7].write(coef_buff_lft[28]);
+            } else if (cnt == 2) {
+                coef_here_v[1].write(coef_buff[3]);
+                coef_here_v[2].write(coef_buff[8]);
+                coef_here_v[3].write(coef_buff[12]);
+                coef_here_v[4].write(coef_buff[17]);
+                coef_here_v[5].write(coef_buff[25]);
+                coef_here_v[6].write(coef_buff[29]);
+                coef_here_v[7].write(coef_buff[38]);
+
+                coef_left_v[1].write(coef_buff_lft[3]);
+                coef_left_v[2].write(coef_buff_lft[8]);
+                coef_left_v[3].write(coef_buff_lft[12]);
+                coef_left_v[4].write(coef_buff_lft[17]);
+                coef_left_v[5].write(coef_buff_lft[25]);
+                coef_left_v[6].write(coef_buff_lft[29]);
+                coef_left_v[7].write(coef_buff_lft[38]);
+            } else if (cnt == 3) {
+                coef_here_v[1].write(coef_buff[9]);
+                coef_here_v[2].write(coef_buff[11]);
+                coef_here_v[3].write(coef_buff[18]);
+                coef_here_v[4].write(coef_buff[24]);
+                coef_here_v[5].write(coef_buff[30]);
+                coef_here_v[6].write(coef_buff[37]);
+                coef_here_v[7].write(coef_buff[39]);
+
+                coef_left_v[1].write(coef_buff_lft[9]);
+                coef_left_v[2].write(coef_buff_lft[11]);
+                coef_left_v[3].write(coef_buff_lft[18]);
+                coef_left_v[4].write(coef_buff_lft[24]);
+                coef_left_v[5].write(coef_buff_lft[30]);
+                coef_left_v[6].write(coef_buff_lft[37]);
+                coef_left_v[7].write(coef_buff_lft[39]);
+
+            } else if (cnt == 4) {
+                coef_here_v[1].write(coef_buff[10]);
+                coef_here_v[2].write(coef_buff[19]);
+                coef_here_v[3].write(coef_buff[23]);
+                coef_here_v[4].write(coef_buff[31]);
+                coef_here_v[5].write(coef_buff[36]);
+                coef_here_v[6].write(coef_buff[40]);
+                coef_here_v[7].write(coef_buff[45]);
+
+                coef_left_v[1].write(coef_buff_lft[10]);
+                coef_left_v[2].write(coef_buff_lft[19]);
+                coef_left_v[3].write(coef_buff_lft[23]);
+                coef_left_v[4].write(coef_buff_lft[31]);
+                coef_left_v[5].write(coef_buff_lft[36]);
+                coef_left_v[6].write(coef_buff_lft[40]);
+                coef_left_v[7].write(coef_buff_lft[45]);
+            } else if (cnt == 5) {
+                coef_here_v[1].write(coef_buff[20]);
+                coef_here_v[2].write(coef_buff[22]);
+                coef_here_v[3].write(coef_buff[32]);
+                coef_here_v[4].write(coef_buff[35]);
+                coef_here_v[5].write(coef_buff[41]);
+                coef_here_v[6].write(coef_buff[44]);
+                coef_here_v[7].write(coef_buff[46]);
+
+                coef_left_v[1].write(coef_buff_lft[20]);
+                coef_left_v[2].write(coef_buff_lft[22]);
+                coef_left_v[3].write(coef_buff_lft[32]);
+                coef_left_v[4].write(coef_buff_lft[35]);
+                coef_left_v[5].write(coef_buff_lft[41]);
+                coef_left_v[6].write(coef_buff_lft[44]);
+                coef_left_v[7].write(coef_buff_lft[46]);
+            } else if (cnt == 6) {
+                coef_here_v[1].write(coef_buff[21]);
+                coef_here_v[2].write(coef_buff[33]);
+                coef_here_v[3].write(coef_buff[34]);
+                coef_here_v[4].write(coef_buff[42]);
+                coef_here_v[5].write(coef_buff[43]);
+                coef_here_v[6].write(coef_buff[47]);
+                coef_here_v[7].write(coef_buff[48]);
+
+                coef_left_v[1].write(coef_buff_lft[21]);
+                coef_left_v[2].write(coef_buff_lft[33]);
+                coef_left_v[3].write(coef_buff_lft[34]);
+                coef_left_v[4].write(coef_buff_lft[42]);
+                coef_left_v[5].write(coef_buff_lft[43]);
+                coef_left_v[6].write(coef_buff_lft[47]);
+                coef_left_v[7].write(coef_buff_lft[48]);
+            }
+            // 3.write out the edge coefficient of this lane and its left copy
+            coef_here_v[0].write(coef_v_buff[cnt]);
+            coef_left_v[0].write(coef_v_buff_lft[cnt]);
+            // only non-zero edge coefficients consume the remaining count
+            if (coef_v_buff[cnt].or_reduce()) reg_coef_cnt_v--;
+            cnt++;
+
+        } // loop0
+
+        // 4.update: the block is finished once no non-zero edge coefficient
+        //   is left; this may happen in the same iteration as the last lane
+        if (!reg_coef_cnt_v.or_reduce()) {
+            if (j < len - 1) { // read the next block, except after the last one
+
+                for (int i = 0; i < 49; i++) {
+#pragma HLS unroll
+                    coef_buff_lft[i] = coef_buff[i];
+                    coef_buff[i] = coef_77_here[i].read();
+                }
+
+                for (int i = 0; i < 7; i++) {
+#pragma HLS unroll
+                    coef_v_buff_lft[i] = coef_v_buff[i];
+                    coef_v_buff[i] = coef[i].read();
+                    reg_coef_cnt_v = reg_coef_cnt_v + (coef_v_buff[i].or_reduce());
+                }
+                coef_cnt_v_len.write(reg_coef_cnt_v);
+            }
+
+            cnt = 0;
+            j++;
+
+        } // loop1
+    }
+    coef_end.write(true);
+}
+
+// ------------------------------------------------------------
+// Routes the coefficients of `len` blocks to the 7x7, horizontal-edge and
+// vertical-edge paths.
+//
+// Each block is consumed as eight beats; a beat reads one value from every
+// one of the eight `coef` streams. Depending on the beat number (`cnt`) each
+// value is forwarded to:
+//   - coef_to_h / coef_to_h_lb : 7 values per block, written in parallel to both
+//   - coef_to_v                : 7 values per block
+//   - coef_to_7x7              : 49 values per block, spread round-robin over
+//                                the 8 output streams
+// The first value of beat 0 (coef_reg[0]) is read but not forwarded.
+//
+// len           number of blocks to process
+// coef          8 input streams, one value each per beat
+// coef_to_7x7   8 output streams for the 7x7 path
+// coef_to_h     7 output streams for the horizontal-edge path
+// coef_to_h_lb  7 output streams carrying the same values as coef_to_h
+// coef_to_v     7 output streams for the vertical-edge path
+void dispatch(ap_uint<32> len,
+              hls::stream<ap_int<11> > coef[8],
+              hls::stream<ap_int<11> > coef_to_7x7[8],
+              hls::stream<ap_int<11> > coef_to_h[7],
+              hls::stream<ap_int<11> > coef_to_h_lb[7],
+              hls::stream<ap_int<11> > coef_to_v[7]) {
+    int cnt = 0;       // beat index inside the current block (0..7)
+    int block_cnt = 0; // number of completed blocks
+    ap_int<11> coef_reg[8];
+
+    while (block_cnt < len) {
+#pragma HLS pipeline II = 1
+
+        coef_reg[0] = coef[0].read();
+        coef_reg[1] = coef[1].read();
+        coef_reg[2] = coef[2].read();
+        coef_reg[3] = coef[3].read();
+        coef_reg[4] = coef[4].read();
+        coef_reg[5] = coef[5].read();
+        coef_reg[6] = coef[6].read();
+        coef_reg[7] = coef[7].read();
+
+        switch (cnt) {
+            case 0:
+                // coef_reg[0] is intentionally not forwarded in this beat
+                coef_to_h[0].write(coef_reg[1]);
+                coef_to_h_lb[0].write(coef_reg[1]);
+                coef_to_v[0].write(coef_reg[2]);
+                coef_to_v[1].write(coef_reg[3]);
+                coef_to_7x7[0].write(coef_reg[4]);
+                coef_to_h[1].write(coef_reg[5]);
+                coef_to_h[2].write(coef_reg[6]);
+                coef_to_h_lb[1].write(coef_reg[5]);
+                coef_to_h_lb[2].write(coef_reg[6]);
+                coef_to_7x7[1].write(coef_reg[7]);
+                cnt++;
+                break;
+
+            case 1:
+                coef_to_7x7[2].write(coef_reg[0]);
+                coef_to_v[2].write(coef_reg[1]);
+                coef_to_v[3].write(coef_reg[2]);
+                coef_to_7x7[3].write(coef_reg[3]);
+                coef_to_7x7[4].write(coef_reg[4]);
+                coef_to_7x7[5].write(coef_reg[5]);
+                coef_to_h[3].write(coef_reg[6]);
+                coef_to_h[4].write(coef_reg[7]);
+                coef_to_h_lb[3].write(coef_reg[6]);
+                coef_to_h_lb[4].write(coef_reg[7]);
+                cnt++;
+                break;
+
+            case 2:
+                coef_to_7x7[6].write(coef_reg[0]);
+                coef_to_7x7[7].write(coef_reg[1]);
+                coef_to_7x7[0].write(coef_reg[2]);
+                coef_to_7x7[1].write(coef_reg[3]);
+                coef_to_v[4].write(coef_reg[4]);
+                coef_to_v[5].write(coef_reg[5]);
+                coef_to_7x7[2].write(coef_reg[6]);
+                coef_to_7x7[3].write(coef_reg[7]);
+                cnt++;
+                break;
+
+            case 3:
+                coef_to_7x7[4].write(coef_reg[0]);
+                coef_to_7x7[5].write(coef_reg[1]);
+                coef_to_7x7[6].write(coef_reg[2]);
+                coef_to_h[5].write(coef_reg[3]);
+                coef_to_h[6].write(coef_reg[4]);
+                coef_to_h_lb[5].write(coef_reg[3]);
+                coef_to_h_lb[6].write(coef_reg[4]);
+                coef_to_7x7[7].write(coef_reg[5]);
+                coef_to_7x7[0].write(coef_reg[6]);
+                coef_to_7x7[1].write(coef_reg[7]);
+                cnt++;
+                break;
+
+            case 4:
+                coef_to_7x7[2].write(coef_reg[0]);
+                coef_to_7x7[3].write(coef_reg[1]);
+                coef_to_7x7[4].write(coef_reg[2]);
+                coef_to_v[6].write(coef_reg[3]);
+                coef_to_7x7[5].write(coef_reg[4]);
+                coef_to_7x7[6].write(coef_reg[5]);
+                coef_to_7x7[7].write(coef_reg[6]);
+                coef_to_7x7[0].write(coef_reg[7]);
+                cnt++;
+                break;
+
+            case 5:
+            case 6:
+            case 7:
+                // Beats 5..7 carry only 7x7 data and share one routing.
+                coef_to_7x7[1].write(coef_reg[0]);
+                coef_to_7x7[2].write(coef_reg[1]);
+                coef_to_7x7[3].write(coef_reg[2]);
+                coef_to_7x7[4].write(coef_reg[3]);
+                coef_to_7x7[5].write(coef_reg[4]);
+                coef_to_7x7[6].write(coef_reg[5]);
+                coef_to_7x7[7].write(coef_reg[6]);
+                coef_to_7x7[0].write(coef_reg[7]);
+                if (cnt == 7) {
+                    // last beat of the block: restart the beat counter
+                    cnt = 0;
+                    block_cnt++;
+                } else {
+                    cnt++;
+                }
+                break;
+
+            default:
+                // cnt never leaves 0..7
+                break;
+        }
+    }
+}
+
+// ------------------------------------------------------------
+// Fans the eight coefficient lanes out to two sets of output streams.
+//
+// Runs len * 8 beats. On every beat one value is read from each `coef` lane
+// and written unchanged to the matching lane of both strm_coef and str_rast8.
+// On beats whose low three counter bits are zero (every eighth beat, starting
+// with the first) the lane-0 value is additionally written to str_dc_in.
+//
+// coef       8 input streams
+// len        number of blocks; the loop runs len * 8 beats
+// strm_coef  8 output streams, copy of the input
+// str_rast8  8 output streams, copy of the input converted to coef_t
+// str_dc_in  receives the lane-0 value of every eighth beat
+void duplicate_coef(hls::stream<ap_int<11> > coef[8],
+                    ap_uint<32> len,
+                    hls::stream<ap_int<11> > strm_coef[8],
+                    hls::stream<coef_t> str_rast8[8],
+                    hls::stream<coef_t>& str_dc_in) {
+    ap_int<11> lane_val[8];
+#pragma HLS array_partition variable = lane_val complete dim = 0
+    ap_uint<32> beat = 0;
+
+    while (beat < len * 8) {
+#pragma HLS pipeline II = 1
+
+        // fetch one value per lane
+        for (int k = 0; k < 8; k++) {
+#pragma HLS unroll
+            lane_val[k] = coef[k].read();
+        }
+
+        // first copy
+        for (int k = 0; k < 8; k++) {
+#pragma HLS unroll
+            strm_coef[k].write(lane_val[k]);
+        }
+
+        // second copy
+        for (int k = 0; k < 8; k++) {
+#pragma HLS unroll
+            str_rast8[k].write(lane_val[k]);
+        }
+
+        // every eighth beat also feeds the lane-0 value to str_dc_in
+        if (beat.range(2, 0) == 0) {
+            str_dc_in.write(lane_val[0]);
+        }
+        beat++;
+    }
+}
+
+// ------------------------------------------------------------
+// Dataflow region that wires the per-block preprocessing stages together.
+//
+// The function only declares the intermediate streams and calls the stages;
+// all data movement happens inside the callees:
+//   dispatch          splits `coef` into the 7x7, horizontal and vertical paths
+//   store_7x7         consumes coef_to_7x7 and feeds the 7x7 buffers/line buffer
+//   line_buf_ctrl_77 / line_buf_read_77   7x7 line-buffer write/read path
+//   line_buf_write_h / line_buf_ctrl_h    horizontal-edge line-buffer path
+//   push_h_edge / push_v_edge             edge-coefficient lane streams
+//   line_buf_ctrl_nz  non-zero-count line buffer
+//   forward_7x7       final 7x7 coefficient and non-zero-count outputs
+//
+// len, id_cmp and is_top_row are passed through to the stages unchanged; every
+// other parameter is an input or output stream of one of the stages above.
+void preprocess(ap_uint<32> len,
+                ap_uint<3> id_cmp,
+                bool is_top_row,
+                hls::stream<ap_int<11> > coef[8],
+
+                hls::stream<ap_int<11> >& coef_7x7,
+                hls::stream<ap_int<11> >& coef_lft,
+                hls::stream<ap_int<11> >& coef_abv,
+                hls::stream<ap_int<11> >& coef_abv_lft,
+                hls::stream<ap_int<11> > coef_h[8],
+                hls::stream<ap_int<11> > coef_above_h[8],
+                hls::stream<bool>& strm_has_left_h,
+                hls::stream<bool>& coef_h_e,
+                hls::stream<ap_int<11> > coef_v[8],
+                hls::stream<ap_int<11> > coef_left_v[8],
+                hls::stream<bool>& strm_has_left_v,
+                hls::stream<bool>& coef_v_e,
+                hls::stream<ap_uint<6> >& non_zero_cnt,
+                hls::stream<ap_uint<6> >& non_zero_cnt_lft,
+                hls::stream<ap_uint<6> >& non_zero_cnt_abv,
+                hls::stream<ap_uint<6> >& non_zero_7x7,
+                hls::stream<ap_uint<6> >& non_zero_h_out,
+                hls::stream<ap_uint<3> >& coef_cnt_h_len,
+                hls::stream<ap_uint<3> >& strm_lane_h,
+                hls::stream<ap_uint<3> >& coef_cnt_v_len,
+                hls::stream<ap_uint<3> >& strm_lane_v,
+                hls::stream<ap_uint<3> >& eob_x,
+                hls::stream<ap_uint<3> >& eob_y) {
+#pragma HLS INLINE
+#pragma HLS dataflow
+
+    // clang-format off
+    hls::stream<ap_int<11> > coef_to_7x7[8];
+#pragma HLS stream depth=8 variable=coef_to_7x7
+#pragma HLS array_partition variable=coef_to_7x7 complete dim=0
+
+    hls::stream<ap_int<11> > coef_to_h[7];
+#pragma HLS stream depth=2 variable=coef_to_h
+#pragma HLS array_partition variable=coef_to_h complete dim=0
+
+    hls::stream<ap_int<11> > coef_to_h_lb[7];
+// Fixed: the depth pragma previously named coef_to_h a second time, so
+// coef_to_h_lb never received an explicit depth.
+#pragma HLS stream depth=2 variable=coef_to_h_lb
+#pragma HLS array_partition variable=coef_to_h_lb complete dim=0
+
+    hls::stream<ap_int<11> > coef_to_v[7];
+#pragma HLS stream depth=2 variable=coef_to_v
+#pragma HLS array_partition variable=coef_to_v complete dim=0
+
+    hls::stream<ap_int<11> > coef_buff[49];
+#pragma HLS stream depth=2 variable=coef_buff
+
+    hls::stream<ap_int<11> > coef_buff_h[49];
+#pragma HLS stream depth=2 variable=coef_buff_h
+
+    hls::stream<ap_int<11> > coef_buff_v[49];
+#pragma HLS stream depth=2 variable=coef_buff_v
+
+    hls::stream<ap_int<11> > coef_abv_buff[49];
+#pragma HLS stream depth=2 variable=coef_abv_buff
+
+    hls::stream<ap_int<11> > coef_abv_buff_h[49];
+#pragma HLS stream depth=2 variable=coef_abv_buff_h
+
+    static hls::stream<ap_uint<6> > non_zero_cnt_7x7("midle_non_zero");
+#pragma HLS stream depth=2 variable=non_zero_cnt_7x7
+
+    static hls::stream<ap_uint<77> > lb_write("write");
+#pragma HLS stream depth=2 variable=lb_write
+    static hls::stream<ap_uint<77> > lb_read("read");
+#pragma HLS stream depth=2 variable=lb_read
+
+    static hls::stream<ap_uint<6> > lb_nz_write("nz_write");
+#pragma HLS stream depth=2 variable=lb_nz_write
+
+    static hls::stream<ap_uint<77> > lb_write_h("h_write");
+#pragma HLS stream depth=2 variable=lb_write_h
+    static hls::stream<ap_uint<77> > lb_read_h("h_read");
+#pragma HLS stream depth=2 variable=lb_read_h
+    // clang-format on
+
+    // Split the raw coefficient lanes into the three processing paths.
+    dispatch(len, coef, coef_to_7x7, coef_to_h, coef_to_h_lb, coef_to_v);
+
+    // 7x7 path and its line buffer.
+    store_7x7(len, coef_to_7x7, non_zero_cnt_7x7, eob_x, eob_y, coef_buff, coef_buff_h, coef_buff_v, lb_write,
+              lb_nz_write);
+    line_buf_ctrl_77(len, id_cmp, is_top_row, lb_write, lb_read);
+    line_buf_read_77(len, is_top_row, lb_read, coef_abv_buff, coef_abv_buff_h);
+
+    // Horizontal-edge path and its line buffer.
+    line_buf_write_h(len, coef_to_h_lb, lb_write_h);
+    line_buf_ctrl_h(len, id_cmp, is_top_row, lb_write_h, lb_read_h);
+    push_h_edge(len, is_top_row, coef_to_h, coef_buff_h, coef_abv_buff_h, coef_h, coef_above_h, strm_has_left_h,
+                coef_h_e, coef_cnt_h_len, strm_lane_h, lb_read_h);
+
+    // Vertical-edge path.
+    push_v_edge(len, coef_to_v, coef_buff_v, coef_v, coef_left_v, strm_has_left_v, coef_v_e, coef_cnt_v_len,
+                strm_lane_v);
+
+    // Non-zero counts and final 7x7 outputs.
+    line_buf_ctrl_nz(len, id_cmp, is_top_row, lb_nz_write, non_zero_cnt_abv);
+    forward_7x7(len, coef_buff, coef_abv_buff, non_zero_cnt_7x7, coef_7x7, coef_lft, coef_abv, coef_abv_lft,
+                non_zero_cnt, non_zero_cnt_lft, non_zero_7x7, non_zero_h_out);
+}
+
+//==========================================================================================
+
+// ------------------------------------------------------------
+// Combines the magnitudes of the neighbouring coefficients into one value.
+//
+// - Only left or only above present: returns |that neighbour|.
+// - Neither present: returns 0.
+// - Both present: returns (13 * (|left| + |above|) + 6 * |above_left|) >> 5.
+//
+// above_present    whether coef_above contributes
+// coef_above       coefficient of the block above
+// left_present     whether coef_left contributes
+// coef_left        coefficient of the block to the left
+// coef_above_left  coefficient of the above-left block; used only when both
+//                  neighbours are present
+ap_int<16> compute_aavrg(
+    bool above_present, ap_int<11> coef_above, bool left_present, ap_int<11> coef_left, ap_int<11> coef_above_left) {
+#pragma HLS INLINE
+    ap_int<16> total = 0;
+
+    if (left_present) {
+        total = total + hls::abs(coef_left);
+    }
+    if (above_present) {
+        total = total + hls::abs(coef_above);
+    }
+    if (left_present && above_present) {
+        // weighted sum of all three neighbours, scaled down by 32
+        total = total * 13;
+        total = total + (6 * hls::abs(coef_above_left));
+        return total >> 5;
+    }
+    // The original carried a second, unreachable `return total;` after an
+    // if/else that returned on both paths; a single fall-through return remains.
+    return total;
+}
+
+// ------------------------------------------------------------
+// Produces, for each of `len` blocks, a bin index derived from the non-zero
+// counts of the neighbouring blocks.
+//
+// Per block one value is read from each of the three input streams. The
+// intermediate value is
+//   - (above + 1) >> 1           for the first block when above_present
+//   - (left + 1) >> 1            for later blocks when !above_present
+//   - (above + left + 2) >> 2    for later blocks when above_present
+//   - 0                          for the first block when !above_present
+// and hls_nonzero_to_bin_9[value] is written to strm_nonzero_bin.
+//
+// len                    number of blocks
+// above_present          whether the "above" count is used
+// strm_cur_nonzeros_cnt  read once per block; the value itself is not used
+// strm_lft_nonzeros_cnt  non-zero count of the left block
+// strm_abv_nonzeros_cnt  non-zero count of the block above
+// strm_nonzero_bin       output bin index, one per block
+void encode_num_nonzero_7x7(ap_uint<32> len,
+                            bool above_present,
+                            hls::stream<ap_uint<6> >& strm_cur_nonzeros_cnt,
+                            hls::stream<ap_uint<6> >& strm_lft_nonzeros_cnt,
+                            hls::stream<ap_uint<6> >& strm_abv_nonzeros_cnt,
+
+                            hls::stream<ap_uint<4> >& strm_nonzero_bin) {
+    ap_uint<6> ap_nz_bin = 0;
+    ap_uint<6> num_nonzeros_above;
+    ap_uint<6> num_nonzeros_left;
+    ap_uint<32> i = 0;
+
+NONZERO_7X7_LINE:
+    while (i < len) {
+#pragma HLS pipeline II = 1
+        ap_nz_bin = 0;
+        num_nonzeros_above = strm_abv_nonzeros_cnt.read();
+        num_nonzeros_left = strm_lft_nonzeros_cnt.read();
+        // The current-block count is not needed here but must still be
+        // consumed so the stream stays in step with the other two.
+        (void)strm_cur_nonzeros_cnt.read();
+
+        const bool has_left = (i != 0);
+        if (above_present && has_left) {
+            ap_nz_bin = (num_nonzeros_above + num_nonzeros_left + 2) >> 2;
+        } else if (above_present) {
+            ap_nz_bin = (num_nonzeros_above + 1) >> 1;
+        } else if (has_left) {
+            ap_nz_bin = (num_nonzeros_left + 1) >> 1;
+        }
+
+        strm_nonzero_bin.write(hls_nonzero_to_bin_9[ap_nz_bin]);
+        i++;
+    }
+}
+
+// ------------------------------------------------------------
+// Prepares the per-coefficient side information for the 7x7 part of `len`
+// blocks.
+//
+// For every block the non-zero count is read from strm_num_nonzeros and
+// forwarded to strm_num_nonzeros_7x7. While that count is non-zero, each
+// pipeline iteration reads one here/above/left/above-left coefficient and
+// writes:
+//   strm_7x7_length        11 - leading zeros of the absolute coefficient
+//   strm_num_nonzeros_bin  hls_nonzero_to_bin_9[remaining count]
+//   strm_bsr_best_prior    min(16 - leading zeros of |compute_aavrg(...)|, 10)
+//   strm_abs_coef          absolute coefficient
+//   strm_coord             hls_unzigzag49[scan position]
+//   strm_cur_bit_sign      only for non-zero length: 1 if coef >= 0, else 0
+// A coefficient with non-zero length decrements the remaining count. The left
+// neighbour is treated as present for every block except the first.
+void prepare_7x7(ap_uint<32> len,
+                 hls::stream<ap_int<11> >& strm_coef_here,
+                 bool above_present,
+                 hls::stream<ap_int<11> >& strm_coef_above,
+                 hls::stream<ap_int<11> >& strm_coef_left,
+                 hls::stream<ap_int<11> >& strm_coef_above_left,
+                 hls::stream<ap_uint<6> >& strm_num_nonzeros,
+
+                 hls::stream<ap_uint<6> >& strm_num_nonzeros_7x7,
+                 hls::stream<ap_uint<4> >& strm_7x7_length,
+
+                 hls::stream<ap_uint<4> >& strm_num_nonzeros_bin,
+                 hls::stream<ap_uint<4> >& strm_bsr_best_prior,
+
+                 hls::stream<bool>& strm_cur_bit_sign,
+
+                 hls::stream<ap_uint<11> >& strm_abs_coef,
+                 hls::stream<ap_uint<6> >& strm_coord) {
+    ap_uint<32> blk = 0;       // block index
+    unsigned int scan_pos = 0; // position inside hls_unzigzag49
+
+    // Prime the remaining-count register with the first block's count.
+    ap_uint<6> remaining = strm_num_nonzeros.read();
+    strm_num_nonzeros_7x7.write(remaining);
+
+PREPARE_7X7_LINE:
+    while (blk < len) {
+#pragma HLS pipeline II = 1
+
+        if (remaining != 0) {
+            ap_uint<6> coord = (ap_uint<6>)hls_unzigzag49[scan_pos];
+
+            // neighbour coefficients feeding the prior estimate
+            ap_int<11> nb_above = strm_coef_above.read();
+            ap_int<11> nb_left = strm_coef_left.read();
+            ap_int<11> nb_above_left = strm_coef_above_left.read();
+            ap_int<16> avg = compute_aavrg(above_present, nb_above, blk != 0, nb_left, nb_above_left);
+
+            ap_int<11> coef;
+            coef = strm_coef_here.read();
+            ap_int<11> abs_coef = hls::abs(coef);
+
+            ap_uint<4> bsr_best_prior;
+            bsr_best_prior = hls::min(16 - hls::abs(avg).countLeadingZeros(), 10);
+
+            ap_uint<6> num_nonzeros_bin;
+            num_nonzeros_bin = hls_nonzero_to_bin_9[remaining];
+
+            ap_uint<4> length;
+            length = 11 - abs_coef.countLeadingZeros();
+
+            // emit the side information for this coefficient
+            strm_7x7_length.write(length);
+            strm_num_nonzeros_bin.write(num_nonzeros_bin);
+            strm_bsr_best_prior.write(bsr_best_prior);
+            strm_abs_coef.write(abs_coef);
+            strm_coord.write(coord);
+
+            // a sign bit exists only for coefficients with non-zero length
+            if (length != 0) {
+                strm_cur_bit_sign.write(coef >= 0 ? 1 : 0);
+                remaining--;
+            }
+            scan_pos++;
+        }
+
+        // Block finished: fetch the next count unless this was the last block.
+        if (remaining == 0) {
+            if (blk < len - 1) {
+                remaining = strm_num_nonzeros.read();
+                strm_num_nonzeros_7x7.write(remaining);
+            }
+            scan_pos = 0;
+            blk++;
+        }
+    }
+}
+
+// push_bit_7x7: for each of len groups, serialize the nonzero count and every coefficient into (table, bit, address) tuples.
+void push_bit_7x7(ap_uint<32> len,
+                  hls::stream<ap_uint<6> >& strm_num_nonzeros_7x7,
+                  hls::stream<ap_uint<4> >& strm_7x7_length,
+
+                  hls::stream<ap_uint<4> >& strm_nonzero_bin,
+
+                  hls::stream<ap_uint<4> >& strm_7x7_num_nonzero_bin,
+                  hls::stream<ap_uint<4> >& strm_7x7_bsr_best_prior,
+
+                  hls::stream<bool>& strm_cur_bit_sign,
+
+                  hls::stream<ap_uint<11> >& strm_abs_coef,
+                  hls::stream<ap_uint<6> >& strm_7x7_coord_nois,
+                  // outputs: one (sel_tab, cur_bit, e = false, addr1..addr4) tuple per emitted bit
+                  hls::stream<ap_uint<4> >& strm_sel_tab,
+                  hls::stream<bool>& strm_cur_bit,
+                  hls::stream<bool>& strm_e,
+                  hls::stream<ap_uint<16> >& strm_addr1,
+                  hls::stream<ap_uint<16> >& strm_addr2,
+                  hls::stream<ap_uint<16> >& strm_addr3,
+                  hls::stream<ap_uint<16> >& strm_addr4) {
+    ap_uint<5> serialized_so_far = 0;
+    ap_uint<32> j = 0;
+    ap_uint<6> nz; // unused: shadowed by the loop-local nz declared inside the while loop below
+    ap_uint<4> length;
+    ap_uint<11> abs_coef;
+    // one outer iteration per group
+    while (j < len) {
+        serialized_so_far = 0;
+        ap_uint<6> nz = strm_num_nonzeros_7x7.read();
+        ap_uint<4> nonzero_bin_reg = strm_nonzero_bin.read();
+        for (int index = 5; index >= 0; --index) { // the 6 bits of nz, most significant first
+#pragma HLS pipeline II = 1
+            strm_sel_tab.write(NZ_CNT_7x7);
+            bool cur_bit = nz[index];
+
+            strm_cur_bit.write(cur_bit);
+            strm_e.write(false);
+            strm_addr1.write(nonzero_bin_reg);
+            strm_addr2.write(index);
+            strm_addr3.write(serialized_so_far);
+            strm_addr4.write(0);
+            // append the bit just sent; the accumulated bits are the addr3 value of the next count bit
+            serialized_so_far <<= 1;
+            serialized_so_far.set(0, cur_bit);
+        }
+        // zz counts the coefficients visited in this group (sent as addr2 of the EXP_CNT tuples)
+        int zz = 0;
+    EXPONENT_7X7_OUTER:
+        while (nz) {
+#pragma HLS loop_tripcount max = 5 min = 0
+            length = strm_7x7_length.read();
+            ap_uint<4> num_nonzeros_bin = strm_7x7_num_nonzero_bin.read();
+            ap_uint<4> bsr_best_prior = strm_7x7_bsr_best_prior.read();
+            int i = 0;
+        EXPONENT_7X7_INNER:
+            while (i < length + 1) {
+#pragma HLS loop_tripcount max = 3 min = 1
+#pragma HLS pipeline II = 1
+                bool cur_bit = (length != i); // `length` ones followed by a single terminating zero
+                strm_sel_tab.write(EXP_CNT);
+                strm_cur_bit.write(cur_bit);
+                strm_e.write(false);
+                strm_addr1.write(num_nonzeros_bin);
+                strm_addr2.write(zz);
+                strm_addr3.write(bsr_best_prior);
+                strm_addr4.write(i);
+                i++;
+            }
+            // a sign bit follows only when length != 0, and only such coefficients decrement nz
+            if (length != 0) {
+                strm_sel_tab.write(SIGN_CNT);
+                strm_cur_bit.write(strm_cur_bit_sign.read());
+                strm_e.write(false);
+                strm_addr1.write(0);
+                strm_addr2.write(0);
+                strm_addr3.write(0);
+                strm_addr4.write(0);
+                nz--;
+            }
+            // both streams are read unconditionally, even when the loop below emits nothing
+            abs_coef = strm_abs_coef.read();
+            unsigned int coord = strm_7x7_coord_nois.read();
+            int k = 0;
+        NOISE_7X7_INNER:
+            while (k <= length - 2) {
+#pragma HLS loop_tripcount max = 3 min = 1
+#pragma HLS pipeline II = 1
+                strm_sel_tab.write(NOIS_CNT);
+                strm_cur_bit.write(abs_coef[length - 2 - k]); // bits length-2 down to 0 of abs_coef
+                strm_e.write(false);
+                strm_addr1.write(coord);
+                strm_addr2.write(num_nonzeros_bin);
+                strm_addr3.write(length - 2 - k);
+                strm_addr4.write(0);
+                k++;
+            }
+
+            zz++;
+        }
+        strm_e.write(true); // end-of-group marker; no sel_tab/cur_bit/addr entries accompany it
+        j++;
+    }
+}
+
+// push_bit_7x7_v2: single-loop state-machine bit serializer; writes one tuple per iteration and a tuple count per group.
+void push_bit_7x7_v2(ap_uint<32> len,
+                     hls::stream<ap_uint<6> >& strm_num_nonzeros_7x7,
+                     hls::stream<ap_uint<4> >& strm_7x7_length,
+
+                     hls::stream<ap_uint<4> >& strm_nonzero_bin,
+
+                     hls::stream<ap_uint<4> >& strm_7x7_num_nonzero_bin,
+                     hls::stream<ap_uint<4> >& strm_7x7_bsr_best_prior,
+
+                     hls::stream<bool>& strm_cur_bit_sign,
+
+                     hls::stream<ap_uint<11> >& strm_abs_coef,
+                     hls::stream<ap_uint<6> >& strm_7x7_coord_nois,
+                     // outputs: one (sel_tab, cur_bit, addr1..addr4) tuple per loop iteration; strm_len gets one count per group
+                     hls::stream<ap_uint<4> >& strm_sel_tab,
+                     hls::stream<bool>& strm_cur_bit,
+                     hls::stream<short>& strm_len,
+                     //	hls::stream<bool>		 & strm_e,
+                     hls::stream<ap_uint<16> >& strm_addr1,
+                     hls::stream<ap_uint<16> >& strm_addr2,
+                     hls::stream<ap_uint<16> >& strm_addr3,
+                     hls::stream<ap_uint<16> >& strm_addr4) {
+    ap_uint<5> serialized_so_far = 0;
+    ap_uint<32> j = 0;
+    ap_uint<6> nz;
+    ap_uint<4> length;
+    ap_uint<11> abs_coef;
+    ap_uint<4> num_nonzeros_bin;
+    ap_uint<4> bsr_best_prior;
+    unsigned int coord;
+    ap_uint<4> nonzero_bin_reg;
+    // state: 1 = NZ_CNT_7x7 bits, 3 = EXP_CNT bits, 4 = SIGN_CNT bit, 5 = NOIS_CNT bits
+    ap_uint<8> state = 1;
+    ap_uint<8> index = 5;
+    int i = 0;
+    int k = 0;
+    int zz = 0;
+    bool quit = false;
+    short cnt = 0;
+    // prime the first group (note: these two reads happen even when len == 0)
+    quit = false;
+    serialized_so_far = 0;
+    nz = strm_num_nonzeros_7x7.read();
+    nonzero_bin_reg = strm_nonzero_bin.read();
+    index = 5;
+    zz = 0;
+    cnt = 0;
+    state = 1;
+
+    while (!quit) {
+#pragma HLS pipeline II = 1
+        if (state == 1) {
+            strm_sel_tab.write(NZ_CNT_7x7);
+            bool cur_bit = nz[index];
+
+            strm_cur_bit.write(cur_bit);
+            cnt++;
+            strm_addr1.write(nonzero_bin_reg);
+            strm_addr2.write(index);
+            strm_addr3.write(serialized_so_far);
+            strm_addr4.write(0);
+
+            serialized_so_far <<= 1;
+            serialized_so_far.set(0, cur_bit);
+            if (index == 0 && nz != 0) { // last count bit sent: load the first coefficient of the group
+                length = strm_7x7_length.read();
+                num_nonzeros_bin = strm_7x7_num_nonzero_bin.read();
+                bsr_best_prior = strm_7x7_bsr_best_prior.read();
+
+                abs_coef = strm_abs_coef.read();
+                coord = strm_7x7_coord_nois.read();
+                i = 0;
+                state = 3;
+            } else if (index == 0 && nz == 0 && j != len - 1) { // empty group, more to come: close it, load the next
+                strm_len.write(cnt);
+                serialized_so_far = 0;
+                nz = strm_num_nonzeros_7x7.read();
+                nonzero_bin_reg = strm_nonzero_bin.read();
+                index = 5;
+                zz = 0;
+                cnt = 0;
+                j++;
+            } else if (index == 0 && nz == 0 && j == len - 1) { // empty final group: close it and stop
+                zz++;
+                strm_len.write(cnt);
+                cnt = 0;
+                quit = true;
+            } else {
+                index--;
+            }
+        } else if (state == 3) {
+            bool cur_bit = (length != i); // `length` ones followed by a single terminating zero
+            strm_sel_tab.write(EXP_CNT);
+            strm_cur_bit.write(cur_bit);
+            cnt++;
+            strm_addr1.write(num_nonzeros_bin);
+            strm_addr2.write(zz);
+            strm_addr3.write(bsr_best_prior);
+            strm_addr4.write(i);
+            if (i == length && length != 0) { // non-zero length: a sign bit follows
+                state = 4;
+            } else if (i == length && length == 0 && nz != 0) { // zero length: move straight to the next coefficient
+                zz++;
+                length = strm_7x7_length.read();
+                num_nonzeros_bin = strm_7x7_num_nonzero_bin.read();
+                bsr_best_prior = strm_7x7_bsr_best_prior.read();
+
+                abs_coef = strm_abs_coef.read();
+                coord = strm_7x7_coord_nois.read();
+                i = 0;
+                state = 3;
+            } else if (i == length && length == 0 && nz == 0 && j != len - 1) { // NOTE(review): looks unreachable (state 3 is only entered with nz != 0) -- confirm
+                strm_len.write(cnt);
+                serialized_so_far = 0;
+                nz = strm_num_nonzeros_7x7.read();
+                nonzero_bin_reg = strm_nonzero_bin.read();
+                index = 5;
+                zz = 0;
+                cnt = 0;
+                j++;
+                state = 1;
+            } else if (i == length && length == 0 && nz == 0 && j == len - 1) { // NOTE(review): looks unreachable for the same reason -- confirm
+                strm_len.write(cnt);
+                cnt = 0;
+                quit = true;
+            } else {
+                i++;
+            }
+        } else if (state == 4) {
+            strm_sel_tab.write(SIGN_CNT);
+            strm_cur_bit.write(strm_cur_bit_sign.read());
+            cnt++;
+            strm_addr1.write(0);
+            strm_addr2.write(0);
+            strm_addr3.write(0);
+            strm_addr4.write(0);
+            nz--; // nz is only decremented here, once per sign bit
+            k = 0;
+            if (k <= length - 2) { // length >= 2: NOIS_CNT bits follow
+                state = 5;
+            } else if (nz != 0) { // no further bits for this coefficient: load the next one
+                zz++;
+                length = strm_7x7_length.read();
+                num_nonzeros_bin = strm_7x7_num_nonzero_bin.read();
+                bsr_best_prior = strm_7x7_bsr_best_prior.read();
+
+                abs_coef = strm_abs_coef.read();
+                coord = strm_7x7_coord_nois.read();
+                i = 0;
+                state = 3;
+            } else if (nz == 0 && j != len - 1) { // group finished, more to come: emit its count, load the next
+                strm_len.write(cnt);
+                serialized_so_far = 0;
+                nz = strm_num_nonzeros_7x7.read();
+                nonzero_bin_reg = strm_nonzero_bin.read();
+                index = 5;
+                zz = 0;
+                cnt = 0;
+                j++;
+                state = 1;
+            } else if (nz == 0 && j == len - 1) { // last group finished: emit its count and stop
+                strm_len.write(cnt);
+                cnt = 0;
+                quit = true;
+            }
+        } else if (state == 5) {
+            strm_sel_tab.write(NOIS_CNT);
+            strm_cur_bit.write(abs_coef[length - 2 - k]); // bits length-2 down to 0 of abs_coef
+            cnt++;
+            strm_addr1.write(coord);
+            strm_addr2.write(num_nonzeros_bin);
+            strm_addr3.write(length - 2 - k);
+            strm_addr4.write(0);
+            if (k == length - 2 && nz != 0) { // lowest bit sent: load the next coefficient
+                zz++;
+                length = strm_7x7_length.read();
+                num_nonzeros_bin = strm_7x7_num_nonzero_bin.read();
+                bsr_best_prior = strm_7x7_bsr_best_prior.read();
+
+                abs_coef = strm_abs_coef.read();
+                coord = strm_7x7_coord_nois.read();
+                i = 0;
+                state = 3;
+
+            } else if (k == length - 2 && nz == 0 && j != len - 1) { // group finished, more to come
+                strm_len.write(cnt);
+                serialized_so_far = 0;
+                nz = strm_num_nonzeros_7x7.read();
+                nonzero_bin_reg = strm_nonzero_bin.read();
+                index = 5;
+                zz = 0;
+                cnt = 0;
+                j++;
+                state = 1;
+            } else if (k == length - 2 && nz == 0 && j == len - 1) { // last group finished
+                strm_len.write(cnt);
+                cnt = 0;
+                quit = true;
+            }
+            k++; // also runs after a state change; state 4 resets k to 0 before state 5 is entered again
+        }
+    }
+}
+
+// hls_serialize_tokens_77: dataflow region chaining encode_num_nonzero_7x7 -> prepare_7x7 -> push_bit_7x7_v2 via local FIFOs.
+void hls_serialize_tokens_77(ap_uint<32> len,
+                             bool above_present,
+                             hls::stream<ap_uint<6> >& strm_num_nonzeros_7x7,
+                             hls::stream<ap_int<11> >& strm_coef_here,
+                             hls::stream<ap_int<11> >& strm_coef_above,
+                             hls::stream<ap_int<11> >& strm_coef_left,
+                             hls::stream<ap_int<11> >& strm_coef_above_left,
+
+                             hls::stream<ap_uint<6> >& strm_nz_cur,
+                             hls::stream<ap_uint<6> >& strm_nz_abv,
+                             hls::stream<ap_uint<6> >& strm_nz_lft,
+                             // outputs, all driven by push_bit_7x7_v2:
+                             hls::stream<ap_uint<4> >& strm_sel_tab,
+                             hls::stream<bool>& strm_cur_bit,
+                             hls::stream<short>& strm_len,
+                             //	hls::stream<bool>		 & strm_e,
+                             hls::stream<ap_uint<16> >& strm_addr1,
+                             hls::stream<ap_uint<16> >& strm_addr2,
+                             hls::stream<ap_uint<16> >& strm_addr3,
+                             hls::stream<ap_uint<16> >& strm_addr4
+
+                             ) {
+#pragma HLS INLINE
+#pragma HLS dataflow
+    // local FIFOs between the three stages, each given a stream depth of 32
+    // clang-format off
+    static hls::stream<ap_uint<11> > strm_abs_coef("coef_abs");
+#pragma HLS stream depth=32 variable=strm_abs_coef
+    // NOTE(review): strm_abs_coef above is the only FIFO declared static -- confirm this is intentional
+    hls::stream<ap_uint<4> > strm_nonzero_bin_tmp("strm_nz_bin");
+#pragma HLS stream depth=32 variable=strm_nonzero_bin_tmp
+    hls::stream<ap_uint<6> > strm_7x7_nz("strm_77_nz");
+#pragma HLS stream depth=32 variable=strm_7x7_nz
+    hls::stream<ap_uint<4> > strm_7x7_length("strm_77_len");
+#pragma HLS stream depth=32 variable=strm_7x7_length
+
+    hls::stream<ap_uint<4> > strm_7x7_num_nonzero_bin("strm_nz");
+#pragma HLS stream depth=32 variable=strm_7x7_num_nonzero_bin
+    hls::stream<ap_uint<4> > strm_7x7_bsr_best_prior("strm_bsr");
+#pragma HLS stream depth=32 variable=strm_7x7_bsr_best_prior
+
+    hls::stream<bool> strm_7x7_cur_bit_sign_tmp("strm_sign_bit");
+#pragma HLS stream depth=32 variable=strm_7x7_cur_bit_sign_tmp
+    hls::stream<ap_uint<6> > strm_coord("strm_coord");
+#pragma HLS stream depth=32 variable=strm_coord
+    // clang-format on
+    // stage 1: presumably fills strm_nonzero_bin_tmp, which stage 3 reads -- confirm in encode_num_nonzero_7x7
+    encode_num_nonzero_7x7(len, above_present, strm_nz_cur, strm_nz_lft, strm_nz_abv, strm_nonzero_bin_tmp);
+    // stage 2: per-coefficient side info
+    prepare_7x7(len, strm_coef_here, above_present, strm_coef_above, strm_coef_left, strm_coef_above_left,
+                strm_num_nonzeros_7x7,
+                // the argument above is prepare_7x7's count input; strm_7x7_nz carries its forwarded copy
+                strm_7x7_nz, strm_7x7_length,
+
+                strm_7x7_num_nonzero_bin, strm_7x7_bsr_best_prior,
+
+                strm_7x7_cur_bit_sign_tmp,
+
+                strm_abs_coef, strm_coord);
+    // stage 3: serialize everything into the output tuple streams
+    push_bit_7x7_v2(len, strm_7x7_nz, strm_7x7_length,
+
+                    strm_nonzero_bin_tmp,
+
+                    strm_7x7_num_nonzero_bin, strm_7x7_bsr_best_prior,
+
+                    strm_7x7_cur_bit_sign_tmp,
+
+                    strm_abs_coef, strm_coord,
+
+                    strm_sel_tab, strm_cur_bit, strm_len,
+                    //		strm_e,
+                    strm_addr1, strm_addr2, strm_addr3, strm_addr4);
+}
+
+// pre_serialize_tokens_77: dataflow region running only encode_num_nonzero_7x7 and prepare_7x7; results leave via ports.
+void pre_serialize_tokens_77(ap_uint<32> len,
+                             bool above_present,
+                             hls::stream<ap_uint<6> >& strm_num_nonzeros_7x7,
+                             hls::stream<ap_int<11> >& strm_coef_here,
+                             hls::stream<ap_int<11> >& strm_coef_above,
+                             hls::stream<ap_int<11> >& strm_coef_left,
+                             hls::stream<ap_int<11> >& strm_coef_above_left,
+
+                             hls::stream<ap_uint<6> >& strm_nz_cur,
+                             hls::stream<ap_uint<6> >& strm_nz_abv,
+                             hls::stream<ap_uint<6> >& strm_nz_lft,
+                             // outputs: the same streams that hls_serialize_tokens_77 keeps as local FIFOs
+                             hls::stream<ap_uint<4> >& strm_nonzero_bin_tmp,
+
+                             hls::stream<ap_uint<6> >& strm_7x7_nz,
+                             hls::stream<ap_uint<4> >& strm_7x7_length,
+
+                             hls::stream<ap_uint<4> >& strm_7x7_num_nonzero_bin,
+                             hls::stream<ap_uint<4> >& strm_7x7_bsr_best_prior,
+
+                             hls::stream<bool>& strm_7x7_cur_bit_sign_tmp,
+
+                             hls::stream<ap_uint<11> >& strm_abs_coef,
+                             hls::stream<ap_uint<6> >& strm_coord
+
+                             ) {
+#pragma HLS INLINE
+#pragma HLS DATAFLOW
+    // stage 1: receives strm_nonzero_bin_tmp as its last argument (presumably its output -- confirm)
+    encode_num_nonzero_7x7(len, above_present, strm_nz_cur, strm_nz_lft, strm_nz_abv, strm_nonzero_bin_tmp);
+    // stage 2: per-coefficient side info
+    prepare_7x7(len, strm_coef_here, above_present, strm_coef_above, strm_coef_left, strm_coef_above_left,
+                strm_num_nonzeros_7x7,
+                // the argument above is prepare_7x7's count input; strm_7x7_nz carries its forwarded copy
+                strm_7x7_nz, strm_7x7_length,
+
+                strm_7x7_num_nonzero_bin, strm_7x7_bsr_best_prior,
+
+                strm_7x7_cur_bit_sign_tmp,
+
+                strm_abs_coef, strm_coord);
+}
+
+} // namespace details
+} // namespace codec
+} // namespace xf
diff --git a/codec/L2/demos/leptonEnc/kernel/XAcc_arith.cpp b/codec/L2/demos/leptonEnc/kernel/XAcc_arith.cpp
new file mode 100644
index 0000000000..5bb06e3202
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/kernel/XAcc_arith.cpp
@@ -0,0 +1,562 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// XAcc_arith.cpp
+#include "XAcc_arith.hpp"
+
+//// TEMP
+namespace xf {
+namespace codec {
+namespace details {
+
+// vpx_enc_range_noDSP: per input bit, split the 8-bit range by prob, keep one part, renormalize, and emit (shift, split).
+void vpx_enc_range_noDSP(
+    // void vpx_enc_range(
+
+    // input (br_range is read on entry and written back on exit)
+    unsigned char* br_range,
+    hls::stream<bool>& strm_bit,
+    hls::stream<uint8_t>& strm_prob,
+    hls::stream<bool>& strm_e_range,
+    hls::stream<uint8_t>& strm_tab_dbg,
+    // output
+    hls::stream<bool>& strm_range_o_e,
+    hls::stream<ap_uint<3> >& strm_range_o_shift,
+    hls::stream<unsigned char>& strm_range_o_split) { // Iteration for variable range
+    unsigned char range = *br_range;
+    bool e_range = strm_e_range.read(); // end flags are read one ahead of the data: one up front, then one per bit
+    while (!e_range) {
+#pragma HLS pipeline II = 1
+        e_range = strm_e_range.read();
+        bool bit1 = strm_bit.read();
+        uint8_t prob = strm_prob.read();
+        // NOTE: split1 is declared but never used in this variant
+        unsigned char split1, split_1;
+        ap_uint<3> shift1;
+        //#pragma HLS bind_storage variable = split_1 core = Mul_LUT
+        split_1 = (((range - 1) * prob) >> 8);
+        // bit set -> range shrinks by split_1 + 1; bit clear -> range becomes split_1 + 1
+        if (bit1)
+            range = range - split_1 - 1;
+        else
+            range = split_1 + 1;
+        // renormalize: shift left by the leading-zero count of the 8-bit range
+        ap_uint<8> range2 = range;
+        shift1 = range2.countLeadingZeros();
+        range <<= shift1;
+        // nothing is emitted for a clear bit that needed no shift; the split output is 0 for a clear bit
+        if (bit1 || shift1) {
+            strm_range_o_shift.write(shift1);
+            strm_range_o_split.write(bit1 == true ? split_1 + 1 : 0);
+            strm_range_o_e.write(false);
+        }
+        uint8_t dbg = strm_tab_dbg.read(); // consumed once per bit; the value is not used
+    }
+    strm_range_o_e.write(true); // terminating flag on the output side
+    *br_range = range;
+} // End of iteration for range /////////////////////////////////////////////////
+
+// vpx_enc_range_org_DSP_NorLzd: range update per input bit with the +1 folded into split1; shift from countLeadingZeros.
+void vpx_enc_range_org_DSP_NorLzd(
+    // input (br_range is read on entry and written back on exit)
+    unsigned char* br_range,
+    hls::stream<bool>& strm_bit,
+    hls::stream<uint8_t>& strm_prob,
+    hls::stream<bool>& strm_e_range,
+    hls::stream<uint8_t>& strm_tab_dbg,
+    // output
+    hls::stream<bool>& strm_range_o_e,
+    hls::stream<ap_uint<3> >& strm_range_o_shift,
+    hls::stream<unsigned char>& strm_range_o_split) { // Iteration for variable range
+    unsigned char range = *br_range;
+    bool e_range = strm_e_range.read(); // end flags are read one ahead of the data: one up front, then one per bit
+    while (!e_range) {
+#pragma HLS pipeline II = 1
+        e_range = strm_e_range.read();
+        bool bit1 = strm_bit.read();
+        uint8_t prob = strm_prob.read();
+
+        unsigned char split1;
+        ap_uint<3> shift1;
+        split1 = 1 + (((range - 1) * prob) >> 8);
+        // bit set -> range shrinks by split1; bit clear -> range becomes split1
+        if (bit1)
+            range = range - split1;
+        else
+            range = split1;
+        // renormalize: shift left by the leading-zero count of the 8-bit range
+        ap_uint<8> range2 = range;
+        shift1 = range2.countLeadingZeros();
+        range <<= shift1;
+        // nothing is emitted for a clear bit that needed no shift; the split output is 0 for a clear bit
+        if (bit1 || shift1) {
+            strm_range_o_shift.write(shift1);
+            strm_range_o_split.write(bit1 == true ? split1 : 0);
+            strm_range_o_e.write(false);
+        }
+        uint8_t dbg = strm_tab_dbg.read(); // consumed once per bit; the value is not used
+    }
+    strm_range_o_e.write(true); // terminating flag on the output side
+    *br_range = range;
+} // End of iteration for range /////////////////////////////////////////////////
+
+// vpx_enc_range_DSP_fastLzd: range update that evaluates both bit outcomes and takes the shifts from a nibble table.
+void vpx_enc_range_DSP_fastLzd(
+    // input (br_range is read on entry and written back on exit)
+    unsigned char* br_range,
+    hls::stream<bool>& strm_bit,
+    hls::stream<uint8_t>& strm_prob,
+    hls::stream<bool>& strm_e_range,
+    hls::stream<uint8_t>& strm_tab_dbg,
+    // output
+    hls::stream<bool>& strm_range_o_e,
+    hls::stream<ap_uint<3> >& strm_range_o_shift,
+    hls::stream<unsigned char>& strm_range_o_split) { // Iteration for variable range
+    const ap_uint<2> tt[16] = {0, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0}; // nibble leading zeros for 1..15; tt[0] is 0
+    unsigned char range = *br_range;
+    bool e_range = strm_e_range.read(); // end flags are read one ahead of the data: one up front, then one per bit
+    while (!e_range) {
+#pragma HLS pipeline II = 1
+        e_range = strm_e_range.read();
+        bool bit1 = strm_bit.read();
+        uint8_t prob = strm_prob.read();
+
+        unsigned char split1;
+        ap_uint<3> shift1;
+        split1 = 1 + (((range - 1) * prob) >> 8);
+        // evaluate both outcomes up front: range1 is used when the bit is set, range0 when it is clear
+        unsigned char range0, range1;
+        range1 = range - split1;
+        range0 = split1;
+        // shift per candidate: the high nibble's tt entry if that nibble is non-zero, else 4 + the low nibble's entry
+        ap_uint<3> s0 = 0;
+        ap_uint<3> s1 = 0;
+        if (range0 & 0xF0)
+            s0 = tt[range0 >> 4];
+        else
+            s0 = 4 + tt[range0];
+        if (range1 & 0xF0)
+            s1 = tt[range1 >> 4];
+        else
+            s1 = 4 + tt[range1];
+        range0 <<= s0;
+        range1 <<= s1;
+        // select the candidate that matches the actual bit
+        if (bit1) {
+            range = range1;
+            shift1 = s1;
+        } else {
+            range = range0;
+            shift1 = s0;
+        }
+        // nothing is emitted for a clear bit that needed no shift; the split output is 0 for a clear bit
+        if (bit1 || shift1) {
+            strm_range_o_shift.write(shift1);
+            strm_range_o_split.write(bit1 == true ? split1 : 0);
+            strm_range_o_e.write(false);
+        }
+        uint8_t dbg = strm_tab_dbg.read(); // consumed once per bit; the value is not used
+    }
+    strm_range_o_e.write(true); // terminating flag on the output side
+    *br_range = range;
+} // End of iteration for range /////////////////////////////////////////////////
+// void vpx_enc_range_NoDsp_tt8Lzd(// No dsp, faster Leading zero detecting
+
+// vpx_enc_range: range update that evaluates both bit outcomes, takes shifts from a nibble table, and adds the +1 late.
+void vpx_enc_range(
+    // input (br_range is read on entry and written back on exit)
+    unsigned char* br_range,
+    hls::stream<bool>& strm_bit,
+    hls::stream<uint8_t>& strm_prob,
+    hls::stream<bool>& strm_e_range,
+    hls::stream<uint8_t>& strm_tab_dbg,
+    // output
+    hls::stream<bool>& strm_range_o_e,
+    hls::stream<ap_uint<3> >& strm_range_o_shift,
+    hls::stream<unsigned char>& strm_range_o_split) { // Iteration for variable range
+    const ap_uint<2> tt[16] = {0, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0}; // nibble leading zeros for 1..15; tt[0] is 0
+    unsigned char range = *br_range;
+    bool e_range = strm_e_range.read(); // end flags are read one ahead of the data: one up front, then one per bit
+    while (!e_range) {
+#pragma HLS pipeline II = 1
+        e_range = strm_e_range.read();
+        bool bit1 = strm_bit.read();
+        uint8_t prob = strm_prob.read();
+
+        unsigned char split_1;
+        ap_uint<3> shift1;
+        //#pragma HLS bind_storage variable = split_1 core = Mul_LUT
+        // split1 = 1 + (((range - 1) * prob) >> 8);
+        split_1 = (((range - 1) * prob) >> 8);
+        unsigned char range0, range1; // range1 is used when the bit is set, range0 when it is clear
+        range1 = range - split_1 - 1;
+        range0 = split_1 + 1;
+        // shift per candidate: the high nibble's tt entry if that nibble is non-zero, else 4 + the low nibble's entry
+        ap_uint<3> s0 = 0;
+        ap_uint<3> s1 = 0;
+        if (range0 & 0xF0)
+            s0 = tt[range0 >> 4];
+        else
+            s0 = 4 + tt[range0];
+        if (range1 & 0xF0)
+            s1 = tt[range1 >> 4];
+        else
+            s1 = 4 + tt[range1];
+        range0 <<= s0;
+        range1 <<= s1;
+        // select the candidate that matches the actual bit
+        if (bit1) {
+            range = range1;
+            shift1 = s1;
+        } else {
+            range = range0;
+            shift1 = s0;
+        }
+        // nothing is emitted for a clear bit that needed no shift; the split output is 0 for a clear bit
+        if (bit1 || shift1) {
+            strm_range_o_shift.write(shift1);
+            strm_range_o_split.write(bit1 == true ? split_1 + 1 : 0);
+            strm_range_o_e.write(false);
+        }
+        uint8_t dbg = strm_tab_dbg.read(); // consumed once per bit; the value is not used
+    }
+    strm_range_o_e.write(true); // terminating flag on the output side
+    *br_range = range;
+} // End of iteration for range /////////////////////////////////////////////////
+
+// void vpx_enc_range(
+
+// vpx_enc_range_lut_lzd: range update that evaluates both bit outcomes and takes the shifts from the 256-entry tt256 table.
+void vpx_enc_range_lut_lzd(
+    // input (br_range is read on entry and written back on exit)
+    unsigned char* br_range,
+    hls::stream<bool>& strm_bit,
+    hls::stream<uint8_t>& strm_prob,
+    hls::stream<bool>& strm_e_range,
+    hls::stream<uint8_t>& strm_tab_dbg,
+    // output
+    hls::stream<bool>& strm_range_o_e,
+    hls::stream<ap_uint<3> >& strm_range_o_shift,
+    hls::stream<unsigned char>& strm_range_o_split) { // Iteration for variable range
+    const ap_uint<2> tt[16] = {0, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0}; // only referenced by commented-out code below
+    const ap_uint<3> tt256[256] = { // the table actually used for s0/s1 below; tt256[0] is 0
+        0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+    const ap_uint<3> tt128[128] = {0, 6, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // not referenced in this function
+                                   2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+    const ap_uint<3> tt64[64] = {0, 5, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, // only referenced by commented-out code below
+                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+    unsigned char range = *br_range;
+    bool e_range = strm_e_range.read(); // end flags are read one ahead of the data: one up front, then one per bit
+    while (!e_range) {
+#pragma HLS pipeline II = 1
+        e_range = strm_e_range.read();
+        bool bit1 = strm_bit.read();
+        uint8_t prob = strm_prob.read();
+
+        unsigned char split_1;
+        ap_uint<3> shift1;
+        //#pragma HLS bind_storage variable = split_1 core = Mul_LUT
+        // split1 = 1 + (((range - 1) * prob) >> 8);
+        split_1 = (((range - 1) * prob) >> 8);
+        unsigned char range0, range1; // range1 is used when the bit is set, range0 when it is clear
+        range1 = range - split_1 - 1;
+        range0 = split_1 + 1;
+        // both shifts come straight from tt256; the nibble/tt64 alternatives are left commented out
+        ap_uint<3> s0 = 0;
+        ap_uint<3> s1 = 0;
+        // if (range0&0xF0) s0 = tt[range0>>4];
+        // else s0 = 4 + tt[range0];
+        // if (range1&0xF0) s1 = tt[range1>>4];
+        // else s1 = 4 + tt[range1];
+        ////////////////////////////
+        s0 = tt256[range0];
+        s1 = tt256[range1];
+        ///////////////////////////////////////
+        // if (range0&0xFc) s0 = tt64[range0>>2];
+        // else s0 = 2 + tt64[range0];
+        // if (range1&0xFc) s1 = tt64[range1>>2];
+        // else s1 = 2 + tt64[range1];
+
+        range0 <<= s0;
+        range1 <<= s1;
+        // select the candidate that matches the actual bit
+        if (bit1) {
+            range = range1;
+            shift1 = s1;
+        } else {
+            range = range0;
+            shift1 = s0;
+        }
+        // nothing is emitted for a clear bit that needed no shift; the split output is 0 for a clear bit
+        if (bit1 || shift1) {
+            strm_range_o_shift.write(shift1);
+            strm_range_o_split.write(bit1 == true ? split_1 + 1 : 0);
+            strm_range_o_e.write(false);
+        }
+        uint8_t dbg = strm_tab_dbg.read(); // consumed once per bit; the value is not used
+    }
+    strm_range_o_e.write(true); // terminating flag on the output side
+    *br_range = range;
+} // End of iteration for range /////////////////////////////////////////////////
+
+// vpx_enc_value: add each split to the low value, shift it, and emit a byte plus carry flag when cnt24 + shift reaches 24.
+void vpx_enc_value(
+    // in/out state: loaded on entry, written back on exit
+    int* br_count,
+    unsigned int* br_lowvalue,
+    hls::stream<bool>& strm_range_o_e,
+    hls::stream<ap_uint<3> >& strm_range_o_shift,
+    hls::stream<unsigned char>& strm_range_o_split,
+    // Output ////////////////////
+    hls::stream<bool>& strm_value_o_e,
+    hls::stream<bool>& strm_value_o_cy,
+    hls::stream<unsigned char>& strm_value_o_byte) { // Iteration for variable value and count
+    unsigned char cnt24 = *br_count + 24; // the count is kept biased by +24 inside this function
+    ap_uint<32> value = *br_lowvalue;
+    // Pre-reading: end flags run one ahead of the (shift, split) data //////////////////////
+    bool e_value = strm_range_o_e.read();
+    while (!e_value) {
+#pragma HLS pipeline II = 1
+        e_value = strm_range_o_e.read();
+        ap_uint<3> shift = strm_range_o_shift.read();
+        unsigned char split = strm_range_o_split.read();
+        // candidate output: the 8 bits of value starting at bit cnt24, with the bit above them as carry
+        value += split;
+        unsigned char pre_byte = value(cnt24 + 7, cnt24);
+        bool cy = value[cnt24 + 8];
+        ap_uint<32> value2;
+        // value2 = value with bit cnt24 and everything above it cleared
+        if (cnt24 > 0) value2(cnt24 - 1, 0) = value(cnt24 - 1, 0);
+
+        value2(31, cnt24) = 0;
+
+        bool isBE24 = (cnt24 + shift) >= 24;
+        int cnt_sh = cnt24 + shift; // NOTE: cnt_sh is never read
+
+        if (isBE24) {
+            cnt24 = cnt24 + shift - 8; // a byte is emitted below, so the counter drops by 8
+            value2 <<= shift;
+            value = value2;
+
+            strm_value_o_e.write(false);
+            strm_value_o_cy.write(cy);
+            strm_value_o_byte.write(pre_byte);
+        } else {
+            cnt24 = cnt24 + shift;
+            value <<= shift;
+        }
+    }
+    strm_value_o_e.write(true); // terminating flag on the output side
+    *br_lowvalue = value;
+    *br_count = (int)cnt24 - 24;
+}
+
+// vpx_enc_run: hold back the latest byte and count the 0xff bytes after it; emit (cy, byte, run) when a non-0xff byte arrives.
+void vpx_enc_run(unsigned char* br_pre_byte,
+                 unsigned short* br_run,
+                 bool* br_isFirst,
+                 hls::stream<bool>& strm_value_o_e,
+                 hls::stream<bool>& strm_value_o_cy,
+                 hls::stream<unsigned char>& strm_value_o_byte,
+                 // Output ////////////////////
+                 hls::stream<bool>& strm_CyByte_o_e,
+                 hls::stream<bool>& strm_CyByte_o_cy,
+                 hls::stream<unsigned char>& strm_CyByte_o_byte,
+                 hls::stream<unsigned short>& strm_CyByte_o_run) { // Iteration for variable
+    unsigned char pre_byte = *br_pre_byte;
+    unsigned short run = *br_run;
+    // Pre-reading: end flags run one ahead of the (cy, byte) data //////////////////////
+    bool e_cy_preByte = strm_value_o_e.read();
+    while (!e_cy_preByte) {
+        e_cy_preByte = strm_value_o_e.read();
+        bool cy = strm_value_o_cy.read();
+        unsigned char new_byte = strm_value_o_byte.read();
+        if (*br_isFirst) { // very first byte: just hold it (cy is not used in this branch)
+            pre_byte = new_byte;
+            *br_isFirst = false;
+        } else {
+            if (new_byte == 0xff) { // only counted; the cy read alongside it is not used in this branch
+                run++;
+            } else { // if not 0xff, must emit pre_byte and run-byte if any
+                // br.buffer[pos++] = pre_byte + (cy?1:0);
+                // for(; run > 0; run--)
+                //	br.buffer[pos++] = cy?0:0xff;
+                strm_CyByte_o_cy.write(cy); // the carry that arrived with new_byte, paired with the held byte
+                strm_CyByte_o_byte.write(pre_byte);
+                strm_CyByte_o_run.write(run);
+                strm_CyByte_o_e.write(false);
+                pre_byte = new_byte;
+                run = 0;
+            }
+        }
+
+    } // while
+    strm_CyByte_o_e.write(true); // terminating flag; the held byte and run are not emitted here, only saved below
+    *br_pre_byte = pre_byte;
+    *br_run = run;
+}
+
+// -- vpx_enc_pos: expands (cy, byte, run) records into output bytes and advances the running byte position --
+void vpx_enc_pos(unsigned int* br_pos,                          // in/out: output byte count, incremented once per byte written
+                 hls::stream<bool>& strm_CyByte_o_e,            // in: end flag; one extra flag is pre-read before the loop
+                 hls::stream<bool>& strm_CyByte_o_cy,           // in: carry flag of each record
+                 hls::stream<unsigned char>& strm_CyByte_o_byte, // in: leading byte of each record
+                 hls::stream<unsigned short>& strm_CyByte_o_run, // in: number of run bytes following the leading byte
+                 // Output ////////////////////
+                 hls::stream<bool>& strm_pos_o_e,               // out: one false per byte written; no true is written here
+                 hls::stream<ap_uint<8> >& strm_pos_o_byte) { // out: expanded bytes
+    unsigned int pos = *br_pos;
+    // Pre-read the first end flag; each loop iteration then reads the next flag together with one record
+    bool e_pos = strm_CyByte_o_e.read();
+    while (!e_pos) {
+        e_pos = strm_CyByte_o_e.read();
+        bool cy = strm_CyByte_o_cy.read();
+        unsigned char byte = strm_CyByte_o_byte.read();
+        unsigned short rn = strm_CyByte_o_run.read(); // full 16-bit stream width, so runs of 256 or more are not truncated
+        strm_pos_o_byte.write(byte + (cy ? 1 : 0)); // leading byte with the carry added
+#ifndef __SYNTHESIS__
+//				uint8_t test1 = byte + (cy?1:0);
+//				fprintf( stderr,  " %.4x\n" , test1);
+#endif
+        pos++;
+        strm_pos_o_e.write(false);
+        for (; rn > 0; rn--) { // run bytes: 0x00 when the carry is set, 0xff otherwise
+            strm_pos_o_byte.write(cy ? 0 : 0xff);
+            pos++;
+            strm_pos_o_e.write(false);
+        }
+    }
+    *br_pos = pos;
+    // strm_pos_o_e.write(true);
+}
+
+// -- vpx_enc_syn: dataflow chain vpx_enc_range -> vpx_enc_value -> vpx_enc_run -> vpx_enc_pos linked by local FIFOs --
+void vpx_enc_syn(
+    // State pointers handed to the individual stages (see the calls below)
+    unsigned char* range,     // passed to vpx_enc_range
+    int* cnt,                 // passed to vpx_enc_value
+    unsigned int* value,      // passed to vpx_enc_value
+    unsigned char* pre_byte,  // passed to vpx_enc_run
+    unsigned short* run,      // passed to vpx_enc_run
+    bool* br_isFirst,         // passed to vpx_enc_run
+    unsigned int* pos,        // passed to vpx_enc_pos
+    // input (all four streams are consumed by vpx_enc_range)
+    hls::stream<bool>& strm_bit,
+    hls::stream<uint8_t>& strm_prob,
+    hls::stream<bool>& strm_e_range,
+    hls::stream<uint8_t>& strm_tab_dbg,
+    // output (both streams are produced by vpx_enc_pos)
+    hls::stream<bool>& strm_pos_o_e,
+    hls::stream<ap_uint<8> >& strm_pos_o_byte) {
+#pragma HLS dataflow
+    // clang-format off
+	 hls::stream< bool >          strm_range_o_e; // vpx_enc_range -> vpx_enc_value link, depth-64 FIFOs
+	#pragma HLS stream depth=64 variable=strm_range_o_e
+	#pragma HLS bind_storage variable=strm_range_o_e type=FIFO impl=LUTRAM
+	    hls::stream< ap_uint<3> >    strm_range_o_shift;
+	#pragma HLS stream depth=64 variable=strm_range_o_shift
+	#pragma HLS bind_storage variable=strm_range_o_shift type=FIFO impl=LUTRAM
+	    hls::stream< unsigned char > strm_range_o_split;
+	#pragma HLS stream depth=64 variable=strm_range_o_split
+	#pragma HLS bind_storage variable=strm_range_o_split type=FIFO impl=LUTRAM
+    // clang-format on
+
+    vpx_enc_range(
+        // input
+        range,        // unsigned char* br_range,
+        strm_bit,     // hls::stream<bool>&    strm_bit,
+        strm_prob,    // hls::stream<uint8_t>& strm_prob,
+        strm_e_range, // hls::stream<bool>&    strm_e_range,
+        strm_tab_dbg, // hls::stream<uint8_t>& strm_tab_dbg,
+        // output
+        strm_range_o_e,     // hls::stream< bool >          &strm_range_o_e,
+        strm_range_o_shift, // hls::stream< ap_uint<3> >    &strm_range_o_shift,
+        strm_range_o_split  // hls::stream< unsigned char > &strm_range_o_split
+        );
+
+    // clang-format off
+    hls::stream< bool >          strm_value_o_e; // vpx_enc_value -> vpx_enc_run link, depth-64 FIFOs
+#pragma HLS stream depth=64 variable=strm_value_o_e
+#pragma HLS bind_storage variable=strm_value_o_e type=FIFO impl=LUTRAM
+    hls::stream< bool >          strm_value_o_cy;
+#pragma HLS stream depth=64 variable=strm_value_o_cy
+#pragma HLS bind_storage variable=strm_value_o_cy type=FIFO impl=LUTRAM
+    hls::stream< unsigned char > strm_value_o_byte;
+#pragma HLS stream depth=64 variable=strm_value_o_byte
+#pragma HLS bind_storage variable=strm_value_o_byte type=FIFO impl=LUTRAM
+    // clang-format on
+
+    vpx_enc_value(cnt, value,
+                  strm_range_o_e,     // hls::stream< bool >          &strm_range_o_e,
+                  strm_range_o_shift, // hls::stream< ap_uint<3> >    &strm_range_o_shift,
+                  strm_range_o_split, // hls::stream< unsigned char > &strm_range_o_split,
+                  // Output ////////////////////
+                  strm_value_o_e,   // hls::stream< bool >          &strm_value_o_e,
+                  strm_value_o_cy,  // hls::stream< bool >          &strm_value_o_cy,
+                  strm_value_o_byte // hls::stream< unsigned char > &strm_value_o_byte
+                  );
+
+    // clang-format off
+    hls::stream< bool >          strm_CyByte_o_e; // vpx_enc_run -> vpx_enc_pos link, depth-64 FIFOs
+#pragma HLS stream depth=64 variable=strm_CyByte_o_e
+#pragma HLS bind_storage variable=strm_CyByte_o_e type=FIFO impl=LUTRAM
+    hls::stream< bool >          strm_CyByte_o_cy;
+#pragma HLS stream depth=64 variable=strm_CyByte_o_cy
+#pragma HLS bind_storage variable=strm_CyByte_o_cy type=FIFO impl=LUTRAM
+    hls::stream< unsigned char > strm_CyByte_o_byte;
+#pragma HLS stream depth=64 variable=strm_CyByte_o_byte
+#pragma HLS bind_storage variable=strm_CyByte_o_byte type=FIFO impl=LUTRAM
+    hls::stream< unsigned short> strm_CyByte_o_run;
+#pragma HLS stream depth=64 variable=strm_CyByte_o_run
+#pragma HLS bind_storage variable=strm_CyByte_o_run type=FIFO impl=LUTRAM
+    // clang-format on
+
+    vpx_enc_run(pre_byte,          // unsigned char  *br_pre_byte,
+                run,               // unsigned short *br_run,
+                br_isFirst,        // bool* br_isFirst,
+                strm_value_o_e,    // hls::stream< bool >          &strm_value_o_e,
+                strm_value_o_cy,   // hls::stream< bool >          &strm_value_o_cy,
+                strm_value_o_byte, // hls::stream< unsigned char > &strm_value_o_byte,
+                // Output ////////////////////
+                strm_CyByte_o_e,    // hls::stream< bool >          &strm_CyByte_o_e,
+                strm_CyByte_o_cy,   // hls::stream< bool >          &strm_CyByte_o_cy,
+                strm_CyByte_o_byte, // hls::stream< unsigned char > &strm_CyByte_o_byte,
+                strm_CyByte_o_run   // hls::stream< unsigned short> &strm_CyByte_o_run
+                );
+
+    vpx_enc_pos(pos,                // unsigned int* br_pos,
+                strm_CyByte_o_e,    // hls::stream< bool >          &strm_CyByte_o_e,
+                strm_CyByte_o_cy,   // hls::stream< bool >          &strm_CyByte_o_cy,
+                strm_CyByte_o_byte, // hls::stream< unsigned char > &strm_CyByte_o_byte,
+                strm_CyByte_o_run,  // hls::stream< unsigned short> &strm_CyByte_o_run,
+                // Output ////////////////////
+                strm_pos_o_e,   // hls::stream< bool >          &strm_pos_o_e,
+                strm_pos_o_byte // hls::stream< ap_uint<8> >    &strm_pos_o_byte
+                );
+}
+
+} // namespace details
+} // namespace codec
+} // namespace xf
diff --git a/codec/L2/demos/leptonEnc/kernel/XAcc_common.cpp b/codec/L2/demos/leptonEnc/kernel/XAcc_common.cpp
new file mode 100644
index 0000000000..994b7253d0
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/kernel/XAcc_common.cpp
@@ -0,0 +1,992 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "XAcc_common.hpp"
+#include "XAcc_model.hpp"
+#include "XAcc_lepjpegdecoder.hpp"
+#include "XAcc_lepjfifparser.hpp"
+#include "XAcc_arith.hpp"
+#include "XAcc_77.hpp"
+#include "XAcc_edges.hpp"
+#include "XAcc_dc.hpp"
+#include "stream_to_axi.hpp"
+
+namespace xf {
+namespace codec {
+namespace details {
+
+// -- pre_lepton_encoder_line: dataflow region that fans coef[] out to the 7x7, edge and DC token pre-serializers --
+void pre_lepton_encoder_line(ap_uint<32> len,     // forwarded to every stage; LeptonE_pre_engine passes the block width
+                             uint8_t id_cmp,      // component index; the edge stage receives (id_cmp != 0)
+                             bool is_top_row_cmp, // the 7x7, edge and DC stages receive its negation
+
+                             uint8_t q_tables0[MAX_NUM_COLOR][8][8], // forwarded to pre_serialize_tokens_dc
+                             uint8_t q0,                             // forwarded to pre_serialize_tokens_dc
+                             int32_t idct_q_table_x[3][8][8],        // forwarded to pre_serialize_tokens_edges
+                             int32_t idct_q_table_y[3][8][8],        // forwarded to pre_serialize_tokens_edges
+                             uint8_t min_nois_thld_x[3][64],         // forwarded to pre_serialize_tokens_edges
+                             uint8_t min_nois_thld_y[3][64],         // forwarded to pre_serialize_tokens_edges
+
+                             hls::stream<ap_int<11> > coef[8], // input coefficients, consumed by duplicate_coef
+
+                             // 7x7 (streams passed to pre_serialize_tokens_77)
+                             hls::stream<ap_uint<4> >& strm_nonzero_bin_tmp,
+
+                             hls::stream<ap_uint<6> >& strm_7x7_nz,
+                             hls::stream<ap_uint<4> >& strm_7x7_length,
+
+                             hls::stream<ap_uint<4> >& strm_7x7_num_nonzero_bin,
+                             hls::stream<ap_uint<4> >& strm_7x7_bsr_best_prior,
+
+                             hls::stream<bool>& strm_7x7_cur_bit_sign_tmp,
+
+                             hls::stream<ap_uint<11> >& strm_abs_coef,
+                             hls::stream<ap_uint<6> >& strm_coord,
+
+                             // edge
+                             // from preprocess to edge (these five streams are passed to preprocess())
+                             hls::stream<ap_uint<6> >& non_zero_h_out,
+                             hls::stream<ap_uint<3> >& coef_cnt_h_len,
+                             hls::stream<ap_uint<3> >& coef_cnt_v_len,
+                             hls::stream<ap_uint<3> >& eob_x,
+                             hls::stream<ap_uint<3> >& eob_y,
+
+                             hls::stream<ap_uint<4> >& strm_length_exp_h, // *_h streams: passed to pre_serialize_tokens_edges
+
+                             hls::stream<ap_uint<4> >& strm_best_prior_exp_h,
+
+                             hls::stream<bool>& strm_cur_bit_sign_h,
+                             hls::stream<ap_uint<2> >& strm_tri_sign_h,
+
+                             hls::stream<ap_uint<11> >& strm_abs_coef_nois_h,
+                             hls::stream<ap_uint<8> >& strm_ctx_nois_h,
+                             hls::stream<ap_uint<8> >& strm_min_nois_h,
+                             hls::stream<ap_uint<6> >& strm_coord_nois_h,
+
+                             hls::stream<ap_uint<4> >& strm_length_exp_v, // *_v streams: passed to pre_serialize_tokens_edges
+
+                             hls::stream<ap_uint<4> >& strm_best_prior_exp_v,
+
+                             hls::stream<bool>& strm_cur_bit_sign_v,
+                             hls::stream<ap_uint<2> >& strm_tri_sign_v,
+
+                             hls::stream<ap_uint<11> >& strm_abs_coef_nois_v,
+                             hls::stream<ap_uint<8> >& strm_ctx_nois_v,
+                             hls::stream<ap_uint<8> >& strm_min_nois_v,
+                             hls::stream<ap_uint<6> >& strm_coord_nois_v,
+
+                             // dc (streams passed to pre_serialize_tokens_dc)
+                             hls::stream<int16_t>& strm_coef_dc,
+                             hls::stream<int>& strm_uncertainty,
+                             hls::stream<int>& strm_uncertainty2) {
+#pragma HLS DATAFLOW
+// clang-format off
+ #pragma HLS ARRAY_PARTITION variable = idct_q_table_x complete dim=3
+ #pragma HLS ARRAY_PARTITION variable = idct_q_table_y complete dim=3
+ #pragma HLS ARRAY_PARTITION variable = q_tables0 		complete dim=2
+    // clang-format on
+
+    // clang-format off
+     hls::stream<ap_int<11> > 			strm_coef[8]; // duplicate_coef -> preprocess
+#pragma HLS stream depth=1024 variable=	strm_coef
+#pragma HLS bind_storage variable = 		strm_coef type=FIFO impl=LUTRAM
+#pragma HLS array_partition variable=   strm_coef complete dim=0
+
+	hls::stream< coef_t> str_rast8[8]; // duplicate_coef -> pre_serialize_tokens_dc
+#pragma HLS stream depth=1024 variable=str_rast8
+#pragma HLS bind_storage variable=str_rast8 type=FIFO impl=LUTRAM
+#pragma HLS array_partition variable=str_rast8 complete dim=0
+
+	hls::stream< coef_t> str_dc_in; // duplicate_coef -> pre_serialize_tokens_dc
+#pragma HLS stream depth=1024 variable=str_dc_in
+#pragma HLS bind_storage variable=str_dc_in type=FIFO impl=LUTRAM
+    // clang-format on
+    duplicate_coef(coef, len, strm_coef, str_rast8, str_dc_in); // stage 1: fan the input out to three local stream sets
+
+    // clang-format off
+     static hls::stream<ap_uint<6> > 	 strm_num_nonzeros_7x7; // preprocess -> pre_serialize_tokens_77 (this group is declared static)
+#pragma HLS stream depth=256   variable = strm_num_nonzeros_7x7
+#pragma HLS bind_storage 		   variable = strm_num_nonzeros_7x7 type=FIFO impl=LUTRAM
+
+     static hls::stream<ap_int<11> > 	   strm_coef_here;
+#pragma HLS stream depth=256    variable = strm_coef_here
+#pragma HLS bind_storage 			variable = strm_coef_here type=FIFO impl=LUTRAM
+     static hls::stream<ap_int<11> > 	 strm_coef_above;
+#pragma HLS stream depth=256 	variable=strm_coef_above
+#pragma HLS bind_storage 			variable=strm_coef_above type=FIFO impl=LUTRAM
+     static hls::stream<ap_int<11> > 	strm_coef_above_left;
+#pragma HLS stream depth=256 variable=strm_coef_above_left
+#pragma HLS bind_storage variable=strm_coef_above_left type=FIFO impl=LUTRAM
+     static hls::stream<ap_int<11> > strm_coef_left;
+#pragma HLS stream depth=256 variable=strm_coef_left
+#pragma HLS bind_storage variable=strm_coef_left type=FIFO impl=LUTRAM
+
+     static hls::stream<ap_uint<6> > strm_cur_nonzeros_cnt;
+#pragma HLS stream depth=256 variable=strm_cur_nonzeros_cnt
+#pragma HLS bind_storage variable=strm_cur_nonzeros_cnt type=FIFO impl=LUTRAM
+     static hls::stream<ap_uint<6> > strm_lft_nonzeros_cnt;
+#pragma HLS stream depth=256 variable=strm_lft_nonzeros_cnt
+#pragma HLS bind_storage variable=strm_lft_nonzeros_cnt type=FIFO impl=LUTRAM
+     static hls::stream<ap_uint<6> > strm_abv_nonzeros_cnt;
+#pragma HLS stream depth=256 variable=strm_abv_nonzeros_cnt
+#pragma HLS bind_storage variable=strm_abv_nonzeros_cnt type=FIFO impl=LUTRAM
+
+     hls::stream<ap_int<11> > coef_h[8]; // preprocess -> pre_serialize_tokens_edges (this group is not static)
+#pragma HLS bind_storage variable=coef_h type=FIFO impl=LUTRAM
+#pragma HLS stream depth=64 variable=coef_h
+#pragma HLS array_partition variable=coef_h complete dim=0
+
+     hls::stream<ap_int<11> > coef_above_h[8];
+#pragma HLS bind_storage variable=coef_above_h type=FIFO impl=LUTRAM
+#pragma HLS stream depth=64 variable=coef_above_h
+#pragma HLS array_partition variable=coef_above_h complete dim=0
+
+ 	hls::stream<bool> strm_has_left_h; // no bind_storage pragma is given for the four bool streams in this group
+#pragma HLS stream depth=64 variable=strm_has_left_h
+
+     hls::stream<bool> coef_e_h;
+#pragma HLS stream depth=64 variable=coef_e_h
+
+     hls::stream<ap_int<11> > coef_v[8];
+#pragma HLS bind_storage variable=coef_v type=FIFO impl=LUTRAM
+#pragma HLS stream depth=64 variable=coef_v
+#pragma HLS array_partition variable=coef_v complete dim=0
+
+     hls::stream<ap_int<11> > coef_left_v[8];
+#pragma HLS bind_storage variable=coef_left_v type=FIFO impl=LUTRAM
+#pragma HLS stream depth=64 variable=coef_left_v
+#pragma HLS array_partition variable=coef_left_v complete dim=0
+
+ 	hls::stream<bool> strm_has_left_v;
+#pragma HLS stream depth=64 variable=strm_has_left_v
+
+     hls::stream<bool> coef_e_v;
+#pragma HLS stream depth=64 variable=coef_e_v
+
+     hls::stream<ap_uint<3> > strm_lane_h("lane_h");
+#pragma HLS stream depth=64 variable=strm_lane_h
+#pragma HLS bind_storage variable=strm_lane_h type=FIFO impl=LUTRAM
+
+     hls::stream<ap_uint<3> > strm_lane_v("lane_v");
+#pragma HLS stream depth=64 variable=strm_lane_v
+#pragma HLS bind_storage variable=strm_lane_v type=FIFO impl=LUTRAM
+    // clang-format on
+    preprocess(len, id_cmp, is_top_row_cmp, strm_coef, // stage 2: feeds both the 7x7 and the edge stage
+
+               // to 77
+               strm_coef_here, strm_coef_left, strm_coef_above, strm_coef_above_left,
+
+               // to edge
+               coef_h, coef_above_h, strm_has_left_h, coef_e_h, coef_v, coef_left_v, strm_has_left_v, coef_e_v,
+
+               // to 77
+               strm_cur_nonzeros_cnt, strm_lft_nonzeros_cnt, strm_abv_nonzeros_cnt, strm_num_nonzeros_7x7,
+
+               // to edge
+               non_zero_h_out, coef_cnt_h_len, strm_lane_h, coef_cnt_v_len, strm_lane_v, eob_x, eob_y);
+
+    pre_serialize_tokens_77(len, !is_top_row_cmp, // stage 3a: 7x7 tokens
+
+                            strm_num_nonzeros_7x7, strm_coef_here, strm_coef_above, strm_coef_left,
+                            strm_coef_above_left,
+
+                            strm_cur_nonzeros_cnt, strm_abv_nonzeros_cnt, strm_lft_nonzeros_cnt,
+
+                            strm_nonzero_bin_tmp,
+
+                            strm_7x7_nz, strm_7x7_length,
+
+                            strm_7x7_num_nonzero_bin, strm_7x7_bsr_best_prior,
+
+                            strm_7x7_cur_bit_sign_tmp,
+
+                            strm_abs_coef, strm_coord);
+
+    pre_serialize_tokens_edges(len, id_cmp != 0, min_nois_thld_x, min_nois_thld_y, false, !is_top_row_cmp, false,
+                               // non_zero_h_out,
+                               // coef_cnt_h_len,
+                               strm_lane_h,
+
+                               // coef_cnt_v_len,
+                               strm_lane_v,
+
+                               // eob_x,
+                               // eob_y,
+
+                               idct_q_table_x, idct_q_table_y,
+
+                               coef_h, coef_above_h, strm_has_left_h, coef_e_h, coef_v, coef_left_v, strm_has_left_v,
+                               coef_e_v,
+
+                               strm_length_exp_h,
+
+                               strm_best_prior_exp_h,
+
+                               strm_cur_bit_sign_h, strm_tri_sign_h,
+
+                               strm_abs_coef_nois_h, strm_ctx_nois_h, strm_min_nois_h, strm_coord_nois_h,
+
+                               //// vertical-edge outputs
+                               strm_length_exp_v,
+
+                               strm_best_prior_exp_v,
+
+                               strm_cur_bit_sign_v, strm_tri_sign_v,
+
+                               strm_abs_coef_nois_v, strm_ctx_nois_v, strm_min_nois_v, strm_coord_nois_v);
+
+    pre_serialize_tokens_dc(!is_top_row_cmp, id_cmp, len, q_tables0, q0, // stage 3c: DC tokens
+
+                            str_rast8, str_dc_in,
+
+                            strm_coef_dc, strm_uncertainty, strm_uncertainty2);
+}
+// -- push_lepton_encoder_line: dataflow region running the 7x7/edge/DC bit pushers, collect, probability_look_up and vpx_enc_syn --
+void push_lepton_encoder_line(ap_uint<32> len, // forwarded to the three bit pushers and to collect
+                              uint8_t id_cmp,  // probability_look_up receives (id_cmp != 0)
+                              // bool is_top_row_cmp,
+
+                              // 7x7 (streams consumed by push_bit_7x7_v2)
+                              hls::stream<ap_uint<4> >& strm_nonzero_bin_tmp,
+
+                              hls::stream<ap_uint<6> >& strm_7x7_nz,
+                              hls::stream<ap_uint<4> >& strm_7x7_length,
+
+                              hls::stream<ap_uint<4> >& strm_7x7_num_nonzero_bin,
+                              hls::stream<ap_uint<4> >& strm_7x7_bsr_best_prior,
+
+                              hls::stream<bool>& strm_7x7_cur_bit_sign_tmp,
+
+                              hls::stream<ap_uint<11> >& strm_abs_coef,
+                              hls::stream<ap_uint<6> >& strm_coord,
+
+                              // edge (streams consumed by push_bit_edge_0)
+                              // from preprocess to edge
+                              hls::stream<ap_uint<6> >& non_zero_h_out,
+                              hls::stream<ap_uint<3> >& coef_cnt_h_len,
+                              hls::stream<ap_uint<3> >& coef_cnt_v_len,
+                              hls::stream<ap_uint<3> >& eob_x,
+                              hls::stream<ap_uint<3> >& eob_y,
+
+                              hls::stream<ap_uint<4> >& strm_length_exp_h,
+
+                              hls::stream<ap_uint<4> >& strm_best_prior_exp_h,
+
+                              hls::stream<bool>& strm_cur_bit_sign_h,
+                              hls::stream<ap_uint<2> >& strm_tri_sign_h,
+
+                              hls::stream<ap_uint<11> >& strm_abs_coef_nois_h,
+                              hls::stream<ap_uint<8> >& strm_ctx_nois_h,
+                              hls::stream<ap_uint<8> >& strm_min_nois_h,
+                              hls::stream<ap_uint<6> >& strm_coord_nois_h,
+
+                              hls::stream<ap_uint<4> >& strm_length_exp_v,
+
+                              hls::stream<ap_uint<4> >& strm_best_prior_exp_v,
+
+                              hls::stream<bool>& strm_cur_bit_sign_v,
+                              hls::stream<ap_uint<2> >& strm_tri_sign_v,
+
+                              hls::stream<ap_uint<11> >& strm_abs_coef_nois_v,
+                              hls::stream<ap_uint<8> >& strm_ctx_nois_v,
+                              hls::stream<ap_uint<8> >& strm_min_nois_v,
+                              hls::stream<ap_uint<6> >& strm_coord_nois_v,
+
+                              // dc (streams consumed by dc_push_bit_v2)
+                              hls::stream<int16_t>& strm_coef,
+                              hls::stream<int>& strm_uncertainty,
+                              hls::stream<int>& strm_uncertainty2,
+
+                              // output (state pointers and byte streams forwarded to vpx_enc_syn)
+                              unsigned char* range,
+                              int* count,
+                              unsigned int* value,
+                              unsigned char* pre_byte,
+                              unsigned short* run,
+                              bool* br_isFirst,
+                              unsigned int* pos,
+
+                              hls::stream<bool>& strm_pos_o_e,
+                              hls::stream<ap_uint<8> >& strm_pos_o_byte) {
+#pragma HLS DATAFLOW
+    // clang-format off
+	hls::stream<ap_uint<4>  > strm_sel_tab_77; // push_bit_7x7_v2 -> collect
+#pragma HLS stream depth=1024 variable=strm_sel_tab_77
+#pragma HLS bind_storage variable=strm_sel_tab_77 type=FIFO impl=LUTRAM
+	hls::stream<bool>		  strm_cur_bit_77("bit_77");
+#pragma HLS stream depth=1024 variable=strm_cur_bit_77
+#pragma HLS bind_storage variable=strm_cur_bit_77 type=FIFO impl=LUTRAM
+
+    hls::stream<short>         strm_len_77("len_77");
+#pragma HLS stream depth=128 variable=strm_len_77
+#pragma HLS bind_storage variable=strm_len_77 type=FIFO impl=LUTRAM
+	hls::stream<ap_uint<16> > strm_addr1_77; // the four 7x7 address FIFOs are bound to BRAM, unlike the edge/DC ones
+#pragma HLS stream depth=1024 variable=strm_addr1_77
+#pragma HLS bind_storage variable=strm_addr1_77 type=FIFO impl=BRAM
+	hls::stream<ap_uint<16> > strm_addr2_77;
+#pragma HLS stream depth=1024 variable=strm_addr2_77
+#pragma HLS bind_storage variable=strm_addr2_77 type=FIFO impl=BRAM
+	hls::stream<ap_uint<16> > strm_addr3_77;
+#pragma HLS stream depth=1024 variable=strm_addr3_77
+#pragma HLS bind_storage variable=strm_addr3_77 type=FIFO impl=BRAM
+	hls::stream<ap_uint<16> > strm_addr4_77;
+#pragma HLS stream depth=1024 variable=strm_addr4_77
+#pragma HLS bind_storage variable=strm_addr4_77 type=FIFO impl=BRAM
+    // clang-format on
+
+    push_bit_7x7_v2(len, strm_7x7_nz, strm_7x7_length, // stage 1a: 7x7 tokens
+
+                    strm_nonzero_bin_tmp,
+
+                    strm_7x7_num_nonzero_bin, strm_7x7_bsr_best_prior,
+
+                    strm_7x7_cur_bit_sign_tmp,
+
+                    strm_abs_coef, strm_coord,
+
+                    strm_sel_tab_77, strm_cur_bit_77,
+                    //		strm_e_77,
+                    strm_len_77, strm_addr1_77, strm_addr2_77, strm_addr3_77, strm_addr4_77);
+
+    // clang-format off
+	hls::stream<ap_uint<4>  > strm_sel_tab_edge("sel_tab"); // push_bit_edge_0 -> collect
+#pragma HLS stream depth=512 variable=strm_sel_tab_edge
+#pragma HLS bind_storage variable=strm_sel_tab_edge type=FIFO impl=LUTRAM
+	hls::stream<bool>		  strm_cur_bit_edge("bit_edge");
+#pragma HLS stream depth=512 variable=strm_cur_bit_edge
+#pragma HLS bind_storage variable=strm_cur_bit_edge type=FIFO impl=LUTRAM
+	hls::stream<bool>		  strm_e_edge("e_edge"); // not connected: the strm_e_edge argument is commented out in both calls below
+#pragma HLS stream depth=512 variable=strm_e_edge
+#pragma HLS bind_storage variable=strm_e_edge type=FIFO impl=LUTRAM
+	hls::stream<ap_uint<16> > strm_addr1_edge("addr1");
+#pragma HLS stream depth=512 variable=strm_addr1_edge
+#pragma HLS bind_storage variable=strm_addr1_edge type=FIFO impl=LUTRAM
+	hls::stream<ap_uint<16> > strm_addr2_edge;
+#pragma HLS stream depth=512 variable=strm_addr2_edge
+#pragma HLS bind_storage variable=strm_addr2_edge type=FIFO impl=LUTRAM
+	hls::stream<ap_uint<16> > strm_addr3_edge;
+#pragma HLS stream depth=512 variable=strm_addr3_edge
+#pragma HLS bind_storage variable=strm_addr3_edge type=FIFO impl=LUTRAM
+	hls::stream<ap_uint<16> > strm_addr4_edge;
+#pragma HLS stream depth=512 variable=strm_addr4_edge
+#pragma HLS bind_storage variable=strm_addr4_edge type=FIFO impl=LUTRAM
+
+	hls::stream<short >         	  strm_edge_len;
+#pragma HLS stream depth=128 variable=strm_edge_len
+#pragma HLS bind_storage         variable=strm_edge_len type=FIFO impl=LUTRAM
+    // clang-format on
+    push_bit_edge_0(len, non_zero_h_out, // stage 1b: edge tokens
+
+                    coef_cnt_h_len, eob_x, strm_length_exp_h,
+
+                    strm_best_prior_exp_h,
+
+                    strm_cur_bit_sign_h, strm_tri_sign_h,
+
+                    strm_abs_coef_nois_h, strm_ctx_nois_h, strm_min_nois_h, strm_coord_nois_h,
+
+                    coef_cnt_v_len, eob_y,
+
+                    strm_length_exp_v,
+
+                    strm_best_prior_exp_v,
+
+                    strm_cur_bit_sign_v, strm_tri_sign_v,
+
+                    strm_abs_coef_nois_v, strm_ctx_nois_v, strm_min_nois_v, strm_coord_nois_v,
+
+                    strm_edge_len,
+
+                    strm_sel_tab_edge, strm_cur_bit_edge,
+                    // strm_e_edge,
+                    strm_addr1_edge, strm_addr2_edge, strm_addr3_edge, strm_addr4_edge);
+
+    // clang-format off
+	hls::stream<ap_uint<4>  > strm_sel_tab_dc("sel_tab"); // dc_push_bit_v2 -> collect
+#pragma HLS stream depth=512 variable=strm_sel_tab_dc
+#pragma HLS bind_storage variable=strm_sel_tab_dc type=FIFO impl=LUTRAM
+	hls::stream<bool>		  strm_cur_bit_dc("bit_dc");
+#pragma HLS stream depth=512 variable=strm_cur_bit_dc
+#pragma HLS bind_storage variable=strm_cur_bit_dc type=FIFO impl=LUTRAM
+	hls::stream<short>		  strm_len_dc("len_dc");
+#pragma HLS stream depth=128 variable=strm_len_dc
+#pragma HLS bind_storage variable=strm_len_dc type=FIFO impl=LUTRAM
+
+	hls::stream<ap_uint<16> > strm_addr1_dc("addr1");
+#pragma HLS stream depth=512 variable=strm_addr1_dc
+#pragma HLS bind_storage variable=strm_addr1_dc type=FIFO impl=LUTRAM
+	hls::stream<ap_uint<16> > strm_addr2_dc;
+#pragma HLS stream depth=512 variable=strm_addr2_dc
+#pragma HLS bind_storage variable=strm_addr2_dc type=FIFO impl=LUTRAM
+	hls::stream<ap_uint<16> > strm_addr3_dc;
+#pragma HLS stream depth=512 variable=strm_addr3_dc
+#pragma HLS bind_storage variable=strm_addr3_dc type=FIFO impl=LUTRAM
+    // clang-format on
+    dc_push_bit_v2(len, strm_coef, strm_uncertainty, strm_uncertainty2, // stage 1c: DC tokens (three address streams only)
+
+                   strm_sel_tab_dc, strm_cur_bit_dc,
+                   //		strm_e_dc,
+                   strm_len_dc, strm_addr1_dc, strm_addr2_dc, strm_addr3_dc);
+
+    // clang-format off
+	hls::stream<ap_uint<4>  > strm_sel_tab; // collect -> probability_look_up, depth-32 FIFOs
+#pragma HLS stream depth=32 variable=strm_sel_tab
+#pragma HLS bind_storage variable=strm_sel_tab type=FIFO impl=LUTRAM
+	hls::stream<bool>		  strm_cur_bit("res_bit");
+#pragma HLS stream depth=32 variable=strm_cur_bit
+#pragma HLS bind_storage variable=strm_cur_bit type=FIFO impl=LUTRAM
+	hls::stream<bool>		  strm_e_in("res_e");
+#pragma HLS stream depth=32 variable=strm_e_in
+#pragma HLS bind_storage variable=strm_e_in type=FIFO impl=LUTRAM
+	hls::stream<ap_uint<16> > strm_addr1;
+#pragma HLS stream depth=32 variable=strm_addr1
+#pragma HLS bind_storage variable=strm_addr1 type=FIFO impl=LUTRAM
+	hls::stream<ap_uint<16> > strm_addr2;
+#pragma HLS stream depth=32 variable=strm_addr2
+#pragma HLS bind_storage variable=strm_addr2 type=FIFO impl=LUTRAM
+	hls::stream<ap_uint<16> > strm_addr3; // bound to LUTRAM by its own pragma, matching strm_addr1/2/4
+#pragma HLS stream depth=32 variable=strm_addr3
+#pragma HLS bind_storage variable=strm_addr3 type=FIFO impl=LUTRAM
+	hls::stream<ap_uint<16> > strm_addr4;
+#pragma HLS stream depth=32 variable=strm_addr4
+#pragma HLS bind_storage variable=strm_addr4 type=FIFO impl=LUTRAM
+    // clang-format on
+
+    // clang-format off
+    // clang-format on
+    collect<short, short, short>(len, // stage 2: merges the 7x7, edge and DC streams into one set
+
+                                 strm_sel_tab_77, strm_cur_bit_77,
+                                 //		strm_e_77,
+                                 strm_len_77, strm_addr1_77, strm_addr2_77, strm_addr3_77, strm_addr4_77,
+
+                                 strm_sel_tab_edge, strm_cur_bit_edge, strm_edge_len,
+                                 // strm_e_edge,
+                                 strm_addr1_edge, strm_addr2_edge, strm_addr3_edge, strm_addr4_edge,
+
+                                 strm_sel_tab_dc, strm_cur_bit_dc,
+                                 //		strm_e_dc,
+                                 strm_len_dc, strm_addr1_dc, strm_addr2_dc, strm_addr3_dc,
+
+                                 strm_sel_tab, strm_cur_bit, strm_e_in, strm_addr1, strm_addr2, strm_addr3, strm_addr4);
+
+    // clang-format off
+    hls::stream<bool>    strm_bit; // probability_look_up -> vpx_enc_syn, depth-256 FIFOs
+#pragma HLS stream depth=256 variable=strm_bit
+#pragma HLS bind_storage variable=strm_bit type=FIFO impl=LUTRAM
+    hls::stream<uint8_t> strm_prob;
+#pragma HLS stream depth=256 variable=strm_prob
+#pragma HLS bind_storage variable=strm_prob type=FIFO impl=LUTRAM
+    hls::stream<bool>    strm_e;
+#pragma HLS stream depth=256 variable=strm_e
+#pragma HLS bind_storage variable=strm_e type=FIFO impl=LUTRAM
+    hls::stream<uint8_t> strm_tab_dbg;
+#pragma HLS stream depth=256 variable=strm_tab_dbg
+#pragma HLS bind_storage variable=strm_tab_dbg type=FIFO impl=LUTRAM
+    // clang-format on
+    probability_look_up(id_cmp != 0, // stage 3: probability lookup on the merged stream
+
+                        strm_sel_tab, strm_cur_bit, strm_e_in, strm_addr1, strm_addr2, strm_addr3, strm_addr4,
+
+                        strm_bit, strm_prob, strm_e, strm_tab_dbg);
+
+    vpx_enc_syn( // stage 4: produces the output byte stream
+        // Iteration for variable
+        range, count, value, pre_byte, run, br_isFirst, pos,
+        // input
+        strm_bit, strm_prob, strm_e, strm_tab_dbg,
+        // output
+        strm_pos_o_e, strm_pos_o_byte);
+}
+// ------------------------------------------------------------
+void LeptonE_pre_engine(
+    // input: coefficient streams and per-component tables, all forwarded to pre_lepton_encoder_line
+    hls::stream<ap_int<11> > coef[8],
+
+    uint16_t axi_width[MAX_NUM_COLOR], // block width per component id; colldata->block_width(i)
+    uint8_t axi_map_row2cmp[4],        // slot index -> component id (original note: AXI 2,1,0,0 2,1,0)
+    uint8_t min_nois_thld_x[MAX_NUM_COLOR][64],
+    uint8_t min_nois_thld_y[MAX_NUM_COLOR][64],
+    uint8_t q_tables[MAX_NUM_COLOR][8][8], // entry [id_cmp][0][0] is also passed separately as q0
+    int32_t idct_q_table_x[MAX_NUM_COLOR][8][8],
+    int32_t idct_q_table_y[MAX_NUM_COLOR][8][8],
+
+    uint16_t axi_mcuv,       // trip count of the outer loop
+    uint8_t axi_num_cmp_mcu, // trip count of the inner loop; its index selects from axi_map_row2cmp
+
+    // output
+
+    // 7x7
+    hls::stream<ap_uint<4> >& strm_nonzero_bin_tmp,
+
+    hls::stream<ap_uint<6> >& strm_7x7_nz,
+    hls::stream<ap_uint<4> >& strm_7x7_length,
+
+    hls::stream<ap_uint<4> >& strm_7x7_num_nonzero_bin,
+    hls::stream<ap_uint<4> >& strm_7x7_bsr_best_prior,
+
+    hls::stream<bool>& strm_7x7_cur_bit_sign_tmp,
+
+    hls::stream<ap_uint<11> >& strm_abs_coef,
+    hls::stream<ap_uint<6> >& strm_coord,
+
+    // edge
+    // from preprocess to edge
+    hls::stream<ap_uint<6> >& non_zero_h_out,
+    hls::stream<ap_uint<3> >& coef_cnt_h_len,
+    hls::stream<ap_uint<3> >& coef_cnt_v_len,
+    hls::stream<ap_uint<3> >& eob_x,
+    hls::stream<ap_uint<3> >& eob_y,
+
+    hls::stream<ap_uint<4> >& strm_length_exp_h,
+
+    hls::stream<ap_uint<4> >& strm_best_prior_exp_h,
+
+    hls::stream<bool>& strm_cur_bit_sign_h,
+    hls::stream<ap_uint<2> >& strm_tri_sign_h,
+
+    hls::stream<ap_uint<11> >& strm_abs_coef_nois_h,
+    hls::stream<ap_uint<8> >& strm_ctx_nois_h,
+    hls::stream<ap_uint<8> >& strm_min_nois_h,
+    hls::stream<ap_uint<6> >& strm_coord_nois_h,
+
+    hls::stream<ap_uint<4> >& strm_length_exp_v,
+
+    hls::stream<ap_uint<4> >& strm_best_prior_exp_v,
+
+    hls::stream<bool>& strm_cur_bit_sign_v,
+    hls::stream<ap_uint<2> >& strm_tri_sign_v,
+
+    hls::stream<ap_uint<11> >& strm_abs_coef_nois_v,
+    hls::stream<ap_uint<8> >& strm_ctx_nois_v,
+    hls::stream<ap_uint<8> >& strm_min_nois_v,
+    hls::stream<ap_uint<6> >& strm_coord_nois_v,
+
+    // dc
+    hls::stream<int16_t>& strm_coef_dc,
+    hls::stream<int>& strm_uncertainty,
+    hls::stream<int>& strm_uncertainty2) {
+    bool is_top_row[MAX_NUM_COLOR] = {true, true, true}; // NOTE(review): initializer assumes MAX_NUM_COLOR == 3 — confirm
+// clang-format off
+ #pragma HLS ARRAY_PARTITION variable=axi_map_row2cmp complete dim=0
+ #pragma HLS ARRAY_PARTITION variable=axi_width 		 complete dim=0
+ #pragma HLS ARRAY_PARTITION variable=is_top_row      complete dim=0
+    // clang-format on
+    // One pre_lepton_encoder_line call is issued per (i_mcuv, idx_cmp) pair.
+    for (int i_mcuv = 0; i_mcuv < axi_mcuv; i_mcuv++) {
+        for (int idx_cmp = 0; idx_cmp < axi_num_cmp_mcu; idx_cmp++) {
+            uint8_t id_cmp = axi_map_row2cmp[idx_cmp];
+            uint16_t block_width = axi_width[id_cmp];
+            bool is_top_row_cmp = is_top_row[id_cmp]; // true only for the first call made for this component
+            uint8_t q0 = q_tables[id_cmp][0][0];
+
+            pre_lepton_encoder_line(block_width, id_cmp, is_top_row_cmp,
+
+                                    q_tables, q0, idct_q_table_x, idct_q_table_y, min_nois_thld_x, min_nois_thld_y,
+
+                                    coef,
+                                    // 7x7
+                                    strm_nonzero_bin_tmp,
+
+                                    strm_7x7_nz, strm_7x7_length,
+
+                                    strm_7x7_num_nonzero_bin, strm_7x7_bsr_best_prior,
+
+                                    strm_7x7_cur_bit_sign_tmp,
+
+                                    strm_abs_coef, strm_coord,
+                                    // edge
+                                    non_zero_h_out, coef_cnt_h_len, coef_cnt_v_len, eob_x, eob_y,
+
+                                    strm_length_exp_h,
+
+                                    strm_best_prior_exp_h,
+
+                                    strm_cur_bit_sign_h, strm_tri_sign_h,
+
+                                    strm_abs_coef_nois_h, strm_ctx_nois_h, strm_min_nois_h, strm_coord_nois_h,
+
+                                    // edge, vertical counterparts
+                                    strm_length_exp_v,
+
+                                    strm_best_prior_exp_v,
+
+                                    strm_cur_bit_sign_v, strm_tri_sign_v,
+
+                                    strm_abs_coef_nois_v, strm_ctx_nois_v, strm_min_nois_v, strm_coord_nois_v,
+                                    // dc
+                                    strm_coef_dc, strm_uncertainty, strm_uncertainty2);
+            is_top_row[id_cmp] = false; // every later call for this component sees is_top_row_cmp == false
+        } // for(int idx_cmp = 0; idx_cmp < axi_num_cmp_mcu ; idx_cmp++)
+    }     //("process_row_range_while");for( int i_mcuv = 0; i_mcuv < axi_mcuv; i_mcuv++)
+}
+// ------------------------------------------------------------
+void LeptonE_push_engine(
+    // input
+    // All stream parameters below are forwarded, in this order, to push_lepton_encoder_line.
+
+    uint16_t axi_width[MAX_NUM_COLOR], // block width per component id; colldata->block_width(i)
+    uint8_t axi_map_row2cmp[4],        // slot index -> component id (original note: AXI 2,1,0,0 2,1,0)
+
+    uint16_t axi_mcuv,       // trip count of the outer loop
+    uint8_t axi_num_cmp_mcu, // trip count of the inner loop; its index selects from axi_map_row2cmp
+
+    // 7x7
+    hls::stream<ap_uint<4> >& strm_nonzero_bin_tmp,
+
+    hls::stream<ap_uint<6> >& strm_7x7_nz,
+    hls::stream<ap_uint<4> >& strm_7x7_length,
+
+    hls::stream<ap_uint<4> >& strm_7x7_num_nonzero_bin,
+    hls::stream<ap_uint<4> >& strm_7x7_bsr_best_prior,
+
+    hls::stream<bool>& strm_7x7_cur_bit_sign_tmp,
+
+    hls::stream<ap_uint<11> >& strm_abs_coef,
+    hls::stream<ap_uint<6> >& strm_coord,
+
+    // edge
+    // from preprocess to edge
+    hls::stream<ap_uint<6> >& non_zero_h_out,
+    hls::stream<ap_uint<3> >& coef_cnt_h_len,
+    hls::stream<ap_uint<3> >& coef_cnt_v_len,
+    hls::stream<ap_uint<3> >& eob_x,
+    hls::stream<ap_uint<3> >& eob_y,
+
+    hls::stream<ap_uint<4> >& strm_length_exp_h,
+
+    hls::stream<ap_uint<4> >& strm_best_prior_exp_h,
+
+    hls::stream<bool>& strm_cur_bit_sign_h,
+    hls::stream<ap_uint<2> >& strm_tri_sign_h,
+
+    hls::stream<ap_uint<11> >& strm_abs_coef_nois_h,
+    hls::stream<ap_uint<8> >& strm_ctx_nois_h,
+    hls::stream<ap_uint<8> >& strm_min_nois_h,
+    hls::stream<ap_uint<6> >& strm_coord_nois_h,
+
+    hls::stream<ap_uint<4> >& strm_length_exp_v,
+
+    hls::stream<ap_uint<4> >& strm_best_prior_exp_v,
+
+    hls::stream<bool>& strm_cur_bit_sign_v,
+    hls::stream<ap_uint<2> >& strm_tri_sign_v,
+
+    hls::stream<ap_uint<11> >& strm_abs_coef_nois_v,
+    hls::stream<ap_uint<8> >& strm_ctx_nois_v,
+    hls::stream<ap_uint<8> >& strm_min_nois_v,
+    hls::stream<ap_uint<6> >& strm_coord_nois_v,
+
+    // dc
+    hls::stream<int16_t>& strm_coef,
+    hls::stream<int>& strm_uncertainty,
+    hls::stream<int>& strm_uncertainty2,
+
+    // output
+    struct_arith& axi_arith,                 // receives the final coder state after the last line
+    hls::stream<bool>& strm_pos_o_e,         // one extra `true` is written here after the loops finish
+    hls::stream<ap_uint<8> >& strm_pos_o_byte
+
+    ) {
+    // Coder state is held in locals for the whole run and copied to axi_arith once at the end.
+// clang-format off
+//#pragma HLS ARRAY_PARTITION variable=axi_map_row2cmp complete dim=0
+#pragma HLS ARRAY_PARTITION variable=axi_width 		 complete dim=0
+// axi_map_row2cmp is left unpartitioned in this function (its pragma above is commented out).
+    // clang-format on
+
+    unsigned char range = 128;  // boolwriter.range;
+    int count = -24;            // boolwriter.count;
+    unsigned int value = 0;     // boolwriter.lowvalue;
+    unsigned char pre_byte = 0; // boolwriter.pre_byte;
+    unsigned short run = 0;     // boolwriter.run;
+    bool isFirst = 1;           // boolwriter.isFirst;
+    unsigned int pos = 0;       // boolwriter.pos;
+    // The seven values above are passed by address to every push_lepton_encoder_line call.
+
+    for (int i_mcuv = 0; i_mcuv < axi_mcuv; i_mcuv++) {
+        for (int idx_cmp = 0; idx_cmp < axi_num_cmp_mcu; idx_cmp++) {
+            uint8_t id_cmp = axi_map_row2cmp[idx_cmp];
+            uint16_t block_width = axi_width[id_cmp];
+
+            push_lepton_encoder_line(block_width, id_cmp,
+                                     // 7x7
+                                     strm_nonzero_bin_tmp,
+
+                                     strm_7x7_nz, strm_7x7_length,
+
+                                     strm_7x7_num_nonzero_bin, strm_7x7_bsr_best_prior,
+
+                                     strm_7x7_cur_bit_sign_tmp,
+
+                                     strm_abs_coef, strm_coord,
+                                     // edge
+                                     non_zero_h_out, coef_cnt_h_len, coef_cnt_v_len, eob_x, eob_y,
+
+                                     strm_length_exp_h,
+
+                                     strm_best_prior_exp_h,
+
+                                     strm_cur_bit_sign_h, strm_tri_sign_h,
+
+                                     strm_abs_coef_nois_h, strm_ctx_nois_h, strm_min_nois_h, strm_coord_nois_h,
+
+                                     // edge, vertical counterparts
+                                     strm_length_exp_v,
+
+                                     strm_best_prior_exp_v,
+
+                                     strm_cur_bit_sign_v, strm_tri_sign_v,
+
+                                     strm_abs_coef_nois_v, strm_ctx_nois_v, strm_min_nois_v, strm_coord_nois_v,
+                                     // dc
+                                     strm_coef, strm_uncertainty, strm_uncertainty2,
+
+                                     &range, &count, &value, &pre_byte, &run, &isFirst, &pos,
+
+                                     strm_pos_o_e, strm_pos_o_byte);
+
+        } // for(int idx_cmp = 0; idx_cmp < axi_num_cmp_mcu ; idx_cmp++)
+    }     //("process_row_range_while");for( int i_mcuv = 0; i_mcuv < axi_mcuv; i_mcuv++)
+
+    strm_pos_o_e.write(true); // written once, after every line has been pushed
+    axi_arith.count = count;
+    axi_arith.value = value;
+    axi_arith.pre_byte = pre_byte;
+    axi_arith.run = run;
+    axi_arith.pos = pos;
+    axi_arith.range = range;
+    axi_arith.isFirst = isFirst;
+}
+
+} // namespace details
+} // namespace codec
+} // namespace xf
+
+namespace xf {
+namespace codec {
+namespace details {
+void kernel_LeptonE_strmIn_engine(
+    // input (all forwarded to LeptonE_pre_engine; axi_width, axi_map_row2cmp and the two counts also go to LeptonE_push_engine)
+    hls::stream<ap_int<11> > coef[8],
+
+    uint16_t axi_width[MAX_NUM_COLOR], // colldata->block_width(i);
+    uint8_t axi_map_row2cmp[4],        // original note: AXI 2,1,0,0 2,1,0
+    uint8_t min_nois_thld_x[MAX_NUM_COLOR][64],
+    uint8_t min_nois_thld_y[MAX_NUM_COLOR][64],
+    uint8_t q_tables[MAX_NUM_COLOR][8][8], //[64],
+    int32_t idct_q_table_x[MAX_NUM_COLOR][8][8],
+    int32_t idct_q_table_y[MAX_NUM_COLOR][8][8],
+
+    uint16_t axi_mcuv,
+    uint8_t axi_num_cmp_mcu,
+
+    // output (all produced by LeptonE_push_engine)
+    struct_arith& axi_arith,
+    hls::stream<bool>& strm_pos_o_e,
+    hls::stream<ap_uint<8> >& strm_pos_o_byte
+
+    ) {
+#pragma HLS DATAFLOW
+    // Two-stage region: LeptonE_pre_engine fills the local streams declared below, LeptonE_push_engine drains them.
+// clang-format off
+#pragma HLS ARRAY_PARTITION variable=axi_map_row2cmp complete dim=0
+#pragma HLS ARRAY_PARTITION variable=axi_width 		 complete dim=0
+//#pragma HLS ARRAY_PARTITION variable=is_top_row      complete dim=0
+#pragma HLS ARRAY_PARTITION variable=idct_q_table_x  complete dim=3
+#pragma HLS ARRAY_PARTITION variable=idct_q_table_y  complete dim=3
+    // clang-format on
+
+    // clang-format off
+    hls::stream<ap_uint<4> > strm_nonzero_bin_tmp("strm_nz_bin");
+#pragma HLS stream depth=32 variable=strm_nonzero_bin_tmp
+    hls::stream<ap_uint<6> > strm_7x7_nz("strm_77_nz");
+#pragma HLS stream depth=32 variable=strm_7x7_nz
+    hls::stream<ap_uint<4> > strm_7x7_length("strm_77_len");
+#pragma HLS stream depth=32 variable=strm_7x7_length
+
+    hls::stream<ap_uint<4> > strm_7x7_num_nonzero_bin("strm_nz");
+#pragma HLS stream depth=32 variable=strm_7x7_num_nonzero_bin
+    hls::stream<ap_uint<4> > strm_7x7_bsr_best_prior("strm_bsr");
+#pragma HLS stream depth=32 variable=strm_7x7_bsr_best_prior
+
+    hls::stream<bool> strm_7x7_cur_bit_sign_tmp("strm_sign_bit");
+#pragma HLS stream depth=32 variable=strm_7x7_cur_bit_sign_tmp
+    // NOTE(review): a few streams in this function are declared static while most are not — confirm this is intentional.
+    static hls::stream<ap_uint<11> > strm_abs_coef("coef_abs");
+#pragma HLS stream depth=32 variable=strm_abs_coef
+    hls::stream<ap_uint<6> > strm_coord("strm_coord");
+#pragma HLS stream depth=32 variable=strm_coord
+    // clang-format on
+    // The 7x7 FIFOs above are declared with depth 32; every edge FIFO below is declared with depth 512.
+    // edge
+    // clang-format off
+    static hls::stream<ap_uint<3> > coef_cnt_h_len("coef_cnt_h_len");
+#pragma HLS bind_storage variable=coef_cnt_h_len type=FIFO impl=LUTRAM
+#pragma HLS stream depth=512 variable=coef_cnt_h_len
+
+    static hls::stream<ap_uint<3> > coef_cnt_v_len("coef_cnt_v_len");
+#pragma HLS bind_storage variable=coef_cnt_v_len type=FIFO impl=LUTRAM
+#pragma HLS stream depth=512 variable=coef_cnt_v_len
+
+    static hls::stream<ap_uint<6> > non_zero_h_out;
+#pragma HLS stream depth=512 variable=non_zero_h_out
+#pragma HLS bind_storage variable=non_zero_h_out type=FIFO impl=LUTRAM
+
+    hls::stream<ap_uint<3> > eob_x("eob_x");
+#pragma HLS stream depth=512 variable=eob_x
+#pragma HLS bind_storage variable=eob_x type=FIFO impl=LUTRAM
+	hls::stream<ap_uint<3> > eob_y("eob_y");
+#pragma HLS stream depth=512 variable=eob_y
+#pragma HLS bind_storage variable=eob_y type=FIFO impl=LUTRAM
+
+    // Horizontal-edge (_h) streams; no bind_storage pragma is given for these or for the _v streams.
+	hls::stream<ap_uint<4> > strm_best_prior_exp_h("bsr_exp_h");
+#pragma HLS stream depth=512 variable=strm_best_prior_exp_h
+
+	hls::stream<bool> strm_cur_bit_sign_h("sign_bit_h");
+#pragma HLS stream depth=512 variable=strm_cur_bit_sign_h
+	hls::stream<ap_uint<2> > strm_tri_sign_h("tri_sign_h");
+#pragma HLS stream depth=512 variable=strm_tri_sign_h
+
+	static hls::stream<ap_uint<11> > strm_abs_coef_nois_h("abs_coef_h");
+#pragma HLS stream depth=512 variable=strm_abs_coef_nois_h
+	hls::stream<ap_uint<8> > strm_ctx_nois_h;
+#pragma HLS stream depth=512 variable=strm_ctx_nois_h
+	hls::stream<ap_uint<8> > strm_min_nois_h;
+#pragma HLS stream depth=512 variable=strm_min_nois_h
+	hls::stream<ap_uint<6> > strm_coord_nois_h;
+#pragma HLS stream depth=512 variable=strm_coord_nois_h
+    // Vertical-edge (_v) streams, mirroring the _h set above.
+	hls::stream<ap_uint<4> > strm_best_prior_exp_v;
+#pragma HLS stream depth=512 variable=strm_best_prior_exp_v
+
+	hls::stream<bool> strm_cur_bit_sign_v("sign_bit_v");
+#pragma HLS stream depth=512 variable=strm_cur_bit_sign_v
+	hls::stream<ap_uint<2> > strm_tri_sign_v;
+#pragma HLS stream depth=512 variable=strm_tri_sign_v
+
+	static hls::stream<ap_uint<11> > strm_abs_coef_nois_v("abs_coef_v");
+#pragma HLS stream depth=512 variable=strm_abs_coef_nois_v
+	hls::stream<ap_uint<8> > strm_ctx_nois_v;
+#pragma HLS stream depth=512 variable=strm_ctx_nois_v
+	hls::stream<ap_uint<8> > strm_min_nois_v;
+#pragma HLS stream depth=512 variable=strm_min_nois_v
+	hls::stream<ap_uint<6> > strm_coord_nois_v;
+#pragma HLS stream depth=512 variable=strm_coord_nois_v
+
+	hls::stream<ap_uint<4> > strm_length_exp_h("len_exp_h");
+#pragma HLS stream depth=512 variable=strm_length_exp_h
+	hls::stream<ap_uint<4> > strm_length_exp_v("len_exp_v");
+#pragma HLS stream depth=512 variable=strm_length_exp_v
+    // clang-format on
+    // DC-path streams: depth 32, bound to LUTRAM FIFOs.
+    // clang-format off
+	hls::stream<int16_t> strm_coef_dc;
+#pragma HLS STREAM variable=strm_coef_dc depth=32 dim=1
+#pragma HLS bind_storage variable=strm_coef_dc type=FIFO impl=LUTRAM
+
+	hls::stream<int>     strm_uncertainty("uncertainty");
+#pragma HLS bind_storage variable=strm_uncertainty type=FIFO impl=LUTRAM
+#pragma HLS STREAM variable=strm_uncertainty depth=32 dim=1
+
+	hls::stream<int>     strm_uncertainty2("uncertainty2");
+#pragma HLS bind_storage variable=strm_uncertainty2 type=FIFO impl=LUTRAM
+#pragma HLS STREAM variable=strm_uncertainty2 depth=32 dim=1
+    // clang-format on
+    // Producer stage: reads coef and the tables, writes every local stream declared above.
+    LeptonE_pre_engine(coef, axi_width,
+                       axi_map_row2cmp, // original note: AXI 2,1,0,0 2,1,0
+                       min_nois_thld_x, min_nois_thld_y,
+                       q_tables, //[64],
+                       idct_q_table_x, idct_q_table_y,
+
+                       axi_mcuv, axi_num_cmp_mcu,
+
+                       // 7x7
+                       strm_nonzero_bin_tmp,
+
+                       strm_7x7_nz, strm_7x7_length,
+
+                       strm_7x7_num_nonzero_bin, strm_7x7_bsr_best_prior,
+
+                       strm_7x7_cur_bit_sign_tmp,
+
+                       strm_abs_coef, strm_coord,
+                       // edge
+                       non_zero_h_out, coef_cnt_h_len, coef_cnt_v_len, eob_x, eob_y,
+
+                       strm_length_exp_h,
+
+                       strm_best_prior_exp_h,
+
+                       strm_cur_bit_sign_h, strm_tri_sign_h,
+
+                       strm_abs_coef_nois_h, strm_ctx_nois_h, strm_min_nois_h, strm_coord_nois_h,
+
+                       // edge, vertical counterparts
+                       strm_length_exp_v,
+
+                       strm_best_prior_exp_v,
+
+                       strm_cur_bit_sign_v, strm_tri_sign_v,
+
+                       strm_abs_coef_nois_v, strm_ctx_nois_v, strm_min_nois_v, strm_coord_nois_v,
+                       // dc
+                       strm_coef_dc, strm_uncertainty, strm_uncertainty2);
+    // Consumer stage: takes the same streams in the same order, plus the arithmetic-coder outputs.
+    LeptonE_push_engine(axi_width,
+                        axi_map_row2cmp, // original note: AXI 2,1,0,0 2,1,0
+
+                        axi_mcuv, axi_num_cmp_mcu,
+
+                        // 7x7
+                        strm_nonzero_bin_tmp,
+
+                        strm_7x7_nz, strm_7x7_length,
+
+                        strm_7x7_num_nonzero_bin, strm_7x7_bsr_best_prior,
+
+                        strm_7x7_cur_bit_sign_tmp,
+
+                        strm_abs_coef, strm_coord,
+                        // edge
+                        non_zero_h_out, coef_cnt_h_len, coef_cnt_v_len, eob_x, eob_y,
+
+                        strm_length_exp_h,
+
+                        strm_best_prior_exp_h,
+
+                        strm_cur_bit_sign_h, strm_tri_sign_h,
+
+                        strm_abs_coef_nois_h, strm_ctx_nois_h, strm_min_nois_h, strm_coord_nois_h,
+
+                        // edge, vertical counterparts
+                        strm_length_exp_v,
+
+                        strm_best_prior_exp_v,
+
+                        strm_cur_bit_sign_v, strm_tri_sign_v,
+
+                        strm_abs_coef_nois_v, strm_ctx_nois_v, strm_min_nois_v, strm_coord_nois_v,
+                        // dc
+                        strm_coef_dc, strm_uncertainty, strm_uncertainty2,
+
+                        axi_arith, strm_pos_o_e, strm_pos_o_byte);
+}
+
+} // namespace details
+} // namespace codec
+} // namespace xf
diff --git a/codec/L2/demos/leptonEnc/kernel/XAcc_dc.cpp b/codec/L2/demos/leptonEnc/kernel/XAcc_dc.cpp
new file mode 100644
index 0000000000..71ae00e984
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/kernel/XAcc_dc.cpp
@@ -0,0 +1,1037 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "XAcc_dc.hpp"
+#include "hls_math.h"
+#define DEPTH_LBUFF_READ (1024) // declared size of the `des` buffer in hls_read_ddr_to_buff2
+#define NUM_BURST_READ (64)     // words copied per burst in hls_read_ddr_to_buff2
+#define LG2_NUM_BURST_READ (6)  // log2(NUM_BURST_READ)
+enum {
+    w1 = 2841, // 2048*sqrt(2)*cos(1*pi/16)
+    w2 = 2676, // 2048*sqrt(2)*cos(2*pi/16)
+    w3 = 2408, // 2048*sqrt(2)*cos(3*pi/16)
+    w5 = 1609, // 2048*sqrt(2)*cos(5*pi/16)
+    w6 = 1108, // 2048*sqrt(2)*cos(6*pi/16)
+    w7 = 565,  // 2048*sqrt(2)*cos(7*pi/16)
+    // Sums and differences of the weights above, used by the IDCT butterflies in hls_idct_h / hls_idct_v.
+    w1pw7 = w1 + w7,
+    w1mw7 = w1 - w7,
+    w2pw6 = w2 + w6,
+    w2mw6 = w2 - w6,
+    w3pw5 = w3 + w5,
+    w3mw5 = w3 - w5,
+    // Applied together with "+ 128 >> 8" in stage 3 of both IDCT passes.
+    r2 = 181 // 256/sqrt(2)
+};
+typedef ap_int<32> idct1_t;
+typedef ap_int<24> idctm_t; // element type of the strm_intermed streams between hls_idct_h and hls_idct_v
+
+namespace xf {
+namespace codec {
+namespace details {
+
+// ------------------------------------------------------------
+void hls_read_ddr_to_buff2(WD_AXI des[DEPTH_LBUFF_READ], WD_AXI src[MAX_COEF_AXI], unsigned long offset) { // copies src[offset + ...] into des in bursts of NUM_BURST_READ
+#pragma HLS DATA_PACK variable = des
+#pragma HLS DATA_PACK variable = src
+    WD_AXI* p_des = des; // write cursor, advanced by one burst per outer iteration
+    WD_AXI* p_src = src; // read cursor; `offset` is added on top of it at every access
+    int num_loop = NUM_BURST_READ >> LG2_NUM_BURST_READ; // 64 >> 6 == 1. NOTE(review): only one burst is copied although des has DEPTH_LBUFF_READ entries — confirm intent
+    for (int line = 0; line < num_loop; line++) {
+        for (int i = 0; i < NUM_BURST_READ; i++)
+#pragma HLS PIPELINE II = 1
+            p_des[i] = p_src[offset + i];
+        p_des += NUM_BURST_READ;
+        p_src += NUM_BURST_READ;
+    }
+}
+
+// ------------------------------------------------------------
+void hls_read_ddr_to_buff(int in[65536], hls::stream<int>& str_out, int cnt) { // stages `in` through a 2048-word buffer into str_out
+    int buff[2048];
+    int pass = (cnt + 2047) / 2048; // NOTE(review): computed but never used, while the loop below runs cnt * cnt times — confirm which bound is intended
+    for (int i = 0; i < cnt * cnt; i++) {
+#pragma HLS dataflow
+        for (int k = 0; k < 2048; k++) // fill the staging buffer from chunk i of `in`
+#pragma HLS pipeline
+            buff[k] = in[i * 2048 + k]; // NOTE(review): the index passes the declared 65536 entries once i >= 32
+
+        for (int k = 0; k < 2048; k++) // drain the staging buffer into the stream, in order
+#pragma HLS pipeline II = 1
+            str_out.write(buff[k]);
+    }
+}
+
+// ------------------------------------------------------------
+void hls_get_estimate_v(uint16_t block_width,                           // number of blocks; one est_v is written per block
+                        hls::stream<int16_t> strm_v[8],                 // one value is read from each stream per block
+                        hls::stream<int16_t> strm_pixels_sans_dc[8][2], // one value is read from each stream per block
+                        hls::stream<pix_edge_t>& strm_est_v) {
+    pix_edge_t est_v;
+    int16_t pixels_sans_dc[8][2]; // same shape as strm_pixels_sans_dc; only columns 0 and 1 are ever used
+    // Per block and row i: est_v.data[i] = (strm_v[i] - d / 2) - (p[i][0] + 1024), with d = p[i][0] - p[i][1].
+    for (int jpeg_x = 0; jpeg_x + 0 < block_width; jpeg_x++) {
+#pragma HLS PIPELINE II = 1
+        for (int i = 0; i < 8; i++) {
+#pragma HLS UNROLL
+            for (int j = 0; j < 2; j++) {
+#pragma HLS UNROLL
+                pixels_sans_dc[i][j] = strm_pixels_sans_dc[i][j].read();
+            }
+        }
+
+        for (int i = 0; i < 8; ++i) {
+#pragma HLS UNROLL
+            int16_t a = pixels_sans_dc[i][0] + 1024;
+            int16_t d = pixels_sans_dc[i][0] - pixels_sans_dc[i][1];
+            int16_t b = strm_v[i].read() - (d / 2);
+            est_v.data[i] = b - a;
+        }
+        strm_est_v.write(est_v);
+    }
+}
+
+// ------------------------------------------------------------
+void hls_get_estimate_h(uint16_t block_width,
+                        hls::stream<int16_t> strm_h[8],
+                        hls::stream<int16_t> strm_pixels_sans_dc[2][8],
+                        hls::stream<pix_edge_t>& strm_est_h) {
+    pix_edge_t estimate;  // one output record is written per block
+    int16_t window[2][8]; // the two pixel rows read for the current block
+    for (int blk = 0; blk < block_width; ++blk) {
+#pragma HLS PIPELINE II = 1
+        for (int row = 0; row < 2; ++row) {
+#pragma HLS UNROLL
+            for (int col = 0; col < 8; ++col) {
+#pragma HLS UNROLL
+                window[row][col] = strm_pixels_sans_dc[row][col].read();
+            }
+        }
+        for (int col = 0; col < 8; ++col) {
+#pragma HLS UNROLL
+            const int16_t slope = window[0][col] - window[1][col];    // row 0 minus row 1
+            const int16_t biased = window[0][col] + 1024;
+            const int16_t projected = strm_h[col].read() - slope / 2; // stream value less half the slope
+            estimate.data[col] = projected - biased;
+        }
+        strm_est_h.write(estimate);
+    }
+}
+
+// ------------------------------------------------------------
+int16_t hls_adv_predict_dc_pix(bool left_present,                  // when only this flag is set, est_v alone is used
+                               bool above_present,                 // when only this flag is set, est_h alone is used
+                               uint16_t q0,                        // divisor applied to the averaged estimate
+                               hls::stream<pix_edge_t>& str_est_v, // exactly one element is read per call
+                               hls::stream<pix_edge_t>& str_est_h, // exactly one element is read per call
+                               int32_t* uncertainty_val,           // written only if left_present || above_present
+                               int32_t* uncertainty2_val) {        // written only if left_present || above_present
+#pragma HLS inline
+    int16_t avgmed = 0;
+    int16_t avg_h_v[2] = {0, 0};
+#pragma HLS ARRAY_PARTITION variable = avg_h_v complete dim = 0
+    int16_t min_dc = 32767;
+    int16_t max_dc = -32768;
+    int16_t min_dc_v = 32767;
+    int16_t max_dc_v = -32768;
+    int16_t min_dc_h = 32767;
+    int16_t max_dc_h = -32768;
+    pix_edge_t est_v = str_est_v.read();
+    pix_edge_t est_h = str_est_h.read();
+    /*
+     * Min/max reduction.
+     *
+     * The eight entries of each estimate are reduced with a balanced pairwise
+     * tree (three levels of hls::min / hls::max) rather than a linear scan:
+     *
+     *     level 1: (0,1) (2,3) (4,5) (6,7)  ->  *_0 .. *_3
+     *     level 2: (*_0,*_1) (*_2,*_3)      ->  *_4, *_5
+     *     level 3: (*_4,*_5)                ->  min_dc_v / max_dc_v / min_dc_h / max_dc_h
+     *
+     * The sums of all eight entries (sum_v, sum_h) are formed separately below
+     * and are stored in int16_t.
+     *
+     */
+
+    int16_t min_dc_v_0, min_dc_v_1, min_dc_v_2, min_dc_v_3, min_dc_v_4, min_dc_v_5;
+    int16_t max_dc_v_0, max_dc_v_1, max_dc_v_2, max_dc_v_3, max_dc_v_4, max_dc_v_5;
+    int16_t min_dc_h_0, min_dc_h_1, min_dc_h_2, min_dc_h_3, min_dc_h_4, min_dc_h_5;
+    int16_t max_dc_h_0, max_dc_h_1, max_dc_h_2, max_dc_h_3, max_dc_h_4, max_dc_h_5;
+    min_dc_v_0 = hls::min(est_v.data[0], est_v.data[1]);
+    min_dc_v_1 = hls::min(est_v.data[2], est_v.data[3]);
+    min_dc_v_2 = hls::min(est_v.data[4], est_v.data[5]);
+    min_dc_v_3 = hls::min(est_v.data[6], est_v.data[7]);
+    min_dc_v_4 = hls::min(min_dc_v_0, min_dc_v_1);
+    min_dc_v_5 = hls::min(min_dc_v_2, min_dc_v_3);
+    min_dc_v = hls::min(min_dc_v_4, min_dc_v_5);
+    max_dc_v_0 = hls::max(est_v.data[0], est_v.data[1]);
+    max_dc_v_1 = hls::max(est_v.data[2], est_v.data[3]);
+    max_dc_v_2 = hls::max(est_v.data[4], est_v.data[5]);
+    max_dc_v_3 = hls::max(est_v.data[6], est_v.data[7]);
+    max_dc_v_4 = hls::max(max_dc_v_0, max_dc_v_1);
+    max_dc_v_5 = hls::max(max_dc_v_2, max_dc_v_3);
+    max_dc_v = hls::max(max_dc_v_4, max_dc_v_5);
+    min_dc_h_0 = hls::min(est_h.data[0], est_h.data[1]);
+    min_dc_h_1 = hls::min(est_h.data[2], est_h.data[3]);
+    min_dc_h_2 = hls::min(est_h.data[4], est_h.data[5]);
+    min_dc_h_3 = hls::min(est_h.data[6], est_h.data[7]);
+    min_dc_h_4 = hls::min(min_dc_h_0, min_dc_h_1);
+    min_dc_h_5 = hls::min(min_dc_h_2, min_dc_h_3);
+    min_dc_h = hls::min(min_dc_h_4, min_dc_h_5);
+    max_dc_h_0 = hls::max(est_h.data[0], est_h.data[1]);
+    max_dc_h_1 = hls::max(est_h.data[2], est_h.data[3]);
+    max_dc_h_2 = hls::max(est_h.data[4], est_h.data[5]);
+    max_dc_h_3 = hls::max(est_h.data[6], est_h.data[7]);
+    max_dc_h_4 = hls::max(max_dc_h_0, max_dc_h_1);
+    max_dc_h_5 = hls::max(max_dc_h_2, max_dc_h_3);
+    max_dc_h = hls::max(max_dc_h_4, max_dc_h_5);
+
+    int16_t sum_v = est_v.data[0] + est_v.data[1] + est_v.data[2] + est_v.data[3] + est_v.data[4] + est_v.data[5] +
+                    est_v.data[6] + est_v.data[7];
+    int16_t sum_h = est_h.data[0] + est_h.data[1] + est_h.data[2] + est_h.data[3] + est_h.data[4] + est_h.data[5] +
+                    est_h.data[6] + est_h.data[7];
+
+    if (left_present && above_present) { // both neighbours: combine the two estimates
+        min_dc = min_dc_v < min_dc_h ? min_dc_v : min_dc_h;
+        max_dc = max_dc_v > max_dc_h ? max_dc_v : max_dc_h;
+        avg_h_v[0] = sum_v;
+        avg_h_v[1] = sum_h;
+    } else if (left_present && !above_present) { // left only: est_v fills both slots
+        min_dc = min_dc_v;
+        max_dc = max_dc_v;
+        avg_h_v[0] = sum_v;
+        avg_h_v[1] = sum_v;
+    } else if (!left_present && above_present) { // above only: est_h fills both slots
+        min_dc = min_dc_h;
+        max_dc = max_dc_h;
+        avg_h_v[0] = sum_h;
+        avg_h_v[1] = sum_h;
+    }
+
+    if (left_present || above_present) {
+        avgmed = (avg_h_v[0] + avg_h_v[1]) >> 1;
+        *uncertainty_val = (max_dc - min_dc) >> 3;
+        avg_h_v[0] -= avgmed; // from here on avg_h_v holds each sum's deviation from avgmed
+        avg_h_v[1] -= avgmed;
+        *uncertainty2_val = _MACRO_ABS(avg_h_v[0]) < _MACRO_ABS(avg_h_v[1]) ? (avg_h_v[0] >> 3) : (avg_h_v[1] >> 3);
+    }
+
+    // Divide the averaged estimate by q0, then by 8 with rounding (+4 before >> 3).
+    // NOTE(review): q0 == 0 is not guarded against here — presumably callers never pass it; confirm.
+    int16_t tmp = avgmed / q0;
+    int16_t tmp2 = (tmp + 4) >> 3;
+    // i.e. the result is ((avgmed / q0 + 4) >> 3); when neither neighbour is present avgmed stays 0,
+    // so the function returns 0 in that case (for non-zero q0).
+    return tmp2;
+}
+
+// ------------------------------------------------------------
+void hls_idct_h(uint16_t block_width,                      // number of 8x8 blocks to transform
+                hls::stream<coef_t> str_rast8[8],          // one value is read from each stream per row
+                const uint8_t q[8][8],                     // multiplier table, indexed [row][column]
+                hls::stream<idctm_t> strm_intermed[8][8]) { // row y writes strm_intermed[y][0..7]
+    bool ignore_dc = true; // constant: the (row 0, column 0) term is always replaced by 0 below
+#pragma HLS ARRAY_PARTITION variable = str_rast8 complete dim = 1
+#pragma HLS ARRAY_PARTITION variable = q complete dim = 2
+
+    // Horizontal 1-D IDCT.
+    for (int jpeg_x = 0; jpeg_x + 0 < block_width; jpeg_x++) {
+        for (int y = 0; y < 8; ++y) {
+#pragma HLS PIPELINE II = 1
+            coef_t c0 = str_rast8[0].read(); // read unconditionally, even when the value is then ignored
+            // Column read order below is 0, 4, 6, 2, 1, 7, 5, 3; x0 and x1 are pre-shifted by 11 bits.
+            int32_t x0 = (((ignore_dc && y == 0) ? 0 : (c0 * q[y][0]) << 11)) + 128;
+            int32_t x1 = (str_rast8[4].read() * q[y][4]) << 11;
+            int32_t x2 = str_rast8[6].read() * q[y][6];
+            int32_t x3 = str_rast8[2].read() * q[y][2];
+            int32_t x4 = str_rast8[1].read() * q[y][1];
+            int32_t x5 = str_rast8[7].read() * q[y][7];
+            int32_t x6 = str_rast8[5].read() * q[y][5];
+            int32_t x7 = str_rast8[3].read() * q[y][3];
+
+            // Prescale.
+
+            // Stage 1.
+            int32_t x8 = w7 * (x4 + x5);
+            x4 = x8 + w1mw7 * x4;
+            x5 = x8 - w1pw7 * x5;
+            x8 = w3 * (x6 + x7);
+            x6 = x8 - w3mw5 * x6;
+            x7 = x8 - w3pw5 * x7;
+
+            // Stage 2.
+            x8 = x0 + x1;
+            x0 -= x1;
+            x1 = w6 * (x3 + x2);
+            x2 = x1 - w2pw6 * x2;
+            x3 = x1 + w2mw6 * x3;
+            x1 = x4 + x6;
+            x4 -= x6;
+            x6 = x5 + x7;
+            x5 -= x7;
+
+            // Stage 3.
+            x7 = x8 + x3;
+            x8 -= x3;
+            x3 = x0 + x2;
+            x0 -= x2;
+            x2 = (r2 * (x4 + x5) + 128) >> 8;
+            x4 = (r2 * (x4 - x5) + 128) >> 8;
+
+            // Stage 4.
+            // Each result is shifted right by 8 and written to the stream for (row y, column 0..7).
+            strm_intermed[y][0].write((x7 + x1) >> 8);
+            strm_intermed[y][1].write((x3 + x2) >> 8);
+            strm_intermed[y][2].write((x0 + x4) >> 8);
+            strm_intermed[y][3].write((x8 + x6) >> 8);
+            strm_intermed[y][4].write((x8 - x6) >> 8);
+            strm_intermed[y][5].write((x0 - x4) >> 8);
+            strm_intermed[y][6].write((x3 - x2) >> 8);
+            strm_intermed[y][7].write((x7 - x1) >> 8);
+        }
+    }
+}
+
+// ------------------------------------------------------------
+void hls_idct_v(int16_t block_width,                      // number of 8x8 blocks; NOTE(review): signed here, uint16_t in hls_idct_h
+                hls::stream<idctm_t> strm_intermed[8][8], // column x reads strm_intermed[0..7][x]
+                hls::stream<int16_t> strm_outp0[8][2],    // all eight rows, written only for columns x < 2
+                hls::stream<int16_t> strm_outp1[2][8],    // rows 0 and 1 only, written for every column
+                hls::stream<int16_t> strm_outp2[8][8]) {  // the full 8x8 result
+    // Vertical 1-D IDCT.
+    for (int jpeg_x = 0; jpeg_x + 0 < block_width; jpeg_x++) {
+        for (int32_t x = 0; x < 8; ++x) {
+#pragma HLS PIPELINE II = 1
+            // clang-format off
+			// Similar to the horizontal 1-D IDCT case, if all the AC components are zero, then the IDCT is trivial.
+			// However, after performing the horizontal 1-D IDCT, there are typically non-zero AC components, so
+			// we do not bother to check for the all-zero case.
+            // clang-format on
+
+            // Prescale. Row read order is 0, 4, 6, 2, 1, 7, 5, 3; y0 and y1 are pre-shifted by 8 bits.
+            int32_t y0 = (strm_intermed[0][x].read() << 8) + 8192;
+            int32_t y1 = strm_intermed[4][x].read() << 8;
+            int32_t y2 = strm_intermed[6][x].read();
+            int32_t y3 = strm_intermed[2][x].read();
+            int32_t y4 = strm_intermed[1][x].read();
+            int32_t y5 = strm_intermed[7][x].read();
+            int32_t y6 = strm_intermed[5][x].read();
+            int32_t y7 = strm_intermed[3][x].read();
+
+            // Stage 1. (the "+ 4" is the rounding term for the ">> 3" that follows)
+            int32_t y8 = w7 * (y4 + y5) + 4;
+            y4 = (y8 + w1mw7 * y4) >> 3;
+            y5 = (y8 - w1pw7 * y5) >> 3;
+            y8 = w3 * (y6 + y7) + 4;
+            y6 = (y8 - w3mw5 * y6) >> 3;
+            y7 = (y8 - w3pw5 * y7) >> 3;
+
+            // Stage 2.
+            y8 = y0 + y1;
+            y0 -= y1;
+            y1 = w6 * (y3 + y2) + 4;
+            y2 = (y1 - w2pw6 * y2) >> 3;
+            y3 = (y1 + w2mw6 * y3) >> 3;
+            y1 = y4 + y6;
+            y4 -= y6;
+            y6 = y5 + y7;
+            y5 -= y7;
+
+            // Stage 3.
+            y7 = y8 + y3;
+            y8 -= y3;
+            y3 = y0 + y2;
+            y0 -= y2;
+            y2 = (r2 * (y4 + y5) + 128) >> 8;
+            y4 = (r2 * (y4 - y5) + 128) >> 8;
+
+            // Stage 4. The same eight results (each >> 11) are fanned out to up to three stream sets.
+            if (x < 2) {
+                strm_outp0[0][x].write((y7 + y1) >> 11);
+                strm_outp0[1][x].write((y3 + y2) >> 11);
+                strm_outp0[2][x].write((y0 + y4) >> 11);
+                strm_outp0[3][x].write((y8 + y6) >> 11);
+                strm_outp0[4][x].write((y8 - y6) >> 11);
+                strm_outp0[5][x].write((y0 - y4) >> 11);
+                strm_outp0[6][x].write((y3 - y2) >> 11);
+                strm_outp0[7][x].write((y7 - y1) >> 11);
+            }
+
+            strm_outp1[0][x].write((y7 + y1) >> 11);
+            strm_outp1[1][x].write((y3 + y2) >> 11);
+            /*
+             * Rows 2..7 are not written to strm_outp1: the parameter is declared
+             * as strm_outp1[2][8], so only rows 0 and 1 exist. The remaining six
+             * results are still emitted through strm_outp2 below (and through
+             * strm_outp0 when x < 2).
+             */
+
+            strm_outp2[0][x].write((y7 + y1) >> 11);
+            strm_outp2[1][x].write((y3 + y2) >> 11);
+            strm_outp2[2][x].write((y0 + y4) >> 11);
+            strm_outp2[3][x].write((y8 + y6) >> 11);
+            strm_outp2[4][x].write((y8 - y6) >> 11);
+            strm_outp2[5][x].write((y0 - y4) >> 11);
+            strm_outp2[6][x].write((y3 - y2) >> 11);
+            strm_outp2[7][x].write((y7 - y1) >> 11);
+        }
+    }
+}
+
+// Per block: buffers the 8x8 samples, forwards the DC value, and emits two edge estimates built from it.
+void hls_set_edge_here(uint16_t block_width,  // number of 8x8 blocks consumed by this call
+                       hls::stream<coef_t> strm_vertical_left[8],  // written: left[] as it was before this block
+                       hls::stream<int16_t> strm_here_h[8],  // written: estimate from data_buf[7][*] and [6][*]
+                       hls::stream<int16_t> strm_data[8][8],  // read: one sample per [i][j] per block
+                       uint8_t quantization_table_0,  // factor multiplied with the DC value
+                       hls::stream<coef_t>& str_dc,  // read: one DC value per block
+                       // output
+                       hls::stream<coef_t>& str_dc2) {  // written: copy of the value read from str_dc
+    int16_t data_buf[8][8];
+#pragma HLS ARRAY_PARTITION variable = data_buf complete
+    // left[] is static: the values computed for one block are emitted while processing the next block / call.
+    static int16_t left[8];
+#pragma HLS ARRAY_PARTITION variable = left complete
+
+    for (int jpeg_x = 0; jpeg_x + 0 < block_width; jpeg_x++) {
+        for (int i = 0; i < 8; i++) {
+#pragma HLS UNROLL
+            for (int j = 0; j < 8; j++) {
+#pragma HLS UNROLL
+                data_buf[i][j] = strm_data[i][j].read();
+            }
+        }
+        // Pass the DC value through unchanged for the downstream consumer.
+        int16_t dc = str_dc.read();
+        str_dc2.write(dc);
+        for (int i = 0; i < 8; ++i) {
+#pragma HLS PIPELINE II = 1
+            int delta = data_buf[7][i] - data_buf[6][i];  // step between the last two entries of dimension 1
+            strm_here_h[i].write(dc * quantization_table_0 + data_buf[7][i] + 128 * 8 + (delta / 2));
+            int delta2 = data_buf[i][7] - data_buf[i][6];  // step between the last two entries of dimension 2
+            strm_vertical_left[i].write(left[i]);  // emit the stored value before it is overwritten below
+            left[i] = dc * quantization_table_0 + data_buf[i][7] + 128 * 8 + (delta2 / 2);
+        }
+    }
+}
+
+// Line buffer: per block column, emits the 8 values held at [id_cmp][jpeg_x] and then stores 8 new ones there.
+void lb_ctrl_dc(ap_uint<2> id_cmp,  // first index into both storage arrays
+                uint16_t block_width,  // number of block columns processed by this call
+                bool above_present,  // not referenced in this function body
+                hls::stream<coef_t> strm_in[8],  // read: 8 values to store per block column
+                hls::stream<coef_t> strm_out[8]) {  // written: 8 previously stored values per block column
+    // clang-format off
+	static int16_t array_edge_above_uram_low[MAX_NUM_COLOR][MAX_NUM_BLOCK88_W][4];
+#pragma HLS bind_storage variable=array_edge_above_uram_low type=RAM_2P impl=URAM
+#pragma HLS ARRAY_RESHAPE variable=array_edge_above_uram_low complete dim=3
+
+	static int16_t array_edge_above_uram_high[MAX_NUM_COLOR][MAX_NUM_BLOCK88_W][4];
+#pragma HLS bind_storage variable=array_edge_above_uram_high type=RAM_2P impl=URAM
+#pragma HLS ARRAY_RESHAPE variable=array_edge_above_uram_high complete dim=3
+    // clang-format on
+    // NOTE(review): jpeg_x is not checked against MAX_NUM_BLOCK88_W; callers must keep block_width within it.
+    for (int jpeg_x = 0; jpeg_x + 0 < block_width; jpeg_x++) {
+#pragma HLS PIPELINE II = 1
+        strm_out[0].write(array_edge_above_uram_low[id_cmp][jpeg_x][0]);
+        strm_out[1].write(array_edge_above_uram_low[id_cmp][jpeg_x][1]);
+        strm_out[2].write(array_edge_above_uram_low[id_cmp][jpeg_x][2]);
+        strm_out[3].write(array_edge_above_uram_low[id_cmp][jpeg_x][3]);
+        strm_out[4].write(array_edge_above_uram_high[id_cmp][jpeg_x][0]);
+        strm_out[5].write(array_edge_above_uram_high[id_cmp][jpeg_x][1]);
+        strm_out[6].write(array_edge_above_uram_high[id_cmp][jpeg_x][2]);
+        strm_out[7].write(array_edge_above_uram_high[id_cmp][jpeg_x][3]);
+        // Old contents went out above; now overwrite the same slot with the incoming values.
+        array_edge_above_uram_low[id_cmp][jpeg_x][0] = strm_in[0].read();
+        array_edge_above_uram_low[id_cmp][jpeg_x][1] = strm_in[1].read();
+        array_edge_above_uram_low[id_cmp][jpeg_x][2] = strm_in[2].read();
+        array_edge_above_uram_low[id_cmp][jpeg_x][3] = strm_in[3].read();
+        array_edge_above_uram_high[id_cmp][jpeg_x][0] = strm_in[4].read();
+        array_edge_above_uram_high[id_cmp][jpeg_x][1] = strm_in[5].read();
+        array_edge_above_uram_high[id_cmp][jpeg_x][2] = strm_in[6].read();
+        array_edge_above_uram_high[id_cmp][jpeg_x][3] = strm_in[7].read();
+    }
+}
+
+// Dataflow wiring: hls_idct_h/_v -> hls_set_edge_here -> lb_ctrl_dc -> hls_get_estimate_v/_h.
+void hls_dc_stage1(uint16_t block_width,  // forwarded to every sub-stage
+                   bool above_present,  // forwarded to lb_ctrl_dc only
+                   // inputs
+                   hls::stream<coef_t> str_rast8[8],  // consumed by hls_idct_h
+                   uint8_t q_tables0[MAX_NUM_COLOR][8][8],  // q_tables0[id_cmp] is handed to hls_idct_h
+                   uint8_t q0,  // passed to hls_set_edge_here as quantization_table_0
+                   ap_uint<2> id_cmp,  // selects the table above and the line-buffer plane in lb_ctrl_dc
+                   hls::stream<coef_t>& str_dc_in,  // DC values, forwarded to str_dc_out by hls_set_edge_here
+                   // output
+                   hls::stream<coef_t>& str_dc_out,
+                   hls::stream<pix_edge_t>& str_est_v,
+                   hls::stream<pix_edge_t>& str_est_h) {
+#pragma HLS INLINE
+#pragma HLS dataflow
+    // clang-format off
+	static hls::stream<idctm_t> strm_intermed[8][8];
+#pragma HLS STREAM variable=strm_intermed depth=2
+#pragma HLS ARRAY_PARTITION variable=strm_intermed complete dim=0
+
+	static hls::stream<int16_t> strm_outp_sans_dc0[8][2];
+#pragma HLS STREAM variable=strm_outp_sans_dc0 depth=4
+#pragma HLS ARRAY_PARTITION variable=strm_outp_sans_dc0 complete dim=0
+
+	static hls::stream<int16_t> strm_outp_sans_dc1[2][8];
+#pragma HLS STREAM variable=strm_outp_sans_dc1 depth=4
+#pragma HLS ARRAY_PARTITION variable=strm_outp_sans_dc1 complete dim=1
+
+	static hls::stream<int16_t> strm_outp_sans_dc2[8][8];
+#pragma HLS STREAM variable=strm_outp_sans_dc2 depth=4
+#pragma HLS ARRAY_PARTITION variable=strm_outp_sans_dc2 complete dim=0
+    // clang-format on
+    // hls_idct_v receives three output stream arrays: _dc0 is [8][2], _dc1 is [2][8], _dc2 is the full [8][8].
+    hls_idct_h(block_width, str_rast8, q_tables0[id_cmp], strm_intermed);
+    hls_idct_v(block_width, strm_intermed, strm_outp_sans_dc0, strm_outp_sans_dc1, strm_outp_sans_dc2);
+
+    // clang-format off
+	static hls::stream< coef_t> strm_lb_in[8];
+#pragma HLS STREAM variable=strm_lb_in depth=32 dim=1
+#pragma HLS bind_storage variable=strm_lb_in type=FIFO impl=LUTRAM
+	static hls::stream< coef_t> strm_lb_out[8];
+#pragma HLS STREAM variable=strm_lb_out depth=32 dim=1
+#pragma HLS bind_storage variable=strm_lb_out type=FIFO impl=LUTRAM
+
+	static hls::stream< coef_t> strm_vertical_left[8];
+#pragma HLS STREAM variable=strm_vertical_left depth=32 dim=1
+#pragma HLS bind_storage variable=strm_vertical_left type=FIFO impl=LUTRAM
+    // clang-format on
+    // strm_lb_in is hls_set_edge_here's strm_here_h output; lb_ctrl_dc stores it and replays older values.
+    hls_set_edge_here(block_width, strm_vertical_left, strm_lb_in, strm_outp_sans_dc2, q0, str_dc_in, str_dc_out);
+    lb_ctrl_dc(id_cmp, block_width, above_present, strm_lb_in, strm_lb_out);
+    // The estimate stages each take one edge stream array plus one reduced hls_idct_v output array.
+    hls_get_estimate_v(block_width, strm_vertical_left, strm_outp_sans_dc0, str_est_v);
+    hls_get_estimate_h(block_width, strm_lb_out, strm_outp_sans_dc1, str_est_h);
+}
+
+// Adds (recover_original) or subtracts the prediction from saved_dc, then wraps once into [-2^(E-1), 2^(E-1)].
+int16_t hls_adv_predict_or_unpredict_dc(int16_t saved_dc, bool recover_original, int16_t predicted_val) {
+    const int16_t upper = (1 << (MAX_EXPONENT_PIX - 1));
+    const int16_t lower = -upper;
+    const int16_t wrap = 2 * upper + 1;
+    const int delta = recover_original ? predicted_val : -predicted_val;
+    int16_t result = saved_dc + delta;
+    if (result < lower) result += wrap;
+    if (result > upper) result -= wrap;
+    return result;
+}
+
+// Serializes each DC value into exponent, sign and residual bit tokens, each with selector and addresses.
+void dc_push_bit(uint16_t block_width,  // number of coefficients (one per block) to serialize
+                 // inputs: one element is read from each of these per coefficient
+                 hls::stream<int16_t>& strm_coef,
+                 hls::stream<int>& strm_uncertainty,
+                 hls::stream<int>& strm_uncertainty2,
+                 // outputs: one element is written to each of these per emitted bit
+                 hls::stream<ap_uint<4> >& strm_sel_tab,
+                 hls::stream<bool>& strm_cur_bit,
+                 hls::stream<bool>& strm_e,
+                 hls::stream<ap_uint<16> >& strm_addr1,
+                 hls::stream<ap_uint<16> >& strm_addr2,
+                 hls::stream<ap_uint<16> >& strm_addr3
+                 // strm_e additionally receives a lone `true` after the last bit of every coefficient
+                 ) {
+    for (int jpeg_x = 0; jpeg_x + 0 < block_width; jpeg_x++) {
+        int16_t coef = strm_coef.read();
+        int uncertainty = strm_uncertainty.read();
+        int uncertainty2 = strm_uncertainty2.read();
+        // Bit lengths (16 minus leading zeros) of |coef| and of the two uncertainty magnitudes.
+        ap_int<16> abs_coef = _MACRO_ABS(coef);
+        uint8_t length = 16 - abs_coef.countLeadingZeros();
+        ap_int<16> tmp1 = _MACRO_ABS(uncertainty);
+        ap_int<16> tmp2 = _MACRO_ABS(uncertainty2);
+        uint16_t len_abs_mxm = (16 - tmp1.countLeadingZeros());
+        uint16_t len_abs_offset_to_closest_edge = (16 - tmp2.countLeadingZeros());
+        ap_uint<4> addr_2 = _MACRO_MIN(len_abs_mxm, 11); //(uint16_t)(Model::ExponentCountsDC::size0 - 1));
+        ap_uint<5> addr_1 =
+            _MACRO_MIN(len_abs_offset_to_closest_edge, 16); //(uint16_t)(Model::ExponentCountsDC::size1 - 1));
+        // Exponent: `length` true bits followed by one terminating false bit (length + 1 tokens).
+    CUR_BIT_OUT1:
+        for (unsigned int i = 0; i < length + 1; ++i) {
+#pragma HLS PIPELINE II = 1
+            // addr3 carries the position of this bit within the exponent.
+            bool cur_bit = (length != i);
+            strm_sel_tab.write(EXP_CNT_DC);
+            strm_cur_bit.write(cur_bit);
+            strm_e.write(false);
+            strm_addr1.write(addr_2);
+            strm_addr2.write(addr_1);
+            strm_addr3.write(i);
+        }
+        // Sign token, only for non-zero coefficients; addr2 is 3 / 2 / 1 for uncertainty2 == 0 / > 0 / < 0.
+        if (length != 0) {
+            ap_uint<2> addr_1 = 0;
+            ap_uint<4> addr_0 = uncertainty2 >= 0 ? uncertainty2 == 0 ? 3 : 2 : 1;
+            strm_sel_tab.write(SIGN_CNT);
+            strm_cur_bit.write(coef >= 0);
+            strm_e.write(false);
+            strm_addr1.write(addr_1);
+            strm_addr2.write(addr_0);
+            strm_addr3.write(0);
+        }
+        if (length > 1) {
+            ap_uint<4> addr_1 = _MACRO_MIN(11, len_abs_mxm); // std::min((uint16_t)(11), len_abs_mxm);
+        CUR_BIT_OUT2:
+            for (int i = length - 2; i >= 0; --i) {
+#pragma HLS PIPELINE II = 1
+                // Residual bits below the leading one, most significant first;
+                // addr2 carries the bit position.
+                strm_sel_tab.write(NOIS_CNT_DC);
+                strm_cur_bit.write(abs_coef[i]);
+                strm_e.write(false);
+                strm_addr1.write(addr_1);
+                strm_addr2.write(i);
+                strm_addr3.write(0);
+            }
+        }
+        strm_e.write(true);
+    }
+}
+
+// Single-loop state-machine form of the DC token serializer: one token per iteration, count sent on strm_len.
+void dc_push_bit_v2(uint16_t block_width,  // number of coefficients (one per block) to serialize
+                    // inputs: one element is read from each of these per coefficient
+                    hls::stream<int16_t>& strm_coef,
+                    hls::stream<int>& strm_uncertainty,
+                    hls::stream<int>& strm_uncertainty2,
+                    // outputs: one element per token, except strm_len which gets one count per coefficient
+                    hls::stream<ap_uint<4> >& strm_sel_tab,
+                    hls::stream<bool>& strm_cur_bit,
+                    hls::stream<short>& strm_len,
+                    //		hls::stream<bool>		 & strm_e,
+                    hls::stream<ap_uint<16> >& strm_addr1,
+                    hls::stream<ap_uint<16> >& strm_addr2,
+                    hls::stream<ap_uint<16> >& strm_addr3
+
+                    ) {
+    int state;
+    int cnt = 0;
+    int i = 0;
+    int j = 0;
+    int jpeg_x = 0;
+    int16_t coef;
+    int uncertainty;
+    int uncertainty2;
+
+    ap_int<16> abs_coef;
+    uint8_t length;
+    ap_int<16> tmp1;
+    ap_int<16> tmp2;
+    uint16_t len_abs_mxm;
+    uint16_t len_abs_offset_to_closest_edge;
+
+    ap_uint<4> addr_2;
+    ap_uint<5> addr_1;
+    ap_uint<4> addr_0;
+    // NOTE(review): the first triple is read before block_width is tested, so block_width == 0 would block here.
+    coef = strm_coef.read();
+    uncertainty = strm_uncertainty.read();
+    uncertainty2 = strm_uncertainty2.read();
+    // Bit lengths (16 minus leading zeros) of |coef| and of the two uncertainty magnitudes.
+    abs_coef = _MACRO_ABS(coef);
+    length = 16 - abs_coef.countLeadingZeros();
+    tmp1 = _MACRO_ABS(uncertainty);
+    tmp2 = _MACRO_ABS(uncertainty2);
+    len_abs_mxm = (16 - tmp1.countLeadingZeros());
+    len_abs_offset_to_closest_edge = (16 - tmp2.countLeadingZeros());
+    addr_2 = _MACRO_MIN(len_abs_mxm, 11);                    //(uint16_t)(Model::ExponentCountsDC::size0 - 1));
+    addr_1 = _MACRO_MIN(len_abs_offset_to_closest_edge, 16); //(uint16_t)(Model::ExponentCountsDC::size1 - 1));
+    i = 0;
+    state = 1;
+    // States: 1 = exponent bits, 2 = sign bit, 4 = residual bits. Each iteration emits exactly one token.
+    while (jpeg_x < block_width) {
+#pragma HLS PIPELINE II = 1
+        if (state == 1) {
+            addr_0 = i;
+            bool cur_bit = (length != i);
+            strm_sel_tab.write(EXP_CNT_DC);
+            strm_cur_bit.write(cur_bit);
+            //                strm_e.write(false);
+            cnt++;
+            strm_addr1.write(addr_2);
+            strm_addr2.write(addr_1);
+            strm_addr3.write(i);
+            if (i == length && length != 0)
+                state = 2;
+            else if (i == length && length == 0) {
+                strm_len.write(cnt);
+                cnt = 0;
+                jpeg_x++;
+                if (jpeg_x < block_width) {
+                    coef = strm_coef.read();
+                    uncertainty = strm_uncertainty.read();
+                    uncertainty2 = strm_uncertainty2.read();
+
+                    abs_coef = _MACRO_ABS(coef);
+                    length = 16 - abs_coef.countLeadingZeros();
+                    tmp1 = _MACRO_ABS(uncertainty);
+                    tmp2 = _MACRO_ABS(uncertainty2);
+                    len_abs_mxm = (16 - tmp1.countLeadingZeros());
+                    len_abs_offset_to_closest_edge = (16 - tmp2.countLeadingZeros());
+                    addr_2 = _MACRO_MIN(len_abs_mxm, 11); //(uint16_t)(Model::ExponentCountsDC::size0 - 1));
+                    addr_1 = _MACRO_MIN(len_abs_offset_to_closest_edge,
+                                        16); //(uint16_t)(Model::ExponentCountsDC::size1 - 1));
+                    i = 0;
+                    state = 1;
+                }
+            } else {
+                i++;
+            }
+        } else if (state == 2) {
+            addr_1 = 0;
+            addr_0 = uncertainty2 >= 0 ? uncertainty2 == 0 ? 3 : 2 : 1;
+            strm_sel_tab.write(SIGN_CNT);
+            strm_cur_bit.write(coef >= 0);
+            //                strm_e.write(false);
+            cnt++;
+            strm_addr1.write(addr_1);
+            strm_addr2.write(addr_0);
+            strm_addr3.write(0);
+            // Residual bits exist only when |coef| has more than one significant bit.
+            if (length > 1) {
+                j = length - 2;
+                state = 4;
+            } else {
+                strm_len.write(cnt);
+                cnt = 0;
+                jpeg_x++;
+                if (jpeg_x < block_width) {
+                    coef = strm_coef.read();
+                    uncertainty = strm_uncertainty.read();
+                    uncertainty2 = strm_uncertainty2.read();
+
+                    abs_coef = _MACRO_ABS(coef);
+                    length = 16 - abs_coef.countLeadingZeros();
+                    tmp1 = _MACRO_ABS(uncertainty);
+                    tmp2 = _MACRO_ABS(uncertainty2);
+                    len_abs_mxm = (16 - tmp1.countLeadingZeros());
+                    len_abs_offset_to_closest_edge = (16 - tmp2.countLeadingZeros());
+                    addr_2 = _MACRO_MIN(len_abs_mxm, 11); //(uint16_t)(Model::ExponentCountsDC::size0 - 1));
+                    addr_1 = _MACRO_MIN(len_abs_offset_to_closest_edge,
+                                        16); //(uint16_t)(Model::ExponentCountsDC::size1 - 1));
+                    i = 0;
+                    state = 1;
+                }
+            }
+        } else if (state == 4) {
+            addr_1 = _MACRO_MIN(11, len_abs_mxm);
+
+            strm_sel_tab.write(NOIS_CNT_DC);
+            strm_cur_bit.write(abs_coef[j]);
+            //                    strm_e.write(false);
+            cnt++;
+            strm_addr1.write(addr_1);
+            strm_addr2.write(j);
+            strm_addr3.write(0);
+            // j == 0 is the last residual bit: close this coefficient and load the next one if any remain.
+            if (j == 0) {
+                strm_len.write(cnt);
+                cnt = 0;
+                jpeg_x++;
+                if (jpeg_x < block_width) {
+                    coef = strm_coef.read();
+                    uncertainty = strm_uncertainty.read();
+                    uncertainty2 = strm_uncertainty2.read();
+
+                    abs_coef = _MACRO_ABS(coef);
+                    length = 16 - abs_coef.countLeadingZeros();
+                    tmp1 = _MACRO_ABS(uncertainty);
+                    tmp2 = _MACRO_ABS(uncertainty2);
+                    len_abs_mxm = (16 - tmp1.countLeadingZeros());
+                    len_abs_offset_to_closest_edge = (16 - tmp2.countLeadingZeros());
+                    addr_2 = _MACRO_MIN(len_abs_mxm, 11); //(uint16_t)(Model::ExponentCountsDC::size0 - 1));
+                    addr_1 = _MACRO_MIN(len_abs_offset_to_closest_edge,
+                                        16); //(uint16_t)(Model::ExponentCountsDC::size1 - 1));
+                    i = 0;
+                    state = 1;
+                }
+            }
+            j--;
+        }
+    }
+}
+
+// Per block: reads the DC value, obtains a prediction, and emits the residual plus both uncertainty values.
+void dc_out(uint16_t block_width,
+            bool above_present,
+            hls::stream<coef_t>& str_dc_in,
+            uint16_t q0,
+            hls::stream<pix_edge_t>& str_est_v,
+            hls::stream<pix_edge_t>& str_est_h,
+            hls::stream<int16_t>& strm_coef,
+            hls::stream<int>& strm_uncertainty,
+            hls::stream<int>& strm_uncertainty2) {
+    for (int blk = 0; blk < block_width; ++blk) {
+#pragma HLS PIPELINE II = 1
+        // The DC value is consumed before the predictor is invoked on the two estimate streams.
+        int16_t actual_dc = str_dc_in.read();
+        int spread = 0; // this is how far off our max estimate vs min estimate is
+        int edge_offset = 0;
+        // Flag is false for the first block, otherwise true whenever the row holds more than one block.
+        bool has_left = (blk != 0) && (block_width > 1);
+        // clang-format off
+        int estimate = hls_adv_predict_dc_pix(
+                                                                    has_left,
+                                                                    above_present,
+                                                                    q0,
+                                                                    str_est_v,
+                                                                    str_est_h,
+                                                                    &spread,
+                                                                    &edge_offset);
+        // clang-format on
+        int16_t residual = hls_adv_predict_or_unpredict_dc(actual_dc, false, estimate);
+        strm_coef.write(residual);
+        strm_uncertainty.write(spread);
+        strm_uncertainty2.write(edge_offset);
+    }
+}
+
+// Drains eight words from each of the eight input streams into coef_buff; block_width is not used here.
+void jpeg_zigzag_to_array(uint16_t block_width, hls::stream<coef_t> strm_in[8], coef_t coef_buff[64]) {
+    for (int group = 0; group < 8; ++group) {
+#pragma HLS PIPELINE II = 1
+        for (int lane = 0; lane < 8; ++lane) {
+#pragma HLS UNROLL
+            coef_buff[group * 8 + lane] = strm_in[lane].read();
+        }
+    }
+}
+
+// Emits coef_buff as eight 8-wide rows; the indices per row match the standard JPEG zigzag index table.
+void array_to_raster(uint16_t block_width, coef_t coef_buff[64], hls::stream<coef_t> strm_out[8]) {  // block_width: unused
+    int cnt = 0;  // row emitted in the current iteration; strm_out[c] receives column c of that row
+    while (cnt < 8) {
+#pragma HLS PIPELINE II = 1
+        if (cnt == 0) {  // row 0
+            strm_out[0].write(coef_buff[0]);
+            strm_out[1].write(coef_buff[1]);
+            strm_out[2].write(coef_buff[5]);
+            strm_out[3].write(coef_buff[6]);
+            strm_out[4].write(coef_buff[14]);
+            strm_out[5].write(coef_buff[15]);
+            strm_out[6].write(coef_buff[27]);
+            strm_out[7].write(coef_buff[28]);
+        } else if (cnt == 1) {  // row 1
+            strm_out[0].write(coef_buff[2]);
+            strm_out[1].write(coef_buff[4]);
+            strm_out[2].write(coef_buff[7]);
+            strm_out[3].write(coef_buff[13]);
+            strm_out[4].write(coef_buff[16]);
+            strm_out[5].write(coef_buff[26]);
+            strm_out[6].write(coef_buff[29]);
+            strm_out[7].write(coef_buff[42]);
+        } else if (cnt == 2) {  // row 2
+            strm_out[0].write(coef_buff[3]);
+            strm_out[1].write(coef_buff[8]);
+            strm_out[2].write(coef_buff[12]);
+            strm_out[3].write(coef_buff[17]);
+            strm_out[4].write(coef_buff[25]);
+            strm_out[5].write(coef_buff[30]);
+            strm_out[6].write(coef_buff[41]);
+            strm_out[7].write(coef_buff[43]);
+        } else if (cnt == 3) {  // row 3
+            strm_out[0].write(coef_buff[9]);
+            strm_out[1].write(coef_buff[11]);
+            strm_out[2].write(coef_buff[18]);
+            strm_out[3].write(coef_buff[24]);
+            strm_out[4].write(coef_buff[31]);
+            strm_out[5].write(coef_buff[40]);
+            strm_out[6].write(coef_buff[44]);
+            strm_out[7].write(coef_buff[53]);
+        } else if (cnt == 4) {  // row 4
+            strm_out[0].write(coef_buff[10]);
+            strm_out[1].write(coef_buff[19]);
+            strm_out[2].write(coef_buff[23]);
+            strm_out[3].write(coef_buff[32]);
+            strm_out[4].write(coef_buff[39]);
+            strm_out[5].write(coef_buff[45]);
+            strm_out[6].write(coef_buff[52]);
+            strm_out[7].write(coef_buff[54]);
+        } else if (cnt == 5) {  // row 5
+            strm_out[0].write(coef_buff[20]);
+            strm_out[1].write(coef_buff[22]);
+            strm_out[2].write(coef_buff[33]);
+            strm_out[3].write(coef_buff[38]);
+            strm_out[4].write(coef_buff[46]);
+            strm_out[5].write(coef_buff[51]);
+            strm_out[6].write(coef_buff[55]);
+            strm_out[7].write(coef_buff[60]);
+        } else if (cnt == 6) {  // row 6
+            strm_out[0].write(coef_buff[21]);
+            strm_out[1].write(coef_buff[34]);
+            strm_out[2].write(coef_buff[37]);
+            strm_out[3].write(coef_buff[47]);
+            strm_out[4].write(coef_buff[50]);
+            strm_out[5].write(coef_buff[56]);
+            strm_out[6].write(coef_buff[59]);
+            strm_out[7].write(coef_buff[61]);
+        } else if (cnt == 7) {  // row 7
+            strm_out[0].write(coef_buff[35]);
+            strm_out[1].write(coef_buff[36]);
+            strm_out[2].write(coef_buff[48]);
+            strm_out[3].write(coef_buff[49]);
+            strm_out[4].write(coef_buff[57]);
+            strm_out[5].write(coef_buff[58]);
+            strm_out[6].write(coef_buff[62]);
+            strm_out[7].write(coef_buff[63]);
+        }
+        cnt++;
+    }
+}
+
+// Reorders one block: strm_in is collected into a 64-entry buffer, then replayed row by row on strm_out.
+void jpeg_zigzag_to_raster(uint16_t block_width, hls::stream<coef_t> strm_in[8], hls::stream<coef_t> strm_out[8]) {
+//#pragma HLS INLINE
+#pragma HLS DATAFLOW
+    // Intermediate buffer between the two stages; written by the first call, read by the second.
+    coef_t coef_buff[64];
+#pragma HLS ARRAY_PARTITION variable = coef_buff complete dim = 0
+    // block_width is only passed through; neither callee references it.
+    jpeg_zigzag_to_array(block_width, strm_in, coef_buff);
+    array_to_raster(block_width, coef_buff, strm_out);
+}
+
+// Dataflow pipeline: jpeg_zigzag_to_raster per block -> hls_dc_stage1 -> dc_out -> dc_push_bit_v2.
+void hls_serialize_tokens_dc(bool above_present,  // forwarded to hls_dc_stage1 and dc_out
+                             ap_uint<2> id_cmp,  // forwarded to hls_dc_stage1
+                             uint16_t block_width,  // number of blocks; forwarded to every stage
+                             uint8_t q_tables0[MAX_NUM_COLOR][8][8],
+                             uint8_t q0,
+                             // inputs
+                             hls::stream<coef_t> strm_in[8],
+                             hls::stream<coef_t>& str_dc_in,
+                             // outputs, all produced by dc_push_bit_v2
+                             hls::stream<ap_uint<4> >& strm_sel_tab,
+                             hls::stream<bool>& strm_cur_bit,
+                             hls::stream<short>& strm_len,
+                             //		hls::stream<bool>		 & strm_e,
+                             hls::stream<ap_uint<16> >& strm_addr1,
+                             hls::stream<ap_uint<16> >& strm_addr2,
+                             hls::stream<ap_uint<16> >& strm_addr3
+
+                             ) {
+#pragma HLS INLINE
+#pragma HLS DATAFLOW
+    // clang-format off
+    hls::stream< coef_t> str_dc2;
+#pragma HLS STREAM variable=str_dc2 depth=32 dim=1
+#pragma HLS bind_storage variable=str_dc2 type=FIFO impl=LUTRAM
+
+	hls::stream< pix_edge_t> str_est_v;
+#pragma HLS STREAM variable=str_est_v depth=32 dim=1
+#pragma HLS bind_storage variable=str_est_v type=FIFO impl=LUTRAM
+
+	hls::stream< pix_edge_t> str_est_h;
+#pragma HLS STREAM variable=str_est_h depth=32 dim=1
+#pragma HLS bind_storage variable=str_est_h type=FIFO impl=LUTRAM
+
+    hls::stream<int16_t> strm_coef;
+#pragma HLS STREAM variable=strm_coef depth=32 dim=1
+#pragma HLS bind_storage variable=strm_coef type=FIFO impl=LUTRAM
+
+    hls::stream<int>     strm_uncertainty("uncertainty");
+#pragma HLS bind_storage variable=strm_uncertainty type=FIFO impl=LUTRAM
+#pragma HLS STREAM variable=strm_uncertainty depth=32 dim=1
+
+    hls::stream<int>     strm_uncertainty2("uncertainty2");
+#pragma HLS bind_storage variable=strm_uncertainty2 type=FIFO impl=LUTRAM
+#pragma HLS STREAM variable=strm_uncertainty2 depth=32 dim=1
+    // NOTE(review): the STREAM pragma below names strm_in, not str_rast8 -- confirm this is intended.
+	hls::stream< coef_t>  str_rast8[8];
+#pragma HLS bind_storage variable=str_rast8 type=FIFO impl=LUTRAM
+#pragma HLS ARRAY_PARTITION variable=str_rast8 complete dim=1
+#pragma HLS STREAM variable=strm_in depth=32 dim=1
+    // clang-format on
+    for (int jpeg_x = 0; jpeg_x + 0 < block_width; jpeg_x++) {  // one reorder call per block
+        jpeg_zigzag_to_raster(block_width, strm_in, str_rast8);
+    }
+#pragma HLS ARRAY_PARTITION variable = q_tables0 complete dim = 2
+
+    hls_dc_stage1(block_width, above_present, str_rast8, q_tables0, q0, id_cmp,
+
+                  str_dc_in, str_dc2,
+                  // Output
+                  str_est_v, str_est_h);
+    // dc_out turns the forwarded DC values and the two estimate streams into the three serializer inputs.
+    dc_out(block_width, above_present, str_dc2, q0, str_est_v, str_est_h, strm_coef, strm_uncertainty,
+           strm_uncertainty2);
+
+    dc_push_bit_v2(block_width, strm_coef, strm_uncertainty, strm_uncertainty2,
+
+                   strm_sel_tab, strm_cur_bit, strm_len,
+                   //		strm_e,
+                   strm_addr1, strm_addr2, strm_addr3
+
+                   );
+}
+
+// Same front end as hls_serialize_tokens_dc but stops after dc_out; no bit tokens are produced here.
+void pre_serialize_tokens_dc(bool above_present,  // forwarded to hls_dc_stage1 and dc_out
+                             ap_uint<2> id_cmp,  // forwarded to hls_dc_stage1
+                             uint16_t block_width,  // number of blocks; forwarded to every stage
+                             uint8_t q_tables0[MAX_NUM_COLOR][8][8],
+                             uint8_t q0,
+                             // inputs
+                             hls::stream<coef_t> strm_in[8],
+                             hls::stream<coef_t>& str_dc_in,
+                             // outputs, all written by dc_out
+                             hls::stream<int16_t>& strm_coef,
+                             hls::stream<int>& strm_uncertainty,
+                             hls::stream<int>& strm_uncertainty2
+
+                             ) {
+#pragma HLS INLINE
+#pragma HLS DATAFLOW
+    // clang-format off
+    hls::stream< coef_t> str_dc2;
+#pragma HLS STREAM variable=str_dc2 depth=32 dim=1
+#pragma HLS bind_storage variable=str_dc2 type=FIFO impl=LUTRAM
+
+	hls::stream< pix_edge_t> str_est_v;
+#pragma HLS STREAM variable=str_est_v depth=32 dim=1
+#pragma HLS bind_storage variable=str_est_v type=FIFO impl=LUTRAM
+
+	hls::stream< pix_edge_t> str_est_h;
+#pragma HLS STREAM variable=str_est_h depth=32 dim=1
+#pragma HLS bind_storage variable=str_est_h type=FIFO impl=LUTRAM
+    // NOTE(review): the STREAM pragma below names strm_in, not str_rast8 -- confirm this is intended.
+	hls::stream< coef_t>  str_rast8[8];
+#pragma HLS bind_storage variable=str_rast8 type=FIFO impl=LUTRAM
+#pragma HLS ARRAY_PARTITION variable=str_rast8 complete dim=1
+#pragma HLS STREAM variable=strm_in depth=32 dim=1
+    // clang-format on
+    for (int jpeg_x = 0; jpeg_x + 0 < block_width; jpeg_x++) {  // one reorder call per block
+        jpeg_zigzag_to_raster(block_width, strm_in, str_rast8);
+    }
+#pragma HLS ARRAY_PARTITION variable = q_tables0 complete dim = 2
+
+    hls_dc_stage1(block_width, above_present, str_rast8, q_tables0, q0, id_cmp,
+
+                  str_dc_in, str_dc2,
+                  // Output
+                  str_est_v, str_est_h);
+    // dc_out turns the forwarded DC values and the two estimate streams into the three output streams.
+    dc_out(block_width, above_present, str_dc2, q0, str_est_v, str_est_h, strm_coef, strm_uncertainty,
+           strm_uncertainty2);
+}
+
+} // namespace details
+} // namespace codec
+} // namespace xf
diff --git a/codec/L2/demos/leptonEnc/kernel/XAcc_edges.cpp b/codec/L2/demos/leptonEnc/kernel/XAcc_edges.cpp
new file mode 100644
index 0000000000..a0bb5c8233
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/kernel/XAcc_edges.cpp
@@ -0,0 +1,1046 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "XAcc_edges.hpp"
+
+// edge_len: token counts for one serializer step, produced by collect_len_edge and consumed by push_bit_edge_len.
+struct edge_len {
+    ap_uint<2> lennz;   // NZ-count tokens to emit: producer sets 3 on the first record of an h/v pass, else 0
+    ap_uint<4> lenexp;  // EXP_CNT_X tokens to emit: producer stores length + 1 (0 when no coefficient is pending)
+    ap_uint<1> lensign; // 1 when a SIGN_CNT token follows (producer: length > 0)
+    ap_uint<4> lenthr;  // THRE_CNT tokens to emit: one per bit index from length-2 down to min_threshold
+    ap_uint<2> lennos;  // NOIS_CNT tokens to emit for the remaining low bits; NOTE(review): 2 bits - confirm max
+    bool is_h;          // true: horizontal pass (NZ_CNT_8x1, eob_x); false: vertical pass (NZ_CNT_1x8, eob_y)
+};
+
+struct taken_dat { // staging record for one output token; push_bit_edge_len fills it, then writes it to its streams
+    ap_uint<4> sel_tab; // table selector written to strm_sel_tab (NZ_CNT_8x1, EXP_CNT_X, SIGN_CNT, ...)
+    bool cur_bit;       // bit value written to strm_cur_bit
+    bool e;             // only ever set to false by push_bit_edge_len; its stream write is commented out there
+    ap_uint<16> addr1;  // addr1..addr4: the four fields written to strm_addr1..strm_addr4 for this token
+    ap_uint<16> addr2;
+    ap_uint<16> addr3;
+    ap_uint<16> addr4;
+};
+
+namespace xf {
+namespace codec {
+namespace details {
+
+// push_bit_edge: nested-loop serializer; per block it emits the horizontal (h) then the vertical (v) edge tokens.
+void push_bit_edge(ap_uint<32> block_width, // number of blocks; the outer loop runs once per block
+                   hls::stream<ap_uint<6> >& strm_num_nonzeros_7x7, // in: read once per block, sent as (x + 3) / 7
+                   // Horizontal inputs: nz_len and eob are read once per block, the rest once per coefficient entry.
+                   hls::stream<ap_uint<3> >& strm_h_nz_len,
+                   hls::stream<ap_uint<3> >& strm_eob_x,
+                   hls::stream<ap_uint<4> >& strm_length_h,
+
+                   hls::stream<ap_uint<4> >& strm_best_prior_exp_h,
+                   // The two sign streams are only read for entries whose length is non-zero.
+                   hls::stream<bool>& strm_cur_bit_sign_h,
+                   hls::stream<ap_uint<2> >& strm_tri_sign_h,
+
+                   hls::stream<ap_uint<11> >& strm_abs_coef_nois_h,
+                   hls::stream<ap_uint<8> >& strm_ctx_nois_h,
+                   hls::stream<ap_uint<8> >& strm_min_nois_h,
+                   hls::stream<ap_uint<6> >& strm_coord_nois_h,
+                   // Vertical inputs: consumed exactly like their horizontal counterparts above.
+                   hls::stream<ap_uint<3> >& strm_v_nz_len,
+                   hls::stream<ap_uint<3> >& strm_eob_y,
+                   hls::stream<ap_uint<4> >& strm_length_v,
+
+                   hls::stream<ap_uint<4> >& strm_best_prior_exp_v,
+
+                   hls::stream<bool>& strm_cur_bit_sign_v,
+                   hls::stream<ap_uint<2> >& strm_tri_sign_v,
+
+                   hls::stream<ap_uint<11> >& strm_abs_coef_nois_v,
+                   hls::stream<ap_uint<8> >& strm_ctx_nois_v,
+                   hls::stream<ap_uint<8> >& strm_min_nois_v,
+                   hls::stream<ap_uint<6> >& strm_coord_nois_v,
+                   // Outputs: each token is one write to all seven streams; strm_e also gets one `true` per block.
+                   hls::stream<ap_uint<4> >& strm_sel_tab,
+                   hls::stream<bool>& strm_cur_bit,
+                   hls::stream<bool>& strm_e,
+                   hls::stream<ap_uint<16> >& strm_addr1,
+                   hls::stream<ap_uint<16> >& strm_addr2,
+                   hls::stream<ap_uint<16> >& strm_addr3,
+                   hls::stream<ap_uint<16> >& strm_addr4) {
+    int j = 0;
+    int i = 0;
+    int cnt = 0; // entry counter sent as addr2 of EXP_CNT_X tokens: restarts at 0 for h and at 7 for v
+    while (j < block_width) {
+        ap_uint<3> serialized_so_far = 0;
+        ap_uint<3> num_nonzeros_edge = strm_h_nz_len.read();
+        ap_uint<6> nz_77 = strm_num_nonzeros_7x7.read();
+        ap_uint<3> eob_x = strm_eob_x.read();
+        // Horizontal count: three NZ_CNT_8x1 tokens, MSB first; addr4 carries the bits emitted so far.
+        i = 2;
+        while (i >= 0) {
+#pragma HLS pipeline II = 1
+            bool cur_bit = (num_nonzeros_edge & (1 << i)) ? 1 : 0;
+            strm_sel_tab.write(NZ_CNT_8x1);
+            strm_cur_bit.write(cur_bit);
+            strm_e.write(false);
+            strm_addr1.write(eob_x);
+            strm_addr2.write((nz_77 + 3) / 7);
+            strm_addr3.write(i);
+            strm_addr4.write(serialized_so_far);
+
+            serialized_so_far <<= 1;
+            serialized_so_far.set(0, cur_bit);
+            i--;
+        }
+        // Horizontal entries: loop until num_nonzeros_edge entries with a non-zero length have been coded.
+        cnt = 0;
+        while (num_nonzeros_edge > 0) {
+            ap_uint<4> length = strm_length_h.read();
+            ap_uint<4> bsr = strm_best_prior_exp_h.read();
+            // Exponent: length + 1 EXP_CNT_X tokens; the bit is 1 while i < length and 0 on the last token.
+            i = 0;
+            while (i < length + 1) {
+#pragma HLS pipeline II = 1
+                bool cur_bit = (length != i);
+                strm_sel_tab.write(EXP_CNT_X);
+                strm_cur_bit.write(cur_bit);
+                strm_e.write(false);
+                strm_addr1.write(num_nonzeros_edge);
+                strm_addr2.write(cnt);
+                strm_addr3.write(bsr);
+                strm_addr4.write(i);
+                i++;
+            }
+            // Sign: one SIGN_CNT token, skipped (and the sign streams left unread) for zero-length entries.
+            if (length != 0) {
+                strm_sel_tab.write(SIGN_CNT);
+                strm_cur_bit.write(strm_cur_bit_sign_h.read());
+                strm_e.write(false);
+                strm_addr1.write(strm_tri_sign_h.read());
+                strm_addr2.write(bsr);
+                strm_addr3.write(0);
+                strm_addr4.write(0);
+            }
+            // Bits length-2 .. 0 of abs_coef follow; the four streams below are read for every entry.
+            i = length - 2;
+            ap_uint<8> encoded_so_far = 1;
+            ap_uint<11> abs_coef = strm_abs_coef_nois_h.read();
+            uint16_t ctx_nois = strm_ctx_nois_h.read();
+            uint8_t min_threshold = strm_min_nois_h.read();
+            unsigned int coord = strm_coord_nois_h.read();
+            // Bit indices >= min_threshold are sent as THRE_CNT tokens with the running prefix in addr3.
+            while (i >= min_threshold) {
+#pragma HLS pipeline II = 1
+                bool cur_bit = (abs_coef & (1 << i)) ? 1 : 0;
+                strm_sel_tab.write(THRE_CNT);
+                strm_cur_bit.write(cur_bit);
+                strm_e.write(false);
+                strm_addr1.write(ctx_nois);
+                strm_addr2.write(length - min_threshold);
+                strm_addr3.write(encoded_so_far);
+                strm_addr4.write(0);
+
+                encoded_so_far <<= 1;
+                if (cur_bit) {
+                    encoded_so_far.set(0, cur_bit);
+                }
+                // since we are not strict about rejecting jpegs with out of range coefs
+                // we just make those less efficient by reusing the same probability bucket
+
+                if (encoded_so_far > 127) encoded_so_far = 127;
+                i--;
+            }
+            // The remaining bit indices (below min_threshold) are sent as NOIS_CNT tokens.
+            while (i >= 0) {
+#pragma HLS pipeline II = 1
+                bool cur_bit = (abs_coef & (1 << i)) ? 1 : 0;
+                strm_sel_tab.write(NOIS_CNT);
+                strm_cur_bit.write(cur_bit);
+                strm_e.write(false);
+                strm_addr1.write(coord);
+                strm_addr2.write(num_nonzeros_edge);
+                strm_addr3.write(i);
+                strm_addr4.write(0);
+                i--;
+            }
+            // Zero-length entries do not count towards num_nonzeros_edge, but still advance cnt.
+            if (length != 0) num_nonzeros_edge--;
+            cnt++;
+        }
+        // ---- vertical pass: same sequence as above using the *_v streams, NZ_CNT_1x8 and eob_y ----
+        serialized_so_far = 0;
+        num_nonzeros_edge = strm_v_nz_len.read();
+        ap_uint<3> eob_y = strm_eob_y.read();
+        // Vertical count: three NZ_CNT_1x8 tokens, MSB first.
+        i = 2;
+        while (i >= 0) {
+#pragma HLS pipeline II = 1
+            bool cur_bit = (num_nonzeros_edge & (1 << i)) ? 1 : 0;
+            strm_sel_tab.write(NZ_CNT_1x8);
+            strm_cur_bit.write(cur_bit);
+            strm_e.write(false);
+            strm_addr1.write(eob_y);
+            strm_addr2.write((nz_77 + 3) / 7);
+            strm_addr3.write(i);
+            strm_addr4.write(serialized_so_far);
+
+            serialized_so_far <<= 1;
+            serialized_so_far.set(0, cur_bit);
+            i--;
+        }
+        // Vertical entries: cnt starts at 7 here instead of 0.
+        cnt = 7;
+        while (num_nonzeros_edge > 0) {
+            ap_uint<4> length = strm_length_v.read();
+            ap_uint<4> bsr = strm_best_prior_exp_v.read();
+            // Exponent tokens (length + 1 of them).
+            i = 0;
+            while (i < length + 1) {
+#pragma HLS pipeline II = 1
+                bool cur_bit = (length != i);
+                strm_sel_tab.write(EXP_CNT_X);
+                strm_cur_bit.write(cur_bit);
+                strm_e.write(false);
+                strm_addr1.write(num_nonzeros_edge);
+                strm_addr2.write(cnt);
+                strm_addr3.write(bsr);
+                strm_addr4.write(i);
+                i++;
+            }
+            // Sign token, only for non-zero length.
+            if (length != 0) {
+                strm_sel_tab.write(SIGN_CNT);
+                strm_cur_bit.write(strm_cur_bit_sign_v.read());
+                strm_e.write(false);
+                strm_addr1.write(strm_tri_sign_v.read());
+                strm_addr2.write(bsr);
+                strm_addr3.write(0);
+                strm_addr4.write(0);
+            }
+            // Bits length-2 .. 0 of abs_coef; the four streams below are read for every entry.
+            i = length - 2;
+            ap_uint<8> encoded_so_far = 1;
+            ap_uint<11> abs_coef = strm_abs_coef_nois_v.read();
+            uint16_t ctx_nois = strm_ctx_nois_v.read();
+            uint8_t min_threshold = strm_min_nois_v.read();
+            unsigned int coord = strm_coord_nois_v.read();
+            // THRE_CNT tokens for bit indices >= min_threshold.
+            while (i >= min_threshold) {
+#pragma HLS pipeline II = 1
+                bool cur_bit = (abs_coef & (1 << i)) ? 1 : 0;
+                strm_sel_tab.write(THRE_CNT);
+                strm_cur_bit.write(cur_bit);
+                strm_e.write(false);
+                strm_addr1.write(ctx_nois);
+                strm_addr2.write(length - min_threshold);
+                strm_addr3.write(encoded_so_far);
+                strm_addr4.write(0);
+
+                encoded_so_far <<= 1;
+                if (cur_bit) {
+                    encoded_so_far.set(0, cur_bit);
+                }
+                // since we are not strict about rejecting jpegs with out of range coefs
+                // we just make those less efficient by reusing the same probability bucket
+
+                if (encoded_so_far > 127) encoded_so_far = 127;
+                i--;
+            }
+            // NOIS_CNT tokens for the remaining low bit indices.
+            while (i >= 0) {
+#pragma HLS pipeline II = 1
+                bool cur_bit = (abs_coef & (1 << i)) ? 1 : 0;
+                strm_sel_tab.write(NOIS_CNT);
+                strm_cur_bit.write(cur_bit);
+                strm_e.write(false);
+                strm_addr1.write(coord);
+                strm_addr2.write(num_nonzeros_edge);
+                strm_addr3.write(i);
+                strm_addr4.write(0);
+                i--;
+            }
+
+            if (length != 0) num_nonzeros_edge--;
+            cnt++;
+        } // num_nonzeros_edge>0
+        strm_e.write(true); // end-of-block marker: written to strm_e only, no matching entry on the other outputs
+        j++;
+    }
+}
+
+// collect_len_edge: pre-computes for push_bit_edge_len how many tokens of each kind every serializer step needs.
+void collect_len_edge(ap_uint<32> block_width,
+                      // in (h): count once per block, length and noise threshold once per coefficient entry
+                      hls::stream<ap_uint<3> >& strm_h_nz_len,
+                      hls::stream<ap_uint<4> >& strm_length_h,
+                      hls::stream<ap_uint<8> >& strm_min_nois_h,
+                      // in (v): same layout as the horizontal group
+                      hls::stream<ap_uint<3> >& strm_v_nz_len,
+                      hls::stream<ap_uint<4> >& strm_length_v,
+                      hls::stream<ap_uint<8> >& strm_min_nois_v,
+                      // out: one edge_len per loop iteration, plus a copy of every count / threshold consumed here
+                      hls::stream<edge_len>& strm_len,
+                      // hls::stream<edge_len >& strm_len_v,
+                      hls::stream<ap_uint<3> >& strm_h_nz_pass,
+                      hls::stream<ap_uint<3> >& strm_v_nz_pass,
+                      hls::stream<ap_uint<8> >& strm_min_h_pass,
+                      hls::stream<ap_uint<8> >& strm_min_v_pass) {
+    int j = 0;
+    edge_len tmp_len;
+    // Prologue: the first horizontal count is read (and forwarded) before the loop condition is evaluated.
+    ap_uint<3> num_nonzeros_edge_h = strm_h_nz_len.read();
+    ap_uint<3> num_nonzeros_edge_v = 0;
+    strm_h_nz_pass.write(num_nonzeros_edge_h);
+    tmp_len.lennz = 3;
+    tmp_len.is_h = true;
+
+    ap_int<5> length; // to imp the -1 logic, use one more bit
+    uint8_t min_threshold;
+    while (j < block_width) {
+#pragma HLS PIPELINE II = 1
+        // Fetch length / threshold of the pending entry (h before v); with nothing pending use length = -1.
+        if (num_nonzeros_edge_h > 0) {
+            length = strm_length_h.read();
+            min_threshold = strm_min_nois_h.read();
+            strm_min_h_pass.write(min_threshold);
+
+            ////_XF_IMAGE_PRINT("h: nz=%d, len=%d\n", (int)num_nonzeros_edge_h, (int)length );
+        } else if (num_nonzeros_edge_v > 0) {
+            length = strm_length_v.read();
+            min_threshold = strm_min_nois_v.read();
+            strm_min_v_pass.write(min_threshold);
+
+            ////_XF_IMAGE_PRINT("vv: nz=%d, len=%d\n", (int)num_nonzeros_edge_v, (int)length );
+        } else { // only nz
+            length = -1;
+            min_threshold = 0;
+        }
+        // over_thr >= 0: some bit index lies in [min_threshold, length-2]; over_nos >= 0: min_threshold >= 1.
+        int over_thr = length - 2 - min_threshold;
+        int over_nos = min_threshold - 1;
+        tmp_len.lenexp = length + 1;
+        tmp_len.lensign = (length > 0);
+        tmp_len.lenthr = (over_thr >= 0) ? (over_thr + 1) : 0;
+        if ((over_thr >= 0 && (over_nos >= 0))) {
+            tmp_len.lennos = min_threshold; // bit indices min_threshold-1 .. 0
+        } else if (over_thr < 0 && (length - 2 >= 0)) {
+            tmp_len.lennos = length - 1; // no threshold bits: all of length-2 .. 0
+        } else {
+            tmp_len.lennos = 0;
+        }
+
+        strm_len.write(tmp_len);
+
+        // update next loop: pick lennz / is_h for the next record; j advances whenever a vertical pass ends
+        if (tmp_len.is_h) {
+            if ((num_nonzeros_edge_h > 0) && (length != 0)) { // keep doing
+                num_nonzeros_edge_h--;
+                tmp_len.lennz = 0;
+                tmp_len.is_h = true;
+                if (!num_nonzeros_edge_h) { // read v
+                    num_nonzeros_edge_v = strm_v_nz_len.read();
+                    strm_v_nz_pass.write(num_nonzeros_edge_v);
+                    tmp_len.lennz = 3;
+                    tmp_len.is_h = false;
+                }
+            } else if (!num_nonzeros_edge_h) { // read v
+                num_nonzeros_edge_v = strm_v_nz_len.read();
+                strm_v_nz_pass.write(num_nonzeros_edge_v);
+                tmp_len.lennz = 3;
+                tmp_len.is_h = false;
+            } else { // keep doing
+                tmp_len.lennz = 0;
+                tmp_len.is_h = true;
+            }
+        } else {
+            if ((num_nonzeros_edge_v > 0) && (length != 0)) { // keep doing
+                num_nonzeros_edge_v--;
+                tmp_len.lennz = 0;
+                tmp_len.is_h = false;
+                if (!num_nonzeros_edge_v) { // read h
+                    if ((j < block_width - 1)) { // no further h count is read after the last block
+                        num_nonzeros_edge_h = strm_h_nz_len.read();
+                        strm_h_nz_pass.write(num_nonzeros_edge_h);
+                        tmp_len.lennz = 3;
+                        tmp_len.is_h = true;
+                    }
+                    j++;
+                }
+            } else if (!num_nonzeros_edge_v) { // read h
+                if ((j < block_width - 1)) {
+                    num_nonzeros_edge_h = strm_h_nz_len.read();
+                    strm_h_nz_pass.write(num_nonzeros_edge_h);
+                    tmp_len.lennz = 3;
+                    tmp_len.is_h = true;
+                }
+                j++;
+            } else { // keep doing
+                tmp_len.lennz = 0;
+                tmp_len.is_h = false;
+            }
+        }
+
+    } // end while
+}
+
+// push_bit_edge_len: flattened serializer - every loop iteration emits exactly one token, steered by strm_len.
+void push_bit_edge_len(ap_uint<32> block_width,
+                       hls::stream<ap_uint<6> >& strm_num_nonzeros_7x7,
+                       // in: one edge_len record (token counts per kind) per serializer step
+                       hls::stream<edge_len>& strm_len,
+                       //	hls::stream<edge_len >& strm_len_h,
+                       //	hls::stream<edge_len >& strm_len_v,
+                       // in (h): count and eob once per block, the other streams once per coefficient entry
+                       hls::stream<ap_uint<3> >& strm_h_nz_len,
+                       hls::stream<ap_uint<3> >& strm_eob_x,
+                       // hls::stream<ap_uint<4> >& strm_length_h,
+
+                       hls::stream<ap_uint<4> >& strm_best_prior_exp_h,
+
+                       hls::stream<bool>& strm_cur_bit_sign_h,
+                       hls::stream<ap_uint<2> >& strm_tri_sign_h,
+
+                       hls::stream<ap_uint<11> >& strm_abs_coef_nois_h,
+                       hls::stream<ap_uint<8> >& strm_ctx_nois_h,
+                       hls::stream<ap_uint<8> >& strm_min_nois_h,
+                       hls::stream<ap_uint<6> >& strm_coord_nois_h,
+                       // in (v): same layout as the horizontal group
+                       hls::stream<ap_uint<3> >& strm_v_nz_len,
+                       hls::stream<ap_uint<3> >& strm_eob_y,
+                       // hls::stream<ap_uint<4> >& strm_length_v,
+
+                       hls::stream<ap_uint<4> >& strm_best_prior_exp_v,
+
+                       hls::stream<bool>& strm_cur_bit_sign_v,
+                       hls::stream<ap_uint<2> >& strm_tri_sign_v,
+
+                       hls::stream<ap_uint<11> >& strm_abs_coef_nois_v,
+                       hls::stream<ap_uint<8> >& strm_ctx_nois_v,
+                       hls::stream<ap_uint<8> >& strm_min_nois_v,
+                       hls::stream<ap_uint<6> >& strm_coord_nois_v,
+                       // out: number of tokens emitted for each block, written when its vertical pass ends
+                       hls::stream<short>& strm_edge_len,
+                       // out: one entry per token on each of the six streams below
+                       hls::stream<ap_uint<4> >& strm_sel_tab,
+                       hls::stream<bool>& strm_cur_bit,
+                       // hls::stream<bool>		 & strm_e,
+                       hls::stream<ap_uint<16> >& strm_addr1,
+                       hls::stream<ap_uint<16> >& strm_addr2,
+                       hls::stream<ap_uint<16> >& strm_addr3,
+                       hls::stream<ap_uint<16> >& strm_addr4
+
+                       ) {
+    int j = 0;
+    // Note: this local shares its name with the taken_dat struct type.
+    taken_dat taken_dat;
+    // for loop j
+    ap_uint<3> serialized_so_far = 0;
+    ap_uint<3> num_nonzeros_edge = strm_h_nz_len.read();
+    ap_uint<6> nz_77 = strm_num_nonzeros_7x7.read();
+    ap_uint<3> eob_x = strm_eob_x.read();
+    ap_uint<3> eob_y;
+
+    // for loop taken
+    ap_uint<4> bsr;
+    ap_uint<11> abs_coef;
+    uint16_t ctx_nois;
+    uint8_t min_threshold;
+    unsigned int coord;
+    edge_len tmp_len = strm_len.read();
+    ap_uint<4> length = tmp_len.lenexp - 1;
+    // Side data of the first horizontal entry is only present on the streams when the count is non-zero.
+    if (num_nonzeros_edge > 0) {
+        bsr = strm_best_prior_exp_h.read();
+        abs_coef = strm_abs_coef_nois_h.read();
+        ctx_nois = strm_ctx_nois_h.read();
+        min_threshold = strm_min_nois_h.read();
+        coord = strm_coord_nois_h.read();
+    }
+    // Per-step cursors: i_exp counts exponent tokens upwards, i_thr / i_nos are descending bit indices.
+    int i_exp = 0;
+    int cnt = 0;
+    int i_thr = length - 2;
+    int i_nos = tmp_len.lenthr.is_zero() ? ((int)length - 2) : ((int)min_threshold - 1);
+
+    ap_uint<8> encoded_so_far = 1;
+    // Tokens emitted for the current block; this local shares its name with the edge_len struct type.
+    short edge_len = 0;
+
+    while (j < block_width) {
+#pragma HLS PIPELINE II = 1
+        // Pick the first token kind with a non-zero remaining count: nz -> exp -> sign -> thr -> nos.
+        if (tmp_len.lennz != 0) {
+            int i_nz = tmp_len.lennz - 1;
+            taken_dat.sel_tab = tmp_len.is_h ? NZ_CNT_8x1 : NZ_CNT_1x8;
+            taken_dat.cur_bit = (num_nonzeros_edge & (1 << i_nz)) ? 1 : 0;
+            taken_dat.e = false;
+            taken_dat.addr1 = tmp_len.is_h ? eob_x : eob_y;
+            taken_dat.addr2 = (nz_77 + 3) / 7;
+            taken_dat.addr3 = i_nz;
+            taken_dat.addr4 = serialized_so_far;
+
+            serialized_so_far <<= 1; // init 0
+            serialized_so_far.set(0, taken_dat.cur_bit);
+            tmp_len.lennz--;
+
+        } else if (tmp_len.lenexp != 0) {
+            taken_dat.sel_tab = EXP_CNT_X;
+            taken_dat.cur_bit = (length != i_exp); // init i_exp =0
+            taken_dat.e = false;
+            taken_dat.addr1 = num_nonzeros_edge; //--
+            taken_dat.addr2 = cnt;               //++
+            taken_dat.addr3 = bsr;
+            taken_dat.addr4 = i_exp;
+            i_exp++;
+            tmp_len.lenexp--;
+
+        } else if (tmp_len.lensign != 0) {
+            bool big_sign_hv;
+            ap_uint<2> tri_sign_hv;
+            // The sign streams are read here, i.e. only when a sign token is actually emitted.
+            if (tmp_len.is_h) {
+                big_sign_hv = strm_cur_bit_sign_h.read();
+                tri_sign_hv = strm_tri_sign_h.read();
+            } else {
+                big_sign_hv = strm_cur_bit_sign_v.read();
+                tri_sign_hv = strm_tri_sign_v.read();
+            }
+            taken_dat.sel_tab = SIGN_CNT;
+            taken_dat.cur_bit = big_sign_hv;
+            taken_dat.e = false;
+            taken_dat.addr1 = tri_sign_hv;
+            taken_dat.addr2 = bsr;
+            taken_dat.addr3 = 0;
+            taken_dat.addr4 = 0;
+            tmp_len.lensign--;
+
+        } else if (tmp_len.lenthr != 0) {
+            taken_dat.sel_tab = THRE_CNT;
+            taken_dat.cur_bit = (abs_coef & (1 << i_thr)) ? 1 : 0;
+            taken_dat.e = false;
+            taken_dat.addr1 = ctx_nois;
+            taken_dat.addr2 = length - min_threshold;
+            taken_dat.addr3 = encoded_so_far; // init 1
+            taken_dat.addr4 = 0;
+
+            encoded_so_far <<= 1;
+            if (taken_dat.cur_bit) {
+                encoded_so_far.set(0, taken_dat.cur_bit);
+            }
+
+            // since we are not strict about rejecting jpegs with out of range coefs
+            // we just make those less efficient by reusing the same probability bucket
+            if (encoded_so_far > 127) encoded_so_far = 127;
+
+            i_thr--;
+            tmp_len.lenthr--;
+
+        } else if (tmp_len.lennos != 0) {
+            taken_dat.sel_tab = NOIS_CNT;
+            taken_dat.cur_bit = (abs_coef & (1 << i_nos)) ? 1 : 0;
+            taken_dat.e = false;
+            taken_dat.addr1 = coord;
+            taken_dat.addr2 = num_nonzeros_edge;
+            taken_dat.addr3 = i_nos;
+            taken_dat.addr4 = 0;
+
+            i_nos--;
+            tmp_len.lennos--;
+
+        } // end if
+
+        // write out: unconditional, so every loop iteration produces one entry on each output stream
+        edge_len++;
+        strm_sel_tab.write(taken_dat.sel_tab);
+        strm_cur_bit.write(taken_dat.cur_bit);
+        // strm_e.write(taken_dat.e);
+        strm_addr1.write(taken_dat.addr1);
+        strm_addr2.write(taken_dat.addr2);
+        strm_addr3.write(taken_dat.addr3);
+        strm_addr4.write(taken_dat.addr4);
+
+        // update next loop: once all counts are used up, load the next record; `length` below is still the old one
+        if ((!tmp_len.lennz) && (!tmp_len.lenexp) && (!tmp_len.lensign) && (!tmp_len.lenthr) && (!tmp_len.lennos)) {
+            if (tmp_len.is_h) {
+                tmp_len = strm_len.read();
+                if ((num_nonzeros_edge > 0) && (length != 0)) { // keep doing
+                    num_nonzeros_edge--;
+                    cnt++;
+                    if (!num_nonzeros_edge) { // read v
+
+                        num_nonzeros_edge = strm_v_nz_len.read();
+                        eob_y = strm_eob_y.read();
+                        serialized_so_far = 0;
+                        cnt = 7;
+                        if (num_nonzeros_edge > 0) {
+                            bsr = strm_best_prior_exp_v.read();
+                            abs_coef = strm_abs_coef_nois_v.read();
+                            ctx_nois = strm_ctx_nois_v.read();
+                            coord = strm_coord_nois_v.read();
+                            min_threshold = strm_min_nois_v.read();
+                        }
+
+                        // tmp_len = strm_len.read();
+                    } else { // read h
+                        // nz_77=strm_num_nonzeros_7x7.read();
+
+                        bsr = strm_best_prior_exp_h.read();
+                        abs_coef = strm_abs_coef_nois_h.read();
+                        ctx_nois = strm_ctx_nois_h.read();
+                        coord = strm_coord_nois_h.read();
+                        min_threshold = strm_min_nois_h.read();
+
+                        // tmp_len = strm_len.read();
+                    }
+                } else if (!num_nonzeros_edge) { // read v
+                    num_nonzeros_edge = strm_v_nz_len.read();
+                    eob_y = strm_eob_y.read();
+                    serialized_so_far = 0;
+                    cnt = 7;
+
+                    if (num_nonzeros_edge > 0) {
+                        bsr = strm_best_prior_exp_v.read();
+                        abs_coef = strm_abs_coef_nois_v.read();
+                        ctx_nois = strm_ctx_nois_v.read();
+                        coord = strm_coord_nois_v.read();
+                        min_threshold = strm_min_nois_v.read();
+                    }
+
+                    // tmp_len = strm_len.read();
+
+                } else { // keep doing
+
+                    bsr = strm_best_prior_exp_h.read();
+                    abs_coef = strm_abs_coef_nois_h.read();
+                    ctx_nois = strm_ctx_nois_h.read();
+                    coord = strm_coord_nois_h.read();
+                    min_threshold = strm_min_nois_h.read();
+
+                    cnt++;
+                    // tmp_len = strm_len.read();
+                }
+
+            } else {
+                if ((num_nonzeros_edge > 0) && (length != 0)) { // keep doing
+                    num_nonzeros_edge--;
+                    cnt++;
+                    if (!num_nonzeros_edge) { // read h
+                        if (j < block_width - 1) { // after the last block nothing more is read from the inputs
+                            num_nonzeros_edge = strm_h_nz_len.read();
+                            nz_77 = strm_num_nonzeros_7x7.read();
+                            eob_x = strm_eob_x.read();
+                            tmp_len = strm_len.read();
+                            serialized_so_far = 0;
+                            cnt = 0;
+                            if (num_nonzeros_edge > 0) {
+                                bsr = strm_best_prior_exp_h.read();
+                                abs_coef = strm_abs_coef_nois_h.read();
+                                ctx_nois = strm_ctx_nois_h.read();
+                                coord = strm_coord_nois_h.read();
+                                min_threshold = strm_min_nois_h.read();
+                            }
+                        }
+                        j++;
+                        // strm_e.write(true);//todo
+                        strm_edge_len.write(edge_len);
+                        edge_len = 0;
+                    } else {
+                        tmp_len = strm_len.read();
+
+                        bsr = strm_best_prior_exp_v.read();
+                        abs_coef = strm_abs_coef_nois_v.read();
+                        ctx_nois = strm_ctx_nois_v.read();
+                        coord = strm_coord_nois_v.read();
+                        min_threshold = strm_min_nois_v.read();
+                    }
+                } else if (!num_nonzeros_edge) { // read h
+                    if (j < block_width - 1) {
+                        num_nonzeros_edge = strm_h_nz_len.read();
+                        eob_x = strm_eob_x.read();
+                        nz_77 = strm_num_nonzeros_7x7.read();
+                        tmp_len = strm_len.read();
+                        serialized_so_far = 0;
+                        cnt = 0;
+                        if (num_nonzeros_edge) {
+                            bsr = strm_best_prior_exp_h.read();
+                            abs_coef = strm_abs_coef_nois_h.read();
+                            ctx_nois = strm_ctx_nois_h.read();
+                            coord = strm_coord_nois_h.read();
+                            min_threshold = strm_min_nois_h.read();
+                        }
+                    }
+                    j++;
+                    // strm_e.write(true);//todo
+                    strm_edge_len.write(edge_len);
+                    edge_len = 0;
+                } else { // keep doing
+
+                    tmp_len = strm_len.read();
+                    cnt++;
+
+                    bsr = strm_best_prior_exp_v.read();
+                    abs_coef = strm_abs_coef_nois_v.read();
+                    ctx_nois = strm_ctx_nois_v.read();
+                    coord = strm_coord_nois_v.read();
+                    min_threshold = strm_min_nois_v.read();
+                }
+            } // end else
+
+            //    		if( j<block_width){
+            //    			tmp_len = strm_len.read();
+            //    		}
+            i_exp = 0;
+            length = tmp_len.lenexp - 1;
+            i_thr = length - 2;
+            i_nos = tmp_len.lenthr.is_zero() ? ((int)length - 2) : ((int)min_threshold - 1);
+            encoded_so_far = 1;
+
+            //			if(tmp_len.is_h){
+            //				//_XF_IMAGE_PRINT("h: nz=%d, len=%d\n", (int)num_nonzeros_edge, (int)length );
+            //			}else{
+            //				//_XF_IMAGE_PRINT("vv: nz=%d, len=%d\n", (int)num_nonzeros_edge, (int)length );
+            //			}
+        } // end update
+
+    } // end while
+}
+
+// push_bit_edge_0: DATAFLOW version of the edge-token serializer, built from two pipelined stages:
+//   1. collect_len_edge  - reads the h/v counts, lengths and noise thresholds and writes one edge_len record
+//                          (token counts per kind) per serializer step to strm_len;
+//   2. push_bit_edge_len - consumes strm_len and emits exactly one token per loop iteration.
+// Inputs are the same per-block / per-coefficient streams that push_bit_edge takes. Outputs are the token
+// streams strm_sel_tab, strm_cur_bit and strm_addr1..4, plus strm_edge_len, which receives the number of
+// tokens emitted for each block. Unlike push_bit_edge there is no strm_e output.
+// Values that stage 1 has to consume (counts and thresholds) are re-emitted on the *_pass FIFOs for stage 2.
+// Every internal FIFO is declared 32 entries deep and bound to LUTRAM.
+void push_bit_edge_0(ap_uint<32> block_width,
+                     hls::stream<ap_uint<6> >& strm_num_nonzeros_7x7,
+
+                     hls::stream<ap_uint<3> >& strm_h_nz_len,
+                     hls::stream<ap_uint<3> >& strm_eob_x,
+                     hls::stream<ap_uint<4> >& strm_length_h,
+
+                     hls::stream<ap_uint<4> >& strm_best_prior_exp_h,
+
+                     hls::stream<bool>& strm_cur_bit_sign_h,
+                     hls::stream<ap_uint<2> >& strm_tri_sign_h,
+
+                     hls::stream<ap_uint<11> >& strm_abs_coef_nois_h,
+                     hls::stream<ap_uint<8> >& strm_ctx_nois_h,
+                     hls::stream<ap_uint<8> >& strm_min_nois_h,
+                     hls::stream<ap_uint<6> >& strm_coord_nois_h,
+
+                     hls::stream<ap_uint<3> >& strm_v_nz_len,
+                     hls::stream<ap_uint<3> >& strm_eob_y,
+                     hls::stream<ap_uint<4> >& strm_length_v,
+
+                     hls::stream<ap_uint<4> >& strm_best_prior_exp_v,
+
+                     hls::stream<bool>& strm_cur_bit_sign_v,
+                     hls::stream<ap_uint<2> >& strm_tri_sign_v,
+
+                     hls::stream<ap_uint<11> >& strm_abs_coef_nois_v,
+                     hls::stream<ap_uint<8> >& strm_ctx_nois_v,
+                     hls::stream<ap_uint<8> >& strm_min_nois_v,
+                     hls::stream<ap_uint<6> >& strm_coord_nois_v,
+
+                     hls::stream<short>& strm_edge_len,
+
+                     hls::stream<ap_uint<4> >& strm_sel_tab,
+                     hls::stream<bool>& strm_cur_bit,
+                     // hls::stream<bool>		 & strm_e,
+                     hls::stream<ap_uint<16> >& strm_addr1,
+                     hls::stream<ap_uint<16> >& strm_addr2,
+                     hls::stream<ap_uint<16> >& strm_addr3,
+                     hls::stream<ap_uint<16> >& strm_addr4) {
+#pragma HLS DATAFLOW
+    // clang-format off
+	hls::stream<edge_len >           strm_len;
+#pragma HLS stream depth=32 variable=strm_len
+#pragma HLS bind_storage        variable=strm_len type=FIFO impl=LUTRAM
+	hls::stream<ap_uint<3> >         strm_h_nz_pass;
+	hls::stream<ap_uint<3> >         strm_v_nz_pass;
+#pragma HLS stream depth=32 variable=strm_h_nz_pass
+#pragma HLS bind_storage        variable=strm_h_nz_pass type=FIFO impl=LUTRAM
+#pragma HLS stream depth=32 variable=strm_v_nz_pass
+#pragma HLS bind_storage        variable=strm_v_nz_pass type=FIFO impl=LUTRAM
+
+	hls::stream<ap_uint<8> >         strm_min_h_pass;
+#pragma HLS stream depth=32 variable=strm_min_h_pass
+#pragma HLS bind_storage        variable=strm_min_h_pass type=FIFO impl=LUTRAM
+
+	hls::stream<ap_uint<8> >         strm_min_v_pass;
+#pragma HLS stream depth=32 variable=strm_min_v_pass
+#pragma HLS bind_storage        variable=strm_min_v_pass type=FIFO impl=LUTRAM
+
+    // clang-format on
+
+    // Stage 1: derive the per-step token counts and forward the counts / thresholds it consumes.
+    collect_len_edge(block_width,
+
+                     strm_h_nz_len, strm_length_h, strm_min_nois_h,
+
+                     strm_v_nz_len, strm_length_v, strm_min_nois_v,
+
+                     strm_len,
+                     strm_h_nz_pass, strm_v_nz_pass, strm_min_h_pass, strm_min_v_pass);
+
+    // Stage 2: emit the tokens; counts and thresholds come from the forwarded *_pass FIFOs, not the originals.
+    push_bit_edge_len(block_width, strm_num_nonzeros_7x7,
+
+                      strm_len,
+
+                      strm_h_nz_pass, strm_eob_x,
+                      // (strm_length_h is not passed: stage 2 recovers the length from edge_len.lenexp)
+
+                      strm_best_prior_exp_h,
+
+                      strm_cur_bit_sign_h, strm_tri_sign_h,
+
+                      strm_abs_coef_nois_h, strm_ctx_nois_h, strm_min_h_pass, strm_coord_nois_h,
+
+                      strm_v_nz_pass, strm_eob_y,
+                      // (strm_length_v is not passed either, for the same reason)
+
+                      strm_best_prior_exp_v,
+
+                      strm_cur_bit_sign_v, strm_tri_sign_v,
+
+                      strm_abs_coef_nois_v, strm_ctx_nois_v, strm_min_v_pass, strm_coord_nois_v,
+
+                      strm_edge_len,
+
+                      strm_sel_tab, strm_cur_bit,
+                      // (no strm_e here; strm_edge_len carries the per-block token count instead)
+                      strm_addr1, strm_addr2, strm_addr3, strm_addr4);
+}
+//-------------------------------------------------------------------------
+// Dataflow region for the edge coefficients: two prepare_edge calls and push_bit_edge share the local streams below.
+void hls_serialize_tokens_edges(ap_uint<32> block_width,
+                                bool ap_color,
+                                uint8_t min_nois_thld_x[MAX_NUM_COLOR][64],
+                                uint8_t min_nois_thld_y[MAX_NUM_COLOR][64],
+                                bool left_present,
+                                bool above_present,
+                                bool above_right_present, // not referenced in this function body
+                                hls::stream<ap_uint<6> >& strm_num_nonzeros_7x7_h,
+                                hls::stream<ap_uint<3> >& strm_h_nz_len,
+                                hls::stream<ap_uint<3> >& strm_lane_h,
+
+                                hls::stream<ap_uint<3> >& strm_v_nz_len,
+                                hls::stream<ap_uint<3> >& strm_lane_v,
+
+                                hls::stream<ap_uint<3> >& strm_eob_x,
+                                hls::stream<ap_uint<3> >& strm_eob_y,
+
+                                int32_t idct_q_table_x[MAX_NUM_COLOR][8][8],
+                                int32_t idct_q_table_y[MAX_NUM_COLOR][8][8],
+                                // coefficient streams and flags, forwarded to the two prepare_edge calls
+                                hls::stream<ap_int<11> > strm_coef_h_here[8],
+                                hls::stream<ap_int<11> > strm_coef_h_above[8],
+                                hls::stream<bool>& strm_has_left_h,
+                                hls::stream<bool>& strm_coef_h_end,
+                                hls::stream<ap_int<11> > strm_coef_v_here[8],
+                                hls::stream<ap_int<11> > strm_coef_v_left[8],
+                                hls::stream<bool>& strm_has_left_v,
+                                hls::stream<bool>& strm_coef_v_end,
+                                // streams handed on to push_bit_edge as its trailing arguments
+                                hls::stream<ap_uint<4> >& strm_sel_tab,
+                                hls::stream<bool>& strm_cur_bit,
+                                hls::stream<bool>& strm_e,
+                                hls::stream<ap_uint<16> >& strm_addr1,
+                                hls::stream<ap_uint<16> >& strm_addr2,
+                                hls::stream<ap_uint<16> >& strm_addr3,
+                                hls::stream<ap_uint<16> >& strm_addr4) {
+#pragma HLS INLINE
+#pragma HLS dataflow
+    // clang-format off
+	static hls::stream<ap_uint<11> > strm_abs_coef_nois_h("abs_coef_h");
+#pragma HLS stream depth=32 variable=strm_abs_coef_nois_h
+
+	static hls::stream<ap_uint<11> > strm_abs_coef_nois_v("abs_coef_v");
+#pragma HLS stream depth=32 variable=strm_abs_coef_nois_v
+    // NOTE(review): only the two abs_coef streams above are static; every stream below is an automatic local.
+    hls::stream<ap_uint<4> > strm_best_prior_exp_h("bsr_exp_h");
+#pragma HLS stream depth=32 variable=strm_best_prior_exp_h
+
+    hls::stream<bool> strm_cur_bit_sign_h("sign_bit_h");
+#pragma HLS stream depth=32 variable=strm_cur_bit_sign_h
+    hls::stream<ap_uint<2> > strm_tri_sign_h("tri_sign_h");
+#pragma HLS stream depth=32 variable=strm_tri_sign_h
+
+    hls::stream<ap_uint<8> > strm_ctx_nois_h;
+#pragma HLS stream depth=32 variable=strm_ctx_nois_h
+    hls::stream<ap_uint<8> > strm_min_nois_h;
+#pragma HLS stream depth=32 variable=strm_min_nois_h
+//    hls::stream<ap_uint<8> > strm_so_far_nois_h;
+//#pragma HLS stream depth=32 variable=strm_so_far_nois_h
+    hls::stream<ap_uint<6> > strm_coord_nois_h;
+#pragma HLS stream depth=32 variable=strm_coord_nois_h
+
+    hls::stream<ap_uint<4> > strm_best_prior_exp_v;
+#pragma HLS stream depth=32 variable=strm_best_prior_exp_v
+
+    hls::stream<bool> strm_cur_bit_sign_v("sign_bit_v");
+#pragma HLS stream depth=32 variable=strm_cur_bit_sign_v
+    hls::stream<ap_uint<2> > strm_tri_sign_v;
+#pragma HLS stream depth=32 variable=strm_tri_sign_v
+
+    hls::stream<ap_uint<8> > strm_ctx_nois_v;
+#pragma HLS stream depth=32 variable=strm_ctx_nois_v
+    hls::stream<ap_uint<8> > strm_min_nois_v;
+#pragma HLS stream depth=32 variable=strm_min_nois_v
+//    hls::stream<ap_uint<8> > strm_so_far_nois_v;
+//#pragma HLS stream depth=32 variable=strm_so_far_nois_v
+    hls::stream<ap_uint<6> > strm_coord_nois_v;
+#pragma HLS stream depth=32 variable=strm_coord_nois_v
+
+    hls::stream<ap_uint<4> > strm_length_exp_h("len_exp_h");
+#pragma HLS stream depth=32 variable=strm_length_exp_h
+    hls::stream<ap_uint<4> > strm_length_exp_v("len_exp_v");
+#pragma HLS stream depth=32 variable=strm_length_exp_v
+
+//#pragma HLS ARRAY_PARTITION variable=idct_q_table_x complete dim=3
+//#pragma HLS ARRAY_PARTITION variable=idct_q_table_y complete dim=3
+    // clang-format on
+    prepare_edge<true>(block_width, ap_color, min_nois_thld_x, above_present, strm_lane_h, idct_q_table_x,
+                       // horizontal edge: x tables, above_present and the *_h streams
+                       strm_coef_h_here, strm_coef_h_above, strm_has_left_h, strm_coef_h_end,
+
+                       strm_length_exp_h,
+
+                       strm_best_prior_exp_h,
+
+                       strm_cur_bit_sign_h, strm_tri_sign_h,
+
+                       strm_abs_coef_nois_h, strm_ctx_nois_h, strm_min_nois_h, strm_coord_nois_h);
+    // Vertical edge: same template with <false>, the y tables, left_present and the *_v streams.
+    prepare_edge<false>(block_width, ap_color, min_nois_thld_y, left_present, strm_lane_v, idct_q_table_y,
+
+                        strm_coef_v_here, strm_coef_v_left, strm_has_left_v, strm_coef_v_end,
+
+                        strm_length_exp_v,
+
+                        strm_best_prior_exp_v,
+
+                        strm_cur_bit_sign_v, strm_tri_sign_v,
+
+                        strm_abs_coef_nois_v, strm_ctx_nois_v, strm_min_nois_v, strm_coord_nois_v);
+    // Receives the caller's nz/eob streams plus every local stream passed to the two prepare_edge calls above.
+    push_bit_edge(block_width, strm_num_nonzeros_7x7_h,
+
+                  strm_h_nz_len, strm_eob_x, strm_length_exp_h,
+
+                  strm_best_prior_exp_h,
+
+                  strm_cur_bit_sign_h, strm_tri_sign_h,
+
+                  strm_abs_coef_nois_h, strm_ctx_nois_h, strm_min_nois_h, strm_coord_nois_h,
+
+                  strm_v_nz_len, strm_eob_y, strm_length_exp_v,
+
+                  strm_best_prior_exp_v,
+
+                  strm_cur_bit_sign_v, strm_tri_sign_v,
+
+                  strm_abs_coef_nois_v, strm_ctx_nois_v, strm_min_nois_v, strm_coord_nois_v,
+
+                  strm_sel_tab, strm_cur_bit, strm_e, strm_addr1, strm_addr2, strm_addr3, strm_addr4);
+}
+// ------------------------------------------------------------
+// Runs only the two prepare_edge calls (<true> with the *_h arguments, <false> with the *_v arguments).
+void pre_serialize_tokens_edges(ap_uint<32> block_width,
+                                bool ap_color,
+                                uint8_t min_nois_thld_x[MAX_NUM_COLOR][64],
+                                uint8_t min_nois_thld_y[MAX_NUM_COLOR][64],
+                                bool left_present,
+                                bool above_present,
+                                bool above_right_present, // not referenced in this function body
+                                // hls::stream<ap_uint<6> >& strm_num_nonzeros_7x7_h,
+                                // hls::stream<ap_uint<3> >& strm_h_nz_len,
+                                hls::stream<ap_uint<3> >& strm_lane_h,
+
+                                // hls::stream<ap_uint<3> >& strm_v_nz_len,
+                                hls::stream<ap_uint<3> >& strm_lane_v,
+
+                                // hls::stream<ap_uint<3> >& strm_eob_x,
+                                // hls::stream<ap_uint<3> >& strm_eob_y,
+
+                                int32_t idct_q_table_x[MAX_NUM_COLOR][8][8],
+                                int32_t idct_q_table_y[MAX_NUM_COLOR][8][8],
+                                // coefficient streams and flags, forwarded to the two prepare_edge calls
+                                hls::stream<ap_int<11> > strm_coef_h_here[8],
+                                hls::stream<ap_int<11> > strm_coef_h_above[8],
+                                hls::stream<bool>& strm_has_left_h,
+                                hls::stream<bool>& strm_coef_h_end,
+                                hls::stream<ap_int<11> > strm_coef_v_here[8],
+                                hls::stream<ap_int<11> > strm_coef_v_left[8],
+                                hls::stream<bool>& strm_has_left_v,
+                                hls::stream<bool>& strm_coef_v_end,
+                                // caller-owned streams that form the trailing arguments of prepare_edge<true>
+                                hls::stream<ap_uint<4> >& strm_length_exp_h,
+
+                                hls::stream<ap_uint<4> >& strm_best_prior_exp_h,
+
+                                hls::stream<bool>& strm_cur_bit_sign_h,
+                                hls::stream<ap_uint<2> >& strm_tri_sign_h,
+
+                                hls::stream<ap_uint<11> >& strm_abs_coef_nois_h,
+                                hls::stream<ap_uint<8> >& strm_ctx_nois_h,
+                                hls::stream<ap_uint<8> >& strm_min_nois_h,
+                                hls::stream<ap_uint<6> >& strm_coord_nois_h,
+                                // caller-owned streams that form the trailing arguments of prepare_edge<false>
+                                hls::stream<ap_uint<4> >& strm_length_exp_v,
+
+                                hls::stream<ap_uint<4> >& strm_best_prior_exp_v,
+
+                                hls::stream<bool>& strm_cur_bit_sign_v,
+                                hls::stream<ap_uint<2> >& strm_tri_sign_v,
+
+                                hls::stream<ap_uint<11> >& strm_abs_coef_nois_v,
+                                hls::stream<ap_uint<8> >& strm_ctx_nois_v,
+                                hls::stream<ap_uint<8> >& strm_min_nois_v,
+                                hls::stream<ap_uint<6> >& strm_coord_nois_v) {
+#pragma HLS INLINE
+#pragma HLS dataflow
+    // Horizontal edge: x threshold / q tables, above_present and the *_h streams.
+    prepare_edge<true>(block_width, ap_color, min_nois_thld_x, above_present, strm_lane_h, idct_q_table_x,
+
+                       strm_coef_h_here, strm_coef_h_above, strm_has_left_h, strm_coef_h_end,
+
+                       strm_length_exp_h,
+
+                       strm_best_prior_exp_h,
+
+                       strm_cur_bit_sign_h, strm_tri_sign_h,
+
+                       strm_abs_coef_nois_h, strm_ctx_nois_h, strm_min_nois_h, strm_coord_nois_h);
+    // Vertical edge: y threshold / q tables, left_present and the *_v streams.
+    prepare_edge<false>(block_width, ap_color, min_nois_thld_y, left_present, strm_lane_v, idct_q_table_y,
+
+                        strm_coef_v_here, strm_coef_v_left, strm_has_left_v, strm_coef_v_end,
+
+                        strm_length_exp_v,
+
+                        strm_best_prior_exp_v,
+
+                        strm_cur_bit_sign_v, strm_tri_sign_v,
+
+                        strm_abs_coef_nois_v, strm_ctx_nois_v, strm_min_nois_v, strm_coord_nois_v);
+}
+
+} // namespace details
+} // namespace codec
+} // namespace xf
diff --git a/codec/L2/demos/leptonEnc/kernel/XAcc_jfifparser.cpp b/codec/L2/demos/leptonEnc/kernel/XAcc_jfifparser.cpp
new file mode 100644
index 0000000000..1aee01ec04
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/kernel/XAcc_jfifparser.cpp
@@ -0,0 +1,511 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file XAcc_jfifparser.cpp
+ * @brief parser_jpg_top template function implementation and kernel_parser_decoder wrapper.
+ *
+ * This file is part of HLS algorithm library.
+ */
+
+#include "XAcc_lepjfifparser.hpp"
+
+// ------------------------------------------------------------
+#define B_SHORT(v1, v2) ((((int)(v1)) << 8) + ((int)(v2))) // v1 = high byte, v2 = low byte; args may be expressions
+
+namespace xf {
+namespace codec {
+namespace details {
+
+// Advance byte offset j by cnt, then split it into r = j >> 1 and c = j & 1 (callers read byte c of segment[r]).
+inline void readBytes(int& j, const int& cnt, int& r, int& c) {
+#pragma HLS INLINE
+    r = (j += cnt) >> 1;
+    c = j & 1;
+}
+// Single-byte step: increment byte offset j, then refresh r = j >> 1 and c = j & 1.
+inline void oneByte(int& j, int& r, int& c) {
+#pragma HLS INLINE
+    r = (++j) >> 1;
+    c = j & 1;
+}
+// ------------------------------------------------------------
+// Fill the tables of plep that are derived from plep->q_tables, once per entry of plep->axi_map_row2cmp
+// (plep->axi_num_cmp_mcu entries): idct_q_table_x / idct_q_table_y and min_nois_thld_x / min_nois_thld_y.
+void SetOtherQtab(decOutput* plep) {
+    // clang-format off
+	static const unsigned short int freqmax[] =
+		        {
+		            1024, 931, 985, 968, 1020, 968, 1020, 1020,
+		            932, 858, 884, 840, 932, 838, 854, 854,
+		            985, 884, 871, 875, 985, 878, 871, 854,
+		            967, 841, 876, 844, 967, 886, 870, 837,
+		            1020, 932, 985, 967, 1020, 969, 1020, 1020,
+		            969, 838, 878, 886, 969, 838, 969, 838,
+		            1020, 854, 871, 870, 1010, 969, 1020, 1020,
+		            1020, 854, 854, 838, 1020, 838, 1020, 838
+		        };
+// clang-format on
+#pragma HLS ARRAY_PARTITION variable = plep->q_tables complete dim = 2
+#pragma HLS ARRAY_PARTITION variable = plep->idct_q_table_x complete dim = 3
+#pragma HLS ARRAY_PARTITION variable = plep->idct_q_table_y complete dim = 3
+    unsigned short RESIDUAL_NOISE_FLOOR = 7;
+    for (int idx_cmp = 0; idx_cmp < plep->axi_num_cmp_mcu; idx_cmp++) {
+        uint8_t c = plep->axi_map_row2cmp[idx_cmp];
+
+        for (int i = 0; i < 64; i++) {
+#pragma HLS pipeline
+            // Both tables scale a q_tables entry by hls_icos_base_8192_scaled[(i & 7) << 3]; x swaps the indices.
+            plep->idct_q_table_x[c][i >> 3][i & 7] =
+                hls_icos_base_8192_scaled[(i & 7) << 3] * plep->q_tables[c][i & 7][i >> 3];
+            plep->idct_q_table_y[c][i >> 3][i & 7] =
+                hls_icos_base_8192_scaled[(i & 7) << 3] * plep->q_tables[c][i >> 3][i & 7];
+
+            // Ceiling division freqmax[i] / q. The quotient is only needed for the bit length just below,
+            // so a scalar replaces the former write-only 3x64 local table.
+            ap_uint<16> freqmax_q =
+                (freqmax[i] + plep->q_tables[c][i >> 3][i & 7] - 1) / plep->q_tables[c][i >> 3][i & 7];
+            // Bit length of the quotient (16 minus its leading zeros).
+            uint8_t max_len = 16 - freqmax_q.countLeadingZeros();
+            // Threshold = bit length in excess of RESIDUAL_NOISE_FLOOR, floored at 0; x and y get the same value.
+            if (max_len > (int)RESIDUAL_NOISE_FLOOR) {
+                plep->min_nois_thld_x[c][i] = plep->min_nois_thld_y[c][i] = max_len - RESIDUAL_NOISE_FLOOR;
+            } else {
+                plep->min_nois_thld_x[c][i] = plep->min_nois_thld_y[c][i] = 0;
+            }
+        } // end for
+    }
+}
+// ------------------------------------------------------------
+// Dataflow region: axiToCharStream fills image_strm / eof_strm from ptr, mcu_decoder is given both plus block_strm.
+void decoder_jpg_top(ap_uint<AXI_WIDTH>* ptr,
+                     const int sz,
+                     const int c,
+                     const uint16_t dht_tbl1[2][2][1 << DHT1],
+                     const uint16_t dht_tbl2[2][2][1 << DHT2],
+                     ap_uint<12> hls_cmp,
+
+                     // image info
+                     const uint8_t hls_mbs[MAX_NUM_COLOR],
+                     const img_info img_info,
+                     // both are passed by reference straight to mcu_decoder
+                     uint32_t& rst_cnt,
+                     hls::stream<ap_uint<24> >& block_strm) {
+#pragma HLS DATAFLOW
+    // clang-format off
+    _XF_IMAGE_PRINT(" ************* start decode %d mcus in FPGA  *************\n", (int)img_info.hls_mcuc);
+    _XF_IMAGE_PRINT(
+    				"  hls_cs_cmpc=%d, hls_mbs[0]=%d, hls_mbs[1]=%d, hls_mbs[2]=%d, \n",
+						img_info.hls_cs_cmpc, hls_mbs[0], hls_mbs[1], hls_mbs[2]);
+
+
+#pragma HLS ARRAY_PARTITION variable = hls_mbs  complete
+//#pragma HLS bind_storage        variable = dht_tbl1 type=RAM_2P impl=LUTRAM
+//#pragma HLS ARRAY_PARTITION variable = dht_tbl1 complete dim = 0
+//#pragma HLS ARRAY_PARTITION variable = dht_tbl2 complete dim = 0
+    //#pragma HLS bind_storage variable=dht_tbl1 type=RAM_2P impl=RAM
+    // Local FIFOs (depth 32, LUTRAM) shared by axiToCharStream and mcu_decoder below.
+    hls::stream<CHType> image_strm("input_strm");
+    hls::stream<bool>   eof_strm("eof_strm");
+#pragma HLS bind_storage variable = image_strm type=FIFO impl=LUTRAM
+#pragma HLS STREAM   variable = image_strm depth = 32
+#pragma HLS bind_storage variable = eof_strm type=FIFO impl=LUTRAM
+#pragma HLS STREAM   variable = eof_strm depth = 32
+    // clang-format on
+
+    // xf::common::utils_hw::axi_to_char_stream<BURST_LENTH, AXI_WIDTH, CHType>(ptr, image_strm, eof_strm, sz, c);
+    xf::common::utils_hw::axiToCharStream<BURST_LENTH, AXI_WIDTH, CHType>(ptr, image_strm, eof_strm, sz, c);
+    // Second stage: gets both streams, the two lookup tables and the image-info fields it needs.
+    mcu_decoder(image_strm, eof_strm, dht_tbl1, dht_tbl2, hls_cmp, img_info.hls_cs_cmpc, hls_mbs, img_info.hls_mcuh,
+                img_info.hls_mcuc, rst_cnt, block_strm);
+}
+// ------------------------------------------------------------
+// Parse the JPEG header stored in datatoDDR, marker by marker, up to and including the SOS header.
+//   size          : byte count of the input; left receives size minus the bytes consumed by the header
+//   r, c          : refreshed on every byte step (r = offset >> 1, c = offset & 1); after a successful
+//                   parse they address the first byte after the SOS header
+//   dht_tbl1/2    : lookup tables filled from the DHT (0xC4) segments
+//   hls_cmp, img_info, hls_mbs, hls_compinfo : filled from the SOF (0xC0) segment
+//   plep          : q_tables come from DQT (0xDB); format / axi_* fields and SetOtherQtab() follow the loop
+//   rtn           : only ever cleared here (failed check), so it must be true on entry; when it is false
+//                   the marker loop is skipped entirely
+void parser_jpg_top(ap_uint<AXI_WIDTH>* datatoDDR,
+                    const int size,
+                    int& r,
+                    int& c,
+                    uint16_t dht_tbl1[2][2][1 << DHT1],
+                    uint16_t dht_tbl2[2][2][1 << DHT2],
+                    ap_uint<12>& hls_cmp,
+                    int& left,
+
+                    // image info
+                    img_info& img_info,
+                    uint8_t hls_mbs[MAX_NUM_COLOR],
+                    hls_compInfo hls_compinfo[MAX_NUM_COLOR],
+                    bool& rtn,
+                    decOutput* plep) {
+    ap_uint<AXI_WIDTH>* segment = datatoDDR;
+    int offset = 0;
+    uint8_t b1, b2;
+    b1 = segment[0](7, 0);
+    b2 = segment[0](15, 8);
+    if ((size < 127) | (b1 != 0xFF) | (b2 != 0xD8)) { // the input must start with FF D8
+        _XF_IMAGE_PRINT("Header failed\n");
+        rtn = false;
+    }
+    readBytes(offset, 2, r, c);
+    bool scanned = false;
+
+    // Marker loop: each segment is FF <id> <len hi> <len lo> followed by len - 2 payload bytes.
+    while (!scanned && rtn) {
+        if (segment[r](c * 8 + 7, c * 8) != 0xFF) {
+            _XF_IMAGE_PRINT("marker+length detect failed\n");
+            rtn = false;
+            break;
+        }
+        oneByte(offset, r, c); // skip marker ff and protect 16 bit length
+        b2 = segment[r](c * 8 + 7, c * 8);
+        oneByte(offset, r, c);
+        uint8_t l1 = segment[r](c * 8 + 7, c * 8);
+        oneByte(offset, r, c);
+        uint8_t l2 = segment[r](c * 8 + 7, c * 8);
+        oneByte(offset, r, c);
+        uint16_t len = B_SHORT(l1, l2);
+        len -= 2;
+
+        if (((b2 & 0xF0) == 0xE0) || (b2 == 0xDD)) { // all APP
+
+            _XF_IMAGE_PRINT("APP or DRI : OFFSET: %.8x\n", offset - 4);
+            readBytes(offset, len, r, c);
+            _XF_IMAGE_PRINT("skip %d Bytes of marker \n", len);
+
+        } else if (b2 == 0xDB) {
+            _XF_IMAGE_PRINT("DQT 0xDB: OFFSET: %.8x\n", offset - 4);
+            // Each table is one selector byte followed by 64 values, placed via hls_jpeg_zigzag_to_raster.
+            while (len >= 64 + 1) {
+                b1 = segment[r](c * 8 + 7, c * 8);
+                oneByte(offset, r, c);
+                if (b1 > 3) { // rtn = false;//19 byte marker
+                    _XF_IMAGE_PRINT(" ERROR: DQT, ERROR idx \n");
+                }
+                for (int j = 0; j < 64; ++j) {
+// Selector 0 fills q_tables[0]; any other selector fills both q_tables[1] and q_tables[2].
+#pragma HLS PIPELINE
+                    int jzz_x = hls_jpeg_zigzag_to_raster[j] & 7;
+                    int jzz_y = hls_jpeg_zigzag_to_raster[j] >> 3;
+                    if (b1 == 0)
+                        plep->q_tables[0][jzz_y][jzz_x] = segment[r](c * 8 + 7, c * 8);
+                    else {
+                        plep->q_tables[1][jzz_y][jzz_x] = segment[r](c * 8 + 7, c * 8);
+                        plep->q_tables[2][jzz_y][jzz_x] = segment[r](c * 8 + 7, c * 8);
+                    }
+                    oneByte(offset, r, c);
+                }
+                len -= 65;
+            }
+
+        } else if (b2 == 0xC0) {
+            // sof ffc0//min 17 byte marker
+            // 2B B 2B 2B B    B   4   4   B
+            // L P Y  X  CMP idx sfh sfv qidx
+            _XF_IMAGE_PRINT("SOF 0xC0: OFFSET: %.8x\n", offset - 4);
+            b1 = segment[r](c * 8 + 7, c * 8);
+            if (b1 != 8) {
+                // rtn=false;
+                _XF_IMAGE_PRINT(" ERROR: SOF, image precision is not 8bit \n");
+            }
+            oneByte(offset, r, c);
+
+            b1 = segment[r](c * 8 + 7, c * 8);
+            oneByte(offset, r, c);
+            b2 = segment[r](c * 8 + 7, c * 8);
+            oneByte(offset, r, c);
+            int height = B_SHORT(b1, b2);
+
+            b1 = segment[r](c * 8 + 7, c * 8);
+            oneByte(offset, r, c);
+            b2 = segment[r](c * 8 + 7, c * 8);
+            oneByte(offset, r, c);
+            int width = B_SHORT(b1, b2);
+            _XF_IMAGE_PRINT("height=%d, width=%d, \n", height, width);
+
+            img_info.hls_cs_cmpc = segment[r](c * 8 + 7, c * 8);
+            oneByte(offset, r, c);
+            if (img_info.hls_cs_cmpc != 3) {
+                // rtn= false;
+                _XF_IMAGE_PRINT("ERROR: SOF, supports only 3 component color jpeg files\n");
+            }
+
+            uint8_t sfhm = 0, sfvm = 0;
+            for (int cmp = 0; cmp < 3; ++cmp) {
+#pragma HLS PIPELINE
+                uint8_t sfv, sfh;
+                oneByte(offset, r, c); // step over the component id byte
+                b1 = segment[r](c * 8 + 7, c * 8);
+                sfv = b1 >> 4;
+                sfh = b1 & 0x0f;
+
+                if ((sfv & (sfv - 1)) || (sfh & (sfh - 1))) {
+                    // rtn= false;
+                    _XF_IMAGE_PRINT("ERROR: SOF, sfv of sfh \n");
+                }
+
+                hls_compinfo[cmp].sfv = sfv;
+                hls_compinfo[cmp].sfh = sfh;
+                if (hls_compinfo[cmp].sfh > sfhm) sfhm = hls_compinfo[cmp].sfh;
+                if (hls_compinfo[cmp].sfv > sfvm) sfvm = hls_compinfo[cmp].sfv;
+                hls_mbs[cmp] = hls_compinfo[cmp].sfv * hls_compinfo[cmp].sfh;
+                _XF_IMAGE_PRINT("sfv = %d, sfh = %d\n", sfv, sfh);
+                readBytes(offset, 2, r, c); // step over the sampling-factor byte and the qidx byte
+            }
+            if (sfhm == 0 || sfvm == 0) {
+                // Sampling factors of 0 pass the power-of-two test above but would divide by zero just below.
+                _XF_IMAGE_PRINT("ERROR: SOF, sfv of sfh \n");
+                rtn = false;
+                break;
+            }
+
+            // The hls_cmp pattern is selected by the block count of component 0 (hls_mbs[0]).
+            if (hls_mbs[0] == 4) {
+                hls_cmp = 0b110000110000;
+            } else if (hls_mbs[0] == 2) {
+                hls_cmp = 0b110011001100;
+            } else if (hls_mbs[0] == 1) {
+                hls_cmp = 0b110110110110;
+            } else {
+                _XF_IMAGE_PRINT("ERROR: hls_cmpnfo[0].mbs is not 4/2/1 \n");
+            }
+
+            // MCU counts: image size in 8x8 blocks divided by the largest sampling factor, rounded up.
+            int sub_o_sfh = (height >> 3) / sfhm;
+            int sub_o_sfv = (width >> 3) / sfvm;
+            img_info.hls_mcuv = (height - (sub_o_sfh << 3) * sfhm) ? (sub_o_sfh + 1) : sub_o_sfh;
+            img_info.hls_mcuh = (width - (sub_o_sfv << 3) * sfvm) ? (sub_o_sfv + 1) : sub_o_sfv;
+            // hls_mcuv =  ( int ) ceil( (float) height / (float) ( 8 * sfhm ) );
+            // hls_mcuh =  ( int ) ceil( (float) width  / (float) ( 8 * sfvm ) );
+            img_info.hls_mcuc = img_info.hls_mcuv * img_info.hls_mcuh;
+
+#ifndef __SYNTHESIS__
+            printf("hls_mcuv=%d, hls_mcuh=%d, hls_mcuc=%d, \n", img_info.hls_mcuv, img_info.hls_mcuh,
+                   img_info.hls_mcuc);
+#endif
+            for (int cmp = 0; cmp < 3; cmp++) {
+#pragma HLS PIPELINE
+                hls_compinfo[cmp].mbs = hls_mbs[cmp];
+                hls_compinfo[cmp].bcv = img_info.hls_mcuv * hls_compinfo[cmp].sfh;
+                hls_compinfo[cmp].bch = img_info.hls_mcuh * hls_compinfo[cmp].sfv;
+                hls_compinfo[cmp].bc = hls_compinfo[cmp].bcv * hls_compinfo[cmp].bch;
+            }
+
+        } else if (b2 == 0xC4) {
+            // min 19 byte marker
+            // 2B  4  4       B
+            // L  ac cmp    Vij*256
+            _XF_IMAGE_PRINT("DHT 0xC4: OFFSET: %.8x\n", offset - 4);
+
+            uint16_t huff_len = 1, cnt, addr_now, addr_gap;
+            const int addr_all = 65536;
+            uint16_t huff_cnt[16];
+            while (len > 16 + 1) {
+                cnt = 0;
+                b1 = segment[r](c * 8 + 7, c * 8);
+                oneByte(offset, r, c);
+                bool ac = b1 & 0x10;
+                int cmp_huff = b1 & 0x0f;
+                _XF_IMAGE_PRINT(" ac = %d, cmp =%d", ac, cmp_huff);
+                if ((b1 & 0xEC) || (b1 & 0x02)) { // check 0bxxxdxxdd
+                    // Only table ids 0/1 fit dht_tbl1 / dht_tbl2 ([2][2][...]); any other id would write out of bounds.
+                    _XF_IMAGE_PRINT(" ERROR: DHT failed \n");
+                    rtn = false;
+                    break;
+                }
+
+                // init huff_cnt
+                for (huff_len = 1; huff_len <= 16; ++huff_len) {
+#pragma HLS PIPELINE
+                    huff_cnt[huff_len - 1] = segment[r](c * 8 + 7, c * 8);
+                    oneByte(offset, r, c);
+                }
+
+                len -= 17;
+
+                // init the val in each address
+                for (huff_len = 1; huff_len <= 16; ++huff_len) {
+                    addr_now = addr_all >> huff_len;
+
+                    _XF_IMAGE_PRINT(" Codes of length %d bits (%.3d total):", huff_len, cnt);
+                    for (int j = 0; j < huff_cnt[huff_len - 1]; ++j) {
+#pragma HLS PIPELINE
+                        b1 = segment[r](c * 8 + 7, c * 8);
+                        _XF_IMAGE_PRINT(" %.2x", b1);
+                        oneByte(offset, r, c);
+
+                        uint8_t run_vlen = b1;
+                        uint8_t val_len = run_vlen & 0x0F;
+                        uint8_t run_len = (run_vlen & 0xF0) >> 4;
+                        uint8_t total_len = val_len + huff_len;
+                        // Table entry: bits 4..0 total_len, 9..5 huff_len, 13..10 run_len, bit 15 = huff_len > DHT1.
+                        ap_uint<16> data = 0;
+                        data(4, 0) = total_len;
+                        data(9, 5) = huff_len;
+                        data(13, 10) = run_len;
+                        data[15] = huff_len > DHT1;
+
+                        // A code of huff_len bits covers addr_all >> huff_len addresses starting at cnt; fill them.
+                        for (int k = addr_now; k > 0; k -= huff_len > DHT1 ? 1 : (1 << DHT_S)) {
+#pragma HLS LOOP_TRIPCOUNT min = 1 max = 1
+#pragma HLS PIPELINE II = 1
+
+                            dht_tbl1[ac][cmp_huff][(cnt >> (DHT_S))] = data;
+
+                            if (huff_len > DHT1) {
+                                dht_tbl2[ac][cmp_huff][(cnt % (DHT_M))] = data;
+                            }
+                            if (huff_len > DHT1)
+                                cnt++;
+                            else
+                                cnt += (1 << DHT_S);
+                        } // end one val
+
+                    } // end one huff_len
+
+                    len -= huff_cnt[huff_len - 1];
+
+                } // end all huff
+            }
+        } else if (b2 == 0xDA) {
+            // min 12 byte marker
+            // 2B  B      B    4    4 x3      B     B   	B
+            // L  NS     CSj  Tdj  Taj      ss=0 se=63 ahal=0
+            _XF_IMAGE_PRINT("Scan 0xDA: OFFSET: %.8x\n", offset - 4);
+            b1 = segment[r](c * 8 + 7, c * 8);
+            if (b1 != 3) {
+                // rtn = false;
+                _XF_IMAGE_PRINT(" ERROR: SOS, ERROR CMP \n");
+            }
+
+            // Skip the rest of the SOS header: 10 bytes = NS + 3 x (CSj, Tdj/Taj) + ss + se + ahal.
+            readBytes(offset, 10, r, c);
+            // non-interleaving//to be added
+            _XF_IMAGE_PRINT("Scan DATA: OFFSET: %.8x\n", offset);
+
+            scanned = true;
+
+        } else {
+            _XF_IMAGE_PRINT("Undefined segment  %d\n", b2);
+            rtn = false;
+        }
+
+    } // end while
+
+    // Bytes that remain after the parsed header.
+    left = size - offset;
+    // Fill the structure plep from hls_compinfo.
+    // enum COLOR_FORMAT{C400=0, C420, C422, C444};
+    if (hls_compinfo[0].sfv == 2 && hls_compinfo[0].sfh == 2)
+        plep->format = C420;
+    else if (hls_compinfo[0].sfv == 1 && hls_compinfo[0].sfh == 1)
+        plep->format = C444;
+    else
+        plep->format = C422;
+    plep->axi_num_cmp = img_info.hls_cs_cmpc;
+    plep->axi_map_row2cmp[0] = 2;
+    plep->axi_map_row2cmp[1] = 1;
+    plep->axi_map_row2cmp[2] = 0;
+    plep->axi_map_row2cmp[3] = 0;
+    // NOTE(review): format is never set to C400 above, so the C400 branches below cannot be taken.
+    if (plep->format == C400)
+        plep->axi_num_cmp_mcu = 1; //? Not very sure
+    else if (plep->format == C420)
+        plep->axi_num_cmp_mcu = 4;
+    else
+        plep->axi_num_cmp_mcu = 3;
+    plep->axi_width[0] = hls_compinfo[0].bch;
+    plep->axi_width[1] =
+        (plep->format == C400) ? 0 : (plep->format == C444) ? hls_compinfo[0].bch : (hls_compinfo[0].bch + 1) >> 1;
+    plep->axi_width[2] =
+        (plep->format == C400) ? 0 : (plep->format == C444) ? hls_compinfo[0].bch : (hls_compinfo[0].bch + 1) >> 1;
+    //
+    plep->axi_height[0] = hls_compinfo[0].bcv;
+    plep->axi_height[1] =
+        (plep->format == C400) ? 0 : (plep->format == C420) ? (hls_compinfo[0].bcv + 1) >> 1 : hls_compinfo[0].bcv;
+    plep->axi_height[2] =
+        (plep->format == C400) ? 0 : (plep->format == C420) ? (hls_compinfo[0].bcv + 1) >> 1 : hls_compinfo[0].bcv;
+    plep->axi_mcuv =
+        (plep->format == C400) ? 0 : (plep->format == C420) ? (hls_compinfo[0].bcv + 1) >> 1 : hls_compinfo[0].bcv;
+    SetOtherQtab(plep);
+}
+
+} // namespace details
+} // namespace codec
+} // namespace xf
+
+namespace xf {
+namespace codec {
+// ------------------------------------------------------------
+// @brief Level 2 : kernel for jfif parser + huffman decoder
+// Calls details::parser_jpg_top on the header, then details::decoder_jpg_top on the data that follows it.
+// NOTE(review): rtn is never set to true here and decoder_jpg_top runs even when rtn is false - confirm intent.
+void kernel_parser_decoder(ap_uint<AXI_WIDTH>* datatoDDR,
+                           const int size,
+
+                           img_info& img_info,
+                           hls_compInfo hls_cmpnfo[MAX_NUM_COLOR],
+                           hls::stream<ap_uint<24> >& block_strm,
+                           bool& rtn,
+                           decOutput* plep) {
+    // clang-format off
+	//uint64_t max_pix = MAX_NUM_PIX;//for 8K*8K
+	uint64_t max_pix = MAX_DEC_PIX;//for 800*800
+#pragma HLS INTERFACE m_axi port = datatoDDR depth = max_pix offset = slave bundle = gmem_in2 \
+    latency = 125 max_read_burst_length = 128
+#pragma HLS INTERFACE s_axilite port=datatoDDR      bundle=control
+#pragma HLS INTERFACE s_axilite port=return         bundle=control
+    // clang-format on
+
+    // for offset = r*scale_char + c
+    int r = 0, c = 0;
+    int left = 0;
+    ap_uint<12> hls_cmp;
+    uint32_t rst_cnt; // NOTE(review): handed to decoder_jpg_top without being initialised here
+    uint8_t hls_mbs[MAX_NUM_COLOR];
+
+    // clang-format off
+    uint16_t 					dht_tbl1[2][2][1 << DHT1];
+    uint16_t 					dht_tbl2[2][2][1 << DHT2];
+#pragma HLS bind_storage variable = dht_tbl1 type=RAM_2P impl=LUTRAM
+#pragma HLS bind_storage variable = dht_tbl2 type=RAM_2P impl=LUTRAM
+    // clang-format on
+
+    // Parse the header before the data is burst-loaded from DDR
+    //----------------------------------------------------------
+    details::parser_jpg_top(datatoDDR, size, r, c, dht_tbl1, dht_tbl2, hls_cmp, left, img_info, hls_mbs, hls_cmpnfo,
+                            rtn, plep);
+    // The decoder starts at element r of the buffer; c and left (both set by the parser) go with it.
+    ap_uint<AXI_WIDTH>* ptr = datatoDDR + r;
+
+    // Decode the Huffman codes into block coefficients (no inverse quantization / IDCT applied)
+    //----------------------------------------------------------
+    details::decoder_jpg_top(ptr, left, c, dht_tbl1, dht_tbl2, hls_cmp, hls_mbs, img_info, rst_cnt, block_strm);
+
+#ifndef __SYNTHESIS__
+    if (!rtn) {
+        fprintf(stderr, "Error: parser the input file! \n");
+    }
+#endif
+}
+
+} // namespace codec
+} // namespace xf
diff --git a/codec/L2/demos/leptonEnc/kernel/XAcc_jpegdecoder.cpp b/codec/L2/demos/leptonEnc/kernel/XAcc_jpegdecoder.cpp
new file mode 100644
index 0000000000..e23b177f68
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/kernel/XAcc_jpegdecoder.cpp
@@ -0,0 +1,874 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file XAcc_jpegdecoder.cpp
+ * @brief mcu_decoder and its supporting Huffman-decode / block-reordering function implementations.
+ *
+ * This file is part of HLS algorithm library.
+ */
+
+#include "XAcc_lepjpegdecoder.hpp"
+
+// DEVLI(s, n): extend an s-bit magnitude n to a signed value: n if s == 0 or n >= 2^(s-1), else n + 1 - 2^s.
+#define DEVLI(s, n) ((s) == 0 ? (n) : (((n) >= (1 << ((s)-1))) ? (n) : (n) + 1 - (1 << (s))))
+
+
+namespace xf {
+namespace codec {
+namespace details {
+
+// p1_cache_mcuLine: read one line of blocks from block_strm and pack each 16-bit coefficient into hls_block.
+void p1_cache_mcuLine(ap_uint<64> hls_block[8][MAX_NUM_BLOCK88_W * 8], // out: 8 banks of 64-bit words, 4 x 16-bit lanes
+                      COLOR_FORMAT fmt, // in: C400 / C420 / C422 / C444
+                      hls::stream<ap_uint<24> >& block_strm, // in: [22] end-of-block, [21:16] position, [15:0] value
+                      int16_t width_cmp0, // in: component-0 width, counted in blocks
+                      int16_t width_cmp1) { // in: component-1 width, counted in blocks
+    int16_t end_blk_line = (fmt == C400) // number of end-of-block flags to consume before returning
+                               ? width_cmp0
+                               : (fmt == C420) ? (width_cmp0 << 1) + (width_cmp1 << 1) : width_cmp0 + (width_cmp1 << 1);
+    int i_blk = 0; // blocks completed so far
+    int cmp2 = 0; // component of the block currently being received
+    int32_t dpos2[3] = {0, 0, 0}; // per-component block index (Y,U,V)
+#pragma HLS ARRAY_PARTITION variable = dpos2 complete
+    ap_uint<22> addr[4]; // addresses of the four most recent writes, addr[3] newest
+#pragma HLS ARRAY_PARTITION variable = addr complete dim = 1
+    ap_uint<64> data[4]; // words written at addr[0..3]
+#pragma HLS ARRAY_PARTITION variable = data complete dim = 1
+
+    for (int i = 0; i < width_cmp0 * 8; i++) { // zero the first width_cmp0 * 8 words of every bank
+#pragma HLS pipeline
+        for (int j = 0; j < 8; j++) {
+            hls_block[j][i] = 0;
+        }
+    }
+
+    for (int j = 0; j < 4; j++) { // reset the write-history registers
+#pragma HLS UNROLL
+        addr[j] = 0;
+        data[j] = 0;
+    }
+
+    while (i_blk < end_blk_line) {
+#pragma HLS PIPELINE II = 1
+#pragma HLS DEPENDENCE variable = hls_block inter true RAW distance = 4
+        ap_uint<24> block_coeff = block_strm.read();
+        bool is_endblock = block_coeff[22];
+        ap_uint<8> bpos = block_coeff(21, 16);
+        int16_t block = block_coeff(15, 0);
+
+        ap_uint<14> addr2; // word address inside a bank: block slot * 8 + bpos / 8 (the bank is bpos % 8)
+        if (fmt == C420 && cmp2 == 0) {
+            // C420 component 0: slot = {dpos2[0] >> 2, dpos2[0] & 1}; bit 1 of dpos2[0] picks the lane below.
+            ap_uint<12> addr_blk;
+            addr_blk(9, 1) = dpos2[0] >> 2;
+            ; // empty statement, no effect
+            addr_blk(0, 0) = dpos2[0] & 1;
+            addr_blk[10] = 0;
+            addr_blk[11] = 0;
+            addr2 = (addr_blk << 3) + (bpos >> 3);
+        } else {
+            addr2 = (dpos2[cmp2] << 3) + (bpos >> 3);
+        }
+
+        ap_uint<64> ramdata; // word being updated
+        ap_uint<22> rd_addr = (bpos & 7, addr2); // presumably ap_int concatenation {bank, addr2} -- TODO confirm
+        if (rd_addr == addr[3]) // reuse the newest matching recent write instead of reading the RAM
+            ramdata = data[3];
+        else if (rd_addr == addr[2])
+            ramdata = data[2];
+        else if (rd_addr == addr[1])
+            ramdata = data[1];
+        else if (rd_addr == addr[0])
+            ramdata = data[0];
+        else
+            ramdata = hls_block[bpos & 7][addr2];
+
+        if (fmt == C420 && cmp2 == 0 && dpos2[0] & 2) { // C420 component 0 with bit 1 of the block index set
+            ramdata.range(63, 48) = block;
+        } else if (fmt == C420 && cmp2 == 0) {
+            ramdata.range(47, 32) = block;
+        } else { // lane [47:32] for component 0, [31:16] for component 1, [15:0] for component 2
+            ramdata.range(16 * (3 - cmp2) - 1, 16 * (2 - cmp2)) = block;
+        }
+
+        hls_block[bpos & 7][addr2] = ramdata;
+
+        addr[0] = addr[1]; // shift the write history and append this write as the newest entry
+        addr[1] = addr[2];
+        addr[2] = addr[3];
+        addr[3] = (bpos & 7, addr2);
+        data[0] = data[1];
+        data[1] = data[2];
+        data[2] = data[3];
+        data[3] = ramdata;
+
+        // On end-of-block, advance the current component's block index and choose the next component:
+        // C444 cycles 0,1,2; C422 takes two component-0 blocks, then 1, then 2; C420 takes four
+        // component-0 blocks, then 1, then 2; C400 stays on component 0.
+        if (is_endblock) {
+            if (fmt == C444) {
+                dpos2[cmp2]++;
+                cmp2 = (cmp2 == 2) ? 0 : cmp2 + 1;
+            } else if (fmt == C422) {
+                if (cmp2 == 0) {
+                    if ((dpos2[0] & 1) == 1) {
+                        cmp2 = 1;
+                    }
+                    dpos2[0]++;
+                } else if (cmp2 == 1) {
+                    cmp2 = 2;
+                    dpos2[1]++;
+                } else { // cmp==2
+                    cmp2 = 0;
+                    dpos2[2]++;
+                }
+            } else if (fmt == C420) {
+                if (cmp2 == 0) {
+                    if ((dpos2[0] & 3) == 3) {
+                        cmp2 = 1;
+                    }
+                    dpos2[0]++;
+                } else if (cmp2 == 1) {
+                    cmp2 = 2;
+                    dpos2[1]++;
+                } else { // cmp==2
+                    cmp2 = 0;
+                    dpos2[2]++;
+                }
+            } else { // C400
+                cmp2 = 0;
+                dpos2[0]++; // II=2
+            }
+
+            i_blk++;
+        } // end one block
+    }     // end while
+}
+
+void hls_next_mcupos_strm(hls::stream<ap_uint<24> >& block_strm, // in: coefficient words consumed by p1_cache_mcuLine
+                          const decOutput plep, // in: only plep.format is read
+                          // int16_t* hls_block2,
+                          hls::stream<ap_int<11> > strm_coef[8], // out: strm_coef[j] carries words of bank j
+                          uint16_t axi_width[MAX_NUM_COLOR], // in: per-component widths, in blocks
+                          uint16_t axi_height[MAX_NUM_COLOR]) { // in: [0] (C400) or [1] (otherwise) is the line count
+    // Per line: fill hls_block via p1_cache_mcuLine, then stream the cached words out one 16-bit lane at a time.
+    int test = 0; // unused
+
+    // Lane layout written by p1_cache_mcuLine: [15:0] component 2, [31:16] component 1, [47:32] component 0.
+    COLOR_FORMAT fmt = plep.format;
+    uint8_t axi_num_cmp_mcu = (fmt == C400) ? 1 : (fmt == C420) ? 4 : 3; // lanes read out per line
+    // Lane [63:48] also holds component 0 (C420 blocks whose index has bit 1 set), hence two 0 entries below.
+    uint8_t axi_map_row2cmp[4] = {2, 1, 0, 0}; // read-out lane -> component whose width bounds the loop
+    // NOTE(review): for C400 only lane 0 ([15:0]) is read out, bounded by axi_width[2], whereas
+    // p1_cache_mcuLine stores C400 blocks (cmp2 == 0) in lane [47:32] -- confirm C400 is handled as intended.
+    // NOTE(review): strm_coef is ap_int<11> while each lane is 16 bits wide, so the write below narrows the
+    // value -- confirm coefficients always fit in 11 bits.
+    // The sizes below come from the axi_width / axi_height arguments; an earlier version of this code
+    // read the same fields from plep instead.
+    // plep_axi_width is a local copy of axi_width[0..2]; block_width_0/1 are passed to p1_cache_mcuLine.
+    uint16_t end_mcu_v = fmt == C400 ? axi_height[0] : axi_height[1];
+    uint16_t plep_axi_width[3];
+    uint16_t block_width_0 = axi_width[0];
+    uint16_t block_width_1 = axi_width[1];
+    plep_axi_width[0] = axi_width[0];
+    plep_axi_width[1] = axi_width[1];
+    plep_axi_width[2] = axi_width[2];
+
+    // clang-format off
+    ap_uint<64>                      hls_block [8][MAX_NUM_BLOCK88_W*8];
+#pragma HLS bind_storage        variable=hls_block type=RAM_2P impl=URAM
+#pragma HLS ARRAY_PARTITION variable=hls_block complete dim=1
+// clang-format on
+
+#ifndef __SYNTHESIS__
+    fprintf(stderr, "hls_next_mcupos_strm start\n");
+#endif
+
+    for (int i_mucv = 0; i_mucv < end_mcu_v; i_mucv++) { // one iteration per line
+#pragma HLS dataflow
+        p1_cache_mcuLine(hls_block,
+                         fmt,           // COLOR_FORMAT fmt,
+                         block_strm,    // hls::stream<ap_uint<24> >& block_strm,
+                         block_width_0, // int16_t width_cmp0,
+                         block_width_1  // int16_t width_cmp1,
+                         );
+
+        for (int idx_cmp = 0; idx_cmp < axi_num_cmp_mcu; idx_cmp++) { // one pass over the cache per lane
+            uint8_t id_cmp = axi_map_row2cmp[idx_cmp];
+            uint16_t block_width = plep_axi_width[id_cmp];
+
+            for (int jpeg_x = 0; jpeg_x + 0 < block_width; jpeg_x++) { // one block (8 words per bank) per iteration
+                int16_t coef_here[8][8]; // unused
+                for (int i = 0; i < 8; i++) {
+#pragma HLS pipeline
+                    for (int j = 0; j < 8; j++) {
+#pragma HLS unroll
+                        strm_coef[j].write(hls_block[j][jpeg_x * 8 + i].range(16 * (idx_cmp + 1) - 1, 16 * idx_cmp));
+                        // writes lane idx_cmp of word jpeg_x * 8 + i in bank j
+                    }
+                } // end pipeline
+
+            } // end jpeg_x
+        }     // end idx_cmp
+
+    } // end sort
+}
+
+// pick_huff_data: remove JPEG byte stuffing from the input words and forward them as sos_data, flagging markers.
+void pick_huff_data(hls::stream<CHType>& image_strm, // in: CH_W-bit words of compressed data
+                    hls::stream<bool>& eof_strm, // in: one flag before each word; true means no further word
+                    uint32_t& cnt_rst, // out: number of iterations in which the expected restart marker was found
+                    hls::stream<sos_data>& huff_sos_strm) { // out: one sos_data per iteration plus a final end_sos word
+#pragma HLS INLINE off
+
+    int test = 0; // for debug
+
+    uint8_t EOI_marker = 0xD9;
+    uint8_t RST_filter = 0xD0; // used as a mask: (b & 0xD0) == 0xD0 holds for b in D0..DF and F0..FF
+    sos_data huff_sos;
+    uint8_t bytes[16]; // [0, CH_W/8): word being processed; [CH_W/8, 2*CH_W/8): look-ahead word
+    uint8_t tmp[8]; // output bytes of the word being processed
+#pragma HLS ARRAY_PARTITION variable = bytes complete
+#pragma HLS ARRAY_PARTITION variable = tmp complete
+    bool is_ff = false; // previous byte was 0xFF (carried across words)
+    bool entropy_end = false; // set once FF D9 (EOI) has been seen; afterwards every byte is emitted as 0xFF
+    int rst_cnt = 0; // restart markers counted so far
+
+    bool eof = eof_strm.read(); // prime the buffer with the first word, if there is one
+    if (!eof) {
+        ap_uint<CH_W> image = image_strm.read();
+        eof = eof_strm.read();
+        for (int i = 0; i < CH_W / 8; i++) {
+#pragma HLS UNROLL
+            bytes[i] = image(8 * i + 7, 8 * i);
+        }
+    }
+    bool eof_reg = eof; // NOTE(review): if eof is already true here the primed word is never emitted -- confirm
+
+PICK_HUFF_LOOP:
+    while (!eof_reg) { // eof_reg lags eof by one iteration so the last buffered word is still processed
+#pragma HLS LOOP_TRIPCOUNT min = 5000 max = 5000
+#pragma HLS PIPELINE II = 1
+        eof_reg = eof; // read one more time to loop the shift buffer
+        if (!eof) {
+            ap_uint<CH_W> image = image_strm.read();
+            eof = eof_strm.read();
+            for (int i = 0; i < CH_W / 8; i++) {
+#pragma HLS UNROLL
+                bytes[CH_W / 8 + i] = image(8 * i + 7, 8 * i);
+                tmp[i] = 0;
+            }
+        }
+        if (eof_reg || eof) {
+            // intentionally empty (debug hook)
+        }
+        int idx = 0; // bytes written to tmp for this word
+        int garbage_bytes = 16; // stays 16 unless a marker byte is found in this word
+        bool rst_flag = false;
+        for (int i = 0; i < CH_W / 8; i++) { // check the ff
+#pragma HLS UNROLL
+            bool Redu_data = (bytes[i] == 0xFF) && ((bytes[i + 1] == 0x00)); // 0xFF followed by a stuffed 0x00
+            if (!Redu_data || entropy_end) {
+                tmp[idx] = (is_ff | entropy_end) ? 0xFF : bytes[i]; // the byte after any 0xFF is emitted as 0xFF
+                idx++; // when !Redu_data or marker entropy_end
+            }
+
+            if (is_ff && ((bytes[i] & RST_filter) == 0xD0)) { // ff dn or ff ff
+                garbage_bytes = idx; // output byte count up to and including this marker byte
+            }
+
+            if (is_ff && (bytes[i] == 0xD0 + (rst_cnt & 7))) { // the restart marker expected next
+                rst_flag |= true;
+            }
+
+            entropy_end = entropy_end | (is_ff && (bytes[i] == EOI_marker));
+            if (bytes[i] == 0xFF) {
+                is_ff = true;
+            } else {
+                is_ff = false;
+            }
+        }
+
+        if (rst_flag) { // counted at most once per word
+            rst_cnt++;
+        }
+
+        for (int i = 0; i < CH_W / 8; i++) { // zero the unused tail of tmp
+#pragma HLS UNROLL
+            if (i >= idx) tmp[i] = 0;
+        }
+
+        huff_sos.bits = idx * 8; // number of valid bits in data
+#if (CH_W == 32)
+        huff_sos.data = tmp[0] << 24 | tmp[1] << 16 | tmp[2] << 8 | tmp[3];
+#else
+        huff_sos.data = tmp[0] << 8 | tmp[1];
+#endif
+        huff_sos.garbage_bits = garbage_bytes * 8;
+        huff_sos.end_sos = false; // 7fff + 95 + false
+
+        huff_sos_strm.write(huff_sos);
+
+        // move the look-ahead word into the current-word slot for the next iteration
+        for (int i = 0; i < CH_W / 8; i++) {
+#pragma HLS UNROLL
+            bytes[i] = bytes[i + CH_W / 8];
+        }
+
+#ifndef __SYNTHESIS__
+        test++;
+#endif
+    } // endwhile
+
+    cnt_rst = rst_cnt; // report the restart-marker count to the caller
+// Terminating word: all-ones data with end_sos set.
+
+#if (CH_W == 32)
+    huff_sos.bits = CH_W;
+    huff_sos.data = 0xffffffff;
+    huff_sos.garbage_bits = CH_W - 16;
+    huff_sos.end_sos = true;
+    huff_sos_strm.write(huff_sos);
+#else
+    huff_sos.bits = CH_W;
+    huff_sos.data = 0xffff;
+    huff_sos.garbage_bits = CH_W - 16;
+    huff_sos.end_sos = true;
+    huff_sos_strm.write(huff_sos);
+#endif
+}
+
+// Huffman_decoder2: decode the de-stuffed bit stream into (position, value) coefficient words in an II = 1 loop.
+void Huffman_decoder2(
+    // input
+    hls::stream<sos_data>& huff_sos_strm, // de-stuffed words; the word with end_sos set ends the input
+    const uint16_t dht_tbl1[2][2][1 << DHT1], // [ac][hls_cmp[0]][top DHT1 bits]; entry bit 15 set -> use dht_tbl2
+    const uint16_t dht_tbl2[2][2][1 << DHT2], // [ac][hls_cmp[0]][low DHT2 bits of the 16-bit window]
+    const ap_uint<12> cyc_cmp, // initial hls_cmp pattern, rotated right by one bit after every block
+// simulation-only inputs (used for the debug prints and the final block-count check)
+#ifndef __SYNTHESIS__
+    const uint8_t hls_cs_cmpc,
+    const uint8_t hls_mbs[MAX_NUM_COLOR],
+    const uint16_t hls_mcuh,
+    const uint32_t hls_mcuc,
+#endif
+    // output
+    hls::stream<ap_uint<24> >& block_strm) { // [23] is_rst, [22] end-of-block, [21:16] bpos, [15:0] value
+
+#pragma HLS INLINE off
+
+    ap_uint<12> hls_cmp = cyc_cmp;
+#pragma HLS bind_storage variable = hls_cmp type = FIFO impl = SRL
+    int16_t lastDC[4] = {0, 0, 0, 0}; // DC predictors; [0] is the one used for the block being decoded
+#pragma HLS ARRAY_PARTITION variable = lastDC complete
+
+    // major parameter
+    uint8_t huff_len = 0;    // the length of bits for huffman codes, eq with idx+1 1~16
+    uint8_t run_len = 0;     // the number of zero before the non-zero ac coefficient 0~15
+    uint8_t val_len = 0;     // the length of bits for value, 0~11
+    uint8_t total_len = 0;   // huff_len + val_len 1~27
+    uint8_t dec_len = 0;     // the length of tbl_data bits in buff 1~27
+    ap_uint<24> block_coeff; // 23:is_rst, 22:is_endblock,21~16:bpos,15~0:block val
+#ifndef __SYNTHESIS__
+    uint8_t n_last = 0;
+    uint8_t garbage_bits = 0;
+    int cpmall = 0;
+    int test = 0;
+    int test_in8 = 0;
+    int test_in16 = 0;
+    int test_ov16 = 0;
+    int cmp = 0;
+    int mbs = 0;
+    int n_mcu = 0;
+#else
+    ap_uint<6> n_last = 0;
+    ap_uint<6> garbage_bits = 0;
+#endif
+    bool empty_n = false; // true while a word read from the stream waits in buf_dat1..3 to be merged
+    bool e = false; // data end
+    bool e_reg2 = false; // loop exit flag, taken from e_reg1 when garbage_bits reaches 0
+    bool e_reg1 = false; // copy of e taken when a word is merged into buff_huff
+
+    // tmp parameter
+    ap_uint<16> input;
+    ap_uint<8> input8;
+    uint8_t bpos = 0;
+    sos_data huff_sos;
+    uint16_t val_i;
+    int16_t block, block_tmp;
+    bool ac = false; // false: next code is a DC code; true: AC codes of the current block
+
+    // major buffer
+    ap_uint<2 * CH_W> buff_huff = 0; // the shift buffer
+    ap_uint<2 * CH_W> buff_tail = 0; // the shift buffer
+    ap_uint<3 * CH_W> buf_reg = 0;   // reg
+    ap_uint<3 * CH_W> buf_reg0 = 0;  // reg
+    ap_uint<3 * CH_W> buf_sft = 0;   // the shift buffer
+    // accurate shift control group, to adjust circuit timing
+    ap_uint<2 * CH_W> buf_dat0 = 0;
+    ap_uint<CH_W> buf_dat1 = 0;
+    ap_uint<CH_W> buf_dat2 = 0;
+    ap_uint<CH_W> buf_dat3 = 0;
+    ap_uint<2 * CH_W> buf1 = 0;
+    ap_uint<2 * CH_W> buf2 = 0;
+
+    // major flag to control state machine
+    bool lookup_tbl2 = false;
+    bool is_rst = false; // todo may be used in hls_next_mcupos2
+    bool val_loop = false;
+    bool next_block_reg = false;
+    bool is_garbage = false;
+
+    // tmp
+    uint16_t tbl1 = 0;
+    uint16_t tbl2 = 0;
+    uint16_t tbl_data = 0;
+    int tmp_bits = 0;
+
+DECODE_LOOP:
+    while (!e_reg2) {
+#pragma HLS LOOP_TRIPCOUNT min = 10000 max = 10000
+#pragma HLS DEPENDENCE array inter false
+#pragma HLS PIPELINE II = 1
+
+        //----------
+        // 1. shift all buffers and read huff_sos
+        n_last = n_last - dec_len;
+        garbage_bits = garbage_bits - dec_len;
+        buff_huff <<= dec_len;
+#if 0
+	    buf_sft = buf_reg;
+	    buf_sft <<= dec_len;
+	    buf_reg = buf_sft;
+#else
+        buf2(2 * CH_W - 1, CH_W) = buf_dat1;
+        buf2(1 * CH_W - 1, 0) = buf_dat2;
+        buf1(2 * CH_W - 1, CH_W) = buf_dat2;
+        buf1(1 * CH_W - 1, 0) = buf_dat3;
+        buf1 <<= dec_len;
+        buf2 <<= dec_len;
+        buf_dat1 = (CHType)(buf2(2 * CH_W - 1, CH_W));
+        buf_dat2 = (CHType)(buf1(2 * CH_W - 1, CH_W));
+        buf_dat3 = buf1(CH_W - 1, 0);
+#endif
+
+        if (garbage_bits == 0) {
+            e_reg2 = e_reg1; // end all blocks
+            is_garbage = false;
+        }
+        if ((n_last < CH_W) && (empty_n)) { // prepare data
+#if 0
+    	buff_tail = buf_reg(3*CH_W-1, 1*CH_W);
+#else
+            buff_tail(2 * CH_W - 1, CH_W) = (CHType)(buf2(2 * CH_W - 1, CH_W));
+            buff_tail(1 * CH_W - 1, 0) = (CHType)(buf1(2 * CH_W - 1, CH_W));
+#endif
+            buff_huff |= buff_tail;
+            tmp_bits = huff_sos.garbage_bits;
+            if (tmp_bits <= 2 * CH_W) {
+                garbage_bits = n_last + huff_sos.garbage_bits;
+                is_garbage = true;
+            }
+            n_last += huff_sos.bits;
+            e_reg1 = e;
+
+            empty_n = false;
+        }
+        if ((empty_n == false) && (!e)) { // read data
+
+            huff_sos = huff_sos_strm.read();
+            e = huff_sos.end_sos;
+#if 0
+        buf_reg0(3*CH_W-1, 2*CH_W) = huff_sos.data ;
+	    buf_reg = buf_reg0 ;
+	    buf_reg>>=n_last;
+#else
+            buf_dat0 = huff_sos.data;
+            buf_dat1 = buf_dat0 >> n_last;
+            buf_dat2 = (buf_dat0 << CH_W) >> n_last;
+            // Total shift is 2 * CH_W - n_last (31 + 1 for CH_W == 16, 63 + 1 for CH_W == 32), split in two steps.
+            buf_dat3 = (buf_dat0 << (2 * CH_W - 1 - n_last)) << 1;
+#endif
+            empty_n = true;
+        }
+
+        // decode one huffman code from 32b
+        bool freeze_out = (n_last < CH_W) && (!e_reg1); // stall while fewer than CH_W bits are buffered
+        input = buff_huff(2 * CH_W - 1, 2 * CH_W - 16);
+        input8 = buff_huff(2 * CH_W - 1, 2 * CH_W - 8);
+        is_rst = false;
+#if (CH_W == 16)
+        //----------
+        // 2. look up the table
+        ap_uint<DHT1> addr1 = input(15, 16 - DHT1);
+        ap_uint<DHT2> addr2 = input(DHT2 - 1, 0);
+
+        tbl1 = dht_tbl1[ac][hls_cmp[0]][addr1];
+        tbl2 = dht_tbl2[ac][hls_cmp[0]][addr2];
+
+        dec_len = 0;
+
+        if (!val_loop) {
+            tbl_data = (tbl1 >> 15) ? tbl2 : tbl1;
+
+            total_len = tbl_data & 0x1F; // table entry: [4:0] total_len, [9:5] huff_len, [13:10] run_len
+            huff_len = (tbl_data >> 5) & 0x1F;
+            run_len = (tbl_data >> 10) & 0x0F;
+            val_len = total_len - huff_len;
+
+            if (!freeze_out) { // if reset, false valid
+                if (input == 0xFFFF) {
+                    // 16 one-bits: a marker as emitted by pick_huff_data; skip it and reset the DC predictors
+
+                    if (garbage_bits <= 16) {
+                        dec_len = garbage_bits;
+                    } else {
+                        dec_len = garbage_bits - 16;
+                    }
+
+                    ac = false;
+                    freeze_out = true;
+                    val_loop = false;
+                    lastDC[0] = 0;
+                    lastDC[1] = 0;
+                    lastDC[2] = 0;
+                } else {
+                    if (total_len <= 15) {
+                        dec_len = total_len;
+                        freeze_out = false;
+                        val_loop = false;
+                    } else { // code + value exceed 15 bits: consume the code now, the value next iteration
+                        dec_len = huff_len;
+                        freeze_out = true;
+                        val_loop = true;
+                    }
+                }
+            }
+        } else {
+            if (!freeze_out) { // wait until there is enough data
+
+                huff_len = 0;
+                total_len = val_len;
+                dec_len = total_len;
+                val_loop = false;
+            }
+        }
+
+#elif (CH_W == 32)
+        //----------
+        // 2. look up the table anyway
+        if (!lookup_tbl2) {
+            ap_uint<DHT1> addr1 = input(15, 16 - DHT1);
+            tbl1 = dht_tbl1[ac][hls_cmp[0]][addr1];
+            lookup_tbl2 = (tbl1 >> 15);
+            if (!lookup_tbl2) {
+                tbl_data = tbl1;
+                total_len = tbl_data & 0x1F;
+                huff_len = (tbl_data >> 5) & 0x1F;
+                run_len = (tbl_data >> 10) & 0x0F;
+                val_len = total_len - huff_len;
+            } else {
+                total_len = 0;
+                lookup_tbl2 = true;
+            }
+
+        } else {
+            ap_uint<DHT2> addr2 = input(DHT2 - 1, 0);
+            tbl2 = dht_tbl2[ac][hls_cmp[0]][addr2];
+            tbl_data = tbl2;
+            total_len = tbl_data & 0x1F;
+            huff_len = (tbl_data >> 5) & 0x1F;
+            run_len = (tbl_data >> 10) & 0x0F;
+            val_len = total_len - huff_len;
+            lookup_tbl2 = false;
+        }
+
+        //----------
+        // 3. update dec_len and pos in the block
+        dec_len = 0;
+
+        if (!freeze_out) { // if reset, false valid
+            if ((input) == 0xFFFF) {
+                dec_len = garbage_bits;
+                ac = false;
+                freeze_out = true;
+                lookup_tbl2 = false;
+                lastDC[0] = 0;
+                lastDC[1] = 0;
+                lastDC[2] = 0;
+            } else {
+                dec_len = total_len;
+                freeze_out = (lookup_tbl2) ? true : false;
+            }
+        }
+
+#endif
+
+        //----------
+        // 3. get the value
+        if (!freeze_out) {
+#ifndef __SYNTHESIS__
+            if (run_len > 0) //&&(test>=187270)
+                _XF_IMAGE_PRINT(" run_len = %d \n", (int)run_len);
+#endif
+
+            if (val_len) {
+                val_i = buff_huff(2 * CH_W - 1 - huff_len, 2 * CH_W - total_len);
+            } else {
+                val_i = 0;
+            }
+            block_tmp = DEVLI(val_len, val_i);
+        }
+
+        bool eob = !freeze_out && ac && ((run_len | val_len) == 0); // AC code with zero run and zero size
+
+        if (!freeze_out) {
+            if (ac) {
+                bpos = bpos + 1 + run_len;
+                block = block_tmp;
+                // if(test>=187270)
+                _XF_IMAGE_PRINT("AC: huff_len = %d , block[%d] = %d\n", (int)huff_len, bpos, (int)block);
+            } else {
+                ac = true;
+                bpos = 0;
+                block = lastDC[0] + block_tmp; // DC is coded as a difference from the previous DC value
+                lastDC[0] = block;
+                _XF_IMAGE_PRINT("\nDC: huff_len = %d , dc_val_i = %d\n", (int)huff_len, (int)block_tmp);
+            }
+        }
+
+        //----------
+        // 4. write out
+        if (!freeze_out) {
+            if (!eob) {
+                block_coeff[23] = is_rst && (bpos == 63);
+                block_coeff[22] = (bpos == 63);
+                block_coeff(21, 16) = (uint8_t)bpos;
+                block_coeff(15, 0) = block;
+                block_strm.write(block_coeff);
+
+            } else { // is eob W [63]=0
+
+                block_coeff[23] = is_rst;
+                block_coeff[22] = 1;
+                block_coeff(21, 16) = (uint8_t)(63);
+                block_coeff(15, 0) = 0;
+                block_strm.write(block_coeff);
+                _XF_IMAGE_PRINT(" ================ eob [%d,63] \n", bpos);
+            }
+#ifndef __SYNTHESIS__
+            if (total_len <= 8) {
+                test_in8++;
+            } else if (total_len <= 16) {
+                test_in16++;
+            } else {
+                test_ov16++;
+            }
+#endif
+        }
+
+        //----------
+        // 5. next_block update and shift sampling cmp
+        bool next_block = (eob || (!freeze_out && (bpos == 63)));
+        // next_block_reg = freeze_out? next_block_reg : next_block;
+        if (next_block) {
+            ac = false;
+            ap_uint<1> tmp_sft = hls_cmp[0];
+            if (hls_cmp[0] | hls_cmp[1]) { // rotate the DC predictors so lastDC[0] serves the next block
+                int16_t tmpDC = lastDC[0];
+
+                lastDC[0] = lastDC[1];
+                lastDC[1] = lastDC[2];
+                lastDC[2] = tmpDC;
+            }
+            hls_cmp >>= 1; // 12-bit rotate right: bit 0 re-enters at bit 11
+            hls_cmp[11] = tmp_sft;
+
+#ifndef __SYNTHESIS__
+
+            if (cmp < hls_cs_cmpc - 1) {
+                if (mbs < hls_mbs[cmp] - 1) {
+                    mbs++;
+                } else {
+                    mbs = 0;
+                    cmp++;
+                }
+            } else {
+                cmp = 0;
+                n_mcu++;
+            }
+
+            cpmall = hls_mbs[0] + hls_mbs[1] + hls_mbs[2];
+
+            // clang-format off
+            _XF_IMAGE_PRINT(" block decode %d  times !! mcu [%d, %d][%d] block \n", test, (test / (cpmall)) % hls_mcuh,
+                            test / ((cpmall)*hls_mcuh), test % (cpmall)); // test 420
+            _XF_IMAGE_PRINT(" lft_in_buff = %d  n_mcu=%d *****\n\n *********************\n", (int)(n_last - dec_len),n_mcu);
+            if (((test / (cpmall)) % hls_mcuh == 27) && (test / ((cpmall)*hls_mcuh) == 58) && (test % (cpmall) == 0)) {
+                cpmall = hls_mbs[0] + hls_mbs[1] + hls_mbs[2];
+            }
+            // clang-format on
+            test++;
+#endif
+
+        } // end new block
+
+    } // end decode one block and loop all mcu/cmp/mbs
+
+#ifndef __SYNTHESIS__
+    // clang-format off
+    if (test != hls_mcuc * cpmall) {
+        std::cout << "ERROR : there is error blocks!" << std::endl;
+    }
+    std::cout << "run :" << test << " times !!!!!" << std::endl;
+#if (CH_W == 32)
+    std::cout << "test_in8 :" << test_in8 << ", test_in16:" << test_in16 << ", test_ov16:" << test_ov16 << std::endl;
+    int allblock = test_in8 + test_in16 + test_ov16;
+    std::cout << "test_in8 :" << (float)test_in8 / allblock << ", test_in16:" << (float)test_in16 / allblock
+              << ", test_ov16:" << (float)test_ov16 / allblock << std::endl;
+#endif
+// clang-format on
+#endif
+}
+
+// mcu_decoder: dataflow pair pick_huff_data -> Huffman_decoder2, connected by a depth-32 sos_data FIFO.
+void mcu_decoder(
+    // input
+    hls::stream<CHType>& image_strm, // compressed words, forwarded to pick_huff_data
+    hls::stream<bool>& eof_strm, // end-of-input flags, forwarded to pick_huff_data
+    const uint16_t dht_tbl1[2][2][1 << DHT1], // Huffman lookup tables, forwarded to Huffman_decoder2
+    const uint16_t dht_tbl2[2][2][1 << DHT2],
+    ap_uint<12> hls_cmp, // passed to Huffman_decoder2 as its cyc_cmp argument
+
+    // image info (forwarded to Huffman_decoder2 only when __SYNTHESIS__ is not defined)
+    const uint8_t hls_cs_cmpc, // component count in current scan
+    const uint8_t hls_mbs[MAX_NUM_COLOR],
+    const uint16_t hls_mcuh, // the horizontal mcu
+    const uint32_t hls_mcuc, // the total mcu
+
+    // output
+    uint32_t& rst_cnt, // written by pick_huff_data (its cnt_rst output)
+    hls::stream<ap_uint<24> >& block_strm) { // coefficient words written by Huffman_decoder2
+#pragma HLS DATAFLOW
+
+    // clang-format off
+    hls::stream<sos_data> huff_sos_strm; // NOTE(review): DATA_PACK is believed deprecated in Vitis HLS (AGGREGATE) -- confirm
+#pragma HLS DATA_PACK variable = huff_sos_strm
+#pragma HLS bind_storage  variable = huff_sos_strm type=FIFO impl=LUTRAM
+#pragma HLS STREAM    variable = huff_sos_strm depth = 32
+    // clang-format on
+
+    pick_huff_data(image_strm, eof_strm, rst_cnt, huff_sos_strm);
+
+    Huffman_decoder2(huff_sos_strm, dht_tbl1, dht_tbl2, hls_cmp,
+#ifndef __SYNTHESIS__
+                     hls_cs_cmpc, hls_mbs, hls_mcuh, hls_mcuc,
+#endif
+                     block_strm);
+}
+
+// hls_next_mcupos2: store coefficient words from block_strm into hls_block, block after block, until sta != 0.
+// for JPEG-D
+void hls_next_mcupos2(hls::stream<ap_uint<24> >& block_strm, // in: [22] end-of-block, [21:16] bpos, [15:0] value
+                      int16_t hls_block[MAX_NUM_COLOR * MAXCMP_BC * 64], // out: index cmp * hls_bc * 64 + dpos * 64 + bpos
+                      int hls_sfv[4], // in: per-component factor; > 1 selects the "422" branch below
+                      int hls_sfh[4], // in: per-component factor; > 1 selects the "420" branch below
+                      const uint8_t hls_mbs[4], // in: blocks per MCU for each component
+                      int hls_bch, // in: presumably component-0 blocks per row (used as a stride) -- TODO confirm
+                      int hls_bc, // in: blocks reserved per component inside hls_block
+                      int32_t hls_mcuc, // in: number of MCUs to process
+                      uint8_t hls_cs_cmpc, // in: number of components per MCU
+                      int& sta) { // in/out: loop runs while 0; set to 2 on completion or on a cmp != 0 error
+    // NOTE(review): sta is not initialised here, so the caller must pass 0 for any work to be done.
+    int test = 0;
+
+    int n_mcu = 0;
+    int cmp = 0;
+    int mbs = 0;
+    ap_uint<24> block_coeff;
+    bool is_endblock;
+    uint8_t bpos = 0;
+    int16_t block;
+    // NOTE(review): with hls_mcuc <= 0 nothing below appears to set sta, so the loop would not end -- confirm.
+    int dpos[MAX_NUM_COLOR] = {0};
+// dpos[c]: index of the block currently being filled for component c
+#pragma HLS ARRAY_PARTITION variable = dpos complete
+
+    while (!sta) {
+#pragma HLS PIPELINE II = 1
+        block_coeff = block_strm.read();
+        is_endblock = block_coeff[22];
+        bpos = block_coeff(21, 16);
+        block = block_coeff(15, 0);
+
+        hls_block[(cmp)*hls_bc * 64 + (dpos[cmp]) * 64 + bpos] = block;
+
+        if (is_endblock) {
+            unsigned int sfh = hls_sfh[cmp]; // 2   1    1
+            unsigned int sfv = hls_sfv[cmp]; // 2   2    1
+            if (sfh > 1) {                   // 420 cmp=0
+                if (cmp != 0) {
+                    _XF_IMAGE_PRINT("ERROR: next_mcu 420 case, cmp!=0");
+                    sta = 2;
+                }
+                if (mbs == 0) { // step through the four blocks of the MCU: right, down-left, right, back up
+                    dpos[cmp]++;
+                } else if (mbs == 1) {
+                    dpos[cmp] += hls_bch - 1;
+                } else if (mbs == 2) {
+                    dpos[cmp]++;
+                } else {
+                    if (dpos[cmp] % (2 * hls_bch) == 2 * hls_bch - 1) { // last block of a two-row band
+                        dpos[cmp]++;
+                    } else {
+                        dpos[cmp] -= hls_bch - 1;
+                    }
+                }
+            } else if (sfv > 1) { // 422 cmp=0
+                if (cmp != 0) {
+                    _XF_IMAGE_PRINT("ERROR: next_mcu 422 case, cmp!=0");
+                    sta = 2;
+                }
+                dpos[cmp]++;
+            } else { // 420 cmp=1/2 422 cmp=1/2 444 cmp=0/1/2
+                dpos[cmp]++;
+            }
+            if (n_mcu < hls_mcuc) {
+                if (cmp < hls_cs_cmpc - 1) {
+                    if (mbs < hls_mbs[cmp] - 1) { // 420:4/422:2/444:1
+                        mbs++;
+                    } else {
+                        mbs = 0;
+                        cmp++;
+                    }
+                } else { // last component of the MCU: one block, then move on to the next MCU
+                    cmp = 0;
+                    n_mcu++;
+                    if (n_mcu == hls_mcuc) {
+                        sta = 2;
+                    }
+                }
+            }
+
+            test++;
+        } // end one block
+
+    } // end while
+}
+
+} // namespace details
+} // namespace codec
+} // namespace xf
diff --git a/codec/L2/demos/leptonEnc/kernel/XAcc_model.cpp b/codec/L2/demos/leptonEnc/kernel/XAcc_model.cpp
new file mode 100644
index 0000000000..82d3d1c118
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/kernel/XAcc_model.cpp
@@ -0,0 +1,460 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "XAcc_model.hpp"
+
+ap_uint<16> num_nonzeros_counts_7x7[2][26][6][33];  // ARRAY_PARTITION block factor=3 dim=4
+ap_uint<16> num_nonzeros_counts_1x8[2][8][8][3][4]; //
+ap_uint<16> num_nonzeros_counts_8x1[2][8][8][3][4]; //
+// 16-bit entries hold two 8-bit counters (bits 7:0 and 15:8); init_hlsmodel() starts them at 0x0101.
+ap_uint<16> residual_noise_counts[2][64][10][10];     // ARRAY_PARTITION block factor=4 dim=2 !
+ap_uint<16> residual_noise_counts_dc[12][10];         // !
+ap_uint<64> residual_threshold_counts[2][256][8][32]; // ARRAY_PARTITION complete dim=4 | URAM !
+// ap_uint<64> residual_threshold_counts[2][128][8][16];// complete dim=3 | uram !
+ap_uint<22> addr_thre[5]; // addresses of the five most recent residual_threshold_counts updates
+ap_uint<64> data_thre[5]; // the 64-bit words written by those updates
+// 64-bit entries hold four 16-bit counters; probability_look_up() selects one with addr3(1, 0).
+ap_uint<64> exponent_counts[2][10][49][3][11];   // ARRAY_PARTITION complete dim=2 | URAM
+ap_uint<64> exponent_counts_x[2][10][15][3][11]; // ARRAY_PARTITION complete dim=4 | URAM
+ap_uint<16> exponent_counts_dc[12][17][11];      //
+// sign_counts is accompanied by a four-entry address/data history of its most recent updates.
+ap_uint<16> sign_counts[2][4][12]; // !
+ap_uint<8> addr_sign[4];
+ap_uint<16> data_sign[4];
+
+namespace xf {
+namespace codec {
+namespace details {
+
+// init_hlsmodel: set every model table, and the two small history register sets, to their start values.
+void init_hlsmodel() {
+#pragma HLS LOOP_MERGE
+    for (int i = 0; i < 2; i++) {
+        for (int j = 0; j < 26; j++) {
+            for (int k = 0; k < 6; k++) {
+                for (int l = 0; l < 11; l++) {
+#pragma HLS PIPELINE II = 1
+                    num_nonzeros_counts_7x7[i][j][k][l] = 0x0101; // offsets 0/11/22 cover all 33 entries
+                    num_nonzeros_counts_7x7[i][j][k][l + 11] = 0x0101;
+                    num_nonzeros_counts_7x7[i][j][k][l + 22] = 0x0101;
+                }
+            }
+        }
+    }
+    // The 1x8 and 8x1 tables have identical dimensions, so a single loop nest fills both.
+    for (int i = 0; i < 2; i++) {
+        for (int j = 0; j < 8; j++) {
+            for (int k = 0; k < 8; k++) {
+                for (int l = 0; l < 3; l++) {
+                    for (int m = 0; m < 4; m++) {
+#pragma HLS PIPELINE II = 1
+                        num_nonzeros_counts_1x8[i][j][k][l][m] = 0x0101;
+                        num_nonzeros_counts_8x1[i][j][k][l][m] = 0x0101;
+                    }
+                }
+            }
+        }
+    }
+    // residual_noise_counts: j spans 16; writes at offsets 0/16/32/48 cover all 64 entries of dimension 2.
+    for (int i = 0; i < 2; i++) {
+        for (int j = 0; j < 16; j++) {
+            for (int k = 0; k < 10; k++) {
+                for (int l = 0; l < 10; l++) {
+#pragma HLS PIPELINE II = 1
+                    residual_noise_counts[i][j][k][l] = 0x0101;
+                    residual_noise_counts[i][j + 16][k][l] = 0x0101;
+                    residual_noise_counts[i][j + 32][k][l] = 0x0101;
+                    residual_noise_counts[i][j + 48][k][l] = 0x0101;
+                }
+            }
+        }
+    }
+    // residual_noise_counts_dc.
+    for (int i = 0; i < 12; i++) {
+        for (int j = 0; j < 10; j++) {
+#pragma HLS PIPELINE II = 1
+            residual_noise_counts_dc[i][j] = 0x0101;
+        }
+    }
+    // residual_threshold_counts: the 32-wide last dimension is unrolled inside the pipelined k loop.
+    for (int i = 0; i < 2; i++) {
+        for (int j = 0; j < 256; j++) {
+            for (int k = 0; k < 8; k++) {
+#pragma HLS PIPELINE II = 1
+                for (int l = 0; l < 32; l++) {
+#pragma HLS UNROLL
+                    residual_threshold_counts[i][j][k][l] = 0x0101010101010101; // four 16-bit 0x0101 lanes
+                }
+            }
+        }
+    }
+    // exponent_counts: dimension 2 (index j, 10 entries) is iterated innermost and unrolled.
+    for (int i = 0; i < 2; i++) {
+        for (int k = 0; k < 49; k++) {
+            for (int l = 0; l < 3; l++) {
+                for (int m = 0; m < 11; m++) {
+#pragma HLS PIPELINE II = 1
+                    for (int j = 0; j < 10; j++) {
+#pragma HLS UNROLL
+                        exponent_counts[i][j][k][l][m] = 0x0101010101010101;
+                    }
+                }
+            }
+        }
+    }
+    // exponent_counts_x: dimension 4 (index l, 3 entries) is iterated innermost and unrolled.
+    for (int i = 0; i < 2; i++) {
+        for (int j = 0; j < 10; j++) {
+            for (int k = 0; k < 15; k++) {
+                for (int m = 0; m < 11; m++) {
+#pragma HLS PIPELINE II = 1
+                    for (int l = 0; l < 3; l++) {
+#pragma HLS UNROLL
+                        exponent_counts_x[i][j][k][l][m] = 0x0101010101010101;
+                    }
+                }
+            }
+        }
+    }
+    // exponent_counts_dc.
+    for (int i = 0; i < 12; i++) {
+        for (int j = 0; j < 17; j++) {
+            for (int k = 0; k < 11; k++) {
+#pragma HLS PIPELINE II = 1
+                exponent_counts_dc[i][j][k] = 0x0101;
+            }
+        }
+    }
+    // sign_counts.
+    for (int i = 0; i < 2; i++) {
+        for (int j = 0; j < 4; j++) {
+            for (int k = 0; k < 12; k++) {
+#pragma HLS PIPELINE II = 1
+                sign_counts[i][j][k] = 0x0101;
+            }
+        }
+    }
+    // History registers for sign_counts: address 0 paired with the table's initial value.
+    for (int i = 0; i < 4; i++) {
+#pragma HLS PIPELINE II = 1
+        addr_sign[i] = 0;
+        data_sign[i] = 0x0101;
+    }
+    // History registers for residual_threshold_counts, initialised the same way.
+    for (int i = 0; i < 5; i++) {
+#pragma HLS PIPELINE II = 1
+        addr_thre[i] = 0;
+        data_thre[i] = 0x0101010101010101;
+    }
+}
+
+// calc_prob: 8-bit estimate = (count in bits 7:0, scaled by 256) / (sum of the counts in bits 7:0 and 15:8).
+uint8_t calc_prob(ap_uint<16>& cnt) {
+#pragma HLS INLINE
+    if (cnt(7, 0) == 1 && cnt(15, 8) == 255) return 0;   // extreme pair is pinned to 0
+    if (cnt(7, 0) == 255 && cnt(15, 8) == 1) return 255; // opposite extreme is pinned to 255
+    return (cnt(7, 0) << 8) / (cnt(7, 0) + cnt(15, 8));  // NOTE(review): assumes both counts are never 0 together
+}
+
+// record_and_update: count one observation in place; obs=true bumps bits 15:8, obs=false bumps bits 7:0.
+void record_and_update(bool obs, ap_uint<16>& cnt) {
+#pragma HLS INLINE
+    if (obs) {
+        if (cnt(15, 8) != 0xff)
+            cnt(15, 8) = cnt(15, 8) + 1;
+        else if (cnt(7, 0) == 1) // counter is full and the other one is 1: leave the pair unchanged
+            cnt(15, 8) = 0xff;
+        else { // counter is full: restart it at 129 and halve the other one, rounding up
+            cnt(15, 8) = 129;
+            cnt(7, 0) = (1 + cnt(7, 0)) >> 1;
+        }
+    } else { // same steps as above with the roles of the two bytes exchanged
+        if (cnt(7, 0) != 0xff)
+            cnt(7, 0) = cnt(7, 0) + 1;
+        else if (cnt(15, 8) == 1)
+            cnt(7, 0) = 0xff;
+        else {
+            cnt(7, 0) = 129;
+            cnt(15, 8) = (1 + cnt(15, 8)) >> 1;
+        }
+    }
+}
+
+// template <class T>
+// void StrmEnd2StrmLen_T(
+//        int num_blk,
+//        hls::stream<bool>& strm_e0,
+//        hls::stream<T>& strm_len0
+//        )
+//{
+//    int cnt_blk=0;
+//    int cnt_len=0;
+//    while(cnt_blk < num_blk){
+//#pragma HLS pipeline II=1
+//        if(strm_e0.read()==false)
+//            cnt_len++;
+//        else{
+//            strm_len0.write(cnt_len);
+//            cnt_len=0;
+//            cnt_blk++;
+//        }
+//
+//    }
+//}
+
+// probability_look_up: per input bit, read the selected counter, emit its probability, update the counter.
+void probability_look_up(ap_uint<1> ap_color,
+                         // Inputs: one element of each stream is read per loop iteration.
+                         hls::stream<ap_uint<4> >& strm_sel_tab,
+                         hls::stream<bool>& strm_cur_bit,
+                         hls::stream<bool>& strm_e_in, // one extra flag is read before the loop starts
+                         hls::stream<ap_uint<16> >& strm_addr1,
+                         hls::stream<ap_uint<16> >& strm_addr2,
+                         hls::stream<ap_uint<16> >& strm_addr3,
+                         hls::stream<ap_uint<16> >& strm_addr4,
+                         // Outputs.
+                         hls::stream<bool>& strm_bit,
+                         hls::stream<uint8_t>& strm_prob,
+                         hls::stream<bool>& strm_e,
+                         hls::stream<uint8_t>& strm_tab_dbg
+
+                         ) {
+// clang-format off
+#pragma HLS ARRAY_PARTITION variable=num_nonzeros_counts_7x7 block factor=3 dim=4
+#pragma HLS ARRAY_PARTITION variable=residual_noise_counts block factor=4 dim=2
+
+#pragma HLS ARRAY_PARTITION variable=exponent_counts complete dim=2
+#pragma HLS bind_storage variable=exponent_counts type=RAM_2P impl=URAM
+// NOTE(review): the next directive repeats the residual_noise_counts partition already given above.
+#pragma HLS ARRAY_PARTITION variable=residual_noise_counts block factor=4 dim=2
+
+#pragma HLS ARRAY_PARTITION variable=residual_threshold_counts complete dim=4
+#pragma HLS bind_storage variable=residual_threshold_counts	type=RAM_2P impl=URAM
+
+#pragma HLS ARRAY_PARTITION variable=exponent_counts_x complete dim=4
+#pragma HLS bind_storage variable=exponent_counts_x 		type=RAM_2P impl=URAM
+
+#pragma HLS ARRAY_PARTITION variable=addr_sign complete dim=1
+#pragma HLS ARRAY_PARTITION variable=data_sign complete dim=1
+
+#pragma HLS ARRAY_PARTITION variable=addr_thre complete dim=1
+#pragma HLS ARRAY_PARTITION variable=data_thre complete dim=1
+    // clang-format on
+
+    bool e;
+    ap_uint<16> cnt;
+    ap_uint<16> addr1;
+    ap_uint<16> addr2;
+    ap_uint<16> addr3;
+    ap_uint<16> addr4;
+    // Prime the end flag; each iteration then reads the flag that governs the following iteration.
+    e = strm_e_in.read();
+    while (!e) {
+#pragma HLS PIPELINE II = 1
+// clang-format off
+#pragma HLS DEPENDENCE variable=num_nonzeros_counts_7x7 	inter false
+#pragma HLS DEPENDENCE variable=exponent_counts 			inter false
+#pragma HLS DEPENDENCE variable=residual_noise_counts 		inter false
+#pragma HLS DEPENDENCE variable=num_nonzeros_counts_8x1 	inter false
+#pragma HLS DEPENDENCE variable=num_nonzeros_counts_1x8 	inter false
+#pragma HLS DEPENDENCE variable=exponent_counts_x 			inter false
+#pragma HLS DEPENDENCE variable=exponent_counts_dc 			inter false
+#pragma HLS DEPENDENCE variable=residual_noise_counts_dc 	inter false
+//#pragma HLS DEPENDENCE variable=data_thre 					inter false
+// NOTE(review): distances 5 and 4 below equal the sizes of addr_thre/data_thre and addr_sign/data_sign.
+#pragma HLS DEPENDENCE variable=residual_threshold_counts 	inter true RAW distance=5
+#pragma HLS DEPENDENCE variable=sign_counts 				inter true RAW distance=4
+        // clang-format on
+
+        ap_uint<4> sel_tab = strm_sel_tab.read();
+        e = strm_e_in.read();
+        bool value = strm_cur_bit.read();
+        strm_bit.write(value); // the bit is forwarded unchanged
+        addr1 = strm_addr1.read();
+        addr2 = strm_addr2.read();
+        addr3 = strm_addr3.read();
+        addr4 = strm_addr4.read();
+        // Every branch: load the counter, emit its probability as read, update it, write it back.
+        if (sel_tab == NZ_CNT_7x7) {
+            cnt = num_nonzeros_counts_7x7[ap_color][addr1][addr2][addr3];
+
+            strm_prob.write(calc_prob(cnt));
+            strm_e.write(false);
+            strm_tab_dbg.write(NZ_CNT_7x7);
+            record_and_update(value, cnt);
+
+            num_nonzeros_counts_7x7[ap_color][addr1][addr2][addr3] = cnt;
+        } else if (sel_tab == EXP_CNT) {
+            ap_uint<64> ram_data;
+            ram_data = exponent_counts[ap_color][addr1][addr2][addr3(3, 2)][addr4];
+            cnt = ram_data(((addr3(1, 0) + 1) << 4) - 1, addr3(1, 0) << 4); // 16-bit lane picked by addr3(1, 0)
+
+            strm_prob.write(calc_prob(cnt));
+            strm_e.write(false);
+            strm_tab_dbg.write(EXP_CNT);
+            record_and_update(value, cnt);
+
+            ram_data(((addr3(1, 0) + 1) << 4) - 1, addr3(1, 0) << 4) = cnt;
+            exponent_counts[ap_color][addr1][addr2][addr3(3, 2)][addr4] = ram_data;
+        } else if (sel_tab == SIGN_CNT) {
+            ap_uint<8> rd_addr = (ap_color, addr1(1, 0), addr2(3, 0)); // concatenated 7-bit key
+            if (rd_addr == addr_sign[3]) // entry [3] is the most recently stored one, so it is checked first
+                cnt = data_sign[3];
+            else if (rd_addr == addr_sign[2])
+                cnt = data_sign[2];
+            else if (rd_addr == addr_sign[1])
+                cnt = data_sign[1];
+            else if (rd_addr == addr_sign[0])
+                cnt = data_sign[0];
+            else
+                cnt = sign_counts[ap_color][addr1][addr2];
+
+            strm_prob.write(calc_prob(cnt));
+            strm_e.write(false);
+            strm_tab_dbg.write(SIGN_CNT);
+
+            record_and_update(value, cnt);
+            // Write back, then shift the history down by one and store this update in slot [3].
+            sign_counts[ap_color][addr1][addr2] = cnt;
+            addr_sign[0] = addr_sign[1];
+            addr_sign[1] = addr_sign[2];
+            addr_sign[2] = addr_sign[3];
+            addr_sign[3] = (ap_color, addr1(1, 0), addr2(3, 0));
+            data_sign[0] = data_sign[1];
+            data_sign[1] = data_sign[2];
+            data_sign[2] = data_sign[3];
+            data_sign[3] = cnt;
+
+        } else if (sel_tab == NOIS_CNT) {
+            cnt = residual_noise_counts[ap_color][addr1][addr2][addr3];
+
+            strm_prob.write(calc_prob(cnt));
+            strm_e.write(false);
+            strm_tab_dbg.write(NOIS_CNT);
+            record_and_update(value, cnt);
+
+            residual_noise_counts[ap_color][addr1][addr2][addr3] = cnt;
+        } else if (sel_tab == NZ_CNT_8x1) {
+            cnt = num_nonzeros_counts_8x1[ap_color][addr1][addr2][addr3][addr4];
+
+            strm_prob.write(calc_prob(cnt));
+            strm_e.write(false);
+            strm_tab_dbg.write(NZ_CNT_8x1);
+
+            record_and_update(value, cnt);
+            num_nonzeros_counts_8x1[ap_color][addr1][addr2][addr3][addr4] = cnt;
+        } else if (sel_tab == NZ_CNT_1x8) {
+            cnt = num_nonzeros_counts_1x8[ap_color][addr1][addr2][addr3][addr4];
+
+            strm_prob.write(calc_prob(cnt));
+            strm_e.write(false);
+            strm_tab_dbg.write(NZ_CNT_1x8);
+
+            record_and_update(value, cnt);
+            num_nonzeros_counts_1x8[ap_color][addr1][addr2][addr3][addr4] = cnt;
+        } else if (sel_tab == EXP_CNT_X) {
+            ap_uint<64> ram_data;
+            ram_data = exponent_counts_x[ap_color][addr1][addr2][addr3(3, 2)][addr4];
+            cnt = ram_data(((addr3(1, 0) + 1) << 4) - 1, addr3(1, 0) << 4); // 16-bit lane picked by addr3(1, 0)
+
+            strm_prob.write(calc_prob(cnt));
+            strm_e.write(false);
+            strm_tab_dbg.write(EXP_CNT_X);
+
+            record_and_update(value, cnt);
+            ram_data(((addr3(1, 0) + 1) << 4) - 1, addr3(1, 0) << 4) = cnt;
+            exponent_counts_x[ap_color][addr1][addr2][addr3(3, 2)][addr4] = ram_data;
+        } else if (sel_tab == THRE_CNT) {
+            ap_uint<64> ram_data;
+            // Concatenated 22-bit key; the history (newest entry [4] first) is consulted before the array.
+            ap_uint<22> rd_addr = (ap_color, addr1(7, 0), addr2(7, 0), addr3(6, 2));
+            if (rd_addr == addr_thre[4])
+                ram_data = data_thre[4];
+            else if (rd_addr == addr_thre[3])
+                ram_data = data_thre[3];
+            else if (rd_addr == addr_thre[2])
+                ram_data = data_thre[2];
+            else if (rd_addr == addr_thre[1])
+                ram_data = data_thre[1];
+            else if (rd_addr == addr_thre[0])
+                ram_data = data_thre[0];
+            else
+                ram_data = residual_threshold_counts[ap_color][addr1][addr2][addr3(6, 2)];
+            // Select the 16-bit lane addressed by addr3(1, 0).
+            if (addr3(1, 0) == 0)
+                cnt = ram_data(15, 0);
+            else if (addr3(1, 0) == 1)
+                cnt = ram_data(31, 16);
+            else if (addr3(1, 0) == 2)
+                cnt = ram_data(47, 32);
+            else if (addr3(1, 0) == 3)
+                cnt = ram_data(63, 48);
+
+            strm_prob.write(calc_prob(cnt));
+            strm_e.write(false);
+            strm_tab_dbg.write(THRE_CNT);
+
+            record_and_update(value, cnt);
+            // Merge the updated counter back into the same lane of the 64-bit word.
+            if (addr3(1, 0) == 0)
+                ram_data(15, 0) = cnt;
+            else if (addr3(1, 0) == 1)
+                ram_data(31, 16) = cnt;
+            else if (addr3(1, 0) == 2)
+                ram_data(47, 32) = cnt;
+            else if (addr3(1, 0) == 3)
+                ram_data(63, 48) = cnt;
+
+            residual_threshold_counts[ap_color][addr1][addr2][addr3(6, 2)] = ram_data;
+            // Shift the history down by one and store this update in slot [4].
+            addr_thre[0] = addr_thre[1];
+            addr_thre[1] = addr_thre[2];
+            addr_thre[2] = addr_thre[3];
+            addr_thre[3] = addr_thre[4];
+            addr_thre[4] = (ap_color, addr1(7, 0), addr2(7, 0), addr3(6, 2));
+            data_thre[0] = data_thre[1];
+            data_thre[1] = data_thre[2];
+            data_thre[2] = data_thre[3];
+            data_thre[3] = data_thre[4];
+            data_thre[4] = ram_data;
+
+        } else if (sel_tab == EXP_CNT_DC) {
+            cnt = exponent_counts_dc[addr1][addr2][addr3]; // this table is not indexed by ap_color
+
+            strm_prob.write(calc_prob(cnt));
+            strm_e.write(false);
+            strm_tab_dbg.write(EXP_CNT_DC);
+
+            record_and_update(value, cnt);
+            exponent_counts_dc[addr1][addr2][addr3] = cnt;
+        } else if (sel_tab == NOIS_CNT_DC) {
+            cnt = residual_noise_counts_dc[addr1][addr2]; // this table is not indexed by ap_color
+
+            strm_prob.write(calc_prob(cnt));
+            strm_e.write(false);
+            strm_tab_dbg.write(NOIS_CNT_DC);
+
+            record_and_update(value, cnt);
+            residual_noise_counts_dc[addr1][addr2] = cnt;
+        } // NOTE(review): an unmatched sel_tab still writes strm_bit but nothing to strm_prob/strm_e/strm_tab_dbg
+    }
+    //            std::cout<<std::endl<<" min addr1: "<<min_addr1<<" max_addr1: "<<max_addr1<<" min_addr3:
+    //            "<<min_addr3<<" max_addr3: "<<max_addr3<<std::endl<<std::endl;
+    strm_e.write(true); // single terminating flag after the last element
+}
+
+} // namespace details
+} // namespace codec
+} // namespace xf
diff --git a/codec/L2/demos/leptonEnc/kernel/XModified.cpp b/codec/L2/demos/leptonEnc/kernel/XModified.cpp
new file mode 100644
index 0000000000..fa836a2d7c
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/kernel/XModified.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "XModified.hpp"
+
+namespace xf {
+namespace codec {
+namespace details {
+
+// hls_color_index: collapse a component id to a two-way index; 0 stays 0, any other value becomes 1.
+int hls_color_index(int c) {
+#pragma HLS INLINE
+    return static_cast<int>(c != 0);
+}
+
+// Context for the non-zero count, derived from whichever neighbouring counts are flagged as present.
+uint8_t hls_get_num_nonzeros_context(
+    bool all_present, bool above_present, bool left_present, uint8_t num_nonzeros_above, uint8_t num_nonzeros_left) {
+#pragma HLS INLINE
+    // Both neighbours usable: sum of the two counts divided by four, rounded to nearest.
+    if (all_present || (above_present && left_present)) {
+        return (num_nonzeros_above + num_nonzeros_left + 2) / 4;
+    }
+    // Past this point all_present is false and at most one neighbour is present.
+    if (above_present) {
+        return (num_nonzeros_above + 1) / 2;
+    }
+    return left_present ? (num_nonzeros_left + 1) / 2 : 0;
+}
+
+// abs16: magnitude of a signed 16-bit input, returned as an unsigned 16-bit value.
+uint16_t abs16(int16_t din) {
+    // The negation is evaluated after integral promotion, so the most negative input yields 32768.
+    const bool negative = din < 0;
+    const uint16_t magnitude = negative ? -din : din;
+    return magnitude;
+}
+
+// hls_compute_aavrg: (13 * (left + above) + 6 * above_left) >> 5 when both neighbours count, else their sum.
+uint16_t hls_compute_aavrg(bool all_present,
+                           bool left_present,
+                           bool above_present,
+                           uint16_t abs_coef_left,      // added when all_present || left_present
+                           uint16_t abs_coef_above,     // added when all_present || above_present
+                           uint16_t abs_coef_above_left // weighted by 6 when both neighbours count
+
+                           ) {
+#pragma HLS inline
+    // The weights 13 + 13 + 6 add up to 32, hence the final right shift by log2(32).
+    constexpr unsigned int log_weight = 5;
+    uint16_t total = 0;
+    if (all_present || left_present) {
+        total += abs_coef_left;
+    }
+    if (all_present || above_present) {
+        total += abs_coef_above;
+    }
+    if (all_present || (left_present && above_present)) {
+        total *= 13; // 16-bit arithmetic is kept, so the value wraps modulo 65536 exactly as before
+        total += 6 * abs_coef_above_left;
+        return total >> log_weight;
+    }
+    return total; // zero or one neighbour contributed: plain, unweighted sum
+}
+
+// ------------------------------------------------------------
+/*int32_t hls_compute_lak(
+    // const ConstBlockContext&context,
+    int COLOR,
+    unsigned int band,
+    bool all_present,
+    bool left_present,
+    bool above_present,
+    int16_t coef_here[64],
+    int16_t coef_left[64],
+    int16_t coef_above[64]) {
+    int coeffs_x[8];
+    int coeffs_a[8];
+    const int32_t* coef_idct = nullptr;
+    if ((band & 7) && (all_present || above_present)) {
+        // y == 0: we're the x
+        assert(band / 8 == 0); // this function only works for the edge
+        // const auto &above = context.above_unchecked();
+        for (int i = 0; i < 8; ++i) {
+            uint8_t cur_coef = band + i * 8;
+            coeffs_x[i] = i ? coef_here[hls_raster_to_aligned[cur_coef]] : 0;
+            coeffs_a[i] = coef_above[hls_raster_to_aligned[cur_coef]];
+        }
+        coef_idct = ProbabilityTablesBase::icos_idct_edge_8192_dequantized_x((int)COLOR) + band * 8;
+    } else if ((band & 7) == 0 && left_present) {
+        // x == 0: we're the y
+        // const auto &left = context.left_unchecked();
+        for (int i = 0; i < 8; ++i) {
+            uint8_t cur_coef = band + i;
+            coeffs_x[i] = i ? coef_here[hls_raster_to_aligned[cur_coef]] : 0;
+            coeffs_a[i] = coef_left[hls_raster_to_aligned[cur_coef]];
+        }
+        coef_idct = ProbabilityTablesBase::icos_idct_edge_8192_dequantized_y((int)COLOR) + band;
+    } else {
+        return 0;
+    }
+    int prediction =
+        coeffs_a[0] *
+        coef_idct[0]; // rounding towards zero before adding coeffs_a[0] helps ratio slightly, but this is cheaper
+    for (int i = 1; i < 8; ++i) {
+        int sign = (i & 1) ? 1 : -1;
+        prediction -= coef_idct[i] * (coeffs_x[i] + sign * coeffs_a[i]);
+    }
+    prediction /= coef_idct[0];
+    return prediction;
+}*/
+
+} // namespace details
+} // namespace codec
+} // namespace xf
diff --git a/codec/L2/demos/leptonEnc/kernel/jpeg_dec_lepton_enc.cpp b/codec/L2/demos/leptonEnc/kernel/jpeg_dec_lepton_enc.cpp
new file mode 100644
index 0000000000..07b903e7e3
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/kernel/jpeg_dec_lepton_enc.cpp
@@ -0,0 +1,262 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "jpeg_dec_lepton_enc.hpp"
+
+// ------------------------------------------------------------
+namespace xf {
+namespace codec {
+namespace details {
+// init_parser: reset the model tables with init_hlsmodel(), then run parser_jpg_top() on the input buffer.
+void init_parser(ap_uint<16>* datatoDDR,
+                 int size,
+                 int& r,
+                 int& c,
+                 uint16_t dht_tbl1[2][2][1 << DHT1],
+                 uint16_t dht_tbl2[2][2][1 << DHT2],
+                 ap_uint<12>& hls_cmp,
+                 int& left,
+
+                 // image info
+                 img_info& img_info,
+                 uint8_t hls_mbs[MAX_NUM_COLOR],
+                 hls_compInfo hls_compinfo[MAX_NUM_COLOR],
+                 bool& rtn,
+                 decOutput* plep) {
+    //#pragma HLS DATAFLOW
+    // Initialise the model tables.
+    //----------------------------------------------------------
+    init_hlsmodel();
+
+    // Parse the header before the data is burst-loaded from DDR; every argument is forwarded unchanged.
+    //----------------------------------------------------------
+    xf::codec::details::parser_jpg_top(datatoDDR, size, r, c, dht_tbl1, dht_tbl2, hls_cmp, left, img_info, hls_mbs,
+                                       hls_compinfo, rtn, plep);
+}
+
+// ------------------------------------------------------------
+// Dataflow region: decoder_jpg_top -> hls_next_mcupos_strm -> kernel_LeptonE_strmIn_engine -> streamToAxi.
+void jpegD_leptonE_union(ap_uint<AXI_WIDTH>* ptr,
+                         const int sz, // NOTE(review): not referenced anywhere in this function body
+                         const int c,
+                         const uint16_t dht_tbl1[2][2][1 << DHT1],
+                         const uint16_t dht_tbl2[2][2][1 << DHT2],
+                         ap_uint<12> hls_cmp,
+
+                         // image info
+                         const uint8_t hls_mbs[MAX_NUM_COLOR],
+                         const img_info img_info,
+                         const int left,
+                         decOutput& lepp,
+
+                         // output
+                         uint32_t& rst_cnt,
+                         ap_uint<8>* axi_res, //[MAX_NUM_PIX]
+                         struct_arith& axi_arith,
+                         uint16_t axi_width[MAX_NUM_COLOR],
+                         uint16_t axi_height[MAX_NUM_COLOR],
+                         uint8_t axi_map_row2cmp[4]) {
+#pragma HLS DATAFLOW
+
+    // clang-format off
+	hls::stream<ap_uint<24> >   block_strm;
+#pragma HLS bind_storage variable = block_strm type=FIFO impl=LUTRAM
+#pragma HLS STREAM variable   = block_strm depth = 32
+    // clang-format on
+    // Stage 1: Huffman-decode into block_strm (coefficients before inverse quantization / IDCT).
+    //----------------------------------------------------------
+    xf::codec::details::decoder_jpg_top(ptr, left, (ap_uint<1>)c, dht_tbl1, dht_tbl2, hls_cmp, hls_mbs, img_info,
+                                        rst_cnt, block_strm);
+
+    // clang-format off
+	hls::stream<ap_int<11> >         str_coef[8];
+#pragma HLS bind_storage        variable = str_coef   type=FIFO impl=LUTRAM
+//#pragma HLS ARRAY_PARTITION variable = str_coef   complete  dim=1
+#pragma HLS STREAM          variable = str_coef   depth = 1024 dim=1
+    // clang-format on
+    // Stage 2: copy block_strm into the eight str_coef streams (aligned block layout).
+    //-----------------------------------------------------------
+    hls_next_mcupos_strm(block_strm, lepp, str_coef, axi_width, axi_height);
+
+    // clang-format off
+ 	hls::stream< bool >          strm_pos_o_e;
+ 	hls::stream< ap_uint<8> > strm_pos_o_byte;
+ #pragma HLS stream depth=256 variable = strm_pos_o_e
+ #pragma HLS bind_storage         variable = strm_pos_o_e    type=FIFO impl=LUTRAM
+ #pragma HLS stream depth=256 variable = strm_pos_o_byte
+ #pragma HLS bind_storage         variable = strm_pos_o_byte type=FIFO impl=LUTRAM
+    // clang-format on
+    // Stage 3: Lepton encoder engine; consumes str_coef, produces a byte stream plus axi_arith.
+    //-----------------------------------------------------------
+    kernel_LeptonE_strmIn_engine(
+        // input
+        str_coef,
+
+        axi_width, // colldata->block_width(i);
+        // lepp.axi_height,//colldata->block_width(i);
+        axi_map_row2cmp, //     AXI                   2,1,0,0 2,1,0
+        lepp.min_nois_thld_x, lepp.min_nois_thld_y,
+        lepp.q_tables, //[64],
+        lepp.idct_q_table_x, lepp.idct_q_table_y,
+        // lepp.idct_q_table_l,
+        lepp.axi_mcuv, lepp.axi_num_cmp_mcu,
+        // lepp.axi_num_cmp,
+        // output
+        axi_arith, strm_pos_o_e, strm_pos_o_byte);
+
+// clang-format off
+	#ifndef __SYNTHESIS__
+		fprintf(stderr,"stream out start\n");
+	#endif
+    // clang-format on
+    // Stage 4: write the byte stream to axi_res.
+    //-----------------------------------------------------------
+    // xf::common::utils_hw::streamToAxi<32, uint8_t, unsigned char>(strm_pos_o_byte, strm_pos_o_e, axi_res);
+    // template <int _BurstLen, int _WAxi, int _WStrm>
+    // void streamToAxi(ap_uint<_WAxi>* wbuf, hls::stream<ap_uint<_WStrm> >& istrm, hls::stream<bool>& e_istrm) {
+    xf::common::utils_hw::streamToAxi<32, 8, 8>(axi_res, strm_pos_o_byte, strm_pos_o_e);
+}
+
+} // namespace details
+} // namespace codec
+} // namespace xf
+
+// ------------------------------------------------------------
+
+namespace xf {
+namespace codec {
+
+/**
+ * @brief IMAGE Jpeg Decoder Lepton Encoder Kernel
+ * \rst
+ * For detailed document, see :ref:`JpegDecoderLeptonEncoderKernel`.
+ * \endrst
+ * @param datainDDR input image buffer.
+ * @param jpgSize size of input image buffer.
+ * @param arithInfo meta information of output buffer.
+ * @param res output lepton format data buffer.
+ */
+void jpegDecLeptonEnc(ap_uint<AXI_WIDTH>* datainDDR, int jpgSize, int arithInfo[9], ap_uint<8>* res) {
+    //        #pragma HLS INTERFACE m_axi offset = slave latency = 125 \
+//        num_write_outstanding = 1 num_read_outstanding = 2 \
+//        max_write_burst_length = 2 max_read_burst_length = 32 \
+//        bundle = gmem_in1 port = datainDDR
+    //
+    //        #pragma HLS INTERFACE m_axi offset = slave latency = 125 \
+//        num_write_outstanding = 2 num_read_outstanding = 2 \
+//        max_write_burst_length = 32 max_read_burst_length = 2 \
+//        bundle = gmem_out1 port = res
+    //
+    //        #pragma HLS INTERFACE m_axi offset = slave latency = 32 \
+//        num_write_outstanding = 2 num_read_outstanding = 2 \
+//        max_write_burst_length = 32 max_read_burst_length = 2 \
+//        bundle = gmem_out2 port = arithInfo
+    //
+    // (All interface directives in this function are commented out; lepEnc in multi_cu.cpp declares the same set.)
+    //		#pragma HLS INTERFACE s_axilite port=datainDDR     	bundle=control
+    //		#pragma HLS INTERFACE s_axilite port=res        	bundle=control
+    //		#pragma HLS INTERFACE s_axilite port=jpgSize        bundle=control
+    //		#pragma HLS INTERFACE s_axilite port=arithInfo      bundle=control
+    //
+    //		#pragma HLS INTERFACE s_axilite port=return         bundle=control
+    // Image information filled in by init_parser().
+    bool rtn = true;
+    decOutput lepp;
+    struct_arith arith;
+    // clang-format off
+//		#pragma HLS ARRAY_PARTITION variable=lepp.axi_map_row2cmp complete dim=0
+//		#pragma HLS ARRAY_PARTITION variable=lepp.axi_width       complete dim=0
+    // clang-format on
+    int r = 0, c = 0; // for offset = r*scale_char + c
+    ap_uint<12> hls_cmp;
+    xf::codec::img_info img_info; // may have some redundant data
+    xf::codec::hls_compInfo hls_cmpnfo[MAX_NUM_COLOR];
+    uint8_t hls_mbs[MAX_NUM_COLOR];
+    int left = 0;
+    // Tables. NOTE(review): dqt is declared but not referenced again in this function.
+    uint16_t dqt[2][64];
+    uint16_t dht_tbl1[2][2][1 << DHT1];
+    uint16_t dht_tbl2[2][2][1 << DHT2];
+#pragma HLS bind_storage variable = dht_tbl1 type = RAM_2P impl = LUTRAM
+#pragma HLS bind_storage variable = dht_tbl2 type = RAM_2P impl = LUTRAM
+
+#ifndef __SYNTHESIS__
+    fprintf(stderr, "kernel start!\n");
+#endif
+    // Parse the header and initialise the model RAMs before the data is burst-loaded from DDR.
+    //----------------------------------------------------------
+    xf::codec::details::init_parser(datainDDR, jpgSize, r, c, dht_tbl1, dht_tbl2, hls_cmp, left, img_info, hls_mbs,
+                                    hls_cmpnfo, rtn, &lepp);
+    // lepp2 is the copy handed to the dataflow region; the three axi_* arrays are passed separately.
+    decOutput lepp2 = lepp;
+    uint16_t axi_width[MAX_NUM_COLOR];
+    uint16_t axi_height[MAX_NUM_COLOR];
+    uint8_t axi_map_row2cmp[4];
+    uint8_t min_nois_thld_x[MAX_NUM_COLOR][64]; // NOTE(review): this and the five arrays below are filled
+    uint8_t min_nois_thld_y[MAX_NUM_COLOR][64]; // from lepp but never read afterwards in this function
+    uint8_t q_tables[MAX_NUM_COLOR][8][8];
+    int32_t idct_q_table_x[MAX_NUM_COLOR][8][8];
+    int32_t idct_q_table_y[MAX_NUM_COLOR][8][8];
+    int32_t idct_q_table_l[MAX_NUM_COLOR][8][8];
+    for (int i = 0; i < MAX_NUM_COLOR; i++) {
+        axi_width[i] = lepp.axi_width[i];
+        axi_height[i] = lepp.axi_height[i];
+    }
+    for (int i = 0; i < 4; i++) {
+        axi_map_row2cmp[i] = lepp.axi_map_row2cmp[i];
+    }
+    for (int i = 0; i < MAX_NUM_COLOR; i++) {
+        for (int j = 0; j < 64; j++) {
+            min_nois_thld_x[i][j] = lepp.min_nois_thld_x[i][j];
+            min_nois_thld_y[i][j] = lepp.min_nois_thld_y[i][j];
+        }
+    }
+    for (int i = 0; i < MAX_NUM_COLOR; i++) {
+        for (int j = 0; j < 8; j++) {
+            for (int k = 0; k < 8; k++) {
+                q_tables[i][j][k] = lepp.q_tables[i][j][k];
+                idct_q_table_x[i][j][k] = lepp.idct_q_table_x[i][j][k];
+                idct_q_table_y[i][j][k] = lepp.idct_q_table_y[i][j][k];
+                idct_q_table_l[i][j][k] = lepp.idct_q_table_l[i][j][k];
+            }
+        }
+    }
+
+    uint32_t rst_cnt; // NOTE(review): uninitialised here; presumably written by jpegD_leptonE_union - confirm
+    // Dataflow region: starts reading at word offset r; `left` is passed for both the sz and left parameters.
+    //----------------------------------------------------------
+    ap_uint<AXI_WIDTH>* ptr = (ap_uint<AXI_WIDTH>*)datainDDR + r;
+    xf::codec::details::jpegD_leptonE_union(ptr, left, c, dht_tbl1, dht_tbl2, hls_cmp, hls_mbs, img_info, left, lepp2,
+                                            rst_cnt, res, arith, axi_width, axi_height, axi_map_row2cmp);
+
+    // Tail: append pre_byte, then arith.run bytes of 0xff, advancing arith.pos for each byte written.
+    //----------------------------------------------------------
+    res[arith.pos++] = arith.pre_byte;
+    for (int run = arith.run; run > 0; run--) res[arith.pos++] = 0xff;
+
+    // clang-format off
+    	arithInfo[0] = arith.count;
+    	arithInfo[1] = arith.value;
+    	arithInfo[2] = arith.pre_byte;
+    	arithInfo[3] = arith.run ;
+    	arithInfo[4] = arith.pos ;
+    	arithInfo[5] = arith.range ;
+    	arithInfo[6] = arith.isFirst ;
+    	arithInfo[7] = left;
+    	arithInfo[8] = rst_cnt;
+    // clang-format on
+} // jpegDecLeptonEnc
+} // namespace codec
+} // namespace xf
diff --git a/codec/L2/demos/leptonEnc/kernel/multi_cu.cpp b/codec/L2/demos/leptonEnc/kernel/multi_cu.cpp
new file mode 100644
index 0000000000..09a1f8b0bf
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/kernel/multi_cu.cpp
@@ -0,0 +1,351 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "multi_cu.hpp"
+#include <iostream>
+#include <iomanip>
+// Kernel top function: maps the arguments to AXI interfaces and forwards them to xf::codec::jpegDecLeptonEnc.
+extern "C" void lepEnc(ap_uint<AXI_WIDTH>* datainDDR, int jpgSize, int* arithInfo, ap_uint<8>* res) {
+// clang-format off
+        #pragma HLS INTERFACE m_axi offset = slave latency = 125 \
+        num_write_outstanding = 1 num_read_outstanding = 2 \
+        max_write_burst_length = 2 max_read_burst_length = 32 \
+        bundle = gmem_in1 port = datainDDR
+
+        #pragma HLS INTERFACE m_axi offset = slave latency = 125 \
+        num_write_outstanding = 2 num_read_outstanding = 2 \
+        max_write_burst_length = 32 max_read_burst_length = 2 \
+        bundle = gmem_out1 port = res
+
+        #pragma HLS INTERFACE m_axi offset = slave latency = 32 \
+        num_write_outstanding = 2 num_read_outstanding = 2 \
+        max_write_burst_length = 32 max_read_burst_length = 2 \
+        bundle = gmem_out2 port = arithInfo
+
+        // s_axilite: every argument plus the block-level return goes into the "control" bundle.
+		#pragma HLS INTERFACE s_axilite port=datainDDR     	bundle=control
+		#pragma HLS INTERFACE s_axilite port=res        	bundle=control
+		#pragma HLS INTERFACE s_axilite port=jpgSize        bundle=control
+		#pragma HLS INTERFACE s_axilite port=arithInfo      bundle=control
+
+		#pragma HLS INTERFACE s_axilite port=return         bundle=control
+    // clang-format on
+    // Disabled debug aid: prints jpgSize and a hex dump of the first jpgSize bytes behind datainDDR.
+    //    char* tmp = reinterpret_cast<char*>(datainDDR);
+    //    std::cout << std::endl;
+    //    std::cout << "jpgsize: " << jpgSize << std::endl;
+    //    std::cout << std::hex;
+    //    for (int i = 0; i < jpgSize; i++) {
+    //        if (i % 8 == 0) std::cout << std::endl;
+    //        std::cout << std::setfill('0') << std::setw(8) << (int)(tmp[i]) << " ";
+    //    }
+    //    std::cout << std::dec;
+    //    std::cout << std::endl;
+
+    xf::codec::jpegDecLeptonEnc(datainDDR, jpgSize, arithInfo, res);
+    // Disabled debug aid: prints arithInfo[0..8] and a hex dump of the first arithInfo[4] bytes of res.
+    //    std::cout << std::endl;
+    //    std::cout << "arith:" << std::endl;
+    //    for (int i = 0; i < 9; i++) {
+    //        std::cout << arithInfo[i] << std::endl;
+    //    }
+    //    std::cout << std::hex;
+    //    std::cout << "res:" << std::endl;
+    //    for (int i = 0; i < arithInfo[4]; i++) {
+    //        if (i % 8 == 0) std::cout << std::endl;
+    //        std::cout << std::setfill('0') << std::setw(8) << (int)res[i] << " ";
+    //    }
+    //    std::cout << std::dec;
+    //    std::cout << std::endl;
+}
+
+//----------------------------------------------------------
+/*extern "C" {
+void jpegDecLeptonEncKernel_0(ap_uint<AXI_WIDTH>* datainDDR, int jpgSize, int* arithInfo, uint8_t* res) {
+    // clang-format off
+                uint64_t max_pix = MAX_NUM_PIX;
+                #pragma HLS INTERFACE m_axi port = datainDDR  depth = max_pix offset = slave \
+                bundle = gmem_in1 latency = 125  max_read_burst_length = 32
+                #pragma HLS INTERFACE m_axi port = res       depth = max_pix offset = slave   \
+                bundle = gmem_out1 latency = 125 max_read_burst_length = 32
+
+                #pragma HLS INTERFACE m_axi port = arith_info depth = 7      offset=slave \
+                bundle = gmem_out2
+
+                #pragma HLS INTERFACE s_axilite port=datainDDR     	bundle=control
+                #pragma HLS INTERFACE s_axilite port=res        	bundle=control
+                #pragma HLS INTERFACE s_axilite port=jpgSize        bundle=control
+                #pragma HLS INTERFACE s_axilite port=arith_info     bundle=control
+
+                #pragma HLS INTERFACE s_axilite port=return         bundle=control
+    // clang-format on
+
+    xf::codec::jpegDecLeptonEnc(datainDDR, jpgSize, arithInfo, res);
+}
+
+//----------------------------------------------------------
+void jpegDecLeptonEncKernel_1(ap_uint<AXI_WIDTH>* datainDDR, int jpgSize, int* arithInfo, uint8_t* res) {
+    // clang-format off
+                uint64_t max_pix = MAX_NUM_PIX;
+                #pragma HLS INTERFACE m_axi port = datainDDR  depth = max_pix offset = slave \
+                bundle = gmem_in1 latency = 125  max_read_burst_length = 32
+                #pragma HLS INTERFACE m_axi port = res       depth = max_pix offset = slave   \
+                bundle = gmem_out1 latency = 125 max_read_burst_length = 32
+
+                #pragma HLS INTERFACE m_axi port = arith_info depth = 7      offset=slave \
+                bundle = gmem_out2
+
+                #pragma HLS INTERFACE s_axilite port=datainDDR     	bundle=control
+                #pragma HLS INTERFACE s_axilite port=res        	bundle=control
+                #pragma HLS INTERFACE s_axilite port=jpgSize        bundle=control
+                #pragma HLS INTERFACE s_axilite port=arith_info     bundle=control
+
+                #pragma HLS INTERFACE s_axilite port=return         bundle=control
+    // clang-format on
+
+    xf::codec::jpegDecLeptonEnc(datainDDR, jpgSize, arithInfo, res);
+}
+
+//----------------------------------------------------------
+void jpegDecLeptonEncKernel_2(ap_uint<AXI_WIDTH>* datainDDR, int jpgSize, int* arithInfo, uint8_t* res) {
+    // clang-format off
+                uint64_t max_pix = MAX_NUM_PIX;
+                #pragma HLS INTERFACE m_axi port = datainDDR  depth = max_pix offset = slave \
+                bundle = gmem_in1 latency = 125  max_read_burst_length = 32
+                #pragma HLS INTERFACE m_axi port = res       depth = max_pix offset = slave   \
+                bundle = gmem_out1 latency = 125 max_read_burst_length = 32
+
+                #pragma HLS INTERFACE m_axi port = arith_info depth = 7      offset=slave \
+                bundle = gmem_out2
+
+                #pragma HLS INTERFACE s_axilite port=datainDDR     	bundle=control
+                #pragma HLS INTERFACE s_axilite port=res        	bundle=control
+                #pragma HLS INTERFACE s_axilite port=jpgSize        bundle=control
+                #pragma HLS INTERFACE s_axilite port=arith_info     bundle=control
+
+                #pragma HLS INTERFACE s_axilite port=return         bundle=control
+    // clang-format on
+
+    xf::codec::jpegDecLeptonEnc(datainDDR, jpgSize, arithInfo, res);
+}
+
+//----------------------------------------------------------
+void jpegDecLeptonEncKernel_3(ap_uint<AXI_WIDTH>* datainDDR, int jpgSize, int* arithInfo, uint8_t* res) {
+    // clang-format off
+                uint64_t max_pix = MAX_NUM_PIX;
+                #pragma HLS INTERFACE m_axi port = datainDDR  depth = max_pix offset = slave \
+                bundle = gmem_in1 latency = 125  max_read_burst_length = 32
+                #pragma HLS INTERFACE m_axi port = res       depth = max_pix offset = slave   \
+                bundle = gmem_out1 latency = 125 max_read_burst_length = 32
+
+                #pragma HLS INTERFACE m_axi port = arith_info depth = 7      offset=slave \
+                bundle = gmem_out2
+
+                #pragma HLS INTERFACE s_axilite port=datainDDR     	bundle=control
+                #pragma HLS INTERFACE s_axilite port=res        	bundle=control
+                #pragma HLS INTERFACE s_axilite port=jpgSize        bundle=control
+                #pragma HLS INTERFACE s_axilite port=arith_info     bundle=control
+
+                #pragma HLS INTERFACE s_axilite port=return         bundle=control
+    // clang-format on
+
+    xf::codec::jpegDecLeptonEnc(datainDDR, jpgSize, arithInfo, res);
+}
+
+//----------------------------------------------------------
+void jpegDecLeptonEncKernel_4(ap_uint<AXI_WIDTH>* datainDDR, int jpgSize, int* arithInfo, uint8_t* res) {
+    // clang-format off
+                uint64_t max_pix = MAX_NUM_PIX;
+                #pragma HLS INTERFACE m_axi port = datainDDR  depth = max_pix offset = slave \
+                bundle = gmem_in1 latency = 125  max_read_burst_length = 32
+                #pragma HLS INTERFACE m_axi port = res       depth = max_pix offset = slave   \
+                bundle = gmem_out1 latency = 125 max_read_burst_length = 32
+
+                #pragma HLS INTERFACE m_axi port = arith_info depth = 7      offset=slave \
+                bundle = gmem_out2
+
+                #pragma HLS INTERFACE s_axilite port=datainDDR     	bundle=control
+                #pragma HLS INTERFACE s_axilite port=res        	bundle=control
+                #pragma HLS INTERFACE s_axilite port=jpgSize        bundle=control
+                #pragma HLS INTERFACE s_axilite port=arith_info     bundle=control
+
+                #pragma HLS INTERFACE s_axilite port=return         bundle=control
+    // clang-format on
+
+    xf::codec::jpegDecLeptonEnc(datainDDR, jpgSize, arithInfo, res);
+}
+
+//----------------------------------------------------------
+void jpegDecLeptonEncKernel_5(ap_uint<AXI_WIDTH>* datainDDR, int jpgSize, int* arithInfo, uint8_t* res) {
+    // clang-format off
+                uint64_t max_pix = MAX_NUM_PIX;
+                #pragma HLS INTERFACE m_axi port = datainDDR  depth = max_pix offset = slave \
+                bundle = gmem_in1 latency = 125  max_read_burst_length = 32
+                #pragma HLS INTERFACE m_axi port = res       depth = max_pix offset = slave   \
+                bundle = gmem_out1 latency = 125 max_read_burst_length = 32
+
+                #pragma HLS INTERFACE m_axi port = arith_info depth = 7      offset=slave \
+                bundle = gmem_out2
+
+                #pragma HLS INTERFACE s_axilite port=datainDDR     	bundle=control
+                #pragma HLS INTERFACE s_axilite port=res        	bundle=control
+                #pragma HLS INTERFACE s_axilite port=jpgSize        bundle=control
+                #pragma HLS INTERFACE s_axilite port=arith_info     bundle=control
+
+                #pragma HLS INTERFACE s_axilite port=return         bundle=control
+    // clang-format on
+
+    xf::codec::jpegDecLeptonEnc(datainDDR, jpgSize, arithInfo, res);
+}
+
+//----------------------------------------------------------
+void jpegDecLeptonEncKernel_6(ap_uint<AXI_WIDTH>* datainDDR, int jpgSize, int* arithInfo, uint8_t* res) {
+    // clang-format off
+                uint64_t max_pix = MAX_NUM_PIX;
+                #pragma HLS INTERFACE m_axi port = datainDDR  depth = max_pix offset = slave \
+                bundle = gmem_in1 latency = 125  max_read_burst_length = 32
+                #pragma HLS INTERFACE m_axi port = res       depth = max_pix offset = slave   \
+                bundle = gmem_out1 latency = 125 max_read_burst_length = 32
+
+                #pragma HLS INTERFACE m_axi port = arith_info depth = 7      offset=slave \
+                bundle = gmem_out2
+
+                #pragma HLS INTERFACE s_axilite port=datainDDR     	bundle=control
+                #pragma HLS INTERFACE s_axilite port=res        	bundle=control
+                #pragma HLS INTERFACE s_axilite port=jpgSize        bundle=control
+                #pragma HLS INTERFACE s_axilite port=arith_info     bundle=control
+
+                #pragma HLS INTERFACE s_axilite port=return         bundle=control
+    // clang-format on
+
+    xf::codec::jpegDecLeptonEnc(datainDDR, jpgSize, arithInfo, res);
+}
+
+//----------------------------------------------------------
+void jpegDecLeptonEncKernel_7(ap_uint<AXI_WIDTH>* datainDDR, int jpgSize, int* arithInfo, uint8_t* res) {
+    // clang-format off
+                uint64_t max_pix = MAX_NUM_PIX;
+                #pragma HLS INTERFACE m_axi port = datainDDR  depth = max_pix offset = slave \
+                bundle = gmem_in1 latency = 125  max_read_burst_length = 32
+                #pragma HLS INTERFACE m_axi port = res       depth = max_pix offset = slave   \
+                bundle = gmem_out1 latency = 125 max_read_burst_length = 32
+
+                #pragma HLS INTERFACE m_axi port = arith_info depth = 7      offset=slave \
+                bundle = gmem_out2
+
+                #pragma HLS INTERFACE s_axilite port=datainDDR     	bundle=control
+                #pragma HLS INTERFACE s_axilite port=res        	bundle=control
+                #pragma HLS INTERFACE s_axilite port=jpgSize        bundle=control
+                #pragma HLS INTERFACE s_axilite port=arith_info     bundle=control
+
+                #pragma HLS INTERFACE s_axilite port=return         bundle=control
+    // clang-format on
+
+    xf::codec::jpegDecLeptonEnc(datainDDR, jpgSize, arithInfo, res);
+}
+
+//----------------------------------------------------------
+void jpegDecLeptonEncKernel_8(ap_uint<AXI_WIDTH>* datainDDR, int jpgSize, int* arithInfo, uint8_t* res) {
+    // clang-format off
+                uint64_t max_pix = MAX_NUM_PIX;
+                #pragma HLS INTERFACE m_axi port = datainDDR  depth = max_pix offset = slave \
+                bundle = gmem_in1 latency = 125  max_read_burst_length = 32
+                #pragma HLS INTERFACE m_axi port = res       depth = max_pix offset = slave   \
+                bundle = gmem_out1 latency = 125 max_read_burst_length = 32
+
+                #pragma HLS INTERFACE m_axi port = arith_info depth = 7      offset=slave \
+                bundle = gmem_out2
+
+                #pragma HLS INTERFACE s_axilite port=datainDDR     	bundle=control
+                #pragma HLS INTERFACE s_axilite port=res        	bundle=control
+                #pragma HLS INTERFACE s_axilite port=jpgSize        bundle=control
+                #pragma HLS INTERFACE s_axilite port=arith_info     bundle=control
+
+                #pragma HLS INTERFACE s_axilite port=return         bundle=control
+    // clang-format on
+
+    xf::codec::jpegDecLeptonEnc(datainDDR, jpgSize, arithInfo, res);
+}
+
+//----------------------------------------------------------
+void jpegDecLeptonEncKernel_9(ap_uint<AXI_WIDTH>* datainDDR, int jpgSize, int* arithInfo, uint8_t* res) {
+    // clang-format off
+                uint64_t max_pix = MAX_NUM_PIX;
+                #pragma HLS INTERFACE m_axi port = datainDDR  depth = max_pix offset = slave \
+                bundle = gmem_in1 latency = 125  max_read_burst_length = 32
+                #pragma HLS INTERFACE m_axi port = res       depth = max_pix offset = slave   \
+                bundle = gmem_out1 latency = 125 max_read_burst_length = 32
+
+                #pragma HLS INTERFACE m_axi port = arith_info depth = 7      offset=slave \
+                bundle = gmem_out2
+
+                #pragma HLS INTERFACE s_axilite port=datainDDR     	bundle=control
+                #pragma HLS INTERFACE s_axilite port=res        	bundle=control
+                #pragma HLS INTERFACE s_axilite port=jpgSize        bundle=control
+                #pragma HLS INTERFACE s_axilite port=arith_info     bundle=control
+
+                #pragma HLS INTERFACE s_axilite port=return         bundle=control
+    // clang-format on
+
+    xf::codec::jpegDecLeptonEnc(datainDDR, jpgSize, arithInfo, res);
+}
+
+//----------------------------------------------------------
+void jpegDecLeptonEncKernel_10(ap_uint<AXI_WIDTH>* datainDDR, int jpgSize, int* arithInfo, uint8_t* res) {
+    // clang-format off
+                uint64_t max_pix = MAX_NUM_PIX;
+                #pragma HLS INTERFACE m_axi port = datainDDR  depth = max_pix offset = slave \
+                bundle = gmem_in1 latency = 125  max_read_burst_length = 32
+                #pragma HLS INTERFACE m_axi port = res       depth = max_pix offset = slave   \
+                bundle = gmem_out1 latency = 125 max_read_burst_length = 32
+
+                #pragma HLS INTERFACE m_axi port = arith_info depth = 7      offset=slave \
+                bundle = gmem_out2
+
+                #pragma HLS INTERFACE s_axilite port=datainDDR     	bundle=control
+                #pragma HLS INTERFACE s_axilite port=res        	bundle=control
+                #pragma HLS INTERFACE s_axilite port=jpgSize        bundle=control
+                #pragma HLS INTERFACE s_axilite port=arith_info     bundle=control
+
+                #pragma HLS INTERFACE s_axilite port=return         bundle=control
+    // clang-format on
+
+    xf::codec::jpegDecLeptonEnc(datainDDR, jpgSize, arithInfo, res);
+}
+
+//----------------------------------------------------------
+void jpegDecLeptonEncKernel_11(ap_uint<AXI_WIDTH>* datainDDR, int jpgSize, int* arithInfo, uint8_t* res) {
+    // clang-format off
+                uint64_t max_pix = MAX_NUM_PIX;
+                #pragma HLS INTERFACE m_axi port = datainDDR  depth = max_pix offset = slave \
+                bundle = gmem_in1 latency = 125  max_read_burst_length = 32
+                #pragma HLS INTERFACE m_axi port = res       depth = max_pix offset = slave   \
+                bundle = gmem_out1 latency = 125 max_read_burst_length = 32
+
+                #pragma HLS INTERFACE m_axi port = arith_info depth = 7      offset=slave \
+                bundle = gmem_out2
+
+                #pragma HLS INTERFACE s_axilite port=datainDDR     	bundle=control
+                #pragma HLS INTERFACE s_axilite port=res        	bundle=control
+                #pragma HLS INTERFACE s_axilite port=jpgSize        bundle=control
+                #pragma HLS INTERFACE s_axilite port=arith_info     bundle=control
+
+                #pragma HLS INTERFACE s_axilite port=return         bundle=control
+    // clang-format on
+
+    xf::codec::jpegDecLeptonEnc(datainDDR, jpgSize, arithInfo, res);
+}
+
+} // extern C*/
diff --git a/codec/L2/demos/leptonEnc/kernel/multi_cu.hpp b/codec/L2/demos/leptonEnc/kernel/multi_cu.hpp
new file mode 100644
index 0000000000..35b5738870
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/kernel/multi_cu.hpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef _MULTIKERNEL_HPP_
+#define _MULTIKERNEL_HPP_
+
+/**
+ * @file multi_cu.hpp
+ * @brief Interface of the JPEG decoder / Lepton encoder kernel.
+ */
+
+#include "jpeg_dec_lepton_enc.hpp"
+
+extern "C" void lepEnc(ap_uint<AXI_WIDTH>* datainDDR, int jpgSize, int* arithInfo, ap_uint<8>* res);
+
+/*extern "C" {
+void jpegDecLeptonEncKernel_0(ap_uint<AXI_WIDTH>* datainDDR,
+                              int jpgSize,
+                              // struct_arith& arith,
+                              int* arithInfo,
+                              uint8_t* res);
+
+void jpegDecLeptonEncKernel_1(ap_uint<AXI_WIDTH>* datainDDR,
+                              int jpgSize,
+                              // struct_arith& arith,
+                              int* arithInfo,
+                              uint8_t* res);
+
+void jpegDecLeptonEncKernel_2(ap_uint<AXI_WIDTH>* datainDDR,
+                              int jpgSize,
+                              // struct_arith& arith,
+                              int* arithInfo,
+                              uint8_t* res);
+
+void jpegDecLeptonEncKernel_3(ap_uint<AXI_WIDTH>* datainDDR,
+                              int jpgSize,
+                              // struct_arith& arith,
+                              int* arithInfo,
+                              uint8_t* res);
+
+void jpegDecLeptonEncKernel_4(ap_uint<AXI_WIDTH>* datainDDR,
+                              int jpgSize,
+                              // struct_arith& arith,
+                              int* arithInfo,
+                              uint8_t* res);
+
+void jpegDecLeptonEncKernel_5(ap_uint<AXI_WIDTH>* datainDDR,
+                              int jpgSize,
+                              // struct_arith& arith,
+                              int* arithInfo,
+                              uint8_t* res);
+
+void jpegDecLeptonEncKernel_6(ap_uint<AXI_WIDTH>* datainDDR,
+                              int jpgSize,
+                              // struct_arith& arith,
+                              int* arithInfo,
+                              uint8_t* res);
+
+void jpegDecLeptonEncKernel_7(ap_uint<AXI_WIDTH>* datainDDR,
+                              int jpgSize,
+                              // struct_arith& arith,
+                              int* arithInfo,
+                              uint8_t* res);
+
+void jpegDecLeptonEncKernel_8(ap_uint<AXI_WIDTH>* datainDDR,
+                              int jpgSize,
+                              // struct_arith& arith,
+                              int* arithInfo,
+                              uint8_t* res);
+
+void jpegDecLeptonEncKernel_9(ap_uint<AXI_WIDTH>* datainDDR,
+                              int jpgSize,
+                              // struct_arith& arith,
+                              int* arithInfo,
+                              uint8_t* res);
+
+void jpegDecLeptonEncKernel_10(ap_uint<AXI_WIDTH>* datainDDR,
+                               int jpgSize,
+                               // struct_arith& arith,
+                               int* arithInfo,
+                               uint8_t* res);
+
+void jpegDecLeptonEncKernel_11(ap_uint<AXI_WIDTH>* datainDDR,
+                               int jpgSize,
+                               // struct_arith& arith,
+                               int* arithInfo,
+                               uint8_t* res);
+}
+*/
+#endif
diff --git a/codec/L2/demos/leptonEnc/utils.mk b/codec/L2/demos/leptonEnc/utils.mk
new file mode 100644
index 0000000000..0ee80e90da
--- /dev/null
+++ b/codec/L2/demos/leptonEnc/utils.mk
@@ -0,0 +1,270 @@
+#
+# Copyright 2019-2022 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# vitis makefile-generator v2.0.6
+#
+#+-------------------------------------------------------------------------------
+# The following parameters are assigned with default values. These parameters can
+# be overridden through the make command line
+#+-------------------------------------------------------------------------------
+
+REPORT := no
+PROFILE := no
+DEBUG := no
+
+#Any REPORT value other than 'no' requests both report kinds at link time:
+#'estimate' for estimate report generation
+#'system' for system report generation
+ifneq ($(REPORT), no)
+VPP_LDFLAGS += --report estimate --report system
+endif
+
+#PROFILE=yes: generates profile summary report
+ifeq (yes,$(PROFILE))
+VPP_LDFLAGS += --profile_kernel data:all:all:all
+endif
+
+#DEBUG=yes: generates debug summary report
+ifeq (yes,$(DEBUG))
+VPP_LDFLAGS += --dk protocol:all:all:all
+endif
+
+#Check environment setup
+#Vitis install path: fall back to the default location when unset or empty, and export it
+ifndef XILINX_VITIS
+  export XILINX_VITIS = /opt/xilinx/Vitis/$(TOOL_VERSION)
+endif
+#XRT install path: same fallback, also exported to the commands make starts
+ifndef XILINX_XRT
+  export XILINX_XRT = /opt/xilinx/xrt
+endif
+
+# check_device: warn when PLATFORM_NAME matches no allowlist pattern, fail when it matches a blocklist
+# pattern. Written for plain POSIX sh (no [[ ]]): make runs recipes with /bin/sh, which may be dash.
+.PHONY: check_device
+check_device:
+	@set -eu; \
+	inallowlist=False; inblocklist=False; \
+	for dev in $(PLATFORM_ALLOWLIST); do \
+	    if echo "$(PLATFORM_NAME)" | grep -q -- "$$dev"; then inallowlist=True; fi; \
+	done; \
+	for dev in $(PLATFORM_BLOCKLIST); do \
+	    if echo "$(PLATFORM_NAME)" | grep -q -- "$$dev"; then inblocklist=True; fi; \
+	done; \
+	if [ "$$inallowlist" = False ]; then \
+	    echo "[Warning]: The device $(PLATFORM_NAME) not in allowlist."; \
+	fi; \
+	if [ "$$inblocklist" = True ]; then \
+	    echo "[ERROR]: The device $(PLATFORM_NAME) in blocklist."; exit 1; \
+	fi
+
+#get HOST_ARCH by PLATFORM (the 'CPU Type' line of platforminfo, queried once thanks to :=)
+ifneq (,$(PLATFORM))
+HOST_ARCH_temp := $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
+ifeq ($(HOST_ARCH_temp), x86)
+HOST_ARCH := x86
+else ifeq ($(HOST_ARCH_temp), cortex-a9)
+HOST_ARCH := aarch32
+else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
+HOST_ARCH := aarch64
+endif
+endif
+
+
+#get suffix of kernel by PLATFORM
+# Both tool queries use := so v++ and platforminfo run once here instead of on every later
+# expansion of VITIS_VER / DEVICE_TYPE.
+VITIS_VER := $(shell v++ --version | grep 'v++' | sed 's/^[[:space:]]*//' | sed -e 's/^[*]* v++ v//g' | cut -d " " -f1)
+DEVICE_TYPE := $(shell platforminfo -p $(PLATFORM) | grep 'FPGA Family' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
+# LINK_TARGET_FMT defaults to xclbin; only a versal device with Vitis >= 2022.1 selects xsa.
+LINK_TARGET_FMT := xclbin
+ifeq ($(DEVICE_TYPE), versal)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+LINK_TARGET_FMT := xsa
+endif
+endif
+
+#Checks for Device Family (space-indented: a leading tab can turn an assignment into recipe text)
+ifeq ($(HOST_ARCH), aarch32)
+  DEV_FAM = 7Series
+else ifeq ($(HOST_ARCH), aarch64)
+  DEV_FAM = Ultrascale
+endif
+
+#Checks for Correct architecture (an empty HOST_ARCH also passes: it equals the empty filter result)
+ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
+$(error HOST_ARCH variable not set, please set correctly and rerun)
+endif
+
+# check_version: print the newest commit of the library checkout when git and XFLIB_DIR/.git exist.
+.PHONY: check_version check_sysroot
+check_version:
+ifneq (,$(and $(shell which git),$(wildcard $(XFLIB_DIR)/.git)))
+	@cd $(XFLIB_DIR) && git log --graph --pretty=format:'%Cred%h%Creset -%C(yellow)%d%Creset %s %Cgreen(%cr) %C(bold blue)<%an>%Creset' --abbrev-commit -n 1 && cd -
+endif
+
+#Checks for SYSROOT: the error call is recipe text, so it only aborts when check_sysroot is run
+check_sysroot:
+ifneq ($(HOST_ARCH), x86)
+ifndef SYSROOT
+	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+
+#Checks for g++
+# x86 hosts need g++ >= CXX_VER (8.3.0 for Vitis >= 2022.1, otherwise 6.2.0). When the system
+# compiler is older, the g++ under XILINX_VIVADO/tps/lnx64 is used and its lib64 is exported.
+# aarch64 / aarch32 hosts use the cross compilers found under XILINX_VITIS/gnu.
+CXX := g++
+ifeq ($(HOST_ARCH), x86)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif
+# Major version only (text before the first dot), computed with make built-ins, no awk fork
+CXX_V := $(firstword $(subst ., ,$(CXX_VER)))
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
+ifndef XILINX_VIVADO
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
+else
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64$(if $(LD_LIBRARY_PATH),:$(LD_LIBRARY_PATH))
+$(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
+endif
+endif
+else ifeq ($(HOST_ARCH), aarch64)
+CXX := $(XILINX_VITIS)/gnu/aarch64/lin/aarch64-linux/bin/aarch64-linux-gnu-g++
+else ifeq ($(HOST_ARCH), aarch32)
+CXX := $(XILINX_VITIS)/gnu/aarch32/lin/gcc-arm-linux-gnueabi/bin/arm-linux-gnueabihf-g++
+endif
+
+#Check OS and setting env for xrt c++ api
+# Distributor id and release are read from lsb_release once each (:=), lower-cased and
+# stripped of blanks, instead of being re-queried on every expansion.
+OSDIST := $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
+OSREL := $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
+
+# for centos and redhat
+# On major release 7 of either distribution, x86 hosts get -D_GLIBCXX_USE_CXX11_ABI=0 appended
+# to XRT_CXXFLAGS; every other OS / release / HOST_ARCH combination leaves it untouched.
+ifneq ($(or $(findstring centos,$(OSDIST)),$(findstring redhat,$(OSDIST))),)
+# major release number = text before the first dot of OSREL
+ifeq (7,$(firstword $(subst ., ,$(OSREL))))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+endif
+# end of OS specific settings
+
+#Setting VPP
+VPP := v++
+
+#Setting aiecompiler and simulator command names
+AIECXX := aiecompiler
+AIESIMULATOR := aiesimulator
+X86SIMULATOR := x86simulator
+
+# Installation checks: each target prints a hint and fails when the expected tool file is missing.
+# The conditionals are resolved while the makefile is parsed; a recipe exists only on failure.
+.PHONY: check_vivado check_vpp check_xrt
+check_vivado:
+ifeq ($(wildcard $(XILINX_VIVADO)/bin/vivado),)
+	@echo "Cannot locate Vivado installation. Please set XILINX_VIVADO variable." && false
+endif
+
+check_vpp:
+ifeq ($(wildcard $(XILINX_VITIS)/bin/v++),)
+	@echo "Cannot locate Vitis installation. Please set XILINX_VITIS variable." && false
+endif
+
+check_xrt:
+ifeq ($(wildcard $(XILINX_XRT)/lib/libxilinxopencl.so),)
+	@echo "Cannot locate XRT installation. Please set XILINX_XRT variable." && false
+endif
+
+# Put the Vitis and XRT tools first on PATH for every command started by make.
+export PATH := $(XILINX_VITIS)/bin:$(XILINX_XRT)/bin:$(PATH)
+# x86 hosts also get the XRT libraries prepended to LD_LIBRARY_PATH. The explicit export matters
+# when LD_LIBRARY_PATH was not in the environment: make passes a variable it created itself
+# to the recipes only if it is exported.
+ifeq ($(HOST_ARCH), x86)
+export LD_LIBRARY_PATH := $(XILINX_XRT)/lib$(if $(LD_LIBRARY_PATH),:$(LD_LIBRARY_PATH))
+endif
+
+ifneq (,$(wildcard $(PLATFORM)))
+# Use PLATFORM as a file path (it names something that exists on disk)
+XPLATFORM := $(PLATFORM)
+else
+# Use PLATFORM as a file name pattern; steps 2 and 3 run only while XPLATFORM is still empty
+# 1. search paths specified by variable PLATFORM_REPO_PATHS (colon-separated list)
+ifneq (,$(PLATFORM_REPO_PATHS))
+# 1.1 as exact name: <repo path>/<PLATFORM>/<PLATFORM>.xpfm
+XPLATFORM := $(strip $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/$(PLATFORM)/$(PLATFORM).xpfm)))
+# 1.2 as a pattern: PLATFORM is an awk regular expression tried on every .xpfm path; all matches are kept
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/*/*.xpfm))
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 1.2
+endif # 1
+# 2. search Vitis installation (XILINX_VITIS/platforms)
+ifeq (,$(XPLATFORM))
+# 2.1 as exact name
+XPLATFORM := $(strip $(wildcard $(XILINX_VITIS)/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 2.2 as a pattern (same awk match as 1.2)
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard $(XILINX_VITIS)/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 2.2
+endif # 2
+# 3. search default locations (/opt/xilinx/platforms)
+ifeq (,$(XPLATFORM))
+# 3.1 as exact name
+XPLATFORM := $(strip $(wildcard /opt/xilinx/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 3.2 as a pattern (same awk match as 1.2)
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard /opt/xilinx/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 3.2
+endif # 3
+endif # PLATFORM is not an existing path
+
+define MSG_PLATFORM
+No platform matched pattern '$(PLATFORM)'.
+Available platforms are: $(XPLATFORMS)
+To add more platform directories, set the PLATFORM_REPO_PATHS variable or point PLATFORM variable to the full path of platform .xpfm file.
+endef
+export MSG_PLATFORM
+
+# check_platform: when XPLATFORM is empty, echo the exported MSG_PLATFORM from the shell environment and fail
+.PHONY: check_platform
+check_platform:
+ifeq (,$(XPLATFORM))
+	@echo "$${MSG_PLATFORM}" && false
+endif
+#Check ends
+
+#   PLATFORM_NAME - filesystem friendly name of the platform: the last path component
+#   of PLATFORM with a trailing .xpfm removed (make built-ins only, no basename fork per expansion)
+PLATFORM_NAME = $(strip $(patsubst %.xpfm,%,$(notdir $(PLATFORM))))
+
+
+# File-handling command shortcuts; ECHO carries a leading @ so the echo command line itself is not printed
+RM = rm -f
+RMDIR = rm -rf
+
+MV = mv -f
+CP = cp -rf
+ECHO:= @echo
diff --git a/codec/L2/demos/pikEnc/Makefile b/codec/L2/demos/pikEnc/Makefile
new file mode 100755
index 0000000000..2dc6ce258d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/Makefile
@@ -0,0 +1,345 @@
+# Copyright 2019-2022 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# vitis makefile-generator v2.0.6
+
+############################## Help Section ##############################
+# help - print usage for the main targets (all, run, xclbin, host, clean, cleanall).
+# Written as a double-colon rule, so other independent `help::` rules may add
+# their own text.
+# NOTE(review): $(ECHO) is not defined in this file; presumably it is provided
+# by the included utils.mk as '@echo' - confirm.
+.PHONY: help
+
+help::
+	$(ECHO) "Makefile Usage:"
+	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to generate the design for specified Target and Shell."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to run application in emulation."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make xclbin TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to build xclbin application."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make host TARGET=<hw/hw_emu/sw_emu/>"
+	$(ECHO) "      Command to build host application."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
+	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) ""
+	$(ECHO) "  make clean "
+	$(ECHO) "      Command to remove the generated non-hardware files."
+	$(ECHO) ""
+	$(ECHO) "  make cleanall"
+	$(ECHO) "      Command to remove all the generated files."
+	$(ECHO) ""
+
+############################## Setting up Project Variables ##############################
+
+# MK_PATH: absolute path of the makefile most recently read (this file, as long
+# as no include precedes this line).
+MK_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+# XF_PROJ_ROOT: unless already set by the caller, MK_PATH with everything from
+# its last "/L2/" component onward removed (bash ${var%pattern} suffix stripping).
+XF_PROJ_ROOT ?= $(shell bash -c 'export MK_PATH=$(MK_PATH); echo $${MK_PATH%/L2/*}')
+# CUR_DIR: directory containing this makefile, without the trailing slash.
+CUR_DIR := $(patsubst %/,%,$(dir $(MK_PATH)))
+XFLIB_DIR = $(XF_PROJ_ROOT)
+
+# setting default value
+TARGET ?= sw_emu
+HOST_ARCH ?= x86
+
+#setting PLATFORM
+# Precedence: an explicit PLATFORM, then DEVICE, then the built-in U200 default.
+ifeq ($(PLATFORM),)
+PLATFORM := $(DEVICE)
+endif
+ifeq ($(PLATFORM),)
+PLATFORM := xilinx_u200_gen3x16_xdma_2_202110_1
+endif
+
+# #################### Checking if PLATFORM in whitelist ############################
+# NOTE(review): the allow/block lists are only appended to here; presumably they
+# are consumed by a check in the included utils.mk - confirm.
+PLATFORM_ALLOWLIST +=  u200
+PLATFORM_BLOCKLIST +=  zc
+
+include ./utils.mk
+# Output locations, all keyed by TARGET and PLATFORM_NAME.
+# NOTE(review): PLATFORM_NAME and XPLATFORM are not defined in this file;
+# presumably they come from utils.mk - confirm.
+TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
+TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
+BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+# A non-empty RESULT_DIR overrides the default build directory.
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
+BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
+EMCONFIG := $(BUILD_DIR)/emconfig.json
+XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
+# Exported into the environment of every recipe.
+export XCL_BINDIR = $(XCLBIN_DIR)
+
+# Dependency accumulators for the host executable, the binary containers and
+# the run target; reset to empty here and extended further down.
+EXE_FILE_DEPS :=
+BINARY_CONTAINERS_DEPS :=
+RUN_DEPS :=
+
+# get global setting
+# Compiler, linker and v++ flags are chosen by HOST_ARCH. Only x86 and aarch64
+# are handled; any other value contributes nothing in this block.
+ifeq ($(HOST_ARCH), x86)
+CXXFLAGS += -fmessage-length=0 -I$(CUR_DIR)/src/ -I$(XILINX_XRT)/include -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
+LDFLAGS += -pthread -L$(XILINX_XRT)/lib -L$(XILINX_HLS)/lnx64/tools/fpo_v7_0  -Wl,--as-needed -lOpenCL -lxrt_coreutil -lgmp -lmpfr -lIp_floating_point_v7_0_bitacc_cmodel 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
+VPP_LDFLAGS += --optimize 2 -R 2 
+else ifeq ($(HOST_ARCH), aarch64)
+CXXFLAGS += -I$(CUR_DIR)/src/ -fmessage-length=0 --sysroot=$(SYSROOT)  -I$(SYSROOT)/usr/include/xrt -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
+LDFLAGS += -pthread -L$(SYSROOT)/usr/lib -L$(XILINX_VITIS_AIETOOLS)/lib/aarch64.o -Wl,--as-needed -lxilinxopencl -lxrt_coreutil 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
+VPP_LDFLAGS += --optimize 2 -R 2 
+endif
+# User-supplied additions are appended last.
+CXXFLAGS += $(EXTRA_CXXFLAGS)
+VPP_FLAGS += $(EXTRA_VPP_FLAGS)
+
+########################## Setting up Host Variables ##########################
+# Define a preprocessor macro for the host code that identifies the emulation
+# target being built (nothing is defined for TARGET=hw).
+ifeq ($(TARGET),sw_emu)
+CXXFLAGS += -D SW_EMU_TEST
+endif
+ifeq ($(TARGET),hw_emu)
+CXXFLAGS += -D HW_EMU_TEST
+endif
+
+# Append XRT_CXXFLAGS only when CXXFLAGS does not already mention "opencv".
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
+#Include Required Host Source Files
+HOST_SRCS += $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/cpik_main.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/ac_predictions.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/ac_strategy.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/adaptive_quantization.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/adaptive_reconstruction.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/alpha.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/ans_common.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/ans_decode.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/ans_encode.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/ar_control_field.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/arch_specific.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/bilinear_transform.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/block_dictionary.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/butteraugli_comparator.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/butteraugli_distance.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/cache_aligned.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/chroma_from_luma.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/cmdline.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/codec_impl.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/codec_png.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/codec_pnm.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/color_correlation.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/color_encoding.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/compressed_dc.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/compressed_image.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/context_map_decode.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/context_map_encode.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/cpik.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/data_parallel.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/dc_predictor.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/dct.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/dct_util.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/deconvolve.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/descriptive_statistics.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/detect_dots.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/entropy_coder.cc 
$(XFLIB_DIR)/L2/demos/pikEnc/host/pik/epf.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/epf_target.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/external_image.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/gaborish.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/gauss_blur.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/gradient_map.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/headers.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/huffman_decode.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/huffman_encode.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/image.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/lehmer_code.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/linalg.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/lossless16.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/lossless8.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/lossless_entropy.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/metadata.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/noise.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/opsin_image.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/opsin_inverse.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/opsin_params.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/os_specific.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/padded_bytes.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/pik.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/pik_frame.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/pik_info.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/quant_weights.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/quantizer.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/saliency_map.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/single_image_handler.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/status.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/upscaler.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/yuv_convert.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/yuv_opsin_convert.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/butteraugli/butteraugli.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/simd/targets.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/brotli.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/pik/color_management.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/encode_order.cc 
$(XFLIB_DIR)/L2/demos/pikEnc/host/pik_codec_common.cc $(XFLIB_DIR)/L2/demos/pikEnc/host/host_dev.cc $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmsalpha.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmscam02.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmscgats.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmscnvrt.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmserr.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmsgamma.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmsgmt.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmshalf.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmsintrp.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmsio0.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmsio1.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmslut.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmsmtrx.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmsnamed.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmsopt.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmspack.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmspcs.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmsplugin.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmsps2.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmssamp.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmssm.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmstypes.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmsvirt.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmswtpnt.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/src/cmsxform.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lodepng/lodepng.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lodepng/lodepng_util.cpp 
$(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/entropy_common.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fseU16.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fse_compress.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fse_decompress.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/huf_compress.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/huf_decompress.cpp $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references_hq.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/enc/bit_cost.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/enc/block_splitter.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/enc/brotli_bit_stream.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/enc/cluster.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/enc/compress_fragment.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/enc/compress_fragment_two_pass.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/enc/dictionary_hash.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/enc/encode.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/enc/encoder_dict.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/enc/entropy_encode.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/enc/histogram.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/enc/literal_cost.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/enc/memory.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/enc/metablock.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/enc/static_dict.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/enc/utf8_util.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/common/dictionary.c 
$(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/common/transform.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/dec/bit_reader.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/dec/decode.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/dec/huffman.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/dec/state.c $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/fuzz/decode_fuzzer.c 
+CXXFLAGS +=  -I $(XFLIB_DIR)/../utils/L1/include/ -I $(XFLIB_DIR)/L2/include/hw/pikEnc/ -I $(XFLIB_DIR)/L2/include/hw/pikEnc/kernel3/ -I $(XFLIB_DIR)/L2/demos/pikEnc/host -I $(XFLIB_DIR)/L2/demos/pikEnc/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lcms/include -I $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/brotli/c/include -I $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/lodepng -I $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib -I $(XFLIB_DIR)/L2/demos/pikEnc/host/third_party
+CXXFLAGS += -O3 
+
+# Host executable name/location; every host source is a prerequisite of it.
+EXE_NAME := host.exe
+EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
+EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
+
+# Default command-line arguments passed to the host executable by `make run`.
+HOST_ARGS :=  --xclbin $(BUILD_DIR)/pikEnc.xclbin images/small32x32.png test.pik --fast
+# Non-x86 hosts: the same arguments with the directory part of each word
+# removed (e.g. $(BUILD_DIR)/pikEnc.xclbin -> pikEnc.xclbin). This is the
+# argument list written into run_script.sh.
+ifneq ($(HOST_ARCH), x86)
+PKG_HOST_ARGS = $(foreach args,$(HOST_ARGS),$(subst $(dir $(patsubst %/,%,$(args))),,$(args)))
+endif
+
+########################## Kernel compiler global settings ##########################
+# Platform-specific v++ options: when the resolved platform path contains
+# "u200", add the U200 connectivity config and debug info (-g).
+# $(findstring) performs the same substring test as the former
+# `echo ... | awk '/u200/'` pipeline without forking a shell at parse time.
+ifneq (,$(findstring u200,$(XPLATFORM)))
+VPP_FLAGS += --config $(CUR_DIR)/conn_u200.cfg -g
+endif
+# Kernel include paths; identical for every platform, so stated once
+# (after the platform options, preserving the original flag order).
+VPP_FLAGS += -I $(XFLIB_DIR)/../utils/L1/include/ -I $(XFLIB_DIR)/L2/include/hw -I $(XFLIB_DIR)/L2/include/hw/pikEnc -I $(XFLIB_DIR)/L2/demos/pikEnc/kernel
+
+######################### binary container global settings ##########################
+# Per-kernel compile flags: each kernel gets its own KERNEL_NAME macro and an
+# HLS clock of 300000000 for that kernel; kernels 2 and 3 additionally pass
+# hls_pre.tcl to HLS via --hls.pre_tcl.
+VPP_FLAGS_kernel1Top +=  -D KERNEL_NAME=pikEncKernel1Top
+VPP_FLAGS_kernel1Top += --hls.clock 300000000:pikEncKernel1Top
+VPP_FLAGS_kernel2Top +=  -D KERNEL_NAME=pikEncKernel2Top --hls.pre_tcl $(CUR_DIR)/hls_pre.tcl
+VPP_FLAGS_kernel2Top += --hls.clock 300000000:pikEncKernel2Top
+VPP_FLAGS_kernel3Top +=  -D KERNEL_NAME=pikEncKernel3Top --hls.pre_tcl $(CUR_DIR)/hls_pre.tcl
+VPP_FLAGS_kernel3Top += --hls.clock 300000000:pikEncKernel3Top
+# Link-time clock request: a different v++ option is used for non-x86 hosts
+# than for x86 hosts.
+ifneq ($(HOST_ARCH), x86)
+VPP_LDFLAGS_pikEnc += --clock.defaultFreqHz 200000000
+else
+VPP_LDFLAGS_pikEnc += --kernel_frequency 180
+endif
+
+# Link outputs: on x86 the xclbin itself is the binary container; otherwise
+# the container is pikEnc_pkg.$(LINK_TARGET_FMT) and the xclbin name is kept
+# in BINARY_CONTAINERS_PKG for the packaging step.
+# NOTE(review): LINK_TARGET_FMT is not defined in this file - confirm its source.
+ifeq ($(HOST_ARCH), x86)
+BINARY_CONTAINERS += $(BUILD_DIR)/pikEnc.xclbin
+else
+BINARY_CONTAINERS += $(BUILD_DIR)/pikEnc_pkg.$(LINK_TARGET_FMT)
+BINARY_CONTAINERS_PKG += $(BUILD_DIR)/pikEnc.xclbin
+endif
+
+# ################ Setting Rules for Binary Containers (Building Kernels) ################
+# Kernel compile rules: each .xo object is built by `v++ -c` from all of its
+# listed sources ($^), with the kernel-specific flags placed before the global
+# VPP_FLAGS and the directory of the first source added as an include path.
+# Every object is also appended to BINARY_CONTAINER_pikEnc_OBJS for linking.
+$(TEMP_DIR)/pikEncKernel1Top.xo: $(XFLIB_DIR)/L2/demos/pikEnc/kernel/XAccPIKKernel1.cpp 
+	$(ECHO) "Compiling Kernel: pikEncKernel1Top"
+	mkdir -p $(TEMP_DIR)
+	$(VPP) -c $(VPP_FLAGS_kernel1Top) $(VPP_FLAGS) -k pikEncKernel1Top -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
+BINARY_CONTAINER_pikEnc_OBJS += $(TEMP_DIR)/pikEncKernel1Top.xo
+$(TEMP_DIR)/pikEncKernel2Top.xo: $(XFLIB_DIR)/L2/demos/pikEnc/kernel/XAccPIKKernel2.cpp 
+	$(ECHO) "Compiling Kernel: pikEncKernel2Top"
+	mkdir -p $(TEMP_DIR)
+	$(VPP) -c $(VPP_FLAGS_kernel2Top) $(VPP_FLAGS) -k pikEncKernel2Top -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
+BINARY_CONTAINER_pikEnc_OBJS += $(TEMP_DIR)/pikEncKernel2Top.xo
+# Kernel 3 is compiled from its top-level source plus the kernel3/ helper sources.
+$(TEMP_DIR)/pikEncKernel3Top.xo: $(XFLIB_DIR)/L2/demos/pikEnc/kernel/XAccPIKKernel3.cpp $(XFLIB_DIR)/L2/demos/pikEnc/kernel/kernel3/build_cluster.cpp $(XFLIB_DIR)/L2/demos/pikEnc/kernel/kernel3/ctrl_tokenize.cpp $(XFLIB_DIR)/L2/demos/pikEnc/kernel/kernel3/dc_tokenize.cpp $(XFLIB_DIR)/L2/demos/pikEnc/kernel/kernel3/dc_shrink.cpp $(XFLIB_DIR)/L2/demos/pikEnc/kernel/kernel3/build_table_encode_histo.cpp $(XFLIB_DIR)/L2/demos/pikEnc/kernel/kernel3/ac_tokenize.cpp $(XFLIB_DIR)/L2/demos/pikEnc/kernel/kernel3/ans.cpp $(XFLIB_DIR)/L2/demos/pikEnc/kernel/kernel3/kernel3_common.cpp 
+	$(ECHO) "Compiling Kernel: pikEncKernel3Top"
+	mkdir -p $(TEMP_DIR)
+	$(VPP) -c $(VPP_FLAGS_kernel3Top) $(VPP_FLAGS) -k pikEncKernel3Top -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
+BINARY_CONTAINER_pikEnc_OBJS += $(TEMP_DIR)/pikEncKernel3Top.xo
+# Link rule: `v++ -l` combines all kernel objects into the binary container,
+# applying the global link flags, the pikEnc-specific link flags and
+# AIE_LDFLAGS (not set in this file; expands to nothing unless provided elsewhere).
+BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_pikEnc_OBJS)
+$(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
+	mkdir -p $(BUILD_DIR)
+	$(VPP) -l $(VPP_FLAGS) --temp_dir $(TEMP_DIR) --report_dir $(BUILD_REPORT_DIR)/pikEnc $(VPP_LDFLAGS)  $(VPP_LDFLAGS_pikEnc) $(AIE_LDFLAGS)   -o $@ $^
+
+############################## Setting Rules for Host (Building Host Executable) ##############################
+# Pick the environment check that must run before the host build:
+# x86 hosts use check_xrt, every other HOST_ARCH uses check_sysroot.
+ifeq ($(HOST_ARCH), x86)
+host_env_check := check_xrt
+else
+host_env_check := check_sysroot
+endif
+
+# Build the host executable from all of its prerequisites in one compiler
+# invocation. The check is order-only (after '|'): it runs first but never
+# forces a rebuild and is not part of $^.
+$(EXE_FILE): $(EXE_FILE_DEPS) | $(host_env_check)
+	mkdir -p $(BUILD_DIR)
+	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
+
+# Generate the emulation configuration for the selected platform with
+# emconfigutil, writing its output into BUILD_DIR.
+$(EMCONFIG):
+	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
+############################## Preparing sdcard folder ##############################
+ifneq ($(HOST_ARCH), x86)
+# Kernel image handed to the packaging step: platforms whose name contains
+# "zc706" use uImage, all others use Image. Both are looked up two directories
+# above SYSROOT.
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE := $(SYSROOT)/../../uImage
+else
+K_IMAGE := $(SYSROOT)/../../Image
+endif
+RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
+# Generate the script that runs the host executable on the embedded target.
+# The script exports the library/XRT environment (plus XCL_EMULATION_MODE for
+# sw_emu/hw_emu), copies platform_desc.txt to /etc/xocl.txt when present, runs
+# the host with PKG_HOST_ARGS, and prints "TEST PASSED, RC=0" on success - the
+# marker the `run` target greps for in qemu_output.log - before exiting with
+# the host's return code.
+# BUILD_DIR is created first: the recipe appends into it, and nothing else
+# guarantees the directory exists when this target is built on its own or in
+# parallel with the other SD-card prerequisites (make -j).
+$(RUN_SCRIPT):
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(RUN_SCRIPT)
+	@echo 'export LD_LIBRARY_PATH=/mnt:/tmp:$(LIBRARY_PATH)' >> $(RUN_SCRIPT)
+ifneq ($(filter sw_emu hw_emu, $(TARGET)),)
+	@echo 'export XCL_EMULATION_MODE=$(TARGET)' >> $(RUN_SCRIPT)
+endif
+	@echo 'export XILINX_VITIS=/mnt' >> $(RUN_SCRIPT)
+	@echo 'export XILINX_XRT=/usr' >> $(RUN_SCRIPT)
+	@echo 'if [ -f platform_desc.txt  ]; then' >> $(RUN_SCRIPT)
+	@echo '        cp platform_desc.txt /etc/xocl.txt' >> $(RUN_SCRIPT)
+	@echo 'fi' >> $(RUN_SCRIPT)
+	@echo './$(EXE_NAME) $(PKG_HOST_ARGS)' >> $(RUN_SCRIPT)
+	@echo 'return_code=$$?' >> $(RUN_SCRIPT)
+	@echo 'if [ $$return_code -ne 0 ]; then' >> $(RUN_SCRIPT)
+	@echo '        echo "ERROR: Embedded host run failed, RC=$$return_code"' >> $(RUN_SCRIPT)
+	@echo 'else' >> $(RUN_SCRIPT)
+	@echo '        echo "INFO: TEST PASSED, RC=0"' >> $(RUN_SCRIPT)
+	@echo 'fi' >> $(RUN_SCRIPT)
+	@echo 'echo "INFO: Embedded host run completed."' >> $(RUN_SCRIPT)
+	@echo 'exit $$return_code' >> $(RUN_SCRIPT)
+# Extra data files/directories to package; none for this demo.
+DATA_FILE := 
+DATA_DIR := 
+# Candidate files for the SD card image.
+SD_FILES += $(RUN_SCRIPT)
+SD_FILES += $(EXE_FILE)
+SD_FILES += $(EMCONFIG)
+SD_FILES += xrt.ini
+SD_FILES += $(DATA_FILE)# where define DATAFILE in json
+# Turn the lists into v++ package options. Recursively expanded, so the
+# $(wildcard) existence filter runs when the packaging recipe is expanded:
+# only files that exist at that point get a --package.sd_file option.
+SD_FILES_WITH_PREFIX = $(foreach sd_file,$(SD_FILES), $(if $(filter $(sd_file),$(wildcard $(sd_file))), --package.sd_file $(sd_file)))
+SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
+# Inputs to `v++ -p`; AIE_CONTAINER is not set in this file.
+PACKAGE_FILES := $(BINARY_CONTAINERS)
+PACKAGE_FILES += $(AIE_CONTAINER)
+# Output directory of the packaging step.
+SD_CARD := $(CUR_DIR)/package_$(TARGET)
+# Flag flipped to true by the vck190_base_dfx hardware flow in the rule below.
+vck190_dfx_hw := false
+# Package the design into the SD card directory with `v++ -p`.
+# The recipe is assembled at parse time:
+#  * vck190_base_dfx platform with TARGET=hw: two v++ invocations (package the
+#    containers first, then build the SD card image around the result), after
+#    which vck190_dfx_hw is set to true. That assignment is a make statement,
+#    not a recipe line, so it also ends the rule at that point.
+#  * every other case: vck190_dfx_hw is still false and a single v++ invocation
+#    does the packaging.
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+	@echo "Generating sd_card folder...."
+	mkdir -p $(SD_CARD)
+	chmod a+rx $(BUILD_DIR)/run_script.sh
+ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	@echo "### ***** sd_card generation done! ***** ###"
+vck190_dfx_hw := true
+endif
+endif
+ifeq ($(vck190_dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+	@echo "### ***** sd_card generation done! ***** ###"
+endif
+
+# Convenience alias for the packaging directory target.
+.PHONY: sd_card
+sd_card: $(SD_CARD)
+endif
+############################## Setting Essential Checks and Building Rules ##############################
+# Everything `all` must build: host executable, binary container(s), emulation
+# config and the SD card package. SD_CARD is only defined for non-x86 hosts,
+# so it expands to nothing on x86.
+RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
+RUN_DEPS += $(SD_CARD)
+
+.PHONY: mkflag all run
+# mkflag - record the current MAKEFLAGS, one word per line, in
+# $(BUILD_DIR)/makefile_args.txt (the file is recreated on every invocation).
+mkflag:
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do echo $$var >> $(BUILD_DIR)/makefile_args.txt; done
+# all - run the environment checks, record MAKEFLAGS (mkflag) and build
+# everything in RUN_DEPS.
+# NOTE(review): check_device and check_vpp are not defined in this file;
+# presumably they come from utils.mk - confirm.
+#
+# run - build `all`, then execute the design. The recipe is selected at parse
+# time from TARGET and HOST_ARCH:
+#  * hw_emu / sw_emu on x86: run the host executable directly with
+#    XCL_EMULATION_MODE set to the target.
+#  * hw_emu / sw_emu on other hosts: start the packaged launch_<target>.sh with
+#    run_script.sh as the application, then fail unless qemu_output.log
+#    contains "TEST PASSED, RC=0".
+#  * hw on a platform whose name contains aws-vu9p-f1: only print a notice,
+#    unless JENKINS_INTERNAL_BUILD is 1, in which case the host is run directly.
+#  * hw on x86: run the host executable directly.
+#  * hw on other hosts: print instructions for running on the board.
+all: check_device  check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
+#hw_emu
+ifneq (,$(filter hw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	XCL_EMULATION_MODE=$(TARGET) $(EXE_FILE) $(HOST_ARGS)
+	
+else
+	@echo $(RUN_DEPS)
+	$(SD_CARD)/launch_$(TARGET).sh -no-reboot -run-app $(notdir $(RUN_SCRIPT)) 
+	grep "TEST PASSED, RC=0" $(SD_CARD)/qemu_output.log || exit 1
+	
+endif
+endif
+#sw_emu
+ifneq (,$(filter sw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	XCL_EMULATION_MODE=$(TARGET) $(EXE_FILE) $(HOST_ARGS) 
+	
+else
+	@echo $(RUN_DEPS)
+	$(SD_CARD)/launch_$(TARGET).sh -no-reboot -run-app $(notdir $(RUN_SCRIPT)) 
+	grep "TEST PASSED, RC=0" $(SD_CARD)/qemu_output.log || exit 1
+	
+endif
+endif
+#hw
+ifeq ($(TARGET), hw)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(HOST_ARGS)
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(HOST_ARGS)
+	
+else
+	$(ECHO) "Please copy the content of sd_card folder and data to an SD Card and run on the board"
+endif
+endif
+
+############################## Setting Targets ##############################
+
+.PHONY: clean cleanall emconfig
+# emconfig - generate only the emulation configuration file.
+emconfig: $(EMCONFIG)
+
+# host - build only the host executable, after the environment check that
+# matches HOST_ARCH (check_xrt on x86, check_sysroot otherwise).
+.PHONY: host
+ifeq ($(HOST_ARCH), x86)
+host:  check_xrt $(EXE_FILE)
+else
+host:  check_sysroot $(EXE_FILE)
+endif
+
+# xclbin - build only the binary container(s), after check_vpp and the
+# environment check that matches HOST_ARCH.
+.PHONY: xclbin
+ifeq ($(HOST_ARCH), x86)
+xclbin:  check_vpp check_xrt $(BINARY_CONTAINERS) 
+else
+xclbin:  check_vpp check_sysroot $(BINARY_CONTAINERS) 
+endif
+
+############################## Cleaning Rules ##############################
+# The cleaning helpers are commands, not files: declare them phony so a stray
+# file named "cleanh" or "cleank" cannot make them look up to date.
+.PHONY: cleanh cleank
+
+# cleanh - remove the host executable and host-side by-products (tool logs,
+# reports, waveform files, ...). Note that this also deletes *.csv, *.jpg,
+# *.jpeg and *.png files located directly in the current directory.
+cleanh:
+	-$(RMDIR) $(EXE_FILE) vitis_* TempConfig system_estimate.xtxt *.rpt .run/
+	-$(RMDIR) src/*.ll _xocc_* .Xil dltmp* xmltmp* *.log *.jou *.wcfg *.wdb sample_link.ini sample_compile.ini obj*  bin* *.csv *.jpg *.jpeg *.png
+
+# cleank - remove kernel/emulation build products and the v++ temp directories.
+# The simulation-script pattern was previously written "pl*start_simulation. sh";
+# the stray space made `rm -rf` also delete any path literally named "sh".
+# NOTE(review): the joined pattern is the minimal safe fix - confirm the exact
+# script name(s) the tools generate.
+cleank:
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*
+
+# cleanall - everything cleanh and cleank remove, plus the build directory,
+# reports, packages and remaining tool output.
+cleanall: cleanh cleank
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
+	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+
+# clean - remove only the non-hardware (host-side) files.
+clean: cleanh
diff --git a/codec/L2/demos/pikEnc/README.md b/codec/L2/demos/pikEnc/README.md
new file mode 100644
index 0000000000..fc63aa8fea
--- /dev/null
+++ b/codec/L2/demos/pikEnc/README.md
@@ -0,0 +1,107 @@
+PIK Encoder
+===============
+
+The PIK Encoder example resides in the ``L2/demos/pikEnc`` directory. This tutorial provides a step-by-step guide that covers the commands for building and running the kernel.
+
+Executable Usage
+----------------
+
+* **Work Directory(Step 1)**
+
+The steps for library download and environment setup can be found [here](https://github.com/Xilinx/Vitis_Libraries/tree/master/codec/L2/demos#building). To get to the design, change into the demo directory:
+
+```
+   cd L2/demos/pikEnc
+```
+
+* **Build kernel(Step 2)**
+
+Run the following make command to build your XCLBIN and host binary targeting a specific device. Please note that this process takes a long time, possibly a couple of hours.
+
+```
+   make run TARGET=hw DEVICE=xilinx_u200_xdma_201830_2
+```   
+
+* **Run kernel(Step 3)**
+
+To get the benchmark results, please run the following command.
+
+```
+   ./build_dir.hw.xilinx_u200_xdma_201830_2/host.exe --xclbin build_dir.hw.xilinx_u200_xdma_201830_2/pikEnc.xclbin PNGFilePath PIKFilePath --fast
+```   
+
+PIK Encoder Input Arguments:
+
+```
+   Usage: host.exe --xclbin <xclbin file> PNGFilePath PIKFilePath --fast
+          --xclbin:         the path to the kernel binary (*.xclbin)
+          --fast:           the encoding mode
+          PNGFilePath:      the path to the input *.PNG
+          PIKFilePath:      the path to the output *.pik
+```          
+
+Note: Default arguments are set in the Makefile; you can also use the other [pictures](https://github.com/Xilinx/Vitis_Libraries/tree/master/codec/L2/demos#pictures) listed in the table.
+
+* **Example output(Step 4)** 
+
+```
+   Found Platform
+   Platform Name: Xilinx
+   INFO: Found Device=xilinx_u200_xdma_201830_2
+   INFO: Importing build_dir.hw.xilinx_u200_xdma_201830_2/pikEnc.xclbin
+   Loading: 'build_dir.hw.xilinx_u200_xdma_201830_2/pikEnc.xclbin'
+   INFO: Kernel has been created
+   INFO: Finish kernel setup
+   ...
+
+   INFO: Finish kernel execution
+   INFO: Finish E2E execution
+   INFO: Data transfer from host to device: 100 us
+   INFO: Data transfer from device to host: 20 us
+   INFO: Average kernel execution per run: 600 ms
+```
+
+Profiling
+---------
+
+The hardware resource utilizations are listed in the following table.
+Different tool versions may result in slightly different resource usage.
+
+
+##### Table 1 IP resources for PIK encoder 
+
+|      IP       |   BRAM   |   URAM   |    DSP   |    FF    |   LUT   |
+|---------------|----------|----------|----------|----------|---------|
+|    Kernel1    |    25    |    93    |    568   |   125920 |  97441  |
+|    Kernel2    |    411   |    252   |    1614  |   309222 |  262543 |
+|    Kernel3    |    178   |    128   |    216   |   114845 |  90011  |
+
+
+##### Table 2 PIK Encoder Performance
+      
+|   Size\Time(ms)  |  Kernel1  |  Kernel2  |  Kernel3  |
+|------------------|-----------|-----------|-----------|
+|     512x512      |    16     |    14     |     7     |
+|    1024x1024     |    52     |    48     |    24     |
+|    2048x2048     |    191    |    180    |    86     |
+
+## License
+
+Licensed using the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0).
+
+    Copyright 2022 Xilinx, Inc.
+    
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+    
+        http://www.apache.org/licenses/LICENSE-2.0
+    
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+    Copyright 2022 Xilinx, Inc.
+
+
diff --git a/codec/L2/demos/pikEnc/check.sh b/codec/L2/demos/pikEnc/check.sh
new file mode 100755
index 0000000000..32b92f3075
--- /dev/null
+++ b/codec/L2/demos/pikEnc/check.sh
@@ -0,0 +1 @@
+echo "8a90e20a8819d551608cd5ed757b2970  ./test.pik" | md5sum -c -
diff --git a/codec/L2/demos/pikEnc/conn_u200.cfg b/codec/L2/demos/pikEnc/conn_u200.cfg
new file mode 100755
index 0000000000..6d5103b722
--- /dev/null
+++ b/codec/L2/demos/pikEnc/conn_u200.cfg
@@ -0,0 +1,41 @@
+[hls]
+#pre_tcl=hls_pre.tcl
+
+[connectivity]
+# pikEncKernel 1
+sp=pikEncKernel1Top_1.m_axi_gmem0_0:DDR[0]  
+sp=pikEncKernel1Top_1.m_axi_gmem0_1:DDR[0] 
+sp=pikEncKernel1Top_1.m_axi_gmem1_0:DDR[1] 
+sp=pikEncKernel1Top_1.m_axi_gmem1_1:DDR[1] 
+sp=pikEncKernel1Top_1.m_axi_gmem1_2:DDR[1] 
+#slr=pikEncKernel1Top_1:SLR1
+
+# pikEncKernel 2
+sp=pikEncKernel2Top_1.m_axi_gmem0_0:DDR[1]
+sp=pikEncKernel2Top_1.m_axi_gmem0_1:DDR[1]
+sp=pikEncKernel2Top_1.m_axi_gmem0_2:DDR[1]
+sp=pikEncKernel2Top_1.m_axi_gmem0_3:DDR[1]
+sp=pikEncKernel2Top_1.m_axi_gmem1_0:DDR[2]
+sp=pikEncKernel2Top_1.m_axi_gmem1_1:DDR[2]
+sp=pikEncKernel2Top_1.m_axi_gmem1_2:DDR[2]
+sp=pikEncKernel2Top_1.m_axi_gmem1_3:DDR[2]
+sp=pikEncKernel2Top_1.m_axi_gmem1_4:DDR[2]
+sp=pikEncKernel2Top_1.m_axi_gmem1_5:DDR[2]
+#slr=pikEncKernel2Top_1:SLR0
+
+# pikEncKernel 3
+sp=pikEncKernel3Top_1.m_axi_gmem0_0:DDR[2] 
+sp=pikEncKernel3Top_1.m_axi_gmem0_1:DDR[2] 
+sp=pikEncKernel3Top_1.m_axi_gmem0_2:DDR[2] 
+sp=pikEncKernel3Top_1.m_axi_gmem0_3:DDR[2] 
+sp=pikEncKernel3Top_1.m_axi_gmem0_4:DDR[2] 
+sp=pikEncKernel3Top_1.m_axi_gmem0_5:DDR[2]
+sp=pikEncKernel3Top_1.m_axi_gmem0_6:DDR[2] 
+sp=pikEncKernel3Top_1.m_axi_gmem1_0:DDR[3] 
+sp=pikEncKernel3Top_1.m_axi_gmem1_1:DDR[3] 
+sp=pikEncKernel3Top_1.m_axi_gmem1_2:DDR[3] 
+sp=pikEncKernel3Top_1.m_axi_gmem1_3:DDR[3] 
+sp=pikEncKernel3Top_1.m_axi_gmem1_4:DDR[3] 
+#slr=pikEncKernel3Top_1:SLR2
+
+
diff --git a/codec/L2/demos/pikEnc/conn_u280.cfg b/codec/L2/demos/pikEnc/conn_u280.cfg
new file mode 100755
index 0000000000..056eb280d0
--- /dev/null
+++ b/codec/L2/demos/pikEnc/conn_u280.cfg
@@ -0,0 +1,41 @@
+[hls]
+#pre_tcl=hls_pre.tcl
+
+[connectivity]
+# pikEncKernel 1
+sp=pikEncKernel1Top_1.m_axi_gmem0_0:DDR[0]  
+sp=pikEncKernel1Top_1.m_axi_gmem0_1:DDR[0] 
+sp=pikEncKernel1Top_1.m_axi_gmem1_0:DDR[1] 
+sp=pikEncKernel1Top_1.m_axi_gmem1_1:DDR[1] 
+sp=pikEncKernel1Top_1.m_axi_gmem1_2:DDR[1] 
+#slr=pikEncKernel1Top_1:SLR1
+
+# pikEncKernel 2
+sp=pikEncKernel2Top_1.m_axi_gmem0_0:DDR[1]
+sp=pikEncKernel2Top_1.m_axi_gmem0_1:DDR[1]
+sp=pikEncKernel2Top_1.m_axi_gmem0_2:DDR[1]
+sp=pikEncKernel2Top_1.m_axi_gmem0_3:DDR[1]
+sp=pikEncKernel2Top_1.m_axi_gmem1_0:DDR[0]
+sp=pikEncKernel2Top_1.m_axi_gmem1_1:DDR[0]
+sp=pikEncKernel2Top_1.m_axi_gmem1_2:DDR[0]
+sp=pikEncKernel2Top_1.m_axi_gmem1_3:DDR[0]
+sp=pikEncKernel2Top_1.m_axi_gmem1_4:DDR[0]
+sp=pikEncKernel2Top_1.m_axi_gmem1_5:DDR[0]
+#slr=pikEncKernel2Top_1:SLR0
+
+# pikEncKernel 3
+sp=pikEncKernel3Top_1.m_axi_gmem0_0:DDR[0] 
+sp=pikEncKernel3Top_1.m_axi_gmem0_1:DDR[0] 
+sp=pikEncKernel3Top_1.m_axi_gmem0_2:DDR[0] 
+sp=pikEncKernel3Top_1.m_axi_gmem0_3:DDR[0] 
+sp=pikEncKernel3Top_1.m_axi_gmem0_4:DDR[0] 
+sp=pikEncKernel3Top_1.m_axi_gmem0_5:DDR[0]
+sp=pikEncKernel3Top_1.m_axi_gmem0_6:DDR[0] 
+sp=pikEncKernel3Top_1.m_axi_gmem1_0:DDR[1] 
+sp=pikEncKernel3Top_1.m_axi_gmem1_1:DDR[1] 
+sp=pikEncKernel3Top_1.m_axi_gmem1_2:DDR[1] 
+sp=pikEncKernel3Top_1.m_axi_gmem1_3:DDR[1] 
+sp=pikEncKernel3Top_1.m_axi_gmem1_4:DDR[1] 
+#slr=pikEncKernel3Top_1:SLR2
+
+
diff --git a/codec/L2/demos/pikEnc/description.json b/codec/L2/demos/pikEnc/description.json
new file mode 100644
index 0000000000..6cf33f8dbd
--- /dev/null
+++ b/codec/L2/demos/pikEnc/description.json
@@ -0,0 +1,407 @@
+{
+    "gui": false, 
+    "name": "Xilinx PIK Encoder Demo", 
+    "description": "This example is based on Google's PIK, which was chosen as the base framework for JPEG XL. The pikEnc is based on the 'fast mode' of PIK which can provide better encoding efficiency than most other still image encoding methods. The pikEnc is based on Xilinx HLS design methodology and optimized for FPGA architecture. It can provide higher throughput and lower latency compared to software-based solutions", 
+    "flow": "vitis", 
+    "platform_allowlist": [
+        "u200"
+    ], 
+    "platform_blocklist": [
+        "zc"
+    ], 
+    "platform_properties": {
+        "u200": {
+            "v++": {
+                "compiler": {
+                    "clflags": [
+                        "--config PROJECT/conn_u200.cfg -g"
+                    ]
+                }
+            }
+        }
+    }, 
+    "data": [
+        "./data"
+    ], 
+    "launch": [
+        {
+            "cmd_args": " --xclbin BUILD/pikEnc.xclbin images/small32x32.png test.pik --fast", 
+            "name": "generic launch for all flows"
+        }
+    ],
+    "post_launch": [
+        {
+            "launch_cmd": [
+            ]
+        }
+    ], 
+    "host": {
+        "host_exe": "host.exe", 
+        "compiler": {
+            "sources": [
+                "LIB_DIR/L2/demos/pikEnc/host/pik/cpik_main.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/ac_predictions.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/ac_strategy.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/adaptive_quantization.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/adaptive_reconstruction.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/alpha.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/ans_common.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/ans_decode.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/ans_encode.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/ar_control_field.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/arch_specific.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/bilinear_transform.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/block_dictionary.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/butteraugli_comparator.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/butteraugli_distance.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/cache_aligned.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/chroma_from_luma.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/cmdline.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/codec_impl.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/codec_png.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/codec_pnm.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/color_correlation.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/color_encoding.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/compressed_dc.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/compressed_image.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/context_map_decode.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/context_map_encode.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/cpik.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/data_parallel.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/dc_predictor.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/dct.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/dct_util.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/deconvolve.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/descriptive_statistics.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/detect_dots.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/entropy_coder.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/epf.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/epf_target.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/external_image.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/gaborish.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/gauss_blur.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/gradient_map.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/headers.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/huffman_decode.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/huffman_encode.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/image.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/lehmer_code.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/linalg.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/lossless16.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/lossless8.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/lossless_entropy.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/metadata.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/noise.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/opsin_image.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/opsin_inverse.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/opsin_params.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/os_specific.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/padded_bytes.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/pik.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/pik_frame.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/pik_info.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/quant_weights.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/quantizer.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/saliency_map.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/single_image_handler.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/status.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/upscaler.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/yuv_convert.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/yuv_opsin_convert.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/butteraugli/butteraugli.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/simd/targets.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/brotli.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik/color_management.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/encode_order.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/pik_codec_common.cc",
+                "LIB_DIR/L2/demos/pikEnc/host/host_dev.cc", 
+                "LIB_DIR/ext/xcl2/xcl2.cpp",
+
+
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmsalpha.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmscam02.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmscgats.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmscnvrt.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmserr.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmsgamma.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmsgmt.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmshalf.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmsintrp.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmsio0.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmsio1.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmslut.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmsmtrx.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmsnamed.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmsopt.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmspack.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmspcs.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmsplugin.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmsps2.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmssamp.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmssm.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmstypes.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmsvirt.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmswtpnt.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/src/cmsxform.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lodepng/lodepng.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lodepng/lodepng_util.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/entropy_common.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fseU16.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fse_compress.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fse_decompress.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/huf_compress.cpp",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/huf_decompress.cpp",
+
+
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references_hq.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/enc/bit_cost.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/enc/block_splitter.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/enc/brotli_bit_stream.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/enc/cluster.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/enc/compress_fragment.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/enc/compress_fragment_two_pass.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/enc/dictionary_hash.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/enc/encode.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/enc/encoder_dict.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/enc/entropy_encode.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/enc/histogram.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/enc/literal_cost.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/enc/memory.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/enc/metablock.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/enc/static_dict.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/enc/utf8_util.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/common/dictionary.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/common/transform.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/dec/bit_reader.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/dec/decode.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/dec/huffman.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/dec/state.c",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/fuzz/decode_fuzzer.c"
+
+            ], 
+            "includepaths": [
+                "LIB_DIR/../utils/L1/include/",
+                "LIB_DIR/L2/include/hw/pikEnc/", 
+                "LIB_DIR/L2/include/hw/pikEnc/kernel3/", 
+                "LIB_DIR/L2/demos/pikEnc/host", 
+                "LIB_DIR/L2/demos/pikEnc/kernel", 
+                "LIB_DIR/ext/xcl2",
+
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lcms/include", 
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/brotli/c/include", 
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/lodepng",
+                "LIB_DIR/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib", 
+                "LIB_DIR/L2/demos/pikEnc/host/third_party"
+            ], 
+            "options": "-O3 "
+        }
+    }, 
+    "v++": {
+        "compiler": {
+            "includepaths": [
+                "LIB_DIR/../utils/L1/include/",
+                "LIB_DIR/L2/include/hw", 
+                "LIB_DIR/L2/include/hw/pikEnc", 
+                "LIB_DIR/L2/demos/pikEnc/kernel"
+            ]
+        } 
+    }, 
+    "containers": [
+        {
+            "name": "pikEnc",
+            "accelerators": [
+                {
+                    "location": "LIB_DIR/L2/demos/pikEnc/kernel/XAccPIKKernel1.cpp", 
+                    "frequency": 300.0, 
+                    "clflags": " -D KERNEL_NAME=pikEncKernel1Top", 
+                    "name": "pikEncKernel1Top", 
+                    "num_compute_units": 1, 
+                    "compute_units": [
+                        {
+                            "name": "pikEncKernel1Top", 
+                            "arguments": [
+                                {
+                                    "name": "gmem0_0", 
+                                    "memory": "DDR[0]"
+                                }, 
+                                {
+                                    "name": "gmem0_1", 
+                                    "memory": "DDR[0]"
+                                }, 
+                                {
+                                    "name": "gmem1_0", 
+                                    "memory": "DDR[1]"
+                                }, 
+                                {
+                                    "name": "gmem1_1", 
+                                    "memory": "DDR[1]"
+                                }, 
+                                {
+                                    "name": "gmem1_2", 
+                                    "memory": "DDR[1]"
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "location": "LIB_DIR/L2/demos/pikEnc/kernel/XAccPIKKernel2.cpp", 
+                    "frequency": 300.0, 
+                    "clflags":" -D KERNEL_NAME=pikEncKernel2Top --hls.pre_tcl $(CUR_DIR)/hls_pre.tcl",
+                    "name": "pikEncKernel2Top", 
+                    "num_compute_units": 1, 
+                    "compute_units": [
+                        {
+                            "name": "pikEncKernel2Top", 
+                            "arguments": [
+                                {
+                                    "name": "gmem0_0", 
+                                    "memory": "DDR[1]"
+                                }, 
+                                {
+                                    "name": "gmem0_1", 
+                                    "memory": "DDR[1]"
+                                },
+                                {
+                                    "name": "gmem0_2", 
+                                    "memory": "DDR[1]"
+                                }, 
+                                {
+                                    "name": "gmem0_3", 
+                                    "memory": "DDR[1]"
+                                },
+                                {
+                                    "name": "gmem1_0", 
+                                    "memory": "DDR[2]"
+                                }, 
+                                {
+                                    "name": "gmem1_1", 
+                                    "memory": "DDR[2]"
+                                }, 
+                                {
+                                    "name": "gmem1_2", 
+                                    "memory": "DDR[2]"
+                                },
+                                {
+                                    "name": "gmem1_3", 
+                                    "memory": "DDR[2]"
+                                }, 
+                                {
+                                    "name": "gmem1_4", 
+                                    "memory": "DDR[2]"
+                                },
+                                {
+                                    "name": "gmem1_5", 
+                                    "memory": "DDR[2]"
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "location": "LIB_DIR/L2/demos/pikEnc/kernel/XAccPIKKernel3.cpp",
+                    "files": [
+                        "LIB_DIR/L2/demos/pikEnc/kernel/kernel3/kernel3_common.cpp",
+                        "LIB_DIR/L2/demos/pikEnc/kernel/kernel3/ans.cpp",
+                        "LIB_DIR/L2/demos/pikEnc/kernel/kernel3/ac_tokenize.cpp",
+                        "LIB_DIR/L2/demos/pikEnc/kernel/kernel3/build_table_encode_histo.cpp",
+                        "LIB_DIR/L2/demos/pikEnc/kernel/kernel3/dc_shrink.cpp",
+                        "LIB_DIR/L2/demos/pikEnc/kernel/kernel3/dc_tokenize.cpp",
+                        "LIB_DIR/L2/demos/pikEnc/kernel/kernel3/ctrl_tokenize.cpp",
+                        "LIB_DIR/L2/demos/pikEnc/kernel/kernel3/build_cluster.cpp"
+                    ],
+                    "frequency": 300.0, 
+                    "clflags": " -D KERNEL_NAME=pikEncKernel3Top --hls.pre_tcl $(CUR_DIR)/hls_pre.tcl", 
+                    "name": "pikEncKernel3Top", 
+                    "num_compute_units": 1, 
+                    "compute_units": [
+                        {
+                            "name": "pikEncKernel3Top", 
+                            "arguments": [
+                                {
+                                    "name": "gmem0_0", 
+                                    "memory": "DDR[2]"
+                                }, 
+                                {
+                                    "name": "gmem0_1", 
+                                    "memory": "DDR[2]"
+                                },
+                                {
+                                    "name": "gmem0_2", 
+                                    "memory": "DDR[2]"
+                                }, 
+                                {
+                                    "name": "gmem0_3", 
+                                    "memory": "DDR[2]"
+                                },
+                                {
+                                    "name": "gmem0_4", 
+                                    "memory": "DDR[2]"
+                                },
+                                {
+                                    "name": "gmem0_5", 
+                                    "memory": "DDR[2]"
+                                }, 
+                                {
+                                    "name": "gmem0_6", 
+                                    "memory": "DDR[2]"
+                                },
+                                {
+                                    "name": "gmem1_0", 
+                                    "memory": "DDR[3]"
+                                }, 
+                                {
+                                    "name": "gmem1_1", 
+                                    "memory": "DDR[3]"
+                                },
+                                {
+                                    "name": "gmem1_2", 
+                                    "memory": "DDR[3]"
+                                },
+                                {
+                                    "name": "gmem1_3", 
+                                    "memory": "DDR[3]"
+                                }, 
+                                {
+                                    "name": "gmem1_4", 
+                                    "memory": "DDR[3]"
+                                } 
+                            ]
+                        }
+                    ]
+                }
+            ], 
+            "frequency": 180 
+        }
+    ], 
+    "testinfo": {
+        "disable": false, 
+        "jobs": [
+            {
+                "index": 0, 
+                "dependency": [], 
+                "env": "", 
+                "cmd": "", 
+                "max_memory_MB": {
+                    "vitis_hw_build": 81920, 
+                    "vitis_hw_emu": 40960, 
+                    "vitis_sw_emu": 10240, 
+                    "vitis_hw_run": 10240
+                }, 
+                "max_time_min": {
+                    "vitis_hw_build": 3200, 
+                    "vitis_hw_emu": 2400, 
+                    "vitis_sw_emu": 480, 
+                    "vitis_hw_run": 10
+                }
+            }
+        ], 
+        "targets": [
+            "vitis_sw_emu", 
+            "vitis_hw_emu", 
+            "vitis_hw"
+        ], 
+        "category": "canary"
+    }
+}
diff --git a/codec/L2/demos/pikEnc/hls_pre.tcl b/codec/L2/demos/pikEnc/hls_pre.tcl
new file mode 100644
index 0000000000..7f571596d6
--- /dev/null
+++ b/codec/L2/demos/pikEnc/hls_pre.tcl
@@ -0,0 +1,2 @@
+#config_dataflow -override_user_fifo_depth 32 -task_level_fifo_depth 32 -scalar_fifo_depth 32 -start_fifo_depth 32
+config_compile -enable_auto_rewind=false
diff --git a/codec/L2/demos/pikEnc/host/encode_order.cc b/codec/L2/demos/pikEnc/host/encode_order.cc
new file mode 100644
index 0000000000..ae919e4eee
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/encode_order.cc
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel3/encode_order.hpp"
+
+int hls_FindIndexAndRemove(int val, int* s, int len) {
+    // Returns how many entries different from -1 precede the first occurrence
+    // of val in s[0..len). That occurrence, if there is one, is overwritten
+    // with -1; when val is absent every entry different from -1 is counted.
+    int rank = 0;
+    int pos = 0;
+    for (; pos < len && s[pos] != val; ++pos) {
+        rank += (s[pos] != -1) ? 1 : 0;
+    }
+    if (pos < len) s[pos] = -1;
+    return rank;
+}
+
+void cnt_nz_beforeVal(int32_t order_zigzag[64], int32_t std[64], int32_t lehmer[64], uint8_t& end) {
+#pragma HLS INLINE OFF
+    // For every i: lehmer[i] = number of nonzero std[j] with j < order_zigzag[i]; std[] is consumed.
+    uint8_t tmp_end = 0; // last i that produced a nonzero count (stays 0 when there is none)
+    for (int i = 0; i < 64; ++i) {
+#pragma HLS PIPELINE II = 1
+        // NOTE(review): val is used as an index into std[] below, so it is assumed to be < 64 - confirm.
+        uint8_t val = order_zigzag[i]; // 0,2,1,4,3,5,8,7,9,
+        uint8_t cnt = 0;
+        for (int j = 0; j < 64; ++j) {
+#pragma HLS UNROLL
+            if (j < val) {
+                cnt += ((std[j] != 0) ? 1 : 0);
+            }
+        }
+        std[val] = 0; // zero this slot so later iterations no longer count it
+        lehmer[i] = cnt;
+        if (cnt != 0) {
+            tmp_end = i;
+        }
+    }
+    end = tmp_end;
+}
+
+void hls_EncodeCoeffOrder(hls::stream<int>& strm_order,
+                          int& num_bits, // pos
+                          int& num,
+                          hls::stream<nbits_t>& strm_nbits,
+                          hls::stream<uint16_t>& strm_bits) {
+#pragma HLS INLINE OFF
+    // Reads 64 entries from strm_order and writes their coded form through hls_WriteBits_strm.
+    num_bits = 0;
+    num = 0;
+
+    int32_t order_zigzag[64];
+    _XF_IMAGE_PRINT("start lehmercode:\n");
+    // Translate each incoming value through the hls_kNaturalCoeffOrderLut8 table.
+    for (int i = 0; i < 64; ++i) {
+#pragma HLS PIPELINE II = 1
+        int tmp = strm_order.read();
+        order_zigzag[i] = hls_kNaturalCoeffOrderLut8[tmp];
+        _XF_IMAGE_PRINT("%d,", (int)(order_zigzag[i]));
+    }
+    _XF_IMAGE_PRINT("\n");
+    // std[i] = i. NOTE(review): std[0] is 0, so cnt_nz_beforeVal never counts slot 0 - confirm intended.
+    int32_t lehmer[64];
+    int32_t std[64];
+    for (int i = 0; i < 64; ++i) {
+#pragma HLS PIPELINE II = 1
+        std[i] = i;
+    }
+    // cnt_nz_beforeVal fills lehmer[] and overwrites end with the last index holding a nonzero count (0 if none).
+    uint8_t end = 63;
+    cnt_nz_beforeVal(order_zigzag, std, lehmer, end);
+    // Pass the counts to _XF_IMAGE_PRINT before they are adjusted.
+    for (int i = 0; i < 64; ++i) {
+        _XF_IMAGE_PRINT("%d,", (int)(lehmer[i]));
+    }
+    _XF_IMAGE_PRINT("\n");
+    // Add one to every count in lehmer[1..end]; entries after end keep their value.
+    for (int32_t i = 1; i <= end; ++i) {
+#pragma HLS UNROLL
+        ++lehmer[i];
+    }
+    // Emit the counts in spans of hls_kCoeffOrderCodeSpan entries; lehmer[0] is never emitted.
+    for (int32_t i = 0; i < 64; i += hls_kCoeffOrderCodeSpan) {
+        const int32_t start = (i > 0) ? i : 1;
+        const int32_t end = i + hls_kCoeffOrderCodeSpan; // shadows the uint8_t end declared above
+        int32_t has_non_zero = 0;
+
+        for (int32_t j = start; j < end; ++j) {
+#pragma HLS UNROLL
+            has_non_zero |= lehmer[j];
+        }
+        if (!has_non_zero) { // all zero in the span -> escape
+            hls_WriteBits_strm(1, 0, num_bits, num, strm_nbits, strm_bits);
+        } else {
+            hls_WriteBits_strm(1, 1, num_bits, num, strm_nbits, strm_bits);
+            // Per value: one (3, 7) write for every full 7 it contains, then a (3, remainder) write.
+            for (int32_t j = start; j < end; ++j) {
+#pragma HLS PIPELINE II = 1
+
+                // merge
+                int32_t v;
+                assert(lehmer[j] <= 64);
+                for (v = lehmer[j]; v >= 7; v -= 7) {
+                    hls_WriteBits_strm(3, 7, num_bits, num, strm_nbits, strm_bits);
+                }
+                hls_WriteBits_strm(3, v, num_bits, num, strm_nbits, strm_bits);
+            }
+        }
+    }
+}
diff --git a/codec/L2/demos/pikEnc/host/host_dev.cc b/codec/L2/demos/pikEnc/host/host_dev.cc
new file mode 100755
index 0000000000..cd7629ee3d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/host_dev.cc
@@ -0,0 +1,604 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <sys/time.h>
+#include "host_dev.hpp"
+
+#ifndef HLS_TEST
+
+#include "xcl2.hpp"
+#include "xf_utils_sw/logger.hpp"
+
+#define XCL_BANK(n) (((unsigned int)(n)) | XCL_MEM_TOPOLOGY) // index n OR-ed with XCL_MEM_TOPOLOGY (defined elsewhere)
+// Shorthand names XCL_BANK0 .. XCL_BANK33 for XCL_BANK(0) .. XCL_BANK(33).
+#define XCL_BANK0 XCL_BANK(0)
+#define XCL_BANK1 XCL_BANK(1)
+#define XCL_BANK2 XCL_BANK(2)
+#define XCL_BANK3 XCL_BANK(3)
+#define XCL_BANK4 XCL_BANK(4)
+#define XCL_BANK5 XCL_BANK(5)
+#define XCL_BANK6 XCL_BANK(6)
+#define XCL_BANK7 XCL_BANK(7)
+#define XCL_BANK8 XCL_BANK(8)
+#define XCL_BANK9 XCL_BANK(9)
+#define XCL_BANK10 XCL_BANK(10)
+#define XCL_BANK11 XCL_BANK(11)
+#define XCL_BANK12 XCL_BANK(12)
+#define XCL_BANK13 XCL_BANK(13)
+#define XCL_BANK14 XCL_BANK(14)
+#define XCL_BANK15 XCL_BANK(15)
+#define XCL_BANK16 XCL_BANK(16)
+#define XCL_BANK17 XCL_BANK(17)
+#define XCL_BANK18 XCL_BANK(18)
+#define XCL_BANK19 XCL_BANK(19)
+#define XCL_BANK20 XCL_BANK(20)
+#define XCL_BANK21 XCL_BANK(21)
+#define XCL_BANK22 XCL_BANK(22)
+#define XCL_BANK23 XCL_BANK(23)
+#define XCL_BANK24 XCL_BANK(24)
+#define XCL_BANK25 XCL_BANK(25)
+#define XCL_BANK26 XCL_BANK(26)
+#define XCL_BANK27 XCL_BANK(27)
+#define XCL_BANK28 XCL_BANK(28)
+#define XCL_BANK29 XCL_BANK(29)
+#define XCL_BANK30 XCL_BANK(30)
+#define XCL_BANK31 XCL_BANK(31)
+#define XCL_BANK32 XCL_BANK(32)
+#define XCL_BANK33 XCL_BANK(33)
+
+unsigned long diff(const struct timeval* newTime, const struct timeval* oldTime) { // newTime - oldTime, in microseconds
+    return (newTime->tv_usec - oldTime->tv_usec) + 1000000 * (newTime->tv_sec - oldTime->tv_sec);
+}
+
+template <typename T>
+T* aligned_alloc(std::size_t num) { // 4096-byte aligned storage for num objects of T; throws std::bad_alloc on failure
+    void* raw = NULL;
+    if (posix_memalign(&raw, 4096, sizeof(T) * num) != 0) throw std::bad_alloc();
+    return static_cast<T*>(raw);
+}
+
+void host_func(std::string xclbinPath,
+               float* dataDDR,
+               ap_uint<AXI_SZ> k1_config[MAX_NUM_CONFIG],
+               ap_uint<AXI_SZ> k2_config[MAX_NUM_CONFIG],
+               ap_uint<AXI_SZ> k3_config[MAX_NUM_CONFIG],
+
+               ap_uint<AXI_SZ> cmap[AXI_CMAP],
+               ap_uint<AXI_SZ> order[MAX_NUM_ORDER],
+               ap_uint<AXI_SZ> quant_field[AXI_QF],
+
+               int len_dc_histo[2 * MAX_DC_GROUP],
+               int len_dc[2 * MAX_DC_GROUP],
+               ap_uint<AXI_SZ> dc_histo_code_out[2 * MAX_DC_GROUP * MAX_DC_HISTO_SIZE],
+               ap_uint<AXI_SZ> dc_code_out[2 * MAX_DC_GROUP * MAX_DC_SIZE],
+
+               int len_ac_histo[MAX_AC_GROUP],
+               int len_ac[MAX_AC_GROUP],
+               ap_uint<AXI_SZ> ac_histo_code_out[MAX_AC_GROUP * MAX_AC_HISTO_SIZE],
+               ap_uint<AXI_SZ> ac_code_out[MAX_AC_GROUP * MAX_AC_SIZE]) {
+    xf::common::utils_sw::Logger logger(std::cout, std::cerr);
+    cl_int fail;
+
+    struct timeval start_time; // End to end time clock start
+    gettimeofday(&start_time, 0);
+
+    // platform related operations
+    std::vector<cl::Device> devices = xcl::get_xil_devices();
+    cl::Device device = devices[0];
+
+    // Creating Context and Command Queue for selected Device
+    cl::Context context(device, NULL, NULL, NULL, &fail);
+    logger.logCreateContext(fail);
+    cl::CommandQueue q(context, device, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &fail);
+    logger.logCreateCommandQueue(fail);
+    std::string devName = device.getInfo<CL_DEVICE_NAME>();
+    printf("INFO: Found Device=%s\n", devName.c_str());
+    cl::Program::Binaries xclBins = xcl::import_binary_file(xclbinPath);
+
+    devices.resize(1);
+    cl::Program program(context, devices, xclBins, NULL, &fail);
+    logger.logCreateProgram(fail);
+
+    int repInt = 1;
+    // create kernels
+    std::vector<cl::Kernel> pik_kernel1(repInt);
+    std::vector<cl::Kernel> pik_kernel2(repInt);
+    std::vector<cl::Kernel> pik_kernel3(repInt);
+    for (int i = 0; i < repInt; i++) {
+        pik_kernel1[i] = cl::Kernel(program, "pikEncKernel1Top", &fail);
+        logger.logCreateKernel(fail);
+        pik_kernel2[i] = cl::Kernel(program, "pikEncKernel2Top", &fail);
+        logger.logCreateKernel(fail);
+        pik_kernel3[i] = cl::Kernel(program, "pikEncKernel3Top", &fail);
+        logger.logCreateKernel(fail);
+    }
+    std::cout << "INFO: kernel has been created" << std::endl;
+
+    // declare map of host buffers
+    std::cout << "kernel config size:" << MAX_NUM_CONFIG << std::endl;
+    std::cout << "buf size:" << k2_config[8] << std::endl;
+    std::cout << "ac size:" << k2_config[8] << std::endl;
+    std::cout << "dc size:" << k2_config[24] << std::endl;
+    std::cout << "acs size:" << k2_config[13] << std::endl;
+    std::cout << "cmap size:" << k2_config[10] << std::endl;
+    std::cout << "order size:" << k2_config[23] << std::endl;
+    std::cout << "quant size:" << k2_config[15] << std::endl;
+    std::cout << "ac_histo size:" << k3_config[12] * MAX_AC_HISTO_SIZE << std::endl;
+    std::cout << "dc_histo size:" << 2 * k3_config[13] * MAX_DC_HISTO_SIZE << std::endl;
+    std::cout << "ac_code size:" << k3_config[12] * MAX_AC_SIZE << std::endl;
+    std::cout << "dc_code size:" << 2 * k3_config[13] * MAX_DC_SIZE << std::endl;
+
+    ap_uint<32>* hb_config1 = aligned_alloc<ap_uint<32> >(MAX_NUM_CONFIG);
+    ap_uint<32>* hb_config2 = aligned_alloc<ap_uint<32> >(MAX_NUM_CONFIG);
+    ap_uint<32>* hb_config3 = aligned_alloc<ap_uint<32> >(MAX_NUM_CONFIG);
+    float* hb_data_in = aligned_alloc<float>(BUF_DEPTH);
+
+    ap_uint<32>* hb_buf_out = aligned_alloc<ap_uint<32> >(k2_config[8]);
+    ap_uint<32>* hb_qf = aligned_alloc<ap_uint<32> >(k2_config[9]);
+    ap_uint<32>* hb_cmap = aligned_alloc<ap_uint<32> >(k2_config[10]);
+
+    ap_uint<32>* hb_ac = aligned_alloc<ap_uint<32> >(k2_config[8]);
+    ap_uint<32>* hb_dc = aligned_alloc<ap_uint<32> >(k2_config[24]);
+    ap_uint<32>* hb_order = aligned_alloc<ap_uint<32> >(k2_config[23]);
+    ap_uint<32>* hb_strategy = aligned_alloc<ap_uint<32> >(k2_config[13]);
+    ap_uint<32>* hb_block = aligned_alloc<ap_uint<32> >(k2_config[14]);
+    ap_uint<32>* hb_quant = aligned_alloc<ap_uint<32> >(k2_config[15]);
+
+    ap_uint<32>* hb_histo_cfg = aligned_alloc<ap_uint<32> >(4 * k3_config[13] + 2 * k3_config[12]);
+    ap_uint<32>* hb_dc_histo = aligned_alloc<ap_uint<32> >(2 * k3_config[13] * MAX_DC_HISTO_SIZE);
+    ap_uint<32>* hb_dc_code = aligned_alloc<ap_uint<32> >(2 * k3_config[13] * MAX_DC_SIZE);
+    ap_uint<32>* hb_ac_histo = aligned_alloc<ap_uint<32> >(k3_config[12] * MAX_AC_HISTO_SIZE);
+    ap_uint<32>* hb_ac_code = aligned_alloc<ap_uint<32> >(k3_config[12] * MAX_AC_SIZE);
+
+    for (int j = 0; j < MAX_NUM_CONFIG; j++) {
+        hb_config1[j] = k1_config[j];
+        hb_config2[j] = k2_config[j];
+        hb_config3[j] = k3_config[j];
+    }
+
+    for (int j = 0; j < BUF_DEPTH; j++) hb_data_in[j] = dataDDR[j];
+
+    for (int j = 0; j < k2_config[8]; j++) hb_buf_out[j] = 0;
+
+    for (int j = 0; j < k2_config[8]; j++) {
+        hb_ac[j] = 0;
+    }
+
+    for (int j = 0; j < k2_config[24]; j++) {
+        hb_dc[j] = 0;
+    }
+
+    for (int j = 0; j < k2_config[13]; j++) {
+        hb_strategy[j] = 0;
+        hb_block[j] = 0;
+        hb_quant[j] = 0;
+    }
+
+    for (int j = 0; j < k2_config[10]; j++) {
+        hb_cmap[j] = 0;
+    }
+
+    for (int j = 0; j < k2_config[23]; j++) hb_order[j] = 0;
+
+    for (int j = 0; j < 4 * k3_config[13] + 2 * k3_config[12]; j++) {
+        hb_histo_cfg[j] = 0;
+    }
+
+    for (int j = 0; j < 2 * k3_config[13] * MAX_DC_HISTO_SIZE; j++) {
+        hb_dc_histo[j] = 0;
+    }
+
+    for (int j = 0; j < 2 * k3_config[13] * MAX_DC_SIZE; j++) {
+        hb_dc_code[j] = 0;
+    }
+
+    for (int j = 0; j < k3_config[12] * MAX_AC_HISTO_SIZE; j++) {
+        hb_ac_histo[j] = 0;
+    }
+
+    for (int j = 0; j < k3_config[12] * MAX_AC_SIZE; j++) {
+        hb_ac_code[j] = 0;
+    }
+
+    std::vector<cl_mem_ext_ptr_t> mext_o(18);
+    mext_o[0] = {XCL_BANK(0), hb_config1, 0};
+    mext_o[1] = {XCL_BANK(0), hb_data_in, 0};
+    mext_o[2] = {XCL_BANK(1), hb_buf_out, 0};
+    mext_o[3] = {XCL_BANK(1), hb_qf, 0};
+    mext_o[4] = {XCL_BANK(1), hb_cmap, 0};
+
+    mext_o[5] = {XCL_BANK(1), hb_config2, 0};
+    mext_o[6] = {XCL_BANK(2), hb_ac, 0};
+    mext_o[7] = {XCL_BANK(2), hb_dc, 0};
+    mext_o[8] = {XCL_BANK(2), hb_order, 0};
+    mext_o[9] = {XCL_BANK(2), hb_strategy, 0};
+    mext_o[10] = {XCL_BANK(2), hb_block, 0};
+    mext_o[11] = {XCL_BANK(2), hb_quant, 0};
+
+    mext_o[12] = {XCL_BANK(2), hb_config3, 0};
+    mext_o[13] = {XCL_BANK(3), hb_histo_cfg, 0};
+    mext_o[14] = {XCL_BANK(3), hb_ac_histo, 0};
+    mext_o[15] = {XCL_BANK(3), hb_ac_code, 0};
+    mext_o[16] = {XCL_BANK(3), hb_dc_histo, 0};
+    mext_o[17] = {XCL_BANK(3), hb_dc_code, 0};
+
+    // create device buffer and map dev buf to host buf
+    cl::Buffer db_conf1, db_buf_in, db_buf_out, db_qf, db_cmap;
+    cl::Buffer db_conf2, db_ac, db_dc, db_order, db_strategy, db_quant, db_block;
+    cl::Buffer db_conf3, db_histo_cfg, db_dc_histo, db_dc_code, db_ac_histo, db_ac_code;
+
+    db_conf1 = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                          sizeof(ap_int<32>) * MAX_NUM_CONFIG, &mext_o[0]);
+    db_buf_in = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                           sizeof(ap_int<32>) * k2_config[8], &mext_o[1]);
+    db_buf_out = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                            sizeof(ap_int<32>) * k2_config[8], &mext_o[2]);
+    db_qf = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                       sizeof(ap_int<32>) * k2_config[9], &mext_o[3]);
+    db_cmap = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                         sizeof(ap_int<32>) * k2_config[10], &mext_o[4]);
+
+    db_conf2 = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                          sizeof(ap_int<32>) * MAX_NUM_CONFIG, &mext_o[5]);
+    db_ac = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                       sizeof(ap_int<32>) * k2_config[8], &mext_o[6]);
+    db_dc = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                       sizeof(ap_int<32>) * k2_config[24], &mext_o[7]);
+    db_order = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                          sizeof(ap_int<32>) * k2_config[23], &mext_o[8]);
+    db_strategy = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                             sizeof(ap_int<32>) * k2_config[13], &mext_o[9]);
+    db_block = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                          sizeof(ap_int<32>) * k2_config[14], &mext_o[10]);
+    db_quant = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                          sizeof(ap_int<32>) * k2_config[15], &mext_o[11]);
+
+    db_conf3 = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                          sizeof(ap_int<32>) * MAX_NUM_CONFIG, &mext_o[12]);
+    db_histo_cfg = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                              sizeof(ap_int<32>) * (4 * k3_config[13] + 2 * k3_config[12]), &mext_o[13]);
+    db_ac_histo = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                             sizeof(ap_int<32>) * k3_config[12] * MAX_AC_HISTO_SIZE, &mext_o[14]);
+    db_ac_code = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                            sizeof(ap_int<32>) * k3_config[12] * MAX_AC_SIZE, &mext_o[15]);
+    db_dc_histo = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                             sizeof(ap_int<32>) * 2 * k3_config[13] * MAX_DC_HISTO_SIZE, &mext_o[16]);
+    db_dc_code = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                            sizeof(ap_int<32>) * 2 * k3_config[13] * MAX_DC_SIZE, &mext_o[17]);
+
+    // add buffers to migrate
+    std::vector<cl::Memory> init;
+
+    init.push_back(db_conf1);
+    init.push_back(db_buf_in);
+    init.push_back(db_buf_out);
+    init.push_back(db_qf);
+    init.push_back(db_cmap);
+
+    init.push_back(db_conf2);
+    init.push_back(db_ac);
+    init.push_back(db_dc);
+    init.push_back(db_order);
+    init.push_back(db_strategy);
+    init.push_back(db_block);
+    init.push_back(db_quant);
+
+    init.push_back(db_conf3);
+    init.push_back(db_histo_cfg);
+    init.push_back(db_ac_histo);
+    init.push_back(db_ac_code);
+    init.push_back(db_dc_histo);
+    init.push_back(db_dc_code);
+
+    // migrate data from host to device
+    q.enqueueMigrateMemObjects(init, CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED, nullptr, nullptr);
+    q.finish();
+
+    std::vector<cl::Memory> ob_in;
+    std::vector<cl::Memory> ob_out1, ob_out2, ob_out3;
+
+    ob_in.push_back(db_conf1);
+    ob_in.push_back(db_buf_in);
+    ob_out1.push_back(db_buf_out);
+    ob_out1.push_back(db_qf);
+    ob_out1.push_back(db_cmap);
+
+    ob_in.push_back(db_conf2);
+    ob_out2.push_back(db_ac);
+    ob_out2.push_back(db_dc);
+    ob_out2.push_back(db_order);
+    ob_out2.push_back(db_strategy);
+    ob_out2.push_back(db_block);
+    ob_out2.push_back(db_quant);
+
+    ob_in.push_back(db_conf3);
+    ob_out3.push_back(db_histo_cfg);
+    ob_out3.push_back(db_ac_histo);
+    ob_out3.push_back(db_ac_code);
+    ob_out3.push_back(db_dc_histo);
+    ob_out3.push_back(db_dc_code);
+
+    // declare events
+    std::vector<cl::Event> events_write(1);
+    std::vector<std::vector<cl::Event> > events_kernel(3);
+    std::vector<std::vector<cl::Event> > events_read(3);
+    for (int i = 0; i < 3; ++i) {
+        events_kernel[i].resize(1);
+        events_read[i].resize(1);
+    }
+
+    // set kernel args
+    for (int i = 0; i < repInt; i++) {
+        pik_kernel1[i].setArg(0, db_conf1);
+        pik_kernel1[i].setArg(1, db_buf_in);
+        pik_kernel1[i].setArg(2, db_buf_out);
+        pik_kernel1[i].setArg(3, db_cmap);
+        pik_kernel1[i].setArg(4, db_qf);
+
+        pik_kernel2[i].setArg(0, db_conf2);
+        pik_kernel2[i].setArg(1, db_buf_out);
+        pik_kernel2[i].setArg(2, db_qf);
+        pik_kernel2[i].setArg(3, db_cmap);
+        pik_kernel2[i].setArg(4, db_ac);
+        pik_kernel2[i].setArg(5, db_dc);
+        pik_kernel2[i].setArg(6, db_quant);
+        pik_kernel2[i].setArg(7, db_strategy);
+        pik_kernel2[i].setArg(8, db_block);
+        pik_kernel2[i].setArg(9, db_order);
+
+        pik_kernel3[i].setArg(0, db_conf3);
+        pik_kernel3[i].setArg(1, db_ac);
+        pik_kernel3[i].setArg(2, db_dc);
+        pik_kernel3[i].setArg(3, db_quant);
+        pik_kernel3[i].setArg(4, db_strategy);
+        pik_kernel3[i].setArg(5, db_block);
+        pik_kernel3[i].setArg(6, db_order);
+        pik_kernel3[i].setArg(7, db_histo_cfg);
+        pik_kernel3[i].setArg(8, db_dc_histo);
+        pik_kernel3[i].setArg(9, db_dc_code);
+        pik_kernel3[i].setArg(10, db_ac_histo);
+        pik_kernel3[i].setArg(11, db_ac_code);
+    }
+
+    // launch kernel and calculate kernel execution time
+    std::cout << "INFO: Kernel Start" << std::endl;
+
+    // migrate
+    q.enqueueMigrateMemObjects(ob_in, 0, nullptr, &events_write[0]);
+    q.enqueueTask(pik_kernel1[0], &events_write, &events_kernel[0][0]);
+    q.enqueueMigrateMemObjects(ob_out1, 1, &events_kernel[0], &events_read[0][0]);
+    q.enqueueTask(pik_kernel2[0], &events_read[0], &events_kernel[1][0]);
+    q.enqueueMigrateMemObjects(ob_out2, 1, &events_kernel[1], &events_read[1][0]);
+    q.enqueueTask(pik_kernel3[0], &events_read[1], &events_kernel[2][0]);
+    q.enqueueMigrateMemObjects(ob_out3, 1, &events_kernel[2], &events_read[2][0]);
+    q.finish();
+
+    struct timeval end_time;
+    gettimeofday(&end_time, 0);
+    std::cout << "INFO: Finish kernel execution" << std::endl;
+    std::cout << "INFO: Finish E2E execution" << std::endl;
+
+    // print related times
+    unsigned long timeStart, timeEnd, exec_time0;
+    std::cout << "-------------------------------------------------------" << std::endl;
+    events_write[0].getProfilingInfo(CL_PROFILING_COMMAND_START, &timeStart);
+    events_write[0].getProfilingInfo(CL_PROFILING_COMMAND_END, &timeEnd);
+    exec_time0 = (timeEnd - timeStart) / 1000.0;
+    std::cout << "INFO: Data transfer from host to device: " << exec_time0 << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+    events_read[0][0].getProfilingInfo(CL_PROFILING_COMMAND_START, &timeStart);
+    events_read[0][0].getProfilingInfo(CL_PROFILING_COMMAND_END, &timeEnd);
+    exec_time0 = (timeEnd - timeStart) / 1000.0;
+    std::cout << "INFO: Kernel1 Data transfer from device to host: " << exec_time0 << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+    events_read[1][0].getProfilingInfo(CL_PROFILING_COMMAND_START, &timeStart);
+    events_read[1][0].getProfilingInfo(CL_PROFILING_COMMAND_END, &timeEnd);
+    exec_time0 = (timeEnd - timeStart) / 1000.0;
+    std::cout << "INFO: Kernel2 Data transfer from device to host: " << exec_time0 << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+    events_read[2][0].getProfilingInfo(CL_PROFILING_COMMAND_START, &timeStart);
+    events_read[2][0].getProfilingInfo(CL_PROFILING_COMMAND_END, &timeEnd);
+    exec_time0 = (timeEnd - timeStart) / 1000.0;
+    std::cout << "INFO: Kernel3 Data transfer from device to host: " << exec_time0 << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+    exec_time0 = 0;
+    for (int i = 0; i < 3; ++i) {
+        events_kernel[i][0].getProfilingInfo(CL_PROFILING_COMMAND_START, &timeStart);
+        events_kernel[i][0].getProfilingInfo(CL_PROFILING_COMMAND_END, &timeEnd);
+        exec_time0 += (timeEnd - timeStart) / 1000.0;
+
+        std::cout << "INFO: Kernel" << i + 1 << " execution: " << (timeEnd - timeStart) / 1000.0 << " us\n";
+        std::cout << "-------------------------------------------------------" << std::endl;
+    }
+    std::cout << "INFO: kernel total execution: " << exec_time0 << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+    unsigned long exec_timeE2E = diff(&end_time, &start_time);
+    std::cout << "INFO: FPGA execution time:" << exec_timeE2E << " us\n";
+    std::cout << "-------------------------------------------------------" << std::endl;
+
+    // output
+    for (int i = 0; i < k2_config[10]; i++) {
+        cmap[i] = hb_cmap[i];
+    }
+    std::cout << "cmap finish" << std::endl;
+
+    for (int i = 0; i < k2_config[23]; i++) {
+        order[i] = hb_order[i];
+    }
+    std::cout << "order finish" << std::endl;
+
+    for (int i = 0; i < k2_config[15]; i++) {
+        quant_field[i] = hb_quant[i];
+    }
+    std::cout << "quant_field finish" << std::endl;
+
+    int dc_histo_sum = 0;
+    for (int j = 0; j < 2 * k3_config[13]; j++) {
+        len_dc_histo[j] = hb_histo_cfg[j];
+        dc_histo_sum += len_dc_histo[j];
+        std::cout << "len_dc_h:" << (int)hb_histo_cfg[j] << std::endl;
+    }
+    std::cout << "dc_histo_sum:" << dc_histo_sum << std::endl;
+
+    int ac_histo_sum = 0;
+    for (int j = 0; j < k3_config[12]; j++) {
+        len_ac_histo[j] = hb_histo_cfg[2 * k3_config[13] + j];
+        ac_histo_sum += len_ac_histo[j];
+        std::cout << "len_ac_h:" << (int)hb_histo_cfg[2 * k3_config[13] + j] << std::endl;
+    }
+    std::cout << "ac_histo_sum:" << ac_histo_sum << std::endl;
+
+    int len_dc_sum = 0;
+    for (int j = 0; j < 2 * k3_config[13]; j++) {
+        len_dc[j] = hb_histo_cfg[2 * k3_config[13] + k3_config[12] + j];
+        len_dc_sum += (len_dc[j] + 1) / 2;
+        std::cout << "len_dc_c:" << (int)hb_histo_cfg[2 * k3_config[13] + k3_config[12] + j] << std::endl;
+    }
+    std::cout << "len_dc_sum:" << len_dc_sum << std::endl;
+
+    int len_ac_sum = 0;
+    for (int j = 0; j < k3_config[12]; j++) {
+        len_ac[j] = hb_histo_cfg[4 * k3_config[13] + k3_config[12] + j];
+        len_ac_sum += (len_ac[j] + 1) / 2;
+        std::cout << "len_ac_c:" << (int)hb_histo_cfg[4 * k3_config[13] + k3_config[12] + j] << std::endl;
+    }
+    std::cout << "len_ac_sum:" << len_ac_sum << std::endl;
+
+    const uint64_t num_dc_histo = 2 * k3_config[13] * MAX_DC_HISTO_SIZE;
+    const uint64_t num_dc = 2 * k3_config[13] * MAX_DC_SIZE;
+    const uint64_t num_ac_histo = k3_config[12] * MAX_AC_HISTO_SIZE;
+    const uint64_t num_ac = k3_config[12] * MAX_AC_SIZE;
+    memcpy(dc_histo_code_out, hb_dc_histo, sizeof(ap_uint<AXI_SZ>) * num_dc_histo);
+    memcpy(dc_code_out, hb_dc_code, sizeof(ap_uint<AXI_SZ>) * num_dc);
+    memcpy(ac_histo_code_out, hb_ac_histo, sizeof(ap_uint<AXI_SZ>) * num_ac_histo);
+    memcpy(ac_code_out, hb_ac_code, sizeof(ap_uint<AXI_SZ>) * num_ac);
+
+    std::cout << "k2 order:" << std::endl;
+    for (int i = 0; i < k2_config[6] * k2_config[7]; i++) {
+        for (int j = 0; j < 64 * 3; j++) {
+            std::cout << (int)order[i * 3 * 64 + j] << ",";
+        }
+        std::cout << std::endl;
+    }
+
+    std::cout << "k2 quant:" << std::endl;
+    for (int i = 0; i < k2_config[3]; i++) {
+        for (int j = 0; j < k2_config[2]; j++) {
+            std::cout << (int)quant_field[i * k2_config[2] + j] << ",";
+        }
+        std::cout << std::endl;
+    }
+
+    std::cout << "k2 global_scale:" << (int)quant_field[k2_config[15] - 2] << std::endl;
+
+    std::cout << "k2 dequant:" << (int)quant_field[k2_config[15] - 1] << std::endl;
+
+    std::cout << "k2 acs:" << std::endl;
+    for (int i = 0; i < k2_config[3]; i++) {
+        for (int j = 0; j < k2_config[2]; j++) {
+            std::cout << (int)hb_strategy[i * k2_config[2] + j] << ",";
+        }
+        std::cout << std::endl;
+    }
+
+    std::cout << "k2 block:" << std::endl;
+    for (int i = 0; i < k2_config[3]; i++) {
+        for (int j = 0; j < k2_config[2]; j++) {
+            std::cout << (int)hb_block[i * k2_config[2] + j] << ",";
+        }
+        std::cout << std::endl;
+    }
+
+    std::cout << "k2 dc x:" << std::endl;
+    for (int i = 0; i < k2_config[3]; i++) {
+        for (int j = 0; j < k2_config[2]; j++) {
+            std::cout << (int)hb_dc[i * k2_config[2] + j] << ",";
+        }
+        std::cout << std::endl;
+    }
+
+    std::cout << "k2 dc y:" << std::endl;
+    for (int i = 0; i < k2_config[3]; i++) {
+        for (int j = 0; j < k2_config[2]; j++) {
+            std::cout << (int)hb_dc[i * k2_config[2] + k2_config[13]] << ",";
+        }
+        std::cout << std::endl;
+    }
+
+    std::cout << "k2 dc b:" << std::endl;
+    for (int i = 0; i < k2_config[3]; i++) {
+        for (int j = 0; j < k2_config[2]; j++) {
+            std::cout << (int)hb_dc[i * k2_config[2] + 2 * k2_config[13]] << ",";
+        }
+        std::cout << std::endl;
+    }
+
+    std::cout << "k2 ac:" << std::endl;
+    for (int i = 0; i < k2_config[3]; i++) {
+        for (int j = 0; j < k2_config[2]; j++) {
+            std::cout << "id=" << (i * k2_config[2] + j) << " ";
+            for (int k = 0; k < 64; k++) {
+                std::cout << (int)hb_ac[(i * k2_config[2] + j) * 64 + k] << ",";
+            }
+            std::cout << std::endl;
+        }
+    }
+
+    std::cout << "dc_histo_code_out:" << std::endl;
+    for (int j = 0; j < dc_histo_sum; j++) {
+        std::cout << ", " << (int)dc_histo_code_out[j];
+        if (j != 0 && j % 32 == 0) std::cout << std::endl;
+    }
+    std::cout << std::endl;
+
+    std::cout << "dc_code_out:" << std::endl;
+    for (int j = 0; j < len_dc_sum; j++) {
+        std::cout << ", " << (int)dc_code_out[j];
+        if (j != 0 && j % 32 == 0) std::cout << std::endl;
+    }
+    std::cout << std::endl;
+
+    std::cout << "ac_histo_code_out:" << std::endl;
+    for (int j = 0; j < ac_histo_sum; j++) {
+        std::cout << ", " << (int)ac_histo_code_out[j];
+        if (j != 0 && j % 32 == 0) std::cout << std::endl;
+    }
+    std::cout << std::endl;
+
+    std::cout << "ac_code_out:" << std::endl;
+    for (int j = 0; j < len_ac_sum; j++) {
+        std::cout << ", " << (int)ac_code_out[j];
+        if (j != 0 && j % 32 == 0) std::cout << std::endl;
+    }
+    std::cout << std::endl;
+
+    free(hb_buf_out);
+    free(hb_ac);
+    free(hb_dc);
+    free(hb_strategy);
+    free(hb_block);
+    free(hb_cmap);
+    free(hb_order);
+    free(hb_quant);
+    free(hb_histo_cfg);
+    free(hb_dc_histo);
+    free(hb_dc_code);
+    free(hb_ac_histo);
+    free(hb_ac_code);
+}
+
+#endif
diff --git a/codec/L2/demos/pikEnc/host/host_dev.hpp b/codec/L2/demos/pikEnc/host/host_dev.hpp
new file mode 100755
index 0000000000..f592b6f4bf
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/host_dev.hpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _XF_CODEC_HOST_DEV_HPP_
+#define _XF_CODEC_HOST_DEV_HPP_
+
+#include <ap_int.h>
+#include <hls_math.h>
+#include <hls_stream.h>
+
+#include "pik_common.hpp"
+
+#ifndef HLS_TEST
+void host_func(std::string xclbinPath,  // presumably the xclbin to program the device with -- TODO confirm
+               float* dataDDR,          // host input; the definition in this patch copies BUF_DEPTH floats from it
+               ap_uint<AXI_SZ> k1_config[MAX_NUM_CONFIG],  // config words passed to pikEncKernel1Top
+               ap_uint<AXI_SZ> k2_config[MAX_NUM_CONFIG],  // config words for pikEncKernel2Top; also size buffers
+               ap_uint<AXI_SZ> k3_config[MAX_NUM_CONFIG],  // config words for pikEncKernel3Top; also size buffers
+               // NOTE(review): notes here follow the definition tail added earlier in this patch -- confirm it matches.
+               ap_uint<AXI_SZ> cmap[AXI_CMAP],             // output: receives k2_config[10] words
+               ap_uint<AXI_SZ> order[MAX_NUM_ORDER],       // output: receives k2_config[23] words
+               ap_uint<AXI_SZ> quant_field[AXI_QF],        // output: receives k2_config[15] words
+               // DC outputs: 2 * k3_config[13] entries per length array, plus the histogram and code buffers.
+               int len_dc_histo[2 * MAX_DC_GROUP],
+               int len_dc[2 * MAX_DC_GROUP],
+               ap_uint<AXI_SZ> dc_histo_code_out[2 * MAX_DC_GROUP * MAX_DC_HISTO_SIZE],
+               ap_uint<AXI_SZ> dc_code_out[2 * MAX_DC_GROUP * MAX_DC_SIZE],
+               // AC outputs: k3_config[12] entries per length array, plus the histogram and code buffers.
+               int len_ac_histo[MAX_AC_GROUP],
+               int len_ac[MAX_AC_GROUP],
+               ap_uint<AXI_SZ> ac_histo_code_out[MAX_AC_GROUP * MAX_AC_HISTO_SIZE],
+               ap_uint<AXI_SZ> ac_code_out[MAX_AC_GROUP * MAX_AC_SIZE]);
+#endif
+
+#endif
diff --git a/codec/L2/demos/pikEnc/host/pik/.clang-format b/codec/L2/demos/pikEnc/host/pik/.clang-format
new file mode 100644
index 0000000000..ff5c354782
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/.clang-format
@@ -0,0 +1,89 @@
+---
+Language:        Cpp
+AccessModifierOffset: -1
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlinesLeft: true
+AlignOperands:   true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Inline
+AllowShortIfStatementsOnASingleLine: true
+AllowShortLoopsOnASingleLine: true
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: true
+BinPackParameters: false
+BraceWrapping:
+  AfterClass:      false
+  AfterControlStatement: false
+  AfterEnum:       false
+  AfterFunction:   false
+  AfterNamespace:  false
+  AfterObjCDeclaration: false
+  AfterStruct:     false
+  AfterUnion:      false
+  BeforeCatch:     false
+  BeforeElse:      false
+  IndentBraces:    false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+ColumnLimit:     120
+CommentPragmas:  '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat: true
+ExperimentalAutoDetectBinPacking: false
+ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]
+IncludeCategories:
+  - Regex:           '^<.*\.h>'
+    Priority:        1
+  - Regex:           '^<.*'
+    Priority:        2
+  - Regex:           '.*'
+    Priority:        3
+IndentCaseLabels: true
+IndentWidth:     4
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 4
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: false
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 200
+PointerAlignment: Left
+ReflowComments:  true
+SortIncludes:    false
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles:  false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard:        Cpp03
+TabWidth:        8
+UseTab:          Never
+...
+
diff --git a/codec/L2/demos/pikEnc/host/pik/ac_predictions.cc b/codec/L2/demos/pikEnc/host/pik/ac_predictions.cc
new file mode 100755
index 0000000000..a45d265edd
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/ac_predictions.cc
@@ -0,0 +1,629 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/ac_predictions.h"
+#include <cstdint>
+#include "pik/ac_strategy.h"
+#include "pik/codec.h"
+#include "pik/color_correlation.h"
+#include "pik/compressed_image_fwd.h"
+#include "pik/data_parallel.h"
+#include "pik/opsin_inverse.h"
+#include "pik/quant_weights.h"
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/bits.h"
+#include "pik/block.h"
+#include "pik/common.h"
+#include "pik/convolve.h"
+#include "pik/dct.h"
+#include "pik/dct_util.h"
+#include "pik/deconvolve.h"
+#include "pik/entropy_coder.h"
+#include "pik/image.h"
+#include "pik/profiler.h"
+#include "pik/quantizer.h"
+#include "pik/resample.h"
+#include "pik/upscaler.h"
+
+namespace pik {
+namespace {
+// Adds (add == true) or subtracts (add == false) `block` to/from `add_to`, leaving out the low and lowest
+// frequencies, i.e. the first 2*xsize columns of the first 2*ysize rows. `block` is assumed to be contiguous;
+// `add_to` has `ysize` slices of `xsize`*kBlockDim*kBlockDim coefficients with a stride of `stride`.
+template <bool add>
+SIMD_ATTR void AddBlockExceptLFAndLLFTo(const float* PIK_RESTRICT block,
+                                        size_t xsize, size_t ysize,
+                                        float* PIK_RESTRICT add_to,
+                                        size_t stride) {
+  // TODO(veluca): SIMD-fy
+  PIK_ASSERT(ysize <= 4);
+  // Rows with LF and LLF coefficients: only columns from 2*xsize onwards are touched.
+  for (size_t y = 0; y < 2 * ysize; y++) {  // no `stride`: offsets stay in the first slice while 2*ysize <= kBlockDim
+    for (size_t x = 2 * xsize; x < xsize * kBlockDim; x++) {
+      if (add) {
+        add_to[y * xsize * kBlockDim + x] += block[y * xsize * kBlockDim + x];
+      } else {
+        add_to[y * xsize * kBlockDim + x] -= block[y * xsize * kBlockDim + x];
+      }
+    }
+  }
+  size_t block_shift =
+      NumZeroBitsBelowLSBNonzero(kBlockDim * kBlockDim * xsize);  // presumably log2(slice size) -- TODO confirm
+  for (size_t y = 2 * ysize; y < ysize * kBlockDim; y++) {  // remaining rows: every column is processed
+    size_t line_start = y * xsize * kBlockDim;  // offset of row y in the contiguous `block`
+    size_t block_off = line_start >> block_shift;  // slice the row falls into
+    size_t block_idx = line_start & (xsize * kBlockDim * kBlockDim - 1);  // offset inside it (power-of-two size only)
+    line_start = block_off * stride + block_idx;  // same row in the strided `add_to`
+    for (size_t x = 0; x < xsize * kBlockDim; x++) {
+      if (add) {
+        add_to[line_start + x] += block[y * xsize * kBlockDim + x];
+      } else {
+        add_to[line_start + x] -= block[y * xsize * kBlockDim + x];
+      }
+    }
+  }
+}
+
+// Fills `block` with decoder-side values for the coefficients selected by the `coefficients` bit array: copies
+// `ysize` slices from `block_src`, un-color-correlates (c != 1: subtracts y_block * cmap_factor), runs the
+// quantize/dequantize roundtrip in place and color-correlates again. For c == 1 the result is stored into
+// `y_block` instead, which the c == 0 and c == 2 instantiations read. Used by predictors to compute predictions
+// on. Assumes that `block` and `y_block` have the same stride. `inv_quant_ac` is not referenced by this body.
+template <size_t c>
+SIMD_ATTR PIK_INLINE void ComputeDecoderCoefficients(
+    const float cmap_factor, const Quantizer& quantizer, uint8_t quant_table,
+    const int32_t quant_ac, const float inv_quant_ac, const uint8_t quant_kind,
+    size_t xsize, size_t ysize, const float* block_src, size_t block_stride,
+    uint64_t coefficients, float* block, size_t out_stride, float* y_block) {
+  // TODO(janwas): restrict ptrs
+#ifdef ADDRESS_SANITIZER
+  PIK_ASSERT(coefficients < 0x1000);
+  PIK_ASSERT(ysize <= 4);
+#endif
+  for (size_t y = 0; y < ysize; y++) {  // gather the source slices into `block`
+    memcpy(block + out_stride * y, block_src + block_stride * y,
+           sizeof(float) * xsize * kBlockDim * kBlockDim);
+  }
+  if (c != 1) {  // c == 0 or 2: remove the Y-correlated part before quantizing
+    for (size_t y = 0; y < ysize; y++) {
+      for (size_t i = 0; i < xsize * kBlockDim * kBlockDim; i++) {  // slice step is xsize*N*N, not out_stride
+        block[y * xsize * kBlockDim * kBlockDim + i] -=
+            y_block[y * xsize * kBlockDim * kBlockDim + i] * cmap_factor;
+      }
+    }
+  }
+  quantizer.QuantizeRoundtripBlockCoefficients<c>(  // in place: `block` is both source and destination
+      quant_table, quant_ac, quant_kind, xsize, ysize, block, out_stride, block,
+      out_stride, coefficients);
+  if (c != 1) {  // c == 0 or 2: add the Y-correlated part back
+    for (size_t y = 0; y < ysize; y++) {
+      for (size_t i = 0; i < xsize * kBlockDim * kBlockDim; i++) {
+        block[y * xsize * kBlockDim * kBlockDim + i] +=
+            y_block[y * xsize * kBlockDim * kBlockDim + i] * cmap_factor;
+      }
+    }
+  } else {  // c == 1: keep the roundtripped Y values for the other two channels
+    for (size_t i = 0; i < ysize; i++) {
+      memcpy(y_block + out_stride * i, block + out_stride * i,
+             sizeof(float) * xsize * kBlockDim * kBlockDim);
+    }
+  }
+}
+
+static constexpr float k4x4BlurStrength = 2.0007879236394901;  // literal is rounded to float precision
+// Symmetric 3x3 weight table; the nine taps sum to about 1.62, i.e. they are not normalized to 1.
+namespace lf_kernel {
+struct LFPredictionBlur {
+  PIK_INLINE const Weights3x3& Weights() const {
+    static constexpr float w0 = 0.41459272584128337;   // centre tap
+    static constexpr float w1 = 0.25489157325704559;   // the four edge taps
+    static constexpr float w2 = 0.046449679523692139;  // the four corner taps
+    static const Weights3x3 weights = {  // function-local static: built once, then returned by reference
+        {SIMD_REP4(w2)}, {SIMD_REP4(w1)}, {SIMD_REP4(w2)},
+        {SIMD_REP4(w1)}, {SIMD_REP4(w0)}, {SIMD_REP4(w1)},
+        {SIMD_REP4(w2)}, {SIMD_REP4(w1)}, {SIMD_REP4(w2)}};
+    return weights;
+  }
+};
+}  // namespace lf_kernel
+
+// Non-border blocks: subtracts the LF prediction from `residuals` in place (it stays subtracted there), computes
+// decoder-side coefficients in a scratch copy and re-adds the prediction to it; then derives the 2x2 DC block.
+template <size_t c>
+SIMD_ATTR void ComputeDecoderBlockAnd2x2DC(
+    bool is_border, bool predict_lf, bool predict_hf, AcStrategy acs,
+    const size_t residuals_stride, const size_t pred_stride,
+    const size_t lf2x2_stride, const size_t bx, const Quantizer& quantizer,
+    uint8_t quant_table, int32_t quant_ac,
+    const float* PIK_RESTRICT cmap_factor, const float* PIK_RESTRICT pred[3],
+    float* PIK_RESTRICT residuals[3], float* PIK_RESTRICT lf2x2_row[3],
+    const float* PIK_RESTRICT dc[3], float* PIK_RESTRICT y_residuals_dec) {
+  PROFILER_FUNC;
+  constexpr size_t N = kBlockDim;
+  float* block_start = residuals[c] + N * N * (bx - 1);  // residual rows are addressed from bx - 1
+  const float* pred_start = pred[c] + 2 * bx;  // `pred` is addressed with two entries per block column
+  SIMD_ALIGN float decoder_coeffs[AcStrategy::kMaxCoeffArea] = {};  // zero-initialized scratch copy
+  if (!is_border) {
+    if (predict_lf) {
+      // Remove prediction
+      for (size_t y = 0; y < 2 * acs.covered_blocks_y(); y++) {
+        size_t start = y < acs.covered_blocks_y() ? acs.covered_blocks_x() : 0;  // top rows skip the corner
+        for (size_t x = start; x < 2 * acs.covered_blocks_x(); x++) {
+          block_start[y * acs.covered_blocks_x() * kBlockDim + x] -=
+              pred_start[y * pred_stride + x];
+        }
+      }
+    }
+
+    // Quantization roundtrip
+    const size_t kind = acs.GetQuantKind();
+    const float inv_quant_ac = quantizer.inv_quant_ac(quant_ac);
+    // 0x302 has bits 1, 8, 9 set (presumably positions (0,1), (1,0), (1,1) of an 8-wide block -- TODO confirm).
+    ComputeDecoderCoefficients<c>(
+        cmap_factor[c], quantizer, quant_table, quant_ac, inv_quant_ac, kind,
+        acs.covered_blocks_x(), acs.covered_blocks_y(), block_start,
+        residuals_stride, 0x302, decoder_coeffs,
+        acs.covered_blocks_x() * kBlockDim * kBlockDim, y_residuals_dec);
+
+    if (predict_lf) {
+      // Add back prediction
+      for (size_t y = 0; y < 2 * acs.covered_blocks_y(); y++) {
+        size_t start = y < acs.covered_blocks_y() ? acs.covered_blocks_x() : 0;
+        for (size_t x = 0; x < start; x++) {  // skipped corner: take the source values as they are
+          decoder_coeffs[y * acs.covered_blocks_x() * kBlockDim + x] =
+              block_start[y * acs.covered_blocks_x() * kBlockDim + x];
+        }
+        for (size_t x = start; x < 2 * acs.covered_blocks_x(); x++) {  // re-add what was subtracted above
+          decoder_coeffs[y * acs.covered_blocks_x() * kBlockDim + x] +=
+              pred_start[y * pred_stride + x];
+        }
+      }
+    }
+  } else {  // border block: take the DC value and, if enabled, three LF predictions directly
+    decoder_coeffs[0] = dc[c][bx];
+    if (predict_lf) {
+      decoder_coeffs[1] = pred[c][2 * bx + 1];
+      decoder_coeffs[N] = pred[c][pred_stride + 2 * bx];
+      decoder_coeffs[N + 1] = pred[c][pred_stride + 2 * bx + 1];
+    }
+  }
+  if (predict_hf) {  // the 2x2 output is only produced when HF prediction is on
+    acs.DC2x2FromLowFrequencies(decoder_coeffs, N * N * acs.covered_blocks_x(),
+                                lf2x2_row[c] + 2 * bx, lf2x2_stride);
+  }
+}
+
+// Copies the lowest-frequency coefficients from the DC-sized image `llf` into
+// the top-left corner of the corresponding coefficient blocks of `ac64`.
+//
+// `llf` is read starting at row/column 1 and the iterated area is two samples
+// smaller than `llf` in each dimension. Only blocks for which IsFirstBlock()
+// holds are written; each receives covered_blocks_x() by covered_blocks_y()
+// values. Every copied value is also reported on std::cout as a
+// "std_llf_index" trace line.
+SIMD_ATTR void CopyLlf(const Image3F& llf, const AcStrategyImage& ac_strategy,
+                       Image3F* PIK_RESTRICT ac64) {
+  PROFILER_FUNC;
+  constexpr size_t N = kBlockDim;
+  constexpr size_t block_size = N * N;
+  const size_t xsize = llf.xsize() - 2;
+  const size_t ysize = llf.ysize() - 2;
+  const size_t llf_stride = llf.PixelsPerRow();
+
+  // Copy (reinterpreted) DC values to 0-th block values.
+  for (size_t c = 0; c < ac64->kNumPlanes; c++) {
+    for (size_t by = 0; by < ysize; ++by) {
+      const float* llf_row = llf.ConstPlaneRow(c, by + 1);
+      float* ac_row = ac64->PlaneRow(c, by);
+      AcStrategyRow strategy_row = ac_strategy.ConstRow(by);
+      for (size_t bx = 0; bx < xsize; bx++) {
+        AcStrategy strategy = strategy_row[bx];
+        if (!strategy.IsFirstBlock()) continue;
+        // Row pitch inside the coefficient block is set by the block's width
+        // (covered_blocks_x), the same indexing UpdateLfForDecoder uses.
+        // Pitching by covered_blocks_y only agrees for square transforms.
+        const size_t coeff_stride = strategy.covered_blocks_x() * kBlockDim;
+        float* block = ac_row + block_size * bx;
+        for (size_t y = 0; y < strategy.covered_blocks_y(); y++) {
+          for (size_t x = 0; x < strategy.covered_blocks_x(); x++) {
+            block[coeff_stride * y + x] = llf_row[bx + 1 + llf_stride * y + x];
+            // '\n' instead of std::endl: same text, but no forced stream
+            // flush for every single coefficient.
+            std::cout<<"std_llf_index c="<<c<<" by="<<by<<" bx="<<bx<<" y="<<y<<" x="<<x<<'\n';
+          }
+        }
+      }
+    }
+  }
+}
+
+// Blur weights used by UpSample4x4BlurDCT: 3 rows of 4 floats.
+using Ub4Kernel = std::array<std::array<float, 4>, 3>;
+
+// Fills `out` from the taps returned by GaussianKernel(4, sigma). For every
+// column k in [0, 4) the taps are split into three consecutive groups,
+// [0, 4 - k), [4 - k, 8 - k) and [8 - k, end); the sums of those groups become
+// (*out)[0][k], (*out)[1][k] and (*out)[2][k] respectively.
+SIMD_ATTR void ComputeUb4Kernel(const float sigma, Ub4Kernel* out) {
+  const std::vector<float> taps = GaussianKernel(4, sigma);
+  const size_t num_taps = taps.size();
+  Ub4Kernel& weights = *out;
+  for (size_t k = 0; k < 4; ++k) {
+    const size_t first_cut = 4 - k;
+    const size_t second_cut = 8 - k;
+    // Each sum starts at 0.0f and adds the taps in increasing index order.
+    float head = 0.0f;
+    for (size_t t = 0; t < first_cut; ++t) {
+      head += taps[t];
+    }
+    float body = 0.0f;
+    for (size_t t = first_cut; t < second_cut; ++t) {
+      body += taps[t];
+    }
+    float tail = 0.0f;
+    for (size_t t = second_cut; t < num_taps; ++t) {
+      tail += taps[t];
+    }
+    weights[0][k] = head;
+    weights[1][k] = body;
+    weights[2][k] = tail;
+  }
+}
+
+// Adds to "add_to" (DCT) an image defined by the following transformations:
+//  1) Upsample image 4x4 with nearest-neighbor
+//  2) Blur with a Gaussian kernel of radius 4 and given sigma
+//  3) perform TransposedScaledDCT()
+//  4) Zero out the top 2x2 corner of each DCT block
+//  5) Negates the prediction if add is false (so the encoder subtracts, and
+//  the decoder adds)
+//
+// dc_rect:     area to process, in block units; must be non-empty and must
+//              fit inside `add_to` (both enforced with PIK_CHECK).
+// img:         source image; row y + 1 and columns x + 1 .. x + 3 are read.
+// kernel:      3x4 weights as produced by ComputeUb4Kernel.
+// ac_strategy, acs_rect: only blocks that are a first block and for which
+//              PredictHF() is true are processed.
+// blur_x:      scratch image for the horizontal pass; must be 8 * bxs wide
+//              and 2 * bys + 2 tall (checked with PIK_ASSERT).
+// add_to:      coefficient image that receives the result, block_size floats
+//              per block.
+// NOTE(review): `img` and `blur_x` are indexed from 0 rather than from
+// dc_rect.x0()/y0(); the callers visible in this file pass a rect at the
+// origin - confirm before using a non-zero origin.
+template <bool add>
+SIMD_ATTR void UpSample4x4BlurDCT(const Rect& dc_rect, const ImageF& img,
+                                  const Ub4Kernel& kernel,
+                                  const AcStrategyImage& ac_strategy,
+                                  const Rect& acs_rect,
+                                  ImageF* PIK_RESTRICT blur_x,
+                                  ImageF* PIK_RESTRICT add_to) {
+  PROFILER_FUNC;
+  constexpr size_t N = kBlockDim;
+  constexpr size_t block_size = N * N;
+
+  // TODO(robryk): There's no good reason to compute the full DCT here. It's
+  // fine if the output is in pixel space, we just need to zero out top 2x2
+  // DCT coefficients. We can do that by computing a "partial DCT" and
+  // subtracting (we can have two outputs: a positive pixel-space output and a
+  // negative DCT-space output).
+
+  // TODO(robryk): Failing that, merge the blur and DCT into a single linear
+  // operation, if feasible.
+
+  const size_t bx0 = dc_rect.x0();
+  const size_t bxs = dc_rect.xsize();
+  PIK_CHECK(bxs >= 1);
+  const size_t bx1 = bx0 + bxs;
+  const size_t bx_max = DivCeil(add_to->xsize(), block_size);
+  const size_t by0 = dc_rect.y0();
+  const size_t bys = dc_rect.ysize();
+  PIK_CHECK(bys >= 1);
+  const size_t by1 = by0 + bys;
+  const size_t by_max = add_to->ysize();
+  PIK_CHECK(bx1 <= bx_max && by1 <= by_max);
+  // Two input samples per block in each direction.
+  const size_t xs = bxs * 2;
+  const size_t ys = bys * 2;
+
+  using D = SIMD_PART(float, SIMD_MIN(SIMD_FULL(float)::N, 8));
+  using V = D::V;
+  const D d;
+  // Kernel weights broadcast to vectors, one per kernel row and column.
+  V vw0[4] = {set1(d, kernel[0][0]), set1(d, kernel[0][1]),
+              set1(d, kernel[0][2]), set1(d, kernel[0][3])};
+  V vw1[4] = {set1(d, kernel[1][0]), set1(d, kernel[1][1]),
+              set1(d, kernel[1][2]), set1(d, kernel[1][3])};
+  V vw2[4] = {set1(d, kernel[2][0]), set1(d, kernel[2][1]),
+              set1(d, kernel[2][2]), set1(d, kernel[2][3])};
+
+  // Horizontal pass: every input sample yields 4 output samples, each a
+  // weighted sum of three neighbouring inputs.
+  PIK_ASSERT(blur_x->xsize() == xs * 4 && blur_x->ysize() == ys + 2);
+  for (size_t y = 0; y < ys + 2; ++y) {
+    const float* PIK_RESTRICT row = img.ConstRow(y + 1);
+    float* const PIK_RESTRICT row_out = blur_x->Row(y);
+    for (int x = 0; x < xs; ++x) {
+      const float v0 = row[x + 1];
+      const float v1 = row[x + 2];
+      const float v2 = row[x + 3];
+      for (int ix = 0; ix < 4; ++ix) {
+        row_out[4 * x + ix] =
+            v0 * kernel[0][ix] + v1 * kernel[1][ix] + v2 * kernel[2][ix];
+      }
+    }
+  }
+
+  // Vertical pass fused with the transform: the blurred pixels of one
+  // transform are assembled in `block` (row pitch AcStrategy::kMaxBlockDim),
+  // transformed into `coeffs` and then accumulated into `add_to`.
+  {
+    PROFILER_ZONE("dct upsample");
+    for (size_t by = 0; by < bys; ++by) {
+      const D d;
+      SIMD_ALIGN float block[AcStrategy::kMaxCoeffArea];
+      SIMD_ALIGN float coeffs[AcStrategy::kMaxCoeffArea];
+      const size_t out_stride = add_to->PixelsPerRow();
+      const size_t blur_stride = blur_x->PixelsPerRow();
+
+      float* PIK_RESTRICT row_out = add_to->Row(by0 + by);
+      AcStrategyRow ac_strategy_row = ac_strategy.ConstRow(acs_rect, by0 + by);
+      for (int bx = 0; bx < bxs; ++bx) {
+        AcStrategy acs = ac_strategy_row[bx0 + bx];
+        if (!acs.IsFirstBlock()) continue;
+        if (!acs.PredictHF()) continue;
+        for (int idy = 0; idy < acs.covered_blocks_y(); idy++) {
+          // Four consecutive rows of the horizontally blurred image feed one
+          // block row.
+          const float* PIK_RESTRICT row0d = blur_x->ConstRow(2 * (by + idy));
+          const float* PIK_RESTRICT row1d = row0d + blur_stride;
+          const float* PIK_RESTRICT row2d = row1d + blur_stride;
+          const float* PIK_RESTRICT row3d = row2d + blur_stride;
+          for (int idx = 0; idx < acs.covered_blocks_x(); idx++) {
+            float* PIK_RESTRICT block_ptr =
+                block + AcStrategy::kMaxCoeffBlocks * block_size * idy +
+                8 * idx;
+            for (int ix = 0; ix < 8; ix += d.N) {
+              const auto val0 = load(d, &row0d[(bx + idx) * 8 + ix]);
+              const auto val1 = load(d, &row1d[(bx + idx) * 8 + ix]);
+              const auto val2 = load(d, &row2d[(bx + idx) * 8 + ix]);
+              const auto val3 = load(d, &row3d[(bx + idx) * 8 + ix]);
+              // vala fills pixel rows iy (0..3), valb rows iy + 4 (4..7).
+              for (int iy = 0; iy < 4; ++iy) {
+                // A mul_add pair is faster but causes 1E-5 difference.
+                const auto vala =
+                    val0 * vw0[iy] + val1 * vw1[iy] + val2 * vw2[iy];
+                const auto valb =
+                    val1 * vw0[iy] + val2 * vw1[iy] + val3 * vw2[iy];
+                store(vala, d, &block_ptr[iy * AcStrategy::kMaxBlockDim + ix]);
+                store(valb, d,
+                      &block_ptr[iy * AcStrategy::kMaxBlockDim +
+                                 AcStrategy::kMaxBlockDim * 4 + ix]);
+              }
+            }
+          }
+        }
+
+        acs.TransformFromPixels(block, AcStrategy::kMaxBlockDim, coeffs,
+                                acs.covered_blocks_x() * kBlockDim * kBlockDim);
+        AddBlockExceptLFAndLLFTo<add>(
+            coeffs, acs.covered_blocks_x(), acs.covered_blocks_y(),
+            row_out + block_size * (bx0 + bx), out_stride);
+      }
+    }
+  }
+}
+
+}  // namespace
+
+// Derives the lowest-frequency coefficients of every block (1x1 for DCT8, 2x2
+// for DCT16, etc.) from `dc` and stores them in `llf`. Afterwards the
+// strategies that were used ("llf_acs:") and plane 0 of the result
+// ("std_llf_x:") are dumped to std::cout.
+//
+// Positions on the outermost rows/columns of `dc` are handled as plain DCT
+// blocks; an interior position (bx, by) uses the strategy stored at
+// (bx - 1, by - 1) of `ac_strategy` within `acs_rect`.
+SIMD_ATTR void ComputeLlf(const Image3F& dc, const AcStrategyImage& ac_strategy,
+                          const Rect& acs_rect, Image3F* PIK_RESTRICT llf) {
+  PROFILER_FUNC;
+  const size_t width = dc.xsize();
+  const size_t height = dc.ysize();
+  const size_t src_stride = dc.PixelsPerRow();
+  const size_t dst_stride = llf->PixelsPerRow();
+
+  // Reinterpret the DC values as lowest-frequency coefficients.
+  for (size_t plane = 0; plane < llf->kNumPlanes; ++plane) {
+    for (size_t by = 0; by < height; ++by) {
+      const bool inner_row = by != 0 && by != height - 1;
+      AcStrategyRow strategies =
+          ac_strategy.ConstRow(acs_rect, inner_row ? by - 1 : 0);
+      const float* src = dc.ConstPlaneRow(plane, by);
+      float* dst = llf->PlaneRow(plane, by);
+      for (size_t bx = 0; bx < width; ++bx) {
+        const bool inner = inner_row && bx != 0 && bx != width - 1;
+        AcStrategy acs = inner ? strategies[bx - 1]
+                               : AcStrategy(AcStrategy::Type::DCT, 0);
+        acs.LowestFrequenciesFromDC(src + bx, src_stride, dst + bx,
+                                    dst_stride);
+      }
+    }
+  }
+
+  // Trace: raw strategy value chosen for every position, one line per row.
+  std::cout << "llf_acs:" << std::endl;
+  for (size_t by = 0; by < height; ++by) {
+    const bool inner_row = by != 0 && by != height - 1;
+    AcStrategyRow strategies =
+        ac_strategy.ConstRow(acs_rect, inner_row ? by - 1 : 0);
+    for (size_t bx = 0; bx < width; ++bx) {
+      const bool inner = inner_row && bx != 0 && bx != width - 1;
+      AcStrategy acs = inner ? strategies[bx - 1]
+                             : AcStrategy(AcStrategy::Type::DCT, 0);
+      std::cout << static_cast<int>(acs.RawStrategy()) << ",";
+    }
+    std::cout << std::endl;
+  }
+
+  // Trace: plane 0 of the computed coefficients, one line per row.
+  std::cout << "std_llf_x:" << std::endl;
+  for (size_t by = 0; by < height; ++by) {
+    float* values = llf->PlaneRow(0, by);
+    for (size_t bx = 0; bx < width; ++bx) {
+      std::cout << values[bx] << ",";
+    }
+    std::cout << std::endl;
+  }
+}
+
+// LF prediction, carried out independently for every plane of `lf2x2`:
+//  1) expand the lowest-frequency coefficients in `llf` into `tmp2x2`, two
+//     samples per position in each direction (DC2x2FromLowestFrequencies);
+//  2) blur `tmp2x2` with lf_kernel::LFPredictionBlur() into the plane;
+//  3) convert every 2x2 cell of the plane, in place, with
+//     LowFrequenciesFromDC2x2.
+// Border positions use a plain DCT strategy; an interior position (bx, by)
+// uses the strategy at (bx - 1, by - 1) of `ac_strategy` within `acs_rect`.
+SIMD_ATTR void PredictLf(const AcStrategyImage& ac_strategy,
+                         const Rect& acs_rect, const Image3F& llf,
+                         ImageF* tmp2x2, Image3F* lf2x2) {
+  PROFILER_FUNC;
+  const size_t width = llf.xsize();
+  const size_t height = llf.ysize();
+  const size_t llf_stride = llf.PixelsPerRow();
+  const size_t plane_stride = lf2x2->PixelsPerRow();
+  const size_t tmp_stride = tmp2x2->PixelsPerRow();
+
+  for (size_t plane = 0; plane < lf2x2->kNumPlanes; ++plane) {
+    ImageF* PIK_RESTRICT out_plane =
+        const_cast<ImageF*>(&lf2x2->Plane(plane));
+
+    // Step 1: initial DC2x2 image from the lowest-frequency coefficients.
+    for (size_t by = 0; by < height; ++by) {
+      const bool inner_row = by != 0 && by != height - 1;
+      AcStrategyRow strategies =
+          ac_strategy.ConstRow(acs_rect, inner_row ? by - 1 : 0);
+      float* dst = tmp2x2->Row(2 * by);
+      const float* src = llf.PlaneRow(plane, by);
+      for (size_t bx = 0; bx < width; ++bx) {
+        const bool inner = inner_row && bx != 0 && bx != width - 1;
+        AcStrategy acs = inner ? strategies[bx - 1]
+                               : AcStrategy(AcStrategy::Type::DCT, 0);
+        acs.DC2x2FromLowestFrequencies(src + bx, llf_stride, dst + 2 * bx,
+                                       tmp_stride);
+      }
+    }
+
+    // Step 2: smooth the DC2x2 image. Images narrower than
+    // kConvolveMinWidth take the generic 3x3 convolution.
+    if (width * 2 >= kConvolveMinWidth) {
+      const BorderNeverUsed border;
+      // Parallel doesn't help here for moderate-sized images.
+      const ExecutorLoop executor;
+      ConvolveT<strategy::Symmetric3>::Run(border, executor, *tmp2x2,
+                                           lf_kernel::LFPredictionBlur(),
+                                           out_plane);
+    } else {
+      slow::General3x3Convolution<1, WrapMirror>::Run(
+          *tmp2x2, width * 2, height * 2, lf_kernel::LFPredictionBlur(),
+          out_plane);
+    }
+
+    // Step 3: turn the smoothed 2x2 cells into LF coefficients, in place.
+    for (size_t by = 0; by < height; ++by) {
+      const bool inner_row = by != 0 && by != height - 1;
+      AcStrategyRow strategies =
+          ac_strategy.ConstRow(acs_rect, inner_row ? by - 1 : 0);
+      float* row = out_plane->Row(2 * by);
+      for (size_t bx = 0; bx < width; ++bx) {
+        const bool inner = inner_row && bx != 0 && bx != width - 1;
+        AcStrategy acs = inner ? strategies[bx - 1]
+                               : AcStrategy(AcStrategy::Type::DCT, 0);
+        float* cell = row + 2 * bx;
+        acs.LowFrequenciesFromDC2x2(cell, plane_stride, cell, plane_stride);
+      }
+    }
+  }
+}
+
+// Encoder-side LF setup. In this version only the lowest-frequency
+// coefficients are produced: they are derived from `dc` with ComputeLlf()
+// into a temporary image of the same size and then copied into the top-left
+// corner of each block of `ac64` with CopyLlf().
+//
+// `predict_lf`, `predict_hf`, `cmap`, `cmap_rect`, `quantizer`, `quant_cf`,
+// `quant_cf_map` and `dc2x2` are kept for interface compatibility; this body
+// neither reads nor writes them.
+SIMD_ATTR void PredictLfForEncoder(
+    bool predict_lf, bool predict_hf, const Image3F& dc,
+    const AcStrategyImage& ac_strategy, const ColorCorrelationMap& cmap,
+    const Rect& cmap_rect, const Quantizer& quantizer, const ImageB& quant_cf,
+    const uint8_t quant_cf_map[kMaxQuantControlFieldValue][256],
+    Image3F* PIK_RESTRICT ac64, Image3F* dc2x2) {
+  PROFILER_FUNC;
+  // No lf2x2 image or stride locals here: nothing in this body consumed
+  // them, so the 2x-sized three-plane allocation was pure overhead.
+  Image3F llf(dc.xsize(), dc.ysize());
+  ComputeLlf(dc, ac_strategy, Rect(ac_strategy.ConstRaw()), &llf);
+  CopyLlf(llf, ac_strategy, ac64);
+}
+
+// Decoder-side counterpart of PredictLfForEncoder, applied to plane `c` of
+// the blocks inside `tile`:
+//  1) copies the lowest-frequency coefficients from `llf` (read with a +1
+//     row/column offset) into the top-left corner of each first block of
+//     `ac64`;
+//  2) if `predict_lf`, adds the predictions stored in `lf2x2` to the
+//     low-frequency coefficients, skipping the lowest-frequency corner;
+//  3) if `predict_hf`, calls DC2x2FromLowFrequencies on each first block to
+//     fill the matching cells of `dc2x2`.
+// `lf2x2` is only dereferenced when `predict_lf` is set and `dc2x2` only when
+// `predict_hf` is set.
+SIMD_ATTR void UpdateLfForDecoder(const Rect& tile, bool predict_lf,
+                                  bool predict_hf,
+                                  const AcStrategyImage& ac_strategy,
+                                  const Rect& acs_rect, const Image3F& llf,
+                                  Image3F* PIK_RESTRICT ac64,
+                                  Image3F* PIK_RESTRICT dc2x2,
+                                  Image3F* PIK_RESTRICT lf2x2, size_t c) {
+  constexpr size_t kCoeffsPerBlock = kBlockDim * kBlockDim;
+  const size_t x_begin = tile.x0();
+  const size_t x_end = x_begin + tile.xsize();
+  const size_t y_begin = tile.y0();
+  const size_t y_end = y_begin + tile.ysize();
+
+  // 1) Lowest-frequency coefficients.
+  const size_t llf_stride = llf.PixelsPerRow();
+  for (size_t by = y_begin; by < y_end; ++by) {
+    const float* src_row = llf.ConstPlaneRow(c, by + 1);
+    float* dst_row = ac64->PlaneRow(c, by);
+    AcStrategyRow strategies = ac_strategy.ConstRow(acs_rect, by);
+    for (size_t bx = x_begin; bx < x_end; ++bx) {
+      AcStrategy acs = strategies[bx];
+      if (!acs.IsFirstBlock()) continue;
+      const size_t cx = acs.covered_blocks_x();
+      const size_t cy = acs.covered_blocks_y();
+      float* block = dst_row + kCoeffsPerBlock * bx;
+      const float* src = src_row + bx + 1;
+      for (size_t y = 0; y < cy; ++y) {
+        for (size_t x = 0; x < cx; ++x) {
+          block[cx * kBlockDim * y + x] = src[llf_stride * y + x];
+        }
+      }
+    }
+  }
+
+  // 2) Add the LF predictions.
+  if (predict_lf) {
+    const size_t pred_stride = lf2x2->PixelsPerRow();
+    for (size_t by = y_begin; by < y_end; ++by) {
+      AcStrategyRow strategies = ac_strategy.ConstRow(acs_rect, by);
+      float* PIK_RESTRICT coeff_row = ac64->PlaneRow(c, by);
+      const float* PIK_RESTRICT pred_row =
+          lf2x2->ConstPlaneRow(c, 2 * (by + 1));
+      for (size_t bx = x_begin; bx < x_end; ++bx) {
+        AcStrategy acs = strategies[bx];
+        if (!acs.IsFirstBlock()) continue;
+        const size_t cx = acs.covered_blocks_x();
+        const size_t cy = acs.covered_blocks_y();
+        float* PIK_RESTRICT coeffs = coeff_row + bx * kCoeffsPerBlock;
+        const float* PIK_RESTRICT pred = pred_row + (bx + 1) * 2;
+        for (size_t y = 0; y < 2 * cy; ++y) {
+          // The first cy rows start after the lowest-frequency columns.
+          const size_t first_x = y < cy ? cx : 0;
+          for (size_t x = first_x; x < 2 * cx; ++x) {
+            coeffs[y * cx * kBlockDim + x] += pred[y * pred_stride + x];
+          }
+        }
+      }
+    }
+  }
+
+  // 3) 2x scaled image that feeds the HF predictor.
+  if (predict_hf) {
+    const size_t ac_stride = ac64->PixelsPerRow();
+    const size_t dc2x2_stride = dc2x2->PixelsPerRow();
+    for (size_t by = y_begin; by < y_end; ++by) {
+      AcStrategyRow strategies = ac_strategy.ConstRow(acs_rect, by);
+      const float* PIK_RESTRICT coeff_row = ac64->PlaneRow(c, by);
+      float* PIK_RESTRICT out_row = dc2x2->PlaneRow(c, 2 * (by + 1));
+      for (size_t bx = x_begin; bx < x_end; ++bx) {
+        AcStrategy acs = strategies[bx];
+        if (!acs.IsFirstBlock()) continue;
+        acs.DC2x2FromLowFrequencies(coeff_row + kCoeffsPerBlock * bx,
+                                    ac_stride, out_row + 2 * (bx + 1),
+                                    dc2x2_stride);
+      }
+    }
+  }
+}
+
+// Encoder side: for every plane of `coeffs`, runs
+// UpSample4x4BlurDCT</*add=*/false> on the matching plane of `pred2x2`, so
+// the prediction is subtracted from the coefficients. The processed area is
+// (xsize / 2 - 2) x (ysize / 2 - 2) blocks of `pred2x2`, anchored at the
+// origin; the scratch image for the horizontal blur is allocated here.
+SIMD_ATTR void ComputePredictionResiduals(const Image3F& pred2x2,
+                                          const AcStrategyImage& ac_strategy,
+                                          Image3F* PIK_RESTRICT coeffs) {
+  const Rect dc_rect(0, 0, pred2x2.xsize() / 2 - 2, pred2x2.ysize() / 2 - 2);
+  const Rect acs_rect(0, 0, ac_strategy.xsize(), ac_strategy.ysize());
+
+  Ub4Kernel blur_weights;
+  ComputeUb4Kernel(k4x4BlurStrength, &blur_weights);
+
+  // Sized to what UpSample4x4BlurDCT asserts: 8 columns per block and two
+  // rows per block plus two extra rows.
+  ImageF scratch(dc_rect.xsize() * 8, dc_rect.ysize() * 2 + 2);
+  for (int plane = 0; plane < coeffs->kNumPlanes; ++plane) {
+    ImageF* residuals = const_cast<ImageF*>(&coeffs->Plane(plane));
+    UpSample4x4BlurDCT</*add=*/false>(dc_rect, pred2x2.Plane(plane),
+                                      blur_weights, ac_strategy, acs_rect,
+                                      &scratch, residuals);
+  }
+}
+
+// Decoder side: for every plane of `dcoeffs`, runs
+// UpSample4x4BlurDCT</*add=*/true> on the matching plane of `pred2x2`, so the
+// prediction is added to the coefficients. `blur_x` is caller-provided
+// scratch space for the horizontal blur pass.
+void AddPredictions(const Image3F& pred2x2, const AcStrategyImage& ac_strategy,
+                    const Rect& acs_rect, ImageF* PIK_RESTRICT blur_x,
+                    Image3F* PIK_RESTRICT dcoeffs) {
+  PROFILER_FUNC;
+  const Rect dc_rect(0, 0, pred2x2.xsize() / 2 - 2, pred2x2.ysize() / 2 - 2);
+
+  Ub4Kernel blur_weights;
+  ComputeUb4Kernel(k4x4BlurStrength, &blur_weights);
+
+  for (int plane = 0; plane < dcoeffs->kNumPlanes; ++plane) {
+    // Updates dcoeffs _except_ 0HVD.
+    ImageF* target = const_cast<ImageF*>(&dcoeffs->Plane(plane));
+    UpSample4x4BlurDCT</*add=*/true>(dc_rect, pred2x2.Plane(plane),
+                                     blur_weights, ac_strategy, acs_rect,
+                                     blur_x, target);
+  }
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/ac_predictions.h b/codec/L2/demos/pikEnc/host/pik/ac_predictions.h
new file mode 100755
index 0000000000..87445a78bf
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/ac_predictions.h
@@ -0,0 +1,74 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_AC_PREDICTIONS_H_
+#define PIK_AC_PREDICTIONS_H_
+
+#include "pik/color_correlation.h"
+#include "pik/compressed_image_fwd.h"
+#include "pik/quantizer.h"
+#include "pik/simd/simd.h"
+
+// Encoder-side and decoder-side functions to predict low frequency coefficients
+// from lowest-frequency coefficients, and high frequency coefficients from low
+// frequency coefficients.
+// Given a block of size N, the top (N/8)*(N/8) block of coefficients are the
+// lowest-frequency coefficients. Other coefficients in the top (N/4)*(N/4)
+// block are the low frequency coefficients, and the rest of the block are high
+// frequency coefficients.
+// Lowest frequency coefficients are encoded in DC (as an (N/8)*(N/8) IDCT). We
+// obtain a 2x upsampled image out of this, by computing (N/4)*(N/4) IDCTs of
+// the LLF coefficients (other coefficients are set to 0). Then we smooth this
+// image with a convolution, DCT it to obtain LF coefficients, and use that as a
+// prediction.
+// The process for HF predictions is similar: LF coefficients are IDCT-ed back
+// into a 4x downsampled image, which is 4x upsampled and smoothed with a radius
+// 4 gaussian blur. NxN blocks in the resulting image are then used to predict
+// the HF coefficients, after a DCT.
+
+namespace pik {
+
+// All the `acs_rect`s here define which area of the ac_strategy image should be
+// used to obtain the strategy of the current block from, and are specified in
+// block coordinates.
+
+// Common utilities.
+
+// Computes the lowest-frequency coefficients of each block (1x1 for DCT8,
+// 2x2 for DCT16, etc.) from `dc` into `llf`. Positions on the outer border of
+// `dc` are treated as plain DCT blocks. The implementation in this tree also
+// prints the strategies and plane 0 of the result to std::cout.
+SIMD_ATTR void ComputeLlf(const Image3F& dc, const AcStrategyImage& ac_strategy,
+                          const Rect& acs_rect, Image3F* PIK_RESTRICT llf);
+// Produces the LF prediction for every plane of `lf2x2` from `llf`: expands
+// `llf` into `tmp2x2`, blurs it into the plane and converts the result back
+// to LF coefficients in place. `tmp2x2` is scratch space reused per plane.
+SIMD_ATTR void PredictLf(const AcStrategyImage& ac_strategy,
+                         const Rect& acs_rect, const Image3F& llf,
+                         ImageF* tmp2x2, Image3F* lf2x2);
+
+// Encoder API.
+
+// In this tree the implementation only computes the lowest-frequency
+// coefficients from `dc` and copies them into `ac64`; the remaining
+// parameters are not used by it.
+SIMD_ATTR void PredictLfForEncoder(
+    bool predict_lf, bool predict_hf, const Image3F& dc,
+    const AcStrategyImage& ac_strategy, const ColorCorrelationMap& cmap,
+    const Rect& cmap_rect, const Quantizer& quantizer, const ImageB& quant_cf,
+    const uint8_t quant_cf_map[kMaxQuantControlFieldValue][256],
+    Image3F* PIK_RESTRICT ac64, Image3F* dc2x2);
+
+// Subtracts the HF prediction derived from `pred2x2` from every plane of
+// `coeffs`.
+// NOTE(review): the definition in ac_predictions.cc is marked SIMD_ATTR while
+// this declaration is not - confirm that the mismatch is intended.
+void ComputePredictionResiduals(const Image3F& pred2x2,
+                                const AcStrategyImage& ac_strategy,
+                                Image3F* PIK_RESTRICT coeffs);
+
+// Decoder API. Encoder-decoder API is currently not symmetric. Ideally both
+// should allow tile-wise processing.
+
+// For plane `c` and the blocks inside `tile`: copies the lowest-frequency
+// coefficients from `llf` into `ac64`, adds the LF predictions from `lf2x2`
+// when `predict_lf` is set, and fills `dc2x2` when `predict_hf` is set.
+SIMD_ATTR void UpdateLfForDecoder(const Rect& tile, bool predict_lf,
+                                  bool predict_hf,
+                                  const AcStrategyImage& ac_strategy,
+                                  const Rect& acs_rect, const Image3F& llf,
+                                  Image3F* PIK_RESTRICT ac64,
+                                  Image3F* PIK_RESTRICT dc2x2,
+                                  Image3F* PIK_RESTRICT lf2x2, size_t c);
+
+// Adds the HF prediction derived from `pred2x2` to every plane of `dcoeffs`.
+// `blur_x` is preallocated by GroupDecCache.
+void AddPredictions(const Image3F& pred2x2, const AcStrategyImage& ac_strategy,
+                    const Rect& acs_rect, ImageF* PIK_RESTRICT blur_x,
+                    Image3F* PIK_RESTRICT dcoeffs);
+
+}  // namespace pik
+
+#endif  // PIK_AC_PREDICTIONS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/ac_strategy.cc b/codec/L2/demos/pikEnc/host/pik/ac_strategy.cc
new file mode 100755
index 0000000000..3b4a092037
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/ac_strategy.cc
@@ -0,0 +1,1562 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/ac_strategy.h"
+#include "pik/block.h"
+#include "pik/common.h"
+#include "pik/dct.h"
+#include "pik/entropy_coder.h"
+#include "pik/image.h"
+#include "pik/opsin_params.h"
+#include "pik/profiler.h"
+#include "pik/quantizer.h"
+#include "pik/simd/simd.h"
+
+#include <iostream>
+
+#define ENABLE_DIAGONAL_LINES_EXPERIMENT 0
+
+namespace pik {
+namespace {
+
+// 1-dimensional DCT's of different sizes (Except DCT2x2)
+// TODO(lode): SIMDify and place in dct_simd_any.h
+// In-place 2-point forward transform: i0 becomes the mean of the two inputs,
+// i1 becomes half of (i0 - i1).
+template <class V> void DCT2(V &i0, V &i1) {
+  const V first = i0;
+  const V second = i1;
+  i0 = (first + second) * 0.5;
+  i1 = (first - second) * 0.5;
+}
+
+// In-place 2-point inverse transform: i0 becomes the sum of the two inputs,
+// i1 becomes their difference (i0 - i1).
+template <class V> void IDCT2(V &i0, V &i1) {
+  const V first = i0;
+  const V second = i1;
+  i0 = (first + second);
+  i1 = (first - second);
+}
+
+// In-place forward transform of four values. Each output is a quarter of a
+// signed sum of the inputs:
+//   i0: + + + +    i1: + - + -    i2: + + - -    i3: + - - +
+template <class V> void DCT2x2(V &i0, V &i1, V &i2, V &i3) {
+  const V a = i0;
+  const V b = i1;
+  const V c = i2;
+  const V d = i3;
+  i0 = (a + b + c + d) * 0.25f;
+  i1 = (a - b + c - d) * 0.25f;
+  i2 = (a + b - c - d) * 0.25f;
+  i3 = (a - b - c + d) * 0.25f;
+}
+
+// In-place inverse transform of four values. Each output is a signed sum of
+// the inputs, with no scaling:
+//   i0: + + + +    i1: + - + -    i2: + + - -    i3: + - - +
+template <class V> void IDCT2x2(V &i0, V &i1, V &i2, V &i3) {
+  const V a = i0;
+  const V b = i1;
+  const V c = i2;
+  const V d = i3;
+  i0 = a + b + c + d;
+  i1 = a - b + c - d;
+  i2 = a + b - c - d;
+  i3 = a - b - c + d;
+}
+
+// In-place 3-point forward transform. i0 becomes the mean of the inputs, i1
+// the scaled difference of the outer inputs, and i2 a third of the outer sum
+// minus two thirds of the middle input.
+template <class V> void DCT3(V &i0, V &i1, V &i2) {
+  const V left = i0;
+  const V mid = i1;
+  const V right = i2;
+  i0 = (left + mid + right) * (1 / 3.0f);
+  i1 = (left - right) * 0.57735029;
+  i2 = (left + right) * (1 / 3.0f) - mid * (2 / 3.0f);
+}
+
+// In-place 3-point inverse transform. With only i0 set, all three outputs
+// equal i0.
+template <class V> void IDCT3(V &i0, V &i1, V &i2) {
+  const V dc = i0;
+  const V odd = i1 * 0.86602540f;
+  const V even_half = i2 * 0.5f;
+  i0 = dc + even_half + odd;
+  // i2 still holds its input value here; it is overwritten last.
+  i1 = dc - i2;
+  i2 = dc + even_half - odd;
+}
+
+// In-place 4-point forward DCT over i0..i3. i0 receives the mean of the four
+// inputs and i2 receives (i0 - i1 - i2 + i3) / 4; the odd outputs i1 and i3
+// are built from the input differences and the constant c2_8.
+template <class V> void DCT4(V &i0, V &i1, V &i2, V &i3) {
+  static const V c2_8 = 1.414213562373095048f; // 2 * cos(2 * pi / 8)
+  // Sums and differences of the mirrored input pairs.
+  V t0 = i0 + i3;
+  V t1 = i1 + i2;
+  V t2 = i0 - i3;
+  V t3 = i1 - i2;
+  V t4 = t0 + t1;
+  V t5 = t0 - t1;
+  V t6 = t2 - t3;
+  V t7 = t3 * c2_8;
+  V t8 = t6 + t7;
+  V t9 = t6 - t7;
+  // Per-output scaling.
+  i0 = t4 * (0.5f / 2);
+  i1 = t8 * (0.653281482438188264f / 2);
+  i2 = t5 * (0.5f / 2);
+  i3 = t9 * (0.270598050073098492f / 2);
+}
+
+// In-place 4-point inverse transform over i0..i3; presumably the inverse of
+// DCT4 (not verified here). With only i0 set, all four outputs equal i0.
+template <class V> void IDCT4(V &i0, V &i1, V &i2, V &i3) {
+  static const V c2_8 = 0.7071067811865475244f; // 0.5 / cos(2 * pi / 8)
+  // Per-input scaling, applied before the butterflies.
+  i0 *= (0.5f * 2);
+  i1 *= (0.382683432365089772f * 2);
+  i2 *= (0.5f * 2);
+  i3 *= (0.923879532511286756f * 2);
+  V t0 = i0 + i2;
+  V t1 = i0 - i2;
+  V t2 = i1 + i3;
+  V t3 = i1 - i3;
+  V t4 = t3 * c2_8;
+  V t5 = t2 + t4;
+  V t6 = t0 + t5;
+  V t7 = t1 + t4;
+  V t8 = t0 - t5;
+  V t9 = t1 - t4;
+  i0 = t6;
+  i1 = t7;
+  i2 = t9;
+  i3 = t8;
+}
+
+// In-place 6-point forward transform over i0..i5, written as explicit
+// weighted sums. i0 receives the mean of the six inputs. All results are
+// computed into r0..r5 before any input is overwritten.
+template <class V> void DCT6(V &i0, V &i1, V &i2, V &i3, V &i4, V &i5) {
+  // Term shared by r1 (added) and r5 (subtracted).
+  V t0 = (i1 - i4) * 0.23570227;
+  V r0 = (i0 + i1 + i2 + i3 + i4 + i5) * (1 / 6.0f);
+  V r1 = (i0 - i5) * 0.32197529 + t0 + (i2 - i3) * 0.08627302;
+  V r2 = (i0 - i2 - i3 + i5) * 0.28867514;
+  V r3 = (i0 - i1 - i2 + i3 + i4 - i5) * 0.23570227;
+  V r4 = (i0 + i2 + i3 + i5) * (1 / 6.0f) - (i1 + i4) * (1 / 3.0f);
+  V r5 = (i0 - i5) * 0.08627302 - t0 + (i2 - i3) * 0.32197529;
+  i0 = r0;
+  i1 = r1;
+  i2 = r2;
+  i3 = r3;
+  i4 = r4;
+  i5 = r5;
+}
+
+// In-place 6-point inverse transform over i0..i5; presumably the inverse of
+// DCT6 (not verified here). With only i0 set, all six outputs equal i0. The
+// scaled copies of the inputs are taken first, so the outputs can be written
+// straight back into i0..i5.
+template <class V> void IDCT6(V &i0, V &i1, V &i2, V &i3, V &i4, V &i5) {
+  // TODO(lode): maybe more multiplies can be removed by combining some terms.
+  V i0a = i0;
+  V i1a = i1 * 0.96592583;
+  V i1b = i1 * 0.70710678;
+  V i1c = i1 * 0.25881905;
+  V i2a = i2 * 0.86602540;
+  V i3a = i3 * 0.70710678;
+  V i4a = i4 * 0.5;
+  V i4b = i4;
+  V i5a = i5 * 0.25881905;
+  V i5b = i5 * 0.70710678;
+  V i5c = i5 * 0.96592583;
+  i0 = i0a + i1a + i3a + i4a + i5a + i2a;
+  i1 = i0a + i1b - i3a - i4b - i5b;
+  i2 = i0a + i1c - i3a + i4a + i5c - i2a;
+  i3 = i0a - i1c + i3a + i4a - i5c - i2a;
+  i4 = i0a - i1b + i3a - i4b + i5b;
+  i5 = i0a - i1a - i3a + i4a - i5a + i2a;
+}
+
+// In-place 8-point forward DCT over i0..i7. Every output is multiplied by a
+// per-output constant divided by 2.8284271247461903; for i0 that product is
+// 1/8, so i0 receives the mean of the eight inputs.
+template <class V>
+static void DCT8(V &i0, V &i1, V &i2, V &i3, V &i4, V &i5, V &i6, V &i7) {
+  static const V c1 = 0.707106781186548f; // 1 / sqrt(2)
+  static const V c2 = 0.382683432365090f; // cos(3 * pi / 8)
+  static const V c3 = 1.30656296487638f;  // 1 / (2 * cos(3 * pi / 8))
+  static const V c4 = 0.541196100146197f; // sqrt(2) * cos(3 * pi / 8)
+  // Sums and differences of the mirrored input pairs (0,7) (3,4) (2,5) (1,6).
+  const V t00 = i0 + i7;
+  const V t01 = i0 - i7;
+  const V t02 = i3 + i4;
+  const V t03 = i3 - i4;
+  const V t04 = i2 + i5;
+  const V t05 = i2 - i5;
+  const V t06 = i1 + i6;
+  const V t07 = i1 - i6;
+  const V t08 = t00 + t02;
+  const V t09 = t00 - t02;
+  const V t10 = t06 + t04;
+  const V t11 = t06 - t04;
+  const V t12 = t07 + t05;
+  const V t13 = t01 + t07;
+  const V t14 = t05 + t03;
+  const V t15 = t11 + t09;
+  const V t16 = t14 - t13;
+  const V t17 = c1 * t15;
+  const V t18 = c1 * t12;
+  const V t19 = c2 * t16;
+  const V t20 = t01 + t18;
+  const V t21 = t01 - t18;
+  const V t22 = c3 * t13 + t19;
+  const V t23 = c4 * t14 + t19;
+  // Final combination and per-output scaling.
+  i0 = (t08 + t10) * (0.353553390593273762f / 2.8284271247461903f);
+  i1 = (t20 + t22) * (0.254897789552079584f / 2.8284271247461903f);
+  i2 = (t09 + t17) * (0.270598050073098492f / 2.8284271247461903f);
+  i3 = (t21 - t23) * (0.30067244346752264f / 2.8284271247461903f);
+  i4 = (t08 - t10) * (0.353553390593273762f / 2.8284271247461903f);
+  i5 = (t21 + t23) * (0.449988111568207852f / 2.8284271247461903f);
+  i6 = (t09 - t17) * (0.653281482438188264f / 2.8284271247461903f);
+  i7 = (t20 - t22) * (1.28145772387075309f / 2.8284271247461903f);
+}
+
+// In-place 8-point inverse transform over i0..i7; presumably the inverse of
+// DCT8 (not verified here). The inputs are first multiplied by per-input
+// constants times 2.8284271247461903 (for i0 and i4 that product is
+// approximately 1), then combined.
+template <class V>
+static void IDCT8(V &i0, V &i1, V &i2, V &i3, V &i4, V &i5, V &i6, V &i7) {
+  static const V c1 = 1.41421356237310; // sqrt(2)
+  static const V c2 = 2.61312592975275; // 1 / cos(3 * pi / 8)
+  static const V c3 = 0.76536686473018; // 2 * cos(3 * pi / 8)
+  static const V c4 = 1.08239220029239; // 2 * sqrt(2) * cos(3 * pi / 8)
+  // Per-input scaling, applied before the butterflies.
+  i0 *= (0.353553390593273762 * 2.8284271247461903f);
+  i1 *= (0.490392640201615225 * 2.8284271247461903f);
+  i2 *= (0.461939766255643378 * 2.8284271247461903f);
+  i3 *= (0.415734806151272619 * 2.8284271247461903f);
+  i4 *= (0.353553390593273762 * 2.8284271247461903f);
+  i5 *= (0.277785116509801112 * 2.8284271247461903f);
+  i6 *= (0.191341716182544886 * 2.8284271247461903f);
+  i7 *= (0.0975451610080641339 * 2.8284271247461903f);
+  const V t00 = i0 + i4;
+  const V t01 = i0 - i4;
+  const V t02 = i6 + i2;
+  const V t03 = i6 - i2;
+  const V t04 = i7 + i1;
+  const V t05 = i7 - i1;
+  const V t06 = i5 + i3;
+  const V t07 = i5 - i3;
+  const V t08 = t04 + t06;
+  const V t09 = t04 - t06;
+  const V t10 = t00 + t02;
+  const V t11 = t00 - t02;
+  const V t12 = t07 - t05;
+  const V t13 = c3 * t12;
+  const V t14 = c1 * t03 + t02;
+  const V t15 = t01 - t14;
+  const V t16 = t01 + t14;
+  const V t17 = c2 * t05 + t13;
+  const V t18 = c4 * t07 + t13;
+  const V t19 = t08 + t17;
+  const V t20 = c1 * t09 + t19;
+  const V t21 = t18 - t20;
+  // Outputs are written in mirrored pairs: (0,7), (1,6), (2,5), (3,4).
+  i0 = t10 + t08;
+  i1 = t15 - t19;
+  i2 = t16 + t20;
+  i3 = t11 + t21;
+  i4 = t11 - t21;
+  i5 = t16 - t20;
+  i6 = t15 + t19;
+  i7 = t10 - t08;
+}
+
+// When true, a non-trivial AC strategy is searched for.
+constexpr bool kChooseAcStrategy = true;
+
+// Returns the value such that ComputeTransposedScaledDCT<N>() of a block with
+// this value in position (x, y) and 0s everywhere else will have the average of
+// absolute values of 1.
+// Computed as N times the DCTScales<N>() and L1NormInv<N>() entries for x and
+// for y.
+template <size_t N> constexpr float DCTTotalScale(size_t x, size_t y) {
+  return N * DCTScales<N>()[x] * DCTScales<N>()[y] * L1NormInv<N>()[x] *
+         L1NormInv<N>()[y];
+}
+// Counterpart of DCTTotalScale built from the inverse tables: N times the
+// IDCTScales<N>() and L1Norm<N>() entries for x and for y.
+// NOTE(review): presumably the reciprocal of DCTTotalScale<N>(x, y) up to the
+// factor N - confirm against the table definitions.
+template <size_t N> constexpr float DCTInvTotalScale(size_t x, size_t y) {
+  return N * IDCTScales<N>()[x] * IDCTScales<N>()[y] * L1Norm<N>()[x] *
+         L1Norm<N>()[y];
+}
+
+// Computes the lowest-frequency LFxLF-sized square in output, which is a
+// DCTN-sized DCT block, by doing a NxN DCT on the input block.
+//
+// input, input_stride:   NxN samples; consecutive rows are input_stride
+//                        floats apart.
+// output, output_stride: receives LFxLF coefficients; consecutive rows are
+//                        output_stride floats apart.
+// Each coefficient (x, y) is multiplied by
+// DCTTotalScale<N>(x, y) * DCTInvTotalScale<DCTN>(x, y) before it is stored.
+template <size_t DCTN, size_t LF, size_t N>
+SIMD_ATTR PIK_INLINE void
+ReinterpretingDCT(const float *input, const size_t input_stride, float *output,
+                  const size_t output_stride) {
+  static_assert(LF == N,
+                "ReinterpretingDCT should only be called with LF == N");
+  // Gather the strided input into a contiguous, aligned scratch block.
+  SIMD_ALIGN float block[N * N] = {};
+  for (size_t y = 0; y < N; y++) {
+    for (size_t x = 0; x < N; x++) {
+      block[y * N + x] = input[y * input_stride + x];
+    }
+  }
+  // Transform the scratch block in place (source and destination are both
+  // `block`).
+  ComputeTransposedScaledDCT<N>()(FromBlock<N>(block), ScaleToBlock<N>(block));
+  for (size_t y = 0; y < LF; y++) {
+    for (size_t x = 0; x < LF; x++) {
+      output[y * output_stride + x] = block[y * N + x] *
+                                      DCTTotalScale<N>(x, y) *
+                                      DCTInvTotalScale<DCTN>(x, y);
+    }
+  }
+}
+
+// Inverse of ReinterpretingDCT: takes the LFxLF lowest-frequency coefficients
+// of a DCTN-sized DCT block from `input`, rescales them, pads them with zeros
+// up to NxN and runs an NxN IDCT. All NxN resulting samples go to `output`.
+template <size_t DCTN, size_t LF, size_t N>
+SIMD_ATTR PIK_INLINE void
+ReinterpretingIDCT(const float *input, const size_t input_stride, float *output,
+                   const size_t output_stride) {
+  // Zero-initialised so that everything outside the LFxLF corner stays 0.
+  SIMD_ALIGN float block[N * N] = {};
+  for (size_t row = 0; row < LF; ++row) {
+    const float *src_row = input + row * input_stride;
+    float *dst_row = block + row * N;
+    for (size_t col = 0; col < LF; ++col) {
+      dst_row[col] = src_row[col] * DCTInvTotalScale<N>(col, row) *
+                     DCTTotalScale<DCTN>(col, row);
+    }
+  }
+  // The transform reads from and writes back to the same scratch block.
+  ComputeTransposedScaledIDCT<N>()(FromBlock<N>(block), ToBlock<N>(block));
+
+  // Scatter the dense result into the strided destination.
+  for (size_t row = 0; row < N; ++row) {
+    const float *src_row = block + row * N;
+    float *dst_row = output + row * output_stride;
+    for (size_t col = 0; col < N; ++col) {
+      dst_row[col] = src_row[col];
+    }
+  }
+}
+
+// Applies a 2x2 sum/difference transform (scaled by 1/4) to every 2x2 cell of
+// the top-left SxS region of `block` (row distance `stride`). The four results
+// of cell (x, y) are stored at (x, y), (x + S/2, y), (x, y + S/2) and
+// (x + S/2, y + S/2) of the top-left SxS region of `out`, whose row distance is
+// always kBlockDim. Everything is staged in a scratch buffer first, so `out`
+// may be the same buffer as `block`.
+template <size_t S>
+void DCT2TopBlock(const float *block, size_t stride, float *out) {
+  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
+  static_assert(S % 2 == 0, "S should be even");
+  constexpr size_t kHalf = S / 2;
+  float scratch[kBlockDim * kBlockDim];
+  for (size_t cy = 0; cy < kHalf; ++cy) {
+    const float *upper = block + (cy * 2) * stride;
+    const float *lower = block + (cy * 2 + 1) * stride;
+    float *dst_top = scratch + cy * kBlockDim;
+    float *dst_bottom = scratch + (cy + kHalf) * kBlockDim;
+    for (size_t cx = 0; cx < kHalf; ++cx) {
+      const float c00 = upper[cx * 2];
+      const float c01 = upper[cx * 2 + 1];
+      const float c10 = lower[cx * 2];
+      const float c11 = lower[cx * 2 + 1];
+      dst_top[cx] = (c00 + c01 + c10 + c11) * 0.25f;
+      dst_top[kHalf + cx] = (c00 + c01 - c10 - c11) * 0.25f;
+      dst_bottom[cx] = (c00 - c01 + c10 - c11) * 0.25f;
+      dst_bottom[kHalf + cx] = (c00 - c01 - c10 + c11) * 0.25f;
+    }
+  }
+  // Publish the SxS result only after every input sample has been read.
+  for (size_t row = 0; row < S; ++row) {
+    const float *src_row = scratch + row * kBlockDim;
+    float *dst_row = out + row * kBlockDim;
+    for (size_t col = 0; col < S; ++col) {
+      dst_row[col] = src_row[col];
+    }
+  }
+}
+
+// Reverse data flow of DCT2TopBlock, without any scaling: for every cell
+// (x, y) with x, y < S/2 it reads the four values at (x, y), (x + S/2, y),
+// (x, y + S/2) and (x + S/2, y + S/2) of `block` (row distance kBlockDim) and
+// writes their 2x2 sum/difference combinations to the 2x2 cell at (2x, 2y).
+// The SxS result is written to `out` with row distance `stride_out`. Staging in
+// a scratch buffer allows `out` to be the same buffer as `block`.
+template <size_t S>
+void IDCT2TopBlock(const float *block, size_t stride_out, float *out) {
+  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
+  static_assert(S % 2 == 0, "S should be even");
+  constexpr size_t kHalf = S / 2;
+  float scratch[kBlockDim * kBlockDim];
+  for (size_t cy = 0; cy < kHalf; ++cy) {
+    const float *src_top = block + cy * kBlockDim;
+    const float *src_bottom = block + (cy + kHalf) * kBlockDim;
+    float *upper = scratch + (cy * 2) * kBlockDim;
+    float *lower = scratch + (cy * 2 + 1) * kBlockDim;
+    for (size_t cx = 0; cx < kHalf; ++cx) {
+      const float c00 = src_top[cx];
+      const float c01 = src_top[kHalf + cx];
+      const float c10 = src_bottom[cx];
+      const float c11 = src_bottom[kHalf + cx];
+      upper[cx * 2] = c00 + c01 + c10 + c11;
+      upper[cx * 2 + 1] = c00 + c01 - c10 - c11;
+      lower[cx * 2] = c00 - c01 + c10 - c11;
+      lower[cx * 2 + 1] = c00 - c01 - c10 + c11;
+    }
+  }
+  // Publish the SxS result only after every input value has been read.
+  for (size_t row = 0; row < S; ++row) {
+    const float *src_row = scratch + row * kBlockDim;
+    float *dst_row = out + row * stride_out;
+    for (size_t col = 0; col < S; ++col) {
+      dst_row[col] = src_row[col];
+    }
+  }
+}
+
+} // namespace
+
+// Macros to index pixels, coefficients or temporary buffer with either x, y
+// coordinates or with a single index. Short name on purpose but undefined
+// below.
+//   C(x, y), P(x, y), T(x, y): element (x, y) of `coefficients`, `pixels` and
+//     `temp`; they expect `kBlockDim`, `pixels_stride` and a row length of 8
+//     respectively to be valid at the point of use.
+//   C1(i), P1(i): same buffers addressed by a single index, which is split
+//     into x = i / 8 and y = i & 7.
+//   T1(i): plain linear access to `temp`.
+//   ARRAYSIZE(a): number of elements of the array `a`.
+// Every macro argument and the whole ARRAYSIZE expansion are parenthesized so
+// that expression arguments (e.g. `P1(a + b)`) and surrounding operators
+// (e.g. `n / ARRAYSIZE(a)`) are evaluated with the intended precedence.
+#define C(x, y) coefficients[(y)*kBlockDim + (x)]
+#define P(x, y) pixels[(y)*pixels_stride + (x)]
+#define T(x, y) temp[(y)*8 + (x)]
+#define C1(i) C(((i) / 8), ((i)&7))
+#define P1(i) P(((i) / 8), ((i)&7))
+#define T1(i) temp[(i)]
+#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a)))
+
+// Out-of-class definitions of AcStrategy's static constexpr data members.
+// These definitions are needed before C++17.
+constexpr size_t AcStrategy::kMaxCoeffBlocks;
+constexpr size_t AcStrategy::kMaxBlockDim;
+constexpr size_t AcStrategy::kMaxCoeffArea;
+constexpr size_t AcStrategy::kLLFMaskDim;
+
+// Define hardcoded tables for the specific 45 degree case of the diagonal
+// lines experiment.
+// Every entry is a single index in 0..63 into an 8x8 block; the P1/C1 macros
+// turn such an index into (x, y) coordinates.
+
+// Indices of the 4 groups of pixels for the 4 DC's for the diagonal lines
+// strategy. Together the four lists contain each index 0..63 exactly once.
+static const size_t kLinesDc00indices[] = {0,  8,  9,  16, 17, 18, 25,
+                                           26, 27, 34, 35, 36, 43, 44,
+                                           45, 52, 53, 54, 61, 62, 63};
+static const size_t kLinesDc01indices[] = {24, 32, 33, 40, 41, 42, 48, 49,
+                                           50, 51, 56, 57, 58, 59, 60};
+static const size_t kLinesDc10indices[] = {3,  4,  5,  6,  7,  12, 13, 14,
+                                           15, 21, 22, 23, 30, 31, 39};
+static const size_t kLinesDc11indices[] = {1,  2,  10, 11, 19, 20, 28,
+                                           29, 37, 38, 46, 47, 55};
+// Number of pixels in each DC group.
+static const size_t kLinesNumDc00 = ARRAYSIZE(kLinesDc00indices);
+static const size_t kLinesNumDc01 = ARRAYSIZE(kLinesDc01indices);
+static const size_t kLinesNumDc10 = ARRAYSIZE(kLinesDc10indices);
+static const size_t kLinesNumDc11 = ARRAYSIZE(kLinesDc11indices);
+
+// Pixel indices of the different diagonal DCT's used in 8x8 block for the
+// diagonal lines strategy. Each row lists the pixels of one 1-D DCT.
+// Indices 21, 28, 35 and 42 appear in none of these rows; TransformToPixels
+// fills those four pixels by interpolation.
+static const size_t kLinesDct3indices[][3] = {
+    {6, 7, 15}, {5, 14, 23}, {40, 49, 58}, {48, 56, 57}};
+static const size_t kLinesDct4indices[][4] = {
+    {4, 13, 22, 31}, {3, 12, 30, 39}, {24, 33, 51, 60}, {32, 41, 50, 59}};
+static const size_t kLinesDct6indices[][6] = {{2, 11, 20, 29, 38, 47},
+                                              {1, 10, 19, 37, 46, 55},
+                                              {8, 17, 26, 44, 53, 62},
+                                              {16, 25, 34, 43, 52, 61}};
+static const size_t kLinesDct8indices[][8] = {{0, 9, 18, 27, 36, 45, 54, 63}};
+// Number of 1-D DCTs (rows) of each size.
+static const size_t kLinesNumDct3 = ARRAYSIZE(kLinesDct3indices);
+static const size_t kLinesNumDct4 = ARRAYSIZE(kLinesDct4indices);
+static const size_t kLinesNumDct6 = ARRAYSIZE(kLinesDct6indices);
+static const size_t kLinesNumDct8 = ARRAYSIZE(kLinesDct8indices);
+
+// Coefficient indices of the different diagonal DCT's used in 8x8 block for the
+// diagonal lines strategy. Row i here is the destination of row i of the pixel
+// table of the same size. None of the rows uses index 0, 1, 8 or 9, which are
+// the slots where the four DC values are stored.
+static const size_t kLinesDctC3indices[][3] = {
+    {6, 7, 15}, {5, 14, 23}, {40, 49, 58}, {48, 56, 57}};
+static const size_t kLinesDctC4indices[][4] = {
+    {4, 13, 22, 31}, {3, 12, 21, 30}, {24, 33, 42, 51}, {32, 41, 50, 59}};
+static const size_t kLinesDctC6indices[][6] = {{2, 11, 20, 29, 38, 47},
+                                               {10, 19, 28, 37, 46, 55},
+                                               {17, 26, 35, 44, 53, 62},
+                                               {16, 25, 34, 43, 52, 61}};
+static const size_t kLinesDctC8indices[][8] = {
+    {18, 27, 36, 45, 54, 39, 60, 63}};
+
+// Averages the `num` pixels selected by `indices`, subtracts that average from
+// each of them in place and returns it. `indices` holds single-index pixel
+// positions as understood by the P1 macro.
+static float ComputeDCPart(float *pixels, size_t pixels_stride,
+                           const size_t *indices, size_t num) {
+  const size_t *const last = indices + num;
+  float sum = 0;
+  for (const size_t *it = indices; it != last; ++it) {
+    sum += P1(*it);
+  }
+  const float dc = sum / num;
+  for (const size_t *it = indices; it != last; ++it) {
+    P1(*it) -= dc;
+  }
+  return dc;
+}
+
+// Adds `dc` back, in place, to the `num` pixels selected by `indices` (single
+// index positions as understood by the P1 macro).
+static void RestoreDCPart(float *pixels, size_t pixels_stride,
+                          const size_t *indices, size_t num, float dc) {
+  const size_t *const last = indices + num;
+  for (const size_t *it = indices; it != last; ++it) {
+    P1(*it) += dc;
+  }
+}
+
+// Does the diagonal DCT's of size N as defined by the corresponding pixel and
+// coefficient index arrays, for the diagonal lines strategy.
+// For each of the `num` index rows, the N selected pixels are first copied to
+// their coefficient slots and then transformed in place there.
+template <size_t N>
+static void DoDCTs(const size_t indices_p[][N], const size_t indices_c[][N],
+                   size_t num, const float *pixels, size_t pixels_stride,
+                   float *coefficients) {
+  for (size_t line = 0; line < num; ++line) {
+    for (size_t k = 0; k < N; ++k) {
+      C1(indices_c[line][k]) = P1(indices_p[line][k]);
+    }
+    // C++ has no static_if, so every branch below is compiled for every N.
+    // Indexing indices_c directly gives an error there; going through a
+    // pointer avoids it.
+    const size_t *idx = &indices_c[line][0];
+    switch (N) {
+    case 2:
+      DCT2(C1(idx[0]), C1(idx[1]));
+      break;
+    case 3:
+      DCT3(C1(idx[0]), C1(idx[1]), C1(idx[2]));
+      break;
+    case 4:
+      DCT4(C1(idx[0]), C1(idx[1]), C1(idx[2]), C1(idx[3]));
+      break;
+    case 6:
+      DCT6(C1(idx[0]), C1(idx[1]), C1(idx[2]), C1(idx[3]), C1(idx[4]),
+           C1(idx[5]));
+      break;
+    case 8:
+      DCT8(C1(idx[0]), C1(idx[1]), C1(idx[2]), C1(idx[3]), C1(idx[4]),
+           C1(idx[5]), C1(idx[6]), C1(idx[7]));
+      break;
+    default:
+      // Nothing to do for N == 1 (or any other size): the copy is the result.
+      break;
+    }
+  }
+}
+
+// Reverse of DoDCTs: for each of the `num` index rows, copies the N selected
+// coefficients to their pixel positions and then runs the size-N inverse
+// transform in place on those pixels.
+template <size_t N>
+static void DoIDCTs(const size_t indices_p[][N], const size_t indices_c[][N],
+                    size_t num, float *pixels, size_t pixels_stride,
+                    const float *coefficients) {
+  for (size_t line = 0; line < num; ++line) {
+    for (size_t k = 0; k < N; ++k) {
+      P1(indices_p[line][k]) = C1(indices_c[line][k]);
+    }
+    // Accessed through a pointer because every branch below is compiled for
+    // every N, including those that index past N.
+    const size_t *idx = &indices_p[line][0];
+    switch (N) {
+    case 2:
+      IDCT2(P1(idx[0]), P1(idx[1]));
+      break;
+    case 3:
+      IDCT3(P1(idx[0]), P1(idx[1]), P1(idx[2]));
+      break;
+    case 4:
+      IDCT4(P1(idx[0]), P1(idx[1]), P1(idx[2]), P1(idx[3]));
+      break;
+    case 6:
+      IDCT6(P1(idx[0]), P1(idx[1]), P1(idx[2]), P1(idx[3]), P1(idx[4]),
+            P1(idx[5]));
+      break;
+    case 8:
+      IDCT8(P1(idx[0]), P1(idx[1]), P1(idx[2]), P1(idx[3]), P1(idx[4]),
+            P1(idx[5]), P1(idx[6]), P1(idx[7]));
+      break;
+    default:
+      // Nothing to do for N == 1 (or any other size): the copy is the result.
+      break;
+    }
+  }
+}
+
+// Forward transform: converts the pixels covered by this strategy into
+// coefficients, using the transform selected by strategy_.
+//
+// pixels / pixels_stride: top-left source pixel and distance between rows.
+// coefficients:           destination. The 8x8 strategies write one dense
+//                         block of 64 values.
+// coefficients_stride:    only used by DCT16X16 and DCT32X32, whose output is
+//                         copied in 2 (resp. 4) chunks placed
+//                         coefficients_stride floats apart.
+//
+// Does nothing unless block_ is 0.
+// NOTE(review): presumably block_ is the index of this 8x8 block inside a
+// multi-block transform, so that only the first block does the work — confirm.
+SIMD_ATTR void AcStrategy::TransformFromPixels(
+    const float *PIK_RESTRICT pixels, size_t pixels_stride,
+    float *PIK_RESTRICT coefficients, size_t coefficients_stride) const {
+
+  if (block_ != 0)
+    return;
+  switch (strategy_) {
+  case Type::LINES: {
+    SIMD_ALIGN float temp[kBlockDim * kBlockDim];
+
+    // Start from all-zero coefficients; the index tables do not cover every
+    // slot.
+    for (size_t y = 0; y < 8; y++) {
+      for (size_t x = 0; x < 8; x++) {
+        C(x, y) = 0;
+      }
+    }
+
+    // Work on a dense copy, because ComputeDCPart modifies its input.
+    for (size_t y = 0; y < 8; y++) {
+      for (size_t x = 0; x < 8; x++) {
+        T(x, y) = P(x, y);
+      }
+    }
+
+    // Remove the mean of each of the four pixel groups, then transform the
+    // four means with a 2x2 DCT.
+    float dc00 = ComputeDCPart(temp, 8, kLinesDc00indices, kLinesNumDc00);
+    float dc01 = ComputeDCPart(temp, 8, kLinesDc01indices, kLinesNumDc01);
+    float dc10 = ComputeDCPart(temp, 8, kLinesDc10indices, kLinesNumDc10);
+    float dc11 = ComputeDCPart(temp, 8, kLinesDc11indices, kLinesNumDc11);
+    DCT2x2(dc00, dc01, dc10, dc11);
+
+    C(0, 0) = dc00;
+    C(0, 1) = dc01;
+    C(1, 0) = dc10;
+    C(1, 1) = dc11;
+
+    // 1-D DCTs of length 3, 4, 6 and 8 along the index rows of the tables.
+    DoDCTs<3>(kLinesDct3indices, kLinesDctC3indices, kLinesNumDct3, temp, 8,
+              coefficients);
+    DoDCTs<4>(kLinesDct4indices, kLinesDctC4indices, kLinesNumDct4, temp, 8,
+              coefficients);
+    DoDCTs<6>(kLinesDct6indices, kLinesDctC6indices, kLinesNumDct6, temp, 8,
+              coefficients);
+    DoDCTs<8>(kLinesDct8indices, kLinesDctC8indices, kLinesNumDct8, temp, 8,
+              coefficients);
+    break;
+  }
+  case Type::IDENTITY: {
+    // The block is handled as four 4x4 quadrants (x, y). Pixel (ix, iy) of
+    // quadrant (x, y) maps to coefficient (x + ix * 2, y + iy * 2), i.e. the
+    // four quadrants are interleaved.
+    for (size_t y = 0; y < 2; y++) {
+      for (size_t x = 0; x < 2; x++) {
+        // Mean of the 16 pixels of the quadrant.
+        float block_dc = 0;
+        for (size_t iy = 0; iy < 4; iy++) {
+          for (size_t ix = 0; ix < 4; ix++) {
+            block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
+          }
+        }
+        block_dc *= 1.0f / 16;
+        // Every pixel except (1, 1) is stored as its difference to pixel
+        // (1, 1) of the same quadrant.
+        for (size_t iy = 0; iy < 4; iy++) {
+          for (size_t ix = 0; ix < 4; ix++) {
+            if (ix == 1 && iy == 1)
+              continue;
+            coefficients[(y + iy * 2) * 8 + x + ix * 2] =
+                pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
+                pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
+          }
+        }
+        // The difference of pixel (0, 0) moves to the unused (1, 1) slot so
+        // that the (0, 0) slot can hold the quadrant mean.
+        coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
+        coefficients[y * 8 + x] = block_dc;
+      }
+    }
+    // 2x2 sum/difference transform (scaled by 1/4) of the four quadrant means.
+    float block00 = coefficients[0];
+    float block01 = coefficients[1];
+    float block10 = coefficients[8];
+    float block11 = coefficients[9];
+    coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
+    coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
+    coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
+    coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
+    break;
+  }
+  case Type::DCT4X4_NOHF:
+  case Type::DCT4X4: {
+    // One 4x4 DCT per quadrant; the four results are interleaved in the
+    // coefficient block: value (ix, iy) of quadrant (x, y) goes to
+    // (x + ix * 2, y + iy * 2).
+    for (size_t y = 0; y < 2; y++) {
+      for (size_t x = 0; x < 2; x++) {
+        float block[4 * 4];
+        ComputeTransposedScaledDCT<4>()(
+            FromLines<4>(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
+            ScaleToBlock<4>(block));
+
+        for (size_t iy = 0; iy < 4; iy++) {
+          for (size_t ix = 0; ix < 4; ix++) {
+            coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
+          }
+        }
+      }
+    }
+    // 2x2 sum/difference transform (scaled by 1/4) of the four values that
+    // ended up in slots 0, 1, 8 and 9 (element 0 of each quadrant's DCT).
+    float block00 = coefficients[0];
+    float block01 = coefficients[1];
+    float block10 = coefficients[8];
+    float block11 = coefficients[9];
+
+    coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
+    coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
+    coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
+    coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
+
+    break;
+  }
+  case Type::DCT2X2: {
+    // Three passes over a shrinking top-left square (8, 4, 2). The second and
+    // third pass run in place on the coefficients.
+    DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
+    DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
+    DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
+    break;
+  }
+  case Type::DCT16X16: {
+    // TODO(veluca): Generalize ScaleToBlock and related classes to handle
+    // non-contiguous blocks.
+    // The transform writes a contiguous buffer, which is then copied out in 2
+    // chunks of 2 * kBlockDim * kBlockDim floats each.
+    SIMD_ALIGN float output[4 * kBlockDim * kBlockDim];
+    ComputeTransposedScaledDCT<2 * kBlockDim>()(
+        FromLines<2 * kBlockDim>(pixels, pixels_stride),
+        ScaleToBlock<2 * kBlockDim>(output));
+
+    for (size_t i = 0; i < 2; i++) {
+      memcpy(coefficients + coefficients_stride * i,
+             output + 2 * kBlockDim * kBlockDim * i,
+             sizeof(float) * 2 * kBlockDim * kBlockDim);
+    }
+    break;
+  }
+  case Type::DCT32X32: {
+    // TODO(veluca): Generalize ScaleToBlock and related classes to handle
+    // non-contiguous blocks.
+    // Same scheme as DCT16X16, with 4 chunks of 4 * kBlockDim * kBlockDim
+    // floats each.
+    SIMD_ALIGN float output[16 * kBlockDim * kBlockDim];
+    ComputeTransposedScaledDCT<4 * kBlockDim>()(
+        FromLines<4 * kBlockDim>(pixels, pixels_stride),
+        ScaleToBlock<4 * kBlockDim>(output));
+    for (size_t i = 0; i < 4; i++) {
+      memcpy(coefficients + coefficients_stride * i,
+             output + 4 * kBlockDim * kBlockDim * i,
+             sizeof(float) * 4 * kBlockDim * kBlockDim);
+    }
+    break;
+  }
+  case Type::DCT_NOHF:
+  case Type::DCT: {
+    // Plain kBlockDim-sized DCT written straight into the coefficients.
+    ComputeTransposedScaledDCT<kBlockDim>()(
+        FromLines<kBlockDim>(pixels, pixels_stride),
+        ScaleToBlock<kBlockDim>(coefficients));
+    break;
+  }
+  }
+}
+
+// Inverse transform: reconstructs pixels from coefficients, using the
+// transform selected by strategy_.
+//
+// coefficients:          source. The 8x8 strategies read one dense block of
+//                        64 values.
+// coefficients_stride:   only used by DCT16X16 and DCT32X32, whose input is
+//                        gathered from 2 (resp. 4) chunks placed
+//                        coefficients_stride floats apart.
+// pixels / pixels_stride: top-left destination pixel and distance between
+//                        rows.
+//
+// Does nothing unless block_ is 0.
+// NOTE(review): presumably block_ is the index of this 8x8 block inside a
+// multi-block transform, so that only the first block does the work — confirm.
+SIMD_ATTR void AcStrategy::TransformToPixels(const float *coefficients,
+                                             size_t coefficients_stride,
+                                             float *pixels,
+                                             size_t pixels_stride) const {
+  if (block_ != 0)
+    return;
+  switch (strategy_) {
+  case Type::LINES: {
+    SIMD_ALIGN float temp[kBlockDim * kBlockDim];
+
+    // Clear the scratch block; the index tables do not cover every pixel.
+    for (size_t y = 0; y < 8; y++) {
+      for (size_t x = 0; x < 8; x++) {
+        T(x, y) = 0;
+      }
+    }
+
+    // Inverse 1-D DCTs of length 3, 4, 6 and 8 along the index rows of the
+    // tables, written into the scratch block.
+    DoIDCTs<3>(kLinesDct3indices, kLinesDctC3indices, kLinesNumDct3, temp, 8,
+               coefficients);
+    DoIDCTs<4>(kLinesDct4indices, kLinesDctC4indices, kLinesNumDct4, temp, 8,
+               coefficients);
+    DoIDCTs<6>(kLinesDct6indices, kLinesDctC6indices, kLinesNumDct6, temp, 8,
+               coefficients);
+    DoIDCTs<8>(kLinesDct8indices, kLinesDctC8indices, kLinesNumDct8, temp, 8,
+               coefficients);
+
+    // Recover the four group means and add each one back to its pixel group.
+    float dc00 = C(0, 0);
+    float dc01 = C(0, 1);
+    float dc10 = C(1, 0);
+    float dc11 = C(1, 1);
+
+    IDCT2x2(dc00, dc01, dc10, dc11);
+
+    RestoreDCPart(temp, 8, kLinesDc00indices, kLinesNumDc00, dc00);
+    RestoreDCPart(temp, 8, kLinesDc01indices, kLinesNumDc01, dc01);
+    RestoreDCPart(temp, 8, kLinesDc10indices, kLinesNumDc10, dc10);
+    RestoreDCPart(temp, 8, kLinesDc11indices, kLinesNumDc11, dc11);
+
+    // 4 pixels were not filled in, interpolate them here
+    // TODO(lode): use bicubic interpolation, and support this in the general
+    // case of working at any angle and fitting any size to any size.
+    // Each one becomes the average of the entries 9 positions before and
+    // after it.
+    T1(21) = (T1(12) + T1(30)) * 0.5f;
+    T1(28) = (T1(19) + T1(37)) * 0.5f;
+    T1(35) = (T1(26) + T1(44)) * 0.5f;
+    T1(42) = (T1(33) + T1(51)) * 0.5f;
+
+    for (size_t y = 0; y < 8; y++) {
+      for (size_t x = 0; x < 8; x++) {
+        P(x, y) = T(x, y);
+      }
+    }
+    break;
+  }
+  case Type::IDENTITY: {
+    // Undo the 2x2 sum/difference transform to get the four quadrant means.
+    float dcs[4] = {};
+    float block00 = coefficients[0];
+    float block01 = coefficients[1];
+    float block10 = coefficients[8];
+    float block11 = coefficients[9];
+    dcs[0] = block00 + block01 + block10 + block11;
+    dcs[1] = block00 + block01 - block10 - block11;
+    dcs[2] = block00 - block01 + block10 - block11;
+    dcs[3] = block00 - block01 - block10 + block11;
+    for (size_t y = 0; y < 2; y++) {
+      for (size_t x = 0; x < 2; x++) {
+        float block_dc = dcs[y * 2 + x];
+        // Sum of the 15 stored differences of this quadrant. The (0, 0) slot
+        // is skipped; the (1, 1) slot is included (TransformFromPixels keeps
+        // the difference of pixel (0, 0) there).
+        float residual_sum = 0;
+        for (size_t iy = 0; iy < 4; iy++) {
+          for (size_t ix = 0; ix < 4; ix++) {
+            if (ix == 0 && iy == 0)
+              continue;
+            residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
+          }
+        }
+        // Reference pixel (1, 1) = quadrant mean - mean of the differences.
+        pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
+            block_dc - residual_sum * (1.0f / 16);
+        // Every other pixel = stored difference + reference pixel. Pixel
+        // (0, 0) is computed from the wrong slot here and is overwritten right
+        // after the loop.
+        for (size_t iy = 0; iy < 4; iy++) {
+          for (size_t ix = 0; ix < 4; ix++) {
+            if (ix == 1 && iy == 1)
+              continue;
+            pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
+                coefficients[(y + iy * 2) * 8 + x + ix * 2] +
+                pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
+          }
+        }
+        // Pixel (0, 0) takes its difference from the (1, 1) slot.
+        pixels[y * 4 * pixels_stride + x * 4] =
+            coefficients[(y + 2) * 8 + x + 2] +
+            pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
+      }
+    }
+    break;
+  }
+  case Type::DCT4X4_NOHF:
+  case Type::DCT4X4: {
+    // Undo the 2x2 sum/difference transform of slots 0, 1, 8 and 9 to recover
+    // element 0 of each quadrant's 4x4 DCT.
+    float dcs[4] = {};
+    float block00 = coefficients[0];
+    float block01 = coefficients[1];
+    float block10 = coefficients[8];
+    float block11 = coefficients[9];
+    dcs[0] = block00 + block01 + block10 + block11;
+    dcs[1] = block00 + block01 - block10 - block11;
+    dcs[2] = block00 - block01 + block10 - block11;
+    dcs[3] = block00 - block01 - block10 + block11;
+    for (size_t y = 0; y < 2; y++) {
+      for (size_t x = 0; x < 2; x++) {
+        // De-interleave the quadrant's coefficients into a dense 4x4 block,
+        // then run the 4x4 IDCT straight into the pixels.
+        float block[4 * 4];
+        block[0] = dcs[y * 2 + x];
+        for (size_t iy = 0; iy < 4; iy++) {
+          for (size_t ix = 0; ix < 4; ix++) {
+            if (ix == 0 && iy == 0)
+              continue;
+            block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
+          }
+        }
+        ComputeTransposedScaledIDCT<4>()(
+            FromBlock<4>(block),
+            ToLines<4>(pixels + y * 4 * pixels_stride + x * 4, pixels_stride));
+      }
+    }
+    break;
+  }
+  case Type::DCT2X2: {
+    // Work on a private copy, because the three passes (2, 4, 8) run in place
+    // and `coefficients` is const.
+    SIMD_ALIGN float coeffs[kBlockDim * kBlockDim];
+    memcpy(coeffs, coefficients, sizeof(float) * kBlockDim * kBlockDim);
+    IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
+    IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
+    IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
+    for (size_t y = 0; y < kBlockDim; y++) {
+      for (size_t x = 0; x < kBlockDim; x++) {
+        pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
+      }
+    }
+    break;
+  }
+  case Type::DCT16X16: {
+    // TODO(veluca): Generalize ScaleToBlock and related classes to handle
+    // non-contiguous blocks.
+    // Gathers 2 chunks of 2 * kBlockDim * kBlockDim floats into a contiguous
+    // buffer before the IDCT.
+    // NOTE(review): only the first 4 * kBlockDim * kBlockDim floats of `input`
+    // are filled here; the buffer is declared 4x larger than that.
+    SIMD_ALIGN float input[16 * kBlockDim * kBlockDim];
+    for (size_t i = 0; i < 2; i++) {
+      memcpy(input + 2 * kBlockDim * kBlockDim * i,
+             coefficients + coefficients_stride * i,
+             sizeof(float) * 2 * kBlockDim * kBlockDim);
+    }
+    ComputeTransposedScaledIDCT<2 * kBlockDim>()(
+        FromBlock<2 * kBlockDim>(input),
+        ToLines<2 * kBlockDim>(pixels, pixels_stride));
+    break;
+  }
+  case Type::DCT32X32: {
+    // TODO(veluca): Generalize ScaleToBlock and related classes to handle
+    // non-contiguous blocks.
+    // Gathers 4 chunks of 4 * kBlockDim * kBlockDim floats into a contiguous
+    // buffer before the IDCT.
+    SIMD_ALIGN float input[16 * kBlockDim * kBlockDim];
+    for (size_t i = 0; i < 4; i++) {
+      memcpy(input + 4 * kBlockDim * kBlockDim * i,
+             coefficients + coefficients_stride * i,
+             sizeof(float) * 4 * kBlockDim * kBlockDim);
+    }
+    ComputeTransposedScaledIDCT<4 * kBlockDim>()(
+        FromBlock<4 * kBlockDim>(input),
+        ToLines<4 * kBlockDim>(pixels, pixels_stride));
+    break;
+  }
+  case Type::DCT_NOHF:
+  case Type::DCT: {
+    // Plain kBlockDim-sized IDCT straight from the coefficients.
+    ComputeTransposedScaledIDCT<kBlockDim>()(
+        FromBlock<kBlockDim>(coefficients),
+        ToLines<kBlockDim>(pixels, pixels_stride));
+    break;
+  }
+  }
+}
+
+// The short-named indexing helper macros are not needed past this point;
+// undefine them so they cannot leak into the rest of the file.
+#undef ARRAYSIZE
+#undef C
+#undef T
+#undef P
+#undef C1
+#undef T1
+#undef P1
+
+// Derives the lowest-frequency coefficients of this strategy from DC values.
+//
+// dc / dc_stride:   DC values, one per covered 8x8 block, and the distance
+//                   between their rows.
+// llf / llf_stride: destination and the distance between its rows.
+//
+// Single-block strategies copy the one DC value. DCT16X16 and DCT32X32 run a
+// 2x2 (resp. 4x4) ReinterpretingDCT over the covered DC values and write the
+// 2x2 (resp. 4x4) result. Does nothing unless block_ is 0.
+//
+// This function produces no console output: the debug traces that used to be
+// printed for every block have been removed.
+SIMD_ATTR void AcStrategy::LowestFrequenciesFromDC(const float *PIK_RESTRICT dc,
+                                                   size_t dc_stride, float *llf,
+                                                   size_t llf_stride) const {
+  if (block_)
+    return;
+  switch (strategy_) {
+  case Type::DCT_NOHF:
+  case Type::DCT:
+  case Type::LINES: {
+    llf[0] = dc[0];
+    break;
+  }
+  case Type::DCT16X16: {
+    // Transform into a dense 2x2 scratch first, then scatter with llf_stride.
+    float tmp[4] = {};
+    ReinterpretingDCT<2 * kBlockDim, 2, 2>(dc, dc_stride, tmp, 2);
+    for (size_t y = 0; y < 2; y++) {
+      for (size_t x = 0; x < 2; x++) {
+        llf[y * llf_stride + x] = tmp[y * 2 + x];
+      }
+    }
+    break;
+  }
+  case Type::DCT32X32: {
+    // Transform into a dense 4x4 scratch first, then scatter with llf_stride.
+    float tmp[16] = {};
+    ReinterpretingDCT<4 * kBlockDim, 4, 4>(dc, dc_stride, tmp, 4);
+    for (size_t y = 0; y < 4; y++) {
+      for (size_t x = 0; x < 4; x++) {
+        llf[y * llf_stride + x] = tmp[y * 4 + x];
+      }
+    }
+    break;
+  }
+  case Type::DCT2X2:
+  case Type::DCT4X4_NOHF:
+  case Type::DCT4X4:
+  case Type::IDENTITY:
+    llf[0] = dc[0];
+    break;
+  }
+}
+
+// Recovers DC values from the lowest-frequency coefficients of a block.
+// Single-block strategies copy block[0]. DCT16X16 / DCT32X32 collect the
+// top-left 2x2 / 4x4 coefficients (rows 2 * kBlockDim / 4 * kBlockDim apart)
+// and run ReinterpretingIDCT on them into `dc`. Does nothing unless block_ is
+// 0. `block_stride` is not used.
+SIMD_ATTR void
+AcStrategy::DCFromLowestFrequencies(const float *PIK_RESTRICT block,
+                                    size_t block_stride, float *dc,
+                                    size_t dc_stride) const {
+  if (block_)
+    return;
+  switch (strategy_) {
+  case Type::DCT16X16: {
+    constexpr size_t kSide = 2;
+    float dest[kSide * kSide] = {};
+    for (size_t row = 0; row < kSide; ++row) {
+      const float *src_row = block + 2 * kBlockDim * row;
+      for (size_t col = 0; col < kSide; ++col) {
+        dest[kSide * row + col] = src_row[col];
+      }
+    }
+    ReinterpretingIDCT<2 * kBlockDim, 2, 2>(dest, 2, dc, dc_stride);
+    break;
+  }
+  case Type::DCT32X32: {
+    constexpr size_t kSide = 4;
+    float dest[kSide * kSide] = {};
+    for (size_t row = 0; row < kSide; ++row) {
+      const float *src_row = block + 4 * kBlockDim * row;
+      for (size_t col = 0; col < kSide; ++col) {
+        dest[kSide * row + col] = src_row[col];
+      }
+    }
+    ReinterpretingIDCT<4 * kBlockDim, 4, 4>(dest, 4, dc, dc_stride);
+    break;
+  }
+  // Every strategy covering a single 8x8 block: the DC is the first value.
+  case Type::DCT_NOHF:
+  case Type::DCT:
+  case Type::LINES:
+  case Type::DCT2X2:
+  case Type::DCT4X4:
+  case Type::DCT4X4_NOHF:
+  case Type::IDENTITY:
+    dc[0] = block[0];
+    break;
+  }
+}
+
+// Produces a 2x-upsampled DC image (2x2 values per covered 8x8 block) from the
+// lowest-frequency coefficients only.
+//   DCT / DCT_NOHF / LINES: 2x2 ReinterpretingIDCT of the single value llf[0].
+//   DCT16X16 / DCT32X32:    the 2x2 / 4x4 values of `llf` are placed in the
+//                           top-left corner of a zeroed 4x4 / 8x8 block that
+//                           is then inverse-transformed.
+//   other strategies:       llf[0] is replicated into all four outputs.
+// Does nothing unless block_ is 0.
+SIMD_ATTR void AcStrategy::DC2x2FromLowestFrequencies(
+    const float *PIK_RESTRICT llf, size_t llf_stride, float *PIK_RESTRICT dc2x2,
+    size_t dc2x2_stride) const {
+  if (block_)
+    return;
+  constexpr size_t N = kBlockDim;
+  switch (strategy_) {
+  case Type::DCT_NOHF:
+  case Type::DCT:
+  case Type::LINES: {
+    ReinterpretingIDCT<N, 1, 2>(llf, 0, dc2x2, dc2x2_stride);
+    break;
+  }
+  case Type::DCT16X16: {
+    float dest[16] = {};
+    for (size_t row = 0; row < 2; ++row) {
+      for (size_t col = 0; col < 2; ++col) {
+        dest[row * 4 + col] = llf[row * llf_stride + col];
+      }
+    }
+    ReinterpretingIDCT<2 * N, 2, 4>(dest, 4, dc2x2, dc2x2_stride);
+    break;
+  }
+  case Type::DCT32X32: {
+    float dest[64] = {};
+    for (size_t row = 0; row < 4; ++row) {
+      const float *src_row = llf + row * llf_stride;
+      for (size_t col = 0; col < 4; ++col) {
+        dest[row * 8 + col] = src_row[col];
+      }
+    }
+    ReinterpretingIDCT<4 * N, 4, 8>(dest, 8, dc2x2, dc2x2_stride);
+    break;
+  }
+  case Type::DCT2X2:
+  case Type::DCT4X4:
+  case Type::DCT4X4_NOHF:
+  case Type::IDENTITY: {
+    const float value = llf[0];
+    for (size_t row = 0; row < 2; ++row) {
+      for (size_t col = 0; col < 2; ++col) {
+        dc2x2[row * dc2x2_stride + col] = value;
+      }
+    }
+    break;
+  }
+  }
+}
+
+// Produces a 2x-upsampled DC image (2x2 values per covered 8x8 block) from the
+// low-frequency corner of a coefficient block.
+//   DCT / DCT_NOHF / LINES: 2x2 ReinterpretingIDCT of the top-left 2x2
+//                           coefficients (rows kBlockDim apart).
+//   DCT16X16 / DCT32X32:    the top-left 4x4 / 8x8 coefficients (rows
+//                           2 * kBlockDim / 4 * kBlockDim apart) are gathered
+//                           and inverse-transformed.
+//   other strategies:       unscaled 2x2 sums/differences of block[0],
+//                           block[1], block[kBlockDim], block[kBlockDim + 1].
+// Does nothing unless block_ is 0. `block_stride` is not used.
+SIMD_ATTR void AcStrategy::DC2x2FromLowFrequencies(const float *block,
+                                                   size_t block_stride,
+                                                   float *dc2x2,
+                                                   size_t dc2x2_stride) const {
+  if (block_)
+    return;
+  switch (strategy_) {
+  case Type::DCT_NOHF:
+  case Type::DCT:
+  case Type::LINES: {
+    ReinterpretingIDCT<kBlockDim, 2, 2>(block, kBlockDim, dc2x2, dc2x2_stride);
+    break;
+  }
+  case Type::DCT16X16: {
+    float dest[16] = {};
+    for (size_t row = 0; row < 4; ++row) {
+      const float *src_row = block + 2 * kBlockDim * row;
+      for (size_t col = 0; col < 4; ++col) {
+        dest[4 * row + col] = src_row[col];
+      }
+    }
+    ReinterpretingIDCT<2 * kBlockDim, 4, 4>(dest, 4, dc2x2, dc2x2_stride);
+    break;
+  }
+  case Type::DCT32X32: {
+    float dest[64] = {};
+    for (size_t row = 0; row < 8; ++row) {
+      const float *src_row = block + 4 * kBlockDim * row;
+      for (size_t col = 0; col < 8; ++col) {
+        dest[8 * row + col] = src_row[col];
+      }
+    }
+    ReinterpretingIDCT<4 * kBlockDim, 8, 8>(dest, 8, dc2x2, dc2x2_stride);
+    break;
+  }
+  case Type::DCT2X2:
+  case Type::DCT4X4:
+  case Type::DCT4X4_NOHF:
+  case Type::IDENTITY: {
+    // Read all four inputs before writing any output.
+    const float v00 = block[0];
+    const float v01 = block[1];
+    const float v10 = block[kBlockDim];
+    const float v11 = block[kBlockDim + 1];
+    float *upper = dc2x2;
+    float *lower = dc2x2 + dc2x2_stride;
+    upper[0] = v00 + v01 + v10 + v11;
+    upper[1] = v00 + v01 - v10 - v11;
+    lower[0] = v00 - v01 + v10 - v11;
+    lower[1] = v00 - v01 - v10 + v11;
+    break;
+  }
+  }
+}
+
+// Computes the low-frequency corner of a coefficient block from a
+// 2x-upsampled DC image (2x2 values per covered 8x8 block).
+//   DCT / DCT_NOHF / LINES: 2x2 ReinterpretingDCT written straight to `block`.
+//   DCT16X16 / DCT32X32:    4x4 / 8x8 ReinterpretingDCT into a dense scratch,
+//                           then copied to `block` with block_stride.
+//   other strategies:       2x2 sums/differences of the four inputs, scaled
+//                           by 1/4.
+// Does nothing unless block_ is 0.
+SIMD_ATTR void AcStrategy::LowFrequenciesFromDC2x2(const float *dc2x2,
+                                                   size_t dc2x2_stride,
+                                                   float *block,
+                                                   size_t block_stride) const {
+  if (block_)
+    return;
+  switch (strategy_) {
+  case Type::DCT_NOHF:
+  case Type::DCT:
+  case Type::LINES: {
+    ReinterpretingDCT<kBlockDim, 2, 2>(dc2x2, dc2x2_stride, block,
+                                       block_stride);
+    break;
+  }
+  case Type::DCT16X16: {
+    float dest[16] = {};
+    ReinterpretingDCT<2 * kBlockDim, 4, 4>(dc2x2, dc2x2_stride, dest, 4);
+    for (size_t row = 0; row < 4; ++row) {
+      float *dst_row = block + block_stride * row;
+      for (size_t col = 0; col < 4; ++col) {
+        dst_row[col] = dest[row * 4 + col];
+      }
+    }
+    break;
+  }
+  case Type::DCT32X32: {
+    float dest[64] = {};
+    ReinterpretingDCT<4 * kBlockDim, 8, 8>(dc2x2, dc2x2_stride, dest, 8);
+    for (size_t row = 0; row < 8; ++row) {
+      float *dst_row = block + block_stride * row;
+      for (size_t col = 0; col < 8; ++col) {
+        dst_row[col] = dest[row * 8 + col];
+      }
+    }
+    break;
+  }
+  case Type::DCT2X2:
+  case Type::DCT4X4:
+  case Type::DCT4X4_NOHF:
+  case Type::IDENTITY: {
+    // Read all four inputs before writing any output.
+    const float v00 = dc2x2[0];
+    const float v01 = dc2x2[1];
+    const float v10 = dc2x2[dc2x2_stride];
+    const float v11 = dc2x2[dc2x2_stride + 1];
+    block[0] = (v00 + v01 + v10 + v11) * 0.25f;
+    block[1] = (v00 + v01 - v10 - v11) * 0.25f;
+    block[block_stride] = (v00 - v01 + v10 - v11) * 0.25f;
+    block[block_stride + 1] = (v00 - v01 - v10 + v11) * 0.25f;
+    break;
+  }
+  }
+}
+
+// Fills the `rect` region of layers_ from an image of raw strategy values.
+// Each stored byte is (raw_strategy << 4) | index of the block within the area
+// covered by its strategy. A strategy covering several blocks is read from
+// `raw_layers` only at its top-left block; the remaining covered blocks are
+// written at the same time and skipped when the scan reaches them.
+void AcStrategyImage::SetFromRaw(const Rect &rect, const ImageB &raw_layers) {
+  PIK_ASSERT(rect.IsInside(layers_));
+  PIK_ASSERT(rect.xsize() <= raw_layers.xsize());
+  PIK_ASSERT(rect.ysize() <= raw_layers.ysize());
+  const size_t stride = layers_.PixelsPerRow();
+
+  // Pass 1: mark every block of the region as not yet assigned.
+  for (size_t y = 0; y < rect.ysize(); ++y) {
+    uint8_t *PIK_RESTRICT row = rect.Row(&layers_, y);
+    for (size_t x = 0; x < rect.xsize(); ++x) {
+      row[x] = INVALID;
+    }
+  }
+
+  // Pass 2: assign every block that is still unassigned, together with all
+  // the blocks its strategy covers.
+  for (size_t y = 0; y < rect.ysize(); ++y) {
+    const uint8_t *PIK_RESTRICT row_in = raw_layers.Row(y);
+    uint8_t *PIK_RESTRICT row = rect.Row(&layers_, y);
+    for (size_t x = 0; x < rect.xsize(); ++x) {
+      if (row[x] == INVALID) {
+        const uint8_t raw_strategy = row_in[x];
+#ifdef ADDRESS_SANITIZER
+        PIK_ASSERT(AcStrategy::IsRawStrategyValid(raw_strategy));
+#endif
+        AcStrategy acs = AcStrategy::FromRawStrategy(raw_strategy);
+#ifdef ADDRESS_SANITIZER
+        PIK_ASSERT(y + acs.covered_blocks_y() <= rect.ysize());
+        PIK_ASSERT(x + acs.covered_blocks_x() <= rect.xsize());
+#endif
+        uint8_t *first = row + x;
+        for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+          for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+            first[ix + iy * stride] =
+                (raw_strategy << 4) | (iy * acs.covered_blocks_x() + ix);
+          }
+        }
+      }
+    }
+  }
+}
+
+// Fills the `rect` region of layers_ directly from an array of already
+// encoded strategy bytes (each value is narrowed to uint8_t).
+//
+// rect: region of layers_ to overwrite.
+// data: one value per block of `rect`.
+//       NOTE(review): assumed to be dense and row-major with rect.xsize()
+//       values per row — confirm against the callers.
+//
+// Fixes over the previous version:
+//  * `row` already points at row y of the rect, so the destination is row[x];
+//    adding y * stride again wrote to the wrong row and could run past the end
+//    of the image.
+//  * the source row pitch is rect.xsize(), not rect.ysize(); the two only
+//    coincide for square rects.
+void AcStrategyImage::SetFromArray(const Rect &rect, uint32_t data[]) {
+  const size_t xsize = rect.xsize();
+  for (size_t y = 0; y < rect.ysize(); ++y) {
+    uint8_t *PIK_RESTRICT row = rect.Row(&layers_, y);
+    const uint32_t *row_in = data + y * xsize;
+    for (size_t x = 0; x < xsize; ++x) {
+      row[x] = static_cast<uint8_t>(row_in[x]);
+    }
+  }
+}
+
+// Counts the entries of layers_ equal to (type << 4), i.e. entries that carry
+// `type` in the upper bits and 0 in the low four bits.
+size_t AcStrategyImage::CountBlocks(AcStrategy::Type type) const {
+  // Kept as int: the shift result is compared without narrowing to 8 bits.
+  const int wanted = static_cast<uint8_t>(type) << 4;
+  size_t count = 0;
+  for (size_t y = 0; y < layers_.ysize(); ++y) {
+    const uint8_t *PIK_RESTRICT row = layers_.ConstRow(y);
+    for (size_t x = 0; x < layers_.xsize(); ++x) {
+      if (row[x] == wanted) {
+        ++count;
+      }
+    }
+  }
+  return count;
+}
+
+// Chooses, for every 8x8 block of `src`, which transform to use (DCT, DCT4X4,
+// DCT16X16 or DCT32X32) and stores the result in `*ac_strategy`.
+//
+// butteraugli_target: scales the entropy estimate (discretization factor and
+//     quant-inhomogeneity penalty) and selects the 32x32 preference factor.
+// quant_field: per-block quantization values, read at block coordinates.
+// dequant: source of the DCT8/DCT16/DCT32 matrices used to weight coefficients.
+// src: input image; its size is truncated to whole kBlockDim blocks.
+// pool: passed to RunOnPool to process block rows.
+// ac_strategy: output; always reset to an all-DCT image of the block size
+//     first. If kChooseAcStrategy is false nothing else is done.
+// aux_out: if non-null, receives the per-type block counts.
+//
+// Decision order per block: DCT4X4 if the 4x4 loss reaches its threshold;
+// otherwise DCT32X32 / DCT16X16 when the block is suitably aligned, fits in
+// the image, was not disabled by the first pass and the estimated entropy of
+// the large transform beats the 8x8 estimate; otherwise DCT.
+SIMD_ATTR void FindBestAcStrategy(float butteraugli_target,
+                                  const ImageF *quant_field,
+                                  const DequantMatrices &dequant,
+                                  const Image3F &src, ThreadPool *pool,
+                                  AcStrategyImage *ac_strategy,
+                                  PikInfo *aux_out) {
+  // TODO(veluca): this function does *NOT* know the actual quantization field
+  // values, and thus is not able to make choices taking into account the actual
+  // quantization matrix.
+  PROFILER_FUNC;
+  size_t xsize_blocks = src.xsize() / kBlockDim;
+  size_t ysize_blocks = src.ysize() / kBlockDim;
+  Image3F coeffs = Image3F(xsize_blocks * kBlockDim * kBlockDim, ysize_blocks);
+  TransposedScaledDCT(src, &coeffs);
+  *ac_strategy = AcStrategyImage(xsize_blocks, ysize_blocks);
+  if (!kChooseAcStrategy) {
+    return;
+  }
+  // One flag per block. Bytes are used instead of std::vector<bool> because
+  // the flags are written from several pool tasks: vector<bool> packs bits, so
+  // concurrent writes to neighbouring elements would race on the same word.
+  std::vector<uint8_t> disable_dct16(xsize_blocks * ysize_blocks);
+  std::vector<uint8_t> disable_dct32(xsize_blocks * ysize_blocks);
+
+  // Computes the "4x4 loss" of the 8x8 block (bx, by). For each of the four
+  // 4x4 quadrants it sums absolute horizontal and vertical neighbour
+  // differences inside the quadrant, weighted per plane by `color_weights`.
+  // The four quadrant sums are then combined through their power means:
+  //   mul_norm8 * norm8 / norm2 + mul_norm4 * norm4 / norm2.
+  const auto compute_loss_4x4 = [&](int bx, int by, const float *color_weights,
+                                    float mul_norm8,
+                                    float mul_norm4) SIMD_ATTR {
+    float blockval[4];
+    for (int ix = 0; ix < 4; ++ix) {
+      float total_sum = 0;
+      // Quadrant offsets inside the 8x8 block: (0,0), (4,0), (0,4), (4,4).
+      int offx = (ix & 1) * 4;
+      int offy = (ix & 2) * 2;
+      for (size_t c = 0; c < src.kNumPlanes; c++) {
+        float sum = 0;
+        for (size_t iy = 0; iy < 3; iy++) {
+          const float *row0 =
+              src.ConstPlaneRow(c, by * kBlockDim + offy + iy);
+          const float *row1 =
+              src.ConstPlaneRow(c, by * kBlockDim + offy + iy + 1);
+          for (size_t dx = 0; dx < 3; dx++) {
+            int x = bx * kBlockDim + offx + dx;
+            sum += fabs(row0[x] - row0[x + 1]) + fabs(row0[x] - row1[x]);
+          }
+          {
+            // Last column of the quadrant: vertical difference only.
+            int x = bx * kBlockDim + offx + 3;
+            sum += fabs(row0[x] - row1[x]);
+          }
+        }
+        // Last row of the quadrant: horizontal differences only.
+        int iy = 3;
+        const float *row0 = src.ConstPlaneRow(c, by * kBlockDim + offy + iy);
+        for (size_t dx = 0; dx < 3; dx++) {
+          int x = bx * kBlockDim + offx + dx;
+          sum += fabs(row0[x] - row0[x + 1]);
+        }
+        total_sum += color_weights[c] * sum;
+      }
+      blockval[ix] = total_sum;
+    }
+    float norm2 = 0.0;
+    float norm4 = 0.0;
+    float norm8 = 0.0;
+    for (int ix = 0; ix < 4; ++ix) {
+      float v = blockval[ix];
+      v *= v;
+      norm2 += v;
+      v *= v;
+      norm4 += v;
+      v *= v;
+      norm8 += v;
+    }
+    norm2 = std::pow(norm2 * (1.0 / 4), 0.5);
+    norm4 = std::pow(norm4 * (1.0 / 4), 0.25);
+    norm8 = std::pow(norm8 * (1.0 / 4), 0.125);
+    // Offset applied to the denominator before dividing.
+    norm2 += 0.03;
+
+    float loss = mul_norm8 * norm8 / norm2;
+    loss += mul_norm4 * norm4 / norm2;
+    return loss;
+  };
+
+  const auto disable_large_transforms = [&](int bx, int by) SIMD_ATTR {
+    // If we find a well-fitting DCT4x4 within the larger block,
+    // we disable the larger block.
+    // 4x4 DCT needs less focus on B channel, since at that resolution
+    // blue needs to be correct only by average.
+    static const float kColorWeights4x4[3] = {
+        0.60349588292079182, 1.5435289569786645, 0.33080849938060852,
+    };
+    const float kMul1 = 0.86101693093148191;
+    const float kMul2 = -0.18168363725368566;
+    static const float loss_4x4_limit0 = 1.0861540086721586;
+    const float loss_4x4 =
+        compute_loss_4x4(bx, by, kColorWeights4x4, kMul1, kMul2);
+    if (loss_4x4 >= loss_4x4_limit0) {
+      // NOTE(review): debug trace written to stdout from pool tasks; output
+      // order across tasks is unspecified. Consider removing or gating it.
+      std::cout<<"std_disable16: by="<<by<<" bx="<<bx<<std::endl;
+
+      // The flag lives at the top-left block of the enclosing aligned 32x32
+      // (resp. 16x16) area. Callers must ensure that all blocks sharing such
+      // an area are processed by the same task (see the row grouping below).
+      disable_dct32[(by & ~3) * xsize_blocks + (bx & ~3)] = true;
+      disable_dct16[(by & ~1) * xsize_blocks + (bx & ~1)] = true;
+    }
+  };
+  const auto find_block_strategy = [&](int bx, int by) SIMD_ATTR {
+#if ENABLE_DIAGONAL_LINES_EXPERIMENT
+    return AcStrategy::Type::LINES;
+#endif // ENABLE_DIAGONAL_LINES_EXPERIMENT
+    // The quantized symbol distribution contracts with the increasing
+    // butteraugli_target.
+    const float discretization_factor =
+        100 * (6.9654004856811754) / butteraugli_target;
+    // A value below 1.0 to favor 8x8s when all things are equal.
+    // 16x16 has wider reach of oscillations and this part of the
+    // computation is not aware of visual masking. Inhomogeneous
+    // visual masking will propagate accuracy further with 16x16 than
+    // with 8x8 dcts.
+    const float kFavor8x8Dct = 0.978192691479985;
+    float kFavor8x8DctOver32x32 = 0.74742417168628905;
+    if (butteraugli_target >= 6.0) {
+      kFavor8x8DctOver32x32 = 0.737101360945845;
+    }
+    static const float kColorWeights[3] = {
+        0.65285453568125873, 2.4740163893371157, 2.0140216656143393,
+    };
+    // DCT4X4
+    {
+      // 4x4 DCT needs less focus on B channel, since at that resolution
+      // blue needs to be correct only by average.
+      static const float kColorWeights4x4[3] = {
+          0.76084140985773008, 0.9344031093258709, 0.31536647913297183,
+      };
+      const float kMul1 = 0.84695221371792806;
+      const float kMul2 = -0.012220022434342694;
+      static const float loss_4x4_limit0 = 1.079485914917413;
+      const float loss_4x4 =
+          compute_loss_4x4(bx, by, kColorWeights4x4, kMul1, kMul2);
+      if (loss_4x4 >= loss_4x4_limit0) {
+        return AcStrategy::Type::DCT4X4;
+      }
+    }
+
+    const float kPow = 0.99263297216052859;
+    const float kPow2 = 0.018823021573462634;
+    const float kExtremityWeight16x16 = 7.77;
+    const float kExtremityWeight32x32 = 7.77;
+    // DCT32: only considered at the top-left block of an aligned 4x4 group of
+    // blocks that lies fully inside the image.
+    if (!disable_dct32[by * xsize_blocks + bx] && bx + 3 < xsize_blocks &&
+        by + 3 < ysize_blocks && (bx & 3) == 0 && (by & 3) == 0) {
+      static const float kDiff = 0.9539527585329598;
+      // Entropy estimate of coding the area with sixteen 8x8 DCTs.
+      float dct8x8_entropy = 0;
+      for (size_t c = 0; c < coeffs.kNumPlanes; c++) {
+        float entropy = 0;
+        float min_ext = 1e30;
+        float max_ext = -1e30;
+        for (size_t iy = 0; iy < 4 && by + iy < ysize_blocks; iy++) {
+          // Track the smallest and largest pixel range (max - min) among the
+          // 8x8 blocks of the area.
+          for (size_t ix = 0; ix < 4 && bx + ix < xsize_blocks; ix++) {
+            float min8x8 = 1e30;
+            float max8x8 = -1e30;
+            for (int dy = 0; dy < 8; ++dy) {
+              const float *src_row =
+                  src.ConstPlaneRow(c, (by + iy) * kBlockDim + dy);
+              for (int dx = 0; dx < 8; ++dx) {
+                float v = src_row[(bx + ix) * kBlockDim + dx];
+                if (v < min8x8)
+                  min8x8 = v;
+                if (v > max8x8)
+                  max8x8 = v;
+              }
+            }
+            float ext = max8x8 - min8x8;
+            if (ext < min_ext)
+              min_ext = ext;
+            if (ext > max_ext)
+              max_ext = ext;
+          }
+          const float *row = coeffs.ConstPlaneRow(c, by + iy);
+          int bx_actual = bx;
+          for (size_t ix = 1; ix < kBlockDim * kBlockDim * 4; ix++) {
+            // Skip the dc value at the start of every 64-coefficient block
+            // (index 0 is skipped by starting the loop at 1).
+            if ((ix & 63) == 0) {
+              bx_actual++;
+              continue;
+            }
+            float mul = 1.0f / dequant.Matrix(0, kQuantKindDCT8, c)[ix & 63];
+            float val = mul * row[bx * kBlockDim * kBlockDim + ix];
+            val *= quant_field->ConstRow(by + iy)[bx_actual];
+            float v = fabsf(val) * discretization_factor;
+            entropy += 1 + kDiff - pow(kPow, v) - kDiff * pow(kPow2, v);
+          }
+        }
+        entropy -= kExtremityWeight32x32 * (max_ext - min_ext);
+        dct8x8_entropy += kColorWeights[c] * entropy;
+      }
+
+      // quant_inhomogeneity = 16 * max(quant) - sum(quant) over the area.
+      float quant_inhomogeneity = 0;
+      float max_quant = -1e30;
+      for (int dy = 0; dy < 4; ++dy) {
+        for (int dx = 0; dx < 4; ++dx) {
+          float quant = quant_field->ConstRow(by + dy)[bx + dx];
+          max_quant = std::max(max_quant, quant);
+          quant_inhomogeneity -= quant;
+        }
+      }
+      quant_inhomogeneity += 16 * max_quant;
+      float kMulInho = (-47.780 * (-4.270639713545533)) / butteraugli_target;
+      dct8x8_entropy += kMulInho * quant_inhomogeneity;
+      // Entropy estimate of coding the area with a single 32x32 DCT.
+      float dct32x32_entropy = 0;
+      for (size_t c = 0; c < src.kNumPlanes; c++) {
+        float entropy = 0;
+        SIMD_ALIGN float dct32x32[16 * kBlockDim * kBlockDim] = {};
+        AcStrategy acs(AcStrategy::Type::DCT32X32, 0);
+        acs.TransformFromPixels(
+            src.PlaneRow(c, kBlockDim * by) + kBlockDim * bx,
+            src.PixelsPerRow(), dct32x32, 4 * kBlockDim * kBlockDim);
+        for (size_t k = 0; k < 16 * kBlockDim * kBlockDim; k++) {
+          if (k < 4 || (k < 36 && k > 31) || (k < 68 && k > 63) ||
+              (k < 100 && k > 95)) {
+            // Leave out the lowest frequencies.
+            continue;
+          }
+          float mul = 1.0f / dequant.Matrix(0, kQuantKindDCT32, c)[k];
+          float val = mul * dct32x32[k];
+          val *= max_quant;
+          float v = fabsf(val) * discretization_factor;
+          entropy += 1 + kDiff - pow(kPow, v) - kDiff * pow(kPow2, v);
+        }
+        dct32x32_entropy += kColorWeights[c] * entropy;
+      }
+      if (dct32x32_entropy < kFavor8x8DctOver32x32 * dct8x8_entropy) {
+        return AcStrategy::Type::DCT32X32;
+      }
+    }
+
+    // DCT16: only considered at the top-left block of an aligned 2x2 group of
+    // blocks that lies fully inside the image.
+    if (!disable_dct16[by * xsize_blocks + bx] && bx + 1 < xsize_blocks &&
+        by + 1 < ysize_blocks && (bx & 1) == 0 && (by & 1) == 0) {
+      static const float kDiff = 0.2494383590606063;
+      // Entropy estimate of coding the area with four 8x8 DCTs.
+      float dct8x8_entropy = 0;
+      for (size_t c = 0; c < coeffs.kNumPlanes; c++) {
+        float entropy = 0;
+        float min_ext = 1e30;
+        float max_ext = -1e30;
+        for (size_t iy = 0; iy < 2 && by + iy < ysize_blocks; iy++) {
+          for (size_t ix = 0; ix < 2 && bx + ix < xsize_blocks; ix++) {
+            float min8x8 = 1e30;
+            float max8x8 = -1e30;
+            for (int dy = 0; dy < 8; ++dy) {
+              const float *src_row =
+                  src.ConstPlaneRow(c, (by + iy) * kBlockDim + dy);
+              for (int dx = 0; dx < 8; ++dx) {
+                float v = src_row[(bx + ix) * kBlockDim + dx];
+                if (v < min8x8)
+                  min8x8 = v;
+                if (v > max8x8)
+                  max8x8 = v;
+              }
+            }
+            float ext = max8x8 - min8x8;
+            if (ext < min_ext)
+              min_ext = ext;
+            if (ext > max_ext)
+              max_ext = ext;
+          }
+          const float *row = coeffs.ConstPlaneRow(c, by + iy);
+          int bx_actual = bx;
+          for (size_t ix = 1; ix < kBlockDim * kBlockDim * 2; ix++) {
+            // Skip the dc values at 0 and 64.
+            if (ix == 64) {
+              bx_actual++;
+              continue;
+            }
+            float mul = 1.0f / dequant.Matrix(0, kQuantKindDCT8, c)[ix & 63];
+            float val = mul * row[bx * kBlockDim * kBlockDim + ix];
+            val *= quant_field->ConstRow(by + iy)[bx_actual];
+            float v = fabsf(val) * discretization_factor;
+            entropy += 1 + kDiff - pow(kPow, v) - kDiff * pow(kPow2, v);
+          }
+        }
+        entropy -= kExtremityWeight16x16 * (max_ext - min_ext);
+        dct8x8_entropy += kColorWeights[c] * entropy;
+      }
+      float max_quant = std::max<float>(
+          std::max<float>(quant_field->ConstRow(by)[bx],
+                           quant_field->ConstRow(by)[bx + 1]),
+          std::max<float>(quant_field->ConstRow(by + 1)[bx],
+                           quant_field->ConstRow(by + 1)[bx + 1]));
+      // quant_inhomogeneity = 4 * max(quant) - sum(quant) over the area.
+      float quant_inhomogeneity =
+          4 * max_quant -
+          (quant_field->ConstRow(by)[bx] + quant_field->ConstRow(by)[bx + 1] +
+           quant_field->ConstRow(by + 1)[bx] +
+           quant_field->ConstRow(by + 1)[bx + 1]);
+      float kMulInho = (-47.780 * (3.9429727851421288)) / butteraugli_target;
+      dct8x8_entropy += kMulInho * quant_inhomogeneity;
+      // Entropy estimate of coding the area with a single 16x16 DCT.
+      float dct16x16_entropy = 0;
+      for (size_t c = 0; c < src.kNumPlanes; c++) {
+        float entropy = 0;
+        SIMD_ALIGN float dct16x16[4 * kBlockDim * kBlockDim] = {};
+        AcStrategy acs = AcStrategy(AcStrategy::Type::DCT16X16, 0);
+        acs.TransformFromPixels(
+            src.PlaneRow(c, kBlockDim * by) + kBlockDim * bx,
+            src.PixelsPerRow(), dct16x16, 2 * kBlockDim * kBlockDim);
+        for (size_t k = 0; k < 4 * kBlockDim * kBlockDim; k++) {
+          if (k < 2 || (k < 18 && k > 15)) {
+            // Leave out the lowest frequencies.
+            continue;
+          }
+          float mul = 1.0f / dequant.Matrix(0, kQuantKindDCT16, c)[k];
+          float val = mul * dct16x16[k];
+          val *= max_quant;
+          float v = fabsf(val) * discretization_factor;
+          entropy += 1 + kDiff - pow(kPow, v) - kDiff * pow(kPow2, v);
+        }
+        dct16x16_entropy += kColorWeights[c] * entropy;
+      }
+
+      if (dct16x16_entropy < kFavor8x8Dct * dct8x8_entropy) {
+        return AcStrategy::Type::DCT16X16;
+      }
+    }
+    return AcStrategy::Type::DCT;
+  };
+  ImageB raw_ac_strategy(xsize_blocks, ysize_blocks);
+  // First pass: mark the large transforms that must not be used. Each task
+  // handles one group of 4 consecutive block rows. All flags written for rows
+  // 4g..4g+3 live in rows 4g and 4g+2 of the flag arrays, so distinct tasks
+  // never write the same element.
+  const size_t num_row_groups = (ysize_blocks + 3) / 4;
+  RunOnPool(pool, 0, num_row_groups, [&](int group, int _) {
+    const size_t y_begin = static_cast<size_t>(group) * 4;
+    const size_t y_end = std::min<size_t>(y_begin + 4, ysize_blocks);
+    for (size_t y = y_begin; y < y_end; y++) {
+      for (size_t x = 0; x < xsize_blocks; x++) {
+        disable_large_transforms(x, y);
+      }
+    }
+  });
+  // Second pass: pick a strategy for every block; the flags are only read.
+  RunOnPool(pool, 0, ysize_blocks, [&](int y, int _) {
+    uint8_t *PIK_RESTRICT row = raw_ac_strategy.Row(y);
+    for (size_t x = 0; x < xsize_blocks; x++) {
+      row[x] = static_cast<uint8_t>(find_block_strategy(x, y));
+    }
+  });
+
+  ac_strategy->SetFromRaw(Rect(raw_ac_strategy), raw_ac_strategy);
+  if (aux_out != nullptr) {
+    aux_out->num_dct2_blocks =
+        ac_strategy->CountBlocks(AcStrategy::Type::DCT2X2);
+    aux_out->num_dct4_blocks =
+        ac_strategy->CountBlocks(AcStrategy::Type::DCT4X4);
+    aux_out->num_dct16_blocks =
+        ac_strategy->CountBlocks(AcStrategy::Type::DCT16X16);
+    aux_out->num_dct32_blocks =
+        ac_strategy->CountBlocks(AcStrategy::Type::DCT32X32);
+  }
+  // If every block ended up as plain DCT, replace the result with a freshly
+  // constructed all-DCT image.
+  if (ac_strategy->CountBlocks(AcStrategy::Type::DCT) ==
+      xsize_blocks * ysize_blocks) {
+    *ac_strategy = AcStrategyImage(xsize_blocks, ysize_blocks);
+  }
+  if (WantDebugOutput(aux_out)) {
+    aux_out->DumpImage("ac_strategy_type", ac_strategy->ConstRaw());
+  }
+}
+
+} // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/ac_strategy.h b/codec/L2/demos/pikEnc/host/pik/ac_strategy.h
new file mode 100755
index 0000000000..66dc552818
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/ac_strategy.h
@@ -0,0 +1,374 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_AC_STRATEGY_H_
+#define PIK_AC_STRATEGY_H_
+
+#include <stdint.h>
+#include "pik/common.h"
+#include "pik/data_parallel.h"
+#include "pik/dct.h"
+#include "pik/dct_util.h"
+#include "pik/image.h"
+#include "pik/pik_info.h"
+#include "pik/quant_weights.h"
+
+// Defines the different kinds of transforms, and heuristics to choose between
+// them.
+// `AcStrategy` represents what transform should be used, and which sub-block of
+// that transform we are currently in. Note that DCT4x4 is applied on all four
+// 4x4 sub-blocks of an 8x8 block.
+// `AcStrategyImage` defines which strategy should be used for each 8x8 block
+// of the image. The highest 4 bits represent the strategy to be used, the
+// lowest 4 represent the index of the block inside that strategy. Blocks should
+// be aligned, i.e. 32x32 blocks should only start in positions that are
+// multiples of 32.
+// `FindBestAcStrategy` uses heuristics to choose which AC strategy should be
+// used in each block.
+
+namespace pik {
+namespace detail {
+// Compile-time scale factor for one low-frequency coefficient of an SZ-point
+// transform: SZ times the IDCTScales and L1Norm entries at indices BX and BY.
+// AcStrategy::ARLowestFrequencyScales() uses it to fill its tables, with
+// SZ = 2 * kBlockDim or 4 * kBlockDim, BX as the index within a table row and
+// BY as the row index.
+template <size_t SZ, size_t BX, size_t BY>
+static constexpr float ARLowestFrequencyScale() {
+  return SZ * IDCTScales<SZ>()[BX] * IDCTScales<SZ>()[BY] * L1Norm<SZ>()[BX] *
+         L1Norm<SZ>()[BY];
+}
+}  // namespace detail
+
+// Describes the transform applied to one 8x8 block: the transform type plus
+// the index of this block inside a (possibly multi-block) transform.
+class AcStrategy {
+ public:
+  // Extremal values for the number of blocks/coefficients of a single strategy.
+  static constexpr size_t kMaxCoeffBlocks = 4;
+  static constexpr size_t kMaxBlockDim = kBlockDim * kMaxCoeffBlocks;
+  static constexpr size_t kMaxCoeffArea = kMaxBlockDim * kMaxBlockDim;
+  // kMaxBlockDim rounded up to a multiple of the SIMD float vector width.
+  static constexpr size_t kLLFMaskDim =
+      DivCeil(kMaxBlockDim, SIMD_FULL(float)::N) * SIMD_FULL(float)::N;
+
+  // Raw strategy types.
+  enum class Type : uint32_t {
+    // Regular block size DCT (value matches kQuantKind)
+    DCT = 0,
+    // Encode pixels without transforming (value matches kQuantKind)
+    IDENTITY = 1,
+    // Use 2-by-2 DCT (value matches kQuantKind)
+    DCT2X2 = 2,
+    // Use 4-by-4 DCT (value matches kQuantKind)
+    DCT4X4 = 3,
+    // Use 16-by-16 DCT
+    DCT16X16 = 4,
+    // Use 32-by-32 DCT
+    DCT32X32 = 5,
+    // Angled lines (currently hardcoded for 45 degrees)
+    LINES = 6,
+    // Use 8-by-8 DCT, no HF prediction
+    DCT_NOHF = 7,
+    // Use 4-by-4 DCT, no HF prediction
+    DCT4X4_NOHF = 8,
+  };
+
+  // Creates the strategy for block number `block` of a `strategy` transform.
+  // The range checks below are only compiled in ADDRESS_SANITIZER builds:
+  // `block` must be 0 for single-block strategies, < 4 for DCT16X16 and < 16
+  // for DCT32X32.
+  PIK_INLINE AcStrategy(Type strategy, uint32_t block)
+      : strategy_(strategy), block_(block) {
+#ifdef ADDRESS_SANITIZER
+    PIK_ASSERT(strategy == Type::DCT16X16 || strategy == Type::DCT32X32 ||
+               block == 0);
+    PIK_ASSERT(strategy == Type::DCT32X32 || block < 4);
+    PIK_ASSERT(block < 16);
+#endif
+  }
+
+  // Returns true if this block is the first 8x8 block (i.e. top-left) of a
+  // possibly multi-block strategy.
+  PIK_INLINE bool IsFirstBlock() const { return block_ == 0; }
+
+  // Returns the raw strategy value. Should only be used for tokenization.
+  PIK_INLINE uint8_t RawStrategy() const {
+    return static_cast<uint8_t>(strategy_);
+  }
+
+  // Accessors for the transform type and the block index within it.
+  PIK_INLINE Type Strategy() const { return strategy_; }
+  PIK_INLINE size_t Block() const { return block_; }
+
+  // Inverse check
+  // Returns true if `raw_strategy` is the value of one of the Type
+  // enumerators (0 through 8).
+  static PIK_INLINE bool IsRawStrategyValid(uint8_t raw_strategy) {
+    return raw_strategy <= 8;
+  }
+  // Builds the first-block (block index 0) strategy for a raw type value.
+  // The value itself is not validated here.
+  static PIK_INLINE AcStrategy FromRawStrategy(uint8_t raw_strategy) {
+    return AcStrategy((Type)raw_strategy, 0);
+  }
+
+  // Get the quant kind for this type of strategy.
+  // The `block` argument is not used. The *_NOHF types map to the quant kind
+  // of their regular counterpart; for every other type the static_asserts
+  // guarantee that the enum value is itself the quant kind.
+  PIK_INLINE size_t GetQuantKind(size_t block = 0) const {
+    static_assert(kMaxCoeffArea == kMaxQuantTableSize,
+                  "Maximum coefficient area should be the same as maximum "
+                  "quant table size!");
+    if (strategy_ == Type::DCT_NOHF) return kQuantKindDCT8;
+    if (strategy_ == Type::DCT4X4_NOHF) return kQuantKindDCT4;
+
+    static_assert(kQuantKindDCT8 == size_t(Type::DCT), "QuantKind != type");
+    static_assert(kQuantKindID == size_t(Type::IDENTITY), "QuantKind != type");
+    static_assert(kQuantKindDCT4 == size_t(Type::DCT4X4), "QuantKind != type");
+    static_assert(kQuantKindDCT2 == size_t(Type::DCT2X2), "QuantKind != type");
+    static_assert(kQuantKindDCT16 == size_t(Type::DCT16X16),
+                  "QuantKind != type");
+    static_assert(kQuantKindDCT32 == size_t(Type::DCT32X32),
+                  "QuantKind != type");
+    static_assert(kQuantKindLines == size_t(Type::LINES), "QuantKind != type");
+    return static_cast<size_t>(strategy_);
+  }
+
+  // Returns a per-strategy scale constant; 1.0f for every type not listed.
+  // NOTE(review): presumably a multiplier applied during adaptive
+  // reconstruction ("AR") quantization -- confirm against the callers.
+  PIK_INLINE float ARQuantScale() const {
+    if (strategy_ == Type::DCT32X32) return 1.2282996852328099;
+    if (strategy_ == Type::DCT16X16) return 1.1423171621463439;
+    // TODO(veluca): find better values.
+    if (strategy_ == Type::DCT4X4 || strategy_ == Type::DCT4X4_NOHF)
+      return 1.2f;
+    if (strategy_ == Type::DCT2X2) return 1.0098134203870499;
+    return 1.0f;
+  }
+
+  // Returns false for DCT2X2, IDENTITY, DCT_NOHF, DCT4X4_NOHF and LINES, and
+  // true for all other types.
+  PIK_INLINE bool PredictHF() const {
+    return strategy_ != Type::DCT2X2 && strategy_ != Type::IDENTITY &&
+           strategy_ != Type::DCT_NOHF && strategy_ != Type::DCT4X4_NOHF &&
+           strategy_ != Type::LINES;
+  }
+
+  // Number of 8x8 blocks that this strategy will cover. 0 for non-top-left
+  // blocks inside a multi-block transform.
+  PIK_INLINE size_t covered_blocks_x() const {
+    if (strategy_ == Type::DCT32X32) return block_ == 0 ? 4 : 0;
+    if (strategy_ == Type::DCT16X16) return block_ == 0 ? 2 : 0;
+    return 1;
+  }
+  PIK_INLINE size_t covered_blocks_y() const {
+    if (strategy_ == Type::DCT32X32) return block_ == 0 ? 4 : 0;
+    if (strategy_ == Type::DCT16X16) return block_ == 0 ? 2 : 0;
+    return 1;
+  }
+
+  // 1 / covered_block_x() / covered_block_y(), for fast division.
+  // Should only be called with block_ == 0.
+  PIK_INLINE float inverse_covered_blocks() const {
+#ifdef ADDRESS_SANITIZER
+    PIK_ASSERT(block_ == 0);
+#endif
+    if (strategy_ == Type::DCT32X32) return 1.0f / 16;
+    if (strategy_ == Type::DCT16X16) return 0.25f;
+    return 1.0f;
+  }
+
+  // Reciprocal of (coefficients in the transform - number of 8x8 blocks it
+  // covers): 1/(1024-16), 1/(256-4) or 1/(64-1).
+  // Like inverse_covered_blocks(), asserts block_ == 0 in ADDRESS_SANITIZER
+  // builds.
+  PIK_INLINE float InverseNumACCoefficients() const {
+#ifdef ADDRESS_SANITIZER
+    PIK_ASSERT(block_ == 0);
+#endif
+    if (strategy_ == Type::DCT32X32) return 1.0f / (32 * 32 - 16);
+    if (strategy_ == Type::DCT16X16) return 1.0f / (16 * 16 - 4);
+    return 1.0f / (8 * 8 - 1);
+  }
+
+  // Returns a pointer to kLLFMaskDim floats: row `y` of the table of
+  // ARLowestFrequencyScale values for this strategy. Entries beyond the
+  // explicitly initialized ones are zero. For single-block strategies `y` is
+  // ignored and the row is {1.0f, 0, ...}. `y` is not range-checked; it must
+  // be < 2 for DCT16X16 and < 4 for DCT32X32.
+  const float* ARLowestFrequencyScales(size_t y) {
+    using detail::ARLowestFrequencyScale;
+    switch (strategy_) {
+      case Type::DCT2X2:
+      case Type::IDENTITY:
+      case Type::DCT:
+      case Type::DCT4X4:
+      case Type::DCT_NOHF:
+      case Type::DCT4X4_NOHF:
+      case Type::LINES: {
+        SIMD_ALIGN static const constexpr float scales[kLLFMaskDim] = {1.0f};
+        return scales;
+      }
+      case Type::DCT16X16: {
+        SIMD_ALIGN static const constexpr float scales[2][kLLFMaskDim] = {
+            {ARLowestFrequencyScale<2 * kBlockDim, 0, 0>(),
+             ARLowestFrequencyScale<2 * kBlockDim, 1, 0>()},
+            {ARLowestFrequencyScale<2 * kBlockDim, 0, 1>(),
+             ARLowestFrequencyScale<2 * kBlockDim, 1, 1>()}};
+        return scales[y];
+      }
+      case Type::DCT32X32: {
+        SIMD_ALIGN static const constexpr float scales[4][kLLFMaskDim] = {
+            {
+                ARLowestFrequencyScale<4 * kBlockDim, 0, 0>(),
+                ARLowestFrequencyScale<4 * kBlockDim, 1, 0>(),
+                ARLowestFrequencyScale<4 * kBlockDim, 2, 0>(),
+                ARLowestFrequencyScale<4 * kBlockDim, 3, 0>(),
+            },
+            {
+                ARLowestFrequencyScale<4 * kBlockDim, 0, 1>(),
+                ARLowestFrequencyScale<4 * kBlockDim, 1, 1>(),
+                ARLowestFrequencyScale<4 * kBlockDim, 2, 1>(),
+                ARLowestFrequencyScale<4 * kBlockDim, 3, 1>(),
+            },
+            {
+                ARLowestFrequencyScale<4 * kBlockDim, 0, 2>(),
+                ARLowestFrequencyScale<4 * kBlockDim, 1, 2>(),
+                ARLowestFrequencyScale<4 * kBlockDim, 2, 2>(),
+                ARLowestFrequencyScale<4 * kBlockDim, 3, 2>(),
+            },
+            {
+                ARLowestFrequencyScale<4 * kBlockDim, 0, 3>(),
+                ARLowestFrequencyScale<4 * kBlockDim, 1, 3>(),
+                ARLowestFrequencyScale<4 * kBlockDim, 2, 3>(),
+                ARLowestFrequencyScale<4 * kBlockDim, 3, 3>(),
+            }};
+        return scales[y];
+      }
+    }
+    // Fallback for a strategy_ value that matches no enumerator.
+    SIMD_ALIGN static const constexpr float scales[kLLFMaskDim] = {1.0f};
+    return scales;
+  }
+
+  // Pixel to coefficients and vice-versa
+  SIMD_ATTR void TransformFromPixels(const float* PIK_RESTRICT pixels,
+                                     size_t pixels_stride,
+                                     float* PIK_RESTRICT coefficients,
+                                     size_t coefficients_stride) const;
+  SIMD_ATTR void TransformToPixels(const float* PIK_RESTRICT coefficients,
+                                   size_t coefficients_stride,
+                                   float* PIK_RESTRICT pixels,
+                                   size_t pixels_stride) const;
+
+  // Coefficient scattering and gathering.
+  // ScatterCoefficients does nothing unless this is the first block of the
+  // transform. It forwards to ScatterBlock for 4x4- and 2x2-block transforms
+  // and copies a single kBlockDim x kBlockDim block otherwise.
+  template <typename T>
+  SIMD_ATTR void ScatterCoefficients(const T* PIK_RESTRICT coefficients,
+                                     size_t coefficients_stride,
+                                     T* PIK_RESTRICT blocks,
+                                     size_t blocks_stride) const {
+    if (block_ != 0) return;
+    if (covered_blocks_x() == 4 && covered_blocks_y() == 4) {
+      ScatterBlock<4 * kBlockDim, 4 * kBlockDim>(
+          coefficients, coefficients_stride, blocks, blocks_stride);
+      return;
+    }
+    if (covered_blocks_x() == 2 && covered_blocks_y() == 2) {
+      ScatterBlock<2 * kBlockDim, 2 * kBlockDim>(
+          coefficients, coefficients_stride, blocks, blocks_stride);
+      return;
+    }
+    PIK_ASSERT(covered_blocks_x() == 1 && covered_blocks_y() == 1);
+    memcpy(blocks, coefficients, kBlockDim * kBlockDim * sizeof(T));
+  }
+
+  // Counterpart of ScatterCoefficients with source and destination swapped;
+  // forwards to GatherBlock for multi-block transforms.
+  template <typename T>
+  SIMD_ATTR void GatherCoefficients(const T* PIK_RESTRICT blocks,
+                                    size_t blocks_stride,
+                                    T* PIK_RESTRICT coefficients,
+                                    size_t coefficients_stride) const {
+    if (block_ != 0) return;
+    if (covered_blocks_x() == 4 && covered_blocks_y() == 4) {
+      GatherBlock<4 * kBlockDim, 4 * kBlockDim>(
+          blocks, blocks_stride, coefficients, coefficients_stride);
+      return;
+    }
+    if (covered_blocks_x() == 2 && covered_blocks_y() == 2) {
+      GatherBlock<2 * kBlockDim, 2 * kBlockDim>(
+          blocks, blocks_stride, coefficients, coefficients_stride);
+      return;
+    }
+    PIK_ASSERT(covered_blocks_x() == 1 && covered_blocks_y() == 1);
+    memcpy(coefficients, blocks, kBlockDim * kBlockDim * sizeof(T));
+  }
+
+  // Same as above, but for DC image.
+  SIMD_ATTR void LowestFrequenciesFromDC(const float* PIK_RESTRICT dc,
+                                         size_t dc_stride, float* llf,
+                                         size_t llf_stride) const;
+  SIMD_ATTR void DCFromLowestFrequencies(const float* PIK_RESTRICT block,
+                                         size_t block_stride, float* dc,
+                                         size_t dc_stride) const;
+
+  // Produces a 2x2-upsampled DC block out of the lowest frequencies
+  // (block_size/8) of the image.
+  SIMD_ATTR void DC2x2FromLowestFrequencies(const float* PIK_RESTRICT llf,
+                                            size_t llf_stride,
+                                            float* PIK_RESTRICT dc2x2,
+                                            size_t dc2x2_stride) const;
+
+  // Produces the low frequencies (block_size/4) of the images out of a 2x2
+  // upsampled DC image, and vice-versa.
+  SIMD_ATTR void DC2x2FromLowFrequencies(const float* block,
+                                         size_t block_stride, float* dc2x2,
+                                         size_t dc2x2_stride) const;
+  SIMD_ATTR void LowFrequenciesFromDC2x2(const float* dc2x2,
+                                         size_t dc2x2_stride, float* block,
+                                         size_t block_stride) const;
+
+ private:
+  // Transform type of this block.
+  Type strategy_;
+  // Index of this 8x8 block inside the transform; 0 is the top-left block.
+  uint32_t block_;
+};  // class AcStrategy
+
+// Read-only view over one row of packed AC strategy bytes, yielding an
+// AcStrategy per block.
+class AcStrategyRow {
+ public:
+  // `row` points at the first packed byte of the row. `y` is accepted but not
+  // used.
+  AcStrategyRow(const uint8_t* row, size_t y) : row_(row) {}
+
+  // Unpacks entry `x`: the high nibble is the strategy type, the low nibble
+  // is the block index inside that strategy.
+  AcStrategy operator[](size_t x) const {
+    const uint8_t packed = row_[x];
+    const auto type = static_cast<AcStrategy::Type>(packed >> 4);
+    const uint32_t block_index = packed & 0xF;
+    return AcStrategy(type, block_index);
+  }
+
+ private:
+  const uint8_t* PIK_RESTRICT row_;
+};
+
+// Per-block image of packed AC strategy bytes (one byte per 8x8 block).
+class AcStrategyImage {
+ public:
+  // A value that does not represent a valid combined AC strategy value.
+  // Used as a sentinel in DecodeAcStrategy.
+  static constexpr uint8_t INVALID = 0xF;
+
+  // Creates an image without allocating any blocks.
+  AcStrategyImage() {}
+
+  // Creates an xsize-by-ysize image with every entry set to the DCT type
+  // value.
+  AcStrategyImage(size_t xsize, size_t ysize) : layers_(xsize, ysize) {
+    FillImage(static_cast<uint8_t>(AcStrategy::Type::DCT), &layers_);
+  }
+
+  // Move-only: copies must be requested explicitly through Copy().
+  AcStrategyImage(AcStrategyImage&&) = default;
+  AcStrategyImage& operator=(AcStrategyImage&&) = default;
+
+  // `rect` is the area to fill with the entire contents of `raw_layers`.
+  void SetFromRaw(const Rect& rect, const ImageB& raw_layers);
+
+  // Fills the area `rect` from the flat array `data`.
+  void SetFromArray(const Rect& rect, uint32_t data[]);
+
+  // Returns a view of row `y`, starting `x_prefix` entries into the row.
+  AcStrategyRow ConstRow(size_t y, size_t x_prefix = 0) const {
+    const uint8_t* first = layers_.ConstRow(y) + x_prefix;
+    return AcStrategyRow(first, y);
+  }
+
+  // Returns a view of row `y` of `rect`, starting at the rect's left edge.
+  AcStrategyRow ConstRow(const Rect& rect, size_t y) const {
+    const size_t image_y = rect.y0() + y;
+    return ConstRow(image_y, rect.x0());
+  }
+
+  // Direct read-only access to the packed bytes.
+  const ImageB& ConstRaw() const { return layers_; }
+
+  // Image dimensions, in blocks.
+  size_t xsize() const { return layers_.xsize(); }
+  size_t ysize() const { return layers_.ysize(); }
+
+  // Returns a new image holding a copy of the `rect` area of this one.
+  AcStrategyImage Copy(const Rect& rect) const {
+    AcStrategyImage result;
+    result.layers_ = CopyImage(rect, layers_);
+    return result;
+  }
+
+  // Returns a copy of the whole image.
+  AcStrategyImage Copy() const {
+    const Rect whole(layers_);
+    return Copy(whole);
+  }
+
+  // Count the number of blocks of a given type.
+  size_t CountBlocks(AcStrategy::Type type) const;
+
+ private:
+  ImageB layers_;
+};
+
+// Heuristically chooses the AC strategy of every 8x8 block of `src`.
+// `butteraugli_target` is the target distance the heuristics are scaled by.
+// `quant_field` is an initial quantization field for this image. `dequant`
+// provides the dequantization matrices. `src` is the input image in the XYB
+// color space. `pool` is the thread pool the work is run on. `ac_strategy` is
+// the output strategy. `aux_out` is an optional destination for statistics
+// and debug output.
+SIMD_ATTR void FindBestAcStrategy(float butteraugli_target,
+                                  const ImageF* quant_field,
+                                  const DequantMatrices& dequant,
+                                  const Image3F& src, ThreadPool* pool,
+                                  AcStrategyImage* ac_strategy,
+                                  PikInfo* aux_out);
+
+}  // namespace pik
+
+#endif  // PIK_AC_STRATEGY_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/adaptive_quantization.cc b/codec/L2/demos/pikEnc/host/pik/adaptive_quantization.cc
new file mode 100755
index 0000000000..f366d95447
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/adaptive_quantization.cc
@@ -0,0 +1,1163 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/adaptive_quantization.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <vector>
+#include <sys/time.h>
+#include <iostream>
+#include "pik/quant_weights.h"
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/ac_strategy.h"
+#include "pik/approx_cube_root.h"
+#include "pik/butteraugli_comparator.h"
+#include "pik/common.h"
+#include "pik/compiler_specific.h"
+#include "pik/compressed_dc.h"
+#include "pik/compressed_image.h"
+#include "pik/dct.h"
+#include "pik/entropy_coder.h"
+#include "pik/gauss_blur.h"
+#include "pik/gradient_map.h"
+#include "pik/image.h"
+#include "pik/opsin_inverse.h"
+#include "pik/profiler.h"
+#include "pik/status.h"
+
+bool FLAGS_log_search_state = false;  // If true, prints distance and quant range for each search iteration.
+// If true, prints the quantization maps at each iteration.
+bool FLAGS_dump_quant_state = false;
+
+namespace pik {
+namespace {
+
+static const double kQuant64[64] = {  // DctModulation weights, indexed by position k in natural coefficient order (k = 0 does not contribute)
+    0.0,
+    1.3209836994457049,
+    1.3209836994457049,
+    3.8093709838312875,
+    3.8093709838312875,
+    3.24478652961692,
+    3.2932050795619587,
+    3.2773870962477889,
+    3.2833062377265221,
+    0.86300641731251715,
+    0.81525767385332304,
+    2.509455259261768,
+    2.5512715939619355,
+    1.1009460471286041,
+    1.0472242371200757,
+    1.2132776894507991,
+    1.1836672984883603,
+    2.0934299406343144,
+    2.1313160506404598,
+    0.89190317505002714,
+    0.840986927484077,
+    0.70197816830313797,
+    0.76186836723000584,
+    1.3680171732844342,
+    1.3437082798869822,
+    1.2470683713759048,
+    1.1765069429040906,
+    0.35169288065810422,
+    0.41178711526516226,
+    0.53498575397594228,
+    0.60137373453997023,
+    0.70810156203993546,
+    0.68693731722531248,
+    0.27052366537985795,
+    0.22640452462879396,
+    0.198291255325797,
+    0.22234508352308596,
+    0.30485790798913998,
+    0.28530044891325101,
+    0.29524052213046498,
+    0.33185918780276397,
+    0.353582036654603,
+    0.305955548639682,
+    0.365259530060446,
+    0.333159510814334,
+    0.363133568767434,
+    0.334161790012618,
+    0.389194124900511,
+    0.349326306148990,
+    0.390310895605386,
+    0.408666924454222,
+    0.335930464190049,
+    0.359313000261458,
+    0.381109877480420,
+    0.392933763109596,
+    0.359529015172913,
+    0.347676628893596,
+    0.370974565818013,
+    0.350361463992334,
+    0.338064798002449,
+    0.336743523710490,
+    0.296631529585931,
+    0.304517245589665,
+    0.302956514467806,
+};
+
+// Increase precision in 8x8 blocks that are complicated in DCT space.
+// For each 8x8 block, adds a weighted mix of L2/L4/L8-style norms of its DCT
+// coefficients (all but the first in natural order) to the block's `out` entry.
+SIMD_ATTR void DctModulation(const ImageF& xyb, ImageF* out) {
+  PIK_ASSERT((xyb.xsize() + 7) / 8 == out->xsize());
+  PIK_ASSERT((xyb.ysize() + 7) / 8 == out->ysize());
+  const int32_t* natural_coeff_order = NaturalCoeffOrder();
+  static const double kPow = 1.923527252414339;
+  // Both tables are the same for every block, so fill them once per call.
+  float dct_rescale[64] = {0};
+  double quant_pow[64] = {0};
+  const float* dct_scale = DCTScales<8>();
+  for (int i = 0; i < 64; ++i) {
+    dct_rescale[i] = dct_scale[i / 8] * dct_scale[i % 8];
+    quant_pow[i] = pow(kQuant64[i], kPow);
+  }
+  for (int y = 0; y < xyb.ysize(); y += 8) {
+    float* const PIK_RESTRICT row_out = out->Row(y / 8);
+    for (int x = 0; x < xyb.xsize(); x += 8) {
+      SIMD_ALIGN float dct[64] = {0};
+      for (int dy = 0; dy < 8; ++dy) {
+        int yclamp = std::min<int>(y + dy, xyb.ysize() - 1);
+        const float* const PIK_RESTRICT row_in = xyb.Row(yclamp);
+        for (int dx = 0; dx < 8; ++dx) {
+          int xclamp = std::min<int>(x + dx, xyb.xsize() - 1);
+          dct[dy * 8 + dx] = row_in[xclamp];
+        }
+      }
+      ComputeTransposedScaledDCT<8>()(FromBlock<8>(dct), ToBlock<8>(dct));
+      double entropyQL2 = 0, entropyQL4 = 0, entropyQL8 = 0;
+      for (int k = 1; k < 64; ++k) {  // k == 0 is skipped
+        const int i = natural_coeff_order[k];
+        double v = dct[i] * dct_rescale[i];
+        v *= v;
+        const double q = quant_pow[k];
+        entropyQL2 += q * v;
+        v *= v;
+        entropyQL4 += q * v;
+        v *= v;
+        entropyQL8 += q * v;
+      }
+      entropyQL2 = std::sqrt(entropyQL2);
+      entropyQL4 = std::sqrt(std::sqrt(entropyQL4));
+      entropyQL8 = std::pow(entropyQL8, 0.125);
+      static const double mulQL2 = -0.00072185944355851461;
+      static const double mulQL4 = -1.1783135317666862;
+      static const double mulQL8 = 0.29099162398822259;
+      double v =
+          mulQL2 * entropyQL2 + mulQL4 * entropyQL4 + mulQL8 * entropyQL8;
+      double kMul = 1.1555549005271522;
+      row_out[x / 8] += kMul * v;
+    }
+  }
+}
+
+// Increase precision in 8x8 blocks that have high dynamic range.
+// Adds mul * (max - min) of every 8x8 block of `xyb` to that block's entry in
+// `out`; blocks at the right/bottom border only scan pixels inside the image.
+void RangeModulation(const ImageF& xyb, ImageF* out) {
+  PIK_ASSERT((xyb.xsize() + 7) / 8 == out->xsize());
+  PIK_ASSERT((xyb.ysize() + 7) / 8 == out->ysize());
+  static const double mul = 0.67975181715504351;
+  // (bx, by) is the top-left pixel of the current block.
+  for (int by = 0; by < xyb.ysize(); by += 8) {
+    float* const PIK_RESTRICT row_out = out->Row(by / 8);
+    for (int bx = 0; bx < xyb.xsize(); bx += 8) {
+      // Start from +/-1e30 so that the first sample normally replaces both.
+      float lo = 1e30;
+      float hi = -1e30;
+      for (int dy = 0; dy < 8 && by + dy < xyb.ysize(); ++dy) {
+        const float* const PIK_RESTRICT row_in = xyb.Row(by + dy);
+        for (int dx = 0; dx < 8 && bx + dx < xyb.xsize(); ++dx) {
+          const float v = row_in[bx + dx];
+          lo = std::min(lo, v);
+          hi = std::max(hi, v);
+        }
+      }
+      const float range = hi - lo;
+      row_out[bx / 8] += mul * range;
+    }
+  }
+}
+
+// Change precision in 8x8 blocks that have high frequency content.
+// Adds kMul times the mean absolute difference between horizontally and
+// vertically adjacent in-image pixels of each 8x8 block to its `out` entry.
+void HfModulation(const ImageF& xyb, ImageF* out) {
+  PIK_ASSERT((xyb.xsize() + 7) / 8 == out->xsize());
+  PIK_ASSERT((xyb.ysize() + 7) / 8 == out->ysize());
+  static const double kMul = 0.70743567045382239;
+  for (int y = 0; y < xyb.ysize(); y += 8) {
+    float* const PIK_RESTRICT row_out = out->Row(y / 8);
+    for (int x = 0; x < xyb.xsize(); x += 8) {
+      float total = 0;
+      int pairs = 0;
+      // Horizontal neighbours: pixel dx against pixel dx + 1 in each row.
+      for (int dy = 0; dy < 8 && y + dy < xyb.ysize(); ++dy) {
+        const float* const PIK_RESTRICT row = xyb.Row(y + dy);
+        for (int dx = 0; dx < 7 && x + dx + 1 < xyb.xsize(); ++dx, ++pairs) {
+          const float v = fabs(row[x + dx] - row[x + dx + 1]);
+          total += v;
+        }
+      }
+      // Vertical neighbours: row dy against row dy + 1 in each column.
+      for (int dy = 0; dy < 7 && y + dy + 1 < xyb.ysize(); ++dy) {
+        const float* const PIK_RESTRICT upper = xyb.Row(y + dy);
+        const float* const PIK_RESTRICT lower = xyb.Row(y + dy + 1);
+        for (int dx = 0; dx < 8 && x + dx < xyb.xsize(); ++dx, ++pairs) {
+          const float v = fabs(upper[x + dx] - lower[x + dx]);
+          total += v;
+        }
+      }
+      if (pairs != 0) total /= pairs;
+      total *= kMul;
+      row_out[x / 8] += total;
+    }
+  }
+}
+
+// Replaces every sample of `out` with its exponential, in place. The earlier
+// passes modulate the exponent; this makes the field multiplicative.
+void Exp(ImageF* out) {
+  const size_t height = out->ysize();
+  const size_t width = out->xsize();
+  for (size_t y = 0; y < height; ++y) {
+    float* const PIK_RESTRICT row = out->Row(y);
+    for (size_t x = 0; x < width; ++x) row[x] = exp(row[x]);
+  }
+}
+
+static double SimpleGamma(double v) {
+  // A simple HDR compatible gamma function. Computes
+  //   kRetMul * log(v' + kVOffset) + kRetAdd,
+  // where v' is v * mul with negative values replaced by zero.
+  // mul and mul2 represent a scaling difference between pik and butteraugli.
+  static const double mul = 103.34350600371506;
+  static const double mul2 = 1.0 / (67.797075768826289);
+  static const double kRetMul = mul2 * 18.6580932135;
+  static const double kRetAdd = mul2 * -20.2789020414;
+  static const double kVOffset = 7.14672470003;
+
+  const double scaled = v * mul;
+
+  // A negative value should happen rarely, but may lead to a NaN, which is
+  // rather undesirable. Since negative photons don't exist we solve the NaNs
+  // by clamping here.
+  const double clamped = (scaled < 0) ? 0 : scaled;
+
+  return kRetMul * log(clamped + kVOffset) + kRetAdd;
+}
+
+static double RatioOfCubicRootToSimpleGamma(double v) {
+  // The opsin space in pik is the cubic root of photons, i.e., v * v * v
+  // is related to the number of photons, and SimpleGamma(v * v * v) is the
+  // psychovisual space in butteraugli. Dividing one by the other gives the
+  // factor that moves quantization from pik's opsin space to butteraugli's
+  // log-gamma space.
+  const double photons = v * v * v;
+  return v / SimpleGamma(photons);
+}
+
+// Builds a per-pixel local-contrast map from plane 1 of `xyb` and returns it
+// as an image of the same size.
+//
+// Pixels outside the last column/row get mul0 times the sum of the absolute
+// differences to their four direct neighbours plus three times the
+// differences across the pixel (above vs. below, left vs. right). In row 0
+// and column 0 the missing "previous" neighbour is replaced by the "next"
+// one. Pixels in the last column use only the difference to the pixel below,
+// pixels in the last row only the difference to the pixel on the right, both
+// scaled by 2 * kOverWeightBorders * mul0; the bottom-right pixel copies its
+// left neighbour. Every computed value is multiplied by
+// RatioOfCubicRootToSimpleGamma(pixel + match_gamma_offset) and capped at
+// `cutoff`. The image must be at least 2x2 pixels.
+ImageF DiffPrecompute(const Image3F& xyb, float cutoff) {
+  PROFILER_ZONE("aq DiffPrecompute");
+  PIK_ASSERT(xyb.xsize() > 1);
+  PIK_ASSERT(xyb.ysize() > 1);
+  ImageF result(xyb.xsize(), xyb.ysize());
+  static const double mul0 = 0.046650519741099357;
+
+  // PIK's gamma is 3.0 to be able to decode faster with two muls.
+  // Butteraugli's gamma is matching the gamma of human eye, around 2.6.
+  // We approximate the gamma difference by adding one cubic root into
+  // the adaptive quantization. This gives us a total gamma of 2.6666
+  // for quantization uses.
+  static const double match_gamma_offset = 0.55030107636310233;
+  static const float kOverWeightBorders = 1.4;
+
+  // All rows except the last one.
+  for (size_t y = 0; y + 1 < xyb.ysize(); ++y) {
+    // The loop bound guarantees that row y + 1 exists, so the "next" row never
+    // needs a fallback. The "previous" row of row 0 is mirrored to row 1.
+    const size_t y2 = y + 1;
+    const size_t y1 = (y == 0) ? 1 : y - 1;
+    const float* PIK_RESTRICT row_in = xyb.PlaneRow(1, y);
+    const float* PIK_RESTRICT row_in1 = xyb.PlaneRow(1, y1);
+    const float* PIK_RESTRICT row_in2 = xyb.PlaneRow(1, y2);
+    float* const PIK_RESTRICT row_out = result.Row(y);
+    for (size_t x = 0; x + 1 < xyb.xsize(); ++x) {
+      // Same reasoning horizontally: x + 1 is always a valid column and the
+      // "previous" column of column 0 is mirrored to column 1.
+      const size_t x2 = x + 1;
+      const size_t x1 = (x == 0) ? 1 : x - 1;
+      // Four direct-neighbour differences plus the weighted cross differences.
+      float diff =
+          mul0 *
+          (fabs(row_in[x] - row_in[x2]) + fabs(row_in[x] - row_in2[x]) +
+           fabs(row_in[x] - row_in[x1]) + fabs(row_in[x] - row_in1[x]) +
+           3 * (fabs(row_in2[x] - row_in1[x]) + fabs(row_in[x1] - row_in[x2])));
+      diff *= RatioOfCubicRootToSimpleGamma(row_in[x] + match_gamma_offset);
+      row_out[x] = std::min(cutoff, diff);
+    }
+    // Last pixel of the row: there is no pixel to the right, so only the
+    // difference to the pixel below is used.
+    {
+      const size_t x = xyb.xsize() - 1;
+      float diff =
+          kOverWeightBorders * 2.0 * mul0 * (fabs(row_in[x] - row_in2[x]));
+      diff *= RatioOfCubicRootToSimpleGamma(row_in[x] + match_gamma_offset);
+      row_out[x] = std::min(cutoff, diff);
+    }
+  }
+
+  // Last row: there is no row below, so only the difference to the pixel on
+  // the right is used.
+  {
+    const size_t y = xyb.ysize() - 1;
+    const float* const PIK_RESTRICT row_in = xyb.PlaneRow(1, y);
+    float* const PIK_RESTRICT row_out = result.Row(y);
+    for (size_t x = 0; x + 1 < xyb.xsize(); ++x) {
+      const size_t x2 = x + 1;
+      float diff =
+          kOverWeightBorders * 2.0 * mul0 * fabs(row_in[x] - row_in[x2]);
+      diff *= RatioOfCubicRootToSimpleGamma(row_in[x] + match_gamma_offset);
+      row_out[x] = std::min(cutoff, diff);
+    }
+    // Last pixel of the last row: neither neighbour exists, so reuse the
+    // value of the pixel to its left.
+    {
+      const size_t x = xyb.xsize() - 1;
+      row_out[x] = row_out[x - 1];
+    }
+  }
+
+  return result;
+}
+
+// Returns an out_xsize x out_ysize image whose top-left corner is a copy of
+// `img`. Extra columns, then extra rows, are filled with the average of the
+// last (up to) three pixels of the row resp. column being extended.
+ImageF Expand(const ImageF& img, size_t out_xsize, size_t out_ysize) {
+  PIK_ASSERT(img.xsize() > 0);
+  PIK_ASSERT(img.ysize() > 0);
+  // Shrinking is not supported: the copies below would run out of bounds.
+  PIK_ASSERT(out_xsize >= img.xsize() && out_ysize >= img.ysize());
+  ImageF out(out_xsize, out_ysize);
+  // Expand to columns on right.
+  for (size_t y = 0; y < img.ysize(); ++y) {
+    const float* const PIK_RESTRICT row_in = img.Row(y);
+    float* const PIK_RESTRICT row_out = out.Row(y);
+    memcpy(row_out, row_in, img.xsize() * sizeof(row_out[0]));
+    float lastval = row_in[img.xsize() - 1];
+    if (img.xsize() >= 3) {
+      lastval += row_in[img.xsize() - 3];
+      lastval += row_in[img.xsize() - 2];
+      lastval *= (1.0 / 3);
+    } else if (img.xsize() >= 2) {
+      lastval += row_in[img.xsize() - 2];
+      lastval *= 0.5;
+    }
+    for (size_t x = img.xsize(); x < out_xsize; ++x) row_out[x] = lastval;
+  }
+  // Expand to rows at bottom.
+  if (img.ysize() != out_ysize) {
+    for (size_t x = 0; x < out_xsize; ++x) {
+      const size_t ys = img.ysize();
+      float lastval = out.Row(ys - 1)[x];
+      if (ys >= 3) {
+        lastval += out.Row(ys - 2)[x];
+        lastval += out.Row(ys - 3)[x];
+        lastval *= (1.0 / 3);
+      } else if (ys >= 2) {
+        lastval += out.Row(ys - 2)[x];
+        lastval *= 0.5;
+      }
+      for (size_t y = ys; y < out_ysize; ++y) out.Row(y)[x] = lastval;
+    }
+  }
+  return out;
+}
+
+// Applies the masking curve defined by the constants below to each diff value.
+ImageF ComputeMask(const ImageF& diffs) {
+  static const float kBase = 1.329262607500535;
+  static const float kMul1 = 0.010994306366172898;
+  static const float kOffset1 = 0.00683227084849159;
+  static const float kMul2 = -0.1949226495025296;
+  static const float kOffset2 = 0.075052668223305155;
+  ImageF out(diffs.xsize(), diffs.ysize());
+  for (size_t y = 0; y < diffs.ysize(); ++y) {
+    const float* const PIK_RESTRICT src_row = diffs.Row(y);
+    float* const PIK_RESTRICT dst_row = out.Row(y);
+    for (size_t x = 0; x < diffs.xsize(); ++x) {
+      const float d = src_row[x];
+      const double safe_div = std::max<double>(d + kOffset1, 1e-3);  // Avoid division by zero.
+      dst_row[x] = kBase + kMul1 / safe_div + kMul2 / (d * d + kOffset2);
+    }
+  }
+  return out;
+}
+
+// Reduces the per-pixel `distmap` to one value per tile_size x tile_size tile.
+// For each first block of `ac_strategy` the value is kTileNorm times the
+// weighted 16th-power mean over the covered tiles, grown by `margin` pixels
+// per side and clipped to the image; it is stored in every covered tile.
+ImageF TileDistMap(const ImageF& distmap, int tile_size, int margin,
+                   const AcStrategyImage& ac_strategy) {
+  PROFILER_FUNC;
+  static const float kBorderMul = 0.98f;
+  static const float kCornerMul = 0.7f;
+  // 16th norm is less than the max norm, we reduce the difference
+  // with this normalization factor.
+  static const double kTileNorm = 1.2;
+  const int tile_xsize = (distmap.xsize() + tile_size - 1) / tile_size;
+  const int tile_ysize = (distmap.ysize() + tile_size - 1) / tile_size;
+  ImageF tile_distmap(tile_xsize, tile_ysize);
+  size_t distmap_stride = tile_distmap.PixelsPerRow();
+  for (int tile_y = 0; tile_y < tile_ysize; ++tile_y) {
+    AcStrategyRow ac_strategy_row = ac_strategy.ConstRow(tile_y);
+    float* PIK_RESTRICT dist_row = tile_distmap.Row(tile_y);
+    for (int tile_x = 0; tile_x < tile_xsize; ++tile_x) {
+      AcStrategy acs = ac_strategy_row[tile_x];
+      // Non-first tiles are filled in together with their first tile.
+      if (!acs.IsFirstBlock()) continue;
+      const int this_tile_xsize = acs.covered_blocks_x() * tile_size;
+      const int this_tile_ysize = acs.covered_blocks_y() * tile_size;
+      const int y_begin = std::max<int>(0, tile_size * tile_y - margin);
+      const int y_end = std::min<int>(
+          distmap.ysize(), tile_size * tile_y + this_tile_ysize + margin);
+      const int x_begin = std::max<int>(0, tile_size * tile_x - margin);
+      const int x_end = std::min<int>(
+          distmap.xsize(), tile_size * tile_x + this_tile_xsize + margin);
+      float dist_norm = 0.0;
+      double pixels = 0;
+      for (int y = y_begin; y < y_end; ++y) {
+        const bool y_border = margin != 0 && (y == y_begin || y == y_end - 1);
+        const float ymul = y_border ? kBorderMul : 1.0f;
+        const float* const PIK_RESTRICT row = distmap.Row(y);
+        for (int x = x_begin; x < x_end; ++x) {
+          float xmul = ymul;
+          if (margin != 0 && (x == x_begin || x == x_end - 1)) {
+            // On the border in both directions means this is a corner pixel.
+            xmul = (xmul == 1.0) ? kBorderMul : kCornerMul;
+          }
+          float v = row[x];
+          v *= v;  // ^2
+          v *= v;  // ^4
+          v *= v;  // ^8
+          v *= v;  // ^16
+          dist_norm += xmul * v;
+          pixels += xmul;
+        }
+      }
+      if (pixels == 0) pixels = 1;
+      const double tile_dist = kTileNorm * pow(dist_norm / pixels, 1.0 / 16);
+      dist_row[tile_x] = tile_dist;
+      for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+        for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+          dist_row[tile_x + distmap_stride * iy + ix] = tile_dist;
+        }
+      }
+    }
+  }
+  return tile_distmap;
+}
+
+// For every pixel, returns the Chebyshev distance to the nearest "peak" within
+// local_radius, or -1 if there is none. A pixel is a peak if it exceeds
+// (1 - peak_weight) * peak_min + peak_weight * max(peak_min, local maximum).
+ImageF DistToPeakMap(const ImageF& field, float peak_min, int local_radius,
+                     float peak_weight) {
+  ImageF result(field.xsize(), field.ysize());
+  FillImage(-1.0f, &result);
+  for (int y0 = 0; y0 < field.ysize(); ++y0) {
+    for (int x0 = 0; x0 < field.xsize(); ++x0) {
+      const int x_min = std::max(0, x0 - local_radius);
+      const int y_min = std::max(0, y0 - local_radius);
+      const int x_max = std::min<int>(field.xsize(), x0 + 1 + local_radius);
+      const int y_max = std::min<int>(field.ysize(), y0 + 1 + local_radius);
+      float local_max = peak_min;
+      for (int y = y_min; y < y_max; ++y) {
+        const float* const src = field.Row(y);
+        for (int x = x_min; x < x_max; ++x) local_max = std::max(local_max, src[x]);
+      }
+      const float threshold =
+          (1.0f - peak_weight) * peak_min + peak_weight * local_max;
+      if (!(field.Row(y0)[x0] > threshold)) continue;
+      for (int y = y_min; y < y_max; ++y) {
+        float* const dst = result.Row(y);
+        for (int x = x_min; x < x_max; ++x) {
+          const float dist = std::max(std::abs(y - y0), std::abs(x - x0));
+          if (dst[x] < 0.0 || dst[x] > dist) dst[x] = dist;
+        }
+      }
+    }
+  }
+  return result;
+}
+
+// Sets *q to 1 / max(1 / quant_max, 1 / *q - factor / (d + 1)) and returns
+// true, unless *q >= 0.999 * quant_max, in which case *q is left untouched.
+bool AdjustQuantVal(float* const PIK_RESTRICT q, const float d,
+                    const float factor, const float quant_max) {
+  if (*q >= 0.999f * quant_max) return false;
+  *q = 1.0f / std::max(1.0f / quant_max, 1.0f / *q - factor / (d + 1.0f));
+  return true;
+}
+
+// Dumps `image` as a heat map named "<label><num_butteraugli_iters as %05d>".
+void DumpHeatmap(const PikInfo* info, const std::string& label,
+                 const ImageF& image, float good_threshold, float bad_threshold) {
+  char filename[200];
+  snprintf(filename, sizeof(filename), "%s%05d", label.c_str(),
+           info->num_butteraugli_iters);
+  Image3B heatmap =
+      butteraugli::CreateHeatMapImage(image, good_threshold, bad_threshold);
+  info->DumpImage(filename, heatmap);
+}
+
+// Debug aid: dumps 1 / quant_field as "quant_heatmap" and `tile_heatmap` as
+// "tile_heatmap". Does nothing unless WantDebugOutput(info) is true.
+void DumpHeatmaps(const PikInfo* info, float ba_target,
+                  const ImageF& quant_field, const ImageF& tile_heatmap) {
+  if (!WantDebugOutput(info)) return;
+  ImageF inv_qmap(quant_field.xsize(), quant_field.ysize());
+  for (size_t y = 0; y < quant_field.ysize(); ++y) {
+    const float* PIK_RESTRICT q = quant_field.ConstRow(y);  // never zero
+    float* PIK_RESTRICT inv = inv_qmap.Row(y);
+    for (size_t x = 0; x < quant_field.xsize(); ++x) inv[x] = 1.0f / q[x];
+  }
+  DumpHeatmap(info, "quant_heatmap", inv_qmap, 4.0f * ba_target,
+              6.0f * ba_target);
+  DumpHeatmap(info, "tile_heatmap", tile_heatmap, ba_target, 1.5f * ba_target);
+}
+
+// Makes all blocks covered by one AC strategy share their maximum quant value.
+void AdjustQuantField(const AcStrategyImage& ac_strategy, ImageF* quant_field) {
+  size_t stride = quant_field->PixelsPerRow();
+  for (size_t y = 0; y < quant_field->ysize(); ++y) {
+    AcStrategyRow ac_strategy_row = ac_strategy.ConstRow(y);
+    float* PIK_RESTRICT quant_row = quant_field->Row(y);
+    for (size_t x = 0; x < quant_field->xsize(); ++x) {
+      AcStrategy acs = ac_strategy_row[x];
+      // Only a strategy's first block owns the covered area; starting from
+      // another block would reach into its neighbours (cf. TileDistMap).
+      if (!acs.IsFirstBlock()) continue;
+      PIK_ASSERT(x + acs.covered_blocks_x() <= quant_field->xsize());
+      PIK_ASSERT(y + acs.covered_blocks_y() <= quant_field->ysize());
+      float max = quant_row[x];
+      for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+        for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++)
+          max = std::max(quant_row[x + ix + iy * stride], max);
+      }
+      for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+        for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++)
+          quant_row[x + ix + iy * stride] = max;
+      }
+    }
+  }
+}
+
+Image3F RoundtripImage(
+    const CompressParams& cparams, const FrameHeader& frame_header,
+    const GroupHeader& header, const Image3F& opsin_orig, const Image3F& opsin,
+    const ColorCorrelationMap& full_cmap,
+    const BlockDictionary& block_dictionary, const AcStrategyImage& ac_strategy,
+    const ImageB& sigma_lut_ids, const Quantizer& quantizer,
+    const ImageB& dequant_control_field,
+    const uint8_t dequant_map[kMaxQuantControlFieldValue][256],
+    MultipassManager* multipass_manager) {
+  // Quantizes `opsin` with `quantizer`, reconstructs it again and returns the result.
+  PROFILER_ZONE("enc roundtrip");
+  FrameDecCache frame_dec_cache;
+  frame_dec_cache.ac_strategy = ac_strategy.Copy();
+  PIK_ASSERT(opsin.ysize() % kBlockDim == 0);
+  frame_dec_cache.raw_quant_field = CopyImage(quantizer.RawQuantField());
+  frame_dec_cache.ar_sigma_lut_ids = CopyImage(sigma_lut_ids);
+
+  const size_t xsize_groups = DivCeil(opsin.xsize(), kGroupDim);
+  const size_t ysize_groups = DivCeil(opsin.ysize(), kGroupDim);
+  const size_t num_groups = xsize_groups * ysize_groups;
+  // Local statistics sink; it is filled below but never handed back to the caller.
+  PikInfo aux_out;
+
+  FrameEncCache frame_enc_cache;
+  frame_enc_cache.dequant_control_field = CopyImage(dequant_control_field);
+  memcpy(frame_enc_cache.dequant_map, dequant_map,
+         sizeof(uint8_t) * 256 * kMaxQuantControlFieldValue);
+  frame_dec_cache.dequant_control_field = CopyImage(dequant_control_field);
+  memcpy(frame_dec_cache.dequant_map, dequant_map,
+         sizeof(uint8_t) * 256 * kMaxQuantControlFieldValue);
+  InitializeFrameEncCache(frame_header, opsin, ac_strategy, quantizer,
+                          full_cmap, block_dictionary, &frame_enc_cache,
+                          &aux_out);
+
+  frame_dec_cache.dc = CopyImage(frame_enc_cache.dc_dec);
+  frame_dec_cache.gradient = std::move(frame_enc_cache.gradient);
+  // Fetch one handler per group (groups are numbered row by row).
+  std::vector<MultipassHandler*> handlers(num_groups);
+  for (size_t group_index = 0; group_index < num_groups; ++group_index) {
+    const size_t gx = group_index % xsize_groups;
+    const size_t gy = group_index / xsize_groups;
+    const Rect rect(gx * kGroupDim, gy * kGroupDim, kGroupDim, kGroupDim,
+                    opsin.xsize(), opsin.ysize());
+    handlers[group_index] =
+        multipass_manager->GetGroupHandler(group_index, rect);
+  }
+
+  Image3F idct(opsin.xsize(), opsin.ysize());
+  // A single cache and a single PikInfo (index 0) are shared by all groups in the loop below.
+  std::vector<GroupDecCache> group_dec_caches(1);
+
+  std::vector<PikInfo> aux_outs(1);
+
+  for(int group_index = 0; group_index < num_groups; ++group_index) {
+    GroupDecCache* PIK_RESTRICT group_dec_cache = &group_dec_caches[0];
+    PikInfo* my_aux_out = &aux_outs[0];
+    MultipassHandler* handler = handlers[group_index];
+    const Rect& group_rect = handler->PaddedGroupRect();
+    Rect block_group_rect = handler->BlockGroupRect();
+    EncCache cache;
+    InitializeEncCache(frame_header, header, frame_enc_cache, group_rect,
+                       &cache);
+    Quantizer quant = quantizer.Copy(block_group_rect);
+    // The group's block rectangle expressed in units of color tiles.
+    Rect group_in_color_tiles(
+        block_group_rect.x0() / kColorTileDimInBlocks,
+        block_group_rect.y0() / kColorTileDimInBlocks,
+        DivCeil(block_group_rect.xsize(), kColorTileDimInBlocks),
+        DivCeil(block_group_rect.ysize(), kColorTileDimInBlocks));
+
+    ComputeCoefficients(quant, full_cmap, group_in_color_tiles,
+                        frame_enc_cache, &cache, my_aux_out);
+    // Feed the coefficients just produced (cache.ac) straight into the decoding steps; output goes to `idct`.
+    InitializeDecCache(frame_dec_cache, group_rect, group_dec_cache);
+    DequantImageAC(quant, full_cmap, group_in_color_tiles, cache.ac,
+                   &frame_dec_cache, group_dec_cache, group_rect, my_aux_out);
+    ReconOpsinImage(frame_header, header, quant, block_group_rect,
+                    &frame_dec_cache, group_dec_cache, &idct, group_rect,
+                    my_aux_out);
+  }
+
+  aux_out.Assimilate(aux_outs[0]);
+
+  multipass_manager->RestoreOpsin(&idct);
+  // Fine to do a PIK_CHECK instead of error handling, since this only happens
+  // on the encoder side where we can't be fed with invalid data.
+  PIK_CHECK(FinalizeFrameDecoding(&idct, opsin_orig.xsize(), opsin_orig.ysize(),
+                                  frame_header, NoiseParams(), quantizer,
+                                  block_dictionary, &frame_dec_cache));
+  return idct;
+}
+
+static const float kDcQuantPow = 0.57840232344431763;
+static const float kDcQuant = 0.74852919562896747;
+static const float kAcQuant = 0.97136686727219523;
+// Iteratively adapts quant_field towards butteraugli_target and stores it in quantizer.
+void FindBestQuantization(
+    const Image3F& opsin_orig, const Image3F& opsin_arg,
+    const CompressParams& cparams, const FrameHeader& frame_header,
+    const GroupHeader& header, float butteraugli_target,
+    const ColorCorrelationMap& cmap, const BlockDictionary& block_dictionary,
+    const AcStrategyImage& ac_strategy, const ImageB& ar_sigma_lut_ids,
+    ImageF& quant_field, Quantizer* quantizer,
+    const ImageB& dequant_control_field,
+    const uint8_t dequant_map[kMaxQuantControlFieldValue][256],
+    PikInfo* aux_out, MultipassManager* multipass_manager, double rescale) {
+  const float intensity_multiplier = cparams.GetIntensityMultiplier();
+  ButteraugliComparator comparator(opsin_orig, cparams.hf_asymmetry,
+                                   intensity_multiplier);
+  const float initial_quant_dc =
+      InitialQuantDC(butteraugli_target, intensity_multiplier);
+
+  AdjustQuantField(ac_strategy, &quant_field);
+
+  ImageF tile_distmap;
+  ImageF tile_distmap_localopt;
+  ImageF initial_quant_field = CopyImage(quant_field);
+  ImageF last_quant_field = CopyImage(initial_quant_field);
+  ImageF last_tile_distmap_localopt;
+
+  float initial_qf_min, initial_qf_max;
+  ImageMinMax(initial_quant_field, &initial_qf_min, &initial_qf_max);
+  // Derive the range [qf_lower, qf_higher] that every later update is clamped to.
+  float initial_qf_ratio = initial_qf_max / initial_qf_min;
+  float qf_max_deviation_low = std::sqrt(250 / initial_qf_ratio);
+  float asymmetry = 2;
+  if (qf_max_deviation_low < asymmetry) asymmetry = qf_max_deviation_low;
+  float qf_lower = initial_qf_min / (asymmetry * qf_max_deviation_low);
+  float qf_higher = initial_qf_max * (qf_max_deviation_low / asymmetry);
+
+  PIK_ASSERT(qf_higher / qf_lower < 253);
+
+  constexpr int kOriginalComparisonRound = 5;
+  constexpr float kMaximumDistanceIncreaseFactor = 1.015;
+
+  for (int i = 0; i < cparams.max_butteraugli_iters + 1; ++i) {
+    if (FLAGS_dump_quant_state) {
+      printf("\nQuantization field:\n");
+      for (int y = 0; y < quant_field.ysize(); ++y) {
+        for (int x = 0; x < quant_field.xsize(); ++x) {
+          printf(" %.5f", quant_field.Row(y)[x]);
+        }
+        printf("\n");
+      }
+    }
+
+    if (quantizer->SetQuantField(initial_quant_dc, QuantField(quant_field))) {
+      // Measure the distortion that the current field produces.
+      Image3F linear = RoundtripImage(
+          cparams, frame_header, header, opsin_orig, opsin_arg, cmap,
+          block_dictionary, ac_strategy, ar_sigma_lut_ids, *quantizer,
+          dequant_control_field, dequant_map, multipass_manager);
+
+      PROFILER_ZONE("enc Butteraugli");
+
+      comparator.Compare(linear);
+      // kMargins has only 100 entries (all zero after the ninth): clamp the index.
+      static const int kMargins[100] = {0, 0, 0, 1, 2, 1, 1, 1, 0};
+      tile_distmap = TileDistMap(comparator.distmap(), 8,
+                                 kMargins[std::min(i, 99)], ac_strategy);
+      tile_distmap_localopt =
+          TileDistMap(comparator.distmap(), 8, 2, ac_strategy);
+      if (WantDebugOutput(aux_out)) {
+        DumpHeatmaps(aux_out, butteraugli_target, quant_field, tile_distmap);
+        ++aux_out->num_butteraugli_iters;
+      }
+
+      if (FLAGS_log_search_state) {
+        float minval, maxval;
+        ImageMinMax(quant_field, &minval, &maxval);
+        printf("\nButteraugli iter: %d/%d\n", i, cparams.max_butteraugli_iters);
+        printf("Butteraugli distance: %f\n", comparator.distance());
+        printf("quant range: %f ... %f  DC quant: %f\n", minval, maxval,
+               initial_quant_dc);
+        if (FLAGS_dump_quant_state) {
+          quantizer->DumpQuantizationMap();
+        }
+      }
+    }
+
+    if (i > kOriginalComparisonRound) {
+      // Undo last round if it made things worse (i.e. increased the quant value
+      // AND the distance in nearby pixels by at least some percentage).
+      for (size_t y = 0; y < quant_field.ysize(); ++y) {
+        float* const PIK_RESTRICT row_q = quant_field.Row(y);
+        const float* const PIK_RESTRICT row_dist = tile_distmap_localopt.Row(y);
+        const float* const PIK_RESTRICT row_last_dist =
+            last_tile_distmap_localopt.Row(y);
+        const float* const PIK_RESTRICT row_last_q = last_quant_field.Row(y);
+        for (size_t x = 0; x < quant_field.xsize(); ++x) {
+          if (row_q[x] > row_last_q[x] &&
+              row_dist[x] > kMaximumDistanceIncreaseFactor * row_last_dist[x]) {
+            row_q[x] = row_last_q[x];
+          }
+        }
+      }
+    }
+    last_quant_field = CopyImage(quant_field);
+    last_tile_distmap_localopt = CopyImage(tile_distmap_localopt);
+    if (i == cparams.max_butteraugli_iters) break;
+    // Exponents (and their per-target adjustment) applied to ratios < 1 in iterations 0..6.
+    double kPow[8] = {
+        0.97524596113492301,
+        1.0424361904568509,
+        0.64984804448911193,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+    };
+    double kPowMod[8] = {
+        0.011236980155043978,
+        0.0061256294105472651,
+        -0.0030115055086858242,
+        0.06929488142351059,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+    };
+    if (i == kOriginalComparisonRound) {
+      // Don't allow optimization to make the quant field a lot worse than
+      // what the initial guess was. This allows the AC field to have enough
+      // precision to reduce the oscillations due to the dc reconstruction.
+      double kInitMul = 0.6;
+      const double kOneMinusInitMul = 1.0 - kInitMul;
+      for (int y = 0; y < quant_field.ysize(); ++y) {
+        float* const PIK_RESTRICT row_q = quant_field.Row(y);
+        const float* const PIK_RESTRICT row_init = initial_quant_field.Row(y);
+        for (int x = 0; x < quant_field.xsize(); ++x) {
+          double clamp = kOneMinusInitMul * row_q[x] + kInitMul * row_init[x];
+          if (row_q[x] < clamp) {
+            row_q[x] = clamp;
+            if (row_q[x] > qf_higher) row_q[x] = qf_higher;
+            if (row_q[x] < qf_lower) row_q[x] = qf_lower;
+          }
+        }
+      }
+    }
+
+    double cur_pow = 0.0;
+    if (i < 7) {
+      cur_pow = kPow[i] + (butteraugli_target - 1.0) * kPowMod[i];
+      if (cur_pow < 0) {
+        cur_pow = 0;
+      }
+    }
+    // pow(x, 0) == 1, so skip pow.
+    if (cur_pow == 0.0) {
+      for (int y = 0; y < quant_field.ysize(); ++y) {
+        const float* const PIK_RESTRICT row_dist = tile_distmap.Row(y);
+        float* const PIK_RESTRICT row_q = quant_field.Row(y);
+        for (int x = 0; x < quant_field.xsize(); ++x) {
+          const float diff = row_dist[x] / butteraugli_target;
+          if (diff >= 1.0f) {
+            row_q[x] *= diff;
+          }
+          if (row_q[x] > qf_higher) row_q[x] = qf_higher;
+          if (row_q[x] < qf_lower) row_q[x] = qf_lower;
+        }
+      }
+    } else {
+      for (int y = 0; y < quant_field.ysize(); ++y) {
+        const float* const PIK_RESTRICT row_dist = tile_distmap.Row(y);
+        float* const PIK_RESTRICT row_q = quant_field.Row(y);
+        for (int x = 0; x < quant_field.xsize(); ++x) {
+          const float diff = row_dist[x] / butteraugli_target;
+          if (diff < 1.0f) {
+            row_q[x] *= pow(diff, cur_pow);
+          } else {
+            row_q[x] *= diff;
+          }
+          if (row_q[x] > qf_higher) row_q[x] = qf_higher;
+          if (row_q[x] < qf_lower) row_q[x] = qf_lower;
+        }
+      }
+    }
+  }
+  quantizer->SetQuantField(initial_quant_dc, QuantField(quant_field));
+}
+// Guetzli-mode quantizer search: round-trips and scores candidates with butteraugli; installs the best field/DC seen.
+void FindBestQuantizationHQ(
+    const Image3F& opsin_orig, const Image3F& opsin,
+    const CompressParams& cparams, const FrameHeader& frame_header,
+    const GroupHeader& header, float butteraugli_target,
+    const ColorCorrelationMap& cmap, const BlockDictionary& block_dictionary,
+    const AcStrategyImage& ac_strategy, const ImageB& sigma_lut_ids,
+    ImageF& quant_field, Quantizer* quantizer,
+    const ImageB& dequant_control_field,
+    const uint8_t dequant_map[kMaxQuantControlFieldValue][256],
+    PikInfo* aux_out, MultipassManager* multipass_manager, double rescale) {  // `rescale` is not read here
+  const float intensity_multiplier = cparams.GetIntensityMultiplier();
+  const float intensity_multiplier3 = std::cbrt(intensity_multiplier);
+  ButteraugliComparator comparator(opsin_orig, cparams.hf_asymmetry,
+                                   intensity_multiplier);
+  AdjustQuantField(ac_strategy, &quant_field);
+  ImageF best_quant_field = CopyImage(quant_field);
+  float best_butteraugli = 1000.0f;  // large start value, replaced by the first distance <= it
+  ImageF tile_distmap;
+  static const int kMaxOuterIters = 2;  // also the length of kAdjSpeed below
+  int outer_iter = 0;
+  int butteraugli_iter = 0;
+  int search_radius = 0;
+  float quant_ceil = 5.0f;
+  float quant_dc = intensity_multiplier3 * 1.2f;
+  float best_quant_dc = quant_dc;
+  int num_stalling_iters = 0;
+  int max_iters = cparams.max_butteraugli_iters_guetzli_mode;
+  // Each pass evaluates the current field, then edits it for the next pass.
+  for (;;) {
+    if (FLAGS_dump_quant_state) {
+      printf("\nQuantization field:\n");
+      for (int y = 0; y < quant_field.ysize(); ++y) {
+        for (int x = 0; x < quant_field.xsize(); ++x) {
+          printf(" %.5f", quant_field.Row(y)[x]);
+        }
+        printf("\n");
+      }
+    }
+    float qmin, qmax;  // only qmax is read below
+    ImageMinMax(quant_field, &qmin, &qmax);
+    ++butteraugli_iter;
+    if (quantizer->SetQuantField(quant_dc, QuantField(quant_field))) {
+      Image3F linear = RoundtripImage(
+          cparams, frame_header, header, opsin_orig, opsin, cmap,
+          block_dictionary, ac_strategy, sigma_lut_ids, *quantizer,
+          dequant_control_field, dequant_map, multipass_manager);
+      comparator.Compare(linear);
+      bool best_quant_updated = false;
+      if (comparator.distance() <= best_butteraugli) {
+        best_quant_field = CopyImage(quant_field);
+        best_butteraugli = std::max(comparator.distance(), butteraugli_target);
+        best_quant_updated = true;  // note: best_butteraugli is floored at the target (line above)
+        best_quant_dc = quant_dc;
+        num_stalling_iters = 0;
+      } else if (outer_iter == 0) {  // stalls are only counted during the first outer round
+        ++num_stalling_iters;
+      }
+      tile_distmap = TileDistMap(comparator.distmap(), 8, 0, ac_strategy);
+      if (WantDebugOutput(aux_out)) {
+        DumpHeatmaps(aux_out, butteraugli_target, quant_field, tile_distmap);
+      }
+      if (aux_out) {
+        ++aux_out->num_butteraugli_iters;
+      }
+      if (FLAGS_log_search_state) {
+        float minval, maxval;
+        ImageMinMax(quant_field, &minval, &maxval);
+        printf("\nButteraugli iter: %d/%d%s\n", butteraugli_iter, max_iters,
+               best_quant_updated ? " (*)" : "");
+        printf("Butteraugli distance: %f\n", comparator.distance());
+        printf(
+            "quant range: %f ... %f  DC quant: "
+            "%f\n",
+            minval, maxval, quant_dc);
+        printf("search radius: %d\n", search_radius);
+        if (FLAGS_dump_quant_state) {
+          quantizer->DumpQuantizationMap();
+        }
+      }
+    }
+    if (butteraugli_iter >= max_iters) {  // iteration budget exhausted
+      break;
+    }
+    bool changed = false;  // NOTE(review): code below reads comparator/tile_distmap even if the if-block above was skipped - confirm
+    while (!changed && comparator.distance() > butteraugli_target) {
+      for (int radius = 0; radius <= search_radius && !changed; ++radius) {
+        ImageF dist_to_peak_map =
+            DistToPeakMap(tile_distmap, butteraugli_target, radius, 0.0);
+        for (int y = 0; y < quant_field.ysize(); ++y) {
+          float* const PIK_RESTRICT row_q = quant_field.Row(y);
+          const float* const PIK_RESTRICT row_dist = dist_to_peak_map.Row(y);
+          for (int x = 0; x < quant_field.xsize(); ++x) {
+            if (row_dist[x] >= 0.0f) {
+              static const float kAdjSpeed[kMaxOuterIters] = {0.1f, 0.04f};
+              const float factor =
+                  kAdjSpeed[outer_iter] * tile_distmap.Row(y)[x];
+              if (AdjustQuantVal(&row_q[x], row_dist[x], factor, quant_ceil)) {
+                changed = true;
+              }
+            }
+          }
+        }
+      }
+      if (!changed || num_stalling_iters >= 3) {
+        // Extend the search: first the radius, then quant_dc, then quant_ceil.
+        if ((search_radius < 4) &&
+            (qmax < 0.99f * quant_ceil || quant_ceil >= 3.0f + search_radius)) {
+          ++search_radius;
+          continue;
+        }
+        if (quant_dc < 0.4f * quant_ceil - 0.8f) {
+          quant_dc += 0.2f;
+          changed = true;
+          continue;
+        }
+        if (quant_ceil < 8.0f) {
+          quant_ceil += 0.5f;
+          continue;
+        }
+        break;
+      }
+    }
+    if (!changed) {  // nothing left to adjust: scale the whole field and start the next outer round
+      if (++outer_iter == kMaxOuterIters) break;
+      static const float kQuantScale = 0.75f;
+      for (int y = 0; y < quant_field.ysize(); ++y) {
+        for (int x = 0; x < quant_field.xsize(); ++x) {
+          quant_field.Row(y)[x] *= kQuantScale;
+        }
+      }
+      num_stalling_iters = 0;
+    }
+  }
+  quantizer->SetQuantField(best_quant_dc, QuantField(best_quant_field));
+}
+// Builds the adaptive quantization map for `img`, sampled every kResolution (8) pixels and passed through Exp().
+ImageF AdaptiveQuantizationMap(const Image3F& img, const ImageF& img_ac,
+                               const CompressParams& cparams) {
+  PROFILER_ZONE("aq AdaptiveQuantMap");
+  static const int kResolution = 8;
+  const size_t out_xsize = (img.xsize() + kResolution - 1) / kResolution;  // ceil(xsize / 8)
+  const size_t out_ysize = (img.ysize() + kResolution - 1) / kResolution;  // ceil(ysize / 8)
+  if (img.xsize() <= 1) {  // degenerate width: return a constant 1.0 map
+    ImageF out(1, out_ysize);
+    FillImage(1.0f, &out);
+    return out;
+  }
+  if (img.ysize() <= 1) {  // degenerate height: return a constant 1.0 map
+    ImageF out(out_xsize, 1);
+    FillImage(1.0f, &out);
+    return out;
+  }
+  static const float kSigma = 8.2553856725566153;
+  static const int kRadius = static_cast<int>(2 * kSigma + 0.5f);
+  std::vector<float> kernel = GaussianKernel(kRadius, kSigma);
+  static const float kDiffCutoff = 0.11883287948847132;
+  ImageF out = DiffPrecompute(img, kDiffCutoff);
+  out = Expand(out, kResolution * out_xsize, kResolution * out_ysize);  // target size is a multiple of 8
+  out = ConvolveAndSample(out, kernel, kResolution);
+  out = ComputeMask(out);
+  // NOTE: the DctModulation / RangeModulation / HfModulation passes over
+  // `img_ac` are disabled in this version, so `img_ac` (and `cparams`) are
+  // currently not used by this function.
+  Exp(&out);
+  return out;
+}
+// Returns multiplier * (image - Convolve(image, DCfiedGaussianKernel)); `pool` is not used.
+// TODO(veluca): remove or use pool.
+ImageF IntensityAcEstimate(const ImageF& image, float multiplier,
+                           ThreadPool* pool) {
+  std::vector<float> kernel = DCfiedGaussianKernel<kBlockDim>(5.5);
+  ImageF result = Convolve(image, kernel);
+  const size_t num_cols = result.xsize();
+  for (size_t row = 0; row < result.ysize(); ++row) {
+    const float* PIK_RESTRICT src = image.ConstRow(row);
+    float* PIK_RESTRICT dst = result.Row(row);
+    for (size_t col = 0; col < num_cols; ++col) {
+      dst[col] = multiplier * (src[col] - dst[col]);
+    }
+  }
+  return result;
+}
+
+}  // namespace
+// Initial DC quantizer: cbrt(intensity_multiplier) * kDcQuant / min(target, target^kDcQuantPow).
+float InitialQuantDC(float butteraugli_target, float intensity_multiplier) {
+  const float target_pow = pow(butteraugli_target, kDcQuantPow);
+  const float target_dc = std::min(butteraugli_target, target_pow);
+  const float cbrt_intensity = std::cbrt(intensity_multiplier);
+  return cbrt_intensity * kDcQuant / target_dc;
+}
+// Heuristic starting quant field: AdaptiveQuantizationMap(opsin_orig) scaled by the AC quant for the target.
+ImageF InitialQuantField(double butteraugli_target, double intensity_multiplier,
+                         const Image3F& opsin_orig,
+                         const CompressParams& cparams, ThreadPool* pool,
+                         double rescale) {
+  const float cbrt_intensity = std::cbrt(intensity_multiplier);
+  const float ac_quant = cbrt_intensity * kAcQuant / butteraugli_target;
+  const float field_scale = ac_quant * static_cast<float>(rescale);
+  // AC estimate of plane 1 of the opsin image, handed to the map builder.
+  ImageF plane1_ac =
+      IntensityAcEstimate(opsin_orig.Plane(1), cbrt_intensity, pool);
+  return ScaleImage(field_scale,
+                    AdaptiveQuantizationMap(opsin_orig, plane1_ac, cparams));
+}
+// Creates a Quantizer and configures it per `cparams`: fast heuristic, uniform_quant, or butteraugli search.
+std::shared_ptr<Quantizer> FindBestQuantizer(
+    const CompressParams& cparams, size_t xsize_blocks, size_t ysize_blocks,
+    const Image3F& opsin_orig, const Image3F& opsin,
+    const FrameHeader& frame_header, const GroupHeader& header,
+    const ColorCorrelationMap& cmap, const BlockDictionary& block_dictionary,
+    const AcStrategyImage& ac_strategy, const ImageB& ar_sigma_lut_ids,
+    const DequantMatrices* dequant, const ImageB& dequant_control_field,
+    const uint8_t dequant_map[kMaxQuantControlFieldValue][256],
+    ImageF& quant_field, PikInfo* aux_out,
+    MultipassManager* multipass_manager, double rescale) {
+  std::shared_ptr<Quantizer> quantizer =
+      std::make_shared<Quantizer>(dequant, xsize_blocks, ysize_blocks);
+  const float intensity_multiplier = cparams.GetIntensityMultiplier();
+  if (cparams.fast_mode) {
+    PROFILER_ZONE("enc fast quant");
+    const float butteraugli_target = cparams.butteraugli_distance;
+    const float quant_dc =
+        InitialQuantDC(butteraugli_target, intensity_multiplier);
+    // Fast path: no search; install the adjusted input field directly.
+    // TODO(veluca): warn if uniform_quant is set - or honor it
+    AdjustQuantField(ac_strategy, &quant_field);
+    quantizer->SetQuantField(quant_dc, QuantField(quant_field));
+  } else if (cparams.uniform_quant > 0.0) {
+    PROFILER_ZONE("enc SetQuant");
+    quantizer->SetQuant(cparams.uniform_quant * rescale);
+  } else {
+    // Normal PIK encoding to a butteraugli score.
+    PROFILER_ZONE("enc find best2");
+    if (cparams.guetzli_mode) {
+      FindBestQuantizationHQ(opsin_orig, opsin, cparams, frame_header, header,
+                             cparams.butteraugli_distance, cmap,
+                             block_dictionary, ac_strategy, ar_sigma_lut_ids,
+                             quant_field, quantizer.get(),
+                             dequant_control_field, dequant_map, aux_out,
+                             multipass_manager, rescale);
+    } else {
+      FindBestQuantization(opsin_orig, opsin, cparams, frame_header, header,
+                           cparams.butteraugli_distance, cmap, block_dictionary,
+                           ac_strategy, ar_sigma_lut_ids, quant_field,
+                           quantizer.get(), dequant_control_field, dequant_map,
+                           aux_out, multipass_manager, rescale);
+    }
+  }
+  return quantizer;
+}
+// Like FindBestQuantizer, but fast mode copies the unadjusted field and calls SetQuantFieldOR(avg, absavg, ...).
+std::shared_ptr<Quantizer> FindBestQuantizerAvg(float avg, float absavg,
+    const CompressParams& cparams, size_t xsize_blocks, size_t ysize_blocks,
+    const Image3F& opsin_orig, const Image3F& opsin,
+    const FrameHeader& frame_header, const GroupHeader& header,
+    const ColorCorrelationMap& cmap, const BlockDictionary& block_dictionary,
+    const AcStrategyImage& ac_strategy, const ImageB& ar_sigma_lut_ids,
+    const DequantMatrices* dequant, const ImageB& dequant_control_field,
+    const uint8_t dequant_map[kMaxQuantControlFieldValue][256],
+    ImageF& quant_field, PikInfo* aux_out,
+    MultipassManager* multipass_manager, double rescale) {
+  std::shared_ptr<Quantizer> quantizer =
+      std::make_shared<Quantizer>(dequant, xsize_blocks, ysize_blocks);
+  const float intensity_multiplier = cparams.GetIntensityMultiplier();
+  if (cparams.fast_mode) {
+    PROFILER_ZONE("enc fast quant");
+    const float butteraugli_target = cparams.butteraugli_distance;
+    const float quant_dc =
+        InitialQuantDC(butteraugli_target, intensity_multiplier);
+    // Keep the field as passed in; AdjustQuantField below edits it in place.
+    // TODO(veluca): warn if uniform_quant is set - or honor it
+    ImageF qfOrigin = CopyImage(quant_field);
+    AdjustQuantField(ac_strategy, &quant_field);
+    quantizer->SetQuantFieldOR(avg, absavg, quant_dc, QuantField(quant_field), qfOrigin);
+  } else if (cparams.uniform_quant > 0.0) {
+    PROFILER_ZONE("enc SetQuant");
+    quantizer->SetQuant(cparams.uniform_quant * rescale);
+  } else {
+    // Normal PIK encoding to a butteraugli score.
+    PROFILER_ZONE("enc find best2");
+    if (cparams.guetzli_mode) {
+      FindBestQuantizationHQ(opsin_orig, opsin, cparams, frame_header, header,
+                             cparams.butteraugli_distance, cmap,
+                             block_dictionary, ac_strategy, ar_sigma_lut_ids,
+                             quant_field, quantizer.get(),
+                             dequant_control_field, dequant_map, aux_out,
+                             multipass_manager, rescale);
+    } else {
+      FindBestQuantization(opsin_orig, opsin, cparams, frame_header, header,
+                           cparams.butteraugli_distance, cmap, block_dictionary,
+                           ac_strategy, ar_sigma_lut_ids, quant_field,
+                           quantizer.get(), dequant_control_field, dequant_map,
+                           aux_out, multipass_manager, rescale);
+    }
+  }
+  return quantizer;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/adaptive_quantization.h b/codec/L2/demos/pikEnc/host/pik/adaptive_quantization.h
new file mode 100755
index 0000000000..178fb2f731
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/adaptive_quantization.h
@@ -0,0 +1,66 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_ADAPTIVE_QUANTIZATION_H_
+#define PIK_ADAPTIVE_QUANTIZATION_H_
+
+#include <stddef.h>
+
+#include "pik/block_dictionary.h"
+#include "pik/color_correlation.h"
+#include "pik/headers.h"
+#include "pik/image.h"
+#include "pik/multipass_handler.h"
+#include "pik/pik_params.h"
+#include "pik/quantizer.h"
+
+// Heuristics to find a good quantizer for a given image. InitialQuantField
+// produces a quantization field (i.e. relative quantization amounts for each
+// block) out of an opsin-space image. `InitialQuantField` uses heuristics,
+// `FindBestQuantizer` (in non-fast mode) will run multiple encoding-decoding
+// steps and try to improve the given quant field.
+
+namespace pik {
+
+// Returns an image subsampled by kBlockDim in each direction. If the value
+// at pixel (x,y) in the returned image is greater than 1.0, it means that
+// more fine-grained quantization should be used in the corresponding block
+// of the input image, while a value less than 1.0 indicates that less
+// fine-grained quantization should be enough.
+ImageF InitialQuantField(double butteraugli_target, double intensity_multiplier,
+                         const Image3F& opsin_orig,
+                         const CompressParams& cparams, ThreadPool* pool,
+                         double rescale);
+// Returns the initial DC quantizer value for the given butteraugli target.
+float InitialQuantDC(float butteraugli_target, float intensity_multiplier);
+
+// Returns a new quantizer that uses an adjusted version of the provided
+// quant_field; `quant_field` itself may be modified in the process.
+std::shared_ptr<Quantizer> FindBestQuantizer(
+    const CompressParams& cparams, size_t xsize_blocks, size_t ysize_blocks,
+    const Image3F& opsin_orig, const Image3F& opsin,
+    const FrameHeader& frame_header, const GroupHeader& header,
+    const ColorCorrelationMap& cmap, const BlockDictionary& block_dictionary,
+    const AcStrategyImage& ac_strategy, const ImageB& ar_sigma_lut_ids,
+    const DequantMatrices* dequant, const ImageB& dequant_control_field,
+    const uint8_t dequant_map[kMaxQuantControlFieldValue][256],
+    ImageF& quant_field, PikInfo* aux_out,
+    MultipassManager* multipass_manager, double rescale = 1.0);
+// Same as FindBestQuantizer, except that fast mode passes `avg`/`absavg` on to Quantizer::SetQuantFieldOR.
+std::shared_ptr<Quantizer> FindBestQuantizerAvg(float avg, float absavg,
+    const CompressParams& cparams, size_t xsize_blocks, size_t ysize_blocks,
+    const Image3F& opsin_orig, const Image3F& opsin,
+    const FrameHeader& frame_header, const GroupHeader& header,
+    const ColorCorrelationMap& cmap, const BlockDictionary& block_dictionary,
+    const AcStrategyImage& ac_strategy, const ImageB& ar_sigma_lut_ids,
+    const DequantMatrices* dequant, const ImageB& dequant_control_field,
+    const uint8_t dequant_map[kMaxQuantControlFieldValue][256],
+    ImageF& quant_field, PikInfo* aux_out,
+    MultipassManager* multipass_manager, double rescale = 1.0);
+
+}  // namespace pik
+
+#endif  // PIK_ADAPTIVE_QUANTIZATION_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/adaptive_reconstruction.cc b/codec/L2/demos/pikEnc/host/pik/adaptive_reconstruction.cc
new file mode 100755
index 0000000000..54acaf6b44
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/adaptive_reconstruction.cc
@@ -0,0 +1,453 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/adaptive_reconstruction.h"
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+
+#include "pik/ac_strategy.h"
+#include "pik/block.h"
+#include "pik/common.h"
+#include "pik/data_parallel.h"
+#include "pik/dct.h"
+#include "pik/dct_util.h"
+#include "pik/entropy_coder.h"
+#include "pik/epf.h"
+#include "pik/profiler.h"
+#include "pik/quant_weights.h"
+#include "pik/quantizer.h"
+#include "pik/simd/simd.h"
+
+#ifndef PIK_AR_PRINT_STATS
+#define PIK_AR_PRINT_STATS 0
+#endif
+
+namespace pik {
+namespace {
+
+using DF = SIMD_FULL(float);
+using DI = SIMD_FULL(int32_t);
+using DU = SIMD_FULL(uint32_t);
+using VF = DF::V;
+using VI = DI::V;
+using VU = DU::V;
+// Clamp counters for the three channels, each kept as DU::N per-lane partial sums.
+struct ARStats {
+  SIMD_ATTR void Assimilate(const ARStats& other) {  // adds `other`'s counters lane-wise into this object
+    const DU d;
+    for (int c = 0; c < 3; ++c) {
+      const auto low =
+          load_unaligned(d, clamp_lo[c]) + load_unaligned(d, other.clamp_lo[c]);
+      const auto high =
+          load_unaligned(d, clamp_hi[c]) + load_unaligned(d, other.clamp_hi[c]);
+      store_unaligned(low, d, clamp_lo[c]);
+      store_unaligned(high, d, clamp_hi[c]);
+    }
+  }
+  // Returns the sum over the DU::N lane counters stored at `from`.
+  static SIMD_ATTR uint32_t Total(const uint32_t* PIK_RESTRICT from) {
+    const DU d;
+    const SIMD_PART(uint32_t, 1) d1;
+    return get_part(d1, ext::sum_of_lanes(load_unaligned(d, from)));
+  }
+  // Adds the vector `add` lane-wise to the DU::N counters stored at `to`.
+  static SIMD_ATTR void Add(const VU add, uint32_t* PIK_RESTRICT to) {
+    const DU d;
+    store_unaligned(load_unaligned(d, to) + add, d, to);
+  }
+
+  // Per channel: lanes where correction < clamped (lo) / > clamped (hi); use Total() to sum the lanes.
+  uint32_t clamp_lo[3][DU::N] = {{0}};
+  uint32_t clamp_hi[3][DU::N] = {{0}};
+};
+
+// Computes, per lane, the ratio clamped / correction, where `correction` is
+// the vector loaded from `block` and `clamped` is that vector limited to
+// [-0.5, +0.5] * interval_scale, then lowers `min_ratio` to the smaller of
+// the two. `block` is only read; `stats` is only used if PIK_AR_PRINT_STATS.
+template <int c>  // c: channel index, selects the stats row
+SIMD_ATTR PIK_INLINE void SymmetricClamp(const VF interval_scale,
+                                         float* PIK_RESTRICT block,
+                                         float* PIK_RESTRICT min_ratio,
+                                         ARStats* PIK_RESTRICT stats) {
+  const DF df;
+  const auto half = set1(df, 0.5);
+  const auto upper_bound = half * interval_scale;
+
+  const auto neghalf = set1(df, -0.5);
+  const auto lower_bound = neghalf * interval_scale;
+
+  const auto correction = load(df, block);
+  // Note: this clamping is only for purposes of determining `min_ratio`.
+  const auto clamped = min(max(lower_bound, correction), upper_bound);
+
+  // Integer comparisons are faster than float.
+  const SIMD_FULL(uint32_t) du;
+  const auto correction_u = cast_to(du, correction);
+  const auto zero_u = setzero(du);
+  const auto correction_is_zero = cast_to(df, correction_u == zero_u);
+
+  // Sanity checks (compiled only when ADDRESS_SANITIZER is defined).
+#ifdef ADDRESS_SANITIZER
+  // clamped=0 can only happen if correction_is_zero.
+  PIK_ASSERT(ext::all_zero(correction_is_zero) ||
+             !ext::all_zero(cast_to(du, clamped) == zero_u));
+
+  // clamped must never change sign vs. correction (else min_ratio is negative).
+  const auto sign = cast_to(df, set1(du, 0x80000000u));
+  const auto changed_sign = (clamped ^ correction) & sign;
+  PIK_ASSERT(ext::all_zero(cast_to(du, changed_sign)));
+#endif
+
+  // ratio := clamped/correction: small if 'correction' was clamped a lot.
+  // If correction == 0, ratio will be large and min_ratio not updated (fine
+  // because zero definitely lies within the quantization interval.)
+  const auto divisor = select(correction, set1(df, 1E-7f), correction_is_zero);
+
+  const auto clamp_ratio = clamped / divisor;
+  store(min(clamp_ratio, load(df, min_ratio)), df, min_ratio);
+
+#if PIK_AR_PRINT_STATS
+  const auto one = set1(du, uint32_t(1));
+  const auto is_low = correction < clamped;
+  ARStats::Add(cast_to(du, is_low) & one, stats->clamp_lo[c]);
+  const auto is_high = correction > clamped;
+  ARStats::Add(cast_to(du, is_high) & one, stats->clamp_hi[c]);
+#endif
+}
+
+// Stores the transform (DCT) of `filt - original` for one (possibly
+// multi-block) transform in `block`, then updates `min_ratio` through
+// SymmetricClamp for every coefficient. Instead of computing the difference
+// of the DCT of the two images, we compute the DCT of the difference as DCT is
+// a linear operator and this saves some work.
+template <int c>
+SIMD_ATTR PIK_INLINE void UpdateMinRatioOfClampToOriginalDCT(
+    const float* PIK_RESTRICT original, size_t stride,
+    const float* PIK_RESTRICT dequant_matrix, const float inv_quant_ac,
+    const float dc_mul, AcStrategy acs, const float* PIK_RESTRICT filt,
+    float* PIK_RESTRICT min_ratio, float* PIK_RESTRICT block,
+    ARStats* PIK_RESTRICT stats) {
+  const SIMD_FULL(float) df;
+  const SIMD_FULL(uint32_t) du;
+
+  const size_t block_width = kBlockDim * acs.covered_blocks_x();
+  const size_t block_height = kBlockDim * acs.covered_blocks_y();
+
+  for (size_t iy = 0; iy < block_height; iy++) {
+    for (size_t ix = 0; ix < block_width; ix += df.N) {
+      const auto filt_v = load(df, filt + stride * iy + ix);
+      const auto original_v = load(df, original + stride * iy + ix);
+      store(filt_v - original_v, df, block + block_width * iy + ix);
+    }
+  }
+
+  size_t covered_blocks = acs.covered_blocks_x() * acs.covered_blocks_y();
+  // Transform the difference in place (through an aligned temporary).
+  {
+    SIMD_ALIGN float temp_block[AcStrategy::kMaxCoeffArea];
+    acs.TransformFromPixels(block, block_width, temp_block,
+                            block_width * kBlockDim);
+    memcpy(block, temp_block,
+           covered_blocks * kBlockDim * kBlockDim * sizeof(float));
+  }
+  // Lane mask with all bits set for the first covered_blocks_x() entries.
+  const uint32_t* only_llf_bits = nullptr;
+  if (acs.covered_blocks_x() == 1) {
+    SIMD_ALIGN static const uint32_t only_llf_b[AcStrategy::kLLFMaskDim] = {
+        ~0u};
+    only_llf_bits = only_llf_b;
+  }
+  if (acs.covered_blocks_x() == 2) {
+    SIMD_ALIGN static const uint32_t only_llf_b[AcStrategy::kLLFMaskDim] = {
+        ~0u, ~0u};
+    only_llf_bits = only_llf_b;
+  }
+  if (acs.covered_blocks_x() == 4) {
+    SIMD_ALIGN static const uint32_t only_llf_b[AcStrategy::kLLFMaskDim] = {
+        ~0u, ~0u, ~0u, ~0u};
+    only_llf_bits = only_llf_b;
+  }
+  PIK_ASSERT(only_llf_bits != nullptr);
+  PIK_ASSERT(acs.covered_blocks_y() <= kBlockDim);
+  static_assert(kBlockDim % SIMD_FULL(float)::N == 0,
+                "Block dimension is not a multiple of lane size!");
+
+  // TODO(janwas): template, make covered_blocks* constants
+  // Handle lowest-frequencies (masked lanes; presumably scaled by dc_mul) and corresponding rows.
+  for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
+    const size_t row_offset = y * block_width;
+    const float* llf_scales = acs.ARLowestFrequencyScales(y);
+    for (size_t k = 0; k < std::min(AcStrategy::kLLFMaskDim, block_width);
+         k += df.N) {
+      const size_t ofs = row_offset + k;
+      const auto only_llf = cast_to(df, load(du, only_llf_bits + k));
+      const auto cur_dc_mul = load(df, llf_scales + k) * set1(df, dc_mul);
+      const auto ac_mul =
+          load(df, dequant_matrix + ofs) * set1(df, inv_quant_ac);
+      const auto interval_scale = select(ac_mul, cur_dc_mul, only_llf);
+      SymmetricClamp<c>(interval_scale, block + ofs, min_ratio + ofs, stats);
+    }
+    for (size_t k = AcStrategy::kLLFMaskDim; k < block_width; k += df.N) {
+      const size_t ofs = row_offset + k;
+      const auto interval_scale =
+          load(df, dequant_matrix + ofs) * set1(df, inv_quant_ac);
+      SymmetricClamp<c>(interval_scale, block + ofs, min_ratio + ofs, stats);
+    }
+  }
+
+  // All other coefficients (index covered_blocks * kBlockDim and up).
+  for (size_t k = covered_blocks * kBlockDim;
+       k < covered_blocks * kBlockDim * kBlockDim; k += df.N) {
+    const auto interval_scale =
+        load(df, dequant_matrix + k) * set1(df, inv_quant_ac);
+    SymmetricClamp<c>(interval_scale, block + k, min_ratio + k, stats);
+  }
+}
+
+// Clamp by multiplying block[k] by min_ratio[k], then IDCT; writes original + result into `filt`.
+// DoMul allows disabling the scaling for X as an experiment (disabled).
+template <bool DoMul>
+SIMD_ATTR PIK_INLINE void ClampAndIDCT(
+    float* PIK_RESTRICT block, const size_t block_width,
+    const size_t block_height, const float* PIK_RESTRICT min_ratio,
+    const AcStrategy acs, const float* PIK_RESTRICT original,
+    float* PIK_RESTRICT filt, size_t stride) {
+#ifdef ADDRESS_SANITIZER
+  for (size_t k = 0; k < block_width * block_height; ++k) {
+    PIK_ASSERT(min_ratio[k] >= 0.0f);
+  }
+#endif
+  // Scale the coefficients in place (factor 1.0 when DoMul is false).
+  const SIMD_FULL(float) df;
+  for (size_t k = 0; k < block_width * block_height; k += df.N) {
+    const auto mul = DoMul ? load(df, min_ratio + k) : set1(df, 1.0f);
+    const auto scaled = load(df, block + k) * mul;
+    store(scaled, df, block + k);
+  }
+
+  // IDCT
+  SIMD_ALIGN float pixels[AcStrategy::kMaxCoeffArea];
+  acs.TransformToPixels(block, block_width * kBlockDim, pixels, block_width);
+  // filt = original + inverse-transformed (scaled) difference.
+  for (size_t iy = 0; iy < block_height; iy++) {
+    for (size_t ix = 0; ix < block_width; ix += df.N) {
+      const auto block_v = load(df, pixels + block_width * iy + ix);
+      const auto original_v = load(df, original + stride * iy + ix);
+      store(block_v + original_v, df, filt + stride * iy + ix);
+    }
+  }
+}
+// Writes |in - smoothed|, sample by sample and plane by plane, into *residual.
+void ComputeResidualSlow(const Image3F& in, const Image3F& smoothed,
+                         Image3F* PIK_RESTRICT residual) {
+  const size_t width = in.xsize();
+  const size_t height = in.ysize();
+  for (int plane = 0; plane < in.kNumPlanes; ++plane) {
+    for (size_t y = 0; y < height; ++y) {
+      const float* src = in.PlaneRow(plane, y);
+      const float* ref = smoothed.PlaneRow(plane, y);
+      float* PIK_RESTRICT dst = residual->PlaneRow(plane, y);
+      for (size_t x = 0; x < width; ++x) dst[x] = std::abs(src[x] - ref[x]);
+    }
+  }
+}
+
+// TODO(janwas): template, use actual max_coefs from acs as size.
+struct ARBlocks {  // scratch buffers for one transform: a coefficient block per channel plus a shared ratio
+  float x[AcStrategy::kMaxCoeffArea];
+  uint8_t pad1[CacheAligned::kAlignment];  // spacer between arrays (presumably to keep them apart in memory - confirm)
+  float y[AcStrategy::kMaxCoeffArea];
+  uint8_t pad2[CacheAligned::kAlignment];
+  float b[AcStrategy::kMaxCoeffArea];
+  uint8_t pad3[CacheAligned::kAlignment];
+  float min_ratio[AcStrategy::kMaxCoeffArea];  // minimum clamp ratio, shared by x, y and b
+};
+
+}  // namespace
+// Runs EdgePreservingFilter (adaptive or not, per epf_params.enable_adaptive) and returns the filtered image.
+Image3F DoDenoise(const Image3F& opsin, const Image3F& opsin_sharp,
+                  const Quantizer& quantizer, const ImageI& raw_quant_field,
+                  const ImageB& sigma_lut_ids,
+                  const AcStrategyImage& ac_strategy,
+                  const EpfParams& epf_params,
+                  AdaptiveReconstructionAux* aux) {
+  if (aux != nullptr) {
+    aux->quant_scale = quantizer.Scale();
+  }
+
+  Image3F smoothed(opsin.xsize(), opsin.ysize());
+
+  const float quant_scale = quantizer.Scale();
+  if (epf_params.enable_adaptive) {
+    Dispatch(TargetBitfield().Best(), EdgePreservingFilter(), opsin,
+             opsin_sharp, &raw_quant_field, quant_scale, sigma_lut_ids,
+             ac_strategy, epf_params, &smoothed,
+             aux ? &aux->epf_stats : nullptr);
+  } else {
+    float stretch;  // throwaway output slot, used only when `aux` is null
+    Dispatch(TargetBitfield().Best(), EdgePreservingFilter(), opsin,
+             opsin_sharp, epf_params, aux ? &aux->stretch : &stretch,
+             &smoothed);
+  }
+  return smoothed;
+}
+// Applies a +/- half-DC-quantization-step clamp to every sample of `dc`, in place (see NOTE below).
+void AdaptiveDCReconstruction(Image3F& dc, const Quantizer& quantizer) {
+  for (size_t c = 0; c < dc.kNumPlanes; c++) {
+    const float half_step = quantizer.inv_quant_dc() *
+                            quantizer.DequantMatrix(0, kQuantKindDCT8, c)[0] *
+                            0.5f;
+    for (size_t y = 0; y < dc.ysize(); y++) {
+      float* PIK_RESTRICT dc_row = dc.PlaneRow(c, y);
+      for (size_t x = 0; x < dc.xsize(); x++) {
+        dc_row[x] = std::max(dc_row[x] - half_step,
+                             std::min(dc_row[x], dc_row[x] + half_step));  // NOTE(review): equals dc_row[x] whenever half_step >= 0 - confirm intent
+      }
+    }
+  }
+}
+// Filters `in` with DoDenoise, then limits the change relative to `non_smoothed` per transform in the DCT domain.
+SIMD_ATTR Image3F AdaptiveReconstruction(
+    const Image3F& in, const Image3F& non_smoothed, const Quantizer& quantizer,
+    const ImageI& raw_quant_field, const ImageB& quant_cf,
+    const uint8_t quant_cf_map[kMaxQuantControlFieldValue][256],
+    const ImageB& sigma_lut_ids, const AcStrategyImage& ac_strategy,
+    const EpfParams& epf_params,
+    AdaptiveReconstructionAux* aux) {
+  PROFILER_FUNC;
+  PIK_ASSERT(in.xsize() / 8 == sigma_lut_ids.xsize() &&
+             in.ysize() / 8 == sigma_lut_ids.ysize());
+  // Input image should have an integer number of blocks.
+  PIK_ASSERT(in.xsize() % kBlockDim == 0 && in.ysize() % kBlockDim == 0);
+  const size_t xsize_blocks = in.xsize() / kBlockDim;
+  const size_t ysize_blocks = in.ysize() / kBlockDim;
+
+  // Dequantization matrices.
+  const float* PIK_RESTRICT dequant_matrices =
+      quantizer.DequantMatrix(0, kQuantKindDCT8, 0);
+  float dc_mul[3];
+  for (size_t c = 0; c < 3; c++) {
+    dc_mul[c] =
+        quantizer.inv_quant_dc() *
+        dequant_matrices[quantizer.DequantMatrixOffset(0, kQuantKindDCT8, c)];
+  }
+
+  // Modified below (clamped).
+  Image3F filt = DoDenoise(in, non_smoothed, quantizer, raw_quant_field,
+                           sigma_lut_ids, ac_strategy, epf_params, aux);
+  if (aux != nullptr && aux->filtered != nullptr) {
+    *aux->filtered = CopyImage(filt);
+  }
+
+  const size_t stride = filt.PlaneRow(0, 1) - filt.PlaneRow(0, 0);
+  PIK_ASSERT(stride == in.PlaneRow(0, 1) - in.PlaneRow(0, 0));
+
+  ARStats stats;
+  // One row of blocks per iteration; only the first block of each transform does the work.
+      for (size_t task = 0; task < ysize_blocks; ++task) {
+        const size_t by = task;
+        const int32_t* PIK_RESTRICT row_quant = raw_quant_field.ConstRow(by);
+        const AcStrategyRow ac_strategy_row = ac_strategy.ConstRow(by);
+
+        const float* PIK_RESTRICT row_original_x =
+            non_smoothed.ConstPlaneRow(0, by * kBlockDim);
+        const float* PIK_RESTRICT row_original_y =
+            non_smoothed.ConstPlaneRow(1, by * kBlockDim);
+        const float* PIK_RESTRICT row_original_b =
+            non_smoothed.ConstPlaneRow(2, by * kBlockDim);
+
+        float* PIK_RESTRICT row_filt_x = filt.PlaneRow(0, by * kBlockDim);
+        float* PIK_RESTRICT row_filt_y = filt.PlaneRow(1, by * kBlockDim);
+        float* PIK_RESTRICT row_filt_b = filt.PlaneRow(2, by * kBlockDim);
+
+        const size_t ty = by / kTileDimInBlocks;
+        const uint8_t* row_quant_cf = quant_cf.ConstRow(ty);
+
+        for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+          const int32_t qac = row_quant[bx];
+          const float inv_quant_ac = quantizer.inv_quant_ac(qac);
+          const AcStrategy acs = ac_strategy_row[bx];
+          if (!acs.IsFirstBlock()) continue;
+
+          const size_t tx = bx / kTileDimInBlocks;
+
+          // TODO(janwas): hoist/precompute
+          uint8_t quant_table = quant_cf_map[row_quant_cf[tx]][qac - 1];
+          const float* dequant_matrix_x =
+              dequant_matrices + quantizer.DequantMatrixOffset(
+                                     quant_table, acs.GetQuantKind(), /*c=*/0);
+          const float* dequant_matrix_y =
+              dequant_matrices + quantizer.DequantMatrixOffset(
+                                     quant_table, acs.GetQuantKind(), /*c=*/1);
+          const float* dequant_matrix_b =
+              dequant_matrices + quantizer.DequantMatrixOffset(
+                                     quant_table, acs.GetQuantKind(), /*c=*/2);
+
+          const size_t block_ofs = bx * kBlockDim;
+          const float* PIK_RESTRICT pos_original_x = row_original_x + block_ofs;
+          const float* PIK_RESTRICT pos_original_y = row_original_y + block_ofs;
+          const float* PIK_RESTRICT pos_original_b = row_original_b + block_ofs;
+          float* PIK_RESTRICT pos_filt_x = row_filt_x + block_ofs;
+          float* PIK_RESTRICT pos_filt_y = row_filt_y + block_ofs;
+          float* PIK_RESTRICT pos_filt_b = row_filt_b + block_ofs;
+
+          const size_t block_width = kBlockDim * acs.covered_blocks_x();
+          const size_t block_height = kBlockDim * acs.covered_blocks_y();
+
+          SIMD_ALIGN ARBlocks blocks;
+          const SIMD_FULL(float) df;
+          for (size_t k = 0; k < block_width * block_height; k += df.N) {
+            store(set1(df, 1.0f), df, blocks.min_ratio + k);
+          }
+          // All three channels lower the same min_ratio before any of them is scaled.
+          UpdateMinRatioOfClampToOriginalDCT<0>(
+              pos_original_x, stride, dequant_matrix_x, inv_quant_ac, dc_mul[0],
+              acs, pos_filt_x, blocks.min_ratio, blocks.x, &stats);
+          UpdateMinRatioOfClampToOriginalDCT<1>(
+              pos_original_y, stride, dequant_matrix_y, inv_quant_ac, dc_mul[1],
+              acs, pos_filt_y, blocks.min_ratio, blocks.y, &stats);
+          UpdateMinRatioOfClampToOriginalDCT<2>(
+              pos_original_b, stride, dequant_matrix_b, inv_quant_ac, dc_mul[2],
+              acs, pos_filt_b, blocks.min_ratio, blocks.b, &stats);
+
+          ClampAndIDCT<true>(blocks.b, block_width, block_height,
+                             blocks.min_ratio, acs, pos_original_b, pos_filt_b,
+                             stride);
+
+          ClampAndIDCT<true>(blocks.x, block_width, block_height,
+                             blocks.min_ratio, acs, pos_original_x, pos_filt_x,
+                             stride);
+          ClampAndIDCT<true>(blocks.y, block_width, block_height,
+                             blocks.min_ratio, acs, pos_original_y, pos_filt_y,
+                             stride);
+        }  // bx
+      }  // by
+
+#if PIK_AR_PRINT_STATS
+  printf("Lo/Hi clamped: %5u %5u; %5u %5u; %5u %5u (pixels: %zu)\n",
+         ARStats::Total(stats.clamp_lo[0]), ARStats::Total(stats.clamp_hi[0]),
+         ARStats::Total(stats.clamp_lo[1]), ARStats::Total(stats.clamp_hi[1]),
+         ARStats::Total(stats.clamp_lo[2]), ARStats::Total(stats.clamp_hi[2]),
+         in.xsize() * in.ysize());
+#endif
+  // Optional debug outputs; each one is written only if its own pointer is set.
+  if (aux != nullptr) {
+    if (aux->residual != nullptr) {
+      ComputeResidualSlow(in, filt, aux->residual);
+    }
+    if (aux->ac_quant != nullptr) {
+      CopyImageTo(raw_quant_field, aux->ac_quant);
+    }
+    if (aux->ac_strategy != nullptr) {  // was guarded by aux->ac_quant (copy-paste slip)
+      CopyImageTo(ac_strategy.ConstRaw(), aux->ac_strategy);
+    }
+  }
+  return filt;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/adaptive_reconstruction.h b/codec/L2/demos/pikEnc/host/pik/adaptive_reconstruction.h
new file mode 100755
index 0000000000..3899dd37dd
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/adaptive_reconstruction.h
@@ -0,0 +1,47 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_ADAPTIVE_RECONSTRUCTION_H_
+#define PIK_ADAPTIVE_RECONSTRUCTION_H_
+
+// "In-loop" filter: edge-preserving filter + adaptive clamping to DCT interval.
+
+#include "pik/adaptive_reconstruction_fwd.h"
+#include "pik/data_parallel.h"
+#include "pik/epf.h"
+#include "pik/image.h"
+#include "pik/multipass_handler.h"
+#include "pik/quantizer.h"
+
+namespace pik {
+
+// Edge-preserving smoothing plus clamping the result to the quantized interval
+// (which requires `quantizer` to reconstruct the values that were actually
+// quantized). `in` is the image to filter: opsin AFTER gaborish.
+// `non_smoothed` is the opsin image BEFORE gaborish.
+SIMD_ATTR Image3F AdaptiveReconstruction(
+    const Image3F& in, const Image3F& non_smoothed, const Quantizer& quantizer,
+    const ImageI& raw_quant_field, const ImageB& quant_cf,
+    const uint8_t quant_cf_map[kMaxQuantControlFieldValue][256],
+    const ImageB& sigma_lut_ids, const AcStrategyImage& ac_strategy,
+    const EpfParams& epf_params,
+    AdaptiveReconstructionAux* aux = nullptr);
+
+// Edge-preserving smoothing plus clamping the result to quantized interval,
+// done on the DC image in pixel space.
+void AdaptiveDCReconstruction(Image3F& dc, const Quantizer& quantizer);
+
+// Calls the edge-preserving filter using proper target dispatching.
+Image3F DoDenoise(const Image3F& opsin, const Image3F& opsin_sharp,
+                  const Quantizer& quantizer, const ImageI& raw_quant_field,
+                  const ImageB& sigma_lut_ids,
+                  const AcStrategyImage& ac_strategy,
+                  const EpfParams& epf_params,
+                  AdaptiveReconstructionAux* aux = nullptr);
+
+}  // namespace pik
+
+#endif  // PIK_ADAPTIVE_RECONSTRUCTION_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/adaptive_reconstruction_fwd.h b/codec/L2/demos/pikEnc/host/pik/adaptive_reconstruction_fwd.h
new file mode 100755
index 0000000000..44362f700f
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/adaptive_reconstruction_fwd.h
@@ -0,0 +1,48 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_ADAPTIVE_RECONSTRUCTION_FWD_H_
+#define PIK_ADAPTIVE_RECONSTRUCTION_FWD_H_
+
+// Breaks the circular dependency between adaptive_reconstruction.h and
+// pik_info.h.
+
+#include "pik/epf_stats.h"
+#include "pik/image.h"
+
+namespace pik {
+
+// Optional output(s) of adaptive reconstruction; all start out "unset".
+struct AdaptiveReconstructionAux {
+  void Assimilate(const AdaptiveReconstructionAux& other) {
+    epf_stats.Assimilate(other.epf_stats);
+    if (other.stretch != -1.0f) stretch = other.stretch;  // -1 means unset.
+    if (other.quant_scale != -1.0f) quant_scale = other.quant_scale;
+  }  // The image pointers below are not merged.
+
+  void Print() const { epf_stats.Print(); }
+
+  // Filled with the multiplier used to scale input pixels to [0, 255].
+  float stretch = -1.0f;
+
+  // Set to Quantizer::Scale(); -1 until then.
+  float quant_scale = -1.0f;
+
+  // If not null, filled with difference between input and filtered image.
+  Image3F* residual = nullptr;
+  // If not null, filled with the output of the filter.
+  Image3F* filtered = nullptr;
+  // If not null, filled with raw quant map used to compute sigma.
+  ImageI* ac_quant = nullptr;
+  // If not null, filled with AC strategy (for detecting DCT16).
+  ImageB* ac_strategy = nullptr;
+
+  EpfStats epf_stats;  // Merged by Assimilate(), printed by Print().
+};
+
+}  // namespace pik
+
+#endif  // PIK_ADAPTIVE_RECONSTRUCTION_FWD_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/alpha.cc b/codec/L2/demos/pikEnc/host/pik/alpha.cc
new file mode 100755
index 0000000000..9b5d7657a4
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/alpha.cc
@@ -0,0 +1,193 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/alpha.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+
+#include <memory>
+#include <vector>
+
+#include "pik/ans_decode.h"
+#include "pik/bit_reader.h"
+#include "pik/dc_predictor.h"
+#include "pik/entropy_coder.h"
+#include "pik/fast_log.h"
+#include "pik/lossless16.h"
+#include "pik/lossless8.h"
+#include "pik/profiler.h"
+#include "pik/status.h"
+#include "pik/write_bits.h"
+
+namespace pik {
+
+namespace {
+
+// First RLE symbol, indexed by bytes_per_alpha - 1 (8-bit, 16-bit alpha).
+// TODO(veluca): check if those upper bounds can be improved.
+constexpr int kRleSymStart[2] = {10, 18};
+constexpr int kMaxRleBits = 31;  // Largest nbits accepted for a run length.
+constexpr int kMaxRleLength =
+    DecodeVarLenUint(kMaxRleBits, (1u << kMaxRleBits) - 1);
+
+}  // namespace
+
+Status EncodeAlpha(const CompressParams& params, const ImageU& plane,
+                   const Rect& rect, int bit_depth, Alpha* alpha) {
+  PIK_ASSERT(bit_depth == 8 || bit_depth == 16);
+  alpha->bytes_per_alpha = bit_depth / 8;  // The encoding format used
+
+  // Copy `rect` of `plane` into a signed 16-bit image. 16-bit samples are
+  // re-centered by subtracting 2^15; 8-bit samples are copied unchanged.
+  const int bias = (alpha->bytes_per_alpha == 2) ? (1 << 15) : 0;
+  ImageS alpha_img(rect.xsize(), rect.ysize());
+  for (size_t y = 0; y < rect.ysize(); y++) {
+    const uint16_t* PIK_RESTRICT in = rect.ConstRow(plane, y);
+    int16_t* PIK_RESTRICT row = alpha_img.Row(y);
+    for (size_t x = 0; x < rect.xsize(); x++) {
+      row[x] = in[x] - bias;
+    }
+  }
+
+  // ShrinkY fills `residuals` from `alpha_img`; the residuals are what gets
+  // tokenized and entropy coded below.
+  ImageS residuals(rect.xsize(), rect.ysize());
+  ShrinkY(Rect(alpha_img), alpha_img, Rect(residuals), &residuals);
+
+  const size_t rle_sym_start = kRleSymStart[alpha->bytes_per_alpha - 1];
+
+  // Encode twice, with and without run-length coding of zero residuals, and
+  // keep the shorter of the two bitstreams in `best`.
+  std::string best;
+  for (bool rle : {true, false}) {
+    std::vector<std::vector<Token>> tokens(1);
+    size_t zero_run = 0;  // Zero residuals seen since the last token.
+
+    // Emits the pending run of zeros (if any) as one token whose symbol is
+    // rle_sym_start + nbits, then resets the run.
+    auto flush_zero_run = [&]() {
+      if (zero_run == 0) return;
+      int nbits, bits;
+      EncodeVarLenUint(zero_run - 1, &nbits, &bits);
+      tokens[0].emplace_back(Token(0, rle_sym_start + nbits, nbits, bits));
+      zero_run = 0;
+    };
+
+    for (size_t y = 0; y < residuals.ysize(); y++) {
+      const int16_t* PIK_RESTRICT row = residuals.ConstRow(y);
+      for (size_t x = 0; x < residuals.xsize(); x++) {
+        if (rle && row[x] == 0) {
+          // Extend the run; flush as soon as it reaches the maximum length.
+          if (++zero_run == kMaxRleLength) flush_zero_run();
+          continue;
+        }
+        flush_zero_run();
+        int nbits, bits;
+        EncodeVarLenInt(row[x], &nbits, &bits);
+        PIK_ASSERT(nbits < rle_sym_start);
+        tokens[0].emplace_back(Token(0, nbits, nbits, bits));
+      }
+    }
+    flush_zero_run();
+
+    std::vector<ANSEncodingData> codes;
+    std::vector<uint8_t> context_map;
+    std::string enc =
+        BuildAndEncodeHistograms(1, tokens, &codes, &context_map, nullptr);
+    enc += WriteTokens(tokens[0], codes, context_map, nullptr);
+    if (best.empty() || enc.size() < best.size()) {
+      best = std::move(enc);
+    }
+  }
+
+  alpha->encoded.resize(best.size());
+  memcpy(alpha->encoded.data(), best.data(), best.size());
+  return true;
+}
+
+Status DecodeAlpha(const DecompressParams& params, const Alpha& alpha,
+                   ImageU* plane, const Rect& rect) {
+  PROFILER_FUNC;
+  PIK_CHECK(plane->xsize() != 0);
+  if (alpha.bytes_per_alpha != 1 && alpha.bytes_per_alpha != 2) {
+    return PIK_FAILURE("Invalid bytes_per_alpha");
+  }
+
+  BitReader bit_reader(alpha.encoded.data(), alpha.encoded.size());
+  std::vector<uint8_t> context_map;
+  ANSCode code;
+  PIK_RETURN_IF_ERROR(
+      DecodeHistograms(&bit_reader, 1, 54, &code, &context_map));
+  ANSSymbolReader decoder(&code);
+
+  const size_t xsize = rect.xsize();
+  const size_t ysize = rect.ysize();
+  const size_t rle_sym_start = kRleSymStart[alpha.bytes_per_alpha - 1];
+
+  // Step 1: entropy-decode the residuals. A symbol in [1, rle_sym_start) is
+  // the bit count of a signed residual; a symbol >= rle_sym_start starts a
+  // run of zeros; symbol 0 is a zero residual.
+  ImageS residuals(xsize, ysize);
+  const int histo_idx = context_map[0];
+  size_t zeros_left = 0;  // Zeros of the current run still to be written.
+  for (size_t y = 0; y < ysize; y++) {
+    int16_t* PIK_RESTRICT row = residuals.Row(y);
+    for (size_t x = 0; x < xsize; x++) {
+      if (zeros_left != 0) {
+        row[x] = 0;
+        zeros_left--;
+        continue;
+      }
+      bit_reader.FillBitBuffer();
+      int s = decoder.ReadSymbol(histo_idx, &bit_reader);
+      if (s <= 0) {
+        row[x] = s;
+        continue;
+      }
+      if (s >= rle_sym_start) {
+        // Zero run: this sample plus DecodeVarLenUint(...) following ones.
+        s -= rle_sym_start;
+        if (s > kMaxRleBits) {
+          return PIK_FAILURE("Invalid rle nbits");
+        }
+        int bits = bit_reader.ReadBits(s);
+        s = DecodeVarLenUint(s, bits);
+        zeros_left = s;
+        row[x] = 0;
+        continue;
+      }
+      int bits = bit_reader.ReadBits(s);
+      s = DecodeVarLenInt(s, bits);
+      row[x] = s;
+    }
+  }
+  if (zeros_left != 0) {  // A run must not extend past the last sample.
+    return PIK_FAILURE("Invalid alpha");
+  }
+  PIK_RETURN_IF_ERROR(bit_reader.JumpToByteBoundary());
+
+  // Step 2: ExpandY rebuilds the samples from the residuals; they are written
+  // into `rect` of `plane`, re-adding 2^15 for 16-bit alpha.
+  ImageS alpha_img(xsize, ysize);
+  ExpandY(Rect(alpha_img), residuals, &alpha_img);
+  const int bias = (alpha.bytes_per_alpha == 2) ? (1 << 15) : 0;
+  for (size_t y = 0; y < ysize; y++) {
+    const int16_t* PIK_RESTRICT in = alpha_img.ConstRow(y);
+    uint16_t* PIK_RESTRICT row = rect.Row(plane, y);
+    for (size_t x = 0; x < xsize; x++) {
+      row[x] = in[x] + bias;
+    }
+  }
+
+  if (!decoder.CheckANSFinalState()) {
+    return PIK_FAILURE("ANS checksum failure.");
+  }
+  return true;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/alpha.h b/codec/L2/demos/pikEnc/host/pik/alpha.h
new file mode 100755
index 0000000000..cecd1cd536
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/alpha.h
@@ -0,0 +1,28 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_ALPHA_H_
+#define PIK_ALPHA_H_
+
+// Encodes/decodes alpha image to/from its compressed representation.
+
+#include "pik/headers.h"
+#include "pik/image.h"
+#include "pik/pik_params.h"
+#include "pik/status.h"
+
+namespace pik {
+
+Status EncodeAlpha(const CompressParams& params, const ImageU& plane,
+                   const Rect& rect, int bit_depth, Alpha* alpha);
+
+// "plane" must be pre-allocated (FileHeader knows the size).
+Status DecodeAlpha(const DecompressParams& params, const Alpha& alpha,
+                   ImageU* plane, const Rect& rect);
+
+}  // namespace pik
+
+#endif  // PIK_ALPHA_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/ans_common.cc b/codec/L2/demos/pikEnc/host/pik/ans_common.cc
new file mode 100755
index 0000000000..3f9a905063
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/ans_common.cc
@@ -0,0 +1,25 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/ans_common.h"
+
+#include "pik/status.h"
+
+namespace pik {
+
+std::vector<int> CreateFlatHistogram(int length, int total_count) {
+  PIK_ASSERT(length > 0);
+  PIK_ASSERT(length <= total_count);
+  // Start every bin at the rounded-down share; the first bins take the rest.
+  std::vector<int> histogram(length, total_count / length);
+  const int num_bigger_bins = total_count % length;
+  for (int i = 0; i < num_bigger_bins; ++i) {
+    histogram[i] += 1;
+  }
+  return histogram;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/ans_common.h b/codec/L2/demos/pikEnc/host/pik/ans_common.h
new file mode 100755
index 0000000000..bbb9025d7d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/ans_common.h
@@ -0,0 +1,29 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_ANS_COMMON_H_
+#define PIK_ANS_COMMON_H_
+
+#include <vector>
+
+#include "pik/compiler_specific.h"
+
+namespace pik {
+
+// Returns the precision (number of bits) that should be used to store
+// a histogram count such that Log2Floor(count) == logcount.
+PIK_INLINE int GetPopulationCountPrecision(int logcount) {
+  return (logcount + 1) >> 1;  // ceil(logcount / 2) for logcount >= 0.
+}
+
+// Returns a histogram where the counts are positive, differ by at most 1,
+// and add up to total_count. The bigger counts (if any) are at the beginning
+// of the histogram.
+std::vector<int> CreateFlatHistogram(int length, int total_count);
+
+}  // namespace pik
+
+#endif  // PIK_ANS_COMMON_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/ans_decode.cc b/codec/L2/demos/pikEnc/host/pik/ans_decode.cc
new file mode 100755
index 0000000000..a3746f02c8
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/ans_decode.cc
@@ -0,0 +1,187 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/ans_decode.h"
+
+#include <vector>
+
+#include "pik/ans_common.h"
+#include "pik/fast_log.h"
+
+namespace pik {
+namespace {
+
+// Decodes a number in the range [0..65535], by reading 1 - 20 bits.
+inline int DecodeVarLenUint16(BitReader* input) {
+  // A leading 0 bit encodes the value 0.
+  if (!input->ReadBits(1)) return 0;
+
+  // Otherwise 4 bits give nbits, the index of the value's highest set bit,
+  // and nbits further bits give the rest of the value (none for value 1).
+  const int nbits = static_cast<int>(input->ReadBits(4));
+  if (nbits == 0) return 1;
+  const int low_bits = static_cast<int>(input->ReadBits(nbits));
+  return low_bits + (1 << nbits);
+}
+
+Status ReadHistogram(int precision_bits, std::vector<int>* counts,
+                     BitReader* input) {
+  int simple_code = input->ReadBits(1);  // 1: histogram of 1 or 2 symbols.
+  if (simple_code == 1) {
+    int i;
+    int symbols[2] = {0};
+    int max_symbol = 0;
+    const int num_symbols = input->ReadBits(1) + 1;
+    for (i = 0; i < num_symbols; ++i) {
+      symbols[i] = DecodeVarLenUint16(input);
+      if (symbols[i] > max_symbol) max_symbol = symbols[i];
+    }
+    counts->resize(max_symbol + 1);
+    if (num_symbols == 1) {
+      (*counts)[symbols[0]] = 1 << precision_bits;
+    } else {
+      if (symbols[0] == symbols[1]) {  // corrupt data
+        return false;
+      }
+      (*counts)[symbols[0]] = input->ReadBits(precision_bits);
+      (*counts)[symbols[1]] = (1 << precision_bits) - (*counts)[symbols[0]];
+    }
+  } else {
+    int is_flat = input->ReadBits(1);  // 1: only the alphabet size is stored.
+    if (is_flat == 1) {
+      int alphabet_size = input->ReadBits(precision_bits);
+      if (alphabet_size == 0) {
+        return PIK_FAILURE("Invalid alphabet size for flat histogram.");
+      }
+      *counts = CreateFlatHistogram(alphabet_size, 1 << precision_bits);
+      return true;
+    }
+    int length = DecodeVarLenUint16(input) + 3;  // Stored as length - 3.
+    counts->resize(length);
+    int total_count = 0;
+    // huff[next 7 bits] = {code length in bits, logcount}; logcount 11 = RLE.
+    static const uint8_t huff[128][2] = {
+        {2, 6}, {3, 7}, {3, 4}, {4, 1}, {2, 6}, {3, 8}, {3, 5}, {4, 3},
+        {2, 6}, {3, 7}, {3, 4}, {4, 2}, {2, 6}, {3, 8}, {3, 5}, {5, 0},
+        {2, 6}, {3, 7}, {3, 4}, {4, 1}, {2, 6}, {3, 8}, {3, 5}, {4, 3},
+        {2, 6}, {3, 7}, {3, 4}, {4, 2}, {2, 6}, {3, 8}, {3, 5}, {6, 9},
+        {2, 6}, {3, 7}, {3, 4}, {4, 1}, {2, 6}, {3, 8}, {3, 5}, {4, 3},
+        {2, 6}, {3, 7}, {3, 4}, {4, 2}, {2, 6}, {3, 8}, {3, 5}, {5, 0},
+        {2, 6}, {3, 7}, {3, 4}, {4, 1}, {2, 6}, {3, 8}, {3, 5}, {4, 3},
+        {2, 6}, {3, 7}, {3, 4}, {4, 2}, {2, 6}, {3, 8}, {3, 5}, {7, 10},
+        {2, 6}, {3, 7}, {3, 4}, {4, 1}, {2, 6}, {3, 8}, {3, 5}, {4, 3},
+        {2, 6}, {3, 7}, {3, 4}, {4, 2}, {2, 6}, {3, 8}, {3, 5}, {5, 0},
+        {2, 6}, {3, 7}, {3, 4}, {4, 1}, {2, 6}, {3, 8}, {3, 5}, {4, 3},
+        {2, 6}, {3, 7}, {3, 4}, {4, 2}, {2, 6}, {3, 8}, {3, 5}, {6, 9},
+        {2, 6}, {3, 7}, {3, 4}, {4, 1}, {2, 6}, {3, 8}, {3, 5}, {4, 3},
+        {2, 6}, {3, 7}, {3, 4}, {4, 2}, {2, 6}, {3, 8}, {3, 5}, {5, 0},
+        {2, 6}, {3, 7}, {3, 4}, {4, 1}, {2, 6}, {3, 8}, {3, 5}, {4, 3},
+        {2, 6}, {3, 7}, {3, 4}, {4, 2}, {2, 6}, {3, 8}, {3, 5}, {7, 11},
+    };
+    std::vector<int> logcounts(counts->size());
+    int omit_log = -1;
+    int omit_pos = -1;
+    // This array remembers which symbols have an RLE length.
+    std::vector<int> same(counts->size(), 0);
+    for (int i = 0; i < logcounts.size(); ++i) {
+      input->FillBitBuffer();
+      int idx = input->PeekFixedBits<7>();
+      input->Advance(huff[idx][0]);
+      logcounts[i] = huff[idx][1];
+      // The RLE symbol.
+      if (logcounts[i] == 11) {
+        input->FillBitBuffer();
+        int rle_length = input->PeekFixedBits<8>();
+        input->Advance(8);
+        same[i] = rle_length;
+        i += rle_length - 2;  // With the loop's ++i: resume after the run.
+        continue;
+      }
+      if (logcounts[i] > omit_log) {  // Tracks the first largest logcount.
+        omit_log = logcounts[i];
+        omit_pos = i;
+      }
+    }
+    // Invalid input, e.g. due to invalid usage of RLE.
+    if (omit_pos < 0) return PIK_FAILURE("Invalid histogram.");
+    int prev = 0;
+    int numsame = 0;
+    for (int i = 0; i < logcounts.size(); ++i) {
+      if (same[i]) {
+        // RLE sequence, let this loop output the same count for the next
+        // iterations.
+        numsame = same[i] - 1;
+        prev = i > 0 ? (*counts)[i - 1] : 0;
+      }
+      if (numsame > 0) {
+        (*counts)[i] = prev;
+        numsame--;
+      } else {
+        int code = logcounts[i];
+        if (i == omit_pos) {
+          continue;
+        } else if (code == 0) {
+          continue;
+        } else if (code == 1) {
+          (*counts)[i] = 1;
+        } else {
+          int bitcount = GetPopulationCountPrecision(code - 1);
+          (*counts)[i] = (1 << (code - 1)) +
+                         (input->ReadBits(bitcount) << (code - 1 - bitcount));
+        }
+      }
+      total_count += (*counts)[i];
+    }
+    (*counts)[omit_pos] = (1 << precision_bits) - total_count;  // Remainder.
+    if ((*counts)[omit_pos] <= 0) {
+      // The histogram we've read sums to more than total_count (including at
+      // least 1 for the omitted value).
+      return PIK_FAILURE("Invalid histogram count.");
+    }
+  }
+  return true;
+}
+
+}  // namespace
+
+bool DecodeANSCodes(const size_t num_histograms, const size_t max_alphabet_size,
+                    BitReader* in, ANSCode* result) {
+  PIK_ASSERT(max_alphabet_size <= ANS_TAB_SIZE);
+  // Each histogram owns (1 << ANS_LOG_TAB_SIZE) entries of `map` and `info`.
+  // NOTE(review): the +1 presumably pads ReadSymbol's 32-bit load - confirm.
+  result->map.resize((num_histograms << ANS_LOG_TAB_SIZE) + 1);
+  result->info.resize(num_histograms << ANS_LOG_TAB_SIZE);
+  for (size_t c = 0; c < num_histograms; ++c) {
+    std::vector<int> counts;
+    if (!ReadHistogram(ANS_LOG_TAB_SIZE, &counts, in))
+      return PIK_FAILURE("Invalid histogram bitstream.");
+    if (counts.size() > max_alphabet_size)
+      return PIK_FAILURE("Alphabet size is too long.");
+    const size_t table_start = c << ANS_LOG_TAB_SIZE;
+    uint32_t cumulative = 0;  // Sum of the counts of the symbols so far.
+    for (size_t sym = 0, slot = 0; sym < counts.size(); ++sym) {
+      const uint32_t freq = counts[sym];
+      ANSSymbolInfo* entry = &result->info[table_start + sym];
+#if PIK_BYTE_ORDER_LITTLE
+      const uint32_t s32 = cumulative + (freq << 16);
+      memcpy(entry, &s32, sizeof(s32));
+#else
+      entry->offset = static_cast<uint16_t>(cumulative);
+      entry->freq = static_cast<uint16_t>(freq);
+#endif
+      cumulative += counts[sym];
+      if (cumulative > ANS_TAB_SIZE)
+        return PIK_FAILURE("Invalid ANS histogram data.");
+      // The next counts[sym] table slots decode to this symbol.
+      for (size_t j = 0; j < counts[sym]; ++j, ++slot) {
+        result->map[table_start + slot] = sym;
+      }
+    }
+  }
+  return true;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/ans_decode.h b/codec/L2/demos/pikEnc/host/pik/ans_decode.h
new file mode 100755
index 0000000000..0f8bf864a7
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/ans_decode.h
@@ -0,0 +1,82 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_ANS_DECODE_H_
+#define PIK_ANS_DECODE_H_
+
+// Library to decode the ANS population counts from the bit-stream and build a
+// decoding table from them.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <vector>
+
+#include "pik/ans_params.h"
+#include "pik/bit_reader.h"
+#include "pik/byte_order.h"
+
+namespace pik {
+struct ANSSymbolInfo {
+  uint16_t offset;  // Sum of the freq of all preceding symbols.
+  uint16_t freq;    // Count; the pair is also accessed as one uint32_t.
+};
+
+struct ANSCode {
+  std::vector<uint16_t> map;  // Table slot -> symbol, for each histogram.
+  // indexed by (entropy_code_id << ANS_LOG_TAB_SIZE) + symbol.
+  std::vector<ANSSymbolInfo> info;
+};
+
+bool DecodeANSCodes(const size_t num_histograms, const size_t max_alphabet_size,
+                    BitReader* in, ANSCode* result);
+
+class ANSSymbolReader {  // Decodes symbols using the tables of an ANSCode.
+ public:
+  ANSSymbolReader(const ANSCode* code) : code_(code) {}
+
+  // Decodes the next symbol of histogram `histo_idx` from `br`.
+  PIK_INLINE int ReadSymbol(const int histo_idx, BitReader* PIK_RESTRICT br) {
+    if (PIK_UNLIKELY(symbols_left_ == 0)) {
+      state_ = br->ReadBits(16);
+      state_ = (state_ << 16) | br->ReadBits(16);
+      br->FillBitBuffer();
+      symbols_left_ = kANSBufferSize;
+    }
+    const uint32_t res = state_ & (ANS_TAB_SIZE - 1);
+    const int histo_offset = histo_idx << ANS_LOG_TAB_SIZE;
+#if PIK_BYTE_ORDER_LITTLE
+    uint32_t s32;
+    memcpy(&s32, &code_->map[histo_offset + res], sizeof(s32));
+    const size_t symbol = s32 & 0xFFFF;
+    memcpy(&s32, &code_->info[histo_offset + symbol], sizeof(s32));
+    const uint32_t offset = s32 & 0xFFFF;
+    const uint32_t freq = s32 >> 16;
+    state_ = freq * (state_ >> ANS_LOG_TAB_SIZE) + res - offset;
+#else
+    const uint16_t symbol = code_->map[histo_offset + res];
+    const ANSSymbolInfo s = code_->info[histo_offset + symbol];
+    state_ = s.freq * (state_ >> ANS_LOG_TAB_SIZE) + res - s.offset;
+#endif
+    --symbols_left_;
+    if (PIK_UNLIKELY(state_ < (1u << 16))) {  // Renormalize: pull in 16 bits.
+      state_ = (state_ << 16) | br->PeekFixedBits<16>();
+      br->Advance(16);
+    }
+    return symbol;
+  }
+
+  // True if the state is back at its initial value (ANS_SIGNATURE << 16).
+  bool CheckANSFinalState() const { return state_ == (ANS_SIGNATURE << 16); }
+
+ private:
+  size_t symbols_left_ = 0;  // Symbols until state_ is re-read from the stream.
+  uint32_t state_ = ANS_SIGNATURE << 16;
+  const ANSCode* code_;
+};
+
+}  // namespace pik
+
+#endif  // PIK_ANS_DECODE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/ans_encode.cc b/codec/L2/demos/pikEnc/host/pik/ans_encode.cc
new file mode 100755
index 0000000000..9a796ceb65
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/ans_encode.cc
@@ -0,0 +1,434 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/ans_encode.h"
+
+#include <stdint.h>
+#include <algorithm>
+#include <vector>
+
+#include "pik/ans_common.h"
+#include "pik/fast_log.h"
+
+namespace pik {
+
+namespace {
+
+static const int kMaxNumSymbolsForSmallCode = 4;
+
+void ANSBuildInfoTable(const int* counts, int alphabet_size,
+                       ANSEncSymbolInfo* info) {
+  int running_total = 0;
+  for (int s = 0; s < alphabet_size; ++s) {
+    const uint32_t freq = counts[s];
+    ANSEncSymbolInfo& sym = info[s];
+    sym.freq_ = counts[s];
+    sym.start_ = running_total;  // Sum of the counts of the symbols before s.
+    running_total += freq;
+#ifdef USE_MULT_BY_RECIPROCAL
+    // Reciprocal of freq_, rounded up, in RECIPROCAL_PRECISION-bit fixed point.
+    // For freq == 0 the value shouldn't matter (symbol shouldn't occur), but...
+    sym.ifreq_ =
+        freq != 0 ? ((1ull << RECIPROCAL_PRECISION) + sym.freq_ - 1) / sym.freq_
+                  : 1;
+#endif
+  }
+}
+
+int EstimateDataBits(const int* histogram, const int* counts, size_t len) {
+  // Accumulates histogram[i] * (ANS_LOG_TAB_SIZE - FastLog2(counts[i])).
+  float bits = 0.0f;
+  int total_histogram = 0;
+  int total_counts = 0;
+  for (size_t i = 0; i < len; ++i) {
+    total_histogram += histogram[i];
+    total_counts += counts[i];
+    if (histogram[i] <= 0) continue;
+    PIK_ASSERT(counts[i] > 0);
+    bits -= histogram[i] * FastLog2(counts[i]);
+  }
+  if (total_histogram > 0) {
+    PIK_ASSERT(total_counts == ANS_TAB_SIZE);
+    const int log2_total_counts = ANS_LOG_TAB_SIZE;
+    bits += total_histogram * log2_total_counts;
+  }
+  return static_cast<int>(bits + 1.0f);
+}
+
+int EstimateDataBitsFlat(const int* histogram, size_t len) {
+  // Same estimate for a flat code over `len` symbols: every occurrence is
+  // charged FastLog2(len) bits.
+  const float bits_per_symbol = FastLog2(len);
+  int num_occurrences = 0;
+  for (size_t i = 0; i < len; ++i) num_occurrences += histogram[i];
+  return static_cast<int>(num_occurrences * bits_per_symbol + 1.0);
+}
+
+// Static Huffman code for encoding logcounts, indexed by logcount: code length
+// in bits and code word. The last entry is used as the RLE marker.
+static const uint8_t kLogCountBitLengths[ANS_LOG_TAB_SIZE + 2] = {
+    5, 4, 4, 4, 3, 3, 2, 3, 3, 6, 7, 7,
+};
+static const uint16_t kLogCountSymbols[ANS_LOG_TAB_SIZE + 2] = {
+    15, 3, 11, 7, 2, 6, 0, 1, 5, 31, 63, 127,
+};
+
+// Returns the step between neighbouring representable counts near "count",
+// i.e. 2^(number of low bits of "count" that are not stored).
+static int SmallestIncrement(int count) {
+  const int log_count = Log2Floor(count);
+  const int stored_bits = GetPopulationCountPrecision(log_count);
+  return 1 << (log_count - stored_bits);
+}
+
+template <bool minimize_error_of_sum>
+bool RebalanceHistogram(const float* targets, int max_symbol, int table_size,
+                        int* omit_pos, int* counts) {  // targets[] -> counts[]
+  int sum = 0;
+  float sum_nonrounded = 0.0;
+  int remainder_pos = 0;  // if all of them are handled in first loop
+  int remainder_log = -1;
+  for (int n = 0; n < max_symbol; ++n) {
+    if (targets[n] > 0 && targets[n] < 1.0) {
+      counts[n] = 1;  // Targets in (0, 1) are rounded up to 1.
+      sum_nonrounded += targets[n];
+      sum += counts[n];
+    }
+  }
+  const float discount_ratio =
+      (table_size - sum) / (table_size - sum_nonrounded);
+  PIK_ASSERT(discount_ratio > 0);
+  PIK_ASSERT(discount_ratio <= 1.0);
+  // Invariant for minimize_error_of_sum == true:
+  // abs(sum - sum_nonrounded)
+  //   <= SmallestIncrement(max(targets[])) + max_symbol
+  for (int n = 0; n < max_symbol; ++n) {
+    if (targets[n] >= 1.0) {
+      sum_nonrounded += targets[n];
+      counts[n] =
+          static_cast<uint32_t>(targets[n] * discount_ratio);  // truncate
+      if (counts[n] == 0) counts[n] = 1;
+      if (counts[n] == table_size) counts[n] = table_size - 1;
+      // Round the count to the closest nonzero multiple of SmallestIncrement
+      // (when minimize_error_of_sum is false) or one of two closest so as to
+      // keep the sum as close as possible to sum_nonrounded.
+      int inc = SmallestIncrement(counts[n]);
+      counts[n] -= counts[n] & (inc - 1);
+      // TODO(robryk): Should we rescale targets[n]?
+      const float target =
+          minimize_error_of_sum ? (sum_nonrounded - sum) : targets[n];
+      if (counts[n] == 0 ||
+          (target > counts[n] + inc / 2 && counts[n] + inc < table_size)) {
+        counts[n] += inc;
+      }
+      sum += counts[n];
+      const int count_log = Log2FloorNonZero(counts[n]);
+      if (count_log > remainder_log) {
+        remainder_pos = n;
+        remainder_log = count_log;
+      }
+    }
+  }
+  PIK_ASSERT(remainder_pos != -1);  // NOTE(review): cannot fire, starts at 0.
+  counts[remainder_pos] -= sum - table_size;  // Total becomes table_size.
+  *omit_pos = remainder_pos;
+  return counts[remainder_pos] > 0;
+}
+
+bool NormalizeCounts(int* counts, int* omit_pos, const int length,
+                     const int precision_bits, int* num_symbols, int* symbols) {
+  const int table_size = 1 << precision_bits;  // target sum / table size
+  // Gather the total, the number of used symbols, the first few of them (for
+  // the small-code case) and one past the last used symbol.
+  uint64_t total = 0;
+  int max_symbol = 0;
+  int symbol_count = 0;
+  for (int n = 0; n < length; ++n) {
+    total += counts[n];
+    if (counts[n] <= 0) continue;
+    if (symbol_count < kMaxNumSymbolsForSmallCode) {
+      symbols[symbol_count] = n;
+    }
+    ++symbol_count;
+    max_symbol = n + 1;
+  }
+
+  *num_symbols = symbol_count;
+  if (symbol_count == 0) return true;  // Nothing to normalize.
+  if (symbol_count == 1) {
+    counts[symbols[0]] = table_size;  // The only symbol owns the whole table.
+    return true;
+  }
+  if (symbol_count > table_size)
+    return PIK_FAILURE("Too many entries in an ANS histogram");
+
+  // Scale the counts so that they sum to table_size, then round them.
+  const float norm = 1.f * table_size / total;
+  std::vector<float> targets(max_symbol);
+  for (int n = 0; n < max_symbol; ++n) {
+    targets[n] = norm * counts[n];
+  }
+  // Use an alternative rebalancing mechanism if the first one failed
+  // to create a histogram that is positive wherever the original one was.
+  if (!RebalanceHistogram<false>(&targets[0], max_symbol, table_size, omit_pos,
+                                 counts) &&
+      !RebalanceHistogram<true>(&targets[0], max_symbol, table_size, omit_pos,
+                                counts)) {
+    return PIK_FAILURE("Logic error: couldn't rebalance a histogram");
+  }
+  return true;
+}
+
+void StoreVarLenUint16(size_t n, size_t* storage_ix, uint8_t* storage) {
+  // Flag bit: 0 means n == 0 and nothing else follows.
+  WriteBits(1, n == 0 ? 0 : 1, storage_ix, storage);
+  if (n == 0) return;
+  // 4 bits hold the position of the leading one of n, followed by the bits
+  // of n below it.
+  const size_t nbits = Log2FloorNonZero(n);
+  WriteBits(4, nbits, storage_ix, storage);
+  WriteBits(nbits, n - (1ULL << nbits), storage_ix, storage);
+}
+
+void EncodeCounts(const int* counts, const int alphabet_size,
+                  const int omit_pos, const int num_symbols, const int* symbols,
+                  size_t* storage_ix, uint8_t* storage) {
+  if (num_symbols <= 2) {
+    // Small tree marker to encode 1-2 symbols.
+    WriteBits(1, 1, storage_ix, storage);
+    if (num_symbols == 0) {
+      WriteBits(1, 0, storage_ix, storage);
+      StoreVarLenUint16(0, storage_ix, storage);
+    } else {
+      WriteBits(1, num_symbols - 1, storage_ix, storage);
+      for (int i = 0; i < num_symbols; ++i) {
+        StoreVarLenUint16(symbols[i], storage_ix, storage);
+      }
+    }
+    if (num_symbols == 2) {
+      WriteBits(ANS_LOG_TAB_SIZE, counts[symbols[0]], storage_ix, storage);
+    }
+  } else {
+    // Mark non-small tree.
+    WriteBits(1, 0, storage_ix, storage);
+    // Mark non-flat histogram.
+    WriteBits(1, 0, storage_ix, storage);
+
+    // Precompute sequences for RLE encoding. Contains the number of identical
+    // values starting at a given index. Only contains the value at the first
+    // element of the series.
+    std::vector<int> same(alphabet_size, 0);
+    int last = 0;
+    for (int i = 1; i < alphabet_size; i++) {
+      // Store the sequence length once different symbol reached, or we're at
+      // the end, or the length is longer than we can encode, or we are at
+      // the omit_pos. We don't support including the omit_pos in an RLE
+      // sequence because this value may use a different amount of log2 bits
+      // than standard, it is too complex to handle in the decoder.
+      if (counts[i] != counts[last] || i + 1 == alphabet_size ||
+          (i - last) >= 255 || i == omit_pos || i == omit_pos + 1) {
+        same[last] = (i - last);
+        last = i + 1;
+      }
+    }
+
+    int length = 0;
+    std::vector<int> logcounts(alphabet_size);
+    int omit_log = 0;  // Strictly above earlier logcounts, >= later ones.
+    for (int i = 0; i < alphabet_size; ++i) {
+      PIK_ASSERT(counts[i] <= ANS_TAB_SIZE);
+      PIK_ASSERT(counts[i] >= 0);
+      if (i == omit_pos) {
+        length = i + 1;
+      } else if (counts[i] > 0) {
+        logcounts[i] = Log2FloorNonZero(counts[i]) + 1;
+        length = i + 1;
+        if (i < omit_pos) {
+          omit_log = std::max(omit_log, logcounts[i] + 1);
+        } else {
+          omit_log = std::max(omit_log, logcounts[i]);
+        }
+      }
+    }
+    logcounts[omit_pos] = omit_log;
+    // Since num_symbols >= 3, we know that length >= 3, therefore we encode
+    // length - 3.
+    StoreVarLenUint16(length - 3, storage_ix, storage);
+
+    // The logcount values are encoded with a static Huffman code.
+    static const size_t kMinReps = 4;
+    size_t rep = ANS_LOG_TAB_SIZE + 1;  // Index of the RLE symbol.
+    for (int i = 0; i < length; ++i) {
+      if (i > 0 && same[i - 1] > kMinReps) {
+        // Encode the RLE symbol and skip the repeated ones.
+        WriteBits(kLogCountBitLengths[rep], kLogCountSymbols[rep], storage_ix,
+                  storage);
+        WriteBits(8, same[i - 1], storage_ix, storage);
+        i += same[i - 1] - 2;
+        continue;
+      }
+      WriteBits(kLogCountBitLengths[logcounts[i]],
+                kLogCountSymbols[logcounts[i]], storage_ix, storage);
+    }
+    for (int i = 0; i < length; ++i) {
+      if (i > 0 && same[i - 1] > kMinReps) {
+        // Skip symbols encoded by RLE.
+        i += same[i - 1] - 2;
+        continue;
+      }
+      if (logcounts[i] > 1 && i != omit_pos) {
+        int bitcount = GetPopulationCountPrecision(logcounts[i] - 1);
+        int drop_bits = logcounts[i] - 1 - bitcount;
+        PIK_CHECK((counts[i] & ((1 << drop_bits) - 1)) == 0);
+        WriteBits(bitcount, (counts[i] >> drop_bits) - (1 << bitcount),
+                  storage_ix, storage);
+      }
+    }
+  }
+}
+
+void EncodeFlatHistogram(const int alphabet_size, size_t* storage_ix,
+                         uint8_t* storage) {
+  // First header bit: 0 marks a non-small tree.
+  WriteBits(1, 0, storage_ix, storage);
+  // Second header bit: 1 marks a uniform (flat) histogram.
+  WriteBits(1, 1, storage_ix, storage);
+  // Alphabet size in ANS_LOG_TAB_SIZE bits: 2 + ANS_LOG_TAB_SIZE bits total.
+  WriteBits(ANS_LOG_TAB_SIZE, alphabet_size, storage_ix, storage);
+}
+
+}  // namespace
+
+// Normalizes `histogram`, fills `info`, and (when both storage pointers are
+// non-null) writes the histogram header, choosing the flat form if cheaper.
+void BuildAndStoreANSEncodingData(const int* histogram, int alphabet_size,
+                                  ANSEncSymbolInfo* info, size_t* storage_ix,
+                                  uint8_t* storage) {
+  PIK_ASSERT(alphabet_size <= ANS_TAB_SIZE);
+  int num_symbols = 0;
+  int symbols[kMaxNumSymbolsForSmallCode] = {0};
+  std::vector<int> counts(histogram, histogram + alphabet_size);
+  int omit_pos = 0;
+  PIK_CHECK(NormalizeCounts(counts.data(), &omit_pos, alphabet_size,
+                            ANS_LOG_TAB_SIZE, &num_symbols, symbols));
+  ANSBuildInfoTable(counts.data(), alphabet_size, info);
+  if (storage_ix != nullptr && storage != nullptr) {
+    const size_t storage_ix0 = *storage_ix;  // size_t: no int truncation.
+    EncodeCounts(counts.data(), alphabet_size, omit_pos, num_symbols, symbols,
+                 storage_ix, storage);
+    if (alphabet_size <= kMaxNumSymbolsForSmallCode) return;
+    // Compare header + estimated data cost against the flat alternative.
+    const int histo_bits = static_cast<int>(*storage_ix - storage_ix0);
+    const int data_bits =
+        EstimateDataBits(histogram, counts.data(), alphabet_size);
+    const int histo_bits_flat = ANS_LOG_TAB_SIZE + 2;
+    const int data_bits_flat = EstimateDataBitsFlat(histogram, alphabet_size);
+    if (histo_bits_flat + data_bits_flat < histo_bits + data_bits) {
+      counts = CreateFlatHistogram(alphabet_size, ANS_TAB_SIZE);
+      ANSBuildInfoTable(counts.data(), alphabet_size, info);
+      RewindStorage(storage_ix0, storage_ix, storage);
+      EncodeFlatHistogram(alphabet_size, storage_ix, storage);
+    }
+  }
+}
+
+float ANSPopulationCost(const int* data, int alphabet_size, int total_count) {
+  if (total_count == 0) {
+    return 7;  // Constant estimate when there is nothing to encode.
+  }
+
+  float entropy_bits = total_count * ANS_LOG_TAB_SIZE;  // log2 terms come off.
+  int histogram_bits = 0;
+  int count = 0;  // Number of symbols with a nonzero count.
+  int length = 0;  // Ends up as 1 + index of the last nonzero count.
+  if (total_count > ANS_TAB_SIZE) {
+    uint64_t total = total_count;
+    for (int i = 0; i < alphabet_size; ++i) {
+      if (data[i] > 0) {
+        ++count;
+        length = i;
+      }
+    }
+    if (count == 1) {
+      return 7;
+    }
+    ++length;
+    const uint64_t max0 = (total * length) >> ANS_LOG_TAB_SIZE;
+    const uint64_t max1 = (max0 * length) >> ANS_LOG_TAB_SIZE;
+    const uint32_t min_base = (total + max0 + max1) >> ANS_LOG_TAB_SIZE;
+    total += min_base * count;
+    const int64_t kFixBits = 32;
+    const int64_t kFixOne = 1LL << kFixBits;
+    const int64_t kDescaleBits = kFixBits - ANS_LOG_TAB_SIZE;
+    const int64_t kDescaleOne = 1LL << kDescaleBits;
+    const int64_t kDescaleMask = kDescaleOne - 1;
+    const uint32_t mult = kFixOne / total;  // Fixed-point 1 / adjusted total.
+    const uint32_t error = kFixOne % total;
+    uint32_t cumul = error;
+    if (error < kDescaleOne) {
+      cumul += (kDescaleOne - error) >> 1;
+    }
+    if (data[0] > 0) {  // Symbol 0 adds entropy only, no histogram bits.
+      uint64_t c = (uint64_t)(data[0] + min_base) * mult + cumul;
+      float log2count = FastLog2(c >> kDescaleBits);
+      entropy_bits -= data[0] * log2count;
+      cumul = c & kDescaleMask;  // Carry the fractional part forward.
+    }
+    for (int i = 1; i < length; ++i) {
+      if (data[i] > 0) {
+        uint64_t c = (uint64_t)(data[i] + min_base) * mult + cumul;
+        float log2count = FastLog2(c >> kDescaleBits);
+        int log2floor = static_cast<int>(log2count);
+        entropy_bits -= data[i] * log2count;
+        histogram_bits += log2floor;
+        // NOTE(review): no clamp on log2floor here, unlike the branch below.
+        histogram_bits += kLogCountBitLengths[log2floor + 1];
+        cumul = c & kDescaleMask;
+      } else {
+        histogram_bits += kLogCountBitLengths[0];
+      }
+    }
+  } else {
+    float log2norm = ANS_LOG_TAB_SIZE - FastLog2(total_count);
+    if (data[0] > 0) {  // Symbol 0 adds entropy only, no histogram bits.
+      float log2count = FastLog2(data[0]) + log2norm;
+      entropy_bits -= data[0] * log2count;
+      length = 0;
+      ++count;
+    }
+    for (int i = 1; i < alphabet_size; ++i) {
+      if (data[i] > 0) {
+        float log2count = FastLog2(data[i]) + log2norm;
+        int log2floor = static_cast<int>(log2count);
+        entropy_bits -= data[i] * log2count;
+        if (log2floor >= ANS_LOG_TAB_SIZE) {
+          log2floor = ANS_LOG_TAB_SIZE - 1;  // Clamp before the table lookup.
+        }
+        histogram_bits += GetPopulationCountPrecision(log2floor);
+        histogram_bits += kLogCountBitLengths[log2floor + 1];
+        length = i;
+        ++count;
+      } else {
+        histogram_bits += kLogCountBitLengths[0];
+      }
+    }
+    ++length;
+  }
+
+  if (count == 1) {
+    return 7;  // Same constant as for an empty histogram.
+  }
+
+  if (count == 2) {
+    return static_cast<int>(entropy_bits) + 1 + 12 + ANS_LOG_TAB_SIZE;
+  }
+
+  int max_bits = 1 + Log2Floor(alphabet_size - 1);
+  histogram_bits += max_bits;
+
+  return histogram_bits + static_cast<int>(entropy_bits) + 1;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/ans_encode.h b/codec/L2/demos/pikEnc/host/pik/ans_encode.h
new file mode 100755
index 0000000000..25d991f7d4
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/ans_encode.h
@@ -0,0 +1,193 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_ANS_ENCODE_H_
+#define PIK_ANS_ENCODE_H_
+
+// Library to encode the ANS population counts to the bit-stream and encode
+// symbols based on the respective distributions.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <vector>
+
+#include "pik/ans_params.h"
+#include "pik/compiler_specific.h"
+#include "pik/write_bits.h"
+
+namespace pik {
+
+#define USE_MULT_BY_RECIPROCAL
+
+// precision must be equal to:  #bits(state_) + #bits(freq)
+#define RECIPROCAL_PRECISION 42
+
+// Data structure representing one element of the encoding table built
+// from a distribution.
+struct ANSEncSymbolInfo {
+  uint16_t freq_;  // Symbol frequency; divides the state in PutSymbol.
+  uint16_t start_;  // Offset added to the new state in PutSymbol.
+#ifdef USE_MULT_BY_RECIPROCAL
+  uint64_t ifreq_;  // Multiplier used with RECIPROCAL_PRECISION instead of /.
+#endif
+};
+
+void BuildAndStoreANSEncodingData(const int* histogram, int alphabet_size,
+                                  ANSEncSymbolInfo* info, size_t* storage_ix,
+                                  uint8_t* storage);
+
+struct ANSEncodingData {
+  // Sizes `ans_table` to `histo_size` entries and forwards to
+  // BuildAndStoreANSEncodingData with the given histogram and storage.
+  void BuildAndStore(const int* histogram, size_t histo_size,
+                     size_t* storage_ix, uint8_t* storage) {
+    ans_table.resize(histo_size);
+    BuildAndStoreANSEncodingData(histogram, histo_size, ans_table.data(),
+                                 storage_ix, storage);
+  }
+
+  // Overload for unsigned histograms: converts the counts to int and forwards.
+  void BuildAndStore(const uint32_t* histogram, size_t histo_size,
+                     size_t* storage_ix, uint8_t* storage) {
+    const std::vector<int> counts(histogram, histogram + histo_size);
+    BuildAndStore(counts.data(), counts.size(), storage_ix, storage);
+  }
+
+  std::vector<ANSEncSymbolInfo> ans_table;  // One entry per histogram slot.
+};
+
+// Returns an estimate of the number of bits required to encode the given
+// histogram (header bits plus data bits).
+float ANSPopulationCost(const int* data, int alphabet_size, int total_count);
+
+class ANSCoder {
+ public:
+  ANSCoder() : state_(ANS_SIGNATURE << 16) {}  // Signature in the high half.
+
+  uint32_t PutSymbol(const ANSEncSymbolInfo t, uint8_t* nbits) {
+    uint32_t bits = 0;
+    *nbits = 0;
+    if ((state_ >> (32 - ANS_LOG_TAB_SIZE)) >= t.freq_) {
+      bits = state_ & 0xffff;  // Emit the low 16 bits of the state.
+      state_ >>= 16;
+      *nbits = 16;
+    }
+#ifdef USE_MULT_BY_RECIPROCAL
+    // Multiply by the stored reciprocal instead of dividing (needs 64 bits).
+    const uint32_t v = (state_ * t.ifreq_) >> RECIPROCAL_PRECISION;
+    const uint32_t offset = state_ - v * t.freq_ + t.start_;
+    state_ = (v << ANS_LOG_TAB_SIZE) + offset;
+#else  // Same state update written with division and modulo.
+    state_ = ((state_ / t.freq_) << ANS_LOG_TAB_SIZE) + (state_ % t.freq_) +
+             t.start_;
+#endif
+    return bits;  // 0 (with *nbits == 0) when nothing was emitted.
+  }
+
+  uint32_t GetState() const { return state_; }
+
+ private:
+  uint32_t state_;
+};
+
+// Symbol visitor that collects symbols and raw bits to be encoded.
+class ANSSymbolWriter {
+ public:
+  ANSSymbolWriter(const std::vector<ANSEncodingData>& codes,
+                  const std::vector<uint8_t>& context_map, size_t* storage_ix,
+                  uint8_t* storage)
+      : idx_(0),
+        symbol_idx_(0),
+        code_words_(2 * kANSBufferSize),
+        symbols_(kANSBufferSize),
+        codes_(codes),
+        context_map_(context_map),
+        storage_ix_(storage_ix),
+        storage_(storage) {
+    num_extra_bits_[0] = num_extra_bits_[1] = num_extra_bits_[2] = 0;
+  }
+
+  void VisitBits(size_t nbits, uint64_t bits, int c) {
+    PIK_ASSERT(nbits <= 16);
+    PIK_ASSERT(idx_ < code_words_.size());
+    if (nbits > 0) {
+      code_words_[idx_++] = (bits << 16) + nbits;  // Pack (bits, nbits).
+    }
+    num_extra_bits_[c] += nbits;  // c indexes the 3-entry tally.
+  }
+
+  void VisitSymbol(int symbol, int ctx) {
+    PIK_ASSERT(ctx < context_map_.size());
+    PIK_ASSERT(context_map_[ctx] < codes_.size());
+    PIK_ASSERT(symbol < codes_[context_map_[ctx]].ans_table.size());
+    PIK_ASSERT(idx_ < code_words_.size());
+    code_words_[idx_++] = 0xffff;  // Placeholder, to be encoded later.
+    symbols_[symbol_idx_++] = (ctx << 16) + symbol;  // Pack (ctx, symbol).
+    if (symbol_idx_ == kANSBufferSize) {  // Symbol buffer is full.
+      FlushToBitStream();
+    }
+  }
+
+  size_t num_extra_bits() const {
+    return num_extra_bits_[0] + num_extra_bits_[1] + num_extra_bits_[2];
+  }
+  size_t num_extra_bits(int c) const { return num_extra_bits_[c]; }
+
+  void FlushToBitStream() {
+    const int num_codewords = idx_;
+    ANSCoder ans;
+    int first_symbol = num_codewords;  // Unchanged if no symbol is pending.
+    // Replace placeholder code words with actual bits by feeding symbols to the
+    // ANS encoder in a reverse order.
+    for (int i = num_codewords - 1; i >= 0; --i) {
+      const uint32_t cw = code_words_[i];
+      if ((cw & 0xffff) == 0xffff) {  // Placeholder left by VisitSymbol.
+        const uint32_t sym = symbols_[--symbol_idx_];
+        const uint32_t context = sym >> 16;
+        const uint8_t histo_idx = context_map_[context];
+        const uint32_t symbol = sym & 0xffff;
+        const ANSEncSymbolInfo info = codes_[histo_idx].ans_table[symbol];
+        uint8_t nbits = 0;
+        uint32_t bits = ans.PutSymbol(info, &nbits);
+        code_words_[i] = (bits << 16) + nbits;
+        first_symbol = i;  // Ends as the earliest symbol slot.
+      }
+    }
+    for (int i = 0; i < num_codewords; ++i) {
+      if (i == first_symbol) {  // Final 32-bit state precedes that slot.
+        const uint32_t state = ans.GetState();
+        WriteBits(16, (state >> 16) & 0xffff, storage_ix_, storage_);
+        WriteBits(16, state & 0xffff, storage_ix_, storage_);
+      }
+      const uint32_t cw = code_words_[i];
+      const uint32_t nbits = cw & 0xffff;
+      const uint32_t bits = cw >> 16;
+      WriteBits(nbits, bits, storage_ix_, storage_);
+    }
+    idx_ = 0;
+    PIK_ASSERT(symbol_idx_ == 0);
+  }
+
+ private:
+  int idx_;  // Next free slot in code_words_.
+  int symbol_idx_;  // Next free slot in symbols_.
+  // Vector of (bits, nbits) pairs to be encoded.
+  std::vector<uint32_t> code_words_;
+  // Vector of (context, symbol) pairs to be encoded.
+  std::vector<uint32_t> symbols_;
+  const std::vector<ANSEncodingData>& codes_;
+  const std::vector<uint8_t>& context_map_;
+  size_t num_extra_bits_[3];
+  size_t* storage_ix_;
+  uint8_t* storage_;
+};
+
+}  // namespace pik
+
+#endif  // PIK_ANS_ENCODE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/ans_params.h b/codec/L2/demos/pikEnc/host/pik/ans_params.h
new file mode 100755
index 0000000000..0276bf8188
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/ans_params.h
@@ -0,0 +1,27 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_ANS_PARAMS_H_
+#define PIK_ANS_PARAMS_H_
+
+// Common parameters that are needed for both the ANS entropy encoding and
+// decoding methods.
+
+#include <stdint.h>
+#include <cstdlib>
+
+namespace pik {
+
+static const int kANSBufferSize = 1 << 16;  // 65536 entries.
+
+#define ANS_LOG_TAB_SIZE 10  // log2 of ANS_TAB_SIZE.
+#define ANS_TAB_SIZE (1 << ANS_LOG_TAB_SIZE)  // 1024.
+#define ANS_TAB_MASK (ANS_TAB_SIZE - 1)  // Low ANS_LOG_TAB_SIZE bits set.
+#define ANS_SIGNATURE 0x13  // Initial state, used as CRC.
+
+}  // namespace pik
+
+#endif  // PIK_ANS_PARAMS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/approx_cube_root.h b/codec/L2/demos/pikEnc/host/pik/approx_cube_root.h
new file mode 100755
index 0000000000..fda95e10c6
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/approx_cube_root.h
@@ -0,0 +1,56 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_APPROX_CUBE_ROOT_H_
+#define PIK_APPROX_CUBE_ROOT_H_
+
+// Fast cube root for XYB color space.
+
+#include <string.h>
+
+#include "pik/compiler_specific.h"
+
+namespace pik {
+
+PIK_INLINE float CubeRootInitialGuess(float y) {
+  // Reinterpret the bits of y as an integer. In the spirit of the well-known
+  // 1/sqrt(y) recipe, whose first guess is <magic constant> - bits / 2, the
+  // cube-root guess takes the form <magic constant> + bits / 3. The inputs
+  // that reach the cube root in pik form a known set (see LinearToXyb() in
+  // opsin_image.cc), so the constant was found by searching for the minimum
+  // worst-case error; the value below is optimal among constants whose 8
+  // least significant bits are zero.
+  constexpr int kMagic = 0x2a50f200;
+  int y_bits;
+  memcpy(&y_bits, &y, sizeof(y_bits));
+  const int guess_bits = kMagic + y_bits / 3;
+  // Reinterpret the adjusted bits as a float again.
+  float guess;
+  memcpy(&guess, &guess_bits, sizeof(guess));
+  return guess;
+}
+
+PIK_INLINE float CubeRootNewtonStep(float y, float xn) {
+  // Newton's method on f(x) = x^3 - y, whose derivative is f'(x) = 3 * x^2:
+  //   x_{n+1} = x_n - (x_n^3 - y) / (3 * x_n^2)
+  //           = (2 * x_n + y / x_n^2) / 3
+  constexpr float kOneThird = 1.0f / 3.0f;
+  const float y_over_xn_sq = y / (xn * xn);
+  return kOneThird * (2.0f * xn + y_over_xn_sq);
+}
+
+// Returns an approximation of the cube root of y,
+// with an accuracy of about 1e-6 for 0 <= y <= 1.
+PIK_INLINE float ApproxCubeRoot(float y) {
+  // Refine the bit-trick initial guess with two Newton iterations.
+  float x = CubeRootInitialGuess(y);
+  for (int step = 0; step < 2; ++step) x = CubeRootNewtonStep(y, x);
+  return x;
+}
+
+}  // namespace pik
+
+#endif  // PIK_APPROX_CUBE_ROOT_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/ar_control_field.cc b/codec/L2/demos/pikEnc/host/pik/ar_control_field.cc
new file mode 100755
index 0000000000..74562f491f
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/ar_control_field.cc
@@ -0,0 +1,109 @@
+// Copyright 2019 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/ar_control_field.h"
+#include "pik/adaptive_quantization.h"
+#include "pik/adaptive_reconstruction.h"
+#include "pik/common.h"
+#include "pik/data_parallel.h"
+#include "pik/image.h"
+#include "pik/opsin_inverse.h"
+
+namespace pik {
+
+// TODO(veluca): Remove and enable by default.
+constexpr bool kUseARField = false;
+
+void FindBestArControlField(float distance, float intensity_target,
+                            const Image3F& opsin,
+                            const AcStrategyImage& ac_strategy,
+                            const ImageF& quant_field,
+                            const DequantMatrices* dequant,
+                            GaborishStrength gaborish, ThreadPool* pool,
+                            ImageB* sigma_lut_ids) {
+  constexpr size_t N = kBlockDim;
+  size_t xsize_blocks = DivCeil(opsin.xsize(), N);
+  size_t ysize_blocks = DivCeil(opsin.ysize(), N);
+
+  *sigma_lut_ids = ImageB(xsize_blocks, ysize_blocks);  // One id per block.
+  ZeroFillImage(sigma_lut_ids);
+
+  if (!kUseARField) return;  // Disabled: every id stays 0.
+
+  float quant_dc = InitialQuantDC(distance, intensity_target);
+  Quantizer quantizer(dequant, xsize_blocks, ysize_blocks);
+  quantizer.SetQuantField(quant_dc, QuantField(quant_field));
+
+  const Image3F* smoothed_ptr;
+  Image3F smoothed;
+  if (gaborish == GaborishStrength::kOff) {
+    smoothed_ptr = &opsin;  // No smoothing: use the input directly.
+  } else {
+    PIK_CHECK(ConvolveGaborish(opsin, gaborish, pool, &smoothed));
+    smoothed_ptr = &smoothed;
+  }
+
+  Image3F filt =
+      DoDenoise(*smoothed_ptr, opsin, quantizer, quantizer.RawQuantField(),
+                *sigma_lut_ids, ac_strategy, EpfParams());
+
+  constexpr float kChannelWeights[3] = {1.0, 1.0, 0.3};
+  const float kInvPow =
+      1.0f / (kChannelWeights[0] + kChannelWeights[1] + kChannelWeights[2]);
+  constexpr float kStdDevRatioThreshold = 0.75f;
+
+  PIK_ASSERT(filt.PixelsPerRow() == opsin.PixelsPerRow());
+  size_t opsin_stride = opsin.PixelsPerRow();
+  size_t sigma_stride = sigma_lut_ids->PixelsPerRow();
+
+  for (size_t by = 0; by < ysize_blocks; by++) {
+    const float* PIK_RESTRICT filt_row[3] = {filt.ConstPlaneRow(0, by * N),
+                                             filt.ConstPlaneRow(1, by * N),
+                                             filt.ConstPlaneRow(2, by * N)};
+    const float* PIK_RESTRICT in_row[3] = {
+        opsin.ConstPlaneRow(0, by * N),
+        opsin.ConstPlaneRow(1, by * N),
+        opsin.ConstPlaneRow(2, by * N),
+    };
+
+    AcStrategyRow acs_row = ac_strategy.ConstRow(by);
+    uint8_t* PIK_RESTRICT out_row = sigma_lut_ids->Row(by);
+    for (size_t bx = 0; bx < xsize_blocks; bx++) {
+      AcStrategy acs = acs_row[bx];
+      if (!acs.IsFirstBlock()) continue;
+      float avg_ratio = 1;
+      uint8_t lut = 0;
+      for (size_t c = 0; c < 3; c++) {
+        Stats stats_in;
+        Stats stats_filt;
+        for (size_t iy = 0; iy < acs.covered_blocks_y() * N; iy++) {
+          for (size_t ix = 0; ix < acs.covered_blocks_x() * N; ix++) {
+            stats_in.Notify(in_row[c][bx * N + iy * opsin_stride + ix]);
+            stats_filt.Notify(filt_row[c][bx * N + iy * opsin_stride + ix]);
+          }
+        }
+        float in_dev = stats_in.StandardDeviation();
+        float filt_dev = stats_filt.StandardDeviation();
+        // NOTE(review): in_dev can be 0 for a constant block; confirm intended.
+        float r = pow(filt_dev / in_dev, kChannelWeights[c]);
+        if (r > 3) r = 3;
+        if (r < 1e-2) r = 1e-2;
+        avg_ratio *= r;
+      }
+      float ratio = std::pow(avg_ratio, kInvPow);  // Weighted geometric mean.
+      if (ratio < kStdDevRatioThreshold) {
+        lut = 1;  // Filtered deviation fell below the threshold ratio.
+      }
+      for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+        for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+          out_row[bx + sigma_stride * iy + ix] = lut;  // Same id everywhere.
+        }
+      }
+    }
+  }
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/ar_control_field.h b/codec/L2/demos/pikEnc/host/pik/ar_control_field.h
new file mode 100755
index 0000000000..db9d3644ba
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/ar_control_field.h
@@ -0,0 +1,27 @@
+// Copyright 2019 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_AR_CONTROL_FIELD_H_
+#define PIK_AR_CONTROL_FIELD_H_
+
+#include "pik/ac_strategy.h"
+#include "pik/image.h"
+#include "pik/pik_params.h"
+#include "pik/quant_weights.h"
+
+namespace pik {
+// Fills *sigma_lut_ids, which is passed by pointer as the output image.
+void FindBestArControlField(float distance, float intensity_target,
+                            const Image3F& opsin,
+                            const AcStrategyImage& ac_strategy,
+                            const ImageF& quant_field,
+                            const DequantMatrices* dequant,
+                            GaborishStrength gaborish, ThreadPool* pool,
+                            ImageB* sigma_lut_ids);
+
+}  // namespace pik
+
+#endif  // PIK_AR_CONTROL_FIELD_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/arch_specific.cc b/codec/L2/demos/pikEnc/host/pik/arch_specific.cc
new file mode 100755
index 0000000000..e6c888e3ba
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/arch_specific.cc
@@ -0,0 +1,150 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/arch_specific.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#if PIK_ARCH_X64
+#include <xmmintrin.h>
+#if !PIK_COMPILER_MSVC
+#include <cpuid.h>
+#endif
+#endif
+
+#if PIK_ARCH_PPC
+#include <sys/platform/ppc.h>  // __ppc_get_timebase_freq
+#endif
+
+#include <string.h>  // memcpy
+#include <string>
+
+#include "pik/simd/simd.h"
+
+namespace pik {
+
+#if PIK_ARCH_X64
+
+namespace {
+
+std::string BrandString() {
+  // Three CPUID leaves of 16 bytes each, plus the terminating NUL.
+  char brand[49];
+  uint32_t regs[4];
+
+  // Leaf 0x80000000 reports the highest extended leaf; bail out if the
+  // brand-string leaves (up to 0x80000004) are not available.
+  Cpuid(0x80000000U, 0, regs);
+  if (regs[0] < 0x80000004U) return std::string();
+
+  for (int leaf = 0; leaf < 3; ++leaf) {
+    Cpuid(0x80000002U + leaf, 0, regs);
+    memcpy(brand + leaf * 16, &regs, sizeof(regs));
+  }
+  brand[48] = 0;
+  return brand;
+}
+
+}  // namespace
+
+void Cpuid(const uint32_t level, const uint32_t count,
+           uint32_t* PIK_RESTRICT abcd) {
+#if PIK_COMPILER_MSVC
+  // The intrinsic fills signed ints; copy them out as unsigned registers.
+  int regs[4];
+  __cpuidex(regs, level, count);
+  for (int r = 0; r < 4; ++r) abcd[r] = regs[r];
+#else
+  // Non-MSVC path: __cpuid_count fills four separate register variables.
+  uint32_t eax, ebx, ecx, edx;
+  __cpuid_count(level, count, eax, ebx, ecx, edx);
+  abcd[0] = eax;
+  abcd[1] = ebx;
+  abcd[2] = ecx;
+  abcd[3] = edx;
+#endif
+}
+
+uint32_t ApicId() {
+  uint32_t regs[4];  // Filled by Cpuid in the order eax, ebx, ecx, edx.
+  Cpuid(/*level=*/1, /*count=*/0, regs);
+  return regs[1] >> 24;  // Top byte of ebx.
+}
+
+#endif  // PIK_ARCH_X64
+
+namespace {
+
+double DetectNominalClockRate() {
+#if PIK_ARCH_X64
+  const std::string& brand_string = BrandString();
+  // Brand strings include the maximum configured frequency. These unit
+  // suffixes are defined by Intel CPUID documentation; first match wins.
+  const char* prefixes[3] = {"MHz", "GHz", "THz"};
+  const double multipliers[3] = {1E6, 1E9, 1E12};
+  for (size_t i = 0; i < 3; ++i) {
+    const size_t pos_prefix = brand_string.find(prefixes[i]);
+    if (pos_prefix != std::string::npos) {
+      // NOTE(review): pos_prefix - 1 wraps around if the unit is at index 0.
+      const size_t pos_space = brand_string.rfind(' ', pos_prefix - 1);
+      if (pos_space != std::string::npos) {
+        const std::string digits =
+            brand_string.substr(pos_space + 1, pos_prefix - pos_space - 1);
+        // NOTE(review): std::stod throws if `digits` is not a number.
+        return std::stod(digits) * multipliers[i];
+      }
+    }
+  }
+#elif PIK_ARCH_PPC
+  double freq = -1;
+  char line[200];
+  char* s;
+  char* value;
+
+  FILE* f = fopen("/proc/cpuinfo", "r");
+  if (f != nullptr) {
+    while (fgets(line, sizeof(line), f) != nullptr) {
+      // NOTE: the ':' is the only character we can rely on
+      if (!(value = strchr(line, ':'))) continue;
+      // terminate the valuename
+      *value++ = '\0';
+      // skip any leading spaces
+      while (*value == ' ') value++;
+      if ((s = strchr(value, '\n'))) *s = '\0';
+
+      if (!strncasecmp(line, "clock", strlen("clock")) &&
+          sscanf(value, "%lf", &freq) == 1) {
+        freq *= 1E6;  // The parsed value is scaled as MHz.
+        break;
+      }
+    }
+    fclose(f);
+    return freq;  // Still -1 if no "clock" line was parsed.
+  }
+#endif
+
+  return 0.0;  // Nothing detected.
+}
+
+}  // namespace
+
+double NominalClockRate() {
+  // Detect once; function-local static init is thread-safe and cached.
+  static const double nominal_hz = DetectNominalClockRate();
+  return nominal_hz;
+}
+
+double InvariantTicksPerSecond() {
+#if PIK_ARCH_PPC
+  static const double timebase_hz = __ppc_get_timebase_freq();  // Cached.
+  return timebase_hz;
+#elif PIK_ARCH_X64
+  return NominalClockRate();
+#else
+  return 1E9;  // nanoseconds - matches tsc_timer.h CLOCK_MONOTONIC fallback.
+#endif
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/arch_specific.h b/codec/L2/demos/pikEnc/host/pik/arch_specific.h
new file mode 100755
index 0000000000..c6b89b639c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/arch_specific.h
@@ -0,0 +1,65 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_ARCH_SPECIFIC_H_
+#define PIK_ARCH_SPECIFIC_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "pik/compiler_specific.h"
+
+namespace pik {
+
+#if defined(__x86_64__) || defined(_M_X64)
+#define PIK_ARCH_X64 1
+#else  // Each PIK_ARCH_* is always defined as 0 or 1: test with #if.
+#define PIK_ARCH_X64 0
+#endif  // x86-64
+
+#ifdef __aarch64__
+#define PIK_ARCH_AARCH64 1
+#else
+#define PIK_ARCH_AARCH64 0
+#endif  // AArch64
+
+#if defined(__powerpc64__) || defined(_M_PPC)
+#define PIK_ARCH_PPC 1
+#else
+#define PIK_ARCH_PPC 0
+#endif  // PowerPC
+
+// Returns the nominal (without Turbo Boost) CPU clock rate [Hertz]. Useful for
+// (roughly) characterizing the CPU speed.
+double NominalClockRate();
+
+// Returns tsc_timer frequency, useful for converting ticks to seconds. This is
+// unaffected by CPU throttling ("invariant"). Thread-safe. Returns timebase
+// frequency on PPC, NominalClockRate on X64, otherwise 1E9.
+double InvariantTicksPerSecond();
+
+#if PIK_ARCH_X64
+
+// This constant avoids image.h depending on simd.h.
+static constexpr size_t kMaxVectorSize = 32;  // AVX2
+
+// Calls CPUID instruction with eax=level and ecx=count and returns the result
+// in abcd array where abcd = {eax, ebx, ecx, edx} (hence the name abcd).
+void Cpuid(const uint32_t level, const uint32_t count,
+           uint32_t* PIK_RESTRICT abcd);
+
+// Returns the APIC ID of the CPU on which we're currently running.
+uint32_t ApicId();
+
+#else
+
+static constexpr size_t kMaxVectorSize = 16;
+
+#endif  // PIK_ARCH_X64
+
+}  // namespace pik
+
+#endif  // PIK_ARCH_SPECIFIC_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/args.h b/codec/L2/demos/pikEnc/host/pik/args.h
new file mode 100755
index 0000000000..c4c5ca14b1
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/args.h
@@ -0,0 +1,116 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+// Helpers for parsing command line arguments. No include guard needed.
+
+#include "pik/pik_params.h"  // Override
+
+#include <stdio.h>
+#include <string>
+
+#include "pik/codec.h"
+#include "pik/status.h"
+
+namespace pik {
+
+static inline bool ParseOverride(const char* arg, Override* out) {
+  // Accepts exactly "1" (on) or "0" (off); any other text is reported on
+  // stderr and rejected.
+  const std::string text(arg);
+  const bool is_on = (text == "1");
+  const bool is_off = (text == "0");
+  if (!is_on && !is_off) {
+    fprintf(stderr, "Invalid flag, %s must be 0 or 1\n", arg);
+    return PIK_FAILURE("Args");
+  }
+  *out = is_on ? Override::kOn : Override::kOff;
+  return true;
+}
+
+static inline bool ParseUnsigned(const char* arg, size_t* out) {
+  char* end;  // Set by strtoull to the first character it did not consume.
+  *out = static_cast<size_t>(strtoull(arg, &end, 0));
+  if (end == arg || end[0] != '\0') {  // Nothing parsed, or trailing junk.
+    fprintf(stderr, "Unable to interpret as unsigned integer: %s.\n", arg);
+    return PIK_FAILURE("Args");
+  }
+  return true;
+}
+
+static inline bool ParseSigned(const char* arg, int* out) {
+  char* end;  // Set by strtol to the first character it did not consume.
+  *out = static_cast<int>(strtol(arg, &end, 0));
+  if (end == arg || end[0] != '\0') {  // Nothing parsed, or trailing junk.
+    fprintf(stderr, "Unable to interpret as signed integer: %s.\n", arg);
+    return PIK_FAILURE("Args");
+  }
+  return true;
+}
+
+static inline bool ParseGaborishStrength(const char* arg, int* out) {
+  size_t parsed;  // Must end up strictly below GaborishStrength::kMaxValue.
+  if (!ParseUnsigned(arg, &parsed)) return false;
+  if (parsed < static_cast<size_t>(GaborishStrength::kMaxValue)) {
+    *out = parsed;
+    return true;
+  }
+  fprintf(stderr, "Invalid GaborishStrength value: %s.\n", arg);
+  return PIK_FAILURE("Args");
+}
+
+static inline bool ParseFloat(const char* arg, float* out) {
+  char* end;  // Set by strtod to the first character it did not consume.
+  *out = static_cast<float>(strtod(arg, &end));
+  if (end == arg || end[0] != '\0') {  // Nothing parsed, or trailing junk.
+    fprintf(stderr, "Unable to interpret as float: %s.\n", arg);
+    return PIK_FAILURE("Args");
+  }
+  return true;
+}
+
+static inline bool ParseDouble(const char* arg, double* out) {
+  char* end;  // Set by strtod to the first character it did not consume.
+  *out = static_cast<double>(strtod(arg, &end));
+  if (end == arg || end[0] != '\0') {  // Nothing parsed, or trailing junk.
+    fprintf(stderr, "Unable to interpret as double: %s.\n", arg);
+    return PIK_FAILURE("Args");
+  }
+  return true;
+}
+
+static inline bool ParseAndAppendKeyValue(const char* arg, DecoderHints* out) {
+  // Split at the first '='; everything after it (possibly empty) is the value.
+  const char* sep = strchr(arg, '=');
+  if (sep == nullptr) {
+    fprintf(stderr, "Expected argument as 'key=value' but received '%s'\n",
+            arg);
+    return false;
+  }
+  out->Add(std::string(arg, sep), std::string(sep + 1));
+  return true;
+}
+
+static inline bool ParseString(const char* arg, std::string* out) {
+  *out = arg;  // Copies the C string; this parser cannot fail.
+  return true;
+}
+
+static inline bool ParseCString(const char* arg, const char** out) {
+  out[0] = arg;  // Stores the pointer only; the characters are not copied.
+  return true;
+}
+
+static inline bool SetBooleanTrue(bool* out) {
+  *out = true;
+  return true;  // Always succeeds.
+}
+
+static inline bool SetBooleanFalse(bool* out) {
+  *out = false;
+  return true;  // Always succeeds; the return value is not the stored flag.
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/bilinear_transform.cc b/codec/L2/demos/pikEnc/host/pik/bilinear_transform.cc
new file mode 100755
index 0000000000..5fe529636a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/bilinear_transform.cc
@@ -0,0 +1,260 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/bilinear_transform.h"
+#include "pik/common.h"
+#include "pik/image.h"
+
+constexpr bool kUseBilinearTransforms = false;
+
+std::tuple<double, double> ForwardCoordTransform(
+    double x_out, double y_out, double edge,
+    const double *transform_source_coords) {
+  // (0.5, 0.5) <- (source[0], source[1])
+  // (edge - 0.5, 0.5) <- (source[2], source[3])
+  // (edge - 0.5, edge - 0.5) <- (source[4], source[5])
+  // (0.5, edge - 0.5) <- (source[6], source[7])
+
+  const double x_prop = (x_out - 0.5) / (edge - 1.0);  // 0 at 0.5, 1 at edge-0.5
+  const double y_prop = (y_out - 0.5) / (edge - 1.0);  // same, vertically
+
+  // Bilinear blend of the four source x coordinates (even indices).
+  const double x = transform_source_coords[0] * (1 - x_prop) * (1 - y_prop) +
+                   transform_source_coords[2] * x_prop * (1 - y_prop) +
+                   transform_source_coords[4] * x_prop * y_prop +
+                   transform_source_coords[6] * (1 - x_prop) * y_prop;
+
+  // Same weights applied to the four source y coordinates (odd indices).
+  const double y = transform_source_coords[1] * (1 - x_prop) * (1 - y_prop) +
+                   transform_source_coords[3] * x_prop * (1 - y_prop) +
+                   transform_source_coords[5] * x_prop * y_prop +
+                   transform_source_coords[7] * (1 - x_prop) * y_prop;
+
+  return std::tie(x, y);
+}
+
+std::tuple<double, double> ReverseCoordTransform(
+    double x_in, double y_in, double edge,
+    const double *transform_source_coords) {
+  double cur_guess_x = 0.5, cur_guess_y = 0.5;  // unit-square guess, centered
+  constexpr int kNewtonIters = 10;  // fixed count; there is no convergence test
+
+  for (int i = 0; i < kNewtonIters; i++) {
+    const double x0 = cur_guess_x, x1 = 1 - cur_guess_x;
+    const double y0 = cur_guess_y, y1 = 1 - cur_guess_y;
+
+    // Bilinear blend of the source points evaluated at the current guess.
+    const double guess_out_x = transform_source_coords[0] * x1 * y1 +
+                               transform_source_coords[2] * x0 * y1 +
+                               transform_source_coords[4] * x0 * y0 +
+                               transform_source_coords[6] * x1 * y0;
+    const double guess_out_y = transform_source_coords[1] * x1 * y1 +
+                               transform_source_coords[3] * x0 * y1 +
+                               transform_source_coords[5] * x0 * y0 +
+                               transform_source_coords[7] * x1 * y0;
+    const double j00 =  // d(guess_out_x) / d(cur_guess_x)
+        -transform_source_coords[0] * y1 + transform_source_coords[2] * y1 +
+        transform_source_coords[4] * y0 - transform_source_coords[6] * y0;
+    const double j01 =  // d(guess_out_y) / d(cur_guess_x)
+        -transform_source_coords[1] * y1 + transform_source_coords[3] * y1 +
+        transform_source_coords[5] * y0 - transform_source_coords[7] * y0;
+    const double j10 =  // d(guess_out_x) / d(cur_guess_y)
+        -transform_source_coords[0] * x1 - transform_source_coords[2] * x0 +
+        transform_source_coords[4] * x0 + transform_source_coords[6] * x1;
+    const double j11 =  // d(guess_out_y) / d(cur_guess_y)
+        -transform_source_coords[1] * x1 - transform_source_coords[3] * x0 +
+        transform_source_coords[5] * x0 + transform_source_coords[7] * x1;
+
+    // NOTE(review): no guard against a zero determinant (division by zero).
+    const double inv_det_j = 1 / (j00 * j11 - j01 * j10);
+
+    const double res_x = guess_out_x - x_in;  // residual of the current guess
+    const double res_y = guess_out_y - y_in;
+
+    // Newton step: subtract the inverse Jacobian applied to the residual.
+    cur_guess_x -= inv_det_j * (j11 * res_x - j10 * res_y);
+    cur_guess_y -= inv_det_j * (-j01 * res_x + j00 * res_y);
+
+    cur_guess_x = std::max(std::min(cur_guess_x, 1.0), 0.0);  // clamp to [0, 1]
+    cur_guess_y = std::max(std::min(cur_guess_y, 1.0), 0.0);
+  }
+
+  cur_guess_x = cur_guess_x * (edge - 1.0) + 0.5;  // [0, 1] -> [0.5, edge - 0.5]
+  cur_guess_y = cur_guess_y * (edge - 1.0) + 0.5;
+
+  return std::tie(cur_guess_x, cur_guess_y);
+}
+
+// This function implements a standard approximation to a bicubic interpolation
+// via a convolution, originally from this paper:
+// doi:10.1109/tassp.1981.1163711, parametrized by a = -0.5. After mild algebra
+// this gives the following polynomial, evaluated with Horner and then halved:
+// 2 f0+(f1-fn1) t+(-5 f0+4 f1-f2+2 fn1) t^2+(3 f0-3 f1+f2-fn1) t^3
+double CubicInterp(double t, double fn1, double f0, double f1, double f2) {
+  double h = 3 * f0 - 3 * f1 + f2 - fn1;  // t^3 coefficient
+  h = t * h + (-5 * f0 + 4 * f1 - f2 + 2 * fn1);  // add t^2 coefficient
+  h = t * h + (f1 - fn1);  // add t coefficient
+  h = t * h + 2 * f0;  // add constant term
+  return h * 0.5;  // t == 0 therefore yields f0
+}
+
+namespace pik {
+
+double CubicInterpAtCoords(size_t f_x, size_t f_y, double p_x, double p_y,
+                           const ImageF &in_img, const Rect &tile_rect) {
+  constexpr int kNumCubicSamplePoints = 4;  // rows f_y - 1 .. f_y + 2
+  double int_vx[kNumCubicSamplePoints];  // per-row horizontal interpolations
+
+  for (int i = 0; i < kNumCubicSamplePoints; i++) {
+    const float *cur_row = tile_rect.ConstRow(in_img, f_y - 1 + i);
+    int_vx[i] = CubicInterp(p_x, cur_row[f_x - 1], cur_row[f_x],  // no bounds
+                            cur_row[f_x + 1], cur_row[f_x + 2]);  // checks here
+  }
+
+  return CubicInterp(p_y, int_vx[0], int_vx[1], int_vx[2], int_vx[3]);
+}
+
+enum class InterpType { kNN, kBilinear, kBicubic };
+
+double DeterminePixValue(double x, double y, const ImageF &in_img,
+                         const Rect &tile_rect, InterpType type) {
+  // Samples the tile at (x, y); returns 0 outside [0, kTileDim) on either axis.
+  // Range-check the doubles (this also rejects NaN) before any size_t cast:
+  // converting a negative floating-point value to an unsigned type is UB.
+  if (!(x >= 0 && x < kTileDim && y >= 0 && y < kTileDim)) return 0;
+  if (type == InterpType::kNN || x < 0.5 || y < 0.5) {
+    const size_t n_x = static_cast<size_t>(std::floor(x));
+    const size_t n_y = static_cast<size_t>(std::floor(y));
+    return tile_rect.ConstRow(in_img, n_y)[n_x];
+  }
+
+  // x, y >= 0.5 here, so floor(x - 0.5) and floor(y - 0.5) are non-negative.
+  const size_t f_x = static_cast<size_t>(std::floor(x - 0.5));
+  const size_t f_y = static_cast<size_t>(std::floor(y - 0.5));
+  const size_t c_x = f_x + 1;
+  const size_t c_y = f_y + 1;
+  const double p_x = x - f_x - 0.5;
+  const double p_y = y - f_y - 0.5;
+
+  if (!(c_x < kTileDim && c_y < kTileDim))
+    return DeterminePixValue(x, y, in_img, tile_rect, InterpType::kNN);
+
+  const float *fy_row = tile_rect.ConstRow(in_img, f_y);
+  const float *cy_row = tile_rect.ConstRow(in_img, c_y);
+
+  if (type == InterpType::kBilinear) {
+    const double v_ff = fy_row[f_x];
+    const double v_fc = cy_row[f_x];
+    const double v_cf = fy_row[c_x];
+    const double v_cc = cy_row[c_x];
+
+    return (1 - p_x) * (1 - p_y) * v_ff + p_x * (1 - p_y) * v_cf +
+           (1 - p_x) * p_y * v_fc + p_x * p_y * v_cc;
+  }
+
+  if (!(f_x >= 1 && c_x + 1 < kTileDim && f_y >= 1 && c_y + 1 < kTileDim))
+    return DeterminePixValue(x, y, in_img, tile_rect, InterpType::kBilinear);
+
+  return CubicInterpAtCoords(f_x, f_y, p_x, p_y, in_img, tile_rect);
+}
+
+const double kTransformInputs[8] = {0.5,  // [0], [1]: x, y of 1st source point
+                                    kTileDim * 0.5,
+                                    kTileDim * 0.5,  // [2], [3]: 2nd point
+                                    0.5,
+                                    kTileDim - 0.5,  // [4], [5]: 3rd point
+                                    kTileDim * 0.25,
+                                    kTileDim * 0.5,  // [6], [7]: 4th point
+                                    kTileDim - 0.5};
+
+// TODO(user): Separate parameter selection to keep consistency.
+BilinearParams ApplyReverseBilinear(Image3F *opsin) {
+  const size_t xtiles = opsin->xsize() / kTileDim;  // whole tiles only
+  const size_t ytiles = opsin->ysize() / kTileDim;
+
+  if (!kUseBilinearTransforms) return BilinearParams(xtiles, ytiles);  // no-op
+
+  ImageF new_tile(kTileDim, kTileDim);  // scratch tile, reused for every tile
+
+  for (size_t c = 0; c < 3; c++) {
+    const ImageF &in_plane = opsin->Plane(c);
+
+    for (size_t yt = 0; yt < ytiles; yt++) {
+      for (size_t xt = 0; xt < xtiles; xt++) {
+        size_t xbase = xt * kTileDim;
+        size_t ybase = yt * kTileDim;
+        Rect tile_rect(xbase, ybase, kTileDim, kTileDim);
+
+        // Resample the tile into the scratch buffer first.
+        for (size_t y = 0; y < kTileDim; y++) {
+          float *out_row = new_tile.Row(y);
+          for (size_t x = 0; x < kTileDim; x++) {
+            const double c_x = x + 0.5;  // pixel center
+            const double c_y = y + 0.5;
+
+            double rev_x, rev_y;
+            std::tie(rev_x, rev_y) =
+                ReverseCoordTransform(c_x, c_y, kTileDim, kTransformInputs);
+
+            out_row[x] = DeterminePixValue(rev_x, rev_y, in_plane, tile_rect,
+                                           InterpType::kBicubic);
+          }
+        }
+
+        // Then copy the scratch buffer back over the tile in *opsin.
+        for (size_t y = 0; y < kTileDim; y++) {
+          float *out_row = tile_rect.PlaneRow(opsin, c, y);
+          const float *in_row = new_tile.ConstRow(y);
+          for (size_t x = 0; x < kTileDim; x++) {
+            out_row[x] = in_row[x];
+          }
+        }
+      }
+    }
+  }
+
+  return BilinearParams(xtiles, ytiles);  // the same fixed inputs for each tile
+}
+
+void ApplyForwardBilinear(Image3F *opsin, size_t downsample) {
+  if (!kUseBilinearTransforms) return;  // compile-time switch; no-op when false
+
+  PIK_ASSERT(downsample == 1);  // other downsampling factors are not handled
+
+  ImageF new_tile(kTileDim, kTileDim);  // scratch tile, reused for every tile
+
+  const size_t xtiles = opsin->xsize() / kTileDim;  // whole tiles only
+  const size_t ytiles = opsin->ysize() / kTileDim;
+
+  for (size_t c = 0; c < 3; c++) {
+    const ImageF &in_plane = opsin->Plane(c);
+    for (size_t yt = 0; yt < ytiles; yt++) {
+      for (size_t xt = 0; xt < xtiles; xt++) {
+        size_t xbase = xt * kTileDim;
+        size_t ybase = yt * kTileDim;
+        Rect tile_rect(xbase, ybase, kTileDim, kTileDim);
+
+        // Resample the tile into the scratch buffer first.
+        for (size_t y = 0; y < kTileDim; y++) {
+          float *out_row = new_tile.Row(y);
+          for (size_t x = 0; x < kTileDim; x++) {
+            const double c_x = x + 0.5;  // pixel center
+            const double c_y = y + 0.5;
+
+            double src_x, src_y;
+            std::tie(src_x, src_y) =
+                ForwardCoordTransform(c_x, c_y, kTileDim, kTransformInputs);
+
+            out_row[x] = DeterminePixValue(src_x, src_y, in_plane, tile_rect,
+                                           InterpType::kBicubic);
+          }
+        }
+
+        // Then copy the scratch buffer back over the tile in *opsin.
+        for (size_t y = 0; y < kTileDim; y++) {
+          float *out_row = tile_rect.PlaneRow(opsin, c, y);
+          const float *in_row = new_tile.ConstRow(y);
+          for (size_t x = 0; x < kTileDim; x++) {
+            out_row[x] = in_row[x];
+          }
+        }
+      }
+    }
+  }
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/bilinear_transform.h b/codec/L2/demos/pikEnc/host/pik/bilinear_transform.h
new file mode 100755
index 0000000000..d323f96453
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/bilinear_transform.h
@@ -0,0 +1,25 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/image.h"
+
+namespace pik {
+
+constexpr size_t kNumBilinearParams = 8;
+
+struct BilinearParams {  // per-tile storage sized from the tile grid
+  BilinearParams(int xtiles, int ytiles)
+      : transform_params(xtiles * ytiles * kNumBilinearParams),
+        is_transform_applied(xtiles * ytiles) {}
+
+  std::vector<double> transform_params;  // kNumBilinearParams per tile, zeroed
+  std::vector<bool> is_transform_applied;  // one flag per tile, initially false
+};
+
+BilinearParams ApplyReverseBilinear(Image3F *opsin);
+void ApplyForwardBilinear(Image3F *opsin, size_t downsample);
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/bit_reader.h b/codec/L2/demos/pikEnc/host/pik/bit_reader.h
new file mode 100755
index 0000000000..40724450d9
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/bit_reader.h
@@ -0,0 +1,142 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_BIT_READER_H_
+#define PIK_BIT_READER_H_
+
+// Bounds-checked bit reader; 64-bit buffer with support for deferred refills.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>  // memcpy
+#include <algorithm>
+
+#include "pik/compiler_specific.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Reads bits previously written to memory by WriteBits. Reads 4 bytes (or
+// len % 4 at the end) of input at a time into its 64-bit buffer. Performs
+// bounds-checking, returns all-zero values after the memory buffer is depleted.
+class BitReader {
+ public:
+  // data is not necessarily 4-byte aligned nor padded to RoundUp(len, 4).
+  BitReader(const uint8_t* const PIK_RESTRICT data, const size_t len)
+      : data32_(reinterpret_cast<const uint32_t*>(data)),
+        len32_(len >> 2),
+        len_mod4_(len % 4),
+        buf_(0),
+        pos32_(0),
+        bit_pos_(64) {  // 64 == empty, so the call below loads the first word
+    FillBitBuffer();
+  }
+
+  // If at least 32 bits were consumed, drops them and loads the next word.
+  void FillBitBuffer() {
+    if (PIK_UNLIKELY(bit_pos_ >= 32)) {
+      bit_pos_ -= 32;
+      buf_ >>= 32;  // unread upper half moves down; upper half is now zero
+
+      if (PIK_LIKELY(pos32_ < len32_)) {
+        // Read unaligned (memcpy avoids ubsan warning)
+        uint32_t next;
+        memcpy(&next, data32_ + pos32_, sizeof(next));
+        buf_ |= static_cast<uint64_t>(next) << 32;
+      } else if (pos32_ == len32_) {
+        // Only read the valid bytes.
+        const uint8_t* bytes =
+            reinterpret_cast<const uint8_t*>(data32_ + pos32_);
+        uint64_t next = 0;
+        for (size_t i = 0; i < len_mod4_; ++i) {
+          // Pre-shifted by 32 so we can inject into buf_ directly.
+          // Assumes little-endian byte order.
+          next |= static_cast<uint64_t>(bytes[i]) << (i * 8 + 32);
+        }
+        buf_ |= next;
+      }  // past the end: nothing is loaded, so the upper half stays zero
+      ++pos32_;  // incremented even past the end; Healthy() detects overrun
+    }
+  }
+
+  // Consumes num_bits from the buffer without refilling it.
+  void Advance(size_t num_bits) {
+    PIK_ASSERT(num_bits + bit_pos_ <= 64);
+    bit_pos_ += num_bits;
+  }
+
+  // Returns the next N bits without consuming them.
+  template <size_t N>
+  int PeekFixedBits() const {
+    static_assert(N <= 32, "At most 32 bits may be read.");
+    PIK_ASSERT(N + bit_pos_ <= 64);
+    return (buf_ >> bit_pos_) & ((1ULL << N) - 1);
+  }
+
+  // Returns the next nbits bits without consuming them.
+  int PeekBits(size_t nbits) const {
+    PIK_ASSERT(nbits <= 32);
+    PIK_ASSERT(nbits + bit_pos_ <= 64);
+    return (buf_ >> bit_pos_) & ((1ULL << nbits) - 1);
+  }
+
+  // Refills if needed, then returns and consumes the next nbits bits.
+  int ReadBits(size_t nbits) {
+    FillBitBuffer();
+    const int bits = PeekBits(nbits);
+    bit_pos_ += nbits;
+    return bits;
+  }
+
+  // Same as ReadBits, with the bit count fixed at compile time.
+  template <size_t N>
+  int ReadFixedBits() {
+    FillBitBuffer();
+    const int bits = PeekFixedBits<N>();
+    bit_pos_ += N;
+    return bits;
+  }
+
+  uint16_t GetNextWord() { return static_cast<uint16_t>(ReadBits(16)); }
+
+  // Discards `skip` bits: buffered bits first, then whole words, then the rest.
+  void SkipBits(size_t skip) {
+    // Satisfy from existing buffer
+    const size_t consume_buffer = std::min(skip, 64 - bit_pos_);
+    Advance(consume_buffer);
+    PIK_ASSERT(bit_pos_ <= 64);
+    skip -= consume_buffer;
+
+    // Skip entire 32-bit words
+    pos32_ += skip / 32;
+    skip = skip % 32;
+
+    FillBitBuffer();
+    Advance(skip);
+  }
+
+  // Consumes bits up to the next multiple of 8; they must all be zero.
+  Status JumpToByteBoundary() {
+    size_t rem = bit_pos_ % 8;
+    if ((rem != 0) && (ReadBits(8 - rem) != 0)) {
+      return PIK_FAILURE("Non-zero padding bits");
+    }
+    return true;
+  }
+
+  // Bits consumed so far; the -64 cancels the buffer loaded ahead of bit_pos_.
+  size_t BitsRead() const { return 32 * pos32_ + bit_pos_ - 64; }
+
+  // Returns the (rounded up) number of bytes consumed so far.
+  size_t Position() const { return (BitsRead() + 7) / 8; }
+
+  // True while no more bytes were consumed than the `len` given at construction.
+  bool Healthy() const { return Position() <= (len32_ << 2) + len_mod4_; }
+
+ private:
+  // *32 counters/pointers are in units of 4 bytes, or 32 bits.
+  const uint32_t* const PIK_RESTRICT data32_;
+  const size_t len32_;  // number of complete 4-byte words (len >> 2)
+  const size_t len_mod4_;  // trailing bytes after the last complete word
+  uint64_t buf_;
+  size_t pos32_;  // index of the next word FillBitBuffer will load
+  // Next bit == (buf_ >> bit_pos_) & 1. 64 => empty, 32 = upper 32 bits valid.
+  size_t bit_pos_;
+};
+
+}  // namespace pik
+
+#endif  // PIK_BIT_READER_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/bits.h b/codec/L2/demos/pikEnc/host/pik/bits.h
new file mode 100755
index 0000000000..f91d80250a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/bits.h
@@ -0,0 +1,100 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_BITS_H_
+#define PIK_BITS_H_
+
+// Specialized instructions for processing register-sized bit arrays.
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+#include <stdint.h>
+#include "pik/compiler_specific.h"
+
+static PIK_INLINE int PopCount(const uint32_t x) {  // number of set bits in x
+#ifdef _MSC_VER
+  return _mm_popcnt_u32(x);
+#else
+  return __builtin_popcount(x);
+#endif
+}
+
+// Undefined results for x == 0.
+static PIK_INLINE int NumZeroBitsAboveMSBNonzero(const uint32_t x) {
+#ifdef _MSC_VER
+  unsigned long index;
+  _BitScanReverse(&index, x);
+  return 31 ^ index;  // index is the MSB position, not the zero count above it
+#else
+  return __builtin_clz(x);  // count of leading zero bits
+#endif
+}
+static PIK_INLINE int NumZeroBitsAboveMSBNonzero(const uint64_t x) {
+#ifdef _MSC_VER
+  unsigned long index;
+  _BitScanReverse64(&index, x);
+  return 63 ^ index;  // convert the MSB position into a leading-zero count
+#else
+  return __builtin_clzll(x);  // `ll`: unsigned long may be only 32 bits wide
+#endif
+}
+static PIK_INLINE int NumZeroBitsBelowLSBNonzero(const uint32_t x) {
+#ifdef _MSC_VER
+  unsigned long index;
+  _BitScanForward(&index, x);
+  return index;  // position of the lowest set bit == zero bits below it
+#else
+  return __builtin_ctz(x);  // count of trailing zero bits
+#endif
+}
+static PIK_INLINE int NumZeroBitsBelowLSBNonzero(const uint64_t x) {
+#ifdef _MSC_VER
+  unsigned long index;
+  _BitScanForward64(&index, x);
+  return index;  // position of the lowest set bit == zero bits below it
+#else
+  return __builtin_ctzll(x);  // `ll`: unsigned long may be only 32 bits wide
+#endif
+}
+
+// Zero-safe variants: an all-zero input yields the full bit width of the type.
+static PIK_INLINE int NumZeroBitsAboveMSB(const uint32_t x) {
+  return (x != 0) ? NumZeroBitsAboveMSBNonzero(x) : 32;
+}
+static PIK_INLINE int NumZeroBitsAboveMSB(const uint64_t x) {
+  return (x != 0) ? NumZeroBitsAboveMSBNonzero(x) : 64;
+}
+static PIK_INLINE int NumZeroBitsBelowLSB(const uint32_t x) {
+  return (x != 0) ? NumZeroBitsBelowLSBNonzero(x) : 32;
+}
+static PIK_INLINE int NumZeroBitsBelowLSB(const uint64_t x) {
+  return (x != 0) ? NumZeroBitsBelowLSBNonzero(x) : 64;
+}
+
+// Position of the highest set bit, i.e. floor(log2(x)); x must be nonzero.
+static PIK_INLINE int FloorLog2Nonzero(const uint32_t x) {
+  return 31 - NumZeroBitsAboveMSBNonzero(x);
+}
+static PIK_INLINE int FloorLog2Nonzero(const uint64_t x) {
+  return 63 - NumZeroBitsAboveMSBNonzero(x);
+}
+
+// ceil(log2(x)) for nonzero x: the floor, plus one unless x is a power of two.
+static PIK_INLINE int CeilLog2Nonzero(const uint32_t x) {
+  // x & (x - 1) clears the lowest set bit; zero means only one bit was set.
+  const bool exact = (x & (x - 1)) == 0;
+  return FloorLog2Nonzero(x) + (exact ? 0 : 1);
+}
+
+static PIK_INLINE int CeilLog2Nonzero(const uint64_t x) {
+  // x & (x - 1) clears the lowest set bit; zero means only one bit was set.
+  const bool exact = (x & (x - 1)) == 0;
+  return FloorLog2Nonzero(x) + (exact ? 0 : 1);
+}
+
+#endif  // PIK_BITS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/block.h b/codec/L2/demos/pikEnc/host/pik/block.h
new file mode 100755
index 0000000000..8f3a03624e
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/block.h
@@ -0,0 +1,209 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_BLOCK_H_
+#define PIK_BLOCK_H_
+
+// Adapters for DCT input/output: from/to contiguous blocks or image rows.
+
+#include "pik/common.h"
+#include "pik/compiler_specific.h"
+#include "pik/simd/simd.h"
+
+namespace pik {
+
+// Adapters for source/destination.
+//
+// Block: (x, y) <-> (N * y + x)
+// Lines: (x, y) <-> (stride * y + x)
+//
+// I.e. Block is a specialization of Lines with fixed stride.
+//
+// FromXXX should implement Read and Load (Read vector).
+// ToXXX should implement Write and Store (Write vector).
+
+template <size_t N>  // presumably a float vector tag of min(N, full) lanes
+using BlockDesc = SIMD_PART(float, SIMD_MIN(N, SIMD_FULL(float)::N));
+
+// Here and in the following, the SZ template parameter specifies the number of
+// values to load/store. Needed because we want to handle 4x4 sub-blocks of
+// 16x16 blocks.
+template <size_t N>
+class FromBlock {
+ public:
+  explicit FromBlock(const float* block) : block_(block) {}
+
+  // NOTE(review): dx is forwarded as Address()'s row and dy as its column.
+  FromBlock View(size_t dx, size_t dy) const {
+    return FromBlock<N>(Address(dx, dy));
+  }
+
+  // Loads a BlockDesc<SZ> vector starting at element (row, i).
+  template <size_t SZ>
+  SIMD_ATTR PIK_INLINE typename BlockDesc<SZ>::V LoadPart(const size_t row,
+                                                          size_t i) const {
+    return load(BlockDesc<SZ>(), block_ + row * N + i);
+  }
+
+  SIMD_ATTR PIK_INLINE typename BlockDesc<N>::V Load(const size_t row,
+                                                     size_t i) const {
+    return LoadPart<N>(row, i);
+  }
+
+  // Reads the single value at (row, i).
+  SIMD_ATTR PIK_INLINE float Read(const size_t row, const size_t i) const {
+    return *Address(row, i);
+  }
+
+  constexpr PIK_INLINE const float* Address(const size_t row,
+                                            const size_t i) const {
+    return block_ + row * N + i;  // rows are N floats apart
+  }
+
+ private:
+  const float* block_;  // not owned
+};
+
+template <size_t N>
+class ToBlock {
+ public:
+  explicit ToBlock(float* block) : block_(block) {}
+
+  // NOTE(review): dx is forwarded as Address()'s row and dy as its column.
+  ToBlock View(size_t dx, size_t dy) const {
+    return ToBlock<N>(Address(dx, dy));
+  }
+
+  // Stores a BlockDesc<SZ> vector starting at element (row, i).
+  template <size_t SZ>
+  SIMD_ATTR PIK_INLINE void StorePart(const typename BlockDesc<SZ>::V& v,
+                                      const size_t row, const size_t i) const {
+    store(v, BlockDesc<SZ>(), Address(row, i));
+  }
+
+  SIMD_ATTR PIK_INLINE void Store(const typename BlockDesc<N>::V& v,
+                                  const size_t row, size_t i) const {
+    return StorePart<N>(v, row, i);
+  }
+
+  // Writes the single value v at (row, i).
+  SIMD_ATTR PIK_INLINE void Write(float v, const size_t row,
+                                  const size_t i) const {
+    *Address(row, i) = v;
+  }
+
+  constexpr PIK_INLINE float* Address(const size_t row, const size_t i) const {
+    return block_ + row * N + i;  // rows are N floats apart
+  }
+
+ private:
+  float* block_;  // not owned
+};
+
+// Same as ToBlock (minus View), but multiplies each result by 1 / (N * N).
+// TODO(user): perhaps we should get rid of this one.
+template <size_t N>
+class ScaleToBlock {
+ public:
+  explicit SIMD_ATTR ScaleToBlock(float* block) : block_(block) {}
+
+  template <size_t SZ>
+  SIMD_ATTR PIK_INLINE void StorePart(const typename BlockDesc<SZ>::V& v,
+                                      const size_t row, const size_t i) const {
+    using BlockDesc = pik::BlockDesc<SZ>;  // local alias hides the template
+    static const typename BlockDesc::V mul_ = set1(BlockDesc(), 1.0f / (N * N));
+    store(v * mul_, BlockDesc(), Address(row, i));
+  }
+
+  SIMD_ATTR PIK_INLINE void Store(const typename BlockDesc<N>::V& v,
+                                  const size_t row, size_t i) const {
+    return StorePart<N>(v, row, i);
+  }
+
+  SIMD_ATTR PIK_INLINE void Write(float v, const size_t row,
+                                  const size_t i) const {
+    static const float mul_ = 1.0f / (N * N);  // scalar form of the same scale
+    *Address(row, i) = v * mul_;
+  }
+
+  constexpr PIK_INLINE float* Address(const size_t row, const size_t i) const {
+    return block_ + row * N + i;  // rows are N floats apart
+  }
+
+ private:
+  float* block_;  // not owned
+};
+
+template <size_t N>
+class FromLines {
+ public:
+  FromLines(const float* top_left, size_t stride)
+      : top_left_(top_left), stride_(stride) {}
+
+  // NOTE(review): dx is forwarded as Address()'s row and dy as its column.
+  FromLines View(size_t dx, size_t dy) const {
+    return FromLines(Address(dx, dy), stride_);
+  }
+
+  // Loads a BlockDesc<SZ> vector starting at element (row, i).
+  template <size_t SZ>
+  SIMD_ATTR PIK_INLINE typename BlockDesc<SZ>::V LoadPart(
+      const size_t row, const size_t i) const {
+    return load(BlockDesc<SZ>(), Address(row, i));
+  }
+
+  SIMD_ATTR PIK_INLINE typename BlockDesc<N>::V Load(const size_t row,
+                                                     size_t i) const {
+    return LoadPart<N>(row, i);
+  }
+
+  // Reads the single value at (row, i).
+  SIMD_ATTR PIK_INLINE float Read(const size_t row, const size_t i) const {
+    return *Address(row, i);
+  }
+
+  PIK_INLINE const float* SIMD_RESTRICT Address(const size_t row,
+                                                const size_t i) const {
+    return top_left_ + row * stride_ + i;  // rows are stride_ floats apart
+  }
+
+ private:
+  const float* SIMD_RESTRICT top_left_;
+  size_t stride_;  // move to next line by adding this to pointer
+};
+
+// Pointers are restrict-qualified: assumes we don't use both FromLines and
+// ToLines in the same DCT. NOTE: Transpose uses From/ToBlock, not *Lines.
+template <size_t N>
+class ToLines {
+ public:
+  ToLines(float* top_left, size_t stride)
+      : top_left_(top_left), stride_(stride) {}
+
+  // NOTE(review): `other` is unused; dx is forwarded as Address()'s row.
+  ToLines View(const ToLines& other, size_t dx, size_t dy) const {
+    return ToLines(Address(dx, dy), stride_);
+  }
+
+  // Stores a BlockDesc<SZ> vector starting at element (row, i).
+  template <size_t SZ>
+  SIMD_ATTR PIK_INLINE void StorePart(const typename BlockDesc<SZ>::V& v,
+                                      const size_t row, const size_t i) const {
+    store(v, BlockDesc<SZ>(), Address(row, i));
+  }
+
+  SIMD_ATTR PIK_INLINE void Store(const typename BlockDesc<N>::V& v,
+                                  const size_t row, size_t i) const {
+    return StorePart<N>(v, row, i);
+  }
+
+  // Writes the single value v at (row, i).
+  SIMD_ATTR PIK_INLINE void Write(float v, const size_t row,
+                                  const size_t i) const {
+    *Address(row, i) = v;
+  }
+
+  PIK_INLINE float* SIMD_RESTRICT Address(const size_t row,
+                                          const size_t i) const {
+    return top_left_ + row * stride_ + i;  // rows are stride_ floats apart
+  }
+
+ private:
+  float* SIMD_RESTRICT top_left_;
+  size_t stride_;  // move to next line by adding this to pointer
+};
+
+}  // namespace pik
+
+#endif  // PIK_BLOCK_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/block_dictionary.cc b/codec/L2/demos/pikEnc/host/pik/block_dictionary.cc
new file mode 100755
index 0000000000..11e186118a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/block_dictionary.cc
@@ -0,0 +1,990 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/block_dictionary.h"
+#include <sys/types.h>
+#include <cstdint>
+#include <limits>
+#include <vector>
+#include "pik/codec.h"
+#include "pik/detect_dots.h"
+#include "pik/entropy_coder.h"
+#include "pik/image.h"
+#include "pik/opsin_inverse.h"
+#include "pik/robust_statistics.h"
+#include "pik/status.h"
+
+namespace pik {
+constexpr float kBlockScale = 3.5;
+constexpr float kBlockInvScale = 1.0f / kBlockScale;
+constexpr int kMaxBlocks = 1 << 24;
+
+// TODO(veluca): choose some reasonable set of blocks as a static dictionary.
+constexpr int kNumStaticBlocks = 1;
+QuantizedBlock kStaticBlocks[kNumStaticBlocks] = {
+    {1, 3, {{0, 0, 0}, {-11, -8, -11}, {-10, -7, -10}}},
+};
+
+enum Contexts {  // entropy-coding context ids used by Encode() and Decode()
+  kNumBlockContext = 0,
+  kBlockSizeContext = 1,
+  kPixelsContextStart = 2,  // pixels use kPixelsContextStart + c, c = 0..2
+  kPixelsContextY = 3,
+  kPixelsContextB = 4,
+  kBlockOffsetContext = 5,
+  kBlockWidthContext = 6,
+  kBlockIdCountContext = 7,
+  kBlockIdSkipContext = 8,
+  kNumBlockDictionaryContexts,  // count of the contexts above (9)
+};
+
+// We can represent numbers up to 2**kMaxBlockDictionarySymbol. As the biggest
+// numbers have an image-size range, 2**32 should be more than enough here.
+constexpr int kMaxBlockDictionarySymbol = 32;
+
+float ScaleForQuantization(float val, size_t c) {  // c indexes kXybRadius
+  return kBlockScale * val / kXybRadius[c];
+}
+int Quantize(float val, size_t c) {  // nearest integer to the scaled value
+  return std::round(ScaleForQuantization(val, c));
+}
+
+// BlockInfo contains the data describing a block before quantization
+// and also contains its position.
+struct BlockInfo {
+  Rect rect;
+  const Image3F* image;  // not owned; must outlive this BlockInfo
+  BlockInfo(const Image3F* image, Rect rect) : rect(rect), image(image) {}
+
+  // Quantizes every pixel of `rect` in all three channels.
+  explicit operator QuantizedBlock() {
+    QuantizedBlock info;
+    info.xsize = rect.xsize();
+    info.ysize = rect.ysize();
+
+    for (size_t c = 0; c < 3; c++) {
+      for (size_t iy = 0; iy < rect.ysize(); iy++) {
+        const float* PIK_RESTRICT row = image->ConstPlaneRow(c, rect.y0() + iy);
+        for (size_t ix = 0; ix < rect.xsize(); ix++) {
+          info.pixels[c][iy * rect.xsize() + ix] =  // row-major, no bound check
+              Quantize(row[rect.x0() + ix], c);
+        }
+      }
+    }
+    return info;
+  }
+  // True if sizes match and the summed squared difference is small enough.
+  bool IsSimilar(const BlockInfo& other) const {
+    if (other.rect.xsize() != rect.xsize()) return false;
+    if (other.rect.ysize() != rect.ysize()) return false;
+    float sum = 0;  // squared differences accumulated over all three channels
+    for (size_t c = 0; c < 3; c++) {
+      for (size_t iy = 0; iy < rect.ysize(); iy++) {
+        const float* PIK_RESTRICT row = image->ConstPlaneRow(c, rect.y0() + iy);
+        const float* PIK_RESTRICT other_row =
+            other.image->ConstPlaneRow(c, other.rect.y0() + iy);
+        for (size_t ix = 0; ix < rect.xsize(); ix++) {
+          float diff = row[rect.x0() + ix] - other_row[other.rect.x0() + ix];
+          sum += diff * diff;
+        }
+      }
+    }
+    if (sum > kDotDistThreshold) return false;
+
+    return true;
+  }
+};
+
+BlockDictionary::BlockDictionary(const std::vector<QuantizedBlock>& dictionary,
+                                 const std::vector<BlockPosition>& positions)
+    : dictionary_(dictionary), positions_(positions) {  // both are copied
+  std::sort(positions_.begin(), positions_.end(),  // transform is the 1st key
+            [](const BlockPosition& a, const BlockPosition& b) {
+              return std::make_tuple(a.transform, a.id, a.x, a.y, a.dx, a.dy,
+                                     a.width) <
+                     std::make_tuple(b.transform, b.id, b.x, b.y, b.dx, b.dy,
+                                     b.width);
+            });
+}
+
+std::string BlockDictionary::Encode(PikImageSizeInfo* info) const {
+  std::vector<std::vector<Token>> tokens(1);  // everything goes in tokens[0]
+
+  auto add_num = [&](int context, size_t num) {  // one var-len token per number
+    int bits, nbits;
+    EncodeVarLenUint(num, &nbits, &bits);
+    tokens[0].emplace_back(context, nbits, nbits, bits);
+  };
+
+  add_num(kNumBlockContext, dictionary_.size());
+  for (size_t i = 0; i < dictionary_.size(); i++) {
+    const QuantizedBlock& block = dictionary_[i];  // must not shadow `info`
+    add_num(kBlockSizeContext,  // sizes after the first are deltas
+            i == 0 ? block.xsize
+                   : PackSigned(block.xsize - dictionary_[i - 1].xsize));
+    add_num(kBlockSizeContext,
+            i == 0 ? block.ysize
+                   : PackSigned(block.ysize - dictionary_[i - 1].ysize));
+    for (size_t c = 0; c < 3; c++) {
+      int ctx = kPixelsContextStart + c;
+      for (size_t iy = 0; iy < block.ysize; iy++) {
+        for (size_t ix = 0; ix < block.xsize; ix++) {
+          int8_t val = block.pixels[c][iy * block.xsize + ix];
+          int8_t pred = 0;  // left, top, or their halved sum when both exist
+          if (ix != 0) {
+            pred += block.pixels[c][iy * block.xsize + ix - 1];
+          }
+          if (iy != 0) {
+            pred += block.pixels[c][(iy - 1) * block.xsize + ix];
+          }
+          if (ix != 0 && iy != 0) pred >>= 1;
+          add_num(ctx, PackSigned(val - pred));
+        }
+      }
+    }
+  }
+  size_t last_idx = 0;  // positions_ is sorted by transform first: carry over
+  for (size_t transform = 0; transform < 2; transform++) {
+    int last_block = -1;  // last id emitted in this pass; -1 means none yet
+    for (size_t id = 0; id < dictionary_.size() + kNumStaticBlocks; id++) {
+      size_t idx = last_idx;
+      while (idx < positions_.size() && positions_[idx].id == id &&
+             positions_[idx].transform == transform) {
+        idx++;
+      }
+      size_t num = idx - last_idx;
+      if (num == 0) continue;
+      if (last_block + 1 < id) {  // unused ids skipped: count 0, then the gap
+        add_num(kBlockIdCountContext, 0);
+        add_num(kBlockIdSkipContext, id - last_block - 2);
+      }
+      last_block = id;
+      add_num(kBlockIdCountContext, num);
+      for (size_t i = last_idx; i < idx; i++) {
+        const BlockPosition& pos = positions_[i];
+        add_num(  // first position of an id is absolute, the rest are deltas
+            kBlockOffsetContext,
+            i == last_idx ? pos.x : PackSigned(pos.x - positions_[i - 1].x));
+        add_num(
+            kBlockOffsetContext,
+            i == last_idx ? pos.y : PackSigned(pos.y - positions_[i - 1].y));
+        PIK_ASSERT(pos.id == id);
+        if (transform) {
+          add_num(kBlockOffsetContext, PackSigned(pos.dx));
+          add_num(kBlockOffsetContext, PackSigned(pos.dy));
+          add_num(kBlockWidthContext, PackSigned(pos.width));
+        }
+      }
+      last_idx = idx;
+    }
+    if (last_block + 1 < dictionary_.size() + kNumStaticBlocks) {
+      add_num(kBlockIdCountContext, 0);  // trailing unused ids in this pass
+      add_num(kBlockIdSkipContext,
+              dictionary_.size() + kNumStaticBlocks - 2 - last_block);
+    }
+  }
+
+  std::vector<ANSEncodingData> codes;
+  std::vector<uint8_t> context_map;
+  std::string enc = BuildAndEncodeHistograms(
+      kNumBlockDictionaryContexts, tokens, &codes, &context_map, nullptr);
+  enc += WriteTokens(tokens[0], codes, context_map, nullptr);
+  if (info) {  // optional size accounting
+    info->total_size += enc.size();
+  }
+  return enc;
+}
+
+Status BlockDictionary::Decode(BitReader* br, size_t xsize, size_t ysize) {
+  // Decodes blocks and positions; positions must fit an xsize x ysize image.
+  std::vector<uint8_t> context_map;
+  ANSCode code;
+  PIK_RETURN_IF_ERROR(DecodeHistograms(br, kNumBlockDictionaryContexts,
+                                       kMaxBlockDictionarySymbol, &code,
+                                       &context_map));
+  ANSSymbolReader decoder(&code);
+  // Reads one entropy-coded variable-length integer using `context`.
+  auto read_num = [&](int context) {
+    br->FillBitBuffer();
+    int s = decoder.ReadSymbol(context_map[context], br);
+    int bits = br->ReadBits(s);
+    return DecodeVarLenUint(s, bits);
+  };
+  size_t dict_size = read_num(kNumBlockContext);
+  if (dict_size > kMaxBlocks) {
+    return PIK_FAILURE("Too many blocks in dictionary");
+  }
+
+  dictionary_.resize(dict_size);
+  for (size_t i = 0; i < dictionary_.size(); i++) {
+    QuantizedBlock& info = dictionary_[i];
+    info.xsize = read_num(kBlockSizeContext);
+    info.ysize = read_num(kBlockSizeContext);
+    if (i != 0) {  // Sizes after the first are deltas to the previous block.
+      info.xsize = UnpackSigned(info.xsize) + dictionary_[i - 1].xsize;
+      info.ysize = UnpackSigned(info.ysize) + dictionary_[i - 1].ysize;
+    }
+    if (info.xsize > kMaxBlockSize)
+      return PIK_FAILURE("Block xsize is too big: %lu", info.xsize);
+    if (info.ysize > kMaxBlockSize)
+      return PIK_FAILURE("Block ysize is too big: %lu", info.ysize);
+    for (size_t c = 0; c < 3; c++) {
+      int ctx = kPixelsContextStart + c;
+      for (size_t iy = 0; iy < info.ysize; iy++) {
+        for (size_t ix = 0; ix < info.xsize; ix++) {
+          int8_t pred = 0;  // Left, top, or the halved sum of both.
+          if (ix != 0) {
+            pred += info.pixels[c][iy * info.xsize + ix - 1];
+          }
+          if (iy != 0) {
+            pred += info.pixels[c][(iy - 1) * info.xsize + ix];
+          }
+          if (ix != 0 && iy != 0) pred >>= 1;
+          info.pixels[c][iy * info.xsize + ix] =
+              UnpackSigned(read_num(ctx)) + pred;
+        }
+      }
+    }
+  }
+  // Positions are grouped by transform flag and then by block id.
+  for (size_t transform = 0; transform < 2; transform++) {
+    size_t to_skip = 0;
+    for (size_t id = 0; id < dictionary_.size() + kNumStaticBlocks; id++) {
+      if (to_skip > 0) {
+        to_skip--;
+        continue;
+      }
+      size_t id_count = read_num(kBlockIdCountContext);
+      if (id_count > kMaxBlocks) {
+        return PIK_FAILURE("Too many blocks in dictionary");
+      }
+      if (id_count == 0) {  // A zero count is followed by a run of unused ids.
+        to_skip = read_num(kBlockIdSkipContext);
+        continue;
+      }
+      positions_.resize(positions_.size() + id_count);
+      size_t id_start = positions_.size() - id_count;
+      for (size_t i = id_start; i < positions_.size(); i++) {
+        BlockPosition& pos = positions_[i];
+        pos.x = read_num(kBlockOffsetContext);
+        pos.y = read_num(kBlockOffsetContext);
+        pos.id = id;
+        const QuantizedBlock& info =
+            pos.id < dictionary_.size()
+                ? dictionary_[pos.id]
+                : kStaticBlocks[pos.id - dictionary_.size()];
+        if (i != id_start) {  // Deltas may be negative and wrap around.
+          pos.x = UnpackSigned(pos.x) + positions_[i - 1].x;
+          pos.y = UnpackSigned(pos.y) + positions_[i - 1].y;
+        }
+        if (pos.x > xsize || info.xsize > xsize - pos.x) {  // Overflow-safe.
+          return PIK_FAILURE("Invalid block x (id %lu): at %lu + %lu > %lu",
+                             pos.id, pos.x, info.xsize, xsize);
+        }
+        if (pos.y > ysize || info.ysize > ysize - pos.y) {  // Overflow-safe.
+          return PIK_FAILURE("Invalid block y: at %lu + %lu > %lu", pos.y,
+                             info.ysize, ysize);
+        }
+        pos.transform = transform;
+        if (transform) {
+          pos.dx = UnpackSigned(read_num(kBlockOffsetContext));
+          pos.dy = UnpackSigned(read_num(kBlockOffsetContext));
+          pos.width = UnpackSigned(read_num(kBlockWidthContext));
+        }
+      }
+    }
+    if (to_skip > 0) {
+      return PIK_FAILURE("Invalid number of skipped block ids!");
+    }
+  }
+
+  if (!decoder.CheckANSFinalState()) {
+    return PIK_FAILURE("ANS checksum failure.");
+  }
+  PIK_RETURN_IF_ERROR(br->JumpToByteBoundary());
+  return true;
+}
+
+#ifdef PIK_BD_DUMP_IMAGES
+void DumpImage(const Image3F& img) {  // Debug aid: writes /tmp/dbg<N>.png.
+  Image3F linear(img.xsize(), img.ysize());
+  CopyImageTo(img, &linear);
+  OpsinToLinear(&linear, /*pool=*/nullptr);
+  CodecContext ctx;
+  CodecInOut io(&ctx);
+  io.SetFromImage(std::move(linear), ctx.c_linear_srgb[0]);
+  static size_t cnt = 0;  // Numbers successive dump files.
+  const std::string path = "/tmp/dbg" + std::to_string(cnt++) + ".png";
+  const bool ok = static_cast<bool>(io.EncodeToFile(ctx.c_srgb[0], 8, path));
+  PIK_ASSERT(ok);  // The call stays outside the macro so it always runs.
+  (void)ok;        // Avoids an unused warning if PIK_ASSERT expands to nothing.
+}
+
+void DumpImage(const ImageF& img) {  // Debug aid: dumps one plane, normalized.
+  Image3F planes(img.xsize(), img.ysize());
+  PIK_ASSERT(img.PixelsPerRow() == planes.PixelsPerRow());
+  const size_t num_values = img.PixelsPerRow() * img.ysize();
+  std::fill(planes.PlaneRow(0, 0), planes.PlaneRow(0, 0) + num_values, 0.0f);
+  std::fill(planes.PlaneRow(2, 0), planes.PlaneRow(2, 0) + num_values, 0.0f);
+  float min, max;
+  ImageMinMax(img, &min, &max);
+  // A constant image has no range; dividing by 1 maps it to 0, never NaN.
+  const float range = (max > min) ? max - min : 1.0f;
+  for (size_t y = 0; y < img.ysize(); y++) {
+    const float* PIK_RESTRICT row = img.Row(y);
+    float* PIK_RESTRICT row_out = planes.PlaneRow(1, y);
+    for (size_t x = 0; x < img.xsize(); x++) {
+      row_out[x] = (row[x] - min) / range;  // Plane 1 carries the values.
+    }
+  }
+  DumpImage(planes);
+}
+#endif
+
+// Adds (add == true) or subtracts (add == false) all placed blocks to/from
+// *opsin. Untransformed blocks are copied at (x, y); transformed blocks are
+// bilinearly resampled from a 2x upsampled copy of the dictionary block.
+template <bool add>
+void BlockDictionary::Apply(Image3F* opsin, size_t downsampling) const {
+#ifdef PIK_BD_DUMP_IMAGES
+  DumpImage(*opsin);
+#endif
+  if (downsampling != 1) {
+    // TODO(veluca): downsampling not implemented yet.
+    PIK_CHECK(positions_.empty());
+  }
+  // Blocks copied as-is.
+  for (const BlockPosition& pos : positions_) {
+    if (pos.transform) continue;
+    size_t by = pos.y;
+    size_t bx = pos.x;
+    const QuantizedBlock& info =
+        pos.id < dictionary_.size()
+            ? dictionary_[pos.id]
+            : kStaticBlocks[pos.id - dictionary_.size()];
+    for (size_t c = 0; c < 3; c++) {
+      for (size_t iy = 0; iy < info.ysize; iy++) {
+        float* row = opsin->PlaneRow(c, by + iy) + bx;
+        for (size_t ix = 0; ix < info.xsize; ix++) {
+          float val = kBlockInvScale * kXybRadius[c] *
+                      info.pixels[c][iy * info.xsize + ix];
+          row[ix] += add ? val : -val;
+        }
+      }
+    }
+  }
+  // Scaled/rotated blocks.
+  float* PIK_RESTRICT rows[3] = {opsin->PlaneRow(0, 0), opsin->PlaneRow(1, 0),
+                                 opsin->PlaneRow(2, 0)};
+  size_t stride = opsin->PixelsPerRow();
+  for (const BlockPosition& pos : positions_) {
+    if (!pos.transform) continue;
+    const QuantizedBlock& info =
+        pos.id < dictionary_.size()
+            ? dictionary_[pos.id]
+            : kStaticBlocks[pos.id - dictionary_.size()];
+    size_t xs = info.xsize;
+    size_t ys = info.ysize;
+    // Degenerate blocks (empty, zero-length direction or zero width) have no
+    // inverse transform: the divisions below would yield inf/NaN. Skip them.
+    if (xs == 0 || ys == 0 || pos.width == 0 || (pos.dx == 0 && pos.dy == 0)) {
+      continue;
+    }
+    float block[3][(2 * kMaxBlockSize + 5) * (2 * kMaxBlockSize + 5)] = {};
+    for (size_t c = 0; c < 3; c++) {
+      for (int iy = 0; iy < int(2 * ys) + 1; iy++) {
+        for (int ix = 0; ix < int(2 * xs) + 1; ix++) {
+          float val = 0.0f;
+          for (int dy = -1; dy < 1; dy++) {
+            for (int dx = -1; dx < 1; dx++) {
+              int sy = (iy + dy) / 2;
+              if (sy < 0) sy = 0;
+              if (sy >= ys) sy = ys - 1;
+              int sx = (ix + dx) / 2;
+              if (sx < 0) sx = 0;
+              if (sx >= xs) sx = xs - 1;
+              val += info.pixels[c][sy * info.xsize + sx];
+            }
+          }
+          block[c][(iy + 2) * (2 * xs + 5) + (ix + 2)] = val * 0.25f;
+        }
+      }
+    }
+    float by00 = pos.y;
+    float bx00 = pos.x;
+    float bx01 = bx00 + pos.dx;
+    float by01 = by00 + pos.dy;
+    float xnorm = std::sqrt(pos.dx * pos.dx + pos.dy * pos.dy);
+    float ynorm = pos.width * 0.5f;
+    float invxnorm = 1.0f / xnorm;
+    float invynorm = 1.0f / ynorm;
+    float deltax = pos.dy * invxnorm * ynorm;
+    float deltay = -pos.dx * invxnorm * ynorm;
+    float bx11 = bx01 + deltax;
+    float by11 = by01 + deltay;
+    float bx10 = bx00 + deltax;
+    float by10 = by00 + deltay;
+    float inv_determinant = 1.0f / (pos.dx * deltay - pos.dy * deltax);
+    float inverse_transform[4] = {
+        xs * deltay * inv_determinant, -float(xs) * deltax * inv_determinant,
+        -float(ys) * pos.dy * inv_determinant, ys * pos.dx * inv_determinant};
+    int64_t min_x = std::min(std::min(bx00, bx01), std::min(bx10, bx11));
+    int64_t min_y = std::min(std::min(by00, by01), std::min(by10, by11));
+    int64_t max_x = std::max(std::max(bx00, bx01), std::max(bx10, bx11)) + 1;
+    int64_t max_y = std::max(std::max(by00, by01), std::max(by10, by11)) + 1;
+    if (min_x < 0) min_x = 0;
+    if (min_y < 0) min_y = 0;
+    if (max_x > opsin->xsize()) max_x = opsin->xsize();
+    if (max_y > opsin->ysize()) max_y = opsin->ysize();
+    constexpr float kAntialiasingMargin = 0.3f;
+    float margin_x = kAntialiasingMargin * invxnorm;
+    float margin_y = kAntialiasingMargin * invynorm;
+    for (size_t iy = min_y; iy < max_y; iy++) {
+      for (size_t ix = min_x; ix < max_x; ix++) {
+        float x = ix - bx00 + 0.5f;
+        float y = iy - by00 + 0.5f;
+        float ox = inverse_transform[0] * x + inverse_transform[1] * y;
+        float oy = inverse_transform[2] * x + inverse_transform[3] * y;
+        if (ox >= -margin_x && ox < xs + margin_x && oy >= -margin_y &&
+            oy < ys + margin_y) {
+          ox = 2 * ox + 2;
+          oy = 2 * oy + 2;
+          int floorx = ox;
+          int ceilx = floorx + 1;
+          float fracx = ox - floorx;
+          int floory = oy;
+          int ceily = floory + 1;
+          float fracy = oy - floory;
+          for (size_t c = 0; c < 3; c++) {
+            float val =
+                (fracx * fracy * block[c][ceily * (2 * xs + 5) + ceilx] +
+                 fracx * (1.f - fracy) *
+                     block[c][floory * (2 * xs + 5) + ceilx] +
+                 (1.f - fracx) * fracy *
+                     block[c][ceily * (2 * xs + 5) + floorx] +
+                 (1.f - fracx) * (1.f - fracy) *
+                     block[c][floory * (2 * xs + 5) + floorx]) *
+                kBlockInvScale * kXybRadius[c];
+            rows[c][iy * stride + ix] += add ? val : -val;
+          }
+        }
+      }
+    }
+  }
+#ifdef PIK_BD_DUMP_IMAGES
+  DumpImage(*opsin);
+#endif
+}
+
+void BlockDictionary::AddTo(Image3F* opsin, size_t downsampling) const {
+  Apply</*add=*/true>(opsin, downsampling);  // Adds all blocks to *opsin.
+}
+
+void BlockDictionary::SubtractFrom(Image3F* opsin) const {
+  Apply</*add=*/false>(opsin, /*downsampling=*/1);  // Subtracts all blocks.
+}
+
+namespace {
+
+float Distance(const QuantizedBlock& a, const Image3F& img, size_t bx,
+               size_t by) {  // Sum of squared differences over all channels.
+  const size_t stride = img.PixelsPerRow();
+  float sum_sq = 0.0f;
+  for (size_t c = 0; c < 3; c++) {
+    const float* patch = img.ConstPlaneRow(c, by) + bx;  // Top-left of patch.
+    for (size_t iy = 0; iy < a.ysize; iy++) {
+      for (size_t ix = 0; ix < a.xsize; ix++) {
+        const float diff = a.pixels[c][iy * a.xsize + ix] -
+                           ScaleForQuantization(patch[iy * stride + ix], c);
+        sum_sq += diff * diff;
+      }
+    }
+  }
+  return sum_sq;
+}
+
+constexpr size_t kNumExploreSteps = 1;  // Initial step budget of a search.
+constexpr size_t kSmallBlockThreshold = 13;  // In pixels (xs * ys).
+constexpr float kDistThreshold = 0.6f;  // Scaled by the block's pixel count.
+// Stores the existing 4-neighbors of (x, y); returns how many were stored.
+size_t Neighbors(size_t x, size_t y, size_t x_max, size_t y_max,
+                 std::array<size_t, 2>* neighbors) {
+  size_t count = 0;
+  if (x > 0) neighbors[count++] = {{x - 1, y}};
+  if (y > 0) neighbors[count++] = {{x, y - 1}};
+  if (x + 1 != x_max) neighbors[count++] = {{x + 1, y}};
+  if (y + 1 != y_max) neighbors[count++] = {{x, y + 1}};
+  return count;
+}
+
+struct Site {  // Pixel queued for visiting, with its remaining step budget.
+  size_t x, y;
+  size_t steps;
+  constexpr Site(size_t x, size_t y, size_t steps)
+      : x(x), y(y), steps(steps) {}
+};
+
+// Returns the bounding box of the loosely connected component of active
+// pixels (value 1 in `active`) reachable from (x, y); the box is empty if
+// none is reached. Processed pixels are marked with 2. A path may cross at
+// most kNumExploreSteps - 1 consecutive inactive pixels between active ones.
+Rect ConnectedCompenentBounds(ImageI* PIK_RESTRICT active, size_t x, size_t y) {
+  Rect box = Rect(x, y, 0, 0);
+  std::vector<Site> places_to_visit;  // Explicit depth-first stack.
+  places_to_visit.emplace_back(x, y, kNumExploreSteps);
+
+  while (!places_to_visit.empty()) {
+    const Site site = places_to_visit.back();
+    places_to_visit.pop_back();
+    x = site.x;
+    y = site.y;
+
+    if (site.steps == 0) continue;  // Step budget exhausted.
+    const int32_t cell_type = active->ConstRow(y)[x];
+    if (cell_type == 2) continue;  // Already processed.
+
+    if (cell_type == 1) {
+      // Grow the box to cover this pixel unless that makes it too large.
+      size_t xmin = std::min(x, box.x0());
+      size_t ymin = std::min(y, box.y0());
+      size_t xmax = std::max(x + 1, box.x0() + box.xsize());
+      size_t ymax = std::max(y + 1, box.y0() + box.ysize());
+      if (xmax - xmin >= kMaxBlockSize || ymax - ymin >= kMaxBlockSize) {
+        continue;
+      }
+      box = Rect(xmin, ymin, xmax - xmin, ymax - ymin);
+    }
+    // Mark once here instead of once per neighbor inside the loop below.
+    active->Row(y)[x] = 2;
+    // Active pixels refill the step budget; inactive ones consume one step.
+    const size_t next_steps = cell_type ? kNumExploreSteps : site.steps - 1;
+    // Query the neighbors once rather than on every loop-condition check.
+    std::array<size_t, 2> neighbors[4];
+    const size_t num_neighbors =
+        Neighbors(x, y, active->xsize(), active->ysize(), neighbors);
+    for (size_t i = 0; i < num_neighbors; i++) {
+      places_to_visit.emplace_back(neighbors[i][0], neighbors[i][1],
+                                   next_steps);
+    }
+  }
+  return box;
+}
+};  // namespace
+
+static const bool kUseBlockDictionary = false;
+static const bool kUseHardcodedStretchedBlocks = false;
+static const bool kUseDotDetection = true;
+
+// Builds a BlockDictionary for `opsin`. With kUseDotDetection set, blocks are
+// the bounding boxes of connected components of the dots found by SplitDots.
+// Otherwise (currently disabled) repeated small patterns are searched in the
+// difference to a median background. `butteraugli_target` is unused here.
+BlockDictionary FindBestBlockDictionary(double butteraugli_target,
+                                        const Image3F& opsin) {
+  if (kUseDotDetection) {
+    Image3F without_dots = Image3F(opsin.xsize(), opsin.ysize());
+    Image3F dots(opsin.xsize(), opsin.ysize());
+    ImageI active(opsin.xsize(), opsin.ysize());
+    SplitDots(opsin, &without_dots, &dots);
+    std::vector<QuantizedBlock> quantized_blocks;
+    std::vector<BlockInfo> blocks;
+    std::vector<BlockPosition> positions;
+#ifdef PIK_BD_DUMP_IMAGES
+    DumpImage(dots);
+#endif
+    // Mark every pixel with a nonzero dot value in any channel as active.
+    for (size_t y = 0; y < opsin.ysize(); y++) {
+      const float* PIK_RESTRICT dot_rows[3];
+      for (size_t c = 0; c < 3; c++) {
+        dot_rows[c] = dots.Plane(c).ConstRow(y);
+      }
+      int32_t* PIK_RESTRICT active_row = active.Row(y);
+      for (size_t x = 0; x < opsin.xsize(); x++) {
+        bool is_block = false;
+        for (size_t c = 0; c < 3; c++) {
+          if (dot_rows[c][x] != 0.0f) {
+            is_block = true;
+          }
+        }
+        active_row[x] = is_block;
+      }
+    }
+
+    // Each connected component of active pixels becomes one block position.
+    for (size_t y = 0; y < opsin.ysize(); y++) {
+      for (size_t x = 0; x < opsin.xsize(); x++) {
+        Rect box = ConnectedCompenentBounds(&active, x, y);
+        if (box.xsize() && box.ysize()) {
+          BlockInfo fullinfo(&dots, box);
+          auto it = std::find_if(blocks.begin(), blocks.end(),
+                                 [&](const BlockInfo& other) {
+                                   return other.IsSimilar(fullinfo);
+                                 });
+          if (it == blocks.end()) {
+            positions.emplace_back(box.x0(), box.y0(), quantized_blocks.size());
+            quantized_blocks.push_back(QuantizedBlock(fullinfo));
+            blocks.push_back(fullinfo);
+          } else {
+            positions.emplace_back(box.x0(), box.y0(), it - blocks.begin());
+          }
+          ZeroFillImage(&active, box);
+        }
+      }
+    }
+    return BlockDictionary{quantized_blocks, positions};
+  }
+  if (kUseHardcodedStretchedBlocks) {
+    std::vector<QuantizedBlock> blocks;
+    blocks.push_back(
+        QuantizedBlock{1, 3, {{0, 0, 0}, {-11, -8, -11}, {-10, -7, -10}}});
+    std::vector<BlockPosition> positions;
+    positions.emplace_back(612, 698, 0, 204, 506, -10);
+    return BlockDictionary{blocks, positions};
+  }
+  if (!kUseBlockDictionary) return BlockDictionary{};
+  // Estimate the background as the median over a mirrored 17x17 window.
+  Image3F background_diff(opsin.xsize(), opsin.ysize());
+  Image3F background(opsin.xsize(), opsin.ysize());
+  const size_t background_stride = background.PixelsPerRow();
+  const int kBackgroundBorderPixels = 8;
+  std::vector<float> values;
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t by = 0; by < opsin.ysize(); by++) {
+      float* PIK_RESTRICT row_background_diff = background_diff.PlaneRow(c, by);
+      float* PIK_RESTRICT row_background = background.PlaneRow(c, by);
+      const float* PIK_RESTRICT row_src = opsin.ConstPlaneRow(c, by);
+      for (size_t bx = 0; bx < opsin.xsize(); bx++) {
+        values.clear();
+        for (int iy = -kBackgroundBorderPixels;
+             iy < 1 + kBackgroundBorderPixels; iy++) {
+          int y = Mirror(by + iy, opsin.ysize());
+          const float* PIK_RESTRICT row = opsin.ConstPlaneRow(c, y);
+          for (int ix = -kBackgroundBorderPixels;
+               ix < 1 + kBackgroundBorderPixels; ix++) {
+            int x = Mirror(bx + ix, opsin.xsize());
+            values.push_back(row[x]);
+          }
+        }
+        float median = Median(&values);
+        row_background[bx] = median;
+        row_background_diff[bx] = row_src[bx] - median;
+      }
+    }
+  }
+
+  ImageB used(opsin.xsize(), opsin.ysize());
+  FillImage(uint8_t(0), &used);
+#ifdef PIK_BD_DUMP_IMAGES
+  ImageF shapes(opsin.xsize(), opsin.ysize());
+  FillImage(0.0f, &shapes);
+#endif
+
+  std::vector<std::array<size_t, 4>> candidates;
+  constexpr float kHighPixelThreshold = 0.1f;
+  constexpr float kHighPixelSumMultiplier = 5.0f;
+
+  constexpr float kBackgroundThreshold = 0.15f;
+  const float* PIK_RESTRICT row_diff = background_diff.ConstPlaneRow(1, 0);
+  const size_t stride_diff = background_diff.PixelsPerRow();
+  uint8_t* PIK_RESTRICT row_used = used.Row(0);
+  const size_t stride_used = used.PixelsPerRow();
+#ifdef PIK_BD_DUMP_IMAGES
+  float* PIK_RESTRICT row_shapes = shapes.Row(0);
+  const size_t stride_shapes = shapes.PixelsPerRow();
+  std::mt19937 generator(1);
+  std::uniform_real_distribution<double> dis(0.5, 1.0);
+#endif
+  std::vector<std::pair<int, int>> deltas;
+  std::vector<std::pair<int, int>> stack;
+  // Flood-fill regions that differ from the background to collect candidates.
+  for (size_t by = 0; by < opsin.ysize(); by++) {
+    for (size_t bx = 0; bx < opsin.xsize(); bx++) {
+      if (row_used[by * stride_used + bx]) continue;
+      if (std::fabs(row_diff[by * stride_diff + bx]) < kBackgroundThreshold)
+        continue;
+      deltas.clear();
+      std::pair<int, int> min_delta{};
+      std::pair<int, int> max_delta{};
+      stack.clear();
+      stack.emplace_back(0, 0);
+      float sum_high_pixels = 0;
+      while (!stack.empty()) {
+        std::pair<int, int> delta = stack.back();
+        stack.pop_back();
+        deltas.push_back(delta);
+        if (delta.first < min_delta.first) min_delta.first = delta.first;
+        if (delta.first > max_delta.first) max_delta.first = delta.first;
+        if (delta.second < min_delta.second) min_delta.second = delta.second;
+        if (delta.second > max_delta.second) max_delta.second = delta.second;
+        size_t x = bx + delta.first;
+        size_t y = by + delta.second;
+        if (row_used[y * stride_used + x]) continue;
+        row_used[y * stride_used + x] = true;
+
+        if (std::fabs(row_diff[y * stride_diff + x]) > kHighPixelThreshold) {
+          sum_high_pixels += std::fabs(row_diff[y * stride_diff + x]);
+        }
+
+        int kPixelDeltas[][2] = {{1, 0}, {0, 1},  {-1, 0}, {0, -1},
+                                 {1, 1}, {1, -1}, {-1, 1}, {-1, -1}};
+        for (auto dxy : kPixelDeltas) {
+          int next_x = x + dxy[0];
+          int next_y = y + dxy[1];
+          if (next_x < 0 || next_x >= int64_t(opsin.xsize())) continue;
+          if (next_y < 0 || next_y >= int64_t(opsin.ysize())) continue;
+          if (std::fabs(row_diff[next_y * stride_diff + next_x]) <
+              kBackgroundThreshold)
+            continue;
+          stack.emplace_back(delta.first + dxy[0], delta.second + dxy[1]);
+        }
+      }
+      size_t xsize = max_delta.first - min_delta.first + 1;
+      size_t ysize = max_delta.second - min_delta.second + 1;
+      if (xsize > kMaxBlockSize) continue;
+      if (ysize > kMaxBlockSize) continue;
+      if (sum_high_pixels < kHighPixelSumMultiplier * kHighPixelThreshold)
+        continue;
+      if (min_delta.first + bx + xsize >= opsin.xsize()) continue;
+      if (min_delta.second + by + ysize >= opsin.ysize()) continue;
+      std::array<size_t, 4> candidate;
+      candidate[0] = xsize;
+      candidate[1] = ysize;
+      candidate[2] = min_delta.first + bx;
+      candidate[3] = min_delta.second + by;
+      candidates.push_back(candidate);
+#ifdef PIK_BD_DUMP_IMAGES
+      float val = dis(generator);
+      for (std::pair<int, int> delta : deltas) {
+        int x = bx + delta.first;
+        int y = by + delta.second;
+        row_shapes[y * stride_shapes + x] = val;
+      }
+#endif
+    }
+  }
+  // fprintf(stderr, "%lu\n", candidates.size());
+#ifdef PIK_BD_DUMP_IMAGES
+  DumpImage(background);
+  DumpImage(background_diff);
+  Image3F yonly = CopyImage(background_diff);
+  FillImage(0.0f, const_cast<ImageF*>(&yonly.Plane(0)));
+  FillImage(0.0f, const_cast<ImageF*>(&yonly.Plane(2)));
+  DumpImage(yonly);
+  DumpImage(shapes);
+#endif
+  auto extract_block = [&](size_t xsize, size_t ysize, size_t bx, size_t by) {
+    QuantizedBlock info;
+    info.xsize = xsize;
+    info.ysize = ysize;
+    for (size_t c = 0; c < 3; c++) {
+      for (size_t iy = 0; iy < info.ysize; iy++) {
+        const float* row = background_diff.ConstPlaneRow(c, by + iy);
+        for (size_t ix = 0; ix < info.xsize; ix++) {
+          info.pixels[c][iy * info.xsize + ix] = Quantize(row[bx + ix], c);
+        }
+      }
+    }
+    return info;
+  };
+  // A block is worth encoding only if some quantized pixel is nonzero. Only
+  // the first xsize * ysize entries per channel are set by extract_block, so
+  // nothing beyond them may be read.
+  auto should_encode = [](const QuantizedBlock& info) {
+    const size_t num_px = info.xsize * info.ysize;
+    for (size_t c = 0; c < 3; c++) {
+      for (size_t i = 0; i < num_px; i++) {
+        if (info.pixels[c][i] != 0) return true;
+      }
+    }
+    return false;
+  };
+  std::vector<std::pair<size_t, std::array<size_t, 4>>> occurrences;
+  // TODO(veluca): take into account off-by-one errors in bounding boxes.
+  constexpr size_t kMinNumOccurrences = 4;
+  for (size_t i = 0; i < candidates.size(); i++) {
+    QuantizedBlock cand = extract_block(candidates[i][0], candidates[i][1],
+                                        candidates[i][2], candidates[i][3]);
+    if (!should_encode(cand)) continue;
+    size_t count = 0;
+    for (size_t j = 0; j < candidates.size(); j++) {
+      if (i == j) continue;
+      if (candidates[j][0] != candidates[i][0]) continue;
+      if (candidates[j][1] != candidates[i][1]) continue;
+      size_t num_px = candidates[i][0] * candidates[i][1];
+      if (Distance(cand, background_diff, candidates[j][2], candidates[j][3]) <
+          kDistThreshold * num_px) {
+        count++;
+      }
+    }
+    if (count * 2 >= kMinNumOccurrences)
+      occurrences.push_back({count, candidates[i]});
+  }
+  std::sort(occurrences.begin(), occurrences.end(),
+            std::greater<std::pair<size_t, std::array<size_t, 4>>>());
+
+  // fprintf(stderr, "%lu\n", occurrences.size());
+  // for (size_t i = 0; i < occurrences.size(); i++) {
+  //   fprintf(stderr, "%lu ", occurrences[i].first);
+  //}
+  // fprintf(stderr, "\n");
+
+  std::vector<QuantizedBlock> blocks;
+  std::vector<char> taken(occurrences.size());
+  for (size_t i = 0; i < occurrences.size(); i++) {
+    if (taken[i]) continue;
+    QuantizedBlock cand =
+        extract_block(occurrences[i].second[0], occurrences[i].second[1],
+                      occurrences[i].second[2], occurrences[i].second[3]);
+    blocks.push_back(cand);
+    for (size_t j = i + 1; j < occurrences.size(); j++) {
+      if (occurrences[j].second[0] != occurrences[i].second[0]) continue;
+      if (occurrences[j].second[1] != occurrences[i].second[1]) continue;
+      size_t num_px = occurrences[i].second[0] * occurrences[i].second[1];
+      if (Distance(cand, background_diff, occurrences[j].second[2],
+                   occurrences[j].second[3]) < kDistThreshold * num_px) {
+        taken[j] = true;
+      }
+    }
+  }
+
+  std::vector<BlockPosition> positions;
+
+  FillImage(uint8_t(0), &used);
+  std::vector<size_t> counts(blocks.size());
+  std::vector<std::pair<size_t, size_t>> sizes;
+  for (size_t i = 0; i < blocks.size(); i++) {
+    sizes.emplace_back(blocks[i].xsize, blocks[i].ysize);
+  }
+  std::sort(sizes.begin(), sizes.end(),
+            [](std::pair<size_t, size_t> a, std::pair<size_t, size_t> b) {
+              return std::make_pair(a.first * a.second, a.first) >
+                     std::make_pair(b.first * b.second, b.first);
+            });
+  sizes.resize(std::unique(sizes.begin(), sizes.end()) - sizes.begin());
+  constexpr float kMaxBackgroundDiff = 0.3f;
+  constexpr float kSmallBlockDistThresholdPenalty = 0.3f;
+  auto add_matching_block = [&](size_t xs, size_t ys, size_t bx, size_t by) {
+    bool overlap = false;
+    const float* row_background =
+        background.ConstPlaneRow(1, by == 0 ? by : by - 1) +
+        (bx == 0 ? bx : bx - 1);
+    size_t basex = bx == 0 ? 0 : 1;
+    size_t basey = by == 0 ? 0 : 1;
+    float background_min_y = row_background[0];
+    float background_max_y = row_background[0];
+    for (size_t iy = 0; iy < ys; iy++) {
+      if (overlap) break;
+      const uint8_t* used_row = used.ConstRow(by + iy);
+      for (size_t ix = 0; ix < xs; ix++) {
+        float cur_y =
+            row_background[(basey + iy) * background_stride + basex + ix];
+        if (cur_y < background_min_y) background_min_y = cur_y;
+        if (cur_y > background_max_y) background_max_y = cur_y;
+        if (used_row[bx + ix]) {
+          overlap = true;
+          break;
+        }
+      }
+    }
+    if (overlap) return;
+    if (bx != 0 && by != 0 && bx + xs != opsin.xsize() &&
+        by + ys != opsin.ysize() && xs * ys < kSmallBlockThreshold) {
+      for (size_t iy : {size_t(0), ys + 1}) {
+        for (size_t ix = 0; ix < xs + 2; ix++) {
+          float cur_y = row_background[iy * background_stride + ix];
+          if (cur_y < background_min_y) background_min_y = cur_y;
+          if (cur_y > background_max_y) background_max_y = cur_y;
+        }
+      }
+      for (size_t ix : {size_t(0), xs + 1}) {
+        for (size_t iy = 0; iy < ys + 2; iy++) {
+          float cur_y = row_background[iy * background_stride + ix];
+          if (cur_y < background_min_y) background_min_y = cur_y;
+          if (cur_y > background_max_y) background_max_y = cur_y;
+        }
+      }
+    }
+    float max_background_diff = kMaxBackgroundDiff;
+    if (xs * ys < kSmallBlockThreshold)
+      max_background_diff *= kSmallBlockDistThresholdPenalty;
+    if (background_max_y - background_min_y > max_background_diff) return;
+    QuantizedBlock info = extract_block(xs, ys, bx, by);
+    if (!should_encode(info)) return;
+    size_t id = 0;
+    float dist = std::numeric_limits<float>::max();
+    for (size_t i = 0; i < blocks.size(); i++) {
+      if (blocks[i].xsize != xs) continue;
+      if (blocks[i].ysize != ys) continue;
+      float d = Distance(blocks[i], background_diff, bx, by);
+      if (d < dist) {
+        id = i;
+        dist = d;
+      }
+    }
+    size_t numpx = info.xsize * info.ysize;
+    float dist_threshold = kDistThreshold * numpx;
+    if (numpx < kSmallBlockThreshold)
+      dist_threshold *= kSmallBlockDistThresholdPenalty;
+    if (dist > dist_threshold) {
+      return;
+    }
+    if (id == blocks.size()) return;
+    for (size_t iy = 0; iy < ys; iy++) {
+      uint8_t* used_row = used.Row(by + iy);
+      for (size_t ix = 0; ix < xs; ix++) {
+        used_row[bx + ix] = true;
+      }
+    }
+    positions.emplace_back(bx, by, id);
+    counts[id]++;
+  };
+  for (auto p : sizes) {
+    size_t xs = p.first;
+    size_t ys = p.second;
+    if (xs * ys < kSmallBlockThreshold) {
+      for (size_t i = 0; i < candidates.size(); i++) {
+        if (candidates[i][0] != xs || candidates[i][1] != ys) continue;
+        add_matching_block(xs, ys, candidates[i][2], candidates[i][3]);
+      }
+    } else {
+      for (size_t by = 0; by < opsin.ysize() - ys; by++) {
+        for (size_t bx = 0; bx < opsin.xsize() - xs; bx++) {
+          add_matching_block(xs, ys, bx, by);
+        }
+      }
+    }
+  }
+  std::vector<size_t> remap(blocks.size());
+  size_t new_id = 0;
+  for (size_t i = 0; i < blocks.size(); i++) {
+    remap[i] = new_id;
+    if (counts[i] < kMinNumOccurrences) continue;
+    blocks[new_id] = blocks[i];
+    new_id++;
+  }
+  blocks.resize(new_id);
+  size_t newp = 0;
+  for (size_t i = 0; i < positions.size(); i++) {
+    if (counts[positions[i].id] < kMinNumOccurrences) continue;
+    positions[newp] = positions[i];
+    positions[newp].id = remap[positions[newp].id];
+    newp++;
+  }
+  positions.resize(newp);
+  new_id = 0;
+  for (size_t i = 0; i < counts.size(); i++) {
+    if (counts[i] < kMinNumOccurrences) continue;
+    counts[new_id++] = counts[i];
+  }
+  counts.resize(new_id);
+  // for (size_t i : counts) fprintf(stderr, "%lu ", i);
+  // fprintf(stderr, "\n\n");
+  // fprintf(stderr, "%lu %lu\n", blocks.size(), positions.size());
+  // for (size_t i = 0; i < blocks.size(); i++) {
+  //  fprintf(stderr, "(%lu %lu) ", blocks[i].xsize, blocks[i].ysize);
+  //}
+  // fprintf(stderr, "\n");
+  return BlockDictionary{blocks, positions};
+}
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/block_dictionary.h b/codec/L2/demos/pikEnc/host/pik/block_dictionary.h
new file mode 100755
index 0000000000..a8a6da758f
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/block_dictionary.h
@@ -0,0 +1,97 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_BLOCK_DICTIONARY_H_
+#define PIK_BLOCK_DICTIONARY_H_
+
+// Chooses reference blocks out of the image, and sets things up to avoid
+// encoding once for each occurrence.
+
+#include <cstddef>
+#include "pik/bit_reader.h"
+#include "pik/image.h"
+#include "pik/opsin_params.h"
+#include "pik/pik_info.h"
+
+#include <vector>
+
+namespace pik {
+
+constexpr size_t kMaxBlockSize = 8;
+constexpr float kDotDistThreshold = 0.02f;
+
+struct QuantizedBlock {
+  size_t xsize;
+  size_t ysize;
+  // Row-major; xsize * ysize entries are used per channel.
+  int8_t pixels[3][kMaxBlockSize * kMaxBlockSize];
+  // Equal when the sizes match and the used pixels of every channel are
+  // bytewise identical.
+  bool operator==(const QuantizedBlock& other) const {
+    if (xsize != other.xsize || ysize != other.ysize) return false;
+    const size_t num_bytes = sizeof(int8_t) * xsize * ysize;
+    for (size_t c = 0; c < 3; c++) {
+      if (memcmp(pixels[c], other.pixels[c], num_bytes) != 0) return false;
+    }
+    return true;
+  }
+  // Orders by xsize, then ysize, then the bytewise (memcmp) order of the
+  // used pixels, channel by channel.
+  bool operator<(const QuantizedBlock& other) const {
+    if (xsize != other.xsize) return xsize < other.xsize;
+    if (ysize != other.ysize) return ysize < other.ysize;
+    const size_t num_bytes = sizeof(int8_t) * xsize * ysize;
+    for (size_t c = 0; c < 3; c++) {
+      const int cmp = memcmp(pixels[c], other.pixels[c], num_bytes);
+      if (cmp != 0) return cmp < 0;
+    }
+    return false;
+  }
+};
+struct BlockPosition {
+  // Position of top-left corner of the block in the image.
+  size_t x = 0, y = 0;
+  size_t id = 0;  // Index of the block (dictionary first, then static blocks).
+  bool transform = false;
+  // Offset of top-right corner from top-left one.
+  int64_t dx = 0;
+  int64_t dy = 0;
+  // Measured in half-pixels.
+  int64_t width = 0;
+  BlockPosition() {}
+  BlockPosition(size_t x, size_t y, size_t id) : x(x), y(y), id(id) {}
+  BlockPosition(size_t x, size_t y, size_t id, int64_t dx, int64_t dy,
+                int64_t width)
+      : x(x), y(y), id(id), transform(true), dx(dx), dy(dy), width(width) {}
+};
+
+class BlockDictionary {
+ public:
+  BlockDictionary() {}
+  BlockDictionary(const std::vector<QuantizedBlock>& dictionary,
+                  const std::vector<BlockPosition>& positions);
+  // Serializes the dictionary; adds the encoded size to info->total_size
+  std::string Encode(PikImageSizeInfo* info) const;  // when info is non-null.
+  // Parses a dictionary from `br`; positions must fit in xsize x ysize.
+  Status Decode(BitReader* br, size_t xsize, size_t ysize);
+  // Adds all blocks to *opsin; downsampling != 1 requires no positions.
+  void AddTo(Image3F* opsin, size_t downsampling) const;
+  // Subtracts all blocks from *opsin (downsampling fixed to 1).
+  void SubtractFrom(Image3F* opsin) const;
+
+ private:
+  std::vector<QuantizedBlock> dictionary_;
+  std::vector<BlockPosition> positions_;
+  template <bool>
+  void Apply(Image3F* opsin, size_t downsampling) const;
+};
+
+BlockDictionary FindBestBlockDictionary(double butteraugli_target,
+                                        const Image3F& opsin);
+
+}  // namespace pik
+
+#endif  // PIK_BLOCK_DICTIONARY_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/brotli.cc b/codec/L2/demos/pikEnc/host/pik/brotli.cc
new file mode 100755
index 0000000000..add9e0ef4c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/brotli.cc
@@ -0,0 +1,135 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/brotli.h"
+
+#include <string.h>  // memcpy
+#include <memory>
+#include "brotli/decode.h"
+#include "brotli/encode.h"
+#include "pik/status.h"
+
+namespace pik {
+
+Status BrotliCompress(int quality, const uint8_t* in, const size_t in_size,
+                      uint8_t* PIK_RESTRICT out,
+                      size_t* PIK_RESTRICT total_out_size) {
+  // The encoder instance is destroyed automatically on every return path.
+  std::unique_ptr<BrotliEncoderState, decltype(BrotliEncoderDestroyInstance)*>
+      encoder(BrotliEncoderCreateInstance(nullptr, nullptr, nullptr),
+              BrotliEncoderDestroyInstance);
+  if (!encoder) return PIK_FAILURE("BrotliEncoderCreateInstance failed");
+
+  BrotliEncoderSetParameter(encoder.get(), BROTLI_PARAM_QUALITY, quality);
+  BrotliEncoderSetParameter(encoder.get(), BROTLI_PARAM_LGWIN, 24);
+  BrotliEncoderSetParameter(encoder.get(), BROTLI_PARAM_LGBLOCK, 0);
+
+  // Compressed bytes are staged in this scratch buffer, one chunk at a time.
+  const size_t kChunkSize = 128 * 1024;
+  PaddedBytes scratch(kChunkSize);
+
+  size_t remaining_in = in_size;
+  const uint8_t* read_pos = in;
+  size_t encoder_total = 0;  // Filled in by Brotli; not used afterwards.
+
+  // All input is offered up front, so FINISH is requested on every call.
+  do {
+    size_t scratch_free = kChunkSize;
+    uint8_t* write_pos = scratch.data();
+    const bool ok = BrotliEncoderCompressStream(
+        encoder.get(), BROTLI_OPERATION_FINISH, &remaining_in, &read_pos,
+        &scratch_free, &write_pos, &encoder_total);
+    if (!ok) return PIK_FAILURE("Brotli compression failed");
+
+    // Append this chunk at the caller's current offset, then advance it.
+    const size_t produced = write_pos - scratch.data();
+    memcpy(out + *total_out_size, scratch.data(), produced);
+    *total_out_size += produced;
+  } while (!BrotliEncoderIsFinished(encoder.get()));
+  return true;
+}
+
+Status BrotliCompress(int quality, const PaddedBytes& in,
+                      PaddedBytes* PIK_RESTRICT out) {
+  // The encoder instance is destroyed automatically on every return path.
+  std::unique_ptr<BrotliEncoderState, decltype(BrotliEncoderDestroyInstance)*>
+      encoder(BrotliEncoderCreateInstance(nullptr, nullptr, nullptr),
+              BrotliEncoderDestroyInstance);
+  if (!encoder) return PIK_FAILURE("BrotliEncoderCreateInstance failed");
+
+  BrotliEncoderSetParameter(encoder.get(), BROTLI_PARAM_QUALITY, quality);
+  BrotliEncoderSetParameter(encoder.get(), BROTLI_PARAM_LGWIN, 24);
+  BrotliEncoderSetParameter(encoder.get(), BROTLI_PARAM_LGBLOCK, 0);
+
+  // Compressed bytes are staged in this scratch buffer, one chunk at a time.
+  const size_t kChunkSize = 128 * 1024;
+  PaddedBytes scratch(kChunkSize);
+
+  size_t remaining_in = in.size();
+  const uint8_t* read_pos = in.data();
+  size_t encoder_total = 0;  // Filled in by Brotli; not used afterwards.
+
+  // All input is offered up front, so FINISH is requested on every call.
+  do {
+    size_t scratch_free = kChunkSize;
+    uint8_t* write_pos = scratch.data();
+    const bool ok = BrotliEncoderCompressStream(
+        encoder.get(), BROTLI_OPERATION_FINISH, &remaining_in, &read_pos,
+        &scratch_free, &write_pos, &encoder_total);
+    if (!ok) return PIK_FAILURE("Brotli compression failed");
+
+    // Grow `out` by the chunk size and copy the chunk into the new tail.
+    const size_t produced = write_pos - scratch.data();
+    const size_t old_size = out->size();
+    out->resize(old_size + produced);
+    memcpy(out->data() + old_size, scratch.data(), produced);
+  } while (!BrotliEncoderIsFinished(encoder.get()));
+  return true;
+}
+
+Status BrotliDecompress(const uint8_t* in, size_t max_input_size,
+                        size_t max_output_size, size_t* PIK_RESTRICT bytes_read,
+                        PaddedBytes* PIK_RESTRICT out) {
+  // Empty input is rejected before any decoder state or buffer is allocated.
+  if (max_input_size == 0) return false;
+  std::unique_ptr<BrotliDecoderState, decltype(BrotliDecoderDestroyInstance)*>
+      s(BrotliDecoderCreateInstance(nullptr, nullptr, nullptr),
+        BrotliDecoderDestroyInstance);
+  if (!s) return PIK_FAILURE("BrotliDecoderCreateInstance failed");
+
+  const size_t kBufferSize = 128 * 1024;
+  PaddedBytes temp_buffer(kBufferSize);
+
+  size_t avail_in = max_input_size;
+  const uint8_t* next_in = in;
+  BrotliDecoderResult code;
+  do {
+    size_t avail_out = kBufferSize;
+    uint8_t* next_out = temp_buffer.data();
+    code = BrotliDecoderDecompressStream(s.get(), &avail_in, &next_in,
+                                         &avail_out, &next_out, nullptr);
+    const size_t out_size = next_out - temp_buffer.data();
+    const size_t old_size = out->size();
+    // Check the limit before growing `out`: no unwritten bytes on failure.
+    if (out_size > max_output_size || old_size > max_output_size - out_size)
+      return PIK_FAILURE("Brotli output too large");
+    out->resize(old_size + out_size);
+    memcpy(out->data() + old_size, temp_buffer.data(), out_size);
+  } while (code == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT);
+  if (code != BROTLI_DECODER_RESULT_SUCCESS)
+    return PIK_FAILURE("Brotli decompression failed");
+  *bytes_read += (max_input_size - avail_in);
+  return true;
+}
+
+Status BrotliDecompress(const PaddedBytes& in, size_t max_output_size,
+                        size_t* PIK_RESTRICT bytes_read,
+                        PaddedBytes* PIK_RESTRICT out) {
+  return BrotliDecompress(in.data(), in.size(), max_output_size, bytes_read,
+                          out);  // Forwards to the pointer + length overload.
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/brotli.h b/codec/L2/demos/pikEnc/host/pik/brotli.h
new file mode 100755
index 0000000000..caa6cb6848
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/brotli.h
@@ -0,0 +1,41 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_BROTLI_H_
+#define PIK_BROTLI_H_
+
+// Convenience functions for Brotli compression/decompression.
+
+#include <stddef.h>
+#include <stdint.h>
+#include "pik/compiler_specific.h"
+#include "pik/padded_bytes.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Appends to out.
+Status BrotliCompress(int quality, const PaddedBytes& in,
+                      PaddedBytes* PIK_RESTRICT out);
+
+// Writes at out + *total_out_size and advances it; `out` is not bounds-checked.
+Status BrotliCompress(int quality, const uint8_t* in, const size_t in_size,
+                      uint8_t* PIK_RESTRICT out,
+                      size_t* PIK_RESTRICT total_out_size);
+
+// Appends to out and ADDS to "bytes_read", which must be pre-initialized.
+Status BrotliDecompress(const uint8_t* in, size_t max_input_size,
+                        size_t max_output_size, size_t* PIK_RESTRICT bytes_read,
+                        PaddedBytes* PIK_RESTRICT out);
+
+// Appends to out and ADDS to "bytes_read", which must be pre-initialized.
+Status BrotliDecompress(const PaddedBytes& in, size_t max_output_size,
+                        size_t* PIK_RESTRICT bytes_read,
+                        PaddedBytes* PIK_RESTRICT out);
+
+}  // namespace pik
+
+#endif  // PIK_BROTLI_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/butteraugli/butteraugli.cc b/codec/L2/demos/pikEnc/host/pik/butteraugli/butteraugli.cc
new file mode 100755
index 0000000000..16a9debd64
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/butteraugli/butteraugli.cc
@@ -0,0 +1,2001 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+//
+// Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com)
+//
+// The physical architecture of butteraugli is based on the following naming
+// convention:
+//   * Opsin - dynamics of the photosensitive chemicals in the retina
+//             with their immediate electrical processing
+//   * Xyb - hybrid opponent/trichromatic color space
+//     x is roughly red-subtract-green.
+//     y is yellow.
+//     b is blue.
+//     Xyb values are computed from Opsin mixing, not directly from rgb.
+//   * Mask - for visual masking
+//   * Hf - color modeling for spatially high-frequency features
+//   * Lf - color modeling for spatially low-frequency features
+//   * Diffmap - to cluster and build an image of error between the images
+//   * Blur - to hold the smoothing code
+
+#include "pik/butteraugli/butteraugli.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+
+#define BUTTERAUGLI_RESTRICT PIK_RESTRICT
+
+#ifndef PROFILER_ENABLED
+#define PROFILER_ENABLED 0
+#endif
+#if PROFILER_ENABLED
+#include "pik/profiler.h"
+#else
+#define PROFILER_FUNC
+#define PROFILER_ZONE(name)
+#endif
+
+namespace pik {
+namespace butteraugli {
+
+static inline bool IsNan(const float x) {
+  uint32_t raw;  // IEEE-754 bit pattern of x.
+  memcpy(&raw, &x, sizeof(raw));
+  const bool exponent_all_ones = (raw & 0x7F800000u) == 0x7F800000u;
+  return exponent_all_ones && (raw & 0x007FFFFFu) != 0;  // Mantissa != 0.
+}
+
+static inline bool IsNan(const double x) {
+  uint64_t raw;  // IEEE-754 bit pattern of x.
+  memcpy(&raw, &x, sizeof(raw));
+  const uint64_t kExponent = 0x7FF0000000000000ULL;  // All-ones exponent.
+  return (raw & kExponent) == kExponent && (raw & 0x000FFFFFFFFFFFFFULL) != 0;
+}
+
+static inline void CheckImage(const ImageF& image, const char* name) {
+  PROFILER_FUNC;
+  // Scan every pixel; on the first NaN, report its position and exit.
+  for (size_t y = 0; y < image.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT pixels = image.Row(y);
+    for (size_t x = 0; x < image.xsize(); ++x) {
+      if (!IsNan(pixels[x])) continue;
+      printf("NAN: Image %s @ %zu,%zu (of %zu,%zu)\n", name, x, y,
+             image.xsize(), image.ysize());
+      exit(1);
+    }
+  }
+}
+
+#if BUTTERAUGLI_ENABLE_CHECKS
+// Checks enabled: CHECK_NAN aborts on NaN, CHECK_IMAGE scans a whole image.
+#define CHECK_NAN(x, str)                \
+  do {                                   \
+    if (IsNan(x)) {                      \
+      printf("%d: %s\n", __LINE__, str); \
+      abort();                           \
+    }                                    \
+  } while (0)
+
+#define CHECK_IMAGE(image, name) CheckImage(image, name)
+
+#else
+// Checks disabled: both macros expand to nothing.
+#define CHECK_NAN(x, str)
+#define CHECK_IMAGE(image, name)
+
+#endif  // BUTTERAUGLI_ENABLE_CHECKS
+
+// Calculate a 2x2 subsampled image for purposes of recursive butteraugli at
+// multiresolution.
+static Image3F SubSample2x(const Image3F &in) {
+  // Output dimensions are the input dimensions halved, rounding up.
+  const int xs = (in.xsize() + 1) / 2;
+  const int ys = (in.ysize() + 1) / 2;
+  Image3F retval(xs, ys);
+  for (int c = 0; c < 3; ++c) {
+    // Clear the plane, then accumulate a quarter of every input pixel into
+    // the output pixel covering it.
+    for (int y = 0; y < ys; ++y) {
+      float* row_out = retval.PlaneRow(c, y);
+      for (int x = 0; x < xs; ++x) row_out[x] = 0;
+    }
+    for (int y = 0; y < in.ysize(); ++y) {
+      for (int x = 0; x < in.xsize(); ++x) {
+        retval.PlaneRow(c, y / 2)[x / 2] += 0.25 * in.PlaneRow(c, y)[x];
+      }
+    }
+    // Odd width: the last output column only received half of its inputs.
+    if ((in.xsize() & 1) != 0) {
+      const int last_column = retval.xsize() - 1;
+      for (int y = 0; y < retval.ysize(); ++y) {
+        retval.PlaneRow(c, y)[last_column] *= 2.0;
+      }
+    }
+    // Odd height: likewise for the last output row.
+    if ((in.ysize() & 1) != 0) {
+      float* bottom_row = retval.PlaneRow(c, retval.ysize() - 1);
+      for (int x = 0; x < retval.xsize(); ++x) bottom_row[x] *= 2.0;
+    }
+  }
+  return retval;
+}
+
+// Supersample src by 2x and add it to dest.
+static void AddSupersampled2x(const ImageF &src, float w, ImageF &dest) {
+  // Coarser (more averaged) levels show fewer errors; damp dest to compensate.
+  static const double kHeuristicMixingValue = 0.3;
+  for (int y = 0; y < dest.ysize(); ++y) {
+    float* row_dest = dest.Row(y);
+    for (int x = 0; x < dest.xsize(); ++x) {
+      row_dest[x] *= 1.0 - kHeuristicMixingValue * w;
+      row_dest[x] += w * src.Row(y / 2)[x / 2];
+    }
+  }
+}
+
+
+// Purpose of kInternalGoodQualityThreshold:
+// Normalize 'ok' image degradation to 1.0 across different versions of
+// butteraugli.
+static const double kInternalGoodQualityThreshold = 33.23754765778804;
+static const double kGlobalScale = 1.0 / kInternalGoodQualityThreshold;
+
+inline float DotProduct(const float u[3], const float v[3]) {
+  return u[0] * v[0] + u[1] * v[1] + u[2] * v[2];  // 3-element dot product.
+}
+
+std::vector<float> ComputeKernel(float sigma) {
+  const float m = 2.25;  // Accuracy increases when m is increased.
+  const float exponent_scale = -1.0 / (2 * sigma * sigma);
+  const int radius = std::max<int>(1, m * fabs(sigma));
+  std::vector<float> taps(2 * radius + 1);  // Unnormalized Gaussian weights.
+  for (int tap = 0; tap <= 2 * radius; ++tap) {
+    taps[tap] = exp(exponent_scale * (tap - radius) * (tap - radius));
+  }
+  return taps;
+}
+
+void ConvolveBorderColumn(const ImageF& in, const std::vector<float>& kernel,
+                          const float weight_no_border,
+                          const float border_ratio, const size_t x,
+                          float* BUTTERAUGLI_RESTRICT row_out) {
+  // Taps falling outside [0, xsize) are dropped; [first, last] is what is left.
+  const int offset = kernel.size() / 2;
+  const int column = static_cast<int>(x);
+  const int first = column < offset ? 0 : column - offset;
+  const int last = std::min<int>(in.xsize() - 1, column + offset);
+  const int shift = offset - column;  // Kernel index = input column + shift.
+  float weight = 0.0f;
+  for (int j = first; j <= last; ++j) weight += kernel[j + shift];
+  // Interpolate linearly between the no-border scaling and border scaling.
+  weight = (1.0f - border_ratio) * weight + border_ratio * weight_no_border;
+  const float scale = 1.0f / weight;
+  // Output is written transposed: entry y of row_out comes from input row y.
+  for (size_t y = 0; y < in.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y);
+    float sum = 0.0f;
+    for (int j = first; j <= last; ++j) sum += row_in[j] * kernel[j + shift];
+    row_out[y] = sum * scale;
+  }
+}
+
+// Computes a horizontal convolution and transposes the result.
+void Convolution(const ImageF& in, const std::vector<float>& kernel,
+                 const float border_ratio, ImageF* PIK_RESTRICT out) {
+  // `out` must have the transposed shape of `in` (checked below). Columns near
+  // the left/right edge use ConvolveBorderColumn; the rest use the full kernel.
+  PROFILER_FUNC;
+  PIK_CHECK(out->xsize() == in.ysize());
+  PIK_CHECK(out->ysize() == in.xsize());
+  const int len = kernel.size();
+  const int offset = len / 2;
+  float weight_no_border = 0.0f;
+  for (int j = 0; j < len; ++j) weight_no_border += kernel[j];
+  const float scale_no_border = 1.0f / weight_no_border;
+  const int border1 = in.xsize() <= offset ? in.xsize() : offset;
+  const int border2 = in.xsize() - offset;
+  std::vector<float> scaled_kernel_storage(len / 2 + 1);
+  float* BUTTERAUGLI_RESTRICT scaled_kernel = scaled_kernel_storage.data();
+  for (int i = 0; i <= len / 2; ++i) {
+    scaled_kernel[i] = kernel[i] * scale_no_border;
+  }
+  // left border
+  for (int x = 0; x < border1; ++x) {
+    ConvolveBorderColumn(in, kernel, weight_no_border, border_ratio, x,
+                         out->Row(x));
+  }
+  // middle: the whole kernel fits; known kernel lengths are unrolled by hand.
+  switch (len) {
+#if 1  // speed-optimized version
+    case 5: {
+      const float sk0 = scaled_kernel[0];
+      const float sk1 = scaled_kernel[1];
+      const float sk2 = scaled_kernel[2];
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+        for (int x = border1; x < border2; ++x, ++row_in) {
+          float sum = (row_in[0] + row_in[4]) * sk0;
+          sum += (row_in[1] + row_in[3]) * sk1;
+          sum += (row_in[2]) * sk2;
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          row_out[y] = sum;
+        }
+      }
+    } break;
+    case 9: {
+      const float sk0 = scaled_kernel[0];
+      const float sk1 = scaled_kernel[1];
+      const float sk2 = scaled_kernel[2];
+      const float sk3 = scaled_kernel[3];
+      const float sk4 = scaled_kernel[4];
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+        for (int x = border1; x < border2; ++x, ++row_in) {
+          float sum = (row_in[0] + row_in[8]) * sk0;
+          sum += (row_in[1] + row_in[7]) * sk1;
+          sum += (row_in[2] + row_in[6]) * sk2;
+          sum += (row_in[3] + row_in[5]) * sk3;
+          sum += (row_in[4]) * sk4;
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          row_out[y] = sum;
+        }
+      }
+    } break;
+    case 17:
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+        for (int x = border1; x < border2; ++x, ++row_in) {
+          float sum = (row_in[0] + row_in[16]) * scaled_kernel[0];
+          sum += (row_in[1] + row_in[15]) * scaled_kernel[1];
+          sum += (row_in[2] + row_in[14]) * scaled_kernel[2];
+          sum += (row_in[3] + row_in[13]) * scaled_kernel[3];
+          sum += (row_in[4] + row_in[12]) * scaled_kernel[4];
+          sum += (row_in[5] + row_in[11]) * scaled_kernel[5];
+          sum += (row_in[6] + row_in[10]) * scaled_kernel[6];
+          sum += (row_in[7] + row_in[9]) * scaled_kernel[7];
+          sum += (row_in[8]) * scaled_kernel[8];
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          row_out[y] = sum;
+        }
+      }
+      break;
+    case 33:
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+        for (int x = border1; x < border2; ++x, ++row_in) {
+          float sum = (row_in[0] + row_in[32]) * scaled_kernel[0];
+          sum += (row_in[1] + row_in[31]) * scaled_kernel[1];
+          sum += (row_in[2] + row_in[30]) * scaled_kernel[2];
+          sum += (row_in[3] + row_in[29]) * scaled_kernel[3];
+          sum += (row_in[4] + row_in[28]) * scaled_kernel[4];
+          sum += (row_in[5] + row_in[27]) * scaled_kernel[5];
+          sum += (row_in[6] + row_in[26]) * scaled_kernel[6];
+          sum += (row_in[7] + row_in[25]) * scaled_kernel[7];
+          sum += (row_in[8] + row_in[24]) * scaled_kernel[8];
+          sum += (row_in[9] + row_in[23]) * scaled_kernel[9];
+          sum += (row_in[10] + row_in[22]) * scaled_kernel[10];
+          sum += (row_in[11] + row_in[21]) * scaled_kernel[11];
+          sum += (row_in[12] + row_in[20]) * scaled_kernel[12];
+          sum += (row_in[13] + row_in[19]) * scaled_kernel[13];
+          sum += (row_in[14] + row_in[18]) * scaled_kernel[14];
+          sum += (row_in[15] + row_in[17]) * scaled_kernel[15];
+          sum += (row_in[16]) * scaled_kernel[16];
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          row_out[y] = sum;
+        }
+      }
+      break;
+    case 11:
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+        for (int x = border1; x < border2; ++x, ++row_in) {
+          float sum = (row_in[0] + row_in[10]) * scaled_kernel[0];
+          sum += (row_in[1] + row_in[9]) * scaled_kernel[1];
+          sum += (row_in[2] + row_in[8]) * scaled_kernel[2];
+          sum += (row_in[3] + row_in[7]) * scaled_kernel[3];
+          sum += (row_in[4] + row_in[6]) * scaled_kernel[4];
+          sum += (row_in[5]) * scaled_kernel[5];
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          row_out[y] = sum;
+        }
+      }
+      break;
+    case 41:
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+        for (int x = border1; x < border2; ++x, ++row_in) {
+          float sum = (row_in[0] + row_in[40]) * scaled_kernel[0];
+          sum += (row_in[1] + row_in[39]) * scaled_kernel[1];
+          sum += (row_in[2] + row_in[38]) * scaled_kernel[2];
+          sum += (row_in[3] + row_in[37]) * scaled_kernel[3];
+          sum += (row_in[4] + row_in[36]) * scaled_kernel[4];
+          sum += (row_in[5] + row_in[35]) * scaled_kernel[5];
+          sum += (row_in[6] + row_in[34]) * scaled_kernel[6];
+          sum += (row_in[7] + row_in[33]) * scaled_kernel[7];
+          sum += (row_in[8] + row_in[32]) * scaled_kernel[8];
+          sum += (row_in[9] + row_in[31]) * scaled_kernel[9];
+          sum += (row_in[10] + row_in[30]) * scaled_kernel[10];
+          sum += (row_in[11] + row_in[29]) * scaled_kernel[11];
+          sum += (row_in[12] + row_in[28]) * scaled_kernel[12];
+          sum += (row_in[13] + row_in[27]) * scaled_kernel[13];
+          sum += (row_in[14] + row_in[26]) * scaled_kernel[14];
+          sum += (row_in[15] + row_in[25]) * scaled_kernel[15];
+          sum += (row_in[16] + row_in[24]) * scaled_kernel[16];
+          sum += (row_in[17] + row_in[23]) * scaled_kernel[17];
+          sum += (row_in[18] + row_in[22]) * scaled_kernel[18];
+          sum += (row_in[19] + row_in[21]) * scaled_kernel[19];
+          sum += (row_in[20]) * scaled_kernel[20];
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          row_out[y] = sum;
+        }
+      }
+      break;
+    case 47:
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+        for (int x = border1; x < border2; ++x, ++row_in) {
+          float sum = (row_in[0] + row_in[46]) * scaled_kernel[0];
+          sum += (row_in[1] + row_in[45]) * scaled_kernel[1];
+          sum += (row_in[2] + row_in[44]) * scaled_kernel[2];
+          sum += (row_in[3] + row_in[43]) * scaled_kernel[3];
+          sum += (row_in[4] + row_in[42]) * scaled_kernel[4];
+          sum += (row_in[5] + row_in[41]) * scaled_kernel[5];
+          sum += (row_in[6] + row_in[40]) * scaled_kernel[6];
+          sum += (row_in[7] + row_in[39]) * scaled_kernel[7];
+          sum += (row_in[8] + row_in[38]) * scaled_kernel[8];
+          sum += (row_in[9] + row_in[37]) * scaled_kernel[9];
+          sum += (row_in[10] + row_in[36]) * scaled_kernel[10];
+          sum += (row_in[11] + row_in[35]) * scaled_kernel[11];
+          sum += (row_in[12] + row_in[34]) * scaled_kernel[12];
+          sum += (row_in[13] + row_in[33]) * scaled_kernel[13];
+          sum += (row_in[14] + row_in[32]) * scaled_kernel[14];
+          sum += (row_in[15] + row_in[31]) * scaled_kernel[15];
+          sum += (row_in[16] + row_in[30]) * scaled_kernel[16];
+          sum += (row_in[17] + row_in[29]) * scaled_kernel[17];
+          sum += (row_in[18] + row_in[28]) * scaled_kernel[18];
+          sum += (row_in[19] + row_in[27]) * scaled_kernel[19];
+          sum += (row_in[20] + row_in[26]) * scaled_kernel[20];
+          sum += (row_in[21] + row_in[25]) * scaled_kernel[21];
+          sum += (row_in[22] + row_in[24]) * scaled_kernel[22];
+          sum += (row_in[23]) * scaled_kernel[23];
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          row_out[y] = sum;
+        }
+      }
+      break;
+    default:
+      //      printf("Warning: Unexpected kernel size! %d\n", len);
+#else
+    default:
+#endif
+      for (size_t y = 0; y < in.ysize(); ++y) {
+        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y);
+        for (int j, x = border1; x < border2; ++x) {
+          const int d = x - offset;
+          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+          float sum = 0.0f;
+          for (j = 0; j <= len / 2; ++j) {
+            sum += row_in[d + j] * scaled_kernel[j];
+          }
+          for (; j < len; ++j) {
+            sum += row_in[d + j] * scaled_kernel[len - 1 - j];
+          }
+          row_out[y] = sum;
+        }
+      }
+  }
+  // right border
+  for (int x = border2; x < in.xsize(); ++x) {
+    ConvolveBorderColumn(in, kernel, weight_no_border, border_ratio, x,
+                         out->Row(x));
+  }
+  // scaled_kernel_storage releases its memory when it goes out of scope.
+}
+
+// A blur somewhat similar to a 2D Gaussian blur.
+// See: https://en.wikipedia.org/wiki/Gaussian_blur
+void Blur(const ImageF& in, float sigma, float border_ratio,
+          ImageF* PIK_RESTRICT out) {
+  const std::vector<float> taps = ComputeKernel(sigma);
+  ImageF transposed(in.ysize(), in.xsize());  // Holds the first pass' output.
+  Convolution(in, taps, border_ratio, &transposed);   // in -> transposed
+  Convolution(transposed, taps, border_ratio, out);   // transposed -> out
+}
+
+// Clamping linear interpolator.
+inline double InterpolateClampNegative(const double* array, int size,
+                                       double ix) {
+  // Piecewise-linear lookup of `array` at fractional position `ix`:
+  //   - positions below zero are clamped to the first entry,
+  //   - positions at or past the last entry return the last entry,
+  //   - everything else blends the two neighbouring entries.
+  const double pos = ix < 0 ? 0 : ix;
+  const int lower = static_cast<int>(pos);
+  if (lower >= size - 1) {
+    return array[size - 1];
+  }
+
+  const int upper = lower + 1;
+  const double fraction = pos - lower;
+  return array[lower] + fraction * (array[upper] - array[lower]);
+}
+
+double GammaMinArg() {
+  double lo0, lo1, lo2;  // OpsinAbsorbance outputs for an all-zero input.
+  OpsinAbsorbance(0.0, 0.0, 0.0, &lo0, &lo1, &lo2);
+  return std::min(lo0, std::min(lo1, lo2));
+}
+
+double GammaMaxArg() {
+  double hi0, hi1, hi2;  // OpsinAbsorbance outputs for an all-255 input.
+  OpsinAbsorbance(255.0, 255.0, 255.0, &hi0, &hi1, &hi2);
+  return std::max(hi0, std::max(hi1, hi2));
+}
+
+double SimpleGamma(double v) {
+  // A simple HDR compatible gamma function: scale * log(v + offset) + bias.
+  static const double kScale = 18.6580932135;
+  static const double kBias = -20.2789020414;
+  static const double kOffset = 7.14672470003;
+
+  // Negative inputs should be rare, but could push log() towards a NaN.
+  // Negative photons don't exist, so such inputs are clamped to zero.
+  const double clamped = v < 0 ? 0 : v;
+
+  const double log_term = log(clamped + kOffset);
+  return kScale * log_term + kBias;
+}
+
+static inline double Gamma(double v) {
+  // SimpleGamma must be used when using an intensity_target with butteraugli
+  // to get values above 255.0.
+  // GammaPolynomial is faster but may only be used if the maximum input value
+  // is 255.0.
+  // TODO(lode): allow pik to specify which to use depending on the intensity
+  //             target.
+  return SimpleGamma(v);  // Currently always the HDR-compatible variant.
+  // Alternative for inputs capped at 255.0: return GammaPolynomial(v);
+}
+
+Image3F OpsinDynamicsImage(const Image3F& rgb) {
+  PROFILER_FUNC;
+  Image3F xyb(rgb.xsize(), rgb.ysize());
+  const double kSigma = 1.2;
+  // A blurred copy of the input drives the per-pixel sensitivity below.
+  Image3F blurred(rgb.xsize(), rgb.ysize());
+  for (int c = 0; c < 3; ++c) {
+    Blur(rgb.Plane(c), kSigma, 0.0, const_cast<ImageF*>(&blurred.Plane(c)));
+  }
+  for (size_t y = 0; y < rgb.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT in_r = rgb.ConstPlaneRow(0, y);
+    const float* BUTTERAUGLI_RESTRICT in_g = rgb.ConstPlaneRow(1, y);
+    const float* BUTTERAUGLI_RESTRICT in_b = rgb.ConstPlaneRow(2, y);
+    const float* BUTTERAUGLI_RESTRICT blur_r = blurred.ConstPlaneRow(0, y);
+    const float* BUTTERAUGLI_RESTRICT blur_g = blurred.ConstPlaneRow(1, y);
+    const float* BUTTERAUGLI_RESTRICT blur_b = blurred.ConstPlaneRow(2, y);
+    float* BUTTERAUGLI_RESTRICT out_x = xyb.PlaneRow(0, y);
+    float* BUTTERAUGLI_RESTRICT out_y = xyb.PlaneRow(1, y);
+    float* BUTTERAUGLI_RESTRICT out_b = xyb.PlaneRow(2, y);
+    for (size_t x = 0; x < rgb.xsize(); ++x) {
+      // Sensitivity per channel is Gamma(v) / v, evaluated on the absorbance
+      // of the blurred image.
+      // TODO(janwas): use new polynomial to compute Gamma(x)/x derivative.
+      float blur0, blur1, blur2;
+      OpsinAbsorbance(blur_r[x], blur_g[x], blur_b[x], &blur0, &blur1, &blur2);
+      const float sensitivity0 = Gamma(blur0) / blur0;
+      const float sensitivity1 = Gamma(blur1) / blur1;
+      const float sensitivity2 = Gamma(blur2) / blur2;
+
+      // The absorbance of the unblurred pixel is scaled by that sensitivity
+      // and then converted to XYB, written straight into the output rows.
+      float mixed0, mixed1, mixed2;
+      OpsinAbsorbance(in_r[x], in_g[x], in_b[x], &mixed0, &mixed1, &mixed2);
+      mixed0 *= sensitivity0;
+      mixed1 *= sensitivity1;
+      mixed2 *= sensitivity2;
+
+      RgbToXyb(mixed0, mixed1, mixed2, &out_x[x], &out_y[x], &out_b[x]);
+    }
+  }
+  return xyb;
+}
+
+// Dead zone around zero (for w >= 0): |x| <= w maps to 0, the rest shrinks by w.
+static BUTTERAUGLI_INLINE float RemoveRangeAroundZero(float w, float x) {
+  return x > w ? x - w : x < -w ? x + w : 0.0f;
+}
+
+// Emphasizes values near zero: 2x for |x| <= w, else shifted by w away from 0.
+static BUTTERAUGLI_INLINE float AmplifyRangeAroundZero(float w, float x) {
+  return x > w ? x + w : x < -w ? x - w : 2.0f * x;
+}
+
+// XybLowFreqToVals converts from low-frequency XYB space to the 'vals' space.
+// Vals space can be converted to L2-norm space (Euclidean and normalized)
+// through visual masking.
+template <class V>
+BUTTERAUGLI_INLINE void XybLowFreqToVals(const V& x, const V& y, const V& b_arg,
+                                         V* BUTTERAUGLI_RESTRICT valx,
+                                         V* BUTTERAUGLI_RESTRICT valy,
+                                         V* BUTTERAUGLI_RESTRICT valb) {
+  // Per-channel gains, and the fraction of y that is added to b beforehand.
+  static const double kGainX = 9.72240181632;
+  static const double kGainY = 26.2028219456;
+  static const double kGainB = 9.31035596136;
+  static const double kYToB = -0.415568690394;
+  // b first receives its y-proportional correction; x and y are only scaled.
+  const V gain_x(kGainX);
+  const V gain_y(kGainY);
+  const V gain_b(kGainB);
+  const V corrected_b = b_arg + V(kYToB) * y;
+  *valb = corrected_b * gain_b;
+  *valx = x * gain_x;
+  *valy = y * gain_y;
+}
+
+static ImageF SuppressInBrightAreas(size_t xsize, size_t ysize, double mul,
+                                    double mul2, double reg, const ImageF& hf,
+                                    const ImageF& brightness) {
+  PROFILER_FUNC;
+  // Gain on hf is mul * reg / (reg + brightness); `mul2` is never read here.
+  ImageF result(xsize, ysize);
+  for (size_t y = 0; y < ysize; ++y) {
+    const float* row_hf = hf.Row(y);
+    const float* row_brightness = brightness.Row(y);
+    float* row_result = result.Row(y);
+    for (size_t x = 0; x < xsize; ++x) {
+      const float gain = mul * reg / (reg + row_brightness[x]);
+      row_result[x] = gain * row_hf[x];
+    }
+  }
+  return result;
+}
+
+static float MaximumClamp(float v, float maxval) {
+  static const double kMul = 0.934914340314;
+  // Inside [-maxval, maxval) the value is returned untouched; outside, the
+  // excess beyond the nearer bound is scaled by kMul.
+  const bool above = v >= maxval;
+  const bool below = !above && v < -maxval;
+  if (!above && !below) return v;
+  const float bound = above ? maxval : -maxval;
+  v -= bound;
+  v *= kMul;
+  v += bound;
+  return v;
+}
+
+static ImageF MaximumClamping(size_t xsize, size_t ysize, const ImageF& ix,
+                              double yw) {
+  static const double kMul = 0.70036978414;
+  // Inside [-yw, yw): copied as is. Outside: excess over the bound * kMul.
+  ImageF result(xsize, ysize);
+  for (size_t y = 0; y < ysize; ++y) {
+    const float* row_in = ix.Row(y);
+    float* row_out = result.Row(y);
+    for (size_t x = 0; x < xsize; ++x) {
+      double v = row_in[x];
+      const bool above = v >= yw;
+      const bool below = !above && v < -yw;
+      if (above || below) {
+        const double bound = above ? yw : -yw;
+        v -= bound;
+        v *= kMul;
+        v += bound;
+      }
+      row_out[x] = v;
+    }
+  }
+  return result;
+}
+
+static ImageF SuppressXByY(size_t xsize, size_t ysize, const ImageF& ix,
+                           const ImageF& iy, const double yw) {
+  static const double s = 0.941388349694;
+  // Gain applied to x: s + yw * (1 - s) / (yw + y^2).
+  ImageF result(xsize, ysize);
+  for (size_t row = 0; row < ysize; ++row) {
+    const float* row_x = ix.Row(row);
+    const float* row_y = iy.Row(row);
+    float* row_out = result.Row(row);
+    for (size_t col = 0; col < xsize; ++col) {
+      const double y_squared = static_cast<double>(row_y[col]) * row_y[col];
+      const double gain = s + (yw * (1.0 - s)) / (yw + y_squared);
+      row_out[col] = gain * row_x[col];
+    }
+  }
+  return result;
+}
+
+// Splits the image `xyb` into the frequency bands stored in `ps`:
+//   lf  - all three planes of xyb blurred with kSigmaLf and finally mapped
+//         through XybLowFreqToVals;
+//   mf  - the residual (xyb - lf) blurred with kSigmaHf;
+//   hf  - planes 0 and 1 only: the residual minus mf, blurred with kSigmaUhf;
+//   uhf - planes 0 and 1 only: what the kSigmaUhf blur removed from hf.
+// Plane 2 stops at mf; no hf/uhf band is produced for it.
+// `xsize`/`ysize` are used for the per-pixel loops and for the hf/uhf
+// allocations; lf and mf are allocated with the dimensions of `xyb`.
+static void SeparateFrequencies(size_t xsize, size_t ysize,
+                                const Image3F& xyb,
+                                PsychoImage& ps) {
+  PROFILER_FUNC;
+  // Extract lf ...
+  static const double kSigmaLf = 7.15593339443;
+  static const double kSigmaHf = 3.22489901262;
+  static const double kSigmaUhf = 1.56416327805;
+  // Border handling is complicated.
+  static const double border_lf = 0.0;
+  static const double border_mf = 0.0;
+  static const double border_hf = 0.0;
+  // ps.mf is allocated exactly once, below, together with ps.lf.
+  ps.hf[0] = ImageF(xsize, ysize);
+  ps.hf[1] = ImageF(xsize, ysize);
+  ps.lf = Image3F(xyb.xsize(), xyb.ysize());
+  ps.mf = Image3F(xyb.xsize(), xyb.ysize());
+  for (int i = 0; i < 3; ++i) {
+    // The const_cast lets a plane obtained through Plane() be passed as
+    // Blur's output argument.
+    Blur(xyb.Plane(i), kSigmaLf, border_lf,
+         const_cast<ImageF*>(&ps.lf.Plane(i)));
+
+    // ... and keep everything else in mf.
+    for (size_t y = 0; y < ysize; ++y) {
+      for (size_t x = 0; x < xsize; ++x) {
+        ps.mf.PlaneRow(i, y)[x] =
+            xyb.PlaneRow(i, y)[x] - ps.lf.ConstPlaneRow(i, y)[x];
+      }
+    }
+    if (i == 2) {
+      // Plane 2: blur mf in place and stop; it gets no hf band.
+      Blur(ps.mf.Plane(i), kSigmaHf, border_mf,
+           const_cast<ImageF*>(&ps.mf.Plane(i)));
+      break;
+    }
+    // Divide mf into mf and hf.
+    for (size_t y = 0; y < ysize; ++y) {
+      float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(i, y);
+      float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[i].Row(y);
+      for (size_t x = 0; x < xsize; ++x) {
+        row_hf[x] = row_mf[x];
+      }
+    }
+    Blur(ps.mf.Plane(i), kSigmaHf, border_mf,
+         const_cast<ImageF*>(&ps.mf.Plane(i)));
+    static const double kRemoveMfRange = 0.3;
+    static const double kAddMfRange = 0.1;
+    if (i == 0) {
+      // Plane 0: hf = residual - mf, then mf goes through
+      // RemoveRangeAroundZero.
+      for (size_t y = 0; y < ysize; ++y) {
+        float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(0, y);
+        float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[0].Row(y);
+        for (size_t x = 0; x < xsize; ++x) {
+          row_hf[x] -= row_mf[x];
+          row_mf[x] = RemoveRangeAroundZero(kRemoveMfRange, row_mf[x]);
+        }
+      }
+    } else {
+      // Plane 1: hf = residual - mf, then mf goes through
+      // AmplifyRangeAroundZero.
+      for (size_t y = 0; y < ysize; ++y) {
+        float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(1, y);
+        float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[1].Row(y);
+        for (size_t x = 0; x < xsize; ++x) {
+          row_hf[x] -= row_mf[x];
+          row_mf[x] = AmplifyRangeAroundZero(kAddMfRange, row_mf[x]);
+        }
+      }
+    }
+  }
+  // Suppress red-green by intensity change in the high freq channels.
+  static const double suppress = 286.09942757;
+  ps.hf[0] = SuppressXByY(xsize, ysize, ps.hf[0], ps.hf[1], suppress);
+
+  ps.uhf[0] = ImageF(xsize, ysize);
+  ps.uhf[1] = ImageF(xsize, ysize);
+  for (int i = 0; i < 2; ++i) {
+    // Divide hf into hf and uhf.
+    for (size_t y = 0; y < ysize; ++y) {
+      float* BUTTERAUGLI_RESTRICT row_uhf = ps.uhf[i].Row(y);
+      float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[i].Row(y);
+      for (size_t x = 0; x < xsize; ++x) {
+        row_uhf[x] = row_hf[x];
+      }
+    }
+    Blur(ps.hf[i], kSigmaUhf, border_hf, &ps.hf[i]);
+    static const double kRemoveHfRange = 0.12;
+    static const double kAddHfRange = 0.03;
+    static const double kRemoveUhfRange = 0.08;
+    static const double kAddUhfRange = 0.02;
+    static const double kMaxclampHf = 78.7416747972;
+    static const double kMaxclampUhf = 4.62878535439;
+    // These two are never modified, so they are const like their siblings.
+    static const double kMulYHf = 1.16155986803;
+    static const double kMulYUhf = 2.32552960949;
+    if (i == 0) {
+      for (size_t y = 0; y < ysize; ++y) {
+        float* BUTTERAUGLI_RESTRICT row_uhf = ps.uhf[0].Row(y);
+        float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[0].Row(y);
+        for (size_t x = 0; x < xsize; ++x) {
+          row_uhf[x] -= row_hf[x];
+          row_hf[x] = RemoveRangeAroundZero(kRemoveHfRange, row_hf[x]);
+          row_uhf[x] = RemoveRangeAroundZero(kRemoveUhfRange, row_uhf[x]);
+        }
+      }
+    } else {
+      for (size_t y = 0; y < ysize; ++y) {
+        float* BUTTERAUGLI_RESTRICT row_uhf = ps.uhf[1].Row(y);
+        float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[1].Row(y);
+        for (size_t x = 0; x < xsize; ++x) {
+          row_uhf[x] -= row_hf[x];
+          row_hf[x] = MaximumClamp(row_hf[x], kMaxclampHf);
+          row_uhf[x] = MaximumClamp(row_uhf[x], kMaxclampUhf);
+          row_uhf[x] *= kMulYUhf;
+          row_hf[x] *= kMulYHf;
+          row_hf[x] = AmplifyRangeAroundZero(kAddHfRange, row_hf[x]);
+          row_uhf[x] = AmplifyRangeAroundZero(kAddUhfRange, row_uhf[x]);
+        }
+      }
+    }
+  }
+  // Modify range around zero code only concerns the high frequency
+  // planes and only the X and Y channels.
+  // Convert low freq xyb to vals space so that we can do a simple squared sum
+  // diff on the low frequencies later.
+  for (size_t y = 0; y < ysize; ++y) {
+    float* BUTTERAUGLI_RESTRICT row_x = ps.lf.PlaneRow(0, y);
+    float* BUTTERAUGLI_RESTRICT row_y = ps.lf.PlaneRow(1, y);
+    float* BUTTERAUGLI_RESTRICT row_b = ps.lf.PlaneRow(2, y);
+    for (size_t x = 0; x < xsize; ++x) {
+      float valx, valy, valb;
+      XybLowFreqToVals(row_x[x], row_y[x], row_b[x], &valx, &valy, &valb);
+      row_x[x] = valx;
+      row_y[x] = valy;
+      row_b[x] = valb;
+    }
+  }
+}
+
+// Adds w * (i0 - i1)^2, pixel by pixel, to plane `c` of `diffmap`.
+// The loop bounds come from i0. A weight of exactly zero leaves `diffmap`
+// untouched.
+static void L2Diff(const ImageF& i0, const ImageF& i1, const float w,
+                   Image3F* BUTTERAUGLI_RESTRICT diffmap, size_t c) {
+  if (w == 0) return;
+  const size_t num_rows = i0.ysize();
+  const size_t num_cols = i0.xsize();
+  for (size_t y = 0; y != num_rows; ++y) {
+    const float* BUTTERAUGLI_RESTRICT lhs = i0.ConstRow(y);
+    const float* BUTTERAUGLI_RESTRICT rhs = i1.ConstRow(y);
+    float* BUTTERAUGLI_RESTRICT acc = diffmap->PlaneRow(c, y);
+    for (size_t x = 0; x != num_cols; ++x) {
+      const float delta = lhs[x] - rhs[x];
+      acc[x] += w * delta * delta;
+    }
+  }
+}
+
+// Asymmetric squared-difference accumulator for plane `c` of `diffmap`.
+// i0 is the original image.
+// i1 is the deformed copy.
+// Every pixel receives 0.8 * w_0gt1 * (i0 - i1)^2. In addition, when i1 lies
+// outside the band between 0.4 * i0 and 1.0 * i0 (on the side of zero that i0
+// is on), the squared distance to the nearest band edge is added with weight
+// 0.8 * w_0lt1. Nothing is done when both weights are zero.
+static void L2DiffAsymmetric(const ImageF& i0, const ImageF& i1, double w_0gt1,
+                             double w_0lt1,
+                             Image3F* BUTTERAUGLI_RESTRICT diffmap, size_t c) {
+  if (w_0gt1 == 0 && w_0lt1 == 0) {
+    return;
+  }
+  const double w_primary = w_0gt1 * 0.8;
+  const double w_secondary = w_0lt1 * 0.8;
+  const size_t height = i0.ysize();
+  const size_t width = i0.xsize();
+  for (size_t y = 0; y < height; ++y) {
+    const float* BUTTERAUGLI_RESTRICT orig = i0.Row(y);
+    const float* BUTTERAUGLI_RESTRICT dist = i1.Row(y);
+    float* BUTTERAUGLI_RESTRICT acc = diffmap->PlaneRow(c, y);
+    for (size_t x = 0; x < width; ++x) {
+      // Primary symmetric quadratic objective.
+      const double delta = orig[x] - dist[x];
+      acc[x] += w_primary * delta * delta;
+
+      // Secondary half-open quadratic objectives.
+      const double magnitude = fabs(orig[x]);
+      const double band_lo = 0.4 * magnitude;
+      const double band_hi = 1.0 * magnitude;
+
+      // Distance by which the deformed sample leaves the band, if it does.
+      bool outside_band = true;
+      double v = 0;
+      if (orig[x] < 0) {
+        if (dist[x] > -band_lo) {
+          v = dist[x] + band_lo;
+        } else if (dist[x] < -band_hi) {
+          v = -dist[x] - band_hi;
+        } else {
+          outside_band = false;
+        }
+      } else {
+        if (dist[x] < band_lo) {
+          v = band_lo - dist[x];
+        } else if (dist[x] > band_hi) {
+          v = dist[x] - band_hi;
+        } else {
+          outside_band = false;
+        }
+      }
+      if (outside_band) {
+        acc[x] += w_secondary * v * v;
+      }
+    }
+  }
+}
+
+// Returns a new image of the same size as `diffmap_in` holding, per pixel,
+// the square root of the input value. Values below 1 / kInitialSlope^2 are
+// instead scaled linearly by kInitialSlope.
+ImageF CalculateDiffmap(const ImageF& diffmap_in) {
+  PROFILER_FUNC;
+  ImageF diffmap(diffmap_in.xsize(), diffmap_in.ysize());
+  static const float kInitialSlope = 100.0f;
+  for (size_t y = 0; y < diffmap.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT src = diffmap_in.Row(y);
+    float* BUTTERAUGLI_RESTRICT dst = diffmap.Row(y);
+    for (size_t x = 0; x < diffmap.xsize(); ++x) {
+      const float value = src[x];
+      // TODO(b/29974893): Until that is fixed do not call sqrt on very small
+      // numbers.
+      if (value < (1.0f / (kInitialSlope * kInitialSlope))) {
+        dst[x] = kInitialSlope * value;
+      } else {
+        dst[x] = std::sqrt(value);
+      }
+    }
+  }
+  return diffmap;
+}
+
+// Builds one masking input image per PsychoImage and forwards both to
+// Mask(). For planes 0 and 1 each pixel is
+//   kUhfWeight[plane] * uhf + kHfWeight[plane] * hf.
+// Plane 2 of the two temporaries is not written here.
+void MaskPsychoImage(const PsychoImage& pi0, const PsychoImage& pi1,
+                     const size_t xsize, const size_t ysize,
+                     Image3F* BUTTERAUGLI_RESTRICT mask,
+                     Image3F* BUTTERAUGLI_RESTRICT mask_dc,
+                     ImageF* BUTTERAUGLI_RESTRICT diff_ac) {
+  Image3F mask_xyb0(xsize, ysize);
+  Image3F mask_xyb1(xsize, ysize);
+  static const double kUhfWeight[2] = {
+    0.0,
+    0.308212951541,
+  };
+  static const double kHfWeight[2] = {
+    0.0632641915861,
+    1.16513324377,
+  };
+  for (int plane = 0; plane < 2; ++plane) {
+    const double w_uhf = kUhfWeight[plane];
+    const double w_hf = kHfWeight[plane];
+    for (size_t y = 0; y < ysize; ++y) {
+      const float* BUTTERAUGLI_RESTRICT hf0 = pi0.hf[plane].Row(y);
+      const float* BUTTERAUGLI_RESTRICT hf1 = pi1.hf[plane].Row(y);
+      const float* BUTTERAUGLI_RESTRICT uhf0 = pi0.uhf[plane].Row(y);
+      const float* BUTTERAUGLI_RESTRICT uhf1 = pi1.uhf[plane].Row(y);
+      float* BUTTERAUGLI_RESTRICT out0 = mask_xyb0.PlaneRow(plane, y);
+      float* BUTTERAUGLI_RESTRICT out1 = mask_xyb1.PlaneRow(plane, y);
+      for (size_t x = 0; x < xsize; ++x) {
+        out0[x] = w_uhf * uhf0[x] + w_hf * hf0[x];
+        out1[x] = w_uhf * uhf1[x] + w_hf * hf1[x];
+      }
+    }
+  }
+  Mask(mask_xyb0, mask_xyb1, mask, mask_dc, diff_ac);
+}
+
+// Prepares a comparator for the reference image `rgb0`. Images narrower or
+// shorter than 8 pixels are left without frequency data and without a
+// sub-comparator. Otherwise the frequency bands of rgb0 are stored in pi0_
+// and a comparator for the 2x-subsampled image is created in sub_.
+ButteraugliComparator::ButteraugliComparator(const Image3F& rgb0,
+                                             double hf_asymmetry)
+    : xsize_(rgb0.xsize()),
+      ysize_(rgb0.ysize()),
+      hf_asymmetry_(hf_asymmetry),
+      sub_(nullptr) {
+  const bool large_enough = xsize_ >= 8 && ysize_ >= 8;
+  if (large_enough) {
+    Image3F xyb0 = OpsinDynamicsImage(rgb0);
+    SeparateFrequencies(xsize_, ysize_, xyb0, pi0_);
+
+    // Awful recursive construction of samples of different resolution.
+    // This is an after-thought and possibly somewhat parallel in
+    // functionality with the PsychoImage multi-resolution approach.
+    // The recursion ends once a subsampled image fails the size check above.
+    sub_ = new ButteraugliComparator(SubSample2x(rgb0), hf_asymmetry);
+  }
+}
+
+// Releases the sub-comparator allocated with `new` in the constructor.
+// sub_ is nullptr when no sub-comparator was created; deleting it is a no-op.
+ButteraugliComparator::~ButteraugliComparator() {
+  delete sub_;
+}
+
+
+// Computes the masks for the reference image alone by passing pi0_ as both
+// inputs of MaskPsychoImage. The diff_ac output is not requested (nullptr).
+void ButteraugliComparator::Mask(Image3F* BUTTERAUGLI_RESTRICT mask,
+                                 Image3F* BUTTERAUGLI_RESTRICT mask_dc) const {
+  MaskPsychoImage(pi0_, pi0_, xsize_, ysize_, mask, mask_dc, nullptr);
+}
+
+// Writes the difference map between the reference image and `rgb1` into
+// `result`. `result` is left untouched when the reference image is smaller
+// than 8x8. When a usable sub-comparator exists (at least 8x8), the
+// difference map of the 2x-subsampled images is blended in through
+// AddSupersampled2x with weight 0.5.
+void ButteraugliComparator::Diffmap(const Image3F& rgb1, ImageF& result) const {
+  PROFILER_FUNC;
+  const bool too_small = xsize_ < 8 || ysize_ < 8;
+  if (too_small) {
+    return;
+  }
+  DiffmapOpsinDynamicsImage(OpsinDynamicsImage(rgb1), result);
+  if (sub_ == nullptr || sub_->xsize_ < 8 || sub_->ysize_ < 8) {
+    return;
+  }
+  ImageF half_res_diff;
+  sub_->DiffmapOpsinDynamicsImage(
+      OpsinDynamicsImage(SubSample2x(rgb1)), half_res_diff);
+  AddSupersampled2x(half_res_diff, 0.5, result);
+}
+
+// Same as Diffmap() at a single resolution, for an image `xyb1` that has
+// already been converted with OpsinDynamicsImage. Splits xyb1 into frequency
+// bands and delegates to DiffmapPsychoImage, which assigns `result`.
+// `result` is left untouched when the reference image is smaller than 8x8.
+void ButteraugliComparator::DiffmapOpsinDynamicsImage(const Image3F& xyb1,
+                                                      ImageF& result) const {
+  PROFILER_FUNC;
+  if (xsize_ < 8 || ysize_ < 8) {
+    return;
+  }
+  PsychoImage pi1;
+  SeparateFrequencies(xsize_, ysize_, xyb1, pi1);
+  // No pre-allocation of `result`: past the size check above,
+  // DiffmapPsychoImage always overwrites it with a freshly built image.
+  DiffmapPsychoImage(pi1, result);
+}
+
+// Computes the difference map between the stored reference bands (pi0_) and
+// the bands `pi1` of the image under test, and assigns it to `result`.
+// Steps:
+//   1. accumulate Malta-filter differences of the uhf, hf and mf bands
+//      (planes 0 and 1) into block_diff_ac;
+//   2. accumulate weighted squared differences: hf (planes 0 and 1,
+//      asymmetric) and mf into block_diff_ac, lf into block_diff_dc;
+//   3. compute the masks with MaskPsychoImage, which also receives plane 1
+//      of block_diff_ac as its diff_ac argument;
+//   4. combine everything with CombineChannels and CalculateDiffmap.
+// `result` is left untouched when the reference image is smaller than 8x8.
+void ButteraugliComparator::DiffmapPsychoImage(const PsychoImage& pi1,
+                                               ImageF& result) const {
+  PROFILER_FUNC;
+  if (xsize_ < 8 || ysize_ < 8) {
+    return;
+  }
+  Image3F block_diff_dc(xsize_, ysize_);
+  ZeroFillImage(&block_diff_dc);
+  Image3F block_diff_ac(xsize_, ysize_);
+  ZeroFillImage(&block_diff_ac);
+  // uhf band: hf_asymmetry_ multiplies the first weight and divides the
+  // second one.
+  static const double wUhfMalta = 6.03816489582;
+  static const double norm1Uhf = 59.752242104;
+  MaltaDiffMap(pi0_.uhf[1], pi1.uhf[1], wUhfMalta * hf_asymmetry_,
+               wUhfMalta / hf_asymmetry_, norm1Uhf, &block_diff_ac, 1);
+
+  static const double wUhfMaltaX = 22;
+  static const double norm1UhfX = 68.7200152101;
+  MaltaDiffMap(pi0_.uhf[0], pi1.uhf[0], wUhfMaltaX * hf_asymmetry_,
+               wUhfMaltaX / hf_asymmetry_, norm1UhfX, &block_diff_ac, 0);
+
+  // hf band: same scheme, but with the square root of hf_asymmetry_.
+  static const double wHfMalta = 127.682120866;
+  static const double norm1Hf = 113.151889155;
+  MaltaDiffMapLF(pi0_.hf[1], pi1.hf[1], wHfMalta * std::sqrt(hf_asymmetry_),
+                 wHfMalta / std::sqrt(hf_asymmetry_), norm1Hf, &block_diff_ac,
+                 1);
+
+  static const double wHfMaltaX = 32.298692385;
+  static const double norm1HfX = 0.970464895425;
+  MaltaDiffMapLF(pi0_.hf[0], pi1.hf[0], wHfMaltaX * std::sqrt(hf_asymmetry_),
+                 wHfMaltaX / std::sqrt(hf_asymmetry_), norm1HfX, &block_diff_ac,
+                 0);
+
+  // mf band: both weights are equal, so hf_asymmetry_ has no effect here.
+  static const double wMfMalta = 31.5919393824;
+  static const double norm1Mf = 0.72477821106;
+  MaltaDiffMapLF(pi0_.mf.Plane(1), pi1.mf.Plane(1), wMfMalta, wMfMalta, norm1Mf,
+                 &block_diff_ac, 1);
+
+  static const double wMfMaltaX = 800.0;
+  static const double norm1MfX = 1000;
+  MaltaDiffMapLF(pi0_.mf.Plane(0), pi1.mf.Plane(0), wMfMaltaX, wMfMaltaX,
+                 norm1MfX, &block_diff_ac, 0);
+
+  // Squared-difference weights, three per band and indexed by plane:
+  // wmul[0..2] for hf, wmul[3..5] for mf, wmul[6..8] for lf.
+  static const double wmul[9] = {
+    32, 5.0, 0, 1102.34533394, 100, 100, 1.01, 1, 1.745,
+  };
+  for (int c = 0; c < 3; ++c) {
+    if (c < 2) {  // No blue channel error accumulated at HF.
+      L2DiffAsymmetric(pi0_.hf[c], pi1.hf[c], wmul[c] * hf_asymmetry_,
+                       wmul[c] / hf_asymmetry_, &block_diff_ac, c);
+    }
+    L2Diff(pi0_.mf.Plane(c), pi1.mf.Plane(c), wmul[3 + c], &block_diff_ac, c);
+    L2Diff(pi0_.lf.Plane(c), pi1.lf.Plane(c), wmul[6 + c], &block_diff_dc, c);
+  }
+
+  Image3F mask_xyb;
+  Image3F mask_xyb_dc;
+  // The const_cast lets plane 1 of block_diff_ac be passed as the mutable
+  // diff_ac argument.
+  MaskPsychoImage(pi0_, pi1, xsize_, ysize_, &mask_xyb, &mask_xyb_dc,
+                  const_cast<ImageF*>(&block_diff_ac.Plane(1)));
+
+  result = CalculateDiffmap(
+      CombineChannels(mask_xyb, mask_xyb_dc, block_diff_dc, block_diff_ac));
+}
+
+// Allows PaddedMaltaUnit to call either function via overloading.
+// Empty tag types: MaltaTagLF selects the MaltaUnit overload that reads five
+// samples per pattern, MaltaTag the overload that reads up to nine.
+struct MaltaTagLF {};
+struct MaltaTag {};
+
+// Sparse ("LF") Malta filter for one pixel.
+// `d` points at the centre sample of a packed image whose row stride is `xs`.
+// For each of 16 line-shaped patterns through the centre, five samples are
+// summed and the sum is squared; the return value is the total of the 16
+// squares. The patterns reach up to 4 rows and 4 columns away from `d`, so
+// the caller must guarantee that those samples are addressable.
+static float MaltaUnit(MaltaTagLF, const float* BUTTERAUGLI_RESTRICT d,
+                       const int xs) {
+  const int xs3 = 3 * xs;
+  float retval = 0;
+  {
+    // x grows, y constant
+    float sum = d[-4] + d[-2] + d[0] + d[2] + d[4];
+    retval += sum * sum;
+  }
+  {
+    // y grows, x constant
+    float sum = d[-xs3 - xs] + d[-xs - xs] + d[0] + d[xs + xs] + d[xs3 + xs];
+    retval += sum * sum;
+  }
+  {
+    // both grow
+    float sum =
+        d[-xs3 - 3] + d[-xs - xs - 2] + d[0] + d[xs + xs + 2] + d[xs3 + 3];
+    retval += sum * sum;
+  }
+  {
+    // y grows, x shrinks
+    float sum =
+        d[-xs3 + 3] + d[-xs - xs + 2] + d[0] + d[xs + xs - 2] + d[xs3 - 3];
+    retval += sum * sum;
+  }
+  {
+    // y grows -4 to 4, x shrinks 1 -> -1
+    float sum = d[-xs3 - xs + 1] + d[-xs - xs + 1] + d[0] + d[xs + xs - 1] +
+                d[xs3 + xs - 1];
+    retval += sum * sum;
+  }
+  {
+    //  y grows -4 to 4, x grows -1 -> 1
+    float sum = d[-xs3 - xs - 1] + d[-xs - xs - 1] + d[0] + d[xs + xs + 1] +
+                d[xs3 + xs + 1];
+    retval += sum * sum;
+  }
+  {
+    // x grows -4 to 4, y grows -1 to 1
+    float sum = d[-4 - xs] + d[-2 - xs] + d[0] + d[2 + xs] + d[4 + xs];
+    retval += sum * sum;
+  }
+  {
+    // x grows -4 to 4, y shrinks 1 to -1
+    float sum = d[-4 + xs] + d[-2 + xs] + d[0] + d[2 - xs] + d[4 - xs];
+    retval += sum * sum;
+  }
+  // In the diagrams below, each row is one image row (0..8, centre row 4),
+  // '0' marks the centre sample d[0] and '*' marks the other samples read.
+  {
+    /* 0_________
+       1__*______
+       2___*_____
+       3_________
+       4____0____
+       5_________
+       6_____*___
+       7______*__
+       8_________ */
+    float sum =
+        d[-xs3 - 2] + d[-xs - xs - 1] + d[0] + d[xs + xs + 1] + d[xs3 + 2];
+    retval += sum * sum;
+  }
+  {
+    /* 0_________
+       1______*__
+       2_____*___
+       3_________
+       4____0____
+       5_________
+       6___*_____
+       7__*______
+       8_________ */
+    float sum =
+        d[-xs3 + 2] + d[-xs - xs + 1] + d[0] + d[xs + xs - 1] + d[xs3 - 2];
+    retval += sum * sum;
+  }
+  {
+    /* 0_________
+       1_________
+       2_*_______
+       3__*______
+       4____0____
+       5______*__
+       6_______*_
+       7_________
+       8_________ */
+    float sum =
+        d[-xs - xs - 3] + d[-xs - 2] + d[0] + d[xs + 2] + d[xs + xs + 3];
+    retval += sum * sum;
+  }
+  {
+    /* 0_________
+       1_________
+       2_______*_
+       3______*__
+       4____0____
+       5__*______
+       6_*_______
+       7_________
+       8_________ */
+    float sum =
+        d[-xs - xs + 3] + d[-xs + 2] + d[0] + d[xs - 2] + d[xs + xs - 3];
+    retval += sum * sum;
+  }
+  {
+    /* 0_________
+       1_________
+       2________*
+       3______*__
+       4____0____
+       5__*______
+       6*________
+       7_________
+       8_________ */
+
+    float sum =
+        d[xs + xs - 4] + d[xs - 2] + d[0] + d[-xs + 2] + d[-xs - xs + 4];
+    retval += sum * sum;
+  }
+  {
+    /* 0_________
+       1_________
+       2*________
+       3__*______
+       4____0____
+       5______*__
+       6________*
+       7_________
+       8_________ */
+    float sum =
+        d[-xs - xs - 4] + d[-xs - 2] + d[0] + d[xs + 2] + d[xs + xs + 4];
+    retval += sum * sum;
+  }
+  {
+    /* 0__*______
+       1_________
+       2___*_____
+       3_________
+       4____0____
+       5_________
+       6_____*___
+       7_________
+       8______*__ */
+    float sum = d[-xs3 - xs - 2] + d[-xs - xs - 1] + d[0] + d[xs + xs + 1] +
+                d[xs3 + xs + 2];
+    retval += sum * sum;
+  }
+  {
+    /* 0______*__
+       1_________
+       2_____*___
+       3_________
+       4____0____
+       5_________
+       6___*_____
+       7_________
+       8__*______ */
+    float sum = d[-xs3 - xs + 2] + d[-xs - xs + 1] + d[0] + d[xs + xs - 1] +
+                d[xs3 + xs - 2];
+    retval += sum * sum;
+  }
+  return retval;
+}
+
+// Dense Malta filter for one pixel.
+// `d` points at the centre sample of a packed image whose row stride is `xs`.
+// For each of 16 line-shaped patterns through the centre, between seven and
+// nine samples are summed and the sum is squared; the return value is the
+// total of the 16 squares. The patterns reach up to 4 rows and 4 columns away
+// from `d`, so the caller must guarantee that those samples are addressable.
+static float MaltaUnit(MaltaTag, const float* BUTTERAUGLI_RESTRICT d,
+                       const int xs) {
+  const int xs3 = 3 * xs;
+  float retval = 0;
+  {
+    // x grows, y constant
+    float sum =
+        d[-4] + d[-3] + d[-2] + d[-1] + d[0] + d[1] + d[2] + d[3] + d[4];
+    retval += sum * sum;
+  }
+  {
+    // y grows, x constant
+    float sum = d[-xs3 - xs] + d[-xs3] + d[-xs - xs] + d[-xs] + d[0] + d[xs] +
+                d[xs + xs] + d[xs3] + d[xs3 + xs];
+    retval += sum * sum;
+  }
+  {
+    // both grow
+    float sum = d[-xs3 - 3] + d[-xs - xs - 2] + d[-xs - 1] + d[0] + d[xs + 1] +
+                d[xs + xs + 2] + d[xs3 + 3];
+    retval += sum * sum;
+  }
+  {
+    // y grows, x shrinks
+    float sum = d[-xs3 + 3] + d[-xs - xs + 2] + d[-xs + 1] + d[0] + d[xs - 1] +
+                d[xs + xs - 2] + d[xs3 - 3];
+    retval += sum * sum;
+  }
+  {
+    // y grows -4 to 4, x shrinks 1 -> -1
+    float sum = d[-xs3 - xs + 1] + d[-xs3 + 1] + d[-xs - xs + 1] + d[-xs] +
+                d[0] + d[xs] + d[xs + xs - 1] + d[xs3 - 1] + d[xs3 + xs - 1];
+    retval += sum * sum;
+  }
+  {
+    //  y grows -4 to 4, x grows -1 -> 1
+    float sum = d[-xs3 - xs - 1] + d[-xs3 - 1] + d[-xs - xs - 1] + d[-xs] +
+                d[0] + d[xs] + d[xs + xs + 1] + d[xs3 + 1] + d[xs3 + xs + 1];
+    retval += sum * sum;
+  }
+  {
+    // x grows -4 to 4, y grows -1 to 1
+    float sum = d[-4 - xs] + d[-3 - xs] + d[-2 - xs] + d[-1] + d[0] + d[1] +
+                d[2 + xs] + d[3 + xs] + d[4 + xs];
+    retval += sum * sum;
+  }
+  {
+    // x grows -4 to 4, y shrinks 1 to -1
+    float sum = d[-4 + xs] + d[-3 + xs] + d[-2 + xs] + d[-1] + d[0] + d[1] +
+                d[2 - xs] + d[3 - xs] + d[4 - xs];
+    retval += sum * sum;
+  }
+  // In the diagrams below, each row is one image row (0..8, centre row 4),
+  // '0' marks the centre sample d[0] and '*' marks the other samples read.
+  {
+    /* 0_________
+       1__*______
+       2___*_____
+       3___*_____
+       4____0____
+       5_____*___
+       6_____*___
+       7______*__
+       8_________ */
+    float sum = d[-xs3 - 2] + d[-xs - xs - 1] + d[-xs - 1] + d[0] + d[xs + 1] +
+                d[xs + xs + 1] + d[xs3 + 2];
+    retval += sum * sum;
+  }
+  {
+    /* 0_________
+       1______*__
+       2_____*___
+       3_____*___
+       4____0____
+       5___*_____
+       6___*_____
+       7__*______
+       8_________ */
+    float sum = d[-xs3 + 2] + d[-xs - xs + 1] + d[-xs + 1] + d[0] + d[xs - 1] +
+                d[xs + xs - 1] + d[xs3 - 2];
+    retval += sum * sum;
+  }
+  {
+    /* 0_________
+       1_________
+       2_*_______
+       3__**_____
+       4____0____
+       5_____**__
+       6_______*_
+       7_________
+       8_________ */
+    float sum = d[-xs - xs - 3] + d[-xs - 2] + d[-xs - 1] + d[0] + d[xs + 1] +
+                d[xs + 2] + d[xs + xs + 3];
+    retval += sum * sum;
+  }
+  {
+    /* 0_________
+       1_________
+       2_______*_
+       3_____**__
+       4____0____
+       5__**_____
+       6_*_______
+       7_________
+       8_________ */
+    float sum = d[-xs - xs + 3] + d[-xs + 2] + d[-xs + 1] + d[0] + d[xs - 1] +
+                d[xs - 2] + d[xs + xs - 3];
+    retval += sum * sum;
+  }
+  {
+    /* 0_________
+       1_________
+       2_________
+       3______**_
+       4____0*___
+       5__**_____
+       6**_______
+       7_________
+       8_________ */
+
+    float sum = d[xs + xs - 4] + d[xs + xs - 3] + d[xs - 2] + d[xs - 1] + d[0] +
+                d[1] + d[-xs + 2] + d[-xs + 3];
+    retval += sum * sum;
+  }
+  {
+    /* 0_________
+       1_________
+       2**_______
+       3__**_____
+       4____0*___
+       5______**_
+       6_________
+       7_________
+       8_________ */
+    float sum = d[-xs - xs - 4] + d[-xs - xs - 3] + d[-xs - 2] + d[-xs - 1] +
+                d[0] + d[1] + d[xs + 2] + d[xs + 3];
+    retval += sum * sum;
+  }
+  {
+    /* 0__*______
+       1__*______
+       2___*_____
+       3___*_____
+       4____0____
+       5____*____
+       6_____*___
+       7_____*___
+       8_________ */
+    float sum = d[-xs3 - xs - 2] + d[-xs3 - 2] + d[-xs - xs - 1] + d[-xs - 1] +
+                d[0] + d[xs] + d[xs + xs + 1] + d[xs3 + 1];
+    retval += sum * sum;
+  }
+  {
+    /* 0______*__
+       1______*__
+       2_____*___
+       3_____*___
+       4____0____
+       5____*____
+       6___*_____
+       7___*_____
+       8_________ */
+    float sum = d[-xs3 - xs + 2] + d[-xs3 + 2] + d[-xs - xs + 1] + d[-xs + 1] +
+                d[0] + d[xs] + d[xs + xs - 1] + d[xs3 - 1];
+    retval += sum * sum;
+  }
+  return retval;
+}
+
+// Returns MaltaUnit. "fastMode" avoids bounds-checks when x0 and y0 are known
+// to be far enough from the image borders. "diffs" is a packed image.
+//
+// diffs:  packed xsize_ * ysize_ image (row stride xsize_).
+// x0, y0: pixel to evaluate; must lie inside the image.
+// When the 9x9 neighbourhood of (x0, y0) does not fit inside the image, it is
+// copied into a local 9x9 buffer with out-of-image samples set to zero, and
+// MaltaUnit runs on that buffer instead.
+template <bool fastMode, class Tag>
+static BUTTERAUGLI_INLINE float PaddedMaltaUnit(
+    float* BUTTERAUGLI_RESTRICT diffs, const size_t x0, const size_t y0,
+    const size_t xsize_, const size_t ysize_) {
+  // size_t rather than int: the offset must not be truncated for images
+  // holding more than INT_MAX samples.
+  const size_t ix0 = y0 * xsize_ + x0;
+  const float* BUTTERAUGLI_RESTRICT d = &diffs[ix0];
+  // "+ 4 <" instead of "< size - 4" so the test cannot underflow.
+  if (fastMode ||
+      (x0 >= 4 && y0 >= 4 && x0 + 4 < xsize_ && y0 + 4 < ysize_)) {
+    return MaltaUnit(Tag(), d, xsize_);
+  }
+
+  float borderimage[9 * 9];
+  for (size_t dy = 0; dy < 9; ++dy) {
+    // Unsigned arithmetic: a coordinate above/left of the image wraps around
+    // to a huge value, so a single ">= size" test rejects both sides.
+    const size_t y = y0 + dy - 4;
+    float* const border_row = &borderimage[dy * 9];
+    if (y >= ysize_) {
+      for (size_t dx = 0; dx < 9; ++dx) {
+        border_row[dx] = 0.0f;
+      }
+    } else {
+      for (size_t dx = 0; dx < 9; ++dx) {
+        const size_t x = x0 + dx - 4;
+        if (x >= xsize_) {
+          border_row[dx] = 0.0f;
+        } else {
+          border_row[dx] = diffs[y * xsize_ + x];
+        }
+      }
+    }
+  }
+  // Centre of the 9x9 buffer, whose row stride is 9.
+  return MaltaUnit(Tag(), &borderimage[4 * 9 + 4], 9);
+}
+
+// Adds the Malta-filter difference between `lum0` (original) and `lum1`
+// (deformed copy) to plane `c` of `block_diff_ac`.
+//
+// Step 1 builds a packed per-pixel image `diffs`: the difference
+// lum0 - lum1 scaled by norm2_0gt1 / (norm1 + mean(|lum0|, |lum1|)), plus an
+// extra term (scaled with norm2_0lt1 and given the sign of the difference)
+// when lum1 lies outside the band between 0.55 * lum0 and 1.05 * lum0.
+// Step 2 adds PaddedMaltaUnit<.., Tag>(diffs) at every pixel, using the
+// unchecked fast path for pixels at least 4 samples away from every border.
+//
+// Tag selects the MaltaUnit overload. `len` and `mulli` only enter through
+// the pre-weights below.
+// Precondition: xsize_ >= 4 and ysize_ >= 4 (the border loops assume it); the
+// callers in this file bail out earlier for images smaller than 8x8.
+template <class Tag>
+static void MaltaDiffMapImpl(const ImageF& lum0, const ImageF& lum1,
+                             const size_t xsize_, const size_t ysize_,
+                             const double w_0gt1, const double w_0lt1,
+                             const double norm1, const double len,
+                             const double mulli,
+                             Image3F* BUTTERAUGLI_RESTRICT block_diff_ac,
+                             size_t c) {
+  const float kWeight0 = 0.5;
+  const float kWeight1 = 0.33;
+
+  const double w_pre0gt1 = mulli * std::sqrt(kWeight0 * w_0gt1) / (len * 2 + 1);
+  const double w_pre0lt1 = mulli * std::sqrt(kWeight1 * w_0lt1) / (len * 2 + 1);
+  const float norm2_0gt1 = w_pre0gt1 * norm1;
+  const float norm2_0lt1 = w_pre0lt1 * norm1;
+
+  std::vector<float> diffs(ysize_ * xsize_);
+  for (size_t y = 0, ix = 0; y < ysize_; ++y) {
+    const float* BUTTERAUGLI_RESTRICT row0 = lum0.Row(y);
+    const float* BUTTERAUGLI_RESTRICT row1 = lum1.Row(y);
+    for (size_t x = 0; x < xsize_; ++x, ++ix) {
+      const float absval = 0.5f * (std::abs(row0[x]) + std::abs(row1[x]));
+      const float diff = row0[x] - row1[x];
+      const float scaler = norm2_0gt1 / (static_cast<float>(norm1) + absval);
+
+      // Primary symmetric quadratic objective.
+      diffs[ix] = scaler * diff;
+
+      const float scaler2 = norm2_0lt1 / (static_cast<float>(norm1) + absval);
+      const double fabs0 = fabs(row0[x]);
+
+      // Secondary half-open quadratic objectives.
+      const double too_small = 0.55 * fabs0;
+      const double too_big = 1.05 * fabs0;
+
+      // Applies `impact` to diffs[ix] with the sign of `diff`.
+      const auto add_impact = [&diffs, ix, diff](const double impact) {
+        if (diff < 0) {
+          diffs[ix] -= impact;
+        } else {
+          diffs[ix] += impact;
+        }
+      };
+
+      if (row0[x] < 0) {
+        if (row1[x] > -too_small) {
+          add_impact(scaler2 * (row1[x] + too_small));
+        } else if (row1[x] < -too_big) {
+          add_impact(scaler2 * (-row1[x] - too_big));
+        }
+      } else {
+        if (row1[x] < too_small) {
+          add_impact(scaler2 * (too_small - row1[x]));
+        } else if (row1[x] > too_big) {
+          add_impact(scaler2 * (row1[x] - too_big));
+        }
+      }
+    }
+  }
+
+  size_t y0 = 0;
+  // Top
+  for (; y0 < 4; ++y0) {
+    float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->PlaneRow(c, y0);
+    for (size_t x0 = 0; x0 < xsize_; ++x0) {
+      row_diff[x0] +=
+          PaddedMaltaUnit<false, Tag>(&diffs[0], x0, y0, xsize_, ysize_);
+    }
+  }
+
+  // Middle
+  for (; y0 < ysize_ - 4; ++y0) {
+    float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->PlaneRow(c, y0);
+    size_t x0 = 0;
+    for (; x0 < 4; ++x0) {
+      row_diff[x0] +=
+          PaddedMaltaUnit<false, Tag>(&diffs[0], x0, y0, xsize_, ysize_);
+    }
+    // Interior pixels: the whole 9x9 neighbourhood is inside the image.
+    for (; x0 < xsize_ - 4; ++x0) {
+      row_diff[x0] +=
+          PaddedMaltaUnit<true, Tag>(&diffs[0], x0, y0, xsize_, ysize_);
+    }
+
+    for (; x0 < xsize_; ++x0) {
+      row_diff[x0] +=
+          PaddedMaltaUnit<false, Tag>(&diffs[0], x0, y0, xsize_, ysize_);
+    }
+  }
+
+  // Bottom
+  for (; y0 < ysize_; ++y0) {
+    float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->PlaneRow(c, y0);
+    for (size_t x0 = 0; x0 < xsize_; ++x0) {
+      row_diff[x0] +=
+          PaddedMaltaUnit<false, Tag>(&diffs[0], x0, y0, xsize_, ysize_);
+    }
+  }
+}
+
+// Adds the dense Malta-filter difference (MaltaTag overload of MaltaUnit)
+// between lum0 and lum1 to plane `c` of block_diff_ac, using this
+// comparator's image size and the fixed `len` / `mulli` constants below.
+void ButteraugliComparator::MaltaDiffMap(
+    const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
+    const double w_0lt1, const double norm1,
+    Image3F* BUTTERAUGLI_RESTRICT block_diff_ac, size_t c) const {
+  PROFILER_FUNC;
+  const double len = 3.75;
+  static const double mulli = 0.371226387683;
+  MaltaDiffMapImpl<MaltaTag>(lum0, lum1, xsize_, ysize_, w_0gt1, w_0lt1, norm1,
+                             len, mulli, block_diff_ac, c);
+}
+
+// Adds the sparse Malta-filter difference (MaltaTagLF overload of MaltaUnit)
+// between lum0 and lum1 to plane `c` of block_diff_ac. Identical to
+// MaltaDiffMap except for the tag type and the value of `mulli`.
+void ButteraugliComparator::MaltaDiffMapLF(
+    const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
+    const double w_0lt1, const double norm1,
+    Image3F* BUTTERAUGLI_RESTRICT block_diff_ac, size_t c) const {
+  PROFILER_FUNC;
+  const double len = 3.75;
+  static const double mulli = 0.692743715861;
+  MaltaDiffMapImpl<MaltaTagLF>(lum0, lum1, xsize_, ysize_, w_0gt1, w_0lt1,
+                               norm1, len, mulli, block_diff_ac, c);
+}
+
+// Returns an xsize_-by-ysize_ image where each pixel is
+//   DotProduct(block_diff_dc, mask_xyb_dc) + DotProduct(block_diff_ac, mask_xyb)
+// with the dot products taken over the three planes at that pixel.
+ImageF ButteraugliComparator::CombineChannels(
+    const Image3F& mask_xyb, const Image3F& mask_xyb_dc,
+    const Image3F& block_diff_dc, const Image3F& block_diff_ac) const {
+  PROFILER_FUNC;
+  ImageF combined(xsize_, ysize_);
+  for (size_t y = 0; y < ysize_; ++y) {
+    float* BUTTERAUGLI_RESTRICT out = combined.Row(y);
+    for (size_t x = 0; x < xsize_; ++x) {
+      // Gather the three-plane vectors for this pixel.
+      float ac_mask[3];
+      float dc_mask[3];
+      float dc_diff[3];
+      float ac_diff[3];
+      for (int plane = 0; plane < 3; ++plane) {
+        ac_mask[plane] = mask_xyb.PlaneRow(plane, y)[x];
+        dc_mask[plane] = mask_xyb_dc.PlaneRow(plane, y)[x];
+        dc_diff[plane] = block_diff_dc.PlaneRow(plane, y)[x];
+        ac_diff[plane] = block_diff_ac.PlaneRow(plane, y)[x];
+      }
+      out[x] = (DotProduct(dc_diff, dc_mask) + DotProduct(ac_diff, ac_mask));
+    }
+  }
+  return combined;
+}
+
+// Returns the largest value found in `diffmap`, or 0 when every value is
+// at most 0 (including the empty image).
+double ButteraugliScoreFromDiffmap(const ImageF& diffmap) {
+  PROFILER_FUNC;
+  float peak = 0.0f;
+  for (size_t y = 0; y < diffmap.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT values = diffmap.ConstRow(y);
+    for (size_t x = 0; x < diffmap.xsize(); ++x) {
+      if (peak < values[x]) {
+        peak = values[x];
+      }
+    }
+  }
+  return peak;
+}
+
+// ===== Functions used by Mask only =====
+// Builds a 512-entry lookup table. For index i:
+//   c     = mul / (0.01 * scaler * i + offset)
+//   value = kGlobalScale * (1 + extmul * (c + extoff)), floored at 1e-5
+//   table[i] = value * value
+static std::array<double, 512> MakeMask(double extmul, double extoff,
+                                        double mul, double offset,
+                                        double scaler) {
+  std::array<double, 512> table;
+  int index = 0;
+  for (double& entry : table) {
+    const double c = mul / ((0.01 * scaler * index) + offset);
+    double value = kGlobalScale * (1.0 + extmul * (c + extoff));
+    if (value < 1e-5) {
+      value = 1e-5;
+    }
+    assert(value >= 0.0);
+    entry = value * value;
+    ++index;
+  }
+  return table;
+}
+
+// Evaluates the X-channel masking table at `delta` through
+// InterpolateClampNegative. The 512-entry table is built by MakeMask on the
+// first call and reused afterwards.
+double MaskX(double delta) {
+  static const double kExtMul = 1.71276311069;
+  static const double kExtOff = 1.84833742945;
+  static const double kMul = 5.01393527954;
+  static const double kOffset = 0.256336172307;
+  static const double kScaler = 231.979765086;
+  static const std::array<double, 512> kTable =
+      MakeMask(kExtMul, kExtOff, kMul, kOffset, kScaler);
+  const double* const table = kTable.data();
+  return InterpolateClampNegative(table, kTable.size(), delta);
+}
+
+// Evaluates the Y-channel masking table at `delta` through
+// InterpolateClampNegative. The 512-entry table is built by MakeMask on the
+// first call and reused afterwards.
+double MaskY(double delta) {
+  static const double kExtMul = 2.00974476653;
+  static const double kExtOff = -2.62615693295;
+  static const double kMul = 5.86347021502;
+  static const double kOffset = 1.19855542596;
+  static const double kScaler = 2.73845259583;
+  static const std::array<double, 512> kTable =
+      MakeMask(kExtMul, kExtOff, kMul, kOffset, kScaler);
+  const double* const table = kTable.data();
+  return InterpolateClampNegative(table, kTable.size(), delta);
+}
+
+// Evaluates the X-channel DC masking table at `delta` through
+// InterpolateClampNegative. The 512-entry table is built by MakeMask on the
+// first call and reused afterwards.
+double MaskDcX(double delta) {
+  static const double kExtMul = 6.65259302165;
+  static const double kExtOff = 3.09609358929;
+  static const double kMul = 0.345897482985;
+  static const double kOffset = 0.0867311118933;
+  static const double kScaler = 20.6187368059;
+  static const std::array<double, 512> kTable =
+      MakeMask(kExtMul, kExtOff, kMul, kOffset, kScaler);
+  const double* const table = kTable.data();
+  return InterpolateClampNegative(table, kTable.size(), delta);
+}
+
+// Evaluates the Y-channel DC masking table at `delta` through
+// InterpolateClampNegative. The 512-entry table is built by MakeMask on the
+// first call and reused afterwards.
+double MaskDcY(double delta) {
+  static const double kExtMul = 0.00736792857018;
+  static const double kExtOff = 39.5486204165;
+  static const double kMul = 1.96532671263;
+  static const double kOffset = 0.0069689717237;
+  static const double kScaler = 6.0;
+  static const std::array<double, 512> kTable =
+      MakeMask(kExtMul, kExtOff, kMul, kOffset, kScaler);
+  const double* const table = kTable.data();
+  return InterpolateClampNegative(table, kTable.size(), delta);
+}
+
+// Returns an image of the same size as `xyb0` holding, per pixel, a local
+// activity measure shared by both inputs:
+//   sup  = sum of |centre - neighbour| over the four direct neighbours
+//          + 3 * (|below - above| + |left - right|)
+//   out  = mul * min(sup(xyb0), sup(xyb1))
+// `out` is clamped to `cutoff` and then compressed twice: a value at or above
+// 0.5 * cutoff (then 0.25 * cutoff) is replaced by the mean of itself and
+// that limit.
+// Neighbours outside the image are mirrored to the opposite neighbour, or to
+// the pixel itself when the image is a single row/column.
+// `xyb1` is read over the dimensions of `xyb0`.
+ImageF DiffPrecomputeX(const ImageF& xyb0, const ImageF& xyb1,
+                       float mul, float cutoff) {
+  PROFILER_FUNC;
+  const size_t xsize = xyb0.xsize();
+  const size_t ysize = xyb0.ysize();
+  ImageF result(xsize, ysize);
+  // Compression limits. They depend on the `cutoff` argument, so they are
+  // plain locals recomputed on every call; a function-local static would keep
+  // the value from the first call for the rest of the program.
+  const double half_cutoff = 0.5 * cutoff;
+  const double quarter_cutoff = 0.25 * cutoff;
+  size_t x1, y1;
+  size_t x2, y2;
+  for (size_t y = 0; y < ysize; ++y) {
+    // y2: next row, mirrored at the bottom border.
+    if (y + 1 < ysize) {
+      y2 = y + 1;
+    } else if (y > 0) {
+      y2 = y - 1;
+    } else {
+      y2 = y;
+    }
+    // y1: previous row, mirrored at the top border.
+    if (y == 0 && ysize >= 2) {
+      y1 = y + 1;
+    } else if (y > 0) {
+      y1 = y - 1;
+    } else {
+      y1 = y;
+    }
+    const float* BUTTERAUGLI_RESTRICT row0_in = xyb0.Row(y);
+    const float* BUTTERAUGLI_RESTRICT row1_in = xyb1.Row(y);
+    const float* BUTTERAUGLI_RESTRICT row0_in1 = xyb0.Row(y1);
+    const float* BUTTERAUGLI_RESTRICT row1_in1 = xyb1.Row(y1);
+    const float* BUTTERAUGLI_RESTRICT row0_in2 = xyb0.Row(y2);
+    const float* BUTTERAUGLI_RESTRICT row1_in2 = xyb1.Row(y2);
+    float* BUTTERAUGLI_RESTRICT row_out = result.Row(y);
+    for (size_t x = 0; x < xsize; ++x) {
+      // x2: next column, mirrored at the right border.
+      if (x + 1 < xsize) {
+        x2 = x + 1;
+      } else if (x > 0) {
+        x2 = x - 1;
+      } else {
+        x2 = x;
+      }
+      // x1: previous column, mirrored at the left border.
+      if (x == 0 && xsize >= 2) {
+        x1 = x + 1;
+      } else if (x > 0) {
+        x1 = x - 1;
+      } else {
+        x1 = x;
+      }
+      double sup0 =
+          (fabs(row0_in[x] - row0_in[x2]) +
+           fabs(row0_in[x] - row0_in2[x]) +
+           fabs(row0_in[x] - row0_in[x1]) +
+           fabs(row0_in[x] - row0_in1[x]) +
+           3 * (fabs(row0_in2[x] - row0_in1[x]) +
+                fabs(row0_in[x1] - row0_in[x2])));
+      double sup1 =
+          (fabs(row1_in[x] - row1_in[x2]) +
+           fabs(row1_in[x] - row1_in2[x]) +
+           fabs(row1_in[x] - row1_in[x1]) +
+           fabs(row1_in[x] - row1_in1[x]) +
+           3 * (fabs(row1_in2[x] - row1_in1[x]) +
+                fabs(row1_in[x1] - row1_in[x2])));
+
+      row_out[x] = mul * std::min(sup0, sup1);
+      if (row_out[x] >= cutoff) {
+        row_out[x] = cutoff;
+      }
+      // First compression step, against 0.5 * cutoff.
+      if (row_out[x] >= half_cutoff) {
+        row_out[x] += half_cutoff;
+        row_out[x] *= 0.5;
+      }
+      // Second compression step, against 0.25 * cutoff.
+      if (row_out[x] >= quarter_cutoff) {
+        row_out[x] += quarter_cutoff;
+        row_out[x] *= 0.5;
+      }
+    }
+  }
+  return result;
+}
+
+// Precomputes the Y-channel masking activity of both images, so that the
+// resulting maps can also be used for similarity comparisons.
+// out0 receives the log-compressed activity of xyb0 and out1 that of xyb1;
+// both are written row by row and are not allocated here, so the caller has
+// to pass images that already have the dimensions of xyb0.
+void DiffPrecomputeY(const ImageF& xyb0, const ImageF& xyb1,
+                     float mul, float mul2,
+                     ImageF *out0, ImageF *out1) {
+  PROFILER_FUNC;
+  const size_t width = xyb0.xsize();
+  const size_t height = xyb0.ysize();
+  // kBias makes log behave more linearly.
+  static const double kBias = 7;
+  // Sum of absolute differences around column x of `mid`; `before`/`after`
+  // are the neighbouring rows and xa/xb the neighbouring columns.
+  const auto local_activity = [](const float* mid, const float* before,
+                                 const float* after, size_t x, size_t xa,
+                                 size_t xb) -> double {
+    return (fabs(mid[x] - mid[xb]) +
+            fabs(mid[x] - after[x]) +
+            fabs(mid[x] - mid[xa]) +
+            fabs(mid[x] - before[x]) +
+            3 * (fabs(after[x] - before[x]) +
+                 fabs(mid[xa] - mid[xb])));
+  };
+  for (size_t y = 0; y < height; ++y) {
+    // Neighbour rows, mirrored at the border (or y itself for one row).
+    const size_t y_after = (y + 1 < height) ? y + 1 : (y > 0 ? y - 1 : y);
+    const size_t y_before = (y > 0) ? y - 1 : (height >= 2 ? y + 1 : y);
+    const float* BUTTERAUGLI_RESTRICT mid0 = xyb0.Row(y);
+    const float* BUTTERAUGLI_RESTRICT mid1 = xyb1.Row(y);
+    const float* BUTTERAUGLI_RESTRICT before0 = xyb0.Row(y_before);
+    const float* BUTTERAUGLI_RESTRICT before1 = xyb1.Row(y_before);
+    const float* BUTTERAUGLI_RESTRICT after0 = xyb0.Row(y_after);
+    const float* BUTTERAUGLI_RESTRICT after1 = xyb1.Row(y_after);
+    float* BUTTERAUGLI_RESTRICT dst0 = out0->Row(y);
+    float* BUTTERAUGLI_RESTRICT dst1 = out1->Row(y);
+    for (size_t x = 0; x < width; ++x) {
+      const size_t xb = (x + 1 < width) ? x + 1 : (x > 0 ? x - 1 : x);
+      const size_t xa = (x > 0) ? x - 1 : (width >= 2 ? x + 1 : x);
+      const double sup0 = local_activity(mid0, before0, after0, x, xa, xb);
+      const double sup1 = local_activity(mid1, before1, after1, x, xa, xb);
+      dst0[x] = mul * (log(sup0 * sup0 * mul2 + kBias) - log(kBias));
+      dst1[x] = mul * (log(sup1 * sup1 * mul2 + kBias) - log(kBias));
+    }
+  }
+}
+
+// Builds the AC (`mask`) and DC (`mask_dc`) masking images from the X and Y
+// planes of the two inputs. Both outputs are reallocated to the size of xyb0.
+// If `diff_ac` is non-null, the squared, weighted difference between the
+// blurred Y activities of the two images is added to it.
+void Mask(const Image3F& xyb0, const Image3F& xyb1,
+          Image3F* BUTTERAUGLI_RESTRICT mask,
+          Image3F* BUTTERAUGLI_RESTRICT mask_dc,
+          ImageF* BUTTERAUGLI_RESTRICT diff_ac) {
+  PROFILER_FUNC;
+  const size_t xsize = xyb0.xsize();
+  const size_t ysize = xyb0.ysize();
+  *mask = Image3F(xsize, ysize);
+  *mask_dc = Image3F(xsize, ysize);
+  // Weights of the two Y blurs; `normalizer` rescales them to sum to one.
+  static const double kWeights[2] = {
+    0.175124119693,
+    0.236069675367,
+  };
+  const double normalizer = 1.0 / (kWeights[0] + kWeights[1]);
+  static const double kSigmaYNarrow = 1.63479141169;
+  static const double kSigmaYWide = 8.0;
+  static const double kSigmaX = 8.0;
+  static const double kBorderRatio = 0;
+
+  {
+    // X component: the blurred activity is parked in plane 0 of `mask` and
+    // turned into the final mask values in the last loop of this function.
+    static const double kMul = 0.533043878407;
+    static const double kCutoff = 0.5;
+    ImageF diff = DiffPrecomputeX(xyb0.Plane(0), xyb1.Plane(0), kMul, kCutoff);
+    ImageF blurred(diff.xsize(), diff.ysize());
+    Blur(diff, kSigmaX, kBorderRatio, &blurred);
+    for (size_t y = 0; y < ysize; ++y) {
+      const float* src = blurred.Row(y);
+      float* dst = mask->PlaneRow(0, y);
+      for (size_t x = 0; x < xsize; ++x) {
+        dst[x] = src[x];
+      }
+    }
+  }
+  {
+    // Y component: each image's activity is blurred at two radii.
+    static const double kMul = 0.559;
+    static const double kMul2 = 1.0;
+    ImageF diff0(xsize, ysize);
+    ImageF diff1(xsize, ysize);
+    ImageF blurred0_a(xsize, ysize);
+    ImageF blurred0_b(xsize, ysize);
+    ImageF blurred1_a(xsize, ysize);
+    ImageF blurred1_b(xsize, ysize);
+    DiffPrecomputeY(xyb0.Plane(1), xyb1.Plane(1), kMul, kMul2, &diff0, &diff1);
+    Blur(diff0, kSigmaYNarrow, kBorderRatio, &blurred0_a);
+    Blur(diff0, kSigmaYWide, kBorderRatio, &blurred0_b);
+    Blur(diff1, kSigmaYNarrow, kBorderRatio, &blurred1_a);
+    Blur(diff1, kSigmaYWide, kBorderRatio, &blurred1_b);
+    static const double kMaskToErrorMul = 3.09660544871;
+    for (size_t y = 0; y < ysize; ++y) {
+      const float* b0a = blurred0_a.Row(y);
+      const float* b0b = blurred0_b.Row(y);
+      const float* b1a = blurred1_a.Row(y);
+      const float* b1b = blurred1_b.Row(y);
+      float* mask_y = mask->PlaneRow(1, y);
+      float* ac_row = (diff_ac != nullptr) ? diff_ac->Row(y) : nullptr;
+      for (size_t x = 0; x < xsize; ++x) {
+        // The mask itself only uses the second image's activity.
+        const double val = normalizer * (
+            kWeights[0] * b1a[x] +
+            kWeights[1] * b1b[x]);
+        mask_y[x] = val;
+        if (ac_row == nullptr) continue;
+        const double va = b0a[x] - b1a[x];
+        const double wa = kMaskToErrorMul * normalizer * kWeights[0] * va;
+        const double vb = b0b[x] - b1b[x];
+        const double wb = kMaskToErrorMul * normalizer * kWeights[1] * vb;
+        ac_row[x] += wa * wa + wb * wb;
+      }
+    }
+  }
+  // B component
+  static const double w00 = 425.68063445;
+  static const double w11 = 5.99207318771;
+  static const double w_ytob_hf = 0.0513729628327;
+  static const double w_ytob_lf = 30.6362338596;
+  static const double p1_to_p0 = 0.0812601733358;
+
+  for (size_t y = 0; y < ysize; ++y) {
+    float* ac0 = mask->PlaneRow(0, y);
+    float* ac1 = mask->PlaneRow(1, y);
+    float* ac2 = mask->PlaneRow(2, y);
+    float* dc0 = mask_dc->PlaneRow(0, y);
+    float* dc1 = mask_dc->PlaneRow(1, y);
+    float* dc2 = mask_dc->PlaneRow(2, y);
+    for (size_t x = 0; x < xsize; ++x) {
+      // Read the parked activities before they are overwritten below.
+      const double s0 = ac0[x];
+      const double s1 = ac1[x];
+      const double p1 = w11 * s1;
+      const double p0 = w00 * s0 + p1_to_p0 * p1;
+
+      ac0[x] = MaskX(p0);
+      ac1[x] = MaskY(p1);
+      ac2[x] = w_ytob_hf * MaskY(p1);
+      dc0[x] = MaskDcX(p0);
+      dc1[x] = MaskDcY(p1);
+      dc2[x] = w_ytob_lf * MaskDcY(p1);
+    }
+  }
+}
+
+// Computes the butteraugli difference map between rgb0 and rgb1.
+//
+// Returns the PIK_FAILURE result when either dimension is zero or when the
+// two images differ in size; otherwise writes an xsize x ysize map into
+// result_image and returns true.
+//
+// Images narrower or shorter than 8 pixels are padded to at least 8 x 8 by
+// replicating edge pixels, compared at that size, and the region that
+// corresponds to the original pixels is copied back.
+bool ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1,
+                        double hf_asymmetry, ImageF& result_image) {
+  PROFILER_FUNC;
+  const size_t xsize = rgb0.xsize();
+  const size_t ysize = rgb0.ysize();
+  if (xsize < 1 || ysize < 1) {
+    return PIK_FAILURE("Zero-sized image");
+  }
+  if (!SameSize(rgb0, rgb1)) {
+    return PIK_FAILURE("Size mismatch");
+  }
+  static const size_t kMax = 8;
+  if (xsize < kMax || ysize < kMax) {
+    // Butteraugli values for small (where xsize or ysize is smaller
+    // than 8 pixels) images are non-sensical, but most likely it is
+    // less disruptive to try to compute something than just give up.
+    // Temporarily extend the borders of the image to fit 8 x 8 size.
+    const size_t xborder = xsize < kMax ? (kMax - xsize) / 2 : 0;
+    const size_t yborder = ysize < kMax ? (kMax - ysize) / 2 : 0;
+    const size_t xscaled = std::max<size_t>(kMax, xsize);
+    const size_t yscaled = std::max<size_t>(kMax, ysize);
+    Image3F scaled0(xscaled, yscaled);
+    Image3F scaled1(xscaled, yscaled);
+    for (int c = 0; c < 3; ++c) {
+      for (size_t y = 0; y < yscaled; ++y) {
+        // Source row: (y - yborder) clamped to [0, ysize - 1], computed
+        // without letting the unsigned subtraction wrap around.
+        const size_t y2 =
+            std::min<size_t>(ysize - 1, y > yborder ? y - yborder : 0);
+        for (size_t x = 0; x < xscaled; ++x) {
+          const size_t x2 =
+              std::min<size_t>(xsize - 1, x > xborder ? x - xborder : 0);
+          scaled0.PlaneRow(c, y)[x] = rgb0.PlaneRow(c, y2)[x2];
+          scaled1.PlaneRow(c, y)[x] = rgb1.PlaneRow(c, y2)[x2];
+        }
+      }
+    }
+    ImageF diffmap_scaled;
+    if (!ButteraugliDiffmap(scaled0, scaled1, hf_asymmetry, diffmap_scaled)) {
+      // diffmap_scaled was never filled in; reading rows from it would be
+      // out of bounds, so report the failure without touching result_image.
+      return false;
+    }
+    result_image = ImageF(xsize, ysize);
+    for (size_t y = 0; y < ysize; ++y) {
+      for (size_t x = 0; x < xsize; ++x) {
+        result_image.Row(y)[x] = diffmap_scaled.Row(y + yborder)[x + xborder];
+      }
+    }
+    return true;
+  }
+  ButteraugliComparator butteraugli(rgb0, hf_asymmetry);
+  butteraugli.Diffmap(rgb1, result_image);
+  return true;
+}
+
+// Fills `diffmap` through ButteraugliDiffmap() and, when that succeeds,
+// stores the score derived from the map in `diffvalue`. Returns false and
+// leaves `diffvalue` untouched when the diffmap could not be computed.
+bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1,
+                          float hf_asymmetry, ImageF& diffmap,
+                          double& diffvalue) {
+  const bool have_map = ButteraugliDiffmap(rgb0, rgb1, hf_asymmetry, diffmap);
+  if (have_map) {
+    diffvalue = ButteraugliScoreFromDiffmap(diffmap);
+  }
+  return have_map;
+}
+
+// Maps a butteraugli score onto a continuous class value using two logistic
+// curves joined at score == 1.0: scores below 1.0 land in
+// [kScaler .. 2.0], scores of 1.0 and above in [0 .. kScaler].
+double ButteraugliFuzzyClass(double score) {
+  static const double kWidthUp = 5.10228441116;
+  static const double kWidthDown = 4.96467433842;
+  static const double kTop = 2.0;
+  static const double kScaler = 0.827108297066;
+  if (score < 1.0) {
+    // Logistic value in [1 .. 2] for this branch.
+    double upper = kTop / (1.0 + exp((score - 1.0) * kWidthDown));
+    upper -= 1.0;            // from [1 .. 2] to [0 .. 1]
+    upper *= 2.0 - kScaler;  // from [0 .. 1] to [0 .. 2.0 - kScaler]
+    upper += kScaler;        // from [0 .. 2.0 - kScaler] to [kScaler .. 2.0]
+    return upper;
+  }
+  // Logistic value in [0 .. 1], rescaled to [0 .. kScaler].
+  double lower = kTop / (1.0 + exp((score - 1.0) * kWidthUp));
+  lower *= kScaler;
+  return lower;
+}
+
+// #define PRINT_OUT_NORMALIZATION
+
+// Searches for the score whose ButteraugliFuzzyClass() value is `seek`.
+// Starting from 0, the position is moved by a step that halves every
+// iteration (1.0 down to 1e-10): down when the current class is below
+// `seek`, up otherwise.
+double ButteraugliFuzzyInverse(double seek) {
+  double pos = 0;
+  double step = 1.0;
+  while (step >= 1e-10) {
+    const bool class_too_low = ButteraugliFuzzyClass(pos) < seek;
+    pos += class_too_low ? -step : step;
+    step *= 0.5;
+  }
+#ifdef PRINT_OUT_NORMALIZATION
+  if (seek == 1.0) {
+    fprintf(stderr, "Fuzzy inverse %g\n", pos);
+  }
+#endif
+  return pos;
+}
+
+#ifdef PRINT_OUT_NORMALIZATION
+// Debug aid: this initializer calls ButteraugliFuzzyInverse(1.0) during
+// static initialization, which prints the result to stderr (see the
+// PRINT_OUT_NORMALIZATION branch inside that function).
+static double print_out_normalization = ButteraugliFuzzyInverse(1.0);
+#endif
+
+
+
+namespace {
+
+// Converts a distance `score` into a heat-map colour written to rgb[0..2].
+// Scores below good_threshold use the first 30% of the colour ramp, scores
+// between the thresholds the next 15%, and larger scores the remainder.
+// Neighbouring ramp entries are blended linearly and the blend is passed
+// through a square root before being scaled to 0..255.
+void ScoreToRgb(double score, double good_threshold, double bad_threshold,
+                uint8_t rgb[3]) {
+  static const double kRamp[12][3] = {
+      {0, 0, 0},
+      {0, 0, 1},
+      {0, 1, 1},
+      {0, 1, 0},  // Good level
+      {1, 1, 0},
+      {1, 0, 0},  // Bad level
+      {1, 0, 1},
+      {0.5, 0.5, 1.0},
+      {1.0, 0.5, 0.5},  // Pastel colors for the very bad quality range.
+      {1.0, 1.0, 0.5},
+      {1, 1, 1},
+      {1, 1, 1},
+  };
+  static const int kRampSize = sizeof(kRamp) / sizeof(kRamp[0]);
+
+  // Position along the ramp as a fraction.
+  double fraction;
+  if (score < good_threshold) {
+    fraction = (score / good_threshold) * 0.3;
+  } else if (score < bad_threshold) {
+    fraction = 0.3 +
+               (score - good_threshold) / (bad_threshold - good_threshold) *
+                   0.15;
+  } else {
+    fraction = 0.45 + (score - bad_threshold) / (bad_threshold * 12) * 0.5;
+  }
+
+  // Clamp so that both `lower` and `lower + 1` are valid ramp rows.
+  double position = std::max<double>(fraction * (kRampSize - 1), 0.0);
+  position = std::min<double>(position, kRampSize - 2);
+  const int lower = static_cast<int>(position);
+  const double mix = position - lower;
+  for (int channel = 0; channel < 3; ++channel) {
+    const double blended =
+        mix * kRamp[lower + 1][channel] + (1 - mix) * kRamp[lower][channel];
+    rgb[channel] = static_cast<uint8_t>(255 * pow(blended, 0.5) + 0.5);
+  }
+}
+
+}  // namespace
+
+// Renders a distance map as an 8-bit RGB heat map of the same size, colouring
+// every pixel with ScoreToRgb() and the two given thresholds.
+Image3B CreateHeatMapImage(const ImageF& distmap, double good_threshold,
+                           double bad_threshold) {
+  const size_t width = distmap.xsize();
+  const size_t height = distmap.ysize();
+  Image3B heatmap(width, height);
+  for (size_t y = 0; y < height; ++y) {
+    const float* BUTTERAUGLI_RESTRICT scores = distmap.ConstRow(y);
+    uint8_t* BUTTERAUGLI_RESTRICT red = heatmap.PlaneRow(0, y);
+    uint8_t* BUTTERAUGLI_RESTRICT green = heatmap.PlaneRow(1, y);
+    uint8_t* BUTTERAUGLI_RESTRICT blue = heatmap.PlaneRow(2, y);
+    for (size_t x = 0; x < width; ++x) {
+      uint8_t colour[3];
+      ScoreToRgb(scores[x], good_threshold, bad_threshold, colour);
+      red[x] = colour[0];
+      green[x] = colour[1];
+      blue[x] = colour[2];
+    }
+  }
+  return heatmap;
+}
+
+}  // namespace butteraugli
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/butteraugli/butteraugli.h b/codec/L2/demos/pikEnc/host/pik/butteraugli/butteraugli.h
new file mode 100755
index 0000000000..ef9d07f3db
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/butteraugli/butteraugli.h
@@ -0,0 +1,309 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+//
+// Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com)
+
+#ifndef PIK_BUTTERAUGLI_BUTTERAUGLI_H_
+#define PIK_BUTTERAUGLI_BUTTERAUGLI_H_
+
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <memory>
+#include <vector>
+
+#include "pik/image.h"
+#include "pik/image_ops.h"
+
+#define BUTTERAUGLI_ENABLE_CHECKS 0
+
+// This is the main interface to butteraugli image similarity
+// analysis function.
+
+namespace pik {
+namespace butteraugli {
+
+// ButteraugliInterface defines the public interface for butteraugli.
+//
+// It calculates the difference between rgb0 and rgb1.
+//
+// rgb0 and rgb1 contain the images. rgb0[c][px] and rgb1[c][px] contain
+// the red image for c == 0, green for c == 1, blue for c == 2. Location index
+// px is calculated as y * xsize + x.
+//
+// Value of pixels of images rgb0 and rgb1 need to be represented as raw
+// intensity. Most image formats store gamma corrected intensity in pixel
+// values. This gamma correction has to be removed, by applying the following
+// function:
+// butteraugli_val = 255.0 * pow(png_val / 255.0, gamma);
+// A typical value of gamma is 2.2. It is usually stored in the image header.
+// Take care not to confuse that value with its inverse. The gamma value should
+// be always greater than one.
+// Butteraugli does not work as intended if the caller does not perform
+// gamma correction.
+//
+// hf_asymmetry is a multiplier for penalizing new HF artifacts more than
+// blurring away features (1.0 -> neutral).
+//
+// diffmap will contain an image of the size xsize * ysize, containing
+// localized differences for values px (indexed with the px the same as rgb0
+// and rgb1). diffvalue will give a global score of similarity.
+//
+// A diffvalue smaller than kButteraugliGood indicates that images can be
+// observed as the same image.
+// diffvalue larger than kButteraugliBad indicates that a difference between
+// the images can be observed.
+// A diffvalue between kButteraugliGood and kButteraugliBad indicates that
+// a subtle difference can be observed between the images.
+//
+// Returns true on success. Returns false, leaving diffvalue untouched, when
+// the images are zero-sized or differ in size.
+
+bool ButteraugliInterface(const Image3F &rgb0,
+                          const Image3F &rgb1,
+                          float hf_asymmetry,
+                          ImageF &diffmap,
+                          double &diffvalue);
+
+// NOTE(review): presumably the lower and upper butteraugli-score bounds used
+// when tuning quantization; their users are not visible here -- confirm.
+const double kButteraugliQuantLow = 0.26;
+const double kButteraugliQuantHigh = 1.454;
+
+// Converts the butteraugli score into fuzzy class values that are continuous
+// at the class boundary. The class boundary location is based on human
+// raters, but the slope is arbitrary. Particularly, it does not reflect
+// the expectation value of probabilities of the human raters. It is just
+// expected that a smoother class boundary will allow for higher-level
+// optimization algorithms to work faster.
+//
+// Returns 2.0 for a perfect match, and 1.0 for 'ok', 0.0 for bad. Because the
+// scoring is fuzzy, a butteraugli score of 0.96 would return a class of
+// around 1.9.
+// NOTE(review): with the constants currently used by the implementation, a
+// score of exactly 1.0 maps to its `scaler` constant (about 0.83) rather
+// than 1.0, so the figures quoted above look stale -- confirm.
+double ButteraugliFuzzyClass(double score);
+
+// Input values should be in range 0 (bad) to 2 (good). Use
+// kButteraugliNormalization as normalization.
+// NOTE(review): kButteraugliNormalization is not declared in this header --
+// confirm where (or whether) it still exists.
+double ButteraugliFuzzyInverse(double seek);
+
+// Implementation details, don't use anything below or your code will
+// break in the future.
+
+// BUTTERAUGLI_INLINE: forced inlining on MSVC, plain `inline` elsewhere.
+#ifdef _MSC_VER
+#define BUTTERAUGLI_INLINE __forceinline
+#else
+#define BUTTERAUGLI_INLINE inline
+#endif
+
+// BUTTERAUGLI_HAS_ASSUME_ALIGNED: nonzero when __builtin_assume_aligned can
+// be used (queried via __has_builtin on Clang, assumed on GCC, off otherwise).
+#ifdef __clang__
+// Early versions of Clang did not support __builtin_assume_aligned.
+#define BUTTERAUGLI_HAS_ASSUME_ALIGNED __has_builtin(__builtin_assume_aligned)
+#elif defined(__GNUC__)
+#define BUTTERAUGLI_HAS_ASSUME_ALIGNED 1
+#else
+#define BUTTERAUGLI_HAS_ASSUME_ALIGNED 0
+#endif
+
+// Returns a void* pointer which the compiler then assumes is N-byte aligned.
+// Example:
+//   float* PIK_RESTRICT aligned =
+//       (float*)BUTTERAUGLI_ASSUME_ALIGNED(in, 32);
+//
+// The assignment semantics are required by GCC/Clang. ICC provides an in-place
+// __assume_aligned, whereas MSVC's __assume appears unsuitable.
+// Without compiler support the macro simply yields `ptr` unchanged.
+#if BUTTERAUGLI_HAS_ASSUME_ALIGNED
+#define BUTTERAUGLI_ASSUME_ALIGNED(ptr, align) __builtin_assume_aligned((ptr), (align))
+#else
+#define BUTTERAUGLI_ASSUME_ALIGNED(ptr, align) (ptr)
+#endif  // BUTTERAUGLI_HAS_ASSUME_ALIGNED
+
+// Frequency decomposition of one image, as consumed by
+// ButteraugliComparator::DiffmapPsychoImage().
+// NOTE(review): the member names presumably stand for ultra-high, high,
+// medium and low frequency bands; the code that fills them is not visible
+// here -- confirm.
+struct PsychoImage {
+  ImageF uhf[2];  // XY
+  ImageF hf[2];   // XY
+  Image3F mf;     // XYB
+  Image3F lf;     // XYB
+};
+
+// Compares distorted images against one reference image that is supplied to
+// the constructor, so that work on the reference can be reused.
+class ButteraugliComparator {
+ public:
+  // rgb0 is the reference image; hf_asymmetry is described at
+  // ButteraugliInterface().
+  ButteraugliComparator(const Image3F &rgb0, double hf_asymmetry);
+  virtual ~ButteraugliComparator();
+
+  // Computes the butteraugli map between the original image given in the
+  // constructor and the distorted image given here.
+  void Diffmap(const Image3F &rgb1, ImageF &result) const;
+
+  // Same as above, but OpsinDynamicsImage() was already applied.
+  void DiffmapOpsinDynamicsImage(const Image3F &xyb1, ImageF &result) const;
+
+  // Same as above, but the frequency decomposition was already applied.
+  void DiffmapPsychoImage(const PsychoImage& ps1, ImageF &result) const;
+
+  // Writes masking images into *mask and *mask_dc.
+  // NOTE(review): presumably derived from the reference image only; the
+  // definition is not visible here -- confirm.
+  void Mask(Image3F *PIK_RESTRICT mask, Image3F *PIK_RESTRICT mask_dc) const;
+
+ private:
+  // NOTE(review): the two MaltaDiffMap* helpers presumably accumulate a
+  // directional difference of y0 vs. y1 into plane c of block_diff_ac, with
+  // separate weights for y0 > y1 and y0 < y1 -- confirm in the definitions.
+  void MaltaDiffMapLF(const ImageF &y0, const ImageF &y1, double w_0gt1,
+                      double w_0lt1, double normalization,
+                      Image3F *PIK_RESTRICT block_diff_ac, size_t c) const;
+
+  void MaltaDiffMap(const ImageF &y0, const ImageF &y1, double w_0gt1,
+                    double w_0lt1, double normalization,
+                    Image3F *PIK_RESTRICT block_diff_ac, size_t c) const;
+
+  // Merges the per-channel DC and AC differences into a single-plane map.
+  // NOTE(review): inferred from the signature -- confirm.
+  ImageF CombineChannels(const Image3F &scale_xyb, const Image3F &scale_xyb_dc,
+                         const Image3F &block_diff_dc,
+                         const Image3F &block_diff_ac) const;
+
+  const size_t xsize_;
+  const size_t ysize_;
+  float hf_asymmetry_;
+  PsychoImage pi0_;  // NOTE(review): presumably the reference's decomposition.
+  // NOTE(review): raw pointer to another comparator. If the destructor
+  // deletes it, the implicitly generated copy operations would lead to a
+  // double delete -- confirm ownership before copying instances.
+  ButteraugliComparator *sub_;
+};
+
+// Computes the per-pixel difference map between rgb0 and rgb1 into diffmap.
+// Returns false if the images are zero-sized or differ in size. Images that
+// are smaller than 8 pixels in either dimension are padded by replicating
+// edge pixels before the comparison.
+bool ButteraugliDiffmap(const Image3F &rgb0, const Image3F &rgb1,
+                        double hf_asymmetry, ImageF &diffmap);
+
+// Reduces a difference map to a single score.
+// NOTE(review): the reduction itself is defined elsewhere -- confirm before
+// relying on its specifics.
+double ButteraugliScoreFromDiffmap(const ImageF& distmap);
+
+// Generate rgb-representation of the distance between two images.
+Image3B CreateHeatMapImage(const ImageF &distmap, double good_threshold,
+                           double bad_threshold);
+
+// Compute values of local frequency and dc masking based on the activity
+// in the two images.
+// mask and mask_dc are reallocated to the size of xyb0. If diff_ac is
+// non-null, a Y-channel masking difference is added to it.
+void Mask(const Image3F &xyb0, const Image3F &xyb1, Image3F *PIK_RESTRICT mask,
+          Image3F *PIK_RESTRICT mask_dc, ImageF *diff_ac = nullptr);
+
+// Opponent-colour transform: X is the difference of the first two inputs,
+// Y is their sum, and B is the third input passed through unchanged.
+template <class V>
+BUTTERAUGLI_INLINE void RgbToXyb(const V &r, const V &g, const V &b,
+                                 V *PIK_RESTRICT valx, V *PIK_RESTRICT valy,
+                                 V *PIK_RESTRICT valb) {
+  const V red_minus_green = r - g;
+  *valx = red_minus_green;
+  const V red_plus_green = r + g;
+  *valy = red_plus_green;
+  *valb = b;
+}
+
+// Maps three input channels to three outputs with a fixed 3x3 mixing matrix
+// plus a constant bias per output.
+// https://en.wikipedia.org/wiki/Photopsin absorbance modeling.
+template <class V>
+BUTTERAUGLI_INLINE void OpsinAbsorbance(const V &in0, const V &in1,
+                                        const V &in2, V *PIK_RESTRICT out0,
+                                        V *PIK_RESTRICT out1,
+                                        V *PIK_RESTRICT out2) {
+  // One row per output: {weight of in0, weight of in1, weight of in2, bias}.
+  static const double kMix[3][4] = {
+      {0.257709424788, 0.47396355934, 0.066410860767, 0.95},
+      {0.191380264371, 0.583330953419, 0.0987313588422, 1.2},
+      {0.0783885502602, 0.0623406135216, 0.216921850967, 3.2034793458},
+  };
+
+  // Every constant is converted to V before use, so the arithmetic is
+  // carried out in V.
+  *out0 = V(kMix[0][0]) * in0 + V(kMix[0][1]) * in1 + V(kMix[0][2]) * in2 +
+          V(kMix[0][3]);
+  *out1 = V(kMix[1][0]) * in0 + V(kMix[1][1]) * in1 + V(kMix[1][2]) * in2 +
+          V(kMix[1][3]);
+  *out2 = V(kMix[2][0]) * in0 + V(kMix[2][1]) * in1 + V(kMix[2][2]) * in2 +
+          V(kMix[2][3]);
+}
+
+// NOTE(review): the functions below are only declared here; the summaries
+// are inferred from names and signatures -- confirm against the definitions.
+
+// Presumably converts a linear RGB image into butteraugli's XYB space.
+Image3F OpsinDynamicsImage(const Image3F &rgb);
+
+// Presumably returns `in` blurred with the given sigma. The .cc file also
+// calls a four-argument Blur() that writes into an output pointer.
+ImageF Blur(const ImageF& in, float sigma, float border_ratio);
+
+// Presumably a reference gamma function, with GammaMinArg()/GammaMaxArg()
+// giving the argument range over which it is meant to be evaluated.
+double SimpleGamma(double v);
+
+double GammaMinArg();
+double GammaMaxArg();
+
+// Polynomial evaluation via Clenshaw's scheme (similar to Horner's).
+// Each instantiation performs the recurrence step for coefficient INDEX and
+// then recurses, at compile time, down to the INDEX == 0 specialization.
+// The template lives at namespace scope because it needs an explicit
+// specialization.
+template <int INDEX>
+static inline void ClenshawRecursion(const double x, const double *coefficients,
+                                     double *b1, double *b2) {
+  const double current = *b1;
+  const double x_current = x * current;
+  const double next = (x_current + x_current) - (*b2) + coefficients[INDEX];
+  *b2 = current;
+  *b1 = next;
+
+  ClenshawRecursion<INDEX - 1>(x, coefficients, b1, b2);
+}
+
+// Base case: the last step uses x * b1 once instead of twice and leaves the
+// final value in *b1.
+template <>
+inline void ClenshawRecursion<0>(const double x, const double *coefficients,
+                                 double *b1, double *b2) {
+  const double x_current = x * (*b1);
+  const double result = x_current - (*b2) + coefficients[0];
+  *b1 = result;
+}
+
+// Rational polynomial := the quotient of two polynomial evaluations. These are
+// easier to find than minimax polynomials. The struct is a plain aggregate
+// (domain bounds followed by the two coefficient arrays) so that it can be
+// brace-initialized.
+struct RationalPolynomial {
+  // Evaluates the Chebyshev series with the given N coefficients at x,
+  // starting the Clenshaw recurrence from zero.
+  template <int N>
+  static double EvaluatePolynomial(const double x,
+                                   const double (&coefficients)[N]) {
+    double accumulator = 0.0;
+    double previous = 0.0;
+    ClenshawRecursion<N - 1>(x, coefficients, &accumulator, &previous);
+    return accumulator;
+  }
+
+  // Evaluates the rational polynomial at x (in [min_value, max_value]).
+  // Yields 0 when the denominator evaluates to zero; otherwise the quotient
+  // is rounded to float precision before being returned as double.
+  inline double operator()(const double x) const {
+    // Map x to [0, 1] and from there to the [-1, 1] Chebyshev domain.
+    const double span = max_value - min_value;
+    const double xc = 2.0 * ((x - min_value) / span) - 1.0;
+
+    const double numerator = EvaluatePolynomial(xc, p);
+    const double denominator = EvaluatePolynomial(xc, q);
+    return (denominator == 0.0)
+               ? 0.0
+               : static_cast<float>(numerator / denominator);
+  }
+
+  // Domain of the polynomials; they are undefined elsewhere.
+  double min_value;
+  double max_value;
+
+  // Coefficients of T_n (Chebyshev polynomials of the first kind).
+  // Degree 5/5 is a compromise between accuracy (0.1%) and numerical stability.
+  double p[5 + 1];
+  double q[5 + 1];
+};
+
+// Evaluates a fixed rational-polynomial approximation at `value`; the
+// approximation is defined on the domain [0.971783, 590.188894].
+static inline double GammaPolynomial(double value) {
+  static const RationalPolynomial kApproximation = {
+    // Domain: min_value, max_value.
+    0.971783,
+    590.188894,
+    // Numerator coefficients.
+    {
+      98.7821300963361,
+      164.273222212631,
+      92.948112871376,
+      33.8165311212688,
+      6.91626704983562,
+      0.556380877028234
+    },
+    // Denominator coefficients.
+    {
+      1,
+      1.64339473427892,
+      0.89392405219969,
+      0.298947051776379,
+      0.0507146002577288,
+      0.00226495093949756
+    }};
+  return kApproximation(value);
+}
+
+}  // namespace butteraugli
+}  // namespace pik
+
+#endif  // PIK_BUTTERAUGLI_BUTTERAUGLI_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/butteraugli_comparator.cc b/codec/L2/demos/pikEnc/host/pik/butteraugli_comparator.cc
new file mode 100755
index 0000000000..1764fdb939
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/butteraugli_comparator.cc
@@ -0,0 +1,49 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/butteraugli_comparator.h"
+
+#include "pik/opsin_inverse.h"
+
+namespace pik {
+
+namespace {
+
+// Returns a new image of the same size as `opsin`, filled by running
+// OpsinToLinear() over the whole image.
+Image3F LinearFromOpsin(const Image3F& opsin) {
+  const Rect whole_image(opsin);
+  Image3F converted(opsin.xsize(), opsin.ysize());
+  OpsinToLinear(opsin, whole_image, &converted);
+  return converted;
+}
+
+}  // namespace
+
+// Converts `opsin` to linear RGB, scales it by `multiplier` and hands the
+// result to the wrapped butteraugli comparator as the reference image.
+// The distance starts at 0 and the distance map is allocated at the image
+// size and zero-filled.
+ButteraugliComparator::ButteraugliComparator(const Image3F& opsin,
+                                             float hf_asymmetry,
+                                             float multiplier)
+    : xsize_(opsin.xsize()),
+      ysize_(opsin.ysize()),
+      comparator_(ScaleImage(multiplier, LinearFromOpsin(opsin)), hf_asymmetry),
+      distance_(0.0),
+      multiplier_(multiplier),
+      // Uses xsize_/ysize_, which are already initialized at this point.
+      distmap_(xsize_, ysize_) {
+  ZeroFillImage(&distmap_);
+}
+
+// Compares `linear_rgb` against the reference image: refreshes the distance
+// map and then the aggregate distance. The input must have the same size as
+// the distance map and is scaled by the multiplier unless that is exactly 1.
+void ButteraugliComparator::Compare(const Image3F& linear_rgb) {
+  PIK_CHECK(SameSize(distmap_, linear_rgb));
+  const bool needs_scaling = !(multiplier_ == 1);
+  if (needs_scaling) {
+    comparator_.Diffmap(ScaleImage(multiplier_, linear_rgb), distmap_);
+  } else {
+    // Skip the copy that scaling would make.
+    comparator_.Diffmap(linear_rgb, distmap_);
+  }
+  distance_ = butteraugli::ButteraugliScoreFromDiffmap(distmap_);
+}
+
+// Forwards to the wrapped butteraugli comparator's Mask(), which fills
+// *mask and *mask_dc.
+void ButteraugliComparator::Mask(Image3F* mask, Image3F* mask_dc) {
+  comparator_.Mask(mask, mask_dc);
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/butteraugli_comparator.h b/codec/L2/demos/pikEnc/host/pik/butteraugli_comparator.h
new file mode 100755
index 0000000000..6d0a425c19
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/butteraugli_comparator.h
@@ -0,0 +1,39 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_BUTTERAUGLI_COMPARATOR_H_
+#define PIK_BUTTERAUGLI_COMPARATOR_H_
+
+#include "pik/butteraugli/butteraugli.h"
+#include "pik/image.h"
+#include "pik/image_ops.h"
+
+namespace pik {
+
+// Wraps butteraugli::ButteraugliComparator for a reference image given in
+// opsin space, and keeps the distance map and distance of the latest
+// comparison.
+class ButteraugliComparator {
+ public:
+  // `opsin` is the reference image; it is converted to linear RGB and scaled
+  // by `multiplier` before being passed to butteraugli.
+  ButteraugliComparator(const Image3F& opsin, float hf_asymmetry,
+                        float multiplier);
+
+  // Compares `linear_rgb` (same size as the reference) against the reference
+  // and updates distmap() and distance().
+  void Compare(const Image3F& linear_rgb);
+
+  // Results of the latest Compare(); zero-filled map and 0 before the first.
+  const ImageF& distmap() const { return distmap_; }
+  float distance() const { return distance_; }
+
+  // Fills *mask and *mask_dc via the wrapped comparator.
+  void Mask(Image3F* mask, Image3F* mask_dc);
+
+ private:
+  // NOTE(review): stored as int although the image sizes are size_t.
+  const int xsize_;
+  const int ysize_;
+  butteraugli::ButteraugliComparator comparator_;
+  float distance_;
+  float multiplier_;
+  ImageF distmap_;
+};
+
+}  // namespace pik
+
+#endif  // PIK_BUTTERAUGLI_COMPARATOR_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/butteraugli_distance.cc b/codec/L2/demos/pikEnc/host/pik/butteraugli_distance.cc
new file mode 100755
index 0000000000..f635098aaf
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/butteraugli_distance.cc
@@ -0,0 +1,138 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/butteraugli_distance.h"
+
+#include <stddef.h>
+#include <algorithm>
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/butteraugli/butteraugli.h"
+#include "pik/gamma_correct.h"
+#include "pik/profiler.h"
+
+namespace pik {
+namespace {
+
+// Runs butteraugli on two linear sRGB images and returns the score derived
+// from the difference map. The map is written to *distmap_out when that is
+// non-null, and to a discarded temporary otherwise. A failing diffmap
+// computation trips PIK_CHECK.
+float ButteraugliDistanceLinearSRGB(const Image3F& rgb0, const Image3F& rgb1,
+                                    float hf_asymmetry, ImageF* distmap_out) {
+  ImageF scratch;
+  ImageF* target = (distmap_out != nullptr) ? distmap_out : &scratch;
+  PIK_CHECK(butteraugli::ButteraugliDiffmap(rgb0, rgb1, hf_asymmetry, *target));
+  return butteraugli::ButteraugliScoreFromDiffmap(*target);
+}
+
+// Blends channel `c` of `in` over a grayscale background into the same
+// channel of `out`. Colours are linear on input and output, but partially
+// transparent pixels are mixed in gamma-compressed space. `alpha` holds the
+// foreground weights in [0 .. opaque]; `background_linear255` is the linear
+// background value. The loops run over the dimensions of `out`.
+void AlphaBlend(const Image3F& in, const size_t c, float background_linear255,
+                const ImageU& alpha, const uint16_t opaque, Image3F* out) {
+  const float background = LinearToSrgb8Direct(background_linear255);
+  const size_t width = out->xsize();
+  const size_t height = out->ysize();
+
+  for (size_t y = 0; y < height; ++y) {
+    const uint16_t* PIK_RESTRICT weights = alpha.ConstRow(y);
+    const float* PIK_RESTRICT src = in.ConstPlaneRow(c, y);
+    float* PIK_RESTRICT dst = out->PlaneRow(c, y);
+    for (size_t x = 0; x < width; ++x) {
+      const uint16_t a = weights[x];
+      if (a == 0) {
+        // Fully transparent: background only.
+        dst[x] = background_linear255;
+        continue;
+      }
+      if (a == opaque) {
+        // Fully opaque: foreground only, no round trip through sRGB.
+        dst[x] = src[x];
+        continue;
+      }
+      const float w_fg = a * 1.0f / opaque;
+      const float w_bg = 1.0f - w_fg;
+      const float fg = w_fg * LinearToSrgb8Direct(src[x]);
+      const float bg = w_bg * background;
+      dst[x] = Srgb8ToLinearDirect(fg + bg);
+    }
+  }
+}
+
+// Returns the image to compare for `io`: `linear` itself when `io` has no
+// alpha channel, otherwise `copy`, which is reallocated and filled with
+// `linear` blended over the given background in all three channels.
+const Image3F* AlphaBlend(const CodecInOut& io, const Image3F& linear,
+                          float background_linear255, Image3F* copy) {
+  if (io.HasAlpha()) {
+    *copy = Image3F(linear.xsize(), linear.ysize());
+    // Largest alpha value for the image's alpha bit depth.
+    const uint16_t opaque = (1U << io.AlphaBits()) - 1;
+    size_t channel = 0;
+    while (channel < 3) {
+      AlphaBlend(linear, channel, background_linear255, io.alpha(), opaque,
+                 copy);
+      ++channel;
+    }
+    return copy;
+  }
+  // No alpha => all opaque.
+  return &linear;
+}
+
+}  // namespace
+
+// Returns the butteraugli distance between two images and, when `distmap` is
+// non-null, also the per-pixel distance map. Inputs that are not already
+// linear sRGB are converted first. If either input has alpha, both are
+// blended over black and over white, and the larger distance (per pixel and
+// overall) is reported.
+float ButteraugliDistance(const CodecInOut* rgb0, const CodecInOut* rgb1,
+                          float hf_asymmetry, ImageF* distmap,
+                          ThreadPool* pool) {
+  PROFILER_FUNC;
+  // Yields the pixels of `io` in linear sRGB, converting into `storage` only
+  // when the image is not already in that space.
+  const auto as_linear_srgb = [pool](const CodecInOut* io,
+                                     Image3F* storage) -> const Image3F* {
+    const Image3F* pixels = &io->color();
+    if (!io->IsLinearSRGB()) {
+      const ColorEncoding& c = io->Context()->c_linear_srgb[io->IsGray()];
+      PIK_CHECK(io->CopyTo(Rect(io->color()), c, storage, pool));
+      pixels = storage;
+    }
+    return pixels;
+  };
+  Image3F linear_storage0;
+  const Image3F* linear0 = as_linear_srgb(rgb0, &linear_storage0);
+  Image3F linear_storage1;
+  const Image3F* linear1 = as_linear_srgb(rgb1, &linear_storage1);
+
+  // No alpha: skip blending, only need a single call to Butteraugli.
+  const bool any_alpha = rgb0->HasAlpha() || rgb1->HasAlpha();
+  if (!any_alpha) {
+    return ButteraugliDistanceLinearSRGB(*linear0, *linear1, hf_asymmetry,
+                                         distmap);
+  }
+
+  // Blend on black and white backgrounds
+
+  const float kBlack = 0.0f;
+  Image3F on_black_storage0, on_black_storage1;
+  const Image3F* on_black0 =
+      AlphaBlend(*rgb0, *linear0, kBlack, &on_black_storage0);
+  const Image3F* on_black1 =
+      AlphaBlend(*rgb1, *linear1, kBlack, &on_black_storage1);
+
+  const float kWhite = 255.0f;
+  Image3F on_white_storage0, on_white_storage1;
+  const Image3F* on_white0 =
+      AlphaBlend(*rgb0, *linear0, kWhite, &on_white_storage0);
+  const Image3F* on_white1 =
+      AlphaBlend(*rgb1, *linear1, kWhite, &on_white_storage1);
+
+  ImageF map_black, map_white;
+  const float dist_black = ButteraugliDistanceLinearSRGB(
+      *on_black0, *on_black1, hf_asymmetry, &map_black);
+  const float dist_white = ButteraugliDistanceLinearSRGB(
+      *on_white0, *on_white1, hf_asymmetry, &map_white);
+
+  // distmap and return values are the max of the black/white results.
+  if (distmap != nullptr) {
+    const size_t width = rgb0->xsize();
+    const size_t height = rgb0->ysize();
+    *distmap = ImageF(width, height);
+    for (size_t y = 0; y < height; ++y) {
+      const float* PIK_RESTRICT black = map_black.ConstRow(y);
+      const float* PIK_RESTRICT white = map_white.ConstRow(y);
+      float* PIK_RESTRICT merged = distmap->Row(y);
+      for (size_t x = 0; x < width; ++x) {
+        merged[x] = std::max(black[x], white[x]);
+      }
+    }
+  }
+  return std::max(dist_black, dist_white);
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/butteraugli_distance.h b/codec/L2/demos/pikEnc/host/pik/butteraugli_distance.h
new file mode 100755
index 0000000000..d0a059379b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/butteraugli_distance.h
@@ -0,0 +1,24 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_BUTTERAUGLI_DISTANCE_H_
+#define PIK_BUTTERAUGLI_DISTANCE_H_
+
+// Facade for returning Butteraugli distance between two images.
+
+#include "pik/codec.h"
+
+namespace pik {
+
+// Returns the butteraugli distance between rgb0 and rgb1.
+// If distmap is not null, it must be the same size as rgb0 and rgb1.
+float ButteraugliDistance(const CodecInOut* rgb0, const CodecInOut* rgb1,
+                          float hf_asymmetry, ImageF* distmap = nullptr,
+                          ThreadPool* pool = nullptr);
+
+}  // namespace pik
+
+#endif  // PIK_BUTTERAUGLI_DISTANCE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/butteraugli_main.cc b/codec/L2/demos/pikEnc/host/pik/butteraugli_main.cc
new file mode 100755
index 0000000000..6f68036b22
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/butteraugli_main.cc
@@ -0,0 +1,57 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include <stdio.h>
+
+#include "pik/butteraugli_distance.h"
+#include "pik/codec.h"
+#include "pik/image.h"
+#include "pik/status.h"
+
+namespace pik {
+namespace {
+
+// Prints the Butteraugli distance between two image files to stdout. Returns
+// false (after a message on stderr) on read failure or dimension mismatch.
+Status RunButteraugli(const char* pathname1, const char* pathname2) {
+  CodecContext codec_context;
+  CodecInOut io1(&codec_context);
+  ThreadPool pool(4);
+  if (!io1.SetFromFile(pathname1, &pool)) {
+    fprintf(stderr, "Failed to read image from %s\n", pathname1);
+    return false;
+  }
+  CodecInOut io2(&codec_context);
+  if (!io2.SetFromFile(pathname2, &pool)) {
+    fprintf(stderr, "Failed to read image from %s\n", pathname2);
+    return false;
+  }
+  const size_t width1 = io1.xsize(), width2 = io2.xsize();
+  if (width1 != width2) {
+    fprintf(stderr, "Width mismatch: %zu %zu\n", width1, width2);
+    return false;
+  }
+  const size_t height1 = io1.ysize(), height2 = io2.ysize();
+  if (height1 != height2) {
+    fprintf(stderr, "Height mismatch: %zu %zu\n", height1, height2);
+    return false;
+  }
+  const float distance = ButteraugliDistance(
+      &io1, &io2, /*hf_asymmetry=*/0.8f, /*distmap=*/nullptr, &pool);
+  printf("%.10f\n", distance);
+  return true;
+}
+
+}  // namespace
+}  // namespace pik
+
+// Entry point. Usage: <program> <reference> <distorted>; the exit code is 0
+// on success and 1 on bad usage or failure.
+int main(int argc, char** argv) {
+  if (argc == 3) return pik::RunButteraugli(argv[1], argv[2]) ? 0 : 1;
+  fprintf(stderr, "Usage: %s <reference> <distorted>\n", argv[0]);
+  return 1;
+}
diff --git a/codec/L2/demos/pikEnc/host/pik/byte_order.h b/codec/L2/demos/pikEnc/host/pik/byte_order.h
new file mode 100755
index 0000000000..8714bb52ba
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/byte_order.h
@@ -0,0 +1,164 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_BYTE_ORDER_H_
+#define PIK_BYTE_ORDER_H_
+
+#include <stdint.h>
+#include <string.h>  // memcpy
+#include "pik/compiler_specific.h"
+
+#if PIK_COMPILER_MSVC
+#include <intrin.h>  // _byteswap_*
+#else
+#include <x86intrin.h>
+#endif
+
+#if (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
+#define PIK_BYTE_ORDER_LITTLE 1
+#else
+// This means that we don't know that the byte order is little endian, in
+// this case we use endian-neutral code that works for both little- and
+// big-endian.
+#define PIK_BYTE_ORDER_LITTLE 0
+#endif
+
+// Reports whether the least-significant byte is stored first on this system.
+#if PIK_BYTE_ORDER_LITTLE
+static constexpr bool IsLittleEndian() { return true; }
+#else
+static inline bool IsLittleEndian() {
+  const uint32_t probe = 1;
+  uint8_t lowest_address_byte = 0;
+  memcpy(&lowest_address_byte, &probe, sizeof(lowest_address_byte));
+  return lowest_address_byte == 1;
+}
+#endif
+
+#if PIK_COMPILER_MSVC
+#define PIK_BSWAP32(x) _byteswap_ulong(x)
+#define PIK_BSWAP64(x) _byteswap_uint64(x)
+#else
+#define PIK_BSWAP32(x) __builtin_bswap32(x)
+#define PIK_BSWAP64(x) __builtin_bswap64(x)
+#endif
+
+// Reads a 16-bit big-endian value from p[0..1], zero-extended to 32 bits.
+static PIK_INLINE uint32_t LoadBE16(const uint8_t* p) {
+  const uint32_t high = p[0], low = p[1];
+  return (high << 8) | low;
+}
+
+// Reads a 16-bit little-endian value from p[0..1], zero-extended to 32 bits.
+static PIK_INLINE uint32_t LoadLE16(const uint8_t* p) {
+  const uint32_t low = p[0], high = p[1];
+  return (high << 8) | low;
+}
+
+// Reads a 32-bit big-endian value from (possibly unaligned) p[0..3].
+static PIK_INLINE uint32_t LoadBE32(const uint8_t* p) {
+#if PIK_BYTE_ORDER_LITTLE
+  // Little-endian host: copy the raw bytes, then reverse them.
+  uint32_t raw;
+  memcpy(&raw, p, sizeof(raw));
+  return PIK_BSWAP32(raw);
+#else
+  // Host byte order unknown: assemble the value one byte at a time.
+  uint32_t value = 0;
+  for (int i = 0; i < 4; ++i) value = (value << 8) | p[i];
+  return value;
+#endif
+}
+
+// Reads a 32-bit little-endian value from (possibly unaligned) p[0..3].
+static PIK_INLINE uint32_t LoadLE32(const uint8_t* p) {
+#if PIK_BYTE_ORDER_LITTLE
+  // Little-endian host: the in-memory representation is already the value.
+  uint32_t value;
+  memcpy(&value, p, sizeof(value));
+  return value;
+#else
+  // Host byte order unknown: assemble the value one byte at a time.
+  uint32_t value = 0;
+  for (int i = 3; i >= 0; --i) value = (value << 8) | p[i];
+  return value;
+#endif
+}
+
+static PIK_INLINE void StoreBE16(const uint32_t native, uint8_t* p) {
+  p[1] = static_cast<uint8_t>(native);       // low byte goes last
+  p[0] = static_cast<uint8_t>(native >> 8);  // high byte goes first
+}
+
+static PIK_INLINE void StoreLE16(const uint32_t native, uint8_t* p) {
+  p[0] = static_cast<uint8_t>(native);       // low byte goes first
+  p[1] = static_cast<uint8_t>(native >> 8);  // high byte goes last
+}
+
+// Writes `native` to p[0..3] (unaligned OK), most-significant byte first.
+static PIK_INLINE void StoreBE32(const uint32_t native, uint8_t* p) {
+#if PIK_BYTE_ORDER_LITTLE
+  const uint32_t swapped = PIK_BSWAP32(native);
+  memcpy(p, &swapped, sizeof(swapped));
+#else
+  // Host byte order unknown: emit one byte at a time.
+  for (int i = 0; i < 4; ++i) {
+    p[i] = static_cast<uint8_t>(native >> (24 - 8 * i));
+  }
+#endif
+}
+
+// Writes `native` to p[0..3] (unaligned OK), least-significant byte first.
+static PIK_INLINE void StoreLE32(const uint32_t native, uint8_t* p) {
+#if PIK_BYTE_ORDER_LITTLE
+  // Little-endian host: the in-memory representation is already correct.
+  memcpy(p, &native, sizeof(native));
+#else
+  // Host byte order unknown: emit one byte at a time.
+  for (int i = 0; i < 4; ++i) {
+    p[i] = static_cast<uint8_t>(native >> (8 * i));
+  }
+#endif
+}
+
+// Empty tag types that select a byte order through overload resolution.
+struct OrderBE {};
+struct OrderLE {};
+
+// Tag-dispatched wrappers: generic code passes OrderBE() or OrderLE().
+static PIK_INLINE void Store16(OrderBE, const uint32_t native, uint8_t* p) {
+  StoreBE16(native, p);
+}
+
+static PIK_INLINE void Store16(OrderLE, const uint32_t native, uint8_t* p) {
+  StoreLE16(native, p);
+}
+
+static PIK_INLINE void Store32(OrderBE, const uint32_t native, uint8_t* p) {
+  StoreBE32(native, p);
+}
+
+static PIK_INLINE void Store32(OrderLE, const uint32_t native, uint8_t* p) {
+  StoreLE32(native, p);
+}
+
+static PIK_INLINE uint32_t Load16(OrderBE /*tag*/, const uint8_t* p) {
+  return LoadBE16(p);
+}
+
+static PIK_INLINE uint32_t Load16(OrderLE /*tag*/, const uint8_t* p) {
+  return LoadLE16(p);
+}
+
+static PIK_INLINE uint32_t Load32(OrderBE /*tag*/, const uint8_t* p) {
+  return LoadBE32(p);
+}
+
+static PIK_INLINE uint32_t Load32(OrderLE /*tag*/, const uint8_t* p) {
+  return LoadLE32(p);
+}
+
+#endif  // PIK_BYTE_ORDER_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/cache_aligned.cc b/codec/L2/demos/pikEnc/host/pik/cache_aligned.cc
new file mode 100755
index 0000000000..7920078eb0
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/cache_aligned.cc
@@ -0,0 +1,123 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/cache_aligned.h"
+
+#include <sys/mman.h>
+#include <atomic>
+
+namespace pik {
+namespace {
+
+#pragma pack(push, 1)  // byte-packed: no padding inside the header
+struct AllocationHeader {
+  void* allocated;        // pointer later handed to free()/munmap() by Free()
+  size_t allocated_size;  // total bytes obtained; used for stats and munmap()
+  uint8_t left_padding[kMaxVectorSize];  // NOTE(review): purpose not shown here
+};
+#pragma pack(pop)
+
+std::atomic<uint64_t> num_allocations{0};   // successful Allocate() calls
+std::atomic<uint64_t> bytes_in_use{0};      // raised by Allocate(), lowered by Free()
+std::atomic<uint64_t> max_bytes_in_use{0};  // high-water mark of bytes_in_use
+
+}  // namespace
+
+void CacheAligned::PrintStats() {  // Writes allocator statistics to stdout.
+  const size_t n = size_t(num_allocations.load(std::memory_order_relaxed));
+  const double peak = double(max_bytes_in_use.load(std::memory_order_relaxed));
+  printf("Allocations: %zu (max bytes in use: %E)\n", n, peak);
+}
+
+// Cycles through the multiples of kAlignment below kAlias, one per call.
+size_t CacheAligned::NextOffset() {
+  static std::atomic<uint32_t> counter{0};
+  const uint32_t ticket = counter.fetch_add(1, std::memory_order_relaxed);
+  return kAlignment * (ticket % uint32_t(kAlias / kAlignment));
+}
+
+// Disabled: slower than malloc + alignment.
+#define PIK_USE_MMAP 0
+
+// Returns `payload_size` usable bytes, or nullptr if the system allocation
+// fails. With the default malloc path the result is congruent to `offset`
+// (mod kAlias); offset == 0 is replaced by kAlignment to fit the header.
+void* CacheAligned::Allocate(const size_t payload_size, size_t offset) {
+  PIK_ASSERT(payload_size < (1ULL << 63));
+  PIK_ASSERT((offset % kAlignment == 0) && offset <= kAlias);
+  // What: | misalign | unused | AllocationHeader |payload
+  // Size: |<= kAlias | offset |                  |payload_size
+  //       ^allocated.^aligned.^header............^payload
+  // The header must immediately precede payload, which must remain aligned.
+  // To avoid wasting space, the header resides at the end of `unused`,
+  // which therefore cannot be empty (offset == 0).
+  if (offset == 0) {
+    offset = kAlignment;  // = round_up(sizeof(AllocationHeader), kAlignment)
+    static_assert(sizeof(AllocationHeader) <= kAlignment, "Else: round up");
+  }
+
+#if PIK_USE_MMAP
+  const size_t allocated_size = offset + payload_size;
+  const int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE;
+  void* allocated =
+      mmap(nullptr, allocated_size, PROT_READ | PROT_WRITE, flags, -1, 0);
+  if (allocated == MAP_FAILED) return nullptr;
+  const uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated);
+#else
+  const size_t allocated_size = kAlias + offset + payload_size;
+  void* allocated = malloc(allocated_size);
+  if (allocated == nullptr) return nullptr;
+  // Always round up even if already aligned - we already asked for kAlias
+  // extra bytes and there's no way to give them back.
+  uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated) + kAlias;
+  static_assert((kAlias & (kAlias - 1)) == 0, "kAlias must be a power of 2");
+  static_assert(kAlias >= kAlignment, "Cannot align to more than kAlias");
+  aligned &= ~(kAlias - 1);
+#endif
+
+  // Update statistics (#allocations and max bytes in use)
+  num_allocations.fetch_add(1, std::memory_order_relaxed);
+  const uint64_t prev_bytes =
+      bytes_in_use.fetch_add(allocated_size, std::memory_order_acq_rel);
+  const uint64_t in_use = prev_bytes + allocated_size;
+  // A load must not use release semantics, hence acquire instead of acq_rel.
+  uint64_t expected_max = max_bytes_in_use.load(std::memory_order_acquire);
+  // A failed exchange refreshes expected_max; retry until the max is stored.
+  while (!max_bytes_in_use.compare_exchange_strong(
+      expected_max, std::max(expected_max, in_use),
+      std::memory_order_acq_rel)) {
+  }
+
+  const uintptr_t payload = aligned + offset;  // still aligned
+  // Stash `allocated` and allocated_size inside header for use by Free().
+  AllocationHeader* header = reinterpret_cast<AllocationHeader*>(payload) - 1;
+  header->allocated = allocated;
+  header->allocated_size = allocated_size;
+
+  return PIK_ASSUME_ALIGNED(reinterpret_cast<void*>(payload), 64);
+}
+
+// Releases memory returned by Allocate(); a null pointer is ignored.
+void CacheAligned::Free(const void* aligned_pointer) {
+  if (aligned_pointer == nullptr) return;
+  const uintptr_t payload = reinterpret_cast<uintptr_t>(aligned_pointer);
+  PIK_ASSERT(payload % kAlignment == 0);
+  // Allocate() placed the header immediately before the payload.
+  const AllocationHeader* header =
+      reinterpret_cast<const AllocationHeader*>(payload) - 1;
+
+  // Subtract directly: negating the size_t (`~size + 1`) only yields the
+  // right 64-bit addend when size_t is itself 64 bits wide.
+  bytes_in_use.fetch_sub(header->allocated_size, std::memory_order_acq_rel);
+
+#if PIK_USE_MMAP
+  munmap(header->allocated, header->allocated_size);
+#else
+  free(header->allocated);
+#endif
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/cache_aligned.h b/codec/L2/demos/pikEnc/host/pik/cache_aligned.h
new file mode 100755
index 0000000000..cd6481bc5d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/cache_aligned.h
@@ -0,0 +1,108 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_CACHE_ALIGNED_H_
+#define PIK_CACHE_ALIGNED_H_
+
+// Memory allocator with support for alignment + misalignment.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>  // memcpy
+#include <atomic>
+#include <memory>
+
+#include "pik/arch_specific.h"
+#include "pik/compiler_specific.h"
+#include "pik/simd/simd.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Functions that depend on the cache line size.
+class CacheAligned {
+ public:
+  static void PrintStats();  // prints allocation statistics via printf
+
+  static constexpr size_t kPointerSize = sizeof(void*);
+  static constexpr size_t kCacheLineSize = 64;  // bytes
+  // To avoid RFOs, match L2 fill size (pairs of lines).
+  static constexpr size_t kAlignment = 2 * kCacheLineSize;
+  // Minimum multiple for which cache set conflicts and/or loads blocked by
+  // preceding stores can occur.
+  static constexpr size_t kAlias = 2048;
+
+  // Returns a 'random' (cyclical) offset suitable for Allocate.
+  static size_t NextOffset();
+
+  // Returns null or memory whose address is congruent to `offset` (mod kAlias);
+  // the implementation replaces offset == 0 with kAlignment. Varying offsets
+  // reduce cache conflicts and load/store stalls between large allocations.
+  // At least `payload_size` (which can be zero) bytes will be accessible.
+  static void* Allocate(const size_t payload_size, size_t offset);
+  // Convenience overload that uses NextOffset() as the offset.
+  static void* Allocate(const size_t payload_size) {
+    return Allocate(payload_size, NextOffset());
+  }
+  // Releases memory returned by Allocate; a null pointer is ignored.
+  static void Free(const void* aligned_pointer);
+
+  // Overwrites `to` without loading it into cache (read-for-ownership).
+  // Copies kCacheLineSize bytes from/to naturally aligned addresses.
+  template <typename T>
+  static SIMD_ATTR void StreamCacheLine(const T* PIK_RESTRICT from,
+                                        T* PIK_RESTRICT to) {
+    static_assert(16 % sizeof(T) == 0, "T must fit in a lane");
+#if SIMD_TARGET_VALUE != SIMD_NONE
+    constexpr size_t kLanes = 16 / sizeof(T);  // elements per 16-byte vector
+    const SIMD_PART(T, kLanes) d;
+    PIK_COMPILER_FENCE;
+    const auto v0 = load(d, from + 0 * kLanes);
+    const auto v1 = load(d, from + 1 * kLanes);
+    const auto v2 = load(d, from + 2 * kLanes);
+    const auto v3 = load(d, from + 3 * kLanes);
+    static_assert(sizeof(v0) * 4 == kCacheLineSize, "Wrong #vectors");
+    // Fences prevent the compiler from reordering loads/stores, which may
+    // interfere with write-combining.
+    PIK_COMPILER_FENCE;
+    stream(v0, d, to + 0 * kLanes);
+    stream(v1, d, to + 1 * kLanes);
+    stream(v2, d, to + 2 * kLanes);
+    stream(v3, d, to + 3 * kLanes);
+    PIK_COMPILER_FENCE;
+#else
+    memcpy(to, from, kCacheLineSize);  // scalar fallback when SIMD is disabled
+#endif
+  }
+};
+
+// Stateless deleter, so CacheAlignedUniquePtr need not store a function pointer.
+struct CacheAlignedDeleter {
+  void operator()(uint8_t* aligned_pointer) const {
+    CacheAligned::Free(aligned_pointer);
+  }
+};
+
+using CacheAlignedUniquePtr = std::unique_ptr<uint8_t[], CacheAlignedDeleter>;
+
+// Returns an owning pointer to `bytes` raw bytes at the next cyclic offset;
+// no constructors are invoked. Holds nullptr if the allocation failed.
+static inline CacheAlignedUniquePtr AllocateArray(const size_t bytes) {
+  void* const memory = CacheAligned::Allocate(bytes);
+  return CacheAlignedUniquePtr(static_cast<uint8_t*>(memory));
+}
+
+// Overload taking a caller-chosen `offset`, forwarded to CacheAligned::Allocate.
+static inline CacheAlignedUniquePtr AllocateArray(const size_t bytes,
+                                                  const size_t offset) {
+  void* const memory = CacheAligned::Allocate(bytes, offset);
+  return CacheAlignedUniquePtr(static_cast<uint8_t*>(memory));
+}
+
+}  // namespace pik
+
+#endif  // PIK_CACHE_ALIGNED_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/chroma_from_luma.cc b/codec/L2/demos/pikEnc/host/pik/chroma_from_luma.cc
new file mode 100755
index 0000000000..7e838bd0ce
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/chroma_from_luma.cc
@@ -0,0 +1,508 @@
+// Copyright 2019 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/chroma_from_luma.h"
+
+#include <cstdint>
+
+#include "pik/common.h"
+#include "pik/profiler.h"
+#include "pik/quantizer.h"
+#include "pik/simd/simd.h"
+
+namespace pik {
+namespace {
+
+#define PIK_CFL_VERBOSE 0
+#if PIK_CFL_VERBOSE
+// Which block/coefficient to print (absolute, within entire image)
+constexpr size_t kX = 38;
+constexpr size_t kY = 8;
+constexpr size_t kK = 1;
+#endif
+
+// Op{Decorrelate,Restore} are used as template parameters to unify the code
+// that applies or removes decorrelation.
+
+struct OpDecorrelate {  // Forward op: removes the y-correlated part of x.
+  static const char* Name() { return "D"; }
+  // Returns x - y * neg_r. NOTE(review): callers here pass r_x/r_b as neg_r.
+  template <class V>
+  SIMD_ATTR V operator()(const V y, const V neg_r, const V x) const {
+    // TODO(janwas): FMA
+    return x - y * neg_r;
+  }
+};
+
+struct OpRestore {  // Undoes OpDecorrelate for the same y and neg_r.
+  static const char* Name() { return "R"; }
+  // Returns y * neg_r + x, re-adding the term that OpDecorrelate subtracts.
+  template <class V>
+  SIMD_ATTR V operator()(const V y, const V neg_r, const V x) const {
+    // TODO(janwas): FMA
+    return y * neg_r + x;
+  }
+};
+
+struct OpCopy {  // Pass-through op: the result does not depend on y or neg_r.
+  static const char* Name() { return "C"; }
+  // Returns x unchanged.
+  template <class V>
+  SIMD_ATTR V operator()(const V /*y*/, const V /*neg_r*/, const V x) const {
+    return x;
+  }
+};
+
+// Running (IIR-smoothed) sums over previously restored x/y/b coefficients.
+struct Accumulators {
+  void Add(const float prev_x, const float prev_y, const float prev_b) {
+    sum_xp_ += prev_x * prev_y;  // accumulates x*y
+    sum_yp_ += prev_y * prev_y;  // accumulates y*y
+    sum_bp_ += prev_b * prev_y;  // accumulates b*y
+  }
+
+  // Slightly better (in terms of residual/dct ratio) to avoid slow ramp up
+  // via AddNext; in the first call, overwrite immediately.
+  bool IsZero() const { return sum_yp_ == 0.0f; }
+  // Exponential blend: this = next_weight * next + (1 - next_weight) * this.
+  void AddNext(const float next_weight, const Accumulators& next) {
+    const float prev_weight = 1.0f - next_weight;
+    sum_xp_ = next_weight * next.sum_xp_ + prev_weight * sum_xp_;
+    sum_yp_ = next_weight * next.sum_yp_ + prev_weight * sum_yp_;
+    sum_bp_ = next_weight * next.sum_bp_ + prev_weight * sum_bp_;
+  }
+
+  // Computes r0_x = sum_xp / sum_yp and r0_b = sum_bp / sum_yp (0 if sum_yp is 0).
+  void EstimateCorrelation(float* PIK_RESTRICT r0_x,
+                           float* PIK_RESTRICT r0_b) const {
+    const float rcp0_yp = sum_yp_ == 0.0f ? 0.0f : 1.0f / sum_yp_;
+    *r0_x = sum_xp_ * rcp0_yp;
+    *r0_b = sum_bp_ * rcp0_yp;
+    // No need to clamp - the sum+memory avoids near-zero Y and |r| < 3.
+  }
+
+  // private:
+  float sum_xp_ = 0.0f;
+  float sum_yp_ = 0.0f;
+  float sum_bp_ = 0.0f;
+};
+
+// Block policy allows the same function to handle AC block or DC coefficient.
+
+// AC block policy: kDCTBlockSize coefficients per block, first (DC) is invalid.
+class BlockAC {
+ public:
+  static size_t PosFromBX(size_t bx) { return bx * kDCTBlockSize; }
+
+  // Estimates r_x/r_b from the previous block (prev_*), then applies Op.
+  template <class Op>
+  static SIMD_ATTR PIK_INLINE void Adaptive(
+      const size_t bx, const size_t by,
+
+      const float* PIK_RESTRICT in_y, const float* in_x, const float* in_b,
+      // Previously restored neighbor block used for the estimate:
+      const float* PIK_RESTRICT prev_y, const float* PIK_RESTRICT prev_x,
+      const float* PIK_RESTRICT prev_b,
+      // acc_dc is unused here; acc_ac holds the smoothed AC sums:
+      Accumulators* PIK_RESTRICT acc_dc, Accumulators* PIK_RESTRICT acc_ac,
+      float* out_x, float* out_b, CFL_Stats* stats) {
+    // Can only estimate from low frequencies (coefficients 1, 8 and 9)
+    // because the HF predictor changes HF coefficients.
+    Accumulators next_ac;
+    next_ac.Add(prev_x[1], prev_y[1], prev_b[1]);
+    next_ac.Add(prev_x[8], prev_y[8], prev_b[8]);
+    next_ac.Add(prev_x[9], prev_y[9], prev_b[9]);
+    // First use overwrites the accumulator; later uses blend in 25% of next_ac.
+    float r_x, r_b;
+    if (acc_ac->IsZero()) {
+      *acc_ac = next_ac;
+    } else {
+      acc_ac->AddNext(0.25f, next_ac);
+    }
+
+    acc_ac->EstimateCorrelation(&r_x, &r_b);
+    if (stats != nullptr) {
+      stats->rx.Notify(r_x);
+      stats->rb.Notify(r_b);
+    }
+
+    ApplyOp<Op>(bx, by, in_y, in_x, in_b, r_x, r_b, out_x, out_b, stats);
+  }
+
+  // Uses predetermined correlation (fixed r_x, r_b) instead of estimating.
+  template <class Op>
+  static SIMD_ATTR PIK_INLINE void Hardcoded(const size_t bx, const size_t by,
+                                             const float* PIK_RESTRICT in_y,
+                                             const float* in_x,
+                                             const float* in_b, float* out_x,
+                                             float* out_b, CFL_Stats* stats) {
+    const float r_x = 0.001f;
+    const float r_b = 0.93f;
+    ApplyOp<Op>(bx, by, in_y, in_x, in_b, r_x, r_b, out_x, out_b, stats);
+  }
+  // Passes block `from` through QuantizeRoundtripBlockAC (DCT strategy) into `to`.
+  static void Quantize(const size_t c, const Quantizer& quantizer,
+                       const uint8_t quant_table, const int32_t quant_ac,
+                       const float* PIK_RESTRICT from, const size_t from_stride,
+                       float* PIK_RESTRICT to, size_t const to_stride) {
+    const AcStrategy acs(AcStrategy::Type::DCT, 0);
+    PIK_ASSERT(acs.IsFirstBlock());
+    quantizer.QuantizeRoundtripBlockAC(
+        c, quant_table, quant_ac, acs.GetQuantKind(), acs.covered_blocks_x(),
+        acs.covered_blocks_y(), from, from_stride, to, to_stride);
+  }
+
+ private:
+  // Decorrelates/restores one x and b block. in_x,b may alias out_x,b.
+  template <class Op>
+  static SIMD_ATTR PIK_INLINE void ApplyOp(const size_t bx, const size_t by,
+                                           const float* PIK_RESTRICT in_y,
+                                           const float* in_x, const float* in_b,
+                                           const float r_x, const float r_b,
+                                           float* out_x, float* out_b,
+                                           CFL_Stats* stats) {
+#if PIK_CFL_VERBOSE
+    const float saved_x = in_x[kK];  // saved first: out_* may alias in_*
+    const float saved_b = in_b[kK];
+#endif
+
+    for (size_t k = 0; k < kDCTBlockSize; ++k) {
+      out_x[k] = Op()(in_y[k], r_x, in_x[k]);
+      out_b[k] = Op()(in_y[k], r_b, in_b[k]);
+    }
+
+#if PIK_CFL_VERBOSE
+    if (bx == kX && by == kY) {
+      printf("  %s: in %9.4f %9.4f  qY %.4f  r %.4f %.4f  out %.4f %.4f\n",
+             Op::Name(), saved_x, saved_b, in_y[kK], r_x, r_b, out_x[kK],
+             out_b[kK]);
+    }
+#endif
+  }
+};
+
+// DC block policy: each block is a single coefficient.
+class BlockDC {
+ public:
+  static size_t PosFromBX(size_t bx) { return bx; }
+
+  // Estimates r_x/r_b from the previous coefficient (prev_*), then applies Op.
+  template <class Op>
+  static SIMD_ATTR PIK_INLINE void Adaptive(
+      const size_t bx, const size_t by,
+
+      const float* PIK_RESTRICT in_y, const float* in_x, const float* in_b,
+      // Previously restored neighbor coefficient used for the estimate:
+      const float* PIK_RESTRICT prev_y, const float* PIK_RESTRICT prev_x,
+      const float* PIK_RESTRICT prev_b,
+      // acc_ac is unused here; acc_dc holds the smoothed DC sums:
+      Accumulators* PIK_RESTRICT acc_dc, Accumulators* PIK_RESTRICT acc_ac,
+      float* out_x, float* out_b, CFL_Stats* stats) {
+    Accumulators next_dc;
+    next_dc.Add(prev_x[0], prev_y[0], prev_b[0]);
+    if (acc_dc->IsZero()) {
+      *acc_dc = next_dc;  // first use: overwrite rather than blend
+    } else {
+      acc_dc->AddNext(0.25f, next_dc);
+    }
+    float r_x, r_b;
+    // Adaptive estimator still helpful for DC (better than constant r).
+    acc_dc->EstimateCorrelation(&r_x, &r_b);
+    if (stats != nullptr) {
+      stats->rx.Notify(r_x);
+      stats->rb.Notify(r_b);
+    }
+
+    ApplyOp<Op>(bx, by, in_y, in_x, in_b, r_x, r_b, out_x, out_b, stats);
+  }
+
+  // Uses predetermined correlation (fixed r_x, r_b) instead of estimating.
+  template <class Op>
+  static SIMD_ATTR PIK_INLINE void Hardcoded(const size_t bx, const size_t by,
+                                             const float* PIK_RESTRICT in_y,
+                                             const float* in_x,
+                                             const float* in_b, float* out_x,
+                                             float* out_b, CFL_Stats* stats) {
+    const float r_x = 0.005f;
+    const float r_b = 0.93f;
+    ApplyOp<Op>(bx, by, in_y, in_x, in_b, r_x, r_b, out_x, out_b, stats);
+  }
+  // Quantizes *from and writes the rescaled value to *to; strides are unused.
+  static void Quantize(const size_t c, const Quantizer& quantizer,
+                       uint8_t quant_table, const int32_t quant_ac,
+                       const float* PIK_RESTRICT from, const size_t from_stride,
+                       float* PIK_RESTRICT to, size_t const to_stride) {
+    // Always use DCT8 quantization kind for DC
+    const float mul = quantizer.DequantMatrix(0, kQuantKindDCT8, c)[0] *
+                      quantizer.inv_quant_dc();
+    *to = quantizer.QuantizeDC(c, *from) * mul;
+  }
+
+ private:
+  // Decorrelates/restores one x and b coefficient. in_x,b may alias out_x,b.
+  template <class Op>
+  static SIMD_ATTR PIK_INLINE void ApplyOp(const size_t bx, const size_t by,
+                                           const float* PIK_RESTRICT in_y,
+                                           const float* in_x, const float* in_b,
+                                           const float r_x, const float r_b,
+                                           float* out_x, float* out_b,
+                                           CFL_Stats* stats) {
+    const size_t k = 0;
+    out_x[k] = Op()(in_y[k], r_x, in_x[k]);
+    out_b[k] = Op()(in_y[k], r_b, in_b[k]);
+  }
+};
+
+// Fills quantized `residual_xb` and `restored_xb` for block row `rect_by`
+// (< `rect.ysize()`). acc_dc/acc_ac serve the decorrelate pass, *2 the restore.
+template <class Block>
+SIMD_ATTR void DecorrelateRow(
+    const ImageF& quantized_y, const Image3F& exact_xb, const Rect& rect,
+    const size_t rect_by, Accumulators* PIK_RESTRICT acc_dc,
+    Accumulators* PIK_RESTRICT acc_ac, Accumulators* PIK_RESTRICT acc_dc2,
+    Accumulators* PIK_RESTRICT acc_ac2, const Quantizer& quantizer,
+    const uint8_t quant_table, Image3F* PIK_RESTRICT residual_xb,
+    Image3F* PIK_RESTRICT restored_xb, CFL_Stats* stats) {
+  PROFILER_FUNC;
+  // WARNING: do not use rect.*Row because images are in DCT layout.
+  const size_t by = rect.y0() + rect_by;
+  const float* PIK_RESTRICT row_quantized_y = quantized_y.ConstRow(by);
+  const float* PIK_RESTRICT row_exact_x = exact_xb.ConstPlaneRow(0, by);
+  const float* PIK_RESTRICT row_exact_b = exact_xb.ConstPlaneRow(2, by);
+  const int32_t* PIK_RESTRICT row_quant = quantizer.RawQuantField().Row(by);
+  float* PIK_RESTRICT row_residual_x = residual_xb->PlaneRow(0, by);
+  float* PIK_RESTRICT row_residual_b = residual_xb->PlaneRow(2, by);
+  float* PIK_RESTRICT row_restored_x = restored_xb->PlaneRow(0, by);
+  float* PIK_RESTRICT row_restored_b = restored_xb->PlaneRow(2, by);
+
+  // Exact residuals, will be quantized and stored to residual_xb.
+  // TODO(janwas): store in separate image to allow repeated quantization
+  SIMD_ALIGN float residual_x[kDCTBlockSize];
+  SIMD_ALIGN float residual_b[kDCTBlockSize];
+  const size_t from_stride = kDCTBlockSize;
+
+  // Leftmost block
+  {
+    const size_t rect_bx = 0;
+    const size_t bx = rect.x0() + rect_bx;
+    const size_t pos = Block::PosFromBX(bx);  // coefficient
+
+    if (rect_by == 0) {  // Top-left block: use default correlation
+      // Fill residual
+      Block::template Hardcoded<OpDecorrelate>(
+          bx, by, row_quantized_y + pos, row_exact_x + pos, row_exact_b + pos,
+          residual_x, residual_b, stats);
+
+      // residual => row_residual
+      Block::Quantize(/*c=*/0, quantizer, quant_table, row_quant[bx],
+                      residual_x, from_stride, row_residual_x + pos,
+                      residual_xb->PixelsPerRow());
+      Block::Quantize(/*c=*/2, quantizer, quant_table, row_quant[bx],
+                      residual_b, from_stride, row_residual_b + pos,
+                      residual_xb->PixelsPerRow());
+
+      // row_residual => row_restored
+      Block::template Hardcoded<OpRestore>(
+          bx, by, row_quantized_y + pos, row_residual_x + pos,
+          row_residual_b + pos, row_restored_x + pos, row_restored_b + pos,
+          stats);
+    } else {  // Estimate from north block
+      const size_t yp = by - 1;
+      const float* PIK_RESTRICT row_prev_y = quantized_y.ConstRow(yp);
+      const float* PIK_RESTRICT row_prev_x = restored_xb->ConstPlaneRow(0, yp);
+      const float* PIK_RESTRICT row_prev_b = restored_xb->ConstPlaneRow(2, yp);
+      // Fill residual
+      Block::template Adaptive<OpDecorrelate>(
+          bx, by, row_quantized_y + pos, row_exact_x + pos, row_exact_b + pos,
+          row_prev_y + pos, row_prev_x + pos, row_prev_b + pos, acc_dc, acc_ac,
+          residual_x, residual_b, stats);
+
+      // residual => row_residual
+      Block::Quantize(/*c=*/0, quantizer, quant_table, row_quant[bx],
+                      residual_x, from_stride, row_residual_x + pos,
+                      residual_xb->PixelsPerRow());
+      Block::Quantize(/*c=*/2, quantizer, quant_table, row_quant[bx],
+                      residual_b, from_stride, row_residual_b + pos,
+                      residual_xb->PixelsPerRow());
+
+      // row_residual => row_restored
+      Block::template Adaptive<OpRestore>(
+          bx, by, row_quantized_y + pos, row_residual_x + pos,
+          row_residual_b + pos, row_prev_y + pos, row_prev_x + pos,
+          row_prev_b + pos, acc_dc2, acc_ac2, row_restored_x + pos,
+          row_restored_b + pos, stats);
+    }
+  }
+
+  // bx > 0: estimate from west block
+  for (size_t rect_bx = 1; rect_bx < rect.xsize(); ++rect_bx) {
+    const size_t bx = rect.x0() + rect_bx;
+    const size_t pos = Block::PosFromBX(bx);
+    const size_t prev = pos - Block::PosFromBX(1);
+    // Fill residual
+    Block::template Adaptive<OpDecorrelate>(
+        bx, by, row_quantized_y + pos, row_exact_x + pos, row_exact_b + pos,
+        row_quantized_y + prev, row_restored_x + prev, row_restored_b + prev,
+        acc_dc, acc_ac, residual_x, residual_b, stats);
+
+    // residual => row_residual
+    Block::Quantize(/*c=*/0, quantizer, quant_table, row_quant[bx], residual_x,
+                    from_stride, row_residual_x + pos,
+                    residual_xb->PixelsPerRow());
+    Block::Quantize(/*c=*/2, quantizer, quant_table, row_quant[bx], residual_b,
+                    from_stride, row_residual_b + pos,
+                    residual_xb->PixelsPerRow());
+
+    // row_residual => row_restored
+    Block::template Adaptive<OpRestore>(
+        bx, by, row_quantized_y + pos, row_residual_x + pos,
+        row_residual_b + pos, row_quantized_y + prev, row_restored_x + prev,
+        row_restored_b + prev, acc_dc2, acc_ac2, row_restored_x + pos,
+        row_restored_b + pos, stats);
+  }
+}
+
+// Restores x/b for block row `rect_by` by applying OpRestore to the residuals
+// (planes 0 and 2). residual_xb may be aliased with restored_xb.
+template <class Block>
+SIMD_ATTR void RestoreRow(const ImageF& quantized_y, const Image3F& residual_xb,
+                          const Rect& rect, const size_t rect_by,
+                          Accumulators* PIK_RESTRICT acc_dc,
+                          Accumulators* PIK_RESTRICT acc_ac,
+                          Image3F* restored_xb, CFL_Stats* stats) {
+  PROFILER_FUNC;
+  // WARNING: do not use rect.*Row because images are in DCT layout.
+  const size_t by = rect.y0() + rect_by;
+  const float* PIK_RESTRICT row_quantized_y = quantized_y.ConstRow(by);
+  const float* row_residual_x = residual_xb.PlaneRow(0, by);
+  const float* row_residual_b = residual_xb.PlaneRow(2, by);
+  float* row_restored_x = restored_xb->PlaneRow(0, by);
+  float* row_restored_b = restored_xb->PlaneRow(2, by);
+
+  // Leftmost block
+  {
+    const size_t rect_bx = 0;
+    const size_t bx = rect.x0() + rect_bx;
+    const size_t pos = Block::PosFromBX(bx);  // coefficient
+
+    if (rect_by == 0) {  // Top-left block: use default correlation
+      Block::template Hardcoded<OpRestore>(
+          bx, by, row_quantized_y + pos, row_residual_x + pos,
+          row_residual_b + pos, row_restored_x + pos, row_restored_b + pos,
+          stats);
+    } else {  // Estimate correlation from north block
+      const size_t yp = by - 1;
+      Block::template Adaptive<OpRestore>(
+          bx, by, row_quantized_y + pos, row_residual_x + pos,
+          row_residual_b + pos, quantized_y.ConstRow(yp) + pos,
+          restored_xb->ConstPlaneRow(0, yp) + pos,
+          restored_xb->ConstPlaneRow(2, yp) + pos, acc_dc, acc_ac,
+          row_restored_x + pos, row_restored_b + pos, stats);
+    }
+  }
+
+  // bx > 0: estimate from west block
+  for (size_t rect_bx = 1; rect_bx < rect.xsize(); ++rect_bx) {
+    const size_t bx = rect.x0() + rect_bx;
+    const size_t pos = Block::PosFromBX(bx);  // coefficient
+    const size_t prev = pos - Block::PosFromBX(1);
+    Block::template Adaptive<OpRestore>(
+        bx, by, row_quantized_y + pos, row_residual_x + pos,
+        row_residual_b + pos, row_quantized_y + prev, row_restored_x + prev,
+        row_restored_b + prev, acc_dc, acc_ac, row_restored_x + pos,
+        row_restored_b + pos, stats);
+  }
+}
+
+// Calls PIK_ABORT unless `rect` (in blocks) lies inside `image`, which is an
+// Image3F/ImageF in either DCT layout or DC.
+template <class Block, class ImageT>
+void VerifyRectInside(const Rect& rect, const ImageT& image) {
+  const size_t coeff_end = Block::PosFromBX(rect.x0() + rect.xsize());
+  const size_t row_end = rect.y0() + rect.ysize();  // blocks
+  if (coeff_end <= image.xsize() && row_end <= image.ysize()) return;
+  PIK_ABORT("Rect(blocks) %zu,%zu %zux%zu Image %zux%zu\n", rect.x0(),
+            rect.y0(), rect.xsize(), rect.ysize(), image.xsize(),
+            image.ysize());
+}
+
+// Runs DecorrelateRow over every block row of `rect` (top to bottom) and then,
+// if `stats` is non-null, feeds the residual/restored images to its Update().
+template <class Block>
+SIMD_ATTR void DecorrelateT(
+    const ImageF& quantized_y, const Image3F& exact_xb, const Rect& rect,
+    const Quantizer& quantizer, size_t quant_table,
+    Image3F* PIK_RESTRICT residual_xb, Image3F* PIK_RESTRICT restored_xb,
+    CFL_Stats* stats) {
+  VerifyRectInside<Block>(rect, quantized_y);
+  VerifyRectInside<Block>(rect, exact_xb);
+  VerifyRectInside<Block>(rect, *residual_xb);
+  VerifyRectInside<Block>(rect, *restored_xb);
+  // If these are the same, Update() can't compare before and after.
+  PIK_ASSERT(&exact_xb != residual_xb);
+
+  // Accumulators live outside the loop so every row continues the same state.
+  Accumulators acc_dc, acc_ac, acc_dc2, acc_ac2;
+  for (size_t row = 0; row < rect.ysize(); ++row) {
+    DecorrelateRow<Block>(quantized_y, exact_xb, rect, row, &acc_dc, &acc_ac,
+                          &acc_dc2, &acc_ac2, quantizer, quant_table,
+                          residual_xb, restored_xb, stats);
+  }
+
+  if (stats == nullptr) return;
+  stats->Update(*residual_xb, *restored_xb, rect, Block::PosFromBX(1));
+}
+
+// Runs RestoreRow over every block row of `rect`, sharing one accumulator pair.
+template <class Block>
+SIMD_ATTR void RestoreT(
+    const ImageF& quantized_y, const Image3F& residual_xb, const Rect& rect,
+    Image3F* restored_xb, CFL_Stats* stats) {
+  VerifyRectInside<Block>(rect, quantized_y);
+  VerifyRectInside<Block>(rect, residual_xb);
+  VerifyRectInside<Block>(rect, *restored_xb);
+  Accumulators dc_state, ac_state;
+  for (size_t row = 0; row < rect.ysize(); ++row) {
+    RestoreRow<Block>(quantized_y, residual_xb, rect, row, &dc_state,
+                      &ac_state, restored_xb, stats);
+  }
+}
+
+}  // namespace
+
+SIMD_ATTR void DecorrelateAC(const ImageF& quantized_y, const Image3F& exact_xb,
+                             const Rect& rect, const Quantizer& quantizer,
+                             uint8_t quant_table,  // DecorrelateT takes size_t
+                             Image3F* PIK_RESTRICT residual_xb,
+                             Image3F* PIK_RESTRICT restored_xb,
+                             CFL_Stats* stats) {
+  DecorrelateT<BlockAC>(quantized_y, exact_xb, rect, quantizer, quant_table,
+                        residual_xb, restored_xb, stats);
+}  // DecorrelateAC: forwards every argument to DecorrelateT<BlockAC>
+
+SIMD_ATTR void DecorrelateDC(const ImageF& quantized_y, const Image3F& exact_xb,
+                             const Rect& rect, const Quantizer& quantizer,
+                             uint8_t quant_table,  // DecorrelateT takes size_t
+                             Image3F* PIK_RESTRICT residual_xb,
+                             Image3F* PIK_RESTRICT restored_xb,
+                             CFL_Stats* stats) {
+  DecorrelateT<BlockDC>(quantized_y, exact_xb, rect, quantizer, quant_table,
+                        residual_xb, restored_xb, stats);
+}  // DecorrelateDC: forwards every argument to DecorrelateT<BlockDC>
+
+SIMD_ATTR void RestoreAC(const ImageF& quantized_y, const Image3F& residual_xb,
+                         const Rect& rect, Image3F* restored_xb,
+                         CFL_Stats* stats) {
+  RestoreT<BlockAC>(quantized_y, residual_xb, rect, restored_xb, stats);
+}  // RestoreAC: forwards every argument to RestoreT<BlockAC>
+
+SIMD_ATTR void RestoreDC(const ImageF& quantized_y, const Image3F& residual_xb,
+                         const Rect& rect, Image3F* restored_xb,
+                         CFL_Stats* stats) {
+  RestoreT<BlockDC>(quantized_y, residual_xb, rect, restored_xb, stats);
+}  // RestoreDC: forwards every argument to RestoreT<BlockDC>
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/chroma_from_luma.h b/codec/L2/demos/pikEnc/host/pik/chroma_from_luma.h
new file mode 100755
index 0000000000..d94058aac0
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/chroma_from_luma.h
@@ -0,0 +1,55 @@
+// Copyright 2019 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_CHROMA_FROM_LUMA_H_
+#define PIK_CHROMA_FROM_LUMA_H_
+
+// Chroma-from-luma without side information.
+
+#include "pik/chroma_from_luma_fwd.h"  // CFL_Stats
+#include "pik/common.h"
+#include "pik/descriptive_statistics.h"
+#include "pik/image.h"
+#include "pik/quantizer.h"
+
+namespace pik {
+
+// Sets X and B channels while leaving Y unchanged:
+//   `residual_xb` to quantize(`exact_xb` - r * `quantized_y`),
+//   `restored_xb` to `residual_xb` + r * `quantized_y`,
+// r is computed from previous (in scan order) parts of `restored_xb`. All
+// images are in DCT layout; `rect` is in units of blocks and must match the
+// later RestoreAC call. Set `stats` = nullptr to skip statistics gathering.
+SIMD_ATTR void DecorrelateAC(
+    const ImageF& quantized_y, const Image3F& exact_xb, const Rect& rect,
+    const Quantizer& quantizer, uint8_t quant_table,
+    Image3F* PIK_RESTRICT residual_xb, Image3F* PIK_RESTRICT restored_xb,
+    CFL_Stats* stats);
+
+// Sets X and B channels while leaving Y unchanged:
+//   `restored_xb` to `residual_xb` + r * `quantized_y`. Thus, `restored_xb`
+// matches the image returned by DecorrelateAC. All images are in DCT layout,
+// and `rect` is in blocks. `residual_xb` may alias `restored_xb`. To skip
+// statistics gathering, set `stats` = nullptr.
+SIMD_ATTR void RestoreAC(const ImageF& quantized_y, const Image3F& residual_xb,
+                         const Rect& rect, Image3F* restored_xb,
+                         CFL_Stats* stats);
+
+// As above, but all images contain only DC coefficients.
+SIMD_ATTR void DecorrelateDC(const ImageF& quantized_y, const Image3F& exact_xb,
+                             const Rect& rect, const Quantizer& quantizer,
+                             uint8_t quant_table,
+                             Image3F* PIK_RESTRICT residual_xb,
+                             Image3F* PIK_RESTRICT restored_xb,
+                             CFL_Stats* stats);
+
+SIMD_ATTR void RestoreDC(const ImageF& quantized_y, const Image3F& residual_xb,
+                         const Rect& rect, Image3F* restored_xb,
+                         CFL_Stats* stats);
+
+}  // namespace pik
+
+#endif  // PIK_CHROMA_FROM_LUMA_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/chroma_from_luma_fwd.h b/codec/L2/demos/pikEnc/host/pik/chroma_from_luma_fwd.h
new file mode 100755
index 0000000000..66a312ffd8
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/chroma_from_luma_fwd.h
@@ -0,0 +1,127 @@
+// Copyright 2019 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_CHROMA_FROM_LUMA_FWD_H_
+#define PIK_CHROMA_FROM_LUMA_FWD_H_
+
+// Chroma-from-luma statistics (separate header avoids circular dependencies)
+
+#include "pik/descriptive_statistics.h"
+#include "pik/image.h"
+
+namespace pik {
+
+struct CFL_Stats {
+  // Zeroes all per-coefficient sums and counters.
+  CFL_Stats() {
+    for (size_t k = 0; k < 64; ++k) {
+      sum_abs_residual_x[k] = sum_abs_residual_b[k] = sum_abs_restored_x[k] =
+          sum_abs_restored_b[k] = 0.0;
+      smaller_x[k] = smaller_b[k] = total[k] = 0;
+    }
+  }
+
+  // Adds the X and B samples of every block in `rect` to the per-coefficient
+  // sums. `bx_mul` is the number of samples per block in a row (at most 64).
+  void Update(const Image3F& residuals, const Image3F& restored,
+              const Rect& rect, const size_t bx_mul) {
+    PIK_ASSERT(&residuals != &restored);
+    PIK_ASSERT(bx_mul <= 64);  // k below indexes the 64-entry arrays
+    for (size_t rect_by = 0; rect_by < rect.ysize(); ++rect_by) {
+      const size_t by = rect.y0() + rect_by;
+      const float* row_restored_x = restored.ConstPlaneRow(0, by);
+      const float* row_restored_b = restored.ConstPlaneRow(2, by);
+      const float* row_residual_x = residuals.ConstPlaneRow(0, by);
+      const float* row_residual_b = residuals.ConstPlaneRow(2, by);
+      for (size_t rect_bx = 0; rect_bx < rect.xsize(); ++rect_bx) {
+        const size_t bx = rect.x0() + rect_bx;
+        for (size_t k = 0; k < bx_mul; ++k) {
+          const size_t x = bx * bx_mul + k;
+          total[k] += 1;
+          // X channel.
+          const float abs_residual_x = std::abs(row_residual_x[x]);
+          const float abs_restored_x = std::abs(row_restored_x[x]);
+          sum_abs_residual_x[k] += abs_residual_x;
+          sum_abs_restored_x[k] += abs_restored_x;
+          smaller_x[k] += abs_residual_x <= abs_restored_x;
+          if (abs_restored_x > 1E-6f) {
+            const float ratio = abs_residual_x / abs_restored_x;
+            if (ratio > 1E6) {
+              printf("ratio %E restored %.3f res %.5f at bx %zu(%zu) k %zu\n",
+                     ratio, abs_restored_x, abs_residual_x, bx, bx_mul, k);
+            }
+            ratio_x.Notify(ratio);
+          }
+          // B channel (same bookkeeping, without the outlier printout).
+          const float abs_residual_b = std::abs(row_residual_b[x]);
+          const float abs_restored_b = std::abs(row_restored_b[x]);
+          sum_abs_residual_b[k] += abs_residual_b;
+          sum_abs_restored_b[k] += abs_restored_b;
+          smaller_b[k] += abs_residual_b <= abs_restored_b;
+          if (abs_restored_b > 1E-6f) {
+            const float ratio = abs_residual_b / abs_restored_b;
+            ratio_b.Notify(ratio);
+          }
+        }
+      }
+    }
+  }
+
+  // Merges everything gathered in `other` into this object.
+  void Assimilate(const CFL_Stats& other) {
+    rx.Assimilate(other.rx);
+    rb.Assimilate(other.rb);
+    ratio_x.Assimilate(other.ratio_x);
+    ratio_b.Assimilate(other.ratio_b);
+    for (size_t k = 0; k < 64; ++k) {
+      sum_abs_restored_x[k] += other.sum_abs_restored_x[k];
+      sum_abs_restored_b[k] += other.sum_abs_restored_b[k];
+      sum_abs_residual_x[k] += other.sum_abs_residual_x[k];
+      sum_abs_residual_b[k] += other.sum_abs_residual_b[k];
+      smaller_x[k] += other.smaller_x[k];
+      smaller_b[k] += other.smaller_b[k];
+      total[k] += other.total[k];
+    }
+  }
+
+  // Prints one line per coefficient with a non-zero restored sum, then stats.
+  void Print() const {
+    for (size_t k = 0; k < 64; ++k) {
+      if (sum_abs_restored_x[k] == 0.0 && sum_abs_restored_b[k] == 0) {
+        continue;
+      }
+      printf(
+          " %2zu: residual %.3E %.3E restored %.3E %.3E  smaller %6.2f %6.2f\n",
+          k, sum_abs_residual_x[k], sum_abs_residual_b[k],
+          sum_abs_restored_x[k], sum_abs_restored_b[k],
+          100.0 * smaller_x[k] / total[k], 100.0 * smaller_b[k] / total[k]);
+    }
+    printf("%s\n%s\n", ratio_x.ToString().c_str(), ratio_b.ToString().c_str());
+    const int flags = Stats::kNoSkewKurt + Stats::kNoGeomean;
+    printf("Corr %s %s\n", rx.ToString(flags).c_str(),
+           rb.ToString(flags).c_str());
+  }
+
+  // Correlation coefficients (not written by Update())
+  Stats rx;
+  Stats rb;
+
+  // residual/restored
+  Stats ratio_x;
+  Stats ratio_b;
+  // Per-coefficient sums/counts, indexed by the position k within a block.
+  double sum_abs_restored_x[64];
+  double sum_abs_restored_b[64];
+  double sum_abs_residual_x[64];
+  double sum_abs_residual_b[64];
+  size_t smaller_x[64];
+  size_t smaller_b[64];
+  size_t total[64];
+};
+
+}  // namespace pik
+
+#endif  // PIK_CHROMA_FROM_LUMA_FWD_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/cluster.h b/codec/L2/demos/pikEnc/host/pik/cluster.h
new file mode 100755
index 0000000000..39e13a11b4
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/cluster.h
@@ -0,0 +1,360 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+// Functions for clustering similar histograms together.
+
+#ifndef PIK_CLUSTER_H_
+#define PIK_CLUSTER_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <algorithm>
+#include <cmath>
+#include <complex>
+#include <cstdint>
+#include <limits>
+#include <map>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "pik/fast_log.h"
+
+namespace pik {
+
+struct HistogramPair {
+  int idx1;          // smaller histogram index of the pair
+  int idx2;          // larger histogram index of the pair
+  float cost_combo;  // bit cost of the combined histogram
+  float cost_diff;   // cost change of merging (see CompareAndPushToQueue)
+};
+
+// Larger cost_diff compares as "less"; ties are broken by index distance.
+inline bool operator<(const HistogramPair& p1, const HistogramPair& p2) {
+  const bool tied = !(p1.cost_diff != p2.cost_diff);
+  return tied ? std::abs(p1.idx1 - p1.idx2) > std::abs(p2.idx1 - p2.idx2)
+              : p1.cost_diff > p2.cost_diff;
+}
+
+// Context-map entropy reduction from merging clusters of size_a and size_b.
+inline float ClusterCostDiff(int size_a, int size_b) {
+  const int merged = size_a + size_b;
+  const auto separate = size_a * FastLog2(size_a) + size_b * FastLog2(size_b);
+  return separate - merged * FastLog2(merged);
+}
+
+// Evaluates merging out[idx1] with out[idx2] and, if either histogram is empty
+// or the resulting cost is below the current threshold, adds the pair to
+// *pairs. The best pair (per operator<) is kept at the front of *pairs; the
+// rest of the vector is unordered.
+template <typename HistogramType>
+void CompareAndPushToQueue(const HistogramType* out, const int* cluster_size,
+                           const float* bit_cost, int idx1, int idx2,
+                           std::vector<HistogramPair>* pairs) {
+  if (idx1 == idx2) {
+    return;
+  }
+  if (idx2 < idx1) std::swap(idx1, idx2);  // ensure idx1 < idx2
+
+  HistogramPair candidate;
+  candidate.idx1 = idx1;
+  candidate.idx2 = idx2;
+  // Context-map term minus the bit costs of the two separate histograms.
+  candidate.cost_diff =
+      0.5f * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]) -
+      bit_cost[idx1] - bit_cost[idx2];
+
+  if (out[idx1].total_count_ == 0) {
+    // out[idx1] is empty: the combination costs what out[idx2] costs.
+    candidate.cost_combo = bit_cost[idx2];
+  } else if (out[idx2].total_count_ == 0) {
+    // out[idx2] is empty: the combination costs what out[idx1] costs.
+    candidate.cost_combo = bit_cost[idx1];
+  } else {
+    // Keep the pair only if it stays below max(0, cost_diff of the front).
+    const float threshold = pairs->empty()
+                                ? std::numeric_limits<float>::max()
+                                : std::max(0.0f, (*pairs)[0].cost_diff);
+    HistogramType merged = out[idx1];
+    merged.AddHistogram(out[idx2]);
+    const float merged_cost = merged.PopulationCost();
+    if (!(merged_cost + candidate.cost_diff < threshold)) {
+      return;
+    }
+    candidate.cost_combo = merged_cost;
+  }
+
+  candidate.cost_diff += candidate.cost_combo;
+  if (pairs->empty() || !(pairs->front() < candidate)) {
+    // Not better than the current front: just append.
+    pairs->push_back(candidate);
+  } else {
+    // New best pair: move the old front to the back and take its place.
+    pairs->push_back(pairs->front());
+    pairs->front() = candidate;
+  }
+}
+
+template <typename HistogramType>  // Greedy pair merging; returns #clusters.
+int HistogramCombine(HistogramType* out, int* cluster_size, float* bit_cost,
+                     uint32_t* symbols, int symbols_size, int max_clusters) {
+  float cost_diff_threshold = 0.0f;  // phase 1: only cost-reducing merges
+  int min_cluster_size = 1;
+
+  // Uniquify the list of symbols after merging empty clusters.
+  std::vector<int> clusters;
+  clusters.reserve(symbols_size);
+  int sum_of_totals = 0;
+  int first_zero_pop_count_symbol = -1;
+  for (int i = 0; i < symbols_size; ++i) {
+    if (out[symbols[i]].total_count_ == 0) {
+      // Merge the zero pop count histograms into one.
+      if (first_zero_pop_count_symbol == -1) {
+        first_zero_pop_count_symbol = symbols[i];
+        clusters.push_back(symbols[i]);
+      } else {
+        symbols[i] = first_zero_pop_count_symbol;
+      }
+    } else {
+      // Insert all histograms with non-zero pop counts.
+      clusters.push_back(symbols[i]);
+      sum_of_totals += out[symbols[i]].total_count_;
+    }
+  }
+  if (sum_of_totals < 160) {
+    // Use a single histogram if there are only a few samples.
+    // This helps with small images (like 64x64 size) where the
+    // context map is more expensive than the related savings.
+    // TODO: Estimate the actual difference in bitcost to
+    // make the final decision of this strategy and clustering.
+    *cluster_size = 1;  // NOTE(review): sets cluster_size[0]; confirm intended
+    HistogramType combo = out[symbols[0]];
+    for (int i = 1; i < symbols_size; ++i) {
+      combo.AddHistogram(out[symbols[i]]);  // NOTE(review): duplicates re-added?
+    }
+    out[symbols[0]] = combo;
+    for (int i = 1; i < symbols_size; ++i) {
+      symbols[i] = symbols[0];
+    }
+    return 1;
+  }
+  std::sort(clusters.begin(), clusters.end());
+  clusters.resize(std::unique(clusters.begin(), clusters.end()) -
+                  clusters.begin());
+
+  // We maintain a priority queue of histogram pairs, ordered by the bit cost
+  // reduction. For efficiency, only the front of the queue matters, the rest of
+  // it is unordered.
+  std::vector<HistogramPair> pairs;
+  for (int idx1 = 0; idx1 < clusters.size(); ++idx1) {
+    for (int idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) {
+      CompareAndPushToQueue(out, cluster_size, bit_cost, clusters[idx1],
+                            clusters[idx2], &pairs);
+    }
+  }
+
+  while (clusters.size() > min_cluster_size) {
+    if (pairs[0].cost_diff >= cost_diff_threshold) {  // no gainful merge left
+      cost_diff_threshold = std::numeric_limits<float>::max();  // phase 2:
+      min_cluster_size = max_clusters;  // merge on until max_clusters remain
+      continue;
+    }
+
+    // Take the best pair from the top of queue.
+    int best_idx1 = pairs[0].idx1;
+    int best_idx2 = pairs[0].idx2;
+    out[best_idx1].AddHistogram(out[best_idx2]);
+    bit_cost[best_idx1] = pairs[0].cost_combo;
+    cluster_size[best_idx1] += cluster_size[best_idx2];
+    for (int i = 0; i < symbols_size; ++i) {
+      if (symbols[i] == best_idx2) {
+        symbols[i] = best_idx1;
+      }
+    }
+    for (auto cluster = clusters.begin(); cluster != clusters.end();
+         ++cluster) {
+      if (*cluster >= best_idx2) {  // drops the first entry >= best_idx2
+        clusters.erase(cluster);
+        break;
+      }
+    }
+
+    // Remove pairs intersecting the just combined best pair.
+    auto copy_to = pairs.begin();
+    for (int i = 0; i < pairs.size(); ++i) {
+      HistogramPair& p = pairs[i];
+      if (p.idx1 == best_idx1 || p.idx2 == best_idx1 || p.idx1 == best_idx2 ||
+          p.idx2 == best_idx2) {
+        // Remove invalid pair from the queue.
+        continue;
+      }
+      if (pairs.front() < p) {
+        // Replace the top of the queue if needed.
+        auto front = pairs.front();
+        pairs.front() = p;
+        *copy_to = front;
+      } else {
+        *copy_to = p;
+      }
+      ++copy_to;
+    }
+    pairs.resize(copy_to - pairs.begin());
+
+    // Push new pairs formed with the combined histogram to the queue.
+    for (int i = 0; i < clusters.size(); ++i) {
+      CompareAndPushToQueue(out, cluster_size, bit_cost, best_idx1, clusters[i],
+                            &pairs);
+    }
+  }
+  return clusters.size();
+}
+
+// -----------------------------------------------------------------------------
+// Histogram refinement
+
+// Extra bits needed when `histogram` is folded into `candidate`: the cost of
+// the combined histogram minus `candidate_bit_cost`; 0 for an empty histogram.
+template <typename HistogramType>
+float HistogramBitCostDistance(const HistogramType& histogram,
+                               const HistogramType& candidate,
+                               const float candidate_bit_cost) {
+  if (histogram.total_count_ == 0) return 0.0;
+  HistogramType merged = histogram;
+  merged.AddHistogram(candidate);
+  const auto merged_cost = merged.PopulationCost();
+  return merged_cost - candidate_bit_cost;
+}
+
+// For each of the `in_size` input histograms, picks the cluster in `out` with
+// the smallest HistogramBitCostDistance, then rebuilds every used cluster from
+// its newly assigned inputs. Assumes bit_cost[] is up to date for `out`.
+template <typename HistogramType>
+void HistogramRemap(const HistogramType* in, int in_size, HistogramType* out,
+                    float* bit_cost, uint32_t* symbols) {
+  // Sorted, duplicate-free list of the cluster ids currently in use.
+  std::vector<int> used(symbols, symbols + in_size);
+  std::sort(used.begin(), used.end());
+  used.erase(std::unique(used.begin(), used.end()), used.end());
+
+  for (int i = 0; i < in_size; ++i) {
+    // Start from the (already remapped) previous symbol, or symbols[0] if i=0.
+    int winner = symbols[i > 0 ? i - 1 : 0];
+    float winner_bits =
+        HistogramBitCostDistance(in[i], out[winner], bit_cost[winner]);
+    for (const int id : used) {
+      const float bits = HistogramBitCostDistance(in[i], out[id], bit_cost[id]);
+      // Only strictly cheaper candidates replace the current choice.
+      if (!(bits < winner_bits)) continue;
+      winner_bits = bits;
+      winner = id;
+    }
+    symbols[i] = winner;
+  }
+
+  // Rebuild the used clusters from scratch according to the new assignment.
+  for (const int id : used) {
+    out[id].Clear();
+  }
+  for (int i = 0; i < in_size; ++i) {
+    out[symbols[i]].AddHistogram(in[i]);
+  }
+}
+
+// Renumbers the symbols in order of first appearance (0, 1, 2, ...) and
+// permutes/truncates *out so that (*out)[new_symbol] is the matching histogram.
+template <typename HistogramType>
+void HistogramReindex(std::vector<HistogramType>* out,
+                      std::vector<uint32_t>* symbols) {
+  std::vector<HistogramType> original(*out);
+  std::map<int, int> remap;  // old symbol -> new symbol
+  int num_used = 0;
+  for (const uint32_t old_symbol : *symbols) {
+    if (remap.count(old_symbol) != 0) continue;
+    remap[old_symbol] = num_used;
+    (*out)[num_used] = original[old_symbol];
+    ++num_used;
+  }
+  // Drop the slots that no symbol refers to any more.
+  out->resize(num_used);
+  for (uint32_t& symbol : *symbols) {
+    symbol = remap[symbol];
+  }
+}
+
+// Clusters similar histograms in 'in' together, the selected histograms are
+// placed in 'out', and for each index in 'in', *histogram_symbols will
+// indicate which of the 'out' histograms is the best approximation.
+// HistogramType needs Clear(), AddHistogram() and PopulationCost() methods.
+// If num_contexts * num_blocks is 0, both outputs are simply left empty.
+template <typename HistogramType>
+void ClusterHistograms(const std::vector<HistogramType>& in, int num_contexts,
+                       int num_blocks,
+                       const std::vector<int> block_group_offsets,
+                       int max_histograms, std::vector<HistogramType>* out,
+                       std::vector<uint32_t>* histogram_symbols) {
+  const int in_size = num_contexts * num_blocks;
+  std::vector<int> cluster_size(in_size, 1);
+  std::vector<float> bit_cost(in_size);
+  out->resize(in_size);
+  histogram_symbols->resize(in_size);
+  // Nothing to cluster; the code below dereferences element 0 of each buffer.
+  if (in_size == 0) return;
+  for (int i = 0; i < in_size; ++i) {
+    (*out)[i] = in[i];
+    bit_cost[i] = in[i].PopulationCost();
+    (*histogram_symbols)[i] = i;
+  }
+
+  // Collapse similar histograms within a block type.
+  if (num_contexts > 1) {
+    for (int i = 0; i < num_blocks; ++i) {
+      HistogramCombine(&(*out)[0], &cluster_size[0], &bit_cost[0],
+                       &(*histogram_symbols)[i * num_contexts], num_contexts,
+                       max_histograms);
+    }
+  }
+
+  static const int kMinClustersForHistogramRemap = 24;
+  int num_clusters = 0;
+  if (block_group_offsets.size() > 1) {
+    // Collapse similar histograms within block groups.
+    for (int i = 0; i < block_group_offsets.size(); ++i) {
+      int offset = block_group_offsets[i] * num_contexts;
+      int length = ((i + 1 < block_group_offsets.size()
+                         ? block_group_offsets[i + 1] * num_contexts
+                         : in_size) -
+                    offset);
+      int nclusters = HistogramCombine(
+          &(*out)[0], &cluster_size[0], &bit_cost[0],
+          &(*histogram_symbols)[offset], length, max_histograms);
+      // Find the optimal map from original histograms to the final ones.
+      if (nclusters >= 2 && nclusters < kMinClustersForHistogramRemap) {
+        HistogramRemap(&in[offset], length, &(*out)[0], &bit_cost[0],
+                       &(*histogram_symbols)[offset]);
+      }
+      num_clusters += nclusters;
+    }
+  }
+
+  if (block_group_offsets.size() <= 1 || num_clusters > max_histograms) {
+    // If we did not have block groups or the per-block-group clustering ended
+    // with too many histograms, we have to do one final round of clustering
+    // and, as above, remap when the number of clusters is small enough.
+    num_clusters =
+        HistogramCombine(&(*out)[0], &cluster_size[0], &bit_cost[0],
+                         &(*histogram_symbols)[0], in_size, max_histograms);
+    if (num_clusters >= 2 && num_clusters < kMinClustersForHistogramRemap) {
+      HistogramRemap(&in[0], in_size, &(*out)[0], &bit_cost[0],
+                     &(*histogram_symbols)[0]);
+    }
+  }
+
+  // Convert the context map to a canonical form.
+  HistogramReindex(out, histogram_symbols);
+}
+
+}  // namespace pik
+
+#endif  // PIK_CLUSTER_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/cmdline.cc b/codec/L2/demos/pikEnc/host/pik/cmdline.cc
new file mode 100755
index 0000000000..460d3acf7e
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/cmdline.cc
@@ -0,0 +1,58 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/cmdline.h"
+
+#include <string>
+
+namespace pik {
+namespace tools {
+
+// Prints usage, all registered options and the built-in --help to stderr.
+void CommandLineParser::PrintHelp() const {
+  const char* const name = program_name_ ? program_name_ : "command";
+  fprintf(stderr, "Usage: %s [OPTIONS]\n", name);
+  for (const auto& option : options_) {
+    const std::string flags = option->help_flags();
+    fprintf(stderr, " %s\n", flags.c_str());
+    const char* text = option->help_text();
+    if (text != nullptr) fprintf(stderr, "    %s\n", text);
+  }
+  fprintf(stderr, " --help\n    Prints this help message.\n");
+}
+
+// Parses argv[1..argc). Returns false on --help, on an argument that no
+// registered option matches, or when a matching option fails to parse.
+bool CommandLineParser::Parse(int argc, const char* argv[]) {
+  if (argc != 0) program_name_ = argv[0];
+  // argv[0] is the program name, so options start at index 1.
+  for (int i = 1; i < argc;) {
+    const char* const arg = argv[i];
+    if (strcmp("--help", arg) == 0) {
+      // Returning false on Parse() forces to print the help message.
+      return false;
+    }
+    bool matched = false;
+    for (const auto& option : options_) {
+      if (!option->Match(arg)) continue;
+      // Parsing advances the value i on success.
+      if (!option->Parse(argc, argv, &i)) {
+        fprintf(stderr, "Error parsing flag %s\n", arg);
+        return false;
+      }
+      matched = true;
+      break;
+    }
+    if (matched) continue;
+    // No option matched argv[i].
+    fprintf(stderr, "Unknown argument: %s\n", arg);
+    return false;
+  }
+  return true;
+}
+
+}  // namespace tools
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/cmdline.h b/codec/L2/demos/pikEnc/host/pik/cmdline.h
new file mode 100755
index 0000000000..d8a647426d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/cmdline.h
@@ -0,0 +1,269 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_CMDLINE_H_
+#define PIK_CMDLINE_H_
+
+#include "pik/status.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace pik {
+namespace tools {
+
+class CommandLineParser {
+ public:
+  // Identifies a registered option: its index in registration order.
+  typedef size_t OptionId;
+
+  // An abstract class for defining command line options.
+  class CmdOptionInterface {
+   public:
+    CmdOptionInterface() = default;
+    virtual ~CmdOptionInterface() = default;
+
+    // Return a string with the option name or available flags.
+    virtual std::string help_flags() const = 0;
+
+    // Return the help string if any, or nullptr if no help string.
+    virtual const char* help_text() const = 0;
+
+    // Return whether the option was passed.
+    virtual bool matched() const = 0;
+
+    // Returns whether this option matches the passed command line argument.
+    virtual bool Match(const char* arg) const = 0;
+
+    // Parses the option. The passed i points to the argument with the flag
+    // that matches either the short or the long name. On success, *i has been
+    // advanced past every argument consumed by this option.
+    virtual bool Parse(const int argc, const char* argv[], int* i) = 0;
+  };
+
+  // Add a positional argument. When matched, *storage is set to point at the
+  // argv string itself (no copy is made). Returns the id of the added option.
+  OptionId AddPositionalOption(const char* name, const char* help_text,
+                               const char** storage) {
+    options_.emplace_back(new CmdOptionPositional(name, help_text, storage));
+    return options_.size() - 1;
+  }
+
+  // Add an option with a value of type T. The option can be passed as
+  // '-s <value>' or '--long value' or '--long=value'. The CommandLineParser
+  // parser will call the function parser with the string pointing to '<value>'
+  // in either case. Returns the id of the added option.
+  template <typename T>
+  OptionId AddOptionValue(char short_name, const char* long_name,
+                          const char* metavar, const char* help_text,
+                          T* storage, bool(parser)(const char*, T*)) {
+    options_.emplace_back(new CmdOptionFlag<T>(short_name, long_name, metavar,
+                                               help_text, storage, parser));
+    return options_.size() - 1;
+  }
+
+  // Add a flag without a value; parser(storage) is called when the flag is
+  // passed. Returns the id of the added option.
+  template <typename T>
+  OptionId AddOptionFlag(char short_name, const char* long_name,
+                         const char* help_text, T* storage, bool(parser)(T*)) {
+    options_.emplace_back(new CmdOptionFlag<T>(short_name, long_name, help_text,
+                                               storage, parser));
+    return options_.size() - 1;
+  }
+
+  // Returns the option registered under id. The id must come from one of the
+  // Add* functions of this parser (checked with PIK_ASSERT).
+  const CmdOptionInterface* GetOption(OptionId id) const {
+    PIK_ASSERT(id < options_.size());
+    return options_[id].get();
+  }
+
+  // Print the help message.
+  void PrintHelp() const;
+
+  // Parse the command line.
+  bool Parse(int argc, const char* argv[]);
+
+  // Return the remaining positional args
+  std::vector<const char*> PositionalArgs() const;
+
+ private:
+  // A positional argument.
+  class CmdOptionPositional : public CmdOptionInterface {
+   public:
+    CmdOptionPositional(const char* name, const char* help_text,
+                        const char** storage)
+        : name_(name), help_text_(help_text), storage_(storage) {}
+
+    std::string help_flags() const override { return name_; }
+    const char* help_text() const override { return help_text_; }
+    bool matched() const override { return matched_; }
+
+    // Only match non-flag values. This means that you can't pass '-foo' as a
+    // positional argument, but it helps with detecting when passed a flag with
+    // a typo. Each positional option matches at most once.
+    bool Match(const char* arg) const override {
+      return !matched_ && arg[0] != '-';
+    }
+
+    // Stores the pointer to the current argument and consumes it.
+    bool Parse(const int argc, const char* argv[], int* i) override {
+      *storage_ = argv[*i];
+      (*i)++;
+      matched_ = true;
+      return true;
+    }
+
+   private:
+    const char* name_;
+    const char* help_text_;
+    const char** storage_;
+
+    bool matched_{false};
+  };
+
+  // A class for handling an option flag like '-v' or '--foo=bar'.
+  template <typename T>
+  class CmdOptionFlag : public CmdOptionInterface {
+   public:
+    // Construct a flag that doesn't take any value, for example '-v' or
+    // '--long'. Passing a value to it raises an error.
+    CmdOptionFlag(char short_name, const char* long_name, const char* help_text,
+                  T* storage, bool(parser)(T*))
+        : short_name_(short_name),
+          long_name_(long_name),
+          long_name_len_(long_name ? strlen(long_name) : 0),
+          metavar_(nullptr),
+          help_text_(help_text),
+          storage_(storage) {
+      parser_.parser_no_value_ = parser;
+    }
+
+    // Construct a flag that expects a value to be passed. A null metavar is
+    // replaced by "" so that metavar_ stays non-null for value flags.
+    CmdOptionFlag(char short_name, const char* long_name, const char* metavar,
+                  const char* help_text, T* storage,
+                  bool(parser)(const char* arg, T*))
+        : short_name_(short_name),
+          long_name_(long_name),
+          long_name_len_(long_name ? strlen(long_name) : 0),
+          metavar_(metavar ? metavar : ""),
+          help_text_(help_text),
+          storage_(storage) {
+      parser_.parser_with_arg_ = parser;
+    }
+
+    // Formats the flag as '-s METAVAR, --long=METAVAR', omitting the parts
+    // that this flag does not have.
+    std::string help_flags() const override {
+      std::string ret;
+      if (short_name_) {
+        ret += std::string("-") + short_name_;
+        if (metavar_) ret += std::string(" ") + metavar_;
+        if (long_name_) ret += ", ";
+      }
+      if (long_name_) {
+        ret += std::string("--") + long_name_;
+        if (metavar_) ret += std::string("=") + metavar_;
+      }
+      return ret;
+    }
+    const char* help_text() const override { return help_text_; }
+    bool matched() const override { return matched_; }
+
+    bool Match(const char* arg) const override {
+      return MatchShort(arg) || MatchLong(arg);
+    }
+
+    // Note that matched_ is set even when parsing subsequently fails.
+    bool Parse(const int argc, const char* argv[], int* i) override {
+      matched_ = true;
+      if (MatchLong(argv[*i])) {
+        const char* arg = argv[*i] + 2 + long_name_len_;
+        if (arg[0] == '=') {
+          if (metavar_) {
+            // Passed '--long_name=...'.
+            (*i)++;
+            // Skip over the '=' on the LongMatch.
+            arg += 1;
+            return (*parser_.parser_with_arg_)(arg, storage_);
+          } else {
+            // Print the bare flag name: argv[*i] already carries the leading
+            // dashes and the '=value' part, which used to be printed after
+            // another "--".
+            fprintf(stderr, "--%s didn't expect any argument passed to it.\n",
+                    long_name_);
+            return false;
+          }
+        }
+      }
+      // In any other case, it passed a -s or --long_name
+      (*i)++;
+      if (metavar_) {
+        if (argc <= *i) {
+          // argv[*i - 1] is the flag exactly as typed, dashes included, so no
+          // extra "--" is prepended.
+          fprintf(stderr, "%s expected an argument but none passed.\n",
+                  argv[*i - 1]);
+          return false;
+        }
+        return (*parser_.parser_with_arg_)(argv[(*i)++], storage_);
+      } else {
+        return (*parser_.parser_no_value_)(storage_);
+      }
+    }
+
+   private:
+    // Returns whether arg matches the short_name flag of this option.
+    bool MatchShort(const char* arg) const {
+      if (!short_name_ || arg[0] != '-') return false;
+      return arg[1] == short_name_ && arg[2] == 0;
+    }
+
+    // Returns whether arg matches the long_name flag of this option,
+    // potentially with an argument passed to it.
+    bool MatchLong(const char* arg) const {
+      if (!long_name_ || arg[0] != '-' || arg[1] != '-') return false;
+      arg += 2;  // Skips the '--'
+      if (strncmp(long_name_, arg, long_name_len_) != 0) return false;
+      arg += long_name_len_;
+      // Allow "--long_name=foo" and "--long_name" as long matches.
+      return arg[0] == 0 || arg[0] == '=';
+    }
+
+    // A short option passed as '-X' where X is the char. A value of 0 means
+    // no short option.
+    const char short_name_;
+
+    // A long option name passed as '--long' where 'long' is the name of the
+    // option.
+    const char* long_name_;
+    size_t long_name_len_;
+
+    // The text to display when referring to the value passed to this flag, for
+    // example "N" in the flag '--value N'. If null, this flag accepts no value
+    // and therefore no value must be passed.
+    const char* metavar_;
+
+    // The help string for this flag.
+    const char* help_text_;
+
+    // The pointer to the storage of this flag used when parsing.
+    T* storage_;
+
+    // The function to use to parse the value when matched. The function used is
+    // parser_with_arg_ when metavar_ is not null (and the value string will be
+    // used) or parser_no_value_ when metavar_ is null.
+    union {
+      bool (*parser_with_arg_)(const char*, T*);
+      bool (*parser_no_value_)(T*);
+    } parser_;
+
+    // Whether this flag was matched.
+    bool matched_{false};
+  };
+
+  // argv[0] as seen by Parse(), or null before the first Parse() call.
+  const char* program_name_{nullptr};
+
+  // Registered options, in registration order; OptionId indexes this vector.
+  std::vector<std::unique_ptr<CmdOptionInterface>> options_;
+};
+
+}  // namespace tools
+}  // namespace pik
+
+#endif  // PIK_CMDLINE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/codec.h b/codec/L2/demos/pikEnc/host/pik/codec.h
new file mode 100755
index 0000000000..592a41a829
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/codec.h
@@ -0,0 +1,288 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_CODEC_H_
+#define PIK_CODEC_H_
+
+// Interface for encoding/decoding images and their metadata.
+
+#include <stddef.h>
+#include <string>
+#include <vector>
+#include "pik/color_management.h"
+#include "pik/common.h"
+#include "pik/data_parallel.h"
+#include "pik/image.h"
+#include "pik/image_ops.h"
+#include "pik/metadata.h"
+
+namespace pik {
+
+// Range of one channel, stored as its lower bound plus its extent. Used to
+// convert between (full-range) external and (bounded or unbounded) temp
+// values; external_image.cc defines what temp/external mean.
+struct CodecInterval {
+  CodecInterval() {}
+  // Keeps the lower bound and the extent (upper - lower) of the range.
+  constexpr CodecInterval(float lower, float upper)
+      : min(lower), width(upper - lower) {}
+  // Defaults for temp.
+  float min = 0.0f;
+  float width = 1.0f;
+};
+
+// One interval per channel.
+using CodecIntervals = std::array<CodecInterval, 4>;  // RGB[A] or Y[A]
+
+// Shared (read-only, no need for thread_local) data. Prefer reusing a single
+// instance to avoid regenerating the color profiles on every construction.
+struct CodecContext {
+  CodecContext();  // Fills both pairs below.
+
+  // Index with CodecInOut.IsGray(): [0] is the RGB encoding, [1] the gray one.
+  const std::array<ColorEncoding, 2> c_srgb;         // sRGB transfer.
+  const std::array<ColorEncoding, 2> c_linear_srgb;  // Linear transfer.
+};
+
+// Allows passing arbitrary metadata to decoders (required for PNM).
+class DecoderHints {
+ public:
+  // Appends one hint. Known key:
+  //   color_space, with value Description(c/pp): the ColorEncoding to assume
+  //   for the decoded pixels. Without it, a codec that did not obtain an ICC
+  //   profile from the image assumes sRGB.
+  //
+  // Strings are taken from the command line, so avoid spaces for convenience.
+  void Add(const std::string& key, const std::string& value) {
+    kv_.push_back(KeyValue(key, value));
+  }
+
+  // Invokes func(key, value) once per hint, in the order they were added.
+  template <class Func>
+  void Foreach(const Func& func) const {
+    for (size_t i = 0; i < kv_.size(); ++i) {
+      const KeyValue& hint = kv_[i];
+      func(hint.key, hint.value);
+    }
+  }
+
+ private:
+  // Hints are kept pre-split so that codecs do not have to parse them.
+  struct KeyValue {
+    KeyValue(const std::string& k, const std::string& v) : key(k), value(v) {}
+
+    std::string key;
+    std::string value;
+  };
+
+  std::vector<KeyValue> kv_;
+};
+
+// Codecs supported by CodecInOut::Encode.
+enum class Codec : uint32_t {
+  kUnknown,  // Returned by CodecFromExtension for unrecognized extensions.
+  kPNG,
+  kPNM,  // Selected for the .pgm, .ppm and .pfm extensions.
+};
+
+std::vector<Codec> Values(Codec);  // All codecs except kUnknown.
+
+// Extensions are lower case ASCII including the dot, e.g. ".png".
+std::string ExtensionFromCodec(Codec codec);  // Empty string for kUnknown.
+Codec CodecFromExtension(const std::string& extension);
+
+// An image and all its metadata, plus functions to encode/decode to/from
+// other image codecs. Also used as the input/output type of PIK.
+class CodecInOut {
+ public:
+  // "codec_context" must remain valid throughout the lifetime of this instance.
+  explicit CodecInOut(const CodecContext* codec_context)
+      : context_(codec_context) {}
+  const CodecContext* Context() const { return context_; }
+
+  // Move-only (allows storing in std::vector).
+  CodecInOut(CodecInOut&&) = default;
+  CodecInOut& operator=(CodecInOut&&) = default;
+
+  // -- SIZE
+
+  // Dimensions of the color image. Returned by value, so the return type
+  // carries no top-level const (it would be ignored by the compiler anyway).
+  size_t xsize() const { return color().xsize(); }
+  size_t ysize() const { return color().ysize(); }
+  // Shrinks the color image and, if present, the alpha plane.
+  void ShrinkTo(size_t xsize, size_t ysize) {
+    color_.ShrinkTo(xsize, ysize);
+    if (HasAlpha()) alpha_.ShrinkTo(xsize, ysize);
+  }
+
+  // -- COLOR
+
+  // If c_current.IsGray(), all planes must be identical.
+  void SetFromImage(Image3F&& color, const ColorEncoding& c_current);
+
+  // Sets image data from 8-bit sRGB pixel array in bytes.
+  // Amount of input bytes per pixel must be:
+  // (is_gray ? 1 : 3) + (has_alpha ? 1 : 0)
+  Status SetFromSRGB(size_t xsize, size_t ysize, bool is_gray, bool has_alpha,
+                     const uint8_t* pixels, const uint8_t* end,
+                     ThreadPool* pool = nullptr);
+
+  // Sets image data from 16-bit sRGB data.
+  // Amount of input uint16_t's per pixel must be:
+  // (is_gray ? 1 : 3) + (has_alpha ? 1 : 0)
+  Status SetFromSRGB(size_t xsize, size_t ysize, bool is_gray, bool has_alpha,
+                     const uint16_t* pixels, const uint16_t* end,
+                     ThreadPool* pool = nullptr);
+
+  // Sets image data from sRGB pixel array in bytes.
+  // This low-level function supports both 8-bit and 16-bit data in bytes to
+  // provide efficient access to arbitrary byte order.
+  // Amount of input bytes per pixel must be:
+  // ((is_gray ? 1 : 3) + (has_alpha ? 1 : 0)) * (is_16bit ? 2 : 1)
+  // The ordering of the channels is interleaved RGBA or gray+alpha in that
+  // order.
+  // The 16-bit byte order is given by big_endian, and this has no effect when
+  // is_16bit is false.
+  Status SetFromSRGB(size_t xsize, size_t ysize, bool is_gray, bool has_alpha,
+                     bool is_16bit, bool big_endian,
+                     const uint8_t* pixels, const uint8_t* end,
+                     ThreadPool* pool = nullptr);
+
+  // Decodes "bytes". Sets dec_c_original to c_current (for later encoding).
+  // dec_hints may specify the "color_space" (otherwise, defaults to sRGB).
+  Status SetFromBytes(const PaddedBytes& bytes, ThreadPool* pool = nullptr);
+
+  // Reads from file and calls SetFromBytes.
+  Status SetFromFile(const std::string& pathname, ThreadPool* pool = nullptr);
+
+  const Image3F& color() const { return color_; }
+
+  // Returns whether the color image has identical planes. Once established by
+  // Set*, remains unchanged until a subsequent Set*.
+  bool IsGray() const { return c_current_.IsGray(); }
+
+  // Returns a copy of the encoding that the values in color() are defined in.
+  const ColorEncoding c_current() const { return c_current_; }
+  bool IsSRGB() const {
+    return c_current_.white_point == WhitePoint::kD65 &&
+           c_current_.primaries == Primaries::kSRGB &&
+           c_current_.transfer_function == TransferFunction::kSRGB;
+  }
+  bool IsLinearSRGB() const {
+    return c_current_.white_point == WhitePoint::kD65 &&
+           c_current_.primaries == Primaries::kSRGB &&
+           IsLinear(c_current_.transfer_function);
+  }
+
+  // Transforms color to c_desired and sets c_current to c_desired. Alpha
+  // remains unchanged.
+  Status TransformTo(const ColorEncoding& c_desired,
+                     ThreadPool* pool = nullptr);
+
+  // Copies this:rect, converts to c_desired, and allocates+fills out.
+  Status CopyTo(const Rect& rect, const ColorEncoding& c_desired, Image3B* out,
+                ThreadPool* pool = nullptr) const;
+  Status CopyTo(const Rect& rect, const ColorEncoding& c_desired, Image3U* out,
+                ThreadPool* pool = nullptr) const;
+  Status CopyTo(const Rect& rect, const ColorEncoding& c_desired, Image3F* out,
+                ThreadPool* pool = nullptr) const;
+  // Same as CopyTo with the context's sRGB encoding matching IsGray().
+  Status CopyToSRGB(const Rect& rect, Image3B* out,
+                    ThreadPool* pool = nullptr) const;
+
+  // TODO(janwas): remove, use Metadata field instead
+  bool HasOriginalBitsPerSample() const { return has_dec_bits_per_sample_; }
+  size_t original_bits_per_sample() const {
+    PIK_ASSERT(HasOriginalBitsPerSample());
+    return dec_bits_per_sample_;
+  }
+  void SetOriginalBitsPerSample(size_t bit_depth) {
+    dec_bits_per_sample_ = bit_depth;
+    has_dec_bits_per_sample_ = true;
+  }
+
+  // -- ALPHA
+
+  // An alpha plane is present iff it has a nonzero width.
+  bool HasAlpha() const { return alpha_.xsize() != 0; }
+  // Zero if all pixels are transparent.
+  size_t AlphaBits() const {
+    PIK_ASSERT(HasAlpha());
+    return alpha_bits_;
+  }
+  const ImageU& alpha() const {
+    PIK_ASSERT(HasAlpha());
+    return alpha_;
+  }
+
+  // Takes ownership of "alpha". alpha_bits must be 8 or 16, and the plane must
+  // cover the same number of kBlockDim-sized blocks as the color image in
+  // both dimensions (both are enforced with PIK_CHECK).
+  void SetAlpha(ImageU&& alpha, size_t alpha_bits) {
+    PIK_CHECK(alpha_bits == 8 || alpha_bits == 16);
+    alpha_bits_ = alpha_bits;
+    alpha_ = std::move(alpha);
+    PIK_CHECK(DivCeil(alpha_.xsize(), kBlockDim) ==
+                  DivCeil(color_.xsize(), kBlockDim) &&
+              DivCeil(alpha_.ysize(), kBlockDim) ==
+                  DivCeil(color_.ysize(), kBlockDim));
+  }
+
+  // Called if all alpha values are opaque.
+  void RemoveAlpha() {
+    alpha_ = ImageU();
+    PIK_ASSERT(!HasAlpha());
+  }
+
+  // -- ENCODER
+
+  // Replaces "bytes" with an encoding of pixels transformed from c_current
+  // color space to c_desired.
+  Status Encode(const Codec codec, const ColorEncoding& c_desired,
+                size_t bits_per_sample, PaddedBytes* bytes,
+                ThreadPool* pool = nullptr) const;
+
+  // Deduces codec, calls Encode and writes to file.
+  Status EncodeToFile(const ColorEncoding& c_desired, size_t bits_per_sample,
+                      const std::string& pathname,
+                      ThreadPool* pool = nullptr) const;
+
+  // -- ENCODER OUTPUT:
+
+  // Size [bytes] of encoded bitstream after encoding / before decoding.
+  // Zero until set: the defaulted move operations copy this member, so it must
+  // never hold an indeterminate value.
+  mutable size_t enc_size = 0;
+
+  // Encoder-specific function of its bits_per_sample argument. Used to compute
+  // error tolerance in round trips. Zero until set (see enc_size).
+  mutable size_t enc_bits_per_sample = 0;
+
+  // Range of temp channels for rescaling instead of clipping. Not yet supported
+  // by any Codec.
+  mutable CodecIntervals enc_temp_intervals;  // unused
+
+  // -- DECODER INPUT/OUTPUT:
+
+  // Used to set c_current for codecs that lack color space metadata.
+  DecoderHints dec_hints;
+
+  // Color space/ICC profile from the original source.
+  // Used to reconstruct the original image without additional user input.
+  ColorEncoding dec_c_original;
+
+  // -- SHARED:
+
+  // Optional text/EXIF metadata to store into / retrieve from bitstreams.
+  Metadata metadata;
+
+ private:
+  // Initialized by ctor:
+  const CodecContext* context_;  // Not owned, must remain valid.
+
+  // Initialized by Set*:
+  Image3F color_;  // In c_current color space; all planes equal if IsGray().
+  ColorEncoding c_current_;  // Encoding the values in color_ are defined in.
+
+  // Set by SetAlpha; only queried if HasAlpha. Zero-initialized so that moving
+  // an instance without alpha does not read an indeterminate value.
+  size_t alpha_bits_ = 0;
+  ImageU alpha_;  // Empty or same size as color_.
+
+  // From the original source; may differ from sizeof(T) * kBitsPerBytes.
+  // Used to reconstruct the original image without additional user input.
+  // Only meaningful while has_dec_bits_per_sample_ is true.
+  size_t dec_bits_per_sample_ = 0;
+  bool has_dec_bits_per_sample_ = false;
+};
+
+}  // namespace pik
+
+#endif  // PIK_CODEC_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/codec_impl.cc b/codec/L2/demos/pikEnc/host/pik/codec_impl.cc
new file mode 100755
index 0000000000..7b6dc84e2f
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/codec_impl.cc
@@ -0,0 +1,306 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/codec.h"
+
+#include <algorithm>
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/byte_order.h"
+#include "pik/codec_png.h"
+#include "pik/codec_pnm.h"
+#include "pik/common.h"
+#include "pik/epf.h"
+#include "pik/external_image.h"
+#include "pik/file_io.h"
+#include "pik/profiler.h"
+#include "pik/simd/targets.h"
+
+namespace pik {
+namespace {
+
+// Shorter inputs are rejected, so codecs can read the first few bytes.
+constexpr size_t kMinBytes = 9;
+
+// Returns the {RGB, gray} pair of D65 encodings for pr/tf (index = IsGray()).
+std::array<ColorEncoding, 2> MakeC2(const Primaries pr,
+                                    const TransferFunction tf) {
+  ColorEncoding rgb;
+  rgb.color_space = ColorSpace::kRGB;
+  rgb.white_point = WhitePoint::kD65;
+  rgb.primaries = pr;
+  rgb.transfer_function = tf;
+  PIK_CHECK(ColorManagement::SetProfileFromFields(&rgb));
+
+  // Gray starts as a copy of RGB, then gets its own profile.
+  ColorEncoding gray = rgb;
+  gray.color_space = ColorSpace::kGray;
+  PIK_CHECK(ColorManagement::SetProfileFromFields(&gray));
+  return {{rgb, gray}};
+}
+
+Status FromSRGB(const size_t xsize, const size_t ysize, const bool is_gray,
+                const bool has_alpha, const bool is_16bit,
+                const bool big_endian, const uint8_t* pixels,
+                const uint8_t* end, ThreadPool* pool, CodecInOut* io) {
+  // Color and alpha samples share the same width: one or two bytes.
+  const size_t bits_per_sample = (is_16bit ? 2 : 1) * kBitsPerByte;
+  const ColorEncoding& srgb = io->Context()->c_srgb[is_gray];
+  const ExternalImage wrapped(xsize, ysize, srgb, has_alpha,
+                              /*alpha_bits=*/bits_per_sample, bits_per_sample,
+                              big_endian, pixels, end);
+  // A null interval pointer: the min/max of the samples are not known.
+  const CodecIntervals* no_intervals = nullptr;
+  return wrapped.CopyTo(no_intervals, pool, io);
+}
+
+// Allocates *out with the size of "external" and fills its three planes from
+// the interleaved samples, ignoring any alpha sample. For gray input, all
+// three planes receive the single gray sample. Caller ensures bits_per_sample
+// matches T, and byte order=native.
+template <typename T>
+void AllocateAndFill(const ExternalImage& external, Image3<T>* out) {
+  PIK_ASSERT(external.IsHealthy());  // Callers must check beforehand.
+
+  // Samples are copied verbatim, hence the width/byte order requirements; for
+  // conversion/byte swapping, use ExternalImage::CopyTo instead.
+  PIK_CHECK(external.BitsPerSample() == sizeof(T) * kBitsPerByte);
+  PIK_CHECK(external.BigEndian() == !IsLittleEndian());
+
+  const size_t xsize = external.xsize();
+  const size_t ysize = external.ysize();
+  *out = Image3<T>(xsize, ysize);
+
+  const bool gray = external.IsGray();
+  // Samples per interleaved pixel: 1 or 3 color samples plus optional alpha.
+  const size_t stride = (gray ? 1 : 3) + (external.HasAlpha() ? 1 : 0);
+  // Position within a pixel of the sample feeding planes 1 and 2; gray input
+  // replicates sample 0 into every plane.
+  const size_t offset1 = gray ? 0 : 1;
+  const size_t offset2 = gray ? 0 : 2;
+
+  for (size_t y = 0; y < ysize; ++y) {
+    const T* PIK_RESTRICT src =
+        reinterpret_cast<const T*>(external.ConstRow(y));
+    T* PIK_RESTRICT dst0 = out->PlaneRow(0, y);
+    T* PIK_RESTRICT dst1 = out->PlaneRow(1, y);
+    T* PIK_RESTRICT dst2 = out->PlaneRow(2, y);
+    for (size_t x = 0; x < xsize; ++x) {
+      const size_t first = stride * x;
+      dst0[x] = src[first];
+      dst1[x] = src[first + offset1];
+      dst2[x] = src[first + offset2];
+    }
+  }
+}
+
+// Converts the "rect" region of io to c_desired with native-endian samples of
+// type T, then allocates *out and copies the color samples into it.
+template <typename T>
+Status CopyToT(const CodecInOut* io, const Rect& rect,
+               const ColorEncoding& c_desired, ThreadPool* pool,
+               Image3<T>* out) {
+  PROFILER_FUNC;
+  // Changing IsGray is probably a bug.
+  PIK_CHECK(io->IsGray() == c_desired.IsGray());
+
+  // Alpha is only forwarded when the image has it.
+  const bool has_alpha = io->HasAlpha();
+  const ImageU* alpha_plane = nullptr;
+  size_t alpha_bits = 0;
+  if (has_alpha) {
+    alpha_plane = &io->alpha();
+    alpha_bits = io->AlphaBits();
+  }
+
+  CodecIntervals* no_intervals = nullptr;  // Don't need min/max.
+  const ExternalImage converted(pool, io->color(), rect, io->c_current(),
+                                c_desired, has_alpha, alpha_plane, alpha_bits,
+                                /*bits_per_sample=*/sizeof(T) * kBitsPerByte,
+                                /*big_endian=*/!IsLittleEndian(), no_intervals);
+  PIK_RETURN_IF_ERROR(converted.IsHealthy());
+  AllocateAndFill(converted, out);
+  return true;
+}
+
+}  // namespace
+
+std::vector<Codec> Values(Codec) {
+  // Lists every codec except kUnknown.
+  std::vector<Codec> all;
+  all.push_back(Codec::kPNG);
+  all.push_back(Codec::kPNM);
+  return all;
+}
+
+std::string ExtensionFromCodec(Codec codec) {
+  // kPNM always maps to ".pfm", although CodecFromExtension also accepts
+  // ".pgm" and ".ppm".
+  if (codec == Codec::kPNG) return ".png";
+  if (codec == Codec::kPNM) return ".pfm";
+  if (codec != Codec::kUnknown) {
+    // Not one of the enumerators.
+    PIK_ASSERT(false);
+  }
+  return std::string();
+}
+
+Codec CodecFromExtension(const std::string& extension) {
+  if (extension == ".png") return Codec::kPNG;
+
+  // All of these extensions are handled by the PNM codec.
+  static const char* const kPnmExtensions[] = {".pgm", ".ppm", ".pfm"};
+  for (const char* candidate : kPnmExtensions) {
+    if (extension == candidate) return Codec::kPNM;
+  }
+
+  return Codec::kUnknown;
+}
+
+CodecContext::CodecContext()  // Builds both RGB/gray encoding pairs.
+    : c_srgb(MakeC2(Primaries::kSRGB, TransferFunction::kSRGB)),
+      c_linear_srgb(MakeC2(Primaries::kSRGB, TransferFunction::kLinear)) {
+  TargetBitfield().Foreach(InitEdgePreservingFilter());  // NOTE(review): presumably per-target filter setup - confirm.
+}
+
+void CodecInOut::SetFromImage(Image3F&& color, const ColorEncoding& c_current) {
+  // Takes over the pixels and records the encoding they are defined in.
+  color_ = std::move(color);
+  c_current_ = c_current;
+}
+
+Status CodecInOut::SetFromSRGB(size_t xsize, size_t ysize, bool is_gray,
+                               bool has_alpha, const uint8_t* pixels,
+                               const uint8_t* end, ThreadPool* pool) {
+  SetOriginalBitsPerSample(8);
+  // Byte order does not matter for single-byte samples.
+  return FromSRGB(xsize, ysize, is_gray, has_alpha, /*is_16bit=*/false,
+                  /*big_endian=*/false, pixels, end, pool, this);
+}
+
+Status CodecInOut::SetFromSRGB(size_t xsize, size_t ysize, bool is_gray,
+                               bool has_alpha, const uint16_t* pixels,
+                               const uint16_t* end, ThreadPool* pool) {
+  SetOriginalBitsPerSample(16);
+  // The samples arrive as uint16_t, so their bytes are in native order.
+  const bool native_is_big_endian = !IsLittleEndian();
+  return FromSRGB(xsize, ysize, is_gray, has_alpha, /*is_16bit=*/true,
+                  native_is_big_endian,
+                  reinterpret_cast<const uint8_t*>(pixels),
+                  reinterpret_cast<const uint8_t*>(end), pool, this);
+}
+
+Status CodecInOut::SetFromSRGB(size_t xsize, size_t ysize,
+    bool is_gray, bool has_alpha, bool is_16bit, bool big_endian,
+    const uint8_t* pixels, const uint8_t* end, ThreadPool* pool) {
+  // Remember the sample width of the source before converting it.
+  const size_t source_bits = is_16bit ? 16 : 8;
+  SetOriginalBitsPerSample(source_bits);
+  return FromSRGB(xsize, ysize, is_gray, has_alpha, is_16bit,
+                  big_endian, pixels, end, pool, this);
+}
+
+Status CodecInOut::SetFromBytes(const PaddedBytes& bytes, ThreadPool* pool) {
+  if (bytes.size() < kMinBytes) return PIK_FAILURE("Too few bytes");
+
+  // Try the decoders in turn: PNM is only attempted after PNG has failed.
+  if (!DecodeImagePNG(bytes, pool, this)) {
+    if (!DecodeImagePNM(bytes, pool, this)) {
+      return PIK_FAILURE("Codecs failed to decode");
+    }
+  }
+
+  // A successful decoder must have provided both ICC profiles.
+  PIK_CHECK(!c_current().icc.empty());
+  PIK_CHECK(!dec_c_original.icc.empty());
+  return true;
+}
+
+Status CodecInOut::SetFromFile(const std::string& pathname, ThreadPool* pool) {
+  // Decoding is only attempted if the whole file could be read.
+  PaddedBytes file_contents;
+  if (!ReadFile(pathname, &file_contents)) return false;
+  return SetFromBytes(file_contents, pool);
+}
+
+Status CodecInOut::TransformTo(const ColorEncoding& c_desired,
+                               ThreadPool* pool) {
+  PROFILER_FUNC;
+  // Changing IsGray is probably a bug.
+  PIK_CHECK(IsGray() == c_desired.IsGray());
+
+  // Alpha is only forwarded when the image has it.
+  const bool has_alpha = HasAlpha();
+  const ImageU* alpha_plane = has_alpha ? &alpha_ : nullptr;
+  const size_t alpha_bits = has_alpha ? AlphaBits() : 0;
+
+  // The same intervals object is handed to the conversion and to the copy
+  // back into this instance.
+  CodecIntervals intervals;
+  const ExternalImage converted(pool, color_, Rect(color_), c_current_,
+                                c_desired, has_alpha, alpha_plane, alpha_bits,
+                                /*bits_per_sample=*/32,
+                                /*big_endian=*/!IsLittleEndian(), &intervals);
+  if (!converted.IsHealthy()) return false;
+  return converted.CopyTo(&intervals, pool, this);
+}
+
+Status CodecInOut::CopyTo(const Rect& rect, const ColorEncoding& c_desired,
+                          Image3B* out, ThreadPool* pool) const {
+  return CopyToT(this, rect, c_desired, pool, out);  // T deduced from *out.
+}
+Status CodecInOut::CopyTo(const Rect& rect, const ColorEncoding& c_desired,
+                          Image3U* out, ThreadPool* pool) const {
+  return CopyToT(this, rect, c_desired, pool, out);  // T deduced from *out.
+}
+Status CodecInOut::CopyTo(const Rect& rect, const ColorEncoding& c_desired,
+                          Image3F* out, ThreadPool* pool) const {
+  return CopyToT(this, rect, c_desired, pool, out);  // T deduced from *out.
+}
+
+Status CodecInOut::CopyToSRGB(const Rect& rect, Image3B* out,
+                              ThreadPool* pool) const {
+  // Pick the sRGB encoding that matches the gray-ness of this image.
+  const ColorEncoding& srgb = context_->c_srgb[IsGray()];
+  return CopyTo(rect, srgb, out, pool);
+}
+
+Status CodecInOut::Encode(const Codec codec, const ColorEncoding& c_desired,
+                          size_t bits_per_sample, PaddedBytes* bytes,
+                          ThreadPool* pool) const {
+  // Both the current and the target encoding need an ICC profile.
+  PIK_CHECK(!c_current().icc.empty());
+  PIK_CHECK(!c_desired.icc.empty());
+
+  if (codec == Codec::kPNG) {
+    return EncodeImagePNG(this, c_desired, bits_per_sample, pool, bytes);
+  }
+  if (codec == Codec::kPNM) {
+    return EncodeImagePNM(this, c_desired, bits_per_sample, pool, bytes);
+  }
+  if (codec == Codec::kUnknown) {
+    return PIK_FAILURE("Cannot encode using Codec::kUnknown");
+  }
+
+  // Reached only for values that are not enumerators of Codec.
+  return PIK_FAILURE("Invalid codec");
+}
+
+Status CodecInOut::EncodeToFile(const ColorEncoding& c_desired,
+                                size_t bits_per_sample,
+                                const std::string& pathname,
+                                ThreadPool* pool) const {
+  // The file extension selects the codec; the file is only written if
+  // encoding succeeded.
+  const Codec codec = CodecFromExtension(Extension(pathname));
+  PaddedBytes encoded;
+  if (!Encode(codec, c_desired, bits_per_sample, &encoded, pool)) {
+    return false;
+  }
+  return WriteFile(encoded, pathname);
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/codec_png.cc b/codec/L2/demos/pikEnc/host/pik/codec_png.cc
new file mode 100755
index 0000000000..451419d88a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/codec_png.cc
@@ -0,0 +1,694 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/codec_png.h"
+
+#include <string>
+
+#include "third_party/lodepng/lodepng.h"
+#include "pik/byte_order.h"
+#include "pik/common.h"
+#include "pik/external_image.h"
+
+namespace pik {
+namespace {
+
+#define PIK_PNG_VERBOSE 0
+
+// Retrieves XMP and EXIF/IPTC from itext and text.
+class MetadataReaderPNG {
+ public:
+  // Extracts XMP, EXIF and IPTC from the iTXt/tEXt entries that LodePNG
+  // collected in `info` and stores them into `metadata`. Text entries that are
+  // not valid "Raw profile type <type>" blocks are skipped. Always returns
+  // true.
+  static Status Decode(const LodePNGInfo& info, Metadata* metadata) {
+    for (unsigned idx_itext = 0; idx_itext < info.itext_num; ++idx_itext) {
+      // We trust these are properly null-terminated by LodePNG.
+      const char* key = info.itext_keys[idx_itext];
+      const char* value = info.itext_strings[idx_itext];
+      if (strstr(key, "XML:com.adobe.xmp")) {
+        metadata->xmp.resize(strlen(value));  // safe, see above
+        memcpy(metadata->xmp.data(), value, metadata->xmp.size());
+      }
+    }
+
+    for (unsigned idx_text = 0; idx_text < info.text_num; ++idx_text) {
+      // We trust these are properly null-terminated by LodePNG.
+      const char* key = info.text_keys[idx_text];
+      const char* value = info.text_strings[idx_text];
+      std::string type;
+      PaddedBytes bytes;
+      if (DecodeBase16(key, value, &type, &bytes)) {
+        if (type == "exif") {
+          if (!metadata->exif.empty()) {
+            fprintf(stderr,
+                    "WARNING: overwriting EXIF (%zu bytes) with base16 (%zu "
+                    "bytes)\n",
+                    metadata->exif.size(), bytes.size());
+          }
+          metadata->exif = std::move(bytes);
+        } else if (type == "iptc") {
+          metadata->iptc = std::move(bytes);
+        } else if (type == "xmp") {
+          // Generated by ImageMagick. Store the decoded payload; `value` is
+          // still the base16 text including its "\nxmp\n<count>" header.
+          metadata->xmp.resize(bytes.size());
+          memcpy(metadata->xmp.data(), bytes.data(), bytes.size());
+        } else {
+          fprintf(stderr, "Unknown metadata %s: %zu bytes\n", type.c_str(),
+                  bytes.size());
+        }
+      }
+    }
+
+    return true;
+  }
+
+ private:
+  // Converts one lowercase hexadecimal digit into its value (0..15).
+  // On any other character stores 0 in *nibble and returns failure.
+  static PIK_INLINE Status DecodeNibble(const char c,
+                                        uint32_t* PIK_RESTRICT nibble) {
+    const bool is_decimal = ('0' <= c && c <= '9');
+    const bool is_letter = ('a' <= c && c <= 'f');
+    if (!is_decimal && !is_letter) {
+      *nibble = 0;
+      return PIK_FAILURE("Invalid metadata nibble");
+    }
+    *nibble = is_letter ? (10 + c - 'a') : (c - '0');
+    PIK_ASSERT(*nibble < 16);
+    return true;
+  }
+
+  // Decodes a "Raw profile type <type>" text entry. `key` is the entry's
+  // keyword and `encoded` its text, laid out as "\n<type>\n<byte count>"
+  // followed by base16 digits with a newline before every 36 bytes and one
+  // final newline. On success `type` holds the suffix of the keyword and
+  // `bytes` (which must be empty on entry) the decoded payload. Returns false
+  // if the keyword has a different prefix or the text is malformed.
+  //
+  // We trust key and encoded are null-terminated because they come from
+  // LodePNG.
+  static Status DecodeBase16(const char* key, const char* encoded,
+                             std::string* type, PaddedBytes* bytes) {
+    const size_t encoded_len = strlen(encoded);
+    const char* encoded_end = encoded + encoded_len;
+
+    const char* kKey = "Raw profile type ";
+    if (strncmp(key, kKey, strlen(kKey)) != 0) return false;
+    *type = key + strlen(kKey);
+    const size_t kMaxTypeLen = 20;
+    if (type->length() > kMaxTypeLen) return PIK_FAILURE("Type too long");
+    // `type` comes from the file and is spliced into the scanf format below.
+    // A '%' would add conversions that write through arguments we never pass.
+    if (type->find('%') != std::string::npos) {
+      return PIK_FAILURE("Invalid metadata type");
+    }
+
+    // GroupHeader: type and number of bytes
+    char format[10 + kMaxTypeLen];
+    snprintf(format, sizeof(format), "\n%s\n%%8lu%%n", type->c_str());
+    unsigned long bytes_to_decode = 0;
+    int header_len = 0;
+    const int fields = sscanf(encoded, format, &bytes_to_decode, &header_len);
+    if (fields != 1) return PIK_FAILURE("Failed to decode metadata header");
+    // Each byte needs two base16 characters, so a larger count can never be
+    // satisfied. Reject it before reserving memory for it.
+    if (bytes_to_decode > encoded_len / 2) {
+      return PIK_FAILURE("Metadata size exceeds encoded data");
+    }
+    PIK_ASSERT(bytes->empty());
+    bytes->reserve(bytes_to_decode);
+
+    // Encoding: base16 with newline after 72 chars.
+    const char* pos = encoded + header_len;
+    for (size_t i = 0; i < bytes_to_decode; ++i) {
+      if (i % 36 == 0) {
+        if (pos + 1 >= encoded_end) return PIK_FAILURE("Truncated base16 1");
+        if (*pos != '\n') return PIK_FAILURE("Expected newline");
+        ++pos;
+      }
+
+      if (pos + 2 >= encoded_end) return PIK_FAILURE("Truncated base16 2");
+      uint32_t nibble0, nibble1;
+      PIK_RETURN_IF_ERROR(DecodeNibble(pos[0], &nibble0));
+      PIK_RETURN_IF_ERROR(DecodeNibble(pos[1], &nibble1));
+      // The first character carries the low nibble, mirroring the order used
+      // by MetadataWriterPNG::EncodeBase16.
+      bytes->push_back(static_cast<uint8_t>((nibble1 << 4) + nibble0));
+      pos += 2;
+    }
+    if (pos + 1 != encoded_end) return PIK_FAILURE("Too many encoded bytes");
+    if (pos[0] != '\n') return PIK_FAILURE("Incorrect metadata terminator");
+    return true;
+  }
+};
+
+// Stores XMP and EXIF/IPTC into itext and text.
+class MetadataWriterPNG {
+ public:
+  // Adds the non-empty parts of `metadata` to `info`: EXIF and IPTC as base16
+  // "Raw profile type" text entries, XMP as an iTXt entry. Fails as soon as
+  // one entry cannot be added.
+  static Status Encode(const Metadata& metadata,
+                       LodePNGInfo* PIK_RESTRICT info) {
+    if (!metadata.exif.empty()) {
+      PIK_RETURN_IF_ERROR(EncodeBase16("exif", metadata.exif, info));
+    }
+    if (!metadata.iptc.empty()) {
+      PIK_RETURN_IF_ERROR(EncodeBase16("iptc", metadata.iptc, info));
+    }
+    if (metadata.xmp.empty()) return true;
+
+    // Copy into a std::string to obtain a null-terminated buffer for LodePNG.
+    const char* xmp_begin = reinterpret_cast<const char*>(metadata.xmp.data());
+    const std::string xmp_text(xmp_begin, xmp_begin + metadata.xmp.size());
+    const unsigned error =
+        lodepng_add_itext(info, "XML:com.adobe.xmp", "", "", xmp_text.c_str());
+    if (error != 0) {
+      return PIK_FAILURE("Failed to add itext");
+    }
+    return true;
+  }
+
+ private:
+  // Returns the lowercase hexadecimal digit for `nibble`, which must be < 16.
+  static PIK_INLINE char EncodeNibble(const uint8_t nibble) {
+    PIK_ASSERT(nibble < 16);
+    if (nibble < 10) {
+      return '0' + nibble;
+    }
+    return 'a' + nibble - 10;
+  }
+
+  // Stores `bytes` in `info` as a text entry keyed "Raw profile type <type>".
+  // The text is "\n<type>\n<byte count, width 8>" followed by the base16
+  // payload: a newline before every 36 bytes, two characters per byte (low
+  // nibble first) and a final newline. Fails if LodePNG rejects the entry.
+  static Status EncodeBase16(const std::string& type, const PaddedBytes& bytes,
+                             LodePNGInfo* PIK_RESTRICT info) {
+    // Encoding: base16 with newline after 72 chars.
+    const size_t base16_size =
+        2 * bytes.size() + DivCeil(bytes.size(), size_t(36)) + 1;
+    std::string base16;
+    base16.reserve(base16_size);
+    for (size_t i = 0; i < bytes.size(); ++i) {
+      if (i % 36 == 0) base16.push_back('\n');
+      base16.push_back(EncodeNibble(bytes[i] & 0x0F));
+      base16.push_back(EncodeNibble(bytes[i] >> 4));
+    }
+    base16.push_back('\n');
+    PIK_ASSERT(base16.length() == base16_size);
+
+    char key[30];
+    snprintf(key, sizeof(key), "Raw profile type %s", type.c_str());
+
+    // %lu expects unsigned long; size_t is a different type on some ABIs, so
+    // convert explicitly instead of passing a mismatched variadic argument.
+    char header[30];
+    snprintf(header, sizeof(header), "\n%s\n%8lu", type.c_str(),
+             static_cast<unsigned long>(bytes.size()));
+
+    const std::string encoded = std::string(header) + base16;
+    if (lodepng_add_text(info, key, encoded.c_str()) != 0) {
+      return PIK_FAILURE("Failed to add text");
+    }
+
+    return true;
+  }
+};
+
+// Retrieves ColorEncoding from PNG chunks.
+class ColorEncodingReaderPNG {
+ public:
+  // Sets c_original or returns false.
+  //
+  // Scans the chunks of the PNG in `bytes` (storing any eXIf payload into
+  // `metadata`) and derives the color encoding in this order of preference:
+  //   1. a synthesized BT.2100 PQ profile if the iCCP name requested it,
+  //   2. the embedded ICC profile,
+  //   3. sRGB if an sRGB chunk was present,
+  //   4. a profile built from gAMA/cHRM, with sRGB values substituted for
+  //      whichever of the two is missing or invalid,
+  //   5. plain sRGB as a last resort (a DATA LOSS message is printed).
+  Status operator()(const PaddedBytes& bytes, const bool is_gray,
+                    Metadata* metadata, ColorEncoding* c_original) {
+    PIK_RETURN_IF_ERROR(Decode(bytes, metadata));
+
+    const ColorSpace color_space =
+        is_gray ? ColorSpace::kGray : ColorSpace::kRGB;
+
+    if (have_pq_) {
+      ProfileParams pp;
+      pp.color_space = color_space;
+      if (!WhitePointToCIExy(WhitePoint::kD65, &pp.white_point) ||
+          !PrimariesToCIExy(Primaries::k2020, &pp.primaries)) {
+        PIK_NOTIFY_ERROR("Failed to set white point/primaries");
+      }
+      pp.gamma = GammaPQ();
+      pp.rendering_intent = RenderingIntent::kRelative;
+      if (ColorManagement::SetFromParams(pp, c_original)) return true;
+      fprintf(stderr, "Failed to synthesize BT.2100 PQ.\n");
+      // Else: try the actual ICC profile.
+    }
+
+    // ICC overrides anything else if present.
+    if (ColorManagement::SetFromProfile(std::move(icc_), c_original)) {
+      if (have_srgb_) {
+        fprintf(stderr, "Invalid PNG with both sRGB and ICC; ignoring sRGB.\n");
+      }
+      if (is_gray != c_original->IsGray()) {
+        return PIK_FAILURE("Mismatch between ICC and PNG header");
+      }
+      return true;  // it's fine to ignore gAMA/cHRM.
+    }
+
+    // PNG requires that sRGB override gAMA/cHRM.
+    if (have_srgb_) {
+      c_original->rendering_intent = params_.rendering_intent;
+      c_original->SetSRGB(color_space);
+      return ColorManagement::SetProfileFromFields(c_original);
+    }
+
+    // Try to create a custom profile:
+
+    params_.color_space = color_space;
+
+    if (!have_chrm_) {
+#if PIK_PNG_VERBOSE >= 1
+      fprintf(stderr, "No cHRM, assuming sRGB.\n");
+#endif
+      if (!WhitePointToCIExy(WhitePoint::kD65, &params_.white_point) ||
+          !PrimariesToCIExy(Primaries::kSRGB, &params_.primaries)) {
+        PIK_ASSERT(false);  // should always succeed with known enum
+      }
+    }
+
+    if (!have_gama_ || params_.gamma <= 0.0 || params_.gamma > 1.0) {
+#if PIK_PNG_VERBOSE >= 1
+      fprintf(stderr, "No (valid) gAMA nor sRGB, assuming sRGB.\n");
+#endif
+      params_.gamma = GammaSRGB();
+    }
+
+    params_.rendering_intent = RenderingIntent::kPerceptual;
+    if (ColorManagement::SetFromParams(params_, c_original)) return true;
+
+    // The literals are concatenated by the compiler, so each one must end with
+    // its own separating space.
+    fprintf(stderr,
+            "DATA LOSS: unable to create an ICC profile for PNG gAMA/cHRM. "
+            "Image pixels will be interpreted as sRGB. Please add an ICC "
+            "profile to the input image.\n");
+    c_original->SetSRGB(color_space);
+    return ColorManagement::SetProfileFromFields(c_original);
+  }
+
+ private:
+  // Parses an iCCP payload: a null-terminated profile name, one byte naming
+  // the compression method, then the zlib-compressed profile.
+  // Sets have_pq_ when the name is "ITUR_2100_PQ_FULL". If decompression
+  // succeeds the profile is stored in icc_; a decompression error is ignored
+  // (icc_ is left unchanged and true is still returned).
+  // Fails for an empty payload, an empty, unterminated or too long name, a
+  // missing method byte, or a method other than 0.
+  Status DecodeICC(const unsigned char* const payload,
+                   const size_t payload_size) {
+    if (payload_size == 0) return PIK_FAILURE("Empty ICC payload");
+    const unsigned char* pos = payload;
+    const unsigned char* end = payload + payload_size;
+
+    // Profile name
+    if (*pos == '\0') return PIK_FAILURE("Expected ICC name");
+    // Advance pos past the terminator; give up after 80 characters or at the
+    // end of the payload.
+    for (size_t i = 0;; ++i) {
+      if (i == 80) return PIK_FAILURE("ICC profile name too long");
+      if (pos == end) return PIK_FAILURE("Not enough bytes for ICC name");
+      if (*pos++ == '\0') break;
+    }
+
+    // Special case for BT.2100 PQ (https://w3c.github.io/png-hdr-pq/) - try to
+    // synthesize the profile because table-based curves are less accurate.
+    // strcmp is safe because we already verified the string is 0-terminated.
+    if (!strcmp(reinterpret_cast<const char*>(payload), "ITUR_2100_PQ_FULL")) {
+      have_pq_ = true;
+    }
+
+    // Skip over compression method (only one is allowed)
+    if (pos == end) return PIK_FAILURE("Not enough bytes for ICC method");
+    if (*pos++ != 0) return PIK_FAILURE("Unsupported ICC method");
+
+    // Decompress
+    unsigned char* icc_buf = nullptr;
+    size_t icc_size = 0;
+    LodePNGDecompressSettings settings;
+    lodepng_decompress_settings_init(&settings);
+    // The compressed stream is everything after the name and method byte.
+    const unsigned err = lodepng_zlib_decompress(
+        &icc_buf, &icc_size, pos, payload_size - (pos - payload), &settings);
+    if (err == 0) {
+      icc_.resize(icc_size);
+      memcpy(icc_.data(), icc_buf, icc_size);
+    }
+    // Released on both the success and the error path.
+    free(icc_buf);
+    return true;
+  }
+
+  // Converts a PNG fixed-point field (real value times 10^5) to floating
+  // point. The 32 bits are reinterpreted as a signed integer first.
+  static double F64FromU32(const uint32_t x) {
+    const int32_t fixed_point = static_cast<int32_t>(x);
+    return fixed_point * 1E-5;
+  }
+
+  // Handles an sRGB chunk: the single payload byte is taken as the rendering
+  // intent and have_srgb_ is raised. Fails unless the payload is one byte.
+  Status DecodeSRGB(const unsigned char* payload, const size_t payload_size) {
+    if (payload_size != 1) return PIK_FAILURE("Wrong sRGB size");
+    // (PNG uses the same values as ICC.)
+    const unsigned char intent = payload[0];
+    params_.rendering_intent = static_cast<RenderingIntent>(intent);
+    have_srgb_ = true;
+    return true;
+  }
+
+  // Handles a gAMA chunk: reads the big-endian fixed-point gamma into params_
+  // and raises have_gama_. Fails unless the payload is four bytes.
+  Status DecodeGAMA(const unsigned char* payload, const size_t payload_size) {
+    if (payload_size != 4) return PIK_FAILURE("Wrong gAMA size");
+    const uint32_t fixed_point_gamma = LoadBE32(payload);
+    params_.gamma = F64FromU32(fixed_point_gamma);
+    have_gama_ = true;
+    return true;
+  }
+
+  // Handles a cHRM chunk: eight big-endian fixed-point values giving the x/y
+  // chromaticities of the white point and of the red, green and blue
+  // primaries, in that order. Raises have_chrm_. Fails unless the payload is
+  // 32 bytes.
+  Status DecodeCHRM(const unsigned char* payload, const size_t payload_size) {
+    if (payload_size != 32) return PIK_FAILURE("Wrong cHRM size");
+
+    // Reads the fixed-point value stored at the given byte offset.
+    const auto value_at = [payload](const size_t offset) {
+      return F64FromU32(LoadBE32(payload + offset));
+    };
+
+    params_.white_point.x = value_at(0);
+    params_.white_point.y = value_at(4);
+    params_.primaries.r.x = value_at(8);
+    params_.primaries.r.y = value_at(12);
+    params_.primaries.g.x = value_at(16);
+    params_.primaries.g.y = value_at(20);
+    params_.primaries.b.x = value_at(24);
+    params_.primaries.b.y = value_at(28);
+    have_chrm_ = true;
+    return true;
+  }
+
+  // Handles an eXIf chunk. The payload replaces metadata->exif unless the EXIF
+  // already stored there is strictly larger. Always returns true.
+  Status DecodeEXIF(const unsigned char* payload, const size_t payload_size,
+                    Metadata* metadata) {
+    const bool existing_is_larger = metadata->exif.size() > payload_size;
+    if (!existing_is_larger) {
+      metadata->exif.resize(payload_size);
+      memcpy(metadata->exif.data(), payload, payload_size);
+    }
+    return true;
+  }
+
+  // Walks the chunk list of the PNG in `bytes` and hands the payload of every
+  // eXIf, iCCP, sRGB, gAMA and cHRM chunk to the matching Decode* member.
+  // Other chunk types are stepped over. Walking stops at the end of the data
+  // or at the first truncated chunk of interest; a chunk of interest whose CRC
+  // does not match is reported and skipped. Returns failure only if one of the
+  // Decode* members rejects its payload.
+  Status Decode(const PaddedBytes& bytes, Metadata* metadata) {
+    // Look for colorimetry and metadata chunks in the PNG image. The PNG chunks
+    // begin after the PNG magic header of 8 bytes.
+    const unsigned char* chunk = bytes.data() + 8;
+    const unsigned char* end = bytes.data() + bytes.size();
+    for (;;) {
+      // chunk points to the first field of a PNG chunk. The chunk has
+      // respectively 4 bytes of length, 4 bytes type, length bytes of data,
+      // 4 bytes CRC.
+      if (chunk + 4 >= end) {
+        break;  // Regular end reached.
+      }
+
+      char type_char[5];
+      lodepng_chunk_type(type_char, chunk);
+      std::string type = type_char;
+
+      if (type == "eXIf" || type == "iCCP" || type == "sRGB" ||
+          type == "gAMA" || type == "cHRM") {
+        const unsigned char* payload = lodepng_chunk_data_const(chunk);
+        const size_t payload_size = lodepng_chunk_length(chunk);
+        // The entire chunk needs also 4 bytes of CRC after the payload.
+        if (payload + payload_size + 4 >= end) {
+          PIK_NOTIFY_ERROR("PNG: truncated chunk");
+          break;
+        }
+        if (lodepng_chunk_check_crc(chunk) != 0) {
+          PIK_NOTIFY_ERROR("CRC mismatch in unknown PNG chunk");
+          // Step over the corrupt chunk before continuing. Without this the
+          // loop would examine the same chunk again and never terminate.
+          chunk = lodepng_chunk_next_const(chunk);
+          continue;
+        }
+
+        if (type == "eXIf") {
+          PIK_RETURN_IF_ERROR(DecodeEXIF(payload, payload_size, metadata));
+        } else if (type == "iCCP") {
+          PIK_RETURN_IF_ERROR(DecodeICC(payload, payload_size));
+        } else if (type == "sRGB") {
+          PIK_RETURN_IF_ERROR(DecodeSRGB(payload, payload_size));
+        } else if (type == "gAMA") {
+          PIK_RETURN_IF_ERROR(DecodeGAMA(payload, payload_size));
+        } else if (type == "cHRM") {
+          PIK_RETURN_IF_ERROR(DecodeCHRM(payload, payload_size));
+        }
+      }
+
+      chunk = lodepng_chunk_next_const(chunk);
+    }
+    return true;
+  }
+
+  PaddedBytes icc_;
+
+  bool have_pq_ = false;
+  bool have_srgb_ = false;
+  bool have_gama_ = false;
+  bool have_chrm_ = false;
+  ProfileParams params_;
+};
+
+// Stores ColorEncoding into PNG chunks.
+class ColorEncodingWriterPNG {
+ public:
+  // Adds the color chunks describing `c` to `info`: an iCCP chunk when `c`
+  // carries an ICC profile, otherwise possibly an sRGB chunk, followed in both
+  // cases by the optional gAMA and cHRM chunks. Stops at the first failure.
+  static Status Encode(const ColorEncoding& c, LodePNGInfo* PIK_RESTRICT info) {
+    // Only ALLOW sRGB if no ICC present.
+    const bool has_icc = !c.icc.empty();
+    if (has_icc) {
+      PIK_RETURN_IF_ERROR(AddICC(c.icc, info));
+    } else {
+      PIK_RETURN_IF_ERROR(MaybeAddSRGB(c, info));
+    }
+
+    PIK_RETURN_IF_ERROR(MaybeAddGAMA(c, info));
+    PIK_RETURN_IF_ERROR(MaybeAddCHRM(c, info));
+    return true;
+  }
+
+ private:
+  // Appends a chunk of the given four-character `type` holding `payload` to
+  // the first group of unknown chunks in `info`.
+  static Status AddChunk(const char* type, const PaddedBytes& payload,
+                         LodePNGInfo* PIK_RESTRICT info) {
+    // Ignore original location/order of chunks; place them in the first group.
+    auto* const group_data = &info->unknown_chunks_data[0];
+    auto* const group_size = &info->unknown_chunks_size[0];
+    const unsigned error = lodepng_chunk_create(
+        group_data, group_size, payload.size(), type, payload.data());
+    if (error != 0) {
+      return PIK_FAILURE("Failed to add chunk");
+    }
+    return true;
+  }
+
+  // Compresses `icc` with zlib and adds it to `info` as an iCCP chunk whose
+  // payload is the profile name "1", its terminator, the compression method
+  // byte 0 and the compressed profile. Fails if compression fails or the chunk
+  // cannot be added.
+  static Status AddICC(const PaddedBytes& icc, LodePNGInfo* PIK_RESTRICT info) {
+    LodePNGCompressSettings settings;
+    lodepng_compress_settings_init(&settings);
+    unsigned char* out = nullptr;
+    size_t out_size = 0;
+    if (lodepng_zlib_compress(&out, &out_size, icc.data(), icc.size(),
+                              &settings) != 0) {
+      // Release whatever the compressor may have allocated before failing;
+      // free(nullptr) is a no-op.
+      free(out);
+      return PIK_FAILURE("Failed to compress ICC");
+    }
+
+    PaddedBytes payload;
+    payload.resize(3 + out_size);
+    // TODO(janwas): use special name if PQ
+    payload[0] = '1';  // profile name
+    payload[1] = '\0';
+    payload[2] = 0;  // compression method (zlib)
+    memcpy(&payload[3], out, out_size);
+    free(out);
+
+    return AddChunk("iCCP", payload, info);
+  }
+
+  // Adds an sRGB chunk carrying the rendering intent, but only when `c` is
+  // gray or RGB with a D65 white point, sRGB primaries and the sRGB transfer
+  // function. For any other encoding nothing is added and true is returned.
+  static Status MaybeAddSRGB(const ColorEncoding& c,
+                             LodePNGInfo* PIK_RESTRICT info) {
+    const bool differs_from_srgb =
+        (!c.IsGray() && c.color_space != ColorSpace::kRGB) ||
+        c.white_point != WhitePoint::kD65 ||
+        c.primaries != Primaries::kSRGB ||
+        c.transfer_function != TransferFunction::kSRGB;
+    if (differs_from_srgb) return true;
+
+    PaddedBytes payload;
+    const uint8_t intent = static_cast<uint8_t>(c.rendering_intent);
+    payload.push_back(intent);
+    return AddChunk("sRGB", payload, info);
+  }
+
+  // Converts a floating-point value to the PNG fixed-point representation:
+  // the value times 10^5, rounded to the nearest integer.
+  static uint32_t U32FromF64(const double x) {
+    const double scaled = std::round(x * 1E5);
+    const int32_t fixed_point = static_cast<int32_t>(scaled);
+    return fixed_point;
+  }
+
+  // Adds a gAMA chunk with the gamma that GammaFromTransferFunction reports
+  // for `c`. A reported gamma of exactly 0 means no chunk is written.
+  static Status MaybeAddGAMA(const ColorEncoding& c,
+                             LodePNGInfo* PIK_RESTRICT info) {
+    const double gamma = GammaFromTransferFunction(c.transfer_function);
+    if (gamma != 0.0) {
+      PaddedBytes payload(4);
+      StoreBE32(U32FromF64(gamma), payload.data());
+      return AddChunk("gAMA", payload, info);
+    }
+    return true;
+  }
+
+  // Adds a cHRM chunk holding the x/y chromaticities of the white point and
+  // of the red, green and blue primaries of `c`, in that order. Nothing is
+  // written (and true is returned) if either cannot be expressed as CIE xy.
+  static Status MaybeAddCHRM(const ColorEncoding& c,
+                             LodePNGInfo* PIK_RESTRICT info) {
+    CIExy white_point;
+    if (!WhitePointToCIExy(c.white_point, &white_point)) return true;
+    PrimariesCIExy primaries;
+    if (!PrimariesToCIExy(c.primaries, &primaries)) return true;
+
+    // Values in the order mandated by the chunk layout, four bytes each.
+    const double values[8] = {white_point.x, white_point.y, primaries.r.x,
+                              primaries.r.y, primaries.g.x, primaries.g.y,
+                              primaries.b.x, primaries.b.y};
+    PaddedBytes payload(32);
+    for (size_t i = 0; i < 8; ++i) {
+      StoreBE32(U32FromF64(values[i]), &payload[4 * i]);
+    }
+    return AddChunk("cHRM", payload, info);
+  }
+};
+
+// RAII - ensures state is freed even if returning early.
+// The constructor initializes the LodePNG state and the destructor cleans it
+// up. Copying is disabled because a copy would run the cleanup a second time
+// on the same state.
+struct PNGState {
+  PNGState() { lodepng_state_init(&s); }
+  ~PNGState() { lodepng_state_cleanup(&s); }
+
+  PNGState(const PNGState&) = delete;
+  PNGState& operator=(const PNGState&) = delete;
+
+  LodePNGState s;
+};
+
+// Reports through *is_gray whether `mode` describes a grayscale image. A
+// palette image counts as gray when every palette entry has equal red, green
+// and blue values. Fails (with *is_gray = false) for an unknown color type.
+Status CheckGray(const LodePNGColorMode& mode, bool* is_gray) {
+  switch (mode.colortype) {
+    case LCT_GREY:
+    case LCT_GREY_ALPHA:
+      *is_gray = true;
+      return true;
+
+    case LCT_RGB:
+    case LCT_RGBA:
+      *is_gray = false;
+      return true;
+
+    case LCT_PALETTE: {
+      bool all_entries_gray = true;
+      for (size_t i = 0; i < mode.palettesize && all_entries_gray; ++i) {
+        // Each palette entry occupies four bytes; the first three are compared.
+        const size_t base = i * 4;
+        all_entries_gray = mode.palette[base] == mode.palette[base + 1] &&
+                           mode.palette[base] == mode.palette[base + 2];
+      }
+      *is_gray = all_entries_gray;
+      return true;
+    }
+
+    default:
+      *is_gray = false;
+      return PIK_FAILURE("Unexpected PNG color type");
+  }
+}
+
+// Reports through *has_alpha whether `mode` carries transparency: a color
+// key, an alpha channel, or a palette with at least one entry whose fourth
+// byte is not 255. Fails (with *has_alpha = false) for an unknown color type.
+Status CheckAlpha(const LodePNGColorMode& mode, bool* has_alpha) {
+  // Color key marks a single color as transparent.
+  if (mode.key_defined) {
+    *has_alpha = true;
+    return true;
+  }
+
+  switch (mode.colortype) {
+    case LCT_GREY:
+    case LCT_RGB:
+      *has_alpha = false;
+      return true;
+
+    case LCT_GREY_ALPHA:
+    case LCT_RGBA:
+      *has_alpha = true;
+      return true;
+
+    case LCT_PALETTE: {
+      bool found_transparent_entry = false;
+      for (size_t i = 0; i < mode.palettesize && !found_transparent_entry;
+           ++i) {
+        // PNG palettes are always 8-bit.
+        found_transparent_entry = mode.palette[i * 4 + 3] != 255;
+      }
+      *has_alpha = found_transparent_entry;
+      return true;
+    }
+
+    default:
+      *has_alpha = false;
+      return PIK_FAILURE("Unexpected PNG color type");
+  }
+}
+
+// Maps the gray/alpha flags onto the matching non-palette LodePNG color type.
+LodePNGColorType MakeType(const bool is_gray, const bool has_alpha) {
+  if (has_alpha) {
+    return is_gray ? LCT_GREY_ALPHA : LCT_RGBA;
+  }
+  return is_gray ? LCT_GREY : LCT_RGB;
+}
+
+// Looks up the first chunk of the given type in `bytes` and, if there is one,
+// lets LodePNG fold its contents into `state`. A missing chunk is not an
+// error; a chunk LodePNG rejects is.
+Status InspectChunkType(const PaddedBytes& bytes, const std::string& type,
+                        LodePNGState* state) {
+  const unsigned char* chunk = lodepng_chunk_find_const(
+      bytes.data(), bytes.data() + bytes.size(), type.c_str());
+  if (!chunk) return true;  // Chunk absent: nothing to inspect.
+
+  const unsigned error = lodepng_inspect_chunk(state, chunk - bytes.data(),
+                                               bytes.data(), bytes.size());
+  if (error != 0) {
+    return PIK_FAILURE("Invalid chunk \"%s\" in PNG image", type.c_str());
+  }
+  return true;
+}
+
+}  // namespace
+
+// Decodes the PNG in `bytes` into `io`.
+// Returns false without reporting an error when `bytes` is not a PNG or its
+// PLTE/tRNS chunk is invalid. Pixels are decoded to 8 or 16 bits per sample as
+// gray or RGB, with alpha if the file has any transparency. Text and eXIf
+// metadata go to io->metadata and the color encoding found in the file to
+// io->dec_c_original. All decoder hints are ignored (a notice is printed for
+// each).
+Status DecodeImagePNG(const PaddedBytes& bytes, ThreadPool* pool,
+                      CodecInOut* io) {
+  unsigned w, h;
+  PNGState state;
+  if (lodepng_inspect(&w, &h, &state.s, bytes.data(), bytes.size()) != 0) {
+    return false;  // not an error - just wrong format
+  }
+  // Palette RGB values
+  if (!InspectChunkType(bytes, "PLTE", &state.s)) {
+    return false;
+  }
+  // Transparent color key, or palette transparency
+  if (!InspectChunkType(bytes, "tRNS", &state.s)) {
+    return false;
+  }
+  const LodePNGColorMode& color_mode = state.s.info_png.color;
+
+  bool is_gray, has_alpha;
+  PIK_RETURN_IF_ERROR(CheckGray(color_mode, &is_gray));
+  PIK_RETURN_IF_ERROR(CheckAlpha(color_mode, &has_alpha));
+  // We want LodePNG to promote 1/2/4 bit pixels to 8.
+  size_t bits_per_sample = std::max(color_mode.bitdepth, 8u);
+  io->SetOriginalBitsPerSample(bits_per_sample);
+  if (bits_per_sample != 8 && bits_per_sample != 16) {
+    return PIK_FAILURE("Unexpected PNG bit depth");
+  }
+
+  io->enc_size = bytes.size();
+  io->dec_hints.Foreach([](const std::string& key, const std::string& value) {
+    fprintf(stderr, "PNG decoder ignoring %s hint\n", key.c_str());
+  });
+
+  // Always decode to 8/16-bit RGB/RGBA, not LCT_PALETTE.
+  state.s.info_raw.bitdepth = bits_per_sample;
+  state.s.info_raw.colortype = MakeType(is_gray, has_alpha);
+  unsigned char* out = nullptr;
+  if (lodepng_decode(&out, &w, &h, &state.s, bytes.data(), bytes.size()) != 0) {
+    return PIK_FAILURE("PNG decode failed");
+  }
+
+  if (!MetadataReaderPNG::Decode(state.s.info_png, &io->metadata)) {
+    fprintf(stderr, "PNG metadata may be incomplete.\n");
+  }
+  ColorEncodingReaderPNG reader;
+  const Status color_ok =
+      reader(bytes, is_gray, &io->metadata, &io->dec_c_original);
+  // The pixel buffer belongs to us from here on; release it before the early
+  // return below, which used to leak it.
+  if (!color_ok) free(out);
+  PIK_RETURN_IF_ERROR(color_ok);
+
+  const bool big_endian = true;  // PNG requirement
+  const uint8_t* end = nullptr;  // Don't know.
+  const ExternalImage external(w, h, io->dec_c_original, has_alpha,
+                               /*alpha_bits=*/ bits_per_sample, bits_per_sample,
+                               big_endian, out, end);
+  free(out);
+  const CodecIntervals* temp_intervals = nullptr;  // Don't know min/max.
+  return external.CopyTo(temp_intervals, pool, io);
+}
+
+// Converts the pixels of `io` to `c_desired` and encodes them as PNG into
+// `bytes`. Samples are written with 8 bits if bits_per_sample is 8 and with 16
+// bits otherwise. The color chunks describing `c_desired` and the metadata of
+// `io` are embedded. Records the chosen bit depth and the encoded size in
+// `io`.
+Status EncodeImagePNG(const CodecInOut* io, const ColorEncoding& c_desired,
+                      size_t bits_per_sample, ThreadPool* pool,
+                      PaddedBytes* bytes) {
+  io->enc_bits_per_sample = bits_per_sample == 8 ? 8 : 16;
+
+  const ImageU* alpha = io->HasAlpha() ? &io->alpha() : nullptr;
+  const size_t alpha_bits = io->HasAlpha() ? io->AlphaBits() : 0;
+  const bool big_endian = true;              // PNG requirement
+  CodecIntervals* temp_intervals = nullptr;  // Can't store min/max.
+  const ExternalImage external(pool, io->color(), Rect(io->color()),
+                               io->c_current(), c_desired, io->HasAlpha(),
+                               alpha, alpha_bits, io->enc_bits_per_sample,
+                               big_endian, temp_intervals);
+  PIK_RETURN_IF_ERROR(external.IsHealthy());
+
+  PNGState state;
+  // For maximum compatibility, still store 8-bit even if pixels are all zero.
+  state.s.encoder.auto_convert = 0;
+
+  LodePNGInfo* info = &state.s.info_png;
+  info->color.bitdepth = io->enc_bits_per_sample;
+  info->color.colortype = MakeType(io->IsGray(), io->HasAlpha());
+  state.s.info_raw = info->color;
+
+  PIK_RETURN_IF_ERROR(ColorEncodingWriterPNG::Encode(c_desired, info));
+  PIK_RETURN_IF_ERROR(MetadataWriterPNG::Encode(io->metadata, info));
+
+  unsigned char* out = nullptr;
+  size_t out_size = 0;
+  if (lodepng_encode(&out, &out_size, external.Bytes().data(), io->xsize(),
+                     io->ysize(), &state.s) != 0) {
+    // Release any partial output the encoder handed back before failing;
+    // free(nullptr) is a no-op.
+    free(out);
+    return PIK_FAILURE("Failed to encode PNG");
+  }
+  bytes->resize(out_size);
+  memcpy(bytes->data(), out, out_size);
+  free(out);
+
+  io->enc_size = out_size;
+  return true;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/codec_png.h b/codec/L2/demos/pikEnc/host/pik/codec_png.h
new file mode 100755
index 0000000000..1b795e6743
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/codec_png.h
@@ -0,0 +1,30 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_CODEC_PNG_H_
+#define PIK_CODEC_PNG_H_
+
+// Encodes/decodes PNG pixels and metadata in memory.
+
+#include "pik/codec.h"
+#include "pik/color_management.h"
+#include "pik/padded_bytes.h"
+
+namespace pik {
+
+// Decodes the PNG in "bytes" into "io": pixels, metadata and the color
+// encoding found in the file (io->dec_c_original). Entries in io->dec_hints
+// are not applied; the decoder prints a notice for each one it ignores.
+Status DecodeImagePNG(const PaddedBytes& bytes, ThreadPool* pool,
+                      CodecInOut* io);
+
+// Transforms from io->c_current to "c_desired" and encodes into "bytes" as
+// PNG. Samples are stored with 8 bits if "bits_per_sample" is 8, otherwise
+// with 16 bits.
+Status EncodeImagePNG(const CodecInOut* io, const ColorEncoding& c_desired,
+                      size_t bits_per_sample, ThreadPool* pool,
+                      PaddedBytes* bytes);
+
+}  // namespace pik
+
+#endif  // PIK_CODEC_PNG_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/codec_pnm.cc b/codec/L2/demos/pikEnc/host/pik/codec_pnm.cc
new file mode 100755
index 0000000000..500e73fd4c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/codec_pnm.cc
@@ -0,0 +1,296 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/codec_pnm.h"
+
+#include <stdlib.h>
+
+#include <string>
+
+#include "pik/bits.h"
+#include "pik/byte_order.h"
+#include "pik/external_image.h"
+#include "pik/fields.h"
+
+namespace pik {
+namespace {
+
+// Fields parsed from the header of a PGM/PPM ('P5'/'P6') or PFM ('Pf'/'PF')
+// file.
+struct HeaderPNM {
+  size_t xsize;  // First number in the header.
+  size_t ysize;  // Second number in the header.
+  bool is_gray;  // True for 'P5' and 'Pf', false for 'P6' and 'PF'.
+  size_t bits_per_sample;  // Derived from MaxVal for PGM/PPM; 32 for PFM.
+  bool big_endian;  // Always true for PGM/PPM; for PFM, true iff scale >= 0.
+};
+
+class Parser {
+ public:
+  // Starts parsing at the first byte of `bytes`. Only pointers into `bytes`
+  // are kept, so it must outlive the parser.
+  explicit Parser(const PaddedBytes& bytes)
+      : pos_(bytes.data()), end_(pos_ + bytes.size()) {}
+
+  // Reads the two-character magic and the rest of the header. 'P5'/'P6' select
+  // the PGM/PPM header parser, 'Pf'/'PF' the PFM one; '5' and 'f' mean gray.
+  // Sets "pos" to the first non-header byte/pixel on success. Returns false
+  // for any other magic.
+  Status ParseHeader(HeaderPNM* header, const uint8_t** pos) {
+    // codec_facade ensures we have at least two bytes => no range check here.
+    if (pos_[0] != 'P') return false;
+    const uint8_t type = pos_[1];
+    pos_ += 2;
+
+    const bool is_pnm = (type == '5' || type == '6');
+    const bool is_pfm = (type == 'F' || type == 'f');
+    if (!is_pnm && !is_pfm) return false;
+
+    header->is_gray = (type == '5' || type == 'f');
+    if (is_pnm) {
+      return ParseHeaderPNM(header, pos);
+    }
+    return ParseHeaderPFM(header, pos);
+  }
+
+ private:
+  // True for the ASCII decimal digits '0'..'9'.
+  static bool IsDigit(const uint8_t c) { return '0' <= c && c <= '9'; }
+  // True for carriage return and line feed.
+  static bool IsLineBreak(const uint8_t c) { return c == '\r' || c == '\n'; }
+  // True for a line break, a tab or a space.
+  static bool IsWhitespace(const uint8_t c) {
+    return IsLineBreak(c) || c == '\t' || c == ' ';
+  }
+
+  // Parses a run of decimal digits at the current position into *number and
+  // advances past it. Fails at the end of the input, if the current byte is
+  // not a digit, or if the value does not fit into size_t.
+  Status ParseUnsigned(size_t* number) {
+    if (pos_ == end_) return PIK_FAILURE("PNM: reached end before number");
+    if (!IsDigit(*pos_)) return PIK_FAILURE("PNM: expected unsigned number");
+
+    const size_t kMaxValue = static_cast<size_t>(-1);
+    *number = 0;
+    while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') {
+      const size_t digit = *pos_ - '0';
+      // Reject instead of wrapping around: a wrapped value would be taken as
+      // a valid (but wrong) image dimension.
+      if (*number > (kMaxValue - digit) / 10) {
+        return PIK_FAILURE("PNM: number too large");
+      }
+      *number = *number * 10 + digit;
+      ++pos_;
+    }
+
+    return true;
+  }
+
+  // Parses a floating-point number (optionally signed) at the current
+  // position into *number and advances past it. At most 50 bytes are
+  // examined. Fails at the end of the input or if no number can be parsed.
+  Status ParseSigned(double* number) {
+    if (pos_ == end_) return PIK_FAILURE("PNM: reached end before number");
+    if (*pos_ != '-' && *pos_ != '+' && !IsDigit(*pos_)) {
+      return PIK_FAILURE("PNM: expected signed number");
+    }
+
+    // Copy so that the parser sees a null-terminated string.
+    const size_t max_size = std::min<ptrdiff_t>(end_ - pos_, 50);
+    const std::string copy(reinterpret_cast<const char*>(pos_), max_size);
+    // strtod reports failure through its end pointer. std::stod would throw
+    // on input such as "+x" or "1e999", which comes from an untrusted file.
+    char* parse_end = nullptr;
+    *number = strtod(copy.c_str(), &parse_end);
+    const size_t chars_processed = parse_end - copy.c_str();
+    if (chars_processed == 0) {
+      return PIK_FAILURE("PNM: expected signed number");
+    }
+    pos_ += chars_processed;
+
+    return true;
+  }
+
+  // Consumes exactly one space character. Fails at the end of the input or if
+  // the current byte is anything else.
+  Status SkipBlank() {
+    if (pos_ == end_) return PIK_FAILURE("PNM: reached end before blank");
+    const uint8_t current = *pos_;
+    if (current != ' ') return PIK_FAILURE("PNM: expected blank");
+    pos_ += 1;
+    return true;
+  }
+
+  // Consumes exactly one whitespace character (line break, tab or space).
+  // Fails at the end of the input or if the current byte is anything else.
+  Status SkipSingleWhitespace() {
+    if (pos_ == end_) return PIK_FAILURE("PNM: reached end before whitespace");
+    const uint8_t current = *pos_;
+    if (!IsWhitespace(current)) return PIK_FAILURE("PNM: expected whitespace");
+    pos_ += 1;
+    return true;
+  }
+
+  // Skips the separator between two header fields: any whitespace, then any
+  // number of '#' comment lines (each running to the next line break), then
+  // any whitespace again. Fails at the end of the input or if the current
+  // byte is neither whitespace nor '#'.
+  Status SkipWhitespace() {
+    if (pos_ == end_) return PIK_FAILURE("PNM: reached end before whitespace");
+    if (!IsWhitespace(*pos_) && *pos_ != '#') {
+      return PIK_FAILURE("PNM: expected whitespace/comment");
+    }
+
+    while (pos_ < end_ && IsWhitespace(*pos_)) {
+      ++pos_;
+    }
+
+    // Comment(s)
+    while (pos_ != end_ && *pos_ == '#') {
+      // Skip the comment text up to (not including) the line break.
+      while (pos_ != end_ && !IsLineBreak(*pos_)) {
+        ++pos_;
+      }
+      // Newline(s)
+      while (pos_ != end_ && IsLineBreak(*pos_)) pos_++;
+    }
+
+    while (pos_ < end_ && IsWhitespace(*pos_)) {
+      ++pos_;
+    }
+    return true;
+  }
+
+  // Parses the remainder of a PGM/PPM header after the magic: width, height
+  // and MaxVal, each preceded by whitespace and/or comments, followed by
+  // exactly one whitespace character. Stores the sizes, the bit depth derived
+  // from MaxVal and big_endian = true in `header`, and the position of the
+  // first pixel byte in *pos. Fails on malformed input or if MaxVal is 0 or
+  // >= 65536.
+  Status ParseHeaderPNM(HeaderPNM* header, const uint8_t** pos) {
+    PIK_RETURN_IF_ERROR(SkipWhitespace());
+    PIK_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
+
+    PIK_RETURN_IF_ERROR(SkipWhitespace());
+    PIK_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
+
+    PIK_RETURN_IF_ERROR(SkipWhitespace());
+    size_t max_val;
+    PIK_RETURN_IF_ERROR(ParseUnsigned(&max_val));
+    if (max_val == 0 || max_val >= 65536) return PIK_FAILURE("PNM: bad MaxVal");
+    // NOTE(review): CeilLog2Nonzero is defined elsewhere. If it is a plain
+    // ceil(log2), MaxVal 1 yields 0 bits and MaxVal 256 yields 8 - confirm
+    // this is intended for values that are not of the form 2^n - 1.
+    header->bits_per_sample = CeilLog2Nonzero(static_cast<uint32_t>(max_val));
+    header->big_endian = true;
+
+    PIK_RETURN_IF_ERROR(SkipSingleWhitespace());
+
+    *pos = pos_;
+    return true;
+  }
+
+  // Parses the remainder of a PFM header after the magic: one whitespace
+  // character, width, one space, height, one whitespace character, the scale
+  // and one final whitespace character. No comments are accepted. The sign of
+  // the scale selects the byte order (big_endian iff scale >= 0) and the bit
+  // depth is fixed at 32. Stores the position of the first pixel byte in *pos.
+  Status ParseHeaderPFM(HeaderPNM* header, const uint8_t** pos) {
+    PIK_RETURN_IF_ERROR(SkipSingleWhitespace());
+    PIK_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
+
+    PIK_RETURN_IF_ERROR(SkipBlank());
+    PIK_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
+
+    PIK_RETURN_IF_ERROR(SkipSingleWhitespace());
+    double scale;
+    PIK_RETURN_IF_ERROR(ParseSigned(&scale));
+    header->big_endian = scale >= 0.0;
+    header->bits_per_sample = 32;
+
+    PIK_RETURN_IF_ERROR(SkipSingleWhitespace());
+
+    *pos = pos_;
+    return true;
+  }
+
+  const uint8_t* pos_;
+  const uint8_t* const end_;
+};
+
+constexpr size_t kMaxHeaderSize = 200;
+
+// Formats the file header for `external` into `header`, writing at most
+// kMaxHeaderSize characters, and stores the header length in *chars_written
+// (via the %n conversion). Images with 32 bits per sample get a PFM header
+// ('Pf' gray, 'PF' color) whose scale is 1.0 for big-endian and -1.0 for
+// little-endian data; all others get a PGM/PPM header ('P5' gray, 'P6' color)
+// with MaxVal 2^bits - 1. Fails if the image has alpha or MaxVal would exceed
+// 65535.
+Status EncodeHeader(const ExternalImage& external, char* header,
+                    int* PIK_RESTRICT chars_written) {
+  if (external.HasAlpha()) return PIK_FAILURE("PNM: can't store alpha");
+
+  if (external.BitsPerSample() == 32) {  // PFM
+    const char type = external.IsGray() ? 'f' : 'F';
+    const double scale = external.BigEndian() ? 1.0 : -1.0;
+    snprintf(header, kMaxHeaderSize, "P%c %zu %zu\n%f\n%n", type,
+             external.xsize(), external.ysize(), scale, chars_written);
+  } else {  // PGM/PPM
+    const uint32_t max_val = (1U << external.BitsPerSample()) - 1;
+    if (max_val >= 65536) return PIK_FAILURE("PNM cannot have > 16 bits");
+    const char type = external.IsGray() ? '5' : '6';
+    snprintf(header, kMaxHeaderSize, "P%c\n%zu %zu\n%u\n%n", type,
+             external.xsize(), external.ysize(), max_val, chars_written);
+  }
+  return true;
+}
+
+// Applies the decoder hints in io->dec_hints to io->dec_c_original.
+//
+// Only the "color_space" hint is understood; any other key is reported on
+// stderr and ignored. If no "color_space" hint is present, sRGB (gray or RGB
+// according to "is_gray") is assumed. Fails if a "color_space" hint cannot be
+// applied or disagrees with "is_gray", or if the sRGB profile cannot be set.
+Status ApplyHints(const bool is_gray, CodecInOut* io) {
+  bool have_color_space = false;
+  Status hints_ok = true;
+
+  const auto handle_hint = [&](const std::string& key,
+                               const std::string& value) {
+    if (key != "color_space") {
+      fprintf(stderr, "PNM decoder ignoring %s hint\n", key.c_str());
+      return;
+    }
+
+    ProfileParams pp;
+    const bool applied =
+        ParseDescription(value, &pp) &&
+        ColorManagement::SetFromParams(pp, &io->dec_c_original);
+    if (!applied) {
+      fprintf(stderr, "PNM: Failed to apply color_space.\n");
+      hints_ok = false;
+    }
+
+    // The hint must agree with the file on gray vs. color.
+    if (is_gray != io->dec_c_original.IsGray()) {
+      fprintf(stderr, "PNM: mismatch between file and color_space hint.\n");
+      hints_ok = false;
+    }
+
+    have_color_space = true;
+  };
+  io->dec_hints.Foreach(handle_hint);
+
+  if (!have_color_space) {
+    fprintf(stderr, "PNM: no color_space hint given, assuming sRGB.\n");
+    const ColorSpace assumed = is_gray ? ColorSpace::kGray : ColorSpace::kRGB;
+    io->dec_c_original.SetSRGB(assumed);
+    PIK_RETURN_IF_ERROR(
+        ColorManagement::SetProfileFromFields(&io->dec_c_original));
+  }
+
+  if (!hints_ok) return PIK_FAILURE("PNM ApplyHints failed");
+  return true;
+}
+
+}  // namespace
+
+// Decodes the PGM/PPM/PFM file held in "bytes" into "io": parses the header,
+// applies decoder hints, resets io->metadata and copies the pixels via
+// ExternalImage::CopyTo. Returns an error if any of these steps fails.
+Status DecodeImagePNM(const PaddedBytes& bytes, ThreadPool* pool,
+                      CodecInOut* io) {
+  io->enc_size = bytes.size();
+
+  // Parse the textual header; "pixels" receives the position after it.
+  Parser header_parser(bytes);
+  HeaderPNM hdr;
+  const uint8_t* pixels;
+  PIK_RETURN_IF_ERROR(header_parser.ParseHeader(&hdr, &pixels));
+
+  PIK_RETURN_IF_ERROR(ApplyHints(hdr.is_gray, io));
+  io->SetOriginalBitsPerSample(hdr.bits_per_sample);
+  io->metadata = Metadata();
+
+  // The image is always constructed without an alpha channel.
+  const bool has_alpha = false;
+  const uint8_t* pixels_end = bytes.data() + bytes.size();
+  const ExternalImage external(hdr.xsize, hdr.ysize, io->dec_c_original,
+                               has_alpha, /*alpha_bits=*/0,
+                               hdr.bits_per_sample, hdr.big_endian, pixels,
+                               pixels_end);
+
+  // The min/max of the samples are unknown, so no intervals are passed.
+  const CodecIntervals* no_intervals = nullptr;
+  return external.CopyTo(no_intervals, pool, io);
+}
+
+// Encodes io->color() as PGM/PPM (bits_per_sample <= 16) or PFM (otherwise,
+// stored as 32 bits) into "bytes", converting from io->c_current() to
+// "c_desired". Updates io->enc_bits_per_sample and io->enc_size. Metadata and
+// non-sRGB profiles cannot be stored; a warning is printed to stderr instead.
+// Returns an error if the conversion or header generation fails.
+Status EncodeImagePNM(const CodecInOut* io, const ColorEncoding& c_desired,
+                      size_t bits_per_sample, ThreadPool* pool,
+                      PaddedBytes* bytes) {
+  if (bits_per_sample <= 16) {
+    io->enc_bits_per_sample = bits_per_sample;
+  } else {
+    io->enc_bits_per_sample = 32;
+  }
+  // PGM/PPM require big-endian; PFM (32 bits requested) uses native order.
+  const bool big_endian = (bits_per_sample != 32) || !IsLittleEndian();
+
+  if (!Bundle::AllDefault(io->metadata)) {
+    fprintf(stderr, "PNM encoder ignoring metadata - use a different codec.\n");
+  }
+  if (!c_desired.IsSRGB()) {
+    fprintf(stderr,
+            "PNM encoder cannot store custom ICC profile; decoder "
+            "will need hint key=color_space to get the same values.\n");
+  }
+
+  const ImageU* alpha_plane = io->HasAlpha() ? &io->alpha() : nullptr;
+  const size_t alpha_depth = io->HasAlpha() ? io->AlphaBits() : 0;
+  // Min/max cannot be stored in this format, so no intervals are requested.
+  CodecIntervals* no_intervals = nullptr;
+  ExternalImage external(pool, io->color(), Rect(io->color()), io->c_current(),
+                         c_desired, io->HasAlpha(), alpha_plane, alpha_depth,
+                         io->enc_bits_per_sample, big_endian, no_intervals);
+  PIK_RETURN_IF_ERROR(external.IsHealthy());
+
+  char header_text[kMaxHeaderSize];
+  int header_len = 0;
+  PIK_RETURN_IF_ERROR(EncodeHeader(external, header_text, &header_len));
+
+  // Output layout: header text immediately followed by the raw samples.
+  const PaddedBytes& samples = external.Bytes();
+  io->enc_size = header_len + samples.size();
+  bytes->resize(io->enc_size);
+  memcpy(bytes->data(), header_text, header_len);
+  memcpy(bytes->data() + header_len, samples.data(), samples.size());
+
+  return true;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/codec_pnm.h b/codec/L2/demos/pikEnc/host/pik/codec_pnm.h
new file mode 100755
index 0000000000..617632932e
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/codec_pnm.h
@@ -0,0 +1,31 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_CODEC_PNM_H_
+#define PIK_CODEC_PNM_H_
+
+// Encodes/decodes PGM/PPM/PFM pixels in memory.
+
+#include "pik/codec.h"
+#include "pik/color_management.h"
+#include "pik/data_parallel.h"
+#include "pik/padded_bytes.h"
+
+namespace pik {
+
+// Decodes "bytes" and transforms to io->c_current color space. io->dec_hints
+// may specify "color_space" (default is sRGB); other hints are ignored.
+Status DecodeImagePNM(const PaddedBytes& bytes, ThreadPool* pool,
+                      CodecInOut* io);
+
+// Transforms from io->c_current to "c_desired" and encodes into "bytes".
+Status EncodeImagePNM(const CodecInOut* io, const ColorEncoding& c_desired,
+                      size_t bits_per_sample, ThreadPool* pool,
+                      PaddedBytes* bytes);
+
+}  // namespace pik
+
+#endif  // PIK_CODEC_PNM_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/color_correlation.cc b/codec/L2/demos/pikEnc/host/pik/color_correlation.cc
new file mode 100755
index 0000000000..d4c4dcca3b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/color_correlation.cc
@@ -0,0 +1,314 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/color_correlation.h"
+#include "pik/huffman_decode.h"
+#include "pik/huffman_encode.h"
+#include "pik/write_bits.h"
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/common.h"
+#include "pik/dct_util.h"
+#include "pik/profiler.h"
+#include "pik/quantizer.h"
+
+namespace pik {
+
+namespace {
+// Scans the running (prefix) sums of array[0..len) and returns, via *idx, the
+// first index at which the running sum reaches its maximum, and via *sum that
+// maximum. If no running sum is positive, *idx and *sum are both 0.
+// "len" must be > 0.
+template <typename V, typename R>
+inline void FindIndexOfSumMaximum(const V* array, const size_t len, R* idx,
+                                  V* sum) {
+  PIK_ASSERT(len > 0);
+  V maxval = 0;
+  V val = 0;
+  R maxidx = 0;
+  // Start at index 0: array[0] is part of every running sum, so skipping it
+  // would drop all contributions recorded at the first position.
+  for (size_t i = 0; i < len; ++i) {
+    val += array[i];
+    if (val > maxval) {
+      maxval = val;
+      maxidx = i;
+    }
+  }
+  *idx = maxidx;
+  *sum = maxval;
+}
+
+// Chooses, per color tile and globally, the integer factor in [0, 255] that
+// best predicts channel SIDE_CHANNEL from channel MAIN_CHANNEL of "dct".
+//
+// For every entry of "map" (one per tile), each coefficient in the tile except
+// those at offset 0 of a block contributes an interval of factor indices for
+// which |scaled_s - (idx - OFFSET) * scaled_m| < kZeroThresh. Intervals are
+// accumulated as +1/-1 marks in d_num_zeros, and FindIndexOfSumMaximum picks
+// the index from the running sums. The chosen index is written to "map" and
+// the winning count divided by the tile area to "tmp_map".
+//
+// The same marks summed over all tiles give the global factor, stored in *dc.
+// A second pass replaces a tile's factor with the global one whenever the
+// tile's normalized count does not exceed the global normalized count plus
+// the scaled "acceptance".
+template <int MAIN_CHANNEL, int SIDE_CHANNEL, int SCALE, int OFFSET>
+void FindBestCorrelation(const Image3F& dct, ImageI* PIK_RESTRICT map,
+                         ImageF* PIK_RESTRICT tmp_map, int* PIK_RESTRICT dc,
+                         float acceptance, const DequantMatrices& dequant) {
+  constexpr int N = kBlockDim;
+  constexpr int block_size = N * N;
+  constexpr float kScale = SCALE;
+  constexpr float kZeroThresh = kScale * kZeroBiasDefault[SIDE_CHANNEL];
+  // Always use DCT8 quantization values for DC.
+  const float* const PIK_RESTRICT kDequantMatrix =
+      dequant.Matrix(0, kQuantKindDCT8, SIDE_CHANNEL);
+  // Reciprocals of the dequantization weights, one per coefficient position.
+  float qm[block_size];
+  for (int k = 0; k < block_size; ++k) {
+    qm[k] = 1.0f / kDequantMatrix[k];
+  }
+  // Sum of the per-tile marks, used for the global factor.
+  int32_t d_num_zeros_global[256] = {0};
+  for (int ty = 0; ty < map->ysize(); ++ty) {
+    int* PIK_RESTRICT row_out = map->Row(ty);
+    float* PIK_RESTRICT row_tmp_out = tmp_map->Row(ty);
+    for (int tx = 0; tx < map->xsize(); ++tx) {
+      // Tile bounds within "dct": one row per block row and block_size
+      // consecutive values per block, clamped to the image size.
+      const int y0 = ty * kColorTileDimInBlocks;
+      const int x0 = tx * kColorTileDimInBlocks * block_size;
+      const int y1 = std::min<int>(y0 + kColorTileDimInBlocks, dct.ysize());
+      const int x1 =
+          std::min<int>(x0 + kColorTileDimInBlocks * block_size, dct.xsize());
+      // One extra slot so the closing mark at index 256 stays in bounds.
+      int32_t d_num_zeros[257] = {0};
+      for (size_t y = y0; y < y1; ++y) {
+        const float* const PIK_RESTRICT row_m =
+            dct.ConstPlaneRow(MAIN_CHANNEL, y);
+        const float* const PIK_RESTRICT row_s =
+            dct.ConstPlaneRow(SIDE_CHANNEL, y);
+        for (size_t x = x0; x < x1; ++x) {
+          // Skip the coefficient at offset 0 of each block (presumably DC).
+          if (x % block_size == 0) continue;
+          const float scaled_m = row_m[x] * qm[x % block_size];
+          const float scaled_s =
+              kScale * row_s[x] * qm[x % block_size] + OFFSET * scaled_m;
+          // Increment num_zeros[idx] if
+          //   std::abs(scaled_s - (idx - OFFSET) *
+          //   scaled_m) < kZeroThresh
+          if (std::abs(scaled_m) < 1e-8) {
+            // Range is too narrow, all-or-nothing
+            // strategy should be OK.
+            if (std::abs(scaled_s) < kZeroThresh) {
+              d_num_zeros[0]++;
+            }
+          } else {
+            float from;
+            float to;
+            // Dividing by a negative scaled_m flips the interval ends.
+            if (scaled_m > 0) {
+              from = (scaled_s - kZeroThresh) / scaled_m;
+              to = (scaled_s + kZeroThresh) / scaled_m;
+            } else {
+              from = (scaled_s + kZeroThresh) / scaled_m;
+              to = (scaled_s - kZeroThresh) / scaled_m;
+            }
+            if (from < 0.0f) {
+              from = 0.0f;
+            }
+            if (to > 255.0f) {
+              to = 255.0f;
+            }
+            // Instead of clamping the both values
+            // we just check that range is sane.
+            if (from <= to) {
+              // Open the interval at ceil(from), close it after floor(to).
+              d_num_zeros[(int)std::ceil(from)]++;
+              d_num_zeros[(int)std::floor(to + 1)]--;
+            }
+          }
+        }
+      }
+      int best = 0;
+      int32_t best_sum = 0;
+      FindIndexOfSumMaximum(d_num_zeros, 256, &best, &best_sum);
+      for (size_t i = 0; i < 256; ++i) {
+        d_num_zeros_global[i] += d_num_zeros[i];
+      }
+      row_out[tx] = best;
+      // Winning count normalized by the number of values in the tile.
+      row_tmp_out[tx] = (float)best_sum / ((x1 - x0) * (y1 - y0));
+    }
+  }
+
+  int global_best = 0;
+  int32_t global_sum = 0;
+  FindIndexOfSumMaximum(d_num_zeros_global, 256, &global_best, &global_sum);
+  float global_normalized_sum = (float)global_sum / (dct.xsize() * dct.ysize());
+  float normalized_acceptance =
+      acceptance * kColorTileDimInBlocks * kColorTileDimInBlocks * block_size;
+  for (int ty = 0; ty < map->ysize(); ++ty) {
+    int* PIK_RESTRICT row_out = map->Row(ty);
+    float* PIK_RESTRICT row_tmp_out = tmp_map->Row(ty);
+    for (int tx = 0; tx < map->xsize(); ++tx) {
+      // Revert to the global factor used for dc if
+      // the number of zeros is almost the same.
+      if (row_tmp_out[tx] <= global_normalized_sum + normalized_acceptance) {
+        row_out[tx] = global_best;
+      }
+    }
+  }
+  *dc = global_best;
+}
+
+}  // namespace
+
+// Subtracts the Y-predicted part from the X (plane 0) and B (plane 2)
+// coefficients in place: x -= ytox * y and b -= ytob * y, with the per-tile
+// factors taken from "cmap" through "cmap_rect".
+//
+// "y_plane" may refer to plane#1 of "coeffs"; it is also organized in the
+// block layout (consecutive block coefficient `pixels').
+// Class Dequant applies color correlation maps back.
+SIMD_ATTR void UnapplyColorCorrelationAC(const ColorCorrelationMap& cmap,
+                                         const Rect& cmap_rect,
+                                         const ImageF& y_plane,
+                                         Image3F* coeffs) {
+  constexpr size_t N = kBlockDim;
+  constexpr size_t block_size = N * N;
+  const SIMD_FULL(float) d;
+
+  // Each row of "coeffs" holds one row of blocks, block_size values per block.
+  const size_t xsize_blocks = coeffs->xsize() / block_size;
+  const size_t ysize_blocks = coeffs->ysize();
+  for (size_t y = 0; y < ysize_blocks; ++y) {
+    // Row of the color tile containing block row y.
+    size_t ty = y / kColorTileDimInBlocks;
+    const int* PIK_RESTRICT row_ytob = cmap_rect.ConstRow(cmap.ytob_map, ty);
+    const int* PIK_RESTRICT row_ytox = cmap_rect.ConstRow(cmap.ytox_map, ty);
+
+    for (size_t x = 0; x < xsize_blocks; ++x) {
+      size_t tx = x / kColorTileDimInBlocks;
+      const float* PIK_RESTRICT row_y = y_plane.Row(y) + x * block_size;
+      float* PIK_RESTRICT row_x = coeffs->PlaneRow(0, y) + x * block_size;
+      float* PIK_RESTRICT row_b = coeffs->PlaneRow(2, y) + x * block_size;
+      // Passing y = 1.0f yields the plain multiplier for this tile's factor.
+      const auto ytob = set1(d, ColorCorrelationMap::YtoB(1.0f, row_ytob[tx]));
+      const auto ytox = set1(d, ColorCorrelationMap::YtoX(1.0f, row_ytox[tx]));
+      // Process the block d.N values at a time.
+      for (size_t k = 0; k < block_size; k += d.N) {
+        const auto in_y = load(d, row_y + k);
+        const auto in_b = load(d, row_b + k);
+        const auto in_x = load(d, row_x + k);
+        const auto out_b = in_b - ytob * in_y;
+        const auto out_x = in_x - ytox * in_y;
+        store(out_b, d, row_b + k);
+        store(out_x, d, row_x + k);
+      }
+    }
+  }
+}
+
+// Applies (decode == true) or removes (decode == false) the Y-predicted part
+// of the X (plane 0) and B (plane 2) DC values in place, using the single
+// global factors cmap.ytox_dc and cmap.ytob_dc:
+//   decode:  x += ytox * y,  b += ytob * y
+//   encode:  x -= ytox * y,  b -= ytob * y
+// Rows are processed d.N values at a time.
+// NOTE(review): the last vector of a row may extend past xsize() - presumably
+// rows are padded to a multiple of the vector size; confirm.
+template <bool decode>
+SIMD_ATTR void ApplyColorCorrelationDC(const ColorCorrelationMap& cmap,
+                                       const ImageF& y_plane_dc,
+                                       Image3F* coeffs_dc) {
+  const SIMD_FULL(float) d;
+  const size_t num_cols = coeffs_dc->xsize();
+  const size_t num_rows = coeffs_dc->ysize();
+
+  // Passing y = 1.0f yields the plain multipliers for the DC factors.
+  const auto mul_b = set1(d, ColorCorrelationMap::YtoB(1.0f, cmap.ytob_dc));
+  const auto mul_x = set1(d, ColorCorrelationMap::YtoX(1.0f, cmap.ytox_dc));
+
+  for (size_t row = 0; row < num_rows; ++row) {
+    const float* PIK_RESTRICT luma = y_plane_dc.Row(row);
+    float* PIK_RESTRICT plane_x = coeffs_dc->PlaneRow(0, row);
+    float* PIK_RESTRICT plane_b = coeffs_dc->PlaneRow(2, row);
+
+    for (size_t col = 0; col < num_cols; col += d.N) {
+      const auto y_val = load(d, luma + col);
+      const auto b_val = load(d, plane_b + col);
+      const auto x_val = load(d, plane_x + col);
+
+      const auto b_res = decode ? b_val + mul_b * y_val : b_val - mul_b * y_val;
+      const auto x_res = decode ? x_val + mul_x * y_val : x_val - mul_x * y_val;
+      store(b_res, d, plane_b + col);
+      store(x_res, d, plane_x + col);
+    }
+  }
+}
+
+// Explicit instantiations for the decoder (true) and encoder (false) paths.
+template void ApplyColorCorrelationDC<true>(const ColorCorrelationMap&,
+                                            const ImageF&, Image3F*);
+
+template void ApplyColorCorrelationDC<false>(const ColorCorrelationMap&,
+                                             const ImageF&, Image3F*);
+
+// Computes the Y->B and Y->X correlation factors for "opsin" and stores them
+// in "cmap" (per-tile maps plus the global DC factors). The image is first
+// transformed with TransposedScaledDCT; FindBestCorrelation is then run once
+// per predicted channel.
+void FindBestColorCorrelationMap(const Image3F& opsin,
+                                 const DequantMatrices& dequant,
+                                 ColorCorrelationMap* cmap) {
+  PROFILER_ZONE("enc YTo* correlation");
+
+  constexpr int kCoeffsPerBlock = kBlockDim * kBlockDim;
+  const size_t blocks_x = opsin.xsize() / kBlockDim;
+  const size_t blocks_y = opsin.ysize() / kBlockDim;
+  // One row per block row, kCoeffsPerBlock consecutive values per block.
+  Image3F dct(blocks_x * kCoeffsPerBlock, blocks_y);
+  TransposedScaledDCT(opsin, &dct);
+
+  // Scratch map shared by both calls: one entry per color tile.
+  ImageF scratch(DivCeil(opsin.xsize(), kColorTileDim),
+                 DivCeil(opsin.ysize(), kColorTileDim));
+
+  // These two coefficients are eligible for optimization.
+  // Perhaps, they also could be made quality-dependent.
+  // Prefer global until 25% more (full) tile coefficients become zero.
+  const float acceptance_b = 0.25f;
+  // Prefer local until 62.5% less (full) tile coefficients become zero.
+  const float acceptance_x = -0.625f;
+
+  FindBestCorrelation</* from Y */ 1, /* to B */ 2, kColorFactorB,
+                      kColorOffsetB>(dct, &cmap->ytob_map, &scratch,
+                                     &cmap->ytob_dc, acceptance_b, dequant);
+  FindBestCorrelation</* from Y */ 1, /* to X */ 0, kColorFactorX,
+                      kColorOffsetX>(dct, &cmap->ytox_map, &scratch,
+                                     &cmap->ytox_dc, acceptance_x, dequant);
+}
+
+// Reads a Huffman-coded color map from "br": first the code itself, then the
+// DC factor (stored in *dc_val), then one symbol per entry of "ac_map" in
+// row-major order. Finally advances the reader to the next byte boundary.
+// Returns false if the code cannot be read or the final jump fails.
+bool DecodeColorMap(BitReader* PIK_RESTRICT br, ImageI* PIK_RESTRICT ac_map,
+                    int* PIK_RESTRICT dc_val) {
+  HuffmanDecodingData code;
+  if (!code.ReadFromBitStream(br)) {
+    return PIK_FAILURE("Invalid histogram data.");
+  }
+
+  HuffmanDecoder symbol_reader;
+  // The bit buffer is refilled before every symbol.
+  br->FillBitBuffer();
+  *dc_val = symbol_reader.ReadSymbol(code, br);
+
+  for (size_t row = 0; row < ac_map->ysize(); ++row) {
+    int* PIK_RESTRICT out = ac_map->Row(row);
+    for (size_t col = 0; col < ac_map->xsize(); ++col) {
+      br->FillBitBuffer();
+      out[col] = symbol_reader.ReadSymbol(code, br);
+    }
+  }
+
+  PIK_RETURN_IF_ERROR(br->JumpToByteBoundary());
+  return true;
+}
+
+// Huffman-encodes "dc_val" followed by the entries of "ac_map" inside "rect"
+// (row-major) and returns the resulting bytes, padded with zero bits to a
+// byte boundary. The stream starts with the Huffman tree built from the
+// histogram of all encoded values. If "info" is non-null, its size counters
+// are increased accordingly.
+// Values index 256-entry tables, so they are expected to lie in [0, 255].
+std::string EncodeColorMap(const ImageI& ac_map, const Rect& rect,
+                           const int dc_val, PikImageSizeInfo* info) {
+  PIK_ASSERT(rect.IsInside(ac_map));
+  // Output buffer sized as one byte per entry plus 1024 bytes of slack.
+  const size_t capacity = rect.xsize() * rect.ysize() + 1024;
+  std::string output(capacity, 0);
+  size_t bit_pos = 0;
+  uint8_t* out_bytes = reinterpret_cast<uint8_t*>(&output[0]);
+  out_bytes[0] = 0;
+
+  // Count how often each value occurs (DC factor included).
+  std::vector<uint32_t> counts(256);
+  ++counts[dc_val];
+  for (int y = 0; y < rect.ysize(); ++y) {
+    const int* PIK_RESTRICT values = rect.ConstRow(ac_map, y);
+    for (int x = 0; x < rect.xsize(); ++x) {
+      ++counts[values[x]];
+    }
+  }
+
+  std::vector<uint8_t> depths(256);
+  std::vector<uint16_t> codes(256);
+  BuildAndStoreHuffmanTree(counts.data(), counts.size(), depths.data(),
+                           codes.data(), &bit_pos, out_bytes);
+  // Number of bits taken by the stored tree.
+  const size_t tree_bits = bit_pos;
+
+  WriteBits(depths[dc_val], codes[dc_val], &bit_pos, out_bytes);
+  for (int y = 0; y < rect.ysize(); ++y) {
+    const int* PIK_RESTRICT values = rect.ConstRow(ac_map, y);
+    for (int x = 0; x < rect.xsize(); ++x) {
+      const int v = values[x];
+      WriteBits(depths[v], codes[v], &bit_pos, out_bytes);
+    }
+  }
+  WriteZeroesToByteBoundary(&bit_pos, out_bytes);
+
+  const size_t num_bytes = bit_pos >> 3;
+  PIK_ASSERT(num_bytes <= output.size());
+  output.resize(num_bytes);
+  if (info) {
+    info->histogram_size += tree_bits >> 3;
+    info->entropy_coded_bits += bit_pos - tree_bits;
+    info->total_size += output.size();
+  }
+  return output;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/color_correlation.h b/codec/L2/demos/pikEnc/host/pik/color_correlation.h
new file mode 100755
index 0000000000..b921d90913
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/color_correlation.h
@@ -0,0 +1,112 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_COLOR_CORRELATION_H_
+#define PIK_COLOR_CORRELATION_H_
+
+// Chroma-from-luma, computed using heuristics to determine the best linear
+// model for the X and B channels from the Y channel.
+
+#include "pik/bit_reader.h"
+#include "pik/common.h"
+#include "pik/data_parallel.h"
+#include "pik/image.h"
+#include "pik/pik_info.h"
+#include "pik/quant_weights.h"
+
+namespace pik {
+
+// Tile is the rectangular grid of blocks that share color correlation
+// parameters ("factor_x/b" such that residual_b = blue - Y * factor_b).
+constexpr size_t kColorTileDim = 64;
+
+static_assert(kColorTileDim % kBlockDim == 0,
+              "Color tile dim should be divisible by block dim");
+constexpr size_t kColorTileDimInBlocks = kColorTileDim / kBlockDim;
+
+static_assert(kTileDimInBlocks % kColorTileDimInBlocks == 0,
+              "Tile dim should be divisible by color tile dim");
+
+// Integer X factors are converted to a float multiplier as
+// (factor - kColorOffsetX) * kColorScaleX, so a factor of 128 means 0.
+constexpr const int32_t kColorFactorX = 256;
+constexpr const int32_t kColorOffsetX = 128;
+constexpr const float kColorScaleX = 1.0f / kColorFactorX;
+
+// Integer B factors are converted to a float multiplier as
+// (factor - kColorOffsetB) * kColorScaleB, i.e. factor / 128.
+constexpr const int32_t kColorFactorB = 128;
+constexpr const int32_t kColorOffsetB = 0;
+constexpr const float kColorScaleB = 1.0f / kColorFactorB;
+
+// For dispatching to ColorCorrelationMap::YtoTag overloads.
+struct TagX {};
+struct TagB {};
+
+// Holds the chroma-from-luma factors: one global (DC) factor per predicted
+// channel plus one factor per color tile. Factors are integers; YtoX / YtoB
+// convert them to float multipliers.
+struct ColorCorrelationMap {
+  // Creates a map with default-constructed per-tile maps. The DC factors get
+  // the same defaults as the sized constructor so that they are never read
+  // uninitialized.
+  ColorCorrelationMap() : ytox_dc(128), ytob_dc(120) {}
+
+  // Creates per-tile maps covering an image of xsize x ysize pixels, with all
+  // factors set to their defaults (128 for X, 120 for B).
+  ColorCorrelationMap(size_t xsize, size_t ysize)  // pixels
+      : ytox_dc(128),
+        ytob_dc(120),
+        ytox_map(DivCeil(xsize, kColorTileDim), DivCeil(ysize, kColorTileDim)),
+        ytob_map(DivCeil(xsize, kColorTileDim), DivCeil(ysize, kColorTileDim)) {
+    FillImage(128, &ytox_map);
+    FillImage(120, &ytob_map);
+  }
+
+  // |y| is scaled by some number calculated from x_factor; consequently,
+  // passing 1.0f in place of |y| result will be the scaling factor.
+  constexpr static float YtoX(float y, int32_t x_factor) {
+    return y * (x_factor - kColorOffsetX) * kColorScaleX;
+  }
+
+  // |y| is scaled by some number calculated from b_factor; consequently,
+  // passing 1.0f in place of |y| result will be the scaling factor.
+  constexpr static float YtoB(float y, int32_t b_factor) {
+    return y * (b_factor - kColorOffsetB) * kColorScaleB;
+  }
+
+  // Tag-dispatched forms of YtoX / YtoB.
+  constexpr static float YtoTag(TagX, float y, int32_t factor) {
+    return YtoX(y, factor);
+  }
+  constexpr static float YtoTag(TagB, float y, int32_t factor) {
+    return YtoB(y, factor);
+  }
+
+  // Global factors, used for DC.
+  int32_t ytox_dc;
+  int32_t ytob_dc;
+  // Per-tile factors.
+  ImageI ytox_map;
+  ImageI ytob_map;
+
+  // Returns a copy holding the DC factors and the part of both maps that lies
+  // inside "rect".
+  ColorCorrelationMap Copy(const Rect& rect) const {
+    ColorCorrelationMap copy;
+    copy.ytox_dc = ytox_dc;
+    copy.ytob_dc = ytob_dc;
+    copy.ytob_map = CopyImage(rect, ytob_map);
+    copy.ytox_map = CopyImage(rect, ytox_map);
+    return copy;
+  }
+  // Returns a copy of the whole map.
+  ColorCorrelationMap Copy() const { return Copy(Rect(ytox_map)); }
+};
+
+SIMD_ATTR void UnapplyColorCorrelationAC(const ColorCorrelationMap& cmap,
+                                         const Rect& cmap_rect,
+                                         const ImageF& y_plane,
+                                         Image3F* coeffs);
+
+template <bool decode>
+SIMD_ATTR void ApplyColorCorrelationDC(const ColorCorrelationMap& cmap,
+                                       const ImageF& y_plane_dc,
+                                       Image3F* coeffs_dc);
+
+void FindBestColorCorrelationMap(const Image3F& opsin,
+                                 const DequantMatrices& dequant,
+                                 ColorCorrelationMap* cmap);
+
+std::string EncodeColorMap(const ImageI& ac_map, const Rect& rect,
+                           const int dc_val, PikImageSizeInfo* info);
+
+bool DecodeColorMap(BitReader* PIK_RESTRICT br, ImageI* PIK_RESTRICT ac_map,
+                    int* PIK_RESTRICT dc_val);
+}  // namespace pik
+
+#endif  // PIK_COLOR_CORRELATION_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/color_encoding.cc b/codec/L2/demos/pikEnc/host/pik/color_encoding.cc
new file mode 100755
index 0000000000..6a351b67fa
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/color_encoding.cc
@@ -0,0 +1,504 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/color_encoding.h"
+
+#include "pik/fields.h"
+
+namespace pik {
+
+// These strings are baked into Description - do not change. Fixed-length
+// simplifies parsing. Despite enum class, invalid bitstreams may lead to
+// invalid enums, so handle them gracefully.
+
+// Returns the fixed three-character token for "color_space", or "CS?" if the
+// value is not one of the known enumerators.
+std::string ToString(ColorSpace color_space) {
+  const char* token = "CS?";
+  switch (color_space) {
+    case ColorSpace::kRGB:
+      token = "RGB";
+      break;
+    case ColorSpace::kGray:
+      token = "Gra";
+      break;
+    case ColorSpace::kXYZ:
+      token = "XYZ";
+      break;
+    case ColorSpace::kUnknown:
+      token = "Unk";
+      break;
+    case ColorSpace::kYCbCr:
+      token = "YCC";
+      break;
+    case ColorSpace::kICtCp:
+      token = "ITP";
+      break;
+  }
+  return token;
+}
+
+// Returns the fixed three-character token for "white_point", or "WP?" if the
+// value is not one of the known enumerators.
+std::string ToString(WhitePoint white_point) {
+  const char* token = "WP?";
+  switch (white_point) {
+    case WhitePoint::kD65:
+      token = "D65";
+      break;
+    case WhitePoint::kD60:
+      token = "D60";
+      break;
+    case WhitePoint::kD50:
+      token = "D50";
+      break;
+    case WhitePoint::kE:
+      token = "EER";
+      break;
+    case WhitePoint::kUnknown:
+      token = "Unk";
+      break;
+  }
+  return token;
+}
+
+// Returns the fixed three-character token for "primaries", or "PR?" if the
+// value is not one of the known enumerators.
+std::string ToString(Primaries primaries) {
+  const char* token = "PR?";
+  switch (primaries) {
+    case Primaries::kSRGB:
+      token = "SRG";
+      break;
+    case Primaries::k2020:
+      token = "202";
+      break;
+    case Primaries::kP3:
+      token = "DCI";
+      break;
+    case Primaries::kUnknown:
+      token = "Unk";
+      break;
+    case Primaries::kAP0:
+      token = "AP0";
+      break;
+    case Primaries::kAP1:
+      token = "AP1";
+      break;
+    case Primaries::kAdobe:
+      token = "Ado";
+      break;
+  }
+  return token;
+}
+
+// Returns the fixed three-character token for "transfer_function", or "TF?"
+// if the value is not one of the known enumerators.
+std::string ToString(TransferFunction transfer_function) {
+  const char* token = "TF?";
+  switch (transfer_function) {
+    case TransferFunction::kSRGB:
+      token = "SRG";
+      break;
+    case TransferFunction::kAdobe:
+      token = "Ado";
+      break;
+    case TransferFunction::kLinear:
+      token = "Lin";
+      break;
+    case TransferFunction::k709:
+      token = "709";
+      break;
+    case TransferFunction::kUnknown:
+      token = "Unk";
+      break;
+    case TransferFunction::kPQ:
+      token = "PeQ";
+      break;
+    case TransferFunction::kHLG:
+      token = "HLG";
+      break;
+  }
+  return token;
+}
+
+// Returns the fixed three-character token for "rendering_intent", or "RI?" if
+// the value is not one of the known enumerators.
+std::string ToString(RenderingIntent rendering_intent) {
+  const char* token = "RI?";
+  switch (rendering_intent) {
+    case RenderingIntent::kPerceptual:
+      token = "Per";
+      break;
+    case RenderingIntent::kRelative:
+      token = "Rel";
+      break;
+    case RenderingIntent::kSaturation:
+      token = "Sat";
+      break;
+    case RenderingIntent::kAbsolute:
+      token = "Abs";
+      break;
+    case RenderingIntent::kUnknown:
+      token = "Unk";
+      break;
+  }
+  return token;
+}
+
+// Each overload lists every enumerator of its enum except kUnknown, in a
+// fixed order. The argument only selects the overload; its value is unused.
+// Used by ValueFromString and tests.
+
+std::vector<ColorSpace> Values(ColorSpace) {
+  using E = ColorSpace;
+  return std::vector<E>{E::kRGB, E::kGray, E::kXYZ, E::kYCbCr, E::kICtCp};
+}
+
+std::vector<WhitePoint> Values(WhitePoint) {
+  using E = WhitePoint;
+  return std::vector<E>{E::kD65, E::kD60, E::kD50, E::kE};
+}
+
+std::vector<Primaries> Values(Primaries) {
+  using E = Primaries;
+  return std::vector<E>{E::kSRGB, E::k2020, E::kP3,
+                        E::kAP0,  E::kAP1,  E::kAdobe};
+}
+
+std::vector<TransferFunction> Values(TransferFunction) {
+  using E = TransferFunction;
+  return std::vector<E>{E::kSRGB, E::kLinear, E::k709,
+                        E::kAdobe, E::kPQ,    E::kHLG};
+}
+
+std::vector<RenderingIntent> Values(RenderingIntent) {
+  using E = RenderingIntent;
+  return std::vector<E>{E::kPerceptual, E::kRelative, E::kSaturation,
+                        E::kAbsolute};
+}
+
+// Returns the enumerator of "Enum" whose ToString() token equals "token", or
+// Enum::kUnknown if none of the values listed by Values() matches.
+template <typename Enum>
+Enum ValueFromString(const std::string& token) {
+  const std::vector<Enum> candidates = Values(Enum::kUnknown);
+  for (size_t i = 0; i < candidates.size(); ++i) {
+    const Enum candidate = candidates[i];
+    if (ToString(candidate) == token) return candidate;
+  }
+  return Enum::kUnknown;
+}
+
+// Stores the CIE xy chromaticity of "white_point" in *xy and returns true.
+// For kUnknown or an invalid enum value, zeroes *xy and returns false.
+Status WhitePointToCIExy(WhitePoint white_point, CIExy* PIK_RESTRICT xy) {
+  CIExy& out = *xy;
+  // Writes both coordinates and reports success.
+  const auto store = [&out](double x, double y) {
+    out.x = x;
+    out.y = y;
+    return true;
+  };
+
+  switch (white_point) {
+    case WhitePoint::kD65:
+      // = cmsXYZ2xyY from quantized XYZ = {0.950455927, 1.0, 1.089057751}
+      return store(0.312699999963613, 0.328999999979602);
+
+    case WhitePoint::kD60:
+      // From https://en.wikipedia.org/wiki/Academy_Color_Encoding_System
+      return store(0.32168, 0.33767);
+
+    case WhitePoint::kD50:
+      // = cmsXYZ2xyY from quantized XYZ = {0.96420288, 1.0, 0.82490540}
+      return store(0.345702921221832, 0.358537532289711);
+
+    case WhitePoint::kE:
+      return store(1.0 / 3, 1.0 / 3);
+
+    case WhitePoint::kUnknown:
+      break;  // handled below
+  }
+  memset(xy, 0, sizeof(*xy));
+  return false;
+}
+
+// Returns the named white point whose chromaticity approximately equals
+// "xy" (per ApproxEq), or WhitePoint::kUnknown if none matches.
+WhitePoint WhitePointFromCIExy(const CIExy& xy) {
+  const auto is_near = [&xy](double x, double y) {
+    return ApproxEq(xy.x, x) && ApproxEq(xy.y, y);
+  };
+
+  if (is_near(0.3127, 0.3290)) return WhitePoint::kD65;
+  if (is_near(0.32168, 0.33767)) return WhitePoint::kD60;
+  if (is_near(0.3457, 0.3585)) return WhitePoint::kD50;
+  if (is_near(1.0 / 3, 1.0 / 3)) return WhitePoint::kE;
+
+  return WhitePoint::kUnknown;
+}
+
+// Stores the CIE xy chromaticities of the red, green and blue primaries of
+// "primaries" in *xy and returns true. For kUnknown or an invalid enum value,
+// zeroes *xy and returns false.
+Status PrimariesToCIExy(Primaries primaries, PrimariesCIExy* PIK_RESTRICT xy) {
+  PrimariesCIExy& out = *xy;
+  // Writes all six coordinates (r, g, b as x/y pairs) and reports success.
+  const auto store = [&out](double rx, double ry, double gx, double gy,
+                            double bx, double by) {
+    out.r.x = rx;
+    out.r.y = ry;
+    out.g.x = gx;
+    out.g.y = gy;
+    out.b.x = bx;
+    out.b.y = by;
+    return true;
+  };
+
+  switch (primaries) {
+    case Primaries::kSRGB:
+      return store(0.639998686, 0.330010138,  //
+                   0.300003784, 0.600003357,  //
+                   0.150002046, 0.059997204);
+
+    case Primaries::k2020:
+      return store(0.708, 0.292,  //
+                   0.170, 0.797,  //
+                   0.131, 0.046);
+
+    case Primaries::kP3:
+      return store(0.680, 0.320,  //
+                   0.265, 0.690,  //
+                   0.150, 0.060);
+
+    case Primaries::kAP0:
+      return store(0.7347, 0.2653,  //
+                   0.0000, 1.0000,  //
+                   0.0001, -0.077);
+
+    case Primaries::kAP1:
+      return store(0.713, 0.293,  //
+                   0.165, 0.830,  //
+                   0.128, 0.044);
+
+    case Primaries::kAdobe:
+      return store(0.639996511, 0.329996864,  //
+                   0.210005295, 0.710004866,  //
+                   0.149997606, 0.060003644);
+
+    case Primaries::kUnknown:
+      break;  // handled below
+  }
+  memset(xy, 0, sizeof(*xy));
+  return false;
+}
+
+// Returns the named set of primaries whose red/green/blue chromaticities all
+// approximately equal those in "xy" (per ApproxEq), or Primaries::kUnknown if
+// none matches. Candidates are tested in a fixed order.
+Primaries PrimariesFromCIExy(const PrimariesCIExy& xy) {
+  // Compares r, g, b (x then y each) against the given reference values.
+  const auto matches = [&xy](double rx, double ry, double gx, double gy,
+                             double bx, double by) {
+    return ApproxEq(xy.r.x, rx) && ApproxEq(xy.r.y, ry) &&
+           ApproxEq(xy.g.x, gx) && ApproxEq(xy.g.y, gy) &&
+           ApproxEq(xy.b.x, bx) && ApproxEq(xy.b.y, by);
+  };
+
+  if (matches(0.64, 0.33, 0.30, 0.60, 0.15, 0.06)) {
+    return Primaries::kSRGB;
+  }
+  if (matches(0.708, 0.292, 0.170, 0.797, 0.131, 0.046)) {
+    return Primaries::k2020;
+  }
+  if (matches(0.680, 0.320, 0.265, 0.690, 0.150, 0.060)) {
+    return Primaries::kP3;
+  }
+  if (matches(0.7347, 0.2653, 0.0000, 1.0000, 0.0001, -0.077)) {
+    return Primaries::kAP0;
+  }
+  if (matches(0.713, 0.293, 0.165, 0.830, 0.128, 0.044)) {
+    return Primaries::kAP1;
+  }
+  if (matches(0.64, 0.33, 0.21, 0.71, 0.15, 0.06)) {
+    return Primaries::kAdobe;
+  }
+
+  return Primaries::kUnknown;
+}
+
+// Maps a transfer function to its gamma value. kUnknown and any value that is
+// not a known enumerator map to GammaUnknown().
+double GammaFromTransferFunction(TransferFunction tf) {
+  switch (tf) {
+    case TransferFunction::kLinear:
+      return GammaLinear();
+    case TransferFunction::kSRGB:
+      return GammaSRGB();
+    case TransferFunction::kAdobe:
+      return GammaAdobe();
+    case TransferFunction::k709:
+      return Gamma709();
+    case TransferFunction::kPQ:
+      return GammaPQ();
+    case TransferFunction::kHLG:
+      return GammaHLG();
+    default:
+      break;
+  }
+  return GammaUnknown();
+}
+
+// Maps a gamma value back to the transfer function whose gamma approximately
+// equals it (per ApproxEq). Candidates are tested in the order below; returns
+// TransferFunction::kUnknown if none matches.
+TransferFunction TransferFunctionFromGamma(double gamma) {
+  if (ApproxEq(gamma, GammaLinear())) return TransferFunction::kLinear;
+  if (ApproxEq(gamma, GammaSRGB())) return TransferFunction::kSRGB;
+  if (ApproxEq(gamma, GammaAdobe())) return TransferFunction::kAdobe;
+  if (ApproxEq(gamma, Gamma709())) return TransferFunction::k709;
+  if (ApproxEq(gamma, GammaPQ())) return TransferFunction::kPQ;
+  if (ApproxEq(gamma, GammaHLG())) return TransferFunction::kHLG;
+  // This check returns the same value as the fallthrough below.
+  if (ApproxEq(gamma, GammaUnknown())) return TransferFunction::kUnknown;
+  return TransferFunction::kUnknown;
+}
+
+// Returns the token of the named white point matching "xy", or, if there is
+// none, "WhitePoint:<x>,<y>" with both values formatted by std::to_string.
+std::string StringFromWhitePoint(const CIExy& xy) {
+  const WhitePoint named = WhitePointFromCIExy(xy);
+  if (named == WhitePoint::kUnknown) {
+    return std::string("WhitePoint:") + std::to_string(xy.x) + "," +
+           std::to_string(xy.y);
+  }
+  return ToString(named);
+}
+
+// Returns the token of the named primaries matching "xy", or, if there is
+// none, "Primaries:<rx>,<ry>;<gx>,<gy>;<bx>,<by>" with all values formatted
+// by std::to_string.
+std::string StringFromPrimaries(const PrimariesCIExy& xy) {
+  const Primaries named = PrimariesFromCIExy(xy);
+  if (named != Primaries::kUnknown) return ToString(named);
+
+  // Formats one chromaticity as "<x>,<y>".
+  const auto pair = [](double x, double y) {
+    return std::to_string(x) + "," + std::to_string(y);
+  };
+  std::string text("Primaries:");
+  text += pair(xy.r.x, xy.r.y) + ";";
+  text += pair(xy.g.x, xy.g.y) + ";";
+  text += pair(xy.b.x, xy.b.y);
+  return text;
+}
+
+std::string StringFromGamma(double gamma) {  // curve name, else "g<value>"
+  const TransferFunction known = TransferFunctionFromGamma(gamma);
+  return known == TransferFunction::kUnknown ? "g" + std::to_string(gamma)
+                                             : ToString(known);
+}
+
+double GammaFromString(const std::string& s) {
+  if (s.length() < 2) return 0.0;  // too short to name any curve
+  if (s[0] != 'g') {  // not a literal exponent, so treat as an enum name
+    return GammaFromTransferFunction(ValueFromString<TransferFunction>(s));
+  }
+  // NOTE(review): stod throws on malformed text - confirm inputs are trusted.
+  return stod(s.substr(1));
+}
+
+std::string Description(const ColorEncoding& c) {
+  // Fields are joined with '_' in the order: color space, white point,
+  // primaries, rendering intent, transfer function.
+  const bool is_xyz = c.color_space == ColorSpace::kXYZ;
+  const bool is_gray = c.color_space == ColorSpace::kGray;
+
+  std::string out = ToString(c.color_space);
+  // XYZ omits the white point; Gray and XYZ omit the primaries.
+  if (!is_xyz) out += "_" + ToString(c.white_point);
+  if (!is_gray && !is_xyz) out += "_" + ToString(c.primaries);
+  out += "_" + ToString(c.rendering_intent);
+
+  // The transfer function is converted to its gamma and then formatted by
+  // StringFromGamma, the same formatter Description(ProfileParams) uses.
+  const double gamma = GammaFromTransferFunction(c.transfer_function);
+  out += "_" + StringFromGamma(gamma);
+  return out;
+}
+
+std::string Description(const ProfileParams& pp) {
+  // Chromaticities are first converted to enum values, so a white point or
+  // primaries without a named match are written using that enum's kUnknown.
+  const bool is_xyz = pp.color_space == ColorSpace::kXYZ;
+  const bool is_gray = pp.color_space == ColorSpace::kGray;
+
+  std::string out = ToString(pp.color_space);
+  if (!is_xyz) {
+    out += "_" + ToString(WhitePointFromCIExy(pp.white_point));
+  }
+  if (!is_gray && !is_xyz) {
+    out += "_" + ToString(PrimariesFromCIExy(pp.primaries));
+  }
+  out += "_" + ToString(pp.rendering_intent);
+
+  // Gamma goes last for easier parsing.
+  out += "_" + StringFromGamma(pp.gamma);
+  return out;
+}
+
+Status ParseDescription(const std::string& description,
+                        ProfileParams* PIK_RESTRICT pp) {
+  // "Token" is a 3-character string followed by "_".
+  class Tokenizer {
+   public:
+    Tokenizer(const std::string* tokens) : tokens_(tokens) {}
+    Status Next(std::string* PIK_RESTRICT next) {  // extracts one token
+      if (pos_ + 4 > tokens_->length()) return PIK_FAILURE("String too short");
+      if ((*tokens_)[pos_ + 3] != '_') return PIK_FAILURE("Missing terminator");
+      *next = tokens_->substr(pos_, 3);
+      pos_ += 4;  // 3 token characters plus the '_' terminator
+      return true;
+    }
+    // Returns the not yet consumed remainder (used for the gamma suffix).
+    std::string Tail() const { return tokens_->substr(pos_); }
+
+   private:
+    const std::string* tokens_;  // not owned
+    size_t pos_ = 0;             // index of the next unread character
+  } tokenizer(&description);
+  // Fields are read in the same order in which Description() writes them.
+  std::string next;
+  PIK_RETURN_IF_ERROR(tokenizer.Next(&next));
+  pp->color_space = ValueFromString<ColorSpace>(next);
+  // No white point token is read for XYZ; the field is zeroed instead.
+  if (pp->color_space != ColorSpace::kXYZ) {
+    PIK_RETURN_IF_ERROR(tokenizer.Next(&next));
+    const WhitePoint white_point = ValueFromString<WhitePoint>(next);
+    (void)WhitePointToCIExy(white_point, &pp->white_point);  // status ignored
+  } else {
+    memset(&pp->white_point, 0, sizeof(pp->white_point));
+  }
+  // No primaries token is read for Gray and XYZ; the field is zeroed.
+  if (pp->color_space != ColorSpace::kGray &&
+      pp->color_space != ColorSpace::kXYZ) {
+    PIK_RETURN_IF_ERROR(tokenizer.Next(&next));
+    const Primaries primaries = ValueFromString<Primaries>(next);
+    (void)PrimariesToCIExy(primaries, &pp->primaries);  // status ignored
+  } else {
+    memset(&pp->primaries, 0, sizeof(pp->primaries));
+  }
+
+  PIK_RETURN_IF_ERROR(tokenizer.Next(&next));
+  pp->rendering_intent = ValueFromString<RenderingIntent>(next);
+  // Whatever follows the rendering intent is interpreted as the gamma.
+  pp->gamma = GammaFromString(tokenizer.Tail());
+
+  return true;
+}
+
+Status ColorEncodingToParams(const ColorEncoding& c,
+                             ProfileParams* PIK_RESTRICT pp) {
+  const bool is_xyz = c.color_space == ColorSpace::kXYZ;
+  const bool is_gray = c.color_space == ColorSpace::kGray;
+  pp->rendering_intent = c.rendering_intent;
+  pp->gamma = GammaFromTransferFunction(c.transfer_function);
+  pp->color_space = c.color_space;
+
+  // White point and primaries are only converted when the color space uses
+  // them, so an undefined value in an unused field cannot cause a failure.
+  if (!is_xyz) {
+    PIK_RETURN_IF_ERROR(WhitePointToCIExy(c.white_point, &pp->white_point));
+  }
+  if (!is_gray && !is_xyz) {
+    PIK_RETURN_IF_ERROR(PrimariesToCIExy(c.primaries, &pp->primaries));
+  }
+  return true;
+}
+
+void SetFieldsFromParams(const ProfileParams& pp,
+                         ColorEncoding* PIK_RESTRICT c) {
+  c->rendering_intent = pp.rendering_intent;  // copied verbatim
+  c->color_space = pp.color_space;            // copied verbatim
+  c->transfer_function = TransferFunctionFromGamma(pp.gamma);
+  c->primaries = PrimariesFromCIExy(pp.primaries);  // enum from chromaticities
+  c->white_point = WhitePointFromCIExy(pp.white_point);
+}
+
+std::vector<ColorEncoding> AllEncodings() {
+  // Walks every combination of enum values (nested loops, outermost is the
+  // color space) and collects those that are not filtered out below.
+  std::vector<ColorEncoding> result;
+  result.reserve(300);
+  ColorEncoding enc;  // reused: each loop level overwrites one field
+
+  for (const ColorSpace cs : AllValues<ColorSpace>()) {
+    // TODO(janwas): support generating these
+    if (cs == ColorSpace::kYCbCr || cs == ColorSpace::kICtCp) continue;
+    const bool is_xyz = (cs == ColorSpace::kXYZ);
+    enc.color_space = cs;
+
+    for (const WhitePoint wp : AllValues<WhitePoint>()) {
+      enc.white_point = wp;
+      // XYZ doesn't store a white point and retrieves E.
+      if (is_xyz && wp != WhitePoint::kE) continue;
+
+      for (const Primaries primaries : AllValues<Primaries>()) {
+        enc.primaries = primaries;
+
+        for (const TransferFunction tf : AllValues<TransferFunction>()) {
+          // XYZ is only generated with the linear transfer function.
+          if (is_xyz && tf != TransferFunction::kLinear) continue;
+          enc.transfer_function = tf;
+
+          for (const RenderingIntent ri : AllValues<RenderingIntent>()) {
+            enc.rendering_intent = ri;
+            result.push_back(enc);
+          }
+        }
+      }
+    }
+  }
+  return result;
+}
+
+ColorEncoding::ColorEncoding() { Bundle::Init(this); }  // setup left to Bundle
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/color_encoding.h b/codec/L2/demos/pikEnc/host/pik/color_encoding.h
new file mode 100755
index 0000000000..078e73f836
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/color_encoding.h
@@ -0,0 +1,249 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_COLOR_ENCODING_H_
+#define PIK_COLOR_ENCODING_H_
+
+// Metadata for color space conversions.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <cmath>  // std::abs
+#include <string>
+#include <vector>
+
+#include "pik/compiler_specific.h"
+#include "pik/field_encodings.h"
+#include "pik/padded_bytes.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// (All CIE units are for the standard 1931 2 degree observer)
+
+enum class ColorSpace : uint32_t {  // visited as a field by ColorEncoding
+  kRGB = 0,
+  kGray,  // one channel, see ColorEncoding::Channels()
+  kXYZ,   // white point and primaries are unused for this space
+  kUnknown,
+  kYCbCr,  // from BT.2100
+  kICtCp,  // from BT.2100
+  // Future extensions: [6, 10]
+};
+
+enum class WhitePoint : uint32_t {
+  kD65 = 0,  // sRGB/BT.709/P3/BT.2020/Adobe
+  kD60,      // ACES
+  kD50,      // ICC PCS
+  kUnknown,  // no named white point; WhitePointToCIExy returns false for it
+  kE,  // XYZ
+  // Future extensions: [5, 10]
+};
+
+enum class Primaries : uint32_t {
+  kSRGB = 0,  // Same as BT.709
+  k2020,      // Same as BT.2100
+  kP3,
+  kUnknown,  // no named primaries; PrimariesToCIExy returns false for it
+  kAP0,  // from ACES
+  kAP1,  // from ACEScc/ACEScg
+  kAdobe,
+  // Future extensions: [7, 10]
+};
+
+enum TransferFunction : uint32_t {  // NOTE(review): unscoped, unlike siblings
+  kSRGB = 0,
+  kLinear,
+  kPQ,  // from BT.2100
+  kUnknown,  // GammaFromTransferFunction maps this to GammaUnknown()
+  k709,
+  kAdobe,
+  kHLG,  // from BT.2100
+  // Future extensions: [7, 10]
+};
+
+enum class RenderingIntent : uint32_t {  // values are cast as-is for LCMS
+  // Values match ICC sRGB encodings.
+  kPerceptual = 0,  // good for photos, requires a profile with LUT.
+  kRelative,        // good for logos.
+  kSaturation,      // perhaps useful for CG with fully saturated colors.
+  kAbsolute,        // leaves white point unchanged; good for proofing.
+  kUnknown          // invalid, only used for parsing
+  // Future extensions: [5, 10]
+};
+
+// For generating profile descriptions.
+std::string ToString(ColorSpace color_space);
+std::string ToString(WhitePoint white_point);
+std::string ToString(Primaries primaries);
+std::string ToString(TransferFunction transfer_function);
+std::string ToString(RenderingIntent rendering_intent);
+
+// Used by AllEncodings and ParseDescription.
+std::vector<ColorSpace> Values(ColorSpace);
+std::vector<WhitePoint> Values(WhitePoint);
+std::vector<Primaries> Values(Primaries);
+std::vector<TransferFunction> Values(TransferFunction);
+std::vector<RenderingIntent> Values(RenderingIntent);
+
+// Convenience wrapper: calls the Values() overload for Enum with ::kUnknown.
+template <typename Enum>
+std::vector<Enum> AllValues() {
+  return Values(Enum::kUnknown);  // argument selects the overload by its type
+}
+
+// Chromaticity (Y is omitted because it is 1 for primaries/white points)
+struct CIExy {
+  double x = 0.0;  // zero-initialized by default
+  double y = 0.0;  // zero-initialized by default
+};
+
+struct PrimariesCIExy {  // one chromaticity per color channel
+  CIExy r;  // red primary
+  CIExy g;  // green primary
+  CIExy b;  // blue primary
+};
+
+WhitePoint WhitePointFromCIExy(const CIExy& xy);
+// Returns false if white_point == kUnknown.
+Status WhitePointToCIExy(WhitePoint white_point, CIExy* PIK_RESTRICT xy);
+
+Primaries PrimariesFromCIExy(const PrimariesCIExy& xy);
+// Returns false if primaries == kUnknown.
+Status PrimariesToCIExy(Primaries primaries, PrimariesCIExy* PIK_RESTRICT xy);
+
+static inline bool IsLinear(const TransferFunction tf) {
+  return tf == TransferFunction::kLinear;  // exact enum match only
+}
+
+static inline bool IsSRGB(const TransferFunction tf) {
+  return tf == TransferFunction::kSRGB;  // curve only; see ColorEncoding::IsSRGB
+}
+
+static inline bool IsPQ(const TransferFunction tf) {
+  return tf == TransferFunction::kPQ;  // exact enum match only
+}
+
+static inline bool Is2100(const TransferFunction tf) {
+  return IsPQ(tf) || tf == TransferFunction::kHLG;  // both BT.2100 curves
+}
+
+// All data required to interpret and translate pixels to a known color space.
+// For most images (i.e. those with a known ICC profile), the encoded size is
+// only 10 bits. Stored in Metadata.
+struct ColorEncoding {
+  ColorEncoding();
+  static const char* Name() { return "ColorEncoding"; }
+
+  bool IsGray() const { return color_space == ColorSpace::kGray; }
+  size_t Channels() const { return IsGray() ? 1 : 3; }
+  // Note: color_space is not part of this test, so Gray can also qualify.
+  bool IsSRGB() const {
+    return white_point == WhitePoint::kD65 && primaries == Primaries::kSRGB &&
+           pik::IsSRGB(transfer_function);
+  }
+  // Sets all enum fields to sRGB defaults for "cs" and discards any ICC bytes.
+  void SetSRGB(const ColorSpace cs) {
+    icc.clear();
+    PIK_ASSERT(cs == ColorSpace::kGray || cs == ColorSpace::kRGB);
+    color_space = cs;
+    white_point = WhitePoint::kD65;
+    primaries = Primaries::kSRGB;
+    transfer_function = TransferFunction::kSRGB;
+    rendering_intent = RenderingIntent::kPerceptual;
+  }
+  // Presents icc and then each enum field (with a default) to the visitor.
+  template <class Visitor>
+  bool VisitFields(Visitor* PIK_RESTRICT visitor) {
+    visitor->Bytes(BytesEncoding::kBrotli, &icc);
+
+    visitor->Enum(kU32Direct3Plus8, ColorSpace::kRGB, &color_space);
+    visitor->Enum(kU32Direct3Plus8, WhitePoint::kD65, &white_point);
+    visitor->Enum(kU32Direct3Plus8, Primaries::kSRGB, &primaries);
+    visitor->Enum(kU32Direct3Plus8, TransferFunction::kSRGB,
+                  &transfer_function);
+    visitor->Enum(kU32Direct3Plus8, RenderingIntent::kPerceptual,
+                  &rendering_intent);
+
+    return true;
+  }
+
+  // The enum fields should always describe attributes of "icc" except:
+  // - between MaybeRemoveProfile and SetProfileFromFields (icc empty);
+  // - between ctor/setting fields and SetProfileFromFields (icc empty);
+  // - after SetFromProfile of an unusual profile (fields may be kUnknown).
+  PaddedBytes icc;
+  ColorSpace color_space;
+  WhitePoint white_point;  // unused if kXYZ
+  Primaries primaries;     // unused if kGray or kXYZ
+  TransferFunction transfer_function;
+  RenderingIntent rendering_intent;
+  // Compares only the fields that are in use for the given color space.
+  bool SameColorSpace(const ColorEncoding& other) const {
+    if (color_space != other.color_space) return false;
+    if (color_space == ColorSpace::kXYZ) return true;
+    if (white_point != other.white_point) return false;
+    if (color_space == ColorSpace::kGray) return true;
+    return primaries == other.primaries;
+  }
+};
+
+// Returns whether |a - b| is within a small absolute tolerance.
+static inline bool ApproxEq(const double a, const double b) {
+  constexpr double kTolerance = 6E-5;  // enough for ICC 15-bit fixed point
+  return std::abs(a - b) <= kTolerance;
+}
+
+// Floating-point "gamma" is an alternative to TransferFunction that allows
+// other codecs to specify arbitrary exponents.
+
+// All return values except Linear are arbitrary and only useful for comparison.
+static inline constexpr double GammaUnknown() { return 0.0; }  // "no gamma"
+static inline constexpr double GammaLinear() { return 1.0; }
+static inline constexpr double GammaSRGB() { return 1.0 / 2.2; }
+static inline constexpr double GammaAdobe() { return 1.0 / 2.19921875; }
+static inline constexpr double Gamma709() { return 1.0 / 2.0; }
+static inline constexpr double GammaPQ() { return 0.15; }    // placeholder
+static inline constexpr double GammaHLG() { return 0.125; }  // placeholder
+
+double GammaFromTransferFunction(TransferFunction tf);  // Returns Gamma*().
+TransferFunction TransferFunctionFromGamma(double gamma);
+
+// For Description.
+std::string StringFromWhitePoint(const CIExy& xy);
+std::string StringFromPrimaries(const PrimariesCIExy& xy);
+std::string StringFromGamma(double gamma);
+double GammaFromString(const std::string& s);
+
+// Sufficient information to create an ICC profile. Used by other image codecs
+// as an alternative to embedding ICC. Same fields as ColorEncoding, but allows
+// arbitrary white point/primaries/gamma.
+struct ProfileParams {
+  ColorSpace color_space;
+  CIExy white_point;         // ignored if kXYZ
+  PrimariesCIExy primaries;  // ignored if kGray or kXYZ
+  double gamma;  // compared against Gamma*(); 0.0 (GammaUnknown) = unknown
+  RenderingIntent rendering_intent;
+};
+
+// Example: "RGB_D65_SRG_Rel_Lin"
+std::string Description(const ColorEncoding& c);  // from fields, not icc
+std::string Description(const ProfileParams& pp);
+Status ParseDescription(const std::string& description,
+                        ProfileParams* PIK_RESTRICT pp);
+
+Status ColorEncodingToParams(const ColorEncoding& c,
+                             ProfileParams* PIK_RESTRICT pp);
+void SetFieldsFromParams(const ProfileParams& pp,
+                         ColorEncoding* PIK_RESTRICT c);
+
+// Returns ColorEncoding with empty ICC profile. Caller must use
+// ColorEncoding::SetProfileFromFields() to generate a profile.
+std::vector<ColorEncoding> AllEncodings();
+
+}  // namespace pik
+
+#endif  // PIK_COLOR_ENCODING_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/color_management.cc b/codec/L2/demos/pikEnc/host/pik/color_management.cc
new file mode 100755
index 0000000000..2a376df02b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/color_management.cc
@@ -0,0 +1,928 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/color_management.h"
+
+#include <mutex>
+#include "lcms2.h"
+
+#include "pik/rational_polynomial.h"
+#include "pik/simd/simd.h"
+
+namespace pik {
+namespace {
+
+#define PIK_CMS_VERBOSE 0
+
+// cms functions (even *THR) are not thread-safe, except cmsDoTransform.
+// To ensure all functions are covered without frequent lock-taking nor risk of
+// recursive lock, we lock in the top-level APIs.
+std::mutex lcms_mutex;
+
+// (LCMS interface requires xyY but we omit the Y for white points/primaries.)
+
+PIK_MUST_USE_RESULT CIExy CIExyFromxyY(const cmsCIExyY& xyY) {  // drops Y
+  CIExy chromaticity;
+  chromaticity.y = xyY.y;
+  chromaticity.x = xyY.x;
+  return chromaticity;
+}
+
+PIK_MUST_USE_RESULT cmsCIExyY xyYFromCIExy(const CIExy& xy) {
+  // Y is fixed at 1.0, the value used for white points and primaries.
+  return cmsCIExyY{xy.x, xy.y, 1.0};
+}
+
+PIK_MUST_USE_RESULT CIExy CIExyFromXYZ(const cmsCIEXYZ& XYZ) {
+  cmsCIExyY converted;           // filled in by LCMS
+  cmsXYZ2xyY(&converted, &XYZ);  // argument order is (Dest, Source)
+  return CIExyFromxyY(converted);
+}
+
+PIK_MUST_USE_RESULT cmsCIEXYZ D50_XYZ() {
+  const cmsCIEXYZ d50 = {0.96420288, 1.0, 0.82490540};  // as quantized in ICC
+  return d50;
+}
+
+// RAII
+
+struct ProfileDeleter {  // closes an LCMS profile handle
+  void operator()(void* p) { cmsCloseProfile(p); }
+};
+using Profile = std::unique_ptr<void, ProfileDeleter>;  // owning handle
+
+struct TransformDeleter {  // deletes an LCMS transform handle
+  void operator()(void* p) { cmsDeleteTransform(p); }
+};
+using Transform = std::unique_ptr<void, TransformDeleter>;  // owning handle
+
+Status CreateProfileXYZ(const cmsContext context,
+                        Profile* PIK_RESTRICT profile) {
+  profile->reset(cmsCreateXYZProfileTHR(context));  // *profile takes ownership
+  if (*profile) return true;
+  return PIK_FAILURE("Failed to create XYZ");
+}
+
+// Owns an LCMS multi-localized Unicode string holding one en/US ASCII text.
+class MLU {
+ public:
+  MLU(const cmsContext context, const char* ascii)
+      : mlu_(cmsMLUalloc(context, 0)) {
+    if (!cmsMLUsetASCII(mlu_, "en", "US", ascii)) {
+      PIK_NOTIFY_ERROR("Failed to set ASCII");  // a ctor cannot return Status
+    }
+  }
+  ~MLU() { cmsMLUfree(mlu_); }
+  // Neither copyable nor movable, so exactly one object frees mlu_.
+  MLU(const MLU&) = delete;
+  MLU& operator=(const MLU&) = delete;
+  MLU(MLU&&) = delete;
+  MLU& operator=(MLU&&) = delete;
+
+  cmsMLU* get() const { return mlu_; }  // still owned by this object
+
+ private:
+  cmsMLU* mlu_;  // allocated in the constructor, freed in the destructor
+};
+
+// Sets header and required tags; called by EncodeProfile.
+Status SetTags(const cmsContext context, const Profile& profile,
+               const std::string& profile_description) {
+  cmsHPROFILE p = profile.get();  // borrowed; "profile" keeps ownership
+
+  // Header
+  cmsSetHeaderFlags(p, 1);  // embedded
+  // The MLU objects below must outlive the cmsWriteTag calls that read them.
+  const MLU copyright(
+      context,
+      "Copyright 2018 Google LLC, CC-BY-SA 3.0 Unported license"
+      "(https://creativecommons.org/licenses/by-sa/3.0/legalcode)");
+  const MLU manufacturer(context, "Google");
+  const MLU model(context, "Image codec");
+  const MLU description(context, profile_description.c_str());
+
+  // Required tags
+  bool all_ok = true;  // every write is attempted; failures are accumulated
+  all_ok &= cmsWriteTag(p, cmsSigCopyrightTag, copyright.get());
+  all_ok &= cmsWriteTag(p, cmsSigDeviceMfgDescTag, manufacturer.get());
+  all_ok &= cmsWriteTag(p, cmsSigDeviceModelDescTag, model.get());
+  all_ok &= cmsWriteTag(p, cmsSigProfileDescriptionTag, description.get());
+
+  if (!all_ok) return PIK_FAILURE("Failed to write header/tags");
+  return true;
+}
+
+Status EncodeProfile(const cmsContext context, const Profile& profile,
+                     const std::string& description, PaddedBytes* icc) {
+  PIK_RETURN_IF_ERROR(SetTags(context, profile, description));
+
+  // Pass 1: with a null buffer only the serialized size is requested.
+  cmsUInt32Number bytes = 0;
+  if (!cmsSaveProfileToMem(profile.get(), nullptr, &bytes)) {
+    return PIK_FAILURE("Failed to get profile size");
+  }
+  PIK_ASSERT(bytes != 0);
+  // Pass 2: serialize into a buffer of exactly that size.
+  icc->resize(bytes);
+  const bool saved = cmsSaveProfileToMem(profile.get(), icc->data(), &bytes);
+  if (!saved) return PIK_FAILURE("Failed to encode profile");
+  PIK_ASSERT(bytes == icc->size());
+  return true;
+}
+
+Status DecodeProfile(const cmsContext context, const PaddedBytes& icc,
+                     Profile* profile) {
+  // Parses the serialized ICC bytes; *profile takes ownership of the handle.
+  cmsHPROFILE h = cmsOpenProfileFromMemTHR(context, icc.data(), icc.size());
+  profile->reset(h);
+  if (h == nullptr) return PIK_FAILURE("Failed to decode profile");
+
+  return true;
+}
+
+struct CurveDeleter {  // frees an LCMS tone curve
+  void operator()(cmsToneCurve* p) { cmsFreeToneCurve(p); }
+};
+using Curve = std::unique_ptr<cmsToneCurve, CurveDeleter>;  // owning pointer
+
+// Definitions for BT.2100-2 transfer functions:
+// "display" is linear light (nits) normalized to [0, 1].
+// "encoded" is a nonlinear encoding (e.g. PQ) in [0, 1].
+// "scene" is a linear function of photon counts, normalized to [0, 1].
+
+// Despite the stated ranges, we need unbounded transfer functions: see
+// http://www.littlecms.com/CIC18_UnboundedCMM.pdf. Inputs can be negative or
+// above 1 due to chromatic adaptation. To avoid severe round-trip errors caused
+// by clamping, we mirror negative inputs via copysign (f(-x) = -f(x), see
+// https://developer.apple.com/documentation/coregraphics/cgcolorspace/1644735-extendedsrgb)
+// and extend the function domains above 1.
+
+// Hybrid Log-Gamma.
+class TF_HLG {
+ public:
+  // EOTF. e = encoded.
+  PIK_INLINE double DisplayFromEncoded(const double e) const {
+    const double lifted = e * (1.0 - kBeta) + kBeta;  // maps e=0 to kBeta
+    return OOTF(InvOETF(lifted));
+  }
+
+  // Inverse EOTF. d = display.
+  PIK_INLINE double EncodedFromDisplay(const double d) const {
+    const double lifted = OETF(InvOOTF(d));
+    const double e = (lifted - kBeta) * (1.0 / (1.0 - kBeta));  // undo the lift
+    return e;
+  }
+
+ private:
+  // OETF (defines the HLG approach). s = scene, returns encoded.
+  PIK_INLINE double OETF(double s) const {
+    if (s == 0.0) return 0.0;
+    const double original_sign = s;  // only its sign is used (copysign)
+    s = std::abs(s);
+    // Square-root segment for small inputs, logarithmic segment above.
+    if (s <= kDiv12) return std::copysign(std::sqrt(3.0 * s), original_sign);
+
+    const double e = kA * std::log(12 * s - kB) + kC;
+    PIK_ASSERT(e > 0.0);
+    return std::copysign(e, original_sign);
+  }
+
+  // e = encoded, returns scene.
+  PIK_INLINE double InvOETF(double e) const {
+    if (e == 0.0) return 0.0;
+    const double original_sign = e;  // only its sign is used (copysign)
+    e = std::abs(e);
+    // Inverse of the square-root segment, then of the logarithmic segment.
+    if (e <= 0.5) return std::copysign(e * e * (1.0 / 3), original_sign);
+
+    const double s = (std::exp((e - kC) * kRA) + kB) * kDiv12;
+    PIK_ASSERT(s >= 0);
+    return std::copysign(s, original_sign);
+  }
+
+  // s = scene, returns display.
+  PIK_INLINE double OOTF(const double s) const {
+    // The actual (red channel) OOTF is RD = alpha * YS^(gamma-1) * RS, where
+    // YS = 0.2627 * RS + 0.6780 * GS + 0.0593 * BS. Let alpha = 1 so we return
+    // "display" (normalized [0, 1]) instead of nits. Our transfer function
+    // interface does not allow a dependency on YS. Fortunately, the system
+    // gamma at 334 nits is 1.0, so this reduces to RD = RS.
+    return s;
+  }
+
+  // d = display, returns scene.
+  PIK_INLINE double InvOOTF(const double d) const {
+    return d;  // see OOTF().
+  }
+
+  // Assume 1000:1 contrast @ 200 nits => gamma 0.9
+  static constexpr double kBeta = 0.04;  // = sqrt(3 * contrast^(1/gamma))
+  // Constants of the logarithmic segment; kB and kRA are derived from kA.
+  static constexpr double kA = 0.17883277;
+  static constexpr double kRA = 1.0 / kA;
+  static constexpr double kB = 1 - 4 * kA;
+  static constexpr double kC = 0.5599107295;
+  static constexpr double kDiv12 = 1.0 / 12;
+};
+
+// Perceptual Quantization
+class TF_PQ {
+ public:
+  // EOTF (defines the PQ approach). e = encoded.
+  PIK_INLINE double DisplayFromEncoded(double e) const {
+    if (e == 0.0) return 0.0;
+    const double original_sign = e;  // only its sign is used (copysign)
+    e = std::abs(e);
+
+    const double xp = std::pow(e, 1.0 / kM2);
+    const double num = std::max(xp - kC1, 0.0);  // never negative
+    const double den = kC2 - kC3 * xp;
+    PIK_ASSERT(den != 0.0);
+    const double d = std::pow(num / den, 1.0 / kM1);
+    PIK_ASSERT(d >= 0.0);  // Equal for e ~= 1E-9
+    return std::copysign(d, original_sign);
+  }
+
+  // Inverse EOTF. d = display.
+  PIK_INLINE double EncodedFromDisplay(double d) const {
+    if (d == 0.0) return 0.0;
+    const double original_sign = d;  // only its sign is used (copysign)
+    d = std::abs(d);
+
+    const double xp = std::pow(d, kM1);
+    const double num = kC1 + xp * kC2;
+    const double den = 1.0 + xp * kC3;
+    const double e = std::pow(num / den, kM2);
+    PIK_ASSERT(e > 0.0);
+    return std::copysign(e, original_sign);
+  }
+
+ private:
+  static constexpr double kM1 = 2610.0 / 16384;  // exponent applied to display
+  static constexpr double kM2 = (2523.0 / 4096) * 128;  // outer exponent
+  static constexpr double kC1 = 3424.0 / 4096;
+  static constexpr double kC2 = (2413.0 / 4096) * 32;
+  static constexpr double kC3 = (2392.0 / 4096) * 32;
+};
+
+// sRGB
+class TF_SRGB {
+ public:
+  template <typename V>
+  SIMD_ATTR PIK_INLINE V DisplayFromEncoded(V x) const {
+    const SIMD_FULL(float) d;
+    const SIMD_FULL(uint32_t) du;
+    const V kSign = cast_to(d, set1(du, 0x80000000u));  // float sign-bit mask
+    const V original_sign = x & kSign;
+    x = andnot(kSign, x);  // abs
+
+    // Computed via af_cheb_rational (k=100); replicated 4x.
+    SIMD_ALIGN constexpr float p[(4 + 1) * 4] = {
+        2.200248328e-04, 2.200248328e-04, 2.200248328e-04, 2.200248328e-04,
+        1.043637593e-02, 1.043637593e-02, 1.043637593e-02, 1.043637593e-02,
+        1.624820318e-01, 1.624820318e-01, 1.624820318e-01, 1.624820318e-01,
+        7.961564959e-01, 7.961564959e-01, 7.961564959e-01, 7.961564959e-01,
+        8.210152774e-01, 8.210152774e-01, 8.210152774e-01, 8.210152774e-01,
+    };
+    SIMD_ALIGN constexpr float q[(4 + 1) * 4] = {
+        2.631846970e-01,  2.631846970e-01,  2.631846970e-01,  2.631846970e-01,
+        1.076976492e+00,  1.076976492e+00,  1.076976492e+00,  1.076976492e+00,
+        4.987528350e-01,  4.987528350e-01,  4.987528350e-01,  4.987528350e-01,
+        -5.512498495e-02, -5.512498495e-02, -5.512498495e-02, -5.512498495e-02,
+        6.521209011e-03,  6.521209011e-03,  6.521209011e-03,  6.521209011e-03,
+    };
+    const V linear = x * set1(d, kLowDivInv);  // x / 12.92
+    const V poly = EvalRationalPolynomial(x, p, q);
+    const V magnitude = select(linear, poly, x > set1(d, kThreshSRGBToLinear));
+    return andnot(kSign, magnitude) | original_sign;  // reattach input sign
+  }
+
+  template <class V>
+  SIMD_ATTR PIK_INLINE V EncodedFromDisplay(V x) const {
+    const SIMD_FULL(float) d;
+    const SIMD_FULL(uint32_t) du;
+    const V kSign = cast_to(d, set1(du, 0x80000000u));  // float sign-bit mask
+    const V original_sign = x & kSign;
+    x = andnot(kSign, x);  // abs
+
+    // Computed via af_cheb_rational (k=100); replicated 4x.
+    SIMD_ALIGN constexpr float p[(4 + 1) * 4] = {
+        -5.135152395e-04, -5.135152395e-04, -5.135152395e-04, -5.135152395e-04,
+        5.287254571e-03,  5.287254571e-03,  5.287254571e-03,  5.287254571e-03,
+        3.903842876e-01,  3.903842876e-01,  3.903842876e-01,  3.903842876e-01,
+        1.474205315e+00,  1.474205315e+00,  1.474205315e+00,  1.474205315e+00,
+        7.352629620e-01,  7.352629620e-01,  7.352629620e-01,  7.352629620e-01,
+    };
+    SIMD_ALIGN constexpr float q[(4 + 1) * 4] = {
+        1.004519624e-02, 1.004519624e-02, 1.004519624e-02, 1.004519624e-02,
+        3.036675394e-01, 3.036675394e-01, 3.036675394e-01, 3.036675394e-01,
+        1.340816930e+00, 1.340816930e+00, 1.340816930e+00, 1.340816930e+00,
+        9.258482155e-01, 9.258482155e-01, 9.258482155e-01, 9.258482155e-01,
+        2.424867759e-02, 2.424867759e-02, 2.424867759e-02, 2.424867759e-02,
+    };
+    const V linear = x * set1(d, kLowDiv);  // x * 12.92
+    const V poly = EvalRationalPolynomial(sqrt(x), p, q);  // evaluated at sqrt
+    const V magnitude = select(linear, poly, x > set1(d, kThreshLinearToSRGB));
+    return andnot(kSign, magnitude) | original_sign;  // reattach input sign
+  }
+
+ private:
+  static constexpr float kThreshSRGBToLinear = 0.04045f;  // encoded domain
+  static constexpr float kThreshLinearToSRGB = 0.0031308f;  // display domain
+  static constexpr float kLowDiv = 12.92f;  // slope of the linear segment
+  static constexpr float kLowDivInv = 1.0f / kLowDiv;
+};
+
+// NOTE: this is only used to provide a reasonable ICC profile that other
+// software can read. Our own transforms use ExtraTF instead because that is
+// more precise and supports unbounded mode.
+template <class Func>
+cmsToneCurve* CreateTableCurve(const cmsContext context, int32_t N,
+                               const Func& func) {
+  PIK_ASSERT(N <= 4096);  // ICC MFT2 only allows 4K entries
+  // Samples are stored as 16-bit integers: LCMS converts to 16-bit for
+  // A2B/MFT anyway, so float entries would gain nothing.
+  const float scale = 65535.0f;  // table value 0xFFFF represents 1.0
+  std::vector<uint16_t> samples;
+  samples.reserve(N);
+  for (int32_t k = 0; k < N; ++k) {
+    // Evenly spaced EOTF inputs (LCMS requires EOTF); index N - 1 is 1.0.
+    const float value = func.DisplayFromEncoded(static_cast<float>(k) / (N - 1));
+    PIK_ASSERT(value >= 0.0f);
+    // Clamp to table range - necessary for HLG.
+    samples.push_back(std::round((value > 1.0f ? 1.0f : value) * scale));
+  }
+  return cmsBuildTabulatedToneCurve16(context, N, samples.data());
+}
+
+Curve CreateCurve(const cmsContext context, const double gamma) {
+  // Exponential with linear part. Note that the LittleCMS API reference and
+  // tutorial disagree on the type number.
+  const cmsUInt32Number type = 4;
+
+  PIK_CHECK(0 < gamma && gamma <= 1.0);  // callers must pre-validate gamma
+
+  if (ApproxEq(gamma, GammaSRGB())) {  // piecewise sRGB curve
+    constexpr cmsFloat64Number params[5] = {2.4, 1.0 / 1.055, 0.055 / 1.055,
+                                            1.0 / 12.92, 0.04045};
+    return Curve(cmsBuildParametricToneCurve(context, type, params));
+  } else if (ApproxEq(gamma, Gamma709())) {  // piecewise BT.709 curve
+    constexpr cmsFloat64Number params[5] = {1.0 / 0.45, 1.0 / 1.099,
+                                            0.099 / 1.099, 1.0 / 4.5, 0.081};
+    return Curve(cmsBuildParametricToneCurve(context, type, params));
+  } else if (ApproxEq(gamma, GammaHLG())) {  // tabulated, 4096 samples
+    return Curve(CreateTableCurve(context, 4096, TF_HLG()));
+  } else if (ApproxEq(gamma, GammaPQ())) {  // tabulated, 4096 samples
+    return Curve(CreateTableCurve(context, 4096, TF_PQ()));
+  } else {
+    // "gamma" is the OETF exponent; LCMS expects EOTF, so take the reciprocal.
+    // Params after gamma are (in order): (1*x + 0)^gamma, or 1*x if x < 0.
+    const cmsFloat64Number params[5] = {1.0 / gamma, 1.0, 0.0, 1.0, 0.0};
+
+    // WARNING: using cmsBuildGamma results in a bounded curve - LittleCMS
+    // clamps negative outputs to zero. To retain unbounded mode, we use the
+    // same parametric curve type as sRGB.
+    return Curve(cmsBuildParametricToneCurve(context, type, params));
+  }
+}
+
+// Returns false for unsupported color_space and gamma (not an error).
+// Serializes the profile before use to ensure all values are quantized.
+Status MaybeCreateProfile(const cmsContext context, const ProfileParams& pp,
+                          PaddedBytes* PIK_RESTRICT icc) {
+  if (pp.gamma == 0.0) return false;  // Unknown gamma, not an error.
+
+  // (If color_space == kRGB, we'll use this curve for all channels.)
+  const Curve curve = CreateCurve(context, pp.gamma);
+  if (curve == nullptr) return PIK_FAILURE("Failed to create curve");
+
+  const cmsCIExyY wp_xyY = xyYFromCIExy(pp.white_point);
+
+  Profile profile;
+  if (pp.color_space == ColorSpace::kRGB) {
+    const cmsCIExyYTRIPLE primaries_xyY = {xyYFromCIExy(pp.primaries.r),
+                                           xyYFromCIExy(pp.primaries.g),
+                                           xyYFromCIExy(pp.primaries.b)};
+    cmsToneCurve* curves[3] = {curve.get(), curve.get(), curve.get()};
+    profile.reset(
+        cmsCreateRGBProfileTHR(context, &wp_xyY, &primaries_xyY, curves));
+    if (profile.get() == nullptr) return PIK_FAILURE("Failed to create RGB");
+  } else if (pp.color_space == ColorSpace::kGray) {
+    profile.reset(cmsCreateGrayProfileTHR(context, &wp_xyY, curve.get()));
+    if (profile.get() == nullptr) return PIK_FAILURE("Failed to create Gray");
+  } else if (pp.color_space == ColorSpace::kXYZ) {
+    PIK_RETURN_IF_ERROR(CreateProfileXYZ(context, &profile));  // takes lock
+  } else {
+    return false;  // not an error. TODO(janwas): handle others
+  }
+
+  // ICC uses the same values.
+  cmsSetHeaderRenderingIntent(
+      profile.get(), static_cast<cmsUInt32Number>(pp.rendering_intent));
+
+  return EncodeProfile(context, profile, Description(pp), icc);
+}
+
+uint32_t Type32(const ColorEncoding& c) {
+  // LCMS float (FLT) pixel format matching the channel layout of "c".
+  const bool is_xyz = c.color_space == ColorSpace::kXYZ;
+  return c.IsGray() ? TYPE_GRAY_FLT : is_xyz ? TYPE_XYZ_FLT : TYPE_RGB_FLT;
+}
+
+uint32_t Type64(const ColorEncoding& c) {
+  // LCMS double (DBL) pixel format matching the channel layout of "c".
+  const bool is_xyz = c.color_space == ColorSpace::kXYZ;
+  return c.IsGray() ? TYPE_GRAY_DBL : is_xyz ? TYPE_XYZ_DBL : TYPE_RGB_DBL;
+}
+
+PIK_MUST_USE_RESULT ColorSpace ColorSpaceFromProfile(const Profile& profile) {
+  // Translates the color space signature that LCMS reports for the profile
+  // into the corresponding ColorSpace value.
+  const auto signature = cmsGetColorSpace(profile.get());
+
+  if (signature == cmsSigRgbData) return ColorSpace::kRGB;
+  if (signature == cmsSigGrayData) return ColorSpace::kGray;
+  if (signature == cmsSigXYZData) return ColorSpace::kXYZ;
+  if (signature == cmsSigYCbCrData) return ColorSpace::kYCbCr;
+
+  // Every other signature has no counterpart that is handled here
+  // (this includes ICtCp, for which no signature is tested).
+  return ColorSpace::kUnknown;
+}
+
+// "profile1" is pre-decoded to save time in DetectTransferFunction.
+Status ProfileEquivalentToICC(const cmsContext context, const Profile& profile1,
+                              const PaddedBytes& icc, const ColorEncoding& c) {
+  const uint32_t type_src = Type64(c);  // double-precision input layout
+
+  Profile profile2;
+  PIK_RETURN_IF_ERROR(DecodeProfile(context, icc, &profile2));
+  // Both profiles are compared via their transforms into XYZ.
+  Profile profile_xyz;
+  PIK_RETURN_IF_ERROR(CreateProfileXYZ(context, &profile_xyz));
+
+  const uint32_t intent = INTENT_RELATIVE_COLORIMETRIC;
+  const uint32_t flags = cmsFLAGS_NOOPTIMIZE | cmsFLAGS_BLACKPOINTCOMPENSATION |
+                         cmsFLAGS_HIGHRESPRECALC;
+  Transform xform1(cmsCreateTransformTHR(context, profile1.get(), type_src,
+                                         profile_xyz.get(), TYPE_XYZ_DBL,
+                                         intent, flags));
+  Transform xform2(cmsCreateTransformTHR(context, profile2.get(), type_src,
+                                         profile_xyz.get(), TYPE_XYZ_DBL,
+                                         intent, flags));
+  if (xform1 == nullptr || xform2 == nullptr) {
+    return PIK_FAILURE("Failed to create transform");
+  }
+
+  double in[3];    // only in[0] is set in the gray branch below
+  double out1[3];  // XYZ from profile1
+  double out2[3];  // XYZ from the decoded "icc"
+
+  // Uniformly spaced samples from very dark to almost fully bright.
+  const double init = 1E-3;
+  const double step = 0.2;  // 5 samples per channel (40 for gray)
+
+  if (c.IsGray()) {
+    // Finer sampling and replicate each component.
+    for (in [0] = init; in[0] < 1.0; in[0] += step / 8) {
+      cmsDoTransform(xform1.get(), in, out1, 1);
+      cmsDoTransform(xform2.get(), in, out2, 1);
+      if (!ApproxEq(out1[0], out2[0])) {  // only the first output is compared
+        return false;
+      }
+    }
+  } else {
+    for (in [0] = init; in[0] < 1.0; in[0] += step) {
+      for (in [1] = init; in[1] < 1.0; in[1] += step) {
+        for (in [2] = init; in[2] < 1.0; in[2] += step) {
+          cmsDoTransform(xform1.get(), in, out1, 1);
+          cmsDoTransform(xform2.get(), in, out2, 1);
+          for (size_t i = 0; i < 3; ++i) {
+            if (!ApproxEq(out1[i], out2[i])) {
+              return false;  // mismatch: plain false, without PIK_FAILURE
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
+// Returns white point that was specified when creating the profile, or
+// {1, 1, 1} if the transform to XYZ cannot be created. NOTE: we can't just use
+// cmsSigMediaWhitePointTag; its interpretation differs between ICC versions.
+PIK_MUST_USE_RESULT cmsCIEXYZ UnadaptedWhitePoint(const cmsContext context,
+                                                  const Profile& profile,
+                                                  const ColorEncoding& c) {
+  cmsCIEXYZ XYZ = {1.0, 1.0, 1.0};
+
+  Profile profile_xyz;
+  if (!CreateProfileXYZ(context, &profile_xyz)) return XYZ;
+  // Array arguments are one per profile.
+  cmsHPROFILE profiles[2] = {profile.get(), profile_xyz.get()};
+  // Leave white point unchanged - that is what we're trying to extract.
+  cmsUInt32Number intents[2] = {INTENT_ABSOLUTE_COLORIMETRIC,
+                                INTENT_ABSOLUTE_COLORIMETRIC};
+  cmsBool black_compensation[2] = {0, 0};
+  cmsFloat64Number adaption[2] = {0.0, 0.0};
+  // Only transforming a single pixel, so skip expensive optimizations.
+  cmsUInt32Number flags = cmsFLAGS_NOOPTIMIZE | cmsFLAGS_HIGHRESPRECALC;
+  Transform xform(cmsCreateExtendedTransform(
+      context, 2, profiles, black_compensation, intents, adaption, nullptr, 0,
+      Type64(c), TYPE_XYZ_DBL, flags));
+  if (xform == nullptr) return XYZ;  // cmsDoTransform needs a valid handle.
+  // xy are relative, so magnitude does not matter if we ignore output Y.
+  const cmsFloat64Number in[3] = {1.0, 1.0, 1.0};
+  cmsDoTransform(xform.get(), in, &XYZ.X, 1);
+  return XYZ;
+}
+
+PIK_MUST_USE_RESULT Primaries IdentifyPrimaries(const Profile& profile,
+                                                const cmsCIEXYZ& wp_unadapted) {
+  // The colorant tags were adapted to the profile illuminant before being
+  // stored in the profile. A null result means the tag could not be read.
+  const auto read_colorant = [&profile](const cmsTagSignature sig) {
+    return static_cast<const cmsCIEXYZ*>(cmsReadTag(profile.get(), sig));
+  };
+  const cmsCIEXYZ* const stored_r = read_colorant(cmsSigRedColorantTag);
+  const cmsCIEXYZ* const stored_g = read_colorant(cmsSigGreenColorantTag);
+  const cmsCIEXYZ* const stored_b = read_colorant(cmsSigBlueColorantTag);
+  if (stored_r == nullptr || stored_g == nullptr || stored_b == nullptr) {
+    PIK_NOTIFY_ERROR("Failed to retrieve colorants");
+    return Primaries::kUnknown;
+  }
+
+  // TODO(janwas): no longer assume Bradford and D50.
+  // Undo the chromatic adaptation (D50 -> unadapted white point).
+  const cmsCIEXYZ d50 = D50_XYZ();
+  cmsCIEXYZ r, g, b;
+  cmsAdaptToIlluminant(&r, &d50, &wp_unadapted, stored_r);
+  cmsAdaptToIlluminant(&g, &d50, &wp_unadapted, stored_g);
+  cmsAdaptToIlluminant(&b, &d50, &wp_unadapted, stored_b);
+
+  const PrimariesCIExy xy = {CIExyFromXYZ(r), CIExyFromXYZ(g),
+                             CIExyFromXYZ(b)};
+  return PrimariesFromCIExy(xy);
+}
+
+PIK_MUST_USE_RESULT TransferFunction DetectTransferFunction(
+    const cmsContext context, const ColorEncoding& PIK_RESTRICT c) {
+  // Search by synthesis: build a profile for each candidate transfer function
+  // from the fields of "c" and return the first one equivalent to c.icc.
+  ProfileParams pp;
+  Profile decoded;
+  // If any fields are unknown, we can't synthesize a matching profile; if
+  // c.icc cannot be decoded, there is nothing to compare against.
+  if (!ColorEncodingToParams(c, &pp) ||
+      !DecodeProfile(context, c.icc, &decoded)) {
+    return TransferFunction::kUnknown;
+  }
+
+  for (const TransferFunction candidate : AllValues<TransferFunction>()) {
+    pp.gamma = GammaFromTransferFunction(candidate);
+    PaddedBytes icc_candidate;
+    if (!MaybeCreateProfile(context, pp, &icc_candidate)) continue;
+    if (ProfileEquivalentToICC(context, decoded, icc_candidate, c)) {
+      return candidate;
+    }
+  }
+  return TransferFunction::kUnknown;
+}
+
+void ErrorHandler(cmsContext context, cmsUInt32Number code, const char* text) {
+  fprintf(stderr, "LCMS error %u: %s\n", code, text);  // "context" is unused.
+}
+
+// Returns the calling thread's LCMS context, creating it on first use.
+cmsContext GetContext() {
+  static thread_local void* tls_ctx;
+  if (tls_ctx != nullptr) return static_cast<cmsContext>(tls_ctx);
+
+  // The LCMS headers and the linked library must report the same version.
+  PIK_CHECK(LCMS_VERSION == cmsGetEncodedCMMversion());
+  tls_ctx = cmsCreateContext(nullptr, nullptr);
+  PIK_ASSERT(tls_ctx != nullptr);
+  // Route LCMS diagnostics for this context to ErrorHandler.
+  cmsSetLogErrorHandlerTHR(static_cast<cmsContext>(tls_ctx), &ErrorHandler);
+  return static_cast<cmsContext>(tls_ctx);
+}
+
+}  // namespace
+
+// All functions (except ColorSpaceTransform::Run) must lock lcms_mutex.
+
+Status ColorManagement::SetFromParams(const ProfileParams& pp,
+                                      ColorEncoding* PIK_RESTRICT c) {
+  std::unique_lock<std::mutex> lock(lcms_mutex);
+  // Synthesize the ICC bytes into c->icc, then copy the fields from "pp".
+  if (MaybeCreateProfile(GetContext(), pp, &c->icc)) {
+    SetFieldsFromParams(pp, c);
+    return true;
+  }
+  return PIK_FAILURE("Failed to create profile");
+}
+
+Status ColorManagement::SetFromProfile(PaddedBytes&& icc,
+                                       ColorEncoding* PIK_RESTRICT c) {
+  if (icc.empty()) return false;  // No profile: "c" is left untouched.
+
+  std::unique_lock<std::mutex> lock(lcms_mutex);
+  const cmsContext context = GetContext();
+  // Decode before modifying "c" so that invalid bytes leave it unchanged.
+  Profile profile;
+  PIK_RETURN_IF_ERROR(DecodeProfile(context, icc, &profile));
+  // From here on "c" is updated; "icc" is moved-from after the next line.
+  c->icc = std::move(icc);
+
+  c->color_space = ColorSpaceFromProfile(profile);
+  // UnadaptedWhitePoint reads c->color_space (via Type64), which is set above.
+  const cmsCIEXYZ wp_unadapted = UnadaptedWhitePoint(context, profile, *c);
+  c->white_point = WhitePointFromCIExy(CIExyFromXYZ(wp_unadapted));
+
+  // Gray/XYZ profiles don't have primaries.
+  c->primaries = Primaries::kUnknown;
+  if (c->color_space != ColorSpace::kGray &&
+      c->color_space != ColorSpace::kXYZ) {
+    c->primaries = IdentifyPrimaries(profile, wp_unadapted);
+  }
+
+  // XYZ profiles are always linear.
+  c->transfer_function = TransferFunction::kLinear;
+  if (c->color_space != ColorSpace::kXYZ) {
+    // Must come last because it uses the other fields.
+    c->transfer_function = DetectTransferFunction(context, *c);
+  }
+
+  // ICC uses the same values.
+  c->rendering_intent =
+      static_cast<RenderingIntent>(cmsGetHeaderRenderingIntent(profile.get()));
+
+  return true;
+}
+
+Status ColorManagement::SetProfileFromFields(ColorEncoding* PIK_RESTRICT c) {
+  std::unique_lock<std::mutex> lock(lcms_mutex);
+  // The previous profile is dropped first and then regenerated from the
+  // fields of "c".
+  c->icc.clear();
+  const cmsContext context = GetContext();
+
+  ProfileParams params;
+  if (ColorEncodingToParams(*c, &params)) {
+    if (MaybeCreateProfile(context, params, &c->icc)) return true;
+    return PIK_FAILURE("Failed to create profile from fields");
+  }
+  return PIK_FAILURE("Cannot create profile from unknown fields");
+}
+
+Status ColorManagement::MaybeRemoveProfile(ColorEncoding* PIK_RESTRICT c) {
+  // Avoid printing an error message when there is no ICC profile.
+  if (c->icc.empty()) return true;
+
+  std::unique_lock<std::mutex> lock(lcms_mutex);
+  const cmsContext context = GetContext();
+  Profile existing;
+  PIK_RETURN_IF_ERROR(DecodeProfile(context, c->icc, &existing));
+
+  // Synthesize the profile that the fields of "c" describe.
+  ProfileParams params;
+  PIK_RETURN_IF_ERROR(ColorEncodingToParams(*c, &params));
+  PaddedBytes regenerated;
+  PIK_RETURN_IF_ERROR(MaybeCreateProfile(context, params, &regenerated));
+
+  // Only drop the stored bytes if the regenerated profile is equivalent.
+  if (ProfileEquivalentToICC(context, existing, regenerated, *c)) {
+    c->icc.clear();
+    return true;
+  }
+  return PIK_FAILURE("Generated profile does not match");
+}
+
+ColorSpaceTransform::~ColorSpaceTransform() {
+  // Releasing transforms calls into LCMS, so lcms_mutex is held throughout.
+  std::unique_lock<std::mutex> lock(lcms_mutex);
+  TransformDeleter deleter;
+  for (size_t i = 0; i < transforms_.size(); ++i) deleter(transforms_[i]);
+}
+
+Status ColorSpaceTransform::Init(const ColorEncoding& c_src,
+                                 const ColorEncoding& c_dst, size_t xsize,
+                                 const size_t num_threads) {
+  // May be called repeatedly: earlier transforms are freed and state is reset.
+  std::unique_lock<std::mutex> lock(lcms_mutex);
+#if PIK_CMS_VERBOSE
+  printf("%s -> %s\n", Description(c_src).c_str(), Description(c_dst).c_str());
+#endif
+
+  Profile profile_src, profile_dst;
+  const cmsContext context = GetContext();
+  PIK_RETURN_IF_ERROR(DecodeProfile(context, c_src.icc, &profile_src));
+  PIK_RETURN_IF_ERROR(DecodeProfile(context, c_dst.icc, &profile_dst));
+
+  skip_lcms_ = false;
+  preprocess_ = postprocess_ = ExtraTF::kNone;  // no stale TF from prior Init
+  if (c_src.SameColorSpace(c_dst) &&
+      c_src.transfer_function == c_dst.transfer_function) {
+    skip_lcms_ = true;
+#if PIK_CMS_VERBOSE
+    printf("Skip CMS\n");
+#endif
+  }
+
+  // Special-case for BT.2100 HLG/PQ and SRGB <=> linear:
+  if ((Is2100(c_src.transfer_function) && IsLinear(c_dst.transfer_function)) ||
+      (Is2100(c_dst.transfer_function) && IsLinear(c_src.transfer_function)) ||
+      (IsSRGB(c_src.transfer_function) && IsLinear(c_dst.transfer_function)) ||
+      (IsSRGB(c_dst.transfer_function) && IsLinear(c_src.transfer_function))) {
+    // Construct new profiles as if the data were already/still linear.
+    ProfileParams pp_src, pp_dst;
+    PaddedBytes icc_src, icc_dst;
+    Profile new_src, new_dst;
+    // Only enable ExtraTF if profile creation succeeded.
+    if (ColorEncodingToParams(c_src, &pp_src) &&
+        ColorEncodingToParams(c_dst, &pp_dst) &&
+        (pp_src.gamma = pp_dst.gamma = GammaLinear()) && /* assign */
+        MaybeCreateProfile(context, pp_src, &icc_src) &&
+        MaybeCreateProfile(context, pp_dst, &icc_dst) &&
+        DecodeProfile(context, icc_src, &new_src) &&
+        DecodeProfile(context, icc_dst, &new_dst)) {
+      if (c_src.SameColorSpace(c_dst)) skip_lcms_ = true;
+#if PIK_CMS_VERBOSE
+      printf("Linear <-> HLG/PQ; skip=%d\n", skip_lcms_);
+#endif
+      profile_src.swap(new_src);
+      profile_dst.swap(new_dst);
+      if (IsLinear(c_dst.transfer_function)) {
+        preprocess_ = IsSRGB(c_src.transfer_function)
+                          ? ExtraTF::kSRGB
+                          : (IsPQ(c_src.transfer_function) ? ExtraTF::kPQ
+                                                           : ExtraTF::kHLG);
+      } else {
+        PIK_ASSERT(IsLinear(c_src.transfer_function));
+        postprocess_ = IsSRGB(c_dst.transfer_function)
+                           ? ExtraTF::kSRGB
+                           : (IsPQ(c_dst.transfer_function) ? ExtraTF::kPQ
+                                                            : ExtraTF::kHLG);
+      }
+    } else {
+      fprintf(stderr, "Failed to create extra linear profiles\n");
+    }
+  }
+
+  // Type includes color space (XYZ vs RGB), so can be different.
+  const uint32_t type_src = Type32(c_src);
+  const uint32_t type_dst = Type32(c_dst);
+  // Not including alpha channel (copied separately).
+  const size_t channels_src = c_src.Channels();
+  const size_t channels_dst = c_dst.Channels();
+  PIK_CHECK(channels_src == channels_dst);
+#if PIK_CMS_VERBOSE
+  printf("Channels: %zu; Threads: %zu\n", channels_src, num_threads);
+#endif
+
+  for (void* p : transforms_) TransformDeleter()(p);  // from a previous Init
+  transforms_.clear();
+  const uint32_t intent = static_cast<uint32_t>(c_dst.rendering_intent);
+  const uint32_t flags =
+      cmsFLAGS_BLACKPOINTCOMPENSATION | cmsFLAGS_HIGHRESPRECALC;
+  for (size_t i = 0; i < num_threads; ++i) {
+    // NOTE: we're using the current thread's context and assuming all state
+    // modified by cmsDoTransform resides in the transform, not the context.
+    cmsHTRANSFORM const xform =
+        cmsCreateTransformTHR(context, profile_src.get(), type_src,
+                              profile_dst.get(), type_dst, intent, flags);
+    if (xform == nullptr) return PIK_FAILURE("Failed to create transform");
+    transforms_.push_back(xform);  // only valid handles are ever stored
+  }
+
+  // Ideally LCMS would convert directly from External to Image3. However,
+  // cmsDoTransformLineStride only accepts 32-bit BytesPerPlaneIn, whereas our
+  // planes can be more than 4 GiB apart. Hence, transform inputs/outputs must
+  // be interleaved. Calling cmsDoTransform for each pixel is expensive
+  // (indirect call). We therefore transform rows, which requires per-thread
+  // buffers. To avoid separate allocations, we use the rows of an image.
+  // Because LCMS apparently also cannot handle <= 16 bit inputs and 32-bit
+  // outputs (or vice versa), we use floating point input/output.
+  buf_src_ = ImageF(xsize * channels_src, num_threads);
+  buf_dst_ = ImageF(xsize * channels_dst, num_threads);
+  xsize_ = xsize;
+  return true;
+}
+
+SIMD_ATTR void ColorSpaceTransform::Run(const size_t thread,
+                                        const float* buf_src, float* buf_dst) {
+  // No lock needed: uses only transforms_[thread] and buf_src_.Row(thread).
+
+  // If ExtraTF, we need a writable buffer; otherwise, only READ from buf_src.
+  float* const xform_src = (preprocess_ == ExtraTF::kNone)
+                               ? const_cast<float*>(buf_src)
+                               : buf_src_.Row(thread);
+
+#if PIK_CMS_VERBOSE
+  const size_t kX = 1;  // pixel index, multiplied by 3 for RGB
+#endif
+  // Optional decoding to linear before LCMS; writes to buf_src_.Row(thread).
+  switch (preprocess_) {
+    case ExtraTF::kNone:
+      break;
+    case ExtraTF::kPQ:
+      for (size_t i = 0; i < buf_src_.xsize(); ++i) {
+        xform_src[i] = TF_PQ().DisplayFromEncoded(buf_src[i]);
+      }
+#if PIK_CMS_VERBOSE
+      printf("pre in %.4f %.4f %.4f undoPQ %.4f %.4f %.4f\n", buf_src[3 * kX],
+             buf_src[3 * kX + 1], buf_src[3 * kX + 2], xform_src[3 * kX],
+             xform_src[3 * kX + 1], xform_src[3 * kX + 2]);
+#endif
+      break;
+    case ExtraTF::kHLG:
+      for (size_t i = 0; i < buf_src_.xsize(); ++i) {
+        xform_src[i] = TF_HLG().DisplayFromEncoded(buf_src[i]);
+      }
+#if PIK_CMS_VERBOSE
+      printf("pre in %.4f %.4f %.4f undoHLG %.4f %.4f %.4f\n", buf_src[3 * kX],
+             buf_src[3 * kX + 1], buf_src[3 * kX + 2], xform_src[3 * kX],
+             xform_src[3 * kX + 1], xform_src[3 * kX + 2]);
+#endif
+      break;
+    case ExtraTF::kSRGB:
+      SIMD_FULL(float) df;  // NOTE(review): assumes padded rows - confirm
+      for (size_t i = 0; i < buf_src_.xsize(); i += df.N) {
+        const auto val = load(df, buf_src + i);
+        const auto result = TF_SRGB().DisplayFromEncoded(val);
+        store(result, df, xform_src + i);
+      }
+#if PIK_CMS_VERBOSE
+      printf("pre in %.4f %.4f %.4f undoSRGB %.4f %.4f %.4f\n", buf_src[3 * kX],
+             buf_src[3 * kX + 1], buf_src[3 * kX + 2], xform_src[3 * kX],
+             xform_src[3 * kX + 1], xform_src[3 * kX + 2]);
+#endif
+      break;
+  }
+
+#if PIK_CMS_VERBOSE
+  // Save inputs for printing before in-place transforms overwrite them.
+  const float in0 = xform_src[3 * kX + 0];
+  const float in1 = xform_src[3 * kX + 1];
+  const float in2 = xform_src[3 * kX + 2];
+#endif
+  // skip_lcms_ (decided in Init): plain copy instead of an LCMS transform.
+  if (skip_lcms_) {
+    if (buf_dst != xform_src) {
+      memcpy(buf_dst, xform_src, buf_dst_.xsize() * sizeof(*buf_dst));
+    }  // else: in-place, no need to copy
+  } else {
+#ifdef ADDRESS_SANITIZER
+    PIK_ASSERT(thread < transforms_.size());
+#endif
+    cmsHTRANSFORM xform = transforms_[thread];
+    cmsDoTransform(xform, xform_src, buf_dst, xsize_);
+  }
+#if PIK_CMS_VERBOSE
+  printf("xform skip%d: %.4f %.4f %.4f (%p) -> (%p) %.4f %.4f %.4f\n",
+         skip_lcms_, in0, in1, in2, xform_src, buf_dst, buf_dst[3 * kX],
+         buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]);
+#endif
+  // Optional re-encoding of the transform output, done in place in buf_dst.
+  switch (postprocess_) {
+    case ExtraTF::kNone:
+      break;
+    case ExtraTF::kPQ:
+      for (size_t i = 0; i < buf_dst_.xsize(); ++i) {
+        buf_dst[i] = TF_PQ().EncodedFromDisplay(buf_dst[i]);
+      }
+#if PIK_CMS_VERBOSE
+      printf("after PQ enc %.4f %.4f %.4f\n", buf_dst[3 * kX],
+             buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]);
+#endif
+      break;
+    case ExtraTF::kHLG:
+      for (size_t i = 0; i < buf_dst_.xsize(); ++i) {
+        buf_dst[i] = TF_HLG().EncodedFromDisplay(buf_dst[i]);
+      }
+#if PIK_CMS_VERBOSE
+      printf("after HLG enc %.4f %.4f %.4f\n", buf_dst[3 * kX],
+             buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]);
+#endif
+      break;
+    case ExtraTF::kSRGB:
+      SIMD_FULL(float) df;  // NOTE(review): assumes padded rows - confirm
+      for (size_t i = 0; i < buf_dst_.xsize(); i += df.N) {
+        const auto val = load(df, buf_dst + i);
+        const auto result = TF_SRGB().EncodedFromDisplay(val);
+        store(result, df, buf_dst + i);
+      }
+#if PIK_CMS_VERBOSE
+      printf("after SRGB enc %.4f %.4f %.4f\n", buf_dst[3 * kX],
+             buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]);
+#endif
+      break;
+  }
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/color_management.h b/codec/L2/demos/pikEnc/host/pik/color_management.h
new file mode 100755
index 0000000000..9345962086
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/color_management.h
@@ -0,0 +1,95 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_COLOR_MANAGEMENT_H_
+#define PIK_COLOR_MANAGEMENT_H_
+
+// ICC profiles and color space conversions.
+
+#include <stdint.h>
+#include <memory>
+#include <vector>
+
+#include "pik/color_encoding.h"
+#include "pik/common.h"
+#include "pik/data_parallel.h"
+#include "pik/image.h"
+#include "pik/padded_bytes.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Thread-safe monostate: every member is a static function.
+struct ColorManagement {
+  // Returns false without changing "c" if pp.color_space is unsupported or
+  // pp.gamma is outside (0, 1]. Otherwise, sets fields AND c->icc. Used by
+  // codecs that provide their own non-ICC metadata.
+  static Status SetFromParams(const ProfileParams& pp, ColorEncoding* c);
+
+  // Returns false without changing "c" if "icc" is empty or invalid. Otherwise,
+  // sets fields AND c->icc. Used by codecs reading embedded ICC profiles.
+  static Status SetFromProfile(PaddedBytes&& icc, ColorEncoding* c);
+
+  // Returns true and clears c->icc if a subsequent SetProfileFromFields will
+  // generate an equivalent profile (no need to send the large profile in the
+  // bitstream). Also returns true if c->icc is already empty.
+  static Status MaybeRemoveProfile(ColorEncoding* c);
+
+  // Returns true if c->icc was successfully reconstructed from other fields.
+  // This re-establishes the invariant (broken by MaybeRemoveProfile or changing
+  // fields) that fields and c->icc are equivalent. Returning false indicates
+  // the profile is lost/empty, which means ColorSpaceTransform will fail.
+  static Status SetProfileFromFields(ColorEncoding* c);
+};
+
+// Converts interleaved float rows between two color encodings. Run is
+class ColorSpaceTransform {  // thread-safe; Init must be called first.
+ public:
+  ColorSpaceTransform() {}
+  ~ColorSpaceTransform();
+
+  // Cannot copy (transforms_ holds pointers).
+  ColorSpaceTransform(const ColorSpaceTransform&) = delete;
+  ColorSpaceTransform& operator=(const ColorSpaceTransform&) = delete;
+
+  // "Constructor"; allocates for up to `num_threads`, or returns false.
+  Status Init(const ColorEncoding& c_src, const ColorEncoding& c_dst,
+              size_t xsize, size_t num_threads);
+
+  float* PIK_RESTRICT BufSrc(const size_t thread) {
+    return buf_src_.Row(thread);  // per-thread input row allocated by Init
+  }
+
+  float* PIK_RESTRICT BufDst(const size_t thread) {
+    return buf_dst_.Row(thread);  // per-thread output row allocated by Init
+  }
+
+  // buf_X can either be from BufX() or caller-allocated, interleaved storage.
+  // `thread` must be less than the `num_threads` passed to Init.
+  void Run(const size_t thread, const float* buf_src, float* buf_dst);
+
+ private:
+  enum class ExtraTF {
+    kNone,
+    kPQ,
+    kHLG,
+    kSRGB,
+  };
+
+  // One per thread - cannot share because of caching.
+  std::vector<void*> transforms_;
+
+  ImageF buf_src_;
+  ImageF buf_dst_;
+  size_t xsize_ = 0;  // pixels per row; set by Init
+  bool skip_lcms_ = false;
+  ExtraTF preprocess_ = ExtraTF::kNone;
+  ExtraTF postprocess_ = ExtraTF::kNone;
+};
+
+}  // namespace pik
+
+#endif  // PIK_COLOR_MANAGEMENT_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/common.h b/codec/L2/demos/pikEnc/host/pik/common.h
new file mode 100755
index 0000000000..7a3110ca84
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/common.h
@@ -0,0 +1,71 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_COMMON_H_
+#define PIK_COMMON_H_
+
+// Shared constants and helper functions.
+
+#include <stddef.h>
+#include <memory>  // unique_ptr
+
+#include "pik/compiler_specific.h"
+
+namespace pik {
+
+constexpr size_t kBitsPerByte = 8;  // more clear than CHAR_BIT
+
+template <typename T>
+constexpr inline T DivCeil(T a, T b) {
+  return (a + b - 1) / b;  // ceil(a / b) for integers with a >= 0 and b > 0.
+}
+
+template <typename T>
+constexpr T Pi(T multiplier) {
+  return static_cast<T>(multiplier * 3.1415926535897932);  // multiplier * pi
+}
+
+// A block is the square grid of pixels to which an "energy compaction"
+// transform (e.g. DCT) is applied; each block has its own AC quantizer.
+constexpr size_t kBlockDim = 8;
+// Number of pixels in one block.
+constexpr size_t kDCTBlockSize = kBlockDim * kBlockDim;
+
+// A group is the rectangular grid of blocks that can be decoded in parallel;
+// DC uses a different group size.
+constexpr size_t kDcGroupDimInBlocks = 256;
+constexpr size_t kGroupDim = 512;
+static_assert(kGroupDim % kBlockDim == 0,
+              "Group dim should be divisible by block dim");
+constexpr size_t kGroupDimInBlocks = kGroupDim / kBlockDim;
+
+// Groups are split into tiles to increase locality and cache hits.
+constexpr size_t kTileDim = 64;
+
+static_assert(kTileDim % kBlockDim == 0,
+              "Tile dim should be divisible by block dim");
+constexpr size_t kTileDimInBlocks = kTileDim / kBlockDim;
+
+static_assert(kGroupDimInBlocks % kTileDimInBlocks == 0,
+              "Group dim should be divisible by tile dim");
+
+// Stand-in for std::make_unique, because we can't rely on C++14 yet.
+template <typename T, typename... Args>
+std::unique_ptr<T> make_unique(Args&&... args) {
+  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
+}  // NOTE(review): std::forward is from <utility>, not included directly.
+
+// Returns "base" displaced by byte_offset bytes. Computing the address as an
+// integer leads to somewhat better code than pointer arithmetic.
+template <typename T>
+PIK_INLINE T* PIK_RESTRICT ByteOffset(T* PIK_RESTRICT base,
+                                      const intptr_t byte_offset) {
+  return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(base) + byte_offset);
+}
+
+}  // namespace pik
+
+#endif  // PIK_COMMON_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/compiler_specific.h b/codec/L2/demos/pikEnc/host/pik/compiler_specific.h
new file mode 100755
index 0000000000..267e3ee4af
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/compiler_specific.h
@@ -0,0 +1,133 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_COMPILER_SPECIFIC_H_
+#define PIK_COMPILER_SPECIFIC_H_
+
+// Macros for compiler version + nonstandard keywords, e.g. __builtin_expect.
+
+#include <stdint.h>
+
+// #if is shorter and safer than #ifdef. PIK_COMPILER_* are zero if not
+// detected, otherwise nonzero: _MSC_VER for MSVC, 100 * major + minor for GCC
+// and 1 for Clang. Other packages check #ifdef COMPILER_MSVC, hence the prefix.
+
+#ifdef _MSC_VER
+#define PIK_COMPILER_MSVC _MSC_VER
+#else
+#define PIK_COMPILER_MSVC 0
+#endif
+
+#ifdef __GNUC__
+#define PIK_COMPILER_GCC (__GNUC__ * 100 + __GNUC_MINOR__)
+#else
+#define PIK_COMPILER_GCC 0
+#endif
+
+#ifdef __clang__
+// For reasons unknown, Forge currently explicitly defines these to 0.0.
+#define PIK_COMPILER_CLANG 1  // (__clang_major__ * 100 + __clang_minor__)
+// Clang pretends to be GCC for compatibility.
+#undef PIK_COMPILER_GCC
+#define PIK_COMPILER_GCC 0
+#else
+#define PIK_COMPILER_CLANG 0
+#endif
+
+#if PIK_COMPILER_MSVC
+#define PIK_RESTRICT __restrict
+#elif PIK_COMPILER_GCC || PIK_COMPILER_CLANG
+#define PIK_RESTRICT __restrict__
+#else  // unknown compiler: the qualifier is dropped
+#define PIK_RESTRICT
+#endif  // PIK_RESTRICT
+
+#if PIK_COMPILER_MSVC
+#define PIK_INLINE __forceinline
+#define PIK_NOINLINE __declspec(noinline)
+#else  // every non-MSVC compiler is assumed to accept GCC-style attributes
+#define PIK_INLINE inline __attribute__((always_inline))
+#define PIK_NOINLINE __attribute__((noinline))
+#endif  // PIK_INLINE / PIK_NOINLINE
+
+#if PIK_COMPILER_MSVC
+#define PIK_NORETURN __declspec(noreturn)
+#else  // GCC-style attribute, as already assumed by PIK_INLINE/PIK_NOINLINE.
+#define PIK_NORETURN __attribute__((noreturn))
+#endif
+
+#if PIK_COMPILER_MSVC
+#define PIK_UNREACHABLE __assume(false)
+#elif PIK_COMPILER_CLANG || PIK_COMPILER_GCC >= 405
+#define PIK_UNREACHABLE __builtin_unreachable()
+#else  // no hint available: expands to nothing
+#define PIK_UNREACHABLE
+#endif  // PIK_UNREACHABLE
+
+#if PIK_COMPILER_MSVC
+// Branch hints are unsupported (__assume is not the same): pass through.
+#define PIK_LIKELY(expr) (expr)
+#define PIK_UNLIKELY(expr) (expr)
+#else
+#define PIK_LIKELY(expr) __builtin_expect(!!(expr), 1)
+#define PIK_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
+#endif
+
+#if PIK_COMPILER_MSVC
+#include <intrin.h>
+
+#pragma intrinsic(_ReadWriteBarrier)
+#define PIK_COMPILER_FENCE _ReadWriteBarrier()
+#elif PIK_COMPILER_GCC || PIK_COMPILER_CLANG
+#define PIK_COMPILER_FENCE asm volatile("" : : : "memory")
+#else  // unknown compiler: expands to nothing
+#define PIK_COMPILER_FENCE
+#endif  // PIK_COMPILER_FENCE
+
+// Returns a void* pointer which the compiler then assumes is N-byte aligned.
+// Example: float* PIK_RESTRICT aligned = (float*)PIK_ASSUME_ALIGNED(in, 32);
+//
+// The assignment semantics are required by GCC/Clang. ICC provides an in-place
+// __assume_aligned, whereas MSVC's __assume appears unsuitable.
+#if PIK_COMPILER_CLANG
+// Early versions of Clang did not support __builtin_assume_aligned.
+#define PIK_HAS_ASSUME_ALIGNED __has_builtin(__builtin_assume_aligned)
+#elif PIK_COMPILER_GCC
+#define PIK_HAS_ASSUME_ALIGNED 1
+#else
+#define PIK_HAS_ASSUME_ALIGNED 0
+#endif  // PIK_HAS_ASSUME_ALIGNED
+
+#if PIK_HAS_ASSUME_ALIGNED
+#define PIK_ASSUME_ALIGNED(ptr, align) __builtin_assume_aligned((ptr), (align))
+#else  // fallback yields "ptr" unchanged (not converted to void*)
+#define PIK_ASSUME_ALIGNED(ptr, align) (ptr) /* not supported */
+#endif  // PIK_ASSUME_ALIGNED
+
+#ifdef __has_attribute
+#define PIK_HAVE_ATTRIBUTE(x) __has_attribute(x)
+#else  // compilers without __has_attribute report every attribute as missing
+#define PIK_HAVE_ATTRIBUTE(x) 0
+#endif  // PIK_HAVE_ATTRIBUTE
+
+// Raises warnings if the function return value is unused. Should appear as the
+// first part of a function definition/declaration.
+#if PIK_HAVE_ATTRIBUTE(nodiscard)
+#define PIK_MUST_USE_RESULT [[nodiscard]]
+#elif PIK_COMPILER_CLANG && PIK_HAVE_ATTRIBUTE(warn_unused_result)
+#define PIK_MUST_USE_RESULT __attribute__((warn_unused_result))
+#else  // no suitable attribute: expands to nothing
+#define PIK_MUST_USE_RESULT
+#endif  // PIK_MUST_USE_RESULT
+
+#if PIK_HAVE_ATTRIBUTE(__format__)
+#define PIK_FORMAT(idx_fmt, idx_arg) \
+  __attribute__((__format__(__printf__, idx_fmt, idx_arg)))
+#else  // no printf-style format checking available
+#define PIK_FORMAT(idx_fmt, idx_arg)
+#endif  // PIK_FORMAT
+
+#endif  // PIK_COMPILER_SPECIFIC_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/compressed_dc.cc b/codec/L2/demos/pikEnc/host/pik/compressed_dc.cc
new file mode 100755
index 0000000000..38ac2bc1bd
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/compressed_dc.cc
@@ -0,0 +1,557 @@
+// Copyright 2019 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/compressed_dc.h"
+#include <vector>
+
+#include "pik/ac_strategy.h"
+#include "pik/adaptive_reconstruction.h"
+#include "pik/common.h"
+#include "pik/compressed_image_fwd.h"
+#include "pik/data_parallel.h"
+#include "pik/entropy_coder.h"
+#include "pik/gradient_map.h"
+#include "pik/image.h"
+#include "pik/image_ops.h"
+#include "pik/lossless16.h"
+#include "pik/lossless8.h"
+#include "pik/padded_bytes.h"
+#include "pik/pik_info.h"
+#include "pik/size_coder.h"
+
+namespace pik {
+namespace {
+
+// Encodes the `rect` window of `img`: per-channel minimum (low byte first), a
+// flag byte selecting the 8-bit codec, then the samples minus their minimum.
+// If grayscale, only the second channel (y) is encoded.
+bool Image3SCompress(const Image3S& img, const Rect& rect, bool grayscale,
+                     PaddedBytes* bytes) {
+  std::array<int16_t, 3> min;
+  std::array<int16_t, 3> max;
+  Image3MinMax(img, rect, &min, &max);
+  bool fit8 = true;  // If all values fit in 8-bit, use the 8-bit codec.
+  for (int c = 0; c < 3; c++) {
+    if (grayscale && c != 1) continue;
+    bytes->push_back(min[c] & 255);
+    bytes->push_back(min[c] >> 8);
+    if (max[c] - min[c] >= 256) fit8 = false;
+  }
+  bytes->push_back(fit8);
+  // The temporary images are only rect.xsize() wide, so every row copy must be
+  // bounded by the window width rather than by img.xsize().
+  if (fit8) {
+    if (grayscale) {
+      ImageB image(rect.xsize(), rect.ysize());
+      for (size_t y = 0; y < rect.ysize(); ++y) {
+        const int16_t* PIK_RESTRICT row_in = rect.ConstPlaneRow(img, 1, y);
+        uint8_t* PIK_RESTRICT row_out = image.Row(y);
+        for (size_t x = 0; x < rect.xsize(); ++x) {
+          row_out[x] = static_cast<uint8_t>(row_in[x] - min[1]);
+        }
+      }
+      return Grayscale8bit_compress(image, bytes);
+    }
+    Image3B image(rect.xsize(), rect.ysize());
+    for (int c = 0; c < 3; ++c) {
+      for (size_t y = 0; y < rect.ysize(); ++y) {
+        const int16_t* PIK_RESTRICT row_in = rect.ConstPlaneRow(img, c, y);
+        uint8_t* PIK_RESTRICT row_out = image.PlaneRow(c, y);
+        for (size_t x = 0; x < rect.xsize(); ++x) {
+          row_out[x] = static_cast<uint8_t>(row_in[x] - min[c]);
+        }
+      }
+    }
+    return Colorful8bit_compress(image, bytes);
+  }
+  if (grayscale) {
+    ImageU image(rect.xsize(), rect.ysize());
+    for (size_t y = 0; y < rect.ysize(); ++y) {
+      const int16_t* PIK_RESTRICT row_in = rect.ConstPlaneRow(img, 1, y);
+      uint16_t* PIK_RESTRICT row_out = image.Row(y);
+      for (size_t x = 0; x < rect.xsize(); ++x) {
+        row_out[x] = static_cast<uint16_t>(row_in[x] - min[1]);
+      }
+    }
+    return Grayscale16bit_compress(image, bytes);
+  }
+  Image3U image(rect.xsize(), rect.ysize());
+  for (int c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < rect.ysize(); ++y) {
+      const int16_t* PIK_RESTRICT row_in = rect.ConstPlaneRow(img, c, y);
+      uint16_t* PIK_RESTRICT row_out = image.PlaneRow(c, y);
+      for (size_t x = 0; x < rect.xsize(); ++x) {
+        row_out[x] = static_cast<uint16_t>(row_in[x] - min[c]);
+      }
+    }
+  }
+  return Colorful16bit_compress(image, bytes);
+}
+
+// Inverse of Image3SCompress: reads the channel minima and the codec flag at
+// `*pos`, then decodes. If grayscale, only y is decoded; x and b are zeroed.
+bool Image3SDecompress(const PaddedBytes& bytes, bool grayscale, size_t* pos,
+                       Image3S* result) {
+  if (bytes.size() < *pos + 12) return PIK_FAILURE("Could not decode range");
+  std::array<int16_t, 3> offsets;
+  for (int c = 0; c < 3; c++) {
+    if (!grayscale || c == 1) {
+      offsets[c] = static_cast<int16_t>(bytes[*pos] + (bytes[*pos + 1] << 8));
+      *pos += 2;
+    }
+  }
+  const bool use_8bit = bytes[(*pos)++];
+  if (grayscale) {
+    if (use_8bit) {
+      ImageB luma;
+      if (!Grayscale8bit_decompress(bytes, pos, &luma)) {
+        return PIK_FAILURE("Failed to decode DC");
+      }
+      *result = Image3S(luma.xsize(), luma.ysize());
+      for (size_t y = 0; y < result->ysize(); ++y) {
+        const uint8_t* PIK_RESTRICT src = luma.Row(y);
+        int16_t* PIK_RESTRICT dst_x = result->PlaneRow(0, y);
+        int16_t* PIK_RESTRICT dst_y = result->PlaneRow(1, y);
+        int16_t* PIK_RESTRICT dst_b = result->PlaneRow(2, y);
+        std::fill(dst_x, dst_x + luma.xsize(), 0);
+        std::fill(dst_b, dst_b + luma.xsize(), 0);
+        for (size_t x = 0; x < luma.xsize(); ++x) {
+          dst_y[x] = static_cast<int16_t>(src[x]) + offsets[1];
+        }
+      }
+    } else {
+      ImageU luma;
+      if (!Grayscale16bit_decompress(bytes, pos, &luma)) {
+        return PIK_FAILURE("Failed to decode DC");
+      }
+      *result = Image3S(luma.xsize(), luma.ysize());
+      for (size_t y = 0; y < result->ysize(); ++y) {
+        const uint16_t* PIK_RESTRICT src = luma.Row(y);
+        int16_t* PIK_RESTRICT dst_x = result->PlaneRow(0, y);
+        int16_t* PIK_RESTRICT dst_y = result->PlaneRow(1, y);
+        int16_t* PIK_RESTRICT dst_b = result->PlaneRow(2, y);
+        std::fill(dst_x, dst_x + luma.xsize(), 0);
+        std::fill(dst_b, dst_b + luma.xsize(), 0);
+        for (size_t x = 0; x < luma.xsize(); ++x) {
+          dst_y[x] = static_cast<int16_t>(src[x]) + offsets[1];
+        }
+      }
+    }
+  } else {
+    if (use_8bit) {
+      Image3B planes;
+      if (!Colorful8bit_decompress(bytes, pos, &planes)) {
+        return PIK_FAILURE("Failed to decode DC");
+      }
+      *result = Image3S(planes.xsize(), planes.ysize());
+      for (int c = 0; c < 3; ++c) {
+        for (size_t y = 0; y < result->ysize(); ++y) {
+          const uint8_t* PIK_RESTRICT src = planes.PlaneRow(c, y);
+          int16_t* PIK_RESTRICT dst = result->PlaneRow(c, y);
+          for (size_t x = 0; x < planes.xsize(); ++x) {
+            dst[x] = static_cast<int16_t>(src[x]) + offsets[c];
+          }
+        }
+      }
+    } else {
+      Image3U planes;
+      if (!Colorful16bit_decompress(bytes, pos, &planes)) {
+        return PIK_FAILURE("Failed to decode DC");
+      }
+      *result = Image3S(planes.xsize(), planes.ysize());
+      for (int c = 0; c < 3; ++c) {
+        for (size_t y = 0; y < result->ysize(); ++y) {
+          const uint16_t* PIK_RESTRICT src = planes.PlaneRow(c, y);
+          int16_t* PIK_RESTRICT dst = result->PlaneRow(c, y);
+          for (size_t x = 0; x < planes.xsize(); ++x) {
+            dst[x] = static_cast<int16_t>(src[x]) + offsets[c];
+          }
+        }
+      }
+    }
+  }
+  return true;
+}
+
+// Dequantizes the quantized DC `img_dc16` and applies the inverse color
+// transform, writing the result into the window `rect` of
+// `frame_dec_cache->dc`. `aux_out` is not used by this function.
+SIMD_ATTR void DequantDC(const Image3S& img_dc16, const Rect& rect,
+                         const float* mul_dc, const float ytox_dc,
+                         const float ytob_dc,
+                         FrameDecCache* PIK_RESTRICT frame_dec_cache,
+                         PikInfo* aux_out) {
+  PIK_ASSERT(SameSize(img_dc16, rect));
+
+  using D = SIMD_FULL(float);
+  constexpr D df;
+  constexpr SIMD_PART(int16_t, D::N) di16;
+  constexpr SIMD_PART(int32_t, D::N) di32;
+  const size_t width = rect.xsize();
+  const size_t height = rect.ysize();
+
+  // Every inner iteration handles one full vector of df.N lanes.
+  // NOTE(review): whole vectors are loaded/stored even if the width is not a
+  // multiple of df.N - presumably rows are padded accordingly; confirm.
+
+  // Y plane: scale by the Y multiplier.
+  const auto y_scale = set1(df, mul_dc[1]);
+  for (size_t by = 0; by < height; ++by) {
+    const int16_t* PIK_RESTRICT in_y16 = img_dc16.ConstPlaneRow(1, by);
+    float* PIK_RESTRICT out_y = rect.PlaneRow(&frame_dec_cache->dc, 1, by);
+    for (size_t bx = 0; bx < width; bx += df.N) {
+      const auto y_i16 = load(di16, in_y16 + bx);
+      const auto y = convert_to(df, convert_to(di32, y_i16));
+      store(y * y_scale, df, out_y + bx);
+    }
+  }
+
+  // X and B planes (c = 0 and c = 2): scale, then add the matching multiple
+  // of the Y values dequantized above.
+  for (int c = 0; c <= 2; c += 2) {
+    const auto y_factor = set1(df, (c == 0) ? ytox_dc : ytob_dc);
+    const auto xb_scale = set1(df, mul_dc[c]);
+    for (size_t by = 0; by < height; ++by) {
+      const int16_t* PIK_RESTRICT in_xb16 = img_dc16.ConstPlaneRow(c, by);
+      const float* PIK_RESTRICT in_y =
+          rect.ConstPlaneRow(frame_dec_cache->dc, 1, by);
+      float* PIK_RESTRICT out_xb = rect.PlaneRow(&frame_dec_cache->dc, c, by);
+      for (size_t bx = 0; bx < width; bx += df.N) {
+        const auto xb_i16 = load(di16, in_xb16 + bx);
+        const auto xb = convert_to(df, convert_to(di32, xb_i16)) * xb_scale;
+        const auto y = load(df, in_y + bx);
+        store(mul_add(y_factor, y, xb), df, out_xb + bx);
+      }
+    }
+  }
+}
+
+// Encodes one DC group: the quantized DC of `rect` followed by the histograms
+// and tokens of the control fields. `rect`: block units.
+// Returns the concatenated codes as a string.
+std::string CompressDCGroup(const Image3S& dc, const Rect& rect,
+                            const AcStrategyImage& ac_strategy,
+                            const ImageI& quant_field,
+                            const ImageB& ar_sigma_lut_ids, bool use_new_dc,
+                            bool grayscale, MultipassManager* manager,
+                            PikImageSizeInfo* dc_info,
+                            PikImageSizeInfo* cfield_info) {
+  std::string group_code;
+  if (!use_new_dc) {
+    Image3S residuals(rect.xsize(), rect.ysize());
+    ShrinkDC(rect, dc, &residuals);
+    group_code = EncodeImageData(Rect(residuals), residuals, dc_info);
+  } else {
+    // NOTE(review): the status returned by Image3SCompress is ignored here.
+    PaddedBytes packed_dc;
+    Image3SCompress(dc, rect, grayscale, &packed_dc);
+    group_code.assign(packed_dc.data(), packed_dc.data() + packed_dc.size());
+  }
+
+  // All control fields share one token stream.
+  std::vector<std::vector<Token>> all_tokens(1);
+  std::vector<Token>* tokens = &all_tokens[0];
+  TokenizeAcStrategy(rect, ac_strategy, manager->HintAcStrategy(), tokens);
+  TokenizeQuantField(rect, quant_field, manager->HintQuantField(), ac_strategy,
+                     tokens);
+  // TODO(veluca): tokenize quantization control field.
+  TokenizeARParameters(rect, ar_sigma_lut_ids, ac_strategy, tokens);
+
+  std::vector<uint8_t> context_map;
+  std::vector<ANSEncodingData> codes;
+  const std::string histo_code = BuildAndEncodeHistograms(
+      kNumControlFieldContexts, all_tokens, &codes, &context_map,
+      cfield_info);
+  const std::string tokens_code =
+      WriteTokens(*tokens, codes, context_map, cfield_info);
+
+  // Layout: DC code, then histograms, then control-field tokens.
+  return group_code + histo_code + tokens_code;
+}
+
+// Decodes one DC group from `reader`/`compressed` (quantized DC followed by
+// the control fields), then dequantizes the DC into `frame_dec_cache->dc`.
+// `rect`: block units.
+Status DecodeDCGroup(BitReader* reader, const PaddedBytes& compressed,
+                     const Rect& rect, bool use_new_dc, bool grayscale,
+                     const float* mul_dc, const float ytox_dc,
+                     const float ytob_dc, MultipassManager* manager,
+                     FrameDecCache* frame_dec_cache,
+                     GroupDecCache* group_dec_cache, PikInfo* aux_out) {
+  group_dec_cache->InitDecodeDCGroup(rect.xsize(), rect.ysize());
+  if (use_new_dc) {
+    PIK_ASSERT(SameSize(rect, group_dec_cache->quantized_dc));
+    size_t dc_pos = reader->Position();
+    if (!Image3SDecompress(compressed, grayscale, &dc_pos,
+                           &group_dec_cache->quantized_dc)) {
+      return PIK_FAILURE("Failed to decode DC");
+    }
+    // Image3SDecompress consumed bytes straight from `compressed`; advance the
+    // bit reader past them so the histograms are read from the right place.
+    reader->SkipBits((dc_pos - reader->Position()) * kBitsPerByte);
+  } else {
+    PIK_ASSERT(SameSize(rect, group_dec_cache->dc_y));
+    PIK_ASSERT(SameSize(group_dec_cache->dc_xz_residuals,
+                        group_dec_cache->dc_xz_expanded));
+    if (!DecodeImage(reader, Rect(group_dec_cache->quantized_dc),
+                     &group_dec_cache->quantized_dc)) {
+      return PIK_FAILURE("Failed to decode DC image");
+    }
+    ExpandDC(Rect(group_dec_cache->quantized_dc),
+             &group_dec_cache->quantized_dc, &group_dec_cache->dc_y,
+             &group_dec_cache->dc_xz_residuals,
+             &group_dec_cache->dc_xz_expanded);
+  }
+  PIK_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+
+  ANSCode code;
+  std::vector<uint8_t> context_map;
+  PIK_RETURN_IF_ERROR(DecodeHistograms(reader, kNumControlFieldContexts, 256,
+                                       &code, &context_map));
+  PIK_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+  ANSSymbolReader control_fields_decoder(&code);
+  if (!DecodeAcStrategy(reader, &control_fields_decoder, context_map,
+                        &group_dec_cache->ac_strategy_raw, rect,
+                        &frame_dec_cache->ac_strategy,
+                        manager->HintAcStrategy())) {
+    return PIK_FAILURE("Failed to decode AcStrategy.");
+  }
+
+  if (!DecodeQuantField(reader, &control_fields_decoder, context_map, rect,
+                        frame_dec_cache->ac_strategy,
+                        &frame_dec_cache->raw_quant_field,
+                        manager->HintQuantField())) {
+    return PIK_FAILURE("Failed to decode QuantField.");
+  }
+
+  // TODO(veluca): decode quantization control field.
+  if (!DecodeARParameters(reader, &control_fields_decoder, context_map, rect,
+                          frame_dec_cache->ac_strategy,
+                          &frame_dec_cache->ar_sigma_lut_ids)) {
+    return PIK_FAILURE("Failed to decode ARParameters.");
+  }
+
+  if (!control_fields_decoder.CheckANSFinalState()) {
+    return PIK_FAILURE("QuantField: ANS checksum failure.");
+  }
+  PIK_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+
+  DequantDC(group_dec_cache->quantized_dc, rect, mul_dc, ytox_dc, ytob_dc,
+            frame_dec_cache, aux_out);
+  return true;
+}
+
+// TODO(veluca): is this the right constant?
+using DCGroupSizeCoder = SizeCoderT<0x150F0E0C>;  // codes the DC group TOC
+
+}  // namespace
+
+// Encodes all DC groups of `frame_enc_cache.dc`. The result holds the TOC of
+// group sizes, the group codes and, if `use_gradient` is set, the gradient map.
+// Size statistics are merged into `dc_info`/`cfield_info` when non-null.
+PaddedBytes EncodeDCGroups(const Quantizer& quantizer,
+                           const FrameEncCache& frame_enc_cache,
+                           const AcStrategyImage& ac_strategy,
+                           MultipassManager* manager, PikImageSizeInfo* dc_info,
+                           PikImageSizeInfo* cfield_info) {
+  PaddedBytes out;
+
+  static_assert(kDcGroupDimInBlocks % kGroupDimInBlocks == 0,
+                "DC group size must be a multiple of AC group size!");
+
+  const size_t xsize_blocks = frame_enc_cache.dc.xsize();
+  const size_t ysize_blocks = frame_enc_cache.dc.ysize();
+  const size_t xsize_groups = DivCeil(xsize_blocks, kDcGroupDimInBlocks);
+  const size_t ysize_groups = DivCeil(ysize_blocks, kDcGroupDimInBlocks);
+  const size_t num_groups = xsize_groups * ysize_groups;
+
+  std::vector<PikImageSizeInfo> size_info(num_groups);
+  std::vector<PikImageSizeInfo> cfields_size_info(num_groups);
+
+  // group_index is size_t to match num_groups (no signed/unsigned compare).
+  std::vector<PaddedBytes> group_codes(num_groups);
+  for (size_t group_index = 0; group_index < num_groups; ++group_index) {
+    size_t group_pos = 0;
+    const size_t gx = group_index % xsize_groups;
+    const size_t gy = group_index / xsize_groups;
+    const Rect rect(gx * kDcGroupDimInBlocks, gy * kDcGroupDimInBlocks,
+                    kDcGroupDimInBlocks, kDcGroupDimInBlocks, xsize_blocks,
+                    ysize_blocks);
+    std::string group_code = CompressDCGroup(
+        frame_enc_cache.dc, rect, ac_strategy, quantizer.RawQuantField(),
+        frame_enc_cache.ar_sigma_lut_ids, frame_enc_cache.use_new_dc,
+        frame_enc_cache.grayscale_opt, manager, &size_info[group_index],
+        &cfields_size_info[group_index]);
+    // Copy the code string into this group's PaddedBytes.
+    group_codes[group_index].resize(group_code.size());
+    Append(group_code, &group_codes[group_index], &group_pos);
+  }
+
+  // Merge the per-group statistics into the caller's totals.
+  for (size_t group_index = 0; group_index < num_groups; ++group_index) {
+    if (dc_info != nullptr) {
+      dc_info->Assimilate(size_info[group_index]);
+    }
+    if (cfield_info != nullptr) {
+      cfield_info->Assimilate(cfields_size_info[group_index]);
+    }
+  }
+
+  // Build TOC: one encoded size per group, then zero bits up to the next byte
+  // boundary; the buffer is finally trimmed to the bytes actually written.
+  PaddedBytes group_toc(DCGroupSizeCoder::MaxSize(num_groups));
+  size_t group_toc_pos = 0;
+  uint8_t* group_toc_storage = group_toc.data();
+  size_t total_groups_size = 0;
+  for (size_t group_index = 0; group_index < num_groups; ++group_index) {
+    size_t group_size = group_codes[group_index].size();
+    DCGroupSizeCoder::Encode(group_size, &group_toc_pos, group_toc_storage);
+    total_groups_size += group_size;
+  }
+  WriteZeroesToByteBoundary(&group_toc_pos, group_toc_storage);
+  group_toc.resize(group_toc_pos / kBitsPerByte);
+
+  PaddedBytes serialized_gradient_map;
+  if (frame_enc_cache.use_gradient) {
+    SerializeGradientMap(frame_enc_cache.gradient, Rect(frame_enc_cache.dc),
+                         quantizer, &serialized_gradient_map);
+  }
+
+  // Push output: TOC, then all group codes, then the gradient map.
+  out.reserve(group_toc.size() + total_groups_size +
+              serialized_gradient_map.size());
+  out.append(group_toc);
+  for (size_t group_index = 0; group_index < num_groups; ++group_index) {
+    out.append(group_codes[group_index]);
+  }
+  out.append(serialized_gradient_map);
+
+  return out;
+}
+
+// Decodes all DC groups of a frame: reads the TOC of group sizes, decodes the
+// groups on `pool`, then applies the gradient map if the frame header flags
+// it, otherwise AdaptiveDCReconstruction. Output goes to frame_dec_cache->dc.
+Status DecodeDCGroups(BitReader* reader, const PaddedBytes& compressed,
+                      const FrameHeader& frame_header, size_t xsize_blocks,
+                      size_t ysize_blocks, const Quantizer& quantizer,
+                      const ColorCorrelationMap& cmap, ThreadPool* pool,
+                      MultipassManager* manager,
+                      FrameDecCache* PIK_RESTRICT frame_dec_cache,
+                      std::vector<GroupDecCache>* group_dec_caches,
+                      PikInfo* aux_out) {
+  float mul_dc[3];
+  for (int c = 0; c < 3; ++c) {
+    const auto dc_weight = quantizer.DequantMatrix(0, kQuantKindDCT8, c)[0];
+    mul_dc[c] = dc_weight * quantizer.inv_quant_dc();
+  }
+
+  frame_dec_cache->dc = Image3F(xsize_blocks, ysize_blocks);
+
+  // Precompute DC inverse color transform.
+  const float ytox_dc = ColorCorrelationMap::YtoX(1.0f, cmap.ytox_dc);
+  const float ytob_dc = ColorCorrelationMap::YtoB(1.0f, cmap.ytob_dc);
+
+  const size_t xsize_groups = DivCeil(xsize_blocks, kDcGroupDimInBlocks);
+  const size_t ysize_groups = DivCeil(ysize_blocks, kDcGroupDimInBlocks);
+  const size_t num_groups = xsize_groups * ysize_groups;
+
+  // Read TOC: group_offsets[i] is group i's byte offset from the first group.
+  std::vector<size_t> group_offsets;
+  group_offsets.reserve(num_groups + 1);
+  group_offsets.push_back(0);
+  for (size_t group_index = 0; group_index < num_groups; ++group_index) {
+    const uint32_t size = DCGroupSizeCoder::Decode(reader);
+    group_offsets.push_back(group_offsets.back() + size);
+  }
+  PIK_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+
+  // Pretend all groups are read.
+  const size_t group_codes_begin = reader->Position();
+  reader->SkipBits(group_offsets.back() * kBitsPerByte);
+  if (reader->Position() > compressed.size()) {
+    return PIK_FAILURE("Group code extends after stream end");
+  }
+
+  // Decode groups.
+  std::atomic<int> num_errors{0};
+  std::vector<PikInfo> aux_outs(aux_out ? NumThreads(pool) : 0);
+  const auto process_group = [&](const int group_index, const int thread) {
+    const size_t first_byte = group_codes_begin + group_offsets[group_index];
+    const size_t end_byte = group_codes_begin + group_offsets[group_index + 1];
+    // TODO(user): this looks ugly; we should get rid of PaddedBytes parameter
+    //               once it is wrapped into BitReader; otherwise it is easy to
+    //               screw the things up.
+    BitReader group_reader(compressed.data(), end_byte);
+    group_reader.SkipBits(first_byte * kBitsPerByte);
+    const size_t gx = group_index % xsize_groups;
+    const size_t gy = group_index / xsize_groups;
+    const Rect rect(gx * kDcGroupDimInBlocks, gy * kDcGroupDimInBlocks,
+                    kDcGroupDimInBlocks, kDcGroupDimInBlocks, xsize_blocks,
+                    ysize_blocks);
+    // Each worker thread uses its own cache and aux_out slot.
+    GroupDecCache* group_dec_cache = group_dec_caches->data() + thread;
+    PikInfo* my_aux_out = aux_out ? &aux_outs[thread] : nullptr;
+    if (!DecodeDCGroup(&group_reader, compressed, rect,
+                       frame_dec_cache->use_new_dc, frame_dec_cache->grayscale,
+                       mul_dc, ytox_dc, ytob_dc, manager, frame_dec_cache,
+                       group_dec_cache, my_aux_out)) {
+      // Only counted here; the failure is reported once the pool is done.
+      num_errors.fetch_add(1, std::memory_order_relaxed);
+    }
+  };
+  RunOnPool(pool, 0, num_groups, process_group, "DecodeDCGroup");
+  PIK_RETURN_IF_ERROR(num_errors.load(std::memory_order_relaxed) == 0);
+  if (aux_out != nullptr) {
+    for (size_t thread = 0; thread < NumThreads(pool); ++thread) {
+      aux_out->Assimilate(aux_outs[thread]);
+    }
+  }
+
+  if (frame_header.flags & FrameHeader::kGradientMap) {
+    size_t byte_pos = reader->Position();
+    PIK_RETURN_IF_ERROR(DeserializeGradientMap(
+        xsize_blocks, ysize_blocks,
+        frame_header.flags & FrameHeader::kGrayscaleOpt, quantizer, compressed,
+        &byte_pos, &frame_dec_cache->gradient));
+    reader->SkipBits((byte_pos - reader->Position()) * 8);
+    ApplyGradientMap(frame_dec_cache->gradient, quantizer,
+                     &frame_dec_cache->dc);
+  } else {
+    AdaptiveDCReconstruction(frame_dec_cache->dc, quantizer);
+  }
+  return true;
+}
+
+// Copies the DC of the group covered by `rect` (pixel units), plus a border of
+// one block on each side, from `frame_dec_cache.dc` into `group_dec_cache->dc`.
+// `rect` offsets and sizes are divided by kBlockDim to get block units.
+void InitializeDecCache(const FrameDecCache& frame_dec_cache, const Rect& rect,
+                        GroupDecCache* PIK_RESTRICT group_dec_cache) {
+  const auto& frame_dc = frame_dec_cache.dc;
+  const size_t bx0 = rect.x0() / kBlockDim;
+  const size_t by0 = rect.y0() / kBlockDim;
+  const size_t xblocks = rect.xsize() / kBlockDim;
+  const size_t yblocks = rect.ysize() / kBlockDim;
+
+  group_dec_cache->InitOnce(xblocks, yblocks);
+  PIK_ASSERT(xblocks <= group_dec_cache->dc.xsize());
+  PIK_ASSERT(yblocks <= group_dec_cache->dc.ysize());
+
+  // TODO(veluca): avoid this copy.
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t y = 0; y < yblocks + 2; y++) {
+      const size_t src_y = SourceCoord(y + by0, frame_dc.ysize());
+      const float* PIK_RESTRICT src = frame_dc.ConstPlaneRow(c, src_y);
+      float* PIK_RESTRICT dst = group_dec_cache->dc.PlaneRow(c, y);
+      for (size_t x = 0; x < xblocks + 2; x++) {
+        dst[x] = src[SourceCoord(x + bx0, frame_dc.xsize())];
+      }
+    }
+  }
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/compressed_dc.h b/codec/L2/demos/pikEnc/host/pik/compressed_dc.h
new file mode 100755
index 0000000000..fe8ff51fff
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/compressed_dc.h
@@ -0,0 +1,58 @@
+// Copyright 2019 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_COMPRESSED_DC_H_
+#define PIK_COMPRESSED_DC_H_
+
+#include <vector>
+#include "pik/color_correlation.h"
+#include "pik/compressed_image_fwd.h"
+#include "pik/data_parallel.h"
+#include "pik/headers.h"
+#include "pik/multipass_handler.h"
+#include "pik/padded_bytes.h"
+#include "pik/pik_info.h"
+#include "pik/quantizer.h"
+
+// DC handling functions: encoding and decoding of DC to and from bitstream, and
+// related function to initialize the per-group decoder cache.
+
+namespace pik {
+
+// Encodes the DC-related information from frame_enc_cache: the TOC of group
+// sizes, the per-group DC and control-field codes, and any gradient map.
+PaddedBytes EncodeDCGroups(const Quantizer& quantizer,
+                           const FrameEncCache& frame_enc_cache,
+                           const AcStrategyImage& ac_strategy,
+                           MultipassManager* manager, PikImageSizeInfo* dc_info,
+                           PikImageSizeInfo* cfield_info);
+
+// Decodes and dequantizes DC into frame_dec_cache->dc, and decodes/applies the
+// gradient map if the frame header requests it.
+Status DecodeDCGroups(BitReader* reader, const PaddedBytes& compressed,
+                      const FrameHeader& frame_header, size_t xsize_blocks,
+                      size_t ysize_blocks, const Quantizer& quantizer,
+                      const ColorCorrelationMap& cmap, ThreadPool* pool,
+                      MultipassManager* manager,
+                      FrameDecCache* PIK_RESTRICT frame_dec_cache,
+                      std::vector<GroupDecCache>* group_dec_caches,
+                      PikInfo* aux_out);
+
+// Maps `candidate`, a coordinate in [0, size + 1] of an image extended by 1 px
+// on each side, to [0, size): both border positions clone the nearest edge.
+PIK_INLINE size_t SourceCoord(size_t candidate, size_t size) {
+  if (candidate == 0) return 0;
+  return candidate == size + 1 ? size - 1 : candidate - 1;
+}
+
+// Initializes group_dec_cache for decoding the `rect` part of the image (in
+// pixel units) by copying the matching DC from frame_dec_cache.
+void InitializeDecCache(const FrameDecCache& frame_dec_cache, const Rect& rect,
+                        GroupDecCache* PIK_RESTRICT group_dec_cache);
+
+}  // namespace pik
+
+#endif  // PIK_COMPRESSED_DC_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/compressed_image.cc b/codec/L2/demos/pikEnc/host/pik/compressed_image.cc
new file mode 100755
index 0000000000..e514d7bcb8
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/compressed_image.cc
@@ -0,0 +1,1613 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/compressed_image.h"
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <string>
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/ac_predictions.h"
+#include "pik/ac_strategy.h"
+#include "pik/adaptive_reconstruction.h"
+#include "pik/ans_decode.h"
+#include "pik/bilinear_transform.h"
+#include "pik/block.h"
+#include "pik/butteraugli_distance.h"
+#include "pik/color_correlation.h"
+#include "pik/common.h"
+#include "pik/compiler_specific.h"
+#include "pik/compressed_dc.h"
+#include "pik/compressed_image_fwd.h"
+#include "pik/convolve.h"
+#include "pik/dc_predictor.h"
+#include "pik/dct.h"
+#include "pik/dct_util.h"
+#include "pik/deconvolve.h"
+#include "pik/entropy_coder.h"
+#include "pik/epf.h"
+#include "pik/fields.h"
+#include "pik/gaborish.h"
+#include "pik/gauss_blur.h"
+#include "pik/gradient_map.h"
+#include "pik/headers.h"
+#include "pik/huffman_decode.h"
+#include "pik/huffman_encode.h"
+#include "pik/image.h"
+#include "pik/image_ops.h"
+#include "pik/lossless16.h"
+#include "pik/lossless8.h"
+#include "pik/opsin_image.h"
+#include "pik/opsin_inverse.h"
+#include "pik/opsin_params.h"
+#include "pik/pik_params.h"
+#include "pik/profiler.h"
+#include "pik/quantizer.h"
+#include "pik/resample.h"
+#include "pik/resize.h"
+#include "pik/simd/simd.h"
+#include "pik/status.h"
+#include "pik/upscaler.h"
+
+#define USE_K2_DATA true
+
+namespace pik {
+
+namespace {
+
+// Zeroes the leading covered_blocks_y() x covered_blocks_x() values of blocks.
+void ZeroDcValues(Image3F *image, const AcStrategyImage &ac_strategy) {
+  constexpr size_t N = kBlockDim;
+  const size_t xsize_blocks = image->xsize() / (N * N);
+  for (size_t c = 0; c < image->kNumPlanes; c++) {
+    for (size_t by = 0; by < image->ysize(); by++) {
+      AcStrategyRow strategies = ac_strategy.ConstRow(by);
+      float *PIK_RESTRICT row = image->PlaneRow(c, by);
+      for (size_t bx = 0; bx < xsize_blocks; bx++) {
+        AcStrategy acs = strategies[bx];
+        if (!acs.IsFirstBlock())
+          continue;
+        const size_t stride = acs.covered_blocks_x() * N;
+        for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
+          for (size_t x = 0; x < acs.covered_blocks_x(); x++)
+            row[bx * N * N + y * stride + x] = 0;
+        }
+      }
+    }
+  }
+}
+
+} // namespace
+
+constexpr float kIdentityAvgParam = 0.25;
+
+// Allows dropping the X and B channels of a grayscale XYB image and rebuilding
+// them from the Y channel alone via a piecewise-linear fit of X(Y) and B(Y).
+struct GrayXyb {
+  static constexpr int kM = 16; // Number of line pieces.
+
+  GrayXyb() { Compute(); }
+
+  // Evaluates the fitted lines of the piece containing `y` (clamped to the
+  // first/last piece) and stores the results in `*x` and `*b`.
+  void YToXyb(float y, float *x, float *b) const {
+    int piece = (int)((y - ysub) * ymul * kM);
+    piece = std::max(0, std::min(piece, kM - 1));
+    *x = y * y_to_x_slope[piece] + y_to_x_constant[piece];
+    *b = y * y_to_b_slope[piece] + y_to_b_constant[piece];
+  }
+
+  // Sets every X and B sample of `image` to zero.
+  void RemoveXB(Image3F *image) const {
+    for (size_t y = 0; y < image->ysize(); ++y) {
+      float *PIK_RESTRICT row_x = image->PlaneRow(0, y);
+      float *PIK_RESTRICT row_b = image->PlaneRow(2, y);
+      std::fill(row_x, row_x + image->xsize(), 0.0f);
+      std::fill(row_b, row_b + image->xsize(), 0.0f);
+    }
+  }
+
+  // Recomputes every X and B sample of `image` from its Y sample.
+  void RestoreXB(Image3F *image) const {
+    for (size_t y = 0; y < image->ysize(); ++y) {
+      const float *PIK_RESTRICT luma = image->PlaneRow(1, y);
+      float *PIK_RESTRICT out_x = image->PlaneRow(0, y);
+      float *PIK_RESTRICT out_b = image->PlaneRow(2, y);
+      for (size_t x = 0; x < image->xsize(); x++) {
+        YToXyb(luma[x], &out_x[x], &out_b[x]);
+      }
+    }
+  }
+
+private:
+  // Samples the gray axis, splits the resulting Y range into kM equal pieces
+  // and fits one line per piece for X and for B as functions of Y.
+  void Compute() {
+    static const int kN = 1024;
+    std::vector<float> xs(kN);
+    std::vector<float> ys(kN);
+    std::vector<float> bs(kN);
+    for (int i = 0; i < kN; i++) {
+      float gray = (float)(256.0f * i / kN);
+      LinearToXyb(gray, gray, gray, &xs[i], &ys[i], &bs[i]);
+    }
+
+    // border[m] is the first sample whose Y reaches the start of piece m.
+    float y_min = ys[0];
+    float y_max = ys[kN - 1];
+    int border[kM + 1];
+    int m = 0;
+    for (int i = 0; i < kN; i++) {
+      if (ys[i] >= ys[0] + (y_max - y_min) * m / kM) {
+        border[m++] = i;
+      }
+    }
+    border[kM] = kN;
+
+    ysub = y_min;
+    ymul = 1.0 / (y_max - y_min);
+
+    for (int i = 0; i < kM; i++) {
+      const int first = border[i];
+      const size_t count = border[i + 1] - border[i];
+      LinearRegression(ys.data() + first, xs.data() + first, count,
+                       &y_to_x_constant[i], &y_to_x_slope[i]);
+      LinearRegression(ys.data() + first, bs.data() + first, count,
+                       &y_to_b_constant[i], &y_to_b_slope[i]);
+    }
+  }
+
+  // Linear regression over `size` samples: finds a, b with y ~= b*x + a.
+  void LinearRegression(const float *x, const float *y, size_t size, double *a,
+                        double *b) {
+    double mean_x = 0, mean_y = 0;
+    double m2_x = 0, m2_y = 0; // second moment
+    double mxy = 0;
+    for (size_t i = 0; i < size; i++) {
+      double inv = 1.0 / (i + 1);
+      double dx = x[i] - mean_x;
+      double xn = dx * inv;
+      mean_x += xn;
+      m2_x += dx * xn * i;
+      double dy = y[i] - mean_y;
+      double yn = dy * inv;
+      mean_y += yn;
+      m2_y += dy * yn * i;
+      mxy += i * xn * yn - mxy * inv;
+    }
+
+    double sx = std::sqrt(m2_x / (size - 1));
+    double sy = std::sqrt(m2_y / (size - 1));
+
+    double sumxy = mxy * size + mean_y * mean_x * size;
+    double r = (sumxy - size * mean_x * mean_y) / ((size - 1.0) * sx * sy);
+    *b = r * sy / sx;
+    *a = mean_y - *b * mean_x;
+  }
+
+  // Per-piece line coefficients: value = y * slope + constant.
+  double y_to_x_slope[kM];
+  double y_to_x_constant[kM];
+  double y_to_b_slope[kM];
+  double y_to_b_constant[kM];
+
+  double ysub; // Y of the first gray sample.
+  double ymul; // 1 / (Y of last sample - Y of first sample).
+};
+
+// Returns the shared GrayXyb instance; it is built on first use, never freed.
+static const GrayXyb *GetGrayXyb() {
+  static const GrayXyb *const kInstance = new GrayXyb;
+  return kInstance;
+}
+
+// Populates the frame-level encoder cache for one frame.
+//
+// Steps, in order:
+//  1. Copies `ac_strategy` and the kGradientMap / kGrayscaleOpt header flags
+//     into `frame_enc_cache`.
+//  2. Takes a copy of `opsin_full`, subtracts `dictionary` from it and runs
+//     ApplyReverseBilinear() on the result.
+//  3. Transforms every kBlockDim x kBlockDim block into
+//     `frame_enc_cache->coeffs` and derives a one-value-per-block DC image.
+//  4. Quantizes the DC image into `frame_enc_cache->dc`, then rebuilds its
+//     dequantized form in `frame_enc_cache->dc_dec`.
+//
+// Intermediate images are dumped to std::cout under "std_*" labels. If
+// `aux_out` is non-null, the DC images are also handed to InspectImage3F().
+SIMD_ATTR void InitializeFrameEncCache(
+    const FrameHeader &frame_header, const Image3F &opsin_full,
+    const AcStrategyImage &ac_strategy, const Quantizer &quantizer,
+    const ColorCorrelationMap &cmap, const BlockDictionary &dictionary,
+    FrameEncCache *frame_enc_cache, PikInfo *aux_out) {
+  PROFILER_FUNC;
+  constexpr size_t N = kBlockDim;
+  constexpr int cY = 1; // Y color channel.
+
+  frame_enc_cache->ac_strategy = ac_strategy.Copy();
+  frame_enc_cache->use_gradient =
+      frame_header.flags & FrameHeader::kGradientMap;
+  frame_enc_cache->grayscale_opt =
+      frame_header.flags & FrameHeader::kGrayscaleOpt;
+
+  const size_t xsize_blocks = opsin_full.xsize() / N;
+  const size_t ysize_blocks = opsin_full.ysize() / N;
+
+  // Debug helper: prints the xsize_blocks x ysize_blocks corner of all three
+  // planes of `img` as comma-terminated values. With `line_breaks` set, a
+  // newline follows every row and an extra one follows every plane.
+  const auto dump_float_planes = [&](const Image3F &img, bool line_breaks) {
+    for (int c = 0; c < 3; c++) {
+      for (size_t y = 0; y < ysize_blocks; y++) {
+        const float *PIK_RESTRICT row = img.ConstPlaneRow(c, y);
+        for (size_t x = 0; x < xsize_blocks; x++) {
+          std::cout << row[x] << ",";
+        }
+        if (line_breaks) {
+          std::cout << std::endl;
+        }
+      }
+      if (line_breaks) {
+        std::cout << std::endl;
+      }
+    }
+  };
+
+  // Debug helper: prints `label` followed by plane `c` of the quantized DC
+  // image, one line per row.
+  const auto dump_quantized_dc_plane = [&](const char *label, int c) {
+    std::cout << label;
+    for (size_t y = 0; y < frame_enc_cache->dc.ysize(); y++) {
+      const int16_t *PIK_RESTRICT row_in =
+          frame_enc_cache->dc.ConstPlaneRow(c, y);
+      for (size_t x = 0; x < frame_enc_cache->dc.xsize(); x++) {
+        std::cout << row_in[x] << ",";
+      }
+      std::cout << std::endl;
+    }
+  };
+
+  // Work on a private copy so that `opsin_full` stays untouched.
+  Image3F opsin = CopyImage(opsin_full);
+  dictionary.SubtractFrom(&opsin);
+  ApplyReverseBilinear(&opsin);
+
+  // One image row per block row; each block occupies kDCTBlockSize
+  // consecutive values within that row.
+  frame_enc_cache->coeffs = Image3F(xsize_blocks * kDCTBlockSize, ysize_blocks);
+  Image3F dc = Image3F(xsize_blocks, ysize_blocks);
+
+  // Pass 1: pixels -> coefficients for every block.
+  for (size_t by = 0; by < ysize_blocks; ++by) {
+    for (int c = 0; c < 3; ++c) {
+      const float *pixels = opsin.ConstPlaneRow(c, by * N);
+      float *block_coeffs = frame_enc_cache->coeffs.PlaneRow(c, by);
+      for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+        AcStrategy acs = ac_strategy.ConstRow(by)[bx];
+        acs.TransformFromPixels(pixels + bx * N, opsin.PixelsPerRow(),
+                                block_coeffs + bx * kDCTBlockSize,
+                                frame_enc_cache->coeffs.PixelsPerRow());
+      }
+    }
+  }
+
+  // Pass 2: coefficients -> DC image. Kept as a separate pass, after all
+  // coefficients have been written.
+  for (size_t by = 0; by < ysize_blocks; ++by) {
+    for (int c = 0; c < 3; ++c) {
+      const float *lowest = frame_enc_cache->coeffs.ConstPlaneRow(c, by);
+      float *dc_row = dc.PlaneRow(c, by);
+      for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+        AcStrategy acs = ac_strategy.ConstRow(by)[bx];
+        acs.DCFromLowestFrequencies(lowest + bx * kDCTBlockSize,
+                                    frame_enc_cache->coeffs.PixelsPerRow(),
+                                    dc_row + bx, dc.PixelsPerRow());
+      }
+    }
+  }
+
+  if (aux_out != nullptr) {
+    aux_out->InspectImage3F("compressed_image:InitializeFrameEncCache:dc", dc);
+  }
+
+  std::cout << "std_dc_float:" << std::endl;
+  dump_float_planes(dc, /*line_breaks=*/true);
+
+  {
+    // Quantize-and-dequantize the Y plane first; it is the reference for the
+    // color correlation applied to `dc` here and undone on `dc_dec` below.
+    ImageF dec_dc_Y = QuantizeRoundtripDC(quantizer, cY, dc.Plane(cY));
+
+    ApplyColorCorrelationDC</*decode=*/false>(cmap, dec_dc_Y, &dc);
+
+    std::cout << "std_dc_apply_false:";
+    dump_float_planes(dc, /*line_breaks=*/true);
+
+    frame_enc_cache->dc = QuantizeCoeffsDC(dc, quantizer);
+
+    dump_quantized_dc_plane("std_dc_x:", 0);
+    dump_quantized_dc_plane("std_dc_y:", 1);
+    dump_quantized_dc_plane("std_dc_b:", 2);
+
+    const size_t dc_xsize = frame_enc_cache->dc.xsize();
+    const size_t dc_ysize = frame_enc_cache->dc.ysize();
+    frame_enc_cache->dc_dec = Image3F(dc_xsize, dc_ysize);
+
+    // Dequantize: every plane has a single multiplier.
+    for (size_t c = 0; c < 3; c++) {
+      const float mul = quantizer.DequantMatrix(0, kQuantKindDCT8, c)[0] *
+                        quantizer.inv_quant_dc();
+      for (size_t y = 0; y < dc_ysize; y++) {
+        const int16_t *PIK_RESTRICT quantized =
+            frame_enc_cache->dc.ConstPlaneRow(c, y);
+        float *PIK_RESTRICT dequantized =
+            frame_enc_cache->dc_dec.PlaneRow(c, y);
+        for (size_t x = 0; x < dc_xsize; x++) {
+          dequantized[x] = quantized[x] * mul;
+        }
+      }
+    }
+
+    ApplyColorCorrelationDC</*decode=*/true>(cmap, dec_dc_Y,
+                                             &frame_enc_cache->dc_dec);
+
+    std::cout << "std_dc_apply_true:";
+    dump_float_planes(frame_enc_cache->dc_dec, /*line_breaks=*/true);
+
+    AdaptiveDCReconstruction(frame_enc_cache->dc_dec, quantizer);
+
+    // This last dump is written as one unbroken line.
+    std::cout << "std_AdaptiveDCReconstruction:";
+    dump_float_planes(frame_enc_cache->dc_dec, /*line_breaks=*/false);
+  }
+
+  if (aux_out != nullptr) {
+    aux_out->InspectImage3F("compressed_image:InitializeFrameEncCache:dc_dec",
+                            frame_enc_cache->dc_dec);
+  }
+}
+
+// Builds the per-group encoder cache `enc_cache` from the frame-level cache.
+//
+//   frame_header, group_header  not referenced by this function body.
+//   frame_enc_cache             source of dc_dec, coeffs and ac_strategy.
+//   group_rect                  the group's area in pixels; it is divided by
+//                               kBlockDim to obtain block coordinates.
+//   enc_cache                   output; must not be initialized yet
+//                               (asserted). `initialized` is set on return.
+//
+// Two debug dumps ("std_frame_dc_dec", "std_enc_dc_dec") are written to
+// std::cout.
+SIMD_ATTR void InitializeEncCache(const FrameHeader &frame_header,
+                                  const GroupHeader &group_header,
+                                  const FrameEncCache &frame_enc_cache,
+                                  const Rect &group_rect, EncCache *enc_cache) {
+  PROFILER_FUNC;
+  constexpr size_t N = kBlockDim;
+  PIK_ASSERT(!enc_cache->initialized);
+
+  const size_t full_xsize_blocks = frame_enc_cache.dc_dec.xsize();
+  const size_t full_ysize_blocks = frame_enc_cache.dc_dec.ysize();
+  const size_t x0_blocks = group_rect.x0() / N;
+  const size_t y0_blocks = group_rect.y0() / N;
+
+  enc_cache->xsize_blocks = group_rect.xsize() / N;
+  enc_cache->ysize_blocks = group_rect.ysize() / N;
+  // Both predictions are hard-wired off here instead of following the header.
+  enc_cache->predict_lf = false; // frame_header.predict_lf;
+  enc_cache->predict_hf = false; // frame_header.predict_hf;
+  enc_cache->grayscale_opt = frame_enc_cache.grayscale_opt;
+
+  // The group's DC image is two blocks larger than the group in each
+  // dimension. SourceCoord() maps every padded coordinate to a row/column of
+  // the full-frame image.
+  // NOTE(review): presumably a one-block border that SourceCoord() mirrors or
+  // clamps at the frame edge - confirm against its definition.
+  enc_cache->dc_dec =
+      Image3F(enc_cache->xsize_blocks + 2, enc_cache->ysize_blocks + 2);
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t y = 0; y < enc_cache->ysize_blocks + 2; y++) {
+      const size_t y_src = SourceCoord(y + y0_blocks, full_ysize_blocks);
+      const float *row_src = frame_enc_cache.dc_dec.ConstPlaneRow(c, y_src);
+      float *row_dc = enc_cache->dc_dec.PlaneRow(c, y);
+      for (size_t x = 0; x < enc_cache->xsize_blocks + 2; x++) {
+        const size_t x_src = SourceCoord(x + x0_blocks, full_xsize_blocks);
+        row_dc[x] = row_src[x_src];
+      }
+    }
+  }
+
+  // Debug dump of plane 0 of the frame-level DC image.
+  // NOTE(review): this always prints the top-left xsize_blocks x ysize_blocks
+  // corner of the frame, without adding x0_blocks / y0_blocks - confirm that
+  // is what the comparison against this dump expects.
+  std::cout << "std_frame_dc_dec:" << std::endl;
+  for (size_t by = 0; by < enc_cache->ysize_blocks; ++by) {
+    const float *PIK_RESTRICT row_x =
+        frame_enc_cache.dc_dec.ConstPlaneRow(0, by);
+    for (size_t bx = 0; bx < enc_cache->xsize_blocks; ++bx) {
+      std::cout << row_x[bx] << ",";
+    }
+    std::cout << std::endl;
+  }
+
+  // Debug dump of plane 0 of the padded per-group DC image.
+  std::cout << "std_enc_dc_dec:" << std::endl;
+  for (size_t by = 0; by < enc_cache->ysize_blocks + 2; ++by) {
+    const float *PIK_RESTRICT row_x = enc_cache->dc_dec.PlaneRow(0, by);
+    for (size_t bx = 0; bx < enc_cache->xsize_blocks + 2; ++bx) {
+      std::cout << row_x[bx] << ",";
+    }
+    std::cout << std::endl;
+  }
+
+  // Coefficients keep one image row per block row and kDCTBlockSize values
+  // per block, hence the horizontal scaling of the rectangle.
+  const Rect coeff_rect(x0_blocks * kDCTBlockSize, y0_blocks,
+                        enc_cache->xsize_blocks * kDCTBlockSize,
+                        enc_cache->ysize_blocks);
+
+  enc_cache->coeffs = CopyImage(coeff_rect, frame_enc_cache.coeffs);
+
+  enc_cache->initialized = true;
+
+  enc_cache->ac_strategy = frame_enc_cache.ac_strategy.Copy(Rect(
+      x0_blocks, y0_blocks, enc_cache->xsize_blocks, enc_cache->ysize_blocks));
+}
+
+// Computes the quantized AC coefficients of one group into `enc_cache->ac`.
+//
+//   quantizer        supplies the raw quant field and the block quantizers.
+//   cmap, cmap_rect  color correlation map and the group's rectangle in it.
+//   frame_enc_cache  supplies dequant_control_field and dequant_map.
+//   enc_cache        in/out; must already be initialized (asserted). Its
+//                    `coeffs` are passed to PredictLfForEncoder() by pointer;
+//                    `quant_field` and `ac` are (re)assigned.
+//   aux_out          not referenced by this function body.
+//
+// Steps: copy the quant field, run PredictLfForEncoder(), round-trip the Y
+// channel through quantization, call UnapplyColorCorrelationAC() with that
+// round-tripped Y, then quantize all three channels.
+//
+// Several debug dumps ("std_corr_in", "std_corr_out", "std_acs", "std_block",
+// "std_qf", "std_ac") are written to std::cout.
+SIMD_ATTR void ComputeCoefficients(const Quantizer &quantizer,
+                                   const ColorCorrelationMap &cmap,
+                                   const Rect &cmap_rect,
+                                   const FrameEncCache &frame_enc_cache,
+                                   EncCache *enc_cache, PikInfo *aux_out) {
+  PROFILER_FUNC;
+  const size_t xsize_blocks = enc_cache->xsize_blocks;
+  const size_t ysize_blocks = enc_cache->ysize_blocks;
+  PIK_ASSERT(enc_cache->initialized);
+
+  enc_cache->quant_field = CopyImage(quantizer.RawQuantField());
+  ImageI &quant_field = enc_cache->quant_field;
+
+  // TODO(user): it would be better to find & apply correlation here, when
+  // quantization is chosen.
+
+  constexpr int cY = 1;
+
+  Image3F pred2x2(enc_cache->dc_dec.xsize() * 2, enc_cache->dc_dec.ysize() * 2);
+
+  PredictLfForEncoder(
+      false, false, enc_cache->dc_dec,
+      enc_cache->ac_strategy, cmap, cmap_rect, quantizer,
+      frame_enc_cache.dequant_control_field, frame_enc_cache.dequant_map,
+      &enc_cache->coeffs, &pred2x2);
+
+  {
+    Image3F coeffs_ac = CopyImage(enc_cache->coeffs);
+
+    // Pre-quantized, matches what decoder will see.
+    ImageF dec_ac_Y(xsize_blocks * kDCTBlockSize, ysize_blocks);
+    const size_t coeffs_stride = coeffs_ac.PixelsPerRow();
+    const size_t dec_ac_stride = dec_ac_Y.PixelsPerRow();
+
+    // Debug helper: prints the x/y/b coefficients of every block, visiting
+    // blocks in 4x4-block tiles. The Y values come from `dec_ac_Y` when
+    // `y_from_dec` is set and from plane 1 of `coeffs_ac` otherwise.
+    const auto dump_corr = [&](const char *label, bool y_from_dec) {
+      constexpr size_t kDumpTile = 4;
+      const size_t tiles_y = (ysize_blocks + kDumpTile - 1) / kDumpTile;
+      const size_t tiles_x = (xsize_blocks + kDumpTile - 1) / kDumpTile;
+      for (size_t tile_y = 0; tile_y < tiles_y; ++tile_y) {
+        for (size_t tile_x = 0; tile_x < tiles_x; ++tile_x) {
+          for (size_t by = 0; by < kDumpTile; ++by) {
+            const size_t block_row = tile_y * kDumpTile + by;
+            // The last tile row may be partial: stop before asking the
+            // images for a row that does not exist.
+            if (block_row >= ysize_blocks) {
+              break;
+            }
+            const float *PIK_RESTRICT row_x =
+                coeffs_ac.ConstPlaneRow(0, block_row);
+            const float *PIK_RESTRICT row_y =
+                y_from_dec ? dec_ac_Y.Row(block_row)
+                           : coeffs_ac.ConstPlaneRow(1, block_row);
+            const float *PIK_RESTRICT row_b =
+                coeffs_ac.ConstPlaneRow(2, block_row);
+            for (size_t bx = 0; bx < kDumpTile; ++bx) {
+              const size_t block_col = tile_x * kDumpTile + bx;
+              if (block_col >= xsize_blocks) {
+                break;
+              }
+              for (int k = 0; k < 64; k++) {
+                const size_t idx = block_col * 64 + k;
+                // `by` / `bx` are printed relative to the 4x4 tile.
+                std::cout << label << " by=" << by << " bx=" << bx
+                          << " k=" << k << " x=" << row_x[idx]
+                          << " y=" << row_y[idx] << " b=" << row_b[idx]
+                          << std::endl;
+              }
+            }
+          }
+        }
+      }
+    };
+
+    // Round-trip the Y channel through quantization so that the decorrelation
+    // below sees the Y values produced by QuantizeRoundtripBlockAC().
+    for (size_t by = 0; by < ysize_blocks; ++by) {
+      const float *PIK_RESTRICT row_in = coeffs_ac.ConstPlaneRow(cY, by);
+      float *PIK_RESTRICT row_out = dec_ac_Y.Row(by);
+      AcStrategyRow ac_strategy_row = enc_cache->ac_strategy.ConstRow(by);
+      size_t ty = by / kColorTileDimInBlocks;
+      const uint8_t *row_quant_cf =
+          cmap_rect.ConstRow(frame_enc_cache.dequant_control_field, ty);
+
+      for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+        AcStrategy acs = ac_strategy_row[bx];
+        // Only blocks for which IsFirstBlock() holds are processed; the
+        // covered_blocks_x()/y() arguments below describe the block's extent.
+        if (!acs.IsFirstBlock())
+          continue;
+        const int32_t quant_ac = quant_field.Row(by)[bx];
+        size_t tx = bx / kColorTileDimInBlocks;
+        uint8_t quant_table =
+            frame_enc_cache.dequant_map[row_quant_cf[tx]][quant_ac];
+
+        quantizer.QuantizeRoundtripBlockAC(
+            cY, quant_table, quant_ac, acs.GetQuantKind(),
+            acs.covered_blocks_x(), acs.covered_blocks_y(),
+            row_in + bx * kDCTBlockSize, coeffs_stride,
+            row_out + bx * kDCTBlockSize, dec_ac_stride);
+      }
+    }
+
+    dump_corr("std_corr_in", /*y_from_dec=*/true);
+
+    UnapplyColorCorrelationAC(cmap, cmap_rect, dec_ac_Y, &coeffs_ac);
+
+    dump_corr("std_corr_out", /*y_from_dec=*/false);
+
+    enc_cache->ac = Image3S(xsize_blocks * kDCTBlockSize, ysize_blocks);
+    size_t ac_stride = enc_cache->ac.PixelsPerRow();
+
+    // Final quantization of all three channels.
+    for (int c = 0; c < 3; ++c) {
+      for (size_t by = 0; by < ysize_blocks; ++by) {
+        const float *PIK_RESTRICT row_in = coeffs_ac.PlaneRow(c, by);
+        int16_t *PIK_RESTRICT row_out = enc_cache->ac.PlaneRow(c, by);
+        const int32_t *row_quant = quant_field.ConstRow(by);
+        AcStrategyRow ac_strategy_row = enc_cache->ac_strategy.ConstRow(by);
+        size_t ty = by / kColorTileDimInBlocks;
+        const uint8_t *row_quant_cf =
+            cmap_rect.ConstRow(frame_enc_cache.dequant_control_field, ty);
+        for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+          AcStrategy acs = ac_strategy_row[bx];
+          if (!acs.IsFirstBlock())
+            continue;
+          int quant_ac = row_quant[bx];
+          size_t tx = bx / kColorTileDimInBlocks;
+          uint8_t quant_table =
+              frame_enc_cache.dequant_map[row_quant_cf[tx]][quant_ac];
+          quantizer.QuantizeBlockAC(
+              quant_table, quant_ac, acs.GetQuantKind(), c,
+              acs.covered_blocks_x(), acs.covered_blocks_y(),
+              row_in + bx * kDCTBlockSize, coeffs_stride,
+              row_out + bx * kDCTBlockSize, ac_stride);
+        }
+      }
+    }
+
+    // Debug dump: raw strategy code of every block.
+    std::cout << "std_acs:" << std::endl;
+    for (size_t by = 0; by < ysize_blocks; ++by) {
+      AcStrategyRow ac_strategy_row = enc_cache->ac_strategy.ConstRow(by);
+      for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+        AcStrategy acs = ac_strategy_row[bx];
+        std::cout << (int)acs.RawStrategy() << ",";
+      }
+      std::cout << std::endl;
+    }
+
+    // Debug dump: Block() value of every block.
+    std::cout << "std_block:" << std::endl;
+    for (size_t by = 0; by < ysize_blocks; ++by) {
+      AcStrategyRow ac_strategy_row = enc_cache->ac_strategy.ConstRow(by);
+      for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+        AcStrategy acs = ac_strategy_row[bx];
+        std::cout << (int)acs.Block() << ",";
+      }
+      std::cout << std::endl;
+    }
+
+    // Debug dump: quant field value of every block.
+    std::cout << "std_qf:" << std::endl;
+    for (size_t by = 0; by < ysize_blocks; ++by) {
+      const int32_t *row_quant = quant_field.ConstRow(by);
+      for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+        int quant_ac = row_quant[bx];
+        std::cout << quant_ac << ",";
+      }
+      std::cout << std::endl;
+    }
+
+    // Debug dump: the 64 quantized values of every block, one line per block.
+    for (int c = 0; c < 3; c++) {
+      for (size_t by = 0; by < ysize_blocks; ++by) {
+        int16_t *PIK_RESTRICT row_out = enc_cache->ac.PlaneRow(c, by);
+        for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+          std::cout << std::dec << "std_ac c=" << c
+                    << " id:" << by * xsize_blocks + bx;
+          for (int i = 0; i < 64; i++) {
+            std::cout << std::dec << ","
+                      << (int)row_out[bx * kDCTBlockSize + i];
+          }
+          std::cout << std::endl;
+        }
+      }
+    }
+  }
+}
+
+// Serializes one group's AC data and returns the bytes.
+//
+// The returned buffer is the concatenation, in this order, of the noise
+// code, the coefficient-order code, the histogram code and the token code.
+//
+//   enc_cache     supplies the quantized coefficients (`ac`) and `ac_strategy`.
+//   rect          group area in pixels; x0/y0 must be multiples of kTileDim
+//                 and xsize/ysize multiples of kBlockDim (asserted).
+//   quantizer     not referenced by this function body.
+//   noise_params  encoded with EncodeNoise().
+//   fast_mode     selects BuildAndEncodeHistogramsFast() over
+//                 BuildAndEncodeHistograms().
+//   handler       not referenced by this function body.
+//   info          optional; when non-null its layer statistics and
+//                 `entropy_estimate` are updated.
+//
+// The first 64 entries of the coefficient order are dumped to std::cout
+// under "std_order: ".
+PaddedBytes EncodeToBitstream(const EncCache &enc_cache, const Rect &rect,
+                              const Quantizer &quantizer,
+                              const NoiseParams &noise_params, bool fast_mode,
+                              MultipassHandler *handler, PikInfo *info) {
+  PROFILER_FUNC;
+  constexpr size_t N = kBlockDim;
+  PIK_ASSERT(rect.x0() % kTileDim == 0);
+  PIK_ASSERT(rect.xsize() % N == 0);
+  PIK_ASSERT(rect.y0() % kTileDim == 0);
+  PIK_ASSERT(rect.ysize() % N == 0);
+
+  // Group extent in blocks and in tiles.
+  const size_t xsize_blocks = rect.xsize() / N;
+  const size_t ysize_blocks = rect.ysize() / N;
+  const size_t xsize_tiles = DivCeil(xsize_blocks, kTileDimInBlocks);
+  const size_t ysize_tiles = DivCeil(ysize_blocks, kTileDimInBlocks);
+
+  const Rect group_acs_qf_area_rect(rect.x0() / N, rect.y0() / N, xsize_blocks,
+                                    ysize_blocks);
+  // Not used below; the tokenization loop builds its own per-tile rects.
+  const Rect tile_rect(rect.x0() / kTileDim, rect.y0() / kTileDim, xsize_tiles,
+                       ysize_tiles);
+
+  PikImageSizeInfo *ac_info = nullptr;
+  if (info != nullptr) {
+    ac_info = &info->layers[kLayerAC];
+  }
+  std::string noise_code = EncodeNoise(noise_params);
+
+  // Same area as `rect`, expressed in the coefficient layout: x scaled up by
+  // N, y scaled down by N.
+  const Rect ac_rect(N * rect.x0(), rect.y0() / N, N * rect.xsize(),
+                     rect.ysize() / N);
+
+  // TODO(veluca): do not allocate every call, allocate only what is actually
+  // needed.
+  Image3S ac(enc_cache.ac.xsize(), enc_cache.ac.ysize());
+  const size_t src_stride = enc_cache.ac.PixelsPerRow();
+  const size_t dst_stride = ac.PixelsPerRow();
+
+  // Scatter coefficients. TODO(veluca): remove when large blocks are
+  // encoded all at once.
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t by = 0; by < ysize_blocks; by++) {
+      AcStrategyRow strategies =
+          enc_cache.ac_strategy.ConstRow(group_acs_qf_area_rect, by);
+      const int16_t *src_row = ac_rect.ConstPlaneRow(enc_cache.ac, c, by);
+      int16_t *dst_row = ac_rect.PlaneRow(&ac, c, by);
+      for (size_t bx = 0; bx < xsize_blocks; bx++) {
+        const size_t block_offset = kDCTBlockSize * bx;
+        AcStrategy acs = strategies[bx];
+        acs.ScatterCoefficients(src_row + block_offset, src_stride,
+                                dst_row + block_offset, dst_stride);
+      }
+    }
+  }
+
+  int32_t order[kOrderContexts * kDCTBlockSize];
+  ComputeCoeffOrder(ac, ac_rect, order);
+
+  // Debug dump of the first 64 order entries.
+  std::cout << "std_order: " << std::endl;
+  for (int i = 0; i < 64; i++) {
+    std::cout << order[i] << ",";
+  }
+  std::cout << std::endl;
+
+  std::string order_code = EncodeCoeffOrders(order, info);
+
+  // A single token list collects the tokens of every tile, row by row.
+  std::vector<std::vector<Token>> ac_tokens(1);
+  for (size_t ty = 0; ty < ysize_tiles; ty++) {
+    for (size_t tx = 0; tx < xsize_tiles; tx++) {
+      const Rect block_tile(tx * kTileDimInBlocks, ty * kTileDimInBlocks,
+                            kTileDimInBlocks, kTileDimInBlocks, xsize_blocks,
+                            ysize_blocks);
+      TokenizeCoefficients(order, block_tile, ac, &ac_tokens[0]);
+    }
+  }
+
+  std::vector<uint8_t> context_map;
+  std::vector<ANSEncodingData> codes;
+  std::string histo_code = "";
+  if (!fast_mode) {
+    histo_code = BuildAndEncodeHistograms(kNumContexts, ac_tokens, &codes,
+                                          &context_map, ac_info);
+  } else {
+    histo_code =
+        BuildAndEncodeHistogramsFast(ac_tokens, &codes, &context_map, ac_info);
+  }
+
+  std::string ac_code = WriteTokens(ac_tokens[0], codes, context_map, ac_info);
+
+  if (info) {
+    info->layers[kLayerHeader].total_size += noise_code.size();
+  }
+
+  // Assemble the output buffer.
+  PaddedBytes out(noise_code.size() + order_code.size() + histo_code.size() +
+                  ac_code.size());
+  size_t byte_pos = 0;
+  Append(noise_code, &out, &byte_pos);
+  Append(order_code, &out, &byte_pos);
+  Append(histo_code, &out, &byte_pos);
+  Append(ac_code, &out, &byte_pos);
+
+  // Size estimate: the bytes that are neither token nor histogram code, plus
+  // an entropy-based estimate for the tokens.
+  // TODO(veluca): fix this with DC supergroups.
+  float output_size_estimate = out.size() - ac_code.size() - histo_code.size();
+
+  // Per-context symbol counts and the total number of raw extra bits.
+  std::vector<std::array<size_t, 256>> counts(kNumContexts);
+  size_t extra_bits = 0;
+  for (const std::vector<Token> &token_list : ac_tokens) {
+    for (const Token &token : token_list) {
+      counts[token.context][token.symbol]++;
+      extra_bits += token.nbits;
+    }
+  }
+
+  float entropy_coded_bits = 0;
+  for (size_t ctx = 0; ctx < kNumContexts; ctx++) {
+    const std::array<size_t, 256> &histogram = counts[ctx];
+    size_t total =
+        std::accumulate(histogram.begin(), histogram.end(), size_t(0));
+    if (total == 0)
+      continue; // Prevent div by zero.
+    double entropy = 0;
+    for (size_t symbol_count : histogram) {
+      double p = 1.0 * symbol_count / total;
+      // Symbols with a probability of at most 1e-4 are left out of the sum.
+      if (p > 1e-4) {
+        entropy -= p * std::log(p);
+      }
+    }
+    // Natural-log entropy per symbol -> bits for the whole context.
+    entropy_coded_bits += entropy * total / std::log(2);
+  }
+  output_size_estimate +=
+      static_cast<float>(extra_bits + entropy_coded_bits) / kBitsPerByte;
+  if (info != nullptr) {
+    info->entropy_estimate = output_size_estimate;
+  }
+  return out;
+}
+
+
+// Variant of the group bitstream encoder that can take its coefficients and
+// coefficient order from external buffers.
+//
+// The returned buffer is the concatenation, in this order, of the noise
+// code, the coefficient-order code, the histogram code and the token code.
+//
+//   enc_cache     supplies the quantized coefficients (`ac`) and `ac_strategy`.
+//   rect          group area in pixels; x0/y0 must be multiples of kTileDim
+//                 and xsize/ysize multiples of kBlockDim (asserted).
+//   quantizer     not referenced by this function body.
+//   noise_params  encoded with EncodeNoise().
+//   fast_mode     selects BuildAndEncodeHistogramsFast() over
+//                 BuildAndEncodeHistograms().
+//   handler       not referenced by this function body.
+//   info          optional; when non-null its layer statistics and
+//                 `entropy_estimate` are updated.
+//   ac_x, ac_y, ac_b  read only when USE_K2_DATA is defined: 64 values per
+//                 block, blocks in row-major order, which then overwrite the
+//                 scattered coefficients.
+//   k2_order      read only when USE_K2_DATA is defined: 64*3 entries that
+//                 then overwrite the computed coefficient order.
+//
+// The scattered coefficients ("std_ac_x:", "std_ac_y:", "std_ac_b:") and the
+// first 64*3 computed order entries ("std_order: ") are dumped to std::cout
+// before any USE_K2_DATA override is applied.
+PaddedBytes hls_EncodeToBitstream(const EncCache &enc_cache, const Rect &rect,
+                                  const Quantizer &quantizer,
+                                  const NoiseParams &noise_params,
+                                  bool fast_mode, MultipassHandler *handler,
+                                  PikInfo *info,
+
+                                  ap_uint<32> *ac_x,
+                                  ap_uint<32> *ac_y,
+                                  ap_uint<32> *ac_b,
+                                  ap_uint<32> *k2_order) {
+  PROFILER_FUNC;
+  constexpr size_t N = kBlockDim;
+  PIK_ASSERT(rect.x0() % kTileDim == 0);
+  PIK_ASSERT(rect.xsize() % N == 0);
+  PIK_ASSERT(rect.y0() % kTileDim == 0);
+  PIK_ASSERT(rect.ysize() % N == 0);
+
+  // Group extent in blocks and in tiles.
+  const size_t xsize_blocks = rect.xsize() / N;
+  const size_t ysize_blocks = rect.ysize() / N;
+  const size_t xsize_tiles = DivCeil(xsize_blocks, kTileDimInBlocks);
+  const size_t ysize_tiles = DivCeil(ysize_blocks, kTileDimInBlocks);
+
+  const Rect group_acs_qf_area_rect(rect.x0() / N, rect.y0() / N, xsize_blocks,
+                                    ysize_blocks);
+  // Not used below; the tokenization loop builds its own per-tile rects.
+  const Rect tile_rect(rect.x0() / kTileDim, rect.y0() / kTileDim, xsize_tiles,
+                       ysize_tiles);
+
+  PikImageSizeInfo *ac_info = nullptr;
+  if (info != nullptr) {
+    ac_info = &info->layers[kLayerAC];
+  }
+  std::string noise_code = EncodeNoise(noise_params);
+
+  // Same area as `rect`, expressed in the coefficient layout: x scaled up by
+  // N, y scaled down by N.
+  const Rect ac_rect(N * rect.x0(), rect.y0() / N, N * rect.xsize(),
+                     rect.ysize() / N);
+
+  // TODO(veluca): do not allocate every call, allocate only what is actually
+  // needed.
+  Image3S ac(enc_cache.ac.xsize(), enc_cache.ac.ysize());
+  const size_t src_stride = enc_cache.ac.PixelsPerRow();
+  const size_t dst_stride = ac.PixelsPerRow();
+
+  // Scatter coefficients. TODO(veluca): remove when large blocks are
+  // encoded all at once.
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t by = 0; by < ysize_blocks; by++) {
+      AcStrategyRow strategies =
+          enc_cache.ac_strategy.ConstRow(group_acs_qf_area_rect, by);
+      const int16_t *src_row = ac_rect.ConstPlaneRow(enc_cache.ac, c, by);
+      int16_t *dst_row = ac_rect.PlaneRow(&ac, c, by);
+      for (size_t bx = 0; bx < xsize_blocks; bx++) {
+        const size_t block_offset = kDCTBlockSize * bx;
+        AcStrategy acs = strategies[bx];
+        acs.ScatterCoefficients(src_row + block_offset, src_stride,
+                                dst_row + block_offset, dst_stride);
+      }
+    }
+  }
+
+  // Debug dump of the scattered coefficients: for each plane a header line,
+  // then per block an "id:" line followed by its 64 values.
+  static const char *const kPlaneDumpLabels[3] = {"std_ac_x:", "std_ac_y:",
+                                                  "std_ac_b:"};
+  for (int c = 0; c < 3; c++) {
+    std::cout << kPlaneDumpLabels[c] << std::endl;
+    for (size_t by = 0; by < ysize_blocks; by++) {
+      int16_t *PIK_RESTRICT plane_row = ac.PlaneRow(c, by);
+      for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+        std::cout << "id:" << by * xsize_blocks + bx << std::endl;
+        for (int i = 0; i < 64; i++) {
+          std::cout << plane_row[bx * 64 + i] << ",";
+        }
+        std::cout << std::endl;
+      }
+    }
+  }
+
+#ifdef USE_K2_DATA
+  // Replace the scattered coefficients with the externally supplied ones.
+  for (size_t by = 0; by < ysize_blocks; ++by) {
+    int16_t *PIK_RESTRICT ac_xin = ac.PlaneRow(0, by);
+    int16_t *PIK_RESTRICT ac_yin = ac.PlaneRow(1, by);
+    int16_t *PIK_RESTRICT ac_bin = ac.PlaneRow(2, by);
+
+    for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+      const size_t dst_base = bx * kDCTBlockSize;
+      const size_t src_base = (by * xsize_blocks + bx) * 64;
+      for (int k = 0; k < 64; k++) {
+        ac_xin[dst_base + k] = ac_x[src_base + k];
+        ac_yin[dst_base + k] = ac_y[src_base + k];
+        ac_bin[dst_base + k] = ac_b[src_base + k];
+      }
+    }
+  }
+#endif
+
+  int32_t order[kOrderContexts * kDCTBlockSize];
+  ComputeCoeffOrder(ac, ac_rect, order);
+
+  // Debug dump of the first 64*3 computed order entries.
+  std::cout << "std_order: " << std::endl;
+  for (int i = 0; i < 64 * 3; i++) {
+    std::cout << order[i] << ",";
+  }
+  std::cout << std::endl;
+
+#ifdef USE_K2_DATA
+  // Replace the computed order with the externally supplied one.
+  for (size_t i = 0; i < 64 * 3; ++i) {
+    order[i] = k2_order[i];
+  }
+#endif
+
+  std::string order_code = EncodeCoeffOrders(order, info);
+
+  // A single token list collects the tokens of every tile, row by row.
+  std::vector<std::vector<Token>> ac_tokens(1);
+  for (size_t ty = 0; ty < ysize_tiles; ty++) {
+    for (size_t tx = 0; tx < xsize_tiles; tx++) {
+      const Rect block_tile(tx * kTileDimInBlocks, ty * kTileDimInBlocks,
+                            kTileDimInBlocks, kTileDimInBlocks, xsize_blocks,
+                            ysize_blocks);
+      TokenizeCoefficients(order, block_tile, ac, &ac_tokens[0]);
+    }
+  }
+
+  std::vector<uint8_t> context_map;
+  std::vector<ANSEncodingData> codes;
+  std::string histo_code = "";
+  if (!fast_mode) {
+    histo_code = BuildAndEncodeHistograms(kNumContexts, ac_tokens, &codes,
+                                          &context_map, ac_info);
+  } else {
+    histo_code =
+        BuildAndEncodeHistogramsFast(ac_tokens, &codes, &context_map, ac_info);
+  }
+
+  std::string ac_code = WriteTokens(ac_tokens[0], codes, context_map, ac_info);
+
+  if (info) {
+    info->layers[kLayerHeader].total_size += noise_code.size();
+  }
+
+  // Assemble the output buffer.
+  PaddedBytes out(noise_code.size() + order_code.size() + histo_code.size() +
+                  ac_code.size());
+  size_t byte_pos = 0;
+  Append(noise_code, &out, &byte_pos);
+  Append(order_code, &out, &byte_pos);
+  Append(histo_code, &out, &byte_pos);
+  Append(ac_code, &out, &byte_pos);
+
+  // Size estimate: the bytes that are neither token nor histogram code, plus
+  // an entropy-based estimate for the tokens.
+  // TODO(veluca): fix this with DC supergroups.
+  float output_size_estimate = out.size() - ac_code.size() - histo_code.size();
+
+  // Per-context symbol counts and the total number of raw extra bits.
+  std::vector<std::array<size_t, 256>> counts(kNumContexts);
+  size_t extra_bits = 0;
+  for (const std::vector<Token> &token_list : ac_tokens) {
+    for (const Token &token : token_list) {
+      counts[token.context][token.symbol]++;
+      extra_bits += token.nbits;
+    }
+  }
+
+  float entropy_coded_bits = 0;
+  for (size_t ctx = 0; ctx < kNumContexts; ctx++) {
+    const std::array<size_t, 256> &histogram = counts[ctx];
+    size_t total =
+        std::accumulate(histogram.begin(), histogram.end(), size_t(0));
+    if (total == 0)
+      continue; // Prevent div by zero.
+    double entropy = 0;
+    for (size_t symbol_count : histogram) {
+      double p = 1.0 * symbol_count / total;
+      // Symbols with a probability of at most 1e-4 are left out of the sum.
+      if (p > 1e-4) {
+        entropy -= p * std::log(p);
+      }
+    }
+    // Natural-log entropy per symbol -> bits for the whole context.
+    entropy_coded_bits += entropy * total / std::log(2);
+  }
+  output_size_estimate +=
+      static_cast<float>(extra_bits + entropy_coded_bits) / kBitsPerByte;
+  if (info != nullptr) {
+    info->entropy_estimate = output_size_estimate;
+  }
+  return out;
+}
+
+template <bool first> class Dequant { // first: overwrite (true) / add (false)
+public:
+  // Holds `quantizer` by reference, so it must outlive this object.
+  explicit Dequant(const Quantizer &quantizer)
+      : dequant_matrices_(quantizer.DequantMatrix(0, kQuantKindDCT8, 0)),
+        inv_global_scale_(quantizer.InvGlobalScale()), quantizer_(quantizer) {}
+
+  // Dequantizes and inverse color-transforms one tile, i.e. the window
+  // `rect` (in block units) within the output image `group_dec_cache->ac`.
+  // Reads the rect `rect16` (in block units) in `img_ac16`. Only accesses
+  // the `block_group_rect` part of ac_strategy/quant_field.
+  SIMD_ATTR void DoAC(const Rect &rect16, const Image3S &img_ac16,
+                      const Rect &rect, const Rect &block_group_rect,
+                      const ImageI &img_ytox, const ImageI &img_ytob,
+                      const Rect &cmap_rect,
+                      FrameDecCache *PIK_RESTRICT frame_dec_cache,
+                      GroupDecCache *PIK_RESTRICT group_dec_cache,
+                      PikInfo *aux_out) const {
+    PROFILER_FUNC;
+    PIK_ASSERT(SameSize(rect, rect16));
+    const size_t xsize = rect.xsize(); // [blocks]
+    const size_t ysize = rect.ysize();
+    PIK_ASSERT(img_ac16.xsize() % kDCTBlockSize == 0);
+    PIK_ASSERT(xsize <= img_ac16.xsize() / kDCTBlockSize);
+    PIK_ASSERT(ysize <= img_ac16.ysize());
+    PIK_ASSERT(SameSize(img_ytox, img_ytob));
+
+    using D = SIMD_FULL(float);
+    constexpr D d;
+    constexpr SIMD_PART(int16_t, D::N) d16;
+    constexpr SIMD_PART(int32_t, D::N) d32;
+
+    // Rect representing the current tile inside the current group, in an image
+    // in which each block is 1x1.
+    const Rect block_tile_group_rect(block_group_rect.x0() + rect.x0(),
+                                     block_group_rect.y0() + rect.y0(),
+                                     rect.xsize(), rect.ysize());
+
+    const size_t x0_cmap = rect.x0() / kColorTileDimInBlocks;
+    const size_t y0_cmap = rect.y0() / kColorTileDimInBlocks;
+    const size_t x0_dct = rect.x0() * kDCTBlockSize;
+    const size_t x0_dct16 = rect16.x0() * kDCTBlockSize;
+
+    // TODO(veluca): get rid of acs.Block() and only use acs.IsFirst()
+    for (size_t by = 0; by < ysize; ++by) {
+      const size_t ty = by / kColorTileDimInBlocks;
+      const int16_t *PIK_RESTRICT row_16[3] = {
+          img_ac16.PlaneRow(0, by + rect16.y0()) + x0_dct16,
+          img_ac16.PlaneRow(1, by + rect16.y0()) + x0_dct16,
+          img_ac16.PlaneRow(2, by + rect16.y0()) + x0_dct16};
+      const int *PIK_RESTRICT row_quant_field =
+          block_tile_group_rect.ConstRow(frame_dec_cache->raw_quant_field, by);
+      static_assert(kColorTileDimInBlocks == kTileDimInBlocks,
+                    "Quantization table selection assumes that color tile and "
+                    "tiles have the same size!");
+      const uint8_t *PIK_RESTRICT row_quant_cf =
+          cmap_rect.ConstRow(frame_dec_cache->dequant_control_field,
+                             ty + y0_cmap) +
+          x0_cmap;
+      const int *PIK_RESTRICT row_cmap[3] = {
+          cmap_rect.ConstRow(img_ytox, ty + y0_cmap) + x0_cmap, nullptr,
+          cmap_rect.ConstRow(img_ytob, ty + y0_cmap) + x0_cmap,
+      };
+      float *PIK_RESTRICT row[3] = {
+          group_dec_cache->ac.PlaneRow(0, rect.y0() + by) + x0_dct,
+          group_dec_cache->ac.PlaneRow(1, rect.y0() + by) + x0_dct,
+          group_dec_cache->ac.PlaneRow(2, rect.y0() + by) + x0_dct,
+      };
+
+      AcStrategyRow ac_strategy_row =
+          frame_dec_cache->ac_strategy.ConstRow(block_tile_group_rect, by);
+      for (size_t bx = 0; bx < xsize; ++bx) {
+        const auto scaled_dequant =
+            set1(d, SafeDiv(inv_global_scale_, row_quant_field[bx]));
+        const size_t tx = bx / kColorTileDimInBlocks;
+        uint8_t quant_table =
+            frame_dec_cache
+                ->dequant_map[row_quant_cf[tx]][row_quant_field[bx] - 1];
+        // NOTE(review): quant_table lookup uses column -1 if the raw quant is 0.
+        size_t kind = ac_strategy_row[bx].GetQuantKind();
+        const float *PIK_RESTRICT dequant_matrix[3] = {
+            &dequant_matrices_[quantizer_.DequantMatrixOffset(quant_table, kind,
+                                                              0) +
+                               ac_strategy_row[bx].Block() * kDCTBlockSize],
+            &dequant_matrices_[quantizer_.DequantMatrixOffset(quant_table, kind,
+                                                              1) +
+                               ac_strategy_row[bx].Block() * kDCTBlockSize],
+            &dequant_matrices_[quantizer_.DequantMatrixOffset(quant_table, kind,
+                                                              2) +
+                               ac_strategy_row[bx].Block() * kDCTBlockSize],
+        };
+        const auto x_cc_mul =
+            set1(d, ColorCorrelationMap::YtoX(1.0f, row_cmap[0][tx]));
+        const auto b_cc_mul =
+            set1(d, ColorCorrelationMap::YtoB(1.0f, row_cmap[2][tx]));
+        for (size_t k = 0; k < kDCTBlockSize; k += d.N) { // d.N values per step
+          const size_t x = bx * kDCTBlockSize + k;
+
+          const auto x_mul = load(d, dequant_matrix[0] + k) * scaled_dequant;
+          const auto y_mul = load(d, dequant_matrix[1] + k) * scaled_dequant;
+          const auto b_mul = load(d, dequant_matrix[2] + k) * scaled_dequant;
+
+          const auto quantized_x16 = load(d16, row_16[0] + x);
+          const auto quantized_y16 = load(d16, row_16[1] + x);
+          const auto quantized_b16 = load(d16, row_16[2] + x);
+          const auto quantized_x =
+              convert_to(d, convert_to(d32, quantized_x16));
+          const auto quantized_y =
+              convert_to(d, convert_to(d32, quantized_y16));
+          const auto quantized_b =
+              convert_to(d, convert_to(d32, quantized_b16));
+
+          const auto dequant_x_cc = AdjustQuantBias<0>(quantized_x) * x_mul;
+          const auto dequant_y = AdjustQuantBias<1>(quantized_y) * y_mul;
+          const auto dequant_b_cc = AdjustQuantBias<2>(quantized_b) * b_mul;
+          // Inverse color transform (assuming mul_add(a, b, c) = a * b + c).
+          const auto dequant_x = mul_add(x_cc_mul, dequant_y, dequant_x_cc);
+          const auto dequant_b = mul_add(b_cc_mul, dequant_y, dequant_b_cc);
+          // First pass overwrites the output row; otherwise add to its content.
+          if (first) {
+            store(dequant_x, d, row[0] + x);
+            store(dequant_y, d, row[1] + x);
+            store(dequant_b, d, row[2] + x);
+          } else {
+            store(dequant_x + load(d, row[0] + x), d, row[0] + x);
+            store(dequant_y + load(d, row[1] + x), d, row[1] + x);
+            store(dequant_b + load(d, row[2] + x), d, row[2] + x);
+          }
+        }
+      }
+    }
+  }
+
+private:
+  static PIK_INLINE float SafeDiv(float num, int32_t div) {
+    return div == 0 ? 1E10f : num / div; // 1E10f stands in for division by 0
+  }
+
+  // Result of DequantMatrix(0, kQuantKindDCT8, 0); indexed via DequantMatrixOffset.
+  const float *PIK_RESTRICT dequant_matrices_;
+  float inv_global_scale_;
+  const Quantizer &quantizer_;
+};
+
+template <bool first> // `first` is forwarded to Dequant<first>.
+bool DecodeFromBitstream(const FrameHeader &frame_header,
+                         const GroupHeader &header,
+                         const PaddedBytes &compressed, BitReader *reader,
+                         const Rect &group_rect, MultipassHandler *handler,
+                         const size_t xsize_blocks, const size_t ysize_blocks,
+                         const ColorCorrelationMap &cmap, const Rect &cmap_rect,
+                         NoiseParams *noise_params, const Quantizer &quantizer,
+                         FrameDecCache *PIK_RESTRICT frame_dec_cache,
+                         GroupDecCache *PIK_RESTRICT group_dec_cache,
+                         PikInfo *aux_out) {
+  PROFILER_FUNC;
+  // Reads noise, coefficient orders and histograms, then decodes AC per tile.
+  PIK_RETURN_IF_ERROR(DecodeNoise(reader, noise_params));
+  // The group origin must be block-aligned; convert it to block units.
+  PIK_ASSERT(group_rect.x0() % kBlockDim == 0);
+  PIK_ASSERT(group_rect.y0() % kBlockDim == 0);
+  const size_t x0_blocks = DivCeil(group_rect.x0(), kBlockDim);
+  const size_t y0_blocks = DivCeil(group_rect.y0(), kBlockDim);
+  const Rect group_acs_qf_rect(x0_blocks, y0_blocks, xsize_blocks,
+                               ysize_blocks);
+
+  const size_t xsize_tiles = DivCeil(xsize_blocks, kTileDimInBlocks);
+  const size_t ysize_tiles = DivCeil(ysize_blocks, kTileDimInBlocks);
+  const size_t num_tiles = xsize_tiles * ysize_tiles;
+
+  group_dec_cache->InitOnce(xsize_blocks, ysize_blocks);
+  // One coefficient order of kDCTBlockSize entries per order context.
+  int coeff_order[kOrderContexts * kDCTBlockSize];
+  for (size_t c = 0; c < kOrderContexts; ++c) {
+    DecodeCoeffOrder(&coeff_order[c * kDCTBlockSize], reader);
+  }
+  PIK_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+
+  ANSCode code;
+  std::vector<uint8_t> context_map;
+  // Histogram data size is small and does not require parallelization.
+  PIK_RETURN_IF_ERROR(
+      DecodeHistograms(reader, kNumContexts, 256, &code, &context_map));
+  PIK_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+
+  Dequant<first> dequant(quantizer);
+  // Scratch for the coefficients gathered from one (possibly multi-block) transform.
+  SIMD_ALIGN int16_t unscattered[AcStrategy::kMaxCoeffArea];
+  const size_t stride = group_dec_cache->quantized_ac.PixelsPerRow();
+  // All tiles share one ANS reader; its final state is checked after the loop.
+  ANSSymbolReader ac_decoder(&code);
+  for (size_t task = 0; task < num_tiles; ++task) {
+    const size_t tile_x = task % xsize_tiles;
+    const size_t tile_y = task / xsize_tiles;
+    const Rect rect(tile_x * kTileDimInBlocks, tile_y * kTileDimInBlocks,
+                    kTileDimInBlocks, kTileDimInBlocks, xsize_blocks,
+                    ysize_blocks);
+    const Rect quantized_rect(0, 0, rect.xsize(), rect.ysize());
+    // Decode this tile's quantized coefficients into quantized_ac.
+    if (!DecodeAC(context_map, coeff_order, reader, &ac_decoder,
+                  &group_dec_cache->quantized_ac, rect,
+                  &group_dec_cache->num_nzeroes)) {
+      return PIK_FAILURE("Failed to decode AC.");
+    }
+    // Unscatter coefficients. TODO(veluca): remove when large blocks are
+    // encoded all at once.
+    for (size_t c = 0; c < 3; c++) {
+      for (size_t by = 0; by < rect.ysize(); by++) {
+        AcStrategyRow acs_row = frame_dec_cache->ac_strategy.ConstRow(
+            group_acs_qf_rect, by + rect.y0());
+        int16_t *PIK_RESTRICT row =
+            group_dec_cache->quantized_ac.PlaneRow(c, by);
+        for (size_t bx = 0; bx < rect.xsize(); bx++) {
+          AcStrategy acs = acs_row[bx + rect.x0()];
+          if (!acs.IsFirstBlock())
+            continue;
+          int16_t *block = row + bx * kDCTBlockSize;
+          acs.GatherCoefficients(block, stride, unscattered,
+                                 acs.covered_blocks_x() * kDCTBlockSize);
+          for (size_t i = 0; i < acs.covered_blocks_y(); i++) {
+            memcpy(block + stride * i,
+                   unscattered + acs.covered_blocks_x() * kDCTBlockSize * i,
+                   sizeof(int16_t) * acs.covered_blocks_x() * kDCTBlockSize);
+          }
+        }
+      }
+    }
+    // Dequantize: reads quantized_ac at origin (0, 0), writes `ac` at `rect`.
+    dequant.DoAC(quantized_rect, group_dec_cache->quantized_ac, rect,
+                 group_acs_qf_rect, cmap.ytox_map, cmap.ytob_map, cmap_rect,
+                 frame_dec_cache, group_dec_cache, aux_out);
+  }
+  if (!ac_decoder.CheckANSFinalState()) {
+    return PIK_FAILURE("ANS checksum failure.");
+  }
+  PIK_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+
+  return true;
+}
+
+template bool DecodeFromBitstream<true>( // explicit instantiation, first == true
+    const FrameHeader &, const GroupHeader &, const PaddedBytes &, BitReader *,
+    const Rect &, MultipassHandler *, const size_t, const size_t,
+    const ColorCorrelationMap &, const Rect &, NoiseParams *, const Quantizer &,
+    FrameDecCache *PIK_RESTRICT, GroupDecCache *PIK_RESTRICT, PikInfo *);
+
+template bool DecodeFromBitstream<false>( // explicit instantiation, first == false
+    const FrameHeader &, const GroupHeader &, const PaddedBytes &, BitReader *,
+    const Rect &, MultipassHandler *, const size_t, const size_t,
+    const ColorCorrelationMap &, const Rect &, NoiseParams *, const Quantizer &,
+    FrameDecCache *PIK_RESTRICT, GroupDecCache *PIK_RESTRICT, PikInfo *);
+
+void DequantImageAC(const Quantizer &quantizer, const ColorCorrelationMap &cmap,
+                    const Rect &cmap_rect, const Image3S &quantized_ac,
+                    FrameDecCache *frame_dec_cache,
+                    GroupDecCache *group_dec_cache, const Rect &group_rect,
+                    PikInfo *aux_out) {
+  PROFILER_ZONE("dequant");
+
+  // Precondition: caller allocated/filled quantized_ac to match the quant field.
+  PIK_CHECK(quantized_ac.xsize() ==
+                quantizer.RawQuantField().xsize() * kDCTBlockSize &&
+            quantized_ac.ysize() == quantizer.RawQuantField().ysize());
+
+  const size_t blocks_x = quantizer.RawQuantField().xsize();
+  const size_t blocks_y = quantizer.RawQuantField().ysize();
+  const size_t tiles_x = DivCeil(blocks_x, kTileDimInBlocks);
+  const size_t tiles_y = DivCeil(blocks_y, kTileDimInBlocks);
+
+  // Roundtrips run a single pass, so dequantized values overwrite the output.
+  Dequant</*first=*/true> dequant(quantizer);
+
+  PIK_ASSERT(group_rect.x0() % kBlockDim == 0 &&
+             group_rect.y0() % kBlockDim == 0 &&
+             group_rect.xsize() % kBlockDim == 0 &&
+             group_rect.ysize() % kBlockDim == 0);
+
+  // The group rectangle with every coordinate divided by kBlockDim.
+  const Rect group_in_blocks(
+      group_rect.x0() / kBlockDim, group_rect.y0() / kBlockDim,
+      group_rect.xsize() / kBlockDim, group_rect.ysize() / kBlockDim);
+
+  // Visit the tiles in row-major order; input and output use the same rect.
+  for (size_t tile_y = 0; tile_y < tiles_y; ++tile_y) {
+    for (size_t tile_x = 0; tile_x < tiles_x; ++tile_x) {
+      const Rect tile(tile_x * kTileDimInBlocks, tile_y * kTileDimInBlocks,
+                      kTileDimInBlocks, kTileDimInBlocks, blocks_x, blocks_y);
+      dequant.DoAC(tile, quantized_ac, tile, group_in_blocks, cmap.ytox_map,
+                   cmap.ytob_map, cmap_rect, frame_dec_cache, group_dec_cache,
+                   aux_out);
+    }
+  }
+}
+
+static SIMD_ATTR void
+InverseIntegralTransform(const size_t xsize_blocks, const size_t ysize_blocks,
+                         const Image3F &ac_image,
+                         const AcStrategyImage &ac_strategy,
+                         const Rect &acs_rect, Image3F *PIK_RESTRICT idct,
+                         const Rect &idct_rect, size_t downsample) {
+  PROFILER_ZONE("IDCT");
+  // Transforms every block of `ac_image` to pixels written into `idct:idct_rect`.
+  constexpr size_t N = kBlockDim;
+  const size_t idct_stride = idct->PixelsPerRow();
+  const size_t ac_per_row = ac_image.PixelsPerRow();
+  // downsample == 1 writes pixels directly; otherwise box-averaged pixels.
+  if (downsample == 1) {
+    for (size_t by = 0; by < ysize_blocks; ++by) {
+      const AcStrategyRow &acs_row = ac_strategy.ConstRow(acs_rect, by);
+      for (int c = 0; c < 3; ++c) {
+        const float *PIK_RESTRICT ac_row = ac_image.ConstPlaneRow(c, by);
+        float *PIK_RESTRICT idct_row = idct_rect.PlaneRow(idct, c, by * N);
+
+        for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+          const float *PIK_RESTRICT ac_pos = ac_row + bx * kDCTBlockSize;
+          const AcStrategy &acs = acs_row[bx];
+          float *PIK_RESTRICT idct_pos = idct_row + bx * N;
+          // NOTE(review): no IsFirstBlock() filter here, unlike below — confirm.
+          acs.TransformToPixels(ac_pos, ac_per_row, idct_pos, idct_stride);
+        }
+      }
+    }
+  } else {
+    float mean_mul = 1.0f / (downsample * downsample);
+    PIK_ASSERT(downsample == 2 || downsample == 4 || downsample == 8);
+    size_t N_downsample = N / downsample; // output pixels per block side
+    SIMD_ALIGN float pixels[AcStrategy::kMaxCoeffArea];
+
+    for (size_t by = 0; by < ysize_blocks; ++by) {
+      const AcStrategyRow &acs_row = ac_strategy.ConstRow(acs_rect, by);
+      for (int c = 0; c < 3; ++c) {
+        const float *PIK_RESTRICT ac_row = ac_image.ConstPlaneRow(c, by);
+        float *PIK_RESTRICT idct_row =
+            idct_rect.PlaneRow(idct, c, by * N_downsample);
+
+        for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+          const float *PIK_RESTRICT ac_pos = ac_row + bx * kDCTBlockSize;
+          const AcStrategy &acs = acs_row[bx];
+          float *PIK_RESTRICT idct_pos = idct_row + bx * N_downsample;
+          if (!acs.IsFirstBlock())
+            continue;
+          // Transform into the scratch buffer, then average into the output.
+          acs.TransformToPixels(ac_pos, ac_per_row, pixels,
+                                acs.covered_blocks_x() * N);
+          for (size_t y = 0; y < acs.covered_blocks_y() * N_downsample; y++) {
+            for (size_t x = 0; x < acs.covered_blocks_x() * N_downsample; x++) {
+              float sum = 0.0f; // sum of one downsample x downsample square
+              for (size_t iy = 0; iy < downsample; iy++) {
+                for (size_t ix = 0; ix < downsample; ix++) {
+                  sum += pixels[(y * downsample + iy) * N *
+                                    acs.covered_blocks_x() +
+                                x * downsample + ix];
+                }
+              }
+              idct_pos[y * idct_stride + x] = sum * mean_mul;
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+void ReconOpsinImage(const FrameHeader &frame_header, const GroupHeader &header,
+                     const Quantizer &quantizer, const Rect &block_group_rect,
+                     FrameDecCache *PIK_RESTRICT frame_dec_cache,
+                     GroupDecCache *PIK_RESTRICT group_dec_cache,
+                     Image3F *PIK_RESTRICT idct, const Rect &idct_rect,
+                     PikInfo *aux_out, size_t downsample) {
+  PROFILER_ZONE("ReconOpsinImage");
+  constexpr size_t N = kBlockDim;
+  const size_t xsize_blocks = block_group_rect.xsize();
+  const size_t ysize_blocks = block_group_rect.ysize();
+  const size_t xsize_tiles = DivCeil(xsize_blocks, kTileDimInBlocks);
+  const size_t ysize_tiles = DivCeil(ysize_blocks, kTileDimInBlocks);
+  const bool predict_lf = frame_header.predict_lf;
+  const bool predict_hf = frame_header.predict_hf;
+  // Steps: ComputeLlf, PredictLf, UpdateLfForDecoder, AddPredictions, IDCT.
+  // TODO(veluca): this should probably happen upon dequantization of DC. Also,
+  // we should consider doing something similar for AC.
+  if (frame_header.flags & FrameHeader::kGrayscaleOpt) {
+    PROFILER_ZONE("GrayscaleRestoreXB");
+    GetGrayXyb()->RestoreXB(&group_dec_cache->dc);
+  }
+  // Testing only: snapshot AC now so the prediction can be isolated at the end.
+  if (aux_out && aux_out->testing_aux.ac_prediction != nullptr) {
+    PROFILER_ZONE("Copy ac_prediction");
+    *aux_out->testing_aux.ac_prediction = CopyImage(group_dec_cache->ac);
+  }
+
+  // Sets dcoeffs.0 from DC (for DCT blocks) and updates HVD.
+  Image3F *PIK_RESTRICT ac64 = &group_dec_cache->ac;
+
+  // Currently llf is temporary storage, but it will be more persistent
+  // in tile-wise processing.
+  ComputeLlf(group_dec_cache->dc, frame_dec_cache->ac_strategy,
+             block_group_rect, &group_dec_cache->llf);
+
+  if (predict_lf) {
+    // Plane 0 of pred2x2 is borrowed as scratch; pred2x2 is zeroed right after.
+    PredictLf(frame_dec_cache->ac_strategy, block_group_rect,
+              group_dec_cache->llf,
+              const_cast<ImageF *>(&group_dec_cache->pred2x2.Plane(0)),
+              &group_dec_cache->lf2x2);
+  }
+  ZeroFillImage(&group_dec_cache->pred2x2);
+
+  // Compute the border of pred2x2 (size_t{0} keeps the init-lists homogeneous).
+  if (predict_hf) {
+    PROFILER_ZONE("Predict HF");
+    AcStrategy acs(AcStrategy::Type::DCT, 0);
+    const size_t pred2x2_stride = group_dec_cache->pred2x2.PixelsPerRow();
+    if (predict_lf) {
+      const size_t lf2x2_stride = group_dec_cache->lf2x2.PixelsPerRow();
+      float block[N * N] = {};
+      for (size_t c = 0; c < 3; c++) {
+        for (size_t x : {size_t{0}, xsize_blocks + 1}) { // first/last column
+          for (size_t y = 0; y < ysize_blocks + 2; y++) {
+            const float *row_llf = group_dec_cache->llf.ConstPlaneRow(c, y);
+            const float *row_lf2x2 =
+                group_dec_cache->lf2x2.ConstPlaneRow(c, 2 * y);
+            float *row_pred2x2 = group_dec_cache->pred2x2.PlaneRow(c, 2 * y);
+            block[0] = row_llf[x];
+            block[1] = row_lf2x2[2 * x + 1];
+            block[N] = row_lf2x2[lf2x2_stride + 2 * x];
+            block[N + 1] = row_lf2x2[lf2x2_stride + 2 * x + 1];
+            acs.DC2x2FromLowFrequencies(block, 0 /*not used*/,
+                                        row_pred2x2 + 2 * x, pred2x2_stride);
+          }
+        }
+        for (size_t y : {size_t{0}, ysize_blocks + 1}) { // first/last row
+          const float *row_llf = group_dec_cache->llf.ConstPlaneRow(c, y);
+          const float *row_lf2x2 =
+              group_dec_cache->lf2x2.ConstPlaneRow(c, 2 * y);
+          float *row_pred2x2 = group_dec_cache->pred2x2.PlaneRow(c, 2 * y);
+          for (size_t x = 0; x < xsize_blocks + 2; x++) {
+            block[0] = row_llf[x];
+            block[1] = row_lf2x2[2 * x + 1];
+            block[N] = row_lf2x2[lf2x2_stride + 2 * x];
+            block[N + 1] = row_lf2x2[lf2x2_stride + 2 * x + 1];
+            acs.DC2x2FromLowFrequencies(block, 0 /*not used*/,
+                                        row_pred2x2 + 2 * x, pred2x2_stride);
+          }
+        }
+      }
+    } else {
+      const size_t llf_stride = group_dec_cache->llf.PixelsPerRow();
+      for (size_t c = 0; c < 3; c++) {
+        for (size_t x : {size_t{0}, xsize_blocks + 1}) { // first/last column
+          for (size_t y = 0; y < ysize_blocks + 2; y++) {
+            const float *row_llf = group_dec_cache->llf.ConstPlaneRow(c, y);
+            float *row_pred2x2 = group_dec_cache->pred2x2.PlaneRow(c, 2 * y);
+            acs.DC2x2FromLowestFrequencies(row_llf + x, llf_stride,
+                                           row_pred2x2 + 2 * x, pred2x2_stride);
+          }
+        }
+        for (size_t y : {size_t{0}, ysize_blocks + 1}) { // first/last row
+          const float *row_llf = group_dec_cache->llf.ConstPlaneRow(c, y);
+          float *row_pred2x2 = group_dec_cache->pred2x2.PlaneRow(c, 2 * y);
+          for (size_t x = 0; x < xsize_blocks + 2; x++) {
+            acs.DC2x2FromLowestFrequencies(row_llf + x, llf_stride,
+                                           row_pred2x2 + 2 * x, pred2x2_stride);
+          }
+        }
+      }
+    }
+  }
+
+  // tile_stage makes dispatching the per-tile work simple: each pixel
+  // corresponds to a tile and each bit to a stage. Stage used here:
+  // * bit 0: calculation of lf2x2 / pred2x2 and the initial LF AC update.
+
+  Image3F *PIK_RESTRICT pred2x2_or_null =
+      predict_hf ? &group_dec_cache->pred2x2 : nullptr;
+  Image3F *PIK_RESTRICT lf2x2_or_null =
+      predict_lf ? &group_dec_cache->lf2x2 : nullptr;
+
+  for (size_t c = 0; c < group_dec_cache->ac.kNumPlanes; c++) {
+    PROFILER_ZONE("Reset tile stages");
+    // Reset tile stages.
+    for (size_t ty = 0; ty < ysize_tiles; ++ty) {
+      uint8_t *PIK_RESTRICT tile_stage_row =
+          group_dec_cache->tile_stage.Row(ty);
+      memset(tile_stage_row, 0, xsize_tiles * sizeof(uint8_t));
+      tile_stage_row[xsize_tiles] = 255;
+    }
+    uint8_t *PIK_RESTRICT tile_stage_row =
+        group_dec_cache->tile_stage.Row(ysize_tiles);
+    memset(tile_stage_row, 255, (xsize_tiles + 1) * sizeof(uint8_t));
+    // The 255 entries past the last tile row/column read as "already done".
+    for (size_t ty = 0; ty < ysize_tiles; ++ty) {
+      for (size_t tx = 0; tx < xsize_tiles; ++tx) {
+        for (size_t lfty = ty; lfty < ty + 2; ++lfty) {
+          uint8_t *stage_row = group_dec_cache->tile_stage.Row(lfty);
+          for (size_t lftx = tx; lftx < tx + 2; ++lftx) {
+            if ((stage_row[lftx] & 1) != 0)
+              continue;
+            const Rect tile(lftx * kTileDimInBlocks, lfty * kTileDimInBlocks,
+                            kTileDimInBlocks, kTileDimInBlocks, xsize_blocks,
+                            ysize_blocks);
+            UpdateLfForDecoder(tile, predict_lf, predict_hf,
+                               frame_dec_cache->ac_strategy, block_group_rect,
+                               group_dec_cache->llf, ac64, pred2x2_or_null,
+                               lf2x2_or_null, c);
+            stage_row[lftx] |= 1;
+          }
+        }
+        if (predict_hf) {
+          // TODO(user): invoke AddPredictions for (tx, ty) tile here.
+        }
+      }
+    }
+  }
+
+  if (predict_hf) {
+    // TODO(user): make UpSample4x4BlurDCT tile-wise-able.
+    AddPredictions(group_dec_cache->pred2x2, frame_dec_cache->ac_strategy,
+                   block_group_rect, &group_dec_cache->blur_x,
+                   &group_dec_cache->ac);
+  }
+
+  PIK_ASSERT(idct_rect.xsize() == DivCeil(xsize_blocks * N, downsample));
+  PIK_ASSERT(idct_rect.ysize() == DivCeil(ysize_blocks * N, downsample));
+  InverseIntegralTransform(xsize_blocks, ysize_blocks, group_dec_cache->ac,
+                           frame_dec_cache->ac_strategy, block_group_rect, idct,
+                           idct_rect, downsample);
+  // Testing only: final AC minus the earlier snapshot, DC values zeroed.
+  if (aux_out && aux_out->testing_aux.ac_prediction != nullptr) {
+    PROFILER_ZONE("Subtract ac_prediction");
+    Subtract(group_dec_cache->ac, *aux_out->testing_aux.ac_prediction,
+             aux_out->testing_aux.ac_prediction);
+    ZeroDcValues(aux_out->testing_aux.ac_prediction,
+                 frame_dec_cache->ac_strategy);
+  }
+}
+
+namespace {
+
+Status DoAdaptiveReconstruction(const Image3F &idct,
+                                const FrameHeader &frame_header,
+                                const Quantizer &quantizer,
+                                FrameDecCache *frame_dec_cache,
+                                PikInfo *aux_out, Image3F *PIK_RESTRICT out) {
+  // Must only be called with adaptive reconstruction enabled: returning `idct`
+  // itself as `out` would need a copy or a new memory handling strategy.
+  PIK_CHECK(frame_header.have_adaptive_reconstruction);
+
+  // With gaborish off, the unsmoothed input doubles as the smoothed one.
+  Image3F smoothed;
+  const bool has_gaborish = frame_header.gaborish != GaborishStrength::kOff;
+  if (has_gaborish) {
+    PIK_RETURN_IF_ERROR(
+        ConvolveGaborish(idct, frame_header.gaborish, nullptr, &smoothed));
+  }
+  const Image3F &smoothed_input = has_gaborish ? smoothed : idct;
+
+  // Auxiliary output is only forwarded when the caller supplied aux_out.
+  AdaptiveReconstructionAux *aux = nullptr;
+  if (aux_out != nullptr) {
+    aux = &aux_out->adaptive_reconstruction_aux;
+  }
+
+  *out = AdaptiveReconstruction(
+      smoothed_input, idct, quantizer, frame_dec_cache->raw_quant_field,
+      frame_dec_cache->dequant_control_field, frame_dec_cache->dequant_map,
+      frame_dec_cache->ar_sigma_lut_ids, frame_dec_cache->ac_strategy,
+      frame_header.epf_params, aux);
+  return true;
+}
+
+} // namespace
+
+Status FinalizeFrameDecoding(Image3F *PIK_RESTRICT idct, size_t xsize,
+                             size_t ysize, const FrameHeader &frame_header,
+                             const NoiseParams &noise_params,
+                             const Quantizer &quantizer,
+                             const BlockDictionary &dictionary,
+                             FrameDecCache *frame_dec_cache, PikInfo *aux_out,
+                             size_t downsample) {
+  // Adaptive reconstruction, gaborish and noise only run at full resolution.
+  const bool full_resolution = (downsample == 1);
+  if (full_resolution && frame_header.have_adaptive_reconstruction) {
+    Image3F reconstructed;
+    PIK_RETURN_IF_ERROR(DoAdaptiveReconstruction(*idct, frame_header, quantizer,
+                                                 frame_dec_cache, aux_out,
+                                                 &reconstructed));
+    *idct = std::move(reconstructed);
+  }
+
+  ApplyForwardBilinear(idct, downsample);
+  dictionary.AddTo(idct, downsample);
+
+  if (full_resolution && frame_header.gaborish != GaborishStrength::kOff) {
+    Image3F gaborished;
+    PIK_RETURN_IF_ERROR(
+        ConvolveGaborish(*idct, frame_header.gaborish, nullptr, &gaborished));
+    *idct = std::move(gaborished);
+  }
+  if (full_resolution && (frame_header.flags & FrameHeader::kNoise)) {
+    PROFILER_ZONE("AddNoise");
+    AddNoise(noise_params, idct);
+  }
+
+  // Shrink to the requested size at the output resolution, then convert.
+  const size_t out_xsize = DivCeil(xsize, downsample);
+  const size_t out_ysize = DivCeil(ysize, downsample);
+  idct->ShrinkTo(out_xsize, out_ysize);
+  OpsinToLinear(idct);
+  return true;
+}
+
+} // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/compressed_image.h b/codec/L2/demos/pikEnc/host/pik/compressed_image.h
new file mode 100755
index 0000000000..62830435ca
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/compressed_image.h
@@ -0,0 +1,128 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_COMPRESSED_IMAGE_H_
+#define PIK_COMPRESSED_IMAGE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "pik/adaptive_reconstruction.h"
+#include "pik/bit_reader.h"
+#include "pik/block_dictionary.h"
+#include "pik/color_correlation.h"
+#include "pik/common.h"
+#include "pik/compressed_image_fwd.h"
+#include "pik/data_parallel.h"
+#include "pik/headers.h"
+#include "pik/image.h"
+#include "pik/multipass_handler.h"
+#include "pik/noise.h"
+#include "pik/padded_bytes.h"
+#include "pik/pik_info.h"
+#include "pik/pik_params.h"
+#include "pik/quantizer.h"
+
+#include <ap_int.h>
+#include <hls_stream.h>
+
+// Methods to encode (decode) an image into (from) the bit stream:
+// initialization of per-pass information and per-group information, actual
+// computation of quantized coefficients, and encoding, plus corresponding
+// methods for the decoder.
+
+namespace pik {
+
+struct GradientMap;
+
+// Initialize per-pass information.
+SIMD_ATTR void InitializeFrameEncCache(
+    const FrameHeader& frame_header, const Image3F& opsin_full,
+    const AcStrategyImage& ac_strategy, const Quantizer& quantizer,
+    const ColorCorrelationMap& cmap, const BlockDictionary& dictionary,
+    FrameEncCache* frame_enc_cache, PikInfo* aux_out);
+
+// Initializes the encoder cache, setting parameters from the headers,
+// setting up the `coeffs` and `dc_init` images in enc_cache.
+SIMD_ATTR void InitializeEncCache(const FrameHeader& frame_header,
+                                  const GroupHeader& group_header,
+                                  const FrameEncCache& frame_enc_cache,
+                                  const Rect& group_rect, EncCache* enc_cache);
+
+// Computes quantized coefficients from the non-quantized ones already present
+// in enc_cache.
+SIMD_ATTR void ComputeCoefficients(const Quantizer& quantizer,
+                                   const ColorCorrelationMap& cmap,
+                                   const Rect& cmap_rect,
+                                   const FrameEncCache& frame_enc_cache,
+                                   EncCache* enc_cache,
+                                   PikInfo* aux_out = nullptr);
+
+// Encodes AC quantized coefficients from the given encoder cache.
+PaddedBytes EncodeToBitstream(const EncCache& cache, const Rect& rect,
+                              const Quantizer& quantizer,
+                              const NoiseParams& noise_params, bool fast_mode,
+                              MultipassHandler* handler,
+                              PikInfo* info = nullptr);
+
+// HLS variant of EncodeToBitstream with extra optional ap_uint<32> buffers.
+PaddedBytes hls_EncodeToBitstream(const EncCache& cache, const Rect& rect,
+                              const Quantizer& quantizer,
+                              const NoiseParams& noise_params, bool fast_mode,
+                              MultipassHandler* handler,
+                              PikInfo* info = nullptr,
+							  ap_uint<32> *ac_x=0,
+							  ap_uint<32> *ac_y=0,
+							  ap_uint<32> *ac_b=0,
+							  ap_uint<32> *k2_order=0);
+
+// Decodes AC coefficients from the bit stream, populating the AC
+// fields of the decoder cache, and the corresponding rectangles in the global
+// information (quant_field and ac_strategy) in the per-pass decoder cache.
+template <bool first>
+bool DecodeFromBitstream(const FrameHeader& frame_header,
+                         const GroupHeader& header,
+                         const PaddedBytes& compressed, BitReader* reader,
+                         const Rect& group_rect, MultipassHandler* handler,
+                         const size_t xsize_blocks, const size_t ysize_blocks,
+                         const ColorCorrelationMap& cmap, const Rect& cmap_rect,
+                         NoiseParams* noise_params, const Quantizer& quantizer,
+                         FrameDecCache* PIK_RESTRICT frame_dec_cache,
+                         GroupDecCache* PIK_RESTRICT group_dec_cache,
+                         PikInfo* aux_out);
+
+// Dequantizes the provided quantized_ac image into the decoder cache. Used in
+// the encoder loop in adaptive_quantization.cc
+void DequantImageAC(const Quantizer& quantizer, const ColorCorrelationMap& cmap,
+                    const Rect& cmap_rect, const Image3S& quantized_ac,
+                    FrameDecCache* PIK_RESTRICT frame_dec_cache,
+                    GroupDecCache* PIK_RESTRICT group_dec_cache,
+                    const Rect& group_rect, PikInfo* aux_out);
+
+// Applies predictions to de-quantized AC coefficients, copies DC coefficients
+// into AC, and does IDCT. Writes opsin IDCT values into `idct:idct_rect`.
+void ReconOpsinImage(const FrameHeader& frame_header, const GroupHeader& header,
+                     const Quantizer& quantizer, const Rect& block_group_rect,
+                     FrameDecCache* PIK_RESTRICT frame_dec_cache,
+                     GroupDecCache* PIK_RESTRICT group_dec_cache,
+                     Image3F* PIK_RESTRICT idct, const Rect& idct_rect,
+                     PikInfo* aux_out = nullptr, size_t downsample = 1);
+
+// Finalizes the decoding of a pass by running per-pass post processing:
+// smoothing and adaptive reconstruction. Writes linear sRGB to `idct` and
+// shrinks it to `x/ysize` to undo prior padding.
+// TODO(janwas): move NoiseParams into FrameHeader.
+Status FinalizeFrameDecoding(Image3F* PIK_RESTRICT idct, size_t xsize,
+                             size_t ysize, const FrameHeader& frame_header,
+                             const NoiseParams& noise_params,
+                             const Quantizer& quantizer,
+                             const BlockDictionary& block_dictionary,
+                             FrameDecCache* frame_dec_cache,
+                             PikInfo* aux_out = nullptr, size_t downsample = 1);
+
+}  // namespace pik
+
+#endif  // PIK_COMPRESSED_IMAGE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/compressed_image_fwd.h b/codec/L2/demos/pikEnc/host/pik/compressed_image_fwd.h
new file mode 100755
index 0000000000..f1699ef531
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/compressed_image_fwd.h
@@ -0,0 +1,246 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_COMPRESSED_IMAGE_FWD_H_
+#define PIK_COMPRESSED_IMAGE_FWD_H_
+
+#include "pik/ac_strategy.h"
+#include "pik/common.h"
+#include "pik/data_parallel.h"
+#include "pik/gauss_blur.h"
+#include "pik/image.h"
+
+namespace pik {
+
+struct GradientMap {  // Gradient tile corners, application mask and sizes.
+  Image3F gradient;  // corners of the gradient map tiles
+  Image3B apply;     // gradient application mask.
+
+  // Size of the DC image. No default member initializer.
+  size_t xsize_dc;
+  size_t ysize_dc;
+
+  // Size of the gradient map: the number of tile corner points, i.e. one more
+  // than the number of tiles in x and y. No default member initializer.
+  size_t xsize;
+  size_t ysize;
+
+  bool grayscale;  // No default member initializer.
+};
+
+// Contains global information that is computed once per pass.
+struct FrameEncCache {
+  // DCT coefficients for the full image
+  Image3F coeffs;
+
+  Image3F dc_dec;
+  Image3S dc;
+
+  // Enable new Lossless codec for DC. This flag exists only temporarily
+  // as long as both old and new implementation co-exist, and eventually
+  // only the new implementation should remain.
+  bool use_new_dc = false;
+
+  bool use_gradient;  // No default initializer; presumably gates `gradient`.
+  bool grayscale_opt = false;
+
+  // Gradient map, if used.
+  GradientMap gradient;
+
+  DequantMatrices matrices{/*need_inv_table=*/true};
+
+  // Control field for dequant matrix selection.
+  ImageB dequant_control_field;
+
+  // Map of dequant control field and adaptive quantization level to
+  // dequantization table. Zero-initialized.
+  uint8_t dequant_map[kMaxQuantControlFieldValue][256] = {};
+
+  // AC strategy.
+  AcStrategyImage ac_strategy;
+
+  // Per-block indices into LUT for adaptive reconstruction's blur strength.
+  ImageB ar_sigma_lut_ids;
+};
+
+// Working area for ComputeCoefficients.
+struct EncCache {
+  bool initialized = false;
+
+  bool grayscale_opt = false;
+
+  size_t xsize_blocks;  // No default initializer (same for ysize_blocks).
+  size_t ysize_blocks;
+
+  // ComputePredictionResiduals
+  Image3F dc_dec;
+
+  // Working value, copied from coeffs_init.
+  Image3F coeffs;
+
+  // AC strategy.
+  AcStrategyImage ac_strategy;
+
+  // Every cell with saliency > threshold will be considered as 'salient'.
+  float saliency_threshold;  // No default initializer.
+  // Debug parameter: If true, drop non-salient AC part in progressive encoding.
+  bool saliency_debug_skip_nonsalient;  // No default initializer.
+
+  // Enable/disable predictions. Set in ComputeInitialCoefficients from the
+  // pass header. Current usage is only in progressive mode.
+  bool predict_lf;  // No default initializer.
+  bool predict_hf;  // No default initializer.
+
+  // Output values
+  Image3S ac;          // 64 coefs per block, first (DC) is ignored.
+  ImageI quant_field;  // Final values, to be encoded in stream.
+};
+
+// Information that is used at the pass level. All the images here should be
+// accessed through a group rect (either with block units or pixel units).
+struct FrameDecCache {
+  // Enable new Lossless codec for DC. This flag exists only temporarily
+  // as long as both old and new implementation co-exist, and eventually
+  // only the new implementation should remain.
+  bool use_new_dc = false;
+
+  bool grayscale;  // No default member initializer.
+
+  // Full DC of the pass. Note that this will be split in *AC* group sized
+  // chunks for AC predictions (DC group size != AC group size).
+  Image3F dc;
+
+  GradientMap gradient;  // Same struct as the one held by FrameEncCache.
+
+  // Raw quant field to be used for adaptive reconstruction.
+  ImageI raw_quant_field;
+
+  AcStrategyImage ac_strategy;
+
+  DequantMatrices matrices{/*need_inv_table=*/false};
+
+  // Control field for dequant matrix selection.
+  ImageB dequant_control_field;
+
+  // Map of dequant control field and adaptive quantization level to
+  // dequantization table. Zero-initialized.
+  uint8_t dequant_map[kMaxQuantControlFieldValue][256] = {};
+
+  // Per-block indices into LUT for adaptive reconstruction's blur strength.
+  ImageB ar_sigma_lut_ids;
+};
+
+// Temp images required for decoding a single group. Reduces memory allocations
+// for large images because we only initialize min(#threads, #groups) instances.
+struct GroupDecCache {
+  // Separate from InitOnce because the caller only knows the DC group size.
+  void InitDecodeDCGroup(size_t xsize_blocks, size_t ysize_blocks) {
+    if (quantized_dc.xsize() == 0) {  // First use: allocate at maximum size.
+      quantized_dc = Image3S(kDcGroupDimInBlocks, kDcGroupDimInBlocks);
+      dc_y = ImageS(kDcGroupDimInBlocks, kDcGroupDimInBlocks);
+      dc_xz_residuals = ImageS(kDcGroupDimInBlocks * 2, kDcGroupDimInBlocks);
+      dc_xz_expanded = ImageS(kDcGroupDimInBlocks * 2, kDcGroupDimInBlocks);
+    }
+    // NOTE(review): ac_strategy_raw below is reallocated on every call.
+    quantized_dc.ShrinkTo(xsize_blocks, ysize_blocks);
+    dc_y.ShrinkTo(xsize_blocks, ysize_blocks);
+    dc_xz_residuals.ShrinkTo(xsize_blocks * 2, ysize_blocks);
+    dc_xz_expanded.ShrinkTo(xsize_blocks * 2, ysize_blocks);
+    ac_strategy_raw = ImageB(kDcGroupDimInBlocks, kDcGroupDimInBlocks);
+  }
+
+  void InitOnce(size_t xsize_blocks, size_t ysize_blocks) {
+    if (num_nzeroes.xsize() == 0) {  // First use: allocate everything.
+      // Allocate enough for a whole tile - partial tiles on the right/bottom
+      // border just use a subset. The valid size is passed via Rect.
+      // ac/dc and the prediction buffers are sized per group, not per tile.
+      ac = Image3F(kGroupDimInBlocks * kDCTBlockSize, kGroupDimInBlocks);
+      dc = Image3F(kGroupDimInBlocks + 2, kGroupDimInBlocks + 2);
+
+      quantized_ac =
+          Image3S(kTileDimInBlocks * kDCTBlockSize, kTileDimInBlocks);
+      num_nzeroes = Image3I(kTileDimInBlocks, kTileDimInBlocks);
+
+      const size_t xsize_tiles = DivCeil(kGroupDimInBlocks, kTileDimInBlocks);
+      const size_t ysize_tiles = DivCeil(kGroupDimInBlocks, kTileDimInBlocks);
+      tile_stage = ImageB(xsize_tiles + 1, ysize_tiles + 1);
+
+      const size_t kWidth2x2 = (kGroupDimInBlocks + 2) * 2;
+      const size_t kHeight2x2 = (kGroupDimInBlocks + 2) * 2;
+
+      // TODO(user): do not allocate when !predict_hf
+      pred2x2 = Image3F(kWidth2x2, kHeight2x2);
+      // TODO(user): do not allocate when !predict_lf
+      lf2x2 = Image3F(kWidth2x2, kHeight2x2);
+      llf = Image3F(kGroupDimInBlocks + 2, kGroupDimInBlocks + 2);
+
+      blur_x = ImageF(kGroupDimInBlocks * 8, kGroupDimInBlocks * 2 + 2);
+    }
+    // quantized_ac, num_nzeroes and tile_stage keep their allocated size.
+    // These images need to have correct sizes (used as loop bounds):
+
+    // Ensure ShrinkTo is safe.
+    PIK_ASSERT(xsize_blocks <= kGroupDimInBlocks);
+    PIK_ASSERT(ysize_blocks <= kGroupDimInBlocks);
+
+    dc.ShrinkTo(xsize_blocks + 2, ysize_blocks + 2);
+    ac.ShrinkTo(xsize_blocks * kDCTBlockSize, ysize_blocks);
+
+    const size_t xsize2x2 = (xsize_blocks + 2) * 2;
+    const size_t ysize2x2 = (ysize_blocks + 2) * 2;
+
+    pred2x2.ShrinkTo(xsize2x2, ysize2x2);
+    llf.ShrinkTo(xsize_blocks + 2, ysize_blocks + 2);
+    lf2x2.ShrinkTo(xsize2x2, ysize2x2);
+
+    blur_x.ShrinkTo(xsize_blocks * 8, ysize_blocks * 2 + 2);
+  }
+
+  // Dequantized output produced by DecodeFromBitstream, DequantImage or
+  // ExtractGroupDC.
+  // TODO(veluca): replace the DC with a pointer + a rect to avoid copies.
+  Image3F dc;  // Two blocks wider and taller than the group (see InitOnce).
+  Image3F ac;  // kDCTBlockSize values per block along x, one row per block row.
+
+  // Decode
+  Image3S quantized_ac;
+  // DequantAC
+  Image3I num_nzeroes;
+
+  // DecodeDCGroup
+  Image3S quantized_dc;
+  // TODO(janwas): remove these after use_new_dc
+  ImageS dc_y;
+  ImageS dc_xz_residuals;  // Twice as wide as dc_y (see InitDecodeDCGroup).
+  ImageS dc_xz_expanded;   // Twice as wide as dc_y (see InitDecodeDCGroup).
+
+  ImageB ac_strategy_raw;
+
+  // ReconOpsinImage
+  Image3F pred2x2;
+  Image3F llf;
+  Image3F lf2x2;
+  ImageB tile_stage;
+
+  // AddPredictions
+  ImageF blur_x;
+};
+
+template <size_t N>
+std::vector<float> DCfiedGaussianKernel(float sigma) {
+  const std::vector<float> fine = GaussianKernel<float>(N, sigma);
+  std::vector<float> coarse(3, 0.0);  // N shifted copies of `fine`, binned by N.
+  for (size_t shift = 0; shift < N; ++shift) {
+    for (size_t tap = 0; tap < fine.size(); ++tap) {
+      coarse[(shift + tap) / N] += fine[tap] / N;
+    }
+  }
+  return coarse;
+}
+
+}  // namespace pik
+
+#endif  // PIK_COMPRESSED_IMAGE_FWD_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/context_map_decode.cc b/codec/L2/demos/pikEnc/host/pik/context_map_decode.cc
new file mode 100755
index 0000000000..af7cbcf1bd
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/context_map_decode.cc
@@ -0,0 +1,120 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/context_map_decode.h"
+
+#include <cstring>
+#include <vector>
+
+#include "pik/huffman_decode.h"
+#include "pik/status.h"
+
+namespace pik {
+
+namespace {
+
+// Moves v[index] to v[0], shifting v[0..index-1] up by one position.
+void MoveToFront(uint8_t* v, uint8_t index) {
+  const uint8_t moved = v[index];
+  memmove(v + 1, v, index);  // Ranges overlap, hence memmove.
+  v[0] = moved;
+}
+
+// Undoes the move-to-front transform on v[0..v_len) in place.
+void InverseMoveToFrontTransform(uint8_t* v, int v_len) {
+  uint8_t table[256];
+  for (int s = 0; s < 256; ++s) table[s] = static_cast<uint8_t>(s);
+  for (int k = 0; k < v_len; ++k) {
+    const uint8_t rank = v[k];
+    v[k] = table[rank];
+    // Promote the decoded symbol to the front; a no-op when rank == 0.
+    memmove(table + 1, table, rank);
+    table[0] = v[k];
+  }
+}
+
+// Decodes a number in the range [0..255], by reading 1 - 11 bits: a flag bit
+// (0 means the value is 0), then a 3-bit count `nbits`, then `nbits` bits that
+// are added to 1 << nbits. Nothing further is read when nbits is 0.
+inline int DecodeVarLenUint8(BitReader* input) {
+  const bool is_zero = !input->ReadBits(1);
+  if (is_zero) return 0;
+
+  const int nbits = static_cast<int>(input->ReadBits(3));
+  if (nbits == 0) return 1;
+  const int offset = static_cast<int>(input->ReadBits(nbits));
+  return offset + (1 << nbits);
+}
+
+// Returns true iff every entry of context_map is below num_htrees and each of
+// the num_htrees histogram indices occurs at least once; otherwise returns the
+// result of PIK_FAILURE.
+bool VerifyContextMap(const std::vector<uint8_t>& context_map,
+                      const size_t num_htrees) {
+  std::vector<bool> seen(num_htrees);
+  size_t num_distinct = 0;
+  for (const uint8_t htree : context_map) {
+    if (htree >= num_htrees) {
+      return PIK_FAILURE("Invalid histogram index in context map.");
+    }
+    if (seen[htree]) continue;
+    seen[htree] = true;
+    ++num_distinct;
+  }
+  if (num_distinct == num_htrees) return true;
+
+  return PIK_FAILURE("Incomplete context map.");
+}
+
+}  // namespace
+
+// Fills *context_map, whose size must already equal the number of contexts,
+// from `input` and stores the histogram count in *num_htrees. Returns true on
+// success and the result of PIK_FAILURE if the data is malformed.
+bool DecodeContextMap(std::vector<uint8_t>* context_map, size_t* num_htrees,
+                      BitReader* input) {
+  *num_htrees = DecodeVarLenUint8(input) + 1;
+  if (*num_htrees <= 1) {
+    // Single histogram: all zeros. assign() is safe even for an empty map.
+    context_map->assign(context_map->size(), 0);
+    return true;
+  }
+
+  int max_run_length_prefix = 0;
+  if (input->ReadBits(1)) {  // RLE is used for runs of zeros.
+    max_run_length_prefix = input->ReadBits(4) + 1;
+  }
+  HuffmanDecodingData entropy;
+  if (!entropy.ReadFromBitStream(input)) {
+    return PIK_FAILURE("Invalid histogram data.");
+  }
+  HuffmanDecoder decoder;
+  const size_t size = context_map->size();
+  for (size_t i = 0; i < size;) {
+    const int code = decoder.ReadSymbol(entropy, input);
+    if (code == 0) {
+      (*context_map)[i++] = 0;
+    } else if (code <= max_run_length_prefix) {
+      // Run of zeros: (1 << code) plus the value of `code` extra bits.
+      int reps = 1 + (1 << code) + input->ReadBits(code);
+      while (--reps) {
+        if (i >= size) return PIK_FAILURE("Invalid context map data.");
+        (*context_map)[i++] = 0;
+      }
+    } else {
+      const int value = code - max_run_length_prefix;
+      // Reject out-of-range symbols instead of truncating them to 8 bits.
+      if (value > 255) return PIK_FAILURE("Invalid context map data.");
+      (*context_map)[i++] = static_cast<uint8_t>(value);
+    }
+  }
+  if (input->ReadBits(1)) {  // Move-to-front was applied by the encoder.
+    InverseMoveToFrontTransform(context_map->data(), size);
+  }
+  return VerifyContextMap(*context_map, *num_htrees);
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/context_map_decode.h b/codec/L2/demos/pikEnc/host/pik/context_map_decode.h
new file mode 100755
index 0000000000..43f7305a6f
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/context_map_decode.h
@@ -0,0 +1,27 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_CONTEXT_MAP_DECODE_H_
+#define PIK_CONTEXT_MAP_DECODE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <vector>
+
+#include "pik/bit_reader.h"
+
+namespace pik {
+
+// Reads the context map from the bit stream. On calling this function,
+// context_map->size() must be the number of possible context ids.
+// Sets *num_htrees to the number of different histogram ids in
+// *context_map.
+bool DecodeContextMap(std::vector<uint8_t>* context_map, size_t* num_htrees,
+                      BitReader* input);
+
+}  // namespace pik
+
+#endif  // PIK_CONTEXT_MAP_DECODE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/context_map_encode.cc b/codec/L2/demos/pikEnc/host/pik/context_map_encode.cc
new file mode 100755
index 0000000000..03e7df93aa
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/context_map_encode.cc
@@ -0,0 +1,165 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+// Library to encode the context map.
+
+#include "pik/context_map_encode.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <vector>
+
+#include "pik/bits.h"
+#include "pik/compiler_specific.h"
+#include "pik/huffman_encode.h"
+#include "pik/status.h"
+#include "pik/write_bits.h"
+
+namespace pik {
+
+namespace {
+
+// Writes a flag bit telling whether n is nonzero; for nonzero n this is
+// followed by floor(log2(n)) in 3 bits and then n minus that power of two.
+void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage) {
+  const bool nonzero = n != 0;
+  WriteBits(1, nonzero ? 1 : 0, storage_ix, storage);
+  if (!nonzero) return;
+  const size_t log2n = FloorLog2Nonzero(static_cast<uint64_t>(n));
+  WriteBits(3, log2n, storage_ix, storage);
+  WriteBits(log2n, n - (1ULL << log2n), storage_ix, storage);
+}
+
+// Returns the position of the first element of v equal to `value`, or
+// v.size() when there is none.
+size_t IndexOf(const std::vector<uint8_t>& v, uint8_t value) {
+  const auto hit = std::find(v.begin(), v.end(), value);
+  // `hit` is v.end() when nothing matched, giving a distance of v.size().
+  return static_cast<size_t>(hit - v.begin());
+}
+
+// Moves (*v)[index] to the front, shifting the elements before it back by one.
+void MoveToFront(std::vector<uint8_t>* v, size_t index) {
+  // Right-rotate the prefix [0, index] by one position.
+  const auto first = v->begin();
+  const auto last = first + index + 1;
+  std::rotate(first, first + index, last);
+}
+
+// Maps each symbol of v to its rank in a move-to-front list of 0..max(v).
+std::vector<uint8_t> MoveToFrontTransform(const std::vector<uint8_t>& v) {
+  if (v.empty()) return v;
+  std::vector<uint8_t> mtf(1 + *std::max_element(v.begin(), v.end()));
+  for (size_t s = 0; s < mtf.size(); ++s) mtf[s] = static_cast<uint8_t>(s);
+  std::vector<uint8_t> ranks;
+  for (const uint8_t symbol : v) {
+    const size_t rank = IndexOf(mtf, symbol);
+    PIK_ASSERT(rank < mtf.size());
+    ranks.push_back(static_cast<uint8_t>(rank));
+    MoveToFront(&mtf, rank);
+  }
+  return ranks;
+}
+
+// Finds runs of zeros in v_in and replaces them with a prefix code of the run
+// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
+// shifted by *max_run_length_prefix. Will not create prefix codes bigger than
+// the initial value of *max_run_length_prefix. The prefix code of run length L
+// is simply FloorLog2Nonzero(L) and the number of extra bits is the same as
+// the prefix code.
+void RunLengthCodeZeros(const std::vector<uint8_t>& v_in,
+                        uint32_t* max_run_length_prefix,
+                        std::vector<uint32_t>* v_out,
+                        std::vector<uint32_t>* extra_bits) {
+  uint32_t max_reps = 0;  // Length of the longest run of zeros in v_in.
+  for (size_t i = 0; i < v_in.size();) {
+    for (; i < v_in.size() && v_in[i] != 0; ++i) {
+    }
+    uint32_t reps = 0;
+    for (; i < v_in.size() && v_in[i] == 0; ++i) {
+      ++reps;
+    }
+    max_reps = std::max(reps, max_reps);
+  }
+  uint32_t max_prefix = max_reps > 0 ? FloorLog2Nonzero(max_reps) : 0;
+  max_prefix = std::min(max_prefix, *max_run_length_prefix);
+  *max_run_length_prefix = max_prefix;  // Report the limit actually used.
+  for (size_t i = 0; i < v_in.size();) {
+    if (v_in[i] != 0) {
+      v_out->push_back(v_in[i] + *max_run_length_prefix);
+      extra_bits->push_back(0);
+      ++i;
+    } else {
+      uint32_t reps = 1;
+      for (size_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
+        ++reps;
+      }
+      i += reps;
+      while (reps != 0) {
+        if (reps < (2u << max_prefix)) {  // Remainder fits in a single code.
+          uint32_t run_length_prefix = FloorLog2Nonzero(reps);
+          v_out->push_back(run_length_prefix);
+          extra_bits->push_back(reps - (1u << run_length_prefix));
+          break;
+        } else {  // Emit the longest code, (2u << max_prefix) - 1 zeros.
+          v_out->push_back(max_prefix);
+          extra_bits->push_back((1u << max_prefix) - 1u);
+          reps -= (2u << max_prefix) - 1u;
+        }
+      }
+    }
+  }
+}
+
+}  // namespace
+
+// Writes `context_map` to the bit stream: the histogram count, then (unless
+// there is only one histogram) the move-to-front transformed map with runs of
+// zeros run-length coded, entropy coded with a Huffman code that is stored
+// first, and finally a bit signalling that move-to-front was used.
+void EncodeContextMap(const std::vector<uint8_t>& context_map,
+                      size_t num_histograms, size_t* storage_ix,
+                      uint8_t* storage) {
+  StoreVarLenUint8(num_histograms - 1, storage_ix, storage);
+  // With a single histogram nothing else is written.
+  if (num_histograms == 1) return;
+
+  // Alphabet size is 256 + 16 = 272. (We can have 256 clusters and 16 run
+  // length codes).
+  static const int kAlphabetSize = 272;
+
+  // Move-to-front, then run-length code the zeros with prefixes of at most 6.
+  const std::vector<uint8_t> mtf_symbols = MoveToFrontTransform(context_map);
+  std::vector<uint32_t> symbols;
+  std::vector<uint32_t> extra_bits;
+  uint32_t max_prefix = 6;
+  RunLengthCodeZeros(mtf_symbols, &max_prefix, &symbols, &extra_bits);
+
+  uint32_t histogram[kAlphabetSize] = {};
+  for (const uint32_t symbol : symbols) ++histogram[symbol];
+
+  // RLE flag, followed by the largest prefix (minus one) when RLE is in use.
+  const bool use_rle = max_prefix > 0;
+  WriteBits(1, use_rle, storage_ix, storage);
+  if (use_rle) WriteBits(4, max_prefix - 1, storage_ix, storage);
+
+  uint8_t depths[kAlphabetSize] = {};
+  uint16_t codes[kAlphabetSize] = {};
+  BuildAndStoreHuffmanTree(histogram, num_histograms + max_prefix, depths,
+                           codes, storage_ix, storage);
+  for (size_t i = 0; i < symbols.size(); ++i) {
+    const uint32_t symbol = symbols[i];
+    WriteBits(depths[symbol], codes[symbol], storage_ix, storage);
+    // Symbols 1..max_prefix are run lengths carrying `symbol` extra bits.
+    if (symbol > 0 && symbol <= max_prefix) {
+      WriteBits(symbol, extra_bits[i], storage_ix, storage);
+    }
+  }
+  WriteBits(1, 1, storage_ix, storage);  // use move-to-front
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/context_map_encode.h b/codec/L2/demos/pikEnc/host/pik/context_map_encode.h
new file mode 100755
index 0000000000..08ac5f5c11
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/context_map_encode.h
@@ -0,0 +1,24 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_CONTEXT_MAP_ENCODE_H_
+#define PIK_CONTEXT_MAP_ENCODE_H_
+
+#include <stdint.h>
+#include <cstddef>
+#include <vector>
+
+namespace pik {
+
+// Encodes the given context map to the bit stream. The number of different
+// histogram ids is given by num_histograms.
+void EncodeContextMap(const std::vector<uint8_t>& context_map,
+                      size_t num_histograms, size_t* storage_ix,
+                      uint8_t* storage);
+
+}  // namespace pik
+
+#endif  // PIK_CONTEXT_MAP_ENCODE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/convolve.h b/codec/L2/demos/pikEnc/host/pik/convolve.h
new file mode 100755
index 0000000000..984abd491f
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/convolve.h
@@ -0,0 +1,1619 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_CONVOLVE_H_
+#define PIK_CONVOLVE_H_
+
+// Fast SIMD 2D convolution.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <cassert>
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+
+#include "pik/compiler_specific.h"
+#include "pik/data_parallel.h"
+#include "pik/image.h"
+#include "pik/image_ops.h"
+#include "pik/profiler.h"
+#include "pik/simd/simd.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Usable by any 3x3 kernel; applied as-is without flipping.
+struct Weights3x3 {
+  // top/middle/bottom left/center/right, replicated 4x via SIMD_REP4.
+  float tl[4];  // top-left
+  float tc[4];  // top-center
+  float tr[4];  // top-right
+  float ml[4];  // middle-left
+  float mc[4];  // middle-center (the kernel center)
+  float mr[4];  // middle-right
+  float bl[4];  // bottom-left
+  float bc[4];  // bottom-center
+  float br[4];  // bottom-right
+};
+
+struct WeightsSeparable5 {  // 1D weights of a separable kernel, radius 2.
+  // Horizontal 1D, distances 0..2, each replicated 4x.
+  float horz[3 * 4];
+  float vert[3 * 4];  // Vertical 1D, same layout as horz.
+};
+
+// For code-folding.
+namespace kernel {
+
+// Holds weights computed at runtime (e.g. inverse of another kernel).
+class Variable3 {
+ public:
+  // Broadcasts each of the nine coefficients (top/middle/bottom x
+  // left/center/right) into all four lanes of its Weights3x3 array.
+  explicit Variable3(const float tl, const float tc, const float tr,
+                     const float ml, const float mc, const float mr,
+                     const float bl, const float bc, const float br) {
+    const float coeffs[9] = {tl, tc, tr, ml, mc, mr, bl, bc, br};
+    // One entry per weight array, in the same order as coeffs.
+    float* const lanes[9] = {weights_.tl, weights_.tc, weights_.tr,
+                             weights_.ml, weights_.mc, weights_.mr,
+                             weights_.bl, weights_.bc, weights_.br};
+    for (size_t k = 0; k < 9; ++k) {
+      for (size_t i = 0; i < 4; ++i) lanes[k][i] = coeffs[k];
+    }
+  }
+
+  // Read-only access to the stored weights.
+  PIK_INLINE const Weights3x3& Weights() const { return weights_; }
+
+ private:
+  Weights3x3 weights_;
+};
+
+// Approximation of the Laplacian.
+struct Laplacian3 {
+  PIK_INLINE const Weights3x3& Weights() const {
+    constexpr float w0 = -4.0f;  // center
+    constexpr float w1 = 1.0f;   // horizontal/vertical neighbors
+    constexpr float w2 = 0.0f;   // diagonal neighbors
+    static constexpr Weights3x3 weights = {
+        {SIMD_REP4(w2)}, {SIMD_REP4(w1)}, {SIMD_REP4(w2)},
+        {SIMD_REP4(w1)}, {SIMD_REP4(w0)}, {SIMD_REP4(w1)},
+        {SIMD_REP4(w2)}, {SIMD_REP4(w1)}, {SIMD_REP4(w2)}};
+    return weights;
+  }
+};
+
+// Concentrates energy in low-frequency components (e.g. for antialiasing).
+struct Lowpass3 {
+  PIK_INLINE const Weights3x3& Weights() const {
+    // Computed by research/convolve_weights.py's cubic spline approximations of
+    // prolate spheroidal wave functions. The nine weights sum to ~1.
+    constexpr float w0 = 0.36208932f;  // center
+    constexpr float w1 = 0.12820096f;  // horizontal/vertical neighbors
+    constexpr float w2 = 0.03127668f;  // diagonal neighbors
+    static constexpr Weights3x3 weights = {
+        {SIMD_REP4(w2)}, {SIMD_REP4(w1)}, {SIMD_REP4(w2)},
+        {SIMD_REP4(w1)}, {SIMD_REP4(w0)}, {SIMD_REP4(w1)},
+        {SIMD_REP4(w2)}, {SIMD_REP4(w1)}, {SIMD_REP4(w2)}};
+    return weights;
+  }
+};
+
+struct Lowpass5 {  // Separable 5-tap lowpass, same weights in x and y.
+  PIK_INLINE const WeightsSeparable5& Weights() const {
+    constexpr float w0 = 0.41714928f;  // distance 0 (center)
+    constexpr float w1 = 0.25539268f;  // distance 1
+    constexpr float w2 = 0.03603267f;  // distance 2
+    static constexpr WeightsSeparable5 weights = {
+        {SIMD_REP4(w0), SIMD_REP4(w1), SIMD_REP4(w2)},
+        {SIMD_REP4(w0), SIMD_REP4(w1), SIMD_REP4(w2)}};
+    return weights;
+  }
+};
+
+struct Gaussian5Sigma1 {  // Separable 5-tap; presumably Gaussian, sigma = 1.
+  PIK_INLINE const WeightsSeparable5& Weights() const {
+    constexpr float w0 = 0.38774f;  // distance 0 (center)
+    constexpr float w1 = 0.24477f;  // distance 1
+    constexpr float w2 = 0.06136f;  // distance 2; w0 + 2 * (w1 + w2) is 1.
+    static constexpr WeightsSeparable5 weights = {
+        {SIMD_REP4(w0), SIMD_REP4(w1), SIMD_REP4(w2)},
+        {SIMD_REP4(w0), SIMD_REP4(w1), SIMD_REP4(w2)}};
+    return weights;
+  }
+};
+
+struct Gaussian5Sigma2 {  // Separable 5-tap; presumably Gaussian, sigma = 2.
+  PIK_INLINE const WeightsSeparable5& Weights() const {
+    constexpr float w0 = 0.250301f;  // distance 0 (center)
+    constexpr float w1 = 0.221461f;  // distance 1
+    constexpr float w2 = 0.153388f;  // distance 2; w0 + 2 * (w1 + w2) is ~1.
+    static constexpr WeightsSeparable5 weights = {
+        {SIMD_REP4(w0), SIMD_REP4(w1), SIMD_REP4(w2)},
+        {SIMD_REP4(w0), SIMD_REP4(w1), SIMD_REP4(w2)}};
+    return weights;
+  }
+};
+
+}  // namespace kernel
+
+// Non-vectorized implementations for validation.
+namespace slow {
+
+// Separable kernels, any radius; Wrap maps coordinates outside the image.
+template <int64_t kRadius, class Wrap>
+class SeparableConvolution {
+ public:
+  template <class Kernel>
+  static void Run(const ImageF& in, const size_t xsize, const size_t ysize,
+                  const Kernel& kernel, ImageF* out) {
+    const float* horz_weights = &kernel.Weights().horz[0];
+    const float* vert_weights = &kernel.Weights().vert[0];
+    for (size_t y = 0; y < ysize; ++y) {
+      float* const PIK_RESTRICT row_out = out->Row(y);
+      for (size_t x = 0; x < xsize; ++x) {
+        row_out[x] =
+            ConvolvePixel(in, xsize, ysize, x, y, horz_weights, vert_weights);
+      }
+    }
+  }
+
+ private:
+  static float ConvolvePixel(const ImageF& in, const size_t xsize,
+                             const size_t ysize, const size_t x, const size_t y,
+                             const float* PIK_RESTRICT horz_weights,
+                             const float* PIK_RESTRICT vert_weights) {
+    float mul = 0.0f;  // Sum over the (2 * kRadius + 1)^2 neighborhood.
+    for (int dy = -kRadius; dy <= kRadius; ++dy) {
+      const float wy = vert_weights[std::abs(dy) * 4];  // stride 4 per distance
+      const size_t sy = Wrap()(y + dy, ysize);
+      PIK_CHECK(sy < ysize);
+      const float* const PIK_RESTRICT row = in.ConstRow(sy);
+      for (int dx = -kRadius; dx <= kRadius; ++dx) {
+        const float wx = horz_weights[std::abs(dx) * 4];
+        const size_t sx = Wrap()(x + dx, xsize);
+        PIK_CHECK(sx < xsize);
+        mul += row[sx] * wx * wy;
+      }
+    }
+    return mul;
+  }
+};
+
+// Symmetric 3x3 kernel: one weight per Manhattan distance 0..2 from center.
+template <int64_t kRadius, class Wrap>
+struct Symmetric3x3Convolution {
+  static_assert(kRadius == 1, "Wrong kRadius");
+
+  template <class Kernel>
+  static void Run(const ImageF& in, const size_t xsize, const size_t ysize,
+                  const Kernel& kernel, ImageF* out) {
+    PIK_CHECK(xsize == out->xsize() && ysize == out->ysize());
+    const Weights3x3& weights = kernel.Weights();
+    // Only mc, tc and tl are read; symmetry is assumed for the rest.
+    for (size_t y = 0; y < ysize; ++y) {
+      const float* const PIK_RESTRICT row_t = in.ConstRow(Wrap()(y - 1, ysize));
+      const float* const PIK_RESTRICT row_m = in.ConstRow(y);
+      const float* const PIK_RESTRICT row_b = in.ConstRow(Wrap()(y + 1, ysize));
+      float* const PIK_RESTRICT row_out = out->Row(y);
+      // NOTE(review): at 0, `y - 1`/`x - 1` wrap as size_t before Wrap sees them.
+      for (size_t x = 0; x < xsize; ++x) {
+        float mul = row_m[x] * weights.mc[0];  // center
+        const int64_t xm1 = Wrap()(x - 1, xsize);
+        const int64_t xp1 = Wrap()(x + 1, xsize);
+        const float tl = row_t[xm1];
+        const float ml = row_m[xm1];
+        const float bl = row_b[xm1];
+        const float tr = row_t[xp1];
+        const float mr = row_m[xp1];
+        const float br = row_b[xp1];
+        mul += (row_t[x] + row_b[x] + ml + mr) * weights.tc[0];  // edges
+        mul += (tl + tr + bl + br) * weights.tl[0];  // corners
+        row_out[x] = mul;
+      }
+    }
+  }
+};
+
+template <int64_t kRadius, class Wrap>
+struct General3x3Convolution {
+  static_assert(kRadius == 1, "Wrong kRadius");
+  // Applies all nine weights of kernel.Weights(), reading lane 0 of each.
+  template <class Kernel>
+  static void Run(const ImageF& in, const size_t xsize, const size_t ysize,
+                  const Kernel& kernel, ImageF* out) {
+    PIK_CHECK(xsize == out->xsize() && ysize == out->ysize());
+    const Weights3x3& weights = kernel.Weights();
+
+    for (size_t y = 0; y < ysize; ++y) {
+      const float* const PIK_RESTRICT row_t = in.ConstRow(Wrap()(y - 1, ysize));
+      const float* const PIK_RESTRICT row_m = in.ConstRow(y);
+      const float* const PIK_RESTRICT row_b = in.ConstRow(Wrap()(y + 1, ysize));
+      float* const PIK_RESTRICT row_out = out->Row(y);
+      // NOTE(review): at 0, `y - 1`/`x - 1` wrap as size_t before Wrap sees them.
+      for (size_t x = 0; x < xsize; ++x) {
+        const int64_t xm1 = Wrap()(x - 1, xsize);
+        const int64_t xp1 = Wrap()(x + 1, xsize);
+        const float tl = row_t[xm1];
+        const float ml = row_m[xm1];
+        const float bl = row_b[xm1];
+        const float tr = row_t[xp1];
+        const float mr = row_m[xp1];
+        const float br = row_b[xp1];
+        float r = 0.0f;  // Accumulated row by row: top, middle, bottom.
+        r += tl * weights.tl[0] + row_t[x] * weights.tc[0] + tr * weights.tr[0];
+        r += ml * weights.ml[0] + row_m[x] * weights.mc[0] + mr * weights.mr[0];
+        r += bl * weights.bl[0] + row_b[x] * weights.bc[0] + br * weights.br[0];
+        row_out[x] = r;
+      }
+    }
+  }
+  // Runs the single-plane overload on each of the three planes in turn.
+  template <class Kernel>
+  static void Run(const Image3F& in, const size_t xsize, const size_t ysize,
+                  const Kernel& kernel, Image3F* out) {
+    for (int c = 0; c < 3; ++c) {
+      Run(in.Plane(c), xsize, ysize, kernel,
+          const_cast<ImageF*>(&out->Plane(c)));
+    }
+  }
+};
+
+// Slow N*R^2 algorithm in case weights are not separable, but avoids
+// bounds-checking overhead for interior pixels. Weights are the lower-right
+// quadrant of the kernel and need not be pre-normalized.
+template <int64_t kRadius, class Wrap>
+class SymmetricConvolution {
+ public:
+  static void Run(const ImageF& in, const size_t xsize, const size_t ysize,
+                  const float (&weights)[(kRadius + 1) * (kRadius + 1)],
+                  ImageF* out) {
+    // Normalize all weights (expand quadrant into entire kernel)
+    double sum = 0.0f;
+    for (int64_t ky = -kRadius; ky <= kRadius; ky++) {
+      const int64_t wy = std::abs(ky);
+      for (int64_t kx = -kRadius; kx <= kRadius; kx++) {
+        const int64_t wx = std::abs(kx);
+        sum += weights[wy * (kRadius + 1) + wx];
+      }
+    }
+    const float mul = sum == 0.0f ? 1.0f : 1.0 / sum;  // no scaling if sum is 0
+    float normalized[(kRadius + 1) * (kRadius + 1)];
+    for (size_t i = 0; i < (kRadius + 1) * (kRadius + 1); ++i) {
+      normalized[i] = weights[i] * mul;
+    }
+    // Signed row count: the unsigned `ysize - kRadius` would wrap around when
+    // ysize < kRadius and send the loops below out of bounds.
+    const int64_t rows = static_cast<int64_t>(ysize);
+    int64_t iy = 0;
+    for (; iy < kRadius && iy < rows; iy++) {  // top border rows
+      ConvolveRow<Wrap>(in, xsize, ysize, iy, normalized, out);
+    }
+    for (; iy < rows - kRadius; iy++) {  // interior rows, no wrapping in y
+      ConvolveRow<WrapUnchanged>(in, xsize, ysize, iy, normalized, out);
+    }
+    for (; iy < rows; iy++) {  // bottom border rows
+      ConvolveRow<Wrap>(in, xsize, ysize, iy, normalized, out);
+    }
+  }
+
+  static void Run(const Image3F& in, const size_t xsize, const size_t ysize,
+                  const float (&weights)[(kRadius + 1) * (kRadius + 1)],
+                  Image3F* out) {
+    for (int c = 0; c < 3; ++c) {  // each plane independently
+      Run(in.Plane(c), xsize, ysize, weights,
+          const_cast<ImageF*>(&out->Plane(c)));
+    }
+  }
+
+ private:
+  template <class WrapX, class WrapY>
+  static float ConvolvePixel(
+      const ImageF& in, const size_t xsize, const size_t ysize,
+      const int64_t ix, const int64_t iy,
+      const float (&weights)[(kRadius + 1) * (kRadius + 1)]) {
+    float sum = 0.0;
+    // ix: image; kx: kernel; wx: weight
+    for (int64_t ky = -kRadius; ky <= kRadius; ky++) {
+      const int64_t wy = std::abs(ky);
+      const int64_t y = WrapY()(iy + ky, ysize);
+      const float* PIK_RESTRICT row_in = in.ConstRow(y);
+      for (int64_t kx = -kRadius; kx <= kRadius; kx++) {
+        const int64_t wx = std::abs(kx);
+        const int64_t x = WrapX()(ix + kx, xsize);
+        sum += row_in[x] * weights[wy * (kRadius + 1) + wx];
+      }
+    }
+    return sum;
+  }
+
+  template <class WrapY>
+  static inline void ConvolveRow(
+      const ImageF& in, const size_t xsize, const size_t ysize,
+      const int64_t iy, const float (&weights)[(kRadius + 1) * (kRadius + 1)],
+      ImageF* PIK_RESTRICT out) {
+    float* PIK_RESTRICT row_out = out->Row(iy);
+    const int64_t cols = static_cast<int64_t>(xsize);  // signed, as for rows
+    int64_t ix = 0;
+    for (; ix < kRadius && ix < cols; ix++) {  // left border: mirror in x
+      row_out[ix] =
+          ConvolvePixel<WrapMirror, WrapY>(in, xsize, ysize, ix, iy, weights);
+    }
+    for (; ix < cols - kRadius; ix++) {  // interior: no wrapping in x
+      row_out[ix] = ConvolvePixel<WrapUnchanged, WrapY>(in, xsize, ysize, ix,
+                                                        iy, weights);
+    }
+    for (; ix < cols; ix++) {  // right border: mirror in x
+      row_out[ix] =
+          ConvolvePixel<WrapMirror, WrapY>(in, xsize, ysize, ix, iy, weights);
+    }
+  }
+};
+
+}  // namespace slow
+
+// Synthesizes left/right neighbors from a vector of center pixels.
+// Lane diagrams in the comments below list lanes from highest (leftmost
+// letter) to lowest (rightmost letter); an apostrophe separates the two
+// 128-bit halves of an AVX2 vector. Each function has three variants: AVX2
+// (8 lanes), SIMD_NONE (the vector is passed through or replaced as noted) and
+// a fallback for the remaining targets (4 lanes, SSE-style intrinsics).
+class Neighbors {
+  using D = SIMD_FULL(float);
+  using V = D::V;
+  static const D d;
+
+ public:
+  // Returns l[i] == c[i - 1].
+  // c = current vector, p = previous vector; only the highest lane of p
+  // contributes (it becomes lane 0 of the result).
+  static SIMD_ATTR PIK_INLINE V L1(const V c, const V p) {
+    // For AVX-512: try permutex2var_ps.
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+    // c = PONM'LKJI, p = Hxxx'xxxx
+    const V L_H = concat_lo_hi(c, p);
+    return combine_shift_right_bytes<12>(c, L_H);  // ONML'KJIH
+#elif SIMD_TARGET_VALUE == SIMD_NONE
+    return p;
+#else
+    // c = LKJI, p = Hxxx
+    return combine_shift_right_bytes<12>(c, p);  // KJIH
+#endif
+  }
+
+  // Returns l[i] == c[Mirror(i - 1)].
+  // For the first vector of a row: lane 0 has no left neighbor, so it is
+  // filled with c[0] itself.
+  static SIMD_ATTR PIK_INLINE V FirstL1(const V c) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+    SIMD_ALIGN constexpr int lanes[8] = {0, 0, 1, 2, 3, 4, 5, 6};
+    const auto indices = set_table_indices(d, lanes);
+    // c = PONM'LKJI
+    return table_lookup_lanes(c, indices);  // ONML'KJII
+#elif SIMD_TARGET_VALUE == SIMD_NONE
+    return c;
+#else
+    // c = LKJI
+    return V(_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(2, 1, 0, 0)));  // KJII
+#endif
+  }
+
+  // Returns l[i] == c[Mirror(i - 2)].
+  // For the first vector of a row: lanes 0 and 1 are filled with c[1] and
+  // c[0] respectively.
+  static SIMD_ATTR PIK_INLINE V FirstL2(const V c) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+    SIMD_ALIGN constexpr int lanes[8] = {1, 0, 0, 1, 2, 3, 4, 5};
+    const auto indices = set_table_indices(d, lanes);
+    // c = PONM'LKJI
+    return table_lookup_lanes(c, indices);  // NMLK'JIIJ
+#elif SIMD_TARGET_VALUE == SIMD_NONE
+    return setzero(d);  // unsupported, avoid calling this.
+#else
+    // c = LKJI
+    return V(_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(1, 0, 0, 1)));  // JIIJ
+#endif
+  }
+
+  // Returns r[i] == c[i + 1].
+  // c = current vector, n = next vector; only lane 0 of n contributes (it
+  // becomes the highest lane of the result).
+  static SIMD_ATTR PIK_INLINE V R1(const V c, const V n) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+    // c = PONM'LKJI, n = xxxx'xxxQ
+    const V Q_M = concat_lo_hi(n, c);             // Right-aligned (lower lane)
+    return combine_shift_right_bytes<4>(Q_M, c);  // QPON'MLKJ
+#elif SIMD_TARGET_VALUE == SIMD_NONE
+    return n;
+#else
+    // c = LKJI, n = xxxM
+    return combine_shift_right_bytes<4>(n, c);  // MLKJ
+#endif
+  }
+
+  // Returns r[i] == c[Mirror(i + 1)].
+  // For the last vector of a row: the highest lane has no right neighbor, so
+  // it is filled with the highest lane of c itself.
+  static SIMD_ATTR PIK_INLINE V LastR1(const V c) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+    SIMD_ALIGN constexpr uint32_t lanes[8] = {1, 2, 3, 4, 5, 6, 7, 7};
+    const auto indices = load(SIMD_FULL(uint32_t)(), lanes);
+    // c = PONM'LKJI
+    return V(_mm256_permutevar8x32_ps(c.raw, indices.raw));  // PPON'MLKJ
+#elif SIMD_TARGET_VALUE == SIMD_NONE
+    return c;
+#else
+    // c = LKJI
+    const auto L = broadcast<3>(c);
+    return combine_shift_right_bytes<4>(L, c);  // LLKJ
+#endif
+  }
+};
+
+// Tag type selecting the ConvolveRow overloads that read neighbors directly.
+// Requires kRadius valid (mirrored or neighbor) columns on either side of
+// [0, xsize). It is also safe to load entire vectors. This behavior is
+// required by TFNode with Borders(>0). In other cases, this assumption requires
+// ConvolveT to PadImage, so LeftRightInvalid would be more efficient.
+struct LeftRightValid {};
+
+// Tag type selecting the ConvolveRow overloads that synthesize the left/right
+// border values themselves (via mirroring).
+// No valid values outside [0, xsize), but the strategy may still safely load
+// the preceding vector, and/or round xsize up to the vector lane count. This
+// avoids needing PadImage.
+struct LeftRightInvalid {};
+
+// LeftRightInvalid requires xsize >= SIMD_FULL(float)::N + kConvolveMaxRadius.
+// NOTE(review): presumably an upper bound on kRadius over all strategies (the
+// strategies in this part of the file use radius 1 or 2) - confirm.
+static constexpr size_t kConvolveMaxRadius = 3;
+
+// For use by set_table_indices.
+// Returns a pointer to row `mod` of a d.N x d.N table of lane indices (d.N
+// consecutive int32_t values). Each row is a permutation pattern that builds a
+// mirrored vector out of the last d.N values of a row; see the diagrams below
+// for the loaded and resulting register contents.
+// `mod` is not range-checked and must be in [0, d.N). For SIMD_NONE there is
+// no table: nullptr is returned and the function must not be called.
+static inline const int32_t* MirrorLanes(const size_t mod) {
+  SIMD_FULL(float) d;
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+  // last  part  mirrored
+  // 01234567| 76543210   loadedReg 76543210 mirroredReg 01234567
+  // 01234567|8 8765432   loadedReg 87654321 mirroredReg 23456788
+  // 01234567|89 987654   loadedReg 98765432 mirroredReg 45678998
+  // 01234567|89A A9876   loadedReg A9876543 mirroredReg 6789AA98
+  // 01234567|89AB BA98
+  // 01234567|89ABC CBA
+  // 01234567|89ABCD DC
+  // 01234567|89ABCDE E   loadedReg EDCBA987 mirroredReg EEDCBA98
+  // Row index (trailing comment) == mod; entries are listed from lane 0 up.
+  SIMD_ALIGN static constexpr int32_t idx_lanes[d.N * d.N] = {
+      7, 6, 5, 4, 3, 2, 1, 0,  // 0
+      7, 7, 6, 5, 4, 3, 2, 1,  // 1
+      6, 7, 7, 6, 5, 4, 3, 2,  // 2
+      5, 6, 7, 7, 6, 5, 4, 3,  // 3
+      4, 5, 6, 7, 7, 6, 5, 4,  // 4
+      3, 4, 5, 6, 7, 7, 6, 5,  // 5
+      2, 3, 4, 5, 6, 7, 7, 6,  // 6
+      1, 2, 3, 4, 5, 6, 7, 7,  // 7
+  };
+  return idx_lanes + mod * d.N;
+#elif SIMD_TARGET_VALUE == SIMD_NONE
+  (void)d;         // silence warning about unused d
+  return nullptr;  // do not call
+#else
+  // 4-lane version of the table above.
+  // 0123| 3210   loadedReg 3210 mirroredReg 0123
+  // 0123|4 432   loadedReg 4321 mirroredReg 2344
+  // 0123|45 54   loadedReg 5432 mirroredReg 4554
+  // 0123|456 6   loadedReg 6543 mirroredReg 6654
+  SIMD_ALIGN static constexpr int32_t idx_lanes[d.N * d.N] = {
+      3, 2, 1, 0,  // 0
+      3, 3, 2, 1,  // 1
+      2, 3, 3, 2,  // 2
+      1, 2, 3, 3,  // 3
+  };
+  return idx_lanes + mod * d.N;
+#endif
+}
+
+namespace strategy {
+
+// 3x3 convolution by symmetric kernel with a single scan through the input.
+// Three weights are read: weights.mc (center), weights.tc (applied to the four
+// edge-adjacent neighbors) and weights.tl (applied to the four corners).
+class Symmetric3 {
+  using D = SIMD_FULL(float);
+  using V = D::V;
+
+ public:
+  static constexpr int64_t kRadius = 1;
+
+  // Convolves one row; left/right borders are synthesized by mirroring.
+  // row_m points to the middle input row, `stride` is the distance between
+  // rows in floats, and wrap_row maps the row pointers above/below to the rows
+  // actually read. kSizeModN selects the handling of the final vector
+  // (NOTE(review): presumably xsize % d.N, supplied by the caller - confirm).
+  // Only accesses pixels in [0, xsize).
+  template <size_t kSizeModN, class WrapRow>
+  static SIMD_ATTR PIK_INLINE void ConvolveRow(
+      LeftRightInvalid, const float* const PIK_RESTRICT row_m,
+      const size_t xsize, const int64_t stride, const WrapRow& wrap_row,
+      const Weights3x3& weights, float* const PIK_RESTRICT row_out) {
+    const D d;
+    // t, m, b = top, middle, bottom row;
+    const float* const PIK_RESTRICT row_t = wrap_row(row_m - stride, stride);
+    const float* const PIK_RESTRICT row_b = wrap_row(row_m + stride, stride);
+
+    // Must load in advance - compiler doesn't understand load_dup128 and
+    // schedules them too late.
+    const V w0 = load_dup128(d, weights.mc);
+    const V w1 = load_dup128(d, weights.tc);
+    const V w2 = load_dup128(d, weights.tl);
+
+    // l, c, r = left, center, right. Leftmost vector: need FirstL1.
+    {
+      const V tc = load_unaligned(d, row_t + 0);
+      const V mc = load_unaligned(d, row_m + 0);
+      const V bc = load_unaligned(d, row_b + 0);
+      const V tl = Neighbors::FirstL1(tc);
+      const V tr = load_unaligned(d, row_t + 0 + 1);
+      const V ml = Neighbors::FirstL1(mc);
+      const V mr = load_unaligned(d, row_m + 0 + 1);
+      const V bl = Neighbors::FirstL1(bc);
+      const V br = load_unaligned(d, row_b + 0 + 1);
+      const V conv =
+          WeightedSum(tl, tc, tr, ml, mc, mr, bl, bc, br, w0, w1, w2);
+      store(conv, d, row_out + 0);
+    }
+
+    // Loop as long as we can load enough new values:
+    size_t x = d.N;
+    for (; x + d.N + kRadius <= xsize; x += d.N) {
+      const auto conv = ConvolveValid(row_t, row_m, row_b, x, w0, w1, w2);
+      store(conv, d, row_out + x);
+    }
+
+    // For final (partial) vector:
+    const V tc = load_unaligned(d, row_t + x);
+    const V mc = load_unaligned(d, row_m + x);
+    const V bc = load_unaligned(d, row_b + x);
+
+    // Right neighbors of the final vector cannot be loaded from row + x + 1
+    // (that would read past xsize), so they are built by lane permutation.
+    V tr, mr, br;
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    tr = tc;  // Single-lane => mirrored right neighbor = center value.
+    mr = mc;
+    br = bc;
+#else
+    if (kSizeModN == 0) {
+      // The above loop didn't handle the last vector because it needs an
+      // additional right neighbor (generated via mirroring).
+      auto mirror = set_table_indices(d, MirrorLanes(d.N - 1));
+      tr = table_lookup_lanes(tc, mirror);
+      mr = table_lookup_lanes(mc, mirror);
+      br = table_lookup_lanes(bc, mirror);
+    } else {
+      auto mirror = set_table_indices(d, MirrorLanes((xsize % d.N) - 1));
+      // Loads last valid value into uppermost lane and mirrors.
+      tr = table_lookup_lanes(load_unaligned(d, row_t + xsize - d.N), mirror);
+      mr = table_lookup_lanes(load_unaligned(d, row_m + xsize - d.N), mirror);
+      br = table_lookup_lanes(load_unaligned(d, row_b + xsize - d.N), mirror);
+    }
+#endif
+
+    const V tl = load_unaligned(d, row_t + x - 1);
+    const V ml = load_unaligned(d, row_m + x - 1);
+    const V bl = load_unaligned(d, row_b + x - 1);
+    const V conv = WeightedSum(tl, tc, tr, ml, mc, mr, bl, bc, br, w0, w1, w2);
+    // Stores a whole vector at x even if fewer than d.N outputs remain.
+    store(conv, d, row_out + x);
+  }
+
+  // Convolves one row, reading left/right neighbors directly from memory at
+  // x - 1 and x + 1 (no border synthesis). Output is written in whole vectors,
+  // so the last store may extend past xsize when xsize is not a multiple of
+  // d.N.
+  template <size_t kSizeModN, class WrapRow>
+  static SIMD_ATTR PIK_INLINE void ConvolveRow(
+      LeftRightValid, const float* const PIK_RESTRICT row_m, const size_t xsize,
+      const int64_t stride, const WrapRow& wrap_row,
+      const Weights3x3& PIK_RESTRICT weights,
+      float* const PIK_RESTRICT row_out) {
+    const D d;
+    // t, m, b = top, middle, bottom row;
+    const float* const PIK_RESTRICT row_t = wrap_row(row_m - stride, stride);
+    const float* const PIK_RESTRICT row_b = wrap_row(row_m + stride, stride);
+
+    const V w0 = load_dup128(d, weights.mc);
+    const V w1 = load_dup128(d, weights.tc);
+    const V w2 = load_dup128(d, weights.tl);
+
+    // l, c, r = left, center, right.
+    for (size_t x = 0; x < xsize; x += d.N) {
+      const V conv = ConvolveValid(row_t, row_m, row_b, x, w0, w1, w2);
+      store(conv, d, row_out + x);
+    }
+  }
+
+ private:
+  // Returns sum{x_i * w_i}.
+  // Computed as mc * w0 + (tc + bc + ml + mr) * w1 + (tl + tr + bl + br) * w2.
+  template <class V>
+  static SIMD_ATTR PIK_INLINE V WeightedSum(const V tl, const V tc, const V tr,
+                                            const V ml, const V mc, const V mr,
+                                            const V bl, const V bc, const V br,
+                                            const V w0, const V w1,
+                                            const V w2) {
+    const V sum_tb = tc + bc;
+
+    // Faster than 5 mul + 4 FMA.
+    const V mul0 = mc * w0;
+    const V sum_lr = ml + mr;
+
+    const V x1 = sum_tb + sum_lr;
+    const V mul1 = mul_add(x1, w1, mul0);
+
+    const V sum_t2 = tl + tr;
+    const V sum_b2 = bl + br;
+    const V x2 = sum_t2 + sum_b2;
+    const V mul2 = mul_add(x2, w2, mul1);
+    return mul2;
+  }
+
+  // Returns the convolution of the d.N pixels starting at column x, loading
+  // all nine neighbor vectors from memory (columns x - 1 through x + d.N must
+  // be readable in all three rows).
+  static SIMD_ATTR PIK_INLINE V ConvolveValid(const float* PIK_RESTRICT row_t,
+                                              const float* PIK_RESTRICT row_m,
+                                              const float* PIK_RESTRICT row_b,
+                                              const int64_t x, const V w0,
+                                              const V w1, const V w2) {
+    const D d;
+    const V tc = load_unaligned(d, row_t + x);
+    const V mc = load_unaligned(d, row_m + x);
+    const V bc = load_unaligned(d, row_b + x);
+    const V tl = load_unaligned(d, row_t + x - 1);
+    const V tr = load_unaligned(d, row_t + x + 1);
+    const V ml = load_unaligned(d, row_m + x - 1);
+    const V mr = load_unaligned(d, row_m + x + 1);
+    const V bl = load_unaligned(d, row_b + x - 1);
+    const V br = load_unaligned(d, row_b + x + 1);
+    return WeightedSum(tl, tc, tr, ml, mc, mr, bl, bc, br, w0, w1, w2);
+  }
+};
+
+// 3x3, center column zero, right column = negated left column.
+// Two weights are read: weights.tl (applied to the top and bottom rows) and
+// weights.ml (applied to the middle row).
+class GradX3 {
+  using D = SIMD_FULL(float);
+  using V = D::V;
+
+ public:
+  static constexpr int64_t kRadius = 1;
+
+  // Convolves one row; left/right borders are synthesized by mirroring.
+  // row_m points to the middle input row, `stride` is the distance between
+  // rows in floats, and wrap_row maps the row pointers above/below to the rows
+  // actually read.
+  // Only accesses pixels in [0, xsize).
+  template <size_t kSizeModN, class WrapRow>
+  static SIMD_ATTR PIK_INLINE void ConvolveRow(
+      LeftRightInvalid, const float* const PIK_RESTRICT row_m,
+      const size_t xsize, const int64_t stride, const WrapRow& wrap_row,
+      const Weights3x3& weights, float* const PIK_RESTRICT row_out) {
+    const D d;
+    // t, m, b = top, middle, bottom row;
+    const float* const PIK_RESTRICT row_t = wrap_row(row_m - stride, stride);
+    const float* const PIK_RESTRICT row_b = wrap_row(row_m + stride, stride);
+
+    // Must load in advance - compiler doesn't understand load_dup128 and
+    // schedules them too late.
+    const V wtb = load_dup128(d, weights.tl);
+    const V wm = load_dup128(d, weights.ml);
+
+    // l, c, r = left, center, right. Leftmost vector: need FirstL1.
+    // The center vectors are loaded only to derive the left neighbors.
+    {
+      const V tc = load_unaligned(d, row_t + 0);
+      const V mc = load_unaligned(d, row_m + 0);
+      const V bc = load_unaligned(d, row_b + 0);
+      const V tl = Neighbors::FirstL1(tc);
+      const V tr = load_unaligned(d, row_t + 0 + 1);
+      const V ml = Neighbors::FirstL1(mc);
+      const V mr = load_unaligned(d, row_m + 0 + 1);
+      const V bl = Neighbors::FirstL1(bc);
+      const V br = load_unaligned(d, row_b + 0 + 1);
+      const V conv = WeightedSum(tl, tr, ml, mr, bl, br, wtb, wm);
+      store(conv, d, row_out + 0);
+    }
+
+    // Loop as long as we can load enough new values:
+    size_t x = d.N;
+    for (; x + d.N + kRadius <= xsize; x += d.N) {
+      const auto conv = ConvolveValid(row_t, row_m, row_b, x, wtb, wm);
+      store(conv, d, row_out + x);
+    }
+
+    // For final (partial) vector:
+    const V tc = load_unaligned(d, row_t + x);
+    const V mc = load_unaligned(d, row_m + x);
+    const V bc = load_unaligned(d, row_b + x);
+
+    // Right neighbors of the final vector are built by lane permutation
+    // instead of loading from row + x + 1.
+    V tr, mr, br;
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    tr = tc;  // Single-lane => mirrored right neighbor = center value.
+    mr = mc;
+    br = bc;
+#else
+    if (kSizeModN == 0) {
+      // The above loop didn't handle the last vector because it needs an
+      // additional right neighbor (generated via mirroring).
+      auto mirror = set_table_indices(d, MirrorLanes(d.N - 1));
+      tr = table_lookup_lanes(tc, mirror);
+      mr = table_lookup_lanes(mc, mirror);
+      br = table_lookup_lanes(bc, mirror);
+    } else {
+      auto mirror = set_table_indices(d, MirrorLanes((xsize % d.N) - 1));
+      // Loads last valid value into uppermost lane and mirrors.
+      tr = table_lookup_lanes(load_unaligned(d, row_t + xsize - d.N), mirror);
+      mr = table_lookup_lanes(load_unaligned(d, row_m + xsize - d.N), mirror);
+      br = table_lookup_lanes(load_unaligned(d, row_b + xsize - d.N), mirror);
+    }
+#endif
+
+    const V tl = load_unaligned(d, row_t + x - 1);
+    const V ml = load_unaligned(d, row_m + x - 1);
+    const V bl = load_unaligned(d, row_b + x - 1);
+    const V conv = WeightedSum(tl, tr, ml, mr, bl, br, wtb, wm);
+    // Stores a whole vector at x even if fewer than d.N outputs remain.
+    store(conv, d, row_out + x);
+  }
+
+  // Convolves one row, reading left/right neighbors directly from memory at
+  // x - 1 and x + 1 (no border synthesis). Output is written in whole vectors.
+  template <size_t kSizeModN, class WrapRow>
+  static SIMD_ATTR PIK_INLINE void ConvolveRow(
+      LeftRightValid, const float* const PIK_RESTRICT row_m, const size_t xsize,
+      const int64_t stride, const WrapRow& wrap_row,
+      const Weights3x3& PIK_RESTRICT weights,
+      float* const PIK_RESTRICT row_out) {
+    const D d;
+    // t, m, b = top, middle, bottom row;
+    const float* const PIK_RESTRICT row_t = wrap_row(row_m - stride, stride);
+    const float* const PIK_RESTRICT row_b = wrap_row(row_m + stride, stride);
+
+    const V wtb = load_dup128(d, weights.tl);
+    const V wm = load_dup128(d, weights.ml);
+
+    // l, c, r = left, center, right.
+    for (size_t x = 0; x < xsize; x += d.N) {
+      const V conv = ConvolveValid(row_t, row_m, row_b, x, wtb, wm);
+      store(conv, d, row_out + x);
+    }
+  }
+
+ private:
+  // Returns sum{x_i * w_i}.
+  // Computed as (ml - mr) * wm + ((tl - tr) + (bl - br)) * wtb.
+  template <class V>
+  static SIMD_ATTR PIK_INLINE V WeightedSum(const V tl, const V tr, const V ml,
+                                            const V mr, const V bl, const V br,
+                                            const V wtb, const V wm) {
+    const V sub_m = ml - mr;
+    const V mul_m = sub_m * wm;
+    const V sub_t = tl - tr;
+    const V sub_b = bl - br;
+    const V sum_tb = sub_t + sub_b;
+    return mul_add(sum_tb, wtb, mul_m);
+  }
+
+  // Returns the convolution of the d.N pixels starting at column x, loading
+  // the left (x - 1) and right (x + 1) neighbor vectors of all three rows.
+  static SIMD_ATTR PIK_INLINE V ConvolveValid(const float* PIK_RESTRICT row_t,
+                                              const float* PIK_RESTRICT row_m,
+                                              const float* PIK_RESTRICT row_b,
+                                              const int64_t x, const V wtb,
+                                              const V wm) {
+    const D d;
+    const V tl = load_unaligned(d, row_t + x - 1);
+    const V tr = load_unaligned(d, row_t + x + 1);
+    const V ml = load_unaligned(d, row_m + x - 1);
+    const V mr = load_unaligned(d, row_m + x + 1);
+    const V bl = load_unaligned(d, row_b + x - 1);
+    const V br = load_unaligned(d, row_b + x + 1);
+    return WeightedSum(tl, tr, ml, mr, bl, br, wtb, wm);
+  }
+};
+
+// 3x3, center row zero, bottom row = negated top row.
+// Two weights are read: weights.tl (applied to the left and right columns) and
+// weights.tc (applied to the center column).
+class GradY3 {
+  using D = SIMD_FULL(float);
+  using V = D::V;
+
+ public:
+  static constexpr int64_t kRadius = 1;
+
+  // Convolves one row; left/right borders are synthesized by mirroring.
+  // row_m is used only to locate the rows above and below (via wrap_row); the
+  // middle row itself is never read.
+  // Only accesses pixels in [0, xsize).
+  template <size_t kSizeModN, class WrapRow>
+  static SIMD_ATTR PIK_INLINE void ConvolveRow(
+      LeftRightInvalid, const float* const PIK_RESTRICT row_m,
+      const size_t xsize, const int64_t stride, const WrapRow& wrap_row,
+      const Weights3x3& weights, float* const PIK_RESTRICT row_out) {
+    const D d;
+    // t, m, b = top, middle, bottom row;
+    const float* const PIK_RESTRICT row_t = wrap_row(row_m - stride, stride);
+    const float* const PIK_RESTRICT row_b = wrap_row(row_m + stride, stride);
+
+    // Must load in advance - compiler doesn't understand load_dup128 and
+    // schedules them too late.
+    const V wlr = load_dup128(d, weights.tl);
+    const V wc = load_dup128(d, weights.tc);
+
+    // l, c, r = left, center, right. Leftmost vector: need FirstL1.
+    {
+      const V tc = load_unaligned(d, row_t + 0);
+      const V bc = load_unaligned(d, row_b + 0);
+      const V tl = Neighbors::FirstL1(tc);
+      const V tr = load_unaligned(d, row_t + 0 + 1);
+      const V bl = Neighbors::FirstL1(bc);
+      const V br = load_unaligned(d, row_b + 0 + 1);
+      const V conv = WeightedSum(tl, tc, tr, bl, bc, br, wlr, wc);
+      store(conv, d, row_out + 0);
+    }
+
+    // Loop as long as we can load enough new values:
+    size_t x = d.N;
+    for (; x + d.N + kRadius <= xsize; x += d.N) {
+      const auto conv = ConvolveValid(row_t, row_b, x, wlr, wc);
+      store(conv, d, row_out + x);
+    }
+
+    // For final (partial) vector:
+    const V tc = load_unaligned(d, row_t + x);
+    const V bc = load_unaligned(d, row_b + x);
+
+    // Right neighbors of the final vector are built by lane permutation
+    // instead of loading from row + x + 1.
+    V tr, br;
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    tr = tc;  // Single-lane => mirrored right neighbor = center value.
+    br = bc;
+#else
+    if (kSizeModN == 0) {
+      // The above loop didn't handle the last vector because it needs an
+      // additional right neighbor (generated via mirroring).
+      auto mirror = set_table_indices(d, MirrorLanes(d.N - 1));
+      tr = table_lookup_lanes(tc, mirror);
+      br = table_lookup_lanes(bc, mirror);
+    } else {
+      auto mirror = set_table_indices(d, MirrorLanes((xsize % d.N) - 1));
+      // Loads last valid value into uppermost lane and mirrors.
+      tr = table_lookup_lanes(load_unaligned(d, row_t + xsize - d.N), mirror);
+      br = table_lookup_lanes(load_unaligned(d, row_b + xsize - d.N), mirror);
+    }
+#endif
+
+    const V tl = load_unaligned(d, row_t + x - 1);
+    const V bl = load_unaligned(d, row_b + x - 1);
+    const V conv = WeightedSum(tl, tc, tr, bl, bc, br, wlr, wc);
+    // Stores a whole vector at x even if fewer than d.N outputs remain.
+    store(conv, d, row_out + x);
+  }
+
+  // Convolves one row, reading left/right neighbors directly from memory at
+  // x - 1 and x + 1 (no border synthesis). Output is written in whole vectors.
+  template <size_t kSizeModN, class WrapRow>
+  static SIMD_ATTR PIK_INLINE void ConvolveRow(
+      LeftRightValid, const float* const PIK_RESTRICT row_m, const size_t xsize,
+      const int64_t stride, const WrapRow& wrap_row,
+      const Weights3x3& PIK_RESTRICT weights,
+      float* const PIK_RESTRICT row_out) {
+    const D d;
+    // t, m, b = top, middle, bottom row;
+    const float* const PIK_RESTRICT row_t = wrap_row(row_m - stride, stride);
+    const float* const PIK_RESTRICT row_b = wrap_row(row_m + stride, stride);
+
+    const V wlr = load_dup128(d, weights.tl);
+    const V wc = load_dup128(d, weights.tc);
+
+    // l, c, r = left, center, right.
+    for (size_t x = 0; x < xsize; x += d.N) {
+      const V conv = ConvolveValid(row_t, row_b, x, wlr, wc);
+      store(conv, d, row_out + x);
+    }
+  }
+
+ private:
+  // Returns sum{x_i * w_i}.
+  // Computed as (tc - bc) * wc + ((tl - bl) + (tr - br)) * wlr.
+  template <class V>
+  static SIMD_ATTR PIK_INLINE V WeightedSum(const V tl, const V tc, const V tr,
+                                            const V bl, const V bc, const V br,
+                                            const V wlr, const V wc) {
+    const V sub_c = tc - bc;
+    const V mul_c = sub_c * wc;
+    const V sub_l = tl - bl;
+    const V sub_r = tr - br;
+    const V sum_lr = sub_l + sub_r;
+    return mul_add(sum_lr, wlr, mul_c);
+  }
+
+  // Returns the convolution of the d.N pixels starting at column x, loading
+  // the left, center and right vectors of the top and bottom rows.
+  static SIMD_ATTR PIK_INLINE V ConvolveValid(const float* PIK_RESTRICT row_t,
+                                              const float* PIK_RESTRICT row_b,
+                                              const int64_t x, const V wlr,
+                                              const V wc) {
+    const D d;
+    const V tc = load_unaligned(d, row_t + x);
+    const V bc = load_unaligned(d, row_b + x);
+    const V tl = load_unaligned(d, row_t + x - 1);
+    const V tr = load_unaligned(d, row_t + x + 1);
+    const V bl = load_unaligned(d, row_b + x - 1);
+    const V br = load_unaligned(d, row_b + x + 1);
+    return WeightedSum(tl, tc, tr, bl, bc, br, wlr, wc);
+  }
+};
+
+// 3x3, all but corners zero; br = tl and tr = bl = -tl (see WeightedSum).
+// A single weight is read: weights.tl.
+class Corner3 {
+  using D = SIMD_FULL(float);
+  using V = D::V;
+
+ public:
+  static constexpr int64_t kRadius = 1;
+
+  // Convolves one row; left/right borders are synthesized by mirroring.
+  // row_m is used only to locate the rows above and below (via wrap_row); the
+  // middle row itself is never read.
+  // Only accesses pixels in [0, xsize).
+  template <size_t kSizeModN, class WrapRow>
+  static SIMD_ATTR PIK_INLINE void ConvolveRow(
+      LeftRightInvalid, const float* const PIK_RESTRICT row_m,
+      const size_t xsize, const int64_t stride, const WrapRow& wrap_row,
+      const Weights3x3& weights, float* const PIK_RESTRICT row_out) {
+    const D d;
+    // t, m, b = top, middle, bottom row;
+    const float* const PIK_RESTRICT row_t = wrap_row(row_m - stride, stride);
+    const float* const PIK_RESTRICT row_b = wrap_row(row_m + stride, stride);
+
+    // Must load in advance - compiler doesn't understand load_dup128 and
+    // schedules them too late.
+    const V w = load_dup128(d, weights.tl);
+
+    // l, c, r = left, center, right. Leftmost vector: need FirstL1.
+    // The center vectors are loaded only to derive the left neighbors.
+    {
+      const V tc = load_unaligned(d, row_t + 0);
+      const V bc = load_unaligned(d, row_b + 0);
+      const V tl = Neighbors::FirstL1(tc);
+      const V tr = load_unaligned(d, row_t + 0 + 1);
+      const V bl = Neighbors::FirstL1(bc);
+      const V br = load_unaligned(d, row_b + 0 + 1);
+      const V conv = WeightedSum(tl, tr, bl, br, w);
+      store(conv, d, row_out + 0);
+    }
+
+    // Loop as long as we can load enough new values:
+    size_t x = d.N;
+    for (; x + d.N + kRadius <= xsize; x += d.N) {
+      const auto conv = ConvolveValid(row_t, row_b, x, w);
+      store(conv, d, row_out + x);
+    }
+
+    // For final (partial) vector:
+    const V tc = load_unaligned(d, row_t + x);
+    const V bc = load_unaligned(d, row_b + x);
+
+    // Right neighbors of the final vector are built by lane permutation
+    // instead of loading from row + x + 1.
+    V tr, br;
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    tr = tc;  // Single-lane => mirrored right neighbor = center value.
+    br = bc;
+#else
+    if (kSizeModN == 0) {
+      // The above loop didn't handle the last vector because it needs an
+      // additional right neighbor (generated via mirroring).
+      auto mirror = set_table_indices(d, MirrorLanes(d.N - 1));
+      tr = table_lookup_lanes(tc, mirror);
+      br = table_lookup_lanes(bc, mirror);
+    } else {
+      auto mirror = set_table_indices(d, MirrorLanes((xsize % d.N) - 1));
+      // Loads last valid value into uppermost lane and mirrors.
+      tr = table_lookup_lanes(load_unaligned(d, row_t + xsize - d.N), mirror);
+      br = table_lookup_lanes(load_unaligned(d, row_b + xsize - d.N), mirror);
+    }
+#endif
+
+    const V tl = load_unaligned(d, row_t + x - 1);
+    const V bl = load_unaligned(d, row_b + x - 1);
+    const V conv = WeightedSum(tl, tr, bl, br, w);
+    // Stores a whole vector at x even if fewer than d.N outputs remain.
+    store(conv, d, row_out + x);
+  }
+
+  // Convolves one row, reading left/right neighbors directly from memory at
+  // x - 1 and x + 1 (no border synthesis). Output is written in whole vectors.
+  template <size_t kSizeModN, class WrapRow>
+  static SIMD_ATTR PIK_INLINE void ConvolveRow(
+      LeftRightValid, const float* const PIK_RESTRICT row_m, const size_t xsize,
+      const int64_t stride, const WrapRow& wrap_row,
+      const Weights3x3& PIK_RESTRICT weights,
+      float* const PIK_RESTRICT row_out) {
+    const D d;
+    // t, m, b = top, middle, bottom row;
+    const float* const PIK_RESTRICT row_t = wrap_row(row_m - stride, stride);
+    const float* const PIK_RESTRICT row_b = wrap_row(row_m + stride, stride);
+
+    const V w = load_dup128(d, weights.tl);
+
+    // l, c, r = left, center, right.
+    for (size_t x = 0; x < xsize; x += d.N) {
+      const V conv = ConvolveValid(row_t, row_b, x, w);
+      store(conv, d, row_out + x);
+    }
+  }
+
+ private:
+  // Returns sum{x_i * w_i}.
+  // Computed as ((tl - bl) + (br - tr)) * w, i.e. tl and br are weighted by
+  // +w, tr and bl by -w.
+  template <class V>
+  static SIMD_ATTR PIK_INLINE V WeightedSum(const V tl, const V tr, const V bl,
+                                            const V br, const V w) {
+    const V sub_l = tl - bl;
+    const V sub_r = br - tr;
+    const V sum = sub_l + sub_r;
+    return sum * w;
+  }
+
+  // Returns the convolution of the d.N pixels starting at column x, loading
+  // the left (x - 1) and right (x + 1) vectors of the top and bottom rows.
+  static SIMD_ATTR PIK_INLINE V ConvolveValid(const float* PIK_RESTRICT row_t,
+                                              const float* PIK_RESTRICT row_b,
+                                              const int64_t x, const V w) {
+    const D d;
+    const V tl = load_unaligned(d, row_t + x - 1);
+    const V tr = load_unaligned(d, row_t + x + 1);
+    const V bl = load_unaligned(d, row_b + x - 1);
+    const V br = load_unaligned(d, row_b + x + 1);
+    return WeightedSum(tl, tr, bl, br, w);
+  }
+};
+
+// 3x3, NSEW = 1 and corners == 0.
+// Only weights.mc (the center tap) is read; the top, bottom, left and right
+// neighbors are added with an implicit weight of 1 and corners are ignored.
+class Laplacian3 {
+  using D = SIMD_FULL(float);
+  using V = D::V;
+
+ public:
+  static constexpr int64_t kRadius = 1;
+
+  // Convolves one row; left/right borders are synthesized by mirroring.
+  // row_m points to the middle input row, `stride` is the distance between
+  // rows in floats, and wrap_row maps the row pointers above/below to the rows
+  // actually read. Left/right neighbors are derived from registers via
+  // Neighbors:: instead of additional unaligned loads.
+  // Only accesses pixels in [0, xsize).
+  template <size_t kSizeModN, class WrapRow>
+  static SIMD_ATTR PIK_INLINE void ConvolveRow(
+      LeftRightInvalid, const float* const PIK_RESTRICT row_m,
+      const size_t xsize, const int64_t stride, const WrapRow& wrap_row,
+      const Weights3x3& weights, float* const PIK_RESTRICT row_out) {
+    const D d;
+    // t, m, b = top, middle, bottom row;
+    const float* const PIK_RESTRICT row_t = wrap_row(row_m - stride, stride);
+    const float* const PIK_RESTRICT row_b = wrap_row(row_m + stride, stride);
+    // p, c, n = previous, current, next register. Only the middle row needs a
+    // "previous" register (for the left neighbors); the top and bottom rows
+    // contribute just their current vector.
+    auto tc = load_unaligned(d, row_t + d.N);
+    auto mp = load_unaligned(d, row_m);
+    auto mc = load_unaligned(d, row_m + d.N);
+    auto bc = load_unaligned(d, row_b + d.N);
+
+    const V w0 = load_dup128(d, weights.mc);
+
+    // Leftmost vector: "previous" is actually center.
+    {
+      const V t_first = load_unaligned(d, row_t);
+      const V b_first = load_unaligned(d, row_b);
+      const V sum_lr = Neighbors::FirstL1(mp) + Neighbors::R1(mp, mc);
+      const V conv = mul_add(mp, w0, t_first + b_first + sum_lr);
+      store(conv, d, row_out + 0);
+    }
+
+    // Loop while at least 1 value to load (higher lanes may be uninitialized)
+    size_t x = d.N;
+    for (; x + d.N + 1 <= xsize; x += d.N) {
+      const V tn = load_unaligned(d, row_t + x + d.N);
+      const V mn = load_unaligned(d, row_m + x + d.N);
+      const V bn = load_unaligned(d, row_b + x + d.N);
+
+      // Here, this is faster than the unaligned loads in Symmetric3!
+      const V sum_lr = Neighbors::L1(mc, mp) + Neighbors::R1(mc, mn);
+      const V conv = mul_add(mc, w0, (tc + bc) + sum_lr);
+      store(conv, d, row_out + x);
+
+      // Rotate registers for the next iteration.
+      tc = tn;
+      mp = mc;
+      mc = mn;
+      bc = bn;
+    }
+
+    // MirrorLanes has no table for single-lane targets, and a single lane can
+    // never be a partial vector.
+#if SIMD_TARGET_VALUE != SIMD_NONE
+    // Not a whole vector => need to pad "center" via mirroring.
+    if ((xsize % d.N) != 0) {
+      // Last valid value in uppermost lane.
+      const V t_last = load_unaligned(d, row_t + xsize - d.N);
+      const V m_last = load_unaligned(d, row_m + xsize - d.N);
+      const V b_last = load_unaligned(d, row_b + xsize - d.N);
+
+      const auto mirror = set_table_indices(d, MirrorLanes(xsize % d.N));
+      tc = table_lookup_lanes(t_last, mirror);
+      mc = table_lookup_lanes(m_last, mirror);
+      bc = table_lookup_lanes(b_last, mirror);
+    }
+#endif
+
+    // Write the last vector, of which [1, d.N] lanes are valid.
+    {
+      const V sum_lr = Neighbors::L1(mc, mp) + Neighbors::LastR1(mc);
+      const V conv = mul_add(mc, w0, tc + bc + sum_lr);
+      store(conv, d, row_out + x);
+    }
+  }
+
+  // Convolves one row without border synthesis: the vector preceding row_m
+  // and the vector following each processed vector are loaded directly, so
+  // they must be readable. Output is written in whole vectors.
+  template <size_t kSizeModN, class WrapRow>
+  static SIMD_ATTR PIK_INLINE void ConvolveRow(
+      LeftRightValid, const float* const PIK_RESTRICT row_m, const size_t xsize,
+      const int64_t stride, const WrapRow& wrap_row, const Weights3x3& weights,
+      float* const PIK_RESTRICT row_out) {
+    const D d;
+    // t, m, b = top, middle, bottom row;
+    const float* const PIK_RESTRICT row_t = wrap_row(row_m - stride, stride);
+    const float* const PIK_RESTRICT row_b = wrap_row(row_m + stride, stride);
+    // p, c, n = previous, current, next register. Only the middle row needs a
+    // "previous" register (for the left neighbors).
+    auto tc = load_unaligned(d, row_t);
+    auto mp = load_unaligned(d, row_m - d.N);
+    auto mc = load_unaligned(d, row_m);
+    auto bc = load_unaligned(d, row_b);
+
+    const V w0 = load_dup128(d, weights.mc);
+
+    // Loop until all output produced. WARNING: padding is uninitialized!
+    for (size_t x = 0; x < xsize; x += d.N) {
+      const V tn = load_unaligned(d, row_t + x + d.N);
+      const V mn = load_unaligned(d, row_m + x + d.N);
+      const V bn = load_unaligned(d, row_b + x + d.N);
+
+      const V sum_lr = Neighbors::L1(mc, mp) + Neighbors::R1(mc, mn);
+      const V conv = mul_add(mc, w0, (tc + bc) + sum_lr);
+      store(conv, d, row_out + x);
+
+      // Rotate registers for the next iteration.
+      tc = tn;
+      mp = mc;
+      mc = mn;
+      bc = bn;
+    }
+  }
+};
+
+// 5x5 convolution by separable kernel with a single scan through the input.
+class Separable5 {
+  using D = SIMD_FULL(float);
+  using V = D::V;
+
+ public:
+  static constexpr int64_t kRadius = 2;
+
+  // Convolves one row with the separable 5x5 kernel; left/right borders are
+  // handled by the HorzConvolveFirst / HorzConvolveLast helpers and, for the
+  // trailing pixels, by a scalar loop using Mirror.
+  // row_m points to the middle input row, `stride` is the distance between
+  // rows in floats, and wrap_row maps the row pointers above/below to the rows
+  // actually read. weights.horz / weights.vert are read at offsets 0, 4 and 8
+  // (one group of four floats per distance 0, 1, 2 from the center).
+  template <size_t kSizeModN, class WrapRow>
+  static SIMD_ATTR PIK_INLINE void ConvolveRow(
+      LeftRightInvalid, const float* const PIK_RESTRICT row_m,
+      const size_t xsize, const int64_t stride, const WrapRow& wrap_row,
+      const WeightsSeparable5& weights, float* const PIK_RESTRICT row_out) {
+    const D d;
+    const int64_t neg_stride = -stride;  // allows LEA addressing.
+    // t2, t1 = two rows / one row above; b1, b2 = one row / two rows below.
+    const float* const PIK_RESTRICT row_t2 =
+        wrap_row(row_m + 2 * neg_stride, stride);
+    const float* const PIK_RESTRICT row_t1 =
+        wrap_row(row_m + 1 * neg_stride, stride);
+    const float* const PIK_RESTRICT row_b1 =
+        wrap_row(row_m + 1 * stride, stride);
+    const float* const PIK_RESTRICT row_b2 =
+        wrap_row(row_m + 2 * stride, stride);
+
+    // wh* = horizontal, wv* = vertical weights for distance 0, 1, 2.
+    const V wh0 = load_dup128(d, weights.horz + 0 * 4);
+    const V wh1 = load_dup128(d, weights.horz + 1 * 4);
+    const V wh2 = load_dup128(d, weights.horz + 2 * 4);
+    const V wv0 = load_dup128(d, weights.vert + 0 * 4);
+    const V wv1 = load_dup128(d, weights.vert + 1 * 4);
+    const V wv2 = load_dup128(d, weights.vert + 2 * 4);
+
+    size_t x = 0;
+
+    // Left border: each of the five rows is convolved horizontally, then the
+    // results are combined vertically (rows at equal distance share a weight).
+    // Need to loop more than once for scalars (d.N == 1).
+    for (; x < kRadius; x += d.N) {
+      const V conv0 = HorzConvolveFirst(row_m, x, xsize, wh0, wh1, wh2) * wv0;
+
+      const V conv1t = HorzConvolveFirst(row_t1, x, xsize, wh0, wh1, wh2);
+      const V conv1b = HorzConvolveFirst(row_b1, x, xsize, wh0, wh1, wh2);
+      const V conv1 = mul_add(conv1t + conv1b, wv1, conv0);
+
+      const V conv2t = HorzConvolveFirst(row_t2, x, xsize, wh0, wh1, wh2);
+      const V conv2b = HorzConvolveFirst(row_b2, x, xsize, wh0, wh1, wh2);
+      const V conv2 = mul_add(conv2t + conv2b, wv2, conv1);
+      store(conv2, d, row_out + x);
+    }
+
+    // Main loop: load inputs without padding
+    for (; x + d.N + kRadius <= xsize; x += d.N) {
+      const V conv0 = HorzConvolve(row_m + x, wh0, wh1, wh2) * wv0;
+
+      const V conv1t = HorzConvolve(row_t1 + x, wh0, wh1, wh2);
+      const V conv1b = HorzConvolve(row_b1 + x, wh0, wh1, wh2);
+      const V conv1 = mul_add(conv1t + conv1b, wv1, conv0);
+
+      const V conv2t = HorzConvolve(row_t2 + x, wh0, wh1, wh2);
+      const V conv2b = HorzConvolve(row_b2 + x, wh0, wh1, wh2);
+      const V conv2 = mul_add(conv2t + conv2b, wv2, conv1);
+      store(conv2, d, row_out + x);
+    }
+
+    // Last full vector to write (the above loop handled mod >= 2)
+    // The statement header depends on the target: single-lane targets repeat
+    // the body until the row is finished, vector targets run it at most once.
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    while (x < xsize) {
+#else
+    if (kSizeModN < 2) {
+#endif
+      const V conv0 =
+          HorzConvolveLast<kSizeModN>(row_m, x, xsize, wh0, wh1, wh2) * wv0;
+
+      const V conv1t =
+          HorzConvolveLast<kSizeModN>(row_t1, x, xsize, wh0, wh1, wh2);
+      const V conv1b =
+          HorzConvolveLast<kSizeModN>(row_b1, x, xsize, wh0, wh1, wh2);
+      const V conv1 = mul_add(conv1t + conv1b, wv1, conv0);
+
+      const V conv2t =
+          HorzConvolveLast<kSizeModN>(row_t2, x, xsize, wh0, wh1, wh2);
+      const V conv2b =
+          HorzConvolveLast<kSizeModN>(row_b2, x, xsize, wh0, wh1, wh2);
+      const V conv2 = mul_add(conv2t + conv2b, wv2, conv1);
+      store(conv2, d, row_out + x);
+      x += d.N;
+    }
+
+    // If mod = 0, the above vector was the last.
+    if (kSizeModN != 0) {
+      // Remaining pixels: scalar 5x5 sum, one output at a time. Columns outside
+      // the row are mapped back via Mirror; rows go through wrap_row.
+      for (; x < xsize; ++x) {
+        float mul = 0.0f;
+        for (int64_t dy = -kRadius; dy <= kRadius; ++dy) {
+          // First float of the weight group for this vertical distance.
+          const float wy = weights.vert[std::abs(dy) * 4];
+          const float* clamped_row = wrap_row(row_m + dy * stride, stride);
+          for (int64_t dx = -kRadius; dx <= kRadius; ++dx) {
+            const float wx = weights.horz[std::abs(dx) * 4];
+            const int64_t clamped_x = Mirror(x + dx, xsize);
+            mul += clamped_row[clamped_x] * wx * wy;
+          }
+        }
+        row_out[x] = mul;
+      }
+    }
+  }
+
+  // Row convolution for the LeftRightValid case: every output vector,
+  // including the first and last, goes through HorzConvolve, which loads up to
+  // two floats before and after the current position. kSizeModN is not used
+  // by this overload. Output is stored d.N lanes at a time until x >= xsize.
+  template <size_t kSizeModN, class WrapRow>
+  static SIMD_ATTR PIK_INLINE void ConvolveRow(
+      LeftRightValid, const float* const PIK_RESTRICT row_m, const size_t xsize,
+      const int64_t stride, const WrapRow& wrap_row,
+      const WeightsSeparable5& weights, float* const PIK_RESTRICT row_out) {
+    const D d;
+
+    // Rows two/one above and one/two below the center row, each passed
+    // through wrap_row.
+    const int64_t neg_stride = -stride;  // allows LEA addressing.
+    const float* const PIK_RESTRICT above2 =
+        wrap_row(row_m + 2 * neg_stride, stride);
+    const float* const PIK_RESTRICT above1 =
+        wrap_row(row_m + 1 * neg_stride, stride);
+    const float* const PIK_RESTRICT below1 =
+        wrap_row(row_m + 1 * stride, stride);
+    const float* const PIK_RESTRICT below2 =
+        wrap_row(row_m + 2 * stride, stride);
+
+    // Horizontal (wh*) and vertical (wv*) weights; the index is the distance
+    // from the center tap, and entries are 4 floats apart.
+    const V wh0 = load_dup128(d, weights.horz + 0 * 4);
+    const V wh1 = load_dup128(d, weights.horz + 1 * 4);
+    const V wh2 = load_dup128(d, weights.horz + 2 * 4);
+    const V wv0 = load_dup128(d, weights.vert + 0 * 4);
+    const V wv1 = load_dup128(d, weights.vert + 1 * 4);
+    const V wv2 = load_dup128(d, weights.vert + 2 * 4);
+
+    // Loop until all output produced. WARNING: padding is uninitialized!
+    size_t x = 0;
+    while (x < xsize) {
+      // Center row, weighted by the central vertical tap.
+      const V center = HorzConvolve(row_m + x, wh0, wh1, wh2) * wv0;
+
+      // Rows at vertical distance 1.
+      const V near_t = HorzConvolve(above1 + x, wh0, wh1, wh2);
+      const V near_b = HorzConvolve(below1 + x, wh0, wh1, wh2);
+      const V with_near = mul_add(near_t + near_b, wv1, center);
+
+      // Rows at vertical distance 2.
+      const V far_t = HorzConvolve(above2 + x, wh0, wh1, wh2);
+      const V far_b = HorzConvolve(below2 + x, wh0, wh1, wh2);
+      const V result = mul_add(far_t + far_b, wv2, with_near);
+
+      store(result, d, row_out + x);
+      x += d.N;
+    }
+  }
+
+ private:
+  // Same as HorzConvolve for the first/last vector in a row.
+  // This variant handles the vector at the start of a row: the left
+  // neighbors are not loaded from row + x - 1 / row + x - 2. On SIMD_NONE
+  // targets they are loaded from the indices Mirror(x - 1, xsize) and
+  // Mirror(x - 2, xsize); on all other targets they are derived from the
+  // center vector via Neighbors::FirstL1 / FirstL2. Right neighbors are
+  // loaded directly from row + x + 1 and row + x + 2.
+  // wh0/wh1/wh2 are the weights applied to the center, the distance-1 pair
+  // and the distance-2 pair respectively.
+  static SIMD_ATTR PIK_INLINE V HorzConvolveFirst(
+      const float* const PIK_RESTRICT row, const int64_t x, const int64_t xsize,
+      const V wh0, const V wh1, const V wh2) {
+    const D d;
+    const V c = load_unaligned(d, row + x);
+    const V mul0 = c * wh0;
+
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    const V l1 = load_unaligned(d, row + Mirror(x - 1, xsize));
+    const V l2 = load_unaligned(d, row + Mirror(x - 2, xsize));
+#else
+    const V l1 = Neighbors::FirstL1(c);
+    const V l2 = Neighbors::FirstL2(c);
+#endif
+
+    const V r1 = load_unaligned(d, row + x + 1);
+    const V r2 = load_unaligned(d, row + x + 2);
+
+    // Accumulate the distance-1 and distance-2 neighbor sums onto the
+    // weighted center.
+    const V mul1 = mul_add(l1 + r1, wh1, mul0);
+    const V mul2 = mul_add(l2 + r2, wh2, mul1);
+    return mul2;
+  }
+
+  // Counterpart of HorzConvolveFirst for the vector at the end of a row:
+  // left neighbors are loaded directly from row + x - 1 and row + x - 2,
+  // while right neighbors are not loaded from row + x + 1 / row + x + 2.
+  // On SIMD_NONE targets they come from Mirror(x + 1, xsize) and
+  // Mirror(x + 2, xsize). On other targets they are built with lane table
+  // lookups, selected by kSizeModN (0 or 1, per the branch comment below).
+  // NOTE(review): the exact lane permutation produced by MirrorLanes is not
+  // visible here - confirm against its definition.
+  template <size_t kSizeModN>
+  static SIMD_ATTR PIK_INLINE V
+  HorzConvolveLast(const float* const PIK_RESTRICT row, const int64_t x,
+                   const int64_t xsize, const V wh0, const V wh1, const V wh2) {
+    const D d;
+    const V c = load_unaligned(d, row + x);
+    const V mul0 = c * wh0;
+
+    const V l1 = load_unaligned(d, row + x - 1);
+    const V l2 = load_unaligned(d, row + x - 2);
+
+    V r1, r2;
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    r1 = load_unaligned(d, row + Mirror(x + 1, xsize));
+    r2 = load_unaligned(d, row + Mirror(x + 2, xsize));
+#else
+    if (kSizeModN == 0) {
+      // Both right neighbors are permutations of the center vector.
+      r2 = table_lookup_lanes(c, set_table_indices(d, MirrorLanes(d.N - 2)));
+      r1 = table_lookup_lanes(c, set_table_indices(d, MirrorLanes(d.N - 1)));
+    } else {  // == 1
+      // The final d.N floats of the row serve as r1; r2 is a permutation of
+      // that same vector.
+      const auto last = load_unaligned(d, row + xsize - d.N);
+      r2 = table_lookup_lanes(last, set_table_indices(d, MirrorLanes(d.N - 1)));
+      r1 = last;
+    }
+#endif
+
+    // Sum of pixels with Manhattan distance i, multiplied by weights[i].
+    const V sum1 = l1 + r1;
+    const V mul1 = mul_add(sum1, wh1, mul0);
+    const V sum2 = l2 + r2;
+    const V mul2 = mul_add(sum2, wh2, mul1);
+    return mul2;
+  }
+
+  // Horizontal 5-tap pass for one vector starting at pos.
+  // Requires kRadius valid pixels before/after pos, because the neighbor
+  // vectors are loaded from pos - 2 .. pos + 2.
+  static SIMD_ATTR PIK_INLINE V
+  HorzConvolve(const float* const PIK_RESTRICT pos, const V wh0, const V wh1,
+               const V wh2) {
+    const D d;
+    const V center = load_unaligned(d, pos);
+    const V acc0 = center * wh0;
+
+    // Loading anew is faster than combining vectors.
+    const V left1 = load_unaligned(d, pos - 1);
+    const V right1 = load_unaligned(d, pos + 1);
+    const V left2 = load_unaligned(d, pos - 2);
+    const V right2 = load_unaligned(d, pos + 2);
+
+    // Sum of pixels with Manhattan distance i, multiplied by weights[i].
+    const V acc1 = mul_add(left1 + right1, wh1, acc0);
+    return mul_add(left2 + right2, wh2, acc1);
+  }
+};
+
+}  // namespace strategy
+
+// Border tag selecting the fast path in ConvolveT: avoids PadImage, but
+// requires a strategy that supports LeftRightInvalid (rows are convolved with
+// that tag). This is the tag used by the three-argument ConvolveT::Run.
+struct BorderNeverUsed {};
+
+// Border tag selecting the slow path in ConvolveT: Convolve calls PadImage on
+// the input and requires bounds checks; rows are then convolved with
+// LeftRightValid.
+struct BorderNeedsInit {};
+
+// 3x3 kernels require inputs at least this wide - for the first vector, they
+// load right neighbors (N lanes starting from x + 1).
+// ConvolveT::Run enforces this minimum via PIK_CHECK on in.xsize().
+static constexpr size_t kConvolveMinWidth = SIMD_FULL(float)::N + 1;
+
+// Single entry point for convolution.
+// "Strategy" (Direct*/Separable*) decides kernel size and how to evaluate it:
+// it supplies kRadius and the ConvolveRow<kSizeModN> overloads that RunRow
+// forwards to.
+template <class Strategy>
+class ConvolveT {
+  static constexpr int64_t kRadius = Strategy::kRadius;
+
+ public:
+  // Uses default Border/Executor (BorderNeverUsed and ExecutorLoop).
+  // "Image" is ImageF or Image3F.
+  template <class Image, class Kernel>
+  static SIMD_ATTR PIK_INLINE void Run(const Image& in, const Kernel& kernel,
+                                       const Image* out) {
+    Run(BorderNeverUsed(), ExecutorLoop(), in, kernel, out);
+  }
+
+  // "Border" is BorderNeverUsed or BorderNeedsInit (the modes for which a
+  // RunImpl overload exists below).
+  // "Executor": ExecutorPool uses a ThreadPool; ExecutorLoop just loops.
+  // "Image" is ImageF or Image3F.
+  // Checks that in and *out have the same size and that the input is at
+  // least kConvolveMinWidth pixels wide before dispatching to RunImpl.
+  template <class Border, class Executor, class Image, class Kernel>
+  static SIMD_ATTR PIK_INLINE void Run(const Border border,
+                                       const Executor executor, const Image& in,
+                                       const Kernel& kernel, const Image* out) {
+    PIK_CHECK(SameSize(in, *out));
+    PIK_CHECK(in.xsize() >= kConvolveMinWidth);  // For BorderNeverUsed.
+    RunImpl(border, executor, in, kernel, out);
+  }
+
+ private:
+  // Convolves a single row by forwarding to the strategy. `out` is declared
+  // const float* because callers obtain it from a const image; it is cast to
+  // float* and written by the strategy.
+  template <size_t kSizeModN, class LeftRight, class WrapRow, class Kernel>
+  static SIMD_ATTR PIK_INLINE void RunRow(const float* PIK_RESTRICT in,
+                                          const size_t xsize,
+                                          const int64_t stride,
+                                          const WrapRow& wrap_row,
+                                          const Kernel& kernel,
+                                          const float* PIK_RESTRICT out) {
+    // LeftRight value instead of template arg enables overload resolution.
+    Strategy::template ConvolveRow<kSizeModN>(LeftRight(), in, xsize, stride,
+                                              wrap_row, kernel.Weights(),
+                                              const_cast<float*>(out));
+  }
+
+  // Convolves rows [ybegin, yend) of a single-plane image, resolving
+  // vertical neighbors through WrapRowMirror (used for the top and bottom
+  // kRadius rows by RunWithBoundsChecks).
+  template <size_t kSizeModN, class LeftRight, class Kernel>
+  static SIMD_ATTR PIK_INLINE void RunBorder(const ImageF& in,
+                                             const int64_t ybegin,
+                                             const int64_t yend,
+                                             const Kernel& kernel,
+                                             const ImageF* out) {
+    const int64_t stride = in.PixelsPerRow();
+    const WrapRowMirror wrap_row(in, in.ysize());
+    for (int64_t y = ybegin; y < yend; ++y) {
+      RunRow<kSizeModN, LeftRight>(in.ConstRow(y), in.xsize(), stride, wrap_row,
+                                   kernel, out->Row(y));
+    }
+  }
+
+  // Image3F: same as above, with one WrapRowMirror per plane.
+  template <size_t kSizeModN, class LeftRight, class Kernel>
+  static SIMD_ATTR PIK_INLINE void RunBorder(const Image3F& in,
+                                             const int64_t ybegin,
+                                             const int64_t yend,
+                                             const Kernel& kernel,
+                                             const Image3F* out) {
+    const int64_t stride = in.PixelsPerRow();
+    for (int64_t y = ybegin; y < yend; ++y) {
+      for (size_t c = 0; c < 3; ++c) {
+        const WrapRowMirror wrap_row(in.Plane(c), in.ysize());
+        RunRow<kSizeModN, LeftRight>(in.ConstPlaneRow(c, y), in.xsize(), stride,
+                                     wrap_row, kernel, out->PlaneRow(c, y));
+      }
+    }
+  }
+
+  // Threaded: convolves interior rows [ybegin, yend) via executor.Run, one
+  // row per task. Interior rows use WrapRowUnchanged.
+  template <size_t kSizeModN, class LeftRight, class Kernel>
+  static SIMD_ATTR PIK_INLINE void RunInterior(
+      const ExecutorPool executor, const ImageF& in, const int64_t ybegin,
+      const int64_t yend, const Kernel& kernel, const ImageF* out) {
+    // There is no interior if ysize <= 2 * kRadius.
+    if (ybegin >= yend) return;
+
+    const int64_t stride = in.PixelsPerRow();
+    executor.Run(
+        ybegin, yend,
+        [&in, stride, &kernel, out](const int y, const int thread) SIMD_ATTR {
+          RunRow<kSizeModN, LeftRight>(in.ConstRow(y), in.xsize(), stride,
+                                       WrapRowUnchanged(), kernel, out->Row(y));
+        },
+        "Convolve");
+  }
+
+  // Threaded, Image3: each task convolves row y of all three planes.
+  template <size_t kSizeModN, class LeftRight, class Kernel>
+  static SIMD_ATTR PIK_INLINE void RunInterior(
+      const ExecutorPool executor, const Image3F& in, const int64_t ybegin,
+      const int64_t yend, const Kernel& kernel, const Image3F* out) {
+    // There is no interior if ysize <= 2 * kRadius.
+    if (ybegin >= yend) return;
+
+    const int64_t stride = in.PixelsPerRow();
+    executor.Run(
+        ybegin, yend,
+        [&in, stride, &kernel, out](const int y, const int thread) SIMD_ATTR {
+          for (size_t c = 0; c < 3; ++c) {
+            RunRow<kSizeModN, LeftRight>(in.ConstPlaneRow(c, y), in.xsize(),
+                                         stride, WrapRowUnchanged(), kernel,
+                                         out->PlaneRow(c, y));
+          }
+        },
+        "Convolve3");
+  }
+
+  // Plain loop: convolves interior rows [ybegin, yend) sequentially,
+  // advancing the row pointers by PixelsPerRow() per row.
+  template <size_t kSizeModN, class LeftRight, class Kernel>
+  static SIMD_ATTR PIK_INLINE void RunInterior(ExecutorLoop, const ImageF& in,
+                                               const int64_t ybegin,
+                                               const int64_t yend,
+                                               const Kernel& kernel,
+                                               const ImageF* out) {
+    // There is no interior if ysize <= 2 * kRadius. Return before touching
+    // row ybegin, as the threaded overloads do.
+    if (ybegin >= yend) return;
+
+    const int64_t stride = in.PixelsPerRow();
+    const float* row_in = in.ConstRow(ybegin);
+    const float* row_out = out->Row(ybegin);  // RunRow casts to float*.
+    for (int64_t y = ybegin; y < yend; ++y) {
+      RunRow<kSizeModN, LeftRight>(row_in, in.xsize(), stride,
+                                   WrapRowUnchanged(), kernel, row_out);
+      row_in += in.PixelsPerRow();
+      row_out += out->PixelsPerRow();
+    }
+  }
+
+  // Plain loop, Image3: convolves row y of each of the three planes for
+  // every interior y in [ybegin, yend).
+  template <size_t kSizeModN, class LeftRight, class Kernel>
+  static SIMD_ATTR PIK_INLINE void RunInterior(ExecutorLoop, const Image3F& in,
+                                               const int64_t ybegin,
+                                               const int64_t yend,
+                                               const Kernel& kernel,
+                                               const Image3F* out) {
+    const int64_t stride = in.PixelsPerRow();
+    for (int64_t y = ybegin; y < yend; ++y) {
+      for (size_t c = 0; c < 3; ++c) {
+        // Fetch the rows for the current y (not ybegin) so that every
+        // interior row is convolved, matching the ExecutorPool overload.
+        const float* row_in = in.ConstPlaneRow(c, y);
+        const float* row_out = out->PlaneRow(c, y);  // Used as float*.
+        RunRow<kSizeModN, LeftRight>(row_in, in.xsize(), stride,
+                                     WrapRowUnchanged(), kernel, row_out);
+      }
+    }
+  }
+
+  // Splits the image into the top kRadius rows, the interior and the bottom
+  // kRadius rows. Border rows go through RunBorder (mirrored row wrapping),
+  // interior rows through RunInterior with the given executor.
+  // NOTE(review): ysize >= kRadius is not checked here - confirm callers
+  // guarantee it.
+  template <size_t kSizeModN, class LeftRight, class Executor, class Image,
+            class Kernel>
+  static SIMD_ATTR PIK_INLINE void RunWithBoundsChecks(const Executor executor,
+                                                       const Image& in,
+                                                       const Kernel& kernel,
+                                                       const Image* out) {
+    const int64_t ysize = in.ysize();
+    RunBorder<kSizeModN, LeftRight>(in, 0, kRadius, kernel, out);
+    RunInterior<kSizeModN, LeftRight>(executor, in, kRadius, ysize - kRadius,
+                                      kernel, out);
+    RunBorder<kSizeModN, LeftRight>(in, ysize - kRadius, ysize, kernel, out);
+  }
+
+  // Ensures each row has an additional vector's worth of valid values on the
+  // right AND left borders (residing in otherwise unused padding area reserved
+  // by BytesPerRow), initialized via mirroring with replication.
+  // Only kRadius values are written on each side of every row.
+  static void PadImage(const size_t xsize, const size_t ysize,
+                       const ImageF* image) {
+    using T = float;
+    PIK_ASSERT(xsize > kRadius && ysize > kRadius);
+    static_assert(kRadius * sizeof(T) <= kMaxVectorSize, "Not enough padding");
+
+    for (size_t y = 0; y < ysize; ++y) {
+      // Even if the image is const, we're allowed to overwrite its padding.
+      T* const PIK_RESTRICT row = const_cast<T*>(image->ConstRow(y));
+
+      for (int64_t i = 0; i < kRadius; ++i) {
+        // Right padding from Mirror(); left padding copies row[i] to
+        // row[-1 - i].
+        row[xsize + i] = row[Mirror(xsize + i, xsize)];
+        row[-1 - i] = row[i];
+      }
+    }
+  }
+
+  // Same for Image3: pads every row of all three planes.
+  static void PadImage(const size_t xsize, const size_t ysize,
+                       const Image3F* image) {
+    using T = float;
+    PIK_ASSERT(xsize > kRadius && ysize > kRadius);
+    static_assert(kRadius * sizeof(T) <= kMaxVectorSize, "Not enough padding");
+
+    for (size_t y = 0; y < ysize; ++y) {
+      for (size_t c = 0; c < 3; ++c) {
+        // Even if the image is const, we're allowed to overwrite its padding.
+        T* const PIK_RESTRICT row = const_cast<T*>(image->ConstPlaneRow(c, y));
+
+        for (int64_t i = 0; i < kRadius; ++i) {
+          row[xsize + i] = row[Mirror(xsize + i, xsize)];
+          row[-1 - i] = row[i];
+        }
+      }
+    }
+  }
+
+  // Slow path: padding and bounds checks.
+  // Pads the whole input, then dispatches on xsize modulo the vector lane
+  // count so that ConvolveRow can be specialized on kSizeModN (0, 1 or 2,
+  // where 2 stands for every remainder >= 2).
+  template <class Executor, class Image, class Kernel>
+  static SIMD_ATTR PIK_INLINE void RunImpl(BorderNeedsInit,
+                                           const Executor executor,
+                                           const Image& in,
+                                           const Kernel& kernel,
+                                           const Image* out) {
+    PROFILER_ZONE("Convolve slow");
+    // Each RunRow requires that 2*kRadius+1 rows already be padded. Padding
+    // the entire image pollutes the cache. We could pre-pad 2*kRadius rows and
+    // then one row per RunRow, but callers who care about speed should anyway
+    // use the other, faster Border modes.
+    PadImage(in.xsize(), in.ysize(), &in);
+
+    switch (in.xsize() % SIMD_FULL(float)::N) {
+      case 0:
+        return RunWithBoundsChecks<0, LeftRightValid>(executor, in, kernel,
+                                                      out);
+      case 1:
+        return RunWithBoundsChecks<1, LeftRightValid>(executor, in, kernel,
+                                                      out);
+      default:  // Only need <= kRadius
+        return RunWithBoundsChecks<2, LeftRightValid>(executor, in, kernel,
+                                                      out);
+    }
+  }
+
+  // Fast: no padding, but bounds checks.
+  // Same dispatch on xsize modulo the lane count as the slow path, but rows
+  // are convolved with LeftRightInvalid. The kernel is taken by const
+  // reference, like in the BorderNeedsInit overload.
+  template <class Image, class Executor, class Kernel>
+  static SIMD_ATTR PIK_INLINE void RunImpl(BorderNeverUsed,
+                                           const Executor executor,
+                                           const Image& in,
+                                           const Kernel& kernel,
+                                           const Image* out) {
+    PROFILER_ZONE("Convolve fast");
+
+    switch (in.xsize() % SIMD_FULL(float)::N) {
+      case 0:
+        return RunWithBoundsChecks<0, LeftRightInvalid>(executor, in, kernel,
+                                                        out);
+      case 1:
+        return RunWithBoundsChecks<1, LeftRightInvalid>(executor, in, kernel,
+                                                        out);
+      default:  // Only need <= kRadius
+        return RunWithBoundsChecks<2, LeftRightInvalid>(executor, in, kernel,
+                                                        out);
+    }
+  }
+};
+
+}  // namespace pik
+
+#endif  // PIK_CONVOLVE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/cpik.cc b/codec/L2/demos/pikEnc/host/pik/cpik.cc
new file mode 100755
index 0000000000..094bf85b55
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/cpik.cc
@@ -0,0 +1,287 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/cpik.h"
+
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/arch_specific.h"
+#include "pik/args.h"
+#include "pik/codec.h"
+#include "pik/common.h"
+#include "pik/file_io.h"
+#include "pik/image.h"
+#include "pik/os_specific.h"
+#include "pik/padded_bytes.h"
+#include "pik/pik.h"
+#include "pik/pik_info.h"
+#include "pik/profiler.h"
+#include "pik/simd/targets.h"
+
+namespace pik {
+namespace {
+
+// Proposes a distance to try for a given bpp target. This could depend
+// on the entropy in the image, too, but let's start with something.
+static double ApproximateDistanceForBPP(double bpp) {
+  // Heuristic power law: distance = kScale * bpp^kExponent, so a higher
+  // bpp target yields a smaller starting distance.
+  constexpr double kScale = 1.704;
+  constexpr double kExponent = -0.804;
+  return kScale * pow(bpp, kExponent);
+}
+
+} // namespace
+
+CompressArgs::CompressArgs() {
+  // Default worker count: half the number of entries returned by
+  // AvailableCPUs().
+  // TODO(janwas): differentiate between cores/HT
+  const auto cpu_count = AvailableCPUs().size();
+  num_threads = cpu_count / 2;
+}
+
+// Registers every cpik option with `cmdline`. Each option stores its parsed
+// value directly into a member of this instance (mostly fields of `params`),
+// so this object must outlive parsing. The ids of --distance, --target_size
+// and --target_bpp are remembered for ValidateArgs. Always returns true.
+Status CompressArgs::AddCommandLineOptions(tools::CommandLineParser *cmdline) {
+  // Positional arguments.
+  cmdline->AddPositionalOption("INPUT", "the input can be PNG PPM or PFM.",
+                               &params.file_in);
+  cmdline->AddPositionalOption(
+      "OUTPUT", "the compressed output file (optional)", &params.file_out);
+
+  // Flags.
+  cmdline->AddOptionFlag('\0', "fast", "Use fast encoding mode (less dense).",
+                         &params.fast_mode, &SetBooleanTrue);
+  cmdline->AddOptionFlag('\0', "guetzli", "Use the guetzli mode.",
+                         &params.guetzli_mode, &SetBooleanTrue);
+  cmdline->AddOptionFlag('\0', "progressive", "Use the progressive mode.",
+                         &params.progressive_mode, &SetBooleanTrue);
+  cmdline->AddOptionFlag('\0', "lossless", "Use the lossless mode.",
+                         &params.lossless_mode, &SetBooleanTrue);
+
+  cmdline->AddOptionFlag('\0', "keep_tempfiles",
+                         "Don't delete temporary files.",
+                         &params.keep_tempfiles, &SetBooleanTrue);
+
+  // Overrides the default chosen in the constructor.
+  cmdline->AddOptionValue('\0', "num_threads", "N",
+                          "number of worker threads (zero = none).",
+                          &num_threads, &ParseUnsigned);
+
+  cmdline->AddOptionValue('\0', "noise", "0|1",
+                          "force enable/disable noise generation.",
+                          &params.noise, &ParseOverride);
+
+  cmdline->AddOptionValue('\0', "gradient", "0|1",
+                          "force enable/disable extra gradient map.",
+                          &params.gradient, &ParseOverride);
+  cmdline->AddOptionValue('\0', "adaptive_reconstruction", "0|1",
+                          "force enable/disable decoder filter.",
+                          &params.adaptive_reconstruction, &ParseOverride);
+
+  cmdline->AddOptionValue('\0', "gaborish", "0..7",
+                          "chooses deblocking strength (4=normal).",
+                          &params.gaborish, &ParseGaborishStrength);
+
+  cmdline->AddOptionValue('\0', "xclbin", "string",
+                          "path to xclbin file",
+                          &params.xclbinPath, &ParseString);
+
+  // Target distance/size/bpp
+  // NOTE(review): the help text below advertises 0.5 .. 3.0, while
+  // ValidateArgs accepts 0.125 .. 15.0 - confirm which range is intended.
+  opt_distance_id = cmdline->AddOptionValue(
+      '\0', "distance", "maxError",
+      ("Max. butteraugli distance, lower = higher quality.\n"
+       "    Good default: 1.0. Supported range: 0.5 .. 3.0."),
+      &params.butteraugli_distance, &ParseFloat);
+  opt_target_size_id = cmdline->AddOptionValue(
+      '\0', "target_size", "N",
+      ("Aim at file size of N bytes.\n"
+       "    Compresses to 1 % of the target size in ideal conditions.\n"
+       "    Runs the same algorithm as --target_bpp"),
+      &params.target_size, &ParseUnsigned);
+  opt_target_bpp_id = cmdline->AddOptionValue(
+      '\0', "target_bpp", "BPP",
+      ("Aim at file size that has N bits per pixel.\n"
+       "    Compresses to 1 % of the target BPP in ideal conditions."),
+      &params.target_bitrate, &ParseFloat);
+
+  cmdline->AddOptionValue(
+      '\0', "intensity_target", "N",
+      ("Intensity target of monitor in nits, higher\n"
+       "   results in higher quality image. Supported range: 250..6000,\n"
+       "   default is 250."),
+      &params.intensity_target, &ParseFloat);
+
+  // The saliency options are registered with a null help text.
+  cmdline->AddOptionValue('\0', "saliency_extractor", "STRING", nullptr,
+                          &params.saliency_extractor_for_progressive_mode,
+                          &ParseString);
+  cmdline->AddOptionValue('\0', "saliency_threshold", "N", nullptr,
+                          &params.saliency_threshold, &ParseFloat);
+  cmdline->AddOptionFlag('\0', "saliency_debug_skip_nonsalient", nullptr,
+                         &params.saliency_debug_skip_nonsalient,
+                         &SetBooleanTrue);
+
+  cmdline->AddOptionValue(
+      'x', "dec-hints", "key=value",
+      "color_space indicates the ColorEncoding, see Description().", &dec_hints,
+      &ParseAndAppendKeyValue);
+
+  cmdline->AddOptionFlag('v', "verbose",
+                         "enable verbose mode with additional output",
+                         &params.verbose, &SetBooleanTrue);
+  cmdline->AddOptionValue('\0', "print_profile", "0|1",
+                          "print timing information before exiting",
+                          &print_profile, &ParseOverride);
+  return true;
+}
+
+// Checks the parsed options for consistency and reports problems on stderr.
+// Fails when --distance is out of range, when more than one of --distance,
+// --target_bpp and --target_size was given, when a saliency extractor is
+// requested without an output file, or when the input filename is missing.
+Status CompressArgs::ValidateArgs(const tools::CommandLineParser &cmdline) {
+  const bool has_distance = cmdline.GetOption(opt_distance_id)->matched();
+  const bool has_size = cmdline.GetOption(opt_target_size_id)->matched();
+  const bool has_bpp = cmdline.GetOption(opt_target_bpp_id)->matched();
+
+  // Size and bpp targets are accepted, but only with a warning.
+  if (has_size) {
+    fprintf(stderr, "Warning: target_size does not set all flags/modes.\n");
+  }
+  if (has_bpp) {
+    fprintf(stderr, "Warning: target_bpp does not set all flags/modes.\n");
+  }
+
+  if (has_distance) {
+    constexpr float kMinDistance = 0.125f;
+    constexpr float kMaxDistance = 15.0f;
+    // Written as a positive range test so that NaN is rejected as well.
+    const bool in_range = kMinDistance <= params.butteraugli_distance &&
+                          params.butteraugli_distance <= kMaxDistance;
+    if (!in_range) {
+      fprintf(stderr, "Invalid/out of range distance, try %g to %g.\n",
+              kMinDistance, kMaxDistance);
+      return false;
+    }
+  }
+
+  // The three quality targets are mutually exclusive.
+  const int num_quality_options = static_cast<int>(has_bpp) +
+                                  static_cast<int>(has_size) +
+                                  static_cast<int>(has_distance);
+  if (num_quality_options > 1) {
+    fprintf(stderr,
+            "You can specify only one of '--distance', "
+            "'--target_bpp' and '--target_size'. They are all different ways"
+            " to specify the image quality. When in doubt, use --distance."
+            " It gives the most visually consistent results.\n");
+    return false;
+  }
+
+  const bool wants_saliency =
+      !params.saliency_extractor_for_progressive_mode.empty();
+  if (wants_saliency) {
+    if (!params.progressive_mode) {
+      fprintf(stderr,
+              "Warning: Specifying --saliency_extractor only makes sense "
+              "for --progressive mode.\n");
+    }
+    if (!params.file_out) {
+      fprintf(stderr,
+              "Need to have output filename to use saliency extractor.\n");
+      return PIK_FAILURE("file_out");
+    }
+  }
+
+  if (!params.file_in) {
+    fprintf(stderr, "Missing input filename.\n");
+    return false;
+  }
+
+  return true;
+}
+
+// Compresses the image named by args.params.file_in into *compressed.
+//
+// Steps, as implemented below:
+//  1. Reads the input file into a CodecInOut (using args.dec_hints).
+//  2. If args.params.target_size or target_bitrate is positive, searches for
+//     a butteraugli distance by calling Compress recursively (up to 7 times)
+//     and keeps the distance whose output size is closest, by ratio, to the
+//     target. args.params is then updated with that distance and both
+//     targets are reset to 0.
+//  3. Calls PixelsToPik with xclbinPath and prints statistics to stderr.
+//
+// Returns false if the input cannot be read or PixelsToPik fails.
+// Note that `args` is modified when a size/bpp target is given.
+Status Compress(ThreadPool *pool, std::string xclbinPath, CompressArgs &args,
+                PaddedBytes *compressed) {
+  double t0, t1;
+
+  CodecContext codec_context;
+  CodecInOut io(&codec_context);
+  io.dec_hints = args.dec_hints;
+  t0 = Now();
+  if (!io.SetFromFile(args.params.file_in)) {
+    fprintf(stderr, "Failed to read image %s.\n", args.params.file_in);
+    return false;
+  }
+  t1 = Now();
+  // Input decoding speed in megapixels per second.
+  const double decode_mps = io.xsize() * io.ysize() * 1E-6 / (t1 - t0);
+
+  const size_t xsize = io.xsize();
+  const size_t ysize = io.ysize();
+  if (args.params.target_size > 0 || args.params.target_bitrate > 0) {
+    // Search algorithm for target bpp / size.
+    CompressArgs s = args; // Args for search.
+    if (s.params.target_size > 0) {
+      // Express the byte target as bits per pixel.
+      s.params.target_bitrate =
+          s.params.target_size * 8.0 / (io.xsize() * io.ysize());
+      s.params.target_size = 0;
+    }
+    double dist = ApproximateDistanceForBPP(s.params.target_bitrate);
+    s.params.butteraugli_distance = dist;
+    double target_size =
+        s.params.target_bitrate * (1 / 8.) * io.xsize() * io.ysize();
+    // Clear the target so the recursive calls below do not search again.
+    s.params.target_bitrate = 0;
+    double best_dist = 1.0;
+    double best_loss = 1e99;
+    for (int i = 0; i < 7; ++i) {
+      s.params.butteraugli_distance = dist;
+      PaddedBytes candidate;
+      bool ok = Compress(pool, xclbinPath, s, &candidate);
+      if (!ok) {
+        printf("Compression error occurred during the search for best size."
+               " Trying with butteraugli distance %.15g\n",
+               best_dist);
+        break;
+      }
+      printf("Butteraugli distance %g yields %zu bytes, %g bpp.\n", dist,
+             candidate.size(),
+             candidate.size() * 8.0 / (io.xsize() * io.ysize()));
+      // loss >= 1 measures how far the size is from the target in either
+      // direction (ratio or its inverse, whichever is larger).
+      const double ratio = static_cast<double>(candidate.size()) / target_size;
+      const double loss = std::max(ratio, 1.0 / std::max(ratio, 1e-30));
+      if (best_loss > loss) {
+        best_dist = dist;
+        best_loss = loss;
+      }
+      // Scale the distance by the size ratio for the next attempt, clamped
+      // to [0.01, 16.0].
+      dist *= ratio;
+      if (dist < 0.01) {
+        dist = 0.01;
+      }
+      if (dist >= 16.0) {
+        dist = 16.0;
+      }
+    }
+    printf("Choosing butteraugli distance %.15g\n", best_dist);
+    args.params.butteraugli_distance = best_dist;
+    args.params.target_bitrate = 0;
+    args.params.target_size = 0;
+  }
+  // Build the mode description in a single snprintf so that the fast-mode
+  // prefix is kept instead of being overwritten by the distance text.
+  char mode[200];
+  snprintf(mode, sizeof(mode), "%swith maximum Butteraugli distance %f",
+           args.params.fast_mode ? "in fast mode " : "",
+           args.params.butteraugli_distance);
+  fprintf(stderr,
+          "Read %zu bytes (%zux%zu, %.1f MP/s); compressing %s, %zu threads.\n",
+          io.enc_size, xsize, ysize, decode_mps, mode, NumWorkerThreads(pool));
+
+  PikInfo aux_out;
+  if (args.inspector_image3f) {
+    aux_out.SetInspectorImage3F(args.inspector_image3f);
+  }
+  t0 = Now();
+  if (!PixelsToPik(args.params, xclbinPath, &io, compressed, &aux_out, pool)) {
+    fprintf(stderr, "Failed to compress.\n");
+    return false;
+  }
+  t1 = Now();
+  // Uncompressed size in bytes, used for the MB/s throughput figure.
+  const size_t channels = io.c_current().Channels() + io.HasAlpha();
+  const size_t bytes = xsize * ysize * channels *
+                       DivCeil(io.original_bits_per_sample(), kBitsPerByte);
+  const double bpp =
+      static_cast<double>(compressed->size() * kBitsPerByte) / (xsize * ysize);
+  fprintf(stderr, "Compressed to %zu bytes (%.3f bpp, %.2f MB/s).\n",
+          compressed->size(), bpp, bytes * 1E-6 / (t1 - t0));
+
+  if (args.params.verbose) {
+    aux_out.Print(1);
+  }
+
+  return true;
+}
+
+} // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/cpik.h b/codec/L2/demos/pikEnc/host/pik/cpik.h
new file mode 100755
index 0000000000..91aae3b29e
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/cpik.h
@@ -0,0 +1,57 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_CPIK_H_
+#define PIK_CPIK_H_
+
+#include <utility>
+
+#include "pik/cmdline.h"
+#include "pik/codec.h"
+#include "pik/padded_bytes.h"
+#include "pik/pik_inspection.h"
+#include "pik/pik_params.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Command line state of the cpik compressor: the parsed options plus the
+// helpers that register and validate them.
+struct CompressArgs {
+  // Initialize non-static default options.
+  CompressArgs();
+
+  // Stores the inspector that will get passed on to PikInfo.
+  void SetInspectorImage3F(InspectorImage3F inspector) {
+    inspector_image3f = inspector;
+  }
+
+  // Add all the command line options to the CommandLineParser. Note that the
+  // options are tied to the instance that this was called on.
+  Status AddCommandLineOptions(tools::CommandLineParser *cmdline);
+
+  // Validate the passed arguments, checking whether all passed options are
+  // compatible. Returns whether the validation was successful.
+  Status ValidateArgs(const tools::CommandLineParser &cmdline);
+
+  // Decoder hints collected from the dec-hints option.
+  DecoderHints dec_hints;
+  // Compression parameters filled in by the command line options.
+  CompressParams params;
+  // Number of worker threads (zero = none).
+  size_t num_threads = 0;
+  // NOTE(review): no writer or reader of this flag is visible in this
+  // header - confirm whether it is still used.
+  bool got_num_threads = false;
+  // Value of the print_profile option.
+  Override print_profile = Override::kDefault;
+
+  // Will get passed on to PikInfo.
+  InspectorImage3F inspector_image3f;
+
+  // References (ids) of specific options to check if they were matched.
+  tools::CommandLineParser::OptionId opt_distance_id = -1;
+  tools::CommandLineParser::OptionId opt_target_size_id = -1;
+  tools::CommandLineParser::OptionId opt_target_bpp_id = -1;
+};
+
+// Compresses the input described by `args` into `*compressed`, using `pool`
+// for worker threads and `xclbinPath` as the path to the xclbin file.
+// `args` is taken by non-const reference.
+Status Compress(ThreadPool *pool, std::string xclbinPath, CompressArgs &args,
+                PaddedBytes *compressed);
+
+} // namespace pik
+
+#endif // PIK_CPIK_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/cpik_main.cc b/codec/L2/demos/pikEnc/host/pik/cpik_main.cc
new file mode 100755
index 0000000000..b2fdc4f186
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/cpik_main.cc
@@ -0,0 +1,56 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/cpik.h"
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+
+#include "pik/cmdline.h"
+#include "pik/file_io.h"
+#include "pik/os_specific.h"
+#include "pik/padded_bytes.h"
+#include "pik/profiler.h"
+
+namespace pik {
+namespace {
+
+// Runs one compression from the command line; returns 0 on success, else 1.
+int CompressMain(int argc, const char **argv) {
+  CompressArgs args;
+  tools::CommandLineParser cmdline;
+  // Not PIK_ASSERT: this call has side effects and must run in every build.
+  PIK_CHECK(args.AddCommandLineOptions(&cmdline));
+  if (!cmdline.Parse(argc, argv) || !args.ValidateArgs(cmdline)) {
+    cmdline.PrintHelp();
+    return 1;
+  }
+
+  const int bits = TargetBitfield().Bits();
+  if ((bits & SIMD_ENABLE) != SIMD_ENABLE) {
+    fprintf(stderr, "CPU does not support all enabled targets => exiting.\n");
+    return 1;
+  }
+
+  ThreadPool pool(args.num_threads);
+
+  PaddedBytes compressed;
+  if (!Compress(&pool, args.params.xclbinPath, args, &compressed)) return 1;
+
+  if (args.params.file_out && !WriteFile(compressed, args.params.file_out)) {
+    return 1;
+  }
+
+  if (args.print_profile == Override::kOn) {
+    PROFILER_PRINT_RESULTS();
+  }
+  return 0;
+}
+
+} // namespace
+} // namespace pik
+
+int main(int argc, const char **argv) { return pik::CompressMain(argc, argv); }  // forwards exit code
diff --git a/codec/L2/demos/pikEnc/host/pik/data_parallel.cc b/codec/L2/demos/pikEnc/host/pik/data_parallel.cc
new file mode 100755
index 0000000000..845c944b89
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/data_parallel.cc
@@ -0,0 +1,81 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/data_parallel.h"
+#include "pik/profiler.h"
+
+namespace pik {
+
+void ThreadPool::ThreadFunc(ThreadPool* self, const int thread) {
+  // Worker loop: report ready, wait for a command, run it; ends on kWorkerExit.
+  for (;;) {
+    std::unique_lock<std::mutex> lock(self->mutex_);
+    // Mark this worker ready; the last one to do so notifies the main thread.
+    if (++self->workers_ready_ == self->NumThreads()) {
+      self->workers_ready_cv_.notify_one();
+    }
+  RESUME_WAIT:
+    // Wait for a command; wait() releases the lock while blocked.
+    self->worker_start_cv_.wait(lock);
+    const WorkerCommand command = self->worker_start_command_;
+    switch (command) {
+      case kWorkerWait:    // spurious wakeup:
+        goto RESUME_WAIT;  // lock still held, avoid incrementing ready.
+      case kWorkerOnce:  // one call per worker, with task == thread
+        lock.unlock();
+        self->func_(self->arg_, thread, thread);
+        break;
+      case kWorkerExit:
+        return;  // exits thread
+      default:  // any other value is handed to RunRange as an encoded range
+        lock.unlock();
+        RunRange(self, command, thread);
+        break;
+    }
+  }
+}
+
+ThreadPool::ThreadPool(const int num_worker_threads)
+    : num_worker_threads_(num_worker_threads),
+      num_threads_(std::max(num_worker_threads, 1)) {
+  PROFILER_ZONE("ThreadPool ctor");
+
+  PIK_CHECK(num_worker_threads >= 0);
+  PIK_CHECK(num_worker_threads <= kMaxThreads);
+  threads_.reserve(num_worker_threads);
+
+  (void)padding;  // Silences the "unused-private-field" warning.
+
+  // Until a real command is issued, any worker wakeup is treated as spurious.
+  worker_start_command_ = kWorkerWait;
+
+  for (int worker = 0; worker < num_worker_threads; ++worker) {
+    threads_.emplace_back(ThreadFunc, this, worker);
+  }
+
+  // Block until every spawned worker has reported that it is waiting.
+  if (num_worker_threads_ > 0) WorkersReadyBarrier();
+
+  // Enter a profiler zone once per thread so that its expensive first-use
+  // initialization doesn't count towards other timer measurements.
+  const auto warm_up = [](const int /*task*/, const int /*thread*/) {
+    PROFILER_ZONE("@InitWorkers");
+  };
+  RunOnEachThread(warm_up);
+}
+
+ThreadPool::~ThreadPool() {
+  // Only signal an exit when there are workers waiting for a command.
+  if (num_worker_threads_ > 0) StartWorkers(kWorkerExit);
+
+  // Block until every worker has left ThreadFunc.
+  for (auto& worker : threads_) {
+    PIK_ASSERT(worker.joinable());
+    worker.join();
+  }
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/data_parallel.h b/codec/L2/demos/pikEnc/host/pik/data_parallel.h
new file mode 100755
index 0000000000..21c70e3462
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/data_parallel.h
@@ -0,0 +1,338 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_DATA_PARALLEL_H_
+#define PIK_DATA_PARALLEL_H_
+
+// Portable, low-overhead C++11 ThreadPool alternative to OpenMP for
+// data-parallel computations.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <algorithm>  // max
+#include <atomic>
+#include <condition_variable>  //NOLINT
+#include <cstdlib>
+#include <mutex>   //NOLINT
+#include <thread>  //NOLINT
+#include <vector>
+
+#include "pik/bits.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Scalable, lower-overhead thread pool, especially suitable for data-parallel
+// computations in the fork-join model, where clients need to know when all
+// tasks have completed.
+//
+// Thread pools usually store small numbers of heterogeneous tasks in a queue.
+// When tasks are identical or differ only by an integer input parameter, it is
+// much faster to store just one function of an integer parameter and call it
+// for each value. Conventional vector-of-tasks can be run in parallel using a
+// lambda function adapter that simply calls task_funcs[task].
+//
+// This thread pool can efficiently load-balance millions of tasks using an
+// atomic counter, thus avoiding per-task virtual or system calls. With 48
+// hyperthreads and 1M tasks that add to an atomic counter, overall runtime is
+// 10-20x higher when using std::async, and ~200x for a queue-based ThreadPool.
+//
+// Usage:
+//   ThreadPool pool;
+//   pool.Run(0, 1000000, [](int task, int thread) { Func1(task, thread); });
+//
+// When Run returns, all of its tasks have finished. The destructor waits until
+// all worker threads have exited cleanly. "thread" is useful for accessing
+// thread-local data, typically a pre-allocated array of kMaxThreads
+// cache-aligned elements.
+class ThreadPool {
+ public:
+  // For per-thread arrays. Can increase if needed.
+  static constexpr int kMaxThreads = 256;
+
+  // Starts the given number of worker threads and blocks until they are ready.
+  // "num_worker_threads" defaults to one per hyperthread. If zero, all tasks
+  // run on the main thread.
+  explicit ThreadPool(
+      const int num_worker_threads = std::thread::hardware_concurrency());
+
+  ThreadPool(const ThreadPool&) = delete;
+  ThreadPool& operator=(const ThreadPool&) = delete;
+
+  // Waits for all threads to exit.
+  ~ThreadPool();
+
+  // Returns number of worker threads created (some may be sleeping and never
+  // wake up in time to participate in Run). Useful for characterizing
+  // performance; 0 means "run on main thread".
+  size_t NumWorkerThreads() const { return num_worker_threads_; }
+
+  // Returns maximum number of main/worker threads that may call Func. Useful
+  // for allocating per-thread storage.
+  size_t NumThreads() const { return num_threads_; }
+
+  // Runs func(task, thread) on worker thread(s) for every task in [begin, end).
+  // "thread" is 0 if NumWorkerThreads() == 0, otherwise [0, NumThreads()).
+  // Not thread-safe - no two calls to Run may overlap.
+  // Subsequent calls will reuse the same threads.
+  //
+  // Precondition: 0 <= begin <= end.
+  template <class Func>
+  void Run(const int begin, const int end, const Func& func,
+           const char* caller = "") {
+    // "caller" identifies the call site for debugging; it is otherwise unused.
+    PIK_ASSERT(0 <= begin && begin <= end);
+    if (begin == end) {
+      return;
+    }
+
+    if (num_worker_threads_ == 0) {
+      const int thread = 0;
+      for (int task = begin; task < end; ++task) {
+        func(task, thread);
+      }
+      return;
+    }
+
+    if (depth_.fetch_add(1, std::memory_order_acq_rel) != 0) {
+      PIK_ASSERT(false);  // Must not re-enter.
+    }
+
+    const WorkerCommand worker_command = (WorkerCommand(end) << 32) + begin;
+    // Ensure the inputs do not result in a reserved command.
+    PIK_ASSERT(worker_command != kWorkerWait);
+    PIK_ASSERT(worker_command != kWorkerOnce);
+    PIK_ASSERT(worker_command != kWorkerExit);
+
+    func_ = &CallClosure<Func>;
+    arg_ = &func;
+    num_reserved_.store(0, std::memory_order_relaxed);
+
+    StartWorkers(worker_command);
+    WorkersReadyBarrier();
+
+    if (depth_.fetch_add(-1, std::memory_order_acq_rel) != 1) {
+      PIK_ASSERT(false);
+    }
+  }
+
+  // Runs func(thread, thread) on all thread(s) that may participate in Run.
+  // If NumWorkerThreads() == 0, runs on the main thread with thread == 0,
+  // otherwise concurrently called by each worker thread in [0, NumThreads()).
+  template <class Func>
+  void RunOnEachThread(const Func& func) {
+    if (num_worker_threads_ == 0) {
+      const int thread = 0;
+      func(thread, thread);
+      return;
+    }
+
+    func_ = &CallClosure<Func>;
+    arg_ = &func;
+    StartWorkers(kWorkerOnce);
+    WorkersReadyBarrier();
+  }
+
+ private:
+  // After construction and between calls to Run, workers are "ready", i.e.
+  // waiting on worker_start_cv_. They are "started" by sending a "command"
+  // and notifying all worker_start_cv_ waiters. (That is why all workers
+  // must be ready/waiting - otherwise, the notification will not reach all of
+  // them and the main thread waits in vain for them to report readiness.)
+  using WorkerCommand = uint64_t;
+
+  // Special values; all others encode the begin/end parameters.
+  static constexpr WorkerCommand kWorkerWait = ~0ULL;
+  static constexpr WorkerCommand kWorkerOnce = ~1ULL;
+  static constexpr WorkerCommand kWorkerExit = ~2ULL;
+
+  // Calls f(task, thread). Used for type erasure of Func arguments. The
+  // signature must match TypeErasedFunc, hence a const void* argument.
+  template <class Closure>
+  static void CallClosure(const void* f, const int task, const int thread) {
+    (*reinterpret_cast<const Closure*>(f))(task, thread);
+  }
+
+  void WorkersReadyBarrier() {
+    std::unique_lock<std::mutex> lock(mutex_);
+    // Typically only a single iteration.
+    while (workers_ready_ != threads_.size()) {
+      workers_ready_cv_.wait(lock);
+    }
+    workers_ready_ = 0;
+
+    // Safely handle spurious worker wakeups.
+    worker_start_command_ = kWorkerWait;
+  }
+
+  // Precondition: all workers are ready.
+  void StartWorkers(const WorkerCommand worker_command) {
+    mutex_.lock();
+    worker_start_command_ = worker_command;
+    // Workers will need this lock, so release it before they wake up.
+    mutex_.unlock();
+    worker_start_cv_.notify_all();
+  }
+
+  // Attempts to reserve and perform some work from the global range of tasks,
+  // which is encoded within "command". Returns after all tasks are reserved.
+  static void RunRange(ThreadPool* self, const WorkerCommand command,
+                       const int thread) {
+    const int begin = command & 0xFFFFFFFF;
+    const int end = command >> 32;
+    const int num_tasks = end - begin;
+    const int num_worker_threads = static_cast<int>(self->num_worker_threads_);
+
+    // OpenMP introduced several "schedule" strategies:
+    // "single" (static assignment of exactly one chunk per thread): slower.
+    // "dynamic" (allocates k tasks at a time): competitive for well-chosen k.
+    // "guided" (allocates k tasks, decreases k): computing k = remaining/n
+    //   is faster than halving k each iteration. We prefer this strategy
+    //   because it avoids user-specified parameters.
+
+    for (;;) {
+      // guided
+      const int num_reserved =
+          self->num_reserved_.load(std::memory_order_relaxed);
+      const int num_remaining = num_tasks - num_reserved;
+      const int my_size = std::max(num_remaining / (num_worker_threads * 4), 1);
+      const int my_begin = begin + self->num_reserved_.fetch_add(
+                                       my_size, std::memory_order_relaxed);
+      const int my_end = std::min(my_begin + my_size, begin + num_tasks);
+      // Another thread already reserved the last task.
+      if (my_begin >= my_end) {
+        break;
+      }
+      for (int task = my_begin; task < my_end; ++task) {
+        self->func_(self->arg_, task, thread);
+      }
+    }
+  }
+
+  // What task to run on a worker thread. Points to code generated via
+  // CallClosure. Arguments are arg_ (points to the lambda), task, thread.
+  using TypeErasedFunc = void (*)(const void*, int, int);
+
+  static void ThreadFunc(ThreadPool* self, const int thread);
+
+  // Unmodified after ctor, but cannot be const because we call thread::join().
+  std::vector<std::thread> threads_;
+
+  const size_t num_worker_threads_;  // == threads_.size()
+  const size_t num_threads_;
+
+  std::atomic<int> depth_{0};  // detects if Run is re-entered (not supported).
+
+  std::mutex mutex_;  // guards both cv and their variables.
+  std::condition_variable workers_ready_cv_;
+  size_t workers_ready_ = 0;
+  std::condition_variable worker_start_cv_;
+  WorkerCommand worker_start_command_;
+
+  // Written by main thread, read by workers (after mutex lock/unlock).
+  TypeErasedFunc func_;
+  const void* arg_;
+
+  // Updated by workers; alignment/padding avoids false sharing.
+  alignas(64) std::atomic<int> num_reserved_{0};
+  int padding[15];
+};
+
+// Wrappers to enable ThreadPool* == nullptr (cheaper than constructing a
+// ThreadPool(0)). Do not call pool->* directly.
+
+static inline size_t NumWorkerThreads(ThreadPool* pool) {
+  return pool != nullptr ? pool->NumWorkerThreads() : 0;  // no pool, no workers
+}
+
+static inline size_t NumThreads(ThreadPool* pool) {
+  return pool != nullptr ? pool->NumThreads() : 1;  // no pool: caller only
+}
+
+template <class Func>
+void RunOnPool(ThreadPool* pool, const int begin, const int end,
+               const Func& func, const char* caller = "") {
+  // A real pool does the scheduling itself.
+  if (pool != nullptr) return pool->Run(begin, end, func, caller);
+
+  // No pool: run every task serially on the calling thread as thread 0.
+  const int thread = 0;
+  for (int task = begin; task < end; ++task) {
+    func(task, thread);
+  }
+}
+
+template <class Func>
+void RunOnEachThread(ThreadPool* pool, const Func& func) {
+  // A real pool handles the dispatch itself.
+  if (pool != nullptr) return pool->RunOnEachThread(func);
+
+  // No pool: a single call on the calling thread with task == thread == 0.
+  const int thread = 0;
+  func(thread, thread);
+}
+
+// Adapters for zero-cost switching between ThreadPool and non-threaded loop.
+
+struct ExecutorLoop {
+  // Serial stand-in for a pool: calls lambda(task, 0) for each int task in
+  // [begin, end), in ascending order on the calling thread. "caller" is unused.
+  template <class Lambda>
+  void Run(const int begin, const int end, const Lambda& lambda,
+           const char* caller = "") const {
+    int task = begin;
+    for (; task < end; ++task) lambda(task, 0);
+  }
+};
+
+struct ExecutorPool {
+  // Wraps (without owning) the pool that Run forwards its work to.
+  explicit ExecutorPool(ThreadPool* thread_pool) : pool(thread_pool) {}
+
+  // Forwards to RunOnPool; lambda takes int task in [begin, end), int thread.
+  template <class Lambda>
+  void Run(const int begin, const int end, const Lambda& lambda,
+           const char* caller) const {
+    RunOnPool(this->pool, begin, end, lambda, caller);
+  }
+  ThreadPool* pool;  // not owned
+};
+
+// Accelerates multiple unsigned 32-bit divisions with the same divisor by
+// precomputing a multiplier. This is useful for splitting a contiguous range of
+// indices (the task index) into 2D indices. Exhaustively tested on dividends
+// up to 4M with non-power of two divisors up to 2K.
+class Divider {
+ public:
+  // "d" is the divisor; it must not be a power of two (asserted below).
+  Divider(const uint32_t d) : shift_(FloorLog2Nonzero(d)) {
+    // Power of two divisors (including 1) are not supported because it is more
+    // efficient to special-case them at a higher level.
+    PIK_ASSERT((d & (d - 1)) != 0);
+
+    // ceil_log2 = floor_log2 + 1 because we ruled out powers of two above.
+    const uint64_t next_pow2 = 1ULL << (shift_ + 1);
+
+    mul_ = ((next_pow2 - d) << 32) / d + 1;
+  }
+
+  // "n" is the numerator (what is being divided); the result is the quotient.
+  inline uint32_t operator()(const uint32_t n) const {
+    // Algorithm from "Division by Invariant Integers using Multiplication".
+    // Its "sh1" is hardcoded to 1 because we don't need to handle d=1.
+    const uint32_t hi = (uint64_t(mul_) * n) >> 32;
+    return (hi + ((n - hi) >> 1)) >> shift_;
+  }
+
+ private:
+  uint32_t mul_;
+  const int shift_;
+};
+
+}  // namespace pik
+
+#endif  // PIK_DATA_PARALLEL_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/dc_predictor.cc b/codec/L2/demos/pikEnc/host/pik/dc_predictor.cc
new file mode 100755
index 0000000000..57348383c5
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/dc_predictor.cc
@@ -0,0 +1,646 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/dc_predictor.h"
+
+#include <stddef.h>
+
+#include "pik/compiler_specific.h"
+#include "pik/simd/simd.h"
+
+namespace pik {
+namespace {
+
+constexpr size_t kNumPredictors = 8;
+#if SIMD_TARGET_VALUE == SIMD_NONE
+using DI = Scalar<int16_t>;
+// Scalar fallback: one DI::V per predictor (also used for their costs).
+struct VIx8 {
+  DI::V lanes[kNumPredictors];
+};
+// Scalar fallback for a two-lane value (U, V).
+struct VIx2 {
+  DI::V lanes[2];
+};
+// Lane-wise sum of two VIx2 values.
+PIK_INLINE VIx2 operator+(const VIx2& a, const VIx2& b) {
+  VIx2 sum;
+  for (size_t i = 0; i < 2; ++i) sum.lanes[i] = a.lanes[i] + b.lanes[i];
+  return sum;
+}
+// Lane-wise difference of two VIx2 values.
+PIK_INLINE VIx2 operator-(const VIx2& a, const VIx2& b) {
+  VIx2 diff;
+  for (size_t i = 0; i < 2; ++i) diff.lanes[i] = a.lanes[i] - b.lanes[i];
+  return diff;
+}
+#else
+using DI = SIMD_PART(int16_t, kNumPredictors);
+using VIx8 = DI::V;
+using VIx2 = SIMD_PART(int16_t, 2)::V;
+#endif
+
+// Saturating add, then shift right by 1. Unlike avg, truncates (no rounding)!
+template <class V>
+SIMD_ATTR PIK_INLINE V Average(const V v0, const V v1) {
+  return shift_right<1>(saturated_add(v0, v1));
+}
+
+// Gradient n + w - l (saturating arithmetic), clamped to min/max of n, w, l.
+template <class V>
+SIMD_ATTR PIK_INLINE V ClampedGradient(const V n, const V w, const V l) {
+  const V grad = saturated_subtract(saturated_add(n, w), l);
+  const V vmin = min(n, min(w, l));
+  const V vmax = max(n, max(w, l));
+  return min(max(vmin, grad), vmax);
+}
+
+template <class V>
+SIMD_ATTR PIK_INLINE V AbsResidual(const V c, const V pred) {
+  return abs(saturated_subtract(c, pred));  // abs of the saturating c - pred
+}
+
+#if SIMD_TARGET_VALUE == SIMD_NONE
+
+SIMD_ATTR PIK_INLINE size_t IndexOfMinCost(const VIx8& abs_costs) {
+  // Lowest index of the minimum cost; must exactly match minpos_epu16.
+  // NOTE(review): compares signed int16_t, minpos is unsigned - confirm.
+  const DI d;
+  size_t best = 0;
+  int16_t lowest = get_part(d, abs_costs.lanes[0]);
+  for (size_t i = 1; i < kNumPredictors; ++i) {
+    const int16_t candidate = get_part(d, abs_costs.lanes[i]);
+    if (candidate >= lowest) continue;
+    lowest = candidate;
+    best = i;
+  }
+  return best;
+}
+
+#else
+
+// Returns a shuffle mask for moving lane i to lane 0 (i = argmin abs_costs[i]).
+// This is used for selecting the best predictor(s). The shuffle also broadcasts
+// the result to all lanes so that callers can use any_part.
+SIMD_ATTR PIK_INLINE u8x16 ShuffleForMinCost(const VIx8 abs_costs) {
+  using D8 = SIMD_PART(uint8_t, kNumPredictors * 2);
+  const D8 d8;
+  // All-2 table: replicates index16 returned from minpos into all bytes.
+  SIMD_ALIGN const uint8_t kIdx[16] = {2, 2, 2, 2, 2, 2, 2, 2,
+                                       2, 2, 2, 2, 2, 2, 2, 2};
+  // Per-byte offset: +1 picks the most significant byte in each 16-bit pair.
+  SIMD_ALIGN const uint8_t kHighByte[16] = {0, 1, 0, 1, 0, 1, 0, 1,
+                                            0, 1, 0, 1, 0, 1, 0, 1};
+  const auto bytes_from_idx = load(d8, kIdx);
+  const auto high_byte = load(d8, kHighByte);
+  // Note: minpos is unsigned; LimitsMin (a large absolute value) will have a
+  // higher cost than any other value.
+  using DU = SIMD_PART(uint16_t, kNumPredictors);
+  const auto idx_min = ext::minpos(cast_to(DU(), abs_costs));
+  const auto idx_idx = table_lookup_bytes(idx_min, bytes_from_idx);
+  const auto byte_idx = idx_idx + idx_idx;  // shift left by 1 => byte index
+  return cast_to(d8, byte_idx) + high_byte;
+}
+
+#endif
+
+// Sliding window of "causal" (already decoded) pixels, plus simple functions
+// to predict the next pixel "c" from its neighbors: l n r
+// The single-letter names shorten identifiers.      w c
+//
+// Predictions are more accurate when the preceding w pixel is available, but
+// this interferes with SIMD because subsequent pixels depend on the decoding
+// of their predecessor. The encoder can compute residuals in parallel because
+// it knows all DC values up front, but its speed is less important. A diagonal
+// 'wavefront' order would allow computing multiple predictions efficiently,
+// but scattering those to the corresponding pixel positions would be slow.
+// Interleaving pixels by the lane count (eight pixels with x mod 8 = 0, etc)
+// would work if the two pixels before each prediction are already known, but
+// scattering lanes to multiples of 10 would also be slow.
+//
+// We instead compute the various predictors using SIMD, especially because
+// many of them are similar. Horizontal operations are generally inefficient,
+// but we take advantage of special hardware support for video codecs (minpos).
+//
+// The set of 8 predictors was chosen from a set of 16 as the combination that
+// minimized a simple model of encoding cost. Their order matters because
+// minpos(lanes) returns the lowest i with lanes[i] == min. We again retained
+// the permutation with the lowest encoding cost.
+class PixelNeighborsY {
+ public:
+  // Single Y value.
+  using PixelD = SIMD_PART(int16_t, 1);
+  using PixelV = PixelD::V;
+
+  static SIMD_ATTR PIK_INLINE PixelV Load(const DC* PIK_RESTRICT row,
+                                          const size_t x) {
+    return set_part(PixelD(), row[x]);
+  }
+
+  static SIMD_ATTR PIK_INLINE void Store(const PixelV dc, DC* PIK_RESTRICT row,
+                                         const size_t x) {
+    row[x] = get_part(PixelD(), dc);
+  }
+
+  static SIMD_ATTR PIK_INLINE DI::V Broadcast(const PixelV dc) {
+    return broadcast_part<0>(DI(), dc);
+  }
+
+  // Loads the neighborhood for predicting at x = 2 from the top/middle/bottom
+  // rows; if y = 1, row_t == row_m == Row(0). row_ym and row_yb are unused.
+  SIMD_ATTR PixelNeighborsY(const DC* PIK_RESTRICT row_ym,
+                            const DC* PIK_RESTRICT row_yb,
+                            const DC* PIK_RESTRICT row_t,
+                            const DC* PIK_RESTRICT row_m,
+                            const DC* PIK_RESTRICT row_b) {
+    const DI d;
+    const auto wl = set1(d, row_m[0]);
+    const auto ww = set1(d, row_b[0]);
+    tl_ = set1(d, row_t[1]);
+    tn_ = set1(d, row_t[2]);
+    l_ = set1(d, row_m[1]);
+    n_ = set1(d, row_m[2]);
+    w_ = set1(d, row_b[1]);
+    Predict(l_, ww, wl, n_, &pred_w_);
+  }
+
+  // Per-predictor cost from predicting the known n and w; advances tl_/tn_.
+  SIMD_ATTR PIK_INLINE void PredictorCosts(const size_t x,
+                                           const DC* PIK_RESTRICT row_ym,
+                                           const DC* PIK_RESTRICT row_yb,
+                                           const DC* PIK_RESTRICT row_t,
+                                           VIx8* PIK_RESTRICT costs) {
+    const auto tr = Broadcast(Load(row_t, x + 1));
+    VIx8 pred_n;
+    Predict(tn_, l_, tl_, tr, &pred_n);
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    for (size_t i = 0; i < kNumPredictors; ++i) {
+      costs->lanes[i] =
+          AbsResidual(n_, pred_n.lanes[i]) + AbsResidual(w_, pred_w_.lanes[i]);
+    }
+#else
+    *costs = AbsResidual(n_, pred_n) + AbsResidual(w_, pred_w_);
+#endif
+    tl_ = tn_;
+    tn_ = tr;
+  }
+
+  // Returns predictor for pixel c with min cost and updates pred_w_.
+  SIMD_ATTR PIK_INLINE PixelV PredictC(const PixelV r, const VIx8 costs) {
+    VIx8 pred_c;
+    Predict(n_, w_, l_, Broadcast(r), &pred_c);
+    pred_w_ = pred_c;
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    return pred_c.lanes[IndexOfMinCost(costs)];
+#else
+    return any_part(PixelD(),
+                    table_lookup_bytes(pred_c, ShuffleForMinCost(costs)));
+#endif
+  }
+
+  SIMD_ATTR PIK_INLINE void Advance(const PixelV r, const PixelV c) {
+    l_ = n_;
+    n_ = Broadcast(r);
+    w_ = Broadcast(c);
+  }
+
+ private:
+  // Fills *pred with all eight predictions. All input arguments are broadcasted.
+  static SIMD_ATTR PIK_INLINE void Predict(const DI::V n, const DI::V w,
+                                           const DI::V l, const DI::V r,
+                                           VIx8* PIK_RESTRICT pred) {
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    // Eight predictors for luminance (decreases coded size by ~0.5% vs four)
+    pred->lanes[0] = Average(Average(n, w), r);
+    pred->lanes[1] = Average(w, n);
+    pred->lanes[2] = Average(n, r);
+    pred->lanes[3] = Average(w, l);
+    pred->lanes[4] = Average(n, l);
+    pred->lanes[5] = w;
+    pred->lanes[6] = ClampedGradient(n, w, l);
+    pred->lanes[7] = n;
+#else
+    // "x" are invalid/don't care lanes.
+    const auto vRN = interleave_lo(n, r);
+    const auto v6 = ClampedGradient(n, w, l);
+    const auto vLLRN = combine_shift_right_bytes<12>(l, vRN);
+    const auto vNWNWNWNW = interleave_lo(w, n);
+    const auto vWxxxLLRN = concat_hi_lo(w, vLLRN);
+    const auto vAxxx4321 = Average(vNWNWNWNW, vWxxxLLRN);
+    const auto vx765xxxx = interleave_lo(vNWNWNWNW, v6);
+    const auto vx7654321 = concat_hi_lo(vx765xxxx, vAxxx4321);
+    const auto v0xxxxxxx = Average(vAxxx4321, r);
+    *pred = combine_shift_right_bytes<14>(vx7654321, v0xxxxxxx);
+#endif
+  }
+
+  DI::V tl_;
+  DI::V tn_;
+  DI::V n_;
+  DI::V w_;
+  DI::V l_;
+  // (30% overall speedup by reusing the current prediction as the next pred_w_)
+  VIx8 pred_w_;
+};
+
+// Providing separate sets of predictors for the luminance and chrominance bands
+// reduces the magnitude of residuals, but differentiating between the
+// chrominance bands does not.
+class PixelNeighborsXB {
+ public:
+#if SIMD_TARGET_VALUE != SIMD_NONE
+  using PixelD = SIMD_PART(int16_t, 2);
+#endif
+  using PixelV = VIx2;
+
+  // Lane 1 holds X (called U below), lane 0 holds B (called V below).
+  static SIMD_ATTR PIK_INLINE PixelV Load(const DC* PIK_RESTRICT row,
+                                          const size_t x) {
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    PixelV ret;
+    ret.lanes[0] = load(DI(), row + 2 * x + 0);  // V
+    ret.lanes[1] = load(DI(), row + 2 * x + 1);  // U
+    return ret;
+#else
+    return load(PixelD(), row + 2 * x);
+#endif
+  }
+
+  static SIMD_ATTR PIK_INLINE void Store(const PixelV xb, DC* PIK_RESTRICT row,
+                                         const size_t x) {
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    store(xb.lanes[0], DI(), row + 2 * x + 0);  // B
+    store(xb.lanes[1], DI(), row + 2 * x + 1);  // X
+#else
+    store(xb, PixelD(), row + 2 * x);
+#endif
+  }
+
+  SIMD_ATTR PixelNeighborsXB(const DC* PIK_RESTRICT row_ym,
+                             const DC* PIK_RESTRICT row_yb,
+                             const DC* PIK_RESTRICT row_t,
+                             const DC* PIK_RESTRICT row_m,
+                             const DC* PIK_RESTRICT row_b) {
+    const DI d;
+    yn_ = set1(d, row_ym[2]);
+    yw_ = set1(d, row_yb[1]);
+    yl_ = set1(d, row_ym[1]);
+    n_ = Load(row_m, 2);
+    w_ = Load(row_b, 1);
+    l_ = Load(row_m, 1);
+  }
+
+  // Per-predictor cost from predicting the known c of the Y band; advances y*_.
+  SIMD_ATTR PIK_INLINE void PredictorCosts(const size_t x,
+                                           const DC* PIK_RESTRICT row_ym,
+                                           const DC* PIK_RESTRICT row_yb,
+                                           const DC* PIK_RESTRICT,
+                                           VIx8* PIK_RESTRICT costs) {
+    const auto yr = set1(DI(), row_ym[x + 1]);
+    const auto yc = set1(DI(), row_yb[x]);
+    VIx8 pred_y;
+    Predict(yn_, yw_, yl_, yr, &pred_y);
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    for (size_t i = 0; i < kNumPredictors; ++i) {
+      costs->lanes[i] = AbsResidual(yc, pred_y.lanes[i]);
+    }
+#else
+    *costs = AbsResidual(yc, pred_y);
+#endif
+    yl_ = yn_;
+    yn_ = yr;
+    yw_ = yc;
+  }
+
+  // Returns the min-cost prediction for pixel c; both lanes use the same index.
+  SIMD_ATTR PIK_INLINE PixelV PredictC(const PixelV r,
+                                       const VIx8& costs) const {
+    VIx8 u, v;
+    Predict(BroadcastX(n_), BroadcastX(w_), BroadcastX(l_), BroadcastX(r), &u);
+    Predict(BroadcastB(n_), BroadcastB(w_), BroadcastB(l_), BroadcastB(r), &v);
+
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    const size_t idx_pred = IndexOfMinCost(costs);
+    PixelV ret;
+    ret.lanes[0] = v.lanes[idx_pred];
+    ret.lanes[1] = u.lanes[idx_pred];
+    return ret;
+#else
+    const auto shuffle = ShuffleForMinCost(costs);
+    const auto best_u = table_lookup_bytes(u, shuffle);
+    const auto best_v = table_lookup_bytes(v, shuffle);
+    return any_part(PixelD(), interleave_lo(best_v, best_u));
+#endif
+  }
+
+  SIMD_ATTR PIK_INLINE void Advance(const PixelV r, const PixelV c) {
+    l_ = n_;
+    n_ = r;
+    w_ = c;
+  }
+
+ private:
+  static SIMD_ATTR PIK_INLINE DI::V BroadcastX(const PixelV xb) {
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    return xb.lanes[1];
+#else
+    return broadcast_part<1>(DI(), xb);
+#endif
+  }
+  static SIMD_ATTR PIK_INLINE DI::V BroadcastB(const PixelV xb) {
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    return xb.lanes[0];
+#else
+    return broadcast_part<0>(DI(), xb);
+#endif
+  }
+
+  // Fills *pred with all eight predictions. All arguments are broadcasted.
+  static SIMD_ATTR PIK_INLINE void Predict(const DI::V n, const DI::V w,
+                                           const DI::V l, const DI::V r,
+                                           VIx8* PIK_RESTRICT pred) {
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    // Eight predictors for chrominance:
+    pred->lanes[0] = ClampedGradient(n, w, l);
+    pred->lanes[1] = Average(n, w);
+    pred->lanes[2] = n;
+    pred->lanes[3] = Average(n, r);
+    pred->lanes[4] = w;
+    pred->lanes[5] = Average(w, l);
+    pred->lanes[6] = r;
+    pred->lanes[7] = Average(Average(w, r), n);
+#else
+    // "x" lanes are unused.
+    const auto v0 = ClampedGradient(n, w, l);
+    const auto vRN = interleave_lo(n, r);
+    const auto vW0 = interleave_lo(v0, w);
+    const auto vLNN = combine_shift_right_bytes<12>(l, n);
+    const auto vWRWR = interleave_lo(r, w);
+    const auto vLNNW = combine_shift_right_bytes<14>(vLNN, w);
+    const auto vRWN0 = interleave_lo(vW0, vRN);
+    const auto v531A = Average(vLNNW, vWRWR);
+    const auto v6543210x = interleave_lo(v531A, vRWN0);
+    const auto v7 = Average(v531A, n);
+    *pred = combine_shift_right_bytes<2>(v7, v6543210x);
+#endif
+  }
+
+  DI::V yn_;
+  DI::V yw_;
+  DI::V yl_;
+  PixelV n_;
+  PixelV w_;
+  PixelV l_;
+};
+
+// Computes residuals of a fixed predictor (the preceding pixel W).
+// Useful for Row(0) because no preceding row is required.
+template <class N>
+struct FixedW {
+  // Pixel 0 is stored as is; every later pixel minus its left neighbor.
+  static SIMD_ATTR PIK_INLINE void Shrink(const size_t xsize,
+                                          const DC* PIK_RESTRICT dc,
+                                          DC* PIK_RESTRICT residuals) {
+    N::Store(N::Load(dc, 0), residuals, 0);
+    for (size_t w = 0; w + 1 < xsize; ++w)
+      N::Store(N::Load(dc, w + 1) - N::Load(dc, w), residuals, w + 1);
+  }
+
+  // Inverse of Shrink: rebuilds dc left to right as a running sum.
+  static SIMD_ATTR PIK_INLINE void Expand(const size_t xsize,
+                                          const DC* PIK_RESTRICT residuals,
+                                          DC* PIK_RESTRICT dc) {
+    N::Store(N::Load(residuals, 0), dc, 0);
+    for (size_t w = 0; w + 1 < xsize; ++w)
+      N::Store(N::Load(dc, w) + N::Load(residuals, w + 1), dc, w + 1);
+  }
+};
+
+// Left border: x = 0 is predicted with n and x = 1 with w. Compared to FixedW
+// (first coefficient stored directly), overall abs residuals shrink by 6%.
+template <class N>
+struct LeftBorder2 {
+  static SIMD_ATTR PIK_INLINE void Shrink(const size_t xsize,
+                                          const DC* PIK_RESTRICT row_m,
+                                          const DC* PIK_RESTRICT row_b,
+                                          DC* PIK_RESTRICT residuals) {
+    N::Store(N::Load(row_b, 0) - N::Load(row_m, 0), residuals, 0);
+    if (xsize < 2) return;  // There is no x = 1.
+    // TODO(robryk): Clamped gradient should be slightly better here.
+    const auto from_w = N::Load(row_b, 1) - N::Load(row_b, 0);
+    N::Store(from_w, residuals, 1);
+  }
+
+  static SIMD_ATTR PIK_INLINE void Expand(const size_t xsize,
+                                          const DC* PIK_RESTRICT residuals,
+                                          const DC* PIK_RESTRICT row_m,
+                                          DC* PIK_RESTRICT row_b) {
+    N::Store(N::Load(row_m, 0) + N::Load(residuals, 0), row_b, 0);
+    if (xsize < 2) return;  // There is no x = 1.
+    // x = 0 was just reconstructed, so it can serve as w.
+    N::Store(N::Load(row_b, 0) + N::Load(residuals, 1), row_b, 1);
+  }
+};
+
+// Predicts the last x with w; needed since PixelNeighbors* depend on "r".
+template <class N>
+struct RightBorder1 {
+  static SIMD_ATTR PIK_INLINE void Shrink(const size_t xsize,
+                                          const DC* PIK_RESTRICT dc,
+                                          DC* PIK_RESTRICT residuals) {
+    if (xsize < 2) return;  // No w exists for a single column.
+    // TODO(robryk): Clamped gradient should be slightly better here.
+    const size_t last = xsize - 1;
+    const auto res = N::Load(dc, last) - N::Load(dc, last - 1);
+    N::Store(res, residuals, last);
+  }
+
+  static SIMD_ATTR PIK_INLINE void Expand(const size_t xsize,
+                                          const DC* PIK_RESTRICT residuals,
+                                          DC* PIK_RESTRICT dc) {
+    if (xsize < 2) return;  // No w exists for a single column.
+    const size_t last = xsize - 1;
+    const auto xb = N::Load(dc, last - 1) + N::Load(residuals, last);
+    N::Store(xb, dc, last);
+  }
+};
+
+// Chooses the predictor according to its error at the prior n and w pixels.
+// Needs two preceding rows (t, m) in addition to the current row b. For
+// N = PixelNeighborsY the row_y* pointers are unused and may be null.
+template <class N>
+class Adaptive {
+  using PixelV = typename N::PixelV;
+
+ public:
+  static SIMD_ATTR void Shrink(const size_t xsize,
+                               const DC* PIK_RESTRICT row_ym,
+                               const DC* PIK_RESTRICT row_yb,
+                               const DC* PIK_RESTRICT row_t,
+                               const DC* PIK_RESTRICT row_m,
+                               const DC* PIK_RESTRICT row_b,
+                               DC* PIK_RESTRICT residuals) {
+    LeftBorder2<N>::Shrink(xsize, row_m, row_b, residuals);
+
+    const auto shrink_px = [row_b, residuals](const size_t x,
+                                              const PixelV pred) SIMD_ATTR {
+      const auto actual = N::Load(row_b, x);
+      N::Store(actual - pred, residuals, x);
+      return actual;
+    };
+    ForeachPrediction(xsize, row_ym, row_yb, row_t, row_m, row_b, shrink_px);
+
+    RightBorder1<N>::Shrink(xsize, row_b, residuals);
+  }
+
+  static SIMD_ATTR void Expand(const size_t xsize,
+                               const DC* PIK_RESTRICT row_ym,
+                               const DC* PIK_RESTRICT row_yb,
+                               const DC* PIK_RESTRICT residuals,
+                               const DC* PIK_RESTRICT row_t,
+                               const DC* PIK_RESTRICT row_m,
+                               DC* PIK_RESTRICT row_b) {
+    LeftBorder2<N>::Expand(xsize, residuals, row_m, row_b);
+
+    const auto expand_px = [row_b, residuals](const size_t x,
+                                              const PixelV pred) SIMD_ATTR {
+      const auto decoded = pred + N::Load(residuals, x);
+      N::Store(decoded, row_b, x);
+      return decoded;
+    };
+    ForeachPrediction(xsize, row_ym, row_yb, row_t, row_m, row_b, expand_px);
+
+    RightBorder1<N>::Expand(xsize, residuals, row_b);
+  }
+
+ private:
+  // Runs func(x, prediction) on the interior pixels. "Func" must return the
+  // current pixel, dc[x], which is needed to advance the neighbor state.
+  template <class Func>
+  static SIMD_ATTR PIK_INLINE void ForeachPrediction(
+      const size_t xsize, const DC* PIK_RESTRICT row_ym,
+      const DC* PIK_RESTRICT row_yb, const DC* PIK_RESTRICT row_t,
+      const DC* PIK_RESTRICT row_m, const DC* PIK_RESTRICT row_b,
+      const Func& func) {
+    if (xsize < 2) return;  // Avoid out of bounds reads.
+    N neighbors(row_ym, row_yb, row_t, row_m, row_b);
+    // PixelNeighborsY uses w at x - 1 => two pixel margin on the left; "r" is
+    // taken from x + 1 => one pixel margin on the right.
+    for (size_t x = 2; x + 1 < xsize; ++x) {
+      const auto right = N::Load(row_m, x + 1);
+      VIx8 costs;
+      neighbors.PredictorCosts(x, row_ym, row_yb, row_t, &costs);
+      const auto prediction = neighbors.PredictC(right, costs);
+      const auto current = func(x, prediction);
+      neighbors.Advance(right, current);
+    }
+  }
+};
+
+}  // namespace
+
+// Luminance: writes, for each row of "in_y" inside "rect_in", the prediction
+// residuals into the matching row of "residuals" inside "rect_res".
+SIMD_ATTR void ShrinkY(const Rect& rect_in, const ImageS& in_y,
+                       const Rect& rect_res, ImageS* PIK_RESTRICT residuals) {
+  const size_t xsize = rect_in.xsize();
+  const size_t ysize = rect_in.ysize();
+  PIK_ASSERT(SameSize(rect_in, rect_res));
+
+  // Row 0 has no row above it, so it uses the fixed left-neighbor predictor.
+  FixedW<PixelNeighborsY>::Shrink(xsize, rect_in.ConstRow(in_y, 0),
+                                  rect_res.Row(residuals, 0));
+
+  // Every later row goes through the adaptive predictor. The row_y* arguments
+  // are not used for luminance, hence nullptr.
+  for (size_t y = 1; y < ysize; ++y) {
+    // Row 1 has only one previous row, so row_t == row_m there.
+    const size_t y_t = (y == 1) ? 0 : y - 2;
+    const size_t y_m = y - 1;
+    Adaptive<PixelNeighborsY>::Shrink(
+        xsize, nullptr, nullptr, rect_in.ConstRow(in_y, y_t),
+        rect_in.ConstRow(in_y, y_m), rect_in.ConstRow(in_y, y),
+        rect_res.Row(residuals, y));
+  }
+}
+
+// Counterpart of ShrinkY: rebuilds the rows of "tmp_expanded" from the
+// "residuals" inside "rect", predicting from rows already reconstructed.
+SIMD_ATTR void ExpandY(const Rect& rect, const ImageS& residuals,
+                       ImageS* PIK_RESTRICT tmp_expanded) {
+  const size_t xsize = rect.xsize();
+  const size_t ysize = rect.ysize();
+  PIK_ASSERT(xsize <= tmp_expanded->xsize() && ysize <= tmp_expanded->ysize());
+
+  // Row 0 was coded with the fixed left-neighbor predictor.
+  FixedW<PixelNeighborsY>::Expand(xsize, rect.ConstRow(residuals, 0),
+                                  tmp_expanded->Row(0));
+
+  // Remaining rows: adaptive predictor; row_y* are unused for luminance.
+  for (size_t y = 1; y < ysize; ++y) {
+    // Row 1 has only one previous row, so it serves as both row_t and row_m.
+    const size_t y_t = (y == 1) ? 0 : y - 2;
+    const size_t y_m = y - 1;
+    Adaptive<PixelNeighborsY>::Expand(
+        xsize, nullptr, nullptr, rect.ConstRow(residuals, y),
+        tmp_expanded->ConstRow(y_t), tmp_expanded->ConstRow(y_m),
+        tmp_expanded->Row(y));
+  }
+}
+
+// Chrominance: stores the residuals of predicting "tmp_xb" into
+// "tmp_xb_residuals", guided by the luminance "in_y" within "rect".
+SIMD_ATTR void ShrinkXB(const Rect& rect, const ImageS& in_y,
+                        const ImageS& tmp_xb,
+                        ImageS* PIK_RESTRICT tmp_xb_residuals) {
+  const size_t xsize = rect.xsize();
+  const size_t ysize = rect.ysize();
+  PIK_ASSERT(SameSize(tmp_xb, *tmp_xb_residuals));
+  PIK_ASSERT(tmp_xb.xsize() >= xsize && tmp_xb.ysize() >= ysize);
+
+  // Row 0: fixed left-neighbor predictor, luminance is not consulted.
+  FixedW<PixelNeighborsXB>::Shrink(xsize, tmp_xb.ConstRow(0),
+                                   tmp_xb_residuals->Row(0));
+
+  // Rows >= 1: adaptive predictor. It is handed luminance rows y - 1 and y
+  // (taken from "in_y" within "rect") plus the two XB rows above.
+  for (size_t y = 1; y < ysize; ++y) {
+    // Row 1 has only one previous row, so row_t == row_m there.
+    const size_t y_t = (y == 1) ? 0 : y - 2;
+    const size_t y_m = y - 1;
+    Adaptive<PixelNeighborsXB>::Shrink(
+        xsize, rect.ConstRow(in_y, y_m), rect.ConstRow(in_y, y),
+        tmp_xb.ConstRow(y_t), tmp_xb.ConstRow(y_m), tmp_xb.ConstRow(y),
+        tmp_xb_residuals->Row(y));
+  }
+}
+
+// Counterpart of ShrinkXB: rebuilds "tmp_xb_expanded" from "tmp_xb_residuals"
+// using predictions from already expanded pixels and the luminance "tmp_y".
+SIMD_ATTR void ExpandXB(const size_t xsize, const size_t ysize,
+                        const ImageS& tmp_y, const ImageS& tmp_xb_residuals,
+                        ImageS* PIK_RESTRICT tmp_xb_expanded) {
+  PIK_ASSERT(tmp_y.xsize() >= xsize && tmp_y.ysize() >= ysize);
+  // The XB images must cover the window too (same bound as in ShrinkXB);
+  // tmp_xb_expanded is covered through the SameSize check below.
+  PIK_ASSERT(tmp_xb_residuals.xsize() >= xsize &&
+             tmp_xb_residuals.ysize() >= ysize);
+  PIK_ASSERT(SameSize(tmp_xb_residuals, *tmp_xb_expanded));
+
+  FixedW<PixelNeighborsXB>::Expand(xsize, tmp_xb_residuals.ConstRow(0),
+                                   tmp_xb_expanded->Row(0));
+
+  for (size_t y = 1; y < ysize; ++y) {
+    // Row 1 has only one previous row, so row_t == row_m there.
+    const size_t y_t = (y == 1) ? 0 : y - 2;
+    Adaptive<PixelNeighborsXB>::Expand(
+        xsize, tmp_y.ConstRow(y - 1), tmp_y.ConstRow(y),
+        tmp_xb_residuals.ConstRow(y), tmp_xb_expanded->ConstRow(y_t),
+        tmp_xb_expanded->ConstRow(y - 1), tmp_xb_expanded->Row(y));
+  }
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/dc_predictor.h b/codec/L2/demos/pikEnc/host/pik/dc_predictor.h
new file mode 100755
index 0000000000..b14a7a6ecc
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/dc_predictor.h
@@ -0,0 +1,64 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_DC_PREDICTOR_H_
+#define PIK_DC_PREDICTOR_H_
+
+// DC coefficients serve as an image preview, so they are coded separately.
+// Subtracting predicted values leads to a "residual" distribution with lower
+// entropy and magnitudes than the original values. These can be coded more
+// efficiently, even when context modeling is used.
+//
+// Our predictors use immediately adjacent causal pixels because more distant
+// pixels are only weakly correlated in subsampled DC images. We also utilize
+// cross-channel correlation by choosing a predictor based upon its performance
+// on a previously decoded channel.
+//
+// This module decreases final size of DC images by 2-4% vs. the standard
+// MED/MAP predictor from JPEG-LS and processes 330 M coefficients per second.
+// The average residual is about 1.3% of the maximum DC value.
+
+#include <stdint.h>
+
+#include "pik/compiler_specific.h"
+#include "pik/image.h"
+#include "pik/image_ops.h"
+
+namespace pik {
+
+// The predictors operate on DCT coefficients or perhaps original pixels.
+// Must be 16-bit because we have 128 bit vectors and 8 predictors.
+// Can be full-range [-32768, 32767].
+using DC = int16_t;
+
+// Predicts "in_y" coefficients within "rect_in" based on their neighbors and
+// stores the residuals into "residuals" within "rect_res". The predictors
+// are tuned for luminance.
+void ShrinkY(const Rect& rect_in, const ImageS& in_y, const Rect& rect_res,
+             ImageS* PIK_RESTRICT residuals);
+
+// All tmp_* images are thread-specific group-sized subsets:
+
+// Expands "residuals" within "rect" (same as a prior call to ShrinkY) into
+// preallocated "tmp_expanded", using predictions from prior pixels.
+void ExpandY(const Rect& rect, const ImageS& residuals,
+             ImageS* PIK_RESTRICT tmp_expanded);
+
+// Stores residuals of predicting XB pairs in "tmp_xb" from their neighbors
+// and the window "rect" [blocks] within already expanded "y" (luminance).
+void ShrinkXB(const Rect& rect, const ImageS& in_y, const ImageS& tmp_xb,
+              ImageS* PIK_RESTRICT tmp_xb_residuals);
+
+// Expands "tmp_xb_residuals" (a subset of the result of ShrinkXB) into
+// "tmp_xb_expanded", using predictions from prior pixels and "tmp_y". All
+// images are at least xsize * ysize (2*xsize for the XB images).
+void ExpandXB(const size_t xsize, const size_t ysize, const ImageS& tmp_y,
+              const ImageS& tmp_xb_residuals,
+              ImageS* PIK_RESTRICT tmp_xb_expanded);
+
+}  // namespace pik
+
+#endif  // PIK_DC_PREDICTOR_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/dct.cc b/codec/L2/demos/pikEnc/host/pik/dct.cc
new file mode 100755
index 0000000000..bc3e0c8570
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/dct.cc
@@ -0,0 +1,13 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/dct.h"
+
+namespace pik {
+
+// Nothing here. This file remains, as it could be useful in the future.
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/dct.h b/codec/L2/demos/pikEnc/host/pik/dct.h
new file mode 100755
index 0000000000..1e5289e11a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/dct.h
@@ -0,0 +1,536 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_DCT_H_
+#define PIK_DCT_H_
+
+// Fast SIMD floating-point DCT8-32.
+
+#include <cmath>
+#include <cstring>
+#include "pik/block.h"
+#include "pik/compiler_specific.h"
+#include "pik/dct.h"
+#include "pik/dct_simd_4.h"
+#include "pik/dct_simd_8.h"
+#include "pik/dct_simd_any.h"
+#include "pik/simd/simd.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Final scaling factors of outputs/inputs in the Arai, Agui, and Nakajima
+// algorithm computing the DCT/IDCT (described in the book JPEG: Still Image
+// Data Compression Standard, section 4.3.5) and the "A low multiplicative
+// complexity fast recursive DCT-2 algorithm" (Maxim Vashkevich, Alexander
+// Petrovsky) algorithm. Note that the DCT and the IDCT scales of these two
+// algorithms are flipped. We use the first algorithm for DCT8, and the second
+// one for all other DCTs.
+/* Python snippet to produce these tables for the Arai, Agui, Nakajima
+ * algorithm:
+ *
+from mpmath import *
+N = 8
+def iscale(u):
+  eps = sqrt(mpf(0.5)) if u == 0 else mpf(1.0)
+  return sqrt(mpf(2) / mpf(N)) * eps * cos(mpf(u) * pi / mpf(2 * N))
+def scale(u):
+  return mpf(1) / (mpf(N) * iscale(u))
+mp.dps = 18
+print(", ".join([str(scale(i)) + 'f' for i in range(N)]))
+print(", ".join([str(iscale(i)) + 'f' for i in range(N)]))
+ */
+static constexpr const float kDCTScales2[2] = {0.707106781186547524f,
+                                               0.707106781186547524f};
+static constexpr const float kIDCTScales2[2] = {0.707106781186547524f,
+                                                0.707106781186547524f};
+static constexpr const float kDCTScales4[4] = {0.5f, 0.653281482438188264f,
+                                               0.5f, 0.270598050073098492f};
+static constexpr const float kIDCTScales4[4] = {0.5f, 0.382683432365089772f,
+                                                0.5f, 0.923879532511286756f};
+static constexpr const float kDCTScales8[8] = {  // snippet's scale(u), N = 8
+    0.353553390593273762f, 0.254897789552079584f, 0.270598050073098492f,
+    0.30067244346752264f,  0.353553390593273762f, 0.449988111568207852f,
+    0.653281482438188264f, 1.28145772387075309f};
+
+static constexpr const float kIDCTScales8[8] = {  // snippet's iscale(u), N = 8
+    0.353553390593273762f, 0.490392640201615225f, 0.461939766255643378f,
+    0.415734806151272619f, 0.353553390593273762f, 0.277785116509801112f,
+    0.191341716182544886f, 0.0975451610080641339f};
+
+static constexpr const float kIDCTScales16[16] = {0.25f,  // scale(u), N = 16
+                                                  0.177632042131274808f,
+                                                  0.180239955501736978f,
+                                                  0.184731156892216368f,
+                                                  0.191341716182544886f,
+                                                  0.200444985785954314f,
+                                                  0.212607523691814112f,
+                                                  0.228686034616512494f,
+                                                  0.25f,
+                                                  0.278654739432954475f,
+                                                  0.318189645143208485f,
+                                                  0.375006192208515097f,
+                                                  0.461939766255643378f,
+                                                  0.608977011699708658f,
+                                                  0.906127446352887843f,
+                                                  1.80352839005774887f};
+
+static constexpr const float kDCTScales16[16] = {0.25f,  // iscale(u), N = 16
+                                                 0.351850934381595615f,
+                                                 0.346759961330536865f,
+                                                 0.33832950029358817f,
+                                                 0.326640741219094132f,
+                                                 0.311806253246667808f,
+                                                 0.293968900604839679f,
+                                                 0.273300466750439372f,
+                                                 0.25f,
+                                                 0.224291896585659071f,
+                                                 0.196423739596775545f,
+                                                 0.166663914619436624f,
+                                                 0.135299025036549246f,
+                                                 0.102631131880589345f,
+                                                 0.0689748448207357531f,
+                                                 0.0346542922997728657f};
+
+static constexpr const float kIDCTScales32[32] = {  // scale(u), N = 32
+    0.176776695296636881f, 0.125150749558799075f, 0.125604821547038926f,
+    0.126367739974385915f, 0.127448894776039792f, 0.128861827480656137f,
+    0.13062465373492222f,  0.132760647772446044f, 0.135299025036549246f,
+    0.138275974008611132f, 0.141736008704089426f, 0.145733742051533468f,
+    0.15033622173376132f,  0.155626030758916204f, 0.161705445839997532f,
+    0.168702085363751436f, 0.176776695296636881f, 0.186134067750574612f,
+    0.197038655862812556f, 0.20983741135388176f,  0.224994055784103926f,
+    0.243142059465490173f, 0.265169421497586868f, 0.292359983358221239f,
+    0.326640741219094132f, 0.371041154078541569f, 0.430611774559583482f,
+    0.514445252488352888f, 0.640728861935376545f, 0.851902104617179697f,
+    1.27528715467229096f,  2.5475020308870142f};
+
+static constexpr const float kDCTScales32[32] = {  // iscale(u), N = 32
+    0.176776695296636881f,  0.249698864051293098f,  0.248796181668049222f,
+    0.247294127491195243f,  0.245196320100807612f,  0.242507813298635998f,
+    0.239235083933052216f,  0.235386016295755195f,  0.230969883127821689f,
+    0.225997323280860833f,  0.220480316087088757f,  0.214432152500068017f,
+    0.207867403075636309f,  0.200801882870161227f,  0.19325261334068424f,
+    0.185237781338739773f,  0.176776695296636881f,  0.1678897387117546f,
+    0.158598321040911375f,  0.148924826123108336f,  0.138892558254900556f,
+    0.128525686048305432f,  0.117849184206499412f,  0.106888773357570524f,
+    0.0956708580912724429f, 0.0842224633480550127f, 0.0725711693136155919f,
+    0.0607450449758159725f, 0.048772580504032067f,  0.0366826186138404379f,
+    0.0245042850823901505f, 0.0122669185818545036f};
+
+template <size_t N>
+constexpr const float* DCTScales() {  // Unlisted N fall through to size 32.
+  return N == 2    ? kDCTScales2
+         : N == 4  ? kDCTScales4
+         : N == 8  ? kDCTScales8
+         : N == 16 ? kDCTScales16 : kDCTScales32;
+}
+
+template <size_t N>
+constexpr const float* IDCTScales() {
+  // Unlisted N fall through to the size-32 table.
+  return N == 2    ? kIDCTScales2
+         : N == 4  ? kIDCTScales4
+         : N == 8  ? kIDCTScales8
+         : N == 16 ? kIDCTScales16 : kIDCTScales32;
+}
+
+// Relative L1 norm of IDCT of a vector of 0s with a single 1 in position i
+// (with respect to the L1 norm of a DC-only vector).
+static constexpr const float kL1Norm2[2] = {
+    1.0000000000000000000f,
+    1.0000000000000000000f,
+};
+static constexpr const float kL1Norm4[4] = {
+    1.0000000000000000000f,  // DC: 1 by definition
+    0.9238795325112867561f,  // cos(pi/8)
+    1.0000000000000000000f,  // same as DC
+    0.9238795325112867561f,  // cos(pi/8)
+};
+static constexpr const float kL1Norm8[8] = {
+    1.0000000000000000000f,  // DC: 1 by definition
+    0.9061274463528878431f,  // cos(pi/8) * cos(pi/16)
+    0.9238795325112867561f,  // cos(pi/8)
+    0.9061274463528878431f,  // cos(pi/8) * cos(pi/16)
+    1.0000000000000000000f,  // same as DC
+    0.9061274463528878431f,  // cos(pi/8) * cos(pi/16)
+    0.9238795325112867561f,  // cos(pi/8)
+    0.9061274463528878431f,  // cos(pi/8) * cos(pi/16)
+
+};
+static constexpr const float kL1Norm16[16] = {
+    1.0000000000000000000f,  // DC: 1 by definition
+    0.9017641950288744354f,  // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9061274463528878431f,  // cos(pi/8) * cos(pi/16)
+    0.9017641950288744354f,  // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9238795325112867561f,  // cos(pi/8)
+    0.9017641950288744354f,  // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9061274463528878431f,  // cos(pi/8) * cos(pi/16)
+    0.9017641950288744354f,  // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    1.0000000000000000000f,  // same as DC
+    0.9017641950288744354f,  // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9061274463528878431f,  // cos(pi/8) * cos(pi/16)
+    0.9017641950288744354f,  // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9238795325112867561f,  // cos(pi/8)
+    0.9017641950288744354f,  // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9061274463528878431f,  // cos(pi/8) * cos(pi/16)
+    0.9017641950288744354f,  // cos(pi/8) * cos(pi/16) * cos(pi/32)
+};
+static constexpr const float kL1Norm32[32] = {
+    1.0000000000000000000f,  // DC: 1 by definition
+    0.9006779805633546924f,  // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9017641950288744354f,  // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9006779805633546924f,  // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9061274463528878431f,  // cos(pi/8) * cos(pi/16)
+    0.9006779805633546924f,  // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9017641950288744354f,  // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9006779805633546924f,  // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9238795325112867561f,  // cos(pi/8)
+    0.9006779805633546924f,  // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9017641950288744354f,  // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9006779805633546924f,  // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9061274463528878431f,  // cos(pi/8) * cos(pi/16)
+    0.9006779805633546924f,  // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9017641950288744354f,  // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9006779805633546924f,  // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    1.0000000000000000000f,  // same as DC
+    0.9006779805633546924f,  // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9017641950288744354f,  // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9006779805633546924f,  // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9061274463528878431f,  // cos(pi/8) * cos(pi/16)
+    0.9006779805633546924f,  // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9017641950288744354f,  // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9006779805633546924f,  // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9238795325112867561f,  // cos(pi/8)
+    0.9006779805633546924f,  // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9017641950288744354f,  // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9006779805633546924f,  // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9061274463528878431f,  // cos(pi/8) * cos(pi/16)
+    0.9006779805633546924f,  // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9017641950288744354f,  // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9006779805633546924f,  // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+};
+
+static constexpr const float kL1NormInv2[2] = {  // 1 / kL1Norm2[i]
+    1.000000000000000000f,
+    1.000000000000000000f,
+};
+static constexpr const float kL1NormInv4[4] = {  // 1 / kL1Norm4[i]
+    1.000000000000000000f,
+    1.082392200292393968f,
+    1.000000000000000000f,
+    1.082392200292393968f,
+};
+static constexpr const float kL1NormInv8[8] = {  // 1 / kL1Norm8[i]
+    1.000000000000000000f, 1.103597517131772049f, 1.082392200292393968f,
+    1.103597517131772049f, 1.000000000000000000f, 1.103597517131772049f,
+    1.082392200292393968f, 1.103597517131772049f,
+};
+static constexpr const float kL1NormInv16[16] = {  // 1 / kL1Norm16[i]
+    1.000000000000000000f, 1.108937353592731700f, 1.103597517131772049f,
+    1.108937353592731700f, 1.082392200292393968f, 1.108937353592731700f,
+    1.103597517131772049f, 1.108937353592731700f, 1.000000000000000000f,
+    1.108937353592731700f, 1.103597517131772049f, 1.108937353592731700f,
+    1.082392200292393968f, 1.108937353592731700f, 1.103597517131772049f,
+    1.108937353592731700f,
+};
+static constexpr const float kL1NormInv32[32] = {  // 1 / kL1Norm32[i]
+    1.000000000000000000f, 1.110274728127050414f, 1.108937353592731379f,
+    1.110274728127050414f, 1.103597517131772010f, 1.110274728127050636f,
+    1.108937353592731379f, 1.110274728127050414f, 1.082392200292393580f,
+    1.110274728127050414f, 1.108937353592730934f, 1.110274728127050414f,
+    1.103597517131771788f, 1.110274728127050414f, 1.108937353592731156f,
+    1.110274728127050414f, 0.999999999999999556f, 1.110274728127049970f,
+    1.108937353592731601f, 1.110274728127051080f, 1.103597517131771788f,
+    1.110274728127050414f, 1.108937353592732045f, 1.110274728127050192f,
+    1.082392200292394691f, 1.110274728127049526f, 1.108937353592733155f,
+    1.110274728127050858f, 1.103597517131772232f, 1.110274728127051969f,
+    1.108937353592732933f, 1.110274728127050414f,
+};
+
+template <size_t N>
+constexpr const float* L1Norm() {
+  // Unlisted N fall through to the size-32 table.
+  return N == 2    ? kL1Norm2
+         : N == 4  ? kL1Norm4
+         : N == 8  ? kL1Norm8
+         : N == 16 ? kL1Norm16 : kL1Norm32;
+}
+
+template <size_t N>
+constexpr const float* L1NormInv() {  // Unlisted N fall through to size 32.
+  return N == 2    ? kL1NormInv2
+         : N == 4  ? kL1NormInv4
+         : N == 8  ? kL1NormInv8
+         : N == 16 ? kL1NormInv16 : kL1NormInv32;
+}
+
+// Swaps each off-diagonal element with its mirror across the diagonal; see
+// https://en.wikipedia.org/wiki/In-place_matrix_transposition#Square_matrices
+template <size_t N, class From, class To>
+SIMD_ATTR PIK_INLINE void GenericTransposeBlockInplace(const From& from,
+                                                       const To& to) {
+  // Cheap sanity check only: both views must start at the same address.
+  PIK_ASSERT(from.Address(0, 0) == to.Address(0, 0));
+  for (size_t i = 0; i < N - 1; ++i) {
+    for (size_t j = i + 1; j < N; ++j) {
+      const float mirrored = from.Read(j, i);
+      to.Write(from.Read(i, j), j, i);
+      to.Write(mirrored, i, j);
+    }
+  }
+}
+
+template <size_t N, class From, class To>
+SIMD_ATTR PIK_INLINE void GenericTransposeBlock(const From& from,
+                                                const To& to) {
+  // Out-of-place variant; cheap sanity check that the views start apart.
+  PIK_ASSERT(from.Address(0, 0) != to.Address(0, 0));
+  for (size_t idx = 0; idx < N * N; ++idx) {
+    const size_t n = idx / N;
+    const size_t m = idx % N;
+    to.Write(from.Read(n, m), m, n);
+  }
+}
+
+template <class From, class To>
+SIMD_ATTR PIK_INLINE void TransposeBlock8(const From& from, const To& to) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+  TransposeBlock8_V8(from, to);
+#elif SIMD_TARGET_VALUE == SIMD_NONE
+  // Scalar fallback: choose by whether source and destination alias.
+  if (from.Address(0, 0) != to.Address(0, 0)) {
+    return GenericTransposeBlock<8>(from, to);
+  }
+  GenericTransposeBlockInplace<8>(from, to);
+#else  // generic 128-bit
+  TransposeBlock8_V4(from, to);
+#endif
+}
+
+template <class From, class To>
+SIMD_ATTR PIK_INLINE void TransposeBlock16(const From& from, const To& to) {
+  SIMD_ALIGN float tmp[8 * 8];  // parks one tile during the exchange
+  TransposeBlock8(from, to);    // diagonal 8x8 tiles
+  TransposeBlock8(from.View(8, 8), to.View(8, 8));
+  TransposeBlock8(from.View(0, 8), ToBlock<8>(tmp));  // off-diagonal exchange
+  TransposeBlock8(from.View(8, 0), to.View(0, 8));
+  CopyBlock8(FromBlock<8>(tmp), to.View(8, 0));
+}
+
+template <class From, class To>
+SIMD_ATTR PIK_INLINE void TransposeBlock32(const From& from, const To& to) {
+  // The block is treated as a 4x4 grid of 8x8 tiles. A tile on the diagonal
+  // is transposed onto its own position; a tile off the diagonal is
+  // transposed into the position of its mirror image and vice versa. One of
+  // the two is parked in "tmp" meanwhile, so that the exchange does not read
+  // a tile that has already been overwritten when "from" and "to" share
+  // storage.
+  constexpr size_t kTile = 8;
+  constexpr size_t kSide = 32;
+  SIMD_ALIGN float tmp[8 * 8];
+
+  // Tiles are visited in the order (0,0), then for each further offset "hi":
+  // all pairs (lo, hi) with lo < hi, followed by the diagonal tile (hi, hi).
+  TransposeBlock8(from, to);  // Diagonal tile at offset 0.
+  for (size_t hi = kTile; hi < kSide; hi += kTile) {
+    // Exchange every tile pair whose smaller offset is below "hi" ...
+    for (size_t lo = 0; lo < hi; lo += kTile) {
+      TransposeBlock8(from.View(lo, hi), ToBlock<8>(tmp));
+      TransposeBlock8(from.View(hi, lo), to.View(lo, hi));
+      CopyBlock8(FromBlock<8>(tmp), to.View(hi, lo));
+    }
+    // ... then handle the diagonal tile at offset "hi".
+    TransposeBlock8(from.View(hi, hi), to.View(hi, hi));
+  }
+}
+
+// Computes the in-place NxN transposed-scaled-DCT (tsDCT) of block.
+// Requires that block is SIMD_ALIGN'ed.
+//
+// Final DCT coefficients could be obtained the following way:
+//   unscaled(f)[x, y] = f[x, y] * DCTScales<N>[x] * DCTScales<N>[y]
+//   untransposed(f)[x, y] = f[y, x]
+//   DCT(input) = unscaled(untransposed(tsDCT(input)))
+//
+// NB: DCT denotes scaled variant of DCT-II, which is orthonormal.
+//
+// See also DCTSlow, ComputeDCT
+template <size_t N>
+struct ComputeTransposedScaledDCT;
+
+template <>
+struct ComputeTransposedScaledDCT<32> {
+  template <class From, class To>
+  SIMD_ATTR PIK_INLINE void operator()(const From& from, const To& to) {
+    SIMD_ALIGN float scratch[32 * 32];        // intermediate 32x32 result
+    ColumnDCT32(from, ToBlock<32>(scratch));  // first column pass
+    TransposeBlock32(FromBlock<32>(scratch), ToBlock<32>(scratch));  // in place
+    ColumnDCT32(FromBlock<32>(scratch), to);  // second column pass
+  }
+};
+
+template <>
+struct ComputeTransposedScaledDCT<16> {
+  template <class From, class To>
+  SIMD_ATTR PIK_INLINE void operator()(const From& from, const To& to) {
+    SIMD_ALIGN float scratch[16 * 16];        // intermediate 16x16 result
+    ColumnDCT16(from, ToBlock<16>(scratch));  // first column pass
+    TransposeBlock16(FromBlock<16>(scratch), ToBlock<16>(scratch));  // in place
+    ColumnDCT16(FromBlock<16>(scratch), to);  // second column pass
+  }
+};
+
+template <>
+struct ComputeTransposedScaledDCT<8> {
+  template <class From, class To>
+  SIMD_ATTR PIK_INLINE void operator()(const From& from, const To& to) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+    ComputeTransposedScaledDCT8_V8(from, to);
+#elif SIMD_TARGET_VALUE == SIMD_NONE
+    SIMD_ALIGN float scratch[8 * 8];        // intermediate 8x8 result
+    ColumnDCT8(from, ToBlock<8>(scratch));  // first column pass
+    TransposeBlock8(FromBlock<8>(scratch), ToBlock<8>(scratch));  // in place
+    ColumnDCT8(FromBlock<8>(scratch), to);  // second column pass
+#else  // any other SIMD target
+    ComputeTransposedScaledDCT8_V4(from, to);
+#endif
+  }
+};
+
+template <>
+struct ComputeTransposedScaledDCT<4> {
+  template <class From, class To>
+  SIMD_ATTR PIK_INLINE void operator()(const From& from, const To& to) {
+    SIMD_ALIGN float scratch[4 * 4];        // intermediate 4x4 result
+    ColumnDCT4(from, ToBlock<4>(scratch));  // first column pass
+    GenericTransposeBlockInplace<4>(FromBlock<4>(scratch), ToBlock<4>(scratch));
+    ColumnDCT4(FromBlock<4>(scratch), to);  // second column pass
+  }
+};
+
+template <>
+struct ComputeTransposedScaledDCT<2> {
+  template <class From, class To>
+  SIMD_ATTR PIK_INLINE void operator()(const From& from, const To& to) {
+    // 2x2 case written out directly: each output is a signed sum of the four
+    // inputs. All inputs are fetched first, so no write precedes a read.
+    const float v00 = from.Read(0, 0), v01 = from.Read(0, 1);
+    const float v10 = from.Read(1, 0), v11 = from.Read(1, 1);
+    to.Write(v00 + v01 + v10 + v11, 0, 0);
+    to.Write(v00 + v01 - v10 - v11, 0, 1);
+    to.Write(v00 - v01 + v10 - v11, 1, 0);
+    to.Write(v00 - v01 - v10 + v11, 1, 1);
+  }
+};
+
+// Computes the in-place NxN transposed-scaled-iDCT (tsIDCT) of block.
+// Requires that block is SIMD_ALIGN'ed.
+//
+// The final IDCT result can be obtained in the following way:
+//   unscaled(f)[x, y] = f[x, y] * IDCTScales<N>[x] * IDCTScales<N>[y]
+//   untransposed(f)[x, y] = f[y, x]
+//   IDCT(input) = tsIDCT(untransposed(unscaled(input)))
+//
+// NB: IDCT denotes the scaled variant of DCT-III, which is orthonormal.
+//
+// See also IDCTSlow, ComputeIDCT.
+template <size_t N>
+struct ComputeTransposedScaledIDCT;
+
+template <>
+struct ComputeTransposedScaledIDCT<32> {
+  template <class From, class To>
+  SIMD_ATTR PIK_INLINE void operator()(const From& from, const To& to) {
+    SIMD_ALIGN float block[32 * 32];
+    ColumnIDCT32(from, ToBlock<32>(block));
+    TransposeBlock32(FromBlock<32>(block), ToBlock<32>(block));
+    ColumnIDCT32(FromBlock<32>(block), to);
+  }
+};
+
+template <>
+struct ComputeTransposedScaledIDCT<16> {
+  template <class From, class To>
+  SIMD_ATTR PIK_INLINE void operator()(const From& from, const To& to) {
+    SIMD_ALIGN float block[16 * 16];
+    ColumnIDCT16(from, ToBlock<16>(block));
+    TransposeBlock16(FromBlock<16>(block), ToBlock<16>(block));
+    ColumnIDCT16(FromBlock<16>(block), to);
+  }
+};
+
+template <>
+struct ComputeTransposedScaledIDCT<8> {
+  template <class From, class To>
+  SIMD_ATTR PIK_INLINE void operator()(const From& from, const To& to) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+    ComputeTransposedScaledIDCT8_V8(from, to);
+#elif SIMD_TARGET_VALUE == SIMD_NONE
+    SIMD_ALIGN float block[8 * 8];
+    ColumnIDCT8(from, ToBlock<8>(block));
+    TransposeBlock8(FromBlock<8>(block), ToBlock<8>(block));
+    ColumnIDCT8(FromBlock<8>(block), to);
+#else
+    ComputeTransposedScaledIDCT8_V4(from, to);
+#endif
+  }
+};
+
+template <>
+struct ComputeTransposedScaledIDCT<4> {
+  template <class From, class To>
+  SIMD_ATTR PIK_INLINE void operator()(const From& from, const To& to) {
+    SIMD_ALIGN float block[4 * 4];
+    ColumnIDCT4(from, ToBlock<4>(block));
+    GenericTransposeBlockInplace<4>(FromBlock<4>(block), ToBlock<4>(block));
+    ColumnIDCT4(FromBlock<4>(block), to);
+  }
+};
+
+template <>
+struct ComputeTransposedScaledIDCT<2> {
+  template <class From, class To>
+  SIMD_ATTR PIK_INLINE void operator()(const From& from, const To& to) {
+    const float a00 = from.Read(0, 0);
+    const float a01 = from.Read(0, 1);
+    const float a10 = from.Read(1, 0);
+    const float a11 = from.Read(1, 1);
+
+    //std::cout<<"std_IDCT: a00="<<a00<<" a01="<<a01<<" a10"<<a10<<" a11"<<a11<<std::endl;
+
+    to.Write(a00 + a01 + a10 + a11, 0, 0);
+    to.Write(a00 + a01 - a10 - a11, 0, 1);
+    to.Write(a00 - a01 + a10 - a11, 1, 0);
+    to.Write(a00 - a01 - a10 + a11, 1, 1);
+  }
+};
+
+// Similar to ComputeTransposedScaledDCT, but only the DC coefficient is computed.
+template <size_t N, class From>
+static SIMD_ATTR PIK_INLINE float ComputeScaledDC(const From& from) {
+  static_assert(N == 8, "Currently only 8x8 is supported");
+
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+  return ComputeScaledDC8_V8(from);
+#elif SIMD_TARGET_VALUE == SIMD_NONE
+  const BlockDesc<N> d;
+  auto sum = setzero(d);
+  for (size_t iy = 0; iy < N; ++iy) {
+    for (size_t ix = 0; ix < N; ix += d.N) {
+      sum += from.Load(iy, ix);
+    }
+  }
+  sum = ext::sum_of_lanes(sum);
+  return get_part(SIMD_PART(float, 1)(), sum);
+#else
+  return ComputeScaledDC8_V4(from);
+#endif
+}
+
+}  // namespace pik
+
+#endif  // PIK_DCT_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/dct_simd_4.h b/codec/L2/demos/pikEnc/host/pik/dct_simd_4.h
new file mode 100755
index 0000000000..0edac0c07b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/dct_simd_4.h
@@ -0,0 +1,163 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_DCT_SIMD_4_H_
+#define PIK_DCT_SIMD_4_H_
+
+#include "pik/block.h"
+#include "pik/compiler_specific.h"
+#include "pik/dct_simd_any.h"
+#include "pik/simd/simd.h"
+
+#if (SIMD_TARGET_VALUE != SIMD_AVX2) && (SIMD_TARGET_VALUE != SIMD_NONE)
+
+namespace pik {
+
+// DCT building blocks that require SIMD vector length to be 4, e.g. SSE4.
+static_assert(BlockDesc<8>().N == 4, "Wrong vector size, must be 4");
+
+template <class From, class To>
+static SIMD_ATTR PIK_INLINE void TransposeBlock8_V4(const From& from,
+                                                    const To& to) {
+  const auto p0L = from.Load(0, 0);
+  const auto p0H = from.Load(0, 4);
+  const auto p1L = from.Load(1, 0);
+  const auto p1H = from.Load(1, 4);
+  const auto p2L = from.Load(2, 0);
+  const auto p2H = from.Load(2, 4);
+  const auto p3L = from.Load(3, 0);
+  const auto p3H = from.Load(3, 4);
+  const auto p4L = from.Load(4, 0);
+  const auto p4H = from.Load(4, 4);
+  const auto p5L = from.Load(5, 0);
+  const auto p5H = from.Load(5, 4);
+  const auto p6L = from.Load(6, 0);
+  const auto p6H = from.Load(6, 4);
+  const auto p7L = from.Load(7, 0);
+  const auto p7H = from.Load(7, 4);
+
+  const auto q0L = interleave_lo(p0L, p2L);
+  const auto q0H = interleave_lo(p0H, p2H);
+  const auto q1L = interleave_lo(p1L, p3L);
+  const auto q1H = interleave_lo(p1H, p3H);
+  const auto q2L = interleave_hi(p0L, p2L);
+  const auto q2H = interleave_hi(p0H, p2H);
+  const auto q3L = interleave_hi(p1L, p3L);
+  const auto q3H = interleave_hi(p1H, p3H);
+  const auto q4L = interleave_lo(p4L, p6L);
+  const auto q4H = interleave_lo(p4H, p6H);
+  const auto q5L = interleave_lo(p5L, p7L);
+  const auto q5H = interleave_lo(p5H, p7H);
+  const auto q6L = interleave_hi(p4L, p6L);
+  const auto q6H = interleave_hi(p4H, p6H);
+  const auto q7L = interleave_hi(p5L, p7L);
+  const auto q7H = interleave_hi(p5H, p7H);
+
+  const auto r0L = interleave_lo(q0L, q1L);
+  const auto r0H = interleave_lo(q0H, q1H);
+  const auto r1L = interleave_hi(q0L, q1L);
+  const auto r1H = interleave_hi(q0H, q1H);
+  const auto r2L = interleave_lo(q2L, q3L);
+  const auto r2H = interleave_lo(q2H, q3H);
+  const auto r3L = interleave_hi(q2L, q3L);
+  const auto r3H = interleave_hi(q2H, q3H);
+  const auto r4L = interleave_lo(q4L, q5L);
+  const auto r4H = interleave_lo(q4H, q5H);
+  const auto r5L = interleave_hi(q4L, q5L);
+  const auto r5H = interleave_hi(q4H, q5H);
+  const auto r6L = interleave_lo(q6L, q7L);
+  const auto r6H = interleave_lo(q6H, q7H);
+  const auto r7L = interleave_hi(q6L, q7L);
+  const auto r7H = interleave_hi(q6H, q7H);
+
+  to.Store(r0L, 0, 0);
+  to.Store(r4L, 0, 4);
+  to.Store(r1L, 1, 0);
+  to.Store(r5L, 1, 4);
+  to.Store(r2L, 2, 0);
+  to.Store(r6L, 2, 4);
+  to.Store(r3L, 3, 0);
+  to.Store(r7L, 3, 4);
+  to.Store(r0H, 4, 0);
+  to.Store(r4H, 4, 4);
+  to.Store(r1H, 5, 0);
+  to.Store(r5H, 5, 4);
+  to.Store(r2H, 6, 0);
+  to.Store(r6H, 6, 4);
+  to.Store(r3H, 7, 0);
+  to.Store(r7H, 7, 4);
+}
+
+template <class From>
+static SIMD_ATTR PIK_INLINE float ComputeScaledDC8_V4(const From& from) {
+  const auto p0L = from.Load(0, 0);
+  const auto p0H = from.Load(0, 4);
+  const auto p1L = from.Load(1, 0);
+  const auto p1H = from.Load(1, 4);
+  const auto p2L = from.Load(2, 0);
+  const auto p2H = from.Load(2, 4);
+  const auto p3L = from.Load(3, 0);
+  const auto p3H = from.Load(3, 4);
+  const auto p4L = from.Load(4, 0);
+  const auto p4H = from.Load(4, 4);
+  const auto p5L = from.Load(5, 0);
+  const auto p5H = from.Load(5, 4);
+  const auto p6L = from.Load(6, 0);
+  const auto p6H = from.Load(6, 4);
+  const auto p7L = from.Load(7, 0);
+  const auto p7H = from.Load(7, 4);
+
+  const auto q0 = p0L + p0H;
+  const auto q1 = p1L + p1H;
+  const auto q2 = p2L + p2H;
+  const auto q3 = p3L + p3H;
+  const auto q4 = p4L + p4H;
+  const auto q5 = p5L + p5H;
+  const auto q6 = p6L + p6H;
+  const auto q7 = p7L + p7H;
+
+  const auto r0 = q0 + q1;
+  const auto r2 = q2 + q3;
+  const auto r4 = q4 + q5;
+  const auto r6 = q6 + q7;
+
+  const auto s0 = r0 + r2;
+  const auto s4 = r4 + r6;
+
+  const auto sum = ext::sum_of_lanes(s0 + s4);
+
+  return get_part(SIMD_PART(float, 1)(), sum);
+}
+
+template <class From, class To>
+SIMD_ATTR PIK_INLINE void ComputeTransposedScaledDCT8_V4(const From& from,
+                                                         const To& to) {
+  // TODO(user): it is possible to avoid using temporary array,
+  // after generalizing "To" to be bi-directional; all sub-transforms could
+  // be performed "in-place".
+  SIMD_ALIGN float block[8 * 8];
+  ColumnDCT8(from, ToBlock<8>(block));
+  TransposeBlock8_V4(FromBlock<8>(block), ToBlock<8>(block));
+  ColumnDCT8(FromBlock<8>(block), to);
+}
+
+template <class From, class To>
+SIMD_ATTR PIK_INLINE void ComputeTransposedScaledIDCT8_V4(const From& from,
+                                                          const To& to) {
+  // TODO(user): it is possible to avoid using temporary array,
+  // after generalizing "To" to be bi-directional; all sub-transforms could
+  // be performed "in-place".
+  SIMD_ALIGN float block[8 * 8];
+  ColumnIDCT8(from, ToBlock<8>(block));
+  TransposeBlock8_V4(FromBlock<8>(block), ToBlock<8>(block));
+  ColumnIDCT8(FromBlock<8>(block), to);
+}
+
+}  // namespace pik
+
+#endif  // SIMD_TARGET_VALUE
+
+#endif  // PIK_DCT_SIMD_4_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/dct_simd_8.h b/codec/L2/demos/pikEnc/host/pik/dct_simd_8.h
new file mode 100755
index 0000000000..64ab0c4316
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/dct_simd_8.h
@@ -0,0 +1,499 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_DCT_SIMD_8_H_
+#define PIK_DCT_SIMD_8_H_
+
+#include "pik/block.h"
+#include "pik/compiler_specific.h"
+#include "pik/dct_simd_any.h"
+#include "pik/simd/simd.h"
+
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+
+namespace pik {
+
+// DCT building blocks that require SIMD vector length to be 8, e.g. AVX2.
+static_assert(BlockDesc<8>().N == 8, "Wrong vector size, must be 8");
+
+// Each vector holds one row of the input/output block.
+template <class V>
+SIMD_ATTR PIK_INLINE void TransposeBlock8_V8(V& i0, V& i1, V& i2, V& i3, V& i4,
+                                             V& i5, V& i6, V& i7) {
+  // Surprisingly, this straightforward implementation (24 cycles on port5) is
+  // faster than load128+insert and load_dup128+concat_hi_lo+blend.
+  const auto q0 = interleave_lo(i0, i2);
+  const auto q1 = interleave_lo(i1, i3);
+  const auto q2 = interleave_hi(i0, i2);
+  const auto q3 = interleave_hi(i1, i3);
+  const auto q4 = interleave_lo(i4, i6);
+  const auto q5 = interleave_lo(i5, i7);
+  const auto q6 = interleave_hi(i4, i6);
+  const auto q7 = interleave_hi(i5, i7);
+
+  const auto r0 = interleave_lo(q0, q1);
+  const auto r1 = interleave_hi(q0, q1);
+  const auto r2 = interleave_lo(q2, q3);
+  const auto r3 = interleave_hi(q2, q3);
+  const auto r4 = interleave_lo(q4, q5);
+  const auto r5 = interleave_hi(q4, q5);
+  const auto r6 = interleave_lo(q6, q7);
+  const auto r7 = interleave_hi(q6, q7);
+
+  i0 = concat_lo_lo(r4, r0);
+  i1 = concat_lo_lo(r5, r1);
+  i2 = concat_lo_lo(r6, r2);
+  i3 = concat_lo_lo(r7, r3);
+  i4 = concat_hi_hi(r4, r0);
+  i5 = concat_hi_hi(r5, r1);
+  i6 = concat_hi_hi(r6, r2);
+  i7 = concat_hi_hi(r7, r3);
+}
+
+template <class From>
+static SIMD_ATTR PIK_INLINE float ComputeScaledDC8_V8(const From& from) {
+  const auto q0 = from.Load(0, 0);
+  const auto q1 = from.Load(1, 0);
+  const auto q2 = from.Load(2, 0);
+  const auto q3 = from.Load(3, 0);
+  const auto q4 = from.Load(4, 0);
+  const auto q5 = from.Load(5, 0);
+  const auto q6 = from.Load(6, 0);
+  const auto q7 = from.Load(7, 0);
+
+  const auto r0 = q0 + q1;
+  const auto r2 = q2 + q3;
+  const auto r4 = q4 + q5;
+  const auto r6 = q6 + q7;
+
+  const auto s0 = r0 + r2;
+  const auto s4 = r4 + r6;
+
+  const auto sum = ext::sum_of_lanes(s0 + s4);
+
+  return get_part(SIMD_PART(float, 1)(), sum);
+}
+
+template <class From, class To>
+SIMD_ATTR PIK_INLINE void TransposeBlock8_V8(const From& from, const To& to) {
+  auto i0 = from.Load(0, 0);
+  auto i1 = from.Load(1, 0);
+  auto i2 = from.Load(2, 0);
+  auto i3 = from.Load(3, 0);
+  auto i4 = from.Load(4, 0);
+  auto i5 = from.Load(5, 0);
+  auto i6 = from.Load(6, 0);
+  auto i7 = from.Load(7, 0);
+  TransposeBlock8_V8(i0, i1, i2, i3, i4, i5, i6, i7);
+  to.Store(i0, 0, 0);
+  to.Store(i1, 1, 0);
+  to.Store(i2, 2, 0);
+  to.Store(i3, 3, 0);
+  to.Store(i4, 4, 0);
+  to.Store(i5, 5, 0);
+  to.Store(i6, 6, 0);
+  to.Store(i7, 7, 0);
+}
+
+template <class From, class To>
+SIMD_ATTR PIK_INLINE void ComputeTransposedScaledDCT8_V8(const From& from,
+                                                         const To& to) {
+  const BlockDesc<8> d;
+
+  const float c1234_lanes[4] = {
+      0.707106781186548f,  // 1 / sqrt(2)
+      0.382683432365090f,  // cos(3 * pi / 8)
+      1.30656296487638f,   // 1 / (2 * cos(3 * pi / 8))
+      0.541196100146197f   // sqrt(2) * cos(3 * pi / 8)
+  };
+  const auto c1234 = load_dup128(d, c1234_lanes);
+  const auto k1 = set1(d, 1.0f);
+
+  auto i0 = from.template LoadPart<8>(0, 0);
+  auto i7 = from.template LoadPart<8>(7, 0);
+  auto t00 = i0 + i7;           // 2 (faster than fadd)
+  auto t01 = fsub(i0, k1, i7);  // 4
+  SIMD_FENCE;
+
+  auto i3 = from.template LoadPart<8>(3, 0);
+  auto i4 = from.template LoadPart<8>(4, 0);
+  auto t02 = i3 + i4;
+  auto t03 = fsub(i3, k1, i4);  // 1
+  SIMD_FENCE;
+
+  auto i2 = from.template LoadPart<8>(2, 0);
+  auto i5 = from.template LoadPart<8>(5, 0);
+  auto t04 = i2 + i5;  // 1
+  auto t05 = fsub(i2, k1, i5);
+  SIMD_FENCE;
+
+  auto i1 = from.template LoadPart<8>(1, 0);
+  auto i6 = from.template LoadPart<8>(6, 0);
+  auto t06 = i1 + i6;  // !
+  SIMD_FENCE;
+
+  auto t07 = i1 - i6;
+  auto t09 = fsub(t00, k1, t02);
+  const auto c4 = broadcast<3>(c1234);
+
+  auto t11 = t06 - t04;           // !
+  auto t08 = fadd(t00, k1, t02);  // 2
+  const auto c3 = broadcast<2>(c1234);
+
+  auto t14 = t05 + t03;
+  auto t10 = fadd(t06, k1, t04);  // 1; dep-1
+
+  auto t13 = t01 + t07;  // limits odd d
+  const auto c1 = broadcast<0>(c1234);
+
+  auto t15 = t11 + t09;  // !
+  const auto c2 = broadcast<1>(c1234);
+
+  auto t12 = t07 + t05;  // !
+  auto ct14 = c4 * t14;
+
+  auto t16 = t14 - t13;  // 1
+  auto ct13 = c3 * t13;
+
+  auto d0 = fadd(t08, k1, t10);
+  auto d2 = mul_add(c1, t15, t09);
+
+  auto t21 = nmul_add(c1, t12, t01);  // 2
+
+  auto d6 = nmul_add(c1, t15, t09);
+  auto t20 = mul_add(c1, t12, t01);  // 2
+
+  auto t23 = mul_add(c2, t16, ct14);
+
+  auto d4 = t08 - t10;
+  auto t22 = mul_add(c2, t16, ct13);  // !
+
+  const auto q0 = interleave_lo(d0, d2);
+
+  const auto q2 = interleave_hi(d0, d2);
+
+  const auto q4 = interleave_lo(d4, d6);
+
+  auto d3 = t21 - t23;
+  const auto q6 = interleave_hi(d4, d6);
+
+  auto d1 = t20 + t22;
+  const auto q1 = interleave_lo(d1, d3);
+
+  const auto r0 = interleave_lo(q0, q1);
+  const auto r1 = interleave_hi(q0, q1);
+
+  auto d7 = t20 - t22;
+  const auto q3 = interleave_hi(d1, d3);
+  const auto r2 = interleave_lo(q2, q3);
+  const auto r3 = interleave_hi(q2, q3);
+
+  auto d5 = t21 + t23;
+  const auto q5 = interleave_lo(d5, d7);
+  const auto r4 = interleave_lo(q4, q5);
+  const auto r5 = interleave_hi(q4, q5);
+
+  const auto q7 = interleave_hi(d5, d7);
+  const auto r6 = interleave_lo(q6, q7);
+  const auto r7 = interleave_hi(q6, q7);
+
+  // Second column-DCT after transpose
+  i0 = concat_lo_lo(r4, r0);
+  i7 = concat_hi_hi(r7, r3);
+  t01 = i0 - i7;           // 1
+  t00 = fadd(i0, k1, i7);  // 2
+
+  i1 = concat_lo_lo(r5, r1);
+  i6 = concat_hi_hi(r6, r2);
+  t07 = i1 - i6;           // !
+  t06 = fadd(i1, k1, i6);  // 2
+
+  i3 = concat_lo_lo(r7, r3);
+  i4 = concat_hi_hi(r4, r0);
+  t03 = i3 - i4;           // 1
+  t02 = fadd(i3, k1, i4);  // !
+
+  i2 = concat_lo_lo(r6, r2);
+  i5 = concat_hi_hi(r5, r1);
+  t05 = i2 - i5;
+
+  t13 = t01 + t07;  // 1
+
+  t04 = i2 + i5;
+
+  t14 = t05 + t03;
+  t12 = fadd(t07, k1, t05);  // 2
+
+  t09 = fsub(t00, k1, t02);
+  ct13 = c3 * t13;  // 1
+
+  t11 = t06 - t04;  // 1
+  t10 = fadd(t06, k1, t04);
+
+  t16 = t14 - t13;  // !
+  ct14 = c4 * t14;
+
+  t08 = t00 + t02;
+
+  t20 = mul_add(c1, t12, t01);  // 1
+
+  t15 = t11 + t09;
+  t22 = mul_add(c2, t16, ct13);
+
+  i0 = t08 + t10;
+
+  t21 = nmul_add(c1, t12, t01);
+  t23 = mul_add(c2, t16, ct14);
+
+  i4 = t08 - t10;
+  i2 = mul_add(c1, t15, t09);
+
+  i6 = nmul_add(c1, t15, t09);
+  to.template StorePart<8>(i0, 0, 0);
+  SIMD_FENCE;
+
+  i1 = t20 + t22;
+
+  i7 = t20 - t22;
+  to.template StorePart<8>(i2, 2, 0);
+  to.template StorePart<8>(i4, 4, 0);
+  SIMD_FENCE;
+
+  i3 = t21 - t23;
+  to.template StorePart<8>(i1, 1, 0);
+  SIMD_FENCE;
+
+  i5 = t21 + t23;
+  to.template StorePart<8>(i6, 6, 0);
+  to.template StorePart<8>(i7, 7, 0);
+  to.template StorePart<8>(i3, 3, 0);
+  to.template StorePart<8>(i5, 5, 0);
+}
+
+template <class From, class To>
+SIMD_ATTR PIK_INLINE void ComputeTransposedScaledIDCT8_V8(const From& from,
+                                                          const To& to) {
+  const BlockDesc<8> d;
+
+  const float k1_lanes[4] = {SIMD_REP4(1.0f)};
+  const auto k1 = load_dup128(d, k1_lanes);
+  const float c1234_lanes[4] = {
+      1.41421356237310f,  // sqrt(2)
+      2.61312592975275f,  // 1 / cos(3 * pi / 8)
+      0.76536686473018f,  // 2 * cos(3 * pi / 8)
+      1.08239220029239f   // 2 * sqrt(2) * cos(3 * pi / 8)
+  };
+  const auto c1234 = load_dup128(d, c1234_lanes);
+  SIMD_FENCE;
+
+  // Finish d5,d7 and d0,d2 first so we can overlap more port5 (shuffles) with
+  // other computations; they have a shorter dependency chain than d13/46.
+
+  auto i1 = from.Load(1, 0);
+  auto i7 = from.Load(7, 0);
+  auto t05 = i7 - i1;           // !
+  auto t04 = fadd(i7, k1, i1);  // 1
+
+  auto i3 = from.Load(3, 0);
+  auto i5 = from.Load(5, 0);
+  auto t07 = i5 - i3;           // +1
+  auto t06 = fadd(i5, k1, i3);  // +1
+
+  auto i2 = from.Load(2, 0);
+  auto i6 = from.Load(6, 0);
+  auto t02 = i6 + i2;  // 1
+  const auto c2 = broadcast<1>(c1234);
+  SIMD_FENCE;
+
+  auto i0 = from.Load(0, 0);
+  auto i4 = from.Load(4, 0);
+  auto t03 = i6 - i2;    // !
+  auto ct05 = c2 * t05;  // !
+  SIMD_FENCE;
+
+  auto t12 = t07 - t05;                 // 1
+  const auto c1 = broadcast<0>(c1234);  // 1
+
+  auto t00 = fadd(i0, k1, i4);          // +2
+  const auto c3 = broadcast<2>(c1234);  // 2
+
+  auto t09 = fsub(t04, k1, t06);
+  auto t14 = mul_add(c1, t03, t02);  // +3
+
+  auto t08 = fadd(t04, k1, t06);        // 1
+  const auto c4 = broadcast<3>(c1234);  // 2
+
+  auto t01 = i0 - i4;                 // +1
+  auto t17 = mul_add(c3, t12, ct05);  // !
+  SIMD_FENCE;
+
+  //
+
+  auto t10 = fadd(t00, k1, t02);
+  auto ct07 = c4 * t07;  // !
+
+  auto t15 = fsub(t01, k1, t14);  // 1
+  auto ct09 = c1 * t09;
+
+  auto t11 = fsub(t00, k1, t02);  // 6
+
+  auto t19 = t08 + t17;  // !
+
+  auto t16 = fadd(t01, k1, t14);
+
+  auto d0 = fadd(t10, k1, t08);       // dep-3; 4
+  auto t18 = mul_add(c3, t12, ct07);  // !
+
+  auto t20 = ct09 + t19;         // !
+  auto d7 = fsub(t10, k1, t08);  // 1
+
+  auto d1 = fsub(t15, k1, t19);  // 5
+
+  //
+
+  auto d5 = t16 - t20;  // !
+  auto d2 = fadd(t16, k1, t20);
+
+  auto t21 = t18 - t20;  // !
+
+  //
+
+  // Begin transposing finished d#
+
+  auto d6 = t15 + t19;  // 1
+  const auto q5 = interleave_lo(d5, d7);
+
+  auto d4 = t11 - t21;                    // !
+  const auto q7 = interleave_hi(d5, d7);  // 8
+
+  auto d3 = t11 + t21;  // !
+  const auto q0 = interleave_lo(d0, d2);
+
+  const auto q2 = interleave_hi(d0, d2);  // 8
+
+  const auto q4 = interleave_lo(d4, d6);
+
+  const auto q1 = interleave_lo(d1, d3);
+
+  const auto r4 = interleave_lo(q4, q5);
+
+  const auto r0 = interleave_lo(q0, q1);
+
+  i0 = concat_lo_lo(r4, r0);
+
+  i4 = concat_hi_hi(r4, r0);
+  const auto _c1234 = load_dup128(d, c1234_lanes);
+
+  const auto q3 = interleave_hi(d1, d3);
+
+  // Begin second column-IDCT for transposed r#
+
+  const auto q6 = interleave_hi(d4, d6);
+
+  t00 = fadd(i0, k1, i4);
+  const auto r2 = interleave_lo(q2, q3);
+
+  t01 = fsub(i0, k1, i4);
+  const auto r6 = interleave_lo(q6, q7);
+
+  i2 = concat_lo_lo(r6, r2);
+
+  i6 = concat_hi_hi(r6, r2);
+
+  const auto r7 = interleave_hi(q6, q7);
+
+  const auto r3 = interleave_hi(q2, q3);
+
+  t03 = i6 - i2;
+  i7 = concat_hi_hi(r7, r3);
+
+  t02 = i6 + i2;
+  const auto r5 = interleave_hi(q4, q5);
+
+  const auto r1 = interleave_hi(q0, q1);
+  const auto _c1 = broadcast<0>(_c1234);
+
+  i1 = concat_lo_lo(r5, r1);
+  auto ct03 = _c1 * t03;
+
+  t10 = fadd(t00, k1, t02);  // 5
+  i5 = concat_hi_hi(r5, r1);
+
+  i3 = concat_lo_lo(r7, r3);
+
+  t05 = i7 - i1;  // !
+  const auto _c2 = broadcast<1>(_c1234);
+
+  t04 = fadd(i7, k1, i1);  // 1
+
+  t07 = i5 - i3;
+
+  t06 = i5 + i3;
+  ct05 = _c2 * t05;  // !
+
+  t14 = ct03 + t02;  // 1
+
+  t12 = t07 - t05;
+
+  t08 = t04 + t06;
+
+  t09 = t04 - t06;
+
+  t15 = fsub(t01, k1, t14);      // 3
+  t17 = mul_add(c3, t12, ct05);  // !
+
+  d0 = t10 + t08;
+
+  d7 = t10 - t08;
+
+  ct09 = _c1 * t09;
+
+  const auto _c4 = broadcast<3>(_c1234);
+  to.Store(d0, 0, 0);
+  SIMD_FENCE;
+
+  t19 = t08 + t17;   // !
+  ct07 = _c4 * t07;  // !
+  to.Store(d7, 7, 0);
+  SIMD_FENCE;
+
+  t11 = t00 - t02;  // 8
+
+  t16 = t01 + t14;  // 3
+
+  d1 = t15 - t19;
+  t20 = ct09 + t19;  // !
+
+  d6 = t15 + t19;
+  const auto _c3 = broadcast<2>(_c1234);
+
+  t18 = mul_add(_c3, t12, ct07);  // !
+
+  d2 = t16 + t20;
+  to.Store(d1, 1, 0);
+  SIMD_FENCE;
+
+  d5 = t16 - t20;
+  to.Store(d6, 6, 0);
+  SIMD_FENCE;
+
+  t21 = t18 - t20;  // !
+
+  d4 = t11 - t21;
+  to.Store(d2, 2, 0);
+
+  d3 = t11 + t21;
+  to.Store(d5, 5, 0);
+
+  to.Store(d4, 4, 0);
+  to.Store(d3, 3, 0);
+}
+
+}  // namespace pik
+
+#endif  // SIMD_TARGET_VALUE
+
+#endif  // PIK_DCT_SIMD_8_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/dct_simd_any.h b/codec/L2/demos/pikEnc/host/pik/dct_simd_any.h
new file mode 100755
index 0000000000..01e6755ac5
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/dct_simd_any.h
@@ -0,0 +1,1350 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_DCT_SIMD_ANY_H_
+#define PIK_DCT_SIMD_ANY_H_
+
+#include "pik/block.h"
+#include "pik/compiler_specific.h"
+#include "pik/simd/simd.h"
+
+#include <iostream>
+
+namespace pik {
+
+// DCT building blocks that do not require a specific SIMD vector length.
+
+template <class From, class To>
+SIMD_ATTR PIK_INLINE void CopyBlock8(const From& from, const To& to) {
+  const BlockDesc<8> d;
+  for (size_t i = 0; i < 8; i += d.N) {
+    const auto i0 = from.Load(0, i);
+    const auto i1 = from.Load(1, i);
+    const auto i2 = from.Load(2, i);
+    const auto i3 = from.Load(3, i);
+    const auto i4 = from.Load(4, i);
+    const auto i5 = from.Load(5, i);
+    const auto i6 = from.Load(6, i);
+    const auto i7 = from.Load(7, i);
+    to.Store(i0, 0, i);
+    to.Store(i1, 1, i);
+    to.Store(i2, 2, i);
+    to.Store(i3, 3, i);
+    to.Store(i4, 4, i);
+    to.Store(i5, 5, i);
+    to.Store(i6, 6, i);
+    to.Store(i7, 7, i);
+  }
+}
+
+template <class V>
+SIMD_ATTR PIK_INLINE void ColumnDCT8(V& i0, V& i1, V& i2, V& i3, V& i4, V& i5,
+                                     V& i6, V& i7) {
+  const BlockDesc<8> d;
+
+  const auto c1 = set1(d, 0.707106781186548f);  // 1 / sqrt(2)
+  const auto c2 = set1(d, 0.382683432365090f);  // cos(3 * pi / 8)
+  const auto c3 = set1(d, 1.30656296487638f);   // 1 / (2 * cos(3 * pi / 8))
+  const auto c4 = set1(d, 0.541196100146197f);  // sqrt(2) * cos(3 * pi / 8)
+
+  const auto t00 = i0 + i7;
+  const auto t01 = i0 - i7;
+  const auto t02 = i3 + i4;
+  const auto t03 = i3 - i4;
+  const auto t04 = i2 + i5;
+  const auto t05 = i2 - i5;
+  const auto t06 = i1 + i6;
+  const auto t07 = i1 - i6;
+  const auto t08 = t00 + t02;
+  const auto t09 = t00 - t02;
+  const auto t10 = t06 + t04;
+  const auto t11 = t06 - t04;
+  const auto t12 = t07 + t05;
+  const auto t13 = t01 + t07;
+  const auto t14 = t05 + t03;
+  const auto t15 = t11 + t09;
+  const auto t16 = t14 - t13;
+  const auto t17 = c1 * t15;
+  const auto t18 = c1 * t12;
+  const auto t19 = c2 * t16;
+  const auto t20 = t01 + t18;
+  const auto t21 = t01 - t18;
+  const auto t22 = mul_add(c3, t13, t19);
+  const auto t23 = mul_add(c4, t14, t19);
+  i0 = t08 + t10;
+  i1 = t20 + t22;
+  i2 = t09 + t17;
+  i3 = t21 - t23;
+  i4 = t08 - t10;
+  i5 = t21 + t23;
+  i6 = t09 - t17;
+  i7 = t20 - t22;
+}
+
+// "A low multiplicative complexity fast recursive DCT-2 algorithm"
+// Maxim Vashkevich, Alexander Petrovsky, 27 Jul 2012
+template <class V>
+SIMD_ATTR PIK_INLINE void ColumnDCT16(V& i00, V& i01, V& i02, V& i03, V& i04,
+                                      V& i05, V& i06, V& i07, V& i08, V& i09,
+                                      V& i10, V& i11, V& i12, V& i13, V& i14,
+                                      V& i15) {
+  const BlockDesc<16> d;
+
+  const auto c1_16 = set1(d, 1.9615705608064609f);   // 2 * cos(1 * pi / 16)
+  const auto c2_16 = set1(d, 1.8477590650225735f);   // 2 * cos(2 * pi / 16)
+  const auto c3_16 = set1(d, 1.6629392246050905f);   // 2 * cos(3 * pi / 16)
+  const auto c4_16 = set1(d, 1.4142135623730951f);   // 2 * cos(4 * pi / 16)
+  const auto c5_16 = set1(d, 1.1111404660392046f);   // 2 * cos(5 * pi / 16)
+  const auto c6_16 = set1(d, 0.7653668647301797f);   // 2 * cos(6 * pi / 16)
+  const auto c7_16 = set1(d, 0.39018064403225666f);  // 2 * cos(7 * pi / 16)
+
+  const auto t00 = i00 + i15;
+  const auto t01 = i01 + i14;
+  const auto t02 = i02 + i13;
+  const auto t03 = i03 + i12;
+  const auto t04 = i04 + i11;
+  const auto t05 = i05 + i10;
+  const auto t06 = i06 + i09;
+  const auto t07 = i07 + i08;
+  const auto t08 = i00 - i15;
+  const auto t09 = i01 - i14;
+  const auto t10 = i02 - i13;
+  const auto t11 = i03 - i12;
+  const auto t12 = i04 - i11;
+  const auto t13 = i05 - i10;
+  const auto t14 = i06 - i09;
+  const auto t15 = i07 - i08;
+
+  const auto t16 = t00 + t07;
+  const auto t17 = t01 + t06;
+  const auto t18 = t02 + t05;
+  const auto t19 = t03 + t04;
+  const auto t20 = t00 - t07;
+  const auto t21 = t01 - t06;
+  const auto t22 = t02 - t05;
+  const auto t23 = t03 - t04;
+  const auto t24 = t16 + t19;
+  const auto t25 = t17 + t18;
+  const auto t26 = t16 - t19;
+  const auto t27 = t17 - t18;
+  i00 = t24 + t25;
+  i08 = t24 - t25;
+  const auto t30 = t26 - t27;
+  const auto t31 = t27 * c4_16;
+  i04 = t30 + t31;
+  i12 = t30 - t31;
+  const auto t34 = t20 - t23;
+  const auto t35 = t21 - t22;
+  const auto t36 = t22 * c4_16;
+  const auto t37 = t23 * c4_16;
+  const auto t38 = t34 + t36;
+  const auto t39 = t35 + t37;
+  const auto t40 = t34 - t36;
+  const auto t41 = t35 - t37;
+  const auto t42 = t38 - t39;
+  const auto t43 = t39 * c2_16;
+  i02 = t42 + t43;
+  i14 = t42 - t43;
+  const auto t46 = t40 - t41;
+  const auto t47 = t41 * c6_16;
+  i06 = t46 + t47;
+  i10 = t46 - t47;
+  const auto t50 = t08 - t15;
+  const auto t51 = t09 - t14;
+  const auto t52 = t10 - t13;
+  const auto t53 = t11 - t12;
+  const auto t54 = t12 * c4_16;
+  const auto t55 = t13 * c4_16;
+  const auto t56 = t14 * c4_16;
+  const auto t57 = t15 * c4_16;
+  const auto t58 = t50 + t54;
+  const auto t59 = t51 + t55;
+  const auto t60 = t52 + t56;
+  const auto t61 = t53 + t57;
+  const auto t62 = t50 - t54;
+  const auto t63 = t51 - t55;
+  const auto t64 = t52 - t56;
+  const auto t65 = t53 - t57;
+  const auto t66 = t58 - t61;
+  const auto t67 = t59 - t60;
+  const auto t68 = t60 * c2_16;
+  const auto t69 = t61 * c2_16;
+  const auto t70 = t66 + t68;
+  const auto t71 = t67 + t69;
+  const auto t72 = t66 - t68;
+  const auto t73 = t67 - t69;
+  const auto t74 = t70 - t71;
+  const auto t75 = t71 * c1_16;
+  i01 = t74 + t75;
+  i15 = t74 - t75;
+  const auto t78 = t72 - t73;
+  const auto t79 = t73 * c7_16;
+  i07 = t78 + t79;
+  i09 = t78 - t79;
+  const auto t82 = t62 - t65;
+  const auto t83 = t63 - t64;
+  const auto t84 = t64 * c6_16;
+  const auto t85 = t65 * c6_16;
+  const auto t86 = t82 + t84;
+  const auto t87 = t83 + t85;
+  const auto t88 = t82 - t84;
+  const auto t89 = t83 - t85;
+  const auto t90 = t86 - t87;
+  const auto t91 = t87 * c3_16;
+  i03 = t90 + t91;
+  i13 = t90 - t91;
+  const auto t94 = t88 - t89;
+  const auto t95 = t89 * c5_16;
+  i05 = t94 + t95;
+  i11 = t94 - t95;
+}
+
+template <class From, class To>
+SIMD_ATTR PIK_INLINE void ColumnDCT8(const From& from, const To& to) {
+  const BlockDesc<8> d;
+
+  for (size_t i = 0; i < 8; i += d.N) {
+    auto i0 = from.template LoadPart<8>(0, i);
+    auto i1 = from.template LoadPart<8>(1, i);
+    auto i2 = from.template LoadPart<8>(2, i);
+    auto i3 = from.template LoadPart<8>(3, i);
+    auto i4 = from.template LoadPart<8>(4, i);
+    auto i5 = from.template LoadPart<8>(5, i);
+    auto i6 = from.template LoadPart<8>(6, i);
+    auto i7 = from.template LoadPart<8>(7, i);
+    ColumnDCT8(i0, i1, i2, i3, i4, i5, i6, i7);
+    to.template StorePart<8>(i0, 0, i);
+    to.template StorePart<8>(i1, 1, i);
+    to.template StorePart<8>(i2, 2, i);
+    to.template StorePart<8>(i3, 3, i);
+    to.template StorePart<8>(i4, 4, i);
+    to.template StorePart<8>(i5, 5, i);
+    to.template StorePart<8>(i6, 6, i);
+    to.template StorePart<8>(i7, 7, i);
+  }
+}
+
+template <class From, class To>
+SIMD_ATTR PIK_INLINE void ColumnDCT16(const From& from, const To& to) {
+  const BlockDesc<16> d;
+
+  for (size_t i = 0; i < 16; i += d.N) {
+    auto i00 = from.Load(0, i);
+    auto i01 = from.Load(1, i);
+    auto i02 = from.Load(2, i);
+    auto i03 = from.Load(3, i);
+    auto i04 = from.Load(4, i);
+    auto i05 = from.Load(5, i);
+    auto i06 = from.Load(6, i);
+    auto i07 = from.Load(7, i);
+    auto i08 = from.Load(8, i);
+    auto i09 = from.Load(9, i);
+    auto i10 = from.Load(10, i);
+    auto i11 = from.Load(11, i);
+    auto i12 = from.Load(12, i);
+    auto i13 = from.Load(13, i);
+    auto i14 = from.Load(14, i);
+    auto i15 = from.Load(15, i);
+    ColumnDCT16(i00, i01, i02, i03, i04, i05, i06, i07, i08, i09, i10, i11, i12,
+                i13, i14, i15);
+    to.Store(i00, 0, i);
+    to.Store(i01, 1, i);
+    to.Store(i02, 2, i);
+    to.Store(i03, 3, i);
+    to.Store(i04, 4, i);
+    to.Store(i05, 5, i);
+    to.Store(i06, 6, i);
+    to.Store(i07, 7, i);
+    to.Store(i08, 8, i);
+    to.Store(i09, 9, i);
+    to.Store(i10, 10, i);
+    to.Store(i11, 11, i);
+    to.Store(i12, 12, i);
+    to.Store(i13, 13, i);
+    to.Store(i14, 14, i);
+    to.Store(i15, 15, i);
+  }
+}
+
+// NB: ColumnIDCT8(ColumnDCT8(I)) = 8.0 * I
+template <class V>  // V must support +, -, * and mul_add with the constants produced by set1.
+SIMD_ATTR PIK_INLINE void ColumnIDCT8(V& i0, V& i1, V& i2, V& i3, V& i4, V& i5,
+                                      V& i6, V& i7) {  // 8-input inverse DCT; i0..i7 are read as inputs and overwritten with the outputs.
+  const BlockDesc<8> d;  // Descriptor handed to set1 for the constants below.
+
+  const auto c1 = set1(d, 1.41421356237310f);  // sqrt(2)
+  const auto c2 = set1(d, 2.61312592975275f);  // 1 / cos(3 * pi / 8)
+  const auto c3 = set1(d, 0.76536686473018f);  // 2 * cos(3 * pi / 8)
+  const auto c4 = set1(d, 1.08239220029239f);  // 2 * sqrt(2) * cos(3 * pi / 8)
+
+  const auto t00 = i0 + i4;  // First stage: sums and differences of input pairs.
+  const auto t01 = i0 - i4;
+  const auto t02 = i6 + i2;
+  const auto t03 = i6 - i2;
+  const auto t04 = i7 + i1;
+  const auto t05 = i7 - i1;
+  const auto t06 = i5 + i3;
+  const auto t07 = i5 - i3;
+  const auto t08 = t04 + t06;
+  const auto t09 = t04 - t06;
+  const auto t10 = t00 + t02;
+  const auto t11 = t00 - t02;
+  const auto t12 = t07 - t05;
+  const auto t13 = c3 * t12;
+  const auto t14 = mul_add(c1, t03, t02);  // mul_add(a, b, c) is presumably a * b + c - confirm.
+  const auto t15 = t01 - t14;
+  const auto t16 = t01 + t14;
+  const auto t17 = mul_add(c2, t05, t13);
+  const auto t18 = mul_add(c4, t07, t13);
+  const auto t19 = t08 + t17;
+  const auto t20 = mul_add(c1, t09, t19);
+  const auto t21 = t18 - t20;
+  i0 = t10 + t08;  // Results are written back through the reference parameters.
+  i1 = t15 - t19;
+  i2 = t16 + t20;
+  i3 = t11 + t21;
+  i4 = t11 - t21;
+  i5 = t16 - t20;
+  i6 = t15 + t19;
+  i7 = t10 - t08;
+}
+
+// "A low multiplicative complexity fast recursive DCT-2 algorithm"
+// Maxim Vashkevich, Alexander Petrovsky, 27 Jul 2012
+template <class V>  // V must support +, - and * with the constants produced by set1.
+SIMD_ATTR PIK_INLINE void ColumnIDCT16(V& i00, V& i01, V& i02, V& i03, V& i04,
+                                       V& i05, V& i06, V& i07, V& i08, V& i09,
+                                       V& i10, V& i11, V& i12, V& i13, V& i14,
+                                       V& i15) {  // 16-input inverse DCT; i00..i15 are read as inputs and overwritten with the outputs.
+  const BlockDesc<16> d;  // Descriptor handed to set1 for the constants below.
+
+  const auto c1_16 = set1(d, 0.5097955791041592f);  // 0.5 / cos(1 * pi / 16)
+  const auto c2_16 = set1(d, 0.541196100146197f);   // 0.5 / cos(2 * pi / 16)
+  const auto c3_16 = set1(d, 0.6013448869350453f);  // 0.5 / cos(3 * pi / 16)
+  const auto c4_16 = set1(d, 0.7071067811865475f);  // 0.5 / cos(4 * pi / 16)
+  const auto c5_16 = set1(d, 0.8999762231364156f);  // 0.5 / cos(5 * pi / 16)
+  const auto c6_16 = set1(d, 1.3065629648763764f);  // 0.5 / cos(6 * pi / 16)
+  const auto c7_16 = set1(d, 2.5629154477415055f);  // 0.5 / cos(7 * pi / 16)
+
+  const auto t00 = i00 + i08;  // t00..t33 consume only the even-indexed inputs.
+  const auto t01 = i00 - i08;
+  const auto t02 = i04 + i12;
+  const auto t03 = i04 - i12;
+  const auto t04 = t03 * c4_16;
+  const auto t05 = t02 + t04;
+  const auto t06 = t00 + t05;
+  const auto t07 = t01 + t04;
+  const auto t08 = t00 - t05;
+  const auto t09 = t01 - t04;
+  const auto t10 = i02 + i14;
+  const auto t11 = i02 - i14;
+  const auto t12 = t11 * c2_16;
+  const auto t13 = t10 + t12;
+  const auto t14 = i06 + i10;
+  const auto t15 = i06 - i10;
+  const auto t16 = t15 * c6_16;
+  const auto t17 = t14 + t16;
+  const auto t18 = t13 + t17;
+  const auto t19 = t12 + t16;
+  const auto t20 = t13 - t17;
+  const auto t21 = t12 - t16;
+  const auto t22 = t20 * c4_16;
+  const auto t23 = t21 * c4_16;
+  const auto t24 = t18 + t23;
+  const auto t25 = t19 + t22;
+  const auto t26 = t06 + t24;
+  const auto t27 = t07 + t25;
+  const auto t28 = t09 + t22;
+  const auto t29 = t08 + t23;
+  const auto t30 = t06 - t24;
+  const auto t31 = t07 - t25;
+  const auto t32 = t09 - t22;
+  const auto t33 = t08 - t23;
+  const auto t34 = i01 + i15;  // t34..t81 consume the odd-indexed inputs.
+  const auto t35 = i01 - i15;
+  const auto t36 = t35 * c1_16;
+  const auto t37 = t34 + t36;
+  const auto t38 = i07 + i09;
+  const auto t39 = i07 - i09;
+  const auto t40 = t39 * c7_16;
+  const auto t41 = t38 + t40;
+  const auto t42 = t37 + t41;
+  const auto t43 = t36 + t40;
+  const auto t44 = t37 - t41;
+  const auto t45 = t36 - t40;
+  const auto t46 = t44 * c2_16;
+  const auto t47 = t45 * c2_16;
+  const auto t48 = t42 + t47;
+  const auto t49 = t43 + t46;
+  const auto t50 = i03 + i13;
+  const auto t51 = i03 - i13;
+  const auto t52 = t51 * c3_16;
+  const auto t53 = t50 + t52;
+  const auto t54 = i05 + i11;
+  const auto t55 = i05 - i11;
+  const auto t56 = t55 * c5_16;
+  const auto t57 = t54 + t56;
+  const auto t58 = t53 + t57;
+  const auto t59 = t52 + t56;
+  const auto t60 = t53 - t57;
+  const auto t61 = t52 - t56;
+  const auto t62 = t60 * c6_16;
+  const auto t63 = t61 * c6_16;
+  const auto t64 = t58 + t63;
+  const auto t65 = t59 + t62;
+  const auto t66 = t48 + t64;
+  const auto t67 = t49 + t65;
+  const auto t68 = t46 + t62;
+  const auto t69 = t47 + t63;
+  const auto t70 = t48 - t64;
+  const auto t71 = t49 - t65;
+  const auto t72 = t46 - t62;
+  const auto t73 = t47 - t63;
+  const auto t74 = t70 * c4_16;
+  const auto t75 = t71 * c4_16;
+  const auto t76 = t72 * c4_16;
+  const auto t77 = t73 * c4_16;
+  const auto t78 = t66 + t77;
+  const auto t79 = t67 + t76;
+  const auto t80 = t68 + t75;
+  const auto t81 = t69 + t74;
+  i00 = t26 + t78;  // Outputs 0..7: even-derived term plus odd-derived term.
+  i01 = t27 + t79;
+  i02 = t28 + t80;
+  i03 = t29 + t81;
+  i04 = t33 + t74;
+  i05 = t32 + t75;
+  i06 = t31 + t76;
+  i07 = t30 + t77;
+  i15 = t26 - t78;  // Outputs 15..8 mirror 0..7: same operand pairs, subtracted.
+  i14 = t27 - t79;
+  i13 = t28 - t80;
+  i12 = t29 - t81;
+  i11 = t33 - t74;
+  i10 = t32 - t75;
+  i09 = t31 - t76;
+  i08 = t30 - t77;
+}
+
+template <class From, class To>  // From must offer Load(k, i); To must offer Store(v, k, i).
+SIMD_ATTR PIK_INLINE void ColumnIDCT8(const From& from, const To& to) {  // Runs the 8-input ColumnIDCT8 on values read from `from` and stores the results in `to`.
+  const BlockDesc<8> d;  // Only d.N is used here, as the loop stride.
+
+  for (size_t i = 0; i < 8; i += d.N) {  // Step the second index by d.N per pass.
+    auto i0 = from.Load(0, i);  // Load the 8 values whose first index is 0..7.
+    auto i1 = from.Load(1, i);
+    auto i2 = from.Load(2, i);
+    auto i3 = from.Load(3, i);
+    auto i4 = from.Load(4, i);
+    auto i5 = from.Load(5, i);
+    auto i6 = from.Load(6, i);
+    auto i7 = from.Load(7, i);
+    ColumnIDCT8(i0, i1, i2, i3, i4, i5, i6, i7);  // Reference-taking overload: overwrites i0..i7 with the results.
+    to.Store(i0, 0, i);  // Store each value under the same (k, i) pair it was loaded from.
+    to.Store(i1, 1, i);
+    to.Store(i2, 2, i);
+    to.Store(i3, 3, i);
+    to.Store(i4, 4, i);
+    to.Store(i5, 5, i);
+    to.Store(i6, 6, i);
+    to.Store(i7, 7, i);
+  }
+}
+
+template <class From, class To>  // From must offer Load(k, i); To must offer Store(v, k, i).
+SIMD_ATTR PIK_INLINE void ColumnIDCT16(const From& from, const To& to) {  // Runs the 16-input ColumnIDCT16 on values read from `from` and stores the results in `to`.
+  const BlockDesc<16> d;  // Only d.N is used here, as the loop stride.
+
+  for (size_t i = 0; i < 16; i += d.N) {  // Step the second index by d.N per pass.
+    auto i00 = from.Load(0, i);  // Load the 16 values whose first index is 0..15.
+    auto i01 = from.Load(1, i);
+    auto i02 = from.Load(2, i);
+    auto i03 = from.Load(3, i);
+    auto i04 = from.Load(4, i);
+    auto i05 = from.Load(5, i);
+    auto i06 = from.Load(6, i);
+    auto i07 = from.Load(7, i);
+    auto i08 = from.Load(8, i);
+    auto i09 = from.Load(9, i);
+    auto i10 = from.Load(10, i);
+    auto i11 = from.Load(11, i);
+    auto i12 = from.Load(12, i);
+    auto i13 = from.Load(13, i);
+    auto i14 = from.Load(14, i);
+    auto i15 = from.Load(15, i);
+    ColumnIDCT16(i00, i01, i02, i03, i04, i05, i06, i07, i08, i09, i10, i11,
+                 i12, i13, i14, i15);  // Reference-taking overload: overwrites i00..i15 with the results.
+    to.Store(i00, 0, i);  // Store each value under the same (k, i) pair it was loaded from.
+    to.Store(i01, 1, i);
+    to.Store(i02, 2, i);
+    to.Store(i03, 3, i);
+    to.Store(i04, 4, i);
+    to.Store(i05, 5, i);
+    to.Store(i06, 6, i);
+    to.Store(i07, 7, i);
+    to.Store(i08, 8, i);
+    to.Store(i09, 9, i);
+    to.Store(i10, 10, i);
+    to.Store(i11, 11, i);
+    to.Store(i12, 12, i);
+    to.Store(i13, 13, i);
+    to.Store(i14, 14, i);
+    to.Store(i15, 15, i);
+  }
+}
+
+template <class From, class To>  // From must offer LoadPart<4>(k, i); To must offer StorePart<4>(v, k, i).
+SIMD_ATTR PIK_INLINE void ColumnDCT4(const From& from, const To& to) {  // 4-input column DCT computed inline; reads from `from`, writes to `to`.
+  const BlockDesc<4> d;  // Supplies d.N (loop stride) and the descriptor for set1.
+  const auto c2_8 = set1(d, 1.414213562373095048f);  // 2 * cos(2 * pi / 8)
+  for (size_t i = 0; i < 4; i += d.N) {  // Step the second index by d.N per pass.
+    auto i0 = from.template LoadPart<4>(0, i);  // NOTE(review): LoadPart<4> presumably reads a 4-wide partial vector - confirm.
+    auto i1 = from.template LoadPart<4>(1, i);
+    auto i2 = from.template LoadPart<4>(2, i);
+    auto i3 = from.template LoadPart<4>(3, i);
+    auto t0 = i0 + i3;  // Sums and differences of the mirrored pairs (0,3) and (1,2).
+    auto t1 = i1 + i2;
+    auto t2 = i0 - i3;
+    auto t3 = i1 - i2;
+    auto t4 = t0 + t1;
+    auto t5 = t0 - t1;
+    auto t6 = t2 - t3;
+    auto t7 = t3 * c2_8;
+    auto t8 = t6 + t7;
+    auto t9 = t6 - t7;
+    to.template StorePart<4>(t4, 0, i);  // Outputs 0..3 are t4, t8, t5, t9 in that order.
+    to.template StorePart<4>(t8, 1, i);
+    to.template StorePart<4>(t5, 2, i);
+    to.template StorePart<4>(t9, 3, i);
+  }
+}
+
+template <class From, class To>  // From must offer LoadPart<4>(k, i); To must offer StorePart<4>(v, k, i).
+SIMD_ATTR PIK_INLINE void ColumnIDCT4(const From& from, const To& to) {  // 4-input column inverse DCT computed inline; reads from `from`, writes to `to`.
+  const BlockDesc<4> d;  // Supplies d.N (loop stride) and the descriptor for set1.
+  const auto c2_8 = set1(d, 0.7071067811865475244f);  // 0.5 / cos(2 * pi / 8)
+  for (size_t i = 0; i < 4; i += d.N) {  // Step the second index by d.N per pass.
+    auto i0 = from.template LoadPart<4>(0, i);  // NOTE(review): LoadPart<4> presumably reads a 4-wide partial vector - confirm.
+    auto i1 = from.template LoadPart<4>(1, i);
+    auto i2 = from.template LoadPart<4>(2, i);
+    auto i3 = from.template LoadPart<4>(3, i);
+    auto t0 = i0 + i2;  // Even inputs (0,2) and odd inputs (1,3) are combined separately first.
+    auto t1 = i0 - i2;
+    auto t2 = i1 + i3;
+    auto t3 = i1 - i3;
+    auto t4 = t3 * c2_8;
+    auto t5 = t2 + t4;
+    auto t6 = t0 + t5;
+    auto t7 = t1 + t4;
+    auto t8 = t0 - t5;
+    auto t9 = t1 - t4;
+    to.template StorePart<4>(t6, 0, i);  // Outputs 0..3 are t6, t7, t9, t8 in that order.
+    to.template StorePart<4>(t7, 1, i);
+    to.template StorePart<4>(t9, 2, i);
+    to.template StorePart<4>(t8, 3, i);
+  }
+}
+
+template <class V>  // V must support +, - and * with the constants produced by set1.
+SIMD_ATTR PIK_INLINE void ColumnDCT32(V& i00, V& i01, V& i02, V& i03, V& i04,
+                                      V& i05, V& i06, V& i07, V& i08, V& i09,
+                                      V& i10, V& i11, V& i12, V& i13, V& i14,
+                                      V& i15, V& i16, V& i17, V& i18, V& i19,
+                                      V& i20, V& i21, V& i22, V& i23, V& i24,
+                                      V& i25, V& i26, V& i27, V& i28, V& i29,
+                                      V& i30, V& i31) {  // 32-input forward DCT; i00..i31 are read as inputs and overwritten with the outputs.
+  const BlockDesc<32> d;  // Descriptor handed to set1 for the constants below.
+  const auto c2_64 = set1(d, 1.990369453344393857f);   // 2 * cos(2 * pi / 64)
+  const auto c4_64 = set1(d, 1.961570560806460861f);   // 2 * cos(4 * pi / 64)
+  const auto c6_64 = set1(d, 1.913880671464417649f);   // 2 * cos(6 * pi / 64)
+  const auto c8_64 = set1(d, 1.847759065022573477f);   // 2 * cos(8 * pi / 64)
+  const auto c10_64 = set1(d, 1.763842528696710099f);  // 2 * cos(10 * pi / 64)
+  const auto c12_64 = set1(d, 1.662939224605090471f);  // 2 * cos(12 * pi / 64)
+  const auto c14_64 = set1(d, 1.546020906725473987f);  // 2 * cos(14 * pi / 64)
+  const auto c16_64 = set1(d, 1.414213562373095145f);  // 2 * cos(16 * pi / 64)
+  const auto c18_64 = set1(d, 1.268786568327290976f);  // 2 * cos(18 * pi / 64)
+  const auto c20_64 = set1(d, 1.111140466039204577f);  // 2 * cos(20 * pi / 64)
+  const auto c22_64 = set1(d, 0.942793473651995617f);  // 2 * cos(22 * pi / 64)
+  const auto c24_64 = set1(d, 0.765366864730179675f);  // 2 * cos(24 * pi / 64)
+  const auto c26_64 = set1(d, 0.580569354508924662f);  // 2 * cos(26 * pi / 64)
+  const auto c28_64 = set1(d, 0.390180644032256663f);  // 2 * cos(28 * pi / 64)
+  const auto c30_64 = set1(d, 0.196034280659121540f);  // 2 * cos(30 * pi / 64)
+
+  const auto t00 = i00 + i31;  // t00..t15: sums of the mirrored input pairs i[k] + i[31 - k].
+  const auto t01 = i01 + i30;
+  const auto t02 = i02 + i29;
+  const auto t03 = i03 + i28;
+  const auto t04 = i04 + i27;
+  const auto t05 = i05 + i26;
+  const auto t06 = i06 + i25;
+  const auto t07 = i07 + i24;
+  const auto t08 = i08 + i23;
+  const auto t09 = i09 + i22;
+  const auto t10 = i10 + i21;
+  const auto t11 = i11 + i20;
+  const auto t12 = i12 + i19;
+  const auto t13 = i13 + i18;
+  const auto t14 = i14 + i17;
+  const auto t15 = i15 + i16;
+  const auto t16 = i00 - i31;  // t16..t31: differences of the same mirrored pairs.
+  const auto t17 = i01 - i30;
+  const auto t18 = i02 - i29;
+  const auto t19 = i03 - i28;
+  const auto t20 = i04 - i27;
+  const auto t21 = i05 - i26;
+  const auto t22 = i06 - i25;
+  const auto t23 = i07 - i24;
+  const auto t24 = i08 - i23;
+  const auto t25 = i09 - i22;
+  const auto t26 = i10 - i21;
+  const auto t27 = i11 - i20;
+  const auto t28 = i12 - i19;
+  const auto t29 = i13 - i18;
+  const auto t30 = i14 - i17;
+  const auto t31 = i15 - i16;
+  const auto t32 = t00 + t15;  // From here to t129 only the sums t00..t15 are used.
+  const auto t33 = t01 + t14;
+  const auto t34 = t02 + t13;
+  const auto t35 = t03 + t12;
+  const auto t36 = t04 + t11;
+  const auto t37 = t05 + t10;
+  const auto t38 = t06 + t09;
+  const auto t39 = t07 + t08;
+  const auto t40 = t00 - t15;
+  const auto t41 = t01 - t14;
+  const auto t42 = t02 - t13;
+  const auto t43 = t03 - t12;
+  const auto t44 = t04 - t11;
+  const auto t45 = t05 - t10;
+  const auto t46 = t06 - t09;
+  const auto t47 = t07 - t08;
+  const auto t48 = t32 + t39;
+  const auto t49 = t33 + t38;
+  const auto t50 = t34 + t37;
+  const auto t51 = t35 + t36;
+  const auto t52 = t32 - t39;
+  const auto t53 = t33 - t38;
+  const auto t54 = t34 - t37;
+  const auto t55 = t35 - t36;
+  const auto t56 = t48 + t51;
+  const auto t57 = t49 + t50;
+  const auto t58 = t48 - t51;
+  const auto t59 = t49 - t50;
+  const auto t60 = t56 + t57;
+  const auto t61 = t56 - t57;
+  const auto t62 = t58 - t59;
+  const auto t63 = t59 * c16_64;
+  const auto t64 = t62 + t63;
+  const auto t65 = t62 - t63;
+  const auto t66 = t52 - t55;
+  const auto t67 = t53 - t54;
+  const auto t68 = t54 * c16_64;
+  const auto t69 = t55 * c16_64;
+  const auto t70 = t66 + t68;
+  const auto t71 = t67 + t69;
+  const auto t72 = t66 - t68;
+  const auto t73 = t67 - t69;
+  const auto t74 = t70 - t71;
+  const auto t75 = t71 * c8_64;
+  const auto t76 = t74 + t75;
+  const auto t77 = t74 - t75;
+  const auto t78 = t72 - t73;
+  const auto t79 = t73 * c24_64;
+  const auto t80 = t78 + t79;
+  const auto t81 = t78 - t79;
+  const auto t82 = t40 - t47;
+  const auto t83 = t41 - t46;
+  const auto t84 = t42 - t45;
+  const auto t85 = t43 - t44;
+  const auto t86 = t44 * c16_64;
+  const auto t87 = t45 * c16_64;
+  const auto t88 = t46 * c16_64;
+  const auto t89 = t47 * c16_64;
+  const auto t90 = t82 + t86;
+  const auto t91 = t83 + t87;
+  const auto t92 = t84 + t88;
+  const auto t93 = t85 + t89;
+  const auto t94 = t82 - t86;
+  const auto t95 = t83 - t87;
+  const auto t96 = t84 - t88;
+  const auto t97 = t85 - t89;
+  const auto t98 = t90 - t93;
+  const auto t99 = t91 - t92;
+  const auto t100 = t92 * c8_64;
+  const auto t101 = t93 * c8_64;
+  const auto t102 = t98 + t100;
+  const auto t103 = t99 + t101;
+  const auto t104 = t98 - t100;
+  const auto t105 = t99 - t101;
+  const auto t106 = t102 - t103;
+  const auto t107 = t103 * c4_64;
+  const auto t108 = t106 + t107;
+  const auto t109 = t106 - t107;
+  const auto t110 = t104 - t105;
+  const auto t111 = t105 * c28_64;
+  const auto t112 = t110 + t111;
+  const auto t113 = t110 - t111;
+  const auto t114 = t94 - t97;
+  const auto t115 = t95 - t96;
+  const auto t116 = t96 * c24_64;
+  const auto t117 = t97 * c24_64;
+  const auto t118 = t114 + t116;
+  const auto t119 = t115 + t117;
+  const auto t120 = t114 - t116;
+  const auto t121 = t115 - t117;
+  const auto t122 = t118 - t119;
+  const auto t123 = t119 * c12_64;
+  const auto t124 = t122 + t123;
+  const auto t125 = t122 - t123;
+  const auto t126 = t120 - t121;
+  const auto t127 = t121 * c20_64;
+  const auto t128 = t126 + t127;
+  const auto t129 = t126 - t127;
+  const auto t130 = t16 - t31;  // From here on only the differences t16..t31 are used.
+  const auto t131 = t17 - t30;
+  const auto t132 = t18 - t29;
+  const auto t133 = t19 - t28;
+  const auto t134 = t20 - t27;
+  const auto t135 = t21 - t26;
+  const auto t136 = t22 - t25;
+  const auto t137 = t23 - t24;
+  const auto t138 = t24 * c16_64;
+  const auto t139 = t25 * c16_64;
+  const auto t140 = t26 * c16_64;
+  const auto t141 = t27 * c16_64;
+  const auto t142 = t28 * c16_64;
+  const auto t143 = t29 * c16_64;
+  const auto t144 = t30 * c16_64;
+  const auto t145 = t31 * c16_64;
+  const auto t146 = t130 + t138;
+  const auto t147 = t131 + t139;
+  const auto t148 = t132 + t140;
+  const auto t149 = t133 + t141;
+  const auto t150 = t134 + t142;
+  const auto t151 = t135 + t143;
+  const auto t152 = t136 + t144;
+  const auto t153 = t137 + t145;
+  const auto t154 = t130 - t138;
+  const auto t155 = t131 - t139;
+  const auto t156 = t132 - t140;
+  const auto t157 = t133 - t141;
+  const auto t158 = t134 - t142;
+  const auto t159 = t135 - t143;
+  const auto t160 = t136 - t144;
+  const auto t161 = t137 - t145;
+  const auto t162 = t146 - t153;
+  const auto t163 = t147 - t152;
+  const auto t164 = t148 - t151;
+  const auto t165 = t149 - t150;
+  const auto t166 = t150 * c8_64;
+  const auto t167 = t151 * c8_64;
+  const auto t168 = t152 * c8_64;
+  const auto t169 = t153 * c8_64;
+  const auto t170 = t162 + t166;
+  const auto t171 = t163 + t167;
+  const auto t172 = t164 + t168;
+  const auto t173 = t165 + t169;
+  const auto t174 = t162 - t166;
+  const auto t175 = t163 - t167;
+  const auto t176 = t164 - t168;
+  const auto t177 = t165 - t169;
+  const auto t178 = t170 - t173;
+  const auto t179 = t171 - t172;
+  const auto t180 = t172 * c4_64;
+  const auto t181 = t173 * c4_64;
+  const auto t182 = t178 + t180;
+  const auto t183 = t179 + t181;
+  const auto t184 = t178 - t180;
+  const auto t185 = t179 - t181;
+  const auto t186 = t182 - t183;
+  const auto t187 = t183 * c2_64;
+  const auto t188 = t186 + t187;
+  const auto t189 = t186 - t187;
+  const auto t190 = t184 - t185;
+  const auto t191 = t185 * c30_64;
+  const auto t192 = t190 + t191;
+  const auto t193 = t190 - t191;
+  const auto t194 = t174 - t177;
+  const auto t195 = t175 - t176;
+  const auto t196 = t176 * c28_64;
+  const auto t197 = t177 * c28_64;
+  const auto t198 = t194 + t196;
+  const auto t199 = t195 + t197;
+  const auto t200 = t194 - t196;
+  const auto t201 = t195 - t197;
+  const auto t202 = t198 - t199;
+  const auto t203 = t199 * c14_64;
+  const auto t204 = t202 + t203;
+  const auto t205 = t202 - t203;
+  const auto t206 = t200 - t201;
+  const auto t207 = t201 * c18_64;
+  const auto t208 = t206 + t207;
+  const auto t209 = t206 - t207;
+  const auto t210 = t154 - t161;
+  const auto t211 = t155 - t160;
+  const auto t212 = t156 - t159;
+  const auto t213 = t157 - t158;
+  const auto t214 = t158 * c24_64;
+  const auto t215 = t159 * c24_64;
+  const auto t216 = t160 * c24_64;
+  const auto t217 = t161 * c24_64;
+  const auto t218 = t210 + t214;
+  const auto t219 = t211 + t215;
+  const auto t220 = t212 + t216;
+  const auto t221 = t213 + t217;
+  const auto t222 = t210 - t214;
+  const auto t223 = t211 - t215;
+  const auto t224 = t212 - t216;
+  const auto t225 = t213 - t217;
+  const auto t226 = t218 - t221;
+  const auto t227 = t219 - t220;
+  const auto t228 = t220 * c12_64;
+  const auto t229 = t221 * c12_64;
+  const auto t230 = t226 + t228;
+  const auto t231 = t227 + t229;
+  const auto t232 = t226 - t228;
+  const auto t233 = t227 - t229;
+  const auto t234 = t230 - t231;
+  const auto t235 = t231 * c6_64;
+  const auto t236 = t234 + t235;
+  const auto t237 = t234 - t235;
+  const auto t238 = t232 - t233;
+  const auto t239 = t233 * c26_64;
+  const auto t240 = t238 + t239;
+  const auto t241 = t238 - t239;
+  const auto t242 = t222 - t225;
+  const auto t243 = t223 - t224;
+  const auto t244 = t224 * c20_64;
+  const auto t245 = t225 * c20_64;
+  const auto t246 = t242 + t244;
+  const auto t247 = t243 + t245;
+  const auto t248 = t242 - t244;
+  const auto t249 = t243 - t245;
+  const auto t250 = t246 - t247;
+  const auto t251 = t247 * c10_64;
+  const auto t252 = t250 + t251;
+  const auto t253 = t250 - t251;
+  const auto t254 = t248 - t249;
+  const auto t255 = t249 * c22_64;
+  const auto t256 = t254 + t255;
+  const auto t257 = t254 - t255;
+
+  i00 = t60;  // Write-back: even outputs come from the sum half, odd outputs from the difference half.
+  i01 = t188;
+  i02 = t108;
+  i03 = t236;
+  i04 = t76;
+  i05 = t252;
+  i06 = t124;
+  i07 = t204;
+  i08 = t64;
+  i09 = t208;
+  i10 = t128;
+  i11 = t256;
+  i12 = t80;
+  i13 = t240;
+  i14 = t112;
+  i15 = t192;
+  i16 = t61;
+  i17 = t193;
+  i18 = t113;
+  i19 = t241;
+  i20 = t81;
+  i21 = t257;
+  i22 = t129;
+  i23 = t209;
+  i24 = t65;
+  i25 = t205;
+  i26 = t125;
+  i27 = t253;
+  i28 = t77;
+  i29 = t237;
+  i30 = t109;
+  i31 = t189;
+}
+
+template <class V>  // V must support +, - and * with the constants produced by set1.
+SIMD_ATTR PIK_INLINE void ColumnIDCT32(V& i00, V& i01, V& i02, V& i03, V& i04,
+                                       V& i05, V& i06, V& i07, V& i08, V& i09,
+                                       V& i10, V& i11, V& i12, V& i13, V& i14,
+                                       V& i15, V& i16, V& i17, V& i18, V& i19,
+                                       V& i20, V& i21, V& i22, V& i23, V& i24,
+                                       V& i25, V& i26, V& i27, V& i28, V& i29,
+                                       V& i30, V& i31) {  // 32-input inverse DCT; i00..i31 are read as inputs and overwritten with the outputs.
+  const BlockDesc<32> d;  // Descriptor handed to set1 for the constants below.
+  const auto c2_64 = set1(d, 0.502419286188155678f);  // 0.5 / cos(2 * pi / 64)
+  const auto c4_64 = set1(d, 0.509795579104159180f);  // 0.5 / cos(4 * pi / 64)
+  const auto c6_64 = set1(d, 0.522498614939688855f);  // 0.5 / cos(6 * pi / 64)
+  const auto c8_64 = set1(d, 0.541196100146197012f);  // 0.5 / cos(8 * pi / 64)
+  const auto c10_64 =
+      set1(d, 0.566944034816357689f);  // 0.5 / cos(10 * pi / 64)
+  const auto c12_64 =
+      set1(d, 0.601344886935045286f);  // 0.5 / cos(12 * pi / 64)
+  const auto c14_64 =
+      set1(d, 0.646821783359990077f);  // 0.5 / cos(14 * pi / 64)
+  const auto c16_64 =
+      set1(d, 0.707106781186547462f);  // 0.5 / cos(16 * pi / 64)
+  const auto c18_64 =
+      set1(d, 0.788154623451250202f);  // 0.5 / cos(18 * pi / 64)
+  const auto c20_64 =
+      set1(d, 0.899976223136415565f);  // 0.5 / cos(20 * pi / 64)
+  const auto c22_64 =
+      set1(d, 1.060677685990347063f);  // 0.5 / cos(22 * pi / 64)
+  const auto c24_64 =
+      set1(d, 1.306562964876376354f);  // 0.5 / cos(24 * pi / 64)
+  const auto c26_64 =
+      set1(d, 1.722447098238334195f);  // 0.5 / cos(26 * pi / 64)
+  const auto c28_64 =
+      set1(d, 2.562915447741505481f);  // 0.5 / cos(28 * pi / 64)
+  const auto c30_64 =
+      set1(d, 5.101148618689155256f);  // 0.5 / cos(30 * pi / 64)
+
+  const auto t00 = i00 + i16;  // t00..t97 consume only the even-indexed inputs.
+  const auto t01 = i00 - i16;
+  const auto t02 = i08 + i24;
+  const auto t03 = i08 - i24;
+  const auto t04 = t03 * c16_64;
+  const auto t05 = t02 + t04;
+  const auto t06 = t00 + t05;
+  const auto t07 = t01 + t04;
+  const auto t08 = t00 - t05;
+  const auto t09 = t01 - t04;
+  const auto t10 = i04 + i28;
+  const auto t11 = i04 - i28;
+  const auto t12 = t11 * c8_64;
+  const auto t13 = t10 + t12;
+  const auto t14 = i12 + i20;
+  const auto t15 = i12 - i20;
+  const auto t16 = t15 * c24_64;
+  const auto t17 = t14 + t16;
+  const auto t18 = t13 + t17;
+  const auto t19 = t12 + t16;
+  const auto t20 = t13 - t17;
+  const auto t21 = t12 - t16;
+  const auto t22 = t20 * c16_64;
+  const auto t23 = t21 * c16_64;
+  const auto t24 = t18 + t23;
+  const auto t25 = t19 + t22;
+  const auto t26 = t06 + t24;
+  const auto t27 = t07 + t25;
+  const auto t28 = t09 + t22;
+  const auto t29 = t08 + t23;
+  const auto t30 = t06 - t24;
+  const auto t31 = t07 - t25;
+  const auto t32 = t09 - t22;
+  const auto t33 = t08 - t23;
+  const auto t34 = i02 + i30;
+  const auto t35 = i02 - i30;
+  const auto t36 = t35 * c4_64;
+  const auto t37 = t34 + t36;
+  const auto t38 = i14 + i18;
+  const auto t39 = i14 - i18;
+  const auto t40 = t39 * c28_64;
+  const auto t41 = t38 + t40;
+  const auto t42 = t37 + t41;
+  const auto t43 = t36 + t40;
+  const auto t44 = t37 - t41;
+  const auto t45 = t36 - t40;
+  const auto t46 = t44 * c8_64;
+  const auto t47 = t45 * c8_64;
+  const auto t48 = t42 + t47;
+  const auto t49 = t43 + t46;
+  const auto t50 = i06 + i26;
+  const auto t51 = i06 - i26;
+  const auto t52 = t51 * c12_64;
+  const auto t53 = t50 + t52;
+  const auto t54 = i10 + i22;
+  const auto t55 = i10 - i22;
+  const auto t56 = t55 * c20_64;
+  const auto t57 = t54 + t56;
+  const auto t58 = t53 + t57;
+  const auto t59 = t52 + t56;
+  const auto t60 = t53 - t57;
+  const auto t61 = t52 - t56;
+  const auto t62 = t60 * c24_64;
+  const auto t63 = t61 * c24_64;
+  const auto t64 = t58 + t63;
+  const auto t65 = t59 + t62;
+  const auto t66 = t48 + t64;
+  const auto t67 = t49 + t65;
+  const auto t68 = t46 + t62;
+  const auto t69 = t47 + t63;
+  const auto t70 = t48 - t64;
+  const auto t71 = t49 - t65;
+  const auto t72 = t46 - t62;
+  const auto t73 = t47 - t63;
+  const auto t74 = t70 * c16_64;
+  const auto t75 = t71 * c16_64;
+  const auto t76 = t72 * c16_64;
+  const auto t77 = t73 * c16_64;
+  const auto t78 = t66 + t77;
+  const auto t79 = t67 + t76;
+  const auto t80 = t68 + t75;
+  const auto t81 = t69 + t74;
+  const auto t82 = t26 + t78;
+  const auto t83 = t27 + t79;
+  const auto t84 = t28 + t80;
+  const auto t85 = t29 + t81;
+  const auto t86 = t33 + t74;
+  const auto t87 = t32 + t75;
+  const auto t88 = t31 + t76;
+  const auto t89 = t30 + t77;
+  const auto t90 = t26 - t78;
+  const auto t91 = t27 - t79;
+  const auto t92 = t28 - t80;
+  const auto t93 = t29 - t81;
+  const auto t94 = t33 - t74;
+  const auto t95 = t32 - t75;
+  const auto t96 = t31 - t76;
+  const auto t97 = t30 - t77;
+  const auto t98 = i01 + i31;  // t98..t225 consume the odd-indexed inputs.
+  const auto t99 = i01 - i31;
+  const auto t100 = t99 * c2_64;
+  const auto t101 = t98 + t100;
+  const auto t102 = i15 + i17;
+  const auto t103 = i15 - i17;
+  const auto t104 = t103 * c30_64;
+  const auto t105 = t102 + t104;
+  const auto t106 = t101 + t105;
+  const auto t107 = t100 + t104;
+  const auto t108 = t101 - t105;
+  const auto t109 = t100 - t104;
+  const auto t110 = t108 * c4_64;
+  const auto t111 = t109 * c4_64;
+  const auto t112 = t106 + t111;
+  const auto t113 = t107 + t110;
+  const auto t114 = i07 + i25;
+  const auto t115 = i07 - i25;
+  const auto t116 = t115 * c14_64;
+  const auto t117 = t114 + t116;
+  const auto t118 = i09 + i23;
+  const auto t119 = i09 - i23;
+  const auto t120 = t119 * c18_64;
+  const auto t121 = t118 + t120;
+  const auto t122 = t117 + t121;
+  const auto t123 = t116 + t120;
+  const auto t124 = t117 - t121;
+  const auto t125 = t116 - t120;
+  const auto t126 = t124 * c28_64;
+  const auto t127 = t125 * c28_64;
+  const auto t128 = t122 + t127;
+  const auto t129 = t123 + t126;
+  const auto t130 = t112 + t128;
+  const auto t131 = t113 + t129;
+  const auto t132 = t110 + t126;
+  const auto t133 = t111 + t127;
+  const auto t134 = t112 - t128;
+  const auto t135 = t113 - t129;
+  const auto t136 = t110 - t126;
+  const auto t137 = t111 - t127;
+  const auto t138 = t134 * c8_64;
+  const auto t139 = t135 * c8_64;
+  const auto t140 = t136 * c8_64;
+  const auto t141 = t137 * c8_64;
+  const auto t142 = t130 + t141;
+  const auto t143 = t131 + t140;
+  const auto t144 = t132 + t139;
+  const auto t145 = t133 + t138;
+  const auto t146 = i03 + i29;
+  const auto t147 = i03 - i29;
+  const auto t148 = t147 * c6_64;
+  const auto t149 = t146 + t148;
+  const auto t150 = i13 + i19;
+  const auto t151 = i13 - i19;
+  const auto t152 = t151 * c26_64;
+  const auto t153 = t150 + t152;
+  const auto t154 = t149 + t153;
+  const auto t155 = t148 + t152;
+  const auto t156 = t149 - t153;
+  const auto t157 = t148 - t152;
+  const auto t158 = t156 * c12_64;
+  const auto t159 = t157 * c12_64;
+  const auto t160 = t154 + t159;
+  const auto t161 = t155 + t158;
+  const auto t162 = i05 + i27;
+  const auto t163 = i05 - i27;
+  const auto t164 = t163 * c10_64;
+  const auto t165 = t162 + t164;
+  const auto t166 = i11 + i21;
+  const auto t167 = i11 - i21;
+  const auto t168 = t167 * c22_64;
+  const auto t169 = t166 + t168;
+  const auto t170 = t165 + t169;
+  const auto t171 = t164 + t168;
+  const auto t172 = t165 - t169;
+  const auto t173 = t164 - t168;
+  const auto t174 = t172 * c20_64;
+  const auto t175 = t173 * c20_64;
+  const auto t176 = t170 + t175;
+  const auto t177 = t171 + t174;
+  const auto t178 = t160 + t176;
+  const auto t179 = t161 + t177;
+  const auto t180 = t158 + t174;
+  const auto t181 = t159 + t175;
+  const auto t182 = t160 - t176;
+  const auto t183 = t161 - t177;
+  const auto t184 = t158 - t174;
+  const auto t185 = t159 - t175;
+  const auto t186 = t182 * c24_64;
+  const auto t187 = t183 * c24_64;
+  const auto t188 = t184 * c24_64;
+  const auto t189 = t185 * c24_64;
+  const auto t190 = t178 + t189;
+  const auto t191 = t179 + t188;
+  const auto t192 = t180 + t187;
+  const auto t193 = t181 + t186;
+  const auto t194 = t142 + t190;
+  const auto t195 = t143 + t191;
+  const auto t196 = t144 + t192;
+  const auto t197 = t145 + t193;
+  const auto t198 = t138 + t186;
+  const auto t199 = t139 + t187;
+  const auto t200 = t140 + t188;
+  const auto t201 = t141 + t189;
+  const auto t202 = t142 - t190;
+  const auto t203 = t143 - t191;
+  const auto t204 = t144 - t192;
+  const auto t205 = t145 - t193;
+  const auto t206 = t138 - t186;
+  const auto t207 = t139 - t187;
+  const auto t208 = t140 - t188;
+  const auto t209 = t141 - t189;
+  const auto t210 = t202 * c16_64;
+  const auto t211 = t203 * c16_64;
+  const auto t212 = t204 * c16_64;
+  const auto t213 = t205 * c16_64;
+  const auto t214 = t206 * c16_64;
+  const auto t215 = t207 * c16_64;
+  const auto t216 = t208 * c16_64;
+  const auto t217 = t209 * c16_64;
+  const auto t218 = t194 + t217;
+  const auto t219 = t195 + t216;
+  const auto t220 = t196 + t215;
+  const auto t221 = t197 + t214;
+  const auto t222 = t198 + t213;
+  const auto t223 = t199 + t212;
+  const auto t224 = t200 + t211;
+  const auto t225 = t201 + t210;
+  const auto t226 = t82 + t218;  // Final stage: even-derived term plus/minus odd-derived term.
+  const auto t227 = t83 + t219;
+  const auto t228 = t84 + t220;
+  const auto t229 = t85 + t221;
+  const auto t230 = t86 + t222;
+  const auto t231 = t87 + t223;
+  const auto t232 = t88 + t224;
+  const auto t233 = t89 + t225;
+  const auto t234 = t97 + t210;
+  const auto t235 = t96 + t211;
+  const auto t236 = t95 + t212;
+  const auto t237 = t94 + t213;
+  const auto t238 = t93 + t214;
+  const auto t239 = t92 + t215;
+  const auto t240 = t91 + t216;
+  const auto t241 = t90 + t217;
+  const auto t242 = t82 - t218;
+  const auto t243 = t83 - t219;
+  const auto t244 = t84 - t220;
+  const auto t245 = t85 - t221;
+  const auto t246 = t86 - t222;
+  const auto t247 = t87 - t223;
+  const auto t248 = t88 - t224;
+  const auto t249 = t89 - t225;
+  const auto t250 = t97 - t210;
+  const auto t251 = t96 - t211;
+  const auto t252 = t95 - t212;
+  const auto t253 = t94 - t213;
+  const auto t254 = t93 - t214;
+  const auto t255 = t92 - t215;
+  const auto t256 = t91 - t216;
+  const auto t257 = t90 - t217;
+
+  i00 = t226;  // Outputs 0..15 take the "plus" results t226..t241 in order.
+  i01 = t227;
+  i02 = t228;
+  i03 = t229;
+  i04 = t230;
+  i05 = t231;
+  i06 = t232;
+  i07 = t233;
+  i08 = t234;
+  i09 = t235;
+  i10 = t236;
+  i11 = t237;
+  i12 = t238;
+  i13 = t239;
+  i14 = t240;
+  i15 = t241;
+  i16 = t257;  // Outputs 16..31 take the "minus" results in reverse, so i[31 - k] mirrors i[k].
+  i17 = t256;
+  i18 = t255;
+  i19 = t254;
+  i20 = t253;
+  i21 = t252;
+  i22 = t251;
+  i23 = t250;
+  i24 = t249;
+  i25 = t248;
+  i26 = t247;
+  i27 = t246;
+  i28 = t245;
+  i29 = t244;
+  i30 = t243;
+  i31 = t242;
+}
+
+template <class From, class To>  // From must offer Load(k, i); To must offer Store(v, k, i).
+SIMD_ATTR PIK_INLINE void ColumnDCT32(const From& from, const To& to) {  // Runs the 32-input ColumnDCT32 on values read from `from` and stores the results in `to`.
+  const BlockDesc<32> d;  // Only d.N is used here, as the loop stride.
+
+  for (size_t i = 0; i < 32; i += d.N) {  // Step the second index by d.N per pass.
+    auto i00 = from.Load(0, i);  // Load the 32 values whose first index is 0..31.
+    auto i01 = from.Load(1, i);
+    auto i02 = from.Load(2, i);
+    auto i03 = from.Load(3, i);
+    auto i04 = from.Load(4, i);
+    auto i05 = from.Load(5, i);
+    auto i06 = from.Load(6, i);
+    auto i07 = from.Load(7, i);
+    auto i08 = from.Load(8, i);
+    auto i09 = from.Load(9, i);
+    auto i10 = from.Load(10, i);
+    auto i11 = from.Load(11, i);
+    auto i12 = from.Load(12, i);
+    auto i13 = from.Load(13, i);
+    auto i14 = from.Load(14, i);
+    auto i15 = from.Load(15, i);
+    auto i16 = from.Load(16, i);
+    auto i17 = from.Load(17, i);
+    auto i18 = from.Load(18, i);
+    auto i19 = from.Load(19, i);
+    auto i20 = from.Load(20, i);
+    auto i21 = from.Load(21, i);
+    auto i22 = from.Load(22, i);
+    auto i23 = from.Load(23, i);
+    auto i24 = from.Load(24, i);
+    auto i25 = from.Load(25, i);
+    auto i26 = from.Load(26, i);
+    auto i27 = from.Load(27, i);
+    auto i28 = from.Load(28, i);
+    auto i29 = from.Load(29, i);
+    auto i30 = from.Load(30, i);
+    auto i31 = from.Load(31, i);
+    ColumnDCT32(i00, i01, i02, i03, i04, i05, i06, i07, i08, i09, i10, i11, i12,
+                i13, i14, i15, i16, i17, i18, i19, i20, i21, i22, i23, i24, i25,
+                i26, i27, i28, i29, i30, i31);  // Reference-taking overload: overwrites i00..i31 with the results.
+    to.Store(i00, 0, i);  // Store each value under the same (k, i) pair it was loaded from.
+    to.Store(i01, 1, i);
+    to.Store(i02, 2, i);
+    to.Store(i03, 3, i);
+    to.Store(i04, 4, i);
+    to.Store(i05, 5, i);
+    to.Store(i06, 6, i);
+    to.Store(i07, 7, i);
+    to.Store(i08, 8, i);
+    to.Store(i09, 9, i);
+    to.Store(i10, 10, i);
+    to.Store(i11, 11, i);
+    to.Store(i12, 12, i);
+    to.Store(i13, 13, i);
+    to.Store(i14, 14, i);
+    to.Store(i15, 15, i);
+    to.Store(i16, 16, i);
+    to.Store(i17, 17, i);
+    to.Store(i18, 18, i);
+    to.Store(i19, 19, i);
+    to.Store(i20, 20, i);
+    to.Store(i21, 21, i);
+    to.Store(i22, 22, i);
+    to.Store(i23, 23, i);
+    to.Store(i24, 24, i);
+    to.Store(i25, 25, i);
+    to.Store(i26, 26, i);
+    to.Store(i27, 27, i);
+    to.Store(i28, 28, i);
+    to.Store(i29, 29, i);
+    to.Store(i30, 30, i);
+    to.Store(i31, 31, i);
+  }
+}
+
+template <class From, class To>  // From must offer Load(k, i); To must offer Store(v, k, i).
+SIMD_ATTR PIK_INLINE void ColumnIDCT32(const From& from, const To& to) {  // Runs the 32-input ColumnIDCT32 on values read from `from` and stores the results in `to`.
+  const BlockDesc<32> d;  // Only d.N is used here, as the loop stride.
+
+  for (size_t i = 0; i < 32; i += d.N) {  // Step the second index by d.N per pass.
+    auto i00 = from.Load(0, i);  // Load the 32 values whose first index is 0..31.
+    auto i01 = from.Load(1, i);
+    auto i02 = from.Load(2, i);
+    auto i03 = from.Load(3, i);
+    auto i04 = from.Load(4, i);
+    auto i05 = from.Load(5, i);
+    auto i06 = from.Load(6, i);
+    auto i07 = from.Load(7, i);
+    auto i08 = from.Load(8, i);
+    auto i09 = from.Load(9, i);
+    auto i10 = from.Load(10, i);
+    auto i11 = from.Load(11, i);
+    auto i12 = from.Load(12, i);
+    auto i13 = from.Load(13, i);
+    auto i14 = from.Load(14, i);
+    auto i15 = from.Load(15, i);
+    auto i16 = from.Load(16, i);
+    auto i17 = from.Load(17, i);
+    auto i18 = from.Load(18, i);
+    auto i19 = from.Load(19, i);
+    auto i20 = from.Load(20, i);
+    auto i21 = from.Load(21, i);
+    auto i22 = from.Load(22, i);
+    auto i23 = from.Load(23, i);
+    auto i24 = from.Load(24, i);
+    auto i25 = from.Load(25, i);
+    auto i26 = from.Load(26, i);
+    auto i27 = from.Load(27, i);
+    auto i28 = from.Load(28, i);
+    auto i29 = from.Load(29, i);
+    auto i30 = from.Load(30, i);
+    auto i31 = from.Load(31, i);
+    ColumnIDCT32(i00, i01, i02, i03, i04, i05, i06, i07, i08, i09, i10, i11,
+                 i12, i13, i14, i15, i16, i17, i18, i19, i20, i21, i22, i23,
+                 i24, i25, i26, i27, i28, i29, i30, i31);  // Reference-taking overload: overwrites i00..i31 with the results.
+    to.Store(i00, 0, i);  // Store each value under the same (k, i) pair it was loaded from.
+    to.Store(i01, 1, i);
+    to.Store(i02, 2, i);
+    to.Store(i03, 3, i);
+    to.Store(i04, 4, i);
+    to.Store(i05, 5, i);
+    to.Store(i06, 6, i);
+    to.Store(i07, 7, i);
+    to.Store(i08, 8, i);
+    to.Store(i09, 9, i);
+    to.Store(i10, 10, i);
+    to.Store(i11, 11, i);
+    to.Store(i12, 12, i);
+    to.Store(i13, 13, i);
+    to.Store(i14, 14, i);
+    to.Store(i15, 15, i);
+    to.Store(i16, 16, i);
+    to.Store(i17, 17, i);
+    to.Store(i18, 18, i);
+    to.Store(i19, 19, i);
+    to.Store(i20, 20, i);
+    to.Store(i21, 21, i);
+    to.Store(i22, 22, i);
+    to.Store(i23, 23, i);
+    to.Store(i24, 24, i);
+    to.Store(i25, 25, i);
+    to.Store(i26, 26, i);
+    to.Store(i27, 27, i);
+    to.Store(i28, 28, i);
+    to.Store(i29, 29, i);
+    to.Store(i30, 30, i);
+    to.Store(i31, 31, i);
+  }
+}
+
+}  // namespace pik
+
+#endif  // THIRD_PARTY_DCT_SIMD_ANY_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/dct_util.cc b/codec/L2/demos/pikEnc/host/pik/dct_util.cc
new file mode 100755
index 0000000000..31b1e29e51
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/dct_util.cc
@@ -0,0 +1,68 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/dct_util.h"
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/bits.h"
+#include "pik/common.h"
+#include "pik/dct.h"
+#include "pik/gauss_blur.h"
+#include "pik/profiler.h"
+#include "pik/simd/simd.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Applies ComputeTransposedScaledDCT to every kBlockDim x kBlockDim block of
+// each plane; block (bx, by) is written to row `by`, offset bx * kDCTBlockSize.
+SIMD_ATTR void TransposedScaledDCT(const Image3F& image,
+                                   Image3F* PIK_RESTRICT dct) {
+  PROFILER_ZONE("TransposedScaledDCT facade");
+  PIK_ASSERT(image.xsize() % kBlockDim == 0);
+  PIK_ASSERT(image.ysize() % kBlockDim == 0);
+  const size_t blocks_per_row = image.xsize() / kBlockDim;
+  const size_t block_rows = image.ysize() / kBlockDim;
+  PIK_ASSERT(dct->xsize() == blocks_per_row * kDCTBlockSize);
+  PIK_ASSERT(dct->ysize() == block_rows);
+  for (size_t plane = 0; plane < 3; ++plane) {
+    for (size_t by = 0; by < block_rows; ++by) {
+      const float* PIK_RESTRICT src = image.ConstPlaneRow(plane, by * kBlockDim);
+      float* PIK_RESTRICT dst = dct->PlaneRow(plane, by);
+      for (size_t bx = 0; bx < blocks_per_row; ++bx) {
+        ComputeTransposedScaledDCT<kBlockDim>()(
+            FromLines<kBlockDim>(src + bx * kBlockDim, image.PixelsPerRow()),
+            ScaleToBlock<kBlockDim>(dst + bx * kDCTBlockSize));
+      }
+    }
+  }
+}
+
+// Applies ComputeTransposedScaledIDCT to every kDCTBlockSize-long block of
+// `dct`; block (bx, by) fills the kBlockDim x kBlockDim tile at (bx, by).
+SIMD_ATTR void TransposedScaledIDCT(const Image3F& dct,
+                                    Image3F* PIK_RESTRICT idct) {
+  PROFILER_ZONE("IDCT facade");
+  PIK_ASSERT(dct.xsize() % kDCTBlockSize == 0);
+  const size_t blocks_per_row = dct.xsize() / kDCTBlockSize;
+  const size_t block_rows = dct.ysize();
+  PIK_ASSERT(idct->xsize() == blocks_per_row * kBlockDim);
+  PIK_ASSERT(idct->ysize() == block_rows * kBlockDim);
+  for (size_t plane = 0; plane < 3; ++plane) {
+    for (size_t by = 0; by < block_rows; ++by) {
+      const float* PIK_RESTRICT src = dct.ConstPlaneRow(plane, by);
+      float* PIK_RESTRICT dst = idct->PlaneRow(plane, by * kBlockDim);
+      for (size_t bx = 0; bx < blocks_per_row; ++bx) {
+        ComputeTransposedScaledIDCT<kBlockDim>()(
+            FromBlock<kBlockDim>(src + bx * kDCTBlockSize),
+            ToLines<kBlockDim>(dst + bx * kBlockDim, idct->PixelsPerRow()));
+      }
+    }
+  }
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/dct_util.h b/codec/L2/demos/pikEnc/host/pik/dct_util.h
new file mode 100755
index 0000000000..c060a597bc
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/dct_util.h
@@ -0,0 +1,110 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_DCT_UTIL_H_
+#define PIK_DCT_UTIL_H_
+
+#include "pik/common.h"
+#include "pik/data_parallel.h"
+#include "pik/image.h"
+#include <iostream>
+
+namespace pik {
+
+// Scatter/gather a SX*SY block into (SX/kBlockDim)*(SY/kBlockDim)
+// kBlockDim*kBlockDim blocks. `block` should be composed of SY rows of SX
+// contiguous blocks. In the output, each kBlockDim*kBlockDim block should be
+// contiguous, and the same "block row" should be too, but different block rows
+// are at a distance of `stride` pixels.
+template <size_t SX, size_t SY, typename T>
+void ScatterBlock(const T* PIK_RESTRICT block, size_t block_stride,
+                  T* PIK_RESTRICT row, size_t stride) {
+  // The `& (n - 1)` masks below act as `% n`, which only holds when
+  // xblocks, yblocks and xblocks * kDCTBlockSize are powers of two.
+  constexpr size_t xblocks = SX / kBlockDim;
+  constexpr size_t yblocks = SY / kBlockDim;
+  for (size_t y = 0; y < SY; y++) {
+    T* PIK_RESTRICT current_row =
+        row + (y & (yblocks - 1)) * stride + (y / yblocks) * kBlockDim;
+    for (size_t x = 0; x < SX; x++) {
+      // Linear position in `block`, split into its row and in-row offset.
+      size_t block_pos = y * SX + x;
+      size_t block_row = block_pos / (xblocks * kDCTBlockSize);
+      size_t block_idx = block_pos & (xblocks * kDCTBlockSize - 1);
+      current_row[(x & (xblocks - 1)) * kDCTBlockSize + (x / xblocks)] =
+          block[block_row * block_stride + block_idx];
+    }
+  }
+}
+
+// Inverse of ScatterBlock: copies values from the strided `row` into `block`.
+template <size_t SX, size_t SY, typename T>
+void GatherBlock(const T* PIK_RESTRICT row, size_t stride,
+                 T* PIK_RESTRICT block, size_t block_stride) {
+  constexpr size_t blocks_x = SX / kBlockDim;
+  constexpr size_t blocks_y = SY / kBlockDim;
+  const size_t row_len = blocks_x * kDCTBlockSize;  // values per `block` row
+  for (size_t y = 0; y < SY; ++y) {
+    const T* PIK_RESTRICT src =
+        row + (y & (blocks_y - 1)) * stride + (y / blocks_y) * kBlockDim;
+    for (size_t x = 0; x < SX; ++x) {
+      const size_t pos = y * SX + x;
+      block[(pos / row_len) * block_stride + (pos & (row_len - 1))] =
+          src[(x & (blocks_x - 1)) * kDCTBlockSize + (x / blocks_x)];
+    }
+  }
+}
+
+// Fills a preallocated (N*N)*W x H `dct` with (N*N)x1 blocks produced by
+// ComputeTransposedScaledDCT() from the corresponding NxN block of
+// `image`. Note that `dct` coefficients are scaled by 1 / (N*N), so that
+// ComputeTransposedScaledIDCT applied to each block or TransposedScaledIDCT
+// will return the original input.
+// REQUIRES: image.xsize() == N*W, image.ysize() == N*H
+SIMD_ATTR void TransposedScaledDCT(const Image3F& image,
+                                   Image3F* PIK_RESTRICT dct);
+
+// Fills a preallocated N*W x N*H `idct` with NxN blocks produced by
+// ComputeTransposedScaledIDCT() from the (N*N)x1 blocks of `dct`.
+// REQUIRES: dct.xsize() == N*N*W, dct.ysize() == H
+SIMD_ATTR void TransposedScaledIDCT(const Image3F& dct,
+                                    Image3F* PIK_RESTRICT idct);
+
+// Returns an image holding the first value of every kDCTBlockSize-wide block
+// of `coeffs`. REQUIRES: coeffs.xsize() % kDCTBlockSize == 0.
+template <typename T>
+Image3<T> DCImage(const Image3<T>& coeffs) {
+  PIK_ASSERT(coeffs.xsize() % kDCTBlockSize == 0);
+  Image3<T> dc(coeffs.xsize() / kDCTBlockSize, coeffs.ysize());
+  for (size_t plane = 0; plane < 3; ++plane) {
+    for (size_t y = 0; y < dc.ysize(); ++y) {
+      const T* PIK_RESTRICT src = coeffs.ConstPlaneRow(plane, y);
+      T* PIK_RESTRICT dst = dc.PlaneRow(plane, y);
+      for (size_t x = 0, i = 0; x < dc.xsize(); ++x, i += kDCTBlockSize) {
+        dst[x] = src[i];
+      }
+    }
+  }
+  return dc;
+}
+
+// Stores dc(x, y) at coeffs(kDCTBlockSize * x, y); other values are untouched.
+template <typename T>
+void FillDC(const Image3<T>& dc, Image3<T>* PIK_RESTRICT coeffs) {
+  for (size_t plane = 0; plane < 3; ++plane) {
+    for (size_t y = 0; y < dc.ysize(); ++y) {
+      const T* PIK_RESTRICT src = dc.ConstPlaneRow(plane, y);
+      T* PIK_RESTRICT dst = coeffs->PlaneRow(plane, y);
+      for (size_t x = 0, i = 0; x < dc.xsize(); ++x, i += kDCTBlockSize) {
+        dst[i] = src[x];
+      }
+    }
+  }
+}
+
+}  // namespace pik
+
+#endif  // PIK_DCT_UTIL_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/decode_and_encode.cc b/codec/L2/demos/pikEnc/host/pik/decode_and_encode.cc
new file mode 100755
index 0000000000..ce6b3476aa
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/decode_and_encode.cc
@@ -0,0 +1,43 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/codec.h"
+
+namespace pik {
+namespace {
+
+// Command-line driver: decodes argv[1] using argv[2] as the "color_space"
+// decoder hint and re-encodes the result to argv[3]. Returns 0 on success.
+int Convert(int argc, char** argv) {
+  if (argc != 4) {
+    fprintf(stderr, "Args: in colorspace_description out\n");
+    return 1;
+  }
+  const std::string input_path(argv[1]);
+  const std::string color_desc(argv[2]);
+  const std::string output_path(argv[3]);
+
+  CodecContext context;
+  CodecInOut image(&context);
+  ThreadPool workers(4);
+  image.dec_hints.Add("color_space", color_desc);
+  if (!image.SetFromFile(input_path, &workers)) {
+    fprintf(stderr, "Failed to read %s\n", input_path.c_str());
+    return 1;
+  }
+  if (!image.EncodeToFile(image.dec_c_original,
+                          image.original_bits_per_sample(), output_path, &workers)) {
+    fprintf(stderr, "Failed to write %s\n", output_path.c_str());
+    return 1;
+  }
+
+  return 0;
+}
+
+}  // namespace
+}  // namespace pik
+
+int main(int argc, char** argv) { return pik::Convert(argc, argv); }  // exit code comes from Convert
diff --git a/codec/L2/demos/pikEnc/host/pik/deconvolve.cc b/codec/L2/demos/pikEnc/host/pik/deconvolve.cc
new file mode 100755
index 0000000000..e7e7e9c067
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/deconvolve.cc
@@ -0,0 +1,99 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/deconvolve.h"
+#include <vector>
+#include "pik/optimize.h"
+#include "pik/status.h"
+
+namespace pik {
+
+namespace {
+
+// Full linear convolution: zeroes out[0 .. n1 + n2 - 2], then adds
+// inp1[i] * inp2[j] into out[i + j] for every pair (i, j).
+void Convolve(const float* inp1, int n1, const float* inp2, int n2,
+              float* out) {
+  const int out_len = n1 + n2 - 1;
+  for (int k = 0; k < out_len; ++k) out[k] = 0.0;
+  for (int i = 0; i < n1; ++i) {
+    float* dst = out + i;  // inp1[i] only touches out[i .. i + n2 - 1]
+    for (int j = 0; j < n2; ++j) dst[j] += inp1[i] * inp2[j];
+  }
+}
+
+// Same as a full convolution of inp1 with reverse_inp2 read back to front:
+// out[i + j] accumulates inp1[i] * reverse_inp2[n2 - 1 - j].
+void ConvolveReversed(const float* inp1, int n1, const float* reverse_inp2,
+                      int n2, float* out) {
+  const int out_len = n1 + n2 - 1;
+  for (int k = 0; k < out_len; ++k) out[k] = 0.0;
+  for (int i = 0; i < n1; ++i) {
+    float* dst = out + i;  // inp1[i] only touches out[i .. i + n2 - 1]
+    for (int j = 0; j < n2; ++j) dst[j] += inp1[i] * reverse_inp2[n2 - 1 - j];
+  }
+}
+
+// Loss handed to the optimizer by InvertConvolution(): squared distance
+// between Convolve(filter, w) and a unit impulse, plus a small penalty on w.
+struct LossFunction {
+  // Returns the loss at `w` (odd length) and stores one entry per tap in *df.
+  float Compute(const std::vector<float>& w, std::vector<float>* df) const {
+    const size_t num_taps = w.size();
+    const size_t center_tap = num_taps / 2;
+    // Residual = filter convolved with w, minus 1 at the central sample.
+    std::vector<float> residual(filter_length + num_taps - 1, 0.0);
+    Convolve(filter, filter_length, &w[0], num_taps, &residual[0]);
+    residual[residual.size() / 2] -= 1.0;
+    float sumsq = 0.0;
+    for (size_t i = 0; i < residual.size(); ++i) {
+      sumsq += residual[i] * residual[i];
+    }
+    // TODO(robryk): This is a lower bound on the operator norm (and an upper
+    // bound on operator norm * C(n), n = sum of filter sizes), not the
+    // operator norm itself, which is what should actually be optimized.
+    std::vector<float> derivs(residual.size() + filter_length - 1, 0.0);
+    ConvolveReversed(&residual[0], residual.size(), filter, filter_length,
+                     &derivs[0]);
+    // Outside of the midpoint, regularize the sharpening kernel towards zero.
+    static const double kRegularizationWeight = 0.00001;
+    for (size_t i = 0; i < num_taps; ++i) {
+      (*df)[i] = -2 * derivs[i + filter_length - 1];
+      if (i != center_tap) {
+        sumsq += kRegularizationWeight * w[i] * w[i];
+        (*df)[i] += -2 * kRegularizationWeight * w[i];
+      }
+    }
+    return sumsq;
+  }
+
+  const float* filter;  // Filter taps; the pointer is stored, not copied.
+  int filter_length;    // Guaranteed to be odd.
+};
+
+}  // namespace
+
+// Fits an `inverse_filter_length`-tap filter for `filter`, starting from all
+// zeros, and returns the final LossFunction value. Both lengths must be odd.
+float InvertConvolution(const float* filter, int filter_length,
+                        float* inverse_filter, int inverse_filter_length) {
+  PIK_CHECK(filter_length % 2 == 1);
+  PIK_CHECK(inverse_filter_length % 2 == 1);
+  constexpr int kMaxIter = 1000;
+  constexpr float kGradNormThreshold = 1e-8;
+  LossFunction loss{filter, filter_length};
+  std::vector<float> solution =
+      optimize::OptimizeWithScaledConjugateGradientMethod(
+          loss, std::vector<float>(inverse_filter_length, 0.0),
+          kGradNormThreshold, kMaxIter);
+  for (int i = 0; i < inverse_filter_length; ++i) {
+    inverse_filter[i] = solution[i];
+  }
+  std::vector<float> unused_gradient(inverse_filter_length);
+  return loss.Compute(solution, &unused_gradient);
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/deconvolve.h b/codec/L2/demos/pikEnc/host/pik/deconvolve.h
new file mode 100755
index 0000000000..0474d888c0
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/deconvolve.h
@@ -0,0 +1,24 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_DECONVOLVE_H_
+#define PIK_DECONVOLVE_H_
+
+namespace pik {
+
+// Compute a filter such that convolving with it is an approximation of the
+// inverse of convolving with the provided filter.
+// The resulting filter is written into inverse_filter and is of the provided
+// inverse_filter_length length.
+// filter_length and inverse_filter_length have to be odd.
+// Returns the L2 distance between the identity filter and the composition of
+// the two filters.
+float InvertConvolution(const float* filter, int filter_length,
+                        float* inverse_filter, int inverse_filter_length);
+
+}  // namespace pik
+
+#endif  // PIK_DECONVOLVE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/descriptive_statistics.cc b/codec/L2/demos/pikEnc/host/pik/descriptive_statistics.cc
new file mode 100755
index 0000000000..1f59d2a4ff
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/descriptive_statistics.cc
@@ -0,0 +1,100 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/descriptive_statistics.h"
+
+#include <stdio.h>
+
+#include "pik/status.h"
+
+namespace pik {
+
+// Merges `other` into *this. Counts are converted to double before being
+// multiplied: total_n^3 as int64_t overflows from 2^21 combined samples on.
+void Stats::Assimilate(const Stats& other) {
+  const int64_t total_n = n_ + other.n_;
+  if (total_n == 0) return;  // Nothing to do; prevents div by zero.
+
+  min_ = std::min(min_, other.min_);
+  max_ = std::max(max_, other.max_);
+  product_ *= other.product_;
+
+  const double n = static_cast<double>(n_);
+  const double other_n = static_cast<double>(other.n_);
+  const double product_n = n * other_n;
+  const double n2 = n * n;
+  const double other_n2 = other_n * other_n;
+  const double total_n2 = static_cast<double>(total_n) * total_n;
+  const double total_n3 = total_n2 * total_n;
+  // Precompute reciprocal for speed - used at least twice.
+  const double inv_total_n = 1.0 / total_n;
+  const double inv_total_n2 = 1.0 / total_n2;
+
+  const double delta = other.m1_ - m1_;
+  const double delta2 = delta * delta;
+  const double delta3 = delta * delta2;
+  const double delta4 = delta2 * delta2;
+
+  m1_ = (n_ * m1_ + other.n_ * other.m1_) * inv_total_n;
+  const double new_m2 = m2_ + other.m2_ + delta2 * product_n * inv_total_n;
+  const double new_m3 =
+      m3_ + other.m3_ + delta3 * product_n * (n_ - other.n_) * inv_total_n2 +
+      3.0 * delta * (n_ * other.m2_ - other.n_ * m2_) * inv_total_n;
+  m4_ += other.m4_ +
+         delta4 * product_n * (n2 - product_n + other_n2) / total_n3 +
+         6.0 * delta2 * (n2 * other.m2_ + other_n2 * m2_) * inv_total_n2 +
+         4.0 * delta * (n_ * other.m3_ - other.n_ * m3_) * inv_total_n;
+
+  m2_ = new_m2;  // m2_/m3_ are replaced last: the m4_ update reads the old ones.
+  m3_ = new_m3;
+  n_ = total_n;
+}
+
+// Formats the statistics that are not masked out by `exclude` (bitwise OR of
+// the kNo* flags) as space-terminated "Name=value " fields. Returns an empty
+// string when every field is excluded.
+std::string Stats::ToString(int exclude) const {
+  char buf[300];
+  buf[0] = '\0';  // Otherwise read uninitialized if every field is excluded.
+  int pos = 0;
+  constexpr int kLastPos = sizeof(buf) - 1;
+
+  // snprintf returns the length it wanted to write, which exceeds the space
+  // left when a field is truncated. Clamping `pos` keeps `sizeof(buf) - pos`
+  // from wrapping around and disabling the bound of the next call.
+  const auto advance = [&](int ret) {
+    PIK_ASSERT(ret > 0);
+    if (ret > 0) pos += ret;
+    if (pos > kLastPos) pos = kLastPos;
+  };
+
+  if ((exclude & kNoCount) == 0) {
+    advance(snprintf(buf + pos, sizeof(buf) - pos, "Count=%6zu ",
+                     static_cast<size_t>(Count())));
+  }
+  if ((exclude & kNoMeanSD) == 0) {
+    advance(snprintf(buf + pos, sizeof(buf) - pos, "Mean=%9.6f SD=%8.5f ",
+                     Mean(), StandardDeviation()));
+  }
+  if ((exclude & kNoMinMax) == 0) {
+    advance(snprintf(buf + pos, sizeof(buf) - pos, "Min=%8.5f Max=%8.5f ",
+                     Min(), Max()));
+  }
+  if ((exclude & kNoSkewKurt) == 0) {
+    advance(snprintf(buf + pos, sizeof(buf) - pos, "Skew=%5.2f Kurt=%7.2f ",
+                     Skewness(), Kurtosis()));
+  }
+  if ((exclude & kNoGeomean) == 0) {
+    advance(snprintf(buf + pos, sizeof(buf) - pos, "GeoMean=%9.6f ",
+                     GeometricMean()));
+  }
+
+  // Always holds after clamping; kept as a sanity check.
+  PIK_ASSERT(pos < sizeof(buf));
+  return buf;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/descriptive_statistics.h b/codec/L2/demos/pikEnc/host/pik/descriptive_statistics.h
new file mode 100755
index 0000000000..356d451683
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/descriptive_statistics.h
@@ -0,0 +1,126 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_DESCRIPTIVE_STATISTICS_H_
+#define PIK_DESCRIPTIVE_STATISTICS_H_
+
+// For analyzing the range/distribution of scalars.
+
+#include <stdint.h>
+#include <algorithm>
+#include <cmath>
+#include <string>
+
+namespace pik {
+
+// Descriptive statistics of a variable (4 moments).
+class Stats {
+ public:
+  void Notify(const float x) {  // Folds one sample into the running statistics.
+    ++n_;
+
+    min_ = std::min(min_, x);
+    max_ = std::max(max_, x);
+
+    product_ *= x;
+
+    // Online moments. Reference: https://goo.gl/9ha694
+    const double d = x - m1_;
+    const double d_div_n = d / n_;
+    const double d2n1_div_n = d * (n_ - 1) * d_div_n;
+    const int64_t n_poly = n_ * n_ - 3 * n_ + 3;  // `long` is 32-bit on LLP64
+    m1_ += d_div_n;
+    m4_ += d_div_n * (d_div_n * (d2n1_div_n * n_poly + 6.0 * m2_) - 4.0 * m3_);
+    m3_ += d_div_n * (d2n1_div_n * (n_ - 2) - 3.0 * m2_);
+    m2_ += d2n1_div_n;
+  }
+
+  void Assimilate(const Stats& other);  // Merges another accumulator into this.
+
+  int64_t Count() const { return n_; }
+
+  float Min() const { return min_; }
+  float Max() const { return max_; }
+
+  double GeometricMean() const {
+    return n_ == 0 ? 0.0 : pow(product_, 1.0 / n_);
+  }
+
+  double Mean() const { return m1_; }
+  // Same as Mu2. Assumes n_ is large.
+  double SampleVariance() const {
+    return n_ == 0 ? 0.0 : m2_ / static_cast<double>(n_);  // no int truncation
+  }
+  // Unbiased estimator for population variance even for smaller n_.
+  double Variance() const {
+    if (n_ == 0) return 0.0;
+    if (n_ == 1) return m2_;
+    return m2_ / static_cast<double>(n_ - 1);
+  }
+  double StandardDeviation() const { return std::sqrt(Variance()); }
+  // Near zero for normal distributions; if positive on a unimodal distribution,
+  // the right tail is fatter. Assumes n_ is large.
+  double SampleSkewness() const {
+    if (std::abs(m2_) < 1E-7) return 0.0;
+    return m3_ * std::sqrt(static_cast<double>(n_)) / std::pow(m2_, 1.5);
+  }
+  // Corrected for bias (same as Wikipedia and Minitab but not Excel).
+  double Skewness() const {
+    if (n_ == 0) return 0.0;
+    const double biased = SampleSkewness();
+    const double r = (n_ - 1.0) / n_;
+    return biased * std::pow(r, 1.5);
+  }
+  // Near zero for normal distributions; smaller values indicate fewer/smaller
+  // outliers and larger indicates more/larger outliers. Assumes n_ is large.
+  double SampleKurtosis() const {
+    if (std::abs(m2_) < 1E-7) return 0.0;
+    return m4_ * n_ / (m2_ * m2_);
+  }
+  // Corrected for bias (same as Wikipedia and Minitab but not Excel).
+  double Kurtosis() const {
+    if (n_ == 0) return 0.0;
+    const double biased = SampleKurtosis();
+    const double r = (n_ - 1.0) / n_;
+    return biased * r * r;
+  }
+
+  // Central moments, useful for "method of moments"-based parameter estimation
+  // of a mixture of two Gaussians. Assumes Count() != 0.
+  double Mu1() const { return m1_; }
+  double Mu2() const { return m2_ / static_cast<double>(n_); }
+  double Mu3() const { return m3_ / static_cast<double>(n_); }
+  double Mu4() const { return m4_ / static_cast<double>(n_); }
+
+  // Which statistics to EXCLUDE in ToString
+  enum {
+    kNoCount = 1,
+    kNoMeanSD = 2,
+    kNoMinMax = 4,
+    kNoSkewKurt = 8,
+    kNoGeomean = 16
+  };
+
+  std::string ToString(int exclude = 0) const;
+
+ private:
+  int64_t n_ = 0;  // signed for faster conversion + safe subtraction
+
+  float min_ = 1E30f;
+  float max_ = -1E30f;
+
+  double product_ = 1.0;  // product of all samples, read by GeometricMean()
+
+  // Moments
+  double m1_ = 0.0;
+  double m2_ = 0.0;
+  double m3_ = 0.0;
+  double m4_ = 0.0;
+};
+
+}  // namespace pik
+
+#endif  // PIK_DESCRIPTIVE_STATISTICS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/detect_dots.cc b/codec/L2/demos/pikEnc/host/pik/detect_dots.cc
new file mode 100755
index 0000000000..1f813f1243
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/detect_dots.cc
@@ -0,0 +1,137 @@
+// Copyright 2019 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include <cstdio>
+#include <string>
+#include "pik/codec.h"
+#include "pik/data_parallel.h"
+#include "pik/file_io.h"
+#include "pik/gauss_blur.h"
+#include "pik/image.h"
+#include "pik/image_ops.h"
+#include "pik/opsin_image.h"
+#include "pik/opsin_inverse.h"
+#include "pik/opsin_params.h"
+
+namespace pik {
+
+namespace {
+
+Image3F LoadImage(const std::string& name) {  // decode `name`, PIK_CHECK-ed
+  CodecContext ctx;
+  CodecInOut decoded(&ctx);
+  PIK_CHECK(decoded.SetFromFile(name, /*pool=*/nullptr));
+  return pik::OpsinDynamicsImage(&decoded, Rect(decoded.color()));
+}
+
+// Runs OpsinToLinear on a copy of `img` and encodes the copy to `filename`.
+void DumpImage(const Image3F& img, const char* filename) {
+  Image3F linear(img.xsize(), img.ysize());
+  CopyImageTo(img, &linear);
+  OpsinToLinear(&linear);
+  CodecContext ctx;
+  CodecInOut io(&ctx);
+  io.SetFromImage(std::move(linear), ctx.c_linear_srgb[0]);
+  PIK_CHECK(io.EncodeToFile(ctx.c_srgb[0], 8, filename));  // as in LoadImage
+}
+
+// Scans `rect` of `img` for values of at least kHowHigh * delta. Returns false
+// once more than kHowLonely + 1 such values have been seen, true otherwise.
+bool IsLonely(Rect rect, const ImageF& img, float delta) {
+  // Tuning constants: tolerated count and the fraction of delta that counts.
+  constexpr size_t kHowLonely = 3;
+  const double kHowHigh = 0.37559546016936962;
+  const double threshold = kHowHigh * delta;
+  size_t hits = 0;  // qualifying values seen so far
+  for (size_t y = 0; y < rect.ysize(); ++y) {
+    const float* row = rect.ConstRow(img, y);
+    for (size_t x = 0; x < rect.xsize(); ++x) {
+      if (!(row[x] >= threshold)) continue;  // negated form keeps NaN handling
+      if (hits > kHowLonely) return false;
+      ++hits;
+    }
+  }
+  return true;
+}
+}  // namespace
+
+void SplitDots(const Image3F& image, Image3F* without_dots, Image3F* dots) {
+  // Splits `image` into `dots` and `without_dots`. Parameters for the Gaussian:
+  constexpr size_t kRadius = 5;
+  const double kSigma = 3.0;
+  // When to consider a delta to be large enough for further investigation.
+  const double kDotThreshold = 0.85;
+  // Side length of the rect for the neighborhood.
+  constexpr size_t kSide = 7;
+  CompressParams params;  // NOTE(review): never read below
+  CodecContext codec_context;
+  CodecInOut output_image(&codec_context);  // NOTE(review): never read below
+  ColorEncoding encoding;  // NOTE(review): set up here but not used afterwards
+  encoding.transfer_function = TransferFunction::kLinear;
+  PIK_ASSERT(ColorManagement::SetProfileFromFields(&encoding));
+  ImageF sum_of_squares(image.xsize(), image.ysize());
+  std::vector<double> gauss_kernelD = GaussianKernel(kRadius, kSigma);
+  std::vector<float> gauss_kernel(gauss_kernelD.size());  // float copy of the kernel
+  for (int i = 0; i < gauss_kernelD.size(); ++i) {
+    gauss_kernel[i] = gauss_kernelD[i];
+  }
+  *without_dots = Convolve(image, gauss_kernel);
+  CopyImageTo(image, dots);
+  SubtractFrom(*without_dots, dots);  // presumably dots = image - blurred; TODO confirm
+  // Pass 1: sum_of_squares(x, y) = sum over the 3 planes of dots(x, y)^2.
+  for (size_t y = 0; y < image.ysize(); ++y) {
+    std::array<const float*, 3> rows;  // NOTE(review): filled but never read
+    std::array<const float*, 3> dot_rows;
+    float* sos_row = sum_of_squares.Row(y);
+    for (size_t c = 0; c < 3; c++) {
+      rows[c] = image.Plane(c).ConstRow(y);
+      dot_rows[c] = dots->Plane(c).ConstRow(y);
+    }
+    for (size_t x = 0; x < image.xsize(); ++x) {
+      sos_row[x] = 0.0f;
+      for (size_t c = 0; c < 3; c++) {
+        sos_row[x] += dot_rows[c][x] * dot_rows[c][x];
+      }
+    }
+  }
+  // Pass 2: overwrite every pixel of both outputs (dot or not a dot).
+  for (size_t y = 0; y < image.ysize(); ++y) {
+    const float* PIK_RESTRICT sos_row = sum_of_squares.ConstRow(y);
+    const float* PIK_RESTRICT rows[3];
+    float* PIK_RESTRICT dots_rows[3];
+    float* PIK_RESTRICT without_dots_rows[3];
+    for (size_t c = 0; c < 3; c++) {
+      rows[c] = image.Plane(c).ConstRow(y);
+      dots_rows[c] = dots->PlaneRow(c, y);
+      without_dots_rows[c] = without_dots->PlaneRow(c, y);
+    }
+    for (size_t x = 0; x < image.xsize(); ++x) {
+      bool keep_original = true;
+      // A dot needs a large squared delta and must pass the IsLonely() test.
+      if ((sos_row[x] > kDotThreshold)) {
+        Rect rect = Rect(std::max<int64_t>(x - kSide / 2, 0),
+                         std::max<int64_t>(y - kSide / 2, 0), kSide, kSide,
+                         image.xsize(), image.ysize());
+        if (IsLonely(rect, sum_of_squares, sos_row[x])) {
+          std::array<float, 3> medians = Image3Median(image, rect);
+          keep_original = false;
+          for (size_t c = 0; c < 3; c++) {
+            without_dots_rows[c][x] = medians[c];
+            dots_rows[c][x] = rows[c][x] - without_dots_rows[c][x];
+          }
+        }
+      }
+      // Not a dot: keep the original pixel and record no dot value.
+      if (keep_original) {
+        for (size_t c = 0; c < 3; c++) {
+          without_dots_rows[c][x] = rows[c][x];
+          dots_rows[c][x] = 0.0f;
+        }
+      }
+    }
+  }
+}
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/detect_dots.h b/codec/L2/demos/pikEnc/host/pik/detect_dots.h
new file mode 100755
index 0000000000..fa3bd81b27
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/detect_dots.h
@@ -0,0 +1,33 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+// We attempt to remove dots, or speckle from images using Gaussian blur.
+#ifndef PIK_RESEARCH_REMOVE_DOTS_H_
+#define PIK_RESEARCH_REMOVE_DOTS_H_
+
+#include <cstdio>
+#include <string>
+
+#include "pik/codec.h"
+#include "pik/data_parallel.h"
+#include "pik/file_io.h"
+#include "pik/gauss_blur.h"
+#include "pik/image.h"
+#include "pik/opsin_image.h"
+#include "pik/opsin_inverse.h"
+#include "pik/opsin_params.h"
+
+namespace pik {
+
+// Detects dots in a given `image` and splits the `image` into two images:
+// - `dots`, containing only the dots, and
+// - `without_dots`, containing the original image, where the dots have been
+//   replaced with the median of the surrounding pixels.
+void SplitDots(const Image3F& image, Image3F* without_dots, Image3F* dots);
+
+}  // namespace pik
+
+#endif  // PIK_RESEARCH_REMOVE_DOTS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/dpik.cc b/codec/L2/demos/pikEnc/host/pik/dpik.cc
new file mode 100755
index 0000000000..e18d9be538
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/dpik.cc
@@ -0,0 +1,225 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/dpik.h"
+
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "pik/data_parallel.h"
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/arch_specific.h"
+#include "pik/args.h"
+#include "pik/common.h"
+#include "pik/file_io.h"
+#include "pik/image.h"
+#include "pik/os_specific.h"
+#include "pik/padded_bytes.h"
+#include "pik/pik.h"
+#include "pik/pik_info.h"
+#include "pik/profiler.h"
+#include "pik/robust_statistics.h"
+#include "pik/simd/targets.h"
+
+namespace pik {
+
+DecompressArgs::DecompressArgs() {
+  // TODO(janwas): differentiate between cores/HT
+  num_threads = AvailableCPUs().size() / 2;  // default: half the reported CPUs
+}
+
+void DecompressArgs::AddCommandLineOptions(
+    tools::CommandLineParser* cmdline) {
+  // Binds each option to a member of *this. Positional arguments first.
+  cmdline->AddPositionalOption("INPUT", "the compressed input file", &file_in);
+
+  cmdline->AddPositionalOption(
+      "OUTPUT", "the output can be PNG with ICC, or PPM/PFM.", &file_out);
+
+  // Flags. ('\0' presumably means "no short option name" - TODO confirm.)
+  cmdline->AddOptionValue('\0', "bits_per_sample", "N",
+                          "defaults to original (input) bit depth",
+                          &bits_per_sample, &ParseUnsigned);
+
+  cmdline->AddOptionValue('\0', "num_threads", "N",
+                          "The number of threads to use", &num_threads,
+                          &ParseUnsigned);
+
+  cmdline->AddOptionValue('\0', "color_space", "RGB_D65_SRG_Rel_Lin",
+                          "defaults to original (input) color space",
+                          &color_space, &ParseString);
+  // NOTE(review): num_reps is the only option registered with a null help text.
+  cmdline->AddOptionValue('\0', "num_reps", "N", nullptr, &num_reps,
+                          &ParseUnsigned);
+
+  cmdline->AddOptionValue('\0', "noise", "0", "disables noise generation",
+                          &params.noise, &ParseOverride);
+
+  cmdline->AddOptionValue('\0', "gradient", "0",
+                          "disables the extra gradient map", &params.gradient,
+                          &ParseOverride);
+
+  cmdline->AddOptionValue('\0', "adaptive_reconstruction", "0|1",
+                          "disables/enables extra filtering",
+                          &params.adaptive_reconstruction, &ParseOverride);
+
+  cmdline->AddOptionValue('\0', "gaborish", "0..7",
+                          "chooses deblocking strength (4=normal).",
+                          &params.gaborish, &ParseGaborishStrength);
+
+  cmdline->AddOptionValue('s', "downsampling", "1,2,4,8",
+                          "maximum permissible downsampling factor",
+                          &params.max_downsampling, &ParseUnsigned);
+
+  cmdline->AddOptionValue('\0', "print_profile", "0|1",
+                          "print timing information before exiting",
+                          &print_profile, &ParseOverride);
+}
+
+// Checks the parsed arguments in a fixed order and reports the first problem
+// to stderr: noise forced on, gradient forced on, then a missing INPUT file.
+// Returns true only if none of these apply.
+Status DecompressArgs::ValidateArgs() {
+  if (params.noise == Override::kOn) {
+    fprintf(stderr, "Noise can only be enabled by the encoder.\n");
+    return PIK_FAILURE("Cannot force noise on");
+  } else if (params.gradient == Override::kOn) {
+    fprintf(stderr, "Gradient can only be enabled by the encoder.\n");
+    return PIK_FAILURE("Cannot force gradient on");
+  } else if (file_in == nullptr) {
+    fprintf(stderr, "Missing INPUT filename.\n");
+    return false;
+  }
+  return true;
+}
+
+void DecompressStats::NotifyElapsed(double elapsed_seconds) {
+  PIK_ASSERT(elapsed_seconds > 0.0);  // a non-positive duration is a caller bug
+  elapsed_.push_back(elapsed_seconds);  // kept for SummarizeElapsed()
+}
+
+// Prints image size, throughput (central value and [min, max]) and rep/thread
+// counts to stderr. Propagates an error from SummarizeElapsed().
+Status DecompressStats::Print(const CodecInOut& io, size_t downsampling,
+                              ThreadPool* pool) {
+  ElapsedStats summary;
+  PIK_RETURN_IF_ERROR(SummarizeElapsed(&summary));
+  char variability[20] = {'\0'};
+  if (summary.variability != 0.0) {
+    snprintf(variability, sizeof(variability), " (var %.2f)",
+             summary.variability);
+  }
+  const size_t width = io.xsize();
+  const size_t height = io.ysize();
+  const size_t channels = io.c_current().Channels() + io.HasAlpha();
+  const size_t bytes = downsampling * downsampling * width * height * channels *
+                       DivCeil(io.original_bits_per_sample(), kBitsPerByte);
+  const double megabytes = bytes * 1E-6;
+  const double mbps = megabytes / summary.central_tendency;
+  // Longest run gives the lowest throughput, hence the swapped min/max.
+  const double mbps_min = megabytes / summary.max;
+  const double mbps_max = megabytes / summary.min;
+
+  fprintf(stderr,
+          "%zu x %zu, %s%.2f MB/s [%.2f, %.2f]%s, %zu reps, %zu threads).\n",
+          width, height, summary.type, mbps, mbps_min, mbps_max, variability,
+          elapsed_.size(), NumWorkerThreads(pool));
+  return true;
+}
+
+// Fills *s with the min/max of the recorded times and a central value whose
+// kind (named in s->type) depends on how many repetitions were recorded.
+Status DecompressStats::SummarizeElapsed(ElapsedStats* s) {
+  const size_t reps = elapsed_.size();
+  if (reps == 0) return PIK_FAILURE("Didn't call NotifyElapsed");
+
+  s->min = *std::min_element(elapsed_.begin(), elapsed_.end());
+  s->max = *std::max_element(elapsed_.begin(), elapsed_.end());
+  s->variability = 0.0;  // Only the "mode" summary below reports a spread.
+
+  // Single rep
+  if (reps == 1) {
+    s->central_tendency = elapsed_[0];
+    s->type = "";
+    return true;
+  }
+
+  // Two: skip first (noisier)
+  if (reps == 2) {
+    s->central_tendency = elapsed_[1];
+    s->type = "second: ";
+    return true;
+  }
+
+  // Prefer geomean unless numerically unreliable (too many reps). As above,
+  // the first repetition is left out of the product.
+  if (std::pow(elapsed_[0], reps) < 1E100) {
+    double product = 1.0;
+    for (size_t i = 1; i < reps; ++i) {
+      product *= elapsed_[i];
+    }
+    s->central_tendency = std::pow(product, 1.0 / (reps - 1));
+    s->type = "geomean: ";
+    return true;
+  }
+
+  // Else: mode. Note that this sorts elapsed_ in place.
+  std::sort(elapsed_.begin(), elapsed_.end());
+  s->central_tendency = HalfSampleMode()(elapsed_.data(), elapsed_.size());
+  s->variability = MedianAbsoluteDeviation(elapsed_, s->central_tendency);
+  s->type = "mode: ";
+  return true;
+}
+
+// Decodes `compressed` into *io once and records the elapsed time in *stats
+// (called num_reps times). *downsampling receives info.downsampling.
+Status Decompress(const PaddedBytes& compressed, const DecompressParams& params,
+                  ThreadPool* pool, CodecInOut* PIK_RESTRICT io,
+                  size_t* downsampling, DecompressStats* PIK_RESTRICT stats) {
+  PikInfo info;
+  const double start = Now();
+  if (!PikToPixels(params, compressed, io, &info, pool)) {
+    fprintf(stderr, "Failed to decompress.\n");
+    return false;
+  }
+  *downsampling = info.downsampling;
+  stats->NotifyElapsed(Now() - start);
+  return true;
+}
+
+// Writes the decoded image to args.file_out, optionally overriding the color
+// space and bit depth; does nothing when there is no output file or no reps.
+Status WriteOutput(const DecompressArgs& args, const CodecInOut& io) {
+  // (Writing large PNGs is slow, so allow skipping it for benchmarks.)
+  const bool can_write = args.num_reps != 0 && args.file_out != nullptr;
+  if (!can_write) return true;
+
+  // Start from the original encoding; args.color_space replaces it if set.
+  ColorEncoding target = io.dec_c_original;
+  if (!args.color_space.empty()) {
+    ProfileParams profile;
+    if (!(ParseDescription(args.color_space, &profile) &&
+          ColorManagement::SetFromParams(profile, &target))) {
+      fprintf(stderr, "Failed to apply color_space.\n");
+      return false;
+    }
+  }
+
+  // Same for the bit depth: 0 means "keep the original".
+  size_t bits = args.bits_per_sample;
+  if (bits == 0) bits = io.original_bits_per_sample();
+  if (!io.EncodeToFile(target, bits, args.file_out)) {
+    fprintf(stderr, "Failed to write decoded image.\n");
+    return false;
+  }
+  fprintf(stderr, "Wrote %zu bytes; done.\n", io.enc_size);
+  return true;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/dpik.h b/codec/L2/demos/pikEnc/host/pik/dpik.h
new file mode 100755
index 0000000000..c4ada69f66
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/dpik.h
@@ -0,0 +1,73 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_DPIK_H_
+#define PIK_DPIK_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "pik/cmdline.h"
+#include "pik/codec.h"
+#include "pik/data_parallel.h"
+#include "pik/padded_bytes.h"
+#include "pik/pik_params.h"
+#include "pik/status.h"
+
+namespace pik {
+
+struct DecompressArgs {  // Options of the dpik command-line decompressor.
+  // Initialize non-static default options.
+  DecompressArgs();
+
+  // Add all the command line options to the CommandLineParser. Note that the
+  // options are tied to the instance that this was called on.
+  void AddCommandLineOptions(tools::CommandLineParser* cmdline);
+
+  // Validate the passed arguments, checking whether all passed options are
+  // compatible. Returns whether the validation was successful.
+  Status ValidateArgs();
+
+  // The parameters.
+  const char* file_in = nullptr;  // Path of the compressed input.
+  const char* file_out = nullptr;  // Output path; nullptr = do not write.
+  size_t bits_per_sample = 0;  // 0 = keep the original bit depth.
+  size_t num_threads;  // NOTE(review): no default here; presumably set in ctor.
+  std::string color_space;  // Color space description; empty = keep original.
+  DecompressParams params;  // Forwarded to the decoder.
+  size_t num_reps = 1;  // Number of decode passes; 0 also skips writing.
+  Override print_profile = Override::kDefault;  // kOn prints profiler results.
+};
+
+class DecompressStats {  // Timing statistics over repeated decode passes.
+ public:
+  void NotifyElapsed(double elapsed_seconds);  // Reports one pass duration.
+
+  Status Print(const CodecInOut& io, size_t downsampling, ThreadPool* pool);
+
+ private:
+  struct ElapsedStats {  // Summary filled in by SummarizeElapsed.
+    double central_tendency;
+    double min;
+    double max;
+    double variability;  // E.g. a median absolute deviation of the samples.
+    const char* type;  // Label for central_tendency, e.g. "mode: ".
+  };
+
+  Status SummarizeElapsed(ElapsedStats* s);  // Fills |s| from elapsed_.
+
+  std::vector<double> elapsed_;  // Presumably the NotifyElapsed samples.
+};
+
+Status Decompress(const PaddedBytes& compressed, const DecompressParams& params,
+                  ThreadPool* pool, CodecInOut* PIK_RESTRICT io,
+                  size_t* downsampling, DecompressStats* PIK_RESTRICT stats);  // One timed decode pass.
+
+Status WriteOutput(const DecompressArgs& args, const CodecInOut& io);  // Writes |io| to args.file_out, if set.
+
+}  // namespace pik
+
+#endif  // PIK_DPIK_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/dpik_main.cc b/codec/L2/demos/pikEnc/host/pik/dpik_main.cc
new file mode 100755
index 0000000000..0ae07bb819
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/dpik_main.cc
@@ -0,0 +1,78 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/cache_aligned.h"
+#include "pik/cmdline.h"
+#include "pik/dpik.h"
+#include "pik/file_io.h"
+#include "pik/os_specific.h"
+#include "pik/padded_bytes.h"
+#include "pik/profiler.h"
+
+namespace pik {
+namespace {
+
+int DecompressMain(int argc, const char* argv[]) {
+  // Register the options on the parser, then parse and cross-check them.
+  DecompressArgs args;
+  tools::CommandLineParser cmdline;
+  args.AddCommandLineOptions(&cmdline);
+  const bool args_ok = cmdline.Parse(argc, argv) && args.ValidateArgs();
+  if (!args_ok) {
+    cmdline.PrintHelp();
+    return 1;
+  }
+
+  // Every target enabled at build time (SIMD_ENABLE) must exist on this CPU.
+  const int supported_targets = TargetBitfield().Bits();
+  if ((supported_targets & SIMD_ENABLE) != SIMD_ENABLE) {
+    fprintf(stderr, "CPU does not support all enabled targets => exiting.\n");
+    return 1;
+  }
+
+  PaddedBytes compressed;
+  if (!ReadFile(args.file_in, &compressed)) return 1;
+  fprintf(stderr, "Read %zu compressed bytes\n", compressed.size());
+
+  CodecContext codec_context;
+  ThreadPool pool(args.num_threads);
+  DecompressStats stats;
+
+  // Pinning workers to CPUs gave a 1.1-1.2x speedup on a 36-core machine.
+  const std::vector<int> cpus = AvailableCPUs();
+  pool.RunOnEachThread([&cpus](const int task, const int thread) {
+    if (thread < cpus.size() && !PinThreadToCPU(cpus[thread])) {
+      fprintf(stderr, "WARNING: failed to pin thread %d.\n", thread);
+    }
+  });
+
+  // Decode num_reps times into the same |io|; each pass is timed in |stats|.
+  CodecInOut io(&codec_context);
+  size_t downsampling = 1;
+  for (size_t rep = 0; rep != args.num_reps; ++rep) {
+    if (!Decompress(compressed, args.params, &pool, &io, &downsampling,
+                    &stats)) {
+      return 1;
+    }
+  }
+
+  if (!WriteOutput(args, io)) return 1;
+  (void)stats.Print(io, downsampling, &pool);  // Result deliberately unused.
+  if (args.print_profile == Override::kOn) {
+    PROFILER_PRINT_RESULTS();
+  }
+  CacheAligned::PrintStats();
+  return 0;
+}
+
+}  // namespace
+}  // namespace pik
+
+int main(int argc, const char* argv[]) {  // Process entry point.
+  return pik::DecompressMain(argc, argv);  // Exit code: 0 on success, else 1.
+}
diff --git a/codec/L2/demos/pikEnc/host/pik/entropy_coder.cc b/codec/L2/demos/pikEnc/host/pik/entropy_coder.cc
new file mode 100755
index 0000000000..615397b39f
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/entropy_coder.cc
@@ -0,0 +1,1216 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/entropy_coder.h"
+
+#include <stdint.h>
+#include <string.h>
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "pik/ac_strategy.h"
+#include "pik/ans_decode.h"
+#include "pik/ans_params.h"
+#include "pik/bit_reader.h"
+#include "pik/common.h"
+#include "pik/compiler_specific.h"
+#include "pik/context_map_decode.h"
+#include "pik/dc_predictor.h"
+#include "pik/fast_log.h"
+#include "pik/image.h"
+#include "pik/profiler.h"
+#include "pik/status.h"
+#include "pik/write_bits.h"
+
+namespace pik {
+
+// Maps a packed (SKIP, BITS) pair to its symbol. The symbols are reordered by
+// decreasing population-count, keeping the first end-of-block symbol in
+// place. Round-trip, with SKIP and BITS both in [0, 15]:
+//  skip_and_bits = (SKIP << 4) | BITS
+//  symbol = kSkipAndBitsSymbol[skip_and_bits]
+//  SKIP = kSkipLut[symbol]
+//  BITS = kBitsLut[symbol]
+constexpr uint8_t kSkipAndBitsSymbol[256] = {
+    0,   1,   2,   3,   5,   10,  17,  32,  68,  83,  84,  85,  86,  87,  88,
+    89,  90,  4,   7,   12,  22,  31,  43,  60,  91,  92,  93,  94,  95,  96,
+    97,  98,  99,  6,   14,  26,  36,  48,  66,  100, 101, 102, 103, 104, 105,
+    106, 107, 108, 109, 8,   19,  34,  44,  57,  78,  110, 111, 112, 113, 114,
+    115, 116, 117, 118, 119, 9,   27,  39,  52,  61,  79,  120, 121, 122, 123,
+    124, 125, 126, 127, 128, 129, 11,  28,  41,  53,  64,  80,  130, 131, 132,
+    133, 134, 135, 136, 137, 138, 139, 13,  33,  46,  63,  72,  140, 141, 142,
+    143, 144, 145, 146, 147, 148, 149, 150, 15,  35,  47,  65,  69,  151, 152,
+    153, 154, 155, 156, 157, 158, 159, 160, 161, 16,  37,  51,  62,  74,  162,
+    163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 18,  38,  50,  59,  75,
+    173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 20,  40,  54,  76,
+    82,  184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 23,  42,  55,
+    77,  195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 24,  45,
+    56,  70,  207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 25,
+    49,  58,  71,  219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230,
+    29,  67,  81,  231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242,
+    21,  30,  73,  243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+    255,
+};
+
+constexpr uint8_t kSkipLut[256] = {  // Inverse lookup: symbol -> SKIP.
+    0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x2, 0x1, 0x3, 0x4, 0x0, 0x5, 0x1, 0x6, 0x2,
+    0x7, 0x8, 0x0, 0x9, 0x3, 0xa, 0xf, 0x1, 0xb, 0xc, 0xd, 0x2, 0x4, 0x5, 0xe,
+    0xf, 0x1, 0x0, 0x6, 0x3, 0x7, 0x2, 0x8, 0x9, 0x4, 0xa, 0x5, 0xb, 0x1, 0x3,
+    0xc, 0x6, 0x7, 0x2, 0xd, 0x9, 0x8, 0x4, 0x5, 0xa, 0xb, 0xc, 0x3, 0xd, 0x9,
+    0x1, 0x4, 0x8, 0x6, 0x5, 0x7, 0x2, 0xe, 0x0, 0x7, 0xc, 0xd, 0x6, 0xf, 0x8,
+    0x9, 0xa, 0xb, 0x3, 0x4, 0x5, 0xe, 0xa, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2,
+    0x2, 0x2, 0x2, 0x2, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x4,
+    0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x5, 0x5, 0x5, 0x5, 0x5, 0x5,
+    0x5, 0x5, 0x5, 0x5, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6,
+    0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x8, 0x8, 0x8, 0x8,
+    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9,
+    0x9, 0x9, 0x9, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xb,
+    0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xc, 0xc, 0xc, 0xc,
+    0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd,
+    0xd, 0xd, 0xd, 0xd, 0xd, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe,
+    0xe, 0xe, 0xe, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
+    0xf,
+};
+
+constexpr uint8_t kBitsLut[256] = {  // Inverse lookup: symbol -> BITS.
+    0x0, 0x1, 0x2, 0x3, 0x1, 0x4, 0x1, 0x2, 0x1, 0x1, 0x5, 0x1, 0x3, 0x1, 0x2,
+    0x1, 0x1, 0x6, 0x1, 0x2, 0x1, 0x0, 0x4, 0x1, 0x1, 0x1, 0x3, 0x2, 0x2, 0x1,
+    0x1, 0x5, 0x7, 0x2, 0x3, 0x2, 0x4, 0x2, 0x2, 0x3, 0x2, 0x3, 0x2, 0x6, 0x4,
+    0x2, 0x3, 0x3, 0x5, 0x2, 0x3, 0x3, 0x4, 0x4, 0x3, 0x3, 0x3, 0x5, 0x3, 0x4,
+    0x7, 0x5, 0x4, 0x4, 0x5, 0x4, 0x6, 0x2, 0x8, 0x5, 0x4, 0x4, 0x5, 0x2, 0x5,
+    0x5, 0x4, 0x4, 0x6, 0x6, 0x6, 0x3, 0x5, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
+    0x0, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0, 0x7, 0x8, 0x9, 0xa, 0xb,
+    0xc, 0xd, 0xe, 0xf, 0x0, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0,
+    0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0, 0x7, 0x8, 0x9, 0xa, 0xb,
+    0xc, 0xd, 0xe, 0xf, 0x0, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
+    0x0, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0, 0x6, 0x7, 0x8,
+    0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc,
+    0xd, 0xe, 0xf, 0x0, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0,
+    0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0, 0x5, 0x6, 0x7,
+    0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa,
+    0xb, 0xc, 0xd, 0xe, 0xf, 0x0, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc,
+    0xd, 0xe, 0xf, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe,
+    0xf,
+};
+
+// Symbol for a (skip, bits) pair. REQUIRED: both arguments lie in [0, 15].
+constexpr uint8_t SkipAndBitsSymbol(int skip, int bits) {
+  return kSkipAndBitsSymbol[bits | (skip << 4)];
+}
+
+// Number of consecutive Lehmer-code entries of a coefficient order that are
+// coded as one span; an all-zero span is signalled with a single bit.
+constexpr int32_t kCoeffOrderCodeSpan = 16;
+
+void ShrinkDC(const Rect& rect_dc, const Image3S& dc,
+              Image3S* PIK_RESTRICT tmp_residuals) {
+  // Writes the residuals of the DC values in |rect_dc| of |dc| to the top-left
+  // corner of |tmp_residuals|, which must be at least as large as the rect.
+  const size_t width = rect_dc.xsize();
+  const size_t height = rect_dc.ysize();
+  PIK_ASSERT(tmp_residuals->xsize() >= width);
+  PIK_ASSERT(tmp_residuals->ysize() >= height);
+
+  // Plane 1 (Y) is handled on its own by ShrinkY.
+  const Rect out_rect(0, 0, width, height);
+  ShrinkY(rect_dc, dc.Plane(1), out_rect,
+          const_cast<ImageS*>(&tmp_residuals->Plane(1)));
+
+  // ShrinkXB takes planes 0 and 2 (X, Z) interleaved sample by sample.
+  ImageS interleaved(width * 2, height);
+  for (size_t y = 0; y < height; ++y) {
+    const int16_t* PIK_RESTRICT in_x = rect_dc.ConstPlaneRow(dc, 0, y);
+    const int16_t* PIK_RESTRICT in_z = rect_dc.ConstPlaneRow(dc, 2, y);
+    int16_t* PIK_RESTRICT out_xz = interleaved.Row(y);
+    for (size_t x = 0; x < width; ++x) {
+      out_xz[2 * x] = in_x[x];
+      out_xz[2 * x + 1] = in_z[x];
+    }
+  }
+
+  ImageS interleaved_residuals(width * 2, height);
+  ShrinkXB(rect_dc, dc.Plane(1), interleaved, &interleaved_residuals);
+
+  // Split the interleaved residuals into planes 0 and 2 of the output.
+  for (size_t y = 0; y < height; ++y) {
+    const int16_t* PIK_RESTRICT in_xz = interleaved_residuals.ConstRow(y);
+    int16_t* PIK_RESTRICT out_x = tmp_residuals->PlaneRow(0, y);
+    int16_t* PIK_RESTRICT out_z = tmp_residuals->PlaneRow(2, y);
+    for (size_t x = 0; x < width; ++x) {
+      out_x[x] = in_xz[2 * x];
+      out_z[x] = in_xz[2 * x + 1];
+    }
+  }
+}
+
+void ExpandDC(const Rect& rect_dc, Image3S* PIK_RESTRICT dc,
+              ImageS* PIK_RESTRICT tmp_y, ImageS* PIK_RESTRICT tmp_xz_residuals,
+              ImageS* PIK_RESTRICT tmp_xz_expanded) {
+  // Expands the residuals stored in |dc| (within |rect_dc|) in place; the three
+  // tmp images are scratch space.
+  const size_t width = rect_dc.xsize();
+  const size_t height = rect_dc.ysize();
+  PIK_ASSERT(width <= tmp_y->xsize() && height <= tmp_y->ysize());
+  PIK_ASSERT(SameSize(*tmp_xz_residuals, *tmp_xz_expanded));
+
+  // Plane 1 (Y) is expanded first, into |tmp_y|.
+  ExpandY(rect_dc, dc->Plane(1), tmp_y);
+
+  // ExpandXB expects planes 0 and 2 (X, Z) interleaved in a single image.
+  for (size_t y = 0; y < height; ++y) {
+    const int16_t* PIK_RESTRICT in_x = rect_dc.ConstPlaneRow(*dc, 0, y);
+    const int16_t* PIK_RESTRICT in_z = rect_dc.ConstPlaneRow(*dc, 2, y);
+    int16_t* PIK_RESTRICT out_xz = tmp_xz_residuals->Row(y);
+    for (size_t x = 0; x < width; ++x) {
+      out_xz[2 * x] = in_x[x];
+      out_xz[2 * x + 1] = in_z[x];
+    }
+  }
+
+  ExpandXB(width, height, *tmp_y, *tmp_xz_residuals, tmp_xz_expanded);
+
+  // Write all three expanded planes back into |dc|, one row at a time:
+  // |tmp_y| goes to plane 1, |tmp_xz_expanded| is split into planes 0 and 2.
+  for (size_t y = 0; y < height; ++y) {
+    const int16_t* PIK_RESTRICT in_y = tmp_y->ConstRow(y);
+    const int16_t* PIK_RESTRICT in_xz = tmp_xz_expanded->ConstRow(y);
+    int16_t* PIK_RESTRICT out_x = rect_dc.PlaneRow(dc, 0, y);
+    int16_t* PIK_RESTRICT out_y = rect_dc.PlaneRow(dc, 1, y);
+    int16_t* PIK_RESTRICT out_z = rect_dc.PlaneRow(dc, 2, y);
+    // Y needs no de-interleaving, so the row is copied as a whole.
+    memcpy(out_y, in_y, width * sizeof(out_y[0]));
+    for (size_t x = 0; x < width; ++x) {
+      out_x[x] = in_xz[2 * x];
+      out_z[x] = in_xz[2 * x + 1];
+    }
+  }
+}
+
+void ComputeCoeffOrder(const Image3S& ac, const Rect& rect,
+                       int32_t* PIK_RESTRICT order) {
+  constexpr int N = kBlockDim;
+  constexpr int block_size = N * N;
+  const size_t blocks_per_row = rect.xsize() / block_size;
+  const size_t num_rows = rect.ysize();
+  const int32_t* zigzag = NaturalCoeffOrder();
+
+  // zero_counts[c * block_size + k] = number of blocks of plane c whose
+  // coefficient k is zero. Coefficient 0 is skipped, so its count stays 0.
+  int32_t zero_counts[block_size * kOrderContexts] = {0};
+  for (int c = 0; c < 3; ++c) {
+    int32_t* plane_counts = zero_counts + c * block_size;
+    for (size_t by = 0; by < num_rows; ++by) {
+      const int16_t* PIK_RESTRICT row = rect.ConstPlaneRow(ac, c, by);
+      for (size_t bx = 0; bx < blocks_per_row; ++bx) {
+        const int16_t* block = row + bx * block_size;
+        for (size_t k = 1; k < block_size; ++k) {
+          plane_counts[k] += (block[k] == 0) ? 1 : 0;
+        }
+      }
+    }
+  }
+
+  // Per order context: list the positions in natural (zig-zag) order, then
+  // sort them by increasing zero count. The counts are divided by 8 first; the
+  // exact number does not matter and the coarser value gives a less permuted
+  // order.
+  for (uint8_t ctx = 0; ctx < kOrderContexts; ++ctx) {
+    struct Entry {
+      uint32_t pos;
+      uint32_t count;
+    };
+    const size_t base = ctx * block_size;
+    Entry entries[block_size];
+    for (size_t i = 0; i < block_size; ++i) {
+      const size_t pos = zigzag[i];
+      entries[i].pos = pos;
+      entries[i].count = zero_counts[base + pos] / 8;
+    }
+
+    // Stable sort: entries with equal counts keep their natural order.
+    std::stable_sort(entries, entries + block_size,
+                     [](const Entry& a, const Entry& b) -> bool {
+                       return a.count < b.count;
+                     });
+
+    // The sorted positions are this context's coefficient order.
+    for (size_t i = 0; i < block_size; ++i) {
+      order[base + i] = entries[i].pos;
+    }
+  }
+}
+
+void EncodeCoeffOrder(const int32_t* PIK_RESTRICT order,
+                      size_t* PIK_RESTRICT storage_ix, uint8_t* storage) {
+  constexpr int N = kBlockDim;
+  constexpr int block_size = N * N;
+
+  // Express |order| in zig-zag positions, then take its Lehmer code.
+  const int32_t* to_zigzag = NaturalCoeffOrderLut();
+  int32_t zigzag_order[block_size];
+  for (size_t i = 0; i < block_size; ++i) {
+    zigzag_order[i] = to_zigzag[order[i]];
+  }
+  int32_t code[block_size];
+  ComputeLehmerCode(zigzag_order, block_size, code);
+
+  // Add one to every entry from index 1 up to the last non-zero entry; the
+  // all-zero tail after it (and entry 0) are left untouched.
+  int32_t last = block_size - 1;
+  while (last >= 1 && code[last] == 0) --last;
+  for (int32_t i = 1; i <= last; ++i) code[i] += 1;
+
+  // Emit spans of kCoeffOrderCodeSpan entries (entry 0 is never written): one
+  // flag bit, 0 for an all-zero span, else 1 followed by the span's entries.
+  for (int32_t span = 0; span < block_size; span += kCoeffOrderCodeSpan) {
+    const int32_t first = std::max<int32_t>(span, 1);
+    const int32_t limit = span + kCoeffOrderCodeSpan;
+    bool all_zero = true;
+    for (int32_t j = first; j < limit; ++j) all_zero &= (code[j] == 0);
+    WriteBits(1, all_zero ? 0 : 1, storage_ix, storage);
+    if (all_zero) continue;
+    for (int32_t j = first; j < limit; ++j) {
+      PIK_ASSERT(code[j] <= block_size);
+      int32_t rest = code[j];  // Written as 3-bit digits; 7 means "7 and more".
+      for (; rest >= 7; rest -= 7) {
+        WriteBits(3, 7, storage_ix, storage);
+      }
+      WriteBits(3, rest, storage_ix, storage);
+    }
+  }
+}
+
+// Stores in "tmp_num_nzeros", for every block of "coeffs" within "rect", the
+// number of non-zero coefficients of that block. The first coefficient of a
+// block is not counted.
+void ExtractNumNZeroes(const Rect& rect, const ImageS& coeffs,
+                       ImageI* PIK_RESTRICT tmp_num_nzeros) {
+  constexpr int N = kBlockDim;
+  constexpr int block_size = N * N;
+  PIK_CHECK(coeffs.xsize() % block_size == 0);
+  const size_t blocks_per_row = rect.xsize() / block_size;
+  const size_t num_rows = rect.ysize();
+  for (size_t by = 0; by < num_rows; ++by) {
+    const int16_t* PIK_RESTRICT in = rect.ConstRow(coeffs, by);
+    int32_t* PIK_RESTRICT out = tmp_num_nzeros->Row(by);
+    for (size_t bx = 0; bx < blocks_per_row; ++bx) {
+      const int16_t* block = in + bx * block_size;
+      // Count the zeros among coefficients 1..block_size-1; the rest are set.
+      const auto zeros = std::count(block + 1, block + block_size, 0);
+      out[bx] = static_cast<int32_t>((block_size - 1) - zeros);
+    }
+  }
+}
+
+std::string EncodeCoeffOrders(const int32_t* order, PikInfo* pik_info) {
+  constexpr int N = kBlockDim;
+  constexpr int block_size = N * N;
+  std::string encoded(kOrderContexts * 1024, '\0');  // Zeroed scratch buffer.
+  uint8_t* storage = reinterpret_cast<uint8_t*>(&encoded[0]);
+  size_t bit_pos = 0;
+  for (size_t c = 0; c < kOrderContexts; ++c) {
+    EncodeCoeffOrder(order + c * block_size, &bit_pos, storage);
+  }
+  PIK_CHECK(bit_pos < encoded.size() * kBitsPerByte);
+  encoded.resize((bit_pos + 7) / kBitsPerByte);  // Trim to the bytes used.
+  if (pik_info != nullptr) {
+    pik_info->layers[kLayerOrder].total_size += encoded.size();
+  }
+  return encoded;
+}
+
+// Upper bound on the number of clusters: the count is encoded with
+// VarLenUint8 - see EncodeContextMap and DecodeContextMap.
+constexpr size_t kMaxClusters = 256;
+// Currently we limit the number of clusters even more - for most datasets the
+// "optimal" number of clusters is less than 64, so increasing this number
+// does not produce smaller output.
+// TODO(user): find image that would require more clusters.
+// TODO(user): revise this number when non-DCT-8x8 contexts are added / used.
+static const size_t kClustersLimit = 64;
+static const size_t kNumStaticZdensContexts = 7;  // Per order context.
+static const size_t kNumStaticOrderFreeContexts = 3;
+// Number of clusters used by StaticContextMap. Should depend on N.
+static const size_t kNumStaticContexts =
+    kNumStaticOrderFreeContexts + 3 * kNumStaticZdensContexts;
+
+std::vector<uint8_t> StaticContextMap() {
+  constexpr int N = kBlockDim;
+  constexpr int block_size = N * N;
+  static const int32_t kZdensCluster[kZeroDensityContextCount] = {
+      0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 1,
+      1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 1, 1, 2, 2, 2, 2,
+      2, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 3, 3, 3, 3,
+      3, 6, 6, 6, 6, 5, 5, 2, 2, 2, 2, 2, 3, 3, 3, 3, 6, 6, 6, 6, 5,
+      5, 2, 2, 2, 2, 2, 3, 3, 3, 6, 6, 6, 6, 5, 5, 2, 2, 2, 2, 2,
+  };
+  static_assert(kOrderContexts == 3,
+                "The static context map only works with 3 order contexts");
+  PIK_ASSERT(kNumStaticContexts <= kMaxClusters);
+  std::vector<uint8_t> map(kNumContexts);
+  for (size_t c = 0; c < kOrderContexts; ++c) {
+    // The non-zero-count contexts of order context c all map to c, i.e. [0..2].
+    for (size_t i = 0; i + 1 < block_size; ++i) map[NonZeroContext(i, c)] = c;
+    // Its zero-density contexts map to the kNumStaticZdensContexts values after.
+    const size_t first_cluster =
+        kNumStaticOrderFreeContexts + c * kNumStaticZdensContexts;
+    const uint32_t first_context = ZeroDensityContextsOffset(c);
+    for (size_t i = 0; i < kZeroDensityContextCount; ++i) {
+      map[first_context + i] = first_cluster + kZdensCluster[i];
+    }
+  }
+  return map;
+}
+
+PIK_INLINE int32_t PredictFromTopAndLeft(
+    const int32_t* const PIK_RESTRICT row_top,
+    const int32_t* const PIK_RESTRICT row, size_t x, int32_t default_val) {
+  // (top + left + 1) / 2 when both neighbors exist; otherwise the one that
+  // exists, or |default_val| if there is neither a top row nor a left value.
+  const bool has_top = (row_top != nullptr);
+  const bool has_left = (x != 0);
+  if (has_top && has_left) return (row_top[x] + row[x - 1] + 1) / 2;
+  if (has_top) return row_top[x];
+  return has_left ? row[x - 1] : default_val;
+}
+
+void TokenizeQuantField(const Rect& rect, const ImageI& quant_field,
+                        const ImageI* hint, const AcStrategyImage& ac_strategy,
+                        std::vector<Token>* PIK_RESTRICT output) {
+  // Appends to |output| the tokens for the quant values within |rect|, coded
+  // as residuals against |hint| if given, else against a neighbor prediction.
+  const size_t xsize = rect.xsize();
+  const size_t ysize = rect.ysize();
+
+  // Without a hint the prediction comes from the quant values already seen
+  // (top and left neighbors), so two rows of them are kept around.
+  std::vector<int32_t> current(xsize);
+  std::vector<int32_t> last(xsize);
+
+  output->reserve(output->size() + xsize * ysize);
+
+  // Emits the packed residual of |quant| against |predicted|. A packed value
+  // of 255 or more is escaped as symbol 255 followed by quant - 1.
+  const auto emit_residual = [output](int32_t quant, int32_t predicted) {
+    const size_t q = PackSigned(quant - predicted);
+    if (q < 255) {
+      output->emplace_back(QuantContext(), q, 0, 0);
+      return;
+    }
+    output->emplace_back(QuantContext(), 255, 0, 0);
+    output->emplace_back(QuantContext(), quant - 1, 0, 0);
+  };
+
+  // Both modes share the scan; they differ only in where predictions come from.
+  const bool use_hint = (hint != nullptr);
+  for (size_t by = 0; by < ysize; ++by) {
+    const int32_t* PIK_RESTRICT row_quant = rect.ConstRow(quant_field, by);
+    const int32_t* PIK_RESTRICT row_hint =
+        use_hint ? rect.ConstRow(*hint, by) : nullptr;
+    int32_t* PIK_RESTRICT row_seen = current.data();
+    const int32_t* PIK_RESTRICT row_above = (by == 0) ? nullptr : last.data();
+    AcStrategyRow ac_strategy_row = ac_strategy.ConstRow(rect, by);
+
+    for (size_t bx = 0; bx < xsize; ++bx) {
+      const int32_t quant = row_quant[bx];
+      int32_t predicted;
+      if (use_hint) {
+        // The caller-provided hint replaces the neighbor-based prediction.
+        predicted = row_hint[bx];
+      } else {
+        // Predict first, then record the value (for every block, emitted or
+        // not) so that later blocks can use it as their neighbor.
+        predicted = PredictFromTopAndLeft(row_above, row_seen, bx, 32);
+        row_seen[bx] = quant;
+      }
+      // Blocks for which IsFirstBlock() is false emit nothing.
+      if (ac_strategy_row[bx].IsFirstBlock()) emit_residual(quant, predicted);
+    }
+    if (!use_hint) last.swap(current);  // This row is the next row's "top".
+  }
+}
+
+void TokenizeCoefficients(const int32_t* orders, const Rect& rect,
+                          const Image3S& coeffs,
+                          std::vector<Token>* PIK_RESTRICT output) {  // Appends tokens for all 3 planes.
+  constexpr int N = kBlockDim;
+  constexpr int block_size = N * N;
+  const size_t xsize_blocks = rect.xsize();
+  const size_t ysize_blocks = rect.ysize();
+  // |rect| counts blocks; a coefficient row holds block_size values per block.
+  Rect literal_rect_ac(rect.x0() * block_size, rect.y0(),
+                       xsize_blocks * block_size, ysize_blocks);
+
+  // TODO(user): update the estimate: usually fewer coefficients are used.
+  output->reserve(output->size() +
+                  3 * xsize_blocks * ysize_blocks * block_size);
+
+  ImageI tmp_num_nzeros(xsize_blocks, ysize_blocks);  // Reused for each plane.
+  for (int c = 0; c < 3; ++c) {
+    ExtractNumNZeroes(literal_rect_ac, coeffs.Plane(c), &tmp_num_nzeros);
+    for (size_t by = 0; by < ysize_blocks; ++by) {
+      const int16_t* PIK_RESTRICT row =
+          literal_rect_ac.ConstPlaneRow(coeffs, c, by);
+      int32_t* PIK_RESTRICT row_nzeros = tmp_num_nzeros.Row(by);
+      const int32_t* PIK_RESTRICT row_nzeros_top =
+          (by == 0) ? nullptr : tmp_num_nzeros.ConstRow(by - 1);
+      for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+        int32_t predicted_nzeros =
+            PredictFromTopAndLeft(row_nzeros_top, row_nzeros, bx, 32);
+        const int bctx = c;  // The order context is the plane index here.
+        const int32_t* order = &orders[bctx * block_size];
+        int32_t nzero_ctx = NonZeroContext(predicted_nzeros, bctx);
+        size_t num_nzeros = row_nzeros[bx];
+        output->emplace_back(nzero_ctx, num_nzeros, 0, 0);  // Count token.
+        if (num_nzeros == 0) continue;  // Nothing but the count to emit.
+        const int16_t* PIK_RESTRICT block = row + bx * block_size;
+        int r = 0;  // Length of the current run of zero coefficients.
+        const int histo_offset = ZeroDensityContextsOffset(bctx);
+        size_t last_k = 0;  // Scan position at which the last token was emitted.
+        for (size_t k = 1; k < block_size; ++k) {
+          PIK_ASSERT(num_nzeros > 0);
+          int16_t coeff = block[order[k]];
+          if (coeff == 0) {
+            if (++r == 16) {
+              output->emplace_back(
+                  histo_offset + ZeroDensityContext(num_nzeros, last_k),
+                  SkipAndBitsSymbol(15, 0), 0, 0);
+              // Skip 15 plus one 0-bit coefficient: a run of 16 zeros in total.
+              r = 0;
+              last_k = k;
+            }
+            continue;
+          }
+          int nbits, bits;
+          EncodeVarLenInt(coeff, &nbits, &bits);
+          PIK_ASSERT(nbits > 0);
+          PIK_ASSERT(nbits <= 15);
+          int symbol = SkipAndBitsSymbol(r, nbits);  // r < 16 at this point.
+          output->emplace_back(
+              histo_offset + ZeroDensityContext(num_nzeros, last_k), symbol,
+              nbits, bits);
+          r = 0;
+          last_k = k;
+          if (--num_nzeros == 0) break;  // The remaining coefficients are zero.
+        }
+        PIK_ASSERT(num_nzeros == 0);
+      }
+    }
+  }
+}
+
+namespace {
+
+inline double CrossEntropy(const uint32_t* counts, const size_t counts_len,
+                           const uint32_t* codes, const size_t codes_len) {
+  // Computes sum_i counts[i] * log2(total_codes / codes[i]), where total_codes
+  // sums all codes[i]; symbols with a zero code or a zero count add nothing.
+  double bits = 0.0;
+  uint32_t counted = 0;  // Sum of the counts that entered |bits|.
+  uint32_t code_total = 0;
+  for (size_t i = 0; i < codes_len; ++i) {
+    const uint32_t code = codes[i];
+    if (code == 0) continue;
+    code_total += code;
+    if (i >= counts_len || counts[i] == 0) continue;
+    bits -= counts[i] * std::log2(code);
+    counted += counts[i];
+  }
+  // The log2(total_codes) part is added once for all counted symbols.
+  if (code_total > 0) bits += counted * std::log2(code_total);
+  return bits;
+}
+
+inline double ShannonEntropy(const uint32_t* data, const size_t data_size) {
+  return CrossEntropy(data, data_size, data, data_size);  // |data| vs itself.
+}
+
+class HistogramBuilder {
+ public:
+  // Creates one empty histogram per context.
+  explicit HistogramBuilder(const size_t num_contexts)
+      : histograms_(num_contexts) {}
+
+  // Counts one occurrence of |symbol| in the histogram of context |histo_idx|.
+  void VisitSymbol(int symbol, int histo_idx) {
+    PIK_ASSERT(histo_idx < histograms_.size());
+    histograms_[histo_idx].Add(symbol);
+  }
+
+  // Clusters the histograms (if there is more than one), stores the cluster of
+  // every context in |context_map| and appends one code per cluster to |codes|.
+  // The context map is only serialized when both |storage_ix| and |storage|
+  // are non-null. If |info| is given, the cluster entropies are added to it.
+  template <class EntropyEncodingData>
+  void BuildAndStoreEntropyCodes(std::vector<EntropyEncodingData>* codes,
+                                 std::vector<uint8_t>* context_map,
+                                 size_t* storage_ix, uint8_t* storage,
+                                 PikImageSizeInfo* info) const {
+    const size_t num_contexts = histograms_.size();
+    std::vector<Histogram> clusters(histograms_);
+    context_map->resize(num_contexts);
+    if (num_contexts > 1) {
+      std::vector<uint32_t> cluster_of;
+      ClusterHistograms(histograms_, num_contexts, 1, std::vector<int>(),
+                        kClustersLimit, &clusters, &cluster_of);
+      for (size_t c = 0; c < num_contexts; ++c) {
+        (*context_map)[c] = static_cast<uint8_t>(cluster_of[c]);
+      }
+      if (storage_ix != nullptr && storage != nullptr) {
+        EncodeContextMap(*context_map, clusters.size(), storage_ix, storage);
+      }
+    }
+    if (info != nullptr) {
+      for (const Histogram& cluster : clusters) {
+        info->clustered_entropy += cluster.ShannonEntropy();
+      }
+    }
+    for (Histogram& cluster : clusters) {
+      EntropyEncodingData code;
+      code.BuildAndStore(cluster.data_.data(), cluster.data_.size(), storage_ix,
+                         storage);
+      codes->emplace_back(std::move(code));
+    }
+  }
+
+ private:
+  // Symbol counts of one context: data_[s] is the number of times symbol s
+  // was counted and total_count_ is the sum of all those counts.
+  struct Histogram {
+    Histogram() : total_count_(0) { data_.reserve(256); }
+
+    // Zeroes all counts but keeps the current alphabet size.
+    void Clear() {
+      std::fill(data_.begin(), data_.end(), 0u);
+      total_count_ = 0;
+    }
+
+    // Grows the alphabet on demand, then counts |symbol|.
+    void Add(int symbol) {
+      if (symbol >= data_.size()) data_.resize(symbol + 1);
+      data_[symbol] += 1;
+      total_count_ += 1;
+    }
+
+    // Adds the counts of |other| to this histogram, symbol by symbol.
+    void AddHistogram(const Histogram& other) {
+      const size_t other_size = other.data_.size();
+      if (other_size > data_.size()) data_.resize(other_size);
+      for (size_t i = 0; i < other_size; ++i) data_[i] += other.data_[i];
+      total_count_ += other.total_count_;
+    }
+    float PopulationCost() const {
+      std::vector<int> counts(data_.begin(), data_.end());
+      return ANSPopulationCost(counts.data(), counts.size(), total_count_);
+    }
+    double ShannonEntropy() const {
+      return pik::ShannonEntropy(data_.data(), data_.size());
+    }
+
+    std::vector<uint32_t> data_;
+    uint32_t total_count_;
+  };
+  std::vector<Histogram> histograms_;
+};
+
+}  // namespace
+
+std::string BuildAndEncodeHistograms(
+    size_t num_contexts, const std::vector<std::vector<Token>>& tokens,
+    std::vector<ANSEncodingData>* codes, std::vector<uint8_t>* context_map,
+    PikImageSizeInfo* info) {
+  // One histogram per context, filled from every token of every list.
+  HistogramBuilder builder(num_contexts);
+  for (const std::vector<Token>& token_list : tokens) {
+    for (const Token& token : token_list) {
+      builder.VisitSymbol(token.symbol, token.context);
+    }
+  }
+  // Cluster the histograms and serialize the context map and the codes into
+  // a zero-initialized scratch buffer.
+  const size_t max_out_size = 1024 * (num_contexts + 4);
+  std::string output(max_out_size, '\0');
+  uint8_t* storage = reinterpret_cast<uint8_t*>(&output[0]);
+  storage[0] = 0;
+  size_t storage_ix = 0;
+  builder.BuildAndStoreEntropyCodes(codes, context_map, &storage_ix, storage,
+                                    info);
+  // Pad with zero bits up to the next byte boundary, then trim the buffer.
+  const size_t padding_bits = (8 - (storage_ix & 7)) & 7;
+  WriteBits(padding_bits, 0, &storage_ix, storage);
+  PIK_ASSERT(storage_ix % kBitsPerByte == 0);
+  const size_t histo_bytes = storage_ix / 8;
+  PIK_CHECK(histo_bytes <= max_out_size);
+  output.resize(histo_bytes);
+  if (info != nullptr) {
+    info->num_clustered_histograms += codes->size();
+    info->histogram_size += histo_bytes;
+    info->total_size += histo_bytes;
+  }
+  return output;
+}
+
+std::string BuildAndEncodeHistogramsFast(
+    const std::vector<std::vector<Token>>& tokens,
+    std::vector<ANSEncodingData>* codes, std::vector<uint8_t>* context_map,
+    PikImageSizeInfo* info) {
+  // Fast path: no clustering, a fixed context map picks the histogram.
+  *context_map = StaticContextMap();
+  std::vector<uint32_t> histograms(kNumStaticContexts << 8);
+  for (const std::vector<Token>& token_list : tokens) {
+    for (const Token& token : token_list) {
+      const uint32_t histo_idx = (*context_map)[token.context];
+      ++histograms[(histo_idx << 8) + token.symbol];
+    }
+  }
+  if (info != nullptr) {
+    for (size_t c = 0; c < kNumStaticContexts; ++c) {
+      info->clustered_entropy += ShannonEntropy(&histograms[c << 8], 256);
+    }
+  }
+  // Serialize the context map followed by one code per 256-bin histogram.
+  const size_t max_out_size = kNumStaticContexts * 1024;
+  std::string output(max_out_size, '\0');
+  uint8_t* storage = reinterpret_cast<uint8_t*>(&output[0]);
+  storage[0] = 0;
+  size_t storage_ix = 0;
+  EncodeContextMap(*context_map, kNumStaticContexts, &storage_ix, storage);
+  for (size_t c = 0; c < kNumStaticContexts; ++c) {
+    ANSEncodingData code;
+    code.BuildAndStore(histograms.data() + (c << 8), 256, &storage_ix,
+                       storage);
+    codes->emplace_back(std::move(code));
+  }
+  // Zero-pad to a whole number of bytes and trim the buffer to that size.
+  WriteZeroesToByteBoundary(&storage_ix, storage);
+  const size_t histo_bytes = storage_ix / 8;
+  PIK_CHECK(histo_bytes <= max_out_size);
+  output.resize(histo_bytes);
+  if (info != nullptr) {
+    info->num_clustered_histograms += codes->size();
+    info->histogram_size += histo_bytes;
+  }
+  return output;
+}
+
+std::string WriteTokens(const std::vector<Token>& tokens,
+                        const std::vector<ANSEncodingData>& codes,
+                        const std::vector<uint8_t>& context_map,
+                        PikImageSizeInfo* pik_info) {  // pik_info may be null.
+  const size_t max_out_size = 4 * tokens.size() + 4096;
+  std::string output(max_out_size, 0);
+  size_t storage_ix = 0;  // Write position in bits.
+  uint8_t* storage = reinterpret_cast<uint8_t*>(&output[0]);
+  storage[0] = 0;
+  size_t num_extra_bits = 0;  // Raw (non-ANS) bits written, for statistics.
+  PIK_ASSERT(kANSBufferSize <= (1 << 16));  // In-chunk index is packed above bit 16.
+  for (int start = 0; start < tokens.size(); start += kANSBufferSize) {
+    std::vector<uint32_t> out;  // Entries: (index within chunk << 16) | 16 ANS bits.
+    out.reserve(kANSBufferSize);
+    const int end = std::min<int>(start + kANSBufferSize, tokens.size());
+    ANSCoder ans;  // Fresh coder state for every chunk.
+    for (int i = end - 1; i >= start; --i) {  // Symbols are fed last-to-first.
+      const Token token = tokens[i];
+      const uint8_t histo_idx = context_map[token.context];
+      const ANSEncSymbolInfo info = codes[histo_idx].ans_table[token.symbol];
+      uint8_t nbits = 0;
+      uint32_t bits = ans.PutSymbol(info, &nbits);
+      if (nbits == 16) {  // The coder produced a 16-bit word for this symbol.
+        out.push_back(((i - start) << 16) | bits);
+      }
+    }
+    const uint32_t state = ans.GetState();
+    WriteBits(16, (state >> 16) & 0xffff, &storage_ix, storage);  // High half.
+    WriteBits(16, state & 0xffff, &storage_ix, storage);  // Low half.
+    int tokenidx = start;
+    for (int i = out.size(); i >= 0; --i) {  // `out` is walked back to front.
+      int nextidx = i > 0 ? start + (out[i - 1] >> 16) : end;
+      for (; tokenidx < nextidx; ++tokenidx) {  // Extra bits of earlier tokens.
+        const Token token = tokens[tokenidx];
+        WriteBits(token.nbits, token.bits, &storage_ix, storage);
+        num_extra_bits += token.nbits;
+      }
+      if (i > 0) {  // Then the ANS word recorded for token `nextidx`.
+        WriteBits(16, out[i - 1] & 0xffff, &storage_ix, storage);
+      }
+    }
+  }
+  const size_t out_size = (storage_ix + 7) >> 3;  // Bits rounded up to bytes.
+  PIK_CHECK(out_size <= max_out_size);  // Checked only after the writes.
+  output.resize(out_size);
+  if (pik_info) {
+    pik_info->entropy_coded_bits += storage_ix - num_extra_bits;
+    pik_info->extra_bits += num_extra_bits;
+    pik_info->total_size += out_size;
+  }
+  return output;
+}
+
+namespace {
+const constexpr int kRleSymStart =  // Symbols from here on encode zero runs.
+    kHybridEncodingSplitToken +
+    2 * (15 - kHybridEncodingDirectSplitExponent) + 1;
+const constexpr int kEntropyCodingNumSymbols = 2 * kRleSymStart;  // Alphabet size given to DecodeHistograms.
+}  // namespace
+
+std::string EncodeImageData(const Rect& rect, const Image3S& img,
+                            PikImageSizeInfo* info) {
+  // Encodes the three planes twice -- with and without zero-run tokens -- and
+  // returns whichever byte string is shorter (the first attempt wins ties).
+  const size_t width = rect.xsize();
+  const size_t height = rect.ysize();
+  std::string smallest;
+  PikImageSizeInfo smallest_stats;
+
+  for (bool use_rle : {true, false}) {
+    std::vector<std::vector<Token>> tokens(1);
+    tokens[0].reserve(3 * height * width);
+    size_t zero_run = 0;  // Zeros seen since the last emitted token.
+
+    // Emits one run-length token (run - 1, offset by kRleSymStart) if pending.
+    auto flush_zero_run = [&](size_t c) {
+      if (zero_run == 0) return;
+      int token, nbits, bits;
+      EncodeHybridVarLenUint(zero_run - 1, &token, &nbits, &bits);
+      tokens[0].emplace_back(Token(c, kRleSymStart + token, nbits, bits));
+      zero_run = 0;
+    };
+    for (size_t c = 0; c < 3; c++) {
+      for (size_t y = 0; y < height; y++) {
+        const int16_t* PIK_RESTRICT row = rect.ConstPlaneRow(img, c, y);
+        for (size_t x = 0; x < width; x++) {
+          if (use_rle && row[x] == 0) {
+            ++zero_run;
+            continue;
+          }
+          flush_zero_run(c);
+          int token, nbits, bits;
+          EncodeHybridVarLenInt(row[x], &token, &nbits, &bits);
+          PIK_ASSERT(token < kRleSymStart);
+          tokens[0].emplace_back(Token(c, token, nbits, bits));
+        }
+      }
+      flush_zero_run(c);  // Runs never span two channels.
+    }
+
+    std::vector<ANSEncodingData> codes;
+    std::vector<uint8_t> context_map;
+    PikImageSizeInfo stats;
+    std::string encoded =
+        BuildAndEncodeHistograms(3, tokens, &codes, &context_map, &stats);
+    encoded += WriteTokens(tokens[0], codes, context_map, &stats);
+    if (smallest.empty() || encoded.size() < smallest.size()) {
+      smallest = std::move(encoded);
+      smallest_stats = stats;
+    }
+  }
+  if (info) {
+    info->Assimilate(smallest_stats);
+  }
+  return smallest;
+}
+
+bool DecodeHistograms(BitReader* br, const size_t num_contexts,
+                      const size_t max_alphabet_size, ANSCode* code,
+                      std::vector<uint8_t>* context_map) {
+  PROFILER_FUNC;
+  size_t num_histograms = 1;  // Kept as-is when no context map is read.
+  context_map->resize(num_contexts);
+  if (num_contexts > 1) {  // A context map is only read for two or more contexts.
+    PIK_RETURN_IF_ERROR(DecodeContextMap(context_map, &num_histograms, br));
+  }
+  if (!DecodeANSCodes(num_histograms, max_alphabet_size, br, code)) {
+    return PIK_FAILURE("Histo DecodeANSCodes");
+  }
+  PIK_RETURN_IF_ERROR(br->JumpToByteBoundary());  // The encoders pad the histogram stream to a byte.
+  return true;
+}
+
+// Decodes `rect` of `img`, expanding zero runs. See also EncodeImageData.
+bool DecodeImageData(BitReader* PIK_RESTRICT br,
+                     const std::vector<uint8_t>& context_map,
+                     ANSSymbolReader* PIK_RESTRICT decoder, const Rect& rect,
+                     Image3S* PIK_RESTRICT img) {
+  const size_t xsize = rect.xsize();
+  const size_t ysize = rect.ysize();
+  PIK_ASSERT(xsize <= img->xsize() && ysize <= img->ysize());
+  for (int c = 0; c < 3; ++c) {
+    const int histo_idx = context_map[c];  // The channel index is the context.
+    size_t skip = 0;  // Zeros still owed by the last run token.
+    for (size_t y = 0; y < ysize; y++) {
+      int16_t* PIK_RESTRICT row = rect.PlaneRow(img, c, y);
+      for (size_t x = 0; x < xsize; x++) {
+        if (skip) {  // Inside a zero run; a run may continue across rows.
+          row[x] = 0;
+          skip--;
+          continue;
+        }
+        br->FillBitBuffer();
+        int token = decoder->ReadSymbol(histo_idx, br);
+        int s = 0;
+        if (token >= kRleSymStart) {  // Run token: this sample is zero ...
+          token -= kRleSymStart;
+          int num_bits = HybridEncodingTokenNumBits(token);
+          int bits = 0;
+          if (num_bits > 0) {
+            bits = br->ReadBits(num_bits);
+          }
+          s = DecodeHybridVarLenUint(token, bits);
+          skip = s;  // ... and `s` more zeros follow it.
+          row[x] = 0;
+          continue;
+        }
+        int num_bits = HybridEncodingTokenNumBits(token);  // Literal value.
+        int bits = 0;
+        if (num_bits > 0) {
+          bits = br->ReadBits(num_bits);
+        }
+        s = DecodeHybridVarLenInt(token, bits);
+        row[x] = s;
+      }
+    }
+    if (skip != 0) {  // A run must end within its own channel.
+      return PIK_FAILURE("Invalid DC");
+    }
+  }
+  PIK_RETURN_IF_ERROR(br->JumpToByteBoundary());
+  return true;
+}
+
+bool DecodeCoeffOrder(int32_t* order, BitReader* br) {
+  constexpr int N = kBlockDim;
+  constexpr int block_size = N * N;
+  int32_t lehmer[block_size] = {0};  // Entries not read from `br` stay 0.
+  for (int32_t i = 0; i < block_size; i += kCoeffOrderCodeSpan) {
+    br->FillBitBuffer();
+    const int32_t has_non_zero = br->ReadBits(1);  // One flag bit per span.
+    if (!has_non_zero) continue;
+    const int32_t start = (i > 0) ? i : 1;  // Entry 0 is never coded.
+    const int32_t end = i + kCoeffOrderCodeSpan;
+    for (int32_t j = start; j < end; ++j) {
+      int32_t v = 0;
+      while (v <= block_size) {  // Sum 3-bit chunks; a chunk of 7 means "more".
+        br->FillBitBuffer();
+        const int32_t bits = br->ReadBits(3);
+        v += bits;
+        if (bits < 7) break;
+      }
+      if (v > block_size) v = block_size;  // Clamp oversized values.
+      lehmer[j] = v;
+    }
+  }
+  int32_t end = block_size - 1;
+  while (end > 0 && lehmer[end] == 0) {  // Locate the last non-zero entry.
+    --end;
+  }
+  for (int32_t i = 1; i <= end; ++i) {
+    --lehmer[i];  // NOTE(review): a zero inside [1, end] becomes -1 here; confirm DecodeLehmerCode tolerates it.
+  }
+  DecodeLehmerCode(lehmer, block_size, order);
+  const int32_t* natural_coeff_order = NaturalCoeffOrder();
+  for (size_t k = 0; k < block_size; ++k) {
+    order[k] = natural_coeff_order[order[k]];  // Map through the natural order.
+  }
+  return true;  // No failure path: this function never returns false.
+}
+
+bool DecodeImage(BitReader* PIK_RESTRICT br, const Rect& rect,
+                 Image3S* PIK_RESTRICT img) {
+  std::vector<uint8_t> context_map;
+  ANSCode code;
+  PIK_RETURN_IF_ERROR(DecodeHistograms(br, 3, kEntropyCodingNumSymbols,  // 3 contexts, one per channel.
+                                       &code, &context_map));
+  ANSSymbolReader decoder(&code);
+  PIK_RETURN_IF_ERROR(DecodeImageData(br, context_map, &decoder, rect, img));
+  if (!decoder.CheckANSFinalState()) {  // Verified only after all symbols are read.
+    return PIK_FAILURE("ANS checksum failure.");
+  }
+  return true;
+}
+
+// The `rect_qf` argument specifies, in block units, the location we should
+// decode to inside the `quant_field` image, and the location we should read the
+// AC strategy from inside `ac_strategy`. It does *not* apply to the `hint`
+// argument. A null `hint` selects prediction from the top and left neighbors.
+bool DecodeQuantField(BitReader* PIK_RESTRICT br,
+                      ANSSymbolReader* PIK_RESTRICT decoder,
+                      const std::vector<uint8_t>& context_map,
+                      const Rect& rect_qf,
+                      const AcStrategyImage& PIK_RESTRICT ac_strategy,
+                      ImageI* PIK_RESTRICT quant_field,
+                      const ImageI* PIK_RESTRICT hint) {
+  PROFILER_FUNC;
+  const size_t xsize = rect_qf.xsize();
+  const size_t ysize = rect_qf.ysize();
+  const size_t stride = quant_field->PixelsPerRow();  // Used to reach the rows below.
+
+  if (hint == nullptr) {
+    for (size_t by = 0; by < ysize; ++by) {
+      int32_t* PIK_RESTRICT row_quant = rect_qf.Row(quant_field, by);
+      const int32_t* PIK_RESTRICT row_quant_top =  // Null for the first row.
+          (by == 0) ? nullptr : rect_qf.ConstRow(*quant_field, by - 1);
+      AcStrategyRow ac_strategy_row = ac_strategy.ConstRow(rect_qf, by);
+      for (size_t bx = 0; bx < xsize; ++bx) {
+        AcStrategy acs = ac_strategy_row[bx];
+        if (!acs.IsFirstBlock()) continue;  // One symbol per first block only.
+        int32_t predicted_quant =
+            PredictFromTopAndLeft(row_quant_top, row_quant, bx, 32);
+        br->FillBitBuffer();
+        int32_t quant_ctx = QuantContext();
+        size_t q = decoder->ReadSymbol(context_map[quant_ctx], br);
+        if (q == 255) {  // Escape: the next symbol plus one is the value itself.
+          br->FillBitBuffer();
+          row_quant[bx] = decoder->ReadSymbol(context_map[quant_ctx], br) + 1;
+        } else {  // Otherwise the symbol is a residual added to the prediction.
+          row_quant[bx] = UnpackSigned(q) + predicted_quant;
+        }
+        for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {  // Replicate over covered blocks.
+          for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+            row_quant[bx + iy * stride + ix] = row_quant[bx];
+          }
+        }
+      }
+    }
+  } else {
+    for (size_t by = 0; by < ysize; ++by) {
+      int32_t* PIK_RESTRICT row_quant = rect_qf.Row(quant_field, by);
+      const int32_t* PIK_RESTRICT row_hint = hint->ConstRow(by);  // Not offset by rect_qf.
+      AcStrategyRow ac_strategy_row = ac_strategy.ConstRow(rect_qf, by);
+      for (size_t bx = 0; bx < xsize; ++bx) {
+        AcStrategy acs = ac_strategy_row[bx];
+        if (!acs.IsFirstBlock()) continue;  // One symbol per first block only.
+        int32_t predicted_quant = row_hint[bx];  // The hint is the prediction.
+        br->FillBitBuffer();
+        int32_t quant_ctx = QuantContext();
+        size_t q = decoder->ReadSymbol(context_map[quant_ctx], br);
+        if (q == 255) {  // Escape: the next symbol plus one is the value itself.
+          br->FillBitBuffer();
+          row_quant[bx] = decoder->ReadSymbol(context_map[quant_ctx], br) + 1;
+        } else {  // Otherwise the symbol is a residual added to the prediction.
+          row_quant[bx] = UnpackSigned(q) + predicted_quant;
+        }
+        for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {  // Replicate over covered blocks.
+          for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+            row_quant[bx + iy * stride + ix] = row_quant[bx];
+          }
+        }
+      }
+    }
+  }
+  return true;
+}
+
+void TokenizeAcStrategy(const Rect& rect, const AcStrategyImage& ac_strategy,
+                        const AcStrategyImage* hint,
+                        std::vector<Token>* PIK_RESTRICT output) {
+  // Appends one token per block for which IsFirstBlock() holds; with a hint
+  // the symbol is the raw strategy minus the hint's, modulo 256.
+  const size_t blocks_x = rect.xsize();
+  const size_t blocks_y = rect.ysize();
+  output->reserve(output->size() + blocks_x * blocks_y);
+  if (hint != nullptr) {
+    for (size_t by = 0; by < blocks_y; by++) {
+      AcStrategyRow strategy_row = ac_strategy.ConstRow(rect, by);
+      AcStrategyRow hint_row = hint->ConstRow(rect, by);
+      for (size_t bx = 0; bx < blocks_x; bx++) {
+        AcStrategy acs = strategy_row[bx];
+        if (!acs.IsFirstBlock()) continue;
+        uint8_t delta = acs.RawStrategy();
+        delta -= hint_row[bx].RawStrategy();  // Wraps around on underflow.
+        output->emplace_back(AcStrategyContext(), delta, 0, 0);
+      }
+    }
+    return;
+  }
+  // No hint: the raw strategy itself is the symbol.
+  for (size_t by = 0; by < blocks_y; by++) {
+    AcStrategyRow strategy_row = ac_strategy.ConstRow(rect, by);
+    for (size_t bx = 0; bx < blocks_x; bx++) {
+      AcStrategy acs = strategy_row[bx];
+      if (!acs.IsFirstBlock()) continue;
+      output->emplace_back(AcStrategyContext(), acs.RawStrategy(), 0, 0);
+    }
+  }
+}
+
+bool DecodeAcStrategy(BitReader* PIK_RESTRICT br,
+                      ANSSymbolReader* PIK_RESTRICT decoder,
+                      const std::vector<uint8_t>& context_map,
+                      ImageB* PIK_RESTRICT ac_strategy_raw, const Rect& rect,
+                      AcStrategyImage* PIK_RESTRICT ac_strategy,
+                      const AcStrategyImage* PIK_RESTRICT hint) {
+  PROFILER_FUNC;
+  const size_t ctx = context_map[AcStrategyContext()];
+
+  const size_t xsize = rect.xsize();
+  const size_t ysize = rect.ysize();
+  FillImage(AcStrategyImage::INVALID, ac_strategy_raw);  // INVALID marks "not decoded yet".
+  const size_t stride = ac_strategy_raw->PixelsPerRow();  // Used to reach the rows below.
+
+  if (hint == nullptr) {
+    for (size_t by = 0; by < ysize; by++) {
+      uint8_t* PIK_RESTRICT row_ac = ac_strategy_raw->Row(by);  // Rows start at 0, not at rect.
+      for (size_t bx = 0; bx < xsize; bx++) {
+        if (row_ac[bx] != AcStrategyImage::INVALID) continue;  // Already covered by an earlier block.
+        br->FillBitBuffer();
+        uint8_t raw_strategy = decoder->ReadSymbol(ctx, br);
+        if (!AcStrategy::IsRawStrategyValid(raw_strategy)) {
+          return PIK_FAILURE("Invalid AC strategy");
+        }
+        AcStrategy acs = AcStrategy::FromRawStrategy(raw_strategy);
+        if (by + acs.covered_blocks_y() > ysize)  // Must fit inside rect.
+          return PIK_FAILURE("Invalid AC strategy: y overflow");
+        if (bx + acs.covered_blocks_x() > xsize)
+          return PIK_FAILURE("Invalid AC strategy: x overflow");
+        for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {  // Mark every covered block.
+          for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+            row_ac[bx + ix + iy * stride] = raw_strategy;
+          }
+        }
+      }
+    }
+  } else {
+    for (size_t by = 0; by < ysize; by++) {
+      uint8_t* PIK_RESTRICT row_ac = ac_strategy_raw->Row(by);  // Rows start at 0, not at rect.
+      AcStrategyRow row_hint = hint->ConstRow(by);  // Not offset by rect.
+      for (size_t bx = 0; bx < xsize; bx++) {
+        if (row_ac[bx] != AcStrategyImage::INVALID) continue;  // Already covered by an earlier block.
+        br->FillBitBuffer();
+        uint8_t raw_strategy = decoder->ReadSymbol(ctx, br);
+        raw_strategy += row_hint[bx].RawStrategy();  // Undoes the uint8_t delta of TokenizeAcStrategy.
+        if (!AcStrategy::IsRawStrategyValid(raw_strategy)) {
+          return PIK_FAILURE("Invalid AC strategy");
+        }
+        AcStrategy acs = AcStrategy::FromRawStrategy(raw_strategy);
+        if (by + acs.covered_blocks_y() > ysize)  // Must fit inside rect.
+          return PIK_FAILURE("Invalid AC strategy: y overflow");
+        if (bx + acs.covered_blocks_x() > xsize)
+          return PIK_FAILURE("Invalid AC strategy: x overflow");
+        for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {  // Mark every covered block.
+          for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+            row_ac[bx + ix + iy * stride] = raw_strategy;
+          }
+        }
+      }
+    }
+  }
+  ac_strategy->SetFromRaw(rect, *ac_strategy_raw);  // Publish the decoded raw map.
+  return true;
+}
+
+void TokenizeARParameters(const Rect& rect, const ImageB& ar_sigma_lut_ids,
+                          const AcStrategyImage& ac_strategy,
+                          std::vector<Token>* PIK_RESTRICT output) {
+  // One token per first block; the symbol is that block's sigma LUT id.
+  for (size_t by = 0, rows = rect.ysize(); by < rows; ++by) {
+    const uint8_t* lut_ids = rect.ConstRow(ar_sigma_lut_ids, by);
+    AcStrategyRow strategies = ac_strategy.ConstRow(rect, by);
+    for (size_t bx = 0, cols = rect.xsize(); bx < cols; ++bx) {
+      if (strategies[bx].IsFirstBlock()) {
+        output->emplace_back(ARParamsContext(), lut_ids[bx], 0, 0);
+      }
+    }
+  }
+}
+
+bool DecodeARParameters(BitReader* br, ANSSymbolReader* decoder,
+                        const std::vector<uint8_t>& context_map,
+                        const Rect& rect, const AcStrategyImage& ac_strategy,
+                        ImageB* ar_sigma_lut_ids) {  // Inverse of TokenizeARParameters.
+  const size_t xsize = rect.xsize();
+  const size_t ysize = rect.ysize();
+  for (size_t by = 0; by < ysize; by++) {
+    AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
+    for (size_t bx = 0; bx < xsize; bx++) {
+      AcStrategy acs = acs_row[bx];
+      if (!acs.IsFirstBlock()) continue;  // One symbol per first block only.
+      const size_t ctx = context_map[ARParamsContext()];
+      br->FillBitBuffer();
+      size_t value = decoder->ReadSymbol(ctx, br);
+      for (size_t dy = 0; dy < acs.covered_blocks_y(); dy++) {  // Replicate over covered blocks.
+        for (size_t dx = 0; dx < acs.covered_blocks_x(); dx++) {  // x extent, as in DecodeQuantField.
+          rect.Row(ar_sigma_lut_ids, by + dy)[bx + dx] = value;
+        }
+      }
+    }
+  }
+  return true;  // No failure path: this function never returns false.
+}
+
+bool DecodeAC(const std::vector<uint8_t>& context_map,
+              const int32_t* PIK_RESTRICT coeff_order,
+              BitReader* PIK_RESTRICT br, ANSSymbolReader* decoder,
+              Image3S* PIK_RESTRICT ac, const Rect& rect,
+              Image3I* PIK_RESTRICT tmp_num_nzeroes) {
+  PROFILER_FUNC;
+  constexpr int N = kBlockDim;
+  constexpr int block_size = N * N;
+
+  const size_t xsize_blocks = rect.xsize();  // `rect` supplies sizes only;
+  const size_t ysize_blocks = rect.ysize();  // rows below are indexed from 0.
+
+  for (int c = 0; c < 3; ++c) {
+    for (size_t by = 0; by < ysize_blocks; ++by) {
+      int16_t* PIK_RESTRICT row_ac = ac->PlaneRow(c, by);
+      int32_t* PIK_RESTRICT row_nzeros = tmp_num_nzeroes->PlaneRow(c, by);
+      const int32_t* PIK_RESTRICT row_nzeros_top =  // Null for the first row.
+          (by == 0) ? nullptr : tmp_num_nzeroes->ConstPlaneRow(c, by - 1);
+
+      for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+        int16_t* PIK_RESTRICT block_ac = row_ac + bx * block_size;  // Blocks are contiguous in the row.
+        memset(block_ac, 0, block_size * sizeof(row_ac[0]));
+        int32_t predicted_nzeros =
+            PredictFromTopAndLeft(row_nzeros_top, row_nzeros, bx, 32);
+        const size_t block_ctx = c;  // The block context is the channel.
+        const size_t nzero_ctx = NonZeroContext(predicted_nzeros, block_ctx);
+        br->FillBitBuffer();
+        row_nzeros[bx] = decoder->ReadSymbol(context_map[nzero_ctx], br);
+        size_t num_nzeros = row_nzeros[bx];  // Non-zeros still to be decoded.
+        if (num_nzeros > block_size) {
+          return PIK_FAILURE("Invalid AC: nzeros too large");
+        }
+        if (num_nzeros == 0) continue;  // Block stays all-zero.
+        const int histo_offset = ZeroDensityContextsOffset(block_ctx);
+        const size_t order_offset = block_ctx * block_size;  // One order table per context.
+        const int* PIK_RESTRICT block_order = &coeff_order[order_offset];
+        PIK_ASSERT(block_ctx < kOrderContexts);
+        for (size_t k = 1; k < block_size && num_nzeros > 0; ++k) {  // Scan position 0 is not decoded here.
+          int context = histo_offset + ZeroDensityContext(num_nzeros, k - 1);
+          br->FillBitBuffer();
+          int symbol = decoder->ReadSymbol(context_map[context], br);
+          int nbits = kBitsLut[symbol];
+          int skip = kSkipLut[symbol];
+          k += skip;  // Jump over `skip` zero coefficients.
+          if (nbits == 0) {
+            // NB: currently format does not prohibit unoptimal code.
+            // PIK_ASSERT(skip == 15);
+            continue;
+          }
+          if (PIK_UNLIKELY(k + num_nzeros > block_size)) {  // Also keeps k inside the block.
+            return PIK_FAILURE("Invalid AC data.");
+          }
+          int32_t bits = br->PeekBits(nbits);
+          br->Advance(nbits);
+          int32_t coeff = DecodeVarLenInt(nbits, bits);
+          --num_nzeros;
+          block_ac[block_order[k]] = coeff;  // Scan position -> coefficient index.
+        }
+        if (num_nzeros != 0) {  // Scan ended before all announced non-zeros.
+          return PIK_FAILURE("Invalid AC: nzeros not 0.");
+        }
+      }
+    }
+  }
+  return true;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/entropy_coder.h b/codec/L2/demos/pikEnc/host/pik/entropy_coder.h
new file mode 100755
index 0000000000..80b8b3208a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/entropy_coder.h
@@ -0,0 +1,457 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_ENTROPY_CODER_H_
+#define PIK_ENTROPY_CODER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/types.h>
+#include <cstdint>
+#include <cstdlib>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "pik/ac_strategy.h"
+#include "pik/ans_decode.h"
+#include "pik/ans_encode.h"
+#include "pik/bit_reader.h"
+#include "pik/cluster.h"
+#include "pik/common.h"
+#include "pik/compiler_specific.h"
+#include "pik/context_map_encode.h"
+#include "pik/fast_log.h"
+#include "pik/image.h"
+#include "pik/lehmer_code.h"
+#include "pik/pik_info.h"
+#include "pik/status.h"
+
+// Entropy coding and context modeling of DC and AC coefficients, as well as AC
+// strategy and quantization field.
+
+namespace pik {
+
+/* Python snippet to generate the zig-zag sequence:
+N = 8
+out, lut = [0] * (N * N), [0] * (N * N)
+x, y, d = 0, 0, 1
+for i in range(N * N // 2):
+  out[i], out[N * N - 1 - i] = x + y * N, N * N - 1 - x - y * N
+  x, y = x + d, y - d
+  if y < 0: y, d = 0, -d
+  if x < 0: x, d = 0, -d
+for i in range(N * N): lut[out[i]] = i
+print("Order: " + str(out) + "\nLut: " + str(lut))
+*/
+// "Natural order" means the order of increasing "anisotropic" frequency of
+// the continuous version of the DCT basis.
+// Surprisingly, frequency along the (i + j == const) diagonals is roughly the
+// same. For historical reasons, consecutive diagonals are traversed
+// in alternating directions - the so-called "zig-zag" (or "snake") order.
+// Round-trip:
+//  X = kNaturalCoeffOrderN[kNaturalCoeffOrderLutN[X]]
+//  X = kNaturalCoeffOrderLutN[kNaturalCoeffOrderN[X]]
+constexpr int32_t kNaturalCoeffOrder8[8 * 8] = {
+    0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6,  7,  14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63};
+
+constexpr int32_t kNaturalCoeffOrderLut8[8 * 8] = {
+    0,  1,  5,  6,  14, 15, 27, 28, 2,  4,  7,  13, 16, 26, 29, 42,
+    3,  8,  12, 17, 25, 30, 41, 43, 9,  11, 18, 24, 31, 40, 44, 53,
+    10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60,
+    21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63};
+
+constexpr const int32_t* NaturalCoeffOrder() { return kNaturalCoeffOrder8; }
+
+constexpr const int32_t* NaturalCoeffOrderLut() {
+  return kNaturalCoeffOrderLut8;
+}
+
+// Block context used for scanning order, number of non-zeros, AC coefficients.
+// Equal to the channel.
+constexpr uint32_t kDCTOrderContextStart = 0;
+constexpr uint32_t kOrderContexts = 3;
+
+// Quantizer values are in range [1..256]. To reduce the total number of
+// contexts, the values are shifted and combined in pairs,
+// i.e. 1..256 -> 0..127.
+constexpr uint32_t kQuantFieldContexts = 1;
+
+// AC strategy contexts.
+constexpr uint32_t kAcStrategyContexts = 1;
+
+// AR parameter contexts
+constexpr uint32_t kARParamsContexts = 1;
+
+// Total number of order-free contexts.
+constexpr uint32_t kNumControlFieldContexts =
+    kQuantFieldContexts + kAcStrategyContexts + kARParamsContexts;
+
+// For DCT 8x8 there could be up to 63 non-zero AC coefficients (and one DC
+// coefficient). To reduce the total number of contexts,
+// the values are combined in pairs, i.e. 0..63 -> 0..31.
+constexpr uint32_t kNonZeroBuckets = 32;
+
+// TODO(user): find better clustering for PIK use case.
+static const uint8_t kCoeffFreqContext[64] = {
+    0,  1,  2,  3,  4,  4,  5,  5,  6,  6,  7,  7,  8,  8,  8,  8,
+    9,  9,  9,  9,  10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12,
+    13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+};
+
+// TODO(user): find better clustering for PIK use case.
+static const uint16_t kCoeffNumNonzeroContext[65] = {
+    0xBAD, 0,  0,  16, 16, 16, 32, 32, 32, 32, 48, 48, 48, 48, 48, 48, 64,
+    64,    64, 64, 64, 64, 64, 64, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
+    79,    79, 79, 79, 79, 79, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93,
+    93,    93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93};
+
+// Supremum of ZeroDensityContext(x, y) + 1.
+constexpr int kZeroDensityContextCount = 105;
+
+/* This function is used for entropy-sources pre-clustering.
+ *
+ * Ideally, each combination of |nonzeros_left| and |k| should go to its own
+ * bucket; but it implies (64 * 63 / 2) == 2016 buckets. If there is other
+ * dimension (e.g. block context), then number of primary clusters becomes too
+ * big.
+ *
+ * To solve this problem, |nonzeros_left| and |k| values are clustered. Their
+ * sum is known to be at most 64; consequently, the total number of buckets
+ * is at most A(64) * B(64).
+ *
+ * |bits| controls the granularity of pre-clustering. When |bits| is 0, all |k|
+ * values are put together. When |bits| is 6, then all 64 |k| values go to
+ * different buckets.
+ *
+ * Also see the test code, where more compact presentation is expanded into
+ * those lookup tables.
+ */
+// TODO(user): investigate, why disabling pre-clustering makes entropy code
+// less dense. Perhaps we would need to add HQ clustering algorithm that would
+// be able to squeeze better by spending more CPU cycles.
+inline int ZeroDensityContext(int nonzeros_left, int k) {
+  PIK_ASSERT(nonzeros_left > 0);  // Entry 0 of kCoeffNumNonzeroContext is the 0xBAD placeholder.
+  PIK_ASSERT(nonzeros_left + k < 65);  // Keeps both table lookups in range.
+  return kCoeffNumNonzeroContext[nonzeros_left] + kCoeffFreqContext[k];
+}
+
+// Context map for AC coefficients consists of 2 blocks:
+//  |kOrderContexts x          : context for number of non-zeros in the block
+//   kNonZeroBuckets|            computed from block context and predicted value
+//                               (based top and left values)
+//  |kOrderContexts x          : context for AC coefficient symbols,
+//   kZeroDensityContextCount|   computed from block context,
+//                               number of non-zeros left and
+//                               index in scan order
+constexpr uint32_t kNumContexts = (kOrderContexts * kNonZeroBuckets) +
+                                  (kOrderContexts * kZeroDensityContextCount);
+
+constexpr uint32_t AcStrategyContext() { return 0; }
+
+constexpr uint32_t ARParamsContext() { return kAcStrategyContexts; }
+
+constexpr uint32_t QuantContext() {
+  return kAcStrategyContexts + kARParamsContexts;
+}
+
+// Non-zero context is based on number of non-zeros and block context.
+// For better clustering, contexts with same number of non-zeros are grouped.
+constexpr uint32_t NonZeroContext(uint32_t non_zeros, uint32_t block_ctx) {
+  return kOrderContexts * (non_zeros >> 1) + block_ctx;
+}
+
+// Index of the first zero-density (AC symbol) context for |block_ctx|; these
+// contexts are laid out after the kOrderContexts * kNonZeroBuckets ones.
+constexpr uint32_t ZeroDensityContextsOffset(uint32_t block_ctx) {
+  return kOrderContexts * kNonZeroBuckets +
+         kZeroDensityContextCount * block_ctx;
+}
+
+// Predicts |rect_dc| (typically a "group" of DC values, or less on the borders)
+// within |dc| and stores residuals in |tmp_residuals| starting at 0,0.
+void ShrinkDC(const Rect& rect_dc, const Image3S& dc,
+              Image3S* PIK_RESTRICT tmp_residuals);
+
+// Reconstructs |rect_dc| within |dc|: replaces these prediction residuals
+// (generated by ShrinkDC) with reconstructed DC values. All images are at least
+// rect.xsize * ysize (2*xsize for xz); tmp_* start at 0,0. Must be called with
+// (one of) the same rect arguments passed to ShrinkDC.
+void ExpandDC(const Rect& rect_dc, Image3S* PIK_RESTRICT dc,
+              ImageS* PIK_RESTRICT tmp_y, ImageS* PIK_RESTRICT tmp_xz_residuals,
+              ImageS* PIK_RESTRICT tmp_xz_expanded);
+
+// Modify zig-zag order, so that DCT bands with more zeros go later.
+// Order of DCT bands with same number of zeros is untouched, so
+// permutation will be cheaper to encode.
+void ComputeCoeffOrder(const Image3S& ac, const Rect& rect,
+                       int32_t* PIK_RESTRICT order);
+
+std::string EncodeCoeffOrders(const int32_t* PIK_RESTRICT order,
+                              PikInfo* PIK_RESTRICT pik_info);
+
+// Encodes the `rect` area of `img`.
+// Typically used for DC.
+// See also DecodeImageData.
+std::string EncodeImageData(const Rect& rect, const Image3S& img,
+                            PikImageSizeInfo* info);
+
+// See also EncodeImageData.
+bool DecodeImageData(BitReader* PIK_RESTRICT br,
+                     const std::vector<uint8_t>& context_map,
+                     ANSSymbolReader* PIK_RESTRICT decoder, const Rect& rect,
+                     Image3S* PIK_RESTRICT img);
+
+// Decodes into "rect" within "img". Calls DecodeImageData.
+bool DecodeImage(BitReader* PIK_RESTRICT br, const Rect& rect,
+                 Image3S* PIK_RESTRICT img);
+
+// Token to be encoded by the ANS. Uses context c (16 bits), writing symbol s
+// (8 bits), and adds up to 32 (nb) extra bits (b) that are interleaved in the
+// ANS stream.
+struct Token {
+  Token(uint32_t c, uint32_t s, uint32_t nb, uint32_t b)
+      : bits(b), context(c), nbits(nb), symbol(s) {  // Narrowed to the field widths below.
+#ifdef ADDRESS_SANITIZER
+    PIK_ASSERT(c < (1UL << 16));  // Context range is only checked in ASan builds.
+#endif
+    static_assert(sizeof(Token) == 8, "Token must be a 8 byte struct!");
+  }
+  uint32_t bits;  // Extra-bit payload (b).
+  uint16_t context;  // Context id (c).
+  uint8_t nbits;  // Number of valid bits in `bits` (nb).
+  uint8_t symbol;  // Entropy-coded symbol (s).
+};
+
+// Generate AC strategy tokens.
+// Only the subset "rect" [in units of blocks] within all images.
+// Appends one token per block for which IsFirstBlock() is true to output.
+// See also DecodeAcStrategy.
+void TokenizeAcStrategy(const Rect& rect, const AcStrategyImage& ac_strategy,
+                        const AcStrategyImage* hint,
+                        std::vector<Token>* PIK_RESTRICT output);
+
+// Generate quantization field tokens.
+// Only the subset "rect" [in units of blocks] within all images.
+// Appends one token per pixel to output.
+// TODO(user): quant field seems to be useful for all the AC strategies.
+// perhaps, we could just have different quant_ctx based on the block type.
+// See also DecodeQuantField.
+void TokenizeQuantField(const Rect& rect, const ImageI& quant_field,
+                        const ImageI* hint, const AcStrategyImage& ac_strategy,
+                        std::vector<Token>* PIK_RESTRICT output);
+
+// Generate DCT NxN quantized AC values tokens.
+// Only the subset "rect" [in units of blocks] within all images.
+// Warning: uses the DC coefficients in "coeffs"!
+// See also DecodeCoefficients.
+void TokenizeCoefficients(const int32_t* orders, const Rect& rect,
+                          const Image3S& coeffs,
+                          std::vector<Token>* PIK_RESTRICT output);
+
+// Decode AC strategy. The `rect` argument does *not* apply to the hint!
+// See also TokenizeAcStrategy.
+bool DecodeAcStrategy(BitReader* PIK_RESTRICT br,
+                      ANSSymbolReader* PIK_RESTRICT decoder,
+                      const std::vector<uint8_t>& context_map,
+                      ImageB* PIK_RESTRICT ac_strategy_raw, const Rect& rect,
+                      AcStrategyImage* PIK_RESTRICT ac_strategy,
+                      const AcStrategyImage* PIK_RESTRICT hint);
+
+void TokenizeARParameters(const Rect& rect, const ImageB& ar_sigma_lut_ids,
+                          const AcStrategyImage& ac_strategy,
+                          std::vector<Token>* PIK_RESTRICT output);
+bool DecodeARParameters(BitReader* br, ANSSymbolReader* decoder,
+                        const std::vector<uint8_t>& context_map,
+                        const Rect& rect, const AcStrategyImage& ac_strategy,
+                        ImageB* ar_sigma_lut_ids);
+
+// Apply context clustering, compute histograms and encode them.
+std::string BuildAndEncodeHistograms(
+    size_t num_contexts, const std::vector<std::vector<Token> >& tokens,
+    std::vector<ANSEncodingData>* codes, std::vector<uint8_t>* context_map,
+    PikImageSizeInfo* info);
+
+// Same as BuildAndEncodeHistograms, but with static context clustering.
+std::string BuildAndEncodeHistogramsFast(
+    const std::vector<std::vector<Token> >& tokens,
+    std::vector<ANSEncodingData>* codes, std::vector<uint8_t>* context_map,
+    PikImageSizeInfo* info);
+
+// Write the tokens to a string.
+std::string WriteTokens(const std::vector<Token>& tokens,
+                        const std::vector<ANSEncodingData>& codes,
+                        const std::vector<uint8_t>& context_map,
+                        PikImageSizeInfo* pik_info);
+
+bool DecodeCoeffOrder(int32_t* order, BitReader* br);
+
+bool DecodeHistograms(BitReader* br, const size_t num_contexts,
+                      const size_t max_alphabet_size, ANSCode* code,
+                      std::vector<uint8_t>* context_map);
+
+// See TokenizeQuantField.
+bool DecodeQuantField(BitReader* PIK_RESTRICT br,
+                      ANSSymbolReader* PIK_RESTRICT decoder,
+                      const std::vector<uint8_t>& context_map,
+                      const Rect& rect_qf,
+                      const AcStrategyImage& PIK_RESTRICT ac_strategy,
+                      ImageI* PIK_RESTRICT quant_field,
+                      const ImageI* PIK_RESTRICT hint);
+
+// Decode DCT NxN quantized AC values.
+// DC component in ac's DCT blocks is invalid.
+// Decodes to ac; `rect` is used only for size information.
+bool DecodeAC(const std::vector<uint8_t>& context_map,
+              const int32_t* PIK_RESTRICT coeff_order,
+              BitReader* PIK_RESTRICT br, ANSSymbolReader* decoder,
+              Image3S* PIK_RESTRICT ac, const Rect& rect,
+              Image3I* PIK_RESTRICT tmp_num_nzeroes);
+
+// Zigzag mapping of a signed value to an unsigned one: a non-negative X
+// becomes 2 * X, a negative -X becomes 2 * X - 1.
+constexpr uint32_t PackSigned(int32_t value) {
+  return value < 0 ? ~(static_cast<uint32_t>(value) << 1)
+                   : (static_cast<uint32_t>(value) << 1);
+}
+
+// Inverse of PackSigned, i.e. UnpackSigned(PackSigned(X)) == X: even inputs
+// map to value / 2, odd inputs to the bitwise complement of value / 2.
+constexpr int32_t UnpackSigned(uint32_t value) {
+  return static_cast<int32_t>((value & 1u) != 0 ? ~(value >> 1)
+                                                : (value >> 1));
+}
+
+// Encode non-negative integer as a pair (N, bits), where len(bits) == N.
+// 0 is encoded as (0, ''); X from range [2**N - 1, 2 * (2**N - 1)]
+// is encoded as (N, X + 1 - 2**N). In detail:
+// 0 -> (0, '')
+// 1 -> (1, '0')
+// 2 -> (1, '1')
+// 3 -> (2, '00')
+// 4 -> (2, '01')
+// 5 -> (2, '10')
+// 6 -> (2, '11')
+// 7 -> (3, '000')
+// ...
+// 65535 -> (16, '0000000000000000')
+// Precondition: value != 0xFFFFFFFF, because value + 1 would wrap to 0, which
+// Log2FloorNonZero presumably does not support.
+static PIK_INLINE void EncodeVarLenUint(uint32_t value, int* PIK_RESTRICT nbits,
+                                        int* PIK_RESTRICT bits) {
+  if (value == 0) {
+    *nbits = 0;
+    *bits = 0;
+  } else {
+    // value + 1 lies in [2**len, 2**(len + 1)); its low "len" bits are the
+    // payload. The mask is built from an unsigned 1 so that len == 31 does not
+    // shift into the sign bit of an int.
+    int len = Log2FloorNonZero(value + 1);
+    *nbits = len;
+    *bits = (value + 1) & ((1u << len) - 1);
+  }
+}
+
+// Inverse of EncodeVarLenUint: rebuilds the value from its (nbits, bits) pair.
+constexpr uint32_t DecodeVarLenUint(int nbits, int bits) {
+  return ((1u << nbits) - 1u) + bits;
+}
+
+// Experiments show that best performance is typically achieved for a
+// split-exponent of 3 or 4. Trend seems to be that '4' is better
+// for large-ish pictures, and '3' better for rather small-ish pictures.
+// This is plausible - the more special symbols we have, the better
+// statistics we need to get a benefit out of them.
+constexpr uint32_t kHybridEncodingDirectSplitExponent = 4;
+// constexpr uint32_t kHybridEncodingDirectSplitExponent = 3;
+constexpr uint32_t kHybridEncodingSplitToken =
+    1u << kHybridEncodingDirectSplitExponent;
+
+// Alternative encoding scheme for unsigned integers,
+// expected to work better with entropy coding.
+// Numbers N in [0 .. kHybridEncodingSplitToken-1]:
+//   These get represented as (token=N, nbits=0, bits='').
+// Numbers N >= kHybridEncodingSplitToken:
+//   If n is such that 2**n <= N < 2**(n+1),
+//   and m = N - 2**n is the 'mantissa',
+//   these get represented as:
+// (token=kHybridEncodingSplitToken +
+//        ((n - kHybridEncodingDirectSplitExponent) * 2) +
+//        (m >> (n - 1)),
+//  nbits=n - 1,
+//  bits=m & ((1 << (n - 1)) - 1))
+// Specifically, for kHybridEncodingDirectSplitExponent = 4, i.e.
+// kHybridEncodingSplitToken=16, we would get:
+// N = 0 - 15: (token=N, nbits=0, bits='')
+// N = 16:     (token=16, nbits=3, bits='000')
+// N = 17:     (token=16, nbits=3, bits='001')
+// N = 23:     (token=16, nbits=3, bits='111')
+// N = 24:     (token=17, nbits=3, bits='000')
+// N = 25:     (token=17, nbits=3, bits='001')
+// N = 31:     (token=17, nbits=3, bits='111')
+// N = 32:     (token=18, nbits=4, bits='0000')
+// N=65535:    (token=39, nbits=14, bits='11111111111111')
+static PIK_INLINE void EncodeHybridVarLenUint(uint32_t value,
+                                              int* PIK_RESTRICT token,
+                                              int* PIK_RESTRICT nbits,
+                                              int* PIK_RESTRICT bits) {
+  if (value < kHybridEncodingSplitToken) {
+    // Small values are their own token and carry no extra bits.
+    *token = value;
+    *nbits = 0;
+    *bits = 0;
+  } else {
+    // n = position of the leading one, m = value without that leading one.
+    // Shifts use an unsigned 1: n can be 31 (value >= 2**31), which would
+    // overflow a signed int.
+    uint32_t n = Log2FloorNonZero(value);
+    uint32_t m = value - (1u << n);
+    // The token encodes n and the top mantissa bit; the remaining n - 1
+    // mantissa bits are emitted raw.
+    *token = kHybridEncodingSplitToken + (
+        (n - kHybridEncodingDirectSplitExponent) << 1) + (m >> (n - 1));
+    *nbits = n - 1;
+    *bits = value & ((1u << (n - 1)) - 1);
+  }
+}
+
+// Number of raw bits that accompany "token" in the hybrid encoding: none for
+// direct tokens, otherwise one less than the exponent encoded in the token.
+static PIK_INLINE uint32_t HybridEncodingTokenNumBits(int token) {
+  return (token < kHybridEncodingSplitToken)
+             ? 0
+             : kHybridEncodingDirectSplitExponent - 1 +
+                   ((token - kHybridEncodingSplitToken) >> 1);
+}
+
+
+// Decode variable length non-negative value. Inverse of
+// EncodeHybridVarLenUint: "token" carries the exponent n and the top mantissa
+// bit, "bits" carries the remaining n - 1 mantissa bits.
+static PIK_INLINE uint32_t DecodeHybridVarLenUint(int token, int bits) {
+  if (token < kHybridEncodingSplitToken) {
+    return token;
+  }
+  uint32_t n = kHybridEncodingDirectSplitExponent +
+               ((token - kHybridEncodingSplitToken) >> 1);
+  // Unsigned arithmetic throughout: n may be 31, for which a signed 1 << n
+  // would overflow.
+  return (1u << n) + (static_cast<uint32_t>(token & 1) << (n - 1)) + bits;
+}
+
+// Signed counterpart of EncodeVarLenUint: the value is first mapped to an
+// unsigned integer with PackSigned and then encoded as (nbits, bits).
+static PIK_INLINE void EncodeVarLenInt(int32_t value, int* PIK_RESTRICT nbits,
+                                       int* PIK_RESTRICT bits) {
+  const uint32_t packed = PackSigned(value);
+  EncodeVarLenUint(packed, nbits, bits);
+}
+
+// Signed counterpart of DecodeVarLenUint: decodes (nbits, bits) and maps the
+// result back to a signed integer with UnpackSigned.
+constexpr int32_t DecodeVarLenInt(int nbits, int bits) {
+  // Kept as a single return expression, like the other constexpr helpers.
+  return UnpackSigned(
+      DecodeVarLenUint(nbits, bits));
+}
+
+// Signed counterpart of EncodeHybridVarLenUint: the value is first mapped to
+// an unsigned integer with PackSigned and then hybrid-encoded.
+static PIK_INLINE void EncodeHybridVarLenInt(int32_t value,
+                                             int* PIK_RESTRICT token,
+                                             int* PIK_RESTRICT nbits,
+                                             int* PIK_RESTRICT bits) {
+  const uint32_t packed = PackSigned(value);
+  EncodeHybridVarLenUint(packed, token, nbits, bits);
+}
+
+// Signed counterpart of DecodeHybridVarLenUint: hybrid-decodes (token, bits)
+// and maps the result back to a signed integer with UnpackSigned.
+static PIK_INLINE int32_t DecodeHybridVarLenInt(int token, int bits) {
+  const uint32_t packed = DecodeHybridVarLenUint(token, bits);
+  return UnpackSigned(packed);
+}
+
+
+}  // namespace pik
+
+#endif  // PIK_ENTROPY_CODER_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/epf.cc b/codec/L2/demos/pikEnc/host/pik/epf.cc
new file mode 100755
index 0000000000..3f438cc3dc
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/epf.cc
@@ -0,0 +1,58 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/epf.h"
+
+// Edge-preserving smoothing: 7x8 weighted average based on L1 patch similarity.
+
+#include <float.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <numeric>  // std::accumulate
+
+#ifndef EPF_DUMP_SIGMA
+#define EPF_DUMP_SIGMA 0
+#endif
+#ifndef EPF_ENABLE_STATS
+#define EPF_ENABLE_STATS 0
+#endif
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/ac_strategy.h"
+#include "pik/common.h"
+#include "pik/descriptive_statistics.h"
+#include "pik/fields.h"
+#include "pik/profiler.h"
+#include "pik/simd/simd.h"
+#include "pik/status.h"
+#if EPF_DUMP_SIGMA
+#include "pik/image_io.h"
+#endif
+
+// EPF_ASSERT(condition): prints the failing line number to stdout and exits
+// with status 1 when "condition" is false. Change "#if 1" to "#if 0" to
+// compile the checks out (the condition is then not evaluated).
+// Both variants expand to exactly one statement that needs a trailing
+// semicolon, so the macro is safe inside an unbraced if/else.
+#if 1
+#define EPF_ASSERT(condition)                             \
+  do {                                                    \
+    if (!(condition)) {                                   \
+      printf("EPF assert failed at line %d\n", __LINE__); \
+      exit(1);                                            \
+    }                                                     \
+  } while (0)
+
+#else
+#define EPF_ASSERT(condition) \
+  do {                        \
+  } while (0)
+#endif
+
+namespace pik {
+
+// Constructor: hands the new object to Bundle::Init for initialization.
+EpfParams::EpfParams() {
+  Bundle::Init(this);
+}
+
+}  // namespace pik
+
+// Must include "normally" so the build system understands the dependency.
+#include "pik/epf_target.cc"
+
+#define SIMD_ATTR_IMPL "pik/epf_target.cc"
+#include "pik/simd/foreach_target.h"
diff --git a/codec/L2/demos/pikEnc/host/pik/epf.h b/codec/L2/demos/pikEnc/host/pik/epf.h
new file mode 100755
index 0000000000..2c82d74a7e
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/epf.h
@@ -0,0 +1,97 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_EPF_H_
+#define PIK_EPF_H_
+
+// Fast SIMD edge preserving filter (adaptive, nonlinear).
+
+#include <stdio.h>
+#include "pik/ac_strategy.h"
+#include "pik/field_encodings.h"
+#include "pik/image.h"
+
+namespace pik {
+
+struct EpfParams {  // Parameters for EdgePreservingFilter; fields are enumerated by VisitFields.
+  EpfParams();
+  static const char* Name() { return "EpfParams"; }
+  // Visits enable_adaptive, then sigma (only if !enable_adaptive), then use_sharpened.
+  template <class Visitor>
+  Status VisitFields(Visitor* PIK_RESTRICT visitor) {
+    visitor->Bool(false/*true*/, &enable_adaptive);
+    if (visitor->Conditional(!enable_adaptive)) {
+      visitor->U32(0x0A090880, 0, &sigma);
+    }
+    visitor->Bool(false, &use_sharpened);
+    return true;
+  }
+
+  // If false, use hardcoded sigma for each block.
+  bool enable_adaptive;
+
+  // Only visited (see VisitFields) if !enable_adaptive:
+  uint32_t sigma;  // ignored if enable_adaptive; otherwise 0 or >= kMinSigma (see EdgePreservingFilter).
+  // NOTE(review): meaning of use_sharpened is not visible in this header - confirm.
+  bool use_sharpened;
+};
+
+// Unit test. Call via dispatch::ForeachTarget.
+struct EdgePreservingFilterTest {
+  template <class Target>
+  void operator()() const;  // Declared only; the per-Target definition is not in this header.
+  // NOTE(review): "sigma" presumably uses the kSigmaShift-scaled units - confirm.
+  // Returns weight given sigma and SAD.
+  template <class Target>
+  float operator()(int sigma, int sad) const;
+};
+
+// Must be called before EdgePreservingFilter, with the same Target.
+struct InitEdgePreservingFilter {
+  template <class Target>
+  void operator()() const;  // Declared only; the per-Target definition is not in this header.
+};
+
+// Adaptive smoothing based on quantization intervals. "sigma" must be in
+// [kMinSigma, kMaxSigma]. Fills each pixel of "smoothed", which must be
+// pre-allocated. Call via Dispatch.
+struct EdgePreservingFilter {
+  // The "sigma" parameter is the SCALED half-width at half-maximum, i.e. the
+  // SAD value for which the weight is 0.5, times the scaling factor of
+  // 1 << kSigmaShift. Before scaling, sigma is about 1.2 times the standard
+  // deviation of a normal distribution. Larger values cause more smoothing.
+
+  // All sigma values are pre-shifted by this value to increase their
+  // resolution. This allows adaptive sigma to compute "5.5" (represented as 22)
+  // without an additional floating-point multiplication.
+  static constexpr int kSigmaShift = 2;
+
+  // This is the smallest value that avoids 16-bit overflow (see kShiftSAD); it
+  // corresponds to 1/3 of patch pixels having the minimum integer SAD of 1.
+  static constexpr int kMinSigma = 4 << kSigmaShift;
+  // Somewhat arbitrary; determines size of a lookup table.
+  static constexpr int kMaxSigma = 168 << kSigmaShift;  // 14 per patch pixel
+  // Both overloads below are only declared here, once per Target.
+  // For each block, compute adaptive sigma.
+  template <class Target>
+  void operator()(const Image3F& in_guide, const Image3F& in,
+                  const ImageI* ac_quant, float quant_scale,
+                  const ImageB& lut_ids, const AcStrategyImage& ac_strategy,
+                  const EpfParams& epf_params,
+                  Image3F* smoothed, EpfStats* epf_stats) const;  // Output goes to "smoothed".
+
+  // Fixed sigma in [kMinSigma, kMaxSigma] for generating training data;
+  // sigma == 0 skips filtering and copies "in" to "smoothed".
+  // "stretch" is returned for use by AdaptiveReconstructionAux.
+  template <class Target>
+  void operator()(const Image3F& in_guide, const Image3F& in,
+                  const EpfParams& params, float* PIK_RESTRICT stretch,
+                  Image3F* smoothed) const;
+};
+
+}  // namespace pik
+
+#endif  // PIK_EPF_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/epf_stats.h b/codec/L2/demos/pikEnc/host/pik/epf_stats.h
new file mode 100755
index 0000000000..7549a82595
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/epf_stats.h
@@ -0,0 +1,60 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_EPF_STATS_H_
+#define PIK_EPF_STATS_H_
+
+// Statistics/debug info for epf.h.
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "pik/descriptive_statistics.h"
+
+namespace pik {
+
+// Per-thread counters and distributions for the edge-preserving filter.
+// Instances from several threads are merged with Assimilate().
+struct EpfStats {
+  // Adds the counters and statistics of "other" into this instance.
+  void Assimilate(const EpfStats& other) {
+    total += other.total;
+    skipped += other.skipped;
+    less += other.less;
+    greater += other.greater;
+
+    for (int c = 0; c < 3; ++c) {
+      s_ranges[c].Assimilate(other.s_ranges[c]);
+    }
+    s_quant.Assimilate(other.s_quant);
+    s_sigma.Assimilate(other.s_sigma);
+  }
+
+  // Prints a summary to stdout. Percentages are relative to "total"; an
+  // instance without any recorded block reports 0% instead of NaN (0 / 0).
+  void Print() const {
+    const int stats = Stats::kNoSkewKurt + Stats::kNoGeomean;
+    // Guarded denominator: identical to "total" whenever total != 0.
+    const double denom = (total == 0) ? 1.0 : static_cast<double>(total);
+    printf(
+        "EPF total blocks: %zu; skipped: %zu (%f%%); outside %zu|%zu (%f%%)\n"
+        "ranges: %s\n        %s\n        %s\nquant: %s\nsigma: %s\n",
+        total, skipped, 100.0 * skipped / denom, less, greater,
+        100.0 * (less + greater) / denom, s_ranges[0].ToString(stats).c_str(),
+        s_ranges[1].ToString(stats).c_str(),
+        s_ranges[2].ToString(stats).c_str(), s_quant.ToString(stats).c_str(),
+        s_sigma.ToString(stats).c_str());
+  }
+
+  // # blocks
+  size_t total = 0;
+  size_t skipped = 0;  // sigma == 0 => no filter
+  // Outside LUT range:
+  size_t less = 0;
+  size_t greater = 0;
+
+  // One Stats accumulator per channel index c in [0, 3).
+  Stats s_ranges[3];
+  Stats s_quant;
+  Stats s_sigma;
+};
+
+}  // namespace pik
+
+#endif  // PIK_EPF_STATS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/epf_target.cc b/codec/L2/demos/pikEnc/host/pik/epf_target.cc
new file mode 100755
index 0000000000..0f44b9fb15
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/epf_target.cc
@@ -0,0 +1,1428 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+// Empty if not included by foreach_target.
+#ifdef SIMD_ATTR_IMPL
+
+#ifndef EPF_NEW_SIGMA
+#define EPF_NEW_SIGMA 0
+#endif
+#ifndef EPF_INDEP_RANGE
+#define EPF_INDEP_RANGE 0
+#endif
+
+namespace pik {
+namespace SIMD_NAMESPACE {
+namespace {
+
+using D16 = SIMD_FULL(int16_t);
+using DF = SIMD_FULL(float);
+const D16 d16;  // Tag object passed to SIMD ops (e.g. set1, store) for int16 lanes.
+const DF df;    // Tag object passed to SIMD ops (e.g. load, cast_to) for float lanes.
+using V16 = D16::V;
+using VF = DF::V;
+
+// Number of extra pixels on the top/bottom/left/right edges of the "guide" and
+// "in" images relative to "out".
+static constexpr int kBorder = 6;  // = Quad radius(2) + reference radius(4)
+// Local aliases for the sigma constants declared in EdgePreservingFilter.
+static constexpr int kSigmaShift = EdgePreservingFilter::kSigmaShift;
+static constexpr int kMinSigma = EdgePreservingFilter::kMinSigma;
+static constexpr int kMaxSigma = EdgePreservingFilter::kMaxSigma;
+// NOTE(review): presumably weights below this threshold are treated as zero - confirm at the use site.
+static constexpr float kFlushWeightToZeroIfBelow = 0.05f;
+
+//------------------------------------------------------------------------------
+// Distance: sum of absolute differences on patches
+// All members are static; the class only groups constants with the SAD kernel.
+class Distance {
+ public:
+  // "Patches" are 3x4 areas with top-left pixel northwest of the reference
+  // pixel or its 7x8 neighbors. The 4-pixel width ("quad") is dictated by
+  // MPSADBW.
+  static constexpr int kPatchArea = 4 * 3;
+  // One SAD is produced per neighbor position: 7 rows x 8 columns.
+  static constexpr size_t kNeighbors = 7 * 8;
+
+  // Maximum possible sum of 8-bit differences, used in tests.
+  static constexpr int kMaxSAD = kPatchArea * 255;  // = 3060
+  // Fills "sad" with the SAD of the reference patch vs. each neighbor's patch.
+  static SIMD_ATTR SIMD_INLINE void SumsOfAbsoluteDifferences(
+      const uint8_t* SIMD_RESTRICT guide_m4, const size_t guide_stride,  // guide_m4: 4 rows above the reference pixel.
+      int16_t* SIMD_RESTRICT sad) {  // out: row-major, 8 entries per window row (see stores below).
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    // 7x8 reference pixels (total search window: 9x11)
+    // 56 * 12 * 3 = 2016 ops per pixel, counting abs as one op.
+    for (int cy = -3; cy <= 3; ++cy) {
+      for (int cx = -3; cx <= 4; ++cx) {
+        int sad_sum = 0;
+        // 3x4 patch
+        for (int iy = -1; iy <= 1; ++iy) {
+          const uint8_t* row_ref = guide_m4 + (iy + 4) * guide_stride;
+          const uint8_t* row_wnd = guide_m4 + (cy + iy + 4) * guide_stride;
+          for (int ix = -1; ix <= 2; ++ix) {
+            sad_sum += std::abs(row_ref[ix] - row_wnd[cx + ix]);
+          }
+        }
+        // Row-major output: 8 entries per window row.
+        sad[(cy + 3) * 8 + cx + 3] = static_cast<int16_t>(sad_sum);
+      }
+    }
+#elif SIMD_TARGET_VALUE != SIMD_AVX2
+    const SIMD_PART(uint8_t, 16) d8;
+    const SIMD_PART(int16_t, 8) d16;
+    const SIMD_PART(uint32_t, 4) d32;
+    const SIMD_PART(uint64_t, 2) d64;
+
+    // Offset to the leftmost pixel of the search window.
+    const int kWindow = -4;  // Starts at row0
+
+    const uint8_t* SIMD_RESTRICT row0 = guide_m4;
+    const uint8_t* SIMD_RESTRICT row1 = guide_m4 + 1 * guide_stride;
+    const uint8_t* SIMD_RESTRICT row2 = guide_m4 + 2 * guide_stride;
+    const uint8_t* SIMD_RESTRICT row3 = guide_m4 + 3 * guide_stride;
+    const uint8_t* SIMD_RESTRICT row4 = guide_m4 + 4 * guide_stride;
+    const uint8_t* SIMD_RESTRICT row5 = guide_m4 + 5 * guide_stride;
+    const uint8_t* SIMD_RESTRICT row6 = guide_m4 + 6 * guide_stride;
+    const uint8_t* SIMD_RESTRICT row7 = guide_m4 + 7 * guide_stride;
+    const uint8_t* SIMD_RESTRICT row8 = guide_m4 + 8 * guide_stride;
+    // Top-left pixel of the reference patch: row above, column left of center.
+    const uint8_t* ref_pos_t = row3 - 1;
+
+    // "ref" := one four-byte quad from three rows (t/m/b = top/middle/bottom),
+    // assembled into 128 bits.
+    // Gather would be faster on SKX, but on HSW we reduce port 5 pressure by
+    // loading m and b MINUS 4 and 8 bytes to shift those quads upwards.
+    // This is safe because we're only shifting m and b => there are valid
+    // pixels to load from the previous row. x = don't care/ignored.
+    const auto ref_xxT = load_dup128(d8, ref_pos_t);
+    const auto ref_xMx = load_dup128(d8, ref_pos_t + guide_stride - 4);
+    const auto ref_Bxx = load_dup128(d8, ref_pos_t + 2 * guide_stride - 8);
+
+    // 3 patch rows x 7 window rows (m3 to p3) = 21x 128-bit SAD.
+    const auto wnd_p2 = load_unaligned(d8, row6 + kWindow);
+    const auto wnd_p3 = load_unaligned(d8, row7 + kWindow);
+    const auto wnd_p4 = load_unaligned(d8, row8 + kWindow);
+
+    const auto ref_xMT =
+        cast_to(d8, odd_even(cast_to(d32, ref_xMx), cast_to(d32, ref_xxT)));
+    const auto ref =
+        cast_to(d8, odd_even(cast_to(d64, ref_Bxx), cast_to(d64, ref_xMT)));
+    // sad_<K><t|m|b>: output row K, SAD vs. top/middle/bottom row of the patch.
+    // MPSADBW is 3 uops (p0 + 2p5) and 6 bytes.
+    auto sad_6t = ext::mpsadbw<0>(wnd_p2, ref);
+    const auto wnd_p0 = load_unaligned(d8, row4 + kWindow);
+    const auto wnd_p1 = load_unaligned(d8, row5 + kWindow);
+
+    const auto sad_6m = ext::mpsadbw<1>(wnd_p3, ref);
+    const auto wnd_m2 = load_unaligned(d8, row2 + kWindow);
+
+    const auto sad_6b = ext::mpsadbw<2>(wnd_p4, ref);
+    // Begin adding together the SAD results from each of the t/m/b rows.
+    sad_6t += sad_6m;
+    const auto wnd_m1 = load_unaligned(d8, row3 + kWindow);
+
+    auto sad_5t = ext::mpsadbw<0>(wnd_p1, ref);
+    auto sad_4m = ext::mpsadbw<1>(wnd_p1, ref);
+    sad_6t += sad_6b;
+    const auto wnd_m4 = load_unaligned(d8, row0 + kWindow);
+
+    const auto sad_5m = ext::mpsadbw<1>(wnd_p2, ref);
+    const auto sad_4b = ext::mpsadbw<2>(wnd_p2, ref);
+
+    const auto sad_5b = ext::mpsadbw<2>(wnd_p3, ref);
+    const auto sad_4t = ext::mpsadbw<0>(wnd_p0, ref);
+
+    auto sad_3t = ext::mpsadbw<0>(wnd_m1, ref);
+    auto sad_2m = ext::mpsadbw<1>(wnd_m1, ref);
+    sad_5t += sad_5m;
+    sad_4m += sad_4b;
+    const auto wnd_m3 = load_unaligned(d8, row1 + kWindow);
+
+    const auto sad_3m = ext::mpsadbw<1>(wnd_p0, ref);
+    const auto sad_2b = ext::mpsadbw<2>(wnd_p0, ref);
+    sad_5t += sad_5b;
+    sad_4m += sad_4t;
+
+    const auto sad_3b = ext::mpsadbw<2>(wnd_p1, ref);
+    const auto sad_2t = ext::mpsadbw<0>(wnd_m2, ref);
+
+    auto sad_1b = ext::mpsadbw<2>(wnd_m1, ref);
+    auto sad_0t = ext::mpsadbw<0>(wnd_m4, ref);
+    sad_3t += sad_3m;
+    sad_2m += sad_2b;
+
+    const auto sad_1t = ext::mpsadbw<0>(wnd_m3, ref);
+    const auto sad_0m = ext::mpsadbw<1>(wnd_m3, ref);
+    sad_3t += sad_3b;
+    sad_2m += sad_2t;
+
+    const auto sad_1m = ext::mpsadbw<1>(wnd_m2, ref);
+    const auto sad_0b = ext::mpsadbw<2>(wnd_m2, ref);
+
+    sad_1b += sad_1t;
+    sad_0t += sad_0m;
+    sad_1b += sad_1m;
+    sad_0t += sad_0b;
+    // Output row K goes to lanes [K * 8, K * 8 + 8) of "sad".
+    store(sad_0t, d16, sad + 0 * d16.N);
+    store(sad_1b, d16, sad + 1 * d16.N);
+    store(sad_2m, d16, sad + 2 * d16.N);
+    store(sad_3t, d16, sad + 3 * d16.N);
+    store(sad_4m, d16, sad + 4 * d16.N);
+    store(sad_5t, d16, sad + 5 * d16.N);
+    store(sad_6t, d16, sad + 6 * d16.N);
+#else   // AVX2
+    const SIMD_FULL(uint8_t) d8;
+    const SIMD_FULL(uint32_t) d32;
+    const SIMD_FULL(uint64_t) d64;
+
+    // Leftmost pixel of the search window and reference patch.
+    const uint8_t* SIMD_RESTRICT wnd_pos_m4 = guide_m4 - 4;
+
+    const uint8_t* SIMD_RESTRICT ref_pos_m1 = guide_m4 + 3 * guide_stride - 1;
+    const size_t gbpr2 = 2 * guide_stride;
+    const size_t gbpr4 = 4 * guide_stride;
+
+    // "ref" := one four-byte quad from three rows (t/m/b = top/middle/bottom),
+    // assembled into 128 bits, which are duplicated for use by SAD (its
+    // arguments select which two quads/rows to use).
+    // Gather would be faster on SKX, but on HSW we reduce port 5 pressure by
+    // loading m and b MINUS 4 and 8 bytes to shift those quads upwards.
+    // This is safe because we're only shifting m and b => there are valid
+    // pixels to load from the previous row. x = don't care/ignored.
+    const auto ref_xxT = load_dup128(d8, ref_pos_m1);
+    const auto ref_xMx = load_dup128(d8, ref_pos_m1 + guide_stride - 4);
+    const auto ref_Bxx = load_dup128(d8, ref_pos_m1 + gbpr2 - 8);
+
+    // 3 patch rows x 7 window rows (m3 to p3) = 21x 128-bit SAD = 9 + 3 SAD(),
+    // which requires windows to be duplicated into both 128-bit lanes.
+
+    // SAD 10
+    const auto ref_xMT =
+        cast_to(d8, odd_even(cast_to(d32, ref_xMx), cast_to(d32, ref_xxT)));
+    const auto wnd_m3 = load_dup128(d8, wnd_pos_m4 + 1 * guide_stride);
+    const auto ref =
+        cast_to(d8, odd_even(cast_to(d64, ref_Bxx), cast_to(d64, ref_xMT)));
+    const auto wnd_m2 = load_dup128(d8, wnd_pos_m4 + gbpr2);
+    auto sad_1t0m = ext::mpsadbw2<0, 1>(wnd_m3, ref);
+    const auto wnd_m1 = load_dup128(d8, wnd_pos_m4 + 3 * guide_stride);
+    const auto sad_1m0b = ext::mpsadbw2<1, 2>(wnd_m2, ref);
+    const auto wnd_m4 = load_dup128(d8, wnd_pos_m4);
+    const auto wnd_m1m4 = concat_hi_lo(wnd_m1, wnd_m4);
+    const auto sad_1b0t = ext::mpsadbw2<2, 0>(wnd_m1m4, ref);
+    sad_1t0m += sad_1m0b;
+    sad_1t0m += sad_1b0t;
+    store(sad_1t0m, d16, sad + 0 * d16.N);
+
+    // SAD 32
+    const auto wnd_p0 = load_dup128(d8, wnd_pos_m4 + gbpr4);
+    const auto wnd_p1 = load_dup128(d8, wnd_pos_m4 + 5 * guide_stride);
+    auto sad_3t2m = ext::mpsadbw2<0, 1>(wnd_m1, ref);
+    const auto wnd_p1m2 = concat_hi_lo(wnd_p1, wnd_m2);
+    const auto sad_3m2b = ext::mpsadbw2<1, 2>(wnd_p0, ref);
+    const auto sad_3b2t = ext::mpsadbw2<2, 0>(wnd_p1m2, ref);
+    sad_3t2m += sad_3m2b;
+    sad_3t2m += sad_3b2t;
+    store(sad_3t2m, d16, sad + 1 * d16.N);
+
+    // SAD 54
+    const auto wnd_p2 = load_dup128(d8, wnd_pos_m4 + 6 * guide_stride);
+    const auto wnd_p3 = load_dup128(d8, wnd_pos_m4 + 7 * guide_stride);
+    const auto wnd_p3p0 = concat_hi_lo(wnd_p3, wnd_p0);
+    auto sad_5t4m = ext::mpsadbw2<0, 1>(wnd_p1, ref);
+    const auto sad_5m4b = ext::mpsadbw2<1, 2>(wnd_p2, ref);
+    const auto sad_5b4t = ext::mpsadbw2<2, 0>(wnd_p3p0, ref);
+    sad_5t4m += sad_5m4b;
+    sad_5t4m += sad_5b4t;
+    store(sad_5t4m, d16, sad + 2 * d16.N);
+    // Row 6 has no partner row, so it is computed in both 128-bit blocks.
+    const auto wnd_p4 = load_dup128(d8, wnd_pos_m4 + 8 * guide_stride);
+    auto sad_6 = ext::mpsadbw2<0, 0>(wnd_p2, ref);  // t
+    const auto sad_6m = ext::mpsadbw2<1, 1>(wnd_p3, ref);
+    const auto sad_6b = ext::mpsadbw2<2, 2>(wnd_p4, ref);
+    sad_6 += sad_6m;
+    sad_6 += sad_6b;
+    // Both 128-bit blocks are identical - required by SameBlocks().
+    store(sad_6, d16, sad + 3 * d16.N);
+#endif  // AVX2
+  }
+};
+
+//------------------------------------------------------------------------------
+// Exponentially decreasing weight functions
+
+// Max such that mul_high(kClampedSAD << kShiftSAD, -32768) + bias=127*128 > 0.
+// Also used by WeightExp to match WeightFast behavior at large distances.
+// Doubling this maximum requires doubling kMinSigma.
+constexpr int16_t kClampedSAD = 507;
+
+// Straightforward but slow: computes e^{-s*x}.
+// SetSigma must be called before either evaluation function, because mul_ is
+// not initialized by the constructor.
+class WeightExp {
+ public:
+  // W(sigma) = 0.5 = exp(mul_ * sigma) => mul_ = ln(0.5) / sigma.
+  // "sigma" is pre-scaled by 1 << kSigmaShift, hence the extra factor.
+  void SetSigma(const int sigma) {
+    mul_ = (1 << kSigmaShift) * -0.69314717f / sigma;
+  }
+
+  // Computes one weight per int16 lane of "sad" (clamped to kClampedSAD) and
+  // returns them as two float vectors: "lo" holds the first df.N weights,
+  // "hi" the following df.N.
+  SIMD_ATTR void operator()(const V16 sad, VF* SIMD_RESTRICT lo,
+                            VF* SIMD_RESTRICT hi) const {
+    const auto clamped = min(sad, set1(d16, kClampedSAD));
+    SIMD_ALIGN int16_t sad_lanes[d16.N];
+    store(clamped, d16, sad_lanes);
+    // Zero-filled and sized for both loads below: when d16.N == df.N (the
+    // scalar target) the "hi" load would otherwise read past the array.
+    SIMD_ALIGN float weight_lanes[d16.N + df.N] = {};
+    for (size_t i = 0; i < d16.N; ++i) {
+      weight_lanes[i] = expf(sad_lanes[i] * mul_);
+    }
+    *lo = load(df, weight_lanes);
+    *hi = load(df, weight_lanes + df.N);
+  }
+
+  // All blocks of "sad" are identical, but this function does not make use
+  // of that. Returns the weights of the first df.N lanes only.
+  SIMD_ATTR VF SameBlocks(const V16 sad) const {
+    const auto clamped = min(sad, set1(d16, kClampedSAD));
+    SIMD_ALIGN int16_t sad_lanes[d16.N];
+    store(clamped, d16, sad_lanes);
+    // 1 for scalar, otherwise a full f32 vector.
+    const size_t N = (d16.N + 1) / 2;
+    // load() is the aligned variant (unaligned data goes through
+    // load_unaligned), so the buffer must be SIMD_ALIGN like in operator().
+    SIMD_ALIGN float weight_lanes[N];
+    for (size_t i = 0; i < N; ++i) {
+      weight_lanes[i] = expf(sad_lanes[i] * mul_);
+    }
+    return load(df, weight_lanes);
+  }
+
+ private:
+  float mul_;  // ln(0.5) * (1 << kSigmaShift) / sigma; set by SetSigma.
+};
+
+// Fast approximation using the 2^x in the IEEE-754 representation.
+class WeightFast {
+ public:
+  using D32 = SIMD_FULL(int32_t);
+  // The bias is 127 shifted by (23 - 16), i.e. relative to the upper 16 bits.
+  SIMD_ATTR WeightFast() : bias_(set1(d16, 127 << (23 - 16))) {}
+  // Broadcasts "mul" to all lanes; it must lie in [-32768, -1].
+  SIMD_ATTR SIMD_INLINE void SetMul(const int mul) {
+    EPF_ASSERT(-32768 <= mul && mul <= -1);
+    mul_ = set1(d16, mul);
+  }
+
+  // Uses MulTable => must define after that class.
+  void SetSigma(const int sigma);
+
+  // Fills two f32 vectors from one i16 vector. On AVX2, "lo" are the lower
+  // halves of two vectors (avoids crossing blocks).
+  SIMD_ATTR SIMD_INLINE void operator()(const V16 sad, VF* SIMD_RESTRICT lo,
+                                        VF* SIMD_RESTRICT hi) const {
+    const auto zero = setzero(d16);
+
+    // Avoid 16-bit overflow; ensures biased_exp >= 0.
+    const auto clamped = min(sad, set1(d16, kClampedSAD));
+
+    // Pre-shift to increase the multiplier range.
+    const auto prescaled = shift_left<kShiftSAD>(clamped);
+
+    // _Decrease_ to an unbiased exponent and fill in some mantissa bits.
+    const auto unbiased_exp = ext::mul_high(prescaled, mul_);
+
+    // Add exponent bias.
+    auto biased_exp = unbiased_exp + bias_;
+
+    // Assemble into an IEEE-754 representation with mantissa = zero.
+    const auto bits_lo = zip_lo(zero, biased_exp);
+    const auto bits_hi = zip_hi(zero, biased_exp);
+
+    // Approximates exp(-s * sad).
+    *lo = cast_to(df, bits_lo);
+    *hi = cast_to(df, bits_hi);
+  }
+
+  // Same as above, but with faster i16x8->i32x8 conversion on AVX2 because all
+  // blocks of "sad" are equal.
+  SIMD_ATTR SIMD_INLINE VF SameBlocks(const V16 sad) const {
+    const auto clamped = min(sad, set1(d16, kClampedSAD));
+    const auto prescaled = shift_left<kShiftSAD>(clamped);
+    const auto unbiased_exp = ext::mul_high(prescaled, mul_);
+    const auto biased_exp = unbiased_exp + bias_;
+
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+    // Both blocks of biased_exp are identical, so we can MOVZX + shift into
+    // the upper 16 bits using a single-cycle shuffle.
+    SIMD_ALIGN constexpr int32_t kHi32From16[8] = {
+        0x0100FFFF, 0x0302FFFF, 0x0504FFFF, 0x0706FFFF,
+        0x0908FFFF, 0x0B0AFFFF, 0x0D0CFFFF, 0x0F0EFFFF,
+    };
+    const auto bits = table_lookup_bytes(cast_to(D32(), biased_exp),
+                                         load(D32(), kHi32From16));
+#else
+    const auto bits = zip_lo(setzero(d16), biased_exp);  // Same zip as bits_lo in operator().
+#endif
+
+    return cast_to(df, bits);
+  }
+
+ private:
+  // Larger shift = higher precision but narrower range of permissible SAD
+  // (limited by 16-bit overflow, see kClampedSAD).
+  static constexpr int kShiftSAD = 6;
+
+  const V16 bias_;  // Upper 16 bits of the IEEE-754 exponent bias.
+  V16 mul_;         // Set by SetMul.
+};
+
+// Used by WeightFast. Monostate: a process-wide table that maps a (scaled)
+// sigma in [kMinSigma, kMaxSigma] to the WeightFast multiplier for which the
+// weight at SAD == sigma >> kSigmaShift first exceeds 0.5.
+class MulTable {
+ public:
+  // Fills the table once; later calls return immediately. Single-threaded.
+  static SIMD_ATTR void Init() {
+    // Entries below kMinSigma are never written, so the first written entry
+    // is the one that tells whether the table has been filled.
+    if (mul_table_[kMinSigma] != 0) return;  // Already initialized
+
+    WeightFast weight_func;
+    const int gap = 1 << kSigmaShift;
+    // "mul" is not reset between sigmas: the search for each sigma resumes
+    // from the multiplier found for the previous (smaller) one.
+    int mul = -32768;
+    for (int sigma = kMinSigma; sigma <= kMaxSigma; sigma += gap) {
+      float w = 0.0f;
+      for (; mul < 0; ++mul) {
+        weight_func.SetMul(mul);
+        const auto weight =
+            weight_func.SameBlocks(set1(d16, sigma >> kSigmaShift));
+        w = get_part(SIMD_PART(float, 1)(), weight);
+        if (w > 0.5f) {
+          break;
+        }
+      }
+      mul_table_[sigma] = mul;
+    }
+
+    // Fill in (sigma, sigma + gap) via linear interpolation
+    for (int sigma = kMinSigma; sigma < kMaxSigma; sigma += gap) {
+      const float mul_step =
+          (mul_table_[sigma + gap] - mul_table_[sigma]) / float(gap);
+      for (int i = 1; i < gap; ++i) {
+        // Truncates toward zero, as the implicit float->int conversion did.
+        mul_table_[sigma + i] =
+            static_cast<int>(mul_table_[sigma] + i * mul_step);
+      }
+    }
+  }
+
+  // Returns the multiplier for "sigma"; aborts via EPF_ASSERT if sigma is out
+  // of range or the entry is still zero (Init not called).
+  static int Get(size_t sigma) {
+    EPF_ASSERT(kMinSigma <= sigma && sigma <= kMaxSigma);
+    EPF_ASSERT(mul_table_[sigma] != 0);
+    return mul_table_[sigma];
+  }
+
+ private:
+  // Indexed directly by sigma; zero means "not yet computed".
+  static int mul_table_[kMaxSigma + 1];
+};
+int MulTable::mul_table_[kMaxSigma + 1];
+
+// Looks up the multiplier stored in MulTable for "sigma" and installs it with
+// SetMul.
+SIMD_ATTR void WeightFast::SetSigma(const int sigma) {
+  const int table_mul = MulTable::Get(sigma);
+  // A zero entry means MulTable::Init has not been called yet.
+  EPF_ASSERT(table_mul != 0);
+  SetMul(table_mul);
+}
+
+// Test helper (slow): evaluates "weight_func" with "sad" broadcast to every
+// lane and returns the first lane's weight, after checking that the first
+// lanes of both output vectors agree.
+SIMD_ATTR float GetWeightForTest(const WeightFast& weight_func, int sad) {
+  PIK_ASSERT(0 <= sad && sad <= Distance::kMaxSAD);
+
+  VF weights_lo, weights_hi;
+  const auto broadcast_sad = set1(d16, sad);
+  weight_func(broadcast_sad, &weights_lo, &weights_hi);
+
+  // Extract lane 0 of each half through a single-lane float descriptor.
+  const float w0 = get_part(SIMD_PART(float, 1)(), weights_lo);
+  const float w1 = get_part(SIMD_PART(float, 1)(), weights_hi);
+  PIK_CHECK(w0 == w1);
+  return w0;
+}
+
+// Self-tests for the weight functions; in this file to reach WeightFast etc.
+class InternalWeightTests {
+ public:
+  static void Run() {
+    MulTable::Init();  // SetSigma asserts that the table has been filled.
+    TestEndpoints();
+    TestWeaklyMonotonicallyDecreasing();
+    TestFastMatchesExp();
+  }
+
+ private:
+  // Aborts unless the weight for sad is within tolerance; else returns it.
+  static SIMD_ATTR float EnsureWeightEquals(const float expected,
+                                            const int16_t sad, const int sigma,
+                                            const WeightFast& weight_func,
+                                            const float tolerance) {
+    const float actual = GetWeightForTest(weight_func, sad);
+    const bool close_enough = !(std::abs(actual - expected) > tolerance);
+    if (close_enough) return actual;
+
+    fprintf(stderr, "Weight %f too far from %f for sigma %d, sad %d\n", actual,
+            expected, sigma, sad);
+    abort();
+  }
+
+  static void TestEndpoints() {
+    constexpr float kTolerance = 0.02f;
+    WeightFast weight_func;
+    // Only integral sigma is tested because fractional SAD is unrepresentable.
+    for (int sigma = kMinSigma; sigma <= kMaxSigma; sigma += 1 << kSigmaShift) {
+      weight_func.SetSigma(sigma);
+      const int half_max_sad = sigma >> kSigmaShift;
+      // Expect weight 1.0 at zero SAD and 0.5 at the half-width SAD.
+      EnsureWeightEquals(1.0f, 0, sigma, weight_func, kTolerance);
+      EnsureWeightEquals(0.5f, half_max_sad, sigma, weight_func, kTolerance);
+    }
+  }
+
+  // Checks WeightFast against the WeightExp reference for every sigma and SAD.
+  static SIMD_ATTR void TestFastMatchesExp() {
+    WeightExp reference;
+    WeightFast fast;
+    const SIMD_PART(float, 1) first_lane;
+
+    for (int sigma = kMinSigma; sigma <= kMaxSigma; ++sigma) {
+      reference.SetSigma(sigma);
+      fast.SetSigma(sigma);
+      for (int sad = 0; sad <= Distance::kMaxSAD; ++sad) {
+        VF reference_lo, reference_hi;  // Only the lower half is inspected.
+        reference(set1(d16, sad), &reference_lo, &reference_hi);
+        const float expected = get_part(first_lane, reference_lo);
+        // The loosest tolerance is needed at very low sigma (0.75 vs 0.707).
+        EnsureWeightEquals(expected, sad, sigma, fast, 0.05f);
+      }
+    }
+  }
+
+  // For a fixed sigma, the weight must never increase as SAD grows.
+  static SIMD_ATTR void TestWeaklyMonotonicallyDecreasing() {
+    const SIMD_PART(float, 1) first_lane;
+    const int max_sad = kMaxSigma >> kSigmaShift;
+
+    WeightFast weight_func;
+    weight_func.SetSigma(30 << kSigmaShift);  // half width at half max
+
+    float previous = 1.1f;  // Starting bound for the very first comparison.
+    for (int sad = 1; sad <= max_sad; ++sad) {
+      VF lower, upper;
+      weight_func(set1(d16, sad), &lower, &upper);
+      const float current = get_part(first_lane, lower);
+      PIK_CHECK(current <= previous);
+      previous = current;
+    }
+  }
+};
+
+//------------------------------------------------------------------------------
+
+class WeightedSum {  // Weighted average over a pixel's neighbors.
+ public:
+  static constexpr size_t kNeighbors = Distance::kNeighbors;
+
+  static void Test() { TestHorzSums(); }  // Self-test entry point.
+
+  template <class WeightFunc>
+  static SIMD_ATTR SIMD_INLINE void Compute(
+      const uint8_t* SIMD_RESTRICT guide_m4_r,
+      const uint8_t* SIMD_RESTRICT guide_m4_g,
+      const uint8_t* SIMD_RESTRICT guide_m4_b, const size_t guide_stride,
+      const float* SIMD_RESTRICT in_m3_r, const float* SIMD_RESTRICT in_m3_g,
+      const float* SIMD_RESTRICT in_m3_b, const size_t in_stride,
+      const WeightFunc& weight_func, float* SIMD_RESTRICT out_r,
+      float* SIMD_RESTRICT out_g, float* SIMD_RESTRICT out_b) {
+    SIMD_ALIGN float weights[kNeighbors];  // One weight per neighbor.
+    ComputeWeights(guide_m4_r, guide_m4_g, guide_m4_b, guide_stride,
+                   weight_func, weights);
+
+    const auto kMinWeight = set1(df, kFlushWeightToZeroIfBelow);
+    for (size_t i = 0; i < kNeighbors; i += df.N) {
+      auto v = load(df, weights + i);
+      v &= v >= kMinWeight;  // Zero the lanes that are below the threshold.
+      store(v, df, weights + i);
+    }
+
+    // Joint weights are better than per-channel! Same weights for R, G, B.
+    FromWeights(in_m3_r, in_stride, weights, out_r);
+    FromWeights(in_m3_g, in_stride, weights, out_g);
+    FromWeights(in_m3_b, in_stride, weights, out_b);
+  }
+
+  static SIMD_INLINE void CopyOriginalBlock(const Image3F& in, const size_t x,
+                                            const size_t y,
+                                            Image3F* SIMD_RESTRICT out) {
+    for (size_t iy = 0; iy < kBlockDim; ++iy) {  // All 3 planes, row by row.
+      CopyBlockRow(in, 0, x, y + iy, out);
+      CopyBlockRow(in, 1, x, y + iy, out);
+      CopyBlockRow(in, 2, x, y + iy, out);
+    }
+  }
+
+ private:
+  static SIMD_INLINE void CopyBlockRow(const Image3F& in, const size_t c,
+                                       const size_t x, const size_t y,
+                                       Image3F* SIMD_RESTRICT out) {
+    const float* SIMD_RESTRICT in_row =
+        in.ConstPlaneRow(c, y + kBorder) + kBorder;  // Offset by kBorder in x, y.
+    float* SIMD_RESTRICT out_row = out->PlaneRow(c, y);
+    memcpy(out_row + x, in_row + x, kBlockDim * sizeof(*in_row));
+  }
+
+  // 2465 ops per pixel (2016 + 56 * (5 + 3) + 1)
+  // NOTE: weights may be stored interleaved.
+  template <class WeightFunc>
+  static SIMD_ATTR SIMD_INLINE void WeightsFromSAD(
+      const int16_t* SIMD_RESTRICT sad, const WeightFunc& weight_func,
+      float* SIMD_RESTRICT weights) {
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    for (size_t i = 0; i < kNeighbors; ++i) {
+      const auto sad_v = set1(d16, sad[i]);
+      VF lo, unused;
+      weight_func(sad_v, &lo, &unused);
+      store(lo, df, weights + i);
+    }
+#elif SIMD_TARGET_VALUE != SIMD_AVX2
+    f32x4 w0L, w0H, w1L, w1H, w2L, w2H, w3L, w3H, w4L, w4H, w5L, w5H, w6L, w6H;
+    weight_func(load(d16, sad + 0 * d16.N), &w0L, &w0H);
+    weight_func(load(d16, sad + 1 * d16.N), &w1L, &w1H);
+    weight_func(load(d16, sad + 2 * d16.N), &w2L, &w2H);
+    weight_func(load(d16, sad + 3 * d16.N), &w3L, &w3H);
+    weight_func(load(d16, sad + 4 * d16.N), &w4L, &w4H);
+    weight_func(load(d16, sad + 5 * d16.N), &w5L, &w5H);
+    weight_func(load(d16, sad + 6 * d16.N), &w6L, &w6H);
+    store(w0L, df, weights + 0 * df.N);
+    store(w0H, df, weights + 1 * df.N);
+    store(w1L, df, weights + 2 * df.N);
+    store(w1H, df, weights + 3 * df.N);
+    store(w2L, df, weights + 4 * df.N);
+    store(w2H, df, weights + 5 * df.N);
+    store(w3L, df, weights + 6 * df.N);
+    store(w3H, df, weights + 7 * df.N);
+    store(w4L, df, weights + 8 * df.N);
+    store(w4H, df, weights + 9 * df.N);
+    store(w5L, df, weights + 10 * df.N);
+    store(w5H, df, weights + 11 * df.N);
+    store(w6L, df, weights + 12 * df.N);
+    store(w6H, df, weights + 13 * df.N);
+#else  // AVX2
+    decltype(setzero(df)) w10L, w10H, w32L, w32H, w54L, w54H, w6;
+    weight_func(load(d16, sad + 0 * d16.N), &w10L, &w10H);
+    weight_func(load(d16, sad + 1 * d16.N), &w32L, &w32H);
+    weight_func(load(d16, sad + 2 * d16.N), &w54L, &w54H);
+    w6 = weight_func.SameBlocks(load(d16, sad + 3 * d16.N));
+    store(w10L, df, weights + 0 * df.N);
+    store(w10H, df, weights + 1 * df.N);
+    store(w32L, df, weights + 2 * df.N);
+    store(w32H, df, weights + 3 * df.N);
+    store(w54L, df, weights + 4 * df.N);
+    store(w54H, df, weights + 5 * df.N);
+    store(w6, df, weights + 6 * df.N);
+#endif
+  }
+
+  // Returns weights for 7x8 neighbor pixels
+  template <class WeightFunc>
+  static SIMD_ATTR SIMD_INLINE void ComputeWeights(
+      const uint8_t* SIMD_RESTRICT guide_m4_r,
+      const uint8_t* SIMD_RESTRICT guide_m4_g,
+      const uint8_t* SIMD_RESTRICT guide_m4_b, const size_t guide_stride,
+      const WeightFunc& weight_func, float* SIMD_RESTRICT weights) {
+    // It's important to include all channels, only computing for X and Y
+    // channels misses/weakens some edges.
+    SIMD_ALIGN int16_t sad_r[64];
+    SIMD_ALIGN int16_t sad_g[64];
+    SIMD_ALIGN int16_t sad_b[64];
+    Distance::SumsOfAbsoluteDifferences(guide_m4_r, guide_stride, &sad_r[0]);
+    Distance::SumsOfAbsoluteDifferences(guide_m4_g, guide_stride, &sad_g[0]);
+    Distance::SumsOfAbsoluteDifferences(guide_m4_b, guide_stride, &sad_b[0]);
+
+    SIMD_FULL(int16_t) d;
+    for (size_t i = 0; i < 64; i += d.N) {
+      const auto d0 = load(d, &sad_r[i]);
+      const auto d1 = load(d, &sad_g[i]);
+      const auto d2 = load(d, &sad_b[i]);
+      // Better than sum and sum/4.
+      const auto combined = max(max(d0, d1), d2);
+      store(combined, d, &sad_r[i]);  // sad_r is reused for the combined SAD.
+    }
+
+    // We actually see better results from central distance 0 as opposed to
+    // the minimum non-center (i.e. max weight).
+
+    WeightsFromSAD(&sad_r[0], weight_func, weights);
+  }
+
+  // Returns sum(num) / sum(den), computed with approximate_reciprocal.
+  template <class V>
+  static SIMD_ATTR SIMD_INLINE SIMD_PART(float, 1)::V
+      RatioOfHorizontalSums(const V num, const V den) {
+    const SIMD_PART(float, 1) d;
+    // Faster than concat_lo_lo/hi_hi plus single sum_of_lanes.
+    const auto sum_den = any_part(d, ext::sum_of_lanes(den));
+    const auto sum_num = any_part(d, ext::sum_of_lanes(num));
+    const auto rcp_den = approximate_reciprocal(sum_den);
+    return rcp_den * sum_num;
+  }
+
+  static SIMD_ATTR SIMD_INLINE void FromWeights(
+      const float* SIMD_RESTRICT in_m3, const size_t in_stride,
+      const float* SIMD_RESTRICT weights, float* SIMD_RESTRICT out) {
+#if SIMD_TARGET_VALUE == SIMD_NONE
+    float weighted_sum = 0.0f;
+    float sum_weights = 0.0f;
+    int i = 0;
+    for (int cy = -3; cy <= 3; ++cy) {
+      const float* SIMD_RESTRICT in_row =
+          ByteOffset(in_m3, (cy + 3) * in_stride);
+      for (int cx = -3; cx <= 4; ++cx) {
+        const float neighbor = in_row[cx];
+        const float weight = weights[i++];
+        weighted_sum += neighbor * weight;
+        sum_weights += weight;
+      }
+    }
+
+    // Safe because weights[27] == 1.
+    *out = weighted_sum / sum_weights;
+#elif SIMD_TARGET_VALUE != SIMD_AVX2
+    in_m3 -= 3;  // Rows are read starting 3 pixels to the left.
+
+    const auto w0L = load(df, weights + 0 * df.N);
+    const auto w0H = load(df, weights + 1 * df.N);
+    const auto w1L = load(df, weights + 2 * df.N);
+    const auto w1H = load(df, weights + 3 * df.N);
+    const auto w2L = load(df, weights + 4 * df.N);
+    const auto w2H = load(df, weights + 5 * df.N);
+    const auto w3L = load(df, weights + 6 * df.N);
+    const auto w3H = load(df, weights + 7 * df.N);
+    const auto w4L = load(df, weights + 8 * df.N);
+    const auto w4H = load(df, weights + 9 * df.N);
+    const auto w5L = load(df, weights + 10 * df.N);
+    const auto w5H = load(df, weights + 11 * df.N);
+    const auto w6L = load(df, weights + 12 * df.N);
+    const auto w6H = load(df, weights + 13 * df.N);
+
+    const auto n0L = load_unaligned(df, ByteOffset(in_m3, 0 * in_stride));
+    const auto n1L = load_unaligned(df, ByteOffset(in_m3, 1 * in_stride));
+    const auto n2L = load_unaligned(df, ByteOffset(in_m3, 2 * in_stride));
+    const auto n3L = load_unaligned(df, ByteOffset(in_m3, 3 * in_stride));
+    const auto n4L = load_unaligned(df, ByteOffset(in_m3, 4 * in_stride));
+    const auto n5L = load_unaligned(df, ByteOffset(in_m3, 5 * in_stride));
+    const auto n6L = load_unaligned(df, ByteOffset(in_m3, 6 * in_stride));
+    const auto n0H =
+        load_unaligned(df, ByteOffset(in_m3, 0 * in_stride) + df.N);
+    const auto n1H =
+        load_unaligned(df, ByteOffset(in_m3, 1 * in_stride) + df.N);
+    const auto n2H =
+        load_unaligned(df, ByteOffset(in_m3, 2 * in_stride) + df.N);
+    const auto n3H =
+        load_unaligned(df, ByteOffset(in_m3, 3 * in_stride) + df.N);
+    const auto n4H =
+        load_unaligned(df, ByteOffset(in_m3, 4 * in_stride) + df.N);
+    const auto n5H =
+        load_unaligned(df, ByteOffset(in_m3, 5 * in_stride) + df.N);
+    const auto n6H =
+        load_unaligned(df, ByteOffset(in_m3, 6 * in_stride) + df.N);
+
+    const auto sum_weights = w0L + w0H + w1L + w1H + w2L + w2H + w3L + w3H +
+                             w4L + w4H + w5L + w5H + w6L + w6H;
+
+    auto weighted_sum = n0L * w0L;
+    weighted_sum = mul_add(n0H, w0H, weighted_sum);
+    weighted_sum = mul_add(n1L, w1L, weighted_sum);
+    weighted_sum = mul_add(n1H, w1H, weighted_sum);
+    weighted_sum = mul_add(n2L, w2L, weighted_sum);
+    weighted_sum = mul_add(n2H, w2H, weighted_sum);
+    weighted_sum = mul_add(n3L, w3L, weighted_sum);
+    weighted_sum = mul_add(n3H, w3H, weighted_sum);
+    weighted_sum = mul_add(n4L, w4L, weighted_sum);
+    weighted_sum = mul_add(n4H, w4H, weighted_sum);
+    weighted_sum = mul_add(n5L, w5L, weighted_sum);
+    weighted_sum = mul_add(n5H, w5H, weighted_sum);
+    weighted_sum = mul_add(n6L, w6L, weighted_sum);
+    weighted_sum = mul_add(n6H, w6H, weighted_sum);
+
+    store(RatioOfHorizontalSums(weighted_sum, sum_weights),
+          SIMD_PART(float, 1)(), out);
+#else  // AVX2
+    in_m3 -= 3;  // Rows are read starting 3 pixels to the left.
+    const size_t kN2 = df.N / 2;  // Half the lanes of a full vector.
+
+    // Weighted sum 10
+    const auto n0 = load_unaligned(df, ByteOffset(in_m3, 0 * in_stride));
+    const auto n1L = load_dup128(df, ByteOffset(in_m3, 1 * in_stride));
+    const auto n1H = load_dup128(df, ByteOffset(in_m3, 1 * in_stride) + kN2);
+    const auto w10L = load(df, weights + 0 * df.N);
+    const auto w10H = load(df, weights + 1 * df.N);
+    const auto n10L = concat_hi_lo(n1L, n0);
+    const auto n10H = concat_hi_hi(n1H, n0);
+    const auto sum01 = w10L + w10H;
+    const auto mul0 = n10L * w10L;
+    const auto mul1 = n10H * w10H;
+
+    // Weighted sum 32
+    const auto n2 = load_unaligned(df, ByteOffset(in_m3, 2 * in_stride));
+    const auto n3L = load_dup128(df, ByteOffset(in_m3, 3 * in_stride));
+    const auto n3H = load_dup128(df, ByteOffset(in_m3, 3 * in_stride) + kN2);
+    const auto w32L = load(df, weights + 2 * df.N);
+    const auto w32H = load(df, weights + 3 * df.N);
+    const auto n32L = concat_hi_lo(n3L, n2);
+    const auto n32H = concat_hi_hi(n3H, n2);
+    const auto sum23 = w32L + w32H;
+    const auto mul02 = mul_add(n32L, w32L, mul0);
+    const auto mul13 = mul_add(n32H, w32H, mul1);
+
+    // Weighted sum 54
+    const auto n4 = load_unaligned(df, ByteOffset(in_m3, 4 * in_stride));
+    const auto n5L = load_dup128(df, ByteOffset(in_m3, 5 * in_stride));
+    const auto n5H = load_dup128(df, ByteOffset(in_m3, 5 * in_stride) + kN2);
+    const auto w54L = load(df, weights + 4 * df.N);
+    const auto w54H = load(df, weights + 5 * df.N);
+    const auto n54L = concat_hi_lo(n5L, n4);
+    const auto n54H = concat_hi_hi(n5H, n4);
+    const auto sum0123 = sum01 + sum23;
+    const auto mul024 = mul_add(n54L, w54L, mul02);
+    const auto sum45 = w54L + w54H;
+    const auto mul135 = mul_add(n54H, w54H, mul13);
+
+    const auto mul012345 = mul024 + mul135;
+    const auto sum012345 = sum0123 + sum45;
+
+    // Weighted sum 6
+    const auto n6 = load_unaligned(df, ByteOffset(in_m3, 6 * in_stride));
+    const auto w6 = load(df, weights + 6 * df.N);
+    const auto weighted_sum = mul_add(n6, w6, mul012345);
+    const auto sum_weights = sum012345 + w6;
+
+    store(RatioOfHorizontalSums(weighted_sum, sum_weights),
+          SIMD_PART(float, 1)(), out);
+#endif
+  }
+
+  static SIMD_ATTR SIMD_INLINE float Reciprocal12(const float x) {
+    const SIMD_PART(float, 1) d;  // Scalar wrapper: approximate_reciprocal.
+    return get_part(d, approximate_reciprocal(set_part(d, x)));
+  }
+
+  static SIMD_ATTR void TestHorzSums() {
+    const SIMD_PART(float, 1) df1;
+
+    SIMD_ALIGN const float in0_lanes[8] = {256.8f, 128.7f, 64.6f, 32.5f,
+                                           16.4f,  8.3f,   4.2f,  2.1f};
+    SIMD_ALIGN const float in1_lanes[8] = {-0.1f, -1.2f, -2.3f, -3.4f,
+                                           -4.5f, -5.6f, -6.7f, -7.8f};
+    for (size_t i = 0; i < 8; i += df.N) {  // One full vector per iteration.
+      const auto in0 = load(df, in0_lanes + i);
+      const auto in1 = load(df, in1_lanes + i);
+
+      const float expected0 =
+          std::accumulate(in0_lanes + i, in0_lanes + i + df.N, 0.0f);
+      const float expected1 =
+          std::accumulate(in1_lanes + i, in1_lanes + i + df.N, 0.0f);
+      const float expected = Reciprocal12(expected1) * expected0;
+
+      const float actual = get_part(df1, RatioOfHorizontalSums(in0, in1));
+      PIK_CHECK(std::abs(expected - actual) < 2E-2f);
+    }
+  }
+};
+
+// POD. Pads rows of "in" into "padded" and tracks per-channel min/max.
+class SIMD_ALIGN MinMaxWorker {
+ public:
+  SIMD_ATTR void Init(const Image3F* SIMD_RESTRICT in,
+                      Image3F* SIMD_RESTRICT padded) {
+    in_ = in;
+    padded_ = padded;
+    xsize_ = in->xsize();
+    ysize_ = in->ysize();
+    aligned_x_end_ = xsize_ - (xsize_ % df.N);  // Stored only; not read here.
+
+    for (int c = 0; c < 3; ++c) {
+      store(set1(df, FLT_MAX), df, min_[c]);
+      store(set1(df, -FLT_MAX), df, max_[c]);
+      scalar_min_[c] = FLT_MAX;
+      scalar_max_[c] = -FLT_MAX;
+    }
+  }
+
+  // iy may be out of bounds (for padding).
+  SIMD_ATTR void Run(int64_t iy) {
+    const size_t y = (static_cast<size_t>(iy));  // assumes 2's complement
+    if (PIK_LIKELY(y < ysize_)) {  // A negative iy wraps to a huge y.
+      for (size_t c = 0; c < 3; ++c) {
+        PadAndUpdate(c, y);
+      }
+    } else {
+      for (size_t c = 0; c < 3; ++c) {
+        PadTopBottomRow(c, iy);
+      }
+    }
+  }
+
+  SIMD_ATTR void Assimilate(const MinMaxWorker& other) {  // Merge extrema.
+    for (int c = 0; c < 3; ++c) {
+      const auto min1 = load(df, min_[c]);
+      const auto min2 = load(df, other.min_[c]);
+      store(min(min1, min2), df, min_[c]);
+      const auto max1 = load(df, max_[c]);
+      const auto max2 = load(df, other.max_[c]);
+      store(max(max1, max2), df, max_[c]);
+      scalar_min_[c] = std::min(scalar_min_[c], other.scalar_min_[c]);
+      scalar_max_[c] = std::max(scalar_max_[c], other.scalar_max_[c]);
+    }
+  }
+
+  SIMD_ATTR void Finalize(std::array<float, 3>* PIK_RESTRICT min,
+                          std::array<float, 3>* PIK_RESTRICT max) const {
+    for (int c = 0; c < 3; ++c) {  // Combine vector lanes with scalar extrema.
+      (*min)[c] =
+          std::min(scalar_min_[c], *std::min_element(min_[c], min_[c] + df.N));
+      (*max)[c] =
+          std::max(scalar_max_[c], *std::max_element(max_[c], max_[c] + df.N));
+    }
+  }
+
+ private:
+  // Interior, y is valid.
+  SIMD_ATTR void PadAndUpdate(const size_t c, const size_t y) {
+    const float* SIMD_RESTRICT row_in = in_->ConstPlaneRow(c, y);
+    float* SIMD_RESTRICT row_out = padded_->PlaneRow(c, y + kBorder) + kBorder;
+
+    // Ensure store alignment (faster than loading aligned)
+    constexpr int64_t aligned_begin = (kBorder + df.N - 1) & ~(df.N - 1);
+
+    // Local copies avoid stores in each iteration. Part+min also leads to
+    // better code than std::min (VUCOMISS + CMOV).
+    const SIMD_PART(float, 1) d1;
+    auto my_min1 = load(d1, &scalar_min_[c]);
+    auto my_max1 = load(d1, &scalar_max_[c]);
+
+    // Left: mirror and vector alignment
+    int64_t ix = -kBorder;
+    for (; ix < aligned_begin - kBorder; ++ix) {
+      const int64_t clamped_x = Mirror(ix, xsize_);
+      const auto in = load(d1, row_in + clamped_x);
+      my_min1 = min(my_min1, in);
+      my_max1 = max(my_max1, in);
+      store(in, d1, row_out + ix);
+    }
+
+    // Interior: whole vectors
+    auto my_min = load(df, min_[c]);
+    auto my_max = load(df, max_[c]);
+    for (; ix + df.N <= xsize_; ix += df.N) {
+      const auto in = load_unaligned(df, row_in + ix);
+      my_min = min(my_min, in);
+      my_max = max(my_max, in);
+      store(in, df, row_out + ix);
+    }
+    store(my_min, df, min_[c]);
+    store(my_max, df, max_[c]);
+
+    // Right: vector remainder and mirror
+    for (; ix < xsize_ + kBorder; ++ix) {
+      const int64_t clamped_x = Mirror(ix, xsize_);
+      const auto in = load(d1, row_in + clamped_x);
+      my_min1 = min(my_min1, in);
+      my_max1 = max(my_max1, in);
+      store(in, d1, row_out + ix);
+    }
+
+    store(my_min1, d1, &scalar_min_[c]);
+    store(my_max1, d1, &scalar_max_[c]);
+  }
+
+  // Border, no need to update min/max from mirrored values.
+  SIMD_ATTR void PadTopBottomRow(const size_t c, const int64_t iy) {
+    const int64_t clamped_y = WrapMirror()(iy, ysize_);
+    const float* SIMD_RESTRICT row_in = in_->ConstPlaneRow(c, clamped_y);
+    float* SIMD_RESTRICT row_out = padded_->PlaneRow(c, iy + kBorder) + kBorder;
+
+    // Ensure store alignment (faster than loading aligned)
+    constexpr int64_t aligned_begin = (kBorder + df.N - 1) & ~(df.N - 1);
+
+    // Left: mirror and vector alignment
+    int64_t ix = -kBorder;
+    for (; ix < aligned_begin - kBorder; ++ix) {
+      const int64_t clamped_x = Mirror(ix, xsize_);
+      row_out[ix] = row_in[clamped_x];
+    }
+
+    // Interior: whole vectors
+    for (; ix + df.N <= xsize_; ix += df.N) {
+      const auto src = load_unaligned(df, row_in + ix);
+      store(src, df, row_out + ix);
+    }
+
+    // Right: vector remainder and mirror
+    for (; ix < xsize_ + kBorder; ++ix) {
+      const int64_t clamped_x = Mirror(ix, xsize_);
+      row_out[ix] = row_in[clamped_x];
+    }
+  }
+
+  SIMD_ALIGN float min_[3][df.N];  // Per-lane minima of the vector loop.
+  SIMD_ALIGN float max_[3][df.N];  // Per-lane maxima of the vector loop.
+  const Image3F* SIMD_RESTRICT in_;  // not owned
+  Image3F* SIMD_RESTRICT padded_;    // not owned
+  size_t xsize_;
+  size_t ysize_;
+  size_t aligned_x_end_;
+  float scalar_min_[3];  // Minima seen by the single-lane loops.
+  float scalar_max_[3];  // Maxima seen by the single-lane loops.
+};
+static_assert(sizeof(MinMaxWorker) % sizeof(DF::V) == 0, "Align");
+
+// Copies "in" into "padded", adding kBorder mirrored pixels on each side, and
+// writes the per-channel minima/maxima of "in" to *min and *max.
+SIMD_ATTR void MinMax(const Image3F& in, ThreadPool* pool,
+                      std::array<float, 3>* SIMD_RESTRICT min,
+                      std::array<float, 3>* SIMD_RESTRICT max,
+                      Image3F* SIMD_RESTRICT padded) {
+  PROFILER_FUNC;
+  // Heap-allocated: a bit too large for the stack, and the min_/max_ members
+  // of each worker need aligned storage.
+  const size_t worker_count = NumThreads(pool);
+  auto storage = AllocateArray(worker_count * sizeof(MinMaxWorker));
+  MinMaxWorker* const state = reinterpret_cast<MinMaxWorker*>(storage.get());
+  for (size_t i = 0; i != worker_count; ++i) {
+    state[i].Init(&in, padded);
+  }
+
+  // One task per padded row; offset by kBorder since tasks must be >= 0.
+  RunOnPool(pool, 0, in.ysize() + 2 * kBorder,
+            [state](const int task, const int thread) {
+              state[thread].Run(task - kBorder);
+            });
+
+  // Merge all per-thread results into worker 0 and read the answer from it.
+  MinMaxWorker& merged = state[0];
+  for (size_t i = 1; i < worker_count; ++i) merged.Assimilate(state[i]);
+  merged.Finalize(min, max);
+}
+
+// Builds the 8-bit guide image for "padded". u8 is required by the SAD
+// hardware acceleration, and converting once up front is faster than
+// converting a window for each pixel.
+SIMD_ATTR Image3B MakeGuide(const Image3F& padded,
+                            const std::array<float, 3>& min,
+                            const std::array<float, 3>& max, ThreadPool* pool) {
+  const SIMD_FULL(int32_t) di;
+  const SIMD_FULL(uint32_t) du;
+  const SIMD_PART(uint8_t, df.N) d8;
+
+  const size_t xsize = padded.xsize();
+  const size_t ysize = padded.ysize();
+  Image3B guide(xsize, ysize);
+
+  // Per channel: guide = (value - offset) * gain.
+  float offset[3];
+  float gain[3];
+
+#if EPF_INDEP_RANGE
+  const float channel_scale[3] = {1.0f / 16, 1.0f / 4, 1.0f};
+  for (size_t c = 0; c < 3; ++c) {
+    PIK_CHECK(max[c] >= min[c]);
+    const float extent = max[c] - min[c];
+    // Prevent division by zero. The guide is zero in that case because min
+    // is subtracted.
+    const float range = (extent == 0.0f) ? 1.0f : extent;
+    offset[c] = min[c];
+    gain[c] = 255.0f * channel_scale[c] / range;
+  }
+#else
+  const float all_min = *std::min_element(min.begin(), min.end());
+  const float all_max = *std::max_element(max.begin(), max.end());
+  const float range = all_max - all_min;
+  offset[0] = offset[1] = offset[2] = all_min;
+  gain[0] = gain[1] = gain[2] = range == 0.0f ? 1.0f : 255.0f / range;
+#endif
+
+  RunOnPool(pool, 0, ysize, [&](const int task, const int thread) SIMD_ATTR {
+    const size_t y = task;
+    for (size_t c = 0; c < 3; ++c) {
+      const auto vgain = set1(df, gain[c]);
+      const auto voffset = set1(df, offset[c]);
+      const float* SIMD_RESTRICT src = padded.ConstPlaneRow(c, y);
+      uint8_t* SIMD_RESTRICT dst = guide.PlaneRow(c, y);
+
+      // Quantize whole vectors; the last one may extend past xsize.
+      size_t x = 0;
+      while (x < xsize) {
+        const auto centered = load(df, src + x) - voffset;
+        const auto as_i32 = convert_to(di, centered * vgain);
+        store(u8_from_u32(cast_to(du, as_i32)), d8, dst + x);
+        x += df.N;
+      }
+
+      // MPSADBW will read 16 bytes but only 11 need be valid;
+      // zero-initialize the rest.
+      while (x < xsize + 16 - 11) {
+        store(setzero(d8), d8, dst + x);
+        x += df.N;
+      }
+    }  // c
+  });  // y
+
+  return guide;
+}
+
+// Returns the filter sigma for one block. Larger signal => less quantization,
+// less smoothing. Interpolates the kTableSize-entry LUT chosen by lut_id,
+// scales by stretch, adds a bias and clamps to kMaxSigma.
+static PIK_INLINE int SigmaFromQuant(float signal, float stretch, int lut_id,
+                                     const float* luts, EpfStats* stats) {
+  constexpr size_t kTableSize = 16;
+  const float* lut = luts + kTableSize * lut_id;
+#if EPF_NEW_SIGMA
+#error "Add new LUT"
+#else
+  const float min_signal = 0.022156f;  // baseline
+  const float max_signal = 0.531738;
+  const float mul_signal = 29.435892;
+#endif
+  float unscaled_sigma;
+  if (signal <= min_signal) {
+#if EPF_ENABLE_STATS
+    stats->less += 1;
+#endif
+    unscaled_sigma = lut[0];
+  } else if (signal >= max_signal) {
+#if EPF_ENABLE_STATS
+    stats->greater += 1;
+#endif
+    unscaled_sigma = lut[kTableSize - 1];
+  } else {
+    const float pos = (signal - min_signal) * mul_signal;
+    const int64_t trunc = static_cast<int64_t>(pos);
+    // Rounding may give trunc == kTableSize - 1; keep lut[idx + 1] in bounds.
+    const int64_t idx = std::min<int64_t>(trunc, kTableSize - 2);
+    const float frac = std::min(pos - idx, 1.0f);
+    PIK_ASSERT(0 <= idx && 0.0f <= frac);
+    unscaled_sigma = frac * lut[idx + 1] + (1.0f - frac) * lut[idx];
+  }
+  static const float kBias = 0.5182760822018414;
+  const float biased = unscaled_sigma * stretch + kBias;
+  // Clamp before converting: out-of-range float -> int is undefined.
+  if (biased >= static_cast<float>(kMaxSigma)) return kMaxSigma;
+  return static_cast<int>(biased);  // Blocks with very low sigma are skipped.
+}
+
+SIMD_ATTR void AdaptiveFilter(const Image3F& in_guide, const Image3F& in,
+                              const ImageI* ac_quant, float quant_scale,
+                              const ImageB& lut_ids,
+                              const AcStrategyImage& ac_strategy,
+                              const EpfParams& epf_params,
+                              Image3F* smoothed, EpfStats* epf_stats) {
+  PIK_ASSERT(SameSize(in, *smoothed));
+  const size_t xsize = smoothed->xsize();
+  const size_t ysize = smoothed->ysize();
+  PIK_CHECK(xsize != 0 && ysize != 0);
+  PIK_CHECK((xsize | ysize) % kBlockDim == 0);
+  const size_t ysize_blocks = DivCeil(ysize, kBlockDim);
+  PROFILER_FUNC;  // Filters "in" into *smoothed using a per-block sigma.
+
+  PIK_ASSERT(epf_params.enable_adaptive);
+
+  std::array<float, 3> min, max;
+
+  Image3F padded_in(xsize + 2 * kBorder, ysize + 2 * kBorder);
+  MinMax(in, nullptr, &min, &max, &padded_in);  // min/max overwritten below.
+
+  const size_t padded_in_stride = padded_in.bytes_per_row();
+
+  Image3F padded_guide(xsize + 2 * kBorder, ysize + 2 * kBorder);
+  MinMax(epf_params.use_sharpened ? in : in_guide, nullptr, &min, &max,
+         &padded_guide);  // min/max now describe the guide source.
+
+  if (epf_stats != nullptr) {  // Statistics are optional.
+    for (int c = 0; c < 3; ++c) {
+      epf_stats->s_ranges[c].Notify(max[c] - min[c]);
+    }
+  }
+  const float all_max = *std::max_element(max.begin(), max.end());
+  const float all_min = *std::min_element(min.begin(), min.end());
+  const float stretch = all_min == all_max ? 1.f : 255.0f / (all_max - all_min);
+
+  Image3B guide = MakeGuide(padded_guide, min, max, nullptr);
+  const size_t guide_stride = guide.bytes_per_row();
+
+#if EPF_DUMP_SIGMA
+  ImageB dump(DivCeil(xsize, kBlockDim), ysize_blocks);
+#endif
+
+#if !EPF_NEW_SIGMA
+  quant_scale = 0.039324273f;  // Replaces the caller's value.
+#endif
+
+  std::vector<EpfStats> all_stats(NumThreads(nullptr));  // Only [0] is used.
+
+  const float lut[] = {  // Two 16-entry tables, selected by lut_id.
+      1.9815775622811198,
+      1.9715084740908622,
+      1.6819963065873933,
+      1.2146133632942862,
+      1.0395364091521881,
+      0.93552327583169714,
+      0.68568655651684773,
+      0.51174440217871964,
+      0.36397262821018583,
+      0.31621830414136975,
+      0.30262954326557712,
+      0.246314237855494,
+      0.21617524864418683,
+      0.10,
+      0.05,
+      0.0,
+      // TODO(robryk): This is a temporary test alternative LUT. Provide actual
+      // alternatives.
+      1.9815775622811198 / 2.0,
+      1.9715084740908622 / 2.0,
+      1.6819963065873933 / 2.0,
+      1.2146133632942862 / 2.0,
+      1.0395364091521881 / 2.0,
+      0.93552327583169714 / 2.0,
+      0.68568655651684773 / 2.0,
+      0.51174440217871964 / 2.0,
+      0.36397262821018583 / 2.0,
+      0.31621830414136975 / 2.0,
+      0.30262954326557712 / 2.0,
+      0.246314237855494 / 2.0,
+      0.21617524864418683 / 2.0,
+      0.10 / 2.0,
+      0.05 / 2.0,
+      0.0 / 2.0,
+  };
+
+	  for(int task = 0; task < ysize_blocks; ++task) {  // One row of blocks each.
+        const size_t by = task;
+        EpfStats& stats = all_stats[0];
+        const int* SIMD_RESTRICT ac_quant_row = ac_quant->Row(by);
+        const uint8_t* SIMD_RESTRICT lut_id_row = lut_ids.Row(by);
+        AcStrategyRow ac_strategy_row = ac_strategy.ConstRow(by);
+#if EPF_DUMP_SIGMA
+        uint8_t* dump_row = dump.Row(by);
+#endif
+
+        WeightFast weight_func;
+
+        for (size_t bx = 0; bx < xsize; bx += kBlockDim) {
+          const float ac_q = ac_quant_row[bx / kBlockDim];
+          const int lut_id = lut_id_row[bx / kBlockDim];
+          const AcStrategy ac_strategy = ac_strategy_row[bx / kBlockDim];
+          const float scale = ac_strategy.ARQuantScale();
+          // fprintf(stderr, "%d %d %g (%g %g)\n", by, bx, ac_q, scale,
+          // quant_scale);
+          const float quant = ac_q * scale * quant_scale;
+          const int sigma = SigmaFromQuant(quant, stretch, lut_id, lut, &stats);
+#if EPF_ENABLE_STATS
+          stats.s_sigma.Notify(sigma);
+          stats.s_quant.Notify(quant);
+          stats.total += 1;
+#endif
+#if EPF_DUMP_SIGMA
+          dump_row[bx / kBlockDim] = std::min(std::max(0, sigma), 255);
+#endif
+          if (sigma < kMinSigma) {  // Skip filtering: copy the input block.
+            WeightedSum::CopyOriginalBlock(padded_in, bx, by * kBlockDim,
+                                           smoothed);
+#if EPF_ENABLE_STATS
+            stats.skipped += 1;
+#endif
+            continue;
+          }
+          weight_func.SetSigma(sigma);
+
+          for (size_t iy = 0; iy < kBlockDim; ++iy) {
+            const size_t y = by * kBlockDim + iy;
+            // "guide_m4" and "in_m3" are 4 and 3 rows above the current pixel.
+            const uint8_t* SIMD_RESTRICT guide_m4_r =
+                guide.ConstPlaneRow(0, y + kBorder - 4) + kBorder;
+            const uint8_t* SIMD_RESTRICT guide_m4_g =
+                guide.ConstPlaneRow(1, y + kBorder - 4) + kBorder;
+            const uint8_t* SIMD_RESTRICT guide_m4_b =
+                guide.ConstPlaneRow(2, y + kBorder - 4) + kBorder;
+            const float* SIMD_RESTRICT in_m3_r =
+                padded_in.ConstPlaneRow(0, y - 3 + kBorder) + kBorder;
+            const float* SIMD_RESTRICT in_m3_g =
+                padded_in.ConstPlaneRow(1, y - 3 + kBorder) + kBorder;
+            const float* SIMD_RESTRICT in_m3_b =
+                padded_in.ConstPlaneRow(2, y - 3 + kBorder) + kBorder;
+            float* SIMD_RESTRICT out_r = smoothed->PlaneRow(0, y);
+            float* SIMD_RESTRICT out_g = smoothed->PlaneRow(1, y);
+            float* SIMD_RESTRICT out_b = smoothed->PlaneRow(2, y);
+
+            for (size_t ix = 0; ix < kBlockDim; ++ix) {
+              const size_t x = bx + ix;
+              WeightedSum::Compute(
+                  guide_m4_r + x, guide_m4_g + x, guide_m4_b + x, guide_stride,
+                  in_m3_r + x, in_m3_g + x, in_m3_b + x, padded_in_stride,
+                  weight_func, out_r + x, out_g + x, out_b + x);
+            }  // ix
+          }    // iy
+        }      // bx
+      }        // by
+
+  if (epf_stats != nullptr) {
+    for (EpfStats& stats : all_stats) {
+      epf_stats->Assimilate(stats);
+    }
+  }
+
+#if EPF_DUMP_SIGMA
+  WriteImage(ImageFormatPNG(), dump, "/tmp/out/sigma.png");
+#endif
+}
+
+// Closure for ThreadPool, with mutable per-thread state. Uses one fixed sigma
+// (set in the constructor) for every block it filters.
+class FilterWorkers {
+ public:
+  // "guide" and "in" are addressed with a kBorder offset; "out" is not.
+  explicit FilterWorkers(size_t num_workers, const Image3B& guide,
+                         const Image3F& in, const int sigma,
+                         Image3F* SIMD_RESTRICT out)
+      : guide_(guide),
+        in_(in),
+        guide_stride_(guide.bytes_per_row()),
+        in_stride_(in.bytes_per_row()),
+        out_(out),
+        xsize_(out->xsize()),  // Must use out because in is padded.
+        ysize_(out->ysize()) {
+    PIK_ASSERT(!(sigma < kMinSigma || kMaxSigma < sigma));
+    weight_func_.SetSigma(sigma);
+  }
+
+  // Filters the blocks of block row "by" (BLOCK y index; "1" is the second
+  // block). The thread index is not needed.
+  SIMD_ATTR void Run(const size_t by, const int thread) {
+    const size_t y_begin = by * kBlockDim;
+    for (size_t bx = 0; bx < xsize_; bx += kBlockDim) {
+      for (size_t y = y_begin; y < y_begin + kBlockDim; ++y) {
+        // The guide window starts 4 rows above the current pixel and the
+        // input window 3 rows above it.
+        const size_t guide_y = y + kBorder - 4;
+        const size_t in_y = y - 3 + kBorder;
+
+        // Per-channel row pointers (index 0, 1, 2).
+        const uint8_t* SIMD_RESTRICT guide_m4[3];
+        const float* SIMD_RESTRICT in_m3[3];
+        float* SIMD_RESTRICT out[3];
+        for (size_t c = 0; c < 3; ++c) {
+          guide_m4[c] = guide_.ConstPlaneRow(c, guide_y) + kBorder;
+          in_m3[c] = in_.ConstPlaneRow(c, in_y) + kBorder;
+          out[c] = out_->PlaneRow(c, y);
+        }
+
+        for (size_t x = bx; x < bx + kBlockDim; ++x) {
+          WeightedSum::Compute(guide_m4[0] + x, guide_m4[1] + x,
+                               guide_m4[2] + x, guide_stride_, in_m3[0] + x,
+                               in_m3[1] + x, in_m3[2] + x, in_stride_,
+                               weight_func_, out[0] + x, out[1] + x,
+                               out[2] + x);
+        }
+      }
+    }
+  }
+
+ private:
+  // The images are referenced, not copied.
+  const Image3B& guide_;
+  const Image3F& in_;
+  size_t guide_stride_;
+  size_t in_stride_;
+  Image3F* SIMD_RESTRICT out_;
+
+  // Dimensions of *out_.
+  size_t xsize_;
+  size_t ysize_;
+  WeightFast weight_func_;
+};
+
+void Filter(const Image3F& in_guide, const Image3F& in,
+            const EpfParams& epf_params, float* PIK_RESTRICT stretch,
+            Image3F* smoothed) {
+  PIK_ASSERT(SameSize(in, *smoothed));
+  const size_t xsize = smoothed->xsize();
+  const size_t ysize = smoothed->ysize();
+  PIK_CHECK(xsize != 0 && ysize != 0);
+  PIK_CHECK((xsize | ysize) % kBlockDim == 0);  // valid if kBlockDim is 2^k
+  const size_t ysize_blocks = DivCeil(ysize, kBlockDim);
+  PROFILER_FUNC;
+  // Fixed-sigma filter of "in" into "smoothed"; sigma == 0 is a plain copy.
+  PIK_ASSERT(!epf_params.enable_adaptive);
+  if (epf_params.sigma == 0) {
+    CopyImageTo(in, smoothed);
+    *stretch = 1.0f;
+    return;
+  }
+  // Both padded images are 2 * kBorder larger than the output in x and y.
+  std::array<float, 3> min, max;
+  Image3F padded_in(xsize + 2 * kBorder, ysize + 2 * kBorder);
+  MinMax(in, /*pool=*/nullptr, &min, &max, &padded_in);
+  // NOTE(review): min/max are passed again, presumably replacing those of "in".
+  Image3F padded_guide(xsize + 2 * kBorder, ysize + 2 * kBorder);
+  MinMax(epf_params.use_sharpened ? in : in_guide, /*pool=*/nullptr, &min, &max,
+         &padded_guide);
+  // *stretch = 255 / (overall max - overall min); 1 if the range is empty.
+  const float all_max = *std::max_element(max.begin(), max.end());
+  const float all_min = *std::min_element(min.begin(), min.end());
+  *stretch = all_min == all_max ? 1.0f : 255.0f / (all_max - all_min);
+
+  Image3B guide = MakeGuide(padded_guide, min, max, /*pool=*/nullptr);
+  // Serial execution: a single FilterWorkers runs every block row in turn.
+  FilterWorkers workers(1, guide, padded_in, epf_params.sigma, smoothed);
+  for (size_t y = 0; y < ysize_blocks; ++y) {
+    workers.Run(y, /*thread=*/0);
+  }
+}
+
+}  // namespace
+}  // namespace SIMD_NAMESPACE
+
+template <>  // Per-target setup: calls MulTable::Init() for SIMD_TARGET.
+void InitEdgePreservingFilter::operator()<SIMD_TARGET>() const {
+  SIMD_NAMESPACE::MulTable::Init();
+}
+
+template <>  // Adaptive overload: forwards every argument to AdaptiveFilter.
+void EdgePreservingFilter::operator()<SIMD_TARGET>(
+    const Image3F& in_guide, const Image3F& in, const ImageI* ac_quant,
+    float sigma_mul, const ImageB& lut_ids, const AcStrategyImage& ac_strategy,
+    const EpfParams& epf_params, Image3F* smoothed,
+    EpfStats* epf_stats) const {
+  SIMD_NAMESPACE::AdaptiveFilter(in_guide, in, ac_quant, sigma_mul, lut_ids,
+                                 ac_strategy, epf_params, smoothed,
+                                 epf_stats);
+}
+
+template <>  // Fixed-sigma overload: forwards every argument to Filter.
+void EdgePreservingFilter::operator()<SIMD_TARGET>(const Image3F& in_guide,
+                                                   const Image3F& in,
+                                                   const EpfParams& epf_params,
+                                                   float* PIK_RESTRICT stretch,
+                                                   Image3F* smoothed) const {
+  SIMD_NAMESPACE::Filter(in_guide, in, epf_params, stretch, smoothed);
+}
+
+template <>  // Runs InternalWeightTests and WeightedSum::Test for this target.
+void EdgePreservingFilterTest::operator()<SIMD_TARGET>() const {
+  SIMD_NAMESPACE::InternalWeightTests::Run();
+  SIMD_NAMESPACE::WeightedSum::Test();
+  fprintf(stderr, "Tests OK: %s\n", vec_name<SIMD_NAMESPACE::DF>());
+}
+
+template <>  // Test hook: weight for (sigma, sad); calls MulTable::Init first.
+float EdgePreservingFilterTest::operator()<SIMD_TARGET>(int sigma,
+                                                        int sad) const {
+  SIMD_NAMESPACE::MulTable::Init();
+  SIMD_NAMESPACE::WeightFast weight_func;
+  weight_func.SetSigma(sigma);
+  return SIMD_NAMESPACE::GetWeightForTest(weight_func, sad);
+}
+
+}  // namespace pik
+
+#endif  // SIMD_ATTR_IMPL
diff --git a/codec/L2/demos/pikEnc/host/pik/external_image.cc b/codec/L2/demos/pikEnc/host/pik/external_image.cc
new file mode 100755
index 0000000000..0aff78f773
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/external_image.cc
@@ -0,0 +1,1227 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/external_image.h"
+
+#include <string.h>
+
+#include "pik/byte_order.h"
+#include "pik/cache_aligned.h"
+
+namespace pik {
+namespace {
+
+#define PIK_EXT_VERBOSE 0
+
+#if PIK_EXT_VERBOSE
+// For printing RGB values at this X within each line.
+constexpr size_t kX = 1;
+#endif
+
+// Encoding CodecInOut using other codecs requires format conversions to their
+// "External" representation:
+// IO -[1]-> Temp01 -[CMS]-> Temp01 -[2dt]-> External
+// For External -> IO, we need only demux and rescale.
+//
+// "Temp01" and "Temp255" are interleaved and have 1 or 3 non-alpha channels.
+// Alpha is included in External but not Temp because it is neither color-
+// transformed nor included in Image3F.
+// "IO" is Image3F (range [0, 255]) + ImageU alpha.
+//
+// "Temp01" is in range float [0, 1] as required by the CMS, but cannot
+// losslessly represent 8-bit integer values [0, 255] due to floating point
+// precision, which will reflect as a loss in Image3F which uses float range
+// [0, 255] instead, which may cause effects on butteraugli score. Therefore,
+// only use Temp01 if CMS transformation to different color space is required.
+//
+// "Temp255" is in range float [0, 255] and can losslessly represent 8-bit
+// integer values [0, 255], but has floating point loss for 16-bit integer
+// values [0, 65535]. The latter is not an issue however since Image3F uses
+// float [0, 255] so has the same loss (so no butteraugli score effect), and
+// the loss is gone when outputting to external integer again.
+//
+// Summary of formats:
+//   Name   |   Bits  |    Max   | Channels |   Layout    |  Alpha
+// ---------+---------+----------+----------+-------------+---------
+// External | 8,16,32 | 2^Bits-1 |  1,2,3,4 | Interleaved | Included
+//  Temp01  |    32   |     1    |    1,3   | Interleaved | Separate
+// Temp255  |    32   |    255   |    1,3   | Interleaved | Separate
+//    IO    |    32   |    255   |    3,4   |   Planar    |  ImageU
+
+// Tag types: number of external channels including alpha (overload selectors).
+struct Channels1 {
+  static const char* Name() { return "1"; }
+};
+struct Channels2 {
+  static const char* Name() { return "2"; }
+};
+struct Channels3 {
+  static const char* Name() { return "3"; }
+};
+struct Channels4 {
+  static const char* Name() { return "4"; }
+};
+
+// Step 1: interleaved <-> planar and rescale [0, 1] <-> [0, 255]
+struct Interleave {
+  static PIK_INLINE void Image3ToTemp01(Channels1, const size_t y,
+                                        const Image3F& image, const Rect& rect,
+                                        float* PIK_RESTRICT row_temp) {
+    const float* PIK_RESTRICT row_image1 = rect.ConstPlaneRow(image, 1, y);
+    for (size_t x = 0; x < rect.xsize(); ++x) {
+      row_temp[x] = row_image1[x] * (1.0f / 255);  // gray is read from plane 1
+    }
+  }
+
+  static PIK_INLINE void Image3ToTemp01(Channels3, const size_t y,
+                                        const Image3F& image, const Rect& rect,
+                                        float* PIK_RESTRICT row_temp) {
+    const float* PIK_RESTRICT row_image0 = rect.ConstPlaneRow(image, 0, y);
+    const float* PIK_RESTRICT row_image1 = rect.ConstPlaneRow(image, 1, y);
+    const float* PIK_RESTRICT row_image2 = rect.ConstPlaneRow(image, 2, y);
+    for (size_t x = 0; x < rect.xsize(); ++x) {
+      row_temp[3 * x + 0] = row_image0[x] * (1.0f / 255);
+      row_temp[3 * x + 1] = row_image1[x] * (1.0f / 255);
+      row_temp[3 * x + 2] = row_image2[x] * (1.0f / 255);
+    }
+  }
+
+  // Same implementation for 2/4 because neither Image3 nor Temp have alpha.
+  static PIK_INLINE void Image3ToTemp01(Channels2, const size_t y,
+                                        const Image3F& image, const Rect& rect,
+                                        float* PIK_RESTRICT row_temp) {
+    Image3ToTemp01(Channels1(), y, image, rect, row_temp);
+  }
+
+  static PIK_INLINE void Image3ToTemp01(Channels4, const size_t y,
+                                        const Image3F& image, const Rect& rect,
+                                        float* PIK_RESTRICT row_temp) {
+    Image3ToTemp01(Channels3(), y, image, rect, row_temp);
+  }
+  // Temp255 -> planar row y; const_cast because "image" points to const.
+  static PIK_INLINE void Temp255ToImage3(Channels1,
+                                         const float* PIK_RESTRICT row_temp,
+                                         size_t y,
+                                         const Image3F* PIK_RESTRICT image) {
+    const size_t xsize = image->xsize();
+    float* PIK_RESTRICT row0 = const_cast<float*>(image->PlaneRow(0, y));
+    for (size_t x = 0; x < xsize; ++x) {
+      row0[x] = row_temp[x];  // copied as is, no rescaling
+    }
+    // Gray: replicate plane 0 into planes 1 and 2.
+    for (size_t c = 1; c < 3; ++c) {
+      float* PIK_RESTRICT row = const_cast<float*>(image->PlaneRow(c, y));
+      memcpy(row, row0, xsize * sizeof(float));
+    }
+  }
+
+  static PIK_INLINE void Temp255ToImage3(Channels3,
+                                         const float* PIK_RESTRICT row_temp,
+                                         size_t y,
+                                         const Image3F* PIK_RESTRICT image) {
+    float* PIK_RESTRICT row_image0 = const_cast<float*>(image->PlaneRow(0, y));
+    float* PIK_RESTRICT row_image1 = const_cast<float*>(image->PlaneRow(1, y));
+    float* PIK_RESTRICT row_image2 = const_cast<float*>(image->PlaneRow(2, y));
+    for (size_t x = 0; x < image->xsize(); ++x) {
+      row_image0[x] = row_temp[3 * x + 0];
+      row_image1[x] = row_temp[3 * x + 1];
+      row_image2[x] = row_temp[3 * x + 2];
+    }
+  }
+
+  static PIK_INLINE void Temp255ToImage3(Channels2,
+                                         const float* PIK_RESTRICT row_temp,
+                                         size_t y,
+                                         const Image3F* PIK_RESTRICT image) {
+    Temp255ToImage3(Channels1(), row_temp, y, image);  // alpha is not in temp
+  }
+
+  static PIK_INLINE void Temp255ToImage3(Channels4,
+                                         const float* PIK_RESTRICT row_temp,
+                                         size_t y,
+                                         const Image3F* PIK_RESTRICT image) {
+    Temp255ToImage3(Channels3(), row_temp, y, image);  // alpha is not in temp
+  }
+
+};
+
+// Step 2t: type conversion
+
+// Same naming convention as Image: B=u8, U=u16, F=f32. kSize enables generic
+// functions with Type and Order template arguments.
+struct TypeB {
+  static const char* Name() { return "B"; }
+  static constexpr size_t kSize = 1;  // bytes per external sample
+  static constexpr uint16_t kMaxAlpha = 0xFF;
+};
+struct TypeU {
+  static const char* Name() { return "U"; }
+  static constexpr size_t kSize = 2;  // bytes per external sample
+  static constexpr uint16_t kMaxAlpha = 0xFFFF;
+};
+struct TypeF {
+  static const char* Name() { return "F"; }
+  static constexpr size_t kSize = 4;  // bytes per external sample
+  static constexpr uint16_t kMaxAlpha = 0xFFFF;  // alpha stays 16-bit for F
+};
+
+// Loads/stores a float "sample" (gray/color) from/to u8/u16/float bytes.
+struct Sample {
+  template <class Order>
+  static PIK_INLINE float FromExternal(TypeB, const uint8_t* external) {
+    return *external;  // Order is irrelevant for a single byte
+  }
+
+  template <class Order>
+  static PIK_INLINE float FromExternal(TypeU, const uint8_t* external) {
+    return Load16(Order(), external);
+  }
+
+  template <class Order>
+  static PIK_INLINE float FromExternal(TypeF, const uint8_t* external) {
+    const int32_t bits = Load32(Order(), external);
+    float sample;
+    memcpy(&sample, &bits, 4);  // reinterpret the 32 loaded bits as float
+    return sample;
+  }
+
+  template <class Order>
+  static PIK_INLINE void ToExternal(TypeB, const float sample,
+                                    uint8_t* external) {
+    PIK_ASSERT(0 <= sample && sample < 256);
+    // Don't need std::round since sample value is positive.
+    *external = static_cast<int>(sample + 0.5f);  // NOTE: >= 255.5 wraps to 0
+  }
+
+  template <class Order>
+  static PIK_INLINE void ToExternal(TypeU, const float sample,
+                                    uint8_t* external) {
+    PIK_ASSERT(0 <= sample && sample < 65536);
+    // Don't need std::round since sample value is positive.
+    Store16(Order(), static_cast<int>(sample + 0.5f), external);
+  }
+
+  template <class Order>
+  static PIK_INLINE void ToExternal(TypeF, const float sample,
+                                    uint8_t* external) {
+    int32_t bits;
+    memcpy(&bits, &sample, 4);  // raw float bits, stored without rounding
+    Store32(Order(), bits, external);
+  }
+};
+
+// Loads/stores uint32_t (8/16-bit range) "alpha" from/to u8/u16. Lossless.
+struct Alpha {
+  // Per-thread alpha statistics.
+  struct Stats {
+    // Bitwise AND of all alpha values; used to detect all-opaque alpha.
+    uint32_t and_bits = 0xFFFF;
+
+    // Bitwise OR; used to detect out of bounds values (i.e. > 255 for 8-bit).
+    uint32_t or_bits = 0;
+
+    // Prevents false sharing.
+    uint8_t pad[CacheAligned::kAlignment - sizeof(and_bits) - sizeof(or_bits)];
+  };
+  // 8-bit alpha; only OrderLE is overloaded (other orders hit the template).
+  static PIK_INLINE uint32_t FromExternal(TypeB, OrderLE,
+                                          const uint8_t* external) {
+    return *external;
+  }
+
+  // Any larger type implies 16-bit alpha. NOTE: if TypeF, the alpha is smaller
+  // than other external values (subsequent bytes are uninitialized/ignored).
+  template <typename Type, class Order>
+  static PIK_INLINE uint32_t FromExternal(Type, Order,
+                                          const uint8_t* external) {
+    const uint32_t alpha = Load16(Order(), external);
+    return alpha;
+  }
+
+  static PIK_INLINE void ToExternal(TypeB, OrderLE, const uint32_t alpha,
+                                    uint8_t* external) {
+    PIK_ASSERT(alpha < 256);
+    *external = alpha;  // one byte; 16-bit types use the template below
+  }
+
+  // Any larger type implies 16-bit alpha. NOTE: if TypeF, the alpha is smaller
+  // than other external values (subsequent bytes are uninitialized/ignored).
+  template <typename Type, class Order>
+  static PIK_INLINE void ToExternal(Type, Order, const uint32_t alpha,
+                                    uint8_t* external) {
+    Store16(Order(), alpha, external);
+  }
+};
+
+// Step 2d: demux external into separate (type-converted) color and alpha.
+// Supports Temp01 and Temp255; the Cast argument decides which.
+struct Demux {
+  // 1 plane - copy all.
+  template <class Type, class Order, class Cast>
+  static PIK_INLINE void ExternalToTemp(Type type, Order order, Channels1,
+                                        const size_t xsize,
+                                        const uint8_t* external,
+                                        const Cast cast,
+                                        float* PIK_RESTRICT row_temp) {
+    for (size_t x = 0; x < xsize; ++x) {
+      const float rounded =
+          Sample::FromExternal<Order>(type, external + x * Type::kSize);
+      row_temp[x] = cast.FromExternal(rounded, 0);
+    }
+  }
+  template <class Type, class Order, class Cast>
+  static PIK_INLINE void TempToExternal(Type type, Order order, Channels1,
+                                        const size_t xsize,
+                                        const float* PIK_RESTRICT row_temp,
+                                        const Cast cast,
+                                        uint8_t* row_external) {
+    for (size_t x = 0; x < xsize; ++x) {
+      const float sample = cast.FromTemp(row_temp[x], 0);
+      Sample::ToExternal<Order>(type, sample, row_external + x * Type::kSize);
+    }
+  }
+
+  // 2 planes (gray + alpha) - ignore alpha; temp holds 1 value per pixel.
+  template <class Type, class Order, class Cast>
+  static PIK_INLINE void ExternalToTemp(Type type, Order order, Channels2,
+                                        const size_t xsize,
+                                        const uint8_t* external,
+                                        const Cast cast,
+                                        float* PIK_RESTRICT row_temp) {
+    for (size_t x = 0; x < xsize; ++x) {
+      const float rounded = Sample::FromExternal<Order>(
+          type, external + (2 * x + 0) * Type::kSize);
+      row_temp[x] = cast.FromExternal(rounded, 0);
+    }
+  }
+  template <class Type, class Order, class Cast>
+  static PIK_INLINE void TempToExternal(Type type, Order order, Channels2,
+                                        const size_t xsize,
+                                        const float* PIK_RESTRICT row_temp,
+                                        const Cast cast,
+                                        uint8_t* row_external) {
+    for (size_t x = 0; x < xsize; ++x) {
+      const float sample = cast.FromTemp(row_temp[x], 0);
+      Sample::ToExternal<Order>(type, sample,
+                                row_external + (2 * x + 0) * Type::kSize);
+    }
+  }
+
+  // 3 planes - copy all.
+  template <class Type, class Order, class Cast>
+  static PIK_INLINE void ExternalToTemp(Type type, Order order, Channels3,
+                                        const size_t xsize,
+                                        const uint8_t* external,
+                                        const Cast cast,
+                                        float* PIK_RESTRICT row_temp) {
+    for (size_t x = 0; x < xsize; ++x) {
+      const float rounded0 = Sample::FromExternal<Order>(
+          type, external + (3 * x + 0) * Type::kSize);
+      const float rounded1 = Sample::FromExternal<Order>(
+          type, external + (3 * x + 1) * Type::kSize);
+      const float rounded2 = Sample::FromExternal<Order>(
+          type, external + (3 * x + 2) * Type::kSize);
+      row_temp[3 * x + 0] = cast.FromExternal(rounded0, 0);
+      row_temp[3 * x + 1] = cast.FromExternal(rounded1, 1);
+      row_temp[3 * x + 2] = cast.FromExternal(rounded2, 2);
+    }
+  }
+  template <class Type, class Order, class Cast>
+  static PIK_INLINE void TempToExternal(Type type, Order order, Channels3,
+                                        const size_t xsize,
+                                        const float* PIK_RESTRICT row_temp,
+                                        const Cast cast,
+                                        uint8_t* row_external) {
+    for (size_t x = 0; x < xsize; ++x) {
+      const float sample0 = cast.FromTemp(row_temp[3 * x + 0], 0);
+      const float sample1 = cast.FromTemp(row_temp[3 * x + 1], 1);
+      const float sample2 = cast.FromTemp(row_temp[3 * x + 2], 2);
+      Sample::ToExternal<Order>(type, sample0,
+                                row_external + (3 * x + 0) * Type::kSize);
+      Sample::ToExternal<Order>(type, sample1,
+                                row_external + (3 * x + 1) * Type::kSize);
+      Sample::ToExternal<Order>(type, sample2,
+                                row_external + (3 * x + 2) * Type::kSize);
+    }
+  }
+
+  // 4 planes (RGBA) - ignore alpha; temp holds 3 values per pixel.
+  template <class Type, class Order, class Cast>
+  static PIK_INLINE void ExternalToTemp(Type type, Order order, Channels4,
+                                        const size_t xsize,
+                                        const uint8_t* external,
+                                        const Cast cast,
+                                        float* PIK_RESTRICT row_temp) {
+    for (size_t x = 0; x < xsize; ++x) {
+      const float rounded0 = Sample::FromExternal<Order>(
+          type, external + (4 * x + 0) * Type::kSize);
+      const float rounded1 = Sample::FromExternal<Order>(
+          type, external + (4 * x + 1) * Type::kSize);
+      const float rounded2 = Sample::FromExternal<Order>(
+          type, external + (4 * x + 2) * Type::kSize);
+      row_temp[3 * x + 0] = cast.FromExternal(rounded0, 0);
+      row_temp[3 * x + 1] = cast.FromExternal(rounded1, 1);
+      row_temp[3 * x + 2] = cast.FromExternal(rounded2, 2);
+    }
+  }
+  template <class Type, class Order, class Cast>
+  static PIK_INLINE void TempToExternal(Type type, Order order, Channels4,
+                                        const size_t xsize,
+                                        const float* PIK_RESTRICT row_temp,
+                                        const Cast cast,
+                                        uint8_t* row_external) {
+    for (size_t x = 0; x < xsize; ++x) {
+      const float sample0 = cast.FromTemp(row_temp[3 * x + 0], 0);
+      const float sample1 = cast.FromTemp(row_temp[3 * x + 1], 1);
+      const float sample2 = cast.FromTemp(row_temp[3 * x + 2], 2);
+      Sample::ToExternal<Order>(type, sample0,
+                                row_external + (4 * x + 0) * Type::kSize);
+      Sample::ToExternal<Order>(type, sample1,
+                                row_external + (4 * x + 1) * Type::kSize);
+      Sample::ToExternal<Order>(type, sample2,
+                                row_external + (4 * x + 2) * Type::kSize);
+    }
+  }
+
+  // Gray only, no alpha: both directions are intentionally empty.
+  template <class Type, class Order>
+  static PIK_INLINE void ExternalToAlpha(Type type, Order order, Channels1,
+                                         const size_t xsize,
+                                         const uint8_t* external,
+                                         uint16_t* PIK_RESTRICT row_alpha,
+                                         const size_t thread,
+                                         std::vector<Alpha::Stats>* stats) {}
+  template <class Type, class Order>
+  static PIK_INLINE void AlphaToExternal(Type type, Order order, Channels1,
+                                         const size_t xsize,
+                                         const uint16_t* PIK_RESTRICT row_alpha,
+                                         uint8_t* row_external) {}
+
+  // Gray + alpha: alpha is the second value of every pixel.
+  template <class Type, class Order>
+  static PIK_INLINE void ExternalToAlpha(Type type, Order order, Channels2,
+                                         const size_t xsize,
+                                         const uint8_t* external,
+                                         uint16_t* PIK_RESTRICT row_alpha,
+                                         const size_t thread,
+                                         std::vector<Alpha::Stats>* stats) {
+    if (row_alpha == nullptr) return;  // no destination row: stats untouched
+    uint32_t and_bits = 0xFFFF;
+    uint32_t or_bits = 0;
+    for (size_t x = 0; x < xsize; ++x) {
+      const uint32_t alpha = Alpha::FromExternal(
+          type, order, external + (2 * x + 1) * Type::kSize);
+      and_bits &= alpha;
+      or_bits |= alpha;
+      row_alpha[x] = alpha;
+    }
+    (*stats)[thread].and_bits &= and_bits;  // fold this row into thread stats
+    (*stats)[thread].or_bits |= or_bits;
+  }
+  template <class Type, class Order>
+  static PIK_INLINE void AlphaToExternal(Type type, Order order, Channels2,
+                                         const size_t xsize,
+                                         const uint16_t* PIK_RESTRICT row_alpha,
+                                         uint8_t* row_external) {
+    if (row_alpha == nullptr) {  // no alpha row: store kMaxAlpha everywhere
+      for (size_t x = 0; x < xsize; ++x) {
+        Alpha::ToExternal(type, order, type.kMaxAlpha,
+                          row_external + (2 * x + 1) * Type::kSize);
+      }
+    } else {
+      for (size_t x = 0; x < xsize; ++x) {
+        Alpha::ToExternal(type, order, row_alpha[x],
+                          row_external + (2 * x + 1) * Type::kSize);
+      }
+    }
+  }
+
+  // RGB only, no alpha: both directions are intentionally empty.
+  template <class Type, class Order>
+  static PIK_INLINE void ExternalToAlpha(Type type, Order order, Channels3,
+                                         const size_t xsize,
+                                         const uint8_t* external,
+                                         uint16_t* PIK_RESTRICT row_alpha,
+                                         const size_t thread,
+                                         std::vector<Alpha::Stats>* stats) {}
+  template <class Type, class Order>
+  static PIK_INLINE void AlphaToExternal(Type type, Order order, Channels3,
+                                         const size_t xsize,
+                                         const uint16_t* PIK_RESTRICT row_alpha,
+                                         uint8_t* row_external) {}
+
+  // RGBA: alpha is the fourth value of every pixel.
+  template <class Type, class Order>
+  static PIK_INLINE void ExternalToAlpha(Type type, Order order, Channels4,
+                                         const size_t xsize,
+                                         const uint8_t* external,
+                                         uint16_t* PIK_RESTRICT row_alpha,
+                                         const size_t thread,
+                                         std::vector<Alpha::Stats>* stats) {
+    if (row_alpha == nullptr) return;  // no destination row: stats untouched
+    uint32_t and_bits = 0xFFFF;
+    uint32_t or_bits = 0;
+    for (size_t x = 0; x < xsize; ++x) {
+      const uint32_t alpha = Alpha::FromExternal(
+          type, order, external + (4 * x + 3) * Type::kSize);
+      and_bits &= alpha;
+      or_bits |= alpha;
+      row_alpha[x] = alpha;
+    }
+    (*stats)[thread].and_bits &= and_bits;  // fold this row into thread stats
+    (*stats)[thread].or_bits |= or_bits;
+  }
+  template <class Type, class Order>
+  static PIK_INLINE void AlphaToExternal(Type type, Order order, Channels4,
+                                         const size_t xsize,
+                                         const uint16_t* PIK_RESTRICT row_alpha,
+                                         uint8_t* row_external) {
+    if (row_alpha == nullptr) {  // no alpha row: store kMaxAlpha everywhere
+      for (size_t x = 0; x < xsize; ++x) {
+        Alpha::ToExternal(type, order, type.kMaxAlpha,
+                          row_external + (4 * x + 3) * Type::kSize);
+      }
+    } else {
+      for (size_t x = 0; x < xsize; ++x) {
+        Alpha::ToExternal(type, order, row_alpha[x],
+                          row_external + (4 * x + 3) * Type::kSize);
+      }
+    }
+  }
+};
+
+// Empty tag types used to select the Transformer::DoRow overload to call.
+struct ToExternal1 {};  // first phase: store to temp and compute min/max.
+struct ToExternal2 {};  // second phase: rescale temp to external.
+struct ToExternal {};   // single-pass, only usable with CastClip.
+
+// For ToExternal - assumes known/static extents of temp values (empty tag).
+struct ExtentsStatic {};
+
+// For ToExternal1 - computes extents of temp values.
+class ExtentsDynamic {
+ public:
+  ExtentsDynamic(const size_t xsize, const size_t ysize,
+                 const size_t num_threads, const ColorEncoding& c_desired)
+      : temp_intervals_(c_desired.Channels()) {
+    // Store all temp pixels here, convert to external in a second phase after
+    // Finalize computes ChannelIntervals from min_max_.
+    temp_ = ImageF(xsize * temp_intervals_, ysize);
+
+    min_max_.resize(num_threads);  // one MinMax slot per thread
+  }
+
+  float* PIK_RESTRICT RowTemp(const size_t y) { return temp_.Row(y); }
+
+  // Row size is obtained from temp_. NOTE: clamps row_temp to [-kMax, kMax].
+  PIK_INLINE void Update(const size_t thread, float* PIK_RESTRICT row_temp) {
+    // row_temp is interleaved - keep track of current channel.
+    size_t c = 0;
+    for (size_t i = 0; i < temp_.xsize(); ++i, ++c) {
+      if (c == temp_intervals_) c = 0;
+      if (row_temp[i] > min_max_[thread].max[c]) {
+        if (row_temp[i] > kMax) row_temp[i] = kMax;  // clamps in place
+        min_max_[thread].max[c] = row_temp[i];
+      }
+      if (row_temp[i] < min_max_[thread].min[c]) {
+        if (row_temp[i] < -kMax) row_temp[i] = -kMax;  // clamps in place
+        min_max_[thread].min[c] = row_temp[i];
+      }
+    }
+  }
+
+  void Finalize(CodecIntervals* temp_intervals) const {
+    // Any other ChannelInterval remains default-initialized.
+    for (size_t c = 0; c < temp_intervals_; ++c) {
+      float min = min_max_[0].min[c];  // requires num_threads >= 1
+      float max = min_max_[0].max[c];
+      for (size_t i = 1; i < min_max_.size(); ++i) {
+        min = std::min(min, min_max_[i].min[c]);
+        max = std::max(max, min_max_[i].max[c]);
+      }
+      // Update ensured these are clamped.
+      PIK_ASSERT(-kMax <= min && min <= max && max <= kMax);
+      (*temp_intervals)[c] = CodecInterval(min, max);
+    }
+  }
+
+ private:
+  // Larger values are probably invalid, so clamp to preserve some precision.
+  static constexpr float kMax = 1E10;
+
+  struct MinMax {
+    MinMax() {
+      for (size_t c = 0; c < 4; ++c) {
+        min[c] = kMax;   // starts inverted (min > max) until Update runs
+        max[c] = -kMax;
+      }
+    }
+
+    float min[4];
+    float max[4];
+    // Prevents false sharing; needs kAlignment > sizeof(min) + sizeof(max).
+    uint8_t pad[CacheAligned::kAlignment - sizeof(min) - sizeof(max)];
+  };
+
+  const size_t temp_intervals_;  // channel count, from c_desired.Channels()
+  ImageF temp_;                  // interleaved: xsize * channels per row
+  std::vector<MinMax> min_max_;  // indexed by thread
+};
+
+// Empty Cast for ToExternal1, which updates ExtentsDynamic without casting.
+struct CastUnused {};
+
+// Returns the range of valid sample values, shared by all channels.
+CodecInterval GetInterval(const size_t bits_per_sample) {
+  if (bits_per_sample != 32) {
+    // Narrower samples span [0, 2^bits_per_sample - 1].
+    const float max_value = (1U << bits_per_sample) - 1;
+    return CodecInterval(0, max_value);
+  }
+  // 32-bit: ensures ConvertImage produces an image with the same [0, 255]
+  // range as its input, but increases round trip error by ~2x vs [0, 1].
+  return CodecInterval(0.0f, 255.0f);
+}
+
+
+// Lossless conversion between [0, 1] and [min, min+width]. Width is 1 or
+// > 1 ("unbounded", useful for round trip testing). This is used to scale to
+// the external type and back to the arbitrary interval.
+class CastRescale01 {
+ public:
+  static const char* Name() { return "Rescale01"; }
+  CastRescale01(const CodecIntervals& temp_intervals,
+                const CodecInterval ext_interval) {
+    for (size_t c = 0; c < 4; ++c) {  // divides by both widths: must be != 0
+      temp_min_[c] = temp_intervals[c].min;
+      temp_mul_[c] = ext_interval.width / temp_intervals[c].width;
+      external_min_[c] = ext_interval.min;
+      external_mul_[c] = temp_intervals[c].width / ext_interval.width;
+    }
+#if PIK_EXT_VERBOSE >= 2
+    printf("CastRescale01 min %f width %f %f\n", temp_intervals[0].min,
+           temp_intervals[0].width, ext_interval.width);
+#endif
+  }
+
+  PIK_INLINE float FromExternal(const float external, const size_t c) const {
+    return (external - external_min_[c]) * external_mul_[c] + temp_min_[c];
+  }
+  PIK_INLINE float FromTemp(const float temp, const size_t c) const {
+    return (temp - temp_min_[c]) * temp_mul_[c] + external_min_[c];
+  }
+
+ private:
+  float temp_min_[4];      // temp_intervals[c].min
+  float temp_mul_[4];      // external width / temp width
+  float external_min_[4];  // ext_interval.min, same for every channel
+  float external_mul_[4];  // temp width / external width
+};
+
+
+// Lossless conversion between [0, 255] and [min, min+width]. Width is 255 or
+// > 255 ("unbounded", useful for round trip testing). This is used to scale to
+// the external type and back to the arbitrary interval.
+// NOTE: this rescaler exists to make CopyTo match the convention of
+// "temp_intervals" used by the color converting constructor. In the external to
+// IO case without color conversion, one normally does not use this parameter.
+class CastRescale255 {
+ public:
+  static const char* Name() { return "Rescale255"; }
+  CastRescale255(const CodecIntervals& temp_intervals,
+                 const CodecInterval ext_interval) {
+    for (size_t c = 0; c < 4; ++c) {  // divides by both widths: must be != 0
+      temp_min_[c] = 255.0f * temp_intervals[c].min;  // to [0, 255] units
+      temp_mul_[c] =
+          ext_interval.width / temp_intervals[c].width * (1.0f / 255);
+      external_min_[c] = ext_interval.min;  // external units: no 1/255 scaling
+      external_mul_[c] = 255.0f * temp_intervals[c].width / ext_interval.width;
+    }
+#if PIK_EXT_VERBOSE >= 2
+    printf("CastRescale255 min %f width %f %f\n", temp_intervals[0].min,
+           temp_intervals[0].width, ext_interval.width);
+#endif
+  }
+
+  PIK_INLINE float FromExternal(const float external, const size_t c) const {
+    return (external - external_min_[c]) * external_mul_[c] + temp_min_[c];
+  }
+  PIK_INLINE float FromTemp(const float temp, const size_t c) const {
+    return (temp - temp_min_[c]) * temp_mul_[c] + external_min_[c];
+  }
+
+ private:
+  float temp_min_[4];      // 255 * temp_intervals[c].min
+  float temp_mul_[4];      // external width / (255 * temp width)
+  float external_min_[4];  // ext_interval.min, in external units
+  float external_mul_[4];  // 255 * temp width / external width
+};
+
+// Maps between temp values in [0, 1] and the external type's range. Going to
+// external is lossy: temp values outside [0, 1] are clamped, as required for
+// codecs that are not able to store min/width metadata.
+class CastClip01 {
+ public:
+  static const char* Name() { return "Clip01"; }
+  CastClip01(const CodecInterval ext_interval) {
+    for (size_t ch = 0; ch != 4; ++ch) {
+      external_min_[ch] = ext_interval.min;
+      external_mul_[ch] = 1.0f / ext_interval.width;
+      temp_mul_[ch] = ext_interval.width;
+    }
+#if PIK_EXT_VERBOSE >= 2
+    printf("CastClip01 width %f\n", ext_interval.width);
+#endif
+  }
+  // Not clamped: the result only lies in [0, 1] if external is in range.
+  PIK_INLINE float FromExternal(const float external, const size_t c) const {
+    return (external - external_min_[c]) * external_mul_[c];
+  }
+  PIK_INLINE float FromTemp(const float temp, const size_t c) const {
+    return external_min_[c] + Clamp01(temp) * temp_mul_[c];
+  }
+
+ private:
+  static PIK_INLINE float Clamp01(const float temp) {
+    const float at_least_0 = std::max(0.0f, temp);
+    return std::min(at_least_0, 1.0f);
+  }
+
+  float temp_mul_[4];      // width of the external interval
+  float external_min_[4];  // lower bound of the external interval
+  float external_mul_[4];  // 1 / width of the external interval
+};
+
+// Scaling by the constant 255 only: external values are divided by 255 to
+// obtain temp, and temp is multiplied by 255 on the way back. No clamping.
+struct CastFloat {
+  static const char* Name() { return "Float"; }
+  CastFloat(const CodecInterval ext_interval) {
+    // Only the [0, 255] external interval is accepted.
+    PIK_CHECK(ext_interval.min == 0.0f);
+    PIK_CHECK(ext_interval.width == 255.0f);
+#if PIK_EXT_VERBOSE >= 2
+    printf("CastFloat\n");
+#endif
+  }
+
+  PIK_INLINE float FromExternal(const float external, const size_t c) const {
+    return external * (1.0f / 255);
+  }
+  PIK_INLINE float FromTemp(const float temp, const size_t c) const {
+    return temp * 255.0f;
+  }
+};
+
+// Converts between [0, 255] and the external type's range. Lossy because values
+// outside [0, 255] are clamped - this is necessary for codecs that are not able
+// to store min/width metadata.
+class CastClip255 {
+ public:
+  static const char* Name() { return "Clip255"; }
+  CastClip255(const CodecInterval ext_interval) {
+    for (size_t c = 0; c < 4; ++c) {
+      // Inverse of external_mul_, so that FromTemp(255) == min + width.
+      temp_mul_[c] = ext_interval.width * (1.0f / 255);
+      external_min_[c] = ext_interval.min;
+      external_mul_[c] = 255.0f / ext_interval.width;
+    }
+#if PIK_EXT_VERBOSE >= 2
+    printf("CastClip255 width %f\n", ext_interval.width);
+#endif
+  }
+  // Not clamped: the result only lies in [0, 255] if external is in range.
+  PIK_INLINE float FromExternal(const float external, const size_t c) const {
+    return (external - external_min_[c]) * external_mul_[c];
+  }
+  PIK_INLINE float FromTemp(const float temp, const size_t c) const {
+    return Clamp255(temp) * temp_mul_[c] + external_min_[c];
+  }
+
+ private:
+  static PIK_INLINE float Clamp255(const float temp) {
+    return std::min(std::max(0.0f, temp), 255.0f);
+  }
+
+  float temp_mul_[4];      // external width / 255
+  float external_min_[4];  // lower bound of the external interval
+  float external_mul_[4];  // 255 / external width
+};
+
+// Converts between external values and temp values that are smaller by a
+// factor of 255; a multiplication by a constant in each direction.
+struct CastFloat01 {
+  static const char* Name() { return "Float01"; }
+  CastFloat01(const CodecInterval ext_interval) {
+    // Only the [0, 255] external interval is accepted.
+    PIK_CHECK(ext_interval.min == 0.0f);
+    PIK_CHECK(ext_interval.width == 255.0f);
+#if PIK_EXT_VERBOSE >= 2
+    printf("CastFloat01\n");
+#endif
+  }
+
+  PIK_INLINE float FromExternal(const float external, const size_t c) const {
+    return external * (1.0f / 255);
+  }
+  PIK_INLINE float FromTemp(const float temp, const size_t c) const {
+    return temp * 255.0f;
+  }
+};
+
+// Identity cast: external and temp values are both passed through unchanged.
+struct CastFloat255 {
+  static const char* Name() { return "Float255"; }
+  CastFloat255(const CodecInterval ext_interval) {
+    // Only the [0, 255] external interval is accepted.
+    PIK_CHECK(ext_interval.min == 0.0f);
+    PIK_CHECK(ext_interval.width == 255.0f);
+#if PIK_EXT_VERBOSE >= 2
+    printf("CastFloat255\n");
+#endif
+  }
+
+  // No scaling in either direction; c is unused.
+  PIK_INLINE float FromExternal(const float external, const size_t c) const {
+    return external;
+  }
+  PIK_INLINE float FromTemp(const float temp, const size_t c) const {
+    return temp;
+  }
+};
+
+// Multithreaded color space transform from an Image3F rect to ExternalImage.
+class Transformer {
+ public:
+  Transformer(ThreadPool* pool, const Image3F& color, const Rect& rect,
+              const bool has_alpha, const ImageU* alpha,
+              ExternalImage* external)
+      : pool_(pool),
+        color_(color),
+        rect_(rect),
+        alpha_(alpha),
+        external_(external),
+        want_alpha_(has_alpha && external->HasAlpha()) {
+    PIK_ASSERT(rect.IsInside(color));  // rect selects a region of color
+    PIK_ASSERT(SameSize(rect, *external));  // output has rect's dimensions
+  }
+
+  // Sets up the color transform. Can fail => separate from ctor.
+  Status Init(const ColorEncoding& c_src, const ColorEncoding& c_dst) {
+#if PIK_EXT_VERBOSE >= 1
+    printf("%s->%s\n", Description(c_src).c_str(), Description(c_dst).c_str());
+#endif
+
+    return transform_.Init(c_src, c_dst, rect_.xsize(), NumThreads(pool_));
+  }
+
+  // Converts all rows via the DoRow overload selected by To (ToExternal*).
+  template <class To, class Extent, class Cast>
+  Status Run(Extent* extents, const Cast& cast) {
+    const size_t bytes = DivCeil(external_->BitsPerSample(), kBitsPerByte);
+    const bool big_endian = external_->BigEndian();
+    if (bytes == 1) {  // big_endian is not consulted for 1-byte samples
+      DispatchType<To, TypeB, OrderLE>(extents, cast);
+    } else if (bytes == 2 && big_endian) {
+      DispatchType<To, TypeU, OrderBE>(extents, cast);
+    } else if (bytes == 2) {
+      DispatchType<To, TypeU, OrderLE>(extents, cast);
+    } else if (bytes == 4 && big_endian) {
+      DispatchType<To, TypeF, OrderBE>(extents, cast);
+    } else if (bytes == 4) {
+      DispatchType<To, TypeF, OrderLE>(extents, cast);
+    } else {  // e.g. 3-byte samples
+      return PIK_FAILURE("Unsupported BitsPerSample");
+    }
+    return true;
+  }
+
+ private:
+  // Pass 1 of 2 (ExtentsDynamic/CastUnused): fills extents, not external_.
+  template <class Type, class Order, class Channels>
+  PIK_INLINE void DoRow(ToExternal1, ExtentsDynamic* extents, const CastUnused,
+                        const size_t y, const size_t thread) {
+    float* PIK_RESTRICT row_temp = extents->RowTemp(y);
+
+    Interleave::Image3ToTemp01(Channels(), y, color_, rect_, row_temp);
+
+#if PIK_EXT_VERBOSE
+    const float in0 = row_temp[3 * kX + 0], in1 = row_temp[3 * kX + 1];
+    const float in2 = row_temp[3 * kX + 2];
+#endif
+
+    transform_.Run(thread, row_temp, row_temp);  // in-place
+
+#if PIK_EXT_VERBOSE
+    printf("ToExt1: in %.4f %.4f %.4f; xform %.4f %.4f %.4f\n", in0, in1, in2,
+           row_temp[3 * kX + 0], row_temp[3 * kX + 1], row_temp[3 * kX + 2]);
+#endif
+
+    extents->Update(thread, row_temp);
+  }
+
+  // Pass 2 of 2 (ExtentsDynamic/CastRescale01): temp rows -> external_.
+  template <class Type, class Order, class Channels>
+  PIK_INLINE void DoRow(ToExternal2, ExtentsDynamic* extents,
+                        const CastRescale01& cast, const size_t y,
+                        const size_t thread) {
+    const float* PIK_RESTRICT row_temp = extents->RowTemp(y);
+    uint8_t* PIK_RESTRICT row_external = external_->Row(y);
+    Demux::TempToExternal(Type(), Order(), Channels(), rect_.xsize(), row_temp,
+                          cast, row_external);
+
+#if PIK_EXT_VERBOSE
+    printf("ToExt2: ext %3d %3d %3d\n", row_external[3 * kX + 0],
+           row_external[3 * kX + 1], row_external[3 * kX + 2]);
+#endif
+
+    const uint16_t* PIK_RESTRICT row_alpha =
+        want_alpha_ ? alpha_->ConstRow(y) : nullptr;  // null: no input alpha
+    Demux::AlphaToExternal(Type(), Order(), Channels(), rect_.xsize(),
+                           row_alpha, row_external);
+  }
+
+  // Single pass (ExtentsStatic): transforms and writes a row to external_.
+  template <class Type, class Order, class Channels, class Cast>
+  PIK_INLINE void DoRow(ToExternal, ExtentsStatic*, const Cast& cast,
+                        const size_t y, const size_t thread) {
+    float* PIK_RESTRICT row_temp = transform_.BufDst(thread);
+    Interleave::Image3ToTemp01(Channels(), y, color_, rect_, row_temp);
+
+#if PIK_EXT_VERBOSE
+    // Save inputs for printing before in-place transform overwrites them.
+    const float in0 = row_temp[3 * kX + 0];
+    const float in1 = row_temp[3 * kX + 1];
+    const float in2 = row_temp[3 * kX + 2];
+#endif
+    transform_.Run(thread, row_temp, row_temp);  // in-place
+
+    uint8_t* PIK_RESTRICT row_external = external_->Row(y);
+    Demux::TempToExternal(Type(), Order(), Channels(), rect_.xsize(), row_temp,
+                          cast, row_external);
+
+#if PIK_EXT_VERBOSE
+    const float tmp0 = row_temp[3 * kX + 0];
+    const float tmp1 = row_temp[3 * kX + 1];
+    const float tmp2 = row_temp[3 * kX + 2];
+    // Convert back so we can print the external values
+    Demux::ExternalToTemp(Type(), Order(), Channels(), rect_.xsize(),
+                          row_external, cast, row_temp);
+    printf("ToExt(%s%s %s): tmp %.4f %.4f %.4f|%.4f %.4f %.4f|%.4f %.4f %.4f\n",
+           Channels::Name(), Type::Name(), Cast::Name(), in0, in1, in2, tmp0,
+           tmp1, tmp2, row_temp[3 * kX + 0], row_temp[3 * kX + 1],
+           row_temp[3 * kX + 2]);
+#endif
+
+    const uint16_t* PIK_RESTRICT row_alpha =
+        want_alpha_ ? alpha_->ConstRow(y) : nullptr;  // null: no input alpha
+    Demux::AlphaToExternal(Type(), Order(), Channels(), rect_.xsize(),
+                           row_alpha, row_external);
+  }
+
+  // Closure callable by ThreadPool; task is the row index passed to DoRow.
+  template <class To, class Type, class Order, class Channels, class Extent,
+            class Cast>
+  class Bind {
+   public:
+    explicit Bind(Transformer* converter, Extent* extents, const Cast& cast)
+        : xform_(converter), extents_(extents), cast_(cast) {}
+
+    PIK_INLINE void operator()(const int task, const int thread) const {
+      xform_->DoRow<Type, Order, Channels>(To(), extents_, cast_, task, thread);
+    }
+
+   private:
+    Transformer* xform_;  // not owned
+    Extent* extents_;     // not owned
+    const Cast cast_;     // copy of the caller's cast
+  };
+
+  // Runs DoRow for every row in [0, rect_.ysize()) on the pool.
+  template <class To, class Type, class Order, class Channels, class Extent,
+            class Cast>
+  void DoRows(Extent* extents, const Cast& cast) {
+    RunOnPool(
+        pool_, 0, rect_.ysize(),
+        Bind<To, Type, Order, Channels, Extent, Cast>(this, extents, cast),
+        "ExtImg xform");
+  }
+
+  // Calls the DoRows instantiation whose Channels match external_'s layout.
+  template <class To, class Type, class Order, class Extent, class Cast>
+  void DispatchType(Extent* extents, const Cast& cast) {
+    if (external_->IsGray()) {
+      if (external_->HasAlpha()) {
+        DoRows<To, Type, Order, Channels2>(extents, cast);
+      } else {
+        DoRows<To, Type, Order, Channels1>(extents, cast);
+      }
+    } else {
+      if (external_->HasAlpha()) {
+        DoRows<To, Type, Order, Channels4>(extents, cast);
+      } else {
+        DoRows<To, Type, Order, Channels3>(extents, cast);
+      }
+    }
+  }
+
+  ThreadPool* pool_;  // not owned
+  const Image3F& color_;
+  const Rect rect_;          // whence in color_ to copy, and output size.
+  const ImageU* alpha_;      // not owned
+  ExternalImage* external_;  // not owned
+
+  bool want_alpha_;  // caller has alpha AND external_ has an alpha channel
+
+  ColorSpaceTransform transform_;
+};
+
+// Multithreaded deinterleaving/conversion from ExternalImage to Image3.
+class Converter {
+ public:
+  Converter(ThreadPool* pool, const ExternalImage& external)
+      : pool_(pool),
+        external_(&external),
+        xsize_(external.xsize()),
+        ysize_(external.ysize()),
+        color_(xsize_, ysize_) {
+    const size_t num_threads = NumThreads(pool);
+    temp_buf_ = ImageF(xsize_ * external.c_current().Channels(), num_threads);
+    // Alpha plane and per-thread statistics only exist if external has alpha.
+    if (external_->HasAlpha()) {
+      alpha_ = ImageU(xsize_, ysize_);
+      bits_per_alpha_ = external_->BitsPerAlpha();
+      alpha_stats_.resize(num_threads);
+    }
+  }
+  // Converts all rows using "cast"; fails for unsupported sample sizes.
+  template <class Cast>
+  Status Run(const Cast& cast) {
+    const size_t bytes = DivCeil(external_->BitsPerSample(), kBitsPerByte);
+    const bool big_endian = external_->BigEndian();
+    if (bytes == 1) {  // big_endian is not consulted for 1-byte samples
+      DispatchType<TypeB, OrderLE>(cast);
+    } else if (bytes == 2 && big_endian) {
+      DispatchType<TypeU, OrderBE>(cast);
+    } else if (bytes == 2) {
+      DispatchType<TypeU, OrderLE>(cast);
+    } else if (bytes == 4 && big_endian) {
+      DispatchType<TypeF, OrderBE>(cast);
+    } else if (bytes == 4) {
+      DispatchType<TypeF, OrderLE>(cast);
+    } else {
+      return PIK_FAILURE("Unsupported BitsPerSample");
+    }
+    return true;
+  }
+  // Moves the converted color (and alpha, if not fully opaque) into "io".
+  Status MoveTo(CodecInOut* io) {
+    io->SetFromImage(std::move(color_), external_->c_current());
+
+    // Don't have alpha; during TransformTo, don't remove existing alpha.
+    if (alpha_stats_.empty()) return true;
+    // 64-bit shift: ExternalImage allows up to 32 bits per alpha, for which
+    // shifting the int literal 1 would overflow (31) or be undefined (32).
+    const uint64_t max_alpha = (uint64_t{1} << bits_per_alpha_) - 1;
+    // Reduce per-thread statistics.
+    uint32_t and_bits = alpha_stats_[0].and_bits;
+    uint32_t or_bits = alpha_stats_[0].or_bits;
+    for (size_t i = 1; i < alpha_stats_.size(); ++i) {
+      and_bits &= alpha_stats_[i].and_bits;
+      or_bits |= alpha_stats_[i].or_bits;
+    }
+
+    if (or_bits > max_alpha) {
+      return PIK_FAILURE("Alpha out of range");
+    }
+
+    // Keep alpha if at least one value is (semi)transparent.
+    if (and_bits != max_alpha) {
+      io->SetAlpha(std::move(alpha_), bits_per_alpha_);
+    } else {
+      io->RemoveAlpha();
+    }
+    return true;
+  }
+
+ private:
+  template <class Type, class Order, class Channels, class Cast>
+  PIK_INLINE void DoRow(const Cast& cast, const size_t y, const size_t thread) {
+    const uint8_t* PIK_RESTRICT row_external = external_->ConstRow(y);
+
+    if (!alpha_stats_.empty()) {
+      // No-op if Channels1/3.
+      Demux::ExternalToAlpha(Type(), Order(), Channels(), xsize_, row_external,
+                             alpha_.Row(y), thread, &alpha_stats_);
+    }
+
+    float* PIK_RESTRICT row_temp = temp_buf_.Row(thread);  // per-thread row
+    Demux::ExternalToTemp(Type(), Order(), Channels(), xsize_, row_external,
+                          cast, row_temp);
+
+#if PIK_EXT_VERBOSE
+    printf("ToIO(%s%s %s): ext %3d %3d %3d  tmp %.4f %.4f %.4f\n",
+           Channels::Name(), Type::Name(), Cast::Name(),
+           row_external[3 * kX + 0], row_external[3 * kX + 1],
+           row_external[3 * kX + 2], row_temp[3 * kX + 0], row_temp[3 * kX + 1],
+           row_temp[3 * kX + 2]);
+#endif
+
+    Interleave::Temp255ToImage3(Channels(), row_temp, y, &color_);
+  }
+
+  // Closure callable by ThreadPool; task is the row index passed to DoRow.
+  template <class Type, class Order, class Channels, class Cast>
+  class Bind {
+   public:
+    explicit Bind(Converter* converter, const Cast& cast)
+        : converter_(converter), cast_(cast) {}
+
+    PIK_INLINE void operator()(const int task, const int thread) const {
+      converter_->DoRow<Type, Order, Channels>(cast_, task, thread);
+    }
+
+   private:
+    Converter* converter_;  // not owned
+    const Cast cast_;       // copy of the caller's cast
+  };
+
+  template <class Type, class Order, class Channels, class Cast>
+  void DoRows(const Cast& cast) {
+    RunOnPool(pool_, 0, ysize_, Bind<Type, Order, Channels, Cast>(this, cast),
+              "ExtImg cvt");
+  }
+
+  // Calls the DoRows instantiation whose Channels match external_'s layout.
+  template <class Type, class Order, class Cast>
+  void DispatchType(const Cast& cast) {
+    if (external_->IsGray()) {
+      if (external_->HasAlpha()) {
+        DoRows<Type, Order, Channels2>(cast);
+      } else {
+        DoRows<Type, Order, Channels1>(cast);
+      }
+    } else {
+      if (external_->HasAlpha()) {
+        DoRows<Type, Order, Channels4>(cast);
+      } else {
+        DoRows<Type, Order, Channels3>(cast);
+      }
+    }
+  }
+
+  ThreadPool* pool_;               // not owned
+  const ExternalImage* external_;  // not owned
+  size_t xsize_;
+  size_t ysize_;
+  Image3F color_;
+
+  ImageF temp_buf_;
+
+  // Only populated if external_->HasAlpha():
+  std::vector<Alpha::Stats> alpha_stats_;
+  ImageU alpha_;
+  size_t bits_per_alpha_ = 0;
+};
+
+}  // namespace
+
+ExternalImage::ExternalImage(const size_t xsize, const size_t ysize,
+                             const ColorEncoding& c_current,
+                             const bool has_alpha, const size_t bits_per_alpha,
+                             const size_t bits_per_sample,
+                             const bool big_endian)
+    : xsize_(xsize),
+      ysize_(ysize),
+      c_current_(c_current),
+      channels_(c_current.Channels() + has_alpha),
+      bits_per_alpha_(bits_per_alpha),
+      bits_per_sample_(bits_per_sample),
+      big_endian_(big_endian),
+      row_size_(xsize * channels_ * DivCeil(bits_per_sample, kBitsPerByte)) {
+  PIK_ASSERT(channels_ >= 1 && channels_ <= 4);
+  PIK_ASSERT(bits_per_sample >= 1 && bits_per_sample <= 32);
+  PIK_ASSERT(!has_alpha || (bits_per_alpha >= 1 && bits_per_alpha <= 32));
+  bytes_.resize(row_size_ * ysize_);  // rows are stored back to back
+  is_healthy_ = !bytes_.empty();      // empty storage => unhealthy instance
+}
+
+ExternalImage::ExternalImage(const size_t xsize, const size_t ysize,
+                             const ColorEncoding& c_current,
+                             const bool has_alpha, const size_t bits_per_alpha,
+                             const size_t bits_per_sample,
+                             const bool big_endian, const uint8_t* bytes,
+                             const uint8_t* end)
+    : ExternalImage(xsize, ysize, c_current, has_alpha, bits_per_alpha,
+                    bits_per_sample, big_endian) {
+  if (!is_healthy_) return;    // Allocation failed or image is empty.
+  PIK_CHECK(end == nullptr ||  // Sizes compared: bytes + size may be invalid.
+            (end >= bytes && size_t(end - bytes) >= ysize * row_size_));
+  memcpy(bytes_.data(), bytes, bytes_.size());
+}
+
+// Converts "rect" of "color" from c_current to c_desired and stores the
+// result as interleaved samples; is_healthy_ reports whether that succeeded.
+ExternalImage::ExternalImage(ThreadPool* pool, const Image3F& color,
+                             const Rect& rect, const ColorEncoding& c_current,
+                             const ColorEncoding& c_desired,
+                             const bool has_alpha, const ImageU* alpha,
+                             size_t bits_per_alpha, size_t bits_per_sample,
+                             bool big_endian,
+                             CodecIntervals* temp_intervals)
+    : ExternalImage(rect.xsize(), rect.ysize(), c_desired, has_alpha,
+                    bits_per_alpha, bits_per_sample, big_endian) {
+  if (!is_healthy_) return;
+  Transformer xform(pool, color, rect, has_alpha, alpha, this);
+  // Any failure below marks this instance as unhealthy.
+  is_healthy_ = xform.Init(c_current, c_desired);
+  if (!is_healthy_) return;
+
+  const CodecInterval ext_interval = GetInterval(bits_per_sample);
+
+  if (bits_per_sample == 32) {
+    // Float samples: only multiply by const.
+    ExtentsStatic extents;
+    is_healthy_ = xform.Run<ToExternal>(&extents, CastFloat01(ext_interval));
+  } else if (temp_intervals == nullptr) {
+    // No intervals requested: clip.
+    ExtentsStatic extents;
+    is_healthy_ = xform.Run<ToExternal>(&extents, CastClip01(ext_interval));
+  } else {
+    // Pass 1: store temp to separate image and obtain per-channel intervals.
+    ExtentsDynamic extents(xsize_, ysize_, NumThreads(pool), c_desired);
+    is_healthy_ = xform.Run<ToExternal1>(&extents, CastUnused());
+    if (!is_healthy_) return;
+    extents.Finalize(temp_intervals);
+
+    // Pass 2: rescale based on temp_intervals.
+    const CastRescale01 rescale(*temp_intervals, ext_interval);
+    is_healthy_ = xform.Run<ToExternal2>(&extents, rescale);
+  }
+}
+
+// Deinterleaves/converts the stored samples and moves the result into "io".
+// The cast depends on the sample depth and on whether temp_intervals is set.
+Status ExternalImage::CopyTo(const CodecIntervals* temp_intervals,
+                             ThreadPool* pool, CodecInOut* io) const {
+  PIK_ASSERT(IsHealthy());  // Caller should have checked beforehand.
+  Converter to_io(pool, *this);
+  const CodecInterval ext_interval = GetInterval(bits_per_sample_);
+
+  if (bits_per_sample_ == 32) {
+    // 32-bit samples are passed through without scaling.
+    PIK_RETURN_IF_ERROR(to_io.Run(CastFloat255(ext_interval)));
+  } else if (temp_intervals == nullptr) {
+    // No intervals given: plain mapping from the external range.
+    PIK_RETURN_IF_ERROR(to_io.Run(CastClip255(ext_interval)));
+  } else {
+    // Rescale into the per-channel intervals supplied by the caller.
+    PIK_RETURN_IF_ERROR(
+        to_io.Run(CastRescale255(*temp_intervals, ext_interval)));
+  }
+  return to_io.MoveTo(io);
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/external_image.h b/codec/L2/demos/pikEnc/host/pik/external_image.h
new file mode 100755
index 0000000000..ffd1784fdf
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/external_image.h
@@ -0,0 +1,91 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_EXTERNAL_IMAGE_H_
+#define PIK_EXTERNAL_IMAGE_H_
+
+// Interleaved image for color transforms and Codec.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "pik/codec.h"
+#include "pik/data_parallel.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Packed (no row padding), interleaved (RGBRGB) u8/u16/f32.
+class ExternalImage {
+ public:
+  // Copies from existing interleaved image. Called by decoders. "big_endian"
+  // only matters for bits_per_sample > 8. "end" is the STL-style end of "bytes"
+  // for range checks, or null if unknown.
+  ExternalImage(size_t xsize, size_t ysize, const ColorEncoding& c_current,
+                bool has_alpha, size_t bits_per_alpha,
+                size_t bits_per_sample, bool big_endian,
+                const uint8_t* bytes, const uint8_t* end);
+
+  // Copies pixels from rect, converting from c_current to c_desired. Called by
+  // encoders and CodecInOut::CopyTo. alpha is nullptr iff !has_alpha. If
+  // temp_intervals != null, fills them so CopyTo can rescale to that range;
+  // else clamps temp to [0, 1]. bits_per_sample == 32 only scales by 255.
+  ExternalImage(ThreadPool* pool, const Image3F& color, const Rect& rect,
+                const ColorEncoding& c_current, const ColorEncoding& c_desired,
+                bool has_alpha, const ImageU* alpha, size_t bits_per_alpha,
+                size_t bits_per_sample, bool big_endian,
+                CodecIntervals* temp_intervals);
+
+  // Indicates whether the ctor succeeded; if not, do not use this instance.
+  Status IsHealthy() const { return is_healthy_; }
+
+  // Sets "io" to a newly allocated copy with c_current color space.
+  // Uses temp_intervals for rescaling if not null (NOTE: temp_intervals is
+  // given as if a range of [0.0f-1.0f] would be used, even though it uses
+  // [0.0f-255.0f] internally, to match the same parameter given to the
+  // color converting constructor).
+  Status CopyTo(const CodecIntervals* temp_intervals, ThreadPool* pool,
+                CodecInOut* io) const;
+
+  // Packed, interleaved pixels, for passing to encoders.
+  const PaddedBytes& Bytes() const { return bytes_; }
+
+  size_t xsize() const { return xsize_; }
+  size_t ysize() const { return ysize_; }
+  const ColorEncoding& c_current() const { return c_current_; }
+  bool IsGray() const { return c_current_.IsGray(); }
+  bool HasAlpha() const { return channels_ == 2 || channels_ == 4; }
+  size_t BitsPerAlpha() const { return bits_per_alpha_; }
+  size_t BitsPerSample() const { return bits_per_sample_; }
+  bool BigEndian() const { return big_endian_; }
+
+  uint8_t* Row(size_t y) { return bytes_.data() + y * row_size_; }
+  const uint8_t* ConstRow(size_t y) const {
+    return bytes_.data() + y * row_size_;
+  }
+
+ private:
+  // Allocates storage only; the public ctors delegate to this one.
+  ExternalImage(size_t xsize, size_t ysize, const ColorEncoding& c_current,
+                bool has_alpha, size_t bits_per_alpha, size_t bits_per_sample,
+                bool big_endian);
+  size_t xsize_;
+  size_t ysize_;
+  ColorEncoding c_current_;
+  size_t channels_;  // color channels of c_current_, plus one if has_alpha
+  // Per alpha channel value
+  size_t bits_per_alpha_;
+  // Per color channel
+  size_t bits_per_sample_;
+  bool big_endian_;
+  size_t row_size_;  // bytes per row: xsize * channels * bytes per sample
+  PaddedBytes bytes_;
+  bool is_healthy_;
+};
+
+}  // namespace pik
+
+#endif  // PIK_EXTERNAL_IMAGE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/fast_log.h b/codec/L2/demos/pikEnc/host/pik/fast_log.h
new file mode 100755
index 0000000000..48fc54d136
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/fast_log.h
@@ -0,0 +1,134 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_FAST_LOG_H_
+#define PIK_FAST_LOG_H_
+
+#include <stdint.h>
+#include <cmath>
+
+namespace pik {
+
+inline int Log2FloorNonZero(uint32_t n) {  // floor(log2(n)); n must be != 0
+#if defined(__GNUC__)
+  return 31 ^ __builtin_clz(n);  // index of the highest set bit
+#else
+  unsigned int shifts = 0;  // how often n can be halved before reaching 0
+  for (n >>= 1; n != 0; n >>= 1) ++shifts;
+  return shifts;
+#endif
+}
+
+inline int Log2CeilingNonZero(uint32_t n) {  // ceil(log2(n)) for n != 0
+  const bool is_pow2 = (n & (n - 1)) == 0;  // single bit set: already exact
+  return Log2FloorNonZero(n) + (is_pow2 ? 0 : 1);
+}
+
+inline int Log2Floor(uint32_t n) { return n != 0 ? Log2FloorNonZero(n) : -1; }
+
+// Lookup table of log2(i) for small integers i in [0, 256), to be used in
+// entropy computation. Entry 0 is 0.0 by convention. Generated in Python by:
+//
+// ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]])
+static const float kLog2Table[] = {
+    0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
+    1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
+    2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
+    3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
+    3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
+    3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
+    4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
+    4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
+    4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
+    4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
+    4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
+    5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
+    5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
+    5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
+    5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
+    5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
+    5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
+    5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
+    5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
+    5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
+    5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
+    5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
+    6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
+    6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
+    6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
+    6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
+    6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
+    6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
+    6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
+    6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
+    6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
+    6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
+    6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
+    6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
+    6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
+    6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
+    6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
+    6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
+    6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
+    6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
+    6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
+    6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
+    6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
+    7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
+    7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
+    7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
+    7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
+    7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
+    7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
+    7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
+    7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
+    7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
+    7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
+    7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
+    7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
+    7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
+    7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
+    7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
+    7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
+    7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
+    7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
+    7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
+    7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
+    7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
+    7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
+    7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
+    7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
+    7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
+    7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
+    7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
+    7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
+    7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
+    7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
+    7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
+    7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
+    7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
+    7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
+    7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
+    7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
+    7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
+    7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
+    7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
+    7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
+    7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
+    7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
+    7.9943534368588578f};
+
+// Faster log2 for small integers, with FastLog2(0) == 0. Negative v must not
+// index the table (out-of-bounds read); it falls through to log2 instead.
+static inline float FastLog2(int v) {
+  const int kTableSize = (int)(sizeof(kLog2Table) / sizeof(kLog2Table[0]));
+  if (v >= 0 && v < kTableSize) return kLog2Table[v];
+  return log2(v);
+}
+
+}  // namespace pik
+
+#endif  // PIK_FAST_LOG_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/field_encodings.h b/codec/L2/demos/pikEnc/host/pik/field_encodings.h
new file mode 100755
index 0000000000..d7b2d69e3c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/field_encodings.h
@@ -0,0 +1,44 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_FIELD_ENCODINGS_H_
+#define PIK_FIELD_ENCODINGS_H_
+
+// Constants needed to encode/decode fields; avoids including the full fields.h.
+
+#include <stdint.h>
+
+namespace pik {
+
+// kU32RawBits + x => send x raw bits. This value is convenient because x <= 32
+// and ~32u + 32 == ~0u, which ensures RawBits can never exceed 32 and also
+// allows the values to be sign-extended from an 8-bit immediate.
+static constexpr uint32_t kU32RawBits = ~32u;
+
+// Four direct values [0, 4): selector bytes 0x80..0x83, least significant first.
+static constexpr uint32_t kU32Direct0To3 = 0x83828180u;
+
+// Three direct values 0, 1, 2 or 2 extra bits for [3, 6] (selector byte 0x51).
+static constexpr uint32_t kU32Direct3Plus4 = 0x51828180u;
+
+// Three direct values 0, 1, 2 or 3 extra bits for [3, 10] (selector byte 0x52).
+static constexpr uint32_t kU32Direct3Plus8 = 0x52828180u;
+
+// Four direct values 2, 3, 4, 8 (kU32Direct2348) or 1, 2, 4, 8 (kU32Direct1248).
+static constexpr uint32_t kU32Direct2348 = 0x88848382u;
+static constexpr uint32_t kU32Direct1248 = 0x88848281u;
+
+enum class BytesEncoding {
+  // Values are determined by kU32Direct3Plus8 (direct 0, 1, 2; [3, 10] spare).
+  kNone = 0,  // Not present, don't write size
+  kRaw,  // = 1: size followed by the bytes as-is.
+  kBrotli  // = 2: Only if smaller, otherwise kRaw.
+  // Future extensions: [3, 10].
+};
+
+}  // namespace pik
+
+#endif  // PIK_FIELD_ENCODINGS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/fields.h b/codec/L2/demos/pikEnc/host/pik/fields.h
new file mode 100755
index 0000000000..66ee88db29
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/fields.h
@@ -0,0 +1,1086 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_FIELDS_H_
+#define PIK_FIELDS_H_
+
+// Forward/backward-compatible 'bundles' with auto-serialized 'fields'.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "pik/bit_reader.h"
+#include "pik/bits.h"
+#include "pik/brotli.h"
+#include "pik/common.h"
+#include "pik/compiler_specific.h"
+#include "pik/field_encodings.h"
+#include "pik/status.h"
+#include "pik/write_bits.h"
+
+#ifndef PIK_FIELDS_TRACE
+#define PIK_FIELDS_TRACE 0
+#endif
+
+namespace pik {
+
+// Chooses one of four encodings based on an a-priori "distribution":
+// - raw: if IsRaw(distribution), send RawBits(distribution) = 1..32 raw bits;
+//   These are values larger than ~32u; use kU32RawBits + #bits.
+// - non-raw: send a 2-bit selector to choose byte b from "distribution",
+//   least significant byte first. Then the value is encoded according to b:
+//   -- direct: if b & 0x80, the value is b & 0x7F
+//   -- offset: else if b & 0x40, the value is derived from (b & 7) + 1
+//              extra bits plus an offset ((b >> 3) & 7) + 1.
+//   -- extra: otherwise, the value is derived from b extra bits
+//             (must be 1-32 extra bits)
+// This is faster to decode and denser than Exp-Golomb or Gamma codes when both
+// small and large values occur.
+//
+// Examples:
+// Raw:    distribution 0xFFFFFFEF, value 32768 => 1000000000000000
+// Direct: distribution 0x06A09088, value 32 => 10 (selector 2, b=0xA0).
+// Extra:  distribution 0x08060402, value 7 => 01 0111 (selector 1, b=4).
+// Offset: distribution 0x68584801, value 7 => 11 1 (selector 3, offset 5 + 1).
+//
+// Bit for bit example:
+// An encoding mapping the following prefix code:
+// 00 -> 0
+// 01x -> 1..2
+// 10xx -> 3..6
+// 11xxxxxxxx -> 8..263 (7 itself has no code in this distribution)
+// Can be made with distribution 0x7F514080. Dissecting this from hex digits
+// left to right:
+// 7: 0x40 flag for this byte and 2 bits of offset 8 for 8..263
+// F: final bit of offset 8 and 3 bits setting extra to 7+1 for 8..263.
+// 5: 0x40 flag for this byte and 2 bits of offset 3 for 3..6
+// 1: final bit of offset 3 and 3 bits setting extra to 1+1 for 3..6
+// 4: 0x40 flag for this byte, no offset bits set, offset 0+1 for 1..2
+// 0: no bits set in this flag, offset and extra bits set to 0 indicating an
+//    offset 1 and extra 1 for 1..2
+// 8: 0x80 flag set to indicate direct value for 0
+// 0: bits of the direct value 0
+class U32Coder {
+ public:
+  // Byte flag indicating direct value.
+  static const uint32_t kDirect = 0x80;
+  // Byte flag indicating extra bits with offset rather than pure extra bits.
+  static const uint32_t kOffset = 0x40;
+
+  static size_t MaxEncodedBits(const uint32_t distribution) {  // Upper bound.
+    ValidateDistribution(distribution);
+    if (IsRaw(distribution)) return RawBits(distribution);  // No selector bits.
+    size_t extra_bits = 0;
+    for (int selector = 0; selector < 4; ++selector) {
+      const size_t b = Lookup(distribution, selector);
+      if (b & kDirect) {
+        continue;
+      } else {
+        extra_bits = std::max<size_t>(extra_bits, GetExtraBits(b));
+      }
+    }
+    return 2 + extra_bits;  // 2 selector bits + widest payload of any selector.
+  }
+
+  static Status CanEncode(const uint32_t distribution, const uint32_t value,
+                          size_t* PIK_RESTRICT encoded_bits) {
+    ValidateDistribution(distribution);
+    int selector;
+    size_t total_bits;
+    const Status ok =
+        ChooseEncoding(distribution, value, &selector, &total_bits);
+    *encoded_bits = ok ? total_bits : 0;  // 0 when value cannot be encoded.
+    return ok;
+  }
+
+  static uint32_t Read(const uint32_t distribution,
+                       BitReader* PIK_RESTRICT reader) {
+    ValidateDistribution(distribution);
+    if (IsRaw(distribution)) {
+      return reader->ReadBits(RawBits(distribution));
+    }
+    const int selector = reader->ReadFixedBits<2>();
+    const size_t b = Lookup(distribution, selector);
+    if (b & kDirect) {
+      return b & 0x7F;  // Value is stored in the distribution byte itself.
+    } else {
+      uint32_t offset = GetOffset(b);
+      uint32_t extra_bits = GetExtraBits(b);
+      return reader->ReadBits(extra_bits) + offset;
+    }
+  }
+
+  // Returns false (before writing anything) if no encoding of value exists.
+  static Status Write(const uint32_t distribution, const uint32_t value,
+                      size_t* pos, uint8_t* storage) {
+    int selector;
+    size_t total_bits;
+    PIK_RETURN_IF_ERROR(
+        ChooseEncoding(distribution, value, &selector, &total_bits));
+
+    if (IsRaw(distribution)) {
+      WriteBits(RawBits(distribution), value, pos, storage);
+      return true;
+    }
+    WriteBits(2, selector, pos, storage);
+
+    const size_t b = Lookup(distribution, selector);
+    if ((b & kDirect) == 0) {  // Nothing more to write for direct encoding
+      uint32_t offset = GetOffset(b);
+      PIK_ASSERT(value >= offset);
+      WriteBits(total_bits - 2, value - offset, pos, storage);  // Payload only.
+    }
+
+    return true;
+  }
+
+ private:
+  static PIK_INLINE bool IsRaw(const uint32_t distribution) {
+    return distribution > kU32RawBits;
+  }
+
+  static PIK_INLINE size_t RawBits(const uint32_t distribution) {
+    PIK_ASSERT(IsRaw(distribution));
+    return distribution - kU32RawBits;  // Number of raw bits requested.
+  }
+
+  // Returns one byte from "distribution" at index "selector" (0 = lowest byte).
+  static PIK_INLINE size_t Lookup(const uint32_t distribution,
+                                  const int selector) {
+    PIK_ASSERT(!IsRaw(distribution));
+    return (distribution >> (selector * 8)) & 0xFF;
+  }
+
+  static PIK_INLINE uint32_t GetOffset(const uint8_t b) {
+    PIK_ASSERT(!(b & kDirect));
+    if (b & kOffset) return ((b >> 3) & 7) + 1;  // Bits 3..5, biased by one.
+    return 0;
+  }
+
+  static PIK_INLINE uint32_t GetExtraBits(const uint8_t b) {
+    PIK_ASSERT(!(b & kDirect));
+    if (b & kOffset) return (b & 7) + 1;  // Bits 0..2, biased by one.
+    PIK_ASSERT(b != 0 && b <= 32);
+    return b;
+  }
+
+  static void ValidateDistribution(const uint32_t distribution) {
+#if PIK_ENABLE_ASSERT
+    if (IsRaw(distribution)) return;  // raw 1..32: OK
+    for (int selector = 0; selector < 4; ++selector) {
+      const size_t b = Lookup(distribution, selector);
+      if (b & kDirect) {
+        continue;  // direct: OK
+      } else if (b & kOffset) {
+        continue;  // extra with offset: OK
+      } else {
+        // Forbid b = 0 because it requires an extra call to read/write 0 bits;
+        // to encode a zero value, use b = kDirect instead.
+        if (b == 0 || b > 32) {
+          fprintf(stderr, "Invalid distribution %8x[%d] == %zu\n", distribution,
+                  selector, b);
+          PIK_ASSERT(false);
+        }
+      }
+    }
+#endif
+  }
+
+  static Status ChooseEncoding(const uint32_t distribution,
+                               const uint32_t value, int* PIK_RESTRICT selector,
+                               size_t* PIK_RESTRICT total_bits) {
+    const size_t bits_required = 32 - NumZeroBitsAboveMSB(value);
+    PIK_ASSERT(bits_required <= 32);
+
+    *selector = 0;
+    *total_bits = 0;
+
+    if (IsRaw(distribution)) {
+      const size_t raw_bits = RawBits(distribution);
+      if (bits_required > raw_bits) {
+        return PIK_FAILURE("Insufficient raw bits");
+      }
+      *total_bits = raw_bits;
+      return true;
+    }
+
+    // It is difficult to verify whether "distribution" is sorted, so check all
+    // selectors and keep the one with the fewest total_bits.
+    *total_bits = 64;  // more than any valid encoding
+    for (int s = 0; s < 4; ++s) {
+      const size_t b = Lookup(distribution, s);
+      if (b & kDirect) {
+        if ((b & 0x7F) == value) {
+          *selector = s;
+          *total_bits = 2;
+          return true;  // Done, can't improve upon a direct encoding.
+        }
+        continue;
+      }
+
+      uint32_t extra_bits = GetExtraBits(b);
+      if (b & kOffset) {
+        uint32_t offset = GetOffset(b);
+        if (value < offset || value >= offset + (1u << extra_bits)) continue;
+      } else {
+        if (bits_required > extra_bits) continue;  // Value does not fit.
+      }
+
+      // Better than prior encoding, remember it:
+      if (2 + extra_bits < *total_bits) {
+        *selector = s;
+        *total_bits = 2 + extra_bits;
+      }
+    }
+
+    if (*total_bits == 64) return PIK_FAILURE("No feasible selector found");
+
+    return true;
+  }
+};
+
+// Encodes 64-bit unsigned integers with a fixed distribution, taking 2 bits
+// to encode 0, 6 bits to encode 1 to 16, 10 bits to encode 17 to 272, 15 bits
+// to encode up to 4095, and in the order of log2(value) * 1.125 bits for higher
+// values.
+class U64Coder {
+ public:
+  static uint64_t Read(BitReader* PIK_RESTRICT reader) {
+    uint64_t selector = reader->ReadFixedBits<2>();
+    if (selector == 0) {
+      return 0;
+    }
+    if (selector == 1) {
+      return 1 + reader->ReadFixedBits<4>();
+    }
+    if (selector == 2) {
+      return 17 + reader->ReadFixedBits<8>();
+    }
+
+    // selector 3, varint, first 12 bits, later groups are 8 bits
+    uint64_t result = reader->ReadFixedBits<12>();
+
+    uint64_t shift = 12;
+    while (reader->ReadFixedBits<1>()) {  // 1 = another group follows.
+      if (shift == 60) {  // Only the top 4 bits of a uint64_t are left.
+        result |= static_cast<uint64_t>(reader->ReadFixedBits<4>()) << shift;
+        break;
+      }
+      result |= static_cast<uint64_t>(reader->ReadFixedBits<8>()) << shift;
+      shift += 8;
+    }
+
+    return result;
+  }
+
+  // Always returns true: every path below ends in the final `return true`.
+  static Status Write(uint64_t value, size_t* pos, uint8_t* storage) {
+    if (value == 0) {
+      // Selector: use 0 bits, value 0
+      WriteBits(2, 0, pos, storage);
+    } else if (value <= 16) {
+      // Selector: use 4 bits, value 1..16
+      WriteBits(2, 1, pos, storage);
+      WriteBits(4, value - 1, pos, storage);
+    } else if (value <= 272) {
+      // Selector: use 8 bits, value 17..272
+      WriteBits(2, 2, pos, storage);
+      WriteBits(8, value - 17, pos, storage);
+    } else {
+      // Selector: varint, first a 12-bit group, after that per 8-bit group.
+      WriteBits(2, 3, pos, storage);
+      WriteBits(12, value & 4095, pos, storage);
+      value >>= 12;
+      int shift = 12;
+      while (value > 0 && shift < 60) {
+        // Indicate varint not done
+        WriteBits(1, 1, pos, storage);
+        WriteBits(8, value & 255, pos, storage);
+        value >>= 8;
+        shift += 8;
+      }
+      if (value > 0) {
+        // This only could happen if shift == 60.
+        WriteBits(1, 1, pos, storage);
+        WriteBits(4, value & 15, pos, storage);
+        // Implicitly closed sequence, no extra stop bit is required.
+      } else {
+        // Indicate end of varint
+        WriteBits(1, 0, pos, storage);
+      }
+    }
+
+    return true;
+  }
+
+  // Can always encode, but useful because it also returns bit size. The
+  // branches mirror Write() one for one.
+  static Status CanEncode(uint64_t value, size_t* PIK_RESTRICT encoded_bits) {
+    if (value == 0) {
+      *encoded_bits = 2;  // 2 selector bits
+    } else if (value <= 16) {
+      *encoded_bits = 2 + 4;  // 2 selector bits + 4 payload bits
+    } else if (value <= 272) {
+      *encoded_bits = 2 + 8;  // 2 selector bits + 8 payload bits
+    } else {
+      *encoded_bits = 2 + 12;  // 2 selector bits + 12 payload bits
+      value >>= 12;
+      int shift = 12;
+      while (value > 0 && shift < 60) {
+        *encoded_bits += 1 + 8;  // 1 continuation bit + 8 payload bits
+        value >>= 8;
+        shift += 8;
+      }
+      if (value > 0) {
+        // This only could happen if shift == 60.
+        *encoded_bits += 1 + 4;  // 1 continuation bit + 4 payload bits
+      } else {
+        *encoded_bits += 1;  // 1 stop bit
+      }
+    }
+
+    return true;
+  }
+};
+
+// Exif orientation (legal values 1..8) stored as value - 1 in exactly 3 bits.
+class OrientationCoder {
+ public:
+  static uint32_t Read(BitReader* PIK_RESTRICT reader) {
+    return reader->ReadFixedBits<3>() + 1u;
+  }
+
+  static Status Write(uint32_t value, size_t* pos, uint8_t* storage) {
+    WriteBits(3, value - 1, pos, storage);  // No range check; see CanEncode.
+    return true;
+  }
+
+  static Status CanEncode(uint32_t value, size_t* PIK_RESTRICT encoded_bits) {
+    *encoded_bits = 3;  // Reported even when the range test below fails.
+    return !(value < 1 || value > 8);
+  }
+};
+
+// Coder for byte arrays: stores encoding via U32Coder and #bytes via U64Coder,
+// then raw or Brotli-compressed bytes.
+class BytesCoder {
+  static const int kBrotliQuality = 6;
+
+ public:
+  static Status CanEncode(BytesEncoding encoding, const PaddedBytes& value,
+                          size_t* PIK_RESTRICT encoded_bits) {
+    PIK_ASSERT(encoding == BytesEncoding::kRaw ||
+               encoding == BytesEncoding::kBrotli);
+    if (value.empty()) {  // Empty payload: only the kNone tag is stored.
+      return U32Coder::CanEncode(kU32Direct3Plus8,
+                                 static_cast<uint32_t>(BytesEncoding::kNone),
+                                 encoded_bits);
+    }
+
+    PaddedBytes compressed;
+    const PaddedBytes* store_what = &value;
+
+    // Note: we will compress a second time when Write is called.
+    if (encoding == BytesEncoding::kBrotli) {
+      PIK_RETURN_IF_ERROR(BrotliCompress(kBrotliQuality, value, &compressed));
+      if (compressed.size() < value.size()) {
+        store_what = &compressed;
+      } else {
+        encoding = BytesEncoding::kRaw;  // Compression did not help.
+      }
+    }
+
+    size_t bits_encoding, bits_size;
+    PIK_RETURN_IF_ERROR(U32Coder::CanEncode(kU32Direct3Plus8,
+                                            static_cast<uint32_t>(encoding),
+                                            &bits_encoding) &&
+                        U64Coder::CanEncode(store_what->size(), &bits_size));
+    *encoded_bits =
+        bits_encoding + bits_size + store_what->size() * kBitsPerByte;
+    return true;
+  }
+
+  static Status Read(BitReader* PIK_RESTRICT reader,
+                     PaddedBytes* PIK_RESTRICT value) {
+    const BytesEncoding encoding =
+        static_cast<BytesEncoding>(U32Coder::Read(kU32Direct3Plus8, reader));
+    if (encoding == BytesEncoding::kNone) {
+      value->clear();
+      return true;
+    }
+    if (encoding != BytesEncoding::kRaw && encoding != BytesEncoding::kBrotli) {
+      return PIK_FAILURE("Unrecognized BytesEncoding encoding");
+    }
+
+    const uint64_t num_bytes = U64Coder::Read(reader);
+    // Prevent fuzzer from running out of memory.
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    if (num_bytes > 16 * 1024 * 1024) {
+      return PIK_FAILURE("BytesCoder size too large for fuzzer");
+    }
+#endif
+    value->resize(num_bytes);
+    if (num_bytes != 0 && value->size() == 0) {
+      return PIK_FAILURE("Failed to allocate memory for BytesCoder");
+    }
+
+    // Read groups of bytes without calling FillBitBuffer every time.
+    constexpr size_t kBytesPerGroup = 4;  // guaranteed by FillBitBuffer
+    size_t i;  // size_t, not uint32_t: must not wrap below value->size().
+    for (i = 0; i + kBytesPerGroup <= value->size(); i += kBytesPerGroup) {
+      reader->FillBitBuffer();
+#if PIK_BYTE_ORDER_LITTLE
+      const uint32_t buf = reader->PeekFixedBits<32>();
+      reader->Advance(32);
+      memcpy(value->data() + i, &buf, 4);
+#else
+      for (size_t idx_byte = 0; idx_byte < kBytesPerGroup; ++idx_byte) {
+        value->data()[i + idx_byte] = reader->PeekFixedBits<8>();
+        reader->Advance(8);
+      }
+#endif
+    }
+
+    reader->FillBitBuffer();
+    for (; i < value->size(); ++i) {  // Fewer than kBytesPerGroup bytes left.
+      value->data()[i] = reader->PeekFixedBits<8>();
+      reader->Advance(8);
+    }
+
+    if (encoding == BytesEncoding::kBrotli) {
+      const size_t kMaxOutput = 1ULL << 32;
+      size_t bytes_read = 0;
+      PaddedBytes decompressed;
+      if (PIK_UNLIKELY(!BrotliDecompress(*value, kMaxOutput, &bytes_read,
+                                         &decompressed))) {
+        return false;
+      }
+      if (bytes_read != value->size()) {
+        PIK_NOTIFY_ERROR("Read too few");
+      }
+      value->swap(decompressed);  // Hand the decompressed bytes to the caller.
+    }
+    return true;
+  }
+
+  static Status Write(BytesEncoding encoding, const PaddedBytes& value,
+                      size_t* PIK_RESTRICT pos, uint8_t* storage) {
+    PIK_ASSERT(encoding == BytesEncoding::kRaw ||
+               encoding == BytesEncoding::kBrotli);
+    if (value.empty()) {  // Empty payload: write only the kNone tag.
+      return U32Coder::Write(kU32Direct3Plus8,
+                             static_cast<uint32_t>(BytesEncoding::kNone), pos,
+                             storage);
+    }
+
+    PaddedBytes compressed;
+    const PaddedBytes* store_what = &value;
+
+    if (encoding == BytesEncoding::kBrotli) {
+      PIK_RETURN_IF_ERROR(BrotliCompress(kBrotliQuality, value, &compressed));
+      if (compressed.size() < value.size()) {
+        store_what = &compressed;
+      } else {
+        encoding = BytesEncoding::kRaw;  // Same fallback as in CanEncode.
+      }
+    }
+
+    PIK_RETURN_IF_ERROR(U32Coder::Write(
+        kU32Direct3Plus8, static_cast<uint32_t>(encoding), pos, storage));
+    PIK_RETURN_IF_ERROR(
+        U64Coder::Write(store_what->size(), pos, storage));
+
+    size_t i = 0;
+#if PIK_BYTE_ORDER_LITTLE
+    // Write 4 bytes at a time
+    uint32_t buf;
+    for (; i + 4 <= store_what->size(); i += 4) {
+      memcpy(&buf, store_what->data() + i, 4);
+      WriteBits(32, buf, pos, storage);
+    }
+#endif
+
+    // Write remaining bytes
+    for (; i < store_what->size(); ++i) {
+      WriteBits(8, store_what->data()[i], pos, storage);
+    }
+    return true;
+  }
+};
+
+// A "bundle" is a forward- and backward compatible collection of fields.
+// They are used for FileHeader/FrameHeader/GroupHeader. Bundles can be extended
+// by appending(!) fields. Optional fields may be omitted from the bitstream by
+// conditionally visiting them. When reading new bitstreams with old code, we
+// skip unknown fields at the end of the bundle. This requires storing the
+// amount of extra appended bits, and that fields are visited in chronological
+// order of being added to the format, because old decoders cannot skip some
+// future fields and resume reading old fields. Similarly, new readers query
+// bits in an "extensions" field to skip (groups of) fields not present in old
+// bitstreams. Note that each bundle must include an "extensions" field prior to
+// freezing the format, otherwise it cannot be extended.
+//
+// To ensure interoperability, there will be no opaque fields.
+//
+// HOWTO:
+// - basic usage: define a struct with member variables ("fields") and a
+//   VisitFields(v) member function that calls v->U32/Bool etc. for each field,
+//   specifying their default values. The ctor must call Bundle::Init(this).
+//
+// - print a trace of visitors: ensure each bundle has a static Name() member
+//   function, and #define PIK_FIELDS_TRACE 1.
+//
+// - optional fields: in VisitFields, add if (v->Conditional(your_condition))
+//   { v->U32(dist, default, &field); }. This prevents reading/writing field
+//   if !your_condition, which is typically computed from a prior field.
+//   WARNING: do not add an else branch; to ensure all fields are initialized,
+//   instead add another if (v->Conditional(!your_condition)).
+//
+// - repeated fields: for dynamic sizes, add a std::vector field and in
+//   VisitFields, call v->SetSizeWhenReading before accessing the field. For
+//   static or bounded sizes, use an array or std::array. In all cases, simply
+//   visit each array element as if it were a normal field.
+//
+// - nested bundles: add a bundle as a normal field and in VisitFields call
+//   PIK_RETURN_IF_ERROR(v->VisitNested(&nested));
+//
+// - allow future extensions: define a "uint64_t extensions" field and call
+//   v->BeginExtensions(&extensions) after visiting all non-extension fields,
+//   and `return v->EndExtensions();` after the last extension field.
+//
+// - encode an entire bundle in one bit if ALL its fields equal their default
+//   values: add a "bool all_default" field and as the first visitor:
+//   if (v->AllDefault(*this, &all_default)) return true;
+//   Note: if extensions are present, AllDefault() == false.
+
+class Bundle {
+ public:
+  // These are called from headers.cc.
+
+  template <class T>
+  static void Init(T* PIK_RESTRICT t) {  // Runs InitVisitor over *t.
+    Trace("Init");
+    InitVisitor visitor;
+    if (!visitor.Visit(t)) {
+      PIK_ASSERT(false);  // Init should never fail.
+    }
+  }
+
+  // Returns whether ALL fields (including `extensions`, if present) are equal
+  // to their default value. Does not modify `t`.
+  template <class T>
+  static bool AllDefault(const T& t) {
+    Trace("[[AllDefault");
+    AllDefaultVisitor visitor;
+    if (!visitor.VisitConst(t)) {
+      PIK_ASSERT(false);  // AllDefault should never fail.
+    }
+#if PIK_FIELDS_TRACE
+    printf("  %d]]\n", visitor.AllDefault());  // Closes the "[[" trace line.
+#endif
+    return visitor.AllDefault();
+  }
+
+  // Prepares for Write(): "*total_bits" is the amount of storage required;
+  // "*extension_bits" must be passed to Write(). Fails if the visit fails.
+  template <class T>
+  static Status CanEncode(const T& t, size_t* PIK_RESTRICT extension_bits,
+                          size_t* PIK_RESTRICT total_bits) {
+    Trace("CanEncode");
+    CanEncodeVisitor visitor;
+    PIK_RETURN_IF_ERROR(visitor.VisitConst(t));
+    return visitor.GetSizes(extension_bits, total_bits);
+  }
+
+  template <class T>
+  static Status Read(BitReader* reader, T* PIK_RESTRICT t) {
+    Trace("Read");
+    ReadVisitor visitor(reader);
+    PIK_RETURN_IF_ERROR(visitor.Visit(t));
+    return visitor.OK();  // Also false if any Bytes field failed to decode.
+  }
+
+  template <class T>
+  static Status Write(const T& t, const size_t extension_bits,
+                      size_t* PIK_RESTRICT pos, uint8_t* storage) {
+    Trace("Write");
+    WriteVisitor visitor(extension_bits, pos, storage);
+    PIK_RETURN_IF_ERROR(visitor.VisitConst(t));  // Visits fields of t.
+    return visitor.OK();
+  }
+
+ private:
+  static void Trace(const char* op) {  // No-op unless PIK_FIELDS_TRACE != 0.
+#if PIK_FIELDS_TRACE
+    printf("---- %s\n", op);
+#endif
+  }
+
+  // A bundle can be in one of three states concerning extensions: not-begun,
+  // active, ended. Bundles may be nested, so we need a stack of states.
+  class ExtensionStates {
+   public:
+    static constexpr size_t kMaxDepth = 64;  // One bit per level in uint64_t.
+
+    void Push() {
+      // Initial state = not-begun.
+      begun_ <<= 1;
+      ended_ <<= 1;
+    }
+
+    // Clears current state; caller must check IsEnded beforehand.
+    void Pop() {
+      begun_ >>= 1;
+      ended_ >>= 1;
+    }
+
+    // Returns true if state == active || state == ended.
+    Status IsBegun() const { return (begun_ & 1) != 0; }
+    // Returns true if state == ended.
+    Status IsEnded() const { return (ended_ & 1) != 0; }
+
+    void Begin() {
+      PIK_ASSERT(!IsBegun());
+      PIK_ASSERT(!IsEnded());
+      begun_ += 1;  // Sets the (currently clear) lowest bit.
+    }
+
+    void End() {
+      PIK_ASSERT(IsBegun());
+      PIK_ASSERT(!IsEnded());
+      ended_ += 1;  // Sets the (currently clear) lowest bit.
+    }
+
+   private:
+    // Current state := least-significant bit of begun_ and ended_.
+    uint64_t begun_ = 0;
+    uint64_t ended_ = 0;
+  };
+
+  // Visitors generate Init/AllDefault/Read/Write logic for all fields. Each
+  // bundle's VisitFields member function calls visitor->U32/Bytes/etc. We do
+  // not overload operator() because a function name is easier to search for.
+
+  template <class Derived>
+  class VisitorBase {
+   public:
+    ~VisitorBase() { PIK_ASSERT(depth_ == 0); }  // Every Visit must have ended.
+
+    // This is the only call site of T::VisitFields. Adds tracing and ensures
+    // EndExtensions was called.
+    template <class T>
+    Status Visit(T* t) {
+#if PIK_FIELDS_TRACE
+      char format[10];
+      snprintf(format, sizeof(format), "%%%zus%%s\n", depth_ * 2);
+      printf(format, "", T::Name());  // Indents the name by depth_ * 2 columns.
+#endif
+
+      depth_ += 1;
+      PIK_ASSERT(depth_ <= ExtensionStates::kMaxDepth);
+      extension_states_.Push();
+
+      Derived* self = static_cast<Derived*>(this);
+      const Status ok = t->VisitFields(self);
+
+      if (ok) {
+        // If VisitFields called BeginExtensions, must also call EndExtensions.
+        PIK_ASSERT(!extension_states_.IsBegun() || extension_states_.IsEnded());
+      } else {
+        // Failed, undefined state: don't care whether EndExtensions was called.
+      }
+
+      extension_states_.Pop();
+      PIK_ASSERT(depth_ != 0);
+      depth_ -= 1;
+
+      return ok;
+    }
+
+    // For visitors accepting a const T, need to const-cast so we can call the
+    // non-const T::VisitFields. NOTE: T is not modified.
+    template <class T>
+    Status VisitConst(const T& t) {
+      return Visit(const_cast<T*>(&t));
+    }
+
+    // Returns whether VisitFields should visit some subsequent fields.
+    // "condition" is typically from prior fields, e.g. flags.
+    // Overridden by InitVisitor.
+    Status Conditional(bool condition) { return condition; }
+
+    // Overridden by InitVisitor, AllDefaultVisitor and CanEncodeVisitor.
+    template <class Fields>
+    Status AllDefault(const Fields& fields, bool* PIK_RESTRICT all_default) {
+      Derived* self = static_cast<Derived*>(this);
+      self->Bool(true, all_default);  // Visit the flag like any Bool field.
+      return *all_default;
+    }
+
+    // Returns the result of visiting a nested Bundle.
+    // Overridden by InitVisitor.
+    template <class Fields>
+    Status VisitNested(Fields* fields) {
+      Derived* self = static_cast<Derived*>(this);
+      return self->Visit(fields);
+    }
+
+    // Overridden by ReadVisitor.
+    template <typename T>
+    void SetSizeWhenReading(uint32_t size, const T* container) {
+      PIK_ASSERT(container->size() == size);  // Not reading: only verify.
+    }
+
+    // Called before any conditional visit based on "extensions".
+    // Overridden by ReadVisitor, CanEncodeVisitor and WriteVisitor.
+    void BeginExtensions(uint64_t* PIK_RESTRICT extensions) {
+      Derived* self = static_cast<Derived*>(this);
+      self->U64(0, extensions);  // "extensions" is a U64 field, default 0.
+
+      extension_states_.Begin();
+    }
+
+    // Called after all extension fields (if any). Although non-extension fields
+    // could be visited afterward, we prefer the convention that extension
+    // fields are always the last to be visited.
+    // Overridden by ReadVisitor.
+    Status EndExtensions() {
+      extension_states_.End();
+      return true;
+    }
+
+   private:
+    size_t depth_ = 0;  // for indentation.
+    ExtensionStates extension_states_;
+  };
+
+  struct InitVisitor : public VisitorBase<InitVisitor> {
+    void U32(const uint32_t distribution, const uint32_t default_value,
+             uint32_t* PIK_RESTRICT value) {
+      *value = default_value;  // "distribution" is unused when initializing.
+    }
+
+    void U64(const uint64_t default_value, uint64_t* PIK_RESTRICT value) {
+      *value = default_value;
+    }
+
+    template <typename T>
+    void Enum(const uint32_t distribution, const T default_value,
+              T* PIK_RESTRICT value) {
+      *value = default_value;
+    }
+
+    template <typename T>
+    void Orientation(
+        const T default_value, T* PIK_RESTRICT value) {
+      *value = default_value;
+    }
+
+    void Bool(bool default_value, bool* PIK_RESTRICT value) {
+      *value = default_value;
+    }
+
+    void Bytes(const BytesEncoding unused_encoding,
+               PaddedBytes* PIK_RESTRICT value) {
+      value->clear();  // The default for a Bytes field is the empty array.
+    }
+
+    // Always visit conditional fields to ensure they are initialized.
+    Status Conditional(bool condition) { return true; }
+
+    template <class Fields>
+    Status AllDefault(const Fields& fields, bool* PIK_RESTRICT all_default) {
+      // Just initialize this field and don't skip initializing others.
+      Bool(true, all_default);
+      return false;
+    }
+
+    template <class Fields>
+    Status VisitNested(Fields* fields) {
+      // Avoid re-initializing nested bundles (their ctors already called
+      // Bundle::Init for their fields).
+      return true;
+    }
+  };
+
+  class AllDefaultVisitor : public VisitorBase<AllDefaultVisitor> {
+   public:
+    void U32(const uint32_t distribution, const uint32_t default_value,
+             const uint32_t* PIK_RESTRICT value) {
+      all_default_ &= *value == default_value;  // Sticky: stays false once set.
+    }
+
+    void U64(const uint64_t default_value, const uint64_t* PIK_RESTRICT value) {
+      all_default_ &= *value == default_value;
+    }
+
+    template <typename T>
+    void Enum(const uint32_t distribution, const T default_value,
+              const T* PIK_RESTRICT value) {
+      all_default_ &= *value == default_value;
+    }
+
+    template <typename T>
+    void Orientation(
+        const T default_value, T* PIK_RESTRICT value) {
+      all_default_ &= *value == default_value;  // *value is only read here.
+    }
+
+    void Bool(bool default_value, const bool* PIK_RESTRICT value) {
+      all_default_ &= *value == default_value;
+    }
+
+    void Bytes(const BytesEncoding unused_encoding,
+               const PaddedBytes* PIK_RESTRICT value) {
+      all_default_ &= value->empty();  // Default Bytes value is empty.
+    }
+
+    template <class Fields>
+    Status AllDefault(const Fields& fields, bool* PIK_RESTRICT all_default) {
+      // Visit all fields so we can compute the actual all_default_ value.
+      return false;
+    }
+
+    bool AllDefault() const { return all_default_; }  // Result of the visit.
+
+   private:
+    bool all_default_ = true;
+  };
+
+  class ReadVisitor : public VisitorBase<ReadVisitor> {
+   public:
+    ReadVisitor(BitReader* reader) : reader_(reader) {}
+
+    void U32(const uint32_t distribution, const uint32_t default_value,
+             uint32_t* PIK_RESTRICT value) {
+      *value = U32Coder::Read(distribution, reader_);  // Default is not used.
+    }
+
+    void U64(const uint64_t default_value, uint64_t* PIK_RESTRICT value) {
+      *value = U64Coder::Read(reader_);
+    }
+
+    template <typename T>
+    void Enum(const uint32_t distribution, const T default_value,
+              T* PIK_RESTRICT value) {
+      uint32_t bits;
+      U32(distribution, static_cast<uint32_t>(default_value), &bits);
+      *value = static_cast<T>(bits);  // No range check against T's enumerators.
+    }
+
+    template <typename T>
+    void Orientation(
+        const T default_value, T* PIK_RESTRICT value) {
+      *value = static_cast<T>(OrientationCoder::Read(reader_));
+    }
+
+    void Bool(bool default_value, bool* PIK_RESTRICT value) {
+      uint32_t bits;
+      U32(kU32RawBits + 1, default_value, &bits);  // One raw bit.
+      PIK_ASSERT(bits <= 1);
+      *value = bits == 1;
+    }
+
+    void Bytes(const BytesEncoding unused_encoding,
+               PaddedBytes* PIK_RESTRICT value) {
+      ok_ &= BytesCoder::Read(reader_, value);  // Failure surfaces via OK().
+    }
+
+    template <typename T>
+    void SetSizeWhenReading(uint32_t size, T* container) {
+      // Sets the container size to the given size in case of reading. The size
+      // must have been read from a previously visited field.
+      container->resize(size);
+    }
+
+    void BeginExtensions(uint64_t* PIK_RESTRICT extensions) {
+      VisitorBase<ReadVisitor>::BeginExtensions(extensions);
+      if (*extensions != 0) {
+        // Read the additional U64 indicating the number of extension bits
+        // (more compact than sending the total size).
+        extension_bits_ = U64Coder::Read(reader_);  // >= 0
+        // Used by EndExtensions to skip past any _remaining_ extensions.
+        pos_after_ext_size_ = reader_->BitsRead();
+        PIK_ASSERT(pos_after_ext_size_ != 0);
+      }
+    }
+
+    Status EndExtensions() {
+      PIK_RETURN_IF_ERROR(VisitorBase<ReadVisitor>::EndExtensions());
+      // Happens if extensions == 0: don't read size, done.
+      if (pos_after_ext_size_ == 0) return true;
+
+      // Skip new fields this (old?) decoder didn't know about, if any.
+      const size_t bits_read = reader_->BitsRead();
+      const uint64_t end = pos_after_ext_size_ + extension_bits_;
+      if (bits_read > end) {
+        return PIK_FAILURE("Read more extension bits than budgeted");
+      }
+      const size_t remaining_bits = end - bits_read;
+      if (remaining_bits != 0) {
+        fprintf(stderr, "Skipping %zu-bit extension(s)\n", remaining_bits);
+        reader_->SkipBits(remaining_bits);
+      }
+      return true;
+    }
+
+    Status OK() const { return ok_; }
+
+   private:
+    bool ok_ = true;  // Cleared by Bytes() when BytesCoder::Read fails.
+    BitReader* const reader_;
+    uint64_t extension_bits_ = 0;    // May be 0 even if extensions present.
+    // NOTE(review): one slot shared by all nesting levels; confirm for nesting.
+    size_t pos_after_ext_size_ = 0;  // 0 iff extensions == 0.
+  };
+
+  // Visitor that checks whether every visited field can be encoded and adds
+  // up the number of bits the encoding would take. Results are retrieved with
+  // GetSizes() once all fields have been visited.
+  class CanEncodeVisitor : public VisitorBase<CanEncodeVisitor> {
+   public:
+    // Accounts for one u32 under the given distribution; default_value is
+    // unused.
+    void U32(const uint32_t distribution, const uint32_t default_value,
+             const uint32_t* PIK_RESTRICT value) {
+      size_t encoded_bits = 0;
+      ok_ &= U32Coder::CanEncode(distribution, *value, &encoded_bits);
+      encoded_bits_ += encoded_bits;
+    }
+
+    // Accounts for one u64; default_value is unused.
+    void U64(const uint64_t default_value, const uint64_t* PIK_RESTRICT value) {
+      size_t encoded_bits = 0;
+      ok_ &= U64Coder::CanEncode(*value, &encoded_bits);
+      encoded_bits_ += encoded_bits;
+    }
+
+    // Enums are accounted for as their u32 representation.
+    template <typename T>
+    void Enum(const uint32_t distribution, const T default_value,
+              T* PIK_RESTRICT value) {
+      uint32_t bits = static_cast<uint32_t>(*value);
+      U32(distribution, static_cast<uint32_t>(default_value), &bits);
+    }
+
+    // Orientation is accounted for via its dedicated coder.
+    template <typename T>
+    void Orientation(
+        const T default_value, T* PIK_RESTRICT value) {
+      size_t encoded_bits = 0;
+      ok_ &= OrientationCoder::CanEncode(static_cast<uint32_t>(*value),
+          &encoded_bits);
+      encoded_bits_ += encoded_bits;
+    }
+
+    // Bools are accounted for as a u32 with distribution kU32RawBits + 1.
+    void Bool(const bool default_value, bool* PIK_RESTRICT value) {
+      uint32_t bits = static_cast<uint32_t>(*value);
+      U32(kU32RawBits + 1, default_value, &bits);
+    }
+
+    // Accounts for a byte field under the given encoding.
+    void Bytes(const BytesEncoding encoding,
+               const PaddedBytes* PIK_RESTRICT value) {
+      size_t encoded_bits = 0;
+      ok_ &= BytesCoder::CanEncode(encoding, *value, &encoded_bits);
+      encoded_bits_ += encoded_bits;
+    }
+
+    // Computes the all-default flag from `fields`, bills it like any other
+    // bool and returns it.
+    template <class Fields>
+    Status AllDefault(const Fields& fields, bool* PIK_RESTRICT all_default) {
+      *all_default = Bundle::AllDefault(fields);
+      Bool(true, all_default);
+      return *all_default;
+    }
+
+    // After the base-class handling, if any extension is flagged, snapshots
+    // the bit count so GetSizes() can derive how many bits the extension
+    // fields occupy.
+    void BeginExtensions(uint64_t* PIK_RESTRICT extensions) {
+      VisitorBase<CanEncodeVisitor>::BeginExtensions(extensions);
+      if (*extensions != 0) {
+        PIK_ASSERT(pos_after_ext_ == 0);
+        pos_after_ext_ = encoded_bits_;
+        PIK_ASSERT(pos_after_ext_ != 0);  // visited "extensions"
+      }
+    }
+    // EndExtensions = default.
+
+    // Outputs the number of bits taken by extension fields and the total
+    // number of bits, the latter including the hidden u64 that stores the
+    // extension size. Returns failure if any visited field, or that size
+    // itself, cannot be encoded.
+    Status GetSizes(size_t* PIK_RESTRICT extension_bits,
+                    size_t* PIK_RESTRICT total_bits) {
+      PIK_RETURN_IF_ERROR(ok_);
+      *extension_bits = 0;
+      *total_bits = encoded_bits_;
+      // Only if extension field was nonzero will we encode the size.
+      if (pos_after_ext_ != 0) {
+        PIK_ASSERT(encoded_bits_ >= pos_after_ext_);
+        *extension_bits = encoded_bits_ - pos_after_ext_;
+        // Also need to encode *extension_bits and bill it to *total_bits.
+        size_t encoded_bits = 0;
+        ok_ &= U64Coder::CanEncode(*extension_bits, &encoded_bits);
+        *total_bits += encoded_bits;
+      }
+      // Propagate the outcome of the size check above; returning a constant
+      // `true` here would silently discard it.
+      return ok_;
+    }
+
+   private:
+    bool ok_ = true;
+    size_t encoded_bits_ = 0;
+    // Snapshot of encoded_bits_ after visiting the extension field, but NOT
+    // including the hidden "extension_bits" u64.
+    uint64_t pos_after_ext_ = 0;
+  };
+
+  // Visitor that serializes each visited field into `storage` at bit position
+  // `*pos`. The number of extension bits must be supplied up front because it
+  // is written right after the extensions field. Coder failures are
+  // accumulated and exposed through OK().
+  class WriteVisitor : public VisitorBase<WriteVisitor> {
+   public:
+    WriteVisitor(const size_t extension_bits, size_t* pos, uint8_t* storage)
+        : extension_bits_(extension_bits), pos_(pos), storage_(storage) {}
+
+    // Writes a u32 with the given distribution; default_value is unused.
+    void U32(const uint32_t distribution, const uint32_t default_value,
+             const uint32_t* PIK_RESTRICT value) {
+      ok_ &= U32Coder::Write(distribution, *value, pos_, storage_);
+    }
+
+    // Writes a u64; default_value is unused.
+    void U64(const uint64_t default_value, const uint64_t* PIK_RESTRICT value) {
+      ok_ &= U64Coder::Write(*value, pos_, storage_);
+    }
+
+    // Enums are written as their u32 representation.
+    template <typename T>
+    void Enum(const uint32_t distribution, const T default_value,
+              T* PIK_RESTRICT value) {
+      const uint32_t as_u32 = static_cast<uint32_t>(*value);
+      U32(distribution, static_cast<uint32_t>(default_value), &as_u32);
+    }
+
+    // Orientation is written via its dedicated coder.
+    template <typename T>
+    void Orientation(
+        const T default_value, T* PIK_RESTRICT value) {
+      ok_ &= OrientationCoder::Write(static_cast<uint32_t>(*value),
+          pos_, storage_);
+    }
+
+    // Bools are written as a u32 with distribution kU32RawBits + 1.
+    void Bool(const bool default_value, bool* PIK_RESTRICT value) {
+      const uint32_t as_u32 = static_cast<uint32_t>(*value);
+      U32(kU32RawBits + 1, default_value, &as_u32);
+    }
+
+    // Writes a byte field under the given encoding.
+    void Bytes(const BytesEncoding encoding,
+               const PaddedBytes* PIK_RESTRICT value) {
+      ok_ &= BytesCoder::Write(encoding, *value, pos_, storage_);
+    }
+
+    // After the base-class handling, emits the extension bit count if any
+    // extension is flagged. Without extensions the count must be zero and
+    // nothing is written.
+    void BeginExtensions(uint64_t* PIK_RESTRICT extensions) {
+      VisitorBase<WriteVisitor>::BeginExtensions(extensions);
+      if (*extensions != 0) {
+        // NOTE: extension_bits_ can be zero if the extensions do not require
+        // any additional fields.
+        ok_ &= U64Coder::Write(extension_bits_, pos_, storage_);
+      } else {
+        PIK_ASSERT(extension_bits_ == 0);
+      }
+    }
+    // EndExtensions = default.
+
+    // False once any write has failed.
+    Status OK() const { return ok_; }
+
+   private:
+    const size_t extension_bits_;
+    size_t* PIK_RESTRICT pos_;
+    uint8_t* storage_;
+    bool ok_ = true;
+  };
+};
+
+}  // namespace pik
+
+#endif  // PIK_FIELDS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/file_io.h b/codec/L2/demos/pikEnc/host/pik/file_io.h
new file mode 100755
index 0000000000..87b038bcfd
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/file_io.h
@@ -0,0 +1,95 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_FILE_IO_H_
+#define PIK_FILE_IO_H_
+
+// Helper functions for reading/writing files.
+
+#include <stdio.h>
+#include <string>
+#include "pik/compiler_specific.h"
+#include "pik/padded_bytes.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Returns the extension of the last path component, including the dot, or an
+// empty string if that component contains no dot. Both '/' and '\\' are
+// treated as path separators, so a dot in a directory name ("dir.d/file") is
+// not mistaken for an extension. Assumes the filename is not a hidden file:
+// ".bashrc" is returned unchanged.
+static inline std::string Extension(const std::string& filename) {
+  const size_t dot = filename.rfind('.');
+  if (dot == std::string::npos) return std::string();
+  // A separator after the last dot means the dot belongs to a directory.
+  const size_t separator = filename.find_last_of("/\\");
+  if (separator != std::string::npos && separator > dot) return std::string();
+  return filename.substr(dot);
+}
+
+// Owns the FILE* returned by fopen and closes it on destruction, so the file
+// is closed even when the caller returns early. Not copyable. If fopen fails
+// the wrapper converts to nullptr.
+class FileWrapper {
+ public:
+  explicit FileWrapper(const std::string& pathname, const char* mode)
+      : file_(fopen(pathname.c_str(), mode)) {}
+
+  FileWrapper(const FileWrapper& other) = delete;
+  FileWrapper& operator=(const FileWrapper& other) = delete;
+
+  ~FileWrapper() {
+    if (file_ == nullptr) return;  // Open failed; nothing to close.
+    const int err = fclose(file_);
+    PIK_CHECK(err == 0);
+  }
+
+  // Implicit conversion so the wrapper can be handed to stdio functions and
+  // compared against nullptr.
+  operator FILE*() const { return file_; }
+
+ private:
+  FILE* const file_;
+};
+
+// Reads the whole file at `pathname` into `*bytes`, resizing the container
+// to the file size. Returns failure if the file cannot be opened, sought,
+// measured or read, and also if it is empty (in which case `*bytes` is left
+// empty).
+template<typename ContainerType>
+static inline Status ReadFile(const std::string& pathname,
+                              ContainerType* PIK_RESTRICT bytes) {
+  FileWrapper f(pathname, "rb");
+  if (f == nullptr) return PIK_FAILURE("Failed to open file for reading");
+
+  if (fseek(f, 0, SEEK_END) != 0) return PIK_FAILURE("Failed to seek end");
+  // ftell reports errors as -1; passing that to resize() would request an
+  // enormous allocation, so check it before converting to size_t.
+  const long file_size = ftell(f);
+  if (file_size < 0) return PIK_FAILURE("Failed to ftell");
+  bytes->resize(static_cast<size_t>(file_size));
+  if (bytes->size() == 0) return PIK_FAILURE("Zero-length file");
+  if (fseek(f, 0, SEEK_SET) != 0) return PIK_FAILURE("Failed to seek set");
+
+  // Needed in case ContainerType is std::string, whose data() is const.
+  // The container is non-empty here, so element 0 exists.
+  char* const bytes_writable = reinterpret_cast<char*>(&(*bytes)[0]);
+  size_t pos = 0;
+  // fread may deliver fewer bytes than requested; keep going until done.
+  while (pos < bytes->size()) {
+    const size_t bytes_read =
+        fread(bytes_writable + pos, 1, bytes->size() - pos, f);
+    if (bytes_read == 0) return PIK_FAILURE("Failed to read");
+    pos += bytes_read;
+  }
+  PIK_ASSERT(pos == bytes->size());
+  return true;
+}
+
+// Writes all of `bytes` to the file at `pathname`, opened in "wb" mode.
+// Returns failure if the file cannot be opened or a write makes no progress.
+template<typename ContainerType>
+static inline Status WriteFile(const ContainerType& bytes,
+                               const std::string& pathname) {
+  FileWrapper f(pathname, "wb");
+  if (f == nullptr) return PIK_FAILURE("Failed to open file for writing");
+
+  size_t pos = 0;
+  // fwrite may accept fewer bytes than requested; continue from where it
+  // stopped until everything has been handed over.
+  for (;;) {
+    const size_t remaining = bytes.size() - pos;
+    if (remaining == 0) break;
+    const size_t accepted = fwrite(bytes.data() + pos, 1, remaining, f);
+    if (accepted == 0) return PIK_FAILURE("Failed to write");
+    pos += accepted;
+  }
+  PIK_ASSERT(pos == bytes.size());
+
+  return true;
+}
+
+}  // namespace pik
+
+#endif  // PIK_FILE_IO_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/gaborish.cc b/codec/L2/demos/pikEnc/host/pik/gaborish.cc
new file mode 100755
index 0000000000..072c766392
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/gaborish.cc
@@ -0,0 +1,161 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/gaborish.h"
+
+#include "pik/convolve.h"
+#include "pik/pik_params.h"
+
+namespace pik {
+
+namespace kernel {
+
+struct Gaborish3_1000 {
+  // Returns the 3x3 weight table for this strength. The three distinct
+  // values are scaled so that the nine table entries sum to one. The table
+  // is a function-local static, built on the first call.
+  PIK_INLINE const Weights3x3& Weights() const {
+    // Unnormalized values.
+    constexpr float raw0 = 1.0f;
+    const float raw1 = static_cast<float>(0.11501538179658321);
+    const float raw2 = static_cast<float>(0.089979079587015454);
+    // raw1 and raw2 each occupy four of the nine table positions.
+    const float norm = 1.0 / (raw0 + 4 * (raw1 + raw2));
+    const float tap0 = raw0 * norm;
+    const float tap1 = raw1 * norm;
+    const float tap2 = raw2 * norm;
+    static const Weights3x3 weights = {
+        {SIMD_REP4(tap2)}, {SIMD_REP4(tap1)}, {SIMD_REP4(tap2)},
+        {SIMD_REP4(tap1)}, {SIMD_REP4(tap0)}, {SIMD_REP4(tap1)},
+        {SIMD_REP4(tap2)}, {SIMD_REP4(tap1)}, {SIMD_REP4(tap2)}};
+    return weights;
+  }
+};
+
+struct Gaborish3_875 {
+  // Returns the 3x3 weight table for this strength. The off-centre values
+  // are pre-multiplied by 0.875 and all three distinct values are then
+  // scaled so that the nine table entries sum to one. The table is a
+  // function-local static, built on the first call.
+  PIK_INLINE const Weights3x3& Weights() const {
+    // Unnormalized values.
+    constexpr float raw0 = 1.0f;
+    const float strength = 0.875;
+    const float raw1 = static_cast<float>(strength * 0.11501538179658321);
+    const float raw2 = static_cast<float>(strength * 0.089979079587015454);
+    // raw1 and raw2 each occupy four of the nine table positions.
+    const float norm = 1.0 / (raw0 + 4 * (raw1 + raw2));
+    const float tap0 = raw0 * norm;
+    const float tap1 = raw1 * norm;
+    const float tap2 = raw2 * norm;
+    static const Weights3x3 weights = {
+        {SIMD_REP4(tap2)}, {SIMD_REP4(tap1)}, {SIMD_REP4(tap2)},
+        {SIMD_REP4(tap1)}, {SIMD_REP4(tap0)}, {SIMD_REP4(tap1)},
+        {SIMD_REP4(tap2)}, {SIMD_REP4(tap1)}, {SIMD_REP4(tap2)}};
+    return weights;
+  }
+};
+
+struct Gaborish3_750 {
+  // Returns the 3x3 weight table for this strength. The off-centre values
+  // are pre-multiplied by 0.75 and all three distinct values are then scaled
+  // so that the nine table entries sum to one. Note that the base constants
+  // differ from those of the other strengths. The table is a function-local
+  // static, built on the first call.
+  PIK_INLINE const Weights3x3& Weights() const {
+    // Unnormalized values.
+    constexpr float raw0 = 1.0f;
+    const float strength = 0.75;
+    const float raw1 = static_cast<float>(strength * 0.13959942428275746);
+    const float raw2 = static_cast<float>(strength * 0.074240717189152386);
+    // raw1 and raw2 each occupy four of the nine table positions.
+    const float norm = 1.0 / (raw0 + 4 * (raw1 + raw2));
+    const float tap0 = raw0 * norm;
+    const float tap1 = raw1 * norm;
+    const float tap2 = raw2 * norm;
+    static const Weights3x3 weights = {
+        {SIMD_REP4(tap2)}, {SIMD_REP4(tap1)}, {SIMD_REP4(tap2)},
+        {SIMD_REP4(tap1)}, {SIMD_REP4(tap0)}, {SIMD_REP4(tap1)},
+        {SIMD_REP4(tap2)}, {SIMD_REP4(tap1)}, {SIMD_REP4(tap2)}};
+    return weights;
+  }
+};
+
+struct Gaborish3_500 {
+  // Returns the 3x3 weight table for this strength. The three distinct
+  // values are scaled so that the nine table entries sum to one. The table
+  // is a function-local static, built on the first call.
+  PIK_INLINE const Weights3x3& Weights() const {
+    // Unnormalized values.
+    constexpr float raw0 = 1.0f;
+    const float raw1 = static_cast<float>(0.056007960760453189);
+    const float raw2 = static_cast<float>(0.045899074552453879);
+    // raw1 and raw2 each occupy four of the nine table positions.
+    const float norm = 1.0 / (raw0 + 4 * (raw1 + raw2));
+    const float tap0 = raw0 * norm;
+    const float tap1 = raw1 * norm;
+    const float tap2 = raw2 * norm;
+    static const Weights3x3 weights = {
+        {SIMD_REP4(tap2)}, {SIMD_REP4(tap1)}, {SIMD_REP4(tap2)},
+        {SIMD_REP4(tap1)}, {SIMD_REP4(tap0)}, {SIMD_REP4(tap1)},
+        {SIMD_REP4(tap2)}, {SIMD_REP4(tap1)}, {SIMD_REP4(tap2)}};
+    return weights;
+  }
+};
+
+}  // namespace kernel
+
+// Runs a symmetric convolution (slow::SymmetricConvolution<2, WrapClamp>)
+// over `in` with a fixed weight table whose non-centre entries are scaled by
+// `mul`, and returns the result as a new image of the same size. `mul` must
+// be positive.
+Image3F GaborishInverse(const Image3F& in, double mul) {
+  PIK_ASSERT(mul > 0.0);
+  PROFILER_FUNC;
+
+  // Only an approximation. One or even two 3x3, and rank-1 (separable) 5x5
+  // are insufficient.
+  static const double kGaborish[5] = {
+      -0.092359145662814029,  -0.039253623634014627, 0.016176494530216929,
+      0.00083458437774987476, 0.004512465323949319,
+  };
+  // Scales one table constant by `mul` in double precision, then narrows.
+  const auto scaled = [mul](const int index) {
+    return static_cast<float>(mul * kGaborish[index]);
+  };
+  // The centre weight stays 1; all others scale with `mul`.
+  const float smooth_weights5[9] = {
+      1.0f,      scaled(0), scaled(2),
+      scaled(0), scaled(1), scaled(3),
+      scaled(2), scaled(3), scaled(4),
+  };
+  const size_t xsize = in.xsize();
+  const size_t ysize = in.ysize();
+  Image3F sharpened(xsize, ysize);
+  slow::SymmetricConvolution<2, WrapClamp>::Run(in, xsize, ysize,
+                                                smooth_weights5, &sharpened);
+  return sharpened;
+}
+
+namespace {
+// Convolves `in` with `kernel` into `*out`. Assumes `Kernel` is symmetric.
+// Images at least kConvolveMinWidth wide go through ConvolveT with the
+// Symmetric3 strategy; narrower ones use the slow general 3x3 convolution
+// with mirrored borders.
+template <typename Kernel, typename Executor>
+SIMD_ATTR void RunConv(const Executor executor, const Image3F& in,
+                       const Kernel& kernel, Image3F* out) {
+  const BorderNeverUsed border;
+  if (in.xsize() >= kConvolveMinWidth) {
+    ConvolveT<strategy::Symmetric3>::Run(border, executor, in, kernel, out);
+    return;
+  }
+  // Too narrow for the fast path.
+  slow::General3x3Convolution<1, WrapMirror>::Run(in, in.xsize(), in.ysize(),
+                                                  kernel, out);
+}
+}  // namespace
+
+// Allocates `*out` with the dimensions of `in` and fills it with `in`
+// convolved by the kernel matching `strength`. Returns failure for a
+// strength without a kernel. GaborishStrength::kOff is rejected up front.
+SIMD_ATTR Status ConvolveGaborish(const Image3F& in,
+                                  GaborishStrength strength, ThreadPool* pool,
+                                  Image3F* PIK_RESTRICT out) {
+  // Since kOff would want us to return the `in` parameter as `out`, which would
+  // lead to either a copy or a new memory handling strategy, we disallow it and
+  // require callers to avoid it.
+  PIK_CHECK(strength != GaborishStrength::kOff);
+  PROFILER_FUNC;
+  const ExecutorPool executor(pool);
+  *out = Image3F(in.xsize(), in.ysize());
+  switch (strength) {
+    case GaborishStrength::k1000:
+      RunConv(executor, in, kernel::Gaborish3_1000(), out);
+      break;
+    case GaborishStrength::k875:
+      RunConv(executor, in, kernel::Gaborish3_875(), out);
+      break;
+    case GaborishStrength::k750:
+      RunConv(executor, in, kernel::Gaborish3_750(), out);
+      break;
+    case GaborishStrength::k500:
+      RunConv(executor, in, kernel::Gaborish3_500(), out);
+      break;
+    default:
+      return PIK_FAILURE("Invalid strength argument");
+  }
+  return true;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/gaborish.h b/codec/L2/demos/pikEnc/host/pik/gaborish.h
new file mode 100755
index 0000000000..d741d94b21
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/gaborish.h
@@ -0,0 +1,30 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_GABORISH_H_
+#define PIK_GABORISH_H_
+
+// Linear smoothing (3x3 convolution) for deblocking without too much blur.
+
+#include "pik/data_parallel.h"
+#include "pik/image.h"
+#include "pik/image_ops.h"
+#include "pik/pik_params.h"
+
+namespace pik {
+
+// Used in encoder to reduce the impact of the decoder's smoothing.
+// This is approximate and slow (unoptimized 5x5 convolution).
+Image3F GaborishInverse(const Image3F& opsin, double mul);
+
+// Does not accept strength of GaborishStrength::kOff. For those cases it's
+// cheaper and simpler to just not do the convolve.
+Status ConvolveGaborish(const Image3F& in, GaborishStrength strength,
+                        ThreadPool* pool, Image3F* PIK_RESTRICT out);
+
+}  // namespace pik
+
+#endif  // PIK_GABORISH_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/gamma_correct.h b/codec/L2/demos/pikEnc/host/pik/gamma_correct.h
new file mode 100755
index 0000000000..731de2eafe
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/gamma_correct.h
@@ -0,0 +1,40 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_GAMMA_CORRECT_H_
+#define PIK_GAMMA_CORRECT_H_
+
+// Deprecated: sRGB transfer function. Use color_management.h instead.
+
+#include <cmath>
+
+#include "pik/compiler_specific.h"
+
+namespace pik {
+
+// Converts an sRGB-encoded value to linear. Input and output are both in
+// [0, 255]; inputs outside that range are clamped to the nearest end.
+static PIK_INLINE double Srgb8ToLinearDirect(double srgb8) {
+  if (srgb8 <= 0.0) {
+    return 0.0;
+  }
+  // Linear segment near black.
+  if (srgb8 <= 10.31475) {
+    return srgb8 / 12.92;
+  }
+  if (srgb8 >= 255.0) {
+    return 255.0;
+  }
+  // Power-law segment, evaluated on the value rescaled to [0, 1].
+  const double unit = srgb8 / 255.0;
+  return std::pow((unit + 0.055) / 1.055, 2.4) * 255.0;
+}
+
+// Converts a linear value to its sRGB encoding. Input and output are both in
+// [0, 255]; inputs outside that range are clamped to the nearest end.
+static PIK_INLINE double LinearToSrgb8Direct(double linear) {
+  if (linear <= 0.0) {
+    return 0.0;
+  }
+  if (linear >= 255.0) {
+    return 255.0;
+  }
+  // Linear segment near black.
+  if (linear <= 10.31475 / 12.92) {
+    return linear * 12.92;
+  }
+  // Power-law segment, evaluated on the value rescaled to [0, 1].
+  const double unit = linear / 255.0;
+  return (std::pow(unit, 1.0 / 2.4) * 1.055 - 0.055) * 255.0;
+}
+
+}  // namespace pik
+
+#endif  // PIK_GAMMA_CORRECT_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/gauss_blur.cc b/codec/L2/demos/pikEnc/host/pik/gauss_blur.cc
new file mode 100755
index 0000000000..c1dbdd2771
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/gauss_blur.cc
@@ -0,0 +1,96 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/gauss_blur.h"
+
+#include <math.h>
+#include <algorithm>
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/compiler_specific.h"
+#include "pik/profiler.h"
+
+namespace pik {
+
+// Copies the `xsize` samples of `row_in` to `row_out` and writes `radius`
+// mirrored samples on either side of them, i.e. to row_out[-radius, -1] and
+// row_out[xsize, xsize + radius - 1]. Mirror indices are clamped to the valid
+// range, so a row shorter than the radius repeats its end sample.
+inline void ExtrapolateBorders(const float* const PIK_RESTRICT row_in,
+                               float* const PIK_RESTRICT row_out,
+                               const int xsize, const int radius) {
+  memcpy(row_out, row_in, xsize * sizeof(row_out[0]));
+  const int last = xsize - 1;
+  for (int dist = 1; dist <= radius; ++dist) {
+    row_out[-dist] = row_in[std::min(dist, last)];
+    row_out[last + dist] = row_in[std::max(0, last - dist)];
+  }
+}
+
+// Convolves every row of `in` with `kernel` (odd length), keeps one result
+// per `res` input columns starting at column res / 2, and stores it
+// transposed: input row y becomes output column y. in.xsize() must be a
+// multiple of `res`.
+ImageF ConvolveXSampleAndTranspose(const ImageF& in,
+                                   const std::vector<float>& kernel,
+                                   const size_t res) {
+  PIK_ASSERT(kernel.size() % 2 == 1);
+  PIK_ASSERT(in.xsize() % res == 0);
+  const int radius = kernel.size() / 2;
+  const int first_x = res / 2;
+  const int sampled_xsize = in.xsize() / res;
+  ImageF out(in.ysize(), sampled_xsize);
+  // Scratch row with `radius` extra samples on each side for the borders.
+  std::vector<float> padded(in.xsize() + 2 * radius);
+  float* const PIK_RESTRICT row = &padded[radius];
+  // Points at the middle tap so taps can be indexed from -radius to +radius.
+  const float* const taps = &kernel[radius];
+  for (int y = 0; y < in.ysize(); ++y) {
+    ExtrapolateBorders(in.Row(y), row, in.xsize(), radius);
+    int ox = 0;
+    for (int x = first_x; x < in.xsize(); x += res) {
+      const float* const centre = row + x;
+      float sum = 0.0f;
+      for (int i = -radius; i <= radius; ++i) {
+        sum += centre[i] * taps[i];
+      }
+      out.Row(ox)[y] = sum;
+      ++ox;
+    }
+  }
+  return out;
+}
+
+// Three-plane variant: applies the single-plane function to each plane of
+// `in` and assembles the results into a new Image3F.
+Image3F ConvolveXSampleAndTranspose(const Image3F& in,
+                                    const std::vector<float>& kernel,
+                                    const size_t res) {
+  const auto plane = [&in, &kernel, res](const int c) {
+    return ConvolveXSampleAndTranspose(in.Plane(c), kernel, res);
+  };
+  return Image3F(plane(0), plane(1), plane(2));
+}
+
+// Two-pass separable convolution with subsampling: the first pass filters
+// rows with kernel_x and transposes, the second filters the transposed image
+// with kernel_y and transposes back.
+ImageF ConvolveAndSample(const ImageF& in, const std::vector<float>& kernel_x,
+                         const std::vector<float>& kernel_y, const size_t res) {
+  return ConvolveXSampleAndTranspose(
+      ConvolveXSampleAndTranspose(in, kernel_x, res), kernel_y, res);
+}
+
+// Separable convolution of one plane: ConvolveAndSample with res == 1.
+ImageF Convolve(const ImageF& in, const std::vector<float>& kernel_x,
+                const std::vector<float>& kernel_y) {
+  const size_t res = 1;
+  return ConvolveAndSample(in, kernel_x, kernel_y, res);
+}
+
+// Separable convolution of all three planes of `in`, each processed by the
+// single-plane Convolve.
+Image3F Convolve(const Image3F& in, const std::vector<float>& kernel_x,
+                 const std::vector<float>& kernel_y) {
+  const auto plane = [&in, &kernel_x, &kernel_y](const int c) {
+    return Convolve(in.Plane(c), kernel_x, kernel_y);
+  };
+  return Image3F(plane(0), plane(1), plane(2));
+}
+
+// Convenience overload that uses the same kernel in both directions.
+ImageF ConvolveAndSample(const ImageF& in, const std::vector<float>& kernel,
+                         const size_t res) {
+  const std::vector<float>& both_axes = kernel;
+  return ConvolveAndSample(in, both_axes, both_axes, res);
+}
+
+// Convolution of one plane with the same kernel in both directions:
+// ConvolveAndSample with res == 1.
+ImageF Convolve(const ImageF& in, const std::vector<float>& kernel) {
+  const size_t res = 1;
+  return ConvolveAndSample(in, kernel, res);
+}
+
+// Convolution of all three planes with the same kernel in both directions.
+Image3F Convolve(const Image3F& in, const std::vector<float>& kernel) {
+  const std::vector<float>& both_axes = kernel;
+  return Convolve(in, both_axes, both_axes);
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/gauss_blur.h b/codec/L2/demos/pikEnc/host/pik/gauss_blur.h
new file mode 100755
index 0000000000..13e11579b2
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/gauss_blur.h
@@ -0,0 +1,69 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_GAUSS_BLUR_H_
+#define PIK_GAUSS_BLUR_H_
+
+#include <stddef.h>
+#include <vector>
+
+#include "pik/image.h"
+
+namespace pik {
+
+// Returns 2 * radius + 1 samples of exp(-i^2 / (2 * sigma^2)) for i in
+// [-radius, radius], divided by their sum so that the weights add up to one.
+// `sigma` must be positive.
+template <typename T>
+std::vector<T> GaussianKernel(int radius, T sigma) {
+  PIK_ASSERT(sigma > 0.0);
+  const int num_taps = 2 * radius + 1;
+  std::vector<T> kernel(num_taps);
+  const T scaler = -1.0 / (2 * sigma * sigma);
+  // The sum is accumulated in double regardless of T.
+  double sum = 0.0;
+  for (int tap = 0; tap < num_taps; ++tap) {
+    const int i = tap - radius;  // Signed distance from the centre tap.
+    const T val = std::exp(scaler * i * i);
+    kernel[tap] = val;
+    sum += val;
+  }
+  for (T& weight : kernel) {
+    weight /= sum;
+  }
+  return kernel;
+}
+
+// All convolution functions below apply mirroring of the input on the borders
+// in the following way:
+//
+//     input: [a0 a1 a2 ...  aN]
+//     mirrored input: [aR ... a1 | a0 a1 a2 .... aN | aN-1 ... aN-R]
+//
+// where R is the radius of the kernel (i.e. kernel size is 2*R+1).
+
+// TODO(janwas): Deprecated, use ConvolveT instead (if |kernel| <= 5).
+ImageF Convolve(const ImageF& in, const std::vector<float>& kernel);
+Image3F Convolve(const Image3F& in, const std::vector<float>& kernel);
+
+// TODO(janwas): Deprecated, use ConvolveT instead (if |kernel| <= 5).
+ImageF Convolve(const ImageF& in, const std::vector<float>& kernel_x,
+                const std::vector<float>& kernel_y);
+Image3F Convolve(const Image3F& in, const std::vector<float>& kernel_x,
+                 const std::vector<float>& kernel_y);
+
+// TODO(janwas): Use ConvolveT instead (if |kernel| <= 5 and res == 1).
+// REQUIRES: in.xsize() and in.ysize() are integer multiples of res.
+ImageF ConvolveAndSample(const ImageF& in, const std::vector<float>& kernel,
+                         const size_t res);
+ImageF ConvolveAndSample(const ImageF& in, const std::vector<float>& kernel_x,
+                         const std::vector<float>& kernel_y, const size_t res);
+
+// TODO(janwas): Use ConvolveT instead (if |kernel| <= 5 and res == 1).
+ImageF ConvolveXSampleAndTranspose(const ImageF& in,
+                                   const std::vector<float>& kernel,
+                                   const size_t res);
+Image3F ConvolveXSampleAndTranspose(const Image3F& in,
+                                    const std::vector<float>& kernel,
+                                    const size_t res);
+
+}  // namespace pik
+
+#endif  // PIK_GAUSS_BLUR_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/gradient_map.cc b/codec/L2/demos/pikEnc/host/pik/gradient_map.cc
new file mode 100755
index 0000000000..520b664147
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/gradient_map.cc
@@ -0,0 +1,546 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/gradient_map.h"
+
+#include "pik/bit_reader.h"
+#include "pik/entropy_coder.h"
+#include "pik/image.h"
+#include "pik/opsin_params.h"
+#include "pik/padded_bytes.h"
+
+namespace pik {
+namespace {
+
+// Size of the superblock, in number of DCT blocks. So we operate on blocks of
+// kNumBlocks * kNumBlocks DC components, which corresponds to 8x8 times as
+// many original image pixels.
+const size_t kNumBlocks = 8;
+
+// Bilinear blend of four corner values: (x, y) = (0, 0) yields v00, (1, 0)
+// yields v10, (0, 1) yields v01 and (1, 1) yields v11.
+double Interpolate(double v00, double v01, double v10, double v11, double x,
+                   double y) {
+  const double inv_x = 1 - x;
+  const double inv_y = 1 - y;
+  return v00 * inv_x * inv_y + v10 * x * inv_y + v01 * inv_x * y +
+         v11 * x * y;
+}
+
+// Computes the max of the horizontal and vertical second derivative for each
+// pixel of plane `c`, where second derivative means absolute value of
+// difference of left delta and right delta (top/bottom for vertical
+// direction).
+// The radius over which the derivative is computed is only 1 pixel and it only
+// checks two angles (hor and ver), but this approximation works well enough.
+// Border pixels copy the value of their inner neighbour; if the image has
+// fewer than three rows or columns, a first derivative is used there instead.
+// Returns an image of the same size as `image` (empty if `image` is empty).
+ImageF Gradient2(const Image3F& image, const size_t c) {
+  const size_t xsize = image.xsize();
+  const size_t ysize = image.ysize();
+  ImageF image2(xsize, ysize);
+  // Nothing to compute. Returning here also keeps the `xsize - 1` and
+  // `ysize - 1` expressions below from wrapping around and indexing far
+  // outside the image.
+  if (xsize == 0 || ysize == 0) return image2;
+
+  // Interior pixels.
+  for (size_t y = 1; y + 1 < ysize; y++) {
+    const float* PIK_RESTRICT row0 = image.PlaneRow(c, y - 1);
+    const float* PIK_RESTRICT row1 = image.PlaneRow(c, y);
+    const float* PIK_RESTRICT row2 = image.PlaneRow(c, y + 1);
+    float* row_out = image2.Row(y);
+    for (size_t x = 1; x + 1 < xsize; x++) {
+      float ddx = (row1[x] - row1[x - 1]) - (row1[x + 1] - row1[x]);
+      float ddy = (row1[x] - row0[x]) - (row2[x] - row1[x]);
+      row_out[x] = std::max(fabsf(ddx), fabsf(ddy));
+    }
+  }
+  // Copy to the borders: top and bottom rows first (corners excluded).
+  if (ysize > 2) {
+    float* PIK_RESTRICT row0 = image2.Row(0);
+    const float* PIK_RESTRICT row1 = image2.Row(1);
+    const float* PIK_RESTRICT row2 = image2.Row(ysize - 2);
+    float* PIK_RESTRICT row3 = image2.Row(ysize - 1);
+    for (size_t x = 1; x + 1 < xsize; x++) {
+      row0[x] = row1[x];
+      row3[x] = row2[x];
+    }
+  } else {
+    const float* row0_in = image.PlaneRow(c, 0);
+    const float* row1_in = image.PlaneRow(c, ysize - 1);
+    float* row0_out = image2.Row(0);
+    float* row1_out = image2.Row(ysize - 1);
+    for (size_t x = 1; x + 1 < xsize; x++) {
+      // Image too short, take first derivative instead
+      row0_out[x] = row1_out[x] = fabsf(row0_in[x] - row1_in[x]);
+    }
+  }
+  // Then the left and right columns, which also fills in the corners.
+  if (xsize > 2) {
+    for (size_t y = 0; y < ysize; y++) {
+      float* row = image2.Row(y);
+      row[0] = row[1];
+      row[xsize - 1] = row[xsize - 2];
+    }
+  } else {
+    for (size_t y = 0; y < ysize; y++) {
+      const float* PIK_RESTRICT row_in = image.PlaneRow(c, y);
+      float* row_out = image2.Row(y);
+      // Image too narrow, take first derivative instead
+      row_out[0] = row_out[xsize - 1] = fabsf(row_in[0] - row_in[xsize - 1]);
+    }
+  }
+  return image2;
+}
+
+// Grows (r > 0) or shrinks (r < 0) the nonzero region of a binary w x h image
+// by |r| steps of city-block distance. The image is modified in place; ints
+// are used because the buffer temporarily holds distances rather than
+// booleans.
+void DilateImage(std::vector<int>& image, size_t w, size_t h, int r) {
+  const bool erode = r < 0;
+  if (erode) r = -r;
+
+  // Forward sweep: seed pixels get distance 0, all others take the shorter
+  // path through their top and left neighbours. When eroding, the seeds are
+  // the zero pixels; otherwise they are the nonzero ones.
+  for (size_t y = 0; y < h; y++) {
+    for (size_t x = 0; x < w; x++) {
+      int& dist = image[y * w + x];
+      if (!dist == erode) {
+        dist = 0;
+        continue;
+      }
+      dist = w + h;  // Larger than any distance inside the image.
+      if (y > 0) dist = std::min<int>(dist, image[(y - 1) * w + x] + 1);
+      if (x > 0) dist = std::min<int>(dist, image[y * w + x - 1] + 1);
+    }
+  }
+
+  // Backward sweep: also consider paths through bottom and right neighbours.
+  for (size_t y = h; y-- > 0;) {
+    for (size_t x = w; x-- > 0;) {
+      int& dist = image[y * w + x];
+      if (y + 1 < h) dist = std::min<int>(dist, image[(y + 1) * w + x] + 1);
+      if (x + 1 < w) dist = std::min<int>(dist, image[y * w + x + 1] + 1);
+    }
+  }
+
+  // Turn distances back into a binary image: pixels within r of a seed take
+  // the seed's side, the rest take the other side.
+  const size_t num_pixels = w * h;
+  for (size_t i = 0; i < num_pixels; i++) {
+    image[i] = (image[i] <= r) ? !erode : erode;
+  }
+}
+
+// Returns a row-major binary mask of `image`: 1 where the pixel is not
+// greater than `v`, 0 where it is greater.
+std::vector<int> ThresholdImage(const ImageF& image, float v) {
+  const size_t xsize = image.xsize();
+  const size_t ysize = image.ysize();
+  std::vector<int> mask(xsize * ysize);
+  for (size_t y = 0; y < ysize; y++) {
+    const float* row = image.Row(y);
+    int* mask_row = mask.data() + y * xsize;
+    for (size_t x = 0; x < xsize; x++) {
+      // Smaller is included, larger excluded.
+      mask_row[x] = (row[x] > v) ? 0 : 1;
+    }
+  }
+  return mask;
+}
+
+// One-dimensional piecewise-linear fit. Builds a matrix of triangular
+// (linear interpolation) basis functions, one per control point spaced `bs`
+// samples apart, evaluated at every sample of `p` that is not equal to
+// `exclude`, and passes it together with those samples to FEM, which updates
+// `r`.
+//   p             input samples, `size` of them
+//   bs            spacing between control points, in samples
+//   exclude       sentinel value; samples equal to it are left out
+//   guess_initial if true, r is first seeded from p at the control point
+//                 positions; otherwise r is handed to FEM as provided
+//   r             in/out, (size + bs - 2) / bs + 1 control point values
+// NOTE(review): FEM is defined outside this view; presumably it solves the
+// least-squares problem starting from the values in r -- confirm there.
+void LinePieceFit(const float* p, size_t size, int bs, float exclude,
+                  bool guess_initial, float* r) {
+  // Number of control points.
+  size_t m = (size + bs - 2) / bs + 1;
+  // indices[j] is the column of sample j among the included samples, or -1
+  // if sample j is excluded; n counts the included samples.
+  std::vector<int> indices(size);
+  std::vector<float> included;
+  size_t n = 0;
+  for (int j = 0; j < size; j++) {
+    if (p[j] != exclude) {
+      indices[j] = n;
+      included.push_back(p[j]);
+      n++;
+    } else {
+      indices[j] = -1;
+    }
+  }
+  // matrix F: One basis function per row, one included point per column.
+  std::vector<float> f(n * m, 0);
+  for (int i = 0; i < m; i++) {
+    // Basis i peaks at sample jm and falls to zero at j0 and j1.
+    int j0 = (i - 1) * bs;
+    int jm = i * bs;
+    int j1 = (i + 1) * bs;
+
+    // The support of the basis, clipped to the available samples.
+    int jbegin = std::max<int>(0, (i - 1) * bs);
+    int jend = std::min<int>(size - 1, (i + 1) * bs);
+
+    for (int j = jbegin; j <= jend; j++) {
+      if (p[j] == exclude) continue;
+      // This is the linear interpolation function (a triangle), to fit line
+      // segments.
+      float v = 0;
+      if (j < jm) {
+        v = (j - j0) * 1.0 / (jm - j0);
+      } else if (j == jm) {
+        v = 1.0;
+      } else {
+        v = 1.0 - (j - jm) * 1.0 / (j1 - jm);
+      }
+      f[i * n + indices[j]] = v;
+    }
+  }
+
+  if (guess_initial) {
+    // Simple heuristic: guess points that match block corners, but skip
+    // excluded points.
+    // A control point that is excluded or lies past the end of p repeats the
+    // previous guess (0 for the very first one).
+    float prev = 0;
+    for (int i = 0; i < m; i++) {
+      int j = i * bs;
+      r[i] = (j < size && p[j] != exclude) ? p[j] : prev;
+      prev = r[i];
+    }
+  }
+
+  FEM(f.data(), m, n, included.data(), r);
+}
+
+// Two-dimensional counterpart of LinePieceFit: computes block corner values r
+// for the xsize * ysize samples in p, which are laid out row by row.
+// bs is the block size; r must have room for
+// ((xsize + bs - 2) / bs + 1) * ((ysize + bs - 2) / bs + 1) values.
+// exclude and guess_initial are forwarded to every LinePieceFit call.
+// NOTE(review): both passes fit into local buffers, so the incoming contents
+// of r are never read, even when guess_initial is false - confirm intent.
+void PlanePieceFit(const float* p, size_t xsize, size_t ysize, int bs,
+                   float exclude, bool guess_initial, float* r) {
+  // Dimensions of the corner grid written to r.
+  const size_t grid_w = (xsize + bs - 2) / bs + 1;
+  const size_t grid_h = (ysize + bs - 2) / bs + 1;
+
+  // A separable fit (rows first, then columns) is used instead of one global
+  // fit to avoid large memory and CPU usage.
+
+  // Row-fitted intermediate: grid_w values for each of the ysize input rows.
+  std::vector<float> row_fit(grid_w * ysize);
+  // Horizontal pass
+  for (size_t y = 0; y != ysize; ++y) {
+    LinePieceFit(p + y * xsize, xsize, bs, exclude, guess_initial,
+                 row_fit.data() + y * grid_w);
+  }
+
+  // Vertical pass: gather one column of row_fit, fit it, scatter it into r.
+  std::vector<float> column(ysize);
+  std::vector<float> column_fit(grid_h);
+  for (size_t x = 0; x != grid_w; ++x) {
+    size_t src = x;
+    for (size_t y = 0; y != ysize; ++y, src += grid_w) column[y] = row_fit[src];
+
+    LinePieceFit(column.data(), ysize, bs, exclude, guess_initial,
+                 column_fit.data());
+
+    size_t dst = x;
+    for (size_t y = 0; y != grid_h; ++y, dst += grid_w) r[dst] = column_fit[y];
+  }
+}
+
+// Upscales the corner grid in gradient.gradient to an xsize_dc x ysize_dc
+// image: each cell between four neighboring corners is filled via Interpolate.
+Image3F ComputeGradientImage(const GradientMap& gradient) {
+  Image3F upscaled(gradient.xsize_dc, gradient.ysize_dc);
+  // Empty target: bail out before the unsigned "size - 1" below can wrap.
+  if (gradient.xsize_dc == 0 || gradient.ysize_dc == 0) return upscaled;
+  const float dx = kNumBlocks, dy = kNumBlocks;  // cell size in output pixels
+  for (size_t by = 0; by + 1 < gradient.ysize; ++by) {
+    // The far edge is inclusive (shared with the next cell) and clamped to
+    // the last valid pixel.
+    size_t y0 = by * kNumBlocks;
+    size_t yend = std::min<size_t>(gradient.ysize_dc - 1, y0 + kNumBlocks);
+    for (int c = 0; c < 3; c++) {
+      const float* row0 = gradient.gradient.PlaneRow(c, by);
+      const float* row1 = gradient.gradient.PlaneRow(c, by + 1);
+      for (size_t bx = 0; bx + 1 < gradient.xsize; bx++) {
+        float v00 = row0[bx];
+        float v01 = row1[bx];
+        float v10 = row0[bx + 1];
+        float v11 = row1[bx + 1];
+        size_t x0 = bx * kNumBlocks;
+        size_t xend = std::min<size_t>(gradient.xsize_dc - 1, x0 + kNumBlocks);
+        for (size_t y = y0; y <= yend; y++) {
+          float* row_out = upscaled.PlaneRow(c, y);
+          for (size_t x = x0; x <= xend; x++) {
+            row_out[x] =
+                Interpolate(v00, v01, v10, v11, (x - x0) / dx, (y - y0) / dy);
+          }
+        }
+      }
+    }
+  }
+  return upscaled;
+}
+
+// Quantizes the map_rect window of the gradient map to int16. 0 means "not
+// applied"; applied values are stored as 1 + clamp(round((v - kXybMin[c]) *
+// steps / range), 0, steps - 1), with range = 2 * kXybRadius[c].
+Image3S Quantize(const GradientMap& gradient, const Rect& map_rect,
+                 const Quantizer& quantizer) {
+  const Image3F& image = gradient.gradient;
+  Image3S out(map_rect.xsize(), map_rect.ysize());
+  for (int c = 0; c < 3; c++) {
+    // Skip x and b channels if grayscale, but do initialize them to 0.
+    if (gradient.grayscale && c != 1) {
+      for (size_t y = 0; y < out.ysize(); y++) {
+        int16_t* PIK_RESTRICT row_out = out.PlaneRow(c, y);
+        for (size_t x = 0; x < out.xsize(); x++) row_out[x] = 0;
+      }
+      continue;
+    }
+    const float step = quantizer.inv_quant_dc() *
+                       quantizer.DequantMatrix(0, kQuantKindDCT8, c)[0];
+    float range = kXybRadius[c] * 2;
+    // Use around 3x more bits than DC's quantization, capped
+    int steps = std::min(std::max(16, (int)(3 * range / step)), 255);
+    float mul = steps / range;
+    for (size_t y = 0; y < map_rect.ysize(); y++) {
+      const float* PIK_RESTRICT row = map_rect.ConstPlaneRow(image, c, y);
+      int16_t* PIK_RESTRICT row_out = out.PlaneRow(c, y);
+      // Index the apply mask through map_rect too, so it lines up with row.
+      const uint8_t* PIK_RESTRICT apply_row =
+          map_rect.ConstPlaneRow(gradient.apply, c, y);
+      for (size_t x = 0; x < map_rect.xsize(); x++) {
+        int value = 0;  // 0 is reserved for "gradient not applied here"
+        if (apply_row[x]) {
+          value = std::round((row[x] - kXybMin[c]) * mul);
+          value = std::min(std::max(0, value), steps - 1) + 1;
+        }
+        row_out[x] = value;
+      }
+    }
+  }
+  return out;
+}
+
+// Rebuilds gradient->gradient and gradient->apply from the quantized map.
+// A stored 0 clears the apply flag; any other value q sets it and decodes to
+// (q - 1) * range / steps + kXybMin[c].
+void Dequantize(const Quantizer& quantizer, const Image3S& quant,
+                GradientMap* gradient) {
+  const size_t width = gradient->xsize;
+  const size_t height = gradient->ysize;
+  gradient->gradient = Image3F(width, height);
+  for (int c = 0; c < 3; c++) {
+    // In grayscale mode only channel 1 is decoded; 0 and 2 are not written.
+    const bool skipped = gradient->grayscale && c != 1;
+    if (skipped) continue;
+    // Use around 3x more bits than DC's quantization, capped
+    const float dc_step = quantizer.inv_quant_dc() *
+                          quantizer.DequantMatrix(0, kQuantKindDCT8, c)[0];
+    const float range = kXybRadius[c] * 2;
+    const int steps = std::min(std::max(16, (int)(3 * range / dc_step)), 255);
+    const float scale = range / steps;
+    for (size_t y = 0; y < height; y++) {
+      const int16_t* PIK_RESTRICT in = quant.PlaneRow(c, y);
+      float* PIK_RESTRICT values = gradient->gradient.PlaneRow(c, y);
+      uint8_t* PIK_RESTRICT flags = gradient->apply.PlaneRow(c, y);
+      for (size_t x = 0; x < width; x++) {
+        const bool present = in[x] != 0;
+        flags[x] = present;
+        values[x] = present ? (in[x] - 1) * scale + kXybMin[c] : 0;
+      }
+    }
+  }
+}
+
+// Fills in the size fields and grayscale flag of *gradient for a DC image of
+// xsize_dc x ysize_dc and allocates its gradient and apply planes.
+void InitGradientMap(size_t xsize_dc, size_t ysize_dc, bool grayscale,
+                     GradientMap* gradient) {
+  // Block counts per direction. kNumBlocks is really the edge length of one
+  // block, measured in DC values. The "- 1" lets a single trailing pixel join
+  // the last block: N * kNumBlocks + 1 pixels still give N blocks, while
+  // N * kNumBlocks + 2 pixels give N + 1.
+  const size_t blocks_x = DivCeil(xsize_dc - 1, kNumBlocks);
+  const size_t blocks_y = DivCeil(ysize_dc - 1, kNumBlocks);
+  // Every block has a value on each corner ("fenceposts"), hence one more
+  // grid point than blocks in each direction.
+  const size_t grid_w = blocks_x + 1;
+  const size_t grid_h = blocks_y + 1;
+  gradient->xsize_dc = xsize_dc;
+  gradient->ysize_dc = ysize_dc;
+  gradient->grayscale = grayscale;
+  gradient->xsize = grid_w;
+  gradient->ysize = grid_h;
+
+  // The grid is much smaller than the DC image, which itself is already much
+  // smaller than the full original image.
+  gradient->gradient = Image3F(grid_w, grid_h);
+  gradient->apply = Image3B(grid_w, grid_h);
+}
+
+// Round-trips the whole gradient map through Quantize and Dequantize so that
+// it holds the values the decoder will see.
+void AccountForQuantization(const Quantizer& quantizer, GradientMap* gradient) {
+  const Rect whole_map(0, 0, gradient->xsize, gradient->ysize);
+  const Image3S quantized = Quantize(*gradient, whole_map, quantizer);
+  Dequantize(quantizer, quantized, gradient);
+}
+}  // namespace
+
+// Computes the gradient map for the given image of DC values.
+void ComputeGradientMap(const Image3F& opsin, bool grayscale,
+                        const Quantizer& quantizer, GradientMap* gradient) {
+  InitGradientMap(opsin.xsize(), opsin.ysize(), grayscale, gradient);
+  size_t xsize_dc = gradient->xsize_dc;
+  size_t ysize_dc = gradient->ysize_dc;
+  size_t xsize = gradient->xsize;
+  size_t ysize = gradient->ysize;
+
+  for (int task = 0; task < 3; ++task) {
+    static const float kExclude = 999999;
+    static const float kMaxDiff[3] = {0.001, 0.01, 0.05};
+    const size_t c = task;
+    // Grayscale only uses channel 1. Skip the others with `continue`: leaving
+    // the function here would drop channel 1 and the quantization round-trip.
+    if (grayscale && c != 1) continue;
+    std::vector<float> points(ysize_dc * xsize_dc, kExclude);
+    ImageF gradient2 = Gradient2(opsin, c);
+    std::vector<int> apply = ThresholdImage(gradient2, kMaxDiff[c]);
+    DilateImage(apply, xsize_dc, ysize_dc, -8);
+    DilateImage(apply, xsize_dc, ysize_dc, 8);
+
+    for (size_t by = 0; by + 1 < ysize; by++) {
+      for (size_t bx = 0; bx + 1 < xsize; bx++) {
+        size_t x0 = bx * kNumBlocks;
+        size_t x1 = std::min<size_t>(xsize_dc, x0 + kNumBlocks);
+        size_t y0 = by * kNumBlocks;
+        size_t y1 = std::min<size_t>(ysize_dc, y0 + kNumBlocks);
+        // Block is one larger than normal if on a right or bottom edge
+        // with particular size.
+        if (bx + 2 == xsize && xsize_dc % kNumBlocks == 1) x1++;
+        if (by + 2 == ysize && ysize_dc % kNumBlocks == 1) y1++;
+        size_t dx = x1 - x0;
+        size_t dy = y1 - y0;
+
+        for (size_t sy = 0; sy < dy; sy++) {
+          for (size_t sx = 0; sx < dx; sx++) {
+            int x = x0 + sx;
+            int y = y0 + sy;
+            if (apply[y * xsize_dc + x]) {
+              points[y * xsize_dc + x] = opsin.PlaneRow(c, y)[x];
+            }
+          }
+        }
+      }
+    }
+
+    const float mul =
+        1.0f / (quantizer.inv_quant_dc() *
+                quantizer.DequantMatrix(0, kQuantKindDCT8, task)[0]);
+    std::vector<float> coeffs(xsize * ysize);
+    PlanePieceFit(points.data(), xsize_dc, ysize_dc, kNumBlocks, kExclude, true,
+                  coeffs.data());
+    for (size_t y = 0; y < ysize; ++y) {
+      float* PIK_RESTRICT row = gradient->gradient.PlaneRow(c, y);
+      const float* PIK_RESTRICT packed_row = &coeffs[y * xsize];
+      uint8_t* PIK_RESTRICT apply_row = gradient->apply.PlaneRow(c, y);
+      memcpy(row, packed_row, xsize * sizeof(float));
+      for (size_t x = 0; x < xsize; ++x) {
+        // TODO(lode): figure out when the gradient map is not needed in a
+        // proper way.
+        if (std::abs(3.0f * row[x] * mul) > 0.5f) {
+          apply_row[x] = 1;
+        } else {
+          apply_row[x] = 0;
+        }
+      }
+    }
+  }
+
+  AccountForQuantization(quantizer, gradient);
+}
+
+// Applies the stored gradient map in the decoder: pixels of *opsin closer than
+// one step to the upscaled map (and kept by the mask shrinking) take its value.
+void ApplyGradientMap(const GradientMap& gradient, const Quantizer& quantizer,
+                      Image3F* opsin) {
+  Image3F upscaled = ComputeGradientImage(gradient);
+  size_t xsize_dc = gradient.xsize_dc;
+  size_t ysize_dc = gradient.ysize_dc;
+  static const float kScale[3] = {1.0, 1.0, 1.0};
+  for (int c = 0; c < 3; ++c) {
+    // Grayscale: only channel 1 is handled; `continue` so it is still reached.
+    if (gradient.grayscale && c != 1) continue;
+    const float step = quantizer.inv_quant_dc() *
+                       quantizer.DequantMatrix(0, kQuantKindDCT8, c)[0] *
+                       kScale[c];
+    // Mask of pixels close to the map whose grid entry has its apply flag set.
+    std::vector<int> apply(gradient.ysize_dc * gradient.xsize_dc, 0);
+    for (size_t y = 0; y < ysize_dc; y++) {
+      float* PIK_RESTRICT row_out = opsin->PlaneRow(c, y);
+      const float* PIK_RESTRICT row_in = upscaled.ConstPlaneRow(c, y);
+      const uint8_t* PIK_RESTRICT row_apply =
+          gradient.apply.ConstPlaneRow(c, y / kNumBlocks);
+      for (size_t x = 0; x < xsize_dc; x++) {
+        float diff = fabs(row_out[x] - row_in[x]);
+        if (diff < step && row_apply[x / kNumBlocks]) {
+          apply[y * xsize_dc + x] = 1;
+        }
+      }
+    }
+
+    // Reduce the size of the field where to apply the gradient, to avoid
+    // doing it in noisy areas
+    DilateImage(apply, gradient.xsize_dc, gradient.ysize_dc, -3);
+
+    for (size_t y = 0; y < ysize_dc; y++) {
+      float* PIK_RESTRICT row_out = opsin->PlaneRow(c, y);
+      const float* PIK_RESTRICT row_in = upscaled.ConstPlaneRow(c, y);
+      for (size_t x = 0; x < xsize_dc; x++) {
+        if (apply[y * xsize_dc + x]) {
+          row_out[x] = row_in[x];
+        }
+      }
+    }
+  }
+}
+
+// Quantizes and encodes the map window for rect, appending it to *compressed.
+void SerializeGradientMap(const GradientMap& gradient, const Rect& rect,
+                          const Quantizer& quantizer, PaddedBytes* compressed) {
+  PIK_ASSERT(rect.x0() % kNumBlocks == 0);
+  PIK_ASSERT(rect.y0() % kNumBlocks == 0);
+  Rect map_rect(rect.x0() / kNumBlocks, rect.y0() / kNumBlocks,
+                DivCeil(rect.xsize() - 1, kNumBlocks) + 1,
+                DivCeil(rect.ysize() - 1, kNumBlocks) + 1);
+  Image3S quantized = Quantize(gradient, map_rect, quantizer);
+  Image3S residuals(map_rect.xsize(), map_rect.ysize());
+  ShrinkDC(map_rect, quantized, &residuals);  // NOTE(review): Rect(quantized)?
+  std::string encoded = EncodeImageData(Rect(residuals), residuals, nullptr);
+  const size_t old_size = compressed->size();
+  compressed->resize(old_size + encoded.size());
+  for (size_t i = 0; i < encoded.size(); i++)
+    compressed->data()[old_size + i] = encoded[i];
+}
+
+// Decodes a gradient map for an xsize_dc x ysize_dc DC image from compressed,
+// starting at *byte_pos; on success *byte_pos grows by reader.Position().
+Status DeserializeGradientMap(size_t xsize_dc, size_t ysize_dc, bool grayscale,
+                              const Quantizer& quantizer,
+                              const PaddedBytes& compressed, size_t* byte_pos,
+                              GradientMap* gradient) {
+  // An offset past the end would make "size() - *byte_pos" wrap around.
+  if (*byte_pos > compressed.size()) return PIK_FAILURE("Bad gradient map pos");
+  InitGradientMap(xsize_dc, ysize_dc, grayscale, gradient);
+  BitReader reader(compressed.data() + *byte_pos,
+                   compressed.size() - *byte_pos);
+
+  // Scratch images handed to ExpandDC below.
+  ImageS gmap_y_tmp(gradient->xsize, gradient->ysize);
+  ImageS gmap_xz_res_tmp(gradient->xsize * 2, gradient->ysize);
+  ImageS gmap_xz_exp_tmp(gradient->xsize * 2, gradient->ysize);
+  Image3S gmap_quant(gradient->xsize, gradient->ysize);
+
+  if (!DecodeImage(&reader, Rect(gmap_quant), &gmap_quant)) {
+    return PIK_FAILURE("Failed to decode gradient map");
+  }
+  *byte_pos += reader.Position();
+
+  ExpandDC(Rect(gmap_quant), &gmap_quant, &gmap_y_tmp, &gmap_xz_res_tmp,
+           &gmap_xz_exp_tmp);
+  Dequantize(quantizer, gmap_quant, gradient);
+  return true;  // success
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/gradient_map.h b/codec/L2/demos/pikEnc/host/pik/gradient_map.h
new file mode 100755
index 0000000000..91f15b257a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/gradient_map.h
@@ -0,0 +1,48 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_GRADIENT_MAP_H_
+#define PIK_GRADIENT_MAP_H_
+
+// The gradient map is a low resolution image (1/8th by 1/8th of the DC, that is
+// 1/64th by 1/64th of the image) with finer quantization of the DC. It is used
+// to selectively remove banding caused by DC quantization.
+
+#include "pik/compressed_image_fwd.h"
+#include "pik/data_parallel.h"
+#include "pik/image.h"
+#include "pik/padded_bytes.h"
+#include "pik/quantizer.h"
+
+namespace pik {
+
+// TODO(robryk): Add unit tests. Verify that
+// ComputeGradientMap(ApplyGradientMap(map)) == map.
+
+// For encoding
+
+// Computes the gradient map for the given image of DC
+// values.
+void ComputeGradientMap(const Image3F& opsin, bool grayscale,
+                        const Quantizer& quantizer, GradientMap* gradient);
+
+void SerializeGradientMap(const GradientMap& gradient, const Rect& rect,
+                          const Quantizer& quantizer, PaddedBytes* compressed);
+
+// For decoding
+
+Status DeserializeGradientMap(size_t xsize_dc, size_t ysize_dc, bool grayscale,
+                              const Quantizer& quantizer,
+                              const PaddedBytes& compressed, size_t* byte_pos,
+                              GradientMap* gradient);
+
+// Applies the gradient map to the decoded DC image.
+void ApplyGradientMap(const GradientMap& gradient, const Quantizer& quantizer,
+                      Image3F* opsin);
+
+}  // namespace pik
+
+#endif  // PIK_GRADIENT_MAP_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/headers.cc b/codec/L2/demos/pikEnc/host/pik/headers.cc
new file mode 100755
index 0000000000..ee309de927
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/headers.cc
@@ -0,0 +1,94 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/headers.h"
+
+#include "pik/fields.h"
+#include "pik/profiler.h"
+
+namespace pik {
+// Each constructor below just passes `this` to Bundle::Init.
+Alpha::Alpha() { Bundle::Init(this); }
+ProjectiveTransformParams::ProjectiveTransformParams() { Bundle::Init(this); }
+TileHeader::TileHeader() { Bundle::Init(this); }
+GroupHeader::GroupHeader() { Bundle::Init(this); }
+FrameInfo::FrameInfo() { Bundle::Init(this); }
+FrameHeader::FrameHeader() { Bundle::Init(this); }
+Preview::Preview() { Bundle::Init(this); }
+Animation::Animation() { Bundle::Init(this); }
+FileHeader::FileHeader() { Bundle::Init(this); }
+// CanEncode overloads: each forwards its arguments to Bundle::CanEncode.
+Status CanEncode(const TileHeader& tile, size_t* PIK_RESTRICT extension_bits,
+                 size_t* PIK_RESTRICT total_bits) {
+  return Bundle::CanEncode(tile, extension_bits, total_bits);
+}
+Status CanEncode(const GroupHeader& group, size_t* PIK_RESTRICT extension_bits,
+                 size_t* PIK_RESTRICT total_bits) {
+  return Bundle::CanEncode(group, extension_bits, total_bits);
+}
+Status CanEncode(const FrameHeader& pass, size_t* PIK_RESTRICT extension_bits,
+                 size_t* PIK_RESTRICT total_bits) {
+  return Bundle::CanEncode(pass, extension_bits, total_bits);
+}
+Status CanEncode(const FileHeader& file, size_t* PIK_RESTRICT extension_bits,
+                 size_t* PIK_RESTRICT total_bits) {
+  return Bundle::CanEncode(file, extension_bits, total_bits);
+}
+// Readers forward to Bundle::Read; ReadPassHeader reads a FrameHeader.
+Status ReadTileHeader(BitReader* PIK_RESTRICT reader,
+                      TileHeader* PIK_RESTRICT tile) {
+  PROFILER_FUNC;
+  return Bundle::Read(reader, tile);
+}
+Status ReadGroupHeader(BitReader* PIK_RESTRICT reader,
+                       GroupHeader* PIK_RESTRICT group) {
+  PROFILER_FUNC;
+  return Bundle::Read(reader, group);
+}
+Status ReadPassHeader(BitReader* PIK_RESTRICT reader,
+                      FrameHeader* PIK_RESTRICT pass) {
+  PROFILER_FUNC;
+  return Bundle::Read(reader, pass);
+}
+Status ReadFileHeader(BitReader* PIK_RESTRICT reader,
+                      FileHeader* PIK_RESTRICT file) {
+  PROFILER_FUNC;
+  return Bundle::Read(reader, file);
+}
+// Writers forward to Bundle::Write; WritePassHeader writes a FrameHeader.
+Status WriteTileHeader(const TileHeader& tile, size_t extension_bits,
+                       size_t* PIK_RESTRICT pos, uint8_t* storage) {
+  return Bundle::Write(tile, extension_bits, pos, storage);
+}
+Status WriteGroupHeader(const GroupHeader& group, size_t extension_bits,
+                        size_t* PIK_RESTRICT pos, uint8_t* storage) {
+  return Bundle::Write(group, extension_bits, pos, storage);
+}
+Status WritePassHeader(const FrameHeader& pass, size_t extension_bits,
+                       size_t* PIK_RESTRICT pos, uint8_t* storage) {
+  return Bundle::Write(pass, extension_bits, pos, storage);
+}
+Status WriteFileHeader(const FileHeader& file, size_t extension_bits,
+                       size_t* PIK_RESTRICT pos, uint8_t* storage) {
+  return Bundle::Write(file, extension_bits, pos, storage);
+}
+// Fills *out with the image size, metadata and transcoding info taken from io.
+void MakeFileHeader(const CompressParams& cparams, const CodecInOut* io,
+                    FileHeader* out) {
+  // Sizes are stored minus one.
+  out->xsize_minus_1 = io->xsize() - 1;
+  out->ysize_minus_1 = io->ysize() - 1;
+  // Start from the input's metadata, then override individual fields.
+  out->metadata = io->metadata;
+  out->metadata.target_nits_div50 = cparams.intensity_target / 50;
+  auto& tc = out->metadata.transcoded;
+  tc.original_bit_depth = io->original_bits_per_sample();
+  tc.original_color_encoding = io->dec_c_original;
+  tc.original_bytes_per_alpha = io->HasAlpha() ? io->AlphaBits() / 8 : 0;
+  (void)ColorManagement::MaybeRemoveProfile(&tc.original_color_encoding);
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/headers.h b/codec/L2/demos/pikEnc/host/pik/headers.h
new file mode 100755
index 0000000000..6fc3949419
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/headers.h
@@ -0,0 +1,440 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_HEADERS_H_
+#define PIK_HEADERS_H_
+
+// Group/pass/file headers with backward and forward-compatible extension
+// capability and compressed integer fields.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "pik/bit_reader.h"
+#include "pik/codec.h"
+#include "pik/color_encoding.h"
+#include "pik/common.h"
+#include "pik/compiler_specific.h"
+#include "pik/epf.h"
+#include "pik/field_encodings.h"
+#include "pik/gaborish.h"
+#include "pik/metadata.h"
+#include "pik/padded_bytes.h"
+#include "pik/pik_params.h"
+#include "pik/status.h"
+
+namespace pik {
+
+//------------------------------------------------------------------------------
+// Tile
+
+constexpr size_t kNumProjectiveTransformParams = 8;
+
+struct ProjectiveTransformParams {
+  ProjectiveTransformParams();
+  static const char* Name() { return "ProjectiveTransformParams"; }
+  // Visits every corner_coords entry in index order; always returns true.
+  template <class Visitor>
+  Status VisitFields(Visitor* PIK_RESTRICT visitor) {
+    for (size_t i = 0; i < kNumProjectiveTransformParams; ++i) {
+      visitor->U32(kU32RawBits + 8, 1, &corner_coords[i]);
+    }
+
+    return true;
+  }
+
+  uint32_t corner_coords[kNumProjectiveTransformParams];
+};
+
+struct TileHeader {
+  TileHeader();
+  static const char* Name() { return "TileHeader"; }
+  // Visit order: all_default shortcut, transform flag + params, extensions.
+  template <class Visitor>
+  Status VisitFields(Visitor* PIK_RESTRICT visitor) {
+    if (visitor->AllDefault(*this, &all_default)) return true;
+
+    visitor->Bool(false, &have_projective_transform);
+    if (visitor->Conditional(have_projective_transform)) {
+      PIK_RETURN_IF_ERROR(visitor->VisitNested(&projective_transform_params));
+    }
+
+    visitor->BeginExtensions(&extensions);
+    // Extensions: in chronological order of being added to the format.
+    return visitor->EndExtensions();
+  }
+
+  bool all_default;  // handed to visitor->AllDefault() in VisitFields
+
+  bool have_projective_transform;  // gates projective_transform_params
+  ProjectiveTransformParams projective_transform_params;
+
+  uint64_t extensions;
+};
+
+//------------------------------------------------------------------------------
+// Group
+
+// Alpha channel (lossless compression).
+// TODO(janwas): add analogous depth-image support
+struct Alpha {
+  Alpha();
+  static const char* Name() { return "Alpha"; }
+  // Visits bytes_per_alpha, then the encoded bytes; always returns true.
+  template <class Visitor>
+  Status VisitFields(Visitor* PIK_RESTRICT visitor) {
+    // TODO(janwas): use this instead of have_alpha
+    // if (visitor->AllDefault(*this, &all_default)) return true;
+
+    visitor->U32(0x84828180u, 1, &bytes_per_alpha);
+    visitor->Bytes(BytesEncoding::kRaw, &encoded);
+
+    return true;
+  }
+
+  // TODO(b/120660058): Move bytes_per_alpha to container.
+  uint32_t bytes_per_alpha;
+  PaddedBytes encoded;  // visited with BytesEncoding::kRaw
+};
+
+constexpr size_t kNumTilesPerGroup = (kGroupDimInBlocks / kTileDimInBlocks) *
+                                     (kGroupDimInBlocks / kTileDimInBlocks);
+
+struct GroupHeader {
+  GroupHeader();
+  static const char* Name() { return "GroupHeader"; }
+  // Visit order: all_default shortcut, alpha (optional), tiles, extensions.
+  template <class Visitor>
+  Status VisitFields(Visitor* PIK_RESTRICT visitor) {
+    if (visitor->AllDefault(*this, &all_default)) return true;
+
+    if (visitor->Conditional(nonserialized_have_alpha)) {
+      PIK_RETURN_IF_ERROR(visitor->VisitNested(&alpha));
+    }
+
+    // TODO(user): Skip all tiles if all of them are all_default.
+    for (size_t i = 0; i < kNumTilesPerGroup; ++i) {
+      PIK_RETURN_IF_ERROR(visitor->VisitNested(&tile_headers[i]));
+    }
+
+    visitor->BeginExtensions(&extensions);
+    // Extensions: in chronological order of being added to the format.
+    return visitor->EndExtensions();
+  }
+
+  bool all_default;  // handed to visitor->AllDefault() in VisitFields
+
+  bool nonserialized_have_alpha = false;  // gates visiting `alpha`
+  Alpha alpha;
+
+  TileHeader tile_headers[kNumTilesPerGroup];  // all visited, in index order
+
+  uint64_t extensions;
+};
+
+//------------------------------------------------------------------------------
+// Frame
+
+enum class ImageEncoding : uint32_t {
+  kPasses = 0,   // PIK
+  kProgressive,  // FUIF
+  kLossless,     // see FrameHeader::lossless_grayscale / lossless_16_bits
+  // TODO(lode): extend amount of possible values
+  // Future extensions: [6]
+};
+
+struct FrameInfo {
+  FrameInfo();
+  static const char* Name() { return "FrameInfo"; }
+  // Visit order: all_default shortcut, duration, timecode (optional), keyframe.
+  template <class Visitor>
+  Status VisitFields(Visitor* PIK_RESTRICT visitor) {
+    if (visitor->AllDefault(*this, &all_default)) return true;
+
+    visitor->U32(0x20088180, 0, &duration);
+
+    visitor->Bool(false, &have_timecode);
+    if (visitor->Conditional(have_timecode)) {
+      visitor->U32(kU32RawBits + 32, 0, &timecode);
+    }
+
+    visitor->Bool(false, &is_keyframe);
+
+    return true;
+  }
+
+  bool all_default;  // handed to visitor->AllDefault() in VisitFields
+
+  // How long to wait [in ticks, see Animation{}] after rendering
+  uint32_t duration;
+
+  bool have_timecode;  // timecode is only visited when this is set
+  uint32_t timecode;  // 0xHHMMSSFF
+
+  bool is_keyframe;
+};
+
+// Image/frame := one or more of these, where the last has is_last = true
+// (NOTE(review): no is_last field is declared in this struct - confirm).
+// Starts at a byte-aligned address "a"; the next pass starts at "a + size".
+struct FrameHeader {
+  // Optional postprocessing steps. These flags are the source of truth;
+  // Override must set/clear them rather than change their meaning.
+  enum Flags {
+    // Gradient map used to predict smooth areas.
+    kGradientMap = 1,
+
+    // Image is compressed with grayscale optimizations. Only used for parsing
+    // of pik file, may not be used to determine decompressed color format or
+    // ICC color profile.
+    kGrayscaleOpt = 2,
+
+    // Inject noise into decoded output.
+    kNoise = 4,
+  };
+
+  FrameHeader();
+  static const char* Name() { return "FrameHeader"; }
+  // Visits the fields in a fixed order; several depend on `encoding`.
+  template <class Visitor>
+  Status VisitFields(Visitor* PIK_RESTRICT visitor) {
+    visitor->U64(0, &size);
+    visitor->Bool(false, &has_alpha);
+    PIK_RETURN_IF_ERROR(visitor->VisitNested(&frame));
+
+    visitor->Enum(kU32Direct3Plus4, ImageEncoding::kPasses, &encoding);
+
+    // Flags, AC strategy, AR and predictions only make sense for kPasses.
+    if (visitor->Conditional(encoding == ImageEncoding::kPasses)) {
+      visitor->U32(0x20181008, 0, &flags);
+      visitor->Enum(kU32Direct3Plus4, GaborishStrength::k750, &gaborish);
+
+      visitor->Bool(true, &predict_lf);
+      visitor->Bool(true, &predict_hf);
+      visitor->Bool(false, &have_adaptive_reconstruction);
+      if (visitor->Conditional(have_adaptive_reconstruction)) {
+        PIK_RETURN_IF_ERROR(visitor->VisitNested(&epf_params));
+      }
+      // TODO(veluca): choose a good constant.
+      visitor->U32(0x20181008, 1, &num_passes);
+
+      uint32_t num_downsampling_factors = downsampling_factor_to_passes.size();
+      visitor->U32(kU32Direct0To3, 0, &num_downsampling_factors);
+      visitor->SetSizeWhenReading(num_downsampling_factors,
+                                  &downsampling_factor_to_passes);
+      for (auto& downsampling_and_num_passes : downsampling_factor_to_passes) {
+        visitor->U32(kU32Direct1248, 1, &downsampling_and_num_passes.first);
+        visitor->U32(kU32Direct3Plus8, 1, &downsampling_and_num_passes.second);
+      }
+    }
+
+    // No resampling or group TOC for kProgressive.
+    if (visitor->Conditional(encoding != ImageEncoding::kProgressive)) {
+      // WARNING: nonserialized_num_groups must be set beforehand.
+      visitor->SetSizeWhenReading(nonserialized_num_groups, &group_sizes);
+      for (uint32_t& group_size_bits : group_sizes) {
+        visitor->U32(0x150F0E0C, 0, &group_size_bits);
+      }
+    }
+
+    if (visitor->Conditional(encoding == ImageEncoding::kLossless)) {
+      visitor->Bool(false, &lossless_grayscale);
+      visitor->Bool(false, &lossless_16_bits);
+    }
+
+    visitor->BeginExtensions(&extensions);
+    // Extensions: in chronological order of being added to the format.
+    return visitor->EndExtensions();
+  }
+
+  // Relative to START of (byte-aligned) FrameHeader. Used to seek to next
+  // frame.
+  // TODO(veluca): how do we compute this?
+  uint64_t size;  // [bytes]
+  bool has_alpha;
+
+  FrameInfo frame;
+
+  ImageEncoding encoding;  // selects which conditional groups are visited
+
+  // Lossless encoding flags: grayscale mode, 16 (true) or 8 bit (false) mode.
+  bool lossless_grayscale;
+  bool lossless_16_bits;
+
+  uint32_t flags;  // presumably a bitwise OR of Flags values - confirm
+
+  GaborishStrength gaborish;
+
+  bool predict_lf;
+  bool predict_hf;
+
+  // TODO(janwas): move into EpfParams
+  bool have_adaptive_reconstruction;  // gates visiting epf_params
+  EpfParams epf_params;
+
+  uint32_t num_passes;
+
+  // WARNING: must be set before reading from bitstream - not serialized
+  // like other fields because this is stored in FileHeader to save a few bits.
+  size_t nonserialized_num_groups = 0;
+
+  std::vector<uint32_t> group_sizes;  // TOC, [bytes]
+
+  // Pairs of {max downsampling factor, number of passes to decode}.
+  // It is not necessary to include {1, num_passes} or {8, 0} explicitly. Code
+  // that uses this vector always behaves as though they are there.
+  std::vector<std::pair<uint32_t, uint32_t>> downsampling_factor_to_passes;
+
+  // TODO(janwas): quantization setup (reuse from previous passes)
+
+  uint64_t extensions;
+};
+
+//------------------------------------------------------------------------------
+// File
+
+struct Preview {
+  Preview();
+  static const char* Name() { return "Preview"; }
+  // Visit order: all_default shortcut, size_bits, xsize, ysize.
+  template <class Visitor>
+  Status VisitFields(Visitor* PIK_RESTRICT visitor) {
+    if (visitor->AllDefault(*this, &all_default)) return true;
+
+    visitor->U32(0x1C14100C, 0, &size_bits);
+    visitor->U32(0x0D0B0907, 0, &xsize);
+    visitor->U32(0x0D0B0907, 0, &ysize);
+
+    return true;
+  }
+
+  bool all_default;  // handed to visitor->AllDefault() in VisitFields
+
+  uint32_t size_bits;  // presumably the preview size in bits - confirm
+  uint32_t xsize;
+  uint32_t ysize;
+};
+
+struct Animation {
+  Animation();
+  static const char* Name() { return "Animation"; }
+  // Visit order: all_default shortcut, num_loops, ticks numerator/denominator.
+  template <class Visitor>
+  Status VisitFields(Visitor* PIK_RESTRICT visitor) {
+    if (visitor->AllDefault(*this, &all_default)) return true;
+
+    visitor->U32(0x20100380, 0, &num_loops);
+    visitor->U32(0x20140981, 0, &ticks_numerator);
+    visitor->U32(0x20140981, 1, &ticks_denominator);
+
+    return true;
+  }
+
+  bool all_default;  // handed to visitor->AllDefault() in VisitFields
+
+  uint32_t num_loops;  // 0 means to repeat infinitely.
+
+  // Ticks as rational number in seconds per tick
+  uint32_t ticks_numerator;
+  uint32_t ticks_denominator;  // Must be at least 1
+};
+
+// EXIF orientation of the image. This field overrides any orientation present
+// in actual EXIF metadata. The value tells which transformation the decoder
+// must apply after decoding to display the image with the correct orientation.
+enum class Orientation : uint32_t {
+  kIdentity = 1,  // value FileHeader::VisitFields passes to Orientation()
+  kFlipHorizontal = 2,
+  kRotate180 = 3,
+  kFlipVertical = 4,
+  kTranspose = 5,
+  kRotate90 = 6,
+  kAntiTranspose = 7,
+  kRotate270 = 8,
+};
+
+// Followed by an unbounded stream of interleaved FrameHeader+payloads.
+struct FileHeader {
+  FileHeader();
+  static const char* Name() { return "FileHeader"; }
+  // Visits signature first and fails with "Signature mismatch" if it differs.
+  template <class Visitor>
+  Status VisitFields(Visitor* PIK_RESTRICT visitor) {
+    visitor->U32(kU32RawBits + 32, kSignature, &signature);
+    if (signature != kSignature) return PIK_FAILURE("Signature mismatch");
+
+    // Almost all camera images are less than 8K * 8K. We also allow the
+    // full 32-bit range for completeness.
+    visitor->U32(0x200D0B09, 0, &xsize_minus_1);
+    visitor->U32(0x200D0B09, 0, &ysize_minus_1);
+    visitor->Orientation(Orientation::kIdentity, &orientation);
+
+    PIK_RETURN_IF_ERROR(visitor->VisitNested(&metadata));
+    PIK_RETURN_IF_ERROR(visitor->VisitNested(&preview));
+    PIK_RETURN_IF_ERROR(visitor->VisitNested(&animation));
+
+    visitor->BeginExtensions(&extensions);
+    // Extensions: in chronological order of being added to the format.
+    return visitor->EndExtensions();
+  }
+  // Widen before adding 1 so that 0xFFFFFFFF does not wrap to 0 in uint32_t.
+  size_t xsize() const { return static_cast<size_t>(xsize_minus_1) + 1; }
+  size_t ysize() const { return static_cast<size_t>(ysize_minus_1) + 1; }
+
+  // \n causes files opened in text mode to be rejected, and \xD7 detects
+  // 7-bit transfers (it also looks like x in ISO-8859-1).
+  static constexpr uint32_t kSignature = 0x0A4D4CD7;  // xLM\n
+  uint32_t signature;
+
+  // This encoding saves bits for size=8K and prevents invalid size=0.
+  uint32_t xsize_minus_1;
+  uint32_t ysize_minus_1;
+
+  Orientation orientation;
+
+  Metadata metadata;
+  Preview preview;
+  Animation animation;
+
+  uint64_t extensions;
+};
+
+void MakeFileHeader(const CompressParams& cparams, const CodecInOut* io,
+                    FileHeader* out);
+
+// Returns whether a header's fields can all be encoded, i.e. they have a valid
+// representation. If so, "*total_bits" is the exact number of bits required.
+Status CanEncode(const TileHeader& tile, size_t* PIK_RESTRICT extension_bits,
+                 size_t* PIK_RESTRICT total_bits);
+Status CanEncode(const GroupHeader& group, size_t* PIK_RESTRICT extension_bits,
+                 size_t* PIK_RESTRICT total_bits);
+Status CanEncode(const FrameHeader& pass, size_t* PIK_RESTRICT extension_bits,
+                 size_t* PIK_RESTRICT total_bits);
+Status CanEncode(const FileHeader& file, size_t* PIK_RESTRICT extension_bits,
+                 size_t* PIK_RESTRICT total_bits);
+
+Status ReadTileHeader(BitReader* PIK_RESTRICT reader,
+                      TileHeader* PIK_RESTRICT tile);
+Status ReadGroupHeader(BitReader* PIK_RESTRICT reader,
+                       GroupHeader* PIK_RESTRICT group);
+Status ReadPassHeader(BitReader* PIK_RESTRICT reader,
+                      FrameHeader* PIK_RESTRICT pass);
+Status ReadFileHeader(BitReader* PIK_RESTRICT reader,
+                      FileHeader* PIK_RESTRICT file);
+
+// "extension_bits" is from the preceding call to CanEncode.
+Status WriteTileHeader(const TileHeader& tile, size_t extension_bits,
+                       size_t* PIK_RESTRICT pos, uint8_t* storage);
+Status WriteGroupHeader(const GroupHeader& group, size_t extension_bits,
+                        size_t* PIK_RESTRICT pos, uint8_t* storage);
+Status WritePassHeader(const FrameHeader& pass, size_t extension_bits,
+                       size_t* PIK_RESTRICT pos, uint8_t* storage);
+Status WriteFileHeader(const FileHeader& file, size_t extension_bits,
+                       size_t* PIK_RESTRICT pos, uint8_t* storage);
+
+}  // namespace pik
+
+#endif  // PIK_HEADERS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/huffman_decode.cc b/codec/L2/demos/pikEnc/host/pik/huffman_decode.cc
new file mode 100755
index 0000000000..2fb80e41d1
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/huffman_decode.cc
@@ -0,0 +1,336 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/huffman_decode.h"
+
+#include <stdint.h>
+#include <cstring>
+#include <vector>
+
+#include "pik/compiler_specific.h"
+
+namespace pik {
+
+static const int kCodeLengthCodes = 18;
+static const uint8_t kCodeLengthCodeOrder[kCodeLengthCodes] = {
+    1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+};
+static const uint8_t kDefaultCodeLength = 8;
+static const uint8_t kCodeLengthRepeatCode = 16;
+
+/* Advances key to the next code in bit-reversed order, i.e. returns
+   reverse(reverse(key, len) + 1, len) over the len low bits of key. */
+static PIK_INLINE int GetNextKey(int key, int len) {
+  int bit = 1 << (len - 1);
+  for (; (key & bit) != 0; bit >>= 1) {
+    /* walk down past the run of set bits starting at bit len - 1 */
+  }
+  return (key & (bit - 1)) + bit;
+}
+
+/* Stores code in table[0], table[step], ..., table[end - step]; table[end] */
+/* itself is not written. Assumes end is a positive multiple of step. */
+static PIK_INLINE void ReplicateValue(HuffmanCode* table, int step, int end,
+                                      HuffmanCode code) {
+  do {
+    end -= step;
+    table[end] = code;
+  } while (end > 0);
+}
+
+/* Computes the index width (in bits) of the next 2nd level table. count holds
+   the number of not-yet-placed symbols per code length and len is the code
+   length of the symbol about to be placed. */
+static PIK_INLINE int NextTableBitSize(const uint16_t* const count, int len,
+                                       int root_bits) {
+  int remaining = 1 << (len - root_bits);
+  for (; len < kHuffmanMaxLength; ++len, remaining <<= 1) {
+    remaining -= count[len];
+    if (remaining <= 0) {
+      break;
+    }
+  }
+  return len - root_bits;
+}
+
+/* Builds Huffman lookup table assuming code lengths are in symbol order. */
+/* Writes the result to *table (resizing it); the count array is modified. */
+void BuildHuffmanTable(std::vector<HuffmanCode>* table, int root_bits,
+                       const uint8_t* const code_lengths, int code_lengths_size,
+                       uint16_t* count) {
+  HuffmanCode code; /* current table entry */
+  int next;         /* next available space in table */
+  int len;          /* current code length */
+  int symbol;       /* symbol index in original or sorted table */
+  int key;          /* reversed prefix code */
+  int step;         /* step size to replicate values in current table */
+  int low;          /* low bits for current root entry */
+  int mask;         /* mask for low bits */
+  int table_bits;   /* key length of current table */
+  int table_size;   /* size of current table */
+  int total_size;   /* sum of root table size and 2nd level table sizes */
+  /* symbols sorted by code length */
+  std::vector<int> sorted(code_lengths_size);
+  /* offsets in sorted table for each length */
+  uint16_t offset[kHuffmanMaxLength + 1];
+  int max_length = 1;
+
+  /* generate offsets into sorted symbol table by code length */
+  {
+    uint16_t sum = 0;
+    for (len = 1; len <= kHuffmanMaxLength; len++) {
+      offset[len] = sum;
+      if (count[len]) {
+        sum = static_cast<uint16_t>(sum + count[len]);
+        max_length = len;
+      }
+    }
+  }
+
+  /* sort symbols by length, by symbol order within each length */
+  for (symbol = 0; symbol < code_lengths_size; symbol++) {
+    if (code_lengths[symbol] != 0) {
+      sorted[offset[code_lengths[symbol]]++] = symbol;
+    }
+  }
+
+  next = 0;
+  table_bits = root_bits;
+  table_size = 1 << table_bits;
+  total_size = table_size;
+  table->resize(total_size);
+
+  /* special case code with only one value */
+  if (offset[kHuffmanMaxLength] == 1) {
+    code.bits = 0;
+    code.value = static_cast<uint16_t>(sorted[0]);
+    for (key = 0; key < total_size; ++key) {
+      (*table)[key] = code;
+    }
+    return;
+  }
+
+  /* fill in root table */
+  /* let's reduce the table size to a smaller size if possible, and */
+  /* create the repetitions by memcpy if possible in the coming loop */
+  if (table_bits > max_length) {
+    table_bits = max_length;
+    table_size = 1 << table_bits;
+  }
+  key = 0;
+  symbol = 0;
+  code.bits = 1;
+  step = 2;
+  do {
+    for (; count[code.bits] != 0; --count[code.bits]) {
+      code.value = static_cast<uint16_t>(sorted[symbol++]);
+      ReplicateValue(&(*table)[key], step, table_size, code);
+      key = GetNextKey(key, code.bits);
+    }
+    step <<= 1;
+  } while (++code.bits <= table_bits);
+
+  /* if root_bits != table_bits we only created one fraction of the */
+  /* table, and we need to replicate it now. */
+  while (total_size != table_size) {
+    memcpy(&(*table)[table_size], &(*table)[0],
+           table_size * sizeof((*table)[0]));
+    table_size <<= 1;
+  }
+
+  /* fill in 2nd level tables and add pointers to root table */
+  mask = total_size - 1;
+  low = -1;
+  for (len = root_bits + 1, step = 2; len <= max_length; ++len, step <<= 1) {
+    for (; count[len] != 0; --count[len]) {
+      if ((key & mask) != low) {
+        next += table_size;
+        table_bits = NextTableBitSize(count, len, root_bits);
+        table_size = 1 << table_bits;
+        total_size += table_size;
+        table->resize(total_size);
+        low = key & mask;
+        (*table)[low].bits = static_cast<uint8_t>(table_bits + root_bits);
+        (*table)[low].value = static_cast<uint16_t>(next - low);
+      }
+      code.bits = static_cast<uint8_t>(len - root_bits);
+      code.value = static_cast<uint16_t>(sorted[symbol++]);
+      ReplicateValue(&(*table)[next + (key >> root_bits)], step, table_size,
+                     code);
+      key = GetNextKey(key, len);
+    }
+  }
+}
+
+// Reads a variable-length value in [0..65535], consuming between 1 and 20 bits.
+inline int DecodeVarLenUint16(BitReader* input) {
+  // A cleared flag bit encodes zero directly.
+  if (!input->ReadBits(1)) {
+    return 0;
+  }
+  const int nbits = static_cast<int>(input->ReadBits(4));
+  if (nbits == 0) {
+    return 1;  // No further bits follow.
+  }
+  return (1 << nbits) + static_cast<int>(input->ReadBits(nbits));
+}
+
+int ReadHuffmanCodeLengths(const uint8_t* code_length_code_lengths,
+                           std::vector<uint8_t>* code_lengths,
+                           BitReader* input) {
+  uint8_t prev_code_len = kDefaultCodeLength;
+  int repeat = 0;
+  uint8_t repeat_code_len = 0;
+  int space = 32768;
+  std::vector<HuffmanCode> table;
+
+  uint16_t counts[16] = {0};
+  for (int i = 0; i < kCodeLengthCodes; ++i) {
+    ++counts[code_length_code_lengths[i]];
+  }
+  BuildHuffmanTable(&table, 5, code_length_code_lengths, kCodeLengthCodes,
+                    &counts[0]);
+
+  const int max_num_symbols = 1 << 16;
+  code_lengths->reserve(256);
+  while (code_lengths->size() < max_num_symbols && space > 0) {
+    const HuffmanCode* p = &table[0];
+    uint8_t code_len;
+    input->FillBitBuffer();
+    p += input->PeekFixedBits<5>();
+    input->Advance(p->bits);
+    code_len = static_cast<uint8_t>(p->value);
+    if (code_len < kCodeLengthRepeatCode) {
+      repeat = 0;
+      code_lengths->push_back(code_len);
+      if (code_len != 0) {
+        prev_code_len = code_len;
+        space -= 32768 >> code_len;
+      }
+    } else {
+      const int extra_bits = code_len - 14;
+      int old_repeat;
+      int repeat_delta;
+      uint8_t new_len = 0;
+      if (code_len == kCodeLengthRepeatCode) {
+        new_len = prev_code_len;
+      }
+      if (repeat_code_len != new_len) {
+        repeat = 0;
+        repeat_code_len = new_len;
+      }
+      old_repeat = repeat;
+      if (repeat > 0) {
+        repeat -= 2;
+        repeat <<= extra_bits;
+      }
+      int next_repeat = input->ReadBits(extra_bits) + 3;
+      repeat += next_repeat;
+      repeat_delta = repeat - old_repeat;
+      if (code_lengths->size() + repeat_delta > max_num_symbols) {
+        return 0;
+      }
+      for (int i = 0; i < repeat_delta; ++i) {
+        code_lengths->push_back(repeat_code_len);
+      }
+      if (repeat_code_len != 0) {
+        space -= repeat_delta << (15 - repeat_code_len);
+      }
+    }
+  }
+  if (space != 0) {
+    return 0;
+  }
+  return 1;
+}
+
+bool HuffmanDecodingData::ReadFromBitStream(BitReader* input) {
+  int ok = 1;
+  int simple_code_or_skip;
+
+  std::vector<uint8_t> code_lengths;
+  /* simple_code_or_skip is used as follows:
+     1 for simple code;
+     0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */
+  simple_code_or_skip = input->ReadBits(2);
+  if (simple_code_or_skip == 1) {
+    /* Read symbols, codes & code lengths directly. */
+    int i;
+    int symbols[4] = {0};
+    int max_symbol = 0;
+    const int num_symbols = input->ReadBits(2) + 1;
+    for (i = 0; i < num_symbols; ++i) {
+      symbols[i] = DecodeVarLenUint16(input);
+      if (symbols[i] > max_symbol) max_symbol = symbols[i];
+    }
+    code_lengths.resize(max_symbol + 1);
+    code_lengths[symbols[0]] = 1;
+    for (i = 1; i < num_symbols; ++i) {
+      code_lengths[symbols[i]] = 2;
+    }
+    switch (num_symbols) {
+      case 1:
+        break;
+      case 3:
+        ok = ((symbols[0] != symbols[1]) && (symbols[0] != symbols[2]) &&
+              (symbols[1] != symbols[2]));
+        break;
+      case 2:
+        ok = (symbols[0] != symbols[1]);
+        code_lengths[symbols[1]] = 1;
+        break;
+      case 4:
+        ok = ((symbols[0] != symbols[1]) && (symbols[0] != symbols[2]) &&
+              (symbols[0] != symbols[3]) && (symbols[1] != symbols[2]) &&
+              (symbols[1] != symbols[3]) && (symbols[2] != symbols[3]));
+        if (input->ReadBits(1)) {
+          code_lengths[symbols[2]] = 3;
+          code_lengths[symbols[3]] = 3;
+        } else {
+          code_lengths[symbols[0]] = 2;
+        }
+        break;
+    }
+  } else { /* Decode Huffman-coded code lengths. */
+    int i;
+    uint8_t code_length_code_lengths[kCodeLengthCodes] = {0};
+    int space = 32;
+    int num_codes = 0;
+    /* Static Huffman code for the code length code lengths */
+    static const HuffmanCode huff[16] = {
+        {2, 0}, {2, 4}, {2, 3}, {3, 2}, {2, 0}, {2, 4}, {2, 3}, {4, 1},
+        {2, 0}, {2, 4}, {2, 3}, {3, 2}, {2, 0}, {2, 4}, {2, 3}, {4, 5},
+    };
+    for (i = simple_code_or_skip; i < kCodeLengthCodes && space > 0; ++i) {
+      const int code_len_idx = kCodeLengthCodeOrder[i];
+      const HuffmanCode* p = huff;
+      uint8_t v;
+      input->FillBitBuffer();
+      p += input->PeekFixedBits<4>();
+      input->Advance(p->bits);
+      v = static_cast<uint8_t>(p->value);
+      code_length_code_lengths[code_len_idx] = v;
+      if (v != 0) {
+        space -= (32 >> v);
+        ++num_codes;
+      }
+    }
+    ok = (num_codes == 1 || space == 0) &&
+         ReadHuffmanCodeLengths(code_length_code_lengths, &code_lengths, input);
+  }
+  if (!ok) {
+    return PIK_FAILURE("Failed to read Huffman data");
+  }
+  uint16_t counts[16] = {0};
+  for (int i = 0; i < code_lengths.size(); ++i) {
+    ++counts[code_lengths[i]];
+  }
+  BuildHuffmanTable(&table_, kHuffmanTableBits, &code_lengths[0],
+                    code_lengths.size(), &counts[0]);
+  return true;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/huffman_decode.h b/codec/L2/demos/pikEnc/host/pik/huffman_decode.h
new file mode 100755
index 0000000000..6c10074ae7
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/huffman_decode.h
@@ -0,0 +1,62 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_HUFFMAN_DECODE_H_
+#define PIK_HUFFMAN_DECODE_H_
+
+// Library to decode the Huffman code lengths from the bit-stream and build a
+// decoding table from them.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <memory>
+#include <vector>
+
+#include "pik/bit_reader.h"
+
+namespace pik {
+
+static const int kHuffmanMaxLength = 15;
+static const int kHuffmanTableMask = 0xff;
+static const int kHuffmanTableBits = 8;
+
+typedef struct {
+  uint8_t bits;   /* number of bits used for this symbol */
+  uint16_t value; /* symbol value or table offset */
+} HuffmanCode;
+
+struct HuffmanDecodingData {
+  HuffmanDecodingData() { table_.reserve(2048); }
+
+  // Decodes the Huffman code lengths from the bit-stream and fills in the
+  // pre-allocated table with the corresponding 2-level Huffman decoding table.
+  // Returns false if the Huffman code lengths cannot be decoded.
+  bool ReadFromBitStream(BitReader* input);
+
+  std::vector<HuffmanCode> table_;
+};
+
+struct HuffmanDecoder {
+  // Reads one Huffman-coded symbol from "input" using the table in "code".
+  int ReadSymbol(const HuffmanDecodingData& code, BitReader* input) {
+    input->FillBitBuffer();
+    const HuffmanCode* entry = &code.table_[0];
+    entry += input->PeekFixedBits<kHuffmanTableBits>();
+    const int extra_bits = entry->bits - kHuffmanTableBits;
+    if (extra_bits > 0) {
+      // Code longer than the root index: follow the offset to a sub-table.
+      input->Advance(kHuffmanTableBits);
+      entry += entry->value;
+      entry += input->PeekBits(extra_bits);
+    }
+    input->Advance(entry->bits);
+    return entry->value;
+  }
+};
+
+}  // namespace pik
+
+#endif  // PIK_HUFFMAN_DECODE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/huffman_encode.cc b/codec/L2/demos/pikEnc/host/pik/huffman_encode.cc
new file mode 100755
index 0000000000..848ca7e780
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/huffman_encode.cc
@@ -0,0 +1,570 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/huffman_encode.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <vector>
+
+#include "pik/fast_log.h"
+#include "pik/status.h"
+#include "pik/write_bits.h"
+
+namespace pik {
+
+namespace {
+
+static const int kCodeLengthCodes = 18;
+
+// A node of a Huffman tree; nodes refer to each other by index into a pool.
+struct HuffmanTree {
+  HuffmanTree(uint32_t count, int16_t left, int16_t right)
+      : total_count(count), index_left(left), index_right_or_value(right) {}
+  uint32_t total_count;          // Weight compared by SortHuffmanTree.
+  int16_t index_left;            // Left child's index; negative for a leaf.
+  int16_t index_right_or_value;  // Right child's index, or a leaf's symbol.
+};
+
+// Sort the root nodes, least popular first.
+inline bool SortHuffmanTree(const HuffmanTree& v0, const HuffmanTree& v1) {
+  return v0.total_count < v1.total_count;
+}
+
+void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth,
+              uint8_t level) {
+  if (p.index_left < 0) {
+    depth[p.index_right_or_value] = level;  // Leaf: record its depth.
+    return;
+  }
+  const uint8_t child_level = static_cast<uint8_t>(level + 1);
+  SetDepth(pool[p.index_left], pool, depth, child_level);
+  SetDepth(pool[p.index_right_or_value], pool, depth, child_level);
+}
+
+// Builds a length-limited Huffman code for a histogram.
+//
+// (data, length) holds the population counts, one per symbol.
+// tree_limit is the maximum bit depth allowed for any code.
+//
+// depth receives the code length of every symbol with a non-zero count;
+// other entries are not written, nor is anything when all counts are zero.
+//
+// The catch here is that the tree cannot be arbitrarily deep.
+// The format specifies a maximum depth of 15 bits for "code trees"
+// and 7 bits for "code length code trees."
+//
+// count_limit is the value that is to be faked as the minimum value
+// and this minimum value is raised until the tree matches the
+// maximum length requirement.
+//
+// This algorithm is not of excellent performance for very long data blocks,
+// especially when population counts are longer than 2**tree_limit, but
+// we are not planning to use this with extremely long blocks.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+void CreateHuffmanTree(const uint32_t* data, const size_t length,
+                       const int tree_limit, uint8_t* depth) {
+  // For block sizes below 64 kB, we never need to do a second iteration
+  // of this loop. Probably all of our block sizes will be smaller than
+  // that, so this loop is mostly of academic interest. If we actually
+  // would need this, we would be better off with the Katajainen algorithm.
+  for (uint32_t count_limit = 1;; count_limit = 2 * count_limit + 1) {
+    std::vector<HuffmanTree> tree;
+    tree.reserve(2 * length + 1);
+
+    for (size_t i = length; i != 0;) {
+      --i;
+      if (data[i]) {
+        const uint32_t count = std::max(data[i], count_limit);
+        tree.push_back(HuffmanTree(count, -1, static_cast<int16_t>(i)));
+      }
+    }
+
+    const size_t n = tree.size();
+    if (n <= 1) {  // An empty histogram must not reach the "n - 1" loop below.
+      if (n == 1) depth[tree[0].index_right_or_value] = 1;  // Single symbol.
+      break;
+    }
+
+    std::stable_sort(tree.begin(), tree.end(), SortHuffmanTree);
+
+    // The nodes are:
+    // [0, n): the sorted leaf nodes that we start with.
+    // [n]: we add a sentinel here.
+    // [n + 1, 2n): new parent nodes are added here, starting from
+    //              (n+1). These are naturally in ascending order.
+    // [2n]: we add a sentinel at the end as well.
+    // There will be (2n+1) elements at the end.
+    const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
+    tree.push_back(sentinel);
+    tree.push_back(sentinel);
+
+    size_t i = 0;      // Points to the next leaf node.
+    size_t j = n + 1;  // Points to the next non-leaf node.
+    for (size_t k = n - 1; k != 0; --k) {
+      size_t left, right;
+      if (tree[i].total_count <= tree[j].total_count) {
+        left = i;
+        ++i;
+      } else {
+        left = j;
+        ++j;
+      }
+      if (tree[i].total_count <= tree[j].total_count) {
+        right = i;
+        ++i;
+      } else {
+        right = j;
+        ++j;
+      }
+
+      // The sentinel node becomes the parent node.
+      size_t j_end = tree.size() - 1;
+      tree[j_end].total_count =
+          tree[left].total_count + tree[right].total_count;
+      tree[j_end].index_left = static_cast<int16_t>(left);
+      tree[j_end].index_right_or_value = static_cast<int16_t>(right);
+
+      // Add back the last sentinel node.
+      tree.push_back(sentinel);
+    }
+    PIK_ASSERT(tree.size() == 2 * n + 1);
+    SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
+
+    // We need to pack the Huffman tree in tree_limit bits.
+    // If this was not successful, add fake entities to the lowest values
+    // and retry.
+    if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
+      break;
+    }
+  }
+}
+
+void Reverse(uint8_t* v, size_t start, size_t end) {
+  // "end" is exclusive; step it back onto the last element of the range.
+  size_t lo = start;
+  size_t hi = end - 1;
+  for (; lo < hi; ++lo, --hi) {
+    const uint8_t held = v[lo];
+    v[lo] = v[hi];
+    v[hi] = held;
+  }
+}
+
+void WriteHuffmanTreeRepetitions(const uint8_t previous_value,
+                                 const uint8_t value, size_t repetitions,
+                                 size_t* tree_size, uint8_t* tree,
+                                 uint8_t* extra_bits_data) {
+  PIK_ASSERT(repetitions > 0);
+  if (previous_value != value) {
+    tree[*tree_size] = value;
+    extra_bits_data[*tree_size] = 0;
+    ++(*tree_size);
+    --repetitions;
+  }
+  if (repetitions == 7) {
+    tree[*tree_size] = value;
+    extra_bits_data[*tree_size] = 0;
+    ++(*tree_size);
+    --repetitions;
+  }
+  if (repetitions < 3) {
+    for (size_t i = 0; i < repetitions; ++i) {
+      tree[*tree_size] = value;
+      extra_bits_data[*tree_size] = 0;
+      ++(*tree_size);
+    }
+  } else {
+    repetitions -= 3;
+    size_t start = *tree_size;
+    while (true) {
+      tree[*tree_size] = 16;
+      extra_bits_data[*tree_size] = repetitions & 0x3;
+      ++(*tree_size);
+      repetitions >>= 2;
+      if (repetitions == 0) {
+        break;
+      }
+      --repetitions;
+    }
+    Reverse(tree, start, *tree_size);
+    Reverse(extra_bits_data, start, *tree_size);
+  }
+}
+
+void WriteHuffmanTreeRepetitionsZeros(size_t repetitions, size_t* tree_size,
+                                      uint8_t* tree, uint8_t* extra_bits_data) {
+  if (repetitions == 11) {
+    tree[*tree_size] = 0;
+    extra_bits_data[*tree_size] = 0;
+    ++(*tree_size);
+    --repetitions;
+  }
+  if (repetitions < 3) {
+    for (size_t i = 0; i < repetitions; ++i) {
+      tree[*tree_size] = 0;
+      extra_bits_data[*tree_size] = 0;
+      ++(*tree_size);
+    }
+  } else {
+    repetitions -= 3;
+    size_t start = *tree_size;
+    while (true) {
+      tree[*tree_size] = 17;
+      extra_bits_data[*tree_size] = repetitions & 0x7;
+      ++(*tree_size);
+      repetitions >>= 3;
+      if (repetitions == 0) {
+        break;
+      }
+      --repetitions;
+    }
+    Reverse(tree, start, *tree_size);
+    Reverse(extra_bits_data, start, *tree_size);
+  }
+}
+
+static void DecideOverRleUse(const uint8_t* depth, const size_t length,
+                             bool* use_rle_for_non_zero,
+                             bool* use_rle_for_zero) {
+  // Index 0 tallies runs of zeros, index 1 runs of non-zero depths.
+  size_t run_symbols[2] = {0, 0};
+  size_t run_groups[2] = {1, 1};
+  size_t pos = 0;
+  while (pos < length) {
+    const uint8_t value = depth[pos];
+    size_t next = pos + 1;
+    while (next < length && depth[next] == value) {
+      ++next;
+    }
+    const size_t reps = next - pos;
+    const size_t kind = (value == 0) ? 0 : 1;
+    // Zero runs count from length 3, non-zero runs only from length 4.
+    const size_t min_reps = (kind == 0) ? 3 : 4;
+    if (reps >= min_reps) {
+      run_symbols[kind] += reps;
+      ++run_groups[kind];
+    }
+    pos = next;
+  }
+  *use_rle_for_non_zero = run_symbols[1] > run_groups[1] * 2;
+  *use_rle_for_zero = run_symbols[0] > run_groups[0] * 2;
+}
+
+// Write a Huffman tree from bit depths into the bitstream representation
+// of a Huffman tree. The generated Huffman tree is to be compressed once
+// more using a Huffman tree
+void WriteHuffmanTree(const uint8_t* depth, size_t length, size_t* tree_size,
+                      uint8_t* tree, uint8_t* extra_bits_data) {
+  uint8_t previous_value = 8;
+
+  // Throw away trailing zeros.
+  size_t new_length = length;
+  for (size_t i = 0; i < length; ++i) {
+    if (depth[length - i - 1] == 0) {
+      --new_length;
+    } else {
+      break;
+    }
+  }
+
+  // First gather statistics on if it is a good idea to do rle.
+  bool use_rle_for_non_zero = false;
+  bool use_rle_for_zero = false;
+  if (length > 50) {
+    // Find rle coding for longer codes.
+    // Shorter codes seem not to benefit from rle.
+    DecideOverRleUse(depth, new_length, &use_rle_for_non_zero,
+                     &use_rle_for_zero);
+  }
+
+  // Actual rle coding.
+  for (size_t i = 0; i < new_length;) {
+    const uint8_t value = depth[i];
+    size_t reps = 1;
+    if ((value != 0 && use_rle_for_non_zero) ||
+        (value == 0 && use_rle_for_zero)) {
+      for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) {
+        ++reps;
+      }
+    }
+    if (value == 0) {
+      WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
+    } else {
+      WriteHuffmanTreeRepetitions(previous_value, value, reps, tree_size, tree,
+                                  extra_bits_data);
+      previous_value = value;
+    }
+    i += reps;
+  }
+}
+
+uint16_t ReverseBits(int num_bits, uint16_t bits) {
+  // Nibble lookup: kNibbleReversed[x] is x with its four bits mirrored.
+  static const size_t kNibbleReversed[16] = {0x0, 0x8, 0x4, 0xc, 0x2, 0xa,
+                                             0x6, 0xe, 0x1, 0x9, 0x5, 0xd,
+                                             0x3, 0xb, 0x7, 0xf};
+  size_t reversed = kNibbleReversed[bits & 0xf];
+  for (int consumed = 4; consumed < num_bits; consumed += 4) {
+    bits = static_cast<uint16_t>(bits >> 4);
+    reversed = (reversed << 4) | kNibbleReversed[bits & 0xf];
+  }
+  // Drop the padding introduced by rounding num_bits up to whole nibbles.
+  return static_cast<uint16_t>(reversed >> (-num_bits & 0x3));
+}
+
+// Get the actual bit values for a tree of bit depths.
+void ConvertBitDepthsToSymbols(const uint8_t* depth, size_t len,
+                               uint16_t* bits) {
+  // In Brotli, all bit depths are [1..15]
+  // 0 bit depth means that the symbol does not exist.
+  const int kMaxBits = 16;  // 0..15 are values for bits
+  uint16_t bl_count[kMaxBits] = {0};
+  {
+    for (size_t i = 0; i < len; ++i) {
+      ++bl_count[depth[i]];
+    }
+    bl_count[0] = 0;
+  }
+  uint16_t next_code[kMaxBits];
+  next_code[0] = 0;
+  {
+    int code = 0;
+    for (int bits = 1; bits < kMaxBits; ++bits) {
+      code = (code + bl_count[bits - 1]) << 1;
+      next_code[bits] = static_cast<uint16_t>(code);
+    }
+  }
+  for (size_t i = 0; i < len; ++i) {
+    if (depth[i]) {
+      bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
+    }
+  }
+}
+
+template <class BitVisitor>
+void StoreHuffmanTreeOfHuffmanTreeToBitMask(const int num_codes,
+                                            const uint8_t* code_length_bitdepth,
+                                            BitVisitor* bit_visitor) {
+  static const uint8_t kStorageOrder[kCodeLengthCodes] = {
+      1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+  // The bit lengths of the Huffman code over the code length alphabet
+  // are compressed with the following static Huffman code:
+  //   Symbol   Code
+  //   ------   ----
+  //   0          00
+  //   1        1110
+  //   2         110
+  //   3          01
+  //   4          10
+  //   5        1111
+  static const uint8_t kHuffmanBitLengthHuffmanCodeSymbols[6] = {0, 7, 3,
+                                                                 2, 1, 15};
+  static const uint8_t kHuffmanBitLengthHuffmanCodeBitLengths[6] = {2, 4, 3,
+                                                                    2, 2, 4};
+
+  // Throw away trailing zeros:
+  size_t codes_to_store = kCodeLengthCodes;
+  if (num_codes > 1) {
+    for (; codes_to_store > 0; --codes_to_store) {
+      if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
+        break;
+      }
+    }
+  }
+  size_t skip_some = 0;  // skips none.
+  if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
+      code_length_bitdepth[kStorageOrder[1]] == 0) {
+    skip_some = 2;  // skips two.
+    if (code_length_bitdepth[kStorageOrder[2]] == 0) {
+      skip_some = 3;  // skips three.
+    }
+  }
+  bit_visitor->VisitBits(2, skip_some);
+  for (size_t i = skip_some; i < codes_to_store; ++i) {
+    size_t l = code_length_bitdepth[kStorageOrder[i]];
+    bit_visitor->VisitBits(kHuffmanBitLengthHuffmanCodeBitLengths[l],
+                           kHuffmanBitLengthHuffmanCodeSymbols[l]);
+  }
+}
+
+template <class BitVisitor>
+void StoreHuffmanTreeToBitMask(const size_t huffman_tree_size,
+                               const uint8_t* huffman_tree,
+                               const uint8_t* huffman_tree_extra_bits,
+                               const uint8_t* code_length_bitdepth,
+                               const uint16_t* code_length_bitdepth_symbols,
+                               BitVisitor* bit_visitor) {
+  for (size_t i = 0; i < huffman_tree_size; ++i) {
+    size_t ix = huffman_tree[i];
+    bit_visitor->VisitBits(code_length_bitdepth[ix],
+                           code_length_bitdepth_symbols[ix]);
+    // Extra bits
+    switch (ix) {
+      case 16:
+        bit_visitor->VisitBits(2, huffman_tree_extra_bits[i]);
+        break;
+      case 17:
+        bit_visitor->VisitBits(3, huffman_tree_extra_bits[i]);
+        break;
+    }
+  }
+}
+
+template <class BitVisitor>
+void StoreVarLenUint16(size_t n, BitVisitor* visitor) {
+  // Flag bit: 0 encodes the value zero and ends the field.
+  visitor->VisitBits(1, n == 0 ? 0 : 1);
+  if (n == 0) return;
+  // Then a 4-bit exponent followed by the bits below the leading one.
+  size_t nbits = Log2FloorNonZero(n);
+  const auto mantissa = n - (1ULL << nbits);
+  visitor->VisitBits(4, nbits);
+  visitor->VisitBits(nbits, mantissa);
+}
+
+template <class BitVisitor>
+void StoreSimpleHuffmanTree(const uint8_t* depths, size_t symbols[4],
+                            size_t num_symbols, BitVisitor* bit_visitor) {
+  // value of 1 indicates a simple Huffman code
+  bit_visitor->VisitBits(2, 1);
+  bit_visitor->VisitBits(2, num_symbols - 1);  // NSYM - 1
+
+  // Sort
+  for (size_t i = 0; i < num_symbols; i++) {
+    for (size_t j = i + 1; j < num_symbols; j++) {
+      if (depths[symbols[j]] < depths[symbols[i]]) {
+        std::swap(symbols[j], symbols[i]);
+      }
+    }
+  }
+
+  for (size_t i = 0; i < num_symbols; ++i) {
+    StoreVarLenUint16(symbols[i], bit_visitor);
+  }
+  if (num_symbols == 4) {
+    // tree-select
+    bit_visitor->VisitBits(1, depths[symbols[0]] == 1 ? 1 : 0);
+  }
+}
+
+// num = alphabet size
+// depths = symbol depths
+template <class BitVisitor>
+void StoreHuffmanTree(const uint8_t* depths, size_t num,
+                      BitVisitor* bit_visitor) {
+  // Write the Huffman tree into the compact representation.
+  std::vector<uint8_t> huffman_tree(num);
+  std::vector<uint8_t> huffman_tree_extra_bits(num);
+  size_t huffman_tree_size = 0;
+  WriteHuffmanTree(depths, num, &huffman_tree_size, &huffman_tree[0],
+                   &huffman_tree_extra_bits[0]);
+
+  // Calculate the statistics of the Huffman tree in the compact representation.
+  uint32_t huffman_tree_histogram[kCodeLengthCodes] = {0};
+  for (size_t i = 0; i < huffman_tree_size; ++i) {
+    ++huffman_tree_histogram[huffman_tree[i]];
+  }
+
+  int num_codes = 0;
+  int code = 0;
+  for (int i = 0; i < kCodeLengthCodes; ++i) {
+    if (huffman_tree_histogram[i]) {
+      if (num_codes == 0) {
+        code = i;
+        num_codes = 1;
+      } else if (num_codes == 1) {
+        num_codes = 2;
+        break;
+      }
+    }
+  }
+
+  // Calculate another Huffman tree, over the code length alphabet, that is
+  // used to compress the earlier Huffman tree.
+  uint8_t code_length_bitdepth[kCodeLengthCodes] = {0};
+  uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = {0};
+  CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes, 5,
+                    &code_length_bitdepth[0]);
+  ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes,
+                            &code_length_bitdepth_symbols[0]);
+
+  // Now, we have all the data, let's start storing it
+  StoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth,
+                                         bit_visitor);
+
+  if (num_codes == 1) {
+    code_length_bitdepth[code] = 0;
+  }
+
+  // Store the real huffman tree now.
+  StoreHuffmanTreeToBitMask(
+      huffman_tree_size, &huffman_tree[0], &huffman_tree_extra_bits[0],
+      &code_length_bitdepth[0], code_length_bitdepth_symbols, bit_visitor);
+}
+
+template <class BitVisitor>
+void BuildAndVisitHuffmanTree(const uint32_t* histogram, const size_t length,
+                              uint8_t* depth, uint16_t* bits,
+                              BitVisitor* bit_visitor) {
+  size_t count = 0;
+  size_t s4[4] = {0};
+  for (size_t i = 0; i < length; i++) {
+    if (histogram[i]) {
+      if (count < 4) {
+        s4[count] = i;
+      } else if (count > 4) {
+        break;
+      }
+      count++;
+    }
+  }
+
+  if (count <= 1) {
+    bit_visitor->VisitBits(4, 1);
+    StoreVarLenUint16(s4[0], bit_visitor);
+    return;
+  }
+
+  CreateHuffmanTree(histogram, length, 15, depth);
+  if (bits != nullptr) {
+    ConvertBitDepthsToSymbols(depth, length, bits);
+  }
+
+  if (count <= 4) {
+    StoreSimpleHuffmanTree(depth, s4, count, bit_visitor);
+  } else {
+    StoreHuffmanTree(depth, length, bit_visitor);
+  }
+}
+
+}  // namespace
+
+void BuildAndStoreHuffmanTree(const uint32_t* histogram, const size_t length,
+                              uint8_t* depth, uint16_t* bits,
+                              size_t* storage_ix, uint8_t* storage) {
+  BitWriter bit_writer(storage_ix, storage);
+  BuildAndVisitHuffmanTree(histogram, length, depth, bits, &bit_writer);
+}
+
+void BuildHuffmanTreeAndCountBits(const uint32_t* histogram,
+                                  const size_t length, size_t* histogram_bits,
+                                  size_t* data_bits) {
+  BitCounter bit_counter;  // Receives every VisitBits call made for the tree.
+  std::vector<uint8_t> depths(length);  // Zero-initialized code lengths.
+  BuildAndVisitHuffmanTree(histogram, length, depths.data(), nullptr,
+                           &bit_counter);
+  *histogram_bits = bit_counter.num_bits;
+  *data_bits = 0;  // Becomes the sum of count * code length over all symbols.
+  for (size_t i = 0; i < length; ++i) {  // Widen: a 32-bit product may wrap.
+    *data_bits += static_cast<size_t>(histogram[i]) * depths[i];
+  }
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/huffman_encode.h b/codec/L2/demos/pikEnc/host/pik/huffman_encode.h
new file mode 100755
index 0000000000..9ad0b4149a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/huffman_encode.h
@@ -0,0 +1,24 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_HUFFMAN_ENCODE_H_
+#define PIK_HUFFMAN_ENCODE_H_
+
+#include <stdint.h>
+#include <cstddef>
+
+namespace pik {
+
+// Builds a Huffman code for `histogram` (`length` entries) and writes the
+// serialized tree through a bit writer constructed from `storage_ix` and
+// `storage`. `depth` and `bits` are output arrays passed on to the tree
+// builder; the implementation skips filling `bits` when it is nullptr.
+// NOTE(review): `depth`/`bits` presumably need `length` entries each - confirm
+// against CreateHuffmanTree / ConvertBitDepthsToSymbols.
+void BuildAndStoreHuffmanTree(const uint32_t* histogram, const size_t length,
+                              uint8_t* depth, uint16_t* bits,
+                              size_t* storage_ix, uint8_t* storage);
+
+// Builds a Huffman code for `histogram` (`length` entries) without emitting
+// anything. *histogram_bits receives the number of bits counted while visiting
+// the tree description; *data_bits receives the sum over all symbols of
+// count * code depth.
+void BuildHuffmanTreeAndCountBits(const uint32_t* histogram,
+                                  const size_t length, size_t* histogram_bits,
+                                  size_t* data_bits);
+}  // namespace pik
+
+#endif  // PIK_HUFFMAN_ENCODE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/image.cc b/codec/L2/demos/pikEnc/host/pik/image.cc
new file mode 100755
index 0000000000..cfb017a70d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/image.cc
@@ -0,0 +1,89 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/image.h"
+
+#include <stdint.h>
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/common.h"
+#include "pik/profiler.h"
+
+
+namespace pik {
+
+// Allocates `size` bytes through AllocateArray and asserts that the returned
+// address is a multiple of kImageAlign. `xsize` and `ysize` are accepted but
+// not used in this body.
+CacheAlignedUniquePtr AllocateImageBytes(size_t size, size_t xsize,
+                                         size_t ysize) {
+  // Profiling happens here because CacheAligned itself is used by profiler.h
+  // and therefore cannot be profiled.
+  PROFILER_FUNC;
+
+  // A zero-byte request is permitted.
+  CacheAlignedUniquePtr bytes(AllocateArray(size));
+  PIK_ASSERT(reinterpret_cast<uintptr_t>(bytes.get()) % kImageAlign == 0);
+  return bytes;
+}
+
+// Copies a packed 8-bit buffer into a newly allocated ImageB. Source row y
+// starts at packed + y * bytes_per_row; only the first `xsize` bytes of each
+// source row are copied.
+ImageB ImageFromPacked(const uint8_t* packed, const size_t xsize,
+                       const size_t ysize, const size_t bytes_per_row) {
+  PIK_ASSERT(bytes_per_row >= xsize);
+  ImageB image(xsize, ysize);
+  PROFILER_FUNC;
+  // Track the source offset as an integer so no out-of-range pointer is formed.
+  size_t src_offset = 0;
+  for (size_t y = 0; y < ysize; ++y, src_offset += bytes_per_row) {
+    uint8_t* const PIK_RESTRICT dst = image.Row(y);
+    const uint8_t* const PIK_RESTRICT src = packed + src_offset;
+    memcpy(dst, src, xsize);
+  }
+  return image;
+}
+
+// Returns a copy of `in` whose width and height are rounded up to multiples of
+// N. Added columns repeat the last valid pixel of each row; added rows repeat
+// the last (already padded) row. An input with zero width or height yields the
+// padded-size image without any pixel access.
+// Note that using mirroring here gives slightly worse results.
+Image3F PadImageToMultiple(const Image3F& in, const size_t N) {
+  PROFILER_FUNC;
+  const size_t in_xsize = in.xsize();
+  const size_t in_ysize = in.ysize();
+  const size_t xsize = N * DivCeil(in_xsize, N);
+  const size_t ysize = N * DivCeil(in_ysize, N);
+  Image3F out(xsize, ysize);
+
+  // Empty input: there is no last column/row to replicate, and `out` owns no
+  // pixels in that case, so indexing "last" would read out of bounds.
+  if (in_xsize == 0 || in_ysize == 0) return out;
+
+  const size_t lastcol = in_xsize - 1;
+  const size_t lastrow = in_ysize - 1;
+  for (size_t c = 0; c < 3; ++c) {
+    // Copy valid pixels and extend each row to the right.
+    for (size_t y = 0; y < in_ysize; ++y) {
+      const float* PIK_RESTRICT row_in = in.ConstPlaneRow(c, y);
+      float* PIK_RESTRICT row_out = out.PlaneRow(c, y);
+      memcpy(row_out, row_in, in_xsize * sizeof(row_in[0]));
+      const float lastval = row_out[lastcol];
+      for (size_t x = in_xsize; x < xsize; ++x) {
+        row_out[x] = lastval;
+      }
+    }
+
+    // TODO(janwas): no need to copy if we can 'extend' image: if rows are
+    // pointers to any memory? Or allocate larger image before IO?
+    // Extend downwards by repeating the last padded row.
+    for (size_t y = in_ysize; y < ysize; ++y) {
+      const float* PIK_RESTRICT row_in = out.ConstPlaneRow(c, lastrow);
+      float* PIK_RESTRICT row_out = out.PlaneRow(c, y);
+      memcpy(row_out, row_in, xsize * sizeof(row_out[0]));
+    }
+  }
+  return out;
+}
+
+// Returns the sum over all pixels of a(x, y) * b(x, y). Each product is formed
+// in float and accumulated in double; the total is converted to float on
+// return. Iterates over the dimensions of `a`; `b` is not size-checked here.
+float DotProduct(const ImageF& a, const ImageF& b) {
+  const size_t xsize = a.xsize();
+  const size_t ysize = a.ysize();
+  double total = 0.0;
+  for (size_t y = 0; y < ysize; ++y) {
+    const float* const PIK_RESTRICT lhs = a.ConstRow(y);
+    const float* const PIK_RESTRICT rhs = b.ConstRow(y);
+    for (size_t x = 0; x < xsize; ++x) {
+      total += lhs[x] * rhs[x];
+    }
+  }
+  return total;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/image.h b/codec/L2/demos/pikEnc/host/pik/image.h
new file mode 100755
index 0000000000..753b9d21c4
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/image.h
@@ -0,0 +1,699 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_IMAGE_H_
+#define PIK_IMAGE_H_
+
+// SIMD/multicore-friendly planar image representation with row accessors.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <limits>
+#include <random>
+#include <string>
+#include <vector>
+
+#include "pik/cache_aligned.h"
+#include "pik/compiler_specific.h"
+#include "pik/profiler.h"
+#include "pik/robust_statistics.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Each row address is a multiple of this - enables aligned loads.
+static constexpr size_t kImageAlign = CacheAligned::kAlignment;
+static_assert(kImageAlign >= kMaxVectorSize, "Insufficient alignment");
+
+// Returns distance [bytes] between the start of two consecutive rows, a
+// multiple of kAlign but NOT CacheAligned::kAlias - see below.
+//
+// Differing "kAlign" make sense for:
+// - Image: 128 to avoid false sharing/RFOs between multiple threads processing
+//   rows independently;
+// - TileFlow: no cache line alignment needed because buffers are per-thread;
+//   just need kMaxVectorSize=16..64 for SIMD.
+//
+// "valid_bytes" is xsize * sizeof(T).
+template <size_t kAlign>
+static inline size_t BytesPerRow(const size_t valid_bytes) {
+  static_assert((kAlign & (kAlign - 1)) == 0, "kAlign should be power of two");
+
+  // Reserve one extra vector on each side so that convolve.h may *write* a
+  // partial or full vector past the right AND left border without touching
+  // the neighbouring rows.
+  const size_t padded = valid_bytes + 2 * kMaxVectorSize;
+
+  // Round up to the next multiple of kAlign (valid because it is a power of 2).
+  const size_t aligned = (padded + kAlign - 1) & ~(kAlign - 1);
+
+  // While writes are still in flight, CPUs detect read-after-write hazards by
+  // comparing only the lower 11 address bits. A stride that is a multiple of
+  // 2 KiB would therefore create false dependencies between consecutive rows,
+  // so nudge such strides by one alignment unit. (Avoid2K handles the same
+  // issue for the planes of an Image3.)
+  const bool aliases = (aligned % CacheAligned::kAlias == 0);
+  return aliases ? aligned + kImageAlign : aligned;
+}
+
+// Factored out of Image<> to avoid dependency on profiler.h and <atomic>.
+// Returns storage for `size` bytes; Image<> passes its total byte count plus
+// the pixel dimensions. `size` may be zero.
+// NOTE(review): xsize/ysize appear to be informational only - confirm against
+// the definition in image.cc.
+CacheAlignedUniquePtr AllocateImageBytes(size_t size, size_t xsize,
+                                         size_t ysize);
+
+// Single channel, aligned rows separated by padding. T must be POD.
+//
+// Rationale: vectorization benefits from aligned operands - unaligned loads and
+// especially stores are expensive when the address crosses cache line
+// boundaries. Introducing padding after each row ensures the start of a row is
+// aligned, and that row loops can process entire vectors (writes to the padding
+// are allowed and ignored).
+//
+// We prefer a planar representation, where channels are stored as separate
+// 2D arrays, because that simplifies vectorization (repeating the same
+// operation on multiple adjacent components) without the complexity of a
+// hybrid layout (8 R, 8 G, 8 B, ...). In particular, clients can easily iterate
+// over all components in a row and Image requires no knowledge of the pixel
+// format beyond the component type "T".
+//
+// This image layout could also be achieved with a vector and a row accessor
+// function, but a class wrapper with support for "deleter" allows wrapping
+// existing memory allocated by clients without copying the pixels. It also
+// provides convenient accessors for xsize/ysize, which shortens function
+// argument lists. Supports move-construction so it can be stored in containers.
+template <typename ComponentType>
+class Image {
+ public:
+  using T = ComponentType;
+  static constexpr size_t kNumPlanes = 1;
+
+  // Creates an empty 0 x 0 image that owns no storage.
+  Image() : xsize_(0), ysize_(0), bytes_per_row_(0), bytes_(nullptr) {}
+
+  // Creates an xsize x ysize image. The row stride comes from
+  // BytesPerRow<kImageAlign>; storage is bytes_per_row_ * ysize plus one extra
+  // kMaxVectorSize and is only allocated when both dimensions are nonzero.
+  // NOTE(review): the dimensions are stored in uint32_t members, so values
+  // above UINT32_MAX would be truncated - confirm callers stay below that.
+  Image(const size_t xsize, const size_t ysize)
+      : xsize_(xsize),
+        ysize_(ysize),
+        bytes_per_row_(BytesPerRow<kImageAlign>(xsize * sizeof(T))),
+        bytes_(nullptr) {
+    PIK_ASSERT(bytes_per_row_ % kImageAlign == 0);
+    // xsize and/or ysize can legitimately be zero, in which case we don't
+    // want to allocate.
+    if (xsize != 0 && ysize != 0) {
+      bytes_ = AllocateImageBytes(bytes_per_row_ * ysize + kMaxVectorSize,
+                                  xsize, ysize);
+    }
+
+#ifdef MEMORY_SANITIZER
+    // Only in MSAN builds: ensure full vectors are initialized.
+    // `remainder` is the number of bytes from the end of the valid pixels to
+    // the next kMaxVectorSize boundary (zero if already on a boundary).
+    const size_t partial = (xsize_ * sizeof(T)) % kMaxVectorSize;
+    const size_t remainder = (partial == 0) ? 0 : (kMaxVectorSize - partial);
+    for (size_t y = 0; y < ysize_; ++y) {
+      memset(Row(y) + xsize_, 0, remainder);
+    }
+#endif
+  }
+
+  // Copy construction/assignment is forbidden to avoid inadvertent copies,
+  // which can be very expensive. Use CopyImageTo() instead.
+  Image(const Image& other) = delete;
+  Image& operator=(const Image& other) = delete;
+
+  // Move constructor (required for returning Image from function)
+  Image(Image&& other) = default;
+
+  // Move assignment (required for std::vector)
+  Image& operator=(Image&& other) = default;
+
+  // Exchanges dimensions, row stride and storage with `other`.
+  void Swap(Image& other) {
+    std::swap(xsize_, other.xsize_);
+    std::swap(ysize_, other.ysize_);
+    std::swap(bytes_per_row_, other.bytes_per_row_);
+    std::swap(bytes_, other.bytes_);
+  }
+
+  // Useful for pre-allocating image with some padding for alignment purposes
+  // and later reporting the actual valid dimensions. Caller is responsible
+  // for ensuring xsize/ysize are <= the original dimensions.
+  void ShrinkTo(const size_t xsize, const size_t ysize) {
+    xsize_ = static_cast<uint32_t>(xsize);
+    ysize_ = static_cast<uint32_t>(ysize);
+    // NOTE: we can't recompute bytes_per_row for more compact storage and
+    // better locality because that would invalidate the image contents.
+  }
+
+  // How many pixels.
+  PIK_INLINE size_t xsize() const { return xsize_; }
+  PIK_INLINE size_t ysize() const { return ysize_; }
+
+  // Returns pointer to the start of a row, with at least xsize (rounded up to
+  // kImageAlign bytes) accessible values.
+  // The returned pointer is annotated as 64-byte aligned.
+  PIK_INLINE T* PIK_RESTRICT Row(const size_t y) {
+    RowBoundsCheck(y);
+    void* row = bytes_.get() + y * bytes_per_row_;
+    return static_cast<T*>(PIK_ASSUME_ALIGNED(row, 64));
+  }
+
+  // Returns pointer to non-const - required for writing to individual planes
+  // of an Image3.
+  PIK_INLINE T* PIK_RESTRICT MutableRow(const size_t y) const {
+    RowBoundsCheck(y);
+    void* row = bytes_.get() + y * bytes_per_row_;
+    return static_cast<T*>(PIK_ASSUME_ALIGNED(row, 64));
+  }
+
+  // Returns pointer to const (see above).
+  PIK_INLINE const T* PIK_RESTRICT Row(const size_t y) const {
+    RowBoundsCheck(y);
+    const void* row = bytes_.get() + y * bytes_per_row_;
+    return static_cast<const T*>(PIK_ASSUME_ALIGNED(row, 64));
+  }
+
+  // Returns pointer to const (see above), even if called on a non-const Image.
+  PIK_INLINE const T* PIK_RESTRICT ConstRow(const size_t y) const {
+    return Row(y);
+  }
+
+  // Raw access to byte contents, for interfacing with other libraries.
+  // Unsigned char instead of char to avoid surprises (sign extension).
+  PIK_INLINE uint8_t* PIK_RESTRICT bytes() {
+    void* p = bytes_.get();
+    return static_cast<uint8_t * PIK_RESTRICT>(PIK_ASSUME_ALIGNED(p, 64));
+  }
+  PIK_INLINE const uint8_t* PIK_RESTRICT bytes() const {
+    const void* p = bytes_.get();
+    return static_cast<const uint8_t * PIK_RESTRICT>(PIK_ASSUME_ALIGNED(p, 64));
+  }
+
+  // NOTE: do not use this for copying rows - the valid xsize may be much less.
+  PIK_INLINE size_t bytes_per_row() const { return bytes_per_row_; }
+
+  // Returns number of pixels (some of which are padding) per row. Useful for
+  // computing other rows via pointer arithmetic. WARNING: this must
+  // NOT be used to determine xsize. NOTE: this is less efficient than
+  // ByteOffset(row, bytes_per_row).
+  PIK_INLINE intptr_t PixelsPerRow() const {
+    static_assert(kImageAlign % sizeof(T) == 0,
+                  "Padding must be divisible by the pixel size.");
+    return static_cast<intptr_t>(bytes_per_row_ / sizeof(T));
+  }
+
+ private:
+  // Aborts when y is not a valid row index. Compiled in only for
+  // ADDRESS_SANITIZER / MEMORY_SANITIZER builds; a no-op otherwise.
+  PIK_INLINE void RowBoundsCheck(const size_t y) const {
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)
+    if (y >= ysize_) {
+      Abort(__FILE__, __LINE__, "Row(%zu) >= %zu\n", y, ysize_);
+    }
+#endif
+  }
+
+  // (Members are non-const to enable assignment during move-assignment.)
+  uint32_t xsize_;  // In valid pixels, not including any padding.
+  uint32_t ysize_;
+  size_t bytes_per_row_;  // Includes padding.
+  CacheAlignedUniquePtr bytes_;
+};
+
+using ImageB = Image<uint8_t>;
+using ImageS = Image<int16_t>;  // signed integer or half-float
+using ImageU = Image<uint16_t>;
+using ImageI = Image<int32_t>;
+using ImageF = Image<float>;
+using ImageD = Image<double>;
+
+// We omit unnecessary fields and choose smaller representations to reduce L1
+// cache pollution.
+#pragma pack(push, 1)
+
+// Pixel dimensions of an image, held in two 32-bit fields. POD.
+struct ImageSize {
+  // Builds an ImageSize from size_t dimensions, narrowing each to uint32_t.
+  static ImageSize Make(const size_t xsize, const size_t ysize) {
+    ImageSize size;
+    size.xsize = static_cast<uint32_t>(xsize);
+    size.ysize = static_cast<uint32_t>(ysize);
+    return size;
+  }
+
+  // Two sizes compare equal iff both dimensions match.
+  bool operator==(const ImageSize& other) const {
+    if (xsize != other.xsize) return false;
+    return ysize == other.ysize;
+  }
+
+  uint32_t xsize;
+  uint32_t ysize;
+};
+
+#pragma pack(pop)
+
+// Copies the valid pixels of every row of `from` into `to`, which must already
+// have the same dimensions (checked via PIK_ASSERT).
+template <typename T>
+void CopyImageTo(const Image<T>& from, Image<T>* PIK_RESTRICT to) {
+  PROFILER_ZONE("CopyImage1");
+  PIK_ASSERT(SameSize(from, *to));
+  const size_t num_rows = from.ysize();
+  for (size_t y = 0; y < num_rows; ++y) {
+    const T* PIK_RESTRICT src = from.ConstRow(y);
+    T* PIK_RESTRICT dst = to->Row(y);
+    memcpy(dst, src, from.xsize() * sizeof(T));
+  }
+}
+
+// DEPRECATED - prefer to preallocate result.
+// Allocates an image with the dimensions of `from` and returns it filled with
+// a copy of the pixels of `from`.
+template <typename T>
+Image<T> CopyImage(const Image<T>& from) {
+  Image<T> duplicate(from.xsize(), from.ysize());
+  CopyImageTo(from, &duplicate);
+  return duplicate;
+}
+
+// Returns true iff both arguments report identical xsize() and ysize().
+// Also works for Image3 and mixed argument types.
+template <class Image1, class Image2>
+bool SameSize(const Image1& image1, const Image2& image2) {
+  if (image1.xsize() != image2.xsize()) return false;
+  return image1.ysize() == image2.ysize();
+}
+
+// Returns true iff every valid pixel of `image1` equals the corresponding
+// pixel of `image2`. Both images must have the same dimensions (PIK_CHECK).
+template <typename T>
+bool SamePixels(const Image<T>& image1, const Image<T>& image2) {
+  const size_t xsize = image1.xsize();
+  const size_t ysize = image1.ysize();
+  PIK_CHECK(xsize == image2.xsize());
+  PIK_CHECK(ysize == image2.ysize());
+  for (size_t y = 0; y < ysize; ++y) {
+    const T* const PIK_RESTRICT lhs = image1.Row(y);
+    const T* const PIK_RESTRICT rhs = image2.Row(y);
+    // Padding beyond xsize is deliberately excluded from the comparison.
+    if (!std::equal(lhs, lhs + xsize, rhs)) return false;
+  }
+  return true;
+}
+
+// Use for floating-point images with fairly large numbers; tolerates small
+// absolute errors and/or small relative errors. Returns max_relative.
+//
+// Pixels within `border` of any edge are skipped; `c` only labels the printed
+// diagnostics. A pixel fails when its absolute error exceeds threshold_l1 and,
+// if the expected value is large enough for a relative error to be computed,
+// the relative error also exceeds threshold_relative. On the first row that
+// contains a failing pixel a summary is printed and PIK_ABORT is invoked.
+// NOTE(review): max_relative is only updated for failing pixels, and a failing
+// row aborts, so the value returned on success appears to always be 0.0 -
+// confirm whether callers rely on it.
+// NOTE(review): the loop bounds compute size - border in size_t, which wraps
+// if border exceeds the image size - confirm callers never pass that.
+template <typename T>
+double VerifyRelativeError(const Image<T>& expected, const Image<T>& actual,
+                           const double threshold_l1,
+                           const double threshold_relative,
+                           const size_t border = 0, const size_t c = 0) {
+  PIK_CHECK(SameSize(expected, actual));
+  // Max over current scanline to give a better idea whether there are
+  // systematic errors or just one outlier. Invalid if negative.
+  double max_l1 = -1;
+  double max_relative = -1;
+  for (size_t y = border; y < expected.ysize() - border; ++y) {
+    const T* const PIK_RESTRICT row_expected = expected.Row(y);
+    const T* const PIK_RESTRICT row_actual = actual.Row(y);
+    bool any_bad = false;
+    for (size_t x = border; x < expected.xsize() - border; ++x) {
+      const double l1 = std::abs(row_expected[x] - row_actual[x]);
+
+      // Cannot compute relative, only check/update L1.
+      if (row_expected[x] < 1E-10) {
+        if (l1 > threshold_l1) {
+          any_bad = true;
+          max_l1 = std::max(max_l1, l1);
+        }
+      } else {
+        const double relative = l1 / std::abs(double(row_expected[x]));
+        if (l1 > threshold_l1 && relative > threshold_relative) {
+          // Fails both tolerances => will exit below, update max_*.
+          any_bad = true;
+          max_l1 = std::max(max_l1, l1);
+          max_relative = std::max(max_relative, relative);
+        }
+      }
+    }
+
+    if (any_bad) {
+      // Never had a valid relative value, don't print it.
+      if (max_relative < 0) {
+        printf("c=%zu: max +/- %E exceeds +/- %.2E\n", c, max_l1, threshold_l1);
+      } else {
+        printf("c=%zu: max +/- %E, x %E exceeds +/- %.2E, x %.2E\n", c, max_l1,
+               max_relative, threshold_l1, threshold_relative);
+      }
+      // Find first failing x for further debugging.
+      // NOTE(review): this pass tests "> 1E-10" whereas the pass above tests
+      // "< 1E-10", so a value of exactly 1E-10 is classified differently here.
+      for (size_t x = border; x < expected.xsize() - border; ++x) {
+        const double l1 = std::abs(row_expected[x] - row_actual[x]);
+
+        bool bad = l1 > threshold_l1;
+        if (row_expected[x] > 1E-10) {
+          const double relative = l1 / std::abs(double(row_expected[x]));
+          bad &= relative > threshold_relative;
+        }
+        if (bad) {
+          PIK_ABORT("%zu, %zu (%zu x %zu) expected %f actual %f\n", x, y,
+                    expected.xsize(), expected.ysize(),
+                    static_cast<double>(row_expected[x]),
+                    static_cast<double>(row_actual[x]));
+        }
+      }
+
+      PIK_CHECK(false);  // if any_bad, we should have exited.
+    }
+  }
+
+  return (max_relative < 0) ? 0.0 : max_relative;
+}
+
+template <typename T>
+class Image3;
+
+// Rectangular region in image(s). Factoring this out of Image instead of
+// shifting the pointer by x0/y0 allows this to apply to multiple images with
+// different resolutions (e.g. color transform and quantization field).
+// Can compare using SameSize(rect1, rect2).
+class Rect {
+ public:
+  // Most windows are xsize_max * ysize_max, except those on the borders where
+  // begin + size_max > end. A window that begins at or beyond `end` is empty.
+  constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize_max,
+                 size_t ysize_max, size_t xend, size_t yend)
+      : x0_(xbegin),
+        y0_(ybegin),
+        xsize_(ClampedSize(xbegin, xsize_max, xend)),
+        ysize_(ClampedSize(ybegin, ysize_max, yend)) {}
+
+  // Construct with origin and known size (typically from another Rect).
+  constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize, size_t ysize)
+      : x0_(xbegin), y0_(ybegin), xsize_(xsize), ysize_(ysize) {}
+
+  // Construct a rect that covers a whole image
+  template <typename T>
+  explicit Rect(const Image3<T>& image)
+      : Rect(0, 0, image.xsize(), image.ysize()) {}
+  template <typename T>
+  explicit Rect(const Image<T>& image)
+      : Rect(0, 0, image.xsize(), image.ysize()) {}
+
+  Rect(const Rect&) = default;
+  Rect& operator=(const Rect&) = default;
+
+  // Returns the window of at most xsize_max x ysize_max whose origin is offset
+  // by (xbegin, ybegin) from this Rect's origin, clamped to this Rect's
+  // extent. Does not modify this Rect, hence const so that it can also be
+  // called through const references.
+  Rect Subrect(size_t xbegin, size_t ybegin, size_t xsize_max,
+               size_t ysize_max) const {
+    return Rect(x0_ + xbegin, y0_ + ybegin, xsize_max, ysize_max, x0_ + xsize_,
+                y0_ + ysize_);
+  }
+
+  // Row accessors: return the pointer to row (y + y0) of the given image or
+  // plane, advanced by x0 pixels, i.e. the first pixel of row y of this Rect.
+  template <typename T>
+  T* Row(Image<T>* image, size_t y) const {
+    return image->Row(y + y0_) + x0_;
+  }
+
+  template <typename T>
+  T* PlaneRow(Image3<T>* image, const int c, size_t y) const {
+    return image->PlaneRow(c, y + y0_) + x0_;
+  }
+
+  template <typename T>
+  const T* ConstRow(const Image<T>& image, size_t y) const {
+    return image.ConstRow(y + y0_) + x0_;
+  }
+
+  template <typename T>
+  const T* ConstPlaneRow(const Image3<T>& image, const int c, size_t y) const {
+    return image.ConstPlaneRow(c, y + y0_) + x0_;
+  }
+
+  // Returns true if this Rect fully resides in the given image. ImageT could be
+  // Image<T> or Image3<T>; however if ImageT is Rect, results are nonsensical.
+  template <class ImageT>
+  bool IsInside(const ImageT& image) const {
+    return (x0_ + xsize_ <= image.xsize()) && (y0_ + ysize_ <= image.ysize());
+  }
+
+  size_t x0() const { return x0_; }
+  size_t y0() const { return y0_; }
+  size_t xsize() const { return xsize_; }
+  size_t ysize() const { return ysize_; }
+
+ private:
+  // Returns size_max, or whatever is left in [begin, end). Yields 0 when
+  // begin >= end instead of letting the unsigned subtraction wrap around.
+  // (Single return statement so it stays a valid C++11 constexpr function.)
+  static constexpr size_t ClampedSize(size_t begin, size_t size_max,
+                                      size_t end) {
+    return (begin >= end)
+               ? 0
+               : ((begin + size_max <= end) ? size_max : end - begin);
+  }
+
+  size_t x0_;
+  size_t y0_;
+
+  size_t xsize_;
+  size_t ysize_;
+};
+
+// Copies the pixels of `from` that lie inside `rect` into `to`, whose
+// dimensions must equal those of `rect` (checked via PIK_ASSERT).
+template <typename T>
+void CopyImageTo(const Rect& rect, const Image<T>& from,
+                 Image<T>* PIK_RESTRICT to) {
+  PROFILER_ZONE("CopyImageR");
+  PIK_ASSERT(SameSize(rect, *to));
+  const size_t num_rows = rect.ysize();
+  for (size_t y = 0; y < num_rows; ++y) {
+    const T* PIK_RESTRICT src = rect.ConstRow(from, y);
+    T* PIK_RESTRICT dst = to->Row(y);
+    memcpy(dst, src, rect.xsize() * sizeof(T));
+  }
+}
+
+// DEPRECATED - Allocates an image of the size of "rect" and returns it filled
+// with the "image" pixels that lie in "rect".
+template <typename T>
+Image<T> CopyImage(const Rect& rect, const Image<T>& image) {
+  Image<T> cropped(rect.xsize(), rect.ysize());
+  CopyImageTo(rect, image, &cropped);
+  return cropped;
+}
+
+// Currently, we abuse Image to either refer to an image that owns its storage
+// or one that doesn't. In similar vein, we abuse Image* function parameters to
+// either mean "assign to me" or "fill the provided image with data".
+// Hopefully, the "assign to me" meaning will go away and most images in the Pik
+// codebase will not be backed by own storage. When this happens we can redesign
+// Image to be a non-storage-holding view class and introduce BackedImage in
+// those places that actually need it.
+
+// NOTE: we can't use Image as a view because invariants are violated
+// (alignment and the presence of padding before/after each "row").
+
+// A bundle of 3 same-sized images. Typically constructed by moving from three
+// rvalue references to Image. To overwrite an existing Image3 using
+// single-channel producers, we also need access to Image*. Constructing
+// temporary non-owning Image pointing to one plane of an existing Image3 risks
+// dangling references, especially if the wrapper is moved. Therefore, we
+// store an array of Image (which are compact enough that size is not a concern)
+// and provide a Plane+MutableRow accessors.
+// NOTE(review): this class as shown only defines a const Plane() accessor and
+// no MutableRow(); writers presumably go through Image::MutableRow on the
+// const plane - confirm.
+template <typename ComponentType>
+class Image3 {
+ public:
+  using T = ComponentType;
+  using PlaneT = Image<T>;
+  static constexpr size_t kNumPlanes = 3;
+
+  // Creates three empty planes.
+  Image3() : planes_{PlaneT(), PlaneT(), PlaneT()} {}
+
+  // Creates three newly allocated planes of xsize x ysize each.
+  Image3(const size_t xsize, const size_t ysize)
+      : planes_{PlaneT(xsize, ysize), PlaneT(xsize, ysize),
+                PlaneT(xsize, ysize)} {}
+
+  // Move constructor: default-constructs the planes, then move-assigns each
+  // plane from `other`.
+  Image3(Image3&& other) {
+    for (int i = 0; i < kNumPlanes; i++) {
+      planes_[i] = std::move(other.planes_[i]);
+    }
+  }
+
+  // Takes ownership of three existing planes, which must all have the same
+  // size (PIK_CHECK).
+  Image3(PlaneT&& plane0, PlaneT&& plane1, PlaneT&& plane2) {
+    PIK_CHECK(SameSize(plane0, plane1));
+    PIK_CHECK(SameSize(plane0, plane2));
+    planes_[0] = std::move(plane0);
+    planes_[1] = std::move(plane1);
+    planes_[2] = std::move(plane2);
+  }
+
+  // Copy construction/assignment is forbidden to avoid inadvertent copies,
+  // which can be very expensive. Use CopyImageTo instead.
+  Image3(const Image3& other) = delete;
+  Image3& operator=(const Image3& other) = delete;
+
+  // Move assignment: move-assigns each plane from `other`.
+  Image3& operator=(Image3&& other) {
+    for (int i = 0; i < kNumPlanes; i++) {
+      planes_[i] = std::move(other.planes_[i]);
+    }
+    return *this;
+  }
+
+  // Returns row pointer; usage: PlaneRow(idx_plane, y)[x] = val.
+  PIK_INLINE T* PIK_RESTRICT PlaneRow(const size_t c, const size_t y) {
+    // Custom implementation instead of calling planes_[c].Row ensures only a
+    // single multiplication is needed for PlaneRow(0..2, y).
+    // The offset uses plane 0's stride for every plane, which relies on all
+    // three planes having the same row stride.
+    PlaneRowBoundsCheck(c, y);
+    const size_t row_offset = y * planes_[0].bytes_per_row();
+    void* row = planes_[c].bytes() + row_offset;
+    return static_cast<T*>(PIK_ASSUME_ALIGNED(row, 64));
+  }
+
+  // Returns const row pointer; usage: val = PlaneRow(idx_plane, y)[x].
+  PIK_INLINE const T* PIK_RESTRICT PlaneRow(const size_t c,
+                                            const size_t y) const {
+    PlaneRowBoundsCheck(c, y);
+    const size_t row_offset = y * planes_[0].bytes_per_row();
+    const void* row = planes_[c].bytes() + row_offset;
+    return static_cast<const T*>(PIK_ASSUME_ALIGNED(row, 64));
+  }
+
+  // Returns const row pointer, even if called from a non-const Image3.
+  PIK_INLINE const T* PIK_RESTRICT ConstPlaneRow(const size_t c,
+                                                 const size_t y) const {
+    return PlaneRow(c, y);
+  }
+
+  // Returns a const reference to plane `idx`; the index is not range-checked.
+  PIK_INLINE const PlaneT& Plane(size_t idx) const { return planes_[idx]; }
+
+  // Exchanges all three planes with those of `other`.
+  void Swap(Image3& other) {
+    for (int c = 0; c < 3; ++c) {
+      other.planes_[c].Swap(planes_[c]);
+    }
+  }
+
+  // Applies Image::ShrinkTo(xsize, ysize) to every plane.
+  void ShrinkTo(const size_t xsize, const size_t ysize) {
+    for (PlaneT& plane : planes_) {
+      plane.ShrinkTo(xsize, ysize);
+    }
+  }
+
+  // Sizes of all three images are guaranteed to be equal.
+  PIK_INLINE size_t xsize() const { return planes_[0].xsize(); }
+  PIK_INLINE size_t ysize() const { return planes_[0].ysize(); }
+  // Returns offset [bytes] from one row to the next row of the same plane.
+  // WARNING: this must NOT be used to determine xsize, nor for copying rows -
+  // the valid xsize may be much less.
+  PIK_INLINE size_t bytes_per_row() const { return planes_[0].bytes_per_row(); }
+  // Returns number of pixels (some of which are padding) per row. Useful for
+  // computing other rows via pointer arithmetic. WARNING: this must NOT be used
+  // to determine xsize. NOTE: this is less efficient than
+  // ByteOffset(row, bytes_per_row).
+  PIK_INLINE intptr_t PixelsPerRow() const { return planes_[0].PixelsPerRow(); }
+
+ private:
+  // Aborts when c or y is out of range. Compiled in only for
+  // ADDRESS_SANITIZER / MEMORY_SANITIZER builds; a no-op otherwise.
+  PIK_INLINE void PlaneRowBoundsCheck(const size_t c, const size_t y) const {
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)
+    if (c >= kNumPlanes || y >= ysize()) {
+      Abort(__FILE__, __LINE__, "PlaneRow(%zu, %zu) >= %zu\n", c, y, ysize());
+    }
+#endif
+  }
+
+ private:
+  PlaneT planes_[kNumPlanes];
+};
+
+using Image3B = Image3<uint8_t>;
+using Image3S = Image3<int16_t>;
+using Image3U = Image3<uint16_t>;
+using Image3I = Image3<int32_t>;
+using Image3F = Image3<float>;
+using Image3D = Image3<double>;
+
+// Copies the valid pixels of all planes of `from` into `to`, which must
+// already have the same dimensions (checked via PIK_ASSERT).
+template <typename T>
+void CopyImageTo(const Image3<T>& from, Image3<T>* PIK_RESTRICT to) {
+  PROFILER_ZONE("CopyImage3");
+  PIK_ASSERT(SameSize(from, *to));
+
+  const size_t num_rows = from.ysize();
+  for (size_t c = 0; c < from.kNumPlanes; ++c) {
+    for (size_t y = 0; y < num_rows; ++y) {
+      const T* PIK_RESTRICT src = from.ConstPlaneRow(c, y);
+      T* PIK_RESTRICT dst = to->PlaneRow(c, y);
+      memcpy(dst, src, from.xsize() * sizeof(T));
+    }
+  }
+}
+
+// DEPRECATED - prefer to preallocate result.
+// Allocates an Image3 with the dimensions of `from` and returns it filled
+// with a copy of all three planes.
+template <typename T>
+Image3<T> CopyImage(const Image3<T>& from) {
+  Image3<T> duplicate(from.xsize(), from.ysize());
+  CopyImageTo(from, &duplicate);
+  return duplicate;
+}
+
+// DEPRECATED - prefer to preallocate result.
+// Returns a newly allocated Image3 of the size of `rect`, holding the pixels
+// of all three planes of `from` that lie in `rect`.
+template <typename T>
+Image3<T> CopyImage(const Rect& rect, const Image3<T>& from) {
+  Image3<T> to(rect.xsize(), rect.ysize());
+  for (size_t c = 0; c < 3; ++c) {
+    // Image3 only exposes const planes; `to` itself is non-const, so casting
+    // away constness in order to fill it is well-defined. The cast must name
+    // Image<T> (not ImageF) so that the template also instantiates for
+    // component types other than float.
+    CopyImageTo(rect, from.Plane(c), const_cast<Image<T>*>(&to.Plane(c)));
+  }
+  return to;
+}
+
+// Returns true iff every valid pixel of every plane of `image1` equals the
+// corresponding pixel of `image2`. Sizes must match (PIK_CHECK).
+template <typename T>
+bool SamePixels(const Image3<T>& image1, const Image3<T>& image2) {
+  PIK_CHECK(SameSize(image1, image2));
+  const size_t width = image1.xsize();
+  const size_t height = image1.ysize();
+  for (int c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < height; ++y) {
+      const T* PIK_RESTRICT lhs = image1.PlaneRow(c, y);
+      const T* PIK_RESTRICT rhs = image2.PlaneRow(c, y);
+      // Padding beyond the valid width is excluded from the comparison.
+      if (!std::equal(lhs, lhs + width, rhs)) return false;
+    }
+  }
+  return true;
+}
+
+// Runs the single-plane VerifyRelativeError on each of the three planes,
+// passing the plane index as the diagnostic label, and returns the largest of
+// the three results (never less than 0.0).
+template <typename T>
+double VerifyRelativeError(const Image3<T>& expected, const Image3<T>& actual,
+                           const float threshold_l1,
+                           const float threshold_relative,
+                           const size_t border = 0) {
+  double worst = 0.0;
+  for (size_t c = 0; c < 3; ++c) {
+    const double plane_result =
+        VerifyRelativeError(expected.Plane(c), actual.Plane(c), threshold_l1,
+                            threshold_relative, border, c);
+    if (worst < plane_result) worst = plane_result;
+  }
+  return worst;
+}
+
+// Overwrites a frame of "thickness" pixels along all four edges of every plane
+// with "value", leaving interior pixels untouched. This is faster than
+// initializing the entire image and overwriting valid/interior pixels.
+// Requires 2 * thickness to be smaller than both dimensions (PIK_ASSERT).
+template <typename T>
+void SetBorder(const size_t thickness, const T value, Image3<T>* image) {
+  const size_t xsize = image->xsize();
+  const size_t ysize = image->ysize();
+  PIK_ASSERT(2 * thickness < xsize && 2 * thickness < ysize);
+  for (size_t c = 0; c < 3; ++c) {
+    // Top band: whole rows.
+    for (size_t y = 0; y < thickness; ++y) {
+      T* PIK_RESTRICT begin = image->PlaneRow(c, y);
+      T* const end = begin + xsize;
+      std::fill(begin, end, value);
+    }
+
+    // Bottom band: whole rows.
+    for (size_t y = ysize - thickness; y < ysize; ++y) {
+      T* PIK_RESTRICT begin = image->PlaneRow(c, y);
+      T* const end = begin + xsize;
+      std::fill(begin, end, value);
+    }
+
+    // Rows in between: only the left and right edges.
+    for (size_t y = thickness; y < ysize - thickness; ++y) {
+      T* PIK_RESTRICT begin = image->PlaneRow(c, y);
+      std::fill(begin, begin + thickness, value);
+      std::fill(begin + xsize - thickness, begin + xsize, value);
+    }
+  }
+}
+
+
+}  // namespace pik
+
+#endif  // PIK_IMAGE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/image_ops.h b/codec/L2/demos/pikEnc/host/pik/image_ops.h
new file mode 100755
index 0000000000..aac2972861
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/image_ops.h
@@ -0,0 +1,718 @@
+// Copyright 2019 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_IMAGE_OPS_H_
+#define PIK_IMAGE_OPS_H_
+
+// Operations on images.
+
+#include "pik/image.h"
+
+namespace pik {
+
+
+template <class ImageIn, class ImageOut>
+void Subtract(const ImageIn& image1, const ImageIn& image2, ImageOut* out) {
+  using T = typename ImageIn::T;
+  const size_t xsize = image1.xsize();
+  const size_t ysize = image1.ysize();
+  PIK_CHECK(xsize == image2.xsize());
+  PIK_CHECK(ysize == image2.ysize());
+  // out(x, y) = image1(x, y) - image2(x, y); the size of *out is not checked.
+  for (size_t y = 0; y != ysize; ++y) {
+    const T* const PIK_RESTRICT minuend = image1.Row(y);
+    const T* const PIK_RESTRICT subtrahend = image2.Row(y);
+    T* const PIK_RESTRICT difference = out->Row(y);
+    for (size_t x = 0; x != xsize; ++x) {
+      difference[x] = minuend[x] - subtrahend[x];
+    }
+  }
+}
+
+// In-place: subtracts each pixel of "what" from the matching pixel of "*to".
+template <typename Tin, typename Tout>
+void SubtractFrom(const Image<Tin>& what, Image<Tout>* to) {
+  const size_t height = what.ysize();
+  const size_t width = what.xsize();
+  for (size_t y = 0; y != height; ++y) {
+    const Tin* PIK_RESTRICT src = what.ConstRow(y);
+    Tout* PIK_RESTRICT dst = to->Row(y);
+    for (size_t x = 0; x != width; ++x) {
+      dst[x] -= src[x];
+    }
+  }
+}
+
+// In-place: adds each pixel of "what" to the matching pixel of "*to".
+template <typename Tin, typename Tout>
+void AddTo(const Image<Tin>& what, Image<Tout>* to) {
+  const size_t height = what.ysize();
+  const size_t width = what.xsize();
+  for (size_t y = 0; y != height; ++y) {
+    const Tin* PIK_RESTRICT src = what.ConstRow(y);
+    Tout* PIK_RESTRICT dst = to->Row(y);
+    for (size_t x = 0; x != width; ++x) {
+      dst[x] += src[x];
+    }
+  }
+}
+
+// Returns the pixel-wise combination lambda1 * image1 + lambda2 * image2.
+template <typename T>
+Image<T> LinComb(const T lambda1, const Image<T>& image1, const T lambda2,
+                 const Image<T>& image2) {
+  const size_t xsize = image1.xsize();
+  const size_t ysize = image1.ysize();
+  PIK_CHECK(xsize == image2.xsize());
+  PIK_CHECK(ysize == image2.ysize());
+  Image<T> combined(xsize, ysize);
+  for (size_t y = 0; y != ysize; ++y) {
+    const T* const PIK_RESTRICT in1 = image1.Row(y);
+    const T* const PIK_RESTRICT in2 = image2.Row(y);
+    T* const PIK_RESTRICT dst = combined.Row(y);
+    for (size_t x = 0; x != xsize; ++x) {
+      dst[x] = lambda1 * in1[x] + lambda2 * in2[x];
+    }
+  }
+  return combined;
+}
+
+// Returns a new image whose pixels are those of "image" multiplied by lambda.
+template <typename T>
+Image<T> ScaleImage(const T lambda, const Image<T>& image) {
+  Image<T> scaled(image.xsize(), image.ysize());
+  for (size_t y = 0; y != image.ysize(); ++y) {
+    const T* const PIK_RESTRICT src = image.Row(y);
+    T* const PIK_RESTRICT dst = scaled.Row(y);
+    for (size_t x = 0; x != image.xsize(); ++x) {
+      dst[x] = lambda * src[x];
+    }
+  }
+  return scaled;
+}
+
+template <typename T>  // Pixel-wise a * b, sized like "a" (b is not checked).
+Image<T> Product(const Image<T>& a, const Image<T>& b) {
+  Image<T> product(a.xsize(), a.ysize());
+  for (size_t y = 0; y != a.ysize(); ++y) {
+    const T* const PIK_RESTRICT lhs = a.Row(y);
+    const T* const PIK_RESTRICT rhs = b.Row(y);
+    T* const PIK_RESTRICT dst = product.Row(y);
+    for (size_t x = 0; x != a.xsize(); ++x) {
+      dst[x] = lhs[x] * rhs[x];
+    }
+  }
+  return product;
+}
+
+float DotProduct(const ImageF& a, const ImageF& b);
+
+// Sets every pixel of "*image" to "value".
+template <typename T>
+void FillImage(const T value, Image<T>* image) {
+  const size_t xsize = image->xsize();
+  for (size_t y = 0; y < image->ysize(); ++y) {
+    T* const PIK_RESTRICT row = image->Row(y);
+    std::fill(row, row + xsize, value);
+  }
+}
+
+template <typename T>  // Clears every pixel by zeroing the bytes of each row.
+void ZeroFillImage(Image<T>* image) {
+  const size_t row_bytes = image->xsize() * sizeof(T);
+  for (size_t y = 0; y < image->ysize(); ++y) {
+    memset(image->Row(y), 0, row_bytes);
+  }
+}
+
+// Generator for independent, uniformly distributed integers [0, max].
+template <typename T, typename Random>
+class GeneratorRandom {
+ public:
+  GeneratorRandom(Random* rng, const T max) : rng_(*rng), dist_(0, max) {}
+  // As above, but draws from the closed range [min, max].
+  GeneratorRandom(Random* rng, const T min, const T max)
+      : rng_(*rng), dist_(min, max) {}
+  // The coordinates and channel are ignored; every call draws a fresh value.
+  T operator()(const size_t x, const size_t y, const int c) const {
+    return dist_(rng_);
+  }
+
+ private:
+  Random& rng_;  // Referenced, not copied: "*rng" must outlive this object.
+  mutable std::uniform_int_distribution<> dist_;  // Result type is int, not T.
+};
+
+template <typename Random>  // float specialization: real-valued distribution.
+class GeneratorRandom<float, Random> {
+ public:
+  GeneratorRandom(Random* rng, const float max)
+      : rng_(*rng), dist_(0.0f, max) {}
+  // As above, but with an explicit lower bound "min" instead of 0.
+  GeneratorRandom(Random* rng, const float min, const float max)
+      : rng_(*rng), dist_(min, max) {}
+  // The coordinates and channel are ignored; every call draws a fresh value.
+  float operator()(const size_t x, const size_t y, const int c) const {
+    return dist_(rng_);
+  }
+
+ private:
+  Random& rng_;  // Referenced, not copied: "*rng" must outlive this object.
+  mutable std::uniform_real_distribution<float> dist_;
+};
+
+template <typename Random>  // double specialization: real-valued distribution.
+class GeneratorRandom<double, Random> {
+ public:
+  GeneratorRandom(Random* rng, const double max)
+      : rng_(*rng), dist_(0.0, max) {}
+  // As above, but with an explicit lower bound "min" instead of 0.
+  GeneratorRandom(Random* rng, const double min, const double max)
+      : rng_(*rng), dist_(min, max) {}
+  // The coordinates and channel are ignored; every call draws a fresh value.
+  double operator()(const size_t x, const size_t y, const int c) const {
+    return dist_(rng_);
+  }
+
+ private:
+  Random& rng_;  // Referenced, not copied: "*rng" must outlive this object.
+  mutable std::uniform_real_distribution<> dist_;  // Default type is double.
+};
+
+// Single-plane fill: pixel (x, y) receives generator(x, y, 0).
+template <class Generator, class Image>
+void GenerateImage(const Generator& generator, Image* image) {
+  using Pixel = typename Image::T;
+  for (size_t y = 0; y != image->ysize(); ++y) {
+    Pixel* const PIK_RESTRICT dst = image->Row(y);
+    for (size_t x = 0; x != image->xsize(); ++x) {
+      dst[x] = generator(x, y, 0);
+    }
+  }
+}
+
+template <template <typename> class Image, typename T>
+void RandomFillImage(Image<T>* image,
+                     const T max = std::numeric_limits<T>::max()) {
+  std::mt19937_64 engine(129);  // Fixed seed, so the fill is repeatable.
+  const GeneratorRandom<T, std::mt19937_64> uniform(&engine, max);
+  GenerateImage(uniform, image);
+}
+
+template <template <typename> class Image, typename T>
+void RandomFillImage(Image<T>* image, const T min, const T max,
+                     const int seed) {
+  std::mt19937_64 engine(seed);  // Caller-chosen seed selects the sequence.
+  const GeneratorRandom<T, std::mt19937_64> uniform(&engine, min, max);
+  GenerateImage(uniform, image);
+}
+
+// Mirrors out-of-bounds coordinates into [0, xsize); valid ones are returned
+// unchanged. The mirror axis is outside the edge column, so the border pixel
+// is replicated. Assumes the overshoot is small relative to xsize (> 0).
+static inline int64_t Mirror(int64_t x, const int64_t xsize) {
+  // TODO(janwas): replace with branchless version
+  for (;;) {
+    if (x < 0) {
+      x = -x - 1;  // Reflect about the left edge: -1 -> 0, -2 -> 1, ...
+    } else if (x >= xsize) {
+      x = 2 * xsize - 1 - x;  // Reflect about the right edge.
+    } else {
+      return x;  // Inside [0, xsize).
+    }
+  }
+}
+
+// Wrap modes for ensuring X/Y coordinates are in the valid range [0, size):
+
+// Delegates to Mirror() (edge pixel repeated once). Useful for convolutions.
+struct WrapMirror {
+  PIK_INLINE int64_t operator()(const int64_t coord, const int64_t size) const {
+    return Mirror(coord, size);
+  }
+};
+
+// Clamps coord to [0, size - 1], i.e. repeats the edge pixel.
+struct WrapClamp {
+  PIK_INLINE int64_t operator()(const int64_t coord, const int64_t size) const {
+    return std::min(std::max<int64_t>(0, coord), size - 1);
+  }
+};
+
+// Returns the same coordinate ("size" is ignored): required for TFNode with
+// Border(), or useful when "coord" is already valid (e.g. image interior).
+struct WrapUnchanged {
+  PIK_INLINE int64_t operator()(const int64_t coord, const int64_t size) const {
+    return coord;
+  }
+};
+
+// Similar to Wrap* but for row pointers (reduces Row() multiplications).
+
+class WrapRowMirror {  // Maps row pointers outside the image to mirrored rows.
+ public:
+  template <class ImageOrView>
+  WrapRowMirror(const ImageOrView& image, const size_t ysize)
+      : first_row_(image.ConstRow(0)), last_row_(image.ConstRow(ysize - 1)) {}
+  // "stride" is added to float pointers, so it is a row distance in floats.
+  const float* const PIK_RESTRICT
+  operator()(const float* const PIK_RESTRICT row, const int64_t stride) const {
+    if (row < first_row_) {
+      const int64_t num_before = first_row_ - row;  // Distance in floats.
+      // Mirrored; one row before => row 0, two before = row 1, ...
+      return first_row_ + num_before - stride;
+    }
+    if (row > last_row_) {
+      const int64_t num_after = row - last_row_;  // Distance in floats.
+      // Mirrored; one row after => last row, two after = last - 1, ...
+      return last_row_ - num_after + stride;
+    }
+    return row;
+  }
+
+ private:
+  const float* const PIK_RESTRICT first_row_;  // image.ConstRow(0)
+  const float* const PIK_RESTRICT last_row_;   // image.ConstRow(ysize - 1)
+};
+
+struct WrapRowUnchanged {  // Identity: "row" is returned, "stride" is ignored.
+  PIK_INLINE const float* const PIK_RESTRICT
+  operator()(const float* const PIK_RESTRICT row, const int64_t stride) const {
+    return row;
+  }
+};
+
+// Sets "thickness" pixels on each border to "value". Only the border is
+// written, which is cheaper than filling the image and redoing the interior.
+template <typename T>
+void SetBorder(const size_t thickness, const T value, Image<T>* image) {
+  const size_t xsize = image->xsize();
+  const size_t ysize = image->ysize();
+  PIK_ASSERT(2 * thickness < xsize && 2 * thickness < ysize);
+  // Top rows are filled across their full width.
+  for (size_t y = 0; y < thickness; ++y) {
+    T* const PIK_RESTRICT top = image->Row(y);
+    std::fill_n(top, xsize, value);
+  }
+
+  // So are the bottom rows.
+  for (size_t y = ysize - thickness; y < ysize; ++y) {
+    T* const PIK_RESTRICT bottom = image->Row(y);
+    std::fill_n(bottom, xsize, value);
+  }
+
+  // Rows in between only get their left and right margins overwritten.
+  for (size_t y = thickness; y < ysize - thickness; ++y) {
+    T* const PIK_RESTRICT middle = image->Row(y);
+    std::fill_n(middle, thickness, value);
+    std::fill_n(middle + xsize - thickness, thickness, value);
+  }
+}
+
+// Writes the smallest and largest pixel value of "image" to *min and *max.
+template <typename T>
+void ImageMinMax(const Image<T>& image, T* const PIK_RESTRICT min,
+                 T* const PIK_RESTRICT max) {
+  *min = std::numeric_limits<T>::max();
+  *max = std::numeric_limits<T>::lowest();
+  for (size_t y = 0; y != image.ysize(); ++y) {
+    const T* const PIK_RESTRICT pixels = image.Row(y);
+    for (size_t x = 0; x != image.xsize(); ++x) {
+      *max = std::max(*max, pixels[x]);
+      *min = std::min(*min, pixels[x]);
+    }
+  }
+}
+
+// Returns the mean of all pixels (0 for an empty image), as a running average.
+template <typename T>
+double ImageAverage(const Image<T>& image) {
+  double mean = 0;
+  size_t count = 0;
+  for (size_t y = 0; y != image.ysize(); ++y) {
+    const T* const PIK_RESTRICT row = image.Row(y);
+    for (size_t x = 0; x != image.xsize(); ++x) {
+      // Incremental update mean += (v - mean) / count, the numerically stable
+      // alternative to accumulating one large sum.
+      const double value = row[x];
+      ++count;
+      mean += (value - mean) / count;
+    }
+  }
+  return mean;
+}
+
+// Copies pixels, scaling their value relative to the "from" min/max by
+// "to_range". Example: U8 [0, 255] := [0.0, 1.0], to_range = 1.0 =>
+// outputs [0.0, 1.0]. A constant (or empty) "from" has no range: outputs 0.
+template <typename FromType, typename ToType>
+void ImageConvert(const Image<FromType>& from, const float to_range,
+                  Image<ToType>* const PIK_RESTRICT to) {
+  PIK_ASSERT(SameSize(from, *to));
+  FromType lo, hi;
+  ImageMinMax(from, &lo, &hi);
+  const float scale = lo < hi ? to_range / (hi - lo) : 0.0f;  // no x/0, no NaN
+  for (size_t y = 0; y < from.ysize(); ++y) {
+    const FromType* const PIK_RESTRICT row_from = from.Row(y);
+    ToType* const PIK_RESTRICT row_to = to->Row(y);
+    for (size_t x = 0; x < from.xsize(); ++x) {
+      row_to[x] = static_cast<ToType>((row_from[x] - lo) * scale);
+    }
+  }
+}
+
+// Returns a copy of "from" with every pixel static_cast from FromType to ToType.
+template <typename FromType, typename ToType>
+Image<ToType> StaticCastImage(const Image<FromType>& from) {
+  Image<ToType> converted(from.xsize(), from.ysize());
+  for (size_t y = 0; y != from.ysize(); ++y) {
+    const FromType* const PIK_RESTRICT src = from.Row(y);
+    ToType* const PIK_RESTRICT dst = converted.Row(y);
+    for (size_t x = 0; x != from.xsize(); ++x) {
+      dst[x] = static_cast<ToType>(src[x]);
+    }
+  }
+  return converted;
+}
+
+
+template <typename T>  // "packed" holds ysize rows of xsize pixels, row-major.
+Image<T> ImageFromPacked(const std::vector<T>& packed, const size_t xsize,
+                         const size_t ysize) {
+  // Precondition (not checked): packed.size() >= xsize * ysize.
+  Image<T> out(xsize, ysize);
+  for (size_t y = 0; y < ysize; ++y) {
+    // data() + offset, unlike &packed[offset], is valid when xsize == 0.
+    memcpy(out.Row(y), packed.data() + y * xsize, xsize * sizeof(T));
+  }
+  return out;
+}
+
+// Computes independent minimum and maximum values for each plane within "rect".
+template <typename T>
+void Image3MinMax(const Image3<T>& image, const Rect& rect,
+                  std::array<T, 3>* out_min, std::array<T, 3>* out_max) {
+  for (int c = 0; c < 3; ++c) {
+    T min = std::numeric_limits<T>::max();
+    T max = std::numeric_limits<T>::lowest();  // min() is > 0 for floats.
+    for (size_t y = 0; y < rect.ysize(); ++y) {
+      const T* PIK_RESTRICT row = rect.ConstPlaneRow(image, c, y);
+      for (size_t x = 0; x < rect.xsize(); ++x) {
+        min = std::min(min, row[x]);
+        max = std::max(max, row[x]);
+      }
+    }
+    (*out_min)[c] = min;
+    (*out_max)[c] = max;
+  }
+}
+
+// As above, over the whole image (Rect(image)) instead of a sub-rectangle.
+template <typename T>
+void Image3MinMax(const Image3<T>& image, std::array<T, 3>* out_min,
+                  std::array<T, 3>* out_max) {
+  Image3MinMax(image, Rect(image), out_min, out_max);
+}
+
+template <typename T>  // Stores the largest pixel of each plane in *out_max.
+void Image3Max(const Image3<T>& image, std::array<T, 3>* out_max) {
+  for (int c = 0; c < 3; ++c) {
+    // lowest(), not min(): for floating-point T, min() is the smallest
+    // positive value and would hide the maximum of an all-negative plane.
+    T max = std::numeric_limits<T>::lowest();
+    for (size_t y = 0; y < image.ysize(); ++y) {
+      const T* PIK_RESTRICT row = image.ConstPlaneRow(c, y);
+      for (size_t x = 0; x < image.xsize(); ++x) max = std::max(max, row[x]);
+    }
+    (*out_max)[c] = max;
+  }
+}
+
+// Returns the sum, accumulated in type T, of the pixels inside `rect`.
+template <typename T>
+T ImageSum(const Image<T>& image, const Rect& rect) {
+  T total = 0;
+  for (size_t y = 0; y != rect.ysize(); ++y) {
+    const T* PIK_RESTRICT pixels = rect.ConstRow(image, y);
+    for (size_t x = 0; x != rect.xsize(); ++x) {
+      total += pixels[x];
+    }
+  }
+  return total;
+}
+
+template <typename T>  // Sum over the whole image, i.e. over Rect(image).
+T ImageSum(const Image<T>& image) {
+  return ImageSum(image, Rect(image));
+}
+
+template <typename T>  // Per-plane pixel sums over "rect", in plane order.
+std::array<T, 3> Image3Sum(const Image3<T>& image, const Rect& rect) {
+  std::array<T, 3> out_sum = {};  // "= 0" does not compile for std::array.
+  for (int c = 0; c < 3; ++c) {
+    out_sum[c] = ImageSum(image.Plane(c), rect);
+  }
+  return out_sum;
+}
+
+template <typename T>  // Per-plane sums over the whole image (Rect(image)).
+std::array<T, 3> Image3Sum(const Image3<T>& image) {
+  return Image3Sum(image, Rect(image));
+}
+
+template <typename T>  // Copies the pixels of "rect" into a row-major vector.
+std::vector<T> PackedFromImage(const Image<T>& image, const Rect& rect) {
+  const size_t xsize = rect.xsize();
+  const size_t ysize = rect.ysize();
+  std::vector<T> packed(xsize * ysize);
+  for (size_t y = 0; y < rect.ysize(); ++y) {
+    T* const dst = packed.data() + y * xsize;  // Valid even if packed is empty.
+    memcpy(dst, rect.ConstRow(image, y), xsize * sizeof(T));
+  }
+  return packed;
+}
+
+template <typename T>  // Packs the whole image, i.e. Rect(image).
+std::vector<T> PackedFromImage(const Image<T>& image) {
+  return PackedFromImage(image, Rect(image));
+}
+
+// Returns Median() of a packed copy of the pixels inside "rect".
+template <typename T>
+T ImageMedian(const Image<T>& image, const Rect& rect) {
+  std::vector<T> pixels = PackedFromImage(image, rect);
+  return Median(&pixels);
+}
+
+template <typename T>  // Median over the whole image, i.e. over Rect(image).
+T ImageMedian(const Image<T>& image) {
+  return ImageMedian(image, Rect(image));
+}
+
+template <typename T>  // Median of each plane within "rect", in plane order.
+std::array<T, 3> Image3Median(const Image3<T>& image, const Rect& rect) {
+  std::array<T, 3> medians;
+  for (int plane = 0; plane < 3; ++plane) {
+    medians[plane] = ImageMedian(image.Plane(plane), rect);
+  }
+  return medians;
+}
+
+template <typename T>  // Per-plane medians over the whole image (Rect(image)).
+std::array<T, 3> Image3Median(const Image3<T>& image) {
+  return Image3Median(image, Rect(image));
+}
+
+template <typename FromType, typename ToType>  // One shared scale for 3 planes.
+void Image3Convert(const Image3<FromType>& from, const float to_range,
+                   Image3<ToType>* const PIK_RESTRICT to) {
+  PIK_ASSERT(SameSize(from, *to));
+  std::array<FromType, 3> min_from, max_from;
+  Image3MinMax(from, &min_from, &max_from);
+  float scale = std::numeric_limits<float>::max();  // Kept if all planes flat.
+  for (int c = 0; c < 3; ++c) {
+    if (!(min_from[c] < max_from[c])) continue;  // flat plane: no constraint
+    const float plane_scale = to_range / (max_from[c] - min_from[c]);
+    scale = std::min(scale, plane_scale);  // Smallest scale fits every plane.
+  }
+  for (int c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < from.ysize(); ++y) {
+      const FromType* PIK_RESTRICT row_from = from.ConstPlaneRow(c, y);
+      ToType* PIK_RESTRICT row_to = to->PlaneRow(c, y);
+      for (size_t x = 0; x < from.xsize(); ++x) {
+        row_to[x] = static_cast<ToType>((row_from[x] - min_from[c]) * scale);
+      }
+    }
+  }
+}
+
+// Pixel-wise static_cast of all three planes. ToType is listed first here.
+template <typename ToType, typename FromType>
+Image3<ToType> StaticCastImage3(const Image3<FromType>& from) {
+  Image3<ToType> converted(from.xsize(), from.ysize());
+  for (int c = 0; c < 3; ++c) {
+    for (size_t y = 0; y != from.ysize(); ++y) {
+      const FromType* PIK_RESTRICT src = from.ConstPlaneRow(c, y);
+      ToType* PIK_RESTRICT dst = converted.PlaneRow(c, y);
+      for (size_t x = 0; x != from.xsize(); ++x) {
+        dst[x] = static_cast<ToType>(src[x]);
+      }
+    }
+  }
+  return converted;
+}
+
+template <typename Tin, typename Tout>
+void Subtract(const Image3<Tin>& image1, const Image3<Tin>& image2,
+              Image3<Tout>* out) {
+  const size_t xsize = image1.xsize();
+  const size_t ysize = image1.ysize();
+  PIK_CHECK(xsize == image2.xsize());
+  PIK_CHECK(ysize == image2.ysize());
+  // Per plane: out = image1 - image2; the size of *out is not checked.
+  for (int c = 0; c < 3; ++c) {
+    for (size_t y = 0; y != ysize; ++y) {
+      const Tin* const PIK_RESTRICT minuend = image1.ConstPlaneRow(c, y);
+      const Tin* const PIK_RESTRICT subtrahend = image2.ConstPlaneRow(c, y);
+      Tout* const PIK_RESTRICT difference = out->PlaneRow(c, y);
+      for (size_t x = 0; x != xsize; ++x) {
+        difference[x] = minuend[x] - subtrahend[x];
+      }
+    }
+  }
+}
+
+template <typename Tin, typename Tout>  // In-place: *to -= what, per plane.
+void SubtractFrom(const Image3<Tin>& what, Image3<Tout>* to) {
+  const size_t height = what.ysize();
+  const size_t width = what.xsize();
+  for (int c = 0; c < 3; ++c) {
+    for (size_t y = 0; y != height; ++y) {
+      const Tin* PIK_RESTRICT src = what.ConstPlaneRow(c, y);
+      Tout* PIK_RESTRICT dst = to->PlaneRow(c, y);
+      for (size_t x = 0; x != width; ++x) {
+        dst[x] -= src[x];
+      }
+    }
+  }
+}
+
+template <typename Tin, typename Tout>  // In-place: *to += what, per plane.
+void AddTo(const Image3<Tin>& what, Image3<Tout>* to) {
+  const size_t height = what.ysize();
+  const size_t width = what.xsize();
+  for (int c = 0; c < 3; ++c) {
+    for (size_t y = 0; y != height; ++y) {
+      const Tin* PIK_RESTRICT src = what.ConstPlaneRow(c, y);
+      Tout* PIK_RESTRICT dst = to->PlaneRow(c, y);
+      for (size_t x = 0; x != width; ++x) {
+        dst[x] += src[x];
+      }
+    }
+  }
+}
+
+template <typename T>  // Returns a copy with every pixel multiplied by lambda.
+Image3<T> ScaleImage(const T lambda, const Image3<T>& image) {
+  Image3<T> scaled(image.xsize(), image.ysize());
+  for (size_t c = 0; c < 3; ++c) {
+    for (size_t y = 0; y != image.ysize(); ++y) {
+      const T* PIK_RESTRICT src = image.ConstPlaneRow(c, y);
+      T* PIK_RESTRICT dst = scaled.PlaneRow(c, y);
+      for (size_t x = 0; x != image.xsize(); ++x) {
+        dst[x] = lambda * src[x];
+      }
+    }
+  }
+  return scaled;
+}
+
+// Initializes all planes to the same "value": every pixel of every plane is
+// overwritten.
+template <typename T>
+void FillImage(const T value, Image3<T>* image) {
+  const size_t xsize = image->xsize();
+  for (int c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < image->ysize(); ++y) {
+      T* PIK_RESTRICT row = image->PlaneRow(c, y);
+      std::fill(row, row + xsize, value);
+    }
+  }
+}
+
+template <typename T>  // Zeroes the bytes of every row in all three planes.
+void ZeroFillImage(Image3<T>* image) {
+  const size_t row_bytes = image->xsize() * sizeof(T);
+  for (int c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < image->ysize(); ++y) {
+      memset(image->PlaneRow(c, y), 0, row_bytes);
+    }
+  }
+}
+
+// TODO(firsching): add rect parameter to ZeroFillImage for consistency.
+template <typename T>  // Zeroes only the part of "*image" covered by "rect".
+void ZeroFillImage(Image<T>* image, Rect rect) {
+  for (size_t y = 0; y < rect.ysize(); ++y) {
+    T* PIK_RESTRICT row = rect.Row(image, y);
+    memset(row, 0, rect.xsize() * sizeof(T));
+  }
+}
+
+// Three-plane fill: pixel (x, y) of plane c receives generator(x, y, c).
+template <class Generator, typename T>
+void GenerateImage(const Generator& generator, Image3<T>* image) {
+  for (int c = 0; c < 3; ++c) {
+    for (size_t y = 0; y != image->ysize(); ++y) {
+      T* PIK_RESTRICT dst = image->PlaneRow(c, y);
+      for (size_t x = 0; x != image->xsize(); ++x) {
+        dst[x] = generator(x, y, c);
+      }
+    }
+  }
+}
+
+template <template <typename> class Image, typename T>
+void RandomFillImage(Image3<T>* image,
+                     const T max = std::numeric_limits<T>::max()) {
+  std::mt19937_64 rng(129);  // Fixed seed, so the fill is repeatable.
+  const GeneratorRandom<T, std::mt19937_64> generator(&rng, max);
+  GenerateImage(generator, image);
+}  // NOTE(review): "Image" appears in no parameter, so it cannot be deduced.
+
+template <template <typename> class Image, typename T>
+void RandomFillImage(Image3<T>* image, const T min, const T max,
+                     const int seed) {
+  std::mt19937_64 rng(seed);  // Caller-chosen seed selects the sequence.
+  const GeneratorRandom<T, std::mt19937_64> generator(&rng, min, max);
+  GenerateImage(generator, image);
+}  // NOTE(review): "Image" appears in no parameter, so it cannot be deduced.
+
+template <typename T>  // Returns the pixels as c0 c1 c2 triples, row-major.
+std::vector<T> InterleavedFromImage3(const Image3<T>& image3) {
+  const size_t xsize = image3.xsize();
+  const size_t ysize = image3.ysize();
+  std::vector<T> interleaved(xsize * ysize * 3);
+  for (size_t y = 0; y < ysize; ++y) {
+    const T* PIK_RESTRICT row0 = image3.ConstPlaneRow(0, y);
+    const T* PIK_RESTRICT row1 = image3.ConstPlaneRow(1, y);
+    const T* PIK_RESTRICT row2 = image3.ConstPlaneRow(2, y);  // data(): OK if empty
+    T* const PIK_RESTRICT row_interleaved = interleaved.data() + y * xsize * 3;
+    for (size_t x = 0; x < xsize; ++x) {
+      row_interleaved[3 * x + 0] = row0[x];
+      row_interleaved[3 * x + 1] = row1[x];
+      row_interleaved[3 * x + 2] = row2[x];
+    }
+  }
+  return interleaved;
+}
+
+template <typename T>  // Splits c0 c1 c2 triples into three separate planes.
+Image3<T> Image3FromInterleaved(const T* const interleaved, const size_t xsize,
+                                const size_t ysize,
+                                const size_t bytes_per_row) {
+  PIK_ASSERT(bytes_per_row >= 3 * xsize * sizeof(T));
+  Image3<T> planar(xsize, ysize);
+  const uint8_t* base = reinterpret_cast<const uint8_t*>(interleaved);
+  for (int c = 0; c < 3; ++c) {
+    for (size_t y = 0; y != ysize; ++y) {
+      T* PIK_RESTRICT dst = planar.PlaneRow(c, y);
+      // Rows are bytes_per_row apart, which may exceed 3 * xsize * sizeof(T).
+      const T* src = reinterpret_cast<const T*>(base + y * bytes_per_row);
+      for (size_t x = 0; x != xsize; ++x) {
+        dst[x] = src[3 * x + c];
+      }
+    }
+  }
+  return planar;
+}
+
+// First, image is padded horizontally, with the rightmost value.
+// Next, image is padded vertically, by repeating the last line.
+Image3F PadImageToMultiple(const Image3F& in, const size_t N);
+
+}  // namespace pik
+
+#endif  // PIK_IMAGE_OPS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/image_test_utils.h b/codec/L2/demos/pikEnc/host/pik/image_test_utils.h
new file mode 100755
index 0000000000..55cd93611c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/image_test_utils.h
@@ -0,0 +1,39 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_IMAGE_TEST_UTILS_H_
+#define PIK_IMAGE_TEST_UTILS_H_
+
+#include <math.h>
+#include <stddef.h>
+
+#include "gtest/gtest.h"
+#include "pik/compiler_specific.h"
+#include "pik/image.h"
+
+namespace pik {
+
+template <typename T>  // gtest helper: fatal failure at the first mismatch.
+void VerifyEqual(const Image<T>& expected, const Image<T>& actual) {
+  PIK_CHECK(SameSize(expected, actual));
+  for (size_t y = 0; y < expected.ysize(); ++y) {
+    const T* const PIK_RESTRICT row_expected = expected.Row(y);
+    const T* const PIK_RESTRICT row_actual = actual.Row(y);
+    for (size_t x = 0; x < expected.xsize(); ++x) {
+      ASSERT_EQ(row_expected[x], row_actual[x]) << x << " " << y;  // "x y"
+    }
+  }
+}
+
+template <typename T>  // Compares plane by plane; stops at the first mismatch.
+void VerifyEqual(const Image3<T>& expected, const Image3<T>& actual) {
+  for (int c = 0; c < 3; ++c) {
+    ASSERT_NO_FATAL_FAILURE(VerifyEqual(expected.Plane(c), actual.Plane(c)));
+  }
+}
+
+}  // namespace pik
+#endif  // PIK_IMAGE_TEST_UTILS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/lehmer_code.cc b/codec/L2/demos/pikEnc/host/pik/lehmer_code.cc
new file mode 100755
index 0000000000..395521e7bc
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/lehmer_code.cc
@@ -0,0 +1,69 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+// TODO(veluca): Lehmer coding takes up to 5% of the decoding time on very
+// small images (32x32 pixels), and up to 1.5% for moderate-sized images.
+// However, since computing and reversing Lehmer coding can be seen as a
+// variation of the Fisher-Yates shuffle, we can reduce the time taken by this
+// step significantly by changing the implementation to a linear time one.
+
+#include "pik/lehmer_code.h"
+
+#include <vector>
+
+namespace pik {
+
+int FindIndexAndRemove(int val, int* s, int len) {
+  // Rank of "val" among the entries not yet removed (-1 marks a removed slot).
+  int rank = 0;
+  for (int i = 0; i < len; ++i) {
+    if (s[i] == val) {
+      s[i] = -1;  // Strike the match out so later searches skip it.
+      return rank;
+    }
+    if (s[i] != -1) ++rank;
+  }
+  return rank;  // Not found: the number of live entries.
+}
+
+void ComputeLehmerCode(const int* sigma, const int len, int* code) {
+  // "remaining" starts as the identity 0 .. len-1; each coded element is
+  // struck out of it, so code[i] is the rank of sigma[i] among what is left.
+  std::vector<int> remaining(len);
+  for (int i = 0; i < len; ++i) remaining[i] = i;
+  for (int i = 0; i < len; ++i) {
+    code[i] = FindIndexAndRemove(sigma[i], &remaining[0], len);
+  }
+}
+
+// Returns the idx-th entry of s[0 .. len) that is not -1 and marks it removed.
+int FindValueAndRemove(int idx, int* s, int len) {
+  int live_seen = 0;  // Number of non-removed entries passed so far.
+  for (int i = 0; i < len; ++i) {
+    if (s[i] != -1) {
+      if (live_seen == idx) {
+        const int found = s[i];
+        s[i] = -1;
+        return found;
+      }
+      ++live_seen;
+    }
+  }
+  return 0;  // idx is negative or >= the number of live entries.
+}
+
+// Inverse of ComputeLehmerCode. Every sigma[i] lies in 0 .. (len - 1), even
+// for an invalid code, because an out-of-range code[i] decodes to 0.
+void DecodeLehmerCode(const int* code, int len, int* sigma) {
+  // Identity permutation; decoded values are struck out as they are used.
+  std::vector<int> remaining(len);
+  for (int i = 0; i < len; ++i) remaining[i] = i;
+  for (int i = 0; i < len; ++i) {
+    sigma[i] = FindValueAndRemove(code[i], &remaining[0], len);
+  }
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/lehmer_code.h b/codec/L2/demos/pikEnc/host/pik/lehmer_code.h
new file mode 100755
index 0000000000..4088df4f0b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/lehmer_code.h
@@ -0,0 +1,87 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_LEHMER_CODE_H_
+#define PIK_LEHMER_CODE_H_
+
+// Library to compute the Lehmer code of a permutation and to reconstruct the
+// permutation from its Lehmer code. For more details on Lehmer codes, see
+// http://en.wikipedia.org/wiki/Lehmer_code
+
+#include <cstring>
+#include <memory>
+#include <vector>
+
+namespace pik {
+
+// Computes the Lehmer code of the permutation sigma[0..len) and puts the
+// result into code[0..len).
+void ComputeLehmerCode(const int* sigma, int len, int* code);
+
+// Decodes the Lehmer code in code[0..len) and puts the resulting permutation
+// into sigma[0..len).
+void DecodeLehmerCode(const int* code, int len, int* sigma);
+
+// This class is an optimized Lehmer-like coder that takes the remaining
+// number of possible values into account to reduce the bit usage.
+class PermutationCoder {
+ public:
+  explicit PermutationCoder(int num_bits)
+      : nbits_(num_bits), num_values_(1 << nbits_), values_(num_values_) {
+    for (int i = 0; i < num_values_; ++i) values_[i] = i;
+  }
+  PermutationCoder(int num_bits, const unsigned char values[])
+      : nbits_(num_bits), num_values_(1 << nbits_), values_(num_values_) {
+    for (int i = 0; i < num_values_; ++i) values_[i] = values[i];
+  }
+  // Number of bits needed to represent the next code; never negative.
+  int num_bits() const { return nbits_; }
+
+  // Removes (and returns) the value coded by 'code'. Returns -1 in
+  // case of error (invalid slot).
+  int Remove(int code) {
+    if (code >= num_values_ || code < 0) {
+      return -1;
+    }
+    const int value = values_[code];
+    DoRemove(code);
+    return value;
+  }
+
+  // Removes 'value' from the list and assigns a code + number-of-bits
+  // for it. Returns false if value is not codable.
+  bool RemoveValue(int value, int* code, int* nbits) {
+    for (int i = 0; i < num_values_; ++i) {
+      if (values_[i] == value) {
+        *code = i;
+        *nbits = nbits_;
+        DoRemove(i);
+        return true;
+      }
+    }
+    return false;  // invalid/non-existing value was passed.
+  }
+
+ private:
+  void DoRemove(int pos) {  // Closes the gap at "pos" and updates nbits_.
+    --num_values_;
+    if (pos < num_values_) {
+      memmove(&values_[pos], &values_[pos + 1],
+              (num_values_ - pos) * sizeof(values_[0]));
+    }
+    if (nbits_ > 0 && ((1 << nbits_) >> 1) >= num_values_) {
+      --nbits_;  // Half the code space suffices; stop at 0, not -1.
+    }
+  }
+
+  int nbits_;       // Bits per code for the values still present.
+  int num_values_;  // Live prefix length of values_.
+  std::vector<unsigned char> values_;  // Entries are 8-bit, so values < 256.
+};
+
+}  // namespace pik
+
+#endif  // PIK_LEHMER_CODE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/linalg.cc b/codec/L2/demos/pikEnc/host/pik/linalg.cc
new file mode 100755
index 0000000000..7610f52354
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/linalg.cc
@@ -0,0 +1,234 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/linalg.h"
+
+#include <string.h>
+#include <climits>
+#include <queue>
+#include <utility>
+#include <vector>
+
+#include "pik/common.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Debug check that A is square and symmetric: every pair A[i][j], A[j][i]
+// must agree to within 1e-15. The body is compiled only when
+// PIK_ENABLE_ASSERT is set; otherwise the function does nothing.
+void AssertSymmetric(const ImageD& A) {
+#if PIK_ENABLE_ASSERT
+  PIK_ASSERT(A.xsize() == A.ysize());
+  for (size_t i = 0; i < A.xsize(); ++i) {
+    // Only the strict upper triangle needs to be visited.
+    for (size_t j = i + 1; j < A.xsize(); ++j) {
+      PIK_ASSERT(std::abs(A.Row(i)[j] - A.Row(j)[i]) < 1e-15);
+    }
+  }
+#endif
+}
+
+// Computes the cosine/sine pair (*c, *s) of the rotation that
+// ConvertToDiagonal() applies to a 2x2 symmetric block with diagonal entries
+// a0, a1 and off-diagonal entry b. When |b| < 1e-15 the block is treated as
+// already diagonal and the identity rotation (c = 1, s = 0) is returned.
+void Diagonalize2x2(const double a0, const double a1, const double b, double* c,
+                    double* s) {
+  const bool negligible_coupling = std::abs(b) < 1e-15;
+  if (negligible_coupling) {
+    *c = 1.0;
+    *s = 0.0;
+    return;
+  }
+  const double half_phi = 0.5 * std::atan2(2 * b, a1 - a0);
+  // For non-positive b the half angle is moved by pi.
+  const double theta = b > 0.0 ? half_phi : half_phi + Pi(1.0);
+  *c = std::cos(theta);
+  *s = std::sin(theta);
+}
+
+// Computes (*c, *s) with c^2 + s^2 = 1 such that s * x + c * y == 0.
+// For y == 0 no rotation is needed and only the sign of x is kept in c.
+void GivensRotation(const double x, const double y, double* c, double* s) {
+  if (y != 0.0) {
+    // Scale by the reciprocal of the vector length.
+    const double inv_len = 1.0 / std::hypot(x, y);
+    *c = x * inv_len;
+    *s = -y * inv_len;
+    return;
+  }
+  *c = x < 0.0 ? -1.0 : 1.0;
+  *s = 0.0;
+}
+
+// Applies the plane rotation (c, s) to rows i and j of the square matrix U,
+// in place:
+//   row_i <- c * row_i - s * row_j
+//   row_j <- s * row_i + c * row_j
+// Each element pair is rotated through two scalar temporaries, so no scratch
+// buffers are allocated per call; the arithmetic per element is the same as
+// a two-pass version that first stores the rotated rows separately.
+void RotateMatrixCols(ImageD* const PIK_RESTRICT U, int i, int j, double c,
+                      double s) {
+  PIK_ASSERT(U->xsize() == U->ysize());
+  const size_t N = U->xsize();
+  double* const PIK_RESTRICT u_i = U->Row(i);
+  double* const PIK_RESTRICT u_j = U->Row(j);
+  for (size_t k = 0; k < N; ++k) {
+    // Read both old values before overwriting either of them.
+    const double old_i = u_i[k];
+    const double old_j = u_j[k];
+    u_i[k] = old_i * c - old_j * s;
+    u_j[k] = old_i * s + old_j * c;
+  }
+}
+// Writes to u[0..N) the unit-length Householder vector derived from x[0..N):
+// u is x with its first entry shifted by the norm of x (the sign is chosen so
+// that the two terms do not cancel), then scaled to length one.
+void HouseholderReflector(const size_t N, const double* x, double* u) {
+  const double sigma = x[0] <= 0.0 ? 1.0 : -1.0;
+  const double x_norm = std::sqrt(DotProduct(N, x, x));
+  u[0] = x[0] - sigma * x_norm;
+  size_t k = 1;
+  while (k < N) {
+    u[k] = x[k];
+    ++k;
+  }
+  const double inv_u_norm = 1.0 / std::sqrt(DotProduct(N, u, u));
+  for (double* entry = u; entry != u + N; ++entry) {
+    *entry *= inv_u_norm;
+  }
+}
+
+// Reduces the symmetric matrix A to a tridiagonal matrix *T with a sequence
+// of Householder reflections and builds *U from the same reflections.
+// *T starts as a copy of A and *U as the identity; both are overwritten.
+void ConvertToTridiagonal(const ImageD& A, ImageD* const PIK_RESTRICT T,
+                          ImageD* const PIK_RESTRICT U) {
+  AssertSymmetric(A);
+  const size_t N = A.xsize();
+  *U = Identity<double>(A.xsize());
+  *T = CopyImage(A);
+  // Reflectors are kept so they can be applied to U afterwards.
+  std::vector<ImageD> u_stack;
+  for (size_t k = 0; k + 2 < N; ++k) {
+    // Skip rows whose entries from index k + 2 on are already negligible.
+    if (DotProduct(N - k - 2, &T->Row(k)[k + 2], &T->Row(k)[k + 2]) > 1e-15) {
+      // u is zero in entries 0..k and holds the reflector in k+1..N-1.
+      ImageD u(N, 1);
+      ZeroFillImage(&u);
+      HouseholderReflector(N - k - 1, &T->Row(k)[k + 1], &u.Row(0)[k + 1]);
+      ImageD v = MatMul(*T, u);
+      double scale = DotProduct(u, v);
+      v = LinComb(2.0, v, -2.0 * scale, u);
+      // Two-sided update: T -= u * v^T + v * u^T.
+      SubtractFrom(MatMul(u, Transpose(v)), T);
+      SubtractFrom(MatMul(v, Transpose(u)), T);
+      u_stack.emplace_back(std::move(u));
+    }
+  }
+  // Apply the stored reflectors to U, last one first.
+  while (!u_stack.empty()) {
+    const ImageD& u = u_stack.back();
+    ImageD v = MatMul(Transpose(*U), u);
+    SubtractFrom(ScaleImage(2.0, MatMul(u, Transpose(v))), U);
+    u_stack.pop_back();
+  }
+}
+
+// Returns the shift used by ImplicitQRStep() for the trailing 2x2 block with
+// diagonal entries a0, a1 and off-diagonal entry b. With d = (a0 - a1) / 2
+// the result is a1 - b^2 / (d + sign(d) * hypot(d, b)), and a1 - |b| when
+// d is exactly zero.
+double WilkinsonShift(const double a0, const double a1, const double b) {
+  const double d = 0.5 * (a0 - a1);
+  if (d != 0.0) {
+    const double root = std::hypot(d, b);
+    // Give the root the sign of d so the denominator never cancels.
+    const double signed_root = d > 0.0 ? root : -root;
+    return a1 - b * b / (d + signed_root);
+  }
+  return a1 - std::abs(b);
+}
+
+// Runs one shifted QR sweep over the index range [m0, m1) of a symmetric
+// tridiagonal matrix stored as diagonal a[] and off-diagonal b[] (the caller
+// in this file fills b[k] with the entry coupling indices k - 1 and k).
+// a and b are updated in place and every rotation is also applied to U.
+// Requires m1 - m0 > 2.
+void ImplicitQRStep(ImageD* const PIK_RESTRICT U, double* const PIK_RESTRICT a,
+                    double* const PIK_RESTRICT b, int m0, int m1) {
+  PIK_ASSERT(m1 - m0 > 2);
+  // The shift comes from the last 2x2 block of the range.
+  double x = a[m0] - WilkinsonShift(a[m1 - 2], a[m1 - 1], b[m1 - 1]);
+  double y = b[m0 + 1];
+  for (int k = m0; k < m1 - 1; ++k) {
+    double c, s;
+    // Rotation in the (k, k + 1) plane that zeroes y against x.
+    GivensRotation(x, y, &c, &s);
+    const double w = c * x - s * y;
+    const double d = a[k] - a[k + 1];
+    const double z = (2 * c * b[k + 1] + d * s) * s;
+    a[k] -= z;
+    a[k + 1] += z;
+    b[k + 1] = d * c * s + (c * c - s * s) * b[k + 1];
+    x = b[k + 1];
+    if (k > m0) {
+      // From the second rotation on, w replaces the previous off-diagonal.
+      b[k] = w;
+    }
+    if (k < m1 - 2) {
+      // Entry pushed outside the tridiagonal band; removed by the next
+      // rotation.
+      y = -s * b[k + 2];
+      b[k + 2] *= c;
+    }
+    RotateMatrixCols(U, k, k + 1, c, s);
+  }
+}
+
+// Splits the index range [istart, iend) wherever the off-diagonal entry
+// b[k + 1] is negligible relative to its diagonal neighbours, i.e.
+// |b[k + 1]| < eps * (|a[k]| + |a[k + 1]|). Every resulting run with at least
+// two indices is appended to *intervals as (first, one-past-last).
+void ScanInterval(const double* const PIK_RESTRICT a,
+                  const double* const PIK_RESTRICT b, int istart,
+                  const int iend, const double eps,
+                  std::deque<std::pair<int, int> >* intervals) {
+  int run_begin = istart;
+  for (int k = istart; k < iend; ++k) {
+    const bool is_last = (k + 1 == iend);
+    // b[k + 1] is only read when k is not the last index of the range.
+    if (!is_last &&
+        !(std::abs(b[k + 1]) < eps * (std::abs(a[k]) + std::abs(a[k + 1])))) {
+      continue;
+    }
+    if (k > run_begin) {
+      intervals->push_back(std::make_pair(run_begin, k + 1));
+    }
+    run_begin = k + 1;
+  }
+}
+
+// Diagonalizes the symmetric matrix A: *diag receives an N x 1 image holding
+// the final diagonal entries and *U the accumulated transformations.
+// A is first reduced to tridiagonal form, then shifted QR sweeps are applied
+// to each still-coupled index range until no range is left.
+// NOTE(review): there is no iteration limit on the sweep loop — confirm
+// callers only pass matrices for which it terminates.
+void ConvertToDiagonal(const ImageD& A, ImageD* const PIK_RESTRICT diag,
+                       ImageD* const PIK_RESTRICT U) {
+  AssertSymmetric(A);
+  const size_t N = A.xsize();
+  ImageD T;
+  ConvertToTridiagonal(A, &T, U);
+  // From now on, the algorithm keeps the transformed matrix tri-diagonal,
+  // so we only need to keep track of the diagonal and the off-diagonal entries.
+  std::vector<double> a(N);
+  std::vector<double> b(N);
+  for (size_t k = 0; k < N; ++k) {
+    a[k] = T.Row(k)[k];
+    // b[k] couples indices k - 1 and k; b[0] stays at its initial zero.
+    if (k > 0) b[k] = T.Row(k)[k - 1];
+  }
+  // Run the symmetric tri-diagonal QR algorithm with implicit Wilkinson shift.
+  const double kEpsilon = 1e-14;
+  // Work list of index ranges [first, second) that are not yet diagonal.
+  std::deque<std::pair<int, int> > intervals;
+  ScanInterval(&a[0], &b[0], 0, N, kEpsilon, &intervals);
+  while (!intervals.empty()) {
+    const int istart = intervals[0].first;
+    const int iend = intervals[0].second;
+    intervals.pop_front();
+    if (iend == istart + 2) {
+      // A 2x2 block is diagonalized directly with a single rotation.
+      double& a0 = a[istart];
+      double& a1 = a[istart + 1];
+      double& b1 = b[istart + 1];
+      double c, s;
+      Diagonalize2x2(a0, a1, b1, &c, &s);
+      const double d = a0 - a1;
+      const double z = (2 * c * b1 + d * s) * s;
+      a0 -= z;
+      a1 += z;
+      b1 = 0.0;
+      RotateMatrixCols(U, istart, istart + 1, c, s);
+    } else {
+      // Larger ranges get one QR sweep and are then re-split.
+      ImplicitQRStep(U, &a[0], &b[0], istart, iend);
+      ScanInterval(&a[0], &b[0], istart, iend, kEpsilon, &intervals);
+    }
+  }
+  *diag = ImageD(N, 1);
+  double* const PIK_RESTRICT diag_row = diag->Row(0);
+  for (size_t k = 0; k < N; ++k) {
+    diag_row[k] = a[k];
+  }
+}
+
+// Factorizes the square matrix A with Householder reflections. *R starts as
+// a copy of A and is reduced in place; *Q starts as the identity and receives
+// the same reflections afterwards, applied in reverse order.
+void ComputeQRFactorization(const ImageD& A, ImageD* const PIK_RESTRICT Q,
+                            ImageD* const PIK_RESTRICT R) {
+  PIK_ASSERT(A.xsize() == A.ysize());
+  const size_t N = A.xsize();
+  *Q = Identity<double>(N);
+  *R = CopyImage(A);
+  std::vector<ImageD> u_stack;
+  for (size_t k = 0; k + 1 < N; ++k) {
+    // Skip rows whose entries from index k + 1 on are already negligible.
+    if (DotProduct(N - k - 1, &R->Row(k)[k + 1], &R->Row(k)[k + 1]) > 1e-15) {
+      // u is zero in entries 0..k-1 and holds the reflector in k..N-1.
+      ImageD u(N, 1);
+      FillImage(0.0, &u);
+      HouseholderReflector(N - k, &R->Row(k)[k], &u.Row(0)[k]);
+      ImageD v = MatMul(Transpose(u), *R);
+      // R -= 2 * u * v.
+      SubtractFrom(ScaleImage(2.0, MatMul(u, v)), R);
+      u_stack.emplace_back(std::move(u));
+    }
+  }
+  // Apply the stored reflectors to Q, last one first.
+  while (!u_stack.empty()) {
+    const ImageD& u = u_stack.back();
+    ImageD v = MatMul(Transpose(u), *Q);
+    SubtractFrom(ScaleImage(2.0, MatMul(u, v)), Q);
+    u_stack.pop_back();
+  }
+}
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/linalg.h b/codec/L2/demos/pikEnc/host/pik/linalg.h
new file mode 100755
index 0000000000..e23b940efd
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/linalg.h
@@ -0,0 +1,264 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_LINALG_H_
+#define PIK_LINALG_H_
+
+// Linear algebra.
+
+#include <stddef.h>
+#include <cmath>
+#include <vector>
+
+#include "pik/image.h"
+#include "pik/image_ops.h"
+#include "pik/status.h"
+
+namespace pik {
+
+using ImageD = Image<double>;
+
+// Returns the sum of a[k] * b[k] for k in [0, N); 0.0 when N is 0.
+// The terms are added in increasing index order.
+inline double DotProduct(const size_t N, const double* const PIK_RESTRICT a,
+                         const double* const PIK_RESTRICT b) {
+  double sum = 0.0;
+  // The index has the same type as N: no signed/unsigned comparison and no
+  // overflow of the counter for very large N.
+  for (size_t k = 0; k < N; ++k) {
+    sum += a[k] * b[k];
+  }
+  return sum;
+}
+
+// Dot product of two single-row images of equal width.
+inline double DotProduct(const ImageD& a, const ImageD& b) {
+  PIK_ASSERT(a.ysize() == 1);
+  PIK_ASSERT(b.ysize() == 1);
+  PIK_ASSERT(a.xsize() == b.xsize());
+  const size_t length = a.xsize();
+  return DotProduct(length, a.Row(0), b.Row(0));
+}
+
+// Returns a new image with rows and columns swapped: out.Row(x)[y] equals
+// A.Row(y)[x].
+inline ImageD Transpose(const ImageD& A) {
+  const size_t width = A.xsize();
+  const size_t height = A.ysize();
+  ImageD out(height, width);
+  // Walk the input row by row and scatter each row into an output column.
+  for (size_t y = 0; y < height; ++y) {
+    const double* const row_in = A.Row(y);
+    for (size_t x = 0; x < width; ++x) {
+      out.Row(x)[y] = row_in[x];
+    }
+  }
+  return out;
+}
+
+// Matrix product of two images with result element type Tout.
+// Requires A.ysize() == B.xsize(). The result has A.xsize() columns and
+// B.ysize() rows, and out.Row(y)[x] is the sum over k of
+// A.Row(k)[x] * B.Row(y)[k].
+template <typename Tout, typename Tin1, typename Tin2>
+Image<Tout> MatMul(const Image<Tin1>& A, const Image<Tin2>& B) {
+  PIK_ASSERT(A.ysize() == B.xsize());
+  const size_t cols = A.xsize();
+  const size_t rows = B.ysize();
+  const size_t inner = B.xsize();
+  Image<Tout> out(cols, rows);
+  for (size_t y = 0; y < rows; ++y) {
+    const Tin2* const PIK_RESTRICT row_b = B.Row(y);
+    Tout* const PIK_RESTRICT row_out = out.Row(y);
+    for (size_t x = 0; x < cols; ++x) {
+      // Accumulate in Tout so every partial sum is converted exactly as it
+      // would be when adding straight into the output element.
+      Tout acc = 0.0;
+      for (size_t k = 0; k < inner; ++k) {
+        acc += A.Row(k)[x] * row_b[k];
+      }
+      row_out[x] = acc;
+    }
+  }
+  return out;
+}
+
+// Convenience overload: image matrix product with a double result.
+template <typename T1, typename T2>
+ImageD MatMul(const Image<T1>& A, const Image<T2>& B) {
+  return MatMul<double, T1, T2>(A, B);
+}
+
+// Same product with an int result (the general MatMul instantiated with
+// Tout = int).
+template <typename T1, typename T2>
+ImageI MatMulI(const Image<T1>& A, const Image<T2>& B) {
+  return MatMul<int, T1, T2>(A, B);
+}
+
+// Computes C = A * B for row-major matrices stored in flat arrays:
+// a is ha x wa, b is wa x wb and the result c is ha x wb.
+// Each dot product is accumulated in double and converted to T on store.
+template <typename T>
+void MatMul(const T* a, const T* b, int ha, int wa, int wb, T* c) {
+  // One column of b is gathered into contiguous storage before it is used
+  // against every row of a.
+  std::vector<T> column(wa);
+  for (int col = 0; col < wb; ++col) {
+    for (int k = 0; k < wa; ++k) {
+      column[k] = b[k * wb + col];
+    }
+    for (int row = 0; row < ha; ++row) {
+      const T* const a_row = a + row * wa;
+      double acc = 0;
+      for (int k = 0; k < wa; ++k) {
+        acc += a_row[k] * column[k];
+      }
+      c[row * wb + col] = acc;
+    }
+  }
+}
+
+// Element-wise C = A + factor * B over h * w entries of flat arrays.
+// c may be the same array as a or b.
+template <typename T, typename F>
+void MatAdd(const T* a, const T* b, F factor, int h, int w, T* c) {
+  const int count = w * h;
+  int i = 0;
+  while (i < count) {
+    c[i] = a[i] + b[i] * factor;
+    ++i;
+  }
+}
+
+// Returns the N x N identity matrix with element type T.
+template <typename T>
+inline Image<T> Identity(const size_t N) {
+  Image<T> out(N, N);
+  for (size_t r = 0; r < N; ++r) {
+    T* PIK_RESTRICT row = out.Row(r);
+    // Clear the whole row, then set the diagonal entry.
+    for (size_t col = 0; col < N; ++col) {
+      row[col] = 0;
+    }
+    row[r] = static_cast<T>(1.0);
+  }
+  return out;
+}
+
+// Expands the single-row image d into a square matrix that has the entries
+// of d on its diagonal and zeros elsewhere.
+inline ImageD Diagonal(const ImageD& d) {
+  PIK_ASSERT(d.ysize() == 1);
+  const size_t n = d.xsize();
+  ImageD out(n, n);
+  const double* PIK_RESTRICT row_diag = d.Row(0);
+  for (size_t r = 0; r < n; ++r) {
+    double* PIK_RESTRICT row_out = out.Row(r);
+    for (size_t col = 0; col < n; ++col) {
+      row_out[col] = 0.0;
+    }
+    row_out[r] = row_diag[r];
+  }
+  return out;
+}
+
+// Computes c, s such that c^2 + s^2 = 1 and
+//   [c -s] [x] = [ * ]
+//   [s  c] [y]   [ 0 ]
+void GivensRotation(const double x, const double y, double* c, double* s);
+
+// U = U * Givens(i, j, c, s)
+void RotateMatrixCols(ImageD* const PIK_RESTRICT U, int i, int j, double c,
+                      double s);
+
+// A is symmetric, U is orthogonal, T is tri-diagonal and
+// A = U * T * Transpose(U).
+void ConvertToTridiagonal(const ImageD& A, ImageD* const PIK_RESTRICT T,
+                          ImageD* const PIK_RESTRICT U);
+
+// A is symmetric, U is orthogonal, and A = U * Diagonal(diag) * Transpose(U).
+void ConvertToDiagonal(const ImageD& A, ImageD* const PIK_RESTRICT diag,
+                       ImageD* const PIK_RESTRICT U);
+
+// A is square matrix, Q is orthogonal, R is upper triangular and A = Q * R;
+void ComputeQRFactorization(const ImageD& A, ImageD* const PIK_RESTRICT Q,
+                            ImageD* const PIK_RESTRICT R);
+
+// Replaces the row-major 3x3 matrix stored in matrix[0..8] by its inverse,
+// computed as adjugate / determinant. No check is made for a zero
+// determinant.
+template <typename T>
+void Inv3x3Matrix(T* matrix) {
+  // Work on a snapshot so the output can overwrite the input.
+  T m[9];
+  for (int i = 0; i < 9; ++i) {
+    m[i] = matrix[i];
+  }
+  const T adj[9] = {
+      m[4] * m[8] - m[5] * m[7], m[2] * m[7] - m[1] * m[8],
+      m[1] * m[5] - m[2] * m[4], m[5] * m[6] - m[3] * m[8],
+      m[0] * m[8] - m[2] * m[6], m[2] * m[3] - m[0] * m[5],
+      m[3] * m[7] - m[4] * m[6], m[1] * m[6] - m[0] * m[7],
+      m[0] * m[4] - m[1] * m[3],
+  };
+  // Determinant expanded along the first row.
+  const T inv_det = 1.0 / (m[0] * adj[0] + m[1] * adj[3] + m[2] * adj[6]);
+  for (int i = 0; i < 9; ++i) {
+    matrix[i] = adj[i] * inv_det;
+  }
+}
+
+// Solves system of linear equations A * X = B using the conjugate gradient
+// method. Matrix a must be a n*n, symmetric and positive definite.
+// Vectors b and x must have n elements.
+// x is both input and output: its incoming contents are used as the starting
+// guess and it is refined in place for at most n iterations.
+template <typename T>
+void ConjugateGradient(const T* a, int n, const T* b, T* x) {
+  // Initial residual r = b - A * x.
+  std::vector<T> r(n);
+  MatMul(a, x, n, n, 1, r.data());
+  MatAdd(b, r.data(), -1, n, 1, r.data());
+  // The first search direction is the residual itself.
+  std::vector<T> p = r;
+  T rr;
+  MatMul(r.data(), r.data(), 1, n, 1, &rr);  // inner product
+
+  if (rr == 0) return;  // The initial values were already optimal
+
+  for (int i = 0; i < n; i++) {
+    std::vector<T> ap(n);
+    MatMul(a, p.data(), n, n, 1, ap.data());
+    T alpha;
+    // NOTE(review): the step length uses r . (A p) rather than p . (A p);
+    // these agree in exact arithmetic for conjugate directions — confirm
+    // this form is intended.
+    MatMul(r.data(), ap.data(), 1, n, 1, &alpha);
+    // Normally alpha couldn't be zero here but if numerical issues caused it,
+    // return assuming the solution is close.
+    if (alpha == 0) return;
+    alpha = rr / alpha;
+    // x += alpha * p;  r -= alpha * A p.
+    MatAdd(x, p.data(), alpha, n, 1, x);
+    MatAdd(r.data(), ap.data(), -alpha, n, 1, r.data());
+
+    T rr2;
+    MatMul(r.data(), r.data(), 1, n, 1, &rr2);  // inner product
+    // Stop once the squared residual norm is negligible.
+    if (rr2 < 1e-20) break;
+
+    // New direction p = r + beta * p.
+    T beta = rr2 / rr;
+    MatAdd(r.data(), p.data(), beta, 1, n, p.data());
+    rr = rr2;
+  }
+}
+
+// Computes optimal coefficients r to approximate points p with linear
+// combination of functions f. The matrix f has h rows and w columns, r has h
+// values, p has w values. h is the amount of functions, w the amount of points.
+// Uses the finite element method and minimizes mean square error.
+// r is handed to ConjugateGradient() as is, so its incoming contents act as
+// the solver's starting guess.
+template <typename T>
+void FEM(const T* f, int h, int w, const T* p, T* r) {
+  // Compute "Gramian" matrix G = F * F^T
+  // Speed up multiplication by using non-zero intervals in sparse F.
+  // For row y, [start[y], end[y]) spans its first to last non-zero entry;
+  // an all-zero row keeps the empty interval [0, 0).
+  std::vector<int> start(h);
+  std::vector<int> end(h);
+  for (int y = 0; y < h; y++) {
+    start[y] = end[y] = 0;
+    for (int x = 0; x < w; x++) {
+      if (f[y * w + x] != 0) {
+        start[y] = x;
+        break;
+      }
+    }
+    for (int x = w - 1; x >= 0; x--) {
+      if (f[y * w + x] != 0) {
+        end[y] = x + 1;
+        break;
+      }
+    }
+  }
+
+  std::vector<T> g(h * h);
+  for (int y = 0; y < h; y++) {
+    // Only the lower triangle is computed; each value is mirrored.
+    for (int x = 0; x <= y; x++) {
+      T v = 0;
+      // Intersection of the two sparse intervals.
+      int s = std::max(start[x], start[y]);
+      int e = std::min(end[x], end[y]);
+      for (int z = s; z < e; z++) {
+        v += f[x * w + z] * f[y * w + z];
+      }
+      // Symmetric, so two values output at once
+      g[y * h + x] = v;
+      g[x * h + y] = v;
+    }
+  }
+
+  // B vector: for each row y of F, the sum over x of f[y][x] * p[x].
+  std::vector<T> b(h, 0);
+  for (int y = 0; y < h; y++) {
+    T v = 0;
+    for (int x = 0; x < w; x++) {
+      v += f[y * w + x] * p[x];
+    }
+    b[y] = v;
+  }
+
+  // Solve G * r = b.
+  ConjugateGradient(g.data(), h, b.data(), r);
+}
+
+}  // namespace pik
+
+#endif  // PIK_LINALG_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/lossless16.cc b/codec/L2/demos/pikEnc/host/pik/lossless16.cc
new file mode 100755
index 0000000000..aa46ae8df5
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/lossless16.cc
@@ -0,0 +1,1296 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+// @author Alexander Rhatushnyak
+
+#include "pik/lossless16.h"
+
+#include <cmath>
+
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+#include "pik/lossless_entropy.h"
+
+namespace pik {
+
+namespace {
+
+// Compile-time sizes for the State struct below.
+// kGroupSize2plus sizes State::compressedDataTmpBuf, kNumContexts1/3 bound the
+// first dimension of State::edata, kMaxError caps the per-predictor errors
+// stored by Update_Errors_0_1_2_3 and kMaxSumErrors sizes State::error2weight.
+static const int kGroupSize = 512,
+                 kGroupSize2plus = kGroupSize * kGroupSize * 9 / 8,
+    kWithSign1 = 4, kBitsMax1 = 13, kNumContexts1 = 1 + kWithSign1 + kBitsMax1,
+    kWithSign3 = 3, kBitsMax3 = 13, kNumContexts3 = 1 + kWithSign3 + kBitsMax3,
+    kMaxError = 0x3fbf,  kMaxSumErrors = (kMaxError + 1) * 4,  kNumRuns = 1;
+
+// TODO(lode): split state variables needed for encoder from those for decoder
+//             and run const init just once!  ~65536*3 'const' values in State()
+struct State {
+  int prediction0, prediction1, prediction2, prediction3,
+      width, WithSIGN, BitsMAX, NumCONTEXTS;
+  uint16_t *PIK_RESTRICT rowImg, *PIK_RESTRICT rowPrev;
+
+  uint16_t edata[kNumContexts1 > kNumContexts3 ? kNumContexts1 : kNumContexts3]
+                [kGroupSize * kGroupSize];
+  uint8_t compressedDataTmpBuf[kGroupSize2plus], *compressedData;
+  int32_t errors0[kGroupSize * 2];  // Errors of predictor 0
+  int32_t errors1[kGroupSize * 2];  // Errors of predictor 1
+  int32_t errors2[kGroupSize * 2];  // Errors of predictor 2
+  int32_t errors3[kGroupSize * 2];  // Errors of predictor 3
+  uint8_t nbitErr[kGroupSize * 2];
+  int32_t trueErr[kGroupSize * 2];
+
+  uint16_t error2weight[kMaxSumErrors],            // const
+           sign_LSB_forward_transform[0x10000],    // const
+           sign_LSB_backward_transform[0x10000];   // const
+  uint8_t numBitsTable[256];                       // const
+
+  // Fills the lookup tables that are marked "const" at their declarations and
+  // clears edata.
+  State() {
+    for (int v = 0; v < 256; ++v) {
+      numBitsTable[v] = numbitsInit(v);
+    }
+
+    // Weight table: 181 * 256 / j, with the slot for j == 0 set to 0xffff.
+    error2weight[0] = 0xffff;
+    for (int j = 1; j < kMaxSumErrors; ++j) {
+      error2weight[j] = 181 * 256 / j;
+    }
+
+    for (int v = 0; v < 0x10000; ++v) {
+      // For compress: values with bit 15 set map to odd codes, the others to
+      // even codes.
+      sign_LSB_forward_transform[v] =
+          (v & 0x8000) ? (0xffff - v) * 2 + 1 : v * 2;
+      // For decompress: odd codes map back to 0xffff - (v >> 1), even codes
+      // to v >> 1.
+      sign_LSB_backward_transform[v] = (v & 1) ? 0xffff - (v >> 1) : v >> 1;
+    }
+
+    // Prevent uninitialized values in case of invalid compressed data
+    memset(edata, 0, sizeof(edata));
+  }
+
+  // Returns the number of significant bits of x for 0 <= x <= 255
+  // (0 -> 0, 1 -> 1, 2..3 -> 2, ..., 128..255 -> 8). Used to fill
+  // numBitsTable.
+  PIK_INLINE int numbitsInit(int x) {
+    assert(0 <= x && x <= 255);
+    int bits = 0;
+    if (x >= 16) {
+      bits = 4;
+      x >>= 4;
+    }
+    if (x >= 4) {
+      bits += 2;
+      x >>= 2;
+    }
+    // x is now at most 3; values 2 and 3 both need two more bits.
+    return bits + (x < 2 ? x : 2);
+  }
+
+  // Returns the number of significant bits of x for 0 <= x <= 0xffff, looked
+  // up through numBitsTable. Results for x >= 256 are capped at BitsMAX.
+  PIK_INLINE int numBits(int x) {
+    assert(0 <= x && x <= 0xffff);
+    if (x >= 256) {
+      const int wide = 8 + numBitsTable[x >> 8];
+      return wide < BitsMAX ? wide : BitsMAX;
+    }
+    return numBitsTable[x];
+  }
+
+  // Predictor used by predict1() when there is no previous row (rowPrev is
+  // null). Sets all four sub-predictions to the same value, stores the
+  // context in maxErr and returns the prediction clamped to [0, 0xffff].
+  PIK_INLINE int predict1y0(size_t x, size_t yp, size_t yp1, int& maxErr) {
+    int guess;
+    if (x == 0) {
+      maxErr = NumCONTEXTS - 1;
+      guess = 14 * 256;  // 14
+    } else if (x == 1) {
+      maxErr = nbitErr[yp - 1];
+      guess = rowImg[x - 1];
+    } else {
+      maxErr = std::max(nbitErr[yp - 1], nbitErr[yp - 2]);
+      // Left neighbour plus a quarter of the difference to its own left
+      // neighbour.
+      guess = rowImg[x - 1] + (rowImg[x - 1] - rowImg[x - 2]) / 4;
+    }
+    prediction0 = prediction1 = prediction2 = prediction3 = guess;
+    if (guess < 0) return 0;
+    return guess > 0xffff ? 0xffff : guess;
+  }
+
+  // Predictor used by predict1() for x == 0 when a previous row exists.
+  // prediction0/2/3 are the pixel above; prediction1 (the return value) is a
+  // rounded 3:1 blend of the pixel above and its right neighbour.
+  PIK_INLINE int predict1x0(size_t x, size_t yp, size_t yp1, int& maxErr) {
+    // Offset to the right-hand neighbour; 0 when x is not below width.
+    const size_t ne = (x < width ? 1 : 0);
+    maxErr = std::max(nbitErr[yp1], nbitErr[yp1 + ne]);
+    const int north = rowPrev[x];
+    prediction0 = prediction2 = prediction3 = north;
+    prediction1 = (north * 3 + rowPrev[x + ne] + 2) >> 2;
+    return prediction1;
+  }
+
+  // Predicts the pixel at column x from its already-coded neighbours.
+  // yp / yp1 index the per-row history arrays (errors0..3, nbitErr, trueErr)
+  // for the current and the previous row respectively, as the reads below
+  // show. Stores the four sub-predictions in prediction0..3, writes the
+  // context to maxErr and returns the blended, clamped prediction.
+  PIK_INLINE int predict1(size_t x, size_t yp, size_t yp1, int& maxErr) {
+    // Special cases: no previous row, or first column.
+    if (!rowPrev) return predict1y0(x, yp, yp1, maxErr);
+    if (x == 0LL) return predict1x0(x, yp, yp1, maxErr);
+    // Sum each predictor's past errors at W, N and NW (NE is added below).
+    int weight0 = errors0[yp - 1] + errors0[yp1] + errors0[yp1 - 1];
+    int weight1 = errors1[yp - 1] + errors1[yp1] + errors1[yp1 - 1];
+    int weight2 = errors2[yp - 1] + errors2[yp1] + errors2[yp1 - 1];
+    int weight3 = errors3[yp - 1] + errors3[yp1] + errors3[yp1 - 1];
+    // mxe: largest nbitErr among the same neighbours.
+    uint8_t mxe = nbitErr[yp - 1];
+    mxe = std::max(mxe, nbitErr[yp1]);
+    mxe = std::max(mxe, nbitErr[yp1 - 1]);
+    int N = rowPrev[x], W = rowImg[x - 1],
+        NE = N;  // NW = rowPrev[x - 1] unused!
+    if (x < width) {
+      mxe = std::max(mxe, nbitErr[yp1 + 1]), NE = rowPrev[x + 1];
+      weight0 += errors0[yp1 + 1];
+      weight1 += errors1[yp1 + 1];
+      weight2 += errors2[yp1 + 1];
+      weight3 += errors3[yp1 + 1];
+    }
+
+    // Turn error sums into weights via the error2weight table (larger error
+    // sum -> smaller weight). Predictors 0 and 1 get an extra +1.
+    weight0 = error2weight[weight0] + 1;
+    weight1 = error2weight[weight1] + 1;
+    weight2 = error2weight[weight2];
+    weight3 = error2weight[weight3];
+
+    int teW = trueErr[yp - 1];  // range: -0xffff...0xffff
+    int teN = trueErr[yp1];
+    int teNW = trueErr[yp1 - 1];
+    int sumWN = teN + teW;  // range: -0x1fffe...0x1fffe
+    int teNE = (x < width ? trueErr[yp1 + 1] : 0);
+
+    prediction0 = N - sumWN * 3 / 4;                          // 24/32
+    prediction1 = W - (sumWN + teNW) * 11 / 32;               // 11/32
+    prediction2 = W + (((NE - N) * 13 + 7) >> 4);             // 26/32
+    prediction3 = N - (((teN + teNW + teNE) * 7 + 29) >> 5);  //  7/32
+    // Weighted average of the four sub-predictions; s starts at
+    // sumWeights * 3 / 8 as a rounding offset.
+    int sumWeights = weight0 + weight1 + weight2 + weight3;
+    int64_t s = sumWeights * 3 / 8;
+    s += ((int64_t)prediction0) * weight0;
+    s += ((int64_t)prediction1) * weight1;
+    s += ((int64_t)prediction2) * weight2;
+    s += ((int64_t)prediction3) * weight3;
+    int prediction = s / sumWeights;
+
+    // For small non-zero contexts, step the context down by one when the
+    // weighted neighbour errors are negative overall.
+    if (mxe && mxe <= WithSIGN * 2) {
+      if (teW * 3 + teN * 2 + teNW + teNE < 0) --mxe;  // 3 2 1 1
+    }
+    maxErr = mxe;
+
+    // Clamp the prediction to the range spanned by W, NE and N -/+ 28.
+    int mx = std::max(N - 28, std::max(W, NE));  // 28
+    int mn = std::min(N + 28, std::min(W, NE));  // 28
+    prediction = std::max(mn, std::min(mx, prediction));
+    return prediction;
+  }
+
+  // Returns true when the buffer holds at least 4 bytes and every byte equals
+  // the first one.
+  bool IsRLE(const uint8_t* data, size_t size) {
+    if (size < 4) return false;
+    const uint8_t* const end = data + size;
+    for (const uint8_t* p = data + 1; p != end; ++p) {
+      if (*p != *data) return false;
+    }
+    return true;
+  }
+
+  // TODO(lode): move this to lossless_entropy.cc
+  // Encodes the S bytes that start 8 bytes past *pos in compressedBuf and
+  // writes a varint header followed by the payload at *pos, advancing *pos.
+  // Header value (the layout decompressWithEntropyCode() reads back):
+  //   0                -> empty block (S == 0)
+  //   cs * 3           -> entropy-coded payload of cs bytes
+  //   (S - 1) * 3 + 1  -> raw copy of the S input bytes
+  //   (S - 1) * 3 + 2  -> one byte, repeated S times
+  // Returns false only when MaybeEntropyEncode() fails.
+  bool compressWithEntropyCode(size_t* pos, size_t S, uint8_t* compressedBuf) {
+    if (S == 0) {
+      *pos += encodeVarInt(0, &compressedBuf[*pos]);
+      return true;
+    }
+    // The input sits 8 bytes ahead of the write position in the same buffer.
+    uint8_t* src = &compressedBuf[*pos + 8];
+    size_t cs;
+    if (IsRLE(src, S)) {
+      cs = 1;  // use RLE encoding instead
+    } else {
+      if (!MaybeEntropyEncode(src, S, sizeof(compressedDataTmpBuf),
+                              &compressedDataTmpBuf[0], &cs)) {
+        return PIK_FAILURE("lossless16 entropy encode");
+      }
+    }
+    if (cs >= S) cs = 0;  // EntropyCode worse than original, use memcpy.
+    *pos += encodeVarInt(cs <= 1 ? (S - 1) * 3 + 1 + cs : cs * 3,
+                         &compressedBuf[*pos]);
+    uint8_t* dst = &compressedBuf[*pos];
+    if (cs == 1)
+      compressedBuf[(*pos)++] = *src;
+    else if (cs == 0)
+      // Source and destination lie in the same buffer, hence memmove.
+      memmove(dst, src, S), *pos += S;
+    else
+      memcpy(dst, &compressedDataTmpBuf[0], cs), *pos += cs;
+    return true;
+  }
+
+  // TODO(lode): move this to lossless_entropy.cc
+  // Decodes one block written by compressWithEntropyCode() from src at *pos
+  // into dst and advances *pos past it.
+  // ds = decompressed size output
+  // Returns false when the block would read past src_capacity, write past
+  // dst_capacity, or MaybeEntropyDecode() fails.
+  bool decompressWithEntropyCode(uint8_t* dst, size_t dst_capacity,
+                                 const uint8_t* src, size_t src_capacity,
+                                 size_t* ds, size_t* pos) {
+    size_t cs = decodeVarInt(src, src_capacity, pos);
+    if (cs == 0) {
+      *ds = 0;
+      return true;
+    }
+    // The header is size * 3 + mode.
+    size_t mode = cs % 3;
+    cs /= 3;
+    if (mode == 2) {
+      // Run of cs + 1 copies of the next source byte.
+      if (*pos >= src_capacity) return PIK_FAILURE("entropy decode failed");
+      if (cs + 1 > dst_capacity) return PIK_FAILURE("entropy decode failed");
+      memset(dst, src[(*pos)++], ++cs);
+      *ds = cs;
+    } else if (mode == 1) {
+      // Raw copy of cs + 1 bytes.
+      if (*pos + cs + 1 > src_capacity)
+        return PIK_FAILURE("entropy decode failed");
+      if (cs + 1 > dst_capacity) return PIK_FAILURE("entropy decode failed");
+      memcpy(dst, &src[*pos], ++cs);
+      *pos += cs;
+      *ds = cs;
+    } else {
+      // Entropy-coded payload of cs bytes.
+      if (*pos + cs > src_capacity) return PIK_FAILURE("entropy decode failed");
+      if (!MaybeEntropyDecode(&src[*pos], cs, dst_capacity, dst, ds)) {
+        return PIK_FAILURE("entropy decode failed");
+      }
+      *pos += cs;
+    }
+
+    return true;
+  }
+
+// Statement macro: for each of the four sub-predictions, stores
+// min(|predictionK - truePixelValue|, kMaxError) in errorsK[yp + x].
+// Expects err, truePixelValue, yp and x to be in scope at the point of use
+// and overwrites err.
+#define Update_Errors_0_1_2_3                                  \
+  err = prediction0 - truePixelValue;                          \
+  if (err < 0) err = -err; /* abs() and min()? worse speed! */ \
+  if (err > kMaxError) err = kMaxError;                        \
+  errors0[yp + x] = err;                                       \
+  err = prediction1 - truePixelValue;                          \
+  if (err < 0) err = -err;                                     \
+  if (err > kMaxError) err = kMaxError;                        \
+  errors1[yp + x] = err;                                       \
+  err = prediction2 - truePixelValue;                          \
+  if (err < 0) err = -err;                                     \
+  if (err > kMaxError) err = kMaxError;                        \
+  errors2[yp + x] = err;                                       \
+  err = prediction3 - truePixelValue;                          \
+  if (err < 0) err = -err;                                     \
+  if (err > kMaxError) err = kMaxError;                        \
+  errors3[yp + x] = err;
+
+// Statement macro: counts one symbol in esize[maxErr], records the signed
+// error in trueErr[yp + x] and its bit-count class in nbitErr[yp + x], then
+// runs Update_Errors_0_1_2_3. Expects esize, maxErr, err, yp and x in scope.
+#define Update_Size_And_Errors                                    \
+  ++esize[maxErr];                                                \
+  trueErr[yp + x] = err;                                          \
+  err = numBits(err >= 0 ? err : -err);                           \
+  nbitErr[yp + x] = (err <= WithSIGN ? err * 2 : err + WithSIGN); \
+  Update_Errors_0_1_2_3
+
+  // Starting values for the 64 adaptive probability slots used by
+  // Palette_compress(), which copies them shifted left by 11 bits.
+  // NOTE(review): the values look like tuned 16-bit probabilities — confirm
+  // how they were derived before changing them.
+  const uint16_t smt0[64] = {
+    0x2415, 0x1d7d, 0x1f71, 0x46fe, 0x24f1, 0x3f15, 0x4a65, 0x6236,
+    0x242c, 0x34ce, 0x4872, 0x5cf6, 0x4857, 0x64fe, 0x6745, 0x7986,
+    0x24ad, 0x343c, 0x499a, 0x5fb5, 0x49a9, 0x61e8, 0x6e1f, 0x78ae,
+    0x4ba3, 0x6332, 0x6c8b, 0x7ccd, 0x6819, 0x8247, 0x83f2, 0x8cce,
+    0x247e, 0x3277, 0x391f, 0x5ea3, 0x4694, 0x5168, 0x67e3, 0x784b,
+    0x474b, 0x5072, 0x666b, 0x6cb3, 0x6514, 0x7ba6, 0x83e4, 0x8cef,
+    0x48bf, 0x6363, 0x6677, 0x7b76, 0x67f9, 0x7e0d, 0x826f, 0x8a52,
+    0x659f, 0x7d6f, 0x7f8e, 0x8f66, 0x7ed6, 0x9169, 0x9269, 0x90e4,
+  };
+
+  // Serializes, for each channel, the set of pixel values that occur in it.
+  // palette123 holds one 0x10000-entry table per channel in which a non-zero
+  // entry marks a used value; firstColors[channel] is the lowest used value
+  // (its table entry is 0 on entry because the callers in this file assign
+  // it index 0, so it is set to 1 for the duration of the call).
+  // Output per channel at pb: a 2-byte little-endian header holding
+  // payloadSize * 2 + method, then the payload. Method 0 is a binary range
+  // coder over the presence bits with 64 adaptive contexts keyed by the last
+  // six bits; method 1 is a plain bitmap, used when the coded form is not
+  // smaller. Returns the pointer just past the last byte written.
+  // NOTE(review): the coding loop only stops once 0x10000 payload bytes have
+  // been written, while the callers visible in this file pass 0x10010 / 8
+  // bytes per channel — confirm the coder cannot exceed the buffer.
+  uint8_t* Palette_compress(int numChannels, uint32_t *numColors, uint8_t *pb,
+                     std::vector<uint32_t> &palette123, uint32_t *firstColors) {
+    for (int channel = 0; channel < numChannels; ++channel) {
+      uint32_t *palette = &palette123[0x10000 * channel];
+      uint8_t *pb0 = pb;
+      pb += 2;  // reserve 2 bytes for  (Compressed Size)*2 + Method
+      palette[firstColors[channel]] = 1;
+      // [x1, x2] is the current coding interval.
+      uint32_t nc = numColors[channel], x1 = 0, x2 = 0xffffffff;
+      int x, smt[64], context6 = 0, sumv = 0;
+      for (int i = 0; i < 64; ++i)  smt[i] = smt0[i] << 11;  // 1<<(15+11);
+      for (x = 0; x < 0x10000; ++x) {
+        int v = (palette[x] ? 1 : 0);
+        uint32_t pr = smt[context6] >> 11;
+        // Split the interval in proportion to pr / 65536.
+        uint32_t xmid = x1 + ((x2-x1) >> 16)*pr + (((x2-x1) & 0xffff)*pr >> 16);
+        assert(pr>=0 && pr<=0xffff && xmid>=x1 && xmid<x2);
+        if (v) x2 = xmid;
+        else   x1 = xmid + 1;
+        // Emit leading bytes on which both interval ends agree.
+        if (((x1 ^ x2) & 0xff000000)==0) {
+          do {
+            *pb++ = x1 >> 24;
+            x1 <<= 8;
+            x2 = (x2 << 8) + 255;
+          }
+          while (((x1 ^ x2) & 0xff000000)==0);
+          if (pb >= pb0 + 2 + 0x10000)  break;
+        }
+        int p0 = smt[context6];
+        p0 += ((v << (16+11)) - p0) * 5 >> 7;  // Learning rate
+        smt[context6] = p0;
+        context6 = (context6 * 2 + v) & 0x3f;
+        sumv += v;
+        // Stop once every used value has been seen, or once all remaining
+        // values must be used.
+        if (sumv == nc || sumv + 0x10000 - 1 - x == nc)  break;
+      }
+      *pb++ = static_cast<uint8_t>((x1 >> 24) & 0xFF);
+      //if (count > 512) {  for (int i = 0; i < 64; ++i)
+      //                        printf("0x%x,", smt[i]);   printf("\n"); }
+      int method = 0;
+      if (pb - (pb0+2) >= ((x+7)>>3)) {   // Store, no compression
+        method = 1;
+        pb = pb0+2;
+        // One output byte per 8 values; the lowest value ends up in bit 0.
+        for (int sumv = 0, x = 0; x < 0x10000; x += 8) {
+          uint32_t b = 0, v;
+          for (int y = x + 7; y >= x; --y)
+              v = (palette[y] ? 1 : 0), b += b + v, sumv += v;
+          *pb++ = b;
+          if (sumv >= nc || sumv + 0x10000 - 8 - x == nc)  break;
+        }
+      }
+      int compressedSize = (pb - (pb0+2)) * 2  +  method;
+      pb0[0] = static_cast<uint8_t>( compressedSize       & 0xFF);
+      pb0[1] = static_cast<uint8_t>((compressedSize >> 8) & 0xFF);
+      // Restore the table entry that was forced to 1 above.
+      palette[firstColors[channel]] = 0;
+    }
+    return pb;
+  }
+
+// FWr appends bufsize bytes from buf to *bytes, but only when run == 0.
+// Expects `bytes` (pointer to a resizable byte container) and `run` to be in
+// scope at the point of use.
+#define FWr(buf, bufsize) {                          \
+    if (run == 0) {                                  \
+      size_t current = bytes->size();                \
+      bytes->resize(bytes->size() + bufsize);        \
+      memcpy(bytes->data() + current, buf, bufsize); \
+    }}                                               \
+
+// FWrByte appends the single byte b through FWr.
+#define FWrByte(b) {  \
+    uint8_t byte = b; \
+    FWr(&byte, 1);    \
+  }
+
+  // Single-channel palette step. Collects the set of pixel values used in
+  // img and, when at most 65536 / 16 distinct values occur, replaces every
+  // pixel by the rank of its value among the used values and appends the
+  // palette description to *bytes. A flag byte (0 = no palette) is always
+  // appended first. Output is only produced when run == 0 (see FWr); img is
+  // modified regardless of run.
+  void PerChannelPalette_compress_1(ImageU& img, PaddedBytes* bytes, int run) {
+    const int numChannels = 1, channel = 0;
+    size_t xsize = img.xsize(), ysize = img.ysize();
+    std::vector<uint32_t> palette123(0x10000 * numChannels);
+    memset(palette123.data(), 0, 0x10000 * numChannels * sizeof(uint32_t));
+    uint8_t bits[0x10010 / 8], flags = 0, compressedData[6];
+    memset(bits, 0, sizeof(bits));
+    uint32_t firstColors[3], numColors[3] = {0xffff, 0xffff, 0xffff};
+
+      uint32_t i, count, *palette = &palette123[0x10000 * channel];
+      // Mark every value that occurs in the image.
+      for (size_t y = 0; y < ysize; ++y) {
+        uint16_t* const PIK_RESTRICT rowImg = img.Row(y);
+        for (size_t x = 0; x < xsize; ++x)  // UNROLL AND PARALLELIZE ME!
+          palette[rowImg[x]] = 1;
+      }
+      // count the number of pixel values present in the channel
+      for (i = 0; i < 0x10000; ++i)
+        if (palette[i]) break;
+      // Replace each mark by the value's rank; the first used value gets
+      // rank 0, so its table entry becomes 0.
+      for (firstColors[channel] = i, count = 0; i < 0x10000; ++i)
+        if (palette[i]) palette[i] = count++;
+      // printf("count=%5d, %f%%\n", count, count * 100. / 65536);
+      if (count > 65536 / 16) {  // TODO: smarter decision making
+        flags = 0;
+      }
+      else flags += 1 << channel;
+      numColors[channel] = count;
+
+    FWrByte(flags);  // ImageMethod==flags; in this function either 0 or 1
+    if (flags) {
+      uint8_t *pb = Palette_compress(numChannels, &numColors[0], &bits[0],
+                                     palette123, &firstColors[0]);
+      // Apply the channel's "palette"
+      uint32_t *palette = &palette123[0x10000 * channel];
+      for (size_t y = 0; y < ysize; ++y) {
+        uint16_t* const PIK_RESTRICT rowImg = img.Row(y);
+        for (size_t x = 0; x < xsize; ++x)  // UNROLL AND PARALLELIZE ME!
+          rowImg[x] = palette[rowImg[x]];
+      }
+      // Stream layout after the flag byte: varint colour count, then the
+      // bytes produced by Palette_compress.
+      int pos = encodeVarInt(numColors[channel], &compressedData[0]);
+      FWr(&compressedData[0], pos);
+      FWr(&bits[0], sizeof(uint8_t) * (pb - &bits[0]));
+    }  // if (flags)
+  }
+
+  // Three-channel palette step. For each plane it collects the set of used
+  // pixel values; if any plane uses more than 65536 * 3 / 4 distinct values
+  // the palette is abandoned for the whole image (flags = 0). Otherwise every
+  // pixel is replaced by the rank of its value within its plane and the three
+  // palette descriptions are appended to *bytes. A flag byte is always
+  // appended first. Output is only produced when run == 0 (see FWr); img is
+  // modified regardless of run.
+  void PerChannelPalette_compress_3(Image3U& img, PaddedBytes* bytes, int run) {
+    const int numChannels = 3;
+    size_t xsize = img.xsize(), ysize = img.ysize();
+    std::vector<uint32_t> palette123(0x10000 * numChannels);
+    memset(palette123.data(), 0, 0x10000 * numChannels * sizeof(uint32_t));
+    uint8_t bits[3 * 0x10010 / 8], flags = 0, compressedData[3*6];
+    memset(bits, 0, sizeof(bits));
+    uint32_t firstColors[3], numColors[3] = {0xffff, 0xffff, 0xffff};
+
+    for (int channel = 0; channel < numChannels; ++channel) {
+      uint32_t i, count, *palette = &palette123[0x10000 * channel];
+      // Mark every value that occurs in this plane.
+      for (size_t y = 0; y < ysize; ++y) {
+        uint16_t* const PIK_RESTRICT rowImg = img.PlaneRow(channel, y);
+        for (size_t x = 0; x < xsize; ++x)  // UNROLL AND PARALLELIZE ME!
+          palette[rowImg[x]] = 1;
+      }
+      // count the number of pixel values present in the channel
+      for (i = 0; i < 0x10000; ++i)
+        if (palette[i]) break;
+      // Replace each mark by the value's rank; the first used value gets
+      // rank 0, so its table entry becomes 0.
+      for (firstColors[channel] = i, count = 0; i < 0x10000; ++i)
+        if (palette[i]) palette[i] = count++;
+      // printf("count=%5d, %f%%\n", count, count * 100. / 65536);
+      if (count > 65536 * 3 / 4) {  // TODO: smarter decision making
+        // One plane with too many values disables the palette for all three.
+        flags = 0;
+        break;
+      }
+      flags += 1 << channel;
+      numColors[channel] = count;
+    }  // for channel
+
+    FWrByte(flags);  // As of Jan.2019, ImageMethod==flags, either 0 or 7
+    if (flags) {
+      uint8_t *pb = Palette_compress(numChannels, &numColors[0], &bits[0],
+                                     palette123, &firstColors[0]);
+      // Apply the channel's "palette"
+      for (int channel = 0; channel < numChannels; ++channel) {
+        uint32_t *palette = &palette123[0x10000 * channel];
+        for (size_t y = 0; y < ysize; ++y) {
+          uint16_t* const PIK_RESTRICT rowImg = img.PlaneRow(channel, y);
+          for (size_t x = 0; x < xsize; ++x)  // UNROLL AND PARALLELIZE ME!
+            rowImg[x] = palette[rowImg[x]];
+        }
+      }
+      // Stream layout after the flag byte: three varint colour counts, then
+      // the bytes produced by Palette_compress.
+      int pos = 0;
+      for (int c=0; c < numChannels; ++c)
+        pos += encodeVarInt(numColors[c], &compressedData[pos]);
+      FWr(&compressedData[0], pos);
+      FWr(&bits[0], sizeof(uint8_t) * (pb - &bits[0]));
+    }  // if (flags)
+  }
+
+
+  // Decodes the per-channel palettes stored ahead of the pixel data.
+  //
+  // Read from compressedData, starting at *pos:
+  //   - one varint per channel: the number of distinct values (at most 65536);
+  //   - for every channel whose bit is set in imageMethod: a two-byte
+  //     little-endian header (bit 0 = coding method, remaining bits = payload
+  //     size in bytes) followed by the payload. The payload carries one
+  //     "value is present" bit for each of the 0x10000 possible sample values.
+  //     Method 1 stores those bits raw, least significant bit first; method 0
+  //     stores them with an adaptive binary arithmetic coder whose context is
+  //     the previous six bits.
+  //
+  // On success palette[(channel << 16) + k] holds the k-th smallest present
+  // value of that channel, *pos is set to the position reached in the input
+  // and true is returned. Malformed input returns PIK_FAILURE.
+  //
+  // palette must provide 0x10000 entries per channel, and numChannels may not
+  // exceed 3 (the size of the local numColors array).
+  bool PerChannelPalette_decompress(const uint8_t* compressedData,
+        size_t compressedSize, size_t *pos, int numChannels, int imageMethod,
+        std::vector<int> &palette) {
+    int numColors[3];
+    // numColors[] only has room for three channels.
+    if (numChannels > 3)  return PIK_FAILURE("lossless16");
+    for (int channel = 0; channel < numChannels; ++channel) {
+      numColors[channel] = decodeVarInt(compressedData, compressedSize, pos);
+      // A very large varint may turn negative once narrowed to int.
+      if (numColors[channel] < 0 || numColors[channel] > 65536)
+        return PIK_FAILURE("lossless16");
+    }
+    // Do not form a pointer beyond the end of the input.
+    if (*pos > compressedSize)  return PIK_FAILURE("lossless16");
+    const uint8_t* p = &compressedData[*pos];
+    const uint8_t* p_end = compressedData + compressedSize;
+    for (int channel = 0; channel < numChannels; ++channel)
+      if (imageMethod & (1 << channel)) {
+        // Both header bytes must lie inside the input before they are read.
+        if (p_end - p < 2)  return PIK_FAILURE("lossless16");
+        int methodAndSize = p[0] + p[1]*256, cSize = methodAndSize >> 1;
+        p += 2;
+        const uint8_t *p00 = p;
+        // The payload must end strictly before the end of the input; test
+        // the length first so that no out-of-range pointer is ever computed.
+        if (cSize >= p_end - p00)  return PIK_FAILURE("lossless16");
+        const uint8_t *pcEnd = p00 + cSize;
+        if (methodAndSize & 1) {
+          // Raw bitmap. palette[sumv] is overwritten with each candidate
+          // value x and sumv only advances on a set bit, so every slot ends
+          // up holding the value whose bit was set.
+          int x=0, sumv = channel << 16, stop = sumv + numColors[channel];
+          while (x < 0x10000) {
+            if (p >= pcEnd)  return PIK_FAILURE("lossless16");
+            for (int b = *p++, i = 0; i < 8; ++i)
+              palette[sumv] = x++, sumv += b & 1, b >>= 1;
+            if (sumv >= stop)  break;
+            // Every remaining value must be present: fill in without reading.
+            if (sumv + 0x10000 - x == stop) {
+              while (x < 0x10000)  palette[sumv++] = x++;
+            }
+          } // while x
+          continue;
+        } // if (methodAndSize & 1)
+
+        // Arithmetic-coded bitmap. [x1, x2] is the current coding interval,
+        // xr the code value read so far, smt[] the adaptive probabilities
+        // (seeded from smt0) selected by the last six decoded bits.
+        uint32_t smt[64], x1 = 0, x2 = 0xffffffff, xr = 0, context6 = 0,
+                 sumv = channel << 16, stop = sumv + numColors[channel];
+        // Input beyond the payload is treated as 0xFF bytes.
+        for (int i = 0; i < 4; ++i)
+          xr = (xr << 8) + (p >= pcEnd? 0xFF : *p++);
+        for (int i = 0; i < 64; ++i)  smt[i] = smt0[i] << 11;
+        for (int x = 0; x < 0x10000; ) {
+          int v;
+          uint32_t pr = smt[context6] >> 11;
+
+          uint32_t xmid = x1 + ((x2-x1)>>16)*pr + (((x2-x1) & 0xffff)*pr >> 16);
+          assert(pr>=0 && pr<=0xffff && xmid>=x1 && xmid<x2);
+          if (xr <= xmid)  x2 = xmid, v = 1;   else  x1 = xmid + 1, v = 0;
+
+          while (((x1 ^ x2) & 0xff000000)==0) {  // Binary arithm decomprs
+            xr = (xr<<8) + (p >= pcEnd? 0xFF : *p++);
+            x1 <<= 8;
+            x2 = (x2 << 8) + 255;
+          }
+
+          int p0 = smt[context6];
+          p0 += ((v << (16+11)) - p0) * 5 >> 7;  // Learning rate
+          smt[context6] = p0;
+          context6 = (context6 * 2 + v) & 0x3f;
+          palette[sumv] = x++;
+          sumv += v;
+          if (sumv == stop) break;
+          // Every remaining value must be present: fill in without decoding.
+          if (sumv + 0x10000 - x == stop) {
+            while (x < 0x10000)  palette[sumv++] = x++;
+          }
+        }  // for x
+        // Skip to the end of the payload regardless of how much was consumed.
+        p = p00 + cSize;
+      } // if (imageMethod & ...
+    *pos = p - &compressedData[0];
+    return true;
+  }
+
+
+
+
+
+
+  // Compresses a 16-bit grayscale image; the stream is emitted through the
+  // FWr macro (presumably appending to `bytes` -- confirm against the macro).
+  //
+  // Stream layout produced here:
+  //   varint xsize, varint ysize,
+  //   [output of PerChannelPalette_compress_1, only when the image has more
+  //    than 256*256 pixels],
+  //   then one record per kGroupSize x kGroupSize tile, row-major.
+  //
+  // Inside a tile every pixel is predicted with predict1(); the transformed
+  // prediction error is appended to the bucket edata[maxErr] selected by the
+  // predictor. Every bucket is then written as: entropy-coded high bytes,
+  // followed by the low bytes either as a single stream or split into two
+  // streams according to whether the high byte is zero. An empty bucket is
+  // written as the varint 0.
+  //
+  // The input image is copied first because the palette step rewrites pixels.
+  // Returns true on success; an entropy coder failure is propagated through
+  // PIK_RETURN_IF_ERROR.
+  bool Grayscale16bit_compress(const ImageU& img_in, PaddedBytes* bytes) {
+    WithSIGN = kWithSign1, BitsMAX = kBitsMax1, NumCONTEXTS = kNumContexts1;
+
+    // The code modifies the image for palette so must copy for now.
+    ImageU img = CopyImage(img_in);
+
+    size_t esize[kNumContexts1], xsize = img.xsize(), ysize = img.ysize();
+    std::vector<uint8_t> temp_buffer(kGroupSize2plus * 2);
+    compressedData = temp_buffer.data();
+
+    clock_t start = clock();
+    for (int run = 0; run < kNumRuns; ++run) {
+      size_t pos = encodeVarInt(xsize, &compressedData[0]);
+      pos       += encodeVarInt(ysize, &compressedData[pos]);
+      FWr(&compressedData[0], pos);
+
+      if (xsize * ysize > 256 * 256)  // TODO: smarter decision making here
+        PerChannelPalette_compress_1(img, bytes, run);
+
+      for (size_t groupY = 0; groupY < ysize; groupY += kGroupSize) {
+        for (size_t groupX = 0; groupX < xsize; groupX += kGroupSize) {
+          memset(esize, 0, sizeof(esize));
+          // yp / yp1 alternate between 0 and kGroupSize from row to row.
+          // yp1 starts at kGroupSize, the value the loop update would give
+          // for yp == 0, so that it is never read uninitialized on row 0.
+          for (size_t y = 0, yp = 0, yp1 = kGroupSize,
+                      yEnd = std::min((size_t)kGroupSize, ysize - groupY);
+               y < yEnd; ++y, yp ^= kGroupSize, yp1 = kGroupSize - yp) {
+            rowImg = img.Row(groupY + y) + groupX;
+            rowPrev = (y == 0 ? nullptr : img.Row(groupY + y - 1) + groupX);
+            width = std::min((size_t)kGroupSize, xsize - groupX) - 1;
+            for (size_t x = 0; x <= width; ++x) {
+              int maxErr, prediction = predict1(x, yp + x, yp1 + x, maxErr);
+              assert(0 <= maxErr && maxErr <= kNumContexts1 - 1);
+              assert(0 <= prediction && prediction <= 0xffff);
+
+              int truePixelValue = (int)rowImg[x];
+              int err = prediction - truePixelValue;
+              size_t s = esize[maxErr];
+              edata[maxErr][s] = sign_LSB_forward_transform[err & 0xffff];
+
+              Update_Size_And_Errors
+            }  // x
+          }    // y
+          size_t pos = 0;
+          for (int i = 0; i < kNumContexts1; ++i) {
+            size_t S = esize[i];
+            if (S == 0) {
+              // This means uncompressed size 0.
+              pos += encodeVarInt(0, &compressedData[pos]);
+              continue;
+            }
+            uint16_t* d = &edata[i][0];
+            // first, compress MSBs (most significant bytes)
+            // The bytes are staged 8 bytes past the write position before
+            // each compressWithEntropyCode call.
+            uint8_t* p = &compressedData[pos + 8];
+            for (size_t x = 0; x < S; ++x) p[x] = d[x] >> 8;
+            PIK_RETURN_IF_ERROR(
+              compressWithEntropyCode(&pos, S, compressedData));
+
+            if (i > 9 || S < 128) {  //  9  128
+              // then, compress LSBs (least significant bytes)
+              p = &compressedData[pos + 8];
+              for (size_t x = 0; x < S; ++x) p[x] = d[x] & 255;  // All
+              PIK_RETURN_IF_ERROR(
+                compressWithEntropyCode(&pos, S, compressedData));
+            } else {
+              p = &compressedData[pos + 8];
+              size_t y = 0;
+              for (size_t x = 0; x < S; ++x)
+                if (d[x] < 256) p[y++] = d[x] & 255;  // LSBs such that MSB==0
+              if (y) {
+                PIK_RETURN_IF_ERROR(
+                  compressWithEntropyCode(&pos, y, compressedData));
+              }
+
+              p = &compressedData[pos + 8];
+              y = 0;
+              for (size_t x = 0; x < S; ++x)
+                if (d[x] >= 256) p[y++] = d[x] & 255;  // LSBs such that MSB!=0
+              if (y) {
+                PIK_RETURN_IF_ERROR(
+                  compressWithEntropyCode(&pos, y, compressedData));
+              }
+            }  // if (i > 9)
+          }    // i
+          FWr(&compressedData[0], pos)
+        }  // groupX
+      }    // groupY
+    }      // run
+    if (kNumRuns > 1)
+      printf("%d runs, %1.5f seconds", kNumRuns,
+             ((double)clock() - start) / CLOCKS_PER_SEC);
+    return true;
+  }
+
+  // Decodes a 16-bit grayscale image starting at bytes[*bytes_pos].
+  //
+  // Expected layout: varint xsize, varint ysize, then (only when the image has
+  // more than 256*256 pixels) one image-method byte optionally followed by a
+  // palette section, then one record per kGroupSize x kGroupSize tile in
+  // row-major order. A tile record holds, for each context bucket, the
+  // entropy-coded high bytes followed by the low bytes (one stream, or two
+  // streams split by "high byte == 0").
+  //
+  // On success the decoded image is moved into *result, *bytes_pos is
+  // advanced by the number of bytes consumed and true is returned. Malformed
+  // or oversized input returns PIK_FAILURE.
+  bool Grayscale16bit_decompress(const PaddedBytes& bytes, size_t* bytes_pos,
+                                 ImageU* result) {
+    WithSIGN = kWithSign1, BitsMAX = kBitsMax1, NumCONTEXTS = kNumContexts1;
+    if (*bytes_pos > bytes.size()) return PIK_FAILURE("lossless16");
+    size_t compressedSize = bytes.size() - *bytes_pos;
+    const uint8_t* compressedData = bytes.data() + *bytes_pos;
+
+    // Size of an edata entry
+    size_t maxDecodedSize = kGroupSize * kGroupSize;
+    // Size of a compressedDataTmpBuf entry
+    size_t maxDecodedSize2 = kGroupSize2plus;
+
+    size_t esize[kNumContexts1], xsize, ysize, pos0 = 0, imageMethod = 0;
+    xsize = decodeVarInt(compressedData, compressedSize, &pos0);
+    ysize = decodeVarInt(compressedData, compressedSize, &pos0);
+    if (!xsize || !ysize) return PIK_FAILURE("lossless16");
+    // Too large, would run out of memory. Chosen as reasonable limit for pik
+    // while being below default fuzzer memory limit. We check for total pixel
+    // size, and an additional restriction to ysize, because large ysize
+    // consumes more memory due to the scanline padding.
+    if (uint64_t(xsize) * uint64_t(ysize) >= 134217728ull || ysize >= 65536) {
+      return PIK_FAILURE("lossless16");
+    }
+    pik::ImageU img(xsize, ysize);
+    std::vector<int> palette(0x10000);
+
+    clock_t start = clock();
+    for (int run = 0; run < kNumRuns; ++run) {
+      size_t pos = pos0;
+      if (xsize * ysize > 256 * 256) {  // TODO: smarter decision making here
+        // The image-method byte must lie inside the input before it is read.
+        if (pos >= compressedSize)
+          return PIK_FAILURE("lossless16: out of bounds");
+        imageMethod = compressedData[pos++];
+        if (imageMethod) {  // As of Jan.2019, ImageMethod is either 0 or 7
+          PIK_RETURN_IF_ERROR(
+            PerChannelPalette_decompress(compressedData, compressedSize,
+                                            &pos, 1, imageMethod, palette));
+        }
+      }  // if (xsize*ysize ...
+      for (size_t groupY = 0; groupY < ysize; groupY += kGroupSize) {
+        for (size_t groupX = 0; groupX < xsize; groupX += kGroupSize) {
+          size_t decompressedSize = 0;  // is used only for return PIK_FAILURE
+
+          for (int i = 0; i < kNumContexts1; ++i) {
+            size_t ds, ds1, ds2, ds3;
+            // first, decompress MSBs (most significant bytes)
+            // They land in the low half of edata[i] and are widened in place
+            // further down, walking from the last entry to the first.
+            PIK_RETURN_IF_ERROR(
+              decompressWithEntropyCode((uint8_t*)&edata[i][0],
+                                           maxDecodedSize, compressedData,
+                                           compressedSize, &ds1, &pos));
+            if (!ds1) continue;
+            if (i > 9 || ds1 < 128) {  // All LSBs at once
+              PIK_RETURN_IF_ERROR(
+                decompressWithEntropyCode(&compressedDataTmpBuf[0],
+                                             maxDecodedSize2, compressedData,
+                                             compressedSize, &ds2, &pos));
+              if (ds1 != ds2) return PIK_FAILURE("lossless16");
+              uint16_t* dst = &edata[i][0];
+              uint8_t* p = (uint8_t*)dst;
+              for (int j = ds1 - 1; j >= 0; --j)
+                dst[j] = p[j] * 256 + compressedDataTmpBuf[j];  // MSB*256 + LSB
+            } else {
+              uint16_t* dst = &edata[i][0];
+              uint8_t* p = (uint8_t*)dst;
+              // ds2 = number of zero MSBs, ds3 = number of non-zero MSBs.
+              ds2 = ds3 = 0;
+              for (int j = ds1 - 1; j >= 0; --j)
+                if (p[j])
+                  ++ds3;
+                else
+                  ++ds2;
+
+              if (ds2) {  // LSBs such that MSB==0
+                PIK_RETURN_IF_ERROR(
+                  decompressWithEntropyCode(&compressedDataTmpBuf[0],
+                                               maxDecodedSize2, compressedData,
+                                               compressedSize, &ds, &pos));
+                if (ds != ds2) return PIK_FAILURE("lossless16");
+              }
+
+              if (ds3) {  // LSBs such that MSB!=0
+                // This stream is stored behind the first ds2 bytes of the
+                // temporary buffer, so only the remaining capacity may be
+                // offered to the entropy decoder.
+                if (ds2 >= maxDecodedSize2) return PIK_FAILURE("lossless16");
+                PIK_RETURN_IF_ERROR(
+                  decompressWithEntropyCode(&compressedDataTmpBuf[ds2],
+                                               maxDecodedSize2 - ds2,
+                                               compressedData,
+                                               compressedSize, &ds, &pos));
+                if (ds != ds3) return PIK_FAILURE("lossless16");
+              }
+              uint8_t *p2 = &compressedDataTmpBuf[ds2 - 1],
+                      *p3 = &compressedDataTmpBuf[ds1 - 1];  // Note ds1=ds2+ds3
+              for (int j = ds1 - 1; j >= 0; --j)
+                dst[j] = p[j] * 256 + (p[j] == 0 ? *p2-- : *p3--);
+            }
+            decompressedSize += ds1;
+          }  // for i
+          // All buckets together must hold exactly one value per tile pixel.
+          if (!(decompressedSize ==
+                std::min((size_t)kGroupSize, ysize - groupY) *
+                std::min((size_t)kGroupSize, xsize - groupX))) {
+            return PIK_FAILURE("lossless16");
+          }
+          // No "whole input consumed" check is made here: it is useful that
+          // the decoder can decode its own stream when it is embedded in a
+          // bigger stream and can report the correct end position.
+
+          memset(esize, 0, sizeof(esize));
+          // yp / yp1 alternate between 0 and kGroupSize from row to row.
+          // yp1 starts at kGroupSize, the value the loop update would give
+          // for yp == 0, so that it is never read uninitialized on row 0.
+          for (size_t y = 0,
+                      yEnd = std::min((size_t)kGroupSize, ysize - groupY),
+                      yp = 0, yp1 = kGroupSize;
+               y < yEnd; ++y, yp ^= kGroupSize, yp1 = kGroupSize - yp) {
+            rowImg = img.Row(groupY + y) + groupX;
+            rowPrev = (y == 0 ? nullptr : img.Row(groupY + y - 1) + groupX);
+            width = std::min((size_t)kGroupSize, xsize - groupX) - 1;
+            for (size_t x = 0; x <= width; ++x) {
+              int maxErr, prediction = predict1(x, yp + x, yp1 + x, maxErr);
+              assert(0 <= maxErr && maxErr <= kNumContexts1 - 1);
+              assert(0 <= prediction && prediction <= 0xffff);
+
+              size_t s = esize[maxErr];
+              int err = edata[maxErr][s];
+              int truePixelValue =
+                  (prediction - sign_LSB_backward_transform[err]) & 0xffff;
+              rowImg[x] = truePixelValue;
+              err = prediction - truePixelValue;
+
+              Update_Size_And_Errors
+            }  // x
+          }    // y
+        }      // groupX
+      }        // groupY
+      *bytes_pos += pos;
+      // Map palette ranks back to the original sample values.
+      if (imageMethod & 1) {
+        for (size_t y = 0; y < ysize; ++y) {
+          uint16_t* const PIK_RESTRICT rowImg = img.Row(y);
+          for (size_t x = 0; x < xsize; ++x)  // UNROLL AND PARALLELIZE ME!
+            rowImg[x] = palette[rowImg[x]];
+        }
+      }
+    }  // run
+    if (kNumRuns > 1)
+      printf("%d runs, %1.5f seconds", kNumRuns,
+             ((double)clock() - start) / CLOCKS_PER_SEC);
+    *result = std::move(img);
+    return true;
+  }
+
+
+
+
+  // Indices of the three colour planes, as passed to Image3U::PlaneRow and to
+  // the per-tile plane coders.
+  const int PL1 = 0, PL2 = 1, PL3 = 2;
+
+  // Names for the values of the per-tile "plane method" byte. The decoder
+  // switches on the numeric value, so the numbers below are part of the
+  // stream format.
+  //
+  // Naming: the doubled letter (RR / GG / BB) marks the plane that is coded
+  // unchanged and serves as p1; "XmY" means plane X is coded as X minus Y;
+  // "Xm2" means X minus the average of the other two planes.
+  // Methods 0-9 use p1=R, 10-19 use p1=G, 20-29 use p1=B.
+  //
+  // NOTE(review): the decoder in Colorful16bit_decompress handles case 21
+  // together with 14 and case 22 together with 4, which is the opposite of the
+  // "== 21" / "== 22" annotations below -- confirm which pairing is intended.
+  enum PlaneMethods_30 {  // 8/30 are redundant (left for encoder's convenience)
+    RR_G_B = 0,           // p1=R  p2=G  p3=B
+    RR_GmR_B = 1,         // p2-p1  p3
+    RR_G_BmR = 2,         //   p2  p3-p1
+    RR_GmR_BmR = 3,       // p2-p1 p3-p1
+
+    RR_GmB_B = 4,  // == 21   p2-p3 @ p2
+    RR_G_GmB = 5,  // ~= 12   p2-p3 @ p3
+
+    RR_GmR_Bm2 = 6,  //  p2-p1  p3-(p1+p2)/2
+    RR_Gm2_BmR = 7,  // p2-(p1+p3)/2   p3-p1
+    RR_G_Bm2 = 8,    //   p2    p3-(p1+p2)/2
+    RR_Gm2_B = 9,    // p2-(p1+p3)/2     p3
+
+    R_GG_B = 10,  // p1=G  p2=R  p3=B
+    RmG_GG_B = 11,
+    R_GG_BmG = 12,
+    RmG_GG_BmG = 13,
+
+    RmB_GG_B = 14,  // == 22
+    R_GG_RmB = 15,  // ~=  2
+
+    RmG_GG_Bm2 = 16,
+    Rm2_GG_BmG = 17,
+    R_GG_Bm2 = 18,
+    Rm2_GG_B = 19,
+
+    R_G_BB = 20,  // p1=B  p2=R  p3=G
+    R_GmB_BB = 21,
+    RmB_G_BB = 22,
+    RmB_GmB_BB = 23,
+
+    RmG_G_BB = 24,  // == 11
+    R_RmG_BB = 25,  // ~=  1
+
+    RmB_Gm2_BB = 26,
+    Rm2_GmB_BB = 27,
+    R_Gm2_BB = 28,
+    Rm2_G_BB = 29,
+  };
+
+  // Decodes one colour plane of one tile.
+  //
+  // img                - destination image; plane `planeToDecompress` of the
+  //                      tile whose top-left corner is (groupX, groupY) is
+  //                      written.
+  // pos                - read position inside compressedData, advanced past
+  //                      the consumed bytes.
+  // compressedData / compressedSize - the whole input stream.
+  //
+  // First every context bucket edata[i] is rebuilt from its entropy-coded
+  // high bytes and low bytes; the low bytes come either as one stream or as
+  // two streams split by the value of the high byte. Then each pixel is
+  // reconstructed from predict1() and the next value of the bucket chosen by
+  // the predictor. For small tiles the bucket index is scaled down through
+  // maxerrShift.
+  //
+  // Returns true on success and PIK_FAILURE when the stream is inconsistent.
+  bool dcmprs512x512(pik::Image3U* img, int planeToDecompress, size_t& pos,
+                     size_t groupY, size_t groupX,
+                     const uint8_t* compressedData, size_t compressedSize) {
+    // Size of an edata entry
+    const size_t maxDecodedSize = kGroupSize * kGroupSize;
+    // Size of a compressedDataTmpBuf entry
+    const size_t maxDecodedSize2 = kGroupSize2plus;
+
+    size_t esize[kNumContexts3], xsize = img->xsize(), ysize = img->ysize();
+    size_t decompressedSize = 0;  // is used only for 'return PIK_FAILURE'
+    memset(esize, 0, sizeof(esize));
+    for (int i = 0; i < kNumContexts3; ++i) {
+      size_t ds, ds1, ds2, ds3;
+      // first, decompress MSBs (most significant bytes)
+      // They land in the low half of edata[i] and are widened in place
+      // further down, walking from the last entry to the first.
+      PIK_RETURN_IF_ERROR(
+        decompressWithEntropyCode((uint8_t*)&edata[i][0], maxDecodedSize,
+                               compressedData, compressedSize, &ds1, &pos));
+      if (!ds1) continue;
+      // Histogram of the high bytes; it selects the low-byte layout below.
+      uint32_t freq[256];
+      memset(freq, 0, sizeof(freq));
+      uint16_t* dst = &edata[i][0];
+      uint8_t* p = (uint8_t*)dst;
+      for (size_t j = 0; j < ds1; ++j) ++freq[p[j]];
+
+      if (ds1 < 120 || freq[0] < 120) {  // All LSBs at once
+        PIK_RETURN_IF_ERROR(
+          decompressWithEntropyCode(&compressedDataTmpBuf[0],
+                                       maxDecodedSize2, compressedData,
+                                       compressedSize, &ds2, &pos));
+        if (ds1 != ds2) return PIK_FAILURE("lossless16");
+        for (int j = ds1 - 1; j >= 0; --j)
+          dst[j] = p[j] * 256 + compressedDataTmpBuf[j];  // MSB*256 + LSB
+      } else {
+        // c is the split threshold on the high byte: 2 when zeros make up
+        // more than 13/16 of the bucket, otherwise 1.
+        uint32_t c = (freq[0] > (ds1 * 13 >> 4) ? 2 : 1);
+        ds2 = freq[0] + (c == 2 ? freq[1] : 0);
+        ds3 = ds1 - ds2;
+        if (ds2) {  // LSBs such that MSB < c
+          PIK_RETURN_IF_ERROR(
+            decompressWithEntropyCode(&compressedDataTmpBuf[0],
+                                         maxDecodedSize2, compressedData,
+                                         compressedSize, &ds, &pos));
+          if (ds != ds2) return PIK_FAILURE("lossless16");
+        }
+
+        if (ds3) {  // LSBs such that MSB >= c
+          // This stream is stored behind the first ds2 bytes of the temporary
+          // buffer, so only the remaining capacity may be offered to the
+          // entropy decoder.
+          if (ds2 >= maxDecodedSize2) return PIK_FAILURE("lossless16");
+          PIK_RETURN_IF_ERROR(
+            decompressWithEntropyCode(&compressedDataTmpBuf[ds2],
+                                         maxDecodedSize2 - ds2, compressedData,
+                                         compressedSize, &ds, &pos));
+          if (ds != ds3) return PIK_FAILURE("lossless16");
+        }
+        uint8_t *p2 = &compressedDataTmpBuf[ds2 - 1],
+                *p3 = &compressedDataTmpBuf[ds1 - 1];  // Note ds1=ds2+ds3
+        for (int j = ds1 - 1; j >= 0; --j)
+          dst[j] = p[j] * 256 + (p[j] < c ? *p2-- : *p3--);
+      }
+      decompressedSize += ds1;
+    }  // for i
+    // All buckets together must hold exactly one value per tile pixel.
+    if (decompressedSize !=
+          std::min((size_t)kGroupSize, ysize - groupY) *
+          std::min((size_t)kGroupSize, xsize - groupX)) {
+      return PIK_FAILURE("lossless16");
+    }
+
+    size_t yEnd = std::min((size_t)kGroupSize, ysize - groupY);
+    width = std::min((size_t)kGroupSize, xsize - groupX) - 1;
+    size_t area = yEnd * (width + 1);
+    // Smaller tiles use fewer buckets: the bucket index is shifted right.
+    int maxerrShift = (area > 25600 ? 0
+             : area > 12800 ? 1 : area > 2800 ? 2 : area > 512 ? 3 : 4);
+    int maxerrAdd = (1 << maxerrShift) - 1;
+
+    // yp / yp1 alternate between 0 and kGroupSize from row to row. yp1 starts
+    // at kGroupSize, the value the loop update would give for yp == 0, so
+    // that it is never read uninitialized on row 0.
+    for (size_t y = 0, yp = 0, yp1 = kGroupSize; y < yEnd;
+         ++y, yp ^= kGroupSize, yp1 = kGroupSize - yp) {
+      rowImg = img->PlaneRow(planeToDecompress, groupY + y) + groupX;
+      rowPrev =
+          (y == 0 ? nullptr
+                  : img->PlaneRow(planeToDecompress, groupY + y - 1) + groupX);
+      for (size_t x = 0; x <= width; ++x) {
+        int maxErr, prediction = predict1(x, yp + x, yp1 + x, maxErr);
+        maxErr = (maxErr + maxerrAdd) >> maxerrShift;
+        assert(0 <= maxErr && maxErr <= kNumContexts3 - 1);
+        assert(0 <= prediction && prediction <= 0xffff);
+
+        size_t s = esize[maxErr];
+        int err = edata[maxErr][s], truePixelValue =
+            (prediction - sign_LSB_backward_transform[err]) & 0xffff;
+        rowImg[x] = truePixelValue;
+        err = prediction - truePixelValue;
+
+        Update_Size_And_Errors
+      }  // x
+    }    // y
+    return true;
+  }
+
+  // Decodes a three-plane 16-bit image starting at bytes[*bytes_pos].
+  //
+  // Expected layout: varint xsize, varint ysize, then (only when the image has
+  // more than 256*256 pixels) one image-method byte optionally followed by a
+  // palette section, then for every kGroupSize x kGroupSize tile in row-major
+  // order: the three coded planes followed by one plane-method byte.
+  //
+  // The plane-method byte says which planes were stored as a difference from
+  // another plane (or from the average of two planes); the switch below
+  // undoes that for the tile. Afterwards the per-channel palettes are applied
+  // to the whole image for every channel whose bit is set in imageMethod.
+  //
+  // On success the decoded image is moved into *result, *bytes_pos is
+  // advanced by the number of bytes consumed and true is returned. Malformed
+  // or oversized input returns PIK_FAILURE.
+  bool Colorful16bit_decompress(const PaddedBytes& bytes, size_t* bytes_pos,
+                                Image3U* result) {
+    WithSIGN = kWithSign3, BitsMAX = kBitsMax3, NumCONTEXTS = kNumContexts3;
+    if (*bytes_pos > bytes.size()) return PIK_FAILURE("lossless16");
+    size_t cSize = bytes.size() - *bytes_pos;
+    const uint8_t* cprsdData = bytes.data() + *bytes_pos;
+
+    size_t xsize, ysize, pos0 = 0, imageMethod = 0;
+    xsize = decodeVarInt(cprsdData, cSize, &pos0);
+    ysize = decodeVarInt(cprsdData, cSize, &pos0);
+    if (!xsize || !ysize) return PIK_FAILURE("lossless16");
+    // Too large, would run out of memory. Chosen as reasonable limit for pik
+    // while being below default fuzzer memory limit. We check for total pixel
+    // size, and an additional restriction to ysize, because large ysize
+    // consumes more memory due to the scanline padding.
+    if (uint64_t(xsize) * uint64_t(ysize) >= 134217728ull || ysize >= 65536)
+      return PIK_FAILURE("lossless16");
+
+    pik::Image3U img(xsize, ysize);
+    // One 0x10000-entry table per channel.
+    std::vector<int> palette(0x10000 * 3);
+
+    clock_t start = clock();
+    for (int run = 0; run < kNumRuns; ++run) {
+      size_t pos = pos0;
+      if (pos >= cSize)
+        return PIK_FAILURE("lossless16: out of bounds");
+      if (xsize * ysize > 256 * 256) {  // TODO: smarter decision making here
+        imageMethod = cprsdData[pos++];
+        if (imageMethod) {  // As of Jan.2019, ImageMethod is either 0 or 7
+          PIK_RETURN_IF_ERROR(
+            PerChannelPalette_decompress(cprsdData, cSize, &pos, 3,
+               imageMethod, palette));
+        }
+      }  // if (xsize*ysize ...
+
+      for (size_t groupY = 0; groupY < ysize; groupY += kGroupSize) {
+        for (size_t groupX = 0; groupX < xsize; groupX += kGroupSize) {
+          uint16_t *PIK_RESTRICT row1, *PIK_RESTRICT row2, *PIK_RESTRICT row3;
+          size_t yEnd = std::min((size_t)kGroupSize, ysize - groupY);
+          size_t xEnd = std::min((size_t)kGroupSize, xsize - groupX);
+          // Decode the three planes of this tile as stored.
+          PIK_RETURN_IF_ERROR(
+            dcmprs512x512(&img, PL1, pos, groupY, groupX, cprsdData, cSize));
+          PIK_RETURN_IF_ERROR(
+            dcmprs512x512(&img, PL2, pos, groupY, groupX, cprsdData, cSize));
+          PIK_RETURN_IF_ERROR(
+            dcmprs512x512(&img, PL3, pos, groupY, groupX, cprsdData, cSize));
+
+// T3bgn opens a loop over every pixel of the current tile and loads the three
+// stored plane values into R, G and B; CC closes the two loops it opens.
+#define T3bgn                                      \
+  for (size_t y = 0; y < yEnd; ++y) {              \
+    row1 = img.PlaneRow(PL1, groupY + y) + groupX; \
+    row2 = img.PlaneRow(PL2, groupY + y) + groupX; \
+    row3 = img.PlaneRow(PL3, groupY + y) + groupX; \
+    for (size_t x = 0; x < xEnd; ++x) {            \
+      int R = row1[x], G = row2[x], B = row3[x];   \
+      (void)R;                                     \
+      (void)G;                                     \
+      (void)B;
+
+// Close T3bgn above; not using a #define confuses brace matching of editor.
+#define CC \
+  }        \
+  }
+
+          if (pos >= cSize)
+            return PIK_FAILURE("lossless16: out of bounds");
+          int planeMethod = cprsdData[pos++];
+          // Undo the inter-plane subtraction chosen for this tile. Results
+          // are stored back into 16-bit rows, so they wrap modulo 0x10000.
+          // NOTE(review): there is no default label, so a method byte outside
+          // the listed cases leaves the tile unchanged instead of failing --
+          // confirm this is intended.
+          switch (planeMethod) {
+            case 0:
+            case 10:
+            case 20:
+              break;  // all three planes were stored as they are
+            case 1:
+              T3bgn G += R + 0x8000;
+              row2[x] = G;
+              CC break;
+            case 2:
+              T3bgn B += R + 0x8000;
+              row3[x] = B;
+              CC break;
+            case 3:
+              T3bgn G += R + 0x8000;
+              B += R + 0x8000;
+              row2[x] = G;
+              row3[x] = B;
+              CC break;
+            case 22:
+            case 4:
+              T3bgn row2[x] = G + B + 0x8000;
+              CC break;
+            case 5:
+              T3bgn row3[x] = G - B + 0x8000;
+              CC break;
+            case 6:
+              T3bgn row2[x] = G = (G + R + 0x8000) & 0xffff;
+              row3[x] = B + ((R + G) >> 1) + 0x8000;
+              CC break;
+            case 7:
+              T3bgn row3[x] = B = (B + R + 0x8000) & 0xffff;
+              row2[x] = G + ((R + B) >> 1) + 0x8000;
+              CC break;
+            case 8:
+              T3bgn row3[x] = B + ((R + G) >> 1) + 0x8000;
+              CC break;
+            case 9:
+              T3bgn row2[x] = G + ((R + B) >> 1) + 0x8000;
+              CC break;
+
+            case 24:
+            case 11:
+              T3bgn R += G + 0x8000;
+              row1[x] = R;
+              CC break;
+            case 12:
+              T3bgn B += G + 0x8000;
+              row3[x] = B;
+              CC break;
+            case 13:
+              T3bgn R += G + 0x8000;
+              B += G + 0x8000;
+              row1[x] = R;
+              row3[x] = B;
+              CC break;
+            case 21:
+            case 14:
+              T3bgn row1[x] = R + B + 0x8000;
+              CC break;
+            case 15:
+              T3bgn row3[x] = R - B + 0x8000;
+              CC break;
+
+            case 16:
+              T3bgn row1[x] = R = (R + G + 0x8000) & 0xffff;
+              row3[x] = B + ((R + G) >> 1) + 0x8000;
+              CC break;
+            case 17:
+              T3bgn row3[x] = B = (B + G + 0x8000) & 0xffff;
+              row1[x] = R + ((B + G) >> 1) + 0x8000;
+              CC break;
+            case 18:
+              T3bgn row3[x] = B + ((R + G) >> 1) + 0x8000;
+              CC break;
+            case 19:
+              T3bgn row1[x] = R + ((B + G) >> 1) + 0x8000;
+              CC break;
+
+            case 23:
+              T3bgn G += B + 0x8000;
+              R += B + 0x8000;
+              row1[x] = R;
+              row2[x] = G;
+              CC break;
+            case 25:
+              T3bgn row2[x] = R - G + 0x8000;
+              CC break;
+            case 26:
+              T3bgn row1[x] = R = (R + B + 0x8000) & 0xffff;
+              row2[x] = G + ((B + R) >> 1) + 0x8000;
+              CC break;
+            case 27:
+              T3bgn row2[x] = G = (G + B + 0x8000) & 0xffff;
+              row1[x] = R + ((B + G) >> 1) + 0x8000;
+              CC break;
+            case 28:
+              T3bgn row2[x] = G + ((B + R) >> 1) + 0x8000;
+              CC break;
+            case 29:
+              T3bgn row1[x] = R + ((B + G) >> 1) + 0x8000;
+              CC break;
+          }
+        }  // groupX
+      }    // groupY
+// No "whole input consumed" check is made here: it is useful that the decoder
+// can decode its own stream when it is embedded in a bigger stream and can
+// report the correct end position.
+
+      // Map palette ranks back to the original sample values, per channel.
+      for (int channel = 0; channel < 3; ++channel)
+        if (imageMethod & (1 << channel)) {
+          int* p = &palette[0x10000 * channel];
+          for (size_t y = 0; y < ysize; ++y) {
+            uint16_t* const PIK_RESTRICT rowImg = img.PlaneRow(channel, y);
+            for (size_t x = 0; x < xsize; ++x)  // UNROLL AND PARALLELIZE ME!
+              rowImg[x] = p[rowImg[x]];
+          }
+        }
+      // NOTE(review): this runs once per iteration of the run loop, so the
+      // position would be advanced repeatedly if kNumRuns were greater than 1.
+      *bytes_pos += pos;
+    }  // run
+    if (kNumRuns > 1)
+      printf("%d runs, %1.5f seconds", kNumRuns,
+             ((double)clock() - start) / CLOCKS_PER_SEC);
+    *result = std::move(img);
+    return true;
+  }
+
+  // Compresses one colour plane of one tile into compressedOutput.
+  //
+  // img             - source image. When planeToCompress != planeToUse the
+  //                   samples of planeToCompress inside the tile are first
+  //                   replaced IN PLACE by
+  //                   (sample - planeToUse sample + 0x8000) & 0xffff.
+  // planeToCompress - plane whose tile is coded.
+  // planeToUse      - reference plane for the subtraction above; pass the
+  //                   same index as planeToCompress to code the plane as is.
+  // groupY, groupX  - top-left corner of the tile.
+  // compressedOutput- destination buffer, written from offset 0.
+  // csize           - receives the number of bytes produced.
+  //
+  // Every pixel is predicted with predict1(); the transformed prediction
+  // error is appended to the bucket edata[maxErr] selected by the predictor
+  // (scaled down through maxerrShift for small tiles). Every bucket is then
+  // written as entropy-coded high bytes followed by the low bytes, the latter
+  // either as one stream or split in two by the value of the high byte. An
+  // empty bucket is written as the varint 0.
+  //
+  // Returns true on success; an entropy coder failure is propagated through
+  // PIK_RETURN_IF_ERROR.
+  bool cmprs512x512(pik::Image3U& img, int planeToCompress, int planeToUse,
+                        size_t groupY, size_t groupX,
+                        uint8_t* compressedOutput, size_t *csize) {
+    size_t esize[kNumContexts3], xsize = img.xsize(), ysize = img.ysize();
+    memset(esize, 0, sizeof(esize));
+    size_t yEnd = std::min((size_t)kGroupSize, ysize - groupY);
+    width = std::min((size_t)kGroupSize, xsize - groupX) - 1;
+    size_t area = yEnd * (width + 1);
+    // Smaller tiles use fewer buckets: the bucket index is shifted right.
+    int maxerrShift = (area > 25600 ? 0
+             : area > 12800 ? 1 : area > 2800 ? 2 : area > 512 ? 3 : 4);
+    int maxerrAdd = (1 << maxerrShift) - 1;
+
+    // yp / yp1 alternate between 0 and kGroupSize from row to row. yp1 starts
+    // at kGroupSize, the value the loop update would give for yp == 0, so
+    // that it is never read uninitialized on row 0.
+    for (size_t y = 0, yp = 0, yp1 = kGroupSize; y < yEnd;
+         ++y, yp ^= kGroupSize, yp1 = kGroupSize - yp) {
+      rowImg = img.PlaneRow(planeToCompress, groupY + y) + groupX;
+      rowPrev = (!y ? nullptr :
+               img.PlaneRow(planeToCompress, groupY + y - 1) + groupX);
+      uint16_t* PIK_RESTRICT rowUse =
+               img.PlaneRow(planeToUse, groupY + y) + groupX;
+      for (size_t x = 0; x <= width; ++x) {
+        int maxErr, prediction = predict1(x, yp + x, yp1 + x, maxErr);
+        maxErr = (maxErr + maxerrAdd) >> maxerrShift;
+        assert(0 <= maxErr && maxErr <= kNumContexts3 - 1);
+        assert(0 <= prediction && prediction <= 0xffff);
+        int truePixelValue = (int)rowImg[x];
+        if (planeToCompress != planeToUse) {
+          // Code the difference to the reference plane, biased by 0x8000,
+          // and keep it in the image so later rows are predicted from it.
+          truePixelValue -= (int)rowUse[x] - 0x8000;
+          truePixelValue &= 0xffff;
+          rowImg[x] = truePixelValue;
+        }
+        int err = prediction - truePixelValue;
+        size_t s = esize[maxErr];
+        edata[maxErr][s] = sign_LSB_forward_transform[err & 0xffff];
+        Update_Size_And_Errors
+      }  // x
+    }    // y
+
+    size_t pos = 0;
+    for (int i = 0; i < kNumContexts3; ++i) {
+      // c first counts the zero high bytes, later it becomes the threshold
+      // used to split the low bytes.
+      size_t c = 0, S = esize[i];
+      if (S == 0) {  // If uncompressed size is 0: empty bucket.
+        pos += encodeVarInt(0, &compressedOutput[pos]);
+        continue;
+      }
+      uint16_t* d = &edata[i][0];
+      // first, compress MSBs (most significant bytes)
+      // The bytes are staged 8 bytes past the write position before each
+      // compressWithEntropyCode call.
+      uint8_t* p = &compressedOutput[pos + 8];
+      for (size_t x = 0; x < S; ++x) p[x] = d[x] >> 8, c += (p[x] ? 0 : 1);
+      PIK_RETURN_IF_ERROR(
+        compressWithEntropyCode(&pos, S, compressedOutput));
+      if (S < 120 || c < 120) {  // 120
+        // then, compress LSBs (least significant bytes)
+        p = &compressedOutput[pos + 8];
+        for (size_t x = 0; x < S; ++x) p[x] = d[x] & 255;  // All LSBs!
+        PIK_RETURN_IF_ERROR(
+          compressWithEntropyCode(&pos, S, compressedOutput));
+      } else {
+        // Threshold 0x200 when zeros make up more than 13/16 of the bucket,
+        // otherwise 0x100.
+        c = (c > (S * 13 >> 4) ? 2 : 1) << 8;
+        p = &compressedOutput[pos + 8];
+        size_t y = 0;
+        for (size_t x = 0; x < S; ++x)
+          if (d[x] < c) p[y++] = d[x] & 255;  // LSBs of values below c
+        if (y) {
+          PIK_RETURN_IF_ERROR(
+            compressWithEntropyCode(&pos, y, compressedOutput));
+        }
+
+        p = &compressedOutput[pos + 8];
+        y = 0;
+        for (size_t x = 0; x < S; ++x)
+          if (d[x] >= c) p[y++] = d[x] & 255;  // LSBs of values >= c
+        if (y) {
+          PIK_RETURN_IF_ERROR(
+            compressWithEntropyCode(&pos, y, compressedOutput));
+        }
+      }  // if (S < 120)
+    }    // for i
+    *csize = pos;
+    return true;
+  }
+  // Encodes a 3-plane 16-bit image group by group; one method byte per group.
+  bool Colorful16bit_compress(const Image3U& img_in, PaddedBytes* bytes) {
+    WithSIGN = kWithSign3, BitsMAX = kBitsMax3, NumCONTEXTS = kNumContexts3;
+    clock_t start = clock();  // elapsed time is printed only if kNumRuns > 1
+    // Work on a copy: the palette step and the mixed-plane branch below
+    // modify pixel rows in place.
+    Image3U img = CopyImage(img_in);
+    // Scratch space: six candidate streams of 2 * kGroupSize2plus bytes each.
+    std::vector<uint8_t> temp_buffer(kGroupSize2plus * 2 * 6);
+    compressedData = temp_buffer.data();
+    // NOTE(review): FWr/FWrByte are macros defined outside this excerpt;
+    for (int run = 0; run < kNumRuns; ++run) {  // presumably they emit output.
+      size_t xsize = img.xsize(), ysize = img.ysize(), pos;
+      pos =  encodeVarInt(xsize, &compressedData[0]);
+      pos += encodeVarInt(ysize, &compressedData[pos]);
+      FWr(&compressedData[0], pos);  // header: both dimensions as varints
+
+      if (xsize * ysize > 256 * 256)  // TODO: smarter decision making here
+        PerChannelPalette_compress_3(img, bytes, run);
+      // Slots 2..6 of the scratch buffer (slot 1 is compressedData itself).
+      uint8_t* compressedData2 = &compressedData[kGroupSize2plus * 2];
+      uint8_t* compressedData3 = &compressedData[kGroupSize2plus * 4];
+      uint8_t* cd4 = &compressedData[kGroupSize2plus * 6];
+      uint8_t* cd5 = &compressedData[kGroupSize2plus * 8];
+      uint8_t* cd6 = &compressedData[kGroupSize2plus * 10];
+      for (size_t groupY = 0; groupY < ysize; groupY += kGroupSize) {
+        for (size_t groupX = 0; groupX < xsize; groupX += kGroupSize) {
+          size_t S1, S2, S3, S4, S5, S6, s1, s2, s3, p1, p2, p3;
+          uint8_t *cd1, *cd2, *cd3;
+          int planeMethod;  // Here we try guessing which of the 30 PlaneMethods
+                            // is best, after trying just six color planes.
+          PIK_RETURN_IF_ERROR(
+            cmprs512x512(img, PL1, PL1, groupY, groupX, compressedData,  &s1));
+          PIK_RETURN_IF_ERROR(
+            cmprs512x512(img, PL2, PL2, groupY, groupX, compressedData2, &s2));
+          PIK_RETURN_IF_ERROR(
+            cmprs512x512(img, PL3, PL3, groupY, groupX, compressedData3, &s3));
+          // Primary plane p1: PL2 unless PL1 or PL3 alone compresses smaller.
+          S1 = s2, p1 = PL2, cd1 = compressedData2, planeMethod = 10;
+          S2 = s1, p2 = PL1, cd2 = compressedData;
+          S3 = s3, p3 = PL3, cd3 = compressedData3;
+          if (s1 < s2 * 63 / 64 && s1 < s3) {
+            S1 = s1, p1 = PL1, cd1 = compressedData, planeMethod = 0;
+            S2 = s2, p2 = PL2, cd2 = compressedData2;
+            S3 = s3, p3 = PL3, cd3 = compressedData3;
+          } else if (s3 < s2 * 63 / 64 && s3 < s1) {
+            S1 = s3, p1 = PL3, cd1 = compressedData3, planeMethod = 20;
+            S2 = s1, p2 = PL1, cd2 = compressedData;
+            S3 = s2, p3 = PL2, cd3 = compressedData2;
+          }
+          PIK_RETURN_IF_ERROR(
+            cmprs512x512(img, p2, p1, groupY, groupX, cd4, &S4)); // R-G+0x8000
+          PIK_RETURN_IF_ERROR(
+            cmprs512x512(img, p3, p1, groupY, groupX, cd5, &S5)); // B-G+0x8000
+          if (p1 == PL1)
+            FWr(cd1, S1)
+          // p1's stream is written first, in the middle or last (PL1/PL2/PL3).
+          if (S4 >= S2 && S5 >= S3) {  // neither delta against p1 is smaller
+            PIK_RETURN_IF_ERROR(
+              cmprs512x512(img, p2, p3, groupY, groupX, cd6, &S6)); // R-B+0x..
+            if (S6 >= S2 && S6 >= S3)     FWr(cd2, S2)
+            else if (S3 > S2 && S3 > S6)  FWr(cd2, S2)
+            else                          FWr(cd6, S6)
+            if (p1 == PL2)  FWr(cd1, S1)
+            if (S6 >= S2 && S6 >= S3)    { FWr(cd3, S3) }
+            else if (S3 > S2 && S3 > S6) { FWr(cd6, S6) planeMethod+=5; }
+            else                         { FWr(cd3, S3) planeMethod+=4; }
+          }
+          else {  // at least one delta against p1 is smaller
+            size_t yEnd = std::min((size_t)kGroupSize, ysize - groupY) + groupY;
+            size_t xEnd = std::min((size_t)kGroupSize, xsize - groupX);
+            size_t pp = S5 < S4? p2 : p3;  // plane rewritten in place below
+            for (size_t y = groupY; y < yEnd; ++y) {
+                uint16_t* PIK_RESTRICT row1 = img.PlaneRow(p1, y) + groupX;
+                uint16_t* PIK_RESTRICT row2 = img.PlaneRow(pp, y) + groupX;
+                for (size_t x = 0; x < xEnd; ++x) {  // v2: presumably undoes an earlier "- p1 + 0x8000" delta - TODO confirm
+                  uint32_t v1 = row1[x], v2 = (row2[x] + v1 + 0x8000) & 0xffff;
+                  row2[x] = ((v1 + v2) >> 1) - v1 + 0x8000;  // avg - v1, biased
+                }
+            }
+            if (S5 < S4) {
+              PIK_RETURN_IF_ERROR(
+                cmprs512x512(img, p3, p2, groupY, groupX, cd6, &S6)); //B-RpG/2
+              if (S4 < S2)  FWr(cd4, S4) else  FWr(cd2, S2)
+              if (p1 == PL2)
+                FWr(cd1, S1)
+              if (S3 <= S5 && S3 <= S6) {
+                FWr(cd3, S3) planeMethod += 1;
+              } else if (S5 <= S6) {
+                FWr(cd5, S5) planeMethod += (S4 < S2 ? 3 : 2);
+              } else {
+                FWr(cd6, S6) planeMethod += (S4 < S2 ? 6 : 8);
+              }
+            } else {
+              PIK_RETURN_IF_ERROR(
+                cmprs512x512(img, p2, p3, groupY, groupX, cd6, &S6)); //R-BpG/2
+              if (S2 <= S4 && S2 <= S6) {
+                FWr(cd2, S2) planeMethod += 2;
+              } else if (S4 <= S6) {
+                FWr(cd4, S4) planeMethod += (S5 < S3 ? 3 : 1);
+              } else {
+                FWr(cd6, S6) planeMethod += (S5 < S3 ? 7 : 9);
+              }
+              if (p1 == PL2)
+                FWr(cd1, S1)
+              if (S5 < S3) FWr(cd5, S5) else FWr(cd3, S3)
+            }
+          }
+          if (p1 == PL3)
+            FWr(cd1, S1)
+          FWrByte(planeMethod);  // method byte follows the group's streams
+        }                                       // groupX
+      }                                         // groupY
+    }                                           // run
+    if (kNumRuns > 1)
+      printf("%d runs, %1.5f seconds", kNumRuns,
+             ((double)clock() - start) / CLOCKS_PER_SEC);
+    return true;
+  }
+};  // struct State
+
+}  // namespace
+
+bool Grayscale16bit_compress(const ImageU& img, PaddedBytes* bytes) {
+  return std::unique_ptr<State>(new State())  // fresh codec state per call
+      ->Grayscale16bit_compress(img, bytes);
+}
+
+bool Grayscale16bit_decompress(const PaddedBytes& bytes, size_t* pos,
+                               ImageU* result) {
+  return std::unique_ptr<State>(new State())  // fresh codec state per call
+      ->Grayscale16bit_decompress(bytes, pos, result);
+}
+
+bool Colorful16bit_compress(const Image3U& img, PaddedBytes* bytes) {
+  return std::unique_ptr<State>(new State())  // fresh codec state per call
+      ->Colorful16bit_compress(img, bytes);
+}
+
+bool Colorful16bit_decompress(const PaddedBytes& bytes, size_t* pos,
+                              Image3U* result) {
+  return std::unique_ptr<State>(new State())  // fresh codec state per call
+      ->Colorful16bit_decompress(bytes, pos, result);
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/lossless16.h b/codec/L2/demos/pikEnc/host/pik/lossless16.h
new file mode 100755
index 0000000000..ecaefe8fcd
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/lossless16.h
@@ -0,0 +1,29 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+// @author Alexander Rhatushnyak
+
+#ifndef PIK_LOSSLESS16_H_
+#define PIK_LOSSLESS16_H_
+
+#include "pik/image.h"
+#include "pik/padded_bytes.h"
+
+namespace pik {
+
+// Lossless codecs for 16-bit images. *_compress appends to `bytes`;
+// *_decompress starts at byte offset `*pos` and sets `*pos` to point to the
+// first unconsumed byte. Single-plane (ImageU) variants:
+bool Grayscale16bit_compress(const ImageU& img, PaddedBytes* bytes);
+bool Grayscale16bit_decompress(const PaddedBytes& bytes, size_t* pos,
+                               ImageU* result);
+// Three-plane (Image3U) variants:
+bool Colorful16bit_compress(const Image3U& img, PaddedBytes* bytes);
+bool Colorful16bit_decompress(const PaddedBytes& bytes, size_t* pos,
+                              Image3U* result);
+}  // namespace pik
+
+#endif  // PIK_LOSSLESS16_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/lossless8.cc b/codec/L2/demos/pikEnc/host/pik/lossless8.cc
new file mode 100755
index 0000000000..6c445bac96
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/lossless8.cc
@@ -0,0 +1,1453 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+// @author Alexander Rhatushnyak
+
+#include "pik/lossless8.h"
+
+#include <cmath>
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "pik/lossless_entropy.h"
+
+namespace pik {
+
+namespace {
+
+static const int mulWeights0and1_R_[] = {  // predict_R_: [mE], [mE + 1]
+    34, 36,  // when errors are small,
+    31, 37,  // we assume they are random noise,
+    33, 37,  // and penalize predictors 0 and 1
+    36, 40, 39, 44, 42, 46, 43, 47, 43, 42,
+};
+// predict_R_: [mE] scales predictor 3's weight, [mE + 1] scales teNE in the
+static const int mulWeights3teNE_R_[] = {  // context test (mE is even, 0..14)
+    28, 0, 24, 15, 24, 19, 24, 16, 23, 12, 23, 12, 25, 11, 32, 11,
+};
+// predict_W_: multipliers for the weights of predictors 0 and 1.
+static const int mulWeights0and1_W_[] = {
+    27, 31,  // when errors are small,
+    33, 31,  // we assume they are random noise,
+    40, 34,  // and penalize predictors 0 and 1
+    43, 36, 52, 43, 59, 45, 63, 43, 65, 28,
+};
+// predict_W_: [mE] scales predictor 3's weight, [mE + 1] scales teNW + teNE.
+static const int mulWeights3teNE_W_[] = {
+    31, 0, 31, 21, 29, 19, 28, 13, 26, 14, 28, 24, 32, 26, 43, 35,
+};
+// predict_N_: multipliers for the weights of predictors 0 and 1.
+static const int mulWeights0and1_N_[] = {
+    43, 23,  // when errors are small,
+    38, 21,  // we assume they are random noise,
+    35, 24,  // and penalize predictors 0 and 1
+    34, 27, 35, 29, 33, 31, 28, 31, 23, 31,
+};
+// predict_N_: [mE] scales predictor 3's weight, [mE + 1] scales teNE.
+static const int mulWeights3teNE_N_[] = {
+    27, 0, 23, 29, 26, 34, 29, 29, 30, 13, 35, 13, 40, 11, 51, 9,
+};
+// Sizes: group edge 512, buffer = group area * 9 / 8, error cap per predictor.
+static const int kWithSign = 7, kNumContexts = 8 + kWithSign + 2, kNumRuns = 1,
+            kGroupSize = 512, kGroupSize2plus = kGroupSize * kGroupSize * 9 / 8,
+            kMaxError = 101, kMaxSumErrors = kMaxError * 7 + 1;
+
+// Shifts `value` left by `shift` bits. The shift is carried out on the
+// unsigned representation, which avoids the undefined behavior that a left
+PIK_INLINE int LshInt(int value, unsigned shift) {  // shift of a signed has.
+  const unsigned bits = static_cast<unsigned>(value);
+  return static_cast<int>(bits << shift); }
+
+// TODO(lode): split state variables needed for encoder from those for decoder
+//             and perform one-time global initialization where possible.
+struct State {
+  const int PBits = 3,  // SET ME TO ZERO FOR A FASTER VERSION WITH NO ROUNDING!
+      toRound = ((1 << PBits) >> 1), toRound_m1 = (toRound ? toRound - 1 : 0);
+  typedef enum { PM_Regular, PM_West, PM_North } PredictMode;
+  // PBits: extra low bits carried by predictions; toRound is half of 1 << PBits.
+  // uint64_t gqe[kNumContexts];  // global quantized errors (all groups) counts
+  // edata[c] collects the residual codes of the current group for context c.
+  uint8_t edata[kNumContexts][kGroupSize * kGroupSize],  // size should be [2][]
+                        // instead of [kNumContexts][] in the Production edition
+      compressedDataTmpBuf[kGroupSize2plus], *compressedData;
+  uint8_t errors0[kGroupSize*2+4];  // Predictor 0 errors, two rows; each 0..kMaxError before accumulation
+  uint8_t errors1[kGroupSize*2+4];  // Errors of predictor 1
+  uint8_t errors2[kGroupSize*2+4];  // Errors of predictor 2
+  uint8_t errors3[kGroupSize*2+4];  // Errors of predictor 3
+  int16_t trueErr[kGroupSize*2];  // prediction - (pixel << PBits), two rows
+  uint8_t quantizedError[kGroupSize * 2];  // The range is 0...14, all are
+                                           // even due to quantizedInit()
+
+#ifdef SIMPLE_signToLSB_TRANSFORM  // to fully disable, "=i;" in the init macros
+  // Simple variant: 256-entry maps that move the error byte's sign into bit 0.
+  uint8_t signLSB_forwardTransform[256], signLSB_backwardTransform[256]; //const
+#define ToLSB_FRWRD signLSB_forwardTransform[err & 255]
+#define ToLSB_BKWRD (prediction - signLSB_backwardTransform[q]) & 255
+  // Forward map: i < 128 -> 2 * i, otherwise (255 - i) * 2 + 1.
+#define signToLSB_FORWARD_INIT  \
+  for (int i = 0; i < 256; ++i) \
+    signLSB_forwardTransform[i] = (i & 128 ? (255 - i) * 2 + 1 : i * 2);
+  // Inverse map: odd i -> 255 - (i >> 1), even i -> i >> 1.
+#define signToLSB_BACKWARD_INIT \
+  for (int i = 0; i < 256; ++i) \
+    signLSB_backwardTransform[i] = (i & 1 ? 255 - (i >> 1) : i >> 1);
+
+#else  // 64K-entry maps indexed by (8-bit prediction) * 256 + value or code
+  uint8_t signLSB_forwardTransform[1 << 16], signLSB_backwardTransform[1 << 16];
+#define ToLSB_FRWRD signLSB_forwardTransform[prediction * 256 + truePixelValue]
+#define ToLSB_BKWRD \
+  signLSB_backwardTransform[((prediction + toRound_m1) >> PBits) * 256 + q]
+  // Forward: code of v = its step in an outward walk from p (p-1, p+1, p-2, ...) kept inside 0..255.
+#define signToLSB_FORWARD_INIT                                               \
+  for (int p = 0; p < 256; ++p) {                                            \
+    signLSB_forwardTransform[p * 256 + p] = 0;                               \
+    for (int v, top = p, btm = p, d = 1; d < 256; ++d) {                     \
+      v = (d & 1 ? (btm > 0 ? --btm : ++top) : (top < 255 ? ++top : --btm)); \
+      signLSB_forwardTransform[p * 256 + v] = d;                             \
+    }                                                                        \
+  }
+  // Backward: the same walk stored the other way round, value v for code d.
+#define signToLSB_BACKWARD_INIT                                              \
+  for (int p = 0; p < 256; ++p) {                                            \
+    signLSB_backwardTransform[p * 256] = p;                                  \
+    for (int v, top = p, btm = p, d = 1; d < 256; ++d) {                     \
+      v = (d & 1 ? (btm > 0 ? --btm : ++top) : (top < 255 ? ++top : --btm)); \
+      signLSB_backwardTransform[p * 256 + d] = v;                            \
+    }                                                                        \
+  }
+#endif
+  // Lookup tables filled once by the constructor and only read afterwards.
+  uint8_t quantizedTable[256], diff2error[512 * 2];  // const
+  uint16_t error2weight[kMaxSumErrors];              // const
+
+  State() {  // Fills every lookup table and zeroes edata.
+    for (int j = 0; j < kMaxSumErrors; ++j)
+      error2weight[j] =  // weight shrinks as the summed error j grows
+          150 * 512 / (58 + j * std::sqrt(j + 50));  // const init!  150 58 50
+    // diff2error[512 + j] = min(|j|, kMaxError) for j in -512..511.
+    for (int j = -512; j <= 511; ++j)
+      diff2error[512 + j] = std::min(j < 0 ? -j : j, kMaxError);  // const init!
+    for (int j = 0; j <= 255; ++j)
+      quantizedTable[j] = quantizedInit(j);  // const init!
+    // quantizedTable has 256 entries (indices 0..255), each an even value
+    // in 0..14 as produced by quantizedInit().
+    signToLSB_FORWARD_INIT       // const init!
+    signToLSB_BACKWARD_INIT      // const init!
+    // Prevent uninitialized values in case of invalid compressed data
+    memset(edata, 0, sizeof(edata));
+  }
+  // Maps a residual code 0..255 to its even bucket 0..14 via quantizedTable.
+  PIK_INLINE int quantized(int x) {
+    assert(0 <= x && x <= 255);
+    return quantizedTable[x];
+  }
+  // Computes one quantizedTable entry: halve x (rounding up), then bucket it.
+  PIK_INLINE int quantizedInit(int x) {
+    assert(0 <= x && x <= 255);
+    const int half = (x + 1) >> 1;
+    // 0..3 keep their value; larger inputs fall into the buckets that start
+    // at 4, 6, 9 and 15.
+    const int bucket =
+        half >= 15 ? 7 : half >= 9 ? 6 : half >= 6 ? 5 : half >= 4 ? 4 : half;
+    return bucket * 2;  // doubled, so the result is an even number in 0..14
+  }
+
+  int prediction0,  // Outputs of the four sub-predictors for the current pixel.
+      prediction1,  // Asserted range: -255 << PBits ... 510 << PBits
+      prediction2,
+      prediction3;  // And -510..510 after subtracting truePixelValue
+  int numColors[3], planeMethod, maxerrShift, maxTpv, width; // width-1 actually
+  // Row cursors inside the current group: this row, the row above, two above.
+  uint8_t* PIK_RESTRICT rowImg;
+  uint8_t const *PIK_RESTRICT rowPrev, *PIK_RESTRICT rowPP;
+  // Predictor for a row with no row above: uses left neighbours only (yp unused).
+  PIK_INLINE int predictY0(size_t x, size_t yc, size_t yp, int* maxErr) {
+    *maxErr = (x == 0 ? kNumContexts - 3  // first pixel: fixed context
+                      : x == 1 ? quantizedError[yc]
+                               : std::max(quantizedError[yc],
+                                          quantizedError[yc - 1]));
+    prediction1 = prediction2 = prediction3 = (x > 0 ? rowImg[x - 1] : 27)
+                                              << PBits;  // left pixel, or 27
+    prediction0 =  // left pixel plus 5/16 of the previous horizontal step
+        (x <= 1 ? prediction1
+                : prediction1 +
+                      LshInt(rowImg[x - 1] - rowImg[x - 2], PBits) * 5 / 16);
+    return (prediction0 < 0 ? 0 : prediction0 > maxTpv ? maxTpv : prediction0);
+  }
+  // Predictor for x == 0 when a row above exists: uses N and NE only (yc unused).
+  PIK_INLINE int predictX0(size_t x, size_t yc, size_t yp, int* maxErr) {
+    *maxErr =  // larger quantized error of N and NE (NE == N in the last column)
+        std::max(quantizedError[yp], quantizedError[yp + (x < width ? 1 : 0)]);
+    prediction1 = prediction2 = prediction3 = rowPrev[x] << PBits;
+    prediction0 =  // (7 * N + NE) / 8 with rounding
+      (((rowPrev[x] * 7 + rowPrev[x + (x < width ? 1 : 0)]) << PBits) + 4) >> 3;
+    return prediction0;
+  }
+  // Regular mode: returns the prediction (scaled by 1 << PBits), context in *maxErr.
+  PIK_INLINE int predict_R_(size_t x, size_t yc, size_t yp, int* maxErr) {
+    if (!rowPrev)
+      return predictY0(x, yc, yp, maxErr);  // OK for Prototype edition
+    if (x == 0)
+      return predictX0(x, yc, yp, maxErr);  // to be fixed in Production
+    // Neighbours scaled by 1 << PBits; a1 is 0 in the last column, so NE == N.
+    int N = rowPrev[x] << PBits, W = rowImg[x - 1] << PBits,
+        NW = rowPrev[x - 1] << PBits;
+    int a1 = (x < width ? 1 : 0), NE = rowPrev[x + a1] << PBits;
+    int weight0 = errors0[yp] + errors0[yp - 1] + errors0[yp + a1];
+    int weight1 = errors1[yp] + errors1[yp - 1] + errors1[yp + a1];
+    int weight2 = errors2[yp] + errors2[yp - 1] + errors2[yp + a1];
+    int weight3 = errors3[yp] + errors3[yp - 1] + errors3[yp + a1];
+    // Context seed: largest quantized error among W, N, NW, NE (and WW if x > 1).
+    uint8_t mxe = quantizedError[yc];
+    mxe = std::max(mxe, quantizedError[yp]);
+    mxe = std::max(mxe, quantizedError[yp - 1]);
+    mxe = std::max(mxe, quantizedError[yp + a1]);
+    if (x > 1) mxe = std::max(mxe, quantizedError[yc - 1]);
+    int mE = mxe;  // at this point 0 <= mxe <= 14,  and  mxe % 2 == 0
+    // Summed errors become weights (larger error -> smaller weight), per context.
+    weight0 = error2weight[weight0] * mulWeights0and1_R_[0 + mE];
+    weight1 = error2weight[weight1] * mulWeights0and1_R_[1 + mE];
+    weight2 = error2weight[weight2] * 32;  // Baseline
+    weight3 = error2weight[weight3] * mulWeights3teNE_R_[0 + mE];
+    // Signed errors of the neighbours.
+    int teW = trueErr[yc];  // callers pass yc as the W pixel's index
+    int teN = trueErr[yp];  // callers pass yp as the N pixel's index
+    int sumWN = teN + teW;  //  -510<<PBits <= sumWN <= 510<<PBits
+    int teNW = trueErr[yp - 1];
+    int teNE = trueErr[yp + a1];
+    // Context refinement: make mE odd, or pick a flat-area context when mE == 0.
+    if (mE) {
+      if (sumWN * 40 + teNW * 20 + teNE * mulWeights3teNE_R_[1 + mE] <= 0) ++mE;
+    } else {
+      if (N == W && N == NE)
+        mE = ((sumWN | teNE | teNW) == 0 ? kNumContexts - 1 : 1);
+    }
+    *maxErr = mE;
+    // Four candidates: W-based, N-based, W + NE - N, and N plus scaled N - PP.
+    prediction0 = W - (sumWN + teNW) / 4;  // 7/32 works better than 1/4 ?
+    prediction1 =
+        N - (sumWN + teNE) / 4;  // predictors 0 & 1 rely on true errors
+    prediction2 = W + NE - N;
+    int t = (teNE * 3 + teNW * 4 + 7) >> 5;
+    prediction3 = N + (N - (rowPP[x] << PBits)) * 23 / 32 + (W - NW) / 16 - t;
+    assert(LshInt(-255, PBits) <= prediction0 && prediction0 <= 510 << PBits);
+    assert(LshInt(-255, PBits) <= prediction1 && prediction1 <= 510 << PBits);
+    assert(LshInt(-255, PBits) <= prediction2 && prediction2 <= 510 << PBits);
+    assert(LshInt(-255, PBits) <= prediction3 && prediction3 <= 510 << PBits);
+    // Weighted average of the four candidates.
+    int sumWeights = weight0 + weight1 + weight2 + weight3;
+    // assert(sumWeights>0);  // true if min(error2weight)*min(mulWeights**_R_)
+    // > 0
+
+    int prediction = (prediction0 * weight0 + prediction1 * weight1 +
+                      (sumWeights >> 3) + prediction2 * weight2 +
+                      prediction3 * weight3) /  // biased rounding: >>3
+                     sumWeights;
+    // teN, teW, teNW share a sign bit and are not all equal: clamp to 0..maxTpv.
+    if (((teN ^ teW) | (teN ^ teNW)) > 0)  // if all three have the same sign
+      return (prediction < 0 ? 0 : prediction > maxTpv ? maxTpv : prediction);
+    // Otherwise clamp to the range spanned by W, N and NE.
+    int max = (W > N ? W : N);
+    int min = W + N - max;
+    if (NE > max) max = NE;
+    if (NE < min) min = NE;
+    return (prediction < min ? min : prediction > max ? max : prediction);
+  }
+  // West mode: returns the prediction (scaled by 1 << PBits), context in *maxErr.
+  PIK_INLINE int predict_W_(size_t x, size_t yc, size_t yp, int* maxErr) {
+    if (!rowPrev)
+      return predictY0(x, yc, yp, maxErr);  // OK for Prototype edition
+    if (x == 0)
+      return predictX0(x, yc, yp, maxErr);  // to be fixed in Production
+    // Neighbours scaled by 1 << PBits; a1 is 0 in the last column, so NE == N.
+    int N = rowPrev[x] << PBits, W = rowImg[x - 1] << PBits,
+        NW = rowPrev[x - 1] << PBits;
+    int a1 = (x < width ? 1 : 0), NE = rowPrev[x + a1] << PBits;
+    int weight0 = (errors0[yp] * 3 >> 1) + errors0[yp - 1] + errors0[yp + a1];
+    int weight1 = (errors1[yp] * 3 >> 1) + errors1[yp - 1] + errors1[yp + a1];
+    int weight2 = (errors2[yp] * 3 >> 1) + errors2[yp - 1] + errors2[yp + a1];
+    int weight3 = (errors3[yp] * 3 >> 1) + errors3[yp - 1] + errors3[yp + a1];
+    // Context seed: largest quantized error among W, N, NW, NE (and WW if x > 1).
+    uint8_t mxe = quantizedError[yc];
+    mxe = std::max(mxe, quantizedError[yp]);
+    mxe = std::max(mxe, quantizedError[yp - 1]);
+    mxe = std::max(mxe, quantizedError[yp + a1]);
+    if (x > 1) mxe = std::max(mxe, quantizedError[yc - 1]);
+    int mE = mxe;  // at this point 0 <= mxe <= 14,  and  mxe % 2 == 0
+    // Summed errors (N counted 1.5x) become weights, tuned per context.
+    weight0 = error2weight[weight0] * mulWeights0and1_W_[0 + mE];
+    weight1 = error2weight[weight1] * mulWeights0and1_W_[1 + mE];
+    weight2 = error2weight[weight2] * 32;  // Baseline
+    weight3 = error2weight[weight3] * mulWeights3teNE_W_[0 + mE];
+    // Signed errors of the neighbours.
+    int teW = trueErr[yc];  // callers pass yc as the W pixel's index
+    int teN = trueErr[yp];  // callers pass yp as the N pixel's index
+    int sumWN = teN + teW;  //  -510<<PBits <= sumWN <= 510<<PBits
+    int teNW = trueErr[yp - 1];
+    int teNE = trueErr[yp + a1];
+    // Context refinement: make mE odd, or pick a flat-area context when mE == 0.
+    if (mE) {
+      if (sumWN * 40 + (teNW + teNE) * mulWeights3teNE_W_[1 + mE] <= 0) ++mE;
+    } else {
+      if (N == W && N == NE)
+        mE = ((sumWN | teNE | teNW) == 0 ? kNumContexts - 1 : 1);
+    }
+    *maxErr = mE;
+    // Four candidates: W-based, N-based, W + NE - N, and N plus half of N - PP.
+    prediction0 =
+        W - (sumWN + teNW) * 9 / 32;  // pr's 0 & 1 rely on true errors
+    prediction1 =
+        N - (sumWN + teNE) * 171 / 512;  // clamping not needed, is it?
+    prediction2 = W + NE - N;
+    prediction3 =
+        N + ((N - (rowPP[x] << PBits)) >> 1) + ((W - NW) * 19 - teNW * 13) / 64;
+    assert(LshInt(-255, PBits) <= prediction0 && prediction0 <= 510 << PBits);
+    assert(LshInt(-255, PBits) <= prediction1 && prediction1 <= 510 << PBits);
+    assert(LshInt(-255, PBits) <= prediction2 && prediction2 <= 510 << PBits);
+    assert(LshInt(-255, PBits) <= prediction3 && prediction3 <= 510 << PBits);
+    // Weighted average of the four candidates, rounded to nearest.
+    int sumWeights = weight0 + weight1 + weight2 + weight3;
+    // assert(sumWeights>0);  // true if min(error2weight)*min(mulWeights**_W_)
+    // > 0
+
+    int prediction =
+        (prediction0 * weight0 + prediction1 * weight1 + (sumWeights >> 1) +
+         prediction2 * weight2 + prediction3 * weight3) /
+        sumWeights;
+    // teN, teW, teNE share a sign bit and are not all equal: clamp to 0..maxTpv.
+    if (((teN ^ teW) | (teN ^ teNE)) > 0)  // if all three have the same sign
+      return (prediction < 0 ? 0 : prediction > maxTpv ? maxTpv : prediction);
+    // Otherwise clamp to the range spanned by W, N and NE.
+    int max = (W > N ? W : N);
+    int min = W + N - max;
+    if (NE > max) max = NE;
+    if (NE < min) min = NE;
+    return (prediction < min ? min : prediction > max ? max : prediction);
+  }
+  // North mode: returns the prediction (scaled by 1 << PBits), context in *maxErr.
+  PIK_INLINE int predict_N_(size_t x, size_t yc, size_t yp, int* maxErr) {
+    if (!rowPrev)
+      return predictY0(x, yc, yp, maxErr);  // OK for Prototype edition
+    if (x == 0)
+      return predictX0(x, yc, yp, maxErr);  // to be fixed in Production
+    // Neighbours scaled by 1 << PBits; a1 is 0 in the last column, so NE == N.
+    int N = rowPrev[x] << PBits, W = rowImg[x - 1] << PBits;  //, NW is not used
+    int a1 = (x < width ? 1 : 0), NE = rowPrev[x + a1] << PBits;
+    int weight0 = errors0[yp] + errors0[yp - 1] + errors0[yp + a1];
+    int weight1 = errors1[yp] + errors1[yp - 1] + errors1[yp + a1];
+    int weight2 = errors2[yp] + errors2[yp - 1] + errors2[yp + a1];
+    int weight3 = errors3[yp] + errors3[yp - 1] + errors3[yp + a1];
+    // Context seed: largest quantized error among W, N, NW, NE (and WW if x > 1).
+    uint8_t mxe = quantizedError[yc];
+    mxe = std::max(mxe, quantizedError[yp]);
+    mxe = std::max(mxe, quantizedError[yp - 1]);
+    mxe = std::max(mxe, quantizedError[yp + a1]);
+    if (x > 1) mxe = std::max(mxe, quantizedError[yc - 1]);
+    int mE = mxe;  // at this point 0 <= mxe <= 14,  and  mxe % 2 == 0
+    // Summed errors become weights (larger error -> smaller weight), per context.
+    weight0 = error2weight[weight0] * mulWeights0and1_N_[0 + mE];
+    weight1 = error2weight[weight1] * mulWeights0and1_N_[1 + mE];
+    weight2 = error2weight[weight2] * 32;  // Baseline
+    weight3 = error2weight[weight3] * mulWeights3teNE_N_[0 + mE];
+    // Signed errors of the neighbours.
+    int teW = trueErr[yc];  // callers pass yc as the W pixel's index
+    int teN = trueErr[yp];  // callers pass yp as the N pixel's index
+    int sumWN = teN + teW;  //  -510<<PBits <= sumWN <= 510<<PBits
+    int teNW = trueErr[yp - 1];
+    int teNE = trueErr[yp + a1];
+    // Context refinement: make mE odd, or pick a flat-area context when mE == 0.
+    if (mE) {
+      if (sumWN * 40 + teNW * 23 + teNE * mulWeights3teNE_N_[1 + mE] <= 0) ++mE;
+    } else {
+      if (N == W && N == NE)
+        mE = ((sumWN | teNE | teNW) == 0 ? kNumContexts - 1 : 1);
+    }
+    *maxErr = mE;
+    // Four candidates: N-based, W-based, W + NE - N, and N plus scaled N - PP.
+    prediction0 = N - (sumWN + teNW + teNE) / 4;  // if bigger than 1/4,
+                                                  // clamping would be needed!
+    prediction1 =
+        W - ((teW * 2 + teNW) >> 2);  // pr's 0 & 1 rely on true errors
+    prediction2 = W + NE - N;
+    prediction3 = N + ((N - (rowPP[x] << PBits)) * 47) / 64 - (teN >> 2);
+    assert(LshInt(-255, PBits) <= prediction0 && prediction0 <= 510 << PBits);
+    assert(LshInt(-255, PBits) <= prediction1 && prediction1 <= 510 << PBits);
+    assert(LshInt(-255, PBits) <= prediction2 && prediction2 <= 510 << PBits);
+    assert(LshInt(-255, PBits) <= prediction3 && prediction3 <= 510 << PBits);
+    // Weighted average of the four candidates, rounded to nearest.
+    int sumWeights = weight0 + weight1 + weight2 + weight3;
+    // assert(sumWeights>0);  // true if min(error2weight)*min(mulWeights**_N_)
+    // > 0
+
+    int prediction =
+        (prediction0 * weight0 + prediction1 * weight1 + (sumWeights >> 1) +
+         prediction2 * weight2 + prediction3 * weight3) /
+        sumWeights;
+    // teN, teW, teNE share a sign bit and are not all equal: clamp to 0..maxTpv.
+    if (((teN ^ teW) | (teN ^ teNE)) > 0)  // if all three have the same sign
+      return (prediction < 0 ? 0 : prediction > maxTpv ? maxTpv : prediction);
+    // Otherwise clamp to the range spanned by W, N and NE.
+    int max = (W > N ? W : N);
+    int min = W + N - max;
+    if (NE > max) max = NE;
+    if (NE < min) min = NE;
+    return (prediction < min ? min : prediction > max ? max : prediction);
+  }
+  // Bumps the context's symbol count and records this pixel's error history.
+#define Update_Size_And_Errors                                  \
+  esize[maxErr] = s + 1;                                        \
+  trueErr[yc + x] = err;                                        \
+  q = quantized(q);                                             \
+  quantizedError[yc + x] = q;                                   \
+  uint8_t* dp = &diff2error[512 - truePixelValue];              \
+  errors0[1 + yp + x] +=                                        \
+      (errors0[yc + x] = dp[(prediction0 + toRound) >> PBits]); \
+  errors1[1 + yp + x] +=                                        \
+      (errors1[yc + x] = dp[(prediction1 + toRound) >> PBits]); \
+  errors2[1 + yp + x] +=                                        \
+      (errors2[yc + x] = dp[(prediction2 + toRound) >> PBits]); \
+  errors3[1 + yp + x] +=                                        \
+      (errors3[yc + x] = dp[(prediction3 + toRound) >> PBits]);
+  // Encoder step: turns rowImg[x] into a residual code stored in edata[maxErr].
+#define AfterPredictWhenCompressing                 \
+  maxErr >>= maxerrShift;                           \
+  assert(0 <= maxErr && maxErr <= kNumContexts - 1);\
+  int q, truePixelValue = rowImg[x];                \
+  int err = prediction - (truePixelValue << PBits); \
+  size_t s = esize[maxErr];                         \
+  prediction = (prediction + toRound_m1) >> PBits;  \
+  assert(0 <= prediction && prediction <= 255);     \
+  edata[maxErr][s] = q = ToLSB_FRWRD;               \
+  Update_Size_And_Errors  // ++gqe[maxErr];
+  // As above, but when the planes differ it first stores pixel - rowUse[x] + 0x80.
+#define AfterPredictWhenCompressing3                \
+  maxErr >>= maxerrShift;                           \
+  assert(0 <= maxErr && maxErr <= kNumContexts - 1);\
+  int q, truePixelValue = rowImg[x];                \
+  if (planeToCompress != planeToUse) {              \
+    truePixelValue -= (int)rowUse[x] - 0x80;        \
+    truePixelValue &= 0xff;                         \
+    rowImg[x] = truePixelValue;                     \
+  }                                                 \
+  int err = prediction - (truePixelValue << PBits); \
+  size_t s = esize[maxErr];                         \
+  prediction = (prediction + toRound_m1) >> PBits;  \
+  assert(0 <= prediction && prediction <= 255);     \
+  edata[maxErr][s] = q = ToLSB_FRWRD;               \
+  Update_Size_And_Errors  // ++gqe[maxErr];
+  // Decoder step: reads the next code of edata[maxErr] and writes rowImg[x].
+#define AfterPredictWhenDecompressing                          \
+  maxErr >>= maxerrShift;                                      \
+  assert(0 <= maxErr && maxErr <= kNumContexts - 1);           \
+  assert(0 <= prediction && prediction <= 255 << PBits);       \
+  size_t s = esize[maxErr];                                    \
+  int err, q = edata[maxErr][s], truePixelValue = ToLSB_BKWRD; \
+  rowImg[x] = truePixelValue;                                  \
+  err = prediction - (truePixelValue << PBits);                \
+  Update_Size_And_Errors
+  // Swaps the two row buffers (yc/yp) and sets rowImg/rowPrev/rowPP for row y.
+#define setRowImgPointers(imgRow)                              \
+  yc ^= kGroupSize, yp = kGroupSize - yc;                      \
+  rowImg = imgRow(groupY + y) + groupX;                        \
+  rowPrev = (y == 0 ? NULL : imgRow(groupY + y - 1) + groupX); \
+  rowPP = (y <= 1 ? rowPrev : imgRow(groupY + y - 2) + groupX);
+  // Three-plane encoder variant; also declares rowUse, the row of planeToUse.
+#define setRowImgPointers3(imgRow)                                        \
+  yc ^= kGroupSize, yp = kGroupSize - yc;                                 \
+  uint8_t const* PIK_RESTRICT rowUse;                                     \
+  rowImg = imgRow(planeToCompress, groupY + y) + groupX;                  \
+  rowUse = imgRow(planeToUse, groupY + y) + groupX;                       \
+  rowPrev =                                                               \
+      (y == 0 ? NULL : imgRow(planeToCompress, groupY + y - 1) + groupX); \
+  rowPP = (y <= 1 ? rowPrev : imgRow(planeToCompress, groupY + y - 2) + groupX);
+  // Three-plane decoder variant: same pointers for planeToDecompress, no rowUse.
+#define setRowImgPointers3dec(imgRow)                                       \
+  yc ^= kGroupSize, yp = kGroupSize - yc;                                   \
+  rowImg = imgRow(planeToDecompress, groupY + y) + groupX;                  \
+  rowPrev =                                                                 \
+      (y == 0 ? NULL : imgRow(planeToDecompress, groupY + y - 1) + groupX); \
+  rowPP =                                                                   \
+      (y <= 1 ? rowPrev : imgRow(planeToDecompress, groupY + y - 2) + groupX);
+
+  bool Grayscale8bit_compress(const ImageB& img_in, pik::PaddedBytes* bytes) {
+    clock_t start = clock();
+
+    // The code modifies the image for palette so must copy for now.
+    ImageB img = CopyImage(img_in);
+
+    size_t esize[kNumContexts], xsize = img.xsize(), ysize = img.ysize();
+    std::vector<uint8_t> temp_buffer(kGroupSize2plus);
+    compressedData = temp_buffer.data();
+
+    for (int run = 0; run < kNumRuns; ++run) {
+      int freqs[256];
+      memset(freqs, 0, sizeof(freqs));
+      for (size_t y = 0; y < ysize; ++y) {
+        uint8_t* const PIK_RESTRICT rowImg = img.Row(y);
+        for (size_t x = 0; x < xsize; ++x)  // UNROLL and PARALLELIZE ME!
+          ++freqs[rowImg[x]];  // They can also be used for guessing
+                               // photo/nonphoto
+      }
+      int palette[256], count = 0;
+      for (int i = 0; i < 256; ++i)
+        palette[i] = count, count += (freqs[i] ? 1 : 0);
+      int havePalette = (count < 255 ? 1 : 0);  // 255? or 256?
+      maxTpv = (havePalette ? std::min(255, count + 1) : 255) << PBits;
+
+      if (havePalette)
+        for (size_t y = 0; y < ysize; ++y) {
+          uint8_t* const PIK_RESTRICT rowImg = img.Row(y);
+          for (size_t x = 0; x < xsize; ++x)  // UNROLL and PARALLELIZE ME!
+            rowImg[x] = palette[rowImg[x]];
+        }
+
+      for (size_t groupY = 0; groupY < ysize; groupY += kGroupSize) {
+        for (size_t groupX = 0; groupX < xsize; groupX += kGroupSize) {
+          memset(esize, 0, sizeof(esize));
+          size_t yEnd = std::min((size_t)kGroupSize, ysize - groupY);
+          width       = std::min((size_t)kGroupSize, xsize - groupX) - 1;
+          size_t area = yEnd * (width + 1);
+          maxerrShift =
+              (area > 25600 ? 0 :
+               area > 12800 ? 1 : area > 4000 ? 2 : area > 400 ? 3 : 4);
+
+          uint64_t fromN = 0, fromW = 0;
+          for (size_t y = 1; y < yEnd; ++y) {
+            rowImg  = img.Row(groupY + y)     + groupX;
+            rowPrev = img.Row(groupY + y - 1) + groupX;
+            for (size_t x = 1; x <= width; ++x) {
+              int c = rowImg[x];
+              int N = rowPrev[x];
+              int W = rowImg[x - 1];
+              N -= c;
+              W -= c;
+              fromN += N * N;
+              fromW += W * W;
+            }
+          }
+          PredictMode pMode = PM_Regular;
+          if (fromW * 5 < fromN * 4)
+            pMode = PM_West;  // no 'else' to reduce codesize
+          if (fromN * 5 < fromW * 4)
+            pMode = PM_North;  // if (fromN < fromW*0.8)
+          // printf("%c ", pMode);
+
+          if (pMode == PM_Regular)  // Regular mode
+            for (size_t y = 0, yc = 0, yp; y < yEnd; ++y) {
+              setRowImgPointers(img.Row)
+              for (size_t x = 0; x <= width; ++x) {
+                int maxErr,
+                    prediction = predict_R_(x, yc + x - 1, yp + x, &maxErr);
+                AfterPredictWhenCompressing
+              }
+            }
+          else if (pMode == PM_West)  // 'West predicts better' mode
+            for (size_t y = 0, yc = 0, yp; y < yEnd; ++y) {
+              setRowImgPointers(img.Row)
+              for (size_t x = 0; x <= width; ++x) {
+                int maxErr,
+                    prediction = predict_W_(x, yc + x - 1, yp + x, &maxErr);
+                AfterPredictWhenCompressing
+              }
+            }
+          else if (pMode == PM_North)  // 'North predicts better' mode
+            for (size_t y = 0, yc = 0, yp; y < yEnd; ++y) {
+              setRowImgPointers(img.Row)
+              for (size_t x = 0; x <= width; ++x) {
+                int maxErr,
+                    prediction = predict_N_(x, yc + x - 1, yp + x, &maxErr);
+                AfterPredictWhenCompressing
+              }
+            }
+          else {
+          }  // TODO: other prediction modes!
+
+          size_t pos = 0;
+          if (groupY == 0 && groupX == 0) {
+            pos += encodeVarInt(xsize * 2 + havePalette, &compressedData[pos]);
+            pos += encodeVarInt(ysize, &compressedData[pos]);
+            if (havePalette) {  // Save bit 1 if color is present, bit 0 if not
+              const int kBitsPerByte = 8;
+              for (int i = 0; i < 256 / kBitsPerByte; ++i) {
+                int code = 0;
+                for (int j = kBitsPerByte - 1; j >= 0; --j)
+                  code = code * 2 + (freqs[i * 8 + j] ? 1 : 0);
+                compressedData[pos++] = code;
+              }  // for i
+            }    // if (havePalette)
+          }      // if (groupY...)
+          int nC = ((kNumContexts - 1) >> maxerrShift) + 1;
+          for (int i = 0; i < nC; ++i) {
+            if (esize[i]) {
+              // size_t cs = FSE_compress(&compressedDataTmpBuf[0],
+              // sizeof(compressedDataTmpBuf), &edata[i][0], esize[i]);
+              size_t cs;
+              if (!MaybeEntropyEncode(&edata[i][0], esize[i],
+                                      sizeof(compressedDataTmpBuf),
+                                      &compressedDataTmpBuf[0], &cs)) {
+                return PIK_FAILURE("lossless8");
+              }
+              size_t s = (cs <= 1 ? (esize[i] - 1) * 3 + 1 + cs : cs * 3);
+              pos +=
+                  encodeVarInt(i > 0 ? s : s * 3 + pMode, &compressedData[pos]);
+              if (cs == 1)
+                compressedData[pos++] = edata[i][0];
+              else if (cs == 0)
+                memcpy(&compressedData[pos], &edata[i][0], esize[i]),
+                pos += esize[i];
+              else
+                memcpy(&compressedData[pos], &compressedDataTmpBuf[0], cs),
+                pos += cs;
+            } else
+              pos += encodeVarInt(i > 0 ? 0 : pMode, &compressedData[pos]);
+          }  // i
+          if (kNumRuns == 1) {
+            size_t current = bytes->size();
+            bytes->resize(bytes->size() + pos);
+            memcpy(bytes->data() + current, &compressedData[0], pos);
+          }
+        }  // groupX
+      }    // groupY
+    }      // run
+    // for (int i=0; i<kNumContexts; ++i) printf("%3d
+    // ",gqe[i]*1000/(xsize*ysize)); printf("\n");
+
+    if (kNumRuns > 1)
+      printf("%d runs, %1.5f seconds", kNumRuns,
+             ((double)clock() - start) / CLOCKS_PER_SEC);
+    return true;
+  }
+
+  bool Grayscale8bit_decompress(const PaddedBytes& bytes, size_t* bytes_pos,
+                                ImageB* result) {
+    if (*bytes_pos > bytes.size()) return PIK_FAILURE("lossless8");
+    size_t compressedSize = bytes.size() - *bytes_pos;
+    const uint8_t* compressedData = bytes.data() + *bytes_pos;
+    // Decodes the grayscale stream that starts at bytes[*bytes_pos] into
+    // *result and advances *bytes_pos; malformed input yields PIK_FAILURE.
+    size_t maxDecodedSize = kGroupSize * kGroupSize;  // Size of an edata entry
+    clock_t start = clock();
+    size_t esize[kNumContexts], xsize, ysize, pos = 0;
+    xsize = decodeVarInt(compressedData, compressedSize, &pos);
+    ysize = decodeVarInt(compressedData, compressedSize, &pos);
+    int havePalette = xsize & 1, count = 256, palette[256];
+    if (havePalette) {
+      const uint8_t* p = &compressedData[pos];
+      pos += 32;  // 256 presence bits, one per gray level
+      if (pos >= compressedSize) return PIK_FAILURE("lossless8");
+      count = 0;
+      for (int i = 0; i < 256; ++i)
+        if (p[i >> 3] & (1 << (i & 7))) palette[count++] = i;
+    }
+    maxTpv = std::min(255, count + 1) << PBits;
+    xsize >>= 1;
+    if (!xsize || !ysize) return PIK_FAILURE("lossless8");
+    // Too large, would run out of memory. Chosen as reasonable limit for pik
+    // while being below default fuzzer memory limit. We check for total pixel
+    // size, and an additional restriction to ysize, because large ysize
+    // consumes more memory due to the scanline padding.
+    if (uint64_t(xsize) * uint64_t(ysize) >= 268435456ull || ysize >= 65536) {
+      return PIK_FAILURE("lossless8");
+    }
+    pik::ImageB img(xsize, ysize);
+    // The image is decoded one kGroupSize x kGroupSize tile (group) at a time.
+    for (int run = 0; run < kNumRuns; ++run) {
+      if (kNumRuns > 1) pos = 0;
+      for (size_t groupY = 0; groupY < ysize; groupY += kGroupSize) {
+        for (size_t groupX = 0; groupX < xsize; groupX += kGroupSize) {
+          size_t yEnd = std::min((size_t)kGroupSize, ysize - groupY);
+          width =       std::min((size_t)kGroupSize, xsize - groupX) - 1;
+          size_t area = yEnd * (width + 1);
+          maxerrShift =
+              (area > 25600 ? 0 :
+               area > 12800 ? 1 : area > 4000 ? 2 : area > 400 ? 3 : 4);
+          size_t decompressedSize = 0;  // is used only for the assert()
+          // Benchmark re-runs rewind pos to 0, so the header is skipped again.
+          if (kNumRuns > 1 && groupY == 0 && groupX == 0) {
+            decodeVarInt(compressedData, compressedSize,
+                         &pos);  // just skip them
+            decodeVarInt(compressedData, compressedSize, &pos);
+            if (havePalette) pos += 32;
+          }
+          PredictMode pMode = PM_Regular;  // always overwritten when i == 0
+          int nC = ((kNumContexts - 1) >> maxerrShift) + 1;
+          for (int i = 0; i < nC; ++i) {
+            size_t cs = decodeVarInt(compressedData, compressedSize, &pos);
+            if (i == 0) pMode = (PredictMode)(cs % 3), cs /= 3;
+            if (cs == 0) continue;
+            int mode = cs % 3;
+            cs /= 3;  // modes 1/2 store count - 1; cs + 1 must fit edata[i]
+            if (mode == 2) {  // one byte repeated cs + 1 times
+              if (pos >= compressedSize) return PIK_FAILURE("lossless8");
+              if (cs >= maxDecodedSize) return PIK_FAILURE("lossless8");
+              memset(&edata[i][0], compressedData[pos++], ++cs);
+              decompressedSize += cs;
+            } else if (mode == 1) {  // cs + 1 raw bytes
+              if (cs >= maxDecodedSize) return PIK_FAILURE("lossless8");
+              if (pos + cs >= compressedSize) return PIK_FAILURE("lossless8");
+              memcpy(&edata[i][0], &compressedData[pos], ++cs);
+              decompressedSize += cs, pos += cs;
+            } else {  // cs entropy-coded bytes
+              if (pos + cs > compressedSize) return PIK_FAILURE("lossless8");
+              size_t ds;
+              if (!MaybeEntropyDecode(&compressedData[pos], cs, maxDecodedSize,
+                                      &edata[i][0], &ds)) {
+                return PIK_FAILURE("lossless8");
+              }
+              pos += cs;
+              decompressedSize += ds;
+            }
+          }
+          if (decompressedSize != area) return PIK_FAILURE("lossless8");
+          if (groupY + kGroupSize >= ysize && groupX + kGroupSize >= xsize) {
+            /* if the last group */
+            // if (inpSize != pos) return PIK_FAILURE("lossless8");
+          }
+          memset(esize, 0, sizeof(esize));
+          // Dispatch on the predictor mode that was read from the stream.
+          if (pMode == PM_Regular)
+            for (size_t y = 0, yc = 0, yp; y < yEnd; ++y) {
+              setRowImgPointers(img.Row)
+              for (size_t x = 0; x <= width; ++x) {
+                int maxErr,
+                    prediction = predict_R_(x, yc + x - 1, yp + x, &maxErr);
+                AfterPredictWhenDecompressing
+              }
+            }
+          else if (pMode == PM_West)
+            for (size_t y = 0, yc = 0, yp; y < yEnd; ++y) {
+              setRowImgPointers(img.Row)
+              for (size_t x = 0; x <= width; ++x) {
+                int maxErr,
+                    prediction = predict_W_(x, yc + x - 1, yp + x, &maxErr);
+                AfterPredictWhenDecompressing
+              }
+            }
+          else if (pMode == PM_North)
+            for (size_t y = 0, yc = 0, yp; y < yEnd; ++y) {
+              setRowImgPointers(img.Row)
+              for (size_t x = 0; x <= width; ++x) {
+                int maxErr,
+                    prediction = predict_N_(x, yc + x - 1, yp + x, &maxErr);
+                AfterPredictWhenDecompressing
+              }
+            }
+        }  // groupX
+      }    // groupY
+      if (havePalette)
+        for (size_t y = 0; y < ysize; ++y) {
+          uint8_t* const PIK_RESTRICT rowImg = img.Row(y);
+          for (size_t x = 0; x < xsize; ++x)  // UNROLL and PARALLELIZE ME!
+            rowImg[x] = palette[rowImg[x]];
+        }
+      *bytes_pos += pos;
+    }  // run
+    if (kNumRuns > 1)
+      printf("%d runs, %1.5f seconds", kNumRuns,
+             ((double)clock() - start) / CLOCKS_PER_SEC);
+    *result = std::move(img);
+    return true;
+  }
+
+  const int PL1 = 0, PL2 = 1, PL3 = 2;  // plane indices given to PlaneRow()
+  // Names: doubled letter = base plane p1, XmY = X - Y, Xm2 = X - (others)/2.
+  enum PlaneMethods_30 {  // 8/30 are redundant (left for encoder's convenience)
+    RR_G_B = 0,           // p1=R  p2=G  p3=B
+    RR_GmR_B = 1,         // p2-p1  p3
+    RR_G_BmR = 2,         //   p2  p3-p1
+    RR_GmR_BmR = 3,       // p2-p1 p3-p1
+
+    RR_GmB_B = 4,  // == 22   p2-p3 @ p2
+    RR_G_GmB = 5,  // ~= 12   p2-p3 @ p3
+
+    RR_GmR_Bm2 = 6,  //  p2-p1  p3-(p1+p2)/2
+    RR_Gm2_BmR = 7,  // p2-(p1+p3)/2   p3-p1
+    RR_G_Bm2 = 8,    //   p2    p3-(p1+p2)/2
+    RR_Gm2_B = 9,    // p2-(p1+p3)/2     p3
+
+    R_GG_B = 10,  // p1=G  p2=R  p3=B
+    RmG_GG_B = 11,
+    R_GG_BmG = 12,
+    RmG_GG_BmG = 13,
+
+    RmB_GG_B = 14,  // == 21
+    R_GG_RmB = 15,  // ~=  2
+
+    RmG_GG_Bm2 = 16,
+    Rm2_GG_BmG = 17,
+    R_GG_Bm2 = 18,
+    Rm2_GG_B = 19,
+
+    R_G_BB = 20,  // p1=B  p2=R  p3=G
+    RmB_G_BB = 21,
+    R_GmB_BB = 22,
+    RmB_GmB_BB = 23,
+
+    RmG_G_BB = 24,  // == 11
+    R_RmG_BB = 25,  // ~=  1
+
+    RmB_Gm2_BB = 26,
+    Rm2_GmB_BB = 27,
+    R_Gm2_BB = 28,
+    Rm2_G_BB = 29,
+  };
+  const uint8_t ncMap[30] = {  // bit p set: plane p uses numColors[p] - 1
+    1+2+4,  // entries 0..9: PlaneMethods_30 values with base plane R
+    1+0+4,
+    1+2+0,
+    1,
+    1+0+4,
+    1+2+0,
+    1,
+    1,
+    1+2+0,
+    1+0+4,
+    // entries 10..19: PlaneMethods_30 values with base plane G
+    1+2+4,
+    0+2+4,
+    1+2+0,
+    0+2+0,
+    0+2+4,
+    1+2+0,
+    0+2+0,
+    0+2+0,
+    1+2+0,
+    0+2+4,
+    // entries 20..29: PlaneMethods_30 values with base plane B
+    1+2+4,
+    0+2+4,
+    1+0+4,
+    0+0+4,
+    0+2+4,
+    1+0+4,
+    0+0+4,
+    0+0+4,
+    1+0+4,
+    0+2+4,
+  };
+
+  bool dcmprs512x512(pik::Image3B* img, int planeToDecompress, size_t& pos,
+                     size_t groupY, size_t groupX,
+                     const uint8_t* compressedData, size_t compressedSize,
+                     size_t maxDecodedSize) {
+    // Decodes one plane of the group at (groupX, groupY) from compressedData,
+    // advancing pos. Returns true on success; bad input yields PIK_FAILURE.
+    size_t esize[kNumContexts], xsize = img->xsize(), ysize = img->ysize();
+    size_t yEnd = std::min((size_t)kGroupSize, ysize - groupY);
+    width =       std::min((size_t)kGroupSize, xsize - groupX) - 1;
+    size_t area = yEnd * (width + 1);
+    maxerrShift = (area > 25600 ? 0 : area > 12800 ? 1 :
+                   area > 4000 ? 2 : area > 400 ? 3 : 4);
+    // planeMethod is read from the stream; it must be a valid ncMap index.
+    if (size_t(planeMethod) >= sizeof(ncMap)) return PIK_FAILURE("lossless8");
+    maxTpv = ((ncMap[planeMethod] & (1 << planeToDecompress)) ?
+            numColors[planeToDecompress] - 1 : 255) << PBits;
+    size_t decompressedSize = 0;  // is used only for the assert()
+    PredictMode pMode = PM_Regular;  // always overwritten when i == 0
+    int nC = ((kNumContexts - 1) >> maxerrShift) + 1;
+    for (int i = 0; i < nC; ++i) {
+      size_t cs = decodeVarInt(compressedData, compressedSize, &pos);
+      if (i == 0) pMode = (PredictMode)(cs % 3), cs /= 3;
+      if (cs == 0) continue;
+      int mode = cs % 3;
+      cs /= 3;  // modes 1/2 store count - 1; cs + 1 must fit edata[i]
+      if (mode == 2) {  // one byte repeated cs + 1 times
+        if (pos >= compressedSize) return PIK_FAILURE("lossless8");
+        if (cs >= maxDecodedSize) return PIK_FAILURE("lossless8");
+        memset(&edata[i][0], compressedData[pos++], ++cs);
+        decompressedSize += cs;
+      } else if (mode == 1) {  // cs + 1 raw bytes
+        if (cs >= maxDecodedSize) return PIK_FAILURE("lossless8");
+        if (pos + cs >= compressedSize) return PIK_FAILURE("lossless8");
+        memcpy(&edata[i][0], &compressedData[pos], ++cs);
+        decompressedSize += cs, pos += cs;
+      } else {  // cs entropy-coded bytes
+        if (pos + cs > compressedSize) return PIK_FAILURE("lossless8");
+        size_t ds;
+        if (!MaybeEntropyDecode(&compressedData[pos], cs, maxDecodedSize,
+                                &edata[i][0], &ds)) {
+          return PIK_FAILURE("lossless8");
+        }
+        pos += cs;
+        decompressedSize += ds;
+      }
+    }
+    if (decompressedSize != area) return PIK_FAILURE("lossless8");
+
+    memset(esize, 0, sizeof(esize));
+    // Dispatch on the predictor mode that was read from the stream.
+    if (pMode == PM_Regular)
+      for (size_t y = 0, yc = 0, yp; y < yEnd; ++y) {
+        setRowImgPointers3dec(img->PlaneRow)
+        for (size_t x = 0; x <= width; ++x) {
+          int maxErr, prediction = predict_R_(x, yc + x - 1, yp + x, &maxErr);
+          AfterPredictWhenDecompressing
+        }
+      }
+    else if (pMode == PM_West)
+      for (size_t y = 0, yc = 0, yp; y < yEnd; ++y) {
+        setRowImgPointers3dec(img->PlaneRow)
+        for (size_t x = 0; x <= width; ++x) {
+          int maxErr, prediction = predict_W_(x, yc + x - 1, yp + x, &maxErr);
+          AfterPredictWhenDecompressing
+        }
+      }
+    else if (pMode == PM_North)
+      for (size_t y = 0, yc = 0, yp; y < yEnd; ++y) {
+        setRowImgPointers3dec(img->PlaneRow)
+        for (size_t x = 0; x <= width; ++x) {
+          int maxErr, prediction = predict_N_(x, yc + x - 1, yp + x, &maxErr);
+          AfterPredictWhenDecompressing
+        }
+      }
+    return true;
+  }
+
+  bool Colorful8bit_decompress(const PaddedBytes& bytes, size_t* bytes_pos,
+                               Image3B* result) {
+    if (*bytes_pos > bytes.size()) return PIK_FAILURE("lossless8");
+    size_t compressedSize = bytes.size() - *bytes_pos;
+    const uint8_t* compressedData = bytes.data() + *bytes_pos;
+    // Decodes the three-plane stream at bytes[*bytes_pos..] into *result.
+    size_t maxDecodedSize = kGroupSize * kGroupSize;  // Size of an edata entry
+    // Header: varint xsize, varint ysize, then an optional palette section.
+    size_t xsize, ysize, pos0 = 0, imageMethod = 0;
+    xsize = decodeVarInt(compressedData, compressedSize, &pos0);
+    ysize = decodeVarInt(compressedData, compressedSize, &pos0);
+    if (!xsize || !ysize) return PIK_FAILURE("lossless8");
+    // Too large, would run out of memory. Chosen as reasonable limit for pik
+    // while being below default fuzzer memory limit. We check for total pixel
+    // size, and an additional restriction to ysize, because large ysize
+    // consumes more memory due to the scanline padding.
+    if (uint64_t(xsize) * uint64_t(ysize) >= 268435456ull || ysize >= 65536) {
+      return PIK_FAILURE("lossless8");
+    }
+    pik::Image3B img(xsize, ysize);
+    std::vector<int> palette(0x100 * 3);
+    // palette[0x100 * c + k] is the pixel value of palette index k in channel c.
+    clock_t start = clock();
+    for (int run = 0; run < kNumRuns; ++run) {
+      numColors[0] = numColors[1] = numColors[2] = 0x100;
+      size_t pos = pos0;
+      if (xsize * ysize > 4 * 0x100) {  // TODO: smarter decision making here
+        if (pos >= compressedSize)
+          return PIK_FAILURE("lossless8: out of bounds");
+        const uint8_t* p = &compressedData[pos];
+        imageMethod = *p++;  // bit c set: channel c is palettized
+        if (imageMethod) {
+          ++pos;
+          if (pos+3 >= compressedSize)
+            return PIK_FAILURE("lossless8: out of bounds");
+          numColors[0] = compressedData[pos++] + 1;
+          numColors[1] = compressedData[pos++] + 1;
+          numColors[2] = compressedData[pos++] + 1;
+          p = &compressedData[pos];
+          const uint8_t* p_end = compressedData + compressedSize;
+          for (int channel = 0; channel < 3; ++channel)
+            if (imageMethod & (1 << channel))
+              for (int sb = channel << 8, stop = sb + numColors[channel],
+                       color = 0, x = 0;
+                   x < 0x100; x += 8) {
+                if (p >= p_end) return PIK_FAILURE("lossless8");
+                for (int b = *p++, j = 0; j < 8; ++j)
+                  palette[sb] = color++, sb += b & 1, b >>= 1;
+                if (sb >= stop) break;
+                if (sb + 0x100 - 8 - x == stop) {
+                  for (int i = x; i < 0x100 - 8; ++i) palette[sb++] = color++;
+                  break;
+                }
+              }
+        }
+        pos = p - &compressedData[0];
+      }
+      for (size_t groupY = 0; groupY < ysize; groupY += kGroupSize) {
+        for (size_t groupX = 0; groupX < xsize; groupX += kGroupSize) {
+          if (pos >= compressedSize) return PIK_FAILURE("lossless8");
+          planeMethod = compressedData[pos++];  // NOTE(review): no range check
+          if (!dcmprs512x512(&img, PL1, pos, groupY, groupX, compressedData,
+                             compressedSize, maxDecodedSize))
+            return PIK_FAILURE("lossless8");
+          if (!dcmprs512x512(&img, PL2, pos, groupY, groupX, compressedData,
+                             compressedSize, maxDecodedSize))
+            return PIK_FAILURE("lossless8");
+          if (!dcmprs512x512(&img, PL3, pos, groupY, groupX, compressedData,
+                             compressedSize, maxDecodedSize))
+            return PIK_FAILURE("lossless8");
+          // Undo the inter-plane transform that planeMethod names.
+          uint8_t *PIK_RESTRICT row1, *PIK_RESTRICT row2, *PIK_RESTRICT row3;
+          size_t yEnd = std::min((size_t)kGroupSize, ysize - groupY);
+          size_t xEnd = std::min((size_t)kGroupSize, xsize - groupX);
+// T3bgn opens the per-pixel loops used by each case below and loads R, G, B.
+#define T3bgn                                      \
+  for (size_t y = 0; y < yEnd; ++y) {              \
+    row1 = img.PlaneRow(PL1, groupY + y) + groupX; \
+    row2 = img.PlaneRow(PL2, groupY + y) + groupX; \
+    row3 = img.PlaneRow(PL3, groupY + y) + groupX; \
+    for (size_t x = 0; x < xEnd; ++x) {            \
+      int R = row1[x], G = row2[x], B = row3[x];   \
+      (void)R;                                     \
+      (void)G;                                     \
+      (void)B;
+
+// Close T3bgn above; not using a #define confuses brace matching of editor.
+#define CC \
+  }        \
+  }
+
+          switch (planeMethod) {
+            case 0:
+            case 10:
+            case 20:
+              break;
+            case 1:
+              T3bgn G += R + 0x80;
+              row2[x] = G;
+              CC break;
+            case 2:
+              T3bgn B += R + 0x80;
+              row3[x] = B;
+              CC break;
+            case 3:
+              T3bgn G += R + 0x80;
+              B += R + 0x80;
+              row2[x] = G;
+              row3[x] = B;
+              CC break;
+            case 22:
+            case 4:
+              T3bgn row2[x] = G + B + 0x80;
+              CC break;
+            case 5:
+              T3bgn row3[x] = G - B + 0x80;
+              CC break;
+            case 6:
+              T3bgn row2[x] = G = (G + R + 0x80) & 0xff;
+              row3[x] = B + ((R + G) >> 1) + 0x80;
+              CC break;
+            case 7:
+              T3bgn row3[x] = B = (B + R + 0x80) & 0xff;
+              row2[x] = G + ((R + B) >> 1) + 0x80;
+              CC break;
+            case 8:
+              T3bgn row3[x] = B + ((R + G) >> 1) + 0x80;
+              CC break;
+            case 9:
+              T3bgn row2[x] = G + ((R + B) >> 1) + 0x80;
+              CC break;
+
+            case 24:
+            case 11:
+              T3bgn R += G + 0x80;
+              row1[x] = R;
+              CC break;
+            case 12:
+              T3bgn B += G + 0x80;
+              row3[x] = B;
+              CC break;
+            case 13:
+              T3bgn R += G + 0x80;
+              B += G + 0x80;
+              row1[x] = R;
+              row3[x] = B;
+              CC break;
+            case 21:
+            case 14:
+              T3bgn row1[x] = R + B + 0x80;
+              CC break;
+            case 15:
+              T3bgn row3[x] = R - B + 0x80;
+              CC break;
+
+            case 16:
+              T3bgn row1[x] = R = (R + G + 0x80) & 0xff;
+              row3[x] = B + ((R + G) >> 1) + 0x80;
+              CC break;
+            case 17:
+              T3bgn row3[x] = B = (B + G + 0x80) & 0xff;
+              row1[x] = R + ((B + G) >> 1) + 0x80;
+              CC break;
+            case 18:
+              T3bgn row3[x] = B + ((R + G) >> 1) + 0x80;
+              CC break;
+            case 19:
+              T3bgn row1[x] = R + ((B + G) >> 1) + 0x80;
+              CC break;
+
+            case 23:
+              T3bgn G += B + 0x80;
+              R += B + 0x80;
+              row1[x] = R;
+              row2[x] = G;
+              CC break;
+            case 25:
+              T3bgn row2[x] = R - G + 0x80;
+              CC break;
+            case 26:
+              T3bgn row1[x] = R = (R + B + 0x80) & 0xff;
+              row2[x] = G + ((B + R) >> 1) + 0x80;
+              CC break;
+            case 27:
+              T3bgn row2[x] = G = (G + B + 0x80) & 0xff;
+              row1[x] = R + ((B + G) >> 1) + 0x80;
+              CC break;
+            case 28:
+              T3bgn row2[x] = G + ((B + R) >> 1) + 0x80;
+              CC break;
+            case 29:
+              T3bgn row1[x] = R + ((B + G) >> 1) + 0x80;
+              CC break;
+          }
+        }  // groupX
+      }    // groupY
+// Disabled (presumably an end-of-stream position check), because it is useful
+// that the decoder supports decoding its own stream when contained inside a
+// bigger stream and knows the correct end position.
+      // Map palettized channels back from palette indices to pixel values.
+      for (int channel = 0; channel < 3; ++channel)
+        if (imageMethod & (1 << channel)) {
+          int* p = &palette[0x100 * channel];
+          for (size_t y = 0; y < ysize; ++y) {
+            uint8_t* const PIK_RESTRICT rowImg = img.PlaneRow(channel, y);
+            for (size_t x = 0; x < xsize; ++x)  // UNROLL AND PARALLELIZE ME!
+              rowImg[x] = p[rowImg[x]];
+          }
+        }
+      *bytes_pos += pos;
+    }  // run
+    if (kNumRuns > 1)
+      printf("%d runs, %1.5f seconds", kNumRuns,
+             ((double)clock() - start) / CLOCKS_PER_SEC);
+    *result = std::move(img);
+    return true;
+  }
+
+  uint32_t cmprs512x512(pik::Image3B& img, int planeToCompress, int planeToUse,
+                        size_t groupY, size_t groupX,
+                        uint8_t* compressedOutput) {
+    size_t esize[kNumContexts], xsize = img.xsize(), ysize = img.ysize();
+    memset(esize, 0, sizeof(esize));
+    size_t yEnd = std::min((size_t)kGroupSize, ysize - groupY);
+    width =       std::min((size_t)kGroupSize, xsize - groupX) - 1;
+    size_t area = yEnd * (width + 1);
+    maxerrShift =
+        (area > 25600 ? 0 :
+         area > 12800 ? 1 : area > 4000 ? 2 : area > 400 ? 3 : 4);
+    maxTpv =
+     (planeToCompress==planeToUse? numColors[planeToCompress]-1 : 255) << PBits;
+    // Encodes one plane of one group into compressedOutput; returns its size.
+    uint64_t fromN = 0, fromW = 0;  // squared error vs North / West pixel
+    for (size_t y = 1; y < yEnd; ++y) {
+      rowImg  = img.PlaneRow(planeToCompress, groupY + y) + groupX;
+      rowPrev = img.PlaneRow(planeToCompress, groupY + y - 1) + groupX;
+      for (size_t x = 1; x <= width; ++x) {
+        int c = rowImg[x];
+        int N = rowPrev[x];
+        int W = rowImg[x - 1];
+        N -= c;
+        W -= c;
+        fromN += N * N;
+        fromW += W * W;
+      }
+    }
+    PredictMode pMode = PM_Regular;
+    if (fromW * 5 < fromN * 4) pMode = PM_West;  // no 'else' to reduce codesize
+    if (fromN * 5 < fromW * 4) pMode = PM_North;  // if (fromN < fromW*0.8)
+    // printf("%c ", pMode);
+    // Predict every pixel; the macros (defined elsewhere) presumably fill edata.
+    if (pMode == PM_Regular)  // Regular mode
+      for (size_t y = 0, yc = 0, yp; y < yEnd; ++y) {
+        setRowImgPointers3(img.PlaneRow)
+        for (size_t x = 0; x <= width; ++x) {
+          int maxErr, prediction = predict_R_(x, yc + x - 1, yp + x, &maxErr);
+          AfterPredictWhenCompressing3
+        }
+      }
+    else if (pMode == PM_West)  // 'West predicts better' mode
+      for (size_t y = 0, yc = 0, yp; y < yEnd; ++y) {
+        setRowImgPointers3(img.PlaneRow)
+        for (size_t x = 0; x <= width; ++x) {
+          int maxErr, prediction = predict_W_(x, yc + x - 1, yp + x, &maxErr);
+          AfterPredictWhenCompressing3
+        }
+      }
+    else if (pMode == PM_North)  // 'North predicts better' mode
+      for (size_t y = 0, yc = 0, yp; y < yEnd; ++y) {
+        setRowImgPointers3(img.PlaneRow)
+        for (size_t x = 0; x <= width; ++x) {
+          int maxErr, prediction = predict_N_(x, yc + x - 1, yp + x, &maxErr);
+          AfterPredictWhenCompressing3
+        }
+      }
+    else {
+    }  // TODO: other prediction modes!
+    // Emit one varint-prefixed chunk per context; chunk 0 also carries pMode.
+    size_t pos = 0;
+    int nC = ((kNumContexts - 1) >> maxerrShift) + 1;
+    for (int i = 0; i < nC; ++i) {
+      if (esize[i]) {
+        // size_t cs = FSE_compress(&compressedDataTmpBuf[0],
+        // sizeof(compressedDataTmpBuf), &edata[i][0], esize[i]);
+        size_t cs;
+        if (!MaybeEntropyEncode(&edata[i][0], esize[i],
+                                sizeof(compressedDataTmpBuf),
+                                &compressedDataTmpBuf[0], &cs)) {
+          return PIK_FAILURE("lossless8");  // NOTE(review): returned as a size
+        }
+        // s % 3: 0 = entropy-coded (cs bytes), 1 = raw bytes, 2 = one byte run.
+        size_t s = (cs <= 1 ? (esize[i] - 1) * 3 + 1 + cs : cs * 3);
+        pos += encodeVarInt(i > 0 ? s : s * 3 + pMode, &compressedOutput[pos]);
+        if (cs == 1)
+          compressedOutput[pos++] = edata[i][0];
+        else if (cs == 0)
+          memcpy(&compressedOutput[pos], &edata[i][0], esize[i]),
+              pos += esize[i];
+        else
+          memcpy(&compressedOutput[pos], &compressedDataTmpBuf[0], cs),
+              pos += cs;
+      } else {
+        pos += encodeVarInt(i > 0 ? 0 : pMode, &compressedOutput[pos]);
+      }
+    }  // i
+    return pos;
+  }
+
+#define Fsc(buf, bufsize) \
+  {                       \
+    datas[sp] = buf;      \
+    sizes[sp] = bufsize;  \
+    ++sp;                 \
+  }
+// Fsc (above) queues a chunk in datas[]/sizes[]; FWr appends bytes to *bytes.
+#define FWr(buf, bufsize)                            \
+  {                                                  \
+    if (kNumRuns == 1) {                             \
+      size_t current = bytes->size();                \
+      bytes->resize(bytes->size() + bufsize);        \
+      memcpy(bytes->data() + current, buf, bufsize); \
+    }                                                \
+  }
+// FWrByte appends the single byte b through FWr.
+#define FWrByte(b)    \
+  {                   \
+    uint8_t byte = b; \
+    FWr(&byte, 1);    \
+  }
+
+  bool Colorful8bit_compress(const Image3B& img_in, pik::PaddedBytes* bytes) {
+    clock_t start = clock();
+    // Encodes img_in; output is appended to *bytes (only when kNumRuns == 1).
+    // The code modifies the image for palette so must copy for now.
+    Image3B img = CopyImage(img_in);
+    // Scratch space: six slots of kGroupSize2plus bytes (see cd4..cd6 below).
+    std::vector<uint8_t> temp_buffer(kGroupSize2plus * 6);
+    compressedData = temp_buffer.data();
+
+    for (int run = 0; run < kNumRuns; ++run) {
+      size_t xsize = img.xsize(), ysize = img.ysize(), pos;
+      pos  = encodeVarInt(xsize, &compressedData[0]);
+      pos += encodeVarInt(ysize, &compressedData[pos]);
+      FWr(&compressedData[0], pos)
+      numColors[0] = numColors[1] = numColors[2] = 0x100;
+
+      if (xsize * ysize > 4 * 0x100) {  // TODO: smarter decision making here
+        // Let's check whether the image should be 'palettized',
+        // because the range is 64k, but 25% or more of the range is unused.
+        uint8_t flags = 0, bits[3 * 0x100 / 8], *pb = &bits[0];
+        uint32_t palette123[3 * 0x100];
+
+#if 1  // Enable/disable the CompactChannel transform(per-channel palettization)
+        memset(bits, 0, sizeof(bits));
+        memset(palette123, 0, sizeof(palette123));
+        for (int channel = 0; channel < 3; ++channel) {
+          uint32_t i, first, count, *palette = &palette123[0x100 * channel];
+          for (size_t y = 0; y < ysize; ++y) {
+            uint8_t* const PIK_RESTRICT rowImg = img.PlaneRow(channel, y);
+            for (size_t x = 0; x < xsize; ++x)  // UNROLL AND PARALLELIZE ME!
+              palette[rowImg[x]] = 1;
+          }
+          // count the number of pixel values present in the image
+          for (i = 0; i < 0x100; ++i)
+            if (palette[i])  break;
+          for (first = i, count = 0; i < 0x100; ++i)
+            if (palette[i])  palette[i] = count++;
+          // printf("count=%5d, %f%%\n", count, count * 100. / 256);
+          numColors[channel] = count;
+          if (count >= 255) continue;  // TODO: better decision making
+          flags += 1 << channel;
+          palette[first] = 1;
+          for (int sb = 0, x = 0; x < 0x100; x += 8) {
+            uint32_t b = 0, v;
+            for (int y = x + 7; y >= x; --y)
+              v = (palette[y] ? 1 : 0), b += b + v, sb += v;
+            *pb++ = b;  // TODO: Compress the bits, not store!
+            if (sb >= count || sb + 0x100 - 8 - x == count) break;
+          }
+          palette[first] = 0;
+        }  // for channel
+#endif
+
+        FWrByte(flags);  // As of now (Feb.2019) ImageMethod==flags
+        if (flags) {
+          for (int channel = 0; channel < 3; ++channel)
+          if (flags & (1 << channel)) {
+            uint32_t* palette = &palette123[0x100 * channel];
+            for (size_t y = 0; y < ysize; ++y) {
+              uint8_t* const PIK_RESTRICT rowImg = img.PlaneRow(channel, y);
+              for (size_t x = 0; x < xsize; ++x)  // UNROLL AND PARALLELIZE ME!
+                rowImg[x] = palette[rowImg[x]];
+            }
+          }
+          compressedData[0] = numColors[0] - 1;
+          compressedData[1] = numColors[1] - 1;
+          compressedData[2] = numColors[2] - 1;
+          FWr(&compressedData[0], 3);
+          FWr(&bits[0], sizeof(uint8_t) * (pb - &bits[0]));
+        }  // if (flags)
+        else numColors[0] = numColors[1] = numColors[2] = 0x100;
+      }    // if (xsize*ysize > 4*0x100)
+      uint8_t* compressedData2 = &compressedData[kGroupSize2plus];
+      uint8_t* compressedData3 = &compressedData[kGroupSize2plus * 2];
+      uint8_t* cd4 = &compressedData[kGroupSize2plus * 3];
+      uint8_t* cd5 = &compressedData[kGroupSize2plus * 4];
+      uint8_t* cd6 = &compressedData[kGroupSize2plus * 5];
+      for (size_t groupY = 0; groupY < ysize; groupY += kGroupSize) {
+        for (size_t groupX = 0; groupX < xsize; groupX += kGroupSize) {
+          size_t S1, S2, S3, S4, S5, S6, s1, s2, s3, p1, p2, p3, sizes[3];
+          uint8_t *cd1, *cd2, *cd3, *datas[3];
+          int sp = 0, planeMethod;  // Here we try guessing which one of the 30
+                    // PlaneMethods is best, after trying just six color planes.
+          // s1..s3: coded size of each plane with planeToCompress == planeToUse.
+          s1 = cmprs512x512(img, PL1, PL1, groupY, groupX, compressedData);
+          s2 = cmprs512x512(img, PL2, PL2, groupY, groupX, compressedData2);
+          s3 = cmprs512x512(img, PL3, PL3, groupY, groupX, compressedData3);
+          // p1 is the base plane (default PL2); planeMethod starts at 0/10/20.
+          S1 = s2, p1 = PL2, cd1 = compressedData2, planeMethod = 10;
+          S2 = s1, p2 = PL1, cd2 = compressedData;
+          S3 = s3, p3 = PL3, cd3 = compressedData3;
+          if (s1 < s2 * 63 / 64 && s1 < s3) {
+            S1 = s1, p1 = PL1, cd1 = compressedData, planeMethod = 0;
+            S2 = s2, p2 = PL2, cd2 = compressedData2;
+            S3 = s3, p3 = PL3, cd3 = compressedData3;
+          } else if (s3 < s2 * 63 / 64 && s3 < s1) {
+            S1 = s3, p1 = PL3, cd1 = compressedData3, planeMethod = 20;
+            S2 = s1, p2 = PL1, cd2 = compressedData;
+            S3 = s2, p3 = PL2, cd3 = compressedData2;
+          }
+          S4 = cmprs512x512(img, p2, p1, groupY, groupX, cd4); /* R-G+0x80 */
+          S5 = cmprs512x512(img, p3, p1, groupY, groupX, cd5); /* B-G+0x80 */
+          if (p1 == PL1)
+            Fsc(cd1, S1)
+
+          if (S4 >= S2 && S5 >= S3) {
+              S6 = cmprs512x512(img, p2, p3, groupY, groupX, cd6); // R-B+0x80
+              if (S6 >= S2 && S6 >= S3)
+                Fsc(cd2, S2)
+              else if (S3 > S2 && S3 > S6)
+                Fsc(cd2, S2)
+              else
+                Fsc(cd6, S6)
+              if (p1 == PL2)
+                Fsc(cd1, S1)
+              if (S6 >= S2 && S6 >= S3)
+                Fsc(cd3, S3)
+              else if (S3 > S2 && S3 > S6) {
+                Fsc(cd6, S6)
+                planeMethod += 5;
+              } else {
+                Fsc(cd3, S3)
+                planeMethod += 4;
+              }
+          }
+          else {
+            size_t yEnd = std::min((size_t)kGroupSize, ysize - groupY) + groupY;
+            size_t xEnd = std::min((size_t)kGroupSize, xsize - groupX);
+            size_t p2or3 = (S5 < S4 ? p2 : p3);
+            for (size_t y = groupY; y < yEnd; ++y) {  // edits p2or3 in place
+              uint8_t* PIK_RESTRICT row1 = img.PlaneRow(p1,    y) + groupX;
+              uint8_t* PIK_RESTRICT row2 = img.PlaneRow(p2or3, y) + groupX;
+              for (size_t x = 0; x < xEnd; ++x) {
+                uint32_t v1 = row1[x], v2 = (row2[x] + v1 + 0x80) & 0xff;
+                row2[x] = ((v1 + v2) >> 1) - v1 + 0x80;
+              }
+            }
+            if (S5 < S4) {
+              S6 = cmprs512x512(img, p3, p2, groupY, groupX, cd6);  // B-(R+G)/2
+              if (S4 < S2)
+                Fsc(cd4, S4)
+              else
+                Fsc(cd2, S2)
+              if (p1 == PL2)
+                Fsc(cd1, S1)
+              if (S3 <= S5 && S3 <= S6) {
+                Fsc(cd3, S3) planeMethod += 1;
+              }
+              else if (S5 <= S6) {
+                Fsc(cd5, S5) planeMethod += (S4 < S2 ? 3 : 2);
+              } else {
+                Fsc(cd6, S6) planeMethod += (S4 < S2 ? 6 : 8);
+              }
+            } else {
+              S6 = cmprs512x512(img, p2, p3, groupY, groupX, cd6);  // R-(B+G)/2
+              if (S2 <= S4 && S2 <= S6) {
+                Fsc(cd2, S2) planeMethod += 2;
+              } else if (S4 <= S6) {
+                Fsc(cd4, S4) planeMethod += (S5 < S3 ? 3 : 1);
+              } else {
+                Fsc(cd6, S6) planeMethod += (S5 < S3 ? 7 : 9);
+              }
+              if (p1 == PL2)
+                Fsc(cd1, S1)
+              if (S5 < S3)
+                Fsc(cd5, S5)
+              else
+                Fsc(cd3, S3)
+            }
+          }
+          if (p1 == PL3)
+            Fsc(cd1, S1)
+          FWrByte(planeMethod);  // printf("%2d ", planeMethod);
+          FWr(datas[0], sizes[0])
+          FWr(datas[1], sizes[1])
+          FWr(datas[2], sizes[2])
+        }  // groupX
+      }    // groupY
+    }     // run
+    if (kNumRuns > 1)
+      printf("%d runs, %1.5f seconds", kNumRuns,
+             ((double)clock() - start) / CLOCKS_PER_SEC);
+    return true;
+  }
+
+};  // struct State
+
+}  // namespace
+
+bool Grayscale8bit_compress(const ImageB& img, PaddedBytes* bytes) {
+  const std::unique_ptr<State> codec(new State());  // fresh State per call
+  return codec->Grayscale8bit_compress(img, bytes);
+}
+
+bool Grayscale8bit_decompress(const PaddedBytes& bytes, size_t* pos,
+                              ImageB* result) {
+  const std::unique_ptr<State> codec(new State());  // fresh State per call
+  return codec->Grayscale8bit_decompress(bytes, pos, result);
+}
+
+bool Colorful8bit_compress(const Image3B& img, PaddedBytes* bytes) {
+  const std::unique_ptr<State> codec(new State());  // fresh State per call
+  return codec->Colorful8bit_compress(img, bytes);
+}
+
+bool Colorful8bit_decompress(const PaddedBytes& bytes, size_t* pos,
+                             Image3B* result) {
+  const std::unique_ptr<State> codec(new State());  // fresh State per call
+  return codec->Colorful8bit_decompress(bytes, pos, result);
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/lossless8.h b/codec/L2/demos/pikEnc/host/pik/lossless8.h
new file mode 100755
index 0000000000..6f131e7099
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/lossless8.h
@@ -0,0 +1,26 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+// @author Alexander Rhatushnyak
+
+#ifndef PIK_LOSSLESS8_H_
+#define PIK_LOSSLESS8_H_
+
+#include "pik/image.h"
+#include "pik/padded_bytes.h"
+
+namespace pik {
+
+bool Grayscale8bit_compress(const ImageB& img, PaddedBytes* bytes);
+bool Grayscale8bit_decompress(const PaddedBytes& bytes, size_t* pos,
+                              ImageB* result);
+
+bool Colorful8bit_compress(const Image3B& img, PaddedBytes* bytes);
+bool Colorful8bit_decompress(const PaddedBytes& bytes, size_t* pos,
+                              Image3B* result);
+}  // namespace pik
+
+#endif  // PIK_LOSSLESS8_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/lossless_entropy.cc b/codec/L2/demos/pikEnc/host/pik/lossless_entropy.cc
new file mode 100755
index 0000000000..38cdadcbd2
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/lossless_entropy.cc
@@ -0,0 +1,236 @@
+// Copyright 2019 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/lossless_entropy.h"
+#include "pik/padded_bytes.h"
+
+#define PIK_ENTROPY_CODER_FSE 1  // tANS; smallest results for DC
+#define PIK_ENTROPY_CODER_PIK 2  // rANS
+// Potentially helpful for synthetic images but not DC
+#define PIK_ENTROPY_CODER_BROTLI 3
+
+#ifndef PIK_ENTROPY_CODER
+#define PIK_ENTROPY_CODER PIK_ENTROPY_CODER_FSE
+#endif
+
+#if PIK_ENTROPY_CODER == PIK_ENTROPY_CODER_FSE
+//#include "fse_wrapper.h"
+#include "FiniteStateEntropy/lib/fse.h"
+#elif PIK_ENTROPY_CODER == PIK_ENTROPY_CODER_PIK
+#include "pik/entropy_coder.h"
+#elif PIK_ENTROPY_CODER == PIK_ENTROPY_CODER_BROTLI
+#include "pik/brotli.h"
+#else
+#error "Add include for entropy coder"
+#endif
+
+namespace pik {
+
+size_t encodeVarInt(size_t value, uint8_t* output) {
+  // Base-128 encoding, least significant group first: each byte carries 7
+  // payload bits and bit 7 flags that more bytes follow. Returns the number
+  // of bytes written; the caller must provide enough room in |output|.
+  uint8_t* cursor = output;
+  for (; value > 127; value >>= 7) {
+    // Low 7 bits plus the next-byte flag.
+    *cursor++ = ((uint8_t)(value & 127)) | 128;
+  }
+  // Final byte: remaining bits, next-byte flag clear.
+  *cursor++ = ((uint8_t)value) & 127;
+  return static_cast<size_t>(cursor - output);
+}
+
+size_t decodeVarInt(const uint8_t* input, size_t inputSize, size_t* pos) {
+  // Base-128 decode starting at *pos; stops after 10 bytes or at end of input.
+  size_t value = 0, n = 0;
+  for (; *pos + n < inputSize && n < 10; ++n) {
+    const uint8_t byte = input[*pos + n];
+    value |= uint64_t(byte & 127) << uint64_t(7 * n);
+    if ((byte & 128) == 0) break;  // next-byte flag clear: this was the last
+  }
+  *pos += n + 1;  // TODO: Return a decoding error if n == 10.
+  return value;
+}
+
+#if PIK_ENTROPY_CODER == PIK_ENTROPY_CODER_FSE
+
+bool MaybeEntropyEncode(const uint8_t* data, size_t size, size_t out_capacity,
+                        uint8_t* out, size_t* out_size) {
+  // FSE backend; the last two arguments are 255 and FSE_MAX_TABLELOG (12).
+  const size_t rc = FSE_compress2(out, out_capacity, data, size, 255, 12);
+  const bool failed = FSE_isError(rc) != 0;
+  if (failed) printf("FSE enc error: %s !!!\n", FSE_getErrorName(rc));
+  if (failed) return PIK_FAILURE("FSE enc error");
+  // Success: hand the value returned by FSE to the caller unchanged.
+  *out_size = rc;
+  return true;
+}
+
+bool MaybeEntropyDecode(const uint8_t* data, size_t size, size_t out_capacity,
+                        uint8_t* out, size_t* out_size) {
+  const size_t rc = FSE_decompress(out, out_capacity, data, size);
+  const bool failed = FSE_isError(rc) != 0;
+  if (failed) printf("FSE dec error: %s !!!\n", FSE_getErrorName(rc));
+  if (failed) return PIK_FAILURE("FSE dec error");
+  // Success: hand the value returned by FSE_decompress to the caller.
+  *out_size = rc;
+  return true;
+}
+
+#elif PIK_ENTROPY_CODER == PIK_ENTROPY_CODER_PIK
+
+// Entropy encode with pik ANS: writes varint(size) + ANS stream to |result|.
+bool EntropyEncodePikANS(const uint8_t* data, size_t size,
+                         std::vector<uint8_t>* result) {
+  static const int kAlphabetSize = 256;
+  static const int kContext = 0;
+
+  std::vector<int> histogram(kAlphabetSize, 0);  // per-byte-value counts
+  for (size_t i = 0; i < size; i++) {
+    histogram[data[i]]++;
+  }
+  size_t cost_bound =
+      1000 + 4 * size + 8 +
+      ((size_t)ANSPopulationCost(histogram.data(), kAlphabetSize, size) + 7) /
+          8;
+  result->resize(cost_bound, 0);  // scratch size; trimmed to |pos| below
+
+  uint8_t* storage = result->data();
+  size_t pos = 0;  // byte offset into |storage|
+
+  pos += encodeVarInt(size, storage + pos);  // header: number of symbols
+
+  std::vector<ANSEncodingData> encoding_codes(1);
+  size_t bitpos = 0;  // bit offset relative to storage + pos
+  encoding_codes[0].BuildAndStore(&histogram[0], histogram.size(), &bitpos,
+                                  storage + pos);
+
+  std::vector<uint8_t> dummy_context_map;
+  dummy_context_map.push_back(0);  // only 1 histogram
+  ANSSymbolWriter writer(encoding_codes, dummy_context_map, &bitpos,
+                         storage + pos);
+  for (size_t i = 0; i < size; i++) {
+    writer.VisitSymbol(data[i], kContext);
+  }
+  writer.FlushToBitStream();
+  pos += ((bitpos + 7) >> 3);  // round the bit count up to whole bytes
+  result->resize(pos);
+
+  return true;  // there is no failure path in this function
+}
+
+// Entropy decode with pik ANS: reads a varint symbol count, then the stream.
+bool EntropyDecodePikANS(const uint8_t* data, size_t size,
+                         std::vector<uint8_t>* result) {
+  static const int kContext = 0;
+  size_t pos = 0;
+  size_t num_symbols = decodeVarInt(data, size, &pos);
+  if (pos >= size) {  // nothing is left after the varint header
+    return PIK_FAILURE("lossless pik ANS decode failed");
+  }
+  // TODO(lode): instead take expected decoded size as function parameter
+  if (num_symbols > 16777216) {  // 2^24
+    // Avoid large allocations, we never expect this many symbols for
+    // the limited group sizes.
+    return PIK_FAILURE("lossless pik ANS decode too large");
+  }
+
+  BitReader br(data + pos, size - pos);  // remainder of the input
+  ANSCode codes;
+  if (!DecodeANSCodes(1, 256, &br, &codes)) {
+    return PIK_FAILURE("lossless pik ANS decode failed");
+  }
+
+  result->resize(num_symbols);
+  ANSSymbolReader reader(&codes);
+  for (size_t i = 0; i < num_symbols; i++) {
+    br.FillBitBuffer();
+    int read_symbol = reader.ReadSymbol(kContext, &br);
+    (*result)[i] = read_symbol;  // int narrowed to uint8_t on assignment
+  }
+  if (!reader.CheckANSFinalState()) {
+    return PIK_FAILURE("lossless pik ANS decode final state failed");
+  }
+
+  return true;
+}
+
+bool IsRLECompressible(const uint8_t* data, size_t size) {
+  // True iff there are at least 4 bytes and all of them are identical.
+  if (size < 4) return false;
+  for (const uint8_t* p = data + 1; p != data + size; ++p) {
+    if (*p != *data) return false;
+  }
+  return true;
+}
+
+// TODO(lode): avoid the copying between std::vector and data.
+// Entropy encode with pik ANS; *out_size of 0 or 1 is a sentinel, not a size.
+bool MaybeEntropyEncode(const uint8_t* data, size_t size, size_t out_capacity,
+                        uint8_t* out, size_t* out_size) {
+  if (IsRLECompressible(data, size)) {
+    *out_size = 1;  // Indicate the codec should use RLE instead.
+    return true;
+  }
+  std::vector<uint8_t> result;
+  if (!EntropyEncodePikANS(data, size, &result)) {
+    return PIK_FAILURE("lossless entropy encoding failed");
+  }
+  if (result.size() > size) {  // encoded form is larger than the input
+    *out_size = 0;  // Indicate the codec should use uncompressed mode instead.
+    return true;
+  }
+  if (result.size() > out_capacity) {  // checked before anything is copied
+    return PIK_FAILURE("lossless entropy encoding out of capacity");
+  }
+  memcpy(out, result.data(), result.size());
+  *out_size = result.size();
+  return true;
+}
+
+// Entropy decode with pik ANS into |out|; fails if |out_capacity| is too small.
+bool MaybeEntropyDecode(const uint8_t* data, size_t size, size_t out_capacity,
+                        uint8_t* out, size_t* out_size) {
+  std::vector<uint8_t> result;  // decoded bytes, copied to |out| below
+  if (!EntropyDecodePikANS(data, size, &result)) {
+    return PIK_FAILURE("lossless entropy decoding failed");
+  }
+  if (result.size() > out_capacity) {  // checked before anything is copied
+    return PIK_FAILURE("lossless entropy decoding out of capacity");
+  }
+  memcpy(out, result.data(), result.size());
+  *out_size = result.size();  // number of decoded bytes
+  return true;
+}
+
+#elif PIK_ENTROPY_CODER == PIK_ENTROPY_CODER_BROTLI
+
+bool MaybeEntropyEncode(const uint8_t* data, size_t size, size_t out_capacity,
+                        uint8_t* out, size_t* out_size) {
+  *out_size = 0;  // Brotli backend; 11 below is presumably the quality level
+  PIK_RETURN_IF_ERROR(BrotliCompress(11, data, size, out, out_size));
+  if (*out_size > out_capacity) {  // NOTE(review): runs after the write to out
+    return PIK_FAILURE("MaybeEntropyEncode exceeded buffer");
+  }
+  return true;
+}
+
+bool MaybeEntropyDecode(const uint8_t* data, size_t size, size_t out_capacity,
+                        uint8_t* out, size_t* out_size) {
+  size_t bytes_read = 0;  // passed to BrotliDecompress; not read afterwards
+  PaddedBytes padded_out;  // Brotli backend decodes here, then copies to |out|
+  PIK_RETURN_IF_ERROR(
+      BrotliDecompress(data, size, out_capacity, &bytes_read, &padded_out));
+  *out_size = padded_out.size();
+  memcpy(out, padded_out.data(), padded_out.size());  // assumes <= out_capacity
+  return true;
+}
+
+#else
+#error "Implement all PIK_ENTROPY_CODER"
+#endif
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/lossless_entropy.h b/codec/L2/demos/pikEnc/host/pik/lossless_entropy.h
new file mode 100755
index 0000000000..e34869d2d1
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/lossless_entropy.h
@@ -0,0 +1,36 @@
+// Copyright 2019 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_LOSSLESS_ENTROPY_H_
+#define PIK_LOSSLESS_ENTROPY_H_
+
+#include "pik/padded_bytes.h"
+
+namespace pik {
+
+size_t encodeVarInt(size_t value, uint8_t* output);
+
+size_t decodeVarInt(const uint8_t* input, size_t inputSize, size_t* pos);
+
+// TODO(janwas): output to PaddedBytes for compatibility with brotli.h.
+
+// *out_size can have a special meaning; in each such case you must encode the
+// data differently yourself, and MaybeEntropyDecode cannot decode the result.
+// If 0, then compression was not able to reduce the size and you should output
+// the data uncompressed.
+// If 1, then the input data is exactly one byte value repeated `size` times,
+// and you must RLE compress it (encode how many times that value repeats).
+bool MaybeEntropyEncode(const uint8_t* data, size_t size, size_t out_capacity,
+                        uint8_t* out, size_t* out_size);
+
+// Does not know or return the compressed size, must be known from external
+// source.
+bool MaybeEntropyDecode(const uint8_t* data, size_t size, size_t out_capacity,
+                        uint8_t* out, size_t* out_size);
+
+}  // namespace pik
+
+#endif  // PIK_LOSSLESS_ENTROPY_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/metadata.cc b/codec/L2/demos/pikEnc/host/pik/metadata.cc
new file mode 100755
index 0000000000..7df17a81cf
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/metadata.cc
@@ -0,0 +1,16 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/metadata.h"
+
+#include "pik/fields.h"
+
+namespace pik {
+
+Transcoded::Transcoded() { Bundle::Init(this); }  // body only calls Init
+Metadata::Metadata() { Bundle::Init(this); }      // body only calls Init
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/metadata.h b/codec/L2/demos/pikEnc/host/pik/metadata.h
new file mode 100755
index 0000000000..08d5f4e6d3
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/metadata.h
@@ -0,0 +1,82 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_METADATA_H_
+#define PIK_METADATA_H_
+
+// Image metadata stored in FileHeader and CodecInOut.
+
+#include <stdint.h>
+
+#include "pik/color_encoding.h"
+#include "pik/field_encodings.h"
+#include "pik/padded_bytes.h"
+#include "pik/pik_params.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Optional metadata about the original image source.
+struct Transcoded {
+  Transcoded();  // declared here, defined out of line
+  static const char* Name() { return "Transcoded"; }
+
+  template <class Visitor>
+  Status VisitFields(Visitor* PIK_RESTRICT visitor) {
+    if (visitor->AllDefault(*this, &all_default)) return true;
+    // U32 args are presumably (encoding selector, default, field) - confirm.
+    visitor->U32(0x05A09088, 8, &original_bit_depth);
+    PIK_RETURN_IF_ERROR(visitor->VisitNested(&original_color_encoding));
+    visitor->U32(0x84828180u, 0, &original_bytes_per_alpha);
+
+    return true;
+  }
+
+  bool all_default;  // passed to visitor->AllDefault() in VisitFields
+
+  uint32_t original_bit_depth;            // = CodecInOut.dec_bit_depth
+  ColorEncoding original_color_encoding;  // = io->dec_c_original in the encoder
+  // TODO(lode): This should use bits instead of bytes, 1-bit alpha channel
+  //             images exist and may be desired by users using this feature.
+  // Alpha bytes per channel of original image (not necessarily the same as
+  // the encoding used in the pik file).
+  uint32_t original_bytes_per_alpha = 0;
+};
+
+struct Metadata {
+  Metadata();  // declared here, defined out of line
+  static const char* Name() { return "Metadata"; }
+
+  template <class Visitor>
+  Status VisitFields(Visitor* PIK_RESTRICT visitor) {
+    if (visitor->AllDefault(*this, &all_default)) return true;
+
+    PIK_RETURN_IF_ERROR(visitor->VisitNested(&transcoded));
+
+    // 100, 250, 4000 are common; don't anticipate more than 10,000.
+    visitor->U32(0x08D08582, kDefaultIntensityTarget / 50, &target_nits_div50);
+    // The three byte blobs below are all visited with BytesEncoding::kBrotli.
+    visitor->Bytes(BytesEncoding::kBrotli, &exif);
+    visitor->Bytes(BytesEncoding::kBrotli, &iptc);
+    visitor->Bytes(BytesEncoding::kBrotli, &xmp);
+
+    return true;
+  }
+
+  bool all_default;  // passed to visitor->AllDefault() in VisitFields
+
+  Transcoded transcoded;  // visited as a nested bundle
+
+  uint32_t target_nits_div50;  // presumably intensity target / 50 - confirm
+
+  PaddedBytes exif;
+  PaddedBytes iptc;
+  PaddedBytes xmp;
+};
+
+}  // namespace pik
+
+#endif  // PIK_METADATA_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/multipass_handler.h b/codec/L2/demos/pikEnc/host/pik/multipass_handler.h
new file mode 100755
index 0000000000..f9bb28d09e
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/multipass_handler.h
@@ -0,0 +1,164 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_MULTIPASS_HANDLER_H_
+#define PIK_MULTIPASS_HANDLER_H_
+
+#include "pik/ac_strategy.h"
+#include "pik/block_dictionary.h"
+#include "pik/codec.h"
+#include "pik/color_correlation.h"
+#include "pik/color_encoding.h"
+#include "pik/common.h"
+#include "pik/compressed_image_fwd.h"
+#include "pik/data_parallel.h"
+#include "pik/headers.h"
+#include "pik/image.h"
+#include "pik/pik_params.h"
+#include "pik/quant_weights.h"
+#include "pik/quantizer.h"
+#include "pik/status.h"
+
+// Defines how multi-pass images should be encoded and decoded.
+
+namespace pik {
+
+class MultipassManager;
+
+// MultipassHandler is a child object of MultipassManager. It is bound to a
+// specific group (see GetGroupHandler) and is used to perform operations over
+// that group's region.
+class MultipassHandler {
+ public:
+  virtual ~MultipassHandler() = default;
+
+  virtual const Rect& GroupRect() = 0;
+  virtual const Rect& PaddedGroupRect() = 0;
+  Rect BlockGroupRect() {  // PaddedGroupRect() with all fields / kBlockDim
+    const Rect& r = PaddedGroupRect();
+    return Rect(r.x0() / kBlockDim, r.y0() / kBlockDim, r.xsize() / kBlockDim,
+                r.ysize() / kBlockDim);
+  }
+
+  // Progressive mode. Default: one element holding |ac|; ac_strategy unused.
+  virtual std::vector<Image3S> SplitACCoefficients(
+      Image3S&& ac, const AcStrategyImage& ac_strategy) {
+    std::vector<Image3S> ret;
+    ret.push_back(std::move(ac));
+    return ret;
+  }
+
+  // Returns the MultipassManager this handler was created by.
+  virtual MultipassManager* Manager() = 0;
+
+ private:  // NOTE(review): neither member is referenced inside this class.
+  DequantMatrices default_matrices_{/*need_inv_matrices=*/false};
+  Quantizer quantizer_{&default_matrices_, 0, 0};
+};
+
+// MultipassManager holds information about passes and manages
+// MultipassHandlers. It is assumed that parallelization goes below the manager
+// level (at group level), so all the methods of MultipassManager should be
+// invoked from a single thread.
+class MultipassManager {
+ public:
+  virtual ~MultipassManager() = default;
+
+  // Modifies img, applying a transformation that reduces its entropy given a
+  // reference image, typically the output of a previous pass.
+  virtual void DecorrelateOpsin(Image3F* img) = 0;
+
+  // Inverse of DecorrelateOpsin.
+  virtual void RestoreOpsin(Image3F* img) = 0;
+
+  // Called at the start of each pass.
+  virtual void StartPass(const FrameHeader& header) = 0;
+
+  // Called by the decoder when a pass is done.
+  virtual void SetDecodedPass(const Image3F& opsin) = 0;
+
+  // This version is only called if we decoded a lossless pass.
+  virtual void SetDecodedPass(CodecInOut* io) = 0;
+
+  // Used *on the encoder only* to forcibly enable adaptive reconstruction in
+  // GetQuantizer.
+  virtual void UseAdaptiveReconstruction() {}
+
+  // NOTE: not thread safe.
+  // Preferably, `group_id` should be a small non-negative number.
+  // The same `group_rect` should be provided for a given `group_id`.
+  virtual MultipassHandler* GetGroupHandler(size_t group_id,
+                                            const Rect& group_rect) = 0;
+
+  // Methods to retrieve color correlation, ac strategy, quantizer, block
+  // dictionary and dequant matrices. Only GetDequantMatrices has a default.
+  virtual void GetDequantMatrices(
+      float butteraugli_target, float intensity_multiplier,
+      const Image3F& opsin, const ImageF& initial_quant_field,
+      DequantMatrices* dequant_matrices, ImageB* control_field,
+      uint8_t table_map[kMaxQuantControlFieldValue][256]) {
+    return FindBestDequantMatrices(butteraugli_target, intensity_multiplier,
+                                   opsin, initial_quant_field, dequant_matrices,
+                                   control_field, table_map);
+  }
+
+  virtual BlockDictionary GetBlockDictionary(double butteraugli_target,
+                                             const Image3F& opsin) = 0;
+
+  virtual void GetColorCorrelationMap(const Image3F& opsin,
+                                      const DequantMatrices& dequant,
+                                      ColorCorrelationMap* cmap) = 0;
+
+  virtual void GetAcStrategy(float butteraugli_target,
+                             const ImageF* quant_field,
+                             const DequantMatrices& dequant, const Image3F& src,
+                             ThreadPool* pool, AcStrategyImage* ac_strategy,
+                             PikInfo* aux_out) = 0;
+
+  virtual std::shared_ptr<Quantizer> GetQuantizer(
+      const CompressParams& cparams, size_t xsize_blocks, size_t ysize_blocks,
+      const Image3F& opsin_orig, const Image3F& opsin,
+      const FrameHeader& frame_header, const GroupHeader& header,
+      const ColorCorrelationMap& cmap, const BlockDictionary& block_dictionary,
+      const AcStrategyImage& ac_strategy, const ImageB& ar_sigma_lut_ids,
+      const DequantMatrices* dequant, const ImageB& dequant_control_field,
+      const uint8_t dequant_map[kMaxQuantControlFieldValue][256],
+      ImageF& quant_field, PikInfo* aux_out) = 0;
+  // Same parameters as GetQuantizer, preceded by two floats (avg, absavg).
+  virtual std::shared_ptr<Quantizer> GetQuantizerAvg(float avg, float absavg,
+      const CompressParams& cparams, size_t xsize_blocks, size_t ysize_blocks,
+      const Image3F& opsin_orig, const Image3F& opsin,
+      const FrameHeader& frame_header, const GroupHeader& header,
+      const ColorCorrelationMap& cmap, const BlockDictionary& block_dictionary,
+      const AcStrategyImage& ac_strategy, const ImageB& ar_sigma_lut_ids,
+      const DequantMatrices* dequant, const ImageB& dequant_control_field,
+      const uint8_t dequant_map[kMaxQuantControlFieldValue][256],
+      ImageF& quant_field, PikInfo* aux_out) = 0;
+
+  virtual size_t GetNumPasses() { return 1; }  // default: one pass
+  virtual std::vector<std::pair<uint32_t, uint32_t>>
+  GetDownsamplingToNumPasses() {
+    return {};  // default: empty list
+  }
+
+  // Save the ac strategy / quant field of this pass. Defaults do nothing.
+  virtual void SaveAcStrategy(const AcStrategyImage& af) {}
+  virtual void SaveQuantField(const ImageI& qf) {}
+
+  // Give a hint to the ac strategy / quant field encoder/decoder.
+  virtual const AcStrategyImage* HintAcStrategy() { return nullptr; }
+  virtual const ImageI* HintQuantField() { return nullptr; }
+
+  // Previous pass in a given colorspace; default returns true, |out| untouched.
+  virtual Status GetPreviousPass(const ColorEncoding& color_encoding,
+                                 ThreadPool* pool, Image3F* out) {
+    return true;
+  }
+};
+
+}  // namespace pik
+
+#endif  // PIK_MULTIPASS_HANDLER_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/noise.cc b/codec/L2/demos/pikEnc/host/pik/noise.cc
new file mode 100755
index 0000000000..0552a44647
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/noise.cc
@@ -0,0 +1,682 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include <cmath>
+#include <cstdio>
+#include <numeric>
+#include <random>
+
+#include "pik/convolve.h"
+#include "pik/descriptive_statistics.h"
+#include "pik/noise.h"
+#include "pik/opsin_params.h"
+#include "pik/optimize.h"
+#include "pik/rational_polynomial.h"
+#include "pik/robust_statistics.h"
+#include "pik/simd/simd.h"
+#include "pik/write_bits.h"
+
+typedef std::mt19937 RandomGenerator;
+
+namespace pik {
+namespace {
+
+SIMD_ATTR ImageF RandomImage(ImageF* PIK_RESTRICT temp, RandomGenerator* rng) {
+  const size_t xsize = temp->xsize();  // |temp| is scratch: overwritten below
+  const size_t ysize = temp->ysize();
+  for (size_t y = 0; y < ysize; ++y) {
+    float* PIK_RESTRICT row = temp->Row(y);
+    const SIMD_FULL(float) df;
+    const SIMD_FULL(uint32_t) du;
+    for (size_t x = 0; x < xsize; x += df.N) {
+      uint32_t rnd[SIMD_FULL(uint32_t)::N];
+      for (size_t i = 0; i < du.N; i++) {
+        rnd[i] = (*rng)();  // one generator draw per lane
+      }
+      const auto bits = load_unaligned(du, rnd);
+      // 1.0 + 23 random mantissa bits = [1, 2)
+      const auto rand12 =
+          cast_to(df, shift_right<9>(bits) | set1(du, 0x3F800000));
+      const auto rand01 = rand12 - set1(df, 1.0f);  // shift to [0, 1)
+      store(rand01, df, row + x);
+    }
+  }
+  // NOTE(review): last store per row may pass xsize; assumes padded rows.
+  ImageF out(xsize, ysize);
+  ConvolveT<strategy::Laplacian3>::Run(*temp, kernel::Laplacian3(), &out);
+  return out;  // Laplacian3-filtered copy; |temp| keeps the unfiltered values
+}
+
+float GetScoreSumsOfAbsoluteDifferences(const Image3F& opsin, const int x,
+                                        const int y, const int block_size) {
+  const int small_bl_size_x = 3;
+  const int small_bl_size_y = 4;
+  const int kNumSAD =
+      (block_size - small_bl_size_x) * (block_size - small_bl_size_y);
+  // One SAD value per patch position visited by the two outer loops below.
+  int counter = 0;
+  const int offset = 2;  // position of the reference ("center") patch
+  // NOTE(review): block_size 3 or 4 gives kNumSAD == 0, so kSamples == 0 below.
+  std::vector<float> sad(kNumSAD, 0);
+  for (int y_bl = 0; y_bl + small_bl_size_y < block_size; ++y_bl) {
+    for (int x_bl = 0; x_bl + small_bl_size_x < block_size; ++x_bl) {
+      float sad_sum = 0;
+      // size of the center patch, we compare all the patches inside window with
+      // the center one
+      for (int cy = 0; cy < small_bl_size_y; ++cy) {
+        for (int cx = 0; cx < small_bl_size_x; ++cx) {
+          float wnd = 0.5f * (opsin.PlaneRow(1, y + y_bl + cy)[x + x_bl + cx] +
+                              opsin.PlaneRow(0, y + y_bl + cy)[x + x_bl + cx]);
+          float center =
+              0.5f * (opsin.PlaneRow(1, y + offset + cy)[x + offset + cx] +
+                      opsin.PlaneRow(0, y + offset + cy)[x + offset + cx]);
+          sad_sum += std::abs(center - wnd);
+        }
+      }
+      sad[counter++] = sad_sum;
+    }
+  }
+  const int kSamples = (kNumSAD) / 2;
+  // As with ROAD (rank order absolute distance), we keep the smallest half of
+  // the values in SAD (we use here the more robust patch SAD instead of
+  // absolute single-pixel differences).
+  std::sort(sad.begin(), sad.end());
+  const float total_sad_sum =
+      std::accumulate(sad.begin(), sad.begin() + kSamples, 0.0f);
+  return total_sad_sum / kSamples;  // mean of the kSamples smallest SADs
+}
+
+class Histogram {
+ public:
+  static constexpr int kBins = 256;
+
+  Histogram() { std::fill(bins, bins + kBins, 0); }
+  // x is truncated to int and clamped to [0, kBins - 1] (see Index).
+  void Increment(const float x) { bins[Index(x)] += 1; }
+  int Get(const float x) const { return bins[Index(x)]; }
+  int Bin(const size_t bin) const { return bins[bin]; }  // no range check
+
+  void Print() const {
+    for (size_t i = 0; i < kBins; ++i) {
+      printf("%d\n", bins[i]);
+    }
+  }
+  // Passes the running sums of the bins to HalfRangeMode.
+  int Mode() const {
+    uint32_t cdf[kBins];
+    std::partial_sum(bins, bins + kBins, cdf);
+    return HalfRangeMode()(cdf, kBins);
+  }
+  // NOTE(review): |total| below starts from 1LL, i.e. sum of bins plus one.
+  double Quantile(double q01) const {
+    const int64_t total = std::accumulate(bins, bins + kBins, 1LL);
+    const int64_t target = static_cast<int64_t>(q01 * total);
+    // Until sum >= target:
+    int64_t sum = 0;
+    size_t i = 0;
+    for (; i < kBins; ++i) {
+      sum += bins[i];
+      // Exact match: assume middle of bin i
+      if (sum == target) {
+        return i + 0.5;
+      }
+      if (sum > target) break;
+    }
+
+    // Next non-empty bin (in case histogram is sparsely filled)
+    size_t next = i + 1;
+    while (next < kBins && bins[next] == 0) {
+      ++next;
+    }
+    // NOTE(review): after the break above sum > target, so excess is negative.
+    // Linear interpolation according to how far into next we went
+    const double excess = target - sum;
+    const double weight_next = bins[Index(next)] / excess;
+    return ClampX(next * weight_next + i * (1.0 - weight_next));
+  }
+
+  // Inter-quartile range
+  double IQR() const { return Quantile(0.75) - Quantile(0.25); }
+
+ private:
+  template <typename T>
+  T ClampX(const T x) const {  // clamps x to [0, kBins - 1]
+    return std::min(std::max(T(0), x), T(kBins - 1));
+  }
+  size_t Index(const float x) const { return ClampX(static_cast<int>(x)); }
+
+  uint32_t bins[kBins];  // zero-filled by the constructor
+};
+
+std::vector<float> GetSADScoresForPatches(const Image3F& opsin,
+                                          const int block_s, const int num_bin,
+                                          Histogram* sad_histogram) {
+  // One SAD score per whole block_s x block_s tile, in row-major tile order.
+  const auto tiles_y = opsin.ysize() / block_s;
+  const auto tiles_x = opsin.xsize() / block_s;
+  std::vector<float> scores(tiles_y * tiles_x, 0.0f);
+  auto next_score = scores.begin();
+  for (int y = 0; y + block_s <= opsin.ysize(); y += block_s) {
+    for (int x = 0; x + block_s <= opsin.xsize(); x += block_s) {
+      // We assume that we work with Y opsin channel [-0.5, 0.5]
+      const float s = GetScoreSumsOfAbsoluteDifferences(opsin, x, y, block_s);
+      *next_score++ = s;
+      sad_histogram->Increment(s * num_bin);
+    }
+  }
+  return scores;
+}
+
+float GetSADThreshold(const Histogram& histogram, const int num_bin) {
+  // The histogram mode is taken to correspond to "flat" patches, i.e. the
+  // most common SAD value. Images with regular texture may produce a second
+  // strong peak, which is not handled here; |num_bin| is currently unused.
+  // TODO(user) handle bimodal and heavy-tailed case
+  const float mode_bin = static_cast<float>(histogram.Mode());
+  return mode_bin / Histogram::kBins;
+}
+
+// Clamps x to [0, max_value].
+template <class D, class V>
+static SIMD_ATTR SIMD_INLINE V Clamp0ToMax(D d, const V x, const V max_value) {
+  const auto clamped = min(x, max_value);  // upper bound first
+  // If negative, replace with zero (faster than floating-point max()).
+  return select(clamped, setzero(d), condition_from_sign(clamped));
+}
+
+// Returns eval(x) clamped to [0, 1]. x is in [0+delta, 1+delta], delta ~= 0.06
+template <class StrengthEval>
+SIMD_ATTR typename StrengthEval::V NoiseStrength(
+    const StrengthEval& eval, const typename StrengthEval::V x) {
+  const typename StrengthEval::D d;
+  return Clamp0ToMax(d, eval(x), set1(d, 1.0f));
+}
+
+// General case: slow but precise. Computes alpha * pow(x, gamma) + beta.
+class StrengthEvalPow {
+ public:
+  using D = Scalar<float>;
+  using V = D::V;
+
+  StrengthEvalPow(const NoiseParams& noise_params)
+      : noise_params_(noise_params) {}
+
+  SIMD_ATTR V operator()(const V vx) const {
+    float x;
+    store(vx, D(), &x);  // unpack the single lane so std::pow can be used
+    return set1(D(), noise_params_.alpha * std::pow(x, noise_params_.gamma) +
+                         noise_params_.beta);
+  }
+
+ private:
+  const NoiseParams noise_params_;  // copy of the constructor argument
+};
+
+// For noise_params.alpha == 0: cheaper to evaluate than a polynomial and
+// avoids BLAS errors in RationalPolynomial.
+template <class D_Arg>
+class StrengthEvalLinear {
+ public:
+  using D = D_Arg;
+  using V = typename D::V;
+
+  SIMD_ATTR StrengthEvalLinear(const NoiseParams& noise_params)
+      : strength_(set1(D(), noise_params.beta)) {}
+
+  SIMD_ATTR V operator()(const V x) const { return strength_; }  // x ignored
+
+ private:
+  V strength_;  // set1 of noise_params.beta
+};
+
+// Uses rational polynomial - faster than Pow.
+template <class D_Arg>
+class StrengthEvalPoly {
+  // Max err < 1E-6.
+  static constexpr size_t kDegreeP = 3;
+  static constexpr size_t kDegreeQ = 2;
+  using Polynomial = RationalPolynomial<D_Arg, kDegreeP, kDegreeQ>;
+
+ public:
+  using D = D_Arg;
+  using V = typename D::V;
+  // NOTE(review): fixed coefficients; presumably fit for one gamma - confirm.
+  static Polynomial InitPoly() {
+    const float p[kDegreeP + 1] = {
+        2.8334176974065262E-05, -4.0383997904166469E-03, 1.3657279781005727E-01,
+        1.0765042185381457E+00};
+    const float q[kDegreeQ + 1] = {7.6921408240996481E-01,
+                                   5.2686210349332230E-01,
+                                   -8.7053691084335916E-02};
+    return Polynomial(p, q);
+  }
+
+  SIMD_ATTR StrengthEvalPoly(const NoiseParams& noise_params)
+      : poly_(InitPoly()),
+        mul_(set1(D(), noise_params.alpha)),
+        add_(set1(D(), noise_params.beta)) {}
+  // alpha * poly_(x) + beta, assuming mul_add(a, b, c) == a * b + c.
+  SIMD_ATTR PIK_INLINE V operator()(const V x) const {
+    return mul_add(mul_, poly_(x), add_);
+  }
+
+ private:
+  Polynomial poly_;
+  const V mul_;  // set1 of noise_params.alpha
+  const V add_;  // set1 of noise_params.beta
+};
+
+template <class D>
+SIMD_ATTR void AddNoiseToRGB(
+    const typename D::V rnd_noise_r, const typename D::V rnd_noise_g,
+    const typename D::V rnd_noise_cor, const typename D::V noise_strength_g,
+    const typename D::V noise_strength_r, float* PIK_RESTRICT out_x,
+    float* PIK_RESTRICT out_y, float* PIK_RESTRICT out_b) {
+  const D d;  // NB: strength_g precedes strength_r in the parameter list
+  const auto kRGCorr = set1(d, 0.9f);   // weight of the shared noise term
+  const auto kRGNCorr = set1(d, 0.1f);  // weight of the per-channel noise term
+
+  const auto red_noise = kRGNCorr * rnd_noise_r * noise_strength_r +
+                         kRGCorr * rnd_noise_cor * noise_strength_r;
+  const auto green_noise = kRGNCorr * rnd_noise_g * noise_strength_g +
+                           kRGCorr * rnd_noise_cor * noise_strength_g;
+  // Read-modify-write of one vector at each of out_x, out_y and out_b.
+  auto vx = load(d, out_x);
+  auto vy = load(d, out_y);
+  auto vb = load(d, out_b);
+
+  vx += red_noise - green_noise;
+  vy += red_noise + green_noise;
+  vb += set1(d, 0.9375f) * (red_noise + green_noise);
+  // Clamp each plane to +/- the kXybRadius entry of the same index.
+  vx = clamp(vx, set1(d, -kXybRadius[0]), set1(d, kXybRadius[0]));
+  vy = clamp(vy, set1(d, -kXybRadius[1]), set1(d, kXybRadius[1]));
+  vb = clamp(vb, set1(d, -kXybRadius[2]), set1(d, kXybRadius[2]));
+
+  store(vx, d, out_x);
+  store(vy, d, out_y);
+  store(vb, d, out_b);
+}
+
+template <class StrengthEval>
+SIMD_ATTR void AddNoiseT(const StrengthEval& noise_model, Image3F* opsin) {
+  using D = typename StrengthEval::D;
+  const D d;
+  const auto half = set1(d, 0.5f);
+
+  const size_t xsize = opsin->xsize();
+  const size_t ysize = opsin->ysize();
+  // Three noise images, generated one after another from the same generator.
+  RandomGenerator rng(123456789);  // fixed seed: same sequence on every call
+  ImageF temp(xsize, ysize);       // scratch reused by all RandomImage calls
+  const ImageF& rnd_noise_red = RandomImage(&temp, &rng);
+  const ImageF& rnd_noise_green = RandomImage(&temp, &rng);
+  const ImageF& rnd_noise_correlated = RandomImage(&temp, &rng);
+
+  // With the prior subtract-random Laplacian approximation, rnd_* ranges were
+  // about [-1.5, 1.6]; Laplacian3 about doubles this to [-3.6, 3.6], so the
+  // normalizer is half of what it was before (0.5).
+  const auto norm_const = set1(d, 0.22f);
+
+  for (size_t y = 0; y < ysize; ++y) {
+    float* PIK_RESTRICT row_x = opsin->PlaneRow(0, y);
+    float* PIK_RESTRICT row_y = opsin->PlaneRow(1, y);
+    float* PIK_RESTRICT row_b = opsin->PlaneRow(2, y);
+    const float* PIK_RESTRICT row_rnd_r = rnd_noise_red.Row(y);
+    const float* PIK_RESTRICT row_rnd_g = rnd_noise_green.Row(y);
+    const float* PIK_RESTRICT row_rnd_c = rnd_noise_correlated.Row(y);
+    for (size_t x = 0; x < xsize; x += d.N) {
+      const auto vx = load(d, row_x + x);
+      const auto vy = load(d, row_y + x);
+      const auto in_g = half * (vy - vx);  // inverse of the x/y update in
+      const auto in_r = half * (vy + vx);  // AddNoiseToRGB (r - g, r + g)
+      const auto clamped_g =
+          clamp(in_g, set1(d, -kXybRadius[1]), set1(d, kXybRadius[1]));
+      const auto clamped_r =
+          clamp(in_r, set1(d, -kXybRadius[1]), set1(d, kXybRadius[1]));
+      const auto noise_strength_g = NoiseStrength(noise_model, clamped_g);
+      const auto noise_strength_r = NoiseStrength(noise_model, clamped_r);
+      const auto addit_rnd_noise_red = load(d, row_rnd_r + x) * norm_const;
+      const auto addit_rnd_noise_green = load(d, row_rnd_g + x) * norm_const;
+      const auto addit_rnd_noise_correlated =
+          load(d, row_rnd_c + x) * norm_const;
+      AddNoiseToRGB<D>(addit_rnd_noise_red, addit_rnd_noise_green,
+                       addit_rnd_noise_correlated, noise_strength_g,
+                       noise_strength_r, row_x + x, row_y + x, row_b + x);
+    }
+  }
+}
+
+// Returns max absolute error at uniformly spaced x.
+template <class EvalApprox>
+SIMD_ATTR float MaxAbsError(const NoiseParams& noise_params,
+                            const EvalApprox& eval_approx) {
+  const StrengthEvalPow eval_pow(noise_params);
+
+  float max_abs_err = 0.0f;
+  const float x0 = -kXybRadius[1] + kXybCenter[1];
+  const float x1 = kXybRadius[1] + kXybCenter[1];
+  for (float x = x0; x < x1; x += 1E-1f) {
+    const Scalar<float> d1;
+    const SIMD_FULL(float) d;
+    const auto expected_v = NoiseStrength(eval_pow, set1(d1, x));
+    const auto actual_v = NoiseStrength(eval_approx, set1(d, x));
+    float expected;
+    SIMD_ALIGN float actual[d.N];
+    store(expected_v, d1, &expected);
+    store(actual_v, d, actual);
+    const float abs_err = std::abs(expected - actual[0]);
+    if (abs_err > max_abs_err) {
+      // printf("  x=%f %E %E = %E\n", x, expected, actual[0], abs_err);
+      max_abs_err = abs_err;
+    }
+  }
+  // printf("max abs %.2E\n", max_abs_err);
+  return max_abs_err;
+}
+
+}  // namespace
+
+SIMD_ATTR void AddNoise(const NoiseParams& noise_params, Image3F* opsin) {
+  // SIMD descriptor.
+  using D = SIMD_FULL(float);
+
+  if (noise_params.alpha == 0.0f) {
+    // No noise at all
+    if (noise_params.beta == 0.0f && noise_params.gamma == 0.0f) return;
+
+    // Constant noise strength independent of pixel intensity
+    AddNoiseT(StrengthEvalLinear<D>(noise_params), opsin);
+    return;
+  }
+
+  const StrengthEvalPoly<D> poly(noise_params);
+  if (MaxAbsError(noise_params, poly) < 1E-3f) {
+    AddNoiseT(poly, opsin);
+  } else {
+    printf("Reverting to pow: %.3f %.3f ^%.3f\n", noise_params.alpha,
+           noise_params.beta, noise_params.gamma);
+    AddNoiseT(StrengthEvalPow(noise_params), opsin);
+  }
+}
+
+// F(alpha, beta, gamma| x,y) = (1-n) * sum_i(y_i - (alpha x_i ^ gamma +
+// beta))^2 + n * alpha * gamma.
+struct LossFunction {
+  explicit LossFunction(const std::vector<NoiseLevel>& nl0) : nl(nl0) {}
+
+  double Compute(const std::vector<double>& w, std::vector<double>* df) const {
+    double loss_function = 0;
+    const double kEpsilon = 1e-2;
+    const double kRegul = 0.00005;
+    (*df)[0] = 0;
+    (*df)[1] = 0;
+    (*df)[2] = 0;
+    for (int ind = 0; ind < nl.size(); ++ind) {
+      double shifted_intensity = nl[ind].intensity + kXybCenter[1];
+      if (shifted_intensity > kEpsilon) {
+        double l_f =
+            nl[ind].noise_level - (w[0] * pow(shifted_intensity, w[1]) + w[2]);
+        (*df)[0] += (1 - kRegul) * 2.0 * l_f * pow(shifted_intensity, w[1]) +
+                    kRegul * w[1];
+        (*df)[1] += (1 - kRegul) * 2.0 * l_f * w[0] *
+                        pow(shifted_intensity, w[1]) * log(shifted_intensity) +
+                    kRegul * w[0];
+        (*df)[2] += (1 - kRegul) * 2.0 * l_f;
+        loss_function += (1 - kRegul) * l_f * l_f + kRegul * w[0] * w[1];
+      }
+    }
+    return loss_function;
+  }
+
+  std::vector<NoiseLevel> nl;
+};
+
+void AddPointsForExtrapolation(std::vector<NoiseLevel>* noise_level) {
+  NoiseLevel nl_min;
+  NoiseLevel nl_max;
+  nl_min.noise_level = 2;
+  nl_max.noise_level = -2;
+  for (auto nl : *noise_level) {
+    if (nl.noise_level < nl_min.noise_level) {
+      nl_min.intensity = nl.intensity;
+      nl_min.noise_level = nl.noise_level;
+    }
+    if (nl.noise_level > nl_max.noise_level) {
+      nl_max.intensity = nl.intensity;
+      nl_max.noise_level = nl.noise_level;
+    }
+  }
+  nl_max.intensity = -0.5;
+  nl_min.intensity = 0.5;
+  noise_level->push_back(nl_min);
+  noise_level->push_back(nl_max);
+}
+
+void GetNoiseParameter(const Image3F& opsin, NoiseParams* noise_params,
+                       float quality_coef) {
+  // The size of a patch in decoder might be different from encoder's patch
+  // size.
+  // For encoder: the patch size should be big enough to estimate
+  //              noise level, but, at the same time, it should be not too big
+  //              to be able to estimate intensity value of the patch
+  const int block_s = 8;
+  const int kNumBin = 256;
+  Histogram sad_histogram;
+  std::vector<float> sad_scores =
+      GetSADScoresForPatches(opsin, block_s, kNumBin, &sad_histogram);
+  float sad_threshold = GetSADThreshold(sad_histogram, kNumBin);
+  // If threshold is too large, the image has a strong pattern. This pattern
+  // fools our model and it will add too much noise. Therefore, we do not add
+  // noise for such images
+  if (sad_threshold > 0.15f || sad_threshold <= 0.0f) {
+    noise_params->alpha = 0;
+    noise_params->beta = 0;
+    noise_params->gamma = 0;
+    return;
+  }
+  std::vector<NoiseLevel> nl =
+      GetNoiseLevel(opsin, sad_scores, sad_threshold, block_s);
+
+  AddPointsForExtrapolation(&nl);
+  OptimizeNoiseParameters(nl, noise_params);
+  noise_params->alpha *= quality_coef;
+  noise_params->beta *= quality_coef;
+}
+
+const float kNoisePrecision = 1000.0f;
+
+void EncodeFloatParam(float val, float precision, size_t* storage_ix,
+                      uint8_t* storage) {
+  WriteBits(1, val >= 0 ? 1 : 0, storage_ix, storage);
+  const int absval_quant = static_cast<int>(std::abs(val) * precision + 0.5f);
+  PIK_ASSERT(absval_quant < (1 << 16));
+  WriteBits(16, absval_quant, storage_ix, storage);
+}
+
+void DecodeFloatParam(float precision, float* val, BitReader* br) {
+  const int sign = 2 * br->ReadBits(1) - 1;
+  const int absval_quant = br->ReadBits(16);
+  *val = sign * absval_quant / precision;
+}
+
+std::string EncodeNoise(const NoiseParams& noise_params) {
+  const size_t kMaxNoiseSize = 16;
+  std::string output(kMaxNoiseSize, 0);
+  size_t storage_ix = 0;
+  uint8_t* storage = reinterpret_cast<uint8_t*>(&output[0]);
+  storage[0] = 0;
+  const bool have_noise =
+      (noise_params.alpha != 0.0f || noise_params.gamma != 0.0f ||
+       noise_params.beta != 0.0f);
+  WriteBits(1, have_noise, &storage_ix, storage);
+  if (have_noise) {
+    EncodeFloatParam(noise_params.alpha, kNoisePrecision, &storage_ix, storage);
+    EncodeFloatParam(noise_params.gamma, kNoisePrecision, &storage_ix, storage);
+    EncodeFloatParam(noise_params.beta, kNoisePrecision, &storage_ix, storage);
+  }
+  size_t jump_bits = ((storage_ix + 7) & ~7) - storage_ix;
+  WriteBits(jump_bits, 0, &storage_ix, storage);
+  PIK_ASSERT(storage_ix % 8 == 0);
+  size_t output_size = storage_ix >> 3;
+  output.resize(output_size);
+  return output;
+}
+
+bool DecodeNoise(BitReader* br, NoiseParams* noise_params) {
+  const bool have_noise = br->ReadBits(1);
+  if (have_noise) {
+    DecodeFloatParam(kNoisePrecision, &noise_params->alpha, br);
+    DecodeFloatParam(kNoisePrecision, &noise_params->gamma, br);
+    DecodeFloatParam(kNoisePrecision, &noise_params->beta, br);
+  } else {
+    noise_params->alpha = noise_params->gamma = noise_params->beta = 0.0f;
+  }
+  return br->JumpToByteBoundary();
+}
+
+void OptimizeNoiseParameters(const std::vector<NoiseLevel>& noise_level,
+                             NoiseParams* noise_params) {
+  static const double kPrecision = 1e-8;
+  static const int kMaxIter = 1000;
+
+  LossFunction loss_function(noise_level);
+  std::vector<double> parameter_vector(3);
+  parameter_vector[0] = -0.05;
+  parameter_vector[1] = 2.6;
+  parameter_vector[2] = 0.025;
+
+  parameter_vector = optimize::OptimizeWithScaledConjugateGradientMethod(
+      loss_function, parameter_vector, kPrecision, kMaxIter);
+
+  noise_params->alpha = parameter_vector[0];
+  noise_params->gamma = parameter_vector[1];
+  noise_params->beta = parameter_vector[2];
+}
+
+std::vector<float> GetTextureStrength(const Image3F& opsin, const int block_s) {
+  // Per whole patch of plane 1: sum of squared horizontal + vertical diffs.
+  std::vector<float> texture_strength_index((opsin.ysize() / block_s) *
+                                            (opsin.xsize() / block_s));
+  int block_index = 0;
+  for (int y = 0; y + block_s <= opsin.ysize(); y += block_s) {
+    for (int x = 0; x + block_s <= opsin.xsize(); x += block_s) {
+      float texture_strength = 0;
+      for (int y_bl = 0; y_bl < block_s; ++y_bl) {
+        // Horizontal differences along patch row y_bl (not always row y).
+        const float* row = opsin.PlaneRow(1, y + y_bl);
+        for (int x_bl = 0; x_bl + 1 < block_s; ++x_bl) {
+          const float diff = row[x + x_bl + 1] - row[x + x_bl];
+          texture_strength += diff * diff;
+        }
+      }
+      for (int y_bl = 0; y_bl + 1 < block_s; ++y_bl) {
+        const float* row = opsin.PlaneRow(1, y + y_bl);
+        const float* row_below = opsin.PlaneRow(1, y + y_bl + 1);
+        for (int x_bl = 0; x_bl < block_s; ++x_bl) {
+          const float diff = row_below[x + x_bl] - row[x + x_bl];
+          texture_strength += diff * diff;
+        }
+      }
+      texture_strength_index[block_index++] = texture_strength;
+    }
+  }
+  return texture_strength_index;
+}
+
+float GetThresholdFlatIndices(const std::vector<float>& texture_strength,
+                              const int n_patches) {
+  std::vector<float> kth_statistic = texture_strength;
+  std::stable_sort(kth_statistic.begin(), kth_statistic.end());
+  return kth_statistic[n_patches];
+}
+
+std::vector<NoiseLevel> GetNoiseLevel(
+    const Image3F& opsin, const std::vector<float>& texture_strength,
+    const float threshold, const int block_s) {
+  std::vector<NoiseLevel> noise_level_per_intensity;
+
+  const int filt_size = 1;
+  static const float kLaplFilter[filt_size * 2 + 1][filt_size * 2 + 1] = {
+      {-0.25f, -1.0f, -0.25f},
+      {-1.0f, 5.0f, -1.0f},
+      {-0.25f, -1.0f, -0.25f},
+  };
+
+  // The noise model is built on channel 0.5 * (X+Y), as we noticed that it
+  // is similar to the model for 0.5 * (Y-X).
+  int patch_index = 0;
+
+  for (int y = 0; y + block_s <= opsin.ysize(); y += block_s) {
+    for (int x = 0; x + block_s <= opsin.xsize(); x += block_s) {
+      if (texture_strength[patch_index] <= threshold) {
+        // Calculate mean value
+        float mean_int = 0;
+        for (int y_bl = 0; y_bl < block_s; ++y_bl) {
+          for (int x_bl = 0; x_bl < block_s; ++x_bl) {
+            mean_int += 0.5f * (opsin.PlaneRow(1, y + y_bl)[x + x_bl] +
+                                opsin.PlaneRow(0, y + y_bl)[x + x_bl]);
+          }
+        }
+        mean_int /= block_s * block_s;
+
+        // Calculate Noise level
+        float noise_level = 0;
+        int count = 0;
+        for (int y_bl = 0; y_bl < block_s; ++y_bl) {
+          for (int x_bl = 0; x_bl < block_s; ++x_bl) {
+            float filtered_value = 0;
+            for (int y_f = -1 * filt_size; y_f <= filt_size; ++y_f) {
+              if (((y_bl + y_f) < block_s) && ((y_bl + y_f) >= 0)) {
+                for (int x_f = -1 * filt_size; x_f <= filt_size; ++x_f) {
+                  if ((x_bl + x_f) >= 0 && (x_bl + x_f) < block_s) {
+                    filtered_value +=
+                        0.5f *
+                        (opsin.PlaneRow(1, y + y_bl + y_f)[x + x_bl + x_f] +
+                         opsin.PlaneRow(0, y + y_bl + y_f)[x + x_bl + x_f]) *
+                        kLaplFilter[y_f + filt_size][x_f + filt_size];
+                  } else {
+                    filtered_value +=
+                        0.5f *
+                        (opsin.PlaneRow(1, y + y_bl + y_f)[x + x_bl - x_f] +
+                         opsin.PlaneRow(0, y + y_bl + y_f)[x + x_bl - x_f]) *
+                        kLaplFilter[y_f + filt_size][x_f + filt_size];
+                  }
+                }
+              } else {
+                for (int x_f = -1 * filt_size; x_f <= filt_size; ++x_f) {
+                  if ((x_bl + x_f) >= 0 && (x_bl + x_f) < block_s) {
+                    filtered_value +=
+                        0.5f *
+                        (opsin.PlaneRow(1, y + y_bl - y_f)[x + x_bl + x_f] +
+                         opsin.PlaneRow(0, y + y_bl - y_f)[x + x_bl + x_f]) *
+                        kLaplFilter[y_f + filt_size][x_f + filt_size];
+                  } else {
+                    filtered_value +=
+                        0.5f *
+                        (opsin.PlaneRow(1, y + y_bl - y_f)[x + x_bl - x_f] +
+                         opsin.PlaneRow(0, y + y_bl - y_f)[x + x_bl - x_f]) *
+                        kLaplFilter[y_f + filt_size][x_f + filt_size];
+                  }
+                }
+              }
+            }
+            noise_level += std::abs(filtered_value);
+            ++count;
+          }
+        }
+        noise_level /= count;
+        NoiseLevel nl;
+        nl.intensity = mean_int;
+        nl.noise_level = noise_level;
+        noise_level_per_intensity.push_back(nl);
+      }
+      ++patch_index;
+    }
+  }
+  return noise_level_per_intensity;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/noise.h b/codec/L2/demos/pikEnc/host/pik/noise.h
new file mode 100755
index 0000000000..92e11728da
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/noise.h
@@ -0,0 +1,56 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_NOISE_H_
+#define PIK_NOISE_H_
+
+// Noise synthesis. Currently disabled.
+
+#include "pik/bit_reader.h"
+#include "pik/image.h"
+
+namespace pik {
+
+struct NoiseParams {
+  // Parameters of the fitted noise curve.
+  // alpha * x ^ gamma + beta,
+  // where x is an intensity of pixel / mean intensity of patch
+  float alpha = 0.0f;
+  float gamma = 0.0f;
+  float beta = 0.0f;
+};
+
+struct NoiseLevel {
+  float noise_level;
+  float intensity;
+};
+
+// Adds noise to the opsin image.
+void AddNoise(const NoiseParams& noise_params, Image3F* opsin);
+
+// Get parameters of the noise for NoiseParams model
+void GetNoiseParameter(const Image3F& opsin, NoiseParams* noise_params,
+                       float quality_coef);
+
+std::string EncodeNoise(const NoiseParams& noise_params);
+
+bool DecodeNoise(BitReader* br, NoiseParams* noise_params);
+
+// Texture strength is defined as tr(A), where A = [Gh, Gv]^T [Gh, Gv].
+std::vector<float> GetTextureStrength(const Image3F& opsin, const int block_s);
+
+float GetThresholdFlatIndices(const std::vector<float>& texture_strength,
+                              const int n_patches);
+
+std::vector<NoiseLevel> GetNoiseLevel(
+    const Image3F& opsin, const std::vector<float>& texture_strength,
+    const float threshold, const int block_s);
+
+void OptimizeNoiseParameters(const std::vector<NoiseLevel>& noise_level,
+                             NoiseParams* noise_params);
+}  // namespace pik
+
+#endif  // PIK_NOISE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/noise_distributions.h b/codec/L2/demos/pikEnc/host/pik/noise_distributions.h
new file mode 100755
index 0000000000..1da2c641ca
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/noise_distributions.h
@@ -0,0 +1,139 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_NOISE_DISTRIBUTIONS_H_
+#define PIK_NOISE_DISTRIBUTIONS_H_
+
+// Noise distributions for testing partial_derivatives and robust_statistics.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <random>  // distributions
+#include <string>
+
+#include "pik/image.h"
+
+namespace pik {
+
+// Unmodified input
+struct NoiseNone {
+  std::string Name() const { return "None"; }
+
+  template <class Random>
+  float operator()(const float in, Random* rng) const {
+    return in;
+  }
+};
+
+// Salt+pepper
+class NoiseImpulse {
+ public:
+  NoiseImpulse(const uint32_t threshold) : threshold_(threshold) {}
+  std::string Name() const { return "Impulse" + std::to_string(threshold_); }
+
+  // Returns 1 if rand > ~threshold, else `in` if rand > threshold, else 0.
+  template <class Random>
+  float operator()(const float in, Random* rng) const {
+    const uint32_t rand = (*rng)();
+    // Test the upper tail first: for threshold_ < 2^31 those draws also exceed
+    // threshold_, so testing it last would overwrite the 1 with `in`.
+    if (rand > ~threshold_) return 1.0f;
+    if (rand > threshold_) {
+      return in;
+    }
+    // rand <= threshold_: force the pixel to 0.
+    return 0.0f;
+  }
+
+ private:
+  const uint32_t threshold_;
+};
+
+class NoiseUniform {
+ public:
+  NoiseUniform(const float min, const float max_exclusive)
+      : dist_(min, max_exclusive) {}
+  std::string Name() const { return "Uniform" + std::to_string(dist_.b()); }
+
+  template <class Random>
+  float operator()(const float in, Random* rng) const {
+    return in + dist_(*rng);
+  }
+
+ private:
+  mutable std::uniform_real_distribution<float> dist_;
+};
+
+// Additive, zero-mean Gaussian.
+class NoiseGaussian {
+ public:
+  NoiseGaussian(const float stddev) : dist_(0.0f, stddev) {}
+  std::string Name() const {
+    return "Gaussian" + std::to_string(dist_.stddev());
+  }
+
+  template <class Random>
+  float operator()(const float in, Random* rng) const {
+    return in + dist_(*rng);
+  }
+
+ private:
+  mutable std::normal_distribution<float> dist_;
+};
+
+// Integer noise is scaled by 1E-3.
+class NoisePoisson {
+ public:
+  NoisePoisson(const double mean) : dist_(mean) {}
+  std::string Name() const { return "Poisson" + std::to_string(dist_.mean()); }
+
+  template <class Random>
+  float operator()(const float in, Random* rng) const {
+    return in + dist_(*rng) * 1E-3f;
+  }
+
+ private:
+  mutable std::poisson_distribution<int> dist_;
+};
+
+// Returns the result of applying the randomized "noise" function to each pixel.
+template <class NoiseType, class Random>
+ImageF AddNoise(const ImageF& in, const NoiseType& noise, Random* rng) {
+  const size_t xsize = in.xsize();
+  const size_t ysize = in.ysize();
+  ImageF out(xsize, ysize);
+  for (size_t y = 0; y < ysize; ++y) {
+    const float* PIK_RESTRICT in_row = in.ConstRow(y);
+    float* PIK_RESTRICT out_row = out.Row(y);
+    for (size_t x = 0; x < xsize; ++x) {
+      out_row[x] = noise(in_row[x], rng);
+    }
+  }
+  return out;
+}
+
+template <class NoiseType, class Random>
+Image3F AddNoise(const Image3F& in, const NoiseType& noise, Random* rng) {
+  const size_t xsize = in.xsize();
+  const size_t ysize = in.ysize();
+  Image3F out(xsize, ysize);
+  // noise_estimator_test requires this loop order.
+  for (size_t c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < ysize; ++y) {
+      const float* PIK_RESTRICT in_row = in.ConstPlaneRow(c, y);
+      float* PIK_RESTRICT out_row = out.PlaneRow(c, y);
+
+      for (size_t x = 0; x < xsize; ++x) {
+        out_row[x] = noise(in_row[x], rng);
+      }
+    }
+  }
+  return out;
+}
+
+}  // namespace pik
+
+#endif  // PIK_NOISE_DISTRIBUTIONS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/opsin_image.cc b/codec/L2/demos/pikEnc/host/pik/opsin_image.cc
new file mode 100755
index 0000000000..0d8def90cb
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/opsin_image.cc
@@ -0,0 +1,98 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/opsin_image.h"
+
+#include <stddef.h>
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/approx_cube_root.h"
+#include "pik/codec.h"
+#include "pik/compiler_specific.h"
+#include "pik/external_image.h"
+#include "pik/profiler.h"
+
+namespace pik {
+
+namespace {
+
+PIK_INLINE float SimpleGamma(float v) { return ApproxCubeRoot(v); }
+
+void LinearXybTransform(float r, float g, float b, float* PIK_RESTRICT valx,
+                        float* PIK_RESTRICT valy, float* PIK_RESTRICT valz) {
+  *valx = (kScaleR * r - kScaleG * g) * 0.5f;
+  *valy = (kScaleR * r + kScaleG * g) * 0.5f;
+  *valz = b;
+}
+
+}  // namespace
+
+void LinearToXyb(const float r, const float g, const float b,
+                 float* PIK_RESTRICT valx, float* PIK_RESTRICT valy,
+                 float* PIK_RESTRICT valz) {
+  float mixed[3];
+  OpsinAbsorbance(r, g, b, mixed);
+  for (size_t c = 0; c < 3; ++c) {
+    // mixed should be non-negative even for wide-gamut. Make sure of that:
+    mixed[c] = std::max(0.0f, mixed[c]);
+    mixed[c] = SimpleGamma(mixed[c]);
+  }
+  LinearXybTransform(mixed[0], mixed[1], mixed[2], valx, valy, valz);
+
+  // For wide-gamut inputs, r/g/b and valx (but not y/z) are often negative.
+}
+
+// This is different from butteraugli::OpsinDynamicsImage() in the sense that
+// it does not contain a sensitivity multiplier based on the blurred image.
+Image3F OpsinDynamicsImage(const CodecInOut* in, const Rect& in_rect) {
+  PROFILER_FUNC;
+
+  // Convert to linear sRGB (unless already in that space)
+  const Image3F* linear_srgb = &in->color();
+  Image3F copy;
+  Rect linear_rect = in_rect;
+  if (!in->IsLinearSRGB()) {
+    const ColorEncoding& c = in->Context()->c_linear_srgb[in->IsGray()];
+    PIK_CHECK(in->CopyTo(in_rect, c, &copy));
+    linear_srgb = &copy;
+    // We've cut out the rectangle, start at x0=y0=0 in copy.
+    linear_rect = Rect(copy);
+  }
+
+  const size_t xsize = in_rect.xsize();
+  const size_t ysize = in_rect.ysize();
+  Image3F opsin(xsize, ysize);
+
+  for (size_t y = 0; y < ysize; ++y) {
+    const float* PIK_RESTRICT row_in0 =
+        linear_rect.ConstPlaneRow(*linear_srgb, 0, y);
+    const float* PIK_RESTRICT row_in1 =
+        linear_rect.ConstPlaneRow(*linear_srgb, 1, y);
+    const float* PIK_RESTRICT row_in2 =
+        linear_rect.ConstPlaneRow(*linear_srgb, 2, y);
+    float* PIK_RESTRICT row_xyb0 = opsin.PlaneRow(0, y);
+    float* PIK_RESTRICT row_xyb1 = opsin.PlaneRow(1, y);
+    float* PIK_RESTRICT row_xyb2 = opsin.PlaneRow(2, y);
+    for (size_t x = 0; x < xsize; x++) {
+      LinearToXyb(row_in0[x], row_in1[x], row_in2[x], &row_xyb0[x],
+                  &row_xyb1[x], &row_xyb2[x]);
+    }
+  }
+  return opsin;
+}
+
+// DEPRECATED
+Image3F OpsinDynamicsImage(const Image3B& srgb8) {
+  CodecContext codec_context;
+  CodecInOut io(&codec_context);
+  Image3F srgb = StaticCastImage3<float>(srgb8);
+  io.SetFromImage(std::move(srgb), codec_context.c_srgb[0]);
+  PIK_CHECK(io.TransformTo(codec_context.c_linear_srgb[io.IsGray()]));
+  return OpsinDynamicsImage(&io, Rect(io.color()));
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/opsin_image.h b/codec/L2/demos/pikEnc/host/pik/opsin_image.h
new file mode 100755
index 0000000000..693142836b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/opsin_image.h
@@ -0,0 +1,44 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_OPSIN_IMAGE_H_
+#define PIK_OPSIN_IMAGE_H_
+
+// Converts to XYB color space.
+
+#include <stdint.h>
+#include <cstdlib>
+#include <vector>
+
+#include "pik/codec.h"
+#include "pik/compiler_specific.h"
+#include "pik/opsin_params.h"
+
+namespace pik {
+
+// r, g, b are linear.
+static PIK_INLINE void OpsinAbsorbance(const float r, const float g,
+                                       const float b, float out[3]) {
+  const float* mix = &kOpsinAbsorbanceMatrix[0];
+  const float* bias = &kOpsinAbsorbanceBias[0];
+  out[0] = mix[0] * r + mix[1] * g + mix[2] * b + bias[0];
+  out[1] = mix[3] * r + mix[4] * g + mix[5] * b + bias[1];
+  out[2] = mix[6] * r + mix[7] * g + mix[8] * b + bias[2];
+}
+
+void LinearToXyb(const float r, const float g, const float b,
+                 float* PIK_RESTRICT valx, float* PIK_RESTRICT valy,
+                 float* PIK_RESTRICT valz);
+
+// Returns the opsin XYB for the part of the image bounded by rect.
+Image3F OpsinDynamicsImage(const CodecInOut* in, const Rect& rect);
+
+// DEPRECATED, used by opsin_image_wrapper.
+Image3F OpsinDynamicsImage(const Image3B& srgb);
+
+}  // namespace pik
+
+#endif  // PIK_OPSIN_IMAGE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/opsin_inverse.cc b/codec/L2/demos/pikEnc/host/pik/opsin_inverse.cc
new file mode 100755
index 0000000000..f236a90e57
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/opsin_inverse.cc
@@ -0,0 +1,162 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/opsin_inverse.h"
+
+#include <mutex>
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/compiler_specific.h"
+#include "pik/opsin_params.h"
+#include "pik/profiler.h"
+
+namespace pik {
+namespace {
+
+SIMD_ALIGN float inverse_matrix[9 * SIMD_FULL(float)::N];
+
+SIMD_ATTR void InitInverseMatrix() {
+  // Prevent tsan warnings just in case this is called by concurrent decoders.
+  static std::mutex mutex;
+  std::lock_guard<std::mutex> guard(mutex);
+  const SIMD_FULL(float) d;
+  const float* PIK_RESTRICT inverse = GetOpsinAbsorbanceInverseMatrix();
+  for (size_t i = 0; i < 9; ++i) {
+    store(set1(d, inverse[i]), d, &inverse_matrix[i * d.N]);
+  }
+}
+
+// Inverts the pixel-wise RGB->XYB conversion in OpsinDynamicsImage() (including
+// the gamma mixing and simple gamma). Avoids clamping to [0, 255] - out of
+// (sRGB) gamut values may be in-gamut after transforming to a wider space.
+// "inverse_matrix" points to 9 broadcasted vectors, which are the 3x3 entries
+// of the (row-major) opsin absorbance matrix inverse. Pre-multiplying its
+// entries by c is equivalent to multiplying linear_* by c afterwards.
+template <class D, class V>
+SIMD_ATTR PIK_INLINE void XybToRgb(D d, const V opsin_x, const V opsin_y,
+                                   const V opsin_b,
+                                   const float* PIK_RESTRICT inverse_matrix,
+                                   V* const PIK_RESTRICT linear_r,
+                                   V* const PIK_RESTRICT linear_g,
+                                   V* const PIK_RESTRICT linear_b) {
+#if SIMD_TARGET_VALUE == SIMD_NONE
+  const auto inv_scale_x = set1(d, kInvScaleR);
+  const auto inv_scale_y = set1(d, kInvScaleG);
+  const auto neg_bias_r = set1(d, kNegOpsinAbsorbanceBiasRGB[0]);
+  const auto neg_bias_g = set1(d, kNegOpsinAbsorbanceBiasRGB[1]);
+  const auto neg_bias_b = set1(d, kNegOpsinAbsorbanceBiasRGB[2]);
+#else
+  const auto neg_bias_rgb = load_dup128(d, kNegOpsinAbsorbanceBiasRGB);
+  SIMD_ALIGN const float inv_scale_lanes[4] = {kInvScaleR, kInvScaleG};
+  const auto inv_scale = load_dup128(d, inv_scale_lanes);
+  const auto inv_scale_x = broadcast<0>(inv_scale);
+  const auto inv_scale_y = broadcast<1>(inv_scale);
+  const auto neg_bias_r = broadcast<0>(neg_bias_rgb);
+  const auto neg_bias_g = broadcast<1>(neg_bias_rgb);
+  const auto neg_bias_b = broadcast<2>(neg_bias_rgb);
+#endif
+
+  // Color space: XYB -> RGB
+  const auto gamma_r = inv_scale_x * (opsin_y + opsin_x);
+  const auto gamma_g = inv_scale_y * (opsin_y - opsin_x);
+  const auto gamma_b = opsin_b;
+
+  // Undo gamma compression: linear = gamma^3 for efficiency.
+  const auto gamma_r2 = gamma_r * gamma_r;
+  const auto gamma_g2 = gamma_g * gamma_g;
+  const auto gamma_b2 = gamma_b * gamma_b;
+  const auto mixed_r = mul_add(gamma_r2, gamma_r, neg_bias_r);
+  const auto mixed_g = mul_add(gamma_g2, gamma_g, neg_bias_g);
+  const auto mixed_b = mul_add(gamma_b2, gamma_b, neg_bias_b);
+
+  // Unmix (multiply by 3x3 inverse_matrix)
+  *linear_r = load(d, &inverse_matrix[0 * d.N]) * mixed_r;
+  *linear_g = load(d, &inverse_matrix[3 * d.N]) * mixed_r;
+  *linear_b = load(d, &inverse_matrix[6 * d.N]) * mixed_r;
+  const auto tmp_r = load(d, &inverse_matrix[1 * d.N]) * mixed_g;
+  const auto tmp_g = load(d, &inverse_matrix[4 * d.N]) * mixed_g;
+  const auto tmp_b = load(d, &inverse_matrix[7 * d.N]) * mixed_g;
+  *linear_r = mul_add(load(d, &inverse_matrix[2 * d.N]), mixed_b, *linear_r);
+  *linear_g = mul_add(load(d, &inverse_matrix[5 * d.N]), mixed_b, *linear_g);
+  *linear_b = mul_add(load(d, &inverse_matrix[8 * d.N]), mixed_b, *linear_b);
+  *linear_r += tmp_r;
+  *linear_g += tmp_g;
+  *linear_b += tmp_b;
+}
+
+}  // namespace
+
+SIMD_ATTR void OpsinToLinear(Image3F* PIK_RESTRICT inout) {
+  PROFILER_FUNC;
+  InitInverseMatrix();
+  const size_t xsize = inout->xsize();  // not padded
+  const size_t ysize = inout->ysize();
+
+  // Converts every row from XYB to linear sRGB in place, one row at a time.
+  for (size_t y = 0; y < ysize; ++y) {
+    // Faster than adding via ByteOffset at end of loop.
+    float* PIK_RESTRICT row0 = inout->PlaneRow(0, y);
+    float* PIK_RESTRICT row1 = inout->PlaneRow(1, y);
+    float* PIK_RESTRICT row2 = inout->PlaneRow(2, y);
+
+    const SIMD_FULL(float) d;
+
+    for (size_t x = 0; x < xsize; x += d.N) {
+      const auto in_opsin_x = load(d, row0 + x);
+      const auto in_opsin_y = load(d, row1 + x);
+      const auto in_opsin_b = load(d, row2 + x);
+      PIK_COMPILER_FENCE;
+      SIMD_FULL(float)::V linear_r, linear_g, linear_b;
+      XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, inverse_matrix, &linear_r,
+               &linear_g, &linear_b);
+
+      store(linear_r, d, row0 + x);
+      store(linear_g, d, row1 + x);
+      store(linear_b, d, row2 + x);
+    }
+  }
+}
+
+SIMD_ATTR void OpsinToLinear(const Image3F& opsin, const Rect& rect_out,
+                             Image3F* PIK_RESTRICT linear) {
+  PROFILER_ZONE("OpsinToLinear(Rect)");
+  InitInverseMatrix();
+  PIK_ASSERT(linear->xsize() != 0);
+  // Opsin is padded to blocks; only produce valid output pixels.
+  const size_t xsize = rect_out.xsize();
+  const size_t ysize = rect_out.ysize();
+  PIK_ASSERT(xsize <= opsin.xsize());
+  PIK_ASSERT(ysize <= opsin.ysize());
+
+  for (size_t y = 0; y < ysize; ++y) {
+    // Faster than adding via ByteOffset at end of loop.
+    const float* PIK_RESTRICT row_opsin_x = opsin.ConstPlaneRow(0, y);
+    const float* PIK_RESTRICT row_opsin_y = opsin.ConstPlaneRow(1, y);
+    const float* PIK_RESTRICT row_opsin_b = opsin.ConstPlaneRow(2, y);
+
+    float* PIK_RESTRICT row_linear_r = rect_out.PlaneRow(linear, 0, y);
+    float* PIK_RESTRICT row_linear_g = rect_out.PlaneRow(linear, 1, y);
+    float* PIK_RESTRICT row_linear_b = rect_out.PlaneRow(linear, 2, y);
+
+    const SIMD_FULL(float) d;
+
+    for (size_t x = 0; x < xsize; x += d.N) {
+      const auto in_opsin_x = load(d, row_opsin_x + x);
+      const auto in_opsin_y = load(d, row_opsin_y + x);
+      const auto in_opsin_b = load(d, row_opsin_b + x);
+      PIK_COMPILER_FENCE;
+      SIMD_FULL(float)::V linear_r, linear_g, linear_b;
+      XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, inverse_matrix, &linear_r,
+               &linear_g, &linear_b);
+
+      store(linear_r, d, row_linear_r + x);
+      store(linear_g, d, row_linear_g + x);
+      store(linear_b, d, row_linear_b + x);
+    }
+  }
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/opsin_inverse.h b/codec/L2/demos/pikEnc/host/pik/opsin_inverse.h
new file mode 100755
index 0000000000..d63ddcf315
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/opsin_inverse.h
@@ -0,0 +1,28 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_OPSIN_INVERSE_H_
+#define PIK_OPSIN_INVERSE_H_
+
+// XYB -> linear sRGB.
+
+#include "pik/data_parallel.h"
+#include "pik/image.h"
+#include "pik/simd/simd.h"
+
+namespace pik {
+
+// Converts `inout` from opsin to linear sRGB in-place. Called from per-pass
+// postprocessing, hence parallelized.
+SIMD_ATTR void OpsinToLinear(Image3F* PIK_RESTRICT inout);
+
+// Converts to linear sRGB, writing to linear:rect_out.
+SIMD_ATTR void OpsinToLinear(const Image3F& opsin, const Rect& rect_out,
+                             Image3F* PIK_RESTRICT linear);
+
+}  // namespace pik
+
+#endif  // PIK_OPSIN_INVERSE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/opsin_params.cc b/codec/L2/demos/pikEnc/host/pik/opsin_params.cc
new file mode 100755
index 0000000000..1c596c015b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/opsin_params.cc
@@ -0,0 +1,27 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/opsin_params.h"
+
+#include "pik/linalg.h"
+
+namespace pik {
+
+static const float* ComputeOpsinAbsorbanceInverseMatrix() {
+  // Heap copy of the forward matrix, handed to Inv3x3Matrix; not freed here.
+  const int kNumElements = 9;
+  float* const inv = new float[kNumElements];
+  for (int k = 0; k != kNumElements; ++k) inv[k] = kOpsinAbsorbanceMatrix[k];
+  Inv3x3Matrix(inv);
+  return inv;
+}
+
+const float* GetOpsinAbsorbanceInverseMatrix() {
+  static const float* const cached = ComputeOpsinAbsorbanceInverseMatrix();
+  return cached;  // Function-local static: computed on the first call only.
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/opsin_params.h b/codec/L2/demos/pikEnc/host/pik/opsin_params.h
new file mode 100755
index 0000000000..addc0f5d92
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/opsin_params.h
@@ -0,0 +1,69 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_OPSIN_PARAMS_H_
+#define PIK_OPSIN_PARAMS_H_
+
+// Constants that define the XYB color space.
+
+#include <stdlib.h>
+
+#include "pik/simd/simd.h"  // SIMD_ALIGN
+
+namespace pik {
+
+static constexpr float kScale = 255.0f;
+
+// NOTE: inverse of this cannot be constant because we tune these values.
+static const float kOpsinAbsorbanceMatrix[9] = {
+    static_cast<float>((0.29956550340058319) / kScale),
+    static_cast<float>((0.63373087833825936) / kScale),
+    static_cast<float>((0.077705617820981968) / kScale),
+    static_cast<float>((0.22158691104574774) / kScale),
+    static_cast<float>((0.68491388044116142) / kScale),
+    static_cast<float>((0.096254234043612538) / kScale),
+    static_cast<float>((0.20062661225219422) / kScale),
+    static_cast<float>((0.070366199217588729) / kScale),
+    static_cast<float>((0.5571760754215358) / kScale),
+};
+
+// Returns 3x3 row-major matrix inverse of kOpsinAbsorbanceMatrix.
+// opsin_image_test verifies this is actually the inverse.
+const float* GetOpsinAbsorbanceInverseMatrix();
+
+static const float kOpsinAbsorbanceBias[3] = {
+    static_cast<float>((0.26786006338144885) / kScale),
+    static_cast<float>((0.24494032763907073) / kScale),
+    static_cast<float>((0.14255999980363571) / kScale),
+};
+SIMD_ALIGN static const float kNegOpsinAbsorbanceBiasRGB[4] = {
+    -kOpsinAbsorbanceBias[0], -kOpsinAbsorbanceBias[1],
+    -kOpsinAbsorbanceBias[2], 255.0f};
+
+static const float kScaleR = 1.0f;
+static const float kScaleG = 2.0f - kScaleR;
+static const float kInvScaleR = 1.0f / kScaleR;
+static const float kInvScaleG = 1.0f / kScaleG;
+
+static constexpr float kXybCenter[3] = {0.0087982f, 0.5513899f, 0.4716444f};
+
+// Radius of the XYB range around the center. The full range is 2 * kXybRadius.
+static constexpr float kXybRadius[3] = {0.0301006f, 0.4512295f, 0.4716444f};
+
+static constexpr float kXybMin[3] = {
+    kXybCenter[0] - kXybRadius[0],
+    kXybCenter[1] - kXybRadius[1],
+    kXybCenter[2] - kXybRadius[2],
+};
+static constexpr float kXybMax[3] = {
+    kXybCenter[0] + kXybRadius[0],
+    kXybCenter[1] + kXybRadius[1],
+    kXybCenter[2] + kXybRadius[2],
+};
+
+}  // namespace pik
+
+#endif  // PIK_OPSIN_PARAMS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/optimize.h b/codec/L2/demos/pikEnc/host/pik/optimize.h
new file mode 100755
index 0000000000..87b2b2ae7c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/optimize.h
@@ -0,0 +1,149 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+// Utility functions for optimizing multi-dimensional nonlinear functions.
+
+#ifndef PIK_OPTIMIZE_H_
+#define PIK_OPTIMIZE_H_
+
+#include <stdio.h>
+#include <cmath>
+#include <vector>
+
+namespace pik {
+namespace optimize {
+
+// Element-wise sum of x and y. The result has x.size() elements, so y must
+// provide at least that many.
+template <typename T>
+std::vector<T> operator+(const std::vector<T>& x, const std::vector<T>& y) {
+  std::vector<T> z(x.size());
+  for (size_t i = 0; i < x.size(); ++i) z[i] = x[i] + y[i];
+  return z;
+}
+
+// Element-wise difference x - y. The result has x.size() elements, so y must
+// provide at least that many.
+template <typename T>
+std::vector<T> operator-(const std::vector<T>& x, const std::vector<T>& y) {
+  std::vector<T> z(x.size());
+  for (size_t i = 0; i < x.size(); ++i) z[i] = x[i] - y[i];
+  return z;
+}
+
+// Returns a copy of x with every element multiplied by the scalar v. The
+// index is size_t to match x.size() (no signed/unsigned comparison).
+template <typename T>
+std::vector<T> operator*(T v, const std::vector<T>& x) {
+  std::vector<T> y(x.size());
+  for (size_t i = 0; i < x.size(); ++i) y[i] = v * x[i];
+  return y;
+}
+
+// Dot product of x and y over the first x.size() elements; y must provide at
+// least that many. Returns 0 for empty x.
+template <typename T>
+T operator*(const std::vector<T>& x, const std::vector<T>& y) {
+  T r = 0.0;
+  for (size_t i = 0; i < x.size(); ++i) r += x[i] * y[i];
+  return r;
+}
+
+// Implementation of the Scaled Conjugate Gradient method described in the
+// following paper:
+//   Moller, M. "A Scaled Conjugate Gradient Algorithm for Fast Supervised
+//   Learning", Neural Networks, Vol. 6. pp. 525-533, 1993
+//   http://sci2s.ugr.es/keel/pdf/algorithm/articulo/moller1990.pdf
+//
+// The Function template parameter is a class that has the following method:
+//
+//   // Returns the value of the function at point w and sets *df to be the
+//   // negative gradient vector of the function at point w.
+//   double Compute(const vector<double>& w, vector<double>* df) const;
+//
+// Returns w once |df(w)| <= grad_norm_threshold or after max_iters (>0) steps.
+template <typename T, typename Function>
+std::vector<T> OptimizeWithScaledConjugateGradientMethod(
+    const Function& f, const std::vector<T>& w0, const T grad_norm_threshold,
+    int max_iters) {
+  const int n = w0.size();
+  const T rsq_threshold = grad_norm_threshold * grad_norm_threshold;
+  const T sigma0 = static_cast<T>(0.0001);
+  T lambda = static_cast<T>(0.000001);
+  std::vector<T> w(w0);
+  std::vector<T> p(n);
+  std::vector<T> r(n);
+  T fw = f.Compute(w, &r);  // r receives the negative gradient at w
+  T rsq = r * r;
+  T psq = rsq;
+  T mu = rsq;
+  p = r;
+  for (int k = 1; rsq > rsq_threshold; ++k) {
+    if (max_iters > 0 && k > max_iters) break;  // max_iters <= 0: no limit
+    T sigma = sigma0 / std::sqrt(psq);
+    std::vector<T> r2(n);
+    std::vector<T> w2 = w + (sigma * p);
+    f.Compute(w2, &r2);  // only the gradient at w2 is used; value discarded
+    T delta = (mu - (p * r2)) / sigma;
+    T delta1 = delta + lambda * psq;
+
+    if (delta1 <= 0) {
+      lambda = -2.0 * delta / psq;
+      delta1 = delta + lambda * psq;
+    }
+
+    bool success = true;
+    T alpha;
+    T fw1;
+    T Delta;
+    std::vector<T> w1(n);
+    std::vector<T> r1(n);
+
+    do {
+      alpha = mu / delta1;
+      w1 = w + (alpha * p);
+      fw1 = f.Compute(w1, &r1);
+      const T div = mu * alpha;
+      Delta = div == 0 ? 0 : 2 * (fw - fw1) / div;
+      success = (fw1 <= fw);
+      if (!success) {
+        lambda += delta1 * (1 - Delta) / psq;
+        delta1 = delta + lambda * psq;
+      }
+    } while (!success);  // repeat until the step does not increase f
+
+    T r1sq = r1 * r1;
+    T beta = k % n == 0 ? 0.0 : (r1sq - (r1 * r)) / mu;  // 0: p restarts at r
+
+#if SCG_DEBUG
+    printf(
+        "Step %3d fw=%10.2f |dfw|=%7.3f |p|=%6.2f "
+        "delta=%9.6f lambda=%6.4f mu=%8.4f alpha=%8.4f "
+        "beta=%5.3f Delta=%5.3f\n",
+        k, fw, sqrt(rsq), sqrt(psq), delta, lambda, mu, alpha, beta, Delta);
+#endif
+
+    if (Delta >= 0.75) {
+      lambda *= 0.25;
+    } else if (Delta < 0.25) {
+      lambda += delta1 * (1 - Delta) / psq;
+    }
+
+    w = w1;  // accept the step; w1/fw1/r1 become the current point
+    fw = fw1;
+    r = r1;
+    rsq = r1sq;
+    p = r + (beta * p);
+    psq = p * p;
+    mu = p * r;
+  }
+  return w;
+}
+
+}  // namespace optimize
+}  // namespace pik
+
+#endif  // PIK_OPTIMIZE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/os_specific.cc b/codec/L2/demos/pikEnc/host/pik/os_specific.cc
new file mode 100755
index 0000000000..06c4683f28
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/os_specific.cc
@@ -0,0 +1,269 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/os_specific.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+#include <ctime>
+#include <random>
+#include <sstream>
+#include <iterator>
+
+#include "pik/arch_specific.h"
+#include "pik/compiler_specific.h"
+
+#if defined(_WIN32) || defined(_WIN64)
+#define OS_WIN 1
+#define NOMINMAX
+#include <windows.h>
+#else
+#define OS_WIN 0
+#endif
+
+#ifdef __linux__
+#define OS_LINUX 1
+#include <sched.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#else
+#define OS_LINUX 0
+#endif
+
+#ifdef __MACH__
+#define OS_MAC 1
+#include <mach/mach.h>
+#include <mach/mach_time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#else
+#define OS_MAC 0
+#endif
+
+#ifdef __FreeBSD__
+#define OS_FREEBSD 1
+#include <sys/cpuset.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#else
+#define OS_FREEBSD 0
+#endif
+
+namespace pik {
+
+double Now() {
+#if OS_WIN
+  LARGE_INTEGER counter;
+  (void)QueryPerformanceCounter(&counter);
+  LARGE_INTEGER freq;
+  (void)QueryPerformanceFrequency(&freq);
+  return double(counter.QuadPart) / freq.QuadPart;  // counter / frequency
+#elif OS_MAC
+  const auto t = mach_absolute_time();
+  // mach_absolute_time() is not in seconds: scale it by the timebase ratio
+  // numer/denom (queried once, on first use) and by 1E-9, as below.
+  // See https://developer.apple.com/library/mac/qa/qa1398/_index.html
+  static mach_timebase_info_data_t timebase;
+  if (timebase.denom == 0) {
+    (void)mach_timebase_info(&timebase);
+  }
+  return double(t) * timebase.numer / timebase.denom * 1E-9;
+#else
+  timespec t;
+  clock_gettime(CLOCK_MONOTONIC, &t);  // return value is not checked
+  return t.tv_sec + t.tv_nsec * 1E-9;  // whole seconds + nanosecond part
+#endif
+}
+
+struct ThreadAffinity {  // CPU mask/set; has no members on other platforms.
+#if OS_WIN
+  DWORD_PTR mask;
+#elif OS_LINUX
+  cpu_set_t set;
+#elif OS_FREEBSD
+  cpuset_t set;
+#endif
+};
+
+// Returns a malloc()ed snapshot of the current affinity; caller must free().
+ThreadAffinity* GetThreadAffinity() {
+  auto* affinity = static_cast<ThreadAffinity*>(malloc(sizeof(ThreadAffinity)));
+  PIK_CHECK(affinity != nullptr);  // malloc can fail; do not dereference null.
+#if OS_WIN
+  DWORD_PTR system_affinity;
+  const BOOL ok = GetProcessAffinityMask(GetCurrentProcess(), &affinity->mask,
+                                         &system_affinity);
+  PIK_CHECK(ok);
+#elif OS_LINUX
+  const pid_t pid = 0;  // current thread
+  const int err = sched_getaffinity(pid, sizeof(cpu_set_t), &affinity->set);
+  PIK_CHECK(err == 0);
+#elif OS_FREEBSD
+  const pid_t pid = getpid();  // queried per process (CPU_WHICH_PID)
+  const int err = cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, pid,
+                                     sizeof(cpuset_t), &affinity->set);
+  PIK_CHECK(err == 0);
+#endif
+  return affinity;
+}
+
+namespace {
+
+ThreadAffinity* OriginalThreadAffinity() {
+  static ThreadAffinity* original = GetThreadAffinity();  // first call only
+  return original;
+}
+
+}  // namespace
+
+Status SetThreadAffinity(ThreadAffinity* affinity) {
+  // Ensure original is initialized before changing.
+  const ThreadAffinity* const original = OriginalThreadAffinity();
+  PIK_CHECK(original != nullptr);
+
+#if OS_WIN
+  const HANDLE hThread = GetCurrentThread();
+  const DWORD_PTR prev = SetThreadAffinityMask(hThread, affinity->mask);
+  if (prev == 0) return PIK_FAILURE("SetThreadAffinityMask failed");
+#elif OS_LINUX
+  const pid_t pid = 0;  // current thread
+  const int err = sched_setaffinity(pid, sizeof(cpu_set_t), &affinity->set);
+  if (err != 0) return PIK_FAILURE("sched_setaffinity failed");
+#elif OS_FREEBSD
+  const pid_t pid = getpid();  // applied per process (CPU_WHICH_PID)
+  const int err = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, pid,
+                                     sizeof(cpuset_t), &affinity->set);
+  if (err != 0) return PIK_FAILURE("cpuset_setaffinity failed");
+#else
+  printf("Don't know how to SetThreadAffinity on this platform.\n");
+  return false;  // unsupported platform: report failure to the caller
+#endif
+  return true;  // reached only on the supported platforms above
+}
+
+std::vector<int> AvailableCPUs() {
+  std::vector<int> cpus;
+  cpus.reserve(64);
+#if OS_WIN
+  const ThreadAffinity* const affinity = OriginalThreadAffinity();
+  for (int cpu = 0; cpu < 64; ++cpu) {  // tests mask bits 0..63
+    if (affinity->mask & (1ULL << cpu)) {
+      cpus.push_back(cpu);
+    }
+  }
+#elif OS_LINUX
+  const ThreadAffinity* const affinity = OriginalThreadAffinity();
+  for (size_t cpu = 0; cpu < sizeof(cpu_set_t) * 8; ++cpu) {  // every set bit
+    if (CPU_ISSET(cpu, &affinity->set)) {
+      cpus.push_back(cpu);
+    }
+  }
+#elif OS_FREEBSD
+  const ThreadAffinity* const affinity = OriginalThreadAffinity();
+  for (size_t cpu = 0; cpu < sizeof(cpuset_t) * 8; ++cpu) {  // every set bit
+    if (CPU_ISSET(cpu, &affinity->set)) {
+      cpus.push_back(cpu);
+    }
+  }
+#else
+  cpus.push_back(0);  // unknown platform: report CPU 0 only
+#endif
+  return cpus;
+}
+
+// Restricts the affinity to `cpu` alone; fails if `cpu` cannot be represented.
+Status PinThreadToCPU(const int cpu) {
+  if (cpu < 0) return PIK_FAILURE("Invalid CPU index");
+  ThreadAffinity affinity;
+#if OS_WIN
+  // Shifting by the mask width or more is undefined; reject instead.
+  if (cpu >= int(sizeof(affinity.mask) * 8)) return PIK_FAILURE("Invalid CPU");
+  affinity.mask = 1ULL << cpu;
+#elif OS_LINUX || OS_FREEBSD
+  CPU_ZERO(&affinity.set);
+  CPU_SET(cpu, &affinity.set);
+#endif
+  return SetThreadAffinity(&affinity);
+}
+
+// Pins the calling thread to a randomly chosen CPU from AvailableCPUs().
+Status PinThreadToRandomCPU() {
+  std::vector<int> cpus = AvailableCPUs();
+  if (cpus.empty()) return PIK_FAILURE("No CPUs available");
+
+  // Skip the first two CPUs because interrupts are often pinned to them, but
+  // only if others remain: small machines/containers must not abort here.
+  if (cpus.size() > 2) cpus.erase(cpus.begin(), cpus.begin() + 2);
+
+  // Random choice to prevent burning up the same core.
+  std::random_device device;
+  std::ranlux48 generator(device());
+  std::shuffle(cpus.begin(), cpus.end(), generator);
+  const int cpu = cpus.front();
+  PIK_RETURN_IF_ERROR(PinThreadToCPU(cpu));
+
+  // After setting affinity, we should be running on the desired CPU.
+#if PIK_ARCH_X64
+  printf("Running on CPU #%d, APIC ID %02x\n", cpu, ApicId());
+#else
+  printf("Running on CPU #%d\n", cpu);
+#endif
+  return true;
+}
+
+// Runs args[0] with arguments args[1..] and waits for it to finish. Returns
+// true iff the command could be started and reported a wait status of 0.
+Status RunCommand(const std::vector<std::string>& args) {
+  if (args.empty()) return false;  // No program name: c_args[0] would be null.
+#if _POSIX_VERSION >= 200112L
+  // Avoid system(), but do not try to be over-zealous about not passing along
+  // some special resources further (such as: inherited-not-marked-FD_CLOEXEC
+  // file descriptors).
+  std::vector<const char*> c_args;
+  c_args.reserve(args.size() + 1);
+  for (const std::string& arg : args) c_args.push_back(arg.c_str());
+  c_args.push_back(nullptr);
+  const pid_t pid = fork();
+  if (pid == -1)  // fork() failed.
+    return false;
+  if (pid != 0) {  // Parent process.
+    int ret_status;
+    if (pid != waitpid(pid, &ret_status, 0)) return false;  // waitpid() error.
+    return ret_status == 0;
+  } else {  // Child process.
+    execvp(c_args[0],
+           // Address benign-but-annoying execvp() signature weirdness.
+           const_cast<char * const *>(c_args.data()));
+    fprintf(stderr, "execvp() failed. Exiting child process.\n");
+    // _exit(): do not flush stdio buffers inherited from the parent a second
+    // time, and do not run the parent's atexit handlers in the child.
+    _exit(EXIT_FAILURE);
+  }
+#elif OS_WIN
+  // Synthesize a string for system(). And warn about it.
+  // TODO(user): Fix this - research the safe way to run a command on Windows.
+  // Likely, the solution is along these lines:
+  // https://docs.microsoft.com/en-us/windows/desktop/ProcThread/creating-processes
+  std::ostringstream cmd;
+  std::copy(args.begin(), args.end(),
+            std::ostream_iterator<std::string>(cmd, " "));
+  const std::string cmd_line = cmd.str();
+  fprintf(stderr, "Warning: Using system() on string: %s\n", cmd_line.c_str());
+  errno = 0;  // Only an ENOENT raised by system() itself should count.
+  const int ret = system(cmd_line.c_str());
+  // Windows: ENOENT means the command interpreter was not found.
+  return errno != ENOENT && ret == 0;
+#else
+#error Neither a POSIX-1.2001 nor a Windows System.
+#endif
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/os_specific.h b/codec/L2/demos/pikEnc/host/pik/os_specific.h
new file mode 100755
index 0000000000..bf200c16c4
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/os_specific.h
@@ -0,0 +1,50 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_OS_SPECIFIC_H_
+#define PIK_OS_SPECIFIC_H_
+
+// OS-specific functions (e.g. timing and thread affinity)
+
+#include <string>
+#include <vector>
+
+#include "pik/status.h"
+
+namespace pik {
+
+// Returns current time [seconds] from a monotonic clock with unspecified
+// starting point - only suitable for computing elapsed time.
+double Now();
+
+// Returns CPU numbers in [0, N), where N is the number of bits in the
+// thread's initial affinity (unaffected by any SetThreadAffinity).
+std::vector<int> AvailableCPUs();
+
+// Opaque.
+struct ThreadAffinity;
+
+// Caller must free() the return value.
+ThreadAffinity* GetThreadAffinity();
+
+// Restores a previous affinity returned by GetThreadAffinity.
+Status SetThreadAffinity(ThreadAffinity* affinity);
+
+// Ensures the thread is running on the specified cpu, and no others.
+// Useful for reducing nanobenchmark variability (fewer context switches).
+// Uses SetThreadAffinity.
+Status PinThreadToCPU(const int cpu);
+
+// Random choice of CPU avoids overloading any one core.
+// Uses SetThreadAffinity.
+Status PinThreadToRandomCPU();
+
+// Executes a command in a subprocess.
+Status RunCommand(const std::vector<std::string>& args);
+
+}  // namespace pik
+
+#endif  // PIK_OS_SPECIFIC_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/padded_bytes.cc b/codec/L2/demos/pikEnc/host/pik/padded_bytes.cc
new file mode 100755
index 0000000000..5a4dc3a76c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/padded_bytes.cc
@@ -0,0 +1,37 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/padded_bytes.h"
+
+namespace pik {
+
+// Reallocates to `capacity` bytes plus padding, keeping the first size_ bytes.
+// If allocation fails, data() == nullptr and size_ = capacity_ = 0.
+void PaddedBytes::IncreaseCapacityTo(size_t capacity) {
+  PIK_ASSERT(capacity > capacity_);
+  // write_bits.h writes up to 7 bytes past the end.
+  CacheAlignedUniquePtr new_data = AllocateArray(capacity + 8);
+  if (new_data == nullptr) {
+    // Allocation failed, discard all data to ensure this is noticed. The old
+    // buffer must go too: callers detect failure via data() == nullptr.
+    size_ = capacity_ = 0;
+    data_.reset();
+    return;
+  }
+
+  if (data_ == nullptr) {
+    // First allocation: ensure first byte is initialized (won't be copied).
+    new_data[0] = 0;
+  } else {
+    // Subsequent resize: copy old data; zero the next byte for write_bits.
+    memcpy(new_data.get(), data_.get(), size_);
+    new_data[size_] = 0;
+  }
+  capacity_ = capacity;
+  std::swap(new_data, data_);
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/padded_bytes.h b/codec/L2/demos/pikEnc/host/pik/padded_bytes.h
new file mode 100755
index 0000000000..afd183d819
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/padded_bytes.h
@@ -0,0 +1,156 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_PADDED_BYTES_H_
+#define PIK_PADDED_BYTES_H_
+
+// std::vector replacement with padding to reduce bounds checks in WriteBits
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <memory>
+
+#include "pik/cache_aligned.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Provides a subset of the std::vector interface with some differences:
+// - allows WriteBits to write 64 bits at a time without bounds checking;
+// - ONLY zero-initializes the first byte (required by WriteBits);
+// - ensures cache-line alignment.
+class PaddedBytes {
+ public:
+  // Required for output params.
+  PaddedBytes() : size_(0), capacity_(0) {}
+
+  explicit PaddedBytes(size_t size) : size_(size), capacity_(0) {
+    if (size != 0) IncreaseCapacityTo(size);
+  }
+
+  PaddedBytes(const PaddedBytes& other) : size_(other.size_), capacity_(0) {
+    if (size_ != 0) IncreaseCapacityTo(size_);
+    if (data() != nullptr) memcpy(data(), other.data(), size_);
+  }
+  PaddedBytes& operator=(const PaddedBytes& other) {
+    // Self-assignment is safe.
+    resize(other.size());
+    if (data() != nullptr) memmove(data(), other.data(), size_);
+    return *this;
+  }
+
+  // default is not OK - need to set other.size_ to 0!
+  PaddedBytes(PaddedBytes&& other)
+      : size_(other.size_),
+        capacity_(other.capacity_),
+        data_(std::move(other.data_)) {
+    other.size_ = other.capacity_ = 0;
+  }
+  PaddedBytes& operator=(PaddedBytes&& other) {
+    size_ = other.size_;
+    capacity_ = other.capacity_;
+    data_ = std::move(other.data_);
+
+    if (&other != this) {
+      other.size_ = other.capacity_ = 0;
+    }
+    return *this;
+  }
+
+  void swap(PaddedBytes& other) {
+    std::swap(size_, other.size_);
+    std::swap(capacity_, other.capacity_);
+    std::swap(data_, other.data_);
+  }
+
+  void reserve(size_t capacity) {
+    if (capacity > capacity_) IncreaseCapacityTo(capacity);
+  }
+  // NOTE: unlike vector, this does not initialize the new data!
+  // However, we guarantee that write_bits can safely append after
+  // the resize, as we zero-initialize the first new byte of data.
+  void resize(size_t size) {
+    if (size > capacity_) IncreaseCapacityTo(size);
+    size_ = (data() == nullptr) ? 0 : size;
+  }
+  // Amortized constant complexity due to exponential growth.
+  void push_back(uint8_t x) {
+    if (size_ == capacity_) {
+      IncreaseCapacityTo(std::max<size_t>(3 * capacity_ / 2, 64));
+      if (data() == nullptr) return;
+    }
+
+    data_[size_++] = x;
+  }
+
+  size_t size() const { return size_; }
+  size_t capacity() const { return capacity_; }
+
+  uint8_t* data() { return data_.get(); }
+  const uint8_t* data() const { return data_.get(); }
+
+  // std::vector operations implemented in terms of the public interface above.
+
+  void clear() { resize(0); }
+  bool empty() const { return size() == 0; }
+
+  void assign(std::initializer_list<uint8_t> il) {
+    resize(il.size());  // Leaves size_ == 0 if there is no buffer.
+    if (size_ != 0) memcpy(data(), il.begin(), size_);  // Never copy to null.
+  }
+
+  uint8_t* begin() { return data(); }
+  const uint8_t* begin() const { return data(); }
+  uint8_t* end() { return begin() + size(); }
+  const uint8_t* end() const { return begin() + size(); }
+
+  uint8_t& operator[](const size_t i) {
+    PIK_ASSERT(i < size());
+    return data()[i];
+  }
+  const uint8_t& operator[](const size_t i) const {
+    PIK_ASSERT(i < size());
+    return data()[i];
+  }
+
+  uint8_t& back() {
+    PIK_ASSERT(size() != 0);
+    return data()[size() - 1];
+  }
+  const uint8_t& back() const {
+    PIK_ASSERT(size() != 0);
+    return data()[size() - 1];
+  }
+
+  template <typename T>
+  void append(const T& other) {
+    const size_t pos = size();  // Taken first: `other` may alias *this.
+    resize(pos + other.size());  // Leaves size_ == 0 if there is no buffer.
+    if (size_ > pos) memcpy(data() + pos, other.data(), size_ - pos);
+  }
+
+ private:
+  // Copies existing data to newly allocated "data_". If allocation fails,
+  // data() == nullptr and size_ = capacity_ = 0.
+  void IncreaseCapacityTo(size_t capacity);
+
+  size_t size_;
+  size_t capacity_;
+  CacheAlignedUniquePtr data_;
+};
+
+template <typename T>
+static inline void Append(const T& s, PaddedBytes* out,
+                          size_t* PIK_RESTRICT byte_pos) {
+  PIK_CHECK(*byte_pos <= out->size() && s.size() <= out->size() - *byte_pos);
+  memcpy(out->data() + *byte_pos, s.data(), s.size());  // checked to fit above
+  *byte_pos += s.size();  // caller's cursor now points just past the copy
+}
+
+}  // namespace pik
+
+#endif  // PIK_PADDED_BYTES_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/pik.cc b/codec/L2/demos/pikEnc/host/pik/pik.cc
new file mode 100755
index 0000000000..074b1359c0
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/pik.cc
@@ -0,0 +1,128 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/pik.h"
+
+#include <string>
+#include <vector>
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/adaptive_quantization.h"
+#include "pik/common.h"
+#include "pik/compressed_image.h"
+#include "pik/headers.h"
+#include "pik/image.h"
+#include "pik/multipass_handler.h"
+#include "pik/noise.h"
+#include "pik/os_specific.h"
+#include "pik/pik_frame.h"
+#include "pik/pik_params.h"
+#include "pik/profiler.h"
+#include "pik/quantizer.h"
+#include "pik/saliency_map.h"
+#include "pik/single_image_handler.h"
+
+namespace pik {
+
+namespace {
+static const uint8_t kBrunsliMagic[] = {0x0A, 0x04, 'B', 0xd2, 0xd5, 'N', 0x12};
+
+// TODO(user): use VerifySignature, when brunsli codebase is attached.
+bool IsBrunsliFile(const PaddedBytes &compressed) {
+  // The signature to look for is the whole kBrunsliMagic array.
+  const size_t signature_length = sizeof(kBrunsliMagic);
+
+  // Inputs shorter than the signature cannot possibly start with it.
+  const bool long_enough = compressed.size() >= signature_length;
+  // && short-circuits, so memcmp never reads past the end of a short input.
+  return long_enough &&
+         memcmp(compressed.data(), kBrunsliMagic, signature_length) == 0;
+}
+
+Status BrunsliToPixels(const DecompressParams &dparams,
+                       const PaddedBytes &compressed, CodecInOut *io,
+                       PikInfo *aux_out, ThreadPool *pool) {  // args unused
+  return PIK_FAILURE("Brunsli decoding is not implemented yet.");
+}
+
+} // namespace
+
+Status PixelsToPik(const CompressParams &cparams, std::string xclbinPath,
+                   const CodecInOut *io, PaddedBytes *compressed,
+                   PikInfo *aux_out, ThreadPool *pool) {
+  if (io->xsize() == 0 || io->ysize() == 0) {
+    return PIK_FAILURE("Empty image");
+  }
+  if (!io->HasOriginalBitsPerSample()) {
+    return PIK_FAILURE("Pik requires specifying original bit depth "
+                       "of the pixels to encode as metadata.");
+  }
+  FileHeader container;
+  MakeFileHeader(cparams, io, &container);
+
+  size_t extension_bits, total_bits;
+  PIK_CHECK(CanEncode(container, &extension_bits, &total_bits));
+
+  compressed->resize(DivCeil(total_bits, kBitsPerByte));  // bits -> bytes
+  size_t pos = 0;  // passed as &pos to WriteFileHeader, then to the pass call
+  PIK_RETURN_IF_ERROR(
+      WriteFileHeader(container, extension_bits, &pos, compressed->data()));
+  FrameParams frame_params;
+  SingleImageManager transform;
+  if (cparams.progressive_mode) {  // configure the three passes listed below
+    // TODO(veluca): re-enable saliency.
+    PassDefinition pass_definition[] = {
+        {/*num_coefficients=*/2, /*salient_only=*/false,
+         /*suitable_for_downsampling_factor_of_at_least=*/4},
+        {/*num_coefficients=*/3, /*salient_only=*/false,
+         /*suitable_for_downsampling_factor_of_at_least=*/2},
+        {/*num_coefficients=*/8, /*salient_only=*/false}};
+    transform.SetProgressiveMode(ProgressiveMode{pass_definition});
+  }
+  PIK_RETURN_IF_ERROR(hls_PixelsToPikPass(cparams, xclbinPath, frame_params, io,
+                                          pool, compressed, pos, aux_out,
+                                          &transform));
+  return true;
+}
+
+Status PikToPixels(const DecompressParams &dparams,
+                   const PaddedBytes &compressed, CodecInOut *io,
+                   PikInfo *aux_out, ThreadPool *pool) {
+  PROFILER_ZONE("PikToPixels uninstrumented");
+
+  if (IsBrunsliFile(compressed)) {  // BrunsliToPixels currently always fails
+    return BrunsliToPixels(dparams, compressed, io, aux_out, pool);
+  }
+
+  // To avoid the complexity of file I/O and buffering, we assume the bitstream
+  // is loaded (or for large images/sequences: mapped into) memory.
+  BitReader reader(compressed.data(), compressed.size());
+  FileHeader container;
+  PIK_RETURN_IF_ERROR(ReadFileHeader(&reader, &container));
+
+  // Preview is discardable, i.e. content image does not rely on decoded preview
+  // pixels; just skip it, if any.
+  size_t preview_size_bits = container.preview.size_bits;
+  if (preview_size_bits != 0) {
+    reader.SkipBits(preview_size_bits);
+  }
+
+  SingleImageManager transform;
+  PIK_RETURN_IF_ERROR(PikPassToPixels(dparams, compressed, container, pool,
+                                      &reader, io, aux_out, &transform));
+
+  if (dparams.check_decompressed_size &&
+      reader.Position() != compressed.size()) {  // NOTE(review): bytes? confirm
+    return PIK_FAILURE("Pik compressed data size mismatch.");
+  }
+
+  io->enc_size = compressed.size();  // record the size of the input stream
+
+  return true;
+}
+
+} // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/pik.h b/codec/L2/demos/pikEnc/host/pik/pik.h
new file mode 100755
index 0000000000..38cc8c1953
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/pik.h
@@ -0,0 +1,38 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_PIK_H_
+#define PIK_PIK_H_
+
+// Top-level interface for PIK encoding/decoding.
+
+#include "pik/codec.h"
+#include "pik/data_parallel.h"
+#include "pik/padded_bytes.h"
+#include "pik/pik_info.h"
+#include "pik/pik_params.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Compresses pixels from `io` (given in any ColorEncoding).
+// `io` must have original_bits_per_sample and dec_c_original fields set.
+Status PixelsToPik(const CompressParams &params, std::string xclbinPath,
+                   const CodecInOut *io, PaddedBytes *compressed,
+                   PikInfo *aux_out = nullptr, ThreadPool *pool = nullptr);
+
+// Implementation detail: currently decodes to linear sRGB. The contract is:
+// `io` appears 'identical' (modulo compression artifacts) to the encoder input
+// in a color-aware viewer. Note that `io`->dec_c_original identifies the color
+// space that was passed to the encoder; clients that need that encoding must
+// call `io`->TransformTo afterwards.
+Status PikToPixels(const DecompressParams &params,
+                   const PaddedBytes &compressed, CodecInOut *io,
+                   PikInfo *aux_out = nullptr, ThreadPool *pool = nullptr);
+
+} // namespace pik
+
+#endif // PIK_PIK_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/pik_frame.cc b/codec/L2/demos/pikEnc/host/pik/pik_frame.cc
new file mode 100755
index 0000000000..fec0ab1452
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/pik_frame.cc
@@ -0,0 +1,2139 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/pik_frame.h"
+
+#include "pik/status.h"
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cstdint>
+#include <iostream>
+#include <limits.h> // PATH_MAX
+#include <limits>
+#include <memory>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <string>
+#include <vector>
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/ac_strategy.h"
+#include "pik/adaptive_quantization.h"
+#include "pik/alpha.h"
+#include "pik/ar_control_field.h"
+#include "pik/arch_specific.h"
+#include "pik/bilinear_transform.h"
+#include "pik/bit_reader.h"
+#include "pik/bits.h"
+#include "pik/byte_order.h"
+#include "pik/color_correlation.h"
+#include "pik/color_encoding.h"
+#include "pik/common.h"
+#include "pik/compiler_specific.h"
+#include "pik/compressed_dc.h"
+#include "pik/convolve.h"
+#include "pik/dct.h"
+#include "pik/dct_util.h"
+#include "pik/entropy_coder.h"
+#include "pik/external_image.h"
+#include "pik/fast_log.h"
+#include "pik/gaborish.h"
+#include "pik/gamma_correct.h"
+#include "pik/headers.h"
+#include "pik/image.h"
+#include "pik/lossless16.h"
+#include "pik/lossless8.h"
+#include "pik/multipass_handler.h"
+#include "pik/noise.h"
+#include "pik/opsin_image.h"
+#include "pik/opsin_inverse.h"
+#include "pik/padded_bytes.h"
+#include "pik/pik_params.h"
+#include "pik/profiler.h"
+#include "pik/resize.h"
+#include "pik/simd/targets.h"
+#include "pik/size_coder.h"
+
+#include "XAccPIKKernel1.hpp"
+#include "XAccPIKKernel2.hpp"
+#include "XAccPIKKernel3.hpp"
+#include "host_dev.hpp"
+#include "kernel3/encode_order.hpp"
+
+#include <ap_int.h>
+#include <hls_stream.h>
+
+namespace pik {
+namespace {
+
+// Encoder side: collects the FrameHeader flag bits implied by `cparams` and `io`.
+uint32_t FrameFlagsFromParams(const CompressParams& cparams, const CodecInOut* io) {
+    const float distance = cparams.butteraugli_distance;
+
+    // Noise is not added at low butteraugli distances: the original noise is
+    // still stored within the compressed image there, and adding more makes
+    // things worse.
+    const bool want_noise = ApplyOverride(cparams.noise, distance >= kMinButteraugliForNoise);
+    const bool want_gradient = ApplyOverride(cparams.gradient, distance >= kMinButteraugliForGradient);
+
+    uint32_t result = 0;
+    if (want_noise) {
+        result |= FrameHeader::kNoise;
+    }
+    if (want_gradient) {
+        result |= FrameHeader::kGradientMap;
+    }
+    if (io->IsGray()) {
+        result |= FrameHeader::kGrayscaleOpt;
+    }
+
+    return result;
+}
+
+void OverrideFlag(const Override o, const uint32_t flag, uint32_t* PIK_RESTRICT flags) {
+    switch (o) { // kOn sets `flag` in *flags, kOff clears it, any other value leaves *flags untouched
+        case Override::kOn: *flags |= flag; break;
+        case Override::kOff: *flags &= ~flag; break;
+        default: break;
+    }
+}
+
+// Decoder side: folds the overrides requested in `dparams` into the pass-level `frame_header`.
+void OverridePassFlags(const DecompressParams& dparams, FrameHeader* PIK_RESTRICT frame_header) {
+    OverrideFlag(dparams.noise, FrameHeader::kNoise, &frame_header->flags);
+    OverrideFlag(dparams.gradient, FrameHeader::kGradientMap, &frame_header->flags);
+
+    // Adaptive reconstruction is only touched by an explicit kOn / kOff request.
+    const auto ar = dparams.adaptive_reconstruction;
+    if (ar == Override::kOn || ar == Override::kOff) {
+        frame_header->have_adaptive_reconstruction = (ar == Override::kOn);
+    }
+
+    auto& epf = frame_header->epf_params;
+    epf.use_sharpened = ApplyOverride(dparams.epf_use_sharpened, epf.use_sharpened);
+    if (dparams.epf_sigma > 0) { // an explicit sigma switches the adaptive choice off
+        epf.enable_adaptive = false;
+        epf.sigma = dparams.epf_sigma;
+    }
+    // A gaborish value of -1 means that no override was requested.
+    if (dparams.gaborish != -1) frame_header->gaborish = GaborishStrength(dparams.gaborish);
+}
+
+void OverrideGroupFlags(const DecompressParams& dparams,
+                        const FrameHeader* PIK_RESTRICT frame_header,
+                        GroupHeader* PIK_RESTRICT header) {} // currently a no-op: the body is empty, so `header` is left untouched
+
+// Converts a float sample to the integer type T used by the lossless coder; specialized below for 8-bit and 16-bit.
+template <typename T>
+T RoundForLossless(float in);
+
+template <>
+uint8_t RoundForLossless(float in) {
+    // NOTE: if in was originally an 8 or 16 bit value, we don't need to round
+    // because such values are exactly representable as floats. Rounding is only
+    // needed when forcing inexact values back to integers.
+    return static_cast<uint8_t>(in + 0.5f); // add 0.5, then truncate toward zero
+}
+
+template <>
+uint16_t RoundForLossless(float in) {
+    return static_cast<uint16_t>(in * 257.0f + 0.5f); // 257 == 65535 / 255; presumably `in` is on a 0..255 scale - confirm
+}
+
+// Codes the difference between the current sample and the previous-pass sample,
+// both rounded with RoundForLossless<T>. The difference d wraps to the width of
+// T and is then folded into an unsigned code: d >= 0 becomes 2 * d and d < 0
+// becomes -2 * d - 1. Specialized below for 8-bit and 16-bit samples.
+template <typename T>
+T DiffForLossless(float in, float prev);
+
+template <>
+uint8_t DiffForLossless(float in, float prev) {
+    const int current = static_cast<int>(RoundForLossless<uint8_t>(in));
+    const int previous = static_cast<int>(RoundForLossless<uint8_t>(prev));
+    const uint8_t wrapped = current - previous; // reduced modulo 256
+    // The upper half of the range stands for the negative differences.
+    return wrapped > 127 ? (255 - wrapped) * 2 + 1 : wrapped * 2;
+}
+
+template <>
+uint16_t DiffForLossless(float in, float prev) {
+    const int current = static_cast<int>(RoundForLossless<uint16_t>(in));
+    const int previous = static_cast<int>(RoundForLossless<uint16_t>(prev));
+    const uint32_t wrapped = 0xFFFF & (current - previous); // reduced modulo 65536
+    // The upper half of the range stands for the negative differences.
+    return wrapped > 32767 ? (65535 - wrapped) * 2 + 1 : wrapped * 2;
+}
+
+// Converts channel `c` of io->color(), restricted to `rect`, into the integer
+// samples of `channel_out` (8-bit or 16-bit, selected by T). When `previous_pass`
+// is non-empty, the coded difference against it is stored instead.
+template <typename T>
+void LosslessChannelPass(
+    const int c, const CodecInOut* io, const Rect& rect, const Image3F& previous_pass, Image<T>* channel_out) {
+    const size_t width = rect.xsize();
+    const size_t height = rect.ysize();
+    // A previous-pass image of zero width means that there is nothing to diff against.
+    const bool have_previous = previous_pass.xsize() != 0;
+    for (size_t y = 0; y < height; ++y) {
+        const float* const PIK_RESTRICT src = rect.ConstPlaneRow(io->color(), c, y);
+        T* const PIK_RESTRICT dst = channel_out->Row(y);
+        if (!have_previous) {
+            for (size_t x = 0; x < width; ++x) {
+                dst[x] = RoundForLossless<T>(src[x]);
+            }
+            continue;
+        }
+        // NOTE(review): plane 0 of previous_pass is read for every channel `c` - confirm this is intended.
+        const float* const PIK_RESTRICT prev = previous_pass.PlaneRow(0, y);
+        for (size_t x = 0; x < width; ++x) {
+            dst[x] = DiffForLossless<T>(src[x], prev[x]);
+        }
+    }
+}
+
+Status PixelsToPikLosslessFrame(CompressParams cparams,
+                                const FrameHeader& frame_header,
+                                const CodecInOut* io,
+                                const Rect& rect,
+                                const Image3F& previous_pass,
+                                PaddedBytes* compressed,
+                                size_t& pos,
+                                PikInfo* aux_out) {
+    // Codes the `rect` area of `io` losslessly: `compressed` is cut back to the
+    // byte at bit offset `pos`, passed to the coder matching the header's gray /
+    // 16-bit settings, and `pos` ends at the new end. cparams/aux_out are unused.
+    PIK_ASSERT(pos % kBitsPerByte == 0);
+    const size_t start_byte = pos / kBitsPerByte;
+    const size_t width = rect.xsize();
+    const size_t height = rect.ysize();
+    if (frame_header.lossless_grayscale && frame_header.lossless_16_bits) {
+        ImageU gray(width, height);
+        LosslessChannelPass(0, io, rect, previous_pass, &gray);
+        compressed->resize(start_byte);
+        if (!Grayscale16bit_compress(gray, compressed)) {
+            return PIK_FAILURE("Lossless compression failed");
+        }
+    } else if (frame_header.lossless_grayscale) {
+        ImageB gray(width, height);
+        LosslessChannelPass(0, io, rect, previous_pass, &gray);
+        compressed->resize(start_byte);
+        if (!Grayscale8bit_compress(gray, compressed)) {
+            return PIK_FAILURE("Lossless compression failed");
+        }
+    } else if (frame_header.lossless_16_bits) {
+        Image3U rgb(width, height);
+        for (int c = 0; c < 3; ++c) {
+            LosslessChannelPass(c, io, rect, previous_pass, const_cast<ImageU*>(&rgb.Plane(c)));
+        }
+        compressed->resize(start_byte);
+        if (!Colorful16bit_compress(rgb, compressed)) {
+            return PIK_FAILURE("Lossless compression failed");
+        }
+    } else {
+        Image3B rgb(width, height);
+        for (int c = 0; c < 3; ++c) {
+            LosslessChannelPass(c, io, rect, previous_pass, const_cast<ImageB*>(&rgb.Plane(c)));
+        }
+        compressed->resize(start_byte);
+        if (!Colorful8bit_compress(rgb, compressed)) {
+            return PIK_FAILURE("Lossless compression failed");
+        }
+    }
+    pos = compressed->size() * kBitsPerByte;
+    return true;
+}
+
+// Resolves the output size target: an explicit target_size is returned as is,
+// else a positive target_bitrate is scaled by the pixel count of `rect` and
+// divided by 8, and 0 is returned when neither of the two is set.
+size_t TargetSize(const CompressParams& cparams, const Rect& rect) {
+    const bool has_size = cparams.target_size > 0;
+    const bool has_bitrate = cparams.target_bitrate > 0.0;
+    if (has_size) return cparams.target_size;
+    if (!has_bitrate) return 0;
+    // The floating-point result is truncated by the conversion to size_t.
+    return 0.5 + cparams.target_bitrate * rect.xsize() * rect.ysize() / 8;
+}
+
+Status PikPassHeuristics(CompressParams cparams, // by value: target_size may be overwritten on the local copy
+                         const FrameHeader& frame_header,
+                         const Image3F& opsin_orig, // source of the sizes, the initial quant field and a GetQuantizer input
+                         const Image3F& opsin,
+                         DequantMatrices* dequant, // out: reset to DequantMatrices(need_inv_matrices=true)
+                         ImageB* dequant_control_field, // out: zero-filled, one entry per kTileDim tile of `opsin`
+                         uint8_t dequant_map[kMaxQuantControlFieldValue][256], // out: cleared to zero
+                         MultipassManager* multipass_manager,
+                         GroupHeader* template_group_header, // out: nonserialized_have_alpha is set
+                         ColorCorrelationMap* full_cmap, // out: filled by GetColorCorrelationMap
+                         std::shared_ptr<Quantizer>* full_quantizer, // out: result of GetQuantizer
+                         AcStrategyImage* full_ac_strategy, // out: filled by GetAcStrategy
+                         ImageB* full_ar_sigma_lut_ids, // out: filled by FindBestArControlField
+                         BlockDictionary* block_dictionary, // only read here; the code that filled it is commented out below
+                         PikInfo* aux_out) { // Derives the pass-wide encoder settings listed above; lossless mode returns early.
+    size_t target_size = TargetSize(cparams, Rect(opsin_orig));
+    // TODO(robryk): This should take *template_group_header size, and size of
+    // other passes into account.
+    size_t opsin_target_size = target_size;
+    if (cparams.target_size > 0 || cparams.target_bitrate > 0.0) {
+        cparams.target_size = opsin_target_size;
+    } else if (cparams.butteraugli_distance < 0) {
+        return PIK_FAILURE("Expected non-negative distance");
+    }
+
+    template_group_header->nonserialized_have_alpha = frame_header.has_alpha;
+
+    if (cparams.lossless_mode) { // lossless: none of the remaining outputs are computed
+        return true;
+    }
+
+    constexpr size_t N = kBlockDim;
+    PROFILER_ZONE("enc OpsinToPik uninstrumented");
+    const size_t xsize = opsin_orig.xsize();
+    const size_t ysize = opsin_orig.ysize();
+    const size_t xsize_blocks = DivCeil(xsize, N);
+    const size_t ysize_blocks = DivCeil(ysize, N);
+
+    ImageF quant_field = InitialQuantField(cparams.butteraugli_distance, cparams.GetIntensityMultiplier(), opsin_orig,
+                                           cparams, /*pool=*/nullptr, 1.0);
+
+    //  *block_dictionary = multipass_manager->GetBlockDictionary(
+    //      cparams.butteraugli_distance, opsin);
+
+    //  Image3F opsin_with_removed_blocks = CopyImage(opsin);
+    //  block_dictionary->SubtractFromDict(&opsin_with_removed_blocks);
+    //  ApplyReverseBilinear(&opsin_with_removed_blocks);
+
+    /*  multipass_manager->GetDequantMatrices(
+          cparams.butteraugli_distance, cparams.GetIntensityMultiplier(),
+          opsin_with_removed_blocks, quant_field, dequant, dequant_control_field,
+          dequant_map);*/
+
+    *dequant = DequantMatrices(/*need_inv_matrices=*/true); // stands in for the disabled GetDequantMatrices call above
+    *dequant_control_field = ImageB(DivCeil(opsin.xsize(), kTileDim), DivCeil(opsin.ysize(), kTileDim));
+    ZeroFillImage(dequant_control_field);
+    memset(dequant_map, 0, kMaxQuantControlFieldValue * 256); // entries are uint8_t, so this byte count covers the whole table
+
+    multipass_manager->GetColorCorrelationMap(opsin, dequant, &*full_cmap); // `&*full_cmap` is the same pointer as `full_cmap`
+
+    multipass_manager->GetAcStrategy(cparams.butteraugli_distance, &quant_field, dequant, opsin,
+                                     /*pool=*/nullptr, full_ac_strategy, aux_out);
+
+    // TODO(veluca): investigate if this should be included in
+    // multipass_manager.
+    FindBestArControlField(cparams.butteraugli_distance, cparams.GetIntensityMultiplier(), opsin, *full_ac_strategy,
+                           quant_field, dequant, frame_header.gaborish,
+                           /*pool=*/nullptr, full_ar_sigma_lut_ids);
+
+    *full_quantizer = multipass_manager->GetQuantizer(
+        cparams, xsize_blocks, ysize_blocks, opsin_orig, opsin, frame_header, *template_group_header, *full_cmap,
+        *block_dictionary, *full_ac_strategy, *full_ar_sigma_lut_ids, dequant, *dequant_control_field, dequant_map,
+        quant_field, aux_out);
+    return true;
+}
+
+// Reads `num_in` bytes from `strm_in` into `output`, which ends up exactly
+// `num_in` bytes long.
+void strmToString(const int num_in, hls::stream<uint8_t>& strm_in, std::string& output) {
+    output.resize(num_in); // size first, so that output[j] never indexes past size()
+    for (int j = 0; j < num_in; ++j) output[j] = strm_in.read();
+}
+// Serializes the three 64-entry coefficient orders into one byte string by running the HLS
+// order encoder and bit writer once per colour channel (maybe parallelize color).
+std::string hls_EncodeCoeffOrders_top(const int32_t order[3][64]) { //, hls_PikImageSizeInfo &order_info
+    int num_bits = 0; // pos
+    int num_pair = 0;
+
+    hls::stream<nbits_t> strm_nbits;
+    hls::stream<uint16_t> strm_bits("order_strm_bits");
+    hls::stream<uint8_t> strm_order_byte("order_byte");
+    hls::stream<bool> strm_order_e("strm_order_e");
+
+    int hls_pos = 0;       // bit count of the output so far (presumably advanced by hls_WriteBitToStream)
+    uint8_t tail_bits = 0; // presumably the bits of the last, not yet complete byte
+    hls::stream<int> strm_order;
+
+    for (int c = 0; c < 3; c++) {
+        for (int j = 0; j < 64; j++) {
+            strm_order.write(order[c][j]);
+        }
+
+        hls_EncodeCoeffOrder(strm_order, num_bits, num_pair, strm_nbits, strm_bits);
+
+        hls_WriteBitToStream(num_pair, tail_bits, strm_nbits, strm_bits, hls_pos, strm_order_byte, strm_order_e);
+
+        _XF_IMAGE_PRINT("--byte_tail = %d , pos=%d\n", (int)tail_bits, (int)hls_pos);
+        _XF_IMAGE_PRINT("--num_pair = %d \n", num_pair);
+    }
+
+    // The end-flag stream is not consumed by anything on the host side; drain it.
+    while (!strm_order_e.empty()) strm_order_e.read();
+
+    // A bit count that is not a multiple of 8 leaves one more byte to emit.
+    if (hls_pos & (7)) {
+        strm_order_byte.write(tail_bits);
+    }
+
+    _XF_IMAGE_PRINT("storage_ix=%d \n", (int)hls_pos);
+
+    // Size the result from the actual bit count before it is filled, so the copy
+    // cannot run past a fixed-size preallocation.
+    const int num_bytes = (hls_pos + 7) >> 3;
+    std::string encoded_coeff_order(num_bytes, '\0');
+    strmToString(num_bytes, strm_order_byte, encoded_coeff_order);
+    return encoded_coeff_order;
+}
+
+inline void XAcc_EncodeFloatParam(float val, float precision, size_t* storage_ix, uint8_t* storage) { // writes 1 + 16 bits via WriteBits
+    WriteBits(1, val >= 0 ? 1 : 0, storage_ix, storage); // sign bit: 1 when val >= 0
+    const int absval_quant = static_cast<int>(std::abs(val) * precision + 0.5f); // |val| * precision, rounded
+    PIK_ASSERT(absval_quant < (1 << 16)); // has to fit the 16-bit field written next
+    WriteBits(16, absval_quant, storage_ix, storage);
+}
+
+// Serializes `noise_params` into the caller's `storage` buffer and reports the
+// number of bytes used through `storage_size`. Layout: one presence bit, then
+// (only when any parameter is non-zero) alpha, gamma and beta through
+// XAcc_EncodeFloatParam (1 + 16 bits each), then zero bits up to a byte boundary.
+void XAcc_EncodeNoise(const NoiseParams& noise_params, uint8_t storage[hls_kMaxNoiseSize], uint8_t& storage_size) {
+#pragma HLS INLINE
+    const size_t kMaxNoiseSize = 16; // NOTE(review): not referenced anywhere in this function
+    const float kNoisePrecision = 1000.0f;
+    size_t storage_ix = 0; // write position in bits
+    // Only byte 0 is cleared here. NOTE(review): presumably WriteBits initializes the later bytes itself - confirm.
+    storage[0] = 0;
+    const bool have_noise = (noise_params.alpha != 0.0f || noise_params.gamma != 0.0f || noise_params.beta != 0.0f);
+    WriteBits(1, have_noise, &storage_ix, storage);
+    if (have_noise) {
+        XAcc_EncodeFloatParam(noise_params.alpha, kNoisePrecision, &storage_ix, storage);
+        XAcc_EncodeFloatParam(noise_params.gamma, kNoisePrecision, &storage_ix, storage);
+        XAcc_EncodeFloatParam(noise_params.beta, kNoisePrecision, &storage_ix, storage);
+    }
+    // Number of bits missing to reach the next multiple of 8.
+    size_t jump_bits = ((storage_ix + 7) & ~7) - storage_ix;
+    WriteBits(jump_bits, 0, &storage_ix, storage);
+    PIK_ASSERT(storage_ix % 8 == 0);
+    storage_size = storage_ix >> 3; // bits -> bytes
+}
+
+// Stores array_in[0 .. num_in), each converted to ap_uint<8>, as the bytes of
+// `output`, which ends up exactly `num_in` bytes long.
+void arrayCharToString(const int num_in, ap_uint<32>* array_in, std::string& output) {
+    output.resize(num_in); // size first, so that output[j] never indexes past size()
+    for (int j = 0; j < num_in; ++j) output[j] = (ap_uint<8>)array_in[j];
+}
+
+void arrayShortToString(const int num_in, ap_uint<32>* array_in, std::string& output) { // word k -> bytes 2k (bits 7..0) and 2k+1 (bits 15..8)
+    output.resize(num_in); // size first, so that every write below stays inside the string
+    std::cout << "shortTnt:" << std::endl;
+    for (int i = 0; i < num_in; i += 2) {
+        ap_uint<32> shortInt = array_in[i >> 1];
+        output[i] = shortInt(7, 0);
+        if (i + 1 < num_in) output[i + 1] = shortInt(15, 8); // an odd num_in has no slot for the last high byte
+        std::cout << std::dec << "," << shortInt;
+    }
+    std::cout << std::endl;
+}
+
+// Advances `pos` past this group's header (the header bytes themselves are discarded) and,
+// unless lossless, runs InitializeEncCache and ComputeCoefficients on `cache` for the group.
+Status PixelsToPikGroup(CompressParams cparams,
+                        const FrameHeader& frame_header,
+                        GroupHeader header,
+                        const AcStrategyImage& ac_strategy,
+                        const Quantizer* full_quantizer,
+                        const ColorCorrelationMap& full_cmap,
+                        const CodecInOut* io,
+                        const Image3F& opsin_in,
+                        const NoiseParams& noise_params,
+                        size_t& pos,
+                        const FrameEncCache& frame_enc_cache,
+                        PikInfo* aux_out,
+                        EncCache* cache,
+                        MultipassHandler* multipass_handler) {
+    const Rect& rect = multipass_handler->GroupRect();
+    const Rect& padded_rect = multipass_handler->PaddedGroupRect();
+    const Rect area_to_encode = Rect(0, 0, padded_rect.xsize(), padded_rect.ysize());
+
+    if (frame_header.has_alpha) {
+        PROFILER_ZONE("enc alpha");
+        PIK_RETURN_IF_ERROR(EncodeAlpha(cparams, io->alpha(), rect, io->AlphaBits(), &header.alpha));
+    }
+    header.nonserialized_have_alpha = frame_header.has_alpha;
+
+    size_t extension_bits, total_bits;
+    PIK_RETURN_IF_ERROR(CanEncode(header, &extension_bits, &total_bits));
+    // The header is written at bit offset `pos`, so the scratch buffer must reach
+    // pos + total_bits; a single byte on the stack would be overrun by the writes.
+    PaddedBytes compressed(DivCeil(pos + total_bits, kBitsPerByte));
+    PIK_RETURN_IF_ERROR(WriteGroupHeader(header, extension_bits, &pos, compressed.data()));
+    WriteZeroesToByteBoundary(&pos, compressed.data());
+    if (aux_out != nullptr) {
+        aux_out->layers[kLayerHeader].total_size += DivCeil(total_bits, kBitsPerByte);
+    }
+
+    if (cparams.lossless_mode) {
+        // Done; we'll encode the entire image in one shot later.
+        return true;
+    }
+
+    Rect group_in_color_tiles(multipass_handler->BlockGroupRect().x0() / kColorTileDimInBlocks,
+                              multipass_handler->BlockGroupRect().y0() / kColorTileDimInBlocks,
+                              DivCeil(multipass_handler->BlockGroupRect().xsize(), kColorTileDimInBlocks),
+                              DivCeil(multipass_handler->BlockGroupRect().ysize(), kColorTileDimInBlocks));
+
+    ColorCorrelationMap cmap = full_cmap.Copy(group_in_color_tiles);
+    cache->saliency_threshold = cparams.saliency_threshold;
+    cache->saliency_debug_skip_nonsalient = cparams.saliency_debug_skip_nonsalient;
+
+    InitializeEncCache(frame_header, header, frame_enc_cache, multipass_handler->PaddedGroupRect(), cache);
+    Quantizer quantizer = full_quantizer->Copy(multipass_handler->BlockGroupRect());
+    ComputeCoefficients(quantizer, full_cmap, group_in_color_tiles, frame_enc_cache, cache, aux_out);
+
+    printf("area_to_encode(%d,%d)\n", static_cast<int>(area_to_encode.x0()), static_cast<int>(area_to_encode.y0())); // cast: %d needs int
+
+    return true;
+}
+
+// Max observed: 1.1M on RGB noise with d0.1.
+// 512*512*4*2 = 2M should be enough for 16-bit RGBA images.
+using GroupSizeCoder = SizeCoderT<0x150F0E0C>;
+
+} // namespace
+
+Status PixelsToPikPass(CompressParams cparams, // Encodes one pass of `io`: appends header, pass-global data, TOCs and group data to `compressed`.
+                       const FrameParams& frame_params,
+                       const CodecInOut* io,
+                       ThreadPool* pool,
+                       PaddedBytes* compressed,
+                       size_t& pos, // in/out: write position in `compressed`, counted in bits
+                       PikInfo* aux_out, // optional statistics sink; may be nullptr
+                       MultipassManager* multipass_manager) {
+    FrameHeader frame_header;
+    frame_header.num_passes = multipass_manager->GetNumPasses();
+    frame_header.downsampling_factor_to_passes = multipass_manager->GetDownsamplingToNumPasses();
+    frame_header.have_adaptive_reconstruction = false;
+    if (cparams.lossless_mode) {
+        frame_header.encoding = ImageEncoding::kLossless;
+        frame_header.lossless_16_bits = io->original_bits_per_sample() > 8;
+        frame_header.lossless_grayscale = io->IsGray();
+    }
+
+    frame_header.frame = frame_params.frame_info;
+    frame_header.has_alpha = io->HasAlpha();
+
+    if (frame_header.encoding == ImageEncoding::kPasses) { // NOTE(review): kPasses is presumably the FrameHeader default when lossless_mode is off - confirm
+        frame_header.flags = FrameFlagsFromParams(cparams, io);
+        frame_header.predict_hf = cparams.predict_hf;
+        frame_header.predict_lf = cparams.predict_lf;
+        frame_header.gaborish = GaborishStrength(cparams.gaborish);
+
+        if (ApplyOverride(cparams.adaptive_reconstruction,
+                          cparams.butteraugli_distance >= kMinButteraugliForAdaptiveReconstruction)) {
+            frame_header.have_adaptive_reconstruction = true;
+            frame_header.epf_params.use_sharpened =
+                ApplyOverride(cparams.epf_use_sharpened, frame_header.epf_params.use_sharpened);
+            if (cparams.epf_sigma > 0) {
+                frame_header.epf_params.enable_adaptive = false;
+                frame_header.epf_params.sigma = cparams.epf_sigma;
+            }
+        }
+    }
+
+    multipass_manager->StartPass(frame_header);
+
+    // TODO(veluca): delay writing the header until we know the total pass size.
+    size_t extension_bits, total_bits;
+    PIK_RETURN_IF_ERROR(CanEncode(frame_header, &extension_bits, &total_bits));
+    compressed->resize(DivCeil(pos + total_bits, kBitsPerByte)); // make room for the header written at bit offset pos
+    PIK_RETURN_IF_ERROR(WritePassHeader(frame_header, extension_bits, &pos, compressed->data()));
+    WriteZeroesToByteBoundary(&pos, compressed->data());
+    if (aux_out != nullptr) {
+        aux_out->layers[kLayerHeader].total_size += DivCeil(total_bits, kBitsPerByte);
+    }
+
+    const size_t xsize_groups = DivCeil(io->xsize(), kGroupDim);
+    const size_t ysize_groups = DivCeil(io->ysize(), kGroupDim);
+    const size_t num_groups = xsize_groups * ysize_groups;
+
+    std::vector<MultipassHandler*> handlers(num_groups); // one handler per group, in row-major group order
+    for (size_t group_index = 0; group_index < num_groups; ++group_index) {
+        const size_t gx = group_index % xsize_groups;
+        const size_t gy = group_index / xsize_groups;
+        const Rect rect(gx * kGroupDim, gy * kGroupDim, kGroupDim, kGroupDim, io->xsize(), io->ysize());
+        handlers[group_index] = multipass_manager->GetGroupHandler(group_index, rect);
+    }
+
+    GroupHeader template_group_header;
+    ColorCorrelationMap full_cmap(io->xsize(), io->ysize());
+    std::shared_ptr<Quantizer> full_quantizer;
+    AcStrategyImage full_ac_strategy;
+    Image3F opsin_orig, opsin;
+    NoiseParams noise_params;
+    BlockDictionary block_dictionary;
+    FrameEncCache frame_enc_cache;
+
+    if (frame_header.encoding == ImageEncoding::kPasses) {
+        opsin_orig = OpsinDynamicsImage(io, Rect(io->color()));
+        if (aux_out != nullptr) {
+            PIK_RETURN_IF_ERROR(aux_out->InspectImage3F("pik_pass:OpsinDynamicsImage", opsin_orig));
+        }
+
+        constexpr size_t N = kBlockDim;
+        PROFILER_ZONE("enc OpsinToPik uninstrumented");
+        const size_t xsize = opsin_orig.xsize();
+        const size_t ysize = opsin_orig.ysize();
+        if (xsize == 0 || ysize == 0) return PIK_FAILURE("Empty image");
+        opsin = PadImageToMultiple(opsin_orig, N);
+
+        if (frame_header.flags & FrameHeader::kNoise) {
+            PROFILER_ZONE("enc GetNoiseParam");
+            // Don't start at zero amplitude since adding noise is expensive -- it
+            // significantly slows down decoding, and this is unlikely to completely
+            // go away even with advanced optimizations. After the
+            // kNoiseModelingRampUpDistanceRange we have reached the full level,
+            // i.e. noise is no longer represented by the compressed image, so we
+            // can add full noise by the noise modeling itself.
+            static const double kNoiseModelingRampUpDistanceRange = 0.6;
+            static const double kNoiseLevelAtStartOfRampUp = 0.25;
+            // TODO(user) test and properly select quality_coef with smooth
+            // filter
+            float quality_coef = 1.0f;
+            const double rampup =
+                (cparams.butteraugli_distance - kMinButteraugliForNoise) / kNoiseModelingRampUpDistanceRange;
+            if (rampup < 1.0) {
+                quality_coef = kNoiseLevelAtStartOfRampUp + (1.0 - kNoiseLevelAtStartOfRampUp) * rampup;
+            }
+            GetNoiseParameter(opsin, &noise_params, quality_coef);
+        }
+        if (frame_header.gaborish != GaborishStrength::kOff) {
+            opsin = GaborishInverse(opsin, 0.92718927264540152);
+        }
+
+        multipass_manager->DecorrelateOpsin(&opsin);
+
+        PIK_RETURN_IF_ERROR(PikPassHeuristics(
+            cparams, frame_header, opsin_orig, opsin, &frame_enc_cache.matrices, &frame_enc_cache.dequant_control_field,
+            frame_enc_cache.dequant_map, multipass_manager, &template_group_header, &full_cmap, &full_quantizer,
+            &full_ac_strategy, &frame_enc_cache.ar_sigma_lut_ids, &block_dictionary, aux_out));
+
+        // Initialize frame_enc_cache and encode DC.
+        InitializeFrameEncCache(frame_header, opsin, full_ac_strategy, *full_quantizer, full_cmap, block_dictionary,
+                                &frame_enc_cache, aux_out);
+        frame_enc_cache.use_new_dc = cparams.use_new_dc;
+
+        PikImageSizeInfo* matrices_info = aux_out != nullptr ? &aux_out->layers[kLayerDequantTables] : nullptr;
+
+        std::string dequant_code = frame_enc_cache.matrices.Encode(matrices_info);
+        compressed->append(dequant_code);
+        pos += dequant_code.size() * 8; // pos counts bits, the appended sections are whole bytes
+        std::cout << "dequant_code_pos=" << pos << std::endl;
+
+        PaddedBytes pass_global_code;
+        size_t byte_pos = 0;
+
+        // Encode quantizer DC and global scale.
+        PikImageSizeInfo* quant_info = aux_out ? &aux_out->layers[kLayerQuant] : nullptr;
+        std::string quant_code = full_quantizer->Encode(quant_info);
+
+        // Encode cmap. TODO(veluca): consider encoding DC part of cmap only here,
+        // and AC in (super)groups.
+        PikImageSizeInfo* cmap_info = aux_out ? &aux_out->layers[kLayerCmap] : nullptr;
+        std::string cmap_code =
+            EncodeColorMap(full_cmap.ytob_map, Rect(full_cmap.ytob_map), full_cmap.ytob_dc, cmap_info) +
+            EncodeColorMap(full_cmap.ytox_map, Rect(full_cmap.ytox_map), full_cmap.ytox_dc, cmap_info);
+
+        pass_global_code.resize(quant_code.size() + cmap_code.size());
+        Append(quant_code, &pass_global_code, &byte_pos);
+        Append(cmap_code, &pass_global_code, &byte_pos);
+
+        PikImageSizeInfo* dc_info = aux_out != nullptr ? &aux_out->layers[kLayerDC] : nullptr;
+        PikImageSizeInfo* cfields_info = aux_out != nullptr ? &aux_out->layers[kLayerControlFields] : nullptr;
+
+        pass_global_code.append(EncodeDCGroups(*full_quantizer, frame_enc_cache, full_ac_strategy, multipass_manager,
+                                               dc_info, cfields_info));
+        compressed->append(pass_global_code);
+        pos += pass_global_code.size() * 8;
+        std::cout << "pass_global_code_pos=" << pos << std::endl;
+
+        PikImageSizeInfo* dictionary_info = aux_out ? &aux_out->layers[kLayerDictionary] : nullptr;
+        std::string dictionary_code = block_dictionary.Encode(dictionary_info);
+        compressed->append(dictionary_code);
+        pos += dictionary_code.size() * 8;
+        std::cout << "dictionary_code_pos=" << pos << std::endl;
+
+        std::string quant_cf_code = EncodeDequantControlField(frame_enc_cache.dequant_control_field, matrices_info);
+        quant_cf_code +=
+            EncodeDequantControlFieldMap(full_quantizer->RawQuantField(), frame_enc_cache.dequant_control_field,
+                                         frame_enc_cache.dequant_map, matrices_info);
+        compressed->append(quant_cf_code);
+        pos += quant_cf_code.size() * 8;
+        std::cout << "quant_cf_code_pos=" << pos << std::endl;
+    }
+
+    // Compress groups: one per combination of group and pass. Outer loop lists
+    // passes, inner lists groups. Group headers are only encoded in the groups
+    // of the first pass.
+    std::vector<std::vector<PaddedBytes> > group_codes(num_groups);
+    std::atomic<int> num_errors{0}; // never incremented while the PixelsToPikGroup call below is commented out
+    for (int group_index = 0; group_index < num_groups; ++group_index) { // NOTE(review): with the call disabled, each group only gets GetNumPasses() default-constructed codes
+        std::vector<PaddedBytes>* group_code = &group_codes[group_index];
+        size_t group_pos = 0;
+        group_code->resize(multipass_manager->GetNumPasses());
+        /*
+        if (!PixelsToPikGroup(cparams, frame_header, template_group_header,
+                              full_ac_strategy, full_quantizer.get(), full_cmap, io,
+                              opsin, noise_params, group_code, group_pos,
+                              frame_enc_cache, aux_out, handlers[group_index])) {
+          num_errors.fetch_add(1, std::memory_order_relaxed);
+
+          continue;
+        }
+        */
+    };
+
+    for (size_t i = 0; i < num_groups; i++) {
+        PIK_ASSERT(group_codes[i].size() == multipass_manager->GetNumPasses());
+    }
+
+    PIK_RETURN_IF_ERROR(num_errors.load(std::memory_order_relaxed) == 0);
+
+    // Build TOCs.
+
+    for (size_t i = 0; i < multipass_manager->GetNumPasses(); i++) { // i is the pass index
+        size_t group_toc_pos = 0;
+        PaddedBytes group_toc(PaddedBytes(GroupSizeCoder::MaxSize(num_groups)));
+        uint8_t* group_toc_storage = group_toc.data();
+        size_t total_groups_size = 0;
+        for (size_t group_index = 0; group_index < num_groups; ++group_index) {
+            size_t group_size = group_codes[group_index][i].size();
+            GroupSizeCoder::Encode(group_size, &group_toc_pos, group_toc_storage);
+            total_groups_size += group_size;
+        }
+        WriteZeroesToByteBoundary(&group_toc_pos, group_toc_storage);
+        group_toc.resize(group_toc_pos / kBitsPerByte); // shrink from the MaxSize allocation to the bytes actually written
+
+        // Push output.
+        PIK_ASSERT(pos % kBitsPerByte == 0);
+        compressed->reserve(DivCeil(pos, kBitsPerByte) + group_toc.size() + total_groups_size);
+        compressed->append(group_toc);
+        pos += group_toc.size() * kBitsPerByte;
+        std::cout << "group_toc_pos=" << pos << std::endl;
+
+        // Only do lossless encoding in the first pass, if there is more than one.
+        if (frame_header.encoding == ImageEncoding::kLossless && i == 0) {
+            // Encode entire image at once to avoid per-group overhead. Must come
+            // BEFORE the encoded groups because the decoder assumes that the last
+            // group coincides with the end of the bitstream.
+            const Rect rect(io->color());
+
+            Image3F previous_pass;
+            PIK_RETURN_IF_ERROR(multipass_manager->GetPreviousPass(io->dec_c_original, pool, &previous_pass));
+            PIK_RETURN_IF_ERROR(
+                PixelsToPikLosslessFrame(cparams, frame_header, io, rect, previous_pass, compressed, pos, aux_out));
+        }
+
+        for (size_t group_index = 0; group_index < num_groups; ++group_index) {
+            const PaddedBytes& group_code = group_codes[group_index][i];
+            compressed->append(group_code);
+            pos += group_code.size() * kBitsPerByte;
+            std::cout << "group_code_pos=" << pos << std::endl;
+        }
+    }
+
+    io->enc_size = compressed->size(); // NOTE(review): `io` is a pointer to const, so enc_size is presumably declared mutable - confirm
+    return true;
+}
+
+// Replaces each pixel p of *out with exp(p), in place.
+// Rows are visited in increasing y, pixels within a row in increasing x.
+void Exp(ImageF* out) {
+    for (int row = 0; row < out->ysize(); ++row) {
+        float* const PIK_RESTRICT pixels = out->Row(row);
+        for (int col = 0; col < out->xsize(); ++col) pixels[col] = exp(pixels[col]);
+    }
+}
+
+using DCGroupSizeCoder = SizeCoderT<0x150F0E0C>; // same SizeCoderT instantiation as GroupSizeCoder on the next line
+using GroupSizeCoder = SizeCoderT<0x150F0E0C>; // NOTE(review): meaning of 0x150F0E0C is defined by SizeCoderT - confirm there
+
+Status hls_PixelsToPikPass(CompressParams cparams,
+                           std::string xclbinPath,
+                           const FrameParams& frame_params,
+                           const CodecInOut* io,
+                           ThreadPool* pool,
+                           PaddedBytes* compressed,
+                           size_t& pos,
+                           PikInfo* aux_out,
+                           MultipassManager* multipass_manager) {
+    FrameHeader frame_header;
+    frame_header.num_passes = multipass_manager->GetNumPasses();
+    frame_header.downsampling_factor_to_passes = multipass_manager->GetDownsamplingToNumPasses();
+    frame_header.have_adaptive_reconstruction = false;
+    if (cparams.lossless_mode) {
+        frame_header.encoding = ImageEncoding::kLossless;
+        frame_header.lossless_16_bits = io->original_bits_per_sample() > 8;
+        frame_header.lossless_grayscale = io->IsGray();
+    }
+
+    frame_header.frame = frame_params.frame_info;
+    frame_header.has_alpha = io->HasAlpha();
+
+    if (frame_header.encoding == ImageEncoding::kPasses) {
+        frame_header.flags = FrameFlagsFromParams(cparams, io);
+        frame_header.predict_hf = cparams.predict_hf;
+        frame_header.predict_lf = cparams.predict_lf;
+        frame_header.gaborish = GaborishStrength(cparams.gaborish);
+
+        if (ApplyOverride(cparams.adaptive_reconstruction,
+                          cparams.butteraugli_distance >= kMinButteraugliForAdaptiveReconstruction)) {
+            frame_header.have_adaptive_reconstruction = true;
+            frame_header.epf_params.use_sharpened =
+                ApplyOverride(cparams.epf_use_sharpened, frame_header.epf_params.use_sharpened);
+            if (cparams.epf_sigma > 0) {
+                frame_header.epf_params.enable_adaptive = false;
+                frame_header.epf_params.sigma = cparams.epf_sigma;
+            }
+        }
+    }
+
+    multipass_manager->StartPass(frame_header);
+
+    // TODO(veluca): delay writing the header until we know the total pass size.
+    size_t extension_bits, total_bits;
+    PIK_RETURN_IF_ERROR(CanEncode(frame_header, &extension_bits, &total_bits));
+    compressed->resize(DivCeil(pos + total_bits, kBitsPerByte));
+    PIK_RETURN_IF_ERROR(WritePassHeader(frame_header, extension_bits, &pos, compressed->data()));
+    WriteZeroesToByteBoundary(&pos, compressed->data());
+    if (aux_out != nullptr) {
+        aux_out->layers[kLayerHeader].total_size += DivCeil(total_bits, kBitsPerByte);
+    }
+
+    const size_t xsize_groups = DivCeil(io->xsize(), kGroupDim);
+    const size_t ysize_groups = DivCeil(io->ysize(), kGroupDim);
+    const size_t num_groups = xsize_groups * ysize_groups;
+
+    std::vector<MultipassHandler*> handlers(num_groups);
+    for (size_t group_index = 0; group_index < num_groups; ++group_index) {
+        const size_t gx = group_index % xsize_groups;
+        const size_t gy = group_index / xsize_groups;
+        const Rect rect(gx * kGroupDim, gy * kGroupDim, kGroupDim, kGroupDim, io->xsize(), io->ysize());
+        handlers[group_index] = multipass_manager->GetGroupHandler(group_index, rect);
+    }
+
+    GroupHeader template_group_header;
+    ColorCorrelationMap full_cmap(io->xsize(), io->ysize());
+    std::shared_ptr<Quantizer> full_quantizer;
+    AcStrategyImage full_ac_strategy;
+    //  Image3F opsin_orig, opsin;
+    NoiseParams noise_params;
+    BlockDictionary block_dictionary;
+    FrameEncCache frame_enc_cache;
+
+    constexpr size_t N = kBlockDim;
+    const size_t xsizet = Rect(io->color()).xsize();
+    const size_t ysizet = Rect(io->color()).ysize();
+    const size_t xsize_blocks = DivCeil(xsizet, N);
+    const size_t ysize_blocks = DivCeil(ysizet, N);
+    const size_t ysize_padded = ysize_blocks * kBlockDim;
+    const size_t xsize_padded = xsize_blocks * kBlockDim;
+    Image3F opsin_orig(xsizet, ysizet);
+    Image3F opsin(xsize_padded, ysize_padded);
+
+    //============================kernel1 initialize==========================
+
+    // initialize kernel1 buffers and config
+    const Image3F* linear_srgb = &io->color();
+    Image3F copy;
+    Rect linear_rect = Rect(io->color());
+    if (!io->IsLinearSRGB()) {
+        const ColorEncoding& c = io->Context()->c_linear_srgb[io->IsGray()];
+        PIK_CHECK(io->CopyTo(Rect(io->color()), c, &copy));
+        linear_srgb = &copy;
+        // We've cut out the rectangle, start at x0=y0=0 in copy.
+        linear_rect = Rect(copy);
+    }
+
+    size_t target_size = TargetSize(cparams, Rect(opsin_orig));
+    size_t opsin_target_size = target_size;
+    if (cparams.target_size > 0 || cparams.target_bitrate > 0.0) {
+        cparams.target_size = opsin_target_size;
+    } else if (cparams.butteraugli_distance < 0) {
+        return PIK_FAILURE("Expected non-negative distance");
+    }
+
+    ap_uint<32>* axi_out = (ap_uint<32>*)malloc(8192 * 8192 * 3 * sizeof(ap_uint<32>));
+    ap_uint<32>* axi_cmap = (ap_uint<32>*)malloc((128 * 128 * 2 + 2) * sizeof(ap_uint<32>));
+    ap_uint<32>* axi_qf = (ap_uint<32>*)malloc((1024 * 1024 + 2) * sizeof(ap_uint<32>));
+    hls::stream<float> rgbStrm[3];
+    hls::stream<float> xybStrm[3];
+    hls::stream<float> xybGabStrm[3];
+    hls::stream<float> yOrigStrm("yorig");
+    hls::stream<DT> ostrm[3];
+    hls::stream<bool> e_ostrm[3];
+
+    const int nums = xsizet * ysizet;
+    int len[3] = {nums * DT_SZ, nums * DT_SZ, nums * DT_SZ};
+    int offset[3] = {0, nums * DT_SZ, 2 * nums * DT_SZ};
+    float* buf = (float*)malloc(BUF_DEPTH * sizeof(float));
+    memset(buf, 0, sizeof(float) * BUF_DEPTH);
+
+    DT* const d0ptr = (DT*)(buf);
+    DT* ptr = d0ptr;
+    for (int y = 0; y < ysizet; ++y) {
+        const float* PIK_RESTRICT row_in0 = linear_rect.ConstPlaneRow(*linear_srgb, 0, y);
+        memcpy(ptr, row_in0, xsizet * sizeof(float));
+        ptr = ptr + xsizet;
+    }
+
+    DT* const d1ptr = (DT*)(buf + nums);
+    ptr = d1ptr;
+    for (int y = 0; y < ysizet; ++y) {
+        const float* PIK_RESTRICT row_in1 = linear_rect.ConstPlaneRow(*linear_srgb, 1, y);
+        memcpy(ptr, row_in1, xsizet * sizeof(float));
+        ptr = ptr + xsizet;
+    }
+
+    DT* const d2ptr = (DT*)(buf + 2 * nums);
+    ptr = d2ptr;
+    for (int y = 0; y < ysizet; ++y) {
+        const float* PIK_RESTRICT row_in2 = linear_rect.ConstPlaneRow(*linear_srgb, 2, y);
+        memcpy(ptr, row_in2, xsizet * sizeof(float));
+        ptr = ptr + xsizet;
+    }
+
+    static const float kAcQuant = 0.97136686727219523;
+    const float intensity_multiplier3 = std::cbrt(cparams.GetIntensityMultiplier());
+    const float quant_ac = intensity_multiplier3 * kAcQuant / cparams.butteraugli_distance;
+
+    ap_uint<32> k1_config[32];
+    k1_config[0] = len[0];
+    k1_config[1] = len[1];
+    k1_config[2] = len[2];
+    k1_config[3] = offset[0];
+    k1_config[4] = offset[1];
+    k1_config[5] = offset[2];
+    k1_config[6] = xsizet;
+    k1_config[7] = ysizet;
+    k1_config[8] = fToBits<float, int32_t>(quant_ac);
+    k1_config[9] = nums;
+    k1_config[10] = 3 * nums;
+
+#ifdef HLS_TEST
+    kernel1Top(k1_config, (ap_uint<AXI_WIDTH>*)buf, axi_out, axi_cmap, axi_qf);
+#endif
+
+    //=============================kernel1 end===========================
+
+    //============================kernel2 initialize==========================
+
+    /*
+    uint32_t xsize;
+    uint32_t ysize;
+    uint32_t xblock8;
+    uint32_t yblock8;
+    uint32_t xblock32;
+    uint32_t yblock32;
+    uint32_t xgroup;
+    uint32_t ygroup;
+
+    int src_num;
+    int in_quant_field_num;
+    int cmap_num0;
+    int cmap_num1;
+    int ac_num;
+    int dc_num;
+    int acs_num;
+    int out_quant_field_num;
+
+    bool kChooseAcStrategy;
+    float discretization_factor;
+    float kMulInhomogeneity16x16;
+    float kMulInhomogeneity32x32;
+    float butteraugli_target;
+    float intensity_multiplier;
+    float quant_dc;
+    */
+
+    // initialize kernel2 buffers and k2_config
+    ap_uint<32>* ac = (ap_uint<32>*)malloc(3 * 8192 * 8192 * sizeof(ap_uint<32>));
+    ap_uint<32>* dc = (ap_uint<32>*)malloc(3 * 1024 * 1024 * sizeof(ap_uint<32>));
+    ap_uint<32>* quant_field_out = (ap_uint<32>*)malloc((1024 * 1024 + 2) * sizeof(ap_uint<32>));
+    ap_uint<32>* ac_strategy = (ap_uint<32>*)malloc(1024 * 1024 * sizeof(ap_uint<32>));
+    ap_uint<32>* block = (ap_uint<32>*)malloc(1024 * 1024 * sizeof(ap_uint<32>));
+    ap_uint<32>* order = (ap_uint<32>*)malloc(64 * 16 * 16 * 3 * sizeof(ap_uint<32>));
+
+    float butteraugli_target_dc =
+        std::min<float>(cparams.butteraugli_distance, std::pow(cparams.butteraugli_distance, 0.57840232344431763));
+    float quant_dc = intensity_multiplier3 * 0.74852919562896747 / butteraugli_target_dc;
+
+    ap_uint<32> k2_config[32];
+
+    k2_config[0] = xsize_padded;
+    k2_config[1] = ysize_padded;
+    k2_config[2] = xsize_padded / 8;
+    k2_config[3] = ysize_padded / 8;
+    k2_config[4] = (k2_config[0] + 31) / 32;
+    k2_config[5] = (k2_config[1] + 31) / 32;
+    k2_config[6] = (k2_config[0] + 511) / 512;
+    k2_config[7] = (k2_config[1] + 511) / 512;
+
+    k2_config[8] = k2_config[4] * k2_config[5] * 3 * 32 * 32;
+    k2_config[9] = k2_config[4] * k2_config[5] * 4 * 4 + 2;
+    k2_config[10] = ((k2_config[0] + 63) / 64) * ((k2_config[1] + 63) / 64) * 2 + 2;
+    k2_config[11] = ((k2_config[0] + 63) / 64) * ((k2_config[1] + 63) / 64);
+    k2_config[12] = xsize_padded * ysize_padded;
+    k2_config[13] = k2_config[2] * k2_config[3];
+    k2_config[14] = k2_config[2] * k2_config[3];
+    k2_config[15] = k2_config[2] * k2_config[3] + 2;
+
+    k2_config[16] = true;
+    k2_config[17] = fToBits<float, uint32_t>(100 * (6.9654004856811754) / cparams.butteraugli_distance);
+    k2_config[18] = fToBits<float, uint32_t>((-47.780 * (3.9429727851421288)) / cparams.butteraugli_distance);
+    k2_config[19] = fToBits<float, uint32_t>((-47.780 * (-4.270639713545533)) / cparams.butteraugli_distance);
+    k2_config[20] = fToBits<float, uint32_t>(cparams.butteraugli_distance);
+    k2_config[21] = fToBits<float, uint32_t>(intensity_multiplier3);
+    k2_config[22] = fToBits<float, uint32_t>(quant_dc);
+    k2_config[23] = k2_config[6] * k2_config[7] * 3 * 64;
+    k2_config[24] = 3 * k2_config[13];
+
+    k2_config[25] = k2_config[4] * k2_config[5] * 32 * 32 * 4;
+    k2_config[26] = k2_config[25];
+    k2_config[27] = k2_config[25];
+    k2_config[28] = 0;
+    k2_config[29] = k2_config[25];
+    k2_config[30] = k2_config[25] * 2;
+
+    int hls_ac_groups = num_groups;
+
+#ifdef HLS_TEST
+    kernel2Top(k2_config, (ap_uint<64>*)axi_out, axi_qf, axi_cmap, ac, dc, quant_field_out, ac_strategy, block, order);
+
+    std::cout << "k2 order:" << std::endl;
+    for (int i = 0; i < hls_ac_groups; i++) {
+        for (int j = 0; j < 64 * 3; j++) {
+            std::cout << (int)order[i * 3 * 64 + j] << ",";
+        }
+        std::cout << std::endl;
+    }
+
+    std::cout << "k2 dc:" << std::endl;
+    for (int c = 0; c < 3; c++) {
+        for (int i = 0; i < k2_config[3]; i++) {
+            for (int j = 0; j < k2_config[2]; j++) {
+                std::cout << (int)dc[c * MAX_NUM_BLOCK88 + i * k2_config[2] + j] << ",";
+            }
+            std::cout << std::endl;
+        }
+    }
+
+    std::cout << "k2 acs:" << std::endl;
+    for (int i = 0; i < k2_config[3]; i++) {
+        for (int j = 0; j < k2_config[2]; j++) {
+            std::cout << (int)ac_strategy[i * k2_config[2] + j] << ",";
+        }
+        std::cout << std::endl;
+    }
+
+    std::cout << "k2 block:" << std::endl;
+    for (int i = 0; i < k2_config[3]; i++) {
+        for (int j = 0; j < k2_config[2]; j++) {
+            std::cout << (int)block[i * k2_config[2] + j] << ",";
+        }
+        std::cout << std::endl;
+    }
+
+    std::cout << "k2 quant:" << std::endl;
+    for (int i = 0; i < k2_config[3]; i++) {
+        for (int j = 0; j < k2_config[2]; j++) {
+            std::cout << (int)quant_field_out[i * k2_config[2] + j] << ",";
+        }
+        std::cout << std::endl;
+    }
+
+#else
+    std::cout << "k2 Config:" << std::endl;
+    std::cout << "xsize:" << k2_config[0] << std::endl;
+    std::cout << "ysize:" << k2_config[1] << std::endl;
+    std::cout << "xblock8:" << k2_config[2] << std::endl;
+    std::cout << "yblock8:" << k2_config[3] << std::endl;
+    std::cout << "xblock32:" << k2_config[4] << std::endl;
+    std::cout << "yblock32:" << k2_config[5] << std::endl;
+    std::cout << "xgroup:" << k2_config[6] << std::endl;
+    std::cout << "ygroup:" << k2_config[7] << std::endl;
+    std::cout << "src_num:" << k2_config[8] << std::endl;
+    std::cout << "in_quant_field_num:" << k2_config[9] << std::endl;
+    std::cout << "cmap_num0:" << k2_config[10] << std::endl;
+    std::cout << "cmap_num1:" << k2_config[11] << std::endl;
+    std::cout << "ac_num:" << k2_config[12] << std::endl;
+    std::cout << "dc_num:" << k2_config[13] << std::endl;
+    std::cout << "acs_num:" << k2_config[14] << std::endl;
+    std::cout << "out_quant_field_num:" << k2_config[15] << std::endl;
+    std::cout << "choose_ac_astrategy:" << k2_config[16] << std::endl;
+    std::cout << "discretization_factor:" << bitsToF<uint32_t, float>(k2_config[17]) << std::endl;
+    std::cout << "kmul16:" << bitsToF<uint32_t, float>(k2_config[18]) << std::endl;
+    std::cout << "kmul32:" << bitsToF<uint32_t, float>(k2_config[19]) << std::endl;
+    std::cout << "butteraugli:" << bitsToF<uint32_t, float>(k2_config[20]) << std::endl;
+    std::cout << "intensity_mul:" << bitsToF<uint32_t, float>(k2_config[21]) << std::endl;
+    std::cout << "quant_dc:" << bitsToF<uint32_t, float>(k2_config[22]) << std::endl;
+#endif
+
+    //=============================kernel2 end============================
+
+    //============================kernel3 initialize======================
+
+    // initialize kernel3 config
+    ap_uint<32> k3_config[32];
+
+    k3_config[0] = xsize_padded;
+    k3_config[1] = ysize_padded;
+    k3_config[2] = k3_config[0] / 8;
+    k3_config[3] = k3_config[1] / 8;
+    k3_config[4] = (k3_config[0] + 31) / 32;
+    k3_config[5] = (k3_config[1] + 31) / 32;
+    k3_config[6] = (k3_config[0] + 63) / 64;
+    k3_config[7] = (k3_config[1] + 63) / 64;
+    k3_config[8] = (k3_config[0] + 511) / 512;
+    k3_config[9] = (k3_config[1] + 511) / 512;
+    k3_config[10] = (k3_config[0] + 2047) / 2048;
+    k3_config[11] = (k3_config[1] + 2047) / 2048;
+    k3_config[12] = k3_config[8] * k3_config[9];
+    k3_config[13] = k3_config[10] * k3_config[11];
+    k3_config[14] = k3_config[2] * k3_config[3];
+    k3_config[15] = k3_config[0] * k3_config[1] * 3;
+
+    const size_t max_dc_histo_size = 1024 * (MAX_NUM_COLOR + 4);
+    const size_t max_dc_size = 4 * (2 * hls_kTotalSize) + 4096; // to be test
+    const size_t max_ac_size = 4 * (4 * hls_kTotalSize) + 4096;
+    const size_t max_ac_histo_size = hls_kNumStaticContexts * 1024;
+
+    ap_uint<32>* dc_histo_code_out = (ap_uint<32>*)malloc(2 * k3_config[13] * max_dc_histo_size * sizeof(ap_uint<32>));
+    ap_uint<32>* dc_code_out = (ap_uint<32>*)malloc(2 * k3_config[13] * max_dc_size * sizeof(ap_uint<32>));
+    ap_uint<32>* ac_histo_code_out = (ap_uint<32>*)malloc(k3_config[12] * max_ac_histo_size * sizeof(ap_uint<32>));
+    ap_uint<32>* ac_code_out = (ap_uint<32>*)malloc(k3_config[12] * max_ac_size * sizeof(ap_uint<32>));
+
+    int len_dc_histo[2 * MAX_DC_GROUP] = {0};
+    int len_dc[2 * MAX_DC_GROUP] = {0};
+    int len_ac_histo[MAX_AC_GROUP] = {0};
+    int len_ac[MAX_AC_GROUP] = {0};
+    ap_uint<32> histo_cfg[4 * MAX_DC_GROUP + 2 * MAX_AC_GROUP] = {0};
+
+    memset(dc_histo_code_out, 0, 2 * k3_config[13] * max_dc_histo_size * sizeof(ap_uint<32>));
+    memset(dc_code_out, 0, 2 * k3_config[13] * max_dc_size * sizeof(ap_uint<32>));
+    memset(ac_histo_code_out, 0, k3_config[12] * max_ac_histo_size * sizeof(ap_uint<32>));
+    memset(ac_code_out, 0, k3_config[12] * max_ac_size * sizeof(ap_uint<32>));
+
+#ifdef HLS_TEST
+
+    kernel3Top(k3_config, ac, dc, quant_field_out, ac_strategy, block, order, histo_cfg, dc_histo_code_out, dc_code_out,
+               ac_histo_code_out, ac_code_out);
+
+    for (int j = 0; j < 20; j++) {
+        std::cout << ", " << (int)dc_histo_code_out[j];
+    }
+    std::cout << std::endl;
+    for (int j = 0; j < 20; j++) {
+        std::cout << ", " << (int)dc_histo_code_out[j + MAX_DC_HISTO_SIZE];
+    }
+    std::cout << std::endl;
+    for (int j = 0; j < 20; j++) {
+        std::cout << ", " << (int)dc_code_out[j];
+    }
+    std::cout << std::endl;
+    for (int j = 0; j < 20; j++) {
+        std::cout << ", " << (int)dc_code_out[j + MAX_DC_SIZE];
+    }
+    std::cout << std::endl;
+
+    for (int j = 0; j < 2 * k3_config[13]; j++) {
+        len_dc_histo[j] = histo_cfg[j];
+        std::cout << "len_dc_h:" << (int)histo_cfg[j] << std::endl;
+    }
+
+    for (int j = 0; j < k3_config[12]; j++) {
+        len_ac_histo[j] = histo_cfg[2 * k3_config[13] + j];
+        std::cout << "len_ac_h:" << (int)histo_cfg[2 * k3_config[13] + j] << std::endl;
+    }
+
+    for (int j = 0; j < 2 * k3_config[13]; j++) {
+        len_dc[j] = histo_cfg[2 * k3_config[13] + k3_config[12] + j];
+        std::cout << "len_dc_c:" << (int)histo_cfg[2 * k3_config[13] + k3_config[12] + j] << std::endl;
+    }
+
+    for (int j = 0; j < k3_config[12]; j++) {
+        len_ac[j] = histo_cfg[4 * k3_config[13] + k3_config[12] + j];
+        std::cout << "len_ac_c:" << (int)histo_cfg[4 * k3_config[13] + k3_config[12] + j] << std::endl;
+    }
+
+    for (int j = 0; j < 20; j++) {
+        std::cout << ", " << (int)ac_histo_code_out[j];
+    }
+    std::cout << std::endl;
+    for (int j = 0; j < 20; j++) {
+        std::cout << ", " << (int)ac_code_out[j];
+    }
+    std::cout << std::endl;
+#endif
+
+//============================kernel3 end=====================================
+
+//==========================host code start===================================
+#ifndef HLS_TEST
+    std::cout << "openCL host start!" << std::endl;
+
+    host_func(xclbinPath, buf, k1_config, k2_config, k3_config, axi_cmap, order, quant_field_out, len_dc_histo, len_dc,
+              dc_histo_code_out, dc_code_out, len_ac_histo, len_ac, ac_histo_code_out, ac_code_out);
+
+    std::cout << "openCL host end!" << std::endl;
+#endif
+//============================host code end===================================
+
+// return kernel1 result to original code
+#ifdef HLS_TEST
+
+    int addr = 0;
+    int x32 = (opsin.xsize() + 32 - 1) / 32;
+    int y32 = (opsin.ysize() + 32 - 1) / 32;
+    unsigned int xyb_int;
+    for (int y = 0; y < y32 * 32; y = y + 32) {
+        for (int x = 0; x < x32 * 32; x = x + 32) {
+            for (int i = 0; i < 32; i++) {
+                if (y + i < opsin.ysize()) {
+                    float* PIK_RESTRICT row_xyb0 = opsin.PlaneRow(0, y + i);
+                    for (int j = 0; j < 32; j++) {
+                        if (x + j < opsin.xsize()) {
+                            xyb_int = axi_out[addr].to_int();
+                            row_xyb0[x + j] = bitsToF<unsigned int, float>(xyb_int);
+                        }
+                        addr++;
+                    }
+                } else {
+                    for (int j = 0; j < 32; j++) {
+                        addr++;
+                    }
+                }
+            }
+
+            for (int i = 0; i < 32; i++) {
+                if (y + i < opsin.ysize()) {
+                    float* PIK_RESTRICT row_xyb1 = opsin.PlaneRow(1, y + i);
+                    for (int j = 0; j < 32; j++) {
+                        if (x + j < opsin.xsize()) {
+                            xyb_int = axi_out[addr].to_int();
+                            row_xyb1[x + j] = bitsToF<unsigned int, float>(xyb_int);
+                        }
+                        addr++;
+                    }
+                } else {
+                    for (int j = 0; j < 32; j++) {
+                        addr++;
+                    }
+                }
+            }
+
+            for (int i = 0; i < 32; i++) {
+                if (y + i < opsin.ysize()) {
+                    float* PIK_RESTRICT row_xyb2 = opsin.PlaneRow(2, y + i);
+                    for (int j = 0; j < 32; j++) {
+                        if (x + j < opsin.xsize()) {
+                            xyb_int = axi_out[addr].to_int();
+                            row_xyb2[x + j] = bitsToF<unsigned int, float>(xyb_int);
+                        }
+                        addr++;
+                    }
+                } else {
+                    for (int j = 0; j < 32; j++) {
+                        addr++;
+                    }
+                }
+            }
+        }
+    }
+
+    static const int kResolution = 8;
+    const size_t out_xsize = (xsizet + kResolution - 1) / kResolution;
+    const size_t out_ysize = (ysizet + kResolution - 1) / kResolution;
+
+    int cnt = 2;
+    ImageF quant_field = ImageF(out_xsize, out_ysize);
+    addr = 2;
+    int x4 = (quant_field.xsize() + 4 - 1) / 4;
+    int y4 = (quant_field.ysize() + 4 - 1) / 4;
+    unsigned int qf_int;
+    for (int y = 0; y < y4 * 4; y = y + 4) {
+        for (int x = 0; x < x4 * 4; x = x + 4) {
+            for (int i = 0; i < 4; i++) {
+                if (y + i < quant_field.ysize()) {
+                    float* PIK_RESTRICT row_qf = quant_field.Row(y + i);
+                    for (int j = 0; j < 4; j++) {
+                        if (x + j < quant_field.xsize()) {
+                            qf_int = axi_qf[addr].to_int();
+                            row_qf[x + j] = bitsToF<unsigned int, float>(qf_int);
+                        }
+                        addr++;
+                    }
+                } else {
+                    for (int j = 0; j < 4; j++) {
+                        addr++;
+                    }
+                }
+            }
+        }
+    }
+
+    qf_int = axi_qf[0].to_int();
+    float avg = bitsToF<unsigned int, float>(qf_int);
+    qf_int = axi_qf[1].to_int();
+    float absavg = bitsToF<unsigned int, float>(qf_int);
+
+#endif
+
+    int cntCmap = 0;
+    unsigned int cmap_int = axi_cmap[0].to_int();
+    full_cmap.ytox_dc = cmap_int;
+    cntCmap++;
+    cmap_int = axi_cmap[1].to_int();
+    full_cmap.ytob_dc = cmap_int;
+    cntCmap++;
+    for (int i = 0; i < full_cmap.ytob_map.ysize(); i++) {
+        int* PIK_RESTRICT tmpb = full_cmap.ytob_map.Row(i);
+        int* PIK_RESTRICT tmpx = full_cmap.ytox_map.Row(i);
+        for (int j = 0; j < full_cmap.ytob_map.xsize(); j++) {
+            cmap_int = axi_cmap[cntCmap].to_int();
+            tmpx[j] = cmap_int;
+            cntCmap++;
+            cmap_int = axi_cmap[cntCmap].to_int();
+            tmpb[j] = cmap_int;
+            cntCmap++;
+        }
+    }
+    //=============================return result==================================
+
+    DequantMatrices* dequant = &frame_enc_cache.matrices;
+    *dequant = DequantMatrices(/*need_inv_matrices=*/true);
+
+#ifdef HLS_TEST
+    multipass_manager->GetAcStrategy(cparams.butteraugli_distance, &quant_field, dequant, opsin,
+                                     /*pool=*/nullptr, &full_ac_strategy, aux_out);
+#endif
+
+    ImageB* dequant_control_field = &frame_enc_cache.dequant_control_field;
+    uint8_t dequant_map[kMaxQuantControlFieldValue][256];
+    for (int i = 0; i < kMaxQuantControlFieldValue; i++) {
+        for (int j = 0; j < 256; j++) {
+            dequant_map[i][j] = frame_enc_cache.dequant_map[i][j];
+        }
+    }
+    // TODO(veluca): investigate if this should be included in
+    // multipass_manager.
+    ImageB* full_ar_sigma_lut_ids = &frame_enc_cache.ar_sigma_lut_ids;
+
+#ifdef HLS_TEST
+    FindBestArControlField(cparams.butteraugli_distance, cparams.GetIntensityMultiplier(), opsin, full_ac_strategy,
+                           quant_field, dequant, frame_header.gaborish,
+                           /*pool=*/nullptr, full_ar_sigma_lut_ids);
+#endif
+
+    *dequant_control_field = ImageB(DivCeil(opsin.xsize(), kTileDim), DivCeil(opsin.ysize(), kTileDim));
+    ZeroFillImage(dequant_control_field);
+    memset(dequant_map, 0, kMaxQuantControlFieldValue * 256);
+
+    template_group_header.nonserialized_have_alpha = frame_header.has_alpha;
+
+#ifdef HLS_TEST
+
+    full_quantizer = multipass_manager->GetQuantizerAvg(
+        avg, absavg, cparams, xsize_blocks, ysize_blocks, opsin_orig, opsin, frame_header, template_group_header,
+        full_cmap, block_dictionary, full_ac_strategy, *full_ar_sigma_lut_ids, dequant, *dequant_control_field,
+        dequant_map, quant_field, aux_out);
+
+    // Initialize frame_enc_cache and encode DC.
+    InitializeFrameEncCache(frame_header, opsin, full_ac_strategy, *full_quantizer, full_cmap, block_dictionary,
+                            &frame_enc_cache, aux_out);
+
+#else
+
+    full_quantizer = std::make_shared<Quantizer>(dequant, xsize_blocks, ysize_blocks);
+    for (size_t by = 0; by < k2_config[3]; ++by) {
+        int32_t* PIK_RESTRICT row_quant = full_quantizer->quant_img_ac_.Row(by);
+        for (size_t bx = 0; bx < k2_config[2]; ++bx) {
+            row_quant[bx] = quant_field_out[by * k2_config[2] + bx];
+        }
+    }
+
+    full_quantizer->global_scale_ = 3065; // quant_field_out[k2_config[15]];
+    full_quantizer->quant_dc_ = 16;       // quant_field_out[k2_config[15] + 1];
+
+#endif
+
+    frame_enc_cache.use_new_dc = cparams.use_new_dc;
+
+    PikImageSizeInfo* matrices_info = aux_out != nullptr ? &aux_out->layers[kLayerDequantTables] : nullptr;
+
+    std::string dequant_code = frame_enc_cache.matrices.Encode(matrices_info);
+    compressed->append(dequant_code);
+    pos += dequant_code.size() * 8;
+    std::cout << "dequant_code:" << std::hex << dequant_code << std::endl;
+    std::cout << "dequant_code_pos=" << pos << std::endl;
+
+    PaddedBytes pass_global_code;
+    size_t byte_pos = 0;
+
+    // Encode quantizer DC and global scale.
+    PikImageSizeInfo* quant_info = aux_out ? &aux_out->layers[kLayerQuant] : nullptr;
+    std::string quant_code = full_quantizer->Encode(quant_info);
+    std::cout << "quant_code:" << std::hex << quant_code << std::endl;
+
+    // Encode cmap. TODO(veluca): consider encoding DC part of cmap only here,
+    // and AC in (super)groups.
+    PikImageSizeInfo* cmap_info = aux_out ? &aux_out->layers[kLayerCmap] : nullptr;
+    std::string cmap_code = EncodeColorMap(full_cmap.ytob_map, Rect(full_cmap.ytob_map), full_cmap.ytob_dc, cmap_info) +
+                            EncodeColorMap(full_cmap.ytox_map, Rect(full_cmap.ytox_map), full_cmap.ytox_dc, cmap_info);
+    std::cout << "cmap_code:" << std::hex << cmap_code << std::endl;
+
+    pass_global_code.resize(quant_code.size() + cmap_code.size());
+    Append(quant_code, &pass_global_code, &byte_pos);
+    Append(cmap_code, &pass_global_code, &byte_pos);
+
+    PikImageSizeInfo* dictionary_info = aux_out ? &aux_out->layers[kLayerDictionary] : nullptr;
+    std::string dictionary_code = block_dictionary.Encode(dictionary_info);
+    std::cout << "dictionary_code:" << std::hex << dictionary_code << std::endl;
+
+    std::string quant_cf_code = EncodeDequantControlField(frame_enc_cache.dequant_control_field, matrices_info);
+
+    quant_cf_code += EncodeDequantControlFieldMap(full_quantizer->RawQuantField(),
+                                                  frame_enc_cache.dequant_control_field, dequant_map, matrices_info);
+    std::cout << "quant_cf_code:" << std::hex << quant_cf_code << std::endl;
+
+    // Compress groups: one per combination of group and pass. Outer loop lists
+    // passes, inner lists groups. Group headers are only encoded in the groups
+    // of the first pass.
+    std::atomic<int> num_errors{0};
+
+    std::vector<PaddedBytes> group_codes(num_groups);
+    std::vector<Image3S>* group_ac;
+    std::vector<AcStrategyImage>* group_ac_strategy;
+    std::vector<EncCache> ac_cache(num_groups);
+
+#ifdef HLS_TEST
+
+    for (int group_index = 0; group_index < num_groups; ++group_index) {
+        size_t group_pos = 0;
+        Image3S* hls_ac;
+        AcStrategyImage* hls_ac_strategy;
+
+        if (!PixelsToPikGroup(cparams, frame_header, template_group_header, full_ac_strategy, full_quantizer.get(),
+                              full_cmap, io, opsin, noise_params, group_pos, frame_enc_cache, aux_out,
+                              &(ac_cache[group_index]), handlers[group_index])) {
+            num_errors.fetch_add(1, std::memory_order_relaxed);
+            continue;
+        }
+    };
+
+#endif
+    std::string compressed_dc[2 * MAX_DC_GROUP];
+    int hls_dc_groups = k3_config[13];
+    std::vector<PaddedBytes> group_codes_dc(hls_dc_groups);
+
+    hls::stream<dct_t> strm_coef_raster_syn("strm_coef_raster_syn");
+    ap_uint<32> hls_order[MAX_AC_GROUP][kOrderContexts][kDCTBlockSize];
+    int order2enc[MAX_AC_GROUP][kOrderContexts][kDCTBlockSize];
+
+    uint8_t noise_size[MAX_AC_GROUP];
+    std::string noise_code[MAX_AC_GROUP];
+    std::string order_code[MAX_AC_GROUP];
+    std::string ac_histo_code[MAX_AC_GROUP];
+    std::string ac_code[MAX_AC_GROUP];
+
+    std::vector<Image3S> ddr_group_ac;
+
+    for (int group_index = 0; group_index < num_groups; ++group_index) {
+        Image3S& group_ac = ac_cache[group_index].ac;
+        AcStrategyImage& group_ac_strategy = ac_cache[group_index].ac_strategy;
+
+        PikImageSizeInfo* ac_info = aux_out != nullptr ? &aux_out->layers[kLayerAC] : nullptr; // clean to 0
+
+        // EncodeNoise
+        _XF_IMAGE_PRINT("EncodeNoise - E2B\n");
+
+        uint8_t noise_out[hls_kMaxNoiseSize];
+        XAcc_EncodeNoise(noise_params, noise_out, noise_size[group_index]);
+
+        for (size_t i = 0; i < noise_size[group_index]; ++i)
+            noise_code[group_index][i] = static_cast<char>(noise_out[i]);
+
+        noise_code[group_index].resize(noise_size[group_index]);
+
+        // EncodeCoeffOrders
+        for (size_t c = 0; c < 3; c++) {
+            for (size_t y = 0; y < 8; y++) {     // 8* 8
+                for (size_t x = 0; x < 8; x++) { // 8
+                    order2enc[group_index][c][y * 8 + x] = order[group_index * 3 * 64 + c * 64 + y * 8 + x];
+                }
+            }
+        }
+
+        _XF_IMAGE_PRINT("Enc(deCoeffOrders - E2B\n");
+        order_code[group_index] = hls_EncodeCoeffOrders_top(order2enc[group_index]);
+    } // end rect init
+
+    std::string dc_histo_code[2 * MAX_DC_GROUP];
+    std::string dc_code[2 * MAX_DC_GROUP];
+    int offset_dc_histo = 0;
+    int offset_dc = 0;
+    for (int group_index = 0; group_index < (2 * hls_dc_groups); ++group_index) {
+        dc_histo_code[group_index].resize(max_dc_histo_size);
+        dc_code[group_index].resize(max_dc_size);
+
+        arrayCharToString(len_dc_histo[group_index], (dc_histo_code_out + offset_dc_histo), dc_histo_code[group_index]);
+        arrayShortToString(len_dc[group_index], (dc_code_out + offset_dc), dc_code[group_index]);
+        compressed_dc[group_index] = dc_histo_code[group_index] + dc_code[group_index];
+
+        offset_dc_histo += len_dc_histo[group_index];
+        offset_dc += (len_dc[group_index] + 1) / 2;
+    }
+
+    int offset_ac_histo = 0;
+    int offset_ac = 0;
+    for (int group_index = 0; group_index < hls_ac_groups; ++group_index) {
+        ac_histo_code[group_index].resize(max_ac_histo_size);
+        ac_code[group_index].resize(max_ac_size);
+        // std::cout<<"offset_ac:"<<offset_ac<<std::endl;
+        arrayCharToString(len_ac_histo[group_index], (ac_histo_code_out + offset_ac_histo), ac_histo_code[group_index]);
+        arrayShortToString(len_ac[group_index], (ac_code_out + offset_ac), ac_code[group_index]);
+
+        offset_ac_histo += len_ac_histo[group_index];
+        offset_ac += (len_ac[group_index] + 1) / 2;
+    }
+
+    std::vector<Token> ac_tokens; // no means
+    for (int group_index = 0; group_index < hls_ac_groups; ++group_index) {
+        PaddedBytes out(noise_size[group_index] + order_code[group_index].size() + ac_histo_code[group_index].size() +
+                        ac_code[group_index].size());
+        _XF_IMAGE_PRINT("noise_code size = %d\n", (int)noise_size[group_index]);
+        _XF_IMAGE_PRINT("order_code size = %d\n", (int)order_code[group_index].size());
+        _XF_IMAGE_PRINT("histo_code size = %d\n", (int)ac_histo_code[group_index].size());
+        _XF_IMAGE_PRINT("ac_code size = %d\n", (int)ac_code[group_index].size());
+        size_t byte_pos = 0;
+        Append(noise_code[group_index], &out, &byte_pos);
+        Append(order_code[group_index], &out, &byte_pos);
+        Append(ac_histo_code[group_index], &out, &byte_pos);
+        Append(ac_code[group_index], &out, &byte_pos);
+
+        // TODO(veluca): fix this with DC supergroups.
+        float output_size_estimate = out.size() - ac_code[group_index].size() - ac_histo_code[group_index].size();
+        std::vector<std::array<size_t, 256> > counts(kNumContexts);
+        size_t extra_bits = 0;
+        for (const auto& token : ac_tokens) {
+            counts[token.context][token.symbol]++;
+            extra_bits += token.nbits;
+        }
+        float entropy_coded_bits = 0;
+        for (size_t ctx = 0; ctx < kNumContexts; ctx++) {
+            size_t total = std::accumulate(counts[ctx].begin(), counts[ctx].end(), size_t(0));
+            if (total == 0) continue; // Prevent div by zero.
+            double entropy = 0;
+            for (size_t i = 0; i < 256; i++) {
+                double p = 1.0 * counts[ctx][i] / total;
+                if (p > 1e-4) {
+                    entropy -= p * std::log(p);
+                }
+            }
+            entropy_coded_bits += entropy * total / std::log(2);
+        }
+        output_size_estimate += static_cast<float>(extra_bits + entropy_coded_bits) / kBitsPerByte;
+        if (aux_out != nullptr) aux_out->entropy_estimate = output_size_estimate;
+
+        uint8_t tmp = 1;
+        std::string header_str(1, 0);
+        header_str[0] = static_cast<char>(tmp);
+        PaddedBytes ac_header(1);
+        byte_pos = 0;
+        Append(header_str, &ac_header, &byte_pos);
+        group_codes[group_index].append(ac_header);
+        group_codes[group_index].append(out);
+    } // end group ac
+
+    _XF_IMAGE_PRINT("-Build TOCs!\n");
+
+    for (int group_index = 0; group_index < hls_dc_groups; ++group_index) {
+        size_t group_pos = 0;
+        compressed_dc[group_index] = compressed_dc[2 * group_index] + compressed_dc[2 * group_index + 1];
+        group_codes_dc[group_index].resize(compressed_dc[group_index].size());
+        Append(compressed_dc[group_index], &group_codes_dc[group_index], &group_pos);
+    }
+
+    // Build TOCs.
+    // TOC0+TOC1+...+TOCn+data0+...+datan
+    PaddedBytes group_toc_dc(DCGroupSizeCoder::MaxSize(hls_dc_groups));
+    size_t group_toc_pos_dc = 0;
+    uint8_t* group_toc_storage_dc = group_toc_dc.data();
+    size_t total_groups_size_dc = 0;
+
+    //----------------
+    for (size_t group_index = 0; group_index < hls_dc_groups; ++group_index) {
+        size_t group_size = group_codes_dc[group_index].size();
+        DCGroupSizeCoder::Encode(group_size, &group_toc_pos_dc, group_toc_storage_dc);
+        _XF_IMAGE_PRINT("group_size= %d, group_toc_pos_dc=%d\n", (int)group_size, (int)group_toc_pos_dc);
+        total_groups_size_dc += group_size;
+    }
+    //----------------
+    _XF_IMAGE_PRINT("-start the group_toc/ code-DC group\n");
+    WriteZeroesToByteBoundary(&group_toc_pos_dc, group_toc_storage_dc);
+    group_toc_dc.resize(group_toc_pos_dc / kBitsPerByte);
+
+    // Push output.
+    PaddedBytes dc_codes;
+    size_t pos_dc = 0;
+    dc_codes.reserve(group_toc_dc.size() + total_groups_size_dc); //+serialized_gradient_map.size()
+    dc_codes.append(group_toc_dc);
+    pos_dc += group_toc_dc.size() * kBitsPerByte;
+    //----------------
+    for (size_t group_index = 0; group_index < hls_dc_groups; ++group_index) {
+        const PaddedBytes& group_code = group_codes_dc[group_index];
+        dc_codes.append(group_code);
+        pos_dc += group_code.size() * kBitsPerByte;
+    }
+    std::cout << "dequant_code_pos=" << pos << std::endl;
+    pass_global_code.append(dc_codes);
+    compressed->append(pass_global_code);
+    pos += pass_global_code.size() * 8;
+    std::cout << "dequant_code_pos=" << pos << std::endl;
+
+    compressed->append(dictionary_code);
+    pos += dictionary_code.size() * 8;
+    std::cout << "dequant_code_pos=" << pos << std::endl;
+
+    compressed->append(quant_cf_code);
+    pos += quant_cf_code.size() * 8;
+    std::cout << "dequant_code_pos=" << pos << std::endl;
+
+    for (size_t i = 0; i < multipass_manager->GetNumPasses(); i++) {
+        size_t group_toc_pos = 0;
+        PaddedBytes group_toc(PaddedBytes(GroupSizeCoder::MaxSize(num_groups)));
+        uint8_t* group_toc_storage = group_toc.data();
+        size_t total_groups_size = 0;
+
+        for (size_t group_index = 0; group_index < num_groups; ++group_index) {
+            size_t group_size = group_codes[group_index].size();
+            _XF_IMAGE_PRINT("group_codes size = %d\n", (int)group_size);
+            GroupSizeCoder::Encode(group_size, &group_toc_pos, group_toc_storage);
+            total_groups_size += group_size;
+        }
+        WriteZeroesToByteBoundary(&group_toc_pos, group_toc_storage);
+        group_toc.resize(group_toc_pos / kBitsPerByte);
+
+        // Push output.
+        PIK_ASSERT(pos % kBitsPerByte == 0);
+        compressed->reserve(DivCeil(pos, kBitsPerByte) + group_toc.size() + total_groups_size);
+        compressed->append(group_toc);
+        pos += group_toc.size() * kBitsPerByte;
+        std::cout << "dequant_code_pos=" << pos << std::endl;
+
+        // Only do lossless encoding in the first pass, if there is more than one.
+        if (frame_header.encoding == ImageEncoding::kLossless && i == 0) {
+            // Encode entire image at once to avoid per-group overhead. Must come
+            // BEFORE the encoded groups because the decoder assumes that the last
+            // group coincides with the end of the bitstream.
+            const Rect rect(io->color());
+
+            Image3F previous_pass;
+            PIK_RETURN_IF_ERROR(multipass_manager->GetPreviousPass(io->dec_c_original, pool, &previous_pass));
+            PIK_RETURN_IF_ERROR(
+                PixelsToPikLosslessFrame(cparams, frame_header, io, rect, previous_pass, compressed, pos, aux_out));
+        }
+
+        for (size_t group_index = 0; group_index < num_groups; ++group_index) {
+            const PaddedBytes& group_code = group_codes[group_index];
+            compressed->append(group_code);
+            pos += group_code.size() * kBitsPerByte;
+        }
+    }
+
+    io->enc_size = compressed->size();
+    _XF_IMAGE_PRINT("compressed size = %d\n", (int)compressed->size());
+    return true;
+}
+
+namespace {
+
+Status ValidateImageDimensions(const FileHeader& file_header, const DecompressParams& dparams) {
+    // Rejects empty images, widths above kMaxWidth, and pixel counts above dparams.max_num_pixels.
+    const size_t xsize = file_header.xsize();
+    const size_t ysize = file_header.ysize();
+    if (xsize == 0 || ysize == 0) {
+        return PIK_FAILURE("Empty image.");
+    }
+    static const size_t kMaxWidth = (1 << 25) - 1;
+    if (xsize > kMaxWidth) {
+        return PIK_FAILURE("Image too wide.");
+    }
+    // Compare by division: xsize * ysize can wrap around size_t and slip past the limit.
+    // xsize != 0 was checked above; for integers ysize > max / xsize <=> xsize * ysize > max.
+    if (ysize > dparams.max_num_pixels / xsize) {
+        return PIK_FAILURE("Image too big.");
+    }
+
+    return true;
+}
+
+// Overloads converting a decoded 8-bit or 16-bit lossless sample to float.
+float ToFloatForLossless(uint8_t in) {
+    return float(in);
+}
+
+float ToFloatForLossless(uint16_t in) {
+    return (1.0f / 257) * in; // 65535 / 257 == 255, the 8-bit maximum
+}
+
+// Overloads for 8-bit and 16-bit samples: add the difference coded in `in` to the rounded `prev`.
+float UndiffForLossless(uint8_t in, float prev) {
+    // Even codes stand for in / 2; odd codes for 255 - in / 2, i.e. -(in / 2 + 1) modulo 256.
+    const int half = in / 2;
+    const uint16_t delta = (in % 2 != 0) ? 255 - half : half;
+    const int base = static_cast<int>(RoundForLossless<uint8_t>(prev));
+    // Narrowing to uint8_t wraps the sum modulo 256.
+    uint8_t out = delta + base;
+    return ToFloatForLossless(out);
+}
+
+float UndiffForLossless(uint16_t in, float prev) {
+    // Same scheme modulo 65536: odd codes stand for 65535 - in / 2.
+    const int half = in / 2;
+    const uint16_t delta = (in % 2 != 0) ? 65535 - half : half;
+    const int base = static_cast<int>(RoundForLossless<uint16_t>(prev));
+    // Narrowing to uint16_t wraps the sum modulo 65536.
+    uint16_t out = delta + base;
+    return ToFloatForLossless(out);
+}
+
+// Converts decoded lossless planes (8-bit or 16-bit samples, selected by T) into
+// the `rect` region of `color`. `in` holds `num_channels` plane pointers; with a
+// single plane the result is replicated to the other two output channels. When
+// `previous_pass` is non-empty, samples are un-differenced against it instead.
+template <typename T>
+void LosslessChannelDecodePass(
+    int num_channels, const Image<T>** in, const Rect& rect, const Image3F& previous_pass, Image3F* color) {
+    const size_t width = rect.xsize();
+    const size_t height = rect.ysize();
+    // An empty previous pass means the samples are converted directly, not un-differenced.
+    const bool is_delta = previous_pass.xsize() != 0;
+
+    for (int c = 0; c < num_channels; c++) {
+        for (size_t y = 0; y < height; ++y) {
+            const T* const PIK_RESTRICT src = in[c]->Row(y);
+            float* const PIK_RESTRICT dst = rect.PlaneRow(color, c, y);
+            if (!is_delta) {
+                for (size_t x = 0; x < width; ++x) {
+                    dst[x] = ToFloatForLossless(src[x]);
+                }
+                continue;
+            }
+            // Delta-coded: rebuild each sample from the previous-pass value at the same (c, y, x).
+            const float* const PIK_RESTRICT prev = previous_pass.ConstPlaneRow(c, y);
+            for (size_t x = 0; x < width; ++x) {
+                dst[x] = UndiffForLossless(src[x], prev[x]);
+            }
+        }
+    }
+
+    if (num_channels != 1) return;
+
+    // Grayscale: copy plane 0 into planes 1 and 2.
+    for (size_t y = 0; y < height; ++y) {
+        const float* const PIK_RESTRICT gray = rect.PlaneRow(color, 0, y);
+        float* const PIK_RESTRICT copy1 = rect.PlaneRow(color, 1, y);
+        float* const PIK_RESTRICT copy2 = rect.PlaneRow(color, 2, y);
+        for (size_t x = 0; x < width; ++x) {
+            copy1[x] = copy2[x] = gray[x];
+        }
+    }
+}
+
+Status PikLosslessFrameToPixels(const PaddedBytes& compressed,
+                                const FrameHeader& frame_header,
+                                size_t* position,
+                                Image3F* color,
+                                const Rect& rect,
+                                const Image3F& previous_pass) {
+    PROFILER_FUNC;
+    // Decodes one lossless frame from `compressed` at `*position` and converts it into `rect` of `color`.
+    // The header flags pick the grayscale/color and 8/16-bit decoder; a size mismatch with `rect` is an error.
+    const bool is_gray = frame_header.lossless_grayscale;
+    const bool is_16bit = frame_header.lossless_16_bits;
+    if (is_gray && is_16bit) {
+        ImageU decoded;
+        if (!Grayscale16bit_decompress(compressed, position, &decoded)) {
+            return PIK_FAILURE("Lossless decompression failed");
+        }
+        if (!SameSize(decoded, rect)) {
+            return PIK_FAILURE("Lossless decompression yielded wrong dimensions.");
+        }
+        const ImageU* planes[1] = {&decoded};
+        LosslessChannelDecodePass(1, planes, rect, previous_pass, color);
+    } else if (is_gray) {
+        ImageB decoded;
+        if (!Grayscale8bit_decompress(compressed, position, &decoded)) {
+            return PIK_FAILURE("Lossless decompression failed");
+        }
+        if (!SameSize(decoded, rect)) {
+            return PIK_FAILURE("Lossless decompression yielded wrong dimensions.");
+        }
+        const ImageB* planes[1] = {&decoded};
+        LosslessChannelDecodePass(1, planes, rect, previous_pass, color);
+    } else if (is_16bit) {
+        Image3U decoded;
+        if (!Colorful16bit_decompress(compressed, position, &decoded)) {
+            return PIK_FAILURE("Lossless decompression failed");
+        }
+        if (!SameSize(decoded, rect)) {
+            return PIK_FAILURE("Lossless decompression yielded wrong dimensions.");
+        }
+        const ImageU* planes[3] = {&decoded.Plane(0), &decoded.Plane(1), &decoded.Plane(2)};
+        LosslessChannelDecodePass(3, planes, rect, previous_pass, color);
+    } else {
+        Image3B decoded;
+        if (!Colorful8bit_decompress(compressed, position, &decoded)) {
+            return PIK_FAILURE("Lossless decompression failed");
+        }
+        if (!SameSize(decoded, rect)) {
+            return PIK_FAILURE("Lossless decompression yielded wrong dimensions.");
+        }
+        const ImageB* planes[3] = {&decoded.Plane(0), &decoded.Plane(1), &decoded.Plane(2)};
+        LosslessChannelDecodePass(3, planes, rect, previous_pass, color);
+    }
+    return true;
+}
+
+// Decodes one group: reads its header and alpha, then (unless lossless) decodes its passes into `opsin_output`.
+// `reader` is a vector of readers (one per pass). Group headers are only present in the first pass,
+// thus the group header of this group is read from `reader[0]`.
+Status PikGroupToPixels(const DecompressParams& dparams,
+                        const FileHeader& file_header,
+                        const FrameHeader* frame_header,
+                        const PaddedBytes& compressed,
+                        const Quantizer& quantizer,
+                        const ColorCorrelationMap& full_cmap,
+                        std::vector<BitReader>* reader,
+                        Image3F* PIK_RESTRICT opsin_output,
+                        ImageU* alpha_output,
+                        const CodecContext* context,
+                        PikInfo* aux_out,
+                        FrameDecCache* PIK_RESTRICT frame_dec_cache,
+                        GroupDecCache* PIK_RESTRICT group_dec_cache,
+                        MultipassHandler* multipass_handler,
+                        const ColorEncoding& original_color_encoding,
+                        size_t downsampling) {
+    PROFILER_FUNC;
+    const Rect& padded_rect = multipass_handler->PaddedGroupRect();
+    const Rect& rect = multipass_handler->GroupRect();
+    GroupHeader header;
+    header.nonserialized_have_alpha = frame_header->has_alpha; // set before ReadGroupHeader is called
+    PIK_RETURN_IF_ERROR(ReadGroupHeader(&(*reader)[0], &header));
+    PIK_RETURN_IF_ERROR((*reader)[0].JumpToByteBoundary());
+    OverrideGroupFlags(dparams, frame_header, &header);
+
+    if (frame_header->has_alpha) {
+        // TODO(lode): do not fail here based on the metadata
+        // original_bytes_per_alpha, it should be allowed to use an efficient
+        // encoding in pik which differs from what the original had (or
+        // alternatively if they must be the same, there should not be two fields)
+        if (header.alpha.bytes_per_alpha != file_header.metadata.transcoded.original_bytes_per_alpha) {
+            return PIK_FAILURE("Nonuniform alpha bitdepth is not supported yet.");
+        }
+        if (file_header.metadata.transcoded.original_bytes_per_alpha == 0) {
+            return PIK_FAILURE("Header claims to contain alpha but the depth is 0.");
+        }
+        PIK_RETURN_IF_ERROR(DecodeAlpha(dparams, header.alpha, alpha_output, rect));
+    }
+
+    if (frame_header->encoding == ImageEncoding::kLossless) {
+        // Done; we'll decode the entire image in one shot later.
+        return true;
+    }
+
+    ImageSize opsin_size = ImageSize::Make(padded_rect.xsize(), padded_rect.ysize());
+    const size_t xsize_blocks = DivCeil<size_t>(opsin_size.xsize, kBlockDim);
+    const size_t ysize_blocks = DivCeil<size_t>(opsin_size.ysize, kBlockDim);
+
+    Rect group_in_color_tiles(multipass_handler->BlockGroupRect().x0() / kColorTileDimInBlocks,
+                              multipass_handler->BlockGroupRect().y0() / kColorTileDimInBlocks,
+                              DivCeil(multipass_handler->BlockGroupRect().xsize(), kColorTileDimInBlocks),
+                              DivCeil(multipass_handler->BlockGroupRect().ysize(), kColorTileDimInBlocks));
+
+    NoiseParams noise_params; // handed to decode() below; not otherwise read in this function
+
+    InitializeDecCache(*frame_dec_cache, padded_rect, group_dec_cache);
+
+    if (dparams.max_passes == 0) ZeroFillImage(&group_dec_cache->ac); // the loop below runs zero times in this case
+    for (size_t i = 0; i < frame_header->num_passes && i < dparams.max_passes; i++) {
+        PROFILER_ZONE("dec_bitstr");
+        auto decode = i == 0 ? &DecodeFromBitstream</*first=*/true> : &DecodeFromBitstream</*first=*/false>;
+        if (!decode(*frame_header, header, compressed, &(*reader)[i], padded_rect, multipass_handler, xsize_blocks,
+                    ysize_blocks, full_cmap, group_in_color_tiles, &noise_params, quantizer, frame_dec_cache,
+                    group_dec_cache, aux_out)) {
+            return PIK_FAILURE("Pik decoding failed.");
+        }
+        if (!(*reader)[i].JumpToByteBoundary()) {
+            return PIK_FAILURE("Pik bitstream is corrupted.");
+        }
+    }
+
+    // The padded group rectangle scaled down by `downsampling` (origin floored, size rounded up).
+    Rect opsin_rect(padded_rect.x0() / downsampling, padded_rect.y0() / downsampling,
+                    DivCeil(padded_rect.xsize(), downsampling), DivCeil(padded_rect.ysize(), downsampling));
+
+    // Note: DecodeFromBitstream already performed dequantization.
+    ReconOpsinImage(*frame_header, header, quantizer, multipass_handler->BlockGroupRect(), frame_dec_cache,
+                    group_dec_cache, opsin_output, opsin_rect, aux_out, downsampling);
+
+    return true;
+}
+
+} // namespace
+
+Status PikPassToPixels(DecompressParams dparams,
+                       const PaddedBytes& compressed,
+                       const FileHeader& file_header,
+                       ThreadPool* pool,
+                       BitReader* reader,
+                       CodecInOut* io,
+                       PikInfo* aux_out,
+                       MultipassManager* multipass_manager) { // decodes one pass from `reader` into `io`
+    PROFILER_ZONE("PikPassToPixels uninstrumented");
+    PIK_RETURN_IF_ERROR(ValidateImageDimensions(file_header, dparams));
+
+    io->metadata = file_header.metadata;
+
+    // Used when writing the output file unless DecoderHints overrides it.
+    io->SetOriginalBitsPerSample(file_header.metadata.transcoded.original_bit_depth);
+    io->dec_c_original = file_header.metadata.transcoded.original_color_encoding;
+    if (io->dec_c_original.icc.empty()) {
+        // Removed by MaybeRemoveProfile; fail unless we successfully restore it.
+        PIK_RETURN_IF_ERROR(ColorManagement::SetProfileFromFields(&io->dec_c_original));
+    }
+
+    const size_t xsize = file_header.xsize();
+    const size_t ysize = file_header.ysize();
+    const size_t padded_xsize = DivCeil(xsize, kBlockDim) * kBlockDim; // rounded up to a multiple of kBlockDim
+    const size_t padded_ysize = DivCeil(ysize, kBlockDim) * kBlockDim;
+
+    FrameHeader header;
+    PIK_RETURN_IF_ERROR(ReadPassHeader(reader, &header));
+
+    PIK_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+
+    // TODO(veluca): add kProgressive.
+    if (header.encoding != ImageEncoding::kPasses && header.encoding != ImageEncoding::kLossless) {
+        return PIK_FAILURE("Unsupported bitstream");
+    }
+
+    OverridePassFlags(dparams, &header);
+
+    size_t downsampling;
+    if (dparams.max_downsampling >= 8) {
+        downsampling = 8;
+        dparams.max_passes = 0; // `dparams` is a by-value copy, so the caller's limit is untouched
+    } else {
+        downsampling = 1;
+        for (const auto& downsampling_and_num_passes : header.downsampling_factor_to_passes) {
+            if (dparams.max_downsampling >= downsampling_and_num_passes.first &&
+                dparams.max_passes > downsampling_and_num_passes.second) {
+                downsampling = downsampling_and_num_passes.first;
+                dparams.max_passes = downsampling_and_num_passes.second + 1; // each match lowers the pass limit
+            }
+        }
+    }
+    if (aux_out != nullptr) {
+        aux_out->downsampling = downsampling;
+    }
+
+    multipass_manager->StartPass(header);
+
+    ImageU alpha;
+    if (header.has_alpha) {
+        alpha = ImageU(xsize, ysize);
+    }
+
+    const size_t xsize_groups = DivCeil(xsize, kGroupDim);
+    const size_t ysize_groups = DivCeil(ysize, kGroupDim);
+    const size_t num_groups = xsize_groups * ysize_groups;
+
+    std::vector<PikInfo> aux_outs;
+    if (aux_out != nullptr) {
+        aux_outs.resize(num_groups, *aux_out); // one copy per group, merged back with Assimilate() after decoding
+    }
+    std::vector<MultipassHandler*> handlers(num_groups);
+    {
+        PROFILER_ZONE("Get handlers");
+        for (size_t group_index = 0; group_index < num_groups; ++group_index) {
+            const size_t gx = group_index % xsize_groups; // groups are numbered row by row
+            const size_t gy = group_index / xsize_groups;
+            const size_t x = gx * kGroupDim;
+            const size_t y = gy * kGroupDim;
+            Rect rect(x, y, kGroupDim, kGroupDim, xsize, ysize); // presumably clamped to the image bounds - confirm
+            handlers[group_index] = multipass_manager->GetGroupHandler(group_index, rect);
+        }
+    }
+
+    const size_t xsize_blocks = padded_xsize / kBlockDim;
+    const size_t ysize_blocks = padded_ysize / kBlockDim;
+
+    FrameDecCache frame_dec_cache;
+    frame_dec_cache.use_new_dc = dparams.use_new_dc;
+    frame_dec_cache.grayscale = header.flags & FrameHeader::kGrayscaleOpt;
+    frame_dec_cache.ac_strategy = AcStrategyImage(xsize_blocks, ysize_blocks);
+    frame_dec_cache.raw_quant_field = ImageI(xsize_blocks, ysize_blocks);
+    frame_dec_cache.ar_sigma_lut_ids = ImageB(xsize_blocks, ysize_blocks);
+    frame_dec_cache.dequant_control_field = ImageB(DivCeil(xsize, kTileDim), DivCeil(ysize, kTileDim));
+
+    ColorCorrelationMap cmap(xsize, ysize);
+
+    // TODO(veluca): deserialize quantization tables from the bitstream.
+
+    Quantizer quantizer(&frame_dec_cache.matrices, 0, 0);
+    BlockDictionary block_dictionary;
+
+    std::vector<GroupDecCache> group_dec_caches(NumThreads(pool)); // indexed by the `thread` argument of process_group
+
+    if (header.encoding == ImageEncoding::kPasses) {
+        PROFILER_ZONE("DecodeColorMap+DC");
+        PIK_RETURN_IF_ERROR(frame_dec_cache.matrices.Decode(reader));
+        PIK_RETURN_IF_ERROR(quantizer.Decode(reader));
+        PIK_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+
+        // TODO(veluca): decode quantization table mapping.
+
+        DecodeColorMap(reader, &cmap.ytob_map, &cmap.ytob_dc);
+        DecodeColorMap(reader, &cmap.ytox_map, &cmap.ytox_dc);
+        PIK_RETURN_IF_ERROR(DecodeDCGroups(reader, compressed, header, xsize_blocks, ysize_blocks, quantizer, cmap,
+                                           pool, multipass_manager, &frame_dec_cache, &group_dec_caches, aux_out));
+        PIK_RETURN_IF_ERROR(block_dictionary.Decode(reader, padded_xsize, padded_ysize));
+
+        // TODO(veluca): think of splitting this in DC groups.
+        PIK_RETURN_IF_ERROR(DecodeDequantControlField(reader, &frame_dec_cache.dequant_control_field));
+        PIK_RETURN_IF_ERROR(DecodeDequantControlFieldMap(reader, frame_dec_cache.raw_quant_field,
+                                                         frame_dec_cache.dequant_control_field,
+                                                         frame_dec_cache.dequant_map));
+        multipass_manager->SaveAcStrategy(frame_dec_cache.ac_strategy);
+        multipass_manager->SaveQuantField(frame_dec_cache.raw_quant_field);
+    }
+
+    Image3F opsin(DivCeil(padded_xsize, downsampling), DivCeil(padded_ysize, downsampling));
+
+    // Read TOCs: for every pass, decode num_groups group sizes and accumulate them into byte offsets.
+    std::vector<std::vector<size_t> > group_offsets(header.num_passes);
+    std::vector<size_t> group_codes_begin(header.num_passes);
+    for (size_t i = 0; i < header.num_passes; i++) {
+        PROFILER_ZONE("Read TOC");
+        std::vector<size_t>& group_offsets_pass = group_offsets[i];
+        group_offsets_pass.reserve(num_groups + 1);
+        group_offsets_pass.push_back(0);
+        for (size_t group_index = 0; group_index < num_groups; ++group_index) {
+            const uint32_t size = GroupSizeCoder::Decode(reader);
+            group_offsets_pass.push_back(group_offsets_pass.back() + size); // entries g and g + 1 delimit group g
+        }
+        PIK_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+        // First pass only: read lossless. NOTE(review): fills xsize x ysize of `opsin`; confirm downsampling is 1 here.
+        if (header.encoding == ImageEncoding::kLossless && i == 0) {
+            const Rect rect(0, 0, xsize, ysize);
+
+            size_t pos = reader->Position();
+            const size_t before_pos = pos;
+
+            Image3F previous_pass;
+            PIK_RETURN_IF_ERROR(multipass_manager->GetPreviousPass(io->dec_c_original, pool, &previous_pass));
+            PIK_RETURN_IF_ERROR(PikLosslessFrameToPixels(compressed, header, &pos, &opsin, rect, previous_pass));
+            reader->SkipBits((pos - before_pos) * kBitsPerByte); // move `reader` by the amount `pos` advanced
+            // Byte-wise; no need to jump to boundary.
+        }
+        // Pretend all groups of this pass are read.
+        group_codes_begin[i] = reader->Position();
+        reader->SkipBits(group_offsets_pass.back() * kBitsPerByte);
+        if (reader->Position() > compressed.size()) {
+            return PIK_FAILURE("Group code extends after stream end");
+        }
+    }
+
+    // Decode groups: each group gets one BitReader per pass, positioned at that group's code.
+    std::atomic<int> num_errors{0};
+    const auto process_group = [&](const int group_index, const int thread) {
+        std::vector<BitReader> readers;
+        for (size_t i = 0; i < header.num_passes; i++) {
+            size_t group_code_offset = group_offsets[i][group_index];
+            size_t group_reader_limit = group_offsets[i][group_index + 1];
+            // TODO(user): this looks ugly; we should get rid of PaddedBytes
+            //               parameter once it is wrapped into BitReader; otherwise
+            //               it is easy to screw the things up.
+            readers.emplace_back(compressed.data(), group_codes_begin[i] + group_reader_limit);
+            readers.back().SkipBits((group_codes_begin[i] + group_code_offset) * kBitsPerByte);
+        }
+
+        PikInfo* my_aux_out = aux_out ? &aux_outs[group_index] : nullptr;
+        if (!PikGroupToPixels(dparams, file_header, &header, compressed, quantizer, cmap, &readers, &opsin, &alpha,
+                              io->Context(), my_aux_out, &frame_dec_cache, &group_dec_caches[thread],
+                              handlers[group_index], io->dec_c_original, downsampling)) {
+            num_errors.fetch_add(1); // failures are only counted here; the check follows RunOnPool
+            return;
+        }
+    };
+    {
+        PROFILER_ZONE("PikPassToPixels pool");
+        RunOnPool(pool, 0, num_groups, process_group, "PikPassToPixels");
+    }
+
+    PIK_RETURN_IF_ERROR(num_errors.load(std::memory_order_relaxed) == 0); // fail if any group failed
+
+    if (aux_out != nullptr) {
+        for (size_t group_index = 0; group_index < num_groups; ++group_index) {
+            aux_out->Assimilate(aux_outs[group_index]);
+        }
+    }
+
+    if (header.encoding == ImageEncoding::kPasses) {
+        multipass_manager->RestoreOpsin(&opsin);
+        multipass_manager->SetDecodedPass(opsin);
+
+        // NOTE(review): a default-constructed NoiseParams is passed; the one in PikGroupToPixels is local to it.
+        PIK_RETURN_IF_ERROR(FinalizeFrameDecoding(&opsin, file_header.xsize(), file_header.ysize(), header,
+                                                  NoiseParams(), quantizer, block_dictionary, &frame_dec_cache, aux_out,
+                                                  downsampling));
+        // From now on, `opsin` is actually linear sRGB.
+
+        if (header.flags & FrameHeader::kGrayscaleOpt) {
+            PROFILER_ZONE("Grayscale opt");
+            // Force all channels to gray
+            for (size_t y = 0; y < opsin.ysize(); ++y) {
+                float* PIK_RESTRICT row_r = opsin.PlaneRow(0, y);
+                float* PIK_RESTRICT row_g = opsin.PlaneRow(1, y);
+                float* PIK_RESTRICT row_b = opsin.PlaneRow(2, y);
+                for (size_t x = 0; x < opsin.xsize(); x++) {
+                    float gray = row_r[x] * 0.299 + row_g[x] * 0.587 + row_b[x] * 0.114; // weighted R, G, B sum
+                    row_r[x] = row_g[x] = row_b[x] = gray;
+                }
+            }
+        }
+        const ColorEncoding& c = io->Context()->c_linear_srgb[io->dec_c_original.IsGray()];
+        io->SetFromImage(std::move(opsin), c);
+    } else if (header.encoding == ImageEncoding::kLossless) {
+        io->SetFromImage(std::move(opsin), io->dec_c_original);
+        io->ShrinkTo(xsize, ysize);
+        multipass_manager->SetDecodedPass(io);
+    } else {
+        return PIK_FAILURE("Unsupported image encoding"); // other encodings were already rejected after ReadPassHeader
+    }
+
+    if (header.has_alpha) {
+        io->SetAlpha(std::move(alpha), 8 * file_header.metadata.transcoded.original_bytes_per_alpha);
+    }
+
+    io->ShrinkTo(DivCeil(xsize, downsampling), DivCeil(ysize, downsampling));
+
+    return true;
+}
+
+} // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/pik_frame.h b/codec/L2/demos/pikEnc/host/pik/pik_frame.h
new file mode 100755
index 0000000000..3aecac935c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/pik_frame.h
@@ -0,0 +1,57 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_PIK_PASS_H_
+#define PIK_PIK_PASS_H_
+
+#include "pik/codec.h"
+#include "pik/compressed_image.h"
+#include "pik/data_parallel.h"
+#include "pik/headers.h"
+#include "pik/multipass_handler.h"
+#include "pik/padded_bytes.h"
+#include "pik/pik_info.h"
+#include "pik/pik_params.h"
+#include "pik/quantizer.h"
+#include "pik/status.h"
+
+// Encode and decode a single pass of an image. A pass can be either a
+// decomposition of an image (eg. DC-only pass), or a frame in an animation.
+// The behaviour of the (en/de)coder is defined by the given multipass_manager.
+
+namespace pik {
+
+struct FrameParams {  // Per-frame parameters for the encoders declared below.
+  FrameInfo frame_info;
+};
+
+// These process each group in parallel.
+
+// Encodes an input image `io` in a byte stream, without adding a file header.
+// `pos` is the bit position in the output data at which writing starts.
+Status PixelsToPikPass(CompressParams params, const FrameParams &frame_params,
+                       const CodecInOut *io, ThreadPool *pool,
+                       PaddedBytes *compressed, size_t &pos, PikInfo *aux_out,
+                       MultipassManager *multipass_manager);
+
+// Variant taking `xclbinPath` (presumably the FPGA binary to load; confirm).
+Status hls_PixelsToPikPass(CompressParams params, std::string xclbinPath,
+                           const FrameParams &frame_params,
+                           const CodecInOut *io, ThreadPool *pool,
+                           PaddedBytes *compressed, size_t &pos,
+                           PikInfo *aux_out,
+                           MultipassManager *multipass_manager);
+
+// Decodes an input image from a byte stream, using `file_header`.
+// See PikToPixels for explanation of `io` color space.
+Status PikPassToPixels(DecompressParams params, const PaddedBytes &compressed,
+                       const FileHeader &file_header, ThreadPool *pool,
+                       BitReader *reader, CodecInOut *io, PikInfo *aux_out,
+                       MultipassManager *multipass_manager);
+
+} // namespace pik
+
+#endif // PIK_PIK_PASS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/pik_info.cc b/codec/L2/demos/pikEnc/host/pik/pik_info.cc
new file mode 100755
index 0000000000..1460e95aa7
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/pik_info.cc
@@ -0,0 +1,28 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/pik_info.h"
+
+namespace pik {
+// Dumps `coeff_image` as a debug image, one 8x8 tile per run of 64 values.
+void PikInfo::DumpCoeffImage(const char* label,
+                             const Image3S& coeff_image) const {
+  PIK_ASSERT(coeff_image.xsize() % 64 == 0);
+  if (debug_prefix.empty()) return;  // DumpImage() would discard the result.
+  Image3S reshuffled(coeff_image.xsize() / 8, coeff_image.ysize() * 8);
+  for (int c = 0; c < 3; c++) {
+    for (int y = 0; y < coeff_image.ysize(); y++) {
+      for (int x = 0; x < coeff_image.xsize(); x += 64) {
+        for (int i = 0; i < 64; i++) {
+          reshuffled.PlaneRow(c, 8 * y + i / 8)[x / 8 + i % 8] =
+              coeff_image.PlaneRow(c, y)[x + i];
+        }
+      }
+    }
+  }
+  DumpImage(label, reshuffled);
+}
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/pik_info.h b/codec/L2/demos/pikEnc/host/pik/pik_info.h
new file mode 100755
index 0000000000..7716ddcc1a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/pik_info.h
@@ -0,0 +1,201 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_PIK_INFO_H_
+#define PIK_PIK_INFO_H_
+
+// Optional output information for debugging and analyzing size usage.
+
+#include <cstddef>
+#include <sstream>
+#include <string>
+#include <vector>
+#include "pik/adaptive_reconstruction_fwd.h"
+#include "pik/chroma_from_luma_fwd.h"
+#include "pik/codec.h"
+#include "pik/image.h"
+#include "pik/pik_inspection.h"
+
+namespace pik {
+
+struct PikImageSizeInfo {  // size counters; PikInfo keeps one per layer
+  PikImageSizeInfo() {}
+  // Adds every counter of `victim` into this record.
+  void Assimilate(const PikImageSizeInfo& victim) {
+    num_clustered_histograms += victim.num_clustered_histograms;
+    histogram_size += victim.histogram_size;
+    entropy_coded_bits += victim.entropy_coded_bits;
+    extra_bits += victim.extra_bits;
+    total_size += victim.total_size;
+    clustered_entropy += victim.clustered_entropy;
+  }
+  void Print(size_t num_inputs) const {  // histogram count is per-input average
+    printf("%10zu", total_size);  // size_t is unsigned: %zu, not %zd
+    if (histogram_size > 0) {
+      printf("   [%6.2f %8zu %8zu %8zu %12.3f",
+             num_clustered_histograms * 1.0 / num_inputs, histogram_size,
+             entropy_coded_bits >> 3, extra_bits >> 3,  // bit counts / 8
+             histogram_size + (clustered_entropy + extra_bits) / 8.0f);
+      printf("]");
+    }
+    printf("\n");
+  }
+  size_t num_clustered_histograms = 0;
+  size_t histogram_size = 0;
+  size_t entropy_coded_bits = 0;
+  size_t extra_bits = 0;
+  size_t total_size = 0;
+  double clustered_entropy = 0.0;
+};
+
+enum {  // Indices of the per-layer records in PikInfo::layers.
+  kLayerHeader = 0,
+  kLayerQuant,
+  kLayerDequantTables,
+  kLayerOrder,
+  kLayerDC,
+  kLayerCmap,
+  kLayerControlFields,
+  kLayerAC,
+  kLayerDictionary,
+  kNumImageLayers  // number of layers above; sizes kImageLayers
+};
+static const char* kImageLayers[kNumImageLayers] = {  // names, same order
+    "header", "quant",   "tables", "order",     "DC",
+    "cmap",   "cfields", "AC",     "dictionary"};
+
+struct TestingAux {  // carried inside PikInfo as its `testing_aux` member
+  Image3F* ac_prediction = nullptr;  // null unless a caller assigns it
+};
+
+// Metadata and statistics gathered during compression or decompression.
+struct PikInfo {
+  PikInfo() : layers(kNumImageLayers) {}
+  PikInfo(const PikInfo&) = default;
+  // Accumulates victim's counters; victim.layers must cover every index here.
+  void Assimilate(const PikInfo& victim) {
+    for (size_t i = 0; i < layers.size(); ++i) {
+      layers[i].Assimilate(victim.layers[i]);
+    }
+    num_blocks += victim.num_blocks;
+    num_dct2_blocks += victim.num_dct2_blocks;
+    num_dct4_blocks += victim.num_dct4_blocks;
+    num_dct16_blocks += victim.num_dct16_blocks;
+    num_dct32_blocks += victim.num_dct32_blocks;
+    entropy_estimate += victim.entropy_estimate;
+    num_butteraugli_iters += victim.num_butteraugli_iters;
+    cfl_stats_dc.Assimilate(victim.cfl_stats_dc);
+    cfl_stats_ac.Assimilate(victim.cfl_stats_ac);
+    adaptive_reconstruction_aux.Assimilate(victim.adaptive_reconstruction_aux);
+  }
+  // Returns the sum of the size records of all layers.
+  PikImageSizeInfo TotalImageSize() const {
+    PikImageSizeInfo total;
+    for (size_t i = 0; i < layers.size(); ++i) {
+      total.Assimilate(layers[i]);
+    }
+    return total;
+  }
+  // Prints per-layer and total sizes plus CFL/AR stats; no-op for 0 inputs.
+  void Print(size_t num_inputs) const {
+    if (num_inputs == 0) return;
+    printf("Average butteraugli iters: %10.2f\n",
+           num_butteraugli_iters * 1.0 / num_inputs);
+    const PikImageSizeInfo total = TotalImageSize();  // loop-invariant
+    for (size_t i = 0; i < layers.size(); ++i) {
+      if (layers[i].total_size > 0) {
+        printf("Total layer size %-10s\t", kImageLayers[i]);
+        printf("%10f%%", 100.0f * layers[i].total_size / total.total_size);
+        layers[i].Print(num_inputs);
+      }
+    }
+    printf("Total image size           ");
+    total.Print(num_inputs);
+
+    printf("\nCFL:\n");
+    cfl_stats_dc.Print();
+    cfl_stats_ac.Print();
+
+    printf("\nAR:\n");
+    adaptive_reconstruction_aux.Print();
+  }
+  // Writes `image` to "<debug_prefix><label>.png"; no-op without a prefix.
+  template <typename T>
+  void DumpImage(const char* label, const Image3<T>& image) const {
+    if (debug_prefix.empty()) return;
+    std::ostringstream pathname;
+    pathname << debug_prefix << label << ".png";
+    CodecContext context;
+    CodecInOut io(&context);
+    io.SetFromImage(StaticCastImage3<float>(image), context.c_srgb[0]);
+    (void)io.EncodeToFile(io.c_current(), sizeof(T) * kBitsPerByte,
+                          pathname.str());
+  }
+  template <typename T>
+  void DumpImage(const char* label, const Image<T>& image) {
+    if (debug_prefix.empty()) return;  // avoid three copies nobody will see
+    DumpImage(label,
+              Image3<T>(CopyImage(image), CopyImage(image), CopyImage(image)));
+  }
+
+  // This dumps coefficients as a 16-bit PNG with coefficients of a block placed
+  // in the area that would contain that block in a normal image. To view the
+  // resulting image manually, rescale intensities by using:
+  // $ convert -auto-level IMAGE.PNG - | display -
+  void DumpCoeffImage(const char* label, const Image3S& coeff_image) const;
+  // Installs the callback that InspectImage3F() forwards to.
+  void SetInspectorImage3F(pik::InspectorImage3F inspector) {
+    inspector_image3f_ = inspector;
+  }
+
+  // Allows hooking intermediate data inspection into various
+  // places of the PIK processing pipeline. Returns true iff
+  // processing should proceed.
+  bool InspectImage3F(const char* label, const Image3F& image) {
+    if (inspector_image3f_ != nullptr) {
+      return inspector_image3f_(label, image);
+    }
+    return true;
+  }
+
+  std::vector<PikImageSizeInfo> layers;
+  size_t num_blocks = 0;
+  // Number of blocks that use larger DCT. Only set in the encoder.
+  size_t num_dct2_blocks = 0;
+  size_t num_dct4_blocks = 0;
+  size_t num_dct16_blocks = 0;
+  size_t num_dct32_blocks = 0;
+  // Estimate of compressed size according to entropy-given lower bounds.
+  float entropy_estimate = 0;
+  int num_butteraugli_iters = 0;
+  // If not empty, additional debugging information (e.g. debug images) is
+  // saved in files with this prefix.
+  std::string debug_prefix;
+
+  // By how much the decoded image was downsampled relative to the encoded
+  // image.
+  size_t downsampling = 1;
+
+  AdaptiveReconstructionAux adaptive_reconstruction_aux;
+  CFL_Stats cfl_stats_dc;
+  CFL_Stats cfl_stats_ac;
+
+  pik::InspectorImage3F inspector_image3f_;
+
+  // WARNING: this is actually an INPUT to some code, and must be
+  // copy-initialized from aux_out to aux_outs.
+  TestingAux testing_aux;
+};
+
+// Lets callers skip building debug images that would never be written out.
+static inline bool WantDebugOutput(const PikInfo* info) {
+  if (info == nullptr) return false;  // no aux struct, so nowhere to dump
+  return !info->debug_prefix.empty();  // an empty prefix disables dumping
+}
+
+}  // namespace pik
+
+#endif  // PIK_PIK_INFO_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/pik_inspection.h b/codec/L2/demos/pikEnc/host/pik/pik_inspection.h
new file mode 100755
index 0000000000..ae4ccd675c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/pik_inspection.h
@@ -0,0 +1,23 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+
+#ifndef PIK_PIK_INSPECTION_H_
+#define PIK_PIK_INSPECTION_H_
+
+#include <functional>
+#include "pik/image.h"
+
+namespace pik {
+// Inspection callback type. If one is installed it is called on various
+// intermediate images during processing with a const char* tag and the image.
+//
+// Returns false if processing can be stopped at that point, true otherwise.
+// This is only advisory - it is always OK to just continue processing.
+using InspectorImage3F = std::function<bool(const char*, const Image3F&)>;
+}  // namespace pik
+
+#endif  // PIK_PIK_INSPECTION_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/pik_params.h b/codec/L2/demos/pikEnc/host/pik/pik_params.h
new file mode 100755
index 0000000000..00f51aa212
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/pik_params.h
@@ -0,0 +1,176 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_PIK_PARAMS_H_
+#define PIK_PIK_PARAMS_H_
+
+// Parameters and flags that govern PIK compression/decompression.
+
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <string>
+
+namespace pik {
+
+// Reasonable default for sRGB that matches common monitors. Butteraugli was
+// tuned for this value; darker/brighter inputs are scaled accordingly.
+static constexpr int kDefaultIntensityTarget = 250;  // default intensity_target
+static constexpr float kIntensityMultiplier = 1.0f / kDefaultIntensityTarget;
+
+// Tri-state switch: kOn/kOff force a feature (typically a GroupHeader flag),
+// while kDefault leaves the decision to the caller's own condition.
+enum class Override : int { kDefault = -1, kOff = 0, kOn = 1 };
+// Maps a plain bool onto the two forcing states (never kDefault).
+static inline Override OverrideFromBool(bool flag) {
+  return !flag ? Override::kOff : Override::kOn;
+}
+// Returns `condition` unless `o` forces the outcome on or off.
+static inline bool ApplyOverride(Override o, bool condition) {
+  if (o == Override::kOn) return true;
+  if (o == Override::kOff) return false;
+  return condition;
+}
+
+// Additional smoothing helps for medium/low-quality.
+enum class GaborishStrength : uint32_t {
+  // Serialized, do not change enumerator values.
+  kOff = 0,
+  k500,   // = 1
+  k750,   // = 2
+  k875,   // = 3
+  k1000,  // = 4
+
+  // Future extensions: [5, 6]
+  kMaxValue  // = 5, one past k1000
+};
+
+struct CompressParams {
+  // Only used for benchmarking (comparing vs libjpeg)
+  int jpeg_quality = 100;
+  bool jpeg_chroma_subsampling = false;
+  bool clear_metadata = false;
+
+  float butteraugli_distance = 1.0f;
+  size_t target_size = 0;
+  float target_bitrate = 0.0f;
+
+  // 0.0 means search for the adaptive quantization map that matches the
+  // butteraugli distance, positive values mean quantize everywhere with that
+  // value.
+  float uniform_quant = 0.0f;
+  float quant_border_bias = 0.0f;
+
+  // If true, will use a compression method that is reasonably fast and aims to
+  // find a trade-off between quality and file size that optimizes the
+  // quality-adjusted-bits-per-pixel metric.
+  bool fast_mode = false;
+  int max_butteraugli_iters = 11;
+
+  bool guetzli_mode = false;
+  int max_butteraugli_iters_guetzli_mode = 100;
+
+  bool lossless_mode = false;
+
+  Override noise = Override::kDefault;
+  Override gradient = Override::kDefault;
+
+  Override adaptive_reconstruction = Override::kDefault;
+  // Optional parameters for adaptive reconstruction.
+  Override epf_use_sharpened = Override::kDefault;
+  uint32_t epf_sigma = 0;  // 0 means adaptive
+
+  int gaborish = int(GaborishStrength::k750);  // GaborishStrength held as int
+
+  bool use_ac_strategy = false;
+
+  // Progressive mode.
+  bool progressive_mode = false;
+
+  // Progressive-mode saliency extractor.
+  // Empty string disables this feature.
+  std::string saliency_extractor_for_progressive_mode;
+  std::string xclbinPath;  // NOTE(review): presumably the FPGA xclbin - confirm
+  // Every saliency-heatmap cell with saliency > threshold will be considered as
+  // 'salient'.
+  float saliency_threshold = 1.0f;
+  // Debug parameter: If true, drop non-salient AC part in progressive encoding.
+  bool saliency_debug_skip_nonsalient = false;
+
+  // Input and output file name. Will be used to provide pluggable saliency
+  // extractor with paths.
+  const char* file_in = nullptr;
+  const char* file_out = nullptr;
+
+  // Whether to keep temporary files (used e.g. to communicate with external
+  // saliency extractor).
+  bool keep_tempfiles = false;
+
+  // Prints extra information during/after encoding.
+  bool verbose = false;
+
+  // Multiplier for penalizing new HF artifacts more than blurring away
+  // features. 1.0=neutral.
+  float hf_asymmetry = 1.0f;
+
+  // Intended intensity target of the viewer after decoding, in nits (cd/m^2).
+  // There is no other way of knowing the target brightness - depends on source
+  // material. 709 typically targets 100 nits, 2020 PQ up to 10K, but HDR
+  // content is more typically mastered to 4K nits. The default requires no
+  // scaling for Butteraugli.
+  float intensity_target = kDefaultIntensityTarget;
+
+  // Enable new Lossless codec for DC. This flag exists only temporarily
+  // as long as both old and new implementation co-exist, and eventually
+  // only the new implementation should remain.
+  bool use_new_dc = false;
+
+  // Enable LF/HF predictions.
+  bool predict_lf = false;
+  bool predict_hf = false;
+  // Ratio of intensity_target to the default target; 1.0 when left at default.
+  float GetIntensityMultiplier() const {
+    return intensity_target * kIntensityMultiplier;
+  }
+};
+
+struct DecompressParams {
+  uint64_t max_num_pixels = (1 << 30) - 1;  // 2^30 - 1
+  // If true, checks at the end of decoding that all of the compressed data
+  // was consumed by the decoder.
+  bool check_decompressed_size = true;
+
+  Override noise = Override::kDefault;     // cannot be kOn (needs encoder)
+  Override gradient = Override::kDefault;  // cannot be kOn (needs encoder)
+
+  Override adaptive_reconstruction = Override::kDefault;
+  // Optional parameters for adaptive reconstruction.
+  Override epf_use_sharpened = Override::kDefault;
+  uint32_t epf_sigma = 0;  // 0 means adaptive
+  // NOTE(review): -1 presumably means "no override of the stream" - confirm.
+  int gaborish = -1;
+
+  // Enable new Lossless codec for DC. This flag exists only temporarily
+  // as long as both old and new implementation co-exist, and eventually
+  // only the new implementation should remain.
+  bool use_new_dc = false;
+
+  // How many passes to decode at most. By default, decode everything.
+  uint32_t max_passes = std::numeric_limits<uint32_t>::max();
+  // Alternatively, one can specify the maximum tolerable downscaling factor
+  // with respect to the full size of the image. By default, nothing less than
+  // the full size is requested.
+  size_t max_downsampling = 1;
+};
+
+// Each feature is enabled for butteraugli distances >= its threshold below:
+static constexpr float kMinButteraugliForNoise = 99.0f;  // disabled
+static constexpr float kMinButteraugliForGradient = 99.0f;  // disabled
+static constexpr float kMinButteraugliForAdaptiveReconstruction = 0.0f;
+
+}  // namespace pik
+
+#endif  // PIK_PIK_PARAMS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/pikcommon.cmake b/codec/L2/demos/pikEnc/host/pik/pikcommon.cmake
new file mode 100755
index 0000000000..109476bb1e
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/pikcommon.cmake
@@ -0,0 +1,297 @@
+# Copyright 2019 Google LLC
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+
+add_library(pikcommon STATIC
+  ${CMAKE_CURRENT_LIST_DIR}/simd/targets.cc
+  ${CMAKE_CURRENT_LIST_DIR}/simd/targets.h
+  ${CMAKE_CURRENT_LIST_DIR}/ac_predictions.cc
+  ${CMAKE_CURRENT_LIST_DIR}/ac_predictions.h
+  ${CMAKE_CURRENT_LIST_DIR}/ac_strategy.cc
+  ${CMAKE_CURRENT_LIST_DIR}/ac_strategy.h
+  ${CMAKE_CURRENT_LIST_DIR}/adaptive_quantization.cc
+  ${CMAKE_CURRENT_LIST_DIR}/adaptive_quantization.h
+  ${CMAKE_CURRENT_LIST_DIR}/adaptive_reconstruction.cc
+  ${CMAKE_CURRENT_LIST_DIR}/adaptive_reconstruction.h
+  ${CMAKE_CURRENT_LIST_DIR}/adaptive_reconstruction_fwd.h
+  ${CMAKE_CURRENT_LIST_DIR}/alpha.cc
+  ${CMAKE_CURRENT_LIST_DIR}/alpha.h
+  ${CMAKE_CURRENT_LIST_DIR}/ans_common.cc
+  ${CMAKE_CURRENT_LIST_DIR}/ans_common.h
+  ${CMAKE_CURRENT_LIST_DIR}/ans_decode.cc
+  ${CMAKE_CURRENT_LIST_DIR}/ans_decode.h
+  ${CMAKE_CURRENT_LIST_DIR}/ans_encode.cc
+  ${CMAKE_CURRENT_LIST_DIR}/ans_encode.h
+  ${CMAKE_CURRENT_LIST_DIR}/ans_params.h
+  ${CMAKE_CURRENT_LIST_DIR}/approx_cube_root.h
+  ${CMAKE_CURRENT_LIST_DIR}/ar_control_field.cc
+  ${CMAKE_CURRENT_LIST_DIR}/ar_control_field.h
+  ${CMAKE_CURRENT_LIST_DIR}/arch_specific.cc
+  ${CMAKE_CURRENT_LIST_DIR}/arch_specific.h
+  ${CMAKE_CURRENT_LIST_DIR}/args.h
+  ${CMAKE_CURRENT_LIST_DIR}/bit_reader.h
+  ${CMAKE_CURRENT_LIST_DIR}/bits.h
+  ${CMAKE_CURRENT_LIST_DIR}/block.h
+  ${CMAKE_CURRENT_LIST_DIR}/block_dictionary.cc
+  ${CMAKE_CURRENT_LIST_DIR}/block_dictionary.h
+  ${CMAKE_CURRENT_LIST_DIR}/brotli.cc
+  ${CMAKE_CURRENT_LIST_DIR}/brotli.h
+  ${CMAKE_CURRENT_LIST_DIR}/butteraugli/butteraugli.cc
+  ${CMAKE_CURRENT_LIST_DIR}/butteraugli/butteraugli.h
+  ${CMAKE_CURRENT_LIST_DIR}/butteraugli_comparator.cc
+  ${CMAKE_CURRENT_LIST_DIR}/butteraugli_comparator.h
+  ${CMAKE_CURRENT_LIST_DIR}/butteraugli_distance.cc
+  ${CMAKE_CURRENT_LIST_DIR}/butteraugli_distance.h
+  ${CMAKE_CURRENT_LIST_DIR}/byte_order.h
+  ${CMAKE_CURRENT_LIST_DIR}/cache_aligned.cc
+  ${CMAKE_CURRENT_LIST_DIR}/cache_aligned.h
+  ${CMAKE_CURRENT_LIST_DIR}/cluster.h
+  ${CMAKE_CURRENT_LIST_DIR}/chroma_from_luma.cc
+  ${CMAKE_CURRENT_LIST_DIR}/chroma_from_luma.h
+  ${CMAKE_CURRENT_LIST_DIR}/chroma_from_luma_fwd.h
+  ${CMAKE_CURRENT_LIST_DIR}/codec.h
+  ${CMAKE_CURRENT_LIST_DIR}/codec_impl.cc
+  ${CMAKE_CURRENT_LIST_DIR}/codec_png.cc
+  ${CMAKE_CURRENT_LIST_DIR}/codec_png.h
+  ${CMAKE_CURRENT_LIST_DIR}/codec_pnm.cc
+  ${CMAKE_CURRENT_LIST_DIR}/codec_pnm.h
+  ${CMAKE_CURRENT_LIST_DIR}/color_correlation.cc
+  ${CMAKE_CURRENT_LIST_DIR}/color_correlation.h
+  ${CMAKE_CURRENT_LIST_DIR}/color_encoding.cc
+  ${CMAKE_CURRENT_LIST_DIR}/color_encoding.h
+  ${CMAKE_CURRENT_LIST_DIR}/color_management.cc
+  ${CMAKE_CURRENT_LIST_DIR}/color_management.h
+  ${CMAKE_CURRENT_LIST_DIR}/common.h
+  ${CMAKE_CURRENT_LIST_DIR}/compiler_specific.h
+  ${CMAKE_CURRENT_LIST_DIR}/compressed_dc.cc
+  ${CMAKE_CURRENT_LIST_DIR}/compressed_dc.h
+  ${CMAKE_CURRENT_LIST_DIR}/compressed_image.cc
+  ${CMAKE_CURRENT_LIST_DIR}/compressed_image.h
+  ${CMAKE_CURRENT_LIST_DIR}/compressed_image_fwd.h
+  ${CMAKE_CURRENT_LIST_DIR}/context_map_decode.cc
+  ${CMAKE_CURRENT_LIST_DIR}/context_map_decode.h
+  ${CMAKE_CURRENT_LIST_DIR}/context_map_encode.cc
+  ${CMAKE_CURRENT_LIST_DIR}/context_map_encode.h
+  ${CMAKE_CURRENT_LIST_DIR}/convolve.h
+  ${CMAKE_CURRENT_LIST_DIR}/data_parallel.cc
+  ${CMAKE_CURRENT_LIST_DIR}/data_parallel.h
+  ${CMAKE_CURRENT_LIST_DIR}/dc_predictor.cc
+  ${CMAKE_CURRENT_LIST_DIR}/dc_predictor.h
+  ${CMAKE_CURRENT_LIST_DIR}/dct.cc
+  ${CMAKE_CURRENT_LIST_DIR}/dct.h
+  ${CMAKE_CURRENT_LIST_DIR}/dct_simd_4.h
+  ${CMAKE_CURRENT_LIST_DIR}/dct_simd_8.h
+  ${CMAKE_CURRENT_LIST_DIR}/dct_simd_any.h
+  ${CMAKE_CURRENT_LIST_DIR}/dct_util.cc
+  ${CMAKE_CURRENT_LIST_DIR}/dct_util.h
+  ${CMAKE_CURRENT_LIST_DIR}/deconvolve.cc
+  ${CMAKE_CURRENT_LIST_DIR}/deconvolve.h
+  ${CMAKE_CURRENT_LIST_DIR}/descriptive_statistics.cc
+  ${CMAKE_CURRENT_LIST_DIR}/descriptive_statistics.h
+  ${CMAKE_CURRENT_LIST_DIR}/detect_dots.cc
+  ${CMAKE_CURRENT_LIST_DIR}/detect_dots.h
+  ${CMAKE_CURRENT_LIST_DIR}/entropy_coder.cc
+  ${CMAKE_CURRENT_LIST_DIR}/entropy_coder.h
+  ${CMAKE_CURRENT_LIST_DIR}/epf.cc
+  ${CMAKE_CURRENT_LIST_DIR}/epf.h
+  ${CMAKE_CURRENT_LIST_DIR}/bilinear_transform.cc
+  ${CMAKE_CURRENT_LIST_DIR}/bilinear_transform.h
+  ${CMAKE_CURRENT_LIST_DIR}/epf_stats.h
+  ${CMAKE_CURRENT_LIST_DIR}/epf_target.cc
+  ${CMAKE_CURRENT_LIST_DIR}/external_image.cc
+  ${CMAKE_CURRENT_LIST_DIR}/external_image.h
+  ${CMAKE_CURRENT_LIST_DIR}/fast_log.h
+  ${CMAKE_CURRENT_LIST_DIR}/field_encodings.h
+  ${CMAKE_CURRENT_LIST_DIR}/fields.h
+  ${CMAKE_CURRENT_LIST_DIR}/file_io.h
+  ${CMAKE_CURRENT_LIST_DIR}/gaborish.cc
+  ${CMAKE_CURRENT_LIST_DIR}/gaborish.h
+  ${CMAKE_CURRENT_LIST_DIR}/gamma_correct.h
+  ${CMAKE_CURRENT_LIST_DIR}/gauss_blur.cc
+  ${CMAKE_CURRENT_LIST_DIR}/gauss_blur.h
+  ${CMAKE_CURRENT_LIST_DIR}/gradient_map.cc
+  ${CMAKE_CURRENT_LIST_DIR}/gradient_map.h
+  ${CMAKE_CURRENT_LIST_DIR}/headers.cc
+  ${CMAKE_CURRENT_LIST_DIR}/headers.h
+  ${CMAKE_CURRENT_LIST_DIR}/huffman_decode.cc
+  ${CMAKE_CURRENT_LIST_DIR}/huffman_decode.h
+  ${CMAKE_CURRENT_LIST_DIR}/huffman_encode.cc
+  ${CMAKE_CURRENT_LIST_DIR}/huffman_encode.h
+  ${CMAKE_CURRENT_LIST_DIR}/image.cc
+  ${CMAKE_CURRENT_LIST_DIR}/image.h
+  ${CMAKE_CURRENT_LIST_DIR}/image_ops.h
+  ${CMAKE_CURRENT_LIST_DIR}/lehmer_code.cc
+  ${CMAKE_CURRENT_LIST_DIR}/lehmer_code.h
+  ${CMAKE_CURRENT_LIST_DIR}/linalg.cc
+  ${CMAKE_CURRENT_LIST_DIR}/linalg.h
+  ${CMAKE_CURRENT_LIST_DIR}/lossless16.cc
+  ${CMAKE_CURRENT_LIST_DIR}/lossless16.h
+  ${CMAKE_CURRENT_LIST_DIR}/lossless8.cc
+  ${CMAKE_CURRENT_LIST_DIR}/lossless8.h
+  ${CMAKE_CURRENT_LIST_DIR}/lossless_entropy.cc
+  ${CMAKE_CURRENT_LIST_DIR}/lossless_entropy.h
+  ${CMAKE_CURRENT_LIST_DIR}/metadata.cc
+  ${CMAKE_CURRENT_LIST_DIR}/metadata.h
+  ${CMAKE_CURRENT_LIST_DIR}/multipass_handler.h
+  ${CMAKE_CURRENT_LIST_DIR}/noise.cc
+  ${CMAKE_CURRENT_LIST_DIR}/noise.h
+  ${CMAKE_CURRENT_LIST_DIR}/opsin_image.cc
+  ${CMAKE_CURRENT_LIST_DIR}/opsin_image.h
+  ${CMAKE_CURRENT_LIST_DIR}/opsin_inverse.cc
+  ${CMAKE_CURRENT_LIST_DIR}/opsin_inverse.h
+  ${CMAKE_CURRENT_LIST_DIR}/opsin_params.cc
+  ${CMAKE_CURRENT_LIST_DIR}/opsin_params.h
+  ${CMAKE_CURRENT_LIST_DIR}/optimize.h
+  ${CMAKE_CURRENT_LIST_DIR}/os_specific.cc
+  ${CMAKE_CURRENT_LIST_DIR}/os_specific.h
+  ${CMAKE_CURRENT_LIST_DIR}/padded_bytes.cc
+  ${CMAKE_CURRENT_LIST_DIR}/padded_bytes.h
+  ${CMAKE_CURRENT_LIST_DIR}/pik.cc
+  ${CMAKE_CURRENT_LIST_DIR}/pik.h
+  ${CMAKE_CURRENT_LIST_DIR}/pik_info.cc
+  ${CMAKE_CURRENT_LIST_DIR}/pik_info.h
+  ${CMAKE_CURRENT_LIST_DIR}/pik_params.h
+  ${CMAKE_CURRENT_LIST_DIR}/pik_frame.cc
+  ${CMAKE_CURRENT_LIST_DIR}/pik_frame.h
+  ${CMAKE_CURRENT_LIST_DIR}/profiler.h
+  ${CMAKE_CURRENT_LIST_DIR}/quantizer.cc
+  ${CMAKE_CURRENT_LIST_DIR}/quantizer.h
+  ${CMAKE_CURRENT_LIST_DIR}/quant_weights.cc
+  ${CMAKE_CURRENT_LIST_DIR}/quant_weights.h
+  ${CMAKE_CURRENT_LIST_DIR}/rational_polynomial.h
+  ${CMAKE_CURRENT_LIST_DIR}/resample.h
+  ${CMAKE_CURRENT_LIST_DIR}/resize.h
+  ${CMAKE_CURRENT_LIST_DIR}/saliency_map.cc
+  ${CMAKE_CURRENT_LIST_DIR}/saliency_map.h
+  ${CMAKE_CURRENT_LIST_DIR}/single_image_handler.cc
+  ${CMAKE_CURRENT_LIST_DIR}/single_image_handler.h
+  ${CMAKE_CURRENT_LIST_DIR}/size_coder.h
+  ${CMAKE_CURRENT_LIST_DIR}/status.cc
+  ${CMAKE_CURRENT_LIST_DIR}/status.h
+  ${CMAKE_CURRENT_LIST_DIR}/tsc_timer.h
+  ${CMAKE_CURRENT_LIST_DIR}/upscaler.cc
+  ${CMAKE_CURRENT_LIST_DIR}/upscaler.h
+  ${CMAKE_CURRENT_LIST_DIR}/write_bits.h
+  ${CMAKE_CURRENT_LIST_DIR}/yuv_convert.cc
+  ${CMAKE_CURRENT_LIST_DIR}/yuv_convert.h
+  ${CMAKE_CURRENT_LIST_DIR}/yuv_opsin_convert.cc
+  ${CMAKE_CURRENT_LIST_DIR}/yuv_opsin_convert.h
+)
+
+target_compile_options(pikcommon PUBLIC
+  # Debug flags
+  -dwarf-column-info
+  -debug-info-kind=line-tables-only
+  -dwarf-version=4
+  -debugger-tuning=gdb
+
+  # F_FLAGS
+  -fmerge-all-constants
+  -fno-builtin-fwrite
+  -fno-builtin-fread
+  -fno-signed-char
+  -fsized-deallocation
+  -fnew-alignment=8
+  -fno-cxx-exceptions
+  -fno-exceptions
+  -fno-slp-vectorize
+  -fno-vectorize
+
+  # WARN_FLAGS
+  -Wformat-security
+  -Wno-char-subscripts
+  -Wno-error=deprecated-declarations
+  -Wno-sign-compare
+  -Wno-strict-overflow
+  -Wno-unused-function
+  -Wthread-safety-analysis
+  -Wno-unknown-warning-option
+  -Wno-unused-command-line-argument
+  -Wno-ignored-optimization-argument
+  -Wno-ambiguous-member-template
+  -Wno-pointer-sign
+  -Wno-address-of-packed-member
+  -Wno-enum-compare-switch
+  -Wno-expansion-to-defined
+  -Wno-extern-c-compat
+  -Wno-gnu-alignof-expression
+  -Wno-gnu-designator
+  -Wno-gnu-variable-sized-type-not-at-end
+  -Wno-ignored-attributes
+  -Wno-ignored-qualifiers
+  -Wno-inconsistent-missing-override
+  -Wno-invalid-source-encoding
+  -Wno-mismatched-tags
+  -Wno-potentially-evaluated-expression
+  -Wno-return-std-move
+  -Wno-self-assign-overloaded
+  -Wno-tautological-constant-compare
+  -Wno-tautological-constant-in-range-compare
+  -Wno-tautological-type-limit-compare
+  -Wno-tautological-undefined-compare
+  -Wno-tautological-unsigned-zero-compare
+  -Wno-tautological-unsigned-enum-zero-compare
+  -Wno-undefined-func-template
+  -Wno-unknown-pragmas
+  -Wno-unused-const-variable
+  -Wno-unused-lambda-capture
+  -Wno-unused-local-typedef
+  -Wno-unused-private-field
+  -Wno-private-header
+  -Wfloat-overflow-conversion
+  -Wfloat-zero-conversion
+  -Wfor-loop-analysis
+  -Wgnu-redeclared-enum
+  -Wimplicit-fallthrough
+  -Winfinite-recursion
+  -Wliteral-conversion
+  -Wself-assign
+  -Wstring-conversion
+  -Wtautological-overlap-compare
+  -Wunused-comparison
+  -Wvla
+  -Wno-reserved-user-defined-literal
+  -Wno-return-type-c-linkage
+  -Wno-deprecated
+  -Wno-invalid-offsetof
+  -Wno-literal-suffix
+  -Woverloaded-virtual
+  -Wnon-virtual-dtor
+  -Wdeprecated-increment-bool
+  -Wc++11-compat
+  -Wno-c++11-compat-binary-literal
+  -Wc++2a-extensions
+  -Wno-register
+  -Wno-dynamic-exception-spec
+  -Wprivate-header
+  -Wno-builtin-macro-redefined
+
+  # Machine flags
+  # We don't add -pthread here since it is added automatically when depending on
+  # Threads::Threads.
+  -mthread-model posix
+
+  # Language flags
+  -disable-free
+  -disable-llvm-verifier
+  -discard-value-names
+  # Note: this works only because this is the only -Xclang passed.
+  -Xclang -relaxed-aliasing
+  -fmath-errno
+)
+
+target_include_directories(pikcommon
+    PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")  # dir of the including CMakeLists.txt
+# PUBLIC above also adds that directory for every target linking pikcommon.
+target_link_libraries(pikcommon PRIVATE
+  brotlicommon-static
+  brotlienc-static
+  brotlidec-static
+  fse
+  lodepng
+  lcms2
+  Threads::Threads
+  "${CMAKE_DL_LIBS}"
+)
diff --git a/codec/L2/demos/pikEnc/host/pik/piktests.cmake b/codec/L2/demos/pikEnc/host/pik/piktests.cmake
new file mode 100755
index 0000000000..f46f61e2e4
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/piktests.cmake
@@ -0,0 +1,58 @@
+# Copyright 2019 Google LLC
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+
+set(PIK_TESTS
+  ac_predictions_test
+  adaptive_reconstruction_test
+  ans_encode_test
+  ans_test
+  approx_cube_root_test
+  bit_reader_test
+  bits_test
+  brotli_test
+  byte_order_test
+  chroma_from_luma_test
+  codec_impl_test
+  color_encoding_test
+  color_management_test
+  compressed_image_test
+  convolve_test
+  data_parallel_test
+  dc_predictor_test
+  dct_test
+  dct_util_test
+  deconvolve_test
+  descriptive_statistics_test
+  entropy_coder_test
+  epf_test
+  external_image_test
+  fields_test
+  gaborish_test
+  gamma_correct_test
+  gradient_test
+  headers_test
+  image_test
+  linalg_test
+  lossless8_test
+  lossless16_test
+  opsin_image_test
+  opsin_inverse_test
+  optimize_test
+  padded_bytes_test
+  pik_test
+  quantizer_test
+  rational_polynomial_test
+  resample_test
+  robust_statistics_test
+  yuv_convert_test
+  yuv_opsin_convert_test
+)
+foreach (TEST IN LISTS PIK_TESTS)  # one executable per <name>.cc next to this file
+  add_executable("${TEST}" "${CMAKE_CURRENT_LIST_DIR}/${TEST}.cc")
+  target_compile_definitions("${TEST}" PRIVATE -DTEST_DATA_PATH="${CMAKE_CURRENT_SOURCE_DIR}/third_party/testdata")
+  target_link_libraries("${TEST}" pikcommon gmock gtest gtest_main)
+  gtest_add_tests(TARGET "${TEST}")  # registers the target's tests with CTest
+endforeach ()
diff --git a/codec/L2/demos/pikEnc/host/pik/profiler.h b/codec/L2/demos/pikEnc/host/pik/profiler.h
new file mode 100755
index 0000000000..e8ca45fe1d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/profiler.h
@@ -0,0 +1,715 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_PROFILER_H_
+#define PIK_PROFILER_H_
+
+// High precision, low overhead time measurements. Returns exact call counts and
+// total elapsed time for user-defined 'zones' (code regions, i.e. C++ scopes).
+//
+// Usage: add this header to BUILD srcs; instrument regions of interest:
+// { PROFILER_ZONE("name"); /*code*/ } or
+// void FuncToMeasure() { PROFILER_FUNC; /*code*/ }.
+// After all threads have exited any zones, invoke PROFILER_PRINT_RESULTS() to
+// print call counts and average durations [CPU cycles] to stdout, sorted in
+// descending order of total duration.
+
+// Configuration settings:
+
+// If zero, this file has no effect and no measurements will be recorded.
+#ifndef PROFILER_ENABLED
+#define PROFILER_ENABLED 0
+#endif
+
+// How many mebibytes to allocate (if PROFILER_ENABLED) per thread that
+// enters at least one zone. Once this buffer is full, the thread will analyze
+// and discard packets, thus temporarily adding some observer overhead.
+// Each zone occupies 16 bytes.
+#ifndef PROFILER_THREAD_STORAGE
+#define PROFILER_THREAD_STORAGE 16ULL
+#endif
+
+// Temporarily disabled.
+#if PROFILER_ENABLED && 0
+
+#define PROFILER_PRINT_OVERHEAD 0
+
+#include <algorithm>  // min/max
+#include <atomic>
+#include <cassert>
+#include <cinttypes>  // PRIu64
+#include <cstddef>    // ptrdiff_t
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>  // memcpy
+#include <new>
+
+#include "pik/arch_specific.h"
+#include "pik/cache_aligned.h"
+#include "pik/compiler_specific.h"
+#include "pik/robust_statistics.h"
+#include "pik/status.h"
+#include "pik/tsc_timer.h"
+
+// Non-portable aspects:
+// - SSE2 128-bit load/store (write-combining, UpdateOrAdd)
+// - RDTSCP timestamps (serializing, high-resolution)
+// - assumes string literals are stored within an 8 MiB range
+// - compiler-specific annotations (restrict, alignment, fences)
+#if PIK_ARCH_X64
+#include <emmintrin.h>
+#if PIK_COMPILER_MSVC
+#include <intrin.h>
+#else
+#include <x86intrin.h>
+#endif
+#endif
+
+namespace pik {
+
+// Upper bounds for fixed-size data structures (guarded via PIK_ASSERT/CHECK):
+
+// How many unique threads can enter a zone (those that don't do not count).
+// Memory use is up to kMaxThreads * PROFILER_THREAD_STORAGE MiB of packets.
+// WARNING: fiber libraries and multiple ThreadPool can spawn >100 threads.
+static constexpr size_t kMaxThreads = 1024;
+
+// Maximum nesting of zones.
+static constexpr size_t kMaxDepth = 64;
+
+// Maximum number of distinct zones (Accumulators) per thread.
+static constexpr size_t kMaxZones = 256;
+
+// A single zone entry or exit event packed into 64 bits: the upper
+// kOffsetBits hold a biased name offset, the rest a truncated timestamp.
+class Packet {
+ public:
+  // If offsets do not fit, UpdateOrAdd will overrun our heap allocation
+  // (governed by kMaxZones). We have seen ~100 MiB static binaries.
+  static constexpr size_t kOffsetBits = 27;
+  static constexpr uint64_t kOffsetBias = 1ULL << (kOffsetBits - 1);
+
+  // We need full-resolution timestamps; at an effective rate of 4 GHz,
+  // this permits 34 second zone durations (for longer durations, split into
+  // multiple zones). Wraparound is handled by masking.
+  static constexpr size_t kTimestampBits = 64 - kOffsetBits;
+  static constexpr uint64_t kTimestampMask = (1ULL << kTimestampBits) - 1;
+
+  // Packs biased_offset (must fit in kOffsetBits) above the masked timestamp.
+  static Packet Make(const size_t biased_offset, const uint64_t timestamp) {
+    PIK_ASSERT(biased_offset < (1ULL << kOffsetBits));
+    const uint64_t low_bits = timestamp & kTimestampMask;
+    Packet result;
+    result.bits_ = (biased_offset << kTimestampBits) + low_bits;
+    return result;
+  }
+
+  uint64_t Timestamp() const { return bits_ & kTimestampMask; }
+
+  size_t BiasedOffset() const { return bits_ >> kTimestampBits; }
+
+ private:
+  uint64_t bits_;
+};
+static_assert(sizeof(Packet) == 8, "Wrong Packet size");
+
+// Returns the address of a string literal, minus Packet::kOffsetBias. Assuming
+// zone names are also literals and stored nearby, we can represent them as
+// offsets, which are faster to compute than hashes or even a static index.
+//
+// This function must not be static - each call (even from other translation
+// units) must return the same value.
+inline const char* StringOrigin() {
+  // Chosen such that no zone name is a prefix nor suffix of this string to
+  // ensure they aren't merged: unbiased offset 0 identifies zone-exit packets.
+  static const char* string_origin = "__#Origin#__";
+  // With the bias, names stored before the literal still get offsets >= 0.
+  return string_origin - Packet::kOffsetBias;
+}
+
+// Representation of an active zone, stored in a stack. Used to deduct
+// child duration from the parent's self time. POD.
+struct ProfilerNode {
+  Packet packet;         // Zone-entry packet (name offset + entry timestamp).
+  uint64_t child_total;  // Sum of nested zones' durations + child overheads.
+};
+
+// Call count and total self duration of all zones sharing one name offset.
+struct Accumulator {
+  static constexpr size_t kNumCallBits = 64 - Packet::kOffsetBits;
+
+  uint64_t BiasedOffset() const { return num_calls >> kNumCallBits; }
+  uint64_t NumCalls() const { return num_calls & ((1ULL << kNumCallBits) - 1); }
+
+  // UpdateOrAdd relies upon this layout (num_calls first, then the duration).
+  uint64_t num_calls = 0;  // upper bits = biased_offset.
+  uint64_t total_duration = 0;  // Sum of the self durations passed in.
+};
+#if PIK_ARCH_X64
+static_assert(sizeof(Accumulator) == sizeof(__m128i), "Wrong Accumulator size");
+#endif
+
+// Saturating subtraction: yields zero instead of wrapping around (or going
+// negative) whenever subtrahend exceeds minuend.
+template <typename T>
+inline T ClampedSubtract(const T minuend, const T subtrahend) {
+  const bool would_underflow = minuend < subtrahend;
+  return would_underflow ? 0 : minuend - subtrahend;
+}
+
+// Per-thread call graph (stack) and Accumulator for each zone.
+class Results {
+ public:
+  Results() {
+    // Zero-initialize first accumulator to avoid a check for num_zones_ == 0.
+    memset(zones_, 0, sizeof(Accumulator));
+  }
+
+  // Used for computing overhead when this thread encounters its first Zone.
+  // This has no observable effect apart from increasing "analyze_elapsed_".
+  uint64_t ZoneDuration(const Packet* packets) {
+    PIK_CHECK(depth_ == 0);
+    PIK_CHECK(num_zones_ == 0);
+    AnalyzePackets(packets, 2);  // One entry packet plus one exit packet.
+    const uint64_t duration = zones_[0].total_duration;
+    zones_[0].num_calls = 0;
+    zones_[0].total_duration = 0;
+    PIK_CHECK(depth_ == 0);
+    num_zones_ = 0;
+    return duration;
+  }
+
+  void SetSelfOverhead(const uint64_t self_overhead) {
+    self_overhead_ = self_overhead;
+  }
+
+  void SetChildOverhead(const uint64_t child_overhead) {
+    child_overhead_ = child_overhead;
+  }
+
+  // Draw all required information from the packets, which can be discarded
+  // afterwards. Called whenever this thread's storage is full.
+  void AnalyzePackets(const Packet* packets, const size_t num_packets) {
+    const uint64_t t0 = TicksBefore();
+
+    for (size_t i = 0; i < num_packets; ++i) {
+      const Packet p = packets[i];
+      // Entering a zone: push it; it is retired when its exit packet arrives.
+      if (p.BiasedOffset() != Packet::kOffsetBias) {
+        PIK_ASSERT(depth_ < kMaxDepth);
+        nodes_[depth_].packet = p;
+        nodes_[depth_].child_total = 0;
+        ++depth_;
+        continue;
+      }
+
+      PIK_ASSERT(depth_ != 0);  // Exit packet: some zone must be open.
+      const ProfilerNode& node = nodes_[depth_ - 1];
+      // Masking correctly handles unsigned wraparound.
+      const uint64_t duration =
+          (p.Timestamp() - node.packet.Timestamp()) & Packet::kTimestampMask;
+      const uint64_t self_duration = ClampedSubtract(
+          duration, self_overhead_ + child_overhead_ + node.child_total);
+
+      UpdateOrAdd(node.packet.BiasedOffset(), 1, self_duration);
+      --depth_;
+
+      // Deduct this nested node's time from its parent's self_duration.
+      if (depth_ != 0) {
+        nodes_[depth_ - 1].child_total += duration + child_overhead_;
+      }
+    }
+
+    const uint64_t t1 = TicksAfter();
+    analyze_elapsed_ += t1 - t0;
+  }
+
+  // Incorporates results from another thread. Call after all threads have
+  // exited any zones.
+  void Assimilate(const Results& other) {
+    const uint64_t t0 = TicksBefore();
+    PIK_ASSERT(depth_ == 0);
+    PIK_ASSERT(other.depth_ == 0);
+
+    for (size_t i = 0; i < other.num_zones_; ++i) {
+      const Accumulator& zone = other.zones_[i];
+      UpdateOrAdd(zone.BiasedOffset(), zone.NumCalls(), zone.total_duration);
+    }
+    const uint64_t t1 = TicksAfter();
+    analyze_elapsed_ += t1 - t0 + other.analyze_elapsed_;
+  }
+
+  // Single-threaded. Merges duplicate names, then prints zones sorted by cost.
+  void Print() {
+    const uint64_t t0 = TicksBefore();
+    MergeDuplicates();
+
+    // Sort by decreasing total (self) cost.
+    std::sort(zones_, zones_ + num_zones_,
+              [](const Accumulator& r1, const Accumulator& r2) {
+                return r1.total_duration > r2.total_duration;
+              });
+
+    const char* string_origin = StringOrigin();
+    uint64_t total_visible_duration = 0;
+    for (size_t i = 0; i < num_zones_; ++i) {
+      const Accumulator& r = zones_[i];
+      const uint64_t num_calls = r.NumCalls();
+      const char* name = string_origin + r.BiasedOffset();
+      if (name[0] != '@') {  // Names starting with '@' are neither shown nor summed.
+        total_visible_duration += r.total_duration;
+        printf("%-40s: %10" PRIu64 " x %15" PRIu64 "= %15" PRIu64 "\n", name,
+               num_calls, r.total_duration / num_calls, r.total_duration);
+      }
+    }
+
+    const uint64_t t1 = TicksAfter();
+    analyze_elapsed_ += t1 - t0;
+    printf("Total clocks during analysis: %" PRIu64 "\n", analyze_elapsed_);
+    printf("Total clocks measured: %" PRIu64 "\n", total_visible_duration);
+  }
+
+  // Single-threaded. Clears all results as if no zones had been recorded.
+  void Reset() {
+    analyze_elapsed_ = 0;
+    PIK_CHECK(depth_ == 0);
+    num_zones_ = 0;
+    memset(nodes_, 0, sizeof(nodes_));
+    memset(zones_, 0, sizeof(zones_));
+  }
+
+ private:
+#if PIK_ARCH_X64
+  static bool SameOffset(const __m128i zone, const size_t biased_offset) {
+    const uint64_t num_calls = _mm_cvtsi128_si64(zone);  // Low 64 bits.
+    return (num_calls >> Accumulator::kNumCallBits) == biased_offset;
+  }
+#endif
+
+  // Updates an existing Accumulator (uniquely identified by biased_offset) or
+  // adds one if this is the first time this thread analyzed that zone.
+  // Uses a self-organizing list data structure, which avoids dynamic memory
+  // allocations and is far faster than unordered_map. Loads, updates and
+  // stores the entire Accumulator with vector instructions.
+  void UpdateOrAdd(const size_t biased_offset, const uint64_t num_calls,
+                   const uint64_t duration) {
+    PIK_ASSERT(biased_offset < (1ULL << Packet::kOffsetBits));
+
+#if PIK_ARCH_X64
+    const __m128i num_calls_64 = _mm_cvtsi64_si128(num_calls);
+    const __m128i duration_64 = _mm_cvtsi64_si128(duration);
+    const __m128i add_duration_call =
+        _mm_unpacklo_epi64(num_calls_64, duration_64);
+
+    __m128i* const PIK_RESTRICT zones = reinterpret_cast<__m128i*>(zones_);
+
+    // Special case for first zone: (maybe) update, without swapping.
+    __m128i prev = _mm_load_si128(zones);
+    if (SameOffset(prev, biased_offset)) {
+      prev = _mm_add_epi64(prev, add_duration_call);
+      PIK_ASSERT(SameOffset(prev, biased_offset));
+      _mm_store_si128(zones, prev);
+      return;
+    }
+
+    // Look for a zone with the same offset.
+    for (size_t i = 1; i < num_zones_; ++i) {
+      __m128i zone = _mm_load_si128(zones + i);
+      if (SameOffset(zone, biased_offset)) {
+        zone = _mm_add_epi64(zone, add_duration_call);
+        PIK_ASSERT(SameOffset(zone, biased_offset));
+        // Swap with predecessor (more conservative than move to front,
+        // but at least as successful).
+        _mm_store_si128(zones + i - 1, zone);
+        _mm_store_si128(zones + i, prev);
+        return;
+      }
+      prev = zone;
+    }
+
+    // Not found; create a new Accumulator.
+    const __m128i biased_offset_64 = _mm_slli_epi64(
+        _mm_cvtsi64_si128(biased_offset), Accumulator::kNumCallBits);
+    const __m128i zone = _mm_add_epi64(biased_offset_64, add_duration_call);
+    PIK_ASSERT(SameOffset(zone, biased_offset));
+
+    PIK_ASSERT(num_zones_ < kMaxZones);
+    _mm_store_si128(zones + num_zones_, zone);
+    ++num_zones_;
+#else
+    // Special case for first zone: (maybe) update, without swapping.
+    if (zones_[0].BiasedOffset() == biased_offset) {
+      zones_[0].total_duration += duration;
+      zones_[0].num_calls += num_calls;
+      PIK_ASSERT(zones_[0].BiasedOffset() == biased_offset);
+      return;
+    }
+
+    // Look for a zone with the same offset.
+    for (size_t i = 1; i < num_zones_; ++i) {
+      if (zones_[i].BiasedOffset() == biased_offset) {
+        zones_[i].total_duration += duration;
+        zones_[i].num_calls += num_calls;
+        PIK_ASSERT(zones_[i].BiasedOffset() == biased_offset);
+        // Swap with predecessor (more conservative than move to front,
+        // but at least as successful).
+        const Accumulator prev = zones_[i - 1];
+        zones_[i - 1] = zones_[i];
+        zones_[i] = prev;
+        return;
+      }
+    }
+
+    // Not found; create a new Accumulator.
+    PIK_ASSERT(num_zones_ < kMaxZones);
+    Accumulator* PIK_RESTRICT zone = zones_ + num_zones_;
+    zone->num_calls = (biased_offset << Accumulator::kNumCallBits) + num_calls;
+    zone->total_duration = duration;
+    PIK_ASSERT(zone->BiasedOffset() == biased_offset);
+    ++num_zones_;
+#endif
+  }
+
+  // Each instantiation of a function template seems to get its own copy of
+  // __func__ and GCC doesn't merge them. An N^2 search for duplicates is
+  // acceptable because we only expect a few dozen zones.
+  void MergeDuplicates() {
+    const char* string_origin = StringOrigin();
+    for (size_t i = 0; i < num_zones_; ++i) {
+      const size_t biased_offset = zones_[i].BiasedOffset();
+      const char* name = string_origin + biased_offset;
+      // Separate num_calls from biased_offset so we can add them together.
+      uint64_t num_calls = zones_[i].NumCalls();
+
+      // Add any subsequent duplicates to num_calls and total_duration.
+      for (size_t j = i + 1; j < num_zones_;) {
+        if (!strcmp(name, string_origin + zones_[j].BiasedOffset())) {
+          num_calls += zones_[j].NumCalls();
+          zones_[i].total_duration += zones_[j].total_duration;
+          // Fill hole with last item.
+          zones_[j] = zones_[--num_zones_];
+        } else {  // Name differed, try next Accumulator.
+          ++j;
+        }
+      }
+
+      PIK_ASSERT(num_calls < (1ULL << Accumulator::kNumCallBits));
+
+      // Re-pack regardless of whether any duplicates were found.
+      zones_[i].num_calls =
+          (biased_offset << Accumulator::kNumCallBits) + num_calls;
+    }
+  }
+
+  uint64_t analyze_elapsed_ = 0;  // Ticks spent in analysis/printing so far.
+  uint64_t self_overhead_ = 0;
+  uint64_t child_overhead_ = 0;
+
+  size_t depth_ = 0;      // Number of active zones.
+  size_t num_zones_ = 0;  // Number of retired zones.
+
+  alignas(64) ProfilerNode nodes_[kMaxDepth];  // Stack
+  alignas(64) Accumulator zones_[kMaxZones];   // Self-organizing list
+};
+
+// Per-thread packet storage (allocated via CacheAligned) plus its Results.
+class ThreadSpecific {
+  static constexpr size_t kBufferCapacity =
+      CacheAligned::kCacheLineSize / sizeof(Packet);
+
+ public:
+  // "name" is used to sanity-check offsets fit in kOffsetBits.
+  explicit ThreadSpecific(const char* name)
+      : packets_(static_cast<Packet*>(
+            CacheAligned::Allocate(PROFILER_THREAD_STORAGE << 20))),
+        num_packets_(0),
+        max_packets_(PROFILER_THREAD_STORAGE << 17),  // MiB / 8-byte Packet.
+        string_origin_(StringOrigin()) {
+    // Even in optimized builds (with NDEBUG), verify that this zone's name
+    // offset fits within the allotted space. If not, UpdateOrAdd is likely to
+    // overrun zones_[]. We also PIK_ASSERT(), but users often do not run debug
+    // builds. Checking here on the cold path (only reached once per thread)
+    // is cheap, but it only covers one zone. Strict '<' as in Packet::Make.
+    const size_t biased_offset = name - string_origin_;
+    PIK_CHECK(biased_offset < (1ULL << Packet::kOffsetBits));
+  }
+
+  ~ThreadSpecific() { CacheAligned::Free(packets_); }
+
+  // Depends on Zone => defined below.
+  void ComputeOverhead();
+
+  // Records entry into the zone called "name" at the given timestamp.
+  void WriteEntry(const char* name, const uint64_t timestamp) {
+    const size_t biased_offset = name - string_origin_;
+    Write(Packet::Make(biased_offset, timestamp));
+  }
+
+  // Records a zone exit; exits are tagged with the offset kOffsetBias.
+  void WriteExit(const uint64_t timestamp) {
+    const size_t biased_offset = Packet::kOffsetBias;
+    Write(Packet::Make(biased_offset, timestamp));
+  }
+
+  // Flushes any buffered packets to storage and analyzes everything stored.
+  void AnalyzeRemainingPackets() {
+#if PIK_ARCH_X64
+    // Ensures prior weakly-ordered streaming stores are globally visible.
+    _mm_sfence();
+
+    // Storage full => empty it.
+    if (num_packets_ + buffer_size_ > max_packets_) {
+      results_.AnalyzePackets(packets_, num_packets_);
+      num_packets_ = 0;
+    }
+    memcpy(packets_ + num_packets_, buffer_, buffer_size_ * sizeof(Packet));
+    num_packets_ += buffer_size_;
+    buffer_size_ = 0;
+#endif
+
+    results_.AnalyzePackets(packets_, num_packets_);
+    num_packets_ = 0;
+  }
+
+  Results& GetResults() { return results_; }
+
+ private:
+  // Write packet to buffer/storage, emptying them as needed.
+  void Write(const Packet packet) {
+#if PIK_ARCH_X64
+    // Buffer full => copy to storage.
+    if (buffer_size_ == kBufferCapacity) {
+      // Storage full => empty it.
+      if (num_packets_ + kBufferCapacity > max_packets_) {
+        results_.AnalyzePackets(packets_, num_packets_);
+        num_packets_ = 0;
+      }
+      // This buffering halves observer overhead and decreases the overall
+      // runtime by about 3%.
+      CacheAligned::StreamCacheLine(buffer_, packets_ + num_packets_);
+      num_packets_ += kBufferCapacity;
+      buffer_size_ = 0;
+    }
+    buffer_[buffer_size_] = packet;
+    ++buffer_size_;
+#else
+    // Write directly to storage.
+    if (num_packets_ >= max_packets_) {
+      results_.AnalyzePackets(packets_, num_packets_);
+      num_packets_ = 0;
+    }
+    packets_[num_packets_] = packet;
+    ++num_packets_;
+#endif
+  }
+
+  // Write-combining buffer to avoid cache pollution. Must be the first
+  // non-static member to ensure cache-line alignment.
+#if PIK_ARCH_X64
+  Packet buffer_[kBufferCapacity];
+  size_t buffer_size_ = 0;
+#endif
+
+  // Contiguous storage for zone enter/exit packets.
+  Packet* const PIK_RESTRICT packets_;
+  size_t num_packets_;
+  const size_t max_packets_;
+  // Cached here because we already read this cache line on zone entry/exit.
+  const char* PIK_RESTRICT string_origin_;
+  Results results_;
+};
+
+// Registry of every thread's ThreadSpecific, used to merge and print results.
+class ThreadList {
+ public:
+  // Thread-safe: each caller atomically claims the next free slot.
+  void Add(ThreadSpecific* const ts) {
+    const uint32_t slot = num_threads_.fetch_add(1, std::memory_order_relaxed);
+    PIK_CHECK(slot < kMaxThreads);
+    threads_[slot] = ts;
+  }
+
+  // Single-threaded. Analyzes pending packets of every registered thread.
+  void PrintResults() {
+    const uint32_t count = num_threads_.load(std::memory_order_relaxed);
+    for (uint32_t t = 0; t < count; ++t) {
+      threads_[t]->AnalyzeRemainingPackets();
+    }
+    if (count == 0) return;
+
+    // Fold every other thread's results into the first thread's, then print.
+    Results& merged = threads_[0]->GetResults();
+    for (uint32_t t = 1; t < count; ++t) {
+      merged.Assimilate(threads_[t]->GetResults());
+    }
+    merged.Print();
+
+    for (uint32_t t = 0; t < count; ++t) {
+      threads_[t]->GetResults().Reset();
+    }
+  }
+
+ private:
+  // Owning pointers.
+  alignas(64) ThreadSpecific* threads_[kMaxThreads];
+  std::atomic<uint32_t> num_threads_{0};
+};
+
+// RAII zone enter/exit recorder constructed by the PROFILER_ZONE and
+// PROFILER_FUNC macros; also responsible for initializing ThreadSpecific.
+class Zone {
+ public:
+  // "name" must be a string literal (see StringOrigin).
+  PIK_NOINLINE explicit Zone(const char* name) {
+    PIK_COMPILER_FENCE;
+    ThreadSpecific* PIK_RESTRICT thread_specific = StaticThreadSpecific();
+    if (PIK_UNLIKELY(thread_specific == nullptr)) {  // First zone on this thread.
+      void* mem = CacheAligned::Allocate(sizeof(ThreadSpecific));
+      thread_specific = new (mem) ThreadSpecific(name);
+      // Must happen before ComputeOverhead, which re-enters this ctor.
+      Threads().Add(thread_specific);
+      StaticThreadSpecific() = thread_specific;
+      thread_specific->ComputeOverhead();
+    }
+
+    // (Capture timestamp ASAP, not inside WriteEntry.)
+    PIK_COMPILER_FENCE;
+    const uint64_t timestamp = TicksBefore();
+    thread_specific->WriteEntry(name, timestamp);
+  }
+
+  PIK_NOINLINE ~Zone() {
+    PIK_COMPILER_FENCE;
+    const uint64_t timestamp = TicksAfter();
+    StaticThreadSpecific()->WriteExit(timestamp);
+    PIK_COMPILER_FENCE;
+  }
+
+  // Call exactly once after all threads have exited all zones.
+  static void PrintResults() { Threads().PrintResults(); }
+
+ private:
+  // Returns reference to the thread's ThreadSpecific pointer (initially null).
+  // Function-local static avoids needing a separate definition.
+  static ThreadSpecific*& StaticThreadSpecific() {
+    static thread_local ThreadSpecific* thread_specific;
+    return thread_specific;
+  }
+
+  // Returns the singleton ThreadList. Non time-critical.
+  static ThreadList& Threads() {
+    static ThreadList threads_;
+    return threads_;
+  }
+};
+
+// Creates a zone starting from here until the end of the current scope.
+// Timestamps will be recorded when entering and exiting the zone.
+// "name" must be a string literal, which is ensured by merging with "".
+#define PROFILER_ZONE(name) \
+  PIK_COMPILER_FENCE;       \
+  const Zone zone("" name); \
+  PIK_COMPILER_FENCE
+
+// Creates a zone for an entire function (when placed at its beginning).
+// Shorter/more convenient than PROFILER_ZONE; the zone is named __func__.
+#define PROFILER_FUNC        \
+  PIK_COMPILER_FENCE;        \
+  const Zone zone(__func__); \
+  PIK_COMPILER_FENCE
+
+#define PROFILER_PRINT_RESULTS Zone::PrintResults
+
+inline void ThreadSpecific::ComputeOverhead() {
+  // Calibrates the two per-zone overheads deducted in Results::AnalyzePackets.
+  // 1) Self overhead: ticks between the timestamps of an empty zone. Even with
+  // frequency throttling disabled, this is multimodal (32, 34, 48, 52, 59, 62).
+  uint64_t self_overhead;
+  {
+    const size_t kNumSamples = 32;
+    uint32_t samples[kNumSamples];
+    for (size_t idx_sample = 0; idx_sample < kNumSamples; ++idx_sample) {
+      const size_t kNumDurations = 1024;
+      uint32_t durations[kNumDurations];
+
+      for (size_t idx_duration = 0; idx_duration < kNumDurations;
+           ++idx_duration) {
+        {
+          PROFILER_ZONE("Dummy Zone (never shown)");
+        }
+#if PIK_ARCH_X64
+        const uint64_t duration = results_.ZoneDuration(buffer_);
+        buffer_size_ = 0;
+#else
+        const uint64_t duration = results_.ZoneDuration(packets_);
+        num_packets_ = 0;
+#endif
+        durations[idx_duration] = static_cast<uint32_t>(duration);
+        PIK_CHECK(num_packets_ == 0);
+      }
+      CountingSort(durations, durations + kNumDurations);
+      samples[idx_sample] = HalfSampleMode()(durations, kNumDurations);
+    }
+    // Median.
+    CountingSort(samples, samples + kNumSamples);
+    self_overhead = samples[kNumSamples / 2];
+#if PROFILER_PRINT_OVERHEAD
+    printf("Overhead: %" PRIu64 "\n", self_overhead);  // uint64_t, not size_t.
+#endif
+    results_.SetSelfOverhead(self_overhead);
+  }
+
+  // 2) Child overhead: delay before the start timestamp / after the end one.
+  const size_t kNumSamples = 32;
+  uint32_t samples[kNumSamples];
+  for (size_t idx_sample = 0; idx_sample < kNumSamples; ++idx_sample) {
+    const size_t kNumDurations = 16;
+    uint32_t durations[kNumDurations];
+    for (size_t idx_duration = 0; idx_duration < kNumDurations;
+         ++idx_duration) {
+      const size_t kReps = 10000;
+      // Analysis time should not be included => must fit within buffer.
+      PIK_CHECK(kReps * 2 < max_packets_);
+#if PIK_ARCH_X64
+      _mm_mfence();
+#endif
+      const uint64_t t0 = TicksBefore();
+      for (size_t i = 0; i < kReps; ++i) {
+        PROFILER_ZONE("Dummy");
+      }
+#if PIK_ARCH_X64
+      _mm_sfence();
+#endif
+      const uint64_t t1 = TicksAfter();
+#if PIK_ARCH_X64
+      PIK_CHECK(num_packets_ + buffer_size_ == kReps * 2);
+      buffer_size_ = 0;
+#else
+      PIK_CHECK(num_packets_ == kReps * 2);
+#endif
+      num_packets_ = 0;  // Discard the dummy packets without analyzing them.
+      const uint64_t avg_duration = (t1 - t0 + kReps / 2) / kReps;  // Rounded.
+      durations[idx_duration] =
+          static_cast<uint32_t>(ClampedSubtract(avg_duration, self_overhead));
+    }
+    CountingSort(durations, durations + kNumDurations);
+    samples[idx_sample] = HalfSampleMode()(durations, kNumDurations);
+  }
+  CountingSort(samples, samples + kNumSamples);
+  const uint64_t child_overhead = samples[9 * kNumSamples / 10];  // ~90th pct.
+#if PROFILER_PRINT_OVERHEAD
+  printf("Child overhead: %" PRIu64 "\n", child_overhead);
+#endif
+  results_.SetChildOverhead(child_overhead);
+}
+
+}  // namespace pik
+
+#else  // !(PROFILER_ENABLED && 0): the macros below expand to nothing.
+#define PROFILER_ZONE(name)
+#define PROFILER_FUNC
+#define PROFILER_PRINT_RESULTS()
+#endif  // PROFILER_ENABLED && 0
+
+#endif  // PIK_PROFILER_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/quant_weights.cc b/codec/L2/demos/pikEnc/host/pik/quant_weights.cc
new file mode 100755
index 0000000000..f7dc81e1ab
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/quant_weights.cc
@@ -0,0 +1,1077 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+#include "pik/quant_weights.h"
+
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <limits>
+#include "pik/bit_reader.h"
+#include "pik/cache_aligned.h"
+#include "pik/common.h"
+#include "pik/dct.h"
+#include "pik/huffman_decode.h"
+#include "pik/huffman_encode.h"
+#include "pik/image.h"
+#include "pik/pik_info.h"
+#include "pik/status.h"
+#include "pik/write_bits.h"
+
+namespace pik {
+
+// kQuantWeights[N * N * c + N * y + x] is the relative weight of the (x, y)
+// coefficient in component c. Higher weights correspond to finer quantization
+// intervals and more bits spent in encoding.
+
+namespace {
+// Expands six weights per channel into three consecutive 8x8 row-major
+// matrices in weights[0..192). Index 0 of each matrix receives the
+// placeholder 0xBAD; every other cell takes one of the six band values.
+void GetQuantWeightsDCT2(const float dct2weights[3][6], double* weights) {
+  for (size_t c = 0; c < 3; c++) {
+    double* const block = weights + c * 64;
+    const float* const w = dct2weights[c];
+    // Sets the n x n square whose top-left cell is row y0, column x0.
+    const auto fill = [block](size_t y0, size_t x0, size_t n, double value) {
+      for (size_t y = y0; y < y0 + n; y++) {
+        for (size_t x = x0; x < x0 + n; x++) {
+          block[y * 8 + x] = value;
+        }
+      }
+    };
+
+    block[0] = 0xBAD;
+    block[1] = block[8] = w[0];
+    block[9] = w[1];
+    // 2x2 squares completing the top-left 4x4 region.
+    fill(0, 2, 2, w[2]);
+    fill(2, 0, 2, w[2]);
+    fill(2, 2, 2, w[3]);
+    // 4x4 squares completing the 8x8 matrix.
+    fill(0, 4, 4, w[4]);
+    fill(4, 0, 4, w[4]);
+    fill(4, 4, 4, w[5]);
+  }
+}
+
+
+// Returns a pointer to a function-local static table of 3 * 8 * 8 weights,
+// which is recomputed on every call from the constants below.
+const double* GetQuantWeightsLines() {
+  // The first value does not matter: it is the DC which is quantized elsewhere.
+  static const double kPositionWeights[64] = {
+      0,   100, 100, 100, 100, 100, 100, 5, 100, 100, 50, 20, 20, 10, 5, 5,
+      100, 100, 50,  20,  20,  10,  5,   5, 100, 50,  50, 20, 20, 10, 5, 5,
+      100, 20,  20,  20,  20,  10,  5,   5, 100, 10,  10, 10, 10, 10, 5, 5,
+      100, 5,   5,   5,   5,   5,   5,   5, 5,   5,   5,  5,  5,  5,  5, 5,
+  };
+  static const double kChannelWeights[3] = {0.2, 0.5, 0.01};
+  static const double kGlobal = 35.0;
+
+  static double table[3 * 8 * 8] = {};
+
+  // Entry i belongs to channel i / 64 and to position i % 64 within the
+  // row-major 8x8 matrix: position weight * channel weight * global scale.
+  for (size_t i = 0; i < 3 * 64; i++) {
+    const size_t c = i / 64;
+    table[i] = kPositionWeights[i % 64] * kChannelWeights[c] * kGlobal;
+  }
+  return table;
+}
+
+
+// Per channel c, all 64 entries take idweights[c][0], except indices 1 and 8
+// (idweights[c][1]) and index 9 (idweights[c][2]).
+void GetQuantWeightsIdentity(const float idweights[3][3], double* weights) {
+  for (size_t c = 0; c < 3; c++) {
+    double* const block = weights + 64 * c;
+    for (size_t i = 0; i < 64; i++) block[i] = idweights[c][0];
+    block[1] = block[8] = idweights[c][1];
+    block[9] = idweights[c][2];
+  }
+}
+
+// Computes quant weights for a SX*SY-sized transform, using num_bands distance
+// bands and num_ebands eccentricity bands, into out[3 * SX * SY]. If print_mode
+// is 1, prints the resulting matrix to stderr; if print_mode is 2, prints it in
+// a format suitable for a 3d plot with gnuplot.
+template <size_t SX, size_t SY, size_t print_mode = 0>
+Status GetQuantWeights(
+    const float distance_bands[3][DctQuantWeightParams::kMaxDistanceBands],
+    size_t num_bands,
+    const float eccentricity_bands[3][DctQuantWeightParams::kMaxRadialBands],
+    size_t num_ebands, double* out) {
+  // Maps a signed step v to a positive factor: 1 + v if v > 0, else 1 / (1 - v).
+  auto mult = [](double v) { return v > 0 ? 1 + v : 1 / (1 - v); };
+
+  // Geometric interpolation between the two entries of array[0..len) that
+  // surround position pos / max of the range; needs that index + 1 < len.
+  auto interpolate = [](double pos, double max, double* array, size_t len) {
+    double scaled_pos = pos * (len - 1) / max;
+    size_t idx = scaled_pos;
+    PIK_ASSERT(idx + 1 < len);
+    double a = array[idx];
+    double b = array[idx + 1];
+    return a * pow(b / a, scaled_pos - idx);
+  };
+
+  for (size_t c = 0; c < 3; c++) {
+    if (print_mode) {
+      fprintf(stderr, "Channel %zu\n", c);  // %zu: c is a size_t.
+    }
+    double bands[DctQuantWeightParams::kMaxDistanceBands] = {
+        distance_bands[c][0]};
+    for (size_t i = 1; i < num_bands; i++) {
+      bands[i] = bands[i - 1] * mult(distance_bands[c][i]);
+      if (bands[i] < 0) return PIK_FAILURE("Invalid distance bands");
+    }
+    double ebands[DctQuantWeightParams::kMaxRadialBands + 1] = {1.0};
+    for (size_t i = 1; i <= num_ebands; i++) {
+      ebands[i] = ebands[i - 1] * mult(eccentricity_bands[c][i - 1]);
+      if (ebands[i] < 0) return PIK_FAILURE("Invalid eccentricity bands");
+    }
+    for (size_t y = 0; y < SY; y++) {
+      for (size_t x = 0; x < SX; x++) {
+        double dx = 1.0 * x / (SX - 1);
+        double dy = 1.0 * y / (SY - 1);
+        double distance = std::sqrt(dx * dx + dy * dy);
+        double wd =
+            interpolate(distance, std::sqrt(2) + 1e-6, bands, num_bands);
+        double eccentricity =
+            (x == 0 && y == 0) ? 0 : std::abs((double)dx - dy) / distance;
+        double we =
+            interpolate(eccentricity, 1.0 + 1e-6, ebands, num_ebands + 1);
+        double weight = we * wd;
+
+        if (print_mode == 1) {
+          fprintf(stderr, "%15.12f, ", weight);
+        }
+        if (print_mode == 2) {
+          fprintf(stderr, "%zu %zu %15.12f\n", x, y, weight);
+        }
+        out[c * SX * SY + y * SX + x] = weight;
+      }
+      if (print_mode) fprintf(stderr, "\n");
+      if (print_mode == 1) fprintf(stderr, "\n");
+    }
+    if (print_mode) fprintf(stderr, "\n");
+  }
+  return true;
+}
+
+// TODO(veluca): use proper encoding for floats. If not, use integer
+// encoding/decoding functions from byte_order.h. Also consider moving those
+// fields to use the header machinery.
+void EncodeUint(uint32_t v, std::string* s) {
+  // Big-endian: append the most significant byte first.
+  for (int shift = 24; shift >= 0; shift -= 8) {
+    s->push_back(static_cast<char>(static_cast<uint8_t>(v >> shift)));
+  }
+}
+
+void EncodeFloat(float v, std::string* s) {
+  static_assert(sizeof(float) == sizeof(uint32_t),
+                "Float should be composed of 32 bits!");
+  uint32_t bits;  // In-memory bit pattern of v, appended via EncodeUint.
+  memcpy(&bits, &v, sizeof(bits));
+  EncodeUint(bits, s);
+}
+
+uint32_t DecodeUint(BitReader* br) {
+  br->FillBitBuffer();
+  // Four 8-bit reads, most significant byte first (mirrors EncodeUint). The
+  // single refill above presumably covers all 32 bits - TODO confirm.
+  uint32_t value = 0;
+  for (int i = 0; i < 4; i++) value = (value << 8) | br->ReadBits(8);
+  return value;
+}
+
+// Reads 32 bits through DecodeUint() and reinterprets them as a float.
+float DecodeFloat(BitReader* br) {
+  const uint32_t bits = DecodeUint(br);
+  float value;
+  memcpy(&value, &bits, sizeof(value));
+  return value;
+}
+
+// Appends `params` to `*s`:
+//   - one byte holding num_distance_bands,
+//   - num_distance_bands floats for each of the 3 channels,
+//   - one byte holding num_eccentricity_bands,
+//   - num_eccentricity_bands floats for each of the 3 channels.
+// Each float is written with EncodeFloat().
+void EncodeDctParams(const DctQuantWeightParams& params, std::string* s) {
+  // Dereference before appending: `s += x` would only advance the local
+  // pointer, dropping the count byte and corrupting every later write.
+  *s += (uint8_t)params.num_distance_bands;
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t i = 0; i < params.num_distance_bands; i++) {
+      EncodeFloat(params.distance_bands[c][i], s);
+    }
+  }
+  *s += (uint8_t)params.num_eccentricity_bands;
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t i = 0; i < params.num_eccentricity_bands; i++) {
+      EncodeFloat(params.eccentricity_bands[c][i], s);
+    }
+  }
+}
+
+// Reads DCT weight parameters from `br` into `*params`: a one-byte distance
+// band count, that many floats for each of the 3 channels, a one-byte
+// eccentricity band count, and that many floats for each of the 3 channels.
+//
+// Fails if the distance band count is zero or exceeds
+// DctQuantWeightParams::kMaxDistanceBands, or if the eccentricity band count
+// exceeds DctQuantWeightParams::kMaxRadialBands.
+Status DecodeDctParams(BitReader* br, DctQuantWeightParams* params) {
+  br->FillBitBuffer();
+  // The count must come from the stream before it is validated; otherwise the
+  // checks and the loop below act on whatever `*params` already contained.
+  params->num_distance_bands = br->ReadBits(8);
+  if (params->num_distance_bands > DctQuantWeightParams::kMaxDistanceBands)
+    return PIK_FAILURE("Too many distance bands");
+  if (params->num_distance_bands == 0)
+    return PIK_FAILURE("Too few distance bands");
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t i = 0; i < params->num_distance_bands; i++) {
+      params->distance_bands[c][i] = DecodeFloat(br);
+    }
+  }
+  br->FillBitBuffer();
+  params->num_eccentricity_bands = br->ReadBits(8);
+  if (params->num_eccentricity_bands > DctQuantWeightParams::kMaxRadialBands)
+    return PIK_FAILURE("Too many eccentricity bands");
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t i = 0; i < params->num_eccentricity_bands; i++) {
+      params->eccentricity_bands[c][i] = DecodeFloat(br);
+    }
+  }
+  return true;
+}
+
+// Serializes `encoding` into a byte string. The first byte is the mode; what
+// follows depends on the mode, with every float written by EncodeFloat().
+std::string Encode(const QuantEncoding& encoding) {
+  std::string out(1, encoding.mode);
+  switch (encoding.mode) {
+    case QuantEncoding::kQuantModeLibrary: {
+      // One byte: the predefined table index.
+      out += encoding.predefined;
+      break;
+    }
+    case QuantEncoding::kQuantModeID: {
+      // 3 x 3 weights.
+      for (size_t ch = 0; ch < 3; ++ch) {
+        for (size_t k = 0; k < 3; ++k) {
+          EncodeFloat(encoding.idweights[ch][k], &out);
+        }
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT2: {
+      // 3 x 6 weights.
+      for (size_t ch = 0; ch < 3; ++ch) {
+        for (size_t k = 0; k < 6; ++k) {
+          EncodeFloat(encoding.dct2weights[ch][k], &out);
+        }
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT4: {
+      // 3 x 2 multipliers, then the DCT parameters.
+      for (size_t ch = 0; ch < 3; ++ch) {
+        for (size_t k = 0; k < 2; ++k) {
+          EncodeFloat(encoding.dct4multipliers[ch][k], &out);
+        }
+      }
+      EncodeDctParams(encoding.dct_params, &out);
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT: {
+      EncodeDctParams(encoding.dct_params, &out);
+      break;
+    }
+    case QuantEncoding::kQuantModeRaw: {
+      out += (uint8_t)encoding.block_dim;
+      // Side length of the square table, in coefficients.
+      const size_t side = encoding.block_dim * kBlockDim;
+      for (size_t ch = 0; ch < 3; ++ch) {
+        for (size_t row = 0; row < side; ++row) {
+          for (size_t col = 0; col < side; ++col) {
+            // The top-left block_dim x block_dim corner is not written.
+            const bool in_corner =
+                col < encoding.block_dim && row < encoding.block_dim;
+            if (in_corner) continue;
+            EncodeFloat(encoding.weights[ch][row * side + col], &out);
+          }
+        }
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeRawScaled: {
+      out += (uint8_t)encoding.block_dim;
+      // Side length of the square table, in coefficients.
+      const size_t side = encoding.block_dim * kBlockDim;
+      // Only the first weight plane is written, followed by three scales.
+      for (size_t row = 0; row < side; ++row) {
+        for (size_t col = 0; col < side; ++col) {
+          // The top-left block_dim x block_dim corner is not written.
+          const bool in_corner =
+              col < encoding.block_dim && row < encoding.block_dim;
+          if (in_corner) continue;
+          EncodeFloat(encoding.weights[0][row * side + col], &out);
+        }
+      }
+      for (size_t ch = 0; ch < 3; ++ch) {
+        EncodeFloat(encoding.scales[ch], &out);
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeCopy: {
+      // One byte: the index of the table to copy.
+      out += encoding.source;
+      break;
+    }
+  }
+  return out;
+}
+
+// Reads one quantization-table encoding from `br` into `*encoding`.
+//
+// `required_size` is compared against the table dimensions the mode implies:
+// the ID, DCT2 and DCT4 modes demand 1, and the two raw modes demand that the
+// block_dim read from the stream equal it. `idx` is the index of the table
+// being decoded; a copy mode may only name a source strictly below it.
+//
+// Fails on an unknown mode, on any of the checks above, on an out-of-range
+// predefined table, or when the reader reports itself unhealthy afterwards.
+Status Decode(BitReader* br, QuantEncoding* encoding, size_t required_size,
+              size_t idx) {
+  br->FillBitBuffer();
+  int mode = br->ReadBits(8);
+  switch (mode) {
+    case QuantEncoding::kQuantModeLibrary: {
+      br->FillBitBuffer();
+      encoding->predefined = br->ReadBits(8);
+      if (encoding->predefined >= kNumPredefinedTables) {
+        return PIK_FAILURE("Invalid predefined table");
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeID: {
+      if (required_size != 1) return PIK_FAILURE("Invalid mode");
+      for (size_t ch = 0; ch < 3; ++ch) {
+        for (size_t k = 0; k < 3; ++k) {
+          encoding->idweights[ch][k] = DecodeFloat(br);
+        }
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT2: {
+      if (required_size != 1) return PIK_FAILURE("Invalid mode");
+      for (size_t ch = 0; ch < 3; ++ch) {
+        for (size_t k = 0; k < 6; ++k) {
+          encoding->dct2weights[ch][k] = DecodeFloat(br);
+        }
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT4: {
+      if (required_size != 1) return PIK_FAILURE("Invalid mode");
+      for (size_t ch = 0; ch < 3; ++ch) {
+        for (size_t k = 0; k < 2; ++k) {
+          encoding->dct4multipliers[ch][k] = DecodeFloat(br);
+        }
+      }
+      PIK_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params));
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT: {
+      PIK_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params));
+      break;
+    }
+    case QuantEncoding::kQuantModeRaw: {
+      br->FillBitBuffer();
+      encoding->block_dim = br->ReadBits(8);
+      if (required_size != encoding->block_dim)
+        return PIK_FAILURE("Invalid mode");
+      // Side length of the square table, in coefficients.
+      const size_t side = encoding->block_dim * kBlockDim;
+      for (size_t ch = 0; ch < 3; ++ch) {
+        for (size_t row = 0; row < side; ++row) {
+          for (size_t col = 0; col < side; ++col) {
+            // Override LLF values in the quantization table with invalid
+            // values; nothing is read from the stream for them.
+            const bool is_llf =
+                col < encoding->block_dim && row < encoding->block_dim;
+            if (is_llf) {
+              encoding->weights[ch][row * side + col] = 0xBAD;
+            } else {
+              encoding->weights[ch][row * side + col] = DecodeFloat(br);
+            }
+          }
+        }
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeRawScaled: {
+      br->FillBitBuffer();
+      encoding->block_dim = br->ReadBits(8);
+      if (required_size != encoding->block_dim)
+        return PIK_FAILURE("Invalid mode");
+      // Side length of the square table, in coefficients.
+      const size_t side = encoding->block_dim * kBlockDim;
+      for (size_t row = 0; row < side; ++row) {
+        for (size_t col = 0; col < side; ++col) {
+          // Override LLF values in the quantization table with invalid values;
+          // nothing is read from the stream for them.
+          const bool is_llf =
+              col < encoding->block_dim && row < encoding->block_dim;
+          if (is_llf) {
+            encoding->weights[0][row * side + col] = 0xBAD;
+          } else {
+            encoding->weights[0][row * side + col] = DecodeFloat(br);
+          }
+        }
+      }
+      for (size_t ch = 0; ch < 3; ++ch) {
+        encoding->scales[ch] = DecodeFloat(br);
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeCopy: {
+      br->FillBitBuffer();
+      encoding->source = br->ReadBits(8);
+      if (encoding->source >= idx) {
+        return PIK_FAILURE("Invalid source table");
+      }
+      break;
+    }
+    default:
+      return PIK_FAILURE("Invalid quantization table encoding");
+  }
+  encoding->mode = QuantEncoding::Mode(mode);
+  if (!br->Healthy()) return PIK_FAILURE("Failed reading quantization tables");
+  return true;
+}
+
+// Writes the table for one quantization kind into `table`, starting at `*pos`.
+//
+// For each of the 3 channels the entries are numerators[i] / weights[c*num+i],
+// where `numerators` depends only on `kind` and `weights` is derived from
+// `encoding`. The start of each channel is recorded in offsets[kind * 3 + c]
+// and `*pos` is advanced past everything written.
+//
+// `encoding` must not be in library or copy mode; the caller is expected to
+// have substituted the real parameters already.
+//
+// Fails if the weight generator fails, or if an entry is negative or larger
+// than the largest finite float.
+Status ComputeQuantTable(const QuantEncoding& encoding, float* table,
+                         size_t* offsets, QuantKind kind, size_t* pos) {
+  double weights[3 * kMaxQuantTableSize];
+  double numerators[kMaxQuantTableSize];
+  // Weight generator matching the block size of `kind`; chosen below.
+  decltype(&GetQuantWeights<8, 8>) get_dct_weights = nullptr;
+
+  constexpr int N = kBlockDim;
+  constexpr int block_size = N * N;
+  const float* idct4_scales = IDCTScales<N / 2>();
+  const float* idct_scales = IDCTScales<N>();
+  const float* idct16_scales = IDCTScales<2 * N>();
+  const float* idct32_scales = IDCTScales<4 * N>();
+  // Number of entries per channel for this kind.
+  size_t num = 0;
+  // Step 1: per-kind entry count, weight generator and numerators.
+  switch (kind) {
+    case kQuantKindDCT8: {
+      num = block_size;
+      get_dct_weights = GetQuantWeights<8, 8>;
+      for (size_t i = 0; i < num; i++) {
+        const size_t x = i % N;
+        const size_t y = i / N;
+        const float idct_scale = idct_scales[x] * idct_scales[y] / num;
+        numerators[i] = idct_scale;
+      }
+      break;
+    }
+    case kQuantKindDCT16: {
+      num = 4 * block_size;
+      get_dct_weights = GetQuantWeights<16, 16>;
+      for (size_t i = 0; i < num; i++) {
+        const size_t x = i % (2 * N);
+        const size_t y = i / (2 * N);
+        const float idct_scale = idct16_scales[x] * idct16_scales[y] / num;
+        numerators[i] = idct_scale;
+      }
+      break;
+    }
+    case kQuantKindDCT32: {
+      num = 16 * block_size;
+      get_dct_weights = GetQuantWeights<32, 32>;
+      for (size_t i = 0; i < num; i++) {
+        const size_t x = i % (4 * N);
+        const size_t y = i / (4 * N);
+        const float idct_scale = idct32_scales[x] * idct32_scales[y] / num;
+        numerators[i] = idct_scale;
+      }
+      break;
+    }
+    case kQuantKindDCT4: {
+      num = block_size;
+      get_dct_weights = GetQuantWeights<4, 4>;
+      // The table still has N x N entries; each 2x2 group of positions shares
+      // one (x / 2, y / 2) scale pair.
+      for (size_t i = 0; i < N * N; i++) {
+        const size_t x = i % N;
+        const size_t y = i / N;
+        float idct_scale =
+            idct4_scales[x / 2] * idct4_scales[y / 2] / (N / 2 * N / 2);
+        numerators[i] = idct_scale;
+      }
+      break;
+    }
+    case kQuantKindID:
+    case kQuantKindDCT2:
+    case kQuantKindLines: {
+      // These kinds use a numerator of 1 for every position.
+      get_dct_weights = GetQuantWeights<8, 8>;
+      num = block_size;
+      std::fill_n(numerators, block_size, 1.0);
+      break;
+    }
+    case kNumQuantKinds: {
+      PIK_ASSERT(false);
+    }
+  }
+  PIK_ASSERT(get_dct_weights != nullptr);
+
+  // Step 2: fill `weights` (3 consecutive runs of `num` entries) from the
+  // encoding.
+  switch (encoding.mode) {
+    case QuantEncoding::kQuantModeLibrary:
+    case QuantEncoding::kQuantModeCopy: {
+      // Library and copy quant encoding should get replaced by the actual
+      // parameters by the caller.
+      PIK_ASSERT(false);
+      break;
+    }
+    case QuantEncoding::kQuantModeID: {
+      PIK_ASSERT(num == block_size);
+      GetQuantWeightsIdentity(encoding.idweights, weights);
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT2: {
+      PIK_ASSERT(num == block_size);
+      GetQuantWeightsDCT2(encoding.dct2weights, weights);
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT4: {
+      PIK_ASSERT(num == block_size);
+      double weights4x4[3 * 4 * 4];
+      PIK_RETURN_IF_ERROR(get_dct_weights(
+          encoding.dct_params.distance_bands,
+          encoding.dct_params.num_distance_bands,
+          encoding.dct_params.eccentricity_bands,
+          encoding.dct_params.num_eccentricity_bands, weights4x4));
+      for (size_t c = 0; c < 3; c++) {
+        // Replicate every 4x4 weight over a 2x2 group of the 8x8 table.
+        for (size_t y = 0; y < kBlockDim; y++) {
+          for (size_t x = 0; x < kBlockDim; x++) {
+            weights[c * num + y * kBlockDim + x] =
+                weights4x4[c * 16 + (y / 2) * 4 + (x / 2)];
+          }
+        }
+        // Positions (1,0) and (0,1) are divided by the first multiplier and
+        // position (1,1) by the second.
+        weights[c * num + 1] /= encoding.dct4multipliers[c][0];
+        weights[c * num + N] /= encoding.dct4multipliers[c][0];
+        weights[c * num + N + 1] /= encoding.dct4multipliers[c][1];
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeDCT: {
+      PIK_RETURN_IF_ERROR(
+          get_dct_weights(encoding.dct_params.distance_bands,
+                          encoding.dct_params.num_distance_bands,
+                          encoding.dct_params.eccentricity_bands,
+                          encoding.dct_params.num_eccentricity_bands, weights));
+      break;
+    }
+    case QuantEncoding::kQuantModeRaw: {
+      PIK_ASSERT(num == encoding.block_dim * encoding.block_dim * block_size);
+      for (size_t c = 0; c < 3; c++) {
+        for (size_t i = 0; i < num; i++) {
+          weights[c * num + i] = encoding.weights[c][i];
+        }
+      }
+      break;
+    }
+    case QuantEncoding::kQuantModeRawScaled: {
+      PIK_ASSERT(num == encoding.block_dim * encoding.block_dim * block_size);
+      // A single weight plane, scaled separately for each channel.
+      for (size_t c = 0; c < 3; c++) {
+        for (size_t i = 0; i < num; i++) {
+          weights[c * num + i] = encoding.weights[0][i] * encoding.scales[c];
+        }
+      }
+      break;
+    }
+  }
+  // Step 3: emit numerator / weight per entry, rejecting values that are
+  // negative or do not fit in a float.
+  for (size_t c = 0; c < 3; c++) {
+    offsets[kind * 3 + c] = *pos;
+    for (size_t i = 0; i < num; i++) {
+      double val = numerators[i] / weights[c * num + i];
+      if (val > std::numeric_limits<float>::max() || val < 0) {
+        return PIK_FAILURE("Invalid quantization table");
+      }
+      table[(*pos)++] = val;
+    }
+  }
+  return true;
+}
+}  // namespace
+
+// This definition is needed before C++17.
+constexpr size_t DequantMatrices::required_size_[kNumQuantKinds];
+
+// Serializes the stored encodings: one byte with the number of tables, then
+// each table as produced by pik::Encode(). Trailing tables that are in library
+// mode with predefined index 0 are dropped from the count and not written.
+// If `info` is non-null, the output size is added to info->total_size.
+std::string DequantMatrices::Encode(PikImageSizeInfo* info) const {
+  PIK_ASSERT(encodings_.size() < std::numeric_limits<uint8_t>::max());
+  uint8_t count = encodings_.size();
+  for (; count > 0; --count) {
+    const auto& last = encodings_[count - 1];
+    const bool is_default = last.mode == QuantEncoding::kQuantModeLibrary &&
+                            last.predefined == 0;
+    if (!is_default) break;
+  }
+  std::string out(1, count);
+  for (size_t table = 0; table < count; ++table) {
+    out += pik::Encode(encodings_[table]);
+  }
+  if (info != nullptr) info->total_size += out.size();
+  return out;
+}
+
+// Reads a one-byte table count followed by that many table encodings, then
+// recomputes the tables. Storage is rounded up to a whole number of
+// kNumQuantKinds-sized groups (at least one); entries that are not read from
+// the stream stay at QuantEncoding::Library(0).
+Status DequantMatrices::Decode(BitReader* br) {
+  br->FillBitBuffer();
+  const size_t num_tables = br->ReadBits(8);
+  const size_t rounded_up = DivCeil(num_tables, size_t(kNumQuantKinds));
+  const size_t num_full_tables = rounded_up == 0 ? 1 : rounded_up;
+  encodings_.clear();
+  encodings_.resize(num_full_tables * kNumQuantKinds,
+                    QuantEncoding::Library(0));
+  for (size_t table = 0; table < num_tables; ++table) {
+    const size_t kind = table % kNumQuantKinds;
+    PIK_RETURN_IF_ERROR(
+        pik::Decode(br, &encodings_[table], required_size_[kind], table));
+  }
+  return DequantMatrices::Compute();
+}
+
+// Narrows a double to float.
+float V(double v) {
+  const float narrowed = static_cast<float>(v);
+  return narrowed;
+}
+
+// Rebuilds the dequantization tables from `encodings_`.
+//
+// Builds the predefined library (one encoding per quantization kind), allocates
+// table storage, replaces copy-mode and library-mode encodings in a local copy
+// with concrete parameters, and calls ComputeQuantTable() for every entry.
+// When `need_inv_matrices_` is set, a table of reciprocals is filled as well.
+//
+// Returns the first failure reported by ComputeQuantTable(), otherwise true.
+Status DequantMatrices::Compute() {
+  // Running write position into `table_`, advanced by ComputeQuantTable().
+  size_t pos = 0;
+
+  static_assert(kNumQuantKinds == 7,
+                "Update this function when adding new quantization kinds.");
+  static_assert(kNumPredefinedTables == 1,
+                "Update this function when adding new quantization matrices to "
+                "the library.");
+
+  // Predefined encodings, indexed by [predefined table][quantization kind].
+  QuantEncoding library[kNumPredefinedTables][kNumQuantKinds];
+
+  // DCT8
+  {
+    static const float distance_bands[3][6] = {{
+                                                   V(6.7128322747011593),
+                                                   V(-0.75596993600717899),
+                                                   V(-0.47741990264036249),
+                                                   V(-0.81596269409665323),
+                                                   V(0.068767170571484654),
+                                                   V(-20.887837229035178),
+                                               },
+                                               {
+                                                   V(1.0308008496910044),
+                                                   V(0.12563824546332958),
+                                                   V(-0.98580000474151119),
+                                                   V(-0.74783541528315123),
+                                                   V(-0.18837957703830949),
+                                                   V(-0.50540560621792985),
+                                               },
+                                               {
+                                                   V(0.52922318082990072),
+                                                   V(-6.2099138554436495),
+                                                   V(2.4559360555622511),
+                                                   V(-1.8645272975104017),
+                                                   V(2.1626488944731781),
+                                                   V(-20.514468231628619),
+                                               }};
+
+    static const float eccentricity_bands[3][3] = {
+        {
+            V(0.020372599856493687),
+            V(0.0060672219272973112),
+            V(-0.037634794641950318),
+        },
+        {
+            V(0.19964780548254896),
+            V(-0.20598244512425934),
+            V(0.22606880802424917),
+        },
+        {
+            V(-0.53357069165890425),
+            V(0.067877070499761022),
+            V(2.3529080139232321),
+        },
+    };
+    library[0][kQuantKindDCT8] = QuantEncoding::DCT(
+        DctQuantWeightParams(distance_bands, eccentricity_bands));
+  }
+
+  // Identity
+  {
+    static const float weights[3][3] = {
+        {
+            V(174.50360988711236),
+            V(7098.4292418698387),
+            V(4459.2881530953237),
+        },
+        {
+            V(29.181414754407044),
+            V(1462.9387613234978),
+            V(1364.8889051351412),
+        },
+        {
+            V(10.427519104606029),
+            V(23.975682913740158),
+            V(11.132318126587421),
+        },
+    };
+    library[0][kQuantKindID] =
+        QuantEncoding::Identity(weights[0], weights[1], weights[2]);
+  }
+
+  // DCT2
+  {
+    static const float weights[3][6] = {
+        {
+            V(3838.4633860359086),
+            V(2711.45620096628),
+            V(740.86588368521473),
+            V(673.9663156327548),
+            V(146.0409913884842),
+            V(71.829450601171018),
+        },
+        {
+            V(855.89982430974862),
+            V(835.22486787836522),
+            V(268.7887798267422),
+            V(161.58150295707284),
+            V(46.818625352324425),
+            V(28.025832307111365),
+        },
+        {
+            V(135.95933746046285),
+            V(100.36113442694905),
+            V(52.759147600958094),
+            V(54.55000110144173),
+            V(10.61194822539392),
+            V(6.7321557070577027),
+        },
+    };
+    library[0][kQuantKindDCT2] =
+        QuantEncoding::DCT2(weights[0], weights[1], weights[2]);
+  }
+
+  // DCT4 (quant_kind 3)
+  {
+    static const float distance_bands[3][4] = {
+        {
+            V(20.464243458003235),
+            V(-1.3216361675651374),
+            V(-0.90068227414064506),
+            V(-0.51692149442719293),
+        },
+        {
+            V(3.4892753025959551),
+            V(-0.3851659055605578),
+            V(-1.6024424566582844),
+            V(-0.090185175016963492),
+        },
+        {
+            V(2.0543507462254667),
+            V(-17.083007167897751),
+            V(1.1553317008558754),
+            V(-17.06851301189084),
+        },
+    };
+
+    static const float eccentricity_bands[3][2] = {
+        {
+            V(-1.6540674814777321),
+            V(1.4353603203078817),
+        },
+        {
+            V(0.23246389755392743),
+            V(0.11670410074064763),
+        },
+        {
+            V(0.039676509798850998),
+            V(1.7114284305197651),
+        },
+    };
+    static const float muls[3][2] = {
+        {
+            V(0.47188805913083881),
+            V(0.74665256923039514),
+        },
+        {
+            V(0.27688273718512119),
+            V(0.32787026106006584),
+        },
+        {
+            V(0.94572969005995233),
+            V(1.649348791638829),
+        },
+    };
+    library[0][kQuantKindDCT4] = QuantEncoding::DCT4(
+        DctQuantWeightParams(distance_bands, eccentricity_bands), muls[0],
+        muls[1], muls[2]);
+  }
+
+  // DCT16
+  {
+    static const float distance_bands[3][6] = {{
+                                                   V(2.8081053178832627),
+                                                   V(-2.4300085829870786),
+                                                   V(0.11683860865233302),
+                                                   V(-0.48546810937737683),
+                                                   V(-772.68999845881376),
+                                                   V(-30.167218264433497),
+                                               },
+                                               {
+                                                   V(0.61651518963555374),
+                                                   V(-0.89670752611689697),
+                                                   V(-1.4823203833923126),
+                                                   V(-0.4392530120704895),
+                                                   V(-0.96459916681512592),
+                                                   V(-4.5043195385133448),
+                                               },
+                                               {
+                                                   V(0.35315014395417571),
+                                                   V(-6.1622959506013206),
+                                                   V(1.3987478239168303),
+                                                   V(-5.221619505420998),
+                                                   V(-87.102308097158911),
+                                                   V(-29.330248661246706),
+                                               }};
+
+    static const float eccentricity_bands[3][3] = {
+        {
+            V(-0.1082223243760141),
+            V(0.16581730095161393),
+            V(-0.22834397719738264),
+        },
+        {
+            V(0.064907061033690178),
+            V(-0.07809582529363121),
+            V(-0.044761862879806769),
+        },
+        {
+            V(-0.23977989838080313),
+            V(-0.14631104822608662),
+            V(0.026626451443453436),
+        },
+    };
+    library[0][kQuantKindDCT16] = QuantEncoding::DCT(
+        DctQuantWeightParams(distance_bands, eccentricity_bands));
+  }
+
+  // DCT32
+  {
+    static const float distance_bands[3][8] = {{
+                                                   V(0.84716094396432662),
+                                                   V(-2.4766455452218108),
+                                                   V(0.2471181572547147),
+                                                   V(0.57650543843415769),
+                                                   V(-4.0833701828342583),
+                                                   V(-28.279479541125081),
+                                                   V(1.8036899065163079),
+                                                   V(39.052449003220673),
+                                               },
+                                               {
+                                                   V(0.17234631384979648),
+                                                   V(-1.1404450629580913),
+                                                   V(-0.69128963252295739),
+                                                   V(-0.53270455087075774),
+                                                   V(-0.46759485378919513),
+                                                   V(-0.89356535322414299),
+                                                   V(0.65008570941628885),
+                                                   V(-0.66302446211939114),
+                                               },
+                                               {
+                                                   V(0.22743363189568044),
+                                                   V(-11.670472775652776),
+                                                   V(-4.9179016084759626),
+                                                   V(-5.4264719484417459),
+                                                   V(-10.370646227045418),
+                                                   V(1.9002093523030437),
+                                                   V(-2.6705664701413623),
+                                                   V(-20.889766266401665),
+                                               }};
+
+    static const float eccentricity_bands[3][4] = {
+        {
+            V(-0.77613778421797797),
+            V(0.8972017714545496),
+            V(-0.93436764214829893),
+            V(0.18670848590931757),
+        },
+        {
+            V(0.089533427641859925),
+            V(0.08358828409098),
+            V(-0.094110728686133543),
+            V(-0.1286652050040859),
+        },
+        {
+            V(1.0095255806548),
+            V(-1.5336522088790263),
+            V(-6.9680701189357501),
+            V(1.3664229471277314),
+        },
+    };
+    library[0][kQuantKindDCT32] = QuantEncoding::DCT(
+        DctQuantWeightParams(distance_bands, eccentricity_bands));
+  }
+
+  // Diagonal lines
+  {
+    static const float kPositionWeights[64] = {
+        0,   100, 100, 100, 100, 100, 100, 5, 100, 100, 50, 20, 20, 10, 5, 5,
+        100, 100, 50,  20,  20,  10,  5,   5, 100, 50,  50, 20, 20, 10, 5, 5,
+        100, 20,  20,  20,  20,  10,  5,   5, 100, 10,  10, 10, 10, 10, 5, 5,
+        100, 5,   5,   5,   5,   5,   5,   5, 5,   5,   5,  5,  5,  5,  5, 5,
+    };
+    static const float kChannelWeights[3] = {7.0, 17.5, 0.35};
+    library[0][kQuantKindLines] =
+        QuantEncoding::RawScaled(1, kPositionWeights, kChannelWeights);
+  }
+
+  // One TotalTableSize() worth of floats per group of kNumQuantKinds encodings.
+  table_memory_ = AllocateArray(encodings_.size() / kNumQuantKinds *
+                                TotalTableSize() * sizeof(float));
+  table_ = reinterpret_cast<float*>(table_memory_.get());
+  table_offsets_.resize(encodings_.size() * 3);
+
+  // Work on a copy so that `encodings_` keeps its copy/library entries.
+  auto encodings = encodings_;
+
+  for (size_t table = 0; table < encodings.size(); table++) {
+    // Follow copy references until a non-copy encoding is reached.
+    while (encodings[table].mode == QuantEncoding::kQuantModeCopy) {
+      encodings[table] = encodings[encodings[table].source];
+    }
+    // Substitute the predefined parameters for this table's kind.
+    if (encodings[table].mode == QuantEncoding::kQuantModeLibrary) {
+      encodings[table] =
+          library[encodings[table].predefined][table % kNumQuantKinds];
+    }
+    PIK_RETURN_IF_ERROR(
+        ComputeQuantTable(encodings[table], table_, table_offsets_.data(),
+                          (QuantKind)(table % kNumQuantKinds), &pos));
+  }
+
+  PIK_ASSERT(pos == encodings.size() / kNumQuantKinds * TotalTableSize());
+
+  size_ = pos;
+  if (need_inv_matrices_) {
+    // Element-wise reciprocal of the table computed above.
+    inv_table_memory_ = AllocateArray(pos * sizeof(float));
+    inv_table_ = reinterpret_cast<float*>(inv_table_memory_.get());
+    for (size_t i = 0; i < pos; i++) {
+      inv_table_[i] = 1.0f / table_[i];
+    }
+  }
+  return true;
+}
+
+// Placeholder selection of dequantization matrices: resets `*dequant_matrices`
+// to a default-constructed instance with inverse matrices enabled, sizes
+// `*control_field` to one entry per kTileDim x kTileDim tile of `opsin` and
+// zeroes it, and zeroes `table_map`. The remaining arguments are unused.
+void FindBestDequantMatrices(
+    float butteraugli_target, float intensity_multiplier, const Image3F& opsin,
+    const ImageF& initial_quant_field, DequantMatrices* dequant_matrices,
+    ImageB* control_field, uint8_t table_map[kMaxQuantControlFieldValue][256]) {
+  // TODO(veluca): heuristics for in-bitstream quant tables. Notice that this
+  // function does *not* know the exact values of the quant field
+  // post-FindBestQuantization.
+  *dequant_matrices = DequantMatrices(/*need_inv_matrices=*/true);
+
+  const auto tiles_x = DivCeil(opsin.xsize(), kTileDim);
+  const auto tiles_y = DivCeil(opsin.ysize(), kTileDim);
+  *control_field = ImageB(tiles_x, tiles_y);
+  ZeroFillImage(control_field);
+
+  // Each row of `table_map` holds 256 one-byte entries.
+  memset(table_map, 0, kMaxQuantControlFieldValue * sizeof(table_map[0]));
+}
+
+// Fills every pixel of `*dequant_cf`, row by row, with Huffman-coded symbols
+// read from `br`. The code is described by histogram data at the start of the
+// stream; after the last symbol the reader is moved to a byte boundary.
+// Fails if the histogram data cannot be read or the boundary jump fails.
+bool DecodeDequantControlField(BitReader* PIK_RESTRICT br,
+                               ImageB* PIK_RESTRICT dequant_cf) {
+  HuffmanDecodingData entropy;
+  if (!entropy.ReadFromBitStream(br)) {
+    return PIK_FAILURE("Invalid histogram data.");
+  }
+  HuffmanDecoder decoder;
+  const size_t xsize = dequant_cf->xsize();
+  const size_t ysize = dequant_cf->ysize();
+  for (size_t ty = 0; ty < ysize; ++ty) {
+    uint8_t* PIK_RESTRICT out_row = dequant_cf->Row(ty);
+    for (size_t tx = 0; tx < xsize; ++tx) {
+      br->FillBitBuffer();
+      out_row[tx] = decoder.ReadSymbol(entropy, br);
+    }
+  }
+  PIK_RETURN_IF_ERROR(br->JumpToByteBoundary());
+  return true;
+}
+
+// Huffman-codes every pixel of `dequant_cf` and returns the resulting bytes.
+//
+// The output starts with the Huffman tree built from the histogram of pixel
+// values, followed by one code per pixel in row-major order, zero-padded to a
+// byte boundary. If `info` is non-null, its histogram_size, entropy_coded_bits
+// and total_size counters are increased accordingly.
+std::string EncodeDequantControlField(const ImageB& dequant_cf,
+                                      PikImageSizeInfo* info) {
+  // Sizes are unsigned; the loops below use size_t counters to match them
+  // rather than comparing a signed int against an unsigned bound.
+  const size_t xsize = dequant_cf.xsize();
+  const size_t ysize = dequant_cf.ysize();
+  const size_t max_out_size = xsize * ysize + 1024;
+  std::string output(max_out_size, 0);
+  size_t storage_ix = 0;
+  uint8_t* storage = reinterpret_cast<uint8_t*>(&output[0]);
+  storage[0] = 0;
+  // Count how often each of the 256 possible values occurs.
+  std::vector<uint32_t> histogram(256);
+  for (size_t y = 0; y < ysize; ++y) {
+    const uint8_t* PIK_RESTRICT row = dequant_cf.ConstRow(y);
+    for (size_t x = 0; x < xsize; ++x) {
+      ++histogram[row[x]];
+    }
+  }
+  std::vector<uint8_t> bit_depths(256);
+  std::vector<uint16_t> bit_codes(256);
+  BuildAndStoreHuffmanTree(histogram.data(), histogram.size(),
+                           bit_depths.data(), bit_codes.data(), &storage_ix,
+                           storage);
+  // Bits used by the stored tree; everything after this is symbol data.
+  const size_t histo_bits = storage_ix;
+  for (size_t y = 0; y < ysize; ++y) {
+    const uint8_t* PIK_RESTRICT row = dequant_cf.ConstRow(y);
+    for (size_t x = 0; x < xsize; ++x) {
+      WriteBits(bit_depths[row[x]], bit_codes[row[x]], &storage_ix, storage);
+    }
+  }
+  WriteZeroesToByteBoundary(&storage_ix, storage);
+  PIK_ASSERT((storage_ix >> 3) <= output.size());
+  output.resize(storage_ix >> 3);
+  if (info) {
+    info->histogram_size += histo_bits >> 3;
+    info->entropy_coded_bits += storage_ix - histo_bits;
+    info->total_size += output.size();
+  }
+  return output;
+}
+
+namespace {
+// Sets table_mask[cf][q - 1] to true for every pair that occurs in the image,
+// where `q` is a value of `quant_field` and `cf` is the `dequant_cf` value at
+// the same position divided by kTileDimInBlocks in both directions. Entries
+// for pairs that never occur are left untouched.
+void ComputeDequantControlFieldMapMask(
+    const ImageI& quant_field, const ImageB& dequant_cf,
+    bool table_mask[kMaxQuantControlFieldValue][256]) {
+  const size_t xsize = quant_field.xsize();
+  const size_t ysize = quant_field.ysize();
+  for (size_t by = 0; by < ysize; ++by) {
+    const int* PIK_RESTRICT quant_row = quant_field.ConstRow(by);
+    const uint8_t* PIK_RESTRICT cf_row =
+        dequant_cf.ConstRow(by / kTileDimInBlocks);
+    for (size_t bx = 0; bx < xsize; ++bx) {
+      const uint8_t cf_value = cf_row[bx / kTileDimInBlocks];
+      const int quant_index = quant_row[bx] - 1;
+      table_mask[cf_value][quant_index] = true;
+    }
+  }
+}
+
+}  // namespace
+
+// Huffman-codes the entries of `table_map` that are actually referenced.
+//
+// An entry counts as referenced when ComputeDequantControlFieldMapMask() marks
+// it for the given `quant_field` / `dequant_cf`. The output holds the Huffman
+// tree for the referenced entries, then their codes in row-major order,
+// zero-padded to a byte boundary. If `info` is non-null, its histogram_size,
+// entropy_coded_bits and total_size counters are increased accordingly.
+std::string EncodeDequantControlFieldMap(
+    const ImageI& quant_field, const ImageB& dequant_cf,
+    const uint8_t table_map[kMaxQuantControlFieldValue][256],
+    PikImageSizeInfo* info) {
+  bool in_use[kMaxQuantControlFieldValue][256] = {};
+  ComputeDequantControlFieldMapMask(quant_field, dequant_cf, in_use);
+
+  const size_t max_out_size = kMaxQuantControlFieldValue * 256 + 1024;
+  std::string output(max_out_size, 0);
+  uint8_t* storage = reinterpret_cast<uint8_t*>(&output[0]);
+  size_t storage_ix = 0;
+  storage[0] = 0;
+
+  // Histogram over the referenced entries only.
+  std::vector<uint32_t> histogram(256);
+  for (int cf = 0; cf < kMaxQuantControlFieldValue; ++cf) {
+    for (int q = 0; q < 256; ++q) {
+      if (in_use[cf][q]) {
+        ++histogram[table_map[cf][q]];
+      }
+    }
+  }
+
+  std::vector<uint8_t> bit_depths(256);
+  std::vector<uint16_t> bit_codes(256);
+  BuildAndStoreHuffmanTree(histogram.data(), histogram.size(),
+                           bit_depths.data(), bit_codes.data(), &storage_ix,
+                           storage);
+  const size_t histo_bits = storage_ix;
+
+  for (int cf = 0; cf < kMaxQuantControlFieldValue; ++cf) {
+    for (int q = 0; q < 256; ++q) {
+      if (in_use[cf][q]) {
+        const uint8_t symbol = table_map[cf][q];
+        WriteBits(bit_depths[symbol], bit_codes[symbol], &storage_ix, storage);
+      }
+    }
+  }
+  WriteZeroesToByteBoundary(&storage_ix, storage);
+  PIK_ASSERT((storage_ix >> 3) <= output.size());
+  output.resize(storage_ix >> 3);
+
+  if (info) {
+    info->histogram_size += histo_bits >> 3;
+    info->entropy_coded_bits += storage_ix - histo_bits;
+    info->total_size += output.size();
+  }
+  return output;
+}
+
+// Reads the referenced entries of `table_map` from `br`.
+//
+// `table_map` is zeroed first. Then, for every entry that
+// ComputeDequantControlFieldMapMask() marks for the given `quant_field` /
+// `dequant_cf`, one Huffman symbol is read in row-major order. Afterwards the
+// reader is moved to a byte boundary. Fails if the histogram data cannot be
+// read or the boundary jump fails.
+bool DecodeDequantControlFieldMap(
+    BitReader* PIK_RESTRICT br, const ImageI& quant_field,
+    const ImageB& dequant_cf,
+    uint8_t table_map[kMaxQuantControlFieldValue][256]) {
+  memset(table_map, 0, kMaxQuantControlFieldValue * 256 * sizeof(uint8_t));
+
+  bool in_use[kMaxQuantControlFieldValue][256] = {};
+  ComputeDequantControlFieldMapMask(quant_field, dequant_cf, in_use);
+
+  HuffmanDecodingData entropy;
+  if (!entropy.ReadFromBitStream(br)) {
+    return PIK_FAILURE("Invalid histogram data.");
+  }
+  HuffmanDecoder decoder;
+  for (size_t cf = 0; cf < kMaxQuantControlFieldValue; ++cf) {
+    for (size_t q = 0; q < 256; ++q) {
+      if (in_use[cf][q]) {
+        br->FillBitBuffer();
+        table_map[cf][q] = decoder.ReadSymbol(entropy, br);
+      }
+    }
+  }
+  PIK_RETURN_IF_ERROR(br->JumpToByteBoundary());
+  return true;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/quant_weights.h b/codec/L2/demos/pikEnc/host/pik/quant_weights.h
new file mode 100755
index 0000000000..f54af09fd2
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/quant_weights.h
@@ -0,0 +1,293 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_QUANT_WEIGHTS_H_
+#define PIK_QUANT_WEIGHTS_H_
+
+#include <cstdint>
+#include <vector>
+#include "pik/bit_reader.h"
+#include "pik/cache_aligned.h"
+#include "pik/common.h"
+#include "pik/image.h"
+#include "pik/pik_info.h"
+#include "pik/status.h"
+
+namespace pik {
+
+static constexpr size_t kMaxQuantTableSize = kBlockDim * kBlockDim * 16;
+static constexpr size_t kNumPredefinedTables = 1;
+
+// ac_strategy.h GetQuantKind static_asserts these values remain unchanged.
+enum QuantKind {
+  kQuantKindDCT8 = 0,  // Also the kind used for DC (see QuantizeRoundtripDC).
+  kQuantKindID,
+  kQuantKindDCT2,
+  kQuantKindDCT4,
+  kQuantKindDCT16,
+  kQuantKindDCT32,
+  kQuantKindLines,
+  kNumQuantKinds  // Count of kinds, not a kind; DequantMatrices asserts == 7.
+};
+
+struct DctQuantWeightParams {  // Per-channel band tables for DCT quant weights.
+  static constexpr size_t kMaxRadialBands = 8;     // capacity of each ecc. row
+  static constexpr size_t kMaxDistanceBands = 16;  // capacity of each dist. row
+  size_t num_distance_bands;  // valid entries per row of distance_bands
+  float distance_bands[3][kMaxDistanceBands];
+  size_t num_eccentricity_bands;  // valid entries per row of eccentricity_bands
+  float eccentricity_bands[3][kMaxRadialBands];
+
+  DctQuantWeightParams() : num_distance_bands(0), num_eccentricity_bands(0) {}
+  template <size_t num_dist_bands, size_t num_ecc_bands>
+  DctQuantWeightParams(const float dist_bands[3][num_dist_bands],
+                       const float ecc_bands[3][num_ecc_bands])
+      : num_distance_bands(num_dist_bands),
+        num_eccentricity_bands(num_ecc_bands) {
+    static_assert(num_dist_bands <= kMaxDistanceBands, "too many dist bands");
+    static_assert(num_ecc_bands <= kMaxRadialBands, "too many ecc bands");
+    for (size_t c = 0; c < 3; c++) {  // the asserts above keep memcpy in bounds
+      memcpy(distance_bands[c], dist_bands[c], sizeof(float) * num_dist_bands);
+      memcpy(eccentricity_bands[c], ecc_bands[c],
+             sizeof(float) * num_ecc_bands);
+    }
+  }
+};
+
+struct QuantEncoding {  // How one quant table is specified; `mode` picks the fields.
+  static QuantEncoding Library(uint8_t predefined) {
+    QuantEncoding enc{/*mode=*/kQuantModeLibrary};
+    PIK_ASSERT(predefined < kNumPredefinedTables);
+    enc.predefined = predefined;
+    return enc;
+  }
+  // Identity mode: copies 3 weights for each of the x, y and b channels.
+  static QuantEncoding Identity(const float* xweights, const float* yweights,
+                                const float* bweights) {
+    QuantEncoding encoding;
+    encoding.mode = kQuantModeID;
+    memcpy(encoding.idweights[0], xweights, sizeof(float) * 3);
+    memcpy(encoding.idweights[1], yweights, sizeof(float) * 3);
+    memcpy(encoding.idweights[2], bweights, sizeof(float) * 3);
+    return encoding;
+  }
+  // DCT2 mode: copies 6 weights for each of the x, y and b channels.
+  static QuantEncoding DCT2(const float* xweights, const float* yweights,
+                            const float* bweights) {
+    QuantEncoding encoding;
+    encoding.mode = kQuantModeDCT2;
+    memcpy(encoding.dct2weights[0], xweights, sizeof(float) * 6);
+    memcpy(encoding.dct2weights[1], yweights, sizeof(float) * 6);
+    memcpy(encoding.dct2weights[2], bweights, sizeof(float) * 6);
+    return encoding;
+  }
+  // DCT4 mode: band parameters plus 2 extra multipliers per channel.
+  static QuantEncoding DCT4(const DctQuantWeightParams& params,
+                            const float* xmul, const float* ymul,
+                            const float* bmul) {
+    QuantEncoding encoding;
+    encoding.mode = kQuantModeDCT4;
+    encoding.dct_params = params;
+    memcpy(encoding.dct4multipliers[0], xmul, sizeof(float) * 2);
+    memcpy(encoding.dct4multipliers[1], ymul, sizeof(float) * 2);
+    memcpy(encoding.dct4multipliers[2], bmul, sizeof(float) * 2);
+    PIK_ASSERT(params.num_distance_bands <=
+               DctQuantWeightParams::kMaxDistanceBands);
+    PIK_ASSERT(params.num_eccentricity_bands <=
+               DctQuantWeightParams::kMaxRadialBands);
+    return encoding;
+  }
+  // DCT mode: band parameters only; band counts are asserted within limits.
+  static QuantEncoding DCT(const DctQuantWeightParams& params) {
+    QuantEncoding encoding;
+    encoding.mode = kQuantModeDCT;
+    encoding.dct_params = params;
+    PIK_ASSERT(params.num_distance_bands <=
+               DctQuantWeightParams::kMaxDistanceBands);
+    PIK_ASSERT(params.num_eccentricity_bands <=
+               DctQuantWeightParams::kMaxRadialBands);
+    return encoding;
+  }
+  // Raw mode: copies block_dim^2 * kBlockDim^2 weights for each of 3 channels.
+  static QuantEncoding Raw(size_t block_dim, const float* xweights,
+                           const float* yweights, const float* bweights) {
+    QuantEncoding encoding;
+    encoding.mode = kQuantModeRaw;
+    encoding.block_dim = block_dim;
+    PIK_ASSERT(block_dim == 1 || block_dim == 2 || block_dim == 4);
+    memcpy(encoding.weights[0], xweights,
+           block_dim * block_dim * kBlockDim * kBlockDim * sizeof(float));
+    memcpy(encoding.weights[1], yweights,
+           block_dim * block_dim * kBlockDim * kBlockDim * sizeof(float));
+    memcpy(encoding.weights[2], bweights,
+           block_dim * block_dim * kBlockDim * kBlockDim * sizeof(float));
+    // Override LLF values in the quantization table with invalid values.
+    for (size_t c = 0; c < 3; c++) {
+      for (size_t y = 0; y < encoding.block_dim; y++) {
+        for (size_t x = 0; x < encoding.block_dim; x++) {
+          encoding.weights[c][y * encoding.block_dim * kBlockDim + x] = 0xBAD;
+        }
+      }
+    }
+    return encoding;
+  }
+  // Raw-scaled mode: one base table (stored in weights[0]) plus 3 scales.
+  static QuantEncoding RawScaled(size_t block_dim, const float* base_weights,
+                                 const float* scales) {
+    QuantEncoding encoding;
+    encoding.mode = kQuantModeRawScaled;
+    encoding.block_dim = block_dim;
+    PIK_ASSERT(block_dim == 1 || block_dim == 2 || block_dim == 4);
+    memcpy(encoding.weights[0], base_weights,
+           block_dim * block_dim * kBlockDim * kBlockDim * sizeof(float));
+    memcpy(encoding.scales, scales, sizeof(encoding.scales));
+    // Override LLF values in the quantization table with invalid values.
+    for (size_t y = 0; y < encoding.block_dim; y++) {
+      for (size_t x = 0; x < encoding.block_dim; x++) {
+        encoding.weights[0][y * encoding.block_dim * kBlockDim + x] = 0xBAD;
+      }
+    }
+    return encoding;
+  }
+  // Copy mode: refers to another table by its index `source`.
+  static QuantEncoding Copy(uint8_t source) {
+    QuantEncoding enc{/*mode=*/kQuantModeCopy};
+    enc.source = source;
+    return enc;
+  }
+  // Selects which of the fields below are meaningful.
+  enum Mode {
+    kQuantModeLibrary,
+    kQuantModeID,
+    kQuantModeDCT2,
+    kQuantModeDCT4,
+    kQuantModeDCT,
+    kQuantModeRaw,
+    kQuantModeRawScaled,
+    kQuantModeCopy,
+  };
+  Mode mode;
+
+  // Only used for raw and raw scaled.
+  uint32_t block_dim;
+
+  // Raw weights. Uses only the first channel for raw scaled, all
+  // three for Raw, unused otherwise. `scales` is only used for raw scaled.
+  float weights[3][kMaxQuantTableSize];
+  float scales[3];
+
+  // Weights for identity.
+  float idweights[3][3];
+
+  // Weights for DCT2.
+  float dct2weights[3][6];
+
+  // Extra multipliers for coefficients 01/10 and 11 for DCT4.
+  float dct4multipliers[3][2];
+
+  // Weights for DCT4+ tables.
+  DctQuantWeightParams dct_params;
+
+  // Which predefined table to use. Only used if mode is kQuantModeLibrary.
+  uint8_t predefined;
+
+  // Which other quant table to copy; must copy from a table that comes before
+  // the current one. Only used if mode is kQuantModeCopy.
+  uint8_t source;
+};
+
+class DequantMatrices {  // Owns the (inverse) dequantization tables per kind.
+ public:
+  DequantMatrices(bool need_inv_matrices)
+      : need_inv_matrices_(need_inv_matrices),
+        encodings_({kNumQuantKinds, QuantEncoding::Library(0)}) {
+    // Default quantization tables need to be valid.
+    PIK_CHECK(Compute());
+  }
+  // Offset (in floats) of the matrix for (quant_table, quant_kind, channel c).
+  PIK_INLINE size_t MatrixOffset(uint8_t quant_table, size_t quant_kind,
+                                 int c) const {
+    const size_t idx = (quant_table * kNumQuantKinds + quant_kind) * 3 + c;
+    PIK_ASSERT(idx < table_offsets_.size());  // checks the index actually used
+    return table_offsets_[idx];
+  }
+
+  // Returns aligned memory.
+  PIK_INLINE const float* Matrix(uint8_t quant_table, size_t quant_kind,
+                                 int c) const {
+    PIK_ASSERT(quant_kind < kNumQuantKinds);
+    return &table_[MatrixOffset(quant_table, quant_kind, c)];
+  }
+
+  PIK_INLINE const float* InvMatrix(uint8_t quant_table, size_t quant_kind,
+                                    int c) const {
+    PIK_ASSERT(quant_kind < kNumQuantKinds);
+    return &inv_table_[MatrixOffset(quant_table, quant_kind, c)];
+  }
+
+  size_t Size() const { return size_; }
+
+  void SetCustom(const std::vector<QuantEncoding>& encodings) {
+    // For now, we require a constant number of quantization tables.
+    PIK_ASSERT(encodings.size() == kNumQuantKinds);
+    encodings_ = encodings;
+    // Called only in the encoder: should fail only for programmer errors.
+    PIK_CHECK(Compute());
+  }
+
+  std::string Encode(PikImageSizeInfo* info) const;
+  Status Decode(BitReader* br);
+
+ private:
+  Status Compute();
+  static size_t TotalTableSize() {
+    size_t res = 0;
+    for (size_t i = 0; i < kNumQuantKinds; i++) {
+      res += required_size_[i] * required_size_[i];
+    }
+    return res * kDCTBlockSize * 3;
+  }
+  CacheAlignedUniquePtr table_memory_;
+  float* table_;
+  CacheAlignedUniquePtr inv_table_memory_;
+  float* inv_table_;
+  std::vector<size_t> table_offsets_;
+  bool need_inv_matrices_;
+  size_t size_;
+  std::vector<QuantEncoding> encodings_;
+
+  static_assert(kNumQuantKinds == 7,
+                "Update this array when adding new quantization kinds.");
+  static constexpr size_t required_size_[kNumQuantKinds] = {1, 1, 1, 1,
+                                                            2, 4, 1};
+};
+
+static constexpr size_t kMaxQuantControlFieldValue = 16;
+
+void FindBestDequantMatrices(
+    float butteraugli_target, float intensity_multiplier, const Image3F& opsin,
+    const ImageF& initial_quant_field, DequantMatrices* dequant_matrices,
+    ImageB* control_field, uint8_t table_map[kMaxQuantControlFieldValue][256]);
+
+std::string EncodeDequantControlField(const ImageB& dequant_cf,
+                                      PikImageSizeInfo* info);
+
+bool DecodeDequantControlField(BitReader* PIK_RESTRICT br,
+                               ImageB* PIK_RESTRICT dequant_cf);
+
+std::string EncodeDequantControlFieldMap(
+    const ImageI& quant_field, const ImageB& dequant_cf,
+    const uint8_t table_map[kMaxQuantControlFieldValue][256],
+    PikImageSizeInfo* info);
+
+bool DecodeDequantControlFieldMap(
+    BitReader* PIK_RESTRICT br, const ImageI& quant_field,
+    const ImageB& dequant_cf,
+    uint8_t table_map[kMaxQuantControlFieldValue][256]);
+
+}  // namespace pik
+
+#endif  // PIK_QUANT_WEIGHTS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/quantizer.cc b/codec/L2/demos/pikEnc/host/pik/quantizer.cc
new file mode 100755
index 0000000000..498da1e3e2
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/quantizer.cc
@@ -0,0 +1,371 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "ap_int.h"
+#include "pik/ac_strategy.h"
+#include "pik/quantizer.h"
+#include <algorithm>
+#include <sstream>
+#include <stdio.h>
+#include <vector>
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/arch_specific.h"
+#include "pik/common.h"
+#include "pik/compiler_specific.h"
+#include "pik/dct.h"
+#include "pik/dct_util.h"
+#include "pik/profiler.h"
+#include "pik/quant_weights.h"
+#include "pik/simd/simd.h"
+
+namespace pik {
+
+static const int kDefaultQuant = 64;
+
+Quantizer::Quantizer(const DequantMatrices *dequant, int quant_xsize,
+                     int quant_ysize)
+    : Quantizer(dequant, quant_xsize, quant_ysize, kDefaultQuant,
+                kGlobalScaleDenom / kDefaultQuant) {}  // delegates with defaults
+
+Quantizer::Quantizer(const DequantMatrices *dequant, int quant_xsize,
+                     int quant_ysize, int quant_dc, int global_scale)
+    : quant_xsize_(quant_xsize), quant_ysize_(quant_ysize),
+      global_scale_(global_scale), quant_dc_(quant_dc),
+      quant_img_ac_(quant_xsize_, quant_ysize_), dequant_(dequant) {
+  RecomputeFromGlobalScale();  // NOTE(review): inv_quant_dc_ is not set here
+  // The AC quant field starts out as kDefaultQuant (via FillImage).
+  FillImage(kDefaultQuant, &quant_img_ac_);
+  // Per-channel zero-bias values start from kZeroBiasDefault.
+  memcpy(zero_bias_, kZeroBiasDefault, sizeof(kZeroBiasDefault));
+}
+
+// TODO(veluca): reclaim the unused bit in global_scale encoding.
+std::string Quantizer::Encode(PikImageSizeInfo *info) const {
+  // Three bytes: (global_scale_ - 1) high byte, low byte, then (quant_dc_ - 1).
+  const int scale_minus_one = global_scale_ - 1;
+  std::string bytes;
+  bytes.push_back(static_cast<char>(scale_minus_one >> 8));
+  bytes.push_back(static_cast<char>(scale_minus_one & 0xff));
+  bytes.push_back(static_cast<char>(quant_dc_ - 1));
+
+  // Debug trace of the values being encoded.
+  std::cout << std::dec << "global_scale_=" << global_scale_ << std::endl;
+  std::cout << std::dec << "quant_dc_=" << quant_dc_ << std::endl;
+  if (info) info->total_size += 3;
+  return bytes;
+}
+
+bool Quantizer::Decode(BitReader *br) {
+  const int scale_hi = br->ReadBits(8);  // high byte first, as Encode() writes
+  const int scale_lo = br->ReadBits(8);
+  global_scale_ = (((scale_hi << 8) | scale_lo) & 0x7FFF) + 1;
+  quant_dc_ = br->ReadBits(8) + 1;
+  RecomputeFromGlobalScale();
+  inv_quant_dc_ = inv_global_scale_ / quant_dc_;
+  return true;
+}
+
+void Quantizer::DumpQuantizationMap() const {
+  // Prints the global parameters, then one text line per row of the AC field.
+  printf("Global scale: %d (%.7f)\nDC quant: %d\n", global_scale_,
+         global_scale_ * 1.0 / kGlobalScaleDenom, quant_dc_);
+  printf("AC quantization Map:\n");
+  for (size_t y = 0; y < quant_img_ac_.ysize(); ++y) {
+    const auto *row = quant_img_ac_.Row(y);
+    for (size_t x = 0; x < quant_img_ac_.xsize(); ++x) printf(" %3d", row[x]);
+    printf("\n");
+  }
+}
+
+// Works in "DC image", i.e. transforms every pixel.
+Image3S QuantizeCoeffsDC(const Image3F &dc, const Quantizer &quantizer) {
+  // One quantized sample per input sample, for each of the three planes.
+  const size_t width = dc.xsize();
+  const size_t height = dc.ysize();
+  Image3S quantized(width, height);
+  for (int plane = 0; plane < 3; ++plane) {
+    for (size_t y = 0; y < height; ++y) {
+      const float *PIK_RESTRICT src = dc.PlaneRow(plane, y);
+      int16_t *PIK_RESTRICT dst = quantized.PlaneRow(plane, y);
+      for (size_t x = 0; x < width; ++x) {
+        dst[x] = quantizer.QuantizeDC(plane, src[x]);
+      }
+    }
+  }
+  return quantized;
+}
+
+ImageF QuantizeRoundtripDC(const Quantizer &quantizer, int c,
+                           const ImageF &dc) {
+  // All coordinates are blocks. Sizes are size_t (as in QuantizeCoeffsDC) so
+  // the loop comparisons below do not mix signed and unsigned values.
+  const size_t xsize_blocks = dc.xsize();
+  const size_t ysize_blocks = dc.ysize();
+  ImageF out(xsize_blocks, ysize_blocks);
+  // Always use DCT8 quantization kind for DC: the multiplier is the first
+  // DCT8 dequant matrix entry for channel c times the inverse DC quant.
+  const float mul = quantizer.DequantMatrix(0, kQuantKindDCT8, c)[0] *
+                    quantizer.inv_quant_dc();
+  for (size_t by = 0; by < ysize_blocks; ++by) {
+    const float *PIK_RESTRICT row_in = dc.ConstRow(by);
+    float *PIK_RESTRICT row_out = out.Row(by);
+    for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+      row_out[bx] = quantizer.QuantizeDC(c, row_in[bx]) * mul;
+    }
+  }
+  return out;
+}
+/*
+#define SHIFT_SCL  (15)
+#define FACTOR_SCL  (1<<SHIFT_SCL)
+#define SCLF( a) ((int)(a*FACTOR_SCL))
+typedef  short ap_frac;
+typedef  unsigned short apu_frac;
+typedef  short ap_frac_1;
+typedef  int   ap_frac_15;
+*/
+#define SHIFT_SCL (6)                // number of fractional bits
+#define FACTOR_SCL (1 << SHIFT_SCL)  // fixed-point value of 1.0
+#define SCLF(a) ((int)((a) * FACTOR_SCL))  // arg parenthesized: SCLF(x + y) ok
+typedef ap_int<SHIFT_SCL + 1> ap_frac;
+typedef ap_uint<SHIFT_SCL + 1> apu_frac;
+typedef ap_int<SHIFT_SCL + 1> ap_frac_1;
+typedef ap_int<SHIFT_SCL + 10> ap_frac_15;
+
+int16_t UpdateErr_int(ap_frac_15 val_i, apu_frac thres_i,
+                      char k //, int previous_row_err_i[8]
+                      ) {  // Quantizes one fixed-point value; error state is kept between calls.
+#pragma HLS inline
+  static ap_frac err_left;  // error carried from the previous call (static: persists)
+  static ap_frac previous_row_err_i[8];  // per-column error saved by earlier calls
+#pragma HLS ARRAY_PARTITION variable = previous_row_err_i complete dim = 1
+  int idx = k & 7;  // column index within a row of 8
+  short err_i;
+  // Incoming error: 0 at k == 0, else left-carried and/or previous-row error.
+  if (k == 0)
+    err_i = 0;
+  else if ((idx) == 0) {
+    err_i = previous_row_err_i[idx];
+  } else {
+    if (k > 7)
+      err_i = err_left + previous_row_err_i[idx];
+    else
+      err_i = err_left;
+  }
+  bool isPos = val_i > 0;
+  // Split val_i into its low SHIFT_SCL fraction bits and the remaining part.
+  int val_org_i = val_i;
+  bool isValOrg_1 = (val_org_i > FACTOR_SCL) || (0 - val_org_i > FACTOR_SCL);
+  apu_frac val_frac = val_i & (FACTOR_SCL - 1);
+  ap_frac_15 val_int = (val_i - val_frac) >> SHIFT_SCL;
+  bool isValIntZero = val_int == 0;
+  bool isValNegOne = val_int == -1;
+  // isZero_u / isZero_Nu: output forced to 0 when the error is / is not used.
+  bool isZero_u;
+  bool isZero_Nu;
+  bool isUseErr = (err_i > 0);
+  ap_frac_1 gap_u_Z_p = (0 << SHIFT_SCL) + val_frac + err_i / 2;       //;
+  ap_frac_1 gap_u_Z_n = -(((-1) << SHIFT_SCL) + val_frac - err_i / 2); //;
+  ap_frac_1 gap_u_Nz_p =
+      val_frac + err_i / 2 -
+      (((val_frac + err_i / 2 + FACTOR_SCL / 2) >> SHIFT_SCL) << SHIFT_SCL); //;
+  ap_frac_1 gap_u_Nz_n =
+      (((val_frac - err_i / 2 + FACTOR_SCL / 2) >> SHIFT_SCL) << SHIFT_SCL) -
+      val_frac + err_i / 2; //;
+  ap_frac_1 gap_un_Z_p = val_frac;
+  ap_frac_1 gap_un_Z_n = (FACTOR_SCL - val_frac); //-val_i;;
+  ap_frac_1 gap_un_Nz_p =
+      val_frac - (((val_frac + FACTOR_SCL / 2) >> SHIFT_SCL) << SHIFT_SCL);
+  ;
+  ap_frac_1 gap_un_Nz_n =
+      (((val_frac + FACTOR_SCL / 2) >> SHIFT_SCL) << SHIFT_SCL) - val_frac;
+  ;
+  ap_frac_1 err_i_gap = isValOrg_1 ? 0 : err_i;
+  ap_frac_1 err_i_gap_un_Z_p = err_i_gap + gap_un_Z_p;
+  ap_frac_1 err_i_gap_un_Z_n = err_i_gap + gap_un_Z_n;
+  ap_frac_1 err_i_gap_un_Nz_p = err_i_gap + gap_un_Nz_p;
+  ap_frac_1 err_i_gap_un_Nz_n = err_i_gap + gap_un_Nz_n;
+  bool NoCarry_u_p = val_frac < (thres_i - err_i / 2);
+  bool NoCarry_u_n = (FACTOR_SCL - val_frac) < (thres_i - err_i / 2);
+  bool NoCarry_Nu_p = val_frac < thres_i;
+  bool NoCarry_Nu_n = (FACTOR_SCL - val_frac) < thres_i;
+  // Only the flag for the active branch is assigned; the other stays unset.
+  if (isUseErr) {
+    if (isPos) {
+      if (isValIntZero && NoCarry_u_p)
+        isZero_u = true;
+      else
+        isZero_u = false;
+    } else {
+      if (isValNegOne && NoCarry_u_n)
+        isZero_u = true;
+      else
+        isZero_u = false;
+    }
+  }
+  if (!isUseErr) {
+    if (isPos) {
+      if (isValIntZero && NoCarry_Nu_p)
+        isZero_Nu = true;
+      else
+        isZero_Nu = false;
+    } else {
+      if (isValNegOne && NoCarry_Nu_n)
+        isZero_Nu = true;
+      else
+        isZero_Nu = false;
+    }
+  }
+  // Error handed to the next call: cleared at k == 0 and at column 7.
+  if (k == 0 || (idx) == 7) {
+    err_left = 0;
+  } else if (isUseErr) {
+    if (isZero_u) {
+      if (isPos)
+        err_left = gap_u_Z_p / 2;
+      else
+        err_left = gap_u_Z_n / 2;
+    } else {
+      if (isPos)
+        err_left = gap_u_Nz_p / 2;
+      else
+        err_left = gap_u_Nz_n / 2;
+    }
+  } else {
+    if (isZero_Nu) {
+      if (isPos)
+        err_left = err_i_gap_un_Z_p / 2; // + gap_un_Z_p;
+      else
+        err_left = err_i_gap_un_Z_n / 2; // + gap_un_Z_n;
+    } else {
+      if (isPos)
+        err_left = err_i_gap_un_Nz_p / 2; // + gap_un_Nz_p;
+      else
+        err_left = err_i_gap_un_Nz_n / 2; // + gap_un_Nz_n;
+    }
+  }
+  // Error stored for this column, read back when the same idx comes up again.
+  ap_frac err_new_i;
+  if (k == 0) {
+    err_new_i = 0;
+  } else if (isUseErr) {
+    if (isZero_u) {
+      if (isPos)
+        err_new_i = gap_u_Z_p;
+      else
+        err_new_i = gap_u_Z_n;
+    } else {
+      if (isPos)
+        err_new_i = gap_u_Nz_p;
+      else
+        err_new_i = gap_u_Nz_n;
+    }
+  } else {
+    if (isZero_Nu) {
+      if (isPos)
+        err_new_i = err_i_gap_un_Z_p; // + gap_un_Z_p;
+      else
+        err_new_i = err_i_gap_un_Z_n; // + gap_un_Z_n;
+    } else {
+      if (isPos)
+        err_new_i = err_i_gap_un_Nz_p; // + gap_un_Nz_p;
+      else
+        err_new_i = err_i_gap_un_Nz_n; // + gap_un_Nz_n;
+    }
+  }
+  // Column 7 stores the full error; every other column stores half of it.
+  if ((idx) == 7)
+    previous_row_err_i[idx] = err_new_i; // 1.0 * err;
+  else
+    previous_row_err_i[idx] = err_new_i / 2;
+  // Output: 0 if zeroed above, else val_i (+/- err_i / 2) rounded to nearest.
+  int16_t v_i;
+  if (isUseErr) {
+    if (isPos)
+      v_i = isZero_u ? 0 : (int16_t)(((val_i + err_i / 2 + FACTOR_SCL / 2) >>
+                                      SHIFT_SCL));
+    else
+      v_i = isZero_u ? 0 : (int16_t)(((val_i - err_i / 2 + FACTOR_SCL / 2) >>
+                                      SHIFT_SCL));
+  } else // err is not used
+    v_i = isZero_Nu ? 0 : (int16_t)(((val_i + FACTOR_SCL / 2) >> SHIFT_SCL));
+  if (v_i > 32767)  // NOTE(review): v_i is int16_t, so this can never be true
+    v_i = 32767;
+  if (v_i < -32767)
+    v_i = -32767;
+
+  return v_i;
+}
+
+void QuantizeBlockAC_core_L1(
+    uint8_t quant_table, int32_t quant, size_t quant_kind, int c, size_t xsize,
+    size_t ysize, const float *block_in, size_t in_stride, int16_t *block_out,
+    size_t out_stride,
+    const float *qm,   //    = dequant_->InvMatrix(quant_table, quant_kind, c);
+    const float qac,   // = Scale() * quant;
+    const float thres, // = zero_bias_[c];
+    size_t block_shift) {
+  // Scales each input by qm[pos] * qac and quantizes it via UpdateErr_int.
+  size_t kBlockSize = kBlockDim * kBlockDim;
+  //std::cout<<"instride="<<in_stride<<std::endl;
+  for (size_t iy = 0; iy < ysize; iy++) {
+    for (size_t ix = 0; ix < xsize; ix++) {
+      for (char k = 0; k < kBlockSize; ++k) {  // NOTE(review): char index; assumes kBlockSize <= 127
+#pragma HLS pipeline II = 1
+        size_t x = xsize * (k % kBlockDim) + ix;
+        size_t y = ysize * (k / kBlockDim) + iy;
+        size_t pos = y * kBlockDim * xsize + x;
+        size_t block_off = pos >> block_shift;
+        size_t block_idx = pos & (xsize * kBlockDim * kBlockDim - 1);
+        float v_block_in = block_in[block_off * in_stride + block_idx]; //
+        float v_qm = qm[pos];
+        float v_qac = qac;
+        float val = v_block_in * (v_qm * v_qac);
+        ap_frac_15 val_i = SCLF(val);  // to fixed point (FACTOR_SCL scaling)
+        apu_frac thres_i = SCLF(thres);
+        int16_t v_i = UpdateErr_int(val_i, thres_i, k); //, previous_row_err_i);
+        block_out[block_off * out_stride + block_idx] = v_i;
+        // Debug trace: one line on stdout for every coefficient.
+        std::cout<<"std_qua: iy="<<iy<<" ix="<<ix<<" k="<<(int)k<<" cplane="
+        		 <<v_block_in<<" val="<<val<<" by=" <<block_off<<" bx="<<block_idx
+				 <<" qm="<<v_qm<<" qac="<<v_qac
+				 <<std::dec<<" quantized="<<v_i<<std::endl;
+        /*
+
+         std::cout << "k2_qua: iy=" << iy << " ix=" << ix << " k=" << k
+        		  << " cplane=" << plane << " val=" << val
+				  << " by=" << addr_o(9, 5) << " bx=" << addr_o(4, 0)
+				  << " qm=" << qm << " qac=" << qac[iy][ix]
+				  << " quantized=" << v << std::endl;
+
+         */
+      }
+    }
+  }
+}
+
+/*
+void Quantizer::QuantizeBlockAC(uint8_t quant_table, int32_t quant,
+                                size_t quant_kind, int c, size_t xsize,
+                                size_t ysize, const float *block_in,
+                                size_t in_stride, int16_t *block_out,
+                                size_t out_stride) const {
+  constexpr size_t kBlockSize = kBlockDim * kBlockDim;
+  const float *qm = dequant_->InvMatrix(quant_table, quant_kind, c);
+  const float qac = Scale() * quant;
+
+  const float thres = zero_bias_[c];
+  size_t block_shift =
+      NumZeroBitsBelowLSBNonzero(kBlockDim * kBlockDim * xsize);
+  //std::cout<<"block_shift="<<block_shift<<" out_stride="<<out_stride<<std::endl;
+  QuantizeBlockAC_core_L1(quant_table, quant, quant_kind, c, xsize, ysize,
+                          block_in, in_stride, block_out, out_stride, qm, qac,
+                          thres, block_shift);
+}
+*/
+} // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/quantizer.h b/codec/L2/demos/pikEnc/host/pik/quantizer.h
new file mode 100755
index 0000000000..00fe566383
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/quantizer.h
@@ -0,0 +1,630 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_QUANTIZER_H_
+#define PIK_QUANTIZER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <cmath>
+#include <cstdint>
+#include <string>
+#include <vector>
+#include <iostream>
+#include <iomanip>
+
+#include "pik/ac_strategy.h"
+#include "pik/bit_reader.h"
+#include "pik/bits.h"
+#include "pik/common.h"
+#include "pik/compiler_specific.h"
+#include "pik/image.h"
+#include "pik/linalg.h"
+#include "pik/pik_info.h"
+#include "pik/quant_weights.h"
+#include "pik/robust_statistics.h"
+#include "pik/simd/simd.h"
+
+// Quantizes DC and AC coefficients, with separate quantization tables according
+// to the quant_kind (which is currently computed from the AC strategy and the
+// block index inside that strategy).
+
+namespace pik {
+
+static const int kGlobalScaleDenom = 1 << 16;
+static const int kGlobalScaleNumerator = 4096;
+
+// zero-biases for quantizing channels X, Y, B
+static constexpr float kZeroBiasDefault[3] = {0.65f, 0.6f, 0.7f};
+
+// Quantization biases.
+// The residuals of AC coefficients that we quantize are not uniformly
+// distributed. Numerical experiments show that they have a distribution with
+// the "shape" of 1/(1+x^2) [up to some coefficients]. This means that the
+// expected value of a coefficient that gets quantized to x will not be x
+// itself, but (at least with reasonable approximation):
+// - 0 if x is 0
+// - x * (1 - kOneBias[c]) if x is 1 or -1
+// - x - kBiasNumerator/x otherwise
+// This follows from computing the distribution of the quantization bias, which
+// can be approximated fairly well by <constant>/x when |x| is at least two. If
+// |x| is 1, kZeroBias creates a different bias for each channel, thus we look
+// it up in the kOneBias LUT.
+static constexpr float kOneBias[3] = {
+    0.05465007330715401f, 0.07005449891748593f, 0.049935103337343655f};
+static constexpr float kBiasNumerator = 0.145f;
+
+// Returns adjusted version of a quantized integer, such that its value is
+// closer to the expected value of the original (see comment above).
+template <int c>
+PIK_INLINE float AdjustQuantBias(int16_t quant) {
+  // 0 stays 0; +-1 uses the per-channel kOneBias; others move by -bias/quant.
+  if (quant == 0) return 0;
+  if (quant == 1) return 1 - kOneBias[c];
+  return quant == -1 ? kOneBias[c] - 1 : quant - kBiasNumerator / quant;
+}
+
+// Same as above, but runtime variable c.
+static PIK_INLINE float AdjustQuantBiasVar(const size_t c, int16_t quant) {
+  // Runtime-channel twin of AdjustQuantBias<c>: same three cases.
+  if (quant == 0) return 0;
+  if (quant == 1) return 1 - kOneBias[c];
+  return quant == -1 ? kOneBias[c] - 1 : quant - kBiasNumerator / quant;
+}
+
+// SIMD version of the method above.
+template <int c>
+SIMD_ATTR PIK_INLINE SIMD_FULL(float)::V
+    AdjustQuantBias(const SIMD_FULL(float)::V quant) {
+  SIMD_FULL(float) df;
+  SIMD_FULL(uint32_t) du;
+  const auto quant_sign = quant & cast_to(df, set1(du, 0x80000000u));  // sign bit of each lane
+  const auto quant_abs = andnot(quant_sign, quant);  // presumably |quant| — confirm andnot operand order
+  const auto quant_one = select(quant, set1(df, 1 - kOneBias[c]) ^ quant_sign,  // NOTE(review): select(a, b, mask) seems to pick b where mask holds — confirm
+                                quant_abs >= set1(df, 0.5f));
+  const auto quant_minus_inv = quant - set1(df, kBiasNumerator) / quant;  // the |quant| >= 2 formula of the scalar version
+  return select(quant_one, quant_minus_inv, quant_abs >= set1(df, 1.5f));
+}
+
+// Accessor for retrieving a single constant without initializing an image.
+class QuantConst {
+ public:
+  explicit QuantConst(const float quant) : value_(quant) {}
+  // There is no image behind the constant, so Row() has nothing to point at.
+  const float* PIK_RESTRICT Row(size_t) const { return nullptr; }
+  float Get(const float* PIK_RESTRICT, size_t) const { return value_; }
+ private:
+  const float value_;
+};
+
+class QuantField {
+ public:
+  explicit QuantField(const ImageF& quant) : field_(quant) {}
+  // Row pointer of the referenced image; Get() then indexes into that row.
+  const float* PIK_RESTRICT Row(size_t y) const { return field_.Row(y); }
+  float Get(const float* PIK_RESTRICT row, size_t x) const { return row[x]; }
+ private:
+  const ImageF& field_;
+};
+
+class Quantizer {
+ public:
+  Quantizer(const DequantMatrices* dequant, int quant_xsize, int quant_ysize);
+  Quantizer(const DequantMatrices* dequant, int quant_xsize, int quant_ysize,
+            int quant_dc, int global_scale);
+
+  Quantizer Copy(const Rect& rect) const {
+    Quantizer sub(dequant_, rect.xsize(), rect.ysize(), quant_dc_,
+                  global_scale_);  // same parameters, sized like `rect`
+    sub.inv_quant_dc_ = inv_quant_dc_;
+    sub.SetRawQuantField(CopyImage(rect, RawQuantField()));
+    return sub;
+  }
+
+  static PIK_INLINE int ClampVal(float val) {
+    const float lower_bounded = std::max<float>(1, val);  // raise to at least 1
+    return std::min<float>(256, lower_bounded);           // cap at 256
+  }
+
+  // Refreshes global_scale_float_ and inv_global_scale_ from global_scale_.
+  void RecomputeFromGlobalScale() {
+    global_scale_float_ = global_scale_ * (1.0 / kGlobalScaleDenom);  // scale / 2^16
+    inv_global_scale_ = 1.0 * kGlobalScaleDenom / global_scale_;  // 2^16 / scale
+  }
+  // Returns scaling factor such that Scale() * (RawDC() or RawQuantField())
+  // pixels yields the same float values returned by GetQuantField.
+  PIK_INLINE float Scale() const { return global_scale_float_; }
+  // Reciprocal of Scale(), as cached by RecomputeFromGlobalScale().
+  PIK_INLINE float InvGlobalScale() const { return inv_global_scale_; }
+
+  template <class QuantInput>  // e.g. QuantConst / QuantField: needs Row(y), Get(row, x)
+  bool SetQuantField(const float quant_dc, const QuantInput& qf) {
+    bool changed = false;  // set when the scale, the DC quant or any AC cell changes
+    std::vector<float> data(quant_ysize_ * quant_xsize_);
+    for (size_t y = 0; y < quant_ysize_; ++y) {
+      const float* PIK_RESTRICT row_qf = qf.Row(y);
+      for (size_t x = 0; x < quant_xsize_; ++x) {
+        float quant = qf.Get(row_qf, x);
+        data[quant_xsize_ * y + x] = quant;
+      }
+    }
+    const float quant_median = Median(&data);  // takes a pointer: may reorder data — confirm
+    const float quant_median_absd = MedianAbsoluteDeviation(data, quant_median);
+    // Target value for the median value in the quant field.
+    const float kQuantFieldTarget = 3.80987740592518214386;
+    // We reduce the median of the quant field by the median absolute deviation:
+    // higher resolution on highly varying quant fields.
+    int new_global_scale = kGlobalScaleDenom *
+                           (quant_median - quant_median_absd) /
+                           kQuantFieldTarget;
+    // Ensure that quant_dc_ will always be at least
+    // kGlobalScaleDenom/kGlobalScaleNumerator.
+    if (new_global_scale > quant_dc * kGlobalScaleNumerator) {
+      new_global_scale = quant_dc * kGlobalScaleNumerator;
+    }
+    // Ensure that new_global_scale is positive and no more than 1<<15.
+    if (new_global_scale <= 0) new_global_scale = 1;
+    if (new_global_scale > (1 << 15)) new_global_scale = 1 << 15;
+    if (new_global_scale != global_scale_) {
+      global_scale_ = new_global_scale;
+      RecomputeFromGlobalScale();
+      changed = true;
+    }
+    int val = ClampVal(quant_dc * inv_global_scale_ + 0.5f);  // rounded, clamped DC quant
+    if (val != quant_dc_) {
+      quant_dc_ = val;
+      changed = true;
+    }
+    for (size_t y = 0; y < quant_ysize_; ++y) {
+      const float* PIK_RESTRICT row_qf = qf.Row(y);
+      int32_t* PIK_RESTRICT row_qi = quant_img_ac_.Row(y);
+      for (size_t x = 0; x < quant_xsize_; ++x) {
+        int val = ClampVal(qf.Get(row_qf, x) * inv_global_scale_ + 0.5f);  // shadows the outer val
+        if (val != row_qi[x]) {
+          row_qi[x] = val;
+          changed = true;
+        }
+      }
+    }
+    // inv_quant_dc_ is refreshed only when something changed.
+    if (changed) {
+      const float qdc = global_scale_float_ * quant_dc_;
+      inv_quant_dc_ = 1.0f / qdc;
+    }
+    return changed;
+  }
+
+  // Mean of the first `s` entries of `in`; 0 when s == 0 (avoids 0/0 NaN).
+  float cal_average(const std::vector<float>& in, size_t s) {
+    if (s == 0) return 0.0f;
+    float sum = 0;
+    for (size_t i = 0; i < s; i++) sum = sum + in[i];  // caller ensures s <= size
+    return sum / s;
+  }
+
+  // Mean of |in[i] - avg| over the first `s` entries; 0 when s == 0.
+  float cal_absaverage(const std::vector<float>& in, size_t s, float avg) {
+    if (s == 0) return 0.0f;
+    float sum = 0;
+    for (size_t i = 0; i < s; i++) sum = sum + std::abs(in[i] - avg);
+    return sum / s;
+  }
+
+  // Variant of SetQuantField() whose statistics come from the caller: `avg` and
+  // `absavg` are used where SetQuantField() uses the median and the median
+  // absolute deviation of the field. Updates global_scale_, quant_dc_ and the
+  // integer AC field (from `qf`); returns true if any of them changed.
+  // `qfOrigin` is not read: it used to be copied into a scratch vector that
+  // nothing consumed, so that dead copy has been removed.
+  template <class QuantInput>  // Quant[Const/Map]
+  bool SetQuantFieldOR(float avg, float absavg, const float quant_dc,
+                       const QuantInput& qf, ImageF& qfOrigin) {
+    (void)qfOrigin;  // kept only for interface compatibility
+    bool changed = false;
+    const float quant_median = avg;
+    const float quant_median_absd = absavg;
+    // Target value for the median value in the quant field.
+    const float kQuantFieldTarget = 3.80987740592518214386;
+    // We reduce the median of the quant field by the median absolute deviation:
+    // higher resolution on highly varying quant fields.
+    int new_global_scale = kGlobalScaleDenom *
+                           (quant_median - quant_median_absd) /
+                           kQuantFieldTarget;
+    // Ensure that quant_dc_ will always be at least
+    // kGlobalScaleDenom/kGlobalScaleNumerator.
+    if (new_global_scale > quant_dc * kGlobalScaleNumerator) {
+      new_global_scale = quant_dc * kGlobalScaleNumerator;
+    }
+    // Ensure that new_global_scale is positive and no more than 1<<15.
+    if (new_global_scale <= 0) new_global_scale = 1;
+    if (new_global_scale > (1 << 15)) new_global_scale = 1 << 15;
+    if (new_global_scale != global_scale_) {
+      global_scale_ = new_global_scale;
+      RecomputeFromGlobalScale();
+      changed = true;
+    }
+    const int dc_val = ClampVal(quant_dc * inv_global_scale_ + 0.5f);
+    if (dc_val != quant_dc_) {
+      quant_dc_ = dc_val;
+      changed = true;
+    }
+    for (size_t y = 0; y < quant_ysize_; ++y) {
+      const float* PIK_RESTRICT row_qf = qf.Row(y);
+      int32_t* PIK_RESTRICT row_qi = quant_img_ac_.Row(y);
+      for (size_t x = 0; x < quant_xsize_; ++x) {
+        const int ac_val = ClampVal(qf.Get(row_qf, x) * inv_global_scale_ + 0.5f);
+        if (ac_val != row_qi[x]) {
+          row_qi[x] = ac_val;
+          changed = true;
+        }
+        //std::cout<<"std_qf: y="<<y<<" x="<<x<<" "<<row_qi[x]<<std::endl;
+      }
+    }
+
+    if (changed) {
+      const float qdc = global_scale_float_ * quant_dc_;
+      inv_quant_dc_ = 1.0f / qdc;
+    }
+
+    // Debug trace of the resulting quantizer state.
+    std::cout<<"std quant_dc="<<quant_dc_<<std::endl;
+    std::cout<<"std global_scale="<<global_scale_<<std::endl;
+    std::cout<<"std inv_quant_dc="<<inv_quant_dc_<<std::endl;
+    std::cout<<"std inv_global_scale="<<inv_global_scale_<<std::endl;
+    std::cout<<"std global_scale_float="<<global_scale_float_<<std::endl;
+
+    return changed;
+  }
+
+  // Returns the dequantization matrix set that dequant_ points to. The
+  // pointer is dereferenced without a null check.
+  const DequantMatrices& GetDequantMatrices() const { return *dequant_; }
+
+  // Accessors used for adaptive edge-preserving filter:
+  // Returns integer AC quantization field.
+  // Hands out a const reference to quant_img_ac_; no copy is made.
+  const ImageI& RawQuantField() const { return quant_img_ac_; }
+
+  // Replaces quant_img_ac_ by move-assigning from `qf`. No other member
+  // (global_scale_, quant_dc_, the cached inverses) is touched here.
+  void SetRawQuantField(ImageI&& qf) { quant_img_ac_ = std::move(qf); }
+
+  // Same assignment as SetRawQuantField(), but the argument is a non-const
+  // lvalue reference: the std::move below still moves out of the caller's
+  // image, so `qf` should be treated as moved-from after this call.
+  void SetQuantField(ImageI& qf) { quant_img_ac_ = std::move(qf); }
+
+  // Returns a key that can be used to check whether the DC quantization has
+  // changed. Normally the key value is less than (1 << 24), so (~0u) never
+  // occurs.
+  uint32_t QuantDcKey() const {
+    // Combine the two fields in unsigned arithmetic. global_scale_ can be as
+    // large as 1 << 15 (the upper clamp applied when it is set), and shifting
+    // that value left by 16 as a signed int would overflow.
+    return (static_cast<uint32_t>(global_scale_) << 16) +
+           static_cast<uint32_t>(quant_dc_);
+  }
+
+  // Convenience wrapper: calls SetQuantField() with `quant` as its first
+  // argument and QuantConst(quant) as its second.
+  // NOTE(review): QuantConst presumably yields the same value for every
+  // position of the quant field - confirm against its definition.
+  void SetQuant(float quant) { SetQuantField(quant, QuantConst(quant)); }
+
+  // Returns the DC quantization base value, which is currently global (not
+  // adaptive). The actual scale factor used to dequantize pixels in channel c
+  // is: inv_quant_dc() * DequantMatrix(c, kQuantKindDCT8)[0].
+  // Plain getter for the cached inv_quant_dc_ member; nothing is recomputed
+  // here.
+  float inv_quant_dc() const { return inv_quant_dc_; }
+
+  // Dequantize by multiplying with this times dequant_matrix.
+  // Computes inv_global_scale_ / quant. There is no guard against
+  // quant == 0; callers are expected to pass a non-zero value.
+  float inv_quant_ac(int32_t quant) const { return inv_global_scale_ / quant; }
+
+  // Quantizes the coefficients addressed by (xsize, ysize) from `block_in`
+  // into 16-bit integers in `block_out`.
+  //
+  // Each input coefficient is multiplied by the matching entry of the inverse
+  // dequantization matrix (selected by quant_table / quant_kind / c) and by
+  // Scale() * quant. Values whose magnitude is below zero_bias_[c] become 0,
+  // all others are rounded to nearest; the result is clamped to
+  // [-32767, 32767]. A running error term (`err`, `previous_row_err`) nudges
+  // values away from zero to dither the rounding.
+  //
+  // Input and output are addressed as slices of xsize * kBlockDim * kBlockDim
+  // values that are `in_stride` / `out_stride` elements apart. The index
+  // split below uses a shift and a mask, so that slice size is assumed to be
+  // a power of two.
+  //
+  // This routine runs once per block and channel, so it must not write to
+  // stdout; the debug print and the commented-out matrix dumps that used to
+  // live here were removed.
+  void QuantizeBlockAC(uint8_t quant_table, int32_t quant, size_t quant_kind,
+                       int c, size_t xsize, size_t ysize,
+                       const float* PIK_RESTRICT block_in, size_t in_stride,
+                       int16_t* PIK_RESTRICT block_out,
+                       size_t out_stride) const {
+    constexpr size_t kBlockSize = kBlockDim * kBlockDim;
+    const float* qm = dequant_->InvMatrix(quant_table, quant_kind, c);
+
+    const float qac = Scale() * quant;
+    // Not SIMD-fied for now.
+    const float thres = zero_bias_[c];
+    size_t block_shift =
+        NumZeroBitsBelowLSBNonzero(kBlockDim * kBlockDim * xsize);
+    // Done in a somewhat weird way to preserve the previous behaviour of
+    // dithering.
+    // TODO(jyrki): properly dither DCT blocks larger than 8.
+    for (size_t iy = 0; iy < ysize; iy++) {
+      for (size_t ix = 0; ix < xsize; ix++) {
+        // Error carried over from the previous row of 8 coefficients, one
+        // slot per column (k & 7).
+        double previous_row_err[8] = {0};
+        double err = 0;
+        for (size_t k = 0; k < kBlockSize; ++k) {
+          if ((k & 7) == 0) {
+            err = previous_row_err[0];
+          } else {
+            err += previous_row_err[k & 7];
+          }
+          // Position of coefficient k of sub-block (ix, iy) within the
+          // interleaved coefficient layout.
+          size_t x = xsize * (k % kBlockDim) + ix;
+          size_t y = ysize * (k / kBlockDim) + iy;
+          size_t pos = y * kBlockDim * xsize + x;
+          // Split into slice number and offset inside the slice.
+          size_t block_off = pos >> block_shift;
+          size_t block_idx = pos & (xsize * kBlockDim * kBlockDim - 1);
+          float val =
+              block_in[block_off * in_stride + block_idx] * (qm[pos] * qac);
+          if (err > 0) {
+            if (val > 0) {
+              val += 0.5 * err;
+            } else {
+              val -= 0.5 * err;
+            }
+            err = 0;
+          }
+          if (fabs(val) > 1) {
+            err = 0;
+          }
+          double v = (std::abs(val) < thres) ? 0 : std::round(val);
+          if (fabs(v) < fabs(val)) {
+            err += fabs(v - val);
+          } else {
+            err -= fabs(v - val);
+          }
+          if (k == 0) {
+            err = 0;
+          }
+          if ((k & 7) == 7) {
+            previous_row_err[k & 7] = 1.0 * err;
+            err = 0;
+          } else {
+            err *= 0.5;
+            previous_row_err[k & 7] = err;
+          }
+          // Keep the result representable as int16_t.
+          if (v > 32767) v = 32767;
+          if (v < -32767) v = -32767;
+          block_out[block_off * out_stride + block_idx] = v;
+        }
+      }
+    }
+  }
+
+  // Quantizes `in` with QuantizeBlockAC() and immediately dequantizes the
+  // result into `out`, giving the values obtained after a quantization round
+  // trip.
+  //
+  // The intermediate integers are stored contiguously in a stack buffer of
+  // AcStrategy::kMaxCoeffArea entries, ysize slices of xsize * kBlockSize
+  // values each. Output slice y starts at out + y * out_stride. Every value
+  // is passed through AdjustQuantBiasVar(c, .) and multiplied by the matching
+  // dequantization matrix entry and by inv_quant_ac(quant).
+  SIMD_ATTR PIK_INLINE void QuantizeRoundtripBlockAC(
+      const size_t c, uint8_t quant_table, int32_t quant, size_t quant_kind,
+      size_t xsize, size_t ysize, const float* in, size_t in_stride, float* out,
+      size_t out_stride) const {
+    constexpr size_t N = kBlockDim;
+    constexpr size_t kBlockSize = N * N;
+    int16_t quantized[AcStrategy::kMaxCoeffArea];
+    float inv_qac = inv_quant_ac(quant);
+    QuantizeBlockAC(quant_table, quant, quant_kind, c, xsize, ysize, in,
+                    in_stride, quantized, xsize * kBlockSize);
+
+    const float* PIK_RESTRICT dequant_matrix =
+        DequantMatrix(quant_table, quant_kind, c);
+    for (size_t y = 0; y < ysize; y++) {
+      for (size_t k = 0; k < kBlockSize * xsize; k++) {
+        float quantized_coeff = quantized[y * kBlockSize * xsize + k];
+        out[y * out_stride + k] = AdjustQuantBiasVar(c, quantized_coeff) *
+                                  dequant_matrix[y * kBlockSize * xsize + k] *
+                                  inv_qac;
+      }
+    }
+  }
+
+  // Quantizes the specified values in the given block, given as a bitmask in
+  // coefficients (as coefficient indices go up to 64, a bitmask is a convenient
+  // way to encode them), and then dequantizes them. Note that this requires the
+  // specified coefficients to be valid for the given quantization table (i.e.
+  // no DC for dct8 or dct16 blocks). `block_in` (and out) may not necessarily
+  // be contiguous, but it can be composed of `ysize` slices of size
+  // `xsize`*kBlockDim*kBlockDim that are `block_stride` apart.
+  template <int c>
+  void QuantizeRoundtripBlockCoefficients(uint8_t quant_table, int32_t quant,
+                                          size_t quant_kind, size_t xsize,
+                                          size_t ysize, const float* block_in,
+                                          size_t in_stride, float* block_out,
+                                          size_t out_stride,
+                                          uint64_t coefficients) const {
+    // The whole input is quantized into a contiguous scratch buffer first;
+    // only the coefficients selected by the bitmask are then dequantized and
+    // written to block_out.
+    const size_t slice_size = kBlockDim * kBlockDim * xsize;
+    int16_t scratch[AcStrategy::kMaxCoeffArea];
+    const float inv_qac = inv_quant_ac(quant);
+    QuantizeBlockAC(quant_table, quant, quant_kind, c, xsize, ysize, block_in,
+                    in_stride, scratch, slice_size);
+    const float* PIK_RESTRICT matrix =
+        DequantMatrix(quant_table, quant_kind, c);
+    const size_t slice_shift = NumZeroBitsBelowLSBNonzero(slice_size);
+
+    uint64_t pending = coefficients;
+    while (pending != 0) {
+      // Index of the lowest set bit = next selected coefficient.
+      const size_t k = NumZeroBitsBelowLSBNonzero(pending);
+      pending &= pending - 1;  // Clear that bit.
+      const size_t coeff_x = k % kBlockDim;
+      const size_t coeff_y = k / kBlockDim;
+      for (size_t iy = 0; iy < ysize; ++iy) {
+        for (size_t ix = 0; ix < xsize; ++ix) {
+          const size_t pos = (coeff_y * ysize + iy) * xsize * kBlockDim +
+                             coeff_x * xsize + ix;
+          // Slice number and offset within the slice for the strided output.
+          const size_t slice = pos >> slice_shift;
+          const size_t offset = pos & (slice_size - 1);
+          const float quantized_coeff = scratch[pos];
+          block_out[slice * out_stride + offset] =
+              AdjustQuantBias<c>(quantized_coeff) * matrix[pos] * inv_qac;
+        }
+      }
+    }
+  }
+
+  PIK_INLINE int QuantizeDC(int c, float dc) const {
+    // Multiplier = first entry of the channel-c inverse DCT8 matrix of table
+    // 0, times the effective DC quantizer (global scale * quant_dc_).
+    const float dc_quantizer = global_scale_float_ * quant_dc_;
+    const float inv_matrix_dc = dequant_->InvMatrix(0, kQuantKindDCT8, c)[0];
+    return static_cast<int>(std::round(dc * (inv_matrix_dc * dc_quantizer)));
+  }
+
+  std::string Encode(PikImageSizeInfo* info) const;
+
+  bool Decode(BitReader* br);
+
+  void DumpQuantizationMap() const;
+
+  // Thin forwarder to dequant_->Matrix() with the same three arguments; the
+  // pointer it returns is passed through unchanged.
+  PIK_INLINE const float* DequantMatrix(uint8_t quant_table, size_t quant_kind,
+                                        int c) const {
+    return dequant_->Matrix(quant_table, quant_kind, c);
+  }
+
+  // Thin forwarder to dequant_->MatrixOffset() with the same three arguments.
+  // The return type is a plain size_t: a top-level const on a by-value return
+  // has no effect and only triggers -Wignored-qualifiers.
+  PIK_INLINE size_t DequantMatrixOffset(uint8_t quant_table, size_t quant_kind,
+                                        int c) const {
+    return dequant_->MatrixOffset(quant_table, quant_kind, c);
+  }
+
+  // Returns quant_dc_, the integer DC quantizer (one of the serialized
+  // fields).
+  int QuantDC() const { return quant_dc_; }
+
+  size_t quant_xsize_;
+  size_t quant_ysize_;
+
+  // These are serialized:
+  int global_scale_;
+  int quant_dc_;
+  ImageI quant_img_ac_;
+
+  // These are derived from global_scale_:
+  float inv_global_scale_;
+  float global_scale_float_;  // reciprocal of inv_global_scale_
+  float inv_quant_dc_;
+
+  float zero_bias_[3];
+  const DequantMatrices* dequant_;
+};
+
+Image3S QuantizeCoeffsDC(const Image3F& in, const Quantizer& quantizer);
+
+// Input is already 1 DC per block!
+ImageF QuantizeRoundtripDC(const Quantizer& quantizer, int c, const ImageF& dc);
+
+}  // namespace pik
+
+#endif  // PIK_QUANTIZER_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/rational_polynomial.h b/codec/L2/demos/pikEnc/host/pik/rational_polynomial.h
new file mode 100755
index 0000000000..6d5f65a030
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/rational_polynomial.h
@@ -0,0 +1,248 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_RATIONAL_POLYNOMIAL_H_
+#define PIK_RATIONAL_POLYNOMIAL_H_
+
+// Fast SIMD evaluation of rational polynomials for approximating functions.
+
+#include "pik/compiler_specific.h"
+#include "pik/simd/simd.h"
+
+namespace pik {
+
+// Approximates smooth functions via rational polynomials (i.e. dividing two
+// polynomials). Supports V = SIMD or Scalar<T> inputs.
+
+// Evaluates the polynomial using Horner's method, which is faster than
+// Clenshaw recurrence for Chebyshev polynomials.
+//
+// "kDeg" is the degree of the numerator and denominator polynomials;
+// kDegP == kDegQ + 1 = 3 or 4 is usually a good choice.
+template <class D, int kDegP, int kDegQ>
+class RationalPolynomial {
+  using T = typename D::T;
+  using V = typename D::V;
+  // operator() below is unrolled by hand for at most seven Horner steps.
+  static_assert(kDegP <= 7, "Unroll more iterations");
+  static_assert(kDegQ <= 7, "Unroll more iterations");
+
+ public:
+  // Stores the numerator (p) and denominator (q) coefficients. Each scalar is
+  // cast to T and broadcast with set1, so U may differ from T.
+  template <typename U>
+  SIMD_ATTR void SetCoefficients(const U (&p)[kDegP + 1],
+                                 const U (&q)[kDegQ + 1]) {
+    for (int i = 0; i <= kDegP; ++i) {
+      p_[i] = set1(D(), static_cast<T>(p[i]));
+    }
+    for (int i = 0; i <= kDegQ; ++i) {
+      q_[i] = set1(D(), static_cast<T>(q[i]));
+    }
+  }
+
+  // Writes the stored coefficients back as scalars: a single-lane part of
+  // each coefficient vector is stored into (*p)[i] / (*q)[i].
+  SIMD_ATTR void GetCoefficients(T (*p)[kDegP + 1], T (*q)[kDegQ + 1]) const {
+    const SIMD_PART(T, 1) d;
+    for (int i = 0; i <= kDegP; ++i) {
+      store(any_part(d, p_[i]), d, (*p) + i);
+    }
+    for (int i = 0; i <= kDegQ; ++i) {
+      store(any_part(d, q_[i]), d, (*q) + i);
+    }
+  }
+
+  // Constructs the polynomial directly from coefficient arrays; see
+  // SetCoefficients().
+  template <typename U>
+  SIMD_ATTR RationalPolynomial(const U (&p)[kDegP + 1],
+                               const U (&q)[kDegQ + 1]) {
+    SetCoefficients(p, q);
+  }
+
+  // Evaluates the polynomial at x.
+  // Numerator and denominator are evaluated side by side with Horner's
+  // scheme, starting from the highest-degree coefficient. The `if` conditions
+  // only involve template parameters, so steps beyond the actual degree are
+  // never taken.
+  // NOTE(review): PIK_COMPILER_FENCE presumably keeps the compiler from
+  // reordering across the individual steps - confirm against its definition.
+  SIMD_ATTR PIK_INLINE V operator()(const V x) const {
+    V yp = p_[kDegP];
+    V yq = q_[kDegQ];
+    PIK_COMPILER_FENCE;
+    if (kDegP >= 1) yp = mul_add(yp, x, p_[kDegP - 1]);
+    if (kDegQ >= 1) yq = mul_add(yq, x, q_[kDegQ - 1]);
+    PIK_COMPILER_FENCE;
+    if (kDegP >= 2) yp = mul_add(yp, x, p_[kDegP - 2]);
+    if (kDegQ >= 2) yq = mul_add(yq, x, q_[kDegQ - 2]);
+    PIK_COMPILER_FENCE;
+    if (kDegP >= 3) yp = mul_add(yp, x, p_[kDegP - 3]);
+    if (kDegQ >= 3) yq = mul_add(yq, x, q_[kDegQ - 3]);
+    PIK_COMPILER_FENCE;
+    if (kDegP >= 4) yp = mul_add(yp, x, p_[kDegP - 4]);
+    if (kDegQ >= 4) yq = mul_add(yq, x, q_[kDegQ - 4]);
+    PIK_COMPILER_FENCE;
+    if (kDegP >= 5) yp = mul_add(yp, x, p_[kDegP - 5]);
+    if (kDegQ >= 5) yq = mul_add(yq, x, q_[kDegQ - 5]);
+    PIK_COMPILER_FENCE;
+    if (kDegP >= 6) yp = mul_add(yp, x, p_[kDegP - 6]);
+    if (kDegQ >= 6) yq = mul_add(yq, x, q_[kDegQ - 6]);
+    PIK_COMPILER_FENCE;
+    if (kDegP >= 7) yp = mul_add(yp, x, p_[kDegP - 7]);
+    if (kDegQ >= 7) yq = mul_add(yq, x, q_[kDegQ - 7]);
+
+    // Division is faster for a single evaluation, but the Triple variant
+    // below is much faster with NR, so we use the same approach here in order
+    // to reach the same max error as that variant.
+    return FastDivision<T, V>()(yp, yq);
+  }
+
+ private:
+  // Horner coefficients in ascending order: index 0 is the constant term and
+  // index kDeg{P,Q} the highest-degree term, each broadcast to a full vector.
+  V p_[kDegP + 1];
+  V q_[kDegQ + 1];
+};
+
+// Evaluates a rational polynomial via Horner's scheme. Equivalent to
+// RationalPolynomial poly(p, q); return poly(x). This can be more efficient
+// because the coefficients are loaded directly from memory, whereas set1
+// can result in copying them from RIP+x to stack frame. load_dup128 allows us
+// to specify constants (replicated 4x) independently of the lane count.
+// `p` and `q` hold the numerator / denominator coefficients in ascending
+// order, each one stored as four consecutive copies (so NP == 4 * (degree of
+// p + 1), likewise for NQ). The evaluation is unrolled for degrees up to 7;
+// the static_asserts reject array sizes that the unrolled code would
+// otherwise evaluate incorrectly without any diagnostic.
+template <int NP, int NQ, class V, typename T>
+SIMD_ATTR PIK_INLINE V EvalRationalPolynomial(const V x, const T (&p)[NP],
+                                              const T (&q)[NQ]) {
+  static_assert(NP >= 4 && NP % 4 == 0, "p must hold 4 copies per coefficient");
+  static_assert(NQ >= 4 && NQ % 4 == 0, "q must hold 4 copies per coefficient");
+  const SIMD_FULL(T) d;
+  constexpr int kDegP = NP / 4 - 1;
+  constexpr int kDegQ = NQ / 4 - 1;
+  static_assert(kDegP <= 7, "Unroll more iterations");
+  static_assert(kDegQ <= 7, "Unroll more iterations");
+  // Horner's scheme, starting from the highest-degree coefficient.
+  auto yp = load_dup128(d, &p[kDegP * 4]);
+  auto yq = load_dup128(d, &q[kDegQ * 4]);
+  PIK_COMPILER_FENCE;
+  if (kDegP >= 1) yp = mul_add(yp, x, load_dup128(d, &p[(kDegP - 1) * 4]));
+  if (kDegQ >= 1) yq = mul_add(yq, x, load_dup128(d, &q[(kDegQ - 1) * 4]));
+  PIK_COMPILER_FENCE;
+  if (kDegP >= 2) yp = mul_add(yp, x, load_dup128(d, &p[(kDegP - 2) * 4]));
+  if (kDegQ >= 2) yq = mul_add(yq, x, load_dup128(d, &q[(kDegQ - 2) * 4]));
+  PIK_COMPILER_FENCE;
+  if (kDegP >= 3) yp = mul_add(yp, x, load_dup128(d, &p[(kDegP - 3) * 4]));
+  if (kDegQ >= 3) yq = mul_add(yq, x, load_dup128(d, &q[(kDegQ - 3) * 4]));
+  PIK_COMPILER_FENCE;
+  if (kDegP >= 4) yp = mul_add(yp, x, load_dup128(d, &p[(kDegP - 4) * 4]));
+  if (kDegQ >= 4) yq = mul_add(yq, x, load_dup128(d, &q[(kDegQ - 4) * 4]));
+  PIK_COMPILER_FENCE;
+  if (kDegP >= 5) yp = mul_add(yp, x, load_dup128(d, &p[(kDegP - 5) * 4]));
+  if (kDegQ >= 5) yq = mul_add(yq, x, load_dup128(d, &q[(kDegQ - 5) * 4]));
+  PIK_COMPILER_FENCE;
+  if (kDegP >= 6) yp = mul_add(yp, x, load_dup128(d, &p[(kDegP - 6) * 4]));
+  if (kDegQ >= 6) yq = mul_add(yq, x, load_dup128(d, &q[(kDegQ - 6) * 4]));
+  PIK_COMPILER_FENCE;
+  if (kDegP >= 7) yp = mul_add(yp, x, load_dup128(d, &p[(kDegP - 7) * 4]));
+  if (kDegQ >= 7) yq = mul_add(yq, x, load_dup128(d, &q[(kDegQ - 7) * 4]));
+
+  return FastDivision<T, V>()(yp, yq);
+}
+
+// Evaluates three at once for better FMA utilization and fewer loads.
+// Evaluates the same rational polynomial at x0, x1 and x2 and stores the
+// results in *y0, *y1 and *y2. `p` and `q` hold the coefficients in ascending
+// order, each one stored as four consecutive copies. The evaluation is
+// unrolled for degrees up to 7; the static_asserts reject array sizes that
+// the unrolled code would otherwise evaluate incorrectly without any
+// diagnostic.
+template <int NP, int NQ, class V, typename T>
+SIMD_ATTR void EvalRationalPolynomialTriple(const V x0, const V x1, const V x2,
+                                            const T (&p)[NP], const T (&q)[NQ],
+                                            V* PIK_RESTRICT y0,
+                                            V* PIK_RESTRICT y1,
+                                            V* PIK_RESTRICT y2) {
+  static_assert(NP >= 4 && NP % 4 == 0, "p must hold 4 copies per coefficient");
+  static_assert(NQ >= 4 && NQ % 4 == 0, "q must hold 4 copies per coefficient");
+  // Computing both polynomials in parallel is slightly faster than sequential
+  // (better utilization of FMA slots despite higher register pressure).
+  const SIMD_FULL(T) d;
+  constexpr int kDegP = NP / 4 - 1;
+  constexpr int kDegQ = NQ / 4 - 1;
+  static_assert(kDegP <= 7, "Unroll more iterations");
+  static_assert(kDegQ <= 7, "Unroll more iterations");
+  // All three evaluations start from the highest-degree coefficient.
+  V yp0 = load_dup128(d, &p[kDegP * 4]);
+  V yq0 = load_dup128(d, &q[kDegQ * 4]);
+  V yp1 = yp0;
+  V yq1 = yq0;
+  V yp2 = yp0;
+  V yq2 = yq0;
+  // Each coefficient is loaded once and shared by the three Horner steps.
+  V c;
+  if (kDegP >= 1) {
+    c = load_dup128(d, &p[(kDegP - 1) * 4]);
+    yp0 = mul_add(yp0, x0, c);
+    yp1 = mul_add(yp1, x1, c);
+    yp2 = mul_add(yp2, x2, c);
+  }
+  if (kDegQ >= 1) {
+    c = load_dup128(d, &q[(kDegQ - 1) * 4]);
+    yq0 = mul_add(yq0, x0, c);
+    yq1 = mul_add(yq1, x1, c);
+    yq2 = mul_add(yq2, x2, c);
+  }
+  if (kDegP >= 2) {
+    c = load_dup128(d, &p[(kDegP - 2) * 4]);
+    yp0 = mul_add(yp0, x0, c);
+    yp1 = mul_add(yp1, x1, c);
+    yp2 = mul_add(yp2, x2, c);
+  }
+  if (kDegQ >= 2) {
+    c = load_dup128(d, &q[(kDegQ - 2) * 4]);
+    yq0 = mul_add(yq0, x0, c);
+    yq1 = mul_add(yq1, x1, c);
+    yq2 = mul_add(yq2, x2, c);
+  }
+  if (kDegP >= 3) {
+    c = load_dup128(d, &p[(kDegP - 3) * 4]);
+    yp0 = mul_add(yp0, x0, c);
+    yp1 = mul_add(yp1, x1, c);
+    yp2 = mul_add(yp2, x2, c);
+  }
+  if (kDegQ >= 3) {
+    c = load_dup128(d, &q[(kDegQ - 3) * 4]);
+    yq0 = mul_add(yq0, x0, c);
+    yq1 = mul_add(yq1, x1, c);
+    yq2 = mul_add(yq2, x2, c);
+  }
+  if (kDegP >= 4) {
+    c = load_dup128(d, &p[(kDegP - 4) * 4]);
+    yp0 = mul_add(yp0, x0, c);
+    yp1 = mul_add(yp1, x1, c);
+    yp2 = mul_add(yp2, x2, c);
+  }
+  if (kDegQ >= 4) {
+    c = load_dup128(d, &q[(kDegQ - 4) * 4]);
+    yq0 = mul_add(yq0, x0, c);
+    yq1 = mul_add(yq1, x1, c);
+    yq2 = mul_add(yq2, x2, c);
+  }
+  if (kDegP >= 5) {
+    c = load_dup128(d, &p[(kDegP - 5) * 4]);
+    yp0 = mul_add(yp0, x0, c);
+    yp1 = mul_add(yp1, x1, c);
+    yp2 = mul_add(yp2, x2, c);
+  }
+  if (kDegQ >= 5) {
+    c = load_dup128(d, &q[(kDegQ - 5) * 4]);
+    yq0 = mul_add(yq0, x0, c);
+    yq1 = mul_add(yq1, x1, c);
+    yq2 = mul_add(yq2, x2, c);
+  }
+  if (kDegP >= 6) {
+    c = load_dup128(d, &p[(kDegP - 6) * 4]);
+    yp0 = mul_add(yp0, x0, c);
+    yp1 = mul_add(yp1, x1, c);
+    yp2 = mul_add(yp2, x2, c);
+  }
+  if (kDegQ >= 6) {
+    c = load_dup128(d, &q[(kDegQ - 6) * 4]);
+    yq0 = mul_add(yq0, x0, c);
+    yq1 = mul_add(yq1, x1, c);
+    yq2 = mul_add(yq2, x2, c);
+  }
+  if (kDegP >= 7) {
+    c = load_dup128(d, &p[(kDegP - 7) * 4]);
+    yp0 = mul_add(yp0, x0, c);
+    yp1 = mul_add(yp1, x1, c);
+    yp2 = mul_add(yp2, x2, c);
+  }
+  if (kDegQ >= 7) {
+    c = load_dup128(d, &q[(kDegQ - 7) * 4]);
+    yq0 = mul_add(yq0, x0, c);
+    yq1 = mul_add(yq1, x1, c);
+    yq2 = mul_add(yq2, x2, c);
+  }
+
+  // Much faster than division when computing three at once.
+  *y0 = FastDivision<T, V>()(yp0, yq0);
+  *y1 = FastDivision<T, V>()(yp1, yq1);
+  *y2 = FastDivision<T, V>()(yp2, yq2);
+}
+
+}  // namespace pik
+
+#endif  // PIK_RATIONAL_POLYNOMIAL_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/resample.h b/codec/L2/demos/pikEnc/host/pik/resample.h
new file mode 100755
index 0000000000..255a82e837
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/resample.h
@@ -0,0 +1,1402 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+// Fast SIMD cubic upsampling.
+
+#ifndef PIK_RESAMPLE_H_
+#define PIK_RESAMPLE_H_
+
+#include <stddef.h>
+#include <atomic>
+#include <cmath>
+
+#undef PROFILER_ENABLED
+#define PROFILER_ENABLED 1
+#include "pik/data_parallel.h"
+#include "pik/image.h"
+#include "pik/image_ops.h"
+#include "pik/profiler.h"
+#include "pik/simd/simd.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Main entry points. Executor{Loop/Pool} are from data_parallel.h.
+// Results are better if input pixels are non-linear (e.g. gamma-compressed).
+
+// (Possibly) multithreaded, single channel: called by all other Upsample.
+// Pure forwarder: `executor`, `in`, `kernel` and `out` are handed unchanged
+// to the static Upsampler::Run, which does all of the work.
+template <class Upsampler, class Executor, class Kernel>
+SIMD_ATTR PIK_INLINE void Upsample(const Executor executor, const ImageF& in,
+                                   const Kernel& kernel, ImageF* out) {
+  Upsampler::Run(executor, in, kernel, out);
+}
+
+// (Possibly) multithreaded, RGB.
+// Pure forwarder for three-plane images: the arguments are handed unchanged
+// to the static Upsampler::Run, which must accept Image3F input and output.
+template <class Upsampler, class Executor, class Kernel>
+SIMD_ATTR PIK_INLINE void Upsample(const Executor executor, const Image3F& in,
+                                   const Kernel& kernel, Image3F* out) {
+  Upsampler::Run(executor, in, kernel, out);
+}
+
+// Single-thread, single channel.
+// Calls the executor-taking ImageF overload with a default-constructed
+// ExecutorLoop.
+template <class Upsampler, class Kernel>
+SIMD_ATTR PIK_INLINE void Upsample(const ImageF& in, const Kernel& kernel,
+                                   ImageF* PIK_RESTRICT out) {
+  Upsample<Upsampler>(ExecutorLoop(), in, kernel, out);
+}
+
+// Single-thread, RGB.
+// Calls the executor-taking Image3F overload with a default-constructed
+// ExecutorLoop.
+template <class Upsampler, class Kernel>
+SIMD_ATTR PIK_INLINE void Upsample(const Image3F& in, const Kernel& kernel,
+                                   Image3F* PIK_RESTRICT out) {
+  Upsample<Upsampler>(ExecutorLoop(), in, kernel, out);
+}
+
+namespace kernel {
+
+// Arbitrary, possibly non-separable kernel.
+template <int kRadiusArg>
+class Custom {
+ public:
+  static constexpr int kRadius = kRadiusArg;
+  static constexpr int kWidth = 2 * kRadius;
+
+  // Builds a kernel by sampling an impulse response image. Both dimensions of
+  // `result` must be multiples of 8 and cover at least kWidth groups of 8
+  // pixels. Weights are written in [mod_y][tap_y][tap_x][mod_x] order.
+  static Custom FromResult(const ImageF& result) {
+    PIK_ASSERT(result.xsize() % 8 == 0);
+    PIK_ASSERT(result.ysize() % 8 == 0);
+    PIK_ASSERT(result.xsize() / 8 >= kWidth);
+    PIK_ASSERT(result.ysize() / 8 >= kWidth);
+    const int off_x = result.xsize() / 8 / 2 + kRadius;
+    const int off_y = result.ysize() / 8 / 2 + kRadius;
+    Custom kernel;
+    float* next_weight = kernel.weights_;
+    for (int mod_y = 0; mod_y < 8; ++mod_y) {
+      // Offsets 4..7 wrap around to -4..-1.
+      const int wrap_mod_y = mod_y >= 4 ? mod_y - 8 : mod_y;
+      for (int tap_y = 0; tap_y < kWidth; ++tap_y) {
+        const int y = (off_y - tap_y) * 8 + wrap_mod_y;
+        const float* row = result.Row(y);
+        for (int tap_x = 0; tap_x < kWidth; ++tap_x) {
+          const int x_base = (off_x - tap_x) * 8;
+          for (int mod_x = 0; mod_x < 8; ++mod_x) {
+            const int wrap_mod_x = mod_x >= 4 ? mod_x - 8 : mod_x;
+            *next_weight++ = row[x_base + wrap_mod_x];
+          }
+        }
+      }
+    }
+    return kernel;
+  }
+
+  const char* Name() const { return "Custom"; }
+
+  // Pointer to the 8 * kWidth * kWidth * 8 weights filled in by FromResult().
+  const float* Weights2D() const { return weights_; }
+
+ private:
+  SIMD_ALIGN float weights_[8 * kWidth * kWidth * 8];
+};
+
+// Cubic kernel (kRadius = 2, i.e. four taps per axis) whose weights are
+// computed entirely at compile time and exposed as static tables.
+class CatmullRom {
+  // constexpr functions for precomputing Weights_i() at compile time.
+
+  // Absolute value.
+  static constexpr float Abs(const float x) { return x < 0.0f ? -x : x; }
+  // Smallest integer >= x. The int conversion truncates toward zero, which
+  // already rounds negative non-integers up; positive ones need +1.
+  static constexpr int Ceil(const float x) {
+    return (static_cast<float>(static_cast<int>(x)) == x)
+               ? static_cast<int>(x)
+               : static_cast<int>(x) + ((x > 0.0f) ? 1 : 0);
+  }
+
+  // Polynomial used for distances in [0, 1]; called with u = 1 - distance.
+  static constexpr float F0_1(float u) {
+    return ((-1.5f * u + 2.0f) * u + 0.5f) * u;
+  }
+  // Polynomial used for distances in (1, 2]; called with u = 2 - distance.
+  static constexpr float F1_2(float u) { return ((0.5f * u - 0.5f) * u) * u; }
+
+  // Kernel value for a non-negative distance x; zero beyond 2.
+  static constexpr float EvalNonNegative(const float x) {
+    return x > 2.0f ? 0.0f : x > 1.0f ? F1_2(2.0f - x) : F0_1(1.0f - x);
+  }
+  // Kernel value for any x; depends only on |x|.
+  static constexpr float Eval(const float x) { return EvalNonNegative(Abs(x)); }
+
+  // Maps an output coordinate to the corresponding input coordinate for a
+  // scale factor of 8, treating sample positions as pixel centers.
+  static constexpr float InX(const float out_x) {
+    return (out_x + 0.5f) / 8 - 0.5f;
+  }
+
+  // Weight of input tap `tap` (0 .. 2 * kRadius - 1) for the output pixel at
+  // offset `mod` (0 .. 7): the kernel evaluated at the distance between the
+  // mapped position InX(mod) and the tap position Ceil(InX(mod) - kRadius) +
+  // tap.
+  // template enables static_assert.
+  template <int tap, int mod>
+  static constexpr float Weight() {
+    static_assert(0 <= tap && tap < 2 * kRadius, "Invalid tap");
+    static_assert(0 <= mod && mod < 8, "Invalid mod");
+    return Eval((InX(mod) - (Ceil(InX(mod) - kRadius) + tap)));
+  }
+
+  // Product of the vertical and horizontal 1D weights.
+  template <int tap_y, int tap_x, int mod_y, int mod_x>
+  static constexpr float Weight2D() {
+    return Weight<tap_y, mod_y>() * Weight<tap_x, mod_x>();
+  }
+
+ public:
+  static constexpr int kRadius = 2;  // cubic
+
+  const char* Name() const { return "CatmullRom"; }
+
+  // Evaluates the kernel at distance x.
+  constexpr float operator()(const float x) const { return Eval(x); }
+
+  // Precomputed weights for upscalers with separate X/Y muls.
+  // Note: the mod_y argument of the first macro is not used in its expansion;
+  // the horizontal weights are simply repeated for every mod_y.
+
+#define PIK_FOREACH_MOD_X(mod_y, tap_x)                           \
+  Weight<tap_x, 0>(), Weight<tap_x, 1>(), Weight<tap_x, 2>(),     \
+      Weight<tap_x, 3>(), Weight<tap_x, 4>(), Weight<tap_x, 5>(), \
+      Weight<tap_x, 6>(), Weight<tap_x, 7>()
+
+#define PIK_FOREACH_TAP_X_AND_Y(mod_y)                                  \
+  /* [tap_x=4][mod_x=8]: */                                             \
+  PIK_FOREACH_MOD_X(mod_y, 0), PIK_FOREACH_MOD_X(mod_y, 1),             \
+      PIK_FOREACH_MOD_X(mod_y, 2),                                      \
+      PIK_FOREACH_MOD_X(mod_y, 3), /* [tap_y=4][4]: */                  \
+      SIMD_REP4((Weight<0, mod_y>())), SIMD_REP4((Weight<1, mod_y>())), \
+      SIMD_REP4((Weight<2, mod_y>())), SIMD_REP4((Weight<3, mod_y>()))
+
+#define PIK_FOREACH_MOD_Y                                     \
+  PIK_FOREACH_TAP_X_AND_Y(0), PIK_FOREACH_TAP_X_AND_Y(1),     \
+      PIK_FOREACH_TAP_X_AND_Y(2), PIK_FOREACH_TAP_X_AND_Y(3), \
+      PIK_FOREACH_TAP_X_AND_Y(4), PIK_FOREACH_TAP_X_AND_Y(5), \
+      PIK_FOREACH_TAP_X_AND_Y(6), PIK_FOREACH_TAP_X_AND_Y(7)
+
+  // Returns a pointer to a function-local static table of 8 * 48 weights.
+  PIK_INLINE const float* WeightsSeparated() const {
+    // Memory layout required for SIMD (we load 4..8 consecutive mod_x):
+    // For each mod_y(8): [tap_x=4][mod_x=8], [4x tap_y=4] = 384
+    // (repeating the tap_x/mod_x for every mod_y is wasteful but avoids
+    // needing two separate weight pointers/GetWeights)
+    SIMD_ALIGN static constexpr float weights[8 * 48] = {PIK_FOREACH_MOD_Y};
+    return weights;
+  }
+
+#undef PIK_FOREACH_MOD_Y
+#undef PIK_FOREACH_TAP_X_AND_Y
+#undef PIK_FOREACH_MOD_X
+
+  // Precomputed weights for non-separable upscalers.
+  // The helper macros are redefined here with different parameter lists,
+  // which is why the previous definitions were #undef'ed above.
+
+#define PIK_FOREACH_MOD_X(mod_y, tap_y, tap_x)                                \
+  Weight2D<tap_y, tap_x, mod_y, 0>(), Weight2D<tap_y, tap_x, mod_y, 1>(),     \
+      Weight2D<tap_y, tap_x, mod_y, 2>(), Weight2D<tap_y, tap_x, mod_y, 3>(), \
+      Weight2D<tap_y, tap_x, mod_y, 4>(), Weight2D<tap_y, tap_x, mod_y, 5>(), \
+      Weight2D<tap_y, tap_x, mod_y, 6>(), Weight2D<tap_y, tap_x, mod_y, 7>()
+
+#define PIK_FOREACH_TAP_X(mod_y, tap_y)                                   \
+  PIK_FOREACH_MOD_X(mod_y, tap_y, 0), PIK_FOREACH_MOD_X(mod_y, tap_y, 1), \
+      PIK_FOREACH_MOD_X(mod_y, tap_y, 2), PIK_FOREACH_MOD_X(mod_y, tap_y, 3)
+
+#define PIK_FOREACH_TAP_Y(mod_y)                            \
+  PIK_FOREACH_TAP_X(mod_y, 0), PIK_FOREACH_TAP_X(mod_y, 1), \
+      PIK_FOREACH_TAP_X(mod_y, 2), PIK_FOREACH_TAP_X(mod_y, 3)
+
+#define PIK_FOREACH_MOD_Y                                               \
+  PIK_FOREACH_TAP_Y(0), PIK_FOREACH_TAP_Y(1), PIK_FOREACH_TAP_Y(2),     \
+      PIK_FOREACH_TAP_Y(3), PIK_FOREACH_TAP_Y(4), PIK_FOREACH_TAP_Y(5), \
+      PIK_FOREACH_TAP_Y(6), PIK_FOREACH_TAP_Y(7)
+
+  // Returns a pointer to a function-local static table of 1024 weights.
+  PIK_INLINE const float* Weights2D() const {
+    // Memory layout required for SIMD (we load 4..8 consecutive mod_x):
+    // 4D array: [mod_y=8][tap_y=4][tap_x=4][mod_x=8] = 1024 entries
+    SIMD_ALIGN static constexpr float weights[1024] = {PIK_FOREACH_MOD_Y};
+    return weights;
+  }
+
+#undef PIK_FOREACH_MOD_Y
+#undef PIK_FOREACH_TAP_Y
+#undef PIK_FOREACH_TAP_X
+#undef PIK_FOREACH_MOD_X
+};
+
+// Lanczos kernel with a support radius of 3 samples, i.e. 6 taps.
+class Lanczos3 {
+  // Absolute value. Kept as a single return so it can stay constexpr.
+  static constexpr float Abs(const float x) { return (x < 0.0f) ? -x : x; }
+
+  // sin(pi * x) / (pi * x), with the removable singularity at 0 filled by 1.
+  static /*constexpr*/ float Sinc(const float x) {
+    if (x == 0.0f) return 1.0f;
+    const float pi_x = x * 3.1415926536f;
+    return sin(pi_x) / pi_x;
+  }
+
+  // Kernel value for x >= 0: zero beyond 3, otherwise sinc(x) * sinc(x / 3).
+  static /*constexpr*/ float EvalNonNegative(const float x) {
+    if (x > 3.0f) return 0.0f;
+    return Sinc(x) * Sinc(x * 0.333333333f);
+  }
+
+  // The kernel is symmetric, so evaluate it at |x|.
+  static /*constexpr*/ float Eval(const float x) {
+    const float magnitude = Abs(x);
+    return EvalNonNegative(magnitude);
+  }
+
+ public:
+  // Number of input samples covered on each side of the center.
+  static constexpr int kRadius = 3;
+
+  // Human-readable identifier of this kernel.
+  const char* Name() const { return "Lanczos3"; }
+
+  // Returns the kernel weight at signed distance x from the center.
+  /*constexpr*/ float operator()(const float x) const { return Eval(x); }
+};
+
+}  // namespace kernel
+
+namespace slow {
+
+// Reference implementation used to verify Upsampler8. Works for any scale
+// factor and kernel size, but is slow (cache thrashing) because the X and Y
+// passes each traverse the entire image.
+class Upsampler {
+ public:
+  // TODO(janwas): add ExecutorPool overload
+  // Resamples the plane "in" to the dimensions of "out" using "kernel":
+  // first every row is stretched to the output width, then every column of
+  // that intermediate image is stretched to the output height. "executor" is
+  // not used by this overload.
+  template <class Executor, class Kernel>
+  static void Run(const Executor executor, const ImageF& in,
+                  const Kernel& kernel, ImageF* PIK_RESTRICT out) {
+    const size_t src_xsize = in.xsize();
+    const size_t src_ysize = in.ysize();
+    const size_t dst_xsize = out->xsize();
+    const size_t dst_ysize = out->ysize();
+    // Intermediate image: output width, input height.
+    ImageF horz_resampled(dst_xsize, src_ysize);
+    PROFILER_ZONE("slow::Upsampler");
+
+    // Horizontal pass: samples within a row are adjacent, hence stride 1.
+    for (size_t row = 0; row < src_ysize; ++row) {
+      const float* PIK_RESTRICT src = in.ConstRow(row);
+      float* PIK_RESTRICT dst = horz_resampled.Row(row);
+      Upsample1D(src, src_xsize, 1, kernel, dst, dst_xsize, 1);
+    }
+
+    // Vertical pass: walk down each column, stepping by PixelsPerRow().
+    const size_t src_stride = horz_resampled.PixelsPerRow();
+    const size_t dst_stride = out->PixelsPerRow();
+    for (size_t col = 0; col < dst_xsize; ++col) {
+      const float* PIK_RESTRICT src = horz_resampled.Row(0) + col;
+      float* PIK_RESTRICT dst = out->Row(0) + col;
+      Upsample1D(src, src_ysize, src_stride, kernel, dst, dst_ysize,
+                 dst_stride);
+    }
+  }
+
+  // Three-plane overload: runs the single-plane version on each plane.
+  template <class Executor, class Kernel>
+  static void Run(const Executor executor, const Image3F& in,
+                  const Kernel& kernel, Image3F* PIK_RESTRICT out) {
+    // Unoptimized: separate planes (additional fork/join)
+    for (int plane = 0; plane < 3; ++plane) {
+      ImageF* out_plane = const_cast<ImageF*>(&out->Plane(plane));
+      Run(executor, in.Plane(plane), kernel, out_plane);
+    }
+  }
+
+ private:
+  // Resamples in_size samples (in_stride floats apart) into out_size samples
+  // (out_stride floats apart). Every output is the kernel-weighted sum of
+  // 2 * Kernel::kRadius consecutive inputs; indices are passed through
+  // Mirror() before being used to address "in".
+  template <class Kernel>
+  static void Upsample1D(const float* PIK_RESTRICT in, const size_t in_size,
+                         const size_t in_stride, const Kernel& kernel,
+                         float* PIK_RESTRICT out, const int64_t out_size,
+                         const size_t out_stride) {
+    for (int64_t dst_idx = 0; dst_idx < out_size; ++dst_idx) {
+      // Center of the output sample as a fraction of the extent, [0, 1].
+      const float x = (dst_idx + 0.5f) / out_size;
+      // The same position in input sample coordinates.
+      const float center = x * in_size - 0.5f;
+      // Leftmost sample index.
+      const int first = static_cast<int>(std::ceil(center - Kernel::kRadius));
+
+      float acc = 0.0f;
+      for (int tap = first; tap < first + 2 * Kernel::kRadius; ++tap) {
+        const int64_t src_idx = Mirror(tap, in_size);
+        acc += in[src_idx * in_stride] * kernel(center - tap);
+      }
+      out[dst_idx * out_stride] = acc;
+    }
+  }
+};
+
+// For verifying GeneralUpsampler8 with a kernel that is in fact separable:
+// every 2D weight is formed on the fly as the product of two
+// Kernel::operator() values. Supports any scale factor and kernel size.
+class GeneralUpsamplerFromSeparable {
+ public:
+  // TODO(janwas): add ExecutorPool overload
+  // Resamples the plane "in" to the dimensions of "out" in a single pass.
+  // "executor" is not used by this overload.
+  template <class Executor, class Kernel>
+  static void Run(const Executor executor, const ImageF& in,
+                  const Kernel& kernel, ImageF* PIK_RESTRICT out) {
+    const size_t src_xsize = in.xsize();
+    const size_t src_ysize = in.ysize();
+    const size_t dst_xsize = out->xsize();
+    const size_t dst_ysize = out->ysize();
+    PROFILER_ZONE("slow::GeneralUpsamplerFromSeparable");
+
+    const int64_t kTaps = 2 * Kernel::kRadius;  // even
+
+    for (size_t dst_y = 0; dst_y < dst_ysize; ++dst_y) {
+      float* PIK_RESTRICT dst_row = out->Row(dst_y);
+      // Output row center in input coordinates and the first row sampled.
+      const float src_fy = ((dst_y + 0.5f) / dst_ysize) * src_ysize - 0.5f;
+      const int64_t first_y = std::ceil(src_fy - Kernel::kRadius);
+
+      // Per-tap source rows (indices go through Mirror) and Y weights.
+      const float* PIK_RESTRICT tap_rows[kTaps];
+      float weight_y[kTaps];
+      for (int64_t t = 0; t < kTaps; ++t) {
+        const int64_t src_y = first_y + t;
+        tap_rows[t] = in.ConstRow(Mirror(src_y, src_ysize));
+        weight_y[t] = kernel(src_fy - src_y);
+      }
+
+      for (int64_t dst_x = 0; dst_x < dst_xsize; dst_x++) {
+        const float src_fx = ((dst_x + 0.5f) / dst_xsize) * src_xsize - 0.5f;
+        const int64_t first_x = std::ceil(src_fx - Kernel::kRadius);
+
+        // Per-tap source columns (indices go through Mirror) and X weights.
+        int64_t tap_cols[kTaps];
+        float weight_x[kTaps];
+        for (int64_t t = 0; t < kTaps; ++t) {
+          const int64_t src_x = first_x + t;
+          tap_cols[t] = Mirror(src_x, src_xsize);
+          weight_x[t] = kernel(src_fx - src_x);
+        }
+
+        // Tensor product: accumulate pixel * wy * wx over the tap window.
+        float acc = 0.0f;
+        for (size_t ty = 0; ty < kTaps; ++ty) {
+          const float* PIK_RESTRICT src_row = tap_rows[ty];
+          const float wy = weight_y[ty];
+          for (size_t tx = 0; tx < kTaps; ++tx) {
+            acc += src_row[tap_cols[tx]] * wy * weight_x[tx];
+          }
+        }
+
+        dst_row[dst_x] = acc;
+      }
+    }
+  }
+
+  // Three-plane overload: runs the single-plane version on each plane.
+  template <class Executor, class Kernel>
+  static void Run(const Executor executor, const Image3F& in,
+                  const Kernel& kernel, Image3F* PIK_RESTRICT out) {
+    // Unoptimized: separate planes (additional fork/join)
+    for (int plane = 0; plane < 3; ++plane) {
+      ImageF* out_plane = const_cast<ImageF*>(&out->Plane(plane));
+      Run(executor, in.Plane(plane), kernel, out_plane);
+    }
+  }
+};
+
+// Supports any kernel size. Requires a compile-time kScale and a kernel that
+// provides Weights2D(), indexed as [mod_y][tap_y][tap_x][mod_x].
+template <int64_t kScale>
+class GeneralUpsampler {
+ public:
+  // TODO(janwas): add ExecutorPool overload
+  // Resamples the plane "in" to the dimensions of "out" using the kernel's
+  // precomputed 2D weights. "executor" is not used by this overload.
+  template <class Executor, class Kernel>
+  static void Run(const Executor executor, const ImageF& in,
+                  const Kernel& kernel, ImageF* PIK_RESTRICT out) {
+    const size_t src_xsize = in.xsize();
+    const size_t src_ysize = in.ysize();
+    const size_t dst_xsize = out->xsize();
+    const size_t dst_ysize = out->ysize();
+    PROFILER_ZONE("slow::GeneralUpsampler");
+
+    const int64_t kTaps = 2 * Kernel::kRadius;  // even
+    const float* PIK_RESTRICT table = kernel.Weights2D();
+
+    for (size_t dst_y = 0; dst_y < dst_ysize; ++dst_y) {
+      float* PIK_RESTRICT dst_row = out->Row(dst_y);
+      const float src_fy = ((dst_y + 0.5f) / dst_ysize) * src_ysize - 0.5f;
+      const int64_t first_y = std::ceil(src_fy - Kernel::kRadius);
+      // Phase of this output row within its kScale-sized group.
+      const size_t mod_y = dst_y % kScale;
+
+      // Source rows of the tap window (indices go through Mirror).
+      const float* PIK_RESTRICT tap_rows[kTaps];
+      for (int64_t t = 0; t < kTaps; ++t) {
+        tap_rows[t] = in.ConstRow(Mirror(first_y + t, src_ysize));
+      }
+
+      for (int64_t dst_x = 0; dst_x < dst_xsize; dst_x++) {
+        const float src_fx = ((dst_x + 0.5f) / dst_xsize) * src_xsize - 0.5f;
+        const int64_t first_x = std::ceil(src_fx - Kernel::kRadius);
+        // Phase of this output column within its kScale-sized group.
+        const size_t mod_x = dst_x % kScale;
+
+        // Source columns of the tap window (indices go through Mirror).
+        int64_t tap_cols[kTaps];
+        for (int64_t t = 0; t < kTaps; ++t) {
+          tap_cols[t] = Mirror(first_x + t, src_xsize);
+        }
+
+        float acc = 0.0f;
+        for (size_t ty = 0; ty < kTaps; ++ty) {
+          const float* PIK_RESTRICT src_row = tap_rows[ty];
+          // Flat offset of [mod_y][ty][0], before the tap_x/mod_x levels.
+          const size_t row_base = (mod_y * kTaps + ty) * kTaps;
+          for (size_t tx = 0; tx < kTaps; ++tx) {
+            const size_t idx_weight = (row_base + tx) * kScale + mod_x;
+            acc += src_row[tap_cols[tx]] * table[idx_weight];
+          }
+        }
+
+        dst_row[dst_x] = acc;
+      }
+    }
+  }
+
+  // Three-plane overload: runs the single-plane version on each plane.
+  template <class Executor, class Kernel>
+  static void Run(const Executor executor, const Image3F& in,
+                  const Kernel& kernel, Image3F* PIK_RESTRICT out) {
+    // Unoptimized: separate planes (additional fork/join)
+    for (int plane = 0; plane < 3; ++plane) {
+      ImageF* out_plane = const_cast<ImageF*>(&out->Plane(plane));
+      Run(executor, in.Plane(plane), kernel, out_plane);
+    }
+  }
+};
+
+}  // namespace slow
+
+// Shared code factored out of *Upsample8. CRTP: Derived needs kScale etc. and
+// implements ProducePair.
+//
+// Derived must provide the following static members, all used below:
+//  - GetWeights(kernel): pointer to the precomputed weight table;
+//  - kWeightsPerModY: number of floats the table holds per mod_y;
+//  - ProducePair(out_x, row_t3, row_t2, row_t, row_m, row_b, row_b2,
+//                in_xsize, wrap_x, weights, row_out): produces the outputs
+//    starting at out_x; the loops here advance out_x by 2 * D::N per call.
+template <int64_t kRadiusArg, class Derived>
+class Upsampler8Base {
+ public:
+  // Called by Upsample function templates. Image = Image[3]F.
+  // "out" must already be exactly kScale times as wide and as tall as "in"
+  // (enforced by PIK_CHECK).
+  template <class Executor, class Image, class Kernel>
+  static SIMD_ATTR PIK_INLINE void Run(const Executor executor, const Image& in,
+                                       const Kernel& kernel, Image* out) {
+    PROFILER_ZONE("Upsampler8");
+    PIK_CHECK(in.xsize() * kScale == out->xsize());
+    PIK_CHECK(in.ysize() * kScale == out->ysize());
+
+    const float* PIK_RESTRICT weights = Derived::GetWeights(kernel);
+
+    // Rows at least kBorder wide get the three-phase HorzSplit policy,
+    // narrower ones use bounds handling for every output.
+    if (out->xsize() >= kBorder) {
+      RunImpl(HorzSplit(), executor, in, weights, out);
+    } else {
+      RunImpl(HorzLoop(), executor, in, weights, out);
+    }
+  }
+
+ protected:
+  using D = SIMD_FULL(float);
+  using V = D::V;
+
+  // Kernel radius and total number of taps per dimension.
+  static constexpr int64_t kRadius = kRadiusArg;
+  static constexpr int64_t kWidth = 2 * kRadius;
+
+  // Fixed upsampling factor: 1 << 3 = 8.
+  static constexpr int kLogScale = 3;
+  static constexpr int64_t kScale = 1 << kLogScale;
+
+  // Number of output pixels at each edge that need coordinate wrapping.
+  static constexpr int64_t kBorder = kRadius * kScale;
+
+  // Returns first (left/top) input x/y for the given output x/y. "out_mod" is
+  // "out" % kScale.
+  static PIK_INLINE int64_t InFromOut(const size_t out, const size_t out_mod) {
+    // Shifted by 0.5 (dual grid): outputs in the upper half of a group of
+    // kScale start one input sample later.
+    return (out >> kLogScale) + (out_mod >> (kLogScale - 1)) - kRadius;
+  }
+
+ private:
+  // Policies for iterating in X direction:
+
+  // Wide enough to skip bounds checks in the interior.
+  struct HorzSplit {
+    // Fills row_out[0, out_xsize) from the given input rows: left border
+    // with WrapMirror, interior with WrapUnchanged, right border with
+    // WrapMirror.
+    SIMD_ATTR PIK_INLINE void operator()(
+        const float* PIK_RESTRICT row_t3, const float* PIK_RESTRICT row_t2,
+        const float* PIK_RESTRICT row_t, const float* PIK_RESTRICT row_m,
+        const float* PIK_RESTRICT row_b, const float* PIK_RESTRICT row_b2,
+        const size_t in_xsize, const float* PIK_RESTRICT weights,
+        float* PIK_RESTRICT row_out, const size_t out_xsize) const {
+      size_t out_x = 0;
+      for (; out_x < kBorder; out_x += 2 * D::N) {
+        Derived::ProducePair(out_x, row_t3, row_t2, row_t, row_m, row_b, row_b2,
+                             in_xsize, WrapMirror(), weights, row_out);
+      }
+      // (One more than kRadius because ProducePair reads offsets [-r, r+1])
+      // out_xsize is unsigned: when the row is narrower than this margin,
+      // "out_xsize - margin" would wrap around to a huge value and the
+      // unchecked loop below would run far past the end of the row. Clamp
+      // the interior bound to zero so that such rows have no interior.
+      const size_t margin = static_cast<size_t>((kRadius + 1) * kScale);
+      const size_t interior_end = out_xsize > margin ? out_xsize - margin : 0;
+      for (; out_x < interior_end; out_x += 2 * D::N) {
+        Derived::ProducePair(out_x, row_t3, row_t2, row_t, row_m, row_b, row_b2,
+                             in_xsize, WrapUnchanged(), weights, row_out);
+      }
+      for (; out_x < out_xsize; out_x += 2 * D::N) {
+        Derived::ProducePair(out_x, row_t3, row_t2, row_t, row_m, row_b, row_b2,
+                             in_xsize, WrapMirror(), weights, row_out);
+      }
+    }
+  };
+
+  // Narrow, only a single loop with X bounds checks.
+  struct HorzLoop {
+    // Fills row_out[0, out_xsize) using WrapMirror for every output.
+    SIMD_ATTR PIK_INLINE void operator()(
+        const float* PIK_RESTRICT row_t3, const float* PIK_RESTRICT row_t2,
+        const float* PIK_RESTRICT row_t, const float* PIK_RESTRICT row_m,
+        const float* PIK_RESTRICT row_b, const float* PIK_RESTRICT row_b2,
+        const size_t in_xsize, const float* PIK_RESTRICT weights,
+        float* PIK_RESTRICT row_out, const size_t out_xsize) const {
+      for (size_t out_x = 0; out_x < out_xsize; out_x += 2 * D::N) {
+        Derived::ProducePair(out_x, row_t3, row_t2, row_t, row_m, row_b, row_b2,
+                             in_xsize, WrapMirror(), weights, row_out);
+      }
+    }
+  };
+
+  // Produces a row of output using a single pass through the input rows.
+  // "wrap_y" maps the (possibly out of bounds) input row indices to valid
+  // ones; "horz" is the X iteration policy (HorzSplit or HorzLoop).
+  template <class Horz, class WrapY>
+  static SIMD_ATTR void ProduceRow(const Horz horz, const size_t out_y,
+                                   const ImageF& in, const WrapY wrap_y,
+                                   const float* PIK_RESTRICT weights,
+                                   float* PIK_RESTRICT out_row,
+                                   size_t out_xsize) {
+    const size_t in_xsize = in.xsize();
+    const size_t in_ysize = in.ysize();
+
+    const size_t mod_y = out_y % kScale;
+    // Coordinate of the top input row (possibly out of bounds).
+    const int64_t in_y = InFromOut(out_y, mod_y);
+
+    const float* PIK_RESTRICT row_t3 = in.ConstRow(wrap_y(in_y + 0, in_ysize));
+    const float* PIK_RESTRICT row_t2 = in.ConstRow(wrap_y(in_y + 1, in_ysize));
+    const float* PIK_RESTRICT row_t = in.ConstRow(wrap_y(in_y + 2, in_ysize));
+    const float* PIK_RESTRICT row_m = in.ConstRow(wrap_y(in_y + 3, in_ysize));
+    // Avoid out-of-bounds access - these two rows are unused for r=2 anyway.
+    const float* PIK_RESTRICT row_b =
+        kRadius == 2 ? nullptr : in.ConstRow(wrap_y(in_y + 4, in_ysize));
+    const float* PIK_RESTRICT row_b2 =
+        kRadius == 2 ? nullptr : in.ConstRow(wrap_y(in_y + 5, in_ysize));
+
+    // Select the part of the weight table that belongs to this mod_y.
+    weights += mod_y * Derived::kWeightsPerModY;
+    horz(row_t3, row_t2, row_t, row_m, row_b, row_b2, in_xsize, weights,
+         out_row, out_xsize);
+  }
+
+  // Single-plane driver: produces every output row, using WrapUnchanged for
+  // the middle rows of tall images and WrapMirror everywhere else.
+  template <class Horz, class Executor>
+  static SIMD_ATTR void RunImpl(const Horz horz, const Executor executor,
+                                const ImageF& in,
+                                const float* PIK_RESTRICT weights,
+                                ImageF* PIK_RESTRICT out) {
+    const size_t out_xsize = out->xsize();
+    const size_t out_ysize = out->ysize();
+
+    // Short: single loop (ignore pool - not worthwhile).
+    if (out_ysize <= 2 * kBorder) {
+      for (size_t out_y = 0; out_y < out_ysize; ++out_y) {
+        ProduceRow(horz, out_y, in, WrapMirror(), weights, out->Row(out_y),
+                   out_xsize);
+      }
+      return;
+    }
+
+    // Tall: skip bounds checks for middle rows.
+    for (size_t out_y = 0; out_y < kBorder; ++out_y) {
+      ProduceRow(horz, out_y, in, WrapMirror(), weights, out->Row(out_y),
+                 out_xsize);
+    }
+    // out_ysize > 2 * kBorder here, so the subtraction cannot wrap around.
+    executor.Run(
+        kBorder, out_ysize - kBorder,
+        [horz, &in, weights, out, out_xsize](const int task, const int thread) {
+          const int64_t out_y = task;
+          ProduceRow(horz, out_y, in, WrapUnchanged(), weights, out->Row(out_y),
+                     out_xsize);
+        },
+        "Resample");
+    for (size_t out_y = out_ysize - kBorder; out_y < out_ysize; ++out_y) {
+      ProduceRow(horz, out_y, in, WrapMirror(), weights, out->Row(out_y),
+                 out_xsize);
+    }
+  }
+
+  // Three-plane driver: same row partitioning as the single-plane version,
+  // applied to each of the 3 planes.
+  template <class Horz, class Executor>
+  static SIMD_ATTR void RunImpl(const Horz horz, const Executor executor,
+                                const Image3F& in,
+                                const float* PIK_RESTRICT weights,
+                                Image3F* PIK_RESTRICT out) {
+    const size_t out_xsize = out->xsize();
+    const size_t out_ysize = out->ysize();
+
+    // Short: single loop (ignore pool - not worthwhile).
+    if (out_ysize <= 2 * kBorder) {
+      for (int c = 0; c < 3; ++c) {
+        for (size_t out_y = 0; out_y < out_ysize; ++out_y) {
+          ProduceRow(horz, out_y, in.Plane(c), WrapMirror(), weights,
+                     out->PlaneRow(c, out_y), out_xsize);
+        }
+      }
+      return;
+    }
+
+    // Tall: skip bounds checks for middle rows.
+    for (int c = 0; c < 3; ++c) {
+      for (size_t out_y = 0; out_y < kBorder; ++out_y) {
+        ProduceRow(horz, out_y, in.Plane(c), WrapMirror(), weights,
+                   out->PlaneRow(c, out_y), out_xsize);
+      }
+    }
+    // out_ysize > 2 * kBorder here, so the subtraction cannot wrap around.
+    executor.Run(
+        kBorder, out_ysize - kBorder,
+        [horz, &in, weights, out, out_xsize](const int task, const int thread) {
+          const int64_t out_y = task;
+          for (int c = 0; c < 3; ++c) {
+            ProduceRow(horz, out_y, in.Plane(c), WrapUnchanged(), weights,
+                       out->PlaneRow(c, out_y), out_xsize);
+          }
+        },
+        "Resample3");
+    for (int c = 0; c < 3; ++c) {
+      for (size_t out_y = out_ysize - kBorder; out_y < out_ysize; ++out_y) {
+        ProduceRow(horz, out_y, in.Plane(c), WrapMirror(), weights,
+                   out->PlaneRow(c, out_y), out_xsize);
+      }
+    }
+  }
+};
+
+// Single-pass 8x cubic upsampling for separable 4x4 kernels. Unused: slower
+// than GeneralUpsampler8(!) and we need 6x6 for sufficient quality.
+// Instantiates Upsampler8Base with radius 2, i.e. kWidth = 4 taps.
+class Upsampler8 : public Upsampler8Base<2, Upsampler8> {
+ public:
+  // Floats per mod_y in the weight table: kWidth * kScale horizontal weights
+  // followed by kWidth vertical weights that are each stored 4 times.
+  static constexpr size_t kWeightsPerModY = kWidth * kScale + kWidth * 4;
+
+  // Returns contiguous storage: x[tap_x=4][mod_x=8], 4x-broadcasted y[tap_y=4].
+  // Extracts the required kind of weights from Kernel. Type-safe: compile error
+  // if kernel does not provide WeightsSeparated().
+  template <class Kernel>
+  static PIK_INLINE const float* PIK_RESTRICT GetWeights(const Kernel& kernel) {
+    return kernel.WeightsSeparated();
+  }
+
+  // Stores 2 vectors of upsampled pixels to row_out + out_x. "weights" are
+  // the return value of GetWeights. About 104 uops.
+  // row_b and row_b2 are accepted to match the interface expected by
+  // Upsampler8Base but are not read here.
+  template <class WrapX>
+  static SIMD_ATTR PIK_INLINE void ProducePair(
+      const size_t out_x, const float* PIK_RESTRICT row_t3,
+      const float* PIK_RESTRICT row_t2, const float* PIK_RESTRICT row_t,
+      const float* PIK_RESTRICT row_m, const float* PIK_RESTRICT row_b,
+      const float* PIK_RESTRICT row_b2, const size_t in_xsize,
+      const WrapX wrap_x, const float* PIK_RESTRICT weights,
+      float* PIK_RESTRICT row_out) {
+    const D d;
+    // Vertical weights: stored after the 4 * kScale horizontal weights, one
+    // group of 4 floats per tap_y.
+    const V wy0 = load_dup128(d, weights + 4 * kScale + 0 * 4);
+    const V wy1 = load_dup128(d, weights + 4 * kScale + 1 * 4);
+    const V wy2 = load_dup128(d, weights + 4 * kScale + 2 * 4);
+    const V wy3 = load_dup128(d, weights + 4 * kScale + 3 * 4);
+
+    // Accumulators for upsampled output, i.e. sum(horz * wy).
+    V u0 = setzero(d);
+    V u1 = setzero(d);
+    // t3 is our first valid row; ignore b/b2.
+    MulAddHorzConv(out_x, row_t3, in_xsize, wrap_x, weights, wy0, &u0, &u1);
+    MulAddHorzConv(out_x, row_t2, in_xsize, wrap_x, weights, wy1, &u0, &u1);
+    MulAddHorzConv(out_x, row_t, in_xsize, wrap_x, weights, wy2, &u0, &u1);
+    MulAddHorzConv(out_x, row_m, in_xsize, wrap_x, weights, wy3, &u0, &u1);
+    // stream is slightly slower (for both in-L3 and larger outputs)
+    store(u0, d, row_out + out_x);
+    store(u1, d, row_out + out_x + d.N);
+  }
+
+ private:
+#if SIMD_TARGET_VALUE == SIMD_NONE
+  // Without SIMD, there's no point in the pair unrolling because we cannot
+  // reuse anything between them. This function produces a single output pixel.
+  // The 4-tap horizontal result is multiplied by "wy" and added to *out.
+  template <class WrapX>
+  static SIMD_ATTR PIK_INLINE void MulAddHorzConv1(
+      const size_t out_x, const float* PIK_RESTRICT row_in,
+      const size_t in_xsize, const WrapX wrap_x,
+      const float* PIK_RESTRICT weights, const V wy, V* PIK_RESTRICT out) {
+    const D d;
+    // Horizontal weights are indexed [tap_x][mod_x], kScale entries per tap.
+    const size_t mod_x = out_x % kScale;
+    const V wx0 = load(d, weights + mod_x + 0 * kScale);
+    const V wx1 = load(d, weights + mod_x + 1 * kScale);
+    const V wx2 = load(d, weights + mod_x + 2 * kScale);
+    const V wx3 = load(d, weights + mod_x + 3 * kScale);
+
+    // We'll load 4 input values from these (clamped) coordinates.
+    const int64_t in_x = InFromOut(out_x, mod_x);
+    const int64_t in_x0 = wrap_x(in_x + 0, in_xsize);
+    const int64_t in_x1 = wrap_x(in_x + 1, in_xsize);
+    const int64_t in_x2 = wrap_x(in_x + 2, in_xsize);
+    const int64_t in_x3 = wrap_x(in_x + 3, in_xsize);
+
+    const V v0 = set1(d, row_in[in_x0]);
+    const V v1 = set1(d, row_in[in_x1]);
+    const V v2 = set1(d, row_in[in_x2]);
+    const V v3 = set1(d, row_in[in_x3]);
+
+    // Two independent partial sums (taps 0+2 and 1+3), combined at the end.
+    const V m0 = v0 * wx0;
+    const V m1 = v1 * wx1;
+
+    const V m2 = mul_add(v2, wx2, m0);
+    const V m3 = mul_add(v3, wx3, m1);
+
+    *out = mul_add(m2 + m3, wy, *out);
+  }
+#endif
+
+  // Computes two vectors of horizontal 1D convolution results, multiplies them
+  // with the Y weight "wy" and accumulates into out0/1.
+  // There are three implementations, selected by SIMD_TARGET_VALUE: AVX2,
+  // any other SIMD target, and a scalar fallback.
+  template <class WrapX>
+  static SIMD_ATTR PIK_INLINE void MulAddHorzConv(
+      const size_t out_x, const float* PIK_RESTRICT row_in,
+      const size_t in_xsize, const WrapX wrap_x,
+      const float* PIK_RESTRICT weights, const V wy, V* PIK_RESTRICT out0,
+      V* PIK_RESTRICT out1) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+    const D d;
+    const size_t mod_x = 0;  // because 2 * d.N == 2 * kScale
+
+    // Load weights for all mod_x values.
+    const V wx0 = load(d, weights + mod_x + 0 * kScale);
+    const V wx1 = load(d, weights + mod_x + 1 * kScale);
+    const V wx2 = load(d, weights + mod_x + 2 * kScale);
+    const V wx3 = load(d, weights + mod_x + 3 * kScale);
+
+    // We'll load 6 input values from these (clamped) coordinates.
+    const int64_t in_x = InFromOut(out_x, mod_x);
+    const int64_t in_x0 = wrap_x(in_x + 0, in_xsize);
+    const int64_t in_x1 = wrap_x(in_x + 1, in_xsize);
+    const int64_t in_x2 = wrap_x(in_x + 2, in_xsize);
+    const int64_t in_x3 = wrap_x(in_x + 3, in_xsize);
+    const int64_t in_x4 = wrap_x(in_x + 4, in_xsize);
+    const int64_t in_x5 = wrap_x(in_x + 5, in_xsize);
+
+    const V in0 = set1(d, row_in[in_x0]);
+    const V in1 = set1(d, row_in[in_x1]);
+    const V in2 = set1(d, row_in[in_x2]);
+    const V in3 = set1(d, row_in[in_x3]);
+    // Upper half = in1, lower half = in0.
+    const V v0 = concat_hi_lo(in1, in0);
+    const V v1 = concat_hi_lo(in2, in1);
+    const V v2 = concat_hi_lo(in3, in2);
+    const V m0 = v0 * wx0;
+    const V m1 = v1 * wx1;
+    // out1 is the result for out_x + kScale, basically unrolling the caller's
+    // loop once. This gives a 1.25x overall speedup because we reuse the
+    // weights and v1..3, and hide the multiplication latency.
+    const V n0 = v1 * wx0;
+    const V n1 = v2 * wx1;
+
+    const V in4 = set1(d, row_in[in_x4]);
+    const V in5 = set1(d, row_in[in_x5]);
+    const V v3 = concat_hi_lo(in4, in3);
+    const V v4 = concat_hi_lo(in5, in4);
+    const V m2 = mul_add(v2, wx2, m0);
+    const V m3 = mul_add(v3, wx3, m1);
+    const V n2 = mul_add(v3, wx2, n0);
+    const V n3 = mul_add(v4, wx3, n1);
+
+    *out0 = mul_add(m2 + m3, wy, *out0);
+    *out1 = mul_add(n2 + n3, wy, *out1);
+#elif SIMD_TARGET_VALUE != SIMD_NONE
+    const D d;
+
+    // Load first two weights for the first and second vectors.
+    // The "H" variants start 4 floats later, i.e. at mod_x = 4.
+    constexpr size_t mod_x = 0;  // because 2 * d.N == kScale
+    const V wx0 = load(d, weights + mod_x + 0 * kScale);
+    const V wx1 = load(d, weights + mod_x + 1 * kScale);
+    const V wx0H = load(d, weights + mod_x + 0 * kScale + 4);
+    const V wx1H = load(d, weights + mod_x + 1 * kScale + 4);
+
+    // We'll load 5 input values from these (clamped) coordinates.
+    const int64_t in_x = InFromOut(out_x, mod_x);
+    const int64_t in_x0 = wrap_x(in_x + 0, in_xsize);
+    const int64_t in_x1 = wrap_x(in_x + 1, in_xsize);
+    const int64_t in_x2 = wrap_x(in_x + 2, in_xsize);
+    const int64_t in_x3 = wrap_x(in_x + 3, in_xsize);
+    const int64_t in_x4 = wrap_x(in_x + 4, in_xsize);
+
+    const V v0 = set1(d, row_in[in_x0]);
+    const V v1 = set1(d, row_in[in_x1]);
+    const V v2 = set1(d, row_in[in_x2]);
+    // m* accumulate the first output vector (inputs 0..3), n* the second
+    // (inputs 1..4, combined with the "H" weights).
+    const V m0 = v0 * wx0;
+    const V m1 = v1 * wx1;
+    const V n0 = v1 * wx0H;
+    const V n1 = v2 * wx1H;
+
+    const V wx2 = load(d, weights + mod_x + 2 * kScale);
+    const V wx3 = load(d, weights + mod_x + 3 * kScale);
+    const V wx2H = load(d, weights + mod_x + 2 * kScale + 4);
+    const V wx3H = load(d, weights + mod_x + 3 * kScale + 4);
+
+    const V v3 = set1(d, row_in[in_x3]);
+    const V v4 = set1(d, row_in[in_x4]);
+    const V m2 = mul_add(v2, wx2, m0);
+    const V m3 = mul_add(v3, wx3, m1);
+    const V n2 = mul_add(v3, wx2H, n0);
+    const V n3 = mul_add(v4, wx3H, n1);
+
+    *out0 = mul_add(m2 + m3, wy, *out0);
+    *out1 = mul_add(n2 + n3, wy, *out1);
+#else
+    // Scalar fallback: the "pair" is simply two adjacent output pixels.
+    MulAddHorzConv1(out_x + 0, row_in, in_xsize, wrap_x, weights, wy, out0);
+    MulAddHorzConv1(out_x + 1, row_in, in_xsize, wrap_x, weights, wy, out1);
+#endif
+  }
+};
+
+// Single-pass 8x cubic upsampling for not necessarily separable 4x4 kernels.
+// Unused: we need 6x6 for sufficient quality.
+class GeneralUpsampler8 : public Upsampler8Base<2, GeneralUpsampler8> {
+ public:
+  static constexpr size_t kWeightsPerModY = kWidth * kWidth * kScale;
+
+  // Extracts the required kind of weights from Kernel. Type-safe: compile error
+  // if kernel is unable to precompute non-separated weights.
+  template <class Kernel>
+  static PIK_INLINE const float* PIK_RESTRICT GetWeights(const Kernel& kernel) {
+    return kernel.Weights2D();
+  }
+
+  // Stores 2 vectors of upsampled pixels to row_out + out_x. "weights" are
+  // the return value of GetWeights.
+  template <class WrapX>
+  static SIMD_ATTR PIK_INLINE void ProducePair(
+      const size_t out_x, const float* PIK_RESTRICT row_t3,
+      const float* PIK_RESTRICT row_t2, const float* PIK_RESTRICT row_t,
+      const float* PIK_RESTRICT row_m, const float* PIK_RESTRICT row_b,
+      const float* PIK_RESTRICT row_b2, const size_t in_xsize,
+      const WrapX wrap_x, const float* PIK_RESTRICT weights,
+      float* PIK_RESTRICT row_out) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+    const D d;
+    const int64_t mod_x = 0;  // because 2 * d.N == 2 * kScale
+
+    // We'll load 6 input values from these (clamped) coordinates.
+    const int64_t in_x = InFromOut(out_x, mod_x);
+    const int64_t in_x0 = wrap_x(in_x + 0, in_xsize);
+    const int64_t in_x1 = wrap_x(in_x + 1, in_xsize);
+    const int64_t in_x2 = wrap_x(in_x + 2, in_xsize);
+    const int64_t in_x3 = wrap_x(in_x + 3, in_xsize);
+    const int64_t in_x4 = wrap_x(in_x + 4, in_xsize);
+    const int64_t in_x5 = wrap_x(in_x + 5, in_xsize);
+
+    V in0, in1, in2, in3, in4, in5;
+    V v0, v1, v2, v3, v4;
+    V w0, w1, w2, w3;
+
+    // (broadcastss only requires load ports, not port5.)
+    // Start at t3, our top row; ignore b/b2.
+    in0 = set1(d, row_t3[in_x0]);
+    in1 = set1(d, row_t3[in_x1]);
+    in2 = set1(d, row_t3[in_x2]);
+    in3 = set1(d, row_t3[in_x3]);
+    in4 = set1(d, row_t3[in_x4]);
+    in5 = set1(d, row_t3[in_x5]);
+    // v := upper half = next X, lower half = current X. Note that port5
+    // is underutilized; blendps should use ports 015 but IACA only shows 01.
+    // However, using concat_lo_lo for some of these is actually slower.
+    v0 = concat_hi_lo(in1, in0);
+    v1 = concat_hi_lo(in2, in1);
+    v2 = concat_hi_lo(in3, in2);
+    v3 = concat_hi_lo(in4, in3);
+    v4 = concat_hi_lo(in5, in4);
+    // wyx[i] is the weight for tap_x=x, tap_y=y and mod_x=i (mod_y was
+    // already used to select the "weights" range). Reused once.
+    w0 = load(d, weights + 0 * kScale);
+    w1 = load(d, weights + 1 * kScale);
+    w2 = load(d, weights + 2 * kScale);
+    w3 = load(d, weights + 3 * kScale);
+    // 4 separate accumulators for inputs * weights; their sum is the result.
+    const V m00 = v0 * w0;
+    const V m01 = v1 * w1;
+    const V m02 = v2 * w2;
+    const V m03 = v3 * w3;
+    // For the second output vector, use same weights but skip 1 input vector.
+    const V n00 = v1 * w0;
+    const V n01 = v2 * w1;
+    const V n02 = v3 * w2;
+    const V n03 = v4 * w3;
+
+    // Prevents clang from doing 13 successive broadcast+blend; it's about 2%
+    // faster to separate them into groups of 5.
+    std::atomic_thread_fence(std::memory_order_release);
+
+    in0 = set1(d, row_t2[in_x0]);
+    in1 = set1(d, row_t2[in_x1]);
+    in2 = set1(d, row_t2[in_x2]);
+    in3 = set1(d, row_t2[in_x3]);
+    in4 = set1(d, row_t2[in_x4]);
+    in5 = set1(d, row_t2[in_x5]);
+    v0 = concat_hi_lo(in1, in0);
+    v1 = concat_hi_lo(in2, in1);
+    v2 = concat_hi_lo(in3, in2);
+    v3 = concat_hi_lo(in4, in3);
+    v4 = concat_hi_lo(in5, in4);
+    w0 = load(d, weights + 4 * kScale);
+    w1 = load(d, weights + 5 * kScale);
+    w2 = load(d, weights + 6 * kScale);
+    w3 = load(d, weights + 7 * kScale);
+    const V m10 = mul_add(v0, w0, m00);
+    const V m11 = mul_add(v1, w1, m01);
+    const V m12 = mul_add(v2, w2, m02);
+    const V m13 = mul_add(v3, w3, m03);
+    const V n10 = mul_add(v1, w0, n00);
+    const V n11 = mul_add(v2, w1, n01);
+    const V n12 = mul_add(v3, w2, n02);
+    const V n13 = mul_add(v4, w3, n03);
+
+    in0 = set1(d, row_t[in_x0]);
+    in1 = set1(d, row_t[in_x1]);
+    in2 = set1(d, row_t[in_x2]);
+    in3 = set1(d, row_t[in_x3]);
+    in4 = set1(d, row_t[in_x4]);
+    in5 = set1(d, row_t[in_x5]);
+    v0 = concat_hi_lo(in1, in0);
+    v1 = concat_hi_lo(in2, in1);
+    v2 = concat_hi_lo(in3, in2);
+    v3 = concat_hi_lo(in4, in3);
+    v4 = concat_hi_lo(in5, in4);
+    w0 = load(d, weights + 8 * kScale);
+    w1 = load(d, weights + 9 * kScale);
+    w2 = load(d, weights + 10 * kScale);
+    w3 = load(d, weights + 11 * kScale);
+    const V m20 = mul_add(v0, w0, m10);
+    const V m21 = mul_add(v1, w1, m11);
+    const V m22 = mul_add(v2, w2, m12);
+    const V m23 = mul_add(v3, w3, m13);
+    const V n20 = mul_add(v1, w0, n10);
+    const V n21 = mul_add(v2, w1, n11);
+    const V n22 = mul_add(v3, w2, n12);
+    const V n23 = mul_add(v4, w3, n13);
+
+    in0 = set1(d, row_m[in_x0]);
+    in1 = set1(d, row_m[in_x1]);
+    in2 = set1(d, row_m[in_x2]);
+    in3 = set1(d, row_m[in_x3]);
+    in4 = set1(d, row_m[in_x4]);
+    in5 = set1(d, row_m[in_x5]);
+    v0 = concat_hi_lo(in1, in0);
+    v1 = concat_hi_lo(in2, in1);
+    v2 = concat_hi_lo(in3, in2);
+    v3 = concat_hi_lo(in4, in3);
+    v4 = concat_hi_lo(in5, in4);
+    w0 = load(d, weights + 12 * kScale);
+    w1 = load(d, weights + 13 * kScale);
+    w2 = load(d, weights + 14 * kScale);
+    w3 = load(d, weights + 15 * kScale);
+    const V m30 = mul_add(v0, w0, m20);
+    const V m31 = mul_add(v1, w1, m21);
+    const V m32 = mul_add(v2, w2, m22);
+    const V m33 = mul_add(v3, w3, m23);
+    const V n30 = mul_add(v1, w0, n20);
+    const V n31 = mul_add(v2, w1, n21);
+    const V n32 = mul_add(v3, w2, n22);
+    const V n33 = mul_add(v4, w3, n23);
+    const V k1 = set1(d, 1.0f);
+    const V sum0_01 = mul_add(m30, k1, m31);
+    const V sum0_23 = mul_add(m32, k1, m33);
+    const V sum1_01 = mul_add(n30, k1, n31);
+    const V sum1_23 = mul_add(n32, k1, n33);
+    const V sum0 = mul_add(sum0_01, k1, sum0_23);
+    const V sum1 = mul_add(sum1_01, k1, sum1_23);
+    store(sum0, d, row_out + out_x);
+    store(sum1, d, row_out + out_x + d.N);
+#elif SIMD_TARGET_VALUE != SIMD_NONE
+    const D d;
+    const int64_t mod_x = 0;  // because 2 * d.N == kScale.
+    // wyx[i] is the weight for tap_x=x, tap_y=y and mod_x=i (mod_y was already
+    // used to select the "weights" range). Reused once.
+    const V w00 = load(d, weights + 0 * kScale);
+    const V w01 = load(d, weights + 1 * kScale);
+    const V w02 = load(d, weights + 2 * kScale);
+    const V w03 = load(d, weights + 3 * kScale);
+    const V w10 = load(d, weights + 4 * kScale);
+    const V w11 = load(d, weights + 5 * kScale);
+    const V w12 = load(d, weights + 6 * kScale);
+    const V w13 = load(d, weights + 7 * kScale);
+    const V w20 = load(d, weights + 8 * kScale);
+    const V w21 = load(d, weights + 9 * kScale);
+    const V w22 = load(d, weights + 10 * kScale);
+    const V w23 = load(d, weights + 11 * kScale);
+    const V w30 = load(d, weights + 12 * kScale);
+    const V w31 = load(d, weights + 13 * kScale);
+    const V w32 = load(d, weights + 14 * kScale);
+    const V w33 = load(d, weights + 15 * kScale);
+
+    // Same, but wyxH has mod_x=i+4.
+    const V w00H = load(d, weights + 0 * kScale + d.N);
+    const V w01H = load(d, weights + 1 * kScale + d.N);
+    const V w02H = load(d, weights + 2 * kScale + d.N);
+    const V w03H = load(d, weights + 3 * kScale + d.N);
+    const V w10H = load(d, weights + 4 * kScale + d.N);
+    const V w11H = load(d, weights + 5 * kScale + d.N);
+    const V w12H = load(d, weights + 6 * kScale + d.N);
+    const V w13H = load(d, weights + 7 * kScale + d.N);
+    const V w20H = load(d, weights + 8 * kScale + d.N);
+    const V w21H = load(d, weights + 9 * kScale + d.N);
+    const V w22H = load(d, weights + 10 * kScale + d.N);
+    const V w23H = load(d, weights + 11 * kScale + d.N);
+    const V w30H = load(d, weights + 12 * kScale + d.N);
+    const V w31H = load(d, weights + 13 * kScale + d.N);
+    const V w32H = load(d, weights + 14 * kScale + d.N);
+    const V w33H = load(d, weights + 15 * kScale + d.N);
+
+    // We'll load 5 input values from these (clamped) coordinates.
+    const int64_t in_x = InFromOut(out_x, mod_x);
+    const int64_t in_x0 = wrap_x(in_x + 0, in_xsize);
+    const int64_t in_x1 = wrap_x(in_x + 1, in_xsize);
+    const int64_t in_x2 = wrap_x(in_x + 2, in_xsize);
+    const int64_t in_x3 = wrap_x(in_x + 3, in_xsize);
+    const int64_t in_x4 = wrap_x(in_x + 4, in_xsize);
+
+    const V v00 = set1(d, row_t3[in_x0]);
+    const V v01 = set1(d, row_t3[in_x1]);
+    const V v02 = set1(d, row_t3[in_x2]);
+    const V v03 = set1(d, row_t3[in_x3]);
+    const V v04 = set1(d, row_t3[in_x4]);
+    const V v10 = set1(d, row_t2[in_x0]);
+    const V v11 = set1(d, row_t2[in_x1]);
+    const V v12 = set1(d, row_t2[in_x2]);
+    const V v13 = set1(d, row_t2[in_x3]);
+    const V v14 = set1(d, row_t2[in_x4]);
+    const V v20 = set1(d, row_t[in_x0]);
+    const V v21 = set1(d, row_t[in_x1]);
+    const V v22 = set1(d, row_t[in_x2]);
+    const V v23 = set1(d, row_t[in_x3]);
+    const V v24 = set1(d, row_t[in_x4]);
+    const V v30 = set1(d, row_m[in_x0]);
+    const V v31 = set1(d, row_m[in_x1]);
+    const V v32 = set1(d, row_m[in_x2]);
+    const V v33 = set1(d, row_m[in_x3]);
+    const V v34 = set1(d, row_m[in_x4]);
+
+    const V m00 = v00 * w00;
+    const V m01 = v01 * w01;
+    const V m02 = v02 * w02;
+    const V m03 = v03 * w03;
+    const V n00 = v01 * w00H;
+    const V n01 = v02 * w01H;
+    const V n02 = v03 * w02H;
+    const V n03 = v04 * w03H;
+
+    const V m10 = mul_add(v10, w10, m00);
+    const V m11 = mul_add(v11, w11, m01);
+    const V m12 = mul_add(v12, w12, m02);
+    const V m13 = mul_add(v13, w13, m03);
+    const V n10 = mul_add(v11, w10H, n00);
+    const V n11 = mul_add(v12, w11H, n01);
+    const V n12 = mul_add(v13, w12H, n02);
+    const V n13 = mul_add(v14, w13H, n03);
+
+    const V m20 = mul_add(v20, w20, m10);
+    const V m21 = mul_add(v21, w21, m11);
+    const V m22 = mul_add(v22, w22, m12);
+    const V m23 = mul_add(v23, w23, m13);
+    const V n20 = mul_add(v21, w20H, n10);
+    const V n21 = mul_add(v22, w21H, n11);
+    const V n22 = mul_add(v23, w22H, n12);
+    const V n23 = mul_add(v24, w23H, n13);
+
+    const V m30 = mul_add(v30, w30, m20);
+    const V m31 = mul_add(v31, w31, m21);
+    const V m32 = mul_add(v32, w32, m22);
+    const V m33 = mul_add(v33, w33, m23);
+    const V n30 = mul_add(v31, w30H, n20);
+    const V n31 = mul_add(v32, w31H, n21);
+    const V n32 = mul_add(v33, w32H, n22);
+    const V n33 = mul_add(v34, w33H, n23);
+
+    const V sum0 = (m30 + m31) + (m32 + m33);
+    const V sum1 = (n30 + n31) + (n32 + n33);
+    store(sum0, d, row_out + out_x);
+    store(sum1, d, row_out + out_x + d.N);
+#else
+    weights += (out_x % kScale);
+    ProduceSingle(out_x + 0, row_t3, row_t2, row_t, row_m, in_xsize, wrap_x,
+                  weights + 0, row_out);
+    ProduceSingle(out_x + 1, row_t3, row_t2, row_t, row_m, in_xsize, wrap_x,
+                  weights + 1, row_out);
+#endif
+  }
+
+ private:
+#if SIMD_TARGET_VALUE == SIMD_NONE
+  template <class WrapX>  // WrapX: callable as wrap_x(x, in_xsize).
+  static SIMD_ATTR PIK_INLINE void ProduceSingle(  // Result: row_out + out_x.
+      const size_t out_x, const float* PIK_RESTRICT row_t2,
+      const float* PIK_RESTRICT row_t, const float* PIK_RESTRICT row_m,
+      const float* PIK_RESTRICT row_b, const size_t in_xsize,
+      const WrapX wrap_x, const float* PIK_RESTRICT weights,
+      float* PIK_RESTRICT row_out) {
+    const D d;
+    const int64_t mod_x = out_x % kScale;  // Only used to derive in_x below.
+    // Output = sum over 4 rows (t2, t, m, b) x 4 columns of input * weight.
+    // We'll load 4 input values from these (wrap_x-adjusted) coordinates.
+    const int64_t in_x = InFromOut(out_x, mod_x);
+    const int64_t in_x0 = wrap_x(in_x + 0, in_xsize);
+    const int64_t in_x1 = wrap_x(in_x + 1, in_xsize);
+    const int64_t in_x2 = wrap_x(in_x + 2, in_xsize);
+    const int64_t in_x3 = wrap_x(in_x + 3, in_xsize);
+    // Row 1 of 4 (row_t2): plain products start the four accumulator chains.
+    const V v00 = set1(d, row_t2[in_x0]);
+    const V v01 = set1(d, row_t2[in_x1]);
+    const V v02 = set1(d, row_t2[in_x2]);
+    const V v03 = set1(d, row_t2[in_x3]);
+    // mod_y and mod_x have determined weights; tap (y, x) is at (4 * y + x) * kScale.
+    const V w00 = load(d, weights + 0 * kScale);
+    const V w01 = load(d, weights + 1 * kScale);
+    const V w02 = load(d, weights + 2 * kScale);
+    const V w03 = load(d, weights + 3 * kScale);
+    const V m00 = v00 * w00;
+    const V m01 = v01 * w01;
+    const V m02 = v02 * w02;
+    const V m03 = v03 * w03;
+    // Row 2 of 4 (row_t): mul_add extends each chain with the previous row's value.
+    const V v10 = set1(d, row_t[in_x0]);
+    const V v11 = set1(d, row_t[in_x1]);
+    const V v12 = set1(d, row_t[in_x2]);
+    const V v13 = set1(d, row_t[in_x3]);
+    const V w10 = load(d, weights + 4 * kScale);
+    const V w11 = load(d, weights + 5 * kScale);
+    const V w12 = load(d, weights + 6 * kScale);
+    const V w13 = load(d, weights + 7 * kScale);
+    const V m10 = mul_add(v10, w10, m00);
+    const V m11 = mul_add(v11, w11, m01);
+    const V m12 = mul_add(v12, w12, m02);
+    const V m13 = mul_add(v13, w13, m03);
+    // Row 3 of 4 (row_m).
+    const V v20 = set1(d, row_m[in_x0]);
+    const V v21 = set1(d, row_m[in_x1]);
+    const V v22 = set1(d, row_m[in_x2]);
+    const V v23 = set1(d, row_m[in_x3]);
+    const V w20 = load(d, weights + 8 * kScale);
+    const V w21 = load(d, weights + 9 * kScale);
+    const V w22 = load(d, weights + 10 * kScale);
+    const V w23 = load(d, weights + 11 * kScale);
+    const V m20 = mul_add(v20, w20, m10);
+    const V m21 = mul_add(v21, w21, m11);
+    const V m22 = mul_add(v22, w22, m12);
+    const V m23 = mul_add(v23, w23, m13);
+    // Row 4 of 4 (row_b).
+    const V v30 = set1(d, row_b[in_x0]);
+    const V v31 = set1(d, row_b[in_x1]);
+    const V v32 = set1(d, row_b[in_x2]);
+    const V v33 = set1(d, row_b[in_x3]);
+    const V w30 = load(d, weights + 12 * kScale);
+    const V w31 = load(d, weights + 13 * kScale);
+    const V w32 = load(d, weights + 14 * kScale);
+    const V w33 = load(d, weights + 15 * kScale);
+    const V m30 = mul_add(v30, w30, m20);
+    const V m31 = mul_add(v31, w31, m21);
+    const V m32 = mul_add(v32, w32, m22);
+    const V m33 = mul_add(v33, w33, m23);
+    const V sum = (m30 + m31) + (m32 + m33);  // Pairwise sum of the four chains.
+    store(sum, d, row_out + out_x);
+  }
+#endif
+};
+
+// Single-pass 8x cubic upsampling for not necessarily separable 6x6 kernels.
+class GeneralUpsampler8_6x6 : public Upsampler8Base<3, GeneralUpsampler8_6x6> {
+ public:
+  static constexpr size_t kWeightsPerModY = kWidth * kWidth * kScale;
+
+  // Returns kernel.Weights2D(), the non-separated weights. Type-safe: compile
+  // error if Kernel has no Weights2D() or its kRadius differs from kRadius here.
+  template <class Kernel>
+  static PIK_INLINE const float* PIK_RESTRICT GetWeights(const Kernel& kernel) {
+    static_assert(Kernel::kRadius == kRadius, "kRadius mismatch");
+    return kernel.Weights2D();
+  }
+
+  // Stores 2 vectors (2 * d.N floats) of upsampled pixels to row_out + out_x, each
+  // a weighted sum of 6 rows x 6 columns. "weights" are the return value of GetWeights.
+  template <class WrapX>
+  static SIMD_ATTR PIK_INLINE void ProducePair(
+      const size_t out_x, const float* PIK_RESTRICT row_t3,
+      const float* PIK_RESTRICT row_t2, const float* PIK_RESTRICT row_t,
+      const float* PIK_RESTRICT row_m, const float* PIK_RESTRICT row_b,
+      const float* PIK_RESTRICT row_b2, const size_t in_xsize,
+      const WrapX wrap_x, const float* PIK_RESTRICT weights,
+      float* PIK_RESTRICT row_out) {
+    const D d;
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+    const int64_t mod_x = 0;  // because 2 * d.N == 2 * kScale
+
+    // We'll load 8 input values from each row at these (wrapped) coordinates.
+    const int64_t in_x = InFromOut(out_x, mod_x);
+    const int64_t in_x0 = wrap_x(in_x + 0, in_xsize);
+    const int64_t in_x1 = wrap_x(in_x + 1, in_xsize);
+    const int64_t in_x2 = wrap_x(in_x + 2, in_xsize);
+    const int64_t in_x3 = wrap_x(in_x + 3, in_xsize);
+    const int64_t in_x4 = wrap_x(in_x + 4, in_xsize);
+    const int64_t in_x5 = wrap_x(in_x + 5, in_xsize);
+    const int64_t in_x6 = wrap_x(in_x + 6, in_xsize);
+    const int64_t in_x7 = wrap_x(in_x + 7, in_xsize);
+
+    // First row (tap_y = 0): multiply 6*8 weights by broadcasted inputs and
+    // begin adding them together:
+
+    // (set1 is faster than broadcast because it uses load ports, not port5.)
+    V in0 = set1(d, row_t3[in_x0]);
+    V in1 = set1(d, row_t3[in_x1]);
+    V in2 = set1(d, row_t3[in_x2]);
+    // w#[i] := weight for tap_x=# and mod_x=i (mod_y was already used to select
+    // the "weights" range). Reused once.
+    V w0 = load(d, weights + 0 * kScale);
+    // q := upper four = next pixel replicated 4x, lower four = current 4x.
+    // (port5 is underutilized; blendps should use ports 015 but IACA only
+    // shows 01. Using concat_lo_lo for some of these is still slower.)
+    V q0 = concat_hi_lo(in1, in0);
+    V q1 = concat_hi_lo(in2, in1);
+    // s := accumulators for inputs * weights; their sum is the first result.
+    V s0 = q0 * w0;
+    // t := accumulators for second output vector (same weights, shifted in#).
+    V t0 = q1 * w0;
+    V in3 = set1(d, row_t3[in_x3]);
+    V w1 = load(d, weights + 1 * kScale);
+    V q2 = concat_hi_lo(in3, in2);
+    V s1 = q1 * w1;
+    V t1 = q2 * w1;
+    V in4 = set1(d, row_t3[in_x4]);
+    V w2 = load(d, weights + 2 * kScale);
+    V q3 = concat_hi_lo(in4, in3);
+    V s2 = q2 * w2;
+    V t2 = q3 * w2;
+    V in5 = set1(d, row_t3[in_x5]);
+    V w3 = load(d, weights + 3 * kScale);
+    V q4 = concat_hi_lo(in5, in4);
+    V s3 = q3 * w3;
+    V t3 = q4 * w3;
+    V in6 = set1(d, row_t3[in_x6]);
+    V w4 = load(d, weights + 4 * kScale);
+    V q5 = concat_hi_lo(in6, in5);
+    V s4 = q4 * w4;
+    V t4 = q5 * w4;
+    V in7 = set1(d, row_t3[in_x7]);
+    V w5 = load(d, weights + 5 * kScale);
+    V q6 = concat_hi_lo(in7, in6);
+    // s/t0 already finished, take advantage of free add.
+    V s5 = mul_add(q5, w5, s0);
+    V t5 = mul_add(q6, w5, t0);
+
+    // This prevents spills, leading to a 1.8x speedup.
+    std::atomic_thread_fence(std::memory_order_release);
+    // Last 5 rows: multiply and accumulate into existing s/t. The accumulators
+    // rotate: s0 = q0*w0 + s1, ..., s4 = q4*w4 + s5, s5 = q5*w5 + (new) s0.
+#define PIK_MUL_WEIGHTS_ACCUMULATE(p_row, p_weights) \
+  in0 = set1(d, p_row[in_x0]);                       \
+  in1 = set1(d, p_row[in_x1]);                       \
+  in2 = set1(d, p_row[in_x2]);                       \
+  w0 = load(d, p_weights + 0 * kScale);              \
+  q0 = concat_hi_lo(in1, in0);                       \
+  q1 = concat_hi_lo(in2, in1);                       \
+  s0 = mul_add(q0, w0, s1);                          \
+  t0 = mul_add(q1, w0, t1);                          \
+  in3 = set1(d, p_row[in_x3]);                       \
+  w1 = load(d, p_weights + 1 * kScale);              \
+  q2 = concat_hi_lo(in3, in2);                       \
+  s1 = mul_add(q1, w1, s2);                          \
+  t1 = mul_add(q2, w1, t2);                          \
+  in4 = set1(d, p_row[in_x4]);                       \
+  w2 = load(d, p_weights + 2 * kScale);              \
+  q3 = concat_hi_lo(in4, in3);                       \
+  s2 = mul_add(q2, w2, s3);                          \
+  t2 = mul_add(q3, w2, t3);                          \
+  in5 = set1(d, p_row[in_x5]);                       \
+  w3 = load(d, p_weights + 3 * kScale);              \
+  q4 = concat_hi_lo(in5, in4);                       \
+  s3 = mul_add(q3, w3, s4);                          \
+  t3 = mul_add(q4, w3, t4);                          \
+  in6 = set1(d, p_row[in_x6]);                       \
+  w4 = load(d, p_weights + 4 * kScale);              \
+  q5 = concat_hi_lo(in6, in5);                       \
+  s4 = mul_add(q4, w4, s5);                          \
+  t4 = mul_add(q5, w4, t5);                          \
+  in7 = set1(d, p_row[in_x7]);                       \
+  w5 = load(d, p_weights + 5 * kScale);              \
+  q6 = concat_hi_lo(in7, in6);                       \
+  s5 = mul_add(q5, w5, s0);                          \
+  t5 = mul_add(q6, w5, t0)
+
+    PIK_MUL_WEIGHTS_ACCUMULATE(row_t2, weights + 6 * kScale);
+    PIK_MUL_WEIGHTS_ACCUMULATE(row_t, weights + 12 * kScale);
+    PIK_MUL_WEIGHTS_ACCUMULATE(row_m, weights + 18 * kScale);
+    PIK_MUL_WEIGHTS_ACCUMULATE(row_b, weights + 24 * kScale);
+    PIK_MUL_WEIGHTS_ACCUMULATE(row_b2, weights + 30 * kScale);
+#undef PIK_MUL_WEIGHTS_ACCUMULATE
+
+    // (s/t5 already include s/t0, which is why s0/t0 are absent from the sums.)
+    const V sum0 = (s1 + s2) + (s3 + s4) + s5;
+    const V sum1 = (t1 + t2) + (t3 + t4) + t5;
+    store(sum0, d, row_out + out_x);
+    store(sum1, d, row_out + out_x + d.N);
+#else
+    ProduceVector(out_x, row_t3, row_t2, row_t, row_m, row_b, row_b2, in_xsize,
+                  wrap_x, weights, row_out);
+    ProduceVector(out_x + d.N, row_t3, row_t2, row_t, row_m, row_b, row_b2,
+                  in_xsize, wrap_x, weights, row_out);
+#endif
+  }
+
+ private:
+#if SIMD_TARGET_VALUE != SIMD_AVX2
+  // If less than 8 lanes, produce a single output vector at a time because
+  // there is not much benefit from pairwise unrolling.
+  template <class WrapX>
+  static SIMD_ATTR PIK_INLINE void ProduceVector(
+      const size_t out_x, const float* PIK_RESTRICT row_t3,
+      const float* PIK_RESTRICT row_t2, const float* PIK_RESTRICT row_t,
+      const float* PIK_RESTRICT row_m, const float* PIK_RESTRICT row_b,
+      const float* PIK_RESTRICT row_b2, const size_t in_xsize,
+      const WrapX wrap_x, const float* PIK_RESTRICT weights,
+      float* PIK_RESTRICT row_out) {
+    const D d;
+    const int64_t mod_x = out_x % kScale;  // Also offsets every weight load.
+
+    // We'll load 6 input values from these (clamped) coordinates.
+    const int64_t in_x = InFromOut(out_x, mod_x);
+    const int64_t in_x0 = wrap_x(in_x + 0, in_xsize);
+    const int64_t in_x1 = wrap_x(in_x + 1, in_xsize);
+    const int64_t in_x2 = wrap_x(in_x + 2, in_xsize);
+    const int64_t in_x3 = wrap_x(in_x + 3, in_xsize);
+    const int64_t in_x4 = wrap_x(in_x + 4, in_xsize);
+    const int64_t in_x5 = wrap_x(in_x + 5, in_xsize);
+
+    // w#[i] := weight for tap_x=# at offset mod_x + i (mod_y was already used to
+    // select the "weights" range). Reloaded for each of the 6 input rows.
+    V w0 = load(d, weights + mod_x + 0 * kScale);
+    V w1 = load(d, weights + mod_x + 1 * kScale);
+    V w2 = load(d, weights + mod_x + 2 * kScale);
+    V w3 = load(d, weights + mod_x + 3 * kScale);
+    V w4 = load(d, weights + mod_x + 4 * kScale);
+    V w5 = load(d, weights + mod_x + 5 * kScale);
+    // q# := input pixel for tap_x=#, passed through set1 (first row: row_t3).
+    V q0 = set1(d, row_t3[in_x0]);
+    V q1 = set1(d, row_t3[in_x1]);
+    V q2 = set1(d, row_t3[in_x2]);
+    V q3 = set1(d, row_t3[in_x3]);
+    V q4 = set1(d, row_t3[in_x4]);
+    V q5 = set1(d, row_t3[in_x5]);
+    // s# := accumulator for tap_x=#; the sum of all six is the result.
+    V s0 = q0 * w0;
+    V s1 = q1 * w1;
+    V s2 = q2 * w2;
+    V s3 = q3 * w3;
+    V s4 = q4 * w4;
+    V s5 = q5 * w5;
+    // Remaining 5 rows: reload q#/w# and accumulate the products into s#.
+#define PIK_MUL_WEIGHTS_ACCUMULATE(p_row, p_weights) \
+  q0 = set1(d, p_row[in_x0]);                        \
+  q1 = set1(d, p_row[in_x1]);                        \
+  q2 = set1(d, p_row[in_x2]);                        \
+  q3 = set1(d, p_row[in_x3]);                        \
+  q4 = set1(d, p_row[in_x4]);                        \
+  q5 = set1(d, p_row[in_x5]);                        \
+  w0 = load(d, p_weights + mod_x + 0 * kScale);      \
+  w1 = load(d, p_weights + mod_x + 1 * kScale);      \
+  w2 = load(d, p_weights + mod_x + 2 * kScale);      \
+  w3 = load(d, p_weights + mod_x + 3 * kScale);      \
+  w4 = load(d, p_weights + mod_x + 4 * kScale);      \
+  w5 = load(d, p_weights + mod_x + 5 * kScale);      \
+  s0 += q0 * w0;                                     \
+  s1 += q1 * w1;                                     \
+  s2 += q2 * w2;                                     \
+  s3 += q3 * w3;                                     \
+  s4 += q4 * w4;                                     \
+  s5 += q5 * w5
+
+    PIK_MUL_WEIGHTS_ACCUMULATE(row_t2, weights + 6 * kScale);
+    PIK_MUL_WEIGHTS_ACCUMULATE(row_t, weights + 12 * kScale);
+    PIK_MUL_WEIGHTS_ACCUMULATE(row_m, weights + 18 * kScale);
+    PIK_MUL_WEIGHTS_ACCUMULATE(row_b, weights + 24 * kScale);
+    PIK_MUL_WEIGHTS_ACCUMULATE(row_b2, weights + 30 * kScale);
+#undef PIK_MUL_WEIGHTS_ACCUMULATE
+
+    const V sum = (s0 + s1) + (s2 + s3) + (s4 + s5);
+    store(sum, d, row_out + out_x);
+  }
+#endif
+};
+
+// (Possibly) multithreaded, variable scale: upsamples "in" into "out" by the
+// compile-time factor N. The work is forwarded to the Upsample overload that
+// takes an upsampler type: GeneralUpsampler8_6x6 when N is 8, otherwise
+// slow::GeneralUpsampler<N>.
+template <size_t N, class Image, class Executor, class Kernel>
+PIK_NOINLINE SIMD_ATTR void Upsample(const Executor executor, const Image& in,
+                                     const Kernel& kernel, Image* out) {
+  const bool use_fast_8x = (N == 8);
+  if (!use_fast_8x) {
+    Upsample<slow::GeneralUpsampler<N> >(executor, in, kernel, out);
+    return;
+  }
+  Upsample<GeneralUpsampler8_6x6>(executor, in, kernel, out);
+}
+
+}  // namespace pik
+
+#endif  // PIK_RESAMPLE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/resize.h b/codec/L2/demos/pikEnc/host/pik/resize.h
new file mode 100755
index 0000000000..b032b693c7
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/resize.h
@@ -0,0 +1,531 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_RESIZE_H_
+#define PIK_RESIZE_H_
+
+#include <stddef.h>
+
+#include "pik/common.h"
+#include "pik/image.h"
+#include "pik/status.h"
+#include "pik/upscaler.h"
+
+namespace pik {
+
+// Ideally, this is 1.0f, but butteraugli likes bigger values (me too),
+// like 1.25 for 2x resampling and 1.5 for 4x resampling.
+constexpr float kUpdateScale = 1.0f;
+
+namespace {
+// Contiguous accessor: logical element i is the float at start_[i].
+class Columns {
+ public:
+  Columns(float* base) : start_(base) {}
+
+  // Returns logical element i.
+  float Read(size_t i) const {
+    const float* const slot = start_ + i;
+    return *slot;
+  }
+
+  // Overwrites logical element i; const because only the pointee changes.
+  void Write(size_t i, float value) const {
+    float* const slot = start_ + i;
+    *slot = value;
+  }
+
+  float* start_;  // Logical element 0; never freed by this class.
+};
+
+// Accessor over every second float: logical element i is start_[2 * i].
+class EvenColumns {
+ public:
+  EvenColumns(float* base) : start_(base) {}
+
+  // Returns logical element i (physical index 2 * i).
+  float Read(size_t i) const {
+    const size_t pos = 2 * i;
+    return start_[pos];
+  }
+
+  // Overwrites logical element i (physical index 2 * i).
+  void Write(size_t i, float value) const {
+    const size_t pos = 2 * i;
+    start_[pos] = value;
+  }
+
+  float* start_;  // Physical element 0; never freed by this class.
+};
+
+// Accessor over the floats between the even ones: logical element i is
+// start_[2 * i + 1].
+class OddColumns {
+ public:
+  OddColumns(float* base) : start_(base) {}
+
+  // Returns logical element i (physical index 2 * i + 1).
+  float Read(size_t i) const {
+    const size_t pos = 2 * i + 1;
+    return start_[pos];
+  }
+
+  // Overwrites logical element i (physical index 2 * i + 1).
+  void Write(size_t i, float value) const {
+    const size_t pos = 2 * i + 1;
+    start_[pos] = value;
+  }
+
+  float* start_;  // Physical element 0; never freed by this class.
+};
+
+// Strided accessor: logical element i is start_[stride_ * i]. With the stride
+// set to the distance between rows, this walks down one image column.
+class Rows {
+ public:
+  Rows(float* base, size_t step) : start_(base), stride_(step) {}
+
+  // Returns logical element i.
+  float Read(size_t i) const {
+    const size_t pos = stride_ * i;
+    return start_[pos];
+  }
+
+  // Overwrites logical element i.
+  void Write(size_t i, float value) const {
+    const size_t pos = stride_ * i;
+    start_[pos] = value;
+  }
+
+  float* start_;   // Logical element 0; never freed by this class.
+  size_t stride_;  // Distance, in floats, between consecutive elements.
+};
+
+// Strided accessor over every second row: logical element i is
+// start_[2 * i * stride_].
+class EvenRows {
+ public:
+  EvenRows(float* base, size_t step) : start_(base), stride_(step) {}
+
+  // Returns logical element i.
+  float Read(size_t i) const {
+    const size_t pos = 2 * i * stride_;
+    return start_[pos];
+  }
+
+  // Overwrites logical element i.
+  void Write(size_t i, float value) const {
+    const size_t pos = 2 * i * stride_;
+    start_[pos] = value;
+  }
+
+  float* start_;   // Physical element 0; never freed by this class.
+  size_t stride_;  // Distance, in floats, between consecutive physical rows.
+};
+
+// Strided accessor over the rows between the even ones: logical element i is
+// start_[(2 * i + 1) * stride_].
+class OddRows {
+ public:
+  OddRows(float* base, size_t step) : start_(base), stride_(step) {}
+
+  // Returns logical element i.
+  float Read(size_t i) const {
+    const size_t pos = (2 * i + 1) * stride_;
+    return start_[pos];
+  }
+
+  // Overwrites logical element i.
+  void Write(size_t i, float value) const {
+    const size_t pos = (2 * i + 1) * stride_;
+    start_[pos] = value;
+  }
+
+  float* start_;   // Physical element 0; never freed by this class.
+  size_t stride_;  // Distance, in floats, between consecutive physical rows.
+};
+
+// TODO(user): SIMDify all the row/column processing.
+
+template <class From, class To>  // Both need Read(i); To also needs Write(i, v).
+void F2(size_t n, const From& from, const To& to, float* tmp) {  // tmp: n floats.
+  PIK_ASSERT(n > 8);  // Both seeding loops below read 8 samples.
+  constexpr float alpha[9] = {// a = (3 - sqrt(8)); [a; -a, a^2, -a^3, ...]
+                              0.1715728752538099023966225736f,
+                              -0.1715728752538099023966225736f,
+                              0.02943725152285941437973531699f,
+                              -0.005050633883346583881789307292f,
+                              0.0008665517772200889110005228959f,
+                              -0.0001486767799739495842138294335f,
+                              0.00002550890262360859428245360423f,
+                              -0.000004376635767701981480892173763f,
+                              0.0000007509119826032946028994350462f};
+  // mul scales the sum of the forward (tmp) and backward recursion results.
+  constexpr float mul = 0.5857864376269049511983113385f;  // (4 * a) / (1 + a)
+  // Forward pass into tmp: seed from the first 8 samples, then recurse.
+  float y_last = from.Read(0);
+  for (size_t i = 1; i <= 8; ++i) y_last += from.Read(i - 1) * alpha[i];
+  tmp[0] = y_last;
+  for (size_t i = 1; i < n; ++i) {
+    float y_current = from.Read(i) - alpha[0] * y_last;
+    tmp[i] = y_current;
+    y_last = y_current;
+  }
+  // Backward pass: seed from the last 8 samples, then recurse towards index 0.
+  y_last = from.Read(n - 1);
+  for (size_t i = 1; i <= 8; ++i) y_last += from.Read(n - i) * alpha[i];
+  float x_last = from.Read(n - 1);
+  to.Write(n - 1, mul * (tmp[n - 1] + y_last));
+  for (size_t i = n - 2; i < n; --i) {  // Ends when i wraps around past 0.
+    float y_current = x_last - alpha[0] * y_last;
+    x_last = from.Read(i);  // Read before Write(i): "to" may alias "from".
+    to.Write(i, mul * (tmp[i] + y_current));
+    y_last = y_current;
+  }
+}
+
+// Runs F2 and then, in place in "to", moves every result one slot towards the
+// end while halving it: slot i receives 0.5 * (F2 result i - 1). Slot 0 keeps
+// its own value, halved. "tmp" is scratch space forwarded to F2.
+template <class From, class To>
+void Phi2(size_t n, const From& from, const To& to, float* tmp) {
+  F2(n, from, to, tmp);
+  // Walk from the back so each source slot is read before it is overwritten.
+  size_t dst = n - 1;
+  while (dst != 0) {
+    const float shifted = to.Read(dst - 1);
+    to.Write(dst, 0.5f * shifted);
+    --dst;
+  }
+  const float first = to.Read(0);
+  to.Write(0, 0.5f * first);
+}
+
+// Halves a row: reads n floats from "from" and writes n / 2 floats to "to".
+// "tmp" must provide n floats of scratch space; its upper half holds the
+// intermediate signal. Steps: (1) F2 of the even samples, (2) odd sample minus
+// that result, (3) Phi2 of those differences, (4) even sample plus
+// kUpdateScale times the result of (3).
+void SubsampleRow2(size_t n, float* from, float* to, float* tmp) {
+  PIK_ASSERT(n > 8);
+  PIK_ASSERT(n % 2 == 0);
+  const size_t half = n / 2;
+  float* const detail = tmp + half;
+  const Columns detail_view(detail);
+
+  F2(half, EvenColumns(from), detail_view, tmp);
+  for (size_t k = 0; k != half; ++k) {
+    detail[k] = from[2 * k + 1] - detail[k];
+  }
+
+  Phi2(half, detail_view, detail_view, tmp);
+  // Slot k is written only after slot 2 * k was read, so "to" may be "from".
+  for (size_t k = 0; k != half; ++k) {
+    to[k] = from[2 * k] + kUpdateScale * detail[k];
+  }
+}
+
+// Strided variant of the same halving steps as SubsampleRow2: reads n samples
+// spaced from_stride floats apart and writes n / 2 results spaced to_stride
+// floats apart. "tmp" must provide n floats of scratch space.
+void SubsampleColumn2(size_t n, float* from, size_t from_stride, float* to,
+                      size_t to_stride, float* tmp) {
+  PIK_ASSERT(n > 8);
+  PIK_ASSERT(n % 2 == 0);
+  const size_t half = n / 2;
+  float* const detail = tmp + half;
+  const Columns detail_view(detail);
+
+  F2(half, EvenRows(from, from_stride), detail_view, tmp);
+  for (size_t k = 0; k != half; ++k) {
+    const float odd_sample = from[from_stride * (2 * k + 1)];
+    detail[k] = odd_sample - detail[k];
+  }
+
+  Phi2(half, detail_view, detail_view, tmp);
+  // Output k is written only after input 2 * k was read.
+  for (size_t k = 0; k != half; ++k) {
+    const float even_sample = from[from_stride * (2 * k)];
+    to[k * to_stride] = even_sample + kUpdateScale * detail[k];
+  }
+}
+
+// Doubles a row: n input floats become 2 * n floats in "to". Input sample i is
+// copied to slot 2 * i; F2 then fills every odd slot from the even slots.
+// "tmp" is scratch for F2 (n floats).
+void UpsampleRow2(size_t n, float* from, float* to, float* tmp) {
+  PIK_ASSERT(n > 8);
+  // Spread back to front so that "to" may be the same buffer as "from".
+  for (size_t src = n; src-- > 0;) {
+    to[2 * src] = from[src];
+  }
+  const EvenColumns known(to);
+  const OddColumns missing(to);
+  F2(n, known, missing, tmp);
+}
+
+// Doubles a column: n inputs spaced from_stride floats apart become 2 * n
+// outputs spaced to_stride floats apart. Input i is copied to output 2 * i;
+// F2 then fills every odd output from the even ones. "tmp" is scratch for F2.
+void UpsampleColumn2(size_t n, float* from, size_t from_stride, float* to,
+                     size_t to_stride, float* tmp) {
+  PIK_ASSERT(n > 8);
+  // Spread back to front so that "to" may be the same buffer as "from".
+  for (size_t src = n; src-- > 0;) {
+    to[to_stride * (2 * src)] = from[from_stride * src];
+  }
+  const EvenRows known(to, to_stride);
+  const OddRows missing(to, to_stride);
+  F2(n, known, missing, tmp);
+}
+
+// 5-tap coefficients used by Subsample32. Every group of 3 inputs yields 2
+// outputs: subL produces the first, subR the second. Tap order in both tables
+// is [left neighbour, x0, x1, x2, right neighbour].
+constexpr float subL[5] = {-0.1477632789908915, 0.6043134178154527,
+                           0.6017439248475215, -0.06092538825140858,
+                           0.00263132457932580};
+
+constexpr float subR[5] = {0.14335798104235847, -0.23802370655991587,
+                           0.12502505370893394, 0.7550194876366351,
+                           0.214621184171988};
+
+// Resamples n values read through "from" into 2 * n / 3 values written through
+// "to". n must be a multiple of 3 and at least 6. Groups are processed front
+// to back and every group is read completely before its two outputs are
+// written, so "to" may refer to the same storage as "from".
+template <class From, class To>
+void Subsample32(size_t n, const From& from, const To& to) {
+  PIK_ASSERT(n % 3 == 0);
+  // The edge groups read index 3 and index n - 4, and n3 - 1 below would wrap
+  // around for n == 0, so fewer than two groups cannot be handled.
+  PIK_ASSERT(n >= 6);
+  const size_t n3 = n / 3;
+
+  // Writes the output pair at t and t + 1. The taps are passed by value, i.e.
+  // they have all been read before the first write happens.
+  const auto emit = [&to](size_t t, float l, float x0, float x1, float x2,
+                          float r) {
+    to.Write(t + 0, l * subL[0] + x0 * subL[1] + x1 * subL[2] + x2 * subL[3] +
+                        r * subL[4]);
+    to.Write(t + 1, l * subR[0] + x0 * subR[1] + x1 * subR[2] + x2 * subR[3] +
+                        r * subR[4]);
+  };
+
+  // First group: there is no left neighbour, sample 0 is used twice.
+  emit(0, from.Read(0), from.Read(0), from.Read(1), from.Read(2),
+       from.Read(3));
+
+  // Interior groups.
+  for (size_t i = 1; i < n3 - 1; ++i) {
+    const size_t f = i * 3;
+    emit(i * 2, from.Read(f - 1), from.Read(f + 0), from.Read(f + 1),
+         from.Read(f + 2), from.Read(f + 3));
+  }
+
+  // Last group: there is no right neighbour, sample n - 1 is used twice.
+  emit(2 * n3 - 2, from.Read(n - 4), from.Read(n - 3), from.Read(n - 2),
+       from.Read(n - 1), from.Read(n - 1));
+}
+
+// 4-tap coefficients used by Upsample23. Every group of 2 inputs yields 3
+// outputs: upL, upC and upR produce the first, second and third. Tap order in
+// all tables is [left neighbour, x0, x1, right neighbour].
+constexpr float upL[4] = {0.38757486500910365, 0.7620777552137453,
+                          -0.22848886478020333, 0.0788362445573544};
+constexpr float upC[4] = {-0.11860750280548209, 0.868540521473126,
+                          0.3507502137687898, -0.100683232436434};
+constexpr float upR[4] = {-0.06717404018363016, 0.13550763911335584,
+                          1.0687743540167292, -0.137107952946455};
+
+// Resamples n values read through "from" into 3 * n / 2 values written through
+// "to". n must be even and at least 4. Groups are processed back to front and
+// every group is read completely before its three outputs are written, so
+// "to" may refer to the same storage as "from".
+template <class From, class To>
+void Upsample23(size_t n, const From& from, const To& to) {
+  PIK_ASSERT(n % 2 == 0);
+  // The edge groups read index n - 3 and index 2, and n2 - 2 below would wrap
+  // around for n < 4, so fewer than two groups cannot be handled.
+  PIK_ASSERT(n >= 4);
+  const size_t n2 = n / 2;
+
+  // Writes the output triple at t .. t + 2. The taps are passed by value, i.e.
+  // they have all been read before the first write happens.
+  const auto emit = [&to](size_t t, float l, float x0, float x1, float r) {
+    to.Write(t + 0, l * upL[0] + x0 * upL[1] + x1 * upL[2] + r * upL[3]);
+    to.Write(t + 1, l * upC[0] + x0 * upC[1] + x1 * upC[2] + r * upC[3]);
+    to.Write(t + 2, l * upR[0] + x0 * upR[1] + x1 * upR[2] + r * upR[3]);
+  };
+
+  // Last group: there is no right neighbour, sample n - 1 is used twice.
+  emit(3 * n2 - 3, from.Read(n - 3), from.Read(n - 2), from.Read(n - 1),
+       from.Read(n - 1));
+
+  // Interior groups, from the back.
+  for (size_t i = n2 - 2; i > 0; --i) {
+    const size_t f = i * 2;
+    emit(i * 3, from.Read(f - 1), from.Read(f + 0), from.Read(f + 1),
+         from.Read(f + 2));
+  }
+
+  // First group: there is no left neighbour, sample 0 is used twice.
+  emit(0, from.Read(0), from.Read(0), from.Read(1), from.Read(2));
+}
+
+}  // namespace
+
+// Returns "src" shrunk to 2/3 of its size in both directions. Both dimensions
+// of "src" must be multiples of 3. "src" is used as scratch: the vertical pass
+// runs in place, so its contents are overwritten.
+static inline Image3F DownsampleImage32(Image3F& src) {
+  const size_t w = src.xsize();
+  const size_t h = src.ysize();
+  PIK_ASSERT(w % 3 == 0);
+  PIK_ASSERT(h % 3 == 0);
+
+  const size_t out_h = (h / 3) * 2;
+  Image3F dst((w / 3) * 2, out_h);
+
+  // Vertical pass, in place: every column of src shrinks from h to out_h.
+  for (int c = 0; c < 3; ++c) {
+    for (size_t x = 0; x < w; ++x) {
+      Subsample32(h, Rows(src.PlaneRow(c, 0) + x, src.PixelsPerRow()),
+                  Rows(src.PlaneRow(c, 0) + x, src.PixelsPerRow()));
+    }
+  }
+
+  // Horizontal pass: the out_h remaining rows of src shrink into dst.
+  // (The former trailing "w = (w * 3) / 2" was an unused store with the ratio
+  // inverted; the output width is (w / 3) * 2, as used for dst above.)
+  for (int c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < out_h; ++y) {
+      Subsample32(w, Columns(src.PlaneRow(c, y)), Columns(dst.PlaneRow(c, y)));
+    }
+  }
+
+  return dst;
+}
+
+// Returns the top-left (orig_xsize / 3 * 2) x (orig_ysize / 3 * 2) region of
+// "src" enlarged to 3/2 of its size in both directions. orig_xsize and
+// orig_ysize must be multiples of 3.
+static inline Image3F UpsampleImage23(Image3F& src, size_t orig_xsize,
+                                      size_t orig_ysize) {
+  PIK_ASSERT(orig_xsize % 3 == 0);
+  PIK_ASSERT(orig_ysize % 3 == 0);
+  const size_t in_w = (orig_xsize / 3) * 2;
+  const size_t in_h = (orig_ysize / 3) * 2;
+  PIK_ASSERT(in_w <= src.xsize());
+  PIK_ASSERT(in_h <= src.ysize());
+
+  const size_t out_w = (in_w / 2) * 3;
+  const size_t out_h = (in_h / 2) * 3;
+  Image3F dst(out_w, out_h);
+
+  // Horizontal pass: each of the in_h source rows is widened into dst.
+  for (int plane = 0; plane != 3; ++plane) {
+    for (size_t y = 0; y != in_h; ++y) {
+      const Columns in_row(src.PlaneRow(plane, y));
+      const Columns out_row(dst.PlaneRow(plane, y));
+      Upsample23(in_w, in_row, out_row);
+    }
+  }
+
+  // Vertical pass, in place: each dst column grows from in_h to out_h.
+  for (int plane = 0; plane != 3; ++plane) {
+    for (size_t x = 0; x != out_w; ++x) {
+      const Rows column(dst.PlaneRow(plane, 0) + x, dst.PixelsPerRow());
+      Upsample23(in_h, column, column);
+    }
+  }
+
+  // dst = Blur(dst, 0.66666f);
+
+  return dst;
+}
+
+// Returns "src" shrunk by "factor" in both directions. Only factor 2 and
+// factor 4 produce a complete image; factor 1 returns dst without writing any
+// pixel. Both dimensions of "src" must be multiples of "factor". "src" is used
+// as scratch: all passes but the last run in place and overwrite it.
+static inline Image3F DownsampleImage2N(Image3F& src, size_t factor) {
+  size_t w = src.xsize();
+  size_t h = src.ysize();
+  // The passes below halve each dimension once (factor 2) or twice (factor 4).
+  // Any other factor above 1 would write more pixels than dst can hold.
+  PIK_ASSERT(factor == 1 || factor == 2 || factor == 4);
+  PIK_ASSERT(w % factor == 0);
+  PIK_ASSERT(h % factor == 0);
+
+  Image3F dst(w / factor, h / factor);
+  // Scratch for one row or column (n floats for a pass over n samples).
+  std::vector<float> tmp_storage(std::max(w, h));
+  float* tmp = tmp_storage.data();  // Unlike &v[0], defined for empty vectors.
+
+  if (factor >= 2) {
+    // Vertical halving, in place.
+    for (int c = 0; c < 3; ++c) {
+      for (size_t x = 0; x < w; ++x) {
+        SubsampleColumn2(h, src.PlaneRow(c, 0) + x, src.PixelsPerRow(),
+                         src.PlaneRow(c, 0) + x, src.PixelsPerRow(), tmp);
+      }
+    }
+    h /= 2;
+
+    if (factor >= 4) {
+      // One extra horizontal and one extra vertical halving, both in place.
+      for (int c = 0; c < 3; ++c) {
+        for (size_t y = 0; y < h; ++y) {
+          SubsampleRow2(w, src.PlaneRow(c, y), src.PlaneRow(c, y), tmp);
+        }
+      }
+      w /= 2;
+
+      for (int c = 0; c < 3; ++c) {
+        for (size_t x = 0; x < w; ++x) {
+          SubsampleColumn2(h, src.PlaneRow(c, 0) + x, src.PixelsPerRow(),
+                           src.PlaneRow(c, 0) + x, src.PixelsPerRow(), tmp);
+        }
+      }
+      h /= 2;
+    }
+
+    // Final horizontal halving writes straight into dst.
+    for (int c = 0; c < 3; ++c) {
+      for (size_t y = 0; y < h; ++y) {
+        SubsampleRow2(w, src.PlaneRow(c, y), dst.PlaneRow(c, y), tmp);
+      }
+    }
+  }
+
+  return dst;
+}
+
+// Returns the top-left (orig_xsize / factor) x (orig_ysize / factor) region of
+// "src" enlarged into an image of orig_xsize x orig_ysize pixels. Each
+// dimension is doubled once when factor >= 2 and a second time when
+// factor >= 4; below 2 the returned image is never written.
+static inline Image3F UpsampleImage2N(Image3F& src, size_t factor,
+                                      size_t orig_xsize, size_t orig_ysize) {
+  PIK_ASSERT(orig_xsize % factor == 0);
+  PIK_ASSERT(orig_ysize % factor == 0);
+  size_t cur_w = orig_xsize / factor;
+  size_t cur_h = orig_ysize / factor;
+  PIK_ASSERT(cur_w <= src.xsize());
+  PIK_ASSERT(cur_h <= src.ysize());
+  Image3F dst(cur_w * factor, cur_h * factor);
+  std::vector<float> tmp_storage(factor * std::max(cur_w, cur_h));
+  float* tmp = &tmp_storage[0];
+
+  if (factor < 2) {
+    return dst;
+  }
+
+  // Horizontal doubling from src into dst.
+  for (int plane = 0; plane != 3; ++plane) {
+    for (size_t y = 0; y != cur_h; ++y) {
+      UpsampleRow2(cur_w, src.PlaneRow(plane, y), dst.PlaneRow(plane, y), tmp);
+    }
+  }
+  cur_w *= 2;
+
+  if (factor >= 4) {
+    // One extra vertical and one extra horizontal doubling, in place in dst.
+    for (int plane = 0; plane != 3; ++plane) {
+      for (size_t x = 0; x != cur_w; ++x) {
+        float* const column = dst.PlaneRow(plane, 0) + x;
+        UpsampleColumn2(cur_h, column, dst.PixelsPerRow(), column,
+                        dst.PixelsPerRow(), tmp);
+      }
+    }
+    cur_h *= 2;
+
+    for (int plane = 0; plane != 3; ++plane) {
+      for (size_t y = 0; y != cur_h; ++y) {
+        float* const row = dst.PlaneRow(plane, y);
+        UpsampleRow2(cur_w, row, row, tmp);
+      }
+    }
+    cur_w *= 2;
+  }
+
+  // Final vertical doubling, in place in dst.
+  for (int plane = 0; plane != 3; ++plane) {
+    for (size_t x = 0; x != cur_w; ++x) {
+      float* const column = dst.PlaneRow(plane, 0) + x;
+      UpsampleColumn2(cur_h, column, dst.PixelsPerRow(), column,
+                      dst.PixelsPerRow(), tmp);
+    }
+  }
+
+  return dst;
+}
+
+// Returns a copy of "in" surrounded by replicated border pixels. Each output
+// dimension is the input dimension plus 2 * min_padding, rounded up to a
+// multiple of "factor"; the input is placed so that the left/top border takes
+// half of the added size (rounded down). "in" must not be empty: the first and
+// last pixel of every input row are read.
+static inline Image3F PadImage(const Image3F& in, size_t min_padding,
+                               size_t factor) {
+  const size_t xsize = DivCeil(in.xsize() + 2 * min_padding, factor) * factor;
+  const size_t ysize = DivCeil(in.ysize() + 2 * min_padding, factor) * factor;
+  const size_t left_padding = (xsize - in.xsize()) / 2;
+  const size_t top_padding = (ysize - in.ysize()) / 2;
+  // First column / row after the copied input. All indices are size_t so that
+  // they are compared and combined with the size_t extents without narrowing.
+  const size_t right_begin = in.xsize() + left_padding;
+  const size_t bottom_begin = in.ysize() + top_padding;
+  Image3F out(xsize, ysize);
+  for (int c = 0; c < 3; ++c) {
+    // Copy each input row and extend it to the left and to the right.
+    for (size_t y = 0; y < in.ysize(); ++y) {
+      const float* PIK_RESTRICT row_in = in.ConstPlaneRow(c, y);
+      float* PIK_RESTRICT row_out = out.PlaneRow(c, y + top_padding);
+      memcpy(row_out + left_padding, row_in, in.xsize() * sizeof(row_in[0]));
+      const float firstval = row_in[0];
+      for (size_t x = 0; x < left_padding; ++x) {
+        row_out[x] = firstval;
+      }
+      const float lastval = row_in[in.xsize() - 1];
+      for (size_t x = right_begin; x < xsize; ++x) {
+        row_out[x] = lastval;
+      }
+    }
+
+    // Top border: copies of the first (already extended) image row.
+    for (size_t y = 0; y < top_padding; ++y) {
+      const float* PIK_RESTRICT row_in = out.ConstPlaneRow(c, top_padding);
+      float* PIK_RESTRICT row_out = out.PlaneRow(c, y);
+      memcpy(row_out, row_in, xsize * sizeof(row_out[0]));
+    }
+    // Bottom border: copies of the last (already extended) image row.
+    for (size_t y = bottom_begin; y < ysize; ++y) {
+      const float* PIK_RESTRICT row_in = out.ConstPlaneRow(c, bottom_begin - 1);
+      float* PIK_RESTRICT row_out = out.PlaneRow(c, y);
+      memcpy(row_out, row_in, xsize * sizeof(row_out[0]));
+    }
+  }
+  return out;
+}
+
+// Returns the orig_xsize x orig_ysize window of "in" that PadImage-style
+// centring puts at offset ((in.xsize() - orig_xsize) / 2,
+// (in.ysize() - orig_ysize) / 2). Both dimensions of "in" must be multiples of
+// "factor" and at least as large as the requested size. "min_padding" is not
+// used; it is kept so that existing callers keep compiling.
+static inline Image3F UnpadImage(const Image3F& in, size_t min_padding,
+                                 size_t factor, size_t orig_xsize,
+                                 size_t orig_ysize) {
+  (void)min_padding;
+  PIK_ASSERT(in.xsize() % factor == 0);
+  PIK_ASSERT(in.ysize() % factor == 0);
+  // Otherwise the unsigned subtractions below wrap around and the copy reads
+  // far outside "in".
+  PIK_ASSERT(orig_xsize <= in.xsize());
+  PIK_ASSERT(orig_ysize <= in.ysize());
+  const size_t left_padding = (in.xsize() - orig_xsize) / 2;
+  const size_t top_padding = (in.ysize() - orig_ysize) / 2;
+  Image3F out(orig_xsize, orig_ysize);
+  for (int c = 0; c < 3; ++c) {
+    // size_t row index: it is compared against and added to size_t values.
+    for (size_t y = 0; y < orig_ysize; ++y) {
+      const float* PIK_RESTRICT row_in =
+          in.ConstPlaneRow(c, y + top_padding) + left_padding;
+      float* PIK_RESTRICT row_out = out.PlaneRow(c, y);
+      memcpy(row_out, row_in, orig_xsize * sizeof(row_in[0]));
+    }
+  }
+  return out;
+}
+
+// Minimum padding, in pixels, that the resize helpers request from PadImage.
+// It is the same for every factor2, so the argument is not used.
+static inline uint32_t ResizePadding(size_t factor2) {
+  (void)factor2;
+  const uint32_t padding = 1u;
+  return padding;
+}
+
+// Returns the size an image of size "src" has after downsampling with
+// "factor2" (2, 3, 4 or 8):
+//   2: unchanged;
+//   3: padded size rounded up to a multiple of 3, times 2/3;
+//   4 or 8: padded size divided by factor2 / 2, rounded up.
+static inline ImageSize DownsampledImageSize(ImageSize src, size_t factor2) {
+  PIK_ASSERT(factor2 == 2 || factor2 == 3 || factor2 == 4 || factor2 == 8);
+  ImageSize dst;
+  const uint32_t min_padding = ResizePadding(factor2);
+  switch (factor2) {
+    case 2:
+      dst.xsize = src.xsize;
+      dst.ysize = src.ysize;
+      break;
+    case 3:
+      dst.xsize = DivCeil(src.xsize + 2u * min_padding, 3u) * 2u;
+      dst.ysize = DivCeil(src.ysize + 2u * min_padding, 3u) * 2u;
+      break;
+    default: {  // 4 or 8
+      uint32_t factor = factor2 / 2;
+      dst.xsize = DivCeil(src.xsize + 2u * min_padding, factor);
+      dst.ysize = DivCeil(src.ysize + 2u * min_padding, factor);
+      break;
+    }
+  }
+  return dst;
+}
+
+// Pads "src" with PadImage and downsamples the padded copy: by 3:2 when
+// factor2 is 3, otherwise by factor2 / 2 (factor2 must be 3, 4 or 8).
+static inline Image3F DownsampleImage(Image3F& src, size_t factor2) {
+  PIK_ASSERT(factor2 == 3 || factor2 == 4 || factor2 == 8);
+  const bool three_to_two = (factor2 == 3);
+  const size_t min_padding = ResizePadding(factor2);
+  size_t factor = factor2 / 2;
+  if (three_to_two) {
+    factor = 3;
+  }
+  Image3F padded = PadImage(src, min_padding, factor);
+  if (three_to_two) {
+    return DownsampleImage32(padded);
+  }
+  return DownsampleImage2N(padded, factor);
+}
+
+// Upsamples "src" to the padded size that PadImage would give an image of
+// orig_xsize x orig_ysize pixels, then crops the result to that original size
+// with UnpadImage. factor2 must be 3 (3:2 resampling), 4 or 8 (factor2 / 2).
+static inline Image3F UpsampleImage(Image3F& src, size_t orig_xsize,
+                                    size_t orig_ysize, size_t factor2) {
+  PIK_ASSERT(factor2 == 3 || factor2 == 4 || factor2 == 8);
+  const bool three_to_two = (factor2 == 3);
+  size_t factor = factor2 / 2;
+  if (three_to_two) {
+    factor = 3;
+  }
+  const size_t min_padding = ResizePadding(factor2);
+  const size_t padded_xsize =
+      DivCeil(orig_xsize + 2 * min_padding, factor) * factor;
+  const size_t padded_ysize =
+      DivCeil(orig_ysize + 2 * min_padding, factor) * factor;
+  if (three_to_two) {
+    Image3F upsampled = UpsampleImage23(src, padded_xsize, padded_ysize);
+    return UnpadImage(upsampled, min_padding, factor, orig_xsize, orig_ysize);
+  }
+  Image3F upsampled = UpsampleImage2N(src, factor, padded_xsize, padded_ysize);
+  return UnpadImage(upsampled, min_padding, factor, orig_xsize, orig_ysize);
+}
+
+}  // namespace pik
+
+#endif  // PIK_RESIZE_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/robust_statistics.h b/codec/L2/demos/pikEnc/host/pik/robust_statistics.h
new file mode 100755
index 0000000000..ba6e883fb3
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/robust_statistics.h
@@ -0,0 +1,349 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_ROBUST_STATISTICS_H_
+#define PIK_ROBUST_STATISTICS_H_
+
+// Robust statistics: Mode, Median, MedianAbsoluteDeviation.
+
+#include <stddef.h>
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <utility>
+#include <vector>
+
+#include "pik/arch_specific.h"
+#include "pik/compiler_specific.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Integer overload: x / 2 rounded up for x >= 0, e.g. Half(5) == 3.
+template <class T, typename std::enable_if<
+                       std::numeric_limits<T>::is_integer>::type* = nullptr>
+inline T Half(T x) {
+  return (x + 1) / 2;  // NOTE: x + 1 overflows/wraps if x is the maximum of T
+}
+
+// Non-integer (e.g. floating-point) overload; mul is faster than div.
+template <class T, typename std::enable_if<
+                       !std::numeric_limits<T>::is_integer>::type* = nullptr>
+inline T Half(T x) {
+  return x * 0.5;  // 0.5 is a double; the product converts back to T
+}
+
+// Computes the median of samples[0, num_samples), which must be non-empty.
+// Reorders the input: afterwards no element before the middle index exceeds
+// the one stored there, and none after it is smaller. Average O(num_samples).
+template <typename T>
+T Median(T* samples, const size_t num_samples) {
+  PIK_ASSERT(num_samples != 0);
+  T* const middle = samples + num_samples / 2;
+  std::nth_element(samples, middle, samples + num_samples);
+  const T upper = *middle;
+  // Odd count: the middle element is the median.
+  if ((num_samples & 1) != 0) return upper;
+  // Even count: average it with the largest element of the lower part, which
+  // nth_element has moved (unsorted) in front of the middle.
+  const T lower = *std::max_element(samples, middle);
+  return Half(upper + lower);
+}
+
+template <typename T>
+T Median(std::vector<T>* samples) {
+  return Median(samples->data(), samples->size());  // reorders *samples
+}
+
+template <typename T>
+static inline T Median3(const T a, const T b, const T c) {
+  return std::max(std::min(a, b), std::min(c, std::max(a, b)));  // middle of 3
+}
+
+template <typename T>
+static inline T Median5(const T a, const T b, const T c, const T d, const T e) {
+  return Median3(e, std::max(std::min(a, b), std::min(c, d)),  // max of minima
+                 std::min(std::max(a, b), std::max(c, d)));    // min of maxima
+}
+
+// Robust variability measure: the median of |samples[i] - median|.
+template <typename T>
+T MedianAbsoluteDeviation(const T* samples, const size_t num_samples,
+                          const T median) {
+  PIK_ASSERT(num_samples != 0);
+  std::vector<T> distances;
+  distances.reserve(num_samples);
+  for (const T* it = samples; it != samples + num_samples; ++it) {
+    distances.push_back(std::abs(*it - median));
+  }
+  return Median(&distances);
+}
+
+template <typename T>
+T MedianAbsoluteDeviation(const std::vector<T>& samples, const T median) {
+  return MedianAbsoluteDeviation(samples.data(), samples.size(), median);
+}
+
+// Half{Range/Sample}Mode are implementations of "Robust estimators of the mode
+// and skewness of continuous data". The mode is less affected by outliers in
+// highly-skewed distributions than the median.
+
+// Robust estimator of the mode for data given as sorted values.
+// O(N*logN), N=num_values.
+class HalfSampleMode {
+ public:
+  // Returns mode. "sorted" must be ascending and num_values must be nonzero.
+  template <typename T>
+  T operator()(const T* const PIK_RESTRICT sorted,
+               const size_t num_values) const {
+    int64_t center = num_values / 2;  // begin at the middle index
+    int64_t width = num_values;       // begin with the whole range
+
+    // Zoom in on modal intervals of decreasing width. Stop before we reach
+    // width=1, i.e. single values, for which there is no "slope".
+    while (width > 2) {
+      // Round up so we can still reach the outer edges of odd widths.
+      width = Half(width);
+
+      center = CenterOfIntervalWithMinSlope(sorted, num_values, center, width);
+    }
+
+    return sorted[center];  // mode := middle value in modal interval.
+  }
+
+ private:
+  // Returns center of the densest (min-slope) region [c-radius, c+radius].
+  template <typename T>
+  static PIK_INLINE int64_t CenterOfIntervalWithMinSlope(
+      const T* PIK_RESTRICT sorted, const int64_t total_values,
+      const int64_t center, const int64_t width) {
+    const int64_t radius = Half(width);
+
+    auto compute_slope = [radius, total_values, sorted](
+                             int64_t c, int64_t* actual_center = nullptr) {
+      // For symmetry, check 2*radius+1 values, i.e. [min, max].
+      const int64_t min = std::max(c - radius, int64_t(0));
+      const int64_t max = std::min(c + radius, total_values - 1);
+      PIK_ASSERT(min < max);
+      PIK_ASSERT(sorted[min] <=
+                 sorted[max] + std::numeric_limits<float>::epsilon());
+      const float dx = max - min + 1;  // number of values in [min, max]
+      const float slope = (sorted[max] - sorted[min]) / dx;  // range / count
+
+      if (actual_center != nullptr) {
+        // c may be out of bounds, so return center of the clamped bounds.
+        *actual_center = Half(min + max);
+      }
+      return slope;
+    };
+
+    // First find min_slope for all centers.
+    float min_slope = std::numeric_limits<float>::max();
+    for (int64_t c = center - radius; c <= center + radius; ++c) {
+      min_slope = std::min(min_slope, compute_slope(c));
+    }
+
+    // Candidates := centers with slope ~= min_slope.
+    std::vector<int64_t> candidates;
+    for (int64_t c = center - radius; c <= center + radius; ++c) {
+      int64_t actual_center;
+      const float slope = compute_slope(c, &actual_center);
+      if (slope <= min_slope * 1.001f) {  // 0.1% tolerance
+        candidates.push_back(actual_center);
+      }
+    }
+
+    // Keep the median of the candidate centers (Median reorders candidates).
+    PIK_ASSERT(!candidates.empty());
+    if (candidates.size() == 1) return candidates[0];
+    return Median(&candidates);
+  }
+};
+
+// Robust estimator of the mode for data given as a CDF.
+// O(N*logN), N=num_bins.
+class HalfRangeMode {
+ public:
+  // Returns mode expressed as a histogram bin index. "cdf" must be weakly
+  // monotonically increasing, e.g. from std::partial_sum.
+  int operator()(const uint32_t* PIK_RESTRICT cdf,
+                 const size_t num_bins) const {
+    int center = num_bins / 2;  // begin at the middle bin
+    int width = num_bins;       // begin with all bins
+
+    // Zoom in on modal intervals of decreasing width. Stop before we reach
+    // width=1, i.e. original bins, because those are noisy.
+    while (width > 2) {
+      // Round up so we can still reach the outer edges of odd widths.
+      width = Half(width);
+
+      center = CenterOfIntervalWithMaxDensity(cdf, num_bins, center, width);
+    }
+
+    return center;  // mode := midpoint of modal interval.
+  }
+
+ private:
+  // Returns center of the densest interval [c-radius, c+radius].
+  static PIK_INLINE int CenterOfIntervalWithMaxDensity(
+      const uint32_t* PIK_RESTRICT cdf, const int total_bins, const int center,
+      const int width) {
+    const int radius = Half(width);
+
+    auto compute_density = [radius, total_bins, cdf](
+                               int c, int* actual_center = nullptr) {
+      // For symmetry, check 2*radius+1 bins, i.e. [min, max].
+      const int min = std::max(c - radius, 1);  // >= 1 for cdf[min - 1] below
+      const int max = std::min(c + radius, total_bins - 1);
+      PIK_ASSERT(min < max);
+      PIK_ASSERT(cdf[min] <= cdf[max - 1]);
+      const int num_bins = max - min + 1;  // bins in [min, max]
+      // Sum over [min, max] == CDF(max) - CDF(min-1).
+      const float density = float(cdf[max] - cdf[min - 1]) / num_bins;
+
+      if (actual_center != nullptr) {
+        // c may be out of bounds, so take center of the clamped bounds.
+        *actual_center = Half(min + max);
+      }
+      return density;
+    };
+
+    // First find max_density for all centers.
+    float max_density = 0.0f;
+    for (int c = center - radius; c <= center + radius; ++c) {
+      max_density = std::max(max_density, compute_density(c));
+    }
+
+    // Candidates := centers with density ~= max_density.
+    std::vector<int> candidates;
+    for (int c = center - radius; c <= center + radius; ++c) {
+      int actual_center;
+      const float density = compute_density(c, &actual_center);
+      if (density >= max_density * 0.999f) {  // 0.1% tolerance
+        candidates.push_back(actual_center);
+      }
+    }
+
+    // Keep the median of the candidate centers (Median reorders candidates).
+    PIK_ASSERT(!candidates.empty());
+    if (candidates.size() == 1) return candidates[0];
+    return Median(&candidates);
+  }
+};
+
+// In-place ascending sort of integral values via a (value, count) histogram.
+// Per the original authors, about 3x faster than std::sort when the input has
+// very few unique values; the cost grows with the number of distinct values.
+template <class T>
+void CountingSort(T* begin, T* end) {
+  // One (value, occurrences) entry per distinct input value, in order of
+  // first appearance; entries are located by linear search.
+  using Entry = std::pair<T, int>;
+  std::vector<Entry> histogram;
+  for (const T* it = begin; it != end; ++it) {
+    const T current = *it;
+    auto match = histogram.begin();
+    while (match != histogram.end() && !(match->first == current)) ++match;
+    if (match != histogram.end()) {
+      ++match->second;
+    } else {
+      histogram.push_back(Entry(current, 1));
+    }
+  }
+
+  // Pairs compare by value first, so this orders the distinct values.
+  std::sort(histogram.begin(), histogram.end());
+
+  // Overwrite the input with each value repeated "occurrences" times.
+  T* PIK_RESTRICT out = begin;
+  for (const Entry& entry : histogram) {
+    out = std::fill_n(out, entry.second, entry.first);
+  }
+  PIK_ASSERT(out == end);
+}
+
+struct Bivariate {  // One (x, y) sample, e.g. an input point for Line.
+  Bivariate(float x, float y) : x(x), y(y) {}
+  float x;
+  float y;
+};
+
+class Line {  // Models y = slope * x + intercept.
+ public:
+  constexpr Line(const float slope, const float intercept)
+      : slope_(slope), intercept_(intercept) {}
+
+  constexpr float slope() const { return slope_; }
+  constexpr float intercept() const { return intercept_; }
+
+  // Robust line fit using Siegel's repeated-median algorithm.
+  explicit Line(const std::vector<Bivariate>& points) {
+    const size_t N = points.size();
+    // N^2 is OK for small N; N >= 2 keeps slopes(N - 1) non-empty (no wrap).
+    PIK_ASSERT(N >= 2 && N < 10 * 1000);
+
+    // One for every point i.
+    std::vector<float> medians;
+    medians.reserve(N);
+
+    // One for every j != i. Never cleared to avoid reallocation.
+    std::vector<float> slopes(N - 1);
+
+    for (size_t i = 0; i < N; ++i) {
+      // Index within slopes[] (avoids the hole where j == i).
+      size_t idx_slope = 0;
+
+      for (size_t j = 0; j < N; ++j) {
+        if (j == i) continue;
+
+        const float dy = points[j].y - points[i].y;
+        const float dx = points[j].x - points[i].x;
+        PIK_ASSERT(std::abs(dx) > 1E-7f);  // x must be distinct
+        slopes[idx_slope++] = dy / dx;
+      }
+      PIK_ASSERT(idx_slope == N - 1);
+
+      const float median = Median(&slopes);  // median slope through point i
+      medians.push_back(median);
+    }
+
+    slope_ = Median(&medians);  // median of the per-point medians
+
+    // Solve for intercept, overwriting medians[].
+    for (size_t i = 0; i < N; ++i) {
+      medians[i] = points[i].y - slope_ * points[i].x;
+    }
+    intercept_ = Median(&medians);
+  }
+
+  constexpr float operator()(float x) const { return x * slope_ + intercept_; }
+
+ private:
+  float slope_;
+  float intercept_;
+};
+
+static inline void EvaluateQuality(const Line& line,
+                                   const std::vector<Bivariate>& points,
+                                   float* PIK_RESTRICT max_l1,
+                                   float* PIK_RESTRICT median_abs_deviation) {
+  // Writes the largest and the median |line(x) - y| over all points.
+  std::vector<float> residuals;
+  residuals.reserve(points.size());
+
+  float worst = 0.0f;
+  for (size_t i = 0; i < points.size(); ++i) {
+    residuals.push_back(std::abs(line(points[i].x) - points[i].y));
+    worst = std::max(worst, residuals.back());
+  }
+  *max_l1 = worst;
+
+  *median_abs_deviation = Median(&residuals);
+}
+
+}  // namespace pik
+
+#endif  // PIK_ROBUST_STATISTICS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/saliency_map.cc b/codec/L2/demos/pikEnc/host/pik/saliency_map.cc
new file mode 100755
index 0000000000..fc08b4c8ea
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/saliency_map.cc
@@ -0,0 +1,102 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include <unistd.h>
+
+#include <cstdio>
+#include <string>
+
+#include "pik/saliency_map.h"
+
+#include "pik/bit_reader.h"
+#include "pik/headers.h"
+#include "pik/os_specific.h"
+#include "pik/pik_frame.h"
+#include "pik/pik_info.h"
+#include "pik/single_image_handler.h"
+
+namespace pik {
+
+const char* const kPartialSuffix = ".partial.png";  // file_out + this: partial
+const char* const kHeatmapSuffix = ".heatmap.pgm";  // file_out + this: heatmap
+
+namespace {
+
+Status ProduceSaliencyMapWithoutCleanup(const CompressParams& cparams,
+                                        const PaddedBytes* compressed,
+                                        const CodecInOut* io, ThreadPool* pool,
+                                        std::shared_ptr<ImageF>* out_heatmap) {
+  DecompressParams dparams;
+  BitReader reader(compressed->data(), compressed->size());
+  FileHeader container;
+  PikInfo aux_out;
+  CodecInOut io_partial(io->Context());
+  PIK_RETURN_IF_ERROR(ReadFileHeader(&reader, &container));
+  SingleImageManager transform;
+  // TODO(user): Replace resynthesis below with using a GetDecodedPass()
+  // method.
+  // Cannot rely on transform.IsLastPass() here, since we process a
+  // partially-compressed image.
+  const int kNumStepsAvailable = 2;  // DC and Low frequency.
+  for (int num_pass = 0; num_pass < kNumStepsAvailable; num_pass++) {
+    PIK_RETURN_IF_ERROR(PikPassToPixels(dparams, *compressed, container, pool,
+                                        &reader, &io_partial, &aux_out,
+                                        &transform));
+  }
+  const std::string filename_partially_constructed_image =
+      std::string(cparams.file_out) + kPartialSuffix;
+  const std::string filename_heatmap =
+      std::string(cparams.file_out) + kHeatmapSuffix;
+  // Save the partial decode, then run the external extractor command on it.
+  if (!io_partial.EncodeToFile(io->dec_c_original,
+                               io->original_bits_per_sample(),
+                               filename_partially_constructed_image, pool))
+    return false;  // the partial image could not be written
+  if (!RunCommand({cparams.saliency_extractor_for_progressive_mode,
+                   std::to_string(kBlockDim),
+                   cparams.file_in,
+                   filename_partially_constructed_image,
+                   filename_heatmap}))
+    return false;  // the extractor command did not succeed
+  // Read the heatmap file back and copy plane 0, divided by 255, to the output.
+  CodecInOut io_heatmap(io->Context());
+  if (!io_heatmap.SetFromFile(filename_heatmap, pool)) {
+    fprintf(stderr, "Failed to read heatmap: %s\n", filename_heatmap.c_str());
+    return false;
+  }
+  if (cparams.verbose) {
+    printf("Read heatmap: xsize=%zu ysize=%zu is_gray=%d\n",
+           io_heatmap.xsize(), io_heatmap.ysize(), io_heatmap.IsGray());
+  }
+  out_heatmap->reset(new ImageF(io_heatmap.xsize(),
+                                io_heatmap.ysize()));
+  for (size_t num_row = 0; num_row < io_heatmap.ysize(); num_row++) {
+    const auto row_src = io_heatmap.color().PlaneRow(0, num_row);
+    const auto row_dst = (*out_heatmap)->Row(num_row);
+    for (size_t num_col = 0; num_col < io_heatmap.xsize(); num_col++) {
+      row_dst[num_col] = row_src[num_col] / 255.0;  // presumably maps to [0, 1]
+    }
+  }
+  return true;
+}
+
+}  // namespace
+
+Status ProduceSaliencyMap(const CompressParams& cparams,
+                          const PaddedBytes* compressed, const CodecInOut* io,
+                          ThreadPool* pool,
+                          std::shared_ptr<ImageF>* out_heatmap) {
+  Status result = ProduceSaliencyMapWithoutCleanup(cparams, compressed, io,
+                                                   pool, out_heatmap);
+  if (cparams.keep_tempfiles) return result;
+  // Best-effort cleanup: unlink failures are benign and deliberately ignored.
+  const std::string out_base(cparams.file_out);
+  unlink((out_base + kPartialSuffix).c_str());
+  unlink((out_base + kHeatmapSuffix).c_str());
+  return result;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/saliency_map.h b/codec/L2/demos/pikEnc/host/pik/saliency_map.h
new file mode 100755
index 0000000000..7775ed2218
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/saliency_map.h
@@ -0,0 +1,27 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_SALIENCY_MAP_H_
+#define PIK_SALIENCY_MAP_H_
+
+#include <memory>
+
+#include "pik/codec.h"
+#include "pik/image.h"
+#include "pik/padded_bytes.h"
+#include "pik/pik_params.h"
+#include "pik/status.h"
+
+namespace pik {
+
+Status ProduceSaliencyMap(const CompressParams& cparams,
+                          const PaddedBytes* compressed, const CodecInOut* io,
+                          ThreadPool* pool,
+                          std::shared_ptr<ImageF>* out_heatmap);
+
+}  // namespace pik
+
+#endif  // PIK_SALIENCY_MAP_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/simd/LICENSE b/codec/L2/demos/pikEnc/host/pik/simd/LICENSE
new file mode 100755
index 0000000000..1e58380948
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/simd/LICENSE
@@ -0,0 +1,20 @@
+Copyright 2019 Google LLC
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/codec/L2/demos/pikEnc/host/pik/simd/Makefile b/codec/L2/demos/pikEnc/host/pik/simd/Makefile
new file mode 100755
index 0000000000..d68caf2dad
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/simd/Makefile
@@ -0,0 +1,39 @@
+# Copyright 2019 Google LLC
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+
+# "override" makes these appends apply even when the caller passes the same
+# variables on the make command line. The link step reuses CXXFLAGS.
+override CPPFLAGS += -I../..
+override CXXFLAGS += -std=c++11 -Wall -O3 -fPIC -fsanitize=address
+override LDFLAGS += $(CXXFLAGS)
+
+all: $(addprefix bin/, simd_test)
+
+# Compile each listed object from the same-named source in this directory.
+$(addprefix obj/, targets.o simd_test.o): obj/%.o: %.cc
+	@mkdir -p $(@D)
+	$(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@
+
+.DELETE_ON_ERROR:
+# Compiler-generated (-MM) prerequisites for every *.cc in this directory.
+deps.mk: $(wildcard *.cc) $(wildcard *.h) Makefile
+	set -eu; for file in *.cc; do \
+		target=obj/$${file##*/}; target=$${target%.*}.o; \
+		$(CXX) -c $(CPPFLAGS) $(CXXFLAGS) -MM -MT \
+		"$$target" "$$file"; \
+	done | sed -e ':b' -e 's-../[^./]*/--' -e 'tb' >$@
+-include deps.mk
+
+bin/simd_test: $(addprefix obj/, targets.o simd_test.o)
+	@mkdir -p $(@D)
+	$(CXX) $(LDFLAGS) $^ -o $@
+
+clean:
+	[ ! -d obj ] || $(RM) -r -- obj/
+	[ ! -d bin ] || $(RM) -r -- bin/
+	[ ! -d lib ] || $(RM) -r -- lib/
+
+.PHONY: clean all
diff --git a/codec/L2/demos/pikEnc/host/pik/simd/README.md b/codec/L2/demos/pikEnc/host/pik/simd/README.md
new file mode 100755
index 0000000000..8f1e86af0a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/simd/README.md
@@ -0,0 +1,314 @@
+## Efficient and portable SIMD wrapper
+
+This library provides type-safe and source-code portable wrappers over existing
+platform-specific intrinsics. Its design aims for simplicity, efficiency and
+immediate usability with current compilers.
+
+## Current status
+
+Implemented for scalar/SSE4/AVX2/ARMv8 targets, each with unit tests.
+
+A [quick-reference page](g3doc/quick_reference.md) briefly lists all operations
+and their parameters.
+
+To build and run the unit tests:
+`make -j8 && bin/simd_test`
+
+## Design philosophy
+
+*   Performance is important but not the sole consideration. Anyone who goes to
+    the trouble of using SIMD clearly cares about speed. However, portability,
+    maintainability and readability also matter, otherwise we would write in
+    assembly. We aim for performance within 10-20% of a hand-written assembly
+    implementation on the development platform.
+
+*   The guiding principles of C++ are "pay only for what you use" and "leave no
+    room for a lower-level language below C++". We apply these by defining a
+    SIMD API that ensures operation costs are visible, predictable and minimal.
+
+*   Performance portability is important, i.e. the API should be efficient on
+    all target platforms. Unfortunately, common idioms for one platform can be
+    inefficient on others. For example: summing lanes horizontally versus
+    shuffling. Documenting which operations are expensive does not prevent their
+    use, as evidenced by widespread use of `HADDPS`. Performance acceptance
+    tests may detect large regressions, but do not help choose the approach
+    during initial development. Analysis tools can warn about some potential
+    inefficiencies, but likely not all. We instead provide [a carefully chosen
+    set of vector types and operations that are efficient on all target
+    platforms][instmtx] (PPC8, SSE4/AVX2+, ARMv8), plus some useful but less
+    performance-portable operations in an `ext` namespace to make their cost
+    visible.
+
+*   Future SIMD hardware features are difficult to predict. For example, AVX2
+    came with surprising semantics (almost no interaction between 128-bit
+    halves) and AVX-512 added two kinds of predicates (writemask and zeromask).
+    To ensure the API reflects hardware realities, we suggest a flexible
+    approach that adds new operations as they become commonly available.
+
+*   Masking is not yet widely supported on current CPUs. It is difficult to
+    define an interface that provides access to all platform features while
+    retaining performance portability. The P0214R5 proposal lacks support for
+    AVX-512/ARM SVE zeromasks. We suggest standardizing masking only after the
+    community has gained more experience with it.
+
+*   "Width-agnostic" SIMD is more future-proof than user-specified fixed sizes.
+    For example, valarray-like code can iterate over a 1D array with a
+    library-specified vector width. This will result in better code when vector
+    sizes increase, and matches the direction taken by
+    [ARM SVE](https://alastairreid.github.io/papers/sve-ieee-micro-2017.pdf) and
+    RiscV hardware as well as Agner Fog's
+    [ForwardCom instruction set proposal](https://goo.gl/CFizWu). However, some
+    applications may require fixed sizes, so we also guarantee support for
+    128-bit vectors in each instruction set.
+
+*   The API and its implementation should be usable and efficient with commonly
+    used compilers. Some of our open-source users cannot upgrade, so we need to
+    support ~4 year old compilers. For example, we write `shift_left<3>(v)`
+    instead of `v << 3` because MSVC 2017 (ARM64) does not propagate the literal
+    (https://godbolt.org/g/rKx5Ga). However, we do require function-specific
+    target attributes, supported by GCC 4.9 / Clang 3.9 / MSVC 2015.
+
+*   Efficient and safe runtime dispatch is important. Modules such as image or
+    video codecs are typically embedded into larger applications such as
+    browsers, so they cannot require separate binaries for each CPU. Libraries
+    also cannot predict whether the application already uses AVX2 (and pays the
+    frequency throttling cost), so this decision must be left to the
+    application. Using only the lowest-common denominator instructions
+    sacrifices too much performance.
+    Therefore, we need to provide code paths for multiple instruction sets and
+    choose the most suitable at runtime. To reduce overhead, dispatch should be
+    hoisted to higher layers instead of checking inside every low-level
+    function. Generating each code path from the same source reduces
+    implementation and debugging cost.
+
+*   Not every CPU need be supported. For example, pre-SSE4.1 CPUs are increasingly
+    rare and the AVX instruction set is limited to floating-point operations.
+    To reduce code size and compile time, we provide specializations for SSE4,
+    AVX2 and AVX-512 instruction sets on x86.
+
+*   Access to platform-specific intrinsics is necessary for acceptance in
+    performance-critical projects. We provide conversions to and from intrinsics
+    to allow utilizing specialized platform-specific functionality such as
+    `MPSADBW`, and simplify incremental porting of existing code.
+
+*   The core API should be compact and easy to learn. We provide only the few
+    dozen operations which are necessary and sufficient for most of the 150+
+    SIMD applications we examined.
+
+## Differences versus [P0214R5 proposal](https://goo.gl/zKW4SA)
+
+1.  Adding widely used and portable operations such as `andnot`, `average`,
+    bit-shift by immediates and `select`.
+
+1.  Adding the concept of vector 'parts', which are often used in existing ARM
+    and x86 code.
+
+1.  Designing the API to avoid or minimize overhead on AVX2/AVX-512 caused by
+    crossing 128-bit 'block' boundaries.
+
+1.  Avoiding the need for non-native vectors. By contrast, P0214R5's `simd_cast`
+    returns `fixed_size<>` vectors which are more expensive to access because
+    they reside on the stack. We can avoid this plus additional overhead on
+    ARM/AVX2 by defining width-expanding operations as functions of a vector
+    part, e.g. promoting half a vector of `uint8_t` lanes to one full vector of
+    `uint16_t`, or demoting full vectors to half vectors with half-width lanes.
+
+1.  Guaranteeing access to the underlying intrinsic vector type. This ensures
+    all platform-specific capabilities can be used. P0214R5 instead only
+    'encourages' implementations to provide such access.
+
+1.  Enabling safe runtime dispatch and inlining in the same binary. P0214R5 is
+    based on the Vc library, which does not provide assistance for linking
+    multiple instruction sets into the same binary. The Vc documentation
+    suggests compiling separate executables for each instruction set or using
+    GCC's ifunc (indirect functions). The latter is compiler-specific and risks
+    crashes due to ODR violations when compiling the same function with
+    different compiler flags. We solve this problem via target-specific
+    attributes (see HOWTO section below).
+
+1.  Using built-in PPC vector types without a wrapper class. This leads to much
+    better code generation with GCC 6.3: https://godbolt.org/z/pd2PNP.
+    By contrast, P0214R5 requires a wrapper. We avoid this by using only the
+    member operators provided by the PPC vectors; all other functions and
+    typedefs are non-members.
+
+*   Omitting inefficient or non-performance-portable operations such as `hmax`,
+    `operator[]`, and unsupported integer comparisons. Applications can often
+    replace these operations at lower cost than emulating them.
+
+*   Omitting `long double` types: these are not commonly available in hardware.
+
+*   Ensuring signed integer overflow has well-defined semantics (wraparound).
+
+*   Simple header-only implementation and less than a tenth of the size of the
+    Vc library from which P0214 was derived (98,000 lines in
+    https://github.com/VcDevel/Vc according to the gloc Chrome extension).
+
+*   Avoiding hidden performance costs. P0214R5 allows implicit conversions from
+    integer to float, which costs 3-4 cycles on x86. We make these conversions
+    explicit to ensure their cost is visible.
+
+## Prior API designs
+
+The author has been writing SIMD code since 2002: first via assembly language,
+then intrinsics, later Intel's `F32vec4` wrapper, followed by three generations
+of custom vector classes. The first used macros to generate the classes, which
+reduces duplication but also readability. The second used templates instead.
+The third (used in highwayhash and PIK) added support for AVX2 and runtime
+dispatch. The current design enables code generation for multiple platforms
+and/or instruction sets from the same source, and improves runtime dispatch.
+
+## Other related work
+
+*   [Neat SIMD](http://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=7568423)
+    adopts a similar approach with interchangeable vector/scalar types and
+    a compact interface. It allows access to the underlying intrinsics, but
+    does not appear to be designed for platforms other than x86.
+
+*   UME::SIMD ([code](https://goo.gl/yPeVZx), [paper](https://goo.gl/2xpZrk))
+    also adopts an explicit vectorization model with vector classes.
+    However, it exposes the union of all platform capabilities, which makes the
+    API harder to learn (209-page spec) and implement (the estimated LOC count
+    is [500K](https://goo.gl/1THFRi)). The API is less performance-portable
+    because it allows applications to use operations that are inefficient on
+    other platforms.
+
+*   Inastemp ([code](https://goo.gl/hg3USM), [paper](https://goo.gl/YcTU7S))
+    is a vector library for scientific computing with some innovative features:
+    automatic FLOPS counting, and "if/else branches" using lambda functions.
+    It supports IBM Power8, but only provides float and double types.
+
+### Overloaded function API
+
+Most C++ vector APIs rely on class templates. However, two PPC compilers
+including GCC 6.3 generate inefficient code for classes with a SIMD vector
+member: an [extra load/store for every function argument/return
+value](https://godbolt.org/z/pd2PNP). To avoid this overhead, we use built-in
+vector types on PPC. These provide overloaded arithmetic operators but do not
+allow member functions/typedefs such as `size()` or `value_type`. We instead
+rely on overloaded functions.
+
+Because full vectors and parts are synonyms on PPC, we need an additional tag
+argument for disambiguation. Any function template with multiple return types
+uses a descriptor argument to specify the return type. For example, the return
+type of `setzero(Desc<T, N, Target>)` is `Desc<T, N, Target>::V`. For brevity,
+`Desc` is abbreviated to `D` for template arguments and `d` in lvalues.
+
+It may seem preferable to write `setzero<D>()` rather than `setzero(D())`, but
+there are technical difficulties. We prefer generic implementations where
+possible rather than overloading for every single `T`. Because C++ does not
+allow partial specialization of function templates, we need multiple overloads:
+one primary template per target. Thus, functions cannot be invoked using
+template syntax. Can we instead add a wrapper function template that calls the
+appropriate overload? Unfortunately, the compiler mechanism for avoiding
+dangerous per-file `-mavx2` requires per-function annotations, and these
+attributes are not generic. Thus, a wrapper into which SIMD functions are
+inlined cannot be a function, because it would also need a target-specific
+attribute. A macro `SETZERO(D)` could work, but this is hardly clearer than a
+normal function with arguments. Note that descriptors occur often, so user code
+can define a `const SIMD_FULL(float) d;` and then write `setzero(d)`.
+
+## Use cases and HOWTO
+
+Applications may rely on 128-bit vectors, e.g. `SIMD_PART(float, 4)::V`, or
+preferably use vectors of unspecified size `SIMD_FULL(float)::V`.
+
+*   Single instruction set per platform: use normal C++ functions with
+    `SIMD_ATTR` annotation.
+
+*   Runtime dispatch: move target-specific code into a separate file
+    (unlike .inc, .cctest extension leads to syntax highlighting). Specialize
+    an `operator()<SIMD_TARGET>` in this file; any helper functions must reside
+    in `namespace SIMD_NAMESPACE` to avoid ODR violations. All functions must
+    still be prefixed with `SIMD_ATTR`. Include the file for each target via
+    `foreach_target.h`. Call via `Dispatch`, choosing the 'best' available
+    target for the current CPU via `TargetBitfield.Best()`.
+
+## Demos
+
+To compile on Unix systems: `make -j8`. We tested with GCC 7.3.0.
+
+`bin/simd_test` prints a bitfield of instruction sets that were
+tested, e.g. `6` for SSE4=`4` and AVX2=`2`. The demo compiles the same source
+file once per enabled instruction set.
+
+## Example source code
+
+```c++
+void FloorLog2(const uint8_t* SIMD_RESTRICT values,
+               uint8_t* SIMD_RESTRICT log2) {
+  // Descriptors for all required data types:
+  const SIMD_FULL(int32_t) d32;
+  const SIMD_FULL(float) df;
+  const SIMD_PART(uint8_t, d32.N) d8;
+
+  const auto u8 = load(d8, values);
+  const auto bits = cast_to(d32, convert_to(df, convert_to(d32, u8)));
+  const auto exponent = shift_right<23>(bits) - set1(d32, 127);
+  store(convert_to(d8, exponent), d8, log2);
+}
+```
+
+This generates the following SSE4 and AVX2 code, as shown by IACA:
+
+```
+ p0  p1  p5
+|   |   | 1 | CP | pmovzxbd xmm1, dword [rsp+0x25c]
+|   | 1 |   |    | cvtdq2ps xmm1, xmm1
+| 1 |   |   |    | psrad xmm1, 0x17
+|   | 1 |   |    | paddd xmm1, xmm0
+|   |   | 1 | CP | packusdw xmm1, xmm0
+|   |   | 1 | CP | packuswb xmm1, xmm0
+|   |   |   |    | movd [rsp+0x45c], xmm1
+
+|   |   | 1 | CP | vpmovzxbd ymm1, qword [rsp+0x228]
+|   | 1 |   |    | vcvtdq2ps ymm1, ymm1
+| 1 |   |   |    | vpsrad ymm1, ymm1, 0x17
+|   | 1 |   |    | vpaddd ymm1, ymm1, ymm0
+|   |   | 1 | CP | vpackusdw ymm1, ymm1, ymm0
+|   |   | 1 | CP | vpermq ymm1, ymm1, 0xe8
+|   |   | 1 | CP | vpackuswb xmm1, xmm1, xmm0
+|   |   |   |    | vmovq [rsp+0x448], xmm1
+```
+
+```c++
+void Copy(const uint8_t* SIMD_RESTRICT from, const size_t size,
+          uint8_t* SIMD_RESTRICT to) {
+  // Width-agnostic (library-specified N)
+  const SIMD_FULL(uint8_t) d;
+  const Scalar<uint8_t> ds;
+  size_t i = 0;
+  for (; i + d.N <= size; i += d.N) {
+    const auto bytes = load(d, from + i);
+    store(bytes, d, to + i);
+  }
+
+  for (; i < size; ++i) {
+    // (Same loop body as above, could factor into a shared template)
+    const auto bytes = load(ds, from + i);
+    store(bytes, ds, to + i);
+  }
+}
+```
+
+```c++
+void MulAdd(const T* SIMD_RESTRICT mul_array, const T* SIMD_RESTRICT add_array,
+            const size_t size, T* SIMD_RESTRICT x_array) {
+  // Type-agnostic (caller-specified lane type) and width-agnostic (uses
+  // best available instruction set).
+  const SIMD_FULL(T) d;
+  for (size_t i = 0; i < size; i += d.N) {
+    const auto mul = load(d, mul_array + i);
+    const auto add = load(d, add_array + i);
+    auto x = load(d, x_array + i);
+    x = mul_add(mul, x, add);
+    store(x, d, x_array + i);
+  }
+}
+```
+
+## Additional resources
+
+*   [Overview of instructions per operation on different architectures][instmtx]
+
+[instmtx]: instruction_matrix.pdf
\ No newline at end of file
diff --git a/codec/L2/demos/pikEnc/host/pik/simd/arch.h b/codec/L2/demos/pikEnc/host/pik/simd/arch.h
new file mode 100755
index 0000000000..ad707c934e
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/simd/arch.h
@@ -0,0 +1,29 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_SIMD_ARCH_H_
+#define PIK_SIMD_ARCH_H_
+
+// Sets SIMD_ARCH to exactly one of the constants below, from predefined macros.
+
+#define SIMD_ARCH_X86 8  // selected only for 64-bit x86 (see the #if below)
+#define SIMD_ARCH_PPC 9
+#define SIMD_ARCH_ARM 0xA  // selected only for AArch64
+
+#if defined(__x86_64__) || defined(_M_X64)
+#define SIMD_ARCH SIMD_ARCH_X86
+
+#elif defined(__powerpc64__) || defined(_M_PPC)
+#define SIMD_ARCH SIMD_ARCH_PPC
+
+#elif defined(__aarch64__)  // NOTE(review): no _M_* alternative is tested here
+#define SIMD_ARCH SIMD_ARCH_ARM
+
+#else  // anything not matched above is rejected at compile time
+#error "Unsupported platform"
+#endif
+
+#endif  // PIK_SIMD_ARCH_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/simd/arm64_neon.h b/codec/L2/demos/pikEnc/host/pik/simd/arm64_neon.h
new file mode 100755
index 0000000000..63bd509b84
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/simd/arm64_neon.h
@@ -0,0 +1,2643 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_SIMD_ARM64_NEON_H_
+#define PIK_SIMD_ARM64_NEON_H_
+
+// 128-bit ARM64 NEON vectors and operations.
+
+#include "pik/simd/compiler_specific.h"
+#include "pik/simd/shared.h"
+#include "pik/simd/targets.h"
+
+#if SIMD_ENABLE & SIMD_ARM8
+#include <arm_neon.h>
+
+namespace pik {
+
+template <typename T, size_t N>
+struct raw_arm8;  // declared only: each supported <T, N> is specialized below
+
+// 128 bits (sizeof(T) * N == 16): the 16-byte arm_neon.h vector types.
+template <>
+struct raw_arm8<uint8_t, 16> {
+  using type = uint8x16_t;
+};
+
+template <>
+struct raw_arm8<uint16_t, 8> {
+  using type = uint16x8_t;
+};
+
+template <>
+struct raw_arm8<uint32_t, 4> {
+  using type = uint32x4_t;
+};
+
+template <>
+struct raw_arm8<uint64_t, 2> {
+  using type = uint64x2_t;
+};
+
+template <>
+struct raw_arm8<int8_t, 16> {
+  using type = int8x16_t;
+};
+
+template <>
+struct raw_arm8<int16_t, 8> {
+  using type = int16x8_t;
+};
+
+template <>
+struct raw_arm8<int32_t, 4> {
+  using type = int32x4_t;
+};
+
+template <>
+struct raw_arm8<int64_t, 2> {
+  using type = int64x2_t;
+};
+
+template <>
+struct raw_arm8<float, 4> {
+  using type = float32x4_t;
+};
+
+template <>
+struct raw_arm8<double, 2> {
+  using type = float64x2_t;
+};
+
+// 64 bits (sizeof(T) * N == 8): the 8-byte arm_neon.h vector types.
+template <>
+struct raw_arm8<uint8_t, 8> {
+  using type = uint8x8_t;
+};
+
+template <>
+struct raw_arm8<uint16_t, 4> {
+  using type = uint16x4_t;
+};
+
+template <>
+struct raw_arm8<uint32_t, 2> {
+  using type = uint32x2_t;
+};
+
+template <>
+struct raw_arm8<uint64_t, 1> {
+  using type = uint64x1_t;
+};
+
+template <>
+struct raw_arm8<int8_t, 8> {
+  using type = int8x8_t;
+};
+
+template <>
+struct raw_arm8<int16_t, 4> {
+  using type = int16x4_t;
+};
+
+template <>
+struct raw_arm8<int32_t, 2> {
+  using type = int32x2_t;
+};
+
+template <>
+struct raw_arm8<int64_t, 1> {
+  using type = int64x1_t;
+};
+
+template <>
+struct raw_arm8<float, 2> {
+  using type = float32x2_t;
+};
+
+template <>
+struct raw_arm8<double, 1> {
+  using type = float64x1_t;
+};
+
+// 32 bits: reuses the 8-byte raw types of the 64-bit case (no 64-bit lanes).
+template <>
+struct raw_arm8<uint8_t, 4> {
+  using type = uint8x8_t;
+};
+
+template <>
+struct raw_arm8<uint16_t, 2> {
+  using type = uint16x4_t;
+};
+
+template <>
+struct raw_arm8<uint32_t, 1> {
+  using type = uint32x2_t;
+};
+
+template <>
+struct raw_arm8<int8_t, 4> {
+  using type = int8x8_t;
+};
+
+template <>
+struct raw_arm8<int16_t, 2> {
+  using type = int16x4_t;
+};
+
+template <>
+struct raw_arm8<int32_t, 1> {
+  using type = int32x2_t;
+};
+
+template <>
+struct raw_arm8<float, 1> {
+  using type = float32x2_t;
+};
+
+// Returned by set_table_indices for use by table_lookup_lanes.
+template <typename T>
+struct permute_sse4 {  // NOTE(review): SSE4-style name inside the NEON header
+  uint8x16_t raw;  // presumably byte indices - confirm in set_table_indices
+};
+
+template <typename T, size_t N = ARM8::NumLanes<T>()>
+class vec_arm8 {  // N lanes of T held in one arm_neon.h vector (see raw_arm8)
+  using Raw = typename raw_arm8<T, N>::type;
+
+ public:
+  SIMD_INLINE vec_arm8() {}  // leaves `raw` uninitialized
+  vec_arm8(const vec_arm8&) = default;
+  vec_arm8& operator=(const vec_arm8&) = default;
+  SIMD_INLINE explicit vec_arm8(const Raw raw) : raw(raw) {}
+
+  // Compound assignment. Only usable if there is a corresponding non-member
+  // binary operator overload. For example, only f32 and f64 support division.
+  SIMD_INLINE vec_arm8& operator*=(const vec_arm8 other) {
+    return *this = (*this * other);
+  }
+  SIMD_INLINE vec_arm8& operator/=(const vec_arm8 other) {
+    return *this = (*this / other);
+  }
+  SIMD_INLINE vec_arm8& operator+=(const vec_arm8 other) {
+    return *this = (*this + other);
+  }
+  SIMD_INLINE vec_arm8& operator-=(const vec_arm8 other) {
+    return *this = (*this - other);
+  }
+  SIMD_INLINE vec_arm8& operator&=(const vec_arm8 other) {
+    return *this = (*this & other);
+  }
+  SIMD_INLINE vec_arm8& operator|=(const vec_arm8 other) {
+    return *this = (*this | other);
+  }
+  SIMD_INLINE vec_arm8& operator^=(const vec_arm8 other) {
+    return *this = (*this ^ other);
+  }
+
+  Raw raw;  // public: the free functions in this header use it as `.raw`
+};
+
+template <typename T, size_t N>
+struct VecT<T, N, ARM8> {  // specialization for the ARM8 target
+  using type = vec_arm8<T, N>;  // vector type selected for <T, N, ARM8>
+};
+
+using u8x16 = vec_arm8<uint8_t, 16>;  // --- 128-bit vectors
+using u16x8 = vec_arm8<uint16_t, 8>;
+using u32x4 = vec_arm8<uint32_t, 4>;
+using u64x2 = vec_arm8<uint64_t, 2>;
+using i8x16 = vec_arm8<int8_t, 16>;
+using i16x8 = vec_arm8<int16_t, 8>;
+using i32x4 = vec_arm8<int32_t, 4>;
+using i64x2 = vec_arm8<int64_t, 2>;
+using f32x4 = vec_arm8<float, 4>;
+using f64x2 = vec_arm8<double, 2>;
+
+using u8x8 = vec_arm8<uint8_t, 8>;  // --- 64-bit vectors
+using u16x4 = vec_arm8<uint16_t, 4>;
+using u32x2 = vec_arm8<uint32_t, 2>;
+using u64x1 = vec_arm8<uint64_t, 1>;
+using i8x8 = vec_arm8<int8_t, 8>;
+using i16x4 = vec_arm8<int16_t, 4>;
+using i32x2 = vec_arm8<int32_t, 2>;
+using i64x1 = vec_arm8<int64_t, 1>;
+using f32x2 = vec_arm8<float, 2>;
+using f64x1 = vec_arm8<double, 1>;
+
+using u8x4 = vec_arm8<uint8_t, 4>;  // --- 32-bit vectors (8-byte raw type)
+using u16x2 = vec_arm8<uint16_t, 2>;
+using u32x1 = vec_arm8<uint32_t, 1>;
+using i8x4 = vec_arm8<int8_t, 4>;
+using i16x2 = vec_arm8<int16_t, 2>;
+using i32x1 = vec_arm8<int32_t, 1>;
+using f32x1 = vec_arm8<float, 1>;
+
+// ------------------------------ Cast
+
+// cast_to_u8: reinterprets a vector of any supported lane type as u8 lanes.
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> cast_to_u8(Desc<uint8_t, N, ARM8>,
+                                            vec_arm8<uint8_t, N> v) {
+  return v;
+}
+template <size_t N>  // NOTE(review): vreinterpretq_* is the 128-bit form
+SIMD_INLINE vec_arm8<uint8_t, N> cast_to_u8(Desc<uint8_t, N, ARM8>,
+                                            vec_arm8<uint16_t, N / 2> v) {
+  return vec_arm8<uint8_t, N>(vreinterpretq_u8_u16(v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> cast_to_u8(Desc<uint8_t, N, ARM8>,
+                                            vec_arm8<uint32_t, N / 4> v) {
+  return vec_arm8<uint8_t, N>(vreinterpretq_u8_u32(v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> cast_to_u8(Desc<uint8_t, N, ARM8>,
+                                            vec_arm8<uint64_t, N / 8> v) {
+  return vec_arm8<uint8_t, N>(vreinterpretq_u8_u64(v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> cast_to_u8(Desc<uint8_t, N, ARM8>,
+                                            vec_arm8<int8_t, N> v) {
+  return vec_arm8<uint8_t, N>(vreinterpretq_u8_s8(v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> cast_to_u8(Desc<uint8_t, N, ARM8>,
+                                            vec_arm8<int16_t, N / 2> v) {
+  return vec_arm8<uint8_t, N>(vreinterpretq_u8_s16(v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> cast_to_u8(Desc<uint8_t, N, ARM8>,
+                                            vec_arm8<int32_t, N / 4> v) {
+  return vec_arm8<uint8_t, N>(vreinterpretq_u8_s32(v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> cast_to_u8(Desc<uint8_t, N, ARM8>,
+                                            vec_arm8<int64_t, N / 8> v) {
+  return vec_arm8<uint8_t, N>(vreinterpretq_u8_s64(v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> cast_to_u8(Desc<uint8_t, N, ARM8>,
+                                            vec_arm8<float, N / 4> v) {
+  return vec_arm8<uint8_t, N>(vreinterpretq_u8_f32(v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> cast_to_u8(Desc<uint8_t, N, ARM8>,
+                                            vec_arm8<double, N / 8> v) {
+  return vec_arm8<uint8_t, N>(vreinterpretq_u8_f64(v.raw));
+}
+
+// cast_u8_to: reinterprets u8 lanes as the lane type named by the descriptor.
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> cast_u8_to(Desc<uint8_t, N, ARM8>,
+                                            vec_arm8<uint8_t, N> v) {
+  return v;
+}
+template <size_t N>  // N counts destination lanes; the u8 input has N * 2
+SIMD_INLINE vec_arm8<uint16_t, N> cast_u8_to(Desc<uint16_t, N, ARM8>,
+                                             vec_arm8<uint8_t, N * 2> v) {
+  return vec_arm8<uint16_t, N>(vreinterpretq_u16_u8(v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint32_t, N> cast_u8_to(Desc<uint32_t, N, ARM8>,
+                                             vec_arm8<uint8_t, N * 4> v) {
+  return vec_arm8<uint32_t, N>(vreinterpretq_u32_u8(v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint64_t, N> cast_u8_to(Desc<uint64_t, N, ARM8>,
+                                             vec_arm8<uint8_t, N * 8> v) {
+  return vec_arm8<uint64_t, N>(vreinterpretq_u64_u8(v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int8_t, N> cast_u8_to(Desc<int8_t, N, ARM8>,
+                                           vec_arm8<uint8_t, N> v) {
+  return vec_arm8<int8_t, N>(vreinterpretq_s8_u8(v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> cast_u8_to(Desc<int16_t, N, ARM8>,
+                                            vec_arm8<uint8_t, N * 2> v) {
+  return vec_arm8<int16_t, N>(vreinterpretq_s16_u8(v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> cast_u8_to(Desc<int32_t, N, ARM8>,
+                                            vec_arm8<uint8_t, N * 4> v) {
+  return vec_arm8<int32_t, N>(vreinterpretq_s32_u8(v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int64_t, N> cast_u8_to(Desc<int64_t, N, ARM8>,
+                                            vec_arm8<uint8_t, N * 8> v) {
+  return vec_arm8<int64_t, N>(vreinterpretq_s64_u8(v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> cast_u8_to(Desc<float, N, ARM8>,
+                                          vec_arm8<uint8_t, N * 4> v) {
+  return vec_arm8<float, N>(vreinterpretq_f32_u8(v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> cast_u8_to(Desc<double, N, ARM8>,
+                                           vec_arm8<uint8_t, N * 8> v) {
+  return vec_arm8<double, N>(vreinterpretq_f64_u8(v.raw));
+}
+
+// cast_to: reinterprets `v` (lanes of FromT) as N lanes of T.
+template <typename T, size_t N, typename FromT>
+SIMD_INLINE vec_arm8<T, N> cast_to(
+    Desc<T, N, ARM8> d, vec_arm8<FromT, N * sizeof(T) / sizeof(FromT)> v) {
+  // Two hops via u8 lanes, so only the to-u8 / from-u8 overloads are needed.
+  return cast_u8_to(d, cast_to_u8(Desc<uint8_t, N * sizeof(T), ARM8>(), v));
+}
+
+// ------------------------------ Set
+
+// Returns a vector with all lanes set to "t" (full 128-bit vectors, vdupq_n_*).
+SIMD_INLINE vec_arm8<uint8_t> set1(Full<uint8_t, ARM8>, const uint8_t t) {
+  return vec_arm8<uint8_t>(vdupq_n_u8(t));
+}
+SIMD_INLINE vec_arm8<uint16_t> set1(Full<uint16_t, ARM8>, const uint16_t t) {
+  return vec_arm8<uint16_t>(vdupq_n_u16(t));
+}
+SIMD_INLINE vec_arm8<uint32_t> set1(Full<uint32_t, ARM8>, const uint32_t t) {
+  return vec_arm8<uint32_t>(vdupq_n_u32(t));
+}
+SIMD_INLINE vec_arm8<uint64_t> set1(Full<uint64_t, ARM8>, const uint64_t t) {
+  return vec_arm8<uint64_t>(vdupq_n_u64(t));
+}
+SIMD_INLINE vec_arm8<int8_t> set1(Full<int8_t, ARM8>, const int8_t t) {
+  return vec_arm8<int8_t>(vdupq_n_s8(t));
+}
+SIMD_INLINE vec_arm8<int16_t> set1(Full<int16_t, ARM8>, const int16_t t) {
+  return vec_arm8<int16_t>(vdupq_n_s16(t));
+}
+SIMD_INLINE vec_arm8<int32_t> set1(Full<int32_t, ARM8>, const int32_t t) {
+  return vec_arm8<int32_t>(vdupq_n_s32(t));
+}
+SIMD_INLINE vec_arm8<int64_t> set1(Full<int64_t, ARM8>, const int64_t t) {
+  return vec_arm8<int64_t>(vdupq_n_s64(t));
+}
+SIMD_INLINE vec_arm8<float> set1(Full<float, ARM8>, const float t) {
+  return vec_arm8<float>(vdupq_n_f32(t));
+}
+SIMD_INLINE vec_arm8<double> set1(Full<double, ARM8>, const double t) {
+  return vec_arm8<double>(vdupq_n_f64(t));
+}
+
+// 64-bit vectors: all lanes set to "t" via the non-q vdup_n_* intrinsics.
+SIMD_INLINE vec_arm8<uint8_t, 8> set1(Desc<uint8_t, 8, ARM8>, const uint8_t t) {
+  return vec_arm8<uint8_t, 8>(vdup_n_u8(t));
+}
+SIMD_INLINE vec_arm8<uint16_t, 4> set1(Desc<uint16_t, 4, ARM8>,
+                                       const uint16_t t) {
+  return vec_arm8<uint16_t, 4>(vdup_n_u16(t));
+}
+SIMD_INLINE vec_arm8<uint32_t, 2> set1(Desc<uint32_t, 2, ARM8>,
+                                       const uint32_t t) {
+  return vec_arm8<uint32_t, 2>(vdup_n_u32(t));
+}
+SIMD_INLINE vec_arm8<uint64_t, 1> set1(Desc<uint64_t, 1, ARM8>,
+                                       const uint64_t t) {
+  return vec_arm8<uint64_t, 1>(vdup_n_u64(t));
+}
+SIMD_INLINE vec_arm8<int8_t, 8> set1(Desc<int8_t, 8, ARM8>, const int8_t t) {
+  return vec_arm8<int8_t, 8>(vdup_n_s8(t));
+}
+SIMD_INLINE vec_arm8<int16_t, 4> set1(Desc<int16_t, 4, ARM8>, const int16_t t) {
+  return vec_arm8<int16_t, 4>(vdup_n_s16(t));
+}
+SIMD_INLINE vec_arm8<int32_t, 2> set1(Desc<int32_t, 2, ARM8>, const int32_t t) {
+  return vec_arm8<int32_t, 2>(vdup_n_s32(t));
+}
+SIMD_INLINE vec_arm8<int64_t, 1> set1(Desc<int64_t, 1, ARM8>, const int64_t t) {
+  return vec_arm8<int64_t, 1>(vdup_n_s64(t));
+}
+SIMD_INLINE vec_arm8<float, 2> set1(Desc<float, 2, ARM8>, const float t) {
+  return vec_arm8<float, 2>(vdup_n_f32(t));
+}
+SIMD_INLINE vec_arm8<double, 1> set1(Desc<double, 1, ARM8>, const double t) {
+  return vec_arm8<double, 1>(vdup_n_f64(t));
+}
+
+// 32-bit vectors: same vdup_n_* calls as the 64-bit case (shared raw types).
+SIMD_INLINE vec_arm8<uint8_t, 4> set1(Desc<uint8_t, 4, ARM8>, const uint8_t t) {
+  return vec_arm8<uint8_t, 4>(vdup_n_u8(t));
+}
+SIMD_INLINE vec_arm8<uint16_t, 2> set1(Desc<uint16_t, 2, ARM8>,
+                                       const uint16_t t) {
+  return vec_arm8<uint16_t, 2>(vdup_n_u16(t));
+}
+SIMD_INLINE vec_arm8<uint32_t, 1> set1(Desc<uint32_t, 1, ARM8>,
+                                       const uint32_t t) {
+  return vec_arm8<uint32_t, 1>(vdup_n_u32(t));
+}
+SIMD_INLINE vec_arm8<int8_t, 4> set1(Desc<int8_t, 4, ARM8>, const int8_t t) {
+  return vec_arm8<int8_t, 4>(vdup_n_s8(t));
+}
+SIMD_INLINE vec_arm8<int16_t, 2> set1(Desc<int16_t, 2, ARM8>, const int16_t t) {
+  return vec_arm8<int16_t, 2>(vdup_n_s16(t));
+}
+SIMD_INLINE vec_arm8<int32_t, 1> set1(Desc<int32_t, 1, ARM8>, const int32_t t) {
+  return vec_arm8<int32_t, 1>(vdup_n_s32(t));
+}
+SIMD_INLINE vec_arm8<float, 1> set1(Desc<float, 1, ARM8>, const float t) {
+  return vec_arm8<float, 1>(vdup_n_f32(t));
+}
+
+// Returns an all-zero vector: forwards to the set1 overload selected by `d`.
+template <typename T, size_t N>
+SIMD_INLINE vec_arm8<T, N> setzero(Desc<T, N, ARM8> d) {
+  return set1(d, 0);  // the int literal converts to set1's lane-typed parameter
+}
+
+// Builds the ramp first, first + 1, ..., first + (N - 1), one value per lane.
+// Distinct per-lane values are what makes lane-crossing bugs detectable.
+template <typename T, size_t N, typename T2>
+SIMD_INLINE vec_arm8<T, N> iota(Desc<T, N, ARM8> d, const T2 first) {
+  SIMD_ALIGN T ramp[N];
+  for (size_t lane = 0; lane != N; ++lane) {
+    ramp[lane] = first + lane;
+  }
+  return load(d, ramp);
+}
+
+// Returns a vector with uninitialized elements; `d` only selects the type.
+template <typename T, size_t N>
+SIMD_INLINE vec_arm8<T, N> undefined(Desc<T, N, ARM8> d) {
+  SIMD_DIAGNOSTICS(push)
+  SIMD_DIAGNOSTICS_OFF(disable : 4701, ignored "-Wuninitialized")
+  typename raw_arm8<T, N>::type a;  // deliberately never assigned
+  return vec_arm8<T, N>(a);
+  SIMD_DIAGNOSTICS(pop)  // after the return; presumably a pragma - confirm
+}
+
+// ================================================== ARITHMETIC
+
+// ------------------------------ Addition
+
+// Unsigned: lane-wise a + b through the vaddq_u* intrinsics.
+template <size_t N>  // NOTE(review): q-form for every N; confirm partial N
+SIMD_INLINE vec_arm8<uint8_t, N> operator+(const vec_arm8<uint8_t, N> a,
+                                           const vec_arm8<uint8_t, N> b) {
+  return vec_arm8<uint8_t, N>(vaddq_u8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint16_t, N> operator+(const vec_arm8<uint16_t, N> a,
+                                            const vec_arm8<uint16_t, N> b) {
+  return vec_arm8<uint16_t, N>(vaddq_u16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint32_t, N> operator+(const vec_arm8<uint32_t, N> a,
+                                            const vec_arm8<uint32_t, N> b) {
+  return vec_arm8<uint32_t, N>(vaddq_u32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint64_t, N> operator+(const vec_arm8<uint64_t, N> a,
+                                            const vec_arm8<uint64_t, N> b) {
+  return vec_arm8<uint64_t, N>(vaddq_u64(a.raw, b.raw));
+}
+
+// Signed: lane-wise a + b through the vaddq_s* intrinsics.
+template <size_t N>
+SIMD_INLINE vec_arm8<int8_t, N> operator+(const vec_arm8<int8_t, N> a,
+                                          const vec_arm8<int8_t, N> b) {
+  return vec_arm8<int8_t, N>(vaddq_s8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> operator+(const vec_arm8<int16_t, N> a,
+                                           const vec_arm8<int16_t, N> b) {
+  return vec_arm8<int16_t, N>(vaddq_s16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> operator+(const vec_arm8<int32_t, N> a,
+                                           const vec_arm8<int32_t, N> b) {
+  return vec_arm8<int32_t, N>(vaddq_s32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int64_t, N> operator+(const vec_arm8<int64_t, N> a,
+                                           const vec_arm8<int64_t, N> b) {
+  return vec_arm8<int64_t, N>(vaddq_s64(a.raw, b.raw));
+}
+
+// Float: lane-wise a + b through vaddq_f32 / vaddq_f64.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> operator+(const vec_arm8<float, N> a,
+                                         const vec_arm8<float, N> b) {
+  return vec_arm8<float, N>(vaddq_f32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> operator+(const vec_arm8<double, N> a,
+                                          const vec_arm8<double, N> b) {
+  return vec_arm8<double, N>(vaddq_f64(a.raw, b.raw));
+}
+
+// ------------------------------ Subtraction
+
+// Unsigned: lane-wise a - b through the vsubq_u* intrinsics.
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> operator-(const vec_arm8<uint8_t, N> a,
+                                           const vec_arm8<uint8_t, N> b) {
+  return vec_arm8<uint8_t, N>(vsubq_u8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint16_t, N> operator-(const vec_arm8<uint16_t, N> a,
+                                            const vec_arm8<uint16_t, N> b) {
+  return vec_arm8<uint16_t, N>(vsubq_u16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint32_t, N> operator-(const vec_arm8<uint32_t, N> a,
+                                            const vec_arm8<uint32_t, N> b) {
+  return vec_arm8<uint32_t, N>(vsubq_u32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint64_t, N> operator-(const vec_arm8<uint64_t, N> a,
+                                            const vec_arm8<uint64_t, N> b) {
+  return vec_arm8<uint64_t, N>(vsubq_u64(a.raw, b.raw));
+}
+
+// Signed: lane-wise a - b through the vsubq_s* intrinsics.
+template <size_t N>
+SIMD_INLINE vec_arm8<int8_t, N> operator-(const vec_arm8<int8_t, N> a,
+                                          const vec_arm8<int8_t, N> b) {
+  return vec_arm8<int8_t, N>(vsubq_s8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> operator-(const vec_arm8<int16_t, N> a,
+                                           const vec_arm8<int16_t, N> b) {
+  return vec_arm8<int16_t, N>(vsubq_s16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> operator-(const vec_arm8<int32_t, N> a,
+                                           const vec_arm8<int32_t, N> b) {
+  return vec_arm8<int32_t, N>(vsubq_s32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int64_t, N> operator-(const vec_arm8<int64_t, N> a,
+                                           const vec_arm8<int64_t, N> b) {
+  return vec_arm8<int64_t, N>(vsubq_s64(a.raw, b.raw));
+}
+
+// Float: lane-wise a - b through vsubq_f32 / vsubq_f64.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> operator-(const vec_arm8<float, N> a,
+                                         const vec_arm8<float, N> b) {
+  return vec_arm8<float, N>(vsubq_f32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> operator-(const vec_arm8<double, N> a,
+                                          const vec_arm8<double, N> b) {
+  return vec_arm8<double, N>(vsubq_f64(a.raw, b.raw));
+}
+
+// ------------------------------ Saturating addition
+
+// Returns a + b clamped to the destination range (8- and 16-bit lanes only).
+
+// Unsigned: u8 and u16 through vqaddq_u*.
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> saturated_add(const vec_arm8<uint8_t, N> a,
+                                               const vec_arm8<uint8_t, N> b) {
+  return vec_arm8<uint8_t, N>(vqaddq_u8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint16_t, N> saturated_add(const vec_arm8<uint16_t, N> a,
+                                                const vec_arm8<uint16_t, N> b) {
+  return vec_arm8<uint16_t, N>(vqaddq_u16(a.raw, b.raw));
+}
+
+// Signed: i8 and i16 through vqaddq_s*.
+template <size_t N>
+SIMD_INLINE vec_arm8<int8_t, N> saturated_add(const vec_arm8<int8_t, N> a,
+                                              const vec_arm8<int8_t, N> b) {
+  return vec_arm8<int8_t, N>(vqaddq_s8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> saturated_add(const vec_arm8<int16_t, N> a,
+                                               const vec_arm8<int16_t, N> b) {
+  return vec_arm8<int16_t, N>(vqaddq_s16(a.raw, b.raw));
+}
+
+// ------------------------------ Saturating subtraction
+
+// Returns a - b clamped to the destination range (8- and 16-bit lanes only).
+
+// Unsigned: u8 and u16 through vqsubq_u*.
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> saturated_subtract(
+    const vec_arm8<uint8_t, N> a, const vec_arm8<uint8_t, N> b) {
+  return vec_arm8<uint8_t, N>(vqsubq_u8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint16_t, N> saturated_subtract(
+    const vec_arm8<uint16_t, N> a, const vec_arm8<uint16_t, N> b) {
+  return vec_arm8<uint16_t, N>(vqsubq_u16(a.raw, b.raw));
+}
+
+// Signed: i8 and i16 through vqsubq_s*.
+template <size_t N>
+SIMD_INLINE vec_arm8<int8_t, N> saturated_subtract(
+    const vec_arm8<int8_t, N> a, const vec_arm8<int8_t, N> b) {
+  return vec_arm8<int8_t, N>(vqsubq_s8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> saturated_subtract(
+    const vec_arm8<int16_t, N> a, const vec_arm8<int16_t, N> b) {
+  return vec_arm8<int16_t, N>(vqsubq_s16(a.raw, b.raw));
+}
+
+// ------------------------------ Average
+
+// Returns (a + b + 1) / 2 per lane, computed by the vrhaddq_* intrinsics.
+
+// Unsigned: only u8 and u16 overloads are provided.
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> average_round(const vec_arm8<uint8_t, N> a,
+                                               const vec_arm8<uint8_t, N> b) {
+  return vec_arm8<uint8_t, N>(vrhaddq_u8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint16_t, N> average_round(const vec_arm8<uint16_t, N> a,
+                                                const vec_arm8<uint16_t, N> b) {
+  return vec_arm8<uint16_t, N>(vrhaddq_u16(a.raw, b.raw));
+}
+
+// ------------------------------ Absolute value
+
+// Returns absolute value, except that LimitsMin() maps to LimitsMax() + 1.
+template <size_t N>  // overloads exist for i8, i16 and i32 lanes only
+SIMD_INLINE vec_arm8<int8_t, N> abs(const vec_arm8<int8_t, N> v) {
+  return vec_arm8<int8_t, N>(vabsq_s8(v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> abs(const vec_arm8<int16_t, N> v) {
+  return vec_arm8<int16_t, N>(vabsq_s16(v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> abs(const vec_arm8<int32_t, N> v) {
+  return vec_arm8<int32_t, N>(vabsq_s32(v.raw));
+}
+
+// ------------------------------ Shift lanes by constant #bits
+
+// Unsigned (u16/u32/u64): kBits is passed to the intrinsic as an immediate.
+template <int kBits, size_t N>
+SIMD_INLINE vec_arm8<uint16_t, N> shift_left(const vec_arm8<uint16_t, N> v) {
+  return vec_arm8<uint16_t, N>(vshlq_n_u16(v.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_INLINE vec_arm8<uint16_t, N> shift_right(const vec_arm8<uint16_t, N> v) {
+  return vec_arm8<uint16_t, N>(vshrq_n_u16(v.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_INLINE vec_arm8<uint32_t, N> shift_left(const vec_arm8<uint32_t, N> v) {
+  return vec_arm8<uint32_t, N>(vshlq_n_u32(v.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_INLINE vec_arm8<uint32_t, N> shift_right(const vec_arm8<uint32_t, N> v) {
+  return vec_arm8<uint32_t, N>(vshrq_n_u32(v.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_INLINE vec_arm8<uint64_t, N> shift_left(const vec_arm8<uint64_t, N> v) {
+  return vec_arm8<uint64_t, N>(vshlq_n_u64(v.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_INLINE vec_arm8<uint64_t, N> shift_right(const vec_arm8<uint64_t, N> v) {
+  return vec_arm8<uint64_t, N>(vshrq_n_u64(v.raw, kBits));
+}
+
+// Signed (no i64 shr): i16/i32 in both directions, i64 shift_left only.
+template <int kBits, size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> shift_left(const vec_arm8<int16_t, N> v) {
+  return vec_arm8<int16_t, N>(vshlq_n_s16(v.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> shift_right(const vec_arm8<int16_t, N> v) {
+  return vec_arm8<int16_t, N>(vshrq_n_s16(v.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> shift_left(const vec_arm8<int32_t, N> v) {
+  return vec_arm8<int32_t, N>(vshlq_n_s32(v.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> shift_right(const vec_arm8<int32_t, N> v) {
+  return vec_arm8<int32_t, N>(vshrq_n_s32(v.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_INLINE vec_arm8<int64_t, N> shift_left(const vec_arm8<int64_t, N> v) {
+  return vec_arm8<int64_t, N>(vshlq_n_s64(v.raw, kBits));
+}
+
+// ------------------------------ Shift lanes by same variable #bits
+//
+// A count is broadcast once by set_shift_{left,right}_count and then reused by
+// shift_{left,right}_same; operator<< / operator>> take per-lane counts.
+template <typename T, size_t N>
+struct shift_left_count {
+  vec_arm8<T, N> v;  // every lane holds the count; N matches the shifted vector
+};
+
+template <typename T, size_t N>
+struct shift_right_count {
+  vec_arm8<T, N> v;  // every lane holds the negated count (see the setter)
+};
+
+template <typename T, size_t N>  // broadcasts `bits` into a reusable count
+SIMD_INLINE shift_left_count<T, N> set_shift_left_count(Desc<T, N, ARM8> d,
+                                                        const int bits) {
+  return shift_left_count<T, N>{set1(d, bits)};
+}
+
+template <typename T, size_t N>  // same, but the count is stored negated
+SIMD_INLINE shift_right_count<T, N> set_shift_right_count(Desc<T, N, ARM8> d,
+                                                          const int bits) {
+  return shift_right_count<T, N>{set1(d, -bits)};  // shift_right_same uses vshlq
+}
+
+// Unsigned (no u8). vshlq_u* takes a signed count vector, hence the reinterpret.
+template <size_t N>
+SIMD_INLINE vec_arm8<uint16_t, N> shift_left_same(
+    const vec_arm8<uint16_t, N> v, const shift_left_count<uint16_t, N> bits) {
+  return vec_arm8<uint16_t, N>(vshlq_u16(v.raw, vreinterpretq_s16_u16(bits.v.raw)));
+}
+template <size_t N>  // right shift: the stored count is negative (see setter)
+SIMD_INLINE vec_arm8<uint16_t, N> shift_right_same(
+    const vec_arm8<uint16_t, N> v, const shift_right_count<uint16_t, N> bits) {
+  return vec_arm8<uint16_t, N>(vshlq_u16(v.raw, vreinterpretq_s16_u16(bits.v.raw)));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint32_t, N> shift_left_same(
+    const vec_arm8<uint32_t, N> v, const shift_left_count<uint32_t, N> bits) {
+  return vec_arm8<uint32_t, N>(vshlq_u32(v.raw, vreinterpretq_s32_u32(bits.v.raw)));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint32_t, N> shift_right_same(
+    const vec_arm8<uint32_t, N> v, const shift_right_count<uint32_t, N> bits) {
+  return vec_arm8<uint32_t, N>(vshlq_u32(v.raw, vreinterpretq_s32_u32(bits.v.raw)));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint64_t, N> shift_left_same(
+    const vec_arm8<uint64_t, N> v, const shift_left_count<uint64_t, N> bits) {
+  return vec_arm8<uint64_t, N>(vshlq_u64(v.raw, vreinterpretq_s64_u64(bits.v.raw)));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint64_t, N> shift_right_same(
+    const vec_arm8<uint64_t, N> v, const shift_right_count<uint64_t, N> bits) {
+  return vec_arm8<uint64_t, N>(vshlq_u64(v.raw, vreinterpretq_s64_u64(bits.v.raw)));
+}
+
+// Signed (no i8; i64: left only). Right shifts rely on the negated count.
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> shift_left_same(
+    const vec_arm8<int16_t, N> v, const shift_left_count<int16_t, N> bits) {
+  return vec_arm8<int16_t, N>(vshlq_s16(v.raw, bits.v.raw));
+}
+template <size_t N>  // same intrinsic as the left shift; only the count differs
+SIMD_INLINE vec_arm8<int16_t, N> shift_right_same(
+    const vec_arm8<int16_t, N> v, const shift_right_count<int16_t, N> bits) {
+  return vec_arm8<int16_t, N>(vshlq_s16(v.raw, bits.v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> shift_left_same(
+    const vec_arm8<int32_t, N> v, const shift_left_count<int32_t, N> bits) {
+  return vec_arm8<int32_t, N>(vshlq_s32(v.raw, bits.v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> shift_right_same(
+    const vec_arm8<int32_t, N> v, const shift_right_count<int32_t, N> bits) {
+  return vec_arm8<int32_t, N>(vshlq_s32(v.raw, bits.v.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int64_t, N> shift_left_same(
+    const vec_arm8<int64_t, N> v, const shift_left_count<int64_t, N> bits) {
+  return vec_arm8<int64_t, N>(vshlq_s64(v.raw, bits.v.raw));
+}
+
+// ------------------------------ Shift lanes by independent variable #bits
+
+// Unsigned (no u8,u16). Counts are reinterpreted as signed for vshlq_u*.
+template <size_t N>
+SIMD_INLINE vec_arm8<uint32_t, N> operator<<(const vec_arm8<uint32_t, N> v,
+                                             const vec_arm8<uint32_t, N> bits) {
+  return vec_arm8<uint32_t, N>(vshlq_u32(v.raw, vreinterpretq_s32_u32(bits.raw)));
+}
+template <size_t N>  // right shift = vshlq_* by the negated per-lane count
+SIMD_INLINE vec_arm8<uint32_t, N> operator>>(const vec_arm8<uint32_t, N> v,
+                                             const vec_arm8<uint32_t, N> bits) {
+  return vec_arm8<uint32_t, N>(
+      vshlq_u32(v.raw, vnegq_s32(vreinterpretq_s32_u32(bits.raw))));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint64_t, N> operator<<(const vec_arm8<uint64_t, N> v,
+                                             const vec_arm8<uint64_t, N> bits) {
+  return vec_arm8<uint64_t, N>(vshlq_u64(v.raw, vreinterpretq_s64_u64(bits.raw)));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint64_t, N> operator>>(const vec_arm8<uint64_t, N> v,
+                                             const vec_arm8<uint64_t, N> bits) {
+  return vec_arm8<uint64_t, N>(
+      vshlq_u64(v.raw, vnegq_s64(vreinterpretq_s64_u64(bits.raw))));
+}
+
+// Signed (no i8,i16; i64 has operator<< only). operator>> negates the count.
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> operator<<(const vec_arm8<int32_t, N> v,
+                                            const vec_arm8<int32_t, N> bits) {
+  return vec_arm8<int32_t, N>(vshlq_s32(v.raw, bits.raw));
+}
+template <size_t N>  // right shift expressed as vshlq_s32 by -bits
+SIMD_INLINE vec_arm8<int32_t, N> operator>>(const vec_arm8<int32_t, N> v,
+                                            const vec_arm8<int32_t, N> bits) {
+  return vec_arm8<int32_t, N>(vshlq_s32(v.raw, vnegq_s32(bits.raw)));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int64_t, N> operator<<(const vec_arm8<int64_t, N> v,
+                                            const vec_arm8<int64_t, N> bits) {
+  return vec_arm8<int64_t, N>(vshlq_s64(v.raw, bits.raw));
+}
+
+// ------------------------------ Minimum
+
+// Unsigned (no u64): lane-wise minimum through vminq_u*.
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> min(const vec_arm8<uint8_t, N> a,
+                                     const vec_arm8<uint8_t, N> b) {
+  return vec_arm8<uint8_t, N>(vminq_u8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint16_t, N> min(const vec_arm8<uint16_t, N> a,
+                                      const vec_arm8<uint16_t, N> b) {
+  return vec_arm8<uint16_t, N>(vminq_u16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint32_t, N> min(const vec_arm8<uint32_t, N> a,
+                                      const vec_arm8<uint32_t, N> b) {
+  return vec_arm8<uint32_t, N>(vminq_u32(a.raw, b.raw));
+}
+
+// Signed (no i64): lane-wise minimum through vminq_s*.
+template <size_t N>
+SIMD_INLINE vec_arm8<int8_t, N> min(const vec_arm8<int8_t, N> a,
+                                    const vec_arm8<int8_t, N> b) {
+  return vec_arm8<int8_t, N>(vminq_s8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> min(const vec_arm8<int16_t, N> a,
+                                     const vec_arm8<int16_t, N> b) {
+  return vec_arm8<int16_t, N>(vminq_s16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> min(const vec_arm8<int32_t, N> a,
+                                     const vec_arm8<int32_t, N> b) {
+  return vec_arm8<int32_t, N>(vminq_s32(a.raw, b.raw));
+}
+
+// Float: vminq_f32 / vminq_f64. NOTE(review): NaN handling not verified here.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> min(const vec_arm8<float, N> a,
+                                   const vec_arm8<float, N> b) {
+  return vec_arm8<float, N>(vminq_f32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> min(const vec_arm8<double, N> a,
+                                    const vec_arm8<double, N> b) {
+  return vec_arm8<double, N>(vminq_f64(a.raw, b.raw));
+}
+
+// ------------------------------ Maximum
+
+// Lane-wise maximum of a and b. Each overload forwards to the vmaxq_*
+// intrinsic matching its lane type.
+
+// Unsigned lanes (there is no u64 overload).
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> max(const vec_arm8<uint8_t, N> a,
+                                     const vec_arm8<uint8_t, N> b) {
+  const uint8x16_t greater = vmaxq_u8(a.raw, b.raw);
+  return vec_arm8<uint8_t, N>(greater);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint16_t, N> max(const vec_arm8<uint16_t, N> a,
+                                      const vec_arm8<uint16_t, N> b) {
+  const uint16x8_t greater = vmaxq_u16(a.raw, b.raw);
+  return vec_arm8<uint16_t, N>(greater);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint32_t, N> max(const vec_arm8<uint32_t, N> a,
+                                      const vec_arm8<uint32_t, N> b) {
+  const uint32x4_t greater = vmaxq_u32(a.raw, b.raw);
+  return vec_arm8<uint32_t, N>(greater);
+}
+
+// Signed lanes (there is no i64 overload).
+template <size_t N>
+SIMD_INLINE vec_arm8<int8_t, N> max(const vec_arm8<int8_t, N> a,
+                                    const vec_arm8<int8_t, N> b) {
+  const int8x16_t greater = vmaxq_s8(a.raw, b.raw);
+  return vec_arm8<int8_t, N>(greater);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> max(const vec_arm8<int16_t, N> a,
+                                     const vec_arm8<int16_t, N> b) {
+  const int16x8_t greater = vmaxq_s16(a.raw, b.raw);
+  return vec_arm8<int16_t, N>(greater);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> max(const vec_arm8<int32_t, N> a,
+                                     const vec_arm8<int32_t, N> b) {
+  const int32x4_t greater = vmaxq_s32(a.raw, b.raw);
+  return vec_arm8<int32_t, N>(greater);
+}
+
+// Floating-point lanes.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> max(const vec_arm8<float, N> a,
+                                   const vec_arm8<float, N> b) {
+  const float32x4_t greater = vmaxq_f32(a.raw, b.raw);
+  return vec_arm8<float, N>(greater);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> max(const vec_arm8<double, N> a,
+                                    const vec_arm8<double, N> b) {
+  const float64x2_t greater = vmaxq_f64(a.raw, b.raw);
+  return vec_arm8<double, N>(greater);
+}
+
+// ------------------------------ Integer multiplication
+
+// Lane-wise integer product; the result has the same lane width as the
+// inputs. Only 16- and 32-bit lanes are provided here.
+
+// Unsigned lanes.
+template <size_t N>
+SIMD_INLINE vec_arm8<uint16_t, N> operator*(const vec_arm8<uint16_t, N> a,
+                                            const vec_arm8<uint16_t, N> b) {
+  const uint16x8_t product = vmulq_u16(a.raw, b.raw);
+  return vec_arm8<uint16_t, N>(product);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint32_t, N> operator*(const vec_arm8<uint32_t, N> a,
+                                            const vec_arm8<uint32_t, N> b) {
+  const uint32x4_t product = vmulq_u32(a.raw, b.raw);
+  return vec_arm8<uint32_t, N>(product);
+}
+
+// Signed lanes.
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> operator*(const vec_arm8<int16_t, N> a,
+                                           const vec_arm8<int16_t, N> b) {
+  const int16x8_t product = vmulq_s16(a.raw, b.raw);
+  return vec_arm8<int16_t, N>(product);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> operator*(const vec_arm8<int32_t, N> a,
+                                           const vec_arm8<int32_t, N> b) {
+  const int32x4_t product = vmulq_s32(a.raw, b.raw);
+  return vec_arm8<int32_t, N>(product);
+}
+
+// "Extensions": useful but not quite performance-portable operations. We add
+// functions to this namespace in multiple places.
+namespace ext {
+
+// Returns the upper 16 bits of a * b in each lane.
+// The lower and upper halves of the inputs are widened to 32-bit products
+// separately; vuzp2q then keeps the odd 16-bit elements of both products.
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> mul_high(const vec_arm8<int16_t, N> a,
+                                          const vec_arm8<int16_t, N> b) {
+  const int32x4_t wide_hi = vmull_high_s16(a.raw, b.raw);
+  const int32x4_t wide_lo =
+      vmull_s16(vget_low_s16(a.raw), vget_low_s16(b.raw));
+  const int16x8_t halves_lo = vreinterpretq_s16_s32(wide_lo);
+  const int16x8_t halves_hi = vreinterpretq_s16_s32(wide_hi);
+  return vec_arm8<int16_t, N>(vuzp2q_s16(halves_lo, halves_hi));
+}
+
+}  // namespace ext
+
+// Multiplies the even lanes (0, 2 ..) of a and b. Each double-wide product
+// occupies the even lane together with its odd neighbor, which receives the
+// upper half.
+SIMD_INLINE vec_arm8<int64_t> mul_even(const vec_arm8<int32_t> a,
+                                       const vec_arm8<int32_t> b) {
+  // vuzp1q packs lanes 0 and 2 into the lower half, ready for widening.
+  const int32x2_t a_even = vget_low_s32(vuzp1q_s32(a.raw, a.raw));
+  const int32x2_t b_even = vget_low_s32(vuzp1q_s32(b.raw, b.raw));
+  return vec_arm8<int64_t>(vmull_s32(a_even, b_even));
+}
+SIMD_INLINE vec_arm8<uint64_t> mul_even(const vec_arm8<uint32_t> a,
+                                        const vec_arm8<uint32_t> b) {
+  // Same packing as the signed overload, using the unsigned intrinsics.
+  const uint32x2_t a_even = vget_low_u32(vuzp1q_u32(a.raw, a.raw));
+  const uint32x2_t b_even = vget_low_u32(vuzp1q_u32(b.raw, b.raw));
+  return vec_arm8<uint64_t>(vmull_u32(a_even, b_even));
+}
+
+// ------------------------------ Floating-point negate
+
+// Returns -v for every floating-point lane.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> neg(const vec_arm8<float, N> v) {
+  const float32x4_t negated = vnegq_f32(v.raw);
+  return vec_arm8<float, N>(negated);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> neg(const vec_arm8<double, N> v) {
+  const float64x2_t negated = vnegq_f64(v.raw);
+  return vec_arm8<double, N>(negated);
+}
+
+// ------------------------------ Floating-point mul / div
+
+// Lane-wise floating-point product a * b.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> operator*(const vec_arm8<float, N> a,
+                                         const vec_arm8<float, N> b) {
+  const float32x4_t product = vmulq_f32(a.raw, b.raw);
+  return vec_arm8<float, N>(product);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> operator*(const vec_arm8<double, N> a,
+                                          const vec_arm8<double, N> b) {
+  const float64x2_t product = vmulq_f64(a.raw, b.raw);
+  return vec_arm8<double, N>(product);
+}
+
+// Lane-wise floating-point quotient a / b.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> operator/(const vec_arm8<float, N> a,
+                                         const vec_arm8<float, N> b) {
+  const float32x4_t quotient = vdivq_f32(a.raw, b.raw);
+  return vec_arm8<float, N>(quotient);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> operator/(const vec_arm8<double, N> a,
+                                          const vec_arm8<double, N> b) {
+  const float64x2_t quotient = vdivq_f64(a.raw, b.raw);
+  return vec_arm8<double, N>(quotient);
+}
+
+// Approximate reciprocal: returns the vrecpeq_f32 estimate of 1 / v as-is,
+// without any refinement step. Only a float overload is provided here.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> approximate_reciprocal(
+    const vec_arm8<float, N> v) {
+  const float32x4_t estimate = vrecpeq_f32(v.raw);
+  return vec_arm8<float, N>(estimate);
+}
+
+// ------------------------------ Floating-point multiply-add variants
+
+// Returns add + mul * x.
+// Note the argument order: the vfmaq_* intrinsics take the addend first.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> mul_add(const vec_arm8<float, N> mul,
+                                       const vec_arm8<float, N> x,
+                                       const vec_arm8<float, N> add) {
+  const float32x4_t fused = vfmaq_f32(add.raw, mul.raw, x.raw);
+  return vec_arm8<float, N>(fused);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> mul_add(const vec_arm8<double, N> mul,
+                                        const vec_arm8<double, N> x,
+                                        const vec_arm8<double, N> add) {
+  const float64x2_t fused = vfmaq_f64(add.raw, mul.raw, x.raw);
+  return vec_arm8<double, N>(fused);
+}
+
+// Returns add - mul * x.
+// Note the argument order: the vfmsq_* intrinsics take the minuend first.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> nmul_add(const vec_arm8<float, N> mul,
+                                        const vec_arm8<float, N> x,
+                                        const vec_arm8<float, N> add) {
+  const float32x4_t fused = vfmsq_f32(add.raw, mul.raw, x.raw);
+  return vec_arm8<float, N>(fused);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> nmul_add(const vec_arm8<double, N> mul,
+                                         const vec_arm8<double, N> x,
+                                         const vec_arm8<double, N> add) {
+  const float64x2_t fused = vfmsq_f64(add.raw, mul.raw, x.raw);
+  return vec_arm8<double, N>(fused);
+}
+
+// Slightly more expensive than mul_add / nmul_add: both variants below need
+// an extra negate of the fused result.
+namespace ext {
+
+// Returns mul * x - sub, computed as -(sub - mul * x).
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> mul_subtract(const vec_arm8<float, N> mul,
+                                            const vec_arm8<float, N> x,
+                                            const vec_arm8<float, N> sub) {
+  const vec_arm8<float, N> sub_minus_product = nmul_add(mul, x, sub);
+  return neg(sub_minus_product);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> mul_subtract(const vec_arm8<double, N> mul,
+                                             const vec_arm8<double, N> x,
+                                             const vec_arm8<double, N> sub) {
+  const vec_arm8<double, N> sub_minus_product = nmul_add(mul, x, sub);
+  return neg(sub_minus_product);
+}
+
+// Returns -mul * x - sub, computed as -(sub + mul * x).
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> nmul_subtract(const vec_arm8<float, N> mul,
+                                             const vec_arm8<float, N> x,
+                                             const vec_arm8<float, N> sub) {
+  const vec_arm8<float, N> sub_plus_product = mul_add(mul, x, sub);
+  return neg(sub_plus_product);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> nmul_subtract(const vec_arm8<double, N> mul,
+                                              const vec_arm8<double, N> x,
+                                              const vec_arm8<double, N> sub) {
+  const vec_arm8<double, N> sub_plus_product = mul_add(mul, x, sub);
+  return neg(sub_plus_product);
+}
+
+}  // namespace ext
+
+// ------------------------------ Floating-point square root
+
+// Full-precision square root of every lane (as opposed to the approximate
+// reciprocal square root estimate).
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> sqrt(const vec_arm8<float, N> v) {
+  const float32x4_t root = vsqrtq_f32(v.raw);
+  return vec_arm8<float, N>(root);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> sqrt(const vec_arm8<double, N> v) {
+  const float64x2_t root = vsqrtq_f64(v.raw);
+  return vec_arm8<double, N>(root);
+}
+
+// Approximate reciprocal square root: returns the vrsqrteq_f32 estimate
+// as-is, without any refinement step. Only a float overload is provided here.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> approximate_reciprocal_sqrt(
+    const vec_arm8<float, N> v) {
+  const float32x4_t estimate = vrsqrteq_f32(v.raw);
+  return vec_arm8<float, N>(estimate);
+}
+
+// ------------------------------ Floating-point rounding
+
+// Rounds every lane toward the nearest integer (vrndnq_*); the result stays
+// in floating-point format.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> round(const vec_arm8<float, N> v) {
+  const float32x4_t nearest = vrndnq_f32(v.raw);
+  return vec_arm8<float, N>(nearest);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> round(const vec_arm8<double, N> v) {
+  const float64x2_t nearest = vrndnq_f64(v.raw);
+  return vec_arm8<double, N>(nearest);
+}
+
+// Ceiling: rounds every lane toward +infinity (vrndpq_*).
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> ceil(const vec_arm8<float, N> v) {
+  const float32x4_t rounded_up = vrndpq_f32(v.raw);
+  return vec_arm8<float, N>(rounded_up);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> ceil(const vec_arm8<double, N> v) {
+  const float64x2_t rounded_up = vrndpq_f64(v.raw);
+  return vec_arm8<double, N>(rounded_up);
+}
+
+// Floor: rounds every lane toward -infinity (vrndmq_*).
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> floor(const vec_arm8<float, N> v) {
+  const float32x4_t rounded_down = vrndmq_f32(v.raw);
+  return vec_arm8<float, N>(rounded_down);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> floor(const vec_arm8<double, N> v) {
+  const float64x2_t rounded_down = vrndmq_f64(v.raw);
+  return vec_arm8<double, N>(rounded_down);
+}
+
+// ================================================== COMPARE
+
+// Comparisons fill a lane with 1-bits if the condition is true, else 0.
+
+// ------------------------------ Equality
+
+// Lane-wise equality. The vceqq_* result is wrapped in a vector of the same
+// lane type as the operands.
+
+// Unsigned lanes.
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> operator==(const vec_arm8<uint8_t, N> a,
+                                            const vec_arm8<uint8_t, N> b) {
+  typedef vec_arm8<uint8_t, N> V;
+  return V(vceqq_u8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint16_t, N> operator==(const vec_arm8<uint16_t, N> a,
+                                             const vec_arm8<uint16_t, N> b) {
+  typedef vec_arm8<uint16_t, N> V;
+  return V(vceqq_u16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint32_t, N> operator==(const vec_arm8<uint32_t, N> a,
+                                             const vec_arm8<uint32_t, N> b) {
+  typedef vec_arm8<uint32_t, N> V;
+  return V(vceqq_u32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint64_t, N> operator==(const vec_arm8<uint64_t, N> a,
+                                             const vec_arm8<uint64_t, N> b) {
+  typedef vec_arm8<uint64_t, N> V;
+  return V(vceqq_u64(a.raw, b.raw));
+}
+
+// Signed lanes.
+template <size_t N>
+SIMD_INLINE vec_arm8<int8_t, N> operator==(const vec_arm8<int8_t, N> a,
+                                           const vec_arm8<int8_t, N> b) {
+  typedef vec_arm8<int8_t, N> V;
+  return V(vceqq_s8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> operator==(const vec_arm8<int16_t, N> a,
+                                            const vec_arm8<int16_t, N> b) {
+  typedef vec_arm8<int16_t, N> V;
+  return V(vceqq_s16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> operator==(const vec_arm8<int32_t, N> a,
+                                            const vec_arm8<int32_t, N> b) {
+  typedef vec_arm8<int32_t, N> V;
+  return V(vceqq_s32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int64_t, N> operator==(const vec_arm8<int64_t, N> a,
+                                            const vec_arm8<int64_t, N> b) {
+  typedef vec_arm8<int64_t, N> V;
+  return V(vceqq_s64(a.raw, b.raw));
+}
+
+// Floating-point lanes.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> operator==(const vec_arm8<float, N> a,
+                                          const vec_arm8<float, N> b) {
+  typedef vec_arm8<float, N> V;
+  return V(vceqq_f32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> operator==(const vec_arm8<double, N> a,
+                                           const vec_arm8<double, N> b) {
+  typedef vec_arm8<double, N> V;
+  return V(vceqq_f64(a.raw, b.raw));
+}
+
+// ------------------------------ Strict inequality
+
+// Lane-wise a < b for signed integer and floating-point lanes (no unsigned
+// overloads are provided here).
+template <size_t N>
+SIMD_INLINE vec_arm8<int8_t, N> operator<(const vec_arm8<int8_t, N> a,
+                                          const vec_arm8<int8_t, N> b) {
+  typedef vec_arm8<int8_t, N> V;
+  return V(vcltq_s8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> operator<(const vec_arm8<int16_t, N> a,
+                                           const vec_arm8<int16_t, N> b) {
+  typedef vec_arm8<int16_t, N> V;
+  return V(vcltq_s16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> operator<(const vec_arm8<int32_t, N> a,
+                                           const vec_arm8<int32_t, N> b) {
+  typedef vec_arm8<int32_t, N> V;
+  return V(vcltq_s32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int64_t, N> operator<(const vec_arm8<int64_t, N> a,
+                                           const vec_arm8<int64_t, N> b) {
+  typedef vec_arm8<int64_t, N> V;
+  return V(vcltq_s64(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> operator<(const vec_arm8<float, N> a,
+                                         const vec_arm8<float, N> b) {
+  typedef vec_arm8<float, N> V;
+  return V(vcltq_f32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> operator<(const vec_arm8<double, N> a,
+                                          const vec_arm8<double, N> b) {
+  typedef vec_arm8<double, N> V;
+  return V(vcltq_f64(a.raw, b.raw));
+}
+
+// Lane-wise a > b for signed integer and floating-point lanes (no unsigned
+// overloads are provided here).
+template <size_t N>
+SIMD_INLINE vec_arm8<int8_t, N> operator>(const vec_arm8<int8_t, N> a,
+                                          const vec_arm8<int8_t, N> b) {
+  typedef vec_arm8<int8_t, N> V;
+  return V(vcgtq_s8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> operator>(const vec_arm8<int16_t, N> a,
+                                           const vec_arm8<int16_t, N> b) {
+  typedef vec_arm8<int16_t, N> V;
+  return V(vcgtq_s16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> operator>(const vec_arm8<int32_t, N> a,
+                                           const vec_arm8<int32_t, N> b) {
+  typedef vec_arm8<int32_t, N> V;
+  return V(vcgtq_s32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int64_t, N> operator>(const vec_arm8<int64_t, N> a,
+                                           const vec_arm8<int64_t, N> b) {
+  typedef vec_arm8<int64_t, N> V;
+  return V(vcgtq_s64(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> operator>(const vec_arm8<float, N> a,
+                                         const vec_arm8<float, N> b) {
+  typedef vec_arm8<float, N> V;
+  return V(vcgtq_f32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> operator>(const vec_arm8<double, N> a,
+                                          const vec_arm8<double, N> b) {
+  typedef vec_arm8<double, N> V;
+  return V(vcgtq_f64(a.raw, b.raw));
+}
+
+// ------------------------------ Weak inequality
+
+// Lane-wise a <= b and a >= b; provided for floating-point lanes only.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> operator<=(const vec_arm8<float, N> a,
+                                          const vec_arm8<float, N> b) {
+  typedef vec_arm8<float, N> V;
+  return V(vcleq_f32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> operator<=(const vec_arm8<double, N> a,
+                                           const vec_arm8<double, N> b) {
+  typedef vec_arm8<double, N> V;
+  return V(vcleq_f64(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> operator>=(const vec_arm8<float, N> a,
+                                          const vec_arm8<float, N> b) {
+  typedef vec_arm8<float, N> V;
+  return V(vcgeq_f32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> operator>=(const vec_arm8<double, N> a,
+                                           const vec_arm8<double, N> b) {
+  typedef vec_arm8<double, N> V;
+  return V(vcgeq_f64(a.raw, b.raw));
+}
+
+// ================================================== LOGICAL
+
+// ------------------------------ Bitwise AND
+
+// Bitwise AND of a and b. Integer overloads map directly onto vandq_*.
+template <size_t N>
+SIMD_INLINE vec_arm8<int8_t, N> operator&(const vec_arm8<int8_t, N> a,
+                                          const vec_arm8<int8_t, N> b) {
+  const int8x16_t common = vandq_s8(a.raw, b.raw);
+  return vec_arm8<int8_t, N>(common);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> operator&(const vec_arm8<uint8_t, N> a,
+                                           const vec_arm8<uint8_t, N> b) {
+  const uint8x16_t common = vandq_u8(a.raw, b.raw);
+  return vec_arm8<uint8_t, N>(common);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> operator&(const vec_arm8<int16_t, N> a,
+                                           const vec_arm8<int16_t, N> b) {
+  const int16x8_t common = vandq_s16(a.raw, b.raw);
+  return vec_arm8<int16_t, N>(common);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint16_t, N> operator&(const vec_arm8<uint16_t, N> a,
+                                            const vec_arm8<uint16_t, N> b) {
+  const uint16x8_t common = vandq_u16(a.raw, b.raw);
+  return vec_arm8<uint16_t, N>(common);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> operator&(const vec_arm8<int32_t, N> a,
+                                           const vec_arm8<int32_t, N> b) {
+  const int32x4_t common = vandq_s32(a.raw, b.raw);
+  return vec_arm8<int32_t, N>(common);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint32_t, N> operator&(const vec_arm8<uint32_t, N> a,
+                                            const vec_arm8<uint32_t, N> b) {
+  const uint32x4_t common = vandq_u32(a.raw, b.raw);
+  return vec_arm8<uint32_t, N>(common);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int64_t, N> operator&(const vec_arm8<int64_t, N> a,
+                                           const vec_arm8<int64_t, N> b) {
+  const int64x2_t common = vandq_s64(a.raw, b.raw);
+  return vec_arm8<int64_t, N>(common);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint64_t, N> operator&(const vec_arm8<uint64_t, N> a,
+                                            const vec_arm8<uint64_t, N> b) {
+  const uint64x2_t common = vandq_u64(a.raw, b.raw);
+  return vec_arm8<uint64_t, N>(common);
+}
+// Floating-point overloads reinterpret the lanes as same-width unsigned
+// integers via cast_to, AND those, and cast the result back.
+// NOTE(review): the descriptors are Full<> even though these overloads are
+// templated on N, whereas andnot uses Desc<..., N, ARM8> - confirm cast_to
+// accepts partial vectors here.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> operator&(const vec_arm8<float, N> a,
+                                         const vec_arm8<float, N> b) {
+  const Full<uint32_t, ARM8> du;
+  const Full<float, ARM8> df;
+  return cast_to(df, cast_to(du, a) & cast_to(du, b));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> operator&(const vec_arm8<double, N> a,
+                                          const vec_arm8<double, N> b) {
+  const Full<uint64_t, ARM8> du;
+  const Full<double, ARM8> df;
+  return cast_to(df, cast_to(du, a) & cast_to(du, b));
+}
+
+// ------------------------------ Bitwise AND-NOT
+
+// Returns ~not_mask & mask.
+// Note the swapped argument order: vbicq_*(x, y) takes the value to keep
+// first and the bits to clear second.
+template <size_t N>
+SIMD_INLINE vec_arm8<int8_t, N> andnot(const vec_arm8<int8_t, N> not_mask,
+                                       const vec_arm8<int8_t, N> mask) {
+  const int8x16_t cleared = vbicq_s8(mask.raw, not_mask.raw);
+  return vec_arm8<int8_t, N>(cleared);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> andnot(const vec_arm8<uint8_t, N> not_mask,
+                                        const vec_arm8<uint8_t, N> mask) {
+  const uint8x16_t cleared = vbicq_u8(mask.raw, not_mask.raw);
+  return vec_arm8<uint8_t, N>(cleared);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> andnot(const vec_arm8<int16_t, N> not_mask,
+                                        const vec_arm8<int16_t, N> mask) {
+  const int16x8_t cleared = vbicq_s16(mask.raw, not_mask.raw);
+  return vec_arm8<int16_t, N>(cleared);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint16_t, N> andnot(const vec_arm8<uint16_t, N> not_mask,
+                                         const vec_arm8<uint16_t, N> mask) {
+  const uint16x8_t cleared = vbicq_u16(mask.raw, not_mask.raw);
+  return vec_arm8<uint16_t, N>(cleared);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> andnot(const vec_arm8<int32_t, N> not_mask,
+                                        const vec_arm8<int32_t, N> mask) {
+  const int32x4_t cleared = vbicq_s32(mask.raw, not_mask.raw);
+  return vec_arm8<int32_t, N>(cleared);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint32_t, N> andnot(const vec_arm8<uint32_t, N> not_mask,
+                                         const vec_arm8<uint32_t, N> mask) {
+  const uint32x4_t cleared = vbicq_u32(mask.raw, not_mask.raw);
+  return vec_arm8<uint32_t, N>(cleared);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int64_t, N> andnot(const vec_arm8<int64_t, N> not_mask,
+                                        const vec_arm8<int64_t, N> mask) {
+  const int64x2_t cleared = vbicq_s64(mask.raw, not_mask.raw);
+  return vec_arm8<int64_t, N>(cleared);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint64_t, N> andnot(const vec_arm8<uint64_t, N> not_mask,
+                                         const vec_arm8<uint64_t, N> mask) {
+  const uint64x2_t cleared = vbicq_u64(mask.raw, not_mask.raw);
+  return vec_arm8<uint64_t, N>(cleared);
+}
+// Floating-point overloads run the bit-clear on the unsigned view of the
+// lanes and reinterpret the result back to floating point.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> andnot(const vec_arm8<float, N> not_mask,
+                                      const vec_arm8<float, N> mask) {
+  const Desc<uint32_t, N, ARM8> du;
+  const uint32x4_t cleared =
+      vbicq_u32(cast_to(du, mask).raw, cast_to(du, not_mask).raw);
+  return vec_arm8<float, N>(vreinterpretq_f32_u32(cleared));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> andnot(const vec_arm8<double, N> not_mask,
+                                       const vec_arm8<double, N> mask) {
+  const Desc<uint64_t, N, ARM8> du;
+  const uint64x2_t cleared =
+      vbicq_u64(cast_to(du, mask).raw, cast_to(du, not_mask).raw);
+  return vec_arm8<double, N>(vreinterpretq_f64_u64(cleared));
+}
+
+// ------------------------------ Bitwise OR
+
+// Bitwise OR of a and b. Integer overloads map directly onto vorrq_*.
+template <size_t N>
+SIMD_INLINE vec_arm8<int8_t, N> operator|(const vec_arm8<int8_t, N> a,
+                                          const vec_arm8<int8_t, N> b) {
+  typedef vec_arm8<int8_t, N> V;
+  return V(vorrq_s8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> operator|(const vec_arm8<uint8_t, N> a,
+                                           const vec_arm8<uint8_t, N> b) {
+  typedef vec_arm8<uint8_t, N> V;
+  return V(vorrq_u8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> operator|(const vec_arm8<int16_t, N> a,
+                                           const vec_arm8<int16_t, N> b) {
+  typedef vec_arm8<int16_t, N> V;
+  return V(vorrq_s16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint16_t, N> operator|(const vec_arm8<uint16_t, N> a,
+                                            const vec_arm8<uint16_t, N> b) {
+  typedef vec_arm8<uint16_t, N> V;
+  return V(vorrq_u16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> operator|(const vec_arm8<int32_t, N> a,
+                                           const vec_arm8<int32_t, N> b) {
+  typedef vec_arm8<int32_t, N> V;
+  return V(vorrq_s32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint32_t, N> operator|(const vec_arm8<uint32_t, N> a,
+                                            const vec_arm8<uint32_t, N> b) {
+  typedef vec_arm8<uint32_t, N> V;
+  return V(vorrq_u32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int64_t, N> operator|(const vec_arm8<int64_t, N> a,
+                                           const vec_arm8<int64_t, N> b) {
+  typedef vec_arm8<int64_t, N> V;
+  return V(vorrq_s64(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint64_t, N> operator|(const vec_arm8<uint64_t, N> a,
+                                            const vec_arm8<uint64_t, N> b) {
+  typedef vec_arm8<uint64_t, N> V;
+  return V(vorrq_u64(a.raw, b.raw));
+}
+// The float overload ORs the uint32_t view of the lanes and casts back.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> operator|(const vec_arm8<float, N> a,
+                                         const vec_arm8<float, N> b) {
+  const Full<uint32_t, ARM8> du;
+  const Full<float, ARM8> df;
+  return cast_to(df, cast_to(du, a) | cast_to(du, b));
+}
+// Bitwise OR of two double vectors: both operands are reinterpreted as
+// uint64_t lanes via cast_to, ORed, and the result is cast back to double.
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> operator|(const vec_arm8<double, N> a,
+                                          const vec_arm8<double, N> b) {
+  const Full<uint64_t, ARM8> d;
+  // Fixed: the operands were previously combined with '&', which made this
+  // overload compute AND instead of OR (unlike the float overload).
+  return cast_to(Full<double, ARM8>(), cast_to(d, a) | cast_to(d, b));
+}
+
+// ------------------------------ Bitwise XOR
+
+// Bitwise XOR of a and b. Integer overloads map directly onto veorq_*.
+template <size_t N>
+SIMD_INLINE vec_arm8<int8_t, N> operator^(const vec_arm8<int8_t, N> a,
+                                          const vec_arm8<int8_t, N> b) {
+  const int8x16_t toggled = veorq_s8(a.raw, b.raw);
+  return vec_arm8<int8_t, N>(toggled);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> operator^(const vec_arm8<uint8_t, N> a,
+                                           const vec_arm8<uint8_t, N> b) {
+  const uint8x16_t toggled = veorq_u8(a.raw, b.raw);
+  return vec_arm8<uint8_t, N>(toggled);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> operator^(const vec_arm8<int16_t, N> a,
+                                           const vec_arm8<int16_t, N> b) {
+  const int16x8_t toggled = veorq_s16(a.raw, b.raw);
+  return vec_arm8<int16_t, N>(toggled);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint16_t, N> operator^(const vec_arm8<uint16_t, N> a,
+                                            const vec_arm8<uint16_t, N> b) {
+  const uint16x8_t toggled = veorq_u16(a.raw, b.raw);
+  return vec_arm8<uint16_t, N>(toggled);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> operator^(const vec_arm8<int32_t, N> a,
+                                           const vec_arm8<int32_t, N> b) {
+  const int32x4_t toggled = veorq_s32(a.raw, b.raw);
+  return vec_arm8<int32_t, N>(toggled);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint32_t, N> operator^(const vec_arm8<uint32_t, N> a,
+                                            const vec_arm8<uint32_t, N> b) {
+  const uint32x4_t toggled = veorq_u32(a.raw, b.raw);
+  return vec_arm8<uint32_t, N>(toggled);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int64_t, N> operator^(const vec_arm8<int64_t, N> a,
+                                           const vec_arm8<int64_t, N> b) {
+  const int64x2_t toggled = veorq_s64(a.raw, b.raw);
+  return vec_arm8<int64_t, N>(toggled);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint64_t, N> operator^(const vec_arm8<uint64_t, N> a,
+                                            const vec_arm8<uint64_t, N> b) {
+  const uint64x2_t toggled = veorq_u64(a.raw, b.raw);
+  return vec_arm8<uint64_t, N>(toggled);
+}
+// Floating-point overloads XOR the same-width unsigned view of the lanes
+// (obtained via cast_to) and cast the result back.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> operator^(const vec_arm8<float, N> a,
+                                         const vec_arm8<float, N> b) {
+  const Full<uint32_t, ARM8> du;
+  const Full<float, ARM8> df;
+  return cast_to(df, cast_to(du, a) ^ cast_to(du, b));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> operator^(const vec_arm8<double, N> a,
+                                          const vec_arm8<double, N> b) {
+  const Full<uint64_t, ARM8> du;
+  const Full<double, ARM8> df;
+  return cast_to(df, cast_to(du, a) ^ cast_to(du, b));
+}
+
+// ------------------------------ Select/blend
+
+// Returns a mask for use by select().
+// On x86 this is a no-op because blendv_ps/pd only inspect the sign bit.
+// Here the lanes are viewed as signed integers and shifted right by
+// (lane bits - 1); presumably shift_right is arithmetic for signed lanes, so
+// the sign bit fills the whole lane - TODO confirm against shift_right.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> condition_from_sign(const vec_arm8<float, N> v) {
+  const Part<int32_t, N> as_int;
+  const Part<float, N> as_float;
+  return cast_to(as_float, shift_right<31>(cast_to(as_int, v)));
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> condition_from_sign(
+    const vec_arm8<double, N> v) {
+  const Part<int64_t, N> as_int;
+  const Part<double, N> as_float;
+  return cast_to(as_float, shift_right<63>(cast_to(as_int, v)));
+}
+
+// Returns mask ? b : a. "mask" must either have been returned by
+// selector_from_mask, or callers must ensure its lanes are T(0) or ~T(0).
+// Note the argument order of vbslq_*: mask first, then the value taken where
+// the mask is set (b), then the value taken where it is clear (a).
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> select(const vec_arm8<uint8_t, N> a,
+                                        const vec_arm8<uint8_t, N> b,
+                                        const vec_arm8<uint8_t, N> mask) {
+  const uint8x16_t chosen = vbslq_u8(mask.raw, b.raw, a.raw);
+  return vec_arm8<uint8_t, N>(chosen);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int8_t, N> select(const vec_arm8<int8_t, N> a,
+                                       const vec_arm8<int8_t, N> b,
+                                       const vec_arm8<int8_t, N> mask) {
+  const int8x16_t chosen = vbslq_s8(mask.raw, b.raw, a.raw);
+  return vec_arm8<int8_t, N>(chosen);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint16_t, N> select(const vec_arm8<uint16_t, N> a,
+                                         const vec_arm8<uint16_t, N> b,
+                                         const vec_arm8<uint16_t, N> mask) {
+  const uint16x8_t chosen = vbslq_u16(mask.raw, b.raw, a.raw);
+  return vec_arm8<uint16_t, N>(chosen);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> select(const vec_arm8<int16_t, N> a,
+                                        const vec_arm8<int16_t, N> b,
+                                        const vec_arm8<int16_t, N> mask) {
+  const int16x8_t chosen = vbslq_s16(mask.raw, b.raw, a.raw);
+  return vec_arm8<int16_t, N>(chosen);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint32_t, N> select(const vec_arm8<uint32_t, N> a,
+                                         const vec_arm8<uint32_t, N> b,
+                                         const vec_arm8<uint32_t, N> mask) {
+  const uint32x4_t chosen = vbslq_u32(mask.raw, b.raw, a.raw);
+  return vec_arm8<uint32_t, N>(chosen);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> select(const vec_arm8<int32_t, N> a,
+                                        const vec_arm8<int32_t, N> b,
+                                        const vec_arm8<int32_t, N> mask) {
+  const int32x4_t chosen = vbslq_s32(mask.raw, b.raw, a.raw);
+  return vec_arm8<int32_t, N>(chosen);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint64_t, N> select(const vec_arm8<uint64_t, N> a,
+                                         const vec_arm8<uint64_t, N> b,
+                                         const vec_arm8<uint64_t, N> mask) {
+  const uint64x2_t chosen = vbslq_u64(mask.raw, b.raw, a.raw);
+  return vec_arm8<uint64_t, N>(chosen);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int64_t, N> select(const vec_arm8<int64_t, N> a,
+                                        const vec_arm8<int64_t, N> b,
+                                        const vec_arm8<int64_t, N> mask) {
+  const int64x2_t chosen = vbslq_s64(mask.raw, b.raw, a.raw);
+  return vec_arm8<int64_t, N>(chosen);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> select(const vec_arm8<float, N> a,
+                                      const vec_arm8<float, N> b,
+                                      const vec_arm8<float, N> mask) {
+  const float32x4_t chosen = vbslq_f32(mask.raw, b.raw, a.raw);
+  return vec_arm8<float, N>(chosen);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<double, N> select(const vec_arm8<double, N> a,
+                                       const vec_arm8<double, N> b,
+                                       const vec_arm8<double, N> mask) {
+  const float64x2_t chosen = vbslq_f64(mask.raw, b.raw, a.raw);
+  return vec_arm8<double, N>(chosen);
+}
+
+// ================================================== MEMORY
+
+// ------------------------------ Load 128
+
+// Loads one full 128-bit vector from memory via vld1q_*. The descriptor
+// argument is unnamed and only selects the overload.
+// NOTE(review): the pointer parameter is called `aligned` although these are
+// the unaligned loads - looks like a naming leftover; confirm and rename.
+SIMD_INLINE vec_arm8<uint8_t> load_unaligned(
+    Full<uint8_t, ARM8>, const uint8_t* SIMD_RESTRICT aligned) {
+  const uint8x16_t lanes = vld1q_u8(aligned);
+  return vec_arm8<uint8_t>(lanes);
+}
+SIMD_INLINE vec_arm8<uint16_t> load_unaligned(
+    Full<uint16_t, ARM8>, const uint16_t* SIMD_RESTRICT aligned) {
+  const uint16x8_t lanes = vld1q_u16(aligned);
+  return vec_arm8<uint16_t>(lanes);
+}
+SIMD_INLINE vec_arm8<uint32_t> load_unaligned(
+    Full<uint32_t, ARM8>, const uint32_t* SIMD_RESTRICT aligned) {
+  const uint32x4_t lanes = vld1q_u32(aligned);
+  return vec_arm8<uint32_t>(lanes);
+}
+SIMD_INLINE vec_arm8<uint64_t> load_unaligned(
+    Full<uint64_t, ARM8>, const uint64_t* SIMD_RESTRICT aligned) {
+  const uint64x2_t lanes = vld1q_u64(aligned);
+  return vec_arm8<uint64_t>(lanes);
+}
+SIMD_INLINE vec_arm8<int8_t> load_unaligned(
+    Full<int8_t, ARM8>, const int8_t* SIMD_RESTRICT aligned) {
+  const int8x16_t lanes = vld1q_s8(aligned);
+  return vec_arm8<int8_t>(lanes);
+}
+SIMD_INLINE vec_arm8<int16_t> load_unaligned(
+    Full<int16_t, ARM8>, const int16_t* SIMD_RESTRICT aligned) {
+  const int16x8_t lanes = vld1q_s16(aligned);
+  return vec_arm8<int16_t>(lanes);
+}
+SIMD_INLINE vec_arm8<int32_t> load_unaligned(
+    Full<int32_t, ARM8>, const int32_t* SIMD_RESTRICT aligned) {
+  const int32x4_t lanes = vld1q_s32(aligned);
+  return vec_arm8<int32_t>(lanes);
+}
+SIMD_INLINE vec_arm8<int64_t> load_unaligned(
+    Full<int64_t, ARM8>, const int64_t* SIMD_RESTRICT aligned) {
+  const int64x2_t lanes = vld1q_s64(aligned);
+  return vec_arm8<int64_t>(lanes);
+}
+SIMD_INLINE vec_arm8<float> load_unaligned(Full<float, ARM8>,
+                                           const float* SIMD_RESTRICT aligned) {
+  const float32x4_t lanes = vld1q_f32(aligned);
+  return vec_arm8<float>(lanes);
+}
+SIMD_INLINE vec_arm8<double> load_unaligned(
+    Full<double, ARM8>, const double* SIMD_RESTRICT aligned) {
+  const float64x2_t lanes = vld1q_f64(aligned);
+  return vec_arm8<double>(lanes);
+}
+
+// Full-vector load entry point; implemented by forwarding to load_unaligned
+// with the same descriptor and pointer.
+template <typename T>
+SIMD_INLINE vec_arm8<T> load(Full<T, ARM8> d, const T* SIMD_RESTRICT p) {
+  const vec_arm8<T> loaded = load_unaligned(d, p);
+  return loaded;
+}
+
+// Vectors are 128 bits wide here, so there is no second block to fill:
+// this is simply an unaligned full-vector load.
+template <typename T>
+SIMD_INLINE vec_arm8<T> load_dup128(Full<T, ARM8> d,
+                                    const T* const SIMD_RESTRICT p) {
+  const vec_arm8<T> loaded = load_unaligned(d, p);
+  return loaded;
+}
+
+// ------------------------------ Load 64
+
+// Half-vector (64-bit) loads: one overload per lane type, each forwarding to
+// the matching vld1_* intrinsic. The descriptor only selects the overload.
+SIMD_INLINE vec_arm8<uint8_t, 8> load(Desc<uint8_t, 8, ARM8> /* d */,
+                                      const uint8_t* SIMD_RESTRICT p) {
+  const uint8x8_t lanes = vld1_u8(p);
+  return vec_arm8<uint8_t, 8>(lanes);
+}
+SIMD_INLINE vec_arm8<uint16_t, 4> load(Desc<uint16_t, 4, ARM8> /* d */,
+                                       const uint16_t* SIMD_RESTRICT p) {
+  const uint16x4_t lanes = vld1_u16(p);
+  return vec_arm8<uint16_t, 4>(lanes);
+}
+SIMD_INLINE vec_arm8<uint32_t, 2> load(Desc<uint32_t, 2, ARM8> /* d */,
+                                       const uint32_t* SIMD_RESTRICT p) {
+  const uint32x2_t lanes = vld1_u32(p);
+  return vec_arm8<uint32_t, 2>(lanes);
+}
+SIMD_INLINE vec_arm8<uint64_t, 1> load(Desc<uint64_t, 1, ARM8> /* d */,
+                                       const uint64_t* SIMD_RESTRICT p) {
+  const uint64x1_t lanes = vld1_u64(p);
+  return vec_arm8<uint64_t, 1>(lanes);
+}
+SIMD_INLINE vec_arm8<int8_t, 8> load(Desc<int8_t, 8, ARM8> /* d */,
+                                     const int8_t* SIMD_RESTRICT p) {
+  const int8x8_t lanes = vld1_s8(p);
+  return vec_arm8<int8_t, 8>(lanes);
+}
+SIMD_INLINE vec_arm8<int16_t, 4> load(Desc<int16_t, 4, ARM8> /* d */,
+                                      const int16_t* SIMD_RESTRICT p) {
+  const int16x4_t lanes = vld1_s16(p);
+  return vec_arm8<int16_t, 4>(lanes);
+}
+SIMD_INLINE vec_arm8<int32_t, 2> load(Desc<int32_t, 2, ARM8> /* d */,
+                                      const int32_t* SIMD_RESTRICT p) {
+  const int32x2_t lanes = vld1_s32(p);
+  return vec_arm8<int32_t, 2>(lanes);
+}
+SIMD_INLINE vec_arm8<int64_t, 1> load(Desc<int64_t, 1, ARM8> /* d */,
+                                      const int64_t* SIMD_RESTRICT p) {
+  const int64x1_t lanes = vld1_s64(p);
+  return vec_arm8<int64_t, 1>(lanes);
+}
+SIMD_INLINE vec_arm8<float, 2> load(Desc<float, 2, ARM8> /* d */,
+                                    const float* SIMD_RESTRICT p) {
+  const float32x2_t lanes = vld1_f32(p);
+  return vec_arm8<float, 2>(lanes);
+}
+SIMD_INLINE vec_arm8<double, 1> load(Desc<double, 1, ARM8> /* d */,
+                                     const double* SIMD_RESTRICT p) {
+  const float64x1_t lanes = vld1_f64(p);
+  return vec_arm8<double, 1>(lanes);
+}
+
+// ------------------------------ Load 32
+
+// In the following load functions, |a| is purposely undefined.
+// It is a required parameter to the intrinsic, however
+// we don't actually care what is in it, and we don't want
+// to introduce extra overhead by initializing it to something.
+
+// 32-bit loads. Each overload reads one 32-bit element from p into lane 0 of
+// a 64-bit register with vld1_lane_{u32,s32,f32}; the other lane keeps
+// whatever undefined(d) returned. For 8- and 16-bit lane types the pointer is
+// reinterpreted as a 32-bit pointer and the result is reinterpreted back to
+// the requested lane type.
+// NOTE(review): for the 8/16-bit overloads undefined(d).raw is assigned to a
+// 32-bit-lane register type; presumably this relies on implicit vector
+// conversions - confirm against the definition of undefined().
+// NOTE(review): reading through the reinterpreted pointer assumes callers
+// accept the aliasing/alignment implications - confirm.
+SIMD_INLINE vec_arm8<uint8_t, 4> load(Desc<uint8_t, 4, ARM8> d,
+                                      const uint8_t* SIMD_RESTRICT p) {
+  uint32x2_t a = undefined(d).raw;
+  uint32x2_t b = vld1_lane_u32(reinterpret_cast<const uint32_t*>(p), a, 0);
+  return vec_arm8<uint8_t, 4>(vreinterpret_u8_u32(b));
+}
+SIMD_INLINE vec_arm8<uint16_t, 2> load(Desc<uint16_t, 2, ARM8> d,
+                                       const uint16_t* SIMD_RESTRICT p) {
+  uint32x2_t a = undefined(d).raw;
+  uint32x2_t b = vld1_lane_u32(reinterpret_cast<const uint32_t*>(p), a, 0);
+  return vec_arm8<uint16_t, 2>(vreinterpret_u16_u32(b));
+}
+// Single uint32_t lane: no pointer or register reinterpretation needed.
+SIMD_INLINE vec_arm8<uint32_t, 1> load(Desc<uint32_t, 1, ARM8> d,
+                                       const uint32_t* SIMD_RESTRICT p) {
+  uint32x2_t a = undefined(d).raw;
+  uint32x2_t b = vld1_lane_u32(p, a, 0);
+  return vec_arm8<uint32_t, 1>(b);
+}
+// Signed variants mirror the unsigned ones using the s32 intrinsics.
+SIMD_INLINE vec_arm8<int8_t, 4> load(Desc<int8_t, 4, ARM8> d,
+                                     const int8_t* SIMD_RESTRICT p) {
+  int32x2_t a = undefined(d).raw;
+  int32x2_t b = vld1_lane_s32(reinterpret_cast<const int32_t*>(p), a, 0);
+  return vec_arm8<int8_t, 4>(vreinterpret_s8_s32(b));
+}
+SIMD_INLINE vec_arm8<int16_t, 2> load(Desc<int16_t, 2, ARM8> d,
+                                      const int16_t* SIMD_RESTRICT p) {
+  int32x2_t a = undefined(d).raw;
+  int32x2_t b = vld1_lane_s32(reinterpret_cast<const int32_t*>(p), a, 0);
+  return vec_arm8<int16_t, 2>(vreinterpret_s16_s32(b));
+}
+SIMD_INLINE vec_arm8<int32_t, 1> load(Desc<int32_t, 1, ARM8> d,
+                                      const int32_t* SIMD_RESTRICT p) {
+  int32x2_t a = undefined(d).raw;
+  int32x2_t b = vld1_lane_s32(p, a, 0);
+  return vec_arm8<int32_t, 1>(b);
+}
+// Single float lane, loaded with the f32 lane intrinsic.
+SIMD_INLINE vec_arm8<float, 1> load(Desc<float, 1, ARM8> d,
+                                    const float* SIMD_RESTRICT p) {
+  float32x2_t a = undefined(d).raw;
+  float32x2_t b = vld1_lane_f32(p, a, 0);
+  return vec_arm8<float, 1>(b);
+}
+
+// ------------------------------ Store 128
+
+// Full-vector (128-bit) stores: one overload per lane type, each forwarding
+// to the matching vst1q_* intrinsic. The descriptor argument is unused; it
+// only selects the overload.
+SIMD_INLINE void store_unaligned(const vec_arm8<uint8_t> v,
+                                 Full<uint8_t, ARM8> /* d */,
+                                 uint8_t* SIMD_RESTRICT aligned) {
+  const uint8x16_t lanes = v.raw;
+  vst1q_u8(aligned, lanes);
+}
+SIMD_INLINE void store_unaligned(const vec_arm8<uint16_t> v,
+                                 Full<uint16_t, ARM8> /* d */,
+                                 uint16_t* SIMD_RESTRICT aligned) {
+  const uint16x8_t lanes = v.raw;
+  vst1q_u16(aligned, lanes);
+}
+SIMD_INLINE void store_unaligned(const vec_arm8<uint32_t> v,
+                                 Full<uint32_t, ARM8> /* d */,
+                                 uint32_t* SIMD_RESTRICT aligned) {
+  const uint32x4_t lanes = v.raw;
+  vst1q_u32(aligned, lanes);
+}
+SIMD_INLINE void store_unaligned(const vec_arm8<uint64_t> v,
+                                 Full<uint64_t, ARM8> /* d */,
+                                 uint64_t* SIMD_RESTRICT aligned) {
+  const uint64x2_t lanes = v.raw;
+  vst1q_u64(aligned, lanes);
+}
+SIMD_INLINE void store_unaligned(const vec_arm8<int8_t> v,
+                                 Full<int8_t, ARM8> /* d */,
+                                 int8_t* SIMD_RESTRICT aligned) {
+  const int8x16_t lanes = v.raw;
+  vst1q_s8(aligned, lanes);
+}
+SIMD_INLINE void store_unaligned(const vec_arm8<int16_t> v,
+                                 Full<int16_t, ARM8> /* d */,
+                                 int16_t* SIMD_RESTRICT aligned) {
+  const int16x8_t lanes = v.raw;
+  vst1q_s16(aligned, lanes);
+}
+SIMD_INLINE void store_unaligned(const vec_arm8<int32_t> v,
+                                 Full<int32_t, ARM8> /* d */,
+                                 int32_t* SIMD_RESTRICT aligned) {
+  const int32x4_t lanes = v.raw;
+  vst1q_s32(aligned, lanes);
+}
+SIMD_INLINE void store_unaligned(const vec_arm8<int64_t> v,
+                                 Full<int64_t, ARM8> /* d */,
+                                 int64_t* SIMD_RESTRICT aligned) {
+  const int64x2_t lanes = v.raw;
+  vst1q_s64(aligned, lanes);
+}
+SIMD_INLINE void store_unaligned(const vec_arm8<float> v,
+                                 Full<float, ARM8> /* d */,
+                                 float* SIMD_RESTRICT aligned) {
+  const float32x4_t lanes = v.raw;
+  vst1q_f32(aligned, lanes);
+}
+SIMD_INLINE void store_unaligned(const vec_arm8<double> v,
+                                 Full<double, ARM8> /* d */,
+                                 double* SIMD_RESTRICT aligned) {
+  const float64x2_t lanes = v.raw;
+  vst1q_f64(aligned, lanes);
+}
+
+// Generic store: hands the vector, its descriptor and the destination
+// pointer to store_unaligned unchanged.
+// NOTE(review): only the full-vector store_unaligned overloads are visible
+// here; presumably partial vectors resolve to the non-template store
+// overloads instead - confirm.
+template <typename T, size_t N>
+SIMD_INLINE void store(vec_arm8<T, N> v, Desc<T, N, ARM8> d,
+                       T* SIMD_RESTRICT p) {
+  T* const dst = p;
+  store_unaligned(v, d, dst);
+}
+
+// ------------------------------ Store 64
+
+// Half-vector (64-bit) stores: one overload per lane type, each forwarding to
+// the matching vst1_* intrinsic. The descriptor only selects the overload.
+SIMD_INLINE void store(const vec_arm8<uint8_t, 8> v,
+                       Desc<uint8_t, 8, ARM8> /* d */,
+                       uint8_t* SIMD_RESTRICT p) {
+  const uint8x8_t lanes = v.raw;
+  vst1_u8(p, lanes);
+}
+SIMD_INLINE void store(const vec_arm8<uint16_t, 4> v,
+                       Desc<uint16_t, 4, ARM8> /* d */,
+                       uint16_t* SIMD_RESTRICT p) {
+  const uint16x4_t lanes = v.raw;
+  vst1_u16(p, lanes);
+}
+SIMD_INLINE void store(const vec_arm8<uint32_t, 2> v,
+                       Desc<uint32_t, 2, ARM8> /* d */,
+                       uint32_t* SIMD_RESTRICT p) {
+  const uint32x2_t lanes = v.raw;
+  vst1_u32(p, lanes);
+}
+SIMD_INLINE void store(const vec_arm8<uint64_t, 1> v,
+                       Desc<uint64_t, 1, ARM8> /* d */,
+                       uint64_t* SIMD_RESTRICT p) {
+  const uint64x1_t lanes = v.raw;
+  vst1_u64(p, lanes);
+}
+SIMD_INLINE void store(const vec_arm8<int8_t, 8> v,
+                       Desc<int8_t, 8, ARM8> /* d */,
+                       int8_t* SIMD_RESTRICT p) {
+  const int8x8_t lanes = v.raw;
+  vst1_s8(p, lanes);
+}
+SIMD_INLINE void store(const vec_arm8<int16_t, 4> v,
+                       Desc<int16_t, 4, ARM8> /* d */,
+                       int16_t* SIMD_RESTRICT p) {
+  const int16x4_t lanes = v.raw;
+  vst1_s16(p, lanes);
+}
+SIMD_INLINE void store(const vec_arm8<int32_t, 2> v,
+                       Desc<int32_t, 2, ARM8> /* d */,
+                       int32_t* SIMD_RESTRICT p) {
+  const int32x2_t lanes = v.raw;
+  vst1_s32(p, lanes);
+}
+SIMD_INLINE void store(const vec_arm8<int64_t, 1> v,
+                       Desc<int64_t, 1, ARM8> /* d */,
+                       int64_t* SIMD_RESTRICT p) {
+  const int64x1_t lanes = v.raw;
+  vst1_s64(p, lanes);
+}
+SIMD_INLINE void store(const vec_arm8<float, 2> v,
+                       Desc<float, 2, ARM8> /* d */,
+                       float* SIMD_RESTRICT p) {
+  const float32x2_t lanes = v.raw;
+  vst1_f32(p, lanes);
+}
+SIMD_INLINE void store(const vec_arm8<double, 1> v,
+                       Desc<double, 1, ARM8> /* d */,
+                       double* SIMD_RESTRICT p) {
+  const float64x1_t lanes = v.raw;
+  vst1_f64(p, lanes);
+}
+
+// ------------------------------ Store 32
+
+// 32-bit stores. Each overload writes lane 0 of a 64-bit register (viewed as
+// one 32-bit element) to p with vst1_lane_{u32,s32,f32}. For 8- and 16-bit
+// lane types the register is reinterpreted to 32-bit lanes and the
+// destination pointer is cast to the 32-bit element type the intrinsic
+// requires, mirroring the casts used by the matching 32-bit loads.
+SIMD_INLINE void store(const vec_arm8<uint8_t, 4> v, Desc<uint8_t, 4, ARM8>,
+                       uint8_t* SIMD_RESTRICT p) {
+  uint32x2_t a = vreinterpret_u32_u8(v.raw);
+  vst1_lane_u32(reinterpret_cast<uint32_t*>(p), a, 0);
+}
+SIMD_INLINE void store(const vec_arm8<uint16_t, 2> v, Desc<uint16_t, 2, ARM8>,
+                       uint16_t* SIMD_RESTRICT p) {
+  uint32x2_t a = vreinterpret_u32_u16(v.raw);
+  vst1_lane_u32(reinterpret_cast<uint32_t*>(p), a, 0);
+}
+SIMD_INLINE void store(const vec_arm8<uint32_t, 1> v, Desc<uint32_t, 1, ARM8>,
+                       uint32_t* SIMD_RESTRICT p) {
+  vst1_lane_u32(p, v.raw, 0);
+}
+SIMD_INLINE void store(const vec_arm8<int8_t, 4> v, Desc<int8_t, 4, ARM8>,
+                       int8_t* SIMD_RESTRICT p) {
+  int32x2_t a = vreinterpret_s32_s8(v.raw);
+  vst1_lane_s32(reinterpret_cast<int32_t*>(p), a, 0);
+}
+SIMD_INLINE void store(const vec_arm8<int16_t, 2> v, Desc<int16_t, 2, ARM8>,
+                       int16_t* SIMD_RESTRICT p) {
+  int32x2_t a = vreinterpret_s32_s16(v.raw);
+  vst1_lane_s32(reinterpret_cast<int32_t*>(p), a, 0);
+}
+SIMD_INLINE void store(const vec_arm8<int32_t, 1> v, Desc<int32_t, 1, ARM8>,
+                       int32_t* SIMD_RESTRICT p) {
+  vst1_lane_s32(p, v.raw, 0);
+}
+SIMD_INLINE void store(const vec_arm8<float, 1> v, Desc<float, 1, ARM8>,
+                       float* SIMD_RESTRICT p) {
+  vst1_lane_f32(p, v.raw, 0);
+}
+
+// ------------------------------ Non-temporal stores
+
+// Same as aligned stores on non-x86.
+
+// Non-temporal store entry point; implemented here as a plain store() of the
+// full vector to the given destination.
+template <typename T>
+SIMD_INLINE void stream(const vec_arm8<T> v, Full<T, ARM8> d,
+                        T* SIMD_RESTRICT aligned) {
+  T* const dst = aligned;
+  store(v, d, dst);
+}
+
+// ================================================== CONVERT
+
+// ------------------------------ Promotions (part w/ narrow lanes -> full)
+
+// Unsigned -> wider unsigned: lanes are zero-extended with vmovl_u*.
+SIMD_INLINE vec_arm8<uint16_t> convert_to(Full<uint16_t, ARM8> /* d */,
+                                          const vec_arm8<uint8_t, 8> v) {
+  const uint16x8_t widened = vmovl_u8(v.raw);
+  return vec_arm8<uint16_t>(widened);
+}
+// u8 -> u32 takes two widening steps; only the low four u16 lanes of the
+// intermediate are widened again.
+SIMD_INLINE vec_arm8<uint32_t> convert_to(Full<uint32_t, ARM8> /* d */,
+                                          const vec_arm8<uint8_t, 4> v) {
+  const uint16x8_t as_u16 = vmovl_u8(v.raw);
+  const uint32x4_t as_u32 = vmovl_u16(vget_low_u16(as_u16));
+  return vec_arm8<uint32_t>(as_u32);
+}
+SIMD_INLINE vec_arm8<uint32_t> convert_to(Full<uint32_t, ARM8> /* d */,
+                                          const vec_arm8<uint16_t, 4> v) {
+  const uint32x4_t widened = vmovl_u16(v.raw);
+  return vec_arm8<uint32_t>(widened);
+}
+SIMD_INLINE vec_arm8<uint64_t> convert_to(Full<uint64_t, ARM8> /* d */,
+                                          const vec_arm8<uint32_t, 2> v) {
+  const uint64x2_t widened = vmovl_u32(v.raw);
+  return vec_arm8<uint64_t>(widened);
+}
+// Unsigned -> wider signed: lanes are zero-extended with vmovl_u*, then the
+// unsigned result is reinterpreted (bit pattern unchanged) as the signed
+// vector type of the same lane width. The explicit vreinterpretq_* calls
+// avoid depending on implicit (lax) vector conversions and use the source
+// type that vmovl_* actually returns.
+SIMD_INLINE vec_arm8<int16_t> convert_to(Full<int16_t, ARM8>,
+                                         const vec_arm8<uint8_t, 8> v) {
+  return vec_arm8<int16_t>(vreinterpretq_s16_u16(vmovl_u8(v.raw)));
+}
+SIMD_INLINE vec_arm8<int32_t> convert_to(Full<int32_t, ARM8>,
+                                         const vec_arm8<uint8_t, 4> v) {
+  uint16x8_t a = vmovl_u8(v.raw);
+  // vmovl_u16 yields uint32x4_t, so the reinterpret must start from u32.
+  uint32x4_t b = vmovl_u16(vget_low_u16(a));
+  return vec_arm8<int32_t>(vreinterpretq_s32_u32(b));
+}
+SIMD_INLINE vec_arm8<int32_t> convert_to(Full<int32_t, ARM8>,
+                                         const vec_arm8<uint16_t, 4> v) {
+  return vec_arm8<int32_t>(vreinterpretq_s32_u32(vmovl_u16(v.raw)));
+}
+
+// Zero-extends four uint8_t lanes of v to uint32_t. The only
+// uint8_t -> uint32_t convert_to overload takes a 4-lane part, so the low
+// 64 bits of v are extracted (as any_part does for Desc<uint8_t, 4, ARM8>)
+// and wrapped as that part before converting.
+SIMD_INLINE vec_arm8<uint32_t> u32_from_u8(const vec_arm8<uint8_t> v) {
+  const vec_arm8<uint8_t, 4> lower(vget_low_u8(v.raw));
+  return convert_to(Full<uint32_t, ARM8>(), lower);
+}
+
+// Signed -> wider signed: lanes are sign-extended with vmovl_s*.
+SIMD_INLINE vec_arm8<int16_t> convert_to(Full<int16_t, ARM8> /* d */,
+                                         const vec_arm8<int8_t, 8> v) {
+  const int16x8_t widened = vmovl_s8(v.raw);
+  return vec_arm8<int16_t>(widened);
+}
+// i8 -> i32 takes two widening steps; only the low four i16 lanes of the
+// intermediate are widened again.
+SIMD_INLINE vec_arm8<int32_t> convert_to(Full<int32_t, ARM8> /* d */,
+                                         const vec_arm8<int8_t, 4> v) {
+  const int16x8_t as_i16 = vmovl_s8(v.raw);
+  const int32x4_t as_i32 = vmovl_s16(vget_low_s16(as_i16));
+  return vec_arm8<int32_t>(as_i32);
+}
+SIMD_INLINE vec_arm8<int32_t> convert_to(Full<int32_t, ARM8> /* d */,
+                                         const vec_arm8<int16_t, 4> v) {
+  const int32x4_t widened = vmovl_s16(v.raw);
+  return vec_arm8<int32_t>(widened);
+}
+SIMD_INLINE vec_arm8<int64_t> convert_to(Full<int64_t, ARM8> /* d */,
+                                         const vec_arm8<int32_t, 2> v) {
+  const int64x2_t widened = vmovl_s32(v.raw);
+  return vec_arm8<int64_t>(widened);
+}
+
+// ------------------------------ Demotions (full -> part w/ narrow lanes)
+
+// Saturating narrowing conversions from a full vector to a part with lanes
+// of half the width. vqmovun_* narrows signed input to unsigned output;
+// vqmovn_* keeps the signedness of the input.
+template <size_t N>
+SIMD_INLINE vec_arm8<uint16_t, N> convert_to(Part<uint16_t, N, ARM8> /* d */,
+                                             const vec_arm8<int32_t> v) {
+  const uint16x4_t narrowed = vqmovun_s32(v.raw);
+  return vec_arm8<uint16_t, N>(narrowed);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> convert_to(Part<uint8_t, N, ARM8> /* d */,
+                                            const vec_arm8<uint16_t> v) {
+  const uint8x8_t narrowed = vqmovn_u16(v.raw);
+  return vec_arm8<uint8_t, N>(narrowed);
+}
+
+template <size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> convert_to(Part<uint8_t, N, ARM8> /* d */,
+                                            const vec_arm8<int16_t> v) {
+  const uint8x8_t narrowed = vqmovun_s16(v.raw);
+  return vec_arm8<uint8_t, N>(narrowed);
+}
+
+template <size_t N>
+SIMD_INLINE vec_arm8<int16_t, N> convert_to(Part<int16_t, N, ARM8> /* d */,
+                                            const vec_arm8<int32_t> v) {
+  const int16x4_t narrowed = vqmovn_s32(v.raw);
+  return vec_arm8<int16_t, N>(narrowed);
+}
+template <size_t N>
+SIMD_INLINE vec_arm8<int8_t, N> convert_to(Part<int8_t, N, ARM8> /* d */,
+                                           const vec_arm8<int16_t> v) {
+  const int8x8_t narrowed = vqmovn_s16(v.raw);
+  return vec_arm8<int8_t, N>(narrowed);
+}
+
+// In the following convert_to functions, |b| is purposely undefined.
+// The value a needs to be extended to 128 bits so that vqmovn can be
+// used and |b| is undefined so that no extra overhead is introduced.
+SIMD_DIAGNOSTICS(push)
+SIMD_DIAGNOSTICS_OFF(disable : 4701, ignored "-Wuninitialized")
+
+// int32_t -> uint8_t in two saturating steps: first to uint16_t via the
+// int32_t -> uint16_t convert_to, then to uint8_t via vqmovn_u16.
+template<size_t N>
+SIMD_INLINE vec_arm8<uint8_t, N> convert_to(Part<uint8_t, N, ARM8>,
+                                            const vec_arm8<int32_t> v) {
+  vec_arm8<uint16_t, N> a = convert_to(Desc<uint16_t, N, ARM8>(), v);
+  // Deliberately left uninitialized: it only pads |a| to 128 bits.
+  vec_arm8<uint16_t, N> b;
+  // |a| supplies the low half of the combined vector, |b| the high half.
+  uint16x8_t c = vcombine_u16(a.raw, b.raw);
+  // NOTE(review): the upper output lanes derive from |b| and are presumably
+  // never read by callers - confirm.
+  return vec_arm8<uint8_t, N>(vqmovn_u16(c));
+}
+
+// int32_t -> int8_t in two saturating steps: first to int16_t via the
+// int32_t -> int16_t convert_to, then to int8_t via vqmovn_s16.
+template<size_t N>
+SIMD_INLINE vec_arm8<int8_t, N> convert_to(Part<int8_t, N, ARM8>,
+                                           const vec_arm8<int32_t> v) {
+  vec_arm8<int16_t, N> a = convert_to(Desc<int16_t, N, ARM8>(), v);
+  // Deliberately left uninitialized: it only pads |a| to 128 bits.
+  vec_arm8<int16_t, N> b;
+  // vcombine_s16 returns int16x8_t, which is also what vqmovn_s16 expects;
+  // keeping the signed type avoids an implicit signed/unsigned vector
+  // conversion in both directions.
+  int16x8_t c = vcombine_s16(a.raw, b.raw);
+  return vec_arm8<int8_t, N>(vqmovn_s16(c));
+}
+
+SIMD_DIAGNOSTICS(pop)
+
+// ------------------------------ Convert i32 <=> f32
+
+// int32_t lanes -> float lanes using vcvtq_f32_s32.
+template <size_t N>
+SIMD_INLINE vec_arm8<float, N> convert_to(Part<float, N, ARM8> /* d */,
+                                          const vec_arm8<int32_t, N> v) {
+  const float32x4_t as_float = vcvtq_f32_s32(v.raw);
+  return vec_arm8<float, N>(as_float);
+}
+// float lanes -> int32_t lanes using vcvtq_s32_f32, which truncates
+// (rounds toward zero).
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> convert_to(Part<int32_t, N, ARM8> /* d */,
+                                            const vec_arm8<float, N> v) {
+  const int32x4_t as_int = vcvtq_s32_f32(v.raw);
+  return vec_arm8<int32_t, N>(as_int);
+}
+
+// float lanes -> int32_t lanes using vcvtnq_s32_f32 (the round-to-nearest
+// variant of the conversion).
+template <size_t N>
+SIMD_INLINE vec_arm8<int32_t, N> nearest_int(const vec_arm8<float, N> v) {
+  const int32x4_t rounded = vcvtnq_s32_f32(v.raw);
+  return vec_arm8<int32_t, N>(rounded);
+}
+
+// ================================================== SWIZZLE
+
+// ------------------------------ 'Extract' other half (see any_part)
+
+// Each overload returns the upper 64 bits of a full vector as a half-width
+// part, using the matching vget_high_* intrinsic.
+SIMD_INLINE vec_arm8<uint8_t, 8> other_half(const vec_arm8<uint8_t> v) {
+  const uint8x8_t upper = vget_high_u8(v.raw);
+  return vec_arm8<uint8_t, 8>(upper);
+}
+SIMD_INLINE vec_arm8<int8_t, 8> other_half(const vec_arm8<int8_t> v) {
+  const int8x8_t upper = vget_high_s8(v.raw);
+  return vec_arm8<int8_t, 8>(upper);
+}
+SIMD_INLINE vec_arm8<uint16_t, 4> other_half(const vec_arm8<uint16_t> v) {
+  const uint16x4_t upper = vget_high_u16(v.raw);
+  return vec_arm8<uint16_t, 4>(upper);
+}
+SIMD_INLINE vec_arm8<int16_t, 4> other_half(const vec_arm8<int16_t> v) {
+  const int16x4_t upper = vget_high_s16(v.raw);
+  return vec_arm8<int16_t, 4>(upper);
+}
+SIMD_INLINE vec_arm8<uint32_t, 2> other_half(const vec_arm8<uint32_t> v) {
+  const uint32x2_t upper = vget_high_u32(v.raw);
+  return vec_arm8<uint32_t, 2>(upper);
+}
+SIMD_INLINE vec_arm8<int32_t, 2> other_half(const vec_arm8<int32_t> v) {
+  const int32x2_t upper = vget_high_s32(v.raw);
+  return vec_arm8<int32_t, 2>(upper);
+}
+SIMD_INLINE vec_arm8<uint64_t, 1> other_half(const vec_arm8<uint64_t> v) {
+  const uint64x1_t upper = vget_high_u64(v.raw);
+  return vec_arm8<uint64_t, 1>(upper);
+}
+SIMD_INLINE vec_arm8<int64_t, 1> other_half(const vec_arm8<int64_t> v) {
+  const int64x1_t upper = vget_high_s64(v.raw);
+  return vec_arm8<int64_t, 1>(upper);
+}
+SIMD_INLINE vec_arm8<float, 2> other_half(const vec_arm8<float> v) {
+  const float32x2_t upper = vget_high_f32(v.raw);
+  return vec_arm8<float, 2>(upper);
+}
+SIMD_INLINE vec_arm8<double, 1> other_half(const vec_arm8<double> v) {
+  const float64x1_t upper = vget_high_f64(v.raw);
+  return vec_arm8<double, 1>(upper);
+}
+
+// ------------------------------ Extract from 2x 128-bit at constant offset
+
+// Treats <hi, lo> as one 256-bit value, drops its kBytes least-significant
+// bytes and returns the next 128 bits. Both inputs are viewed as bytes so a
+// single vextq_u8 serves every lane type; the result is cast back to T.
+template <int kBytes, typename T>
+SIMD_INLINE vec_arm8<T> combine_shift_right_bytes(const vec_arm8<T> hi,
+                                                  const vec_arm8<T> lo) {
+  static_assert(0 < kBytes && kBytes < 16, "kBytes must be in [1, 15]");
+  const Full<uint8_t, ARM8> d8;
+  const auto lo_bytes = cast_to(d8, lo);
+  const auto hi_bytes = cast_to(d8, hi);
+  const vec_arm8<uint8_t> extracted(
+      vextq_u8(lo_bytes.raw, hi_bytes.raw, kBytes));
+  return cast_to(Full<T, ARM8>(), extracted);
+}
+
+// ------------------------------ Shift vector by constant #bytes
+
+// Shifts the whole vector toward the most-significant end by kBytes bytes,
+// filling with zeros: 0x01..0F, kBytes = 1 => 0x02..0F00.
+// Implemented as a (16 - kBytes)-byte extraction from <v, zero>.
+template <int kBytes, typename T, size_t N>
+SIMD_INLINE vec_arm8<T, N> shift_left_bytes(const vec_arm8<T, N> v) {
+  const auto zero = setzero(Full<T, ARM8>());
+  return combine_shift_right_bytes<16 - kBytes>(v, zero);
+}
+
+// Shifts the whole vector toward the least-significant end by kBytes bytes,
+// filling with zeros: 0x01..0F, kBytes = 1 => 0x0001..0E.
+// Implemented as a kBytes-byte extraction from <zero, v>.
+template <int kBytes, typename T, size_t N>
+SIMD_INLINE vec_arm8<T, N> shift_right_bytes(const vec_arm8<T, N> v) {
+  const auto zero = setzero(Full<T, ARM8>());
+  return combine_shift_right_bytes<kBytes>(zero, v);
+}
+
+// ------------------------------ Broadcast/splat any lane
+
+// broadcast<kLane>(v) copies lane kLane of v into every lane of the result
+// via vdupq_laneq_*. kLane is range-checked at compile time against the lane
+// count of the vector type.
+
+// Unsigned
+template <int kLane>
+SIMD_INLINE vec_arm8<uint16_t> broadcast(const vec_arm8<uint16_t> v) {
+  static_assert(kLane >= 0 && kLane < 8, "Invalid lane");
+  const uint16x8_t splat = vdupq_laneq_u16(v.raw, kLane);
+  return vec_arm8<uint16_t>(splat);
+}
+template <int kLane>
+SIMD_INLINE vec_arm8<uint32_t> broadcast(const vec_arm8<uint32_t> v) {
+  static_assert(kLane >= 0 && kLane < 4, "Invalid lane");
+  const uint32x4_t splat = vdupq_laneq_u32(v.raw, kLane);
+  return vec_arm8<uint32_t>(splat);
+}
+template <int kLane>
+SIMD_INLINE vec_arm8<uint64_t> broadcast(const vec_arm8<uint64_t> v) {
+  static_assert(kLane >= 0 && kLane < 2, "Invalid lane");
+  const uint64x2_t splat = vdupq_laneq_u64(v.raw, kLane);
+  return vec_arm8<uint64_t>(splat);
+}
+
+// Signed
+template <int kLane>
+SIMD_INLINE vec_arm8<int16_t> broadcast(const vec_arm8<int16_t> v) {
+  static_assert(kLane >= 0 && kLane < 8, "Invalid lane");
+  const int16x8_t splat = vdupq_laneq_s16(v.raw, kLane);
+  return vec_arm8<int16_t>(splat);
+}
+template <int kLane>
+SIMD_INLINE vec_arm8<int32_t> broadcast(const vec_arm8<int32_t> v) {
+  static_assert(kLane >= 0 && kLane < 4, "Invalid lane");
+  const int32x4_t splat = vdupq_laneq_s32(v.raw, kLane);
+  return vec_arm8<int32_t>(splat);
+}
+template <int kLane>
+SIMD_INLINE vec_arm8<int64_t> broadcast(const vec_arm8<int64_t> v) {
+  static_assert(kLane >= 0 && kLane < 2, "Invalid lane");
+  const int64x2_t splat = vdupq_laneq_s64(v.raw, kLane);
+  return vec_arm8<int64_t>(splat);
+}
+
+// Float
+template <int kLane>
+SIMD_INLINE vec_arm8<float> broadcast(const vec_arm8<float> v) {
+  static_assert(kLane >= 0 && kLane < 4, "Invalid lane");
+  const float32x4_t splat = vdupq_laneq_f32(v.raw, kLane);
+  return vec_arm8<float>(splat);
+}
+template <int kLane>
+SIMD_INLINE vec_arm8<double> broadcast(const vec_arm8<double> v) {
+  static_assert(kLane >= 0 && kLane < 2, "Invalid lane");
+  const float64x2_t splat = vdupq_laneq_f64(v.raw, kLane);
+  return vec_arm8<double>(splat);
+}
+
+// ------------------------------ Shuffle bytes with variable indices
+
+// Byte-wise table lookup: output byte i is bytes[from[i]]. Both arguments
+// are viewed as byte vectors; entries of "from" are either valid indices in
+// [0, 16) or >= 0x80 to zero the i-th output byte. The result is cast back
+// to the lane type of "bytes".
+template <typename T, typename TI>
+SIMD_INLINE vec_arm8<T> table_lookup_bytes(const vec_arm8<T> bytes,
+                                           const vec_arm8<TI> from) {
+  const Full<uint8_t, ARM8> d8;
+  const auto table = cast_to(d8, bytes);
+  const auto indices = cast_to(d8, from);
+  const vec_arm8<uint8_t> picked(vqtbl1q_u8(table.raw, indices.raw));
+  return cast_to(Full<T, ARM8>(), picked);
+}
+
+// ------------------------------ Hard-coded shuffles
+
+// Notation: let vec_arm8<int32_t> have lanes 3,2,1,0 (0 is least-significant).
+// shuffle_0321 rotates one lane to the right (the previous least-significant
+// lane is now most-significant). These could also be implemented via
+// combine_shift_right_bytes but the shuffle_abcd notation is more convenient.
+
+// Swap the two 64-bit halves of the vector: extracting 128 bits at an
+// 8-byte offset from <v, v> rotates the vector by half its width.
+// shuffle_1032 and shuffle_01 are the same operation under two names.
+template <typename T>
+SIMD_INLINE vec_arm8<T> shuffle_1032(const vec_arm8<T> v) {
+  constexpr int kHalfBytes = 8;
+  return combine_shift_right_bytes<kHalfBytes>(v, v);
+}
+template <typename T>
+SIMD_INLINE vec_arm8<T> shuffle_01(const vec_arm8<T> v) {
+  constexpr int kHalfBytes = 8;
+  return combine_shift_right_bytes<kHalfBytes>(v, v);
+}
+
+// Rotate right by 32 bits: a 4-byte extraction from <v, v> moves every
+// 32-bit lane one position down and wraps the lowest lane to the top.
+template <typename T>
+SIMD_INLINE vec_arm8<T> shuffle_0321(const vec_arm8<T> v) {
+  constexpr int kRotateBytes = 4;
+  return combine_shift_right_bytes<kRotateBytes>(v, v);
+}
+
+// Rotate left by 32 bits: a 12-byte extraction from <v, v> moves every
+// 32-bit lane one position up and wraps the highest lane to the bottom.
+template <typename T>
+SIMD_INLINE vec_arm8<T> shuffle_2103(const vec_arm8<T> v) {
+  constexpr int kRotateBytes = 12;
+  return combine_shift_right_bytes<kRotateBytes>(v, v);
+}
+
+// Reverse the order of the four 32-bit lanes (lanes 3,2,1,0 become 0,1,2,3).
+template <typename T>
+SIMD_INLINE vec_arm8<T> shuffle_0123(const vec_arm8<T> v) {
+  // TODO(janwas): more efficient implementation?
+  // Byte indices are grouped per 32-bit lane: the lanes swap places while
+  // the four bytes inside each lane stay in their original order. Reversing
+  // all 16 bytes individually would additionally byte-swap every lane.
+  static constexpr uint8_t bytes[16] = {12, 13, 14, 15, 8, 9, 10, 11,
+                                        4,  5,  6,  7,  0, 1, 2,  3};
+  return table_lookup_bytes(v, load(Full<uint8_t, ARM8>(), bytes));
+}
+
+// ------------------------------ Permute (runtime variable)
+
+// Expands per-lane indices into a byte-level shuffle control. For every
+// lane, byte b of the output selects byte idx[lane] * sizeof(T) + b of the
+// table, so whole lanes are moved. The descriptor d is unused; it only
+// fixes the lane type T.
+template <typename T>
+SIMD_INLINE permute_arm8<T> set_table_indices(const Full<T, ARM8> d,
+                                              const int32_t* idx) {
+  const Full<uint8_t, ARM8> d8;
+  SIMD_ALIGN uint8_t control[d8.N];
+  const size_t num_lanes = d8.N / sizeof(T);
+  for (size_t lane = 0; lane < num_lanes; ++lane) {
+    for (size_t offset = 0; offset < sizeof(T); ++offset) {
+      control[lane * sizeof(T) + offset] = idx[lane] * sizeof(T) + offset;
+    }
+  }
+  return permute_arm8<T>{load(d8, control).raw};
+}
+
+// Lane permutation driven by a permute_arm8 control: the control is wrapped
+// as a byte vector and passed to table_lookup_bytes.
+SIMD_INLINE vec_arm8<uint32_t> table_lookup_lanes(
+    const vec_arm8<uint32_t> v, const permute_arm8<uint32_t> idx) {
+  const vec_arm8<uint8_t> byte_idx(idx.raw);
+  return table_lookup_bytes(v, byte_idx);
+}
+SIMD_INLINE vec_arm8<int32_t> table_lookup_lanes(
+    const vec_arm8<int32_t> v, const permute_arm8<int32_t> idx) {
+  const vec_arm8<uint8_t> byte_idx(idx.raw);
+  return table_lookup_bytes(v, byte_idx);
+}
+// The float overload performs the lookup on an int32_t view of v and casts
+// the result back to float.
+SIMD_INLINE vec_arm8<float> table_lookup_lanes(
+    const vec_arm8<float> v, const permute_arm8<float> idx) {
+  const Full<int32_t, ARM8> di;
+  const Full<float, ARM8> df;
+  const vec_arm8<uint8_t> byte_idx(idx.raw);
+  const auto as_int = cast_to(di, v);
+  return cast_to(df, table_lookup_bytes(as_int, byte_idx));
+}
+
+// ------------------------------ Interleave lanes
+
+// Interleaves lanes from halves of the 128-bit blocks of "a" (which provides
+// the least-significant lane) and "b". To concatenate two half-width integers
+// into one, use zip_lo/hi instead (also works with scalar).
+
+// interleave_lo: one overload per lane type, each forwarding to the matching
+// vzip1q_* intrinsic on the two inputs ("a" first).
+SIMD_INLINE vec_arm8<uint8_t> interleave_lo(const vec_arm8<uint8_t> a,
+                                            const vec_arm8<uint8_t> b) {
+  const uint8x16_t zipped = vzip1q_u8(a.raw, b.raw);
+  return vec_arm8<uint8_t>(zipped);
+}
+SIMD_INLINE vec_arm8<uint16_t> interleave_lo(const vec_arm8<uint16_t> a,
+                                             const vec_arm8<uint16_t> b) {
+  const uint16x8_t zipped = vzip1q_u16(a.raw, b.raw);
+  return vec_arm8<uint16_t>(zipped);
+}
+SIMD_INLINE vec_arm8<uint32_t> interleave_lo(const vec_arm8<uint32_t> a,
+                                             const vec_arm8<uint32_t> b) {
+  const uint32x4_t zipped = vzip1q_u32(a.raw, b.raw);
+  return vec_arm8<uint32_t>(zipped);
+}
+SIMD_INLINE vec_arm8<uint64_t> interleave_lo(const vec_arm8<uint64_t> a,
+                                             const vec_arm8<uint64_t> b) {
+  const uint64x2_t zipped = vzip1q_u64(a.raw, b.raw);
+  return vec_arm8<uint64_t>(zipped);
+}
+
+SIMD_INLINE vec_arm8<int8_t> interleave_lo(const vec_arm8<int8_t> a,
+                                           const vec_arm8<int8_t> b) {
+  const int8x16_t zipped = vzip1q_s8(a.raw, b.raw);
+  return vec_arm8<int8_t>(zipped);
+}
+SIMD_INLINE vec_arm8<int16_t> interleave_lo(const vec_arm8<int16_t> a,
+                                            const vec_arm8<int16_t> b) {
+  const int16x8_t zipped = vzip1q_s16(a.raw, b.raw);
+  return vec_arm8<int16_t>(zipped);
+}
+SIMD_INLINE vec_arm8<int32_t> interleave_lo(const vec_arm8<int32_t> a,
+                                            const vec_arm8<int32_t> b) {
+  const int32x4_t zipped = vzip1q_s32(a.raw, b.raw);
+  return vec_arm8<int32_t>(zipped);
+}
+SIMD_INLINE vec_arm8<int64_t> interleave_lo(const vec_arm8<int64_t> a,
+                                            const vec_arm8<int64_t> b) {
+  const int64x2_t zipped = vzip1q_s64(a.raw, b.raw);
+  return vec_arm8<int64_t>(zipped);
+}
+
+SIMD_INLINE vec_arm8<float> interleave_lo(const vec_arm8<float> a,
+                                          const vec_arm8<float> b) {
+  const float32x4_t zipped = vzip1q_f32(a.raw, b.raw);
+  return vec_arm8<float>(zipped);
+}
+SIMD_INLINE vec_arm8<double> interleave_lo(const vec_arm8<double> a,
+                                           const vec_arm8<double> b) {
+  const float64x2_t zipped = vzip1q_f64(a.raw, b.raw);
+  return vec_arm8<double>(zipped);
+}
+
+// interleave_hi: one overload per lane type, each forwarding to the matching
+// vzip2q_* intrinsic on the two inputs ("a" first).
+SIMD_INLINE vec_arm8<uint8_t> interleave_hi(const vec_arm8<uint8_t> a,
+                                            const vec_arm8<uint8_t> b) {
+  const uint8x16_t zipped = vzip2q_u8(a.raw, b.raw);
+  return vec_arm8<uint8_t>(zipped);
+}
+SIMD_INLINE vec_arm8<uint16_t> interleave_hi(const vec_arm8<uint16_t> a,
+                                             const vec_arm8<uint16_t> b) {
+  const uint16x8_t zipped = vzip2q_u16(a.raw, b.raw);
+  return vec_arm8<uint16_t>(zipped);
+}
+SIMD_INLINE vec_arm8<uint32_t> interleave_hi(const vec_arm8<uint32_t> a,
+                                             const vec_arm8<uint32_t> b) {
+  const uint32x4_t zipped = vzip2q_u32(a.raw, b.raw);
+  return vec_arm8<uint32_t>(zipped);
+}
+SIMD_INLINE vec_arm8<uint64_t> interleave_hi(const vec_arm8<uint64_t> a,
+                                             const vec_arm8<uint64_t> b) {
+  const uint64x2_t zipped = vzip2q_u64(a.raw, b.raw);
+  return vec_arm8<uint64_t>(zipped);
+}
+
+SIMD_INLINE vec_arm8<int8_t> interleave_hi(const vec_arm8<int8_t> a,
+                                           const vec_arm8<int8_t> b) {
+  const int8x16_t zipped = vzip2q_s8(a.raw, b.raw);
+  return vec_arm8<int8_t>(zipped);
+}
+SIMD_INLINE vec_arm8<int16_t> interleave_hi(const vec_arm8<int16_t> a,
+                                            const vec_arm8<int16_t> b) {
+  const int16x8_t zipped = vzip2q_s16(a.raw, b.raw);
+  return vec_arm8<int16_t>(zipped);
+}
+SIMD_INLINE vec_arm8<int32_t> interleave_hi(const vec_arm8<int32_t> a,
+                                            const vec_arm8<int32_t> b) {
+  const int32x4_t zipped = vzip2q_s32(a.raw, b.raw);
+  return vec_arm8<int32_t>(zipped);
+}
+SIMD_INLINE vec_arm8<int64_t> interleave_hi(const vec_arm8<int64_t> a,
+                                            const vec_arm8<int64_t> b) {
+  const int64x2_t zipped = vzip2q_s64(a.raw, b.raw);
+  return vec_arm8<int64_t>(zipped);
+}
+
+SIMD_INLINE vec_arm8<float> interleave_hi(const vec_arm8<float> a,
+                                          const vec_arm8<float> b) {
+  const float32x4_t zipped = vzip2q_f32(a.raw, b.raw);
+  return vec_arm8<float>(zipped);
+}
+// interleave_hi for double lanes. Uses the f64 zip intrinsic so operand and
+// result types match float64x2_t; the s64 variant takes and returns
+// int64x2_t and does not fit double vectors.
+SIMD_INLINE vec_arm8<double> interleave_hi(const vec_arm8<double> a,
+                                           const vec_arm8<double> b) {
+  return vec_arm8<double>(vzip2q_f64(a.raw, b.raw));
+}
+
+// ------------------------------ Zip lanes
+
+// Same as interleave_*, except that the return lanes are double-width integers;
+// this is necessary because the single-lane scalar cannot return two values.
+
+// zip_lo: applies the same vzip1q_* intrinsic as interleave_lo, but returns
+// the result as a vector of lanes twice as wide. The narrow-lane zip result
+// is reinterpreted (bit pattern unchanged) to the wide-lane register type
+// explicitly instead of relying on implicit (lax) vector conversions.
+SIMD_INLINE vec_arm8<uint16_t> zip_lo(const vec_arm8<uint8_t> a,
+                                      const vec_arm8<uint8_t> b) {
+  return vec_arm8<uint16_t>(vreinterpretq_u16_u8(vzip1q_u8(a.raw, b.raw)));
+}
+SIMD_INLINE vec_arm8<uint32_t> zip_lo(const vec_arm8<uint16_t> a,
+                                      const vec_arm8<uint16_t> b) {
+  return vec_arm8<uint32_t>(vreinterpretq_u32_u16(vzip1q_u16(a.raw, b.raw)));
+}
+SIMD_INLINE vec_arm8<uint64_t> zip_lo(const vec_arm8<uint32_t> a,
+                                      const vec_arm8<uint32_t> b) {
+  return vec_arm8<uint64_t>(vreinterpretq_u64_u32(vzip1q_u32(a.raw, b.raw)));
+}
+
+SIMD_INLINE vec_arm8<int16_t> zip_lo(const vec_arm8<int8_t> a,
+                                     const vec_arm8<int8_t> b) {
+  return vec_arm8<int16_t>(vreinterpretq_s16_s8(vzip1q_s8(a.raw, b.raw)));
+}
+SIMD_INLINE vec_arm8<int32_t> zip_lo(const vec_arm8<int16_t> a,
+                                     const vec_arm8<int16_t> b) {
+  return vec_arm8<int32_t>(vreinterpretq_s32_s16(vzip1q_s16(a.raw, b.raw)));
+}
+SIMD_INLINE vec_arm8<int64_t> zip_lo(const vec_arm8<int32_t> a,
+                                     const vec_arm8<int32_t> b) {
+  return vec_arm8<int64_t>(vreinterpretq_s64_s32(vzip1q_s32(a.raw, b.raw)));
+}
+
+// zip_hi: applies the same vzip2q_* intrinsic as interleave_hi, but returns
+// the result as a vector of lanes twice as wide. The narrow-lane zip result
+// is reinterpreted (bit pattern unchanged) to the wide-lane register type
+// explicitly instead of relying on implicit (lax) vector conversions.
+SIMD_INLINE vec_arm8<uint16_t> zip_hi(const vec_arm8<uint8_t> a,
+                                      const vec_arm8<uint8_t> b) {
+  return vec_arm8<uint16_t>(vreinterpretq_u16_u8(vzip2q_u8(a.raw, b.raw)));
+}
+SIMD_INLINE vec_arm8<uint32_t> zip_hi(const vec_arm8<uint16_t> a,
+                                      const vec_arm8<uint16_t> b) {
+  return vec_arm8<uint32_t>(vreinterpretq_u32_u16(vzip2q_u16(a.raw, b.raw)));
+}
+SIMD_INLINE vec_arm8<uint64_t> zip_hi(const vec_arm8<uint32_t> a,
+                                      const vec_arm8<uint32_t> b) {
+  return vec_arm8<uint64_t>(vreinterpretq_u64_u32(vzip2q_u32(a.raw, b.raw)));
+}
+
+SIMD_INLINE vec_arm8<int16_t> zip_hi(const vec_arm8<int8_t> a,
+                                     const vec_arm8<int8_t> b) {
+  return vec_arm8<int16_t>(vreinterpretq_s16_s8(vzip2q_s8(a.raw, b.raw)));
+}
+SIMD_INLINE vec_arm8<int32_t> zip_hi(const vec_arm8<int16_t> a,
+                                     const vec_arm8<int16_t> b) {
+  return vec_arm8<int32_t>(vreinterpretq_s32_s16(vzip2q_s16(a.raw, b.raw)));
+}
+SIMD_INLINE vec_arm8<int64_t> zip_hi(const vec_arm8<int32_t> a,
+                                     const vec_arm8<int32_t> b) {
+  return vec_arm8<int64_t>(vreinterpretq_s64_s32(vzip2q_s32(a.raw, b.raw)));
+}
+
+// ------------------------------ Parts
+
+// Builds a single-lane part holding "t"; implemented by forwarding to set1
+// with the single-lane descriptor.
+template <typename T>
+SIMD_INLINE vec_arm8<T, 1> set_part(Desc<T, 1, ARM8> d, const T t) {
+  const vec_arm8<T, 1> part = set1(d, t);
+  return part;
+}
+
+// Gets the single value stored in a vector/part: the vector is stored to a
+// temporary N-element array and element 0 is returned. The descriptor d is
+// unused; it only selects the overload.
+template <typename T, size_t N>
+SIMD_INLINE T get_part(Desc<T, 1, ARM8> d, const vec_arm8<T, N> v) {
+  // TODO(janwas): more efficient implementation?
+  SIMD_ALIGN T ret[N];
+  // Pass the array itself (decays to T*); &ret has type T(*)[N], which does
+  // not match the T* destination parameter of store().
+  store(v, Desc<T, N, ARM8>(), ret);
+  return ret[0];
+}
+
+// Returns part of a vector (callers must not assume which half). These
+// overloads return the lower 64 bits via the matching vget_low_* intrinsic;
+// the descriptor argument only selects the overload.
+SIMD_INLINE vec_arm8<uint8_t, 8> any_part(Desc<uint8_t, 8, ARM8> /* d */,
+                                          const vec_arm8<uint8_t> v) {
+  const uint8x8_t lower = vget_low_u8(v.raw);
+  return vec_arm8<uint8_t, 8>(lower);
+}
+SIMD_INLINE vec_arm8<uint16_t, 4> any_part(Desc<uint16_t, 4, ARM8> /* d */,
+                                           const vec_arm8<uint16_t> v) {
+  const uint16x4_t lower = vget_low_u16(v.raw);
+  return vec_arm8<uint16_t, 4>(lower);
+}
+SIMD_INLINE vec_arm8<uint32_t, 2> any_part(Desc<uint32_t, 2, ARM8> /* d */,
+                                           const vec_arm8<uint32_t> v) {
+  const uint32x2_t lower = vget_low_u32(v.raw);
+  return vec_arm8<uint32_t, 2>(lower);
+}
+SIMD_INLINE vec_arm8<uint64_t, 1> any_part(Desc<uint64_t, 1, ARM8> /* d */,
+                                           const vec_arm8<uint64_t> v) {
+  const uint64x1_t lower = vget_low_u64(v.raw);
+  return vec_arm8<uint64_t, 1>(lower);
+}
+SIMD_INLINE vec_arm8<int8_t, 8> any_part(Desc<int8_t, 8, ARM8> /* d */,
+                                         const vec_arm8<int8_t> v) {
+  const int8x8_t lower = vget_low_s8(v.raw);
+  return vec_arm8<int8_t, 8>(lower);
+}
+SIMD_INLINE vec_arm8<int16_t, 4> any_part(Desc<int16_t, 4, ARM8> /* d */,
+                                          const vec_arm8<int16_t> v) {
+  const int16x4_t lower = vget_low_s16(v.raw);
+  return vec_arm8<int16_t, 4>(lower);
+}
+SIMD_INLINE vec_arm8<int32_t, 2> any_part(Desc<int32_t, 2, ARM8> /* d */,
+                                          const vec_arm8<int32_t> v) {
+  const int32x2_t lower = vget_low_s32(v.raw);
+  return vec_arm8<int32_t, 2>(lower);
+}
+SIMD_INLINE vec_arm8<int64_t, 1> any_part(Desc<int64_t, 1, ARM8> /* d */,
+                                          const vec_arm8<int64_t> v) {
+  const int64x1_t lower = vget_low_s64(v.raw);
+  return vec_arm8<int64_t, 1>(lower);
+}
+SIMD_INLINE vec_arm8<float, 2> any_part(Desc<float, 2, ARM8> /* d */,
+                                        const vec_arm8<float> v) {
+  const float32x2_t lower = vget_low_f32(v.raw);
+  return vec_arm8<float, 2>(lower);
+}
+SIMD_INLINE vec_arm8<double, 1> any_part(Desc<double, 1, ARM8> /* d */,
+                                         const vec_arm8<double> v) {
+  const float64x1_t lower = vget_low_f64(v.raw);
+  return vec_arm8<double, 1>(lower);
+}
+
+SIMD_INLINE vec_arm8<uint8_t, 4> any_part(Desc<uint8_t, 4, ARM8>,
+                                          const vec_arm8<uint8_t> v) {
+  return vec_arm8<uint8_t, 4>(vget_low_u8(v.raw));
+}
+SIMD_INLINE vec_arm8<uint16_t, 2> any_part(Desc<uint16_t, 2, ARM8>,
+                                           const vec_arm8<uint16_t> v) {
+  return vec_arm8<uint16_t, 2>(vget_low_u16(v.raw));
+}
+SIMD_INLINE vec_arm8<uint32_t, 1> any_part(Desc<uint32_t, 1, ARM8>,
+                                           const vec_arm8<uint32_t> v) {
+  return vec_arm8<uint32_t, 1>(vget_low_u32(v.raw));
+}
+SIMD_INLINE vec_arm8<int8_t, 4> any_part(Desc<int8_t, 4, ARM8>,
+                                         const vec_arm8<int8_t> v) {
+  return vec_arm8<int8_t, 4>(vget_low_s8(v.raw));
+}
+SIMD_INLINE vec_arm8<int16_t, 2> any_part(Desc<int16_t, 2, ARM8>,
+                                          const vec_arm8<int16_t> v) {
+  return vec_arm8<int16_t, 2>(vget_low_s16(v.raw));
+}
+SIMD_INLINE vec_arm8<int32_t, 1> any_part(Desc<int32_t, 1, ARM8>,
+                                          const vec_arm8<int32_t> v) {
+  return vec_arm8<int32_t, 1>(vget_low_s32(v.raw));
+}
+SIMD_INLINE vec_arm8<float, 1> any_part(Desc<float, 1, ARM8>,
+                                        const vec_arm8<float> v) {
+  return vec_arm8<float, 1>(vget_low_f32(v.raw));
+}
+
+// Broadcasts lane kLane of the part "v" into every lane of a full vector.
+// Exists because part lane order is undefined, ruling out plain broadcast.
+template <int kLane, typename T, size_t N>
+SIMD_INLINE vec_arm8<T> broadcast_part(Full<T, ARM8>, const vec_arm8<T, N> v) {
+  static_assert(kLane >= 0 && static_cast<size_t>(kLane) < N, "Invalid lane");
+  return broadcast<kLane>(vec_arm8<T>(v.raw));
+}
+
+// ------------------------------ Blocks
+
+// Lower halves of both inputs: hiH,hiL loH,loL |-> hiL,loL.
+template <typename T>
+SIMD_INLINE vec_arm8<T> concat_lo_lo(const vec_arm8<T> hi,
+                                     const vec_arm8<T> lo) {
+  const Full<uint64_t, ARM8> du64;
+  const auto halves = interleave_lo(cast_to(du64, lo), cast_to(du64, hi));
+  return cast_to(Full<T, ARM8>(), halves);
+}
+
+// Upper halves of both inputs: hiH,hiL loH,loL |-> hiH,loH.
+template <typename T>
+SIMD_INLINE vec_arm8<T> concat_hi_hi(const vec_arm8<T> hi,
+                                     const vec_arm8<T> lo) {
+  const Full<uint64_t, ARM8> du64;
+  const auto halves = interleave_hi(cast_to(du64, lo), cast_to(du64, hi));
+  return cast_to(Full<T, ARM8>(), halves);
+}
+
+// hiH,hiL loH,loL |-> hiL,loH (= inner halves): hi||lo shifted right 8 bytes.
+template <typename T>
+SIMD_INLINE vec_arm8<T> concat_lo_hi(const vec_arm8<T> hi,
+                                     const vec_arm8<T> lo) {
+  return combine_shift_right_bytes<8>(hi, lo);
+}
+
+// Outer halves of the pair: hiH,hiL loH,loL |-> hiH,loL.
+template <typename T>
+SIMD_INLINE vec_arm8<T> concat_hi_lo(const vec_arm8<T> hi,
+                                     const vec_arm8<T> lo) {
+  // TODO(janwas): more efficient implementation?
+  SIMD_ALIGN const uint8_t kLowerBytes[16] = {
+      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0};
+  const auto byte_mask = load(Full<uint8_t, ARM8>(), kLowerBytes);
+  return select(hi, lo, cast_to(Full<T, ARM8>(), byte_mask));
+}
+
+// ------------------------------ Odd/even lanes
+
+// Returns a vector whose odd lanes come from "a" and even lanes from "b".
+// The byte mask adapts to the lane width sizeof(T) at compile time.
+template<typename T>
+SIMD_INLINE vec_arm8<T> odd_even(
+    const vec_arm8<T> a, const vec_arm8<T> b) {
+  // Byte i lies in lane i / sizeof(T). Bytes of even lanes are 0xFF, so
+  // select() takes them from b; bytes of odd lanes are 0, so select() takes
+  // them from a.
+  SIMD_ALIGN constexpr uint8_t mask[16] = {
+    ((0 / sizeof(T)) & 1)  ? 0 : 0xFF, ((1 / sizeof(T)) & 1)  ? 0 : 0xFF,
+    ((2 / sizeof(T)) & 1)  ? 0 : 0xFF, ((3 / sizeof(T)) & 1)  ? 0 : 0xFF,
+    ((4 / sizeof(T)) & 1)  ? 0 : 0xFF, ((5 / sizeof(T)) & 1)  ? 0 : 0xFF,
+    ((6 / sizeof(T)) & 1)  ? 0 : 0xFF, ((7 / sizeof(T)) & 1)  ? 0 : 0xFF,
+    ((8 / sizeof(T)) & 1)  ? 0 : 0xFF, ((9 / sizeof(T)) & 1)  ? 0 : 0xFF,
+    ((10 / sizeof(T)) & 1) ? 0 : 0xFF, ((11 / sizeof(T)) & 1) ? 0 : 0xFF,
+    ((12 / sizeof(T)) & 1) ? 0 : 0xFF, ((13 / sizeof(T)) & 1) ? 0 : 0xFF,
+    ((14 / sizeof(T)) & 1) ? 0 : 0xFF, ((15 / sizeof(T)) & 1) ? 0 : 0xFF,
+  };
+
+  // select() expects the mask as the same vector type as its operands (cf.
+  // concat_hi_lo), so reinterpret the byte mask as lanes of T before use.
+  const auto bytes = load(Full<uint8_t, ARM8>(), mask);
+  return select(a, b, cast_to(Full<T, ARM8>(), bytes));
+}
+
+// ================================================== MISC
+
+// "Extensions": useful but not quite performance-portable operations. We add
+// functions to this namespace in multiple places.
+namespace ext {
+
+// ------------------------------ movemask
+
+// Packs the most significant bit of each byte of "v" into a bit array:
+// bit i of the result is v[i] >> 7, where v[0] is the least-significant byte.
+// Useful for testing/branching on comparison results.
+SIMD_INLINE uint32_t movemask(const vec_arm8<uint8_t> v) {
+  static constexpr uint8x16_t kByteBit = {
+      1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80, 1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80,
+  };
+  // Arithmetic shift replicates each byte's top bit into all of its bits.
+  const int8x16_t fill = vshrq_n_s8(vreinterpretq_s8_u8(v.raw), 7);
+  const uint8x16_t bits = vandq_u8(vreinterpretq_u8_s8(fill), kByteBit);
+
+  const uint8x8_t sum2 = vget_low_u8(vpaddq_u8(bits, bits));
+  const uint8x8_t sum4 = vpadd_u8(sum2, sum2);
+  const uint8x8_t sum8 = vpadd_u8(sum4, sum4);
+
+  return vget_lane_u16(vreinterpret_u16_u8(sum8), 0);
+}
+
+// Same idea per float/double lane: bit i of the result is lane i's top bit.
+SIMD_INLINE uint32_t movemask(const vec_arm8<float> v) {
+  static constexpr uint32x4_t kLaneBit = {1, 2, 4, 8};
+  const int32x4_t lanes = vreinterpretq_s32_f32(v.raw);
+  const int32x4_t fill = vshrq_n_s32(lanes, 31);
+  const uint32x4_t bits = vandq_u32(vreinterpretq_u32_s32(fill), kLaneBit);
+  return vaddvq_u32(bits);
+}
+SIMD_INLINE uint32_t movemask(const vec_arm8<double> v) {
+  static constexpr uint64x2_t kLaneBit = {1, 2};
+  const int64x2_t lanes = vreinterpretq_s64_f64(v.raw);
+  const int64x2_t fill = vshrq_n_s64(lanes, 63);
+  const uint64x2_t bits = vandq_u64(vreinterpretq_u64_s64(fill), kLaneBit);
+  return static_cast<uint32_t>(vaddvq_u64(bits));
+}
+
+// ------------------------------ all_zero
+
+// Returns true iff every lane of "v" is zero.
+template <typename T>
+SIMD_INLINE bool all_zero(const vec_arm8<T> v) {
+  const auto as_u64 = cast_to(Full<uint64_t, ARM8>(), v);
+  const uint32x2_t narrowed = vqmovn_u64(as_u64.raw);  // saturating narrow
+  return vget_lane_u64(vreinterpret_u64_u32(narrowed), 0) == 0;
+}
+
+// ------------------------------ Horizontal sum (reduction)
+
+// Returns, in each 64-bit lane, the sum of the 8 bytes occupying that lane.
+SIMD_INLINE vec_arm8<uint64_t> sums_of_u8x8(
+    const vec_arm8<uint8_t> v) {
+  const uint16x8_t pairs = vpaddlq_u8(v.raw);   // widening pairwise add
+  const uint32x4_t quads = vpaddlq_u16(pairs);  // widening pairwise add
+  return vec_arm8<uint64_t>(vpaddlq_u32(quads));
+}
+
+// 32b/64b lane types only. Adds all lanes (vaddvq_*) and broadcasts the sum.
+SIMD_INLINE vec_arm8<uint32_t> sum_of_lanes(const vec_arm8<uint32_t> v) {
+  return vec_arm8<uint32_t>(vdupq_n_u32(vaddvq_u32(v.raw)));
+}
+SIMD_INLINE vec_arm8<int32_t> sum_of_lanes(const vec_arm8<int32_t> v) {
+  return vec_arm8<int32_t>(vdupq_n_s32(vaddvq_s32(v.raw)));
+}
+SIMD_INLINE vec_arm8<float> sum_of_lanes(const vec_arm8<float> v) {
+  return vec_arm8<float>(vdupq_n_f32(vaddvq_f32(v.raw)));
+}
+SIMD_INLINE vec_arm8<uint64_t> sum_of_lanes(const vec_arm8<uint64_t> v) {
+  return vec_arm8<uint64_t>(vdupq_n_u64(vaddvq_u64(v.raw)));
+}
+SIMD_INLINE vec_arm8<int64_t> sum_of_lanes(const vec_arm8<int64_t> v) {
+  return vec_arm8<int64_t>(vdupq_n_s64(vaddvq_s64(v.raw)));
+}
+SIMD_INLINE vec_arm8<double> sum_of_lanes(const vec_arm8<double> v) {
+  return vec_arm8<double>(vdupq_n_f64(vaddvq_f64(v.raw)));
+}
+
+}  // namespace ext
+
+// TODO(user): wrappers for all intrinsics (in neon namespace).
+}  // namespace pik
+
+#endif  // SIMD_ENABLE & SIMD_ARM8
+#endif  // PIK_SIMD_ARM64_NEON_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/simd/compiler_specific.h b/codec/L2/demos/pikEnc/host/pik/simd/compiler_specific.h
new file mode 100755
index 0000000000..84521427ac
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/simd/compiler_specific.h
@@ -0,0 +1,54 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_SIMD_COMPILER_SPECIFIC_H_
+#define PIK_SIMD_COMPILER_SPECIFIC_H_
+
+// Compiler-specific includes and definitions.
+
+// SIMD_COMPILER expands to exactly one of these (first matching check wins):
+#define SIMD_COMPILER_CLANG 1
+#define SIMD_COMPILER_GCC 2
+#define SIMD_COMPILER_MSVC 3
+
+#ifdef _MSC_VER
+#define SIMD_COMPILER SIMD_COMPILER_MSVC
+#elif defined(__clang__)
+#define SIMD_COMPILER SIMD_COMPILER_CLANG
+#elif defined(__GNUC__)
+#define SIMD_COMPILER SIMD_COMPILER_GCC
+#else
+#error "Unsupported compiler"
+#endif
+
+#if SIMD_COMPILER == SIMD_COMPILER_MSVC
+#include <intrin.h>
+
+#define SIMD_RESTRICT __restrict
+#define SIMD_INLINE __forceinline
+#define SIMD_NOINLINE __declspec(noinline)
+#define SIMD_LIKELY(expr) (expr)  // no hint; parens keep operator precedence
+#define SIMD_TRAP __debugbreak
+#define SIMD_TARGET_ATTR(feature_str)  // expands to nothing in this branch
+#define SIMD_DIAGNOSTICS(tokens) __pragma(warning(tokens))
+#define SIMD_DIAGNOSTICS_OFF(msc, gcc) SIMD_DIAGNOSTICS(msc)
+
+#else
+
+#define SIMD_RESTRICT __restrict__
+#define SIMD_INLINE \
+  inline __attribute__((always_inline)) __attribute__((flatten))
+#define SIMD_NOINLINE inline __attribute__((noinline))
+#define SIMD_LIKELY(expr) __builtin_expect(!!(expr), 1)
+#define SIMD_TRAP __builtin_trap
+#define SIMD_TARGET_ATTR(feature_str) __attribute__((target(feature_str)))
+#define SIMD_PRAGMA(tokens) _Pragma(#tokens)
+#define SIMD_DIAGNOSTICS(tokens) SIMD_PRAGMA(GCC diagnostic tokens)
+#define SIMD_DIAGNOSTICS_OFF(msc, gcc) SIMD_DIAGNOSTICS(gcc)
+
+#endif
+
+#endif  // PIK_SIMD_COMPILER_SPECIFIC_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/simd/foreach_target.h b/codec/L2/demos/pikEnc/host/pik/simd/foreach_target.h
new file mode 100755
index 0000000000..cdff53cbd7
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/simd/foreach_target.h
@@ -0,0 +1,42 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+// Includes a specified file for every enabled SIMD_TARGET. This is used to
+// generate template instantiations to be called via runtime dispatch.
+
+#ifndef SIMD_ATTR_IMPL
+#error "Must set SIMD_ATTR_IMPL to name of include file"
+#endif
+
+#if SIMD_ENABLE & SIMD_AVX2
+#undef SIMD_TARGET
+#define SIMD_TARGET AVX2
+#include SIMD_ATTR_IMPL
+#endif
+
+#if SIMD_ENABLE & SIMD_SSE4
+#undef SIMD_TARGET
+#define SIMD_TARGET SSE4
+#include SIMD_ATTR_IMPL
+#endif
+
+#if SIMD_ENABLE & SIMD_PPC8
+#undef SIMD_TARGET
+#define SIMD_TARGET PPC8
+#include SIMD_ATTR_IMPL
+#endif
+
+#if SIMD_ENABLE & SIMD_ARM8
+#undef SIMD_TARGET
+#define SIMD_TARGET ARM8
+#include SIMD_ATTR_IMPL
+#endif
+
+#undef SIMD_TARGET
+#define SIMD_TARGET NONE
+#include SIMD_ATTR_IMPL
+
+#undef SIMD_ATTR_IMPL
diff --git a/codec/L2/demos/pikEnc/host/pik/simd/instruction_matrix.pdf b/codec/L2/demos/pikEnc/host/pik/simd/instruction_matrix.pdf
new file mode 100755
index 0000000000..23608f3610
Binary files /dev/null and b/codec/L2/demos/pikEnc/host/pik/simd/instruction_matrix.pdf differ
diff --git a/codec/L2/demos/pikEnc/host/pik/simd/quick_reference.md b/codec/L2/demos/pikEnc/host/pik/simd/quick_reference.md
new file mode 100755
index 0000000000..38df82dbfb
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/simd/quick_reference.md
@@ -0,0 +1,443 @@
+# API synopsis / quick reference
+
+## Compiler support
+
+Some compilers require e.g. -mavx2 flags in order to use SIMD intrinsics.
+However, this enables AVX2 instructions in the entire translation unit, which
+may violate the one-definition rule and cause crashes. Instead, we use
+target-specific attribute annotations: any function using SIMD must be prefixed
+with `SIMD_ATTR`.
+
+## Preprocessor macros
+
+Let `Target` denote an instruction set: `NONE/SSE4/AVX2/AVX512/PPC8/ARM8`.
+
+*   `SIMD_Target=##` are powers of two uniquely identifying `Target`.
+*   `SIMD_ENABLE=##`, defined within `targets.h`, indicates for which Target(s)
+    to generate code. The compiler must support them.
+
+*   `SIMD_TARGET = Target` is the currently active instruction set; use this for
+    instantiating the `operator()` template called by `Dispatch`. `Target` is a
+    struct with a `value` member initialized to the `SIMD_Target` value.
+
+*   `SIMD_TARGET_VALUE == Target::value == SIMD_Target` enables preprocessor
+    `#if` based on target. Applications should only rarely need this; examples
+    include avoiding `shift_*_var` if `SIMD_TARGET_VALUE == SIMD_SSE4` and
+    avoiding shuffles etc. when `SIMD_TARGET_VALUE == SIMD_NONE`.
+
+*   `SIMD_NAMESPACE` is useful for defining target-specific functions called by
+    `operator()` specializations.
+
+*   `SIMD_ATTR` expands to `SIMD_ATTR_Target` and must be prefixed to any
+    inlined function declaration that (transitively) calls SIMD functions.
+
+*   `SIMD_FULL(T)` and `SIMD_PART(T, N)` expand to `Full<T, SIMD_TARGET>` and
+    `Part<T, N, SIMD_TARGET>`.
+
+## Vector types
+
+SIMD vectors consist of one or more 'lanes' of the same built-in type `T =
+uint##_t, int##_t, float or double` for `## = 8, 16, 32, 64`. The API includes
+three main families of data types:
+
+*   Full vector corresponding to a SIMD register with `N` lanes;
+*   Part of a vector with 2^j (<= `N`) contiguous lanes;
+*   Scalar with a single lane, useful for loop remainders or portable code.
+
+Full vector lane indices are in little-endian order: least-significant = lane 0.
+Due to platform differences, the lane order of parts is undefined. For technical
+reasons (see "Overloaded function API" in README.md), overloaded functions are
+selected using 'descriptors' (abbreviated as `D`) rather than the actual data
+types. For example, `setzero(Desc<T, N, Target>())` returns a `Desc<T, N,
+Target>::V`. Users typically define a `Desc` lvalue `d` using alias templates:
+
+*   `const Full<T, Target> d;` for a full vector;
+*   `const Part<T, N, Target> d;` for a part or full vector with `N` lanes;
+*   `const Scalar<T> d;` for scalars (or `Full<T, NONE>` or `Part<T, 1, NONE>`).
+
+Initializers such as `setzero(d)` return the correct data type and user code can
+rely on `auto` to avoid spelling out the data types. For output parameters or
+type checking (rather than auto), use `D::V`.
+
+## Operations
+
+Let `V` denote a vector/part/scalar. Operations limited to certain types have
+prefixes `V`: `u8/16` or `uif` for unsigned/signed/floating-point types.
+
+### Initialization
+
+*   `V setzero(D)`: returns vector/part/scalar with all bits set to zero.
+*   `V set1(D, T)`: returns vector/part/scalar with all lanes set to `T`.
+*   `V iota(D, T)`: returns vector/part/scalar with lanes `a[i] == T + i`.
+*   `V undefined(D)`: returns vector/part/scalar with uninitialized lanes.
+
+### Arithmetic
+
+*   `V operator+(V a, V b)`: returns `a[i] + b[i]` (mod 2^bits).
+*   `V operator-(V a, V b)`: returns `a[i] - b[i]` (mod 2^bits).
+*   `V`: `ui8/16` \
+    `V saturated_add(V a, V b)` returns `a[i] + b[i]` saturated to the
+    minimum/maximum representable value.
+*   `V`: `ui8/16` \
+    `V saturated_subtract(V a, V b)` returns `a[i] - b[i]` saturated to the
+    minimum/maximum representable value.
+*   `V`: `u8/16` \
+    `V average_round(V a, V b)` returns `(a[i] + b[i] + 1) / 2`.
+*   `V`: `i8/16/32` \
+    `V abs(V a)` returns the absolute value of `a[i]`; `LimitsMin()` maps to
+    `LimitsMax() + 1`.
+
+*   `V`: `ui16/32/64` \
+    `V shift_left<int>(V a)` returns `a[i] <<` a compile-time constant count.
+    Making it a template argument avoids constant-propagation issues with Clang
+    on ARM. ARM also requires the count be less than the lane size. This is the
+    fastest shift variant on x86.
+
+*   `V`: `u16/32/64`, `i16/32` \
+    `V shift_right<int>(V a)` returns `a[i] >>` a compile-time constant count.
+    Making it a template argument avoids constant-propagation issues with Clang
+    on ARM. ARM also requires the count be less than the lane size. This is the
+    fastest shift variant on x86. Inserts zero or sign bit(s) depending on `V`.
+
+*   `V`: `ui16/32/64` \
+    `V shift_left_same(V a, Count bits)` returns `a[i] << bits`, where `bits` is
+    returned from `set_shift_left_count(D, int)`.
+
+*   `V`: `u16/32/64`, `i16/32` \
+    `V shift_right_same(V a, Count bits)` returns `a[i] >> bits`, where `bits`
+    is returned from `set_shift_right_count(D, int)`. Inserts 0 or sign bit(s).
+
+*   `V`: `ui32/64` \
+    `V operator<<(V a, V b)` returns `a[i] << b[i]`, which is zero when the
+    shift count `b[i] >= sizeof(T)*8`. Not supported by SSE4, but more efficient
+    than the `shift_*_same` functions on AVX2+.
+
+*   `V`: `u32/64`, `i32` \
+    `V operator>>(V a, V b)` returns `a[i] >> b[i]`, which is zero when the
+    shift count `b[i] >= sizeof(T)*8`. Not supported by SSE4, but more efficient
+    than the `shift_*_same` functions on AVX2+. Inserts zero or sign bit(s).
+
+*   `V`: `ui8/16/32`, `f` \
+    `V min(V a, V b)`: returns `min(a[i], b[i])`.
+
+*   `V`: `ui8/16/32`, `f` \
+    `V max(V a, V b)`: returns `max(a[i], b[i])`.
+
+*   `V`: `ui8/16/32`, `f` \
+    `V clamp(V a, V lo, V hi)`: returns `a[i]` clamped to `[lo[i], hi[i]]`.
+
+*   `V`: `ui16/32` \
+    `V operator*(V a, V b)`: returns the lower half of `a[i] * b[i]` in each
+    lane.
+
+*   `V`: `f` \
+    `V operator*(V a, V b)`: returns `a[i] * b[i]` in each lane.
+
+*   `V`: `f` \
+    `V operator/(V a, V b)`: returns `a[i] / b[i]` in each lane.
+
+*   `V`: `i16` \
+    `V ext::mul_high(V a, V b)`: returns the upper half of `a[i] * b[i]` in each
+    lane.
+
+*   `V`: `i16` \
+    `V mul_high_round(V a, V b)`: returns `(((a[i] * b[i]) >> 14) + 1) >> 1`.
+
+*   `V`: `ui32` \
+    `V mul_even(V a, V b)`: returns double-wide result of `a[i] * b[i]` for
+    every even `i`, in lanes `i` (lower) and `i + 1` (upper).
+
+*   `V`: `f` \
+    `V mul_add(V a, V b, V c)`: returns `a[i] * b[i] + c[i]`.
+
+*   `V`: `f` \
+    `V nmul_add(V a, V b, V c)`: returns `-a[i] * b[i] + c[i]`.
+
+*   `V`: `f` \
+    `V fadd(V x, V k1, V add)`: returns `x[i] + add[i]` (k1 must equal 1.0).
+    Clobbers the register holding x due to 3-address encoding.
+
+*   `V`: `f` \
+    `V fsub(V x, V k1, V sub)`: returns `x[i] - sub[i]` (k1 must equal 1.0).
+    Clobbers the register holding x due to 3-address encoding.
+
+*   `V`: `f` \
+    `V fnadd(V sub, V k1, V x)`: returns `x[i] - sub[i]` (k1 must equal
+    1.0). Clobbers the register holding sub due to 3-address encoding.
+
+*   `V`: `f` \
+    `V ext::mul_subtract(V a, V b, V c)`: returns `a[i] * b[i] - c[i]`.
+
+*   `V`: `f` \
+    `V ext::nmul_subtract(V a, V b, V c)`: returns `-a[i] * b[i] - c[i]`.
+
+*   `V`: `f` \
+    `V sqrt(V a)`: returns `sqrt(a[i])`.
+
+*   `V`: `f32` \
+    `V approximate_reciprocal_sqrt(V a)`: returns an approximation of `1.0 /
+    sqrt(a[i])`. `sqrt(a) ~= approximate_reciprocal_sqrt(a) * a`. x86 and PPC
+    provide 12-bit approximations but the error on ARM may be closer to 1%.
+
+*   `V`: `f32` \
+    `V approximate_reciprocal(V a)`: returns an approximation of `1.0 / a[i]`.
+
+*   `V`: `f` \
+    `V round(V a)`: returns `a[i]` rounded towards the nearest integer, with
+    ties to even.
+
+*   `V`: `f` \
+    `V trunc(V a)`: returns `a[i]` rounded towards zero (truncate).
+
+*   `V`: `f` \
+    `V ceil(V a)`: returns `a[i]` rounded towards positive infinity (ceiling).
+
+*   `V`: `f` \
+    `V floor(V a)`: returns `a[i]` rounded towards negative infinity.
+
+### Comparisons
+
+These set a lane to 1-bits if the condition is true, otherwise all zero.
+
+*   `V operator==(V a, V b)`: returns `a[i] == b[i]`.
+*   `V`: `if` \
+    `V operator<(V a, V b)`: returns `a[i] < b[i]`.
+*   `V`: `if` \
+    `V operator>(V a, V b)`: returns `a[i] > b[i]`.
+*   `V`: `f` \
+    `V operator<=(V a, V b)`: returns `a[i] <= b[i]`.
+*   `V`: `f` \
+    `V operator>=(V a, V b)`: returns `a[i] >= b[i]`.
+
+### Logical
+
+These operate on individual bits, even for floating-point vector types.
+
+*   `V operator&(V a, V b)`: returns `a[i] & b[i]`.
+*   `V andnot(V a, V b)`: returns `~a[i] & b[i]`.
+*   `V operator|(V a, V b)`: returns `a[i] | b[i]`.
+*   `V operator^(V a, V b)`: returns `a[i] ^ b[i]`.
+*   `V`: `f` \
+    `V condition_from_sign(V v)`: returns `s` such that `select(a, b, s)` is
+    equivalent to `v.sign_bit ? b : a`. This is a no-op on x86.
+*   `V select(V a, V b, V mask)`: returns `mask[i] ? b[i] : a[i]`. **Note**:
+    each `mask[i]` must be all zero or all 1-bits, or returned from
+    `condition_from_sign`.
+*   `V odd_even(V a, V b)`: returns a vector whose odd lanes are taken from `a`
+    and the even lanes from `b`.
+
+### Memory
+
+Memory operands are little-endian, otherwise their order would depend on the
+lane configuration. Pointers are the addresses of `N` consecutive `T` values,
+either naturally-aligned (`aligned`) or possibly unaligned (`p`).
+
+*   `D::V load(D, const D::T* aligned)`: returns `aligned[i]`. **Note**: the
+    lane order of parts is undefined; use `broadcast_part` to get a full vector.
+*   `D::V load_unaligned(D, const D::T* p)`: returns `p[i]`.
+*   `D::V load_dup128(D, const D::T* p)`: returns one 128-bit block loaded from
+    `p` and broadcasted into all 128-bit block\[s\]. This enables a `convert_to`
+    overload that avoids a 3-cycle overhead on AVX2/AVX-512. This is faster than
+    broadcasting single values and useful for specifying constants without
+    having to know the (maximum) vector length.
+*   `void store(D::V a, D, D::T* aligned)`: copies `a[i]` into `aligned[i]`.
+*   `void store_unaligned(D::V a, D, D::T* p)`: copies `a[i]` into `p[i]`.
+*   `void stream(D::V a, D, D::T* aligned)`: copies `a[i]` into
+    `aligned[i]` with non-temporal hint on x86 (for good performance, call for
+    all consecutive vectors within the same cache line).
+*   `V`,`VI`: (`uif32,i32`), (`uif64,i64`) \
+    `D::V gather_offset(D, const D::T* base, VI offsets)`. Returns elements of
+    base selected by signed/possibly repeated *byte* `offsets[i]`.
+*   `V`,`VI`: (`uif32,i32`), (`uif64,i64`) \
+    `D::V gather_index(D, const D::T* base, VI indices)`. Returns vector of
+    `base[indices[i]]`. Indices are signed and need not be unique.
+*   `T`: `u32/64` \
+    `void stream(T, T* aligned)`: copies `T` into `*aligned` with non-temporal
+    hint on x86.
+
+*   `void load_fence()`: delays subsequent loads until prior loads are visible.
+    Also a full fence on Intel CPUs. No effect on non-x86.
+
+*   `void store_fence()`: ensures previous non-temporal stores are visible. No
+    effect on non-x86.
+
+*   `void flush_cacheline(const void* p)`: invalidates and flushes the cache
+    line containing "p". No effect on non-x86.
+
+*   `void prefetch(const T* p)`: begins loading the cache line containing "p".
+
+### Type conversion
+
+*   `D::V cast_to(D, V)`: returns the bits of `V` reinterpreted as type `D::V`.
+
+*   `V`,`D`: (`u8,i16`), (`u8,i32`), (`u16,i32`), (`i8,i16`), (`i8,i32`),
+    (`i16,i32`), (`f32,f64`) \
+    `D::V convert_to(D, V part)`: returns `part[i]` zero- or sign-extended to
+    the wider `D::T` type.
+
+*   `V`,`D`: (`u8,u32`) \
+    `D::V u32_from_u8(V)`: special-case `u8` to `u32` conversion when all blocks
+    of `V` are identical, e.g. from `load_dup128`.
+
+*   `V`,`D`: (`u32,u8`) \
+    `D::V u8_from_u32(V)`: special-case `u32` to `u8` conversion when all lanes
+    of `V` are already clamped to `[0, 256)`.
+
+*   `V`,`D`: (`i16,i8`), (`i32,i8`), (`i32,i16`), (`i16,u8`), (`i32,u8`),
+    (`i32,u16`) \
+    `D::V convert_to(D, V a)`: returns `a[i]` after packing with signed/unsigned
+    saturation, i.e. a vector part with narrower lane type `D::T`.
+
+*   `V`,`D`: (`i32`,`f32`) \
+    `D::V convert_to(D, V)`: converts an int32_t value to float.
+
+*   `V`,`D`: (`f32`,`i32`) \
+    `D::V convert_to(D, V)`: rounds float towards zero and converts the value to
+    int32_t.
+
+*   `V`: `f32`; `Ret`: `i32` \
+    `Ret nearest_int(V a)`: returns the integer nearest to `a[i]`.
+
+### Parts
+
+The part abstraction is necessary because the preferred lane to get/set differs
+depending on platform.
+
+*   `D::N == 1` \
+    `D::V set_part(D, D::T)`: returns a part containing the single value `T` in
+    an unspecified lane.
+
+*   `D::V any_part(D, V)`: returns a contiguous part of the full vector `V`.
+    **Note**: returns either the least- or most-significant bits depending on
+    platform; use `broadcast_part` to obtain a full vector.
+
+*   `V2 get_half(Upper/Lower, V)`: returns upper or lower half of the full
+    vector `V`. `SIMD_HALF` evaluates to an instance of Upper or Lower
+    (whichever is more efficient) that can be passed as the first argument. When
+    a specific half is needed, `V2 upper_half(V)` and `V2 lower_half(V)` are
+    more convenient alternatives.
+
+*   `D::V broadcast_part<int i>(D, V)`: returns a full vector with the `i`-th
+    element broadcasted. The interpretation of `i < N` is platform-dependent.
+    For `V` from `load(Part<T, N>(), p)`, `i` is the index into `p[]`; for `V`
+    from `set_part`, `N == 1` and thus `i == 0`.
+
+*   `D::N == 1` \
+    `D::T get_part(D, V)`: returns the single value stored within `V`. This is
+    also useful for extracting `sum_of_lanes` results.
+
+### Swizzle
+
+**Note**: if vectors are larger than 128 bits, the following operations split
+their operands into independently processed 128-bit *blocks*.
+
+*   `V`: `ui16/32/64`, `f` \
+    `V broadcast<int i>(V)`: returns individual *blocks*, each with lanes set to
+    `input_block[i]`, `i = [0, 16/sizeof(T))`.
+
+*   `Ret`: double-width `u/i`; `V`: `u8/16/32`, `i8/16/32` \
+    `Ret zip_lo(V a, V b)`: returns the same bits as interleave_lo, except that
+    `Ret` is a vector with double-width lanes (required in order to use this
+    operation with `scalar`).
+
+*   `Ret`: double-width `u/i`; `V`: `u8/16/32`, `i8/16/32` \
+    `Ret zip_hi(V a, V b)`: returns the same bits as interleave_hi, except that
+    `Ret` is a vector with double-width lanes (required in order to use this
+    operation with `scalar`).
+
+**Note**: the following are only available for full vectors (`N > 1, Target !=
+NONE`), and split their operands into independently processed 128-bit *blocks*:
+
+*   `Ret`: half-sized vector part \
+    `Ret other_half(V v)`: returns the other half-sized vector part, i.e. the
+    part not returned by `any_part(Desc<T, N / 2>, V)`.
+
+*   `V`: `ui` \
+    `V shift_left_bytes<int>(V)`: returns the result of shifting independent
+    *blocks* left by `int` bytes \[1, 15\].
+
+*   `V`: `ui` \
+    `V shift_left_lanes<int>(V)`: returns the result of shifting independent
+    *blocks* left by `int` lanes \[1, 15\].
+
+*   `V`: `ui` \
+    `V shift_right_bytes<int>(V)`: returns the result of shifting independent
+    *blocks* right by `int` bytes \[1, 15\].
+
+*   `V`: `ui` \
+    `V shift_right_lanes<int>(V)`: returns the result of shifting independent
+    *blocks* right by `int` lanes \[1, 15\].
+
+*   `V`: `ui` \
+    `V combine_shift_right_bytes<int>(V hi, V lo)`: returns the result of
+    shifting two concatenated *blocks* `hi[i] || lo[i]` right by `int` bytes
+    \[1, 15\].
+
+*   `V`: `ui`; `VI`: `ui` \
+    `V table_lookup_bytes(V bytes, VI from)`: returns *blocks* with
+    `bytes[from[i]]`, or zero if `from[i] >= 0x80`.
+
+*   `V`: `uif32` \
+    `V shuffle_1032(V)`: returns *blocks* with 64-bit halves swapped.
+
+*   `V`: `uif64` \
+    `V shuffle_01(V)`: returns *blocks* with 64-bit halves swapped.
+
+*   `V`: `uif32` \
+    `V shuffle_0321(V)`: returns *blocks* rotated right (toward the lower end)
+    by 32 bits.
+
+*   `V`: `uif32` \
+    `V shuffle_2103(V)`: returns *blocks* rotated left (toward the upper end) by
+    32 bits.
+
+*   `V`: `uif32` \
+    `V shuffle_0123(V)`: returns *blocks* with lanes in reverse order.
+
+*   `V interleave_lo(V a, V b)`: returns *blocks* with alternating lanes from
+    the lower halves of `a` and `b` (`a[0]` in the least-significant lane).
+
+*   `V interleave_hi(V a, V b)`: returns *blocks* with alternating lanes from
+    the upper halves of `a` and `b` (`a[N/2]` in the least-significant lane).
+
+**Note**: the following operations cross block boundaries, which is typically
+more expensive on AVX2/AVX-512 than within-block operations.
+
+*   `V concat_lo_lo(V hi, V lo)`: returns the concatenation of the lower halves
+    of `hi` and `lo` without splitting into blocks.
+*   `V concat_hi_hi(V hi, V lo)`: returns the concatenation of the upper halves
+    of `hi` and `lo` without splitting into blocks.
+*   `V concat_lo_hi(V hi, V lo)`: returns the inner half of the concatenation of
+    `hi` and `lo` without splitting into blocks. Useful for swapping the two
+    blocks in 256-bit vectors.
+*   `V concat_hi_lo(V hi, V lo)`: returns the outer quarters of the
+    concatenation of `hi` and `lo` without splitting into blocks. Unlike the
+    other variants, this does not incur a block-crossing penalty on AVX2.
+
+*   `V`: `uif32` \
+    `V table_lookup_lanes(V a, VI)` returns a vector of `a[indices[i]]`,
+    where `VI` is returned from `set_lane_indices(D, &indices[0])`.
+
+*   `VI set_lane_indices(D, int* idx)` prepares for `table_lookup_lanes`
+    with lane indices `idx = [0, d.N)` (need not be unique).
+
+### Misc
+
+*   `void pause()`: call during spin loops to reduce power consumption.
+
+**Note**: the following are only available for full vectors (`N > 1, Target !=
+NONE`):
+
+*   `V`: `u8`, `f` \
+    `uint32_t ext::movemask(V a)`: returns sum of `upper_bit(a[i]) << i`.
+
+*   `V`: `ui` \
+    `bool ext::all_zero(V a)`: returns whether all lanes are zero.
+
+*   `V`: `u8`; `Ret`: `u64` \
+    `Ret ext::sums_of_u8x8(V)`: returns the sums of 8 consecutive bytes in each
+    64-bit lane.
+
+*   `V`: `uif32/64` \
+    `V ext::sum_of_lanes(V v)`: returns the sum of all lanes in each lane; to
+    obtain the scalar result, use `get_part(D, horz_sum_result)`.
diff --git a/codec/L2/demos/pikEnc/host/pik/simd/scalar.h b/codec/L2/demos/pikEnc/host/pik/simd/scalar.h
new file mode 100755
index 0000000000..a6f1944fdf
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/simd/scalar.h
@@ -0,0 +1,874 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_SIMD_SCALAR_H_
+#define PIK_SIMD_SCALAR_H_
+
+// Single-element vectors and operations.
+
+#include "pik/simd/compiler_specific.h"
+#include "pik/simd/shared.h"
+#include "pik/simd/targets.h"
+#include "pik/simd/util.h"
+
+namespace pik {
+
+// Never override NONE.
+// Explicit specialization for single-block parts whose target is NONE: the
+// resulting target is again NONE.
+template <>
+struct PartTargetT<1, NONE> {
+  using type = NONE;
+};
+
+// Shorthand for a scalar descriptor (one lane of T, target NONE); note that
+// scalar<T> is the actual data class.
+template <typename T>
+using Scalar = Desc<T, 1, NONE>;
+
+// Returned by set_shift_*_count; do not use directly.
+// Shift amount (in bits) accepted by shift_left_same.
+struct scalar_shift_left_count {
+  int count;
+};
+// Shift amount (in bits) accepted by shift_right_same.
+struct scalar_shift_right_count {
+  int count;
+};
+
+// Single-lane data class. (A wrapper class is required for overloading
+// comparison operators.) Every compound assignment applies the matching
+// binary operator for scalar<T> and stores the result back into *this.
+template <typename T>
+struct scalar {
+  // Leaves "raw" uninitialized.
+  SIMD_INLINE scalar() {}
+  scalar(const scalar&) = default;
+  scalar& operator=(const scalar&) = default;
+  SIMD_INLINE explicit scalar(const T t) : raw(t) {}
+
+  SIMD_INLINE scalar& operator*=(const scalar rhs) {
+    *this = *this * rhs;
+    return *this;
+  }
+  SIMD_INLINE scalar& operator/=(const scalar rhs) {
+    *this = *this / rhs;
+    return *this;
+  }
+  SIMD_INLINE scalar& operator+=(const scalar rhs) {
+    *this = *this + rhs;
+    return *this;
+  }
+  SIMD_INLINE scalar& operator-=(const scalar rhs) {
+    *this = *this - rhs;
+    return *this;
+  }
+  SIMD_INLINE scalar& operator&=(const scalar rhs) {
+    *this = *this & rhs;
+    return *this;
+  }
+  SIMD_INLINE scalar& operator|=(const scalar rhs) {
+    *this = *this | rhs;
+    return *this;
+  }
+  SIMD_INLINE scalar& operator^=(const scalar rhs) {
+    *this = *this ^ rhs;
+    return *this;
+  }
+
+  // The wrapped lane value.
+  T raw;
+};
+
+// Maps the (T, one lane, NONE) combination to its data class, scalar<T>.
+template <typename T>
+struct VecT<T, 1, NONE> {
+  using type = scalar<T>;
+};
+
+// ------------------------------ Cast
+
+// Reinterprets the bytes of "v" as lane type T (no value conversion).
+// T must not be larger than FromT; when it is smaller, only the first
+// sizeof(T) bytes of v.raw are used.
+template <typename T, typename FromT>
+SIMD_INLINE scalar<T> cast_to(Scalar<T>, scalar<FromT> v) {
+  static_assert(sizeof(T) <= sizeof(FromT), "Promoting is undefined");
+  T to;
+  // Copy exactly sizeof(T) bytes: copying sizeof(FromT) bytes would write past
+  // the end of "to" whenever T is narrower than FromT.
+  CopyBytes<sizeof(T)>(&v.raw, &to);
+  return scalar<T>(to);
+}
+
+// ------------------------------ Set
+
+// Returns a scalar whose lane is T(0).
+template <typename T>
+SIMD_INLINE scalar<T> setzero(Scalar<T> /*d*/) {
+  const T zero = T(0);
+  return scalar<T>(zero);
+}
+
+// Returns a scalar holding "t" converted to the lane type T.
+template <typename T, typename T2>
+SIMD_INLINE scalar<T> set1(Scalar<T> /*d*/, const T2 t) {
+  const T lane = t;
+  return scalar<T>(lane);
+}
+
+// With a single lane, the sequence consists only of "first" (converted to T).
+template <typename T, typename T2>
+SIMD_INLINE scalar<T> iota(Scalar<T> /*d*/, const T2 first) {
+  const T lane = first;
+  return scalar<T>(lane);
+}
+
+// Callers must not rely on the returned value; this implementation happens to
+// return zero.
+template <typename T>
+SIMD_INLINE scalar<T> undefined(Scalar<T> /*d*/) {
+  const T placeholder = 0;
+  return scalar<T>(placeholder);
+}
+
+// ================================================== SHIFTS
+
+// ------------------------------ Shift lanes by constant #bits
+
+// Shifts the lane left by the compile-time constant kBits, which must be in
+// [0, number of bits in T).
+template <int kBits, typename T>
+SIMD_INLINE scalar<T> shift_left(const scalar<T> v) {
+  static_assert(0 <= kBits && kBits < sizeof(T) * 8, "Invalid shift");
+  const T shifted = v.raw << kBits;
+  return scalar<T>(shifted);
+}
+
+// Shifts the lane right by the compile-time constant kBits, which must be in
+// [0, number of bits in T).
+template <int kBits, typename T>
+SIMD_INLINE scalar<T> shift_right(const scalar<T> v) {
+  static_assert(0 <= kBits && kBits < sizeof(T) * 8, "Invalid shift");
+  const T shifted = v.raw >> kBits;
+  return scalar<T>(shifted);
+}
+
+// ------------------------------ Shift lanes by same variable #bits
+
+// Wraps "bits" in the count type accepted by shift_left_same.
+template <typename T>
+SIMD_INLINE scalar_shift_left_count set_shift_left_count(Scalar<T> /*d*/,
+                                                         const int bits) {
+  scalar_shift_left_count wrapped;
+  wrapped.count = bits;
+  return wrapped;
+}
+
+// Wraps "bits" in the count type accepted by shift_right_same.
+template <typename T>
+SIMD_INLINE scalar_shift_right_count set_shift_right_count(Scalar<T> /*d*/,
+                                                           const int bits) {
+  scalar_shift_right_count wrapped;
+  wrapped.count = bits;
+  return wrapped;
+}
+
+// Shifts the lane left by the run-time count stored in "bits".
+template <typename T>
+SIMD_INLINE scalar<T> shift_left_same(const scalar<T> v,
+                                      const scalar_shift_left_count bits) {
+  const T shifted = v.raw << bits.count;
+  return scalar<T>(shifted);
+}
+// Shifts the lane right by the run-time count stored in "bits".
+template <typename T>
+SIMD_INLINE scalar<T> shift_right_same(const scalar<T> v,
+                                       const scalar_shift_right_count bits) {
+  const T shifted = v.raw >> bits.count;
+  return scalar<T>(shifted);
+}
+
+// ------------------------------ Shift lanes by independent variable #bits
+
+// Single-lane => same as shift_*_same except that the shift count is itself
+// passed as a scalar<T>.
+template <typename T>
+SIMD_INLINE scalar<T> operator<<(const scalar<T> v, const scalar<T> bits) {
+  const T shifted = v.raw << bits.raw;
+  return scalar<T>(shifted);
+}
+template <typename T>
+SIMD_INLINE scalar<T> operator>>(const scalar<T> v, const scalar<T> bits) {
+  const T shifted = v.raw >> bits.raw;
+  return scalar<T>(shifted);
+}
+
+// ================================================== LOGICAL
+
+// Applies a binary functor to the bit patterns of two scalars. "Bits" is the
+// integer type used to hold the patterns and must have the same size as T.
+template <typename Bits>
+struct BitwiseOp {
+  // Copies the bytes of a and b into Bits, calls op on them and copies the
+  // resulting bytes back into a T.
+  template <typename T, class Op>
+  scalar<T> operator()(const scalar<T> a, const scalar<T> b,
+                       const Op& op) const {
+    static_assert(sizeof(T) == sizeof(Bits), "Float/int size mismatch");
+    Bits bits_a, bits_b;
+    CopyBytes<sizeof(Bits)>(&a, &bits_a);
+    CopyBytes<sizeof(Bits)>(&b, &bits_b);
+    const Bits combined = op(bits_a, bits_b);
+    T out;
+    CopyBytes<sizeof(Bits)>(&combined, &out);
+    return scalar<T>(out);
+  }
+};
+
+// ------------------------------ Bitwise AND
+
+// Bitwise AND. The primary template uses the built-in operator; the float and
+// double specializations operate on the bit patterns via BitwiseOp.
+template <typename T>
+SIMD_INLINE scalar<T> operator&(const scalar<T> a, const scalar<T> b) {
+  const T bits = a.raw & b.raw;
+  return scalar<T>(bits);
+}
+template <>
+SIMD_INLINE scalar<float> operator&(const scalar<float> a,
+                                    const scalar<float> b) {
+  const auto and_bits = [](int32_t x, int32_t y) { return x & y; };
+  return BitwiseOp<int32_t>()(a, b, and_bits);
+}
+template <>
+SIMD_INLINE scalar<double> operator&(const scalar<double> a,
+                                     const scalar<double> b) {
+  const auto and_bits = [](int64_t x, int64_t y) { return x & y; };
+  return BitwiseOp<int64_t>()(a, b, and_bits);
+}
+
+// ------------------------------ Bitwise AND-NOT
+
+// Returns ~a & b. The float and double specializations operate on the bit
+// patterns via BitwiseOp.
+template <typename T>
+SIMD_INLINE scalar<T> andnot(const scalar<T> a, const scalar<T> b) {
+  const T bits = ~a.raw & b.raw;
+  return scalar<T>(bits);
+}
+template <>
+SIMD_INLINE scalar<float> andnot(const scalar<float> a, const scalar<float> b) {
+  const auto andnot_bits = [](int32_t x, int32_t y) { return ~x & y; };
+  return BitwiseOp<int32_t>()(a, b, andnot_bits);
+}
+template <>
+SIMD_INLINE scalar<double> andnot(const scalar<double> a,
+                                  const scalar<double> b) {
+  const auto andnot_bits = [](int64_t x, int64_t y) { return ~x & y; };
+  return BitwiseOp<int64_t>()(a, b, andnot_bits);
+}
+
+// ------------------------------ Bitwise OR
+
+// Bitwise OR. The primary template uses the built-in operator; the float and
+// double specializations operate on the bit patterns via BitwiseOp.
+template <typename T>
+SIMD_INLINE scalar<T> operator|(const scalar<T> a, const scalar<T> b) {
+  const T bits = a.raw | b.raw;
+  return scalar<T>(bits);
+}
+template <>
+SIMD_INLINE scalar<float> operator|(const scalar<float> a,
+                                    const scalar<float> b) {
+  const auto or_bits = [](int32_t x, int32_t y) { return x | y; };
+  return BitwiseOp<int32_t>()(a, b, or_bits);
+}
+template <>
+SIMD_INLINE scalar<double> operator|(const scalar<double> a,
+                                     const scalar<double> b) {
+  const auto or_bits = [](int64_t x, int64_t y) { return x | y; };
+  return BitwiseOp<int64_t>()(a, b, or_bits);
+}
+
+// ------------------------------ Bitwise XOR
+
+// Bitwise XOR. The primary template uses the built-in operator; the float and
+// double specializations operate on the bit patterns via BitwiseOp.
+template <typename T>
+SIMD_INLINE scalar<T> operator^(const scalar<T> a, const scalar<T> b) {
+  const T bits = a.raw ^ b.raw;
+  return scalar<T>(bits);
+}
+template <>
+SIMD_INLINE scalar<float> operator^(const scalar<float> a,
+                                    const scalar<float> b) {
+  const auto xor_bits = [](int32_t x, int32_t y) { return x ^ y; };
+  return BitwiseOp<int32_t>()(a, b, xor_bits);
+}
+template <>
+SIMD_INLINE scalar<double> operator^(const scalar<double> a,
+                                     const scalar<double> b) {
+  const auto xor_bits = [](int64_t x, int64_t y) { return x ^ y; };
+  return BitwiseOp<int64_t>()(a, b, xor_bits);
+}
+
+// ------------------------------ Select/blend
+
+// Returns a mask for use by select(). The lane is reinterpreted as a signed
+// integer of the same width, shifted right by (width - 1) bits and then
+// reinterpreted back as floating-point.
+SIMD_INLINE scalar<float> condition_from_sign(const scalar<float> v) {
+  const scalar<int32_t> as_int = cast_to(Scalar<int32_t>(), v);
+  return cast_to(Scalar<float>(), shift_right<31>(as_int));
+}
+SIMD_INLINE scalar<double> condition_from_sign(const scalar<double> v) {
+  const scalar<int64_t> as_int = cast_to(Scalar<int64_t>(), v);
+  return cast_to(Scalar<double>(), shift_right<63>(as_int));
+}
+
+// Returns mask ? b : a. "mask" must either have been returned by
+// selector_from_mask, or callers must ensure its lanes are T(0) or ~T(0).
+template <typename T>
+SIMD_INLINE scalar<T> select(const scalar<T> a, const scalar<T> b,
+                             const scalar<T> mask) {
+  const scalar<T> from_b = mask & b;          // bits of b where mask is set
+  const scalar<T> from_a = andnot(mask, a);   // bits of a where mask is clear
+  return from_b | from_a;
+}
+
+// ================================================== ARITHMETIC
+
+// Addition. The template widens both operands (via int64_t) and adds them as
+// uint64_t before narrowing to T; float and double use dedicated overloads.
+template <typename T>
+SIMD_INLINE scalar<T> operator+(const scalar<T> a, const scalar<T> b) {
+  const uint64_t lhs = static_cast<int64_t>(a.raw);
+  const uint64_t rhs = static_cast<int64_t>(b.raw);
+  const uint64_t sum = lhs + rhs;
+  return scalar<T>(sum & ~T(0));
+}
+SIMD_INLINE scalar<float> operator+(const scalar<float> a,
+                                    const scalar<float> b) {
+  const float sum = a.raw + b.raw;
+  return scalar<float>(sum);
+}
+SIMD_INLINE scalar<double> operator+(const scalar<double> a,
+                                     const scalar<double> b) {
+  const double sum = a.raw + b.raw;
+  return scalar<double>(sum);
+}
+
+// Subtraction. The template widens both operands (via int64_t) and subtracts
+// them as uint64_t before narrowing to T; float and double use dedicated
+// overloads.
+template <typename T>
+SIMD_INLINE scalar<T> operator-(const scalar<T> a, const scalar<T> b) {
+  const uint64_t lhs = static_cast<int64_t>(a.raw);
+  const uint64_t rhs = static_cast<int64_t>(b.raw);
+  const uint64_t difference = lhs - rhs;
+  return scalar<T>(difference & ~T(0));
+}
+SIMD_INLINE scalar<float> operator-(const scalar<float> a,
+                                    const scalar<float> b) {
+  const float difference = a.raw - b.raw;
+  return scalar<float>(difference);
+}
+SIMD_INLINE scalar<double> operator-(const scalar<double> a,
+                                     const scalar<double> b) {
+  const double difference = a.raw - b.raw;
+  return scalar<double>(difference);
+}
+
+// ------------------------------ Saturating addition
+
+// Returns a + b clamped to the destination range. The sum is formed in int,
+// then limited to the representable range of the lane type.
+
+// Unsigned
+SIMD_INLINE scalar<uint8_t> saturated_add(const scalar<uint8_t> a,
+                                          const scalar<uint8_t> b) {
+  const int sum = a.raw + b.raw;
+  return scalar<uint8_t>(SIMD_MIN(SIMD_MAX(0, sum), 255));
+}
+SIMD_INLINE scalar<uint16_t> saturated_add(const scalar<uint16_t> a,
+                                           const scalar<uint16_t> b) {
+  const int sum = a.raw + b.raw;
+  return scalar<uint16_t>(SIMD_MIN(SIMD_MAX(0, sum), 65535));
+}
+
+// Signed
+SIMD_INLINE scalar<int8_t> saturated_add(const scalar<int8_t> a,
+                                         const scalar<int8_t> b) {
+  const int sum = a.raw + b.raw;
+  return scalar<int8_t>(SIMD_MIN(SIMD_MAX(-128, sum), 127));
+}
+SIMD_INLINE scalar<int16_t> saturated_add(const scalar<int16_t> a,
+                                          const scalar<int16_t> b) {
+  const int sum = a.raw + b.raw;
+  return scalar<int16_t>(SIMD_MIN(SIMD_MAX(-32768, sum), 32767));
+}
+
+// ------------------------------ Saturating subtraction
+
+// Returns a - b clamped to the destination range. The difference is formed in
+// int, then limited to the representable range of the lane type.
+
+// Unsigned
+SIMD_INLINE scalar<uint8_t> saturated_subtract(const scalar<uint8_t> a,
+                                               const scalar<uint8_t> b) {
+  const int difference = a.raw - b.raw;
+  return scalar<uint8_t>(SIMD_MIN(SIMD_MAX(0, difference), 255));
+}
+SIMD_INLINE scalar<uint16_t> saturated_subtract(const scalar<uint16_t> a,
+                                                const scalar<uint16_t> b) {
+  const int difference = a.raw - b.raw;
+  return scalar<uint16_t>(SIMD_MIN(SIMD_MAX(0, difference), 65535));
+}
+
+// Signed
+SIMD_INLINE scalar<int8_t> saturated_subtract(const scalar<int8_t> a,
+                                              const scalar<int8_t> b) {
+  const int difference = a.raw - b.raw;
+  return scalar<int8_t>(SIMD_MIN(SIMD_MAX(-128, difference), 127));
+}
+SIMD_INLINE scalar<int16_t> saturated_subtract(const scalar<int16_t> a,
+                                               const scalar<int16_t> b) {
+  const int difference = a.raw - b.raw;
+  return scalar<int16_t>(SIMD_MIN(SIMD_MAX(-32768, difference), 32767));
+}
+
+// ------------------------------ Average
+
+// Returns (a + b + 1) / 2, i.e. the average with halves rounded up. The sum
+// is formed in int before the division.
+
+SIMD_INLINE scalar<uint8_t> average_round(const scalar<uint8_t> a,
+                                          const scalar<uint8_t> b) {
+  const int biased_sum = a.raw + b.raw + 1;
+  return scalar<uint8_t>(biased_sum / 2);
+}
+SIMD_INLINE scalar<uint16_t> average_round(const scalar<uint16_t> a,
+                                           const scalar<uint16_t> b) {
+  const int biased_sum = a.raw + b.raw + 1;
+  return scalar<uint16_t>(biased_sum / 2);
+}
+
+// ------------------------------ Absolute value
+
+// Returns the absolute value. Non-negative inputs and LimitsMin<T>() are
+// returned unchanged.
+template <typename T>
+SIMD_INLINE scalar<T> abs(const scalar<T> a) {
+  const T i = a.raw;
+  if (i >= 0 || i == LimitsMin<T>()) {
+    return a;
+  }
+  return scalar<T>(-i);
+}
+
+// ------------------------------ min/max
+
+// Returns the smaller of the two lanes, as selected by SIMD_MIN.
+template <typename T>
+SIMD_INLINE scalar<T> min(const scalar<T> a, const scalar<T> b) {
+  const T smaller = SIMD_MIN(a.raw, b.raw);
+  return scalar<T>(smaller);
+}
+
+// Returns the larger of the two lanes, as selected by SIMD_MAX.
+template <typename T>
+SIMD_INLINE scalar<T> max(const scalar<T> a, const scalar<T> b) {
+  const T larger = SIMD_MAX(a.raw, b.raw);
+  return scalar<T>(larger);
+}
+
+// Returns the closest value to v within [lo, hi]: first raises v to at least
+// lo, then lowers the result to at most hi.
+template <typename T>
+SIMD_INLINE scalar<T> clamp(const scalar<T> v, const scalar<T> lo,
+                            const scalar<T> hi) {
+  const scalar<T> at_least_lo = max(lo, v);
+  return min(at_least_lo, hi);
+}
+
+// ------------------------------ Floating-point negate
+
+// Negates by XOR-ing with a constant that has only the most significant bit
+// set.
+SIMD_INLINE scalar<float> neg(const scalar<float> v) {
+  const scalar<uint32_t> msb = set1(Scalar<uint32_t>(), 0x80000000u);
+  const scalar<float> sign_mask = cast_to(Scalar<float>(), msb);
+  return v ^ sign_mask;
+}
+
+SIMD_INLINE scalar<double> neg(const scalar<double> v) {
+  const scalar<uint64_t> msb =
+      set1(Scalar<uint64_t>(), 0x8000000000000000ull);
+  const scalar<double> sign_mask = cast_to(Scalar<double>(), msb);
+  return v ^ sign_mask;
+}
+
+// ------------------------------ mul/div
+
+// Multiplication. The product is formed in a wide type (double, int64_t or
+// uint64_t depending on the kind of T) and then cast back to T.
+template <typename T>
+SIMD_INLINE scalar<T> operator*(const scalar<T> a, const scalar<T> b) {
+  if (IsFloat<T>()) {
+    const auto product = double(a.raw) * b.raw;
+    return scalar<T>(static_cast<T>(product));
+  }
+  if (IsSigned<T>()) {
+    const auto product = int64_t(a.raw) * b.raw;
+    return scalar<T>(static_cast<T>(product));
+  }
+  const auto product = uint64_t(a.raw) * b.raw;
+  return scalar<T>(static_cast<T>(product));
+}
+
+// Division using the built-in operator of the lane type.
+template <typename T>
+SIMD_INLINE scalar<T> operator/(const scalar<T> a, const scalar<T> b) {
+  const T quotient = a.raw / b.raw;
+  return scalar<T>(quotient);
+}
+
+// "Extensions": useful but not quite performance-portable operations. We add
+// functions to this namespace in multiple places.
+namespace ext {
+
+// Returns the upper 16 bits of a * b in each lane. The product is formed in
+// int before shifting.
+SIMD_INLINE scalar<int16_t> mul_high(const scalar<int16_t> a,
+                                     const scalar<int16_t> b) {
+  const int product = a.raw * b.raw;
+  return scalar<int16_t>(product >> 16);
+}
+
+}  // namespace ext
+
+// Returns (((a * b) >> 14) + 1) >> 1, computed here as
+// (a * b + (1 << 14)) >> 15 and then clamped to [-32768, 32767].
+SIMD_INLINE scalar<int16_t> mul_high_round(const scalar<int16_t> a,
+                                           const scalar<int16_t> b) {
+  const int product = a.raw * b.raw;
+  const int rounded = (product + (1 << 14)) >> 15;
+  return scalar<int16_t>(SIMD_MIN(SIMD_MAX(-32768, rounded), 32767));
+}
+
+// Multiplies even lanes (0, 2 ..) and returns the double-wide result. A scalar
+// has only lane 0, so this is the full 64-bit product of the two 32-bit lanes.
+SIMD_INLINE scalar<int64_t> mul_even(const scalar<int32_t> a,
+                                   const scalar<int32_t> b) {
+  return scalar<int64_t>(static_cast<int64_t>(a.raw) * b.raw);
+}
+SIMD_INLINE scalar<uint64_t> mul_even(const scalar<uint32_t> a,
+                                    const scalar<uint32_t> b) {
+  return scalar<uint64_t>(static_cast<uint64_t>(a.raw) * b.raw);
+}
+
+// Approximate reciprocal; this scalar version performs an actual division.
+SIMD_INLINE scalar<float> approximate_reciprocal(const scalar<float> v) {
+  const float reciprocal = 1.0f / v.raw;
+  return scalar<float>(reciprocal);
+}
+
+// ------------------------------ Floating-point multiply-add variants
+
+// Returns mul * x + add, computed as a separate multiply and add.
+template <typename T>
+SIMD_INLINE scalar<T> mul_add(const scalar<T> mul, const scalar<T> x,
+                            const scalar<T> add) {
+  const scalar<T> product = mul * x;
+  return product + add;
+}
+
+// Returns add - mul * x, computed as a separate multiply and subtract.
+template <typename T>
+SIMD_INLINE scalar<T> nmul_add(const scalar<T> mul, const scalar<T> x,
+                               const scalar<T> add) {
+  const scalar<T> product = mul * x;
+  return add - product;
+}
+
+// Returns x + add. "k1" is not used by this implementation.
+// NOTE(review): presumably k1 is expected to equal 1 so that other targets
+// can evaluate x * k1 + add - confirm against callers.
+template <typename T>
+SIMD_INLINE scalar<T> fadd(const scalar<T> x, const scalar<T> k1,
+                           const scalar<T> add) {
+  const scalar<T> sum = x + add;
+  return sum;
+}
+
+// Returns x - sub. "k1" is not used by this implementation.
+// NOTE(review): presumably k1 is expected to equal 1 so that other targets
+// can evaluate x * k1 - sub - confirm against callers.
+template <typename T>
+SIMD_INLINE scalar<T> fsub(const scalar<T> x, const scalar<T> k1,
+                           const scalar<T> sub) {
+  const scalar<T> difference = x - sub;
+  return difference;
+}
+
+// Returns x - sub (parameter order swapped relative to fsub: the subtrahend
+// comes first). "k1" is not used by this implementation.
+template <typename T>
+SIMD_INLINE scalar<T> fnadd(const scalar<T> sub, const scalar<T> k1,
+                                   const scalar<T> x) {
+  const scalar<T> difference = x - sub;
+  return difference;
+}
+
+// Slightly more expensive on ARM (extra negate)
+namespace ext {
+
+// Returns mul * x - sub, computed as a separate multiply and subtract.
+template <typename T>
+SIMD_INLINE scalar<T> mul_subtract(const scalar<T> mul, const scalar<T> x,
+                                   const scalar<T> sub) {
+  const scalar<T> product = mul * x;
+  return product - sub;
+}
+
+// Returns neg(mul) * x - sub.
+template <typename T>
+SIMD_INLINE scalar<T> nmul_subtract(const scalar<T> mul, const scalar<T> x,
+                                    const scalar<T> sub) {
+  const scalar<T> negated_mul = neg(mul);
+  return negated_mul * x - sub;
+}
+
+}  // namespace ext
+
+// ------------------------------ Floating-point square root
+
+// Approximate reciprocal square root: an initial guess is derived from the
+// bit pattern of the input and then refined by one Newton-Raphson iteration.
+SIMD_INLINE scalar<float> approximate_reciprocal_sqrt(const scalar<float> v) {
+  float f = v.raw;
+  // Kept for the refinement step below, before f is overwritten.
+  const float half = f * 0.5f;
+  uint32_t bits;
+  // Obtain the 32-bit pattern of f.
+  CopyBytes<4>(&f, &bits);
+  // Initial guess based on log2(f)
+  bits = 0x5F3759DF - (bits >> 1);
+  // Store the adjusted pattern back into f, which now holds the guess.
+  CopyBytes<4>(&bits, &f);
+  // One Newton-Raphson iteration
+  return scalar<float>(f * (1.5f - (half * f * f)));
+}
+
+// Square root, computed as approximate_reciprocal_sqrt(v) * v; the result is
+// therefore only as accurate as that approximation.
+SIMD_INLINE scalar<float> sqrt(const scalar<float> v) {
+  const scalar<float> rsqrt = approximate_reciprocal_sqrt(v);
+  return rsqrt * v;
+}
+// The double overload converts the input to float, reuses the float overload
+// and converts its result back to double.
+SIMD_INLINE scalar<double> sqrt(const scalar<double> v) {
+  const scalar<float> narrowed(v.raw);
+  const scalar<float> root = sqrt(narrowed);
+  return scalar<double>(root.raw);
+}
+
+// ------------------------------ Floating-point rounding
+
+// Approximation of round-to-nearest for numbers representable as integers:
+// adds +/-0.5 (matching the sign of v) and truncates via an integer cast.
+SIMD_INLINE scalar<float> round(const scalar<float> v) {
+  float bias = 0.5f;
+  if (v.raw < 0.0f) bias = -0.5f;
+  const int32_t rounded = static_cast<int32_t>(v.raw + bias);
+  return scalar<float>(rounded);
+}
+SIMD_INLINE scalar<double> round(const scalar<double> v) {
+  double bias = 0.5;
+  if (v.raw < 0.0) bias = -0.5;
+  const int64_t rounded = static_cast<int64_t>(v.raw + bias);
+  return scalar<double>(rounded);
+}
+
+// Rounds toward zero by converting to an integer of the same width and back.
+SIMD_INLINE scalar<float> trunc(const scalar<float> v) {
+  const int32_t integral = static_cast<int32_t>(v.raw);
+  return scalar<float>(integral);
+}
+SIMD_INLINE scalar<double> trunc(const scalar<double> v) {
+  const int64_t integral = static_cast<int64_t>(v.raw);
+  return scalar<double>(integral);
+}
+
+// Rounds v toward +infinity by manipulating its bit pattern directly.
+// Float is the lane type, Bits an unsigned integer used to hold its bit
+// pattern; kMantissaBits/kExponentBits give the widths of the mantissa and
+// exponent fields. V is the scalar wrapper type (provides .raw).
+template <typename Float, typename Bits, int kMantissaBits, int kExponentBits,
+          class V>
+V Ceiling(const V v) {
+  const Bits kExponentMask = (1ull << kExponentBits) - 1;
+  const Bits kMantissaMask = (1ull << kMantissaBits) - 1;
+  const Bits kBias = kExponentMask / 2;
+
+  Float f = v.raw;
+  const bool positive = f > 0.0f;
+
+  Bits bits;
+  CopyBytes<sizeof(Bits)>(&v, &bits);
+
+  // Unbiased exponent extracted from the bit pattern.
+  const int exponent = ((bits >> kMantissaBits) & kExponentMask) - kBias;
+  // Already an integer.
+  if (exponent >= kMantissaBits) return v;
+  // |v| < 1 => 0 or 1.
+  if (exponent < 0) return V(positive);
+
+  // Mantissa bits that hold the fractional part for this exponent.
+  const Bits mantissa_mask = kMantissaMask >> exponent;
+  // Already an integer
+  if ((bits & mantissa_mask) == 0) return v;
+
+  // Clear fractional bits and round up
+  if (positive) bits += (kMantissaMask + 1) >> exponent;
+  bits &= ~mantissa_mask;
+
+  CopyBytes<sizeof(Bits)>(&bits, &f);
+  return V(f);
+}
+
+// Rounds v toward -infinity by manipulating its bit pattern directly.
+// Float is the lane type, Bits an unsigned integer used to hold its bit
+// pattern; kMantissaBits/kExponentBits give the widths of the mantissa and
+// exponent fields. V is the scalar wrapper type (provides .raw).
+template <typename Float, typename Bits, int kMantissaBits, int kExponentBits,
+          class V>
+V Floor(const V v) {
+  const Bits kExponentMask = (1ull << kExponentBits) - 1;
+  const Bits kMantissaMask = (1ull << kMantissaBits) - 1;
+  const Bits kBias = kExponentMask / 2;
+
+  Float f = v.raw;
+  const bool negative = f < 0.0f;
+
+  Bits bits;
+  CopyBytes<sizeof(Bits)>(&v, &bits);
+
+  // Unbiased exponent extracted from the bit pattern.
+  const int exponent = ((bits >> kMantissaBits) & kExponentMask) - kBias;
+  // Already an integer.
+  if (exponent >= kMantissaBits) return v;
+  // |v| < 1 => -1 or 0.
+  if (exponent < 0) return V(negative ? -1.0 : 0.0f);
+
+  // Mantissa bits that hold the fractional part for this exponent.
+  const Bits mantissa_mask = kMantissaMask >> exponent;
+  // Already an integer
+  if ((bits & mantissa_mask) == 0) return v;
+
+  // Clear fractional bits and round down
+  if (negative) bits += (kMantissaMask + 1) >> exponent;
+  bits &= ~mantissa_mask;
+
+  CopyBytes<sizeof(Bits)>(&bits, &f);
+  return V(f);
+}
+
+// Toward +infinity, aka ceiling. Forwards to Ceiling with the field widths of
+// the respective floating-point type.
+SIMD_INLINE scalar<float> ceil(const scalar<float> v) {
+  constexpr int kMantissaBits = 23;
+  constexpr int kExponentBits = 8;
+  return Ceiling<float, uint32_t, kMantissaBits, kExponentBits>(v);
+}
+SIMD_INLINE scalar<double> ceil(const scalar<double> v) {
+  constexpr int kMantissaBits = 52;
+  constexpr int kExponentBits = 11;
+  return Ceiling<double, uint64_t, kMantissaBits, kExponentBits>(v);
+}
+
+// Toward -infinity, aka floor. Forwards to Floor with the field widths of the
+// respective floating-point type.
+SIMD_INLINE scalar<float> floor(const scalar<float> v) {
+  constexpr int kMantissaBits = 23;
+  constexpr int kExponentBits = 8;
+  return Floor<float, uint32_t, kMantissaBits, kExponentBits>(v);
+}
+SIMD_INLINE scalar<double> floor(const scalar<double> v) {
+  constexpr int kMantissaBits = 52;
+  constexpr int kExponentBits = 11;
+  return Floor<double, uint64_t, kMantissaBits, kExponentBits>(v);
+}
+
+// ================================================== COMPARE
+
+// Comparisons fill a lane with 1-bits if the condition is true, else 0.
+// Every byte of the returned lane is set to 0xFF (true) or 0 (false).
+template <typename T>
+scalar<T> ComparisonResult(const bool result) {
+  const int fill = result ? 0xFF : 0;
+  T lane;
+  SetBytes(fill, &lane);
+  return scalar<T>(lane);
+}
+
+// Lane is all 1-bits if a equals b, else zero.
+template <typename T>
+SIMD_INLINE scalar<T> operator==(const scalar<T> a, const scalar<T> b) {
+  const bool equal = a.raw == b.raw;
+  return ComparisonResult<T>(equal);
+}
+
+// Lane is all 1-bits if a is less than b, else zero.
+template <typename T>
+SIMD_INLINE scalar<T> operator<(const scalar<T> a, const scalar<T> b) {
+  const bool less = a.raw < b.raw;
+  return ComparisonResult<T>(less);
+}
+// Lane is all 1-bits if a is greater than b, else zero.
+template <typename T>
+SIMD_INLINE scalar<T> operator>(const scalar<T> a, const scalar<T> b) {
+  const bool greater = a.raw > b.raw;
+  return ComparisonResult<T>(greater);
+}
+
+// Lane is all 1-bits if a is less than or equal to b, else zero.
+template <typename T>
+SIMD_INLINE scalar<T> operator<=(const scalar<T> a, const scalar<T> b) {
+  const bool less_or_equal = a.raw <= b.raw;
+  return ComparisonResult<T>(less_or_equal);
+}
+// Lane is all 1-bits if a is greater than or equal to b, else zero.
+template <typename T>
+SIMD_INLINE scalar<T> operator>=(const scalar<T> a, const scalar<T> b) {
+  const bool greater_or_equal = a.raw >= b.raw;
+  return ComparisonResult<T>(greater_or_equal);
+}
+
+// ================================================== MEMORY
+
+// ------------------------------ Load
+
+// Reads one T from "aligned" (via a byte copy) and wraps it in a scalar.
+template <typename T>
+SIMD_INLINE scalar<T> load(Scalar<T> /*d*/, const T* SIMD_RESTRICT aligned) {
+  T lane;
+  CopyBytes<sizeof(T)>(aligned, &lane);
+  return scalar<T>(lane);
+}
+
+// Same as load(): this scalar implementation simply forwards to it.
+template <typename T>
+SIMD_INLINE scalar<T> load_unaligned(Scalar<T> d, const T* SIMD_RESTRICT p) {
+  return load(d, p);
+}
+
+// In some use cases, "load single lane" is sufficient; otherwise avoid this.
+// This scalar implementation forwards to load(), i.e. reads a single T.
+template <typename T>
+SIMD_INLINE scalar<T> load_dup128(Scalar<T> d, const T* SIMD_RESTRICT aligned) {
+  return load(d, aligned);
+}
+
+// ------------------------------ Store
+
+// Writes the lane of "v" to "aligned" via a byte copy.
+template <typename T>
+SIMD_INLINE void store(const scalar<T> v, Scalar<T> /*d*/,
+                       T* SIMD_RESTRICT aligned) {
+  const T* const lane = &v.raw;
+  CopyBytes<sizeof(T)>(lane, aligned);
+}
+
+// Same as store(): this scalar implementation simply forwards to it.
+template <typename T>
+SIMD_INLINE void store_unaligned(const scalar<T> v, Scalar<T> d,
+                                 T* SIMD_RESTRICT p) {
+  store(v, d, p);
+}
+
+// ------------------------------ "Non-temporal" stores
+
+// This scalar implementation performs an ordinary store().
+template <typename T>
+SIMD_INLINE void stream(const scalar<T> v, Scalar<T> d,
+                        T* SIMD_RESTRICT aligned) {
+  store(v, d, aligned);
+}
+
+// ------------------------------ Gather
+
+// "Extensions": useful but not quite performance-portable operations. We add
+// functions to this namespace in multiple places.
+namespace ext {
+
+// Loads one T from the address formed by adding offset.raw, interpreted as a
+// byte offset, to "base". T and Offset must have the same size.
+template <typename T, typename Offset>
+SIMD_INLINE scalar<T> gather_offset(Scalar<T> d, const T* base,
+                                    const scalar<Offset> offset) {
+  static_assert(sizeof(T) == sizeof(Offset), "SVE requires same size base/ofs");
+  // The offset is in bytes rather than elements, hence the address arithmetic
+  // on uintptr_t instead of on the T pointer.
+  const uintptr_t addr = reinterpret_cast<uintptr_t>(base) + offset.raw;
+  return load(d, reinterpret_cast<const T*>(addr));
+}
+
+// Loads base[index.raw]. T and Index must have the same size.
+template <typename T, typename Index>
+SIMD_INLINE scalar<T> gather_index(Scalar<T> d, const T* SIMD_RESTRICT base,
+                                   const scalar<Index> index) {
+  static_assert(sizeof(T) == sizeof(Index), "SVE requires same size base/idx");
+  const T* const element = base + index.raw;
+  return load(d, element);
+}
+
+}  // namespace ext
+
+// ================================================== CONVERT
+
+// Value conversion between lane types (unlike cast_to, which reinterprets
+// bits). The generic version relies on the implicit conversion FromT -> ToT.
+template <typename FromT, typename ToT>
+SIMD_INLINE scalar<ToT> convert_to(Desc<ToT, 1, NONE>,
+                                   const scalar<FromT> from) {
+  const ToT converted = from.raw;
+  return scalar<ToT>(converted);
+}
+
+SIMD_INLINE scalar<float> convert_to(Scalar<float>, const scalar<int32_t> v) {
+  const float converted = v.raw;
+  return scalar<float>(converted);
+}
+
+// Truncates (rounds toward zero).
+SIMD_INLINE scalar<int32_t> convert_to(Scalar<int32_t>, const scalar<float> v) {
+  const int32_t truncated = static_cast<int32_t>(v.raw);
+  return scalar<int32_t>(truncated);
+}
+
+// Converts the 8-bit lane to 32 bits via convert_to.
+SIMD_INLINE scalar<uint32_t> u32_from_u8(const scalar<uint8_t> v) {
+  const Scalar<uint32_t> d32;
+  return convert_to(d32, v);
+}
+
+// Converts the 32-bit lane to 8 bits via convert_to.
+SIMD_INLINE scalar<uint8_t> u8_from_u32(const scalar<uint32_t> v) {
+  const Scalar<uint8_t> d8;
+  return convert_to(d8, v);
+}
+
+// Approximation of round-to-nearest for numbers representable as int32_t:
+// adds +/-0.5 (matching the sign of v) and truncates.
+SIMD_INLINE scalar<int32_t> nearest_int(const scalar<float> v) {
+  float bias = 0.5f;
+  if (v.raw < 0.0f) bias = -0.5f;
+  return scalar<int32_t>(static_cast<int32_t>(v.raw + bias));
+}
+
+// ================================================== SWIZZLE
+
+// Unsupported: shift_*_bytes, combine_shift_right_bytes, interleave_*,
+// other_half, shuffle_*, sums_of_u8x8 - these require more than one lane
+// and/or actual 128-bit vectors.
+
+// ------------------------------ Broadcast/splat any lane
+
+// Returns v unchanged: the only valid lane index for a scalar is 0, which is
+// enforced at compile time.
+template <int kLane, typename T>
+SIMD_INLINE scalar<T> broadcast(const scalar<T> v) {
+  static_assert(kLane == 0, "Scalar only has one lane");
+  return v;
+}
+
+// ------------------------------ Zip/unpack
+
+// Unsigned zip: combines two lanes into one of twice the width, with "a" in
+// the lower half and "b" in the upper half.
+SIMD_INLINE scalar<uint16_t> zip_lo(const scalar<uint8_t> a,
+                                  const scalar<uint8_t> b) {
+  const uint32_t upper = uint32_t(b.raw) << 8;
+  return scalar<uint16_t>(upper + a.raw);
+}
+SIMD_INLINE scalar<uint32_t> zip_lo(const scalar<uint16_t> a,
+                                  const scalar<uint16_t> b) {
+  const uint32_t upper = uint32_t(b.raw) << 16;
+  return scalar<uint32_t>(upper + a.raw);
+}
+SIMD_INLINE scalar<uint64_t> zip_lo(const scalar<uint32_t> a,
+                                  const scalar<uint32_t> b) {
+  const uint64_t upper = uint64_t(b.raw) << 32;
+  return scalar<uint64_t>(upper + a.raw);
+}
+// Signed zip: combines the bit patterns of two lanes into one lane of twice
+// the width, with "a" in the lower half and "b" in the upper half.
+// Both inputs are first converted to the unsigned type of the same width so
+// that a negative "a" is not sign-extended into (and thereby corrupting) the
+// upper half, e.g. a = -1, b = 1 yields 0x01FF rather than 0x00FF.
+SIMD_INLINE scalar<int16_t> zip_lo(const scalar<int8_t> a,
+                                   const scalar<int8_t> b) {
+  const uint32_t lower = static_cast<uint8_t>(a.raw);
+  const uint32_t upper = uint32_t(static_cast<uint8_t>(b.raw)) << 8;
+  return scalar<int16_t>(static_cast<int16_t>(upper | lower));
+}
+SIMD_INLINE scalar<int32_t> zip_lo(const scalar<int16_t> a,
+                                   const scalar<int16_t> b) {
+  const uint32_t lower = static_cast<uint16_t>(a.raw);
+  const uint32_t upper = uint32_t(static_cast<uint16_t>(b.raw)) << 16;
+  return scalar<int32_t>(static_cast<int32_t>(upper | lower));
+}
+SIMD_INLINE scalar<int64_t> zip_lo(const scalar<int32_t> a,
+                                   const scalar<int32_t> b) {
+  const uint64_t lower = static_cast<uint32_t>(a.raw);
+  const uint64_t upper = uint64_t(static_cast<uint32_t>(b.raw)) << 32;
+  return scalar<int64_t>(static_cast<int64_t>(upper | lower));
+}
+
+// This scalar implementation forwards to zip_lo and returns whatever type the
+// matching zip_lo overload returns.
+template <typename T>
+SIMD_INLINE auto zip_hi(const scalar<T> a, const scalar<T> b)
+    -> decltype(zip_lo(a, b)) {
+  return zip_lo(a, b);
+}
+
+// ------------------------------ Parts
+
+// Returns a scalar holding "t" converted to the lane type T.
+template <typename T, typename T2>
+SIMD_INLINE scalar<T> set_part(Scalar<T> /*d*/, const T2 t) {
+  const T lane = t;
+  return scalar<T>(lane);
+}
+
+// Returns the lane value stored in v.
+template <typename T>
+SIMD_INLINE T get_part(Scalar<T>, const scalar<T> v) {
+  return v.raw;
+}
+
+// Returns v unchanged.
+template <typename T>
+SIMD_INLINE scalar<T> any_part(Scalar<T>, const scalar<T> v) {
+  return v;
+}
+
+// Returns v unchanged; kLane must be 0, which is enforced at compile time.
+template <int kLane, typename T>
+SIMD_INLINE scalar<T> broadcast_part(Scalar<T>, const scalar<T> v) {
+  static_assert(kLane == 0, "Invalid kLane");
+  return v;
+}
+
+// ================================================== MISC
+
+// "Extensions": useful but not quite performance-portable operations. We add
+// functions to this namespace in multiple places.
+namespace ext {
+
+// Returns a bit array of the most significant bit of each byte in "v", i.e.
+// sum_i=0..15 of (v[i] >> 7) << i; v[0] is the least-significant byte of "v".
+// This is useful for testing/branching based on comparison results.
+// For a scalar, the result is just the top bit of the single byte (0 or 1).
+SIMD_INLINE uint32_t movemask(const scalar<uint8_t> v) {
+  const uint32_t top_bit = v.raw >> 7;
+  return top_bit;
+}
+
+// Returns the most significant bit of each float/double lane (see above).
+SIMD_INLINE uint32_t movemask(const scalar<float> v) {
+  // Cannot return (v < 0) because +0.0 == -0.0.
+  const Scalar<uint32_t> du;
+  const scalar<uint32_t> sign = shift_right<31>(cast_to(du, v));
+  return get_part(du, sign);
+}
+SIMD_INLINE uint32_t movemask(const scalar<double> v) {
+  // Cannot return (v < 0) because +0.0 == -0.0.
+  const Scalar<uint64_t> du;
+  const scalar<uint64_t> sign = shift_right<63>(cast_to(du, v));
+  return get_part(du, sign);
+}
+
+// Returns whether all lanes are equal to zero. Supported for all integer T.
+// A scalar has a single lane, so this tests only v.raw.
+template <typename T>
+SIMD_INLINE bool all_zero(const scalar<T> v) {
+  const bool is_zero = (v.raw == 0);
+  return is_zero;
+}
+
+// Sum of all lanes, i.e. the only one: returns the input unchanged.
+template <typename T>
+SIMD_INLINE scalar<T> sum_of_lanes(const scalar<T> v0) {
+  return v0;
+}
+
+}  // namespace ext
+}  // namespace pik
+
+#endif  // PIK_SIMD_SCALAR_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/simd/shared.h b/codec/L2/demos/pikEnc/host/pik/simd/shared.h
new file mode 100755
index 0000000000..ff74e06ad4
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/simd/shared.h
@@ -0,0 +1,100 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_SIMD_SHARED_H_
+#define PIK_SIMD_SHARED_H_
+
+// Definitions needed by multiple platform-specific headers.
+
+#include <stddef.h>
+#include <atomic>
+
+// Ensures an array is aligned and suitable for load()/store() functions.
+// Example: SIMD_ALIGN T lanes[d.N];
+#define SIMD_ALIGN alignas(32)
+
+// 4 instances of a given literal value, useful as input to load_dup128.
+#define SIMD_REP4(literal) literal, literal, literal, literal
+#define SIMD_REP8(literal) SIMD_REP4(literal), SIMD_REP4(literal)
+
+// Alternative for asm volatile("" : : : "memory"), which has no effect.
+#define SIMD_FENCE std::atomic_thread_fence(std::memory_order_acq_rel)
+
+namespace pik {
+
+// SIMD operations are implemented as overloaded functions selected using a
+// "descriptor" D := Desc<T, N[, Target]>. For example: `D::V setzero(D)`.
+// T is the lane type, N the number of lanes, Target is an instruction set
+// (e.g. SSE4). The return type D::V is either a full vector of at least
+// 128 bits, an N-lane (=2^j) part, or a scalar.
+
+// Specialized in platform-specific headers. Only for use by PartTarget.
+// Default: no change to Target. kBlocks = ceil(size / 16).
+// "type" is the target to use for a part that occupies kBlocks blocks.
+template <size_t kBlocks, class Target>
+struct PartTargetT {
+  using type = Target;
+};
+
+template <typename T, size_t N, class Target>
+using PartTarget =
+    typename PartTargetT<(N * sizeof(T) + 15) / 16, Target>::type;
+
+// Specialized in platform-specific headers. Only for use by Desc and VT.
+template <typename T, size_t N, class Target>
+struct VecT {};
+
+// Shorthand for function arg/return types. Overrides Target with the narrowest
+// possible for the given N.
+template <typename T, size_t N, class Target>
+using VT = typename VecT<T, N, PartTarget<T, N, Target>>::type;
+
+// Descriptor: properties that uniquely identify a vector/part/scalar. Used to
+// select overloaded functions; see Full/Part/Scalar aliases below.
+// LaneT is the lane type, kLanes the number of lanes and TargetT the
+// instruction set tag.
+template <typename LaneT, size_t kLanes, class TargetT>
+struct Desc {
+  constexpr Desc() {}
+
+  // Re-export the template arguments under short names.
+  using T = LaneT;
+  static constexpr size_t N = kLanes;
+  using Target = TargetT;
+
+  // Alias for the actual vector data, e.g. scalar<float> for <float, 1, NONE>,
+  // returned by initializers such as setzero(). Parts and full vectors are
+  // distinct types on x86 to avoid inadvertent conversions. By contrast, PPC
+  // parts are merely aliases for full vectors to avoid wrapper overhead.
+  using V = typename VecT<T, N, Target>::type;
+
+  // Compile-time validation of the lane count.
+  static_assert((N & (N - 1)) == 0, "N must be a power of two");
+  static_assert(N <= Target::template NumLanes<T>(), "N too large");
+};
+
+// Shorthand for a full vector.
+template <typename T, class Target>
+using Full = Desc<T, Target::template NumLanes<T>(), Target>;
+
+// Shorthand for a part (or full) vector. N=2^j. Note that PartTarget selects
+// a 128-bit Target when T and N are small enough (avoids additional AVX2
+// versions of SSE4 initializers/loads).
+template <typename T, size_t N, class Target>
+using Part = Desc<T, N, PartTarget<T, N, Target>>;
+
+// Shorthand for Part/Full. NOTE: uses SIMD_TARGET at the moment of expansion,
+// not its current (possibly undefined) value.
+#define SIMD_FULL(T) Full<T, SIMD_TARGET>
+#define SIMD_PART(T, N) Part<T, N, SIMD_TARGET>
+
+// Type tags for get_half(Upper(), v) etc.
+struct Upper {};
+struct Lower {};
+#define SIMD_HALF Lower()
+
+// Unfortunately the GCC/Clang intrinsics do not accept int64_t*.
+using GatherIndex64 = long long int;
+static_assert(sizeof(GatherIndex64) == 8, "Must be 64-bit type");
+
+}  // namespace pik
+
+#endif  // PIK_SIMD_SHARED_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/simd/simd.h b/codec/L2/demos/pikEnc/host/pik/simd/simd.h
new file mode 100755
index 0000000000..b66d9ecfd1
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/simd/simd.h
@@ -0,0 +1,161 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_SIMD_SIMD_H_
+#define PIK_SIMD_SIMD_H_
+
+// Performance-portable SIMD API for SSE4/AVX2/ARMv8, later AVX-512 and POWER8.
+// Each operation is efficient on all platforms.
+
+#include <stddef.h>  // size_t
+#include <stdint.h>
+#include "pik/simd/arch.h"
+#include "pik/simd/util.h"  // CopyBytes
+
+#include "pik/simd/arm64_neon.h"
+#include "pik/simd/scalar.h"
+#include "pik/simd/x86_avx2.h"
+#include "pik/simd/x86_sse4.h"
+
+#if SIMD_ARCH == SIMD_ARCH_X86 && SIMD_ENABLE == SIMD_NONE
+// No targets enabled, but we still need this for functions below.
+#include <emmintrin.h>
+#endif
+
+// Use SIMD_TARGET to derive other macros. NOTE: SIMD_TARGET is only evaluated
+// when these macros are expanded.
+// Two-level concatenation: SIMD_CONCAT expands its arguments first, then
+// SIMD_CONCAT_IMPL pastes the results together.
+#define SIMD_CONCAT_IMPL(a, b) a##b
+#define SIMD_CONCAT(a, b) SIMD_CONCAT_IMPL(a, b)
+
+// Attributes; must precede every function declaration.
+// Expands to SIMD_ATTR_<target>.
+#define SIMD_ATTR SIMD_CONCAT(SIMD_ATTR_, SIMD_TARGET)
+
+// Target-specific namespace, required when using foreach_target.h.
+// Expands to N_<target>.
+#define SIMD_NAMESPACE SIMD_CONCAT(N_, SIMD_TARGET)
+
+// Which target is active, e.g. #if SIMD_TARGET_VALUE == SIMD_AVX2
+// Expands to SIMD_<target>.
+#define SIMD_TARGET_VALUE SIMD_CONCAT(SIMD_, SIMD_TARGET)
+
+// Functions common to multiple targets:
+namespace pik {
+
+// Refines approximate_reciprocal(x) with a single Newton-Raphson step.
+// With r the initial estimate, the result is nmul_add(x * r, r, r + r)
+// (presumably 2r - x*r*r; the definition of nmul_add is not visible here).
+template <class V>
+static SIMD_ATTR SIMD_INLINE V ReciprocalNR(const V x) {
+  const auto estimate = approximate_reciprocal(x);
+  const auto x_times_estimate = x * estimate;
+  const auto twice_estimate = estimate + estimate;
+  return nmul_add(x_times_estimate, estimate, twice_estimate);
+}
+
+// Division functor, selected by lane type T.
+// Primary template: performs the real division n / d.
+template <typename T, class V>
+struct FastDivision {
+  SIMD_ATTR V operator()(const V n, const V d) const {
+    const auto quotient = n / d;
+    return quotient;
+  }
+};
+// Partial specialization for float lanes: multiplies the numerator by
+// ReciprocalNR(d) instead of dividing.
+template <class V>
+struct FastDivision<float, V> {
+  SIMD_ATTR V operator()(const V n, const V d) const {
+    const auto inverse = ReciprocalNR(d);
+    return n * inverse;
+  }
+};
+
+// Returns a name for the vector/part/scalar. The type prefix is u/i/f for
+// unsigned/signed/floating point, followed by the number of bits per lane;
+// then 'x' followed by the number of lanes. Example: u8x16. This is useful for
+// understanding which instantiation of a generic test failed.
+//
+// D is a descriptor type providing T, N and Target::value. The returned
+// pointer refers to a function-local static array, so it stays valid after the
+// call. At most two decimal digits are emitted for the bit count and for the
+// lane count.
+template <class D>
+inline const char* vec_name() {
+  using T = typename D::T;
+  constexpr size_t N = D::N;
+  constexpr int kTarget = D::Target::value;
+
+  // Avoids depending on <type_traits>.
+  // 2.25 truncates to 2 for integer T, so the two values differ only for
+  // floating-point T.
+  const bool is_float = T(2.25) != T(2);
+  // T(-1) wraps to a large positive value for unsigned T.
+  const bool is_signed = T(-1) < T(0);
+  constexpr char prefix = is_float ? 'f' : (is_signed ? 'i' : 'u');
+
+  constexpr size_t bits = sizeof(T) * 8;
+  constexpr char bits10 = '0' + (bits / 10);
+  constexpr char bits1 = '0' + (bits % 10);
+
+  // Scalars: omit the xN suffix.
+  if (kTarget == SIMD_NONE) {
+    // Array elements without an initializer are zero, which terminates the
+    // string.
+    static constexpr char name1[8] = {prefix, bits1};
+    static constexpr char name2[8] = {prefix, bits10, bits1};
+    // One-byte lanes have a single-digit bit count, so the tens digit is
+    // skipped for them.
+    return sizeof(T) == 1 ? name1 : name2;
+  }
+
+  // For N < 10 the only digit is placed in N10 and N1 becomes the terminator.
+  constexpr char N1 = (N < 10) ? '\0' : '0' + (N % 10);
+  constexpr char N10 = (N < 10) ? '0' + (N % 10) : '0' + (N / 10);
+
+  static constexpr char name1[8] = {prefix, bits1, 'x', N10, N1};
+  static constexpr char name2[8] = {prefix, bits10, bits1, 'x', N10, N1};
+  return sizeof(T) == 1 ? name1 : name2;
+}
+
+// Cache control
+
+// Writes the 32-bit value t to *aligned. On x86 the write is issued through
+// _mm_stream_si32; on every other architecture it falls back to CopyBytes<4>.
+SIMD_INLINE void stream(const uint32_t t, uint32_t* SIMD_RESTRICT aligned) {
+#if SIMD_ARCH != SIMD_ARCH_X86
+  CopyBytes<4>(&t, aligned);
+#else
+  int* const destination = reinterpret_cast<int*>(aligned);
+  _mm_stream_si32(destination, t);
+#endif
+}
+
+// Writes the 64-bit value t to *aligned. On x86 the write is issued through
+// _mm_stream_si64; on every other architecture it falls back to CopyBytes<8>.
+SIMD_INLINE void stream(const uint64_t t, uint64_t* SIMD_RESTRICT aligned) {
+#if SIMD_ARCH != SIMD_ARCH_X86
+  CopyBytes<8>(&t, aligned);
+#else
+  long long* const destination = reinterpret_cast<long long*>(aligned);
+  _mm_stream_si64(destination, t);
+#endif
+}
+
+// Delays subsequent loads until prior loads are visible. On Intel CPUs, also
+// serves as a full fence (waits for all prior instructions to complete).
+// No effect on non-x86: the body is empty unless SIMD_ARCH == SIMD_ARCH_X86,
+// where it issues _mm_lfence().
+SIMD_INLINE void load_fence() {
+#if SIMD_ARCH == SIMD_ARCH_X86
+  _mm_lfence();
+#endif
+}
+
+// Ensures previous weakly-ordered stores are visible. No effect on non-x86:
+// the body is empty unless SIMD_ARCH == SIMD_ARCH_X86, where it issues
+// _mm_sfence().
+SIMD_INLINE void store_fence() {
+#if SIMD_ARCH == SIMD_ARCH_X86
+  _mm_sfence();
+#endif
+}
+
+// Begins loading the cache line containing "p". Uses _mm_prefetch with the
+// T0 hint on x86 and __pld on ARM; compiles to nothing on other
+// architectures.
+template <typename T>
+SIMD_INLINE void prefetch(const T* p) {
+#if SIMD_ARCH == SIMD_ARCH_X86
+  // Depending on the compiler, _mm_prefetch is declared with a `const char*`
+  // or a `const void*` parameter. A `const T*` does not convert implicitly to
+  // the former, whereas `const char*` is accepted by both.
+  _mm_prefetch(reinterpret_cast<const char*>(p), _MM_HINT_T0);
+#elif SIMD_ARCH == SIMD_ARCH_ARM
+  __pld(p);
+#endif
+}
+
+// Invalidates and flushes the cache line containing "p". No effect on non-x86:
+// the body is empty unless SIMD_ARCH == SIMD_ARCH_X86, where it issues
+// _mm_clflush(p).
+SIMD_INLINE void flush_cacheline(const void* p) {
+#if SIMD_ARCH == SIMD_ARCH_X86
+  _mm_clflush(p);
+#endif
+}
+
+// Call during spin loops to potentially reduce contention/power consumption.
+// Issues _mm_pause() on x86; compiles to nothing on other architectures.
+SIMD_INLINE void pause() {
+#if SIMD_ARCH == SIMD_ARCH_X86
+  _mm_pause();
+#endif
+}
+
+}  // namespace pik
+
+#endif  // PIK_SIMD_SIMD_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/simd/simd_test.cctest b/codec/L2/demos/pikEnc/host/pik/simd/simd_test.cctest
new file mode 100755
index 0000000000..9f340c37fb
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/simd/simd_test.cctest
@@ -0,0 +1,2468 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+// Empty if not included by foreach_target.
+#ifdef SIMD_ATTR_IMPL
+
+#include <random>
+
+namespace pik {
+namespace SIMD_NAMESPACE {
+namespace {
+
+// Prevents the compiler from eliding the computations that led to "output".
+// Works by indicating to the compiler that "output" is being read and modified.
+// The +r constraint avoids unnecessary writes to memory, but only works for
+// built-in types.
+// NOTE: when _MSC_VER is defined the body is empty, so nothing is done there.
+template <class T>
+inline SIMD_ATTR void PreventElision(T&& output) {
+#ifndef _MSC_VER
+  // Empty asm statement: "output" is an in/out register operand and memory is
+  // declared clobbered.
+  asm volatile("" : "+r"(output) : : "memory");
+#endif
+}
+
+// Random numbers
+
+typedef std::mt19937 RandomState;
+
+// Advances *rng by one step and returns the generated value as uint32_t.
+SIMD_INLINE uint32_t Random32(RandomState* rng) {
+  RandomState& generator = *rng;
+  return generator();
+}
+
+// Compares two non-vector values of type T and reports a mismatch through
+// NotifyFailure. Both values are rendered with ToString and the resulting
+// strings are compared. `line`, `lane` and `name` are only forwarded to
+// NotifyFailure to describe where the failure occurred.
+template <typename T>
+SIMD_ATTR void AssertEqual(const T expected, const T actual,
+                           const int line = -1, const int lane = 0,
+                           const char* name = "builtin") {
+  char expected_text[64];
+  char actual_text[64];
+  ToString(expected, expected_text);
+  ToString(actual, actual_text);
+  // Comparing the printed form rather than the raw values lets floats that
+  // print identically count as equal.
+  if (StringsEqual(expected_text, actual_text)) {
+    return;
+  }
+  NotifyFailure(SIMD_TARGET::value, line, name, lane, expected_text,
+                actual_text);
+}
+
+#define ASSERT_EQ(expected, actual) AssertEqual(expected, actual, __LINE__)
+
+// Compares two vectors lane by lane. Both vectors are stored to aligned
+// arrays of d.N lanes; each pair of lanes is then checked with AssertEqual,
+// which receives the lane index and vec_name<D>() for the failure report.
+template <class D, class V>
+SIMD_ATTR void AssertVecEqual(D d, const V expected, const V actual,
+                              const int line) {
+  using Lane = typename D::T;
+  SIMD_ALIGN Lane want[d.N];
+  SIMD_ALIGN Lane got[d.N];
+  store(expected, d, want);
+  store(actual, d, got);
+
+  const char* const type_name = vec_name<D>();
+  size_t lane = 0;
+  while (lane < d.N) {
+    AssertEqual(want[lane], got[lane], line, lane, type_name);
+    ++lane;
+  }
+}
+
+// Overload taking the expected values as an array of D::N lanes: the array is
+// loaded with load_unaligned and the comparison is delegated to the
+// vector/vector overload.
+template <class D, class V>
+SIMD_ATTR void AssertVecEqual(D d, const typename D::T (&expected)[D::N],
+                              const V actual, const int line) {
+  const auto expected_vector = load_unaligned(d, expected);
+  AssertVecEqual(d, expected_vector, actual, line);
+}
+
+#define ASSERT_VEC_EQ(d, expected, actual) \
+  AssertVecEqual(d, expected, actual, __LINE__)
+
+// Type lists
+
+// Invokes a default-constructed Test functor for lane type T, passing a
+// value-initialized T and a full-vector descriptor SIMD_FULL(T).
+template <class Test, typename T>
+SIMD_ATTR void Call() {
+  Test().template operator()(T(), SIMD_FULL(T)());
+}
+
+// Calls Test::operator()(T, D) for each unsigned lane type
+// (uint8_t, uint16_t, uint32_t, uint64_t), in that order.
+template <class Test>
+SIMD_ATTR void ForeachUnsignedLaneType() {
+  Call<Test, uint8_t>();
+  Call<Test, uint16_t>();
+  Call<Test, uint32_t>();
+  Call<Test, uint64_t>();
+}
+
+// Calls Test::operator()(T, D) for each signed integer lane type
+// (int8_t, int16_t, int32_t, int64_t), in that order.
+template <class Test>
+SIMD_ATTR void ForeachSignedLaneType() {
+  Call<Test, int8_t>();
+  Call<Test, int16_t>();
+  Call<Test, int32_t>();
+  Call<Test, int64_t>();
+}
+
+// Calls Test::operator()(T, D) for the floating-point lane types float and
+// double, in that order.
+template <class Test>
+SIMD_ATTR void ForeachFloatLaneType() {
+  Call<Test, float>();
+  Call<Test, double>();
+}
+
+// Calls Test::operator()(T, D) for every lane type: unsigned integers first,
+// then signed integers, then floating point.
+template <class Test>
+SIMD_ATTR void ForeachLaneType() {
+  ForeachUnsignedLaneType<Test>();
+  ForeachSignedLaneType<Test>();
+  ForeachFloatLaneType<Test>();
+}
+
+namespace examples {
+
+namespace {
+// Example kernel: reads SIMD_FULL(int32_t)::N bytes from `values`, widens
+// them to int32 and converts to float, reinterprets the float lanes as int32
+// via cast_to, computes (bits >> 23) - 127 and writes the result, narrowed
+// back to uint8_t, to `log2`.
+SIMD_ATTR void FloorLog2(const uint8_t* SIMD_RESTRICT values,
+                         uint8_t* SIMD_RESTRICT log2) {
+  // One descriptor per data type involved; d8 has as many lanes as d32.
+  const SIMD_FULL(int32_t) d32;
+  const SIMD_FULL(float) df;
+  const SIMD_PART(uint8_t, d32.N) d8;
+
+  const auto bytes = load(d8, values);
+  const auto as_int = convert_to(d32, bytes);
+  const auto as_float = convert_to(df, as_int);
+  const auto float_bits = cast_to(d32, as_float);
+  const auto bias = set1(d32, 127);
+  const auto exponent = shift_right<23>(float_bits) - bias;
+  const auto narrowed = convert_to(d8, exponent);
+  store(narrowed, d8, log2);
+}
+}  // namespace
+
+// Checks FloorLog2 on powers of two: each input byte is 1 << e with e in
+// [0, 7] drawn from a fixed-seed generator, and the output must equal e.
+SIMD_ATTR void TestFloorLog2() {
+  // FloorLog2 consumes kStep bytes per call.
+  const size_t kStep = SIMD_FULL(int32_t)::N;
+  const size_t kBytes = 32;
+  static_assert(kBytes % kStep == 0, "Must be a multiple of kStep");
+
+  uint8_t in[kBytes];
+  uint8_t expected[kBytes];
+  RandomState rng{1234};
+  for (size_t i = 0; i < kBytes; ++i) {
+    expected[i] = Random32(&rng) & 7;
+    in[i] = 1u << expected[i];
+  }
+  // Sized by kBytes (not a literal) so it cannot fall out of sync with the
+  // input buffers and the loops below.
+  uint8_t out[kBytes];
+  for (size_t i = 0; i < kBytes; i += kStep) {
+    FloorLog2(in + i, out + i);
+  }
+  int sum = 0;
+  for (size_t i = 0; i < kBytes; ++i) {
+    ASSERT_EQ(expected[i], out[i]);
+    sum += out[i];
+  }
+  // Keeps the compiler from discarding the computation of the outputs.
+  PreventElision(sum);
+}
+
+// Example: copies `size` bytes from `from` to `to`. Whole vectors of d.N
+// bytes are copied first; the remaining bytes are copied one at a time using
+// the scalar descriptor.
+SIMD_ATTR void Copy(const uint8_t* SIMD_RESTRICT from, const size_t size,
+                    uint8_t* SIMD_RESTRICT to) {
+  // Width-agnostic (library-specified N)
+  const SIMD_FULL(uint8_t) d;
+  const Scalar<uint8_t> ds;
+
+  size_t pos = 0;
+  // Full vectors while at least d.N bytes remain.
+  while (pos + d.N <= size) {
+    const auto chunk = load(d, from + pos);
+    store(chunk, d, to + pos);
+    pos += d.N;
+  }
+
+  // Tail: same load/store pair, one byte per iteration.
+  while (pos < size) {
+    const auto single = load(ds, from + pos);
+    store(single, ds, to + pos);
+    ++pos;
+  }
+}
+
+// Fills a 34-byte aligned buffer with bytes from a fixed-seed generator,
+// copies it with Copy() and verifies every byte of the destination.
+// (34 was presumably chosen so that Copy's scalar tail loop also runs -
+// confirm against the vector widths in use.)
+SIMD_ATTR void TestCopy() {
+  const size_t kSize = 34;
+  SIMD_ALIGN uint8_t from[kSize];
+  SIMD_ALIGN uint8_t to[kSize];
+
+  RandomState rng{1234};
+  for (uint8_t& byte : from) {
+    byte = Random32(&rng) & 0xFF;
+  }
+
+  Copy(from, kSize, to);
+
+  size_t index = 0;
+  while (index < kSize) {
+    ASSERT_EQ(from[index], to[index]);
+    ++index;
+  }
+}
+
+// Example: updates x_array in place with
+//   x_array[i] = mul_add(mul_array[i], x_array[i], add_array[i])
+// one full vector at a time. Type-agnostic (caller-specified lane type) and
+// width-agnostic (uses the best available instruction set).
+// Every iteration loads and stores d.N lanes and there is no tail loop, so
+// the arrays must hold a multiple of d.N elements.
+template <typename T>
+SIMD_ATTR void MulAdd(const T* SIMD_RESTRICT mul_array,
+                      const T* SIMD_RESTRICT add_array, const size_t size,
+                      T* SIMD_RESTRICT x_array) {
+  const SIMD_FULL(T) d;
+  size_t offset = 0;
+  while (offset < size) {
+    const auto factor = load(d, mul_array + offset);
+    const auto addend = load(d, add_array + offset);
+    auto value = load(d, x_array + offset);
+    value = mul_add(factor, value, addend);
+    store(value, d, x_array + offset);
+    offset += d.N;
+  }
+}
+
+// Runs MulAdd on three 64-element arrays filled from a fixed-seed generator
+// and returns the sum of the updated x array, accumulated in double and
+// converted to T on return.
+template <typename T>
+SIMD_ATTR T SumMulAdd() {
+  const size_t kSize = 64;
+  SIMD_ALIGN T mul[kSize];
+  SIMD_ALIGN T x[kSize];
+  SIMD_ALIGN T add[kSize];
+
+  RandomState rng{1234};
+  // The three draws per element must stay in this order to reproduce the
+  // same inputs.
+  for (size_t i = 0; i != kSize; ++i) {
+    mul[i] = Random32(&rng) & 0xF;
+    x[i] = Random32(&rng) & 0xFF;
+    add[i] = Random32(&rng) & 0xFF;
+  }
+
+  MulAdd(mul, add, kSize, x);
+
+  double total = 0.0;
+  for (size_t i = 0; i != kSize; ++i) {
+    total += x[i];
+  }
+  return total;
+}
+
+// Runs the example tests: FloorLog2, Copy, and MulAdd for float and double.
+SIMD_ATTR void TestExamples() {
+  TestFloorLog2();
+  TestCopy();
+
+  // SumMulAdd uses a fixed seed, so the sum is a known constant.
+  ASSERT_EQ(78944.0f, SumMulAdd<float>());
+  ASSERT_EQ(78944.0, SumMulAdd<double>());
+}
+
+}  // namespace examples
+
+namespace basic {
+
+// util.h
+
+// Verifies LimitsMin<T>() and LimitsMax<T>() for all 8/16/32/64-bit unsigned
+// and signed integer types against literal constants.
+SIMD_ATTR void TestLimits() {
+  // Unsigned minimum is zero.
+  ASSERT_EQ(uint8_t(0), LimitsMin<uint8_t>());
+  ASSERT_EQ(uint16_t(0), LimitsMin<uint16_t>());
+  ASSERT_EQ(uint32_t(0), LimitsMin<uint32_t>());
+  ASSERT_EQ(uint64_t(0), LimitsMin<uint64_t>());
+
+  // Signed minimum; the 32/64-bit values are written as the unsigned bit
+  // pattern with only the sign bit set.
+  ASSERT_EQ(int8_t(-128), LimitsMin<int8_t>());
+  ASSERT_EQ(int16_t(-32768), LimitsMin<int16_t>());
+  ASSERT_EQ(int32_t(0x80000000u), LimitsMin<int32_t>());
+  ASSERT_EQ(int64_t(0x8000000000000000ull), LimitsMin<int64_t>());
+
+  // Unsigned maximum: all bits set.
+  ASSERT_EQ(uint8_t(0xFF), LimitsMax<uint8_t>());
+  ASSERT_EQ(uint16_t(0xFFFF), LimitsMax<uint16_t>());
+  ASSERT_EQ(uint32_t(0xFFFFFFFFu), LimitsMax<uint32_t>());
+  ASSERT_EQ(uint64_t(0xFFFFFFFFFFFFFFFFull), LimitsMax<uint64_t>());
+
+  // Signed maximum: all bits set except the sign bit.
+  ASSERT_EQ(int8_t(0x7F), LimitsMax<int8_t>());
+  ASSERT_EQ(int16_t(0x7FFF), LimitsMax<int16_t>());
+  ASSERT_EQ(int32_t(0x7FFFFFFFu), LimitsMax<int32_t>());
+  ASSERT_EQ(int64_t(0x7FFFFFFFFFFFFFFFull), LimitsMax<int64_t>());
+}
+
+// Test the ToString used to output test failures
+
+// Checks ToString for 64-bit integers (zero, positive, negative, both
+// extremes) and for floating-point values. The integer cases also check the
+// returned pointer, which is expected to point at the terminating NUL.
+SIMD_ATTR void TestToString() {
+  char buf[32];
+  const char* end;
+
+  end = ToString(int64_t(0), buf);
+  ASSERT_EQ('0', end[-1]);
+  ASSERT_EQ('\0', end[0]);
+
+  end = ToString(int64_t(3), buf);
+  ASSERT_EQ('3', end[-1]);
+  ASSERT_EQ('\0', end[0]);
+
+  end = ToString(int64_t(-1), buf);
+  ASSERT_EQ('-', end[-2]);
+  ASSERT_EQ('1', end[-1]);
+  ASSERT_EQ('\0', end[0]);
+
+  // Largest and smallest int64_t values.
+  ToString(0x7FFFFFFFFFFFFFFFLL, buf);
+  ASSERT_EQ(true, StringsEqual("9223372036854775807", buf));
+
+  ToString(int64_t(0x8000000000000000ULL), buf);
+  ASSERT_EQ(true, StringsEqual("-9223372036854775808", buf));
+
+  ToString(0.0, buf);
+  ASSERT_EQ(true, StringsEqual("0.0", buf));
+  ToString(4.0, buf);
+  ASSERT_EQ(true, StringsEqual("4.0", buf));
+  ToString(-1.0, buf);
+  ASSERT_EQ(true, StringsEqual("-1.0", buf));
+  // These two pass C strings directly to ASSERT_EQ; the const_cast presumably
+  // makes both arguments deduce to the same type (const char*) - confirm.
+  ToString(-1.25, buf);
+  ASSERT_EQ("-1.2500000000000000", const_cast<const char*>(buf));
+  ToString(2.125f, buf);
+  ASSERT_EQ("2.12500000", const_cast<const char*>(buf));
+}
+
+// Compile-time check that T is classified as neither float nor signed.
+// The arguments are unused; all work happens in the static_asserts.
+struct TestIsUnsigned {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    static_assert(!IsFloat<T>(), "Expected !IsFloat");
+    static_assert(!IsSigned<T>(), "Expected !IsSigned");
+  }
+};
+
+// Compile-time check that T is classified as signed but not float.
+// The arguments are unused; all work happens in the static_asserts.
+struct TestIsSigned {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    static_assert(!IsFloat<T>(), "Expected !IsFloat");
+    static_assert(IsSigned<T>(), "Expected IsSigned");
+  }
+};
+
+// Compile-time check that T is classified as float and also as signed.
+// The arguments are unused; all work happens in the static_asserts.
+struct TestIsFloat {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    static_assert(IsFloat<T>(), "Expected IsFloat");
+    static_assert(IsSigned<T>(), "Floats are also considered signed");
+  }
+};
+
+// Runs the IsFloat/IsSigned classification checks for each group of lane
+// types with the matching test functor.
+SIMD_ATTR void TestType() {
+  ForeachUnsignedLaneType<TestIsUnsigned>();
+  ForeachSignedLaneType<TestIsSigned>();
+  ForeachFloatLaneType<TestIsFloat>();
+}
+
+// Ensures vector add/subtract wrap around (mod 2^bits) at the limits of T.
+struct TestOverflowT {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto one = set1(d, T(1));
+    const auto all_max = set1(d, LimitsMax<T>());
+    const auto all_min = set1(d, LimitsMin<T>());
+
+    // min - 1 wraps to max (unsigned underflow / negative -> positive).
+    const auto below_min = all_min - one;
+    ASSERT_VEC_EQ(d, all_max, below_min);
+
+    // max + 1 wraps to min (unsigned overflow / positive -> negative).
+    const auto above_max = all_max + one;
+    ASSERT_VEC_EQ(d, all_min, above_max);
+  }
+};
+
+// Runs the wraparound test for all unsigned and signed integer lane types
+// (floating-point types are not included).
+SIMD_ATTR void TestOverflow() {
+  ForeachUnsignedLaneType<TestOverflowT>();
+  ForeachSignedLaneType<TestOverflowT>();
+}
+
+// Builds the expected type name independently of vec_name (prefix letter, bit
+// count, and - unless the target is SIMD_NONE - 'x' plus the lane count) and
+// reports a failure if vec_name<D>() differs.
+struct TestName {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    // 7 bytes: holds names of up to six characters plus the terminator.
+    // The first element is the prefix; the rest start out zeroed.
+    char expected[7] = {IsFloat<T>() ? 'f' : (IsSigned<T>() ? 'i' : 'u')};
+    char* end = ToString(sizeof(T) * 8, expected + 1);
+    if (D::Target::value != SIMD_NONE) {
+      *end++ = 'x';
+      end = ToString(d.N, end);
+    }
+    if (!StringsEqual(expected, vec_name<D>())) {
+      NotifyFailure(SIMD_TARGET::value, __LINE__, expected, -1, expected,
+                    vec_name<D>());
+    }
+  }
+};
+
+struct TestSet {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    // setzero
+    const auto v0 = setzero(d);
+    T expected[d.N] = {T(0)};
+    ASSERT_VEC_EQ(d, expected, v0);
+
+    // set1
+    const auto v2 = set1(d, T(2));
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = 2;
+    }
+    ASSERT_VEC_EQ(d, expected, v2);
+
+    // iota
+    const auto vi = iota(d, T(5));
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = 5 + i;
+    }
+    ASSERT_VEC_EQ(d, expected, vi);
+
+    // undefined
+    const auto vu = undefined(d);
+    store(vu, d, expected);
+  }
+};
+
+// Verifies that the vector type D::V can be copy-constructed and
+// copy-assigned, and that the copies compare equal to the source.
+struct TestCopyAndAssign {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    using V = typename D::V;
+
+    // copy V
+    const auto v3 = iota(d, 3);
+    V v3b(v3);
+    ASSERT_VEC_EQ(d, v3, v3b);
+
+    // assign V (default-construct first, then assign)
+    V v3c;
+    v3c = v3;
+    ASSERT_VEC_EQ(d, v3, v3c);
+  }
+};
+
+// Tests lower_half, upper_half and any_part with a half-width descriptor.
+// The source vector is iota(d, 1), i.e. lane i holds 1 + i. Each result is
+// stored into the first N2 lanes of a zero-initialized buffer, and the test
+// checks both the stored lanes and that the rest of the buffer is untouched.
+// The whole body is compiled out when the target is SIMD_NONE.
+struct TestHalf {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+#if SIMD_TARGET_VALUE != SIMD_NONE
+    size_t i;
+    // Rounded up so that N2 is never zero.
+    constexpr size_t N2 = (d.N + 1) / 2;
+    const SIMD_PART(T, N2) d2;
+
+    const auto v = iota(d, 1);
+    SIMD_ALIGN T lanes[d.N] = {0};
+
+    store(lower_half(v), d2, lanes);
+    i = 0;
+    for (; i < N2; ++i) {
+      ASSERT_EQ(T(1 + i), lanes[i]);
+    }
+    // Other half remains unchanged
+    for (; i < d.N; ++i) {
+      ASSERT_EQ(T(0), lanes[i]);
+    }
+    // NOTE(review): originally labelled "Also test the wrapper", but this is
+    // the same lower_half call as above; presumably one of the two was meant
+    // to use a different entry point - confirm.
+    store(lower_half(v), d2, lanes);
+    i = 0;
+    for (; i < N2; ++i) {
+      ASSERT_EQ(T(1 + i), lanes[i]);
+    }
+    // Other half remains unchanged
+    for (; i < d.N; ++i) {
+      ASSERT_EQ(T(0), lanes[i]);
+    }
+
+    // Upper half: lanes N2.. of v, i.e. values N2 + 1 + i.
+    store(upper_half(v), d2, lanes);
+    i = 0;
+    for (; i < N2; ++i) {
+      ASSERT_EQ(T(N2 + 1 + i), lanes[i]);
+    }
+    // Other half remains unchanged
+    for (; i < d.N; ++i) {
+      ASSERT_EQ(T(0), lanes[i]);
+    }
+    // NOTE(review): originally labelled "Also test the wrapper", but this is
+    // the same upper_half call as above - confirm the intended entry point.
+    store(upper_half(v), d2, lanes);
+    i = 0;
+    for (; i < N2; ++i) {
+      ASSERT_EQ(T(N2 + 1 + i), lanes[i]);
+    }
+    // Other half remains unchanged
+    for (; i < d.N; ++i) {
+      ASSERT_EQ(T(0), lanes[i]);
+    }
+
+    // any_part with the half descriptor is expected to yield the lower lanes.
+    store(any_part(d2, v), d2, lanes);
+    i = 0;
+    for (; i < N2; ++i) {
+      ASSERT_EQ(T(1 + i), lanes[i]);
+    }
+    // Other half remains unchanged
+    for (; i < d.N; ++i) {
+      ASSERT_EQ(T(0), lanes[i]);
+    }
+
+    // Ensure part lanes are contiguous
+    const auto vi = iota(d2, 1);
+    store(vi, d2, lanes);
+    // This loop index intentionally shadows the outer `i`.
+    for (size_t i = 1; i < N2; ++i) {
+      ASSERT_EQ(T(lanes[i - 1] + 1), lanes[i]);
+    }
+#endif
+  }
+};
+
+// Tests any_part with a quarter-width descriptor: the first N4 lanes of
+// iota(d, 1) are stored into a buffer pre-filled with 123, and the test checks
+// both the stored lanes and that the remaining lanes still hold 123.
+struct TestQuarterT {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    // Rounded up so that N4 is never zero.
+    constexpr size_t N4 = (d.N + 3) / 4;
+    const SIMD_PART(T, N4) d4;
+
+    const auto v = iota(d, 1);
+    SIMD_ALIGN T lanes[d.N];
+    // Sentinel value used to detect writes beyond the first N4 lanes.
+    for (size_t i = 0; i < d.N; ++i) {
+      lanes[i] = 123;
+    }
+    const auto lo = any_part(d4, v);
+    store(lo, d4, lanes);
+    size_t i = 0;
+    for (; i < N4; ++i) {
+      ASSERT_EQ(T(i + 1), lanes[i]);
+    }
+    // Other lanes remain unchanged
+    for (; i < d.N; ++i) {
+      ASSERT_EQ(T(123), lanes[i]);
+    }
+  }
+};
+
+// Runs TestQuarterT for the 8/16/32-bit integer lane types and float; 64-bit
+// lanes and double are not included.
+SIMD_ATTR void TestQuarter() {
+  Call<TestQuarterT, uint8_t>();
+  Call<TestQuarterT, uint16_t>();
+  Call<TestQuarterT, uint32_t>();
+  Call<TestQuarterT, int8_t>();
+  Call<TestQuarterT, int16_t>();
+  Call<TestQuarterT, int32_t>();
+  Call<TestQuarterT, float>();
+}
+
+// Entry point for the "basic" group: utility checks first (limits, ToString,
+// type classification), then the per-lane-type vector tests.
+SIMD_ATTR void TestBasic() {
+  TestLimits();
+  TestToString();
+  TestType();
+  ForeachLaneType<TestName>();
+  TestOverflow();
+  ForeachLaneType<TestSet>();
+  ForeachLaneType<TestCopyAndAssign>();
+  ForeachLaneType<TestHalf>();
+  TestQuarter();
+}
+
+}  // namespace basic
+
+namespace arithmetic {
+
+// Tests operator+, operator-, operator+= and operator-= on vectors built with
+// iota, comparing against lane-wise sums computed with scalar code.
+struct TestPlusMinus {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    // Lane i of fromK holds K + i.
+    const auto from2 = iota(d, 2);
+    const auto from3 = iota(d, 3);
+    const auto from4 = iota(d, 4);
+
+    T expected[d.N];
+
+    // Binary operators.
+    for (size_t lane = 0; lane < d.N; ++lane) {
+      expected[lane] = (2 + lane) + (3 + lane);
+    }
+    ASSERT_VEC_EQ(d, expected, from2 + from3);
+    ASSERT_VEC_EQ(d, from3, (from2 + from3) - from2);
+
+    // Compound assignment.
+    for (size_t lane = 0; lane < d.N; ++lane) {
+      expected[lane] = (2 + lane) + (4 + lane);
+    }
+    auto accumulator = from2;
+    accumulator += from4;  // accumulator == 6,8..
+    ASSERT_VEC_EQ(d, expected, accumulator);
+
+    accumulator -= from4;
+    ASSERT_VEC_EQ(d, from2, accumulator);
+  }
+};
+
+// Tests saturated_add/saturated_subtract for unsigned lanes: results that fit
+// must match plain +/-, sums beyond LimitsMax must clamp to LimitsMax, and
+// differences below zero must clamp to zero.
+struct TestUnsignedSaturatingArithmetic {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto v0 = setzero(d);
+    const auto vi = iota(d, 1);
+    const auto vm = set1(d, LimitsMax<T>());
+
+    ASSERT_VEC_EQ(d, v0 + v0, saturated_add(v0, v0));
+    ASSERT_VEC_EQ(d, v0 + vi, saturated_add(v0, vi));
+    ASSERT_VEC_EQ(d, v0 + vm, saturated_add(v0, vm));
+    // Would overflow: clamps to max.
+    ASSERT_VEC_EQ(d, vm, saturated_add(vi, vm));
+    ASSERT_VEC_EQ(d, vm, saturated_add(vm, vm));
+
+    ASSERT_VEC_EQ(d, v0, saturated_subtract(v0, v0));
+    // Would underflow: clamps to zero.
+    ASSERT_VEC_EQ(d, v0, saturated_subtract(v0, vi));
+    ASSERT_VEC_EQ(d, v0, saturated_subtract(vi, vi));
+    ASSERT_VEC_EQ(d, v0, saturated_subtract(vi, vm));
+    ASSERT_VEC_EQ(d, vm - vi, saturated_subtract(vm, vi));
+  }
+};
+
+// Tests saturated_add/saturated_subtract for signed lanes: sums beyond
+// LimitsMax must clamp to LimitsMax and differences below LimitsMin must clamp
+// to LimitsMin.
+struct TestSignedSaturatingArithmetic {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto v0 = setzero(d);
+    const auto vi = iota(d, 1);
+    const auto vpm = set1(d, LimitsMax<T>());
+    // Lanes -N, -N+1, ..., -1.
+    const auto vn = iota(d, -T(d.N));
+    const auto vnm = set1(d, LimitsMin<T>());
+
+    ASSERT_VEC_EQ(d, v0, saturated_add(v0, v0));
+    ASSERT_VEC_EQ(d, vi, saturated_add(v0, vi));
+    ASSERT_VEC_EQ(d, vpm, saturated_add(v0, vpm));
+    // Would overflow: clamps to the positive maximum.
+    ASSERT_VEC_EQ(d, vpm, saturated_add(vi, vpm));
+    ASSERT_VEC_EQ(d, vpm, saturated_add(vpm, vpm));
+
+    ASSERT_VEC_EQ(d, v0, saturated_subtract(v0, v0));
+    ASSERT_VEC_EQ(d, v0 - vi, saturated_subtract(v0, vi));
+    ASSERT_VEC_EQ(d, vn, saturated_subtract(vn, v0));
+    // Would underflow: clamps to the negative minimum.
+    ASSERT_VEC_EQ(d, vnm, saturated_subtract(vnm, vi));
+    ASSERT_VEC_EQ(d, vnm, saturated_subtract(vnm, vpm));
+  }
+};
+
+// Runs the saturating-arithmetic tests for 8- and 16-bit lanes only
+// (unsigned and signed).
+SIMD_ATTR void TestSaturatingArithmetic() {
+  Call<TestUnsignedSaturatingArithmetic, uint8_t>();
+  Call<TestUnsignedSaturatingArithmetic, uint16_t>();
+  Call<TestSignedSaturatingArithmetic, int8_t>();
+  Call<TestSignedSaturatingArithmetic, int16_t>();
+}
+
+// Tests average_round on the values 0, 1 and 2. The expectations
+// avg(0,1) == 1 and avg(1,2) == 2 show that halves round upwards.
+struct TestAverageT {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto v0 = setzero(d);
+    const auto v1 = set1(d, T(1));
+    const auto v2 = set1(d, T(2));
+
+    ASSERT_VEC_EQ(d, v0, average_round(v0, v0));
+    ASSERT_VEC_EQ(d, v1, average_round(v0, v1));
+    ASSERT_VEC_EQ(d, v1, average_round(v1, v1));
+    ASSERT_VEC_EQ(d, v2, average_round(v1, v2));
+    ASSERT_VEC_EQ(d, v2, average_round(v2, v2));
+  }
+};
+
+// Runs the average_round test for uint8_t and uint16_t lanes only.
+SIMD_ATTR void TestAverage() {
+  Call<TestAverageT, uint8_t>();
+  Call<TestAverageT, uint16_t>();
+}
+
+// Tests abs on zero, +1, -1 and both limits of T.
+struct TestAbsT {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto v0 = setzero(d);
+    const auto vp1 = set1(d, T(1));
+    const auto vn1 = set1(d, T(-1));
+    const auto vpm = set1(d, LimitsMax<T>());
+    const auto vnm = set1(d, LimitsMin<T>());
+
+    ASSERT_VEC_EQ(d, v0, abs(v0));
+    ASSERT_VEC_EQ(d, vp1, abs(vp1));
+    ASSERT_VEC_EQ(d, vp1, abs(vn1));
+    ASSERT_VEC_EQ(d, vpm, abs(vpm));
+    // The most negative value has no positive counterpart in T; the test
+    // expects abs to return it unchanged.
+    ASSERT_VEC_EQ(d, vnm, abs(vnm));
+  }
+};
+
+// Runs the abs test for int8_t, int16_t and int32_t lanes.
+SIMD_ATTR void TestAbs() {
+  Call<TestAbsT, int8_t>();
+  Call<TestAbsT, int16_t>();
+  Call<TestAbsT, int32_t>();
+}
+
+// Tests shifts by a compile-time count (shift_left<k>/shift_right<k>) and by a
+// runtime count shared by all lanes (shift_*_same with set_shift_*_count) for
+// unsigned lanes. Each case is checked through both interfaces.
+struct TestUnsignedShifts {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    // Index of the most significant bit of T.
+    constexpr int kSign = (sizeof(T) * 8) - 1;
+    const auto v0 = setzero(d);
+    // Lane i holds i.
+    const auto vi = iota(d, 0);
+    SIMD_ALIGN T expected[d.N];
+
+    // Shifting out of right side => zero
+    // (relies on every lane index being below 128).
+    ASSERT_VEC_EQ(d, v0, shift_right<7>(vi));
+    ASSERT_VEC_EQ(d, v0, shift_right_same(vi, set_shift_right_count(d, 7)));
+
+    // Simple left shift
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = T(i << 1);
+    }
+    ASSERT_VEC_EQ(d, expected, shift_left<1>(vi));
+    ASSERT_VEC_EQ(d, expected, shift_left_same(vi, set_shift_left_count(d, 1)));
+
+    // Simple right shift
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = T(i >> 1);
+    }
+    ASSERT_VEC_EQ(d, expected, shift_right<1>(vi));
+    ASSERT_VEC_EQ(d, expected,
+                  shift_right_same(vi, set_shift_right_count(d, 1)));
+
+    // Verify truncation for left-shift
+    // (the static_cast to T is what discards the bits shifted beyond T).
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = static_cast<T>((i << kSign) & ~T(0));
+    }
+    ASSERT_VEC_EQ(d, expected, shift_left<kSign>(vi));
+    ASSERT_VEC_EQ(d, expected,
+                  shift_left_same(vi, set_shift_left_count(d, kSign)));
+  }
+};
+
+// Tests shifts by a compile-time count and by a runtime count shared by all
+// lanes for signed lanes, including sign extension on right shifts and left
+// shifts of negative values. Each case is checked through both interfaces.
+struct TestSignedShifts {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto v0 = setzero(d);
+    // Lane i holds i.
+    const auto vi = iota(d, 0);
+    SIMD_ALIGN T expected[d.N];
+
+    // Shifting out of right side => zero
+    ASSERT_VEC_EQ(d, v0, shift_right<7>(vi));
+    ASSERT_VEC_EQ(d, v0, shift_right_same(vi, set_shift_right_count(d, 7)));
+
+    // Simple left shift
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = T(i << 1);
+    }
+    ASSERT_VEC_EQ(d, expected, shift_left<1>(vi));
+    ASSERT_VEC_EQ(d, expected, shift_left_same(vi, set_shift_left_count(d, 1)));
+
+    // Simple right shift
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = T(i >> 1);
+    }
+    ASSERT_VEC_EQ(d, expected, shift_right<1>(vi));
+    ASSERT_VEC_EQ(d, expected,
+                  shift_right_same(vi, set_shift_right_count(d, 1)));
+
+    // Sign extension
+    constexpr T min = LimitsMin<T>();
+    const auto vn = iota(d, min);
+    // NOTE(review): (min + i) is evaluated in size_t because i is size_t; the
+    // expected value therefore relies on size_t being wider than T - confirm
+    // for 32-bit builds.
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = T((min + i) >> 1);
+    }
+    ASSERT_VEC_EQ(d, expected, shift_right<1>(vn));
+    ASSERT_VEC_EQ(d, expected,
+                  shift_right_same(vn, set_shift_right_count(d, 1)));
+
+    // Shifting negative left
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = T((min + i) << 1);
+    }
+    ASSERT_VEC_EQ(d, expected, shift_left<1>(vn));
+    ASSERT_VEC_EQ(d, expected, shift_left_same(vn, set_shift_left_count(d, 1)));
+  }
+};
+
+#if SIMD_TARGET_VALUE != SIMD_SSE4
+
+// Tests operator<< and operator>> with a vector of per-lane shift counts for
+// unsigned lanes. Only compiled when the target is not SSE4.
+struct TestUnsignedVarShifts {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    // Index of the most significant bit of T.
+    constexpr int kSign = (sizeof(T) * 8) - 1;
+    const auto v0 = setzero(d);
+    const auto v1 = set1(d, 1);
+    // Lane i holds i.
+    const auto vi = iota(d, 0);
+    SIMD_ALIGN T expected[d.N];
+
+    // Shifting out of right side => zero
+    ASSERT_VEC_EQ(d, v0, vi >> set1(d, 7));
+
+    // Simple left shift
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = T(i << 1);
+    }
+    ASSERT_VEC_EQ(d, expected, vi << set1(d, 1));
+
+    // Simple right shift
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = T(i >> 1);
+    }
+    ASSERT_VEC_EQ(d, expected, vi >> set1(d, 1));
+
+    // Verify truncation for left-shift
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = static_cast<T>((i << kSign) & ~T(0));
+    }
+    ASSERT_VEC_EQ(d, expected, vi << set1(d, kSign));
+
+    // Verify variable left shift (assumes < 32 lanes)
+    // Lane i is shifted by i, so the count must stay below the width of T.
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = T(1) << i;
+    }
+    ASSERT_VEC_EQ(d, expected, v1 << vi);
+  }
+};
+
+// Tests operator<< with a vector of per-lane shift counts for signed lanes:
+// uniform count, negative inputs, and a different count in every lane.
+// Only compiled when the target is not SSE4.
+struct TestSignedVarLeftShifts {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto v1 = set1(d, 1);
+    // Lane i holds i.
+    const auto vi = iota(d, 0);
+
+    SIMD_ALIGN T expected[d.N];
+
+    // Simple left shift
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = T(i << 1);
+    }
+    ASSERT_VEC_EQ(d, expected, vi << v1);
+
+    // Shifting negative numbers left
+    constexpr T min = LimitsMin<T>();
+    const auto vn = iota(d, min);
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = T((min + i) << 1);
+    }
+    ASSERT_VEC_EQ(d, expected, vn << v1);
+
+    // Differing shift counts (assumes < 32 lanes)
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = T(1) << i;
+    }
+    ASSERT_VEC_EQ(d, expected, v1 << vi);
+  }
+};
+
+// Tests operator>> with a vector of per-lane shift counts for signed lanes:
+// uniform count, sign extension of negative inputs, and a different count in
+// every lane. Only compiled when the target is not SSE4.
+struct TestSignedVarRightShifts {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto v0 = setzero(d);
+    // Lane i holds i.
+    const auto vi = iota(d, 0);
+    const auto vmax = set1(d, LimitsMax<T>());
+    SIMD_ALIGN T expected[d.N];
+
+    // Shifting out of right side => zero
+    ASSERT_VEC_EQ(d, v0, vi >> set1(d, 7));
+
+    // Simple right shift
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = T(i >> 1);
+    }
+    ASSERT_VEC_EQ(d, expected, vi >> set1(d, 1));
+
+    // Sign extension
+    constexpr T min = LimitsMin<T>();
+    const auto vn = iota(d, min);
+    // NOTE(review): (min + i) is evaluated in size_t because i is size_t; the
+    // expected value therefore relies on size_t being wider than T - confirm
+    // for 32-bit builds.
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = T((min + i) >> 1);
+    }
+    ASSERT_VEC_EQ(d, expected, vn >> set1(d, 1));
+
+    // Differing shift counts (assumes < 32 lanes)
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = LimitsMax<T>() >> i;
+    }
+    ASSERT_VEC_EQ(d, expected, vmax >> vi);
+  }
+};
+
+#endif
+
+// Runs the shift tests for the lane types listed below. The per-lane variable
+// shift tests are skipped when the target is SSE4.
+SIMD_ATTR void TestShifts() {
+  // No u8.
+  Call<TestUnsignedShifts, uint16_t>();
+  Call<TestUnsignedShifts, uint32_t>();
+  Call<TestUnsignedShifts, uint64_t>();
+  // No i8.
+  Call<TestSignedShifts, int16_t>();
+  Call<TestSignedShifts, int32_t>();
+  // No i64/f32/f64.
+
+#if SIMD_TARGET_VALUE != SIMD_SSE4
+  Call<TestUnsignedVarShifts, uint32_t>();
+  Call<TestUnsignedVarShifts, uint64_t>();
+  Call<TestSignedVarLeftShifts, int32_t>();
+  Call<TestSignedVarRightShifts, int32_t>();
+  Call<TestSignedVarLeftShifts, int64_t>();
+// No i64 (right-shift).
+#endif
+}
+
+// Tests min/max for unsigned lanes using zero, two small ascending sequences
+// and a sequence whose last lane equals LimitsMax<T>().
+struct TestUnsignedMinMax {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto v0 = setzero(d);
+    const auto v1 = iota(d, 1);
+    const auto v2 = iota(d, 2);
+    // Starts at max - N + 1 so that the final lane is exactly max.
+    const auto v_max = iota(d, LimitsMax<T>() - d.N + 1);
+    ASSERT_VEC_EQ(d, v1, min(v1, v2));
+    ASSERT_VEC_EQ(d, v2, max(v1, v2));
+    ASSERT_VEC_EQ(d, v0, min(v1, v0));
+    ASSERT_VEC_EQ(d, v1, max(v1, v0));
+    ASSERT_VEC_EQ(d, v1, min(v1, v_max));
+    ASSERT_VEC_EQ(d, v_max, max(v1, v_max));
+    ASSERT_VEC_EQ(d, v0, min(v0, v_max));
+    ASSERT_VEC_EQ(d, v_max, max(v0, v_max));
+  }
+};
+
+// Tests min/max for signed lanes using positive sequences, a sequence of
+// small negative values and a sequence starting at LimitsMin<T>().
+struct TestSignedMinMax {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto v1 = iota(d, 1);
+    const auto v2 = iota(d, 2);
+    // Lanes -N, -N+1, ..., -1.
+    const auto v_neg = iota(d, -T(d.N));
+    const auto v_neg_max = iota(d, LimitsMin<T>());
+    ASSERT_VEC_EQ(d, v1, min(v1, v2));
+    ASSERT_VEC_EQ(d, v2, max(v1, v2));
+    ASSERT_VEC_EQ(d, v_neg, min(v1, v_neg));
+    ASSERT_VEC_EQ(d, v1, max(v1, v_neg));
+    ASSERT_VEC_EQ(d, v_neg_max, min(v1, v_neg_max));
+    ASSERT_VEC_EQ(d, v1, max(v1, v_neg_max));
+    ASSERT_VEC_EQ(d, v_neg_max, min(v_neg, v_neg_max));
+    ASSERT_VEC_EQ(d, v_neg, max(v_neg, v_neg_max));
+  }
+};
+
+// Tests min/max for floating-point lanes using two positive sequences and a
+// sequence of negative values.
+struct TestFloatMinMax {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto v1 = iota(d, 1);
+    const auto v2 = iota(d, 2);
+    // Lanes -N, -N+1, ..., -1.
+    const auto v_neg = iota(d, -T(d.N));
+    ASSERT_VEC_EQ(d, v1, min(v1, v2));
+    ASSERT_VEC_EQ(d, v2, max(v1, v2));
+    ASSERT_VEC_EQ(d, v_neg, min(v1, v_neg));
+    ASSERT_VEC_EQ(d, v1, max(v1, v_neg));
+  }
+};
+
+// Runs the min/max tests for 8/16/32-bit integer lanes, float and double.
+// 64-bit integer lanes are not included.
+SIMD_ATTR void TestMinMax() {
+  Call<TestUnsignedMinMax, uint8_t>();
+  Call<TestUnsignedMinMax, uint16_t>();
+  Call<TestUnsignedMinMax, uint32_t>();
+  // No u64.
+  Call<TestSignedMinMax, int8_t>();
+  Call<TestSignedMinMax, int16_t>();
+  Call<TestSignedMinMax, int32_t>();
+  // No i64.
+  Call<TestFloatMinMax, float>();
+  Call<TestFloatMinMax, double>();
+}
+
+// Tests operator* for unsigned lanes: identities with 0 and 1, lane-wise
+// products of small sequences, and truncation of max * max to the low bits.
+struct TestUnsignedMul {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto v0 = setzero(d);
+    const auto v1 = set1(d, T(1));
+    // Lane i holds 1 + i (vi) and 3 + i (vj).
+    const auto vi = iota(d, 1);
+    const auto vj = iota(d, 3);
+    T lanes[d.N];
+    ASSERT_VEC_EQ(d, v0, v0 * v0);
+    ASSERT_VEC_EQ(d, v1, v1 * v1);
+    ASSERT_VEC_EQ(d, vi, v1 * vi);
+    ASSERT_VEC_EQ(d, vi, vi * v1);
+
+    for (size_t i = 0; i < d.N; ++i) {
+      lanes[i] = (1 + i) * (1 + i);
+    }
+    ASSERT_VEC_EQ(d, lanes, vi * vi);
+
+    for (size_t i = 0; i < d.N; ++i) {
+      lanes[i] = (1 + i) * (3 + i);
+    }
+    ASSERT_VEC_EQ(d, lanes, vi * vj);
+
+    const T max = LimitsMax<T>();
+    const auto vmax = set1(d, max);
+    ASSERT_VEC_EQ(d, vmax, vmax * v1);
+    ASSERT_VEC_EQ(d, vmax, v1 * vmax);
+
+    // Expected low bits of max * max, computed in 64-bit arithmetic.
+    // The mask computation requires bits < 64, i.e. T narrower than 64 bits.
+    const size_t bits = sizeof(T) * 8;
+    const uint64_t mask = (1ull << bits) - 1;
+    const T max2 = (uint64_t(max) * max) & mask;
+    ASSERT_VEC_EQ(d, set1(d, max2), vmax * vmax);
+  }
+};
+
+// Checks lane-wise multiplication of signed integers: identities, squares of
+// a positive ramp, and negative-times-positive in both operand orders.
+struct TestSignedMul {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto zero = setzero(d);
+    const auto one = set1(d, T(1));
+    const auto from1 = iota(d, 1);
+    const auto negative = iota(d, -T(d.N));
+    T expected[d.N];
+
+    // Multiplying by zero or one.
+    ASSERT_VEC_EQ(d, zero, zero * zero);
+    ASSERT_VEC_EQ(d, one, one * one);
+    ASSERT_VEC_EQ(d, from1, one * from1);
+    ASSERT_VEC_EQ(d, from1, from1 * one);
+
+    // Squares of 1, 2, 3, ...
+    for (size_t lane = 0; lane < d.N; ++lane) {
+      expected[lane] = (1 + lane) * (1 + lane);
+    }
+    ASSERT_VEC_EQ(d, expected, from1 * from1);
+
+    // Negative ramp times positive ramp; the product must commute.
+    for (size_t lane = 0; lane < d.N; ++lane) {
+      expected[lane] = (-T(d.N) + lane) * (1 + lane);
+    }
+    ASSERT_VEC_EQ(d, expected, negative * from1);
+    ASSERT_VEC_EQ(d, expected, from1 * negative);
+  }
+};
+
+// Runs the multiplication tests for 16- and 32-bit integer lanes only; the
+// notes below list the lane types that are not exercised.
+SIMD_ATTR void TestMul() {
+  // No u8.
+  Call<TestUnsignedMul, uint16_t>();
+  Call<TestUnsignedMul, uint32_t>();
+  // No u64,i8.
+  Call<TestSignedMul, int16_t>();
+  Call<TestSignedMul, int32_t>();
+  // No i64.
+}
+
+// Checks ext::mul_high against a scalar reference: each expected lane is the
+// product of the two inputs shifted right by 16 and stored back into T.
+struct TestMulHi16 {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    SIMD_ALIGN T in_lanes[d.N];
+    SIMD_ALIGN T expected_lanes[d.N];
+    const auto vi = iota(d, 1);
+    const auto vni = iota(d, -T(d.N));
+
+    // Anything times zero has a zero upper half.
+    const auto v0 = setzero(d);
+    ASSERT_VEC_EQ(d, v0, ext::mul_high(v0, v0));
+    ASSERT_VEC_EQ(d, v0, ext::mul_high(v0, vi));
+    ASSERT_VEC_EQ(d, v0, ext::mul_high(vi, v0));
+
+    // Large positive squared
+    for (size_t i = 0; i < d.N; ++i) {
+      // Each successive lane halves the input (LimitsMax >> i).
+      in_lanes[i] = LimitsMax<T>() >> i;
+      expected_lanes[i] = (int32_t(in_lanes[i]) * in_lanes[i]) >> 16;
+    }
+    auto v = load(d, in_lanes);
+    ASSERT_VEC_EQ(d, expected_lanes, ext::mul_high(v, v));
+
+    // Large positive * small positive
+    for (size_t i = 0; i < d.N; ++i) {
+      expected_lanes[i] = (int32_t(in_lanes[i]) * (1 + i)) >> 16;
+    }
+    ASSERT_VEC_EQ(d, expected_lanes, ext::mul_high(v, vi));
+    ASSERT_VEC_EQ(d, expected_lanes, ext::mul_high(vi, v));
+
+    // Large positive * small negative
+    for (size_t i = 0; i < d.N; ++i) {
+      // NOTE(review): i is size_t, so if d.N is also unsigned, (i - d.N) wraps
+      // and the product/shift are evaluated in an unsigned type; the bits
+      // stored into T should still match the signed result - confirm.
+      expected_lanes[i] = (int32_t(in_lanes[i]) * (i - d.N)) >> 16;
+    }
+    ASSERT_VEC_EQ(d, expected_lanes, ext::mul_high(v, vni));
+    ASSERT_VEC_EQ(d, expected_lanes, ext::mul_high(vni, v));
+  }
+};
+
+// Checks mul_even: the even-indexed T1 lanes are multiplied into full-width
+// T2 results (T2 is the wider type, so its vector has half as many lanes).
+template <typename T1, typename T2>
+SIMD_ATTR void TestMulEvenT() {
+  const SIMD_FULL(T1) d_narrow;
+  const SIMD_FULL(T2) d_wide;  // wider type, half the lanes
+
+  // Zero times zero.
+  const auto zero = setzero(d_narrow);
+  ASSERT_VEC_EQ(d_wide, setzero(d_wide), mul_even(zero, zero));
+
+  // The scalar target has N=1, yet "lane 1" is written below (mul_even does
+  // not read it), so the input array holds at least two elements.
+  SIMD_ALIGN T1 input[SIMD_MAX(d_narrow.N, 2)];
+  SIMD_ALIGN T2 products[d_wide.N];
+  for (size_t even = 0; even < d_narrow.N; even += 2) {
+    const T1 factor = LimitsMax<T1>() >> even;
+    input[even] = factor;
+    // Placeholder: the upper half of the wide result lands in this position.
+    input[even + 1] = 1;
+    products[even / 2] = T2(factor) * factor;
+  }
+
+  const auto factors = load(d_narrow, input);
+  ASSERT_VEC_EQ(d_wide, products, mul_even(factors, factors));
+}
+
+// Runs the mul_even test for the signed and unsigned 32 -> 64 bit pairs.
+SIMD_ATTR void TestMulEven() {
+  TestMulEvenT<int32_t, int64_t>();
+  TestMulEvenT<uint32_t, uint64_t>();
+}
+
+// Exercises the fused multiply-add family. As pinned by the assertions below:
+//   mul_add(a, b, c)            == a * b + c
+//   nmul_add(a, b, c)           == -(a * b) + c
+//   ext::mul_subtract(a, b, c)  == a * b - c
+//   ext::nmul_subtract(a, b, c) == -(a * b) - c
+// fadd/fsub/fnadd are only called with the multiplier k1 (every lane 1), where
+// fadd(a, k1, c) == a + c, fsub(a, k1, c) == a - c and fnadd(a, k1, c) == c - a.
+struct TestMulAdd {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto k0 = setzero(d);
+    const auto k1 = set1(d, 1);
+    const auto v1 = iota(d, 1);
+    const auto v2 = iota(d, 2);
+    // Scratch buffer for expected lanes; refilled before each group below.
+    T lanes[d.N];
+    // A zero factor leaves only the addend.
+    ASSERT_VEC_EQ(d, k0, mul_add(k0, k0, k0));
+    ASSERT_VEC_EQ(d, v2, mul_add(k0, v1, v2));
+    ASSERT_VEC_EQ(d, v2, mul_add(v1, k0, v2));
+    ASSERT_VEC_EQ(d, k0, nmul_add(k0, k0, k0));
+    ASSERT_VEC_EQ(d, v2, nmul_add(k0, v1, v2));
+    ASSERT_VEC_EQ(d, v2, nmul_add(v1, k0, v2));
+
+    // Adding zero on either side.
+    ASSERT_VEC_EQ(d, v1, fadd(k0, k1, v1));
+    ASSERT_VEC_EQ(d, v2, fadd(k0, k1, v2));
+    ASSERT_VEC_EQ(d, v1, fadd(v1, k1, k0));
+    ASSERT_VEC_EQ(d, v2, fadd(v2, k1, k0));
+
+    // Subtracting zero, and subtracting a negated value from zero.
+    ASSERT_VEC_EQ(d, v2, fsub(v2, k1, k0));
+    ASSERT_VEC_EQ(d, v1, fsub(v1, k1, k0));
+    ASSERT_VEC_EQ(d, v2, fsub(k0, k1, neg(v2)));
+    ASSERT_VEC_EQ(d, v1, fsub(k0, k1, neg(v1)));
+
+    // Swapped arg order
+    ASSERT_VEC_EQ(d, v2, fnadd(k0, k1, v2));
+    ASSERT_VEC_EQ(d, v1, fnadd(k0, k1, v1));
+    ASSERT_VEC_EQ(d, v2, fnadd(neg(v2), k1, k0));
+    ASSERT_VEC_EQ(d, v1, fnadd(neg(v1), k1, k0));
+
+    // Expected: v1 + v2.
+    for (size_t i = 0; i < d.N; ++i) {
+      lanes[i] = (i + 1) + (i + 2);
+    }
+    ASSERT_VEC_EQ(d, lanes, fadd(v1, k1, v2));
+    ASSERT_VEC_EQ(d, lanes, fadd(v2, k1, v1));
+    ASSERT_VEC_EQ(d, lanes, fsub(v1, k1, neg(v2)));
+    ASSERT_VEC_EQ(d, lanes, fnadd(neg(v2), k1, v1));
+    // v2 - v1 is 1 in every lane.
+    ASSERT_VEC_EQ(d, k1, fsub(v2, k1, v1));
+    ASSERT_VEC_EQ(d, k1, fnadd(v1, k1, v2));
+
+    // Expected: v1 * v2 (zero addend).
+    for (size_t i = 0; i < d.N; ++i) {
+      lanes[i] = (i + 1) * (i + 2);
+    }
+    ASSERT_VEC_EQ(d, lanes, mul_add(v2, v1, k0));
+    ASSERT_VEC_EQ(d, lanes, mul_add(v1, v2, k0));
+    ASSERT_VEC_EQ(d, lanes, nmul_add(neg(v2), v1, k0));
+    ASSERT_VEC_EQ(d, lanes, nmul_add(v1, neg(v2), k0));
+
+    // Expected: v2 * v2 + v1.
+    for (size_t i = 0; i < d.N; ++i) {
+      lanes[i] = (i + 2) * (i + 2) + (i + 1);
+    }
+    ASSERT_VEC_EQ(d, lanes, mul_add(v2, v2, v1));
+    ASSERT_VEC_EQ(d, lanes, nmul_add(neg(v2), v2, v1));
+
+    // Expected: -(v2 * v2) + v1.
+    for (size_t i = 0; i < d.N; ++i) {
+      lanes[i] = -T(i + 2) * (i + 2) + (1 + i);
+    }
+    ASSERT_VEC_EQ(d, lanes, nmul_add(v2, v2, v1));
+
+    // All-zero inputs.
+    ASSERT_VEC_EQ(d, k0, ext::mul_subtract(k0, k0, k0));
+    ASSERT_VEC_EQ(d, k0, ext::nmul_subtract(k0, k0, k0));
+
+    // Expected: -v2 (a zero factor leaves only the negated subtrahend).
+    for (size_t i = 0; i < d.N; ++i) {
+      lanes[i] = -T(i + 2);
+    }
+    ASSERT_VEC_EQ(d, lanes, ext::mul_subtract(k0, v1, v2));
+    ASSERT_VEC_EQ(d, lanes, ext::mul_subtract(v1, k0, v2));
+    ASSERT_VEC_EQ(d, lanes, ext::nmul_subtract(neg(k0), v1, v2));
+    ASSERT_VEC_EQ(d, lanes, ext::nmul_subtract(v1, neg(k0), v2));
+
+    // Expected: v1 * v2 (zero subtrahend).
+    for (size_t i = 0; i < d.N; ++i) {
+      lanes[i] = (i + 1) * (i + 2);
+    }
+    ASSERT_VEC_EQ(d, lanes, ext::mul_subtract(v1, v2, k0));
+    ASSERT_VEC_EQ(d, lanes, ext::mul_subtract(v2, v1, k0));
+    ASSERT_VEC_EQ(d, lanes, ext::nmul_subtract(neg(v1), v2, k0));
+    ASSERT_VEC_EQ(d, lanes, ext::nmul_subtract(v2, neg(v1), k0));
+
+    // Expected: v2 * v2 - v1.
+    for (size_t i = 0; i < d.N; ++i) {
+      lanes[i] = (i + 2) * (i + 2) - (1 + i);
+    }
+    ASSERT_VEC_EQ(d, lanes, ext::mul_subtract(v2, v2, v1));
+    ASSERT_VEC_EQ(d, lanes, ext::nmul_subtract(neg(v2), v2, v1));
+  }
+};
+
+// Checks that sqrt undoes squaring for the ramp 0, 1, 2, ...
+struct TestSquareRoot {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto roots = iota(d, 0);
+    const auto squares = roots * roots;
+    ASSERT_VEC_EQ(d, roots, sqrt(squares));
+  }
+};
+
+// Checks that approximate_reciprocal_sqrt(123) lies within 1E-4 of 0.090166
+// in every lane.
+struct TestReciprocalSquareRoot {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    SIMD_ALIGN float lanes[d.N];
+    const auto input = set1(d, 123.0f);
+    store(approximate_reciprocal_sqrt(input), d, lanes);
+    for (size_t lane = 0; lane < d.N; ++lane) {
+      const float diff = lanes[lane] - 0.090166f;
+      // Absolute error without calling into <cmath>.
+      const float abs_err = (diff < 0.0f) ? -diff : diff;
+      ASSERT_EQ(true, abs_err < 1E-4f);
+    }
+  }
+};
+
+// Checks ceil/floor/round/trunc on floating-point lanes: integral inputs must
+// be returned unchanged, and inputs slightly above or below an integer must
+// move in the direction each operation implies.
+struct TestRound {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    // Integer positive
+    {
+      const auto v = iota(d, 4.0);
+      ASSERT_VEC_EQ(d, v, ceil(v));
+      ASSERT_VEC_EQ(d, v, floor(v));
+      ASSERT_VEC_EQ(d, v, round(v));
+      ASSERT_VEC_EQ(d, v, trunc(v));
+    }
+
+    // Integer negative
+    {
+      const auto v = iota(d, T(-32.0));
+      ASSERT_VEC_EQ(d, v, ceil(v));
+      ASSERT_VEC_EQ(d, v, floor(v));
+      ASSERT_VEC_EQ(d, v, round(v));
+      ASSERT_VEC_EQ(d, v, trunc(v));
+    }
+
+    // Huge positive (only ceil and floor are checked here)
+    {
+      const auto v = set1(d, T(1E15));
+      ASSERT_VEC_EQ(d, v, ceil(v));
+      ASSERT_VEC_EQ(d, v, floor(v));
+    }
+
+    // Huge negative (only ceil and floor are checked here)
+    {
+      const auto v = set1(d, T(-1E15));
+      ASSERT_VEC_EQ(d, v, ceil(v));
+      ASSERT_VEC_EQ(d, v, floor(v));
+    }
+
+    // Above positive
+    {
+      const auto v = iota(d, T(2.0001));
+      const auto v3 = iota(d, T(3));
+      const auto v2 = iota(d, T(2));
+      ASSERT_VEC_EQ(d, v3, ceil(v));
+      ASSERT_VEC_EQ(d, v2, floor(v));
+      ASSERT_VEC_EQ(d, v2, round(v));
+      ASSERT_VEC_EQ(d, v2, trunc(v));
+    }
+
+    // Below positive
+    {
+      const auto v = iota(d, T(3.9999));
+      const auto v4 = iota(d, T(4));
+      const auto v3 = iota(d, T(3));
+      ASSERT_VEC_EQ(d, v4, ceil(v));
+      ASSERT_VEC_EQ(d, v3, floor(v));
+      ASSERT_VEC_EQ(d, v4, round(v));
+      ASSERT_VEC_EQ(d, v3, trunc(v));
+    }
+
+    // Above negative
+    {
+      // WARNING: using iota => ensure negative value is low enough that
+      // even 16 lanes remain negative, otherwise trunc will behave differently
+      // for positive/negative values.
+      const auto v = iota(d, T(-19.9999));
+      // Here v3 starts at -19 and v4 at -20 (the names are not the values).
+      const auto v3 = iota(d, T(-19));
+      const auto v4 = iota(d, T(-20));
+      ASSERT_VEC_EQ(d, v3, ceil(v));
+      ASSERT_VEC_EQ(d, v4, floor(v));
+      ASSERT_VEC_EQ(d, v4, round(v));
+      ASSERT_VEC_EQ(d, v3, trunc(v));
+    }
+
+    // Below negative
+    {
+      const auto v = iota(d, T(-18.0001));
+      // Here v2 starts at -18 and v3 at -19 (the names are not the values).
+      const auto v2 = iota(d, T(-18));
+      const auto v3 = iota(d, T(-19));
+      ASSERT_VEC_EQ(d, v2, ceil(v));
+      ASSERT_VEC_EQ(d, v3, floor(v));
+      ASSERT_VEC_EQ(d, v2, round(v));
+      ASSERT_VEC_EQ(d, v2, trunc(v));
+    }
+  }
+};
+
+// Checks float -> integer conversion. As pinned by the assertions below,
+// convert_to drops the fractional part (3.9999 -> 3, -23.9999 -> -23) while
+// nearest_int rounds to the closest integer (3.9999 -> 4, -23.9999 -> -24).
+struct TestIntFromFloat {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const SIMD_FULL(float) df;
+
+    // Integer positive
+    ASSERT_VEC_EQ(d, iota(d, 4), convert_to(d, iota(df, 4.0f)));
+    ASSERT_VEC_EQ(d, iota(d, 4), nearest_int(iota(df, 4.0f)));
+
+    // Integer negative
+    ASSERT_VEC_EQ(d, iota(d, -32), convert_to(d, iota(df, -32.0f)));
+    ASSERT_VEC_EQ(d, iota(d, -32), nearest_int(iota(df, -32.0f)));
+
+    // Above positive
+    ASSERT_VEC_EQ(d, iota(d, 2), convert_to(d, iota(df, 2.001f)));
+    ASSERT_VEC_EQ(d, iota(d, 2), nearest_int(iota(df, 2.001f)));
+
+    // Below positive
+    ASSERT_VEC_EQ(d, iota(d, 3), convert_to(d, iota(df, 3.9999f)));
+    ASSERT_VEC_EQ(d, iota(d, 4), nearest_int(iota(df, 3.9999f)));
+
+    // Above negative
+    ASSERT_VEC_EQ(d, iota(d, -23), convert_to(d, iota(df, -23.9999f)));
+    ASSERT_VEC_EQ(d, iota(d, -24), nearest_int(iota(df, -23.9999f)));
+
+    // Below negative
+    ASSERT_VEC_EQ(d, iota(d, -24), convert_to(d, iota(df, -24.001f)));
+    ASSERT_VEC_EQ(d, iota(d, -24), nearest_int(iota(df, -24.001f)));
+  }
+};
+
+// Checks int32 -> float conversion via convert_to for several small integer
+// ramps. Inputs are integers, so there are no fractional cases to cover.
+struct TestFloatFromInt {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const SIMD_FULL(int32_t) di;
+
+    // Integer positive
+    ASSERT_VEC_EQ(d, iota(d, 4.0f), convert_to(d, iota(di, 4)));
+
+    // Integer negative
+    ASSERT_VEC_EQ(d, iota(d, -32.0f), convert_to(d, iota(di, -32)));
+
+    // Small positive ramp starting at 2
+    ASSERT_VEC_EQ(d, iota(d, 2.0f), convert_to(d, iota(di, 2)));
+
+    // Ramp starting at 4 (same inputs as the first case)
+    ASSERT_VEC_EQ(d, iota(d, 4.0f), convert_to(d, iota(di, 4)));
+
+    // Ramp starting at -4
+    ASSERT_VEC_EQ(d, iota(d, -4.0f), convert_to(d, iota(di, -4)));
+
+    // Ramp starting at -2
+    ASSERT_VEC_EQ(d, iota(d, -2.0f), convert_to(d, iota(di, -2)));
+  }
+};
+
+// Checks ext::sums_of_u8x8: each output lane must equal the sum of one group
+// of eight consecutive input bytes. Compiled out for SIMD_NONE.
+struct TestSumsOfU8 {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+#if SIMD_TARGET_VALUE != SIMD_NONE
+    const SIMD_FULL(uint8_t) d_bytes;
+    SIMD_ALIGN uint8_t input[d_bytes.N];
+    uint64_t expected[d.N] = {0};
+    for (size_t idx = 0; idx < d_bytes.N; ++idx) {
+      // Odd values 1, 3, 5, ...
+      input[idx] = 2 * idx + 1;
+      // Eight consecutive bytes accumulate into one output lane.
+      expected[idx / 8] += input[idx];
+    }
+    const auto bytes = load(d_bytes, input);
+    ASSERT_VEC_EQ(d, expected, ext::sums_of_u8x8(bytes));
+#endif
+  }
+};
+
+// Checks ext::sum_of_lanes: with lane i holding 2^i, every lane of the result
+// must hold the total of all input lanes.
+struct TestHorzSumT {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    SIMD_ALIGN T input[d.N];
+    double total = 0.0;
+    for (size_t lane = 0; lane < d.N; ++lane) {
+      input[lane] = 1u << lane;
+      total += input[lane];
+    }
+    const auto expected = set1(d, T(total));
+    const auto loaded = load(d, input);
+    ASSERT_VEC_EQ(d, expected, ext::sum_of_lanes(loaded));
+  }
+};
+
+// Runs the horizontal-sum test for 32/64-bit integer lanes and for
+// float/double; narrower integer lanes are not exercised.
+SIMD_ATTR void TestHorzSum() {
+  // No u16.
+  Call<TestHorzSumT, uint32_t>();
+  Call<TestHorzSumT, uint64_t>();
+
+  // No i8/i16.
+  Call<TestHorzSumT, int32_t>();
+  Call<TestHorzSumT, int64_t>();
+
+  Call<TestHorzSumT, float>();
+  Call<TestHorzSumT, double>();
+}
+
+// Entry point for the arithmetic tests: runs every test in this namespace
+// for the lane types each one is invoked with below.
+SIMD_ATTR void TestArithmetic() {
+  ForeachLaneType<TestPlusMinus>();
+  TestSaturatingArithmetic();
+
+  TestShifts();
+  TestMinMax();
+  TestAverage();
+  TestAbs();
+  TestMul();
+  // mul_high is only exercised for 16-bit signed lanes.
+  Call<TestMulHi16, int16_t>();
+  TestMulEven();
+
+  // Floating-point-only operations.
+  ForeachFloatLaneType<TestMulAdd>();
+  ForeachFloatLaneType<TestSquareRoot>();
+  Call<TestReciprocalSquareRoot, float>();
+  ForeachFloatLaneType<TestRound>();
+  // int32 <-> float conversions.
+  Call<TestIntFromFloat, int32_t>();
+  Call<TestFloatFromInt, float>();
+
+  // Horizontal reductions.
+  Call<TestSumsOfU8, uint64_t>();
+  TestHorzSum();
+}
+
+}  // namespace arithmetic
+
+namespace compare {
+
+// Checks ==, < and > on signed integer lanes. A true comparison is expected
+// to yield T(-1) in the lane and a false one zero.
+struct TestSignedCompare {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto from2 = iota(d, 2);
+    const auto from2_copy = iota(d, 2);
+    const auto negative = iota(d, -T(d.N));
+    const auto all_true = set1(d, static_cast<T>(-1));
+    const auto all_false = setzero(d);
+
+    // Equality.
+    ASSERT_VEC_EQ(d, all_false, from2 == negative);
+    ASSERT_VEC_EQ(d, all_true, from2 == from2_copy);
+
+    // Ordering that holds.
+    ASSERT_VEC_EQ(d, all_true, from2 > negative);
+    ASSERT_VEC_EQ(d, all_true, negative < from2);
+    // Ordering that does not hold.
+    ASSERT_VEC_EQ(d, all_false, from2 < negative);
+    ASSERT_VEC_EQ(d, all_false, negative > from2);
+  }
+};
+
+// Checks == on unsigned integer lanes. A true comparison is expected to set
+// every bit of the lane and a false one to clear it.
+struct TestUnsignedCompare {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto from2 = iota(d, 2);
+    const auto from2_copy = iota(d, 2);
+    const auto from3 = iota(d, 3);
+    const auto all_true = set1(d, T(~0ull));
+    const auto all_false = setzero(d);
+
+    ASSERT_VEC_EQ(d, all_false, from2 == from3);
+    ASSERT_VEC_EQ(d, all_true, from2 == from2_copy);
+  }
+};
+
+// Checks ==, < and > on floating-point lanes. False results compare equal to
+// zero; true results are all 1-bits and are therefore verified bytewise.
+struct TestFloatCompare {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    // Number of bytes compared for "true" results.
+    // NOTE(review): if SIMD_FULL(uint8_t)::N can be smaller than
+    // sizeof(T) * d.N on some target, only a prefix is checked - confirm.
+    constexpr size_t kNumBytes = SIMD_FULL(uint8_t)::N;
+    const auto from2 = iota(d, 2);
+    const auto from2_copy = iota(d, 2);
+    const auto negative = iota(d, -T(d.N));
+    const auto all_false = setzero(d);
+
+    // Comparisons that do not hold.
+    ASSERT_VEC_EQ(d, all_false, from2 == negative);
+    ASSERT_VEC_EQ(d, all_false, from2 < negative);
+    ASSERT_VEC_EQ(d, all_false, negative > from2);
+
+    // A true lane is all 1-bits, i.e. a NaN, which never compares equal to
+    // itself; compare the raw bytes instead.
+    uint8_t all_ones[kNumBytes];
+    SetBytes(0xFF, &all_ones);
+
+    SIMD_ALIGN T result[d.N];
+    store(from2 == from2_copy, d, result);
+    ASSERT_EQ(true, BytesEqual(result, all_ones, kNumBytes));
+    store(from2 > negative, d, result);
+    ASSERT_EQ(true, BytesEqual(result, all_ones, kNumBytes));
+    store(negative < from2, d, result);
+    ASSERT_EQ(true, BytesEqual(result, all_ones, kNumBytes));
+  }
+};
+
+// Returns "bits" after zeroing any upper bits that wouldn't be returned by
+// movemask for the given vector "D", i.e. keeps only the lowest d.N bits.
+// When d.N covers the whole 32-bit return type, every bit is valid and "bits"
+// is returned unchanged; this also avoids shifting by an amount greater than
+// or equal to the operand width, which is undefined behavior.
+template <class D>
+uint32_t ValidBits(D d, const uint32_t bits) {
+  constexpr size_t kMaxBits = sizeof(uint32_t) * 8;
+  if (d.N >= kMaxBits) return bits;
+  const uint32_t mask = (uint32_t(1) << d.N) - 1;
+  return bits & mask;
+}
+
+// Checks ext::movemask for u8, float and double vectors. Each expected
+// constant is masked with ValidBits so that only the lanes the current
+// vector actually has are compared.
+SIMD_ATTR void TestMovemask() {
+  const SIMD_FULL(uint8_t) d;
+  // Bit i of the expected value 0xC08E7103 is set exactly where bytes[i] has
+  // its most significant bit set.
+  SIMD_ALIGN const uint8_t bytes[32] = {
+      0x80, 0xFF, 0x7F, 0x00, 0x01, 0x10, 0x20, 0x40, 0x80, 0x02, 0x04,
+      0x08, 0xC0, 0xC1, 0xFE, 0x0F, 0x0F, 0xFE, 0xC1, 0xC0, 0x08, 0x04,
+      0x02, 0x80, 0x40, 0x20, 0x10, 0x01, 0x00, 0x7F, 0xFF, 0x80};
+  ASSERT_EQ(ValidBits(d, 0xC08E7103), ext::movemask(load(d, bytes)));
+
+  // Bit i of 0xa5 is set exactly where lanes[i] is negative, including -0.0f.
+  SIMD_ALIGN const float lanes[8] = {-1.0f,  1E30f, -0.0f, 1E-30f,
+                                     1E-30f, -0.0f, 1E30f, -1.0f};
+  const SIMD_FULL(float) df;
+  ASSERT_EQ(ValidBits(df, 0xa5), ext::movemask(load(df, lanes)));
+
+  // Lanes 1 and 2 are negative (including -0.0), hence the expected value 6.
+  const SIMD_FULL(double) dd;
+  SIMD_ALIGN const double lanes2[4] = {1E300, -1E-300, -0.0, 1E-10};
+  ASSERT_EQ(ValidBits(dd, 6), ext::movemask(load(dd, lanes2)));
+}
+
+// Checks ext::all_zero: a single nonzero lane at any position must make it
+// return false, and restoring that lane to zero must make it return true.
+struct TestAllZero {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const T largest = LimitsMax<T>();
+    const T lowest_plus1 = LimitsMin<T>() + 1;
+
+    // Start from an all-zero buffer.
+    SIMD_ALIGN T lanes[d.N];
+    store(setzero(d), d, lanes);
+
+    for (size_t lane = 0; lane < d.N; ++lane) {
+      // Largest value in this lane only.
+      lanes[lane] = largest;
+      ASSERT_EQ(false, ext::all_zero(load(d, lanes)));
+
+      // LimitsMin + 1 in this lane only.
+      lanes[lane] = lowest_plus1;
+      ASSERT_EQ(false, ext::all_zero(load(d, lanes)));
+
+      // Back to all zero before moving on to the next lane.
+      lanes[lane] = T(0);
+      ASSERT_EQ(true, ext::all_zero(load(d, lanes)));
+    }
+  }
+};
+
+// Entry point for the comparison tests.
+SIMD_ATTR void TestCompare() {
+  // NOTE(review): the signed compare test is disabled, so TestSignedCompare is
+  // never run from here; the reason is not visible in this file section.
+  // ForeachSignedLaneType<TestSignedCompare>();
+  ForeachUnsignedLaneType<TestUnsignedCompare>();
+  ForeachFloatLaneType<TestFloatCompare>();
+
+  TestMovemask();
+
+  ForeachUnsignedLaneType<TestAllZero>();
+  ForeachSignedLaneType<TestAllZero>();
+  // No float.
+}
+
+}  // namespace compare
+
+namespace logical {
+
+// Checks the bitwise operators (&, |, ^, andnot) and their compound-assignment
+// forms against a zero vector and the ramp 0, 1, 2, ...
+struct TestLogicalT {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto zero = setzero(d);
+    const auto ramp = iota(d, 0);
+
+    // AND
+    ASSERT_VEC_EQ(d, zero, zero & ramp);
+    ASSERT_VEC_EQ(d, zero, ramp & zero);
+    ASSERT_VEC_EQ(d, ramp, ramp & ramp);
+
+    // OR
+    ASSERT_VEC_EQ(d, ramp, zero | ramp);
+    ASSERT_VEC_EQ(d, ramp, ramp | zero);
+    ASSERT_VEC_EQ(d, ramp, ramp | ramp);
+
+    // XOR
+    ASSERT_VEC_EQ(d, ramp, zero ^ ramp);
+    ASSERT_VEC_EQ(d, ramp, ramp ^ zero);
+    ASSERT_VEC_EQ(d, zero, ramp ^ ramp);
+
+    // andnot: the first argument is the one that gets inverted.
+    ASSERT_VEC_EQ(d, ramp, andnot(zero, ramp));
+    ASSERT_VEC_EQ(d, zero, andnot(ramp, zero));
+    ASSERT_VEC_EQ(d, zero, andnot(ramp, ramp));
+
+    // Compound assignment; each step continues from the previous result.
+    auto acc = ramp;
+    acc &= ramp;
+    ASSERT_VEC_EQ(d, ramp, acc);
+    acc &= zero;
+    ASSERT_VEC_EQ(d, zero, acc);
+
+    acc |= ramp;
+    ASSERT_VEC_EQ(d, ramp, acc);
+    acc |= zero;
+    ASSERT_VEC_EQ(d, ramp, acc);
+
+    acc ^= ramp;
+    ASSERT_VEC_EQ(d, zero, acc);
+    acc ^= zero;
+    ASSERT_VEC_EQ(d, zero, acc);
+  }
+};
+
+// Checks select(a, b, mask) with random inputs: a lane whose mask is all-zero
+// must yield the lane from a, and an all-ones mask the lane from b.
+struct TestSelect {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    RandomState rng{1234};
+
+    // The two mask values: all bits clear and all bits set.
+    const T mask_clear(0);
+    const uint64_t all_bits = ~0ull;
+    T mask_set;
+    CopyBytes<sizeof(T)>(&all_bits, &mask_set);
+
+    SIMD_ALIGN T first[d.N];
+    SIMD_ALIGN T second[d.N];
+    SIMD_ALIGN T masks[d.N];
+    for (size_t lane = 0; lane < d.N; ++lane) {
+      first[lane] = int32_t(Random32(&rng));
+      second[lane] = int32_t(Random32(&rng));
+      // Bit 10 of the next random number decides which mask the lane gets.
+      masks[lane] = (Random32(&rng) & 1024) ? mask_clear : mask_set;
+    }
+
+    const auto v_first = load(d, first);
+    const auto v_second = load(d, second);
+    const auto v_mask = load(d, masks);
+    SIMD_ALIGN T selected[d.N];
+    store(select(v_first, v_second, v_mask), d, selected);
+    for (size_t lane = 0; lane < d.N; ++lane) {
+      const T expected = (masks[lane] == mask_clear) ? first[lane] : second[lane];
+      ASSERT_EQ(expected, selected[lane]);
+    }
+  }
+};
+
+// Checks select() driven by condition_from_sign: lanes whose selector input
+// is negative must yield the second operand, all others the first.
+struct TestSelectSign {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    RandomState rng{1234};
+
+    SIMD_ALIGN T first[d.N];
+    SIMD_ALIGN T second[d.N];
+    SIMD_ALIGN T signs[d.N];
+    for (size_t lane = 0; lane < d.N; ++lane) {
+      first[lane] = Random32(&rng);
+      second[lane] = Random32(&rng);
+      // Either the first value as-is or the negated second value.
+      signs[lane] = (Random32(&rng) & 1024) ? first[lane] : -second[lane];
+    }
+
+    SIMD_ALIGN T selected[d.N];
+    const auto condition = condition_from_sign(load(d, signs));
+    const auto v_first = load(d, first);
+    const auto v_second = load(d, second);
+    store(select(v_first, v_second, condition), d, selected);
+    for (size_t lane = 0; lane < d.N; ++lane) {
+      const T expected = (signs[lane] < T(0.0)) ? second[lane] : first[lane];
+      ASSERT_EQ(expected, selected[lane]);
+    }
+  }
+};
+
+// Entry point for the logical tests: bitwise operators and select for all
+// lane types, sign-based select for floating-point lanes only.
+SIMD_ATTR void TestLogical() {
+  ForeachLaneType<TestLogicalT>();
+  ForeachLaneType<TestSelect>();
+  ForeachFloatLaneType<TestSelectSign>();
+}
+
+}  // namespace logical
+
+namespace memory {
+
+// Checks aligned and unaligned load/store round trips using a buffer of two
+// vectors holding the values 1 .. 2 * d.N.
+struct TestLoadStore {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    // lo holds 1 .. N and hi holds N+1 .. 2N; together they fill lanes[].
+    const auto hi = iota(d, 1 + d.N);
+    const auto lo = iota(d, 1);
+    SIMD_ALIGN T lanes[2 * d.N];
+    store(hi, d, lanes + d.N);
+    store(lo, d, lanes);
+
+    // Aligned load
+    const auto lo2 = load(d, lanes);
+    ASSERT_VEC_EQ(d, lo2, lo);
+
+    // Aligned store
+    SIMD_ALIGN T lanes2[2 * d.N];
+    store(lo2, d, lanes2);
+    store(hi, d, lanes2 + d.N);
+    for (size_t i = 0; i < 2 * d.N; ++i) {
+      ASSERT_EQ(lanes[i], lanes2[i]);
+    }
+
+    // Unaligned load
+    // Starts one element in, so the loaded lanes are 2 .. N+1.
+    const auto vu = load_unaligned(d, lanes + 1);
+    SIMD_ALIGN T lanes3[d.N];
+    store(vu, d, lanes3);
+    for (size_t i = 0; i < d.N; ++i) {
+      ASSERT_EQ(T(i + 2), lanes3[i]);
+    }
+
+    // Unaligned store
+    // Overwrites lanes2[N/2 .. 3N/2) with 1 .. N.
+    store_unaligned(lo2, d, lanes2 + d.N / 2);
+    size_t i = 0;
+    // Values before the store position remain unchanged.
+    for (; i < d.N / 2; ++i) {
+      ASSERT_EQ(lanes[i], lanes2[i]);
+    }
+    // The overwritten range holds 1 .. N.
+    for (; i < 3 * d.N / 2; ++i) {
+      ASSERT_EQ(T(i - d.N / 2 + 1), lanes2[i]);
+    }
+    // Subsequent values remain unchanged.
+    for (; i < 2 * d.N; ++i) {
+      ASSERT_EQ(T(i + 1), lanes2[i]);
+    }
+  }
+};
+
+// Checks load_dup128: the 16 bytes of input must repeat throughout the
+// resulting vector. Compiled out for SIMD_NONE.
+struct TestLoadDup128 {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+#if SIMD_TARGET_VALUE != SIMD_NONE
+    // Number of T lanes in 16 bytes.
+    constexpr size_t kLanesPerBlock = 16 / sizeof(T);
+    alignas(16) T block[kLanesPerBlock];
+    for (size_t idx = 0; idx < kLanesPerBlock; ++idx) {
+      block[idx] = 1 + idx;
+    }
+
+    SIMD_ALIGN T repeated[d.N];
+    store(load_dup128(d, block), d, repeated);
+    for (size_t lane = 0; lane < d.N; ++lane) {
+      // The pattern 1 .. kLanesPerBlock restarts every kLanesPerBlock lanes.
+      ASSERT_EQ(T(lane % kLanesPerBlock + 1), repeated[lane]);
+    }
+#endif
+  }
+};
+
+// Checks stream(): after store_fence(), the destination must hold the ramp
+// 0, 1, 2, ... that was written.
+struct TestStreamT {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    SIMD_ALIGN T written[d.N];
+    const auto ramp = iota(d, 0);
+    stream(ramp, d, written);
+    // Fence before reading back what stream() wrote.
+    store_fence();
+    for (size_t lane = 0; lane < d.N; ++lane) {
+      ASSERT_EQ(T(lane), written[lane]);
+    }
+  }
+};
+
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+
+// Checks ext::gather_offset (byte offsets) and ext::gather_index (element
+// indices) for integer lanes against scalar copies out of a byte buffer.
+// Offset is the offset/index lane type (instantiated with int32_t/int64_t in
+// TestGather); kShift must equal log2(sizeof(T)).
+template <typename Offset, int kShift>
+struct TestGatherT {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    static_assert(sizeof(T) == (1 << kShift), "Incorrect kShift");
+
+    // Base points to middle; |max_offset| + sizeof(T) <= kNumBytes / 2.
+    constexpr size_t kNumBytes = 64;
+    uint8_t bytes[kNumBytes];
+    for (size_t i = 0; i < kNumBytes; ++i) {
+      bytes[i] = i + 1;
+    }
+    const uint8_t* middle = bytes + kNumBytes / 2;
+
+    // Offsets: combinations of aligned, repeated, negative.
+    SIMD_ALIGN Offset offset_lanes[SIMD_MAX(d.N, 8)] = {2,   12,  4,   4,
+                                                        -16, -16, -21, -20};
+
+    // Scalar reference: copy sizeof(T) bytes from each byte offset.
+    SIMD_ALIGN T expected[d.N];
+    for (size_t i = 0; i < d.N; ++i) {
+      CopyBytes<sizeof(T)>(middle + offset_lanes[i], &expected[i]);
+    }
+
+    const auto offsets = load(SIMD_FULL(Offset)(), offset_lanes);
+    auto actual =
+        ext::gather_offset(d, reinterpret_cast<const T*>(middle), offsets);
+    ASSERT_VEC_EQ(d, expected, actual);
+
+    // Indices
+    SIMD_ALIGN const Offset index_lanes[SIMD_MAX(d.N, 8)] = {1, -2, 0,  1,
+                                                             3, -2, -1, 2};
+    for (size_t i = 0; i < d.N; ++i) {
+      // Scale in the signed Offset type: multiplying a negative index by the
+      // unsigned sizeof(T) would first wrap it to a huge size_t, making the
+      // pointer addition below overflow instead of stepping backwards.
+      const Offset byte_offset =
+          index_lanes[i] * static_cast<Offset>(sizeof(T));
+      CopyBytes<sizeof(T)>(middle + byte_offset, &expected[i]);
+    }
+    const auto indices = load(SIMD_FULL(Offset)(), index_lanes);
+    actual = ext::gather_index(d, reinterpret_cast<const T*>(middle), indices);
+    ASSERT_VEC_EQ(d, expected, actual);
+  }
+};
+
+// Checks ext::gather_index and ext::gather_offset for floating-point lanes
+// against scalar copies out of a table of 16 values. Offset is the index lane
+// type; kShift must equal log2(sizeof(T)).
+template <typename Offset, int kShift>
+struct TestFloatGatherT {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    static_assert(sizeof(T) == (1 << kShift), "Incorrect kShift");
+
+    constexpr size_t kNumValues = 16;
+    // Base points to middle; |max_index| < kNumValues / 2.
+    SIMD_ALIGN const T values[SIMD_MAX(d.N, kNumValues)] = {
+        T(100.0), T(110.0), T(111.0), T(128.0), T(1024.0), T(-1.0),
+        T(-2.0),  T(-3.0),  T(0.25),  T(0.5),   T(0.75),   T(1.25),
+        T(1.5),   T(1.75),  T(-0.25), T(-0.5)};
+    const T* middle = values + kNumValues / 2;
+
+    // Indices: combinations of aligned, repeated, negative.
+    SIMD_ALIGN const Offset index_lanes[SIMD_MAX(d.N, 8)] = {1, -6, 0,  1,
+                                                             3, -6, -1, 7};
+    // Scalar reference: copy the element each index refers to.
+    SIMD_ALIGN T expected[d.N];
+    for (size_t i = 0; i < d.N; ++i) {
+      CopyBytes<sizeof(T)>(middle + index_lanes[i], &expected[i]);
+    }
+    const auto indices = load(SIMD_FULL(Offset)(), index_lanes);
+    auto actual = ext::gather_index(d, middle, indices);
+    ASSERT_VEC_EQ(d, expected, actual);
+
+    // Offsets: same as index * sizeof(T).
+    const auto offsets = shift_left<kShift>(indices);
+    actual = ext::gather_offset(d, middle, offsets);
+    ASSERT_VEC_EQ(d, expected, actual);
+  }
+};
+
+#endif  // SIMD_TARGET_VALUE == SIMD_AVX2
+
+// Runs the stream test for 32/64-bit integer lanes and for float/double;
+// 8- and 16-bit lanes are not exercised.
+SIMD_ATTR void TestStream() {
+  // No u8,u16.
+  Call<TestStreamT, uint32_t>();
+  Call<TestStreamT, uint64_t>();
+  // No i8,i16.
+  Call<TestStreamT, int32_t>();
+  Call<TestStreamT, int64_t>();
+  Call<TestStreamT, float>();
+  Call<TestStreamT, double>();
+}
+
+// Runs the gather tests; compiled to a no-op unless the target is AVX2.
+// Each call pairs a lane type with a same-sized signed offset type and the
+// matching kShift (2 for 4-byte lanes, 3 for 8-byte lanes).
+SIMD_ATTR void TestGather() {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+  // No u8,u16.
+  Call<TestGatherT<int32_t, 2>, uint32_t>();
+  Call<TestGatherT<int64_t, 3>, uint64_t>();
+  // No i8,i16.
+  Call<TestGatherT<int32_t, 2>, int32_t>();
+  Call<TestGatherT<int64_t, 3>, int64_t>();
+
+  Call<TestFloatGatherT<int32_t, 2>, float>();
+  Call<TestFloatGatherT<int64_t, 3>, double>();
+#endif
+}
+
+// Entry point for the memory tests: load/store and load_dup128 for all lane
+// types, then the stream and gather tests.
+SIMD_ATTR void TestMemory() {
+  ForeachLaneType<TestLoadStore>();
+  ForeachLaneType<TestLoadDup128>();
+  TestStream();
+  TestGather();
+}
+
+}  // namespace memory
+
+namespace convert {
+
+// Verifies that cast_to reinterprets a vector without changing its bits, for
+// every source lane type paired with the destination lane type T.
+struct TestCastT {
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    // Unsigned sources.
+    Test<uint8_t, T>();
+    Test<uint16_t, T>();
+    Test<uint32_t, T>();
+    Test<uint64_t, T>();
+    // Signed sources.
+    Test<int8_t, T>();
+    Test<int16_t, T>();
+    Test<int32_t, T>();
+    Test<int64_t, T>();
+    // Floating-point sources.
+    Test<float, T>();
+    Test<double, T>();
+  }
+
+  // Casts a ramp of FromT lanes to ToT lanes and compares the stored bytes.
+  template <typename FromT, typename ToT>
+  SIMD_ATTR void Test() const {
+    const SIMD_FULL(FromT) d_from;
+    const SIMD_FULL(ToT) d_to;
+    const auto source = iota(d_from, 1);
+    const auto reinterpreted = cast_to(d_to, source);
+    static_assert(sizeof(source) == sizeof(reinterpreted),
+                  "Cast must return same size");
+    // Both vectors must store exactly the same bytes.
+    SIMD_ALIGN FromT source_lanes[d_from.N];
+    SIMD_ALIGN ToT cast_lanes[d_to.N];
+    store(source, d_from, source_lanes);
+    store(reinterpreted, d_to, cast_lanes);
+    ASSERT_EQ(true, BytesEqual(source_lanes, cast_lanes, sizeof(source)));
+  }
+};
+
+// Runs the cast tests. For SIMD_NONE only same-sized type pairs are tested;
+// for other targets every integer destination type is paired with every
+// source type via TestCastT::operator().
+SIMD_ATTR void TestCast() {
+#if SIMD_TARGET_VALUE == SIMD_NONE
+  // Promotion is undefined => only test same-sized types.
+  // 8-bit pairs.
+  TestCastT().Test<uint8_t, uint8_t>();
+  TestCastT().Test<int8_t, int8_t>();
+  TestCastT().Test<uint8_t, int8_t>();
+  TestCastT().Test<int8_t, uint8_t>();
+
+  // 16-bit pairs.
+  TestCastT().Test<uint16_t, uint16_t>();
+  TestCastT().Test<int16_t, int16_t>();
+  TestCastT().Test<uint16_t, int16_t>();
+  TestCastT().Test<int16_t, uint16_t>();
+
+  // 32-bit pairs, including float.
+  TestCastT().Test<uint32_t, uint32_t>();
+  TestCastT().Test<int32_t, int32_t>();
+  TestCastT().Test<uint32_t, int32_t>();
+  TestCastT().Test<int32_t, uint32_t>();
+  TestCastT().Test<uint32_t, float>();
+  TestCastT().Test<int32_t, float>();
+  TestCastT().Test<float, float>();
+  TestCastT().Test<float, uint32_t>();
+  TestCastT().Test<float, int32_t>();
+
+  // 64-bit pairs, including double.
+  TestCastT().Test<uint64_t, uint64_t>();
+  TestCastT().Test<int64_t, int64_t>();
+  TestCastT().Test<uint64_t, int64_t>();
+  TestCastT().Test<int64_t, uint64_t>();
+  TestCastT().Test<uint64_t, double>();
+  TestCastT().Test<int64_t, double>();
+  TestCastT().Test<double, double>();
+  TestCastT().Test<double, uint64_t>();
+  TestCastT().Test<double, int64_t>();
+#else
+  ForeachUnsignedLaneType<TestCastT>();
+  ForeachSignedLaneType<TestCastT>();
+#endif
+
+  // Float <-> u/i32
+  // (For SIMD_NONE these four pairs were already run in the branch above.)
+  TestCastT().Test<uint32_t, float>();
+  TestCastT().Test<int32_t, float>();
+  TestCastT().Test<float, uint32_t>();
+  TestCastT().Test<float, int32_t>();
+}
+
+// Checks convert_to when widening FromT lanes to ToT: a ramp, -1, and FromT's
+// minimum and maximum must all keep their value. The source is a partial
+// vector with as many lanes as a full ToT vector.
+template <typename FromT, typename ToT>
+SIMD_ATTR void TestPromoteT() {
+  constexpr size_t kLanes = SIMD_FULL(ToT)::N;
+  const SIMD_PART(FromT, kLanes) d_narrow;
+  const SIMD_FULL(ToT) d_wide;
+
+  // Ramp 1, 2, 3, ...
+  const auto narrow_ramp = iota(d_narrow, 1);
+  const auto wide_ramp = iota(d_wide, 1);
+  ASSERT_VEC_EQ(d_wide, wide_ramp, convert_to(d_wide, narrow_ramp));
+
+  // FromT(-1) in every lane.
+  const auto narrow_n1 = set1(d_narrow, FromT(-1));
+  const auto wide_n1 = set1(d_wide, ToT(FromT(-1)));
+  ASSERT_VEC_EQ(d_wide, wide_n1, convert_to(d_wide, narrow_n1));
+
+  // Smallest FromT value.
+  const auto narrow_min = set1(d_narrow, LimitsMin<FromT>());
+  const auto wide_min = set1(d_wide, ToT(LimitsMin<FromT>()));
+  ASSERT_VEC_EQ(d_wide, wide_min, convert_to(d_wide, narrow_min));
+
+  // Largest FromT value.
+  const auto narrow_max = set1(d_narrow, LimitsMax<FromT>());
+  const auto wide_max = set1(d_wide, ToT(LimitsMax<FromT>()));
+  ASSERT_VEC_EQ(d_wide, wide_max, convert_to(d_wide, narrow_max));
+}
+
+// Checks convert_to when narrowing FromT lanes to ToT: a ramp, -1, and ToT's
+// minimum and maximum (expressed in FromT) must all keep their value. The
+// destination is a partial vector with as many lanes as a full FromT vector.
+template <typename FromT, typename ToT>
+SIMD_ATTR void TestDemoteT() {
+  constexpr size_t kLanes = SIMD_FULL(FromT)::N;
+  const SIMD_FULL(FromT) d_wide;
+  const SIMD_PART(ToT, kLanes) d_narrow;
+
+  // Ramp 1, 2, 3, ...
+  const auto wide_ramp = iota(d_wide, 1);
+  const auto narrow_ramp = iota(d_narrow, 1);
+  ASSERT_VEC_EQ(d_narrow, narrow_ramp, convert_to(d_narrow, wide_ramp));
+
+  // ToT(-1) in every lane.
+  const auto wide_n1 = set1(d_wide, FromT(ToT(-1)));
+  const auto narrow_n1 = set1(d_narrow, ToT(-1));
+  ASSERT_VEC_EQ(d_narrow, narrow_n1, convert_to(d_narrow, wide_n1));
+
+  // Smallest ToT value.
+  const auto wide_min = set1(d_wide, FromT(LimitsMin<ToT>()));
+  const auto narrow_min = set1(d_narrow, LimitsMin<ToT>());
+  ASSERT_VEC_EQ(d_narrow, narrow_min, convert_to(d_narrow, wide_min));
+
+  // Largest ToT value.
+  const auto wide_max = set1(d_wide, FromT(LimitsMax<ToT>()));
+  const auto narrow_max = set1(d_narrow, LimitsMax<ToT>());
+  ASSERT_VEC_EQ(d_narrow, narrow_max, convert_to(d_narrow, wide_max));
+}
+
+// Runs the same checks as TestPromoteT for the given type pair: widening
+// FromT lanes to ToT via convert_to must preserve a ramp, -1, and FromT's
+// minimum and maximum.
+template <typename FromT, typename ToT>
+SIMD_ATTR void TestDupPromoteT() {
+  TestPromoteT<FromT, ToT>();
+}
+
+// Entry point for the conversion tests: casts, u8 <-> u32 helpers, then the
+// promote/demote tests for the type pairs listed below.
+SIMD_ATTR void TestConvert() {
+  TestCast();
+
+  // u8 -> u32 via load_dup128, and u32 -> u8 into a partial vector, for ramps
+  // starting at 0 and at 0x7F.
+  const SIMD_FULL(uint8_t) d8;
+  const SIMD_FULL(uint32_t) d32;
+  SIMD_ALIGN uint8_t lanes8[d8.N];
+  store(iota(d8, 0), d8, lanes8);
+  ASSERT_VEC_EQ(d32, iota(d32, 0), u32_from_u8(load_dup128(d8, lanes8)));
+  store(iota(d8, 0x7F), d8, lanes8);
+  ASSERT_VEC_EQ(d32, iota(d32, 0x7F), u32_from_u8(load_dup128(d8, lanes8)));
+  const SIMD_PART(uint8_t, d32.N) p8;
+  ASSERT_VEC_EQ(p8, iota(p8, 0), u8_from_u32(iota(d32, 0)));
+  ASSERT_VEC_EQ(p8, iota(p8, 0x7F), u8_from_u32(iota(d32, 0x7F)));
+
+  // Promote: no u64,i64
+  TestPromoteT<float, double>();
+  TestPromoteT<uint8_t, int16_t>();
+  TestPromoteT<uint8_t, int32_t>();
+  TestPromoteT<uint16_t, int32_t>();
+  TestPromoteT<int8_t, int16_t>();
+  TestPromoteT<int8_t, int32_t>();
+  TestPromoteT<int16_t, int32_t>();
+  TestPromoteT<uint32_t, uint64_t>();
+  TestPromoteT<int32_t, int64_t>();
+
+  // Demote
+  TestDemoteT<int16_t, int8_t>();
+  TestDemoteT<int32_t, int8_t>();
+  TestDemoteT<int32_t, int16_t>();
+  TestDemoteT<int16_t, uint8_t>();
+  TestDemoteT<int32_t, uint8_t>();
+  TestDemoteT<int32_t, uint16_t>();
+
+  TestDupPromoteT<uint8_t, uint32_t>();
+}
+
+}  // namespace convert
+
+namespace swizzle {
+
+struct TestShiftBytesT {  // Functor: checks shift_*_bytes / shift_*_lanes for T.
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+#if SIMD_TARGET_VALUE != SIMD_NONE
+    const SIMD_FULL(uint8_t) d8;
+
+    // Zero remains zero
+    const auto v0 = setzero(d);
+    ASSERT_VEC_EQ(d, v0, shift_left_bytes<1>(v0));
+    ASSERT_VEC_EQ(d, v0, shift_right_bytes<1>(v0));
+
+    // Zero after shifting out the high/low byte
+    SIMD_ALIGN uint8_t bytes[d8.N] = {0};
+    bytes[d8.N - 1] = 0x7F;  // only the last byte is nonzero
+    const auto vhi = cast_to(d, load(d8, bytes));
+    bytes[d8.N - 1] = 0;
+    bytes[0] = 0x7F;  // now only the first byte is nonzero
+    const auto vlo = cast_to(d, load(d8, bytes));
+    ASSERT_EQ(true, ext::all_zero(shift_left_bytes<1>(vhi)));
+    ASSERT_EQ(true, ext::all_zero(shift_right_bytes<1>(vlo)));
+
+    SIMD_ALIGN T in[d.N];
+    const uint8_t* in_bytes = reinterpret_cast<const uint8_t*>(in);
+    const auto v = cast_to(d, iota(d8, 1));  // bytes 1, 2, 3, ... viewed as T
+    store(v, d, in);
+
+    // Shifting by one lane is the same as shifting #bytes
+    ASSERT_VEC_EQ(d, shift_left_lanes<1>(v), shift_left_bytes<sizeof(T)>(v));
+    ASSERT_VEC_EQ(d, shift_right_lanes<1>(v), shift_right_bytes<sizeof(T)>(v));
+    // Two lanes
+    ASSERT_VEC_EQ(d, shift_left_lanes<2>(v),
+                  shift_left_bytes<2 * sizeof(T)>(v));
+    ASSERT_VEC_EQ(d, shift_right_lanes<2>(v),
+                  shift_right_bytes<2 * sizeof(T)>(v));
+
+    SIMD_ALIGN T shifted[d.N];
+    const uint8_t* shifted_bytes = reinterpret_cast<const uint8_t*>(shifted);
+
+    const size_t kBlockSize = SIMD_MIN(d8.N, 16);  // checked per block of <= 16 bytes
+    store(shift_left_bytes<1>(v), d, shifted);
+    for (size_t block = 0; block < d8.N; block += kBlockSize) {
+      ASSERT_EQ(uint8_t(0), shifted_bytes[block]);  // zero enters at block start
+      ASSERT_EQ(true, BytesEqual(in_bytes + block, shifted_bytes + block + 1,
+                                 kBlockSize - 1));
+    }
+
+    store(shift_right_bytes<1>(v), d, shifted);
+    for (size_t block = 0; block < d8.N; block += kBlockSize) {
+      ASSERT_EQ(uint8_t(0), shifted_bytes[block + kBlockSize - 1]);  // zero at block end
+      ASSERT_EQ(true, BytesEqual(in_bytes + block + 1, shifted_bytes + block,
+                                 kBlockSize - 1));
+    }
+#endif
+  }
+};
+
+SIMD_ATTR void TestShiftBytes() {  // Runs TestShiftBytesT over integer lane types.
+  ForeachUnsignedLaneType<TestShiftBytesT>();
+  ForeachSignedLaneType<TestShiftBytesT>();
+  // No float.
+}
+
+template <typename T, int kLane>
+struct TestBroadcastR {  // Checks broadcast<kLane>, then recurses to kLane - 1.
+  SIMD_ATTR void operator()() const {
+    const SIMD_FULL(T) d;
+    SIMD_ALIGN T in_lanes[d.N] = {0};
+    constexpr size_t kVecN = SIMD_FULL(T)::N;
+    constexpr size_t kBlockN = SIMD_MIN(kVecN * sizeof(T), 16) / sizeof(T);  // lanes per block
+    // Need to set within each 128-bit block
+    for (size_t block = 0; block < d.N; block += kBlockN) {
+      in_lanes[block + kLane] = block + 1;  // nonzero marker, distinct per block
+    }
+    const auto in = load(d, in_lanes);
+    SIMD_ALIGN T out_lanes[d.N];
+    store(broadcast<kLane>(in), d, out_lanes);
+    for (size_t block = 0; block < d.N; block += kBlockN) {
+      for (size_t i = 0; i < kBlockN; ++i) {
+        ASSERT_EQ(T(block + 1), out_lanes[block + i]);  // marker fills its whole block
+      }
+    }
+
+    TestBroadcastR<T, kLane - 1>()();  // continue with the next lower lane index
+  }
+};
+
+template <typename T>
+struct TestBroadcastR<T, -1> {  // Terminates the kLane recursion.
+  void operator()() const {}
+};
+
+template <typename T>
+SIMD_ATTR void TestBroadcastT() {  // Tests broadcast of every lane index for T.
+  constexpr size_t kVecN = SIMD_FULL(T)::N;
+  TestBroadcastR<T, SIMD_MIN(kVecN, 16 / sizeof(T)) - 1>()();  // start at the last lane of a block
+}
+
+SIMD_ATTR void TestBroadcast() {  // Runs TestBroadcastT for the lane types below.
+  // No u8.
+  TestBroadcastT<uint16_t>();
+  TestBroadcastT<uint32_t>();
+  TestBroadcastT<uint64_t>();
+  // No i8.
+  TestBroadcastT<int16_t>();  // NOTE(review): int32_t is not exercised - confirm intent
+  TestBroadcastT<int64_t>();
+  TestBroadcastT<float>();
+  TestBroadcastT<double>();
+}
+
+#if SIMD_TARGET_VALUE != SIMD_NONE
+
+struct TestPermuteT {  // Functor: checks set_table_indices + table_lookup_lanes.
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+    // Test one specific permutation with repeated and cross-block indices.
+    SIMD_ALIGN int32_t idx[d.N] = {1, 7, 2, 2, 4, 1, 3, 6};  // 8 indices listed
+    const auto v = iota(d, 1);
+    SIMD_ALIGN T expected_lanes[d.N];
+    for (size_t i = 0; i < d.N; ++i) {
+      expected_lanes[i] = idx[i] + 1;  // == v[idx[i]]
+    }
+
+    const auto opaque = set_table_indices(d, idx);
+    const auto actual = table_lookup_lanes(v, opaque);
+    ASSERT_VEC_EQ(d, expected_lanes, actual);
+#else
+    // Non-AVX2: test all possible index combinations (repeats included).
+    SIMD_ALIGN int32_t idx[d.N];  // only idx[0..3] are written: assumes d.N == 4 - TODO confirm
+    const auto v = iota(d, 1);
+    SIMD_ALIGN T expected_lanes[d.N];
+
+    const int32_t N = static_cast<int32_t>(d.N);
+    for (int32_t i0 = 0; i0 < N; ++i0) {
+      idx[0] = i0;
+      for (int32_t i1 = 0; i1 < N; ++i1) {
+        idx[1] = i1;
+        for (int32_t i2 = 0; i2 < N; ++i2) {
+          idx[2] = i2;
+          for (int32_t i3 = 0; i3 < N; ++i3) {
+            idx[3] = i3;
+
+            for (size_t i = 0; i < d.N; ++i) {
+              expected_lanes[i] = idx[i] + 1;  // == v[idx[i]]
+            }
+
+            const auto opaque = set_table_indices(d, idx);
+            const auto actual = table_lookup_lanes(v, opaque);
+            ASSERT_VEC_EQ(d, expected_lanes, actual);
+          }
+        }
+      }
+    }
+#endif
+  }
+};
+
+#endif
+
+SIMD_ATTR void TestPermute() {  // Runs TestPermuteT; a no-op for SIMD_NONE.
+#if SIMD_TARGET_VALUE != SIMD_NONE
+  // Only uif32.
+  Call<TestPermuteT, uint32_t>();
+  Call<TestPermuteT, int32_t>();
+  Call<TestPermuteT, float>();
+#endif
+}
+
+struct TestInterleave {  // Functor: checks interleave_lo / interleave_hi.
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+// Not supported by scalar.h: zip(f32, f32) would need to return f32x2.
+#if SIMD_TARGET_VALUE != SIMD_NONE
+    SIMD_ALIGN T even_lanes[d.N];
+    SIMD_ALIGN T odd_lanes[d.N];
+    for (size_t i = 0; i < d.N; ++i) {
+      even_lanes[i] = 2 * i + 0;  // 0, 2, 4, ...
+      odd_lanes[i] = 2 * i + 1;  // 1, 3, 5, ...
+    }
+    const auto even = load(d, even_lanes);
+    const auto odd = load(d, odd_lanes);
+
+    SIMD_ALIGN T lo_lanes[d.N];
+    SIMD_ALIGN T hi_lanes[d.N];
+    store(interleave_lo(even, odd), d, lo_lanes);
+    store(interleave_hi(even, odd), d, hi_lanes);
+
+    constexpr size_t kBlockN = 16 / sizeof(T);  // lanes per 16-byte block
+    for (size_t i = 0; i < d.N; ++i) {
+      const size_t block = i / kBlockN;
+      const size_t lo = (i % kBlockN) + block * 2 * kBlockN;  // expected lo_lanes[i]
+      ASSERT_EQ(T(lo), lo_lanes[i]);
+      ASSERT_EQ(T(lo + kBlockN), hi_lanes[i]);  // hi is kBlockN further on
+    }
+#endif
+  }
+};
+
+template <typename T, typename WideT>
+struct TestZipT {  // Functor: checks zip_lo / zip_hi from T lanes into WideT lanes.
+  SIMD_ATTR void operator()() const {
+    const SIMD_FULL(T) d;
+    const SIMD_FULL(WideT) dw;
+    SIMD_ALIGN T even_lanes[d.N];
+    SIMD_ALIGN T odd_lanes[d.N];
+    for (size_t i = 0; i < d.N; ++i) {
+      even_lanes[i] = 2 * i + 0;  // 0, 2, 4, ...
+      odd_lanes[i] = 2 * i + 1;  // 1, 3, 5, ...
+    }
+    const auto even = load(d, even_lanes);
+    const auto odd = load(d, odd_lanes);
+
+    SIMD_ALIGN WideT lo_lanes[dw.N];
+    SIMD_ALIGN WideT hi_lanes[dw.N];
+    store(zip_lo(even, odd), dw, lo_lanes);
+    store(zip_hi(even, odd), dw, hi_lanes);
+
+    constexpr size_t kBlockN = 16 / sizeof(WideT);  // wide lanes per 16-byte block
+    for (size_t i = 0; i < dw.N; ++i) {
+      const size_t block = i / kBlockN;
+      const size_t lo = (i % kBlockN) + block * 2 * kBlockN;
+      const size_t bits = sizeof(T) * 8;  // width of one narrow lane
+      const size_t expected_lo = ((lo + 1) << bits) + lo;
+      const size_t expected_hi = ((lo + kBlockN + 1) << bits) + lo + kBlockN;
+      ASSERT_EQ(T(expected_lo), lo_lanes[i]);  // NOTE(review): cast is T, lane is WideT
+      ASSERT_EQ(T(expected_hi), hi_lanes[i]);  // NOTE(review): same narrowing cast
+    }
+  }
+};
+
+SIMD_ATTR void TestZip() {  // NOTE(review): builds temporaries only; operator() is never called
+  TestZipT<uint8_t, uint16_t>();
+  TestZipT<uint16_t, uint32_t>();
+  TestZipT<uint32_t, uint64_t>();
+  // No 64-bit nor float.
+  TestZipT<int8_t, int16_t>();
+  TestZipT<int16_t, int32_t>();
+  TestZipT<int32_t, int64_t>();
+}
+
+struct TestShuffleT {  // Functor: checks table_lookup_bytes against a byte-wise reference.
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+// Not supported by scalar.h (its vector size is always less than 16 bytes)
+#if SIMD_TARGET_VALUE != SIMD_NONE
+    RandomState rng{1234};  // fixed seed
+    const SIMD_FULL(uint8_t) d8;
+    constexpr size_t N8 = SIMD_FULL(uint8_t)::N;
+    SIMD_ALIGN uint8_t in_bytes[N8];
+    for (size_t i = 0; i < N8; ++i) {
+      in_bytes[i] = Random32(&rng) & 0xFF;
+    }
+    const auto in = load(d8, in_bytes);
+    SIMD_ALIGN const uint8_t index_bytes[32] = {
+        // Same index as source, multiple outputs from same input,
+        // unused input (9), ascending/descending and nonconsecutive neighbors.
+        0,  2,  1, 2, 15, 12, 13, 14, 6,  7,  8,  5,  4, 3, 10, 11,
+        11, 10, 3, 4, 5,  8,  7,  6,  14, 13, 12, 15, 2, 1, 2,  0};
+    const auto indices = load(d8, index_bytes);
+    SIMD_ALIGN T out_lanes[d.N];
+    store(table_lookup_bytes(cast_to(d, in), indices), d, out_lanes);
+    const uint8_t* out_bytes = reinterpret_cast<const uint8_t*>(out_lanes);
+
+    for (size_t block = 0; block < N8; block += 16) {  // indices are relative to each 16-byte block
+      for (size_t i = 0; i < 16; ++i) {
+        const uint8_t expected = in_bytes[block + index_bytes[block + i]];
+        ASSERT_EQ(expected, out_bytes[block + i]);
+      }
+    }
+#endif
+  }
+};
+
+SIMD_ATTR void TestShuffle() {  // Runs TestShuffleT over integer lane types.
+  ForeachUnsignedLaneType<TestShuffleT>();
+  ForeachSignedLaneType<TestShuffleT>();
+  // No float.
+}
+
+template <typename T, class D, int kBytes>
+struct TestExtractR {  // Checks combine_shift_right_bytes<kBytes>, then kBytes - 1.
+  SIMD_ATTR void operator()() const {
+#if SIMD_TARGET_VALUE != SIMD_NONE
+    const D d;
+    const SIMD_FULL(uint8_t) d8;
+    const auto lo = cast_to(d, iota(d8, 1));  // bytes 1 .. d8.N
+    const auto hi = cast_to(d, iota(d8, 1 + d8.N));  // bytes d8.N + 1 .. 2 * d8.N
+
+    SIMD_ALIGN T lanes[D::N];
+    store(combine_shift_right_bytes<kBytes>(hi, lo), d, lanes);
+    const uint8_t* bytes = reinterpret_cast<const uint8_t*>(lanes);
+
+    const size_t kBlockSize = 16;
+    for (size_t i = 0; i < d8.N; ++i) {
+      const size_t block = i / kBlockSize;
+      const size_t lane = i % kBlockSize;
+      const size_t first_lo = block * kBlockSize;  // index of the block's first byte
+      const size_t idx = lane + kBytes;  // position within the block's hi:lo pair
+      const size_t offset = (idx < kBlockSize) ? 0 : d8.N - kBlockSize;  // nonzero => byte comes from hi
+      const bool at_end = idx >= 2 * kBlockSize;  // past both halves => zero
+      const uint8_t expected = at_end ? 0 : (first_lo + idx + 1 + offset);
+      ASSERT_EQ(expected, bytes[i]);
+    }
+
+    TestExtractR<T, D, kBytes - 1>()();  // recurse until the kBytes == 0 specialization
+#endif
+  }
+};
+
+template <typename T, class D>
+struct TestExtractR<T, D, 0> {  // Terminates the kBytes recursion; 0 is not tested.
+  SIMD_ATTR void operator()() const {}
+};
+
+struct TestExtractT {  // Functor: tests byte shift counts 15 down to 1.
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    TestExtractR<T, D, 15>()();
+  }
+};
+
+SIMD_ATTR void TestExtract() {  // Runs TestExtractT over integer lane types.
+  ForeachUnsignedLaneType<TestExtractT>();
+  ForeachSignedLaneType<TestExtractT>();
+  // No float.
+}
+
+#if SIMD_TARGET_VALUE != SIMD_NONE
+
+template <class D, class V>
+SIMD_ATTR void VerifyLanes32(D d, V v, const int i3, const int i2, const int i1,
+                             const int i0) {  // i3..i0: expected in-block values of lanes 3..0
+  using T = typename D::T;
+  SIMD_ALIGN T lanes[d.N];
+  store(v, d, lanes);
+  constexpr size_t kBlockN = 16 / sizeof(T);  // lanes per 16-byte block
+  for (size_t block = 0; block < d.N; block += kBlockN) {
+    ASSERT_EQ(T(block + i3), lanes[block + 3]);  // expected values are offset by the block start
+    ASSERT_EQ(T(block + i2), lanes[block + 2]);
+    ASSERT_EQ(T(block + i1), lanes[block + 1]);
+    ASSERT_EQ(T(block + i0), lanes[block + 0]);
+  }
+}
+
+template <class D, class V>
+SIMD_ATTR void VerifyLanes64(D d, V v, const int i1, const int i0) {  // i1, i0: expected lanes 1, 0
+  using T = typename D::T;
+  SIMD_ALIGN T lanes[d.N];
+  store(v, d, lanes);
+  constexpr size_t kBlockN = 16 / sizeof(T);  // lanes per 16-byte block
+  for (size_t block = 0; block < d.N; block += kBlockN) {
+    ASSERT_EQ(T(block + i1), lanes[block + 1]);  // expected values are offset by the block start
+    ASSERT_EQ(T(block + i0), lanes[block + 0]);
+  }
+}
+
+struct TestSpecialShuffle32 {  // Functor: checks the fixed 4-lane shuffles.
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto v = iota(d, 0);  // lane i holds i
+    VerifyLanes32(d, shuffle_1032(v), 1, 0, 3, 2);  // arguments are lanes 3, 2, 1, 0
+    VerifyLanes32(d, shuffle_0321(v), 0, 3, 2, 1);
+    VerifyLanes32(d, shuffle_2103(v), 2, 1, 0, 3);
+    VerifyLanes32(d, shuffle_0123(v), 0, 1, 2, 3);
+  }
+};
+
+struct TestSpecialShuffle64 {  // Functor: checks the fixed 2-lane shuffle.
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+    const auto v = iota(d, 0);  // lane i holds i
+    VerifyLanes64(d, shuffle_01(v), 0, 1);  // arguments are lanes 1, 0
+  }
+};
+
+#endif
+
+SIMD_ATTR void TestSpecialShuffles() {  // No-op for SIMD_NONE.
+#if SIMD_TARGET_VALUE != SIMD_NONE
+  Call<TestSpecialShuffle32, int32_t>();
+  Call<TestSpecialShuffle64, int64_t>();
+  Call<TestSpecialShuffle32, float>();
+  Call<TestSpecialShuffle64, double>();
+#endif
+}
+
+struct TestConcatHalves {  // Functor: checks concat_lo_lo and concat_hi_hi.
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+#if SIMD_TARGET_VALUE != SIMD_NONE
+    // Construct inputs such that interleaved halves == iota.
+    const auto expected = iota(d, 1);
+
+    SIMD_ALIGN T lo[d.N];
+    SIMD_ALIGN T hi[d.N];
+    size_t i;
+    for (i = 0; i < d.N / 2; ++i) {
+      lo[i] = 1 + i;
+      hi[i] = lo[i] + d.N / 2;  // continues where lo's lower half ends
+    }
+    for (; i < d.N; ++i) {
+      lo[i] = hi[i] = 0;  // upper halves must not contribute
+    }
+    ASSERT_VEC_EQ(d, expected, concat_lo_lo(load(d, hi), load(d, lo)));
+
+    // Same for high blocks.
+    for (i = 0; i < d.N / 2; ++i) {
+      lo[i] = hi[i] = 0;  // lower halves must not contribute
+    }
+    for (; i < d.N; ++i) {
+      lo[i] = 1 + i - d.N / 2;
+      hi[i] = lo[i] + d.N / 2;
+    }
+    ASSERT_VEC_EQ(d, expected, concat_hi_hi(load(d, hi), load(d, lo)));
+#endif
+  }
+};
+
+struct TestConcatLoHi {  // Functor: checks concat_lo_hi.
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+#if SIMD_TARGET_VALUE != SIMD_NONE
+    // Middle part of iota(1) == iota(1 + d.N / 2).
+    const auto lo = iota(d, 1);
+    const auto hi = iota(d, 1 + d.N);  // continues the sequence after lo
+    ASSERT_VEC_EQ(d, iota(d, 1 + d.N / 2), concat_lo_hi(hi, lo));
+#endif
+  }
+};
+
+struct TestConcatHiLo {  // Functor: checks concat_hi_lo.
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+#if SIMD_TARGET_VALUE != SIMD_NONE
+    const auto lo = iota(d, 1);
+    const auto hi = iota(d, 1 + d.N);  // continues the sequence after lo
+    SIMD_ALIGN T expected[d.N];  // aligned like the other lane buffers in this file
+    size_t i = 0;
+    for (; i < d.N / 2; ++i) {
+      expected[i] = 1 + i;  // lower half equals lo's lower half
+    }
+    for (; i < d.N; ++i) {
+      expected[i] = 1 + i + d.N;  // upper half equals hi's upper half
+    }
+    ASSERT_VEC_EQ(d, expected, concat_hi_lo(hi, lo));
+#endif
+  }
+};
+
+struct TestOddEven {  // Functor: checks odd_even.
+  template <typename T, class D>
+  SIMD_ATTR void operator()(T, D d) const {
+#if SIMD_TARGET_VALUE != SIMD_NONE
+    const auto even = iota(d, 1);
+    const auto odd = iota(d, 1 + d.N);  // disjoint from the values in even
+    SIMD_ALIGN T expected[d.N];  // aligned like the other lane buffers in this file
+    for (size_t i = 0; i < d.N; ++i) {
+      expected[i] = 1 + i + ((i & 1) ? d.N : 0);  // odd lanes from odd, even lanes from even
+    }
+    ASSERT_VEC_EQ(d, expected, odd_even(odd, even));
+#endif
+  }
+};
+
+SIMD_ATTR void TestSwizzle() {  // Entry point for all tests in namespace swizzle.
+  TestShiftBytes();
+  TestBroadcast();
+  ForeachLaneType<TestInterleave>();
+  TestPermute();
+  TestZip();
+  TestShuffle();
+  TestExtract();
+  TestSpecialShuffles();
+  ForeachLaneType<TestConcatHalves>();
+  ForeachLaneType<TestConcatLoHi>();
+  ForeachLaneType<TestConcatHiLo>();
+  ForeachLaneType<TestOddEven>();
+}
+
+}  // namespace swizzle
+
+SIMD_ATTR SIMD_NOINLINE void RunTests() {  // Runs each test group once, in this order.
+  examples::TestExamples();
+  basic::TestBasic();
+  arithmetic::TestArithmetic();
+  compare::TestCompare();
+  logical::TestLogical();
+  memory::TestMemory();
+  convert::TestConvert();
+  swizzle::TestSwizzle();
+}
+
+}  // namespace
+}  // namespace SIMD_NAMESPACE
+
+// Specialization for the current SIMD_TARGET: runs that target's RunTests().
+template <>
+void SimdTest::operator()<SIMD_TARGET>() {
+  SIMD_NAMESPACE::RunTests();
+}
+
+}  // namespace pik
+
+#endif  // #ifdef SIMD_ATTR_IMPL
diff --git a/codec/L2/demos/pikEnc/host/pik/simd/targets.cc b/codec/L2/demos/pikEnc/host/pik/simd/targets.cc
new file mode 100755
index 0000000000..0a1b5cde22
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/simd/targets.cc
@@ -0,0 +1,165 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/simd/targets.h"
+
+#include <stdint.h>
+#include <atomic>
+
+#if SIMD_ARCH == SIMD_ARCH_X86
+#include <xmmintrin.h>
+#ifdef _MSC_VER
+#include <intrin.h>
+#else
+#include <cpuid.h>
+#endif
+#endif
+
+namespace pik {
+
+namespace {
+
+bool IsBitSet(const uint32_t reg, const int index) {  // true iff bit `index` of reg is 1
+  return ((reg >> index) & 1U) == 1U;
+}
+
+#if SIMD_ARCH == SIMD_ARCH_X86
+
+// Calls CPUID instruction with eax=level and ecx=count and returns the result
+// in abcd array where abcd = {eax, ebx, ecx, edx} (hence the name abcd).
+void Cpuid(const uint32_t level, const uint32_t count,
+           uint32_t* SIMD_RESTRICT abcd) {  // abcd must have room for 4 values
+#ifdef _MSC_VER
+  int regs[4];
+  __cpuidex(regs, level, count);
+  for (int i = 0; i < 4; ++i) {
+    abcd[i] = regs[i];  // the MSVC intrinsic fills an int array; copy into uint32_t
+  }
+#else
+  uint32_t a, b, c, d;
+  __cpuid_count(level, count, a, b, c, d);  // macro from <cpuid.h>
+  abcd[0] = a;
+  abcd[1] = b;
+  abcd[2] = c;
+  abcd[3] = d;
+#endif
+}
+
+// Returns the lower 32 bits of extended control register 0.
+// Requires CPU support for "OSXSAVE" (see below).
+uint32_t ReadXCR0() {
+#ifdef _MSC_VER
+  return static_cast<uint32_t>(_xgetbv(0));
+#else
+  uint32_t xcr0, xcr0_high;
+  const uint32_t index = 0;  // passed in ecx: selects XCR0
+  asm volatile(".byte 0x0F, 0x01, 0xD0"  // raw opcode bytes of XGETBV
+               : "=a"(xcr0), "=d"(xcr0_high)
+               : "c"(index));
+  return xcr0;  // xcr0_high (edx) is discarded
+#endif
+}
+
+#endif  // SIMD_ARCH_X86
+
+// Not function-local => no compiler-generated locking.
+std::atomic<int> supported_{-1};  // Not yet initialized
+
+// Bits indicating which instruction set extensions are supported.
+enum {
+  kSSE = 1 << 0,
+  kSSE2 = 1 << 1,
+  kSSE3 = 1 << 2,
+  kSSSE3 = 1 << 3,
+  kSSE41 = 1 << 4,
+  kSSE42 = 1 << 5,
+  kAVX = 1 << 6,
+  kAVX2 = 1 << 7,
+  kFMA = 1 << 8,
+  kLZCNT = 1 << 9,
+  kBMI = 1 << 10,
+  kBMI2 = 1 << 11,
+
+  kGroupAVX2 = kAVX | kAVX2 | kFMA | kLZCNT | kBMI | kBMI2,  // all required for SIMD_AVX2
+  kGroupSSE4 = kSSE | kSSE2 | kSSE3 | kSSSE3 | kSSE41 | kSSE42  // all required for SIMD_SSE4
+};
+
+}  // namespace
+
+TargetBitfield::TargetBitfield() {  // Detects (once) and caches the supported targets.
+  bits_ = supported_.load(std::memory_order_acquire);
+  // Already initialized?
+  if (SIMD_LIKELY(bits_ != -1)) {
+    return;
+  }
+  // First call (or a concurrent first call): detect, then publish in supported_.
+  bits_ = SIMD_NONE;
+
+#if SIMD_ARCH == SIMD_ARCH_X86
+  uint32_t flags = 0;
+  uint32_t abcd[4];
+
+  Cpuid(0, 0, abcd);
+  const uint32_t max_level = abcd[0];
+
+  // Standard feature flags
+  Cpuid(1, 0, abcd);
+  flags |= IsBitSet(abcd[3], 25) ? kSSE : 0;
+  flags |= IsBitSet(abcd[3], 26) ? kSSE2 : 0;
+  flags |= IsBitSet(abcd[2], 0) ? kSSE3 : 0;
+  flags |= IsBitSet(abcd[2], 9) ? kSSSE3 : 0;
+  flags |= IsBitSet(abcd[2], 19) ? kSSE41 : 0;
+  flags |= IsBitSet(abcd[2], 20) ? kSSE42 : 0;
+  flags |= IsBitSet(abcd[2], 12) ? kFMA : 0;
+  flags |= IsBitSet(abcd[2], 28) ? kAVX : 0;
+  const bool has_osxsave = IsBitSet(abcd[2], 27);
+
+  // Extended feature flags
+  Cpuid(0x80000001U, 0, abcd);
+  flags |= IsBitSet(abcd[2], 5) ? kLZCNT : 0;
+
+  // Extended features
+  if (max_level >= 7) {
+    Cpuid(7, 0, abcd);
+    flags |= IsBitSet(abcd[1], 3) ? kBMI : 0;
+    flags |= IsBitSet(abcd[1], 5) ? kAVX2 : 0;
+    flags |= IsBitSet(abcd[1], 8) ? kBMI2 : 0;
+  }
+
+  // Verify OS support for XSAVE, without which YMM registers are not
+  // preserved across context switches and are not safe to use. Without
+  // OSXSAVE, XGETBV must not be executed and the OS has not enabled AVX
+  // state, so XCR0 is treated as all-zero in that case.
+  const uint32_t xcr0 = has_osxsave ? ReadXCR0() : 0;
+  // XMM: only XCR0 can report this state as disabled.
+  if (has_osxsave && !IsBitSet(xcr0, 1)) {
+    flags &= ~(kSSE | kSSE2 | kSSE3 | kSSSE3 | kSSE41 | kSSE42 | kAVX |
+               kAVX2 | kFMA);
+  }
+  // YMM disabled or unknown: no AVX, AVX2, nor (VEX-encoded) FMA.
+  if (!IsBitSet(xcr0, 2)) {
+    flags &= ~(kAVX | kAVX2 | kFMA);
+  }
+
+  // Set target bit(s) if all their group's flags are all set.
+  if ((flags & kGroupAVX2) == kGroupAVX2) {
+    bits_ |= SIMD_AVX2;
+  }
+  if ((flags & kGroupSSE4) == kGroupSSE4) {
+    bits_ |= SIMD_SSE4;
+  }
+#elif SIMD_ARCH == SIMD_ARCH_ARM
+  bits_ |= SIMD_ARM8;
+#endif
+
+  // Don't report targets that aren't enabled, otherwise foreach-target loops
+  // will not terminate.
+  bits_ &= SIMD_ENABLE;
+
+  supported_.store(bits_, std::memory_order_release);
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/simd/targets.h b/codec/L2/demos/pikEnc/host/pik/simd/targets.h
new file mode 100755
index 0000000000..03abb8f7cc
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/simd/targets.h
@@ -0,0 +1,251 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_SIMD_TARGETS_H_
+#define PIK_SIMD_TARGETS_H_
+
+// Definitions of the supported targets (= instruction sets).
+
+#include <stddef.h>
+#include <utility>  // std::forward
+#include "pik/simd/arch.h"
+#include "pik/simd/compiler_specific.h"  // SIMD_TARGET_ATTR
+
+namespace pik {
+
+// The SIMD_ENABLE macro expands to a bitfield of one or more targets:
+#define SIMD_NONE 0
+#define SIMD_AVX2 2
+#define SIMD_SSE4 4
+#define SIMD_AVX512 16
+#define SIMD_PPC8 1  // v2.07 or 3
+#define SIMD_ARM8 8
+
+#define SIMD_ENABLE SIMD_NONE  // NOTE(review): set unconditionally, so the #ifndef default below is skipped
+
+// "Enabling" a target leads to:
+// - including a header that defines its vector type and specializes VecT;
+// - an additional SIMD_TARGET redefinition and another #include of a
+//   target-independent source file from foreach_target, to generate a
+//   specialization of Functor::operator()<SIMD_TARGET>;
+// - a conditional call to operator()<SIMD_TARGET> in Dispatch().
+//
+// To disable an instruction set (e.g. one the compiler does not support),
+// remove it below; doing so in source needs no custom compiler options.
+#ifndef SIMD_ENABLE
+#if SIMD_ARCH == SIMD_ARCH_X86
+#define SIMD_ENABLE (SIMD_SSE4 | SIMD_AVX2)
+#elif SIMD_ARCH == SIMD_ARCH_PPC
+#define SIMD_ENABLE SIMD_PPC8
+#elif SIMD_ARCH == SIMD_ARCH_ARM
+#define SIMD_ENABLE SIMD_ARM8
+#else
+#error "Unsupported platform"
+#endif  // #if SIMD_ARCH
+#endif  // #ifndef SIMD_ENABLE
+
+// Sets SIMD_TARGET to the 'best' target in SIMD_ENABLE. This is only useful for
+// single-target code; for runtime dispatch, use foreach_target.h to generate
+// specializations for all enabled targets.
+#if SIMD_ENABLE & SIMD_AVX2  // checked in priority order: first match wins
+#define SIMD_TARGET AVX2
+#elif SIMD_ENABLE & SIMD_SSE4
+#define SIMD_TARGET SSE4
+#elif SIMD_ENABLE & SIMD_PPC8
+#define SIMD_TARGET PPC8
+#elif SIMD_ENABLE & SIMD_ARM8
+#define SIMD_TARGET ARM8
+#else
+#define SIMD_TARGET NONE  // nothing enabled
+#endif
+
+// SIMD_TARGET serves two purposes: specializing functors and selecting the
+// definition of other macros (e.g. SIMD_ATTR). For the former, we use structs
+// instead of SIMD_SSE4=4 so that the mangled names are easier to understand.
+// The latter requires that struct names match the macro name without the SIMD_
+// prefix, e.g. SIMD_TARGET=SSE4.
+struct NONE {  // Always-available target tag; reports a single lane.
+  static constexpr int value = SIMD_NONE;
+  template <typename T>
+  static constexpr size_t NumLanes() {
+    return 1;
+  }
+};
+#define SIMD_ATTR_NONE  // expands to nothing
+
+#if SIMD_ENABLE & SIMD_SSE4
+struct SSE4 {  // Target tag; lanes computed for 16-byte vectors.
+  static constexpr int value = SIMD_SSE4;
+  template <typename T>
+  static constexpr size_t NumLanes() {
+    return 16 / sizeof(T);
+  }
+};
+#define SIMD_ATTR_SSE4 SIMD_TARGET_ATTR("sse4.1")
+#endif
+
+#if SIMD_ENABLE & SIMD_AVX2
+struct AVX2 {  // Target tag; lanes computed for 32-byte vectors.
+  static constexpr int value = SIMD_AVX2;
+  template <typename T>
+  static constexpr size_t NumLanes() {
+    return 32 / sizeof(T);
+  }
+};
+#define SIMD_ATTR_AVX2 SIMD_TARGET_ATTR("avx,avx2,fma")
+#endif
+
+#if SIMD_ENABLE & SIMD_AVX512
+struct AVX512 {  // Target tag; lanes computed for 64-byte vectors.
+  static constexpr int value = SIMD_AVX512;
+  template <typename T>
+  static constexpr size_t NumLanes() {
+    return 64 / sizeof(T);
+  }
+};
+#endif  // NOTE(review): no SIMD_ATTR_AVX512 is defined here
+
+#if SIMD_ENABLE & SIMD_PPC8
+struct PPC8 {  // Target tag; lanes computed for 16-byte vectors.
+  static constexpr int value = SIMD_PPC8;
+  template <typename T>
+  static constexpr size_t NumLanes() {
+    return 16 / sizeof(T);
+  }
+};
+#endif  // NOTE(review): no SIMD_ATTR_PPC8 is defined here
+
+#if SIMD_ENABLE & SIMD_ARM8
+struct ARM8 {  // Target tag; lanes computed for 16-byte vectors.
+  static constexpr int value = SIMD_ARM8;
+  template <typename T>
+  static constexpr size_t NumLanes() {
+    return 16 / sizeof(T);
+  }
+};
+#define SIMD_ATTR_ARM8 SIMD_TARGET_ATTR("armv8-a+crypto")
+#endif
+
+// Strongly-typed enum ensures the argument to Dispatch is a single target, not
+// a bitfield. Enumerators exist only for targets enabled in SIMD_ENABLE.
+enum class Target {
+#if SIMD_ENABLE & SIMD_AVX2
+  kAVX2 = SIMD_AVX2,
+#endif
+#if SIMD_ENABLE & SIMD_SSE4
+  kSSE4 = SIMD_SSE4,
+#endif
+#if SIMD_ENABLE & SIMD_PPC8
+  kPPC8 = SIMD_PPC8,
+#endif
+#if SIMD_ENABLE & SIMD_ARM8
+  kARM8 = SIMD_ARM8,
+#endif
+  kNONE = SIMD_NONE  // always present
+};
+
+// Returns func.operator()<Target>(args), where Target::value == target. Calling
+// a member function template instead of a class template allows stateful
+// functors. Dispatch overhead is low but prefer to call this infrequently by
+// hoisting this call to higher levels.
+template <class Func, typename... Args>
+SIMD_INLINE auto Dispatch(const Target target, Func&& func, Args&&... args)
+    -> decltype(std::forward<Func>(func).template operator()<NONE>(  // return type taken from the NONE call
+        std::forward<Args>(args)...)) {
+  switch (target) {  // one case per target enabled in SIMD_ENABLE
+#if SIMD_ENABLE & SIMD_AVX2
+    case Target::kAVX2:
+      return std::forward<Func>(func).template operator()<AVX2>(
+          std::forward<Args>(args)...);
+#endif
+#if SIMD_ENABLE & SIMD_SSE4
+    case Target::kSSE4:
+      return std::forward<Func>(func).template operator()<SSE4>(
+          std::forward<Args>(args)...);
+#endif
+#if SIMD_ENABLE & SIMD_PPC8
+    case Target::kPPC8:
+      return std::forward<Func>(func).template operator()<PPC8>(
+          std::forward<Args>(args)...);
+#endif
+#if SIMD_ENABLE & SIMD_ARM8
+    case Target::kARM8:
+      return std::forward<Func>(func).template operator()<ARM8>(
+          std::forward<Args>(args)...);
+#endif
+
+    case Target::kNONE:
+      return std::forward<Func>(func).template operator()<NONE>(
+          std::forward<Args>(args)...);
+  }  // NOTE(review): no default and no return after the switch for unlisted values
+}
+
+// All targets supported by the current CPU. Cheap to construct.
+class TargetBitfield {
+ public:
+  TargetBitfield();
+
+  int Bits() const { return bits_; }  // raw bitfield of SIMD_* target values
+  bool Any() const { return bits_ != 0; }  // true if any target bit is set
+
+  // Returns 'best' (widest/most recent) target amongst those supported.
+  Target Best() const {
+#if SIMD_ENABLE & SIMD_AVX2
+    if (bits_ & SIMD_AVX2) return Target::kAVX2;
+#endif
+#if SIMD_ENABLE & SIMD_SSE4
+    if (bits_ & SIMD_SSE4) return Target::kSSE4;
+#endif
+#if SIMD_ENABLE & SIMD_PPC8
+    if (bits_ & SIMD_PPC8) return Target::kPPC8;
+#endif
+#if SIMD_ENABLE & SIMD_ARM8
+    if (bits_ & SIMD_ARM8) return Target::kARM8;
+#endif
+    return Target::kNONE;  // fallback when no bit matched
+  }
+
+  void Clear(Target target) { bits_ &= ~static_cast<int>(target); }  // removes target from this copy
+
+  // Calls func.operator()<Target>(args) for each set target, then for NONE.
+  template <class Func, typename... Args>
+  SIMD_INLINE void Foreach(Func&& func, Args&&... args) const {
+#if SIMD_ENABLE & SIMD_SSE4
+    if (bits_ & SIMD_SSE4) {
+      std::forward<Func>(func).template operator()<SSE4>(
+          std::forward<Args>(args)...);
+    }
+#endif
+#if SIMD_ENABLE & SIMD_AVX2
+    if (bits_ & SIMD_AVX2) {
+      std::forward<Func>(func).template operator()<AVX2>(
+          std::forward<Args>(args)...);
+    }
+#endif
+#if SIMD_ENABLE & SIMD_PPC8
+    if (bits_ & SIMD_PPC8) {
+      std::forward<Func>(func).template operator()<PPC8>(
+          std::forward<Args>(args)...);
+    }
+#endif
+#if SIMD_ENABLE & SIMD_ARM8
+    if (bits_ & SIMD_ARM8) {
+      std::forward<Func>(func).template operator()<ARM8>(
+          std::forward<Args>(args)...);
+    }
+#endif
+
+    std::forward<Func>(func).template operator()<NONE>(  // NONE is called unconditionally
+        std::forward<Args>(args)...);
+  }
+
+ private:
+  int bits_;  // bitwise OR of SIMD_* values
+};
+
+}  // namespace pik
+
+#endif  // PIK_SIMD_TARGETS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/simd/util.h b/codec/L2/demos/pikEnc/host/pik/simd/util.h
new file mode 100755
index 0000000000..c5865d6a5a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/simd/util.h
@@ -0,0 +1,156 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_SIMD_UTIL_H_
+#define PIK_SIMD_UTIL_H_
+
+// Optional replacements for standard library functionality.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "pik/simd/compiler_specific.h"
+
+namespace pik {
+
+// std::min/max.
+
+#define SIMD_MIN(a, b) ((a) < (b) ? (a) : (b))  // macro: the chosen argument is evaluated twice
+#define SIMD_MAX(a, b) ((a) < (b) ? (b) : (a))  // macro: the chosen argument is evaluated twice
+
+// memcpy/memset.
+
+// Copies kBytes bytes from `from` to `to`; the two must not overlap/alias.
+template <size_t kBytes, typename From, typename To>
+SIMD_INLINE void CopyBytes(const From* from, To* to) {
+  const uint8_t* SIMD_RESTRICT src = reinterpret_cast<const uint8_t*>(from);
+  uint8_t* SIMD_RESTRICT dst = reinterpret_cast<uint8_t*>(to);
+  // Byte-by-byte copy with a compile-time trip count.
+  for (size_t left = kBytes; left != 0; --left) {
+    *dst++ = *src++;
+  }
+}
+
+// Copies kBytes bytes, read from `offset` bytes past `from`, to `to`; no overlap.
+template <size_t kBytes, typename From, typename To>
+SIMD_INLINE void CopyBytesWithOffset(const From* from, const int offset,
+                                     To* to) {
+  const uint8_t* SIMD_RESTRICT src =
+      reinterpret_cast<const uint8_t*>(from) + offset;
+  uint8_t* SIMD_RESTRICT dst = reinterpret_cast<uint8_t*>(to);
+  for (size_t left = kBytes; left != 0; --left) {
+    *dst++ = *src++;
+  }
+}
+
+template <typename T>
+SIMD_INLINE void SetBytes(const uint8_t byte, T* t) {
+  uint8_t* const first = reinterpret_cast<uint8_t*>(t);
+  for (uint8_t* p = first; p != first + sizeof(T); ++p) {
+    *p = byte;  // every one of the sizeof(T) bytes of *t gets `byte`
+  }
+}
+
+// numeric_limits<T>
+
+template <typename T>
+constexpr bool IsFloat() {  // true iff T can hold a fractional value
+  return T(1.25) != T(1);  // converting 1.25 to an integer type yields 1
+}
+
+template <typename T>
+constexpr bool IsSigned() {  // true iff T(-1) is below zero
+  return T(0) > T(-1);  // for unsigned T, T(-1) wraps to the maximum value
+}
+
+// Largest/smallest representable integer values.
+template <typename T>
+constexpr T LimitsMax() {
+  return IsSigned<T>() ? T((1ULL << (sizeof(T) * 8 - 1)) - 1)  // all bits below the top bit
+                       : static_cast<T>(~0ull);  // unsigned: all-ones truncated to T
+}
+template <typename T>
+constexpr T LimitsMin() {  // unsigned: 0; signed: -1 - LimitsMax<T>()
+  return IsSigned<T>() ? T(-1) - LimitsMax<T>() : T(0);
+}
+
+// Value to string
+
+// Writes `value` in decimal to `to`; returns end of string (position of '\0').
+template <typename T>
+inline char* ToString(T value, char* to) {
+  char reversed[64];  // digits, least-significant first
+  char* pos = reversed;
+  int64_t before;
+  do {
+    before = value;
+    value /= 10;
+    const int64_t mod = before - value * 10;  // remainder; has the sign of the value
+    *pos++ = "9876543210123456789"[9 + mod];  // index 9 is '0', so mod and -mod give one digit
+  } while (value != 0);
+  if (before < 0) *pos++ = '-';  // `before` is the leading digit with its sign
+
+  // Reverse the string
+  const int num_chars = pos - reversed;
+  for (int i = 0; i < num_chars; ++i) {
+    to[i] = pos[-1 - i];
+  }
+  to[num_chars] = '\0';
+  return to + num_chars;
+}
+
+template <>
+inline char* ToString<float>(const float value, char* to) {  // 8 fractional digits
+  const int64_t truncated = static_cast<int64_t>(value);  // NOTE(review): "-0.x" loses its sign
+  char* const dot = ToString(truncated, to);
+  const int64_t frac = static_cast<int64_t>((value - truncated) * 1E8);
+  char* const end = ToString((frac < 0 ? -frac : frac) + INT64_C(100000000), dot);
+  *dot = '.';  // replaces the sentinel '1' that kept frac's leading zeros
+  return end;
+}
+
+template <>
+inline char* ToString<double>(const double value, char* to) {  // 16 fractional digits
+  const int64_t truncated = static_cast<int64_t>(value);  // NOTE(review): "-0.x" loses its sign
+  char* const dot = ToString(truncated, to);
+  const int64_t frac = static_cast<int64_t>((value - truncated) * 1E16);
+  char* const end = ToString((frac < 0 ? -frac : frac) + INT64_C(10000000000000000), dot);
+  *dot = '.';  // replaces the sentinel '1' that kept frac's leading zeros
+  return end;
+}
+
+template <>
+inline char* ToString<const char*>(const char* value, char* to) {
+  // Copy everything before the terminator, then terminate the output.
+  for (; *value != '\0'; ++value, ++to) {
+    *to = *value;
+  }
+  *to = '\0';
+  return to;  // points at the '\0' just written
+}
+
+// String comparison
+
+template <typename T1, typename T2>
+inline bool BytesEqual(const T1* p1, const T2* p2, const size_t size) {
+  const uint8_t* lhs = reinterpret_cast<const uint8_t*>(p1);
+  const uint8_t* rhs = reinterpret_cast<const uint8_t*>(p2);
+  // Advance past the common prefix; equal iff it spans all `size` bytes.
+  size_t matched = 0;
+  while (matched < size && lhs[matched] == rhs[matched]) ++matched;
+  return matched == size;
+}
+
+inline bool StringsEqual(const char* s1, const char* s2) {
+  for (; *s1 == *s2; ++s1, ++s2) {
+    if (*s1 == '\0') return true;  // both terminators reached together
+  }
+  return false;  // first mismatch, including one string ending early
+}
+
+}  // namespace pik
+
+#endif  // PIK_SIMD_UTIL_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/simd/x86_avx2.h b/codec/L2/demos/pikEnc/host/pik/simd/x86_avx2.h
new file mode 100755
index 0000000000..ab2838db00
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/simd/x86_avx2.h
@@ -0,0 +1,2395 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_SIMD_X86_AVX2_H_
+#define PIK_SIMD_X86_AVX2_H_
+
+// 256-bit AVX2 vectors and operations.
+// WARNING: most operations do not cross 128-bit block boundaries. In
+// particular, "broadcast", pack and zip behavior may be surprising.
+
+#include "pik/simd/compiler_specific.h"
+#include "pik/simd/shared.h"
+#include "pik/simd/targets.h"
+#include "pik/simd/x86_sse4.h"
+
+#if SIMD_ENABLE & SIMD_AVX2
+#include <immintrin.h>
+
+namespace pik {
+
+template <class Target>  // NOTE(review): the 2 is presumably a block count.
+struct PartTargetT<2, Target> {
+  typedef AVX2 type;  // chosen for every Target
+};
+
+template <typename T>  // Maps a lane type to the raw 256-bit register type.
+struct raw_avx2 {
+  typedef __m256i type;  // default: every T except float/double
+};
+template <>
+struct raw_avx2<float> {
+  typedef __m256 type;
+};
+template <>
+struct raw_avx2<double> {
+  typedef __m256d type;
+};
+
+// Returned by set_table_indices for use by table_lookup_lanes.
+template <typename T>  // T is not used by the member; it only tags the type.
+struct permute_avx2 {
+  __m256i raw;  // NOTE(review): presumably the lane indices - confirm at producer
+};
+
+template <typename T, size_t N = AVX2::NumLanes<T>()>
+class vec_avx2 {
+  typedef typename raw_avx2<T>::type Raw;  // __m256i, __m256 or __m256d
+
+ public:
+  SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2() {}  // raw is left uninitialized
+  vec_avx2(const vec_avx2&) = default;
+  vec_avx2& operator=(const vec_avx2&) = default;
+  SIMD_ATTR_AVX2 SIMD_INLINE explicit vec_avx2(const Raw r) : raw(r) {}
+
+  // In-place forms of the binary operators. Each compiles only if the matching
+  // non-member operator exists for T; e.g. division is f32/f64 only.
+  SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2& operator*=(const vec_avx2 rhs) {
+    return *this = *this * rhs;
+  }
+  SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2& operator/=(const vec_avx2 rhs) {
+    return *this = *this / rhs;
+  }
+  SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2& operator+=(const vec_avx2 rhs) {
+    return *this = *this + rhs;
+  }
+  SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2& operator-=(const vec_avx2 rhs) {
+    return *this = *this - rhs;
+  }
+  SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2& operator&=(const vec_avx2 rhs) {
+    return *this = *this & rhs;
+  }
+  SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2& operator|=(const vec_avx2 rhs) {
+    return *this = *this | rhs;
+  }
+  SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2& operator^=(const vec_avx2 rhs) {
+    return *this = *this ^ rhs;
+  }
+
+  Raw raw;  // public: the free functions in this header read it directly
+};
+
+template <typename T, size_t N>  // Vector type lookup for the AVX2 target.
+struct VecT<T, N, AVX2> {
+  typedef vec_avx2<T, N> type;
+};
+
+typedef vec_avx2<uint8_t, 32> u8x32;  // naming: <lane type>x<lane count>
+typedef vec_avx2<uint16_t, 16> u16x16;
+typedef vec_avx2<uint32_t, 8> u32x8;
+typedef vec_avx2<uint64_t, 4> u64x4;
+typedef vec_avx2<int8_t, 32> i8x32;
+typedef vec_avx2<int16_t, 16> i16x16;
+typedef vec_avx2<int32_t, 8> i32x8;
+typedef vec_avx2<int64_t, 4> i64x4;
+typedef vec_avx2<float, 8> f32x8;
+typedef vec_avx2<double, 4> f64x4;
+
+// ------------------------------ Cast
+
+SIMD_ATTR_AVX2 SIMD_INLINE __m256i BitCastToInteger(__m256i in) { return in; }
+SIMD_ATTR_AVX2 SIMD_INLINE __m256i BitCastToInteger(__m256 in) {
+  return _mm256_castps_si256(in);  // cast intrinsic: type change only
+}
+SIMD_ATTR_AVX2 SIMD_INLINE __m256i BitCastToInteger(__m256d in) {
+  return _mm256_castpd_si256(in);
+}
+
+// cast_to_u8: views N / sizeof(T) lanes of T as N byte lanes.
+template <typename T, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint8_t, N> cast_to_u8(
+    Desc<uint8_t, N, AVX2>, vec_avx2<T, N / sizeof(T)> from) {
+  return vec_avx2<uint8_t, N>(BitCastToInteger(from.raw));
+}
+
+// Functor instead of overloads: only the return type depends on T.
+template <typename T>
+struct BitCastFromIntegerAVX2 {
+  SIMD_ATTR_AVX2 SIMD_INLINE __m256i operator()(__m256i in) { return in; }
+};
+template <>
+struct BitCastFromIntegerAVX2<float> {
+  SIMD_ATTR_AVX2 SIMD_INLINE __m256 operator()(__m256i in) {
+    return _mm256_castsi256_ps(in);
+  }
+};
+template <>
+struct BitCastFromIntegerAVX2<double> {
+  SIMD_ATTR_AVX2 SIMD_INLINE __m256d operator()(__m256i in) {
+    return _mm256_castsi256_pd(in);
+  }
+};
+
+// cast_u8_to: views N * sizeof(T) byte lanes as N lanes of T.
+template <typename T, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> cast_u8_to(
+    Desc<T, N, AVX2>, vec_avx2<uint8_t, N * sizeof(T)> bytes) {
+  return vec_avx2<T, N>(BitCastFromIntegerAVX2<T>()(bytes.raw));
+}
+
+// cast_to: views lanes of FromT as lanes of T, going through byte lanes.
+template <typename T, size_t N, typename FromT>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> cast_to(
+    Desc<T, N, AVX2> dst, vec_avx2<FromT, N * sizeof(T) / sizeof(FromT)> from) {
+  typedef Desc<uint8_t, N * sizeof(T), AVX2> Bytes;  // same size, u8 lanes
+  return cast_u8_to(dst, cast_to_u8(Bytes(), from));
+}
+
+// ------------------------------ Set
+
+// Returns an all-zero vector; the Desc argument only selects the overload.
+template <typename T, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> setzero(Desc<T, N, AVX2>) {
+  return vec_avx2<T, N>(_mm256_setzero_si256());  // generic T: integer register
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> setzero(Desc<float, N, AVX2>) {
+  return vec_avx2<float, N>(_mm256_setzero_ps());
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> setzero(Desc<double, N, AVX2>) {
+  return vec_avx2<double, N>(_mm256_setzero_pd());
+}
+
+template <typename T, size_t N, typename T2>  // lane i receives first + i
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> iota(Desc<T, N, AVX2> d,
+                                               const T2 first) {
+  SIMD_ALIGN T values[N];  // staging buffer handed to load()
+  for (size_t lane = 0; lane != N; ++lane) {
+    values[lane] = first + lane;
+  }
+  return load(d, values);
+}
+
+// Returns a vector with all lanes set to "value".
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint8_t, N> set1(Desc<uint8_t, N, AVX2>,
+                                                     const uint8_t value) {
+  return vec_avx2<uint8_t, N>(_mm256_set1_epi8(value));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t, N> set1(Desc<uint16_t, N, AVX2>,
+                                                      const uint16_t value) {
+  return vec_avx2<uint16_t, N>(_mm256_set1_epi16(value));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t, N> set1(Desc<uint32_t, N, AVX2>,
+                                                      const uint32_t value) {
+  return vec_avx2<uint32_t, N>(_mm256_set1_epi32(value));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t, N> set1(Desc<uint64_t, N, AVX2>,
+                                                      const uint64_t value) {
+  return vec_avx2<uint64_t, N>(_mm256_set1_epi64x(value));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int8_t, N> set1(Desc<int8_t, N, AVX2>,
+                                                    const int8_t value) {
+  return vec_avx2<int8_t, N>(_mm256_set1_epi8(value));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t, N> set1(Desc<int16_t, N, AVX2>,
+                                                     const int16_t value) {
+  return vec_avx2<int16_t, N>(_mm256_set1_epi16(value));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t, N> set1(Desc<int32_t, N, AVX2>,
+                                                     const int32_t value) {
+  return vec_avx2<int32_t, N>(_mm256_set1_epi32(value));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int64_t, N> set1(Desc<int64_t, N, AVX2>,
+                                                     const int64_t value) {
+  return vec_avx2<int64_t, N>(_mm256_set1_epi64x(value));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> set1(Desc<float, N, AVX2>,
+                                                   const float value) {
+  return vec_avx2<float, N>(_mm256_set1_ps(value));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> set1(Desc<double, N, AVX2>,
+                                                    const double value) {
+  return vec_avx2<double, N>(_mm256_set1_pd(value));
+}
+
+SIMD_DIAGNOSTICS(push)
+SIMD_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wuninitialized")
+
+// Returns a vector whose lanes hold unspecified values; callers must overwrite.
+template <typename T, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> undefined(Desc<T, N, AVX2>) {
+#ifdef __clang__
+  return vec_avx2<T, N>(_mm256_undefined_si256());
+#else
+  __m256i raw;  // deliberately uninitialized (warning disabled around this)
+  return vec_avx2<T, N>(raw);
+#endif
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> undefined(Desc<float, N, AVX2>) {
+#ifdef __clang__
+  return vec_avx2<float, N>(_mm256_undefined_ps());
+#else
+  __m256 raw;  // deliberately uninitialized
+  return vec_avx2<float, N>(raw);
+#endif
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> undefined(
+    Desc<double, N, AVX2>) {
+#ifdef __clang__
+  return vec_avx2<double, N>(_mm256_undefined_pd());
+#else
+  __m256d raw;  // deliberately uninitialized
+  return vec_avx2<double, N>(raw);
+#endif
+}
+
+SIMD_DIAGNOSTICS(pop)
+
+// ================================================== ARITHMETIC
+
+// ------------------------------ Addition
+
+// Unsigned lanes (plain add; see saturated_add for the clamping variant).
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint8_t, N> operator+(
+    const vec_avx2<uint8_t, N> lhs, const vec_avx2<uint8_t, N> rhs) {
+  return vec_avx2<uint8_t, N>(_mm256_add_epi8(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t, N> operator+(
+    const vec_avx2<uint16_t, N> lhs, const vec_avx2<uint16_t, N> rhs) {
+  return vec_avx2<uint16_t, N>(_mm256_add_epi16(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t, N> operator+(
+    const vec_avx2<uint32_t, N> lhs, const vec_avx2<uint32_t, N> rhs) {
+  return vec_avx2<uint32_t, N>(_mm256_add_epi32(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t, N> operator+(
+    const vec_avx2<uint64_t, N> lhs, const vec_avx2<uint64_t, N> rhs) {
+  return vec_avx2<uint64_t, N>(_mm256_add_epi64(lhs.raw, rhs.raw));
+}
+
+// Signed lanes (same intrinsics as the unsigned overloads).
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int8_t, N> operator+(
+    const vec_avx2<int8_t, N> lhs, const vec_avx2<int8_t, N> rhs) {
+  return vec_avx2<int8_t, N>(_mm256_add_epi8(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t, N> operator+(
+    const vec_avx2<int16_t, N> lhs, const vec_avx2<int16_t, N> rhs) {
+  return vec_avx2<int16_t, N>(_mm256_add_epi16(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t, N> operator+(
+    const vec_avx2<int32_t, N> lhs, const vec_avx2<int32_t, N> rhs) {
+  return vec_avx2<int32_t, N>(_mm256_add_epi32(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int64_t, N> operator+(
+    const vec_avx2<int64_t, N> lhs, const vec_avx2<int64_t, N> rhs) {
+  return vec_avx2<int64_t, N>(_mm256_add_epi64(lhs.raw, rhs.raw));
+}
+
+// Floating-point lanes
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> operator+(
+    const vec_avx2<float, N> lhs, const vec_avx2<float, N> rhs) {
+  return vec_avx2<float, N>(_mm256_add_ps(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> operator+(
+    const vec_avx2<double, N> lhs, const vec_avx2<double, N> rhs) {
+  return vec_avx2<double, N>(_mm256_add_pd(lhs.raw, rhs.raw));
+}
+
+// ------------------------------ Subtraction
+
+// Unsigned lanes (plain sub; see saturated_subtract for the clamping variant).
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint8_t, N> operator-(
+    const vec_avx2<uint8_t, N> lhs, const vec_avx2<uint8_t, N> rhs) {
+  return vec_avx2<uint8_t, N>(_mm256_sub_epi8(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t, N> operator-(
+    const vec_avx2<uint16_t, N> lhs, const vec_avx2<uint16_t, N> rhs) {
+  return vec_avx2<uint16_t, N>(_mm256_sub_epi16(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t, N> operator-(
+    const vec_avx2<uint32_t, N> lhs, const vec_avx2<uint32_t, N> rhs) {
+  return vec_avx2<uint32_t, N>(_mm256_sub_epi32(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t, N> operator-(
+    const vec_avx2<uint64_t, N> lhs, const vec_avx2<uint64_t, N> rhs) {
+  return vec_avx2<uint64_t, N>(_mm256_sub_epi64(lhs.raw, rhs.raw));
+}
+
+// Signed lanes (same intrinsics as the unsigned overloads).
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int8_t, N> operator-(
+    const vec_avx2<int8_t, N> lhs, const vec_avx2<int8_t, N> rhs) {
+  return vec_avx2<int8_t, N>(_mm256_sub_epi8(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t, N> operator-(
+    const vec_avx2<int16_t, N> lhs, const vec_avx2<int16_t, N> rhs) {
+  return vec_avx2<int16_t, N>(_mm256_sub_epi16(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t, N> operator-(
+    const vec_avx2<int32_t, N> lhs, const vec_avx2<int32_t, N> rhs) {
+  return vec_avx2<int32_t, N>(_mm256_sub_epi32(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int64_t, N> operator-(
+    const vec_avx2<int64_t, N> lhs, const vec_avx2<int64_t, N> rhs) {
+  return vec_avx2<int64_t, N>(_mm256_sub_epi64(lhs.raw, rhs.raw));
+}
+
+// Floating-point lanes
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> operator-(
+    const vec_avx2<float, N> lhs, const vec_avx2<float, N> rhs) {
+  return vec_avx2<float, N>(_mm256_sub_ps(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> operator-(
+    const vec_avx2<double, N> lhs, const vec_avx2<double, N> rhs) {
+  return vec_avx2<double, N>(_mm256_sub_pd(lhs.raw, rhs.raw));
+}
+
+// ------------------------------ Saturating addition
+
+// Returns lhs + rhs clamped to the lane type's range (8/16-bit lanes only).
+
+// Unsigned
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint8_t, N> saturated_add(
+    const vec_avx2<uint8_t, N> lhs, const vec_avx2<uint8_t, N> rhs) {
+  return vec_avx2<uint8_t, N>(_mm256_adds_epu8(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t, N> saturated_add(
+    const vec_avx2<uint16_t, N> lhs, const vec_avx2<uint16_t, N> rhs) {
+  return vec_avx2<uint16_t, N>(_mm256_adds_epu16(lhs.raw, rhs.raw));
+}
+
+// Signed
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int8_t, N> saturated_add(
+    const vec_avx2<int8_t, N> lhs, const vec_avx2<int8_t, N> rhs) {
+  return vec_avx2<int8_t, N>(_mm256_adds_epi8(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t, N> saturated_add(
+    const vec_avx2<int16_t, N> lhs, const vec_avx2<int16_t, N> rhs) {
+  return vec_avx2<int16_t, N>(_mm256_adds_epi16(lhs.raw, rhs.raw));
+}
+
+// ------------------------------ Saturating subtraction
+
+// Returns lhs - rhs clamped to the lane type's range (8/16-bit lanes only).
+
+// Unsigned
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint8_t, N> saturated_subtract(
+    const vec_avx2<uint8_t, N> lhs, const vec_avx2<uint8_t, N> rhs) {
+  return vec_avx2<uint8_t, N>(_mm256_subs_epu8(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t, N> saturated_subtract(
+    const vec_avx2<uint16_t, N> lhs, const vec_avx2<uint16_t, N> rhs) {
+  return vec_avx2<uint16_t, N>(_mm256_subs_epu16(lhs.raw, rhs.raw));
+}
+
+// Signed
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int8_t, N> saturated_subtract(
+    const vec_avx2<int8_t, N> lhs, const vec_avx2<int8_t, N> rhs) {
+  return vec_avx2<int8_t, N>(_mm256_subs_epi8(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t, N> saturated_subtract(
+    const vec_avx2<int16_t, N> lhs, const vec_avx2<int16_t, N> rhs) {
+  return vec_avx2<int16_t, N>(_mm256_subs_epi16(lhs.raw, rhs.raw));
+}
+
+// ------------------------------ Average
+
+// Returns (lhs + rhs + 1) / 2, i.e. the average rounded up.
+
+// Unsigned 8/16-bit lanes only
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint8_t, N> average_round(
+    const vec_avx2<uint8_t, N> lhs, const vec_avx2<uint8_t, N> rhs) {
+  return vec_avx2<uint8_t, N>(_mm256_avg_epu8(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t, N> average_round(
+    const vec_avx2<uint16_t, N> lhs, const vec_avx2<uint16_t, N> rhs) {
+  return vec_avx2<uint16_t, N>(_mm256_avg_epu16(lhs.raw, rhs.raw));
+}
+
+// ------------------------------ Absolute value
+
+// Returns |in| per lane; LimitsMin() has no positive twin and stays as is.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int8_t, N> abs(
+    const vec_avx2<int8_t, N> in) {
+  return vec_avx2<int8_t, N>(_mm256_abs_epi8(in.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t, N> abs(
+    const vec_avx2<int16_t, N> in) {
+  return vec_avx2<int16_t, N>(_mm256_abs_epi16(in.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t, N> abs(
+    const vec_avx2<int32_t, N> in) {
+  return vec_avx2<int32_t, N>(_mm256_abs_epi32(in.raw));
+}
+
+// ------------------------------ Shift lanes by constant #bits
+
+// Unsigned: shift_right uses the logical (srli) forms.
+template <int kBits, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t, N> shift_left(
+    const vec_avx2<uint16_t, N> in) {
+  return vec_avx2<uint16_t, N>(_mm256_slli_epi16(in.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t, N> shift_right(
+    const vec_avx2<uint16_t, N> in) {
+  return vec_avx2<uint16_t, N>(_mm256_srli_epi16(in.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t, N> shift_left(
+    const vec_avx2<uint32_t, N> in) {
+  return vec_avx2<uint32_t, N>(_mm256_slli_epi32(in.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t, N> shift_right(
+    const vec_avx2<uint32_t, N> in) {
+  return vec_avx2<uint32_t, N>(_mm256_srli_epi32(in.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t, N> shift_left(
+    const vec_avx2<uint64_t, N> in) {
+  return vec_avx2<uint64_t, N>(_mm256_slli_epi64(in.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t, N> shift_right(
+    const vec_avx2<uint64_t, N> in) {
+  return vec_avx2<uint64_t, N>(_mm256_srli_epi64(in.raw, kBits));
+}
+
+// Signed (no i64 shift_right): shift_right uses the arithmetic (srai) forms.
+template <int kBits, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t, N> shift_left(
+    const vec_avx2<int16_t, N> in) {
+  return vec_avx2<int16_t, N>(_mm256_slli_epi16(in.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t, N> shift_right(
+    const vec_avx2<int16_t, N> in) {
+  return vec_avx2<int16_t, N>(_mm256_srai_epi16(in.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t, N> shift_left(
+    const vec_avx2<int32_t, N> in) {
+  return vec_avx2<int32_t, N>(_mm256_slli_epi32(in.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t, N> shift_right(
+    const vec_avx2<int32_t, N> in) {
+  return vec_avx2<int32_t, N>(_mm256_srai_epi32(in.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int64_t, N> shift_left(
+    const vec_avx2<int64_t, N> in) {
+  return vec_avx2<int64_t, N>(_mm256_slli_epi64(in.raw, kBits));
+}
+
+// ------------------------------ Shift lanes by same variable #bits
+
+template <typename T, size_t N>  // Packs the count for shift_left_same().
+SIMD_ATTR_AVX2 SIMD_INLINE shift_left_count<T, N> set_shift_left_count(
+    Desc<T, N, AVX2>, const int num_bits) {
+  return shift_left_count<T, N>{_mm_cvtsi32_si128(num_bits)};
+}
+
+// Identical to set_shift_left_count on x86; the two differ on ARM.
+template <typename T, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE shift_right_count<T, N> set_shift_right_count(
+    Desc<T, N, AVX2>, const int num_bits) {
+  return shift_right_count<T, N>{_mm_cvtsi32_si128(num_bits)};
+}
+
+// Unsigned (no u8): shift_right_same uses the logical (srl) forms.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t, N> shift_left_same(
+    const vec_avx2<uint16_t, N> in, const shift_left_count<uint16_t, N> bits) {
+  return vec_avx2<uint16_t, N>(_mm256_sll_epi16(in.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t, N> shift_right_same(
+    const vec_avx2<uint16_t, N> in, const shift_right_count<uint16_t, N> bits) {
+  return vec_avx2<uint16_t, N>(_mm256_srl_epi16(in.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t, N> shift_left_same(
+    const vec_avx2<uint32_t, N> in, const shift_left_count<uint32_t, N> bits) {
+  return vec_avx2<uint32_t, N>(_mm256_sll_epi32(in.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t, N> shift_right_same(
+    const vec_avx2<uint32_t, N> in, const shift_right_count<uint32_t, N> bits) {
+  return vec_avx2<uint32_t, N>(_mm256_srl_epi32(in.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t, N> shift_left_same(
+    const vec_avx2<uint64_t, N> in, const shift_left_count<uint64_t, N> bits) {
+  return vec_avx2<uint64_t, N>(_mm256_sll_epi64(in.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t, N> shift_right_same(
+    const vec_avx2<uint64_t, N> in, const shift_right_count<uint64_t, N> bits) {
+  return vec_avx2<uint64_t, N>(_mm256_srl_epi64(in.raw, bits.raw));
+}
+
+// Signed (no i8,i64): shift_right_same uses the arithmetic (sra) forms.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t, N> shift_left_same(
+    const vec_avx2<int16_t, N> in, const shift_left_count<int16_t, N> bits) {
+  return vec_avx2<int16_t, N>(_mm256_sll_epi16(in.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t, N> shift_right_same(
+    const vec_avx2<int16_t, N> in, const shift_right_count<int16_t, N> bits) {
+  return vec_avx2<int16_t, N>(_mm256_sra_epi16(in.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t, N> shift_left_same(
+    const vec_avx2<int32_t, N> in, const shift_left_count<int32_t, N> bits) {
+  return vec_avx2<int32_t, N>(_mm256_sll_epi32(in.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t, N> shift_right_same(
+    const vec_avx2<int32_t, N> in, const shift_right_count<int32_t, N> bits) {
+  return vec_avx2<int32_t, N>(_mm256_sra_epi32(in.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int64_t, N> shift_left_same(
+    const vec_avx2<int64_t, N> in, const shift_left_count<int64_t, N> bits) {
+  return vec_avx2<int64_t, N>(_mm256_sll_epi64(in.raw, bits.raw));
+}
+
+// ------------------------------ Shift lanes by independent variable #bits
+
+// Unsigned (no u8,u16): each lane is shifted by its own count; >> is logical.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t, N> operator<<(
+    const vec_avx2<uint32_t, N> in, const vec_avx2<uint32_t, N> count) {
+  return vec_avx2<uint32_t, N>(_mm256_sllv_epi32(in.raw, count.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t, N> operator>>(
+    const vec_avx2<uint32_t, N> in, const vec_avx2<uint32_t, N> count) {
+  return vec_avx2<uint32_t, N>(_mm256_srlv_epi32(in.raw, count.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t, N> operator<<(
+    const vec_avx2<uint64_t, N> in, const vec_avx2<uint64_t, N> count) {
+  return vec_avx2<uint64_t, N>(_mm256_sllv_epi64(in.raw, count.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t, N> operator>>(
+    const vec_avx2<uint64_t, N> in, const vec_avx2<uint64_t, N> count) {
+  return vec_avx2<uint64_t, N>(_mm256_srlv_epi64(in.raw, count.raw));
+}
+
+// Signed (no i8,i16,i64): >> is arithmetic (srav).
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t, N> operator<<(
+    const vec_avx2<int32_t, N> in, const vec_avx2<int32_t, N> count) {
+  return vec_avx2<int32_t, N>(_mm256_sllv_epi32(in.raw, count.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t, N> operator>>(
+    const vec_avx2<int32_t, N> in, const vec_avx2<int32_t, N> count) {
+  return vec_avx2<int32_t, N>(_mm256_srav_epi32(in.raw, count.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int64_t, N> operator<<(
+    const vec_avx2<int64_t, N> in, const vec_avx2<int64_t, N> count) {
+  return vec_avx2<int64_t, N>(_mm256_sllv_epi64(in.raw, count.raw));
+}
+
+// ------------------------------ Minimum
+
+// Unsigned (no u64): lane-wise smaller value.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint8_t, N> min(
+    const vec_avx2<uint8_t, N> lhs, const vec_avx2<uint8_t, N> rhs) {
+  return vec_avx2<uint8_t, N>(_mm256_min_epu8(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t, N> min(
+    const vec_avx2<uint16_t, N> lhs, const vec_avx2<uint16_t, N> rhs) {
+  return vec_avx2<uint16_t, N>(_mm256_min_epu16(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t, N> min(
+    const vec_avx2<uint32_t, N> lhs, const vec_avx2<uint32_t, N> rhs) {
+  return vec_avx2<uint32_t, N>(_mm256_min_epu32(lhs.raw, rhs.raw));
+}
+
+// Signed (no i64): uses the signed-compare (epi) intrinsics.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int8_t, N> min(
+    const vec_avx2<int8_t, N> lhs, const vec_avx2<int8_t, N> rhs) {
+  return vec_avx2<int8_t, N>(_mm256_min_epi8(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t, N> min(
+    const vec_avx2<int16_t, N> lhs, const vec_avx2<int16_t, N> rhs) {
+  return vec_avx2<int16_t, N>(_mm256_min_epi16(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t, N> min(
+    const vec_avx2<int32_t, N> lhs, const vec_avx2<int32_t, N> rhs) {
+  return vec_avx2<int32_t, N>(_mm256_min_epi32(lhs.raw, rhs.raw));
+}
+
+// Floating-point lanes (operand order is passed through unchanged).
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> min(
+    const vec_avx2<float, N> lhs, const vec_avx2<float, N> rhs) {
+  return vec_avx2<float, N>(_mm256_min_ps(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> min(
+    const vec_avx2<double, N> lhs, const vec_avx2<double, N> rhs) {
+  return vec_avx2<double, N>(_mm256_min_pd(lhs.raw, rhs.raw));
+}
+
+// ------------------------------ Maximum
+
+// Unsigned (no u64): lane-wise larger value.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint8_t, N> max(
+    const vec_avx2<uint8_t, N> lhs, const vec_avx2<uint8_t, N> rhs) {
+  return vec_avx2<uint8_t, N>(_mm256_max_epu8(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t, N> max(
+    const vec_avx2<uint16_t, N> lhs, const vec_avx2<uint16_t, N> rhs) {
+  return vec_avx2<uint16_t, N>(_mm256_max_epu16(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t, N> max(
+    const vec_avx2<uint32_t, N> lhs, const vec_avx2<uint32_t, N> rhs) {
+  return vec_avx2<uint32_t, N>(_mm256_max_epu32(lhs.raw, rhs.raw));
+}
+
+// Signed (no i64): uses the signed-compare (epi) intrinsics.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int8_t, N> max(
+    const vec_avx2<int8_t, N> lhs, const vec_avx2<int8_t, N> rhs) {
+  return vec_avx2<int8_t, N>(_mm256_max_epi8(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t, N> max(
+    const vec_avx2<int16_t, N> lhs, const vec_avx2<int16_t, N> rhs) {
+  return vec_avx2<int16_t, N>(_mm256_max_epi16(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t, N> max(
+    const vec_avx2<int32_t, N> lhs, const vec_avx2<int32_t, N> rhs) {
+  return vec_avx2<int32_t, N>(_mm256_max_epi32(lhs.raw, rhs.raw));
+}
+
+// Floating-point lanes (operand order is passed through unchanged).
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> max(
+    const vec_avx2<float, N> lhs, const vec_avx2<float, N> rhs) {
+  return vec_avx2<float, N>(_mm256_max_ps(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> max(
+    const vec_avx2<double, N> lhs, const vec_avx2<double, N> rhs) {
+  return vec_avx2<double, N>(_mm256_max_pd(lhs.raw, rhs.raw));
+}
+
+// Returns the closest value to v within [lo, hi]; works for any T with min/max.
+template <typename T, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> clamp(const vec_avx2<T, N> v,
+                                                const vec_avx2<T, N> lo,
+                                                const vec_avx2<T, N> hi) {
+  return min(max(lo, v), hi);  // NOTE(review): float NaN result depends on order
+}
+
+// ------------------------------ Integer multiplication
+
+// Unsigned 16/32-bit lanes: mullo keeps the low half of each product.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t, N> operator*(
+    const vec_avx2<uint16_t, N> lhs, const vec_avx2<uint16_t, N> rhs) {
+  return vec_avx2<uint16_t, N>(_mm256_mullo_epi16(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t, N> operator*(
+    const vec_avx2<uint32_t, N> lhs, const vec_avx2<uint32_t, N> rhs) {
+  return vec_avx2<uint32_t, N>(_mm256_mullo_epi32(lhs.raw, rhs.raw));
+}
+
+// Signed 16/32-bit lanes (same intrinsics as the unsigned overloads).
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t, N> operator*(
+    const vec_avx2<int16_t, N> lhs, const vec_avx2<int16_t, N> rhs) {
+  return vec_avx2<int16_t, N>(_mm256_mullo_epi16(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t, N> operator*(
+    const vec_avx2<int32_t, N> lhs, const vec_avx2<int32_t, N> rhs) {
+  return vec_avx2<int32_t, N>(_mm256_mullo_epi32(lhs.raw, rhs.raw));
+}
+
+// "Extensions": operations that are useful but not quite performance-portable.
+// This namespace is reopened and extended in several places.
+namespace ext {
+
+// Returns the upper 16 bits of lhs * rhs in each lane.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t, N> mul_high(
+    const vec_avx2<uint16_t, N> lhs, const vec_avx2<uint16_t, N> rhs) {
+  return vec_avx2<uint16_t, N>(_mm256_mulhi_epu16(lhs.raw, rhs.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t, N> mul_high(
+    const vec_avx2<int16_t, N> lhs, const vec_avx2<int16_t, N> rhs) {
+  return vec_avx2<int16_t, N>(_mm256_mulhi_epi16(lhs.raw, rhs.raw));
+}
+
+}  // namespace ext
+
+// Returns (((lhs * rhs) >> 14) + 1) >> 1 per 16-bit lane.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t, N> mul_high_round(
+    const vec_avx2<int16_t, N> lhs, const vec_avx2<int16_t, N> rhs) {
+  return vec_avx2<int16_t, N>(_mm256_mulhrs_epi16(lhs.raw, rhs.raw));
+}
+
+// Multiplies the even 32-bit lanes (0, 2 ..) of lhs and rhs. Each double-wide
+// product fills the even lane, with its upper half in the odd neighbor lane.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int64_t> mul_even(
+    const vec_avx2<int32_t> lhs, const vec_avx2<int32_t> rhs) {
+  return vec_avx2<int64_t>(_mm256_mul_epi32(lhs.raw, rhs.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t> mul_even(
+    const vec_avx2<uint32_t> lhs, const vec_avx2<uint32_t> rhs) {
+  return vec_avx2<uint64_t>(_mm256_mul_epu32(lhs.raw, rhs.raw));
+}
+
+// ------------------------------ Floating-point negate
+
+template <size_t N>  // Negates by flipping the sign bit of every f32 lane.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> neg(
+    const vec_avx2<float, N> in) {
+  const Part<float, N, AVX2> d_float;
+  const Part<uint32_t, N, AVX2> d_bits;
+  const auto sign_mask = cast_to(d_float, set1(d_bits, 0x80000000u));
+  return in ^ sign_mask;
+}
+
+template <size_t N>  // Negates by flipping the sign bit of every f64 lane.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> neg(
+    const vec_avx2<double, N> in) {
+  const Part<double, N, AVX2> d_float;
+  const Part<uint64_t, N, AVX2> d_bits;
+  const auto sign_mask = cast_to(d_float, set1(d_bits, 0x8000000000000000ull));
+  return in ^ sign_mask;
+}
+
+// ------------------------------ Floating-point mul / div
+
+template <size_t N>  // f32 lane-wise product
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> operator*(
+    const vec_avx2<float, N> lhs, const vec_avx2<float, N> rhs) {
+  return vec_avx2<float, N>(_mm256_mul_ps(lhs.raw, rhs.raw));
+}
+template <size_t N>  // f64 lane-wise product
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> operator*(
+    const vec_avx2<double, N> lhs, const vec_avx2<double, N> rhs) {
+  return vec_avx2<double, N>(_mm256_mul_pd(lhs.raw, rhs.raw));
+}
+
+template <size_t N>  // f32 lane-wise quotient lhs / rhs
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> operator/(
+    const vec_avx2<float, N> lhs, const vec_avx2<float, N> rhs) {
+  return vec_avx2<float, N>(_mm256_div_ps(lhs.raw, rhs.raw));
+}
+template <size_t N>  // f64 lane-wise quotient lhs / rhs
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> operator/(
+    const vec_avx2<double, N> lhs, const vec_avx2<double, N> rhs) {
+  return vec_avx2<double, N>(_mm256_div_pd(lhs.raw, rhs.raw));
+}
+
+// Approximate 1 / in per f32 lane (rcp intrinsic, reduced precision).
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> approximate_reciprocal(
+    const vec_avx2<float, N> in) {
+  return vec_avx2<float, N>(_mm256_rcp_ps(in.raw));
+}
+
+// ------------------------------ Floating-point multiply-add variants
+
+// Returns mul * x + add per lane as one fused multiply-add (float, double).
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> mul_add(
+    const vec_avx2<float, N> mul, const vec_avx2<float, N> x,
+    const vec_avx2<float, N> add) {
+  return vec_avx2<float, N>(_mm256_fmadd_ps(mul.raw, x.raw, add.raw));
+}
+template <size_t N>  // double overload of the above
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> mul_add(
+    const vec_avx2<double, N> mul, const vec_avx2<double, N> x,
+    const vec_avx2<double, N> add) {
+  return vec_avx2<double, N>(_mm256_fmadd_pd(mul.raw, x.raw, add.raw));
+}
+
+// Returns add - mul * x per lane as one fused negated multiply-add.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> nmul_add(
+    const vec_avx2<float, N> mul, const vec_avx2<float, N> x,
+    const vec_avx2<float, N> add) {
+  return vec_avx2<float, N>(_mm256_fnmadd_ps(mul.raw, x.raw, add.raw));
+}
+template <size_t N>  // double overload of the above
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> nmul_add(
+    const vec_avx2<double, N> mul, const vec_avx2<double, N> x,
+    const vec_avx2<double, N> add) {
+  return vec_avx2<double, N>(_mm256_fnmadd_pd(mul.raw, x.raw, add.raw));
+}
+
+// Expresses addition/subtraction as FMA for higher throughput (but also
+// higher latency) on HSW/BDW. Requires inline assembly because clang > 6
+// 'optimizes' FMA by 1.0 to addition/subtraction. x86 offers 132, 213, 231
+// forms (1=F, 2=M, 3=A); the first is also the destination.
+
+// Returns x * k1 + add, i.e. x + add when k1 holds 1.0 (x is asm in/out).
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> fadd(
+    vec_avx2<float, N> x, const vec_avx2<float, N> k1,
+    const vec_avx2<float, N> add) {
+#if SIMD_COMPILER != SIMD_COMPILER_MSVC && defined(__AVX2__)
+  asm volatile("vfmadd132ps %2, %1, %0"  // AT&T: %0 = %0 * %2 + %1
+               : "+x"(x.raw)
+               : "x"(add.raw), "x"(k1.raw));
+  return x;  // x.raw was overwritten with the result by the asm above.
+#else
+  return vec_avx2<float, N>(_mm256_fmadd_ps(k1.raw, x.raw, add.raw));
+#endif
+}
+template <size_t N>  // double overload: returns x * k1 + add
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> fadd(
+    vec_avx2<double, N> x, const vec_avx2<double, N> k1,
+    const vec_avx2<double, N> add) {
+#if SIMD_COMPILER != SIMD_COMPILER_MSVC && defined(__AVX2__)
+  asm volatile("vfmadd132pd %2, %1, %0"  // AT&T: %0 = %0 * %2 + %1
+               : "+x"(x.raw)
+               : "x"(add.raw), "x"(k1.raw));
+  return x;  // x.raw was overwritten with the result by the asm above.
+#else
+  return vec_avx2<double, N>(_mm256_fmadd_pd(k1.raw, x.raw, add.raw));
+#endif
+}
+
+// Returns x * k1 - sub, i.e. x - sub when k1 holds 1.0 (x is asm in/out).
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> fsub(
+    vec_avx2<float, N> x, const vec_avx2<float, N> k1,
+    const vec_avx2<float, N> sub) {
+#if SIMD_COMPILER != SIMD_COMPILER_MSVC && defined(__AVX2__)
+  asm volatile("vfmsub132ps %2, %1, %0"  // AT&T: %0 = %0 * %2 - %1
+               : "+x"(x.raw)
+               : "x"(sub.raw), "x"(k1.raw));
+  return x;  // x.raw was overwritten with the result by the asm above.
+#else
+  return vec_avx2<float, N>(_mm256_fmsub_ps(k1.raw, x.raw, sub.raw));
+#endif
+}
+template <size_t N>  // double overload: returns x * k1 - sub
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> fsub(
+    vec_avx2<double, N> x, const vec_avx2<double, N> k1,
+    const vec_avx2<double, N> sub) {
+#if SIMD_COMPILER != SIMD_COMPILER_MSVC && defined(__AVX2__)
+  asm volatile("vfmsub132pd %2, %1, %0"  // AT&T: %0 = %0 * %2 - %1
+               : "+x"(x.raw)
+               : "x"(sub.raw), "x"(k1.raw));
+  return x;  // x.raw was overwritten with the result by the asm above.
+#else
+  return vec_avx2<double, N>(_mm256_fmsub_pd(k1.raw, x.raw, sub.raw));
+#endif
+}
+
+// Returns -(sub * k1) + x, i.e. x - sub when k1 holds 1.0 (clobbers sub).
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> fnadd(
+    vec_avx2<float, N> sub, const vec_avx2<float, N> k1,
+    const vec_avx2<float, N> x) {
+#if SIMD_COMPILER != SIMD_COMPILER_MSVC && defined(__AVX2__)
+  asm volatile("vfnmadd132ps %2, %1, %0"  // AT&T: %0 = -(%0 * %2) + %1
+               : "+x"(sub.raw)
+               : "x"(x.raw), "x"(k1.raw));
+  return sub;  // The asm writes its result to sub (the "+x" operand), not x.
+#else
+  return vec_avx2<float, N>(_mm256_fnmadd_ps(sub.raw, k1.raw, x.raw));
+#endif
+}
+template <size_t N>  // double lanes: returns -(sub * k1) + x (clobbers sub)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> fnadd(
+    vec_avx2<double, N> sub, const vec_avx2<double, N> k1,
+    const vec_avx2<double, N> x) {
+#if SIMD_COMPILER != SIMD_COMPILER_MSVC && defined(__AVX2__)
+  asm volatile("vfnmadd132pd %2, %1, %0"  // AT&T: %0 = -(%0 * %2) + %1
+               : "+x"(sub.raw)
+               : "x"(x.raw), "x"(k1.raw));
+  return sub;  // The asm writes its result to sub (the "+x" operand), not x.
+#else
+  return vec_avx2<double, N>(_mm256_fnmadd_pd(sub.raw, k1.raw, x.raw));
+#endif
+}
+
+// Slightly more expensive on ARM (extra negate)
+namespace ext {
+
+// Returns mul * x - sub per lane via one fused multiply-subtract.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> mul_subtract(
+    const vec_avx2<float, N> mul, const vec_avx2<float, N> x,
+    const vec_avx2<float, N> sub) {
+  return vec_avx2<float, N>(_mm256_fmsub_ps(mul.raw, x.raw, sub.raw));
+}
+template <size_t N>  // double overload of the above
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> mul_subtract(
+    const vec_avx2<double, N> mul, const vec_avx2<double, N> x,
+    const vec_avx2<double, N> sub) {
+  return vec_avx2<double, N>(_mm256_fmsub_pd(mul.raw, x.raw, sub.raw));
+}
+
+// Returns -(mul * x) - sub per lane via a fused negated multiply-subtract.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> nmul_subtract(
+    const vec_avx2<float, N> mul, const vec_avx2<float, N> x,
+    const vec_avx2<float, N> sub) {
+  return vec_avx2<float, N>(_mm256_fnmsub_ps(mul.raw, x.raw, sub.raw));
+}
+template <size_t N>  // double overload of the above
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> nmul_subtract(
+    const vec_avx2<double, N> mul, const vec_avx2<double, N> x,
+    const vec_avx2<double, N> sub) {
+  return vec_avx2<double, N>(_mm256_fnmsub_pd(mul.raw, x.raw, sub.raw));
+}
+
+}  // namespace ext
+
+// ------------------------------ Floating-point square root
+
+// Full-precision square root of each lane (float, then double overload).
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> sqrt(const vec_avx2<float, N> v) {
+  return vec_avx2<float, N>(_mm256_sqrt_ps(v.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> sqrt(
+    const vec_avx2<double, N> v) {
+  return vec_avx2<double, N>(_mm256_sqrt_pd(v.raw));
+}
+
+// Approximate reciprocal square root (1 / sqrt(v)) of each float lane.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> approximate_reciprocal_sqrt(
+    const vec_avx2<float, N> v) {
+  return vec_avx2<float, N>(_mm256_rsqrt_ps(v.raw));
+}
+
+// ------------------------------ Floating-point rounding
+
+// Toward nearest integer, tie to even (float, double); exceptions suppressed.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> round(
+    const vec_avx2<float, N> v) {
+  return vec_avx2<float, N>(
+      _mm256_round_ps(v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> round(
+    const vec_avx2<double, N> v) {
+  return vec_avx2<double, N>(
+      _mm256_round_pd(v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
+}
+
+// Toward zero, aka truncate (float, double); exceptions suppressed.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> trunc(
+    const vec_avx2<float, N> v) {
+  return vec_avx2<float, N>(
+      _mm256_round_ps(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> trunc(
+    const vec_avx2<double, N> v) {
+  return vec_avx2<double, N>(
+      _mm256_round_pd(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC));
+}
+
+// Toward +infinity, aka ceiling (float, double); exceptions suppressed.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> ceil(const vec_avx2<float, N> v) {
+  return vec_avx2<float, N>(
+      _mm256_round_ps(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> ceil(
+    const vec_avx2<double, N> v) {
+  return vec_avx2<double, N>(
+      _mm256_round_pd(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC));
+}
+
+// Toward -infinity, aka floor (float, double); exceptions suppressed.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> floor(
+    const vec_avx2<float, N> v) {
+  return vec_avx2<float, N>(
+      _mm256_round_ps(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> floor(
+    const vec_avx2<double, N> v) {
+  return vec_avx2<double, N>(
+      _mm256_round_pd(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC));
+}
+
+// ================================================== COMPARE
+
+// Comparisons fill a lane with 1-bits if the condition is true, else 0.
+
+// ------------------------------ Equality
+
+// Unsigned: bitwise lane equality, so the epi8/16/32/64 compares apply as-is.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint8_t, N> operator==(
+    const vec_avx2<uint8_t, N> a, const vec_avx2<uint8_t, N> b) {
+  return vec_avx2<uint8_t, N>(_mm256_cmpeq_epi8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t, N> operator==(
+    const vec_avx2<uint16_t, N> a, const vec_avx2<uint16_t, N> b) {
+  return vec_avx2<uint16_t, N>(_mm256_cmpeq_epi16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t, N> operator==(
+    const vec_avx2<uint32_t, N> a, const vec_avx2<uint32_t, N> b) {
+  return vec_avx2<uint32_t, N>(_mm256_cmpeq_epi32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t, N> operator==(
+    const vec_avx2<uint64_t, N> a, const vec_avx2<uint64_t, N> b) {
+  return vec_avx2<uint64_t, N>(_mm256_cmpeq_epi64(a.raw, b.raw));
+}
+
+// Signed: same cmpeq intrinsics as the unsigned overloads of equal lane width.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int8_t, N> operator==(
+    const vec_avx2<int8_t, N> a, const vec_avx2<int8_t, N> b) {
+  return vec_avx2<int8_t, N>(_mm256_cmpeq_epi8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t, N> operator==(
+    const vec_avx2<int16_t, N> a, const vec_avx2<int16_t, N> b) {
+  return vec_avx2<int16_t, N>(_mm256_cmpeq_epi16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t, N> operator==(
+    const vec_avx2<int32_t, N> a, const vec_avx2<int32_t, N> b) {
+  return vec_avx2<int32_t, N>(_mm256_cmpeq_epi32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int64_t, N> operator==(
+    const vec_avx2<int64_t, N> a, const vec_avx2<int64_t, N> b) {
+  return vec_avx2<int64_t, N>(_mm256_cmpeq_epi64(a.raw, b.raw));
+}
+
+// Float: ordered, quiet compare (_CMP_EQ_OQ), so a lane with NaN yields 0.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> operator==(
+    const vec_avx2<float, N> a, const vec_avx2<float, N> b) {
+  return vec_avx2<float, N>(_mm256_cmp_ps(a.raw, b.raw, _CMP_EQ_OQ));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> operator==(
+    const vec_avx2<double, N> a, const vec_avx2<double, N> b) {
+  return vec_avx2<double, N>(_mm256_cmp_pd(a.raw, b.raw, _CMP_EQ_OQ));
+}
+
+// ------------------------------ Strict inequality
+
+// Signed/float <. Integers: cmpgt with swapped operands (b > a == a < b).
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int8_t, N> operator<(
+    const vec_avx2<int8_t, N> a, const vec_avx2<int8_t, N> b) {
+  return vec_avx2<int8_t, N>(_mm256_cmpgt_epi8(b.raw, a.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t, N> operator<(
+    const vec_avx2<int16_t, N> a, const vec_avx2<int16_t, N> b) {
+  return vec_avx2<int16_t, N>(_mm256_cmpgt_epi16(b.raw, a.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t, N> operator<(
+    const vec_avx2<int32_t, N> a, const vec_avx2<int32_t, N> b) {
+  return vec_avx2<int32_t, N>(_mm256_cmpgt_epi32(b.raw, a.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int64_t, N> operator<(
+    const vec_avx2<int64_t, N> a, const vec_avx2<int64_t, N> b) {
+  return vec_avx2<int64_t, N>(_mm256_cmpgt_epi64(b.raw, a.raw));
+}
+template <size_t N>  // float: ordered, quiet compare (_CMP_LT_OQ)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> operator<(
+    const vec_avx2<float, N> a, const vec_avx2<float, N> b) {
+  return vec_avx2<float, N>(_mm256_cmp_ps(a.raw, b.raw, _CMP_LT_OQ));
+}
+template <size_t N>  // double: ordered, quiet compare (_CMP_LT_OQ)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> operator<(
+    const vec_avx2<double, N> a, const vec_avx2<double, N> b) {
+  return vec_avx2<double, N>(_mm256_cmp_pd(a.raw, b.raw, _CMP_LT_OQ));
+}
+
+// Signed/float >. Integers map directly onto the cmpgt intrinsics.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int8_t, N> operator>(
+    const vec_avx2<int8_t, N> a, const vec_avx2<int8_t, N> b) {
+  return vec_avx2<int8_t, N>(_mm256_cmpgt_epi8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t, N> operator>(
+    const vec_avx2<int16_t, N> a, const vec_avx2<int16_t, N> b) {
+  return vec_avx2<int16_t, N>(_mm256_cmpgt_epi16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t, N> operator>(
+    const vec_avx2<int32_t, N> a, const vec_avx2<int32_t, N> b) {
+  return vec_avx2<int32_t, N>(_mm256_cmpgt_epi32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int64_t, N> operator>(
+    const vec_avx2<int64_t, N> a, const vec_avx2<int64_t, N> b) {
+  return vec_avx2<int64_t, N>(_mm256_cmpgt_epi64(a.raw, b.raw));
+}
+template <size_t N>  // float: ordered, quiet compare (_CMP_GT_OQ)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> operator>(
+    const vec_avx2<float, N> a, const vec_avx2<float, N> b) {
+  return vec_avx2<float, N>(_mm256_cmp_ps(a.raw, b.raw, _CMP_GT_OQ));
+}
+template <size_t N>  // double: ordered, quiet compare (_CMP_GT_OQ)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> operator>(
+    const vec_avx2<double, N> a, const vec_avx2<double, N> b) {
+  return vec_avx2<double, N>(_mm256_cmp_pd(a.raw, b.raw, _CMP_GT_OQ));
+}
+
+// ------------------------------ Weak inequality
+
+// Float <= >= (ordered, quiet predicates: a lane with NaN yields 0).
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> operator<=(
+    const vec_avx2<float, N> a, const vec_avx2<float, N> b) {
+  return vec_avx2<float, N>(_mm256_cmp_ps(a.raw, b.raw, _CMP_LE_OQ));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> operator<=(
+    const vec_avx2<double, N> a, const vec_avx2<double, N> b) {
+  return vec_avx2<double, N>(_mm256_cmp_pd(a.raw, b.raw, _CMP_LE_OQ));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> operator>=(
+    const vec_avx2<float, N> a, const vec_avx2<float, N> b) {
+  return vec_avx2<float, N>(_mm256_cmp_ps(a.raw, b.raw, _CMP_GE_OQ));
+}
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> operator>=(
+    const vec_avx2<double, N> a, const vec_avx2<double, N> b) {
+  return vec_avx2<double, N>(_mm256_cmp_pd(a.raw, b.raw, _CMP_GE_OQ));
+}
+
+// ================================================== LOGICAL
+
+// ------------------------------ Bitwise AND
+
+template <typename T, size_t N>  // generic overload: integer AND (si256)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> operator&(const vec_avx2<T, N> a,
+                                                    const vec_avx2<T, N> b) {
+  return vec_avx2<T, N>(_mm256_and_si256(a.raw, b.raw));
+}
+template <size_t N>  // float overload: bitwise AND via _mm256_and_ps
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> operator&(
+    const vec_avx2<float, N> a, const vec_avx2<float, N> b) {
+  return vec_avx2<float, N>(_mm256_and_ps(a.raw, b.raw));
+}
+template <size_t N>  // double overload: bitwise AND via _mm256_and_pd
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> operator&(
+    const vec_avx2<double, N> a, const vec_avx2<double, N> b) {
+  return vec_avx2<double, N>(_mm256_and_pd(a.raw, b.raw));
+}
+
+// ------------------------------ Bitwise AND-NOT
+
+// Returns ~not_mask & mask (only the first argument is complemented).
+template <typename T, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> andnot(const vec_avx2<T, N> not_mask,
+                                                 const vec_avx2<T, N> mask) {
+  return vec_avx2<T, N>(_mm256_andnot_si256(not_mask.raw, mask.raw));
+}
+template <size_t N>  // float overload
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> andnot(
+    const vec_avx2<float, N> not_mask, const vec_avx2<float, N> mask) {
+  return vec_avx2<float, N>(_mm256_andnot_ps(not_mask.raw, mask.raw));
+}
+template <size_t N>  // double overload
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> andnot(
+    const vec_avx2<double, N> not_mask, const vec_avx2<double, N> mask) {
+  return vec_avx2<double, N>(_mm256_andnot_pd(not_mask.raw, mask.raw));
+}
+
+// ------------------------------ Bitwise OR
+
+template <typename T, size_t N>  // generic overload: integer OR (si256)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> operator|(const vec_avx2<T, N> a,
+                                                    const vec_avx2<T, N> b) {
+  return vec_avx2<T, N>(_mm256_or_si256(a.raw, b.raw));
+}
+template <size_t N>  // float overload: bitwise OR via _mm256_or_ps
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> operator|(
+    const vec_avx2<float, N> a, const vec_avx2<float, N> b) {
+  return vec_avx2<float, N>(_mm256_or_ps(a.raw, b.raw));
+}
+template <size_t N>  // double overload: bitwise OR via _mm256_or_pd
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> operator|(
+    const vec_avx2<double, N> a, const vec_avx2<double, N> b) {
+  return vec_avx2<double, N>(_mm256_or_pd(a.raw, b.raw));
+}
+
+// ------------------------------ Bitwise XOR
+
+template <typename T, size_t N>  // generic overload: integer XOR (si256)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> operator^(const vec_avx2<T, N> a,
+                                                    const vec_avx2<T, N> b) {
+  return vec_avx2<T, N>(_mm256_xor_si256(a.raw, b.raw));
+}
+template <size_t N>  // float overload: bitwise XOR via _mm256_xor_ps
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> operator^(
+    const vec_avx2<float, N> a, const vec_avx2<float, N> b) {
+  return vec_avx2<float, N>(_mm256_xor_ps(a.raw, b.raw));
+}
+template <size_t N>  // double overload: bitwise XOR via _mm256_xor_pd
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> operator^(
+    const vec_avx2<double, N> a, const vec_avx2<double, N> b) {
+  return vec_avx2<double, N>(_mm256_xor_pd(a.raw, b.raw));
+}
+
+// ------------------------------ Select/blend
+
+// Returns a mask for use by select(). No-op: blendv_ps/pd only check the sign
+// bit. NOTE(review): integer select() uses blendv_epi8 (per-byte sign): verify.
+template <typename T, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> condition_from_sign(
+    const vec_avx2<T, N> v) {
+  return v;
+}
+
+// Returns mask ? b : a. "mask" must either have been returned by
+// selector_from_mask, or callers must ensure its lanes are T(0) or ~T(0).
+template <typename T, size_t N>  // generic overload: byte-granular blend
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> select(const vec_avx2<T, N> a,
+                                                 const vec_avx2<T, N> b,
+                                                 const vec_avx2<T, N> mask) {
+  return vec_avx2<T, N>(_mm256_blendv_epi8(a.raw, b.raw, mask.raw));
+}
+template <size_t N>  // float overload: blend per 32-bit lane
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> select(
+    const vec_avx2<float, N> a, const vec_avx2<float, N> b,
+    const vec_avx2<float, N> mask) {
+  return vec_avx2<float, N>(_mm256_blendv_ps(a.raw, b.raw, mask.raw));
+}
+template <size_t N>  // double overload: blend per 64-bit lane
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double, N> select(
+    const vec_avx2<double, N> a, const vec_avx2<double, N> b,
+    const vec_avx2<double, N> mask) {
+  return vec_avx2<double, N>(_mm256_blendv_pd(a.raw, b.raw, mask.raw));
+}
+
+// ================================================== MEMORY
+
+// ------------------------------ Load
+
+template <typename T>  // generic overload; uses the aligned 256-bit load
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> load(Full<T, AVX2>,
+                                            const T* SIMD_RESTRICT aligned) {
+  return vec_avx2<T>(
+      _mm256_load_si256(reinterpret_cast<const __m256i*>(aligned)));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> load(  // float overload
+    Full<float, AVX2>, const float* SIMD_RESTRICT aligned) {
+  return vec_avx2<float>(_mm256_load_ps(aligned));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double> load(  // double overload
+    Full<double, AVX2>, const double* SIMD_RESTRICT aligned) {
+  return vec_avx2<double>(_mm256_load_pd(aligned));
+}
+
+template <typename T>  // generic overload; uses the unaligned 256-bit load
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> load_unaligned(
+    Full<T, AVX2>, const T* SIMD_RESTRICT p) {
+  return vec_avx2<T>(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(p)));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> load_unaligned(  // float overload
+    Full<float, AVX2>, const float* SIMD_RESTRICT p) {
+  return vec_avx2<float>(_mm256_loadu_ps(p));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double> load_unaligned(  // double overload
+    Full<double, AVX2>, const double* SIMD_RESTRICT p) {
+  return vec_avx2<double>(_mm256_loadu_pd(p));
+}
+
+// Loads 128 bit and duplicates into both 128-bit halves. This avoids the
+// 3-cycle cost of moving data between 128-bit halves and avoids port 5.
+template <typename T>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> load_dup128(
+    Full<T, AVX2>, const T* const SIMD_RESTRICT p) {
+  // Clang 3.9 generates VINSERTF128 which is slower, but inline assembly leads
+  // to "invalid output size for constraint" without -mavx2:
+  // https://gcc.godbolt.org/z/-Jt_-F
+#if (SIMD_COMPILER != SIMD_COMPILER_MSVC) && defined(__AVX2__)
+  __m256i out;
+  asm volatile("vbroadcasti128 %1, %[reg]" : [reg] "=x"(out) : "m"(p[0]));
+  return vec_avx2<T>(out);
+#else
+  return vec_avx2<T>(  // Fallback: 128-bit unaligned load, then broadcast.
+      _mm256_broadcastsi128_si256(load_unaligned(Full<T, SSE4>(), p).raw));
+#endif
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> load_dup128(  // float overload
+    Full<float, AVX2>, const float* const SIMD_RESTRICT p) {
+#if (SIMD_COMPILER != SIMD_COMPILER_MSVC) && defined(__AVX2__)
+  __m256 out;
+  asm volatile("vbroadcastf128 %1, %[reg]" : [reg] "=x"(out) : "m"(p[0]));
+  return vec_avx2<float>(out);
+#else
+  return vec_avx2<float>(_mm256_broadcast_ps((const __m128*)p));
+#endif
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double> load_dup128(  // double overload
+    Full<double, AVX2>, const double* const SIMD_RESTRICT p) {
+#if (SIMD_COMPILER != SIMD_COMPILER_MSVC) && defined(__AVX2__)
+  __m256d out;
+  asm volatile("vbroadcastf128 %1, %[reg]" : [reg] "=x"(out) : "m"(p[0]));
+  return vec_avx2<double>(out);
+#else
+  return vec_avx2<double>(_mm256_broadcast_pd((const __m128d*)p));
+#endif
+}
+
+// ------------------------------ Store
+
+template <typename T>  // generic overload; uses the aligned 256-bit store
+SIMD_ATTR_AVX2 SIMD_INLINE void store(const vec_avx2<T> v, Full<T, AVX2>,
+                                      T* SIMD_RESTRICT aligned) {
+  _mm256_store_si256(reinterpret_cast<__m256i*>(aligned), v.raw);
+}
+SIMD_ATTR_AVX2 SIMD_INLINE void store(const vec_avx2<float> v,  // float
+                                      Full<float, AVX2>,
+                                      float* SIMD_RESTRICT aligned) {
+  _mm256_store_ps(aligned, v.raw);
+}
+SIMD_ATTR_AVX2 SIMD_INLINE void store(const vec_avx2<double> v,  // double
+                                      Full<double, AVX2>,
+                                      double* SIMD_RESTRICT aligned) {
+  _mm256_store_pd(aligned, v.raw);
+}
+
+template <typename T>  // generic overload; uses the unaligned 256-bit store
+SIMD_ATTR_AVX2 SIMD_INLINE void store_unaligned(const vec_avx2<T> v,
+                                                Full<T, AVX2>,
+                                                T* SIMD_RESTRICT p) {
+  _mm256_storeu_si256(reinterpret_cast<__m256i*>(p), v.raw);
+}
+SIMD_ATTR_AVX2 SIMD_INLINE void store_unaligned(const vec_avx2<float> v,
+                                                Full<float, AVX2>,
+                                                float* SIMD_RESTRICT p) {
+  _mm256_storeu_ps(p, v.raw);  // float overload
+}
+SIMD_ATTR_AVX2 SIMD_INLINE void store_unaligned(const vec_avx2<double> v,
+                                                Full<double, AVX2>,
+                                                double* SIMD_RESTRICT p) {
+  _mm256_storeu_pd(p, v.raw);  // double overload
+}
+
+// ------------------------------ Non-temporal stores
+
+template <typename T, size_t N>  // NOTE(review): store() has no N - confirm
+SIMD_ATTR_AVX2 SIMD_INLINE void stream(const vec_avx2<T, N> v, Full<T, AVX2>,
+                                       T* SIMD_RESTRICT aligned) {
+  _mm256_stream_si256(reinterpret_cast<__m256i*>(aligned), v.raw);
+}
+SIMD_ATTR_AVX2 SIMD_INLINE void stream(const vec_avx2<float> v,  // float
+                                       Full<float, AVX2>,
+                                       float* SIMD_RESTRICT aligned) {
+  _mm256_stream_ps(aligned, v.raw);
+}
+SIMD_ATTR_AVX2 SIMD_INLINE void stream(const vec_avx2<double> v,  // double
+                                       Full<double, AVX2>,
+                                       double* SIMD_RESTRICT aligned) {
+  _mm256_stream_pd(aligned, v.raw);
+}
+
+// ------------------------------ Gather
+
+// "Extensions": useful but not quite performance-portable operations. We add
+// functions to this namespace in multiple places.
+namespace ext {
+
+template <typename T>  // 4-byte T: "offset" is in bytes (gather scale 1)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> gather_offset_impl(
+    char (&sizeof_t)[4], Full<T, AVX2>, const T* SIMD_RESTRICT base,
+    const vec_avx2<int32_t> offset) {
+  return vec_avx2<T>(_mm256_i32gather_epi32(
+      reinterpret_cast<const int32_t*>(base), offset.raw, 1));
+}
+template <typename T>  // 4-byte T: "index" is in elements (gather scale 4)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> gather_index_impl(
+    char (&sizeof_t)[4], Full<T, AVX2>, const T* SIMD_RESTRICT base,
+    const vec_avx2<int32_t> index) {
+  return vec_avx2<T>(_mm256_i32gather_epi32(
+      reinterpret_cast<const int32_t*>(base), index.raw, 4));
+}
+
+template <typename T>  // 8-byte T: "offset" is in bytes (gather scale 1)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> gather_offset_impl(
+    char (&sizeof_t)[8], Full<T, AVX2>, const T* SIMD_RESTRICT base,
+    const vec_avx2<int64_t> offset) {
+  return vec_avx2<T>(_mm256_i64gather_epi64(
+      reinterpret_cast<const GatherIndex64*>(base), offset.raw, 1));
+}
+template <typename T>  // 8-byte T: "index" is in elements (gather scale 8)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> gather_index_impl(
+    char (&sizeof_t)[8], Full<T, AVX2>, const T* SIMD_RESTRICT base,
+    const vec_avx2<int64_t> index) {
+  return vec_avx2<T>(_mm256_i64gather_epi64(
+      reinterpret_cast<const GatherIndex64*>(base), index.raw, 8));
+}
+
+template <typename T, typename Offset>  // dispatches on sizeof(T) to an _impl
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> gather_offset(
+    Full<T, AVX2> d, const T* SIMD_RESTRICT base,
+    const vec_avx2<Offset> offset) {
+  static_assert(sizeof(T) == sizeof(Offset), "SVE requires same size base/ofs");
+  char sizeof_t[sizeof(T)];  // Only its array type is used, to pick the _impl.
+  return gather_offset_impl(sizeof_t, d, base, offset);
+}
+template <typename T, typename Index>  // dispatches on sizeof(T) to an _impl
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> gather_index(
+    Full<T, AVX2> d, const T* SIMD_RESTRICT base, const vec_avx2<Index> index) {
+  static_assert(sizeof(T) == sizeof(Index), "SVE requires same size base/idx");
+  char sizeof_t[sizeof(T)];  // Only its array type is used, to pick the _impl.
+  return gather_index_impl(sizeof_t, d, base, index);
+}
+
+template <>  // float: native 32-bit float gather, byte offsets (scale 1)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> gather_offset<float>(
+    Full<float, AVX2>, const float* SIMD_RESTRICT base,
+    const vec_avx2<int32_t> offset) {
+  return vec_avx2<float>(_mm256_i32gather_ps(base, offset.raw, 1));
+}
+template <>  // float: native 32-bit float gather, element indices (scale 4)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> gather_index<float>(
+    Full<float, AVX2>, const float* SIMD_RESTRICT base,
+    const vec_avx2<int32_t> index) {
+  return vec_avx2<float>(_mm256_i32gather_ps(base, index.raw, 4));
+}
+
+template <>  // double: native 64-bit gather, byte offsets (scale 1)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double> gather_offset<double>(
+    Full<double, AVX2>, const double* SIMD_RESTRICT base,
+    const vec_avx2<int64_t> offset) {
+  return vec_avx2<double>(_mm256_i64gather_pd(base, offset.raw, 1));
+}
+template <>  // double: native 64-bit gather, element indices (scale 8)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double> gather_index<double>(
+    Full<double, AVX2>, const double* SIMD_RESTRICT base,
+    const vec_avx2<int64_t> index) {
+  return vec_avx2<double>(_mm256_i64gather_pd(base, index.raw, 8));
+}
+
+}  // namespace ext
+
+// ================================================== SWIZZLE
+
+// ------------------------------ Extract half
+
+template <typename T>  // generic overload: lower 128 bits via a cast intrinsic
+SIMD_ATTR_AVX2 SIMD_INLINE vec_sse4<T> get_half(Lower, vec_avx2<T> v) {
+  return vec_sse4<T>(_mm256_castsi256_si128(v.raw));
+}
+template <>  // float specialization
+SIMD_ATTR_AVX2 SIMD_INLINE vec_sse4<float> get_half(Lower, vec_avx2<float> v) {
+  return vec_sse4<float>(_mm256_castps256_ps128(v.raw));
+}
+template <>  // double specialization
+SIMD_ATTR_AVX2 SIMD_INLINE vec_sse4<double> get_half(Lower,
+                                                     vec_avx2<double> v) {
+  return vec_sse4<double>(_mm256_castpd256_pd128(v.raw));
+}
+template <typename T>  // convenience wrapper for get_half(Lower(), v)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_sse4<T> lower_half(const vec_avx2<T> v) {
+  return get_half(Lower(), v);
+}
+
+template <typename T>  // generic overload: extracts 128-bit half at index 1
+SIMD_ATTR_AVX2 SIMD_INLINE vec_sse4<T> get_half(Upper, const vec_avx2<T> v) {
+  return vec_sse4<T>(_mm256_extracti128_si256(v.raw, 1));
+}
+template <>  // float specialization
+SIMD_ATTR_AVX2 SIMD_INLINE vec_sse4<float> get_half(Upper,
+                                                    const vec_avx2<float> v) {
+  return vec_sse4<float>(_mm256_extractf128_ps(v.raw, 1));
+}
+template <>  // double specialization
+SIMD_ATTR_AVX2 SIMD_INLINE vec_sse4<double> get_half(Upper,
+                                                     const vec_avx2<double> v) {
+  return vec_sse4<double>(_mm256_extractf128_pd(v.raw, 1));
+}
+template <typename T>  // convenience wrapper for get_half(Upper(), v)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_sse4<T> upper_half(const vec_avx2<T> v) {
+  return get_half(Upper(), v);
+}
+
+// ------------------------------ Shift vector by constant #bytes
+
+// 0x01..0F, kBytes = 1 => 0x02..0F00 (kBytes must be in [0, 16])
+template <int kBytes, typename T, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> shift_left_bytes(
+    const vec_avx2<T, N> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  // This is the same operation as _mm256_bslli_epi128.
+  return vec_avx2<T, N>(_mm256_slli_si256(v.raw, kBytes));
+}
+
+template <int kLanes, typename T, size_t N>  // shifts by kLanes * sizeof(T)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> shift_left_lanes(
+    const vec_avx2<T, N> v) {
+  return shift_left_bytes<kLanes * sizeof(T)>(v);
+}
+
+// 0x01..0F, kBytes = 1 => 0x0001..0E (kBytes must be in [0, 16])
+template <int kBytes, typename T, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> shift_right_bytes(
+    const vec_avx2<T, N> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  // This is the same operation as _mm256_bsrli_epi128.
+  return vec_avx2<T, N>(_mm256_srli_si256(v.raw, kBytes));
+}
+
+template <int kLanes, typename T, size_t N>  // shifts by kLanes * sizeof(T)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> shift_right_lanes(
+    const vec_avx2<T, N> v) {
+  return shift_right_bytes<kLanes * sizeof(T)>(v);
+}
+
+// ------------------------------ Extract from 2x 128-bit at constant offset
+
+// Extracts 128 bits from <hi, lo> by skipping the least-significant kBytes.
+template <int kBytes, typename T, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> combine_shift_right_bytes(
+    const vec_avx2<T, N> hi, const vec_avx2<T, N> lo) {
+  const Full<uint8_t, AVX2> d8;  // alignr operates on bytes, so cast first.
+  const vec_avx2<uint8_t> extracted_bytes(
+      _mm256_alignr_epi8(cast_to(d8, hi).raw, cast_to(d8, lo).raw, kBytes));
+  return cast_to(Full<T, AVX2>(), extracted_bytes);  // back to lane type T
+}
+
+// ------------------------------ Broadcast/splat any lane
+
+// Unsigned. kLane is bounded by the lane count of one 128-bit block.
+template <int kLane>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t> broadcast(
+    const vec_avx2<uint16_t> v) {
+  static_assert(0 <= kLane && kLane < 8, "Invalid lane");
+  if (kLane < 4) {  // Replicate within the low 64 bits, then duplicate them.
+    const __m256i lo = _mm256_shufflelo_epi16(v.raw, 0x55 * kLane);
+    return vec_avx2<uint16_t>(_mm256_unpacklo_epi64(lo, lo));
+  } else {  // Replicate within the high 64 bits, then duplicate them.
+    const __m256i hi = _mm256_shufflehi_epi16(v.raw, 0x55 * (kLane - 4));
+    return vec_avx2<uint16_t>(_mm256_unpackhi_epi64(hi, hi));
+  }
+}
+template <int kLane>  // 0x55 * kLane repeats kLane in all four 2-bit fields.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t> broadcast(
+    const vec_avx2<uint32_t> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  return vec_avx2<uint32_t>(_mm256_shuffle_epi32(v.raw, 0x55 * kLane));
+}
+template <int kLane>  // 0xEE picks 32-bit pairs 3,2; 0x44 picks pairs 1,0.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t> broadcast(
+    const vec_avx2<uint64_t> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  return vec_avx2<uint64_t>(_mm256_shuffle_epi32(v.raw, kLane ? 0xEE : 0x44));
+}
+
+// Signed. Same shuffles as the unsigned overloads of equal lane width.
+template <int kLane>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t> broadcast(
+    const vec_avx2<int16_t> v) {
+  static_assert(0 <= kLane && kLane < 8, "Invalid lane");
+  if (kLane < 4) {  // Replicate within the low 64 bits, then duplicate them.
+    const __m256i lo = _mm256_shufflelo_epi16(v.raw, 0x55 * kLane);
+    return vec_avx2<int16_t>(_mm256_unpacklo_epi64(lo, lo));
+  } else {  // Replicate within the high 64 bits, then duplicate them.
+    const __m256i hi = _mm256_shufflehi_epi16(v.raw, 0x55 * (kLane - 4));
+    return vec_avx2<int16_t>(_mm256_unpackhi_epi64(hi, hi));
+  }
+}
+template <int kLane>  // 0x55 * kLane repeats kLane in all four 2-bit fields.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t> broadcast(
+    const vec_avx2<int32_t> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  return vec_avx2<int32_t>(_mm256_shuffle_epi32(v.raw, 0x55 * kLane));
+}
+template <int kLane>  // 0xEE picks 32-bit pairs 3,2; 0x44 picks pairs 1,0.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int64_t> broadcast(
+    const vec_avx2<int64_t> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  return vec_avx2<int64_t>(_mm256_shuffle_epi32(v.raw, kLane ? 0xEE : 0x44));
+}
+
+// Float. Both shuffle inputs are v, so every output lane comes from v.
+template <int kLane>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> broadcast(const vec_avx2<float> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  return vec_avx2<float>(_mm256_shuffle_ps(v.raw, v.raw, 0x55 * kLane));
+}
+template <int kLane>  // 15 * kLane sets all four selector bits to kLane.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double> broadcast(
+    const vec_avx2<double> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  return vec_avx2<double>(_mm256_shuffle_pd(v.raw, v.raw, 15 * kLane));
+}
+
+// ------------------------------ Hard-coded shuffles
+
+// Notation: let vec_avx2<int32_t> have lanes 7,6,5,4,3,2,1,0 (0 is
+// least-significant). shuffle_0321 rotates four-lane blocks one lane to the
+// right (the previous least-significant lane is now most-significant =>
+// 47650321). These could also be implemented via combine_shift_right_bytes but
+// the shuffle_abcd notation is more convenient.
+
+// Swap 64-bit halves of each 128-bit block (0x4E selects lanes 1,0,3,2).
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t> shuffle_1032(
+    const vec_avx2<uint32_t> v) {
+  return vec_avx2<uint32_t>(_mm256_shuffle_epi32(v.raw, 0x4E));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t> shuffle_1032(
+    const vec_avx2<int32_t> v) {
+  return vec_avx2<int32_t>(_mm256_shuffle_epi32(v.raw, 0x4E));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> shuffle_1032(
+    const vec_avx2<float> v) {
+  // Shorter encoding than _mm256_permute_ps.
+  return vec_avx2<float>(_mm256_shuffle_ps(v.raw, v.raw, 0x4E));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t> shuffle_01(
+    const vec_avx2<uint64_t> v) {  // Same swap, expressed on 64-bit lanes.
+  return vec_avx2<uint64_t>(_mm256_shuffle_epi32(v.raw, 0x4E));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int64_t> shuffle_01(
+    const vec_avx2<int64_t> v) {
+  return vec_avx2<int64_t>(_mm256_shuffle_epi32(v.raw, 0x4E));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double> shuffle_01(
+    const vec_avx2<double> v) {
+  // Shorter encoding than _mm256_permute_pd.
+  return vec_avx2<double>(_mm256_shuffle_pd(v.raw, v.raw, 5));  // 5 = 0b0101
+}
+
+// Rotate each 128-bit block right by one 32-bit lane (0x39 = lanes 0,3,2,1).
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t> shuffle_0321(
+    const vec_avx2<uint32_t> v) {
+  return vec_avx2<uint32_t>(_mm256_shuffle_epi32(v.raw, 0x39));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t> shuffle_0321(
+    const vec_avx2<int32_t> v) {
+  return vec_avx2<int32_t>(_mm256_shuffle_epi32(v.raw, 0x39));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> shuffle_0321(
+    const vec_avx2<float> v) {  // Both shuffle_ps inputs are v itself.
+  return vec_avx2<float>(_mm256_shuffle_ps(v.raw, v.raw, 0x39));
+}
+// Rotate each 128-bit block left by one 32-bit lane (0x93 = lanes 2,1,0,3).
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t> shuffle_2103(
+    const vec_avx2<uint32_t> v) {
+  return vec_avx2<uint32_t>(_mm256_shuffle_epi32(v.raw, 0x93));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t> shuffle_2103(
+    const vec_avx2<int32_t> v) {
+  return vec_avx2<int32_t>(_mm256_shuffle_epi32(v.raw, 0x93));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> shuffle_2103(
+    const vec_avx2<float> v) {  // Both shuffle_ps inputs are v itself.
+  return vec_avx2<float>(_mm256_shuffle_ps(v.raw, v.raw, 0x93));
+}
+
+// Reverse the four 32-bit lanes of each 128-bit block (0x1B = lanes 0,1,2,3).
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t> shuffle_0123(
+    const vec_avx2<uint32_t> v) {
+  return vec_avx2<uint32_t>(_mm256_shuffle_epi32(v.raw, 0x1B));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t> shuffle_0123(
+    const vec_avx2<int32_t> v) {
+  return vec_avx2<int32_t>(_mm256_shuffle_epi32(v.raw, 0x1B));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> shuffle_0123(
+    const vec_avx2<float> v) {  // Both shuffle_ps inputs are v itself.
+  return vec_avx2<float>(_mm256_shuffle_ps(v.raw, v.raw, 0x1B));
+}
+
+// ------------------------------ Permute (runtime variable)
+// Wraps a full vector of int32 lane indices read (unaligned) from idx.
+template <typename T>
+SIMD_ATTR_AVX2 SIMD_INLINE permute_avx2<T> set_table_indices(const Full<T, AVX2>,
+                                                       const int32_t* idx) {
+  return permute_avx2<T>{load_unaligned(Full<int32_t, AVX2>(), idx).raw};
+}
+// Returns v[idx[i]] in lane i; an index may address any lane of the vector.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t> table_lookup_lanes(
+    const vec_avx2<uint32_t> v, const permute_avx2<uint32_t> idx) {
+  return vec_avx2<uint32_t>(_mm256_permutevar8x32_epi32(v.raw, idx.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t> table_lookup_lanes(
+    const vec_avx2<int32_t> v, const permute_avx2<int32_t> idx) {
+  return vec_avx2<int32_t>(_mm256_permutevar8x32_epi32(v.raw, idx.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> table_lookup_lanes(
+    const vec_avx2<float> v, const permute_avx2<float> idx) {
+  return vec_avx2<float>(_mm256_permutevar8x32_ps(v.raw, idx.raw));
+}
+
+// ------------------------------ Interleave lanes
+
+// Interleaves lanes from halves of the 128-bit blocks of "a" (which provides
+// the least-significant lane) and "b". To concatenate two half-width integers
+// into one, use zip_lo/hi instead (also works with scalar).
+// interleave_lo: lower half of each 128-bit block (unpacklo intrinsics).
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint8_t> interleave_lo(
+    const vec_avx2<uint8_t> a, const vec_avx2<uint8_t> b) {
+  return vec_avx2<uint8_t>(_mm256_unpacklo_epi8(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t> interleave_lo(
+    const vec_avx2<uint16_t> a, const vec_avx2<uint16_t> b) {
+  return vec_avx2<uint16_t>(_mm256_unpacklo_epi16(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t> interleave_lo(
+    const vec_avx2<uint32_t> a, const vec_avx2<uint32_t> b) {
+  return vec_avx2<uint32_t>(_mm256_unpacklo_epi32(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t> interleave_lo(
+    const vec_avx2<uint64_t> a, const vec_avx2<uint64_t> b) {
+  return vec_avx2<uint64_t>(_mm256_unpacklo_epi64(a.raw, b.raw));
+}
+
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int8_t> interleave_lo(
+    const vec_avx2<int8_t> a, const vec_avx2<int8_t> b) {
+  return vec_avx2<int8_t>(_mm256_unpacklo_epi8(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t> interleave_lo(
+    const vec_avx2<int16_t> a, const vec_avx2<int16_t> b) {
+  return vec_avx2<int16_t>(_mm256_unpacklo_epi16(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t> interleave_lo(
+    const vec_avx2<int32_t> a, const vec_avx2<int32_t> b) {
+  return vec_avx2<int32_t>(_mm256_unpacklo_epi32(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int64_t> interleave_lo(
+    const vec_avx2<int64_t> a, const vec_avx2<int64_t> b) {
+  return vec_avx2<int64_t>(_mm256_unpacklo_epi64(a.raw, b.raw));
+}
+
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> interleave_lo(
+    const vec_avx2<float> a, const vec_avx2<float> b) {
+  return vec_avx2<float>(_mm256_unpacklo_ps(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double> interleave_lo(
+    const vec_avx2<double> a, const vec_avx2<double> b) {
+  return vec_avx2<double>(_mm256_unpacklo_pd(a.raw, b.raw));
+}
+// interleave_hi: upper half of each 128-bit block (unpackhi intrinsics).
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint8_t> interleave_hi(
+    const vec_avx2<uint8_t> a, const vec_avx2<uint8_t> b) {
+  return vec_avx2<uint8_t>(_mm256_unpackhi_epi8(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t> interleave_hi(
+    const vec_avx2<uint16_t> a, const vec_avx2<uint16_t> b) {
+  return vec_avx2<uint16_t>(_mm256_unpackhi_epi16(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t> interleave_hi(
+    const vec_avx2<uint32_t> a, const vec_avx2<uint32_t> b) {
+  return vec_avx2<uint32_t>(_mm256_unpackhi_epi32(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t> interleave_hi(
+    const vec_avx2<uint64_t> a, const vec_avx2<uint64_t> b) {
+  return vec_avx2<uint64_t>(_mm256_unpackhi_epi64(a.raw, b.raw));
+}
+
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int8_t> interleave_hi(
+    const vec_avx2<int8_t> a, const vec_avx2<int8_t> b) {
+  return vec_avx2<int8_t>(_mm256_unpackhi_epi8(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t> interleave_hi(
+    const vec_avx2<int16_t> a, const vec_avx2<int16_t> b) {
+  return vec_avx2<int16_t>(_mm256_unpackhi_epi16(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t> interleave_hi(
+    const vec_avx2<int32_t> a, const vec_avx2<int32_t> b) {
+  return vec_avx2<int32_t>(_mm256_unpackhi_epi32(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int64_t> interleave_hi(
+    const vec_avx2<int64_t> a, const vec_avx2<int64_t> b) {
+  return vec_avx2<int64_t>(_mm256_unpackhi_epi64(a.raw, b.raw));
+}
+
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> interleave_hi(
+    const vec_avx2<float> a, const vec_avx2<float> b) {
+  return vec_avx2<float>(_mm256_unpackhi_ps(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double> interleave_hi(
+    const vec_avx2<double> a, const vec_avx2<double> b) {
+  return vec_avx2<double>(_mm256_unpackhi_pd(a.raw, b.raw));
+}
+
+// ------------------------------ Zip lanes
+
+// Same as interleave_*, except that the return lanes are double-width integers;
+// this is necessary because the single-lane scalar cannot return two values.
+// zip_lo: same unpacklo as interleave_lo, typed as lanes of twice the width.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t> zip_lo(
+    const vec_avx2<uint8_t> a, const vec_avx2<uint8_t> b) {
+  return vec_avx2<uint16_t>(_mm256_unpacklo_epi8(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t> zip_lo(
+    const vec_avx2<uint16_t> a, const vec_avx2<uint16_t> b) {
+  return vec_avx2<uint32_t>(_mm256_unpacklo_epi16(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t> zip_lo(
+    const vec_avx2<uint32_t> a, const vec_avx2<uint32_t> b) {
+  return vec_avx2<uint64_t>(_mm256_unpacklo_epi32(a.raw, b.raw));
+}
+
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t> zip_lo(const vec_avx2<int8_t> a,
+                                                    const vec_avx2<int8_t> b) {
+  return vec_avx2<int16_t>(_mm256_unpacklo_epi8(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t> zip_lo(const vec_avx2<int16_t> a,
+                                                    const vec_avx2<int16_t> b) {
+  return vec_avx2<int32_t>(_mm256_unpacklo_epi16(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int64_t> zip_lo(const vec_avx2<int32_t> a,
+                                                    const vec_avx2<int32_t> b) {
+  return vec_avx2<int64_t>(_mm256_unpacklo_epi32(a.raw, b.raw));
+}
+// zip_hi: same unpackhi as interleave_hi, typed as lanes of twice the width.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t> zip_hi(
+    const vec_avx2<uint8_t> a, const vec_avx2<uint8_t> b) {
+  return vec_avx2<uint16_t>(_mm256_unpackhi_epi8(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t> zip_hi(
+    const vec_avx2<uint16_t> a, const vec_avx2<uint16_t> b) {
+  return vec_avx2<uint32_t>(_mm256_unpackhi_epi16(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t> zip_hi(
+    const vec_avx2<uint32_t> a, const vec_avx2<uint32_t> b) {
+  return vec_avx2<uint64_t>(_mm256_unpackhi_epi32(a.raw, b.raw));
+}
+
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t> zip_hi(const vec_avx2<int8_t> a,
+                                                    const vec_avx2<int8_t> b) {
+  return vec_avx2<int16_t>(_mm256_unpackhi_epi8(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t> zip_hi(const vec_avx2<int16_t> a,
+                                                    const vec_avx2<int16_t> b) {
+  return vec_avx2<int32_t>(_mm256_unpackhi_epi16(a.raw, b.raw));
+}
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int64_t> zip_hi(const vec_avx2<int32_t> a,
+                                                    const vec_avx2<int32_t> b) {
+  return vec_avx2<int64_t>(_mm256_unpackhi_epi32(a.raw, b.raw));
+}
+
+// ------------------------------ Parts
+
+// Returns an N-lane part of v (unspecified whether upper or lower lanes).
+template <typename T, size_t N, size_t VN>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> any_part(Desc<T, N, AVX2>,
+                                                   const vec_avx2<T, VN> v) {
+  return vec_avx2<T, N>(v.raw);  // Same raw register, retyped to N lanes.
+}
+template <typename T, size_t N, size_t VN>  // Generic T: integer cast.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_sse4<T, N> any_part(Desc<T, N, SSE4>,
+                                                   const vec_avx2<T, VN> v) {
+  return vec_sse4<T, N>(_mm256_castsi256_si128(v.raw));
+}
+template <size_t N>  // float overload: uses the ps cast.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_sse4<float, N> any_part(Desc<float, N, SSE4>,
+                                                       vec_avx2<float> v) {
+  return vec_sse4<float, N>(_mm256_castps256_ps128(v.raw));
+}
+template <size_t N>  // double overload: uses the pd cast.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_sse4<double, N> any_part(Desc<double, N, SSE4>,
+                                                        vec_avx2<double> v) {
+  return vec_sse4<double, N>(_mm256_castpd256_pd128(v.raw));
+}
+
+// Gets the single value stored in a vector/part (via a one-lane any_part).
+template <typename T, size_t N, class Target, size_t VN>
+SIMD_ATTR_AVX2 SIMD_INLINE T get_part(Desc<T, N, Target>,
+                                      const vec_avx2<T, VN> v) {
+  const Part<T, 1, AVX2> d;  // The incoming descriptor is only a tag here.
+  return get_part(d, any_part(d, v));  // Forwards to the one-lane overload.
+}
+
+// Replicates lane kLane of the 128-bit part "v" into every lane of a full
+// vector. Needed because part lane order is undefined for plain broadcast.
+template <int kLane, typename T, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> broadcast_part(Full<T, AVX2>,
+                                                      const vec_sse4<T, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  const auto block = broadcast<kLane>(vec_sse4<T>(v.raw)).raw;
+  // Zero-extending widen (unlike _mm256_castsi128_si256, upper half defined).
+  const auto widened = _mm256_zextsi128_si256(block);
+  // Selector 0 copies the low block into both halves.
+  return vec_avx2<T>(_mm256_permute2x128_si256(widened, widened, 0));
+}
+template <int kLane, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> broadcast_part(
+    Full<float, AVX2>, const vec_sse4<float, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  const auto block = broadcast<kLane>(vec_sse4<float>(v.raw)).raw;
+  // Zero-extending widen (unlike _mm256_castps128_ps256, upper half defined).
+  const auto widened = _mm256_zextps128_ps256(block);
+  return vec_avx2<float>(_mm256_permute2f128_ps(widened, widened, 0));
+}
+template <int kLane, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double> broadcast_part(
+    Full<double, AVX2>, const vec_sse4<double, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");
+  const auto block = broadcast<kLane>(vec_sse4<double>(v.raw)).raw;
+  // Zero-extending widen (unlike _mm256_castpd128_pd256, upper half defined).
+  const auto widened = _mm256_zextpd128_pd256(block);
+  return vec_avx2<double>(_mm256_permute2f128_pd(widened, widened, 0));
+}
+
+// ------------------------------ Blocks
+
+// hiH,hiL loH,loL |-> hiL,loL (= lower halves)
+template <typename T>  // Generic version, used for integer lanes.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> concat_lo_lo(const vec_avx2<T> hi,
+                                                    const vec_avx2<T> lo) {
+  return vec_avx2<T>(_mm256_permute2x128_si256(lo.raw, hi.raw, 0x20));
+}
+template <>  // float: same block selection (0x20) via the ps intrinsic.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> concat_lo_lo(
+    const vec_avx2<float> hi, const vec_avx2<float> lo) {
+  return vec_avx2<float>(_mm256_permute2f128_ps(lo.raw, hi.raw, 0x20));
+}
+template <>  // double: same block selection (0x20) via the pd intrinsic.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double> concat_lo_lo(
+    const vec_avx2<double> hi, const vec_avx2<double> lo) {
+  return vec_avx2<double>(_mm256_permute2f128_pd(lo.raw, hi.raw, 0x20));
+}
+
+// hiH,hiL loH,loL |-> hiH,loH (= upper halves)
+template <typename T>  // Generic version, used for integer lanes.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> concat_hi_hi(const vec_avx2<T> hi,
+                                                    const vec_avx2<T> lo) {
+  return vec_avx2<T>(_mm256_permute2x128_si256(lo.raw, hi.raw, 0x31));
+}
+template <>  // float: same block selection (0x31) via the ps intrinsic.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> concat_hi_hi(
+    const vec_avx2<float> hi, const vec_avx2<float> lo) {
+  return vec_avx2<float>(_mm256_permute2f128_ps(lo.raw, hi.raw, 0x31));
+}
+template <>  // double: same block selection (0x31) via the pd intrinsic.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double> concat_hi_hi(
+    const vec_avx2<double> hi, const vec_avx2<double> lo) {
+  return vec_avx2<double>(_mm256_permute2f128_pd(lo.raw, hi.raw, 0x31));
+}
+
+// hiH,hiL loH,loL |-> hiL,loH (= inner halves / swap blocks)
+template <typename T>  // Generic version, used for integer lanes.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> concat_lo_hi(const vec_avx2<T> hi,
+                                                    const vec_avx2<T> lo) {
+  return vec_avx2<T>(_mm256_permute2x128_si256(lo.raw, hi.raw, 0x21));
+}
+template <>  // float: same block selection (0x21) via the ps intrinsic.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> concat_lo_hi(
+    const vec_avx2<float> hi, const vec_avx2<float> lo) {
+  return vec_avx2<float>(_mm256_permute2f128_ps(lo.raw, hi.raw, 0x21));
+}
+template <>  // double: same block selection (0x21) via the pd intrinsic.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double> concat_lo_hi(
+    const vec_avx2<double> hi, const vec_avx2<double> lo) {
+  return vec_avx2<double>(_mm256_permute2f128_pd(lo.raw, hi.raw, 0x21));
+}
+
+// hiH,hiL loH,loL |-> hiH,loL (= outer halves)
+template <typename T>  // Generic: 0x0F takes the low four 32-bit lanes from lo.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> concat_hi_lo(const vec_avx2<T> hi,
+                                                    const vec_avx2<T> lo) {
+  return vec_avx2<T>(_mm256_blend_epi32(hi.raw, lo.raw, 0x0F));
+}
+template <>  // float: 0x0F takes the low four lanes from lo.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> concat_hi_lo(
+    const vec_avx2<float> hi, const vec_avx2<float> lo) {
+  return vec_avx2<float>(_mm256_blend_ps(hi.raw, lo.raw, 0x0F));
+}
+template <>  // double: 3 takes the low two lanes from lo.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double> concat_hi_lo(
+    const vec_avx2<double> hi, const vec_avx2<double> lo) {
+  return vec_avx2<double>(_mm256_blend_pd(hi.raw, lo.raw, 3));
+}
+
+// ------------------------------ Odd/even lanes
+
+template <typename T>  // 1-byte lanes: select() driven by a per-byte mask.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> odd_even_impl(char (&sizeof_t)[1],
+                                                     const vec_avx2<T> a,
+                                                     const vec_avx2<T> b) {
+  const Full<T, AVX2> d;
+  const Full<uint8_t, AVX2> d8;
+  SIMD_ALIGN static constexpr uint8_t mask[16] = {  // 0xFF at even bytes.
+      0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0};
+  return select(a, b, cast_to(d, load_dup128(d8, mask)));
+}
+template <typename T>  // 2-byte lanes: 0x55 takes even 16-bit lanes from b.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> odd_even_impl(char (&sizeof_t)[2],
+                                                     const vec_avx2<T> a,
+                                                     const vec_avx2<T> b) {
+  return vec_avx2<T>(_mm256_blend_epi16(a.raw, b.raw, 0x55));
+}
+template <typename T>  // 4-byte lanes: 0x55 takes even 32-bit lanes from b.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> odd_even_impl(char (&sizeof_t)[4],
+                                                     const vec_avx2<T> a,
+                                                     const vec_avx2<T> b) {
+  return vec_avx2<T>(_mm256_blend_epi32(a.raw, b.raw, 0x55));
+}
+template <typename T>  // 8-byte lanes: 0x33 takes 32-bit pairs 0-1, 4-5 from b.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> odd_even_impl(char (&sizeof_t)[8],
+                                                     const vec_avx2<T> a,
+                                                     const vec_avx2<T> b) {
+  return vec_avx2<T>(_mm256_blend_epi32(a.raw, b.raw, 0x33));
+}
+// odd_even(a, b): odd lanes from a, even lanes from b (see the blend masks).
+template <typename T>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T> odd_even(const vec_avx2<T> a,
+                                                const vec_avx2<T> b) {
+  char sizeof_t[sizeof(T)];  // Only its type matters: picks the impl overload.
+  return odd_even_impl(sizeof_t, a, b);
+}
+template <>  // float: 0x55 takes the even lanes from b.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float> odd_even<float>(
+    const vec_avx2<float> a, const vec_avx2<float> b) {
+  return vec_avx2<float>(_mm256_blend_ps(a.raw, b.raw, 0x55));
+}
+
+template <>  // double: 5 (0b0101) takes the even lanes from b.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double> odd_even<double>(
+    const vec_avx2<double> a, const vec_avx2<double> b) {
+  return vec_avx2<double>(_mm256_blend_pd(a.raw, b.raw, 5));
+}
+
+// ================================================== CONVERT
+
+// ------------------------------ Shuffle bytes with variable indices
+
+// Returns bytes[from[i]] within each 128-bit block. "from" is also bytes:
+// either valid indices in [0, 16) or >= 0x80 to zero the i-th output byte.
+template <typename T, typename TI, size_t N, size_t NI>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> table_lookup_bytes(
+    const vec_avx2<T, N> bytes, const vec_avx2<TI, NI> from) {
+  return vec_avx2<T, N>(_mm256_shuffle_epi8(bytes.raw, from.raw));
+}
+
+// ------------------------------ Promotions (part w/ narrow lanes -> full)
+// float -> double: widens four f32 lanes into four f64 lanes.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<double> convert_to(
+    Full<double, AVX2>, const vec_sse4<float, 4> v) {
+  return vec_avx2<double>(_mm256_cvtps_pd(v.raw));
+}
+
+// Unsigned: zero-extend (also when the destination lane type is signed).
+// Note: these have 3 cycle latency; if inputs are already split across the
+// 128 bit blocks (in their upper/lower halves), then zip_hi/lo would be faster.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint16_t> convert_to(Full<uint16_t, AVX2>,
+                                                         const u8x16 v) {
+  return vec_avx2<uint16_t>(_mm256_cvtepu8_epi16(v.raw));
+}  // 16 x u8 -> 16 x u16
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t> convert_to(Full<uint32_t, AVX2>,
+                                                         const u8x8 v) {
+  return vec_avx2<uint32_t>(_mm256_cvtepu8_epi32(v.raw));
+}  // 8 x u8 -> 8 x u32
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t> convert_to(Full<int16_t, AVX2>,
+                                                        const u8x16 v) {
+  return vec_avx2<int16_t>(_mm256_cvtepu8_epi16(v.raw));
+}  // 16 x u8 -> 16 x i16 (zero-extended)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t> convert_to(Full<int32_t, AVX2>,
+                                                        const u8x8 v) {
+  return vec_avx2<int32_t>(_mm256_cvtepu8_epi32(v.raw));
+}  // 8 x u8 -> 8 x i32 (zero-extended)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t> convert_to(Full<uint32_t, AVX2>,
+                                                         const u16x8 v) {
+  return vec_avx2<uint32_t>(_mm256_cvtepu16_epi32(v.raw));
+}  // 8 x u16 -> 8 x u32
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t> convert_to(Full<int32_t, AVX2>,
+                                                        const u16x8 v) {
+  return vec_avx2<int32_t>(_mm256_cvtepu16_epi32(v.raw));
+}  // 8 x u16 -> 8 x i32 (zero-extended)
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t> convert_to(Full<uint64_t, AVX2>,
+                                                         const u32x4 v) {
+  return vec_avx2<uint64_t>(_mm256_cvtepu32_epi64(v.raw));
+}  // 4 x u32 -> 4 x u64
+
+// Zero-extends bytes to u32 when every 128-bit block of "v" is identical (e.g.
+// from broadcast_block or load_dup128): single-cycle latency instead of 3.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint32_t> u32_from_u8(
+    const vec_avx2<uint8_t> v) {
+  const Full<uint32_t, AVX2> du32;
+  SIMD_ALIGN static constexpr uint32_t kByteToLane[8] = {
+      0xFFFFFF00UL, 0xFFFFFF01UL, 0xFFFFFF02UL, 0xFFFFFF03UL,
+      0xFFFFFF04UL, 0xFFFFFF05UL, 0xFFFFFF06UL, 0xFFFFFF07UL};
+  return table_lookup_bytes(cast_to(du32, v), load(du32, kByteToLane));
+}
+
+// Signed: replicate sign bit.
+// Note: these have 3 cycle latency; if inputs are already split across the
+// 128 bit blocks (in their upper/lower halves), then zip_hi/lo followed by
+// signed shift would be faster.
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t> convert_to(Full<int16_t, AVX2>,
+                                                        const i8x16 v) {
+  return vec_avx2<int16_t>(_mm256_cvtepi8_epi16(v.raw));
+}  // 16 x i8 -> 16 x i16
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t> convert_to(Full<int32_t, AVX2>,
+                                                        const i8x8 v) {
+  return vec_avx2<int32_t>(_mm256_cvtepi8_epi32(v.raw));
+}  // 8 x i8 -> 8 x i32
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t> convert_to(Full<int32_t, AVX2>,
+                                                        const i16x8 v) {
+  return vec_avx2<int32_t>(_mm256_cvtepi16_epi32(v.raw));
+}  // 8 x i16 -> 8 x i32
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int64_t> convert_to(Full<int64_t, AVX2>,
+                                                        const i32x4 v) {
+  return vec_avx2<int64_t>(_mm256_cvtepi32_epi64(v.raw));
+}  // 4 x i32 -> 4 x i64
+
+// ------------------------------ Demotions (full -> part w/ narrow lanes)
+
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE VT<uint16_t, N, AVX2> convert_to(
+    Part<uint16_t, N, AVX2>, const vec_avx2<int32_t> v) {
+  const __m256i packed = _mm256_packus_epi32(v.raw, v.raw);
+  // Gathering the low qword of each 128-bit block afterwards (0x88) is more
+  // efficient than supplying a second input with low block = high block of v.
+  return VT<uint16_t, N, AVX2>(
+      _mm256_castsi256_si128(_mm256_permute4x64_epi64(packed, 0x88)));
+}
+
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE VT<uint8_t, N, AVX2> convert_to(
+    Part<uint8_t, N, AVX2>, const vec_avx2<int32_t> v) {
+  const __m256i per_block = _mm256_packus_epi32(v.raw, v.raw);
+  // Bring the low 64 bits of both 128-bit blocks together.
+  const __m256i gathered = _mm256_permute4x64_epi64(per_block, 0x88);
+  const __m128i narrow16 = _mm256_castsi256_si128(gathered);
+  return VT<uint8_t, N, AVX2>(_mm_packus_epi16(narrow16, narrow16));
+}
+
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE VT<int16_t, N, AVX2> convert_to(
+    Part<int16_t, N, AVX2>, const vec_avx2<int32_t> v) {
+  const __m256i packed = _mm256_packs_epi32(v.raw, v.raw);
+  return VT<int16_t, N, AVX2>(
+      _mm256_castsi256_si128(_mm256_permute4x64_epi64(packed, 0x88)));
+}
+
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE VT<int8_t, N, AVX2> convert_to(
+    Part<int8_t, N, AVX2>, const vec_avx2<int32_t> v) {
+  const __m256i per_block = _mm256_packs_epi32(v.raw, v.raw);
+  // Bring the low 64 bits of both 128-bit blocks together.
+  const __m256i gathered = _mm256_permute4x64_epi64(per_block, 0x88);
+  const __m128i narrow16 = _mm256_castsi256_si128(gathered);
+  return VT<int8_t, N, AVX2>(_mm_packs_epi16(narrow16, narrow16));
+}
+
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE VT<uint8_t, N, AVX2> convert_to(
+    Part<uint8_t, N, AVX2>, const vec_avx2<int16_t> v) {
+  const __m256i packed = _mm256_packus_epi16(v.raw, v.raw);
+  return VT<uint8_t, N, AVX2>(
+      _mm256_castsi256_si128(_mm256_permute4x64_epi64(packed, 0x88)));
+}
+
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE VT<int8_t, N, AVX2> convert_to(
+    Part<int8_t, N, AVX2>, const vec_avx2<int16_t> v) {
+  const __m256i packed = _mm256_packs_epi16(v.raw, v.raw);
+  return VT<int8_t, N, AVX2>(
+      _mm256_castsi256_si128(_mm256_permute4x64_epi64(packed, 0x88)));
+}
+
+// Narrows u32 lanes to bytes; input must already be limited to [0, 255].
+SIMD_ATTR_AVX2 SIMD_INLINE vec_sse4<uint8_t, 8> u8_from_u32(
+    const vec_avx2<uint32_t> v) {
+  const Full<uint32_t, AVX2> du32;
+  SIMD_ALIGN static constexpr uint32_t kGather[8] = {
+      0x0C080400u, ~0u, ~0u, ~0u, ~0u, 0x0C080400u, ~0u, ~0u};
+  // Low block: lane 0 gathers bytes 0,4,8,12; high block: lane 1; rest zero.
+  const auto gathered = table_lookup_bytes(v, load(du32, kGather));
+  // Merge both quadruplets - OR instead of unpack reduces port5 pressure.
+  const auto lower_block = get_half(Lower(), gathered);
+  const auto upper_block = get_half(Upper(), gathered);
+  const auto merged = get_half(Lower(), lower_block | upper_block);
+  return cast_to(Part<uint8_t, 8, SSE4>(), merged);
+}
+
+// ------------------------------ Convert i32 <=> f32
+// int32 -> float, lane-wise.
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<float, N> convert_to(
+    Part<float, N, AVX2>, const vec_avx2<int32_t, N> v) {
+  return vec_avx2<float, N>(_mm256_cvtepi32_ps(v.raw));
+}
+// Truncates (rounds toward zero).
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t, N> convert_to(
+    Part<int32_t, N, AVX2>, const vec_avx2<float, N> v) {
+  return vec_avx2<int32_t, N>(_mm256_cvttps_epi32(v.raw));
+}
+// float -> int32 using the current rounding mode (nearest by default).
+template <size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int32_t, N> nearest_int(
+    const vec_avx2<float, N> v) {
+  return vec_avx2<int32_t, N>(_mm256_cvtps_epi32(v.raw));
+}
+
+// ================================================== MISC
+
+// "Extensions": useful but not quite performance-portable operations. We add
+// functions to this namespace in multiple places.
+namespace ext {
+
+// ------------------------------ movemask
+
+// Returns a bit array of the most significant bit of each byte in "v", i.e.
+// sum_i=0..31 of (v[i] >> 7) << i; v[0] is the least-significant byte of "v".
+// This is useful for testing/branching based on comparison results.
+SIMD_ATTR_AVX2 SIMD_INLINE uint32_t movemask(const vec_avx2<uint8_t> v) {
+  return _mm256_movemask_epi8(v.raw);  // 32 result bits, one per byte.
+}
+
+// Returns the most significant bit of each float/double lane (see above).
+SIMD_ATTR_AVX2 SIMD_INLINE uint32_t movemask(const vec_avx2<float> v) {
+  return _mm256_movemask_ps(v.raw);  // 8 result bits, one per float lane.
+}
+SIMD_ATTR_AVX2 SIMD_INLINE uint32_t movemask(const vec_avx2<double> v) {
+  return _mm256_movemask_pd(v.raw);  // 4 result bits, one per double lane.
+}
+
+// ------------------------------ all_zero
+
+// Returns whether all lanes are equal to zero. Supported for all integer V.
+// (Floating-point VTESTP* only test the sign bit!)
+template <typename T>
+SIMD_ATTR_AVX2 SIMD_INLINE bool all_zero(const vec_avx2<T> v) {
+  return static_cast<bool>(_mm256_testz_si256(v.raw, v.raw));  // 1 iff v == 0
+}
+
+// ------------------------------ Horizontal sum (reduction)
+
+// Returns 64-bit sums of 8-byte groups (SAD against zero = plain byte sum).
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<uint64_t> sums_of_u8x8(
+    const vec_avx2<uint8_t> v) {
+  return vec_avx2<uint64_t>(_mm256_sad_epu8(v.raw, _mm256_setzero_si256()));
+}
+
+// Returns N sums of absolute differences of byte quadruplets, starting from
+// byte offset i = [0, N) in window (11 consecutive bytes) and idx_ref * 4 in
+// ref. Computes two independent SADs: idx_ref0 (low block), idx_ref1 (high).
+template <int idx_ref1, int idx_ref0>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<int16_t> mpsadbw2(
+    const vec_avx2<uint8_t> window, const vec_avx2<uint8_t> ref) {
+  return vec_avx2<int16_t>(
+      _mm256_mpsadbw_epu8(window.raw, ref.raw, (idx_ref1 << 3) + idx_ref0));
+}
+
+// Puts the sum of all lanes of a block into every lane. "v3210" is a
+// replicated 128-bit block. Mirrors x86_sse4.h, but takes vec_avx2 arguments.
+template <typename T, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> horz_sum_impl(
+    char (&sizeof_t)[4], const vec_avx2<T, N> v3210) {
+  const auto halves_swapped = shuffle_1032(v3210);
+  const auto pair_sums = v3210 + halves_swapped;  // 3+1, 2+0, 1+3, 0+2
+  const auto rotated = shuffle_0321(pair_sums);   // 2+0, 3+1, 2+0, 3+1
+  return rotated + pair_sums;
+}
+
+template <typename T, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> horz_sum_impl(
+    char (&sizeof_t)[8], const vec_avx2<T, N> v10) {
+  const auto lanes_swapped = shuffle_01(v10);  // 64-bit lanes: one swap suffices.
+  return v10 + lanes_swapped;
+}
+
+// Returns the sum of all lanes in each lane. For {uif}32x8 and {uif}64x4.
+template <typename T, size_t N>
+SIMD_ATTR_AVX2 SIMD_INLINE vec_avx2<T, N> sum_of_lanes(
+    const vec_avx2<T, N> vHL) {
+  const vec_avx2<T, N> blocks_swapped = concat_lo_hi(vHL, vHL);
+  char size_tag[sizeof(T)];  // Only its type matters: picks the impl overload.
+  return horz_sum_impl(size_tag, blocks_swapped + vHL);
+}
+
+}  // namespace ext
+
+// TODO(janwas): wrappers for all intrinsics (in x86 namespace).
+}  // namespace pik
+
+#endif  // SIMD_ENABLE & SIMD_AVX2
+#endif  // PIK_SIMD_X86_AVX2_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/simd/x86_sse4.h b/codec/L2/demos/pikEnc/host/pik/simd/x86_sse4.h
new file mode 100755
index 0000000000..c87cc6abbd
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/simd/x86_sse4.h
@@ -0,0 +1,2507 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_SIMD_X86_SSE4_H_
+#define PIK_SIMD_X86_SSE4_H_
+
+// 128-bit SSE4 vectors and operations.
+
+#include "pik/simd/compiler_specific.h"
+#include "pik/simd/shared.h"
+#include "pik/simd/targets.h"
+#include "pik/simd/util.h"
+
+#if SIMD_ENABLE & SIMD_SSE4
+#include <smmintrin.h>
+
+namespace pik {
+
+// On x86, it is cheaper to use small vectors (prefixes of larger registers)
+// when possible; this also reduces the number of overloaded functions.
+template <class Target>
+struct PartTargetT<1, Target> {
+  using type = SSE4;  // any Target maps to SSE4 when the first argument is 1
+};
+
+template <typename T>
+struct raw_sse4 {
+  using type = __m128i;  // default: integer register for every non-float T
+};
+template <>
+struct raw_sse4<float> {
+  using type = __m128;  // single-precision register type
+};
+template <>
+struct raw_sse4<double> {
+  using type = __m128d;  // double-precision register type
+};
+
+// Returned by set_shift_*_count, also used by AVX2; do not use directly.
+template <typename T, size_t N>
+struct shift_left_count {
+  __m128i raw;  // filled by set_shift_left_count, read by shift_left_same
+};
+
+template <typename T, size_t N>
+struct shift_right_count {
+  __m128i raw;  // filled by set_shift_right_count, read by shift_right_same
+};
+
+// Opaque index vector returned by set_table_indices for table_lookup_lanes.
+template <typename T>
+struct permute_sse4 {
+  __m128i raw;
+};
+
+template <typename T, size_t N = SSE4::NumLanes<T>()>
+class vec_sse4 {
+  using Raw = typename raw_sse4<T>::type;  // __m128i, __m128 or __m128d
+
+ public:
+  SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4() {}  // leaves "raw" uninitialized
+  vec_sse4(const vec_sse4&) = default;
+  vec_sse4& operator=(const vec_sse4&) = default;
+  SIMD_ATTR_SSE4 SIMD_INLINE explicit vec_sse4(const Raw raw) : raw(raw) {}
+
+  // Compound assignment, each forwarding to the non-member binary operator.
+  // Only usable if that overload exists, e.g. only f32 and f64 have division.
+  SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4& operator*=(const vec_sse4 other) {
+    return *this = (*this * other);
+  }
+  SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4& operator/=(const vec_sse4 other) {
+    return *this = (*this / other);
+  }
+  SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4& operator+=(const vec_sse4 other) {
+    return *this = (*this + other);
+  }
+  SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4& operator-=(const vec_sse4 other) {
+    return *this = (*this - other);
+  }
+  SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4& operator&=(const vec_sse4 other) {
+    return *this = (*this & other);
+  }
+  SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4& operator|=(const vec_sse4 other) {
+    return *this = (*this | other);
+  }
+  SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4& operator^=(const vec_sse4 other) {
+    return *this = (*this ^ other);
+  }
+
+  Raw raw;  // native register; public so free functions can read it directly
+};
+
+template <typename T, size_t N>
+struct VecT<T, N, SSE4> {
+  using type = vec_sse4<T, N>;  // vector/part type used for the SSE4 target
+};
+
+using u8x16 = vec_sse4<uint8_t, 16>;  // full vectors: 128 bits of lanes
+using u16x8 = vec_sse4<uint16_t, 8>;
+using u32x4 = vec_sse4<uint32_t, 4>;
+using u64x2 = vec_sse4<uint64_t, 2>;
+using i8x16 = vec_sse4<int8_t, 16>;
+using i16x8 = vec_sse4<int16_t, 8>;
+using i32x4 = vec_sse4<int32_t, 4>;
+using i64x2 = vec_sse4<int64_t, 2>;
+using f32x4 = vec_sse4<float, 4>;
+using f64x2 = vec_sse4<double, 2>;
+
+using u8x8 = vec_sse4<uint8_t, 8>;  // parts: 64 bits of lanes
+using u16x4 = vec_sse4<uint16_t, 4>;
+using u32x2 = vec_sse4<uint32_t, 2>;
+using i8x8 = vec_sse4<int8_t, 8>;
+using i16x4 = vec_sse4<int16_t, 4>;
+using i32x2 = vec_sse4<int32_t, 2>;
+using f32x2 = vec_sse4<float, 2>;
+using f64x1 = vec_sse4<double, 1>;
+
+using u8x4 = vec_sse4<uint8_t, 4>;  // parts: 32 bits of lanes
+using i8x4 = vec_sse4<int8_t, 4>;
+using f32x1 = vec_sse4<float, 1>;
+
+// ------------------------------ Cast
+
+SIMD_ATTR_SSE4 SIMD_INLINE __m128i BitCastToInteger(__m128i v) { return v; }
+SIMD_ATTR_SSE4 SIMD_INLINE __m128i BitCastToInteger(__m128 v) {
+  return _mm_castps_si128(v);  // reinterprets the bits; no value conversion
+}
+SIMD_ATTR_SSE4 SIMD_INLINE __m128i BitCastToInteger(__m128d v) {
+  return _mm_castpd_si128(v);  // reinterprets the bits; no value conversion
+}
+
+// cast_to_u8: reinterprets the bits of v as N uint8_t lanes (no conversion).
+template <typename T, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint8_t, N> cast_to_u8(
+    Desc<uint8_t, N, SSE4>, vec_sse4<T, N / sizeof(T)> v) {
+  return vec_sse4<uint8_t, N>(BitCastToInteger(v.raw));
+}
+
+// Functor specialized on T: overloads cannot differ only in return type.
+template <typename T>
+struct BitCastFromIntegerSSE4 {
+  SIMD_ATTR_SSE4 SIMD_INLINE __m128i operator()(__m128i v) { return v; }
+};
+template <>
+struct BitCastFromIntegerSSE4<float> {
+  SIMD_ATTR_SSE4 SIMD_INLINE __m128 operator()(__m128i v) {
+    return _mm_castsi128_ps(v);
+  }
+};
+template <>
+struct BitCastFromIntegerSSE4<double> {
+  SIMD_ATTR_SSE4 SIMD_INLINE __m128d operator()(__m128i v) {
+    return _mm_castsi128_pd(v);
+  }
+};
+
+// cast_u8_to: reinterprets N * sizeof(T) uint8_t lanes as N lanes of T.
+template <typename T, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, N> cast_u8_to(
+    Desc<T, N, SSE4>, vec_sse4<uint8_t, N * sizeof(T)> v) {
+  return vec_sse4<T, N>(BitCastFromIntegerSSE4<T>()(v.raw));
+}
+
+// cast_to: reinterprets FromT lanes as T lanes by way of a uint8_t vector.
+template <typename T, size_t N, typename FromT>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, N> cast_to(
+    Desc<T, N, SSE4> d, vec_sse4<FromT, N * sizeof(T) / sizeof(FromT)> v) {
+  const auto u8 = cast_to_u8(Desc<uint8_t, N * sizeof(T), SSE4>(), v);
+  return cast_u8_to(d, u8);
+}
+
+// ------------------------------ Set
+
+// Returns an all-zero vector/part (float/double use their own register type).
+template <typename T, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, N> setzero(Desc<T, N, SSE4>) {
+  return vec_sse4<T, N>(_mm_setzero_si128());
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> setzero(Desc<float, N, SSE4>) {
+  return vec_sse4<float, N>(_mm_setzero_ps());
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> setzero(Desc<double, N, SSE4>) {
+  return vec_sse4<double, N>(_mm_setzero_pd());
+}
+
+// Returns a vector/part with all lanes set to "t"; one overload per lane type.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint8_t, N> set1(Desc<uint8_t, N, SSE4>,
+                                                     const uint8_t t) {
+  return vec_sse4<uint8_t, N>(_mm_set1_epi8(t));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t, N> set1(Desc<uint16_t, N, SSE4>,
+                                                      const uint16_t t) {
+  return vec_sse4<uint16_t, N>(_mm_set1_epi16(t));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t, N> set1(Desc<uint32_t, N, SSE4>,
+                                                      const uint32_t t) {
+  return vec_sse4<uint32_t, N>(_mm_set1_epi32(t));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t, N> set1(Desc<uint64_t, N, SSE4>,
+                                                      const uint64_t t) {
+  return vec_sse4<uint64_t, N>(_mm_set1_epi64x(t));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int8_t, N> set1(Desc<int8_t, N, SSE4>,
+                                                    const int8_t t) {
+  return vec_sse4<int8_t, N>(_mm_set1_epi8(t));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> set1(Desc<int16_t, N, SSE4>,
+                                                     const int16_t t) {
+  return vec_sse4<int16_t, N>(_mm_set1_epi16(t));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, N> set1(Desc<int32_t, N, SSE4>,
+                                                     const int32_t t) {
+  return vec_sse4<int32_t, N>(_mm_set1_epi32(t));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int64_t, N> set1(Desc<int64_t, N, SSE4>,
+                                                     const int64_t t) {
+  return vec_sse4<int64_t, N>(_mm_set1_epi64x(t));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> set1(Desc<float, N, SSE4>,
+                                                   const float t) {
+  return vec_sse4<float, N>(_mm_set1_ps(t));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> set1(Desc<double, N, SSE4>,
+                                                    const double t) {
+  return vec_sse4<double, N>(_mm_set1_pd(t));
+}
+
+// Returns a vector with lane i=[0, N) set to "first" + i. Unique per-lane
+// values are required to detect lane-crossing bugs.
+template <typename T, size_t N, typename T2>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, N> iota(Desc<T, N, SSE4> d,
+                                               const T2 first) {
+  SIMD_ALIGN T lanes[N];  // staging buffer handed to load() below
+  for (size_t i = 0; i < N; ++i) {
+    lanes[i] = first + i;  // sum is implicitly converted to T on assignment
+  }
+  return load(d, lanes);
+}
+
+SIMD_DIAGNOSTICS(push)
+SIMD_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wuninitialized")
+
+// Returns a vector with uninitialized elements (warnings suppressed above).
+template <typename T, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, N> undefined(Desc<T, N, SSE4>) {
+#ifdef __clang__
+  return vec_sse4<T, N>(_mm_undefined_si128());
+#else
+  __m128i raw;  // deliberately never assigned
+  return vec_sse4<T, N>(raw);
+#endif
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> undefined(Desc<float, N, SSE4>) {
+#ifdef __clang__
+  return vec_sse4<float, N>(_mm_undefined_ps());
+#else
+  __m128 raw;  // deliberately never assigned
+  return vec_sse4<float, N>(raw);
+#endif
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> undefined(
+    Desc<double, N, SSE4>) {
+#ifdef __clang__
+  return vec_sse4<double, N>(_mm_undefined_pd());
+#else
+  __m128d raw;  // deliberately never assigned
+  return vec_sse4<double, N>(raw);
+#endif
+}
+
+SIMD_DIAGNOSTICS(pop)
+
+// ================================================== ARITHMETIC
+
+// ------------------------------ Addition
+
+// Unsigned (wraps around on overflow)
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint8_t, N> operator+(
+    const vec_sse4<uint8_t, N> a, const vec_sse4<uint8_t, N> b) {
+  return vec_sse4<uint8_t, N>(_mm_add_epi8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t, N> operator+(
+    const vec_sse4<uint16_t, N> a, const vec_sse4<uint16_t, N> b) {
+  return vec_sse4<uint16_t, N>(_mm_add_epi16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t, N> operator+(
+    const vec_sse4<uint32_t, N> a, const vec_sse4<uint32_t, N> b) {
+  return vec_sse4<uint32_t, N>(_mm_add_epi32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t, N> operator+(
+    const vec_sse4<uint64_t, N> a, const vec_sse4<uint64_t, N> b) {
+  return vec_sse4<uint64_t, N>(_mm_add_epi64(a.raw, b.raw));
+}
+
+// Signed (same instructions as the unsigned overloads)
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int8_t, N> operator+(
+    const vec_sse4<int8_t, N> a, const vec_sse4<int8_t, N> b) {
+  return vec_sse4<int8_t, N>(_mm_add_epi8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> operator+(
+    const vec_sse4<int16_t, N> a, const vec_sse4<int16_t, N> b) {
+  return vec_sse4<int16_t, N>(_mm_add_epi16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, N> operator+(
+    const vec_sse4<int32_t, N> a, const vec_sse4<int32_t, N> b) {
+  return vec_sse4<int32_t, N>(_mm_add_epi32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int64_t, N> operator+(
+    const vec_sse4<int64_t, N> a, const vec_sse4<int64_t, N> b) {
+  return vec_sse4<int64_t, N>(_mm_add_epi64(a.raw, b.raw));
+}
+
+// Float (f32, f64)
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> operator+(
+    const vec_sse4<float, N> a, const vec_sse4<float, N> b) {
+  return vec_sse4<float, N>(_mm_add_ps(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> operator+(
+    const vec_sse4<double, N> a, const vec_sse4<double, N> b) {
+  return vec_sse4<double, N>(_mm_add_pd(a.raw, b.raw));
+}
+
+// ------------------------------ Subtraction
+
+// Unsigned (wraps around on underflow)
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint8_t, N> operator-(
+    const vec_sse4<uint8_t, N> a, const vec_sse4<uint8_t, N> b) {
+  return vec_sse4<uint8_t, N>(_mm_sub_epi8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t, N> operator-(
+    const vec_sse4<uint16_t, N> a, const vec_sse4<uint16_t, N> b) {
+  return vec_sse4<uint16_t, N>(_mm_sub_epi16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t, N> operator-(
+    const vec_sse4<uint32_t, N> a, const vec_sse4<uint32_t, N> b) {
+  return vec_sse4<uint32_t, N>(_mm_sub_epi32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t, N> operator-(
+    const vec_sse4<uint64_t, N> a, const vec_sse4<uint64_t, N> b) {
+  return vec_sse4<uint64_t, N>(_mm_sub_epi64(a.raw, b.raw));
+}
+
+// Signed (same instructions as the unsigned overloads)
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int8_t, N> operator-(
+    const vec_sse4<int8_t, N> a, const vec_sse4<int8_t, N> b) {
+  return vec_sse4<int8_t, N>(_mm_sub_epi8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> operator-(
+    const vec_sse4<int16_t, N> a, const vec_sse4<int16_t, N> b) {
+  return vec_sse4<int16_t, N>(_mm_sub_epi16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, N> operator-(
+    const vec_sse4<int32_t, N> a, const vec_sse4<int32_t, N> b) {
+  return vec_sse4<int32_t, N>(_mm_sub_epi32(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int64_t, N> operator-(
+    const vec_sse4<int64_t, N> a, const vec_sse4<int64_t, N> b) {
+  return vec_sse4<int64_t, N>(_mm_sub_epi64(a.raw, b.raw));
+}
+
+// Float (f32, f64)
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> operator-(
+    const vec_sse4<float, N> a, const vec_sse4<float, N> b) {
+  return vec_sse4<float, N>(_mm_sub_ps(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> operator-(
+    const vec_sse4<double, N> a, const vec_sse4<double, N> b) {
+  return vec_sse4<double, N>(_mm_sub_pd(a.raw, b.raw));
+}
+
+// ------------------------------ Saturating addition
+
+// Returns a + b clamped to the destination range.
+
+// Unsigned (u8, u16 only)
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint8_t, N> saturated_add(
+    const vec_sse4<uint8_t, N> a, const vec_sse4<uint8_t, N> b) {
+  return vec_sse4<uint8_t, N>(_mm_adds_epu8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t, N> saturated_add(
+    const vec_sse4<uint16_t, N> a, const vec_sse4<uint16_t, N> b) {
+  return vec_sse4<uint16_t, N>(_mm_adds_epu16(a.raw, b.raw));
+}
+
+// Signed (i8, i16 only)
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int8_t, N> saturated_add(
+    const vec_sse4<int8_t, N> a, const vec_sse4<int8_t, N> b) {
+  return vec_sse4<int8_t, N>(_mm_adds_epi8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> saturated_add(
+    const vec_sse4<int16_t, N> a, const vec_sse4<int16_t, N> b) {
+  return vec_sse4<int16_t, N>(_mm_adds_epi16(a.raw, b.raw));
+}
+
+// ------------------------------ Saturating subtraction
+
+// Returns a - b clamped to the destination range.
+
+// Unsigned (u8, u16 only)
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint8_t, N> saturated_subtract(
+    const vec_sse4<uint8_t, N> a, const vec_sse4<uint8_t, N> b) {
+  return vec_sse4<uint8_t, N>(_mm_subs_epu8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t, N> saturated_subtract(
+    const vec_sse4<uint16_t, N> a, const vec_sse4<uint16_t, N> b) {
+  return vec_sse4<uint16_t, N>(_mm_subs_epu16(a.raw, b.raw));
+}
+
+// Signed (i8, i16 only)
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int8_t, N> saturated_subtract(
+    const vec_sse4<int8_t, N> a, const vec_sse4<int8_t, N> b) {
+  return vec_sse4<int8_t, N>(_mm_subs_epi8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> saturated_subtract(
+    const vec_sse4<int16_t, N> a, const vec_sse4<int16_t, N> b) {
+  return vec_sse4<int16_t, N>(_mm_subs_epi16(a.raw, b.raw));
+}
+
+// ------------------------------ Average
+
+// Returns (a + b + 1) / 2, i.e. the average with halves rounded up.
+
+// Unsigned (u8, u16 only)
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint8_t, N> average_round(
+    const vec_sse4<uint8_t, N> a, const vec_sse4<uint8_t, N> b) {
+  return vec_sse4<uint8_t, N>(_mm_avg_epu8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t, N> average_round(
+    const vec_sse4<uint16_t, N> a, const vec_sse4<uint16_t, N> b) {
+  return vec_sse4<uint16_t, N>(_mm_avg_epu16(a.raw, b.raw));
+}
+
+// ------------------------------ Absolute value (i8, i16, i32 only)
+
+// Returns absolute value, except that LimitsMin() maps to LimitsMax() + 1.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int8_t, N> abs(
+    const vec_sse4<int8_t, N> v) {
+  return vec_sse4<int8_t, N>(_mm_abs_epi8(v.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> abs(
+    const vec_sse4<int16_t, N> v) {
+  return vec_sse4<int16_t, N>(_mm_abs_epi16(v.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, N> abs(
+    const vec_sse4<int32_t, N> v) {
+  return vec_sse4<int32_t, N>(_mm_abs_epi32(v.raw));
+}
+
+// ------------------------------ Shift lanes by constant #bits
+
+// Unsigned (u16, u32, u64). shift_right is logical (zero-filling).
+template <int kBits, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t, N> shift_left(
+    const vec_sse4<uint16_t, N> v) {
+  return vec_sse4<uint16_t, N>(_mm_slli_epi16(v.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t, N> shift_right(
+    const vec_sse4<uint16_t, N> v) {
+  return vec_sse4<uint16_t, N>(_mm_srli_epi16(v.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t, N> shift_left(
+    const vec_sse4<uint32_t, N> v) {
+  return vec_sse4<uint32_t, N>(_mm_slli_epi32(v.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t, N> shift_right(
+    const vec_sse4<uint32_t, N> v) {
+  return vec_sse4<uint32_t, N>(_mm_srli_epi32(v.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t, N> shift_left(
+    const vec_sse4<uint64_t, N> v) {
+  return vec_sse4<uint64_t, N>(_mm_slli_epi64(v.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t, N> shift_right(
+    const vec_sse4<uint64_t, N> v) {
+  return vec_sse4<uint64_t, N>(_mm_srli_epi64(v.raw, kBits));
+}
+
+// Signed (no i64 shift_right). shift_right is arithmetic (sign-filling).
+template <int kBits, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> shift_left(
+    const vec_sse4<int16_t, N> v) {
+  return vec_sse4<int16_t, N>(_mm_slli_epi16(v.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> shift_right(
+    const vec_sse4<int16_t, N> v) {
+  return vec_sse4<int16_t, N>(_mm_srai_epi16(v.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, N> shift_left(
+    const vec_sse4<int32_t, N> v) {
+  return vec_sse4<int32_t, N>(_mm_slli_epi32(v.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, N> shift_right(
+    const vec_sse4<int32_t, N> v) {
+  return vec_sse4<int32_t, N>(_mm_srai_epi32(v.raw, kBits));
+}
+template <int kBits, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int64_t, N> shift_left(
+    const vec_sse4<int64_t, N> v) {
+  return vec_sse4<int64_t, N>(_mm_slli_epi64(v.raw, kBits));
+}
+
+// ------------------------------ Shift lanes by same variable #bits
+
+template <typename T, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE shift_left_count<T, N> set_shift_left_count(
+    Desc<T, N, SSE4>, const int bits) {
+  return shift_left_count<T, N>{_mm_cvtsi32_si128(bits)};  // in low 32 bits
+}
+
+// Same as shift_left_count on x86, but different on ARM.
+template <typename T, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE shift_right_count<T, N> set_shift_right_count(
+    Desc<T, N, SSE4>, const int bits) {
+  return shift_right_count<T, N>{_mm_cvtsi32_si128(bits)};  // in low 32 bits
+}
+
+// Unsigned (no u8). Every lane uses the same count; shift_right is logical.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t, N> shift_left_same(
+    const vec_sse4<uint16_t, N> v, const shift_left_count<uint16_t, N> bits) {
+  return vec_sse4<uint16_t, N>(_mm_sll_epi16(v.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t, N> shift_right_same(
+    const vec_sse4<uint16_t, N> v, const shift_right_count<uint16_t, N> bits) {
+  return vec_sse4<uint16_t, N>(_mm_srl_epi16(v.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t, N> shift_left_same(
+    const vec_sse4<uint32_t, N> v, const shift_left_count<uint32_t, N> bits) {
+  return vec_sse4<uint32_t, N>(_mm_sll_epi32(v.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t, N> shift_right_same(
+    const vec_sse4<uint32_t, N> v, const shift_right_count<uint32_t, N> bits) {
+  return vec_sse4<uint32_t, N>(_mm_srl_epi32(v.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t, N> shift_left_same(
+    const vec_sse4<uint64_t, N> v, const shift_left_count<uint64_t, N> bits) {
+  return vec_sse4<uint64_t, N>(_mm_sll_epi64(v.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t, N> shift_right_same(
+    const vec_sse4<uint64_t, N> v, const shift_right_count<uint64_t, N> bits) {
+  return vec_sse4<uint64_t, N>(_mm_srl_epi64(v.raw, bits.raw));
+}
+
+// Signed (no i8; i64 has shift_left_same only). shift_right is arithmetic.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> shift_left_same(
+    const vec_sse4<int16_t, N> v, const shift_left_count<int16_t, N> bits) {
+  return vec_sse4<int16_t, N>(_mm_sll_epi16(v.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> shift_right_same(
+    const vec_sse4<int16_t, N> v, const shift_right_count<int16_t, N> bits) {
+  return vec_sse4<int16_t, N>(_mm_sra_epi16(v.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, N> shift_left_same(
+    const vec_sse4<int32_t, N> v, const shift_left_count<int32_t, N> bits) {
+  return vec_sse4<int32_t, N>(_mm_sll_epi32(v.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, N> shift_right_same(
+    const vec_sse4<int32_t, N> v, const shift_right_count<int32_t, N> bits) {
+  return vec_sse4<int32_t, N>(_mm_sra_epi32(v.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int64_t, N> shift_left_same(
+    const vec_sse4<int64_t, N> v, const shift_left_count<int64_t, N> bits) {
+  return vec_sse4<int64_t, N>(_mm_sll_epi64(v.raw, bits.raw));
+}
+
+// ------------------------------ Shift lanes by independent variable #bits
+
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+
+// Unsigned (no u8,u16). Lane i of v is shifted by lane i of bits.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t, N> operator<<(
+    const vec_sse4<uint32_t, N> v, const vec_sse4<uint32_t, N> bits) {
+  return vec_sse4<uint32_t, N>(_mm_sllv_epi32(v.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t, N> operator>>(
+    const vec_sse4<uint32_t, N> v, const vec_sse4<uint32_t, N> bits) {
+  return vec_sse4<uint32_t, N>(_mm_srlv_epi32(v.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t, N> operator<<(
+    const vec_sse4<uint64_t, N> v, const vec_sse4<uint64_t, N> bits) {
+  return vec_sse4<uint64_t, N>(_mm_sllv_epi64(v.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t, N> operator>>(
+    const vec_sse4<uint64_t, N> v, const vec_sse4<uint64_t, N> bits) {
+  return vec_sse4<uint64_t, N>(_mm_srlv_epi64(v.raw, bits.raw));
+}
+
+// Signed (no i8,i16; i64 has operator<< only). operator>> is arithmetic.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, N> operator<<(
+    const vec_sse4<int32_t, N> v, const vec_sse4<int32_t, N> bits) {
+  return vec_sse4<int32_t, N>(_mm_sllv_epi32(v.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, N> operator>>(
+    const vec_sse4<int32_t, N> v, const vec_sse4<int32_t, N> bits) {
+  return vec_sse4<int32_t, N>(_mm_srav_epi32(v.raw, bits.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int64_t, N> operator<<(
+    const vec_sse4<int64_t, N> v, const vec_sse4<int64_t, N> bits) {
+  return vec_sse4<int64_t, N>(_mm_sllv_epi64(v.raw, bits.raw));
+}
+
+#endif  // SIMD_TARGET_VALUE == SIMD_AVX2
+
+// ------------------------------ Minimum
+
+// Unsigned (no u64). Lane-wise minimum of a and b.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint8_t, N> min(
+    const vec_sse4<uint8_t, N> a, const vec_sse4<uint8_t, N> b) {
+  return vec_sse4<uint8_t, N>(_mm_min_epu8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t, N> min(
+    const vec_sse4<uint16_t, N> a, const vec_sse4<uint16_t, N> b) {
+  return vec_sse4<uint16_t, N>(_mm_min_epu16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t, N> min(
+    const vec_sse4<uint32_t, N> a, const vec_sse4<uint32_t, N> b) {
+  return vec_sse4<uint32_t, N>(_mm_min_epu32(a.raw, b.raw));
+}
+
+// Signed (no i64). Uses the signed-compare instructions.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int8_t, N> min(
+    const vec_sse4<int8_t, N> a, const vec_sse4<int8_t, N> b) {
+  return vec_sse4<int8_t, N>(_mm_min_epi8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> min(
+    const vec_sse4<int16_t, N> a, const vec_sse4<int16_t, N> b) {
+  return vec_sse4<int16_t, N>(_mm_min_epi16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, N> min(
+    const vec_sse4<int32_t, N> a, const vec_sse4<int32_t, N> b) {
+  return vec_sse4<int32_t, N>(_mm_min_epi32(a.raw, b.raw));
+}
+
+// Float (f32, f64)
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> min(const vec_sse4<float, N> a,
+                                                  const vec_sse4<float, N> b) {
+  return vec_sse4<float, N>(_mm_min_ps(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> min(
+    const vec_sse4<double, N> a, const vec_sse4<double, N> b) {
+  return vec_sse4<double, N>(_mm_min_pd(a.raw, b.raw));
+}
+
+// ------------------------------ Maximum
+
+// Unsigned (no u64). Lane-wise maximum of a and b.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint8_t, N> max(
+    const vec_sse4<uint8_t, N> a, const vec_sse4<uint8_t, N> b) {
+  return vec_sse4<uint8_t, N>(_mm_max_epu8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t, N> max(
+    const vec_sse4<uint16_t, N> a, const vec_sse4<uint16_t, N> b) {
+  return vec_sse4<uint16_t, N>(_mm_max_epu16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t, N> max(
+    const vec_sse4<uint32_t, N> a, const vec_sse4<uint32_t, N> b) {
+  return vec_sse4<uint32_t, N>(_mm_max_epu32(a.raw, b.raw));
+}
+
+// Signed (no i64). Uses the signed-compare instructions.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int8_t, N> max(
+    const vec_sse4<int8_t, N> a, const vec_sse4<int8_t, N> b) {
+  return vec_sse4<int8_t, N>(_mm_max_epi8(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> max(
+    const vec_sse4<int16_t, N> a, const vec_sse4<int16_t, N> b) {
+  return vec_sse4<int16_t, N>(_mm_max_epi16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, N> max(
+    const vec_sse4<int32_t, N> a, const vec_sse4<int32_t, N> b) {
+  return vec_sse4<int32_t, N>(_mm_max_epi32(a.raw, b.raw));
+}
+
+// Float (f32, f64)
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> max(const vec_sse4<float, N> a,
+                                                  const vec_sse4<float, N> b) {
+  return vec_sse4<float, N>(_mm_max_ps(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> max(
+    const vec_sse4<double, N> a, const vec_sse4<double, N> b) {
+  return vec_sse4<double, N>(_mm_max_pd(a.raw, b.raw));
+}
+
+// Returns the closest value to v within [lo, hi].
+template <typename T, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, N> clamp(const vec_sse4<T, N> v,
+                                                const vec_sse4<T, N> lo,
+                                                const vec_sse4<T, N> hi) {
+  return min(max(lo, v), hi);  // raise to at least lo first, then cap at hi
+}
+
+// ------------------------------ Integer multiplication
+
+// Unsigned (u16, u32). Keeps the lower half of each double-wide product.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t, N> operator*(
+    const vec_sse4<uint16_t, N> a, const vec_sse4<uint16_t, N> b) {
+  return vec_sse4<uint16_t, N>(_mm_mullo_epi16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t, N> operator*(
+    const vec_sse4<uint32_t, N> a, const vec_sse4<uint32_t, N> b) {
+  return vec_sse4<uint32_t, N>(_mm_mullo_epi32(a.raw, b.raw));
+}
+
+// Signed (i16, i32). Same instructions as the unsigned overloads.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> operator*(
+    const vec_sse4<int16_t, N> a, const vec_sse4<int16_t, N> b) {
+  return vec_sse4<int16_t, N>(_mm_mullo_epi16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, N> operator*(
+    const vec_sse4<int32_t, N> a, const vec_sse4<int32_t, N> b) {
+  return vec_sse4<int32_t, N>(_mm_mullo_epi32(a.raw, b.raw));
+}
+
+// "Extensions": useful but not quite performance-portable operations. We add
+// functions to this namespace in multiple places.
+namespace ext {
+
+// Returns the upper 16 bits of the 32-bit product a * b in each lane.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t, N> mul_high(
+    const vec_sse4<uint16_t, N> a, const vec_sse4<uint16_t, N> b) {
+  return vec_sse4<uint16_t, N>(_mm_mulhi_epu16(a.raw, b.raw));
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> mul_high(
+    const vec_sse4<int16_t, N> a, const vec_sse4<int16_t, N> b) {
+  return vec_sse4<int16_t, N>(_mm_mulhi_epi16(a.raw, b.raw));
+}
+
+}  // namespace ext
+
+// Returns (((a * b) >> 14) + 1) >> 1 in each i16 lane (_mm_mulhrs_epi16).
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> mul_high_round(
+    const vec_sse4<int16_t, N> a, const vec_sse4<int16_t, N> b) {
+  return vec_sse4<int16_t, N>(_mm_mulhrs_epi16(a.raw, b.raw));
+}
+
+// Multiplies even lanes (0, 2 ..); each double-wide product fills the even
+// lane (lower half) and its odd neighbor lane (upper half).
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int64_t> mul_even(
+    const vec_sse4<int32_t> a, const vec_sse4<int32_t> b) {
+  return vec_sse4<int64_t>(_mm_mul_epi32(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t> mul_even(
+    const vec_sse4<uint32_t> a, const vec_sse4<uint32_t> b) {
+  return vec_sse4<uint64_t>(_mm_mul_epu32(a.raw, b.raw));
+}
+
+// ------------------------------ Floating-point negate
+
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> neg(const vec_sse4<float, N> v) {
+  const Part<float, N, SSE4> df;
+  const Part<uint32_t, N, SSE4> du;
+  const auto sign = cast_to(df, set1(du, 0x80000000u));
+  return v ^ sign;  // XOR flips only the sign bit of each lane
+}
+
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> neg(
+    const vec_sse4<double, N> v) {
+  const Part<double, N, SSE4> df;
+  const Part<uint64_t, N, SSE4> du;
+  const auto sign = cast_to(df, set1(du, 0x8000000000000000ull));
+  return v ^ sign;  // XOR flips only the sign bit of each lane
+}
+
+// ------------------------------ Floating-point mul / div
+
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> operator*(
+    const vec_sse4<float, N> a, const vec_sse4<float, N> b) {
+  return vec_sse4<float, N>(_mm_mul_ps(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, 1> operator*(
+    const vec_sse4<float, 1> a, const vec_sse4<float, 1> b) {
+  return vec_sse4<float, 1>(_mm_mul_ss(a.raw, b.raw));  // lane 0 only
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> operator*(
+    const vec_sse4<double, N> a, const vec_sse4<double, N> b) {
+  return vec_sse4<double, N>(_mm_mul_pd(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, 1> operator*(
+    const vec_sse4<double, 1> a, const vec_sse4<double, 1> b) {
+  return vec_sse4<double, 1>(_mm_mul_sd(a.raw, b.raw));  // lane 0 only
+}
+
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> operator/(
+    const vec_sse4<float, N> a, const vec_sse4<float, N> b) {
+  return vec_sse4<float, N>(_mm_div_ps(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, 1> operator/(
+    const vec_sse4<float, 1> a, const vec_sse4<float, 1> b) {
+  return vec_sse4<float, 1>(_mm_div_ss(a.raw, b.raw));  // lane 0 only
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> operator/(
+    const vec_sse4<double, N> a, const vec_sse4<double, N> b) {
+  return vec_sse4<double, N>(_mm_div_pd(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, 1> operator/(
+    const vec_sse4<double, 1> a, const vec_sse4<double, 1> b) {
+  return vec_sse4<double, 1>(_mm_div_sd(a.raw, b.raw));  // lane 0 only
+}
+
+// Approximate (reduced-precision) reciprocal 1 / v of every float lane.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> approximate_reciprocal(
+    const vec_sse4<float, N> v) {
+  const __m128 estimate = _mm_rcp_ps(v.raw);
+  return vec_sse4<float, N>(estimate);
+}
+// Single-lane variant: uses the scalar estimate instruction.
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, 1> approximate_reciprocal(
+    const vec_sse4<float, 1> v) {
+  const __m128 estimate = _mm_rcp_ss(v.raw);
+  return vec_sse4<float, 1>(estimate);
+}
+
+// ------------------------------ Floating-point multiply-add variants
+
+// Returns mul * x + add. Uses a single fused instruction when targeting AVX2,
+// otherwise a separate multiply followed by an add.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> mul_add(
+    const vec_sse4<float, N> mul, const vec_sse4<float, N> x,
+    const vec_sse4<float, N> add) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+  const __m128 fused = _mm_fmadd_ps(mul.raw, x.raw, add.raw);
+  return vec_sse4<float, N>(fused);
+#else
+  const vec_sse4<float, N> product = mul * x;
+  return product + add;
+#endif
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> mul_add(
+    const vec_sse4<double, N> mul, const vec_sse4<double, N> x,
+    const vec_sse4<double, N> add) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+  const __m128d fused = _mm_fmadd_pd(mul.raw, x.raw, add.raw);
+  return vec_sse4<double, N>(fused);
+#else
+  const vec_sse4<double, N> product = mul * x;
+  return product + add;
+#endif
+}
+
+// Returns add - mul * x. Uses a single fused instruction when targeting AVX2,
+// otherwise a separate multiply followed by a subtract.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> nmul_add(
+    const vec_sse4<float, N> mul, const vec_sse4<float, N> x,
+    const vec_sse4<float, N> add) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+  const __m128 fused = _mm_fnmadd_ps(mul.raw, x.raw, add.raw);
+  return vec_sse4<float, N>(fused);
+#else
+  const vec_sse4<float, N> product = mul * x;
+  return add - product;
+#endif
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> nmul_add(
+    const vec_sse4<double, N> mul, const vec_sse4<double, N> x,
+    const vec_sse4<double, N> add) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+  const __m128d fused = _mm_fnmadd_pd(mul.raw, x.raw, add.raw);
+  return vec_sse4<double, N>(fused);
+#else
+  const vec_sse4<double, N> product = mul * x;
+  return add - product;
+#endif
+}
+
+// Returns x + add.
+// On AVX2 targets the result is computed as the fused multiply-add
+// x * k1 + add; the non-AVX2 fallback returns plain x + add and ignores k1.
+// NOTE(review): the two paths only agree when every lane of k1 is 1.0 --
+// presumably callers pass a vector of ones; confirm.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> fadd(
+    vec_sse4<float, N> x, const vec_sse4<float, N> k1,
+    const vec_sse4<float, N> add) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+#if SIMD_COMPILER != SIMD_COMPILER_MSVC && defined(__AVX2__)
+  // AT&T operand order: %0 = %0 * %2 + %1, i.e. x = x * k1 + add.
+  // "+x" marks x.raw as a read-write SSE register operand (updated in place).
+  asm volatile("vfmadd132ps %2, %1, %0"
+               : "+x"(x.raw)
+               : "x"(add.raw), "x"(k1.raw));
+  return x;
+#else
+  // No inline asm on this compiler/configuration: same operation via intrinsic.
+  return vec_sse4<float, N>(_mm_fmadd_ps(x.raw, k1.raw, add.raw));
+#endif
+#else
+  return x + add;
+#endif
+}
+// Double-precision counterpart of the float overload above.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> fadd(
+    vec_sse4<double, N> x, const vec_sse4<double, N> k1,
+    const vec_sse4<double, N> add) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+#if SIMD_COMPILER != SIMD_COMPILER_MSVC && defined(__AVX2__)
+  // x = x * k1 + add, updating x.raw in place.
+  asm volatile("vfmadd132pd %2, %1, %0"
+               : "+x"(x.raw)
+               : "x"(add.raw), "x"(k1.raw));
+  return x;
+#else
+  return vec_sse4<double, N>(_mm_fmadd_pd(x.raw, k1.raw, add.raw));
+#endif
+#else
+  return x + add;
+#endif
+}
+
+// Returns x - sub.
+// On AVX2 targets the result is computed as the fused multiply-subtract
+// x * k1 - sub; the non-AVX2 fallback returns plain x - sub and ignores k1.
+// NOTE(review): the two paths only agree when every lane of k1 is 1.0 --
+// presumably callers pass a vector of ones; confirm.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> fsub(
+    vec_sse4<float, N> x, const vec_sse4<float, N> k1,
+    const vec_sse4<float, N> sub) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+#if SIMD_COMPILER != SIMD_COMPILER_MSVC && defined(__AVX2__)
+  // AT&T operand order: %0 = %0 * %2 - %1, i.e. x = x * k1 - sub.
+  // "+x" marks x.raw as a read-write SSE register operand (updated in place).
+  asm volatile("vfmsub132ps %2, %1, %0"
+               : "+x"(x.raw)
+               : "x"(sub.raw), "x"(k1.raw));
+  return x;
+#else
+  // No inline asm on this compiler/configuration: same operation via intrinsic.
+  return vec_sse4<float, N>(_mm_fmsub_ps(x.raw, k1.raw, sub.raw));
+#endif
+#else
+  return x - sub;
+#endif
+}
+// Double-precision counterpart of the float overload above.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> fsub(
+    vec_sse4<double, N> x, const vec_sse4<double, N> k1,
+    const vec_sse4<double, N> sub) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+#if SIMD_COMPILER != SIMD_COMPILER_MSVC && defined(__AVX2__)
+  // x = x * k1 - sub, updating x.raw in place.
+  asm volatile("vfmsub132pd %2, %1, %0"
+               : "+x"(x.raw)
+               : "x"(sub.raw), "x"(k1.raw));
+  return x;
+#else
+  return vec_sse4<double, N>(_mm_fmsub_pd(x.raw, k1.raw, sub.raw));
+#endif
+#else
+  return x - sub;
+#endif
+}
+
+// Returns -sub + x (clobbers sub register).
+// On AVX2 targets the result is computed as the fused negated multiply-add
+// -(sub * k1) + x; the non-AVX2 fallback returns plain x - sub and ignores k1.
+// NOTE(review): the two paths only agree when every lane of k1 is 1.0 --
+// presumably callers pass a vector of ones; confirm.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> fnadd(
+    vec_sse4<float, N> sub, const vec_sse4<float, N> k1,
+    const vec_sse4<float, N> x) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+#if SIMD_COMPILER != SIMD_COMPILER_MSVC && defined(__AVX2__)
+  // AT&T operand order: %0 = -(%0 * %2) + %1, i.e. sub = -(sub * k1) + x.
+  // "+x" marks sub.raw as a read-write SSE register operand, so the local
+  // copy of sub is overwritten with the result.
+  asm volatile("vfnmadd132ps %2, %1, %0"
+               : "+x"(sub.raw)
+               : "x"(x.raw), "x"(k1.raw));
+  return sub;
+#else
+  // No inline asm on this compiler/configuration: same operation via intrinsic.
+  return vec_sse4<float, N>(_mm_fnmadd_ps(sub.raw, k1.raw, x.raw));
+#endif
+#else
+  return x - sub;
+#endif
+}
+// Double-precision counterpart of the float overload above.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> fnadd(
+    vec_sse4<double, N> sub, const vec_sse4<double, N> k1,
+    const vec_sse4<double, N> x) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+#if SIMD_COMPILER != SIMD_COMPILER_MSVC && defined(__AVX2__)
+  // sub = -(sub * k1) + x, updating sub.raw in place.
+  asm volatile("vfnmadd132pd %2, %1, %0"
+               : "+x"(sub.raw)
+               : "x"(x.raw), "x"(k1.raw));
+  return sub;
+#else
+  return vec_sse4<double, N>(_mm_fnmadd_pd(sub.raw, k1.raw, x.raw));
+#endif
+#else
+  return x - sub;
+#endif
+}
+
+// Slightly more expensive on ARM (extra negate)
+namespace ext {
+
+// Returns mul * x - sub. Fused on AVX2, otherwise multiply then subtract.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> mul_subtract(
+    const vec_sse4<float, N> mul, const vec_sse4<float, N> x,
+    const vec_sse4<float, N> sub) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+  const __m128 fused = _mm_fmsub_ps(mul.raw, x.raw, sub.raw);
+  return vec_sse4<float, N>(fused);
+#else
+  const vec_sse4<float, N> product = mul * x;
+  return product - sub;
+#endif
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> mul_subtract(
+    const vec_sse4<double, N> mul, const vec_sse4<double, N> x,
+    const vec_sse4<double, N> sub) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+  const __m128d fused = _mm_fmsub_pd(mul.raw, x.raw, sub.raw);
+  return vec_sse4<double, N>(fused);
+#else
+  const vec_sse4<double, N> product = mul * x;
+  return product - sub;
+#endif
+}
+
+// Returns -mul * x - sub. Fused on AVX2; the fallback negates mul first.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> nmul_subtract(
+    const vec_sse4<float, N> mul, const vec_sse4<float, N> x,
+    const vec_sse4<float, N> sub) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+  const __m128 fused = _mm_fnmsub_ps(mul.raw, x.raw, sub.raw);
+  return vec_sse4<float, N>(fused);
+#else
+  const vec_sse4<float, N> negated_mul = neg(mul);
+  return negated_mul * x - sub;
+#endif
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> nmul_subtract(
+    const vec_sse4<double, N> mul, const vec_sse4<double, N> x,
+    const vec_sse4<double, N> sub) {
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+  const __m128d fused = _mm_fnmsub_pd(mul.raw, x.raw, sub.raw);
+  return vec_sse4<double, N>(fused);
+#else
+  const vec_sse4<double, N> negated_mul = neg(mul);
+  return negated_mul * x - sub;
+#endif
+}
+
+}  // namespace ext
+
+// ------------------------------ Floating-point square root
+
+// Full-precision square root of every lane.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> sqrt(const vec_sse4<float, N> v) {
+  const __m128 root = _mm_sqrt_ps(v.raw);
+  return vec_sse4<float, N>(root);
+}
+// Single-lane float: scalar instruction.
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, 1> sqrt(const vec_sse4<float, 1> v) {
+  const __m128 root = _mm_sqrt_ss(v.raw);
+  return vec_sse4<float, 1>(root);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> sqrt(
+    const vec_sse4<double, N> v) {
+  const __m128d root = _mm_sqrt_pd(v.raw);
+  return vec_sse4<double, N>(root);
+}
+// Single-lane double: the scalar form takes its upper lane from the first
+// argument, for which a zero vector is supplied.
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, 1> sqrt(
+    const vec_sse4<double, 1> v) {
+  const __m128d upper = _mm_setzero_pd();
+  const __m128d root = _mm_sqrt_sd(upper, v.raw);
+  return vec_sse4<double, 1>(root);
+}
+
+// Approximate (reduced-precision) reciprocal square root of every float lane.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> approximate_reciprocal_sqrt(
+    const vec_sse4<float, N> v) {
+  const __m128 estimate = _mm_rsqrt_ps(v.raw);
+  return vec_sse4<float, N>(estimate);
+}
+// Single-lane variant: uses the scalar estimate instruction.
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, 1> approximate_reciprocal_sqrt(
+    const vec_sse4<float, 1> v) {
+  const __m128 estimate = _mm_rsqrt_ss(v.raw);
+  return vec_sse4<float, 1>(estimate);
+}
+
+// ------------------------------ Floating-point rounding
+
+// Rounds each lane to the nearest integer (ties to even) without raising
+// floating-point exceptions.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> round(
+    const vec_sse4<float, N> v) {
+  const __m128 rounded =
+      _mm_round_ps(v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  return vec_sse4<float, N>(rounded);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> round(
+    const vec_sse4<double, N> v) {
+  const __m128d rounded =
+      _mm_round_pd(v.raw, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  return vec_sse4<double, N>(rounded);
+}
+
+// Rounds each lane toward zero (truncation) without raising floating-point
+// exceptions.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> trunc(
+    const vec_sse4<float, N> v) {
+  const __m128 truncated =
+      _mm_round_ps(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+  return vec_sse4<float, N>(truncated);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> trunc(
+    const vec_sse4<double, N> v) {
+  const __m128d truncated =
+      _mm_round_pd(v.raw, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+  return vec_sse4<double, N>(truncated);
+}
+
+// Rounds each lane toward +infinity (ceiling) without raising floating-point
+// exceptions.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> ceil(const vec_sse4<float, N> v) {
+  const __m128 rounded_up =
+      _mm_round_ps(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  return vec_sse4<float, N>(rounded_up);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> ceil(
+    const vec_sse4<double, N> v) {
+  const __m128d rounded_up =
+      _mm_round_pd(v.raw, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+  return vec_sse4<double, N>(rounded_up);
+}
+
+// Rounds each lane toward -infinity (floor) without raising floating-point
+// exceptions.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> floor(
+    const vec_sse4<float, N> v) {
+  const __m128 rounded_down =
+      _mm_round_ps(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  return vec_sse4<float, N>(rounded_down);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> floor(
+    const vec_sse4<double, N> v) {
+  const __m128d rounded_down =
+      _mm_round_pd(v.raw, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+  return vec_sse4<double, N>(rounded_down);
+}
+
+// ================================================== COMPARE
+
+// Comparisons fill a lane with 1-bits if the condition is true, else 0.
+
+// ------------------------------ Equality
+
+// Unsigned lanes: equality is bitwise, so the integer compare-equal
+// instructions are used for each lane width.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint8_t, N> operator==(
+    const vec_sse4<uint8_t, N> a, const vec_sse4<uint8_t, N> b) {
+  const __m128i equal_mask = _mm_cmpeq_epi8(a.raw, b.raw);
+  return vec_sse4<uint8_t, N>(equal_mask);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t, N> operator==(
+    const vec_sse4<uint16_t, N> a, const vec_sse4<uint16_t, N> b) {
+  const __m128i equal_mask = _mm_cmpeq_epi16(a.raw, b.raw);
+  return vec_sse4<uint16_t, N>(equal_mask);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t, N> operator==(
+    const vec_sse4<uint32_t, N> a, const vec_sse4<uint32_t, N> b) {
+  const __m128i equal_mask = _mm_cmpeq_epi32(a.raw, b.raw);
+  return vec_sse4<uint32_t, N>(equal_mask);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t, N> operator==(
+    const vec_sse4<uint64_t, N> a, const vec_sse4<uint64_t, N> b) {
+  const __m128i equal_mask = _mm_cmpeq_epi64(a.raw, b.raw);
+  return vec_sse4<uint64_t, N>(equal_mask);
+}
+
+// Signed lanes: same integer compare-equal instructions as the unsigned case.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int8_t, N> operator==(
+    const vec_sse4<int8_t, N> a, const vec_sse4<int8_t, N> b) {
+  const __m128i equal_mask = _mm_cmpeq_epi8(a.raw, b.raw);
+  return vec_sse4<int8_t, N>(equal_mask);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> operator==(
+    const vec_sse4<int16_t, N> a, const vec_sse4<int16_t, N> b) {
+  const __m128i equal_mask = _mm_cmpeq_epi16(a.raw, b.raw);
+  return vec_sse4<int16_t, N>(equal_mask);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, N> operator==(
+    const vec_sse4<int32_t, N> a, const vec_sse4<int32_t, N> b) {
+  const __m128i equal_mask = _mm_cmpeq_epi32(a.raw, b.raw);
+  return vec_sse4<int32_t, N>(equal_mask);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int64_t, N> operator==(
+    const vec_sse4<int64_t, N> a, const vec_sse4<int64_t, N> b) {
+  const __m128i equal_mask = _mm_cmpeq_epi64(a.raw, b.raw);
+  return vec_sse4<int64_t, N>(equal_mask);
+}
+
+// Floating-point lanes: uses the float/double compare-equal instructions.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> operator==(
+    const vec_sse4<float, N> a, const vec_sse4<float, N> b) {
+  const __m128 equal_mask = _mm_cmpeq_ps(a.raw, b.raw);
+  return vec_sse4<float, N>(equal_mask);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> operator==(
+    const vec_sse4<double, N> a, const vec_sse4<double, N> b) {
+  const __m128d equal_mask = _mm_cmpeq_pd(a.raw, b.raw);
+  return vec_sse4<double, N>(equal_mask);
+}
+
+// ------------------------------ Strict inequality
+
+// Signed/float a < b. The integer overloads are expressed as b > a (operands
+// swapped into the compare-greater-than intrinsics); float/double use the
+// compare-less-than intrinsics directly.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int8_t, N> operator<(
+    const vec_sse4<int8_t, N> a, const vec_sse4<int8_t, N> b) {
+  const __m128i less_mask = _mm_cmpgt_epi8(b.raw, a.raw);
+  return vec_sse4<int8_t, N>(less_mask);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> operator<(
+    const vec_sse4<int16_t, N> a, const vec_sse4<int16_t, N> b) {
+  const __m128i less_mask = _mm_cmpgt_epi16(b.raw, a.raw);
+  return vec_sse4<int16_t, N>(less_mask);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, N> operator<(
+    const vec_sse4<int32_t, N> a, const vec_sse4<int32_t, N> b) {
+  const __m128i less_mask = _mm_cmpgt_epi32(b.raw, a.raw);
+  return vec_sse4<int32_t, N>(less_mask);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> operator<(
+    const vec_sse4<float, N> a, const vec_sse4<float, N> b) {
+  const __m128 less_mask = _mm_cmplt_ps(a.raw, b.raw);
+  return vec_sse4<float, N>(less_mask);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> operator<(
+    const vec_sse4<double, N> a, const vec_sse4<double, N> b) {
+  const __m128d less_mask = _mm_cmplt_pd(a.raw, b.raw);
+  return vec_sse4<double, N>(less_mask);
+}
+
+// Signed/float a > b, mapped directly onto the compare-greater-than
+// intrinsics for each lane type.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int8_t, N> operator>(
+    const vec_sse4<int8_t, N> a, const vec_sse4<int8_t, N> b) {
+  const __m128i greater_mask = _mm_cmpgt_epi8(a.raw, b.raw);
+  return vec_sse4<int8_t, N>(greater_mask);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> operator>(
+    const vec_sse4<int16_t, N> a, const vec_sse4<int16_t, N> b) {
+  const __m128i greater_mask = _mm_cmpgt_epi16(a.raw, b.raw);
+  return vec_sse4<int16_t, N>(greater_mask);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, N> operator>(
+    const vec_sse4<int32_t, N> a, const vec_sse4<int32_t, N> b) {
+  const __m128i greater_mask = _mm_cmpgt_epi32(a.raw, b.raw);
+  return vec_sse4<int32_t, N>(greater_mask);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> operator>(
+    const vec_sse4<float, N> a, const vec_sse4<float, N> b) {
+  const __m128 greater_mask = _mm_cmpgt_ps(a.raw, b.raw);
+  return vec_sse4<float, N>(greater_mask);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> operator>(
+    const vec_sse4<double, N> a, const vec_sse4<double, N> b) {
+  const __m128d greater_mask = _mm_cmpgt_pd(a.raw, b.raw);
+  return vec_sse4<double, N>(greater_mask);
+}
+
+// ------------------------------ Weak inequality
+
+// Float/double a <= b and a >= b; only floating-point lanes are covered here.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> operator<=(
+    const vec_sse4<float, N> a, const vec_sse4<float, N> b) {
+  const __m128 le_mask = _mm_cmple_ps(a.raw, b.raw);
+  return vec_sse4<float, N>(le_mask);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> operator<=(
+    const vec_sse4<double, N> a, const vec_sse4<double, N> b) {
+  const __m128d le_mask = _mm_cmple_pd(a.raw, b.raw);
+  return vec_sse4<double, N>(le_mask);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> operator>=(
+    const vec_sse4<float, N> a, const vec_sse4<float, N> b) {
+  const __m128 ge_mask = _mm_cmpge_ps(a.raw, b.raw);
+  return vec_sse4<float, N>(ge_mask);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> operator>=(
+    const vec_sse4<double, N> a, const vec_sse4<double, N> b) {
+  const __m128d ge_mask = _mm_cmpge_pd(a.raw, b.raw);
+  return vec_sse4<double, N>(ge_mask);
+}
+
+// ================================================== LOGICAL
+
+// ------------------------------ Bitwise AND
+
+// Bitwise a & b. The generic overload handles integer lanes; float and double
+// have dedicated overloads using the matching-domain instructions.
+template <typename T, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, N> operator&(const vec_sse4<T, N> a,
+                                                    const vec_sse4<T, N> b) {
+  const auto bits = _mm_and_si128(a.raw, b.raw);
+  return vec_sse4<T, N>(bits);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> operator&(
+    const vec_sse4<float, N> a, const vec_sse4<float, N> b) {
+  const __m128 bits = _mm_and_ps(a.raw, b.raw);
+  return vec_sse4<float, N>(bits);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> operator&(
+    const vec_sse4<double, N> a, const vec_sse4<double, N> b) {
+  const __m128d bits = _mm_and_pd(a.raw, b.raw);
+  return vec_sse4<double, N>(bits);
+}
+
+// ------------------------------ Bitwise AND-NOT
+
+// Returns ~not_mask & mask, i.e. the first argument is the one complemented.
+template <typename T, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, N> andnot(const vec_sse4<T, N> not_mask,
+                                                 const vec_sse4<T, N> mask) {
+  const auto bits = _mm_andnot_si128(not_mask.raw, mask.raw);
+  return vec_sse4<T, N>(bits);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> andnot(
+    const vec_sse4<float, N> not_mask, const vec_sse4<float, N> mask) {
+  const __m128 bits = _mm_andnot_ps(not_mask.raw, mask.raw);
+  return vec_sse4<float, N>(bits);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> andnot(
+    const vec_sse4<double, N> not_mask, const vec_sse4<double, N> mask) {
+  const __m128d bits = _mm_andnot_pd(not_mask.raw, mask.raw);
+  return vec_sse4<double, N>(bits);
+}
+
+// ------------------------------ Bitwise OR
+
+// Bitwise a | b. The generic overload handles integer lanes; float and double
+// have dedicated overloads using the matching-domain instructions.
+template <typename T, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, N> operator|(const vec_sse4<T, N> a,
+                                                    const vec_sse4<T, N> b) {
+  const auto bits = _mm_or_si128(a.raw, b.raw);
+  return vec_sse4<T, N>(bits);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> operator|(
+    const vec_sse4<float, N> a, const vec_sse4<float, N> b) {
+  const __m128 bits = _mm_or_ps(a.raw, b.raw);
+  return vec_sse4<float, N>(bits);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> operator|(
+    const vec_sse4<double, N> a, const vec_sse4<double, N> b) {
+  const __m128d bits = _mm_or_pd(a.raw, b.raw);
+  return vec_sse4<double, N>(bits);
+}
+
+// ------------------------------ Bitwise XOR
+
+// Bitwise a ^ b. The generic overload handles integer lanes; float and double
+// have dedicated overloads using the matching-domain instructions.
+template <typename T, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, N> operator^(const vec_sse4<T, N> a,
+                                                    const vec_sse4<T, N> b) {
+  const auto bits = _mm_xor_si128(a.raw, b.raw);
+  return vec_sse4<T, N>(bits);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> operator^(
+    const vec_sse4<float, N> a, const vec_sse4<float, N> b) {
+  const __m128 bits = _mm_xor_ps(a.raw, b.raw);
+  return vec_sse4<float, N>(bits);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> operator^(
+    const vec_sse4<double, N> a, const vec_sse4<double, N> b) {
+  const __m128d bits = _mm_xor_pd(a.raw, b.raw);
+  return vec_sse4<double, N>(bits);
+}
+
+// ------------------------------ Select/blend
+
+// Produces a mask suitable for select() from the sign of each lane.
+// On x86 the input is passed through unchanged because blendv_ps/pd inspect
+// only the sign bit.
+template <typename T, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, N> condition_from_sign(
+    const vec_sse4<T, N> v) {
+  const vec_sse4<T, N> mask = v;
+  return mask;
+}
+
+// Returns mask ? b : a. "mask" must either have been returned by
+// selector_from_mask, or callers must ensure its lanes are T(0) or ~T(0).
+// NOTE(review): selector_from_mask is not defined in the visible part of this
+// file; confirm the helper name is still current.
+template <typename T, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, N> select(const vec_sse4<T, N> a,
+                                                 const vec_sse4<T, N> b,
+                                                 const vec_sse4<T, N> mask) {
+  // Integer lanes: byte-granular blend.
+  const auto blended = _mm_blendv_epi8(a.raw, b.raw, mask.raw);
+  return vec_sse4<T, N>(blended);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> select(
+    const vec_sse4<float, N> a, const vec_sse4<float, N> b,
+    const vec_sse4<float, N> mask) {
+  const __m128 blended = _mm_blendv_ps(a.raw, b.raw, mask.raw);
+  return vec_sse4<float, N>(blended);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, N> select(
+    const vec_sse4<double, N> a, const vec_sse4<double, N> b,
+    const vec_sse4<double, N> mask) {
+  const __m128d blended = _mm_blendv_pd(a.raw, b.raw, mask.raw);
+  return vec_sse4<double, N>(blended);
+}
+
+// ================================================== MEMORY
+
+// ------------------------------ Load
+
+// Loads a full 128-bit vector through the aligned-load intrinsics. The
+// generic overload covers integer lanes; float/double have own overloads.
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> load(Full<T, SSE4>,
+                                            const T* SIMD_RESTRICT aligned) {
+  const __m128i* const src = reinterpret_cast<const __m128i*>(aligned);
+  return vec_sse4<T>(_mm_load_si128(src));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float> load(
+    Full<float, SSE4>, const float* SIMD_RESTRICT aligned) {
+  const __m128 lanes = _mm_load_ps(aligned);
+  return vec_sse4<float>(lanes);
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double> load(
+    Full<double, SSE4>, const double* SIMD_RESTRICT aligned) {
+  const __m128d lanes = _mm_load_pd(aligned);
+  return vec_sse4<double>(lanes);
+}
+
+// Loads a full 128-bit vector through the unaligned-load intrinsics.
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> load_unaligned(
+    Full<T, SSE4>, const T* SIMD_RESTRICT p) {
+  const __m128i* const src = reinterpret_cast<const __m128i*>(p);
+  return vec_sse4<T>(_mm_loadu_si128(src));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float> load_unaligned(
+    Full<float, SSE4>, const float* SIMD_RESTRICT p) {
+  const __m128 lanes = _mm_loadu_ps(p);
+  return vec_sse4<float>(lanes);
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double> load_unaligned(
+    Full<double, SSE4>, const double* SIMD_RESTRICT p) {
+  const __m128d lanes = _mm_loadu_pd(p);
+  return vec_sse4<double>(lanes);
+}
+
+// Loads a 64-bit (half-vector) part into the lower half of the register.
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, 8 / sizeof(T)> load(
+    Desc<T, 8 / sizeof(T), SSE4>, const T* SIMD_RESTRICT p) {
+  const __m128i* const src = reinterpret_cast<const __m128i*>(p);
+  return vec_sse4<T, 8 / sizeof(T)>(_mm_loadl_epi64(src));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, 2> load(
+    Desc<float, 2, SSE4>, const float* SIMD_RESTRICT p) {
+  // The upper half of the result is taken from this zero vector.
+  const __m128 zero_upper = _mm_setzero_ps();
+  const __m64* const src = reinterpret_cast<const __m64*>(p);
+  return vec_sse4<float, 2>(_mm_loadl_pi(zero_upper, src));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, 1> load(
+    Desc<double, 1, SSE4>, const double* SIMD_RESTRICT p) {
+  // The upper half of the result is taken from this zero vector.
+  const __m128d zero_upper = _mm_setzero_pd();
+  return vec_sse4<double, 1>(_mm_loadl_pd(zero_upper, p));
+}
+
+// Loads a 32-bit part into the lowest 32 bits of the register.
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, 4 / sizeof(T)> load(
+    Desc<T, 4 / sizeof(T), SSE4>, const T* SIMD_RESTRICT p) {
+  // TODO(janwas): load_ss?
+  // Fetch the four bytes through CopyBytes into an integer, then move that
+  // integer into a vector register.
+  int32_t lane_bits;
+  CopyBytes<4>(p, &lane_bits);
+  const __m128i lanes = _mm_cvtsi32_si128(lane_bits);
+  return vec_sse4<T, 4 / sizeof(T)>(lanes);
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, 1> load(
+    Desc<float, 1, SSE4>, const float* SIMD_RESTRICT p) {
+  const __m128 lanes = _mm_load_ss(p);
+  return vec_sse4<float, 1>(lanes);
+}
+
+// With 128-bit vectors there is only one block, hence nothing to duplicate:
+// this simply forwards to load_unaligned.
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> load_dup128(
+    Full<T, SSE4> d, const T* const SIMD_RESTRICT p) {
+  const vec_sse4<T> block = load_unaligned(d, p);
+  return block;
+}
+
+// ------------------------------ Store
+
+// Writes a full 128-bit vector through the aligned-store intrinsics.
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE void store(const vec_sse4<T> v, Full<T, SSE4>,
+                                      T* SIMD_RESTRICT aligned) {
+  __m128i* const dst = reinterpret_cast<__m128i*>(aligned);
+  _mm_store_si128(dst, v.raw);
+}
+SIMD_ATTR_SSE4 SIMD_INLINE void store(const vec_sse4<float> v,
+                                      Full<float, SSE4>,
+                                      float* SIMD_RESTRICT aligned) {
+  const __m128 lanes = v.raw;
+  _mm_store_ps(aligned, lanes);
+}
+SIMD_ATTR_SSE4 SIMD_INLINE void store(const vec_sse4<double> v,
+                                      Full<double, SSE4>,
+                                      double* SIMD_RESTRICT aligned) {
+  const __m128d lanes = v.raw;
+  _mm_store_pd(aligned, lanes);
+}
+
+// Writes a full 128-bit vector through the unaligned-store intrinsics.
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE void store_unaligned(const vec_sse4<T> v,
+                                                Full<T, SSE4>,
+                                                T* SIMD_RESTRICT p) {
+  __m128i* const dst = reinterpret_cast<__m128i*>(p);
+  _mm_storeu_si128(dst, v.raw);
+}
+SIMD_ATTR_SSE4 SIMD_INLINE void store_unaligned(const vec_sse4<float> v,
+                                                Full<float, SSE4>,
+                                                float* SIMD_RESTRICT p) {
+  const __m128 lanes = v.raw;
+  _mm_storeu_ps(p, lanes);
+}
+SIMD_ATTR_SSE4 SIMD_INLINE void store_unaligned(const vec_sse4<double> v,
+                                                Full<double, SSE4>,
+                                                double* SIMD_RESTRICT p) {
+  const __m128d lanes = v.raw;
+  _mm_storeu_pd(p, lanes);
+}
+
+// Writes the lower 64 bits (half-vector part) of v to p.
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE void store(const vec_sse4<T, 8 / sizeof(T)> v,
+                                      Desc<T, 8 / sizeof(T), SSE4>,
+                                      T* SIMD_RESTRICT p) {
+  __m128i* const dst = reinterpret_cast<__m128i*>(p);
+  _mm_storel_epi64(dst, v.raw);
+}
+SIMD_ATTR_SSE4 SIMD_INLINE void store(const vec_sse4<float, 2> v,
+                                      Desc<float, 2, SSE4>,
+                                      float* SIMD_RESTRICT p) {
+  __m64* const dst = reinterpret_cast<__m64*>(p);
+  _mm_storel_pi(dst, v.raw);
+}
+SIMD_ATTR_SSE4 SIMD_INLINE void store(const vec_sse4<double, 1> v,
+                                      Desc<double, 1, SSE4>,
+                                      double* SIMD_RESTRICT p) {
+  const __m128d lanes = v.raw;
+  _mm_storel_pd(p, lanes);
+}
+
+// Writes the lowest 32 bits of v to p.
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE void store(const vec_sse4<T, 4 / sizeof(T)> v,
+                                      Desc<T, 4 / sizeof(T), SSE4>,
+                                      T* SIMD_RESTRICT p) {
+  // _mm_storeu_si32 is documented but unavailable in Clang; CopyBytes generates
+  // bad code; type-punning is unsafe; this actually generates MOVD.
+  float* SIMD_RESTRICT dst = reinterpret_cast<float * SIMD_RESTRICT>(p);
+  const __m128 as_float = _mm_castsi128_ps(v.raw);
+  _mm_store_ss(dst, as_float);
+}
+SIMD_ATTR_SSE4 SIMD_INLINE void store(const vec_sse4<float, 1> v,
+                                      Desc<float, 1, SSE4>,
+                                      float* SIMD_RESTRICT p) {
+  const __m128 lanes = v.raw;
+  _mm_store_ss(p, lanes);
+}
+
+// ------------------------------ Non-temporal stores
+
+// Same as aligned stores on non-x86.
+
+// Writes a full vector to an aligned address using the non-temporal
+// (streaming) store intrinsics.
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE void stream(const vec_sse4<T> v, Full<T, SSE4>,
+                                       T* SIMD_RESTRICT aligned) {
+  __m128i* const dst = reinterpret_cast<__m128i*>(aligned);
+  _mm_stream_si128(dst, v.raw);
+}
+SIMD_ATTR_SSE4 SIMD_INLINE void stream(const vec_sse4<float> v,
+                                       Full<float, SSE4>,
+                                       float* SIMD_RESTRICT aligned) {
+  const __m128 lanes = v.raw;
+  _mm_stream_ps(aligned, lanes);
+}
+SIMD_ATTR_SSE4 SIMD_INLINE void stream(const vec_sse4<double> v,
+                                       Full<double, SSE4>,
+                                       double* SIMD_RESTRICT aligned) {
+  const __m128d lanes = v.raw;
+  _mm_stream_pd(aligned, lanes);
+}
+
+// ------------------------------ Gather
+
+#if SIMD_TARGET_VALUE == SIMD_AVX2
+
+// "Extensions": useful but not quite performance-portable operations. We add
+// functions to this namespace in multiple places.
+namespace ext {
+
+// The *_impl overloads are selected by the size of the char array passed as
+// their first argument (4 => 32-bit lanes, 8 => 64-bit lanes). "offset"
+// variants use a gather scale of 1 (byte offsets); "index" variants scale by
+// the lane size.
+
+// 32-bit lanes, byte offsets.
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> gather_offset_impl(
+    char (&sizeof_t)[4], Full<T, SSE4>, const T* SIMD_RESTRICT base,
+    const vec_sse4<int32_t> offset) {
+  const int32_t* const base32 = reinterpret_cast<const int32_t*>(base);
+  return vec_sse4<T>(_mm_i32gather_epi32(base32, offset.raw, 1));
+}
+// 32-bit lanes, element indices.
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> gather_index_impl(
+    char (&sizeof_t)[4], Full<T, SSE4>, const T* SIMD_RESTRICT base,
+    const vec_sse4<int32_t> index) {
+  const int32_t* const base32 = reinterpret_cast<const int32_t*>(base);
+  return vec_sse4<T>(_mm_i32gather_epi32(base32, index.raw, 4));
+}
+
+// 64-bit lanes, byte offsets.
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> gather_offset_impl(
+    char (&sizeof_t)[8], Full<T, SSE4>, const T* SIMD_RESTRICT base,
+    const vec_sse4<int64_t> offset) {
+  const GatherIndex64* const base64 =
+      reinterpret_cast<const GatherIndex64*>(base);
+  return vec_sse4<T>(_mm_i64gather_epi64(base64, offset.raw, 1));
+}
+// 64-bit lanes, element indices.
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> gather_index_impl(
+    char (&sizeof_t)[8], Full<T, SSE4>, const T* SIMD_RESTRICT base,
+    const vec_sse4<int64_t> index) {
+  const GatherIndex64* const base64 =
+      reinterpret_cast<const GatherIndex64*>(base);
+  return vec_sse4<T>(_mm_i64gather_epi64(base64, index.raw, 8));
+}
+
+// Public entry point: gathers lanes from base plus per-lane byte offsets.
+// Dispatches to the *_impl overload matching sizeof(T).
+template <typename T, typename Offset>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> gather_offset(
+    Full<T, SSE4> d, const T* SIMD_RESTRICT base,
+    const vec_sse4<Offset> offset) {
+  static_assert(sizeof(T) == sizeof(Offset), "SVE requires same size base/ofs");
+  char size_tag[sizeof(T)];
+  return gather_offset_impl(size_tag, d, base, offset);
+}
+// Public entry point: gathers lanes from base[index] per lane.
+// Dispatches to the *_impl overload matching sizeof(T).
+template <typename T, typename Index>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> gather_index(
+    Full<T, SSE4> d, const T* SIMD_RESTRICT base, const vec_sse4<Index> index) {
+  static_assert(sizeof(T) == sizeof(Index), "SVE requires same size base/idx");
+  char size_tag[sizeof(T)];
+  return gather_index_impl(size_tag, d, base, index);
+}
+
+// float specializations: use the single-precision gather directly.
+template <>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float> gather_offset<float>(
+    Full<float, SSE4>, const float* SIMD_RESTRICT base,
+    const vec_sse4<int32_t> offset) {
+  const __m128 gathered = _mm_i32gather_ps(base, offset.raw, 1);
+  return vec_sse4<float>(gathered);
+}
+template <>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float> gather_index<float>(
+    Full<float, SSE4>, const float* SIMD_RESTRICT base,
+    const vec_sse4<int32_t> index) {
+  const __m128 gathered = _mm_i32gather_ps(base, index.raw, 4);
+  return vec_sse4<float>(gathered);
+}
+
+// double specializations: use the double-precision gather directly.
+template <>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double> gather_offset<double>(
+    Full<double, SSE4>, const double* SIMD_RESTRICT base,
+    const vec_sse4<int64_t> offset) {
+  const __m128d gathered = _mm_i64gather_pd(base, offset.raw, 1);
+  return vec_sse4<double>(gathered);
+}
+template <>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double> gather_index<double>(
+    Full<double, SSE4>, const double* SIMD_RESTRICT base,
+    const vec_sse4<int64_t> index) {
+  const __m128d gathered = _mm_i64gather_pd(base, index.raw, 8);
+  return vec_sse4<double>(gathered);
+}
+
+}  // namespace ext
+
+#endif  // SIMD_TARGET_VALUE == SIMD_AVX2
+
+// ================================================== SWIZZLE
+
+// ------------------------------ Shift vector by constant #bytes
+
+// Shifts the whole 128-bit vector left by kBytes bytes, filling with zeros.
+// Example: 0x01..0F, kBytes = 1 => 0x02..0F00
+template <int kBytes, typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> shift_left_bytes(const vec_sse4<T> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  const auto shifted = _mm_slli_si128(v.raw, kBytes);
+  return vec_sse4<T>(shifted);
+}
+
+// Same shift expressed in lanes: kLanes lanes of T equal kLanes * sizeof(T)
+// bytes.
+template <int kLanes, typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> shift_left_lanes(const vec_sse4<T> v) {
+  return shift_left_bytes<kLanes * sizeof(T), T>(v);
+}
+
+// Shifts the whole 128-bit vector right by kBytes bytes, filling with zeros.
+// Example: 0x01..0F, kBytes = 1 => 0x0001..0E
+template <int kBytes, typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> shift_right_bytes(const vec_sse4<T> v) {
+  static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes");
+  const auto shifted = _mm_srli_si128(v.raw, kBytes);
+  return vec_sse4<T>(shifted);
+}
+
+// Same shift expressed in lanes: kLanes lanes of T equal kLanes * sizeof(T)
+// bytes.
+template <int kLanes, typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> shift_right_lanes(const vec_sse4<T> v) {
+  return shift_right_bytes<kLanes * sizeof(T), T>(v);
+}
+
+// ------------------------------ Extract from 2x 128-bit at constant offset
+
+// Treats <hi, lo> as one 256-bit value and returns the 128 bits that remain
+// after skipping its least-significant kBytes bytes.
+template <int kBytes, typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> combine_shift_right_bytes(
+    const vec_sse4<T> hi, const vec_sse4<T> lo) {
+  // Work on byte vectors so the same code serves every lane type.
+  const Full<uint8_t, SSE4> du8;
+  const auto hi_bytes = cast_to(du8, hi);
+  const auto lo_bytes = cast_to(du8, lo);
+  const vec_sse4<uint8_t> window(
+      _mm_alignr_epi8(hi_bytes.raw, lo_bytes.raw, kBytes));
+  return cast_to(Full<T, SSE4>(), window);
+}
+
+// ------------------------------ Broadcast/splat any lane
+
+// Unsigned
+// Returns a vector with lane kLane of v replicated into every lane.
+template <int kLane>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t> broadcast(
+    const vec_sse4<uint16_t> v) {
+  static_assert(0 <= kLane && kLane < 8, "Invalid lane");
+  // Both branches are compiled for every kLane (plain "if", not a
+  // compile-time branch), so the shuffle immediates are masked to 8 bits:
+  // otherwise the branch that is not taken would pass an out-of-range
+  // immediate (e.g. 0x55 * 7, or a negative value) to the intrinsic. For the
+  // branch that is taken the mask leaves the value unchanged.
+  if (kLane < 4) {
+    // Replicate the lane within the lower half, then copy that half upward.
+    const __m128i lo = _mm_shufflelo_epi16(v.raw, (0x55 * kLane) & 0xFF);
+    return vec_sse4<uint16_t>(_mm_unpacklo_epi64(lo, lo));
+  } else {
+    // Replicate the lane within the upper half, then copy that half downward.
+    const __m128i hi = _mm_shufflehi_epi16(v.raw, (0x55 * (kLane - 4)) & 0xFF);
+    return vec_sse4<uint16_t>(_mm_unpackhi_epi64(hi, hi));
+  }
+}
+template <int kLane>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t> broadcast(
+    const vec_sse4<uint32_t> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  // 0x55 * kLane repeats the 2-bit lane index in all four selector fields.
+  return vec_sse4<uint32_t>(_mm_shuffle_epi32(v.raw, 0x55 * kLane));
+}
+template <int kLane>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t> broadcast(
+    const vec_sse4<uint64_t> v) {
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  // 0xEE selects 32-bit lanes {2,3,2,3} (upper half), 0x44 {0,1,0,1} (lower).
+  return vec_sse4<uint64_t>(_mm_shuffle_epi32(v.raw, kLane ? 0xEE : 0x44));
+}
+
+// Signed. 16-bit lanes: splat within one 64-bit half, then copy that half.
+template <int kLane>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t> broadcast(
+    const vec_sse4<int16_t> v) {
+  static_assert(0 <= kLane && kLane < 8, "Invalid lane");
+  if (kLane < 4) {  // Plain `if`: both arms compile, so keep immediates 8-bit.
+    const __m128i lo = _mm_shufflelo_epi16(v.raw, (0x55 * kLane) & 0xFF);
+    return vec_sse4<int16_t>(_mm_unpacklo_epi64(lo, lo));
+  } else {
+    const __m128i hi = _mm_shufflehi_epi16(v.raw, (0x55 * (kLane - 4)) & 0xFF);
+    return vec_sse4<int16_t>(_mm_unpackhi_epi64(hi, hi));
+  }
+}
+template <int kLane>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t> broadcast(
+    const vec_sse4<int32_t> v) {  // 0x55 * kLane: kLane in all four selectors
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  return vec_sse4<int32_t>(_mm_shuffle_epi32(v.raw, 0x55 * kLane));
+}
+template <int kLane>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int64_t> broadcast(
+    const vec_sse4<int64_t> v) {  // 0x44 repeats the lower 64 bits, 0xEE the upper
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  return vec_sse4<int64_t>(_mm_shuffle_epi32(v.raw, kLane ? 0xEE : 0x44));
+}
+
+// Float: same selector scheme as the integer overloads, via float shuffles.
+template <int kLane>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float> broadcast(const vec_sse4<float> v) {
+  static_assert(0 <= kLane && kLane < 4, "Invalid lane");
+  return vec_sse4<float>(_mm_shuffle_ps(v.raw, v.raw, 0x55 * kLane));
+}
+template <int kLane>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double> broadcast(
+    const vec_sse4<double> v) {  // 3 * kLane sets both 1-bit selectors to kLane
+  static_assert(0 <= kLane && kLane < 2, "Invalid lane");
+  return vec_sse4<double>(_mm_shuffle_pd(v.raw, v.raw, 3 * kLane));
+}
+
+// ------------------------------ Shuffle bytes with variable indices
+
+// Byte shuffle: output byte i is bytes[from[i]]. "from" is read as 16 bytes,
+// each either an index in [0, 16) or a value >= 0x80 to zero output byte i.
+template <typename T, typename TI>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> table_lookup_bytes(
+    const vec_sse4<T> bytes, const vec_sse4<TI> from) {
+  return vec_sse4<T>(_mm_shuffle_epi8(bytes.raw, from.raw));
+}
+
+// ------------------------------ Hard-coded shuffles
+
+// Notation: let vec_sse4<int32_t> have lanes 3,2,1,0 (0 is least-significant).
+// shuffle_0321 rotates one lane to the right (the previous least-significant
+// lane is now most-significant). These could also be implemented via
+// combine_shift_right_bytes but the shuffle_abcd notation is more convenient.
+
+// Swap 64-bit halves: 0x4E selects source lanes 1,0,3,2 (most-significant first)
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t> shuffle_1032(
+    const vec_sse4<uint32_t> v) {
+  return vec_sse4<uint32_t>(_mm_shuffle_epi32(v.raw, 0x4E));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t> shuffle_1032(
+    const vec_sse4<int32_t> v) {
+  return vec_sse4<int32_t>(_mm_shuffle_epi32(v.raw, 0x4E));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float> shuffle_1032(
+    const vec_sse4<float> v) {
+  return vec_sse4<float>(_mm_shuffle_ps(v.raw, v.raw, 0x4E));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t> shuffle_01(
+    const vec_sse4<uint64_t> v) {  // same swap, viewed as two 64-bit lanes
+  return vec_sse4<uint64_t>(_mm_shuffle_epi32(v.raw, 0x4E));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int64_t> shuffle_01(
+    const vec_sse4<int64_t> v) {
+  return vec_sse4<int64_t>(_mm_shuffle_epi32(v.raw, 0x4E));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double> shuffle_01(
+    const vec_sse4<double> v) {  // imm 1: low lane <- v[1], high lane <- v[0]
+  return vec_sse4<double>(_mm_shuffle_pd(v.raw, v.raw, 1));
+}
+
+// Rotate right 32 bits: 0x39 selects source lanes 0,3,2,1 (most-significant first)
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t> shuffle_0321(
+    const vec_sse4<uint32_t> v) {
+  return vec_sse4<uint32_t>(_mm_shuffle_epi32(v.raw, 0x39));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t> shuffle_0321(
+    const vec_sse4<int32_t> v) {
+  return vec_sse4<int32_t>(_mm_shuffle_epi32(v.raw, 0x39));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float> shuffle_0321(
+    const vec_sse4<float> v) {
+  return vec_sse4<float>(_mm_shuffle_ps(v.raw, v.raw, 0x39));
+}
+// Rotate left 32 bits: 0x93 selects source lanes 2,1,0,3 (most-significant first)
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t> shuffle_2103(
+    const vec_sse4<uint32_t> v) {
+  return vec_sse4<uint32_t>(_mm_shuffle_epi32(v.raw, 0x93));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t> shuffle_2103(
+    const vec_sse4<int32_t> v) {
+  return vec_sse4<int32_t>(_mm_shuffle_epi32(v.raw, 0x93));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float> shuffle_2103(
+    const vec_sse4<float> v) {
+  return vec_sse4<float>(_mm_shuffle_ps(v.raw, v.raw, 0x93));
+}
+
+// Reverse lane order: 0x1B selects source lanes 0,1,2,3 (most-significant first)
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t> shuffle_0123(
+    const vec_sse4<uint32_t> v) {
+  return vec_sse4<uint32_t>(_mm_shuffle_epi32(v.raw, 0x1B));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t> shuffle_0123(
+    const vec_sse4<int32_t> v) {
+  return vec_sse4<int32_t>(_mm_shuffle_epi32(v.raw, 0x1B));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float> shuffle_0123(
+    const vec_sse4<float> v) {
+  return vec_sse4<float>(_mm_shuffle_ps(v.raw, v.raw, 0x1B));
+}
+
+// ------------------------------ Permute (runtime variable)
+
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE permute_sse4<T> set_table_indices(const Full<T, SSE4> d,
+                                                       const int32_t* idx) {
+  const Full<uint8_t, SSE4> d8;
+  SIMD_ALIGN uint8_t control[d8.N];  // one shuffle-control byte per output byte
+  for (size_t lane = 0; lane < d8.N / sizeof(T); ++lane) {
+    for (size_t byte = 0; byte < sizeof(T); ++byte) {  // bytes of source lane
+      control[lane * sizeof(T) + byte] = idx[lane] * sizeof(T) + byte;
+    }
+  }
+  return permute_sse4<T>{load(d8, control).raw};
+}
+
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t> table_lookup_lanes(
+    const vec_sse4<uint32_t> v, const permute_sse4<uint32_t> idx) {
+  return table_lookup_bytes(v, vec_sse4<uint8_t>(idx.raw));  // idx holds bytes
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t> table_lookup_lanes(
+    const vec_sse4<int32_t> v, const permute_sse4<int32_t> idx) {
+  return table_lookup_bytes(v, vec_sse4<uint8_t>(idx.raw));  // idx holds bytes
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float> table_lookup_lanes(
+    const vec_sse4<float> v, const permute_sse4<float> idx) {
+  const vec_sse4<uint8_t> control(idx.raw);
+  const auto bits = cast_to(Full<int32_t, SSE4>(), v);  // integer view of v
+  const auto permuted = table_lookup_bytes(bits, control);
+  return cast_to(Full<float, SSE4>(), permuted);
+}
+
+// ------------------------------ Interleave lanes
+
+// Interleaves lanes from the lower (_lo) or upper (_hi) halves of "a" (which
+// provides the least-significant lane) and "b". To concatenate two half-width
+// integers into one, use zip_lo/hi instead (also works with scalar).
+
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint8_t> interleave_lo(
+    const vec_sse4<uint8_t> a, const vec_sse4<uint8_t> b) {
+  return vec_sse4<uint8_t>(_mm_unpacklo_epi8(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t> interleave_lo(
+    const vec_sse4<uint16_t> a, const vec_sse4<uint16_t> b) {
+  return vec_sse4<uint16_t>(_mm_unpacklo_epi16(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t> interleave_lo(
+    const vec_sse4<uint32_t> a, const vec_sse4<uint32_t> b) {
+  return vec_sse4<uint32_t>(_mm_unpacklo_epi32(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t> interleave_lo(
+    const vec_sse4<uint64_t> a, const vec_sse4<uint64_t> b) {
+  return vec_sse4<uint64_t>(_mm_unpacklo_epi64(a.raw, b.raw));
+}
+
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int8_t> interleave_lo(
+    const vec_sse4<int8_t> a, const vec_sse4<int8_t> b) {
+  return vec_sse4<int8_t>(_mm_unpacklo_epi8(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t> interleave_lo(
+    const vec_sse4<int16_t> a, const vec_sse4<int16_t> b) {
+  return vec_sse4<int16_t>(_mm_unpacklo_epi16(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t> interleave_lo(
+    const vec_sse4<int32_t> a, const vec_sse4<int32_t> b) {
+  return vec_sse4<int32_t>(_mm_unpacklo_epi32(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int64_t> interleave_lo(
+    const vec_sse4<int64_t> a, const vec_sse4<int64_t> b) {
+  return vec_sse4<int64_t>(_mm_unpacklo_epi64(a.raw, b.raw));
+}
+
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float> interleave_lo(
+    const vec_sse4<float> a, const vec_sse4<float> b) {
+  return vec_sse4<float>(_mm_unpacklo_ps(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double> interleave_lo(
+    const vec_sse4<double> a, const vec_sse4<double> b) {
+  return vec_sse4<double>(_mm_unpacklo_pd(a.raw, b.raw));
+}
+
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint8_t> interleave_hi(
+    const vec_sse4<uint8_t> a, const vec_sse4<uint8_t> b) {  // a8,b8,a9,b9,...
+  return vec_sse4<uint8_t>(_mm_unpackhi_epi8(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t> interleave_hi(
+    const vec_sse4<uint16_t> a, const vec_sse4<uint16_t> b) {
+  return vec_sse4<uint16_t>(_mm_unpackhi_epi16(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t> interleave_hi(
+    const vec_sse4<uint32_t> a, const vec_sse4<uint32_t> b) {
+  return vec_sse4<uint32_t>(_mm_unpackhi_epi32(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t> interleave_hi(
+    const vec_sse4<uint64_t> a, const vec_sse4<uint64_t> b) {
+  return vec_sse4<uint64_t>(_mm_unpackhi_epi64(a.raw, b.raw));
+}
+
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int8_t> interleave_hi(
+    const vec_sse4<int8_t> a, const vec_sse4<int8_t> b) {
+  return vec_sse4<int8_t>(_mm_unpackhi_epi8(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t> interleave_hi(
+    const vec_sse4<int16_t> a, const vec_sse4<int16_t> b) {
+  return vec_sse4<int16_t>(_mm_unpackhi_epi16(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t> interleave_hi(
+    const vec_sse4<int32_t> a, const vec_sse4<int32_t> b) {
+  return vec_sse4<int32_t>(_mm_unpackhi_epi32(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int64_t> interleave_hi(
+    const vec_sse4<int64_t> a, const vec_sse4<int64_t> b) {
+  return vec_sse4<int64_t>(_mm_unpackhi_epi64(a.raw, b.raw));
+}
+
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float> interleave_hi(
+    const vec_sse4<float> a, const vec_sse4<float> b) {
+  return vec_sse4<float>(_mm_unpackhi_ps(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double> interleave_hi(
+    const vec_sse4<double> a, const vec_sse4<double> b) {
+  return vec_sse4<double>(_mm_unpackhi_pd(a.raw, b.raw));
+}
+
+// ------------------------------ Zip lanes
+
+// Same as interleave_*, but each a[i],b[i] pair forms one double-width integer
+// lane (a[i] low); needed as the single-lane scalar cannot return two values.
+
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t> zip_lo(
+    const vec_sse4<uint8_t> a, const vec_sse4<uint8_t> b) {
+  return vec_sse4<uint16_t>(_mm_unpacklo_epi8(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t> zip_lo(
+    const vec_sse4<uint16_t> a, const vec_sse4<uint16_t> b) {
+  return vec_sse4<uint32_t>(_mm_unpacklo_epi16(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t> zip_lo(
+    const vec_sse4<uint32_t> a, const vec_sse4<uint32_t> b) {
+  return vec_sse4<uint64_t>(_mm_unpacklo_epi32(a.raw, b.raw));
+}
+
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t> zip_lo(const vec_sse4<int8_t> a,
+                                                    const vec_sse4<int8_t> b) {
+  return vec_sse4<int16_t>(_mm_unpacklo_epi8(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t> zip_lo(const vec_sse4<int16_t> a,
+                                                    const vec_sse4<int16_t> b) {
+  return vec_sse4<int32_t>(_mm_unpacklo_epi16(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int64_t> zip_lo(const vec_sse4<int32_t> a,
+                                                    const vec_sse4<int32_t> b) {
+  return vec_sse4<int64_t>(_mm_unpacklo_epi32(a.raw, b.raw));
+}
+
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t> zip_hi(
+    const vec_sse4<uint8_t> a, const vec_sse4<uint8_t> b) {  // upper halves
+  return vec_sse4<uint16_t>(_mm_unpackhi_epi8(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t> zip_hi(
+    const vec_sse4<uint16_t> a, const vec_sse4<uint16_t> b) {
+  return vec_sse4<uint32_t>(_mm_unpackhi_epi16(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t> zip_hi(
+    const vec_sse4<uint32_t> a, const vec_sse4<uint32_t> b) {
+  return vec_sse4<uint64_t>(_mm_unpackhi_epi32(a.raw, b.raw));
+}
+
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t> zip_hi(const vec_sse4<int8_t> a,
+                                                    const vec_sse4<int8_t> b) {
+  return vec_sse4<int16_t>(_mm_unpackhi_epi8(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t> zip_hi(const vec_sse4<int16_t> a,
+                                                    const vec_sse4<int16_t> b) {
+  return vec_sse4<int32_t>(_mm_unpackhi_epi16(a.raw, b.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int64_t> zip_hi(const vec_sse4<int32_t> a,
+                                                    const vec_sse4<int32_t> b) {
+  return vec_sse4<int64_t>(_mm_unpackhi_epi32(a.raw, b.raw));
+}
+
+// ------------------------------ Parts
+
+// Returns a one-lane part holding "t" in the lowest lane of the register.
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t, 1> set_part(
+    Desc<uint16_t, 1, SSE4>, const uint16_t t) {
+  return vec_sse4<uint16_t, 1>(_mm_cvtsi32_si128(t));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, 1> set_part(Desc<int16_t, 1, SSE4>,
+                                                         const int16_t t) {
+  return vec_sse4<int16_t, 1>(_mm_cvtsi32_si128(t));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t, 1> set_part(Desc<uint32_t, 1, SSE4>,
+                                                     const uint32_t t) {
+  return vec_sse4<uint32_t, 1>(_mm_cvtsi32_si128(t));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, 1> set_part(Desc<int32_t, 1, SSE4>,
+                                                    const int32_t t) {
+  return vec_sse4<int32_t, 1>(_mm_cvtsi32_si128(t));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, 1> set_part(Desc<float, 1, SSE4>,
+                                                  const float t) {
+  return vec_sse4<float, 1>(_mm_set_ss(t));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t, 1> set_part(Desc<uint64_t, 1, SSE4>,
+                                                     const uint64_t t) {
+  return vec_sse4<uint64_t, 1>(_mm_cvtsi64_si128(t));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int64_t, 1> set_part(Desc<int64_t, 1, SSE4>,
+                                                    const int64_t t) {
+  return vec_sse4<int64_t, 1>(_mm_cvtsi64_si128(t));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, 1> set_part(Desc<double, 1, SSE4>,
+                                                   const double t) {
+  return vec_sse4<double, 1>(_mm_set_sd(t));
+}
+
+// Returns the lowest lane of a vector/part (of any lane count N) as a scalar.
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE uint16_t get_part(Desc<uint16_t, 1, SSE4>,
+                                             const vec_sse4<uint16_t, N> v) {
+  return _mm_cvtsi128_si32(v.raw) & 0xFFFF;  // keep only the lowest 16 bits
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE int16_t get_part(Desc<int16_t, 1, SSE4>,
+                                            const vec_sse4<int16_t, N> v) {
+  return _mm_cvtsi128_si32(v.raw) & 0xFFFF;  // keep only the lowest 16 bits
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE uint32_t get_part(Desc<uint32_t, 1, SSE4>,
+                                        const vec_sse4<uint32_t, N> v) {
+  return _mm_cvtsi128_si32(v.raw);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE int32_t get_part(Desc<int32_t, 1, SSE4>,
+                                       const vec_sse4<int32_t, N> v) {
+  return _mm_cvtsi128_si32(v.raw);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE float get_part(Desc<float, 1, SSE4>,
+                                     const vec_sse4<float, N> v) {
+  return _mm_cvtss_f32(v.raw);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE uint64_t get_part(Desc<uint64_t, 1, SSE4>,
+                                        const vec_sse4<uint64_t, N> v) {
+  return _mm_cvtsi128_si64(v.raw);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE int64_t get_part(Desc<int64_t, 1, SSE4>,
+                                       const vec_sse4<int64_t, N> v) {
+  return _mm_cvtsi128_si64(v.raw);
+}
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE double get_part(Desc<double, 1, SSE4>,
+                                      const vec_sse4<double, N> v) {
+  return _mm_cvtsd_f64(v.raw);
+}
+
+// Returns an N-lane part of a VN-lane vector (unspecified whether upper or lower).
+template <typename T, size_t N, size_t VN>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, N> any_part(Desc<T, N, SSE4>,
+                                                   const vec_sse4<T, VN> v) {
+  return vec_sse4<T, N>(v.raw);  // same register, only the lane count changes
+}
+
+// Returns a full vector with lane kLane of the given part broadcast to all lanes.
+// Callers cannot use broadcast directly because part lane order is undefined.
+template <int kLane, typename T, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> broadcast_part(Full<T, SSE4>,
+                                                      const vec_sse4<T, N> v) {
+  static_assert(0 <= kLane && kLane < N, "Invalid lane");  // lane within part
+  return broadcast<kLane>(vec_sse4<T>(v.raw));  // re-view the part as full
+}
+
+// Returns upper/lower half of a vector. Lower: same register, half lane count.
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, 8 / sizeof(T)> get_half(
+    Lower, const vec_sse4<T> v) {  // 8 / sizeof(T) = lanes in 64 bits
+  return vec_sse4<T, 8 / sizeof(T)>(v.raw);
+}
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, 8 / sizeof(T)> lower_half(
+    const vec_sse4<T> v) {
+  return get_half(Lower(), v);  // convenience wrapper without the tag argument
+}
+
+// These copy hi into lo (smaller instruction encoding than shifts).
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, 8 / sizeof(T)> get_half(
+    Upper, const vec_sse4<T> v) {  // integer lanes
+  return vec_sse4<T, 8 / sizeof(T)>(_mm_unpackhi_epi64(v.raw, v.raw));
+}
+template <>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, 2> get_half(
+    Upper, const vec_sse4<float> v) {  // float needs the _ps form
+  return vec_sse4<float, 2>(_mm_movehl_ps(v.raw, v.raw));
+}
+template <>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double, 1> get_half(
+    Upper, const vec_sse4<double> v) {  // double needs the _pd form
+  return vec_sse4<double, 1>(_mm_unpackhi_pd(v.raw, v.raw));
+}
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, 8 / sizeof(T)> upper_half(
+    const vec_sse4<T> v) {
+  return get_half(Upper(), v);  // convenience wrapper without the tag argument
+}
+
+// ------------------------------ Blocks
+
+// hiH,hiL loH,loL |-> hiL,loL (= lower 64-bit halves; lo's ends up lowest)
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> concat_lo_lo(const vec_sse4<T> hi,
+                                                    const vec_sse4<T> lo) {
+  const Full<uint64_t, SSE4> du64;  // treat each half as one 64-bit lane
+  const auto lower = interleave_lo(cast_to(du64, lo), cast_to(du64, hi));
+  return cast_to(Full<T, SSE4>(), lower);
+}
+
+// hiH,hiL loH,loL |-> hiH,loH (= upper 64-bit halves; lo's ends up lowest)
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> concat_hi_hi(const vec_sse4<T> hi,
+                                                    const vec_sse4<T> lo) {
+  const Full<uint64_t, SSE4> du64;  // treat each half as one 64-bit lane
+  const auto upper = interleave_hi(cast_to(du64, lo), cast_to(du64, hi));
+  return cast_to(Full<T, SSE4>(), upper);
+}
+
+// hiH,hiL loH,loL |-> hiL,loH (= inner halves): skip the lowest 8 bytes of lo
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> concat_lo_hi(const vec_sse4<T> hi,
+                                                    const vec_sse4<T> lo) {
+  return combine_shift_right_bytes<8>(hi, lo);
+}
+
+// hiH,hiL loH,loL |-> hiH,loL (= outer halves): blend masks pick lo's low 64 bits
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> concat_hi_lo(const vec_sse4<T> hi,
+                                                    const vec_sse4<T> lo) {
+  return vec_sse4<T>(_mm_blend_epi16(hi.raw, lo.raw, 0x0F));  // 4 x 16 bits
+}
+template <>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float> concat_hi_lo(
+    const vec_sse4<float> hi, const vec_sse4<float> lo) {
+  return vec_sse4<float>(_mm_blend_ps(hi.raw, lo.raw, 3));  // 2 x 32 bits
+}
+template <>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double> concat_hi_lo(
+    const vec_sse4<double> hi, const vec_sse4<double> lo) {
+  return vec_sse4<double>(_mm_blend_pd(hi.raw, lo.raw, 1));  // 1 x 64 bits
+}
+
+// ------------------------------ Odd/even lanes
+
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> odd_even_impl(char (&sizeof_t)[1],
+                                                     const vec_sse4<T> a,
+                                                     const vec_sse4<T> b) {
+  const Full<T, SSE4> d;
+  const Full<uint8_t, SSE4> d8;
+  SIMD_ALIGN constexpr uint8_t mask[16] = {0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0,
+                                           0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0};
+  return select(a, b, cast_to(d, load(d8, mask)));  // mask set on even bytes
+}
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> odd_even_impl(char (&sizeof_t)[2],
+                                                     const vec_sse4<T> a,
+                                                     const vec_sse4<T> b) {
+  return vec_sse4<T>(_mm_blend_epi16(a.raw, b.raw, 0x55));  // even u16 from b
+}
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> odd_even_impl(char (&sizeof_t)[4],
+                                                     const vec_sse4<T> a,
+                                                     const vec_sse4<T> b) {
+  return vec_sse4<T>(_mm_blend_epi16(a.raw, b.raw, 0x33));  // even u32 from b
+}
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> odd_even_impl(char (&sizeof_t)[8],
+                                                     const vec_sse4<T> a,
+                                                     const vec_sse4<T> b) {
+  return vec_sse4<T>(_mm_blend_epi16(a.raw, b.raw, 0x0F));  // low u64 from b
+}
+
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T> odd_even(const vec_sse4<T> a,
+                                                const vec_sse4<T> b) {
+  char size_tag[sizeof(T)];  // only its extent matters: picks the overload
+  return odd_even_impl(size_tag, a, b);
+}
+template <>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float> odd_even<float>(
+    const vec_sse4<float> a, const vec_sse4<float> b) {
+  return vec_sse4<float>(_mm_blend_ps(a.raw, b.raw, 5));  // lanes 0,2 from b
+}
+
+template <>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double> odd_even<double>(
+    const vec_sse4<double> a, const vec_sse4<double> b) {
+  return vec_sse4<double>(_mm_blend_pd(a.raw, b.raw, 1));  // lane 0 from b
+}
+
+// ================================================== CONVERT
+
+// ------------------------------ Promotions (part w/ narrow lanes -> full)
+
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<double> convert_to(
+    Full<double, SSE4>, const vec_sse4<float, 2> v) {  // 2 floats -> 2 doubles
+  return vec_sse4<double>(_mm_cvtps_pd(v.raw));
+}
+
+// Unsigned source lanes: zero-extend (also into signed destination lanes).
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t> convert_to(
+    Full<uint16_t, SSE4>, const vec_sse4<uint8_t, 8> v) {
+  return vec_sse4<uint16_t>(_mm_cvtepu8_epi16(v.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t> convert_to(
+    Full<uint32_t, SSE4>, const vec_sse4<uint8_t, 4> v) {
+  return vec_sse4<uint32_t>(_mm_cvtepu8_epi32(v.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t> convert_to(
+    Full<int16_t, SSE4>, const vec_sse4<uint8_t, 8> v) {
+  return vec_sse4<int16_t>(_mm_cvtepu8_epi16(v.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t> convert_to(
+    Full<int32_t, SSE4>, const vec_sse4<uint8_t, 4> v) {
+  return vec_sse4<int32_t>(_mm_cvtepu8_epi32(v.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t> convert_to(
+    Full<uint32_t, SSE4>, const vec_sse4<uint16_t, 4> v) {
+  return vec_sse4<uint32_t>(_mm_cvtepu16_epi32(v.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t> convert_to(
+    Full<int32_t, SSE4>, const vec_sse4<uint16_t, 4> v) {
+  return vec_sse4<int32_t>(_mm_cvtepu16_epi32(v.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t> convert_to(
+    Full<uint64_t, SSE4>, const vec_sse4<uint32_t, 2> v) {
+  return vec_sse4<uint64_t>(_mm_cvtepu32_epi64(v.raw));
+}
+
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint32_t> u32_from_u8(
+    const vec_sse4<uint8_t> v) {  // zero-extends the four lowest bytes of v
+  return vec_sse4<uint32_t>(_mm_cvtepu8_epi32(v.raw));
+}
+
+// Signed source lanes: sign-extend (replicate the sign bit into the new bits).
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t> convert_to(
+    Full<int16_t, SSE4>, const vec_sse4<int8_t, 8> v) {
+  return vec_sse4<int16_t>(_mm_cvtepi8_epi16(v.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t> convert_to(
+    Full<int32_t, SSE4>, const vec_sse4<int8_t, 4> v) {
+  return vec_sse4<int32_t>(_mm_cvtepi8_epi32(v.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t> convert_to(
+    Full<int32_t, SSE4>, const vec_sse4<int16_t, 4> v) {
+  return vec_sse4<int32_t>(_mm_cvtepi16_epi32(v.raw));
+}
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int64_t> convert_to(
+    Full<int64_t, SSE4>, const vec_sse4<int32_t, 2> v) {
+  return vec_sse4<int64_t>(_mm_cvtepi32_epi64(v.raw));
+}
+
+// ------------------------------ Demotions (full -> part w/ narrow lanes)
+
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t, N> convert_to(
+    Part<uint16_t, N, SSE4>, const vec_sse4<int32_t, N> v) {  // saturating pack
+  return vec_sse4<uint16_t, N>(_mm_packus_epi32(v.raw, v.raw));
+}
+
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint8_t, N> convert_to(
+    Part<uint8_t, N, SSE4>, const vec_sse4<int32_t> v) {  // i32 -> u16 -> u8
+  const __m128i u16 = _mm_packus_epi32(v.raw, v.raw);
+  return vec_sse4<uint8_t, N>(_mm_packus_epi16(u16, u16));
+}
+
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint8_t, N> convert_to(
+    Part<uint8_t, N, SSE4>, const vec_sse4<int16_t> v) {  // saturating pack
+  return vec_sse4<uint8_t, N>(_mm_packus_epi16(v.raw, v.raw));
+}
+
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t, N> convert_to(
+    Part<int16_t, N, SSE4>, const vec_sse4<int32_t> v) {  // saturating pack
+  return vec_sse4<int16_t, N>(_mm_packs_epi32(v.raw, v.raw));
+}
+
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int8_t, N> convert_to(
+    Part<int8_t, N, SSE4>, const vec_sse4<int32_t> v) {  // i32 -> i16 -> i8
+  const __m128i i16 = _mm_packs_epi32(v.raw, v.raw);
+  return vec_sse4<int8_t, N>(_mm_packs_epi16(i16, i16));
+}
+
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int8_t, N> convert_to(
+    Part<int8_t, N, SSE4>, const vec_sse4<int16_t> v) {  // saturating pack
+  return vec_sse4<int8_t, N>(_mm_packs_epi16(v.raw, v.raw));
+}
+
+// Keeps the low byte of each 32-bit lane; for range-limited input [0, 255].
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint8_t, 4> u8_from_u32(
+    const vec_sse4<uint32_t> v) {
+  const Full<uint32_t, SSE4> d32;
+  const Full<uint8_t, SSE4> d8;
+  SIMD_ALIGN static constexpr uint32_t k8From32[4] = {0x0C080400u, 0x0C080400u,
+                                                      0x0C080400u, 0x0C080400u};
+  // 0x0C080400 = byte indices 0,4,8,12; replicated to all lanes for any_part.
+  const auto quad = table_lookup_bytes(v, load(d32, k8From32));
+  return any_part(Part<uint8_t, 4, SSE4>(), cast_to(d8, quad));
+}
+
+// ------------------------------ Convert i32 <=> f32
+
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<float, N> convert_to(
+    Part<float, N, SSE4>, const vec_sse4<int32_t, N> v) {  // i32 -> f32
+  return vec_sse4<float, N>(_mm_cvtepi32_ps(v.raw));
+}
+// f32 -> i32. Truncates (rounds toward zero).
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, N> convert_to(
+    Part<int32_t, N, SSE4>, const vec_sse4<float, N> v) {
+  return vec_sse4<int32_t, N>(_mm_cvttps_epi32(v.raw));
+}
+
+template <size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int32_t, N> nearest_int(
+    const vec_sse4<float, N> v) {  // non-truncating f32 -> i32 (MXCSR rounding)
+  return vec_sse4<int32_t, N>(_mm_cvtps_epi32(v.raw));
+}
+
+// ================================================== MISC
+
+// "Extensions": useful but not quite performance-portable operations. This
+// namespace is reopened in several places to add further functions.
+namespace ext {
+
+// ------------------------------ movemask
+
+// Collects the most significant bit of every byte of "v" into a bit array:
+// sum_i=0..15 of (v[i] >> 7) << i, where v[0] is the least-significant byte.
+// Handy for testing/branching on the result of a comparison.
+SIMD_ATTR_SSE4 SIMD_INLINE uint32_t movemask(const vec_sse4<uint8_t> v) {
+  return _mm_movemask_epi8(v.raw);
+}
+
+// Same, but one bit per float/double lane (its most significant bit).
+SIMD_ATTR_SSE4 SIMD_INLINE uint32_t movemask(const vec_sse4<float> v) {
+  return _mm_movemask_ps(v.raw);
+}
+SIMD_ATTR_SSE4 SIMD_INLINE uint32_t movemask(const vec_sse4<double> v) {
+  return _mm_movemask_pd(v.raw);
+}
+
+// ------------------------------ all_zero
+
+// True iff every bit of "v" is zero. Supported for all integer V.
+template <typename T>
+SIMD_ATTR_SSE4 SIMD_INLINE bool all_zero(const vec_sse4<T> v) {
+  return _mm_testz_si128(v.raw, v.raw) != 0;
+}
+
+// ------------------------------ minpos
+
+// Lane 0 receives the minimum value, lane 1 its index.
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint16_t> minpos(
+    const vec_sse4<uint16_t> v) {
+  return vec_sse4<uint16_t>(_mm_minpos_epu16(v.raw));
+}
+
+// ------------------------------ Horizontal sum (reduction)
+
+// One 64-bit sum per group of 8 bytes (sum of absolute differences vs. zero).
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<uint64_t> sums_of_u8x8(
+    const vec_sse4<uint8_t> v) {
+  return vec_sse4<uint64_t>(_mm_sad_epu8(v.raw, _mm_setzero_si128()));
+}
+
+// Returns N sums of absolute differences of byte quadruplets, starting from byte
+// offset i = [0, N) in window (11 consecutive bytes) and idx_ref * 4 in ref.
+template <int idx_ref>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<int16_t> mpsadbw(
+    const vec_sse4<uint8_t> window, const vec_sse4<uint8_t> ref) {
+  return vec_sse4<int16_t>(_mm_mpsadbw_epu8(window.raw, ref.raw, idx_ref));
+}
+
+// 32-bit lanes (u32/i32/f32): two shuffle+add steps.
+template <typename T, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, N> horz_sum_impl(
+    char (&sizeof_t)[4], const vec_sse4<T, N> v3210) {
+  const vec_sse4<T> halves_swapped = shuffle_1032(v3210);
+  const vec_sse4<T> pair_sums = v3210 + halves_swapped;  // lanes: 31,20,31,20
+  const vec_sse4<T> rotated = shuffle_0321(pair_sums);   // lanes: 20,31,20,31
+  return rotated + pair_sums;
+}
+
+// 64-bit lanes (u64/i64/f64): a single swap+add.
+template <typename T, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, N> horz_sum_impl(
+    char (&sizeof_t)[8], const vec_sse4<T, N> v10) {
+  const vec_sse4<T> halves_swapped = shuffle_01(v10);
+  return v10 + halves_swapped;
+}
+
+// Every lane of the result holds the sum of all lanes. For u/i/f 32/64.
+template <typename T, size_t N>
+SIMD_ATTR_SSE4 SIMD_INLINE vec_sse4<T, N> sum_of_lanes(const vec_sse4<T, N> v) {
+  char size_tag[sizeof(T)];  // only its extent matters: picks the overload
+  return horz_sum_impl(size_tag, v);
+}
+
+}  // namespace ext
+
+// TODO(janwas): wrappers for all intrinsics (in x86 namespace).
+}  // namespace pik
+
+#endif  // SIMD_ENABLE & SIMD_SSE4
+#endif  // PIK_SIMD_X86_SSE4_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/single_image_handler.cc b/codec/L2/demos/pikEnc/host/pik/single_image_handler.cc
new file mode 100755
index 0000000000..f768c2224d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/single_image_handler.cc
@@ -0,0 +1,177 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/single_image_handler.h"
+#include "pik/ac_strategy.h"
+#include "pik/adaptive_quantization.h"
+#include "pik/codec.h"
+#include "pik/color_correlation.h"
+#include "pik/common.h"
+#include "pik/image.h"
+#include "pik/opsin_image.h"
+#include "pik/opsin_inverse.h"
+#include "pik/pik_params.h"
+#include "pik/profiler.h"
+#include "pik/quantizer.h"
+
+namespace pik {
+
+MultipassHandler* SingleImageManager::GetGroupHandler(size_t group_id,
+                                                      const Rect& group_rect) {
+  // Grow the table on demand so that group_id is a valid index.
+  if (group_id >= group_handlers_.size()) group_handlers_.resize(group_id + 1);
+  // Handlers are created lazily on first request and reused afterwards.
+  std::unique_ptr<SingleImageHandler>& slot = group_handlers_[group_id];
+  if (slot == nullptr) {
+    slot.reset(new SingleImageHandler(this, group_rect, mode_));
+  }
+  return slot.get();
+}
+
+float SingleImageManager::BlockSaliency(size_t row, size_t col) const {
+  // Without a saliency map every block reports zero saliency.
+  if (!saliency_map_) return 0.0f;
+  return saliency_map_->Row(row)[col];
+}
+
+void SingleImageManager::GetColorCorrelationMap(const Image3F& opsin,
+                                                const DequantMatrices& dequant,
+                                                ColorCorrelationMap* cmap) {
+  if (!has_cmap_) {  // First call only: search once, then reuse the result.
+    cmap_ = std::move(*cmap);  // Takes over the caller's map as the input.
+    FindBestColorCorrelationMap(opsin, dequant, &cmap_);
+    has_cmap_ = true;
+  }
+  *cmap = cmap_.Copy();  // Every call hands back a copy of the cached map.
+}
+
+BlockDictionary SingleImageManager::GetBlockDictionary(
+    double butteraugli_target, const Image3F& opsin) {  // No caching here.
+  return FindBestBlockDictionary(butteraugli_target, opsin);
+}
+
+void SingleImageManager::GetAcStrategy(float butteraugli_target,
+                                       const ImageF* quant_field,
+                                       const DequantMatrices& dequant,
+                                       const Image3F& src, ThreadPool* pool,
+                                       AcStrategyImage* ac_strategy,
+                                       PikInfo* aux_out) {
+  if (!has_ac_strategy_) {  // First call only: search once, then reuse.
+    FindBestAcStrategy(butteraugli_target, quant_field, dequant, src, pool,
+                       &ac_strategy_, aux_out);
+    has_ac_strategy_ = true;
+  }
+  *ac_strategy = ac_strategy_.Copy();  // Callers always receive a copy.
+}
+
+std::shared_ptr<Quantizer> SingleImageManager::GetQuantizer(
+    const CompressParams& cparams, size_t xsize_blocks, size_t ysize_blocks,
+    const Image3F& opsin_orig, const Image3F& opsin,
+    const FrameHeader& frame_header, const GroupHeader& header,
+    const ColorCorrelationMap& cmap, const BlockDictionary& block_dictionary,
+    const AcStrategyImage& ac_strategy, const ImageB& ar_sigma_lut_ids,
+    const DequantMatrices* dequant, const ImageB& dequant_control_field,
+    const uint8_t dequant_map[kMaxQuantControlFieldValue][256],
+    ImageF& quant_field, PikInfo* aux_out) {
+  // The search runs at most once; later calls return the cached quantizer.
+  // quantizer_/has_quantizer_ are the same members GetQuantizerAvg uses, so
+  // whichever of the two getters runs first decides what is cached.
+  if (has_quantizer_) return quantizer_;
+  FrameHeader hdr = frame_header;
+  if (use_adaptive_reconstruction_) hdr.have_adaptive_reconstruction = true;
+  quantizer_ = FindBestQuantizer(
+      cparams, xsize_blocks, ysize_blocks, opsin_orig, opsin, hdr, header, cmap,
+      block_dictionary, ac_strategy, ar_sigma_lut_ids, dequant,
+      dequant_control_field, dequant_map, quant_field, aux_out, this);
+  has_quantizer_ = true;
+  return quantizer_;
+}
+
+std::shared_ptr<Quantizer> SingleImageManager::GetQuantizerAvg(float avg, float absavg,
+    const CompressParams& cparams, size_t xsize_blocks, size_t ysize_blocks,
+    const Image3F& opsin_orig, const Image3F& opsin,
+    const FrameHeader& frame_header, const GroupHeader& header,
+    const ColorCorrelationMap& cmap, const BlockDictionary& block_dictionary,
+    const AcStrategyImage& ac_strategy, const ImageB& ar_sigma_lut_ids,
+    const DequantMatrices* dequant, const ImageB& dequant_control_field,
+    const uint8_t dequant_map[kMaxQuantControlFieldValue][256],
+    ImageF& quant_field, PikInfo* aux_out) {
+  // The search runs at most once; later calls return the cached quantizer.
+  // quantizer_/has_quantizer_ are the same members GetQuantizer uses, so
+  // whichever of the two getters runs first decides what is cached.
+  if (has_quantizer_) return quantizer_;
+  FrameHeader hdr = frame_header;
+  if (use_adaptive_reconstruction_) hdr.have_adaptive_reconstruction = true;
+  quantizer_ = FindBestQuantizerAvg(
+      avg, absavg, cparams, xsize_blocks, ysize_blocks, opsin_orig, opsin, hdr,
+      header, cmap, block_dictionary, ac_strategy, ar_sigma_lut_ids, dequant,
+      dequant_control_field, dequant_map, quant_field, aux_out, this);
+  has_quantizer_ = true;
+  return quantizer_;
+}
+
+std::vector<Image3S> SingleImageHandler::SplitACCoefficients(
+    Image3S&& ac, const AcStrategyImage& ac_strategy) {
+  if (mode_.num_passes == 1) {  // Single pass: ac is handed back unsplit.
+    PIK_ASSERT(mode_.passes[0].num_coefficients == 8);
+    PIK_ASSERT(!mode_.passes[0].salient_only);
+    std::vector<Image3S> ret;
+    ret.push_back(std::move(ac));
+    return ret;
+  }
+  // Multi-pass: every pass gets a zero-filled image of the size of ac that
+  // receives only the coefficients its square adds over the previous pass.
+  size_t xsize_blocks = ac.xsize() / (kBlockDim * kBlockDim);
+  size_t ysize_blocks = ac.ysize();
+  size_t last_ncoeff = 1;  // Side of the square skipped as already handled.
+  bool last_salient_only = false;
+  std::vector<Image3S> ac_split;
+
+  // TODO(veluca): handle saliency.
+  for (size_t i = 0; i < mode_.num_passes; i++) {
+    ac_split.emplace_back(ac.xsize(), ac.ysize());
+    Image3S* current = &ac_split.back();
+    ZeroFillImage(current);
+    size_t stride = current->PixelsPerRow();
+    size_t frame_coeffs = mode_.passes[i].num_coefficients;
+    for (size_t c = 0; c < ac.kNumPlanes; c++) {
+      for (size_t by = 0; by < ysize_blocks; by++) {
+        const int16_t* PIK_RESTRICT row_in = ac.ConstPlaneRow(c, by);
+        AcStrategyRow row_strategy = ac_strategy.ConstRow(by);
+        int16_t* PIK_RESTRICT row_out = current->PlaneRow(c, by);
+        for (size_t bx = 0; bx < xsize_blocks; bx++) {
+          AcStrategy strategy = row_strategy[bx];
+          if (!strategy.IsFirstBlock()) continue;
+          size_t xsize = strategy.covered_blocks_x();
+          size_t ysize = strategy.covered_blocks_y();
+          size_t block_shift =
+              NumZeroBitsBelowLSBNonzero(kBlockDim * kBlockDim * xsize);
+          for (size_t y = 0; y < ysize * frame_coeffs; y++) {
+            size_t line_start = y * xsize * kBlockDim;
+            size_t block_off = line_start >> block_shift;
+            size_t block_idx = line_start & (xsize * kBlockDim * kBlockDim - 1);
+            line_start = block_off * stride + block_idx;
+            for (size_t x = 0; x < xsize * frame_coeffs; x++) {
+              if (x < xsize * last_ncoeff && y < ysize * last_ncoeff) continue;
+              row_out[bx * kBlockDim * kBlockDim + line_start + x] =
+                  row_in[bx * kBlockDim * kBlockDim + line_start + x];
+            }
+          }
+        }
+      }
+    }
+    last_ncoeff = frame_coeffs;
+    last_salient_only = mode_.passes[i].salient_only;
+  }
+  PIK_ASSERT(last_ncoeff == 8);
+  PIK_ASSERT(last_salient_only == false);
+
+  // Saved saliency code. TODO(veluca): integrate
+  return ac_split;
+}
+// Returns the manager this handler was constructed with.
+MultipassManager* SingleImageHandler::Manager() { return manager_; }
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/single_image_handler.h b/codec/L2/demos/pikEnc/host/pik/single_image_handler.h
new file mode 100755
index 0000000000..77c7434cc5
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/single_image_handler.h
@@ -0,0 +1,206 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_SINGLE_PASS_HANDLER_H_
+#define PIK_SINGLE_PASS_HANDLER_H_
+
+#include <cstddef>
+#include <memory>
+#include <vector>
+
+#include "pik/ac_strategy.h"
+#include "pik/adaptive_quantization.h"
+#include "pik/codec.h"
+#include "pik/color_correlation.h"
+#include "pik/image.h"
+#include "pik/multipass_handler.h"
+
+// A multipass handler/manager to encode single images. It will run heuristics
+// for quantization, AC strategy and color correlation map only the first time
+// we want to encode a lossy pass, and will then re-use the existing heuristics
+// for further passes. All the passes of a single image are added together.
+
+namespace pik {
+
+constexpr size_t kMaxNumPasses = 8;
+constexpr size_t kNoDownsamplingFactor = std::numeric_limits<size_t>::max();
+
+struct PassDefinition {  // One pass of a progressive (multi-pass) encoding.
+  // Side of the square of the coefficients that should be kept in each 8x8
+  // block. Must be greater than 1, and at most 8. Across consecutive passes
+  // the values should be in non-decreasing order.
+  size_t num_coefficients;
+  // Whether or not we should include only salient blocks.
+  // TODO(veluca): ignored for now.
+  bool salient_only;
+  // If not kNoDownsamplingFactor, this indicates that if the requested
+  // downsampling factor is sufficiently high, it is fine to stop decoding
+  // after this pass. NOTE(review): there is no default member initializer, so
+  // "not suitable" must be requested explicitly with kNoDownsamplingFactor.
+  size_t suitable_for_downsampling_factor_of_at_least;
+};
+
+struct ProgressiveMode {
+  size_t num_passes = 1;
+  PassDefinition passes[kMaxNumPasses] = {
+      PassDefinition{/*num_coefficients=*/8, /*salient_only=*/false,
+                     /*suitable_for_downsampling_factor_of_at_least=*/1}};
+  // Default: a single pass that keeps all 8x8 coefficients.
+  ProgressiveMode() {}
+  // Copies and validates the passes in p; nump is checked at compile time.
+  template <size_t nump>
+  ProgressiveMode(const PassDefinition (&p)[nump]) {
+    static_assert(nump <= kMaxNumPasses, "too many passes");
+    num_passes = nump;
+    PassDefinition previous_pass{
+        /*num_coefficients=*/1,
+        /*salient_only=*/false,
+        /*suitable_for_downsampling_factor_of_at_least=*/kNoDownsamplingFactor};
+    size_t last_downsampling_factor = kNoDownsamplingFactor;
+    for (size_t i = 0; i < nump; i++) {
+      PIK_ASSERT(p[i].num_coefficients > previous_pass.num_coefficients ||
+                 (p[i].num_coefficients == previous_pass.num_coefficients &&
+                  !p[i].salient_only && previous_pass.salient_only));
+      PIK_ASSERT(p[i].suitable_for_downsampling_factor_of_at_least ==
+                     kNoDownsamplingFactor ||
+                 p[i].suitable_for_downsampling_factor_of_at_least <=
+                     last_downsampling_factor);
+      if (p[i].suitable_for_downsampling_factor_of_at_least !=
+          kNoDownsamplingFactor) {
+        last_downsampling_factor =
+            p[i].suitable_for_downsampling_factor_of_at_least;
+      }
+      previous_pass = passes[i] = p[i];
+    }
+  }
+};
+
+class SingleImageManager;
+
+class SingleImageHandler : public MultipassHandler {
+ public:
+  SingleImageHandler(SingleImageManager* manager, const Rect& group_rect,
+                     ProgressiveMode mode)
+      : manager_(manager),
+        group_rect_(group_rect),
+        padded_group_rect_(group_rect.x0(), group_rect.y0(),
+                           DivCeil(group_rect.xsize(), kBlockDim) * kBlockDim,
+                           DivCeil(group_rect.ysize(), kBlockDim) * kBlockDim),
+        mode_(mode) {}
+  // The group rectangle, and the same rectangle padded to whole blocks.
+  const Rect& GroupRect() override { return group_rect_; }
+  const Rect& PaddedGroupRect() override { return padded_group_rect_; }
+  // Returns one image per pass of mode_ (the input itself for a single pass).
+  std::vector<Image3S> SplitACCoefficients(
+      Image3S&& ac, const AcStrategyImage& ac_strategy) override;
+  // Returns the manager passed to the constructor.
+  MultipassManager* Manager() override;
+
+ private:
+  SingleImageManager* manager_;
+  const Rect group_rect_;
+  const Rect padded_group_rect_;
+  ProgressiveMode mode_;
+};
+
+// A MultipassManager for single images; heuristic results are cached.
+class SingleImageManager : public MultipassManager {
+ public:
+  SingleImageManager() { group_handlers_.reserve(16); }
+
+  void StartPass(const FrameHeader& frame_header) override {
+    current_header_ = frame_header;
+  }
+  // The following four hooks do nothing in this manager.
+  void SetDecodedPass(const Image3F& opsin) override {}
+  void SetDecodedPass(CodecInOut* io) override {}
+  void DecorrelateOpsin(Image3F* img) override {}
+  void RestoreOpsin(Image3F* img) override {}
+  // Only affects group handlers created afterwards (they copy the mode).
+  void SetProgressiveMode(ProgressiveMode mode) { mode_ = mode; }
+
+  void SetSaliencyMap(std::shared_ptr<ImageF> saliency_map) {
+    saliency_map_ = saliency_map;
+  }
+
+  void UseAdaptiveReconstruction() override {
+    use_adaptive_reconstruction_ = true;
+  }
+  // Returns the handler for group_id, creating it on first use.
+  MultipassHandler* GetGroupHandler(size_t group_id,
+                                    const Rect& group_rect) override;
+
+  BlockDictionary GetBlockDictionary(double butteraugli_target,
+                                     const Image3F& opsin) override;
+  // The next four getters run their search on the first call and cache it.
+  void GetColorCorrelationMap(const Image3F& opsin,
+                              const DequantMatrices& dequant,
+                              ColorCorrelationMap* cmap) override;
+
+  void GetAcStrategy(float butteraugli_target, const ImageF* quant_field,
+                     const DequantMatrices& dequant, const Image3F& src,
+                     ThreadPool* pool, AcStrategyImage* ac_strategy,
+                     PikInfo* aux_out) override;
+
+  std::shared_ptr<Quantizer> GetQuantizer(
+      const CompressParams& cparams, size_t xsize_blocks, size_t ysize_blocks,
+      const Image3F& opsin_orig, const Image3F& opsin,
+      const FrameHeader& frame_header, const GroupHeader& header,
+      const ColorCorrelationMap& cmap, const BlockDictionary& block_dictionary,
+      const AcStrategyImage& ac_strategy, const ImageB& ar_sigma_lut_ids,
+      const DequantMatrices* dequant, const ImageB& dequant_control_field,
+      const uint8_t dequant_map[kMaxQuantControlFieldValue][256],
+      ImageF& quant_field, PikInfo* aux_out) override;
+
+  std::shared_ptr<Quantizer> GetQuantizerAvg(float avg, float absavg,
+      const CompressParams& cparams, size_t xsize_blocks, size_t ysize_blocks,
+      const Image3F& opsin_orig, const Image3F& opsin,
+      const FrameHeader& frame_header, const GroupHeader& header,
+      const ColorCorrelationMap& cmap, const BlockDictionary& block_dictionary,
+      const AcStrategyImage& ac_strategy, const ImageB& ar_sigma_lut_ids,
+      const DequantMatrices* dequant, const ImageB& dequant_control_field,
+      const uint8_t dequant_map[kMaxQuantControlFieldValue][256],
+      ImageF& quant_field, PikInfo* aux_out) override;
+
+  size_t GetNumPasses() override { return mode_.num_passes; }
+  // (factor, pass index) for each non-final pass with a set factor above 1.
+  std::vector<std::pair<uint32_t, uint32_t>> GetDownsamplingToNumPasses()
+      override {
+    std::vector<std::pair<uint32_t, uint32_t>> result;
+    for (size_t i = 0; i + 1 < mode_.num_passes; ++i) {  // No wrap at 0.
+      const auto factor =
+          mode_.passes[i].suitable_for_downsampling_factor_of_at_least;
+      if (1 < factor && factor < kNoDownsamplingFactor) {
+        result.emplace_back(factor, i);
+      }
+    }
+    return result;
+  }
+
+ private:
+  friend class SingleImageHandler;
+
+  float BlockSaliency(size_t row, size_t col) const;
+
+  FrameHeader current_header_;
+  ProgressiveMode mode_;
+  bool use_adaptive_reconstruction_ = false;
+
+  std::shared_ptr<ImageF> saliency_map_;
+  // Cached heuristic results; each has_* flag marks its value as computed.
+  std::shared_ptr<Quantizer> quantizer_;
+  bool has_quantizer_ = false;
+  ColorCorrelationMap cmap_;
+  bool has_cmap_ = false;
+  AcStrategyImage ac_strategy_;
+  bool has_ac_strategy_ = false;
+
+  std::vector<std::unique_ptr<SingleImageHandler>> group_handlers_;
+};
+
+}  // namespace pik
+
+#endif  // PIK_SINGLE_PASS_HANDLER_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/size_coder.h b/codec/L2/demos/pikEnc/host/pik/size_coder.h
new file mode 100755
index 0000000000..73ab785c59
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/size_coder.h
@@ -0,0 +1,37 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_SIZE_CODER_H_
+#define PIK_SIZE_CODER_H_
+
+#include <cstddef>
+#include <cstdint>
+#include "pik/fields.h"
+#include "pik/status.h"
+
+namespace pik {
+
+template <uint32_t kDistribution>
+class SizeCoderT {  // Static helpers coding sizes via U32Coder/kDistribution.
+ public:
+  static size_t MaxSize(const size_t num_sizes) {  // In bytes, rounded up.
+    const size_t bits = U32Coder::MaxEncodedBits(kDistribution) * num_sizes;
+    return DivCeil(bits, kBitsPerByte);
+  }
+  // Forwards to U32Coder::Write; a false result aborts via PIK_CHECK.
+  static void Encode(const size_t size, size_t* PIK_RESTRICT pos,
+                     uint8_t* storage) {
+    PIK_CHECK(U32Coder::Write(kDistribution, size, pos, storage));
+  }
+  // Forwards to U32Coder::Read for the same distribution.
+  static size_t Decode(BitReader* reader) {
+    return U32Coder::Read(kDistribution, reader);
+  }
+};
+
+}  // namespace pik
+
+#endif  // PIK_SIZE_CODER_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/status.cc b/codec/L2/demos/pikEnc/host/pik/status.cc
new file mode 100755
index 0000000000..fd217e5b28
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/status.cc
@@ -0,0 +1,37 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/status.h"
+
+#include <stdarg.h>
+#include <string>
+
+namespace pik {
+
+bool Abort(const char* f, int l, const char* format, ...) {
+  // Expand the printf-style arguments; vsnprintf truncates to the buffer.
+  char msg[2000];
+  va_list ap;
+  va_start(ap, format);
+  vsnprintf(msg, sizeof(msg), format, ap);
+  va_end(ap);
+  // No stack trace is collected, so the line after the message stays empty.
+  const std::string call_stack;
+  fprintf(stderr, "Abort at %s:%d: %s\n%s\n", f, l, msg, call_stack.c_str());
+  exit(1);
+  return false;
+}
+
+void Warning(const char* format, ...) {
+  char msg[2000];  // Longer messages are truncated by vsnprintf.
+  va_list ap;
+  va_start(ap, format);
+  vsnprintf(msg, sizeof(msg), format, ap);
+  va_end(ap);
+  fprintf(stderr, "%s\n", msg);
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/status.h b/codec/L2/demos/pikEnc/host/pik/status.h
new file mode 100755
index 0000000000..744cad4653
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/status.h
@@ -0,0 +1,80 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_STATUS_H_
+#define PIK_STATUS_H_
+
+// Error handling: Status return type + helper macros.
+
+#include <cstdio>
+#include <cstdlib>
+
+#include "pik/compiler_specific.h"
+
+namespace pik {
+
+#ifndef PIK_ENABLE_ASSERT
+#define PIK_ENABLE_ASSERT 1
+#endif
+
+// Exits the program after printing file/line plus a formatted string.
+PIK_FORMAT(3, 4) bool Abort(const char* f, int l, const char* format, ...);
+
+// Emits a warning to standard error. Will be replaced with proper error
+// reporting in the future.
+PIK_FORMAT(1, 2) void Warning(const char* format, ...);
+
+#define PIK_ABORT(...) Abort(__FILE__, __LINE__, __VA_ARGS__)
+
+// Does not guarantee running the code, use only for debug mode checks.
+#if PIK_ENABLE_ASSERT || defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
+#define PIK_ASSERT(condition)                           \
+  while (!(condition)) {                                \
+    Abort(__FILE__, __LINE__, "Assert %s", #condition); \
+  }
+#else
+#define PIK_ASSERT(condition)
+#endif
+
+// Always runs the condition, so can be used for non-debug calls.
+#define PIK_CHECK(condition)                           \
+  while (!(condition)) {                               \
+    Abort(__FILE__, __LINE__, "Check %s", #condition); \
+  }
+
+// Always runs the condition; returns false from the caller when it fails.
+#define PIK_RETURN_IF_ERROR(condition) \
+  while (!(condition)) return false
+
+// Annotation for the location where an error condition is first noticed.
+// Error codes are too unspecific to pinpoint the exact location, so we
+// add a build flag that crashes and dumps stack at the actual error source.
+#ifdef PIK_CRASH_ON_ERROR
+#define PIK_NOTIFY_ERROR(message_string) \
+  (void)Abort(__FILE__, __LINE__, message_string)
+#define PIK_FAILURE(...) Abort(__FILE__, __LINE__, __VA_ARGS__)
+#else
+#define PIK_NOTIFY_ERROR(message_string)
+#define PIK_FAILURE(...) false
+#endif
+
+// Drop-in replacement for bool that raises compiler warnings if not used
+// after being returned from a function. Example:
+// Status LoadFile(...) { return true; } is more compact than
+// bool PIK_MUST_USE_RESULT LoadFile(...) { return true; }
+class PIK_MUST_USE_RESULT Status {
+ public:
+  Status(bool ok) : ok_(ok) {}  // Implicit, so that `return true;` works.
+  // Implicit as well, so a Status can be tested directly in a condition.
+  operator bool() const { return ok_; }
+
+ private:
+  bool ok_;
+};
+
+}  // namespace pik
+
+#endif  // PIK_STATUS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/testdata_path.h b/codec/L2/demos/pikEnc/host/pik/testdata_path.h
new file mode 100755
index 0000000000..6d61e9052c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/testdata_path.h
@@ -0,0 +1,21 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_TESTDATA_PATH_H_
+#define PIK_TESTDATA_PATH_H_
+
+#include <string>
+
+
+namespace pik {
+
+static inline std::string GetTestDataPath(const std::string& filename) {
+  return (TEST_DATA_PATH "/") + filename;  // TEST_DATA_PATH: string macro.
+}
+
+}  // namespace pik
+
+#endif  // PIK_TESTDATA_PATH_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/tsc_timer.h b/codec/L2/demos/pikEnc/host/pik/tsc_timer.h
new file mode 100755
index 0000000000..3aeb1714b6
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/tsc_timer.h
@@ -0,0 +1,133 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_TSC_TIMER_H_
+#define PIK_TSC_TIMER_H_
+
+// High-resolution (~10 ns) timestamps, using fences to prevent reordering and
+// ensure exactly the desired regions are measured.
+
+#include <stdint.h>
+#include <time.h>
+
+#include "pik/arch_specific.h"
+#include "pik/compiler_specific.h"
+#include "pik/simd/simd.h"
+
+namespace pik {
+
+// TicksBefore/After return absolute timestamps and must be placed immediately
+// before and after the region to measure. The functions are distinct because
+// they use different fences.
+//
+// Background: RDTSC is not 'serializing'; earlier instructions may complete
+// after it, and/or later instructions may complete before it. 'Fences' ensure
+// regions' elapsed times are independent of such reordering. The only
+// documented unprivileged serializing instruction is CPUID, which acts as a
+// full fence (no reordering across it in either direction). Unfortunately
+// the latency of CPUID varies wildly (perhaps made worse by not initializing
+// its EAX input). Because it cannot reliably be deducted from the region's
+// elapsed time, it must not be included in the region to measure (i.e.
+// between the two RDTSC).
+//
+// The newer RDTSCP is sometimes described as serializing, but it actually
+// only serves as a half-fence with release semantics. Although all
+// instructions in the region will complete before the final timestamp is
+// captured, subsequent instructions may leak into the region and increase the
+// elapsed time. Inserting another fence after the final RDTSCP would prevent
+// such reordering without affecting the measured region.
+//
+// Fortunately, such a fence exists. The LFENCE instruction is only documented
+// to delay later loads until earlier loads are visible. However, Intel's
+// reference manual says it acts as a full fence (waiting until all earlier
+// instructions have completed, and delaying later instructions until it
+// completes). AMD assigns the same behavior to MFENCE.
+//
+// We need a fence before the initial RDTSC to prevent earlier instructions
+// from leaking into the region, and arguably another after RDTSC to avoid
+// region instructions from completing before the timestamp is recorded.
+// When surrounded by fences, the additional RDTSCP half-fence provides no
+// benefit, so the initial timestamp can be recorded via RDTSC, which has
+// lower overhead than RDTSCP because it does not read TSC_AUX. In summary,
+// we define Before = LFENCE/RDTSC/LFENCE; After = RDTSCP/LFENCE.
+//
+// Using Before+Before leads to higher variance and overhead than After+After.
+// However, After+After includes an LFENCE in the region measurements, which
+// adds a delay dependent on earlier loads. The combination of Before+After
+// is faster than Before+Before and more consistent than After+After because
+// the first LFENCE already delayed subsequent loads before the measured
+// region. This combination seems not to have been considered in prior work:
+// http://akaros.cs.berkeley.edu/lxr/akaros/kern/arch/x86/rdtsc_test.c
+//
+// Note: performance counters can measure 'exact' instructions-retired or
+// (unhalted) cycle counts. The RDPMC instruction is not serializing and also
+// requires fences. Unfortunately, it is not accessible on all OSes and we
+// prefer to avoid kernel-mode drivers. Performance counters are also affected
+// by several under/over-count errata, so we use the TSC instead.
+
+// Returns a 64-bit timestamp in unit of 'ticks'; to convert to seconds,
+// divide by InvariantTicksPerSecond. Although 32-bit ticks are faster to read,
+// they overflow too quickly to measure long regions.
+static inline uint64_t TicksBefore() {  // Start-of-region timestamp.
+  uint64_t t;
+#if PIK_ARCH_PPC  // mfspr of SPR 268 (presumably the time base; confirm).
+  asm volatile("mfspr %0, %1" : "=r"(t) : "i"(268));
+#elif PIK_ARCH_X64 && PIK_COMPILER_MSVC
+  load_fence();
+  PIK_COMPILER_FENCE;
+  t = __rdtsc();  // Fenced on both sides: Before = LFENCE/RDTSC/LFENCE.
+  load_fence();
+  PIK_COMPILER_FENCE;
+#elif PIK_ARCH_X64 && (PIK_COMPILER_CLANG || PIK_COMPILER_GCC)
+  asm volatile(
+      "lfence\n\t"
+      "rdtsc\n\t"
+      "shl $32, %%rdx\n\t"  // rdx = high half, moved into bits 63..32.
+      "or %%rdx, %0\n\t"    // t (rax) = high half | low half.
+      "lfence"
+      : "=a"(t)
+      :
+      // "memory" avoids reordering. rdx = TSC >> 32.
+      // "cc" = flags modified by SHL.
+      : "rdx", "memory", "cc");
+#else
+  // Fall back to OS - unsure how to reliably query cntvct_el0 frequency.
+  timespec ts;
+  clock_gettime(CLOCK_MONOTONIC, &ts);
+  t = ts.tv_sec * 1000000000LL + ts.tv_nsec;  // Here ticks are nanoseconds.
+#endif
+  return t;
+}
+
+static inline uint64_t TicksAfter() {  // End-of-region timestamp.
+  uint64_t t;
+#if PIK_ARCH_X64 && PIK_COMPILER_MSVC
+  PIK_COMPILER_FENCE;
+  unsigned aux;  // Written by __rdtscp; the value is not used.
+  t = __rdtscp(&aux);
+  load_fence();
+  PIK_COMPILER_FENCE;
+#elif PIK_ARCH_X64 && (PIK_COMPILER_CLANG || PIK_COMPILER_GCC)
+  // Use inline asm because __rdtscp generates code to store TSC_AUX (ecx).
+  asm volatile(
+      "rdtscp\n\t"
+      "shl $32, %%rdx\n\t"  // rdx = high half, moved into bits 63..32.
+      "or %%rdx, %0\n\t"    // t (rax) = high half | low half.
+      "lfence"
+      : "=a"(t)
+      :
+      // "memory" avoids reordering. rcx = TSC_AUX. rdx = TSC >> 32.
+      // "cc" = flags modified by SHL.
+      : "rcx", "rdx", "memory", "cc");
+#else
+  t = TicksBefore();  // no difference on other platforms.
+#endif
+  return t;
+}
+
+}  // namespace pik
+
+#endif  // PIK_TSC_TIMER_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/upscaler.cc b/codec/L2/demos/pikEnc/host/pik/upscaler.cc
new file mode 100755
index 0000000000..5032e670bb
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/upscaler.cc
@@ -0,0 +1,115 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/upscaler.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <cmath>
+#include <vector>
+#include "pik/compiler_specific.h"
+#include "pik/image.h"
+#include "pik/resample.h"
+
+namespace pik {
+
+namespace {
+
+std::vector<float> ComputeKernel(float sigma) {
+  // Unnormalized Gaussian taps; larger m is slower but more Gaussian. The
+  // centre tap is set to exp(0) = 1 directly: sigma == 0 gives -inf * 0 = NaN.
+  const float m = 2.5;
+  const float scaler = -1.0 / (2 * sigma * sigma);
+  const int diff = std::max<int>(1, m * fabs(sigma));
+  std::vector<float> kernel(2 * diff + 1);
+  for (int i = -diff; i <= diff; ++i) {
+    kernel[i + diff] = (i == 0) ? 1.0f : exp(scaler * i * i);
+  }
+  return kernel;
+}
+
+void ConvolveBorderColumn(const ImageF& in, const std::vector<float>& kernel,
+                          const float weight_no_border,
+                          const float border_ratio, const size_t x,
+                          float* const PIK_RESTRICT row_out) {
+  const int offset = kernel.size() / 2;  // Centre tap of an odd-length kernel.
+  int minx = x < offset ? 0 : x - offset;  // [minx, maxx]: taps inside in.
+  int maxx = std::min<int>(in.xsize() - 1, x + offset);
+  float weight = 0.0f;  // Sum of the kernel taps that fall inside the image.
+  for (int j = minx; j <= maxx; ++j) {
+    weight += kernel[j - x + offset];
+  }
+  // Interpolate linearly between the no-border scaling and border scaling.
+  weight = (1.0f - border_ratio) * weight + border_ratio * weight_no_border;
+  float scale = 1.0f / weight;
+  for (size_t y = 0; y < in.ysize(); ++y) {
+    const float* const PIK_RESTRICT row_in = in.Row(y);
+    float sum = 0.0f;
+    for (int j = minx; j <= maxx; ++j) {
+      sum += row_in[j] * kernel[j - x + offset];
+    }
+    row_out[y] = sum * scale;  // One output value per input row y.
+  }
+}
+
+// Computes a horizontal convolution and transposes the result.
+ImageF Convolution(const ImageF& in, const std::vector<float>& kernel,
+                   const float border_ratio) {
+  ImageF out(in.ysize(), in.xsize());  // Indexed below as out.Row(x)[y].
+  const int len = kernel.size();
+  const int offset = kernel.size() / 2;
+  float weight_no_border = 0.0f;  // Sum of all taps.
+  for (int j = 0; j < len; ++j) {
+    weight_no_border += kernel[j];
+  }
+  float scale_no_border = 1.0f / weight_no_border;
+  const int border1 = in.xsize() <= offset ? in.xsize() : offset;
+  const int border2 = in.xsize() - offset;  // If <= border1: no middle part.
+  int x = 0;
+  // left border
+  for (; x < border1; ++x) {
+    ConvolveBorderColumn(in, kernel, weight_no_border, border_ratio, x,
+                         out.Row(x));
+  }
+  // middle
+  for (; x < border2; ++x) {
+    float* const PIK_RESTRICT row_out = out.Row(x);
+    for (size_t y = 0; y < in.ysize(); ++y) {
+      const float* const PIK_RESTRICT row_in = &in.Row(y)[x - offset];
+      float sum = 0.0f;
+      for (int j = 0; j < len; ++j) {
+        sum += row_in[j] * kernel[j];
+      }
+      row_out[y] = sum * scale_no_border;
+    }
+  }
+  // right border
+  for (; x < in.xsize(); ++x) {
+    ConvolveBorderColumn(in, kernel, weight_no_border, border_ratio, x,
+                         out.Row(x));
+  }
+  return out;
+}
+
+// Approximates a 2D Gaussian blur with two transposing 1D convolutions.
+// See: https://en.wikipedia.org/wiki/Gaussian_blur
+ImageF Blur(const ImageF& in, float sigma, float border_ratio) {
+  const std::vector<float> taps = ComputeKernel(sigma);
+  const ImageF pass1 = Convolution(in, taps, border_ratio);
+  return Convolution(pass1, taps, border_ratio);
+}
+
+}  // namespace
+
+Image3F Blur(const Image3F& image, float sigma) {
+  const float no_border_blend = 0.0f;  // border_ratio: in-image weights only.
+  return Image3F(Blur(image.Plane(0), sigma, no_border_blend),
+                 Blur(image.Plane(1), sigma, no_border_blend),
+                 Blur(image.Plane(2), sigma, no_border_blend));
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/upscaler.h b/codec/L2/demos/pikEnc/host/pik/upscaler.h
new file mode 100755
index 0000000000..cbc52e81b7
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/upscaler.h
@@ -0,0 +1,18 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_UPSCALER_H_
+#define PIK_UPSCALER_H_
+
+#include "pik/image.h"
+
+namespace pik {
+
+Image3F Blur(const Image3F& image, float sigma);
+
+}  // namespace pik
+
+#endif  // PIK_UPSCALER_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/write_bits.h b/codec/L2/demos/pikEnc/host/pik/write_bits.h
new file mode 100755
index 0000000000..422efd3281
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/write_bits.h
@@ -0,0 +1,112 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_WRITE_BITS_H_
+#define PIK_WRITE_BITS_H_
+
+// Unbuffered writes to the bitstream using unaligned 64-bit stores.
+
+#include <stdint.h>
+#include <string.h>  // memcpy
+#include <cstddef>
+
+#include "pik/arch_specific.h"
+#include "pik/byte_order.h"
+#include "pik/compiler_specific.h"
+#include "pik/status.h"
+
+namespace pik {
+
+// Appends the low n_bits bits of `bits` to the bitstream in `array` at bit
+// offset *pos, then advances *pos by n_bits. Bytes are filled at increasing
+// addresses and, within a byte, least-significant bit first.
+//
+// At most 56 bits can be written per call, and `bits` must have no bits set
+// at or above position n_bits (both conditions are checked by PIK_ASSERT).
+// Example: assume that 3 bits (Rs below) have been written already:
+//
+// BYTE-0       BYTE+1       BYTE+2
+// 0000 0RRR    0000 0000    0000 0000
+//
+// Up to 5 more bits fit into BYTE-0: shift them left by 3 and OR them in.
+// For more bits, the lowest 5 land in the high bits of BYTE-0 and the rest
+// continue into BYTE+1, BYTE+2, etc. The little-endian path always stores
+// 8 bytes starting at &array[*pos >> 3], whatever n_bits is.
+PIK_INLINE void WriteBits(const size_t n_bits, uint64_t bits,
+                          size_t* PIK_RESTRICT pos,
+                          uint8_t* PIK_RESTRICT array) {
+  PIK_ASSERT((bits >> n_bits) == 0);
+  PIK_ASSERT(n_bits <= 56);
+#if PIK_BYTE_ORDER_LITTLE
+  // This branch of the code can write up to 56 bits at a time:
+  // up to 7 bits may already be occupied in *p, and at least
+  // 1 bit is needed to initialize the bit-stream ahead (i.e. if 7
+  // bits are in *p and we write 57 bits, then the next write will
+  // access a byte that was never initialized).
+  uint8_t* p = &array[*pos >> 3];
+  uint64_t v = *p;  // start from the bits already stored in the current byte
+  v |= bits << (*pos & 7);
+  memcpy(p, &v, sizeof(v));  // Write bytes: possibly more than n_bits/8
+  *pos += n_bits;
+#else
+  // implicit & 0xff is assumed for uint8_t arithmetic
+  uint8_t* array_pos = &array[*pos >> 3];
+  const size_t bits_reserved_in_first_byte = (*pos & 7);
+  bits <<= bits_reserved_in_first_byte;
+  *array_pos++ |= static_cast<uint8_t>(bits);
+  for (size_t bits_left_to_write = n_bits + bits_reserved_in_first_byte;
+       bits_left_to_write >= 9; bits_left_to_write -= 8) {
+    bits >>= 8;
+    *array_pos++ = static_cast<uint8_t>(bits);
+  }
+  *array_pos = 0;  // zero the next byte so the following write can OR into it
+  *pos += n_bits;
+#endif
+}
+
+PIK_INLINE void WriteZeroesToByteBoundary(size_t* PIK_RESTRICT pos,
+                                          uint8_t* PIK_RESTRICT array) {
+  const size_t padding = (8 - (*pos & 7)) & 7;  // 0..7 bits to the boundary
+  WriteBits(padding, 0, pos, array);
+  PIK_ASSERT((*pos & 7) == 0);
+}
+
+PIK_INLINE void WriteBitsPrepareStorage(size_t pos, uint8_t* array) {
+  PIK_ASSERT(pos % 8 == 0);  // only meaningful at a byte boundary
+  array[pos / 8] = 0;        // clear the byte that starts at bit `pos`
+}
+
+PIK_INLINE void RewindStorage(const size_t pos0, size_t* PIK_RESTRICT pos,
+                              uint8_t* PIK_RESTRICT array) {
+  PIK_ASSERT(pos0 <= *pos);
+  *pos = pos0;
+  // Mask keeping only the bits below pos0 within its byte: (1 << k) - 1.
+  const uint8_t keep_mask = static_cast<uint8_t>((1u << (pos0 & 7)) - 1u);
+  array[pos0 >> 3] &= keep_mask;
+}
+
+class BitWriter {
+ public:
+  BitWriter(size_t* storage_ix, uint8_t* storage)
+      : storage_ix_{storage_ix}, storage_{storage} {}
+  // Appends the low `nbits` bits of `bits` at *storage_ix_ via WriteBits.
+  void VisitBits(size_t nbits, uint64_t bits) {
+    WriteBits(nbits, bits, storage_ix_, storage_);
+  }
+
+ protected:
+  size_t* storage_ix_;  // current bit position, advanced by every write
+  uint8_t* storage_;    // destination bytes
+};
+
+struct BitCounter {  // same VisitBits shape as BitWriter; only tallies bits
+  size_t num_bits = 0;
+  void VisitBits(size_t nbits, uint64_t /*bits*/) { num_bits += nbits; }
+};
+
+}  // namespace pik
+
+#endif  // PIK_WRITE_BITS_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/yuv_convert.cc b/codec/L2/demos/pikEnc/host/pik/yuv_convert.cc
new file mode 100755
index 0000000000..886fc9667a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/yuv_convert.cc
@@ -0,0 +1,283 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/yuv_convert.h"
+
+#include <stdint.h>
+#include <algorithm>
+#include <array>
+#include <type_traits>
+
+#include "pik/compiler_specific.h"
+#include "pik/gamma_correct.h"
+
+namespace pik {
+
+// Conversion matrices and functions between 8 or 16 bit sRGB and
+// 8, 10 or 12 bit YUV Rec 709 color spaces.
+// Channel weights for Y; kWeightG is derived so the three weights sum to 1.
+constexpr double kWeightR = 0.2126;
+constexpr double kWeightB = 0.0722;
+constexpr double kWeightG = 1.0 - kWeightR - kWeightB;
+constexpr double kWeightBc = 1.0 - kWeightB;
+constexpr double kWeightRc = 1.0 - kWeightR;
+constexpr double kScaleY = 219.0 / 255.0;
+constexpr double kScaleUV = 112.0 / 255.0;
+// Row-major 3x3 matrix: rows yield Y, U, V; columns multiply R, G, B.
+// clang-format off
+constexpr double RGBtoYUVMatrix[9] = {
+    kWeightR * kScaleY,
+    kWeightG * kScaleY,
+    kWeightB * kScaleY,
+    (-kWeightR / kWeightBc) * kScaleUV,
+    (-kWeightG / kWeightBc) * kScaleUV,
+    kScaleUV,
+    kScaleUV,
+    (-kWeightG / kWeightRc) * kScaleUV,
+    (-kWeightB / kWeightRc) * kScaleUV,
+};
+// Offsets added to Y, U, V when encoding and subtracted again when decoding.
+constexpr double RGBtoYUVMatrixAdd[3] = {.0625, .5, .5};
+// Row-major 3x3 matrix: rows yield R, G, B; columns multiply Y, U, V.
+constexpr double YUVtoRGBMatrix[9] = {
+    1.0 / kScaleY,
+    0.0,
+    kWeightRc / kScaleUV,
+    1.0 / kScaleY,
+    -kWeightBc * kWeightB / kWeightG / kScaleUV,
+    -kWeightRc * kWeightR / kWeightG / kScaleUV,
+    1.0 / kScaleY,
+    kWeightBc / kScaleUV,
+    0.0};
+// clang-format on
+// Clamps V to [0, M] and casts to uint16_t; V and M may be evaluated repeatedly.
+#define clamp(V, M) (uint16_t)((V) < 0 ? 0 : ((V) > (M) ? (M) : (V)))
+
+// In: samples in [0 .. (1<<bits)-1]. Out: nominally [0.0 .. 1.0], not clamped.
+void YUVPixelToRGB(uint16_t yv, uint16_t uv, uint16_t vv, int bits, double* r,
+                   double* g, double* b) {
+  const double inv_max = 1. / ((1 << bits) - 1);
+  const double* const m = YUVtoRGBMatrix;  // row-major 3x3
+  const double c[3] = {yv * inv_max - RGBtoYUVMatrixAdd[0],
+                       uv * inv_max - RGBtoYUVMatrixAdd[1],
+                       vv * inv_max - RGBtoYUVMatrixAdd[2]};
+  *r = m[0] * c[0] + m[1] * c[1] + m[2] * c[2];
+  *g = m[3] * c[0] + m[4] * c[1] + m[5] * c[2];
+  *b = m[6] * c[0] + m[7] * c[1] + m[8] * c[2];
+}
+
+// Integer overload: rounds and clamps to T's full range (T: 8/16-bit unsigned).
+template <typename T>
+void YUVPixelToRGB(uint16_t yv, uint16_t uv, uint16_t vv, int bits, T* r, T* g,
+                   T* b) {
+  static_assert(sizeof(T) <= sizeof(uint16_t), "clamp() truncates to uint16_t");
+  const int maxv_out = (1 << (8 * sizeof(T))) - 1;
+  double rgb[3];
+  YUVPixelToRGB(yv, uv, vv, bits, &rgb[0], &rgb[1], &rgb[2]);
+  T* const out[3] = {r, g, b};
+  for (int c = 0; c < 3; ++c) *out[c] = clamp(.5 + maxv_out * rgb[c], maxv_out);
+}
+
+// Maps R'G'B' in [0.0 .. 1.0] to YUV codes in [0 .. (1<<bits)-1]: applies the
+// RGBtoYUVMatrix rows plus offsets, then rounds and clamps each channel.
+void RGBPixelToYUV(double r, double g, double b, int bits, uint16_t* y,
+                   uint16_t* u, uint16_t* v) {
+  const double maxv = (1 << bits) - 1;
+  uint16_t* const out[3] = {y, u, v};
+  for (int c = 0; c < 3; ++c) {
+    const double* const row = &RGBtoYUVMatrix[3 * c];
+    // Summed left to right: offset first, then the R, G and B terms.
+    const double val =
+        RGBtoYUVMatrixAdd[c] + row[0] * r + row[1] * g + row[2] * b;
+    *out[c] = clamp(.5 + maxv * val, maxv);
+  }
+}
+
+// Integer-input overload; output range: [0 .. (1<<bits)-1]
+template <typename T>
+void RGBPixelToYUV(T r, T g, T b, int bits, uint16_t* y, uint16_t* u,
+                   uint16_t* v) {
+  // All-ones value for T's width, e.g. 255 for a 1-byte T.
+  const int max_in = (1 << (8 * sizeof(T))) - 1;
+  const double inv_max = 1. / max_in;
+  // The double arguments select the non-template overload.
+  RGBPixelToYUV(r * inv_max, g * inv_max, b * inv_max, bits, y, u, v);
+}
+
+//
+// Wrapper functions to convert between 8-bit, 16-bit or linear sRGB images
+// and 8, 10 or 12 bit YUV Rec 709 images.
+//
+
+// Converts every pixel with YUVPixelToRGB; `rgb` must be as large as `yuv`.
+template <typename T>
+void YUVRec709ImageToRGB(const Image3U& yuv, int bit_depth, Image3<T>* rgb) {
+  for (size_t row = 0; row < yuv.ysize(); ++row) {
+    const uint16_t* PIK_RESTRICT src_y = yuv.PlaneRow(0, row);
+    const uint16_t* PIK_RESTRICT src_u = yuv.PlaneRow(1, row);
+    const uint16_t* PIK_RESTRICT src_v = yuv.PlaneRow(2, row);
+    T* PIK_RESTRICT dst_r = rgb->PlaneRow(0, row);
+    T* PIK_RESTRICT dst_g = rgb->PlaneRow(1, row);
+    T* PIK_RESTRICT dst_b = rgb->PlaneRow(2, row);
+    for (size_t col = 0; col < yuv.xsize(); ++col) {
+      YUVPixelToRGB(src_y[col], src_u[col], src_v[col], bit_depth,
+                    &dst_r[col], &dst_g[col], &dst_b[col]);
+    }
+  }
+}
+
+Image3B RGB8ImageFromYUVRec709(const Image3U& yuv, int bit_depth) {
+  Image3B srgb8(yuv.xsize(), yuv.ysize());  // output has the input's dimensions
+  YUVRec709ImageToRGB(yuv, bit_depth, &srgb8);
+  return srgb8;
+}
+
+Image3U RGB16ImageFromYUVRec709(const Image3U& yuv, int bit_depth) {
+  Image3U srgb16(yuv.xsize(), yuv.ysize());  // output has the input's dimensions
+  YUVRec709ImageToRGB(yuv, bit_depth, &srgb16);
+  return srgb16;
+}
+
+// Decodes YUV to sRGB per pixel, then linearizes with Srgb8ToLinearDirect.
+Image3F RGBLinearImageFromYUVRec709(const Image3U& yuv, int bit_depth) {
+  Image3F rgb(yuv.xsize(), yuv.ysize());
+  for (size_t y = 0; y < yuv.ysize(); ++y) {  // size_t, as in YUVRec709ImageToRGB
+    const uint16_t* PIK_RESTRICT src_y = yuv.ConstPlaneRow(0, y);
+    const uint16_t* PIK_RESTRICT src_u = yuv.ConstPlaneRow(1, y);
+    const uint16_t* PIK_RESTRICT src_v = yuv.ConstPlaneRow(2, y);
+    float* PIK_RESTRICT row_linear0 = rgb.PlaneRow(0, y);
+    float* PIK_RESTRICT row_linear1 = rgb.PlaneRow(1, y);
+    float* PIK_RESTRICT row_linear2 = rgb.PlaneRow(2, y);
+    for (size_t x = 0; x < yuv.xsize(); ++x) {
+      double rd, gd, bd;
+      YUVPixelToRGB(src_y[x], src_u[x], src_v[x], bit_depth, &rd, &gd, &bd);
+      row_linear0[x] = Srgb8ToLinearDirect(rd * 255.0);
+      row_linear1[x] = Srgb8ToLinearDirect(gd * 255.0);
+      row_linear2[x] = Srgb8ToLinearDirect(bd * 255.0);
+    }
+  }
+  return rgb;
+}
+
+template <typename T>  // T: sample type of `rgb`
+void RGBImageToYUVRec709(const Image3<T>& rgb, int bit_depth, Image3U* yuv) {
+  for (size_t y = 0; y < rgb.ysize(); ++y) {  // size_t, as in YUVRec709ImageToRGB
+    const T* PIK_RESTRICT row_rgb0 = rgb.ConstPlaneRow(0, y);
+    const T* PIK_RESTRICT row_rgb1 = rgb.ConstPlaneRow(1, y);
+    const T* PIK_RESTRICT row_rgb2 = rgb.ConstPlaneRow(2, y);
+    uint16_t* PIK_RESTRICT row_yuv0 = yuv->PlaneRow(0, y);
+    uint16_t* PIK_RESTRICT row_yuv1 = yuv->PlaneRow(1, y);
+    uint16_t* PIK_RESTRICT row_yuv2 = yuv->PlaneRow(2, y);
+    for (size_t x = 0; x < rgb.xsize(); ++x) {
+      RGBPixelToYUV(row_rgb0[x], row_rgb1[x], row_rgb2[x], bit_depth,
+                    &row_yuv0[x], &row_yuv1[x], &row_yuv2[x]);
+    }
+  }
+}
+
+Image3U YUVRec709ImageFromRGB8(const Image3B& rgb, int out_bit_depth) {
+  Image3U encoded(rgb.xsize(), rgb.ysize());  // same dimensions as the input
+  RGBImageToYUVRec709(rgb, out_bit_depth, &encoded);
+  return encoded;
+}
+
+Image3U YUVRec709ImageFromRGB16(const Image3U& rgb, int out_bit_depth) {
+  Image3U encoded(rgb.xsize(), rgb.ysize());  // same dimensions as the input
+  RGBImageToYUVRec709(rgb, out_bit_depth, &encoded);
+  return encoded;
+}
+
+// Encodes linear RGB to sRGB (LinearToSrgb8Direct), then to Rec 709 YUV.
+Image3U YUVRec709ImageFromRGBLinear(const Image3F& rgb, int out_bit_depth) {
+  Image3U yuv(rgb.xsize(), rgb.ysize());
+  const double norm = 1. / 255.;  // presumably rescales a 0..255 value; verify
+  for (size_t y = 0; y < yuv.ysize(); ++y) {
+    const float* PIK_RESTRICT row_linear0 = rgb.ConstPlaneRow(0, y);
+    const float* PIK_RESTRICT row_linear1 = rgb.ConstPlaneRow(1, y);
+    const float* PIK_RESTRICT row_linear2 = rgb.ConstPlaneRow(2, y);
+    uint16_t* PIK_RESTRICT out_y = yuv.PlaneRow(0, y);
+    uint16_t* PIK_RESTRICT out_u = yuv.PlaneRow(1, y);
+    uint16_t* PIK_RESTRICT out_v = yuv.PlaneRow(2, y);
+    for (size_t x = 0; x < yuv.xsize(); ++x) {
+      double rd = LinearToSrgb8Direct(row_linear0[x]) * norm;
+      double gd = LinearToSrgb8Direct(row_linear1[x]) * norm;
+      double bd = LinearToSrgb8Direct(row_linear2[x]) * norm;
+      RGBPixelToYUV(rd, gd, bd, out_bit_depth, &out_y[x], &out_u[x], &out_v[x]);
+    }
+  }
+  return yuv;
+}
+
+// Averages U and V over 2x2 blocks (edge samples repeated for odd sizes) and
+// copies Y unchanged. `bit_depth` is not used.
+void SubSampleChroma(const Image3U& yuv, int bit_depth, ImageU* yplane,
+                     ImageU* uplane, ImageU* vplane) {
+  const int xsize = yuv.xsize();
+  const int ysize = yuv.ysize();
+  const int c_xsize = (xsize + 1) / 2;
+  const int c_ysize = (ysize + 1) / 2;
+  *yplane = CopyImage(yuv.Plane(0));
+  *uplane = ImageU(c_xsize, c_ysize);
+  *vplane = ImageU(c_xsize, c_ysize);
+  for (int cy = 0; cy < c_ysize; ++cy) {
+    const int y0 = std::min(2 * cy, ysize - 1);
+    const int y1 = std::min(2 * cy + 1, ysize - 1);
+    for (int cx = 0; cx < c_xsize; ++cx) {
+      const int x0 = std::min(2 * cx, xsize - 1);
+      const int x1 = std::min(2 * cx + 1, xsize - 1);
+      const int sum_u = yuv.PlaneRow(1, y0)[x0] + yuv.PlaneRow(1, y0)[x1] +
+                        yuv.PlaneRow(1, y1)[x0] + yuv.PlaneRow(1, y1)[x1];
+      const int sum_v = yuv.PlaneRow(2, y0)[x0] + yuv.PlaneRow(2, y0)[x1] +
+                        yuv.PlaneRow(2, y1)[x0] + yuv.PlaneRow(2, y1)[x1];
+      uplane->Row(cy)[cx] = (sum_u + 2) / 4;
+      vplane->Row(cy)[cx] = (sum_v + 2) / 4;
+    }
+  }
+}
+
+// Doubles the resolution of `in` with 9-3-3-1 weighted averages of each sample
+// and its nearest neighbours (edges repeated), then shrinks the result to
+// out_xsize x out_ysize. `bit_depth` is not used.
+ImageU SuperSamplePlane(const ImageU& in, int bit_depth, int out_xsize,
+                        int out_ysize) {
+  const int c_xsize = in.xsize();
+  const int c_ysize = in.ysize();
+  const auto mix = [](int center, int horiz, int vert, int diag) {
+    return (9 * center + 3 * horiz + 3 * vert + 1 * diag + 8) / 16;
+  };
+  ImageU out(2 * c_xsize, 2 * c_ysize);
+  for (int y = 0; y < c_ysize; ++y) {
+    const int y_up = y > 0 ? y - 1 : y;
+    const int y_down = y + 1 < c_ysize ? y + 1 : y;
+    const uint16_t* const PIK_RESTRICT above = in.Row(y_up);
+    const uint16_t* const PIK_RESTRICT here = in.Row(y);
+    const uint16_t* const PIK_RESTRICT below = in.Row(y_down);
+    uint16_t* const PIK_RESTRICT row_out0 = out.Row(2 * y);
+    uint16_t* const PIK_RESTRICT row_out1 = out.Row(2 * y + 1);
+    for (int x = 0; x < c_xsize; ++x) {
+      const int left = x > 0 ? x - 1 : x;
+      const int right = x + 1 < c_xsize ? x + 1 : x;
+      row_out0[2 * x + 0] = mix(here[x], here[left], above[x], above[left]);
+      row_out0[2 * x + 1] = mix(here[x], here[right], above[x], above[right]);
+      row_out1[2 * x + 0] = mix(here[x], here[left], below[x], below[left]);
+      row_out1[2 * x + 1] = mix(here[x], here[right], below[x], below[right]);
+    }
+  }
+  out.ShrinkTo(out_xsize, out_ysize);
+  return out;
+}
+
+Image3U SuperSampleChroma(const ImageU& yplane, const ImageU& uplane,
+                          const ImageU& vplane, int bit_depth) {
+  const int luma_w = yplane.xsize();  // U and V are resampled to the Y size
+  const int luma_h = yplane.ysize();
+  return Image3U(CopyImage(yplane),
+                 SuperSamplePlane(uplane, bit_depth, luma_w, luma_h),
+                 SuperSamplePlane(vplane, bit_depth, luma_w, luma_h));
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/yuv_convert.h b/codec/L2/demos/pikEnc/host/pik/yuv_convert.h
new file mode 100755
index 0000000000..c64dfc0544
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/yuv_convert.h
@@ -0,0 +1,30 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_YUV_CONVERT_H_
+#define PIK_YUV_CONVERT_H_
+
+#include "pik/image.h"
+
+namespace pik {
+// YUV Rec 709 (bit_depth bits per sample) to 8-bit, 16-bit or linear sRGB.
+Image3B RGB8ImageFromYUVRec709(const Image3U& yuv, int bit_depth);
+Image3U RGB16ImageFromYUVRec709(const Image3U& yuv, int bit_depth);
+Image3F RGBLinearImageFromYUVRec709(const Image3U& yuv, int bit_depth);
+// The opposite direction; out_bit_depth is the bit depth of the YUV samples.
+Image3U YUVRec709ImageFromRGB8(const Image3B& rgb, int out_bit_depth);
+Image3U YUVRec709ImageFromRGB16(const Image3U& rgb, int out_bit_depth);
+Image3U YUVRec709ImageFromRGBLinear(const Image3F& rgb, int out_bit_depth);
+// Copies Y and produces U and V planes averaged over 2x2 blocks.
+void SubSampleChroma(const Image3U& yuv, int bit_depth, ImageU* yplane,
+                     ImageU* uplane, ImageU* vplane);
+// Rebuilds a three-plane image by upsampling U and V to the size of Y.
+Image3U SuperSampleChroma(const ImageU& yplane, const ImageU& uplane,
+                          const ImageU& vplane, int bit_depth);
+
+}  // namespace pik
+
+#endif  // PIK_YUV_CONVERT_H_
diff --git a/codec/L2/demos/pikEnc/host/pik/yuv_opsin_convert.cc b/codec/L2/demos/pikEnc/host/pik/yuv_opsin_convert.cc
new file mode 100755
index 0000000000..d21872b61a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/yuv_opsin_convert.cc
@@ -0,0 +1,294 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#include "pik/yuv_opsin_convert.h"
+
+#include <stdint.h>
+#include <algorithm>
+#include <array>
+#include <type_traits>
+
+#include "pik/compiler_specific.h"
+#include "pik/gamma_correct.h"
+
+namespace pik {
+
+static constexpr double kScaleR = 1.001746913108605;
+static constexpr double kScaleG = 2.0 - kScaleR;
+static constexpr double kInvScaleR = 1.0 / kScaleR;
+static constexpr double kInvScaleG = 1.0 / kScaleG;
+// Row-major 3x3 matrix applied to linear R, G, B in RGBPixelToYUVOpsin.
+static constexpr double kOpsinAbsorbanceMatrix[9] = {
+    0.355028246972028, 0.589422218034148, 0.055549534993826,
+    0.250871605395556, 0.714937756329137, 0.034190638275308,
+    0.091915449087840, 0.165250230906774, 0.742834320005384,
+};
+// Row-major 3x3 matrix applied in YUVOpsinPixelToRGB (the reverse direction).
+static constexpr double kOpsinAbsorbanceInverseMatrix[9] = {
+    6.805644286129,  -5.552270790544, -0.253373707795,
+    -2.373074275591, 3.349796660147,  0.023277709773,
+    -0.314192274838, -0.058176067042, 1.372368367449,
+};
+static constexpr double kXCenter = 0.008714601398;
+static constexpr float kXRadius = 0.035065606236;  // NOTE(review): float, unlike the double constants around it — confirm
+// Scales for Y and for U/V.
+constexpr double kScaleY = 219.0 / 255.0;
+constexpr double kScaleUV = 112.0 / 255.0;
+// Offsets added to Y and to U/V.
+constexpr double kOffsetY = 0.0625;
+constexpr double kOffsetUV = 0.5;
+// Extra gains on the valx (stored in V) and valb (stored in U) terms.
+constexpr double kScaleX = 0.3;
+constexpr double kScaleB = 0.5;
+// Clamps V to [0, M] and casts to uint16_t; V and M may be evaluated repeatedly.
+#define clamp(V, M) (uint16_t)((V) < 0 ? 0 : ((V) > (M) ? (M) : (V)))
+
+double SrgbToLinear(double val) {  // NOTE(review): <cmath> is not included here
+  if (val >= 1.0) return 1.0;  // clamp above
+  if (val < 0.0) return 0.0;   // clamp below
+  const bool linear = val <= 0.04045;
+  return linear ? val / 12.92 : std::pow((val + 0.055) / 1.055, 2.4);
+}
+
+double LinearToSrgb(double val) {
+  if (val >= 1.0) return 1.0;  // clamp above
+  if (val < 0.0) return 0.0;   // clamp below
+  const bool linear = val <= 0.04045 / 12.92;
+  return linear ? val * 12.92 : std::pow(val, 1.0 / 2.4) * 1.055 - 0.055;
+}
+
+double SimpleGamma(double x) {  // linear below 0.04 / 29.16, cube root above
+  return !(x < 0.04 / 29.16) ? std::pow(x, 1.0 / 3.0) * 1.08 - 0.08 : x * 29.16;
+}
+
+double SimpleGammaInverse(double x) {  // linear below 0.04, cubic above
+  return !(x < 0.04) ? std::pow((x + 0.08) / 1.08, 3) : x / 29.16;
+}
+
+// Decodes one YUV-opsin sample triple, each in [0 .. (1<<bits)-1], to sRGB
+// values in [0.0 .. 1.0] (LinearToSrgb clamps the final result).
+void YUVOpsinPixelToRGB(uint16_t yv, uint16_t uv, uint16_t vv, int bits,
+                        double* r, double* g, double* b) {
+  const double norm = 1. / ((1 << bits) - 1);
+  const double Y = yv * norm;
+  const double U = uv * norm;
+  const double V = vv * norm;
+  // Remove the offsets and scales that RGBPixelToYUVOpsin applied.
+  const double valy = (Y - kOffsetY) / kScaleY;
+  const double valb = (U - kOffsetUV) / kScaleUV / kScaleB;
+  const double valx = (V - kOffsetUV) * kXRadius / kScaleUV / kScaleX;
+  // Recover the three gamma-compressed channel values.
+  const double bmg = valb + valy;
+  const double rmg = kInvScaleR * (valx + kXCenter + valy);
+  const double gmg = kInvScaleG * (valy - valx - kXCenter);
+  // Invert SimpleGamma on each channel.
+  const double rm = SimpleGammaInverse(rmg);
+  const double gm = SimpleGammaInverse(gmg);
+  const double bm = SimpleGammaInverse(bmg);
+  // Multiply by kOpsinAbsorbanceInverseMatrix (row-major 3x3).
+  const double rl = (kOpsinAbsorbanceInverseMatrix[0] * rm +
+                     kOpsinAbsorbanceInverseMatrix[1] * gm +
+                     kOpsinAbsorbanceInverseMatrix[2] * bm);
+  const double gl = (kOpsinAbsorbanceInverseMatrix[3] * rm +
+                     kOpsinAbsorbanceInverseMatrix[4] * gm +
+                     kOpsinAbsorbanceInverseMatrix[5] * bm);
+  const double bl = (kOpsinAbsorbanceInverseMatrix[6] * rm +
+                     kOpsinAbsorbanceInverseMatrix[7] * gm +
+                     kOpsinAbsorbanceInverseMatrix[8] * bm);
+  // Apply the sRGB transfer function; LinearToSrgb clamps to [0, 1].
+  *r = LinearToSrgb(rl);
+  *g = LinearToSrgb(gl);
+  *b = LinearToSrgb(bl);
+#if YUV_OPSIN_DEBUG
+  printf("y: %d  u: %d  v: %d\n", yv, uv, vv);
+  printf("Y: %.10f  U: %.10f  V: %.10f\n", Y, U, V);
+  printf("valx: %.10f  valy: %.10f  valb: %.10f\n", valx, valy, valb);
+  printf("rmg: %.10f  gmg: %.10f  bmg: %.10f\n", rmg, gmg, bmg);
+  printf("rm: %.10f  gm: %.10f  bm: %.10f\n", rm, gm, bm);
+  printf("rl: %.10f  gl: %.10f  bl: %.10f\n", rl, gl, bl);
+  printf("r: %.10f  g: %.10f  b: %.10f\n", *r, *g, *b);
+#endif
+}
+
+// Integer overload: rounds and clamps to T's full range (T: 8/16-bit unsigned).
+template <typename T>
+void YUVOpsinPixelToRGB(uint16_t yv, uint16_t uv, uint16_t vv, int bits, T* r,
+                        T* g, T* b) {
+  static_assert(sizeof(T) <= sizeof(uint16_t), "clamp() truncates to uint16_t");
+  const int maxv_out = (1 << (8 * sizeof(T))) - 1;
+  double rgb[3];
+  YUVOpsinPixelToRGB(yv, uv, vv, bits, &rgb[0], &rgb[1], &rgb[2]);
+  T* const out[3] = {r, g, b};
+  for (int c = 0; c < 3; ++c) *out[c] = clamp(.5 + maxv_out * rgb[c], maxv_out);
+#if YUV_OPSIN_DEBUG
+  printf("r: %d  g: %d  b: %d\n", *r, *g, *b);
+#endif
+}
+
+// Encodes sRGB values in [0.0 .. 1.0] as one YUV-opsin sample triple, each
+// rounded and clamped to [0 .. (1<<bits)-1].
+void RGBPixelToYUVOpsin(double r, double g, double b, int bits, uint16_t* y,
+                        uint16_t* u, uint16_t* v) {
+  const double rl = SrgbToLinear(r);
+  const double gl = SrgbToLinear(g);
+  const double bl = SrgbToLinear(b);
+  // Multiply linear RGB by kOpsinAbsorbanceMatrix (row-major 3x3).
+  const double rm =
+      (kOpsinAbsorbanceMatrix[0] * rl + kOpsinAbsorbanceMatrix[1] * gl +
+       kOpsinAbsorbanceMatrix[2] * bl);
+  const double gm =
+      (kOpsinAbsorbanceMatrix[3] * rl + kOpsinAbsorbanceMatrix[4] * gl +
+       kOpsinAbsorbanceMatrix[5] * bl);
+  const double bm =
+      (kOpsinAbsorbanceMatrix[6] * rl + kOpsinAbsorbanceMatrix[7] * gl +
+       kOpsinAbsorbanceMatrix[8] * bl);
+  // Compress each channel with SimpleGamma.
+  const double rmg = SimpleGamma(rm);
+  const double gmg = SimpleGamma(gm);
+  const double bmg = SimpleGamma(bm);
+  // Half-difference (minus kXCenter), half-sum, and the third channel minus valy.
+  const double valx = (kScaleR * rmg - kScaleG * gmg) * 0.5 - kXCenter;
+  const double valy = (kScaleR * rmg + kScaleG * gmg) * 0.5;
+  const double valb = (bmg - valy);
+  // Scale and offset the three terms into Y, U and V.
+  const double Y = kOffsetY + kScaleY * valy;
+  const double U = kOffsetUV + kScaleUV * kScaleB * valb;
+  const double V = kOffsetUV + kScaleUV * kScaleX * valx / kXRadius;
+  // Quantize: round to nearest and clamp to the largest bits-wide code.
+  const double maxv = (1 << bits) - 1;
+  *y = clamp(.5 + maxv * Y, maxv);
+  *u = clamp(.5 + maxv * U, maxv);
+  *v = clamp(.5 + maxv * V, maxv);
+#if YUV_OPSIN_DEBUG
+  printf("rl: %.10f  gl: %.10f  bl: %.10f\n", rl, gl, bl);
+  printf("rm: %.10f  gm: %.10f  bm: %.10f\n", rm, gm, bm);
+  printf("rmg: %.10f  gmg: %.10f  bmg: %.10f\n", rmg, gmg, bmg);
+  printf("valx: %.10f  valy: %.10f  valb: %.10f\n", valx, valy, valb);
+  printf("Y: %.10f  U: %.10f  V: %.10f\n", Y, U, V);
+  printf("y: %d  u: %d  v: %d\n", *y, *u, *v);
+#endif
+}
+
+// Integer-input overload; output range: [0 .. (1<<bits)-1]
+template <typename T>
+void RGBPixelToYUVOpsin(T r, T g, T b, int bits, uint16_t* y, uint16_t* u,
+                        uint16_t* v) {
+  const int max_in = (1 << (8 * sizeof(T))) - 1;  // e.g. 255 for a 1-byte T
+  const double inv_max = 1. / max_in;
+  const double rd = r * inv_max, gd = g * inv_max, bd = b * inv_max;
+  // The double arguments select the non-template overload.
+  RGBPixelToYUVOpsin(rd, gd, bd, bits, y, u, v);
+#if YUV_OPSIN_DEBUG
+  printf("r: %d  g: %d  b: %d\n", r, g, b);
+  printf("r: %.10f  g: %.10f  b: %.10f\n", rd, gd, bd);
+#endif
+}
+
+//
+// Wrapper functions to convert between 8-bit, 16-bit or linear sRGB images
+// and 8, 10 or 12 bit YUV Opsin images.
+//
+
+template <typename T>  // T: sample type of the destination image
+void YUVOpsinImageToRGB(const Image3U& yuv, int bit_depth, Image3<T>* rgb) {
+  for (size_t row = 0; row < yuv.ysize(); ++row) {
+    const uint16_t* PIK_RESTRICT src_y = yuv.PlaneRow(0, row);
+    const uint16_t* PIK_RESTRICT src_u = yuv.PlaneRow(1, row);
+    const uint16_t* PIK_RESTRICT src_v = yuv.PlaneRow(2, row);
+    T* PIK_RESTRICT dst_r = rgb->PlaneRow(0, row);
+    T* PIK_RESTRICT dst_g = rgb->PlaneRow(1, row);
+    T* PIK_RESTRICT dst_b = rgb->PlaneRow(2, row);
+    for (size_t col = 0; col < yuv.xsize(); ++col) {
+      YUVOpsinPixelToRGB(src_y[col], src_u[col], src_v[col], bit_depth,
+                         &dst_r[col], &dst_g[col], &dst_b[col]);
+    }
+  }
+}
+
+Image3B RGB8ImageFromYUVOpsin(const Image3U& yuv, int bit_depth) {
+  Image3B srgb8(yuv.xsize(), yuv.ysize());  // output has the input's dimensions
+  YUVOpsinImageToRGB(yuv, bit_depth, &srgb8);
+  return srgb8;
+}
+
+Image3U RGB16ImageFromYUVOpsin(const Image3U& yuv, int bit_depth) {
+  Image3U srgb16(yuv.xsize(), yuv.ysize());  // output has the input's dimensions
+  YUVOpsinImageToRGB(yuv, bit_depth, &srgb16);
+  return srgb16;
+}
+
+// Decodes YUV-opsin to sRGB per pixel, then applies Srgb8ToLinearDirect.
+Image3F RGBLinearImageFromYUVOpsin(const Image3U& yuv, int bit_depth) {
+  Image3F rgb(yuv.xsize(), yuv.ysize());
+  for (size_t y = 0; y < yuv.ysize(); ++y) {
+    const uint16_t* PIK_RESTRICT in_y = yuv.PlaneRow(0, y);
+    const uint16_t* PIK_RESTRICT in_u = yuv.PlaneRow(1, y);
+    const uint16_t* PIK_RESTRICT in_v = yuv.PlaneRow(2, y);
+    float* PIK_RESTRICT lin_r = rgb.PlaneRow(0, y);
+    float* PIK_RESTRICT lin_g = rgb.PlaneRow(1, y);
+    float* PIK_RESTRICT lin_b = rgb.PlaneRow(2, y);
+    for (size_t x = 0; x < yuv.xsize(); ++x) {
+      double rd, gd, bd;
+      YUVOpsinPixelToRGB(in_y[x], in_u[x], in_v[x], bit_depth, &rd, &gd, &bd);
+      lin_r[x] = Srgb8ToLinearDirect(rd * 255.0);
+      lin_g[x] = Srgb8ToLinearDirect(gd * 255.0);
+      lin_b[x] = Srgb8ToLinearDirect(bd * 255.0);
+    }
+  }
+  return rgb;
+}
+
+template <typename T>  // T: sample type of `rgb`
+void RGBImageToYUVOpsin(const Image3<T>& rgb, int bit_depth, Image3U* yuv) {
+  for (size_t row = 0; row < rgb.ysize(); ++row) {
+    const T* PIK_RESTRICT src_r = rgb.ConstPlaneRow(0, row);
+    const T* PIK_RESTRICT src_g = rgb.ConstPlaneRow(1, row);
+    const T* PIK_RESTRICT src_b = rgb.ConstPlaneRow(2, row);
+    uint16_t* PIK_RESTRICT dst_y = yuv->PlaneRow(0, row);
+    uint16_t* PIK_RESTRICT dst_u = yuv->PlaneRow(1, row);
+    uint16_t* PIK_RESTRICT dst_v = yuv->PlaneRow(2, row);
+    for (size_t col = 0; col < rgb.xsize(); ++col) {
+      RGBPixelToYUVOpsin(src_r[col], src_g[col], src_b[col], bit_depth,
+                         &dst_y[col], &dst_u[col], &dst_v[col]);
+    }
+  }
+}
+
+Image3U YUVOpsinImageFromRGB8(const Image3B& rgb, int out_bit_depth) {
+  Image3U encoded(rgb.xsize(), rgb.ysize());  // same dimensions as the input
+  RGBImageToYUVOpsin(rgb, out_bit_depth, &encoded);
+  return encoded;
+}
+
+Image3U YUVOpsinImageFromRGB16(const Image3U& rgb, int out_bit_depth) {
+  Image3U encoded(rgb.xsize(), rgb.ysize());  // same dimensions as the input
+  RGBImageToYUVOpsin(rgb, out_bit_depth, &encoded);
+  return encoded;
+}
+
+// Encodes linear RGB to sRGB (LinearToSrgb8Direct), then to YUV-opsin.
+Image3U YUVOpsinImageFromRGBLinear(const Image3F& rgb, int out_bit_depth) {
+  Image3U yuv(rgb.xsize(), rgb.ysize());
+  const double inv255 = 1. / 255.;
+  for (size_t row = 0; row < yuv.ysize(); ++row) {
+    const float* PIK_RESTRICT lin_r = rgb.ConstPlaneRow(0, row);
+    const float* PIK_RESTRICT lin_g = rgb.ConstPlaneRow(1, row);
+    const float* PIK_RESTRICT lin_b = rgb.ConstPlaneRow(2, row);
+    uint16_t* PIK_RESTRICT out_y = yuv.PlaneRow(0, row);
+    uint16_t* PIK_RESTRICT out_u = yuv.PlaneRow(1, row);
+    uint16_t* PIK_RESTRICT out_v = yuv.PlaneRow(2, row);
+    for (size_t col = 0; col < yuv.xsize(); ++col) {
+      const double rd = LinearToSrgb8Direct(lin_r[col]) * inv255;
+      const double gd = LinearToSrgb8Direct(lin_g[col]) * inv255;
+      const double bd = LinearToSrgb8Direct(lin_b[col]) * inv255;
+      RGBPixelToYUVOpsin(rd, gd, bd, out_bit_depth, &out_y[col], &out_u[col],
+                         &out_v[col]);
+    }
+  }
+  return yuv;
+}
+
+}  // namespace pik
diff --git a/codec/L2/demos/pikEnc/host/pik/yuv_opsin_convert.h b/codec/L2/demos/pikEnc/host/pik/yuv_opsin_convert.h
new file mode 100755
index 0000000000..0a22f58094
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik/yuv_opsin_convert.h
@@ -0,0 +1,24 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+#ifndef PIK_YUV_OPSIN_CONVERT_H_
+#define PIK_YUV_OPSIN_CONVERT_H_
+
+#include "pik/image.h"
+
+namespace pik {
+// YUV-opsin (bit_depth bits per sample) to 8-bit, 16-bit or linear sRGB.
+Image3B RGB8ImageFromYUVOpsin(const Image3U& yuv, int bit_depth);
+Image3U RGB16ImageFromYUVOpsin(const Image3U& yuv, int bit_depth);
+Image3F RGBLinearImageFromYUVOpsin(const Image3U& yuv, int bit_depth);
+// The opposite direction; out_bit_depth is the bit depth of the YUV samples.
+Image3U YUVOpsinImageFromRGB8(const Image3B& rgb, int out_bit_depth);
+Image3U YUVOpsinImageFromRGB16(const Image3U& rgb, int out_bit_depth);
+Image3U YUVOpsinImageFromRGBLinear(const Image3F& rgb, int out_bit_depth);
+
+}  // namespace pik
+
+#endif  // PIK_YUV_OPSIN_CONVERT_H_
diff --git a/codec/L2/demos/pikEnc/host/pik_codec_common.cc b/codec/L2/demos/pikEnc/host/pik_codec_common.cc
new file mode 100644
index 0000000000..08fc7bbf7d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/pik_codec_common.cc
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel3/kernel3_common.hpp"
+
+void hls_WriteBits_strm(const nbits_t n_bits, // Queues one (n_bits, bits) pair; n_bits = valid low bits of `bits`
+                        uint16_t bits, // payload; asserted to have no bit set at or above position n_bits
+                        // Running totals, touched only when a pair is actually emitted (n_bits != 0):
+                        int& num_bits, // accumulated bit count (+= n_bits)
+                        int& num, // accumulated number of emitted pairs (+= 1)
+                        hls::stream<nbits_t>& strm_nbits, // output: width of each emitted pair
+                        hls::stream<uint16_t>& strm_bits) { // output: payload of each emitted pair
+    assert((bits >> n_bits) == 0);
+    assert(n_bits <= 56); // NOTE(review): bits is only 16 bits wide, so this bound looks looser than needed - confirm
+
+    if (n_bits != 0) { // zero-width writes emit nothing and leave the counters untouched
+        _XF_IMAGE_PRINT("---W--- n_bits=%d, bits=%d\n", (int)n_bits, bits);
+        nbits_t nbits = n_bits;
+        num_bits += nbits;
+        num++;
+
+        strm_nbits.write(nbits); // width and payload are written in lock-step, one entry each
+        strm_bits.write(bits);
+    }
+}
+
+void hls_WriteBits_strm_nodepend(const nbits_t n_bits, // Queues one (n_bits, bits) pair without any counter updates
+                                 uint16_t bits, // payload; asserted to have no bit set at or above position n_bits
+                                 hls::stream<nbits_t>& strm_nbits, // output: width of each emitted pair
+                                 hls::stream<uint16_t>& strm_bits) { // output: payload of each emitted pair
+    assert((bits >> n_bits) == 0);
+    assert(n_bits <= 56); // NOTE(review): bits is only 16 bits wide, so this bound looks looser than needed - confirm
+
+    if (n_bits != 0) { // zero-width writes emit nothing
+        _XF_IMAGE_PRINT("---W--- n_bits=%d, bits=%d\n", (int)n_bits, bits);
+        nbits_t nbits = n_bits;
+        strm_nbits.write(nbits); // width and payload are written in lock-step, one entry each
+        strm_bits.write(bits);
+    }
+}
+
+void hls_StoreVarLenUint16( // Emits n as a short sequence of hls_WriteBits_strm calls (flag, size field, remainder)
+    uint32_t n, int& num_bits, int& num, hls::stream<nbits_t>& strm_nbits, hls::stream<uint16_t>& strm_bits) {
+    if (n == 0) {
+        hls_WriteBits_strm(1, 0, num_bits, num, strm_nbits, strm_bits); // n == 0: a single 0 flag bit
+    } else {
+        hls_WriteBits_strm(1, 1, num_bits, num, strm_nbits, strm_bits); // n != 0: flag bit 1 comes first
+        int nbits = hls_Log2FloorNonZero_32b(n); // presumably floor(log2(n)); helper is defined elsewhere - confirm
+        hls_WriteBits_strm(4, nbits, num_bits, num, strm_nbits, strm_bits); // 4-bit field: asserts if nbits > 15
+        hls_WriteBits_strm(nbits, n - (1ULL << nbits), num_bits, num, strm_nbits, strm_bits); // n minus 2^nbits
+    }
+}
+
+// ------------------------------------------------------------
+static const float hls_kLog2Table[] = { // 256 entries: [i] == log2(i) for i in 1..255; entry 0 is stored as 0
+    0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f, 1.5849625007211563f, 2.0000000000000000f,
+    2.3219280948873622f, 2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f, 3.1699250014423126f,
+    3.3219280948873626f, 3.4594316186372978f, 3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
+    3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f, 4.1699250014423122f, 4.2479275134435852f,
+    4.3219280948873626f, 4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f, 4.5849625007211570f,
+    4.6438561897747244f, 4.7004397181410926f, 4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
+    4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f, 5.0443941193584534f, 5.0874628412503400f,
+    5.1292830169449664f, 5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f, 5.2854022188622487f,
+    5.3219280948873626f, 5.3575520046180838f, 5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
+    5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f, 5.5849625007211570f, 5.6147098441152083f,
+    5.6438561897747244f, 5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f, 5.7548875021634691f,
+    5.7813597135246599f, 5.8073549220576046f, 5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
+    5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f, 5.9772799234999168f, 6.0000000000000000f,
+    6.0223678130284544f, 6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f, 6.1085244567781700f,
+    6.1292830169449672f, 6.1497471195046822f, 6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
+    6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f, 6.2854022188622487f, 6.3037807481771031f,
+    6.3219280948873617f, 6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f, 6.3923174227787598f,
+    6.4093909361377026f, 6.4262647547020979f, 6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
+    6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f, 6.5391588111080319f, 6.5545888516776376f,
+    6.5698556083309478f, 6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f, 6.6293566200796095f,
+    6.6438561897747253f, 6.6582114827517955f, 6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
+    6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f, 6.7548875021634691f, 6.7681843247769260f,
+    6.7813597135246599f, 6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f, 6.8328900141647422f,
+    6.8454900509443757f, 6.8579809951275719f, 6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
+    6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f, 6.9425145053392399f, 6.9541963103868758f,
+    6.9657842846620879f, 6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f, 7.0112272554232540f,
+    7.0223678130284544f, 7.0334230015374501f, 7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
+    7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f, 7.1085244567781700f, 7.1189410727235076f,
+    7.1292830169449664f, 7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f, 7.1699250014423130f,
+    7.1799090900149345f, 7.1898245588800176f, 7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
+    7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f, 7.2573878426926521f, 7.2667865406949019f,
+    7.2761244052742384f, 7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f, 7.3128829552843557f,
+    7.3219280948873617f, 7.3309168781146177f, 7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
+    7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f, 7.3923174227787607f, 7.4008794362821844f,
+    7.4093909361377026f, 7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f, 7.4429434958487288f,
+    7.4512111118323299f, 7.4594316186372973f, 7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
+    7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f, 7.5156998382840436f, 7.5235619560570131f,
+    7.5313814605163119f, 7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f, 7.5622424242210728f,
+    7.5698556083309478f, 7.5774288280357487f, 7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
+    7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f, 7.6293566200796095f, 7.6366246205436488f,
+    7.6438561897747244f, 7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f, 7.6724253419714952f,
+    7.6794800995054464f, 7.6865005271832185f, 7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
+    7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f, 7.7347096202258392f, 7.7414669864011465f,
+    7.7481928495894596f, 7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f, 7.7747870596011737f,
+    7.7813597135246608f, 7.7879025593914317f, 7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
+    7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f, 7.8328900141647422f, 7.8392037880969445f,
+    7.8454900509443757f, 7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f, 7.8703647195834048f,
+    7.8765169465650002f, 7.8826430493618425f, 7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
+    7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f, 7.9248125036057813f, 7.9307373375628867f,
+    7.9366379390025719f, 7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f, 7.9600019320680806f,
+    7.9657842846620870f, 7.9715435539507720f, 7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
+    7.9943534368588578f};
+
+float hls_FastLog2(int v) { // log2(v): table lookup below 256, exact 8.0 at 256, fixed-point log2 above
+    if (v < 256) {
+        return hls_kLog2Table[v]; // NOTE(review): no lower-bound check; v < 0 would index before the table
+    } else if (v == 256) {
+        return 8.0f;
+    }
+#ifndef __SYNTHESIS__
+    float golden = std::log2(v); // full-precision value, used only by the debug print below
+    ap_ufixed<19, 16> rtn = std::log2(v); // C-simulation path: std::log2 stored into the same fixed-point type
+    _XF_IMAGE_PRINT("-- debug log2(%d) = %f , golden = %f\n", v, (float)rtn, golden);
+#else
+    ap_ufixed<19, 16> tmp = v;
+    ap_ufixed<19, 16> rtn = hls::log2(tmp); // synthesis path: hls::log2 applied to the fixed-point copy of v
+#endif
+    return (float)rtn; // rtn keeps 19 - 16 = 3 fractional bits, so results for v > 256 are coarser than the table
+}
+
+void hls_WriteBitToStream(const int num_pair, // Packs num_pair (nbits, bits) pairs into bytes; num_pair = pair count
+                          uint8_t& byte_tail, // in: pending partial byte; out: bits left over after the last pair
+                          hls::stream<nbits_t>& strm_nbits, // input: width of each pair
+                          hls::stream<uint16_t>& strm_bits, // input: payload of each pair
+                          int& pos, // running bit position; advanced by nbits for every pair read
+                          hls::stream<uint8_t>& strm_byte, // output: completed bytes, lowest byte of the buffer first
+                          hls::stream<bool>& strm_histo_e) { // output: one `false` written per byte emitted
+    uint8_t ntail = pos & 7; // bits already pending in the current partial byte
+    uint8_t n_byte = 0; // complete bytes in `buffer` still waiting to be written out
+    nbits_t nbits = 0;
+    uint16_t bits; // always assigned from strm_bits before it is used
+    ap_uint<32> buffer = byte_tail; // bit accumulator, seeded with the pending partial byte
+    int cnt = 0; // pairs consumed so far, plus the final latch pass
+
+    while (cnt < num_pair + 1) { // one extra pass (cnt == num_pair) latches the final byte_tail
+#pragma HLS PIPELINE II = 1
+        // Each iteration either reads one pair (n_byte == 0) or emits one byte (n_byte != 0).
+        if (n_byte == 0) { // update num to write
+            if (cnt < num_pair) {
+                nbits = strm_nbits.read();
+                bits = strm_bits.read();
+
+                pos += nbits;
+                n_byte = (ntail + nbits) >> 3; // whole bytes now available for output
+                buffer(ntail + 16, ntail) = bits; // place the new bits directly above the pending ones
+                ntail = (ntail + nbits) & 7; // bits that remain once those whole bytes are flushed
+            }
+
+            byte_tail = buffer(7, 0); // equals the true remainder on the last pass, when nothing is left to flush
+            cnt++; // end here
+
+        } else { // write out
+
+            uint8_t byte = buffer(7, 0);
+
+            buffer = buffer >> 8; // drop the byte being emitted
+            strm_byte.write(byte);
+            strm_histo_e.write(false);
+            n_byte--;
+        }
+    } // end while
+}
+
+// ------------------------------------------------------------
+// function to be removed or moved to the top
+// ------------------------------------------------------------
+void hls_WriteZeroesToByteBoundary(int* pos) { // rounds *pos up to the next multiple of 8; writes no data itself
+    const uint8_t nbits = ((*pos + 7) & ~7) - *pos; // padding needed to reach the boundary: 0..7 bits
+    *pos += nbits;
+    assert(*pos % 8 == 0);
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/.clang-format b/codec/L2/demos/pikEnc/host/third_party/.clang-format
new file mode 100644
index 0000000000..ff5c354782
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/.clang-format
@@ -0,0 +1,89 @@
+---
+Language:        Cpp
+AccessModifierOffset: -1
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlinesLeft: true
+AlignOperands:   true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Inline
+AllowShortIfStatementsOnASingleLine: true
+AllowShortLoopsOnASingleLine: true
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: true
+BinPackParameters: false
+BraceWrapping:
+  AfterClass:      false
+  AfterControlStatement: false
+  AfterEnum:       false
+  AfterFunction:   false
+  AfterNamespace:  false
+  AfterObjCDeclaration: false
+  AfterStruct:     false
+  AfterUnion:      false
+  BeforeCatch:     false
+  BeforeElse:      false
+  IndentBraces:    false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+ColumnLimit:     120
+CommentPragmas:  '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat: true
+ExperimentalAutoDetectBinPacking: false
+ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]
+IncludeCategories:
+  - Regex:           '^<.*\.h>'
+    Priority:        1
+  - Regex:           '^<.*'
+    Priority:        2
+  - Regex:           '.*'
+    Priority:        3
+IndentCaseLabels: true
+IndentWidth:     4
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 4
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: false
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 200
+PointerAlignment: Left
+ReflowComments:  true
+SortIncludes:    false
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles:  false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard:        Cpp03
+TabWidth:        8
+UseTab:          Never
+...
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/.travis.yml b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/.travis.yml
new file mode 100755
index 0000000000..f28503341a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/.travis.yml
@@ -0,0 +1,20 @@
+language: c
+compiler: gcc
+script: make $TravisCi_FseTest
+before_install:
+  - sudo apt-get update  -qq
+  - sudo apt-get install -qq gcc-arm-linux-gnueabi
+  - sudo apt-get install -qq clang
+  - sudo apt-get install -qq g++-multilib
+  - sudo apt-get install -qq gcc-multilib
+  - sudo apt-get install -qq valgrind
+
+env:
+  - TravisCi_FseTest=clangtest
+  - TravisCi_FseTest=armtest
+  - TravisCi_FseTest=gpptest
+  - TravisCi_FseTest=test
+  - TravisCi_FseTest=sanitize
+  - TravisCi_FseTest="-C programs test32"
+  - TravisCi_FseTest="-C programs memtest"
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/LICENSE b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/LICENSE
new file mode 100755
index 0000000000..b6942af4c8
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/LICENSE
@@ -0,0 +1,23 @@
+Copyright (c) 2013, Yann Collet
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following disclaimer in the documentation and/or
+  other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/Makefile b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/Makefile
new file mode 100755
index 0000000000..f78208e4bf
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/Makefile
@@ -0,0 +1,66 @@
+# #####################################################################
+# FSE - Makefile
+# Copyright (C) Yann Collet 2015
+# GPL v2 License
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# You can contact the author at :
+#  - Public forum group : https://groups.google.com/forum/#!forum/lz4c
+# #####################################################################
+# This is just a launcher for the Makefile within test directory
+# #####################################################################
+
+PROGDIR?= programs
+# Every target in this launcher is a command, not a file: declare them all phony.
+.PHONY: default all clean test gpptest armtest clangtest clangpptest staticAnalyze sanitize
+
+default: test
+
+all:
+	$(MAKE) -C $(PROGDIR) $@
+
+test:
+	$(MAKE) -C $(PROGDIR) $@
+
+clean:
+	$(MAKE) -C $(PROGDIR) $@
+
+gpptest: clean
+	@echo ---- test g++ compilation ----
+	$(MAKE) -C $(PROGDIR) all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wcast-qual -Werror"
+
+armtest: clean
+	@echo ---- test ARM compilation ----
+	CFLAGS="-O3 -Werror" $(MAKE) -C $(PROGDIR) bin CC=arm-linux-gnueabi-gcc
+
+clangtest: clean
+	@echo ---- test clang compilation ----
+	CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" CC=clang $(MAKE) -C $(PROGDIR) all
+
+clangpptest: clean
+	@echo ---- test clang++ compilation ----
+	$(MAKE) -C $(PROGDIR) all CC=clang++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wcast-qual -x c++ -Werror"
+
+staticAnalyze: clean
+	@echo ---- static analyzer - scan-build ----
+	scan-build --status-bugs -v $(MAKE) -C $(PROGDIR) all CFLAGS=-g   # does not work well; too many false positives
+
+sanitize: clean
+	@echo ---- check undefined behavior - sanitize ----
+	CC=clang CFLAGS="-g -O3 -fsanitize=undefined" $(MAKE) -C $(PROGDIR) test   FSETEST="-i5000" FSEU16TEST=-i2000
+	CC=clang CFLAGS="-g -O3 -fsanitize=undefined" $(MAKE) -C $(PROGDIR) test32 FSETEST="-i5000" FSEU16TEST=-i2000
+
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/README.md b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/README.md
new file mode 100755
index 0000000000..6b91e3ed26
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/README.md
@@ -0,0 +1,53 @@
+New Generation Entropy coders
+=============================
+
+This library proposes two high speed entropy coders :
+
+__Huff0__, a [Huffman codec](https://en.wikipedia.org/wiki/Huffman_coding) designed for modern CPU, 
+featuring OoO (Out of Order) operations on multiple ALU (Arithmetic Logic Unit),
+achieving extremely fast compression and decompression speeds.
+
+__FSE__ is a new kind of [Entropy encoder](http://en.wikipedia.org/wiki/Entropy_encoding),
+based on [ANS theory, from Jarek Duda](http://arxiv.org/abs/1311.2540),
+achieving precise compression accuracy (like [Arithmetic coding](http://en.wikipedia.org/wiki/Arithmetic_coding)) at much higher speeds.
+
+|Branch      |Status   |
+|------------|---------|
+|master      | [![Build Status](https://travis-ci.org/Cyan4973/lz4.svg?branch=master)](https://travis-ci.org/Cyan4973/FiniteStateEntropy) |
+|dev         | [![Build Status](https://travis-ci.org/Cyan4973/lz4.svg?branch=dev)](https://travis-ci.org/Cyan4973/FiniteStateEntropy) |
+
+
+Benchmarks
+-------------------------
+
+Benchmarks are run on an Intel Core i7-5600U, with Linux Mint 64-bits.
+Source code is compiled using GCC 4.8.4, 64-bits mode.
+Test files are generated using the provided `probagen` program.
+Benchmark breaks sample files into blocks of 32 KB.
+`Huff0` and `FSE` are compared to `zlibh`, the huffman encoder within zlib, provided by Frederic Kayser.
+
+| File    | Codec | Ratio  | Compression | Decompression |
+| ------- | ----- |:------:| -----------:| -------------:|
+| Proba80 |       |        |             |               |
+|         | Huff0 |  6.38  |__600 MB/s__ |__1350 MB/s__  |
+|         | FSE   |__8.84__|  325 MB/s   |   440 MB/s    |
+|         | zlibh |  6.38  |  265 MB/s   |   300 MB/s    |
+| Proba14 |       |        |             |               |
+|         | Huff0 |  1.90  |  595 MB/s   |   860 MB/s    |
+|         | FSE   |  1.91  |  330 MB/s   |   460 MB/s    |
+|         | zlibh |  1.90  |  255 MB/s   |   250 MB/s    |
+| Proba02 |       |        |             |               |
+|         | Huff0 |  1.13  |  525 MB/s   |   555 MB/s    |
+|         | FSE   |  1.13  |  325 MB/s   |   445 MB/s    |
+|         | zlibh |  1.13  |  180 MB/s   |   210 MB/s    |
+
+By design, Huffman can't break the "1 bit per symbol" limit, hence loses efficiency on squeezed distributions, such as `Proba80`.
+FSE is free of such limit, and its compression efficiency remains close to Shannon limit in all circumstances.
+However, this accuracy is not always necessary, and less compressible distributions show little difference with Huffman.
+On its side, Huff0 delivers in the form of a massive speed advantage.
+
+Branch Policy
+-------------------------
+External contributions are welcomed and encouraged.
+The "master" branch is only meant to host stable releases.
+The "dev" branch is the one where all contributions are merged. If you want to propose a patch, please commit to the "dev" branch or a dedicated feature branch. Direct commits to "master" are not permitted.
\ No newline at end of file
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/Archives/hufx6.h b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/Archives/hufx6.h
new file mode 100755
index 0000000000..3d6fa49cca
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/Archives/hufx6.h
@@ -0,0 +1,71 @@
+/* ******************************************************************
+   Huffman coder, part of New Generation Entropy library
+   header file
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef HUF_H_2342354
+#define HUF_H_2342354
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* *** Dependencies *** */
+#include <stddef.h>    /* size_t */
+#define HUF_STATIC_LINKING_ONLY
+#include "huf.h"
+
+
+/* *** Static allocation *** */
+#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
+
+
+/* *** Advanced decompression functions *** */
+size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* quad-symbols decoder, only works for dstSize >= 64 */
+
+
+/* *** HUF detailed API *** */
+size_t HUF_readDTableX6 (unsigned* DTable, const void* src, size_t srcSize);
+size_t HUF_decompress4X6_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
+
+
+/* single stream variants */
+size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* quad-symbols decoder, only works for dstSize >= 64 */
+size_t HUF_decompress1X6_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* HUF_H_2342354 */
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/Archives/hufx6_decompress.c b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/Archives/hufx6_decompress.c
new file mode 100755
index 0000000000..39aa53df5e
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/Archives/hufx6_decompress.c
@@ -0,0 +1,452 @@
+/* ******************************************************************
+   Huffman decoder, part of New Generation Entropy library
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+#  define inline __inline
+#else
+#  define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#else
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+#include "bitstream.h"
+#include "fse.h"        /* header compression */
+#include "hufx6.h"
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/* ********************************/
+/* quad-symbol decoding           */
+/* ********************************/
+typedef struct { BYTE nbBits; BYTE nbBytes; } HUF_DDescX6;
+typedef union { BYTE byte[4]; U32 sequence; } HUF_DSeqX6;
+
+/* Fills one nesting level of the X6 table; recursive, up to level 3; may benefit from <template>-like strategy to nest each level inline */
+static void HUF_fillDTableX6LevelN(HUF_DDescX6* DDescription, HUF_DSeqX6* DSequence, int sizeLog,
+                           const rankVal_t rankValOrigin, const U32 consumed, const int minWeight, const U32 maxWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, const U32* rankStart,
+                           const U32 nbBitsBaseline, HUF_DSeqX6 baseSeq, HUF_DDescX6 DDesc)
+{
+    const int scaleLog = nbBitsBaseline - sizeLog;   /* note : targetLog >= (nbBitsBaseline-1), hence scaleLog <= 1 */
+    const int minBits  = nbBitsBaseline - maxWeight;
+    const U32 level = DDesc.nbBytes;   /* index of the baseSeq byte written at this level */
+    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
+    U32 symbolStartPos, s;
+
+    /* local copy of the rankVal row for `consumed` bits; advanced below as symbols are placed */
+    memcpy(rankVal, rankValOrigin[consumed], sizeof(rankVal));
+
+    /* fill skipped values : the first rankVal[minWeight] cells keep the incoming sequence and descriptor */
+    if (minWeight>1) {
+        U32 i;
+        const U32 skipSize = rankVal[minWeight];
+        for (i = 0; i < skipSize; i++) {
+            DSequence[i] = baseSeq;
+            DDescription[i] = DDesc;
+    }   }
+
+    /* fill DTable : every cell written from here on describes one more byte than the caller's DDesc */
+    DDesc.nbBytes++;
+    symbolStartPos = rankStart[minWeight];
+    for (s=symbolStartPos; s<sortedListSize; s++) {
+        const BYTE symbol = sortedSymbols[s].symbol;
+        const U32  weight = sortedSymbols[s].weight;   /* >= 1 (sorted) */
+        const int  nbBits = nbBitsBaseline - weight;   /* >= 1 (by construction) */
+        const int  totalBits = consumed+nbBits;
+        const U32  start  = rankVal[weight];
+        const U32  length = 1 << (sizeLog-nbBits);
+        baseSeq.byte[level] = symbol;   /* append this symbol to the sequence built so far */
+        DDesc.nbBits = (BYTE)totalBits;
+
+        if ((level<3) && (sizeLog-totalBits >= minBits)) {  /* enough room for another symbol */
+            int nextMinWeight = totalBits + scaleLog;
+            if (nextMinWeight < 1) nextMinWeight = 1;
+            HUF_fillDTableX6LevelN(DDescription+start, DSequence+start, sizeLog-nbBits,
+                           rankValOrigin, totalBits, nextMinWeight, maxWeight,
+                           sortedSymbols, sortedListSize, rankStart,
+                           nbBitsBaseline, baseSeq, DDesc);   /* recursive (max : level 3) */
+        } else {   /* no further nesting : store the finished sequence in cells [start, start+length) */
+            U32 i;
+            const U32 end = start + length;
+            for (i = start; i < end; i++) {
+                DDescription[i] = DDesc;
+                DSequence[i] = baseSeq;
+        }   }
+        rankVal[weight] += length;   /* next symbol of the same weight starts after this range */
+    }
+}
+
+
+/* HUF_readDTableX6() : reads the Huffman header found in `src` and fills the X6 decoding tables stored after DTable[0]. @return : the HUF_readStats() result, or an error code. note : same preparation as X4 */
+size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
+    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
+    U32 rankStats[HUF_TABLELOG_ABSOLUTEMAX + 1] = { 0 };
+    U32 rankStart0[HUF_TABLELOG_ABSOLUTEMAX + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;   /* same array, viewed one slot further */
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    rankVal_t rankVal;
+    const U32 memLog = DTable[0];   /* table log the provided DTable was sized for */
+    size_t iSize;
+
+    if (memLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+
+    /* find maxWeight */
+    for (maxW = tableLog; maxW && rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {   U32 w, nextRankStart = 0;
+        for (w=1; w<maxW+1; w++) {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;   /* nb of symbols with a weight in 1..maxW */
+    }
+
+    /* sort symbols by weight */
+    {   U32 s;
+        for (s=0; s<nbSymbols; s++) {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;   /* next free slot for this weight */
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {   const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];   /* row used when 0 bits are already consumed */
+        for (w=1; w<maxW+1; w++) {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++) {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w < maxW+1; w++) {
+                rankValPtr[w] = rankVal0[w] >> consumed;   /* row `consumed` is row 0 scaled down by `consumed` bits */
+    }   }   }
+
+    /* fill tables */
+    {   void* ddPtr = DTable+1;   /* descriptions start right after the DTable[0] header cell */
+        HUF_DDescX6* DDescription = (HUF_DDescX6*)ddPtr;
+        void* dsPtr = DTable + 1 + ((size_t)1<<(memLog-1));   /* sequences start 1<<(memLog-1) U32 cells after the descriptions */
+        HUF_DSeqX6* DSequence = (HUF_DSeqX6*)dsPtr;
+        HUF_DSeqX6 DSeq;
+        HUF_DDescX6 DDesc;
+        DSeq.sequence = 0;
+        DDesc.nbBits = 0;
+        DDesc.nbBytes = 0;
+        HUF_fillDTableX6LevelN(DDescription, DSequence, memLog,
+                       (const U32 (*)[HUF_TABLELOG_ABSOLUTEMAX + 1])rankVal, 0, 1, maxW,
+                       sortedSymbol, sizeOfSort, rankStart0,
+                       tableLog+1, DSeq, DDesc);
+    }
+
+    return iSize;   /* value returned by HUF_readStats() */
+}
+
+
+static U32 HUF_decodeSymbolX6(void* op, BIT_DStream_t* DStream, const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
+{   /* copies one whole sequence cell to `op`, consumes its bits, @return : nb of bytes of that cell which are valid */
+    size_t const cell = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, &ds[cell], sizeof(ds[cell]));
+    BIT_skipBits(DStream, dd[cell].nbBits);
+    return dd[cell].nbBytes;
+}
+
+static U32 HUF_decodeLastSymbolsX6(void* op, U32 const maxL, BIT_DStream_t* DStream,
+                                  const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
+{   /* tail decoder : like HUF_decodeSymbolX6() but never copies more than maxL bytes; @return : nb of bytes written */
+    size_t const cell = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    U32 const seqLen = dd[cell].nbBytes;
+    size_t const regBits = sizeof(DStream->bitContainer)*8;
+    if (seqLen > maxL) {   /* only room for part of the sequence : copy maxL bytes */
+        memcpy(op, &ds[cell], maxL);
+        if (DStream->bitsConsumed < regBits) {
+            BIT_skipBits(DStream, dd[cell].nbBits);
+            if (DStream->bitsConsumed > regBits) DStream->bitsConsumed = regBits;   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+        }
+        return maxL;
+    }
+    memcpy(op, &ds[cell], seqLen);   /* the whole sequence fits */
+    BIT_skipBits(DStream, dd[cell].nbBits);
+    return seqLen;
+}
+
+
+#define HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX6(ptr, DStreamPtr, dd, ds, dtLog)
+
+#define HUF_DECODE_SYMBOLX6_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+        HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX6_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
+/* HUF_decodeStreamX6() : decodes `bitDPtr` into [p, pEnd) using the X6 tables located after DTable[0]. @return : nb of bytes written (p - pStart) */
+static inline size_t HUF_decodeStreamX6(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const U32* DTable, const U32 dtLog)
+{
+    const void* const ddPtr = DTable+1;   /* descriptions start right after the DTable[0] header cell */
+    const HUF_DDescX6* dd = (const HUF_DDescX6*)ddPtr;
+    const void* const dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));   /* sequences start 1<<(dtLog-1) U32 cells further */
+    const HUF_DSeqX6* ds = (const HUF_DSeqX6*)dsPtr;
+    BYTE* const pStart = p;
+
+    /* up to 16 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-16)) {
+        HUF_DECODE_SYMBOLX6_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX6_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX6_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX6_0(p, bitDPtr);
+    }
+
+    /* closer to the end, up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+        HUF_DECODE_SYMBOLX6_0(p, bitDPtr);
+
+    while ((BIT_reloadDStream(bitDPtr) <= BIT_DStream_endOfBuffer) && (p < pEnd))   /* tail : each copy is capped to the room left (pEnd-p) */
+        p += HUF_decodeLastSymbolsX6(p, (U32)(pEnd-p), bitDPtr, dd, ds, dtLog);
+
+    return p-pStart;
+}
+
+size_t HUF_decompress1X6_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    /* Decodes a single X6 bitstream from cSrc into dst, using an already built DTable. */
+    /* @return : dstSize on success, otherwise an error code. */
+    BYTE* const outBegin = (BYTE*) dst;
+    BYTE* const outLimit = outBegin + dstSize;
+    BIT_DStream_t stream;
+    size_t initResult;
+
+    /* open the bitstream; an init failure is returned unchanged */
+    initResult = BIT_initDStream(&stream, (const BYTE*) cSrc, cSrcSize);
+    if (HUF_isError(initResult)) return initResult;
+
+    /* decode everything in one go : DTable[0] holds the table log; */
+    /* the nb of bytes produced is not looked at here */
+    HUF_decodeStreamX6(outBegin, &stream, outLimit, DTable, DTable[0]);
+
+    /* the bitstream must have been consumed exactly, down to its last bit */
+    if (BIT_endOfDStream(&stream)) return dstSize;
+    return ERROR(corruption_detected);
+}
+
+size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* single-stream X6 decode : read the table description at the head of cSrc, then decode the remainder */
+    HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_TABLELOG_MAX);
+    size_t const headerSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
+
+    if (HUF_isError(headerSize)) return headerSize;             /* header could not be read */
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);    /* no payload left after the header */
+
+    {   const BYTE* const payload = (const BYTE*) cSrc + headerSize;
+        return HUF_decompress1X6_usingDTable (dst, dstSize, payload, cSrcSize - headerSize, DTable);
+    }
+}
+
+/* HUF_DECODE_ROUNDX6 : one interleaved round over the 4 streams, 4 decode slots per stream (_2, _1, _2, _0); op1..op4 and bitD1..bitD4 must be in scope */
+#define HUF_DECODE_ROUNDX6 \
+            HUF_DECODE_SYMBOLX6_2(op1, &bitD1); \
+            HUF_DECODE_SYMBOLX6_2(op2, &bitD2); \
+            HUF_DECODE_SYMBOLX6_2(op3, &bitD3); \
+            HUF_DECODE_SYMBOLX6_2(op4, &bitD4); \
+            HUF_DECODE_SYMBOLX6_1(op1, &bitD1); \
+            HUF_DECODE_SYMBOLX6_1(op2, &bitD2); \
+            HUF_DECODE_SYMBOLX6_1(op3, &bitD3); \
+            HUF_DECODE_SYMBOLX6_1(op4, &bitD4); \
+            HUF_DECODE_SYMBOLX6_2(op1, &bitD1); \
+            HUF_DECODE_SYMBOLX6_2(op2, &bitD2); \
+            HUF_DECODE_SYMBOLX6_2(op3, &bitD3); \
+            HUF_DECODE_SYMBOLX6_2(op4, &bitD4); \
+            HUF_DECODE_SYMBOLX6_0(op1, &bitD1); \
+            HUF_DECODE_SYMBOLX6_0(op2, &bitD2); \
+            HUF_DECODE_SYMBOLX6_0(op3, &bitD3); \
+            HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
+/* HUF_decompress4X6_usingDTable() : decodes 4 X6 bitstreams (preceded by a 6-byte jump table) from cSrc into dst, using an already built DTable. @return : dstSize, or an error code */
+size_t HUF_decompress4X6_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    /* Check */
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+    if (dstSize  < 64) return ERROR(dstSize_tooSmall);      /* only work for dstSize >= 64 */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const U32 dtLog = DTable[0];
+        const void* const ddPtr = DTable+1;   /* descriptions start right after the DTable[0] header cell */
+        const HUF_DDescX6* dd = (const HUF_DDescX6*)ddPtr;
+        const void* const dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));   /* sequences start 1<<(dtLog-1) U32 cells further */
+        const HUF_DSeqX6* ds = (const HUF_DSeqX6*)dsPtr;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);     /* sizes of streams 1-3, read from the jump table */
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;   /* output share of each stream : a quarter of dstSize, rounded up */
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);   /* whatever remains after the jump table and streams 1-3 */
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
+          if (HUF_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
+          if (HUF_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
+          if (HUF_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
+          if (HUF_isError(errorCode)) return errorCode; }
+
+        /* 4-64 symbols per loop (1-16 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        if (endSignal==BIT_DStream_unfinished) {   /* all four statuses are 0 (unfinished) */
+            HUF_DECODE_ROUNDX6;
+            if (sizeof(bitD1.bitContainer)==4) {   /* need to decode at least 4 bytes per stream */
+                    endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+                    HUF_DECODE_ROUNDX6;
+            }
+            {   U32 const saved2 = MEM_read32(opStart2);   /* saved from overwrite */
+                U32 const saved3 = MEM_read32(opStart3);
+                U32 const saved4 = MEM_read32(opStart4);
+                endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+                for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; ) {
+                    HUF_DECODE_ROUNDX6;
+                    endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+                }
+                MEM_write32(opStart2, saved2);   /* put back the first 4 bytes of segments 2-4 as they were before the loop */
+                MEM_write32(opStart3, saved3);
+                MEM_write32(opStart4, saved4);
+        }   }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog);
+        HUF_decodeStreamX6(op2, &bitD2, opStart3, DTable, dtLog);
+        HUF_decodeStreamX6(op3, &bitD3, opStart4, DTable, dtLog);
+        HUF_decodeStreamX6(op4, &bitD4, oend,     DTable, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);   /* non-zero only if all 4 streams ended exactly */
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* 4-streams X6 decode : read the table description at the head of cSrc, then decode the remainder */
+    HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_TABLELOG_MAX);
+    size_t const headerSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
+
+    if (HUF_isError(headerSize)) return headerSize;             /* header could not be read */
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);    /* no payload left after the header */
+
+    {   const BYTE* const payload = (const BYTE*) cSrc + headerSize;
+        return HUF_decompress4X6_usingDTable (dst, dstSize, payload, cSrcSize - headerSize, DTable);
+    }
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/README.md b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/README.md
new file mode 100755
index 0000000000..4fd58e3961
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/README.md
@@ -0,0 +1,38 @@
+New Generation Entropy library
+==============================
+
+The __lib__ directory contains several files, but you don't necessarily need all of them.
+Here is a detailed list, to help you decide which ones you need:
+
+
+#### Compulsory files
+
+These files are required in all circumstances :
+- __error_public.h__ : error list as enum
+- __error_private.h__ : error management
+- __mem.h__ : low level memory access routines
+- __bitstream.h__ : generic read/write bitstream common to all entropy codecs
+- __entropy_common.c__ : common functions needed for both compression and decompression
+
+
+#### Finite State Entropy
+
+This is the base codec required by other ones.
+It implements a tANS variant, similar to arithmetic in compression performance, but much faster. Compression and decompression can be compiled independently.
+- __fse.h__ : exposes interfaces
+- __fse_compress.c__ : implements compression codec
+- __fse_decompress.c__ : implements decompression codec
+
+
+#### FSE 16-bits symbols version
+
+This codec is able to encode alphabets of size > 256, using 2 bytes per symbol. It requires the base FSE codec to compile properly. Compression and decompression are merged in the same file.
+- __fseU16.c__ implements the codec, while __fseU16.h__ exposes its interfaces.
+
+
+#### Huffman codec
+
+This is the fast Huffman codec. It requires the base FSE codec to compress its headers. Compression and decompression can be compiled independently.
+- __huf.h__ : exposes interfaces.
+- __huf_compress.c__ : implements compression codec
+- __huf_decompress.c__ : implements decompression codec
\ No newline at end of file
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/bitstream.h b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/bitstream.h
new file mode 100755
index 0000000000..e96798fe47
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/bitstream.h
@@ -0,0 +1,414 @@
+/* ******************************************************************
+   bitstream
+   Part of FSE library
+   header file (to include)
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*
+*  This API consists of small unitary functions, which must be inlined for best performance.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include "mem.h"            /* unaligned access routines */
+#include "error_private.h"  /* error codes and messages */
+
+
+/*=========================================
+*  Target specific
+=========================================*/
+#if defined(__BMI__) && defined(__GNUC__)
+#  include <immintrin.h>   /* support for bextr (experimental) */
+#endif
+
+
+/*-******************************************
+*  bitStream encoding API (write forward)
+********************************************/
+/* bitStream can mix input from multiple sources.
+*  A critical property of these streams is that they encode and decode in **reverse** direction.
+*  So the first bit sequence you add will be the last to be read, like a LIFO stack.
+*/
+typedef struct
+{
+    size_t bitContainer;   /* local register : bits are accumulated here until flushed to memory */
+    int    bitPos;         /* nb of bits currently stored in bitContainer */
+    char*  startPtr;       /* beginning of the destination buffer */
+    char*  ptr;            /* current write position */
+    char*  endPtr;         /* startPtr + dstCapacity - sizeof(ptr) : BIT_flushBits() never moves ptr beyond it */
+} BIT_CStream_t;
+
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
+MEM_STATIC void   BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+MEM_STATIC void   BIT_flushBits(BIT_CStream_t* bitC);
+MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
+
+/* Start with initCStream, providing the size of buffer to write into.
+*  bitStream will never write outside of this buffer.
+*  `dstCapacity` must be > sizeof(bitC->ptr), otherwise @return will be an error code.
+*
+*  bits are first added to a local register.
+*  Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
+*  Writing data into memory is an explicit operation, performed by the flushBits function.
+*  Hence keep track how many bits are potentially stored into local register to avoid register overflow.
+*  After a flushBits, a maximum of 7 bits might still be stored into local register.
+*
+*  Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
+*
+*  Last operation is to close the bitStream.
+*  The function returns the final size of CStream in bytes.
+*  If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
+*/
+
+
+/*-********************************************
+*  bitStream decoding API (read backward)
+**********************************************/
+typedef struct
+{
+    size_t   bitContainer;   /* local register, (re)loaded from memory by BIT_initDStream() / BIT_reloadDStream() */
+    unsigned bitsConsumed;   /* nb of bits of bitContainer already read */
+    const char* ptr;         /* address bitContainer was last loaded from; moves backward towards start */
+    const char* start;       /* beginning of the source buffer */
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,     /* register reloaded, more input remains */
+               BIT_DStream_endOfBuffer = 1,    /* start of buffer reached, some bits are still unread */
+               BIT_DStream_completed = 2,      /* start of buffer reached and every bit consumed */
+               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() ; overflow : more bits consumed than the register holds */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/* Start by invoking BIT_initDStream().
+*  A chunk of the bitStream is then stored into a local register.
+*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+*  You can then retrieve bitFields stored into the local register, **in reverse order**.
+*  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
+*  A reload guarantees a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
+*  Otherwise, it can be less than that, so proceed accordingly.
+*  Checking if DStream has reached its end can be performed with BIT_endOfDStream().
+*/
+
+
+/*-****************************************
+*  unsafe API
+******************************************/
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
+
+MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
+/* unsafe version; does not check buffer overflow */
+
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/*-**************************************************************
+*  Internal functions
+****************************************************************/
+MEM_STATIC unsigned BIT_highbit32 (U32 val)   /* @return : position (0-31) of the highest bit set in `val`; `val` must not be 0 */
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+    return 31 - __builtin_clz (val);   /* __builtin_clz(0) is undefined, hence the val != 0 requirement */
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    v |= v >> 1;   /* smear the highest set bit into every lower position ... */
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];   /* ... then map the result through a De Bruijn multiply + table lookup */
+#   endif
+}
+
+/*=====    Local Constants   =====*/
+static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,  0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF };   /* BIT_mask[n] has its n lowest bits set; entries exist for n = 0..26 only (up to 26 bits) */
+
+
+/*-**************************************************************
+*  bitStream encoding
+****************************************************************/
+/*! BIT_initCStream() :
+ *  Prepares `bitC` for writing into `startPtr`. `dstCapacity` must be > sizeof(void*).
+ *  @return : 0 if success, otherwise an error code (can be tested using ERR_isError() ); on error `endPtr` == `startPtr` */
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t dstCapacity)
+{
+    bitC->bitContainer = 0;
+    bitC->bitPos = 0;
+    bitC->startPtr = (char*)startPtr;
+    bitC->ptr = bitC->startPtr;
+    bitC->endPtr = bitC->startPtr;   /* safe value, replaced below once the capacity has been validated */
+    if (dstCapacity <= sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall);   /* checked first : the subtraction below would otherwise form a pointer before the buffer */
+    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr);
+    return 0;
+}
+
+/*! BIT_addBits() :
+    Appends the `nbBits` low bits of `value` (at most 26) to `bitC`. Does not check for register overflow ! */
+MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
+{
+    size_t const cleanValue = value & BIT_mask[nbBits];   /* bits above nbBits are discarded */
+    bitC->bitContainer |= cleanValue << bitC->bitPos;
+    bitC->bitPos += nbBits;
+}
+
+/*! BIT_addBitsFast() : unmasked variant of BIT_addBits(); `value` must be _clean_ (all high bits above nbBits are 0) */
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
+{
+    size_t const shifted = value << bitC->bitPos;   /* no masking : the caller guarantees a clean value */
+    bitC->bitContainer |= shifted;
+    bitC->bitPos += nbBits;
+}
+
+/*! BIT_flushBitsFast() :
+ *  writes the local register to memory without any bound check (unsafe variant of BIT_flushBits()) */
+MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
+{
+    size_t const wholeBytes = bitC->bitPos >> 3;   /* only complete bytes are committed */
+    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
+    bitC->bitContainer >>= wholeBytes*8;   /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
+    bitC->bitPos &= 7;                     /* the 0-7 leftover bits stay in the register */
+    bitC->ptr += wholeBytes;
+}
+
+/*! BIT_flushBits() :
+ *  safe variant : the write position is never moved beyond `endPtr`.
+ *  note : an overflow is not reported here; it shows up later, as a 0 result from BIT_closeCStream() */
+MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
+{
+    size_t const wholeBytes = bitC->bitPos >> 3;   /* only complete bytes are committed */
+    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
+    bitC->bitContainer >>= wholeBytes*8;   /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
+    bitC->bitPos &= 7;
+    bitC->ptr += wholeBytes;
+    if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;   /* clamp instead of overflowing the buffer */
+}
+
+/*! BIT_closeCStream() :
+ *  Appends the 1-bit end mark and flushes. @return : size of CStream, in bytes, or 0 if it could not fit into dstBuffer */
+MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
+{
+    BIT_addBitsFast(bitC, 1, 1);   /* endMark */
+    BIT_flushBits(bitC);
+    if (bitC->ptr < bitC->endPtr) {
+        size_t const partialByte = (bitC->bitPos > 0);   /* leftover bits occupy one more byte */
+        return (bitC->ptr - bitC->startPtr) + partialByte;
+    }
+    return 0;   /* doesn't fit within authorized budget : cancel */
+}
+
+
+/*-********************************************************
+* bitStream decoding
+**********************************************************/
+/*! BIT_initDStream() :
+*   Initialize a BIT_DStream_t : loads the last bytes of the bitStream and skips the padding up to and including the end mark.
+*   `bitD` : a pointer to an already allocated BIT_DStream_t structure.
+*   `srcSize` must be the *exact* size of the bitStream, in bytes.
+*   @return : size of stream (== srcSize) or an errorCode if a problem is detected (srcSize == 0, or last byte == 0)
+*/
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+    if (srcSize >=  sizeof(bitD->bitContainer)) {  /* normal case */
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);   /* load the last sizeof(bitContainer) bytes */
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;   /* skip the padding zeros and the end mark bit */
+          if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
+    } else {   /* input shorter than the register : assemble it byte by byte */
+        bitD->start = (const char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);   /* fall-through */
+            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);   /* fall-through */
+            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);   /* fall-through */
+            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;   /* fall-through */
+            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;   /* fall-through */
+            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;   /* fall-through */
+            default:;
+        }
+        { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
+          if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
+        bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;   /* the bytes missing from the register count as already consumed */
+    }
+
+    return srcSize;
+}
+
+MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
+{   size_t const upper = bitContainer >> start;   /* bits located at position `start` and above, moved down to bit 0 */
+    return upper;
+}
+
+MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
+{   /* @return : the `nbBits` bits of bitContainer that begin at bit position `start` */
+#if defined(__BMI__) && defined(__GNUC__)   /* experimental */
+#  if defined(__x86_64__)
+    if (sizeof(bitContainer)==8)
+        return _bextr_u64(bitContainer, start, nbBits);
+    else
+#  endif
+        return _bextr_u32(bitContainer, start, nbBits);
+#else   /* portable version : shift then mask; nbBits indexes BIT_mask */
+    return (bitContainer >> start) & BIT_mask[nbBits];
+#endif
+}
+
+MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
+{
+    return bitContainer & BIT_mask[nbBits];
+}
+
+/*! BIT_lookBits() :
+ *  Provides next n bits from local register.
+ *  local register is not modified.
+ *  On 32-bits, maxNbBits==24.
+ *  On 64-bits, maxNbBits==56.
+ *  @return : value extracted
+ */
+ MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
+{
+#if defined(__BMI__) && defined(__GNUC__)   /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
+    return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
+#else
+    U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);   /* right shift done in 2 steps, so each step stays below the register width even when nbBits == 0 */
+#endif
+}
+
+/*! BIT_lookBitsFast() : unsafe variant of BIT_lookBits(); works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
+{
+    U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
+    size_t const unread = bitD->bitContainer << (bitD->bitsConsumed & regMask);   /* already consumed bits are pushed out of the top */
+    return unread >> (((regMask+1)-nbBits) & regMask);   /* keep the nbBits highest remaining bits */
+}
+
+MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{   /* marks `nbBits` more bits of the local register as consumed; nothing is reloaded here */
+    bitD->bitsConsumed = bitD->bitsConsumed + nbBits;
+}
+
+/*! BIT_readBits() :
+ *  Extracts the next `nbBits` bits of the local register and marks them as consumed.
+ *  The caller must make sure the local register still holds that many bits.
+ *  @return : extracted value.
+ */
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    size_t const extracted = BIT_lookBits(bitD, nbBits);   /* peek first ... */
+    BIT_skipBits(bitD, nbBits);                            /* ... then consume */
+    return extracted;
+}
+
+/*! BIT_readBitsFast() :
+*   unsafe variant of BIT_readBits(), built on BIT_lookBitsFast(); works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
+{
+    size_t const extracted = BIT_lookBitsFast(bitD, nbBits);   /* peek first ... */
+    BIT_skipBits(bitD, nbBits);                                /* ... then consume */
+    return extracted;
+}
+
+/*! BIT_reloadDStream() :
+*   Reloads the local register of `bitD` from the src buffer given to BIT_initDStream().
+*   This function is safe : it never reads before the start of that buffer.
+*   @return : status of the `BIT_DStream_t` internal register;
+*             if status == unfinished, the register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+{
+    size_t const regBits = sizeof(bitD->bitContainer)*8;
+    U32 skipBytes;
+    BIT_DStream_status status = BIT_DStream_unfinished;
+
+    if (bitD->bitsConsumed > regBits) return BIT_DStream_overflow;   /* should not happen => corruption detected */
+
+    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
+        /* far enough from the start : step back by every fully consumed byte */
+        bitD->ptr -= bitD->bitsConsumed >> 3;
+        bitD->bitsConsumed &= 7;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        return status;
+    }
+    if (bitD->ptr == bitD->start)   /* nothing left to load */
+        return (bitD->bitsConsumed < regBits) ? BIT_DStream_endOfBuffer : BIT_DStream_completed;
+    skipBytes = bitD->bitsConsumed >> 3;
+    if (bitD->ptr - skipBytes < bitD->start) {
+        skipBytes = (U32)(bitD->ptr - bitD->start);   /* ptr > start : stop exactly at start */
+        status = BIT_DStream_endOfBuffer;
+    }
+    bitD->ptr -= skipBytes;
+    bitD->bitsConsumed -= skipBytes*8;
+    bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
+    return status;
+}
+
+/*! BIT_endOfDStream() :
+*   @return 1 if DStream has exactly reached its end (pointer back at start and all bits consumed), 0 otherwise. */
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+    if (DStream->ptr != DStream->start) return 0;
+    return (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BITSTREAM_H_MODULE */
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/entropy_common.cpp b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/entropy_common.cpp
new file mode 100755
index 0000000000..b42acb4a3c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/entropy_common.cpp
@@ -0,0 +1,231 @@
+/*
+   Common functions of New Generation Entropy library
+   Copyright (C) 2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+*************************************************************************** */
+
+/* *************************************
+*  Dependencies
+***************************************/
+#include "mem.h"
+#include "error_private.h"       /* ERR_*, ERROR */
+#define FSE_STATIC_LINKING_ONLY  /* FSE_MIN_TABLELOG */
+#include "fse.h"   /* FSE_isError, FSE_getErrorName */
+#define HUF_STATIC_LINKING_ONLY  /* HUF_TABLELOG_ABSOLUTEMAX */
+#include "huf.h"   /* HUF_isError, HUF_getErrorName */
+
+
+
+/*-****************************************
+*  FSE Error Management
+******************************************/
+unsigned FSE_isError(size_t code) { return ERR_isError(code); }   /* exported wrapper : non-zero when `code` is an error result */
+
+const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }   /* exported wrapper : message string matching `code` */
+
+
+/* **************************************************************
+*  HUF Error Management
+****************************************************************/
+unsigned HUF_isError(size_t code) { return ERR_isError(code); }   /* same test as FSE_isError() : both forward to ERR_isError() */
+
+const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }   /* same strings as FSE_getErrorName() */
+
+
+/*-**************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+static short FSE_abs(short a) { if (a<0) return (short)-a; return a; }   /* absolute value of a normalized count */
+
+size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{   /* Decodes a compact normalized-count header into normalizedCounter[]; returns bytes read, or an ERROR() code */
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;      /* width of the next count field; tableLog+1 at start, shrinks together with `threshold` */
+    int remaining;   /* starts at 2^tableLog + 1, reduced by |count| per symbol; must end at exactly 1 */
+    int threshold;   /* power of 2, halved while `remaining` is below it */
+    U32 bitStream;   /* 32-bit window read at `ip`, already shifted past the consumed bits */
+    int bitCount;    /* number of bits consumed, counted from the byte at `ip` */
+    unsigned charnum = 0;   /* index of the next symbol to fill */
+    int previous0 = 0;      /* non-zero when the last decoded count was 0 : a zero-run length follows */
+
+    if (hbSize < 4) return ERROR(srcSize_wrong);   /* a full 32-bit read is performed below */
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr)) {
+        if (previous0) {   /* zero-run : count how many following symbols also have a count of 0 */
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF) {   /* 16 set bits : 24 more zero symbols */
+                n0+=24;
+                if (ip < iend-5) {   /* room to slide the 32-bit window 2 bytes forward */
+                    ip+=2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                } else {   /* near the end : consume the 16 bits from the current window instead */
+                    bitStream >>= 16;
+                    bitCount+=16;
+            }   }
+            while ((bitStream & 3) == 3) {   /* each 2-bit value 3 : 3 more zero symbols */
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;   /* closing 2-bit field (0..2) ends the run */
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {   /* safe to skip the consumed bytes and reload 4 bytes */
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;   /* cannot reload : only drop the closing 2-bit field */
+        }
+        {   short const max = (short)((2*threshold-1)-remaining);   /* low values below `max` use the short (nbBits-1) form */
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max) {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            } else {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSE_abs(count);   /* a stored count of -1 still uses up one unit of `remaining` */
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold) {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                ip += bitCount>>3;
+                bitCount &= 7;
+            } else {   /* near the end : pin `ip` to the last 4 bytes and keep the surplus in bitCount */
+                bitCount -= (int)(8 * (iend - 4 - ip));
+                ip = iend - 4;
+            }
+            bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+    }   }   /* while ((remaining>1) && (charnum<=*maxSVPtr)) */
+    if (remaining != 1) return ERROR(GENERIC);   /* loop stopped on charnum > *maxSVPtr before `remaining` reached 1 */
+    *maxSVPtr = charnum-1;   /* report the last symbol index actually decoded */
+
+    ip += (bitCount+7)>>3;   /* round the consumed bits up to whole bytes */
+    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+    return ip-istart;
+}
+
+
+/*! HUF_readStats() :
+    Read compact Huffman tree, saved by HUF_writeCTable().
+    `huffWeight` is destination buffer, of capacity `hwSize` bytes.
+    @return : size read from `src` , or an error Code (empty/truncated `src`, or invalid weights).
+    Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
+size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize)
+{
+    U32 weightTotal;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize, oSize;   /* iSize : bytes used after the header byte; oSize : number of explicit weights */
+
+    if (!srcSize) return ERROR(srcSize_wrong);   /* the header byte must exist before it is read */
+    iSize = ip[0];
+
+    if (iSize >= 128)  { /* special header */
+        if (iSize >= (242)) {  /* RLE */
+            static const U32 l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            oSize = l[iSize-242];
+            if (oSize >= hwSize) return ERROR(corruption_detected);   /* huffWeight[oSize] is written below */
+            memset(huffWeight, 1, hwSize);
+            iSize = 0;
+        }
+        else {   /* Incompressible */
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);
+            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+            if (oSize >= hwSize) return ERROR(corruption_detected);
+            ip += 1;
+            {   U32 n;
+                for (n=0; n<oSize; n+=2) {   /* two 4-bit weights per source byte, high nibble first */
+                    huffWeight[n]   = ip[n/2] >> 4;
+                    huffWeight[n+1] = ip[n/2] & 15;
+    }   }   }   }
+    else  {   /* header compressed with FSE (normal case) */
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUF_TABLELOG_ABSOLUTEMAX + 1) * sizeof(U32));
+    weightTotal = 0;
+    {   U32 n; for (n=0; n<oSize; n++) {
+            if (huffWeight[n] >= HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected);
+            rankStats[huffWeight[n]]++;
+            weightTotal += (1 << huffWeight[n]) >> 1;
+    }   }
+    if (weightTotal == 0) return ERROR(corruption_detected);   /* highest set bit of 0 is undefined : reject before BIT_highbit32() */
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    {   U32 const tableLog = BIT_highbit32(weightTotal) + 1;
+        if (tableLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected);
+        *tableLogPtr = tableLog;
+        /* determine last weight */
+        {   U32 const total = 1 << tableLog;
+            U32 const rest = total - weightTotal;
+            U32 const verif = 1 << BIT_highbit32(rest);
+            U32 const lastWeight = BIT_highbit32(rest) + 1;
+            if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+            huffWeight[oSize] = (BYTE)lastWeight;
+            rankStats[lastWeight]++;
+    }   }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    *nbSymbolsPtr = (U32)(oSize+1);   /* results : explicit weights plus the implied last one */
+    return iSize+1;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/error_private.h b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/error_private.h
new file mode 100755
index 0000000000..1340c16bf2
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/error_private.h
@@ -0,0 +1,115 @@
+/* ******************************************************************
+   Error codes and messages
+   Copyright (C) 2013-2016, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Homepage : http://www.zstd.net
+****************************************************************** */
+/* Note : this module is expected to remain private, do not expose it */
+
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************************
+*  Dependencies
+******************************************/
+#include <stddef.h>        /* size_t */
+#include "error_public.h"  /* enum list */
+
+
+/* ****************************************
+*  Compiler-specific
+******************************************/
+#if defined(__GNUC__)   /* ERR_STATIC : storage-class prefix applied to the helper functions defined in this header */
+#  define ERR_STATIC static __attribute__((unused))   /* GCC-compatible compilers : helpers are marked as possibly unused */
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+#  define ERR_STATIC static __inline
+#else
+#  define ERR_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/*-****************************************
+*  Customization (error_public.h)
+******************************************/
+typedef FSE_ErrorCode ERR_enum;   /* local alias for the enum declared in error_public.h */
+#define PREFIX(name) FSE_error_##name   /* e.g. PREFIX(GENERIC) expands to FSE_error_GENERIC */
+
+
+/*-****************************************
+*  Error codes handling
+******************************************/
+#ifdef ERROR
+#  undef ERROR   /* reported already defined on VS 2015 (Rich Geldreich) */
+#endif
+#define ERROR(name) ((size_t)-PREFIX(name))   /* error result : negated enum value, wrapped into size_t */
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
+
+
+/*-****************************************
+*  Error Strings
+******************************************/
+
+ERR_STATIC const char* ERR_getErrorString(ERR_enum code)
+{
+    const char* message = "Unspecified error code";   /* kept for maxCode and for any value not listed below */
+    switch( code ) {
+    case PREFIX(no_error): message = "No error detected"; break;
+    case PREFIX(GENERIC):  message = "Error (generic)"; break;
+    case PREFIX(dstSize_tooSmall): message = "Destination buffer is too small"; break;
+    case PREFIX(srcSize_wrong): message = "Src size incorrect"; break;
+    case PREFIX(corruption_detected): message = "Corrupted block detected"; break;
+    case PREFIX(tableLog_tooLarge): message = "tableLog requires too much memory : unsupported"; break;
+    case PREFIX(maxSymbolValue_tooLarge): message = "Unsupported max Symbol Value : too large"; break;
+    case PREFIX(maxSymbolValue_tooSmall): message = "Specified maxSymbolValue is too small"; break;
+    case PREFIX(maxCode):
+    default: break;
+    }
+    return message;   /* always a string literal : callers must not free it */
+}
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{   ERR_enum const errorId = ERR_getErrorCode(code);   /* 0 (no_error) when `code` is not an error result */
+    return ERR_getErrorString(errorId);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/error_public.h b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/error_public.h
new file mode 100755
index 0000000000..a852471988
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/error_public.h
@@ -0,0 +1,64 @@
+/* ******************************************************************
+   Error codes list
+   Copyright (C) 2016, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef ERROR_PUBLIC_H_MODULE
+#define ERROR_PUBLIC_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************************
+*  error codes list
+******************************************/
+typedef enum {   /* error identifiers; values are implicit, starting at 0 */
+  FSE_error_no_error,                  /* 0 : no error detected */
+  FSE_error_GENERIC,                   /* unspecified failure */
+  FSE_error_dstSize_tooSmall,          /* destination buffer is too small */
+  FSE_error_srcSize_wrong,             /* source size is incorrect */
+  FSE_error_corruption_detected,       /* corrupted block detected */
+  FSE_error_tableLog_tooLarge,         /* tableLog requires too much memory : unsupported */
+  FSE_error_maxSymbolValue_tooLarge,   /* unsupported max symbol value : too large */
+  FSE_error_maxSymbolValue_tooSmall,   /* specified maxSymbolValue is too small */
+  FSE_error_maxCode                    /* sentinel : one past the last real code, not an error by itself */
+} FSE_ErrorCode;
+
+/* note : compare with size_t function results using FSE_getError() */
+        
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_PUBLIC_H_MODULE */
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fse.h b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fse.h
new file mode 100755
index 0000000000..e711d01354
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fse.h
@@ -0,0 +1,628 @@
+/* ******************************************************************
+   FSE : Finite State Entropy codec
+   Public Prototypes declaration
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef FSE_H
+#define FSE_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*-*****************************************
+*  Dependencies
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+
+
+/*-****************************************
+*  FSE simple functions
+******************************************/
+/*! FSE_compress() :
+    Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
+    'dst' buffer must be already allocated. Compression runs faster if dstCapacity >= FSE_compressBound(srcSize).
+    @return : size of compressed data (<= dstCapacity).
+    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
+                     if FSE_isError(return), compression failed (more details using FSE_getErrorName())
+*/
+size_t FSE_compress(void* dst, size_t dstCapacity,
+              const void* src, size_t srcSize);
+
+/*! FSE_decompress():
+    Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'dstCapacity'.
+    @return : size of regenerated data (<= dstCapacity),
+              or an error code, which can be tested using FSE_isError() .
+
+    ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
+    Why ? : making this distinction requires a header.
+    Header management is intentionally delegated to the user layer, which can better manage special cases.
+*/
+size_t FSE_decompress(void* dst,  size_t dstCapacity,
+                const void* cSrc, size_t cSrcSize);
+
+
+/*-*****************************************
+*  Tool functions
+******************************************/
+size_t FSE_compressBound(size_t size);       /* maximum compressed size */
+
+/* Error Management */
+unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
+const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+
+
+/*-*****************************************
+*  FSE advanced functions
+******************************************/
+/*! FSE_compress2() :
+    Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
+    Both parameters can be defined as '0' to mean : use default value
+    @return : size of compressed data
+    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
+                     if FSE_isError(return), it's an error code.
+*/
+size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+
+
+/*-*****************************************
+*  FSE detailed API
+******************************************/
+/*!
+FSE_compress() does the following:
+1. count symbol occurrence from source[] into table count[]
+2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
+3. save normalized counters to memory buffer using writeNCount()
+4. build encoding table 'CTable' from normalized counters
+5. encode the data stream using encoding table 'CTable'
+
+FSE_decompress() does the following:
+1. read normalized counters with readNCount()
+2. build decoding table 'DTable' from normalized counters
+3. decode the data stream using decoding table 'DTable'
+
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and provide normalized distribution using external method.
+*/
+
+/* *** COMPRESSION *** */
+
+/*! FSE_count():
+    Provides the precise count of each byte within a table 'count'.
+    'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
+    *maxSymbolValuePtr will be updated if detected smaller than initial value.
+    @return : the count of the most frequent symbol (which is not identified).
+              if return == srcSize, there is only one symbol.
+              Can also return an error code, which can be tested with FSE_isError(). */
+size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+
+/*! FSE_optimalTableLog():
+    dynamically downsize 'tableLog' when conditions are met.
+    It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
+    @return : recommended tableLog (necessarily <= 'maxTableLog') */
+unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+
+/*! FSE_normalizeCount():
+    normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
+    'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
+    @return : tableLog,
+              or an errorCode, which can be tested using FSE_isError() */
+size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
+
+/*! FSE_NCountWriteBound():
+    Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
+    Typically useful for allocation purpose. */
+size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_writeNCount():
+    Compactly save 'normalizedCounter' into 'buffer'.
+    @return : size of the compressed table,
+              or an errorCode, which can be tested using FSE_isError(). */
+size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+
+/*! Constructor and Destructor of FSE_CTable.
+    Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
+typedef unsigned FSE_CTable;   /* don't allocate that. It's only meant to be more restrictive than void* */
+FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
+void        FSE_freeCTable (FSE_CTable* ct);
+
+/*! FSE_buildCTable():
+    Builds `ct`, which must be already allocated, using FSE_createCTable().
+    @return : 0, or an errorCode, which can be tested using FSE_isError() */
+size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_compress_usingCTable():
+    Compress `src` using `ct` into `dst` which must be already allocated.
+    @return : size of compressed data (<= `dstCapacity`),
+              or 0 if compressed data could not fit into `dst`,
+              or an errorCode, which can be tested using FSE_isError() */
+size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
+
+/*!
+Tutorial :
+----------
+The first step is to count all symbols. FSE_count() does this job very fast.
+Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
+'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
+maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
+FSE_count() will return the number of occurrences of the most frequent symbol.
+This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+The next step is to normalize the frequencies.
+FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
+It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
+You can use 'tableLog'==0 to mean "use default tableLog value".
+If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
+which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
+
+The result of FSE_normalizeCount() will be saved into a table,
+called 'normalizedCounter', which is a table of signed short.
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
+The return value is tableLog if everything proceeded as expected.
+It is 0 if there is a single symbol within distribution.
+If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
+'buffer' must be already allocated.
+For guaranteed success, buffer size must be at least FSE_NCountWriteBound().
+The result of the function is the number of bytes written into 'buffer'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
+
+'normalizedCounter' can then be used to create the compression table 'CTable'.
+The space required by 'CTable' must be already allocated, using FSE_createCTable().
+You can then use FSE_buildCTable() to fill 'CTable'.
+If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
+
+'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
+Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
+The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
+If it returns '0', compressed data could not fit into 'dst'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+*/
+
+
+/* *** DECOMPRESSION *** */
+
+/*! FSE_readNCount():
+    Read compactly saved 'normalizedCounter' from 'rBuffer'.
+    @return : size read from 'rBuffer',
+              or an errorCode, which can be tested using FSE_isError().
+              maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
+
+/*! Constructor and Destructor of FSE_DTable.
+    Note that its size depends on 'tableLog' */
+typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+FSE_DTable* FSE_createDTable(unsigned tableLog);
+void        FSE_freeDTable(FSE_DTable* dt);
+
+/*! FSE_buildDTable():
+    Builds 'dt', which must be already allocated, using FSE_createDTable().
+    return : 0, or an errorCode, which can be tested using FSE_isError() */
+size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_decompress_usingDTable():
+    Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
+    into `dst` which must be already allocated.
+    @return : size of regenerated data (necessarily <= `dstCapacity`),
+              or an errorCode, which can be tested using FSE_isError() */
+size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
+
+/*!
+Tutorial :
+----------
+(Note : these functions only decompress FSE-compressed blocks.
+ If block is uncompressed, use memcpy() instead
+ If block is a single repeated byte, use memset() instead )
+
+The first step is to obtain the normalized frequencies of symbols.
+This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
+In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
+or size the table to handle worst case situations (typically 256).
+FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
+The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
+Note that 'rBuffSize' must be at least 4 bytes, even if useful information is less than that.
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
+This is performed by the function FSE_buildDTable().
+The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
+`cSrcSize` must be strictly correct, otherwise decompression will fail.
+FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
+If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
+*/
+
+
+#ifdef FSE_STATIC_LINKING_ONLY
+
+/* *** Dependency *** */
+#include "bitstream.h"
+
+
+/* *****************************************
+*  Static allocation
+*******************************************/
+/* FSE buffer bounds (macro arguments are fully parenthesized, so any expression can be passed) */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7))
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* It is possible to statically allocate FSE CTable/DTable as a table of unsigned using below macros (result : nb of U32 cells) */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<(maxTableLog)))
+
+
+/* *****************************************
+*  FSE advanced API
+*******************************************/
+size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+/**< same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr  */
+
+unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
+/**< same as FSE_optimalTableLog(), which used `minus==2` */
+
+size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
+/**< build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */
+
+size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
+/**< build a fake FSE_CTable, designed to compress always the same symbolValue */
+
+size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/**< build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/**< build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+
+/* *****************************************
+*  FSE symbol compression API
+*******************************************/
+/*!
+   This API consists of small unitary functions, which highly benefit from being inlined.
+   You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
+   Visual seems to do it automatically.
+   For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
+   If none of these solutions is applicable, include "fse.c" directly.
+*/
+typedef struct
+{
+    ptrdiff_t   value;        /* current encoder state; FSE_initCState() starts it at 1<<tableLog */
+    const void* stateTable;   /* next-state table, read as U16 cells by FSE_encodeSymbol() */
+    const void* symbolTT;     /* per-symbol array of FSE_symbolCompressionTransform */
+    unsigned    stateLog;     /* tableLog of the CTable : nb of bits of `value` written by FSE_flushCState() */
+} FSE_CState_t;
+
+static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
+
+static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol);
+
+static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr);
+
+/**<
+These functions are inner components of FSE_compress_usingCTable().
+They allow the creation of custom streams, mixing multiple tables and bit sources.
+
+A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
+So the first symbol you will encode is the last you will decode, like a LIFO stack.
+
+You will need a few variables to track your CStream. They are :
+
+FSE_CTable    ct;         // Provided by FSE_buildCTable()
+BIT_CStream_t bitStream;  // bitStream tracking structure
+FSE_CState_t  state;      // State tracking structure (can have several)
+
+
+The first thing to do is to init bitStream and state.
+    size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
+    FSE_initCState(&state, ct);
+
+Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
+You can then encode your input data, byte after byte.
+FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
+Remember decoding will be done in reverse direction.
+    FSE_encodeSymbol(&bitStream, &state, symbol);
+
+At any time, you can also add any bit sequence.
+Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
+    BIT_addBits(&bitStream, bitField, nbBits);
+
+The above methods don't commit data to memory, they just store it into local register, for speed.
+Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+Writing data to memory is a manual operation, performed by the flushBits function.
+    BIT_flushBits(&bitStream);
+
+Your last FSE encoding operation shall be to flush your last state value(s).
+    FSE_flushCState(&bitStream, &state);
+
+Finally, you must close the bitStream.
+The function returns the size of CStream in bytes.
+If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
+If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
+    size_t size = BIT_closeCStream(&bitStream);
+*/
+
+
+/* *****************************************
+*  FSE symbol decompression API
+*******************************************/
+typedef struct
+{
+    size_t      state;   /* current decoder state, used as index into `table` */
+    const void* table;   /* decoding cells, set to dt+1 by FSE_initDState(); precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void     FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/**<
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream;    // Stream context
+FSE_DState_t  DState;     // State context. Multiple ones are possible
+FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = BIT_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSE_endOfDState(&DState);
+*/
+
+
+/* *****************************************
+*  FSE unsafe API
+*******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* *****************************************
+*  Implementation of inlined functions
+*******************************************/
+typedef struct {
+    int deltaFindState;   /* added to (state >> nbBitsOut) to index the next-state table */
+    U32 deltaNbBits;      /* nb of bits to output for a symbol is (state + deltaNbBits) >> 16 */
+} FSE_symbolCompressionTransform; /* total 8 bytes */
+
+MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
+{   /* layout read here : U16 tableLog first, next-state table from the 3rd U16 cell, symbol transforms after it */
+    const void* const base = ct;
+    U32 const log2Size = MEM_read16(base);
+    int const stateCellsU32 = log2Size ? (1<<(log2Size-1)) : 1;   /* nb of U32 cells between header and transforms */
+    statePtr->stateLog   = log2Size;
+    statePtr->value      = (ptrdiff_t)1<<log2Size;
+    statePtr->stateTable = (const U16*)base + 2;
+    statePtr->symbolTT   = (const U32*)ct + 1 + stateCellsU32;
+}
+
+
+/*! FSE_initCState2() :
+*   Like FSE_initCState(), except that the state is seeded for `symbol`, the first symbol to encode (hence the last one decoded) :
+*   it starts from the smallest state value possible for that symbol, which saves the cost of this symbol */
+MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol)
+{
+    FSE_initCState(statePtr, ct);
+    {   const FSE_symbolCompressionTransform tt = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+        const U16* const nextStates = (const U16*)(statePtr->stateTable);
+        U32 const nbBits = (U32)((tt.deltaNbBits + (1<<15)) >> 16);
+        statePtr->value = (nbBits << 16) - tt.deltaNbBits;
+        statePtr->value = nextStates[(statePtr->value >> nbBits) + tt.deltaFindState];
+    }
+}
+
+MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
+{   /* push the low bits of the current state into the bitstream, then jump to the next state for `symbol` */
+    const FSE_symbolCompressionTransform* const transforms = (const FSE_symbolCompressionTransform*)(statePtr->symbolTT);
+    FSE_symbolCompressionTransform const tt = transforms[symbol];
+    U32 const nbBits = (U32)((statePtr->value + tt.deltaNbBits) >> 16);
+    BIT_addBits(bitC, statePtr->value, nbBits);
+    statePtr->value = ((const U16*)(statePtr->stateTable))[ (statePtr->value >> nbBits) + tt.deltaFindState];
+}
+
+MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
+{   /* last FSE operation of a stream : store the final state so that the decoder can start from it */
+    BIT_addBits(bitC, statePtr->value, statePtr->stateLog);   /* final state, written on stateLog bits */
+    BIT_flushBits(bitC);   /* commit the local bit container to memory */
+}
+
+/*<=====    Decompression    =====>*/
+
+typedef struct {
+    U16 tableLog;   /* nb of bits read by FSE_initDState() to get the initial state */
+    U16 fastMode;   /* NOTE(review): not read in this header - presumably tells whether FSE_decodeSymbolFast() is usable; confirm in fse_decompress */
+} FSE_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;   /* base of the next state : the bits read from the stream are added to it */
+    unsigned char  symbol;     /* symbol returned while the decoder is in this state */
+    unsigned char  nbBits;     /* nb of bits to read from the stream to reach the next state */
+} FSE_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{   /* the first DTable cell is read as a FSE_DTableHeader; decoding cells start right after it */
+    const FSE_DTableHeader* const header = (const FSE_DTableHeader*)(const void*)dt;
+    U32 const stateBits = header->tableLog;
+    DStatePtr->table = dt + 1;
+    DStatePtr->state = BIT_readBits(bitD, stateBits);   /* initial state is stored on tableLog bits */
+    BIT_reloadDStream(bitD);
+}
+
+MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr)
+{   /* symbol attached to the current state; neither the state nor any bitstream is modified */
+    const FSE_decode_t* const cells = (const FSE_decode_t*)(DStatePtr->table);
+    return cells[DStatePtr->state].symbol;
+}
+
+MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{   /* same state transition as FSE_decodeSymbol(), without returning the symbol */
+    const FSE_decode_t* const cells = (const FSE_decode_t*)(DStatePtr->table);
+    FSE_decode_t const cell = cells[DStatePtr->state];
+    size_t const extra = BIT_readBits(bitD, (U32)cell.nbBits);
+    DStatePtr->state = cell.newState + extra;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    /* look up the cell of the current state : it carries the symbol to return and the way to the next state */
+    const FSE_decode_t* const cells = (const FSE_decode_t*)(DStatePtr->table);
+    FSE_decode_t const cell = cells[DStatePtr->state];
+    U32 const nbBits = cell.nbBits;
+    size_t const extra = BIT_readBits(bitD, nbBits);
+    DStatePtr->state = cell.newState + extra;
+    return cell.symbol;
+}
+
+/*! FSE_decodeSymbolFast() :
+    unsafe variant of FSE_decodeSymbol(), reading with BIT_readBitsFast() : only works if no symbol has a probability > 50% */
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    const FSE_decode_t* const cells = (const FSE_decode_t*)(DStatePtr->table);
+    FSE_decode_t const cell = cells[DStatePtr->state];
+    BYTE const decoded = cell.symbol;
+    U32 const nbBits = cell.nbBits;
+    size_t const extra = BIT_readBitsFast(bitD, nbBits);
+    DStatePtr->state = cell.newState + extra;
+    return decoded;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{   /* 1 when the decoder state is 0, 0 otherwise */
+    return (DStatePtr->state != 0) ? 0 : 1;
+}
+
+
+
+#ifndef FSE_COMMONDEFS_ONLY   /* section skipped when the includer (e.g. fseU16.cpp) defines FSE_COMMONDEFS_ONLY and supplies its own values */
+
+/* **************************************************************
+*  Tuning parameters
+****************************************************************/
+/*!MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/*!FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/* **************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE   /* symbol type of the default (byte) instantiation */
+#define FSE_FUNCTION_EXTENSION   /* empty : the default instantiation uses no name suffix (fseU16.cpp uses U16) */
+#define FSE_DECODE_TYPE FSE_decode_t   /* decoding cell type of the default instantiation */
+
+
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+
+/* ***************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)   /* table cells are 4 bytes wide, hence memory log - 2 */
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#  error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)   /* argument parenthesized : safe with any expression */
+
+
+#endif /* FSE_STATIC_LINKING_ONLY */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* FSE_H */
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fseU16.cpp b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fseU16.cpp
new file mode 100755
index 0000000000..980696d7a7
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fseU16.cpp
@@ -0,0 +1,310 @@
+/* ******************************************************************
+   FSEU16 : Finite State Entropy coder for 16-bits input
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/* *************************************************************
+*  Tuning parameters
+*****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 15
+#define FSE_DEFAULT_MEMORY_USAGE 14
+
+
+/* **************************************************************
+*  Includes
+*****************************************************************/
+#include "fseU16.h"
+#define FSEU16_SYMBOLVALUE_ABSOLUTEMAX 4095
+#if (FSE_MAX_SYMBOL_VALUE > FSEU16_SYMBOLVALUE_ABSOLUTEMAX)
+#  error "FSE_MAX_SYMBOL_VALUE is too large !"
+#endif
+
+/* **************************************************************
+*  Compiler specifics
+*****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#endif
+
+#if defined(__GNUC__)
+#  pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+#if defined (__clang__)
+#  pragma clang diagnostic ignored "-Wunused-function"
+#endif
+
+
+/* **************************************************************
+*  Local type
+****************************************************************/
+typedef struct {
+    unsigned short newState;   /* base of the next state : the bits read from the stream are added to it */
+    unsigned nbBits : 4;       /* nb of bits to read to reach the next state (4-bit field : 0-15) */
+    unsigned symbol : 12;      /* decoded value (12-bit field : 0-4095, i.e. FSEU16_SYMBOLVALUE_ABSOLUTEMAX) */
+} FSE_decode_tU16;    /* Note : the size of this struct must be 4 */
+
+
+/* *******************************************************************
+*  Include type-specific functions from fse.c (C template emulation)
+*********************************************************************/
+#define FSE_COMMONDEFS_ONLY
+
+#define FSE_FUNCTION_TYPE U16
+#define FSE_FUNCTION_EXTENSION U16
+
+#define FSE_count_generic FSE_count_genericU16
+#define FSE_buildCTable   FSE_buildCTableU16
+
+#define FSE_DECODE_TYPE   FSE_decode_tU16
+#define FSE_createDTable  FSE_createDTableU16
+#define FSE_freeDTable    FSE_freeDTableU16
+#define FSE_buildDTable   FSE_buildDTableU16
+
+#include "fse_compress.cpp"   /* FSE_countU16, FSE_buildCTableU16 */
+#include "fse_decompress.cpp"   /* FSE_buildDTableU16 */
+
+
+/*! FSE_countU16() :
+    Builds the histogram of the `srcSize` U16 values of `src` into `count`.
+    `count` must provide at least `*maxSymbolValuePtr + 1` cells; all of them are reset first.
+    A value larger than `*maxSymbolValuePtr` aborts the scan with error maxSymbolValue_tooSmall.
+    On success, `*maxSymbolValuePtr` is lowered to the largest value actually present (0 for an empty input).
+    @return : highest count for a single element, or an error code
+*/
+size_t FSE_countU16(unsigned* count, unsigned* maxSymbolValuePtr,
+                    const U16* src, size_t srcSize)
+{
+    unsigned highestSymbol = *maxSymbolValuePtr;
+    unsigned largestCount = 0;
+    size_t pos;
+    U32 sym;
+
+    memset(count, 0, (highestSymbol+1)*sizeof(*count));
+    if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
+
+    for (pos=0; pos<srcSize; pos++) {
+        unsigned const value = src[pos];
+        if (value > highestSymbol) return ERROR(maxSymbolValue_tooSmall);
+        count[value]++;
+    }
+
+    while (count[highestSymbol]==0) highestSymbol--;   /* cannot underflow : srcSize>0, so at least one cell is non-zero */
+    *maxSymbolValuePtr = highestSymbol;
+    for (sym=0; sym<=highestSymbol; sym++) {
+        if (largestCount < count[sym]) largestCount = count[sym];
+    }
+    return (size_t)largestCount;
+}
+
+/* *******************************************************
+*  U16 Compression functions
+*********************************************************/
+/*! FSE_compressU16_usingCTable() :
+    Encodes the `srcSize` U16 symbols of `src` into `dst` using the prebuilt table `ct`.
+    Symbols are encoded from last to first, because FSE decoding regenerates them in reverse order.
+    @return : size of the compressed stream in bytes, or 0 if it cannot fit into `maxDstSize` bytes */
+size_t FSE_compressU16_usingCTable (void* dst, size_t maxDstSize,
+                              const U16*  src, size_t srcSize,
+                              const FSE_CTable* ct)
+{
+    const U16* const istart = src;
+    const U16* const iend = istart + srcSize;
+    const U16* ip = iend;
+    BYTE* const op = (BYTE*) dst;
+    BIT_CStream_t bitC;
+    FSE_CState_t CState;
+
+    /* init : BIT_initCStream() can produce an error code, which must be tested before anything is written into `dst` */
+    if (FSE_isError(BIT_initCStream(&bitC, op, maxDstSize))) return 0;
+    FSE_initCState(&CState, ct);
+
+    /* join to even */
+    if (srcSize & 1) {
+        FSE_encodeSymbol(&bitC, &CState, *--ip);
+        BIT_flushBits(&bitC);
+    }
+
+    /* join to mod 4 */
+    if (srcSize & 2) {
+        FSE_encodeSymbol(&bitC, &CState, *--ip);
+        FSE_encodeSymbol(&bitC, &CState, *--ip);
+        BIT_flushBits(&bitC);
+    }
+
+    /* 2 or 4 encoding per loop (the remaining count is a multiple of 4 at this point) */
+    while (ip>istart) {
+        FSE_encodeSymbol(&bitC, &CState, *--ip);
+
+        if (sizeof(size_t)*8 < FSE_MAX_TABLELOG*2+7 )   /* This test must be static */
+            BIT_flushBits(&bitC);
+
+        FSE_encodeSymbol(&bitC, &CState, *--ip);
+
+        if (sizeof(size_t)*8 > FSE_MAX_TABLELOG*4+7 ) {  /* This test must be static */
+            FSE_encodeSymbol(&bitC, &CState, *--ip);
+            FSE_encodeSymbol(&bitC, &CState, *--ip);
+        }
+        BIT_flushBits(&bitC);
+    }
+
+    FSE_flushCState(&bitC, &CState);
+    return BIT_closeCStream(&bitC);
+}
+
+
+/* FSE_compressU16() : contract is documented in fseU16.h. Returns 0 as soon as the compressed stream cannot fit into `dst`. */
+size_t FSE_compressU16(void* dst, size_t maxDstSize,
+       const unsigned short* src, size_t srcSize,
+       unsigned maxSymbolValue, unsigned tableLog)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* const omax = ostart + maxDstSize;
+    BYTE* op = ostart;
+
+    U32   counting[FSE_MAX_SYMBOL_VALUE+1] = {0};
+    S16   norm[FSE_MAX_SYMBOL_VALUE+1];
+    CTable_max_t ct;
+
+    /* checks */
+    if (srcSize <= 1) return srcSize;
+    if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Scan for stats */
+    {   size_t const maxCount = FSE_countU16 (counting, &maxSymbolValue, src, srcSize);
+        if (FSE_isError(maxCount)) return maxCount;
+        if (maxCount == srcSize) return 1;   /* Input data is one constant element x srcSize times. Use RLE compression. */
+    }
+    /* Normalize */
+    tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
+    {   size_t const errorCode = FSE_normalizeCount (norm, tableLog, counting, srcSize, maxSymbolValue);
+        if (FSE_isError(errorCode)) return errorCode;
+    }
+    /* Write table description header */
+    {   size_t const NSize = FSE_writeNCount (op, omax-op, norm, maxSymbolValue, tableLog);
+        if (FSE_isError(NSize)) return NSize;
+        op += NSize;
+    }
+    /* Compress : the stream size is validated before being added, so a failed encoding is never reported as a valid size */
+    {   size_t const errorCode = FSE_buildCTableU16 (ct, norm, maxSymbolValue, tableLog);
+        if (FSE_isError(errorCode)) return errorCode;
+    }
+    {   size_t const cSize = FSE_compressU16_usingCTable (op, omax - op, src, srcSize, ct);
+        if (FSE_isError(cSize)) return cSize;
+        if (cSize == 0) return 0;   /* not enough space in dst for the compressed stream */
+        op += cSize;
+    }
+
+    /* check compressibility */
+    if ( (size_t)(op-ostart) >= (size_t)(srcSize-1)*(sizeof(U16)) )
+        return 0;   /* no compression */
+
+    return op-ostart;
+}
+
+
+/* *******************************************************
+*  U16 Decompression functions
+*********************************************************/
+
+U16 FSE_decodeSymbolU16(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    /* U16 flavour of FSE_decodeSymbol() : the table cells are FSE_decode_tU16 (12-bit symbol, 4-bit nbBits) */
+    const FSE_decode_tU16* const cells = (const FSE_decode_tU16*)(DStatePtr->table);
+    FSE_decode_tU16 const cell = cells[DStatePtr->state];
+    U16 const decoded = (U16)(cell.symbol);
+    U32 const nbBits = cell.nbBits;
+    size_t const extra = BIT_readBits(bitD, nbBits);
+
+    /* next state = base state of this cell + the bits just read */
+    DStatePtr->state = cell.newState + extra;
+    return decoded;
+}
+
+
+/*! FSE_decompressU16_usingDTable() : regenerates up to `maxDstSize` U16 symbols (not bytes) into `dst` from the FSE stream `cSrc`.
+    @return : nb of symbols written into `dst`, or an error code, which can be tested using FSE_isError() */
+size_t FSE_decompressU16_usingDTable (U16* dst, size_t maxDstSize,
+                               const void* cSrc, size_t cSrcSize,
+                               const FSE_DTable* dt)
+{
+    U16* const oend = dst + maxDstSize;
+    U16* op = dst;
+    BIT_DStream_t bitD;
+    FSE_DState_t state;
+
+    /* Init : a stream rejected by BIT_initDStream() must not be decoded */
+    memset(&bitD, 0, sizeof(bitD));
+    {   size_t const initError = BIT_initDStream(&bitD, cSrc, cSrcSize);
+        if (FSE_isError(initError)) return initError; }
+    FSE_initDState(&state, &bitD, dt);
+
+    while((BIT_reloadDStream(&bitD) < 2) && (op<oend)) {   /* NOTE(review): 2 is presumably BIT_DStream_completed - confirm in bitstream.h */
+        *op++ = FSE_decodeSymbolU16(&state, &bitD);
+    }
+    if (!BIT_endOfDStream(&bitD)) return ERROR(GENERIC);   /* stream not fully consumed, e.g. `dst` filled before its end */
+    return op-dst;
+}
+
+
+/* FSE_decompressU16() : reads the table description at the start of `cSrc`, builds the decoding table, then decodes the remaining bytes. */
+size_t FSE_decompressU16(U16* dst, size_t maxDstSize,
+                  const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+    short NCount[FSE_MAX_SYMBOL_VALUE+1];
+    DTable_max_t dt;
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    unsigned tableLog;
+
+    /* Sanity check */
+    if (cSrcSize<2) return ERROR(srcSize_wrong);   /* specific corner cases (uncompressed & rle) */
+    /* normal FSE decoding mode */
+    {   size_t const NSize = FSE_readNCount (NCount, &maxSymbolValue, &tableLog, ip, cSrcSize);
+        if (FSE_isError(NSize)) return NSize;
+        if (NSize >= cSrcSize) return ERROR(srcSize_wrong);   /* table description only : no FSE stream left to decode */
+        ip += NSize;
+        cSrcSize -= NSize;
+    }
+    {   size_t const errorCode = FSE_buildDTableU16 (dt, NCount, maxSymbolValue, tableLog);
+        if (FSE_isError(errorCode)) return errorCode;
+    }
+    return FSE_decompressU16_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fseU16.h b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fseU16.h
new file mode 100755
index 0000000000..e7ba3964b5
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fseU16.h
@@ -0,0 +1,79 @@
+/* ******************************************************************
+   FSEU16 : Finite State Entropy coder for 16-bits input
+   header file
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*-*****************************************
+*  Tuning parameters
+*******************************************/
+/* FSE_MAX_SYMBOL_VALUE :
+*  Maximum nb of symbol values authorized.
+*  Required for allocation purposes */
+#define FSE_MAX_SYMBOL_VALUE 286   /* This is just an example, typical value for zlib */
+
+
+/*-*****************************************
+*  Includes
+*******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+
+
+/* *****************************************
+*  FSE U16 functions
+*******************************************/
+
+/*!FSE_compressU16() :
+   data is presented or regenerated as a table of unsigned short (2 bytes per symbol),
+   which is useful for alphabet size > 256.
+   Important ! All symbol values within input table must be <= 'maxSymbolValue'.
+   Maximum allowed 'maxSymbolValue' is controlled by constant FSE_MAX_SYMBOL_VALUE
+   Special values : if result == 0, data is not compressible => the content of dst must be ignored !!
+                    if result == 1, data is one constant element x srcSize times. Use RLE compression.
+                    if FSE_isError(result), it's an error code.*/
+size_t FSE_compressU16(void* dst, size_t maxDstSize,
+       const unsigned short* src, size_t srcSize,
+       unsigned maxSymbolValue, unsigned tableLog);
+
+size_t FSE_decompressU16(unsigned short* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize);
+
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fse_compress.cpp b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fse_compress.cpp
new file mode 100755
index 0000000000..192d55026d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fse_compress.cpp
@@ -0,0 +1,807 @@
+/* ******************************************************************
+   FSE : Finite State Entropy encoder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  ifdef __GNUC__
+#    define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+#include "bitstream.h"
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+/* Function templates */
+/*! FSE_buildCTable() : fills `ct` (header, next-state table, symbol transformation table) from normalizedCounter[0..maxSymbolValue]. @return : 0, or ERROR(GENERIC) when the symbol spreading does not end back on position 0 */
+size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    U32 const tableSize = 1 << tableLog;
+    U32 const tableMask = tableSize - 1;
+    void* const ptr = ct;
+    U16* const tableU16 = ( (U16*) ptr) + 2;   /* next-state table, located right after the two U16 header fields */
+    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;   /* symbol transforms are stored after the next-state table */
+    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
+    U32 const step = FSE_TABLESTEP(tableSize);
+    U32 cumul[FSE_MAX_SYMBOL_VALUE+2];   /* cumul[s] : next slot of tableU16 to be used by symbol s */
+
+    FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complains about it */
+    U32 highThreshold = tableSize-1;   /* low proba symbols are stored from the top of tableSymbol, downwards */
+
+    /* CTable header */
+    tableU16[-2] = (U16) tableLog;
+    tableU16[-1] = (U16) maxSymbolValue;
+
+    /* For explanations on how to distribute symbol values over the table :
+    *  http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
+
+    /* symbol start positions */
+    {   U32 u;
+        cumul[0] = 0;
+        for (u=1; u<=maxSymbolValue+1; u++) {
+            if (normalizedCounter[u-1]==-1) {  /* Low proba symbol */
+                cumul[u] = cumul[u-1] + 1;
+                tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
+            } else {
+                cumul[u] = cumul[u-1] + normalizedCounter[u-1];
+        }   }
+        cumul[maxSymbolValue+1] = tableSize+1;
+    }
+
+    /* Spread symbols */
+    {   U32 position = 0;
+        U32 symbol;
+        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+            int nbOccurences;
+            for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) {
+                tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
+                position = (position + step) & tableMask;
+                while (position > highThreshold) position = (position + step) & tableMask;   /* Low proba area */
+        }   }
+
+        if (position!=0) return ERROR(GENERIC);   /* Must have gone through all positions */
+    }
+
+    /* Build table */
+    {   U32 u; for (u=0; u<tableSize; u++) {
+        FSE_FUNCTION_TYPE s = tableSymbol[u];   /* note : static analyzer may not understand tableSymbol is properly initialized */
+        tableU16[cumul[s]++] = (U16) (tableSize+u);   /* TableU16 : sorted by symbol order; gives next state value */
+    }   }
+
+    /* Build Symbol Transformation Table */
+    {   unsigned total = 0;   /* running sum of the slots taken by the previous symbols */
+        unsigned s;
+        for (s=0; s<=maxSymbolValue; s++) {
+            switch (normalizedCounter[s])
+            {
+            case  0: break;   /* absent symbol : its entry is left untouched */
+
+            case -1:   /* low proba symbol : takes a single slot, exactly like a count of 1 */
+            case  1:
+                symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
+                symbolTT[s].deltaFindState = total - 1;
+                total ++;
+                break;
+            default :
+                {
+                    U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
+                    U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
+                    symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
+                    symbolTT[s].deltaFindState = total - normalizedCounter[s];
+                    total +=  normalizedCounter[s];
+    }   }   }   }
+
+    return 0;
+}
+
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/*-**************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
+{   /* upper bound, in bytes, of a serialized NCount header ; maxSymbolValue==0 selects the default bound */
+    if (maxSymbolValue == 0) return FSE_NCOUNTBOUND;
+    return (size_t)((((maxSymbolValue+1) * tableLog) >> 3) + 3);
+}
+
+static short FSE_abs(short a) { if (a >= 0) return a; return (short)-a; }   /* absolute value of a short */
+/* FSE_writeNCount_generic() : serializes tableLog then normalizedCounter[] as a bit stream into `header`. When writeIsSafe==0, each 2-byte write is first checked against headerBufferSize. @return : nb of bytes written, or an error code */
+static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
+                                       const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
+                                       unsigned writeIsSafe)
+{
+    BYTE* const ostart = (BYTE*) header;
+    BYTE* out = ostart;
+    BYTE* const oend = ostart + headerBufferSize;
+    int nbBits;
+    const int tableSize = 1 << tableLog;
+    int remaining;
+    int threshold;
+    U32 bitStream;   /* bit accumulator, flushed 16 bits at a time */
+    int bitCount;    /* nb of valid bits currently held in bitStream */
+    unsigned charnum = 0;
+    int previous0 = 0;   /* set when the last written symbol had a count of 0 */
+
+    bitStream = 0;
+    bitCount  = 0;
+    /* Table Size */
+    bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
+    bitCount  += 4;   /* stored on 4 bits */
+
+    /* Init */
+    remaining = tableSize+1;   /* +1 for extra accuracy */
+    threshold = tableSize;
+    nbBits = tableLog+1;
+
+    while (remaining>1) {  /* stops at 1 */
+        if (previous0) {   /* a zero was just written : encode how many more zeros follow */
+            unsigned start = charnum;
+            while (!normalizedCounter[charnum]) charnum++;
+            while (charnum >= start+24) {   /* 24 more zeros : 16 set bits, written out immediately */
+                start+=24;
+                bitStream += 0xFFFFU << bitCount;
+                if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+                out[0] = (BYTE) bitStream;
+                out[1] = (BYTE)(bitStream>>8);
+                out+=2;
+                bitStream>>=16;
+            }
+            while (charnum >= start+3) {   /* 3 more zeros : the 2-bit value 3 */
+                start+=3;
+                bitStream += 3 << bitCount;
+                bitCount += 2;
+            }
+            bitStream += (charnum-start) << bitCount;   /* the 0 to 2 zeros left, on 2 bits */
+            bitCount += 2;
+            if (bitCount>16) {
+                if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+                out[0] = (BYTE)bitStream;
+                out[1] = (BYTE)(bitStream>>8);
+                out += 2;
+                bitStream >>= 16;
+                bitCount -= 16;
+        }   }
+        {   short count = normalizedCounter[charnum++];
+            const short max = (short)((2*threshold-1)-remaining);
+            remaining -= FSE_abs(count);
+            if (remaining<1) return ERROR(GENERIC);
+            count++;   /* +1 for extra accuracy */
+            if (count>=threshold) count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
+            bitStream += count << bitCount;
+            bitCount  += nbBits;
+            bitCount  -= (count<max);   /* values below max take one bit less */
+            previous0  = (count==1);    /* count was incremented above : 1 means the symbol has a count of 0 */
+            while (remaining<threshold) nbBits--, threshold>>=1;   /* fewer points left : following values need fewer bits */
+        }
+        if (bitCount>16) {
+            if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+            out[0] = (BYTE)bitStream;
+            out[1] = (BYTE)(bitStream>>8);
+            out += 2;
+            bitStream >>= 16;
+            bitCount -= 16;
+    }   }
+
+    /* flush remaining bitStream */
+    if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+    out[0] = (BYTE)bitStream;
+    out[1] = (BYTE)(bitStream>>8);
+    out+= (bitCount+7) /8;   /* 2 bytes are written, but only those holding valid bits are counted */
+
+    if (charnum > maxSymbolValue + 1) return ERROR(GENERIC);   /* more symbols were consumed than the alphabet holds */
+
+    return (out-ostart);
+}
+
+
+size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    /* only table logs within [FSE_MIN_TABLELOG, FSE_MAX_TABLELOG] are supported */
+    if ((tableLog < FSE_MIN_TABLELOG) || (tableLog > FSE_MAX_TABLELOG)) return ERROR(GENERIC);
+
+    /* a buffer at least as large as the worst-case header does not need per-write bound checks */
+    {   unsigned const writeIsSafe = (bufferSize >= FSE_NCountWriteBound(maxSymbolValue, tableLog));
+        return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, writeIsSafe);
+    }
+}
+
+
+
+/*-**************************************************************
+*  Counting histogram
+****************************************************************/
+/*! FSE_count_simple() :
+    Byte-by-byte histogram of `src`, stored into table `count`.
+    This function is unsafe : it never checks that the values found in `src` are <= *maxSymbolValuePtr.
+    For this reason, prefer providing a table `count` with 256 elements.
+    On return, *maxSymbolValuePtr is lowered to the largest symbol value actually present (0 when srcSize==0).
+    @return : count of the most numerous element
+*/
+static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
+                               const void* src, size_t srcSize)
+{
+    const BYTE* const first = (const BYTE*)src;
+    unsigned lastSymbol = *maxSymbolValuePtr;
+    unsigned largestCount = 0;
+    size_t pos;
+    U32 symbol;
+
+    memset(count, 0, (lastSymbol+1)*sizeof(*count));
+    if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
+
+    for (pos=0; pos<srcSize; pos++) count[first[pos]]++;
+    /* drop the trailing symbols which never appear */
+    while (count[lastSymbol] == 0) lastSymbol--;
+    *maxSymbolValuePtr = lastSymbol;
+
+    for (symbol=0; symbol<=lastSymbol; symbol++)
+        if (count[symbol] > largestCount) largestCount = count[symbol];
+    return (size_t)largestCount;
+}
+
+
+static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr,
+                                const void* source, size_t sourceSize,
+                                unsigned checkMax)
+{
+    const BYTE* ip = (const BYTE*)source;
+    const BYTE* const iend = ip+sourceSize;
+    unsigned maxSymbolValue = *maxSymbolValuePtr;
+    unsigned max=0;
+
+
+    U32 Counting1[256] = { 0 };
+    U32 Counting2[256] = { 0 };
+    U32 Counting3[256] = { 0 };
+    U32 Counting4[256] = { 0 };
+
+    /* safety checks */
+    if (!sourceSize) {
+        memset(count, 0, maxSymbolValue + 1);
+        *maxSymbolValuePtr = 0;
+        return 0;
+    }
+    if (!maxSymbolValue) maxSymbolValue = 255;            /* 0 == default */
+
+    /* by stripes of 16 bytes */
+    {   U32 cached = MEM_read32(ip); ip += 4;
+        while (ip < iend-15) {
+            U32 c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+        }
+        ip-=4;
+    }
+
+    /* finish last symbols */
+    while (ip<iend) Counting1[*ip++]++;
+
+    if (checkMax) {   /* verify stats will fit into destination table */
+        U32 s; for (s=255; s>maxSymbolValue; s--) {
+            Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
+            if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
+    }   }
+
+    { U32 s; for (s=0; s<=maxSymbolValue; s++) {
+        count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
+        if (count[s] > max) max = count[s];
+    }}
+
+    while (!count[maxSymbolValue]) maxSymbolValue--;
+    *maxSymbolValuePtr = maxSymbolValue;
+    return (size_t)max;
+}
+
+/* fast variant (unsafe : values of `source` beyond the count[] limit are not detected) ; inputs shorter than 1500 bytes use the simple counter */
+size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
+                     const void* source, size_t sourceSize)
+{
+    return (sourceSize >= 1500) ? FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 0)
+                                : FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
+}
+
+size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* source, size_t sourceSize)
+{
+    if (*maxSymbolValuePtr >= 255) {   /* any byte value fits : the limit is set to 255 and the unchecked counter is used */
+        *maxSymbolValuePtr = 255;
+        return FSE_countFast(count, maxSymbolValuePtr, source, sourceSize);
+    }
+    return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 1);   /* checked variant */
+}
+
+
+
+/*-**************************************************************
+*  FSE Compression Code
+****************************************************************/
+/*! FSE_sizeof_CTable() :
+    FSE_CTable is a variable size structure which contains :
+    `U16 tableLog;`
+    `U16 maxSymbolValue;`
+    `U16 nextStateNumber[1 << tableLog];`                         // This size is variable
+    `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];`  // This size is variable
+Allocation is manual (C standard does not support variable-size structures).
+*/
+
+size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
+{
+    /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */
+    FSE_STATIC_ASSERT((size_t)FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)*4 >= sizeof(CTable_max_t));
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC);
+    /* size, in bytes, of a CTable dimensioned for (tableLog, maxSymbolValue) */
+    return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
+}
+
+FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
+{
+    /* the allocation is sized with a table log capped to FSE_TABLELOG_ABSOLUTE_MAX ; malloc()'s result is returned as is */
+    unsigned const cappedLog = (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) ? FSE_TABLELOG_ABSOLUTE_MAX : tableLog;
+    size_t const nbBytes = FSE_CTABLE_SIZE_U32 (cappedLog, maxSymbolValue) * sizeof(U32);
+    return (FSE_CTable*)malloc(nbBytes);
+}
+
+void FSE_freeCTable (FSE_CTable* ct) { void* const block = ct; free(block); }   /* releases a table obtained from FSE_createCTable() */
+
+/* provides the minimum logSize to safely represent a distribution : the smaller of the bits required by srcSize and by the alphabet */
+static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
+{
+    U32 const bitsForSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
+    U32 const bitsForSymbols = BIT_highbit32(maxSymbolValue) + 2;
+    if (bitsForSrc < bitsForSymbols) return bitsForSrc;
+    return bitsForSymbols;
+}
+
+unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
+{
+    U32 const srcBitsCap = BIT_highbit32((U32)(srcSize - 1)) - minus;
+    U32 const floorBits = FSE_minTableLog(srcSize, maxSymbolValue);
+    U32 result = maxTableLog ? maxTableLog : FSE_DEFAULT_TABLELOG;   /* 0 means default */
+    if (result > srcBitsCap) result = srcBitsCap;   /* Accuracy can be reduced */
+    if (result < floorBits) result = floorBits;     /* Need a minimum to safely represent all symbol values */
+    /* finally, clamp into the supported [FSE_MIN_TABLELOG, FSE_MAX_TABLELOG] range */
+    if (result < FSE_MIN_TABLELOG) result = FSE_MIN_TABLELOG;
+    if (result > FSE_MAX_TABLELOG) result = FSE_MAX_TABLELOG;
+    return result;
+}
+
+unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
+{   unsigned const minus = 2;   /* margin the internal variant subtracts from the bit width of (srcSize-1) */
+    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, minus);
+}
+
+
+/* Secondary normalization method.
+   To be used when primary method fails. Rewrites norm[0..maxSymbolValue] from count[] ; `total` is the sum still to be scaled.
+   @return : 0 on success, or ERROR(GENERIC) when a remaining symbol would end up with a null weight. */
+static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
+{
+    U32 s;
+    U32 distributed = 0;   /* nb of table slots already granted (one per symbol flagged -1 or 1) */
+    U32 ToDistribute;
+
+    /* Init */
+    U32 lowThreshold = (U32)(total >> tableLog);
+    U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
+
+    for (s=0; s<=maxSymbolValue; s++) {
+        if (count[s] == 0) {
+            norm[s]=0;
+            continue;
+        }
+        if (count[s] <= lowThreshold) {
+            norm[s] = -1;
+            distributed++;
+            total -= count[s];
+            continue;
+        }
+        if (count[s] <= lowOne) {
+            norm[s] = 1;
+            distributed++;
+            total -= count[s];
+            continue;
+        }
+        norm[s]=-2;   /* placeholder : real weight is computed by the final pass */
+    }
+    ToDistribute = (1 << tableLog) - distributed;
+    if (ToDistribute == 0) return 0;   /* every slot is already granted : also avoids dividing by zero just below */
+    if ((total / ToDistribute) > lowOne) {
+        /* risk of rounding to zero */
+        lowOne = (U32)((total * 3) / (ToDistribute * 2));
+        for (s=0; s<=maxSymbolValue; s++) {
+            if ((norm[s] == -2) && (count[s] <= lowOne)) {
+                norm[s] = 1;
+                distributed++;
+                total -= count[s];
+                continue;
+        }   }
+        ToDistribute = (1 << tableLog) - distributed;
+    }
+
+    if ((distributed == maxSymbolValue+1) || (total == 0)) {
+        /* all values are pretty poor (total==0 : no symbol is left to scale, the division below would be by zero);
+           probably incompressible data (should have already been detected);
+           find max, then give all remaining points to max */
+        U32 maxV = 0, maxC = 0;
+        for (s=0; s<=maxSymbolValue; s++)
+            if (count[s] > maxC) maxV=s, maxC=count[s];
+        norm[maxV] += (short)ToDistribute;
+        return 0;
+    }
+
+    {
+        U64 const vStepLog = 62 - tableLog;
+        U64 const mid = (1ULL << (vStepLog-1)) - 1;
+        U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total;   /* scale on remaining */
+        U64 tmpTotal = mid;
+        for (s=0; s<=maxSymbolValue; s++) {
+            if (norm[s]==-2) {
+                U64 end = tmpTotal + (count[s] * rStep);
+                U32 sStart = (U32)(tmpTotal >> vStepLog);
+                U32 sEnd = (U32)(end >> vStepLog);
+                U32 weight = sEnd - sStart;
+                if (weight < 1)
+                    return ERROR(GENERIC);
+                norm[s] = (short)weight;
+                tmpTotal = end;
+    }   }   }
+
+    return 0;
+}
+
+/*! FSE_normalizeCount() : converts count[0..maxSymbolValue] into normalizedCounter[], scaled for a table of (1<<tableLog) slots. @return : tableLog (after defaulting), 0 when one symbol accounts for the whole `total` (rle), or an error code */
+size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
+                           const unsigned* count, size_t total,
+                           unsigned maxSymbolValue)
+{
+    /* Sanity checks */
+    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
+    if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported size */
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported size */
+    if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
+
+    {   U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
+
+        U64 const scale = 62 - tableLog;   /* (count*step)>>scale yields a value expressed on tableLog bits */
+        U64 const step = ((U64)1<<62) / total;   /* <== here, one division ! */
+        U64 const vStep = 1ULL<<(scale-20);   /* unit of the rtbTable thresholds */
+        int stillToDistribute = 1<<tableLog;
+        unsigned s;
+        unsigned largest=0;   /* symbol holding the highest probability so far */
+        short largestP=0;
+        U32 lowThreshold = (U32)(total >> tableLog);
+
+        for (s=0; s<=maxSymbolValue; s++) {
+            if (count[s] == total) return 0;   /* rle special case */
+            if (count[s] == 0) { normalizedCounter[s]=0; continue; }
+            if (count[s] <= lowThreshold) {   /* rare symbol : flagged -1, costs one slot */
+                normalizedCounter[s] = -1;
+                stillToDistribute--;
+            } else {
+                short proba = (short)((count[s]*step) >> scale);
+                if (proba<8) {   /* small probability : rounded up when the remainder beats the rtbTable threshold */
+                    U64 restToBeat = vStep * rtbTable[proba];
+                    proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
+                }
+                if (proba > largestP) largestP=proba, largest=s;
+                normalizedCounter[s] = proba;
+                stillToDistribute -= proba;
+        }   }
+        if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
+            /* corner case, need another normalization method */
+            size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
+            if (FSE_isError(errorCode)) return errorCode;
+        }
+        else normalizedCounter[largest] += (short)stillToDistribute;   /* rounding difference is absorbed by the most probable symbol */
+    }
+
+#if 0
+    {   /* Print Table (debug) */
+        U32 s;
+        U32 nTotal = 0;
+        for (s=0; s<=maxSymbolValue; s++)
+            printf("%3i: %4i \n", s, normalizedCounter[s]);
+        for (s=0; s<=maxSymbolValue; s++)
+            nTotal += abs(normalizedCounter[s]);
+        if (nTotal != (1U<<tableLog))
+            printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
+        getchar();
+    }
+#endif
+
+    return tableLog;
+}
+
+
+/* fake FSE_CTable, for raw (uncompressed) input :
+   each of the (1<<nbBits) symbols owns exactly one state */
+size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
+{
+    const unsigned nbStates = 1 << nbBits;
+    const unsigned lastSymbol = nbStates - 1;
+    void* const base = ct;
+    U16* const stateTable = ((U16*) base) + 2;
+    FSE_symbolCompressionTransform* const transforms =
+        (FSE_symbolCompressionTransform*) (void*) (((U32*)base) + 1 /* header */ + (nbStates>>1));   /* assumption : tableLog >= 1 */
+    /* the same deltaNbBits value is shared by all symbols */
+    const U32 sharedDeltaNbBits = (nbBits << 16) - (1 << nbBits);
+    unsigned n;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);             /* min size */
+
+    /* header */
+    stateTable[-2] = (U16) nbBits;
+    stateTable[-1] = (U16) lastSymbol;
+
+    /* Build table */
+    for (n=0; n<nbStates; n++)
+        stateTable[n] = (U16)(nbStates + n);
+
+    /* Build Symbol Transformation Table */
+    for (n=0; n<=lastSymbol; n++) {
+        transforms[n].deltaNbBits = sharedDeltaNbBits;
+        transforms[n].deltaFindState = n-1;
+    }
+
+
+    return 0;
+}
+
+/* fake FSE_CTable, for rle (100% always same symbol) input */
+size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
+{
+    void* const base = ct;
+    U16* const stateTable = ((U16*) base) + 2;
+    FSE_symbolCompressionTransform* const transforms = (FSE_symbolCompressionTransform*) (void*) ((U32*)base + 2);
+    FSE_symbolCompressionTransform* const entry = transforms + symbolValue;
+
+    /* header : table log is 0, and the single symbol is stored as the max symbol value */
+    stateTable[-2] = (U16) 0;
+    stateTable[-1] = (U16) symbolValue;
+
+    /* Build table */
+    stateTable[0] = 0;
+    stateTable[1] = 0;   /* just in case */
+
+    /* Build Symbol Transformation Table : only the entry of symbolValue is written */
+    entry->deltaNbBits = 0;
+    entry->deltaFindState = 0;
+
+    return 0;
+}
+
+/* FSE_compress_usingCTable_generic() : encodes `src` backwards (last byte first) with two interleaved states. `fast` selects BIT_flushBitsFast() over BIT_flushBits(). @return : result of BIT_closeCStream(), or 0 when srcSize <= 2 or when the bit stream cannot be initialized */
+static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
+                           const void* src, size_t srcSize,
+                           const FSE_CTable* ct, const unsigned fast)
+{
+    const BYTE* const istart = (const BYTE*) src;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* ip=iend;   /* read cursor, moving from the end of src towards istart */
+
+
+    BIT_CStream_t bitC;
+    FSE_CState_t CState1, CState2;
+
+    /* init */
+    if (srcSize <= 2) return 0;
+    { size_t const errorCode = BIT_initCStream(&bitC, dst, dstSize);
+      if (FSE_isError(errorCode)) return 0; }
+
+#define FSE_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
+
+    if (srcSize & 1) {   /* odd size : one extra symbol is encoded right away */
+        FSE_initCState2(&CState1, ct, *--ip);
+        FSE_initCState2(&CState2, ct, *--ip);
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        FSE_FLUSHBITS(&bitC);
+    } else {
+        FSE_initCState2(&CState2, ct, *--ip);
+        FSE_initCState2(&CState1, ct, *--ip);
+    }
+
+    /* join to mod 4 */
+    srcSize -= 2;   /* the two symbols consumed by state initialization */
+    if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) {  /* test bit 2 */
+        FSE_encodeSymbol(&bitC, &CState2, *--ip);
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        FSE_FLUSHBITS(&bitC);
+    }
+
+    /* 2 or 4 encoding per loop */
+    for ( ; ip>istart ; ) {
+
+        FSE_encodeSymbol(&bitC, &CState2, *--ip);
+
+        if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
+            FSE_FLUSHBITS(&bitC);
+
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+
+        if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) {  /* this test must be static */
+            FSE_encodeSymbol(&bitC, &CState2, *--ip);
+            FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        }
+
+        FSE_FLUSHBITS(&bitC);
+    }
+
+    FSE_flushCState(&bitC, &CState2);   /* final states are written last, CState2 before CState1 */
+    FSE_flushCState(&bitC, &CState1);
+    return BIT_closeCStream(&bitC);
+}
+
+size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
+                           const void* src, size_t srcSize,
+                           const FSE_CTable* ct)
+{
+    /* The `fast` variant is selected only when dst can hold FSE_BLOCKBOUND(srcSize) bytes.
+       Each call passes a literal flag, as the original code did. */
+    if (dstSize < FSE_BLOCKBOUND(srcSize))
+        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
+
+    return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
+}
+
+
+size_t FSE_compressBound(size_t size) { size_t const bound = FSE_COMPRESSBOUND(size); return bound; }   /* function form of the FSE_COMPRESSBOUND macro */
+
+size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
+{
+    BYTE* const outBegin = (BYTE*) dst;
+    BYTE* const outLimit = outBegin + dstSize;
+    BYTE* outPtr = outBegin;
+
+    U32   histogram[FSE_MAX_SYMBOL_VALUE+1];
+    S16   normalized[FSE_MAX_SYMBOL_VALUE+1];
+    CTable_max_t cTable;
+
+    /* init conditions */
+    if (srcSize <= 1) return 0;  /* Uncompressible */
+    if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
+
+    /* Scan input and build symbol stats */
+    {   size_t const largestCount = FSE_count (histogram, &maxSymbolValue, src, srcSize);
+        if (FSE_isError(largestCount)) return largestCount;
+        if (largestCount == srcSize) return 1;   /* one symbol fills the whole input */
+        if (largestCount == 1) return 0;   /* each symbol only present once */
+        if (largestCount < (srcSize >> 7)) return 0;   /* Heuristic : not compressible enough */
+    }
+
+    /* Select the table log, then normalize the statistics */
+    tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
+    {   size_t const normResult = FSE_normalizeCount (normalized, tableLog, histogram, srcSize, maxSymbolValue);
+        if (FSE_isError(normResult)) return normResult;
+    }
+
+    /* Write table description header */
+    {   size_t const headerSize = FSE_writeNCount (outPtr, outLimit-outPtr, normalized, maxSymbolValue, tableLog);
+        if (FSE_isError(headerSize)) return headerSize;
+        outPtr += headerSize;
+    }
+
+    /* Compress */
+    {   size_t const buildResult = FSE_buildCTable (cTable, normalized, maxSymbolValue, tableLog);
+        if (FSE_isError(buildResult)) return buildResult;
+    }
+    {   size_t const payloadSize = FSE_compress_usingCTable(outPtr, outLimit - outPtr, src, srcSize, cTable);
+        if (payloadSize == 0) return 0;   /* not enough space for compressed data */
+        outPtr += payloadSize;
+    }
+
+    /* check compressibility */
+    if ( (size_t)(outPtr-outBegin) >= srcSize-1 ) return 0;
+    return outPtr-outBegin;
+}
+
+size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
+{   size_t const forwardedSize = (U32)srcSize;   /* the size is narrowed to 32 bits before being forwarded */
+    return FSE_compress2(dst, dstSize, src, forwardedSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
+}
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fse_decompress.cpp b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fse_decompress.cpp
new file mode 100755
index 0000000000..918de64c5d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/fse_decompress.cpp
@@ -0,0 +1,331 @@
+/* ******************************************************************
+   FSE : Finite State Entropy decoder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  ifdef __GNUC__
+#    define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+#include "bitstream.h"
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_isError ERR_isError
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+FSE_DTable* FSE_createDTable (unsigned tableLog)
+{   /* Heap-allocates room for a decoding table; a tableLog above FSE_TABLELOG_ABSOLUTE_MAX is capped to it. The malloc() result is returned as is (may be NULL). */
+    unsigned const cappedLog = (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) ? FSE_TABLELOG_ABSOLUTE_MAX : tableLog;
+    return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(cappedLog) * sizeof (U32) );
+}
+
+void FSE_freeDTable (FSE_DTable* dt)
+{   /* Releases a table obtained from FSE_createDTable(), which allocates it with malloc(). */
+    free(dt);
+}
+
+size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{   /* Fills `dt` (header, then 2^tableLog decode cells) from `normalizedCounter`. @return : 0, or an error code on out-of-range parameters or inconsistent counts. */
+    void* const tdPtr = dt+1;   /* cells start right after the header slot; because *dt is unsigned, 32-bits aligned on 32-bits */
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];   /* per symbol : next state value to hand out in "Build Decoding table" */
+
+    U32 const maxSV1 = maxSymbolValue + 1;
+    U32 const tableSize = 1 << tableLog;   /* NOTE(review): this shift is evaluated before tableLog is range-checked below */
+    U32 highThreshold = tableSize-1;   /* highest cell still free for symbols whose count is -1 */
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols : symbols with count -1 take the highest cells, walking down from highThreshold */
+    {   FSE_DTableHeader DTableH;
+        DTableH.tableLog = (U16)tableLog;
+        DTableH.fastMode = 1;
+        {   S16 const largeLimit= (S16)(1 << (tableLog-1));   /* half of tableSize */
+            U32 s;
+            for (s=0; s<maxSV1; s++) {
+                if (normalizedCounter[s]==-1) {
+                    tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+                    symbolNext[s] = 1;
+                } else {
+                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;   /* any count >= half the table clears fastMode */
+                    symbolNext[s] = normalizedCounter[s];
+        }   }   }
+        memcpy(dt, &DTableH, sizeof(DTableH));   /* header is copied into the first slot of dt */
+    }
+
+    /* Spread symbols : symbol s receives normalizedCounter[s] cells, visited with stride `step`, skipping cells above highThreshold */
+    {   U32 const tableMask = tableSize-1;
+        U32 const step = FSE_TABLESTEP(tableSize);
+        U32 s, position = 0;
+        for (s=0; s<maxSV1; s++) {
+            int i;
+            for (i=0; i<normalizedCounter[s]; i++) {
+                tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+                position = (position + step) & tableMask;
+                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }   }
+
+        if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+    }
+
+    /* Build Decoding table : for every cell, take the symbol's next state value and derive nbBits and newState from it */
+    {   U32 u;
+        for (u=0; u<tableSize; u++) {
+            FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
+            U16 nextState = symbolNext[symbol]++;
+            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+            tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+    }   }
+
+    return 0;
+}
+
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/*-*******************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    /* Single-symbol table : a header with tableLog 0 followed by exactly one decode cell. Always returns 0. */
+    void* const headerPos = dt;        /* the header sits in the first FSE_DTable slot */
+    void* const cellPos = dt + 1;      /* the only cell follows right behind it */
+    FSE_DTableHeader* const header = (FSE_DTableHeader*)headerPos;
+    FSE_decode_t* const onlyCell = (FSE_decode_t*)cellPos;
+
+    header->tableLog = 0;
+    header->fastMode = 0;
+
+    onlyCell->symbol = symbolValue;
+    onlyCell->nbBits = 0;
+    onlyCell->newState = 0;
+    return 0;
+}
+
+
+size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    /* Raw (identity) table : 2^nbBits cells, where cell i holds symbol (BYTE)i, nbBits bits and newState 0.
+       @return : 0, or ERROR(GENERIC) when nbBits is 0. */
+    void* const headerPos = dt;       /* the header sits in the first FSE_DTable slot */
+    void* const cellsPos = dt + 1;    /* the cells are laid out right behind it */
+    FSE_DTableHeader* const header = (FSE_DTableHeader*)headerPos;
+    FSE_decode_t* const cells = (FSE_decode_t*)cellsPos;
+    unsigned const nbCells = 1 << nbBits;
+    unsigned idx = 0;
+
+    /* a raw table needs at least one bit per symbol */
+    if (nbBits < 1) return ERROR(GENERIC);
+
+    /* header first, then one cell per possible nbBits-wide value */
+    header->tableLog = (U16)nbBits;
+    header->fastMode = 1;
+    while (idx < nbCells) {
+        cells[idx].symbol = (BYTE)idx;
+        cells[idx].nbBits = (BYTE)nbBits;
+        cells[idx].newState = 0;
+        idx++;
+    }
+    return 0;
+}
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{   /* Decodes the bitstream cSrc into dst with two alternating states; `fast` selects FSE_decodeSymbolFast() over FSE_decodeSymbol(). @return : bytes written, or an error code. */
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;   /* the 4-symbol loop runs only while op < olimit, so op[0..3] stay below omax */
+
+    BIT_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+
+    /* Init : open the bitstream, then load both states from it (state1 first) */
+    { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+      if (FSE_isError(errorCode)) return errorCode; }
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+    /* FSE_GETSYMBOL picks the decoder matching `fast`; note : the macro is not #undef'd after this function */
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop : state1 and state2 alternate; runs while the reload reports unfinished and op < olimit */
+    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4) {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }   /* keep the 2 symbols already written, finish in the tail loop */
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail : one symbol at a time, alternating states, until a reload reports BIT_DStream_overflow */
+    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+    while (1) {
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);   /* fewer than 2 output bytes left */
+
+        *op++ = FSE_GETSYMBOL(&state1);
+
+        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
+            *op++ = FSE_GETSYMBOL(&state2);   /* last symbol comes from the other state */
+            break;
+        }
+
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);   /* fewer than 2 output bytes left */
+
+        *op++ = FSE_GETSYMBOL(&state2);
+
+        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
+            *op++ = FSE_GETSYMBOL(&state1);   /* last symbol comes from the other state */
+            break;
+    }   }
+
+    return op-ostart;   /* number of bytes written to dst */
+}
+
+
+size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{
+    /* The table header sits at the start of dt; its fastMode flag picks the decoder variant. */
+    const void* const headerPos = dt;
+    const FSE_DTableHeader* const header = (const FSE_DTableHeader*)headerPos;
+
+    /* the last argument stays a literal in each call, so `fast` is a constant inside the FORCE_INLINE generic routine */
+    if (header->fastMode == 0) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+}
+
+
+size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* One-shot decoder : read the normalized-count header, build a decoding table in a local array, then decode the remaining bytes.
+       @return : whatever FSE_decompress_usingDTable() returns, or the error code of the first failing step. */
+    const BYTE* const srcStart = (const BYTE*)cSrc;
+    short normCount[FSE_MAX_SYMBOL_VALUE+1];
+    DTable_max_t dtable;   /* written by FSE_buildDTable() below before it is read */
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    size_t headerSize;
+    size_t buildStatus;
+
+    if (cSrcSize<2) return ERROR(srcSize_wrong);   /* too small input size */
+
+    /* header : normalized counts, maxSymbolValue and tableLog */
+    headerSize = FSE_readNCount (normCount, &maxSymbolValue, &tableLog, srcStart, cSrcSize);
+    if (FSE_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);   /* nothing left to decode after the header */
+
+    buildStatus = FSE_buildDTable (dtable, normCount, maxSymbolValue, tableLog);
+    if (FSE_isError(buildStatus)) return buildStatus;
+
+    return FSE_decompress_usingDTable (dst, maxDstSize, srcStart + headerSize, cSrcSize - headerSize, dtable);
+}
+
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/huf.h b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/huf.h
new file mode 100755
index 0000000000..ef538df32a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/huf.h
@@ -0,0 +1,228 @@
+/* ******************************************************************
+   Huffman coder, part of New Generation Entropy library
+   header file
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef HUF_H_298734234
+#define HUF_H_298734234
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* *** Dependencies *** */
+#include <stddef.h>    /* size_t */
+
+
+/* *** simple functions *** */
+/**
+HUF_compress() :
+    Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
+    'dst' buffer must be already allocated.
+    Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
+    `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
+    @return : size of compressed data (<= `dstCapacity`).
+    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+                     if return == 1, srcData is a single repeated byte symbol (RLE compression).
+                     if HUF_isError(return), compression failed (more details using HUF_getErrorName())
+*/
+size_t HUF_compress(void* dst, size_t dstCapacity,
+              const void* src, size_t srcSize);
+
+/**
+HUF_decompress() :
+    Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated buffer 'dst', of minimum size 'dstSize'.
+    `dstSize` : **must** be the ***exact*** size of original (uncompressed) data.
+    Note : in contrast with FSE, HUF_decompress can regenerate
+           RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
+           because it knows size to regenerate.
+    @return : size of regenerated data (== dstSize),
+              or an error code, which can be tested using HUF_isError()
+*/
+size_t HUF_decompress(void* dst,  size_t dstSize,
+                const void* cSrc, size_t cSrcSize);
+
+
+/* ****************************************
+*  Tool functions
+******************************************/
+#define HUF_BLOCKSIZE_MAX (128 * 1024)
+size_t HUF_compressBound(size_t size);       /**< maximum compressed size (worst case) */
+
+/* Error Management */
+unsigned    HUF_isError(size_t code);        /**< tells if a return value is an error code */
+const char* HUF_getErrorName(size_t code);   /**< provides error code string (useful for debugging) */
+
+
+/* *** Advanced function *** */
+
+/** HUF_compress2() :
+*   Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog` */
+size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+
+
+#ifdef HUF_STATIC_LINKING_ONLY
+
+/* *** Dependencies *** */
+#include "mem.h"   /* U32 */
+
+
+/* *** Constants *** */
+#define HUF_TABLELOG_ABSOLUTEMAX  16   /* absolute limit of HUF_TABLELOG_MAX. Beyond that value, code does not work */
+#define HUF_TABLELOG_MAX  12           /* max configured tableLog (for static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
+#define HUF_TABLELOG_DEFAULT  HUF_TABLELOG_MAX   /* tableLog by default, when not specified */
+#define HUF_SYMBOLVALUE_MAX 255   /* largest symbol value; local weight tables are sized HUF_SYMBOLVALUE_MAX+1 */
+#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
+#  error "HUF_TABLELOG_MAX is too large !"
+#endif
+
+
+/* ****************************************
+*  Static allocation
+******************************************/
+/* HUF buffer bounds (the macro argument is parenthesized so that any expression, e.g. `a ? b : c`, expands correctly) */
+#define HUF_CTABLEBOUND 129
+#define HUF_BLOCKBOUND(size) ((size) + ((size)>>8) + 8)   /* only true if incompressible pre-filtered with fast heuristic */
+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* static allocation of HUF's Compression Table : declares U32 storage `name##hb` of (maxSymbolValue)+1 cells, a void* `name##hv` to it, and `HUF_CElt* name` */
+#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
+    U32 name##hb[(maxSymbolValue)+1]; \
+    void* name##hv = &(name##hb); \
+    HUF_CElt* name = (HUF_CElt*)(name##hv)   /* no final ; */
+
+/* static allocation of HUF's DTable : an array of U32 cells whose first cell is pre-initialized from maxTableLog */
+typedef U32 HUF_DTable;
+#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<(maxTableLog)))   /* 2^maxTableLog cells plus one extra cell */
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1)*0x1000001) }   /* x*0x1000001 == x + (x<<24); note : sized and tagged with maxTableLog-1 */
+#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+        HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog)*0x1000001) }   /* same first-cell encoding, with the full maxTableLog */
+
+
+/* ****************************************
+*  Advanced decompression functions
+******************************************/
+size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
+size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
+
+size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
+size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
+size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
+
+size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
+size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
+
+
+/* ****************************************
+*  HUF detailed API
+******************************************/
+/*!
+HUF_compress() does the following:
+1. count symbol occurrence from source[] into table count[] using FSE_count()
+2. (optional) refine tableLog using HUF_optimalTableLog()
+3. build Huffman table from count using HUF_buildCTable()
+4. save Huffman table to memory buffer using HUF_writeCTable()
+5. encode the data stream using HUF_compress4X_usingCTable()
+
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and regenerate 'CTable' using external methods.
+*/
+/* FSE_count() : find it within "fse.h" */
+unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+typedef struct HUF_CElt_s HUF_CElt;   /* incomplete type */
+size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);
+size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+
+
+/*! HUF_readStats() :
+    Read compact Huffman tree, saved by HUF_writeCTable().
+    `huffWeight` is destination buffer.
+    @return : size read from `src` , or an error Code .
+    Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
+size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize);
+
+/** HUF_readCTable() :
+*   Loading a CTable saved with HUF_writeCTable() */
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned maxSymbolValue, const void* src, size_t srcSize);
+
+
+/*
+HUF_decompress() does the following:
+1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
+2. build Huffman table from save, using HUF_readDTableXn()
+3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable
+*/
+
+/** HUF_selectDecoder() :
+*   Tells which decoder is likely to decode faster,
+*   based on a set of pre-determined metrics.
+*   @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
+*   Assumption : 0 < cSrcSize < dstSize <= 128 KB */
+U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
+
+size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize);
+
+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+
+
+/* single stream variants */
+
+size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+
+size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbol decoder */
+
+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+
+
+#endif /* HUF_STATIC_LINKING_ONLY */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* HUF_H_298734234 */
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/huf_compress.cpp b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/huf_compress.cpp
new file mode 100755
index 0000000000..3533bb6135
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/huf_compress.cpp
@@ -0,0 +1,576 @@
+/* ******************************************************************
+   Huffman encoder, part of New Generation Entropy library
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+#  define inline __inline
+#else
+#  define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#else
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+#include "bitstream.h"
+#define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
+#include "fse.h"        /* header compression */
+#define HUF_STATIC_LINKING_ONLY
+#include "huf.h"
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Utils
+****************************************************************/
+unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
+{   /* Thin wrapper : forwards to FSE_optimalTableLog_internal() with a fixed last argument of 1. NOTE(review): that argument's meaning is defined in fse.h - confirm. */
+    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
+}
+
+
+/* *******************************************************
+*  HUF : Huffman block compression
+*********************************************************/
+struct HUF_CElt_s {
+  U16  val;      /* code value; HUF_readCTable() assigns it in symbol order within each nbBits rank */
+  BYTE nbBits;   /* number of bits of the code */
+};   /* typedef'd to HUF_CElt within huf.h */
+
+typedef struct nodeElt_s {   /* NOTE(review): node used while building the tree; field roles below are inferred from their names - confirm */
+    U32 count;     /* presumably the occurrence count of the symbol / node */
+    U16 parent;    /* presumably the index of the parent node */
+    BYTE byte;     /* presumably the symbol value */
+    BYTE nbBits;   /* code length in bits (compared against maxNbBits in HUF_setMaxHeight()) */
+} nodeElt;
+
+/*! HUF_writeCTable() :
+    Saves the code lengths of `CTable` (symbols 0..maxSymbolValue-1) into `dst` : one header byte, then FSE-compressed weights, nothing (RLE), or raw 4-bit weights.
+    @return : size of saved CTable, or an error code (dstSize_tooSmall, tableLog_tooLarge, GENERIC, corruption_detected, or any error from FSE_compress()) */
+size_t HUF_writeCTable (void* dst, size_t maxDstSize,
+                        const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
+{
+    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];
+    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
+    U32 n;
+    BYTE* op = (BYTE*)dst;
+    size_t size;
+
+    /* check conditions */
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX + 1) return ERROR(GENERIC);
+    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);   /* bitsToWeight[] only has HUF_TABLELOG_MAX+1 entries */
+    if (maxDstSize < 1) return ERROR(dstSize_tooSmall);   /* every successful path writes op[0]; also keeps maxDstSize-1 from wrapping */
+
+    bitsToWeight[0] = 0;   /* convert to weight : nbBits 0 maps to weight 0, nbBits n to huffLog+1-n */
+    for (n=1; n<=huffLog; n++)
+        bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
+    for (n=0; n<maxSymbolValue; n++)
+        huffWeight[n] = bitsToWeight[CTable[n].nbBits];
+
+    size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue);   /* don't need last symbol stat : implied */
+    if (HUF_isError(size)) return size;
+    if (size >= 128) return ERROR(GENERIC);   /* should never happen, since maxSymbolValue <= 255 */
+    if ((size <= 1) || (size >= maxSymbolValue/2)) {
+        if (size==1) {  /* RLE */
+            /* only possible case : series of 1 (because there are at least 2) */
+            /* can only be 2^n or (2^n-1), otherwise not an huffman tree */
+            BYTE code;
+            switch(maxSymbolValue)
+            {
+            case 1: code = 0; break;
+            case 2: code = 1; break;
+            case 3: code = 2; break;
+            case 4: code = 3; break;
+            case 7: code = 4; break;
+            case 8: code = 5; break;
+            case 15: code = 6; break;
+            case 16: code = 7; break;
+            case 31: code = 8; break;
+            case 32: code = 9; break;
+            case 63: code = 10; break;
+            case 64: code = 11; break;
+            case 127: code = 12; break;
+            case 128: code = 13; break;
+            default : return ERROR(corruption_detected);
+            }
+            op[0] = (BYTE)(255-13 + code);   /* header bytes 242..255 encode the RLE symbol count */
+            return 1;
+        }
+         /* Not compressible : store the weights raw, two 4-bit weights per byte */
+        if (maxSymbolValue > (241-128)) return ERROR(GENERIC);   /* not implemented (not possible with current format) */
+        if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
+        op[0] = (BYTE)(128 /*special case*/ + 0 /* Not Compressible */ + (maxSymbolValue-1));
+        huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause issue in final combination */
+        for (n=0; n<maxSymbolValue; n+=2)
+            op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
+        return ((maxSymbolValue+1)/2) + 1;
+    }
+
+    /* normal header case : header byte holds the FSE-compressed size (< 128) */
+    op[0] = (BYTE)size;
+    return size+1;
+}
+
+/*! HUF_readCTable() : rebuilds `CTable` (nbBits and val of its first nbSymbols entries) from a header saved by HUF_writeCTable().
+    @return : size read from `src`, or an error code (tableLog_tooLarge, maxSymbolValue_tooSmall, or any error from HUF_readStats()) */
+size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
+    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    size_t readSize;
+    U32 nbSymbols = 0;
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
+
+    /* get symbol weights */
+    readSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(readSize)) return readSize;
+
+    /* check result */
+    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    if (nbSymbols > maxSymbolValue+1) return ERROR(maxSymbolValue_tooSmall);
+
+    /* Prepare base value per rank (note : rankVal is not read again in this function) */
+    {   U32 n, nextRankStart = 0;
+        for (n=1; n<=tableLog; n++) {
+            U32 current = nextRankStart;
+            nextRankStart += (rankVal[n] << (n-1));
+            rankVal[n] = current;
+    }   }
+
+    /* fill nbBits */
+    { U32 n; for (n=0; n<nbSymbols; n++) {
+        const U32 w = huffWeight[n];
+        CTable[n].nbBits = (BYTE)(tableLog + 1 - w);   /* note : w==0 yields tableLog+1 */
+    }}
+
+    /* fill val */
+    {   U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0};   /* +2 : nbBits reaches tableLog+1 when w==0, i.e. HUF_TABLELOG_MAX+1 at most */
+        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
+        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
+        /* determine starting value per rank */
+        {   U16 min = 0;
+            U32 n; for (n=tableLog; n>0; n--) {   /* start at tableLog (w==1) so that w==0 symbols do not shift the real ranks */
+                valPerRank[n] = min;      /* get starting value within each rank */
+                min += nbPerRank[n];
+                min >>= 1;
+        }   }
+        /* assign value within rank, symbol order; only the nbSymbols entries whose nbBits was filled above */
+        { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
+    }
+
+    return readSize;
+}
+
+/* HUF_setMaxHeight() : caps the code lengths of huffNode[0..lastNonNull] at maxNbBits. Returns largestBits when no capping is needed, maxNbBits otherwise. */
+static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
+{
+    const U32 largestBits = huffNode[lastNonNull].nbBits;   /* presumably the longest code : HUF_buildCTable() passes nodes sorted by decreasing count */
+    if (largestBits <= maxNbBits) return largestBits;   /* early exit : no elt > maxNbBits */
+
+    /* there are several too large elements (at least >= 2) */
+    {   int totalCost = 0;
+        const U32 baseCost = 1 << (largestBits - maxNbBits);
+        U32 n = lastNonNull;
+
+        while (huffNode[n].nbBits > maxNbBits) {   /* shorten every too-long code, accumulating what the shortening costs */
+            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
+            huffNode[n].nbBits = (BYTE)maxNbBits;
+            n --;
+        }  /* n stops at huffNode[n].nbBits <= maxNbBits */
+        while (huffNode[n].nbBits == maxNbBits) n--;   /* n end at index of smallest symbol using < maxNbBits */
+
+        /* renorm totalCost */
+        totalCost >>= (largestBits - maxNbBits);  /* note : totalCost is necessarily a multiple of baseCost */
+
+        /* repay normalized cost */
+        {   U32 const noSymbol = 0xF0F0F0F0;
+            U32 rankLast[HUF_TABLELOG_MAX+1];   /* indexed by (maxNbBits - nbBits) */
+            int pos;
+
+            /* Get pos of last (smallest) symbol per rank */
+            memset(rankLast, 0xF0, sizeof(rankLast));   /* every entry starts equal to noSymbol */
+            {   U32 currentNbBits = maxNbBits;
+                for (pos=n ; pos >= 0; pos--) {
+                    if (huffNode[pos].nbBits >= currentNbBits) continue;
+                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
+                    rankLast[maxNbBits-currentNbBits] = pos;
+            }   }
+            /* each iteration lengthens one code by 1 bit and deducts 1 << (nBitsToDecrease-1) from the cost */
+            while (totalCost > 0) {
+                U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
+                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
+                    U32 highPos = rankLast[nBitsToDecrease];
+                    U32 lowPos = rankLast[nBitsToDecrease-1];
+                    if (highPos == noSymbol) continue;
+                    if (lowPos == noSymbol) break;
+                    {   U32 const highTotal = huffNode[highPos].count;
+                        U32 const lowTotal = 2 * huffNode[lowPos].count;
+                        if (highTotal <= lowTotal) break;
+                }   }
+                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
+                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))  /* HUF_TABLELOG_MAX test just to please gcc 5+; but it should not be necessary */
+                    nBitsToDecrease ++;
+                totalCost -= 1 << (nBitsToDecrease-1);
+                if (rankLast[nBitsToDecrease-1] == noSymbol)
+                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];   /* this rank is no longer empty */
+                huffNode[rankLast[nBitsToDecrease]].nbBits ++;
+                if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
+                    rankLast[nBitsToDecrease] = noSymbol;
+                else {
+                    rankLast[nBitsToDecrease]--;
+                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
+                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
+            }   }   /* while (totalCost > 0) */
+
+            while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
+                if (rankLast[1] == noSymbol) {  /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
+                    while (huffNode[n].nbBits == maxNbBits) n--;
+                    huffNode[n+1].nbBits--;
+                    rankLast[1] = n+1;
+                    totalCost++;
+                    continue;
+                }
+                huffNode[ rankLast[1] + 1 ].nbBits--;   /* give one bit back : shorten the code following the last rank 1 symbol */
+                rankLast[1]++;
+                totalCost ++;
+    }   }   }   /* there are several too large elements (at least >= 2) */
+
+    return maxNbBits;
+}
+
+
+typedef struct {
+    U32 base;      /* HUF_sort() : first huffNode index of the bucket */
+    U32 current;   /* HUF_sort() : next free huffNode index within the bucket */
+} rankPos;
+
+static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
+{   /* Fills huffNode[0..maxSymbolValue] with (count, byte) pairs : one bucket per BIT_highbit32(count+1), insertion-sorted inside each bucket */
+    rankPos bucket[32];
+    U32 s;
+    memset(bucket, 0, sizeof(bucket));
+    for (s=0; s<=maxSymbolValue; s++)   /* bucket sizes */
+        bucket[BIT_highbit32(count[s] + 1)].base ++;
+    for (s=30; s>0; s--) bucket[s-1].base += bucket[s].base;   /* running totals, from the top bucket downwards */
+    for (s=0; s<32; s++) bucket[s].current = bucket[s].base;
+    for (s=0; s<=maxSymbolValue; s++) {
+        U32 const cnt = count[s];
+        U32 const b = BIT_highbit32(cnt+1) + 1;
+        U32 slot = bucket[b].current++;
+        while ((slot > bucket[b].base) && (cnt > huffNode[slot-1].count)) {   /* shift smaller counts towards the end */
+            huffNode[slot] = huffNode[slot-1];
+            slot--;
+        }
+        huffNode[slot].count = cnt;
+        huffNode[slot].byte  = (BYTE)s;
+    }
+}
+
+/* HUF_buildCTable() : builds a Huffman code limited to maxNbBits for symbols 0..maxSymbolValue, from their `count`. Returns the max code length, or an error code. */
+#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)   /* index of the first parent node within huffNode */
+size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
+{
+    nodeElt huffNode0[2*HUF_SYMBOLVALUE_MAX+1 +1];
+    nodeElt* huffNode = huffNode0 + 1;   /* huffNode[-1] is addressable : it is huffNode0[0], given a very large count below */
+    U32 n, nonNullRank;
+    int lowS, lowN;
+    U16 nodeNb = STARTNODE;
+    U32 nodeRoot;
+
+    /* safety checks */
+    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;   /* 0 selects the default limit */
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
+    memset(huffNode0, 0, sizeof(huffNode0));
+
+    /* sort, decreasing order */
+    HUF_sort(huffNode, count, maxSymbolValue);
+
+    /* init for parents */
+    nonNullRank = maxSymbolValue;
+    while(huffNode[nonNullRank].count == 0) nonNullRank--;   /* NOTE(review): assumes at least one non-zero count, otherwise nonNullRank wraps around - confirm callers guarantee it */
+    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
+    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;   /* first parent : the two smallest counts */
+    huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
+    nodeNb++; lowS-=2;
+    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);   /* parents not created yet get a large count */
+    huffNode0[0].count = (U32)(1U<<31);   /* entry below index 0 gets an even larger count */
+
+    /* create parents */
+    while (nodeNb <= nodeRoot) {   /* each step merges the two smallest candidates, taken among leaves (lowS) and parents (lowN) */
+        U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+        U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
+        huffNode[n1].parent = huffNode[n2].parent = nodeNb;
+        nodeNb++;
+    }
+
+    /* distribute weights (unlimited tree height) */
+    huffNode[nodeRoot].nbBits = 0;
+    for (n=nodeRoot-1; n>=STARTNODE; n--)   /* parents first, from the root downwards */
+        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+    for (n=0; n<=nonNullRank; n++)   /* then the leaves */
+        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+
+    /* enforce maxTableLog */
+    maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
+
+    /* fill result into tree (val, nbBits) */
+    {   U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
+        U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
+        if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
+        for (n=0; n<=nonNullRank; n++)
+            nbPerRank[huffNode[n].nbBits]++;
+        /* determine starting value per rank */
+        {   U16 min = 0;
+            for (n=maxNbBits; n>0; n--) {
+                valPerRank[n] = min;      /* get starting value within each rank */
+                min += nbPerRank[n];
+                min >>= 1;
+        }   }
+        for (n=0; n<=maxSymbolValue; n++)
+            tree[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
+        for (n=0; n<=maxSymbolValue; n++)
+            tree[n].val = valPerRank[tree[n].nbBits]++;   /* assign value within rank, symbol order */
+    }
+
+    return maxNbBits;
+}
+
+static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
+{   const HUF_CElt* const code = CTable + symbol;   /* code of `symbol` : val, written on nbBits bits */
+    BIT_addBitsFast(bitCPtr, code->val, code->nbBits);
+}
+
+size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }   /* function form of HUF_COMPRESSBOUND() */
+
+#define HUF_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))   /* expects a local `fast` flag at the expansion site */
+/* HUF_FLUSHBITS_1() : flushes only when the bit container is narrower than HUF_TABLELOG_MAX*2+7 bits */
+#define HUF_FLUSHBITS_1(stream) \
+    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
+/* HUF_FLUSHBITS_2() : same, with a threshold of HUF_TABLELOG_MAX*4+7 bits */
+#define HUF_FLUSHBITS_2(stream) \
+    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
+/* HUF_compress1X_usingCTable() : encodes `src` as one bitstream, last symbol first. Returns the BIT_closeCStream() result, or 0 when dstSize < 8 or the stream cannot be initialized. */
+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+    const BYTE* ip = (const BYTE*) src;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
+    size_t n;
+    const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize));   /* read by HUF_FLUSHBITS() : selects BIT_flushBitsFast() */
+    BIT_CStream_t bitC;
+
+    /* init */
+    if (dstSize < 8) return 0;   /* not enough space to compress */
+    { size_t const errorCode = BIT_initCStream(&bitC, op, oend-op);
+      if (HUF_isError(errorCode)) return 0; }
+
+    n = srcSize & ~3;  /* join to mod 4 */
+    switch (srcSize & 3)   /* encode the 0-3 trailing symbols first; every case falls through to the next one */
+    {
+        case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
+                 HUF_FLUSHBITS_2(&bitC);   /* fall through */
+        case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
+                 HUF_FLUSHBITS_1(&bitC);   /* fall through */
+        case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
+                 HUF_FLUSHBITS(&bitC);     /* fall through */
+        case 0 :
+        default: ;
+    }
+
+    for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
+        HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
+        HUF_FLUSHBITS_1(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
+        HUF_FLUSHBITS_2(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
+        HUF_FLUSHBITS_1(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
+        HUF_FLUSHBITS(&bitC);   /* unconditional flush once per group of 4 symbols */
+    }
+
+    return BIT_closeCStream(&bitC);
+}
+
+
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+    /* Splits `src` into 4 segments, each compressed independently with HUF_compress1X_usingCTable().
+     * Output layout :
+     *   bytes 0-5 : jump table, i.e. the compressed sizes of streams 1, 2 and 3 (16-bit little-endian each)
+     *   followed by the 4 compressed streams, back to back (the size of stream 4 is not stored).
+     * Returns the total number of bytes written, 0 when compression is not possible or not worthwhile,
+     * or the error code reported for a stream. */
+    BYTE* const outBase = (BYTE*) dst;
+    BYTE* const outLimit = outBase + dstSize;
+    BYTE* outPtr = outBase;
+    const BYTE* inPtr = (const BYTE*) src;
+    const BYTE* const inLimit = inPtr + srcSize;
+    size_t const quarter = (srcSize+3)/4;   /* input size of each of the first 3 segments */
+    U32 streamNb;
+
+    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
+    if (srcSize < 12) return 0;   /* no saving possible : too small input */
+
+    /* reserve room for the jump table */
+    outPtr += 6;
+
+    /* streams 1 to 3 : `quarter` input bytes each, compressed size recorded in the jump table */
+    for (streamNb = 0; streamNb < 3; streamNb++) {
+        size_t const streamCSize = HUF_compress1X_usingCTable(outPtr, outLimit-outPtr, inPtr, quarter, CTable);
+
+        if (HUF_isError(streamCSize)) return streamCSize;
+        if (streamCSize == 0) return 0;
+
+        MEM_writeLE16(outBase + 2*streamNb, (U16)streamCSize);
+        outPtr += streamCSize;
+        inPtr += quarter;
+    }
+
+    /* stream 4 : whatever input remains */
+    {   size_t const lastCSize = HUF_compress1X_usingCTable(outPtr, outLimit-outPtr, inPtr, inLimit-inPtr, CTable);
+
+        if (HUF_isError(lastCSize)) return lastCSize;
+        if (lastCSize == 0) return 0;
+
+        outPtr += lastCSize;
+    }
+
+    return outPtr-outBase;
+}
+
+
+static size_t HUF_compress_internal (
+                void* dst, size_t dstSize,
+                const void* src, size_t srcSize,
+                unsigned maxSymbolValue, unsigned huffLog,
+                unsigned singleStream)
+{
+    /* Pipeline : histogram -> Huffman table -> table header -> payload (1 stream when singleStream != 0, else 4).
+     * Returns the compressed size, 0 when the input should not be compressed this way, 1 for a single-byte rle
+     * block, or an error code. */
+    BYTE* const outStart = (BYTE*)dst;
+    BYTE* const outEnd = outStart + dstSize;
+    BYTE* outPos = outStart;
+    size_t result;   /* outcome of each stage, in turn */
+    U32 symbolCount[HUF_SYMBOLVALUE_MAX+1];
+    HUF_CElt huffTable[HUF_SYMBOLVALUE_MAX+1];
+
+    /* checks & inits */
+    if (srcSize == 0) return 0;  /* Uncompressed (note : 1 means rle, so first byte must be correct) */
+    if (dstSize == 0) return 0;  /* cannot fit within dst budget */
+    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
+    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    if (maxSymbolValue == 0) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+    if (huffLog == 0) huffLog = HUF_TABLELOG_DEFAULT;
+
+    /* Scan input and build symbol stats */
+    result = FSE_count (symbolCount, &maxSymbolValue, (const BYTE*)src, srcSize);
+    if (HUF_isError(result)) return result;
+    if (result == srcSize) { *outStart = ((const BYTE*)src)[0]; return 1; }   /* rle */
+    if (result <= (srcSize >> 7)+1) return 0;   /* Fast heuristic : not compressible enough */
+
+    /* Build Huffman Tree */
+    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
+    result = HUF_buildCTable (huffTable, symbolCount, maxSymbolValue, huffLog);
+    if (HUF_isError(result)) return result;
+    huffLog = (U32)result;   /* HUF_buildCTable() reports the max code length it used */
+
+    /* Write table description header */
+    result = HUF_writeCTable (outPos, dstSize, huffTable, maxSymbolValue, huffLog);
+    if (HUF_isError(result)) return result;
+    if (result + 12 >= srcSize) return 0;   /* not useful to try compression */
+    outPos += result;
+
+    /* Compress */
+    if (singleStream)
+        result = HUF_compress1X_usingCTable(outPos, outEnd - outPos, src, srcSize, huffTable);   /* single segment */
+    else
+        result = HUF_compress4X_usingCTable(outPos, outEnd - outPos, src, srcSize, huffTable);
+    if (HUF_isError(result)) return result;
+    if (result == 0) return 0;   /* uncompressible */
+    outPos += result;
+
+    /* check compressibility */
+    if ((size_t)(outPos-outStart) >= srcSize-1)
+        return 0;
+
+    return outPos-outStart;
+}
+
+/* HUF_compress1X() : HUF_compress_internal() with singleStream == 1 (payload written as one stream) */
+size_t HUF_compress1X (void* dst, size_t dstSize,
+                 const void* src, size_t srcSize,
+                 unsigned maxSymbolValue, unsigned huffLog)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1);
+}
+/* HUF_compress2() : HUF_compress_internal() with singleStream == 0 (payload written as 4 streams) */
+size_t HUF_compress2 (void* dst, size_t dstSize,
+                const void* src, size_t srcSize,
+                unsigned maxSymbolValue, unsigned huffLog)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0);
+}
+
+/* HUF_compress() : 4-streams compression with default parameters (maxSymbolValue 255, HUF_TABLELOG_DEFAULT). */
+size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{   /* srcSize is forwarded unchanged : narrowing it to U32 would wrap sizes above 4 GB and bypass the HUF_BLOCKSIZE_MAX check */
+    return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/huf_decompress.cpp b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/huf_decompress.cpp
new file mode 100755
index 0000000000..1580b3750a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/huf_decompress.cpp
@@ -0,0 +1,894 @@
+/* ******************************************************************
+   Huffman decoder, part of New Generation Entropy library
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is a native keyword in C++ and C99 : nothing to define */
+#elif defined(_MSC_VER)
+#  define inline __inline
+#else
+#  define inline /* disable inline */
+#endif
+
+/* FORCE_INLINE : `static` plus the inlining request available for the detected compiler */
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#else
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <string.h>     /* memcpy, memset */
+#include "bitstream.h"
+#include "fse.h"        /* header compression */
+#define HUF_STATIC_LINKING_ONLY
+#include "huf.h"
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* compile-time check : the enum value divides by zero when c is false; use only *after* variable declarations */
+
+
+/*-***************************/
+/*  generic DTableDesc       */
+/*-***************************/
+
+typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
+/* HUF_getDTableDesc() : returns a copy of the descriptor held in the first sizeof(DTableDesc) bytes of `table` */
+static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
+{
+    DTableDesc header;
+    memcpy(&header, table, sizeof(DTableDesc));
+    return header;
+}
+
+
+/*-***************************/
+/*  single-symbol decoding   */
+/*-***************************/
+
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding : decoded byte + number of bits to skip after decoding it */
+
+size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize)
+{
+    /* Reads the weights header from `src` (HUF_readStats()) and fills the single-symbol decoding cells that
+     * follow the descriptor cell of `DTable`. Returns the header size in bytes, or an error code. */
+    BYTE weights[HUF_SYMBOLVALUE_MAX + 1];
+    U32 rankOffset[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    U32 nbSymbols = 0;
+    size_t headerSize;
+    void* const cells = DTable + 1;
+    HUF_DEltX2* const dt = (HUF_DEltX2*)cells;
+    DTableDesc desc = HUF_getDTableDesc(DTable);
+    U32 s, w;
+
+    HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
+    headerSize = HUF_readStats(weights, HUF_SYMBOLVALUE_MAX + 1, rankOffset, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(headerSize)) return headerSize;
+
+    /* Table header */
+    if (tableLog > (U32)(desc.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, huffman tree cannot fit in */
+    desc.tableType = 0;
+    desc.tableLog = (BYTE)tableLog;
+    memcpy(DTable, &desc, sizeof(desc));
+
+    /* Prepare ranks : each weight receives the index of its first cell */
+    {   U32 nextStart = 0;
+        for (w=1; w<tableLog+1; w++) {
+            U32 const start = nextStart;
+            nextStart += (rankOffset[w] << (w-1));
+            rankOffset[w] = start;
+    }   }
+
+    /* fill DTable : a symbol of weight w owns (1<<w)>>1 consecutive cells */
+    for (s=0; s<nbSymbols; s++) {
+        U32 const weight = weights[s];
+        U32 const firstCell = rankOffset[weight];
+        U32 const endCell = firstCell + ((1 << weight) >> 1);
+        U32 c;
+        HUF_DEltX2 cell;
+        cell.byte = (BYTE)s;
+        cell.nbBits = (BYTE)(tableLog + 1 - weight);
+        for (c = firstCell; c < endCell; c++) dt[c] = cell;
+        rankOffset[weight] = endCell;
+    }
+
+    return headerSize;
+}
+
+
+static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+{   /* looks at the next dtLog bits to select a cell, then skips only the nbBits recorded in that cell */
+    const HUF_DEltX2* const cell = dt + BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+    BYTE const symbol = cell->byte;
+    BIT_skipBits(Dstream, cell->nbBits);
+    return symbol;
+}
+/* Decoding macros : expect `dt` and `dtLog` in scope at the expansion site. _0 always decodes one symbol. */
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+/* _1 : decodes one symbol only when MEM_64bits() is true or HUF_TABLELOG_MAX <= 12 */
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+/* _2 : decodes one symbol only when MEM_64bits() is true */
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+/* HUF_decodeStreamX2() : decodes symbols from bitDPtr into [p, pEnd) until p reaches pEnd. Returns pEnd minus the initial p. */
+static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4)) {
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);   /* the _1 and _2 variants may expand to nothing, see their definitions */
+        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* one symbol per reload */
+
+    /* no more data to retrieve from bitstream, hence no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+static size_t HUF_decompress1X2_usingDTable_internal(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    /* Decodes one bitstream of single-symbol codes into exactly dstSize bytes.
+     * Returns dstSize, or an error code when the stream cannot be opened or is not fully consumed. */
+    BYTE* const outStart = (BYTE*)dst;
+    BYTE* const outEnd = outStart + dstSize;
+    const void* const cells = DTable + 1;   /* decoding cells follow the descriptor cell */
+    const HUF_DEltX2* const dt = (const HUF_DEltX2*)cells;
+    U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
+    BIT_DStream_t bitD;
+    size_t const initResult = BIT_initDStream(&bitD, cSrc, cSrcSize);
+
+    if (HUF_isError(initResult)) return initResult;
+
+    HUF_decodeStreamX2(outStart, &bitD, outEnd, dt, dtLog);
+
+    /* check */
+    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+    return dstSize;
+}
+
+size_t HUF_decompress1X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{   /* only accepts a table whose descriptor has tableType == 0 */
+    if (HUF_getDTableDesc(DTable).tableType != 0)
+        return ERROR(GENERIC);
+    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+
+size_t HUF_decompress1X2_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* cSrc = table header, immediately followed by one compressed stream */
+    size_t const headerSize = HUF_readDTableX2 (DCtx, cSrc, cSrcSize);
+    const BYTE* payload;
+
+    if (HUF_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);   /* nothing left after the header */
+    payload = (const BYTE*)cSrc + headerSize;
+    return HUF_decompress1X2_usingDTable_internal (dst, dstSize, payload, cSrcSize - headerSize, DCtx);
+}
+/* HUF_decompress1X2() : HUF_decompress1X2_DCtx() on a table declared locally through HUF_CREATE_STATIC_DTABLEX2() */
+size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+    return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
+}
+
+
+static size_t HUF_decompress4X2_usingDTable_internal(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    /* Decodes 4 streams, located through the 6-byte jump table, into 4 consecutive segments of dst. Returns dstSize, or an error code. */
+    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable + 1;
+        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        size_t const length1 = MEM_readLE16(istart);
+        size_t const length2 = MEM_readLE16(istart+2);
+        size_t const length3 = MEM_readLE16(istart+4);
+        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);   /* not stored : deduced from the total size */
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;   /* output size of each of the first 3 segments */
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+        DTableDesc const dtd = HUF_getDTableDesc(DTable);
+        U32 const dtLog = dtd.tableLog;
+        if (3*segmentSize > dstSize) return ERROR(corruption_detected);   /* dstSize 1, 2 or 5 : segments would extend beyond oend, decoding would write out of bounds */
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
+          if (HUF_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
+          if (HUF_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
+          if (HUF_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
+          if (HUF_isError(errorCode)) return errorCode; }
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) {
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check : every stream must be entirely consumed */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+size_t HUF_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{   /* only accepts a table whose descriptor has tableType == 0 */
+    if (HUF_getDTableDesc(DTable).tableType != 0)
+        return ERROR(GENERIC);
+    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+
+
+size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* cSrc = table header, immediately followed by the 4-streams payload */
+    size_t const headerSize = HUF_readDTableX2 (dctx, cSrc, cSrcSize);
+    const BYTE* payload;
+
+    if (HUF_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);   /* nothing left after the header */
+    payload = (const BYTE*)cSrc + headerSize;
+    return HUF_decompress4X2_usingDTable_internal (dst, dstSize, payload, cSrcSize - headerSize, dctx);
+}
+/* HUF_decompress4X2() : HUF_decompress4X2_DCtx() on a table declared locally through HUF_CREATE_STATIC_DTABLEX2() */
+size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+    return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+}
+
+
+/* *************************/
+/* double-symbols decoding */
+/* *************************/
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;  /* double-symbols decoding : `sequence` holds 1 or 2 symbols (written little-endian), `length` tells how many */
+
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;   /* one entry of the sorted symbol lists read by the HUF_fillDTableX4*() functions */
+/* HUF_fillDTableX4Level2() : fills 1 << sizeLog cells sharing the first symbol baseSeq; each cell stores baseSeq alone (length 1) or baseSeq + a second symbol (length 2) */
+static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{
+    HUF_DEltX4 DElt;
+    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
+
+    /* get pre-calculated rankVal */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));   /* local copy : entries are advanced while filling */
+
+    /* fill skipped values */
+    if (minWeight>1) {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);   /* only the bits of the first symbol */
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    { U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));   /* second symbol goes into the high byte */
+        DElt.nbBits = (BYTE)(nbBits + consumed);   /* bits of both symbols */
+        DElt.length = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+        rankVal[weight] += length;
+    }}
+}
+
+typedef U32 rankVal_t[HUF_TABLELOG_ABSOLUTEMAX][HUF_TABLELOG_ABSOLUTEMAX + 1];   /* HUF_fillDTableX4() selects a row with nbBits; each row is a rankVal array */
+/* HUF_fillDTableX4() : fills the double-symbols table; each listed symbol gets 1 << (targetLog-nbBits) cells, completed with a second symbol when room allows */
+static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)
+{
+    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));   /* copies sizeof(rankVal) bytes, i.e. the first row of rankValOrigin */
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++) {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        } else {   /* single-symbol cells */
+            HUF_DEltX4 DElt;
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits = (BYTE)(nbBits);
+            DElt.length = 1;
+            {   U32 u;
+                const U32 end = start + length;
+                for (u = start; u < end; u++) DTable[u] = DElt;
+        }   }
+        rankVal[weight] += length;
+    }
+}
+
+size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize)
+{   /* Reads the symbol weights stored at `src` and builds the X4 decoding table inside DTable. Returns the HUF_readStats() result, or an error code. */
+    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
+    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
+    U32 rankStats[HUF_TABLELOG_ABSOLUTEMAX + 1] = { 0 };   /* used below as : number of symbols per weight */
+    U32 rankStart0[HUF_TABLELOG_ABSOLUTEMAX + 2] = { 0 };   /* one extra leading slot, see rankStart */
+    U32* const rankStart = rankStart0+1;   /* rankStart[w] is rankStart0[w+1] */
+    rankVal_t rankVal;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    U32 const maxTableLog = dtd.maxTableLog;
+    size_t iSize;
+    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
+    HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable));   /* if compilation fails here, assertion is false */
+    if (maxTableLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {   U32 w, nextRankStart = 0;
+        for (w=1; w<maxW+1; w++) {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;   /* number of symbols with a weight >= 1 */
+    }
+
+    /* sort symbols by weight */
+    {   U32 s;
+        for (s=0; s<nbSymbols; s++) {
+            U32 const w = weightList[s];
+            U32 const r = rankStart[w]++;   /* each rankStart[w] ends up one past the last symbol of weight w */
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {   U32* const rankVal0 = rankVal[0];
+        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
+            U32 nextRankVal = 0;
+            U32 w;
+            for (w=1; w<maxW+1; w++) {
+                U32 current = nextRankVal;
+                nextRankVal += rankStats[w] << (w+rescale);   /* 2^(w+rescale) cells per symbol of weight w */
+                rankVal0[w] = current;
+        }   }
+        {   U32 const minBits = tableLog+1 - maxW;
+            U32 consumed;
+            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
+                U32* const rankValPtr = rankVal[consumed];
+                U32 w;
+                for (w = 1; w < maxW+1; w++) {
+                    rankValPtr[w] = rankVal0[w] >> consumed;   /* row 0 scaled down by the bits already consumed */
+    }   }   }   }
+
+    HUF_fillDTableX4(dt, maxTableLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,   /* rankStart0, not rankStart : the callee's index w maps to rankStart[w-1] here */
+                   tableLog+1);
+
+    dtd.tableLog = (BYTE)maxTableLog;
+    dtd.tableType = 1;   /* the value the X4 *_usingDTable entry points check for */
+    memcpy(DTable, &dtd, sizeof(dtd));   /* the descriptor occupies the first cell of DTable */
+    return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{   /* Looks up the next dtLog bits, stores the first 2 bytes of the matching cell at op, returns the cell's `length`. */
+    const HUF_DEltX4* const cell = dt + BIT_lookBitsFast(DStream, dtLog);   /* requires dtLog >= 1 */
+    memcpy(op, cell, 2);   /* 2 bytes are always stored; callers advance by the returned length */
+    BIT_skipBits(DStream, cell->nbBits);
+    return cell->length;
+}
+
+static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{   /* Emits exactly one byte from the next cell and always returns 1. */
+    const HUF_DEltX4* const cell = dt + BIT_lookBitsFast(DStream, dtLog);   /* requires dtLog >= 1 */
+    const size_t containerBits = sizeof(DStream->bitContainer)*8;
+    memcpy(op, cell, 1);   /* only the first byte of the cell is stored */
+    if (cell->length == 1) {
+        BIT_skipBits(DStream, cell->nbBits);
+    } else if (DStream->bitsConsumed < containerBits) {   /* 2-symbol cell : nbBits cannot be split, so the counter is clamped (ok only because this is the last symbol) */
+        BIT_skipBits(DStream, cell->nbBits);
+        if (DStream->bitsConsumed > containerBits) DStream->bitsConsumed = containerBits;
+    }
+    return 1;
+}
+
+
+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
+{   /* Decodes symbols from bitDPtr into [p, pEnd) and returns the number of bytes written. */
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time; room is tested on the difference (pEnd - p), so no pointer such as pEnd-7 is formed below the buffer */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && ((pEnd - p) > 7)) {
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+    }
+
+    /* closer to end : up to 2 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && ((pEnd - p) >= 2))
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+    while ((pEnd - p) >= 2)
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)   /* a single byte of room left : the 2-byte store of the regular decoder would not fit */
+        p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+
+static size_t HUF_decompress1X4_usingDTable_internal(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    /* Single-stream decoder working from an already-built DTable.
+     * Result : dstSize on success,
+     *          the error reported by BIT_initDStream() when the bitstream cannot be opened,
+     *          corruption_detected when the bitstream is not at its end after decoding. */
+    BIT_DStream_t stream;
+    size_t const initStatus = BIT_initDStream(&stream, cSrc, cSrcSize);
+
+    if (HUF_isError(initStatus))
+        return initStatus;
+
+    /* decode : the cells start one HUF_DTable element after the descriptor */
+    {   DTableDesc const desc = HUF_getDTableDesc(DTable);
+        const void* const cells = DTable+1;   /* void* hop, so the cast below does not trip strict-aliasing */
+        BYTE* const out = (BYTE*) dst;
+        HUF_decodeStreamX4(out, &stream, out + dstSize, (const HUF_DEltX4*)cells, desc.tableLog);
+    }
+
+    /* the stream must have been consumed entirely */
+    if (BIT_endOfDStream(&stream))
+        return dstSize;
+    return ERROR(corruption_detected);
+}
+
+size_t HUF_decompress1X4_usingDTable(void* dst, size_t dstSize,
+                                     const void* cSrc, size_t cSrcSize,
+                                     const HUF_DTable* DTable)
+{   /* Public single-stream entry point : only accepts a DTable whose descriptor has tableType 1. */
+    DTableDesc const desc = HUF_getDTableDesc(DTable);
+    if (desc.tableType == 1)
+        return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+    return ERROR(GENERIC);
+}
+
+size_t HUF_decompress1X4_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* Step 1 : build the X4 table in DCtx from the header found at the start of cSrc. */
+    size_t const headerSize = HUF_readDTableX4 (DCtx, cSrc, cSrcSize);
+    if (HUF_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);   /* nothing left after the header */
+    /* Step 2 : decode what follows the header as a single bitstream. */
+    {   const BYTE* const payload = (const BYTE*) cSrc + headerSize;
+        return HUF_decompress1X4_usingDTable_internal (dst, dstSize, payload, cSrcSize - headerSize, DCtx);
+    }
+}
+
+size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* Convenience wrapper around HUF_decompress1X4_DCtx() that supplies its own table. */
+    HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);   /* presumably declares `DTable` sized for HUF_TABLELOG_MAX - macro not visible here, confirm */
+    return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+}
+
+static size_t HUF_decompress4X4_usingDTable_internal(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{   /* 4-stream decoder : cSrc starts with a 6-byte jump table (3 LE16 stream sizes) followed by 4 bitstreams,
+     * each decoded into its own segment of dst. Returns dstSize, or an error code. */
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable+1;
+        const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
+
+        /* Init */
+        BIT_DStream_t bitD1, bitD2, bitD3, bitD4;
+        size_t const length1 = MEM_readLE16(istart);
+        size_t const length2 = MEM_readLE16(istart+2);
+        size_t const length3 = MEM_readLE16(istart+4);
+        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);   /* wraps around when the 3 sizes exceed the input */
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        size_t const segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+        DTableDesc const dtd = HUF_getDTableDesc(DTable);
+        U32 const dtLog = dtd.tableLog;
+
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        /* the first 3 segments must fit in dst : otherwise (dstSize 1, 2 or 5) streams 2-3 would be decoded past oend */
+        if (segmentSize * 3 > dstSize) return ERROR(corruption_detected);
+        { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
+          if (HUF_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
+          if (HUF_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
+          if (HUF_isError(errorCode)) return errorCode; }
+        { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
+          if (HUF_isError(errorCode)) return errorCode; }
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && ((oend - op4) > 7) ; ) {   /* room tested on a difference : oend-7 is never formed */
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+          if (!endCheck) return ERROR(corruption_detected); }
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+size_t HUF_decompress4X4_usingDTable(void* dst, size_t dstSize,
+                                     const void* cSrc, size_t cSrcSize,
+                                     const HUF_DTable* DTable)
+{   /* Public 4-stream entry point : only accepts a DTable whose descriptor has tableType 1. */
+    DTableDesc const desc = HUF_getDTableDesc(DTable);
+    if (desc.tableType == 1)
+        return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+    return ERROR(GENERIC);
+}
+
+
+size_t HUF_decompress4X4_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* Step 1 : build the X4 table in dctx from the header found at the start of cSrc. */
+    size_t const headerSize = HUF_readDTableX4 (dctx, cSrc, cSrcSize);
+    if (HUF_isError(headerSize)) return headerSize;
+    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);   /* nothing left after the header */
+    /* Step 2 : decode what follows the header as 4 interleaved-by-segment bitstreams. */
+    {   const BYTE* const payload = (const BYTE*) cSrc + headerSize;
+        return HUF_decompress4X4_usingDTable_internal(dst, dstSize, payload, cSrcSize - headerSize, dctx);
+    }
+}
+
+size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* Convenience wrapper around HUF_decompress4X4_DCtx() that supplies its own table. */
+    HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);   /* presumably declares `DTable` sized for HUF_TABLELOG_MAX - macro not visible here, confirm */
+    return HUF_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+}
+
+
+/* ********************************/
+/* Generic decompression selector */
+/* ********************************/
+
+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
+                                    const void* cSrc, size_t cSrcSize,
+                                    const HUF_DTable* DTable)
+{   /* Dispatches on the table descriptor : tableType 0 -> 1X2 decoder, any other value -> 1X4 decoder. */
+    if (HUF_getDTableDesc(DTable).tableType == 0)
+        return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+    return HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+}
+
+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
+                                    const void* cSrc, size_t cSrcSize,
+                                    const HUF_DTable* DTable)
+{   /* Dispatches on the table descriptor : tableType 0 -> 4X2 decoder, any other value -> 4X4 decoder. */
+    if (HUF_getDTableDesc(DTable).tableType == 0)
+        return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+    return HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+}
+
+
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;   /* fixed cost + cost per 256 output bytes, as combined by HUF_selectDecoder() */
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+    /* single, double, quad ; row index Q = cSrcSize * 16 / dstSize ; units are not stated in this file */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+
+/** HUF_selectDecoder() :
+*   Tells which decoder is likely to decode faster,
+*   based on a set of pre-determined metrics.
+*   @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
+*   Expected use : 0 < cSrcSize < dstSize <= 128 KB. Outside of it, the last row of algoTime is used. */
+U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
+{
+    /* decoder timing evaluation; Q indexes algoTime[16], hence the clamp when cSrcSize >= dstSize (which also covers dstSize==0) */
+    U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 */
+    U32 const D256 = (U32)(dstSize >> 8);   /* output size, in units of 256 bytes */
+    U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
+    U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
+    DTime1 += DTime1 >> 3;  /* advantage to algorithm using less memory, for cache eviction */
+
+    return DTime1 < DTime0;
+}
+
+
+typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    /* Generic entry point : resolves the trivial encodings itself, then hands over to
+     * whichever 4-stream decoder HUF_selectDecoder() designates (0 : 4X2, 1 : 4X4). */
+    static const decompressionAlgo decoders[2] = { HUF_decompress4X2, HUF_decompress4X4 };
+
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* input cannot exceed output */
+    if (cSrcSize == dstSize) {          /* not compressed : plain copy */
+        memcpy(dst, cSrc, dstSize);
+    } else if (cSrcSize == 1) {         /* RLE : one byte repeated dstSize times */
+        memset(dst, *(const BYTE*)cSrc, dstSize);
+    } else {
+        return decoders[HUF_selectDecoder(dstSize, cSrcSize)](dst, dstSize, cSrc, cSrcSize);
+    }
+    return dstSize;
+}
+
+size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* 4-stream decoding with a caller-provided table; raw and RLE inputs are expanded without touching dctx. */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* input cannot exceed output */
+    if ((cSrcSize != dstSize) && (cSrcSize != 1)) {   /* neither raw nor RLE : pick a Huffman decoder */
+        if (HUF_selectDecoder(dstSize, cSrcSize))
+            return HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+        return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+    }
+    if (cSrcSize == dstSize) memcpy(dst, cSrc, dstSize);                 /* not compressed */
+    else                     memset(dst, *(const BYTE*)cSrc, dstSize);   /* RLE */
+    return dstSize;
+}
+
+size_t HUF_decompress4X_hufOnly (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* Huffman-only variant : inputs of raw or RLE size are reported as corrupted instead of being expanded. */
+    if (dstSize == 0)
+        return ERROR(dstSize_tooSmall);
+    if (!((cSrcSize < dstSize) && (cSrcSize > 1)))   /* must be more than 1 byte and strictly smaller than the output */
+        return ERROR(corruption_detected);
+
+    if (HUF_selectDecoder(dstSize, cSrcSize))
+        return HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+    return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+}
+
+size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{   /* Single-stream decoding with a caller-provided table; raw and RLE inputs are expanded without touching dctx. */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* input cannot exceed output */
+    if ((cSrcSize != dstSize) && (cSrcSize != 1)) {   /* neither raw nor RLE : pick a Huffman decoder */
+        if (HUF_selectDecoder(dstSize, cSrcSize))
+            return HUF_decompress1X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+        return HUF_decompress1X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+    }
+    if (cSrcSize == dstSize) memcpy(dst, cSrc, dstSize);                 /* not compressed */
+    else                     memset(dst, *(const BYTE*)cSrc, dstSize);   /* RLE */
+    return dstSize;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/mem.h b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/mem.h
new file mode 100755
index 0000000000..99de9cb368
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/FiniteStateEntropy/lib/mem.h
@@ -0,0 +1,374 @@
+/* ******************************************************************
+   mem.h
+   low-level memory access routines
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include <string.h>    /* memcpy */
+
+
+/*-****************************************
+*  Compiler specifics
+******************************************/
+#if defined(_MSC_VER)
+#   include <intrin.h>   /* _byteswap_ */
+#endif
+#if defined(__GNUC__)
+#  define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/*-**************************************************************
+*  Basic Types
+*****************************************************************/
+#if  !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef   signed short      S16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef   signed long long  S64;
+#endif
+
+
+/*-**************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is performed through `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : packed attribute (`__attribute__((packed))`). It depends on a compiler extension (i.e., not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast as or faster than `memcpy`.
+ * Method 2 : direct access. This method needs no compiler extension, but violates the C standard.
+ *            It can generate buggy code on targets that enforce alignment.
+ *            In some circumstances, it's the only known way to get the most performance (e.g. GCC + ARMv6).
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2).
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define MEM_FORCE_MEMORY_ACCESS 2
+#  elif defined(__INTEL_COMPILER) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define MEM_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return (sizeof(size_t) == 4) ? 1 : 0; }   /* 1 when size_t is 4 bytes wide, else 0 */
+MEM_STATIC unsigned MEM_64bits(void) { return (sizeof(size_t) == 8) ? 1 : 0; }   /* 1 when size_t is 8 bytes wide, else 0 */
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{   /* Runtime probe : the first byte in memory of the 32-bit value 1 is non-zero only on a little-endian host. */
+    const union { U32 value; BYTE bytes[4]; } probe = { 1 };   /* kept non-static on purpose : static was found detrimental to performance */
+    return probe.bytes[0];
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* Method 2 : direct dereference of a possibly unaligned pointer. Violates the C standard, by lying on alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }   /* returns size_t, like the memcpy() variant (was U64) */
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+/* Method 1 : access through a packed union. Safer than direct access, but relies on a compiler extension, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; }   /* returns size_t, like the memcpy() variant (was U64) */
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
+
+#else
+
+/* default method, safe and standard.
+   can sometimes prove slower */
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{   /* alignment-safe load : the bytes are copied as they sit in memory (host byte order) */
+    U16 loaded; memcpy(&loaded, memPtr, sizeof(U16)); return loaded;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{   /* alignment-safe load : the bytes are copied as they sit in memory (host byte order) */
+    U32 loaded; memcpy(&loaded, memPtr, sizeof(U32)); return loaded;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{   /* alignment-safe load : the bytes are copied as they sit in memory (host byte order) */
+    U64 loaded; memcpy(&loaded, memPtr, sizeof(U64)); return loaded;
+}
+
+MEM_STATIC size_t MEM_readST(const void* memPtr)
+{   /* alignment-safe load of one size_t, in host byte order */
+    size_t loaded; memcpy(&loaded, memPtr, sizeof(size_t)); return loaded;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{   /* alignment-safe store, in host byte order */
+    memcpy(memPtr, &value, sizeof(U16));
+}
+
+MEM_STATIC void MEM_write32(void* memPtr, U32 value)
+{   /* alignment-safe store, in host byte order */
+    memcpy(memPtr, &value, sizeof(U32));
+}
+
+MEM_STATIC void MEM_write64(void* memPtr, U64 value)
+{   /* alignment-safe store, in host byte order */
+    memcpy(memPtr, &value, sizeof(U64));
+}
+
+#endif /* MEM_FORCE_MEMORY_ACCESS */
+
+MEM_STATIC U32 MEM_swap32(U32 in)
+{
+#if defined(_MSC_VER)     /* MSVC intrinsic */
+    return _byteswap_ulong(in);
+#elif defined (__GNUC__)  /* GCC-compatible builtin */
+    return __builtin_bswap32(in);
+#else                     /* portable fallback : each byte moves to its mirrored position */
+    return  ((in >> 24) & 0x000000ff ) |
+            ((in >>  8) & 0x0000ff00 ) |
+            ((in <<  8) & 0x00ff0000 ) |
+            ((in << 24) & 0xff000000 );
+#endif
+}
+
+MEM_STATIC U64 MEM_swap64(U64 in)
+{
+#if defined(_MSC_VER)     /* MSVC intrinsic */
+    return _byteswap_uint64(in);
+#elif defined (__GNUC__)  /* GCC-compatible builtin */
+    return __builtin_bswap64(in);
+#else                     /* portable fallback : each byte moves to its mirrored position */
+    return  ((in >> 56) & 0x00000000000000ffULL) |
+            ((in >> 40) & 0x000000000000ff00ULL) |
+            ((in >> 24) & 0x0000000000ff0000ULL) |
+            ((in >> 8)  & 0x00000000ff000000ULL) |
+            ((in << 8)  & 0x000000ff00000000ULL) |
+            ((in << 24) & 0x0000ff0000000000ULL) |
+            ((in << 40) & 0x00ff000000000000ULL) |
+            ((in << 56) & 0xff00000000000000ULL);
+#endif
+}
+
+MEM_STATIC size_t MEM_swapST(size_t in)
+{
+    /* Reverses the byte order of a size_t; the swap width follows the size of size_t
+     * (32-bit swap when MEM_32bits(), 64-bit swap otherwise). */
+    return MEM_32bits() ? (size_t)MEM_swap32((U32)in)
+                        : (size_t)MEM_swap64((U64)in);
+}
+
+/*=== Little endian r/w ===*/
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{   /* Reads a 16-bit little-endian value : assembled byte by byte on big-endian hosts, loaded directly otherwise. */
+    if (!MEM_isLittleEndian()) {
+        const BYTE* const bytes = (const BYTE*)memPtr;
+        return (U16)(bytes[0] + (bytes[1]<<8));
+    }
+
+    return MEM_read16(memPtr);
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{   /* Stores val with its low byte first, whatever the host byte order. */
+    if (!MEM_isLittleEndian()) {
+        BYTE* const out = (BYTE*)memPtr;
+        out[0] = (BYTE)val;
+        out[1] = (BYTE)(val>>8);
+        return;
+    }
+    MEM_write16(memPtr, val);
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+    /* The raw load is in host order : it already is the little-endian value
+     * on a little-endian host, and needs a byte swap on any other host. */
+    U32 const raw = MEM_read32(memPtr);
+    return MEM_isLittleEndian() ? raw : MEM_swap32(raw);
+}
+
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
+{
+    /* Little-endian store : the value is byte-swapped first
+     * when the host is not little-endian. */
+    U32 const stored = MEM_isLittleEndian() ? val32 : MEM_swap32(val32);
+    MEM_write32(memPtr, stored);
+}
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+    /* The raw load is in host order : it already is the little-endian value
+     * on a little-endian host, and needs a byte swap on any other host. */
+    U64 const raw = MEM_read64(memPtr);
+    return MEM_isLittleEndian() ? raw : MEM_swap64(raw);
+}
+
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
+{
+    /* Little-endian store : the value is byte-swapped first
+     * when the host is not little-endian. */
+    U64 const stored = MEM_isLittleEndian() ? val64 : MEM_swap64(val64);
+    MEM_write64(memPtr, stored);
+}
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+    /* Little-endian read of one size_t : 4 bytes when MEM_32bits(),
+     * 8 bytes otherwise. */
+    return MEM_32bits() ? (size_t)MEM_readLE32(memPtr)
+                        : (size_t)MEM_readLE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
+{   /* Little-endian write of one size_t : 4 bytes when MEM_32bits(), 8 bytes otherwise. */
+    if (!MEM_32bits()) {
+        MEM_writeLE64(memPtr, (U64)val);
+        return;
+    }
+    MEM_writeLE32(memPtr, (U32)val);
+}
+
+/*=== Big endian r/w ===*/
+
+MEM_STATIC U32 MEM_readBE32(const void* memPtr)
+{
+    /* The raw load is in host order : it gets byte-swapped
+     * on a little-endian host, and is returned as-is otherwise. */
+    U32 const raw = MEM_read32(memPtr);
+    return MEM_isLittleEndian() ? MEM_swap32(raw) : raw;
+}
+
+MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32)
+{
+    /* Big-endian store : the value is byte-swapped first
+     * when the host is little-endian. */
+    U32 const stored = MEM_isLittleEndian() ? MEM_swap32(val32) : val32;
+    MEM_write32(memPtr, stored);
+}
+
+MEM_STATIC U64 MEM_readBE64(const void* memPtr)
+{
+    /* The raw load is in host order : it gets byte-swapped
+     * on a little-endian host, and is returned as-is otherwise. */
+    U64 const raw = MEM_read64(memPtr);
+    return MEM_isLittleEndian() ? MEM_swap64(raw) : raw;
+}
+
+MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64)
+{
+    /* Big-endian store : the value is byte-swapped first
+     * when the host is little-endian. */
+    U64 const stored = MEM_isLittleEndian() ? MEM_swap64(val64) : val64;
+    MEM_write64(memPtr, stored);
+}
+
+MEM_STATIC size_t MEM_readBEST(const void* memPtr)
+{
+    /* Big-endian read of one size_t : 4 bytes when MEM_32bits(),
+     * 8 bytes otherwise. */
+    return MEM_32bits() ? (size_t)MEM_readBE32(memPtr)
+                        : (size_t)MEM_readBE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
+{   /* Big-endian write of one size_t : 4 bytes when MEM_32bits(), 8 bytes otherwise. */
+    if (!MEM_32bits()) {
+        MEM_writeBE64(memPtr, (U64)val);
+        return;
+    }
+    MEM_writeBE32(memPtr, (U32)val);
+}
+
+
+/* MEM_readMINMATCH() :
+ * always loads 4 bytes from memPtr; when length==3, the byte located at memPtr+3 is shifted out.
+ * The 3 surviving bytes do not land on the same bit positions on little- and big-endian hosts,
+ * hence : function safe only for comparisons. */
+MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length)
+{
+    U32 const word = MEM_read32(memPtr);
+
+    if (length != 3)
+        return word;   /* length 4, and every other value */
+
+    return MEM_isLittleEndian() ? (word << 8) : (word >> 8);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/.editorconfig b/codec/L2/demos/pikEnc/host/third_party/brotli/.editorconfig
new file mode 100755
index 0000000000..17ed3c197a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/.editorconfig
@@ -0,0 +1,40 @@
+# http://editorconfig.org
+# Consistent coding style across different editors.
+
+# Top-most file
+root = true
+
+# Global styles:
+#   - indent 2 spaces
+#   - add final new line
+#   - trim trailing whitespace
+[*]
+charset = utf-8
+end_of_line = lf
+indent_size = 2
+indent_style = space
+insert_final_newline = true
+trim_trailing_whitespace = true
+
+# BUILD:
+#   - indent 4 spaces
+[BUILD]
+indent_size = 4
+
+# Makefile:
+#   - indent 1 tab
+[Makefile]
+indent_size = tab
+indent_style = tab
+
+# Markdown:
+#   - indent 4 spaces
+#   - trailing whitespace is significant
+[*.md]
+indent_size = 4
+trim_trailing_whitespace = false
+
+# Python
+#   - indent 4 spaces
+[*.py]
+indent_size = 4
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/.travis.yml b/codec/L2/demos/pikEnc/host/third_party/brotli/.travis.yml
new file mode 100755
index 0000000000..5cfeafc85c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/.travis.yml
@@ -0,0 +1,226 @@
+language: c
+sudo: false
+branches:
+  only:
+  - master
+matrix:
+  include:
+    ###
+    ## Linux builds using various versions of GCC.
+    ###
+    - os: linux
+      env: BUILD_SYSTEM=cmake C_COMPILER=gcc-7 CXX_COMPILER=g++-7
+      addons:
+        apt:
+          sources:
+          - ubuntu-toolchain-r-test
+          packages:
+          - gcc-7
+          - g++-7
+    - os: linux
+      env: BUILD_SYSTEM=cmake C_COMPILER=gcc-4.4 CXX_COMPILER=g++-4.4
+      addons:
+        apt:
+          sources:
+          - ubuntu-toolchain-r-test
+          packages:
+          - gcc-4.4
+          - g++-4.4
+
+    ###
+    ## Test that Autotools build works.
+    ###
+    - os: linux
+      env: BUILD_SYSTEM=autotools C_COMPILER=gcc-5 CXX_COMPILER=g++-5
+      addons:
+        apt:
+          sources:
+          - ubuntu-toolchain-r-test
+          packages:
+          - gcc-5
+          - g++-5
+
+    ###
+    ## Test that fuzzer is compiling / working.
+    ###
+    - os: linux
+      env: BUILD_SYSTEM=fuzz C_COMPILER=clang-5.0 CXX_COMPILER=clang++-5.0 ASAN_OPTIONS=detect_leaks=0
+      addons:
+        apt:
+          sources:
+          - ubuntu-toolchain-r-test
+          - llvm-toolchain-trusty-5.0
+          packages:
+          - clang-5.0
+
+    ###
+    ## clang on Linux
+    ###
+    - os: linux
+      env: BUILD_SYSTEM=cmake C_COMPILER=clang-5.0 CXX_COMPILER=clang++-5.0
+      addons:
+        apt:
+          sources:
+          - llvm-toolchain-trusty-5.0
+          - ubuntu-toolchain-r-test
+          packages:
+          - clang-5.0
+    - os: linux
+      env: BUILD_SYSTEM=cmake C_COMPILER=clang-3.5 CXX_COMPILER=clang++-3.5
+      addons:
+        apt:
+          sources:
+          - llvm-toolchain-trusty-3.5
+          - ubuntu-toolchain-r-test
+          packages:
+          - clang-3.5
+
+    ###
+    ## testing arm via qemu on Linux
+    ###
+    - os: linux
+      env: BUILD_SYSTEM=cmake C_COMPILER=arm-linux-gnueabihf-gcc CXX_COMPILER=arm-linux-gnueabihf-g++ CFLAGS="-march=armv7-a -mfloat-abi=hard -mfpu=neon"
+      addons:
+        apt:
+          sources:
+          - ubuntu-toolchain-r-test
+          packages:
+          - qemu
+          - gcc-arm-linux-gnueabihf
+          - libc6-dev-armhf-cross
+
+    ###
+    ## PGI Community Edition on Linux
+    ###
+    - os: linux
+      env: BUILD_SYSTEM=cmake C_COMPILER=pgcc CXX_COMPILER=pgc++
+
+    ###
+    ## Python 2.7 and 3.6 builds on Linux
+    ###
+    - os: linux
+      language: python
+      python: 2.7
+      env: BUILD_SYSTEM=python C_COMPILER=gcc-5 CXX_COMPILER=g++-5
+      addons:
+        apt:
+          sources:
+          - ubuntu-toolchain-r-test
+          packages:
+          - gcc-5
+          - g++-5
+    - os: linux
+      language: python
+      python: 3.6
+      env: BUILD_SYSTEM=python C_COMPILER=gcc-5 CXX_COMPILER=g++-5
+      addons:
+        apt:
+          sources:
+          - ubuntu-toolchain-r-test
+          packages:
+          - gcc-5
+          - g++-5
+
+    ###
+    ## CMake on OS X
+    ##
+    ## These all work, but it seems unnecessary to actually build them
+    ## all since we already test all these versions of GCC on Linux.
+    ## We'll just test 4.4 and the most recent version.
+    ###
+    - os: osx
+      env: BUILD_SYSTEM=cmake C_COMPILER=gcc CXX_COMPILER=g++
+    - os: osx
+      env: BUILD_SYSTEM=cmake C_COMPILER=gcc-4.9 CXX_COMPILER=g++-4.9
+    - os: osx
+      env: BUILD_SYSTEM=cmake
+
+    ###
+    ## Python 2.7 OS X build (using the system /usr/bin/python)
+    ###
+    - os: osx
+      env: BUILD_SYSTEM=python C_COMPILER=gcc CXX_COMPILER=g++
+
+    ###
+    ## Sanitizers
+    ###
+    - os: linux
+      env: BUILD_SYSTEM=cmake C_COMPILER=clang-5.0 CXX_COMPILER=clang++-5.0 SANITIZER=address ASAN_OPTIONS=detect_leaks=0
+      addons:
+        apt:
+          sources:
+          - ubuntu-toolchain-r-test
+          - llvm-toolchain-trusty-5.0
+          packages:
+          - clang-5.0
+    - os: linux
+      env: BUILD_SYSTEM=cmake C_COMPILER=clang-5.0 CXX_COMPILER=clang++-5.0 SANITIZER=thread
+      addons:
+        apt:
+          sources:
+          - ubuntu-toolchain-r-test
+          - llvm-toolchain-trusty-5.0
+          packages:
+          - clang-5.0
+    - os: linux
+      env: BUILD_SYSTEM=cmake C_COMPILER=clang-5.0 CXX_COMPILER=clang++-5.0 SANITIZER=undefined CFLAGS="-fno-sanitize-recover=undefined,integer"
+      addons:
+        apt:
+          sources:
+          - ubuntu-toolchain-r-test
+          - llvm-toolchain-trusty-5.0
+          packages:
+          - clang-5.0
+
+    - os: linux
+      env: BUILD_SYSTEM=maven
+      language: java
+
+    - os: linux
+      sudo: required
+      language: java
+      jdk: oraclejdk9
+      env: BUILD_SYSTEM=bazel
+      addons:
+        apt:
+          sources:
+            - sourceline: "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8"
+              key_url: "https://storage.googleapis.com/bazel-apt/doc/apt-key.pub.gpg"
+            - ubuntu-toolchain-r-test
+          packages:
+            - bazel
+
+    - os: osx
+      env: BUILD_SYSTEM=bazel
+      # Latest image with Java 1.8 (required to install Bazel).
+      osx_image: xcode9.3
+      language: java
+
+before_install:
+###
+## If we use the matrix to set CC/CXX, Travis overwrites the values,
+## so instead we use C/CXX_COMPILER, then copy the values to CC/CXX
+## here (after Travis has set CC/CXX).
+###
+- if [ -n "${C_COMPILER}" ]; then export CC="${C_COMPILER}"; fi
+- if [ -n "${CXX_COMPILER}" ]; then export CXX="${CXX_COMPILER}"; fi
+- scripts/.travis.sh before_install
+install:
+- scripts/.travis.sh install
+script:
+- scripts/.travis.sh script
+after_success:
+- scripts/.travis.sh after_success
+
+before_deploy:
+- scripts/.travis.sh before_deploy
+
+deploy:
+- provider: bintray
+  file: "scripts/.bintray.json"
+  user: "eustas"
+  key:
+    secure: "Kbam/lTAdz72fZivbs6riJT+Y4PbuKP7r6t5PAWxJxAAykjwnYTRe3zF472g9HCE14KYMsdB+KSYSgg6TGJnqGC9gL9xhhGU9U/WmA+vbMWS/MSnMWpK9IRpp77pM2i2NKZD4v33JuEwKFCBJP3Vj6QQ5Qd1NKdobuXJyznhgnw="
+  on:
+    condition: "${BUILD_SYSTEM} = bazel"
+  skip_cleanup: true
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/CONTRIBUTING.md b/codec/L2/demos/pikEnc/host/third_party/brotli/CONTRIBUTING.md
new file mode 100755
index 0000000000..a00e37d17f
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/CONTRIBUTING.md
@@ -0,0 +1,27 @@
+Want to contribute? Great! First, read this page (including the small print at
+the end).
+
+### Before you contribute
+Before we can use your code, you must sign the
+[Google Individual Contributor License
+Agreement](https://cla.developers.google.com/about/google-individual)
+(CLA), which you can do online. The CLA is necessary mainly because you own the
+copyright to your changes, even after your contribution becomes part of our
+codebase, so we need your permission to use and distribute your code. We also
+need to be sure of various other things—for instance that you'll tell us if you
+know that your code infringes on other people's patents. You don't have to sign
+the CLA until after you've submitted your code for review and a member has
+approved it, but you must do it before we can put your code into our codebase.
+Before you start working on a larger contribution, you should get in touch with
+us first through the issue tracker with your idea so that we can help out and
+possibly guide you. Coordinating up front makes it much easier to avoid
+frustration later on.
+
+### Code reviews
+All submissions, including submissions by project members, require review. We
+use GitHub pull requests for this purpose.
+
+### The small print
+Contributions made by corporations are covered by a different agreement than
+the one above, the [Software Grant and Corporate Contributor License
+Agreement](https://cla.developers.google.com/about/google-corporate).
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/LICENSE b/codec/L2/demos/pikEnc/host/third_party/brotli/LICENSE
new file mode 100755
index 0000000000..33b7cdd2db
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/README b/codec/L2/demos/pikEnc/host/third_party/brotli/README
new file mode 100755
index 0000000000..3fb3f2291a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/README
@@ -0,0 +1,15 @@
+BROTLI DATA COMPRESSION LIBRARY
+
+Brotli is a generic-purpose lossless compression algorithm that compresses data
+using a combination of a modern variant of the LZ77 algorithm, Huffman coding
+and 2nd order context modeling, with a compression ratio comparable to the best
+currently available general-purpose compression methods. It is similar in speed
+to deflate but offers more dense compression.
+
+The specification of the Brotli Compressed Data Format is defined in RFC 7932
+https://tools.ietf.org/html/rfc7932
+
+Brotli is open-sourced under the MIT License, see the LICENSE file.
+
+Brotli mailing list:
+https://groups.google.com/forum/#!forum/brotli
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/constants.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/constants.h
new file mode 100755
index 0000000000..d1b88d12af
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/constants.h
@@ -0,0 +1,64 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#ifndef BROTLI_COMMON_CONSTANTS_H_
+#define BROTLI_COMMON_CONSTANTS_H_
+
+/* Specification: 7.3. Encoding of the context map */
+#define BROTLI_CONTEXT_MAP_MAX_RLE 16
+
+/* Specification: 2. Compressed representation overview */
+#define BROTLI_MAX_NUMBER_OF_BLOCK_TYPES 256
+
+/* Specification: 3.3. Alphabet sizes: insert-and-copy length */
+#define BROTLI_NUM_LITERAL_SYMBOLS 256
+#define BROTLI_NUM_COMMAND_SYMBOLS 704
+#define BROTLI_NUM_BLOCK_LEN_SYMBOLS 26
+#define BROTLI_MAX_CONTEXT_MAP_SYMBOLS (BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + \
+                                        BROTLI_CONTEXT_MAP_MAX_RLE)
+#define BROTLI_MAX_BLOCK_TYPE_SYMBOLS (BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 2)
+
+/* Specification: 3.5. Complex prefix codes */
+#define BROTLI_REPEAT_PREVIOUS_CODE_LENGTH 16
+#define BROTLI_REPEAT_ZERO_CODE_LENGTH 17
+#define BROTLI_CODE_LENGTH_CODES (BROTLI_REPEAT_ZERO_CODE_LENGTH + 1)
+/* "code length of 8 is repeated" */
+#define BROTLI_INITIAL_REPEATED_CODE_LENGTH 8
+
+/* "Large Window Brotli" */
+#define BROTLI_LARGE_MAX_DISTANCE_BITS 62U
+#define BROTLI_LARGE_MIN_WBITS 10
+#define BROTLI_LARGE_MAX_WBITS 30
+
+/* Specification: 4. Encoding of distances */
+#define BROTLI_NUM_DISTANCE_SHORT_CODES 16
+#define BROTLI_MAX_NPOSTFIX 3
+#define BROTLI_MAX_NDIRECT 120
+#define BROTLI_MAX_DISTANCE_BITS 24U
+#define BROTLI_DISTANCE_ALPHABET_SIZE(NPOSTFIX, NDIRECT, MAXNBITS) ( \
+    BROTLI_NUM_DISTANCE_SHORT_CODES + (NDIRECT) +                    \
+    ((MAXNBITS) << ((NPOSTFIX) + 1)))
+/* Arguments are (NPOSTFIX, NDIRECT, MAXNBITS), in that order.
+   BROTLI_NUM_DISTANCE_SYMBOLS == 16 + 120 + (62 << (3 + 1)) == 1128 */
+#define BROTLI_NUM_DISTANCE_SYMBOLS BROTLI_DISTANCE_ALPHABET_SIZE( \
+    BROTLI_MAX_NPOSTFIX, BROTLI_MAX_NDIRECT, BROTLI_LARGE_MAX_DISTANCE_BITS)
+#define BROTLI_MAX_DISTANCE 0x3FFFFFC
+#define BROTLI_MAX_ALLOWED_DISTANCE 0x7FFFFFFC
+
+/* 7.1. Context modes and context ID lookup for literals */
+/* "context IDs for literals are in the range of 0..63" */
+#define BROTLI_LITERAL_CONTEXT_BITS 6
+
+/* 7.2. Context ID for distances */
+#define BROTLI_DISTANCE_CONTEXT_BITS 2
+
+/* 9.1. Format of the Stream Header */
+/* Number of slack bytes for window size. Don't confuse
+   with BROTLI_NUM_DISTANCE_SHORT_CODES. */
+#define BROTLI_WINDOW_GAP 16
+#define BROTLI_MAX_BACKWARD_LIMIT(W) (((size_t)1 << (W)) - BROTLI_WINDOW_GAP)
+
+#endif  /* BROTLI_COMMON_CONSTANTS_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/context.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/context.h
new file mode 100755
index 0000000000..24b3eb48f5
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/context.h
@@ -0,0 +1,261 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Lookup table to map the previous two bytes to a context id.
+
+  There are four different context modeling modes defined here:
+    CONTEXT_LSB6: context id is the least significant 6 bits of the last byte,
+    CONTEXT_MSB6: context id is the most significant 6 bits of the last byte,
+    CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text,
+    CONTEXT_SIGNED: second-order context model tuned for signed integers.
+
+  If |p1| and |p2| are the previous two bytes, and |mode| is current context
+  mode, we calculate the context as:
+
+    context = ContextLut(mode)[p1] | ContextLut(mode)[p2 + 256].
+
+  For CONTEXT_UTF8 mode, if the previous two bytes are ASCII characters
+  (i.e. < 128), this will be equivalent to
+
+    context = 4 * context1(p1) + context2(p2),
+
+  where context1 is based on the previous byte in the following way:
+
+    0  : non-ASCII control
+    1  : \t, \n, \r
+    2  : space
+    3  : other punctuation
+    4  : " '
+    5  : %
+    6  : ( < [ {
+    7  : ) > ] }
+    8  : , ; :
+    9  : .
+    10 : =
+    11 : number
+    12 : upper-case vowel
+    13 : upper-case consonant
+    14 : lower-case vowel
+    15 : lower-case consonant
+
+  and context2 is based on the second last byte:
+
+    0 : control, space
+    1 : punctuation
+    2 : upper-case letter, number
+    3 : lower-case letter
+
+  If the last byte is ASCII, and the second last byte is not (in a valid UTF8
+  stream it will be a continuation byte, value between 128 and 191), the
+  context is the same as if the second last byte was an ASCII control or space.
+
+  If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
+  be a continuation byte and the context id is 2 or 3 depending on the LSB of
+  the last byte and to a lesser extent on the second last byte if it is ASCII.
+
+  If the last byte is a UTF8 continuation byte, the second last byte can be:
+    - continuation byte: the next byte is probably ASCII or lead byte (assuming
+      4-byte UTF8 characters are rare) and the context id is 0 or 1.
+    - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
+    - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
+
+  The possible value combinations of the previous two bytes, the range of
+  context ids and the type of the next byte are summarized in the table below:
+
+  |--------\-----------------------------------------------------------------|
+  |         \                         Last byte                              |
+  | Second   \---------------------------------------------------------------|
+  | last byte \    ASCII            |   cont. byte        |   lead byte      |
+  |            \   (0-127)          |   (128-191)         |   (192-)         |
+  |=============|===================|=====================|==================|
+  |  ASCII      | next: ASCII/lead  |  not valid          |  next: cont.     |
+  |  (0-127)    | context: 4 - 63   |                     |  context: 2 - 3  |
+  |-------------|-------------------|---------------------|------------------|
+  |  cont. byte | next: ASCII/lead  |  next: ASCII/lead   |  next: cont.     |
+  |  (128-191)  | context: 4 - 63   |  context: 0 - 1     |  context: 2 - 3  |
+  |-------------|-------------------|---------------------|------------------|
+  |  lead byte  | not valid         |  next: ASCII/lead   |  not valid       |
+  |  (192-207)  |                   |  context: 0 - 1     |                  |
+  |-------------|-------------------|---------------------|------------------|
+  |  lead byte  | not valid         |  next: cont.        |  not valid       |
+  |  (208-)     |                   |  context: 2 - 3     |                  |
+  |-------------|-------------------|---------------------|------------------|
+*/
+
+#ifndef BROTLI_COMMON_CONTEXT_H_
+#define BROTLI_COMMON_CONTEXT_H_
+
+#include <brotli/types.h>
+
+typedef enum ContextType {  /* selects a 512-entry slice of kContextLookup */
+  CONTEXT_LSB6 = 0,   /* least significant 6 bits of the last byte */
+  CONTEXT_MSB6 = 1,   /* most significant 6 bits of the last byte */
+  CONTEXT_UTF8 = 2,   /* second-order model tuned for UTF8-encoded text */
+  CONTEXT_SIGNED = 3  /* second-order model tuned for signed integers */
+} ContextType;
+
+/* Common context lookup table for all context modes. */
+static const uint8_t kContextLookup[2048] = {
+  /* CONTEXT_LSB6, last byte. */
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+
+  /* CONTEXT_LSB6, second last byte, */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+  /* CONTEXT_MSB6, last byte. */
+   0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
+   4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,
+   8,  8,  8,  8,  9,  9,  9,  9, 10, 10, 10, 10, 11, 11, 11, 11,
+  12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
+  16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
+  20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
+  24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
+  28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
+  32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
+  36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
+  40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
+  44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
+  48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
+  52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
+  56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
+  60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,
+
+  /* CONTEXT_MSB6, second last byte, */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+  /* CONTEXT_UTF8, last byte. */
+  /* ASCII range. */
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  4,  4,  0,  0,  4,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
+  44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
+  12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
+  52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
+  12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
+  60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12,  0,
+  /* UTF8 continuation byte range. */
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+  /* UTF8 lead byte range. */
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+  2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+
+  /* CONTEXT_UTF8 second last byte. */
+  /* ASCII range. */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
+  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
+  1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
+  /* UTF8 continuation byte range. */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  /* UTF8 lead byte range. */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+
+  /* CONTEXT_SIGNED, last byte, same as the values below shifted by 3 bits. */
+   0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+  48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56,
+
+  /* CONTEXT_SIGNED, second last byte. */
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+};
+
+typedef const uint8_t* ContextLut;
+
+/* typeof(MODE) == ContextType; returns ContextLut (512-entry slice for MODE) */
+#define BROTLI_CONTEXT_LUT(MODE) (&kContextLookup[(MODE) << 9])
+
+/* typeof(LUT) == ContextLut; ORs entry P1 with entry P2 of the second half */
+#define BROTLI_CONTEXT(P1, P2, LUT) ((LUT)[P1] | ((LUT) + 256)[P2])
+
+#endif  /* BROTLI_COMMON_CONTEXT_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/dictionary.bin b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/dictionary.bin
new file mode 100755
index 0000000000..a585c0e292
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/dictionary.bin
@@ -0,0 +1,432 @@
+timedownlifeleftbackcodedatashowonlysitecityopenjustlikefreeworktextyearoverbodyloveformbookplaylivelinehelphomesidemorewordlongthemviewfindpagedaysfullheadtermeachareafromtruemarkableuponhighdatelandnewsevennextcasebothpostusedmadehandherewhatnameLinkblogsizebaseheldmakemainuser') +holdendswithNewsreadweresigntakehavegameseencallpathwellplusmenufilmpartjointhislistgoodneedwayswestjobsmindalsologorichuseslastteamarmyfoodkingwilleastwardbestfirePageknowaway.pngmovethanloadgiveselfnotemuchfeedmanyrockicononcelookhidediedHomerulehostajaxinfoclublawslesshalfsomesuchzone100%onescareTimeracebluefourweekfacehopegavehardlostwhenparkkeptpassshiproomHTMLplanTypedonesavekeepflaglinksoldfivetookratetownjumpthusdarkcardfilefearstaykillthatfallautoever.comtalkshopvotedeepmoderestturnbornbandfellroseurl(skinrolecomeactsagesmeetgold.jpgitemvaryfeltthensenddropViewcopy1.0"</a>stopelseliestourpack.gifpastcss?graymean&gt;rideshotlatesaidroadvar feeljohnrickportfast'UA-dead</b>poorbilltypeU.S.woodmust2px;Inforankwidewantwalllead[0];paulwavesure$('#waitmassarmsgoesgainlangpaid!-- lockunitrootwalkfirmwifexml"songtest20pxkindrowstoolfontmailsafestarmapscorerainflowbabyspansays4px;6px;artsfootrealwikiheatsteptriporg/lakeweaktoldFormcastfansbankveryrunsjulytask1px;goalgrewslowedgeid="sets5px;.js?40pxif (soonseatnonetubezerosentreedfactintogiftharm18pxcamehillboldzoomvoideasyringfillpeakinitcost3px;jacktagsbitsrolleditknewnear<!--growJSONdutyNamesaleyou lotspainjazzcoldeyesfishwww.risktabsprev10pxrise25pxBlueding300,ballfordearnwildbox.fairlackverspairjunetechif(!pickevil$("#warmlorddoespull,000ideadrawhugespotfundburnhrefcellkeystickhourlossfuel12pxsuitdealRSS"agedgreyGET"easeaimsgirlaids8px;navygridtips#999warsladycars); }php?helltallwhomzh:�*/
+ 100hall.
+
+A7px;pushchat0px;crew*/</hash75pxflatrare && tellcampontolaidmissskiptentfinemalegetsplot400,
+
+coolfeet.php<br>ericmostguidbelldeschairmathatom/img&#82luckcent000;tinygonehtmlselldrugFREEnodenick?id=losenullvastwindRSS wearrelybeensamedukenasacapewishgulfT23:hitsslotgatekickblurthey15px''););">msiewinsbirdsortbetaseekT18:ordstreemall60pxfarm’sboys[0].');"POSTbearkids);}}marytend(UK)quadzh:�-siz----prop');liftT19:viceandydebt>RSSpoolneckblowT16:doorevalT17:letsfailoralpollnovacolsgene —softrometillross<h3>pourfadepink<tr>mini)|!(minezh:�barshear00);milk -->ironfreddiskwentsoilputs/js/holyT22:ISBNT20:adamsees<h2>json', 'contT21: RSSloopasiamoon</p>soulLINEfortcartT14:<h1>80px!--<9px;T04:mike:46ZniceinchYorkricezh:�'));puremageparatonebond:37Z_of_']);000,zh:�tankyardbowlbush:56ZJava30px
+|}
+%C3%:34ZjeffEXPIcashvisagolfsnowzh:�quer.csssickmeatmin.binddellhirepicsrent:36ZHTTP-201fotowolfEND xbox:54ZBODYdick;
+}
+exit:35Zvarsbeat'});diet999;anne}}</[i].Langkm²wiretoysaddssealalex;
+	}echonine.org005)tonyjewssandlegsroof000) 200winegeardogsbootgarycutstyletemption.xmlcockgang$('.50pxPh.Dmiscalanloandeskmileryanunixdisc);}
+dustclip).
+
+70px-200DVDs7]><tapedemoi++)wageeurophiloptsholeFAQsasin-26TlabspetsURL bulkcook;}
+HEAD[0])abbrjuan(198leshtwin</i>sonyguysfuckpipe|-
+!002)ndow[1];[];
+Log salt
+		bangtrimbath){
+00px
+});ko:�feesad>s:// [];tollplug(){
+{
+ .js'200pdualboat.JPG);
+}quot);
+
+');
+
+}201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037201320122011201020092008200720062005200420032002200120001999199819971996199519941993199219911990198919881987198619851984198319821981198019791978197719761975197419731972197119701969196819671966196519641963196219611960195919581957195619551954195319521951195010001024139400009999comomásesteestaperotodohacecadaañobiendíaasívidacasootroforosolootracualdijosidograntipotemadebealgoquéestonadatrespococasabajotodasinoaguapuesunosantediceluisellamayozonaamorpisoobraclicellodioshoracasiзанаомрарутанепоотизнодотожеонихНаеебымыВысовывоНообПолиниРФНеМытыОнимдаЗаДаНуОбтеИзейнуммТыужفيأنمامعكلأورديافىهولملكاولهبسالإنهيأيقدهلثمبهلوليبلايبكشيامأمنتبيلنحبهممشوشfirstvideolightworldmediawhitecloseblackrightsmallbooksplacemusicfieldorderpointvalueleveltableboardhousegroupworksyearsstatetodaywaterstartstyledeathpowerphonenighterrorinputabouttermstitletoolseventlocaltimeslargewordsgamesshortspacefocusclearmodelblockguideradiosharewomenagainmoneyimagenamesyounglineslatercolorgreenfront&amp;watchforcepricerulesbeginaftervisitissueareasbelowindextotalhourslabelprintpressbuiltlinksspeedstudytradefoundsenseundershownformsrangeaddedstillmovedtakenaboveflashfixedoftenotherviewschecklegalriveritemsquickshapehumanexistgoingmoviethirdbasicpeacestagewidthloginideaswrotepagesusersdrivestorebreaksouthvoicesitesmonthwherebuildwhichearthforumthreesportpartyClicklowerlivesclasslayerentrystoryusagesoundcourtyour birthpopuptypesapplyImagebeinguppernoteseveryshowsmeansextramatchtrackknownearlybegansuperpapernorthlearngivennamedendedTermspartsGroupbrandusingwomanfalsereadyaudiotakeswhile.com/livedcasesdailychildgreatjudgethoseunitsneverbroadcoastcoverapplefilescyclesceneplansclickwritequeenpieceemailframeolderphotolimitcachecivilscaleenterthemetheretouchboundroyalaskedwholesincestock 
namefaithheartemptyofferscopeownedmightalbumthinkbloodarraymajortrustcanonunioncountvalidstoneStyleLoginhappyoccurleft:freshquitefilmsgradeneedsurbanfightbasishoverauto;route.htmlmixedfinalYour slidetopicbrownalonedrawnsplitreachRightdatesmarchquotegoodsLinksdoubtasyncthumballowchiefyouthnovel10px;serveuntilhandsCheckSpacequeryjamesequaltwice0,000Startpanelsongsroundeightshiftworthpostsleadsweeksavoidthesemilesplanesmartalphaplantmarksratesplaysclaimsalestextsstarswrong</h3>thing.org/multiheardPowerstandtokensolid(thisbringshipsstafftriedcallsfullyfactsagentThis //-->adminegyptEvent15px;Emailtrue"crossspentblogsbox">notedleavechinasizesguest</h4>robotheavytrue,sevengrandcrimesignsawaredancephase><!--en_US&#39;200px_namelatinenjoyajax.ationsmithU.S. holdspeterindianav">chainscorecomesdoingpriorShare1990sromanlistsjapanfallstrialowneragree</h2>abusealertopera"-//WcardshillsteamsPhototruthclean.php?saintmetallouismeantproofbriefrow">genretrucklooksValueFrame.net/-->
+<try {
+var makescostsplainadultquesttrainlaborhelpscausemagicmotortheir250pxleaststepsCountcouldglasssidesfundshotelawardmouthmovesparisgivesdutchtexasfruitnull,||[];top">
+<!--POST"ocean<br/>floorspeakdepth sizebankscatchchart20px;aligndealswould50px;url="parksmouseMost ...</amongbrainbody none;basedcarrydraftreferpage_home.meterdelaydreamprovejoint</tr>drugs<!-- aprilidealallenexactforthcodeslogicView seemsblankports (200saved_linkgoalsgrantgreekhomesringsrated30px;whoseparse();" Blocklinuxjonespixel');">);if(-leftdavidhorseFocusraiseboxesTrackement</em>bar">.src=toweralt="cablehenry24px;setupitalysharpminortastewantsthis.resetwheelgirls/css/100%;clubsstuffbiblevotes 1000korea});
+bandsqueue= {};80px;cking{
+		aheadclockirishlike ratiostatsForm"yahoo)[0];Aboutfinds</h1>debugtasksURL =cells})();12px;primetellsturns0x600.jpg"spainbeachtaxesmicroangel--></giftssteve-linkbody.});
+	mount (199FAQ</rogerfrankClass28px;feeds<h1><scotttests22px;drink) || lewisshall#039; for lovedwaste00px;ja:�simon<fontreplymeetsuntercheaptightBrand) != dressclipsroomsonkeymobilmain.Name platefunnytreescom/"1.jpgwmodeparamSTARTleft idden, 201);
+}
+form.viruschairtransworstPagesitionpatch<!--
+o-cacfirmstours,000 asiani++){adobe')[0]id=10both;menu .2.mi.png"kevincoachChildbruce2.jpgURL)+.jpg|suitesliceharry120" sweettr>
+name=diegopage swiss-->
+
+#fff;">Log.com"treatsheet) && 14px;sleepntentfiledja:�id="cName"worseshots-box-delta
+&lt;bears:48Z<data-rural</a> spendbakershops= "";php">ction13px;brianhellosize=o=%2F joinmaybe<img img">, fjsimg" ")[0]MTopBType"newlyDanskczechtrailknows</h5>faq">zh-cn10);
+-1");type=bluestrulydavis.js';>
+<!steel you h2>
+form jesus100% menu.
+	
+walesrisksumentddingb-likteachgif" vegasdanskeestishqipsuomisobredesdeentretodospuedeañosestátienehastaotrospartedondenuevohacerformamismomejormundoaquídíassóloayudafechatodastantomenosdatosotrassitiomuchoahoralugarmayorestoshorastenerantesfotosestaspaísnuevasaludforosmedioquienmesespoderchileserávecesdecirjoséestarventagrupohechoellostengoamigocosasnivelgentemismaairesjuliotemashaciafavorjuniolibrepuntobuenoautorabrilbuenatextomarzosaberlistaluegocómoenerojuegoperúhaberestoynuncamujervalorfueralibrogustaigualvotoscasosguíapuedosomosavisousteddebennochebuscafaltaeurosseriedichocursoclavecasasleónplazolargoobrasvistaapoyojuntotratavistocrearcampohemoscincocargopisosordenhacenáreadiscopedrocercapuedapapelmenorútilclarojorgecalleponertardenadiemarcasigueellassiglocochemotosmadreclaserestoniñoquedapasarbancohijosviajepabloéstevienereinodejarfondocanalnorteletracausatomarmanoslunesautosvillavendopesartipostengamarcollevapadreunidovamoszonasambosbandamariaabusomuchasubirriojavivirgradochicaallíjovendichaestantalessalirsuelopesosfinesllamabuscoéstalleganegroplazahumorpagarjuntadobleislasbolsabañohablaluchaÁreadicenjugarnotasvalleallácargadolorabajoestégustomentemariofirmacostofichaplatahogarartesleyesaquelmuseobasespocosmitadcielochicomiedoganarsantoetapadebesplayaredessietecortecoreadudasdeseoviejodeseaaguas&quot;domaincommonstatuseventsmastersystemactionbannerremovescrollupdateglobalmediumfilternumberchangeresultpublicscreenchoosenormaltravelissuessourcetargetspringmodulemobileswitchphotosborderregionitselfsocialactivecolumnrecordfollowtitle>eitherlengthfamilyfriendlayoutauthorcreatereviewsummerserverplayedplayerexpandpolicyformatdoublepointsseriespersonlivingdesignmonthsforcesuniqueweightpeopleenergynaturesearchfigurehavingcustomoffsetletterwindowsubmitrendergroupsuploadhealthmethodvideosschoolfutureshadowdebatevaluesObjectothersrightsleaguechromesimplenoticesharedendingseasonreportonlinesquarebuttonimagesenablemovinglatestwinterFranceperiodstrongrepeatLondondetailform
eddemandsecurepassedtoggleplacesdevicestaticcitiesstreamyellowattackstreetflighthiddeninfo">openedusefulvalleycausesleadersecretseconddamagesportsexceptratingsignedthingseffectfieldsstatesofficevisualeditorvolumeReportmuseummoviesparentaccessmostlymother" id="marketgroundchancesurveybeforesymbolmomentspeechmotioninsidematterCenterobjectexistsmiddleEuropegrowthlegacymannerenoughcareeransweroriginportalclientselectrandomclosedtopicscomingfatheroptionsimplyraisedescapechosenchurchdefinereasoncorneroutputmemoryiframepolicemodelsNumberduringoffersstyleskilledlistedcalledsilvermargindeletebetterbrowselimitsGlobalsinglewidgetcenterbudgetnowrapcreditclaimsenginesafetychoicespirit-stylespreadmakingneededrussiapleaseextentScriptbrokenallowschargedividefactormember-basedtheoryconfigaroundworkedhelpedChurchimpactshouldalwayslogo" bottomlist">){var prefixorangeHeader.push(couplegardenbridgelaunchReviewtakingvisionlittledatingButtonbeautythemesforgotSearchanchoralmostloadedChangereturnstringreloadMobileincomesupplySourceordersviewed&nbsp;courseAbout island<html cookiename="amazonmodernadvicein</a>: The dialoghousesBEGIN MexicostartscentreheightaddingIslandassetsEmpireSchooleffortdirectnearlymanualSelect.
+
+Onejoinedmenu">PhilipawardshandleimportOfficeregardskillsnationSportsdegreeweekly (e.g.behinddoctorloggedunited</b></beginsplantsassistartistissued300px|canadaagencyschemeremainBrazilsamplelogo">beyond-scaleacceptservedmarineFootercamera</h1>
+_form"leavesstress" />
+.gif" onloadloaderOxfordsistersurvivlistenfemaleDesignsize="appealtext">levelsthankshigherforcedanimalanyoneAfricaagreedrecentPeople<br />wonderpricesturned|| {};main">inlinesundaywrap">failedcensusminutebeaconquotes150px|estateremoteemail"linkedright;signalformal1.htmlsignupprincefloat:.png" forum.AccesspaperssoundsextendHeightsliderUTF-8"&amp; Before. WithstudioownersmanageprofitjQueryannualparamsboughtfamousgooglelongeri++) {israelsayingdecidehome">headerensurebranchpiecesblock;statedtop"><racingresize--&gt;pacitysexualbureau.jpg" 10,000obtaintitlesamount, Inc.comedymenu" lyricstoday.indeedcounty_logo.FamilylookedMarketlse ifPlayerturkey);var forestgivingerrorsDomain}else{insertBlog</footerlogin.fasteragents<body 10px 0pragmafridayjuniordollarplacedcoversplugin5,000 page">boston.test(avatartested_countforumsschemaindex,filledsharesreaderalert(appearSubmitline">body">
+* TheThoughseeingjerseyNews</verifyexpertinjurywidth=CookieSTART across_imagethreadnativepocketbox">
+System DavidcancertablesprovedApril reallydriveritem">more">boardscolorscampusfirst || [];media.guitarfinishwidth:showedOther .php" assumelayerswilsonstoresreliefswedenCustomeasily your String
+
+Whiltaylorclear:resortfrenchthough") + "<body>buyingbrandsMembername">oppingsector5px;">vspacepostermajor coffeemartinmaturehappen</nav>kansaslink">Images=falsewhile hspace0&amp; 
+
+In  powerPolski-colorjordanBottomStart -count2.htmlnews">01.jpgOnline-rightmillerseniorISBN 00,000 guidesvalue)ectionrepair.xml"  rights.html-blockregExp:hoverwithinvirginphones</tr>using 
+	var >');
+	</td>
+</tr>
+bahasabrasilgalegomagyarpolskisrpskiردو中文简体繁體信息中国我们一个公司管理论坛可以服务时间个人产品自己企业查看工作联系没有网站所有评论中心文章用户首页作者技术问题相关下载搜索使用软件在线主题资料视频回复注册网络收藏内容推荐市场消息空间发布什么好友生活图片发展如果手机新闻最新方式北京提供关于更多这个系统知道游戏广告其他发表安全第一会员进行点击版权电子世界设计免费教育加入活动他们商品博客现在上海如何已经留言详细社区登录本站需要价格支持国际链接国家建设朋友阅读法律位置经济选择这样当前分类排行因为交易最后音乐不能通过行业科技可能设备合作大家社会研究专业全部项目这里还是开始情况电脑文件品牌帮助文化资源大学学习地址浏览投资工程要求怎么时候功能主要目前资讯城市方法电影招聘声明任何健康数据美国汽车介绍但是交流生产所以电话显示一些单位人员分析地图旅游工具学生系列网友帖子密码频道控制地区基本全国网上重要第二喜欢进入友情这些考试发现培训以上政府成为环境香港同时娱乐发送一定开发作品标准欢迎解决地方一下以及责任或者客户代表积分女人数码销售出现离线应用列表不同编辑统计查询不要有关机构很多播放组织政策直接能力来源時間看到热门关键专区非常英语百度希望美女比较知识规定建议部门意见精彩日本提高发言方面基金处理权限影片银行还有分享物品经营添加专家这种话题起来业务公告记录简介质量男人影响引用报告部分快速咨询时尚注意申请学校应该历史只是返回购买名称为了成功说明供应孩子专题程序一般會員只有其它保护而且今天窗口动态状态特别认为必须更新小说我們作为媒体包括那么一样国内是否根据电视学院具有过程由于人才出来不过正在明星故事关系标题商务输入一直基础教学了解建筑结果全球通知计划对于艺术相册发生真的建立等级类型经验实现制作来自标签以下原创无法其中個人一切指南关闭集团第三关注因此照片深圳商业广州日期高级最近综合表示专辑行为交通评价觉得精华家庭完成感觉安装得到邮件制度食品虽然转载报价记者方案行政人民用品东西提出酒店然后付款热点以前完全发帖设置领导工业医院看看经典原因平台各种增加材料新增之后职业效果今年论文我国告诉版主修改参与打印快乐机械观点存在精神获得利用继续你们这么模式语言能够雅虎操作风格一起科学体育短信条件治疗运动产业会议导航先生联盟可是問題结构作用调查資料自动负责农业访问实施接受讨论那个反馈加强女性范围服務休闲今日客服觀看参加的话一点保证图书有效测试移动才能决定股票不断需求不得办法之间采用营销投诉目标爱情摄影有些複製文学机会数字装修购物农村全面精品其实事情水平提示上市谢谢普通教师上传类别歌曲拥有创新配件只要时代資訊达到人生订阅老师展示心理贴子網站主題自然级别简单改革那些来说打开代码删除证券节目重点次數多少规划资金找到以后大全主页最佳回答天下保障现代检查投票小时沒有正常甚至代理目录公开复制金融幸福版本形成准备行情回到思想怎样协议认证最好产生按照服装广东动漫采购新手组图面板参考政治容易天地努力人们升级速度人物调整流行造成文字韩国贸易开展相關表现影视如此美容大小报道条款心情许多法规家居书店连接立即举报技巧奥运登入以来理论事件自由中华办公妈妈真正不错全文合同价值别人监督具体世纪团队创业承担增长有人保持商家维修台湾左右股份答案实际电信经理生命宣传任务正式特色下来协会只能当然重新內容指导运行日志賣家超过土地浙江支付推出站长杭州执行制造之一推广现场描述变化传统歌手保险课程医疗经过过去之前收入年度杂志美丽最高登陆未来加工免责教程版块身体重庆出售成本形式土豆出價东方邮箱南京求职取得职位相信页面分钟网页确定图例网址积极错误目的宝贝机关风险授权病毒宠物除了評論疾病及时求购站点儿童每天中央认识每个天津字体台灣维护本页个性官方常见相机战略应当律师方便校园股市房屋栏目员工导致突然道具本网结合档案劳动另外美元引起改变第四会计說明隐私宝宝规范消费共同忘记体系带来名字發表开放加盟受到二手大量成人数量共享区域女孩原则所在结束通信超级配置当时优秀性感房产遊戲出口提交就业保健程度参数事业整个山东情感特殊分類搜尋属于门户财务声音及其财经坚持干部成立利益考虑成都包装用戶比赛文明招商完整真是眼睛伙伴威望领域卫生优惠論壇公共良好充分符合附件特点不可英文资产根本明显密碼公众民族更加享受同学启动适合原来问答本文美食绿色稳定终于生物供求搜狐力量严重永远写真有限竞争对象费用不好绝对十分促进点评影音优势不少欣赏并且有点方向全新信用设施形象资格突破随着重大于是毕业智能化工完美商城统一出版打造產品概况用于保留因素中國存储贴图最愛长期口价理财基地安排武汉里面创建天空首先完善驱动下面不再诚信意义阳光英国漂亮军事玩家群众农民即可名稱家具动画想到注明小学性能考研硬件观看清楚搞笑首頁黄金适用江苏真实主管阶段註冊
翻译权利做好似乎通讯施工狀態也许环保培养概念大型机票理解匿名cuandoenviarmadridbuscariniciotiempoporquecuentaestadopuedenjuegoscontraestánnombretienenperfilmaneraamigosciudadcentroaunquepuedesdentroprimerpreciosegúnbuenosvolverpuntossemanahabíaagostonuevosunidoscarlosequiponiñosmuchosalgunacorreoimagenpartirarribamaríahombreempleoverdadcambiomuchasfueronpasadolíneaparecenuevascursosestabaquierolibroscuantoaccesomiguelvarioscuatrotienesgruposseráneuropamediosfrenteacercademásofertacochesmodeloitalialetrasalgúncompracualesexistecuerposiendoprensallegarviajesdineromurciapodrápuestodiariopuebloquieremanuelpropiocrisisciertoseguromuertefuentecerrargrandeefectopartesmedidapropiaofrecetierrae-mailvariasformasfuturoobjetoseguirriesgonormasmismosúnicocaminositiosrazóndebidopruebatoledoteníajesúsesperococinaorigentiendacientocádizhablarseríalatinafuerzaestiloguerraentraréxitolópezagendavídeoevitarpaginametrosjavierpadresfácilcabezaáreassalidaenvíojapónabusosbienestextosllevarpuedanfuertecomúnclaseshumanotenidobilbaounidadestáseditarcreadoдлячтокакилиэтовсеегопритакещеужеКакбезбылониВсеподЭтотомчемнетлетразонагдемнеДляПринаснихтемктогодвоттамСШАмаяЧтовасвамемуТакдванамэтиэтуВамтехпротутнаддняВоттринейВаснимсамтотрубОнимирнееОООлицэтаОнанемдоммойдвеоносудकेहैकीसेकाकोऔरपरनेएककिभीइसकरतोहोआपहीयहयातकथाjagranआजजोअबदोगईजागएहमइनवहयेथेथीघरजबदीकईजीवेनईनएहरउसमेकमवोलेसबमईदेओरआमबसभरबनचलमनआगसीलीعلىإلىهذاآخرعددالىهذهصورغيركانولابينعرضذلكهنايومقالعليانالكنحتىقبلوحةاخرفقطعبدركنإذاكمااحدإلافيهبعضكيفبحثومنوهوأناجدالهاسلمعندليسعبرصلىمنذبهاأنهمثلكنتالاحيثمصرشرححولوفياذالكلمرةانتالفأبوخاصأنتانهاليعضووقدابنخيربنتلكمشاءوهيابوقصصومارقمأحدنحنعدمرأياحةكتبدونيجبمنهتحتجهةسنةيتمكرةغزةنفسبيتللهلناتلكقلبلماعنهأولشيءنورأمافيكبكلذاترتببأنهمسانكبيعفقدحسنلهمشعرأهلشهرقطرطلبprofileservicedefaulthimselfdetailscontentsupportstartedmessagesuccessfashion<title>countryaccountcreatedstoriesresultsrunningprocesswritingobjectsvisiblewelcomearticleunknownnetworkcompanydynamicbrowserprivacyproblemServicerespectdisplayrequestreservewebsitehistoryfriendsop
tionsworkingversionmillionchannelwindow.addressvisitedweathercorrectproductedirectforwardyou canremovedsubjectcontrolarchivecurrentreadinglibrarylimitedmanagerfurthersummarymachineminutesprivatecontextprogramsocietynumberswrittenenabledtriggersourcesloadingelementpartnerfinallyperfectmeaningsystemskeepingculture&quot;,journalprojectsurfaces&quot;expiresreviewsbalanceEnglishContentthroughPlease opinioncontactaverageprimaryvillageSpanishgallerydeclinemeetingmissionpopularqualitymeasuregeneralspeciessessionsectionwriterscounterinitialreportsfiguresmembersholdingdisputeearlierexpressdigitalpictureAnothermarriedtrafficleadingchangedcentralvictoryimages/reasonsstudiesfeaturelistingmust beschoolsVersionusuallyepisodeplayinggrowingobviousoverlaypresentactions</ul>
+wrapperalreadycertainrealitystorageanotherdesktopofferedpatternunusualDigitalcapitalWebsitefailureconnectreducedAndroiddecadesregular &amp; animalsreleaseAutomatgettingmethodsnothingPopularcaptionletterscapturesciencelicensechangesEngland=1&amp;History = new CentralupdatedSpecialNetworkrequirecommentwarningCollegetoolbarremainsbecauseelectedDeutschfinanceworkersquicklybetweenexactlysettingdiseaseSocietyweaponsexhibit&lt;!--Controlclassescoveredoutlineattacksdevices(windowpurposetitle="Mobile killingshowingItaliandroppedheavilyeffects-1']);
+confirmCurrentadvancesharingopeningdrawingbillionorderedGermanyrelated</form>includewhetherdefinedSciencecatalogArticlebuttonslargestuniformjourneysidebarChicagoholidayGeneralpassage,&quot;animatefeelingarrivedpassingnaturalroughly.
+
+The but notdensityBritainChineselack oftributeIreland" data-factorsreceivethat isLibraryhusbandin factaffairsCharlesradicalbroughtfindinglanding:lang="return leadersplannedpremiumpackageAmericaEdition]&quot;Messageneed tovalue="complexlookingstationbelievesmaller-mobilerecordswant tokind ofFirefoxyou aresimilarstudiedmaximumheadingrapidlyclimatekingdomemergedamountsfoundedpioneerformuladynastyhow to SupportrevenueeconomyResultsbrothersoldierlargelycalling.&quot;AccountEdward segmentRobert effortsPacificlearnedup withheight:we haveAngelesnations_searchappliedacquiremassivegranted: falsetreatedbiggestbenefitdrivingStudiesminimumperhapsmorningsellingis usedreversevariant role="missingachievepromotestudentsomeoneextremerestorebottom:evolvedall thesitemapenglishway to  AugustsymbolsCompanymattersmusicalagainstserving})();
+paymenttroubleconceptcompareparentsplayersregionsmonitor ''The winningexploreadaptedGalleryproduceabilityenhancecareers). The collectSearch ancientexistedfooter handlerprintedconsoleEasternexportswindowsChannelillegalneutralsuggest_headersigning.html">settledwesterncausing-webkitclaimedJusticechaptervictimsThomas mozillapromisepartieseditionoutside:false,hundredOlympic_buttonauthorsreachedchronicdemandssecondsprotectadoptedprepareneithergreatlygreateroverallimprovecommandspecialsearch.worshipfundingthoughthighestinsteadutilityquarterCulturetestingclearlyexposedBrowserliberal} catchProjectexamplehide();FloridaanswersallowedEmperordefenseseriousfreedomSeveral-buttonFurtherout of != nulltrainedDenmarkvoid(0)/all.jspreventRequestStephen
+
+When observe</h2>
+Modern provide" alt="borders.
+
+For 
+
+Many artistspoweredperformfictiontype ofmedicalticketsopposedCouncilwitnessjusticeGeorge Belgium...</a>twitternotablywaitingwarfare Other rankingphrasesmentionsurvivescholar</p>
+ Countryignoredloss ofjust asGeorgiastrange<head><stopped1']);
+islandsnotableborder:list ofcarried100,000</h3>
+ severalbecomesselect wedding00.htmlmonarchoff theteacherhighly biologylife ofor evenrise of&raquo;plusonehunting(thoughDouglasjoiningcirclesFor theAncientVietnamvehiclesuch ascrystalvalue =Windowsenjoyeda smallassumed<a id="foreign All rihow theDisplayretiredhoweverhidden;battlesseekingcabinetwas notlook atconductget theJanuaryhappensturninga:hoverOnline French lackingtypicalextractenemieseven ifgeneratdecidedare not/searchbeliefs-image:locatedstatic.login">convertviolententeredfirst">circuitFinlandchemistshe was10px;">as suchdivided</span>will beline ofa greatmystery/index.fallingdue to railwaycollegemonsterdescentit withnuclearJewish protestBritishflowerspredictreformsbutton who waslectureinstantsuicidegenericperiodsmarketsSocial fishingcombinegraphicwinners<br /><by the NaturalPrivacycookiesoutcomeresolveSwedishbrieflyPersianso muchCenturydepictscolumnshousingscriptsnext tobearingmappingrevisedjQuery(-width:title">tooltipSectiondesignsTurkishyounger.match(})();
+
+burningoperatedegreessource=Richardcloselyplasticentries</tr>
+color:#ul id="possessrollingphysicsfailingexecutecontestlink toDefault<br />
+: true,chartertourismclassicproceedexplain</h1>
+online.?xml vehelpingdiamonduse theairlineend -->).attr(readershosting#ffffffrealizeVincentsignals src="/ProductdespitediversetellingPublic held inJoseph theatreaffects<style>a largedoesn'tlater, ElementfaviconcreatorHungaryAirportsee theso thatMichaelSystemsPrograms, and  width=e&quot;tradingleft">
+personsGolden Affairsgrammarformingdestroyidea ofcase ofoldest this is.src = cartoonregistrCommonsMuslimsWhat isin manymarkingrevealsIndeed,equally/show_aoutdoorescape(Austriageneticsystem,In the sittingHe alsoIslandsAcademy
+		<!--Daniel bindingblock">imposedutilizeAbraham(except{width:putting).html(|| [];
+DATA[ *kitchenmountedactual dialectmainly _blank'installexpertsif(typeIt also&copy; ">Termsborn inOptionseasterntalkingconcerngained ongoingjustifycriticsfactoryits ownassaultinvitedlastinghis ownhref="/" rel="developconcertdiagramdollarsclusterphp?id=alcohol);})();using a><span>vesselsrevivalAddressamateurandroidallegedillnesswalkingcentersqualifymatchesunifiedextinctDefensedied in
+	<!-- customslinkingLittle Book ofeveningmin.js?are thekontakttoday's.html" target=wearingAll Rig;
+})();raising Also, crucialabout">declare-->
+<scfirefoxas muchappliesindex, s, but type = 
+
+<!--towardsRecordsPrivateForeignPremierchoicesVirtualreturnsCommentPoweredinline;povertychamberLiving volumesAnthonylogin" RelatedEconomyreachescuttinggravitylife inChapter-shadowNotable</td>
+ returnstadiumwidgetsvaryingtravelsheld bywho arework infacultyangularwho hadairporttown of
+
+Some 'click'chargeskeywordit willcity of(this);Andrew unique checkedor more300px; return;rsion="pluginswithin herselfStationFederalventurepublishsent totensionactresscome tofingersDuke ofpeople,exploitwhat isharmonya major":"httpin his menu">
+monthlyofficercouncilgainingeven inSummarydate ofloyaltyfitnessand wasemperorsupremeSecond hearingRussianlongestAlbertalateralset of small">.appenddo withfederalbank ofbeneathDespiteCapitalgrounds), and percentit fromclosingcontainInsteadfifteenas well.yahoo.respondfighterobscurereflectorganic= Math.editingonline paddinga wholeonerroryear ofend of barrierwhen itheader home ofresumedrenamedstrong>heatingretainscloudfrway of March 1knowingin partBetweenlessonsclosestvirtuallinks">crossedEND -->famous awardedLicenseHealth fairly wealthyminimalAfricancompetelabel">singingfarmersBrasil)discussreplaceGregoryfont copursuedappearsmake uproundedboth ofblockedsaw theofficescoloursif(docuwhen heenforcepush(fuAugust UTF-8">Fantasyin mostinjuredUsuallyfarmingclosureobject defenceuse of Medical<body>
+evidentbe usedkeyCodesixteenIslamic#000000entire widely active (typeofone cancolor =speakerextendsPhysicsterrain<tbody>funeralviewingmiddle cricketprophetshifteddoctorsRussell targetcompactalgebrasocial-bulk ofman and</td>
+ he left).val()false);logicalbankinghome tonaming Arizonacredits);
+});
+founderin turnCollinsbefore But thechargedTitle">CaptainspelledgoddessTag -->Adding:but wasRecent patientback in=false&Lincolnwe knowCounterJudaismscript altered']);
+  has theunclearEvent',both innot all
+
+<!-- placinghard to centersort ofclientsstreetsBernardassertstend tofantasydown inharbourFreedomjewelry/about..searchlegendsis mademodern only ononly toimage" linear painterand notrarely acronymdelivershorter00&amp;as manywidth="/* <![Ctitle =of the lowest picked escapeduses ofpeoples PublicMatthewtacticsdamagedway forlaws ofeasy to windowstrong  simple}catch(seventhinfoboxwent topaintedcitizenI don'tretreat. Some ww.");
+bombingmailto:made in. Many carries||{};wiwork ofsynonymdefeatsfavoredopticalpageTraunless sendingleft"><comScorAll thejQuery.touristClassicfalse" Wilhelmsuburbsgenuinebishops.split(global followsbody ofnominalContactsecularleft tochiefly-hidden-banner</li>
+
+. When in bothdismissExplorealways via thespañolwelfareruling arrangecaptainhis sonrule ofhe tookitself,=0&amp;(calledsamplesto makecom/pagMartin Kennedyacceptsfull ofhandledBesides//--></able totargetsessencehim to its by common.mineralto takeways tos.org/ladvisedpenaltysimple:if theyLettersa shortHerbertstrikes groups.lengthflightsoverlapslowly lesser social </p>
+		it intoranked rate oful>
+  attemptpair ofmake itKontaktAntoniohaving ratings activestreamstrapped").css(hostilelead tolittle groups,Picture-->
+
+ rows=" objectinverse<footerCustomV><\/scrsolvingChamberslaverywoundedwhereas!= 'undfor allpartly -right:Arabianbacked centuryunit ofmobile-Europe,is homerisk ofdesiredClintoncost ofage of become none ofp&quot;Middle ead')[0Criticsstudios>&copy;group">assemblmaking pressedwidget.ps:" ? rebuiltby someFormer editorsdelayedCanonichad thepushingclass="but arepartialBabylonbottom carrierCommandits useAs withcoursesa thirddenotesalso inHouston20px;">accuseddouble goal ofFamous ).bind(priests Onlinein Julyst + "gconsultdecimalhelpfulrevivedis veryr'+'iptlosing femalesis alsostringsdays ofarrivalfuture <objectforcingString(" />
+		here isencoded.  The balloondone by/commonbgcolorlaw of Indianaavoidedbut the2px 3pxjquery.after apolicy.men andfooter-= true;for usescreen.Indian image =family,http:// &nbsp;driverseternalsame asnoticedviewers})();
+ is moreseasonsformer the newis justconsent Searchwas thewhy theshippedbr><br>width: height=made ofcuisineis thata very Admiral fixed;normal MissionPress, ontariocharsettry to invaded="true"spacingis mosta more totallyfall of});
+  immensetime inset outsatisfyto finddown tolot of Playersin Junequantumnot thetime todistantFinnishsrc = (single help ofGerman law andlabeledforestscookingspace">header-well asStanleybridges/globalCroatia About [0];
+  it, andgroupedbeing a){throwhe madelighterethicalFFFFFF"bottom"like a employslive inas seenprintermost ofub-linkrejectsand useimage">succeedfeedingNuclearinformato helpWomen'sNeitherMexicanprotein<table by manyhealthylawsuitdevised.push({sellerssimply Through.cookie Image(older">us.js"> Since universlarger open to!-- endlies in']);
+  marketwho is ("DOMComanagedone fortypeof Kingdomprofitsproposeto showcenter;made itdressedwere inmixtureprecisearisingsrc = 'make a securedBaptistvoting 
+		var March 2grew upClimate.removeskilledway the</head>face ofacting right">to workreduceshas haderectedshow();action=book ofan area== "htt<header
+<html>conformfacing cookie.rely onhosted .customhe wentbut forspread Family a meansout theforums.footage">MobilClements" id="as highintense--><!--female is seenimpliedset thea stateand hisfastestbesidesbutton_bounded"><img Infoboxevents,a youngand areNative cheaperTimeoutand hasengineswon the(mostlyright: find a -bottomPrince area ofmore ofsearch_nature,legallyperiod,land ofor withinducedprovingmissilelocallyAgainstthe wayk&quot;px;">
+pushed abandonnumeralCertainIn thismore inor somename isand, incrownedISBN 0-createsOctobermay notcenter late inDefenceenactedwish tobroadlycoolingonload=it. TherecoverMembersheight assumes<html>
+people.in one =windowfooter_a good reklamaothers,to this_cookiepanel">London,definescrushedbaptismcoastalstatus title" move tolost inbetter impliesrivalryservers SystemPerhapses and contendflowinglasted rise inGenesisview ofrising seem tobut in backinghe willgiven agiving cities.flow of Later all butHighwayonly bysign ofhe doesdiffersbattery&amp;lasinglesthreatsintegertake onrefusedcalled =US&ampSee thenativesby thissystem.head of:hover,lesbiansurnameand allcommon/header__paramsHarvard/pixel.removalso longrole ofjointlyskyscraUnicodebr />
+AtlantanucleusCounty,purely count">easily build aonclicka givenpointerh&quot;events else {
+ditionsnow the, with man whoorg/Webone andcavalryHe diedseattle00,000 {windowhave toif(windand itssolely m&quot;renewedDetroitamongsteither them inSenatorUs</a><King ofFrancis-produche usedart andhim andused byscoringat hometo haverelatesibilityfactionBuffalolink"><what hefree toCity ofcome insectorscountedone daynervoussquare };if(goin whatimg" alis onlysearch/tuesdaylooselySolomonsexual - <a hrmedium"DO NOT France,with a war andsecond take a >
+
+
+market.highwaydone inctivity"last">obligedrise to"undefimade to Early praisedin its for hisathleteJupiterYahoo! termed so manyreally s. The a woman?value=direct right" bicycleacing="day andstatingRather,higher Office are nowtimes, when a pay foron this-link">;borderaround annual the Newput the.com" takin toa brief(in thegroups.; widthenzymessimple in late{returntherapya pointbanninginks">
+();" rea place\u003Caabout atr>
+		ccount gives a<SCRIPTRailwaythemes/toolboxById("xhumans,watchesin some if (wicoming formats Under but hashanded made bythan infear ofdenoted/iframeleft involtagein eacha&quot;base ofIn manyundergoregimesaction </p>
+<ustomVa;&gt;</importsor thatmostly &amp;re size="</a></ha classpassiveHost = WhetherfertileVarious=[];(fucameras/></td>acts asIn some>
+
+<!organis <br />Beijingcatalàdeutscheuropeueuskaragaeilgesvenskaespañamensajeusuariotrabajoméxicopáginasiempresistemaoctubreduranteañadirempresamomentonuestroprimeratravésgraciasnuestraprocesoestadoscalidadpersonanúmeroacuerdomúsicamiembroofertasalgunospaísesejemploderechoademásprivadoagregarenlacesposiblehotelessevillaprimeroúltimoeventosarchivoculturamujeresentradaanuncioembargomercadograndesestudiomejoresfebrerodiseñoturismocódigoportadaespaciofamiliaantoniopermiteguardaralgunaspreciosalguiensentidovisitastítuloconocersegundoconsejofranciaminutossegundatenemosefectosmálagasesiónrevistagranadacompraringresogarcíaacciónecuadorquienesinclusodeberámateriahombresmuestrapodríamañanaúltimaestamosoficialtambienningúnsaludospodemosmejorarpositionbusinesshomepagesecuritylanguagestandardcampaignfeaturescategoryexternalchildrenreservedresearchexchangefavoritetemplatemilitaryindustryservicesmaterialproductsz-index:commentssoftwarecompletecalendarplatformarticlesrequiredmovementquestionbuildingpoliticspossiblereligionphysicalfeedbackregisterpicturesdisabledprotocolaudiencesettingsactivityelementslearninganythingabstractprogressoverviewmagazineeconomictrainingpressurevarious <strong>propertyshoppingtogetheradvancedbehaviordownloadfeaturedfootballselectedLanguagedistanceremembertrackingpasswordmodifiedstudentsdirectlyfightingnortherndatabasefestivalbreakinglocationinternetdropdownpracticeevidencefunctionmarriageresponseproblemsnegativeprogramsanalysisreleasedbanner">purchasepoliciesregionalcreativeargumentbookmarkreferrerchemicaldivisioncallbackseparateprojectsconflicthardwareinterestdeliverymountainobtained= false;for(var acceptedcapacitycomputeridentityaircraftemployedproposeddomesticincludesprovidedhospitalverticalcollapseapproachpartnerslogo"><adaughterauthor" culturalfamilies/images/assemblypowerfulteachingfinisheddistrictcriticalcgi-bin/purposesrequireselectionbecomingprovidesacademicexerciseactuallymedicineconstantaccidentMagazinedocumentstartingbottom">observed: 
&quot;extendedpreviousSoftwarecustomerdecisionstrengthdetailedslightlyplanningtextareacurrencyeveryonestraighttransferpositiveproducedheritageshippingabsolutereceivedrelevantbutton" violenceanywherebenefitslaunchedrecentlyalliancefollowedmultiplebulletinincludedoccurredinternal$(this).republic><tr><tdcongressrecordedultimatesolution<ul id="discoverHome</a>websitesnetworksalthoughentirelymemorialmessagescontinueactive">somewhatvictoriaWestern  title="LocationcontractvisitorsDownloadwithout right">
+measureswidth = variableinvolvedvirginianormallyhappenedaccountsstandingnationalRegisterpreparedcontrolsaccuratebirthdaystrategyofficialgraphicscriminalpossiblyconsumerPersonalspeakingvalidateachieved.jpg" />machines</h2>
+  keywordsfriendlybrotherscombinedoriginalcomposedexpectedadequatepakistanfollow" valuable</label>relativebringingincreasegovernorplugins/List of Header">" name=" (&quot;graduate</head>
+commercemalaysiadirectormaintain;height:schedulechangingback to catholicpatternscolor: #greatestsuppliesreliable</ul>
+		<select citizensclothingwatching<li id="specificcarryingsentence<center>contrastthinkingcatch(e)southernMichael merchantcarouselpadding:interior.split("lizationOctober ){returnimproved--&gt;
+
+coveragechairman.png" />subjectsRichard whateverprobablyrecoverybaseballjudgmentconnect..css" /> websitereporteddefault"/></a>
+electricscotlandcreationquantity. ISBN 0did not instance-search-" lang="speakersComputercontainsarchivesministerreactiondiscountItalianocriteriastrongly: 'http:'script'coveringofferingappearedBritish identifyFacebooknumerousvehiclesconcernsAmericanhandlingdiv id="William provider_contentaccuracysection andersonflexibleCategorylawrence<script>layout="approved maximumheader"></table>Serviceshamiltoncurrent canadianchannels/themes//articleoptionalportugalvalue=""intervalwirelessentitledagenciesSearch" measuredthousandspending&hellip;new Date" size="pageNamemiddle" " /></a>hidden">sequencepersonaloverflowopinionsillinoislinks">
+	<title>versionssaturdayterminalitempropengineersectionsdesignerproposal="false"Españolreleasessubmit" er&quot;additionsymptomsorientedresourceright"><pleasurestationshistory.leaving  border=contentscenter">.
+
+Some directedsuitablebulgaria.show();designedGeneral conceptsExampleswilliamsOriginal"><span>search">operatorrequestsa &quot;allowingDocumentrevision. 
+
+The yourselfContact michiganEnglish columbiapriorityprintingdrinkingfacilityreturnedContent officersRussian generate-8859-1"indicatefamiliar qualitymargin:0 contentviewportcontacts-title">portable.length eligibleinvolvesatlanticonload="default.suppliedpaymentsglossary
+
+After guidance</td><tdencodingmiddle">came to displaysscottishjonathanmajoritywidgets.clinicalthailandteachers<head>
+	affectedsupportspointer;toString</small>oklahomawill be investor0" alt="holidaysResourcelicensed (which . After considervisitingexplorerprimary search" android"quickly meetingsestimate;return ;color:# height=approval, &quot; checked.min.js"magnetic></a></hforecast. While thursdaydvertise&eacute;hasClassevaluateorderingexistingpatients Online coloradoOptions"campbell<!-- end</span><<br />
+_popups|sciences,&quot; quality Windows assignedheight: <b classle&quot; value=" Companyexamples<iframe believespresentsmarshallpart of properly).
+
+The taxonomymuch of </span>
+" data-srtuguêsscrollTo project<head>
+attorneyemphasissponsorsfancyboxworld's wildlifechecked=sessionsprogrammpx;font- Projectjournalsbelievedvacationthompsonlightingand the special border=0checking</tbody><button Completeclearfix
+<head>
+article <sectionfindingsrole in popular  Octoberwebsite exposureused to  changesoperatedclickingenteringcommandsinformed numbers  </div>creatingonSubmitmarylandcollegesanalyticlistingscontact.loggedInadvisorysiblingscontent"s&quot;)s. This packagescheckboxsuggestspregnanttomorrowspacing=icon.pngjapanesecodebasebutton">gamblingsuch as , while </span> missourisportingtop:1px .</span>tensionswidth="2lazyloadnovemberused in height="cript">
+&nbsp;</<tr><td height:2/productcountry include footer" &lt;!-- title"></jquery.</form>
+(简体)(繁體)hrvatskiitalianoromânătürkçeاردوtambiénnoticiasmensajespersonasderechosnacionalserviciocontactousuariosprogramagobiernoempresasanunciosvalenciacolombiadespuésdeportesproyectoproductopúbliconosotroshistoriapresentemillonesmediantepreguntaanteriorrecursosproblemasantiagonuestrosopiniónimprimirmientrasaméricavendedorsociedadrespectorealizarregistropalabrasinterésentoncesespecialmiembrosrealidadcórdobazaragozapáginassocialesbloqueargestiónalquilersistemascienciascompletoversióncompletaestudiospúblicaobjetivoalicantebuscadorcantidadentradasaccionesarchivossuperiormayoríaalemaniafunciónúltimoshaciendoaquellosediciónfernandoambientefacebooknuestrasclientesprocesosbastantepresentareportarcongresopublicarcomerciocontratojóvenesdistritotécnicaconjuntoenergíatrabajarasturiasrecienteutilizarboletínsalvadorcorrectatrabajosprimerosnegocioslibertaddetallespantallapróximoalmeríaanimalesquiénescorazónsecciónbuscandoopcionesexteriorconceptotodavíagaleríaescribirmedicinalicenciaconsultaaspectoscríticadólaresjusticiadeberánperíodonecesitamantenerpequeñorecibidatribunaltenerifecancióncanariasdescargadiversosmallorcarequieretécnicodeberíaviviendafinanzasadelantefuncionaconsejosdifícilciudadesantiguasavanzadatérminounidadessánchezcampañasoftonicrevistascontienesectoresmomentosfacultadcréditodiversassupuestofactoressegundospequeñaгодаеслиестьбылобытьэтомЕслитогоменявсехэтойдажебылигодуденьэтотбыласебяодинсебенадосайтфотонегосвоисвойигрытожевсемсвоюлишьэтихпокаднейдомамиралиботемухотядвухсетилюдиделомиретебясвоевидечегоэтимсчеттемыценысталведьтемеводытебевышенамитипатомуправлицаоднагодызнаюмогудругвсейидеткиноодноделаделесрокиюнявесьЕстьразанашиاللهالتيجميعخاصةالذيعليهجديدالآنالردتحكمصفحةكانتاللييكونشبكةفيهابناتحواءأكثرخلالالحبدليلدروساضغطتكونهناكساحةناديالطبعليكشكرايمكنمنهاشركةرئيسنشيطماذاالفنشبابتعبررحمةكافةيقولمركزكلمةأحمدقلبييعنيصورةطريقشاركجوالأخرىمعناابحثعروضبشكلمسجلبنانخالدكتابكليةبدونأيضايوجدفريقكتبتأفضلمطبخاكثرباركافضلاحلىنفسهأيامردودأنهاديناالانمعرضتعلمداخلممكن           
           	
+
+	����        ����                  ��      ��                resourcescountriesquestionsequipmentcommunityavailablehighlightDTD/xhtmlmarketingknowledgesomethingcontainerdirectionsubscribeadvertisecharacter" value="</select>Australia" class="situationauthorityfollowingprimarilyoperationchallengedevelopedanonymousfunction functionscompaniesstructureagreement" title="potentialeducationargumentssecondarycopyrightlanguagesexclusivecondition</form>
+statementattentionBiography} else {
+solutionswhen the Analyticstemplatesdangeroussatellitedocumentspublisherimportantprototypeinfluence&raquo;</effectivegenerallytransformbeautifultransportorganizedpublishedprominentuntil thethumbnailNational .focus();over the migrationannouncedfooter">
+exceptionless thanexpensiveformationframeworkterritoryndicationcurrentlyclassNamecriticismtraditionelsewhereAlexanderappointedmaterialsbroadcastmentionedaffiliate</option>treatmentdifferent/default.Presidentonclick="biographyotherwisepermanentFrançaisHollywoodexpansionstandards</style>
+reductionDecember preferredCambridgeopponentsBusiness confusion>
+<title>presentedexplaineddoes not worldwideinterfacepositionsnewspaper</table>
+mountainslike the essentialfinancialselectionaction="/abandonedEducationparseInt(stabilityunable to</title>
+relationsNote thatefficientperformedtwo yearsSince thethereforewrapper">alternateincreasedBattle ofperceivedtrying tonecessaryportrayedelectionsElizabeth</iframe>discoveryinsurances.length;legendaryGeographycandidatecorporatesometimesservices.inherited</strong>CommunityreligiouslocationsCommitteebuildingsthe worldno longerbeginningreferencecannot befrequencytypicallyinto the relative;recordingpresidentinitiallytechniquethe otherit can beexistenceunderlinethis timetelephoneitemscopepracticesadvantage);return For otherprovidingdemocracyboth the extensivesufferingsupportedcomputers functionpracticalsaid thatit may beEnglish</from the scheduleddownloads</label>
+suspectedmargin: 0spiritual</head>
+
+microsoftgraduallydiscussedhe becameexecutivejquery.jshouseholdconfirmedpurchasedliterallydestroyedup to thevariationremainingit is notcenturiesJapanese among thecompletedalgorithminterestsrebellionundefinedencourageresizableinvolvingsensitiveuniversalprovision(althoughfeaturingconducted), which continued-header">February numerous overflow:componentfragmentsexcellentcolspan="technicalnear the Advanced source ofexpressedHong Kong Facebookmultiple mechanismelevationoffensive</form>
+	sponsoreddocument.or &quot;there arethose whomovementsprocessesdifficultsubmittedrecommendconvincedpromoting" width=".replace(classicalcoalitionhis firstdecisionsassistantindicatedevolution-wrapper"enough toalong thedelivered-->
+<!--American protectedNovember </style><furnitureInternet  onblur="suspendedrecipientbased on Moreover,abolishedcollectedwere madeemotionalemergencynarrativeadvocatespx;bordercommitteddir="ltr"employeesresearch. selectedsuccessorcustomersdisplayedSeptemberaddClass(Facebook suggestedand lateroperatingelaborateSometimesInstitutecertainlyinstalledfollowersJerusalemthey havecomputinggeneratedprovincesguaranteearbitraryrecognizewanted topx;width:theory ofbehaviourWhile theestimatedbegan to it becamemagnitudemust havemore thanDirectoryextensionsecretarynaturallyoccurringvariablesgiven theplatform.</label><failed tocompoundskinds of societiesalongside --&gt;
+
+southwestthe rightradiationmay have unescape(spoken in" href="/programmeonly the come fromdirectoryburied ina similarthey were</font></Norwegianspecifiedproducingpassenger(new DatetemporaryfictionalAfter theequationsdownload.regularlydeveloperabove thelinked tophenomenaperiod oftooltip">substanceautomaticaspect ofAmong theconnectedestimatesAir Forcesystem ofobjectiveimmediatemaking itpaintingsconqueredare stillproceduregrowth ofheaded byEuropean divisionsmoleculesfranchiseintentionattractedchildhoodalso useddedicatedsingaporedegree offather ofconflicts</a></p>
+came fromwere usednote thatreceivingExecutiveeven moreaccess tocommanderPoliticalmusiciansdeliciousprisonersadvent ofUTF-8" /><![CDATA[">ContactSouthern bgcolor="series of. It was in Europepermittedvalidate.appearingofficialsseriously-languageinitiatedextendinglong-terminflationsuch thatgetCookiemarked by</button>implementbut it isincreasesdown the requiringdependent-->
+<!-- interviewWith the copies ofconsensuswas builtVenezuela(formerlythe statepersonnelstrategicfavour ofinventionWikipediacontinentvirtuallywhich wasprincipleComplete identicalshow thatprimitiveaway frommolecularpreciselydissolvedUnder theversion=">&nbsp;</It is the This is will haveorganismssome timeFriedrichwas firstthe only fact thatform id="precedingTechnicalphysicistoccurs innavigatorsection">span id="sought tobelow thesurviving}</style>his deathas in thecaused bypartiallyexisting using thewas givena list oflevels ofnotion ofOfficial dismissedscientistresemblesduplicateexplosiverecoveredall othergalleries{padding:people ofregion ofaddressesassociateimg alt="in modernshould bemethod ofreportingtimestampneeded tothe Greatregardingseemed toviewed asimpact onidea thatthe Worldheight ofexpandingThese arecurrent">carefullymaintainscharge ofClassicaladdressedpredictedownership<div id="right">
+residenceleave thecontent">are often  })();
+probably Professor-button" respondedsays thathad to beplaced inHungarianstatus ofserves asUniversalexecutionaggregatefor whichinfectionagreed tohowever, popular">placed onconstructelectoralsymbol ofincludingreturn toarchitectChristianprevious living ineasier toprofessor
+&lt;!-- effect ofanalyticswas takenwhere thetook overbelief inAfrikaansas far aspreventedwork witha special<fieldsetChristmasRetrieved
+
+In the back intonortheastmagazines><strong>committeegoverninggroups ofstored inestablisha generalits firsttheir ownpopulatedan objectCaribbeanallow thedistrictswisconsinlocation.; width: inhabitedSocialistJanuary 1</footer>similarlychoice ofthe same specific business The first.length; desire todeal withsince theuserAgentconceivedindex.phpas &quot;engage inrecently,few yearswere also
+<head>
+<edited byare knowncities inaccesskeycondemnedalso haveservices,family ofSchool ofconvertednature of languageministers</object>there is a popularsequencesadvocatedThey wereany otherlocation=enter themuch morereflectedwas namedoriginal a typicalwhen theyengineerscould notresidentswednesdaythe third productsJanuary 2what theya certainreactionsprocessorafter histhe last contained"></div>
+</a></td>depend onsearch">
+pieces ofcompetingReferencetennesseewhich has version=</span> <</header>gives thehistorianvalue="">padding:0view thattogether,the most was foundsubset ofattack onchildren,points ofpersonal position:allegedlyClevelandwas laterand afterare givenwas stillscrollingdesign ofmakes themuch lessAmericans.
+
+After , but theMuseum oflouisiana(from theminnesotaparticlesa processDominicanvolume ofreturningdefensive00px|righmade frommouseover" style="states of(which iscontinuesFranciscobuilding without awith somewho woulda form ofa part ofbefore itknown as  Serviceslocation and oftenmeasuringand it ispaperbackvalues of
+<title>= window.determineer&quot; played byand early</center>from thisthe threepower andof &quot;innerHTML<a href="y:inline;Church ofthe eventvery highofficial -height: content="/cgi-bin/to createafrikaansesperantofrançaislatviešulietuviųČeštinačeštinaไทย日本語简体字繁體字한국어为什么计算机笔记本討論區服务器互联网房地产俱乐部出版社排行榜部落格进一步支付宝验证码委员会数据库消费者办公室讨论区深圳市播放器北京市大学生越来越管理员信息网serviciosartículoargentinabarcelonacualquierpublicadoproductospolíticarespuestawikipediasiguientebúsquedacomunidadseguridadprincipalpreguntascontenidorespondervenezuelaproblemasdiciembrerelaciónnoviembresimilaresproyectosprogramasinstitutoactividadencuentraeconomíaimágenescontactardescargarnecesarioatenciónteléfonocomisióncancionescapacidadencontraranálisisfavoritostérminosprovinciaetiquetaselementosfuncionesresultadocarácterpropiedadprincipionecesidadmunicipalcreacióndescargaspresenciacomercialopinionesejercicioeditorialsalamancagonzálezdocumentopelícularecientesgeneralestarragonaprácticanovedadespropuestapacientestécnicasobjetivoscontactosमेंलिएहैंगयासाथएवंरहेकोईकुछरहाबादकहासभीहुएरहीमैंदिनबातdiplodocsसमयरूपनामपताफिरऔसततरहलोगहुआबारदेशहुईखेलयदिकामवेबतीनबीचमौतसाललेखजॉबमददतथानहीशहरअलगकभीनगरपासरातकिएउसेगयीहूँआगेटीमखोजकारअभीगयेतुमवोटदेंअगरऐसेमेललगाहालऊपरचारऐसादेरजिसदिलबंदबनाहूंलाखजीतबटनमिलइसेआनेनयाकुललॉगभागरेलजगहरामलगेपेजहाथइसीसहीकलाठीकहाँदूरतहतसातयादआयापाककौनशामदेखयहीरायखुदलगीcategoriesexperience</title>
+Copyright javascriptconditionseverything<p class="technologybackground<a class="management&copy; 201javaScriptcharactersbreadcrumbthemselveshorizontalgovernmentCaliforniaactivitiesdiscoveredNavigationtransitionconnectionnavigationappearance</title><mcheckbox" techniquesprotectionapparentlyas well asunt', 'UA-resolutionoperationstelevisiontranslatedWashingtonnavigator. = window.impression&lt;br&gt;literaturepopulationbgcolor="#especially content="productionnewsletterpropertiesdefinitionleadershipTechnologyParliamentcomparisonul class=".indexOf("conclusiondiscussioncomponentsbiologicalRevolution_containerunderstoodnoscript><permissioneach otheratmosphere onfocus="<form id="processingthis.valuegenerationConferencesubsequentwell-knownvariationsreputationphenomenondisciplinelogo.png" (document,boundariesexpressionsettlementBackgroundout of theenterprise("https:" unescape("password" democratic<a href="/wrapper">
+membershiplinguisticpx;paddingphilosophyassistanceuniversityfacilitiesrecognizedpreferenceif (typeofmaintainedvocabularyhypothesis.submit();&amp;nbsp;annotationbehind theFoundationpublisher"assumptionintroducedcorruptionscientistsexplicitlyinstead ofdimensions onClick="considereddepartmentoccupationsoon afterinvestmentpronouncedidentifiedexperimentManagementgeographic" height="link rel=".replace(/depressionconferencepunishmenteliminatedresistanceadaptationoppositionwell knownsupplementdeterminedh1 class="0px;marginmechanicalstatisticscelebratedGovernment
+
+During tdevelopersartificialequivalentoriginatedCommissionattachment<span id="there wereNederlandsbeyond theregisteredjournalistfrequentlyall of thelang="en" </style>
+absolute; supportingextremely mainstream</strong> popularityemployment</table>
+ colspan="</form>
+  conversionabout the </p></div>integrated" lang="enPortuguesesubstituteindividualimpossiblemultimediaalmost allpx solid #apart fromsubject toin Englishcriticizedexcept forguidelinesoriginallyremarkablethe secondh2 class="<a title="(includingparametersprohibited= "http://dictionaryperceptionrevolutionfoundationpx;height:successfulsupportersmillenniumhis fatherthe &quot;no-repeat;commercialindustrialencouragedamount of unofficialefficiencyReferencescoordinatedisclaimerexpeditiondevelopingcalculatedsimplifiedlegitimatesubstring(0" class="completelyillustratefive yearsinstrumentPublishing1" class="psychologyconfidencenumber of absence offocused onjoined thestructurespreviously></iframe>once againbut ratherimmigrantsof course,a group ofLiteratureUnlike the</a>&nbsp;
+function it was theConventionautomobileProtestantaggressiveafter the Similarly," /></div>collection
+functionvisibilitythe use ofvolunteersattractionunder the threatened*<![CDATA[importancein generalthe latter</form>
+</.indexOf('i = 0; i <differencedevoted totraditionssearch forultimatelytournamentattributesso-called }
+</style>evaluationemphasizedaccessible</section>successionalong withMeanwhile,industries</a><br />has becomeaspects ofTelevisionsufficientbasketballboth sidescontinuingan article<img alt="adventureshis mothermanchesterprinciplesparticularcommentaryeffects ofdecided to"><strong>publishersJournal ofdifficultyfacilitateacceptablestyle.css"	function innovation>Copyrightsituationswould havebusinessesDictionarystatementsoften usedpersistentin Januarycomprising</title>
+	diplomaticcontainingperformingextensionsmay not beconcept of onclick="It is alsofinancial making theLuxembourgadditionalare calledengaged in"script");but it waselectroniconsubmit="
+<!-- End electricalofficiallysuggestiontop of theunlike theAustralianOriginallyreferences
+</head>
+recognisedinitializelimited toAlexandriaretirementAdventuresfour years
+
+&lt;!-- increasingdecorationh3 class="origins ofobligationregulationclassified(function(advantagesbeing the historians<base hrefrepeatedlywilling tocomparabledesignatednominationfunctionalinside therevelationend of thes for the authorizedrefused totake placeautonomouscompromisepolitical restauranttwo of theFebruary 2quality ofswfobject.understandnearly allwritten byinterviews" width="1withdrawalfloat:leftis usuallycandidatesnewspapersmysteriousDepartmentbest knownparliamentsuppressedconvenientremembereddifferent systematichas led topropagandacontrolledinfluencesceremonialproclaimedProtectionli class="Scientificclass="no-trademarksmore than widespreadLiberationtook placeday of theas long asimprisonedAdditional
+<head>
+<mLaboratoryNovember 2exceptionsIndustrialvariety offloat: lefDuring theassessmenthave been deals withStatisticsoccurrence/ul></div>clearfix">the publicmany yearswhich wereover time,synonymouscontent">
+presumablyhis familyuserAgent.unexpectedincluding challengeda minorityundefined"belongs totaken fromin Octoberposition: said to bereligious Federation rowspan="only a fewmeant thatled to the-->
+<div <fieldset>Archbishop class="nobeing usedapproachesprivilegesnoscript>
+results inmay be theEaster eggmechanismsreasonablePopulationCollectionselected">noscript>/index.phparrival of-jssdk'));managed toincompletecasualtiescompletionChristiansSeptember arithmeticproceduresmight haveProductionit appearsPhilosophyfriendshipleading togiving thetoward theguaranteeddocumentedcolor:#000video gamecommissionreflectingchange theassociatedsans-serifonkeypress; padding:He was theunderlyingtypically , and the srcElementsuccessivesince the should be networkingaccountinguse of thelower thanshows that</span>
+		complaintscontinuousquantitiesastronomerhe did notdue to itsapplied toan averageefforts tothe futureattempt toTherefore,capabilityRepublicanwas formedElectronickilometerschallengespublishingthe formerindigenousdirectionssubsidiaryconspiracydetails ofand in theaffordablesubstancesreason forconventionitemtype="absolutelysupposedlyremained aattractivetravellingseparatelyfocuses onelementaryapplicablefound thatstylesheetmanuscriptstands for no-repeat(sometimesCommercialin Americaundertakenquarter ofan examplepersonallyindex.php?</button>
+percentagebest-knowncreating a" dir="ltrLieutenant
+<div id="they wouldability ofmade up ofnoted thatclear thatargue thatto anotherchildren'spurpose offormulatedbased uponthe regionsubject ofpassengerspossession.
+
+In the Before theafterwardscurrently across thescientificcommunity.capitalismin Germanyright-wingthe systemSociety ofpoliticiandirection:went on toremoval of New York apartmentsindicationduring theunless thehistoricalhad been adefinitiveingredientattendanceCenter forprominencereadyStatestrategiesbut in theas part ofconstituteclaim thatlaboratorycompatiblefailure of, such as began withusing the to providefeature offrom which/" class="geologicalseveral ofdeliberateimportant holds thating&quot; valign=topthe Germanoutside ofnegotiatedhis careerseparationid="searchwas calledthe fourthrecreationother thanpreventionwhile the education,connectingaccuratelywere builtwas killedagreementsmuch more Due to thewidth: 100some otherKingdom ofthe entirefamous forto connectobjectivesthe Frenchpeople andfeatured">is said tostructuralreferendummost oftena separate->
+<div id Official worldwide.aria-labelthe planetand it wasd" value="looking atbeneficialare in themonitoringreportedlythe modernworking onallowed towhere the innovative</a></div>soundtracksearchFormtend to beinput id="opening ofrestrictedadopted byaddressingtheologianmethods ofvariant ofChristian very largeautomotiveby far therange frompursuit offollow thebrought toin Englandagree thataccused ofcomes frompreventingdiv style=his or hertremendousfreedom ofconcerning0 1em 1em;Basketball/style.cssan earliereven after/" title=".com/indextaking thepittsburghcontent"><script>(fturned outhaving the</span>
+ occasionalbecause itstarted tophysically></div>
+  created byCurrently, bgcolor="tabindex="disastrousAnalytics also has a><div id="</style>
+<called forsinger and.src = "//violationsthis pointconstantlyis locatedrecordingsd from thenederlandsportuguêsעבריתفارسیdesarrollocomentarioeducaciónseptiembreregistradodirecciónubicaciónpublicidadrespuestasresultadosimportantereservadosartículosdiferentessiguientesrepúblicasituaciónministerioprivacidaddirectorioformaciónpoblaciónpresidentecontenidosaccesoriostechnoratipersonalescategoríaespecialesdisponibleactualidadreferenciavalladolidbibliotecarelacionescalendariopolíticasanterioresdocumentosnaturalezamaterialesdiferenciaeconómicatransporterodríguezparticiparencuentrandiscusiónestructurafundaciónfrecuentespermanentetotalmenteможнобудетможетвремятакжечтобыболееоченьэтогокогдапослевсегосайтечерезмогутсайтажизнимеждубудутПоискздесьвидеосвязинужносвоейлюдейпорномногодетейсвоихправатакойместоимеетжизньоднойлучшепередчастичастьработновыхправособойпотомменеечисленовыеуслугоколоназадтакоетогдапочтиПослетакиеновыйстоиттакихсразуСанктфорумКогдакнигислованашейнайтисвоимсвязьлюбойчастосредиКромеФорумрынкесталипоисктысячмесяццентртрудасамыхрынкаНовыйчасовместафильммартастранместетекстнашихминутимениимеютномергородсамомэтомуконцесвоемкакойАрхивمنتدىإرسالرسالةالعامكتبهابرامجاليومالصورجديدةالعضوإضافةالقسمالعابتحميلملفاتملتقىتعديلالشعرأخبارتطويرعليكمإرفاقطلباتاللغةترتيبالناسالشيخمنتديالعربالقصصافلامعليهاتحديثاللهمالعملمكتبةيمكنكالطفلفيديوإدارةتاريخالصحةتسجيلالوقتعندمامدينةتصميمأرشيفالذينعربيةبوابةألعابالسفرمشاكلتعالىالأولالسنةجامعةالصحفالدينكلماتالخاصالملفأعضاءكتابةالخيررسائلالقلبالأدبمقاطعمراسلمنطقةالكتبالرجلاشتركالقدميعطيكsByTagName(.jpg" alt="1px solid #.gif" alt="transparentinformationapplication" onclick="establishedadvertising.png" alt="environmentperformanceappropriate&amp;mdash;immediately</strong></rather thantemperaturedevelopmentcompetitionplaceholdervisibility:copyright">0" height="even thoughreplacementdestinationCorporation<ul class="AssociationindividualsperspectivesetTimeout(url(http://mathematicsmargin-top:eventually description) 
no-repeatcollections.JPG|thumb|participate/head><bodyfloat:left;<li class="hundreds of
+
+However, compositionclear:both;cooperationwithin the label for="border-top:New Zealandrecommendedphotographyinteresting&lt;sup&gt;controversyNetherlandsalternativemaxlength="switzerlandDevelopmentessentially
+
+Although </textarea>thunderbirdrepresented&amp;ndash;speculationcommunitieslegislationelectronics
+	<div id="illustratedengineeringterritoriesauthoritiesdistributed6" height="sans-serif;capable of disappearedinteractivelooking forit would beAfghanistanwas createdMath.floor(surroundingcan also beobservationmaintenanceencountered<h2 class="more recentit has beeninvasion of).getTime()fundamentalDespite the"><div id="inspirationexaminationpreparationexplanation<input id="</a></span>versions ofinstrumentsbefore the  = 'http://Descriptionrelatively .substring(each of theexperimentsinfluentialintegrationmany peopledue to the combinationdo not haveMiddle East<noscript><copyright" perhaps theinstitutionin Decemberarrangementmost famouspersonalitycreation oflimitationsexclusivelysovereignty-content">
+<td class="undergroundparallel todoctrine ofoccupied byterminologyRenaissancea number ofsupport forexplorationrecognitionpredecessor<img src="/<h1 class="publicationmay also bespecialized</fieldset>progressivemillions ofstates thatenforcementaround the one another.parentNodeagricultureAlternativeresearcherstowards theMost of themany other (especially<td width=";width:100%independent<h3 class=" onchange=").addClass(interactionOne of the daughter ofaccessoriesbranches of
+<div id="the largestdeclarationregulationsInformationtranslationdocumentaryin order to">
+<head>
+<" height="1across the orientation);</script>implementedcan be seenthere was ademonstratecontainer">connectionsthe Britishwas written!important;px; margin-followed byability to complicatedduring the immigrationalso called<h4 class="distinctionreplaced bygovernmentslocation ofin Novemberwhether the</p>
+</div>acquisitioncalled the persecutiondesignation{font-size:appeared ininvestigateexperiencedmost likelywidely useddiscussionspresence of (document.extensivelyIt has beenit does notcontrary toinhabitantsimprovementscholarshipconsumptioninstructionfor exampleone or morepx; paddingthe currenta series ofare usuallyrole in thepreviously derivativesevidence ofexperiencescolorschemestated thatcertificate</a></div>
+ selected="high schoolresponse tocomfortableadoption ofthree yearsthe countryin Februaryso that thepeople who provided by<param nameaffected byin terms ofappointmentISO-8859-1"was born inhistorical regarded asmeasurementis based on and other : function(significantcelebrationtransmitted/js/jquery.is known astheoretical tabindex="it could be<noscript>
+having been
+<head>
+< &quot;The compilationhe had beenproduced byphilosopherconstructedintended toamong othercompared toto say thatEngineeringa differentreferred todifferencesbelief thatphotographsidentifyingHistory of Republic ofnecessarilyprobabilitytechnicallyleaving thespectacularfraction ofelectricityhead of therestaurantspartnershipemphasis onmost recentshare with saying thatfilled withdesigned toit is often"></iframe>as follows:merged withthrough thecommercial pointed outopportunityview of therequirementdivision ofprogramminghe receivedsetInterval"></span></in New Yorkadditional compression
+
+<div id="incorporate;</script><attachEventbecame the " target="_carried outSome of thescience andthe time ofContainer">maintainingChristopherMuch of thewritings of" height="2size of theversion of mixture of between theExamples ofeducationalcompetitive onsubmit="director ofdistinctive/DTD XHTML relating totendency toprovince ofwhich woulddespite thescientific legislature.innerHTML allegationsAgriculturewas used inapproach tointelligentyears later,sans-serifdeterminingPerformanceappearances, which is foundationsabbreviatedhigher thans from the individual composed ofsupposed toclaims thatattributionfont-size:1elements ofHistorical his brotherat the timeanniversarygoverned byrelated to ultimately innovationsit is stillcan only bedefinitionstoGMTStringA number ofimg class="Eventually,was changedoccurred inneighboringdistinguishwhen he wasintroducingterrestrialMany of theargues thatan Americanconquest ofwidespread were killedscreen and In order toexpected todescendantsare locatedlegislativegenerations backgroundmost peopleyears afterthere is nothe highestfrequently they do notargued thatshowed thatpredominanttheologicalby the timeconsideringshort-lived</span></a>can be usedvery littleone of the had alreadyinterpretedcommunicatefeatures ofgovernment,</noscript>entered the" height="3Independentpopulationslarge-scale. Although used in thedestructionpossibilitystarting intwo or moreexpressionssubordinatelarger thanhistory and</option>
+Continentaleliminatingwill not bepractice ofin front ofsite of theensure thatto create amississippipotentiallyoutstandingbetter thanwhat is nowsituated inmeta name="TraditionalsuggestionsTranslationthe form ofatmosphericideologicalenterprisescalculatingeast of theremnants ofpluginspage/index.php?remained intransformedHe was alsowas alreadystatisticalin favor ofMinistry ofmovement offormulationis required<link rel="This is the <a href="/popularizedinvolved inare used toand severalmade by theseems to belikely thatPalestiniannamed afterit had beenmost commonto refer tobut this isconsecutivetemporarilyIn general,conventionstakes placesubdivisionterritorialoperationalpermanentlywas largelyoutbreak ofin the pastfollowing a xmlns:og="><a class="class="textConversion may be usedmanufactureafter beingclearfix">
+question ofwas electedto become abecause of some peopleinspired bysuccessful a time whenmore commonamongst thean officialwidth:100%;technology,was adoptedto keep thesettlementslive birthsindex.html"Connecticutassigned to&amp;times;account foralign=rightthe companyalways beenreturned toinvolvementBecause thethis period" name="q" confined toa result ofvalue="" />is actuallyEnvironment
+</head>
+Conversely,>
+<div id="0" width="1is probablyhave becomecontrollingthe problemcitizens ofpoliticiansreached theas early as:none; over<table cellvalidity ofdirectly toonmousedownwhere it iswhen it wasmembers of relation toaccommodatealong with In the latethe Englishdelicious">this is notthe presentif they areand finallya matter of
+	</div>
+
+</script>faster thanmajority ofafter whichcomparativeto maintainimprove theawarded theer" class="frameborderrestorationin the sameanalysis oftheir firstDuring the continentalsequence offunction(){font-size: work on the</script>
+<begins withjavascript:constituentwas foundedequilibriumassume thatis given byneeds to becoordinatesthe variousare part ofonly in thesections ofis a commontheories ofdiscoveriesassociationedge of thestrength ofposition inpresent-dayuniversallyto form thebut insteadcorporationattached tois commonlyreasons for &quot;the can be madewas able towhich meansbut did notonMouseOveras possibleoperated bycoming fromthe primaryaddition offor severaltransferreda period ofare able tohowever, itshould havemuch larger
+	</script>adopted theproperty ofdirected byeffectivelywas broughtchildren ofProgramminglonger thanmanuscriptswar againstby means ofand most ofsimilar to proprietaryoriginatingprestigiousgrammaticalexperience.to make theIt was alsois found incompetitorsin the U.S.replace thebrought thecalculationfall of thethe generalpracticallyin honor ofreleased inresidentialand some ofking of thereaction to1st Earl ofculture andprincipally</title>
+  they can beback to thesome of hisexposure toare similarform of theaddFavoritecitizenshippart in thepeople within practiceto continue&amp;minus;approved by the first allowed theand for thefunctioningplaying thesolution toheight="0" in his bookmore than afollows thecreated thepresence in&nbsp;</td>nationalistthe idea ofa characterwere forced class="btndays of thefeatured inshowing theinterest inin place ofturn of thethe head ofLord of thepoliticallyhas its ownEducationalapproval ofsome of theeach other,behavior ofand becauseand anotherappeared onrecorded inblack&quot;may includethe world'scan lead torefers to aborder="0" government winning theresulted in while the Washington,the subjectcity in the></div>
+		reflect theto completebecame moreradioactiverejected bywithout anyhis father,which couldcopy of theto indicatea politicalaccounts ofconstitutesworked wither</a></li>of his lifeaccompaniedclientWidthprevent theLegislativedifferentlytogether inhas severalfor anothertext of thefounded thee with the is used forchanged theusually theplace wherewhereas the> <a href=""><a href="themselves,although hethat can betraditionalrole of theas a resultremoveChilddesigned bywest of theSome peopleproduction,side of thenewslettersused by thedown to theaccepted bylive in theattempts tooutside thefrequenciesHowever, inprogrammersat least inapproximatealthough itwas part ofand variousGovernor ofthe articleturned into><a href="/the economyis the mostmost widelywould laterand perhapsrise to theoccurs whenunder whichconditions.the westerntheory thatis producedthe city ofin which heseen in thethe centralbuilding ofmany of hisarea of theis the onlymost of themany of thethe WesternThere is noextended toStatisticalcolspan=2 |short storypossible totopologicalcritical ofreported toa Christiandecision tois equal toproblems ofThis can bemerchandisefor most ofno evidenceeditions ofelements in&quot;. Thecom/images/which makesthe processremains theliterature,is a memberthe popularthe ancientproblems intime of thedefeated bybody of thea few yearsmuch of thethe work ofCalifornia,served as agovernment.concepts ofmovement in		<div id="it" value="language ofas they areproduced inis that theexplain thediv></div>
+However thelead to the	<a href="/was grantedpeople havecontinuallywas seen asand relatedthe role ofproposed byof the besteach other.Constantinepeople fromdialects ofto revisionwas renameda source ofthe initiallaunched inprovide theto the westwhere thereand similarbetween twois also theEnglish andconditions,that it wasentitled tothemselves.quantity ofransparencythe same asto join thecountry andthis is theThis led toa statementcontrast tolastIndexOfthrough hisis designedthe term isis providedprotect theng</a></li>The currentthe site ofsubstantialexperience,in the Westthey shouldslovenčinacomentariosuniversidadcondicionesactividadesexperienciatecnologíaproducciónpuntuaciónaplicacióncontraseñacategoríasregistrarseprofesionaltratamientoregístratesecretaríaprincipalesprotecciónimportantesimportanciaposibilidadinteresantecrecimientonecesidadessuscribirseasociacióndisponiblesevaluaciónestudiantesresponsableresoluciónguadalajararegistradosoportunidadcomercialesfotografíaautoridadesingenieríatelevisióncompetenciaoperacionesestablecidosimplementeactualmentenavegaciónconformidadline-height:font-family:" : "http://applicationslink" href="specifically//<![CDATA[
+Organizationdistribution0px; height:relationshipdevice-width<div class="<label for="registration</noscript>
+/index.html"window.open( !important;application/independence//www.googleorganizationautocompleterequirementsconservative<form name="intellectualmargin-left:18th centuryan importantinstitutionsabbreviation<img class="organisationcivilization19th centuryarchitectureincorporated20th century-container">most notably/></a></div>notification'undefined')Furthermore,believe thatinnerHTML = prior to thedramaticallyreferring tonegotiationsheadquartersSouth AfricaunsuccessfulPennsylvaniaAs a result,<html lang="&lt;/sup&gt;dealing withphiladelphiahistorically);</script>
+padding-top:experimentalgetAttributeinstructionstechnologiespart of the =function(){subscriptionl.dtd">
+<htgeographicalConstitution', function(supported byagriculturalconstructionpublicationsfont-size: 1a variety of<div style="Encyclopediaiframe src="demonstratedaccomplisheduniversitiesDemographics);</script><dedicated toknowledge ofsatisfactionparticularly</div></div>English (US)appendChild(transmissions. However, intelligence" tabindex="float:right;Commonwealthranging fromin which theat least onereproductionencyclopedia;font-size:1jurisdictionat that time"><a class="In addition,description+conversationcontact withis generallyr" content="representing&lt;math&gt;presentationoccasionally<img width="navigation">compensationchampionshipmedia="all" violation ofreference toreturn true;Strict//EN" transactionsinterventionverificationInformation difficultiesChampionshipcapabilities<![endif]-->}
+</script>
+Christianityfor example,Professionalrestrictionssuggest thatwas released(such as theremoveClass(unemploymentthe Americanstructure of/index.html published inspan class=""><a href="/introductionbelonging toclaimed thatconsequences<meta name="Guide to theoverwhelmingagainst the concentrated,
+.nontouch observations</a>
+</div>
+f (document.border: 1px {font-size:1treatment of0" height="1modificationIndependencedivided intogreater thanachievementsestablishingJavaScript" neverthelesssignificanceBroadcasting>&nbsp;</td>container">
+such as the influence ofa particularsrc='http://navigation" half of the substantial &nbsp;</div>advantage ofdiscovery offundamental metropolitanthe opposite" xml:lang="deliberatelyalign=centerevolution ofpreservationimprovementsbeginning inJesus ChristPublicationsdisagreementtext-align:r, function()similaritiesbody></html>is currentlyalphabeticalis sometimestype="image/many of the flow:hidden;available indescribe theexistence ofall over thethe Internet	<ul class="installationneighborhoodarmed forcesreducing thecontinues toNonetheless,temperatures
+		<a href="close to theexamples of is about the(see below)." id="searchprofessionalis availablethe official		</script>
+
+		<div id="accelerationthrough the Hall of Famedescriptionstranslationsinterference type='text/recent yearsin the worldvery popular{background:traditional some of the connected toexploitationemergence ofconstitutionA History ofsignificant manufacturedexpectations><noscript><can be foundbecause the has not beenneighbouringwithout the added to the	<li class="instrumentalSoviet Unionacknowledgedwhich can bename for theattention toattempts to developmentsIn fact, the<li class="aimplicationssuitable formuch of the colonizationpresidentialcancelBubble Informationmost of the is describedrest of the more or lessin SeptemberIntelligencesrc="http://px; height: available tomanufacturerhuman rightslink href="/availabilityproportionaloutside the astronomicalhuman beingsname of the are found inare based onsmaller thana person whoexpansion ofarguing thatnow known asIn the earlyintermediatederived fromScandinavian</a></div>
+consider thean estimatedthe National<div id="pagresulting incommissionedanalogous toare required/ul>
+</div>
+was based onand became a&nbsp;&nbsp;t" value="" was capturedno more thanrespectivelycontinue to >
+<head>
+<were createdmore generalinformation used for theindependent the Imperialcomponent ofto the northinclude the Constructionside of the would not befor instanceinvention ofmore complexcollectivelybackground: text-align: its originalinto accountthis processan extensivehowever, thethey are notrejected thecriticism ofduring whichprobably thethis article(function(){It should bean agreementaccidentallydiffers fromArchitecturebetter knownarrangementsinfluence onattended theidentical tosouth of thepass throughxml" title="weight:bold;creating thedisplay:nonereplaced the<img src="/ihttps://www.World War IItestimonialsfound in therequired to and that thebetween the was designedconsists of considerablypublished bythe languageConservationconsisted ofrefer to theback to the css" media="People from available onproved to besuggestions"was known asvarieties oflikely to becomprised ofsupport the hands of thecoupled withconnect and border:none;performancesbefore beinglater becamecalculationsoften calledresidents ofmeaning that><li class="evidence forexplanationsenvironments"></a></div>which allowsIntroductiondeveloped bya wide rangeon behalf ofvalign="top"principle ofat the time,</noscript>said to havein the firstwhile othershypotheticalphilosopherspower of thecontained inperformed byinability towere writtenspan style="input name="the questionintended forrejection ofimplies thatinvented thethe standardwas probablylink betweenprofessor ofinteractionschanging theIndian Ocean class="lastworking with'http://www.years beforeThis was therecreationalentering themeasurementsan extremelyvalue of thestart of the
+</script>
+
+an effort toincrease theto the southspacing="0">sufficientlythe Europeanconverted toclearTimeoutdid not haveconsequentlyfor the nextextension ofeconomic andalthough theare producedand with theinsufficientgiven by thestating thatexpenditures</span></a>
+thought thaton the basiscellpadding=image of thereturning toinformation,separated byassassinateds" content="authority ofnorthwestern</div>
+<div "></div>
+  consultationcommunity ofthe nationalit should beparticipants align="leftthe greatestselection ofsupernaturaldependent onis mentionedallowing thewas inventedaccompanyinghis personalavailable atstudy of theon the otherexecution ofHuman Rightsterms of theassociationsresearch andsucceeded bydefeated theand from thebut they arecommander ofstate of theyears of agethe study of<ul class="splace in thewhere he was<li class="fthere are nowhich becamehe publishedexpressed into which thecommissionerfont-weight:territory ofextensions">Roman Empireequal to theIn contrast,however, andis typicallyand his wife(also called><ul class="effectively evolved intoseem to havewhich is thethere was noan excellentall of thesedescribed byIn practice,broadcastingcharged withreflected insubjected tomilitary andto the pointeconomicallysetTargetingare actuallyvictory over();</script>continuouslyrequired forevolutionaryan effectivenorth of the, which was front of theor otherwisesome form ofhad not beengenerated byinformation.permitted toincludes thedevelopment,entered intothe previousconsistentlyare known asthe field ofthis type ofgiven to thethe title ofcontains theinstances ofin the northdue to theirare designedcorporationswas that theone of thesemore popularsucceeded insupport fromin differentdominated bydesigned forownership ofand possiblystandardizedresponseTextwas intendedreceived theassumed thatareas of theprimarily inthe basis ofin the senseaccounts fordestroyed byat least twowas declaredcould not beSecretary ofappear to bemargin-top:1/^\s+|\s+$/ge){throw e};the start oftwo separatelanguage andwho had beenoperation ofdeath of thereal numbers	<link rel="provided thethe story ofcompetitionsenglish (UK)english 
(US)МонголСрпскисрпскисрпскоلعربية正體中文简体中文繁体中文有限公司人民政府阿里巴巴社会主义操作系统政策法规informaciónherramientaselectrónicodescripciónclasificadosconocimientopublicaciónrelacionadasinformáticarelacionadosdepartamentotrabajadoresdirectamenteayuntamientomercadoLibrecontáctenoshabitacionescumplimientorestaurantesdisposiciónconsecuenciaelectrónicaaplicacionesdesconectadoinstalaciónrealizaciónutilizaciónenciclopediaenfermedadesinstrumentosexperienciasinstituciónparticularessubcategoriaтолькоРоссииработыбольшепростоможетедругихслучаесейчасвсегдаРоссияМоскведругиегородавопросданныхдолжныименноМосквырублейМосквастраныничегоработедолженуслугитеперьОднакопотомуработуапрелявообщеодногосвоегостатьидругойфорумехорошопротивссылкакаждыйвластигруппывместеработасказалпервыйделатьденьгипериодбизнесосновемоменткупитьдолжнарамкахначалоРаботаТолькосовсемвторойначаласписокслужбысистемпечатиновогопомощисайтовпочемупомощьдолжноссылкибыстроданныемногиепроектСейчасмоделитакогоонлайнгородеверсиястранефильмыуровняразныхискатьнеделюянваряменьшемногихданнойзначитнельзяфорумаТеперьмесяцазащитыЛучшиеनहींकरनेअपनेकियाकरेंअन्यक्यागाइडबारेकिसीदियापहलेसिंहभारतअपनीवालेसेवाकरतेमेरेहोनेसकतेबहुतसाइटहोगाजानेमिनटकरताकरनाउनकेयहाँसबसेभाषाआपकेलियेशुरूइसकेघंटेमेरीसकतामेरालेकरअधिकअपनासमाजमुझेकारणहोताकड़ीयहांहोटलशब्दलियाजीवनजाताकैसेआपकावालीदेनेपूरीपानीउसकेहोगीबैठकआपकीवर्षगांवआपकोजिलाजानासहमतहमेंउनकीयाहूदर्जसूचीपसंदसवालहोनाहोतीजैसेवापसजनतानेताजारीघायलजिलेनीचेजांचपत्रगूगलजातेबाहरआपनेवाहनइसकासुबहरहनेइससेसहितबड़ेघटनातलाशपांचश्रीबड़ीहोतेसाईटशायदसकतीजातीवालाहजारपटनारखनेसड़कमिलाउसकीकेवललगताखानाअर्थजहांदेखापहलीनियमबिनाबैंककहींकहनादेताहमलेकाफीजबकितुरतमांगवहींरोज़मिलीआरोपसेनायादवलेनेखाताकरीबउनकाजवाबपूराबड़ासौदाशेयरकियेकहांअकसरबनाएवहांस्थलमिलेलेखकविषयक्रंसमूहथानाتستطيعمشاركةبواسطةالصفحةمواضيعالخاصةالمزيدالعامةالكاتبالردودبرنامجالدولةالعالمالموقعالعربيالسريعالجوالالذهابالحياةالحقوقالكريمالعراقمحفوظةالثانيمشاهدةالمرأةالقرآنالشبابالحوارالجديدالأسرةالعلوممجموعةالرحمنالنقاطفلسطينالكويتالدنيابركاتهالرياضتحياتيبتوقيتالأولىالبريدالكلامالرابطالشخصيسيار
اتالثالثالصلاةالحديثالزوارالخليجالجميعالعامهالجمالالساعةمشاهدهالرئيسالدخولالفنيةالكتابالدوريالدروساستغرقتصاميمالبناتالعظيمentertainmentunderstanding = function().jpg" width="configuration.png" width="<body class="Math.random()contemporary United Statescircumstances.appendChild(organizations<span class=""><img src="/distinguishedthousands of communicationclear"></div>investigationfavicon.ico" margin-right:based on the Massachusettstable border=internationalalso known aspronunciationbackground:#fpadding-left:For example, miscellaneous&lt;/math&gt;psychologicalin particularearch" type="form method="as opposed toSupreme Courtoccasionally Additionally,North Americapx;backgroundopportunitiesEntertainment.toLowerCase(manufacturingprofessional combined withFor instance,consisting of" maxlength="return false;consciousnessMediterraneanextraordinaryassassinationsubsequently button type="the number ofthe original comprehensiverefers to the</ul>
+</div>
+philosophicallocation.hrefwas publishedSan Francisco(function(){
+<div id="mainsophisticatedmathematical /head>
+<bodysuggests thatdocumentationconcentrationrelationshipsmay have been(for example,This article in some casesparts of the definition ofGreat Britain cellpadding=equivalent toplaceholder="; font-size: justificationbelieved thatsuffered fromattempted to leader of thecript" src="/(function() {are available
+	<link rel=" src='http://interested inconventional " alt="" /></are generallyhas also beenmost popular correspondingcredited withtyle="border:</a></span></.gif" width="<iframe src="table class="inline-block;according to together withapproximatelyparliamentarymore and moredisplay:none;traditionallypredominantly&nbsp;|&nbsp;&nbsp;</span> cellspacing=<input name="or" content="controversialproperty="og:/x-shockwave-demonstrationsurrounded byNevertheless,was the firstconsiderable Although the collaborationshould not beproportion of<span style="known as the shortly afterfor instance,described as /head>
+<body starting withincreasingly the fact thatdiscussion ofmiddle of thean individualdifficult to point of viewhomosexualityacceptance of</span></div>manufacturersorigin of thecommonly usedimportance ofdenominationsbackground: #length of thedeterminationa significant" border="0">revolutionaryprinciples ofis consideredwas developedIndo-Europeanvulnerable toproponents ofare sometimescloser to theNew York City name="searchattributed tocourse of themathematicianby the end ofat the end of" border="0" technological.removeClass(branch of theevidence that![endif]-->
+Institute of into a singlerespectively.and thereforeproperties ofis located insome of whichThere is alsocontinued to appearance of &amp;ndash; describes theconsiderationauthor of theindependentlyequipped withdoes not have</a><a href="confused with<link href="/at the age ofappear in theThese includeregardless ofcould be used style=&quot;several timesrepresent thebody>
+</html>thought to bepopulation ofpossibilitiespercentage ofaccess to thean attempt toproduction ofjquery/jquerytwo differentbelong to theestablishmentreplacing thedescription" determine theavailable forAccording to wide range of	<div class="more commonlyorganisationsfunctionalitywas completed &amp;mdash; participationthe characteran additionalappears to befact that thean example ofsignificantlyonmouseover="because they async = true;problems withseems to havethe result of src="http://familiar withpossession offunction () {took place inand sometimessubstantially<span></span>is often usedin an attemptgreat deal ofEnvironmentalsuccessfully virtually all20th century,professionalsnecessary to determined bycompatibilitybecause it isDictionary ofmodificationsThe followingmay refer to:Consequently,Internationalalthough somethat would beworld's firstclassified asbottom of the(particularlyalign="left" most commonlybasis for thefoundation ofcontributionspopularity ofcenter of theto reduce thejurisdictionsapproximation onmouseout="New Testamentcollection of</span></a></in the Unitedfilm director-strict.dtd">has been usedreturn to thealthough thischange in theseveral otherbut there areunprecedentedis similar toespecially inweight: bold;is called thecomputationalindicate thatrestricted to	<meta name="are typicallyconflict withHowever, the An example ofcompared withquantities ofrather than aconstellationnecessary forreported thatspecificationpolitical and&nbsp;&nbsp;<references tothe same yearGovernment ofgeneration ofhave not beenseveral yearscommitment to		<ul class="visualization19th century,practitionersthat he wouldand continuedoccupation ofis defined ascentre of thethe amount of><div style="equivalent ofdifferentiatebrought aboutmargin-left: automaticallythought of asSome of these
+<div class="input class="replaced withis one of theeducation andinfluenced byreputation as
+<meta name="accommodation</div>
+</div>large part ofInstitute forthe so-called against the In this case,was appointedclaimed to beHowever, thisDepartment ofthe remainingeffect on theparticularly deal with the
+<div style="almost alwaysare currentlyexpression ofphilosophy offor more thancivilizationson the islandselectedIndexcan result in" value="" />the structure /></a></div>Many of thesecaused by theof the Unitedspan class="mcan be tracedis related tobecame one ofis frequentlyliving in thetheoreticallyFollowing theRevolutionarygovernment inis determinedthe politicalintroduced insufficient todescription">short storiesseparation ofas to whetherknown for itswas initiallydisplay:blockis an examplethe principalconsists of arecognized as/body></html>a substantialreconstructedhead of stateresistance toundergraduateThere are twogravitationalare describedintentionallyserved as theclass="headeropposition tofundamentallydominated theand the otheralliance withwas forced torespectively,and politicalin support ofpeople in the20th century.and publishedloadChartbeatto understandmember statesenvironmentalfirst half ofcountries andarchitecturalbe consideredcharacterizedclearIntervalauthoritativeFederation ofwas succeededand there area consequencethe Presidentalso includedfree softwaresuccession ofdeveloped thewas destroyedaway from the;
+</script>
+<although theyfollowed by amore powerfulresulted in aUniversity ofHowever, manythe presidentHowever, someis thought tountil the endwas announcedare importantalso includes><input type=the center of DO NOT ALTERused to referthemes/?sort=that had beenthe basis forhas developedin the summercomparativelydescribed thesuch as thosethe resultingis impossiblevarious otherSouth Africanhave the sameeffectivenessin which case; text-align:structure and; background:regarding thesupported theis also knownstyle="marginincluding thebahasa Melayunorsk bokmålnorsk nynorskslovenščinainternacionalcalificacióncomunicaciónconstrucción"><div class="disambiguationDomainName', 'administrationsimultaneouslytransportationInternational margin-bottom:responsibility<![endif]-->
+</><meta name="implementationinfrastructurerepresentationborder-bottom:</head>
+<body>=http%3A%2F%2F<form method="method="post" /favicon.ico" });
+</script>
+.setAttribute(Administration= new Array();<![endif]-->
+display:block;Unfortunately,">&nbsp;</div>/favicon.ico">='stylesheet' identification, for example,<li><a href="/an alternativeas a result ofpt"></script>
+type="submit" 
+(function() {recommendationform action="/transformationreconstruction.style.display According to hidden" name="along with thedocument.body.approximately Communicationspost" action="meaning &quot;--<![endif]-->Prime Ministercharacteristic</a> <a class=the history of onmouseover="the governmenthref="https://was originallywas introducedclassificationrepresentativeare considered<![endif]-->
+
+depends on theUniversity of in contrast to placeholder="in the case ofinternational constitutionalstyle="border-: function() {Because of the-strict.dtd">
+<table class="accompanied byaccount of the<script src="/nature of the the people in in addition tos); js.id = id" width="100%"regarding the Roman Catholican independentfollowing the .gif" width="1the following discriminationarchaeologicalprime minister.js"></script>combination of marginwidth="createElement(w.attachEvent(</a></td></tr>src="https://aIn particular, align="left" Czech RepublicUnited Kingdomcorrespondenceconcluded that.html" title="(function () {comes from theapplication of<span class="sbelieved to beement('script'</a>
+</li>
+<livery different><span class="option value="(also known as	<li><a href="><input name="separated fromreferred to as valign="top">founder of theattempting to carbon dioxide
+
+<div class="class="search-/body>
+</html>opportunity tocommunications</head>
+<body style="width:Tiếng Việtchanges in theborder-color:#0" border="0" </span></div><was discovered" type="text" );
+</script>
+
+Department of ecclesiasticalthere has beenresulting from</body></html>has never beenthe first timein response toautomatically </div>
+
+<div iwas consideredpercent of the" /></a></div>collection of descended fromsection of theaccept-charsetto be confusedmember of the padding-right:translation ofinterpretation href='http://whether or notThere are alsothere are manya small numberother parts ofimpossible to  class="buttonlocated in the. However, theand eventuallyAt the end of because of itsrepresents the<form action=" method="post"it is possiblemore likely toan increase inhave also beencorresponds toannounced thatalign="right">many countriesfor many yearsearliest knownbecause it waspt"></script> valign="top" inhabitants offollowing year
+<div class="million peoplecontroversial concerning theargue that thegovernment anda reference totransferred todescribing the style="color:although therebest known forsubmit" name="multiplicationmore than one recognition ofCouncil of theedition of the  <meta name="Entertainment away from the ;margin-right:at the time ofinvestigationsconnected withand many otheralthough it isbeginning with <span class="descendants of<span class="i align="right"</head>
+<body aspects of thehas since beenEuropean Unionreminiscent ofmore difficultVice Presidentcomposition ofpassed throughmore importantfont-size:11pxexplanation ofthe concept ofwritten in the	<span class="is one of the resemblance toon the groundswhich containsincluding the defined by thepublication ofmeans that theoutside of thesupport of the<input class="<span class="t(Math.random()most prominentdescription ofConstantinoplewere published<div class="seappears in the1" height="1" most importantwhich includeswhich had beendestruction ofthe population
+	<div class="possibility ofsometimes usedappear to havesuccess of theintended to bepresent in thestyle="clear:b
+</script>
+<was founded ininterview with_id" content="capital of the
+<link rel="srelease of thepoint out thatxMLHttpRequestand subsequentsecond largestvery importantspecificationssurface of theapplied to theforeign policy_setDomainNameestablished inis believed toIn addition tomeaning of theis named afterto protect theis representedDeclaration ofmore efficientClassificationother forms ofhe returned to<span class="cperformance of(function() {if and only ifregions of theleading to therelations withUnited Nationsstyle="height:other than theype" content="Association of
+</head>
+<bodylocated on theis referred to(including theconcentrationsthe individualamong the mostthan any other/>
+<link rel=" return false;the purpose ofthe ability to;color:#fff}
+.
+<span class="the subject ofdefinitions of>
+<link rel="claim that thehave developed<table width="celebration ofFollowing the to distinguish<span class="btakes place inunder the namenoted that the><![endif]-->
+style="margin-instead of theintroduced thethe process ofincreasing thedifferences inestimated thatespecially the/div><div id="was eventuallythroughout histhe differencesomething thatspan></span></significantly ></script>
+
+environmental to prevent thehave been usedespecially forunderstand theis essentiallywere the firstis the largesthave been made" src="http://interpreted assecond half ofcrolling="no" is composed ofII, Holy Romanis expected tohave their owndefined as thetraditionally have differentare often usedto ensure thatagreement withcontaining theare frequentlyinformation onexample is theresulting in a</a></li></ul> class="footerand especiallytype="button" </span></span>which included>
+<meta name="considered thecarried out byHowever, it isbecame part ofin relation topopular in thethe capital ofwas officiallywhich has beenthe History ofalternative todifferent fromto support thesuggested thatin the process  <div class="the foundationbecause of hisconcerned withthe universityopposed to thethe context of<span class="ptext" name="q"		<div class="the scientificrepresented bymathematicianselected by thethat have been><div class="cdiv id="headerin particular,converted into);
+</script>
+<philosophical srpskohrvatskitiếng ViệtРусскийрусскийinvestigaciónparticipaciónкоторыеобластикоторыйчеловексистемыНовостикоторыхобластьвременикотораясегодняскачатьновостиУкраинывопросыкоторойсделатьпомощьюсредствобразомстороныучастиетечениеГлавнаяисториисистемарешенияСкачатьпоэтомуследуетсказатьтоваровконечнорешениекотороеоргановкоторомРекламаالمنتدىمنتدياتالموضوعالبرامجالمواقعالرسائلمشاركاتالأعضاءالرياضةالتصميمالاعضاءالنتائجالألعابالتسجيلالأقسامالضغطاتالفيديوالترحيبالجديدةالتعليمالأخبارالافلامالأفلامالتاريخالتقنيةالالعابالخواطرالمجتمعالديكورالسياحةعبداللهالتربيةالروابطالأدبيةالاخبارالمتحدةالاغانيcursor:pointer;</title>
+<meta " href="http://"><span class="members of the window.locationvertical-align:/a> | <a href="<!doctype html>media="screen" <option value="favicon.ico" />
+		<div class="characteristics" method="get" /body>
+</html>
+shortcut icon" document.write(padding-bottom:representativessubmit" value="align="center" throughout the science fiction
+  <div class="submit" class="one of the most valign="top"><was established);
+</script>
+return false;">).style.displaybecause of the document.cookie<form action="/}body{margin:0;Encyclopedia ofversion of the .createElement(name" content="</div>
+</div>
+
+administrative </body>
+</html>history of the "><input type="portion of the as part of the &nbsp;<a href="other countries">
+<div class="</span></span><In other words,display: block;control of the introduction of/>
+<meta name="as well as the in recent years
+	<div class="</div>
+	</div>
+inspired by thethe end of the compatible withbecame known as style="margin:.js"></script>< International there have beenGerman language style="color:#Communist Partyconsistent withborder="0" cell marginheight="the majority of" align="centerrelated to the many different Orthodox Churchsimilar to the />
+<link rel="swas one of the until his death})();
+</script>other languagescompared to theportions of thethe Netherlandsthe most commonbackground:url(argued that thescrolling="no" included in theNorth American the name of theinterpretationsthe traditionaldevelopment of frequently useda collection ofvery similar tosurrounding theexample of thisalign="center">would have beenimage_caption =attached to thesuggesting thatin the form of involved in theis derived fromnamed after theIntroduction torestrictions on style="width: can be used to the creation ofmost important information andresulted in thecollapse of theThis means thatelements of thewas replaced byanalysis of theinspiration forregarded as themost successfulknown as &quot;a comprehensiveHistory of the were consideredreturned to theare referred toUnsourced image>
+	<div class="consists of thestopPropagationinterest in theavailability ofappears to haveelectromagneticenableServices(function of theIt is important</script></div>function(){var relative to theas a result of the position ofFor example, in method="post" was followed by&amp;mdash; thethe applicationjs"></script>
+ul></div></div>after the deathwith respect tostyle="padding:is particularlydisplay:inline; type="submit" is divided into中文 (简体)responsabilidadadministracióninternacionalescorrespondienteउपयोगपूर्वहमारेलोगोंचुनावलेकिनसरकारपुलिसखोजेंचाहिएभेजेंशामिलहमारीजागरणबनानेकुमारब्लॉगमालिकमहिलापृष्ठबढ़तेभाजपाक्लिकट्रेनखिलाफदौरानमामलेमतदानबाजारविकासक्योंचाहतेपहुँचबतायासंवाददेखनेपिछलेविशेषराज्यउत्तरमुंबईदोनोंउपकरणपढ़ेंस्थितफिल्ममुख्यअच्छाछूटतीसंगीतजाएगाविभागघण्टेदूसरेदिनोंहत्यासेक्सगांधीविश्वरातेंदैट्सनक्शासामनेअदालतबिजलीपुरूषहिंदीमित्रकवितारुपयेस्थानकरोड़मुक्तयोजनाकृपयापोस्टघरेलूकार्यविचारसूचनामूल्यदेखेंहमेशास्कूलमैंनेतैयारजिसकेrss+xml" title="-type" content="title" content="at the same time.js"></script>
+<" method="post" </span></a></li>vertical-align:t/jquery.min.js">.click(function( style="padding-})();
+</script>
+</span><a href="<a href="http://); return false;text-decoration: scrolling="no" border-collapse:associated with Bahasa IndonesiaEnglish language<text xml:space=.gif" border="0"</body>
+</html>
+overflow:hidden;img src="http://addEventListenerresponsible for s.js"></script>
+/favicon.ico" />operating system" style="width:1target="_blank">State Universitytext-align:left;
+document.write(, including the around the world);
+</script>
+<" style="height:;overflow:hiddenmore informationan internationala member of the one of the firstcan be found in </div>
+		</div>
+display: none;">" />
+<link rel="
+  (function() {the 15th century.preventDefault(large number of Byzantine Empire.jpg|thumb|left|vast majority ofmajority of the  align="center">University Pressdominated by theSecond World Wardistribution of style="position:the rest of the characterized by rel="nofollow">derives from therather than the a combination ofstyle="width:100English-speakingcomputer scienceborder="0" alt="the existence ofDemocratic Party" style="margin-For this reason,.js"></script>
+	sByTagName(s)[0]js"></script>
+<.js"></script>
+link rel="icon" ' alt='' class='formation of theversions of the </a></div></div>/page>
+  <page>
+<div class="contbecame the firstbahasa Indonesiaenglish (simple)ΕλληνικάхрватскикомпанииявляетсяДобавитьчеловекаразвитияИнтернетОтветитьнапримеринтернеткоторогостраницыкачествеусловияхпроблемыполучитьявляютсянаиболеекомпаниявниманиесредстваالمواضيعالرئيسيةالانتقالمشاركاتكالسياراتالمكتوبةالسعوديةاحصائياتالعالميةالصوتياتالانترنتالتصاميمالإسلاميالمشاركةالمرئياتrobots" content="<div id="footer">the United States<img src="http://.jpg|right|thumb|.js"></script>
+<location.protocolframeborder="0" s" />
+<meta name="</a></div></div><font-weight:bold;&quot; and &quot;depending on the margin:0;padding:" rel="nofollow" President of the twentieth centuryevision>
+  </pageInternet Explorera.async = true;
+information about<div id="header">" action="http://<a href="https://<div id="content"</div>
+</div>
+<derived from the <img src='http://according to the 
+</body>
+</html>
+style="font-size:script language="Arial, Helvetica,</a><span class="</script><script political partiestd></tr></table><href="http://www.interpretation ofrel="stylesheet" document.write('<charset="utf-8">
+beginning of the revealed that thetelevision series" rel="nofollow"> target="_blank">claiming that thehttp%3A%2F%2Fwww.manifestations ofPrime Minister ofinfluenced by theclass="clearfix">/div>
+</div>
+
+three-dimensionalChurch of Englandof North Carolinasquare kilometres.addEventListenerdistinct from thecommonly known asPhonetic Alphabetdeclared that thecontrolled by theBenjamin Franklinrole-playing gamethe University ofin Western Europepersonal computerProject Gutenbergregardless of thehas been proposedtogether with the></li><li class="in some countriesmin.js"></script>of the populationofficial language<img src="images/identified by thenatural resourcesclassification ofcan be consideredquantum mechanicsNevertheless, themillion years ago</body>
+</html>Ελληνικά
+take advantage ofand, according toattributed to theMicrosoft Windowsthe first centuryunder the controldiv class="headershortly after thenotable exceptiontens of thousandsseveral differentaround the world.reaching militaryisolated from theopposition to thethe Old TestamentAfrican Americansinserted into theseparate from themetropolitan areamakes it possibleacknowledged thatarguably the mosttype="text/css">
+the InternationalAccording to the pe="text/css" />
+coincide with thetwo-thirds of theDuring this time,during the periodannounced that hethe internationaland more recentlybelieved that theconsciousness andformerly known assurrounded by thefirst appeared inoccasionally usedposition:absolute;" target="_blank" position:relative;text-align:center;jax/libs/jquery/1.background-color:#type="application/anguage" content="<meta http-equiv="Privacy Policy</a>e("%3Cscript src='" target="_blank">On the other hand,.jpg|thumb|right|2</div><div class="<div style="float:nineteenth century</body>
+</html>
+<img src="http://s;text-align:centerfont-weight: bold; According to the difference between" frameborder="0" " style="position:link href="http://html4/loose.dtd">
+during this period</td></tr></table>closely related tofor the first time;font-weight:bold;input type="text" <span style="font-onreadystatechange	<div class="cleardocument.location. For example, the a wide variety of <!DOCTYPE html>
+<&nbsp;&nbsp;&nbsp;"><a href="http://style="float:left;concerned with the=http%3A%2F%2Fwww.in popular culturetype="text/css" />it is possible to Harvard Universitytylesheet" href="/the main characterOxford University  name="keywords" cstyle="text-align:the United Kingdomfederal government<div style="margin depending on the description of the<div class="header.min.js"></script>destruction of theslightly differentin accordance withtelecommunicationsindicates that theshortly thereafterespecially in the European countriesHowever, there aresrc="http://staticsuggested that the" src="http://www.a large number of Telecommunications" rel="nofollow" tHoly Roman Emperoralmost exclusively" border="0" alt="Secretary of Stateculminating in theCIA World Factbookthe most importantanniversary of thestyle="background-<li><em><a href="/the Atlantic Oceanstrictly speaking,shortly before thedifferent types ofthe Ottoman Empire><img src="http://An Introduction toconsequence of thedeparture from theConfederate Statesindigenous peoplesProceedings of theinformation on thetheories have beeninvolvement in thedivided into threeadjacent countriesis responsible fordissolution of thecollaboration withwidely regarded ashis contemporariesfounding member ofDominican Republicgenerally acceptedthe possibility ofare also availableunder constructionrestoration of thethe general publicis almost entirelypasses through thehas been suggestedcomputer and videoGermanic languages according to the different from theshortly afterwardshref="https://www.recent developmentBoard of Directors<div class="search| <a href="http://In particular, theMultiple footnotesor other substancethousands of yearstranslation of the</div>
+</div>
+
+<a href="index.phpwas established inmin.js"></script>
+participate in thea strong influencestyle="margin-top:represented by thegraduated from theTraditionally, theElement("script");However, since the/div>
+</div>
+<div left; margin-left:protection against0; vertical-align:Unfortunately, thetype="image/x-icon/div>
+<div class=" class="clearfix"><div class="footer		</div>
+		</div>
+the motion pictureБългарскибългарскиФедерациинесколькосообщениесообщенияпрограммыОтправитьбесплатноматериалыпозволяетпоследниеразличныхпродукциипрограммаполностьюнаходитсяизбранноенаселенияизменениякатегорииАлександрद्वारामैनुअलप्रदानभारतीयअनुदेशहिन्दीइंडियादिल्लीअधिकारवीडियोचिट्ठेसमाचारजंक्शनदुनियाप्रयोगअनुसारऑनलाइनपार्टीशर्तोंलोकसभाफ़्लैशशर्तेंप्रदेशप्लेयरकेंद्रस्थितिउत्पादउन्हेंचिट्ठायात्राज्यादापुरानेजोड़ेंअनुवादश्रेणीशिक्षासरकारीसंग्रहपरिणामब्रांडबच्चोंउपलब्धमंत्रीसंपर्कउम्मीदमाध्यमसहायताशब्दोंमीडियाआईपीएलमोबाइलसंख्याआपरेशनअनुबंधबाज़ारनवीनतमप्रमुखप्रश्नपरिवारनुकसानसमर्थनआयोजितसोमवारالمشاركاتالمنتدياتالكمبيوترالمشاهداتعددالزوارعددالردودالإسلاميةالفوتوشوبالمسابقاتالمعلوماتالمسلسلاتالجرافيكسالاسلاميةالاتصالاتkeywords" content="w3.org/1999/xhtml"><a target="_blank" text/html; charset=" target="_blank"><table cellpadding="autocomplete="off" text-align: center;to last version by background-color: #" href="http://www./div></div><div id=<a href="#" class=""><img src="http://cript" src="http://
+<script language="//EN" "http://www.wencodeURIComponent(" href="javascript:<div class="contentdocument.write('<scposition: absolute;script src="http:// style="margin-top:.min.js"></script>
+</div>
+<div class="w3.org/1999/xhtml" 
+
+</body>
+</html>distinction between/" target="_blank"><link href="http://encoding="utf-8"?>
+w.addEventListener?action="http://www.icon" href="http:// style="background:type="text/css" />
+meta property="og:t<input type="text"  style="text-align:the development of tylesheet" type="tehtml; charset=utf-8is considered to betable width="100%" In addition to the contributed to the differences betweendevelopment of the It is important to </script>
+
+<script  style="font-size:1></span><span id=gbLibrary of Congress<img src="http://imEnglish translationAcademy of Sciencesdiv style="display:construction of the.getElementById(id)in conjunction withElement('script'); <meta property="og:Български
+ type="text" name=">Privacy Policy</a>administered by theenableSingleRequeststyle=&quot;margin:</div></div></div><><img src="http://i style=&quot;float:referred to as the total population ofin Washington, D.C. style="background-among other things,organization of theparticipated in thethe introduction ofidentified with thefictional character Oxford University misunderstanding ofThere are, however,stylesheet" href="/Columbia Universityexpanded to includeusually referred toindicating that thehave suggested thataffiliated with thecorrelation betweennumber of different></td></tr></table>Republic of Ireland
+</script>
+<script under the influencecontribution to theOfficial website ofheadquarters of thecentered around theimplications of thehave been developedFederal Republic ofbecame increasinglycontinuation of theNote, however, thatsimilar to that of capabilities of theaccordance with theparticipants in thefurther developmentunder the directionis often consideredhis younger brother</td></tr></table><a http-equiv="X-UA-physical propertiesof British Columbiahas been criticized(with the exceptionquestions about thepassing through the0" cellpadding="0" thousands of peopleredirects here. Forhave children under%3E%3C/script%3E"));<a href="http://www.<li><a href="http://site_name" content="text-decoration:nonestyle="display: none<meta http-equiv="X-new Date().getTime() type="image/x-icon"</span><span class="language="javascriptwindow.location.href<a href="javascript:-->
+<script type="t<a href='http://www.hortcut icon" href="</div>
+<div class="<script src="http://" rel="stylesheet" t</div>
+<script type=/a> <a href="http:// allowTransparency="X-UA-Compatible" conrelationship between
+</script>
+<script </a></li></ul></div>associated with the programming language</a><a href="http://</a></li><li class="form action="http://<div style="display:type="text" name="q"<table width="100%" background-position:" border="0" width="rel="shortcut icon" h6><ul><li><a href="  <meta http-equiv="css" media="screen" responsible for the " type="application/" style="background-html; charset=utf-8" allowtransparency="stylesheet" type="te
+<meta http-equiv="></span><span class="0" cellspacing="0">;
+</script>
+<script sometimes called thedoes not necessarilyFor more informationat the beginning of <!DOCTYPE html><htmlparticularly in the type="hidden" name="javascript:void(0);"effectiveness of the autocomplete="off" generally considered><input type="text" "></script>
+<scriptthroughout the worldcommon misconceptionassociation with the</div>
+</div>
+<div cduring his lifetime,corresponding to thetype="image/x-icon" an increasing numberdiplomatic relationsare often consideredmeta charset="utf-8" <input type="text" examples include the"><img src="http://iparticipation in thethe establishment of
+</div>
+<div class="&amp;nbsp;&amp;nbsp;to determine whetherquite different frommarked the beginningdistance between thecontributions to theconflict between thewidely considered towas one of the firstwith varying degreeshave speculated that(document.getElementparticipating in theoriginally developedeta charset="utf-8"> type="text/css" />
+interchangeably withmore closely relatedsocial and politicalthat would otherwiseperpendicular to thestyle type="text/csstype="submit" name="families residing indeveloping countriescomputer programmingeconomic developmentdetermination of thefor more informationon several occasionsportuguês (Europeu)УкраїнськаукраїнськаРоссийскойматериаловинформацииуправлениянеобходимоинформацияИнформацияРеспубликиколичествоинформациютерриториидостаточноالمتواجدونالاشتراكاتالاقتراحاتhtml; charset=UTF-8" setTimeout(function()display:inline-block;<input type="submit" type = 'text/javascri<img src="http://www." "http://www.w3.org/shortcut icon" href="" autocomplete="off" </a></div><div class=</a></li>
+<li class="css" type="text/css" <form action="http://xt/css" href="http://link rel="alternate" 
+<script type="text/ onclick="javascript:(new Date).getTime()}height="1" width="1" People's Republic of  <a href="http://www.text-decoration:underthe beginning of the </div>
+</div>
+</div>
+establishment of the </div></div></div></d#viewport{min-height:
+<script src="http://option><option value=often referred to as /option>
+<option valu<!DOCTYPE html>
+<!--[International Airport>
+<a href="http://www</a><a href="http://wภาษาไทยქართული正體中文 (繁體)निर्देशडाउनलोडक्षेत्रजानकारीसंबंधितस्थापनास्वीकारसंस्करणसामग्रीचिट्ठोंविज्ञानअमेरिकाविभिन्नगाडियाँक्योंकिसुरक्षापहुँचतीप्रबंधनटिप्पणीक्रिकेटप्रारंभप्राप्तमालिकोंरफ़्तारनिर्माणलिमिटेडdescription" content="document.location.prot.getElementsByTagName(<!DOCTYPE html>
+<html <meta charset="utf-8">:url" content="http://.css" rel="stylesheet"style type="text/css">type="text/css" href="w3.org/1999/xhtml" xmltype="text/javascript" method="get" action="link rel="stylesheet"  = document.getElementtype="image/x-icon" />cellpadding="0" cellsp.css" type="text/css" </a></li><li><a href="" width="1" height="1""><a href="http://www.style="display:none;">alternate" type="appli-//W3C//DTD XHTML 1.0 ellspacing="0" cellpad type="hidden" value="/a>&nbsp;<span role="s
+<input type="hidden" language="JavaScript"  document.getElementsBg="0" cellspacing="0" ype="text/css" media="type='text/javascript'with the exception of ype="text/css" rel="st height="1" width="1" ='+encodeURIComponent(<link rel="alternate" 
+body, tr, input, textmeta name="robots" conmethod="post" action=">
+<a href="http://www.css" rel="stylesheet" </div></div><div classlanguage="javascript">aria-hidden="true">·<ript" type="text/javasl=0;})();
+(function(){background-image: url(/a></li><li><a href="h		<li><a href="http://ator" aria-hidden="tru> <a href="http://www.language="javascript" /option>
+<option value/div></div><div class=rator" aria-hidden="tre=(new Date).getTime()português (do Brasil)организациивозможностьобразованиярегистрациивозможностиобязательна<!DOCTYPE html PUBLIC "nt-Type" content="text/<meta http-equiv="Conteransitional//EN" "http:<html xmlns="http://www-//W3C//DTD XHTML 1.0 TDTD/xhtml1-transitional//www.w3.org/TR/xhtml1/pe = 'text/javascript';<meta name="descriptionparentNode.insertBefore<input type="hidden" najs" type="text/javascri(document).ready(functiscript type="text/javasimage" content="http://UA-Compatible" content=tml; charset=utf-8" />
+link rel="shortcut icon<link rel="stylesheet" </script>
+<script type== document.createElemen<a target="_blank" href= document.getElementsBinput type="text" name=a.type = 'text/javascrinput type="hidden" namehtml; charset=utf-8" />dtd">
+<html xmlns="http-//W3C//DTD HTML 4.01 TentsByTagName('script')input type="hidden" nam<script type="text/javas" style="display:none;">document.getElementById(=document.createElement(' type='text/javascript'input type="text" name="d.getElementsByTagName(snical" href="http://www.C//DTD HTML 4.01 Transit<style type="text/css">
+
+<style type="text/css">ional.dtd">
+<html xmlns=http-equiv="Content-Typeding="0" cellspacing="0"html; charset=utf-8" />
+ style="display:none;"><<li><a href="http://www. type='text/javascript'>деятельностисоответствиипроизводствабезопасностиपुस्तिकाकांग्रेसउन्होंनेविधानसभाफिक्सिंगसुरक्षितकॉपीराइटविज्ञापनकार्रवाईसक्रियता
\ No newline at end of file
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/dictionary.bin.br b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/dictionary.bin.br
new file mode 100755
index 0000000000..6a55d420a8
Binary files /dev/null and b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/dictionary.bin.br differ
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/dictionary.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/dictionary.c
new file mode 100755
index 0000000000..64822a381b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/dictionary.c
@@ -0,0 +1,5905 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "./dictionary.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#ifndef BROTLI_EXTERNAL_DICTIONARY_DATA
+static const uint8_t kBrotliDictionaryData[] =
+{
+116,105,109,101,100,111,119,110,108,105,102,101,108,101,102,116,98,97,99,107,99,
+111,100,101,100,97,116,97,115,104,111,119,111,110,108,121,115,105,116,101,99,105
+,116,121,111,112,101,110,106,117,115,116,108,105,107,101,102,114,101,101,119,111
+,114,107,116,101,120,116,121,101,97,114,111,118,101,114,98,111,100,121,108,111,
+118,101,102,111,114,109,98,111,111,107,112,108,97,121,108,105,118,101,108,105,
+110,101,104,101,108,112,104,111,109,101,115,105,100,101,109,111,114,101,119,111,
+114,100,108,111,110,103,116,104,101,109,118,105,101,119,102,105,110,100,112,97,
+103,101,100,97,121,115,102,117,108,108,104,101,97,100,116,101,114,109,101,97,99,
+104,97,114,101,97,102,114,111,109,116,114,117,101,109,97,114,107,97,98,108,101,
+117,112,111,110,104,105,103,104,100,97,116,101,108,97,110,100,110,101,119,115,
+101,118,101,110,110,101,120,116,99,97,115,101,98,111,116,104,112,111,115,116,117
+,115,101,100,109,97,100,101,104,97,110,100,104,101,114,101,119,104,97,116,110,97
+,109,101,76,105,110,107,98,108,111,103,115,105,122,101,98,97,115,101,104,101,108
+,100,109,97,107,101,109,97,105,110,117,115,101,114,39,41,32,43,104,111,108,100,
+101,110,100,115,119,105,116,104,78,101,119,115,114,101,97,100,119,101,114,101,
+115,105,103,110,116,97,107,101,104,97,118,101,103,97,109,101,115,101,101,110,99,
+97,108,108,112,97,116,104,119,101,108,108,112,108,117,115,109,101,110,117,102,
+105,108,109,112,97,114,116,106,111,105,110,116,104,105,115,108,105,115,116,103,
+111,111,100,110,101,101,100,119,97,121,115,119,101,115,116,106,111,98,115,109,
+105,110,100,97,108,115,111,108,111,103,111,114,105,99,104,117,115,101,115,108,97
+,115,116,116,101,97,109,97,114,109,121,102,111,111,100,107,105,110,103,119,105,
+108,108,101,97,115,116,119,97,114,100,98,101,115,116,102,105,114,101,80,97,103,
+101,107,110,111,119,97,119,97,121,46,112,110,103,109,111,118,101,116,104,97,110,
+108,111,97,100,103,105,118,101,115,101,108,102,110,111,116,101,109,117,99,104,
+102,101,101,100,109,97,110,121,114,111,99,107,105,99,111,110,111,110,99,101,108,
+111,111,107,104,105,100,101,100,105,101,100,72,111,109,101,114,117,108,101,104,
+111,115,116,97,106,97,120,105,110,102,111,99,108,117,98,108,97,119,115,108,101,
+115,115,104,97,108,102,115,111,109,101,115,117,99,104,122,111,110,101,49,48,48,
+37,111,110,101,115,99,97,114,101,84,105,109,101,114,97,99,101,98,108,117,101,102
+,111,117,114,119,101,101,107,102,97,99,101,104,111,112,101,103,97,118,101,104,97
+,114,100,108,111,115,116,119,104,101,110,112,97,114,107,107,101,112,116,112,97,
+115,115,115,104,105,112,114,111,111,109,72,84,77,76,112,108,97,110,84,121,112,
+101,100,111,110,101,115,97,118,101,107,101,101,112,102,108,97,103,108,105,110,
+107,115,111,108,100,102,105,118,101,116,111,111,107,114,97,116,101,116,111,119,
+110,106,117,109,112,116,104,117,115,100,97,114,107,99,97,114,100,102,105,108,101
+,102,101,97,114,115,116,97,121,107,105,108,108,116,104,97,116,102,97,108,108,97,
+117,116,111,101,118,101,114,46,99,111,109,116,97,108,107,115,104,111,112,118,111
+,116,101,100,101,101,112,109,111,100,101,114,101,115,116,116,117,114,110,98,111,
+114,110,98,97,110,100,102,101,108,108,114,111,115,101,117,114,108,40,115,107,105
+,110,114,111,108,101,99,111,109,101,97,99,116,115,97,103,101,115,109,101,101,116
+,103,111,108,100,46,106,112,103,105,116,101,109,118,97,114,121,102,101,108,116,
+116,104,101,110,115,101,110,100,100,114,111,112,86,105,101,119,99,111,112,121,49
+,46,48,34,60,47,97,62,115,116,111,112,101,108,115,101,108,105,101,115,116,111,
+117,114,112,97,99,107,46,103,105,102,112,97,115,116,99,115,115,63,103,114,97,121
+,109,101,97,110,38,103,116,59,114,105,100,101,115,104,111,116,108,97,116,101,115
+,97,105,100,114,111,97,100,118,97,114,32,102,101,101,108,106,111,104,110,114,105
+,99,107,112,111,114,116,102,97,115,116,39,85,65,45,100,101,97,100,60,47,98,62,
+112,111,111,114,98,105,108,108,116,121,112,101,85,46,83,46,119,111,111,100,109,
+117,115,116,50,112,120,59,73,110,102,111,114,97,110,107,119,105,100,101,119,97,
+110,116,119,97,108,108,108,101,97,100,91,48,93,59,112,97,117,108,119,97,118,101,
+115,117,114,101,36,40,39,35,119,97,105,116,109,97,115,115,97,114,109,115,103,111
+,101,115,103,97,105,110,108,97,110,103,112,97,105,100,33,45,45,32,108,111,99,107
+,117,110,105,116,114,111,111,116,119,97,108,107,102,105,114,109,119,105,102,101,
+120,109,108,34,115,111,110,103,116,101,115,116,50,48,112,120,107,105,110,100,114
+,111,119,115,116,111,111,108,102,111,110,116,109,97,105,108,115,97,102,101,115,
+116,97,114,109,97,112,115,99,111,114,101,114,97,105,110,102,108,111,119,98,97,98
+,121,115,112,97,110,115,97,121,115,52,112,120,59,54,112,120,59,97,114,116,115,
+102,111,111,116,114,101,97,108,119,105,107,105,104,101,97,116,115,116,101,112,
+116,114,105,112,111,114,103,47,108,97,107,101,119,101,97,107,116,111,108,100,70,
+111,114,109,99,97,115,116,102,97,110,115,98,97,110,107,118,101,114,121,114,117,
+110,115,106,117,108,121,116,97,115,107,49,112,120,59,103,111,97,108,103,114,101,
+119,115,108,111,119,101,100,103,101,105,100,61,34,115,101,116,115,53,112,120,59,
+46,106,115,63,52,48,112,120,105,102,32,40,115,111,111,110,115,101,97,116,110,111
+,110,101,116,117,98,101,122,101,114,111,115,101,110,116,114,101,101,100,102,97,
+99,116,105,110,116,111,103,105,102,116,104,97,114,109,49,56,112,120,99,97,109,
+101,104,105,108,108,98,111,108,100,122,111,111,109,118,111,105,100,101,97,115,
+121,114,105,110,103,102,105,108,108,112,101,97,107,105,110,105,116,99,111,115,
+116,51,112,120,59,106,97,99,107,116,97,103,115,98,105,116,115,114,111,108,108,
+101,100,105,116,107,110,101,119,110,101,97,114,60,33,45,45,103,114,111,119,74,83
+,79,78,100,117,116,121,78,97,109,101,115,97,108,101,121,111,117,32,108,111,116,
+115,112,97,105,110,106,97,122,122,99,111,108,100,101,121,101,115,102,105,115,104
+,119,119,119,46,114,105,115,107,116,97,98,115,112,114,101,118,49,48,112,120,114,
+105,115,101,50,53,112,120,66,108,117,101,100,105,110,103,51,48,48,44,98,97,108,
+108,102,111,114,100,101,97,114,110,119,105,108,100,98,111,120,46,102,97,105,114,
+108,97,99,107,118,101,114,115,112,97,105,114,106,117,110,101,116,101,99,104,105,
+102,40,33,112,105,99,107,101,118,105,108,36,40,34,35,119,97,114,109,108,111,114,
+100,100,111,101,115,112,117,108,108,44,48,48,48,105,100,101,97,100,114,97,119,
+104,117,103,101,115,112,111,116,102,117,110,100,98,117,114,110,104,114,101,102,
+99,101,108,108,107,101,121,115,116,105,99,107,104,111,117,114,108,111,115,115,
+102,117,101,108,49,50,112,120,115,117,105,116,100,101,97,108,82,83,83,34,97,103,
+101,100,103,114,101,121,71,69,84,34,101,97,115,101,97,105,109,115,103,105,114,
+108,97,105,100,115,56,112,120,59,110,97,118,121,103,114,105,100,116,105,112,115,
+35,57,57,57,119,97,114,115,108,97,100,121,99,97,114,115,41,59,32,125,112,104,112
+,63,104,101,108,108,116,97,108,108,119,104,111,109,122,104,58,229,42,47,13,10,32
+,49,48,48,104,97,108,108,46,10,10,65,55,112,120,59,112,117,115,104,99,104,97,116
+,48,112,120,59,99,114,101,119,42,47,60,47,104,97,115,104,55,53,112,120,102,108,
+97,116,114,97,114,101,32,38,38,32,116,101,108,108,99,97,109,112,111,110,116,111,
+108,97,105,100,109,105,115,115,115,107,105,112,116,101,110,116,102,105,110,101,
+109,97,108,101,103,101,116,115,112,108,111,116,52,48,48,44,13,10,13,10,99,111,
+111,108,102,101,101,116,46,112,104,112,60,98,114,62,101,114,105,99,109,111,115,
+116,103,117,105,100,98,101,108,108,100,101,115,99,104,97,105,114,109,97,116,104,
+97,116,111,109,47,105,109,103,38,35,56,50,108,117,99,107,99,101,110,116,48,48,48
+,59,116,105,110,121,103,111,110,101,104,116,109,108,115,101,108,108,100,114,117,
+103,70,82,69,69,110,111,100,101,110,105,99,107,63,105,100,61,108,111,115,101,110
+,117,108,108,118,97,115,116,119,105,110,100,82,83,83,32,119,101,97,114,114,101,
+108,121,98,101,101,110,115,97,109,101,100,117,107,101,110,97,115,97,99,97,112,
+101,119,105,115,104,103,117,108,102,84,50,51,58,104,105,116,115,115,108,111,116,
+103,97,116,101,107,105,99,107,98,108,117,114,116,104,101,121,49,53,112,120,39,39
+,41,59,41,59,34,62,109,115,105,101,119,105,110,115,98,105,114,100,115,111,114,
+116,98,101,116,97,115,101,101,107,84,49,56,58,111,114,100,115,116,114,101,101,
+109,97,108,108,54,48,112,120,102,97,114,109,226,128,153,115,98,111,121,115,91,48
+,93,46,39,41,59,34,80,79,83,84,98,101,97,114,107,105,100,115,41,59,125,125,109,
+97,114,121,116,101,110,100,40,85,75,41,113,117,97,100,122,104,58,230,45,115,105,
+122,45,45,45,45,112,114,111,112,39,41,59,13,108,105,102,116,84,49,57,58,118,105,
+99,101,97,110,100,121,100,101,98,116,62,82,83,83,112,111,111,108,110,101,99,107,
+98,108,111,119,84,49,54,58,100,111,111,114,101,118,97,108,84,49,55,58,108,101,
+116,115,102,97,105,108,111,114,97,108,112,111,108,108,110,111,118,97,99,111,108,
+115,103,101,110,101,32,226,128,148,115,111,102,116,114,111,109,101,116,105,108,
+108,114,111,115,115,60,104,51,62,112,111,117,114,102,97,100,101,112,105,110,107,
+60,116,114,62,109,105,110,105,41,124,33,40,109,105,110,101,122,104,58,232,98,97,
+114,115,104,101,97,114,48,48,41,59,109,105,108,107,32,45,45,62,105,114,111,110,
+102,114,101,100,100,105,115,107,119,101,110,116,115,111,105,108,112,117,116,115,
+47,106,115,47,104,111,108,121,84,50,50,58,73,83,66,78,84,50,48,58,97,100,97,109,
+115,101,101,115,60,104,50,62,106,115,111,110,39,44,32,39,99,111,110,116,84,50,49
+,58,32,82,83,83,108,111,111,112,97,115,105,97,109,111,111,110,60,47,112,62,115,
+111,117,108,76,73,78,69,102,111,114,116,99,97,114,116,84,49,52,58,60,104,49,62,
+56,48,112,120,33,45,45,60,57,112,120,59,84,48,52,58,109,105,107,101,58,52,54,90,
+110,105,99,101,105,110,99,104,89,111,114,107,114,105,99,101,122,104,58,228,39,41
+,41,59,112,117,114,101,109,97,103,101,112,97,114,97,116,111,110,101,98,111,110,
+100,58,51,55,90,95,111,102,95,39,93,41,59,48,48,48,44,122,104,58,231,116,97,110,
+107,121,97,114,100,98,111,119,108,98,117,115,104,58,53,54,90,74,97,118,97,51,48,
+112,120,10,124,125,10,37,67,51,37,58,51,52,90,106,101,102,102,69,88,80,73,99,97,
+115,104,118,105,115,97,103,111,108,102,115,110,111,119,122,104,58,233,113,117,
+101,114,46,99,115,115,115,105,99,107,109,101,97,116,109,105,110,46,98,105,110,
+100,100,101,108,108,104,105,114,101,112,105,99,115,114,101,110,116,58,51,54,90,
+72,84,84,80,45,50,48,49,102,111,116,111,119,111,108,102,69,78,68,32,120,98,111,
+120,58,53,52,90,66,79,68,89,100,105,99,107,59,10,125,10,101,120,105,116,58,51,53
+,90,118,97,114,115,98,101,97,116,39,125,41,59,100,105,101,116,57,57,57,59,97,110
+,110,101,125,125,60,47,91,105,93,46,76,97,110,103,107,109,194,178,119,105,114,
+101,116,111,121,115,97,100,100,115,115,101,97,108,97,108,101,120,59,10,9,125,101
+,99,104,111,110,105,110,101,46,111,114,103,48,48,53,41,116,111,110,121,106,101,
+119,115,115,97,110,100,108,101,103,115,114,111,111,102,48,48,48,41,32,50,48,48,
+119,105,110,101,103,101,97,114,100,111,103,115,98,111,111,116,103,97,114,121,99,
+117,116,115,116,121,108,101,116,101,109,112,116,105,111,110,46,120,109,108,99,
+111,99,107,103,97,110,103,36,40,39,46,53,48,112,120,80,104,46,68,109,105,115,99,
+97,108,97,110,108,111,97,110,100,101,115,107,109,105,108,101,114,121,97,110,117,
+110,105,120,100,105,115,99,41,59,125,10,100,117,115,116,99,108,105,112,41,46,10,
+10,55,48,112,120,45,50,48,48,68,86,68,115,55,93,62,60,116,97,112,101,100,101,109
+,111,105,43,43,41,119,97,103,101,101,117,114,111,112,104,105,108,111,112,116,115
+,104,111,108,101,70,65,81,115,97,115,105,110,45,50,54,84,108,97,98,115,112,101,
+116,115,85,82,76,32,98,117,108,107,99,111,111,107,59,125,13,10,72,69,65,68,91,48
+,93,41,97,98,98,114,106,117,97,110,40,49,57,56,108,101,115,104,116,119,105,110,
+60,47,105,62,115,111,110,121,103,117,121,115,102,117,99,107,112,105,112,101,124,
+45,10,33,48,48,50,41,110,100,111,119,91,49,93,59,91,93,59,10,76,111,103,32,115,
+97,108,116,13,10,9,9,98,97,110,103,116,114,105,109,98,97,116,104,41,123,13,10,48
+,48,112,120,10,125,41,59,107,111,58,236,102,101,101,115,97,100,62,13,115,58,47,
+47,32,91,93,59,116,111,108,108,112,108,117,103,40,41,123,10,123,13,10,32,46,106,
+115,39,50,48,48,112,100,117,97,108,98,111,97,116,46,74,80,71,41,59,10,125,113,
+117,111,116,41,59,10,10,39,41,59,10,13,10,125,13,50,48,49,52,50,48,49,53,50,48,
+49,54,50,48,49,55,50,48,49,56,50,48,49,57,50,48,50,48,50,48,50,49,50,48,50,50,50
+,48,50,51,50,48,50,52,50,48,50,53,50,48,50,54,50,48,50,55,50,48,50,56,50,48,50,
+57,50,48,51,48,50,48,51,49,50,48,51,50,50,48,51,51,50,48,51,52,50,48,51,53,50,48
+,51,54,50,48,51,55,50,48,49,51,50,48,49,50,50,48,49,49,50,48,49,48,50,48,48,57,
+50,48,48,56,50,48,48,55,50,48,48,54,50,48,48,53,50,48,48,52,50,48,48,51,50,48,48
+,50,50,48,48,49,50,48,48,48,49,57,57,57,49,57,57,56,49,57,57,55,49,57,57,54,49,
+57,57,53,49,57,57,52,49,57,57,51,49,57,57,50,49,57,57,49,49,57,57,48,49,57,56,57
+,49,57,56,56,49,57,56,55,49,57,56,54,49,57,56,53,49,57,56,52,49,57,56,51,49,57,
+56,50,49,57,56,49,49,57,56,48,49,57,55,57,49,57,55,56,49,57,55,55,49,57,55,54,49
+,57,55,53,49,57,55,52,49,57,55,51,49,57,55,50,49,57,55,49,49,57,55,48,49,57,54,
+57,49,57,54,56,49,57,54,55,49,57,54,54,49,57,54,53,49,57,54,52,49,57,54,51,49,57
+,54,50,49,57,54,49,49,57,54,48,49,57,53,57,49,57,53,56,49,57,53,55,49,57,53,54,
+49,57,53,53,49,57,53,52,49,57,53,51,49,57,53,50,49,57,53,49,49,57,53,48,49,48,48
+,48,49,48,50,52,49,51,57,52,48,48,48,48,57,57,57,57,99,111,109,111,109,195,161,
+115,101,115,116,101,101,115,116,97,112,101,114,111,116,111,100,111,104,97,99,101
+,99,97,100,97,97,195,177,111,98,105,101,110,100,195,173,97,97,115,195,173,118,
+105,100,97,99,97,115,111,111,116,114,111,102,111,114,111,115,111,108,111,111,116
+,114,97,99,117,97,108,100,105,106,111,115,105,100,111,103,114,97,110,116,105,112
+,111,116,101,109,97,100,101,98,101,97,108,103,111,113,117,195,169,101,115,116,
+111,110,97,100,97,116,114,101,115,112,111,99,111,99,97,115,97,98,97,106,111,116,
+111,100,97,115,105,110,111,97,103,117,97,112,117,101,115,117,110,111,115,97,110,
+116,101,100,105,99,101,108,117,105,115,101,108,108,97,109,97,121,111,122,111,110
+,97,97,109,111,114,112,105,115,111,111,98,114,97,99,108,105,99,101,108,108,111,
+100,105,111,115,104,111,114,97,99,97,115,105,208,183,208,176,208,189,208,176,208
+,190,208,188,209,128,208,176,209,128,209,131,209,130,208,176,208,189,208,181,208
+,191,208,190,208,190,209,130,208,184,208,183,208,189,208,190,208,180,208,190,209
+,130,208,190,208,182,208,181,208,190,208,189,208,184,209,133,208,157,208,176,208
+,181,208,181,208,177,209,139,208,188,209,139,208,146,209,139,209,129,208,190,208
+,178,209,139,208,178,208,190,208,157,208,190,208,190,208,177,208,159,208,190,208
+,187,208,184,208,189,208,184,208,160,208,164,208,157,208,181,208,156,209,139,209
+,130,209,139,208,158,208,189,208,184,208,188,208,180,208,176,208,151,208,176,208
+,148,208,176,208,157,209,131,208,158,208,177,209,130,208,181,208,152,208,183,208
+,181,208,185,208,189,209,131,208,188,208,188,208,162,209,139,209,131,208,182,217
+,129,217,138,216,163,217,134,217,133,216,167,217,133,216,185,217,131,217,132,216
+,163,217,136,216,177,216,175,217,138,216,167,217,129,217,137,217,135,217,136,217
+,132,217,133,217,132,217,131,216,167,217,136,217,132,217,135,216,168,216,179,216
+,167,217,132,216,165,217,134,217,135,217,138,216,163,217,138,217,130,216,175,217
+,135,217,132,216,171,217,133,216,168,217,135,217,132,217,136,217,132,217,138,216
+,168,217,132,216,167,217,138,216,168,217,131,216,180,217,138,216,167,217,133,216
+,163,217,133,217,134,216,170,216,168,217,138,217,132,217,134,216,173,216,168,217
+,135,217,133,217,133,216,180,217,136,216,180,102,105,114,115,116,118,105,100,101
+,111,108,105,103,104,116,119,111,114,108,100,109,101,100,105,97,119,104,105,116,
+101,99,108,111,115,101,98,108,97,99,107,114,105,103,104,116,115,109,97,108,108,
+98,111,111,107,115,112,108,97,99,101,109,117,115,105,99,102,105,101,108,100,111,
+114,100,101,114,112,111,105,110,116,118,97,108,117,101,108,101,118,101,108,116,
+97,98,108,101,98,111,97,114,100,104,111,117,115,101,103,114,111,117,112,119,111,
+114,107,115,121,101,97,114,115,115,116,97,116,101,116,111,100,97,121,119,97,116,
+101,114,115,116,97,114,116,115,116,121,108,101,100,101,97,116,104,112,111,119,
+101,114,112,104,111,110,101,110,105,103,104,116,101,114,114,111,114,105,110,112,
+117,116,97,98,111,117,116,116,101,114,109,115,116,105,116,108,101,116,111,111,
+108,115,101,118,101,110,116,108,111,99,97,108,116,105,109,101,115,108,97,114,103
+,101,119,111,114,100,115,103,97,109,101,115,115,104,111,114,116,115,112,97,99,
+101,102,111,99,117,115,99,108,101,97,114,109,111,100,101,108,98,108,111,99,107,
+103,117,105,100,101,114,97,100,105,111,115,104,97,114,101,119,111,109,101,110,97
+,103,97,105,110,109,111,110,101,121,105,109,97,103,101,110,97,109,101,115,121,
+111,117,110,103,108,105,110,101,115,108,97,116,101,114,99,111,108,111,114,103,
+114,101,101,110,102,114,111,110,116,38,97,109,112,59,119,97,116,99,104,102,111,
+114,99,101,112,114,105,99,101,114,117,108,101,115,98,101,103,105,110,97,102,116,
+101,114,118,105,115,105,116,105,115,115,117,101,97,114,101,97,115,98,101,108,111
+,119,105,110,100,101,120,116,111,116,97,108,104,111,117,114,115,108,97,98,101,
+108,112,114,105,110,116,112,114,101,115,115,98,117,105,108,116,108,105,110,107,
+115,115,112,101,101,100,115,116,117,100,121,116,114,97,100,101,102,111,117,110,
+100,115,101,110,115,101,117,110,100,101,114,115,104,111,119,110,102,111,114,109,
+115,114,97,110,103,101,97,100,100,101,100,115,116,105,108,108,109,111,118,101,
+100,116,97,107,101,110,97,98,111,118,101,102,108,97,115,104,102,105,120,101,100,
+111,102,116,101,110,111,116,104,101,114,118,105,101,119,115,99,104,101,99,107,
+108,101,103,97,108,114,105,118,101,114,105,116,101,109,115,113,117,105,99,107,
+115,104,97,112,101,104,117,109,97,110,101,120,105,115,116,103,111,105,110,103,
+109,111,118,105,101,116,104,105,114,100,98,97,115,105,99,112,101,97,99,101,115,
+116,97,103,101,119,105,100,116,104,108,111,103,105,110,105,100,101,97,115,119,
+114,111,116,101,112,97,103,101,115,117,115,101,114,115,100,114,105,118,101,115,
+116,111,114,101,98,114,101,97,107,115,111,117,116,104,118,111,105,99,101,115,105
+,116,101,115,109,111,110,116,104,119,104,101,114,101,98,117,105,108,100,119,104,
+105,99,104,101,97,114,116,104,102,111,114,117,109,116,104,114,101,101,115,112,
+111,114,116,112,97,114,116,121,67,108,105,99,107,108,111,119,101,114,108,105,118
+,101,115,99,108,97,115,115,108,97,121,101,114,101,110,116,114,121,115,116,111,
+114,121,117,115,97,103,101,115,111,117,110,100,99,111,117,114,116,121,111,117,
+114,32,98,105,114,116,104,112,111,112,117,112,116,121,112,101,115,97,112,112,108
+,121,73,109,97,103,101,98,101,105,110,103,117,112,112,101,114,110,111,116,101,
+115,101,118,101,114,121,115,104,111,119,115,109,101,97,110,115,101,120,116,114,
+97,109,97,116,99,104,116,114,97,99,107,107,110,111,119,110,101,97,114,108,121,98
+,101,103,97,110,115,117,112,101,114,112,97,112,101,114,110,111,114,116,104,108,
+101,97,114,110,103,105,118,101,110,110,97,109,101,100,101,110,100,101,100,84,101
+,114,109,115,112,97,114,116,115,71,114,111,117,112,98,114,97,110,100,117,115,105
+,110,103,119,111,109,97,110,102,97,108,115,101,114,101,97,100,121,97,117,100,105
+,111,116,97,107,101,115,119,104,105,108,101,46,99,111,109,47,108,105,118,101,100
+,99,97,115,101,115,100,97,105,108,121,99,104,105,108,100,103,114,101,97,116,106,
+117,100,103,101,116,104,111,115,101,117,110,105,116,115,110,101,118,101,114,98,
+114,111,97,100,99,111,97,115,116,99,111,118,101,114,97,112,112,108,101,102,105,
+108,101,115,99,121,99,108,101,115,99,101,110,101,112,108,97,110,115,99,108,105,
+99,107,119,114,105,116,101,113,117,101,101,110,112,105,101,99,101,101,109,97,105
+,108,102,114,97,109,101,111,108,100,101,114,112,104,111,116,111,108,105,109,105,
+116,99,97,99,104,101,99,105,118,105,108,115,99,97,108,101,101,110,116,101,114,
+116,104,101,109,101,116,104,101,114,101,116,111,117,99,104,98,111,117,110,100,
+114,111,121,97,108,97,115,107,101,100,119,104,111,108,101,115,105,110,99,101,115
+,116,111,99,107,32,110,97,109,101,102,97,105,116,104,104,101,97,114,116,101,109,
+112,116,121,111,102,102,101,114,115,99,111,112,101,111,119,110,101,100,109,105,
+103,104,116,97,108,98,117,109,116,104,105,110,107,98,108,111,111,100,97,114,114,
+97,121,109,97,106,111,114,116,114,117,115,116,99,97,110,111,110,117,110,105,111,
+110,99,111,117,110,116,118,97,108,105,100,115,116,111,110,101,83,116,121,108,101
+,76,111,103,105,110,104,97,112,112,121,111,99,99,117,114,108,101,102,116,58,102,
+114,101,115,104,113,117,105,116,101,102,105,108,109,115,103,114,97,100,101,110,
+101,101,100,115,117,114,98,97,110,102,105,103,104,116,98,97,115,105,115,104,111,
+118,101,114,97,117,116,111,59,114,111,117,116,101,46,104,116,109,108,109,105,120
+,101,100,102,105,110,97,108,89,111,117,114,32,115,108,105,100,101,116,111,112,
+105,99,98,114,111,119,110,97,108,111,110,101,100,114,97,119,110,115,112,108,105,
+116,114,101,97,99,104,82,105,103,104,116,100,97,116,101,115,109,97,114,99,104,
+113,117,111,116,101,103,111,111,100,115,76,105,110,107,115,100,111,117,98,116,97
+,115,121,110,99,116,104,117,109,98,97,108,108,111,119,99,104,105,101,102,121,111
+,117,116,104,110,111,118,101,108,49,48,112,120,59,115,101,114,118,101,117,110,
+116,105,108,104,97,110,100,115,67,104,101,99,107,83,112,97,99,101,113,117,101,
+114,121,106,97,109,101,115,101,113,117,97,108,116,119,105,99,101,48,44,48,48,48,
+83,116,97,114,116,112,97,110,101,108,115,111,110,103,115,114,111,117,110,100,101
+,105,103,104,116,115,104,105,102,116,119,111,114,116,104,112,111,115,116,115,108
+,101,97,100,115,119,101,101,107,115,97,118,111,105,100,116,104,101,115,101,109,
+105,108,101,115,112,108,97,110,101,115,109,97,114,116,97,108,112,104,97,112,108,
+97,110,116,109,97,114,107,115,114,97,116,101,115,112,108,97,121,115,99,108,97,
+105,109,115,97,108,101,115,116,101,120,116,115,115,116,97,114,115,119,114,111,
+110,103,60,47,104,51,62,116,104,105,110,103,46,111,114,103,47,109,117,108,116,
+105,104,101,97,114,100,80,111,119,101,114,115,116,97,110,100,116,111,107,101,110
+,115,111,108,105,100,40,116,104,105,115,98,114,105,110,103,115,104,105,112,115,
+115,116,97,102,102,116,114,105,101,100,99,97,108,108,115,102,117,108,108,121,102
+,97,99,116,115,97,103,101,110,116,84,104,105,115,32,47,47,45,45,62,97,100,109,
+105,110,101,103,121,112,116,69,118,101,110,116,49,53,112,120,59,69,109,97,105,
+108,116,114,117,101,34,99,114,111,115,115,115,112,101,110,116,98,108,111,103,115
+,98,111,120,34,62,110,111,116,101,100,108,101,97,118,101,99,104,105,110,97,115,
+105,122,101,115,103,117,101,115,116,60,47,104,52,62,114,111,98,111,116,104,101,
+97,118,121,116,114,117,101,44,115,101,118,101,110,103,114,97,110,100,99,114,105,
+109,101,115,105,103,110,115,97,119,97,114,101,100,97,110,99,101,112,104,97,115,
+101,62,60,33,45,45,101,110,95,85,83,38,35,51,57,59,50,48,48,112,120,95,110,97,
+109,101,108,97,116,105,110,101,110,106,111,121,97,106,97,120,46,97,116,105,111,
+110,115,109,105,116,104,85,46,83,46,32,104,111,108,100,115,112,101,116,101,114,
+105,110,100,105,97,110,97,118,34,62,99,104,97,105,110,115,99,111,114,101,99,111,
+109,101,115,100,111,105,110,103,112,114,105,111,114,83,104,97,114,101,49,57,57,
+48,115,114,111,109,97,110,108,105,115,116,115,106,97,112,97,110,102,97,108,108,
+115,116,114,105,97,108,111,119,110,101,114,97,103,114,101,101,60,47,104,50,62,97
+,98,117,115,101,97,108,101,114,116,111,112,101,114,97,34,45,47,47,87,99,97,114,
+100,115,104,105,108,108,115,116,101,97,109,115,80,104,111,116,111,116,114,117,
+116,104,99,108,101,97,110,46,112,104,112,63,115,97,105,110,116,109,101,116,97,
+108,108,111,117,105,115,109,101,97,110,116,112,114,111,111,102,98,114,105,101,
+102,114,111,119,34,62,103,101,110,114,101,116,114,117,99,107,108,111,111,107,115
+,86,97,108,117,101,70,114,97,109,101,46,110,101,116,47,45,45,62,10,60,116,114,
+121,32,123,10,118,97,114,32,109,97,107,101,115,99,111,115,116,115,112,108,97,105
+,110,97,100,117,108,116,113,117,101,115,116,116,114,97,105,110,108,97,98,111,114
+,104,101,108,112,115,99,97,117,115,101,109,97,103,105,99,109,111,116,111,114,116
+,104,101,105,114,50,53,48,112,120,108,101,97,115,116,115,116,101,112,115,67,111,
+117,110,116,99,111,117,108,100,103,108,97,115,115,115,105,100,101,115,102,117,
+110,100,115,104,111,116,101,108,97,119,97,114,100,109,111,117,116,104,109,111,
+118,101,115,112,97,114,105,115,103,105,118,101,115,100,117,116,99,104,116,101,
+120,97,115,102,114,117,105,116,110,117,108,108,44,124,124,91,93,59,116,111,112,
+34,62,10,60,33,45,45,80,79,83,84,34,111,99,101,97,110,60,98,114,47,62,102,108,
+111,111,114,115,112,101,97,107,100,101,112,116,104,32,115,105,122,101,98,97,110,
+107,115,99,97,116,99,104,99,104,97,114,116,50,48,112,120,59,97,108,105,103,110,
+100,101,97,108,115,119,111,117,108,100,53,48,112,120,59,117,114,108,61,34,112,97
+,114,107,115,109,111,117,115,101,77,111,115,116,32,46,46,46,60,47,97,109,111,110
+,103,98,114,97,105,110,98,111,100,121,32,110,111,110,101,59,98,97,115,101,100,99
+,97,114,114,121,100,114,97,102,116,114,101,102,101,114,112,97,103,101,95,104,111
+,109,101,46,109,101,116,101,114,100,101,108,97,121,100,114,101,97,109,112,114,
+111,118,101,106,111,105,110,116,60,47,116,114,62,100,114,117,103,115,60,33,45,45
+,32,97,112,114,105,108,105,100,101,97,108,97,108,108,101,110,101,120,97,99,116,
+102,111,114,116,104,99,111,100,101,115,108,111,103,105,99,86,105,101,119,32,115,
+101,101,109,115,98,108,97,110,107,112,111,114,116,115,32,40,50,48,48,115,97,118,
+101,100,95,108,105,110,107,103,111,97,108,115,103,114,97,110,116,103,114,101,101
+,107,104,111,109,101,115,114,105,110,103,115,114,97,116,101,100,51,48,112,120,59
+,119,104,111,115,101,112,97,114,115,101,40,41,59,34,32,66,108,111,99,107,108,105
+,110,117,120,106,111,110,101,115,112,105,120,101,108,39,41,59,34,62,41,59,105,
+102,40,45,108,101,102,116,100,97,118,105,100,104,111,114,115,101,70,111,99,117,
+115,114,97,105,115,101,98,111,120,101,115,84,114,97,99,107,101,109,101,110,116,
+60,47,101,109,62,98,97,114,34,62,46,115,114,99,61,116,111,119,101,114,97,108,116
+,61,34,99,97,98,108,101,104,101,110,114,121,50,52,112,120,59,115,101,116,117,112
+,105,116,97,108,121,115,104,97,114,112,109,105,110,111,114,116,97,115,116,101,
+119,97,110,116,115,116,104,105,115,46,114,101,115,101,116,119,104,101,101,108,
+103,105,114,108,115,47,99,115,115,47,49,48,48,37,59,99,108,117,98,115,115,116,
+117,102,102,98,105,98,108,101,118,111,116,101,115,32,49,48,48,48,107,111,114,101
+,97,125,41,59,13,10,98,97,110,100,115,113,117,101,117,101,61,32,123,125,59,56,48
+,112,120,59,99,107,105,110,103,123,13,10,9,9,97,104,101,97,100,99,108,111,99,107
+,105,114,105,115,104,108,105,107,101,32,114,97,116,105,111,115,116,97,116,115,70
+,111,114,109,34,121,97,104,111,111,41,91,48,93,59,65,98,111,117,116,102,105,110,
+100,115,60,47,104,49,62,100,101,98,117,103,116,97,115,107,115,85,82,76,32,61,99,
+101,108,108,115,125,41,40,41,59,49,50,112,120,59,112,114,105,109,101,116,101,108
+,108,115,116,117,114,110,115,48,120,54,48,48,46,106,112,103,34,115,112,97,105,
+110,98,101,97,99,104,116,97,120,101,115,109,105,99,114,111,97,110,103,101,108,45
+,45,62,60,47,103,105,102,116,115,115,116,101,118,101,45,108,105,110,107,98,111,
+100,121,46,125,41,59,10,9,109,111,117,110,116,32,40,49,57,57,70,65,81,60,47,114,
+111,103,101,114,102,114,97,110,107,67,108,97,115,115,50,56,112,120,59,102,101,
+101,100,115,60,104,49,62,60,115,99,111,116,116,116,101,115,116,115,50,50,112,120
+,59,100,114,105,110,107,41,32,124,124,32,108,101,119,105,115,115,104,97,108,108,
+35,48,51,57,59,32,102,111,114,32,108,111,118,101,100,119,97,115,116,101,48,48,
+112,120,59,106,97,58,227,130,115,105,109,111,110,60,102,111,110,116,114,101,112,
+108,121,109,101,101,116,115,117,110,116,101,114,99,104,101,97,112,116,105,103,
+104,116,66,114,97,110,100,41,32,33,61,32,100,114,101,115,115,99,108,105,112,115,
+114,111,111,109,115,111,110,107,101,121,109,111,98,105,108,109,97,105,110,46,78,
+97,109,101,32,112,108,97,116,101,102,117,110,110,121,116,114,101,101,115,99,111,
+109,47,34,49,46,106,112,103,119,109,111,100,101,112,97,114,97,109,83,84,65,82,84
+,108,101,102,116,32,105,100,100,101,110,44,32,50,48,49,41,59,10,125,10,102,111,
+114,109,46,118,105,114,117,115,99,104,97,105,114,116,114,97,110,115,119,111,114,
+115,116,80,97,103,101,115,105,116,105,111,110,112,97,116,99,104,60,33,45,45,10,
+111,45,99,97,99,102,105,114,109,115,116,111,117,114,115,44,48,48,48,32,97,115,
+105,97,110,105,43,43,41,123,97,100,111,98,101,39,41,91,48,93,105,100,61,49,48,98
+,111,116,104,59,109,101,110,117,32,46,50,46,109,105,46,112,110,103,34,107,101,
+118,105,110,99,111,97,99,104,67,104,105,108,100,98,114,117,99,101,50,46,106,112,
+103,85,82,76,41,43,46,106,112,103,124,115,117,105,116,101,115,108,105,99,101,104
+,97,114,114,121,49,50,48,34,32,115,119,101,101,116,116,114,62,13,10,110,97,109,
+101,61,100,105,101,103,111,112,97,103,101,32,115,119,105,115,115,45,45,62,10,10,
+35,102,102,102,59,34,62,76,111,103,46,99,111,109,34,116,114,101,97,116,115,104,
+101,101,116,41,32,38,38,32,49,52,112,120,59,115,108,101,101,112,110,116,101,110,
+116,102,105,108,101,100,106,97,58,227,131,105,100,61,34,99,78,97,109,101,34,119,
+111,114,115,101,115,104,111,116,115,45,98,111,120,45,100,101,108,116,97,10,38,
+108,116,59,98,101,97,114,115,58,52,56,90,60,100,97,116,97,45,114,117,114,97,108,
+60,47,97,62,32,115,112,101,110,100,98,97,107,101,114,115,104,111,112,115,61,32,
+34,34,59,112,104,112,34,62,99,116,105,111,110,49,51,112,120,59,98,114,105,97,110
+,104,101,108,108,111,115,105,122,101,61,111,61,37,50,70,32,106,111,105,110,109,
+97,121,98,101,60,105,109,103,32,105,109,103,34,62,44,32,102,106,115,105,109,103,
+34,32,34,41,91,48,93,77,84,111,112,66,84,121,112,101,34,110,101,119,108,121,68,
+97,110,115,107,99,122,101,99,104,116,114,97,105,108,107,110,111,119,115,60,47,
+104,53,62,102,97,113,34,62,122,104,45,99,110,49,48,41,59,10,45,49,34,41,59,116,
+121,112,101,61,98,108,117,101,115,116,114,117,108,121,100,97,118,105,115,46,106,
+115,39,59,62,13,10,60,33,115,116,101,101,108,32,121,111,117,32,104,50,62,13,10,
+102,111,114,109,32,106,101,115,117,115,49,48,48,37,32,109,101,110,117,46,13,10,9
+,13,10,119,97,108,101,115,114,105,115,107,115,117,109,101,110,116,100,100,105,
+110,103,98,45,108,105,107,116,101,97,99,104,103,105,102,34,32,118,101,103,97,115
+,100,97,110,115,107,101,101,115,116,105,115,104,113,105,112,115,117,111,109,105,
+115,111,98,114,101,100,101,115,100,101,101,110,116,114,101,116,111,100,111,115,
+112,117,101,100,101,97,195,177,111,115,101,115,116,195,161,116,105,101,110,101,
+104,97,115,116,97,111,116,114,111,115,112,97,114,116,101,100,111,110,100,101,110
+,117,101,118,111,104,97,99,101,114,102,111,114,109,97,109,105,115,109,111,109,
+101,106,111,114,109,117,110,100,111,97,113,117,195,173,100,195,173,97,115,115,
+195,179,108,111,97,121,117,100,97,102,101,99,104,97,116,111,100,97,115,116,97,
+110,116,111,109,101,110,111,115,100,97,116,111,115,111,116,114,97,115,115,105,
+116,105,111,109,117,99,104,111,97,104,111,114,97,108,117,103,97,114,109,97,121,
+111,114,101,115,116,111,115,104,111,114,97,115,116,101,110,101,114,97,110,116,
+101,115,102,111,116,111,115,101,115,116,97,115,112,97,195,173,115,110,117,101,
+118,97,115,97,108,117,100,102,111,114,111,115,109,101,100,105,111,113,117,105,
+101,110,109,101,115,101,115,112,111,100,101,114,99,104,105,108,101,115,101,114,
+195,161,118,101,99,101,115,100,101,99,105,114,106,111,115,195,169,101,115,116,97
+,114,118,101,110,116,97,103,114,117,112,111,104,101,99,104,111,101,108,108,111,
+115,116,101,110,103,111,97,109,105,103,111,99,111,115,97,115,110,105,118,101,108
+,103,101,110,116,101,109,105,115,109,97,97,105,114,101,115,106,117,108,105,111,
+116,101,109,97,115,104,97,99,105,97,102,97,118,111,114,106,117,110,105,111,108,
+105,98,114,101,112,117,110,116,111,98,117,101,110,111,97,117,116,111,114,97,98,
+114,105,108,98,117,101,110,97,116,101,120,116,111,109,97,114,122,111,115,97,98,
+101,114,108,105,115,116,97,108,117,101,103,111,99,195,179,109,111,101,110,101,
+114,111,106,117,101,103,111,112,101,114,195,186,104,97,98,101,114,101,115,116,
+111,121,110,117,110,99,97,109,117,106,101,114,118,97,108,111,114,102,117,101,114
+,97,108,105,98,114,111,103,117,115,116,97,105,103,117,97,108,118,111,116,111,115
+,99,97,115,111,115,103,117,195,173,97,112,117,101,100,111,115,111,109,111,115,97
+,118,105,115,111,117,115,116,101,100,100,101,98,101,110,110,111,99,104,101,98,
+117,115,99,97,102,97,108,116,97,101,117,114,111,115,115,101,114,105,101,100,105,
+99,104,111,99,117,114,115,111,99,108,97,118,101,99,97,115,97,115,108,101,195,179
+,110,112,108,97,122,111,108,97,114,103,111,111,98,114,97,115,118,105,115,116,97,
+97,112,111,121,111,106,117,110,116,111,116,114,97,116,97,118,105,115,116,111,99,
+114,101,97,114,99,97,109,112,111,104,101,109,111,115,99,105,110,99,111,99,97,114
+,103,111,112,105,115,111,115,111,114,100,101,110,104,97,99,101,110,195,161,114,
+101,97,100,105,115,99,111,112,101,100,114,111,99,101,114,99,97,112,117,101,100,
+97,112,97,112,101,108,109,101,110,111,114,195,186,116,105,108,99,108,97,114,111,
+106,111,114,103,101,99,97,108,108,101,112,111,110,101,114,116,97,114,100,101,110
+,97,100,105,101,109,97,114,99,97,115,105,103,117,101,101,108,108,97,115,115,105,
+103,108,111,99,111,99,104,101,109,111,116,111,115,109,97,100,114,101,99,108,97,
+115,101,114,101,115,116,111,110,105,195,177,111,113,117,101,100,97,112,97,115,97
+,114,98,97,110,99,111,104,105,106,111,115,118,105,97,106,101,112,97,98,108,111,
+195,169,115,116,101,118,105,101,110,101,114,101,105,110,111,100,101,106,97,114,
+102,111,110,100,111,99,97,110,97,108,110,111,114,116,101,108,101,116,114,97,99,
+97,117,115,97,116,111,109,97,114,109,97,110,111,115,108,117,110,101,115,97,117,
+116,111,115,118,105,108,108,97,118,101,110,100,111,112,101,115,97,114,116,105,
+112,111,115,116,101,110,103,97,109,97,114,99,111,108,108,101,118,97,112,97,100,
+114,101,117,110,105,100,111,118,97,109,111,115,122,111,110,97,115,97,109,98,111,
+115,98,97,110,100,97,109,97,114,105,97,97,98,117,115,111,109,117,99,104,97,115,
+117,98,105,114,114,105,111,106,97,118,105,118,105,114,103,114,97,100,111,99,104,
+105,99,97,97,108,108,195,173,106,111,118,101,110,100,105,99,104,97,101,115,116,
+97,110,116,97,108,101,115,115,97,108,105,114,115,117,101,108,111,112,101,115,111
+,115,102,105,110,101,115,108,108,97,109,97,98,117,115,99,111,195,169,115,116,97,
+108,108,101,103,97,110,101,103,114,111,112,108,97,122,97,104,117,109,111,114,112
+,97,103,97,114,106,117,110,116,97,100,111,98,108,101,105,115,108,97,115,98,111,
+108,115,97,98,97,195,177,111,104,97,98,108,97,108,117,99,104,97,195,129,114,101,
+97,100,105,99,101,110,106,117,103,97,114,110,111,116,97,115,118,97,108,108,101,
+97,108,108,195,161,99,97,114,103,97,100,111,108,111,114,97,98,97,106,111,101,115
+,116,195,169,103,117,115,116,111,109,101,110,116,101,109,97,114,105,111,102,105,
+114,109,97,99,111,115,116,111,102,105,99,104,97,112,108,97,116,97,104,111,103,97
+,114,97,114,116,101,115,108,101,121,101,115,97,113,117,101,108,109,117,115,101,
+111,98,97,115,101,115,112,111,99,111,115,109,105,116,97,100,99,105,101,108,111,
+99,104,105,99,111,109,105,101,100,111,103,97,110,97,114,115,97,110,116,111,101,
+116,97,112,97,100,101,98,101,115,112,108,97,121,97,114,101,100,101,115,115,105,
+101,116,101,99,111,114,116,101,99,111,114,101,97,100,117,100,97,115,100,101,115,
+101,111,118,105,101,106,111,100,101,115,101,97,97,103,117,97,115,38,113,117,111,
+116,59,100,111,109,97,105,110,99,111,109,109,111,110,115,116,97,116,117,115,101,
+118,101,110,116,115,109,97,115,116,101,114,115,121,115,116,101,109,97,99,116,105
+,111,110,98,97,110,110,101,114,114,101,109,111,118,101,115,99,114,111,108,108,
+117,112,100,97,116,101,103,108,111,98,97,108,109,101,100,105,117,109,102,105,108
+,116,101,114,110,117,109,98,101,114,99,104,97,110,103,101,114,101,115,117,108,
+116,112,117,98,108,105,99,115,99,114,101,101,110,99,104,111,111,115,101,110,111,
+114,109,97,108,116,114,97,118,101,108,105,115,115,117,101,115,115,111,117,114,99
+,101,116,97,114,103,101,116,115,112,114,105,110,103,109,111,100,117,108,101,109,
+111,98,105,108,101,115,119,105,116,99,104,112,104,111,116,111,115,98,111,114,100
+,101,114,114,101,103,105,111,110,105,116,115,101,108,102,115,111,99,105,97,108,
+97,99,116,105,118,101,99,111,108,117,109,110,114,101,99,111,114,100,102,111,108,
+108,111,119,116,105,116,108,101,62,101,105,116,104,101,114,108,101,110,103,116,
+104,102,97,109,105,108,121,102,114,105,101,110,100,108,97,121,111,117,116,97,117
+,116,104,111,114,99,114,101,97,116,101,114,101,118,105,101,119,115,117,109,109,
+101,114,115,101,114,118,101,114,112,108,97,121,101,100,112,108,97,121,101,114,
+101,120,112,97,110,100,112,111,108,105,99,121,102,111,114,109,97,116,100,111,117
+,98,108,101,112,111,105,110,116,115,115,101,114,105,101,115,112,101,114,115,111,
+110,108,105,118,105,110,103,100,101,115,105,103,110,109,111,110,116,104,115,102,
+111,114,99,101,115,117,110,105,113,117,101,119,101,105,103,104,116,112,101,111,
+112,108,101,101,110,101,114,103,121,110,97,116,117,114,101,115,101,97,114,99,104
+,102,105,103,117,114,101,104,97,118,105,110,103,99,117,115,116,111,109,111,102,
+102,115,101,116,108,101,116,116,101,114,119,105,110,100,111,119,115,117,98,109,
+105,116,114,101,110,100,101,114,103,114,111,117,112,115,117,112,108,111,97,100,
+104,101,97,108,116,104,109,101,116,104,111,100,118,105,100,101,111,115,115,99,
+104,111,111,108,102,117,116,117,114,101,115,104,97,100,111,119,100,101,98,97,116
+,101,118,97,108,117,101,115,79,98,106,101,99,116,111,116,104,101,114,115,114,105
+,103,104,116,115,108,101,97,103,117,101,99,104,114,111,109,101,115,105,109,112,
+108,101,110,111,116,105,99,101,115,104,97,114,101,100,101,110,100,105,110,103,
+115,101,97,115,111,110,114,101,112,111,114,116,111,110,108,105,110,101,115,113,
+117,97,114,101,98,117,116,116,111,110,105,109,97,103,101,115,101,110,97,98,108,
+101,109,111,118,105,110,103,108,97,116,101,115,116,119,105,110,116,101,114,70,
+114,97,110,99,101,112,101,114,105,111,100,115,116,114,111,110,103,114,101,112,
+101,97,116,76,111,110,100,111,110,100,101,116,97,105,108,102,111,114,109,101,100
+,100,101,109,97,110,100,115,101,99,117,114,101,112,97,115,115,101,100,116,111,
+103,103,108,101,112,108,97,99,101,115,100,101,118,105,99,101,115,116,97,116,105,
+99,99,105,116,105,101,115,115,116,114,101,97,109,121,101,108,108,111,119,97,116,
+116,97,99,107,115,116,114,101,101,116,102,108,105,103,104,116,104,105,100,100,
+101,110,105,110,102,111,34,62,111,112,101,110,101,100,117,115,101,102,117,108,
+118,97,108,108,101,121,99,97,117,115,101,115,108,101,97,100,101,114,115,101,99,
+114,101,116,115,101,99,111,110,100,100,97,109,97,103,101,115,112,111,114,116,115
+,101,120,99,101,112,116,114,97,116,105,110,103,115,105,103,110,101,100,116,104,
+105,110,103,115,101,102,102,101,99,116,102,105,101,108,100,115,115,116,97,116,
+101,115,111,102,102,105,99,101,118,105,115,117,97,108,101,100,105,116,111,114,
+118,111,108,117,109,101,82,101,112,111,114,116,109,117,115,101,117,109,109,111,
+118,105,101,115,112,97,114,101,110,116,97,99,99,101,115,115,109,111,115,116,108,
+121,109,111,116,104,101,114,34,32,105,100,61,34,109,97,114,107,101,116,103,114,
+111,117,110,100,99,104,97,110,99,101,115,117,114,118,101,121,98,101,102,111,114,
+101,115,121,109,98,111,108,109,111,109,101,110,116,115,112,101,101,99,104,109,
+111,116,105,111,110,105,110,115,105,100,101,109,97,116,116,101,114,67,101,110,
+116,101,114,111,98,106,101,99,116,101,120,105,115,116,115,109,105,100,100,108,
+101,69,117,114,111,112,101,103,114,111,119,116,104,108,101,103,97,99,121,109,97,
+110,110,101,114,101,110,111,117,103,104,99,97,114,101,101,114,97,110,115,119,101
+,114,111,114,105,103,105,110,112,111,114,116,97,108,99,108,105,101,110,116,115,
+101,108,101,99,116,114,97,110,100,111,109,99,108,111,115,101,100,116,111,112,105
+,99,115,99,111,109,105,110,103,102,97,116,104,101,114,111,112,116,105,111,110,
+115,105,109,112,108,121,114,97,105,115,101,100,101,115,99,97,112,101,99,104,111,
+115,101,110,99,104,117,114,99,104,100,101,102,105,110,101,114,101,97,115,111,110
+,99,111,114,110,101,114,111,117,116,112,117,116,109,101,109,111,114,121,105,102,
+114,97,109,101,112,111,108,105,99,101,109,111,100,101,108,115,78,117,109,98,101,
+114,100,117,114,105,110,103,111,102,102,101,114,115,115,116,121,108,101,115,107,
+105,108,108,101,100,108,105,115,116,101,100,99,97,108,108,101,100,115,105,108,
+118,101,114,109,97,114,103,105,110,100,101,108,101,116,101,98,101,116,116,101,
+114,98,114,111,119,115,101,108,105,109,105,116,115,71,108,111,98,97,108,115,105,
+110,103,108,101,119,105,100,103,101,116,99,101,110,116,101,114,98,117,100,103,
+101,116,110,111,119,114,97,112,99,114,101,100,105,116,99,108,97,105,109,115,101,
+110,103,105,110,101,115,97,102,101,116,121,99,104,111,105,99,101,115,112,105,114
+,105,116,45,115,116,121,108,101,115,112,114,101,97,100,109,97,107,105,110,103,
+110,101,101,100,101,100,114,117,115,115,105,97,112,108,101,97,115,101,101,120,
+116,101,110,116,83,99,114,105,112,116,98,114,111,107,101,110,97,108,108,111,119,
+115,99,104,97,114,103,101,100,105,118,105,100,101,102,97,99,116,111,114,109,101,
+109,98,101,114,45,98,97,115,101,100,116,104,101,111,114,121,99,111,110,102,105,
+103,97,114,111,117,110,100,119,111,114,107,101,100,104,101,108,112,101,100,67,
+104,117,114,99,104,105,109,112,97,99,116,115,104,111,117,108,100,97,108,119,97,
+121,115,108,111,103,111,34,32,98,111,116,116,111,109,108,105,115,116,34,62,41,
+123,118,97,114,32,112,114,101,102,105,120,111,114,97,110,103,101,72,101,97,100,
+101,114,46,112,117,115,104,40,99,111,117,112,108,101,103,97,114,100,101,110,98,
+114,105,100,103,101,108,97,117,110,99,104,82,101,118,105,101,119,116,97,107,105,
+110,103,118,105,115,105,111,110,108,105,116,116,108,101,100,97,116,105,110,103,
+66,117,116,116,111,110,98,101,97,117,116,121,116,104,101,109,101,115,102,111,114
+,103,111,116,83,101,97,114,99,104,97,110,99,104,111,114,97,108,109,111,115,116,
+108,111,97,100,101,100,67,104,97,110,103,101,114,101,116,117,114,110,115,116,114
+,105,110,103,114,101,108,111,97,100,77,111,98,105,108,101,105,110,99,111,109,101
+,115,117,112,112,108,121,83,111,117,114,99,101,111,114,100,101,114,115,118,105,
+101,119,101,100,38,110,98,115,112,59,99,111,117,114,115,101,65,98,111,117,116,32
+,105,115,108,97,110,100,60,104,116,109,108,32,99,111,111,107,105,101,110,97,109,
+101,61,34,97,109,97,122,111,110,109,111,100,101,114,110,97,100,118,105,99,101,
+105,110,60,47,97,62,58,32,84,104,101,32,100,105,97,108,111,103,104,111,117,115,
+101,115,66,69,71,73,78,32,77,101,120,105,99,111,115,116,97,114,116,115,99,101,
+110,116,114,101,104,101,105,103,104,116,97,100,100,105,110,103,73,115,108,97,110
+,100,97,115,115,101,116,115,69,109,112,105,114,101,83,99,104,111,111,108,101,102
+,102,111,114,116,100,105,114,101,99,116,110,101,97,114,108,121,109,97,110,117,97
+,108,83,101,108,101,99,116,46,10,10,79,110,101,106,111,105,110,101,100,109,101,
+110,117,34,62,80,104,105,108,105,112,97,119,97,114,100,115,104,97,110,100,108,
+101,105,109,112,111,114,116,79,102,102,105,99,101,114,101,103,97,114,100,115,107
+,105,108,108,115,110,97,116,105,111,110,83,112,111,114,116,115,100,101,103,114,
+101,101,119,101,101,107,108,121,32,40,101,46,103,46,98,101,104,105,110,100,100,
+111,99,116,111,114,108,111,103,103,101,100,117,110,105,116,101,100,60,47,98,62,
+60,47,98,101,103,105,110,115,112,108,97,110,116,115,97,115,115,105,115,116,97,
+114,116,105,115,116,105,115,115,117,101,100,51,48,48,112,120,124,99,97,110,97,
+100,97,97,103,101,110,99,121,115,99,104,101,109,101,114,101,109,97,105,110,66,
+114,97,122,105,108,115,97,109,112,108,101,108,111,103,111,34,62,98,101,121,111,
+110,100,45,115,99,97,108,101,97,99,99,101,112,116,115,101,114,118,101,100,109,97
+,114,105,110,101,70,111,111,116,101,114,99,97,109,101,114,97,60,47,104,49,62,10,
+95,102,111,114,109,34,108,101,97,118,101,115,115,116,114,101,115,115,34,32,47,62
+,13,10,46,103,105,102,34,32,111,110,108,111,97,100,108,111,97,100,101,114,79,120
+,102,111,114,100,115,105,115,116,101,114,115,117,114,118,105,118,108,105,115,116
+,101,110,102,101,109,97,108,101,68,101,115,105,103,110,115,105,122,101,61,34,97,
+112,112,101,97,108,116,101,120,116,34,62,108,101,118,101,108,115,116,104,97,110,
+107,115,104,105,103,104,101,114,102,111,114,99,101,100,97,110,105,109,97,108,97,
+110,121,111,110,101,65,102,114,105,99,97,97,103,114,101,101,100,114,101,99,101,
+110,116,80,101,111,112,108,101,60,98,114,32,47,62,119,111,110,100,101,114,112,
+114,105,99,101,115,116,117,114,110,101,100,124,124,32,123,125,59,109,97,105,110,
+34,62,105,110,108,105,110,101,115,117,110,100,97,121,119,114,97,112,34,62,102,97
+,105,108,101,100,99,101,110,115,117,115,109,105,110,117,116,101,98,101,97,99,111
+,110,113,117,111,116,101,115,49,53,48,112,120,124,101,115,116,97,116,101,114,101
+,109,111,116,101,101,109,97,105,108,34,108,105,110,107,101,100,114,105,103,104,
+116,59,115,105,103,110,97,108,102,111,114,109,97,108,49,46,104,116,109,108,115,
+105,103,110,117,112,112,114,105,110,99,101,102,108,111,97,116,58,46,112,110,103,
+34,32,102,111,114,117,109,46,65,99,99,101,115,115,112,97,112,101,114,115,115,111
+,117,110,100,115,101,120,116,101,110,100,72,101,105,103,104,116,115,108,105,100,
+101,114,85,84,70,45,56,34,38,97,109,112,59,32,66,101,102,111,114,101,46,32,87,
+105,116,104,115,116,117,100,105,111,111,119,110,101,114,115,109,97,110,97,103,
+101,112,114,111,102,105,116,106,81,117,101,114,121,97,110,110,117,97,108,112,97,
+114,97,109,115,98,111,117,103,104,116,102,97,109,111,117,115,103,111,111,103,108
+,101,108,111,110,103,101,114,105,43,43,41,32,123,105,115,114,97,101,108,115,97,
+121,105,110,103,100,101,99,105,100,101,104,111,109,101,34,62,104,101,97,100,101,
+114,101,110,115,117,114,101,98,114,97,110,99,104,112,105,101,99,101,115,98,108,
+111,99,107,59,115,116,97,116,101,100,116,111,112,34,62,60,114,97,99,105,110,103,
+114,101,115,105,122,101,45,45,38,103,116,59,112,97,99,105,116,121,115,101,120,
+117,97,108,98,117,114,101,97,117,46,106,112,103,34,32,49,48,44,48,48,48,111,98,
+116,97,105,110,116,105,116,108,101,115,97,109,111,117,110,116,44,32,73,110,99,46
+,99,111,109,101,100,121,109,101,110,117,34,32,108,121,114,105,99,115,116,111,100
+,97,121,46,105,110,100,101,101,100,99,111,117,110,116,121,95,108,111,103,111,46,
+70,97,109,105,108,121,108,111,111,107,101,100,77,97,114,107,101,116,108,115,101,
+32,105,102,80,108,97,121,101,114,116,117,114,107,101,121,41,59,118,97,114,32,102
+,111,114,101,115,116,103,105,118,105,110,103,101,114,114,111,114,115,68,111,109,
+97,105,110,125,101,108,115,101,123,105,110,115,101,114,116,66,108,111,103,60,47,
+102,111,111,116,101,114,108,111,103,105,110,46,102,97,115,116,101,114,97,103,101
+,110,116,115,60,98,111,100,121,32,49,48,112,120,32,48,112,114,97,103,109,97,102,
+114,105,100,97,121,106,117,110,105,111,114,100,111,108,108,97,114,112,108,97,99,
+101,100,99,111,118,101,114,115,112,108,117,103,105,110,53,44,48,48,48,32,112,97,
+103,101,34,62,98,111,115,116,111,110,46,116,101,115,116,40,97,118,97,116,97,114,
+116,101,115,116,101,100,95,99,111,117,110,116,102,111,114,117,109,115,115,99,104
+,101,109,97,105,110,100,101,120,44,102,105,108,108,101,100,115,104,97,114,101,
+115,114,101,97,100,101,114,97,108,101,114,116,40,97,112,112,101,97,114,83,117,98
+,109,105,116,108,105,110,101,34,62,98,111,100,121,34,62,10,42,32,84,104,101,84,
+104,111,117,103,104,115,101,101,105,110,103,106,101,114,115,101,121,78,101,119,
+115,60,47,118,101,114,105,102,121,101,120,112,101,114,116,105,110,106,117,114,
+121,119,105,100,116,104,61,67,111,111,107,105,101,83,84,65,82,84,32,97,99,114,
+111,115,115,95,105,109,97,103,101,116,104,114,101,97,100,110,97,116,105,118,101,
+112,111,99,107,101,116,98,111,120,34,62,10,83,121,115,116,101,109,32,68,97,118,
+105,100,99,97,110,99,101,114,116,97,98,108,101,115,112,114,111,118,101,100,65,
+112,114,105,108,32,114,101,97,108,108,121,100,114,105,118,101,114,105,116,101,
+109,34,62,109,111,114,101,34,62,98,111,97,114,100,115,99,111,108,111,114,115,99,
+97,109,112,117,115,102,105,114,115,116,32,124,124,32,91,93,59,109,101,100,105,97
+,46,103,117,105,116,97,114,102,105,110,105,115,104,119,105,100,116,104,58,115,
+104,111,119,101,100,79,116,104,101,114,32,46,112,104,112,34,32,97,115,115,117,
+109,101,108,97,121,101,114,115,119,105,108,115,111,110,115,116,111,114,101,115,
+114,101,108,105,101,102,115,119,101,100,101,110,67,117,115,116,111,109,101,97,
+115,105,108,121,32,121,111,117,114,32,83,116,114,105,110,103,10,10,87,104,105,
+108,116,97,121,108,111,114,99,108,101,97,114,58,114,101,115,111,114,116,102,114,
+101,110,99,104,116,104,111,117,103,104,34,41,32,43,32,34,60,98,111,100,121,62,98
+,117,121,105,110,103,98,114,97,110,100,115,77,101,109,98,101,114,110,97,109,101,
+34,62,111,112,112,105,110,103,115,101,99,116,111,114,53,112,120,59,34,62,118,115
+,112,97,99,101,112,111,115,116,101,114,109,97,106,111,114,32,99,111,102,102,101,
+101,109,97,114,116,105,110,109,97,116,117,114,101,104,97,112,112,101,110,60,47,
+110,97,118,62,107,97,110,115,97,115,108,105,110,107,34,62,73,109,97,103,101,115,
+61,102,97,108,115,101,119,104,105,108,101,32,104,115,112,97,99,101,48,38,97,109,
+112,59,32,10,10,73,110,32,32,112,111,119,101,114,80,111,108,115,107,105,45,99,
+111,108,111,114,106,111,114,100,97,110,66,111,116,116,111,109,83,116,97,114,116,
+32,45,99,111,117,110,116,50,46,104,116,109,108,110,101,119,115,34,62,48,49,46,
+106,112,103,79,110,108,105,110,101,45,114,105,103,104,116,109,105,108,108,101,
+114,115,101,110,105,111,114,73,83,66,78,32,48,48,44,48,48,48,32,103,117,105,100,
+101,115,118,97,108,117,101,41,101,99,116,105,111,110,114,101,112,97,105,114,46,
+120,109,108,34,32,32,114,105,103,104,116,115,46,104,116,109,108,45,98,108,111,99
+,107,114,101,103,69,120,112,58,104,111,118,101,114,119,105,116,104,105,110,118,
+105,114,103,105,110,112,104,111,110,101,115,60,47,116,114,62,13,117,115,105,110,
+103,32,10,9,118,97,114,32,62,39,41,59,10,9,60,47,116,100,62,10,60,47,116,114,62,
+10,98,97,104,97,115,97,98,114,97,115,105,108,103,97,108,101,103,111,109,97,103,
+121,97,114,112,111,108,115,107,105,115,114,112,115,107,105,216,177,216,175,217,
+136,228,184,173,230,150,135,231,174,128,228,189,147,231,185,129,233,171,148,228,
+191,161,230,129,175,228,184,173,229,155,189,230,136,145,228,187,172,228,184,128,
+228,184,170,229,133,172,229,143,184,231,174,161,231,144,134,232,174,186,229,157,
+155,229,143,175,228,187,165,230,156,141,229,138,161,230,151,182,233,151,180,228,
+184,170,228,186,186,228,186,167,229,147,129,232,135,170,229,183,177,228,188,129,
+228,184,154,230,159,165,231,156,139,229,183,165,228,189,156,232,129,148,231,179,
+187,230,178,161,230,156,137,231,189,145,231,171,153,230,137,128,230,156,137,232,
+175,132,232,174,186,228,184,173,229,191,131,230,150,135,231,171,160,231,148,168,
+230,136,183,233,166,150,233,161,181,228,189,156,232,128,133,230,138,128,230,156,
+175,233,151,174,233,162,152,231,155,184,229,133,179,228,184,139,232,189,189,230,
+144,156,231,180,162,228,189,191,231,148,168,232,189,175,228,187,182,229,156,168,
+231,186,191,228,184,187,233,162,152,232,181,132,230,150,153,232,167,134,233,162,
+145,229,155,158,229,164,141,230,179,168,229,134,140,231,189,145,231,187,156,230,
+148,182,232,151,143,229,134,133,229,174,185,230,142,168,232,141,144,229,184,130,
+229,156,186,230,182,136,230,129,175,231,169,186,233,151,180,229,143,145,229,184,
+131,228,187,128,228,185,136,229,165,189,229,143,139,231,148,159,230,180,187,229,
+155,190,231,137,135,229,143,145,229,177,149,229,166,130,230,158,156,230,137,139,
+230,156,186,230,150,176,233,151,187,230,156,128,230,150,176,230,150,185,229,188,
+143,229,140,151,228,186,172,230,143,144,228,190,155,229,133,179,228,186,142,230,
+155,180,229,164,154,232,191,153,228,184,170,231,179,187,231,187,159,231,159,165,
+233,129,147,230,184,184,230,136,143,229,185,191,229,145,138,229,133,182,228,187,
+150,229,143,145,232,161,168,229,174,137,229,133,168,231,172,172,228,184,128,228,
+188,154,229,145,152,232,191,155,232,161,140,231,130,185,229,135,187,231,137,136,
+230,157,131,231,148,181,229,173,144,228,184,150,231,149,140,232,174,190,232,174,
+161,229,133,141,232,180,185,230,149,153,232,130,178,229,138,160,229,133,165,230,
+180,187,229,138,168,228,187,150,228,187,172,229,149,134,229,147,129,229,141,154,
+229,174,162,231,142,176,229,156,168,228,184,138,230,181,183,229,166,130,228,189,
+149,229,183,178,231,187,143,231,149,153,232,168,128,232,175,166,231,187,134,231,
+164,190,229,140,186,231,153,187,229,189,149,230,156,172,231,171,153,233,156,128,
+232,166,129,228,187,183,230,160,188,230,148,175,230,140,129,229,155,189,233,153,
+133,233,147,190,230,142,165,229,155,189,229,174,182,229,187,186,232,174,190,230,
+156,139,229,143,139,233,152,133,232,175,187,230,179,149,229,190,139,228,189,141,
+231,189,174,231,187,143,230,181,142,233,128,137,230,139,169,232,191,153,230,160,
+183,229,189,147,229,137,141,229,136,134,231,177,187,230,142,146,232,161,140,229,
+155,160,228,184,186,228,186,164,230,152,147,230,156,128,229,144,142,233,159,179,
+228,185,144,228,184,141,232,131,189,233,128,154,232,191,135,232,161,140,228,184,
+154,231,167,145,230,138,128,229,143,175,232,131,189,232,174,190,229,164,135,229,
+144,136,228,189,156,229,164,167,229,174,182,231,164,190,228,188,154,231,160,148,
+231,169,182,228,184,147,228,184,154,229,133,168,233,131,168,233,161,185,231,155,
+174,232,191,153,233,135,140,232,191,152,230,152,175,229,188,128,229,167,139,230,
+131,133,229,134,181,231,148,181,232,132,145,230,150,135,228,187,182,229,147,129,
+231,137,140,229,184,174,229,138,169,230,150,135,229,140,150,232,181,132,230,186,
+144,229,164,167,229,173,166,229,173,166,228,185,160,229,156,176,229,157,128,230,
+181,143,232,167,136,230,138,149,232,181,132,229,183,165,231,168,139,232,166,129,
+230,177,130,230,128,142,228,185,136,230,151,182,229,128,153,229,138,159,232,131,
+189,228,184,187,232,166,129,231,155,174,229,137,141,232,181,132,232,174,175,229,
+159,142,229,184,130,230,150,185,230,179,149,231,148,181,229,189,177,230,139,155,
+232,129,152,229,163,176,230,152,142,228,187,187,228,189,149,229,129,165,229,186,
+183,230,149,176,230,141,174,231,190,142,229,155,189,230,177,189,232,189,166,228,
+187,139,231,187,141,228,189,134,230,152,175,228,186,164,230,181,129,231,148,159,
+228,186,167,230,137,128,228,187,165,231,148,181,232,175,157,230,152,190,231,164,
+186,228,184,128,228,186,155,229,141,149,228,189,141,228,186,186,229,145,152,229,
+136,134,230,158,144,229,156,176,229,155,190,230,151,133,230,184,184,229,183,165,
+229,133,183,229,173,166,231,148,159,231,179,187,229,136,151,231,189,145,229,143,
+139,229,184,150,229,173,144,229,175,134,231,160,129,233,162,145,233,129,147,230,
+142,167,229,136,182,229,156,176,229,140,186,229,159,186,230,156,172,229,133,168,
+229,155,189,231,189,145,228,184,138,233,135,141,232,166,129,231,172,172,228,186,
+140,229,150,156,230,172,162,232,191,155,229,133,165,229,143,139,230,131,133,232,
+191,153,228,186,155,232,128,131,232,175,149,229,143,145,231,142,176,229,159,185,
+232,174,173,228,187,165,228,184,138,230,148,191,229,186,156,230,136,144,228,184,
+186,231,142,175,229,162,131,233,166,153,230,184,175,229,144,140,230,151,182,229,
+168,177,228,185,144,229,143,145,233,128,129,228,184,128,229,174,154,229,188,128,
+229,143,145,228,189,156,229,147,129,230,160,135,229,135,134,230,172,162,232,191,
+142,232,167,163,229,134,179,229,156,176,230,150,185,228,184,128,228,184,139,228,
+187,165,229,143,138,232,180,163,228,187,187,230,136,150,232,128,133,229,174,162,
+230,136,183,228,187,163,232,161,168,231,167,175,229,136,134,229,165,179,228,186,
+186,230,149,176,231,160,129,233,148,128,229,148,174,229,135,186,231,142,176,231,
+166,187,231,186,191,229,186,148,231,148,168,229,136,151,232,161,168,228,184,141,
+229,144,140,231,188,150,232,190,145,231,187,159,232,174,161,230,159,165,232,175,
+162,228,184,141,232,166,129,230,156,137,229,133,179,230,156,186,230,158,132,229,
+190,136,229,164,154,230,146,173,230,148,190,231,187,132,231,187,135,230,148,191,
+231,173,150,231,155,180,230,142,165,232,131,189,229,138,155,230,157,165,230,186,
+144,230,153,130,233,150,147,231,156,139,229,136,176,231,131,173,233,151,168,229,
+133,179,233,148,174,228,184,147,229,140,186,233,157,158,229,184,184,232,139,177,
+232,175,173,231,153,190,229,186,166,229,184,140,230,156,155,231,190,142,229,165,
+179,230,175,148,232,190,131,231,159,165,232,175,134,232,167,132,229,174,154,229,
+187,186,232,174,174,233,131,168,233,151,168,230,132,143,232,167,129,231,178,190,
+229,189,169,230,151,165,230,156,172,230,143,144,233,171,152,229,143,145,232,168,
+128,230,150,185,233,157,162,229,159,186,233,135,145,229,164,132,231,144,134,230,
+157,131,233,153,144,229,189,177,231,137,135,233,147,182,232,161,140,232,191,152,
+230,156,137,229,136,134,228,186,171,231,137,169,229,147,129,231,187,143,232,144,
+165,230,183,187,229,138,160,228,184,147,229,174,182,232,191,153,231,167,141,232,
+175,157,233,162,152,232,181,183,230,157,165,228,184,154,229,138,161,229,133,172,
+229,145,138,232,174,176,229,189,149,231,174,128,228,187,139,232,180,168,233,135,
+143,231,148,183,228,186,186,229,189,177,229,147,141,229,188,149,231,148,168,230,
+138,165,229,145,138,233,131,168,229,136,134,229,191,171,233,128,159,229,146,168,
+232,175,162,230,151,182,229,176,154,230,179,168,230,132,143,231,148,179,232,175,
+183,229,173,166,230,160,161,229,186,148,232,175,165,229,142,134,229,143,178,229,
+143,170,230,152,175,232,191,148,229,155,158,232,180,173,228,185,176,229,144,141,
+231,167,176,228,184,186,228,186,134,230,136,144,229,138,159,232,175,180,230,152,
+142,228,190,155,229,186,148,229,173,169,229,173,144,228,184,147,233,162,152,231,
+168,139,229,186,143,228,184,128,232,136,172,230,156,131,229,147,161,229,143,170,
+230,156,137,229,133,182,229,174,131,228,191,157,230,138,164,232,128,140,228,184,
+148,228,187,138,229,164,169,231,170,151,229,143,163,229,138,168,230,128,129,231,
+138,182,230,128,129,231,137,185,229,136,171,232,174,164,228,184,186,229,191,133,
+233,161,187,230,155,180,230,150,176,229,176,143,232,175,180,230,136,145,229,128,
+145,228,189,156,228,184,186,229,170,146,228,189,147,229,140,133,230,139,172,233,
+130,163,228,185,136,228,184,128,230,160,183,229,155,189,229,134,133,230,152,175,
+229,144,166,230,160,185,230,141,174,231,148,181,232,167,134,229,173,166,233,153,
+162,229,133,183,230,156,137,232,191,135,231,168,139,231,148,177,228,186,142,228,
+186,186,230,137,141,229,135,186,230,157,165,228,184,141,232,191,135,230,173,163,
+229,156,168,230,152,142,230,152,159,230,149,133,228,186,139,229,133,179,231,179,
+187,230,160,135,233,162,152,229,149,134,229,138,161,232,190,147,229,133,165,228,
+184,128,231,155,180,229,159,186,231,161,128,230,149,153,229,173,166,228,186,134,
+232,167,163,229,187,186,231,173,145,231,187,147,230,158,156,229,133,168,231,144,
+131,233,128,154,231,159,165,232,174,161,229,136,146,229,175,185,228,186,142,232,
+137,186,230,156,175,231,155,184,229,134,140,229,143,145,231,148,159,231,156,159,
+231,154,132,229,187,186,231,171,139,231,173,137,231,186,167,231,177,187,229,158,
+139,231,187,143,233,170,140,229,174,158,231,142,176,229,136,182,228,189,156,230,
+157,165,232,135,170,230,160,135,231,173,190,228,187,165,228,184,139,229,142,159,
+229,136,155,230,151,160,230,179,149,229,133,182,228,184,173,229,128,139,228,186,
+186,228,184,128,229,136,135,230,140,135,229,141,151,229,133,179,233,151,173,233,
+155,134,229,155,162,231,172,172,228,184,137,229,133,179,230,179,168,229,155,160,
+230,173,164,231,133,167,231,137,135,230,183,177,229,156,179,229,149,134,228,184,
+154,229,185,191,229,183,158,230,151,165,230,156,159,233,171,152,231,186,167,230,
+156,128,232,191,145,231,187,188,229,144,136,232,161,168,231,164,186,228,184,147,
+232,190,145,232,161,140,228,184,186,228,186,164,233,128,154,232,175,132,228,187,
+183,232,167,137,229,190,151,231,178,190,229,141,142,229,174,182,229,186,173,229,
+174,140,230,136,144,230,132,159,232,167,137,229,174,137,232,163,133,229,190,151,
+229,136,176,233,130,174,228,187,182,229,136,182,229,186,166,233,163,159,229,147,
+129,232,153,189,231,132,182,232,189,172,232,189,189,230,138,165,228,187,183,232,
+174,176,232,128,133,230,150,185,230,161,136,232,161,140,230,148,191,228,186,186,
+230,176,145,231,148,168,229,147,129,228,184,156,232,165,191,230,143,144,229,135,
+186,233,133,146,229,186,151,231,132,182,229,144,142,228,187,152,230,172,190,231,
+131,173,231,130,185,228,187,165,229,137,141,229,174,140,229,133,168,229,143,145,
+229,184,150,232,174,190,231,189,174,233,162,134,229,175,188,229,183,165,228,184,
+154,229,140,187,233,153,162,231,156,139,231,156,139,231,187,143,229,133,184,229,
+142,159,229,155,160,229,185,179,229,143,176,229,144,132,231,167,141,229,162,158,
+229,138,160,230,157,144,230,150,153,230,150,176,229,162,158,228,185,139,229,144,
+142,232,129,140,228,184,154,230,149,136,230,158,156,228,187,138,229,185,180,232,
+174,186,230,150,135,230,136,145,229,155,189,229,145,138,232,175,137,231,137,136,
+228,184,187,228,191,174,230,148,185,229,143,130,228,184,142,230,137,147,229,141,
+176,229,191,171,228,185,144,230,156,186,230,162,176,232,167,130,231,130,185,229,
+173,152,229,156,168,231,178,190,231,165,158,232,142,183,229,190,151,229,136,169,
+231,148,168,231,187,167,231,187,173,228,189,160,228,187,172,232,191,153,228,185,
+136,230,168,161,229,188,143,232,175,173,232,168,128,232,131,189,229,164,159,233,
+155,133,232,153,142,230,147,141,228,189,156,233,163,142,230,160,188,228,184,128,
+232,181,183,231,167,145,229,173,166,228,189,147,232,130,178,231,159,173,228,191,
+161,230,157,161,228,187,182,230,178,187,231,150,151,232,191,144,229,138,168,228,
+186,167,228,184,154,228,188,154,232,174,174,229,175,188,232,136,170,229,133,136,
+231,148,159,232,129,148,231,155,159,229,143,175,230,152,175,229,149,143,233,161,
+140,231,187,147,230,158,132,228,189,156,231,148,168,232,176,131,230,159,165,232,
+179,135,230,150,153,232,135,170,229,138,168,232,180,159,232,180,163,229,134,156,
+228,184,154,232,174,191,233,151,174,229,174,158,230,150,189,230,142,165,229,143,
+151,232,174,168,232,174,186,233,130,163,228,184,170,229,143,141,233,166,136,229,
+138,160,229,188,186,229,165,179,230,128,167,232,140,131,229,155,180,230,156,141,
+229,139,153,228,188,145,233,151,178,228,187,138,230,151,165,229,174,162,230,156,
+141,232,167,128,231,156,139,229,143,130,229,138,160,231,154,132,232,175,157,228,
+184,128,231,130,185,228,191,157,232,175,129,229,155,190,228,185,166,230,156,137,
+230,149,136,230,181,139,232,175,149,231,167,187,229,138,168,230,137,141,232,131,
+189,229,134,179,229,174,154,232,130,161,231,165,168,228,184,141,230,150,173,233,
+156,128,230,177,130,228,184,141,229,190,151,229,138,158,230,179,149,228,185,139,
+233,151,180,233,135,135,231,148,168,232,144,165,233,148,128,230,138,149,232,175,
+137,231,155,174,230,160,135,231,136,177,230,131,133,230,145,132,229,189,177,230,
+156,137,228,186,155,232,164,135,232,163,189,230,150,135,229,173,166,230,156,186,
+228,188,154,230,149,176,229,173,151,232,163,133,228,191,174,232,180,173,231,137,
+169,229,134,156,230,157,145,229,133,168,233,157,162,231,178,190,229,147,129,229,
+133,182,229,174,158,228,186,139,230,131,133,230,176,180,229,185,179,230,143,144,
+231,164,186,228,184,138,229,184,130,232,176,162,232,176,162,230,153,174,233,128,
+154,230,149,153,229,184,136,228,184,138,228,188,160,231,177,187,229,136,171,230,
+173,140,230,155,178,230,139,165,230,156,137,229,136,155,230,150,176,233,133,141,
+228,187,182,229,143,170,232,166,129,230,151,182,228,187,163,232,179,135,232,168,
+138,232,190,190,229,136,176,228,186,186,231,148,159,232,174,162,233,152,133,232,
+128,129,229,184,136,229,177,149,231,164,186,229,191,131,231,144,134,232,180,180,
+229,173,144,231,182,178,231,171,153,228,184,187,233,161,140,232,135,170,231,132,
+182,231,186,167,229,136,171,231,174,128,229,141,149,230,148,185,233,157,169,233,
+130,163,228,186,155,230,157,165,232,175,180,230,137,147,229,188,128,228,187,163,
+231,160,129,229,136,160,233,153,164,232,175,129,229,136,184,232,138,130,231,155,
+174,233,135,141,231,130,185,230,172,161,230,149,184,229,164,154,229,176,145,232,
+167,132,229,136,146,232,181,132,233,135,145,230,137,190,229,136,176,228,187,165,
+229,144,142,229,164,167,229,133,168,228,184,187,233,161,181,230,156,128,228,189,
+179,229,155,158,231,173,148,229,164,169,228,184,139,228,191,157,233,154,156,231,
+142,176,228,187,163,230,163,128,230,159,165,230,138,149,231,165,168,229,176,143,
+230,151,182,230,178,146,230,156,137,230,173,163,229,184,184,231,148,154,232,135,
+179,228,187,163,231,144,134,231,155,174,229,189,149,229,133,172,229,188,128,229,
+164,141,229,136,182,233,135,145,232,158,141,229,185,184,231,166,143,231,137,136,
+230,156,172,229,189,162,230,136,144,229,135,134,229,164,135,232,161,140,230,131,
+133,229,155,158,229,136,176,230,128,157,230,131,179,230,128,142,230,160,183,229,
+141,143,232,174,174,232,174,164,232,175,129,230,156,128,229,165,189,228,186,167,
+231,148,159,230,140,137,231,133,167,230,156,141,232,163,133,229,185,191,228,184,
+156,229,138,168,230,188,171,233,135,135,232,180,173,230,150,176,230,137,139,231,
+187,132,229,155,190,233,157,162,230,157,191,229,143,130,232,128,131,230,148,191,
+230,178,187,229,174,185,230,152,147,229,164,169,229,156,176,229,138,170,229,138,
+155,228,186,186,228,187,172,229,141,135,231,186,167,233,128,159,229,186,166,228,
+186,186,231,137,169,232,176,131,230,149,180,230,181,129,232,161,140,233,128,160,
+230,136,144,230,150,135,229,173,151,233,159,169,229,155,189,232,180,184,230,152,
+147,229,188,128,229,177,149,231,155,184,233,151,156,232,161,168,231,142,176,229,
+189,177,232,167,134,229,166,130,230,173,164,231,190,142,229,174,185,229,164,167,
+229,176,143,230,138,165,233,129,147,230,157,161,230,172,190,229,191,131,230,131,
+133,232,174,184,229,164,154,230,179,149,232,167,132,229,174,182,229,177,133,228,
+185,166,229,186,151,232,191,158,230,142,165,231,171,139,229,141,179,228,184,190,
+230,138,165,230,138,128,229,183,167,229,165,165,232,191,144,231,153,187,229,133,
+165,228,187,165,230,157,165,231,144,134,232,174,186,228,186,139,228,187,182,232,
+135,170,231,148,177,228,184,173,229,141,142,229,138,158,229,133,172,229,166,136,
+229,166,136,231,156,159,230,173,163,228,184,141,233,148,153,229,133,168,230,150,
+135,229,144,136,229,144,140,228,187,183,229,128,188,229,136,171,228,186,186,231,
+155,145,231,157,163,229,133,183,228,189,147,228,184,150,231,186,170,229,155,162,
+233,152,159,229,136,155,228,184,154,230,137,191,230,139,133,229,162,158,233,149,
+191,230,156,137,228,186,186,228,191,157,230,140,129,229,149,134,229,174,182,231,
+187,180,228,191,174,229,143,176,230,185,190,229,183,166,229,143,179,232,130,161,
+228,187,189,231,173,148,230,161,136,229,174,158,233,153,133,231,148,181,228,191,
+161,231,187,143,231,144,134,231,148,159,229,145,189,229,174,163,228,188,160,228,
+187,187,229,138,161,230,173,163,229,188,143,231,137,185,232,137,178,228,184,139,
+230,157,165,229,141,143,228,188,154,229,143,170,232,131,189,229,189,147,231,132,
+182,233,135,141,230,150,176,229,133,167,229,174,185,230,140,135,229,175,188,232,
+191,144,232,161,140,230,151,165,229,191,151,232,179,163,229,174,182,232,182,133,
+232,191,135,229,156,159,229,156,176,230,181,153,230,177,159,230,148,175,228,187,
+152,230,142,168,229,135,186,231,171,153,233,149,191,230,157,173,229,183,158,230,
+137,167,232,161,140,229,136,182,233,128,160,228,185,139,228,184,128,230,142,168,
+229,185,191,231,142,176,229,156,186,230,143,143,232,191,176,229,143,152,229,140,
+150,228,188,160,231,187,159,230,173,140,230,137,139,228,191,157,233,153,169,232,
+175,190,231,168,139,229,140,187,231,150,151,231,187,143,232,191,135,232,191,135,
+229,142,187,228,185,139,229,137,141,230,148,182,229,133,165,229,185,180,229,186,
+166,230,157,130,229,191,151,231,190,142,228,184,189,230,156,128,233,171,152,231,
+153,187,233,153,134,230,156,170,230,157,165,229,138,160,229,183,165,229,133,141,
+232,180,163,230,149,153,231,168,139,231,137,136,229,157,151,232,186,171,228,189,
+147,233,135,141,229,186,134,229,135,186,229,148,174,230,136,144,230,156,172,229,
+189,162,229,188,143,229,156,159,232,177,134,229,135,186,229,131,185,228,184,156,
+230,150,185,233,130,174,231,174,177,229,141,151,228,186,172,230,177,130,232,129,
+140,229,143,150,229,190,151,232,129,140,228,189,141,231,155,184,228,191,161,233,
+161,181,233,157,162,229,136,134,233,146,159,231,189,145,233,161,181,231,161,174,
+229,174,154,229,155,190,228,190,139,231,189,145,229,157,128,231,167,175,230,158,
+129,233,148,153,232,175,175,231,155,174,231,154,132,229,174,157,232,180,157,230,
+156,186,229,133,179,233,163,142,233,153,169,230,142,136,230,157,131,231,151,133,
+230,175,146,229,174,160,231,137,169,233,153,164,228,186,134,232,169,149,232,171,
+150,231,150,190,231,151,133,229,143,138,230,151,182,230,177,130,232,180,173,231,
+171,153,231,130,185,229,132,191,231,171,165,230,175,143,229,164,169,228,184,173,
+229,164,174,232,174,164,232,175,134,230,175,143,228,184,170,229,164,169,230,180,
+165,229,173,151,228,189,147,229,143,176,231,129,163,231,187,180,230,138,164,230,
+156,172,233,161,181,228,184,170,230,128,167,229,174,152,230,150,185,229,184,184,
+232,167,129,231,155,184,230,156,186,230,136,152,231,149,165,229,186,148,229,189,
+147,229,190,139,229,184,136,230,150,185,228,190,191,230,160,161,229,155,173,232,
+130,161,229,184,130,230,136,191,229,177,139,230,160,143,231,155,174,229,145,152,
+229,183,165,229,175,188,232,135,180,231,170,129,231,132,182,233,129,147,229,133,
+183,230,156,172,231,189,145,231,187,147,229,144,136,230,161,163,230,161,136,229,
+138,179,229,138,168,229,143,166,229,164,150,231,190,142,229,133,131,229,188,149,
+232,181,183,230,148,185,229,143,152,231,172,172,229,155,155,228,188,154,232,174,
+161,232,170,170,230,152,142,233,154,144,231,167,129,229,174,157,229,174,157,232,
+167,132,232,140,131,230,182,136,232,180,185,229,133,177,229,144,140,229,191,152,
+232,174,176,228,189,147,231,179,187,229,184,166,230,157,165,229,144,141,229,173,
+151,231,153,188,232,161,168,229,188,128,230,148,190,229,138,160,231,155,159,229,
+143,151,229,136,176,228,186,140,230,137,139,229,164,167,233,135,143,230,136,144,
+228,186,186,230,149,176,233,135,143,229,133,177,228,186,171,229,140,186,229,159,
+159,229,165,179,229,173,169,229,142,159,229,136,153,230,137,128,229,156,168,231,
+187,147,230,157,159,233,128,154,228,191,161,232,182,133,231,186,167,233,133,141,
+231,189,174,229,189,147,230,151,182,228,188,152,231,167,128,230,128,167,230,132,
+159,230,136,191,228,186,167,233,129,138,230,136,178,229,135,186,229,143,163,230,
+143,144,228,186,164,229,176,177,228,184,154,228,191,157,229,129,165,231,168,139,
+229,186,166,229,143,130,230,149,176,228,186,139,228,184,154,230,149,180,228,184,
+170,229,177,177,228,184,156,230,131,133,230,132,159,231,137,185,230,174,138,229,
+136,134,233,161,158,230,144,156,229,176,139,229,177,158,228,186,142,233,151,168,
+230,136,183,232,180,162,229,138,161,229,163,176,233,159,179,229,143,138,229,133,
+182,232,180,162,231,187,143,229,157,154,230,140,129,229,185,178,233,131,168,230,
+136,144,231,171,139,229,136,169,231,155,138,232,128,131,232,153,145,230,136,144,
+233,131,189,229,140,133,232,163,133,231,148,168,230,136,182,230,175,148,232,181,
+155,230,150,135,230,152,142,230,139,155,229,149,134,229,174,140,230,149,180,231,
+156,159,230,152,175,231,156,188,231,157,155,228,188,153,228,188,180,229,168,129,
+230,156,155,233,162,134,229,159,159,229,141,171,231,148,159,228,188,152,230,131,
+160,232,171,150,229,163,135,229,133,172,229,133,177,232,137,175,229,165,189,229,
+133,133,229,136,134,231,172,166,229,144,136,233,153,132,228,187,182,231,137,185,
+231,130,185,228,184,141,229,143,175,232,139,177,230,150,135,232,181,132,228,186,
+167,230,160,185,230,156,172,230,152,142,230,152,190,229,175,134,231,162,188,229,
+133,172,228,188,151,230,176,145,230,151,143,230,155,180,229,138,160,228,186,171,
+229,143,151,229,144,140,229,173,166,229,144,175,229,138,168,233,128,130,229,144,
+136,229,142,159,230,157,165,233,151,174,231,173,148,230,156,172,230,150,135,231,
+190,142,233,163,159,231,187,191,232,137,178,231,168,179,229,174,154,231,187,136,
+228,186,142,231,148,159,231,137,169,228,190,155,230,177,130,230,144,156,231,139,
+144,229,138,155,233,135,143,228,184,165,233,135,141,230,176,184,232,191,156,229,
+134,153,231,156,159,230,156,137,233,153,144,231,171,158,228,186,137,229,175,185,
+232,177,161,232,180,185,231,148,168,228,184,141,229,165,189,231,187,157,229,175,
+185,229,141,129,229,136,134,228,191,131,232,191,155,231,130,185,232,175,132,229,
+189,177,233,159,179,228,188,152,229,138,191,228,184,141,229,176,145,230,172,163,
+232,181,143,229,185,182,228,184,148,230,156,137,231,130,185,230,150,185,229,144,
+145,229,133,168,230,150,176,228,191,161,231,148,168,232,174,190,230,150,189,229,
+189,162,232,177,161,232,181,132,230,160,188,231,170,129,231,160,180,233,154,143,
+231,157,128,233,135,141,229,164,167,228,186,142,230,152,175,230,175,149,228,184,
+154,230,153,186,232,131,189,229,140,150,229,183,165,229,174,140,231,190,142,229,
+149,134,229,159,142,231,187,159,228,184,128,229,135,186,231,137,136,230,137,147,
+233,128,160,231,148,162,229,147,129,230,166,130,229,134,181,231,148,168,228,186,
+142,228,191,157,231,149,153,229,155,160,231,180,160,228,184,173,229,156,139,229,
+173,152,229,130,168,232,180,180,229,155,190,230,156,128,230,132,155,233,149,191,
+230,156,159,229,143,163,228,187,183,231,144,134,232,180,162,229,159,186,229,156,
+176,229,174,137,230,142,146,230,173,166,230,177,137,233,135,140,233,157,162,229,
+136,155,229,187,186,229,164,169,231,169,186,233,166,150,229,133,136,229,174,140,
+229,150,132,233,169,177,229,138,168,228,184,139,233,157,162,228,184,141,229,134,
+141,232,175,154,228,191,161,230,132,143,228,185,137,233,152,179,229,133,137,232,
+139,177,229,155,189,230,188,130,228,186,174,229,134,155,228,186,139,231,142,169,
+229,174,182,231,190,164,228,188,151,229,134,156,230,176,145,229,141,179,229,143,
+175,229,144,141,231,168,177,229,174,182,229,133,183,229,138,168,231,148,187,230,
+131,179,229,136,176,230,179,168,230,152,142,229,176,143,229,173,166,230,128,167,
+232,131,189,232,128,131,231,160,148,231,161,172,228,187,182,232,167,130,231,156,
+139,230,184,133,230,165,154,230,144,158,231,172,145,233,166,150,233,160,129,233,
+187,132,233,135,145,233,128,130,231,148,168,230,177,159,232,139,143,231,156,159,
+229,174,158,228,184,187,231,174,161,233,152,182,230,174,181,232,168,187,229,134,
+138,231,191,187,232,175,145,230,157,131,229,136,169,229,129,154,229,165,189,228,
+188,188,228,185,142,233,128,154,232,174,175,230,150,189,229,183,165,231,139,128,
+230,133,139,228,185,159,232,174,184,231,142,175,228,191,157,229,159,185,229,133,
+187,230,166,130,229,191,181,229,164,167,229,158,139,230,156,186,231,165,168,231,
+144,134,232,167,163,229,140,191,229,144,141,99,117,97,110,100,111,101,110,118,
+105,97,114,109,97,100,114,105,100,98,117,115,99,97,114,105,110,105,99,105,111,
+116,105,101,109,112,111,112,111,114,113,117,101,99,117,101,110,116,97,101,115,
+116,97,100,111,112,117,101,100,101,110,106,117,101,103,111,115,99,111,110,116,
+114,97,101,115,116,195,161,110,110,111,109,98,114,101,116,105,101,110,101,110,
+112,101,114,102,105,108,109,97,110,101,114,97,97,109,105,103,111,115,99,105,117,
+100,97,100,99,101,110,116,114,111,97,117,110,113,117,101,112,117,101,100,101,115
+,100,101,110,116,114,111,112,114,105,109,101,114,112,114,101,99,105,111,115,101,
+103,195,186,110,98,117,101,110,111,115,118,111,108,118,101,114,112,117,110,116,
+111,115,115,101,109,97,110,97,104,97,98,195,173,97,97,103,111,115,116,111,110,
+117,101,118,111,115,117,110,105,100,111,115,99,97,114,108,111,115,101,113,117,
+105,112,111,110,105,195,177,111,115,109,117,99,104,111,115,97,108,103,117,110,97
+,99,111,114,114,101,111,105,109,97,103,101,110,112,97,114,116,105,114,97,114,114
+,105,98,97,109,97,114,195,173,97,104,111,109,98,114,101,101,109,112,108,101,111,
+118,101,114,100,97,100,99,97,109,98,105,111,109,117,99,104,97,115,102,117,101,
+114,111,110,112,97,115,97,100,111,108,195,173,110,101,97,112,97,114,101,99,101,
+110,117,101,118,97,115,99,117,114,115,111,115,101,115,116,97,98,97,113,117,105,
+101,114,111,108,105,98,114,111,115,99,117,97,110,116,111,97,99,99,101,115,111,
+109,105,103,117,101,108,118,97,114,105,111,115,99,117,97,116,114,111,116,105,101
+,110,101,115,103,114,117,112,111,115,115,101,114,195,161,110,101,117,114,111,112
+,97,109,101,100,105,111,115,102,114,101,110,116,101,97,99,101,114,99,97,100,101,
+109,195,161,115,111,102,101,114,116,97,99,111,99,104,101,115,109,111,100,101,108
+,111,105,116,97,108,105,97,108,101,116,114,97,115,97,108,103,195,186,110,99,111,
+109,112,114,97,99,117,97,108,101,115,101,120,105,115,116,101,99,117,101,114,112,
+111,115,105,101,110,100,111,112,114,101,110,115,97,108,108,101,103,97,114,118,
+105,97,106,101,115,100,105,110,101,114,111,109,117,114,99,105,97,112,111,100,114
+,195,161,112,117,101,115,116,111,100,105,97,114,105,111,112,117,101,98,108,111,
+113,117,105,101,114,101,109,97,110,117,101,108,112,114,111,112,105,111,99,114,
+105,115,105,115,99,105,101,114,116,111,115,101,103,117,114,111,109,117,101,114,
+116,101,102,117,101,110,116,101,99,101,114,114,97,114,103,114,97,110,100,101,101
+,102,101,99,116,111,112,97,114,116,101,115,109,101,100,105,100,97,112,114,111,
+112,105,97,111,102,114,101,99,101,116,105,101,114,114,97,101,45,109,97,105,108,
+118,97,114,105,97,115,102,111,114,109,97,115,102,117,116,117,114,111,111,98,106,
+101,116,111,115,101,103,117,105,114,114,105,101,115,103,111,110,111,114,109,97,
+115,109,105,115,109,111,115,195,186,110,105,99,111,99,97,109,105,110,111,115,105
+,116,105,111,115,114,97,122,195,179,110,100,101,98,105,100,111,112,114,117,101,
+98,97,116,111,108,101,100,111,116,101,110,195,173,97,106,101,115,195,186,115,101
+,115,112,101,114,111,99,111,99,105,110,97,111,114,105,103,101,110,116,105,101,
+110,100,97,99,105,101,110,116,111,99,195,161,100,105,122,104,97,98,108,97,114,
+115,101,114,195,173,97,108,97,116,105,110,97,102,117,101,114,122,97,101,115,116,
+105,108,111,103,117,101,114,114,97,101,110,116,114,97,114,195,169,120,105,116,
+111,108,195,179,112,101,122,97,103,101,110,100,97,118,195,173,100,101,111,101,
+118,105,116,97,114,112,97,103,105,110,97,109,101,116,114,111,115,106,97,118,105,
+101,114,112,97,100,114,101,115,102,195,161,99,105,108,99,97,98,101,122,97,195,
+161,114,101,97,115,115,97,108,105,100,97,101,110,118,195,173,111,106,97,112,195,
+179,110,97,98,117,115,111,115,98,105,101,110,101,115,116,101,120,116,111,115,108
+,108,101,118,97,114,112,117,101,100,97,110,102,117,101,114,116,101,99,111,109,
+195,186,110,99,108,97,115,101,115,104,117,109,97,110,111,116,101,110,105,100,111
+,98,105,108,98,97,111,117,110,105,100,97,100,101,115,116,195,161,115,101,100,105
+,116,97,114,99,114,101,97,100,111,208,180,208,187,209,143,209,135,209,130,208,
+190,208,186,208,176,208,186,208,184,208,187,208,184,209,141,209,130,208,190,208,
+178,209,129,208,181,208,181,208,179,208,190,208,191,209,128,208,184,209,130,208,
+176,208,186,208,181,209,137,208,181,209,131,208,182,208,181,208,154,208,176,208,
+186,208,177,208,181,208,183,208,177,209,139,208,187,208,190,208,189,208,184,208,
+146,209,129,208,181,208,191,208,190,208,180,208,173,209,130,208,190,209,130,208,
+190,208,188,209,135,208,181,208,188,208,189,208,181,209,130,208,187,208,181,209,
+130,209,128,208,176,208,183,208,190,208,189,208,176,208,179,208,180,208,181,208,
+188,208,189,208,181,208,148,208,187,209,143,208,159,209,128,208,184,208,189,208,
+176,209,129,208,189,208,184,209,133,209,130,208,181,208,188,208,186,209,130,208,
+190,208,179,208,190,208,180,208,178,208,190,209,130,209,130,208,176,208,188,208,
+161,208,168,208,144,208,188,208,176,209,143,208,167,209,130,208,190,208,178,208,
+176,209,129,208,178,208,176,208,188,208,181,208,188,209,131,208,162,208,176,208,
+186,208,180,208,178,208,176,208,189,208,176,208,188,209,141,209,130,208,184,209,
+141,209,130,209,131,208,146,208,176,208,188,209,130,208,181,209,133,208,191,209,
+128,208,190,209,130,209,131,209,130,208,189,208,176,208,180,208,180,208,189,209,
+143,208,146,208,190,209,130,209,130,209,128,208,184,208,189,208,181,208,185,208,
+146,208,176,209,129,208,189,208,184,208,188,209,129,208,176,208,188,209,130,208,
+190,209,130,209,128,209,131,208,177,208,158,208,189,208,184,208,188,208,184,209,
+128,208,189,208,181,208,181,208,158,208,158,208,158,208,187,208,184,209,134,209,
+141,209,130,208,176,208,158,208,189,208,176,208,189,208,181,208,188,208,180,208,
+190,208,188,208,188,208,190,208,185,208,180,208,178,208,181,208,190,208,189,208,
+190,209,129,209,131,208,180,224,164,149,224,165,135,224,164,185,224,165,136,224,
+164,149,224,165,128,224,164,184,224,165,135,224,164,149,224,164,190,224,164,149,
+224,165,139,224,164,148,224,164,176,224,164,170,224,164,176,224,164,168,224,165,
+135,224,164,143,224,164,149,224,164,149,224,164,191,224,164,173,224,165,128,224,
+164,135,224,164,184,224,164,149,224,164,176,224,164,164,224,165,139,224,164,185,
+224,165,139,224,164,134,224,164,170,224,164,185,224,165,128,224,164,175,224,164,
+185,224,164,175,224,164,190,224,164,164,224,164,149,224,164,165,224,164,190,106,
+97,103,114,97,110,224,164,134,224,164,156,224,164,156,224,165,139,224,164,133,
+224,164,172,224,164,166,224,165,139,224,164,151,224,164,136,224,164,156,224,164,
+190,224,164,151,224,164,143,224,164,185,224,164,174,224,164,135,224,164,168,224,
+164,181,224,164,185,224,164,175,224,165,135,224,164,165,224,165,135,224,164,165,
+224,165,128,224,164,152,224,164,176,224,164,156,224,164,172,224,164,166,224,165,
+128,224,164,149,224,164,136,224,164,156,224,165,128,224,164,181,224,165,135,224,
+164,168,224,164,136,224,164,168,224,164,143,224,164,185,224,164,176,224,164,137,
+224,164,184,224,164,174,224,165,135,224,164,149,224,164,174,224,164,181,224,165,
+139,224,164,178,224,165,135,224,164,184,224,164,172,224,164,174,224,164,136,224,
+164,166,224,165,135,224,164,147,224,164,176,224,164,134,224,164,174,224,164,172,
+224,164,184,224,164,173,224,164,176,224,164,172,224,164,168,224,164,154,224,164,
+178,224,164,174,224,164,168,224,164,134,224,164,151,224,164,184,224,165,128,224,
+164,178,224,165,128,216,185,217,132,217,137,216,165,217,132,217,137,217,135,216,
+176,216,167,216,162,216,174,216,177,216,185,216,175,216,175,216,167,217,132,217,
+137,217,135,216,176,217,135,216,181,217,136,216,177,216,186,217,138,216,177,217,
+131,216,167,217,134,217,136,217,132,216,167,216,168,217,138,217,134,216,185,216,
+177,216,182,216,176,217,132,217,131,217,135,217,134,216,167,217,138,217,136,217,
+133,217,130,216,167,217,132,216,185,217,132,217,138,216,167,217,134,216,167,217,
+132,217,131,217,134,216,173,216,170,217,137,217,130,216,168,217,132,217,136,216,
+173,216,169,216,167,216,174,216,177,217,129,217,130,216,183,216,185,216,168,216,
+175,216,177,217,131,217,134,216,165,216,176,216,167,217,131,217,133,216,167,216,
+167,216,173,216,175,216,165,217,132,216,167,217,129,217,138,217,135,216,168,216,
+185,216,182,217,131,217,138,217,129,216,168,216,173,216,171,217,136,217,133,217,
+134,217,136,217,135,217,136,216,163,217,134,216,167,216,172,216,175,216,167,217,
+132,217,135,216,167,216,179,217,132,217,133,216,185,217,134,216,175,217,132,217,
+138,216,179,216,185,216,168,216,177,216,181,217,132,217,137,217,133,217,134,216,
+176,216,168,217,135,216,167,216,163,217,134,217,135,217,133,216,171,217,132,217,
+131,217,134,216,170,216,167,217,132,216,167,216,173,217,138,216,171,217,133,216,
+181,216,177,216,180,216,177,216,173,216,173,217,136,217,132,217,136,217,129,217,
+138,216,167,216,176,216,167,217,132,217,131,217,132,217,133,216,177,216,169,216,
+167,217,134,216,170,216,167,217,132,217,129,216,163,216,168,217,136,216,174,216,
+167,216,181,216,163,217,134,216,170,216,167,217,134,217,135,216,167,217,132,217,
+138,216,185,216,182,217,136,217,136,217,130,216,175,216,167,216,168,217,134,216,
+174,217,138,216,177,216,168,217,134,216,170,217,132,217,131,217,133,216,180,216,
+167,216,161,217,136,217,135,217,138,216,167,216,168,217,136,217,130,216,181,216,
+181,217,136,217,133,216,167,216,177,217,130,217,133,216,163,216,173,216,175,217,
+134,216,173,217,134,216,185,216,175,217,133,216,177,216,163,217,138,216,167,216,
+173,216,169,217,131,216,170,216,168,216,175,217,136,217,134,217,138,216,172,216,
+168,217,133,217,134,217,135,216,170,216,173,216,170,216,172,217,135,216,169,216,
+179,217,134,216,169,217,138,216,170,217,133,217,131,216,177,216,169,216,186,216,
+178,216,169,217,134,217,129,216,179,216,168,217,138,216,170,217,132,217,132,217,
+135,217,132,217,134,216,167,216,170,217,132,217,131,217,130,217,132,216,168,217,
+132,217,133,216,167,216,185,217,134,217,135,216,163,217,136,217,132,216,180,217,
+138,216,161,217,134,217,136,216,177,216,163,217,133,216,167,217,129,217,138,217,
+131,216,168,217,131,217,132,216,176,216,167,216,170,216,177,216,170,216,168,216,
+168,216,163,217,134,217,135,217,133,216,179,216,167,217,134,217,131,216,168,217,
+138,216,185,217,129,217,130,216,175,216,173,216,179,217,134,217,132,217,135,217,
+133,216,180,216,185,216,177,216,163,217,135,217,132,216,180,217,135,216,177,217,
+130,216,183,216,177,216,183,217,132,216,168,112,114,111,102,105,108,101,115,101,
+114,118,105,99,101,100,101,102,97,117,108,116,104,105,109,115,101,108,102,100,
+101,116,97,105,108,115,99,111,110,116,101,110,116,115,117,112,112,111,114,116,
+115,116,97,114,116,101,100,109,101,115,115,97,103,101,115,117,99,99,101,115,115,
+102,97,115,104,105,111,110,60,116,105,116,108,101,62,99,111,117,110,116,114,121,
+97,99,99,111,117,110,116,99,114,101,97,116,101,100,115,116,111,114,105,101,115,
+114,101,115,117,108,116,115,114,117,110,110,105,110,103,112,114,111,99,101,115,
+115,119,114,105,116,105,110,103,111,98,106,101,99,116,115,118,105,115,105,98,108
+,101,119,101,108,99,111,109,101,97,114,116,105,99,108,101,117,110,107,110,111,
+119,110,110,101,116,119,111,114,107,99,111,109,112,97,110,121,100,121,110,97,109
+,105,99,98,114,111,119,115,101,114,112,114,105,118,97,99,121,112,114,111,98,108,
+101,109,83,101,114,118,105,99,101,114,101,115,112,101,99,116,100,105,115,112,108
+,97,121,114,101,113,117,101,115,116,114,101,115,101,114,118,101,119,101,98,115,
+105,116,101,104,105,115,116,111,114,121,102,114,105,101,110,100,115,111,112,116,
+105,111,110,115,119,111,114,107,105,110,103,118,101,114,115,105,111,110,109,105,
+108,108,105,111,110,99,104,97,110,110,101,108,119,105,110,100,111,119,46,97,100,
+100,114,101,115,115,118,105,115,105,116,101,100,119,101,97,116,104,101,114,99,
+111,114,114,101,99,116,112,114,111,100,117,99,116,101,100,105,114,101,99,116,102
+,111,114,119,97,114,100,121,111,117,32,99,97,110,114,101,109,111,118,101,100,115
+,117,98,106,101,99,116,99,111,110,116,114,111,108,97,114,99,104,105,118,101,99,
+117,114,114,101,110,116,114,101,97,100,105,110,103,108,105,98,114,97,114,121,108
+,105,109,105,116,101,100,109,97,110,97,103,101,114,102,117,114,116,104,101,114,
+115,117,109,109,97,114,121,109,97,99,104,105,110,101,109,105,110,117,116,101,115
+,112,114,105,118,97,116,101,99,111,110,116,101,120,116,112,114,111,103,114,97,
+109,115,111,99,105,101,116,121,110,117,109,98,101,114,115,119,114,105,116,116,
+101,110,101,110,97,98,108,101,100,116,114,105,103,103,101,114,115,111,117,114,99
+,101,115,108,111,97,100,105,110,103,101,108,101,109,101,110,116,112,97,114,116,
+110,101,114,102,105,110,97,108,108,121,112,101,114,102,101,99,116,109,101,97,110
+,105,110,103,115,121,115,116,101,109,115,107,101,101,112,105,110,103,99,117,108,
+116,117,114,101,38,113,117,111,116,59,44,106,111,117,114,110,97,108,112,114,111,
+106,101,99,116,115,117,114,102,97,99,101,115,38,113,117,111,116,59,101,120,112,
+105,114,101,115,114,101,118,105,101,119,115,98,97,108,97,110,99,101,69,110,103,
+108,105,115,104,67,111,110,116,101,110,116,116,104,114,111,117,103,104,80,108,
+101,97,115,101,32,111,112,105,110,105,111,110,99,111,110,116,97,99,116,97,118,
+101,114,97,103,101,112,114,105,109,97,114,121,118,105,108,108,97,103,101,83,112,
+97,110,105,115,104,103,97,108,108,101,114,121,100,101,99,108,105,110,101,109,101
+,101,116,105,110,103,109,105,115,115,105,111,110,112,111,112,117,108,97,114,113,
+117,97,108,105,116,121,109,101,97,115,117,114,101,103,101,110,101,114,97,108,115
+,112,101,99,105,101,115,115,101,115,115,105,111,110,115,101,99,116,105,111,110,
+119,114,105,116,101,114,115,99,111,117,110,116,101,114,105,110,105,116,105,97,
+108,114,101,112,111,114,116,115,102,105,103,117,114,101,115,109,101,109,98,101,
+114,115,104,111,108,100,105,110,103,100,105,115,112,117,116,101,101,97,114,108,
+105,101,114,101,120,112,114,101,115,115,100,105,103,105,116,97,108,112,105,99,
+116,117,114,101,65,110,111,116,104,101,114,109,97,114,114,105,101,100,116,114,97
+,102,102,105,99,108,101,97,100,105,110,103,99,104,97,110,103,101,100,99,101,110,
+116,114,97,108,118,105,99,116,111,114,121,105,109,97,103,101,115,47,114,101,97,
+115,111,110,115,115,116,117,100,105,101,115,102,101,97,116,117,114,101,108,105,
+115,116,105,110,103,109,117,115,116,32,98,101,115,99,104,111,111,108,115,86,101,
+114,115,105,111,110,117,115,117,97,108,108,121,101,112,105,115,111,100,101,112,
+108,97,121,105,110,103,103,114,111,119,105,110,103,111,98,118,105,111,117,115,
+111,118,101,114,108,97,121,112,114,101,115,101,110,116,97,99,116,105,111,110,115
+,60,47,117,108,62,13,10,119,114,97,112,112,101,114,97,108,114,101,97,100,121,99,
+101,114,116,97,105,110,114,101,97,108,105,116,121,115,116,111,114,97,103,101,97,
+110,111,116,104,101,114,100,101,115,107,116,111,112,111,102,102,101,114,101,100,
+112,97,116,116,101,114,110,117,110,117,115,117,97,108,68,105,103,105,116,97,108,
+99,97,112,105,116,97,108,87,101,98,115,105,116,101,102,97,105,108,117,114,101,99
+,111,110,110,101,99,116,114,101,100,117,99,101,100,65,110,100,114,111,105,100,
+100,101,99,97,100,101,115,114,101,103,117,108,97,114,32,38,97,109,112,59,32,97,
+110,105,109,97,108,115,114,101,108,101,97,115,101,65,117,116,111,109,97,116,103,
+101,116,116,105,110,103,109,101,116,104,111,100,115,110,111,116,104,105,110,103,
+80,111,112,117,108,97,114,99,97,112,116,105,111,110,108,101,116,116,101,114,115,
+99,97,112,116,117,114,101,115,99,105,101,110,99,101,108,105,99,101,110,115,101,
+99,104,97,110,103,101,115,69,110,103,108,97,110,100,61,49,38,97,109,112,59,72,
+105,115,116,111,114,121,32,61,32,110,101,119,32,67,101,110,116,114,97,108,117,
+112,100,97,116,101,100,83,112,101,99,105,97,108,78,101,116,119,111,114,107,114,
+101,113,117,105,114,101,99,111,109,109,101,110,116,119,97,114,110,105,110,103,67
+,111,108,108,101,103,101,116,111,111,108,98,97,114,114,101,109,97,105,110,115,98
+,101,99,97,117,115,101,101,108,101,99,116,101,100,68,101,117,116,115,99,104,102,
+105,110,97,110,99,101,119,111,114,107,101,114,115,113,117,105,99,107,108,121,98,
+101,116,119,101,101,110,101,120,97,99,116,108,121,115,101,116,116,105,110,103,
+100,105,115,101,97,115,101,83,111,99,105,101,116,121,119,101,97,112,111,110,115,
+101,120,104,105,98,105,116,38,108,116,59,33,45,45,67,111,110,116,114,111,108,99,
+108,97,115,115,101,115,99,111,118,101,114,101,100,111,117,116,108,105,110,101,97
+,116,116,97,99,107,115,100,101,118,105,99,101,115,40,119,105,110,100,111,119,112
+,117,114,112,111,115,101,116,105,116,108,101,61,34,77,111,98,105,108,101,32,107,
+105,108,108,105,110,103,115,104,111,119,105,110,103,73,116,97,108,105,97,110,100
+,114,111,112,112,101,100,104,101,97,118,105,108,121,101,102,102,101,99,116,115,
+45,49,39,93,41,59,10,99,111,110,102,105,114,109,67,117,114,114,101,110,116,97,
+100,118,97,110,99,101,115,104,97,114,105,110,103,111,112,101,110,105,110,103,100
+,114,97,119,105,110,103,98,105,108,108,105,111,110,111,114,100,101,114,101,100,
+71,101,114,109,97,110,121,114,101,108,97,116,101,100,60,47,102,111,114,109,62,
+105,110,99,108,117,100,101,119,104,101,116,104,101,114,100,101,102,105,110,101,
+100,83,99,105,101,110,99,101,99,97,116,97,108,111,103,65,114,116,105,99,108,101,
+98,117,116,116,111,110,115,108,97,114,103,101,115,116,117,110,105,102,111,114,
+109,106,111,117,114,110,101,121,115,105,100,101,98,97,114,67,104,105,99,97,103,
+111,104,111,108,105,100,97,121,71,101,110,101,114,97,108,112,97,115,115,97,103,
+101,44,38,113,117,111,116,59,97,110,105,109,97,116,101,102,101,101,108,105,110,
+103,97,114,114,105,118,101,100,112,97,115,115,105,110,103,110,97,116,117,114,97,
+108,114,111,117,103,104,108,121,46,10,10,84,104,101,32,98,117,116,32,110,111,116
+,100,101,110,115,105,116,121,66,114,105,116,97,105,110,67,104,105,110,101,115,
+101,108,97,99,107,32,111,102,116,114,105,98,117,116,101,73,114,101,108,97,110,
+100,34,32,100,97,116,97,45,102,97,99,116,111,114,115,114,101,99,101,105,118,101,
+116,104,97,116,32,105,115,76,105,98,114,97,114,121,104,117,115,98,97,110,100,105
+,110,32,102,97,99,116,97,102,102,97,105,114,115,67,104,97,114,108,101,115,114,97
+,100,105,99,97,108,98,114,111,117,103,104,116,102,105,110,100,105,110,103,108,97
+,110,100,105,110,103,58,108,97,110,103,61,34,114,101,116,117,114,110,32,108,101,
+97,100,101,114,115,112,108,97,110,110,101,100,112,114,101,109,105,117,109,112,97
+,99,107,97,103,101,65,109,101,114,105,99,97,69,100,105,116,105,111,110,93,38,113
+,117,111,116,59,77,101,115,115,97,103,101,110,101,101,100,32,116,111,118,97,108,
+117,101,61,34,99,111,109,112,108,101,120,108,111,111,107,105,110,103,115,116,97,
+116,105,111,110,98,101,108,105,101,118,101,115,109,97,108,108,101,114,45,109,111
+,98,105,108,101,114,101,99,111,114,100,115,119,97,110,116,32,116,111,107,105,110
+,100,32,111,102,70,105,114,101,102,111,120,121,111,117,32,97,114,101,115,105,109
+,105,108,97,114,115,116,117,100,105,101,100,109,97,120,105,109,117,109,104,101,
+97,100,105,110,103,114,97,112,105,100,108,121,99,108,105,109,97,116,101,107,105,
+110,103,100,111,109,101,109,101,114,103,101,100,97,109,111,117,110,116,115,102,
+111,117,110,100,101,100,112,105,111,110,101,101,114,102,111,114,109,117,108,97,
+100,121,110,97,115,116,121,104,111,119,32,116,111,32,83,117,112,112,111,114,116,
+114,101,118,101,110,117,101,101,99,111,110,111,109,121,82,101,115,117,108,116,
+115,98,114,111,116,104,101,114,115,111,108,100,105,101,114,108,97,114,103,101,
+108,121,99,97,108,108,105,110,103,46,38,113,117,111,116,59,65,99,99,111,117,110,
+116,69,100,119,97,114,100,32,115,101,103,109,101,110,116,82,111,98,101,114,116,
+32,101,102,102,111,114,116,115,80,97,99,105,102,105,99,108,101,97,114,110,101,
+100,117,112,32,119,105,116,104,104,101,105,103,104,116,58,119,101,32,104,97,118,
+101,65,110,103,101,108,101,115,110,97,116,105,111,110,115,95,115,101,97,114,99,
+104,97,112,112,108,105,101,100,97,99,113,117,105,114,101,109,97,115,115,105,118,
+101,103,114,97,110,116,101,100,58,32,102,97,108,115,101,116,114,101,97,116,101,
+100,98,105,103,103,101,115,116,98,101,110,101,102,105,116,100,114,105,118,105,
+110,103,83,116,117,100,105,101,115,109,105,110,105,109,117,109,112,101,114,104,
+97,112,115,109,111,114,110,105,110,103,115,101,108,108,105,110,103,105,115,32,
+117,115,101,100,114,101,118,101,114,115,101,118,97,114,105,97,110,116,32,114,111
+,108,101,61,34,109,105,115,115,105,110,103,97,99,104,105,101,118,101,112,114,111
+,109,111,116,101,115,116,117,100,101,110,116,115,111,109,101,111,110,101,101,120
+,116,114,101,109,101,114,101,115,116,111,114,101,98,111,116,116,111,109,58,101,
+118,111,108,118,101,100,97,108,108,32,116,104,101,115,105,116,101,109,97,112,101
+,110,103,108,105,115,104,119,97,121,32,116,111,32,32,65,117,103,117,115,116,115,
+121,109,98,111,108,115,67,111,109,112,97,110,121,109,97,116,116,101,114,115,109,
+117,115,105,99,97,108,97,103,97,105,110,115,116,115,101,114,118,105,110,103,125,
+41,40,41,59,13,10,112,97,121,109,101,110,116,116,114,111,117,98,108,101,99,111,
+110,99,101,112,116,99,111,109,112,97,114,101,112,97,114,101,110,116,115,112,108,
+97,121,101,114,115,114,101,103,105,111,110,115,109,111,110,105,116,111,114,32,39
+,39,84,104,101,32,119,105,110,110,105,110,103,101,120,112,108,111,114,101,97,100
+,97,112,116,101,100,71,97,108,108,101,114,121,112,114,111,100,117,99,101,97,98,
+105,108,105,116,121,101,110,104,97,110,99,101,99,97,114,101,101,114,115,41,46,32
+,84,104,101,32,99,111,108,108,101,99,116,83,101,97,114,99,104,32,97,110,99,105,
+101,110,116,101,120,105,115,116,101,100,102,111,111,116,101,114,32,104,97,110,
+100,108,101,114,112,114,105,110,116,101,100,99,111,110,115,111,108,101,69,97,115
+,116,101,114,110,101,120,112,111,114,116,115,119,105,110,100,111,119,115,67,104,
+97,110,110,101,108,105,108,108,101,103,97,108,110,101,117,116,114,97,108,115,117
+,103,103,101,115,116,95,104,101,97,100,101,114,115,105,103,110,105,110,103,46,
+104,116,109,108,34,62,115,101,116,116,108,101,100,119,101,115,116,101,114,110,99
+,97,117,115,105,110,103,45,119,101,98,107,105,116,99,108,97,105,109,101,100,74,
+117,115,116,105,99,101,99,104,97,112,116,101,114,118,105,99,116,105,109,115,84,
+104,111,109,97,115,32,109,111,122,105,108,108,97,112,114,111,109,105,115,101,112
+,97,114,116,105,101,115,101,100,105,116,105,111,110,111,117,116,115,105,100,101,
+58,102,97,108,115,101,44,104,117,110,100,114,101,100,79,108,121,109,112,105,99,
+95,98,117,116,116,111,110,97,117,116,104,111,114,115,114,101,97,99,104,101,100,
+99,104,114,111,110,105,99,100,101,109,97,110,100,115,115,101,99,111,110,100,115,
+112,114,111,116,101,99,116,97,100,111,112,116,101,100,112,114,101,112,97,114,101
+,110,101,105,116,104,101,114,103,114,101,97,116,108,121,103,114,101,97,116,101,
+114,111,118,101,114,97,108,108,105,109,112,114,111,118,101,99,111,109,109,97,110
+,100,115,112,101,99,105,97,108,115,101,97,114,99,104,46,119,111,114,115,104,105,
+112,102,117,110,100,105,110,103,116,104,111,117,103,104,116,104,105,103,104,101,
+115,116,105,110,115,116,101,97,100,117,116,105,108,105,116,121,113,117,97,114,
+116,101,114,67,117,108,116,117,114,101,116,101,115,116,105,110,103,99,108,101,97
+,114,108,121,101,120,112,111,115,101,100,66,114,111,119,115,101,114,108,105,98,
+101,114,97,108,125,32,99,97,116,99,104,80,114,111,106,101,99,116,101,120,97,109,
+112,108,101,104,105,100,101,40,41,59,70,108,111,114,105,100,97,97,110,115,119,
+101,114,115,97,108,108,111,119,101,100,69,109,112,101,114,111,114,100,101,102,
+101,110,115,101,115,101,114,105,111,117,115,102,114,101,101,100,111,109,83,101,
+118,101,114,97,108,45,98,117,116,116,111,110,70,117,114,116,104,101,114,111,117,
+116,32,111,102,32,33,61,32,110,117,108,108,116,114,97,105,110,101,100,68,101,110
+,109,97,114,107,118,111,105,100,40,48,41,47,97,108,108,46,106,115,112,114,101,
+118,101,110,116,82,101,113,117,101,115,116,83,116,101,112,104,101,110,10,10,87,
+104,101,110,32,111,98,115,101,114,118,101,60,47,104,50,62,13,10,77,111,100,101,
+114,110,32,112,114,111,118,105,100,101,34,32,97,108,116,61,34,98,111,114,100,101
+,114,115,46,10,10,70,111,114,32,10,10,77,97,110,121,32,97,114,116,105,115,116,
+115,112,111,119,101,114,101,100,112,101,114,102,111,114,109,102,105,99,116,105,
+111,110,116,121,112,101,32,111,102,109,101,100,105,99,97,108,116,105,99,107,101,
+116,115,111,112,112,111,115,101,100,67,111,117,110,99,105,108,119,105,116,110,
+101,115,115,106,117,115,116,105,99,101,71,101,111,114,103,101,32,66,101,108,103,
+105,117,109,46,46,46,60,47,97,62,116,119,105,116,116,101,114,110,111,116,97,98,
+108,121,119,97,105,116,105,110,103,119,97,114,102,97,114,101,32,79,116,104,101,
+114,32,114,97,110,107,105,110,103,112,104,114,97,115,101,115,109,101,110,116,105
+,111,110,115,117,114,118,105,118,101,115,99,104,111,108,97,114,60,47,112,62,13,
+10,32,67,111,117,110,116,114,121,105,103,110,111,114,101,100,108,111,115,115,32,
+111,102,106,117,115,116,32,97,115,71,101,111,114,103,105,97,115,116,114,97,110,
+103,101,60,104,101,97,100,62,60,115,116,111,112,112,101,100,49,39,93,41,59,13,10
+,105,115,108,97,110,100,115,110,111,116,97,98,108,101,98,111,114,100,101,114,58,
+108,105,115,116,32,111,102,99,97,114,114,105,101,100,49,48,48,44,48,48,48,60,47,
+104,51,62,10,32,115,101,118,101,114,97,108,98,101,99,111,109,101,115,115,101,108
+,101,99,116,32,119,101,100,100,105,110,103,48,48,46,104,116,109,108,109,111,110,
+97,114,99,104,111,102,102,32,116,104,101,116,101,97,99,104,101,114,104,105,103,
+104,108,121,32,98,105,111,108,111,103,121,108,105,102,101,32,111,102,111,114,32,
+101,118,101,110,114,105,115,101,32,111,102,38,114,97,113,117,111,59,112,108,117,
+115,111,110,101,104,117,110,116,105,110,103,40,116,104,111,117,103,104,68,111,
+117,103,108,97,115,106,111,105,110,105,110,103,99,105,114,99,108,101,115,70,111,
+114,32,116,104,101,65,110,99,105,101,110,116,86,105,101,116,110,97,109,118,101,
+104,105,99,108,101,115,117,99,104,32,97,115,99,114,121,115,116,97,108,118,97,108
+,117,101,32,61,87,105,110,100,111,119,115,101,110,106,111,121,101,100,97,32,115,
+109,97,108,108,97,115,115,117,109,101,100,60,97,32,105,100,61,34,102,111,114,101
+,105,103,110,32,65,108,108,32,114,105,104,111,119,32,116,104,101,68,105,115,112,
+108,97,121,114,101,116,105,114,101,100,104,111,119,101,118,101,114,104,105,100,
+100,101,110,59,98,97,116,116,108,101,115,115,101,101,107,105,110,103,99,97,98,
+105,110,101,116,119,97,115,32,110,111,116,108,111,111,107,32,97,116,99,111,110,
+100,117,99,116,103,101,116,32,116,104,101,74,97,110,117,97,114,121,104,97,112,
+112,101,110,115,116,117,114,110,105,110,103,97,58,104,111,118,101,114,79,110,108
+,105,110,101,32,70,114,101,110,99,104,32,108,97,99,107,105,110,103,116,121,112,
+105,99,97,108,101,120,116,114,97,99,116,101,110,101,109,105,101,115,101,118,101,
+110,32,105,102,103,101,110,101,114,97,116,100,101,99,105,100,101,100,97,114,101,
+32,110,111,116,47,115,101,97,114,99,104,98,101,108,105,101,102,115,45,105,109,97
+,103,101,58,108,111,99,97,116,101,100,115,116,97,116,105,99,46,108,111,103,105,
+110,34,62,99,111,110,118,101,114,116,118,105,111,108,101,110,116,101,110,116,101
+,114,101,100,102,105,114,115,116,34,62,99,105,114,99,117,105,116,70,105,110,108,
+97,110,100,99,104,101,109,105,115,116,115,104,101,32,119,97,115,49,48,112,120,59
+,34,62,97,115,32,115,117,99,104,100,105,118,105,100,101,100,60,47,115,112,97,110
+,62,119,105,108,108,32,98,101,108,105,110,101,32,111,102,97,32,103,114,101,97,
+116,109,121,115,116,101,114,121,47,105,110,100,101,120,46,102,97,108,108,105,110
+,103,100,117,101,32,116,111,32,114,97,105,108,119,97,121,99,111,108,108,101,103,
+101,109,111,110,115,116,101,114,100,101,115,99,101,110,116,105,116,32,119,105,
+116,104,110,117,99,108,101,97,114,74,101,119,105,115,104,32,112,114,111,116,101,
+115,116,66,114,105,116,105,115,104,102,108,111,119,101,114,115,112,114,101,100,
+105,99,116,114,101,102,111,114,109,115,98,117,116,116,111,110,32,119,104,111,32,
+119,97,115,108,101,99,116,117,114,101,105,110,115,116,97,110,116,115,117,105,99,
+105,100,101,103,101,110,101,114,105,99,112,101,114,105,111,100,115,109,97,114,
+107,101,116,115,83,111,99,105,97,108,32,102,105,115,104,105,110,103,99,111,109,
+98,105,110,101,103,114,97,112,104,105,99,119,105,110,110,101,114,115,60,98,114,
+32,47,62,60,98,121,32,116,104,101,32,78,97,116,117,114,97,108,80,114,105,118,97,
+99,121,99,111,111,107,105,101,115,111,117,116,99,111,109,101,114,101,115,111,108
+,118,101,83,119,101,100,105,115,104,98,114,105,101,102,108,121,80,101,114,115,
+105,97,110,115,111,32,109,117,99,104,67,101,110,116,117,114,121,100,101,112,105,
+99,116,115,99,111,108,117,109,110,115,104,111,117,115,105,110,103,115,99,114,105
+,112,116,115,110,101,120,116,32,116,111,98,101,97,114,105,110,103,109,97,112,112
+,105,110,103,114,101,118,105,115,101,100,106,81,117,101,114,121,40,45,119,105,
+100,116,104,58,116,105,116,108,101,34,62,116,111,111,108,116,105,112,83,101,99,
+116,105,111,110,100,101,115,105,103,110,115,84,117,114,107,105,115,104,121,111,
+117,110,103,101,114,46,109,97,116,99,104,40,125,41,40,41,59,10,10,98,117,114,110
+,105,110,103,111,112,101,114,97,116,101,100,101,103,114,101,101,115,115,111,117,
+114,99,101,61,82,105,99,104,97,114,100,99,108,111,115,101,108,121,112,108,97,115
+,116,105,99,101,110,116,114,105,101,115,60,47,116,114,62,13,10,99,111,108,111,
+114,58,35,117,108,32,105,100,61,34,112,111,115,115,101,115,115,114,111,108,108,
+105,110,103,112,104,121,115,105,99,115,102,97,105,108,105,110,103,101,120,101,99
+,117,116,101,99,111,110,116,101,115,116,108,105,110,107,32,116,111,68,101,102,97
+,117,108,116,60,98,114,32,47,62,10,58,32,116,114,117,101,44,99,104,97,114,116,
+101,114,116,111,117,114,105,115,109,99,108,97,115,115,105,99,112,114,111,99,101,
+101,100,101,120,112,108,97,105,110,60,47,104,49,62,13,10,111,110,108,105,110,101
+,46,63,120,109,108,32,118,101,104,101,108,112,105,110,103,100,105,97,109,111,110
+,100,117,115,101,32,116,104,101,97,105,114,108,105,110,101,101,110,100,32,45,45,
+62,41,46,97,116,116,114,40,114,101,97,100,101,114,115,104,111,115,116,105,110,
+103,35,102,102,102,102,102,102,114,101,97,108,105,122,101,86,105,110,99,101,110,
+116,115,105,103,110,97,108,115,32,115,114,99,61,34,47,80,114,111,100,117,99,116,
+100,101,115,112,105,116,101,100,105,118,101,114,115,101,116,101,108,108,105,110,
+103,80,117,98,108,105,99,32,104,101,108,100,32,105,110,74,111,115,101,112,104,32
+,116,104,101,97,116,114,101,97,102,102,101,99,116,115,60,115,116,121,108,101,62,
+97,32,108,97,114,103,101,100,111,101,115,110,39,116,108,97,116,101,114,44,32,69,
+108,101,109,101,110,116,102,97,118,105,99,111,110,99,114,101,97,116,111,114,72,
+117,110,103,97,114,121,65,105,114,112,111,114,116,115,101,101,32,116,104,101,115
+,111,32,116,104,97,116,77,105,99,104,97,101,108,83,121,115,116,101,109,115,80,
+114,111,103,114,97,109,115,44,32,97,110,100,32,32,119,105,100,116,104,61,101,38,
+113,117,111,116,59,116,114,97,100,105,110,103,108,101,102,116,34,62,10,112,101,
+114,115,111,110,115,71,111,108,100,101,110,32,65,102,102,97,105,114,115,103,114,
+97,109,109,97,114,102,111,114,109,105,110,103,100,101,115,116,114,111,121,105,
+100,101,97,32,111,102,99,97,115,101,32,111,102,111,108,100,101,115,116,32,116,
+104,105,115,32,105,115,46,115,114,99,32,61,32,99,97,114,116,111,111,110,114,101,
+103,105,115,116,114,67,111,109,109,111,110,115,77,117,115,108,105,109,115,87,104
+,97,116,32,105,115,105,110,32,109,97,110,121,109,97,114,107,105,110,103,114,101,
+118,101,97,108,115,73,110,100,101,101,100,44,101,113,117,97,108,108,121,47,115,
+104,111,119,95,97,111,117,116,100,111,111,114,101,115,99,97,112,101,40,65,117,
+115,116,114,105,97,103,101,110,101,116,105,99,115,121,115,116,101,109,44,73,110,
+32,116,104,101,32,115,105,116,116,105,110,103,72,101,32,97,108,115,111,73,115,
+108,97,110,100,115,65,99,97,100,101,109,121,10,9,9,60,33,45,45,68,97,110,105,101
+,108,32,98,105,110,100,105,110,103,98,108,111,99,107,34,62,105,109,112,111,115,
+101,100,117,116,105,108,105,122,101,65,98,114,97,104,97,109,40,101,120,99,101,
+112,116,123,119,105,100,116,104,58,112,117,116,116,105,110,103,41,46,104,116,109
+,108,40,124,124,32,91,93,59,10,68,65,84,65,91,32,42,107,105,116,99,104,101,110,
+109,111,117,110,116,101,100,97,99,116,117,97,108,32,100,105,97,108,101,99,116,
+109,97,105,110,108,121,32,95,98,108,97,110,107,39,105,110,115,116,97,108,108,101
+,120,112,101,114,116,115,105,102,40,116,121,112,101,73,116,32,97,108,115,111,38,
+99,111,112,121,59,32,34,62,84,101,114,109,115,98,111,114,110,32,105,110,79,112,
+116,105,111,110,115,101,97,115,116,101,114,110,116,97,108,107,105,110,103,99,111
+,110,99,101,114,110,103,97,105,110,101,100,32,111,110,103,111,105,110,103,106,
+117,115,116,105,102,121,99,114,105,116,105,99,115,102,97,99,116,111,114,121,105,
+116,115,32,111,119,110,97,115,115,97,117,108,116,105,110,118,105,116,101,100,108
+,97,115,116,105,110,103,104,105,115,32,111,119,110,104,114,101,102,61,34,47,34,
+32,114,101,108,61,34,100,101,118,101,108,111,112,99,111,110,99,101,114,116,100,
+105,97,103,114,97,109,100,111,108,108,97,114,115,99,108,117,115,116,101,114,112,
+104,112,63,105,100,61,97,108,99,111,104,111,108,41,59,125,41,40,41,59,117,115,
+105,110,103,32,97,62,60,115,112,97,110,62,118,101,115,115,101,108,115,114,101,
+118,105,118,97,108,65,100,100,114,101,115,115,97,109,97,116,101,117,114,97,110,
+100,114,111,105,100,97,108,108,101,103,101,100,105,108,108,110,101,115,115,119,
+97,108,107,105,110,103,99,101,110,116,101,114,115,113,117,97,108,105,102,121,109
+,97,116,99,104,101,115,117,110,105,102,105,101,100,101,120,116,105,110,99,116,68
+,101,102,101,110,115,101,100,105,101,100,32,105,110,10,9,60,33,45,45,32,99,117,
+115,116,111,109,115,108,105,110,107,105,110,103,76,105,116,116,108,101,32,66,111
+,111,107,32,111,102,101,118,101,110,105,110,103,109,105,110,46,106,115,63,97,114
+,101,32,116,104,101,107,111,110,116,97,107,116,116,111,100,97,121,39,115,46,104,
+116,109,108,34,32,116,97,114,103,101,116,61,119,101,97,114,105,110,103,65,108,
+108,32,82,105,103,59,10,125,41,40,41,59,114,97,105,115,105,110,103,32,65,108,115
+,111,44,32,99,114,117,99,105,97,108,97,98,111,117,116,34,62,100,101,99,108,97,
+114,101,45,45,62,10,60,115,99,102,105,114,101,102,111,120,97,115,32,109,117,99,
+104,97,112,112,108,105,101,115,105,110,100,101,120,44,32,115,44,32,98,117,116,32
+,116,121,112,101,32,61,32,10,13,10,60,33,45,45,116,111,119,97,114,100,115,82,101
+,99,111,114,100,115,80,114,105,118,97,116,101,70,111,114,101,105,103,110,80,114,
+101,109,105,101,114,99,104,111,105,99,101,115,86,105,114,116,117,97,108,114,101,
+116,117,114,110,115,67,111,109,109,101,110,116,80,111,119,101,114,101,100,105,
+110,108,105,110,101,59,112,111,118,101,114,116,121,99,104,97,109,98,101,114,76,
+105,118,105,110,103,32,118,111,108,117,109,101,115,65,110,116,104,111,110,121,
+108,111,103,105,110,34,32,82,101,108,97,116,101,100,69,99,111,110,111,109,121,
+114,101,97,99,104,101,115,99,117,116,116,105,110,103,103,114,97,118,105,116,121,
+108,105,102,101,32,105,110,67,104,97,112,116,101,114,45,115,104,97,100,111,119,
+78,111,116,97,98,108,101,60,47,116,100,62,13,10,32,114,101,116,117,114,110,115,
+116,97,100,105,117,109,119,105,100,103,101,116,115,118,97,114,121,105,110,103,
+116,114,97,118,101,108,115,104,101,108,100,32,98,121,119,104,111,32,97,114,101,
+119,111,114,107,32,105,110,102,97,99,117,108,116,121,97,110,103,117,108,97,114,
+119,104,111,32,104,97,100,97,105,114,112,111,114,116,116,111,119,110,32,111,102,
+10,10,83,111,109,101,32,39,99,108,105,99,107,39,99,104,97,114,103,101,115,107,
+101,121,119,111,114,100,105,116,32,119,105,108,108,99,105,116,121,32,111,102,40,
+116,104,105,115,41,59,65,110,100,114,101,119,32,117,110,105,113,117,101,32,99,
+104,101,99,107,101,100,111,114,32,109,111,114,101,51,48,48,112,120,59,32,114,101
+,116,117,114,110,59,114,115,105,111,110,61,34,112,108,117,103,105,110,115,119,
+105,116,104,105,110,32,104,101,114,115,101,108,102,83,116,97,116,105,111,110,70,
+101,100,101,114,97,108,118,101,110,116,117,114,101,112,117,98,108,105,115,104,
+115,101,110,116,32,116,111,116,101,110,115,105,111,110,97,99,116,114,101,115,115
+,99,111,109,101,32,116,111,102,105,110,103,101,114,115,68,117,107,101,32,111,102
+,112,101,111,112,108,101,44,101,120,112,108,111,105,116,119,104,97,116,32,105,
+115,104,97,114,109,111,110,121,97,32,109,97,106,111,114,34,58,34,104,116,116,112
+,105,110,32,104,105,115,32,109,101,110,117,34,62,10,109,111,110,116,104,108,121,
+111,102,102,105,99,101,114,99,111,117,110,99,105,108,103,97,105,110,105,110,103,
+101,118,101,110,32,105,110,83,117,109,109,97,114,121,100,97,116,101,32,111,102,
+108,111,121,97,108,116,121,102,105,116,110,101,115,115,97,110,100,32,119,97,115,
+101,109,112,101,114,111,114,115,117,112,114,101,109,101,83,101,99,111,110,100,32
+,104,101,97,114,105,110,103,82,117,115,115,105,97,110,108,111,110,103,101,115,
+116,65,108,98,101,114,116,97,108,97,116,101,114,97,108,115,101,116,32,111,102,32
+,115,109,97,108,108,34,62,46,97,112,112,101,110,100,100,111,32,119,105,116,104,
+102,101,100,101,114,97,108,98,97,110,107,32,111,102,98,101,110,101,97,116,104,68
+,101,115,112,105,116,101,67,97,112,105,116,97,108,103,114,111,117,110,100,115,41
+,44,32,97,110,100,32,112,101,114,99,101,110,116,105,116,32,102,114,111,109,99,
+108,111,115,105,110,103,99,111,110,116,97,105,110,73,110,115,116,101,97,100,102,
+105,102,116,101,101,110,97,115,32,119,101,108,108,46,121,97,104,111,111,46,114,
+101,115,112,111,110,100,102,105,103,104,116,101,114,111,98,115,99,117,114,101,
+114,101,102,108,101,99,116,111,114,103,97,110,105,99,61,32,77,97,116,104,46,101,
+100,105,116,105,110,103,111,110,108,105,110,101,32,112,97,100,100,105,110,103,97
+,32,119,104,111,108,101,111,110,101,114,114,111,114,121,101,97,114,32,111,102,
+101,110,100,32,111,102,32,98,97,114,114,105,101,114,119,104,101,110,32,105,116,
+104,101,97,100,101,114,32,104,111,109,101,32,111,102,114,101,115,117,109,101,100
+,114,101,110,97,109,101,100,115,116,114,111,110,103,62,104,101,97,116,105,110,
+103,114,101,116,97,105,110,115,99,108,111,117,100,102,114,119,97,121,32,111,102,
+32,77,97,114,99,104,32,49,107,110,111,119,105,110,103,105,110,32,112,97,114,116,
+66,101,116,119,101,101,110,108,101,115,115,111,110,115,99,108,111,115,101,115,
+116,118,105,114,116,117,97,108,108,105,110,107,115,34,62,99,114,111,115,115,101,
+100,69,78,68,32,45,45,62,102,97,109,111,117,115,32,97,119,97,114,100,101,100,76,
+105,99,101,110,115,101,72,101,97,108,116,104,32,102,97,105,114,108,121,32,119,
+101,97,108,116,104,121,109,105,110,105,109,97,108,65,102,114,105,99,97,110,99,
+111,109,112,101,116,101,108,97,98,101,108,34,62,115,105,110,103,105,110,103,102,
+97,114,109,101,114,115,66,114,97,115,105,108,41,100,105,115,99,117,115,115,114,
+101,112,108,97,99,101,71,114,101,103,111,114,121,102,111,110,116,32,99,111,112,
+117,114,115,117,101,100,97,112,112,101,97,114,115,109,97,107,101,32,117,112,114,
+111,117,110,100,101,100,98,111,116,104,32,111,102,98,108,111,99,107,101,100,115,
+97,119,32,116,104,101,111,102,102,105,99,101,115,99,111,108,111,117,114,115,105,
+102,40,100,111,99,117,119,104,101,110,32,104,101,101,110,102,111,114,99,101,112,
+117,115,104,40,102,117,65,117,103,117,115,116,32,85,84,70,45,56,34,62,70,97,110,
+116,97,115,121,105,110,32,109,111,115,116,105,110,106,117,114,101,100,85,115,117
+,97,108,108,121,102,97,114,109,105,110,103,99,108,111,115,117,114,101,111,98,106
+,101,99,116,32,100,101,102,101,110,99,101,117,115,101,32,111,102,32,77,101,100,
+105,99,97,108,60,98,111,100,121,62,10,101,118,105,100,101,110,116,98,101,32,117,
+115,101,100,107,101,121,67,111,100,101,115,105,120,116,101,101,110,73,115,108,97
+,109,105,99,35,48,48,48,48,48,48,101,110,116,105,114,101,32,119,105,100,101,108,
+121,32,97,99,116,105,118,101,32,40,116,121,112,101,111,102,111,110,101,32,99,97,
+110,99,111,108,111,114,32,61,115,112,101,97,107,101,114,101,120,116,101,110,100,
+115,80,104,121,115,105,99,115,116,101,114,114,97,105,110,60,116,98,111,100,121,
+62,102,117,110,101,114,97,108,118,105,101,119,105,110,103,109,105,100,100,108,
+101,32,99,114,105,99,107,101,116,112,114,111,112,104,101,116,115,104,105,102,116
+,101,100,100,111,99,116,111,114,115,82,117,115,115,101,108,108,32,116,97,114,103
+,101,116,99,111,109,112,97,99,116,97,108,103,101,98,114,97,115,111,99,105,97,108
+,45,98,117,108,107,32,111,102,109,97,110,32,97,110,100,60,47,116,100,62,10,32,
+104,101,32,108,101,102,116,41,46,118,97,108,40,41,102,97,108,115,101,41,59,108,
+111,103,105,99,97,108,98,97,110,107,105,110,103,104,111,109,101,32,116,111,110,
+97,109,105,110,103,32,65,114,105,122,111,110,97,99,114,101,100,105,116,115,41,59
+,10,125,41,59,10,102,111,117,110,100,101,114,105,110,32,116,117,114,110,67,111,
+108,108,105,110,115,98,101,102,111,114,101,32,66,117,116,32,116,104,101,99,104,
+97,114,103,101,100,84,105,116,108,101,34,62,67,97,112,116,97,105,110,115,112,101
+,108,108,101,100,103,111,100,100,101,115,115,84,97,103,32,45,45,62,65,100,100,
+105,110,103,58,98,117,116,32,119,97,115,82,101,99,101,110,116,32,112,97,116,105,
+101,110,116,98,97,99,107,32,105,110,61,102,97,108,115,101,38,76,105,110,99,111,
+108,110,119,101,32,107,110,111,119,67,111,117,110,116,101,114,74,117,100,97,105,
+115,109,115,99,114,105,112,116,32,97,108,116,101,114,101,100,39,93,41,59,10,32,
+32,104,97,115,32,116,104,101,117,110,99,108,101,97,114,69,118,101,110,116,39,44,
+98,111,116,104,32,105,110,110,111,116,32,97,108,108,10,10,60,33,45,45,32,112,108
+,97,99,105,110,103,104,97,114,100,32,116,111,32,99,101,110,116,101,114,115,111,
+114,116,32,111,102,99,108,105,101,110,116,115,115,116,114,101,101,116,115,66,101
+,114,110,97,114,100,97,115,115,101,114,116,115,116,101,110,100,32,116,111,102,97
+,110,116,97,115,121,100,111,119,110,32,105,110,104,97,114,98,111,117,114,70,114,
+101,101,100,111,109,106,101,119,101,108,114,121,47,97,98,111,117,116,46,46,115,
+101,97,114,99,104,108,101,103,101,110,100,115,105,115,32,109,97,100,101,109,111,
+100,101,114,110,32,111,110,108,121,32,111,110,111,110,108,121,32,116,111,105,109
+,97,103,101,34,32,108,105,110,101,97,114,32,112,97,105,110,116,101,114,97,110,
+100,32,110,111,116,114,97,114,101,108,121,32,97,99,114,111,110,121,109,100,101,
+108,105,118,101,114,115,104,111,114,116,101,114,48,48,38,97,109,112,59,97,115,32
+,109,97,110,121,119,105,100,116,104,61,34,47,42,32,60,33,91,67,116,105,116,108,
+101,32,61,111,102,32,116,104,101,32,108,111,119,101,115,116,32,112,105,99,107,
+101,100,32,101,115,99,97,112,101,100,117,115,101,115,32,111,102,112,101,111,112,
+108,101,115,32,80,117,98,108,105,99,77,97,116,116,104,101,119,116,97,99,116,105,
+99,115,100,97,109,97,103,101,100,119,97,121,32,102,111,114,108,97,119,115,32,111
+,102,101,97,115,121,32,116,111,32,119,105,110,100,111,119,115,116,114,111,110,
+103,32,32,115,105,109,112,108,101,125,99,97,116,99,104,40,115,101,118,101,110,
+116,104,105,110,102,111,98,111,120,119,101,110,116,32,116,111,112,97,105,110,116
+,101,100,99,105,116,105,122,101,110,73,32,100,111,110,39,116,114,101,116,114,101
+,97,116,46,32,83,111,109,101,32,119,119,46,34,41,59,10,98,111,109,98,105,110,103
+,109,97,105,108,116,111,58,109,97,100,101,32,105,110,46,32,77,97,110,121,32,99,
+97,114,114,105,101,115,124,124,123,125,59,119,105,119,111,114,107,32,111,102,115
+,121,110,111,110,121,109,100,101,102,101,97,116,115,102,97,118,111,114,101,100,
+111,112,116,105,99,97,108,112,97,103,101,84,114,97,117,110,108,101,115,115,32,
+115,101,110,100,105,110,103,108,101,102,116,34,62,60,99,111,109,83,99,111,114,65
+,108,108,32,116,104,101,106,81,117,101,114,121,46,116,111,117,114,105,115,116,67
+,108,97,115,115,105,99,102,97,108,115,101,34,32,87,105,108,104,101,108,109,115,
+117,98,117,114,98,115,103,101,110,117,105,110,101,98,105,115,104,111,112,115,46,
+115,112,108,105,116,40,103,108,111,98,97,108,32,102,111,108,108,111,119,115,98,
+111,100,121,32,111,102,110,111,109,105,110,97,108,67,111,110,116,97,99,116,115,
+101,99,117,108,97,114,108,101,102,116,32,116,111,99,104,105,101,102,108,121,45,
+104,105,100,100,101,110,45,98,97,110,110,101,114,60,47,108,105,62,10,10,46,32,87
+,104,101,110,32,105,110,32,98,111,116,104,100,105,115,109,105,115,115,69,120,112
+,108,111,114,101,97,108,119,97,121,115,32,118,105,97,32,116,104,101,115,112,97,
+195,177,111,108,119,101,108,102,97,114,101,114,117,108,105,110,103,32,97,114,114
+,97,110,103,101,99,97,112,116,97,105,110,104,105,115,32,115,111,110,114,117,108,
+101,32,111,102,104,101,32,116,111,111,107,105,116,115,101,108,102,44,61,48,38,97
+,109,112,59,40,99,97,108,108,101,100,115,97,109,112,108,101,115,116,111,32,109,
+97,107,101,99,111,109,47,112,97,103,77,97,114,116,105,110,32,75,101,110,110,101,
+100,121,97,99,99,101,112,116,115,102,117,108,108,32,111,102,104,97,110,100,108,
+101,100,66,101,115,105,100,101,115,47,47,45,45,62,60,47,97,98,108,101,32,116,111
+,116,97,114,103,101,116,115,101,115,115,101,110,99,101,104,105,109,32,116,111,32
+,105,116,115,32,98,121,32,99,111,109,109,111,110,46,109,105,110,101,114,97,108,
+116,111,32,116,97,107,101,119,97,121,115,32,116,111,115,46,111,114,103,47,108,97
+,100,118,105,115,101,100,112,101,110,97,108,116,121,115,105,109,112,108,101,58,
+105,102,32,116,104,101,121,76,101,116,116,101,114,115,97,32,115,104,111,114,116,
+72,101,114,98,101,114,116,115,116,114,105,107,101,115,32,103,114,111,117,112,115
+,46,108,101,110,103,116,104,102,108,105,103,104,116,115,111,118,101,114,108,97,
+112,115,108,111,119,108,121,32,108,101,115,115,101,114,32,115,111,99,105,97,108,
+32,60,47,112,62,10,9,9,105,116,32,105,110,116,111,114,97,110,107,101,100,32,114,
+97,116,101,32,111,102,117,108,62,13,10,32,32,97,116,116,101,109,112,116,112,97,
+105,114,32,111,102,109,97,107,101,32,105,116,75,111,110,116,97,107,116,65,110,
+116,111,110,105,111,104,97,118,105,110,103,32,114,97,116,105,110,103,115,32,97,
+99,116,105,118,101,115,116,114,101,97,109,115,116,114,97,112,112,101,100,34,41,
+46,99,115,115,40,104,111,115,116,105,108,101,108,101,97,100,32,116,111,108,105,
+116,116,108,101,32,103,114,111,117,112,115,44,80,105,99,116,117,114,101,45,45,62
+,13,10,13,10,32,114,111,119,115,61,34,32,111,98,106,101,99,116,105,110,118,101,
+114,115,101,60,102,111,111,116,101,114,67,117,115,116,111,109,86,62,60,92,47,115
+,99,114,115,111,108,118,105,110,103,67,104,97,109,98,101,114,115,108,97,118,101,
+114,121,119,111,117,110,100,101,100,119,104,101,114,101,97,115,33,61,32,39,117,
+110,100,102,111,114,32,97,108,108,112,97,114,116,108,121,32,45,114,105,103,104,
+116,58,65,114,97,98,105,97,110,98,97,99,107,101,100,32,99,101,110,116,117,114,
+121,117,110,105,116,32,111,102,109,111,98,105,108,101,45,69,117,114,111,112,101,
+44,105,115,32,104,111,109,101,114,105,115,107,32,111,102,100,101,115,105,114,101
+,100,67,108,105,110,116,111,110,99,111,115,116,32,111,102,97,103,101,32,111,102,
+32,98,101,99,111,109,101,32,110,111,110,101,32,111,102,112,38,113,117,111,116,59
+,77,105,100,100,108,101,32,101,97,100,39,41,91,48,67,114,105,116,105,99,115,115,
+116,117,100,105,111,115,62,38,99,111,112,121,59,103,114,111,117,112,34,62,97,115
+,115,101,109,98,108,109,97,107,105,110,103,32,112,114,101,115,115,101,100,119,
+105,100,103,101,116,46,112,115,58,34,32,63,32,114,101,98,117,105,108,116,98,121,
+32,115,111,109,101,70,111,114,109,101,114,32,101,100,105,116,111,114,115,100,101
+,108,97,121,101,100,67,97,110,111,110,105,99,104,97,100,32,116,104,101,112,117,
+115,104,105,110,103,99,108,97,115,115,61,34,98,117,116,32,97,114,101,112,97,114,
+116,105,97,108,66,97,98,121,108,111,110,98,111,116,116,111,109,32,99,97,114,114,
+105,101,114,67,111,109,109,97,110,100,105,116,115,32,117,115,101,65,115,32,119,
+105,116,104,99,111,117,114,115,101,115,97,32,116,104,105,114,100,100,101,110,111
+,116,101,115,97,108,115,111,32,105,110,72,111,117,115,116,111,110,50,48,112,120,
+59,34,62,97,99,99,117,115,101,100,100,111,117,98,108,101,32,103,111,97,108,32,
+111,102,70,97,109,111,117,115,32,41,46,98,105,110,100,40,112,114,105,101,115,116
+,115,32,79,110,108,105,110,101,105,110,32,74,117,108,121,115,116,32,43,32,34,103
+,99,111,110,115,117,108,116,100,101,99,105,109,97,108,104,101,108,112,102,117,
+108,114,101,118,105,118,101,100,105,115,32,118,101,114,121,114,39,43,39,105,112,
+116,108,111,115,105,110,103,32,102,101,109,97,108,101,115,105,115,32,97,108,115,
+111,115,116,114,105,110,103,115,100,97,121,115,32,111,102,97,114,114,105,118,97,
+108,102,117,116,117,114,101,32,60,111,98,106,101,99,116,102,111,114,99,105,110,
+103,83,116,114,105,110,103,40,34,32,47,62,10,9,9,104,101,114,101,32,105,115,101,
+110,99,111,100,101,100,46,32,32,84,104,101,32,98,97,108,108,111,111,110,100,111,
+110,101,32,98,121,47,99,111,109,109,111,110,98,103,99,111,108,111,114,108,97,119
+,32,111,102,32,73,110,100,105,97,110,97,97,118,111,105,100,101,100,98,117,116,32
+,116,104,101,50,112,120,32,51,112,120,106,113,117,101,114,121,46,97,102,116,101,
+114,32,97,112,111,108,105,99,121,46,109,101,110,32,97,110,100,102,111,111,116,
+101,114,45,61,32,116,114,117,101,59,102,111,114,32,117,115,101,115,99,114,101,
+101,110,46,73,110,100,105,97,110,32,105,109,97,103,101,32,61,102,97,109,105,108,
+121,44,104,116,116,112,58,47,47,32,38,110,98,115,112,59,100,114,105,118,101,114,
+115,101,116,101,114,110,97,108,115,97,109,101,32,97,115,110,111,116,105,99,101,
+100,118,105,101,119,101,114,115,125,41,40,41,59,10,32,105,115,32,109,111,114,101
+,115,101,97,115,111,110,115,102,111,114,109,101,114,32,116,104,101,32,110,101,
+119,105,115,32,106,117,115,116,99,111,110,115,101,110,116,32,83,101,97,114,99,
+104,119,97,115,32,116,104,101,119,104,121,32,116,104,101,115,104,105,112,112,101
+,100,98,114,62,60,98,114,62,119,105,100,116,104,58,32,104,101,105,103,104,116,61
+,109,97,100,101,32,111,102,99,117,105,115,105,110,101,105,115,32,116,104,97,116,
+97,32,118,101,114,121,32,65,100,109,105,114,97,108,32,102,105,120,101,100,59,110
+,111,114,109,97,108,32,77,105,115,115,105,111,110,80,114,101,115,115,44,32,111,
+110,116,97,114,105,111,99,104,97,114,115,101,116,116,114,121,32,116,111,32,105,
+110,118,97,100,101,100,61,34,116,114,117,101,34,115,112,97,99,105,110,103,105,
+115,32,109,111,115,116,97,32,109,111,114,101,32,116,111,116,97,108,108,121,102,
+97,108,108,32,111,102,125,41,59,13,10,32,32,105,109,109,101,110,115,101,116,105,
+109,101,32,105,110,115,101,116,32,111,117,116,115,97,116,105,115,102,121,116,111
+,32,102,105,110,100,100,111,119,110,32,116,111,108,111,116,32,111,102,32,80,108,
+97,121,101,114,115,105,110,32,74,117,110,101,113,117,97,110,116,117,109,110,111,
+116,32,116,104,101,116,105,109,101,32,116,111,100,105,115,116,97,110,116,70,105,
+110,110,105,115,104,115,114,99,32,61,32,40,115,105,110,103,108,101,32,104,101,
+108,112,32,111,102,71,101,114,109,97,110,32,108,97,119,32,97,110,100,108,97,98,
+101,108,101,100,102,111,114,101,115,116,115,99,111,111,107,105,110,103,115,112,
+97,99,101,34,62,104,101,97,100,101,114,45,119,101,108,108,32,97,115,83,116,97,
+110,108,101,121,98,114,105,100,103,101,115,47,103,108,111,98,97,108,67,114,111,
+97,116,105,97,32,65,98,111,117,116,32,91,48,93,59,10,32,32,105,116,44,32,97,110,
+100,103,114,111,117,112,101,100,98,101,105,110,103,32,97,41,123,116,104,114,111,
+119,104,101,32,109,97,100,101,108,105,103,104,116,101,114,101,116,104,105,99,97,
+108,70,70,70,70,70,70,34,98,111,116,116,111,109,34,108,105,107,101,32,97,32,101,
+109,112,108,111,121,115,108,105,118,101,32,105,110,97,115,32,115,101,101,110,112
+,114,105,110,116,101,114,109,111,115,116,32,111,102,117,98,45,108,105,110,107,
+114,101,106,101,99,116,115,97,110,100,32,117,115,101,105,109,97,103,101,34,62,
+115,117,99,99,101,101,100,102,101,101,100,105,110,103,78,117,99,108,101,97,114,
+105,110,102,111,114,109,97,116,111,32,104,101,108,112,87,111,109,101,110,39,115,
+78,101,105,116,104,101,114,77,101,120,105,99,97,110,112,114,111,116,101,105,110,
+60,116,97,98,108,101,32,98,121,32,109,97,110,121,104,101,97,108,116,104,121,108,
+97,119,115,117,105,116,100,101,118,105,115,101,100,46,112,117,115,104,40,123,115
+,101,108,108,101,114,115,115,105,109,112,108,121,32,84,104,114,111,117,103,104,
+46,99,111,111,107,105,101,32,73,109,97,103,101,40,111,108,100,101,114,34,62,117,
+115,46,106,115,34,62,32,83,105,110,99,101,32,117,110,105,118,101,114,115,108,97,
+114,103,101,114,32,111,112,101,110,32,116,111,33,45,45,32,101,110,100,108,105,
+101,115,32,105,110,39,93,41,59,13,10,32,32,109,97,114,107,101,116,119,104,111,32
+,105,115,32,40,34,68,79,77,67,111,109,97,110,97,103,101,100,111,110,101,32,102,
+111,114,116,121,112,101,111,102,32,75,105,110,103,100,111,109,112,114,111,102,
+105,116,115,112,114,111,112,111,115,101,116,111,32,115,104,111,119,99,101,110,
+116,101,114,59,109,97,100,101,32,105,116,100,114,101,115,115,101,100,119,101,114
+,101,32,105,110,109,105,120,116,117,114,101,112,114,101,99,105,115,101,97,114,
+105,115,105,110,103,115,114,99,32,61,32,39,109,97,107,101,32,97,32,115,101,99,
+117,114,101,100,66,97,112,116,105,115,116,118,111,116,105,110,103,32,10,9,9,118,
+97,114,32,77,97,114,99,104,32,50,103,114,101,119,32,117,112,67,108,105,109,97,
+116,101,46,114,101,109,111,118,101,115,107,105,108,108,101,100,119,97,121,32,116
+,104,101,60,47,104,101,97,100,62,102,97,99,101,32,111,102,97,99,116,105,110,103,
+32,114,105,103,104,116,34,62,116,111,32,119,111,114,107,114,101,100,117,99,101,
+115,104,97,115,32,104,97,100,101,114,101,99,116,101,100,115,104,111,119,40,41,59
+,97,99,116,105,111,110,61,98,111,111,107,32,111,102,97,110,32,97,114,101,97,61,
+61,32,34,104,116,116,60,104,101,97,100,101,114,10,60,104,116,109,108,62,99,111,
+110,102,111,114,109,102,97,99,105,110,103,32,99,111,111,107,105,101,46,114,101,
+108,121,32,111,110,104,111,115,116,101,100,32,46,99,117,115,116,111,109,104,101,
+32,119,101,110,116,98,117,116,32,102,111,114,115,112,114,101,97,100,32,70,97,109
+,105,108,121,32,97,32,109,101,97,110,115,111,117,116,32,116,104,101,102,111,114,
+117,109,115,46,102,111,111,116,97,103,101,34,62,77,111,98,105,108,67,108,101,109
+,101,110,116,115,34,32,105,100,61,34,97,115,32,104,105,103,104,105,110,116,101,
+110,115,101,45,45,62,60,33,45,45,102,101,109,97,108,101,32,105,115,32,115,101,
+101,110,105,109,112,108,105,101,100,115,101,116,32,116,104,101,97,32,115,116,97,
+116,101,97,110,100,32,104,105,115,102,97,115,116,101,115,116,98,101,115,105,100,
+101,115,98,117,116,116,111,110,95,98,111,117,110,100,101,100,34,62,60,105,109,
+103,32,73,110,102,111,98,111,120,101,118,101,110,116,115,44,97,32,121,111,117,
+110,103,97,110,100,32,97,114,101,78,97,116,105,118,101,32,99,104,101,97,112,101,
+114,84,105,109,101,111,117,116,97,110,100,32,104,97,115,101,110,103,105,110,101,
+115,119,111,110,32,116,104,101,40,109,111,115,116,108,121,114,105,103,104,116,58
+,32,102,105,110,100,32,97,32,45,98,111,116,116,111,109,80,114,105,110,99,101,32,
+97,114,101,97,32,111,102,109,111,114,101,32,111,102,115,101,97,114,99,104,95,110
+,97,116,117,114,101,44,108,101,103,97,108,108,121,112,101,114,105,111,100,44,108
+,97,110,100,32,111,102,111,114,32,119,105,116,104,105,110,100,117,99,101,100,112
+,114,111,118,105,110,103,109,105,115,115,105,108,101,108,111,99,97,108,108,121,
+65,103,97,105,110,115,116,116,104,101,32,119,97,121,107,38,113,117,111,116,59,
+112,120,59,34,62,13,10,112,117,115,104,101,100,32,97,98,97,110,100,111,110,110,
+117,109,101,114,97,108,67,101,114,116,97,105,110,73,110,32,116,104,105,115,109,
+111,114,101,32,105,110,111,114,32,115,111,109,101,110,97,109,101,32,105,115,97,
+110,100,44,32,105,110,99,114,111,119,110,101,100,73,83,66,78,32,48,45,99,114,101
+,97,116,101,115,79,99,116,111,98,101,114,109,97,121,32,110,111,116,99,101,110,
+116,101,114,32,108,97,116,101,32,105,110,68,101,102,101,110,99,101,101,110,97,99
+,116,101,100,119,105,115,104,32,116,111,98,114,111,97,100,108,121,99,111,111,108
+,105,110,103,111,110,108,111,97,100,61,105,116,46,32,84,104,101,114,101,99,111,
+118,101,114,77,101,109,98,101,114,115,104,101,105,103,104,116,32,97,115,115,117,
+109,101,115,60,104,116,109,108,62,10,112,101,111,112,108,101,46,105,110,32,111,
+110,101,32,61,119,105,110,100,111,119,102,111,111,116,101,114,95,97,32,103,111,
+111,100,32,114,101,107,108,97,109,97,111,116,104,101,114,115,44,116,111,32,116,
+104,105,115,95,99,111,111,107,105,101,112,97,110,101,108,34,62,76,111,110,100,
+111,110,44,100,101,102,105,110,101,115,99,114,117,115,104,101,100,98,97,112,116,
+105,115,109,99,111,97,115,116,97,108,115,116,97,116,117,115,32,116,105,116,108,
+101,34,32,109,111,118,101,32,116,111,108,111,115,116,32,105,110,98,101,116,116,
+101,114,32,105,109,112,108,105,101,115,114,105,118,97,108,114,121,115,101,114,
+118,101,114,115,32,83,121,115,116,101,109,80,101,114,104,97,112,115,101,115,32,
+97,110,100,32,99,111,110,116,101,110,100,102,108,111,119,105,110,103,108,97,115,
+116,101,100,32,114,105,115,101,32,105,110,71,101,110,101,115,105,115,118,105,101
+,119,32,111,102,114,105,115,105,110,103,32,115,101,101,109,32,116,111,98,117,116
+,32,105,110,32,98,97,99,107,105,110,103,104,101,32,119,105,108,108,103,105,118,
+101,110,32,97,103,105,118,105,110,103,32,99,105,116,105,101,115,46,102,108,111,
+119,32,111,102,32,76,97,116,101,114,32,97,108,108,32,98,117,116,72,105,103,104,
+119,97,121,111,110,108,121,32,98,121,115,105,103,110,32,111,102,104,101,32,100,
+111,101,115,100,105,102,102,101,114,115,98,97,116,116,101,114,121,38,97,109,112,
+59,108,97,115,105,110,103,108,101,115,116,104,114,101,97,116,115,105,110,116,101
+,103,101,114,116,97,107,101,32,111,110,114,101,102,117,115,101,100,99,97,108,108
+,101,100,32,61,85,83,38,97,109,112,83,101,101,32,116,104,101,110,97,116,105,118,
+101,115,98,121,32,116,104,105,115,115,121,115,116,101,109,46,104,101,97,100,32,
+111,102,58,104,111,118,101,114,44,108,101,115,98,105,97,110,115,117,114,110,97,
+109,101,97,110,100,32,97,108,108,99,111,109,109,111,110,47,104,101,97,100,101,
+114,95,95,112,97,114,97,109,115,72,97,114,118,97,114,100,47,112,105,120,101,108,
+46,114,101,109,111,118,97,108,115,111,32,108,111,110,103,114,111,108,101,32,111,
+102,106,111,105,110,116,108,121,115,107,121,115,99,114,97,85,110,105,99,111,100,
+101,98,114,32,47,62,13,10,65,116,108,97,110,116,97,110,117,99,108,101,117,115,67
+,111,117,110,116,121,44,112,117,114,101,108,121,32,99,111,117,110,116,34,62,101,
+97,115,105,108,121,32,98,117,105,108,100,32,97,111,110,99,108,105,99,107,97,32,
+103,105,118,101,110,112,111,105,110,116,101,114,104,38,113,117,111,116,59,101,
+118,101,110,116,115,32,101,108,115,101,32,123,10,100,105,116,105,111,110,115,110
+,111,119,32,116,104,101,44,32,119,105,116,104,32,109,97,110,32,119,104,111,111,
+114,103,47,87,101,98,111,110,101,32,97,110,100,99,97,118,97,108,114,121,72,101,
+32,100,105,101,100,115,101,97,116,116,108,101,48,48,44,48,48,48,32,123,119,105,
+110,100,111,119,104,97,118,101,32,116,111,105,102,40,119,105,110,100,97,110,100,
+32,105,116,115,115,111,108,101,108,121,32,109,38,113,117,111,116,59,114,101,110,
+101,119,101,100,68,101,116,114,111,105,116,97,109,111,110,103,115,116,101,105,
+116,104,101,114,32,116,104,101,109,32,105,110,83,101,110,97,116,111,114,85,115,
+60,47,97,62,60,75,105,110,103,32,111,102,70,114,97,110,99,105,115,45,112,114,111
+,100,117,99,104,101,32,117,115,101,100,97,114,116,32,97,110,100,104,105,109,32,
+97,110,100,117,115,101,100,32,98,121,115,99,111,114,105,110,103,97,116,32,104,
+111,109,101,116,111,32,104,97,118,101,114,101,108,97,116,101,115,105,98,105,108,
+105,116,121,102,97,99,116,105,111,110,66,117,102,102,97,108,111,108,105,110,107,
+34,62,60,119,104,97,116,32,104,101,102,114,101,101,32,116,111,67,105,116,121,32,
+111,102,99,111,109,101,32,105,110,115,101,99,116,111,114,115,99,111,117,110,116,
+101,100,111,110,101,32,100,97,121,110,101,114,118,111,117,115,115,113,117,97,114
+,101,32,125,59,105,102,40,103,111,105,110,32,119,104,97,116,105,109,103,34,32,97
+,108,105,115,32,111,110,108,121,115,101,97,114,99,104,47,116,117,101,115,100,97,
+121,108,111,111,115,101,108,121,83,111,108,111,109,111,110,115,101,120,117,97,
+108,32,45,32,60,97,32,104,114,109,101,100,105,117,109,34,68,79,32,78,79,84,32,70
+,114,97,110,99,101,44,119,105,116,104,32,97,32,119,97,114,32,97,110,100,115,101,
+99,111,110,100,32,116,97,107,101,32,97,32,62,13,10,13,10,13,10,109,97,114,107,
+101,116,46,104,105,103,104,119,97,121,100,111,110,101,32,105,110,99,116,105,118,
+105,116,121,34,108,97,115,116,34,62,111,98,108,105,103,101,100,114,105,115,101,
+32,116,111,34,117,110,100,101,102,105,109,97,100,101,32,116,111,32,69,97,114,108
+,121,32,112,114,97,105,115,101,100,105,110,32,105,116,115,32,102,111,114,32,104,
+105,115,97,116,104,108,101,116,101,74,117,112,105,116,101,114,89,97,104,111,111,
+33,32,116,101,114,109,101,100,32,115,111,32,109,97,110,121,114,101,97,108,108,
+121,32,115,46,32,84,104,101,32,97,32,119,111,109,97,110,63,118,97,108,117,101,61
+,100,105,114,101,99,116,32,114,105,103,104,116,34,32,98,105,99,121,99,108,101,97
+,99,105,110,103,61,34,100,97,121,32,97,110,100,115,116,97,116,105,110,103,82,97,
+116,104,101,114,44,104,105,103,104,101,114,32,79,102,102,105,99,101,32,97,114,
+101,32,110,111,119,116,105,109,101,115,44,32,119,104,101,110,32,97,32,112,97,121
+,32,102,111,114,111,110,32,116,104,105,115,45,108,105,110,107,34,62,59,98,111,
+114,100,101,114,97,114,111,117,110,100,32,97,110,110,117,97,108,32,116,104,101,
+32,78,101,119,112,117,116,32,116,104,101,46,99,111,109,34,32,116,97,107,105,110,
+32,116,111,97,32,98,114,105,101,102,40,105,110,32,116,104,101,103,114,111,117,
+112,115,46,59,32,119,105,100,116,104,101,110,122,121,109,101,115,115,105,109,112
+,108,101,32,105,110,32,108,97,116,101,123,114,101,116,117,114,110,116,104,101,
+114,97,112,121,97,32,112,111,105,110,116,98,97,110,110,105,110,103,105,110,107,
+115,34,62,10,40,41,59,34,32,114,101,97,32,112,108,97,99,101,92,117,48,48,51,67,
+97,97,98,111,117,116,32,97,116,114,62,13,10,9,9,99,99,111,117,110,116,32,103,105
+,118,101,115,32,97,60,83,67,82,73,80,84,82,97,105,108,119,97,121,116,104,101,109
+,101,115,47,116,111,111,108,98,111,120,66,121,73,100,40,34,120,104,117,109,97,
+110,115,44,119,97,116,99,104,101,115,105,110,32,115,111,109,101,32,105,102,32,40
+,119,105,99,111,109,105,110,103,32,102,111,114,109,97,116,115,32,85,110,100,101,
+114,32,98,117,116,32,104,97,115,104,97,110,100,101,100,32,109,97,100,101,32,98,
+121,116,104,97,110,32,105,110,102,101,97,114,32,111,102,100,101,110,111,116,101,
+100,47,105,102,114,97,109,101,108,101,102,116,32,105,110,118,111,108,116,97,103,
+101,105,110,32,101,97,99,104,97,38,113,117,111,116,59,98,97,115,101,32,111,102,
+73,110,32,109,97,110,121,117,110,100,101,114,103,111,114,101,103,105,109,101,115
+,97,99,116,105,111,110,32,60,47,112,62,13,10,60,117,115,116,111,109,86,97,59,38,
+103,116,59,60,47,105,109,112,111,114,116,115,111,114,32,116,104,97,116,109,111,
+115,116,108,121,32,38,97,109,112,59,114,101,32,115,105,122,101,61,34,60,47,97,62
+,60,47,104,97,32,99,108,97,115,115,112,97,115,115,105,118,101,72,111,115,116,32,
+61,32,87,104,101,116,104,101,114,102,101,114,116,105,108,101,86,97,114,105,111,
+117,115,61,91,93,59,40,102,117,99,97,109,101,114,97,115,47,62,60,47,116,100,62,
+97,99,116,115,32,97,115,73,110,32,115,111,109,101,62,13,10,13,10,60,33,111,114,
+103,97,110,105,115,32,60,98,114,32,47,62,66,101,105,106,105,110,103,99,97,116,97
+,108,195,160,100,101,117,116,115,99,104,101,117,114,111,112,101,117,101,117,115,
+107,97,114,97,103,97,101,105,108,103,101,115,118,101,110,115,107,97,101,115,112,
+97,195,177,97,109,101,110,115,97,106,101,117,115,117,97,114,105,111,116,114,97,
+98,97,106,111,109,195,169,120,105,99,111,112,195,161,103,105,110,97,115,105,101,
+109,112,114,101,115,105,115,116,101,109,97,111,99,116,117,98,114,101,100,117,114
+,97,110,116,101,97,195,177,97,100,105,114,101,109,112,114,101,115,97,109,111,109
+,101,110,116,111,110,117,101,115,116,114,111,112,114,105,109,101,114,97,116,114,
+97,118,195,169,115,103,114,97,99,105,97,115,110,117,101,115,116,114,97,112,114,
+111,99,101,115,111,101,115,116,97,100,111,115,99,97,108,105,100,97,100,112,101,
+114,115,111,110,97,110,195,186,109,101,114,111,97,99,117,101,114,100,111,109,195
+,186,115,105,99,97,109,105,101,109,98,114,111,111,102,101,114,116,97,115,97,108,
+103,117,110,111,115,112,97,195,173,115,101,115,101,106,101,109,112,108,111,100,
+101,114,101,99,104,111,97,100,101,109,195,161,115,112,114,105,118,97,100,111,97,
+103,114,101,103,97,114,101,110,108,97,99,101,115,112,111,115,105,98,108,101,104,
+111,116,101,108,101,115,115,101,118,105,108,108,97,112,114,105,109,101,114,111,
+195,186,108,116,105,109,111,101,118,101,110,116,111,115,97,114,99,104,105,118,
+111,99,117,108,116,117,114,97,109,117,106,101,114,101,115,101,110,116,114,97,100
+,97,97,110,117,110,99,105,111,101,109,98,97,114,103,111,109,101,114,99,97,100,
+111,103,114,97,110,100,101,115,101,115,116,117,100,105,111,109,101,106,111,114,
+101,115,102,101,98,114,101,114,111,100,105,115,101,195,177,111,116,117,114,105,
+115,109,111,99,195,179,100,105,103,111,112,111,114,116,97,100,97,101,115,112,97,
+99,105,111,102,97,109,105,108,105,97,97,110,116,111,110,105,111,112,101,114,109,
+105,116,101,103,117,97,114,100,97,114,97,108,103,117,110,97,115,112,114,101,99,
+105,111,115,97,108,103,117,105,101,110,115,101,110,116,105,100,111,118,105,115,
+105,116,97,115,116,195,173,116,117,108,111,99,111,110,111,99,101,114,115,101,103
+,117,110,100,111,99,111,110,115,101,106,111,102,114,97,110,99,105,97,109,105,110
+,117,116,111,115,115,101,103,117,110,100,97,116,101,110,101,109,111,115,101,102,
+101,99,116,111,115,109,195,161,108,97,103,97,115,101,115,105,195,179,110,114,101
+,118,105,115,116,97,103,114,97,110,97,100,97,99,111,109,112,114,97,114,105,110,
+103,114,101,115,111,103,97,114,99,195,173,97,97,99,99,105,195,179,110,101,99,117
+,97,100,111,114,113,117,105,101,110,101,115,105,110,99,108,117,115,111,100,101,
+98,101,114,195,161,109,97,116,101,114,105,97,104,111,109,98,114,101,115,109,117,
+101,115,116,114,97,112,111,100,114,195,173,97,109,97,195,177,97,110,97,195,186,
+108,116,105,109,97,101,115,116,97,109,111,115,111,102,105,99,105,97,108,116,97,
+109,98,105,101,110,110,105,110,103,195,186,110,115,97,108,117,100,111,115,112,
+111,100,101,109,111,115,109,101,106,111,114,97,114,112,111,115,105,116,105,111,
+110,98,117,115,105,110,101,115,115,104,111,109,101,112,97,103,101,115,101,99,117
+,114,105,116,121,108,97,110,103,117,97,103,101,115,116,97,110,100,97,114,100,99,
+97,109,112,97,105,103,110,102,101,97,116,117,114,101,115,99,97,116,101,103,111,
+114,121,101,120,116,101,114,110,97,108,99,104,105,108,100,114,101,110,114,101,
+115,101,114,118,101,100,114,101,115,101,97,114,99,104,101,120,99,104,97,110,103,
+101,102,97,118,111,114,105,116,101,116,101,109,112,108,97,116,101,109,105,108,
+105,116,97,114,121,105,110,100,117,115,116,114,121,115,101,114,118,105,99,101,
+115,109,97,116,101,114,105,97,108,112,114,111,100,117,99,116,115,122,45,105,110,
+100,101,120,58,99,111,109,109,101,110,116,115,115,111,102,116,119,97,114,101,99,
+111,109,112,108,101,116,101,99,97,108,101,110,100,97,114,112,108,97,116,102,111,
+114,109,97,114,116,105,99,108,101,115,114,101,113,117,105,114,101,100,109,111,
+118,101,109,101,110,116,113,117,101,115,116,105,111,110,98,117,105,108,100,105,
+110,103,112,111,108,105,116,105,99,115,112,111,115,115,105,98,108,101,114,101,
+108,105,103,105,111,110,112,104,121,115,105,99,97,108,102,101,101,100,98,97,99,
+107,114,101,103,105,115,116,101,114,112,105,99,116,117,114,101,115,100,105,115,
+97,98,108,101,100,112,114,111,116,111,99,111,108,97,117,100,105,101,110,99,101,
+115,101,116,116,105,110,103,115,97,99,116,105,118,105,116,121,101,108,101,109,
+101,110,116,115,108,101,97,114,110,105,110,103,97,110,121,116,104,105,110,103,97
+,98,115,116,114,97,99,116,112,114,111,103,114,101,115,115,111,118,101,114,118,
+105,101,119,109,97,103,97,122,105,110,101,101,99,111,110,111,109,105,99,116,114,
+97,105,110,105,110,103,112,114,101,115,115,117,114,101,118,97,114,105,111,117,
+115,32,60,115,116,114,111,110,103,62,112,114,111,112,101,114,116,121,115,104,111
+,112,112,105,110,103,116,111,103,101,116,104,101,114,97,100,118,97,110,99,101,
+100,98,101,104,97,118,105,111,114,100,111,119,110,108,111,97,100,102,101,97,116,
+117,114,101,100,102,111,111,116,98,97,108,108,115,101,108,101,99,116,101,100,76,
+97,110,103,117,97,103,101,100,105,115,116,97,110,99,101,114,101,109,101,109,98,
+101,114,116,114,97,99,107,105,110,103,112,97,115,115,119,111,114,100,109,111,100
+,105,102,105,101,100,115,116,117,100,101,110,116,115,100,105,114,101,99,116,108,
+121,102,105,103,104,116,105,110,103,110,111,114,116,104,101,114,110,100,97,116,
+97,98,97,115,101,102,101,115,116,105,118,97,108,98,114,101,97,107,105,110,103,
+108,111,99,97,116,105,111,110,105,110,116,101,114,110,101,116,100,114,111,112,
+100,111,119,110,112,114,97,99,116,105,99,101,101,118,105,100,101,110,99,101,102,
+117,110,99,116,105,111,110,109,97,114,114,105,97,103,101,114,101,115,112,111,110
+,115,101,112,114,111,98,108,101,109,115,110,101,103,97,116,105,118,101,112,114,
+111,103,114,97,109,115,97,110,97,108,121,115,105,115,114,101,108,101,97,115,101,
+100,98,97,110,110,101,114,34,62,112,117,114,99,104,97,115,101,112,111,108,105,99
+,105,101,115,114,101,103,105,111,110,97,108,99,114,101,97,116,105,118,101,97,114
+,103,117,109,101,110,116,98,111,111,107,109,97,114,107,114,101,102,101,114,114,
+101,114,99,104,101,109,105,99,97,108,100,105,118,105,115,105,111,110,99,97,108,
+108,98,97,99,107,115,101,112,97,114,97,116,101,112,114,111,106,101,99,116,115,99
+,111,110,102,108,105,99,116,104,97,114,100,119,97,114,101,105,110,116,101,114,
+101,115,116,100,101,108,105,118,101,114,121,109,111,117,110,116,97,105,110,111,
+98,116,97,105,110,101,100,61,32,102,97,108,115,101,59,102,111,114,40,118,97,114,
+32,97,99,99,101,112,116,101,100,99,97,112,97,99,105,116,121,99,111,109,112,117,
+116,101,114,105,100,101,110,116,105,116,121,97,105,114,99,114,97,102,116,101,109
+,112,108,111,121,101,100,112,114,111,112,111,115,101,100,100,111,109,101,115,116
+,105,99,105,110,99,108,117,100,101,115,112,114,111,118,105,100,101,100,104,111,
+115,112,105,116,97,108,118,101,114,116,105,99,97,108,99,111,108,108,97,112,115,
+101,97,112,112,114,111,97,99,104,112,97,114,116,110,101,114,115,108,111,103,111,
+34,62,60,97,100,97,117,103,104,116,101,114,97,117,116,104,111,114,34,32,99,117,
+108,116,117,114,97,108,102,97,109,105,108,105,101,115,47,105,109,97,103,101,115,
+47,97,115,115,101,109,98,108,121,112,111,119,101,114,102,117,108,116,101,97,99,
+104,105,110,103,102,105,110,105,115,104,101,100,100,105,115,116,114,105,99,116,
+99,114,105,116,105,99,97,108,99,103,105,45,98,105,110,47,112,117,114,112,111,115
+,101,115,114,101,113,117,105,114,101,115,101,108,101,99,116,105,111,110,98,101,
+99,111,109,105,110,103,112,114,111,118,105,100,101,115,97,99,97,100,101,109,105,
+99,101,120,101,114,99,105,115,101,97,99,116,117,97,108,108,121,109,101,100,105,
+99,105,110,101,99,111,110,115,116,97,110,116,97,99,99,105,100,101,110,116,77,97,
+103,97,122,105,110,101,100,111,99,117,109,101,110,116,115,116,97,114,116,105,110
+,103,98,111,116,116,111,109,34,62,111,98,115,101,114,118,101,100,58,32,38,113,
+117,111,116,59,101,120,116,101,110,100,101,100,112,114,101,118,105,111,117,115,
+83,111,102,116,119,97,114,101,99,117,115,116,111,109,101,114,100,101,99,105,115,
+105,111,110,115,116,114,101,110,103,116,104,100,101,116,97,105,108,101,100,115,
+108,105,103,104,116,108,121,112,108,97,110,110,105,110,103,116,101,120,116,97,
+114,101,97,99,117,114,114,101,110,99,121,101,118,101,114,121,111,110,101,115,116
+,114,97,105,103,104,116,116,114,97,110,115,102,101,114,112,111,115,105,116,105,
+118,101,112,114,111,100,117,99,101,100,104,101,114,105,116,97,103,101,115,104,
+105,112,112,105,110,103,97,98,115,111,108,117,116,101,114,101,99,101,105,118,101
+,100,114,101,108,101,118,97,110,116,98,117,116,116,111,110,34,32,118,105,111,108
+,101,110,99,101,97,110,121,119,104,101,114,101,98,101,110,101,102,105,116,115,
+108,97,117,110,99,104,101,100,114,101,99,101,110,116,108,121,97,108,108,105,97,
+110,99,101,102,111,108,108,111,119,101,100,109,117,108,116,105,112,108,101,98,
+117,108,108,101,116,105,110,105,110,99,108,117,100,101,100,111,99,99,117,114,114
+,101,100,105,110,116,101,114,110,97,108,36,40,116,104,105,115,41,46,114,101,112,
+117,98,108,105,99,62,60,116,114,62,60,116,100,99,111,110,103,114,101,115,115,114
+,101,99,111,114,100,101,100,117,108,116,105,109,97,116,101,115,111,108,117,116,
+105,111,110,60,117,108,32,105,100,61,34,100,105,115,99,111,118,101,114,72,111,
+109,101,60,47,97,62,119,101,98,115,105,116,101,115,110,101,116,119,111,114,107,
+115,97,108,116,104,111,117,103,104,101,110,116,105,114,101,108,121,109,101,109,
+111,114,105,97,108,109,101,115,115,97,103,101,115,99,111,110,116,105,110,117,101
+,97,99,116,105,118,101,34,62,115,111,109,101,119,104,97,116,118,105,99,116,111,
+114,105,97,87,101,115,116,101,114,110,32,32,116,105,116,108,101,61,34,76,111,99,
+97,116,105,111,110,99,111,110,116,114,97,99,116,118,105,115,105,116,111,114,115,
+68,111,119,110,108,111,97,100,119,105,116,104,111,117,116,32,114,105,103,104,116
+,34,62,10,109,101,97,115,117,114,101,115,119,105,100,116,104,32,61,32,118,97,114
+,105,97,98,108,101,105,110,118,111,108,118,101,100,118,105,114,103,105,110,105,
+97,110,111,114,109,97,108,108,121,104,97,112,112,101,110,101,100,97,99,99,111,
+117,110,116,115,115,116,97,110,100,105,110,103,110,97,116,105,111,110,97,108,82,
+101,103,105,115,116,101,114,112,114,101,112,97,114,101,100,99,111,110,116,114,
+111,108,115,97,99,99,117,114,97,116,101,98,105,114,116,104,100,97,121,115,116,
+114,97,116,101,103,121,111,102,102,105,99,105,97,108,103,114,97,112,104,105,99,
+115,99,114,105,109,105,110,97,108,112,111,115,115,105,98,108,121,99,111,110,115,
+117,109,101,114,80,101,114,115,111,110,97,108,115,112,101,97,107,105,110,103,118
+,97,108,105,100,97,116,101,97,99,104,105,101,118,101,100,46,106,112,103,34,32,47
+,62,109,97,99,104,105,110,101,115,60,47,104,50,62,10,32,32,107,101,121,119,111,
+114,100,115,102,114,105,101,110,100,108,121,98,114,111,116,104,101,114,115,99,
+111,109,98,105,110,101,100,111,114,105,103,105,110,97,108,99,111,109,112,111,115
+,101,100,101,120,112,101,99,116,101,100,97,100,101,113,117,97,116,101,112,97,107
+,105,115,116,97,110,102,111,108,108,111,119,34,32,118,97,108,117,97,98,108,101,
+60,47,108,97,98,101,108,62,114,101,108,97,116,105,118,101,98,114,105,110,103,105
+,110,103,105,110,99,114,101,97,115,101,103,111,118,101,114,110,111,114,112,108,
+117,103,105,110,115,47,76,105,115,116,32,111,102,32,72,101,97,100,101,114,34,62,
+34,32,110,97,109,101,61,34,32,40,38,113,117,111,116,59,103,114,97,100,117,97,116
+,101,60,47,104,101,97,100,62,10,99,111,109,109,101,114,99,101,109,97,108,97,121,
+115,105,97,100,105,114,101,99,116,111,114,109,97,105,110,116,97,105,110,59,104,
+101,105,103,104,116,58,115,99,104,101,100,117,108,101,99,104,97,110,103,105,110,
+103,98,97,99,107,32,116,111,32,99,97,116,104,111,108,105,99,112,97,116,116,101,
+114,110,115,99,111,108,111,114,58,32,35,103,114,101,97,116,101,115,116,115,117,
+112,112,108,105,101,115,114,101,108,105,97,98,108,101,60,47,117,108,62,10,9,9,60
+,115,101,108,101,99,116,32,99,105,116,105,122,101,110,115,99,108,111,116,104,105
+,110,103,119,97,116,99,104,105,110,103,60,108,105,32,105,100,61,34,115,112,101,
+99,105,102,105,99,99,97,114,114,121,105,110,103,115,101,110,116,101,110,99,101,
+60,99,101,110,116,101,114,62,99,111,110,116,114,97,115,116,116,104,105,110,107,
+105,110,103,99,97,116,99,104,40,101,41,115,111,117,116,104,101,114,110,77,105,99
+,104,97,101,108,32,109,101,114,99,104,97,110,116,99,97,114,111,117,115,101,108,
+112,97,100,100,105,110,103,58,105,110,116,101,114,105,111,114,46,115,112,108,105
+,116,40,34,108,105,122,97,116,105,111,110,79,99,116,111,98,101,114,32,41,123,114
+,101,116,117,114,110,105,109,112,114,111,118,101,100,45,45,38,103,116,59,10,10,
+99,111,118,101,114,97,103,101,99,104,97,105,114,109,97,110,46,112,110,103,34,32,
+47,62,115,117,98,106,101,99,116,115,82,105,99,104,97,114,100,32,119,104,97,116,
+101,118,101,114,112,114,111,98,97,98,108,121,114,101,99,111,118,101,114,121,98,
+97,115,101,98,97,108,108,106,117,100,103,109,101,110,116,99,111,110,110,101,99,
+116,46,46,99,115,115,34,32,47,62,32,119,101,98,115,105,116,101,114,101,112,111,
+114,116,101,100,100,101,102,97,117,108,116,34,47,62,60,47,97,62,13,10,101,108,
+101,99,116,114,105,99,115,99,111,116,108,97,110,100,99,114,101,97,116,105,111,
+110,113,117,97,110,116,105,116,121,46,32,73,83,66,78,32,48,100,105,100,32,110,
+111,116,32,105,110,115,116,97,110,99,101,45,115,101,97,114,99,104,45,34,32,108,
+97,110,103,61,34,115,112,101,97,107,101,114,115,67,111,109,112,117,116,101,114,
+99,111,110,116,97,105,110,115,97,114,99,104,105,118,101,115,109,105,110,105,115,
+116,101,114,114,101,97,99,116,105,111,110,100,105,115,99,111,117,110,116,73,116,
+97,108,105,97,110,111,99,114,105,116,101,114,105,97,115,116,114,111,110,103,108,
+121,58,32,39,104,116,116,112,58,39,115,99,114,105,112,116,39,99,111,118,101,114,
+105,110,103,111,102,102,101,114,105,110,103,97,112,112,101,97,114,101,100,66,114
+,105,116,105,115,104,32,105,100,101,110,116,105,102,121,70,97,99,101,98,111,111,
+107,110,117,109,101,114,111,117,115,118,101,104,105,99,108,101,115,99,111,110,99
+,101,114,110,115,65,109,101,114,105,99,97,110,104,97,110,100,108,105,110,103,100
+,105,118,32,105,100,61,34,87,105,108,108,105,97,109,32,112,114,111,118,105,100,
+101,114,95,99,111,110,116,101,110,116,97,99,99,117,114,97,99,121,115,101,99,116,
+105,111,110,32,97,110,100,101,114,115,111,110,102,108,101,120,105,98,108,101,67,
+97,116,101,103,111,114,121,108,97,119,114,101,110,99,101,60,115,99,114,105,112,
+116,62,108,97,121,111,117,116,61,34,97,112,112,114,111,118,101,100,32,109,97,120
+,105,109,117,109,104,101,97,100,101,114,34,62,60,47,116,97,98,108,101,62,83,101,
+114,118,105,99,101,115,104,97,109,105,108,116,111,110,99,117,114,114,101,110,116
+,32,99,97,110,97,100,105,97,110,99,104,97,110,110,101,108,115,47,116,104,101,109
+,101,115,47,47,97,114,116,105,99,108,101,111,112,116,105,111,110,97,108,112,111,
+114,116,117,103,97,108,118,97,108,117,101,61,34,34,105,110,116,101,114,118,97,
+108,119,105,114,101,108,101,115,115,101,110,116,105,116,108,101,100,97,103,101,
+110,99,105,101,115,83,101,97,114,99,104,34,32,109,101,97,115,117,114,101,100,116
+,104,111,117,115,97,110,100,115,112,101,110,100,105,110,103,38,104,101,108,108,
+105,112,59,110,101,119,32,68,97,116,101,34,32,115,105,122,101,61,34,112,97,103,
+101,78,97,109,101,109,105,100,100,108,101,34,32,34,32,47,62,60,47,97,62,104,105,
+100,100,101,110,34,62,115,101,113,117,101,110,99,101,112,101,114,115,111,110,97,
+108,111,118,101,114,102,108,111,119,111,112,105,110,105,111,110,115,105,108,108,
+105,110,111,105,115,108,105,110,107,115,34,62,10,9,60,116,105,116,108,101,62,118
+,101,114,115,105,111,110,115,115,97,116,117,114,100,97,121,116,101,114,109,105,
+110,97,108,105,116,101,109,112,114,111,112,101,110,103,105,110,101,101,114,115,
+101,99,116,105,111,110,115,100,101,115,105,103,110,101,114,112,114,111,112,111,
+115,97,108,61,34,102,97,108,115,101,34,69,115,112,97,195,177,111,108,114,101,108
+,101,97,115,101,115,115,117,98,109,105,116,34,32,101,114,38,113,117,111,116,59,
+97,100,100,105,116,105,111,110,115,121,109,112,116,111,109,115,111,114,105,101,
+110,116,101,100,114,101,115,111,117,114,99,101,114,105,103,104,116,34,62,60,112,
+108,101,97,115,117,114,101,115,116,97,116,105,111,110,115,104,105,115,116,111,
+114,121,46,108,101,97,118,105,110,103,32,32,98,111,114,100,101,114,61,99,111,110
+,116,101,110,116,115,99,101,110,116,101,114,34,62,46,10,10,83,111,109,101,32,100
+,105,114,101,99,116,101,100,115,117,105,116,97,98,108,101,98,117,108,103,97,114,
+105,97,46,115,104,111,119,40,41,59,100,101,115,105,103,110,101,100,71,101,110,
+101,114,97,108,32,99,111,110,99,101,112,116,115,69,120,97,109,112,108,101,115,
+119,105,108,108,105,97,109,115,79,114,105,103,105,110,97,108,34,62,60,115,112,97
+,110,62,115,101,97,114,99,104,34,62,111,112,101,114,97,116,111,114,114,101,113,
+117,101,115,116,115,97,32,38,113,117,111,116,59,97,108,108,111,119,105,110,103,
+68,111,99,117,109,101,110,116,114,101,118,105,115,105,111,110,46,32,10,10,84,104
+,101,32,121,111,117,114,115,101,108,102,67,111,110,116,97,99,116,32,109,105,99,
+104,105,103,97,110,69,110,103,108,105,115,104,32,99,111,108,117,109,98,105,97,
+112,114,105,111,114,105,116,121,112,114,105,110,116,105,110,103,100,114,105,110,
+107,105,110,103,102,97,99,105,108,105,116,121,114,101,116,117,114,110,101,100,67
+,111,110,116,101,110,116,32,111,102,102,105,99,101,114,115,82,117,115,115,105,97
+,110,32,103,101,110,101,114,97,116,101,45,56,56,53,57,45,49,34,105,110,100,105,
+99,97,116,101,102,97,109,105,108,105,97,114,32,113,117,97,108,105,116,121,109,97
+,114,103,105,110,58,48,32,99,111,110,116,101,110,116,118,105,101,119,112,111,114
+,116,99,111,110,116,97,99,116,115,45,116,105,116,108,101,34,62,112,111,114,116,
+97,98,108,101,46,108,101,110,103,116,104,32,101,108,105,103,105,98,108,101,105,
+110,118,111,108,118,101,115,97,116,108,97,110,116,105,99,111,110,108,111,97,100,
+61,34,100,101,102,97,117,108,116,46,115,117,112,112,108,105,101,100,112,97,121,
+109,101,110,116,115,103,108,111,115,115,97,114,121,10,10,65,102,116,101,114,32,
+103,117,105,100,97,110,99,101,60,47,116,100,62,60,116,100,101,110,99,111,100,105
+,110,103,109,105,100,100,108,101,34,62,99,97,109,101,32,116,111,32,100,105,115,
+112,108,97,121,115,115,99,111,116,116,105,115,104,106,111,110,97,116,104,97,110,
+109,97,106,111,114,105,116,121,119,105,100,103,101,116,115,46,99,108,105,110,105
+,99,97,108,116,104,97,105,108,97,110,100,116,101,97,99,104,101,114,115,60,104,
+101,97,100,62,10,9,97,102,102,101,99,116,101,100,115,117,112,112,111,114,116,115
+,112,111,105,110,116,101,114,59,116,111,83,116,114,105,110,103,60,47,115,109,97,
+108,108,62,111,107,108,97,104,111,109,97,119,105,108,108,32,98,101,32,105,110,
+118,101,115,116,111,114,48,34,32,97,108,116,61,34,104,111,108,105,100,97,121,115
+,82,101,115,111,117,114,99,101,108,105,99,101,110,115,101,100,32,40,119,104,105,
+99,104,32,46,32,65,102,116,101,114,32,99,111,110,115,105,100,101,114,118,105,115
+,105,116,105,110,103,101,120,112,108,111,114,101,114,112,114,105,109,97,114,121,
+32,115,101,97,114,99,104,34,32,97,110,100,114,111,105,100,34,113,117,105,99,107,
+108,121,32,109,101,101,116,105,110,103,115,101,115,116,105,109,97,116,101,59,114
+,101,116,117,114,110,32,59,99,111,108,111,114,58,35,32,104,101,105,103,104,116,
+61,97,112,112,114,111,118,97,108,44,32,38,113,117,111,116,59,32,99,104,101,99,
+107,101,100,46,109,105,110,46,106,115,34,109,97,103,110,101,116,105,99,62,60,47,
+97,62,60,47,104,102,111,114,101,99,97,115,116,46,32,87,104,105,108,101,32,116,
+104,117,114,115,100,97,121,100,118,101,114,116,105,115,101,38,101,97,99,117,116,
+101,59,104,97,115,67,108,97,115,115,101,118,97,108,117,97,116,101,111,114,100,
+101,114,105,110,103,101,120,105,115,116,105,110,103,112,97,116,105,101,110,116,
+115,32,79,110,108,105,110,101,32,99,111,108,111,114,97,100,111,79,112,116,105,
+111,110,115,34,99,97,109,112,98,101,108,108,60,33,45,45,32,101,110,100,60,47,115
+,112,97,110,62,60,60,98,114,32,47,62,13,10,95,112,111,112,117,112,115,124,115,99
+,105,101,110,99,101,115,44,38,113,117,111,116,59,32,113,117,97,108,105,116,121,
+32,87,105,110,100,111,119,115,32,97,115,115,105,103,110,101,100,104,101,105,103,
+104,116,58,32,60,98,32,99,108,97,115,115,108,101,38,113,117,111,116,59,32,118,97
+,108,117,101,61,34,32,67,111,109,112,97,110,121,101,120,97,109,112,108,101,115,
+60,105,102,114,97,109,101,32,98,101,108,105,101,118,101,115,112,114,101,115,101,
+110,116,115,109,97,114,115,104,97,108,108,112,97,114,116,32,111,102,32,112,114,
+111,112,101,114,108,121,41,46,10,10,84,104,101,32,116,97,120,111,110,111,109,121
+,109,117,99,104,32,111,102,32,60,47,115,112,97,110,62,10,34,32,100,97,116,97,45,
+115,114,116,117,103,117,195,170,115,115,99,114,111,108,108,84,111,32,112,114,111
+,106,101,99,116,60,104,101,97,100,62,13,10,97,116,116,111,114,110,101,121,101,
+109,112,104,97,115,105,115,115,112,111,110,115,111,114,115,102,97,110,99,121,98,
+111,120,119,111,114,108,100,39,115,32,119,105,108,100,108,105,102,101,99,104,101
+,99,107,101,100,61,115,101,115,115,105,111,110,115,112,114,111,103,114,97,109,
+109,112,120,59,102,111,110,116,45,32,80,114,111,106,101,99,116,106,111,117,114,
+110,97,108,115,98,101,108,105,101,118,101,100,118,97,99,97,116,105,111,110,116,
+104,111,109,112,115,111,110,108,105,103,104,116,105,110,103,97,110,100,32,116,
+104,101,32,115,112,101,99,105,97,108,32,98,111,114,100,101,114,61,48,99,104,101,
+99,107,105,110,103,60,47,116,98,111,100,121,62,60,98,117,116,116,111,110,32,67,
+111,109,112,108,101,116,101,99,108,101,97,114,102,105,120,10,60,104,101,97,100,
+62,10,97,114,116,105,99,108,101,32,60,115,101,99,116,105,111,110,102,105,110,100
+,105,110,103,115,114,111,108,101,32,105,110,32,112,111,112,117,108,97,114,32,32,
+79,99,116,111,98,101,114,119,101,98,115,105,116,101,32,101,120,112,111,115,117,
+114,101,117,115,101,100,32,116,111,32,32,99,104,97,110,103,101,115,111,112,101,
+114,97,116,101,100,99,108,105,99,107,105,110,103,101,110,116,101,114,105,110,103
+,99,111,109,109,97,110,100,115,105,110,102,111,114,109,101,100,32,110,117,109,98
+,101,114,115,32,32,60,47,100,105,118,62,99,114,101,97,116,105,110,103,111,110,83
+,117,98,109,105,116,109,97,114,121,108,97,110,100,99,111,108,108,101,103,101,115
+,97,110,97,108,121,116,105,99,108,105,115,116,105,110,103,115,99,111,110,116,97,
+99,116,46,108,111,103,103,101,100,73,110,97,100,118,105,115,111,114,121,115,105,
+98,108,105,110,103,115,99,111,110,116,101,110,116,34,115,38,113,117,111,116,59,
+41,115,46,32,84,104,105,115,32,112,97,99,107,97,103,101,115,99,104,101,99,107,98
+,111,120,115,117,103,103,101,115,116,115,112,114,101,103,110,97,110,116,116,111,
+109,111,114,114,111,119,115,112,97,99,105,110,103,61,105,99,111,110,46,112,110,
+103,106,97,112,97,110,101,115,101,99,111,100,101,98,97,115,101,98,117,116,116,
+111,110,34,62,103,97,109,98,108,105,110,103,115,117,99,104,32,97,115,32,44,32,
+119,104,105,108,101,32,60,47,115,112,97,110,62,32,109,105,115,115,111,117,114,
+105,115,112,111,114,116,105,110,103,116,111,112,58,49,112,120,32,46,60,47,115,
+112,97,110,62,116,101,110,115,105,111,110,115,119,105,100,116,104,61,34,50,108,
+97,122,121,108,111,97,100,110,111,118,101,109,98,101,114,117,115,101,100,32,105,
+110,32,104,101,105,103,104,116,61,34,99,114,105,112,116,34,62,10,38,110,98,115,
+112,59,60,47,60,116,114,62,60,116,100,32,104,101,105,103,104,116,58,50,47,112,
+114,111,100,117,99,116,99,111,117,110,116,114,121,32,105,110,99,108,117,100,101,
+32,102,111,111,116,101,114,34,32,38,108,116,59,33,45,45,32,116,105,116,108,101,
+34,62,60,47,106,113,117,101,114,121,46,60,47,102,111,114,109,62,10,40,231,174,
+128,228,189,147,41,40,231,185,129,233,171,148,41,104,114,118,97,116,115,107,105,
+105,116,97,108,105,97,110,111,114,111,109,195,162,110,196,131,116,195,188,114,
+107,195,167,101,216,167,216,177,216,175,217,136,116,97,109,98,105,195,169,110,
+110,111,116,105,99,105,97,115,109,101,110,115,97,106,101,115,112,101,114,115,111
+,110,97,115,100,101,114,101,99,104,111,115,110,97,99,105,111,110,97,108,115,101,
+114,118,105,99,105,111,99,111,110,116,97,99,116,111,117,115,117,97,114,105,111,
+115,112,114,111,103,114,97,109,97,103,111,98,105,101,114,110,111,101,109,112,114
+,101,115,97,115,97,110,117,110,99,105,111,115,118,97,108,101,110,99,105,97,99,
+111,108,111,109,98,105,97,100,101,115,112,117,195,169,115,100,101,112,111,114,
+116,101,115,112,114,111,121,101,99,116,111,112,114,111,100,117,99,116,111,112,
+195,186,98,108,105,99,111,110,111,115,111,116,114,111,115,104,105,115,116,111,
+114,105,97,112,114,101,115,101,110,116,101,109,105,108,108,111,110,101,115,109,
+101,100,105,97,110,116,101,112,114,101,103,117,110,116,97,97,110,116,101,114,105
+,111,114,114,101,99,117,114,115,111,115,112,114,111,98,108,101,109,97,115,97,110
+,116,105,97,103,111,110,117,101,115,116,114,111,115,111,112,105,110,105,195,179,
+110,105,109,112,114,105,109,105,114,109,105,101,110,116,114,97,115,97,109,195,
+169,114,105,99,97,118,101,110,100,101,100,111,114,115,111,99,105,101,100,97,100,
+114,101,115,112,101,99,116,111,114,101,97,108,105,122,97,114,114,101,103,105,115
+,116,114,111,112,97,108,97,98,114,97,115,105,110,116,101,114,195,169,115,101,110
+,116,111,110,99,101,115,101,115,112,101,99,105,97,108,109,105,101,109,98,114,111
+,115,114,101,97,108,105,100,97,100,99,195,179,114,100,111,98,97,122,97,114,97,
+103,111,122,97,112,195,161,103,105,110,97,115,115,111,99,105,97,108,101,115,98,
+108,111,113,117,101,97,114,103,101,115,116,105,195,179,110,97,108,113,117,105,
+108,101,114,115,105,115,116,101,109,97,115,99,105,101,110,99,105,97,115,99,111,
+109,112,108,101,116,111,118,101,114,115,105,195,179,110,99,111,109,112,108,101,
+116,97,101,115,116,117,100,105,111,115,112,195,186,98,108,105,99,97,111,98,106,
+101,116,105,118,111,97,108,105,99,97,110,116,101,98,117,115,99,97,100,111,114,99
+,97,110,116,105,100,97,100,101,110,116,114,97,100,97,115,97,99,99,105,111,110,
+101,115,97,114,99,104,105,118,111,115,115,117,112,101,114,105,111,114,109,97,121
+,111,114,195,173,97,97,108,101,109,97,110,105,97,102,117,110,99,105,195,179,110,
+195,186,108,116,105,109,111,115,104,97,99,105,101,110,100,111,97,113,117,101,108
+,108,111,115,101,100,105,99,105,195,179,110,102,101,114,110,97,110,100,111,97,
+109,98,105,101,110,116,101,102,97,99,101,98,111,111,107,110,117,101,115,116,114,
+97,115,99,108,105,101,110,116,101,115,112,114,111,99,101,115,111,115,98,97,115,
+116,97,110,116,101,112,114,101,115,101,110,116,97,114,101,112,111,114,116,97,114
+,99,111,110,103,114,101,115,111,112,117,98,108,105,99,97,114,99,111,109,101,114,
+99,105,111,99,111,110,116,114,97,116,111,106,195,179,118,101,110,101,115,100,105
+,115,116,114,105,116,111,116,195,169,99,110,105,99,97,99,111,110,106,117,110,116
+,111,101,110,101,114,103,195,173,97,116,114,97,98,97,106,97,114,97,115,116,117,
+114,105,97,115,114,101,99,105,101,110,116,101,117,116,105,108,105,122,97,114,98,
+111,108,101,116,195,173,110,115,97,108,118,97,100,111,114,99,111,114,114,101,99,
+116,97,116,114,97,98,97,106,111,115,112,114,105,109,101,114,111,115,110,101,103,
+111,99,105,111,115,108,105,98,101,114,116,97,100,100,101,116,97,108,108,101,115,
+112,97,110,116,97,108,108,97,112,114,195,179,120,105,109,111,97,108,109,101,114,
+195,173,97,97,110,105,109,97,108,101,115,113,117,105,195,169,110,101,115,99,111,
+114,97,122,195,179,110,115,101,99,99,105,195,179,110,98,117,115,99,97,110,100,
+111,111,112,99,105,111,110,101,115,101,120,116,101,114,105,111,114,99,111,110,99
+,101,112,116,111,116,111,100,97,118,195,173,97,103,97,108,101,114,195,173,97,101
+,115,99,114,105,98,105,114,109,101,100,105,99,105,110,97,108,105,99,101,110,99,
+105,97,99,111,110,115,117,108,116,97,97,115,112,101,99,116,111,115,99,114,195,
+173,116,105,99,97,100,195,179,108,97,114,101,115,106,117,115,116,105,99,105,97,
+100,101,98,101,114,195,161,110,112,101,114,195,173,111,100,111,110,101,99,101,
+115,105,116,97,109,97,110,116,101,110,101,114,112,101,113,117,101,195,177,111,
+114,101,99,105,98,105,100,97,116,114,105,98,117,110,97,108,116,101,110,101,114,
+105,102,101,99,97,110,99,105,195,179,110,99,97,110,97,114,105,97,115,100,101,115
+,99,97,114,103,97,100,105,118,101,114,115,111,115,109,97,108,108,111,114,99,97,
+114,101,113,117,105,101,114,101,116,195,169,99,110,105,99,111,100,101,98,101,114
+,195,173,97,118,105,118,105,101,110,100,97,102,105,110,97,110,122,97,115,97,100,
+101,108,97,110,116,101,102,117,110,99,105,111,110,97,99,111,110,115,101,106,111,
+115,100,105,102,195,173,99,105,108,99,105,117,100,97,100,101,115,97,110,116,105,
+103,117,97,115,97,118,97,110,122,97,100,97,116,195,169,114,109,105,110,111,117,
+110,105,100,97,100,101,115,115,195,161,110,99,104,101,122,99,97,109,112,97,195,
+177,97,115,111,102,116,111,110,105,99,114,101,118,105,115,116,97,115,99,111,110,
+116,105,101,110,101,115,101,99,116,111,114,101,115,109,111,109,101,110,116,111,
+115,102,97,99,117,108,116,97,100,99,114,195,169,100,105,116,111,100,105,118,101,
+114,115,97,115,115,117,112,117,101,115,116,111,102,97,99,116,111,114,101,115,115
+,101,103,117,110,100,111,115,112,101,113,117,101,195,177,97,208,179,208,190,208,
+180,208,176,208,181,209,129,208,187,208,184,208,181,209,129,209,130,209,140,208,
+177,209,139,208,187,208,190,208,177,209,139,209,130,209,140,209,141,209,130,208,
+190,208,188,208,149,209,129,208,187,208,184,209,130,208,190,208,179,208,190,208,
+188,208,181,208,189,209,143,208,178,209,129,208,181,209,133,209,141,209,130,208,
+190,208,185,208,180,208,176,208,182,208,181,208,177,209,139,208,187,208,184,208,
+179,208,190,208,180,209,131,208,180,208,181,208,189,209,140,209,141,209,130,208,
+190,209,130,208,177,209,139,208,187,208,176,209,129,208,181,208,177,209,143,208,
+190,208,180,208,184,208,189,209,129,208,181,208,177,208,181,208,189,208,176,208,
+180,208,190,209,129,208,176,208,185,209,130,209,132,208,190,209,130,208,190,208,
+189,208,181,208,179,208,190,209,129,208,178,208,190,208,184,209,129,208,178,208,
+190,208,185,208,184,208,179,209,128,209,139,209,130,208,190,208,182,208,181,208,
+178,209,129,208,181,208,188,209,129,208,178,208,190,209,142,208,187,208,184,209,
+136,209,140,209,141,209,130,208,184,209,133,208,191,208,190,208,186,208,176,208,
+180,208,189,208,181,208,185,208,180,208,190,208,188,208,176,208,188,208,184,209,
+128,208,176,208,187,208,184,208,177,208,190,209,130,208,181,208,188,209,131,209,
+133,208,190,209,130,209,143,208,180,208,178,209,131,209,133,209,129,208,181,209,
+130,208,184,208,187,209,142,208,180,208,184,208,180,208,181,208,187,208,190,208,
+188,208,184,209,128,208,181,209,130,208,181,208,177,209,143,209,129,208,178,208,
+190,208,181,208,178,208,184,208,180,208,181,209,135,208,181,208,179,208,190,209,
+141,209,130,208,184,208,188,209,129,209,135,208,181,209,130,209,130,208,181,208,
+188,209,139,209,134,208,181,208,189,209,139,209,129,209,130,208,176,208,187,208,
+178,208,181,208,180,209,140,209,130,208,181,208,188,208,181,208,178,208,190,208,
+180,209,139,209,130,208,181,208,177,208,181,208,178,209,139,209,136,208,181,208,
+189,208,176,208,188,208,184,209,130,208,184,208,191,208,176,209,130,208,190,208,
+188,209,131,208,191,209,128,208,176,208,178,208,187,208,184,209,134,208,176,208,
+190,208,180,208,189,208,176,208,179,208,190,208,180,209,139,208,183,208,189,208,
+176,209,142,208,188,208,190,208,179,209,131,208,180,209,128,209,131,208,179,208,
+178,209,129,208,181,208,185,208,184,208,180,208,181,209,130,208,186,208,184,208,
+189,208,190,208,190,208,180,208,189,208,190,208,180,208,181,208,187,208,176,208,
+180,208,181,208,187,208,181,209,129,209,128,208,190,208,186,208,184,209,142,208,
+189,209,143,208,178,208,181,209,129,209,140,208,149,209,129,209,130,209,140,209,
+128,208,176,208,183,208,176,208,189,208,176,209,136,208,184,216,167,217,132,217,
+132,217,135,216,167,217,132,216,170,217,138,216,172,217,133,217,138,216,185,216,
+174,216,167,216,181,216,169,216,167,217,132,216,176,217,138,216,185,217,132,217,
+138,217,135,216,172,216,175,217,138,216,175,216,167,217,132,216,162,217,134,216,
+167,217,132,216,177,216,175,216,170,216,173,217,131,217,133,216,181,217,129,216,
+173,216,169,217,131,216,167,217,134,216,170,216,167,217,132,217,132,217,138,217,
+138,217,131,217,136,217,134,216,180,216,168,217,131,216,169,217,129,217,138,217,
+135,216,167,216,168,217,134,216,167,216,170,216,173,217,136,216,167,216,161,216,
+163,217,131,216,171,216,177,216,174,217,132,216,167,217,132,216,167,217,132,216,
+173,216,168,216,175,217,132,217,138,217,132,216,175,216,177,217,136,216,179,216,
+167,216,182,216,186,216,183,216,170,217,131,217,136,217,134,217,135,217,134,216,
+167,217,131,216,179,216,167,216,173,216,169,217,134,216,167,216,175,217,138,216,
+167,217,132,216,183,216,168,216,185,217,132,217,138,217,131,216,180,217,131,216,
+177,216,167,217,138,217,133,217,131,217,134,217,133,217,134,217,135,216,167,216,
+180,216,177,217,131,216,169,216,177,216,166,217,138,216,179,217,134,216,180,217,
+138,216,183,217,133,216,167,216,176,216,167,216,167,217,132,217,129,217,134,216,
+180,216,168,216,167,216,168,216,170,216,185,216,168,216,177,216,177,216,173,217,
+133,216,169,217,131,216,167,217,129,216,169,217,138,217,130,217,136,217,132,217,
+133,216,177,217,131,216,178,217,131,217,132,217,133,216,169,216,163,216,173,217,
+133,216,175,217,130,217,132,216,168,217,138,217,138,216,185,217,134,217,138,216,
+181,217,136,216,177,216,169,216,183,216,177,217,138,217,130,216,180,216,167,216,
+177,217,131,216,172,217,136,216,167,217,132,216,163,216,174,216,177,217,137,217,
+133,216,185,217,134,216,167,216,167,216,168,216,173,216,171,216,185,216,177,217,
+136,216,182,216,168,216,180,217,131,217,132,217,133,216,179,216,172,217,132,216,
+168,217,134,216,167,217,134,216,174,216,167,217,132,216,175,217,131,216,170,216,
+167,216,168,217,131,217,132,217,138,216,169,216,168,216,175,217,136,217,134,216,
+163,217,138,216,182,216,167,217,138,217,136,216,172,216,175,217,129,216,177,217,
+138,217,130,217,131,216,170,216,168,216,170,216,163,217,129,216,182,217,132,217,
+133,216,183,216,168,216,174,216,167,217,131,216,171,216,177,216,168,216,167,216,
+177,217,131,216,167,217,129,216,182,217,132,216,167,216,173,217,132,217,137,217,
+134,217,129,216,179,217,135,216,163,217,138,216,167,217,133,216,177,216,175,217,
+136,216,175,216,163,217,134,217,135,216,167,216,175,217,138,217,134,216,167,216,
+167,217,132,216,167,217,134,217,133,216,185,216,177,216,182,216,170,216,185,217,
+132,217,133,216,175,216,167,216,174,217,132,217,133,217,133,217,131,217,134,0,0,
+0,0,0,0,0,0,1,0,1,0,1,0,1,0,2,0,2,0,2,0,2,0,4,0,4,0,4,0,4,0,0,1,2,3,4,5,6,7,7,6,
+5,4,3,2,1,0,8,9,10,11,12,13,14,15,15,14,13,12,11,10,9,8,16,17,18,19,20,21,22,23,
+23,22,21,20,19,18,17,16,24,25,26,27,28,29,30,31,31,30,29,28,27,26,25,24,255,255,
+255,255,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,2,0,0,0,2,0,0,0,1,0,0,0,1,0,0,0,
+3,0,0,0,255,255,0,1,0,0,0,1,0,0,255,255,0,1,0,0,0,8,0,8,0,8,0,8,0,0,0,1,0,2,0,3,
+0,4,0,5,0,6,0,7,114,101,115,111,117,114,99,101,115,99,111,117,110,116,114,105,
+101,115,113,117,101,115,116,105,111,110,115,101,113,117,105,112,109,101,110,116,
+99,111,109,109,117,110,105,116,121,97,118,97,105,108,97,98,108,101,104,105,103,
+104,108,105,103,104,116,68,84,68,47,120,104,116,109,108,109,97,114,107,101,116,
+105,110,103,107,110,111,119,108,101,100,103,101,115,111,109,101,116,104,105,110,
+103,99,111,110,116,97,105,110,101,114,100,105,114,101,99,116,105,111,110,115,117
+,98,115,99,114,105,98,101,97,100,118,101,114,116,105,115,101,99,104,97,114,97,99
+,116,101,114,34,32,118,97,108,117,101,61,34,60,47,115,101,108,101,99,116,62,65,
+117,115,116,114,97,108,105,97,34,32,99,108,97,115,115,61,34,115,105,116,117,97,
+116,105,111,110,97,117,116,104,111,114,105,116,121,102,111,108,108,111,119,105,
+110,103,112,114,105,109,97,114,105,108,121,111,112,101,114,97,116,105,111,110,99
+,104,97,108,108,101,110,103,101,100,101,118,101,108,111,112,101,100,97,110,111,
+110,121,109,111,117,115,102,117,110,99,116,105,111,110,32,102,117,110,99,116,105
+,111,110,115,99,111,109,112,97,110,105,101,115,115,116,114,117,99,116,117,114,
+101,97,103,114,101,101,109,101,110,116,34,32,116,105,116,108,101,61,34,112,111,
+116,101,110,116,105,97,108,101,100,117,99,97,116,105,111,110,97,114,103,117,109,
+101,110,116,115,115,101,99,111,110,100,97,114,121,99,111,112,121,114,105,103,104
+,116,108,97,110,103,117,97,103,101,115,101,120,99,108,117,115,105,118,101,99,111
+,110,100,105,116,105,111,110,60,47,102,111,114,109,62,13,10,115,116,97,116,101,
+109,101,110,116,97,116,116,101,110,116,105,111,110,66,105,111,103,114,97,112,104
+,121,125,32,101,108,115,101,32,123,10,115,111,108,117,116,105,111,110,115,119,
+104,101,110,32,116,104,101,32,65,110,97,108,121,116,105,99,115,116,101,109,112,
+108,97,116,101,115,100,97,110,103,101,114,111,117,115,115,97,116,101,108,108,105
+,116,101,100,111,99,117,109,101,110,116,115,112,117,98,108,105,115,104,101,114,
+105,109,112,111,114,116,97,110,116,112,114,111,116,111,116,121,112,101,105,110,
+102,108,117,101,110,99,101,38,114,97,113,117,111,59,60,47,101,102,102,101,99,116
+,105,118,101,103,101,110,101,114,97,108,108,121,116,114,97,110,115,102,111,114,
+109,98,101,97,117,116,105,102,117,108,116,114,97,110,115,112,111,114,116,111,114
+,103,97,110,105,122,101,100,112,117,98,108,105,115,104,101,100,112,114,111,109,
+105,110,101,110,116,117,110,116,105,108,32,116,104,101,116,104,117,109,98,110,97
+,105,108,78,97,116,105,111,110,97,108,32,46,102,111,99,117,115,40,41,59,111,118,
+101,114,32,116,104,101,32,109,105,103,114,97,116,105,111,110,97,110,110,111,117,
+110,99,101,100,102,111,111,116,101,114,34,62,10,101,120,99,101,112,116,105,111,
+110,108,101,115,115,32,116,104,97,110,101,120,112,101,110,115,105,118,101,102,
+111,114,109,97,116,105,111,110,102,114,97,109,101,119,111,114,107,116,101,114,
+114,105,116,111,114,121,110,100,105,99,97,116,105,111,110,99,117,114,114,101,110
+,116,108,121,99,108,97,115,115,78,97,109,101,99,114,105,116,105,99,105,115,109,
+116,114,97,100,105,116,105,111,110,101,108,115,101,119,104,101,114,101,65,108,
+101,120,97,110,100,101,114,97,112,112,111,105,110,116,101,100,109,97,116,101,114
+,105,97,108,115,98,114,111,97,100,99,97,115,116,109,101,110,116,105,111,110,101,
+100,97,102,102,105,108,105,97,116,101,60,47,111,112,116,105,111,110,62,116,114,
+101,97,116,109,101,110,116,100,105,102,102,101,114,101,110,116,47,100,101,102,97
+,117,108,116,46,80,114,101,115,105,100,101,110,116,111,110,99,108,105,99,107,61,
+34,98,105,111,103,114,97,112,104,121,111,116,104,101,114,119,105,115,101,112,101
+,114,109,97,110,101,110,116,70,114,97,110,195,167,97,105,115,72,111,108,108,121,
+119,111,111,100,101,120,112,97,110,115,105,111,110,115,116,97,110,100,97,114,100
+,115,60,47,115,116,121,108,101,62,10,114,101,100,117,99,116,105,111,110,68,101,
+99,101,109,98,101,114,32,112,114,101,102,101,114,114,101,100,67,97,109,98,114,
+105,100,103,101,111,112,112,111,110,101,110,116,115,66,117,115,105,110,101,115,
+115,32,99,111,110,102,117,115,105,111,110,62,10,60,116,105,116,108,101,62,112,
+114,101,115,101,110,116,101,100,101,120,112,108,97,105,110,101,100,100,111,101,
+115,32,110,111,116,32,119,111,114,108,100,119,105,100,101,105,110,116,101,114,
+102,97,99,101,112,111,115,105,116,105,111,110,115,110,101,119,115,112,97,112,101
+,114,60,47,116,97,98,108,101,62,10,109,111,117,110,116,97,105,110,115,108,105,
+107,101,32,116,104,101,32,101,115,115,101,110,116,105,97,108,102,105,110,97,110,
+99,105,97,108,115,101,108,101,99,116,105,111,110,97,99,116,105,111,110,61,34,47,
+97,98,97,110,100,111,110,101,100,69,100,117,99,97,116,105,111,110,112,97,114,115
+,101,73,110,116,40,115,116,97,98,105,108,105,116,121,117,110,97,98,108,101,32,
+116,111,60,47,116,105,116,108,101,62,10,114,101,108,97,116,105,111,110,115,78,
+111,116,101,32,116,104,97,116,101,102,102,105,99,105,101,110,116,112,101,114,102
+,111,114,109,101,100,116,119,111,32,121,101,97,114,115,83,105,110,99,101,32,116,
+104,101,116,104,101,114,101,102,111,114,101,119,114,97,112,112,101,114,34,62,97,
+108,116,101,114,110,97,116,101,105,110,99,114,101,97,115,101,100,66,97,116,116,
+108,101,32,111,102,112,101,114,99,101,105,118,101,100,116,114,121,105,110,103,32
+,116,111,110,101,99,101,115,115,97,114,121,112,111,114,116,114,97,121,101,100,
+101,108,101,99,116,105,111,110,115,69,108,105,122,97,98,101,116,104,60,47,105,
+102,114,97,109,101,62,100,105,115,99,111,118,101,114,121,105,110,115,117,114,97,
+110,99,101,115,46,108,101,110,103,116,104,59,108,101,103,101,110,100,97,114,121,
+71,101,111,103,114,97,112,104,121,99,97,110,100,105,100,97,116,101,99,111,114,
+112,111,114,97,116,101,115,111,109,101,116,105,109,101,115,115,101,114,118,105,
+99,101,115,46,105,110,104,101,114,105,116,101,100,60,47,115,116,114,111,110,103,
+62,67,111,109,109,117,110,105,116,121,114,101,108,105,103,105,111,117,115,108,
+111,99,97,116,105,111,110,115,67,111,109,109,105,116,116,101,101,98,117,105,108,
+100,105,110,103,115,116,104,101,32,119,111,114,108,100,110,111,32,108,111,110,
+103,101,114,98,101,103,105,110,110,105,110,103,114,101,102,101,114,101,110,99,
+101,99,97,110,110,111,116,32,98,101,102,114,101,113,117,101,110,99,121,116,121,
+112,105,99,97,108,108,121,105,110,116,111,32,116,104,101,32,114,101,108,97,116,
+105,118,101,59,114,101,99,111,114,100,105,110,103,112,114,101,115,105,100,101,
+110,116,105,110,105,116,105,97,108,108,121,116,101,99,104,110,105,113,117,101,
+116,104,101,32,111,116,104,101,114,105,116,32,99,97,110,32,98,101,101,120,105,
+115,116,101,110,99,101,117,110,100,101,114,108,105,110,101,116,104,105,115,32,
+116,105,109,101,116,101,108,101,112,104,111,110,101,105,116,101,109,115,99,111,
+112,101,112,114,97,99,116,105,99,101,115,97,100,118,97,110,116,97,103,101,41,59,
+114,101,116,117,114,110,32,70,111,114,32,111,116,104,101,114,112,114,111,118,105
+,100,105,110,103,100,101,109,111,99,114,97,99,121,98,111,116,104,32,116,104,101,
+32,101,120,116,101,110,115,105,118,101,115,117,102,102,101,114,105,110,103,115,
+117,112,112,111,114,116,101,100,99,111,109,112,117,116,101,114,115,32,102,117,
+110,99,116,105,111,110,112,114,97,99,116,105,99,97,108,115,97,105,100,32,116,104
+,97,116,105,116,32,109,97,121,32,98,101,69,110,103,108,105,115,104,60,47,102,114
+,111,109,32,116,104,101,32,115,99,104,101,100,117,108,101,100,100,111,119,110,
+108,111,97,100,115,60,47,108,97,98,101,108,62,10,115,117,115,112,101,99,116,101,
+100,109,97,114,103,105,110,58,32,48,115,112,105,114,105,116,117,97,108,60,47,104
+,101,97,100,62,10,10,109,105,99,114,111,115,111,102,116,103,114,97,100,117,97,
+108,108,121,100,105,115,99,117,115,115,101,100,104,101,32,98,101,99,97,109,101,
+101,120,101,99,117,116,105,118,101,106,113,117,101,114,121,46,106,115,104,111,
+117,115,101,104,111,108,100,99,111,110,102,105,114,109,101,100,112,117,114,99,
+104,97,115,101,100,108,105,116,101,114,97,108,108,121,100,101,115,116,114,111,
+121,101,100,117,112,32,116,111,32,116,104,101,118,97,114,105,97,116,105,111,110,
+114,101,109,97,105,110,105,110,103,105,116,32,105,115,32,110,111,116,99,101,110,
+116,117,114,105,101,115,74,97,112,97,110,101,115,101,32,97,109,111,110,103,32,
+116,104,101,99,111,109,112,108,101,116,101,100,97,108,103,111,114,105,116,104,
+109,105,110,116,101,114,101,115,116,115,114,101,98,101,108,108,105,111,110,117,
+110,100,101,102,105,110,101,100,101,110,99,111,117,114,97,103,101,114,101,115,
+105,122,97,98,108,101,105,110,118,111,108,118,105,110,103,115,101,110,115,105,
+116,105,118,101,117,110,105,118,101,114,115,97,108,112,114,111,118,105,115,105,
+111,110,40,97,108,116,104,111,117,103,104,102,101,97,116,117,114,105,110,103,99,
+111,110,100,117,99,116,101,100,41,44,32,119,104,105,99,104,32,99,111,110,116,105
+,110,117,101,100,45,104,101,97,100,101,114,34,62,70,101,98,114,117,97,114,121,32
+,110,117,109,101,114,111,117,115,32,111,118,101,114,102,108,111,119,58,99,111,
+109,112,111,110,101,110,116,102,114,97,103,109,101,110,116,115,101,120,99,101,
+108,108,101,110,116,99,111,108,115,112,97,110,61,34,116,101,99,104,110,105,99,97
+,108,110,101,97,114,32,116,104,101,32,65,100,118,97,110,99,101,100,32,115,111,
+117,114,99,101,32,111,102,101,120,112,114,101,115,115,101,100,72,111,110,103,32,
+75,111,110,103,32,70,97,99,101,98,111,111,107,109,117,108,116,105,112,108,101,32
+,109,101,99,104,97,110,105,115,109,101,108,101,118,97,116,105,111,110,111,102,
+102,101,110,115,105,118,101,60,47,102,111,114,109,62,10,9,115,112,111,110,115,
+111,114,101,100,100,111,99,117,109,101,110,116,46,111,114,32,38,113,117,111,116,
+59,116,104,101,114,101,32,97,114,101,116,104,111,115,101,32,119,104,111,109,111,
+118,101,109,101,110,116,115,112,114,111,99,101,115,115,101,115,100,105,102,102,
+105,99,117,108,116,115,117,98,109,105,116,116,101,100,114,101,99,111,109,109,101
+,110,100,99,111,110,118,105,110,99,101,100,112,114,111,109,111,116,105,110,103,
+34,32,119,105,100,116,104,61,34,46,114,101,112,108,97,99,101,40,99,108,97,115,
+115,105,99,97,108,99,111,97,108,105,116,105,111,110,104,105,115,32,102,105,114,
+115,116,100,101,99,105,115,105,111,110,115,97,115,115,105,115,116,97,110,116,105
+,110,100,105,99,97,116,101,100,101,118,111,108,117,116,105,111,110,45,119,114,97
+,112,112,101,114,34,101,110,111,117,103,104,32,116,111,97,108,111,110,103,32,116
+,104,101,100,101,108,105,118,101,114,101,100,45,45,62,13,10,60,33,45,45,65,109,
+101,114,105,99,97,110,32,112,114,111,116,101,99,116,101,100,78,111,118,101,109,
+98,101,114,32,60,47,115,116,121,108,101,62,60,102,117,114,110,105,116,117,114,
+101,73,110,116,101,114,110,101,116,32,32,111,110,98,108,117,114,61,34,115,117,
+115,112,101,110,100,101,100,114,101,99,105,112,105,101,110,116,98,97,115,101,100
+,32,111,110,32,77,111,114,101,111,118,101,114,44,97,98,111,108,105,115,104,101,
+100,99,111,108,108,101,99,116,101,100,119,101,114,101,32,109,97,100,101,101,109,
+111,116,105,111,110,97,108,101,109,101,114,103,101,110,99,121,110,97,114,114,97,
+116,105,118,101,97,100,118,111,99,97,116,101,115,112,120,59,98,111,114,100,101,
+114,99,111,109,109,105,116,116,101,100,100,105,114,61,34,108,116,114,34,101,109,
+112,108,111,121,101,101,115,114,101,115,101,97,114,99,104,46,32,115,101,108,101,
+99,116,101,100,115,117,99,99,101,115,115,111,114,99,117,115,116,111,109,101,114,
+115,100,105,115,112,108,97,121,101,100,83,101,112,116,101,109,98,101,114,97,100,
+100,67,108,97,115,115,40,70,97,99,101,98,111,111,107,32,115,117,103,103,101,115,
+116,101,100,97,110,100,32,108,97,116,101,114,111,112,101,114,97,116,105,110,103,
+101,108,97,98,111,114,97,116,101,83,111,109,101,116,105,109,101,115,73,110,115,
+116,105,116,117,116,101,99,101,114,116,97,105,110,108,121,105,110,115,116,97,108
+,108,101,100,102,111,108,108,111,119,101,114,115,74,101,114,117,115,97,108,101,
+109,116,104,101,121,32,104,97,118,101,99,111,109,112,117,116,105,110,103,103,101
+,110,101,114,97,116,101,100,112,114,111,118,105,110,99,101,115,103,117,97,114,97
+,110,116,101,101,97,114,98,105,116,114,97,114,121,114,101,99,111,103,110,105,122
+,101,119,97,110,116,101,100,32,116,111,112,120,59,119,105,100,116,104,58,116,104
+,101,111,114,121,32,111,102,98,101,104,97,118,105,111,117,114,87,104,105,108,101
+,32,116,104,101,101,115,116,105,109,97,116,101,100,98,101,103,97,110,32,116,111,
+32,105,116,32,98,101,99,97,109,101,109,97,103,110,105,116,117,100,101,109,117,
+115,116,32,104,97,118,101,109,111,114,101,32,116,104,97,110,68,105,114,101,99,
+116,111,114,121,101,120,116,101,110,115,105,111,110,115,101,99,114,101,116,97,
+114,121,110,97,116,117,114,97,108,108,121,111,99,99,117,114,114,105,110,103,118,
+97,114,105,97,98,108,101,115,103,105,118,101,110,32,116,104,101,112,108,97,116,
+102,111,114,109,46,60,47,108,97,98,101,108,62,60,102,97,105,108,101,100,32,116,
+111,99,111,109,112,111,117,110,100,115,107,105,110,100,115,32,111,102,32,115,111
+,99,105,101,116,105,101,115,97,108,111,110,103,115,105,100,101,32,45,45,38,103,
+116,59,10,10,115,111,117,116,104,119,101,115,116,116,104,101,32,114,105,103,104,
+116,114,97,100,105,97,116,105,111,110,109,97,121,32,104,97,118,101,32,117,110,
+101,115,99,97,112,101,40,115,112,111,107,101,110,32,105,110,34,32,104,114,101,
+102,61,34,47,112,114,111,103,114,97,109,109,101,111,110,108,121,32,116,104,101,
+32,99,111,109,101,32,102,114,111,109,100,105,114,101,99,116,111,114,121,98,117,
+114,105,101,100,32,105,110,97,32,115,105,109,105,108,97,114,116,104,101,121,32,
+119,101,114,101,60,47,102,111,110,116,62,60,47,78,111,114,119,101,103,105,97,110
+,115,112,101,99,105,102,105,101,100,112,114,111,100,117,99,105,110,103,112,97,
+115,115,101,110,103,101,114,40,110,101,119,32,68,97,116,101,116,101,109,112,111,
+114,97,114,121,102,105,99,116,105,111,110,97,108,65,102,116,101,114,32,116,104,
+101,101,113,117,97,116,105,111,110,115,100,111,119,110,108,111,97,100,46,114,101
+,103,117,108,97,114,108,121,100,101,118,101,108,111,112,101,114,97,98,111,118,
+101,32,116,104,101,108,105,110,107,101,100,32,116,111,112,104,101,110,111,109,
+101,110,97,112,101,114,105,111,100,32,111,102,116,111,111,108,116,105,112,34,62,
+115,117,98,115,116,97,110,99,101,97,117,116,111,109,97,116,105,99,97,115,112,101
+,99,116,32,111,102,65,109,111,110,103,32,116,104,101,99,111,110,110,101,99,116,
+101,100,101,115,116,105,109,97,116,101,115,65,105,114,32,70,111,114,99,101,115,
+121,115,116,101,109,32,111,102,111,98,106,101,99,116,105,118,101,105,109,109,101
+,100,105,97,116,101,109,97,107,105,110,103,32,105,116,112,97,105,110,116,105,110
+,103,115,99,111,110,113,117,101,114,101,100,97,114,101,32,115,116,105,108,108,
+112,114,111,99,101,100,117,114,101,103,114,111,119,116,104,32,111,102,104,101,97
+,100,101,100,32,98,121,69,117,114,111,112,101,97,110,32,100,105,118,105,115,105,
+111,110,115,109,111,108,101,99,117,108,101,115,102,114,97,110,99,104,105,115,101
+,105,110,116,101,110,116,105,111,110,97,116,116,114,97,99,116,101,100,99,104,105
+,108,100,104,111,111,100,97,108,115,111,32,117,115,101,100,100,101,100,105,99,97
+,116,101,100,115,105,110,103,97,112,111,114,101,100,101,103,114,101,101,32,111,
+102,102,97,116,104,101,114,32,111,102,99,111,110,102,108,105,99,116,115,60,47,97
+,62,60,47,112,62,10,99,97,109,101,32,102,114,111,109,119,101,114,101,32,117,115,
+101,100,110,111,116,101,32,116,104,97,116,114,101,99,101,105,118,105,110,103,69,
+120,101,99,117,116,105,118,101,101,118,101,110,32,109,111,114,101,97,99,99,101,
+115,115,32,116,111,99,111,109,109,97,110,100,101,114,80,111,108,105,116,105,99,
+97,108,109,117,115,105,99,105,97,110,115,100,101,108,105,99,105,111,117,115,112,
+114,105,115,111,110,101,114,115,97,100,118,101,110,116,32,111,102,85,84,70,45,56
+,34,32,47,62,60,33,91,67,68,65,84,65,91,34,62,67,111,110,116,97,99,116,83,111,
+117,116,104,101,114,110,32,98,103,99,111,108,111,114,61,34,115,101,114,105,101,
+115,32,111,102,46,32,73,116,32,119,97,115,32,105,110,32,69,117,114,111,112,101,
+112,101,114,109,105,116,116,101,100,118,97,108,105,100,97,116,101,46,97,112,112,
+101,97,114,105,110,103,111,102,102,105,99,105,97,108,115,115,101,114,105,111,117
+,115,108,121,45,108,97,110,103,117,97,103,101,105,110,105,116,105,97,116,101,100
+,101,120,116,101,110,100,105,110,103,108,111,110,103,45,116,101,114,109,105,110,
+102,108,97,116,105,111,110,115,117,99,104,32,116,104,97,116,103,101,116,67,111,
+111,107,105,101,109,97,114,107,101,100,32,98,121,60,47,98,117,116,116,111,110,62
+,105,109,112,108,101,109,101,110,116,98,117,116,32,105,116,32,105,115,105,110,99
+,114,101,97,115,101,115,100,111,119,110,32,116,104,101,32,114,101,113,117,105,
+114,105,110,103,100,101,112,101,110,100,101,110,116,45,45,62,10,60,33,45,45,32,
+105,110,116,101,114,118,105,101,119,87,105,116,104,32,116,104,101,32,99,111,112,
+105,101,115,32,111,102,99,111,110,115,101,110,115,117,115,119,97,115,32,98,117,
+105,108,116,86,101,110,101,122,117,101,108,97,40,102,111,114,109,101,114,108,121
+,116,104,101,32,115,116,97,116,101,112,101,114,115,111,110,110,101,108,115,116,
+114,97,116,101,103,105,99,102,97,118,111,117,114,32,111,102,105,110,118,101,110,
+116,105,111,110,87,105,107,105,112,101,100,105,97,99,111,110,116,105,110,101,110
+,116,118,105,114,116,117,97,108,108,121,119,104,105,99,104,32,119,97,115,112,114
+,105,110,99,105,112,108,101,67,111,109,112,108,101,116,101,32,105,100,101,110,
+116,105,99,97,108,115,104,111,119,32,116,104,97,116,112,114,105,109,105,116,105,
+118,101,97,119,97,121,32,102,114,111,109,109,111,108,101,99,117,108,97,114,112,
+114,101,99,105,115,101,108,121,100,105,115,115,111,108,118,101,100,85,110,100,
+101,114,32,116,104,101,118,101,114,115,105,111,110,61,34,62,38,110,98,115,112,59
+,60,47,73,116,32,105,115,32,116,104,101,32,84,104,105,115,32,105,115,32,119,105,
+108,108,32,104,97,118,101,111,114,103,97,110,105,115,109,115,115,111,109,101,32,
+116,105,109,101,70,114,105,101,100,114,105,99,104,119,97,115,32,102,105,114,115,
+116,116,104,101,32,111,110,108,121,32,102,97,99,116,32,116,104,97,116,102,111,
+114,109,32,105,100,61,34,112,114,101,99,101,100,105,110,103,84,101,99,104,110,
+105,99,97,108,112,104,121,115,105,99,105,115,116,111,99,99,117,114,115,32,105,
+110,110,97,118,105,103,97,116,111,114,115,101,99,116,105,111,110,34,62,115,112,
+97,110,32,105,100,61,34,115,111,117,103,104,116,32,116,111,98,101,108,111,119,32
+,116,104,101,115,117,114,118,105,118,105,110,103,125,60,47,115,116,121,108,101,
+62,104,105,115,32,100,101,97,116,104,97,115,32,105,110,32,116,104,101,99,97,117,
+115,101,100,32,98,121,112,97,114,116,105,97,108,108,121,101,120,105,115,116,105,
+110,103,32,117,115,105,110,103,32,116,104,101,119,97,115,32,103,105,118,101,110,
+97,32,108,105,115,116,32,111,102,108,101,118,101,108,115,32,111,102,110,111,116,
+105,111,110,32,111,102,79,102,102,105,99,105,97,108,32,100,105,115,109,105,115,
+115,101,100,115,99,105,101,110,116,105,115,116,114,101,115,101,109,98,108,101,
+115,100,117,112,108,105,99,97,116,101,101,120,112,108,111,115,105,118,101,114,
+101,99,111,118,101,114,101,100,97,108,108,32,111,116,104,101,114,103,97,108,108,
+101,114,105,101,115,123,112,97,100,100,105,110,103,58,112,101,111,112,108,101,32
+,111,102,114,101,103,105,111,110,32,111,102,97,100,100,114,101,115,115,101,115,
+97,115,115,111,99,105,97,116,101,105,109,103,32,97,108,116,61,34,105,110,32,109,
+111,100,101,114,110,115,104,111,117,108,100,32,98,101,109,101,116,104,111,100,32
+,111,102,114,101,112,111,114,116,105,110,103,116,105,109,101,115,116,97,109,112,
+110,101,101,100,101,100,32,116,111,116,104,101,32,71,114,101,97,116,114,101,103,
+97,114,100,105,110,103,115,101,101,109,101,100,32,116,111,118,105,101,119,101,
+100,32,97,115,105,109,112,97,99,116,32,111,110,105,100,101,97,32,116,104,97,116,
+116,104,101,32,87,111,114,108,100,104,101,105,103,104,116,32,111,102,101,120,112
+,97,110,100,105,110,103,84,104,101,115,101,32,97,114,101,99,117,114,114,101,110,
+116,34,62,99,97,114,101,102,117,108,108,121,109,97,105,110,116,97,105,110,115,99
+,104,97,114,103,101,32,111,102,67,108,97,115,115,105,99,97,108,97,100,100,114,
+101,115,115,101,100,112,114,101,100,105,99,116,101,100,111,119,110,101,114,115,
+104,105,112,60,100,105,118,32,105,100,61,34,114,105,103,104,116,34,62,13,10,114,
+101,115,105,100,101,110,99,101,108,101,97,118,101,32,116,104,101,99,111,110,116,
+101,110,116,34,62,97,114,101,32,111,102,116,101,110,32,32,125,41,40,41,59,13,10,
+112,114,111,98,97,98,108,121,32,80,114,111,102,101,115,115,111,114,45,98,117,116
+,116,111,110,34,32,114,101,115,112,111,110,100,101,100,115,97,121,115,32,116,104
+,97,116,104,97,100,32,116,111,32,98,101,112,108,97,99,101,100,32,105,110,72,117,
+110,103,97,114,105,97,110,115,116,97,116,117,115,32,111,102,115,101,114,118,101,
+115,32,97,115,85,110,105,118,101,114,115,97,108,101,120,101,99,117,116,105,111,
+110,97,103,103,114,101,103,97,116,101,102,111,114,32,119,104,105,99,104,105,110,
+102,101,99,116,105,111,110,97,103,114,101,101,100,32,116,111,104,111,119,101,118
+,101,114,44,32,112,111,112,117,108,97,114,34,62,112,108,97,99,101,100,32,111,110
+,99,111,110,115,116,114,117,99,116,101,108,101,99,116,111,114,97,108,115,121,109
+,98,111,108,32,111,102,105,110,99,108,117,100,105,110,103,114,101,116,117,114,
+110,32,116,111,97,114,99,104,105,116,101,99,116,67,104,114,105,115,116,105,97,
+110,112,114,101,118,105,111,117,115,32,108,105,118,105,110,103,32,105,110,101,97
+,115,105,101,114,32,116,111,112,114,111,102,101,115,115,111,114,10,38,108,116,59
+,33,45,45,32,101,102,102,101,99,116,32,111,102,97,110,97,108,121,116,105,99,115,
+119,97,115,32,116,97,107,101,110,119,104,101,114,101,32,116,104,101,116,111,111,
+107,32,111,118,101,114,98,101,108,105,101,102,32,105,110,65,102,114,105,107,97,
+97,110,115,97,115,32,102,97,114,32,97,115,112,114,101,118,101,110,116,101,100,
+119,111,114,107,32,119,105,116,104,97,32,115,112,101,99,105,97,108,60,102,105,
+101,108,100,115,101,116,67,104,114,105,115,116,109,97,115,82,101,116,114,105,101
+,118,101,100,10,10,73,110,32,116,104,101,32,98,97,99,107,32,105,110,116,111,110,
+111,114,116,104,101,97,115,116,109,97,103,97,122,105,110,101,115,62,60,115,116,
+114,111,110,103,62,99,111,109,109,105,116,116,101,101,103,111,118,101,114,110,
+105,110,103,103,114,111,117,112,115,32,111,102,115,116,111,114,101,100,32,105,
+110,101,115,116,97,98,108,105,115,104,97,32,103,101,110,101,114,97,108,105,116,
+115,32,102,105,114,115,116,116,104,101,105,114,32,111,119,110,112,111,112,117,
+108,97,116,101,100,97,110,32,111,98,106,101,99,116,67,97,114,105,98,98,101,97,
+110,97,108,108,111,119,32,116,104,101,100,105,115,116,114,105,99,116,115,119,105
+,115,99,111,110,115,105,110,108,111,99,97,116,105,111,110,46,59,32,119,105,100,
+116,104,58,32,105,110,104,97,98,105,116,101,100,83,111,99,105,97,108,105,115,116
+,74,97,110,117,97,114,121,32,49,60,47,102,111,111,116,101,114,62,115,105,109,105
+,108,97,114,108,121,99,104,111,105,99,101,32,111,102,116,104,101,32,115,97,109,
+101,32,115,112,101,99,105,102,105,99,32,98,117,115,105,110,101,115,115,32,84,104
+,101,32,102,105,114,115,116,46,108,101,110,103,116,104,59,32,100,101,115,105,114
+,101,32,116,111,100,101,97,108,32,119,105,116,104,115,105,110,99,101,32,116,104,
+101,117,115,101,114,65,103,101,110,116,99,111,110,99,101,105,118,101,100,105,110
+,100,101,120,46,112,104,112,97,115,32,38,113,117,111,116,59,101,110,103,97,103,
+101,32,105,110,114,101,99,101,110,116,108,121,44,102,101,119,32,121,101,97,114,
+115,119,101,114,101,32,97,108,115,111,10,60,104,101,97,100,62,10,60,101,100,105,
+116,101,100,32,98,121,97,114,101,32,107,110,111,119,110,99,105,116,105,101,115,
+32,105,110,97,99,99,101,115,115,107,101,121,99,111,110,100,101,109,110,101,100,
+97,108,115,111,32,104,97,118,101,115,101,114,118,105,99,101,115,44,102,97,109,
+105,108,121,32,111,102,83,99,104,111,111,108,32,111,102,99,111,110,118,101,114,
+116,101,100,110,97,116,117,114,101,32,111,102,32,108,97,110,103,117,97,103,101,
+109,105,110,105,115,116,101,114,115,60,47,111,98,106,101,99,116,62,116,104,101,
+114,101,32,105,115,32,97,32,112,111,112,117,108,97,114,115,101,113,117,101,110,
+99,101,115,97,100,118,111,99,97,116,101,100,84,104,101,121,32,119,101,114,101,97
+,110,121,32,111,116,104,101,114,108,111,99,97,116,105,111,110,61,101,110,116,101
+,114,32,116,104,101,109,117,99,104,32,109,111,114,101,114,101,102,108,101,99,116
+,101,100,119,97,115,32,110,97,109,101,100,111,114,105,103,105,110,97,108,32,97,
+32,116,121,112,105,99,97,108,119,104,101,110,32,116,104,101,121,101,110,103,105,
+110,101,101,114,115,99,111,117,108,100,32,110,111,116,114,101,115,105,100,101,
+110,116,115,119,101,100,110,101,115,100,97,121,116,104,101,32,116,104,105,114,
+100,32,112,114,111,100,117,99,116,115,74,97,110,117,97,114,121,32,50,119,104,97,
+116,32,116,104,101,121,97,32,99,101,114,116,97,105,110,114,101,97,99,116,105,111
+,110,115,112,114,111,99,101,115,115,111,114,97,102,116,101,114,32,104,105,115,
+116,104,101,32,108,97,115,116,32,99,111,110,116,97,105,110,101,100,34,62,60,47,
+100,105,118,62,10,60,47,97,62,60,47,116,100,62,100,101,112,101,110,100,32,111,
+110,115,101,97,114,99,104,34,62,10,112,105,101,99,101,115,32,111,102,99,111,109,
+112,101,116,105,110,103,82,101,102,101,114,101,110,99,101,116,101,110,110,101,
+115,115,101,101,119,104,105,99,104,32,104,97,115,32,118,101,114,115,105,111,110,
+61,60,47,115,112,97,110,62,32,60,60,47,104,101,97,100,101,114,62,103,105,118,101
+,115,32,116,104,101,104,105,115,116,111,114,105,97,110,118,97,108,117,101,61,34,
+34,62,112,97,100,100,105,110,103,58,48,118,105,101,119,32,116,104,97,116,116,111
+,103,101,116,104,101,114,44,116,104,101,32,109,111,115,116,32,119,97,115,32,102,
+111,117,110,100,115,117,98,115,101,116,32,111,102,97,116,116,97,99,107,32,111,
+110,99,104,105,108,100,114,101,110,44,112,111,105,110,116,115,32,111,102,112,101
+,114,115,111,110,97,108,32,112,111,115,105,116,105,111,110,58,97,108,108,101,103
+,101,100,108,121,67,108,101,118,101,108,97,110,100,119,97,115,32,108,97,116,101,
+114,97,110,100,32,97,102,116,101,114,97,114,101,32,103,105,118,101,110,119,97,
+115,32,115,116,105,108,108,115,99,114,111,108,108,105,110,103,100,101,115,105,
+103,110,32,111,102,109,97,107,101,115,32,116,104,101,109,117,99,104,32,108,101,
+115,115,65,109,101,114,105,99,97,110,115,46,10,10,65,102,116,101,114,32,44,32,98
+,117,116,32,116,104,101,77,117,115,101,117,109,32,111,102,108,111,117,105,115,
+105,97,110,97,40,102,114,111,109,32,116,104,101,109,105,110,110,101,115,111,116,
+97,112,97,114,116,105,99,108,101,115,97,32,112,114,111,99,101,115,115,68,111,109
+,105,110,105,99,97,110,118,111,108,117,109,101,32,111,102,114,101,116,117,114,
+110,105,110,103,100,101,102,101,110,115,105,118,101,48,48,112,120,124,114,105,
+103,104,109,97,100,101,32,102,114,111,109,109,111,117,115,101,111,118,101,114,34
+,32,115,116,121,108,101,61,34,115,116,97,116,101,115,32,111,102,40,119,104,105,
+99,104,32,105,115,99,111,110,116,105,110,117,101,115,70,114,97,110,99,105,115,99
+,111,98,117,105,108,100,105,110,103,32,119,105,116,104,111,117,116,32,97,119,105
+,116,104,32,115,111,109,101,119,104,111,32,119,111,117,108,100,97,32,102,111,114
+,109,32,111,102,97,32,112,97,114,116,32,111,102,98,101,102,111,114,101,32,105,
+116,107,110,111,119,110,32,97,115,32,32,83,101,114,118,105,99,101,115,108,111,99
+,97,116,105,111,110,32,97,110,100,32,111,102,116,101,110,109,101,97,115,117,114,
+105,110,103,97,110,100,32,105,116,32,105,115,112,97,112,101,114,98,97,99,107,118
+,97,108,117,101,115,32,111,102,13,10,60,116,105,116,108,101,62,61,32,119,105,110
+,100,111,119,46,100,101,116,101,114,109,105,110,101,101,114,38,113,117,111,116,
+59,32,112,108,97,121,101,100,32,98,121,97,110,100,32,101,97,114,108,121,60,47,99
+,101,110,116,101,114,62,102,114,111,109,32,116,104,105,115,116,104,101,32,116,
+104,114,101,101,112,111,119,101,114,32,97,110,100,111,102,32,38,113,117,111,116,
+59,105,110,110,101,114,72,84,77,76,60,97,32,104,114,101,102,61,34,121,58,105,110
+,108,105,110,101,59,67,104,117,114,99,104,32,111,102,116,104,101,32,101,118,101,
+110,116,118,101,114,121,32,104,105,103,104,111,102,102,105,99,105,97,108,32,45,
+104,101,105,103,104,116,58,32,99,111,110,116,101,110,116,61,34,47,99,103,105,45,
+98,105,110,47,116,111,32,99,114,101,97,116,101,97,102,114,105,107,97,97,110,115,
+101,115,112,101,114,97,110,116,111,102,114,97,110,195,167,97,105,115,108,97,116,
+118,105,101,197,161,117,108,105,101,116,117,118,105,197,179,196,140,101,197,161,
+116,105,110,97,196,141,101,197,161,116,105,110,97,224,185,132,224,184,151,224,
+184,162,230,151,165,230,156,172,232,170,158,231,174,128,228,189,147,229,173,151,
+231,185,129,233,171,148,229,173,151,237,149,156,234,181,173,236,150,180,228,184,
+186,228,187,128,228,185,136,232,174,161,231,174,151,230,156,186,231,172,148,232,
+174,176,230,156,172,232,168,142,232,171,150,229,141,128,230,156,141,229,138,161,
+229,153,168,228,186,146,232,129,148,231,189,145,230,136,191,229,156,176,228,186,
+167,228,191,177,228,185,144,233,131,168,229,135,186,231,137,136,231,164,190,230,
+142,146,232,161,140,230,166,156,233,131,168,232,144,189,230,160,188,232,191,155,
+228,184,128,230,173,165,230,148,175,228,187,152,229,174,157,233,170,140,232,175,
+129,231,160,129,229,167,148,229,145,152,228,188,154,230,149,176,230,141,174,229,
+186,147,230,182,136,232,180,185,232,128,133,229,138,158,229,133,172,229,174,164,
+232,174,168,232,174,186,229,140,186,230,183,177,229,156,179,229,184,130,230,146,
+173,230,148,190,229,153,168,229,140,151,228,186,172,229,184,130,229,164,167,229,
+173,166,231,148,159,232,182,138,230,157,165,232,182,138,231,174,161,231,144,134,
+229,145,152,228,191,161,230,129,175,231,189,145,115,101,114,118,105,99,105,111,
+115,97,114,116,195,173,99,117,108,111,97,114,103,101,110,116,105,110,97,98,97,
+114,99,101,108,111,110,97,99,117,97,108,113,117,105,101,114,112,117,98,108,105,
+99,97,100,111,112,114,111,100,117,99,116,111,115,112,111,108,195,173,116,105,99,
+97,114,101,115,112,117,101,115,116,97,119,105,107,105,112,101,100,105,97,115,105
+,103,117,105,101,110,116,101,98,195,186,115,113,117,101,100,97,99,111,109,117,
+110,105,100,97,100,115,101,103,117,114,105,100,97,100,112,114,105,110,99,105,112
+,97,108,112,114,101,103,117,110,116,97,115,99,111,110,116,101,110,105,100,111,
+114,101,115,112,111,110,100,101,114,118,101,110,101,122,117,101,108,97,112,114,
+111,98,108,101,109,97,115,100,105,99,105,101,109,98,114,101,114,101,108,97,99,
+105,195,179,110,110,111,118,105,101,109,98,114,101,115,105,109,105,108,97,114,
+101,115,112,114,111,121,101,99,116,111,115,112,114,111,103,114,97,109,97,115,105
+,110,115,116,105,116,117,116,111,97,99,116,105,118,105,100,97,100,101,110,99,117
+,101,110,116,114,97,101,99,111,110,111,109,195,173,97,105,109,195,161,103,101,
+110,101,115,99,111,110,116,97,99,116,97,114,100,101,115,99,97,114,103,97,114,110
+,101,99,101,115,97,114,105,111,97,116,101,110,99,105,195,179,110,116,101,108,195
+,169,102,111,110,111,99,111,109,105,115,105,195,179,110,99,97,110,99,105,111,110
+,101,115,99,97,112,97,99,105,100,97,100,101,110,99,111,110,116,114,97,114,97,110
+,195,161,108,105,115,105,115,102,97,118,111,114,105,116,111,115,116,195,169,114,
+109,105,110,111,115,112,114,111,118,105,110,99,105,97,101,116,105,113,117,101,
+116,97,115,101,108,101,109,101,110,116,111,115,102,117,110,99,105,111,110,101,
+115,114,101,115,117,108,116,97,100,111,99,97,114,195,161,99,116,101,114,112,114,
+111,112,105,101,100,97,100,112,114,105,110,99,105,112,105,111,110,101,99,101,115
+,105,100,97,100,109,117,110,105,99,105,112,97,108,99,114,101,97,99,105,195,179,
+110,100,101,115,99,97,114,103,97,115,112,114,101,115,101,110,99,105,97,99,111,
+109,101,114,99,105,97,108,111,112,105,110,105,111,110,101,115,101,106,101,114,99
+,105,99,105,111,101,100,105,116,111,114,105,97,108,115,97,108,97,109,97,110,99,
+97,103,111,110,122,195,161,108,101,122,100,111,99,117,109,101,110,116,111,112,
+101,108,195,173,99,117,108,97,114,101,99,105,101,110,116,101,115,103,101,110,101
+,114,97,108,101,115,116,97,114,114,97,103,111,110,97,112,114,195,161,99,116,105,
+99,97,110,111,118,101,100,97,100,101,115,112,114,111,112,117,101,115,116,97,112,
+97,99,105,101,110,116,101,115,116,195,169,99,110,105,99,97,115,111,98,106,101,
+116,105,118,111,115,99,111,110,116,97,99,116,111,115,224,164,174,224,165,135,224
+,164,130,224,164,178,224,164,191,224,164,143,224,164,185,224,165,136,224,164,130
+,224,164,151,224,164,175,224,164,190,224,164,184,224,164,190,224,164,165,224,164
+,143,224,164,181,224,164,130,224,164,176,224,164,185,224,165,135,224,164,149,224
+,165,139,224,164,136,224,164,149,224,165,129,224,164,155,224,164,176,224,164,185
+,224,164,190,224,164,172,224,164,190,224,164,166,224,164,149,224,164,185,224,164
+,190,224,164,184,224,164,173,224,165,128,224,164,185,224,165,129,224,164,143,224
+,164,176,224,164,185,224,165,128,224,164,174,224,165,136,224,164,130,224,164,166
+,224,164,191,224,164,168,224,164,172,224,164,190,224,164,164,100,105,112,108,111
+,100,111,99,115,224,164,184,224,164,174,224,164,175,224,164,176,224,165,130,224,
+164,170,224,164,168,224,164,190,224,164,174,224,164,170,224,164,164,224,164,190,
+224,164,171,224,164,191,224,164,176,224,164,148,224,164,184,224,164,164,224,164,
+164,224,164,176,224,164,185,224,164,178,224,165,139,224,164,151,224,164,185,224,
+165,129,224,164,134,224,164,172,224,164,190,224,164,176,224,164,166,224,165,135,
+224,164,182,224,164,185,224,165,129,224,164,136,224,164,150,224,165,135,224,164,
+178,224,164,175,224,164,166,224,164,191,224,164,149,224,164,190,224,164,174,224,
+164,181,224,165,135,224,164,172,224,164,164,224,165,128,224,164,168,224,164,172,
+224,165,128,224,164,154,224,164,174,224,165,140,224,164,164,224,164,184,224,164,
+190,224,164,178,224,164,178,224,165,135,224,164,150,224,164,156,224,165,137,224,
+164,172,224,164,174,224,164,166,224,164,166,224,164,164,224,164,165,224,164,190,
+224,164,168,224,164,185,224,165,128,224,164,182,224,164,185,224,164,176,224,164,
+133,224,164,178,224,164,151,224,164,149,224,164,173,224,165,128,224,164,168,224,
+164,151,224,164,176,224,164,170,224,164,190,224,164,184,224,164,176,224,164,190,
+224,164,164,224,164,149,224,164,191,224,164,143,224,164,137,224,164,184,224,165,
+135,224,164,151,224,164,175,224,165,128,224,164,185,224,165,130,224,164,129,224,
+164,134,224,164,151,224,165,135,224,164,159,224,165,128,224,164,174,224,164,150,
+224,165,139,224,164,156,224,164,149,224,164,190,224,164,176,224,164,133,224,164,
+173,224,165,128,224,164,151,224,164,175,224,165,135,224,164,164,224,165,129,224,
+164,174,224,164,181,224,165,139,224,164,159,224,164,166,224,165,135,224,164,130,
+224,164,133,224,164,151,224,164,176,224,164,144,224,164,184,224,165,135,224,164,
+174,224,165,135,224,164,178,224,164,178,224,164,151,224,164,190,224,164,185,224,
+164,190,224,164,178,224,164,138,224,164,170,224,164,176,224,164,154,224,164,190,
+224,164,176,224,164,144,224,164,184,224,164,190,224,164,166,224,165,135,224,164,
+176,224,164,156,224,164,191,224,164,184,224,164,166,224,164,191,224,164,178,224,
+164,172,224,164,130,224,164,166,224,164,172,224,164,168,224,164,190,224,164,185,
+224,165,130,224,164,130,224,164,178,224,164,190,224,164,150,224,164,156,224,165,
+128,224,164,164,224,164,172,224,164,159,224,164,168,224,164,174,224,164,191,224,
+164,178,224,164,135,224,164,184,224,165,135,224,164,134,224,164,168,224,165,135,
+224,164,168,224,164,175,224,164,190,224,164,149,224,165,129,224,164,178,224,164,
+178,224,165,137,224,164,151,224,164,173,224,164,190,224,164,151,224,164,176,224,
+165,135,224,164,178,224,164,156,224,164,151,224,164,185,224,164,176,224,164,190,
+224,164,174,224,164,178,224,164,151,224,165,135,224,164,170,224,165,135,224,164,
+156,224,164,185,224,164,190,224,164,165,224,164,135,224,164,184,224,165,128,224,
+164,184,224,164,185,224,165,128,224,164,149,224,164,178,224,164,190,224,164,160,
+224,165,128,224,164,149,224,164,185,224,164,190,224,164,129,224,164,166,224,165,
+130,224,164,176,224,164,164,224,164,185,224,164,164,224,164,184,224,164,190,224,
+164,164,224,164,175,224,164,190,224,164,166,224,164,134,224,164,175,224,164,190,
+224,164,170,224,164,190,224,164,149,224,164,149,224,165,140,224,164,168,224,164,
+182,224,164,190,224,164,174,224,164,166,224,165,135,224,164,150,224,164,175,224,
+164,185,224,165,128,224,164,176,224,164,190,224,164,175,224,164,150,224,165,129,
+224,164,166,224,164,178,224,164,151,224,165,128,99,97,116,101,103,111,114,105,
+101,115,101,120,112,101,114,105,101,110,99,101,60,47,116,105,116,108,101,62,13,
+10,67,111,112,121,114,105,103,104,116,32,106,97,118,97,115,99,114,105,112,116,99
+,111,110,100,105,116,105,111,110,115,101,118,101,114,121,116,104,105,110,103,60,
+112,32,99,108,97,115,115,61,34,116,101,99,104,110,111,108,111,103,121,98,97,99,
+107,103,114,111,117,110,100,60,97,32,99,108,97,115,115,61,34,109,97,110,97,103,
+101,109,101,110,116,38,99,111,112,121,59,32,50,48,49,106,97,118,97,83,99,114,105
+,112,116,99,104,97,114,97,99,116,101,114,115,98,114,101,97,100,99,114,117,109,98
+,116,104,101,109,115,101,108,118,101,115,104,111,114,105,122,111,110,116,97,108,
+103,111,118,101,114,110,109,101,110,116,67,97,108,105,102,111,114,110,105,97,97,
+99,116,105,118,105,116,105,101,115,100,105,115,99,111,118,101,114,101,100,78,97,
+118,105,103,97,116,105,111,110,116,114,97,110,115,105,116,105,111,110,99,111,110
+,110,101,99,116,105,111,110,110,97,118,105,103,97,116,105,111,110,97,112,112,101
+,97,114,97,110,99,101,60,47,116,105,116,108,101,62,60,109,99,104,101,99,107,98,
+111,120,34,32,116,101,99,104,110,105,113,117,101,115,112,114,111,116,101,99,116,
+105,111,110,97,112,112,97,114,101,110,116,108,121,97,115,32,119,101,108,108,32,
+97,115,117,110,116,39,44,32,39,85,65,45,114,101,115,111,108,117,116,105,111,110,
+111,112,101,114,97,116,105,111,110,115,116,101,108,101,118,105,115,105,111,110,
+116,114,97,110,115,108,97,116,101,100,87,97,115,104,105,110,103,116,111,110,110,
+97,118,105,103,97,116,111,114,46,32,61,32,119,105,110,100,111,119,46,105,109,112
+,114,101,115,115,105,111,110,38,108,116,59,98,114,38,103,116,59,108,105,116,101,
+114,97,116,117,114,101,112,111,112,117,108,97,116,105,111,110,98,103,99,111,108,
+111,114,61,34,35,101,115,112,101,99,105,97,108,108,121,32,99,111,110,116,101,110
+,116,61,34,112,114,111,100,117,99,116,105,111,110,110,101,119,115,108,101,116,
+116,101,114,112,114,111,112,101,114,116,105,101,115,100,101,102,105,110,105,116,
+105,111,110,108,101,97,100,101,114,115,104,105,112,84,101,99,104,110,111,108,111
+,103,121,80,97,114,108,105,97,109,101,110,116,99,111,109,112,97,114,105,115,111,
+110,117,108,32,99,108,97,115,115,61,34,46,105,110,100,101,120,79,102,40,34,99,
+111,110,99,108,117,115,105,111,110,100,105,115,99,117,115,115,105,111,110,99,111
+,109,112,111,110,101,110,116,115,98,105,111,108,111,103,105,99,97,108,82,101,118
+,111,108,117,116,105,111,110,95,99,111,110,116,97,105,110,101,114,117,110,100,
+101,114,115,116,111,111,100,110,111,115,99,114,105,112,116,62,60,112,101,114,109
+,105,115,115,105,111,110,101,97,99,104,32,111,116,104,101,114,97,116,109,111,115
+,112,104,101,114,101,32,111,110,102,111,99,117,115,61,34,60,102,111,114,109,32,
+105,100,61,34,112,114,111,99,101,115,115,105,110,103,116,104,105,115,46,118,97,
+108,117,101,103,101,110,101,114,97,116,105,111,110,67,111,110,102,101,114,101,
+110,99,101,115,117,98,115,101,113,117,101,110,116,119,101,108,108,45,107,110,111
+,119,110,118,97,114,105,97,116,105,111,110,115,114,101,112,117,116,97,116,105,
+111,110,112,104,101,110,111,109,101,110,111,110,100,105,115,99,105,112,108,105,
+110,101,108,111,103,111,46,112,110,103,34,32,40,100,111,99,117,109,101,110,116,
+44,98,111,117,110,100,97,114,105,101,115,101,120,112,114,101,115,115,105,111,110
+,115,101,116,116,108,101,109,101,110,116,66,97,99,107,103,114,111,117,110,100,
+111,117,116,32,111,102,32,116,104,101,101,110,116,101,114,112,114,105,115,101,40
+,34,104,116,116,112,115,58,34,32,117,110,101,115,99,97,112,101,40,34,112,97,115,
+115,119,111,114,100,34,32,100,101,109,111,99,114,97,116,105,99,60,97,32,104,114,
+101,102,61,34,47,119,114,97,112,112,101,114,34,62,10,109,101,109,98,101,114,115,
+104,105,112,108,105,110,103,117,105,115,116,105,99,112,120,59,112,97,100,100,105
+,110,103,112,104,105,108,111,115,111,112,104,121,97,115,115,105,115,116,97,110,
+99,101,117,110,105,118,101,114,115,105,116,121,102,97,99,105,108,105,116,105,101
+,115,114,101,99,111,103,110,105,122,101,100,112,114,101,102,101,114,101,110,99,
+101,105,102,32,40,116,121,112,101,111,102,109,97,105,110,116,97,105,110,101,100,
+118,111,99,97,98,117,108,97,114,121,104,121,112,111,116,104,101,115,105,115,46,
+115,117,98,109,105,116,40,41,59,38,97,109,112,59,110,98,115,112,59,97,110,110,
+111,116,97,116,105,111,110,98,101,104,105,110,100,32,116,104,101,70,111,117,110,
+100,97,116,105,111,110,112,117,98,108,105,115,104,101,114,34,97,115,115,117,109,
+112,116,105,111,110,105,110,116,114,111,100,117,99,101,100,99,111,114,114,117,
+112,116,105,111,110,115,99,105,101,110,116,105,115,116,115,101,120,112,108,105,
+99,105,116,108,121,105,110,115,116,101,97,100,32,111,102,100,105,109,101,110,115
+,105,111,110,115,32,111,110,67,108,105,99,107,61,34,99,111,110,115,105,100,101,
+114,101,100,100,101,112,97,114,116,109,101,110,116,111,99,99,117,112,97,116,105,
+111,110,115,111,111,110,32,97,102,116,101,114,105,110,118,101,115,116,109,101,
+110,116,112,114,111,110,111,117,110,99,101,100,105,100,101,110,116,105,102,105,
+101,100,101,120,112,101,114,105,109,101,110,116,77,97,110,97,103,101,109,101,110
+,116,103,101,111,103,114,97,112,104,105,99,34,32,104,101,105,103,104,116,61,34,
+108,105,110,107,32,114,101,108,61,34,46,114,101,112,108,97,99,101,40,47,100,101,
+112,114,101,115,115,105,111,110,99,111,110,102,101,114,101,110,99,101,112,117,
+110,105,115,104,109,101,110,116,101,108,105,109,105,110,97,116,101,100,114,101,
+115,105,115,116,97,110,99,101,97,100,97,112,116,97,116,105,111,110,111,112,112,
+111,115,105,116,105,111,110,119,101,108,108,32,107,110,111,119,110,115,117,112,
+112,108,101,109,101,110,116,100,101,116,101,114,109,105,110,101,100,104,49,32,99
+,108,97,115,115,61,34,48,112,120,59,109,97,114,103,105,110,109,101,99,104,97,110
+,105,99,97,108,115,116,97,116,105,115,116,105,99,115,99,101,108,101,98,114,97,
+116,101,100,71,111,118,101,114,110,109,101,110,116,10,10,68,117,114,105,110,103,
+32,116,100,101,118,101,108,111,112,101,114,115,97,114,116,105,102,105,99,105,97,
+108,101,113,117,105,118,97,108,101,110,116,111,114,105,103,105,110,97,116,101,
+100,67,111,109,109,105,115,115,105,111,110,97,116,116,97,99,104,109,101,110,116,
+60,115,112,97,110,32,105,100,61,34,116,104,101,114,101,32,119,101,114,101,78,101
+,100,101,114,108,97,110,100,115,98,101,121,111,110,100,32,116,104,101,114,101,
+103,105,115,116,101,114,101,100,106,111,117,114,110,97,108,105,115,116,102,114,
+101,113,117,101,110,116,108,121,97,108,108,32,111,102,32,116,104,101,108,97,110,
+103,61,34,101,110,34,32,60,47,115,116,121,108,101,62,13,10,97,98,115,111,108,117
+,116,101,59,32,115,117,112,112,111,114,116,105,110,103,101,120,116,114,101,109,
+101,108,121,32,109,97,105,110,115,116,114,101,97,109,60,47,115,116,114,111,110,
+103,62,32,112,111,112,117,108,97,114,105,116,121,101,109,112,108,111,121,109,101
+,110,116,60,47,116,97,98,108,101,62,13,10,32,99,111,108,115,112,97,110,61,34,60,
+47,102,111,114,109,62,10,32,32,99,111,110,118,101,114,115,105,111,110,97,98,111,
+117,116,32,116,104,101,32,60,47,112,62,60,47,100,105,118,62,105,110,116,101,103,
+114,97,116,101,100,34,32,108,97,110,103,61,34,101,110,80,111,114,116,117,103,117
+,101,115,101,115,117,98,115,116,105,116,117,116,101,105,110,100,105,118,105,100,
+117,97,108,105,109,112,111,115,115,105,98,108,101,109,117,108,116,105,109,101,
+100,105,97,97,108,109,111,115,116,32,97,108,108,112,120,32,115,111,108,105,100,
+32,35,97,112,97,114,116,32,102,114,111,109,115,117,98,106,101,99,116,32,116,111,
+105,110,32,69,110,103,108,105,115,104,99,114,105,116,105,99,105,122,101,100,101,
+120,99,101,112,116,32,102,111,114,103,117,105,100,101,108,105,110,101,115,111,
+114,105,103,105,110,97,108,108,121,114,101,109,97,114,107,97,98,108,101,116,104,
+101,32,115,101,99,111,110,100,104,50,32,99,108,97,115,115,61,34,60,97,32,116,105
+,116,108,101,61,34,40,105,110,99,108,117,100,105,110,103,112,97,114,97,109,101,
+116,101,114,115,112,114,111,104,105,98,105,116,101,100,61,32,34,104,116,116,112,
+58,47,47,100,105,99,116,105,111,110,97,114,121,112,101,114,99,101,112,116,105,
+111,110,114,101,118,111,108,117,116,105,111,110,102,111,117,110,100,97,116,105,
+111,110,112,120,59,104,101,105,103,104,116,58,115,117,99,99,101,115,115,102,117,
+108,115,117,112,112,111,114,116,101,114,115,109,105,108,108,101,110,110,105,117,
+109,104,105,115,32,102,97,116,104,101,114,116,104,101,32,38,113,117,111,116,59,
+110,111,45,114,101,112,101,97,116,59,99,111,109,109,101,114,99,105,97,108,105,
+110,100,117,115,116,114,105,97,108,101,110,99,111,117,114,97,103,101,100,97,109,
+111,117,110,116,32,111,102,32,117,110,111,102,102,105,99,105,97,108,101,102,102,
+105,99,105,101,110,99,121,82,101,102,101,114,101,110,99,101,115,99,111,111,114,
+100,105,110,97,116,101,100,105,115,99,108,97,105,109,101,114,101,120,112,101,100
+,105,116,105,111,110,100,101,118,101,108,111,112,105,110,103,99,97,108,99,117,
+108,97,116,101,100,115,105,109,112,108,105,102,105,101,100,108,101,103,105,116,
+105,109,97,116,101,115,117,98,115,116,114,105,110,103,40,48,34,32,99,108,97,115,
+115,61,34,99,111,109,112,108,101,116,101,108,121,105,108,108,117,115,116,114,97,
+116,101,102,105,118,101,32,121,101,97,114,115,105,110,115,116,114,117,109,101,
+110,116,80,117,98,108,105,115,104,105,110,103,49,34,32,99,108,97,115,115,61,34,
+112,115,121,99,104,111,108,111,103,121,99,111,110,102,105,100,101,110,99,101,110
+,117,109,98,101,114,32,111,102,32,97,98,115,101,110,99,101,32,111,102,102,111,99
+,117,115,101,100,32,111,110,106,111,105,110,101,100,32,116,104,101,115,116,114,
+117,99,116,117,114,101,115,112,114,101,118,105,111,117,115,108,121,62,60,47,105,
+102,114,97,109,101,62,111,110,99,101,32,97,103,97,105,110,98,117,116,32,114,97,
+116,104,101,114,105,109,109,105,103,114,97,110,116,115,111,102,32,99,111,117,114
+,115,101,44,97,32,103,114,111,117,112,32,111,102,76,105,116,101,114,97,116,117,
+114,101,85,110,108,105,107,101,32,116,104,101,60,47,97,62,38,110,98,115,112,59,
+10,102,117,110,99,116,105,111,110,32,105,116,32,119,97,115,32,116,104,101,67,111
+,110,118,101,110,116,105,111,110,97,117,116,111,109,111,98,105,108,101,80,114,
+111,116,101,115,116,97,110,116,97,103,103,114,101,115,115,105,118,101,97,102,116
+,101,114,32,116,104,101,32,83,105,109,105,108,97,114,108,121,44,34,32,47,62,60,
+47,100,105,118,62,99,111,108,108,101,99,116,105,111,110,13,10,102,117,110,99,116
+,105,111,110,118,105,115,105,98,105,108,105,116,121,116,104,101,32,117,115,101,
+32,111,102,118,111,108,117,110,116,101,101,114,115,97,116,116,114,97,99,116,105,
+111,110,117,110,100,101,114,32,116,104,101,32,116,104,114,101,97,116,101,110,101
+,100,42,60,33,91,67,68,65,84,65,91,105,109,112,111,114,116,97,110,99,101,105,110
+,32,103,101,110,101,114,97,108,116,104,101,32,108,97,116,116,101,114,60,47,102,
+111,114,109,62,10,60,47,46,105,110,100,101,120,79,102,40,39,105,32,61,32,48,59,
+32,105,32,60,100,105,102,102,101,114,101,110,99,101,100,101,118,111,116,101,100,
+32,116,111,116,114,97,100,105,116,105,111,110,115,115,101,97,114,99,104,32,102,
+111,114,117,108,116,105,109,97,116,101,108,121,116,111,117,114,110,97,109,101,
+110,116,97,116,116,114,105,98,117,116,101,115,115,111,45,99,97,108,108,101,100,
+32,125,10,60,47,115,116,121,108,101,62,101,118,97,108,117,97,116,105,111,110,101
+,109,112,104,97,115,105,122,101,100,97,99,99,101,115,115,105,98,108,101,60,47,
+115,101,99,116,105,111,110,62,115,117,99,99,101,115,115,105,111,110,97,108,111,
+110,103,32,119,105,116,104,77,101,97,110,119,104,105,108,101,44,105,110,100,117,
+115,116,114,105,101,115,60,47,97,62,60,98,114,32,47,62,104,97,115,32,98,101,99,
+111,109,101,97,115,112,101,99,116,115,32,111,102,84,101,108,101,118,105,115,105,
+111,110,115,117,102,102,105,99,105,101,110,116,98,97,115,107,101,116,98,97,108,
+108,98,111,116,104,32,115,105,100,101,115,99,111,110,116,105,110,117,105,110,103
+,97,110,32,97,114,116,105,99,108,101,60,105,109,103,32,97,108,116,61,34,97,100,
+118,101,110,116,117,114,101,115,104,105,115,32,109,111,116,104,101,114,109,97,
+110,99,104,101,115,116,101,114,112,114,105,110,99,105,112,108,101,115,112,97,114
+,116,105,99,117,108,97,114,99,111,109,109,101,110,116,97,114,121,101,102,102,101
+,99,116,115,32,111,102,100,101,99,105,100,101,100,32,116,111,34,62,60,115,116,
+114,111,110,103,62,112,117,98,108,105,115,104,101,114,115,74,111,117,114,110,97,
+108,32,111,102,100,105,102,102,105,99,117,108,116,121,102,97,99,105,108,105,116,
+97,116,101,97,99,99,101,112,116,97,98,108,101,115,116,121,108,101,46,99,115,115,
+34,9,102,117,110,99,116,105,111,110,32,105,110,110,111,118,97,116,105,111,110,62
+,67,111,112,121,114,105,103,104,116,115,105,116,117,97,116,105,111,110,115,119,
+111,117,108,100,32,104,97,118,101,98,117,115,105,110,101,115,115,101,115,68,105,
+99,116,105,111,110,97,114,121,115,116,97,116,101,109,101,110,116,115,111,102,116
+,101,110,32,117,115,101,100,112,101,114,115,105,115,116,101,110,116,105,110,32,
+74,97,110,117,97,114,121,99,111,109,112,114,105,115,105,110,103,60,47,116,105,
+116,108,101,62,10,9,100,105,112,108,111,109,97,116,105,99,99,111,110,116,97,105,
+110,105,110,103,112,101,114,102,111,114,109,105,110,103,101,120,116,101,110,115,
+105,111,110,115,109,97,121,32,110,111,116,32,98,101,99,111,110,99,101,112,116,32
+,111,102,32,111,110,99,108,105,99,107,61,34,73,116,32,105,115,32,97,108,115,111,
+102,105,110,97,110,99,105,97,108,32,109,97,107,105,110,103,32,116,104,101,76,117
+,120,101,109,98,111,117,114,103,97,100,100,105,116,105,111,110,97,108,97,114,101
+,32,99,97,108,108,101,100,101,110,103,97,103,101,100,32,105,110,34,115,99,114,
+105,112,116,34,41,59,98,117,116,32,105,116,32,119,97,115,101,108,101,99,116,114,
+111,110,105,99,111,110,115,117,98,109,105,116,61,34,10,60,33,45,45,32,69,110,100
+,32,101,108,101,99,116,114,105,99,97,108,111,102,102,105,99,105,97,108,108,121,
+115,117,103,103,101,115,116,105,111,110,116,111,112,32,111,102,32,116,104,101,
+117,110,108,105,107,101,32,116,104,101,65,117,115,116,114,97,108,105,97,110,79,
+114,105,103,105,110,97,108,108,121,114,101,102,101,114,101,110,99,101,115,10,60,
+47,104,101,97,100,62,13,10,114,101,99,111,103,110,105,115,101,100,105,110,105,
+116,105,97,108,105,122,101,108,105,109,105,116,101,100,32,116,111,65,108,101,120
+,97,110,100,114,105,97,114,101,116,105,114,101,109,101,110,116,65,100,118,101,
+110,116,117,114,101,115,102,111,117,114,32,121,101,97,114,115,10,10,38,108,116,
+59,33,45,45,32,105,110,99,114,101,97,115,105,110,103,100,101,99,111,114,97,116,
+105,111,110,104,51,32,99,108,97,115,115,61,34,111,114,105,103,105,110,115,32,111
+,102,111,98,108,105,103,97,116,105,111,110,114,101,103,117,108,97,116,105,111,
+110,99,108,97,115,115,105,102,105,101,100,40,102,117,110,99,116,105,111,110,40,
+97,100,118,97,110,116,97,103,101,115,98,101,105,110,103,32,116,104,101,32,104,
+105,115,116,111,114,105,97,110,115,60,98,97,115,101,32,104,114,101,102,114,101,
+112,101,97,116,101,100,108,121,119,105,108,108,105,110,103,32,116,111,99,111,109
+,112,97,114,97,98,108,101,100,101,115,105,103,110,97,116,101,100,110,111,109,105
+,110,97,116,105,111,110,102,117,110,99,116,105,111,110,97,108,105,110,115,105,
+100,101,32,116,104,101,114,101,118,101,108,97,116,105,111,110,101,110,100,32,111
+,102,32,116,104,101,115,32,102,111,114,32,116,104,101,32,97,117,116,104,111,114,
+105,122,101,100,114,101,102,117,115,101,100,32,116,111,116,97,107,101,32,112,108
+,97,99,101,97,117,116,111,110,111,109,111,117,115,99,111,109,112,114,111,109,105
+,115,101,112,111,108,105,116,105,99,97,108,32,114,101,115,116,97,117,114,97,110,
+116,116,119,111,32,111,102,32,116,104,101,70,101,98,114,117,97,114,121,32,50,113
+,117,97,108,105,116,121,32,111,102,115,119,102,111,98,106,101,99,116,46,117,110,
+100,101,114,115,116,97,110,100,110,101,97,114,108,121,32,97,108,108,119,114,105,
+116,116,101,110,32,98,121,105,110,116,101,114,118,105,101,119,115,34,32,119,105,
+100,116,104,61,34,49,119,105,116,104,100,114,97,119,97,108,102,108,111,97,116,58
+,108,101,102,116,105,115,32,117,115,117,97,108,108,121,99,97,110,100,105,100,97,
+116,101,115,110,101,119,115,112,97,112,101,114,115,109,121,115,116,101,114,105,
+111,117,115,68,101,112,97,114,116,109,101,110,116,98,101,115,116,32,107,110,111,
+119,110,112,97,114,108,105,97,109,101,110,116,115,117,112,112,114,101,115,115,
+101,100,99,111,110,118,101,110,105,101,110,116,114,101,109,101,109,98,101,114,
+101,100,100,105,102,102,101,114,101,110,116,32,115,121,115,116,101,109,97,116,
+105,99,104,97,115,32,108,101,100,32,116,111,112,114,111,112,97,103,97,110,100,97
+,99,111,110,116,114,111,108,108,101,100,105,110,102,108,117,101,110,99,101,115,
+99,101,114,101,109,111,110,105,97,108,112,114,111,99,108,97,105,109,101,100,80,
+114,111,116,101,99,116,105,111,110,108,105,32,99,108,97,115,115,61,34,83,99,105,
+101,110,116,105,102,105,99,99,108,97,115,115,61,34,110,111,45,116,114,97,100,101
+,109,97,114,107,115,109,111,114,101,32,116,104,97,110,32,119,105,100,101,115,112
+,114,101,97,100,76,105,98,101,114,97,116,105,111,110,116,111,111,107,32,112,108,
+97,99,101,100,97,121,32,111,102,32,116,104,101,97,115,32,108,111,110,103,32,97,
+115,105,109,112,114,105,115,111,110,101,100,65,100,100,105,116,105,111,110,97,
+108,10,60,104,101,97,100,62,10,60,109,76,97,98,111,114,97,116,111,114,121,78,111
+,118,101,109,98,101,114,32,50,101,120,99,101,112,116,105,111,110,115,73,110,100,
+117,115,116,114,105,97,108,118,97,114,105,101,116,121,32,111,102,102,108,111,97,
+116,58,32,108,101,102,68,117,114,105,110,103,32,116,104,101,97,115,115,101,115,
+115,109,101,110,116,104,97,118,101,32,98,101,101,110,32,100,101,97,108,115,32,
+119,105,116,104,83,116,97,116,105,115,116,105,99,115,111,99,99,117,114,114,101,
+110,99,101,47,117,108,62,60,47,100,105,118,62,99,108,101,97,114,102,105,120,34,
+62,116,104,101,32,112,117,98,108,105,99,109,97,110,121,32,121,101,97,114,115,119
+,104,105,99,104,32,119,101,114,101,111,118,101,114,32,116,105,109,101,44,115,121
+,110,111,110,121,109,111,117,115,99,111,110,116,101,110,116,34,62,10,112,114,101
+,115,117,109,97,98,108,121,104,105,115,32,102,97,109,105,108,121,117,115,101,114
+,65,103,101,110,116,46,117,110,101,120,112,101,99,116,101,100,105,110,99,108,117
+,100,105,110,103,32,99,104,97,108,108,101,110,103,101,100,97,32,109,105,110,111,
+114,105,116,121,117,110,100,101,102,105,110,101,100,34,98,101,108,111,110,103,
+115,32,116,111,116,97,107,101,110,32,102,114,111,109,105,110,32,79,99,116,111,98
+,101,114,112,111,115,105,116,105,111,110,58,32,115,97,105,100,32,116,111,32,98,
+101,114,101,108,105,103,105,111,117,115,32,70,101,100,101,114,97,116,105,111,110
+,32,114,111,119,115,112,97,110,61,34,111,110,108,121,32,97,32,102,101,119,109,
+101,97,110,116,32,116,104,97,116,108,101,100,32,116,111,32,116,104,101,45,45,62,
+13,10,60,100,105,118,32,60,102,105,101,108,100,115,101,116,62,65,114,99,104,98,
+105,115,104,111,112,32,99,108,97,115,115,61,34,110,111,98,101,105,110,103,32,117
+,115,101,100,97,112,112,114,111,97,99,104,101,115,112,114,105,118,105,108,101,
+103,101,115,110,111,115,99,114,105,112,116,62,10,114,101,115,117,108,116,115,32,
+105,110,109,97,121,32,98,101,32,116,104,101,69,97,115,116,101,114,32,101,103,103
+,109,101,99,104,97,110,105,115,109,115,114,101,97,115,111,110,97,98,108,101,80,
+111,112,117,108,97,116,105,111,110,67,111,108,108,101,99,116,105,111,110,115,101
+,108,101,99,116,101,100,34,62,110,111,115,99,114,105,112,116,62,13,47,105,110,
+100,101,120,46,112,104,112,97,114,114,105,118,97,108,32,111,102,45,106,115,115,
+100,107,39,41,41,59,109,97,110,97,103,101,100,32,116,111,105,110,99,111,109,112,
+108,101,116,101,99,97,115,117,97,108,116,105,101,115,99,111,109,112,108,101,116,
+105,111,110,67,104,114,105,115,116,105,97,110,115,83,101,112,116,101,109,98,101,
+114,32,97,114,105,116,104,109,101,116,105,99,112,114,111,99,101,100,117,114,101,
+115,109,105,103,104,116,32,104,97,118,101,80,114,111,100,117,99,116,105,111,110,
+105,116,32,97,112,112,101,97,114,115,80,104,105,108,111,115,111,112,104,121,102,
+114,105,101,110,100,115,104,105,112,108,101,97,100,105,110,103,32,116,111,103,
+105,118,105,110,103,32,116,104,101,116,111,119,97,114,100,32,116,104,101,103,117
+,97,114,97,110,116,101,101,100,100,111,99,117,109,101,110,116,101,100,99,111,108
+,111,114,58,35,48,48,48,118,105,100,101,111,32,103,97,109,101,99,111,109,109,105
+,115,115,105,111,110,114,101,102,108,101,99,116,105,110,103,99,104,97,110,103,
+101,32,116,104,101,97,115,115,111,99,105,97,116,101,100,115,97,110,115,45,115,
+101,114,105,102,111,110,107,101,121,112,114,101,115,115,59,32,112,97,100,100,105
+,110,103,58,72,101,32,119,97,115,32,116,104,101,117,110,100,101,114,108,121,105,
+110,103,116,121,112,105,99,97,108,108,121,32,44,32,97,110,100,32,116,104,101,32,
+115,114,99,69,108,101,109,101,110,116,115,117,99,99,101,115,115,105,118,101,115,
+105,110,99,101,32,116,104,101,32,115,104,111,117,108,100,32,98,101,32,110,101,
+116,119,111,114,107,105,110,103,97,99,99,111,117,110,116,105,110,103,117,115,101
+,32,111,102,32,116,104,101,108,111,119,101,114,32,116,104,97,110,115,104,111,119
+,115,32,116,104,97,116,60,47,115,112,97,110,62,10,9,9,99,111,109,112,108,97,105,
+110,116,115,99,111,110,116,105,110,117,111,117,115,113,117,97,110,116,105,116,
+105,101,115,97,115,116,114,111,110,111,109,101,114,104,101,32,100,105,100,32,110
+,111,116,100,117,101,32,116,111,32,105,116,115,97,112,112,108,105,101,100,32,116
+,111,97,110,32,97,118,101,114,97,103,101,101,102,102,111,114,116,115,32,116,111,
+116,104,101,32,102,117,116,117,114,101,97,116,116,101,109,112,116,32,116,111,84,
+104,101,114,101,102,111,114,101,44,99,97,112,97,98,105,108,105,116,121,82,101,
+112,117,98,108,105,99,97,110,119,97,115,32,102,111,114,109,101,100,69,108,101,99
+,116,114,111,110,105,99,107,105,108,111,109,101,116,101,114,115,99,104,97,108,
+108,101,110,103,101,115,112,117,98,108,105,115,104,105,110,103,116,104,101,32,
+102,111,114,109,101,114,105,110,100,105,103,101,110,111,117,115,100,105,114,101,
+99,116,105,111,110,115,115,117,98,115,105,100,105,97,114,121,99,111,110,115,112,
+105,114,97,99,121,100,101,116,97,105,108,115,32,111,102,97,110,100,32,105,110,32
+,116,104,101,97,102,102,111,114,100,97,98,108,101,115,117,98,115,116,97,110,99,
+101,115,114,101,97,115,111,110,32,102,111,114,99,111,110,118,101,110,116,105,111
+,110,105,116,101,109,116,121,112,101,61,34,97,98,115,111,108,117,116,101,108,121
+,115,117,112,112,111,115,101,100,108,121,114,101,109,97,105,110,101,100,32,97,97
+,116,116,114,97,99,116,105,118,101,116,114,97,118,101,108,108,105,110,103,115,
+101,112,97,114,97,116,101,108,121,102,111,99,117,115,101,115,32,111,110,101,108,
+101,109,101,110,116,97,114,121,97,112,112,108,105,99,97,98,108,101,102,111,117,
+110,100,32,116,104,97,116,115,116,121,108,101,115,104,101,101,116,109,97,110,117
+,115,99,114,105,112,116,115,116,97,110,100,115,32,102,111,114,32,110,111,45,114,
+101,112,101,97,116,40,115,111,109,101,116,105,109,101,115,67,111,109,109,101,114
+,99,105,97,108,105,110,32,65,109,101,114,105,99,97,117,110,100,101,114,116,97,
+107,101,110,113,117,97,114,116,101,114,32,111,102,97,110,32,101,120,97,109,112,
+108,101,112,101,114,115,111,110,97,108,108,121,105,110,100,101,120,46,112,104,
+112,63,60,47,98,117,116,116,111,110,62,10,112,101,114,99,101,110,116,97,103,101,
+98,101,115,116,45,107,110,111,119,110,99,114,101,97,116,105,110,103,32,97,34,32,
+100,105,114,61,34,108,116,114,76,105,101,117,116,101,110,97,110,116,10,60,100,
+105,118,32,105,100,61,34,116,104,101,121,32,119,111,117,108,100,97,98,105,108,
+105,116,121,32,111,102,109,97,100,101,32,117,112,32,111,102,110,111,116,101,100,
+32,116,104,97,116,99,108,101,97,114,32,116,104,97,116,97,114,103,117,101,32,116,
+104,97,116,116,111,32,97,110,111,116,104,101,114,99,104,105,108,100,114,101,110,
+39,115,112,117,114,112,111,115,101,32,111,102,102,111,114,109,117,108,97,116,101
+,100,98,97,115,101,100,32,117,112,111,110,116,104,101,32,114,101,103,105,111,110
+,115,117,98,106,101,99,116,32,111,102,112,97,115,115,101,110,103,101,114,115,112
+,111,115,115,101,115,115,105,111,110,46,10,10,73,110,32,116,104,101,32,66,101,
+102,111,114,101,32,116,104,101,97,102,116,101,114,119,97,114,100,115,99,117,114,
+114,101,110,116,108,121,32,97,99,114,111,115,115,32,116,104,101,115,99,105,101,
+110,116,105,102,105,99,99,111,109,109,117,110,105,116,121,46,99,97,112,105,116,
+97,108,105,115,109,105,110,32,71,101,114,109,97,110,121,114,105,103,104,116,45,
+119,105,110,103,116,104,101,32,115,121,115,116,101,109,83,111,99,105,101,116,121
+,32,111,102,112,111,108,105,116,105,99,105,97,110,100,105,114,101,99,116,105,111
+,110,58,119,101,110,116,32,111,110,32,116,111,114,101,109,111,118,97,108,32,111,
+102,32,78,101,119,32,89,111,114,107,32,97,112,97,114,116,109,101,110,116,115,105
+,110,100,105,99,97,116,105,111,110,100,117,114,105,110,103,32,116,104,101,117,
+110,108,101,115,115,32,116,104,101,104,105,115,116,111,114,105,99,97,108,104,97,
+100,32,98,101,101,110,32,97,100,101,102,105,110,105,116,105,118,101,105,110,103,
+114,101,100,105,101,110,116,97,116,116,101,110,100,97,110,99,101,67,101,110,116,
+101,114,32,102,111,114,112,114,111,109,105,110,101,110,99,101,114,101,97,100,121
+,83,116,97,116,101,115,116,114,97,116,101,103,105,101,115,98,117,116,32,105,110,
+32,116,104,101,97,115,32,112,97,114,116,32,111,102,99,111,110,115,116,105,116,
+117,116,101,99,108,97,105,109,32,116,104,97,116,108,97,98,111,114,97,116,111,114
+,121,99,111,109,112,97,116,105,98,108,101,102,97,105,108,117,114,101,32,111,102,
+44,32,115,117,99,104,32,97,115,32,98,101,103,97,110,32,119,105,116,104,117,115,
+105,110,103,32,116,104,101,32,116,111,32,112,114,111,118,105,100,101,102,101,97,
+116,117,114,101,32,111,102,102,114,111,109,32,119,104,105,99,104,47,34,32,99,108
+,97,115,115,61,34,103,101,111,108,111,103,105,99,97,108,115,101,118,101,114,97,
+108,32,111,102,100,101,108,105,98,101,114,97,116,101,105,109,112,111,114,116,97,
+110,116,32,104,111,108,100,115,32,116,104,97,116,105,110,103,38,113,117,111,116,
+59,32,118,97,108,105,103,110,61,116,111,112,116,104,101,32,71,101,114,109,97,110
+,111,117,116,115,105,100,101,32,111,102,110,101,103,111,116,105,97,116,101,100,
+104,105,115,32,99,97,114,101,101,114,115,101,112,97,114,97,116,105,111,110,105,
+100,61,34,115,101,97,114,99,104,119,97,115,32,99,97,108,108,101,100,116,104,101,
+32,102,111,117,114,116,104,114,101,99,114,101,97,116,105,111,110,111,116,104,101
+,114,32,116,104,97,110,112,114,101,118,101,110,116,105,111,110,119,104,105,108,
+101,32,116,104,101,32,101,100,117,99,97,116,105,111,110,44,99,111,110,110,101,99
+,116,105,110,103,97,99,99,117,114,97,116,101,108,121,119,101,114,101,32,98,117,
+105,108,116,119,97,115,32,107,105,108,108,101,100,97,103,114,101,101,109,101,110
+,116,115,109,117,99,104,32,109,111,114,101,32,68,117,101,32,116,111,32,116,104,
+101,119,105,100,116,104,58,32,49,48,48,115,111,109,101,32,111,116,104,101,114,75
+,105,110,103,100,111,109,32,111,102,116,104,101,32,101,110,116,105,114,101,102,
+97,109,111,117,115,32,102,111,114,116,111,32,99,111,110,110,101,99,116,111,98,
+106,101,99,116,105,118,101,115,116,104,101,32,70,114,101,110,99,104,112,101,111,
+112,108,101,32,97,110,100,102,101,97,116,117,114,101,100,34,62,105,115,32,115,97
+,105,100,32,116,111,115,116,114,117,99,116,117,114,97,108,114,101,102,101,114,
+101,110,100,117,109,109,111,115,116,32,111,102,116,101,110,97,32,115,101,112,97,
+114,97,116,101,45,62,10,60,100,105,118,32,105,100,32,79,102,102,105,99,105,97,
+108,32,119,111,114,108,100,119,105,100,101,46,97,114,105,97,45,108,97,98,101,108
+,116,104,101,32,112,108,97,110,101,116,97,110,100,32,105,116,32,119,97,115,100,
+34,32,118,97,108,117,101,61,34,108,111,111,107,105,110,103,32,97,116,98,101,110,
+101,102,105,99,105,97,108,97,114,101,32,105,110,32,116,104,101,109,111,110,105,
+116,111,114,105,110,103,114,101,112,111,114,116,101,100,108,121,116,104,101,32,
+109,111,100,101,114,110,119,111,114,107,105,110,103,32,111,110,97,108,108,111,
+119,101,100,32,116,111,119,104,101,114,101,32,116,104,101,32,105,110,110,111,118
+,97,116,105,118,101,60,47,97,62,60,47,100,105,118,62,115,111,117,110,100,116,114
+,97,99,107,115,101,97,114,99,104,70,111,114,109,116,101,110,100,32,116,111,32,98
+,101,105,110,112,117,116,32,105,100,61,34,111,112,101,110,105,110,103,32,111,102
+,114,101,115,116,114,105,99,116,101,100,97,100,111,112,116,101,100,32,98,121,97,
+100,100,114,101,115,115,105,110,103,116,104,101,111,108,111,103,105,97,110,109,
+101,116,104,111,100,115,32,111,102,118,97,114,105,97,110,116,32,111,102,67,104,
+114,105,115,116,105,97,110,32,118,101,114,121,32,108,97,114,103,101,97,117,116,
+111,109,111,116,105,118,101,98,121,32,102,97,114,32,116,104,101,114,97,110,103,
+101,32,102,114,111,109,112,117,114,115,117,105,116,32,111,102,102,111,108,108,
+111,119,32,116,104,101,98,114,111,117,103,104,116,32,116,111,105,110,32,69,110,
+103,108,97,110,100,97,103,114,101,101,32,116,104,97,116,97,99,99,117,115,101,100
+,32,111,102,99,111,109,101,115,32,102,114,111,109,112,114,101,118,101,110,116,
+105,110,103,100,105,118,32,115,116,121,108,101,61,104,105,115,32,111,114,32,104,
+101,114,116,114,101,109,101,110,100,111,117,115,102,114,101,101,100,111,109,32,
+111,102,99,111,110,99,101,114,110,105,110,103,48,32,49,101,109,32,49,101,109,59,
+66,97,115,107,101,116,98,97,108,108,47,115,116,121,108,101,46,99,115,115,97,110,
+32,101,97,114,108,105,101,114,101,118,101,110,32,97,102,116,101,114,47,34,32,116
+,105,116,108,101,61,34,46,99,111,109,47,105,110,100,101,120,116,97,107,105,110,
+103,32,116,104,101,112,105,116,116,115,98,117,114,103,104,99,111,110,116,101,110
+,116,34,62,13,60,115,99,114,105,112,116,62,40,102,116,117,114,110,101,100,32,111
+,117,116,104,97,118,105,110,103,32,116,104,101,60,47,115,112,97,110,62,13,10,32,
+111,99,99,97,115,105,111,110,97,108,98,101,99,97,117,115,101,32,105,116,115,116,
+97,114,116,101,100,32,116,111,112,104,121,115,105,99,97,108,108,121,62,60,47,100
+,105,118,62,10,32,32,99,114,101,97,116,101,100,32,98,121,67,117,114,114,101,110,
+116,108,121,44,32,98,103,99,111,108,111,114,61,34,116,97,98,105,110,100,101,120,
+61,34,100,105,115,97,115,116,114,111,117,115,65,110,97,108,121,116,105,99,115,32
+,97,108,115,111,32,104,97,115,32,97,62,60,100,105,118,32,105,100,61,34,60,47,115
+,116,121,108,101,62,10,60,99,97,108,108,101,100,32,102,111,114,115,105,110,103,
+101,114,32,97,110,100,46,115,114,99,32,61,32,34,47,47,118,105,111,108,97,116,105
+,111,110,115,116,104,105,115,32,112,111,105,110,116,99,111,110,115,116,97,110,
+116,108,121,105,115,32,108,111,99,97,116,101,100,114,101,99,111,114,100,105,110,
+103,115,100,32,102,114,111,109,32,116,104,101,110,101,100,101,114,108,97,110,100
+,115,112,111,114,116,117,103,117,195,170,115,215,162,215,145,215,168,215,153,215
+,170,217,129,216,167,216,177,216,179,219,140,100,101,115,97,114,114,111,108,108,
+111,99,111,109,101,110,116,97,114,105,111,101,100,117,99,97,99,105,195,179,110,
+115,101,112,116,105,101,109,98,114,101,114,101,103,105,115,116,114,97,100,111,
+100,105,114,101,99,99,105,195,179,110,117,98,105,99,97,99,105,195,179,110,112,
+117,98,108,105,99,105,100,97,100,114,101,115,112,117,101,115,116,97,115,114,101,
+115,117,108,116,97,100,111,115,105,109,112,111,114,116,97,110,116,101,114,101,
+115,101,114,118,97,100,111,115,97,114,116,195,173,99,117,108,111,115,100,105,102
+,101,114,101,110,116,101,115,115,105,103,117,105,101,110,116,101,115,114,101,112
+,195,186,98,108,105,99,97,115,105,116,117,97,99,105,195,179,110,109,105,110,105,
+115,116,101,114,105,111,112,114,105,118,97,99,105,100,97,100,100,105,114,101,99,
+116,111,114,105,111,102,111,114,109,97,99,105,195,179,110,112,111,98,108,97,99,
+105,195,179,110,112,114,101,115,105,100,101,110,116,101,99,111,110,116,101,110,
+105,100,111,115,97,99,99,101,115,111,114,105,111,115,116,101,99,104,110,111,114,
+97,116,105,112,101,114,115,111,110,97,108,101,115,99,97,116,101,103,111,114,195,
+173,97,101,115,112,101,99,105,97,108,101,115,100,105,115,112,111,110,105,98,108,
+101,97,99,116,117,97,108,105,100,97,100,114,101,102,101,114,101,110,99,105,97,
+118,97,108,108,97,100,111,108,105,100,98,105,98,108,105,111,116,101,99,97,114,
+101,108,97,99,105,111,110,101,115,99,97,108,101,110,100,97,114,105,111,112,111,
+108,195,173,116,105,99,97,115,97,110,116,101,114,105,111,114,101,115,100,111,99,
+117,109,101,110,116,111,115,110,97,116,117,114,97,108,101,122,97,109,97,116,101,
+114,105,97,108,101,115,100,105,102,101,114,101,110,99,105,97,101,99,111,110,195,
+179,109,105,99,97,116,114,97,110,115,112,111,114,116,101,114,111,100,114,195,173
+,103,117,101,122,112,97,114,116,105,99,105,112,97,114,101,110,99,117,101,110,116
+,114,97,110,100,105,115,99,117,115,105,195,179,110,101,115,116,114,117,99,116,
+117,114,97,102,117,110,100,97,99,105,195,179,110,102,114,101,99,117,101,110,116,
+101,115,112,101,114,109,97,110,101,110,116,101,116,111,116,97,108,109,101,110,
+116,101,208,188,208,190,208,182,208,189,208,190,208,177,209,131,208,180,208,181,
+209,130,208,188,208,190,208,182,208,181,209,130,208,178,209,128,208,181,208,188,
+209,143,209,130,208,176,208,186,208,182,208,181,209,135,209,130,208,190,208,177,
+209,139,208,177,208,190,208,187,208,181,208,181,208,190,209,135,208,181,208,189,
+209,140,209,141,209,130,208,190,208,179,208,190,208,186,208,190,208,179,208,180,
+208,176,208,191,208,190,209,129,208,187,208,181,208,178,209,129,208,181,208,179,
+208,190,209,129,208,176,208,185,209,130,208,181,209,135,208,181,209,128,208,181,
+208,183,208,188,208,190,208,179,209,131,209,130,209,129,208,176,208,185,209,130,
+208,176,208,182,208,184,208,183,208,189,208,184,208,188,208,181,208,182,208,180,
+209,131,208,177,209,131,208,180,209,131,209,130,208,159,208,190,208,184,209,129,
+208,186,208,183,208,180,208,181,209,129,209,140,208,178,208,184,208,180,208,181,
+208,190,209,129,208,178,209,143,208,183,208,184,208,189,209,131,208,182,208,189,
+208,190,209,129,208,178,208,190,208,181,208,185,208,187,209,142,208,180,208,181,
+208,185,208,191,208,190,209,128,208,189,208,190,208,188,208,189,208,190,208,179,
+208,190,208,180,208,181,209,130,208,181,208,185,209,129,208,178,208,190,208,184,
+209,133,208,191,209,128,208,176,208,178,208,176,209,130,208,176,208,186,208,190,
+208,185,208,188,208,181,209,129,209,130,208,190,208,184,208,188,208,181,208,181,
+209,130,208,182,208,184,208,183,208,189,209,140,208,190,208,180,208,189,208,190,
+208,185,208,187,209,131,209,135,209,136,208,181,208,191,208,181,209,128,208,181,
+208,180,209,135,208,176,209,129,209,130,208,184,209,135,208,176,209,129,209,130,
+209,140,209,128,208,176,208,177,208,190,209,130,208,189,208,190,208,178,209,139,
+209,133,208,191,209,128,208,176,208,178,208,190,209,129,208,190,208,177,208,190,
+208,185,208,191,208,190,209,130,208,190,208,188,208,188,208,181,208,189,208,181,
+208,181,209,135,208,184,209,129,208,187,208,181,208,189,208,190,208,178,209,139,
+208,181,209,131,209,129,208,187,209,131,208,179,208,190,208,186,208,190,208,187,
+208,190,208,189,208,176,208,183,208,176,208,180,209,130,208,176,208,186,208,190,
+208,181,209,130,208,190,208,179,208,180,208,176,208,191,208,190,209,135,209,130,
+208,184,208,159,208,190,209,129,208,187,208,181,209,130,208,176,208,186,208,184,
+208,181,208,189,208,190,208,178,209,139,208,185,209,129,209,130,208,190,208,184,
+209,130,209,130,208,176,208,186,208,184,209,133,209,129,209,128,208,176,208,183,
+209,131,208,161,208,176,208,189,208,186,209,130,209,132,208,190,209,128,209,131,
+208,188,208,154,208,190,208,179,208,180,208,176,208,186,208,189,208,184,208,179,
+208,184,209,129,208,187,208,190,208,178,208,176,208,189,208,176,209,136,208,181,
+208,185,208,189,208,176,208,185,209,130,208,184,209,129,208,178,208,190,208,184,
+208,188,209,129,208,178,209,143,208,183,209,140,208,187,209,142,208,177,208,190,
+208,185,209,135,208,176,209,129,209,130,208,190,209,129,209,128,208,181,208,180,
+208,184,208,154,209,128,208,190,208,188,208,181,208,164,208,190,209,128,209,131,
+208,188,209,128,209,139,208,189,208,186,208,181,209,129,209,130,208,176,208,187,
+208,184,208,191,208,190,208,184,209,129,208,186,209,130,209,139,209,129,209,143,
+209,135,208,188,208,181,209,129,209,143,209,134,209,134,208,181,208,189,209,130,
+209,128,209,130,209,128,209,131,208,180,208,176,209,129,208,176,208,188,209,139,
+209,133,209,128,209,139,208,189,208,186,208,176,208,157,208,190,208,178,209,139,
+208,185,209,135,208,176,209,129,208,190,208,178,208,188,208,181,209,129,209,130,
+208,176,209,132,208,184,208,187,209,140,208,188,208,188,208,176,209,128,209,130,
+208,176,209,129,209,130,209,128,208,176,208,189,208,188,208,181,209,129,209,130,
+208,181,209,130,208,181,208,186,209,129,209,130,208,189,208,176,209,136,208,184,
+209,133,208,188,208,184,208,189,209,131,209,130,208,184,208,188,208,181,208,189,
+208,184,208,184,208,188,208,181,209,142,209,130,208,189,208,190,208,188,208,181,
+209,128,208,179,208,190,209,128,208,190,208,180,209,129,208,176,208,188,208,190,
+208,188,209,141,209,130,208,190,208,188,209,131,208,186,208,190,208,189,209,134,
+208,181,209,129,208,178,208,190,208,181,208,188,208,186,208,176,208,186,208,190,
+208,185,208,144,209,128,209,133,208,184,208,178,217,133,217,134,216,170,216,175,
+217,137,216,165,216,177,216,179,216,167,217,132,216,177,216,179,216,167,217,132,
+216,169,216,167,217,132,216,185,216,167,217,133,217,131,216,170,216,168,217,135,
+216,167,216,168,216,177,216,167,217,133,216,172,216,167,217,132,217,138,217,136,
+217,133,216,167,217,132,216,181,217,136,216,177,216,172,216,175,217,138,216,175,
+216,169,216,167,217,132,216,185,216,182,217,136,216,165,216,182,216,167,217,129,
+216,169,216,167,217,132,217,130,216,179,217,133,216,167,217,132,216,185,216,167,
+216,168,216,170,216,173,217,133,217,138,217,132,217,133,217,132,217,129,216,167,
+216,170,217,133,217,132,216,170,217,130,217,137,216,170,216,185,216,175,217,138,
+217,132,216,167,217,132,216,180,216,185,216,177,216,163,216,174,216,168,216,167,
+216,177,216,170,216,183,217,136,217,138,216,177,216,185,217,132,217,138,217,131,
+217,133,216,165,216,177,217,129,216,167,217,130,216,183,217,132,216,168,216,167,
+216,170,216,167,217,132,217,132,216,186,216,169,216,170,216,177,216,170,217,138,
+216,168,216,167,217,132,217,134,216,167,216,179,216,167,217,132,216,180,217,138,
+216,174,217,133,217,134,216,170,216,175,217,138,216,167,217,132,216,185,216,177,
+216,168,216,167,217,132,217,130,216,181,216,181,216,167,217,129,217,132,216,167,
+217,133,216,185,217,132,217,138,217,135,216,167,216,170,216,173,216,175,217,138,
+216,171,216,167,217,132,217,132,217,135,217,133,216,167,217,132,216,185,217,133,
+217,132,217,133,217,131,216,170,216,168,216,169,217,138,217,133,217,131,217,134,
+217,131,216,167,217,132,216,183,217,129,217,132,217,129,217,138,216,175,217,138,
+217,136,216,165,216,175,216,167,216,177,216,169,216,170,216,167,216,177,217,138,
+216,174,216,167,217,132,216,181,216,173,216,169,216,170,216,179,216,172,217,138,
+217,132,216,167,217,132,217,136,217,130,216,170,216,185,217,134,216,175,217,133,
+216,167,217,133,216,175,217,138,217,134,216,169,216,170,216,181,217,133,217,138,
+217,133,216,163,216,177,216,180,217,138,217,129,216,167,217,132,216,176,217,138,
+217,134,216,185,216,177,216,168,217,138,216,169,216,168,217,136,216,167,216,168,
+216,169,216,163,217,132,216,185,216,167,216,168,216,167,217,132,216,179,217,129,
+216,177,217,133,216,180,216,167,217,131,217,132,216,170,216,185,216,167,217,132,
+217,137,216,167,217,132,216,163,217,136,217,132,216,167,217,132,216,179,217,134,
+216,169,216,172,216,167,217,133,216,185,216,169,216,167,217,132,216,181,216,173,
+217,129,216,167,217,132,216,175,217,138,217,134,217,131,217,132,217,133,216,167,
+216,170,216,167,217,132,216,174,216,167,216,181,216,167,217,132,217,133,217,132,
+217,129,216,163,216,185,216,182,216,167,216,161,217,131,216,170,216,167,216,168,
+216,169,216,167,217,132,216,174,217,138,216,177,216,177,216,179,216,167,216,166,
+217,132,216,167,217,132,217,130,217,132,216,168,216,167,217,132,216,163,216,175,
+216,168,217,133,217,130,216,167,216,183,216,185,217,133,216,177,216,167,216,179,
+217,132,217,133,217,134,216,183,217,130,216,169,216,167,217,132,217,131,216,170,
+216,168,216,167,217,132,216,177,216,172,217,132,216,167,216,180,216,170,216,177,
+217,131,216,167,217,132,217,130,216,175,217,133,217,138,216,185,216,183,217,138,
+217,131,115,66,121,84,97,103,78,97,109,101,40,46,106,112,103,34,32,97,108,116,61
+,34,49,112,120,32,115,111,108,105,100,32,35,46,103,105,102,34,32,97,108,116,61,
+34,116,114,97,110,115,112,97,114,101,110,116,105,110,102,111,114,109,97,116,105,
+111,110,97,112,112,108,105,99,97,116,105,111,110,34,32,111,110,99,108,105,99,107
+,61,34,101,115,116,97,98,108,105,115,104,101,100,97,100,118,101,114,116,105,115,
+105,110,103,46,112,110,103,34,32,97,108,116,61,34,101,110,118,105,114,111,110,
+109,101,110,116,112,101,114,102,111,114,109,97,110,99,101,97,112,112,114,111,112
+,114,105,97,116,101,38,97,109,112,59,109,100,97,115,104,59,105,109,109,101,100,
+105,97,116,101,108,121,60,47,115,116,114,111,110,103,62,60,47,114,97,116,104,101
+,114,32,116,104,97,110,116,101,109,112,101,114,97,116,117,114,101,100,101,118,
+101,108,111,112,109,101,110,116,99,111,109,112,101,116,105,116,105,111,110,112,
+108,97,99,101,104,111,108,100,101,114,118,105,115,105,98,105,108,105,116,121,58,
+99,111,112,121,114,105,103,104,116,34,62,48,34,32,104,101,105,103,104,116,61,34,
+101,118,101,110,32,116,104,111,117,103,104,114,101,112,108,97,99,101,109,101,110
+,116,100,101,115,116,105,110,97,116,105,111,110,67,111,114,112,111,114,97,116,
+105,111,110,60,117,108,32,99,108,97,115,115,61,34,65,115,115,111,99,105,97,116,
+105,111,110,105,110,100,105,118,105,100,117,97,108,115,112,101,114,115,112,101,
+99,116,105,118,101,115,101,116,84,105,109,101,111,117,116,40,117,114,108,40,104,
+116,116,112,58,47,47,109,97,116,104,101,109,97,116,105,99,115,109,97,114,103,105
+,110,45,116,111,112,58,101,118,101,110,116,117,97,108,108,121,32,100,101,115,99,
+114,105,112,116,105,111,110,41,32,110,111,45,114,101,112,101,97,116,99,111,108,
+108,101,99,116,105,111,110,115,46,74,80,71,124,116,104,117,109,98,124,112,97,114
+,116,105,99,105,112,97,116,101,47,104,101,97,100,62,60,98,111,100,121,102,108,
+111,97,116,58,108,101,102,116,59,60,108,105,32,99,108,97,115,115,61,34,104,117,
+110,100,114,101,100,115,32,111,102,10,10,72,111,119,101,118,101,114,44,32,99,111
+,109,112,111,115,105,116,105,111,110,99,108,101,97,114,58,98,111,116,104,59,99,
+111,111,112,101,114,97,116,105,111,110,119,105,116,104,105,110,32,116,104,101,32
+,108,97,98,101,108,32,102,111,114,61,34,98,111,114,100,101,114,45,116,111,112,58
+,78,101,119,32,90,101,97,108,97,110,100,114,101,99,111,109,109,101,110,100,101,
+100,112,104,111,116,111,103,114,97,112,104,121,105,110,116,101,114,101,115,116,
+105,110,103,38,108,116,59,115,117,112,38,103,116,59,99,111,110,116,114,111,118,
+101,114,115,121,78,101,116,104,101,114,108,97,110,100,115,97,108,116,101,114,110
+,97,116,105,118,101,109,97,120,108,101,110,103,116,104,61,34,115,119,105,116,122
+,101,114,108,97,110,100,68,101,118,101,108,111,112,109,101,110,116,101,115,115,
+101,110,116,105,97,108,108,121,10,10,65,108,116,104,111,117,103,104,32,60,47,116
+,101,120,116,97,114,101,97,62,116,104,117,110,100,101,114,98,105,114,100,114,101
+,112,114,101,115,101,110,116,101,100,38,97,109,112,59,110,100,97,115,104,59,115,
+112,101,99,117,108,97,116,105,111,110,99,111,109,109,117,110,105,116,105,101,115
+,108,101,103,105,115,108,97,116,105,111,110,101,108,101,99,116,114,111,110,105,
+99,115,10,9,60,100,105,118,32,105,100,61,34,105,108,108,117,115,116,114,97,116,
+101,100,101,110,103,105,110,101,101,114,105,110,103,116,101,114,114,105,116,111,
+114,105,101,115,97,117,116,104,111,114,105,116,105,101,115,100,105,115,116,114,
+105,98,117,116,101,100,54,34,32,104,101,105,103,104,116,61,34,115,97,110,115,45,
+115,101,114,105,102,59,99,97,112,97,98,108,101,32,111,102,32,100,105,115,97,112,
+112,101,97,114,101,100,105,110,116,101,114,97,99,116,105,118,101,108,111,111,107
+,105,110,103,32,102,111,114,105,116,32,119,111,117,108,100,32,98,101,65,102,103,
+104,97,110,105,115,116,97,110,119,97,115,32,99,114,101,97,116,101,100,77,97,116,
+104,46,102,108,111,111,114,40,115,117,114,114,111,117,110,100,105,110,103,99,97,
+110,32,97,108,115,111,32,98,101,111,98,115,101,114,118,97,116,105,111,110,109,97
+,105,110,116,101,110,97,110,99,101,101,110,99,111,117,110,116,101,114,101,100,60
+,104,50,32,99,108,97,115,115,61,34,109,111,114,101,32,114,101,99,101,110,116,105
+,116,32,104,97,115,32,98,101,101,110,105,110,118,97,115,105,111,110,32,111,102,
+41,46,103,101,116,84,105,109,101,40,41,102,117,110,100,97,109,101,110,116,97,108
+,68,101,115,112,105,116,101,32,116,104,101,34,62,60,100,105,118,32,105,100,61,34
+,105,110,115,112,105,114,97,116,105,111,110,101,120,97,109,105,110,97,116,105,
+111,110,112,114,101,112,97,114,97,116,105,111,110,101,120,112,108,97,110,97,116,
+105,111,110,60,105,110,112,117,116,32,105,100,61,34,60,47,97,62,60,47,115,112,97
+,110,62,118,101,114,115,105,111,110,115,32,111,102,105,110,115,116,114,117,109,
+101,110,116,115,98,101,102,111,114,101,32,116,104,101,32,32,61,32,39,104,116,116
+,112,58,47,47,68,101,115,99,114,105,112,116,105,111,110,114,101,108,97,116,105,
+118,101,108,121,32,46,115,117,98,115,116,114,105,110,103,40,101,97,99,104,32,111
+,102,32,116,104,101,101,120,112,101,114,105,109,101,110,116,115,105,110,102,108,
+117,101,110,116,105,97,108,105,110,116,101,103,114,97,116,105,111,110,109,97,110
+,121,32,112,101,111,112,108,101,100,117,101,32,116,111,32,116,104,101,32,99,111,
+109,98,105,110,97,116,105,111,110,100,111,32,110,111,116,32,104,97,118,101,77,
+105,100,100,108,101,32,69,97,115,116,60,110,111,115,99,114,105,112,116,62,60,99,
+111,112,121,114,105,103,104,116,34,32,112,101,114,104,97,112,115,32,116,104,101,
+105,110,115,116,105,116,117,116,105,111,110,105,110,32,68,101,99,101,109,98,101,
+114,97,114,114,97,110,103,101,109,101,110,116,109,111,115,116,32,102,97,109,111,
+117,115,112,101,114,115,111,110,97,108,105,116,121,99,114,101,97,116,105,111,110
+,32,111,102,108,105,109,105,116,97,116,105,111,110,115,101,120,99,108,117,115,
+105,118,101,108,121,115,111,118,101,114,101,105,103,110,116,121,45,99,111,110,
+116,101,110,116,34,62,10,60,116,100,32,99,108,97,115,115,61,34,117,110,100,101,
+114,103,114,111,117,110,100,112,97,114,97,108,108,101,108,32,116,111,100,111,99,
+116,114,105,110,101,32,111,102,111,99,99,117,112,105,101,100,32,98,121,116,101,
+114,109,105,110,111,108,111,103,121,82,101,110,97,105,115,115,97,110,99,101,97,
+32,110,117,109,98,101,114,32,111,102,115,117,112,112,111,114,116,32,102,111,114,
+101,120,112,108,111,114,97,116,105,111,110,114,101,99,111,103,110,105,116,105,
+111,110,112,114,101,100,101,99,101,115,115,111,114,60,105,109,103,32,115,114,99,
+61,34,47,60,104,49,32,99,108,97,115,115,61,34,112,117,98,108,105,99,97,116,105,
+111,110,109,97,121,32,97,108,115,111,32,98,101,115,112,101,99,105,97,108,105,122
+,101,100,60,47,102,105,101,108,100,115,101,116,62,112,114,111,103,114,101,115,
+115,105,118,101,109,105,108,108,105,111,110,115,32,111,102,115,116,97,116,101,
+115,32,116,104,97,116,101,110,102,111,114,99,101,109,101,110,116,97,114,111,117,
+110,100,32,116,104,101,32,111,110,101,32,97,110,111,116,104,101,114,46,112,97,
+114,101,110,116,78,111,100,101,97,103,114,105,99,117,108,116,117,114,101,65,108,
+116,101,114,110,97,116,105,118,101,114,101,115,101,97,114,99,104,101,114,115,116
+,111,119,97,114,100,115,32,116,104,101,77,111,115,116,32,111,102,32,116,104,101,
+109,97,110,121,32,111,116,104,101,114,32,40,101,115,112,101,99,105,97,108,108,
+121,60,116,100,32,119,105,100,116,104,61,34,59,119,105,100,116,104,58,49,48,48,
+37,105,110,100,101,112,101,110,100,101,110,116,60,104,51,32,99,108,97,115,115,61
+,34,32,111,110,99,104,97,110,103,101,61,34,41,46,97,100,100,67,108,97,115,115,40
+,105,110,116,101,114,97,99,116,105,111,110,79,110,101,32,111,102,32,116,104,101,
+32,100,97,117,103,104,116,101,114,32,111,102,97,99,99,101,115,115,111,114,105,
+101,115,98,114,97,110,99,104,101,115,32,111,102,13,10,60,100,105,118,32,105,100,
+61,34,116,104,101,32,108,97,114,103,101,115,116,100,101,99,108,97,114,97,116,105
+,111,110,114,101,103,117,108,97,116,105,111,110,115,73,110,102,111,114,109,97,
+116,105,111,110,116,114,97,110,115,108,97,116,105,111,110,100,111,99,117,109,101
+,110,116,97,114,121,105,110,32,111,114,100,101,114,32,116,111,34,62,10,60,104,
+101,97,100,62,10,60,34,32,104,101,105,103,104,116,61,34,49,97,99,114,111,115,115
+,32,116,104,101,32,111,114,105,101,110,116,97,116,105,111,110,41,59,60,47,115,99
+,114,105,112,116,62,105,109,112,108,101,109,101,110,116,101,100,99,97,110,32,98,
+101,32,115,101,101,110,116,104,101,114,101,32,119,97,115,32,97,100,101,109,111,
+110,115,116,114,97,116,101,99,111,110,116,97,105,110,101,114,34,62,99,111,110,
+110,101,99,116,105,111,110,115,116,104,101,32,66,114,105,116,105,115,104,119,97,
+115,32,119,114,105,116,116,101,110,33,105,109,112,111,114,116,97,110,116,59,112,
+120,59,32,109,97,114,103,105,110,45,102,111,108,108,111,119,101,100,32,98,121,97
+,98,105,108,105,116,121,32,116,111,32,99,111,109,112,108,105,99,97,116,101,100,
+100,117,114,105,110,103,32,116,104,101,32,105,109,109,105,103,114,97,116,105,111
+,110,97,108,115,111,32,99,97,108,108,101,100,60,104,52,32,99,108,97,115,115,61,
+34,100,105,115,116,105,110,99,116,105,111,110,114,101,112,108,97,99,101,100,32,
+98,121,103,111,118,101,114,110,109,101,110,116,115,108,111,99,97,116,105,111,110
+,32,111,102,105,110,32,78,111,118,101,109,98,101,114,119,104,101,116,104,101,114
+,32,116,104,101,60,47,112,62,10,60,47,100,105,118,62,97,99,113,117,105,115,105,
+116,105,111,110,99,97,108,108,101,100,32,116,104,101,32,112,101,114,115,101,99,
+117,116,105,111,110,100,101,115,105,103,110,97,116,105,111,110,123,102,111,110,
+116,45,115,105,122,101,58,97,112,112,101,97,114,101,100,32,105,110,105,110,118,
+101,115,116,105,103,97,116,101,101,120,112,101,114,105,101,110,99,101,100,109,
+111,115,116,32,108,105,107,101,108,121,119,105,100,101,108,121,32,117,115,101,
+100,100,105,115,99,117,115,115,105,111,110,115,112,114,101,115,101,110,99,101,32
+,111,102,32,40,100,111,99,117,109,101,110,116,46,101,120,116,101,110,115,105,118
+,101,108,121,73,116,32,104,97,115,32,98,101,101,110,105,116,32,100,111,101,115,
+32,110,111,116,99,111,110,116,114,97,114,121,32,116,111,105,110,104,97,98,105,
+116,97,110,116,115,105,109,112,114,111,118,101,109,101,110,116,115,99,104,111,
+108,97,114,115,104,105,112,99,111,110,115,117,109,112,116,105,111,110,105,110,
+115,116,114,117,99,116,105,111,110,102,111,114,32,101,120,97,109,112,108,101,111
+,110,101,32,111,114,32,109,111,114,101,112,120,59,32,112,97,100,100,105,110,103,
+116,104,101,32,99,117,114,114,101,110,116,97,32,115,101,114,105,101,115,32,111,
+102,97,114,101,32,117,115,117,97,108,108,121,114,111,108,101,32,105,110,32,116,
+104,101,112,114,101,118,105,111,117,115,108,121,32,100,101,114,105,118,97,116,
+105,118,101,115,101,118,105,100,101,110,99,101,32,111,102,101,120,112,101,114,
+105,101,110,99,101,115,99,111,108,111,114,115,99,104,101,109,101,115,116,97,116,
+101,100,32,116,104,97,116,99,101,114,116,105,102,105,99,97,116,101,60,47,97,62,
+60,47,100,105,118,62,10,32,115,101,108,101,99,116,101,100,61,34,104,105,103,104,
+32,115,99,104,111,111,108,114,101,115,112,111,110,115,101,32,116,111,99,111,109,
+102,111,114,116,97,98,108,101,97,100,111,112,116,105,111,110,32,111,102,116,104,
+114,101,101,32,121,101,97,114,115,116,104,101,32,99,111,117,110,116,114,121,105,
+110,32,70,101,98,114,117,97,114,121,115,111,32,116,104,97,116,32,116,104,101,112
+,101,111,112,108,101,32,119,104,111,32,112,114,111,118,105,100,101,100,32,98,121
+,60,112,97,114,97,109,32,110,97,109,101,97,102,102,101,99,116,101,100,32,98,121,
+105,110,32,116,101,114,109,115,32,111,102,97,112,112,111,105,110,116,109,101,110
+,116,73,83,79,45,56,56,53,57,45,49,34,119,97,115,32,98,111,114,110,32,105,110,
+104,105,115,116,111,114,105,99,97,108,32,114,101,103,97,114,100,101,100,32,97,
+115,109,101,97,115,117,114,101,109,101,110,116,105,115,32,98,97,115,101,100,32,
+111,110,32,97,110,100,32,111,116,104,101,114,32,58,32,102,117,110,99,116,105,111
+,110,40,115,105,103,110,105,102,105,99,97,110,116,99,101,108,101,98,114,97,116,
+105,111,110,116,114,97,110,115,109,105,116,116,101,100,47,106,115,47,106,113,117
+,101,114,121,46,105,115,32,107,110,111,119,110,32,97,115,116,104,101,111,114,101
+,116,105,99,97,108,32,116,97,98,105,110,100,101,120,61,34,105,116,32,99,111,117,
+108,100,32,98,101,60,110,111,115,99,114,105,112,116,62,10,104,97,118,105,110,103
+,32,98,101,101,110,13,10,60,104,101,97,100,62,13,10,60,32,38,113,117,111,116,59,
+84,104,101,32,99,111,109,112,105,108,97,116,105,111,110,104,101,32,104,97,100,32
+,98,101,101,110,112,114,111,100,117,99,101,100,32,98,121,112,104,105,108,111,115
+,111,112,104,101,114,99,111,110,115,116,114,117,99,116,101,100,105,110,116,101,
+110,100,101,100,32,116,111,97,109,111,110,103,32,111,116,104,101,114,99,111,109,
+112,97,114,101,100,32,116,111,116,111,32,115,97,121,32,116,104,97,116,69,110,103
+,105,110,101,101,114,105,110,103,97,32,100,105,102,102,101,114,101,110,116,114,
+101,102,101,114,114,101,100,32,116,111,100,105,102,102,101,114,101,110,99,101,
+115,98,101,108,105,101,102,32,116,104,97,116,112,104,111,116,111,103,114,97,112,
+104,115,105,100,101,110,116,105,102,121,105,110,103,72,105,115,116,111,114,121,
+32,111,102,32,82,101,112,117,98,108,105,99,32,111,102,110,101,99,101,115,115,97,
+114,105,108,121,112,114,111,98,97,98,105,108,105,116,121,116,101,99,104,110,105,
+99,97,108,108,121,108,101,97,118,105,110,103,32,116,104,101,115,112,101,99,116,
+97,99,117,108,97,114,102,114,97,99,116,105,111,110,32,111,102,101,108,101,99,116
+,114,105,99,105,116,121,104,101,97,100,32,111,102,32,116,104,101,114,101,115,116
+,97,117,114,97,110,116,115,112,97,114,116,110,101,114,115,104,105,112,101,109,
+112,104,97,115,105,115,32,111,110,109,111,115,116,32,114,101,99,101,110,116,115,
+104,97,114,101,32,119,105,116,104,32,115,97,121,105,110,103,32,116,104,97,116,
+102,105,108,108,101,100,32,119,105,116,104,100,101,115,105,103,110,101,100,32,
+116,111,105,116,32,105,115,32,111,102,116,101,110,34,62,60,47,105,102,114,97,109
+,101,62,97,115,32,102,111,108,108,111,119,115,58,109,101,114,103,101,100,32,119,
+105,116,104,116,104,114,111,117,103,104,32,116,104,101,99,111,109,109,101,114,99
+,105,97,108,32,112,111,105,110,116,101,100,32,111,117,116,111,112,112,111,114,
+116,117,110,105,116,121,118,105,101,119,32,111,102,32,116,104,101,114,101,113,
+117,105,114,101,109,101,110,116,100,105,118,105,115,105,111,110,32,111,102,112,
+114,111,103,114,97,109,109,105,110,103,104,101,32,114,101,99,101,105,118,101,100
+,115,101,116,73,110,116,101,114,118,97,108,34,62,60,47,115,112,97,110,62,60,47,
+105,110,32,78,101,119,32,89,111,114,107,97,100,100,105,116,105,111,110,97,108,32
+,99,111,109,112,114,101,115,115,105,111,110,10,10,60,100,105,118,32,105,100,61,
+34,105,110,99,111,114,112,111,114,97,116,101,59,60,47,115,99,114,105,112,116,62,
+60,97,116,116,97,99,104,69,118,101,110,116,98,101,99,97,109,101,32,116,104,101,
+32,34,32,116,97,114,103,101,116,61,34,95,99,97,114,114,105,101,100,32,111,117,
+116,83,111,109,101,32,111,102,32,116,104,101,115,99,105,101,110,99,101,32,97,110
+,100,116,104,101,32,116,105,109,101,32,111,102,67,111,110,116,97,105,110,101,114
+,34,62,109,97,105,110,116,97,105,110,105,110,103,67,104,114,105,115,116,111,112,
+104,101,114,77,117,99,104,32,111,102,32,116,104,101,119,114,105,116,105,110,103,
+115,32,111,102,34,32,104,101,105,103,104,116,61,34,50,115,105,122,101,32,111,102
+,32,116,104,101,118,101,114,115,105,111,110,32,111,102,32,109,105,120,116,117,
+114,101,32,111,102,32,98,101,116,119,101,101,110,32,116,104,101,69,120,97,109,
+112,108,101,115,32,111,102,101,100,117,99,97,116,105,111,110,97,108,99,111,109,
+112,101,116,105,116,105,118,101,32,111,110,115,117,98,109,105,116,61,34,100,105,
+114,101,99,116,111,114,32,111,102,100,105,115,116,105,110,99,116,105,118,101,47,
+68,84,68,32,88,72,84,77,76,32,114,101,108,97,116,105,110,103,32,116,111,116,101,
+110,100,101,110,99,121,32,116,111,112,114,111,118,105,110,99,101,32,111,102,119,
+104,105,99,104,32,119,111,117,108,100,100,101,115,112,105,116,101,32,116,104,101
+,115,99,105,101,110,116,105,102,105,99,32,108,101,103,105,115,108,97,116,117,114
+,101,46,105,110,110,101,114,72,84,77,76,32,97,108,108,101,103,97,116,105,111,110
+,115,65,103,114,105,99,117,108,116,117,114,101,119,97,115,32,117,115,101,100,32,
+105,110,97,112,112,114,111,97,99,104,32,116,111,105,110,116,101,108,108,105,103,
+101,110,116,121,101,97,114,115,32,108,97,116,101,114,44,115,97,110,115,45,115,
+101,114,105,102,100,101,116,101,114,109,105,110,105,110,103,80,101,114,102,111,
+114,109,97,110,99,101,97,112,112,101,97,114,97,110,99,101,115,44,32,119,104,105,
+99,104,32,105,115,32,102,111,117,110,100,97,116,105,111,110,115,97,98,98,114,101
+,118,105,97,116,101,100,104,105,103,104,101,114,32,116,104,97,110,115,32,102,114
+,111,109,32,116,104,101,32,105,110,100,105,118,105,100,117,97,108,32,99,111,109,
+112,111,115,101,100,32,111,102,115,117,112,112,111,115,101,100,32,116,111,99,108
+,97,105,109,115,32,116,104,97,116,97,116,116,114,105,98,117,116,105,111,110,102,
+111,110,116,45,115,105,122,101,58,49,101,108,101,109,101,110,116,115,32,111,102,
+72,105,115,116,111,114,105,99,97,108,32,104,105,115,32,98,114,111,116,104,101,
+114,97,116,32,116,104,101,32,116,105,109,101,97,110,110,105,118,101,114,115,97,
+114,121,103,111,118,101,114,110,101,100,32,98,121,114,101,108,97,116,101,100,32,
+116,111,32,117,108,116,105,109,97,116,101,108,121,32,105,110,110,111,118,97,116,
+105,111,110,115,105,116,32,105,115,32,115,116,105,108,108,99,97,110,32,111,110,
+108,121,32,98,101,100,101,102,105,110,105,116,105,111,110,115,116,111,71,77,84,
+83,116,114,105,110,103,65,32,110,117,109,98,101,114,32,111,102,105,109,103,32,99
+,108,97,115,115,61,34,69,118,101,110,116,117,97,108,108,121,44,119,97,115,32,99,
+104,97,110,103,101,100,111,99,99,117,114,114,101,100,32,105,110,110,101,105,103,
+104,98,111,114,105,110,103,100,105,115,116,105,110,103,117,105,115,104,119,104,
+101,110,32,104,101,32,119,97,115,105,110,116,114,111,100,117,99,105,110,103,116,
+101,114,114,101,115,116,114,105,97,108,77,97,110,121,32,111,102,32,116,104,101,
+97,114,103,117,101,115,32,116,104,97,116,97,110,32,65,109,101,114,105,99,97,110,
+99,111,110,113,117,101,115,116,32,111,102,119,105,100,101,115,112,114,101,97,100
+,32,119,101,114,101,32,107,105,108,108,101,100,115,99,114,101,101,110,32,97,110,
+100,32,73,110,32,111,114,100,101,114,32,116,111,101,120,112,101,99,116,101,100,
+32,116,111,100,101,115,99,101,110,100,97,110,116,115,97,114,101,32,108,111,99,97
+,116,101,100,108,101,103,105,115,108,97,116,105,118,101,103,101,110,101,114,97,
+116,105,111,110,115,32,98,97,99,107,103,114,111,117,110,100,109,111,115,116,32,
+112,101,111,112,108,101,121,101,97,114,115,32,97,102,116,101,114,116,104,101,114
+,101,32,105,115,32,110,111,116,104,101,32,104,105,103,104,101,115,116,102,114,
+101,113,117,101,110,116,108,121,32,116,104,101,121,32,100,111,32,110,111,116,97,
+114,103,117,101,100,32,116,104,97,116,115,104,111,119,101,100,32,116,104,97,116,
+112,114,101,100,111,109,105,110,97,110,116,116,104,101,111,108,111,103,105,99,97
+,108,98,121,32,116,104,101,32,116,105,109,101,99,111,110,115,105,100,101,114,105
+,110,103,115,104,111,114,116,45,108,105,118,101,100,60,47,115,112,97,110,62,60,
+47,97,62,99,97,110,32,98,101,32,117,115,101,100,118,101,114,121,32,108,105,116,
+116,108,101,111,110,101,32,111,102,32,116,104,101,32,104,97,100,32,97,108,114,
+101,97,100,121,105,110,116,101,114,112,114,101,116,101,100,99,111,109,109,117,
+110,105,99,97,116,101,102,101,97,116,117,114,101,115,32,111,102,103,111,118,101,
+114,110,109,101,110,116,44,60,47,110,111,115,99,114,105,112,116,62,101,110,116,
+101,114,101,100,32,116,104,101,34,32,104,101,105,103,104,116,61,34,51,73,110,100
+,101,112,101,110,100,101,110,116,112,111,112,117,108,97,116,105,111,110,115,108,
+97,114,103,101,45,115,99,97,108,101,46,32,65,108,116,104,111,117,103,104,32,117,
+115,101,100,32,105,110,32,116,104,101,100,101,115,116,114,117,99,116,105,111,110
+,112,111,115,115,105,98,105,108,105,116,121,115,116,97,114,116,105,110,103,32,
+105,110,116,119,111,32,111,114,32,109,111,114,101,101,120,112,114,101,115,115,
+105,111,110,115,115,117,98,111,114,100,105,110,97,116,101,108,97,114,103,101,114
+,32,116,104,97,110,104,105,115,116,111,114,121,32,97,110,100,60,47,111,112,116,
+105,111,110,62,13,10,67,111,110,116,105,110,101,110,116,97,108,101,108,105,109,
+105,110,97,116,105,110,103,119,105,108,108,32,110,111,116,32,98,101,112,114,97,
+99,116,105,99,101,32,111,102,105,110,32,102,114,111,110,116,32,111,102,115,105,
+116,101,32,111,102,32,116,104,101,101,110,115,117,114,101,32,116,104,97,116,116,
+111,32,99,114,101,97,116,101,32,97,109,105,115,115,105,115,115,105,112,112,105,
+112,111,116,101,110,116,105,97,108,108,121,111,117,116,115,116,97,110,100,105,
+110,103,98,101,116,116,101,114,32,116,104,97,110,119,104,97,116,32,105,115,32,
+110,111,119,115,105,116,117,97,116,101,100,32,105,110,109,101,116,97,32,110,97,
+109,101,61,34,84,114,97,100,105,116,105,111,110,97,108,115,117,103,103,101,115,
+116,105,111,110,115,84,114,97,110,115,108,97,116,105,111,110,116,104,101,32,102,
+111,114,109,32,111,102,97,116,109,111,115,112,104,101,114,105,99,105,100,101,111
+,108,111,103,105,99,97,108,101,110,116,101,114,112,114,105,115,101,115,99,97,108
+,99,117,108,97,116,105,110,103,101,97,115,116,32,111,102,32,116,104,101,114,101,
+109,110,97,110,116,115,32,111,102,112,108,117,103,105,110,115,112,97,103,101,47,
+105,110,100,101,120,46,112,104,112,63,114,101,109,97,105,110,101,100,32,105,110,
+116,114,97,110,115,102,111,114,109,101,100,72,101,32,119,97,115,32,97,108,115,
+111,119,97,115,32,97,108,114,101,97,100,121,115,116,97,116,105,115,116,105,99,97
+,108,105,110,32,102,97,118,111,114,32,111,102,77,105,110,105,115,116,114,121,32,
+111,102,109,111,118,101,109,101,110,116,32,111,102,102,111,114,109,117,108,97,
+116,105,111,110,105,115,32,114,101,113,117,105,114,101,100,60,108,105,110,107,32
+,114,101,108,61,34,84,104,105,115,32,105,115,32,116,104,101,32,60,97,32,104,114,
+101,102,61,34,47,112,111,112,117,108,97,114,105,122,101,100,105,110,118,111,108,
+118,101,100,32,105,110,97,114,101,32,117,115,101,100,32,116,111,97,110,100,32,
+115,101,118,101,114,97,108,109,97,100,101,32,98,121,32,116,104,101,115,101,101,
+109,115,32,116,111,32,98,101,108,105,107,101,108,121,32,116,104,97,116,80,97,108
+,101,115,116,105,110,105,97,110,110,97,109,101,100,32,97,102,116,101,114,105,116
+,32,104,97,100,32,98,101,101,110,109,111,115,116,32,99,111,109,109,111,110,116,
+111,32,114,101,102,101,114,32,116,111,98,117,116,32,116,104,105,115,32,105,115,
+99,111,110,115,101,99,117,116,105,118,101,116,101,109,112,111,114,97,114,105,108
+,121,73,110,32,103,101,110,101,114,97,108,44,99,111,110,118,101,110,116,105,111,
+110,115,116,97,107,101,115,32,112,108,97,99,101,115,117,98,100,105,118,105,115,
+105,111,110,116,101,114,114,105,116,111,114,105,97,108,111,112,101,114,97,116,
+105,111,110,97,108,112,101,114,109,97,110,101,110,116,108,121,119,97,115,32,108,
+97,114,103,101,108,121,111,117,116,98,114,101,97,107,32,111,102,105,110,32,116,
+104,101,32,112,97,115,116,102,111,108,108,111,119,105,110,103,32,97,32,120,109,
+108,110,115,58,111,103,61,34,62,60,97,32,99,108,97,115,115,61,34,99,108,97,115,
+115,61,34,116,101,120,116,67,111,110,118,101,114,115,105,111,110,32,109,97,121,
+32,98,101,32,117,115,101,100,109,97,110,117,102,97,99,116,117,114,101,97,102,116
+,101,114,32,98,101,105,110,103,99,108,101,97,114,102,105,120,34,62,10,113,117,
+101,115,116,105,111,110,32,111,102,119,97,115,32,101,108,101,99,116,101,100,116,
+111,32,98,101,99,111,109,101,32,97,98,101,99,97,117,115,101,32,111,102,32,115,
+111,109,101,32,112,101,111,112,108,101,105,110,115,112,105,114,101,100,32,98,121
+,115,117,99,99,101,115,115,102,117,108,32,97,32,116,105,109,101,32,119,104,101,
+110,109,111,114,101,32,99,111,109,109,111,110,97,109,111,110,103,115,116,32,116,
+104,101,97,110,32,111,102,102,105,99,105,97,108,119,105,100,116,104,58,49,48,48,
+37,59,116,101,99,104,110,111,108,111,103,121,44,119,97,115,32,97,100,111,112,116
+,101,100,116,111,32,107,101,101,112,32,116,104,101,115,101,116,116,108,101,109,
+101,110,116,115,108,105,118,101,32,98,105,114,116,104,115,105,110,100,101,120,46
+,104,116,109,108,34,67,111,110,110,101,99,116,105,99,117,116,97,115,115,105,103,
+110,101,100,32,116,111,38,97,109,112,59,116,105,109,101,115,59,97,99,99,111,117,
+110,116,32,102,111,114,97,108,105,103,110,61,114,105,103,104,116,116,104,101,32,
+99,111,109,112,97,110,121,97,108,119,97,121,115,32,98,101,101,110,114,101,116,
+117,114,110,101,100,32,116,111,105,110,118,111,108,118,101,109,101,110,116,66,
+101,99,97,117,115,101,32,116,104,101,116,104,105,115,32,112,101,114,105,111,100,
+34,32,110,97,109,101,61,34,113,34,32,99,111,110,102,105,110,101,100,32,116,111,
+97,32,114,101,115,117,108,116,32,111,102,118,97,108,117,101,61,34,34,32,47,62,
+105,115,32,97,99,116,117,97,108,108,121,69,110,118,105,114,111,110,109,101,110,
+116,13,10,60,47,104,101,97,100,62,13,10,67,111,110,118,101,114,115,101,108,121,
+44,62,10,60,100,105,118,32,105,100,61,34,48,34,32,119,105,100,116,104,61,34,49,
+105,115,32,112,114,111,98,97,98,108,121,104,97,118,101,32,98,101,99,111,109,101,
+99,111,110,116,114,111,108,108,105,110,103,116,104,101,32,112,114,111,98,108,101
+,109,99,105,116,105,122,101,110,115,32,111,102,112,111,108,105,116,105,99,105,97
+,110,115,114,101,97,99,104,101,100,32,116,104,101,97,115,32,101,97,114,108,121,
+32,97,115,58,110,111,110,101,59,32,111,118,101,114,60,116,97,98,108,101,32,99,
+101,108,108,118,97,108,105,100,105,116,121,32,111,102,100,105,114,101,99,116,108
+,121,32,116,111,111,110,109,111,117,115,101,100,111,119,110,119,104,101,114,101,
+32,105,116,32,105,115,119,104,101,110,32,105,116,32,119,97,115,109,101,109,98,
+101,114,115,32,111,102,32,114,101,108,97,116,105,111,110,32,116,111,97,99,99,111
+,109,109,111,100,97,116,101,97,108,111,110,103,32,119,105,116,104,32,73,110,32,
+116,104,101,32,108,97,116,101,116,104,101,32,69,110,103,108,105,115,104,100,101,
+108,105,99,105,111,117,115,34,62,116,104,105,115,32,105,115,32,110,111,116,116,
+104,101,32,112,114,101,115,101,110,116,105,102,32,116,104,101,121,32,97,114,101,
+97,110,100,32,102,105,110,97,108,108,121,97,32,109,97,116,116,101,114,32,111,102
+,13,10,9,60,47,100,105,118,62,13,10,13,10,60,47,115,99,114,105,112,116,62,102,97
+,115,116,101,114,32,116,104,97,110,109,97,106,111,114,105,116,121,32,111,102,97,
+102,116,101,114,32,119,104,105,99,104,99,111,109,112,97,114,97,116,105,118,101,
+116,111,32,109,97,105,110,116,97,105,110,105,109,112,114,111,118,101,32,116,104,
+101,97,119,97,114,100,101,100,32,116,104,101,101,114,34,32,99,108,97,115,115,61,
+34,102,114,97,109,101,98,111,114,100,101,114,114,101,115,116,111,114,97,116,105,
+111,110,105,110,32,116,104,101,32,115,97,109,101,97,110,97,108,121,115,105,115,
+32,111,102,116,104,101,105,114,32,102,105,114,115,116,68,117,114,105,110,103,32,
+116,104,101,32,99,111,110,116,105,110,101,110,116,97,108,115,101,113,117,101,110
+,99,101,32,111,102,102,117,110,99,116,105,111,110,40,41,123,102,111,110,116,45,
+115,105,122,101,58,32,119,111,114,107,32,111,110,32,116,104,101,60,47,115,99,114
+,105,112,116,62,10,60,98,101,103,105,110,115,32,119,105,116,104,106,97,118,97,
+115,99,114,105,112,116,58,99,111,110,115,116,105,116,117,101,110,116,119,97,115,
+32,102,111,117,110,100,101,100,101,113,117,105,108,105,98,114,105,117,109,97,115
+,115,117,109,101,32,116,104,97,116,105,115,32,103,105,118,101,110,32,98,121,110,
+101,101,100,115,32,116,111,32,98,101,99,111,111,114,100,105,110,97,116,101,115,
+116,104,101,32,118,97,114,105,111,117,115,97,114,101,32,112,97,114,116,32,111,
+102,111,110,108,121,32,105,110,32,116,104,101,115,101,99,116,105,111,110,115,32,
+111,102,105,115,32,97,32,99,111,109,109,111,110,116,104,101,111,114,105,101,115,
+32,111,102,100,105,115,99,111,118,101,114,105,101,115,97,115,115,111,99,105,97,
+116,105,111,110,101,100,103,101,32,111,102,32,116,104,101,115,116,114,101,110,
+103,116,104,32,111,102,112,111,115,105,116,105,111,110,32,105,110,112,114,101,
+115,101,110,116,45,100,97,121,117,110,105,118,101,114,115,97,108,108,121,116,111
+,32,102,111,114,109,32,116,104,101,98,117,116,32,105,110,115,116,101,97,100,99,
+111,114,112,111,114,97,116,105,111,110,97,116,116,97,99,104,101,100,32,116,111,
+105,115,32,99,111,109,109,111,110,108,121,114,101,97,115,111,110,115,32,102,111,
+114,32,38,113,117,111,116,59,116,104,101,32,99,97,110,32,98,101,32,109,97,100,
+101,119,97,115,32,97,98,108,101,32,116,111,119,104,105,99,104,32,109,101,97,110,
+115,98,117,116,32,100,105,100,32,110,111,116,111,110,77,111,117,115,101,79,118,
+101,114,97,115,32,112,111,115,115,105,98,108,101,111,112,101,114,97,116,101,100,
+32,98,121,99,111,109,105,110,103,32,102,114,111,109,116,104,101,32,112,114,105,
+109,97,114,121,97,100,100,105,116,105,111,110,32,111,102,102,111,114,32,115,101,
+118,101,114,97,108,116,114,97,110,115,102,101,114,114,101,100,97,32,112,101,114,
+105,111,100,32,111,102,97,114,101,32,97,98,108,101,32,116,111,104,111,119,101,
+118,101,114,44,32,105,116,115,104,111,117,108,100,32,104,97,118,101,109,117,99,
+104,32,108,97,114,103,101,114,10,9,60,47,115,99,114,105,112,116,62,97,100,111,
+112,116,101,100,32,116,104,101,112,114,111,112,101,114,116,121,32,111,102,100,
+105,114,101,99,116,101,100,32,98,121,101,102,102,101,99,116,105,118,101,108,121,
+119,97,115,32,98,114,111,117,103,104,116,99,104,105,108,100,114,101,110,32,111,
+102,80,114,111,103,114,97,109,109,105,110,103,108,111,110,103,101,114,32,116,104
+,97,110,109,97,110,117,115,99,114,105,112,116,115,119,97,114,32,97,103,97,105,
+110,115,116,98,121,32,109,101,97,110,115,32,111,102,97,110,100,32,109,111,115,
+116,32,111,102,115,105,109,105,108,97,114,32,116,111,32,112,114,111,112,114,105,
+101,116,97,114,121,111,114,105,103,105,110,97,116,105,110,103,112,114,101,115,
+116,105,103,105,111,117,115,103,114,97,109,109,97,116,105,99,97,108,101,120,112,
+101,114,105,101,110,99,101,46,116,111,32,109,97,107,101,32,116,104,101,73,116,32
+,119,97,115,32,97,108,115,111,105,115,32,102,111,117,110,100,32,105,110,99,111,
+109,112,101,116,105,116,111,114,115,105,110,32,116,104,101,32,85,46,83,46,114,
+101,112,108,97,99,101,32,116,104,101,98,114,111,117,103,104,116,32,116,104,101,
+99,97,108,99,117,108,97,116,105,111,110,102,97,108,108,32,111,102,32,116,104,101
+,116,104,101,32,103,101,110,101,114,97,108,112,114,97,99,116,105,99,97,108,108,
+121,105,110,32,104,111,110,111,114,32,111,102,114,101,108,101,97,115,101,100,32,
+105,110,114,101,115,105,100,101,110,116,105,97,108,97,110,100,32,115,111,109,101
+,32,111,102,107,105,110,103,32,111,102,32,116,104,101,114,101,97,99,116,105,111,
+110,32,116,111,49,115,116,32,69,97,114,108,32,111,102,99,117,108,116,117,114,101
+,32,97,110,100,112,114,105,110,99,105,112,97,108,108,121,60,47,116,105,116,108,
+101,62,10,32,32,116,104,101,121,32,99,97,110,32,98,101,98,97,99,107,32,116,111,
+32,116,104,101,115,111,109,101,32,111,102,32,104,105,115,101,120,112,111,115,117
+,114,101,32,116,111,97,114,101,32,115,105,109,105,108,97,114,102,111,114,109,32,
+111,102,32,116,104,101,97,100,100,70,97,118,111,114,105,116,101,99,105,116,105,
+122,101,110,115,104,105,112,112,97,114,116,32,105,110,32,116,104,101,112,101,111
+,112,108,101,32,119,105,116,104,105,110,32,112,114,97,99,116,105,99,101,116,111,
+32,99,111,110,116,105,110,117,101,38,97,109,112,59,109,105,110,117,115,59,97,112
+,112,114,111,118,101,100,32,98,121,32,116,104,101,32,102,105,114,115,116,32,97,
+108,108,111,119,101,100,32,116,104,101,97,110,100,32,102,111,114,32,116,104,101,
+102,117,110,99,116,105,111,110,105,110,103,112,108,97,121,105,110,103,32,116,104
+,101,115,111,108,117,116,105,111,110,32,116,111,104,101,105,103,104,116,61,34,48
+,34,32,105,110,32,104,105,115,32,98,111,111,107,109,111,114,101,32,116,104,97,
+110,32,97,102,111,108,108,111,119,115,32,116,104,101,99,114,101,97,116,101,100,
+32,116,104,101,112,114,101,115,101,110,99,101,32,105,110,38,110,98,115,112,59,60
+,47,116,100,62,110,97,116,105,111,110,97,108,105,115,116,116,104,101,32,105,100,
+101,97,32,111,102,97,32,99,104,97,114,97,99,116,101,114,119,101,114,101,32,102,
+111,114,99,101,100,32,99,108,97,115,115,61,34,98,116,110,100,97,121,115,32,111,
+102,32,116,104,101,102,101,97,116,117,114,101,100,32,105,110,115,104,111,119,105
+,110,103,32,116,104,101,105,110,116,101,114,101,115,116,32,105,110,105,110,32,
+112,108,97,99,101,32,111,102,116,117,114,110,32,111,102,32,116,104,101,116,104,
+101,32,104,101,97,100,32,111,102,76,111,114,100,32,111,102,32,116,104,101,112,
+111,108,105,116,105,99,97,108,108,121,104,97,115,32,105,116,115,32,111,119,110,
+69,100,117,99,97,116,105,111,110,97,108,97,112,112,114,111,118,97,108,32,111,102
+,115,111,109,101,32,111,102,32,116,104,101,101,97,99,104,32,111,116,104,101,114,
+44,98,101,104,97,118,105,111,114,32,111,102,97,110,100,32,98,101,99,97,117,115,
+101,97,110,100,32,97,110,111,116,104,101,114,97,112,112,101,97,114,101,100,32,
+111,110,114,101,99,111,114,100,101,100,32,105,110,98,108,97,99,107,38,113,117,
+111,116,59,109,97,121,32,105,110,99,108,117,100,101,116,104,101,32,119,111,114,
+108,100,39,115,99,97,110,32,108,101,97,100,32,116,111,114,101,102,101,114,115,32
+,116,111,32,97,98,111,114,100,101,114,61,34,48,34,32,103,111,118,101,114,110,109
+,101,110,116,32,119,105,110,110,105,110,103,32,116,104,101,114,101,115,117,108,
+116,101,100,32,105,110,32,119,104,105,108,101,32,116,104,101,32,87,97,115,104,
+105,110,103,116,111,110,44,116,104,101,32,115,117,98,106,101,99,116,99,105,116,
+121,32,105,110,32,116,104,101,62,60,47,100,105,118,62,13,10,9,9,114,101,102,108,
+101,99,116,32,116,104,101,116,111,32,99,111,109,112,108,101,116,101,98,101,99,97
+,109,101,32,109,111,114,101,114,97,100,105,111,97,99,116,105,118,101,114,101,106
+,101,99,116,101,100,32,98,121,119,105,116,104,111,117,116,32,97,110,121,104,105,
+115,32,102,97,116,104,101,114,44,119,104,105,99,104,32,99,111,117,108,100,99,111
+,112,121,32,111,102,32,116,104,101,116,111,32,105,110,100,105,99,97,116,101,97,
+32,112,111,108,105,116,105,99,97,108,97,99,99,111,117,110,116,115,32,111,102,99,
+111,110,115,116,105,116,117,116,101,115,119,111,114,107,101,100,32,119,105,116,
+104,101,114,60,47,97,62,60,47,108,105,62,111,102,32,104,105,115,32,108,105,102,
+101,97,99,99,111,109,112,97,110,105,101,100,99,108,105,101,110,116,87,105,100,
+116,104,112,114,101,118,101,110,116,32,116,104,101,76,101,103,105,115,108,97,116
+,105,118,101,100,105,102,102,101,114,101,110,116,108,121,116,111,103,101,116,104
+,101,114,32,105,110,104,97,115,32,115,101,118,101,114,97,108,102,111,114,32,97,
+110,111,116,104,101,114,116,101,120,116,32,111,102,32,116,104,101,102,111,117,
+110,100,101,100,32,116,104,101,101,32,119,105,116,104,32,116,104,101,32,105,115,
+32,117,115,101,100,32,102,111,114,99,104,97,110,103,101,100,32,116,104,101,117,
+115,117,97,108,108,121,32,116,104,101,112,108,97,99,101,32,119,104,101,114,101,
+119,104,101,114,101,97,115,32,116,104,101,62,32,60,97,32,104,114,101,102,61,34,
+34,62,60,97,32,104,114,101,102,61,34,116,104,101,109,115,101,108,118,101,115,44,
+97,108,116,104,111,117,103,104,32,104,101,116,104,97,116,32,99,97,110,32,98,101,
+116,114,97,100,105,116,105,111,110,97,108,114,111,108,101,32,111,102,32,116,104,
+101,97,115,32,97,32,114,101,115,117,108,116,114,101,109,111,118,101,67,104,105,
+108,100,100,101,115,105,103,110,101,100,32,98,121,119,101,115,116,32,111,102,32,
+116,104,101,83,111,109,101,32,112,101,111,112,108,101,112,114,111,100,117,99,116
+,105,111,110,44,115,105,100,101,32,111,102,32,116,104,101,110,101,119,115,108,
+101,116,116,101,114,115,117,115,101,100,32,98,121,32,116,104,101,100,111,119,110
+,32,116,111,32,116,104,101,97,99,99,101,112,116,101,100,32,98,121,108,105,118,
+101,32,105,110,32,116,104,101,97,116,116,101,109,112,116,115,32,116,111,111,117,
+116,115,105,100,101,32,116,104,101,102,114,101,113,117,101,110,99,105,101,115,72
+,111,119,101,118,101,114,44,32,105,110,112,114,111,103,114,97,109,109,101,114,
+115,97,116,32,108,101,97,115,116,32,105,110,97,112,112,114,111,120,105,109,97,
+116,101,97,108,116,104,111,117,103,104,32,105,116,119,97,115,32,112,97,114,116,
+32,111,102,97,110,100,32,118,97,114,105,111,117,115,71,111,118,101,114,110,111,
+114,32,111,102,116,104,101,32,97,114,116,105,99,108,101,116,117,114,110,101,100,
+32,105,110,116,111,62,60,97,32,104,114,101,102,61,34,47,116,104,101,32,101,99,
+111,110,111,109,121,105,115,32,116,104,101,32,109,111,115,116,109,111,115,116,32
+,119,105,100,101,108,121,119,111,117,108,100,32,108,97,116,101,114,97,110,100,32
+,112,101,114,104,97,112,115,114,105,115,101,32,116,111,32,116,104,101,111,99,99,
+117,114,115,32,119,104,101,110,117,110,100,101,114,32,119,104,105,99,104,99,111,
+110,100,105,116,105,111,110,115,46,116,104,101,32,119,101,115,116,101,114,110,
+116,104,101,111,114,121,32,116,104,97,116,105,115,32,112,114,111,100,117,99,101,
+100,116,104,101,32,99,105,116,121,32,111,102,105,110,32,119,104,105,99,104,32,
+104,101,115,101,101,110,32,105,110,32,116,104,101,116,104,101,32,99,101,110,116,
+114,97,108,98,117,105,108,100,105,110,103,32,111,102,109,97,110,121,32,111,102,
+32,104,105,115,97,114,101,97,32,111,102,32,116,104,101,105,115,32,116,104,101,32
+,111,110,108,121,109,111,115,116,32,111,102,32,116,104,101,109,97,110,121,32,111
+,102,32,116,104,101,116,104,101,32,87,101,115,116,101,114,110,84,104,101,114,101
+,32,105,115,32,110,111,101,120,116,101,110,100,101,100,32,116,111,83,116,97,116,
+105,115,116,105,99,97,108,99,111,108,115,112,97,110,61,50,32,124,115,104,111,114
+,116,32,115,116,111,114,121,112,111,115,115,105,98,108,101,32,116,111,116,111,
+112,111,108,111,103,105,99,97,108,99,114,105,116,105,99,97,108,32,111,102,114,
+101,112,111,114,116,101,100,32,116,111,97,32,67,104,114,105,115,116,105,97,110,
+100,101,99,105,115,105,111,110,32,116,111,105,115,32,101,113,117,97,108,32,116,
+111,112,114,111,98,108,101,109,115,32,111,102,84,104,105,115,32,99,97,110,32,98,
+101,109,101,114,99,104,97,110,100,105,115,101,102,111,114,32,109,111,115,116,32,
+111,102,110,111,32,101,118,105,100,101,110,99,101,101,100,105,116,105,111,110,
+115,32,111,102,101,108,101,109,101,110,116,115,32,105,110,38,113,117,111,116,59,
+46,32,84,104,101,99,111,109,47,105,109,97,103,101,115,47,119,104,105,99,104,32,
+109,97,107,101,115,116,104,101,32,112,114,111,99,101,115,115,114,101,109,97,105,
+110,115,32,116,104,101,108,105,116,101,114,97,116,117,114,101,44,105,115,32,97,
+32,109,101,109,98,101,114,116,104,101,32,112,111,112,117,108,97,114,116,104,101,
+32,97,110,99,105,101,110,116,112,114,111,98,108,101,109,115,32,105,110,116,105,
+109,101,32,111,102,32,116,104,101,100,101,102,101,97,116,101,100,32,98,121,98,
+111,100,121,32,111,102,32,116,104,101,97,32,102,101,119,32,121,101,97,114,115,
+109,117,99,104,32,111,102,32,116,104,101,116,104,101,32,119,111,114,107,32,111,
+102,67,97,108,105,102,111,114,110,105,97,44,115,101,114,118,101,100,32,97,115,32
+,97,103,111,118,101,114,110,109,101,110,116,46,99,111,110,99,101,112,116,115,32,
+111,102,109,111,118,101,109,101,110,116,32,105,110,9,9,60,100,105,118,32,105,100
+,61,34,105,116,34,32,118,97,108,117,101,61,34,108,97,110,103,117,97,103,101,32,
+111,102,97,115,32,116,104,101,121,32,97,114,101,112,114,111,100,117,99,101,100,
+32,105,110,105,115,32,116,104,97,116,32,116,104,101,101,120,112,108,97,105,110,
+32,116,104,101,100,105,118,62,60,47,100,105,118,62,10,72,111,119,101,118,101,114
+,32,116,104,101,108,101,97,100,32,116,111,32,116,104,101,9,60,97,32,104,114,101,
+102,61,34,47,119,97,115,32,103,114,97,110,116,101,100,112,101,111,112,108,101,32
+,104,97,118,101,99,111,110,116,105,110,117,97,108,108,121,119,97,115,32,115,101,
+101,110,32,97,115,97,110,100,32,114,101,108,97,116,101,100,116,104,101,32,114,
+111,108,101,32,111,102,112,114,111,112,111,115,101,100,32,98,121,111,102,32,116,
+104,101,32,98,101,115,116,101,97,99,104,32,111,116,104,101,114,46,67,111,110,115
+,116,97,110,116,105,110,101,112,101,111,112,108,101,32,102,114,111,109,100,105,
+97,108,101,99,116,115,32,111,102,116,111,32,114,101,118,105,115,105,111,110,119,
+97,115,32,114,101,110,97,109,101,100,97,32,115,111,117,114,99,101,32,111,102,116
+,104,101,32,105,110,105,116,105,97,108,108,97,117,110,99,104,101,100,32,105,110,
+112,114,111,118,105,100,101,32,116,104,101,116,111,32,116,104,101,32,119,101,115
+,116,119,104,101,114,101,32,116,104,101,114,101,97,110,100,32,115,105,109,105,
+108,97,114,98,101,116,119,101,101,110,32,116,119,111,105,115,32,97,108,115,111,
+32,116,104,101,69,110,103,108,105,115,104,32,97,110,100,99,111,110,100,105,116,
+105,111,110,115,44,116,104,97,116,32,105,116,32,119,97,115,101,110,116,105,116,
+108,101,100,32,116,111,116,104,101,109,115,101,108,118,101,115,46,113,117,97,110
+,116,105,116,121,32,111,102,114,97,110,115,112,97,114,101,110,99,121,116,104,101
+,32,115,97,109,101,32,97,115,116,111,32,106,111,105,110,32,116,104,101,99,111,
+117,110,116,114,121,32,97,110,100,116,104,105,115,32,105,115,32,116,104,101,84,
+104,105,115,32,108,101,100,32,116,111,97,32,115,116,97,116,101,109,101,110,116,
+99,111,110,116,114,97,115,116,32,116,111,108,97,115,116,73,110,100,101,120,79,
+102,116,104,114,111,117,103,104,32,104,105,115,105,115,32,100,101,115,105,103,
+110,101,100,116,104,101,32,116,101,114,109,32,105,115,105,115,32,112,114,111,118
+,105,100,101,100,112,114,111,116,101,99,116,32,116,104,101,110,103,60,47,97,62,
+60,47,108,105,62,84,104,101,32,99,117,114,114,101,110,116,116,104,101,32,115,105
+,116,101,32,111,102,115,117,98,115,116,97,110,116,105,97,108,101,120,112,101,114
+,105,101,110,99,101,44,105,110,32,116,104,101,32,87,101,115,116,116,104,101,121,
+32,115,104,111,117,108,100,115,108,111,118,101,110,196,141,105,110,97,99,111,109
+,101,110,116,97,114,105,111,115,117,110,105,118,101,114,115,105,100,97,100,99,
+111,110,100,105,99,105,111,110,101,115,97,99,116,105,118,105,100,97,100,101,115,
+101,120,112,101,114,105,101,110,99,105,97,116,101,99,110,111,108,111,103,195,173
+,97,112,114,111,100,117,99,99,105,195,179,110,112,117,110,116,117,97,99,105,195,
+179,110,97,112,108,105,99,97,99,105,195,179,110,99,111,110,116,114,97,115,101,
+195,177,97,99,97,116,101,103,111,114,195,173,97,115,114,101,103,105,115,116,114,
+97,114,115,101,112,114,111,102,101,115,105,111,110,97,108,116,114,97,116,97,109,
+105,101,110,116,111,114,101,103,195,173,115,116,114,97,116,101,115,101,99,114,
+101,116,97,114,195,173,97,112,114,105,110,99,105,112,97,108,101,115,112,114,111,
+116,101,99,99,105,195,179,110,105,109,112,111,114,116,97,110,116,101,115,105,109
+,112,111,114,116,97,110,99,105,97,112,111,115,105,98,105,108,105,100,97,100,105,
+110,116,101,114,101,115,97,110,116,101,99,114,101,99,105,109,105,101,110,116,111
+,110,101,99,101,115,105,100,97,100,101,115,115,117,115,99,114,105,98,105,114,115
+,101,97,115,111,99,105,97,99,105,195,179,110,100,105,115,112,111,110,105,98,108,
+101,115,101,118,97,108,117,97,99,105,195,179,110,101,115,116,117,100,105,97,110,
+116,101,115,114,101,115,112,111,110,115,97,98,108,101,114,101,115,111,108,117,99
+,105,195,179,110,103,117,97,100,97,108,97,106,97,114,97,114,101,103,105,115,116,
+114,97,100,111,115,111,112,111,114,116,117,110,105,100,97,100,99,111,109,101,114
+,99,105,97,108,101,115,102,111,116,111,103,114,97,102,195,173,97,97,117,116,111,
+114,105,100,97,100,101,115,105,110,103,101,110,105,101,114,195,173,97,116,101,
+108,101,118,105,115,105,195,179,110,99,111,109,112,101,116,101,110,99,105,97,111
+,112,101,114,97,99,105,111,110,101,115,101,115,116,97,98,108,101,99,105,100,111,
+115,105,109,112,108,101,109,101,110,116,101,97,99,116,117,97,108,109,101,110,116
+,101,110,97,118,101,103,97,99,105,195,179,110,99,111,110,102,111,114,109,105,100
+,97,100,108,105,110,101,45,104,101,105,103,104,116,58,102,111,110,116,45,102,97,
+109,105,108,121,58,34,32,58,32,34,104,116,116,112,58,47,47,97,112,112,108,105,99
+,97,116,105,111,110,115,108,105,110,107,34,32,104,114,101,102,61,34,115,112,101,
+99,105,102,105,99,97,108,108,121,47,47,60,33,91,67,68,65,84,65,91,10,79,114,103,
+97,110,105,122,97,116,105,111,110,100,105,115,116,114,105,98,117,116,105,111,110
+,48,112,120,59,32,104,101,105,103,104,116,58,114,101,108,97,116,105,111,110,115,
+104,105,112,100,101,118,105,99,101,45,119,105,100,116,104,60,100,105,118,32,99,
+108,97,115,115,61,34,60,108,97,98,101,108,32,102,111,114,61,34,114,101,103,105,
+115,116,114,97,116,105,111,110,60,47,110,111,115,99,114,105,112,116,62,10,47,105
+,110,100,101,120,46,104,116,109,108,34,119,105,110,100,111,119,46,111,112,101,
+110,40,32,33,105,109,112,111,114,116,97,110,116,59,97,112,112,108,105,99,97,116,
+105,111,110,47,105,110,100,101,112,101,110,100,101,110,99,101,47,47,119,119,119,
+46,103,111,111,103,108,101,111,114,103,97,110,105,122,97,116,105,111,110,97,117,
+116,111,99,111,109,112,108,101,116,101,114,101,113,117,105,114,101,109,101,110,
+116,115,99,111,110,115,101,114,118,97,116,105,118,101,60,102,111,114,109,32,110,
+97,109,101,61,34,105,110,116,101,108,108,101,99,116,117,97,108,109,97,114,103,
+105,110,45,108,101,102,116,58,49,56,116,104,32,99,101,110,116,117,114,121,97,110
+,32,105,109,112,111,114,116,97,110,116,105,110,115,116,105,116,117,116,105,111,
+110,115,97,98,98,114,101,118,105,97,116,105,111,110,60,105,109,103,32,99,108,97,
+115,115,61,34,111,114,103,97,110,105,115,97,116,105,111,110,99,105,118,105,108,
+105,122,97,116,105,111,110,49,57,116,104,32,99,101,110,116,117,114,121,97,114,99
+,104,105,116,101,99,116,117,114,101,105,110,99,111,114,112,111,114,97,116,101,
+100,50,48,116,104,32,99,101,110,116,117,114,121,45,99,111,110,116,97,105,110,101
+,114,34,62,109,111,115,116,32,110,111,116,97,98,108,121,47,62,60,47,97,62,60,47,
+100,105,118,62,110,111,116,105,102,105,99,97,116,105,111,110,39,117,110,100,101,
+102,105,110,101,100,39,41,70,117,114,116,104,101,114,109,111,114,101,44,98,101,
+108,105,101,118,101,32,116,104,97,116,105,110,110,101,114,72,84,77,76,32,61,32,
+112,114,105,111,114,32,116,111,32,116,104,101,100,114,97,109,97,116,105,99,97,
+108,108,121,114,101,102,101,114,114,105,110,103,32,116,111,110,101,103,111,116,
+105,97,116,105,111,110,115,104,101,97,100,113,117,97,114,116,101,114,115,83,111,
+117,116,104,32,65,102,114,105,99,97,117,110,115,117,99,99,101,115,115,102,117,
+108,80,101,110,110,115,121,108,118,97,110,105,97,65,115,32,97,32,114,101,115,117
+,108,116,44,60,104,116,109,108,32,108,97,110,103,61,34,38,108,116,59,47,115,117,
+112,38,103,116,59,100,101,97,108,105,110,103,32,119,105,116,104,112,104,105,108,
+97,100,101,108,112,104,105,97,104,105,115,116,111,114,105,99,97,108,108,121,41,
+59,60,47,115,99,114,105,112,116,62,10,112,97,100,100,105,110,103,45,116,111,112,
+58,101,120,112,101,114,105,109,101,110,116,97,108,103,101,116,65,116,116,114,105
+,98,117,116,101,105,110,115,116,114,117,99,116,105,111,110,115,116,101,99,104,
+110,111,108,111,103,105,101,115,112,97,114,116,32,111,102,32,116,104,101,32,61,
+102,117,110,99,116,105,111,110,40,41,123,115,117,98,115,99,114,105,112,116,105,
+111,110,108,46,100,116,100,34,62,13,10,60,104,116,103,101,111,103,114,97,112,104
+,105,99,97,108,67,111,110,115,116,105,116,117,116,105,111,110,39,44,32,102,117,
+110,99,116,105,111,110,40,115,117,112,112,111,114,116,101,100,32,98,121,97,103,
+114,105,99,117,108,116,117,114,97,108,99,111,110,115,116,114,117,99,116,105,111,
+110,112,117,98,108,105,99,97,116,105,111,110,115,102,111,110,116,45,115,105,122,
+101,58,32,49,97,32,118,97,114,105,101,116,121,32,111,102,60,100,105,118,32,115,
+116,121,108,101,61,34,69,110,99,121,99,108,111,112,101,100,105,97,105,102,114,97
+,109,101,32,115,114,99,61,34,100,101,109,111,110,115,116,114,97,116,101,100,97,
+99,99,111,109,112,108,105,115,104,101,100,117,110,105,118,101,114,115,105,116,
+105,101,115,68,101,109,111,103,114,97,112,104,105,99,115,41,59,60,47,115,99,114,
+105,112,116,62,60,100,101,100,105,99,97,116,101,100,32,116,111,107,110,111,119,
+108,101,100,103,101,32,111,102,115,97,116,105,115,102,97,99,116,105,111,110,112,
+97,114,116,105,99,117,108,97,114,108,121,60,47,100,105,118,62,60,47,100,105,118,
+62,69,110,103,108,105,115,104,32,40,85,83,41,97,112,112,101,110,100,67,104,105,
+108,100,40,116,114,97,110,115,109,105,115,115,105,111,110,115,46,32,72,111,119,
+101,118,101,114,44,32,105,110,116,101,108,108,105,103,101,110,99,101,34,32,116,
+97,98,105,110,100,101,120,61,34,102,108,111,97,116,58,114,105,103,104,116,59,67,
+111,109,109,111,110,119,101,97,108,116,104,114,97,110,103,105,110,103,32,102,114
+,111,109,105,110,32,119,104,105,99,104,32,116,104,101,97,116,32,108,101,97,115,
+116,32,111,110,101,114,101,112,114,111,100,117,99,116,105,111,110,101,110,99,121
+,99,108,111,112,101,100,105,97,59,102,111,110,116,45,115,105,122,101,58,49,106,
+117,114,105,115,100,105,99,116,105,111,110,97,116,32,116,104,97,116,32,116,105,
+109,101,34,62,60,97,32,99,108,97,115,115,61,34,73,110,32,97,100,100,105,116,105,
+111,110,44,100,101,115,99,114,105,112,116,105,111,110,43,99,111,110,118,101,114,
+115,97,116,105,111,110,99,111,110,116,97,99,116,32,119,105,116,104,105,115,32,
+103,101,110,101,114,97,108,108,121,114,34,32,99,111,110,116,101,110,116,61,34,
+114,101,112,114,101,115,101,110,116,105,110,103,38,108,116,59,109,97,116,104,38,
+103,116,59,112,114,101,115,101,110,116,97,116,105,111,110,111,99,99,97,115,105,
+111,110,97,108,108,121,60,105,109,103,32,119,105,100,116,104,61,34,110,97,118,
+105,103,97,116,105,111,110,34,62,99,111,109,112,101,110,115,97,116,105,111,110,
+99,104,97,109,112,105,111,110,115,104,105,112,109,101,100,105,97,61,34,97,108,
+108,34,32,118,105,111,108,97,116,105,111,110,32,111,102,114,101,102,101,114,101,
+110,99,101,32,116,111,114,101,116,117,114,110,32,116,114,117,101,59,83,116,114,
+105,99,116,47,47,69,78,34,32,116,114,97,110,115,97,99,116,105,111,110,115,105,
+110,116,101,114,118,101,110,116,105,111,110,118,101,114,105,102,105,99,97,116,
+105,111,110,73,110,102,111,114,109,97,116,105,111,110,32,100,105,102,102,105,99,
+117,108,116,105,101,115,67,104,97,109,112,105,111,110,115,104,105,112,99,97,112,
+97,98,105,108,105,116,105,101,115,60,33,91,101,110,100,105,102,93,45,45,62,125,
+10,60,47,115,99,114,105,112,116,62,10,67,104,114,105,115,116,105,97,110,105,116,
+121,102,111,114,32,101,120,97,109,112,108,101,44,80,114,111,102,101,115,115,105,
+111,110,97,108,114,101,115,116,114,105,99,116,105,111,110,115,115,117,103,103,
+101,115,116,32,116,104,97,116,119,97,115,32,114,101,108,101,97,115,101,100,40,
+115,117,99,104,32,97,115,32,116,104,101,114,101,109,111,118,101,67,108,97,115,
+115,40,117,110,101,109,112,108,111,121,109,101,110,116,116,104,101,32,65,109,101
+,114,105,99,97,110,115,116,114,117,99,116,117,114,101,32,111,102,47,105,110,100,
+101,120,46,104,116,109,108,32,112,117,98,108,105,115,104,101,100,32,105,110,115,
+112,97,110,32,99,108,97,115,115,61,34,34,62,60,97,32,104,114,101,102,61,34,47,
+105,110,116,114,111,100,117,99,116,105,111,110,98,101,108,111,110,103,105,110,
+103,32,116,111,99,108,97,105,109,101,100,32,116,104,97,116,99,111,110,115,101,
+113,117,101,110,99,101,115,60,109,101,116,97,32,110,97,109,101,61,34,71,117,105,
+100,101,32,116,111,32,116,104,101,111,118,101,114,119,104,101,108,109,105,110,
+103,97,103,97,105,110,115,116,32,116,104,101,32,99,111,110,99,101,110,116,114,97
+,116,101,100,44,10,46,110,111,110,116,111,117,99,104,32,111,98,115,101,114,118,
+97,116,105,111,110,115,60,47,97,62,10,60,47,100,105,118,62,10,102,32,40,100,111,
+99,117,109,101,110,116,46,98,111,114,100,101,114,58,32,49,112,120,32,123,102,111
+,110,116,45,115,105,122,101,58,49,116,114,101,97,116,109,101,110,116,32,111,102,
+48,34,32,104,101,105,103,104,116,61,34,49,109,111,100,105,102,105,99,97,116,105,
+111,110,73,110,100,101,112,101,110,100,101,110,99,101,100,105,118,105,100,101,
+100,32,105,110,116,111,103,114,101,97,116,101,114,32,116,104,97,110,97,99,104,
+105,101,118,101,109,101,110,116,115,101,115,116,97,98,108,105,115,104,105,110,
+103,74,97,118,97,83,99,114,105,112,116,34,32,110,101,118,101,114,116,104,101,108
+,101,115,115,115,105,103,110,105,102,105,99,97,110,99,101,66,114,111,97,100,99,
+97,115,116,105,110,103,62,38,110,98,115,112,59,60,47,116,100,62,99,111,110,116,
+97,105,110,101,114,34,62,10,115,117,99,104,32,97,115,32,116,104,101,32,105,110,
+102,108,117,101,110,99,101,32,111,102,97,32,112,97,114,116,105,99,117,108,97,114
+,115,114,99,61,39,104,116,116,112,58,47,47,110,97,118,105,103,97,116,105,111,110
+,34,32,104,97,108,102,32,111,102,32,116,104,101,32,115,117,98,115,116,97,110,116
+,105,97,108,32,38,110,98,115,112,59,60,47,100,105,118,62,97,100,118,97,110,116,
+97,103,101,32,111,102,100,105,115,99,111,118,101,114,121,32,111,102,102,117,110,
+100,97,109,101,110,116,97,108,32,109,101,116,114,111,112,111,108,105,116,97,110,
+116,104,101,32,111,112,112,111,115,105,116,101,34,32,120,109,108,58,108,97,110,
+103,61,34,100,101,108,105,98,101,114,97,116,101,108,121,97,108,105,103,110,61,99
+,101,110,116,101,114,101,118,111,108,117,116,105,111,110,32,111,102,112,114,101,
+115,101,114,118,97,116,105,111,110,105,109,112,114,111,118,101,109,101,110,116,
+115,98,101,103,105,110,110,105,110,103,32,105,110,74,101,115,117,115,32,67,104,
+114,105,115,116,80,117,98,108,105,99,97,116,105,111,110,115,100,105,115,97,103,
+114,101,101,109,101,110,116,116,101,120,116,45,97,108,105,103,110,58,114,44,32,
+102,117,110,99,116,105,111,110,40,41,115,105,109,105,108,97,114,105,116,105,101,
+115,98,111,100,121,62,60,47,104,116,109,108,62,105,115,32,99,117,114,114,101,110
+,116,108,121,97,108,112,104,97,98,101,116,105,99,97,108,105,115,32,115,111,109,
+101,116,105,109,101,115,116,121,112,101,61,34,105,109,97,103,101,47,109,97,110,
+121,32,111,102,32,116,104,101,32,102,108,111,119,58,104,105,100,100,101,110,59,
+97,118,97,105,108,97,98,108,101,32,105,110,100,101,115,99,114,105,98,101,32,116,
+104,101,101,120,105,115,116,101,110,99,101,32,111,102,97,108,108,32,111,118,101,
+114,32,116,104,101,116,104,101,32,73,110,116,101,114,110,101,116,9,60,117,108,32
+,99,108,97,115,115,61,34,105,110,115,116,97,108,108,97,116,105,111,110,110,101,
+105,103,104,98,111,114,104,111,111,100,97,114,109,101,100,32,102,111,114,99,101,
+115,114,101,100,117,99,105,110,103,32,116,104,101,99,111,110,116,105,110,117,101
+,115,32,116,111,78,111,110,101,116,104,101,108,101,115,115,44,116,101,109,112,
+101,114,97,116,117,114,101,115,10,9,9,60,97,32,104,114,101,102,61,34,99,108,111,
+115,101,32,116,111,32,116,104,101,101,120,97,109,112,108,101,115,32,111,102,32,
+105,115,32,97,98,111,117,116,32,116,104,101,40,115,101,101,32,98,101,108,111,119
+,41,46,34,32,105,100,61,34,115,101,97,114,99,104,112,114,111,102,101,115,115,105
+,111,110,97,108,105,115,32,97,118,97,105,108,97,98,108,101,116,104,101,32,111,
+102,102,105,99,105,97,108,9,9,60,47,115,99,114,105,112,116,62,10,10,9,9,60,100,
+105,118,32,105,100,61,34,97,99,99,101,108,101,114,97,116,105,111,110,116,104,114
+,111,117,103,104,32,116,104,101,32,72,97,108,108,32,111,102,32,70,97,109,101,100
+,101,115,99,114,105,112,116,105,111,110,115,116,114,97,110,115,108,97,116,105,
+111,110,115,105,110,116,101,114,102,101,114,101,110,99,101,32,116,121,112,101,61
+,39,116,101,120,116,47,114,101,99,101,110,116,32,121,101,97,114,115,105,110,32,
+116,104,101,32,119,111,114,108,100,118,101,114,121,32,112,111,112,117,108,97,114
+,123,98,97,99,107,103,114,111,117,110,100,58,116,114,97,100,105,116,105,111,110,
+97,108,32,115,111,109,101,32,111,102,32,116,104,101,32,99,111,110,110,101,99,116
+,101,100,32,116,111,101,120,112,108,111,105,116,97,116,105,111,110,101,109,101,
+114,103,101,110,99,101,32,111,102,99,111,110,115,116,105,116,117,116,105,111,110
+,65,32,72,105,115,116,111,114,121,32,111,102,115,105,103,110,105,102,105,99,97,
+110,116,32,109,97,110,117,102,97,99,116,117,114,101,100,101,120,112,101,99,116,
+97,116,105,111,110,115,62,60,110,111,115,99,114,105,112,116,62,60,99,97,110,32,
+98,101,32,102,111,117,110,100,98,101,99,97,117,115,101,32,116,104,101,32,104,97,
+115,32,110,111,116,32,98,101,101,110,110,101,105,103,104,98,111,117,114,105,110,
+103,119,105,116,104,111,117,116,32,116,104,101,32,97,100,100,101,100,32,116,111,
+32,116,104,101,9,60,108,105,32,99,108,97,115,115,61,34,105,110,115,116,114,117,
+109,101,110,116,97,108,83,111,118,105,101,116,32,85,110,105,111,110,97,99,107,
+110,111,119,108,101,100,103,101,100,119,104,105,99,104,32,99,97,110,32,98,101,
+110,97,109,101,32,102,111,114,32,116,104,101,97,116,116,101,110,116,105,111,110,
+32,116,111,97,116,116,101,109,112,116,115,32,116,111,32,100,101,118,101,108,111,
+112,109,101,110,116,115,73,110,32,102,97,99,116,44,32,116,104,101,60,108,105,32,
+99,108,97,115,115,61,34,97,105,109,112,108,105,99,97,116,105,111,110,115,115,117
+,105,116,97,98,108,101,32,102,111,114,109,117,99,104,32,111,102,32,116,104,101,
+32,99,111,108,111,110,105,122,97,116,105,111,110,112,114,101,115,105,100,101,110
+,116,105,97,108,99,97,110,99,101,108,66,117,98,98,108,101,32,73,110,102,111,114,
+109,97,116,105,111,110,109,111,115,116,32,111,102,32,116,104,101,32,105,115,32,
+100,101,115,99,114,105,98,101,100,114,101,115,116,32,111,102,32,116,104,101,32,
+109,111,114,101,32,111,114,32,108,101,115,115,105,110,32,83,101,112,116,101,109,
+98,101,114,73,110,116,101,108,108,105,103,101,110,99,101,115,114,99,61,34,104,
+116,116,112,58,47,47,112,120,59,32,104,101,105,103,104,116,58,32,97,118,97,105,
+108,97,98,108,101,32,116,111,109,97,110,117,102,97,99,116,117,114,101,114,104,
+117,109,97,110,32,114,105,103,104,116,115,108,105,110,107,32,104,114,101,102,61,
+34,47,97,118,97,105,108,97,98,105,108,105,116,121,112,114,111,112,111,114,116,
+105,111,110,97,108,111,117,116,115,105,100,101,32,116,104,101,32,97,115,116,114,
+111,110,111,109,105,99,97,108,104,117,109,97,110,32,98,101,105,110,103,115,110,
+97,109,101,32,111,102,32,116,104,101,32,97,114,101,32,102,111,117,110,100,32,105
+,110,97,114,101,32,98,97,115,101,100,32,111,110,115,109,97,108,108,101,114,32,
+116,104,97,110,97,32,112,101,114,115,111,110,32,119,104,111,101,120,112,97,110,
+115,105,111,110,32,111,102,97,114,103,117,105,110,103,32,116,104,97,116,110,111,
+119,32,107,110,111,119,110,32,97,115,73,110,32,116,104,101,32,101,97,114,108,121
+,105,110,116,101,114,109,101,100,105,97,116,101,100,101,114,105,118,101,100,32,
+102,114,111,109,83,99,97,110,100,105,110,97,118,105,97,110,60,47,97,62,60,47,100
+,105,118,62,13,10,99,111,110,115,105,100,101,114,32,116,104,101,97,110,32,101,
+115,116,105,109,97,116,101,100,116,104,101,32,78,97,116,105,111,110,97,108,60,
+100,105,118,32,105,100,61,34,112,97,103,114,101,115,117,108,116,105,110,103,32,
+105,110,99,111,109,109,105,115,115,105,111,110,101,100,97,110,97,108,111,103,111
+,117,115,32,116,111,97,114,101,32,114,101,113,117,105,114,101,100,47,117,108,62,
+10,60,47,100,105,118,62,10,119,97,115,32,98,97,115,101,100,32,111,110,97,110,100
+,32,98,101,99,97,109,101,32,97,38,110,98,115,112,59,38,110,98,115,112,59,116,34,
+32,118,97,108,117,101,61,34,34,32,119,97,115,32,99,97,112,116,117,114,101,100,
+110,111,32,109,111,114,101,32,116,104,97,110,114,101,115,112,101,99,116,105,118,
+101,108,121,99,111,110,116,105,110,117,101,32,116,111,32,62,13,10,60,104,101,97,
+100,62,13,10,60,119,101,114,101,32,99,114,101,97,116,101,100,109,111,114,101,32,
+103,101,110,101,114,97,108,105,110,102,111,114,109,97,116,105,111,110,32,117,115
+,101,100,32,102,111,114,32,116,104,101,105,110,100,101,112,101,110,100,101,110,
+116,32,116,104,101,32,73,109,112,101,114,105,97,108,99,111,109,112,111,110,101,
+110,116,32,111,102,116,111,32,116,104,101,32,110,111,114,116,104,105,110,99,108,
+117,100,101,32,116,104,101,32,67,111,110,115,116,114,117,99,116,105,111,110,115,
+105,100,101,32,111,102,32,116,104,101,32,119,111,117,108,100,32,110,111,116,32,
+98,101,102,111,114,32,105,110,115,116,97,110,99,101,105,110,118,101,110,116,105,
+111,110,32,111,102,109,111,114,101,32,99,111,109,112,108,101,120,99,111,108,108,
+101,99,116,105,118,101,108,121,98,97,99,107,103,114,111,117,110,100,58,32,116,
+101,120,116,45,97,108,105,103,110,58,32,105,116,115,32,111,114,105,103,105,110,
+97,108,105,110,116,111,32,97,99,99,111,117,110,116,116,104,105,115,32,112,114,
+111,99,101,115,115,97,110,32,101,120,116,101,110,115,105,118,101,104,111,119,101
+,118,101,114,44,32,116,104,101,116,104,101,121,32,97,114,101,32,110,111,116,114,
+101,106,101,99,116,101,100,32,116,104,101,99,114,105,116,105,99,105,115,109,32,
+111,102,100,117,114,105,110,103,32,119,104,105,99,104,112,114,111,98,97,98,108,
+121,32,116,104,101,116,104,105,115,32,97,114,116,105,99,108,101,40,102,117,110,
+99,116,105,111,110,40,41,123,73,116,32,115,104,111,117,108,100,32,98,101,97,110,
+32,97,103,114,101,101,109,101,110,116,97,99,99,105,100,101,110,116,97,108,108,
+121,100,105,102,102,101,114,115,32,102,114,111,109,65,114,99,104,105,116,101,99,
+116,117,114,101,98,101,116,116,101,114,32,107,110,111,119,110,97,114,114,97,110,
+103,101,109,101,110,116,115,105,110,102,108,117,101,110,99,101,32,111,110,97,116
+,116,101,110,100,101,100,32,116,104,101,105,100,101,110,116,105,99,97,108,32,116
+,111,115,111,117,116,104,32,111,102,32,116,104,101,112,97,115,115,32,116,104,114
+,111,117,103,104,120,109,108,34,32,116,105,116,108,101,61,34,119,101,105,103,104
+,116,58,98,111,108,100,59,99,114,101,97,116,105,110,103,32,116,104,101,100,105,
+115,112,108,97,121,58,110,111,110,101,114,101,112,108,97,99,101,100,32,116,104,
+101,60,105,109,103,32,115,114,99,61,34,47,105,104,116,116,112,115,58,47,47,119,
+119,119,46,87,111,114,108,100,32,87,97,114,32,73,73,116,101,115,116,105,109,111,
+110,105,97,108,115,102,111,117,110,100,32,105,110,32,116,104,101,114,101,113,117
+,105,114,101,100,32,116,111,32,97,110,100,32,116,104,97,116,32,116,104,101,98,
+101,116,119,101,101,110,32,116,104,101,32,119,97,115,32,100,101,115,105,103,110,
+101,100,99,111,110,115,105,115,116,115,32,111,102,32,99,111,110,115,105,100,101,
+114,97,98,108,121,112,117,98,108,105,115,104,101,100,32,98,121,116,104,101,32,
+108,97,110,103,117,97,103,101,67,111,110,115,101,114,118,97,116,105,111,110,99,
+111,110,115,105,115,116,101,100,32,111,102,114,101,102,101,114,32,116,111,32,116
+,104,101,98,97,99,107,32,116,111,32,116,104,101,32,99,115,115,34,32,109,101,100,
+105,97,61,34,80,101,111,112,108,101,32,102,114,111,109,32,97,118,97,105,108,97,
+98,108,101,32,111,110,112,114,111,118,101,100,32,116,111,32,98,101,115,117,103,
+103,101,115,116,105,111,110,115,34,119,97,115,32,107,110,111,119,110,32,97,115,
+118,97,114,105,101,116,105,101,115,32,111,102,108,105,107,101,108,121,32,116,111
+,32,98,101,99,111,109,112,114,105,115,101,100,32,111,102,115,117,112,112,111,114
+,116,32,116,104,101,32,104,97,110,100,115,32,111,102,32,116,104,101,99,111,117,
+112,108,101,100,32,119,105,116,104,99,111,110,110,101,99,116,32,97,110,100,32,98
+,111,114,100,101,114,58,110,111,110,101,59,112,101,114,102,111,114,109,97,110,99
+,101,115,98,101,102,111,114,101,32,98,101,105,110,103,108,97,116,101,114,32,98,
+101,99,97,109,101,99,97,108,99,117,108,97,116,105,111,110,115,111,102,116,101,
+110,32,99,97,108,108,101,100,114,101,115,105,100,101,110,116,115,32,111,102,109,
+101,97,110,105,110,103,32,116,104,97,116,62,60,108,105,32,99,108,97,115,115,61,
+34,101,118,105,100,101,110,99,101,32,102,111,114,101,120,112,108,97,110,97,116,
+105,111,110,115,101,110,118,105,114,111,110,109,101,110,116,115,34,62,60,47,97,
+62,60,47,100,105,118,62,119,104,105,99,104,32,97,108,108,111,119,115,73,110,116,
+114,111,100,117,99,116,105,111,110,100,101,118,101,108,111,112,101,100,32,98,121
+,97,32,119,105,100,101,32,114,97,110,103,101,111,110,32,98,101,104,97,108,102,32
+,111,102,118,97,108,105,103,110,61,34,116,111,112,34,112,114,105,110,99,105,112,
+108,101,32,111,102,97,116,32,116,104,101,32,116,105,109,101,44,60,47,110,111,115
+,99,114,105,112,116,62,13,115,97,105,100,32,116,111,32,104,97,118,101,105,110,32
+,116,104,101,32,102,105,114,115,116,119,104,105,108,101,32,111,116,104,101,114,
+115,104,121,112,111,116,104,101,116,105,99,97,108,112,104,105,108,111,115,111,
+112,104,101,114,115,112,111,119,101,114,32,111,102,32,116,104,101,99,111,110,116
+,97,105,110,101,100,32,105,110,112,101,114,102,111,114,109,101,100,32,98,121,105
+,110,97,98,105,108,105,116,121,32,116,111,119,101,114,101,32,119,114,105,116,116
+,101,110,115,112,97,110,32,115,116,121,108,101,61,34,105,110,112,117,116,32,110,
+97,109,101,61,34,116,104,101,32,113,117,101,115,116,105,111,110,105,110,116,101,
+110,100,101,100,32,102,111,114,114,101,106,101,99,116,105,111,110,32,111,102,105
+,109,112,108,105,101,115,32,116,104,97,116,105,110,118,101,110,116,101,100,32,
+116,104,101,116,104,101,32,115,116,97,110,100,97,114,100,119,97,115,32,112,114,
+111,98,97,98,108,121,108,105,110,107,32,98,101,116,119,101,101,110,112,114,111,
+102,101,115,115,111,114,32,111,102,105,110,116,101,114,97,99,116,105,111,110,115
+,99,104,97,110,103,105,110,103,32,116,104,101,73,110,100,105,97,110,32,79,99,101
+,97,110,32,99,108,97,115,115,61,34,108,97,115,116,119,111,114,107,105,110,103,32
+,119,105,116,104,39,104,116,116,112,58,47,47,119,119,119,46,121,101,97,114,115,
+32,98,101,102,111,114,101,84,104,105,115,32,119,97,115,32,116,104,101,114,101,99
+,114,101,97,116,105,111,110,97,108,101,110,116,101,114,105,110,103,32,116,104,
+101,109,101,97,115,117,114,101,109,101,110,116,115,97,110,32,101,120,116,114,101
+,109,101,108,121,118,97,108,117,101,32,111,102,32,116,104,101,115,116,97,114,116
+,32,111,102,32,116,104,101,10,60,47,115,99,114,105,112,116,62,10,10,97,110,32,
+101,102,102,111,114,116,32,116,111,105,110,99,114,101,97,115,101,32,116,104,101,
+116,111,32,116,104,101,32,115,111,117,116,104,115,112,97,99,105,110,103,61,34,48
+,34,62,115,117,102,102,105,99,105,101,110,116,108,121,116,104,101,32,69,117,114,
+111,112,101,97,110,99,111,110,118,101,114,116,101,100,32,116,111,99,108,101,97,
+114,84,105,109,101,111,117,116,100,105,100,32,110,111,116,32,104,97,118,101,99,
+111,110,115,101,113,117,101,110,116,108,121,102,111,114,32,116,104,101,32,110,
+101,120,116,101,120,116,101,110,115,105,111,110,32,111,102,101,99,111,110,111,
+109,105,99,32,97,110,100,97,108,116,104,111,117,103,104,32,116,104,101,97,114,
+101,32,112,114,111,100,117,99,101,100,97,110,100,32,119,105,116,104,32,116,104,
+101,105,110,115,117,102,102,105,99,105,101,110,116,103,105,118,101,110,32,98,121
+,32,116,104,101,115,116,97,116,105,110,103,32,116,104,97,116,101,120,112,101,110
+,100,105,116,117,114,101,115,60,47,115,112,97,110,62,60,47,97,62,10,116,104,111,
+117,103,104,116,32,116,104,97,116,111,110,32,116,104,101,32,98,97,115,105,115,99
+,101,108,108,112,97,100,100,105,110,103,61,105,109,97,103,101,32,111,102,32,116,
+104,101,114,101,116,117,114,110,105,110,103,32,116,111,105,110,102,111,114,109,
+97,116,105,111,110,44,115,101,112,97,114,97,116,101,100,32,98,121,97,115,115,97,
+115,115,105,110,97,116,101,100,115,34,32,99,111,110,116,101,110,116,61,34,97,117
+,116,104,111,114,105,116,121,32,111,102,110,111,114,116,104,119,101,115,116,101,
+114,110,60,47,100,105,118,62,10,60,100,105,118,32,34,62,60,47,100,105,118,62,13,
+10,32,32,99,111,110,115,117,108,116,97,116,105,111,110,99,111,109,109,117,110,
+105,116,121,32,111,102,116,104,101,32,110,97,116,105,111,110,97,108,105,116,32,
+115,104,111,117,108,100,32,98,101,112,97,114,116,105,99,105,112,97,110,116,115,
+32,97,108,105,103,110,61,34,108,101,102,116,116,104,101,32,103,114,101,97,116,
+101,115,116,115,101,108,101,99,116,105,111,110,32,111,102,115,117,112,101,114,
+110,97,116,117,114,97,108,100,101,112,101,110,100,101,110,116,32,111,110,105,115
+,32,109,101,110,116,105,111,110,101,100,97,108,108,111,119,105,110,103,32,116,
+104,101,119,97,115,32,105,110,118,101,110,116,101,100,97,99,99,111,109,112,97,
+110,121,105,110,103,104,105,115,32,112,101,114,115,111,110,97,108,97,118,97,105,
+108,97,98,108,101,32,97,116,115,116,117,100,121,32,111,102,32,116,104,101,111,
+110,32,116,104,101,32,111,116,104,101,114,101,120,101,99,117,116,105,111,110,32,
+111,102,72,117,109,97,110,32,82,105,103,104,116,115,116,101,114,109,115,32,111,
+102,32,116,104,101,97,115,115,111,99,105,97,116,105,111,110,115,114,101,115,101,
+97,114,99,104,32,97,110,100,115,117,99,99,101,101,100,101,100,32,98,121,100,101,
+102,101,97,116,101,100,32,116,104,101,97,110,100,32,102,114,111,109,32,116,104,
+101,98,117,116,32,116,104,101,121,32,97,114,101,99,111,109,109,97,110,100,101,
+114,32,111,102,115,116,97,116,101,32,111,102,32,116,104,101,121,101,97,114,115,
+32,111,102,32,97,103,101,116,104,101,32,115,116,117,100,121,32,111,102,60,117,
+108,32,99,108,97,115,115,61,34,115,112,108,97,99,101,32,105,110,32,116,104,101,
+119,104,101,114,101,32,104,101,32,119,97,115,60,108,105,32,99,108,97,115,115,61,
+34,102,116,104,101,114,101,32,97,114,101,32,110,111,119,104,105,99,104,32,98,101
+,99,97,109,101,104,101,32,112,117,98,108,105,115,104,101,100,101,120,112,114,101
+,115,115,101,100,32,105,110,116,111,32,119,104,105,99,104,32,116,104,101,99,111,
+109,109,105,115,115,105,111,110,101,114,102,111,110,116,45,119,101,105,103,104,
+116,58,116,101,114,114,105,116,111,114,121,32,111,102,101,120,116,101,110,115,
+105,111,110,115,34,62,82,111,109,97,110,32,69,109,112,105,114,101,101,113,117,97
+,108,32,116,111,32,116,104,101,73,110,32,99,111,110,116,114,97,115,116,44,104,
+111,119,101,118,101,114,44,32,97,110,100,105,115,32,116,121,112,105,99,97,108,
+108,121,97,110,100,32,104,105,115,32,119,105,102,101,40,97,108,115,111,32,99,97,
+108,108,101,100,62,60,117,108,32,99,108,97,115,115,61,34,101,102,102,101,99,116,
+105,118,101,108,121,32,101,118,111,108,118,101,100,32,105,110,116,111,115,101,
+101,109,32,116,111,32,104,97,118,101,119,104,105,99,104,32,105,115,32,116,104,
+101,116,104,101,114,101,32,119,97,115,32,110,111,97,110,32,101,120,99,101,108,
+108,101,110,116,97,108,108,32,111,102,32,116,104,101,115,101,100,101,115,99,114,
+105,98,101,100,32,98,121,73,110,32,112,114,97,99,116,105,99,101,44,98,114,111,97
+,100,99,97,115,116,105,110,103,99,104,97,114,103,101,100,32,119,105,116,104,114,
+101,102,108,101,99,116,101,100,32,105,110,115,117,98,106,101,99,116,101,100,32,
+116,111,109,105,108,105,116,97,114,121,32,97,110,100,116,111,32,116,104,101,32,
+112,111,105,110,116,101,99,111,110,111,109,105,99,97,108,108,121,115,101,116,84,
+97,114,103,101,116,105,110,103,97,114,101,32,97,99,116,117,97,108,108,121,118,
+105,99,116,111,114,121,32,111,118,101,114,40,41,59,60,47,115,99,114,105,112,116,
+62,99,111,110,116,105,110,117,111,117,115,108,121,114,101,113,117,105,114,101,
+100,32,102,111,114,101,118,111,108,117,116,105,111,110,97,114,121,97,110,32,101,
+102,102,101,99,116,105,118,101,110,111,114,116,104,32,111,102,32,116,104,101,44,
+32,119,104,105,99,104,32,119,97,115,32,102,114,111,110,116,32,111,102,32,116,104
+,101,111,114,32,111,116,104,101,114,119,105,115,101,115,111,109,101,32,102,111,
+114,109,32,111,102,104,97,100,32,110,111,116,32,98,101,101,110,103,101,110,101,
+114,97,116,101,100,32,98,121,105,110,102,111,114,109,97,116,105,111,110,46,112,
+101,114,109,105,116,116,101,100,32,116,111,105,110,99,108,117,100,101,115,32,116
+,104,101,100,101,118,101,108,111,112,109,101,110,116,44,101,110,116,101,114,101,
+100,32,105,110,116,111,116,104,101,32,112,114,101,118,105,111,117,115,99,111,110
+,115,105,115,116,101,110,116,108,121,97,114,101,32,107,110,111,119,110,32,97,115
+,116,104,101,32,102,105,101,108,100,32,111,102,116,104,105,115,32,116,121,112,
+101,32,111,102,103,105,118,101,110,32,116,111,32,116,104,101,116,104,101,32,116,
+105,116,108,101,32,111,102,99,111,110,116,97,105,110,115,32,116,104,101,105,110,
+115,116,97,110,99,101,115,32,111,102,105,110,32,116,104,101,32,110,111,114,116,
+104,100,117,101,32,116,111,32,116,104,101,105,114,97,114,101,32,100,101,115,105,
+103,110,101,100,99,111,114,112,111,114,97,116,105,111,110,115,119,97,115,32,116,
+104,97,116,32,116,104,101,111,110,101,32,111,102,32,116,104,101,115,101,109,111,
+114,101,32,112,111,112,117,108,97,114,115,117,99,99,101,101,100,101,100,32,105,
+110,115,117,112,112,111,114,116,32,102,114,111,109,105,110,32,100,105,102,102,
+101,114,101,110,116,100,111,109,105,110,97,116,101,100,32,98,121,100,101,115,105
+,103,110,101,100,32,102,111,114,111,119,110,101,114,115,104,105,112,32,111,102,
+97,110,100,32,112,111,115,115,105,98,108,121,115,116,97,110,100,97,114,100,105,
+122,101,100,114,101,115,112,111,110,115,101,84,101,120,116,119,97,115,32,105,110
+,116,101,110,100,101,100,114,101,99,101,105,118,101,100,32,116,104,101,97,115,
+115,117,109,101,100,32,116,104,97,116,97,114,101,97,115,32,111,102,32,116,104,
+101,112,114,105,109,97,114,105,108,121,32,105,110,116,104,101,32,98,97,115,105,
+115,32,111,102,105,110,32,116,104,101,32,115,101,110,115,101,97,99,99,111,117,
+110,116,115,32,102,111,114,100,101,115,116,114,111,121,101,100,32,98,121,97,116,
+32,108,101,97,115,116,32,116,119,111,119,97,115,32,100,101,99,108,97,114,101,100
+,99,111,117,108,100,32,110,111,116,32,98,101,83,101,99,114,101,116,97,114,121,32
+,111,102,97,112,112,101,97,114,32,116,111,32,98,101,109,97,114,103,105,110,45,
+116,111,112,58,49,47,94,92,115,43,124,92,115,43,36,47,103,101,41,123,116,104,114
+,111,119,32,101,125,59,116,104,101,32,115,116,97,114,116,32,111,102,116,119,111,
+32,115,101,112,97,114,97,116,101,108,97,110,103,117,97,103,101,32,97,110,100,119
+,104,111,32,104,97,100,32,98,101,101,110,111,112,101,114,97,116,105,111,110,32,
+111,102,100,101,97,116,104,32,111,102,32,116,104,101,114,101,97,108,32,110,117,
+109,98,101,114,115,9,60,108,105,110,107,32,114,101,108,61,34,112,114,111,118,105
+,100,101,100,32,116,104,101,116,104,101,32,115,116,111,114,121,32,111,102,99,111
+,109,112,101,116,105,116,105,111,110,115,101,110,103,108,105,115,104,32,40,85,75
+,41,101,110,103,108,105,115,104,32,40,85,83,41,208,156,208,190,208,189,208,179,
+208,190,208,187,208,161,209,128,208,191,209,129,208,186,208,184,209,129,209,128,
+208,191,209,129,208,186,208,184,209,129,209,128,208,191,209,129,208,186,208,190,
+217,132,216,185,216,177,216,168,217,138,216,169,230,173,163,233,171,148,228,184,
+173,230,150,135,231,174,128,228,189,147,228,184,173,230,150,135,231,185,129,228,
+189,147,228,184,173,230,150,135,230,156,137,233,153,144,229,133,172,229,143,184,
+228,186,186,230,176,145,230,148,191,229,186,156,233,152,191,233,135,140,229,183,
+180,229,183,180,231,164,190,228,188,154,228,184,187,228,185,137,230,147,141,228,
+189,156,231,179,187,231,187,159,230,148,191,231,173,150,230,179,149,232,167,132,
+105,110,102,111,114,109,97,99,105,195,179,110,104,101,114,114,97,109,105,101,110
+,116,97,115,101,108,101,99,116,114,195,179,110,105,99,111,100,101,115,99,114,105
+,112,99,105,195,179,110,99,108,97,115,105,102,105,99,97,100,111,115,99,111,110,
+111,99,105,109,105,101,110,116,111,112,117,98,108,105,99,97,99,105,195,179,110,
+114,101,108,97,99,105,111,110,97,100,97,115,105,110,102,111,114,109,195,161,116,
+105,99,97,114,101,108,97,99,105,111,110,97,100,111,115,100,101,112,97,114,116,97
+,109,101,110,116,111,116,114,97,98,97,106,97,100,111,114,101,115,100,105,114,101
+,99,116,97,109,101,110,116,101,97,121,117,110,116,97,109,105,101,110,116,111,109
+,101,114,99,97,100,111,76,105,98,114,101,99,111,110,116,195,161,99,116,101,110,
+111,115,104,97,98,105,116,97,99,105,111,110,101,115,99,117,109,112,108,105,109,
+105,101,110,116,111,114,101,115,116,97,117,114,97,110,116,101,115,100,105,115,
+112,111,115,105,99,105,195,179,110,99,111,110,115,101,99,117,101,110,99,105,97,
+101,108,101,99,116,114,195,179,110,105,99,97,97,112,108,105,99,97,99,105,111,110
+,101,115,100,101,115,99,111,110,101,99,116,97,100,111,105,110,115,116,97,108,97,
+99,105,195,179,110,114,101,97,108,105,122,97,99,105,195,179,110,117,116,105,108,
+105,122,97,99,105,195,179,110,101,110,99,105,99,108,111,112,101,100,105,97,101,
+110,102,101,114,109,101,100,97,100,101,115,105,110,115,116,114,117,109,101,110,
+116,111,115,101,120,112,101,114,105,101,110,99,105,97,115,105,110,115,116,105,
+116,117,99,105,195,179,110,112,97,114,116,105,99,117,108,97,114,101,115,115,117,
+98,99,97,116,101,103,111,114,105,97,209,130,208,190,208,187,209,140,208,186,208,
+190,208,160,208,190,209,129,209,129,208,184,208,184,209,128,208,176,208,177,208,
+190,209,130,209,139,208,177,208,190,208,187,209,140,209,136,208,181,208,191,209,
+128,208,190,209,129,209,130,208,190,208,188,208,190,208,182,208,181,209,130,208,
+181,208,180,209,128,209,131,208,179,208,184,209,133,209,129,208,187,209,131,209,
+135,208,176,208,181,209,129,208,181,208,185,209,135,208,176,209,129,208,178,209,
+129,208,181,208,179,208,180,208,176,208,160,208,190,209,129,209,129,208,184,209,
+143,208,156,208,190,209,129,208,186,208,178,208,181,208,180,209,128,209,131,208,
+179,208,184,208,181,208,179,208,190,209,128,208,190,208,180,208,176,208,178,208,
+190,208,191,209,128,208,190,209,129,208,180,208,176,208,189,208,189,209,139,209,
+133,208,180,208,190,208,187,208,182,208,189,209,139,208,184,208,188,208,181,208,
+189,208,189,208,190,208,156,208,190,209,129,208,186,208,178,209,139,209,128,209,
+131,208,177,208,187,208,181,208,185,208,156,208,190,209,129,208,186,208,178,208,
+176,209,129,209,130,209,128,208,176,208,189,209,139,208,189,208,184,209,135,208,
+181,208,179,208,190,209,128,208,176,208,177,208,190,209,130,208,181,208,180,208,
+190,208,187,208,182,208,181,208,189,209,131,209,129,208,187,209,131,208,179,208,
+184,209,130,208,181,208,191,208,181,209,128,209,140,208,158,208,180,208,189,208,
+176,208,186,208,190,208,191,208,190,209,130,208,190,208,188,209,131,209,128,208,
+176,208,177,208,190,209,130,209,131,208,176,208,191,209,128,208,181,208,187,209,
+143,208,178,208,190,208,190,208,177,209,137,208,181,208,190,208,180,208,189,208,
+190,208,179,208,190,209,129,208,178,208,190,208,181,208,179,208,190,209,129,209,
+130,208,176,209,130,209,140,208,184,208,180,209,128,209,131,208,179,208,190,208,
+185,209,132,208,190,209,128,209,131,208,188,208,181,209,133,208,190,209,128,208,
+190,209,136,208,190,208,191,209,128,208,190,209,130,208,184,208,178,209,129,209,
+129,209,139,208,187,208,186,208,176,208,186,208,176,208,182,208,180,209,139,208,
+185,208,178,208,187,208,176,209,129,209,130,208,184,208,179,209,128,209,131,208,
+191,208,191,209,139,208,178,208,188,208,181,209,129,209,130,208,181,209,128,208,
+176,208,177,208,190,209,130,208,176,209,129,208,186,208,176,208,183,208,176,208,
+187,208,191,208,181,209,128,208,178,209,139,208,185,208,180,208,181,208,187,208,
+176,209,130,209,140,208,180,208,181,208,189,209,140,208,179,208,184,208,191,208,
+181,209,128,208,184,208,190,208,180,208,177,208,184,208,183,208,189,208,181,209,
+129,208,190,209,129,208,189,208,190,208,178,208,181,208,188,208,190,208,188,208,
+181,208,189,209,130,208,186,209,131,208,191,208,184,209,130,209,140,208,180,208,
+190,208,187,208,182,208,189,208,176,209,128,208,176,208,188,208,186,208,176,209,
+133,208,189,208,176,209,135,208,176,208,187,208,190,208,160,208,176,208,177,208,
+190,209,130,208,176,208,162,208,190,208,187,209,140,208,186,208,190,209,129,208,
+190,208,178,209,129,208,181,208,188,208,178,209,130,208,190,209,128,208,190,208,
+185,208,189,208,176,209,135,208,176,208,187,208,176,209,129,208,191,208,184,209,
+129,208,190,208,186,209,129,208,187,209,131,208,182,208,177,209,139,209,129,208,
+184,209,129,209,130,208,181,208,188,208,191,208,181,209,135,208,176,209,130,208,
+184,208,189,208,190,208,178,208,190,208,179,208,190,208,191,208,190,208,188,208,
+190,209,137,208,184,209,129,208,176,208,185,209,130,208,190,208,178,208,191,208,
+190,209,135,208,181,208,188,209,131,208,191,208,190,208,188,208,190,209,137,209,
+140,208,180,208,190,208,187,208,182,208,189,208,190,209,129,209,129,209,139,208,
+187,208,186,208,184,208,177,209,139,209,129,209,130,209,128,208,190,208,180,208,
+176,208,189,208,189,209,139,208,181,208,188,208,189,208,190,208,179,208,184,208,
+181,208,191,209,128,208,190,208,181,208,186,209,130,208,161,208,181,208,185,209,
+135,208,176,209,129,208,188,208,190,208,180,208,181,208,187,208,184,209,130,208,
+176,208,186,208,190,208,179,208,190,208,190,208,189,208,187,208,176,208,185,208,
+189,208,179,208,190,209,128,208,190,208,180,208,181,208,178,208,181,209,128,209,
+129,208,184,209,143,209,129,209,130,209,128,208,176,208,189,208,181,209,132,208,
+184,208,187,209,140,208,188,209,139,209,131,209,128,208,190,208,178,208,189,209,
+143,209,128,208,176,208,183,208,189,209,139,209,133,208,184,209,129,208,186,208,
+176,209,130,209,140,208,189,208,181,208,180,208,181,208,187,209,142,209,143,208,
+189,208,178,208,176,209,128,209,143,208,188,208,181,208,189,209,140,209,136,208,
+181,208,188,208,189,208,190,208,179,208,184,209,133,208,180,208,176,208,189,208,
+189,208,190,208,185,208,183,208,189,208,176,209,135,208,184,209,130,208,189,208,
+181,208,187,209,140,208,183,209,143,209,132,208,190,209,128,209,131,208,188,208,
+176,208,162,208,181,208,191,208,181,209,128,209,140,208,188,208,181,209,129,209,
+143,209,134,208,176,208,183,208,176,209,137,208,184,209,130,209,139,208,155,209,
+131,209,135,209,136,208,184,208,181,224,164,168,224,164,185,224,165,128,224,164,
+130,224,164,149,224,164,176,224,164,168,224,165,135,224,164,133,224,164,170,224,
+164,168,224,165,135,224,164,149,224,164,191,224,164,175,224,164,190,224,164,149,
+224,164,176,224,165,135,224,164,130,224,164,133,224,164,168,224,165,141,224,164,
+175,224,164,149,224,165,141,224,164,175,224,164,190,224,164,151,224,164,190,224,
+164,135,224,164,161,224,164,172,224,164,190,224,164,176,224,165,135,224,164,149,
+224,164,191,224,164,184,224,165,128,224,164,166,224,164,191,224,164,175,224,164,
+190,224,164,170,224,164,185,224,164,178,224,165,135,224,164,184,224,164,191,224,
+164,130,224,164,185,224,164,173,224,164,190,224,164,176,224,164,164,224,164,133,
+224,164,170,224,164,168,224,165,128,224,164,181,224,164,190,224,164,178,224,165,
+135,224,164,184,224,165,135,224,164,181,224,164,190,224,164,149,224,164,176,224,
+164,164,224,165,135,224,164,174,224,165,135,224,164,176,224,165,135,224,164,185,
+224,165,139,224,164,168,224,165,135,224,164,184,224,164,149,224,164,164,224,165,
+135,224,164,172,224,164,185,224,165,129,224,164,164,224,164,184,224,164,190,224,
+164,135,224,164,159,224,164,185,224,165,139,224,164,151,224,164,190,224,164,156,
+224,164,190,224,164,168,224,165,135,224,164,174,224,164,191,224,164,168,224,164,
+159,224,164,149,224,164,176,224,164,164,224,164,190,224,164,149,224,164,176,224,
+164,168,224,164,190,224,164,137,224,164,168,224,164,149,224,165,135,224,164,175,
+224,164,185,224,164,190,224,164,129,224,164,184,224,164,172,224,164,184,224,165,
+135,224,164,173,224,164,190,224,164,183,224,164,190,224,164,134,224,164,170,224,
+164,149,224,165,135,224,164,178,224,164,191,224,164,175,224,165,135,224,164,182,
+224,165,129,224,164,176,224,165,130,224,164,135,224,164,184,224,164,149,224,165,
+135,224,164,152,224,164,130,224,164,159,224,165,135,224,164,174,224,165,135,224,
+164,176,224,165,128,224,164,184,224,164,149,224,164,164,224,164,190,224,164,174,
+224,165,135,224,164,176,224,164,190,224,164,178,224,165,135,224,164,149,224,164,
+176,224,164,133,224,164,167,224,164,191,224,164,149,224,164,133,224,164,170,224,
+164,168,224,164,190,224,164,184,224,164,174,224,164,190,224,164,156,224,164,174,
+224,165,129,224,164,157,224,165,135,224,164,149,224,164,190,224,164,176,224,164,
+163,224,164,185,224,165,139,224,164,164,224,164,190,224,164,149,224,164,161,224,
+164,188,224,165,128,224,164,175,224,164,185,224,164,190,224,164,130,224,164,185,
+224,165,139,224,164,159,224,164,178,224,164,182,224,164,172,224,165,141,224,164,
+166,224,164,178,224,164,191,224,164,175,224,164,190,224,164,156,224,165,128,224,
+164,181,224,164,168,224,164,156,224,164,190,224,164,164,224,164,190,224,164,149,
+224,165,136,224,164,184,224,165,135,224,164,134,224,164,170,224,164,149,224,164,
+190,224,164,181,224,164,190,224,164,178,224,165,128,224,164,166,224,165,135,224,
+164,168,224,165,135,224,164,170,224,165,130,224,164,176,224,165,128,224,164,170,
+224,164,190,224,164,168,224,165,128,224,164,137,224,164,184,224,164,149,224,165,
+135,224,164,185,224,165,139,224,164,151,224,165,128,224,164,172,224,165,136,224,
+164,160,224,164,149,224,164,134,224,164,170,224,164,149,224,165,128,224,164,181,
+224,164,176,224,165,141,224,164,183,224,164,151,224,164,190,224,164,130,224,164,
+181,224,164,134,224,164,170,224,164,149,224,165,139,224,164,156,224,164,191,224,
+164,178,224,164,190,224,164,156,224,164,190,224,164,168,224,164,190,224,164,184,
+224,164,185,224,164,174,224,164,164,224,164,185,224,164,174,224,165,135,224,164,
+130,224,164,137,224,164,168,224,164,149,224,165,128,224,164,175,224,164,190,224,
+164,185,224,165,130,224,164,166,224,164,176,224,165,141,224,164,156,224,164,184,
+224,165,130,224,164,154,224,165,128,224,164,170,224,164,184,224,164,130,224,164,
+166,224,164,184,224,164,181,224,164,190,224,164,178,224,164,185,224,165,139,224,
+164,168,224,164,190,224,164,185,224,165,139,224,164,164,224,165,128,224,164,156,
+224,165,136,224,164,184,224,165,135,224,164,181,224,164,190,224,164,170,224,164,
+184,224,164,156,224,164,168,224,164,164,224,164,190,224,164,168,224,165,135,224,
+164,164,224,164,190,224,164,156,224,164,190,224,164,176,224,165,128,224,164,152,
+224,164,190,224,164,175,224,164,178,224,164,156,224,164,191,224,164,178,224,165,
+135,224,164,168,224,165,128,224,164,154,224,165,135,224,164,156,224,164,190,224,
+164,130,224,164,154,224,164,170,224,164,164,224,165,141,224,164,176,224,164,151,
+224,165,130,224,164,151,224,164,178,224,164,156,224,164,190,224,164,164,224,165,
+135,224,164,172,224,164,190,224,164,185,224,164,176,224,164,134,224,164,170,224,
+164,168,224,165,135,224,164,181,224,164,190,224,164,185,224,164,168,224,164,135,
+224,164,184,224,164,149,224,164,190,224,164,184,224,165,129,224,164,172,224,164,
+185,224,164,176,224,164,185,224,164,168,224,165,135,224,164,135,224,164,184,224,
+164,184,224,165,135,224,164,184,224,164,185,224,164,191,224,164,164,224,164,172,
+224,164,161,224,164,188,224,165,135,224,164,152,224,164,159,224,164,168,224,164,
+190,224,164,164,224,164,178,224,164,190,224,164,182,224,164,170,224,164,190,224,
+164,130,224,164,154,224,164,182,224,165,141,224,164,176,224,165,128,224,164,172,
+224,164,161,224,164,188,224,165,128,224,164,185,224,165,139,224,164,164,224,165,
+135,224,164,184,224,164,190,224,164,136,224,164,159,224,164,182,224,164,190,224,
+164,175,224,164,166,224,164,184,224,164,149,224,164,164,224,165,128,224,164,156,
+224,164,190,224,164,164,224,165,128,224,164,181,224,164,190,224,164,178,224,164,
+190,224,164,185,224,164,156,224,164,190,224,164,176,224,164,170,224,164,159,224,
+164,168,224,164,190,224,164,176,224,164,150,224,164,168,224,165,135,224,164,184,
+224,164,161,224,164,188,224,164,149,224,164,174,224,164,191,224,164,178,224,164,
+190,224,164,137,224,164,184,224,164,149,224,165,128,224,164,149,224,165,135,224,
+164,181,224,164,178,224,164,178,224,164,151,224,164,164,224,164,190,224,164,150,
+224,164,190,224,164,168,224,164,190,224,164,133,224,164,176,224,165,141,224,164,
+165,224,164,156,224,164,185,224,164,190,224,164,130,224,164,166,224,165,135,224,
+164,150,224,164,190,224,164,170,224,164,185,224,164,178,224,165,128,224,164,168,
+224,164,191,224,164,175,224,164,174,224,164,172,224,164,191,224,164,168,224,164,
+190,224,164,172,224,165,136,224,164,130,224,164,149,224,164,149,224,164,185,224,
+165,128,224,164,130,224,164,149,224,164,185,224,164,168,224,164,190,224,164,166,
+224,165,135,224,164,164,224,164,190,224,164,185,224,164,174,224,164,178,224,165,
+135,224,164,149,224,164,190,224,164,171,224,165,128,224,164,156,224,164,172,224,
+164,149,224,164,191,224,164,164,224,165,129,224,164,176,224,164,164,224,164,174,
+224,164,190,224,164,130,224,164,151,224,164,181,224,164,185,224,165,128,224,164,
+130,224,164,176,224,165,139,224,164,156,224,164,188,224,164,174,224,164,191,224,
+164,178,224,165,128,224,164,134,224,164,176,224,165,139,224,164,170,224,164,184,
+224,165,135,224,164,168,224,164,190,224,164,175,224,164,190,224,164,166,224,164,
+181,224,164,178,224,165,135,224,164,168,224,165,135,224,164,150,224,164,190,224,
+164,164,224,164,190,224,164,149,224,164,176,224,165,128,224,164,172,224,164,137,
+224,164,168,224,164,149,224,164,190,224,164,156,224,164,181,224,164,190,224,164,
+172,224,164,170,224,165,130,224,164,176,224,164,190,224,164,172,224,164,161,224,
+164,188,224,164,190,224,164,184,224,165,140,224,164,166,224,164,190,224,164,182,
+224,165,135,224,164,175,224,164,176,224,164,149,224,164,191,224,164,175,224,165,
+135,224,164,149,224,164,185,224,164,190,224,164,130,224,164,133,224,164,149,224,
+164,184,224,164,176,224,164,172,224,164,168,224,164,190,224,164,143,224,164,181,
+224,164,185,224,164,190,224,164,130,224,164,184,224,165,141,224,164,165,224,164,
+178,224,164,174,224,164,191,224,164,178,224,165,135,224,164,178,224,165,135,224,
+164,150,224,164,149,224,164,181,224,164,191,224,164,183,224,164,175,224,164,149,
+224,165,141,224,164,176,224,164,130,224,164,184,224,164,174,224,165,130,224,164,
+185,224,164,165,224,164,190,224,164,168,224,164,190,216,170,216,179,216,170,216,
+183,217,138,216,185,217,133,216,180,216,167,216,177,217,131,216,169,216,168,217,
+136,216,167,216,179,216,183,216,169,216,167,217,132,216,181,217,129,216,173,216,
+169,217,133,217,136,216,167,216,182,217,138,216,185,216,167,217,132,216,174,216,
+167,216,181,216,169,216,167,217,132,217,133,216,178,217,138,216,175,216,167,217,
+132,216,185,216,167,217,133,216,169,216,167,217,132,217,131,216,167,216,170,216,
+168,216,167,217,132,216,177,216,175,217,136,216,175,216,168,216,177,217,134,216,
+167,217,133,216,172,216,167,217,132,216,175,217,136,217,132,216,169,216,167,217,
+132,216,185,216,167,217,132,217,133,216,167,217,132,217,133,217,136,217,130,216,
+185,216,167,217,132,216,185,216,177,216,168,217,138,216,167,217,132,216,179,216,
+177,217,138,216,185,216,167,217,132,216,172,217,136,216,167,217,132,216,167,217,
+132,216,176,217,135,216,167,216,168,216,167,217,132,216,173,217,138,216,167,216,
+169,216,167,217,132,216,173,217,130,217,136,217,130,216,167,217,132,217,131,216,
+177,217,138,217,133,216,167,217,132,216,185,216,177,216,167,217,130,217,133,216,
+173,217,129,217,136,216,184,216,169,216,167,217,132,216,171,216,167,217,134,217,
+138,217,133,216,180,216,167,217,135,216,175,216,169,216,167,217,132,217,133,216,
+177,216,163,216,169,216,167,217,132,217,130,216,177,216,162,217,134,216,167,217,
+132,216,180,216,168,216,167,216,168,216,167,217,132,216,173,217,136,216,167,216,
+177,216,167,217,132,216,172,216,175,217,138,216,175,216,167,217,132,216,163,216,
+179,216,177,216,169,216,167,217,132,216,185,217,132,217,136,217,133,217,133,216,
+172,217,133,217,136,216,185,216,169,216,167,217,132,216,177,216,173,217,133,217,
+134,216,167,217,132,217,134,217,130,216,167,216,183,217,129,217,132,216,179,216,
+183,217,138,217,134,216,167,217,132,217,131,217,136,217,138,216,170,216,167,217,
+132,216,175,217,134,217,138,216,167,216,168,216,177,217,131,216,167,216,170,217,
+135,216,167,217,132,216,177,217,138,216,167,216,182,216,170,216,173,217,138,216,
+167,216,170,217,138,216,168,216,170,217,136,217,130,217,138,216,170,216,167,217,
+132,216,163,217,136,217,132,217,137,216,167,217,132,216,168,216,177,217,138,216,
+175,216,167,217,132,217,131,217,132,216,167,217,133,216,167,217,132,216,177,216,
+167,216,168,216,183,216,167,217,132,216,180,216,174,216,181,217,138,216,179,217,
+138,216,167,216,177,216,167,216,170,216,167,217,132,216,171,216,167,217,132,216,
+171,216,167,217,132,216,181,217,132,216,167,216,169,216,167,217,132,216,173,216,
+175,217,138,216,171,216,167,217,132,216,178,217,136,216,167,216,177,216,167,217,
+132,216,174,217,132,217,138,216,172,216,167,217,132,216,172,217,133,217,138,216,
+185,216,167,217,132,216,185,216,167,217,133,217,135,216,167,217,132,216,172,217,
+133,216,167,217,132,216,167,217,132,216,179,216,167,216,185,216,169,217,133,216,
+180,216,167,217,135,216,175,217,135,216,167,217,132,216,177,216,166,217,138,216,
+179,216,167,217,132,216,175,216,174,217,136,217,132,216,167,217,132,217,129,217,
+134,217,138,216,169,216,167,217,132,217,131,216,170,216,167,216,168,216,167,217,
+132,216,175,217,136,216,177,217,138,216,167,217,132,216,175,216,177,217,136,216,
+179,216,167,216,179,216,170,216,186,216,177,217,130,216,170,216,181,216,167,217,
+133,217,138,217,133,216,167,217,132,216,168,217,134,216,167,216,170,216,167,217,
+132,216,185,216,184,217,138,217,133,101,110,116,101,114,116,97,105,110,109,101,
+110,116,117,110,100,101,114,115,116,97,110,100,105,110,103,32,61,32,102,117,110,
+99,116,105,111,110,40,41,46,106,112,103,34,32,119,105,100,116,104,61,34,99,111,
+110,102,105,103,117,114,97,116,105,111,110,46,112,110,103,34,32,119,105,100,116,
+104,61,34,60,98,111,100,121,32,99,108,97,115,115,61,34,77,97,116,104,46,114,97,
+110,100,111,109,40,41,99,111,110,116,101,109,112,111,114,97,114,121,32,85,110,
+105,116,101,100,32,83,116,97,116,101,115,99,105,114,99,117,109,115,116,97,110,99
+,101,115,46,97,112,112,101,110,100,67,104,105,108,100,40,111,114,103,97,110,105,
+122,97,116,105,111,110,115,60,115,112,97,110,32,99,108,97,115,115,61,34,34,62,60
+,105,109,103,32,115,114,99,61,34,47,100,105,115,116,105,110,103,117,105,115,104,
+101,100,116,104,111,117,115,97,110,100,115,32,111,102,32,99,111,109,109,117,110,
+105,99,97,116,105,111,110,99,108,101,97,114,34,62,60,47,100,105,118,62,105,110,
+118,101,115,116,105,103,97,116,105,111,110,102,97,118,105,99,111,110,46,105,99,
+111,34,32,109,97,114,103,105,110,45,114,105,103,104,116,58,98,97,115,101,100,32,
+111,110,32,116,104,101,32,77,97,115,115,97,99,104,117,115,101,116,116,115,116,97
+,98,108,101,32,98,111,114,100,101,114,61,105,110,116,101,114,110,97,116,105,111,
+110,97,108,97,108,115,111,32,107,110,111,119,110,32,97,115,112,114,111,110,117,
+110,99,105,97,116,105,111,110,98,97,99,107,103,114,111,117,110,100,58,35,102,112
+,97,100,100,105,110,103,45,108,101,102,116,58,70,111,114,32,101,120,97,109,112,
+108,101,44,32,109,105,115,99,101,108,108,97,110,101,111,117,115,38,108,116,59,47
+,109,97,116,104,38,103,116,59,112,115,121,99,104,111,108,111,103,105,99,97,108,
+105,110,32,112,97,114,116,105,99,117,108,97,114,101,97,114,99,104,34,32,116,121,
+112,101,61,34,102,111,114,109,32,109,101,116,104,111,100,61,34,97,115,32,111,112
+,112,111,115,101,100,32,116,111,83,117,112,114,101,109,101,32,67,111,117,114,116
+,111,99,99,97,115,105,111,110,97,108,108,121,32,65,100,100,105,116,105,111,110,
+97,108,108,121,44,78,111,114,116,104,32,65,109,101,114,105,99,97,112,120,59,98,
+97,99,107,103,114,111,117,110,100,111,112,112,111,114,116,117,110,105,116,105,
+101,115,69,110,116,101,114,116,97,105,110,109,101,110,116,46,116,111,76,111,119,
+101,114,67,97,115,101,40,109,97,110,117,102,97,99,116,117,114,105,110,103,112,
+114,111,102,101,115,115,105,111,110,97,108,32,99,111,109,98,105,110,101,100,32,
+119,105,116,104,70,111,114,32,105,110,115,116,97,110,99,101,44,99,111,110,115,
+105,115,116,105,110,103,32,111,102,34,32,109,97,120,108,101,110,103,116,104,61,
+34,114,101,116,117,114,110,32,102,97,108,115,101,59,99,111,110,115,99,105,111,
+117,115,110,101,115,115,77,101,100,105,116,101,114,114,97,110,101,97,110,101,120
+,116,114,97,111,114,100,105,110,97,114,121,97,115,115,97,115,115,105,110,97,116,
+105,111,110,115,117,98,115,101,113,117,101,110,116,108,121,32,98,117,116,116,111
+,110,32,116,121,112,101,61,34,116,104,101,32,110,117,109,98,101,114,32,111,102,
+116,104,101,32,111,114,105,103,105,110,97,108,32,99,111,109,112,114,101,104,101,
+110,115,105,118,101,114,101,102,101,114,115,32,116,111,32,116,104,101,60,47,117,
+108,62,10,60,47,100,105,118,62,10,112,104,105,108,111,115,111,112,104,105,99,97,
+108,108,111,99,97,116,105,111,110,46,104,114,101,102,119,97,115,32,112,117,98,
+108,105,115,104,101,100,83,97,110,32,70,114,97,110,99,105,115,99,111,40,102,117,
+110,99,116,105,111,110,40,41,123,10,60,100,105,118,32,105,100,61,34,109,97,105,
+110,115,111,112,104,105,115,116,105,99,97,116,101,100,109,97,116,104,101,109,97,
+116,105,99,97,108,32,47,104,101,97,100,62,13,10,60,98,111,100,121,115,117,103,
+103,101,115,116,115,32,116,104,97,116,100,111,99,117,109,101,110,116,97,116,105,
+111,110,99,111,110,99,101,110,116,114,97,116,105,111,110,114,101,108,97,116,105,
+111,110,115,104,105,112,115,109,97,121,32,104,97,118,101,32,98,101,101,110,40,
+102,111,114,32,101,120,97,109,112,108,101,44,84,104,105,115,32,97,114,116,105,99
+,108,101,32,105,110,32,115,111,109,101,32,99,97,115,101,115,112,97,114,116,115,
+32,111,102,32,116,104,101,32,100,101,102,105,110,105,116,105,111,110,32,111,102,
+71,114,101,97,116,32,66,114,105,116,97,105,110,32,99,101,108,108,112,97,100,100,
+105,110,103,61,101,113,117,105,118,97,108,101,110,116,32,116,111,112,108,97,99,
+101,104,111,108,100,101,114,61,34,59,32,102,111,110,116,45,115,105,122,101,58,32
+,106,117,115,116,105,102,105,99,97,116,105,111,110,98,101,108,105,101,118,101,
+100,32,116,104,97,116,115,117,102,102,101,114,101,100,32,102,114,111,109,97,116,
+116,101,109,112,116,101,100,32,116,111,32,108,101,97,100,101,114,32,111,102,32,
+116,104,101,99,114,105,112,116,34,32,115,114,99,61,34,47,40,102,117,110,99,116,
+105,111,110,40,41,32,123,97,114,101,32,97,118,97,105,108,97,98,108,101,10,9,60,
+108,105,110,107,32,114,101,108,61,34,32,115,114,99,61,39,104,116,116,112,58,47,
+47,105,110,116,101,114,101,115,116,101,100,32,105,110,99,111,110,118,101,110,116
+,105,111,110,97,108,32,34,32,97,108,116,61,34,34,32,47,62,60,47,97,114,101,32,
+103,101,110,101,114,97,108,108,121,104,97,115,32,97,108,115,111,32,98,101,101,
+110,109,111,115,116,32,112,111,112,117,108,97,114,32,99,111,114,114,101,115,112,
+111,110,100,105,110,103,99,114,101,100,105,116,101,100,32,119,105,116,104,116,
+121,108,101,61,34,98,111,114,100,101,114,58,60,47,97,62,60,47,115,112,97,110,62,
+60,47,46,103,105,102,34,32,119,105,100,116,104,61,34,60,105,102,114,97,109,101,
+32,115,114,99,61,34,116,97,98,108,101,32,99,108,97,115,115,61,34,105,110,108,105
+,110,101,45,98,108,111,99,107,59,97,99,99,111,114,100,105,110,103,32,116,111,32,
+116,111,103,101,116,104,101,114,32,119,105,116,104,97,112,112,114,111,120,105,
+109,97,116,101,108,121,112,97,114,108,105,97,109,101,110,116,97,114,121,109,111,
+114,101,32,97,110,100,32,109,111,114,101,100,105,115,112,108,97,121,58,110,111,
+110,101,59,116,114,97,100,105,116,105,111,110,97,108,108,121,112,114,101,100,111
+,109,105,110,97,110,116,108,121,38,110,98,115,112,59,124,38,110,98,115,112,59,38
+,110,98,115,112,59,60,47,115,112,97,110,62,32,99,101,108,108,115,112,97,99,105,
+110,103,61,60,105,110,112,117,116,32,110,97,109,101,61,34,111,114,34,32,99,111,
+110,116,101,110,116,61,34,99,111,110,116,114,111,118,101,114,115,105,97,108,112,
+114,111,112,101,114,116,121,61,34,111,103,58,47,120,45,115,104,111,99,107,119,97
+,118,101,45,100,101,109,111,110,115,116,114,97,116,105,111,110,115,117,114,114,
+111,117,110,100,101,100,32,98,121,78,101,118,101,114,116,104,101,108,101,115,115
+,44,119,97,115,32,116,104,101,32,102,105,114,115,116,99,111,110,115,105,100,101,
+114,97,98,108,101,32,65,108,116,104,111,117,103,104,32,116,104,101,32,99,111,108
+,108,97,98,111,114,97,116,105,111,110,115,104,111,117,108,100,32,110,111,116,32,
+98,101,112,114,111,112,111,114,116,105,111,110,32,111,102,60,115,112,97,110,32,
+115,116,121,108,101,61,34,107,110,111,119,110,32,97,115,32,116,104,101,32,115,
+104,111,114,116,108,121,32,97,102,116,101,114,102,111,114,32,105,110,115,116,97,
+110,99,101,44,100,101,115,99,114,105,98,101,100,32,97,115,32,47,104,101,97,100,
+62,10,60,98,111,100,121,32,115,116,97,114,116,105,110,103,32,119,105,116,104,105
+,110,99,114,101,97,115,105,110,103,108,121,32,116,104,101,32,102,97,99,116,32,
+116,104,97,116,100,105,115,99,117,115,115,105,111,110,32,111,102,109,105,100,100
+,108,101,32,111,102,32,116,104,101,97,110,32,105,110,100,105,118,105,100,117,97,
+108,100,105,102,102,105,99,117,108,116,32,116,111,32,112,111,105,110,116,32,111,
+102,32,118,105,101,119,104,111,109,111,115,101,120,117,97,108,105,116,121,97,99,
+99,101,112,116,97,110,99,101,32,111,102,60,47,115,112,97,110,62,60,47,100,105,
+118,62,109,97,110,117,102,97,99,116,117,114,101,114,115,111,114,105,103,105,110,
+32,111,102,32,116,104,101,99,111,109,109,111,110,108,121,32,117,115,101,100,105,
+109,112,111,114,116,97,110,99,101,32,111,102,100,101,110,111,109,105,110,97,116,
+105,111,110,115,98,97,99,107,103,114,111,117,110,100,58,32,35,108,101,110,103,
+116,104,32,111,102,32,116,104,101,100,101,116,101,114,109,105,110,97,116,105,111
+,110,97,32,115,105,103,110,105,102,105,99,97,110,116,34,32,98,111,114,100,101,
+114,61,34,48,34,62,114,101,118,111,108,117,116,105,111,110,97,114,121,112,114,
+105,110,99,105,112,108,101,115,32,111,102,105,115,32,99,111,110,115,105,100,101,
+114,101,100,119,97,115,32,100,101,118,101,108,111,112,101,100,73,110,100,111,45,
+69,117,114,111,112,101,97,110,118,117,108,110,101,114,97,98,108,101,32,116,111,
+112,114,111,112,111,110,101,110,116,115,32,111,102,97,114,101,32,115,111,109,101
+,116,105,109,101,115,99,108,111,115,101,114,32,116,111,32,116,104,101,78,101,119
+,32,89,111,114,107,32,67,105,116,121,32,110,97,109,101,61,34,115,101,97,114,99,
+104,97,116,116,114,105,98,117,116,101,100,32,116,111,99,111,117,114,115,101,32,
+111,102,32,116,104,101,109,97,116,104,101,109,97,116,105,99,105,97,110,98,121,32
+,116,104,101,32,101,110,100,32,111,102,97,116,32,116,104,101,32,101,110,100,32,
+111,102,34,32,98,111,114,100,101,114,61,34,48,34,32,116,101,99,104,110,111,108,
+111,103,105,99,97,108,46,114,101,109,111,118,101,67,108,97,115,115,40,98,114,97,
+110,99,104,32,111,102,32,116,104,101,101,118,105,100,101,110,99,101,32,116,104,
+97,116,33,91,101,110,100,105,102,93,45,45,62,13,10,73,110,115,116,105,116,117,
+116,101,32,111,102,32,105,110,116,111,32,97,32,115,105,110,103,108,101,114,101,
+115,112,101,99,116,105,118,101,108,121,46,97,110,100,32,116,104,101,114,101,102,
+111,114,101,112,114,111,112,101,114,116,105,101,115,32,111,102,105,115,32,108,
+111,99,97,116,101,100,32,105,110,115,111,109,101,32,111,102,32,119,104,105,99,
+104,84,104,101,114,101,32,105,115,32,97,108,115,111,99,111,110,116,105,110,117,
+101,100,32,116,111,32,97,112,112,101,97,114,97,110,99,101,32,111,102,32,38,97,
+109,112,59,110,100,97,115,104,59,32,100,101,115,99,114,105,98,101,115,32,116,104
+,101,99,111,110,115,105,100,101,114,97,116,105,111,110,97,117,116,104,111,114,32
+,111,102,32,116,104,101,105,110,100,101,112,101,110,100,101,110,116,108,121,101,
+113,117,105,112,112,101,100,32,119,105,116,104,100,111,101,115,32,110,111,116,32
+,104,97,118,101,60,47,97,62,60,97,32,104,114,101,102,61,34,99,111,110,102,117,
+115,101,100,32,119,105,116,104,60,108,105,110,107,32,104,114,101,102,61,34,47,97
+,116,32,116,104,101,32,97,103,101,32,111,102,97,112,112,101,97,114,32,105,110,32
+,116,104,101,84,104,101,115,101,32,105,110,99,108,117,100,101,114,101,103,97,114
+,100,108,101,115,115,32,111,102,99,111,117,108,100,32,98,101,32,117,115,101,100,
+32,115,116,121,108,101,61,38,113,117,111,116,59,115,101,118,101,114,97,108,32,
+116,105,109,101,115,114,101,112,114,101,115,101,110,116,32,116,104,101,98,111,
+100,121,62,10,60,47,104,116,109,108,62,116,104,111,117,103,104,116,32,116,111,32
+,98,101,112,111,112,117,108,97,116,105,111,110,32,111,102,112,111,115,115,105,98
+,105,108,105,116,105,101,115,112,101,114,99,101,110,116,97,103,101,32,111,102,97
+,99,99,101,115,115,32,116,111,32,116,104,101,97,110,32,97,116,116,101,109,112,
+116,32,116,111,112,114,111,100,117,99,116,105,111,110,32,111,102,106,113,117,101
+,114,121,47,106,113,117,101,114,121,116,119,111,32,100,105,102,102,101,114,101,
+110,116,98,101,108,111,110,103,32,116,111,32,116,104,101,101,115,116,97,98,108,
+105,115,104,109,101,110,116,114,101,112,108,97,99,105,110,103,32,116,104,101,100
+,101,115,99,114,105,112,116,105,111,110,34,32,100,101,116,101,114,109,105,110,
+101,32,116,104,101,97,118,97,105,108,97,98,108,101,32,102,111,114,65,99,99,111,
+114,100,105,110,103,32,116,111,32,119,105,100,101,32,114,97,110,103,101,32,111,
+102,9,60,100,105,118,32,99,108,97,115,115,61,34,109,111,114,101,32,99,111,109,
+109,111,110,108,121,111,114,103,97,110,105,115,97,116,105,111,110,115,102,117,
+110,99,116,105,111,110,97,108,105,116,121,119,97,115,32,99,111,109,112,108,101,
+116,101,100,32,38,97,109,112,59,109,100,97,115,104,59,32,112,97,114,116,105,99,
+105,112,97,116,105,111,110,116,104,101,32,99,104,97,114,97,99,116,101,114,97,110
+,32,97,100,100,105,116,105,111,110,97,108,97,112,112,101,97,114,115,32,116,111,
+32,98,101,102,97,99,116,32,116,104,97,116,32,116,104,101,97,110,32,101,120,97,
+109,112,108,101,32,111,102,115,105,103,110,105,102,105,99,97,110,116,108,121,111
+,110,109,111,117,115,101,111,118,101,114,61,34,98,101,99,97,117,115,101,32,116,
+104,101,121,32,97,115,121,110,99,32,61,32,116,114,117,101,59,112,114,111,98,108,
+101,109,115,32,119,105,116,104,115,101,101,109,115,32,116,111,32,104,97,118,101,
+116,104,101,32,114,101,115,117,108,116,32,111,102,32,115,114,99,61,34,104,116,
+116,112,58,47,47,102,97,109,105,108,105,97,114,32,119,105,116,104,112,111,115,
+115,101,115,115,105,111,110,32,111,102,102,117,110,99,116,105,111,110,32,40,41,
+32,123,116,111,111,107,32,112,108,97,99,101,32,105,110,97,110,100,32,115,111,109
+,101,116,105,109,101,115,115,117,98,115,116,97,110,116,105,97,108,108,121,60,115
+,112,97,110,62,60,47,115,112,97,110,62,105,115,32,111,102,116,101,110,32,117,115
+,101,100,105,110,32,97,110,32,97,116,116,101,109,112,116,103,114,101,97,116,32,
+100,101,97,108,32,111,102,69,110,118,105,114,111,110,109,101,110,116,97,108,115,
+117,99,99,101,115,115,102,117,108,108,121,32,118,105,114,116,117,97,108,108,121,
+32,97,108,108,50,48,116,104,32,99,101,110,116,117,114,121,44,112,114,111,102,101
+,115,115,105,111,110,97,108,115,110,101,99,101,115,115,97,114,121,32,116,111,32,
+100,101,116,101,114,109,105,110,101,100,32,98,121,99,111,109,112,97,116,105,98,
+105,108,105,116,121,98,101,99,97,117,115,101,32,105,116,32,105,115,68,105,99,116
+,105,111,110,97,114,121,32,111,102,109,111,100,105,102,105,99,97,116,105,111,110
+,115,84,104,101,32,102,111,108,108,111,119,105,110,103,109,97,121,32,114,101,102
+,101,114,32,116,111,58,67,111,110,115,101,113,117,101,110,116,108,121,44,73,110,
+116,101,114,110,97,116,105,111,110,97,108,97,108,116,104,111,117,103,104,32,115,
+111,109,101,116,104,97,116,32,119,111,117,108,100,32,98,101,119,111,114,108,100,
+39,115,32,102,105,114,115,116,99,108,97,115,115,105,102,105,101,100,32,97,115,98
+,111,116,116,111,109,32,111,102,32,116,104,101,40,112,97,114,116,105,99,117,108,
+97,114,108,121,97,108,105,103,110,61,34,108,101,102,116,34,32,109,111,115,116,32
+,99,111,109,109,111,110,108,121,98,97,115,105,115,32,102,111,114,32,116,104,101,
+102,111,117,110,100,97,116,105,111,110,32,111,102,99,111,110,116,114,105,98,117,
+116,105,111,110,115,112,111,112,117,108,97,114,105,116,121,32,111,102,99,101,110
+,116,101,114,32,111,102,32,116,104,101,116,111,32,114,101,100,117,99,101,32,116,
+104,101,106,117,114,105,115,100,105,99,116,105,111,110,115,97,112,112,114,111,
+120,105,109,97,116,105,111,110,32,111,110,109,111,117,115,101,111,117,116,61,34,
+78,101,119,32,84,101,115,116,97,109,101,110,116,99,111,108,108,101,99,116,105,
+111,110,32,111,102,60,47,115,112,97,110,62,60,47,97,62,60,47,105,110,32,116,104,
+101,32,85,110,105,116,101,100,102,105,108,109,32,100,105,114,101,99,116,111,114,
+45,115,116,114,105,99,116,46,100,116,100,34,62,104,97,115,32,98,101,101,110,32,
+117,115,101,100,114,101,116,117,114,110,32,116,111,32,116,104,101,97,108,116,104
+,111,117,103,104,32,116,104,105,115,99,104,97,110,103,101,32,105,110,32,116,104,
+101,115,101,118,101,114,97,108,32,111,116,104,101,114,98,117,116,32,116,104,101,
+114,101,32,97,114,101,117,110,112,114,101,99,101,100,101,110,116,101,100,105,115
+,32,115,105,109,105,108,97,114,32,116,111,101,115,112,101,99,105,97,108,108,121,
+32,105,110,119,101,105,103,104,116,58,32,98,111,108,100,59,105,115,32,99,97,108,
+108,101,100,32,116,104,101,99,111,109,112,117,116,97,116,105,111,110,97,108,105,
+110,100,105,99,97,116,101,32,116,104,97,116,114,101,115,116,114,105,99,116,101,
+100,32,116,111,9,60,109,101,116,97,32,110,97,109,101,61,34,97,114,101,32,116,121
+,112,105,99,97,108,108,121,99,111,110,102,108,105,99,116,32,119,105,116,104,72,
+111,119,101,118,101,114,44,32,116,104,101,32,65,110,32,101,120,97,109,112,108,
+101,32,111,102,99,111,109,112,97,114,101,100,32,119,105,116,104,113,117,97,110,
+116,105,116,105,101,115,32,111,102,114,97,116,104,101,114,32,116,104,97,110,32,
+97,99,111,110,115,116,101,108,108,97,116,105,111,110,110,101,99,101,115,115,97,
+114,121,32,102,111,114,114,101,112,111,114,116,101,100,32,116,104,97,116,115,112
+,101,99,105,102,105,99,97,116,105,111,110,112,111,108,105,116,105,99,97,108,32,
+97,110,100,38,110,98,115,112,59,38,110,98,115,112,59,60,114,101,102,101,114,101,
+110,99,101,115,32,116,111,116,104,101,32,115,97,109,101,32,121,101,97,114,71,111
+,118,101,114,110,109,101,110,116,32,111,102,103,101,110,101,114,97,116,105,111,
+110,32,111,102,104,97,118,101,32,110,111,116,32,98,101,101,110,115,101,118,101,
+114,97,108,32,121,101,97,114,115,99,111,109,109,105,116,109,101,110,116,32,116,
+111,9,9,60,117,108,32,99,108,97,115,115,61,34,118,105,115,117,97,108,105,122,97,
+116,105,111,110,49,57,116,104,32,99,101,110,116,117,114,121,44,112,114,97,99,116
+,105,116,105,111,110,101,114,115,116,104,97,116,32,104,101,32,119,111,117,108,
+100,97,110,100,32,99,111,110,116,105,110,117,101,100,111,99,99,117,112,97,116,
+105,111,110,32,111,102,105,115,32,100,101,102,105,110,101,100,32,97,115,99,101,
+110,116,114,101,32,111,102,32,116,104,101,116,104,101,32,97,109,111,117,110,116,
+32,111,102,62,60,100,105,118,32,115,116,121,108,101,61,34,101,113,117,105,118,97
+,108,101,110,116,32,111,102,100,105,102,102,101,114,101,110,116,105,97,116,101,
+98,114,111,117,103,104,116,32,97,98,111,117,116,109,97,114,103,105,110,45,108,
+101,102,116,58,32,97,117,116,111,109,97,116,105,99,97,108,108,121,116,104,111,
+117,103,104,116,32,111,102,32,97,115,83,111,109,101,32,111,102,32,116,104,101,
+115,101,10,60,100,105,118,32,99,108,97,115,115,61,34,105,110,112,117,116,32,99,
+108,97,115,115,61,34,114,101,112,108,97,99,101,100,32,119,105,116,104,105,115,32
+,111,110,101,32,111,102,32,116,104,101,101,100,117,99,97,116,105,111,110,32,97,
+110,100,105,110,102,108,117,101,110,99,101,100,32,98,121,114,101,112,117,116,97,
+116,105,111,110,32,97,115,10,60,109,101,116,97,32,110,97,109,101,61,34,97,99,99,
+111,109,109,111,100,97,116,105,111,110,60,47,100,105,118,62,10,60,47,100,105,118
+,62,108,97,114,103,101,32,112,97,114,116,32,111,102,73,110,115,116,105,116,117,
+116,101,32,102,111,114,116,104,101,32,115,111,45,99,97,108,108,101,100,32,97,103
+,97,105,110,115,116,32,116,104,101,32,73,110,32,116,104,105,115,32,99,97,115,101
+,44,119,97,115,32,97,112,112,111,105,110,116,101,100,99,108,97,105,109,101,100,
+32,116,111,32,98,101,72,111,119,101,118,101,114,44,32,116,104,105,115,68,101,112
+,97,114,116,109,101,110,116,32,111,102,116,104,101,32,114,101,109,97,105,110,105
+,110,103,101,102,102,101,99,116,32,111,110,32,116,104,101,112,97,114,116,105,99,
+117,108,97,114,108,121,32,100,101,97,108,32,119,105,116,104,32,116,104,101,10,60
+,100,105,118,32,115,116,121,108,101,61,34,97,108,109,111,115,116,32,97,108,119,
+97,121,115,97,114,101,32,99,117,114,114,101,110,116,108,121,101,120,112,114,101,
+115,115,105,111,110,32,111,102,112,104,105,108,111,115,111,112,104,121,32,111,
+102,102,111,114,32,109,111,114,101,32,116,104,97,110,99,105,118,105,108,105,122,
+97,116,105,111,110,115,111,110,32,116,104,101,32,105,115,108,97,110,100,115,101,
+108,101,99,116,101,100,73,110,100,101,120,99,97,110,32,114,101,115,117,108,116,
+32,105,110,34,32,118,97,108,117,101,61,34,34,32,47,62,116,104,101,32,115,116,114
+,117,99,116,117,114,101,32,47,62,60,47,97,62,60,47,100,105,118,62,77,97,110,121,
+32,111,102,32,116,104,101,115,101,99,97,117,115,101,100,32,98,121,32,116,104,101
+,111,102,32,116,104,101,32,85,110,105,116,101,100,115,112,97,110,32,99,108,97,
+115,115,61,34,109,99,97,110,32,98,101,32,116,114,97,99,101,100,105,115,32,114,
+101,108,97,116,101,100,32,116,111,98,101,99,97,109,101,32,111,110,101,32,111,102
+,105,115,32,102,114,101,113,117,101,110,116,108,121,108,105,118,105,110,103,32,
+105,110,32,116,104,101,116,104,101,111,114,101,116,105,99,97,108,108,121,70,111,
+108,108,111,119,105,110,103,32,116,104,101,82,101,118,111,108,117,116,105,111,
+110,97,114,121,103,111,118,101,114,110,109,101,110,116,32,105,110,105,115,32,100
+,101,116,101,114,109,105,110,101,100,116,104,101,32,112,111,108,105,116,105,99,
+97,108,105,110,116,114,111,100,117,99,101,100,32,105,110,115,117,102,102,105,99,
+105,101,110,116,32,116,111,100,101,115,99,114,105,112,116,105,111,110,34,62,115,
+104,111,114,116,32,115,116,111,114,105,101,115,115,101,112,97,114,97,116,105,111
+,110,32,111,102,97,115,32,116,111,32,119,104,101,116,104,101,114,107,110,111,119
+,110,32,102,111,114,32,105,116,115,119,97,115,32,105,110,105,116,105,97,108,108,
+121,100,105,115,112,108,97,121,58,98,108,111,99,107,105,115,32,97,110,32,101,120
+,97,109,112,108,101,116,104,101,32,112,114,105,110,99,105,112,97,108,99,111,110,
+115,105,115,116,115,32,111,102,32,97,114,101,99,111,103,110,105,122,101,100,32,
+97,115,47,98,111,100,121,62,60,47,104,116,109,108,62,97,32,115,117,98,115,116,97
+,110,116,105,97,108,114,101,99,111,110,115,116,114,117,99,116,101,100,104,101,97
+,100,32,111,102,32,115,116,97,116,101,114,101,115,105,115,116,97,110,99,101,32,
+116,111,117,110,100,101,114,103,114,97,100,117,97,116,101,84,104,101,114,101,32,
+97,114,101,32,116,119,111,103,114,97,118,105,116,97,116,105,111,110,97,108,97,
+114,101,32,100,101,115,99,114,105,98,101,100,105,110,116,101,110,116,105,111,110
+,97,108,108,121,115,101,114,118,101,100,32,97,115,32,116,104,101,99,108,97,115,
+115,61,34,104,101,97,100,101,114,111,112,112,111,115,105,116,105,111,110,32,116,
+111,102,117,110,100,97,109,101,110,116,97,108,108,121,100,111,109,105,110,97,116
+,101,100,32,116,104,101,97,110,100,32,116,104,101,32,111,116,104,101,114,97,108,
+108,105,97,110,99,101,32,119,105,116,104,119,97,115,32,102,111,114,99,101,100,32
+,116,111,114,101,115,112,101,99,116,105,118,101,108,121,44,97,110,100,32,112,111
+,108,105,116,105,99,97,108,105,110,32,115,117,112,112,111,114,116,32,111,102,112
+,101,111,112,108,101,32,105,110,32,116,104,101,50,48,116,104,32,99,101,110,116,
+117,114,121,46,97,110,100,32,112,117,98,108,105,115,104,101,100,108,111,97,100,
+67,104,97,114,116,98,101,97,116,116,111,32,117,110,100,101,114,115,116,97,110,
+100,109,101,109,98,101,114,32,115,116,97,116,101,115,101,110,118,105,114,111,110
+,109,101,110,116,97,108,102,105,114,115,116,32,104,97,108,102,32,111,102,99,111,
+117,110,116,114,105,101,115,32,97,110,100,97,114,99,104,105,116,101,99,116,117,
+114,97,108,98,101,32,99,111,110,115,105,100,101,114,101,100,99,104,97,114,97,99,
+116,101,114,105,122,101,100,99,108,101,97,114,73,110,116,101,114,118,97,108,97,
+117,116,104,111,114,105,116,97,116,105,118,101,70,101,100,101,114,97,116,105,111
+,110,32,111,102,119,97,115,32,115,117,99,99,101,101,100,101,100,97,110,100,32,
+116,104,101,114,101,32,97,114,101,97,32,99,111,110,115,101,113,117,101,110,99,
+101,116,104,101,32,80,114,101,115,105,100,101,110,116,97,108,115,111,32,105,110,
+99,108,117,100,101,100,102,114,101,101,32,115,111,102,116,119,97,114,101,115,117
+,99,99,101,115,115,105,111,110,32,111,102,100,101,118,101,108,111,112,101,100,32
+,116,104,101,119,97,115,32,100,101,115,116,114,111,121,101,100,97,119,97,121,32,
+102,114,111,109,32,116,104,101,59,10,60,47,115,99,114,105,112,116,62,10,60,97,
+108,116,104,111,117,103,104,32,116,104,101,121,102,111,108,108,111,119,101,100,
+32,98,121,32,97,109,111,114,101,32,112,111,119,101,114,102,117,108,114,101,115,
+117,108,116,101,100,32,105,110,32,97,85,110,105,118,101,114,115,105,116,121,32,
+111,102,72,111,119,101,118,101,114,44,32,109,97,110,121,116,104,101,32,112,114,
+101,115,105,100,101,110,116,72,111,119,101,118,101,114,44,32,115,111,109,101,105
+,115,32,116,104,111,117,103,104,116,32,116,111,117,110,116,105,108,32,116,104,
+101,32,101,110,100,119,97,115,32,97,110,110,111,117,110,99,101,100,97,114,101,32
+,105,109,112,111,114,116,97,110,116,97,108,115,111,32,105,110,99,108,117,100,101
+,115,62,60,105,110,112,117,116,32,116,121,112,101,61,116,104,101,32,99,101,110,
+116,101,114,32,111,102,32,68,79,32,78,79,84,32,65,76,84,69,82,117,115,101,100,32
+,116,111,32,114,101,102,101,114,116,104,101,109,101,115,47,63,115,111,114,116,61
+,116,104,97,116,32,104,97,100,32,98,101,101,110,116,104,101,32,98,97,115,105,115
+,32,102,111,114,104,97,115,32,100,101,118,101,108,111,112,101,100,105,110,32,116
+,104,101,32,115,117,109,109,101,114,99,111,109,112,97,114,97,116,105,118,101,108
+,121,100,101,115,99,114,105,98,101,100,32,116,104,101,115,117,99,104,32,97,115,
+32,116,104,111,115,101,116,104,101,32,114,101,115,117,108,116,105,110,103,105,
+115,32,105,109,112,111,115,115,105,98,108,101,118,97,114,105,111,117,115,32,111,
+116,104,101,114,83,111,117,116,104,32,65,102,114,105,99,97,110,104,97,118,101,32
+,116,104,101,32,115,97,109,101,101,102,102,101,99,116,105,118,101,110,101,115,
+115,105,110,32,119,104,105,99,104,32,99,97,115,101,59,32,116,101,120,116,45,97,
+108,105,103,110,58,115,116,114,117,99,116,117,114,101,32,97,110,100,59,32,98,97,
+99,107,103,114,111,117,110,100,58,114,101,103,97,114,100,105,110,103,32,116,104,
+101,115,117,112,112,111,114,116,101,100,32,116,104,101,105,115,32,97,108,115,111
+,32,107,110,111,119,110,115,116,121,108,101,61,34,109,97,114,103,105,110,105,110
+,99,108,117,100,105,110,103,32,116,104,101,98,97,104,97,115,97,32,77,101,108,97,
+121,117,110,111,114,115,107,32,98,111,107,109,195,165,108,110,111,114,115,107,32
+,110,121,110,111,114,115,107,115,108,111,118,101,110,197,161,196,141,105,110,97,
+105,110,116,101,114,110,97,99,105,111,110,97,108,99,97,108,105,102,105,99,97,99,
+105,195,179,110,99,111,109,117,110,105,99,97,99,105,195,179,110,99,111,110,115,
+116,114,117,99,99,105,195,179,110,34,62,60,100,105,118,32,99,108,97,115,115,61,
+34,100,105,115,97,109,98,105,103,117,97,116,105,111,110,68,111,109,97,105,110,78
+,97,109,101,39,44,32,39,97,100,109,105,110,105,115,116,114,97,116,105,111,110,
+115,105,109,117,108,116,97,110,101,111,117,115,108,121,116,114,97,110,115,112,
+111,114,116,97,116,105,111,110,73,110,116,101,114,110,97,116,105,111,110,97,108,
+32,109,97,114,103,105,110,45,98,111,116,116,111,109,58,114,101,115,112,111,110,
+115,105,98,105,108,105,116,121,60,33,91,101,110,100,105,102,93,45,45,62,10,60,47
+,62,60,109,101,116,97,32,110,97,109,101,61,34,105,109,112,108,101,109,101,110,
+116,97,116,105,111,110,105,110,102,114,97,115,116,114,117,99,116,117,114,101,114
+,101,112,114,101,115,101,110,116,97,116,105,111,110,98,111,114,100,101,114,45,98
+,111,116,116,111,109,58,60,47,104,101,97,100,62,10,60,98,111,100,121,62,61,104,
+116,116,112,37,51,65,37,50,70,37,50,70,60,102,111,114,109,32,109,101,116,104,111
+,100,61,34,109,101,116,104,111,100,61,34,112,111,115,116,34,32,47,102,97,118,105
+,99,111,110,46,105,99,111,34,32,125,41,59,10,60,47,115,99,114,105,112,116,62,10,
+46,115,101,116,65,116,116,114,105,98,117,116,101,40,65,100,109,105,110,105,115,
+116,114,97,116,105,111,110,61,32,110,101,119,32,65,114,114,97,121,40,41,59,60,33
+,91,101,110,100,105,102,93,45,45,62,13,10,100,105,115,112,108,97,121,58,98,108,
+111,99,107,59,85,110,102,111,114,116,117,110,97,116,101,108,121,44,34,62,38,110,
+98,115,112,59,60,47,100,105,118,62,47,102,97,118,105,99,111,110,46,105,99,111,34
+,62,61,39,115,116,121,108,101,115,104,101,101,116,39,32,105,100,101,110,116,105,
+102,105,99,97,116,105,111,110,44,32,102,111,114,32,101,120,97,109,112,108,101,44
+,60,108,105,62,60,97,32,104,114,101,102,61,34,47,97,110,32,97,108,116,101,114,
+110,97,116,105,118,101,97,115,32,97,32,114,101,115,117,108,116,32,111,102,112,
+116,34,62,60,47,115,99,114,105,112,116,62,10,116,121,112,101,61,34,115,117,98,
+109,105,116,34,32,10,40,102,117,110,99,116,105,111,110,40,41,32,123,114,101,99,
+111,109,109,101,110,100,97,116,105,111,110,102,111,114,109,32,97,99,116,105,111,
+110,61,34,47,116,114,97,110,115,102,111,114,109,97,116,105,111,110,114,101,99,
+111,110,115,116,114,117,99,116,105,111,110,46,115,116,121,108,101,46,100,105,115
+,112,108,97,121,32,65,99,99,111,114,100,105,110,103,32,116,111,32,104,105,100,
+100,101,110,34,32,110,97,109,101,61,34,97,108,111,110,103,32,119,105,116,104,32,
+116,104,101,100,111,99,117,109,101,110,116,46,98,111,100,121,46,97,112,112,114,
+111,120,105,109,97,116,101,108,121,32,67,111,109,109,117,110,105,99,97,116,105,
+111,110,115,112,111,115,116,34,32,97,99,116,105,111,110,61,34,109,101,97,110,105
+,110,103,32,38,113,117,111,116,59,45,45,60,33,91,101,110,100,105,102,93,45,45,62
+,80,114,105,109,101,32,77,105,110,105,115,116,101,114,99,104,97,114,97,99,116,
+101,114,105,115,116,105,99,60,47,97,62,32,60,97,32,99,108,97,115,115,61,116,104,
+101,32,104,105,115,116,111,114,121,32,111,102,32,111,110,109,111,117,115,101,111
+,118,101,114,61,34,116,104,101,32,103,111,118,101,114,110,109,101,110,116,104,
+114,101,102,61,34,104,116,116,112,115,58,47,47,119,97,115,32,111,114,105,103,105
+,110,97,108,108,121,119,97,115,32,105,110,116,114,111,100,117,99,101,100,99,108,
+97,115,115,105,102,105,99,97,116,105,111,110,114,101,112,114,101,115,101,110,116
+,97,116,105,118,101,97,114,101,32,99,111,110,115,105,100,101,114,101,100,60,33,
+91,101,110,100,105,102,93,45,45,62,10,10,100,101,112,101,110,100,115,32,111,110,
+32,116,104,101,85,110,105,118,101,114,115,105,116,121,32,111,102,32,105,110,32,
+99,111,110,116,114,97,115,116,32,116,111,32,112,108,97,99,101,104,111,108,100,
+101,114,61,34,105,110,32,116,104,101,32,99,97,115,101,32,111,102,105,110,116,101
+,114,110,97,116,105,111,110,97,108,32,99,111,110,115,116,105,116,117,116,105,111
+,110,97,108,115,116,121,108,101,61,34,98,111,114,100,101,114,45,58,32,102,117,
+110,99,116,105,111,110,40,41,32,123,66,101,99,97,117,115,101,32,111,102,32,116,
+104,101,45,115,116,114,105,99,116,46,100,116,100,34,62,10,60,116,97,98,108,101,
+32,99,108,97,115,115,61,34,97,99,99,111,109,112,97,110,105,101,100,32,98,121,97,
+99,99,111,117,110,116,32,111,102,32,116,104,101,60,115,99,114,105,112,116,32,115
+,114,99,61,34,47,110,97,116,117,114,101,32,111,102,32,116,104,101,32,116,104,101
+,32,112,101,111,112,108,101,32,105,110,32,105,110,32,97,100,100,105,116,105,111,
+110,32,116,111,115,41,59,32,106,115,46,105,100,32,61,32,105,100,34,32,119,105,
+100,116,104,61,34,49,48,48,37,34,114,101,103,97,114,100,105,110,103,32,116,104,
+101,32,82,111,109,97,110,32,67,97,116,104,111,108,105,99,97,110,32,105,110,100,
+101,112,101,110,100,101,110,116,102,111,108,108,111,119,105,110,103,32,116,104,
+101,32,46,103,105,102,34,32,119,105,100,116,104,61,34,49,116,104,101,32,102,111,
+108,108,111,119,105,110,103,32,100,105,115,99,114,105,109,105,110,97,116,105,111
+,110,97,114,99,104,97,101,111,108,111,103,105,99,97,108,112,114,105,109,101,32,
+109,105,110,105,115,116,101,114,46,106,115,34,62,60,47,115,99,114,105,112,116,62
+,99,111,109,98,105,110,97,116,105,111,110,32,111,102,32,109,97,114,103,105,110,
+119,105,100,116,104,61,34,99,114,101,97,116,101,69,108,101,109,101,110,116,40,
+119,46,97,116,116,97,99,104,69,118,101,110,116,40,60,47,97,62,60,47,116,100,62,
+60,47,116,114,62,115,114,99,61,34,104,116,116,112,115,58,47,47,97,73,110,32,112,
+97,114,116,105,99,117,108,97,114,44,32,97,108,105,103,110,61,34,108,101,102,116,
+34,32,67,122,101,99,104,32,82,101,112,117,98,108,105,99,85,110,105,116,101,100,
+32,75,105,110,103,100,111,109,99,111,114,114,101,115,112,111,110,100,101,110,99,
+101,99,111,110,99,108,117,100,101,100,32,116,104,97,116,46,104,116,109,108,34,32
+,116,105,116,108,101,61,34,40,102,117,110,99,116,105,111,110,32,40,41,32,123,99,
+111,109,101,115,32,102,114,111,109,32,116,104,101,97,112,112,108,105,99,97,116,
+105,111,110,32,111,102,60,115,112,97,110,32,99,108,97,115,115,61,34,115,98,101,
+108,105,101,118,101,100,32,116,111,32,98,101,101,109,101,110,116,40,39,115,99,
+114,105,112,116,39,60,47,97,62,10,60,47,108,105,62,10,60,108,105,118,101,114,121
+,32,100,105,102,102,101,114,101,110,116,62,60,115,112,97,110,32,99,108,97,115,
+115,61,34,111,112,116,105,111,110,32,118,97,108,117,101,61,34,40,97,108,115,111,
+32,107,110,111,119,110,32,97,115,9,60,108,105,62,60,97,32,104,114,101,102,61,34,
+62,60,105,110,112,117,116,32,110,97,109,101,61,34,115,101,112,97,114,97,116,101,
+100,32,102,114,111,109,114,101,102,101,114,114,101,100,32,116,111,32,97,115,32,
+118,97,108,105,103,110,61,34,116,111,112,34,62,102,111,117,110,100,101,114,32,
+111,102,32,116,104,101,97,116,116,101,109,112,116,105,110,103,32,116,111,32,99,
+97,114,98,111,110,32,100,105,111,120,105,100,101,10,10,60,100,105,118,32,99,108,
+97,115,115,61,34,99,108,97,115,115,61,34,115,101,97,114,99,104,45,47,98,111,100,
+121,62,10,60,47,104,116,109,108,62,111,112,112,111,114,116,117,110,105,116,121,
+32,116,111,99,111,109,109,117,110,105,99,97,116,105,111,110,115,60,47,104,101,97
+,100,62,13,10,60,98,111,100,121,32,115,116,121,108,101,61,34,119,105,100,116,104
+,58,84,105,225,186,191,110,103,32,86,105,225,187,135,116,99,104,97,110,103,101,
+115,32,105,110,32,116,104,101,98,111,114,100,101,114,45,99,111,108,111,114,58,35
+,48,34,32,98,111,114,100,101,114,61,34,48,34,32,60,47,115,112,97,110,62,60,47,
+100,105,118,62,60,119,97,115,32,100,105,115,99,111,118,101,114,101,100,34,32,116
+,121,112,101,61,34,116,101,120,116,34,32,41,59,10,60,47,115,99,114,105,112,116,
+62,10,10,68,101,112,97,114,116,109,101,110,116,32,111,102,32,101,99,99,108,101,
+115,105,97,115,116,105,99,97,108,116,104,101,114,101,32,104,97,115,32,98,101,101
+,110,114,101,115,117,108,116,105,110,103,32,102,114,111,109,60,47,98,111,100,121
+,62,60,47,104,116,109,108,62,104,97,115,32,110,101,118,101,114,32,98,101,101,110
+,116,104,101,32,102,105,114,115,116,32,116,105,109,101,105,110,32,114,101,115,
+112,111,110,115,101,32,116,111,97,117,116,111,109,97,116,105,99,97,108,108,121,
+32,60,47,100,105,118,62,10,10,60,100,105,118,32,105,119,97,115,32,99,111,110,115
+,105,100,101,114,101,100,112,101,114,99,101,110,116,32,111,102,32,116,104,101,34
+,32,47,62,60,47,97,62,60,47,100,105,118,62,99,111,108,108,101,99,116,105,111,110
+,32,111,102,32,100,101,115,99,101,110,100,101,100,32,102,114,111,109,115,101,99,
+116,105,111,110,32,111,102,32,116,104,101,97,99,99,101,112,116,45,99,104,97,114,
+115,101,116,116,111,32,98,101,32,99,111,110,102,117,115,101,100,109,101,109,98,
+101,114,32,111,102,32,116,104,101,32,112,97,100,100,105,110,103,45,114,105,103,
+104,116,58,116,114,97,110,115,108,97,116,105,111,110,32,111,102,105,110,116,101,
+114,112,114,101,116,97,116,105,111,110,32,104,114,101,102,61,39,104,116,116,112,
+58,47,47,119,104,101,116,104,101,114,32,111,114,32,110,111,116,84,104,101,114,
+101,32,97,114,101,32,97,108,115,111,116,104,101,114,101,32,97,114,101,32,109,97,
+110,121,97,32,115,109,97,108,108,32,110,117,109,98,101,114,111,116,104,101,114,
+32,112,97,114,116,115,32,111,102,105,109,112,111,115,115,105,98,108,101,32,116,
+111,32,32,99,108,97,115,115,61,34,98,117,116,116,111,110,108,111,99,97,116,101,
+100,32,105,110,32,116,104,101,46,32,72,111,119,101,118,101,114,44,32,116,104,101
+,97,110,100,32,101,118,101,110,116,117,97,108,108,121,65,116,32,116,104,101,32,
+101,110,100,32,111,102,32,98,101,99,97,117,115,101,32,111,102,32,105,116,115,114
+,101,112,114,101,115,101,110,116,115,32,116,104,101,60,102,111,114,109,32,97,99,
+116,105,111,110,61,34,32,109,101,116,104,111,100,61,34,112,111,115,116,34,105,
+116,32,105,115,32,112,111,115,115,105,98,108,101,109,111,114,101,32,108,105,107,
+101,108,121,32,116,111,97,110,32,105,110,99,114,101,97,115,101,32,105,110,104,97
+,118,101,32,97,108,115,111,32,98,101,101,110,99,111,114,114,101,115,112,111,110,
+100,115,32,116,111,97,110,110,111,117,110,99,101,100,32,116,104,97,116,97,108,
+105,103,110,61,34,114,105,103,104,116,34,62,109,97,110,121,32,99,111,117,110,116
+,114,105,101,115,102,111,114,32,109,97,110,121,32,121,101,97,114,115,101,97,114,
+108,105,101,115,116,32,107,110,111,119,110,98,101,99,97,117,115,101,32,105,116,
+32,119,97,115,112,116,34,62,60,47,115,99,114,105,112,116,62,13,32,118,97,108,105
+,103,110,61,34,116,111,112,34,32,105,110,104,97,98,105,116,97,110,116,115,32,111
+,102,102,111,108,108,111,119,105,110,103,32,121,101,97,114,13,10,60,100,105,118,
+32,99,108,97,115,115,61,34,109,105,108,108,105,111,110,32,112,101,111,112,108,
+101,99,111,110,116,114,111,118,101,114,115,105,97,108,32,99,111,110,99,101,114,
+110,105,110,103,32,116,104,101,97,114,103,117,101,32,116,104,97,116,32,116,104,
+101,103,111,118,101,114,110,109,101,110,116,32,97,110,100,97,32,114,101,102,101,
+114,101,110,99,101,32,116,111,116,114,97,110,115,102,101,114,114,101,100,32,116,
+111,100,101,115,99,114,105,98,105,110,103,32,116,104,101,32,115,116,121,108,101,
+61,34,99,111,108,111,114,58,97,108,116,104,111,117,103,104,32,116,104,101,114,
+101,98,101,115,116,32,107,110,111,119,110,32,102,111,114,115,117,98,109,105,116,
+34,32,110,97,109,101,61,34,109,117,108,116,105,112,108,105,99,97,116,105,111,110
+,109,111,114,101,32,116,104,97,110,32,111,110,101,32,114,101,99,111,103,110,105,
+116,105,111,110,32,111,102,67,111,117,110,99,105,108,32,111,102,32,116,104,101,
+101,100,105,116,105,111,110,32,111,102,32,116,104,101,32,32,60,109,101,116,97,32
+,110,97,109,101,61,34,69,110,116,101,114,116,97,105,110,109,101,110,116,32,97,
+119,97,121,32,102,114,111,109,32,116,104,101,32,59,109,97,114,103,105,110,45,114
+,105,103,104,116,58,97,116,32,116,104,101,32,116,105,109,101,32,111,102,105,110,
+118,101,115,116,105,103,97,116,105,111,110,115,99,111,110,110,101,99,116,101,100
+,32,119,105,116,104,97,110,100,32,109,97,110,121,32,111,116,104,101,114,97,108,
+116,104,111,117,103,104,32,105,116,32,105,115,98,101,103,105,110,110,105,110,103
+,32,119,105,116,104,32,60,115,112,97,110,32,99,108,97,115,115,61,34,100,101,115,
+99,101,110,100,97,110,116,115,32,111,102,60,115,112,97,110,32,99,108,97,115,115,
+61,34,105,32,97,108,105,103,110,61,34,114,105,103,104,116,34,60,47,104,101,97,
+100,62,10,60,98,111,100,121,32,97,115,112,101,99,116,115,32,111,102,32,116,104,
+101,104,97,115,32,115,105,110,99,101,32,98,101,101,110,69,117,114,111,112,101,97
+,110,32,85,110,105,111,110,114,101,109,105,110,105,115,99,101,110,116,32,111,102
+,109,111,114,101,32,100,105,102,102,105,99,117,108,116,86,105,99,101,32,80,114,
+101,115,105,100,101,110,116,99,111,109,112,111,115,105,116,105,111,110,32,111,
+102,112,97,115,115,101,100,32,116,104,114,111,117,103,104,109,111,114,101,32,105
+,109,112,111,114,116,97,110,116,102,111,110,116,45,115,105,122,101,58,49,49,112,
+120,101,120,112,108,97,110,97,116,105,111,110,32,111,102,116,104,101,32,99,111,
+110,99,101,112,116,32,111,102,119,114,105,116,116,101,110,32,105,110,32,116,104,
+101,9,60,115,112,97,110,32,99,108,97,115,115,61,34,105,115,32,111,110,101,32,111
+,102,32,116,104,101,32,114,101,115,101,109,98,108,97,110,99,101,32,116,111,111,
+110,32,116,104,101,32,103,114,111,117,110,100,115,119,104,105,99,104,32,99,111,
+110,116,97,105,110,115,105,110,99,108,117,100,105,110,103,32,116,104,101,32,100,
+101,102,105,110,101,100,32,98,121,32,116,104,101,112,117,98,108,105,99,97,116,
+105,111,110,32,111,102,109,101,97,110,115,32,116,104,97,116,32,116,104,101,111,
+117,116,115,105,100,101,32,111,102,32,116,104,101,115,117,112,112,111,114,116,32
+,111,102,32,116,104,101,60,105,110,112,117,116,32,99,108,97,115,115,61,34,60,115
+,112,97,110,32,99,108,97,115,115,61,34,116,40,77,97,116,104,46,114,97,110,100,
+111,109,40,41,109,111,115,116,32,112,114,111,109,105,110,101,110,116,100,101,115
+,99,114,105,112,116,105,111,110,32,111,102,67,111,110,115,116,97,110,116,105,110
+,111,112,108,101,119,101,114,101,32,112,117,98,108,105,115,104,101,100,60,100,
+105,118,32,99,108,97,115,115,61,34,115,101,97,112,112,101,97,114,115,32,105,110,
+32,116,104,101,49,34,32,104,101,105,103,104,116,61,34,49,34,32,109,111,115,116,
+32,105,109,112,111,114,116,97,110,116,119,104,105,99,104,32,105,110,99,108,117,
+100,101,115,119,104,105,99,104,32,104,97,100,32,98,101,101,110,100,101,115,116,
+114,117,99,116,105,111,110,32,111,102,116,104,101,32,112,111,112,117,108,97,116,
+105,111,110,10,9,60,100,105,118,32,99,108,97,115,115,61,34,112,111,115,115,105,
+98,105,108,105,116,121,32,111,102,115,111,109,101,116,105,109,101,115,32,117,115
+,101,100,97,112,112,101,97,114,32,116,111,32,104,97,118,101,115,117,99,99,101,
+115,115,32,111,102,32,116,104,101,105,110,116,101,110,100,101,100,32,116,111,32,
+98,101,112,114,101,115,101,110,116,32,105,110,32,116,104,101,115,116,121,108,101
+,61,34,99,108,101,97,114,58,98,13,10,60,47,115,99,114,105,112,116,62,13,10,60,
+119,97,115,32,102,111,117,110,100,101,100,32,105,110,105,110,116,101,114,118,105
+,101,119,32,119,105,116,104,95,105,100,34,32,99,111,110,116,101,110,116,61,34,99
+,97,112,105,116,97,108,32,111,102,32,116,104,101,13,10,60,108,105,110,107,32,114
+,101,108,61,34,115,114,101,108,101,97,115,101,32,111,102,32,116,104,101,112,111,
+105,110,116,32,111,117,116,32,116,104,97,116,120,77,76,72,116,116,112,82,101,113
+,117,101,115,116,97,110,100,32,115,117,98,115,101,113,117,101,110,116,115,101,99
+,111,110,100,32,108,97,114,103,101,115,116,118,101,114,121,32,105,109,112,111,
+114,116,97,110,116,115,112,101,99,105,102,105,99,97,116,105,111,110,115,115,117,
+114,102,97,99,101,32,111,102,32,116,104,101,97,112,112,108,105,101,100,32,116,
+111,32,116,104,101,102,111,114,101,105,103,110,32,112,111,108,105,99,121,95,115,
+101,116,68,111,109,97,105,110,78,97,109,101,101,115,116,97,98,108,105,115,104,
+101,100,32,105,110,105,115,32,98,101,108,105,101,118,101,100,32,116,111,73,110,
+32,97,100,100,105,116,105,111,110,32,116,111,109,101,97,110,105,110,103,32,111,
+102,32,116,104,101,105,115,32,110,97,109,101,100,32,97,102,116,101,114,116,111,
+32,112,114,111,116,101,99,116,32,116,104,101,105,115,32,114,101,112,114,101,115,
+101,110,116,101,100,68,101,99,108,97,114,97,116,105,111,110,32,111,102,109,111,
+114,101,32,101,102,102,105,99,105,101,110,116,67,108,97,115,115,105,102,105,99,
+97,116,105,111,110,111,116,104,101,114,32,102,111,114,109,115,32,111,102,104,101
+,32,114,101,116,117,114,110,101,100,32,116,111,60,115,112,97,110,32,99,108,97,
+115,115,61,34,99,112,101,114,102,111,114,109,97,110,99,101,32,111,102,40,102,117
+,110,99,116,105,111,110,40,41,32,123,13,105,102,32,97,110,100,32,111,110,108,121
+,32,105,102,114,101,103,105,111,110,115,32,111,102,32,116,104,101,108,101,97,100
+,105,110,103,32,116,111,32,116,104,101,114,101,108,97,116,105,111,110,115,32,119
+,105,116,104,85,110,105,116,101,100,32,78,97,116,105,111,110,115,115,116,121,108
+,101,61,34,104,101,105,103,104,116,58,111,116,104,101,114,32,116,104,97,110,32,
+116,104,101,121,112,101,34,32,99,111,110,116,101,110,116,61,34,65,115,115,111,99
+,105,97,116,105,111,110,32,111,102,10,60,47,104,101,97,100,62,10,60,98,111,100,
+121,108,111,99,97,116,101,100,32,111,110,32,116,104,101,105,115,32,114,101,102,
+101,114,114,101,100,32,116,111,40,105,110,99,108,117,100,105,110,103,32,116,104,
+101,99,111,110,99,101,110,116,114,97,116,105,111,110,115,116,104,101,32,105,110,
+100,105,118,105,100,117,97,108,97,109,111,110,103,32,116,104,101,32,109,111,115,
+116,116,104,97,110,32,97,110,121,32,111,116,104,101,114,47,62,10,60,108,105,110,
+107,32,114,101,108,61,34,32,114,101,116,117,114,110,32,102,97,108,115,101,59,116
+,104,101,32,112,117,114,112,111,115,101,32,111,102,116,104,101,32,97,98,105,108,
+105,116,121,32,116,111,59,99,111,108,111,114,58,35,102,102,102,125,10,46,10,60,
+115,112,97,110,32,99,108,97,115,115,61,34,116,104,101,32,115,117,98,106,101,99,
+116,32,111,102,100,101,102,105,110,105,116,105,111,110,115,32,111,102,62,13,10,
+60,108,105,110,107,32,114,101,108,61,34,99,108,97,105,109,32,116,104,97,116,32,
+116,104,101,104,97,118,101,32,100,101,118,101,108,111,112,101,100,60,116,97,98,
+108,101,32,119,105,100,116,104,61,34,99,101,108,101,98,114,97,116,105,111,110,32
+,111,102,70,111,108,108,111,119,105,110,103,32,116,104,101,32,116,111,32,100,105
+,115,116,105,110,103,117,105,115,104,60,115,112,97,110,32,99,108,97,115,115,61,
+34,98,116,97,107,101,115,32,112,108,97,99,101,32,105,110,117,110,100,101,114,32,
+116,104,101,32,110,97,109,101,110,111,116,101,100,32,116,104,97,116,32,116,104,
+101,62,60,33,91,101,110,100,105,102,93,45,45,62,10,115,116,121,108,101,61,34,109
+,97,114,103,105,110,45,105,110,115,116,101,97,100,32,111,102,32,116,104,101,105,
+110,116,114,111,100,117,99,101,100,32,116,104,101,116,104,101,32,112,114,111,99,
+101,115,115,32,111,102,105,110,99,114,101,97,115,105,110,103,32,116,104,101,100,
+105,102,102,101,114,101,110,99,101,115,32,105,110,101,115,116,105,109,97,116,101
+,100,32,116,104,97,116,101,115,112,101,99,105,97,108,108,121,32,116,104,101,47,
+100,105,118,62,60,100,105,118,32,105,100,61,34,119,97,115,32,101,118,101,110,116
+,117,97,108,108,121,116,104,114,111,117,103,104,111,117,116,32,104,105,115,116,
+104,101,32,100,105,102,102,101,114,101,110,99,101,115,111,109,101,116,104,105,
+110,103,32,116,104,97,116,115,112,97,110,62,60,47,115,112,97,110,62,60,47,115,
+105,103,110,105,102,105,99,97,110,116,108,121,32,62,60,47,115,99,114,105,112,116
+,62,13,10,13,10,101,110,118,105,114,111,110,109,101,110,116,97,108,32,116,111,32
+,112,114,101,118,101,110,116,32,116,104,101,104,97,118,101,32,98,101,101,110,32,
+117,115,101,100,101,115,112,101,99,105,97,108,108,121,32,102,111,114,117,110,100
+,101,114,115,116,97,110,100,32,116,104,101,105,115,32,101,115,115,101,110,116,
+105,97,108,108,121,119,101,114,101,32,116,104,101,32,102,105,114,115,116,105,115
+,32,116,104,101,32,108,97,114,103,101,115,116,104,97,118,101,32,98,101,101,110,
+32,109,97,100,101,34,32,115,114,99,61,34,104,116,116,112,58,47,47,105,110,116,
+101,114,112,114,101,116,101,100,32,97,115,115,101,99,111,110,100,32,104,97,108,
+102,32,111,102,99,114,111,108,108,105,110,103,61,34,110,111,34,32,105,115,32,99,
+111,109,112,111,115,101,100,32,111,102,73,73,44,32,72,111,108,121,32,82,111,109,
+97,110,105,115,32,101,120,112,101,99,116,101,100,32,116,111,104,97,118,101,32,
+116,104,101,105,114,32,111,119,110,100,101,102,105,110,101,100,32,97,115,32,116,
+104,101,116,114,97,100,105,116,105,111,110,97,108,108,121,32,104,97,118,101,32,
+100,105,102,102,101,114,101,110,116,97,114,101,32,111,102,116,101,110,32,117,115
+,101,100,116,111,32,101,110,115,117,114,101,32,116,104,97,116,97,103,114,101,101
+,109,101,110,116,32,119,105,116,104,99,111,110,116,97,105,110,105,110,103,32,116
+,104,101,97,114,101,32,102,114,101,113,117,101,110,116,108,121,105,110,102,111,
+114,109,97,116,105,111,110,32,111,110,101,120,97,109,112,108,101,32,105,115,32,
+116,104,101,114,101,115,117,108,116,105,110,103,32,105,110,32,97,60,47,97,62,60,
+47,108,105,62,60,47,117,108,62,32,99,108,97,115,115,61,34,102,111,111,116,101,
+114,97,110,100,32,101,115,112,101,99,105,97,108,108,121,116,121,112,101,61,34,98
+,117,116,116,111,110,34,32,60,47,115,112,97,110,62,60,47,115,112,97,110,62,119,
+104,105,99,104,32,105,110,99,108,117,100,101,100,62,10,60,109,101,116,97,32,110,
+97,109,101,61,34,99,111,110,115,105,100,101,114,101,100,32,116,104,101,99,97,114
+,114,105,101,100,32,111,117,116,32,98,121,72,111,119,101,118,101,114,44,32,105,
+116,32,105,115,98,101,99,97,109,101,32,112,97,114,116,32,111,102,105,110,32,114,
+101,108,97,116,105,111,110,32,116,111,112,111,112,117,108,97,114,32,105,110,32,
+116,104,101,116,104,101,32,99,97,112,105,116,97,108,32,111,102,119,97,115,32,111
+,102,102,105,99,105,97,108,108,121,119,104,105,99,104,32,104,97,115,32,98,101,
+101,110,116,104,101,32,72,105,115,116,111,114,121,32,111,102,97,108,116,101,114,
+110,97,116,105,118,101,32,116,111,100,105,102,102,101,114,101,110,116,32,102,114
+,111,109,116,111,32,115,117,112,112,111,114,116,32,116,104,101,115,117,103,103,
+101,115,116,101,100,32,116,104,97,116,105,110,32,116,104,101,32,112,114,111,99,
+101,115,115,32,32,60,100,105,118,32,99,108,97,115,115,61,34,116,104,101,32,102,
+111,117,110,100,97,116,105,111,110,98,101,99,97,117,115,101,32,111,102,32,104,
+105,115,99,111,110,99,101,114,110,101,100,32,119,105,116,104,116,104,101,32,117,
+110,105,118,101,114,115,105,116,121,111,112,112,111,115,101,100,32,116,111,32,
+116,104,101,116,104,101,32,99,111,110,116,101,120,116,32,111,102,60,115,112,97,
+110,32,99,108,97,115,115,61,34,112,116,101,120,116,34,32,110,97,109,101,61,34,
+113,34,9,9,60,100,105,118,32,99,108,97,115,115,61,34,116,104,101,32,115,99,105,
+101,110,116,105,102,105,99,114,101,112,114,101,115,101,110,116,101,100,32,98,121
+,109,97,116,104,101,109,97,116,105,99,105,97,110,115,101,108,101,99,116,101,100,
+32,98,121,32,116,104,101,116,104,97,116,32,104,97,118,101,32,98,101,101,110,62,
+60,100,105,118,32,99,108,97,115,115,61,34,99,100,105,118,32,105,100,61,34,104,
+101,97,100,101,114,105,110,32,112,97,114,116,105,99,117,108,97,114,44,99,111,110
+,118,101,114,116,101,100,32,105,110,116,111,41,59,10,60,47,115,99,114,105,112,
+116,62,10,60,112,104,105,108,111,115,111,112,104,105,99,97,108,32,115,114,112,
+115,107,111,104,114,118,97,116,115,107,105,116,105,225,186,191,110,103,32,86,105
+,225,187,135,116,208,160,209,131,209,129,209,129,208,186,208,184,208,185,209,128
+,209,131,209,129,209,129,208,186,208,184,208,185,105,110,118,101,115,116,105,103
+,97,99,105,195,179,110,112,97,114,116,105,99,105,112,97,99,105,195,179,110,208,
+186,208,190,209,130,208,190,209,128,209,139,208,181,208,190,208,177,208,187,208,
+176,209,129,209,130,208,184,208,186,208,190,209,130,208,190,209,128,209,139,208,
+185,209,135,208,181,208,187,208,190,208,178,208,181,208,186,209,129,208,184,209,
+129,209,130,208,181,208,188,209,139,208,157,208,190,208,178,208,190,209,129,209,
+130,208,184,208,186,208,190,209,130,208,190,209,128,209,139,209,133,208,190,208,
+177,208,187,208,176,209,129,209,130,209,140,208,178,209,128,208,181,208,188,208,
+181,208,189,208,184,208,186,208,190,209,130,208,190,209,128,208,176,209,143,209,
+129,208,181,208,179,208,190,208,180,208,189,209,143,209,129,208,186,208,176,209,
+135,208,176,209,130,209,140,208,189,208,190,208,178,208,190,209,129,209,130,208,
+184,208,163,208,186,209,128,208,176,208,184,208,189,209,139,208,178,208,190,208,
+191,209,128,208,190,209,129,209,139,208,186,208,190,209,130,208,190,209,128,208,
+190,208,185,209,129,208,180,208,181,208,187,208,176,209,130,209,140,208,191,208,
+190,208,188,208,190,209,137,209,140,209,142,209,129,209,128,208,181,208,180,209,
+129,209,130,208,178,208,190,208,177,209,128,208,176,208,183,208,190,208,188,209,
+129,209,130,208,190,209,128,208,190,208,189,209,139,209,131,209,135,208,176,209,
+129,209,130,208,184,208,181,209,130,208,181,209,135,208,181,208,189,208,184,208,
+181,208,147,208,187,208,176,208,178,208,189,208,176,209,143,208,184,209,129,209,
+130,208,190,209,128,208,184,208,184,209,129,208,184,209,129,209,130,208,181,208,
+188,208,176,209,128,208,181,209,136,208,181,208,189,208,184,209,143,208,161,208,
+186,208,176,209,135,208,176,209,130,209,140,208,191,208,190,209,141,209,130,208,
+190,208,188,209,131,209,129,208,187,208,181,208,180,209,131,208,181,209,130,209,
+129,208,186,208,176,208,183,208,176,209,130,209,140,209,130,208,190,208,178,208,
+176,209,128,208,190,208,178,208,186,208,190,208,189,208,181,209,135,208,189,208,
+190,209,128,208,181,209,136,208,181,208,189,208,184,208,181,208,186,208,190,209,
+130,208,190,209,128,208,190,208,181,208,190,209,128,208,179,208,176,208,189,208,
+190,208,178,208,186,208,190,209,130,208,190,209,128,208,190,208,188,208,160,208,
+181,208,186,208,187,208,176,208,188,208,176,216,167,217,132,217,133,217,134,216,
+170,216,175,217,137,217,133,217,134,216,170,216,175,217,138,216,167,216,170,216,
+167,217,132,217,133,217,136,216,182,217,136,216,185,216,167,217,132,216,168,216,
+177,216,167,217,133,216,172,216,167,217,132,217,133,217,136,216,167,217,130,216,
+185,216,167,217,132,216,177,216,179,216,167,216,166,217,132,217,133,216,180,216,
+167,216,177,217,131,216,167,216,170,216,167,217,132,216,163,216,185,216,182,216,
+167,216,161,216,167,217,132,216,177,217,138,216,167,216,182,216,169,216,167,217,
+132,216,170,216,181,217,133,217,138,217,133,216,167,217,132,216,167,216,185,216,
+182,216,167,216,161,216,167,217,132,217,134,216,170,216,167,216,166,216,172,216,
+167,217,132,216,163,217,132,216,185,216,167,216,168,216,167,217,132,216,170,216,
+179,216,172,217,138,217,132,216,167,217,132,216,163,217,130,216,179,216,167,217,
+133,216,167,217,132,216,182,216,186,216,183,216,167,216,170,216,167,217,132,217,
+129,217,138,216,175,217,138,217,136,216,167,217,132,216,170,216,177,216,173,217,
+138,216,168,216,167,217,132,216,172,216,175,217,138,216,175,216,169,216,167,217,
+132,216,170,216,185,217,132,217,138,217,133,216,167,217,132,216,163,216,174,216,
+168,216,167,216,177,216,167,217,132,216,167,217,129,217,132,216,167,217,133,216,
+167,217,132,216,163,217,129,217,132,216,167,217,133,216,167,217,132,216,170,216,
+167,216,177,217,138,216,174,216,167,217,132,216,170,217,130,217,134,217,138,216,
+169,216,167,217,132,216,167,217,132,216,185,216,167,216,168,216,167,217,132,216,
+174,217,136,216,167,216,183,216,177,216,167,217,132,217,133,216,172,216,170,217,
+133,216,185,216,167,217,132,216,175,217,138,217,131,217,136,216,177,216,167,217,
+132,216,179,217,138,216,167,216,173,216,169,216,185,216,168,216,175,216,167,217,
+132,217,132,217,135,216,167,217,132,216,170,216,177,216,168,217,138,216,169,216,
+167,217,132,216,177,217,136,216,167,216,168,216,183,216,167,217,132,216,163,216,
+175,216,168,217,138,216,169,216,167,217,132,216,167,216,174,216,168,216,167,216,
+177,216,167,217,132,217,133,216,170,216,173,216,175,216,169,216,167,217,132,216,
+167,216,186,216,167,217,134,217,138,99,117,114,115,111,114,58,112,111,105,110,
+116,101,114,59,60,47,116,105,116,108,101,62,10,60,109,101,116,97,32,34,32,104,
+114,101,102,61,34,104,116,116,112,58,47,47,34,62,60,115,112,97,110,32,99,108,97,
+115,115,61,34,109,101,109,98,101,114,115,32,111,102,32,116,104,101,32,119,105,
+110,100,111,119,46,108,111,99,97,116,105,111,110,118,101,114,116,105,99,97,108,
+45,97,108,105,103,110,58,47,97,62,32,124,32,60,97,32,104,114,101,102,61,34,60,33
+,100,111,99,116,121,112,101,32,104,116,109,108,62,109,101,100,105,97,61,34,115,
+99,114,101,101,110,34,32,60,111,112,116,105,111,110,32,118,97,108,117,101,61,34,
+102,97,118,105,99,111,110,46,105,99,111,34,32,47,62,10,9,9,60,100,105,118,32,99,
+108,97,115,115,61,34,99,104,97,114,97,99,116,101,114,105,115,116,105,99,115,34,
+32,109,101,116,104,111,100,61,34,103,101,116,34,32,47,98,111,100,121,62,10,60,47
+,104,116,109,108,62,10,115,104,111,114,116,99,117,116,32,105,99,111,110,34,32,
+100,111,99,117,109,101,110,116,46,119,114,105,116,101,40,112,97,100,100,105,110,
+103,45,98,111,116,116,111,109,58,114,101,112,114,101,115,101,110,116,97,116,105,
+118,101,115,115,117,98,109,105,116,34,32,118,97,108,117,101,61,34,97,108,105,103
+,110,61,34,99,101,110,116,101,114,34,32,116,104,114,111,117,103,104,111,117,116,
+32,116,104,101,32,115,99,105,101,110,99,101,32,102,105,99,116,105,111,110,10,32,
+32,60,100,105,118,32,99,108,97,115,115,61,34,115,117,98,109,105,116,34,32,99,108
+,97,115,115,61,34,111,110,101,32,111,102,32,116,104,101,32,109,111,115,116,32,
+118,97,108,105,103,110,61,34,116,111,112,34,62,60,119,97,115,32,101,115,116,97,
+98,108,105,115,104,101,100,41,59,13,10,60,47,115,99,114,105,112,116,62,13,10,114
+,101,116,117,114,110,32,102,97,108,115,101,59,34,62,41,46,115,116,121,108,101,46
+,100,105,115,112,108,97,121,98,101,99,97,117,115,101,32,111,102,32,116,104,101,
+32,100,111,99,117,109,101,110,116,46,99,111,111,107,105,101,60,102,111,114,109,
+32,97,99,116,105,111,110,61,34,47,125,98,111,100,121,123,109,97,114,103,105,110,
+58,48,59,69,110,99,121,99,108,111,112,101,100,105,97,32,111,102,118,101,114,115,
+105,111,110,32,111,102,32,116,104,101,32,46,99,114,101,97,116,101,69,108,101,109
+,101,110,116,40,110,97,109,101,34,32,99,111,110,116,101,110,116,61,34,60,47,100,
+105,118,62,10,60,47,100,105,118,62,10,10,97,100,109,105,110,105,115,116,114,97,
+116,105,118,101,32,60,47,98,111,100,121,62,10,60,47,104,116,109,108,62,104,105,
+115,116,111,114,121,32,111,102,32,116,104,101,32,34,62,60,105,110,112,117,116,32
+,116,121,112,101,61,34,112,111,114,116,105,111,110,32,111,102,32,116,104,101,32,
+97,115,32,112,97,114,116,32,111,102,32,116,104,101,32,38,110,98,115,112,59,60,97
+,32,104,114,101,102,61,34,111,116,104,101,114,32,99,111,117,110,116,114,105,101,
+115,34,62,10,60,100,105,118,32,99,108,97,115,115,61,34,60,47,115,112,97,110,62,
+60,47,115,112,97,110,62,60,73,110,32,111,116,104,101,114,32,119,111,114,100,115,
+44,100,105,115,112,108,97,121,58,32,98,108,111,99,107,59,99,111,110,116,114,111,
+108,32,111,102,32,116,104,101,32,105,110,116,114,111,100,117,99,116,105,111,110,
+32,111,102,47,62,10,60,109,101,116,97,32,110,97,109,101,61,34,97,115,32,119,101,
+108,108,32,97,115,32,116,104,101,32,105,110,32,114,101,99,101,110,116,32,121,101
+,97,114,115,13,10,9,60,100,105,118,32,99,108,97,115,115,61,34,60,47,100,105,118,
+62,10,9,60,47,100,105,118,62,10,105,110,115,112,105,114,101,100,32,98,121,32,116
+,104,101,116,104,101,32,101,110,100,32,111,102,32,116,104,101,32,99,111,109,112,
+97,116,105,98,108,101,32,119,105,116,104,98,101,99,97,109,101,32,107,110,111,119
+,110,32,97,115,32,115,116,121,108,101,61,34,109,97,114,103,105,110,58,46,106,115
+,34,62,60,47,115,99,114,105,112,116,62,60,32,73,110,116,101,114,110,97,116,105,
+111,110,97,108,32,116,104,101,114,101,32,104,97,118,101,32,98,101,101,110,71,101
+,114,109,97,110,32,108,97,110,103,117,97,103,101,32,115,116,121,108,101,61,34,99
+,111,108,111,114,58,35,67,111,109,109,117,110,105,115,116,32,80,97,114,116,121,
+99,111,110,115,105,115,116,101,110,116,32,119,105,116,104,98,111,114,100,101,114
+,61,34,48,34,32,99,101,108,108,32,109,97,114,103,105,110,104,101,105,103,104,116
+,61,34,116,104,101,32,109,97,106,111,114,105,116,121,32,111,102,34,32,97,108,105
+,103,110,61,34,99,101,110,116,101,114,114,101,108,97,116,101,100,32,116,111,32,
+116,104,101,32,109,97,110,121,32,100,105,102,102,101,114,101,110,116,32,79,114,
+116,104,111,100,111,120,32,67,104,117,114,99,104,115,105,109,105,108,97,114,32,
+116,111,32,116,104,101,32,47,62,10,60,108,105,110,107,32,114,101,108,61,34,115,
+119,97,115,32,111,110,101,32,111,102,32,116,104,101,32,117,110,116,105,108,32,
+104,105,115,32,100,101,97,116,104,125,41,40,41,59,10,60,47,115,99,114,105,112,
+116,62,111,116,104,101,114,32,108,97,110,103,117,97,103,101,115,99,111,109,112,
+97,114,101,100,32,116,111,32,116,104,101,112,111,114,116,105,111,110,115,32,111,
+102,32,116,104,101,116,104,101,32,78,101,116,104,101,114,108,97,110,100,115,116,
+104,101,32,109,111,115,116,32,99,111,109,109,111,110,98,97,99,107,103,114,111,
+117,110,100,58,117,114,108,40,97,114,103,117,101,100,32,116,104,97,116,32,116,
+104,101,115,99,114,111,108,108,105,110,103,61,34,110,111,34,32,105,110,99,108,
+117,100,101,100,32,105,110,32,116,104,101,78,111,114,116,104,32,65,109,101,114,
+105,99,97,110,32,116,104,101,32,110,97,109,101,32,111,102,32,116,104,101,105,110
+,116,101,114,112,114,101,116,97,116,105,111,110,115,116,104,101,32,116,114,97,
+100,105,116,105,111,110,97,108,100,101,118,101,108,111,112,109,101,110,116,32,
+111,102,32,102,114,101,113,117,101,110,116,108,121,32,117,115,101,100,97,32,99,
+111,108,108,101,99,116,105,111,110,32,111,102,118,101,114,121,32,115,105,109,105
+,108,97,114,32,116,111,115,117,114,114,111,117,110,100,105,110,103,32,116,104,
+101,101,120,97,109,112,108,101,32,111,102,32,116,104,105,115,97,108,105,103,110,
+61,34,99,101,110,116,101,114,34,62,119,111,117,108,100,32,104,97,118,101,32,98,
+101,101,110,105,109,97,103,101,95,99,97,112,116,105,111,110,32,61,97,116,116,97,
+99,104,101,100,32,116,111,32,116,104,101,115,117,103,103,101,115,116,105,110,103
+,32,116,104,97,116,105,110,32,116,104,101,32,102,111,114,109,32,111,102,32,105,
+110,118,111,108,118,101,100,32,105,110,32,116,104,101,105,115,32,100,101,114,105
+,118,101,100,32,102,114,111,109,110,97,109,101,100,32,97,102,116,101,114,32,116,
+104,101,73,110,116,114,111,100,117,99,116,105,111,110,32,116,111,114,101,115,116
+,114,105,99,116,105,111,110,115,32,111,110,32,115,116,121,108,101,61,34,119,105,
+100,116,104,58,32,99,97,110,32,98,101,32,117,115,101,100,32,116,111,32,116,104,
+101,32,99,114,101,97,116,105,111,110,32,111,102,109,111,115,116,32,105,109,112,
+111,114,116,97,110,116,32,105,110,102,111,114,109,97,116,105,111,110,32,97,110,
+100,114,101,115,117,108,116,101,100,32,105,110,32,116,104,101,99,111,108,108,97,
+112,115,101,32,111,102,32,116,104,101,84,104,105,115,32,109,101,97,110,115,32,
+116,104,97,116,101,108,101,109,101,110,116,115,32,111,102,32,116,104,101,119,97,
+115,32,114,101,112,108,97,99,101,100,32,98,121,97,110,97,108,121,115,105,115,32,
+111,102,32,116,104,101,105,110,115,112,105,114,97,116,105,111,110,32,102,111,114
+,114,101,103,97,114,100,101,100,32,97,115,32,116,104,101,109,111,115,116,32,115,
+117,99,99,101,115,115,102,117,108,107,110,111,119,110,32,97,115,32,38,113,117,
+111,116,59,97,32,99,111,109,112,114,101,104,101,110,115,105,118,101,72,105,115,
+116,111,114,121,32,111,102,32,116,104,101,32,119,101,114,101,32,99,111,110,115,
+105,100,101,114,101,100,114,101,116,117,114,110,101,100,32,116,111,32,116,104,
+101,97,114,101,32,114,101,102,101,114,114,101,100,32,116,111,85,110,115,111,117,
+114,99,101,100,32,105,109,97,103,101,62,10,9,60,100,105,118,32,99,108,97,115,115
+,61,34,99,111,110,115,105,115,116,115,32,111,102,32,116,104,101,115,116,111,112,
+80,114,111,112,97,103,97,116,105,111,110,105,110,116,101,114,101,115,116,32,105,
+110,32,116,104,101,97,118,97,105,108,97,98,105,108,105,116,121,32,111,102,97,112
+,112,101,97,114,115,32,116,111,32,104,97,118,101,101,108,101,99,116,114,111,109,
+97,103,110,101,116,105,99,101,110,97,98,108,101,83,101,114,118,105,99,101,115,40
+,102,117,110,99,116,105,111,110,32,111,102,32,116,104,101,73,116,32,105,115,32,
+105,109,112,111,114,116,97,110,116,60,47,115,99,114,105,112,116,62,60,47,100,105
+,118,62,102,117,110,99,116,105,111,110,40,41,123,118,97,114,32,114,101,108,97,
+116,105,118,101,32,116,111,32,116,104,101,97,115,32,97,32,114,101,115,117,108,
+116,32,111,102,32,116,104,101,32,112,111,115,105,116,105,111,110,32,111,102,70,
+111,114,32,101,120,97,109,112,108,101,44,32,105,110,32,109,101,116,104,111,100,
+61,34,112,111,115,116,34,32,119,97,115,32,102,111,108,108,111,119,101,100,32,98,
+121,38,97,109,112,59,109,100,97,115,104,59,32,116,104,101,116,104,101,32,97,112,
+112,108,105,99,97,116,105,111,110,106,115,34,62,60,47,115,99,114,105,112,116,62,
+13,10,117,108,62,60,47,100,105,118,62,60,47,100,105,118,62,97,102,116,101,114,32
+,116,104,101,32,100,101,97,116,104,119,105,116,104,32,114,101,115,112,101,99,116
+,32,116,111,115,116,121,108,101,61,34,112,97,100,100,105,110,103,58,105,115,32,
+112,97,114,116,105,99,117,108,97,114,108,121,100,105,115,112,108,97,121,58,105,
+110,108,105,110,101,59,32,116,121,112,101,61,34,115,117,98,109,105,116,34,32,105
+,115,32,100,105,118,105,100,101,100,32,105,110,116,111,228,184,173,230,150,135,
+32,40,231,174,128,228,189,147,41,114,101,115,112,111,110,115,97,98,105,108,105,
+100,97,100,97,100,109,105,110,105,115,116,114,97,99,105,195,179,110,105,110,116,
+101,114,110,97,99,105,111,110,97,108,101,115,99,111,114,114,101,115,112,111,110,
+100,105,101,110,116,101,224,164,137,224,164,170,224,164,175,224,165,139,224,164,
+151,224,164,170,224,165,130,224,164,176,224,165,141,224,164,181,224,164,185,224,
+164,174,224,164,190,224,164,176,224,165,135,224,164,178,224,165,139,224,164,151,
+224,165,139,224,164,130,224,164,154,224,165,129,224,164,168,224,164,190,224,164,
+181,224,164,178,224,165,135,224,164,149,224,164,191,224,164,168,224,164,184,224,
+164,176,224,164,149,224,164,190,224,164,176,224,164,170,224,165,129,224,164,178,
+224,164,191,224,164,184,224,164,150,224,165,139,224,164,156,224,165,135,224,164,
+130,224,164,154,224,164,190,224,164,185,224,164,191,224,164,143,224,164,173,224,
+165,135,224,164,156,224,165,135,224,164,130,224,164,182,224,164,190,224,164,174,
+224,164,191,224,164,178,224,164,185,224,164,174,224,164,190,224,164,176,224,165,
+128,224,164,156,224,164,190,224,164,151,224,164,176,224,164,163,224,164,172,224,
+164,168,224,164,190,224,164,168,224,165,135,224,164,149,224,165,129,224,164,174,
+224,164,190,224,164,176,224,164,172,224,165,141,224,164,178,224,165,137,224,164,
+151,224,164,174,224,164,190,224,164,178,224,164,191,224,164,149,224,164,174,224,
+164,185,224,164,191,224,164,178,224,164,190,224,164,170,224,165,131,224,164,183,
+224,165,141,224,164,160,224,164,172,224,164,162,224,164,188,224,164,164,224,165,
+135,224,164,173,224,164,190,224,164,156,224,164,170,224,164,190,224,164,149,224,
+165,141,224,164,178,224,164,191,224,164,149,224,164,159,224,165,141,224,164,176,
+224,165,135,224,164,168,224,164,150,224,164,191,224,164,178,224,164,190,224,164,
+171,224,164,166,224,165,140,224,164,176,224,164,190,224,164,168,224,164,174,224,
+164,190,224,164,174,224,164,178,224,165,135,224,164,174,224,164,164,224,164,166,
+224,164,190,224,164,168,224,164,172,224,164,190,224,164,156,224,164,190,224,164,
+176,224,164,181,224,164,191,224,164,149,224,164,190,224,164,184,224,164,149,224,
+165,141,224,164,175,224,165,139,224,164,130,224,164,154,224,164,190,224,164,185,
+224,164,164,224,165,135,224,164,170,224,164,185,224,165,129,224,164,129,224,164,
+154,224,164,172,224,164,164,224,164,190,224,164,175,224,164,190,224,164,184,224,
+164,130,224,164,181,224,164,190,224,164,166,224,164,166,224,165,135,224,164,150,
+224,164,168,224,165,135,224,164,170,224,164,191,224,164,155,224,164,178,224,165,
+135,224,164,181,224,164,191,224,164,182,224,165,135,224,164,183,224,164,176,224,
+164,190,224,164,156,224,165,141,224,164,175,224,164,137,224,164,164,224,165,141,
+224,164,164,224,164,176,224,164,174,224,165,129,224,164,130,224,164,172,224,164,
+136,224,164,166,224,165,139,224,164,168,224,165,139,224,164,130,224,164,137,224,
+164,170,224,164,149,224,164,176,224,164,163,224,164,170,224,164,162,224,164,188,
+224,165,135,224,164,130,224,164,184,224,165,141,224,164,165,224,164,191,224,164,
+164,224,164,171,224,164,191,224,164,178,224,165,141,224,164,174,224,164,174,224,
+165,129,224,164,150,224,165,141,224,164,175,224,164,133,224,164,154,224,165,141,
+224,164,155,224,164,190,224,164,155,224,165,130,224,164,159,224,164,164,224,165,
+128,224,164,184,224,164,130,224,164,151,224,165,128,224,164,164,224,164,156,224,
+164,190,224,164,143,224,164,151,224,164,190,224,164,181,224,164,191,224,164,173,
+224,164,190,224,164,151,224,164,152,224,164,163,224,165,141,224,164,159,224,165,
+135,224,164,166,224,165,130,224,164,184,224,164,176,224,165,135,224,164,166,224,
+164,191,224,164,168,224,165,139,224,164,130,224,164,185,224,164,164,224,165,141,
+224,164,175,224,164,190,224,164,184,224,165,135,224,164,149,224,165,141,224,164,
+184,224,164,151,224,164,190,224,164,130,224,164,167,224,165,128,224,164,181,224,
+164,191,224,164,182,224,165,141,224,164,181,224,164,176,224,164,190,224,164,164,
+224,165,135,224,164,130,224,164,166,224,165,136,224,164,159,224,165,141,224,164,
+184,224,164,168,224,164,149,224,165,141,224,164,182,224,164,190,224,164,184,224,
+164,190,224,164,174,224,164,168,224,165,135,224,164,133,224,164,166,224,164,190,
+224,164,178,224,164,164,224,164,172,224,164,191,224,164,156,224,164,178,224,165,
+128,224,164,170,224,165,129,224,164,176,224,165,130,224,164,183,224,164,185,224,
+164,191,224,164,130,224,164,166,224,165,128,224,164,174,224,164,191,224,164,164,
+224,165,141,224,164,176,224,164,149,224,164,181,224,164,191,224,164,164,224,164,
+190,224,164,176,224,165,129,224,164,170,224,164,175,224,165,135,224,164,184,224,
+165,141,224,164,165,224,164,190,224,164,168,224,164,149,224,164,176,224,165,139,
+224,164,161,224,164,188,224,164,174,224,165,129,224,164,149,224,165,141,224,164,
+164,224,164,175,224,165,139,224,164,156,224,164,168,224,164,190,224,164,149,224,
+165,131,224,164,170,224,164,175,224,164,190,224,164,170,224,165,139,224,164,184,
+224,165,141,224,164,159,224,164,152,224,164,176,224,165,135,224,164,178,224,165,
+130,224,164,149,224,164,190,224,164,176,224,165,141,224,164,175,224,164,181,224,
+164,191,224,164,154,224,164,190,224,164,176,224,164,184,224,165,130,224,164,154,
+224,164,168,224,164,190,224,164,174,224,165,130,224,164,178,224,165,141,224,164,
+175,224,164,166,224,165,135,224,164,150,224,165,135,224,164,130,224,164,185,224,
+164,174,224,165,135,224,164,182,224,164,190,224,164,184,224,165,141,224,164,149,
+224,165,130,224,164,178,224,164,174,224,165,136,224,164,130,224,164,168,224,165,
+135,224,164,164,224,165,136,224,164,175,224,164,190,224,164,176,224,164,156,224,
+164,191,224,164,184,224,164,149,224,165,135,114,115,115,43,120,109,108,34,32,116
+,105,116,108,101,61,34,45,116,121,112,101,34,32,99,111,110,116,101,110,116,61,34
+,116,105,116,108,101,34,32,99,111,110,116,101,110,116,61,34,97,116,32,116,104,
+101,32,115,97,109,101,32,116,105,109,101,46,106,115,34,62,60,47,115,99,114,105,
+112,116,62,10,60,34,32,109,101,116,104,111,100,61,34,112,111,115,116,34,32,60,47
+,115,112,97,110,62,60,47,97,62,60,47,108,105,62,118,101,114,116,105,99,97,108,45
+,97,108,105,103,110,58,116,47,106,113,117,101,114,121,46,109,105,110,46,106,115,
+34,62,46,99,108,105,99,107,40,102,117,110,99,116,105,111,110,40,32,115,116,121,
+108,101,61,34,112,97,100,100,105,110,103,45,125,41,40,41,59,10,60,47,115,99,114,
+105,112,116,62,10,60,47,115,112,97,110,62,60,97,32,104,114,101,102,61,34,60,97,
+32,104,114,101,102,61,34,104,116,116,112,58,47,47,41,59,32,114,101,116,117,114,
+110,32,102,97,108,115,101,59,116,101,120,116,45,100,101,99,111,114,97,116,105,
+111,110,58,32,115,99,114,111,108,108,105,110,103,61,34,110,111,34,32,98,111,114,
+100,101,114,45,99,111,108,108,97,112,115,101,58,97,115,115,111,99,105,97,116,101
+,100,32,119,105,116,104,32,66,97,104,97,115,97,32,73,110,100,111,110,101,115,105
+,97,69,110,103,108,105,115,104,32,108,97,110,103,117,97,103,101,60,116,101,120,
+116,32,120,109,108,58,115,112,97,99,101,61,46,103,105,102,34,32,98,111,114,100,
+101,114,61,34,48,34,60,47,98,111,100,121,62,10,60,47,104,116,109,108,62,10,111,
+118,101,114,102,108,111,119,58,104,105,100,100,101,110,59,105,109,103,32,115,114
+,99,61,34,104,116,116,112,58,47,47,97,100,100,69,118,101,110,116,76,105,115,116,
+101,110,101,114,114,101,115,112,111,110,115,105,98,108,101,32,102,111,114,32,115
+,46,106,115,34,62,60,47,115,99,114,105,112,116,62,10,47,102,97,118,105,99,111,
+110,46,105,99,111,34,32,47,62,111,112,101,114,97,116,105,110,103,32,115,121,115,
+116,101,109,34,32,115,116,121,108,101,61,34,119,105,100,116,104,58,49,116,97,114
+,103,101,116,61,34,95,98,108,97,110,107,34,62,83,116,97,116,101,32,85,110,105,
+118,101,114,115,105,116,121,116,101,120,116,45,97,108,105,103,110,58,108,101,102
+,116,59,10,100,111,99,117,109,101,110,116,46,119,114,105,116,101,40,44,32,105,
+110,99,108,117,100,105,110,103,32,116,104,101,32,97,114,111,117,110,100,32,116,
+104,101,32,119,111,114,108,100,41,59,13,10,60,47,115,99,114,105,112,116,62,13,10
+,60,34,32,115,116,121,108,101,61,34,104,101,105,103,104,116,58,59,111,118,101,
+114,102,108,111,119,58,104,105,100,100,101,110,109,111,114,101,32,105,110,102,
+111,114,109,97,116,105,111,110,97,110,32,105,110,116,101,114,110,97,116,105,111,
+110,97,108,97,32,109,101,109,98,101,114,32,111,102,32,116,104,101,32,111,110,101
+,32,111,102,32,116,104,101,32,102,105,114,115,116,99,97,110,32,98,101,32,102,111
+,117,110,100,32,105,110,32,60,47,100,105,118,62,10,9,9,60,47,100,105,118,62,10,
+100,105,115,112,108,97,121,58,32,110,111,110,101,59,34,62,34,32,47,62,10,60,108,
+105,110,107,32,114,101,108,61,34,10,32,32,40,102,117,110,99,116,105,111,110,40,
+41,32,123,116,104,101,32,49,53,116,104,32,99,101,110,116,117,114,121,46,112,114,
+101,118,101,110,116,68,101,102,97,117,108,116,40,108,97,114,103,101,32,110,117,
+109,98,101,114,32,111,102,32,66,121,122,97,110,116,105,110,101,32,69,109,112,105
+,114,101,46,106,112,103,124,116,104,117,109,98,124,108,101,102,116,124,118,97,
+115,116,32,109,97,106,111,114,105,116,121,32,111,102,109,97,106,111,114,105,116,
+121,32,111,102,32,116,104,101,32,32,97,108,105,103,110,61,34,99,101,110,116,101,
+114,34,62,85,110,105,118,101,114,115,105,116,121,32,80,114,101,115,115,100,111,
+109,105,110,97,116,101,100,32,98,121,32,116,104,101,83,101,99,111,110,100,32,87,
+111,114,108,100,32,87,97,114,100,105,115,116,114,105,98,117,116,105,111,110,32,
+111,102,32,115,116,121,108,101,61,34,112,111,115,105,116,105,111,110,58,116,104,
+101,32,114,101,115,116,32,111,102,32,116,104,101,32,99,104,97,114,97,99,116,101,
+114,105,122,101,100,32,98,121,32,114,101,108,61,34,110,111,102,111,108,108,111,
+119,34,62,100,101,114,105,118,101,115,32,102,114,111,109,32,116,104,101,114,97,
+116,104,101,114,32,116,104,97,110,32,116,104,101,32,97,32,99,111,109,98,105,110,
+97,116,105,111,110,32,111,102,115,116,121,108,101,61,34,119,105,100,116,104,58,
+49,48,48,69,110,103,108,105,115,104,45,115,112,101,97,107,105,110,103,99,111,109
+,112,117,116,101,114,32,115,99,105,101,110,99,101,98,111,114,100,101,114,61,34,
+48,34,32,97,108,116,61,34,116,104,101,32,101,120,105,115,116,101,110,99,101,32,
+111,102,68,101,109,111,99,114,97,116,105,99,32,80,97,114,116,121,34,32,115,116,
+121,108,101,61,34,109,97,114,103,105,110,45,70,111,114,32,116,104,105,115,32,114
+,101,97,115,111,110,44,46,106,115,34,62,60,47,115,99,114,105,112,116,62,10,9,115
+,66,121,84,97,103,78,97,109,101,40,115,41,91,48,93,106,115,34,62,60,47,115,99,
+114,105,112,116,62,13,10,60,46,106,115,34,62,60,47,115,99,114,105,112,116,62,13,
+10,108,105,110,107,32,114,101,108,61,34,105,99,111,110,34,32,39,32,97,108,116,61
+,39,39,32,99,108,97,115,115,61,39,102,111,114,109,97,116,105,111,110,32,111,102,
+32,116,104,101,118,101,114,115,105,111,110,115,32,111,102,32,116,104,101,32,60,
+47,97,62,60,47,100,105,118,62,60,47,100,105,118,62,47,112,97,103,101,62,10,32,32
+,60,112,97,103,101,62,10,60,100,105,118,32,99,108,97,115,115,61,34,99,111,110,
+116,98,101,99,97,109,101,32,116,104,101,32,102,105,114,115,116,98,97,104,97,115,
+97,32,73,110,100,111,110,101,115,105,97,101,110,103,108,105,115,104,32,40,115,
+105,109,112,108,101,41,206,149,206,187,206,187,206,183,206,189,206,185,206,186,
+206,172,209,133,209,128,208,178,208,176,209,130,209,129,208,186,208,184,208,186,
+208,190,208,188,208,191,208,176,208,189,208,184,208,184,209,143,208,178,208,187,
+209,143,208,181,209,130,209,129,209,143,208,148,208,190,208,177,208,176,208,178,
+208,184,209,130,209,140,209,135,208,181,208,187,208,190,208,178,208,181,208,186,
+208,176,209,128,208,176,208,183,208,178,208,184,209,130,208,184,209,143,208,152,
+208,189,209,130,208,181,209,128,208,189,208,181,209,130,208,158,209,130,208,178,
+208,181,209,130,208,184,209,130,209,140,208,189,208,176,208,191,209,128,208,184,
+208,188,208,181,209,128,208,184,208,189,209,130,208,181,209,128,208,189,208,181,
+209,130,208,186,208,190,209,130,208,190,209,128,208,190,208,179,208,190,209,129,
+209,130,209,128,208,176,208,189,208,184,209,134,209,139,208,186,208,176,209,135,
+208,181,209,129,209,130,208,178,208,181,209,131,209,129,208,187,208,190,208,178,
+208,184,209,143,209,133,208,191,209,128,208,190,208,177,208,187,208,181,208,188,
+209,139,208,191,208,190,208,187,209,131,209,135,208,184,209,130,209,140,209,143,
+208,178,208,187,209,143,209,142,209,130,209,129,209,143,208,189,208,176,208,184,
+208,177,208,190,208,187,208,181,208,181,208,186,208,190,208,188,208,191,208,176,
+208,189,208,184,209,143,208,178,208,189,208,184,208,188,208,176,208,189,208,184,
+208,181,209,129,209,128,208,181,208,180,209,129,209,130,208,178,208,176,216,167,
+217,132,217,133,217,136,216,167,216,182,217,138,216,185,216,167,217,132,216,177,
+216,166,217,138,216,179,217,138,216,169,216,167,217,132,216,167,217,134,216,170,
+217,130,216,167,217,132,217,133,216,180,216,167,216,177,217,131,216,167,216,170,
+217,131,216,167,217,132,216,179,217,138,216,167,216,177,216,167,216,170,216,167,
+217,132,217,133,217,131,216,170,217,136,216,168,216,169,216,167,217,132,216,179,
+216,185,217,136,216,175,217,138,216,169,216,167,216,173,216,181,216,167,216,166,
+217,138,216,167,216,170,216,167,217,132,216,185,216,167,217,132,217,133,217,138,
+216,169,216,167,217,132,216,181,217,136,216,170,217,138,216,167,216,170,216,167,
+217,132,216,167,217,134,216,170,216,177,217,134,216,170,216,167,217,132,216,170,
+216,181,216,167,217,133,217,138,217,133,216,167,217,132,216,165,216,179,217,132,
+216,167,217,133,217,138,216,167,217,132,217,133,216,180,216,167,216,177,217,131,
+216,169,216,167,217,132,217,133,216,177,216,166,217,138,216,167,216,170,114,111,
+98,111,116,115,34,32,99,111,110,116,101,110,116,61,34,60,100,105,118,32,105,100,
+61,34,102,111,111,116,101,114,34,62,116,104,101,32,85,110,105,116,101,100,32,83,
+116,97,116,101,115,60,105,109,103,32,115,114,99,61,34,104,116,116,112,58,47,47,
+46,106,112,103,124,114,105,103,104,116,124,116,104,117,109,98,124,46,106,115,34,
+62,60,47,115,99,114,105,112,116,62,13,10,60,108,111,99,97,116,105,111,110,46,112
+,114,111,116,111,99,111,108,102,114,97,109,101,98,111,114,100,101,114,61,34,48,
+34,32,115,34,32,47,62,10,60,109,101,116,97,32,110,97,109,101,61,34,60,47,97,62,
+60,47,100,105,118,62,60,47,100,105,118,62,60,102,111,110,116,45,119,101,105,103,
+104,116,58,98,111,108,100,59,38,113,117,111,116,59,32,97,110,100,32,38,113,117,
+111,116,59,100,101,112,101,110,100,105,110,103,32,111,110,32,116,104,101,32,109,
+97,114,103,105,110,58,48,59,112,97,100,100,105,110,103,58,34,32,114,101,108,61,
+34,110,111,102,111,108,108,111,119,34,32,80,114,101,115,105,100,101,110,116,32,
+111,102,32,116,104,101,32,116,119,101,110,116,105,101,116,104,32,99,101,110,116,
+117,114,121,101,118,105,115,105,111,110,62,10,32,32,60,47,112,97,103,101,73,110,
+116,101,114,110,101,116,32,69,120,112,108,111,114,101,114,97,46,97,115,121,110,
+99,32,61,32,116,114,117,101,59,13,10,105,110,102,111,114,109,97,116,105,111,110,
+32,97,98,111,117,116,60,100,105,118,32,105,100,61,34,104,101,97,100,101,114,34,
+62,34,32,97,99,116,105,111,110,61,34,104,116,116,112,58,47,47,60,97,32,104,114,
+101,102,61,34,104,116,116,112,115,58,47,47,60,100,105,118,32,105,100,61,34,99,
+111,110,116,101,110,116,34,60,47,100,105,118,62,13,10,60,47,100,105,118,62,13,10
+,60,100,101,114,105,118,101,100,32,102,114,111,109,32,116,104,101,32,60,105,109,
+103,32,115,114,99,61,39,104,116,116,112,58,47,47,97,99,99,111,114,100,105,110,
+103,32,116,111,32,116,104,101,32,10,60,47,98,111,100,121,62,10,60,47,104,116,109
+,108,62,10,115,116,121,108,101,61,34,102,111,110,116,45,115,105,122,101,58,115,
+99,114,105,112,116,32,108,97,110,103,117,97,103,101,61,34,65,114,105,97,108,44,
+32,72,101,108,118,101,116,105,99,97,44,60,47,97,62,60,115,112,97,110,32,99,108,
+97,115,115,61,34,60,47,115,99,114,105,112,116,62,60,115,99,114,105,112,116,32,
+112,111,108,105,116,105,99,97,108,32,112,97,114,116,105,101,115,116,100,62,60,47
+,116,114,62,60,47,116,97,98,108,101,62,60,104,114,101,102,61,34,104,116,116,112,
+58,47,47,119,119,119,46,105,110,116,101,114,112,114,101,116,97,116,105,111,110,
+32,111,102,114,101,108,61,34,115,116,121,108,101,115,104,101,101,116,34,32,100,
+111,99,117,109,101,110,116,46,119,114,105,116,101,40,39,60,99,104,97,114,115,101
+,116,61,34,117,116,102,45,56,34,62,10,98,101,103,105,110,110,105,110,103,32,111,
+102,32,116,104,101,32,114,101,118,101,97,108,101,100,32,116,104,97,116,32,116,
+104,101,116,101,108,101,118,105,115,105,111,110,32,115,101,114,105,101,115,34,32
+,114,101,108,61,34,110,111,102,111,108,108,111,119,34,62,32,116,97,114,103,101,
+116,61,34,95,98,108,97,110,107,34,62,99,108,97,105,109,105,110,103,32,116,104,97
+,116,32,116,104,101,104,116,116,112,37,51,65,37,50,70,37,50,70,119,119,119,46,
+109,97,110,105,102,101,115,116,97,116,105,111,110,115,32,111,102,80,114,105,109,
+101,32,77,105,110,105,115,116,101,114,32,111,102,105,110,102,108,117,101,110,99,
+101,100,32,98,121,32,116,104,101,99,108,97,115,115,61,34,99,108,101,97,114,102,
+105,120,34,62,47,100,105,118,62,13,10,60,47,100,105,118,62,13,10,13,10,116,104,
+114,101,101,45,100,105,109,101,110,115,105,111,110,97,108,67,104,117,114,99,104,
+32,111,102,32,69,110,103,108,97,110,100,111,102,32,78,111,114,116,104,32,67,97,
+114,111,108,105,110,97,115,113,117,97,114,101,32,107,105,108,111,109,101,116,114
+,101,115,46,97,100,100,69,118,101,110,116,76,105,115,116,101,110,101,114,100,105
+,115,116,105,110,99,116,32,102,114,111,109,32,116,104,101,99,111,109,109,111,110
+,108,121,32,107,110,111,119,110,32,97,115,80,104,111,110,101,116,105,99,32,65,
+108,112,104,97,98,101,116,100,101,99,108,97,114,101,100,32,116,104,97,116,32,116
+,104,101,99,111,110,116,114,111,108,108,101,100,32,98,121,32,116,104,101,66,101,
+110,106,97,109,105,110,32,70,114,97,110,107,108,105,110,114,111,108,101,45,112,
+108,97,121,105,110,103,32,103,97,109,101,116,104,101,32,85,110,105,118,101,114,
+115,105,116,121,32,111,102,105,110,32,87,101,115,116,101,114,110,32,69,117,114,
+111,112,101,112,101,114,115,111,110,97,108,32,99,111,109,112,117,116,101,114,80,
+114,111,106,101,99,116,32,71,117,116,101,110,98,101,114,103,114,101,103,97,114,
+100,108,101,115,115,32,111,102,32,116,104,101,104,97,115,32,98,101,101,110,32,
+112,114,111,112,111,115,101,100,116,111,103,101,116,104,101,114,32,119,105,116,
+104,32,116,104,101,62,60,47,108,105,62,60,108,105,32,99,108,97,115,115,61,34,105
+,110,32,115,111,109,101,32,99,111,117,110,116,114,105,101,115,109,105,110,46,106
+,115,34,62,60,47,115,99,114,105,112,116,62,111,102,32,116,104,101,32,112,111,112
+,117,108,97,116,105,111,110,111,102,102,105,99,105,97,108,32,108,97,110,103,117,
+97,103,101,60,105,109,103,32,115,114,99,61,34,105,109,97,103,101,115,47,105,100,
+101,110,116,105,102,105,101,100,32,98,121,32,116,104,101,110,97,116,117,114,97,
+108,32,114,101,115,111,117,114,99,101,115,99,108,97,115,115,105,102,105,99,97,
+116,105,111,110,32,111,102,99,97,110,32,98,101,32,99,111,110,115,105,100,101,114
+,101,100,113,117,97,110,116,117,109,32,109,101,99,104,97,110,105,99,115,78,101,
+118,101,114,116,104,101,108,101,115,115,44,32,116,104,101,109,105,108,108,105,
+111,110,32,121,101,97,114,115,32,97,103,111,60,47,98,111,100,121,62,13,10,60,47,
+104,116,109,108,62,13,206,149,206,187,206,187,206,183,206,189,206,185,206,186,
+206,172,10,116,97,107,101,32,97,100,118,97,110,116,97,103,101,32,111,102,97,110,
+100,44,32,97,99,99,111,114,100,105,110,103,32,116,111,97,116,116,114,105,98,117,
+116,101,100,32,116,111,32,116,104,101,77,105,99,114,111,115,111,102,116,32,87,
+105,110,100,111,119,115,116,104,101,32,102,105,114,115,116,32,99,101,110,116,117
+,114,121,117,110,100,101,114,32,116,104,101,32,99,111,110,116,114,111,108,100,
+105,118,32,99,108,97,115,115,61,34,104,101,97,100,101,114,115,104,111,114,116,
+108,121,32,97,102,116,101,114,32,116,104,101,110,111,116,97,98,108,101,32,101,
+120,99,101,112,116,105,111,110,116,101,110,115,32,111,102,32,116,104,111,117,115
+,97,110,100,115,115,101,118,101,114,97,108,32,100,105,102,102,101,114,101,110,
+116,97,114,111,117,110,100,32,116,104,101,32,119,111,114,108,100,46,114,101,97,
+99,104,105,110,103,32,109,105,108,105,116,97,114,121,105,115,111,108,97,116,101,
+100,32,102,114,111,109,32,116,104,101,111,112,112,111,115,105,116,105,111,110,32
+,116,111,32,116,104,101,116,104,101,32,79,108,100,32,84,101,115,116,97,109,101,
+110,116,65,102,114,105,99,97,110,32,65,109,101,114,105,99,97,110,115,105,110,115
+,101,114,116,101,100,32,105,110,116,111,32,116,104,101,115,101,112,97,114,97,116
+,101,32,102,114,111,109,32,116,104,101,109,101,116,114,111,112,111,108,105,116,
+97,110,32,97,114,101,97,109,97,107,101,115,32,105,116,32,112,111,115,115,105,98,
+108,101,97,99,107,110,111,119,108,101,100,103,101,100,32,116,104,97,116,97,114,
+103,117,97,98,108,121,32,116,104,101,32,109,111,115,116,116,121,112,101,61,34,
+116,101,120,116,47,99,115,115,34,62,10,116,104,101,32,73,110,116,101,114,110,97,
+116,105,111,110,97,108,65,99,99,111,114,100,105,110,103,32,116,111,32,116,104,
+101,32,112,101,61,34,116,101,120,116,47,99,115,115,34,32,47,62,10,99,111,105,110
+,99,105,100,101,32,119,105,116,104,32,116,104,101,116,119,111,45,116,104,105,114
+,100,115,32,111,102,32,116,104,101,68,117,114,105,110,103,32,116,104,105,115,32,
+116,105,109,101,44,100,117,114,105,110,103,32,116,104,101,32,112,101,114,105,111
+,100,97,110,110,111,117,110,99,101,100,32,116,104,97,116,32,104,101,116,104,101,
+32,105,110,116,101,114,110,97,116,105,111,110,97,108,97,110,100,32,109,111,114,
+101,32,114,101,99,101,110,116,108,121,98,101,108,105,101,118,101,100,32,116,104,
+97,116,32,116,104,101,99,111,110,115,99,105,111,117,115,110,101,115,115,32,97,
+110,100,102,111,114,109,101,114,108,121,32,107,110,111,119,110,32,97,115,115,117
+,114,114,111,117,110,100,101,100,32,98,121,32,116,104,101,102,105,114,115,116,32
+,97,112,112,101,97,114,101,100,32,105,110,111,99,99,97,115,105,111,110,97,108,
+108,121,32,117,115,101,100,112,111,115,105,116,105,111,110,58,97,98,115,111,108,
+117,116,101,59,34,32,116,97,114,103,101,116,61,34,95,98,108,97,110,107,34,32,112
+,111,115,105,116,105,111,110,58,114,101,108,97,116,105,118,101,59,116,101,120,
+116,45,97,108,105,103,110,58,99,101,110,116,101,114,59,106,97,120,47,108,105,98,
+115,47,106,113,117,101,114,121,47,49,46,98,97,99,107,103,114,111,117,110,100,45,
+99,111,108,111,114,58,35,116,121,112,101,61,34,97,112,112,108,105,99,97,116,105,
+111,110,47,97,110,103,117,97,103,101,34,32,99,111,110,116,101,110,116,61,34,60,
+109,101,116,97,32,104,116,116,112,45,101,113,117,105,118,61,34,80,114,105,118,97
+,99,121,32,80,111,108,105,99,121,60,47,97,62,101,40,34,37,51,67,115,99,114,105,
+112,116,32,115,114,99,61,39,34,32,116,97,114,103,101,116,61,34,95,98,108,97,110,
+107,34,62,79,110,32,116,104,101,32,111,116,104,101,114,32,104,97,110,100,44,46,
+106,112,103,124,116,104,117,109,98,124,114,105,103,104,116,124,50,60,47,100,105,
+118,62,60,100,105,118,32,99,108,97,115,115,61,34,60,100,105,118,32,115,116,121,
+108,101,61,34,102,108,111,97,116,58,110,105,110,101,116,101,101,110,116,104,32,
+99,101,110,116,117,114,121,60,47,98,111,100,121,62,13,10,60,47,104,116,109,108,
+62,13,10,60,105,109,103,32,115,114,99,61,34,104,116,116,112,58,47,47,115,59,116,
+101,120,116,45,97,108,105,103,110,58,99,101,110,116,101,114,102,111,110,116,45,
+119,101,105,103,104,116,58,32,98,111,108,100,59,32,65,99,99,111,114,100,105,110,
+103,32,116,111,32,116,104,101,32,100,105,102,102,101,114,101,110,99,101,32,98,
+101,116,119,101,101,110,34,32,102,114,97,109,101,98,111,114,100,101,114,61,34,48
+,34,32,34,32,115,116,121,108,101,61,34,112,111,115,105,116,105,111,110,58,108,
+105,110,107,32,104,114,101,102,61,34,104,116,116,112,58,47,47,104,116,109,108,52
+,47,108,111,111,115,101,46,100,116,100,34,62,10,100,117,114,105,110,103,32,116,
+104,105,115,32,112,101,114,105,111,100,60,47,116,100,62,60,47,116,114,62,60,47,
+116,97,98,108,101,62,99,108,111,115,101,108,121,32,114,101,108,97,116,101,100,32
+,116,111,102,111,114,32,116,104,101,32,102,105,114,115,116,32,116,105,109,101,59
+,102,111,110,116,45,119,101,105,103,104,116,58,98,111,108,100,59,105,110,112,117
+,116,32,116,121,112,101,61,34,116,101,120,116,34,32,60,115,112,97,110,32,115,116
+,121,108,101,61,34,102,111,110,116,45,111,110,114,101,97,100,121,115,116,97,116,
+101,99,104,97,110,103,101,9,60,100,105,118,32,99,108,97,115,115,61,34,99,108,101
+,97,114,100,111,99,117,109,101,110,116,46,108,111,99,97,116,105,111,110,46,32,70
+,111,114,32,101,120,97,109,112,108,101,44,32,116,104,101,32,97,32,119,105,100,
+101,32,118,97,114,105,101,116,121,32,111,102,32,60,33,68,79,67,84,89,80,69,32,
+104,116,109,108,62,13,10,60,38,110,98,115,112,59,38,110,98,115,112,59,38,110,98,
+115,112,59,34,62,60,97,32,104,114,101,102,61,34,104,116,116,112,58,47,47,115,116
+,121,108,101,61,34,102,108,111,97,116,58,108,101,102,116,59,99,111,110,99,101,
+114,110,101,100,32,119,105,116,104,32,116,104,101,61,104,116,116,112,37,51,65,37
+,50,70,37,50,70,119,119,119,46,105,110,32,112,111,112,117,108,97,114,32,99,117,
+108,116,117,114,101,116,121,112,101,61,34,116,101,120,116,47,99,115,115,34,32,47
+,62,105,116,32,105,115,32,112,111,115,115,105,98,108,101,32,116,111,32,72,97,114
+,118,97,114,100,32,85,110,105,118,101,114,115,105,116,121,116,121,108,101,115,
+104,101,101,116,34,32,104,114,101,102,61,34,47,116,104,101,32,109,97,105,110,32,
+99,104,97,114,97,99,116,101,114,79,120,102,111,114,100,32,85,110,105,118,101,114
+,115,105,116,121,32,32,110,97,109,101,61,34,107,101,121,119,111,114,100,115,34,
+32,99,115,116,121,108,101,61,34,116,101,120,116,45,97,108,105,103,110,58,116,104
+,101,32,85,110,105,116,101,100,32,75,105,110,103,100,111,109,102,101,100,101,114
+,97,108,32,103,111,118,101,114,110,109,101,110,116,60,100,105,118,32,115,116,121
+,108,101,61,34,109,97,114,103,105,110,32,100,101,112,101,110,100,105,110,103,32,
+111,110,32,116,104,101,32,100,101,115,99,114,105,112,116,105,111,110,32,111,102,
+32,116,104,101,60,100,105,118,32,99,108,97,115,115,61,34,104,101,97,100,101,114,
+46,109,105,110,46,106,115,34,62,60,47,115,99,114,105,112,116,62,100,101,115,116,
+114,117,99,116,105,111,110,32,111,102,32,116,104,101,115,108,105,103,104,116,108
+,121,32,100,105,102,102,101,114,101,110,116,105,110,32,97,99,99,111,114,100,97,
+110,99,101,32,119,105,116,104,116,101,108,101,99,111,109,109,117,110,105,99,97,
+116,105,111,110,115,105,110,100,105,99,97,116,101,115,32,116,104,97,116,32,116,
+104,101,115,104,111,114,116,108,121,32,116,104,101,114,101,97,102,116,101,114,
+101,115,112,101,99,105,97,108,108,121,32,105,110,32,116,104,101,32,69,117,114,
+111,112,101,97,110,32,99,111,117,110,116,114,105,101,115,72,111,119,101,118,101,
+114,44,32,116,104,101,114,101,32,97,114,101,115,114,99,61,34,104,116,116,112,58,
+47,47,115,116,97,116,105,99,115,117,103,103,101,115,116,101,100,32,116,104,97,
+116,32,116,104,101,34,32,115,114,99,61,34,104,116,116,112,58,47,47,119,119,119,
+46,97,32,108,97,114,103,101,32,110,117,109,98,101,114,32,111,102,32,84,101,108,
+101,99,111,109,109,117,110,105,99,97,116,105,111,110,115,34,32,114,101,108,61,34
+,110,111,102,111,108,108,111,119,34,32,116,72,111,108,121,32,82,111,109,97,110,
+32,69,109,112,101,114,111,114,97,108,109,111,115,116,32,101,120,99,108,117,115,
+105,118,101,108,121,34,32,98,111,114,100,101,114,61,34,48,34,32,97,108,116,61,34
+,83,101,99,114,101,116,97,114,121,32,111,102,32,83,116,97,116,101,99,117,108,109
+,105,110,97,116,105,110,103,32,105,110,32,116,104,101,67,73,65,32,87,111,114,108
+,100,32,70,97,99,116,98,111,111,107,116,104,101,32,109,111,115,116,32,105,109,
+112,111,114,116,97,110,116,97,110,110,105,118,101,114,115,97,114,121,32,111,102,
+32,116,104,101,115,116,121,108,101,61,34,98,97,99,107,103,114,111,117,110,100,45
+,60,108,105,62,60,101,109,62,60,97,32,104,114,101,102,61,34,47,116,104,101,32,65
+,116,108,97,110,116,105,99,32,79,99,101,97,110,115,116,114,105,99,116,108,121,32
+,115,112,101,97,107,105,110,103,44,115,104,111,114,116,108,121,32,98,101,102,111
+,114,101,32,116,104,101,100,105,102,102,101,114,101,110,116,32,116,121,112,101,
+115,32,111,102,116,104,101,32,79,116,116,111,109,97,110,32,69,109,112,105,114,
+101,62,60,105,109,103,32,115,114,99,61,34,104,116,116,112,58,47,47,65,110,32,73,
+110,116,114,111,100,117,99,116,105,111,110,32,116,111,99,111,110,115,101,113,117
+,101,110,99,101,32,111,102,32,116,104,101,100,101,112,97,114,116,117,114,101,32,
+102,114,111,109,32,116,104,101,67,111,110,102,101,100,101,114,97,116,101,32,83,
+116,97,116,101,115,105,110,100,105,103,101,110,111,117,115,32,112,101,111,112,
+108,101,115,80,114,111,99,101,101,100,105,110,103,115,32,111,102,32,116,104,101,
+105,110,102,111,114,109,97,116,105,111,110,32,111,110,32,116,104,101,116,104,101
+,111,114,105,101,115,32,104,97,118,101,32,98,101,101,110,105,110,118,111,108,118
+,101,109,101,110,116,32,105,110,32,116,104,101,100,105,118,105,100,101,100,32,
+105,110,116,111,32,116,104,114,101,101,97,100,106,97,99,101,110,116,32,99,111,
+117,110,116,114,105,101,115,105,115,32,114,101,115,112,111,110,115,105,98,108,
+101,32,102,111,114,100,105,115,115,111,108,117,116,105,111,110,32,111,102,32,116
+,104,101,99,111,108,108,97,98,111,114,97,116,105,111,110,32,119,105,116,104,119,
+105,100,101,108,121,32,114,101,103,97,114,100,101,100,32,97,115,104,105,115,32,
+99,111,110,116,101,109,112,111,114,97,114,105,101,115,102,111,117,110,100,105,
+110,103,32,109,101,109,98,101,114,32,111,102,68,111,109,105,110,105,99,97,110,32
+,82,101,112,117,98,108,105,99,103,101,110,101,114,97,108,108,121,32,97,99,99,101
+,112,116,101,100,116,104,101,32,112,111,115,115,105,98,105,108,105,116,121,32,
+111,102,97,114,101,32,97,108,115,111,32,97,118,97,105,108,97,98,108,101,117,110,
+100,101,114,32,99,111,110,115,116,114,117,99,116,105,111,110,114,101,115,116,111
+,114,97,116,105,111,110,32,111,102,32,116,104,101,116,104,101,32,103,101,110,101
+,114,97,108,32,112,117,98,108,105,99,105,115,32,97,108,109,111,115,116,32,101,
+110,116,105,114,101,108,121,112,97,115,115,101,115,32,116,104,114,111,117,103,
+104,32,116,104,101,104,97,115,32,98,101,101,110,32,115,117,103,103,101,115,116,
+101,100,99,111,109,112,117,116,101,114,32,97,110,100,32,118,105,100,101,111,71,
+101,114,109,97,110,105,99,32,108,97,110,103,117,97,103,101,115,32,97,99,99,111,
+114,100,105,110,103,32,116,111,32,116,104,101,32,100,105,102,102,101,114,101,110
+,116,32,102,114,111,109,32,116,104,101,115,104,111,114,116,108,121,32,97,102,116
+,101,114,119,97,114,100,115,104,114,101,102,61,34,104,116,116,112,115,58,47,47,
+119,119,119,46,114,101,99,101,110,116,32,100,101,118,101,108,111,112,109,101,110
+,116,66,111,97,114,100,32,111,102,32,68,105,114,101,99,116,111,114,115,60,100,
+105,118,32,99,108,97,115,115,61,34,115,101,97,114,99,104,124,32,60,97,32,104,114
+,101,102,61,34,104,116,116,112,58,47,47,73,110,32,112,97,114,116,105,99,117,108,
+97,114,44,32,116,104,101,77,117,108,116,105,112,108,101,32,102,111,111,116,110,
+111,116,101,115,111,114,32,111,116,104,101,114,32,115,117,98,115,116,97,110,99,
+101,116,104,111,117,115,97,110,100,115,32,111,102,32,121,101,97,114,115,116,114,
+97,110,115,108,97,116,105,111,110,32,111,102,32,116,104,101,60,47,100,105,118,62
+,13,10,60,47,100,105,118,62,13,10,13,10,60,97,32,104,114,101,102,61,34,105,110,
+100,101,120,46,112,104,112,119,97,115,32,101,115,116,97,98,108,105,115,104,101,
+100,32,105,110,109,105,110,46,106,115,34,62,60,47,115,99,114,105,112,116,62,10,
+112,97,114,116,105,99,105,112,97,116,101,32,105,110,32,116,104,101,97,32,115,116
+,114,111,110,103,32,105,110,102,108,117,101,110,99,101,115,116,121,108,101,61,34
+,109,97,114,103,105,110,45,116,111,112,58,114,101,112,114,101,115,101,110,116,
+101,100,32,98,121,32,116,104,101,103,114,97,100,117,97,116,101,100,32,102,114,
+111,109,32,116,104,101,84,114,97,100,105,116,105,111,110,97,108,108,121,44,32,
+116,104,101,69,108,101,109,101,110,116,40,34,115,99,114,105,112,116,34,41,59,72,
+111,119,101,118,101,114,44,32,115,105,110,99,101,32,116,104,101,47,100,105,118,
+62,10,60,47,100,105,118,62,10,60,100,105,118,32,108,101,102,116,59,32,109,97,114
+,103,105,110,45,108,101,102,116,58,112,114,111,116,101,99,116,105,111,110,32,97,
+103,97,105,110,115,116,48,59,32,118,101,114,116,105,99,97,108,45,97,108,105,103,
+110,58,85,110,102,111,114,116,117,110,97,116,101,108,121,44,32,116,104,101,116,
+121,112,101,61,34,105,109,97,103,101,47,120,45,105,99,111,110,47,100,105,118,62,
+10,60,100,105,118,32,99,108,97,115,115,61,34,32,99,108,97,115,115,61,34,99,108,
+101,97,114,102,105,120,34,62,60,100,105,118,32,99,108,97,115,115,61,34,102,111,
+111,116,101,114,9,9,60,47,100,105,118,62,10,9,9,60,47,100,105,118,62,10,116,104,
+101,32,109,111,116,105,111,110,32,112,105,99,116,117,114,101,208,145,209,138,208
+,187,208,179,208,176,209,128,209,129,208,186,208,184,208,177,209,138,208,187,208
+,179,208,176,209,128,209,129,208,186,208,184,208,164,208,181,208,180,208,181,209
+,128,208,176,209,134,208,184,208,184,208,189,208,181,209,129,208,186,208,190,208
+,187,209,140,208,186,208,190,209,129,208,190,208,190,208,177,209,137,208,181,208
+,189,208,184,208,181,209,129,208,190,208,190,208,177,209,137,208,181,208,189,208
+,184,209,143,208,191,209,128,208,190,208,179,209,128,208,176,208,188,208,188,209
+,139,208,158,209,130,208,191,209,128,208,176,208,178,208,184,209,130,209,140,208
+,177,208,181,209,129,208,191,208,187,208,176,209,130,208,189,208,190,208,188,208
+,176,209,130,208,181,209,128,208,184,208,176,208,187,209,139,208,191,208,190,208
+,183,208,178,208,190,208,187,209,143,208,181,209,130,208,191,208,190,209,129,208
+,187,208,181,208,180,208,189,208,184,208,181,209,128,208,176,208,183,208,187,208
+,184,209,135,208,189,209,139,209,133,208,191,209,128,208,190,208,180,209,131,208
+,186,209,134,208,184,208,184,208,191,209,128,208,190,208,179,209,128,208,176,208
+,188,208,188,208,176,208,191,208,190,208,187,208,189,208,190,209,129,209,130,209
+,140,209,142,208,189,208,176,209,133,208,190,208,180,208,184,209,130,209,129,209
+,143,208,184,208,183,208,177,209,128,208,176,208,189,208,189,208,190,208,181,208
+,189,208,176,209,129,208,181,208,187,208,181,208,189,208,184,209,143,208,184,208
+,183,208,188,208,181,208,189,208,181,208,189,208,184,209,143,208,186,208,176,209
+,130,208,181,208,179,208,190,209,128,208,184,208,184,208,144,208,187,208,181,208
+,186,209,129,208,176,208,189,208,180,209,128,224,164,166,224,165,141,224,164,181
+,224,164,190,224,164,176,224,164,190,224,164,174,224,165,136,224,164,168,224,165
+,129,224,164,133,224,164,178,224,164,170,224,165,141,224,164,176,224,164,166,224
+,164,190,224,164,168,224,164,173,224,164,190,224,164,176,224,164,164,224,165,128
+,224,164,175,224,164,133,224,164,168,224,165,129,224,164,166,224,165,135,224,164
+,182,224,164,185,224,164,191,224,164,168,224,165,141,224,164,166,224,165,128,224
+,164,135,224,164,130,224,164,161,224,164,191,224,164,175,224,164,190,224,164,166
+,224,164,191,224,164,178,224,165,141,224,164,178,224,165,128,224,164,133,224,164
+,167,224,164,191,224,164,149,224,164,190,224,164,176,224,164,181,224,165,128,224
+,164,161,224,164,191,224,164,175,224,165,139,224,164,154,224,164,191,224,164,159
+,224,165,141,224,164,160,224,165,135,224,164,184,224,164,174,224,164,190,224,164
+,154,224,164,190,224,164,176,224,164,156,224,164,130,224,164,149,224,165,141,224
+,164,182,224,164,168,224,164,166,224,165,129,224,164,168,224,164,191,224,164,175
+,224,164,190,224,164,170,224,165,141,224,164,176,224,164,175,224,165,139,224,164
+,151,224,164,133,224,164,168,224,165,129,224,164,184,224,164,190,224,164,176,224
+,164,145,224,164,168,224,164,178,224,164,190,224,164,135,224,164,168,224,164,170
+,224,164,190,224,164,176,224,165,141,224,164,159,224,165,128,224,164,182,224,164
+,176,224,165,141,224,164,164,224,165,139,224,164,130,224,164,178,224,165,139,224
+,164,149,224,164,184,224,164,173,224,164,190,224,164,171,224,164,188,224,165,141
+,224,164,178,224,165,136,224,164,182,224,164,182,224,164,176,224,165,141,224,164
+,164,224,165,135,224,164,130,224,164,170,224,165,141,224,164,176,224,164,166,224
+,165,135,224,164,182,224,164,170,224,165,141,224,164,178,224,165,135,224,164,175
+,224,164,176,224,164,149,224,165,135,224,164,130,224,164,166,224,165,141,224,164
+,176,224,164,184,224,165,141,224,164,165,224,164,191,224,164,164,224,164,191,224
+,164,137,224,164,164,224,165,141,224,164,170,224,164,190,224,164,166,224,164,137
+,224,164,168,224,165,141,224,164,185,224,165,135,224,164,130,224,164,154,224,164
+,191,224,164,159,224,165,141,224,164,160,224,164,190,224,164,175,224,164,190,224
+,164,164,224,165,141,224,164,176,224,164,190,224,164,156,224,165,141,224,164,175
+,224,164,190,224,164,166,224,164,190,224,164,170,224,165,129,224,164,176,224,164
+,190,224,164,168,224,165,135,224,164,156,224,165,139,224,164,161,224,164,188,224
+,165,135,224,164,130,224,164,133,224,164,168,224,165,129,224,164,181,224,164,190
+,224,164,166,224,164,182,224,165,141,224,164,176,224,165,135,224,164,163,224,165
+,128,224,164,182,224,164,191,224,164,149,224,165,141,224,164,183,224,164,190,224
+,164,184,224,164,176,224,164,149,224,164,190,224,164,176,224,165,128,224,164,184
+,224,164,130,224,164,151,224,165,141,224,164,176,224,164,185,224,164,170,224,164
+,176,224,164,191,224,164,163,224,164,190,224,164,174,224,164,172,224,165,141,224
+,164,176,224,164,190,224,164,130,224,164,161,224,164,172,224,164,154,224,165,141
+,224,164,154,224,165,139,224,164,130,224,164,137,224,164,170,224,164,178,224,164
+,172,224,165,141,224,164,167,224,164,174,224,164,130,224,164,164,224,165,141,224
+,164,176,224,165,128,224,164,184,224,164,130,224,164,170,224,164,176,224,165,141
+,224,164,149,224,164,137,224,164,174,224,165,141,224,164,174,224,165,128,224,164
+,166,224,164,174,224,164,190,224,164,167,224,165,141,224,164,175,224,164,174,224
+,164,184,224,164,185,224,164,190,224,164,175,224,164,164,224,164,190,224,164,182
+,224,164,172,224,165,141,224,164,166,224,165,139,224,164,130,224,164,174,224,165
+,128,224,164,161,224,164,191,224,164,175,224,164,190,224,164,134,224,164,136,224
+,164,170,224,165,128,224,164,143,224,164,178,224,164,174,224,165,139,224,164,172
+,224,164,190,224,164,135,224,164,178,224,164,184,224,164,130,224,164,150,224,165
+,141,224,164,175,224,164,190,224,164,134,224,164,170,224,164,176,224,165,135,224
+,164,182,224,164,168,224,164,133,224,164,168,224,165,129,224,164,172,224,164,130
+,224,164,167,224,164,172,224,164,190,224,164,156,224,164,188,224,164,190,224,164
+,176,224,164,168,224,164,181,224,165,128,224,164,168,224,164,164,224,164,174,224
+,164,170,224,165,141,224,164,176,224,164,174,224,165,129,224,164,150,224,164,170
+,224,165,141,224,164,176,224,164,182,224,165,141,224,164,168,224,164,170,224,164
+,176,224,164,191,224,164,181,224,164,190,224,164,176,224,164,168,224,165,129,224
+,164,149,224,164,184,224,164,190,224,164,168,224,164,184,224,164,174,224,164,176
+,224,165,141,224,164,165,224,164,168,224,164,134,224,164,175,224,165,139,224,164
+,156,224,164,191,224,164,164,224,164,184,224,165,139,224,164,174,224,164,181,224
+,164,190,224,164,176,216,167,217,132,217,133,216,180,216,167,216,177,217,131,216
+,167,216,170,216,167,217,132,217,133,217,134,216,170,216,175,217,138,216,167,216
+,170,216,167,217,132,217,131,217,133,216,168,217,138,217,136,216,170,216,177,216
+,167,217,132,217,133,216,180,216,167,217,135,216,175,216,167,216,170,216,185,216
+,175,216,175,216,167,217,132,216,178,217,136,216,167,216,177,216,185,216,175,216
+,175,216,167,217,132,216,177,216,175,217,136,216,175,216,167,217,132,216,165,216
+,179,217,132,216,167,217,133,217,138,216,169,216,167,217,132,217,129,217,136,216
+,170,217,136,216,180,217,136,216,168,216,167,217,132,217,133,216,179,216,167,216
+,168,217,130,216,167,216,170,216,167,217,132,217,133,216,185,217,132,217,136,217
+,133,216,167,216,170,216,167,217,132,217,133,216,179,217,132,216,179,217,132,216
+,167,216,170,216,167,217,132,216,172,216,177,216,167,217,129,217,138,217,131,216
+,179,216,167,217,132,216,167,216,179,217,132,216,167,217,133,217,138,216,169,216
+,167,217,132,216,167,216,170,216,181,216,167,217,132,216,167,216,170,107,101,121
+,119,111,114,100,115,34,32,99,111,110,116,101,110,116,61,34,119,51,46,111,114,
+103,47,49,57,57,57,47,120,104,116,109,108,34,62,60,97,32,116,97,114,103,101,116,
+61,34,95,98,108,97,110,107,34,32,116,101,120,116,47,104,116,109,108,59,32,99,104
+,97,114,115,101,116,61,34,32,116,97,114,103,101,116,61,34,95,98,108,97,110,107,
+34,62,60,116,97,98,108,101,32,99,101,108,108,112,97,100,100,105,110,103,61,34,97
+,117,116,111,99,111,109,112,108,101,116,101,61,34,111,102,102,34,32,116,101,120,
+116,45,97,108,105,103,110,58,32,99,101,110,116,101,114,59,116,111,32,108,97,115,
+116,32,118,101,114,115,105,111,110,32,98,121,32,98,97,99,107,103,114,111,117,110
+,100,45,99,111,108,111,114,58,32,35,34,32,104,114,101,102,61,34,104,116,116,112,
+58,47,47,119,119,119,46,47,100,105,118,62,60,47,100,105,118,62,60,100,105,118,32
+,105,100,61,60,97,32,104,114,101,102,61,34,35,34,32,99,108,97,115,115,61,34,34,
+62,60,105,109,103,32,115,114,99,61,34,104,116,116,112,58,47,47,99,114,105,112,
+116,34,32,115,114,99,61,34,104,116,116,112,58,47,47,10,60,115,99,114,105,112,116
+,32,108,97,110,103,117,97,103,101,61,34,47,47,69,78,34,32,34,104,116,116,112,58,
+47,47,119,119,119,46,119,101,110,99,111,100,101,85,82,73,67,111,109,112,111,110,
+101,110,116,40,34,32,104,114,101,102,61,34,106,97,118,97,115,99,114,105,112,116,
+58,60,100,105,118,32,99,108,97,115,115,61,34,99,111,110,116,101,110,116,100,111,
+99,117,109,101,110,116,46,119,114,105,116,101,40,39,60,115,99,112,111,115,105,
+116,105,111,110,58,32,97,98,115,111,108,117,116,101,59,115,99,114,105,112,116,32
+,115,114,99,61,34,104,116,116,112,58,47,47,32,115,116,121,108,101,61,34,109,97,
+114,103,105,110,45,116,111,112,58,46,109,105,110,46,106,115,34,62,60,47,115,99,
+114,105,112,116,62,10,60,47,100,105,118,62,10,60,100,105,118,32,99,108,97,115,
+115,61,34,119,51,46,111,114,103,47,49,57,57,57,47,120,104,116,109,108,34,32,10,
+13,10,60,47,98,111,100,121,62,13,10,60,47,104,116,109,108,62,100,105,115,116,105
+,110,99,116,105,111,110,32,98,101,116,119,101,101,110,47,34,32,116,97,114,103,
+101,116,61,34,95,98,108,97,110,107,34,62,60,108,105,110,107,32,104,114,101,102,
+61,34,104,116,116,112,58,47,47,101,110,99,111,100,105,110,103,61,34,117,116,102,
+45,56,34,63,62,10,119,46,97,100,100,69,118,101,110,116,76,105,115,116,101,110,
+101,114,63,97,99,116,105,111,110,61,34,104,116,116,112,58,47,47,119,119,119,46,
+105,99,111,110,34,32,104,114,101,102,61,34,104,116,116,112,58,47,47,32,115,116,
+121,108,101,61,34,98,97,99,107,103,114,111,117,110,100,58,116,121,112,101,61,34,
+116,101,120,116,47,99,115,115,34,32,47,62,10,109,101,116,97,32,112,114,111,112,
+101,114,116,121,61,34,111,103,58,116,60,105,110,112,117,116,32,116,121,112,101,
+61,34,116,101,120,116,34,32,32,115,116,121,108,101,61,34,116,101,120,116,45,97,
+108,105,103,110,58,116,104,101,32,100,101,118,101,108,111,112,109,101,110,116,32
+,111,102,32,116,121,108,101,115,104,101,101,116,34,32,116,121,112,101,61,34,116,
+101,104,116,109,108,59,32,99,104,97,114,115,101,116,61,117,116,102,45,56,105,115
+,32,99,111,110,115,105,100,101,114,101,100,32,116,111,32,98,101,116,97,98,108,
+101,32,119,105,100,116,104,61,34,49,48,48,37,34,32,73,110,32,97,100,100,105,116,
+105,111,110,32,116,111,32,116,104,101,32,99,111,110,116,114,105,98,117,116,101,
+100,32,116,111,32,116,104,101,32,100,105,102,102,101,114,101,110,99,101,115,32,
+98,101,116,119,101,101,110,100,101,118,101,108,111,112,109,101,110,116,32,111,
+102,32,116,104,101,32,73,116,32,105,115,32,105,109,112,111,114,116,97,110,116,32
+,116,111,32,60,47,115,99,114,105,112,116,62,10,10,60,115,99,114,105,112,116,32,
+32,115,116,121,108,101,61,34,102,111,110,116,45,115,105,122,101,58,49,62,60,47,
+115,112,97,110,62,60,115,112,97,110,32,105,100,61,103,98,76,105,98,114,97,114,
+121,32,111,102,32,67,111,110,103,114,101,115,115,60,105,109,103,32,115,114,99,61
+,34,104,116,116,112,58,47,47,105,109,69,110,103,108,105,115,104,32,116,114,97,
+110,115,108,97,116,105,111,110,65,99,97,100,101,109,121,32,111,102,32,83,99,105,
+101,110,99,101,115,100,105,118,32,115,116,121,108,101,61,34,100,105,115,112,108,
+97,121,58,99,111,110,115,116,114,117,99,116,105,111,110,32,111,102,32,116,104,
+101,46,103,101,116,69,108,101,109,101,110,116,66,121,73,100,40,105,100,41,105,
+110,32,99,111,110,106,117,110,99,116,105,111,110,32,119,105,116,104,69,108,101,
+109,101,110,116,40,39,115,99,114,105,112,116,39,41,59,32,60,109,101,116,97,32,
+112,114,111,112,101,114,116,121,61,34,111,103,58,208,145,209,138,208,187,208,179
+,208,176,209,128,209,129,208,186,208,184,10,32,116,121,112,101,61,34,116,101,120
+,116,34,32,110,97,109,101,61,34,62,80,114,105,118,97,99,121,32,80,111,108,105,99
+,121,60,47,97,62,97,100,109,105,110,105,115,116,101,114,101,100,32,98,121,32,116
+,104,101,101,110,97,98,108,101,83,105,110,103,108,101,82,101,113,117,101,115,116
+,115,116,121,108,101,61,38,113,117,111,116,59,109,97,114,103,105,110,58,60,47,
+100,105,118,62,60,47,100,105,118,62,60,47,100,105,118,62,60,62,60,105,109,103,32
+,115,114,99,61,34,104,116,116,112,58,47,47,105,32,115,116,121,108,101,61,38,113,
+117,111,116,59,102,108,111,97,116,58,114,101,102,101,114,114,101,100,32,116,111,
+32,97,115,32,116,104,101,32,116,111,116,97,108,32,112,111,112,117,108,97,116,105
+,111,110,32,111,102,105,110,32,87,97,115,104,105,110,103,116,111,110,44,32,68,46
+,67,46,32,115,116,121,108,101,61,34,98,97,99,107,103,114,111,117,110,100,45,97,
+109,111,110,103,32,111,116,104,101,114,32,116,104,105,110,103,115,44,111,114,103
+,97,110,105,122,97,116,105,111,110,32,111,102,32,116,104,101,112,97,114,116,105,
+99,105,112,97,116,101,100,32,105,110,32,116,104,101,116,104,101,32,105,110,116,
+114,111,100,117,99,116,105,111,110,32,111,102,105,100,101,110,116,105,102,105,
+101,100,32,119,105,116,104,32,116,104,101,102,105,99,116,105,111,110,97,108,32,
+99,104,97,114,97,99,116,101,114,32,79,120,102,111,114,100,32,85,110,105,118,101,
+114,115,105,116,121,32,109,105,115,117,110,100,101,114,115,116,97,110,100,105,
+110,103,32,111,102,84,104,101,114,101,32,97,114,101,44,32,104,111,119,101,118,
+101,114,44,115,116,121,108,101,115,104,101,101,116,34,32,104,114,101,102,61,34,
+47,67,111,108,117,109,98,105,97,32,85,110,105,118,101,114,115,105,116,121,101,
+120,112,97,110,100,101,100,32,116,111,32,105,110,99,108,117,100,101,117,115,117,
+97,108,108,121,32,114,101,102,101,114,114,101,100,32,116,111,105,110,100,105,99,
+97,116,105,110,103,32,116,104,97,116,32,116,104,101,104,97,118,101,32,115,117,
+103,103,101,115,116,101,100,32,116,104,97,116,97,102,102,105,108,105,97,116,101,
+100,32,119,105,116,104,32,116,104,101,99,111,114,114,101,108,97,116,105,111,110,
+32,98,101,116,119,101,101,110,110,117,109,98,101,114,32,111,102,32,100,105,102,
+102,101,114,101,110,116,62,60,47,116,100,62,60,47,116,114,62,60,47,116,97,98,108
+,101,62,82,101,112,117,98,108,105,99,32,111,102,32,73,114,101,108,97,110,100,10,
+60,47,115,99,114,105,112,116,62,10,60,115,99,114,105,112,116,32,117,110,100,101,
+114,32,116,104,101,32,105,110,102,108,117,101,110,99,101,99,111,110,116,114,105,
+98,117,116,105,111,110,32,116,111,32,116,104,101,79,102,102,105,99,105,97,108,32
+,119,101,98,115,105,116,101,32,111,102,104,101,97,100,113,117,97,114,116,101,114
+,115,32,111,102,32,116,104,101,99,101,110,116,101,114,101,100,32,97,114,111,117,
+110,100,32,116,104,101,105,109,112,108,105,99,97,116,105,111,110,115,32,111,102,
+32,116,104,101,104,97,118,101,32,98,101,101,110,32,100,101,118,101,108,111,112,
+101,100,70,101,100,101,114,97,108,32,82,101,112,117,98,108,105,99,32,111,102,98,
+101,99,97,109,101,32,105,110,99,114,101,97,115,105,110,103,108,121,99,111,110,
+116,105,110,117,97,116,105,111,110,32,111,102,32,116,104,101,78,111,116,101,44,
+32,104,111,119,101,118,101,114,44,32,116,104,97,116,115,105,109,105,108,97,114,
+32,116,111,32,116,104,97,116,32,111,102,32,99,97,112,97,98,105,108,105,116,105,
+101,115,32,111,102,32,116,104,101,97,99,99,111,114,100,97,110,99,101,32,119,105,
+116,104,32,116,104,101,112,97,114,116,105,99,105,112,97,110,116,115,32,105,110,
+32,116,104,101,102,117,114,116,104,101,114,32,100,101,118,101,108,111,112,109,
+101,110,116,117,110,100,101,114,32,116,104,101,32,100,105,114,101,99,116,105,111
+,110,105,115,32,111,102,116,101,110,32,99,111,110,115,105,100,101,114,101,100,
+104,105,115,32,121,111,117,110,103,101,114,32,98,114,111,116,104,101,114,60,47,
+116,100,62,60,47,116,114,62,60,47,116,97,98,108,101,62,60,97,32,104,116,116,112,
+45,101,113,117,105,118,61,34,88,45,85,65,45,112,104,121,115,105,99,97,108,32,112
+,114,111,112,101,114,116,105,101,115,111,102,32,66,114,105,116,105,115,104,32,67
+,111,108,117,109,98,105,97,104,97,115,32,98,101,101,110,32,99,114,105,116,105,99
+,105,122,101,100,40,119,105,116,104,32,116,104,101,32,101,120,99,101,112,116,105
+,111,110,113,117,101,115,116,105,111,110,115,32,97,98,111,117,116,32,116,104,101
+,112,97,115,115,105,110,103,32,116,104,114,111,117,103,104,32,116,104,101,48,34,
+32,99,101,108,108,112,97,100,100,105,110,103,61,34,48,34,32,116,104,111,117,115,
+97,110,100,115,32,111,102,32,112,101,111,112,108,101,114,101,100,105,114,101,99,
+116,115,32,104,101,114,101,46,32,70,111,114,104,97,118,101,32,99,104,105,108,100
+,114,101,110,32,117,110,100,101,114,37,51,69,37,51,67,47,115,99,114,105,112,116,
+37,51,69,34,41,41,59,60,97,32,104,114,101,102,61,34,104,116,116,112,58,47,47,119
+,119,119,46,60,108,105,62,60,97,32,104,114,101,102,61,34,104,116,116,112,58,47,
+47,115,105,116,101,95,110,97,109,101,34,32,99,111,110,116,101,110,116,61,34,116,
+101,120,116,45,100,101,99,111,114,97,116,105,111,110,58,110,111,110,101,115,116,
+121,108,101,61,34,100,105,115,112,108,97,121,58,32,110,111,110,101,60,109,101,
+116,97,32,104,116,116,112,45,101,113,117,105,118,61,34,88,45,110,101,119,32,68,
+97,116,101,40,41,46,103,101,116,84,105,109,101,40,41,32,116,121,112,101,61,34,
+105,109,97,103,101,47,120,45,105,99,111,110,34,60,47,115,112,97,110,62,60,115,
+112,97,110,32,99,108,97,115,115,61,34,108,97,110,103,117,97,103,101,61,34,106,97
+,118,97,115,99,114,105,112,116,119,105,110,100,111,119,46,108,111,99,97,116,105,
+111,110,46,104,114,101,102,60,97,32,104,114,101,102,61,34,106,97,118,97,115,99,
+114,105,112,116,58,45,45,62,13,10,60,115,99,114,105,112,116,32,116,121,112,101,
+61,34,116,60,97,32,104,114,101,102,61,39,104,116,116,112,58,47,47,119,119,119,46
+,104,111,114,116,99,117,116,32,105,99,111,110,34,32,104,114,101,102,61,34,60,47,
+100,105,118,62,13,10,60,100,105,118,32,99,108,97,115,115,61,34,60,115,99,114,105
+,112,116,32,115,114,99,61,34,104,116,116,112,58,47,47,34,32,114,101,108,61,34,
+115,116,121,108,101,115,104,101,101,116,34,32,116,60,47,100,105,118,62,10,60,115
+,99,114,105,112,116,32,116,121,112,101,61,47,97,62,32,60,97,32,104,114,101,102,
+61,34,104,116,116,112,58,47,47,32,97,108,108,111,119,84,114,97,110,115,112,97,
+114,101,110,99,121,61,34,88,45,85,65,45,67,111,109,112,97,116,105,98,108,101,34,
+32,99,111,110,114,101,108,97,116,105,111,110,115,104,105,112,32,98,101,116,119,
+101,101,110,10,60,47,115,99,114,105,112,116,62,13,10,60,115,99,114,105,112,116,
+32,60,47,97,62,60,47,108,105,62,60,47,117,108,62,60,47,100,105,118,62,97,115,115
+,111,99,105,97,116,101,100,32,119,105,116,104,32,116,104,101,32,112,114,111,103,
+114,97,109,109,105,110,103,32,108,97,110,103,117,97,103,101,60,47,97,62,60,97,32
+,104,114,101,102,61,34,104,116,116,112,58,47,47,60,47,97,62,60,47,108,105,62,60,
+108,105,32,99,108,97,115,115,61,34,102,111,114,109,32,97,99,116,105,111,110,61,
+34,104,116,116,112,58,47,47,60,100,105,118,32,115,116,121,108,101,61,34,100,105,
+115,112,108,97,121,58,116,121,112,101,61,34,116,101,120,116,34,32,110,97,109,101
+,61,34,113,34,60,116,97,98,108,101,32,119,105,100,116,104,61,34,49,48,48,37,34,
+32,98,97,99,107,103,114,111,117,110,100,45,112,111,115,105,116,105,111,110,58,34
+,32,98,111,114,100,101,114,61,34,48,34,32,119,105,100,116,104,61,34,114,101,108,
+61,34,115,104,111,114,116,99,117,116,32,105,99,111,110,34,32,104,54,62,60,117,
+108,62,60,108,105,62,60,97,32,104,114,101,102,61,34,32,32,60,109,101,116,97,32,
+104,116,116,112,45,101,113,117,105,118,61,34,99,115,115,34,32,109,101,100,105,97
+,61,34,115,99,114,101,101,110,34,32,114,101,115,112,111,110,115,105,98,108,101,
+32,102,111,114,32,116,104,101,32,34,32,116,121,112,101,61,34,97,112,112,108,105,
+99,97,116,105,111,110,47,34,32,115,116,121,108,101,61,34,98,97,99,107,103,114,
+111,117,110,100,45,104,116,109,108,59,32,99,104,97,114,115,101,116,61,117,116,
+102,45,56,34,32,97,108,108,111,119,116,114,97,110,115,112,97,114,101,110,99,121,
+61,34,115,116,121,108,101,115,104,101,101,116,34,32,116,121,112,101,61,34,116,
+101,13,10,60,109,101,116,97,32,104,116,116,112,45,101,113,117,105,118,61,34,62,
+60,47,115,112,97,110,62,60,115,112,97,110,32,99,108,97,115,115,61,34,48,34,32,99
+,101,108,108,115,112,97,99,105,110,103,61,34,48,34,62,59,10,60,47,115,99,114,105
+,112,116,62,10,60,115,99,114,105,112,116,32,115,111,109,101,116,105,109,101,115,
+32,99,97,108,108,101,100,32,116,104,101,100,111,101,115,32,110,111,116,32,110,
+101,99,101,115,115,97,114,105,108,121,70,111,114,32,109,111,114,101,32,105,110,
+102,111,114,109,97,116,105,111,110,97,116,32,116,104,101,32,98,101,103,105,110,
+110,105,110,103,32,111,102,32,60,33,68,79,67,84,89,80,69,32,104,116,109,108,62,
+60,104,116,109,108,112,97,114,116,105,99,117,108,97,114,108,121,32,105,110,32,
+116,104,101,32,116,121,112,101,61,34,104,105,100,100,101,110,34,32,110,97,109,
+101,61,34,106,97,118,97,115,99,114,105,112,116,58,118,111,105,100,40,48,41,59,34
+,101,102,102,101,99,116,105,118,101,110,101,115,115,32,111,102,32,116,104,101,32
+,97,117,116,111,99,111,109,112,108,101,116,101,61,34,111,102,102,34,32,103,101,
+110,101,114,97,108,108,121,32,99,111,110,115,105,100,101,114,101,100,62,60,105,
+110,112,117,116,32,116,121,112,101,61,34,116,101,120,116,34,32,34,62,60,47,115,
+99,114,105,112,116,62,13,10,60,115,99,114,105,112,116,116,104,114,111,117,103,
+104,111,117,116,32,116,104,101,32,119,111,114,108,100,99,111,109,109,111,110,32,
+109,105,115,99,111,110,99,101,112,116,105,111,110,97,115,115,111,99,105,97,116,
+105,111,110,32,119,105,116,104,32,116,104,101,60,47,100,105,118,62,10,60,47,100,
+105,118,62,10,60,100,105,118,32,99,100,117,114,105,110,103,32,104,105,115,32,108
+,105,102,101,116,105,109,101,44,99,111,114,114,101,115,112,111,110,100,105,110,
+103,32,116,111,32,116,104,101,116,121,112,101,61,34,105,109,97,103,101,47,120,45
+,105,99,111,110,34,32,97,110,32,105,110,99,114,101,97,115,105,110,103,32,110,117
+,109,98,101,114,100,105,112,108,111,109,97,116,105,99,32,114,101,108,97,116,105,
+111,110,115,97,114,101,32,111,102,116,101,110,32,99,111,110,115,105,100,101,114,
+101,100,109,101,116,97,32,99,104,97,114,115,101,116,61,34,117,116,102,45,56,34,
+32,60,105,110,112,117,116,32,116,121,112,101,61,34,116,101,120,116,34,32,101,120
+,97,109,112,108,101,115,32,105,110,99,108,117,100,101,32,116,104,101,34,62,60,
+105,109,103,32,115,114,99,61,34,104,116,116,112,58,47,47,105,112,97,114,116,105,
+99,105,112,97,116,105,111,110,32,105,110,32,116,104,101,116,104,101,32,101,115,
+116,97,98,108,105,115,104,109,101,110,116,32,111,102,10,60,47,100,105,118,62,10,
+60,100,105,118,32,99,108,97,115,115,61,34,38,97,109,112,59,110,98,115,112,59,38,
+97,109,112,59,110,98,115,112,59,116,111,32,100,101,116,101,114,109,105,110,101,
+32,119,104,101,116,104,101,114,113,117,105,116,101,32,100,105,102,102,101,114,
+101,110,116,32,102,114,111,109,109,97,114,107,101,100,32,116,104,101,32,98,101,
+103,105,110,110,105,110,103,100,105,115,116,97,110,99,101,32,98,101,116,119,101,
+101,110,32,116,104,101,99,111,110,116,114,105,98,117,116,105,111,110,115,32,116,
+111,32,116,104,101,99,111,110,102,108,105,99,116,32,98,101,116,119,101,101,110,
+32,116,104,101,119,105,100,101,108,121,32,99,111,110,115,105,100,101,114,101,100
+,32,116,111,119,97,115,32,111,110,101,32,111,102,32,116,104,101,32,102,105,114,
+115,116,119,105,116,104,32,118,97,114,121,105,110,103,32,100,101,103,114,101,101
+,115,104,97,118,101,32,115,112,101,99,117,108,97,116,101,100,32,116,104,97,116,
+40,100,111,99,117,109,101,110,116,46,103,101,116,69,108,101,109,101,110,116,112,
+97,114,116,105,99,105,112,97,116,105,110,103,32,105,110,32,116,104,101,111,114,
+105,103,105,110,97,108,108,121,32,100,101,118,101,108,111,112,101,100,101,116,97
+,32,99,104,97,114,115,101,116,61,34,117,116,102,45,56,34,62,32,116,121,112,101,
+61,34,116,101,120,116,47,99,115,115,34,32,47,62,10,105,110,116,101,114,99,104,97
+,110,103,101,97,98,108,121,32,119,105,116,104,109,111,114,101,32,99,108,111,115,
+101,108,121,32,114,101,108,97,116,101,100,115,111,99,105,97,108,32,97,110,100,32
+,112,111,108,105,116,105,99,97,108,116,104,97,116,32,119,111,117,108,100,32,111,
+116,104,101,114,119,105,115,101,112,101,114,112,101,110,100,105,99,117,108,97,
+114,32,116,111,32,116,104,101,115,116,121,108,101,32,116,121,112,101,61,34,116,
+101,120,116,47,99,115,115,116,121,112,101,61,34,115,117,98,109,105,116,34,32,110
+,97,109,101,61,34,102,97,109,105,108,105,101,115,32,114,101,115,105,100,105,110,
+103,32,105,110,100,101,118,101,108,111,112,105,110,103,32,99,111,117,110,116,114
+,105,101,115,99,111,109,112,117,116,101,114,32,112,114,111,103,114,97,109,109,
+105,110,103,101,99,111,110,111,109,105,99,32,100,101,118,101,108,111,112,109,101
+,110,116,100,101,116,101,114,109,105,110,97,116,105,111,110,32,111,102,32,116,
+104,101,102,111,114,32,109,111,114,101,32,105,110,102,111,114,109,97,116,105,111
+,110,111,110,32,115,101,118,101,114,97,108,32,111,99,99,97,115,105,111,110,115,
+112,111,114,116,117,103,117,195,170,115,32,40,69,117,114,111,112,101,117,41,208,
+163,208,186,209,128,208,176,209,151,208,189,209,129,209,140,208,186,208,176,209,
+131,208,186,209,128,208,176,209,151,208,189,209,129,209,140,208,186,208,176,208,
+160,208,190,209,129,209,129,208,184,208,185,209,129,208,186,208,190,208,185,208,
+188,208,176,209,130,208,181,209,128,208,184,208,176,208,187,208,190,208,178,208,
+184,208,189,209,132,208,190,209,128,208,188,208,176,209,134,208,184,208,184,209,
+131,208,191,209,128,208,176,208,178,208,187,208,181,208,189,208,184,209,143,208,
+189,208,181,208,190,208,177,209,133,208,190,208,180,208,184,208,188,208,190,208,
+184,208,189,209,132,208,190,209,128,208,188,208,176,209,134,208,184,209,143,208,
+152,208,189,209,132,208,190,209,128,208,188,208,176,209,134,208,184,209,143,208,
+160,208,181,209,129,208,191,209,131,208,177,208,187,208,184,208,186,208,184,208,
+186,208,190,208,187,208,184,209,135,208,181,209,129,209,130,208,178,208,190,208,
+184,208,189,209,132,208,190,209,128,208,188,208,176,209,134,208,184,209,142,209,
+130,208,181,209,128,209,128,208,184,209,130,208,190,209,128,208,184,208,184,208,
+180,208,190,209,129,209,130,208,176,209,130,208,190,209,135,208,189,208,190,216,
+167,217,132,217,133,216,170,217,136,216,167,216,172,216,175,217,136,217,134,216,
+167,217,132,216,167,216,180,216,170,216,177,216,167,217,131,216,167,216,170,216,
+167,217,132,216,167,217,130,216,170,216,177,216,167,216,173,216,167,216,170,104,
+116,109,108,59,32,99,104,97,114,115,101,116,61,85,84,70,45,56,34,32,115,101,116,
+84,105,109,101,111,117,116,40,102,117,110,99,116,105,111,110,40,41,100,105,115,
+112,108,97,121,58,105,110,108,105,110,101,45,98,108,111,99,107,59,60,105,110,112
+,117,116,32,116,121,112,101,61,34,115,117,98,109,105,116,34,32,116,121,112,101,
+32,61,32,39,116,101,120,116,47,106,97,118,97,115,99,114,105,60,105,109,103,32,
+115,114,99,61,34,104,116,116,112,58,47,47,119,119,119,46,34,32,34,104,116,116,
+112,58,47,47,119,119,119,46,119,51,46,111,114,103,47,115,104,111,114,116,99,117,
+116,32,105,99,111,110,34,32,104,114,101,102,61,34,34,32,97,117,116,111,99,111,
+109,112,108,101,116,101,61,34,111,102,102,34,32,60,47,97,62,60,47,100,105,118,62
+,60,100,105,118,32,99,108,97,115,115,61,60,47,97,62,60,47,108,105,62,10,60,108,
+105,32,99,108,97,115,115,61,34,99,115,115,34,32,116,121,112,101,61,34,116,101,
+120,116,47,99,115,115,34,32,60,102,111,114,109,32,97,99,116,105,111,110,61,34,
+104,116,116,112,58,47,47,120,116,47,99,115,115,34,32,104,114,101,102,61,34,104,
+116,116,112,58,47,47,108,105,110,107,32,114,101,108,61,34,97,108,116,101,114,110
+,97,116,101,34,32,13,10,60,115,99,114,105,112,116,32,116,121,112,101,61,34,116,
+101,120,116,47,32,111,110,99,108,105,99,107,61,34,106,97,118,97,115,99,114,105,
+112,116,58,40,110,101,119,32,68,97,116,101,41,46,103,101,116,84,105,109,101,40,
+41,125,104,101,105,103,104,116,61,34,49,34,32,119,105,100,116,104,61,34,49,34,32
+,80,101,111,112,108,101,39,115,32,82,101,112,117,98,108,105,99,32,111,102,32,32,
+60,97,32,104,114,101,102,61,34,104,116,116,112,58,47,47,119,119,119,46,116,101,
+120,116,45,100,101,99,111,114,97,116,105,111,110,58,117,110,100,101,114,116,104,
+101,32,98,101,103,105,110,110,105,110,103,32,111,102,32,116,104,101,32,60,47,100
+,105,118,62,10,60,47,100,105,118,62,10,60,47,100,105,118,62,10,101,115,116,97,98
+,108,105,115,104,109,101,110,116,32,111,102,32,116,104,101,32,60,47,100,105,118,
+62,60,47,100,105,118,62,60,47,100,105,118,62,60,47,100,35,118,105,101,119,112,
+111,114,116,123,109,105,110,45,104,101,105,103,104,116,58,10,60,115,99,114,105,
+112,116,32,115,114,99,61,34,104,116,116,112,58,47,47,111,112,116,105,111,110,62,
+60,111,112,116,105,111,110,32,118,97,108,117,101,61,111,102,116,101,110,32,114,
+101,102,101,114,114,101,100,32,116,111,32,97,115,32,47,111,112,116,105,111,110,
+62,10,60,111,112,116,105,111,110,32,118,97,108,117,60,33,68,79,67,84,89,80,69,32
+,104,116,109,108,62,10,60,33,45,45,91,73,110,116,101,114,110,97,116,105,111,110,
+97,108,32,65,105,114,112,111,114,116,62,10,60,97,32,104,114,101,102,61,34,104,
+116,116,112,58,47,47,119,119,119,60,47,97,62,60,97,32,104,114,101,102,61,34,104,
+116,116,112,58,47,47,119,224,184,160,224,184,178,224,184,169,224,184,178,224,185
+,132,224,184,151,224,184,162,225,131,165,225,131,144,225,131,160,225,131,151,225
+,131,163,225,131,154,225,131,152,230,173,163,233,171,148,228,184,173,230,150,135
+,32,40,231,185,129,233,171,148,41,224,164,168,224,164,191,224,164,176,224,165,
+141,224,164,166,224,165,135,224,164,182,224,164,161,224,164,190,224,164,137,224,
+164,168,224,164,178,224,165,139,224,164,161,224,164,149,224,165,141,224,164,183,
+224,165,135,224,164,164,224,165,141,224,164,176,224,164,156,224,164,190,224,164,
+168,224,164,149,224,164,190,224,164,176,224,165,128,224,164,184,224,164,130,224,
+164,172,224,164,130,224,164,167,224,164,191,224,164,164,224,164,184,224,165,141,
+224,164,165,224,164,190,224,164,170,224,164,168,224,164,190,224,164,184,224,165,
+141,224,164,181,224,165,128,224,164,149,224,164,190,224,164,176,224,164,184,224,
+164,130,224,164,184,224,165,141,224,164,149,224,164,176,224,164,163,224,164,184,
+224,164,190,224,164,174,224,164,151,224,165,141,224,164,176,224,165,128,224,164,
+154,224,164,191,224,164,159,224,165,141,224,164,160,224,165,139,224,164,130,224,
+164,181,224,164,191,224,164,156,224,165,141,224,164,158,224,164,190,224,164,168,
+224,164,133,224,164,174,224,165,135,224,164,176,224,164,191,224,164,149,224,164,
+190,224,164,181,224,164,191,224,164,173,224,164,191,224,164,168,224,165,141,224,
+164,168,224,164,151,224,164,190,224,164,161,224,164,191,224,164,175,224,164,190,
+224,164,129,224,164,149,224,165,141,224,164,175,224,165,139,224,164,130,224,164,
+149,224,164,191,224,164,184,224,165,129,224,164,176,224,164,149,224,165,141,224,
+164,183,224,164,190,224,164,170,224,164,185,224,165,129,224,164,129,224,164,154,
+224,164,164,224,165,128,224,164,170,224,165,141,224,164,176,224,164,172,224,164,
+130,224,164,167,224,164,168,224,164,159,224,164,191,224,164,170,224,165,141,224,
+164,170,224,164,163,224,165,128,224,164,149,224,165,141,224,164,176,224,164,191,
+224,164,149,224,165,135,224,164,159,224,164,170,224,165,141,224,164,176,224,164,
+190,224,164,176,224,164,130,224,164,173,224,164,170,224,165,141,224,164,176,224,
+164,190,224,164,170,224,165,141,224,164,164,224,164,174,224,164,190,224,164,178,
+224,164,191,224,164,149,224,165,139,224,164,130,224,164,176,224,164,171,224,164,
+188,224,165,141,224,164,164,224,164,190,224,164,176,224,164,168,224,164,191,224,
+164,176,224,165,141,224,164,174,224,164,190,224,164,163,224,164,178,224,164,191,
+224,164,174,224,164,191,224,164,159,224,165,135,224,164,161,100,101,115,99,114,
+105,112,116,105,111,110,34,32,99,111,110,116,101,110,116,61,34,100,111,99,117,
+109,101,110,116,46,108,111,99,97,116,105,111,110,46,112,114,111,116,46,103,101,
+116,69,108,101,109,101,110,116,115,66,121,84,97,103,78,97,109,101,40,60,33,68,79
+,67,84,89,80,69,32,104,116,109,108,62,10,60,104,116,109,108,32,60,109,101,116,97
+,32,99,104,97,114,115,101,116,61,34,117,116,102,45,56,34,62,58,117,114,108,34,32
+,99,111,110,116,101,110,116,61,34,104,116,116,112,58,47,47,46,99,115,115,34,32,
+114,101,108,61,34,115,116,121,108,101,115,104,101,101,116,34,115,116,121,108,101
+,32,116,121,112,101,61,34,116,101,120,116,47,99,115,115,34,62,116,121,112,101,61
+,34,116,101,120,116,47,99,115,115,34,32,104,114,101,102,61,34,119,51,46,111,114,
+103,47,49,57,57,57,47,120,104,116,109,108,34,32,120,109,108,116,121,112,101,61,
+34,116,101,120,116,47,106,97,118,97,115,99,114,105,112,116,34,32,109,101,116,104
+,111,100,61,34,103,101,116,34,32,97,99,116,105,111,110,61,34,108,105,110,107,32,
+114,101,108,61,34,115,116,121,108,101,115,104,101,101,116,34,32,32,61,32,100,111
+,99,117,109,101,110,116,46,103,101,116,69,108,101,109,101,110,116,116,121,112,
+101,61,34,105,109,97,103,101,47,120,45,105,99,111,110,34,32,47,62,99,101,108,108
+,112,97,100,100,105,110,103,61,34,48,34,32,99,101,108,108,115,112,46,99,115,115,
+34,32,116,121,112,101,61,34,116,101,120,116,47,99,115,115,34,32,60,47,97,62,60,
+47,108,105,62,60,108,105,62,60,97,32,104,114,101,102,61,34,34,32,119,105,100,116
+,104,61,34,49,34,32,104,101,105,103,104,116,61,34,49,34,34,62,60,97,32,104,114,
+101,102,61,34,104,116,116,112,58,47,47,119,119,119,46,115,116,121,108,101,61,34,
+100,105,115,112,108,97,121,58,110,111,110,101,59,34,62,97,108,116,101,114,110,97
+,116,101,34,32,116,121,112,101,61,34,97,112,112,108,105,45,47,47,87,51,67,47,47,
+68,84,68,32,88,72,84,77,76,32,49,46,48,32,101,108,108,115,112,97,99,105,110,103,
+61,34,48,34,32,99,101,108,108,112,97,100,32,116,121,112,101,61,34,104,105,100,
+100,101,110,34,32,118,97,108,117,101,61,34,47,97,62,38,110,98,115,112,59,60,115,
+112,97,110,32,114,111,108,101,61,34,115,10,60,105,110,112,117,116,32,116,121,112
+,101,61,34,104,105,100,100,101,110,34,32,108,97,110,103,117,97,103,101,61,34,74,
+97,118,97,83,99,114,105,112,116,34,32,32,100,111,99,117,109,101,110,116,46,103,
+101,116,69,108,101,109,101,110,116,115,66,103,61,34,48,34,32,99,101,108,108,115,
+112,97,99,105,110,103,61,34,48,34,32,121,112,101,61,34,116,101,120,116,47,99,115
+,115,34,32,109,101,100,105,97,61,34,116,121,112,101,61,39,116,101,120,116,47,106
+,97,118,97,115,99,114,105,112,116,39,119,105,116,104,32,116,104,101,32,101,120,
+99,101,112,116,105,111,110,32,111,102,32,121,112,101,61,34,116,101,120,116,47,99
+,115,115,34,32,114,101,108,61,34,115,116,32,104,101,105,103,104,116,61,34,49,34,
+32,119,105,100,116,104,61,34,49,34,32,61,39,43,101,110,99,111,100,101,85,82,73,
+67,111,109,112,111,110,101,110,116,40,60,108,105,110,107,32,114,101,108,61,34,97
+,108,116,101,114,110,97,116,101,34,32,10,98,111,100,121,44,32,116,114,44,32,105,
+110,112,117,116,44,32,116,101,120,116,109,101,116,97,32,110,97,109,101,61,34,114
+,111,98,111,116,115,34,32,99,111,110,109,101,116,104,111,100,61,34,112,111,115,
+116,34,32,97,99,116,105,111,110,61,34,62,10,60,97,32,104,114,101,102,61,34,104,
+116,116,112,58,47,47,119,119,119,46,99,115,115,34,32,114,101,108,61,34,115,116,
+121,108,101,115,104,101,101,116,34,32,60,47,100,105,118,62,60,47,100,105,118,62,
+60,100,105,118,32,99,108,97,115,115,108,97,110,103,117,97,103,101,61,34,106,97,
+118,97,115,99,114,105,112,116,34,62,97,114,105,97,45,104,105,100,100,101,110,61,
+34,116,114,117,101,34,62,194,183,60,114,105,112,116,34,32,116,121,112,101,61,34,
+116,101,120,116,47,106,97,118,97,115,108,61,48,59,125,41,40,41,59,10,40,102,117,
+110,99,116,105,111,110,40,41,123,98,97,99,107,103,114,111,117,110,100,45,105,109
+,97,103,101,58,32,117,114,108,40,47,97,62,60,47,108,105,62,60,108,105,62,60,97,
+32,104,114,101,102,61,34,104,9,9,60,108,105,62,60,97,32,104,114,101,102,61,34,
+104,116,116,112,58,47,47,97,116,111,114,34,32,97,114,105,97,45,104,105,100,100,
+101,110,61,34,116,114,117,62,32,60,97,32,104,114,101,102,61,34,104,116,116,112,
+58,47,47,119,119,119,46,108,97,110,103,117,97,103,101,61,34,106,97,118,97,115,99
+,114,105,112,116,34,32,47,111,112,116,105,111,110,62,10,60,111,112,116,105,111,
+110,32,118,97,108,117,101,47,100,105,118,62,60,47,100,105,118,62,60,100,105,118,
+32,99,108,97,115,115,61,114,97,116,111,114,34,32,97,114,105,97,45,104,105,100,
+100,101,110,61,34,116,114,101,61,40,110,101,119,32,68,97,116,101,41,46,103,101,
+116,84,105,109,101,40,41,112,111,114,116,117,103,117,195,170,115,32,40,100,111,
+32,66,114,97,115,105,108,41,208,190,209,128,208,179,208,176,208,189,208,184,208,
+183,208,176,209,134,208,184,208,184,208,178,208,190,208,183,208,188,208,190,208,
+182,208,189,208,190,209,129,209,130,209,140,208,190,208,177,209,128,208,176,208,
+183,208,190,208,178,208,176,208,189,208,184,209,143,209,128,208,181,208,179,208,
+184,209,129,209,130,209,128,208,176,209,134,208,184,208,184,208,178,208,190,208,
+183,208,188,208,190,208,182,208,189,208,190,209,129,209,130,208,184,208,190,208,
+177,209,143,208,183,208,176,209,130,208,181,208,187,209,140,208,189,208,176,60,
+33,68,79,67,84,89,80,69,32,104,116,109,108,32,80,85,66,76,73,67,32,34,110,116,45
+,84,121,112,101,34,32,99,111,110,116,101,110,116,61,34,116,101,120,116,47,60,109
+,101,116,97,32,104,116,116,112,45,101,113,117,105,118,61,34,67,111,110,116,101,
+114,97,110,115,105,116,105,111,110,97,108,47,47,69,78,34,32,34,104,116,116,112,
+58,60,104,116,109,108,32,120,109,108,110,115,61,34,104,116,116,112,58,47,47,119,
+119,119,45,47,47,87,51,67,47,47,68,84,68,32,88,72,84,77,76,32,49,46,48,32,84,68,
+84,68,47,120,104,116,109,108,49,45,116,114,97,110,115,105,116,105,111,110,97,108
+,47,47,119,119,119,46,119,51,46,111,114,103,47,84,82,47,120,104,116,109,108,49,
+47,112,101,32,61,32,39,116,101,120,116,47,106,97,118,97,115,99,114,105,112,116,
+39,59,60,109,101,116,97,32,110,97,109,101,61,34,100,101,115,99,114,105,112,116,
+105,111,110,112,97,114,101,110,116,78,111,100,101,46,105,110,115,101,114,116,66,
+101,102,111,114,101,60,105,110,112,117,116,32,116,121,112,101,61,34,104,105,100,
+100,101,110,34,32,110,97,106,115,34,32,116,121,112,101,61,34,116,101,120,116,47,
+106,97,118,97,115,99,114,105,40,100,111,99,117,109,101,110,116,41,46,114,101,97,
+100,121,40,102,117,110,99,116,105,115,99,114,105,112,116,32,116,121,112,101,61,
+34,116,101,120,116,47,106,97,118,97,115,105,109,97,103,101,34,32,99,111,110,116,
+101,110,116,61,34,104,116,116,112,58,47,47,85,65,45,67,111,109,112,97,116,105,98
+,108,101,34,32,99,111,110,116,101,110,116,61,116,109,108,59,32,99,104,97,114,115
+,101,116,61,117,116,102,45,56,34,32,47,62,10,108,105,110,107,32,114,101,108,61,
+34,115,104,111,114,116,99,117,116,32,105,99,111,110,60,108,105,110,107,32,114,
+101,108,61,34,115,116,121,108,101,115,104,101,101,116,34,32,60,47,115,99,114,105
+,112,116,62,10,60,115,99,114,105,112,116,32,116,121,112,101,61,61,32,100,111,99,
+117,109,101,110,116,46,99,114,101,97,116,101,69,108,101,109,101,110,60,97,32,116
+,97,114,103,101,116,61,34,95,98,108,97,110,107,34,32,104,114,101,102,61,32,100,
+111,99,117,109,101,110,116,46,103,101,116,69,108,101,109,101,110,116,115,66,105,
+110,112,117,116,32,116,121,112,101,61,34,116,101,120,116,34,32,110,97,109,101,61
+,97,46,116,121,112,101,32,61,32,39,116,101,120,116,47,106,97,118,97,115,99,114,
+105,110,112,117,116,32,116,121,112,101,61,34,104,105,100,100,101,110,34,32,110,
+97,109,101,104,116,109,108,59,32,99,104,97,114,115,101,116,61,117,116,102,45,56,
+34,32,47,62,100,116,100,34,62,10,60,104,116,109,108,32,120,109,108,110,115,61,34
+,104,116,116,112,45,47,47,87,51,67,47,47,68,84,68,32,72,84,77,76,32,52,46,48,49,
+32,84,101,110,116,115,66,121,84,97,103,78,97,109,101,40,39,115,99,114,105,112,
+116,39,41,105,110,112,117,116,32,116,121,112,101,61,34,104,105,100,100,101,110,
+34,32,110,97,109,60,115,99,114,105,112,116,32,116,121,112,101,61,34,116,101,120,
+116,47,106,97,118,97,115,34,32,115,116,121,108,101,61,34,100,105,115,112,108,97,
+121,58,110,111,110,101,59,34,62,100,111,99,117,109,101,110,116,46,103,101,116,69
+,108,101,109,101,110,116,66,121,73,100,40,61,100,111,99,117,109,101,110,116,46,
+99,114,101,97,116,101,69,108,101,109,101,110,116,40,39,32,116,121,112,101,61,39,
+116,101,120,116,47,106,97,118,97,115,99,114,105,112,116,39,105,110,112,117,116,
+32,116,121,112,101,61,34,116,101,120,116,34,32,110,97,109,101,61,34,100,46,103,
+101,116,69,108,101,109,101,110,116,115,66,121,84,97,103,78,97,109,101,40,115,110
+,105,99,97,108,34,32,104,114,101,102,61,34,104,116,116,112,58,47,47,119,119,119,
+46,67,47,47,68,84,68,32,72,84,77,76,32,52,46,48,49,32,84,114,97,110,115,105,116,
+60,115,116,121,108,101,32,116,121,112,101,61,34,116,101,120,116,47,99,115,115,34
+,62,10,10,60,115,116,121,108,101,32,116,121,112,101,61,34,116,101,120,116,47,99,
+115,115,34,62,105,111,110,97,108,46,100,116,100,34,62,10,60,104,116,109,108,32,
+120,109,108,110,115,61,104,116,116,112,45,101,113,117,105,118,61,34,67,111,110,
+116,101,110,116,45,84,121,112,101,100,105,110,103,61,34,48,34,32,99,101,108,108,
+115,112,97,99,105,110,103,61,34,48,34,104,116,109,108,59,32,99,104,97,114,115,
+101,116,61,117,116,102,45,56,34,32,47,62,10,32,115,116,121,108,101,61,34,100,105
+,115,112,108,97,121,58,110,111,110,101,59,34,62,60,60,108,105,62,60,97,32,104,
+114,101,102,61,34,104,116,116,112,58,47,47,119,119,119,46,32,116,121,112,101,61,
+39,116,101,120,116,47,106,97,118,97,115,99,114,105,112,116,39,62,208,180,208,181
+,209,143,209,130,208,181,208,187,209,140,208,189,208,190,209,129,209,130,208,184
+,209,129,208,190,208,190,209,130,208,178,208,181,209,130,209,129,209,130,208,178
+,208,184,208,184,208,191,209,128,208,190,208,184,208,183,208,178,208,190,208,180
+,209,129,209,130,208,178,208,176,208,177,208,181,208,183,208,190,208,191,208,176
+,209,129,208,189,208,190,209,129,209,130,208,184,224,164,170,224,165,129,224,164
+,184,224,165,141,224,164,164,224,164,191,224,164,149,224,164,190,224,164,149,224
+,164,190,224,164,130,224,164,151,224,165,141,224,164,176,224,165,135,224,164,184
+,224,164,137,224,164,168,224,165,141,224,164,185,224,165,139,224,164,130,224,164
+,168,224,165,135,224,164,181,224,164,191,224,164,167,224,164,190,224,164,168,224
+,164,184,224,164,173,224,164,190,224,164,171,224,164,191,224,164,149,224,165,141
+,224,164,184,224,164,191,224,164,130,224,164,151,224,164,184,224,165,129,224,164
+,176,224,164,149,224,165,141,224,164,183,224,164,191,224,164,164,224,164,149,224
+,165,137,224,164,170,224,165,128,224,164,176,224,164,190,224,164,135,224,164,159
+,224,164,181,224,164,191,224,164,156,224,165,141,224,164,158,224,164,190,224,164
+,170,224,164,168,224,164,149,224,164,190,224,164,176,224,165,141,224,164,176,224
+,164,181,224,164,190,224,164,136,224,164,184,224,164,149,224,165,141,224,164,176
+,224,164,191,224,164,175,224,164,164,224,164,190
+}
+;
+#endif  /* !BROTLI_EXTERNAL_DICTIONARY_DATA */
+
+static BrotliDictionary kBrotliDictionary = {  /* non-const: .data may be set later */
+  /* size_bits_by_length: indexed by word length; 0 marks an empty bucket. */
+  {
+    0, 0, 0, 0, 10, 10, 11, 11,
+    10, 10, 10, 10, 10, 9, 9, 8,
+    7, 7, 8, 7, 7, 6, 6, 5,
+    5, 0, 0, 0, 0, 0, 0, 0
+  },
+
+  /* offsets_by_length: each non-empty bucket i adds (i << bits[i]) to the next. */
+  {
+    0, 0, 0, 0, 0, 4096, 9216, 21504,
+    35840, 44032, 53248, 63488, 74752, 87040, 93696, 100864,
+    104704, 106752, 108928, 113536, 115968, 118528, 119872, 121280,
+    122016, 122784, 122784, 122784, 122784, 122784, 122784, 122784
+  },
+
+  /* data_size == sizeof(kBrotliDictionaryData); equals offsets_by_length[31]. */
+  122784,
+
+  /* data: NULL until BrotliSetDictionaryData() supplies it, else built in. */
+#if defined(BROTLI_EXTERNAL_DICTIONARY_DATA)
+  NULL
+#else
+  kBrotliDictionaryData
+#endif
+};
+
+const BrotliDictionary* BrotliGetDictionary(void) {  /* (void) matches the dictionary.h prototype */
+  return &kBrotliDictionary;  /* always the same file-local instance */
+}
+
+void BrotliSetDictionaryData(const uint8_t* data) {
+  /* Ignore NULL input and never replace a pointer that is already set. */
+  if (!data || kBrotliDictionary.data) return;
+  kBrotliDictionary.data = data;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/dictionary.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/dictionary.h
new file mode 100755
index 0000000000..b1c6f7f580
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/dictionary.h
@@ -0,0 +1,64 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Collection of static dictionary words. */
+
+#ifndef BROTLI_COMMON_DICTIONARY_H_
+#define BROTLI_COMMON_DICTIONARY_H_
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct BrotliDictionary {
+  /**
+   * Number of bits needed to encode the index of a word within its bucket.
+   *
+   * Specification: Appendix A. Static Dictionary Data
+   *
+   * Words in the dictionary are bucketed by length.
+   * @c 0 means that there are no words of the given length.
+   * The dictionary consists of words with lengths of [4..24] bytes.
+   * Entries at indices [0..3] and [25..31] should not be addressed.
+   */
+  uint8_t size_bits_by_length[32];
+
+  /* assert(offset[i + 1] == offset[i] + (bits[i] ? (i << bits[i]) : 0)) */
+  uint32_t offsets_by_length[32];
+
+  /* assert(data_size == offsets_by_length[31]) */
+  size_t data_size;
+
+  /* The data array is unbounded in type; its layout must be consistent with
+     the size_bits_by_length values. The specified size matches the default
+     (RFC 7932) dictionary; the actual size is given by data_size. */
+  const uint8_t* data;
+} BrotliDictionary;
+
+BROTLI_COMMON_API const BrotliDictionary* BrotliGetDictionary(void);
+
+/**
+ * Sets dictionary data.
+ *
+ * When dictionary data is already set / present, this function is a no-op.
+ *
+ * Dictionary data MUST be provided before BrotliGetDictionary is invoked.
+ * This function is used ONLY in a multi-client environment (e.g. C + Java),
+ * to reduce storage by sharing a single dictionary between implementations.
+ */
+BROTLI_COMMON_API void BrotliSetDictionaryData(const uint8_t* data);
+
+#define BROTLI_MIN_DICTIONARY_WORD_LENGTH 4
+#define BROTLI_MAX_DICTIONARY_WORD_LENGTH 24
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_COMMON_DICTIONARY_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/platform.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/platform.h
new file mode 100755
index 0000000000..84c448c4cf
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/platform.h
@@ -0,0 +1,568 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Macros for compiler / platform specific features and build options.
+
+   Build options are:
+    * BROTLI_BUILD_32_BIT disables 64-bit optimizations
+    * BROTLI_BUILD_64_BIT forces to use 64-bit optimizations
+    * BROTLI_BUILD_BIG_ENDIAN forces to use big-endian optimizations
+    * BROTLI_BUILD_ENDIAN_NEUTRAL disables endian-aware optimizations
+    * BROTLI_BUILD_LITTLE_ENDIAN forces to use little-endian optimizations
+    * BROTLI_BUILD_PORTABLE disables dangerous optimizations, like unaligned
+      read and overlapping memcpy; this reduces decompression speed by 5%
+    * BROTLI_BUILD_NO_RBIT disables "rbit" optimization for ARM CPUs
+    * BROTLI_DEBUG dumps file name and line number when decoder detects stream
+      or memory error
+    * BROTLI_ENABLE_LOG enables asserts and dumps various state information
+*/
+
+#ifndef BROTLI_COMMON_PLATFORM_H_
+#define BROTLI_COMMON_PLATFORM_H_
+
+#include <string.h>  /* memcpy */
+#include <stdlib.h>  /* malloc, free */
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+#if defined(OS_LINUX) || defined(OS_CYGWIN)
+#include <endian.h>
+#elif defined(OS_FREEBSD)
+#include <machine/endian.h>
+#elif defined(OS_MACOSX)
+#include <machine/endian.h>
+/* Let's try and follow the Linux convention */
+#define BROTLI_X_BYTE_ORDER BYTE_ORDER
+#define BROTLI_X_LITTLE_ENDIAN LITTLE_ENDIAN
+#define BROTLI_X_BIG_ENDIAN BIG_ENDIAN
+#endif
+
+#if defined(BROTLI_ENABLE_LOG) || defined(BROTLI_DEBUG)
+#include <assert.h>
+#include <stdio.h>
+#endif
+
+/* The following macros were borrowed from https://github.com/nemequ/hedley
+ * with permission of original author - Evan Nemerson <evan@nemerson.com> */
+
+/* >>> >>> >>> hedley macros */
+
+/* Define "BROTLI_PREDICT_TRUE" and "BROTLI_PREDICT_FALSE" macros for capable
+   compilers.
+
+To apply compiler hint, enclose the branching condition into macros, like this:
+
+  if (BROTLI_PREDICT_TRUE(zero == 0)) {
+    // main execution path
+  } else {
+    // compiler should place this code outside of main execution path
+  }
+
+OR:
+
+  if (BROTLI_PREDICT_FALSE(something_rare_or_unexpected_happens)) {
+    // compiler should place this code outside of main execution path
+  }
+
+*/
+#if BROTLI_GNUC_HAS_BUILTIN(__builtin_expect, 3, 0, 0) || \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0) ||               \
+    BROTLI_SUNPRO_VERSION_CHECK(5, 15, 0) ||              \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) ||                  \
+    BROTLI_IBM_VERSION_CHECK(10, 1, 0) ||                 \
+    BROTLI_TI_VERSION_CHECK(7, 3, 0) ||                   \
+    BROTLI_TINYC_VERSION_CHECK(0, 9, 27)
+#define BROTLI_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+#define BROTLI_PREDICT_FALSE(x) (__builtin_expect(!!(x), 0))  /* !! avoids narrowing x to long */
+#else  /* no __builtin_expect: the condition is passed through unchanged */
+#define BROTLI_PREDICT_FALSE(x) (x)
+#define BROTLI_PREDICT_TRUE(x) (x)
+#endif
+
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
+    !defined(__cplusplus)
+#define BROTLI_RESTRICT restrict
+#elif BROTLI_GNUC_VERSION_CHECK(3, 1, 0) ||                         \
+    BROTLI_MSVC_VERSION_CHECK(14, 0, 0) ||                          \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0) ||                         \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) ||                            \
+    BROTLI_IBM_VERSION_CHECK(10, 1, 0) ||                           \
+    BROTLI_PGI_VERSION_CHECK(17, 10, 0) ||                          \
+    BROTLI_TI_VERSION_CHECK(8, 0, 0) ||                             \
+    BROTLI_IAR_VERSION_CHECK(8, 0, 0) ||                            \
+    (BROTLI_SUNPRO_VERSION_CHECK(5, 14, 0) && defined(__cplusplus))
+#define BROTLI_RESTRICT __restrict
+#elif BROTLI_SUNPRO_VERSION_CHECK(5, 3, 0) && !defined(__cplusplus)
+#define BROTLI_RESTRICT _Restrict
+#else
+#define BROTLI_RESTRICT
+#endif
+
+#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \
+    (defined(__cplusplus) && (__cplusplus >= 199711L))
+#define BROTLI_MAYBE_INLINE inline
+#elif defined(__GNUC_STDC_INLINE__) || defined(__GNUC_GNU_INLINE__) || \
+    BROTLI_ARM_VERSION_CHECK(6, 2, 0)
+#define BROTLI_MAYBE_INLINE __inline__
+#elif BROTLI_MSVC_VERSION_CHECK(12, 0, 0) || \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) || BROTLI_TI_VERSION_CHECK(8, 0, 0)
+#define BROTLI_MAYBE_INLINE __inline
+#else
+#define BROTLI_MAYBE_INLINE
+#endif
+
+#if BROTLI_GNUC_HAS_ATTRIBUTE(always_inline, 4, 0, 0) ||                       \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0) ||                                    \
+    BROTLI_SUNPRO_VERSION_CHECK(5, 11, 0) ||                                   \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) ||                                       \
+    BROTLI_IBM_VERSION_CHECK(10, 1, 0) ||                                      \
+    BROTLI_TI_VERSION_CHECK(8, 0, 0) ||                                        \
+    (BROTLI_TI_VERSION_CHECK(7, 3, 0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__))
+#define BROTLI_INLINE BROTLI_MAYBE_INLINE __attribute__((__always_inline__))
+#elif BROTLI_MSVC_VERSION_CHECK(12, 0, 0)
+#define BROTLI_INLINE BROTLI_MAYBE_INLINE __forceinline
+#elif BROTLI_TI_VERSION_CHECK(7, 0, 0) && defined(__cplusplus)
+#define BROTLI_INLINE BROTLI_MAYBE_INLINE _Pragma("FUNC_ALWAYS_INLINE;")
+#elif BROTLI_IAR_VERSION_CHECK(8, 0, 0)
+#define BROTLI_INLINE BROTLI_MAYBE_INLINE _Pragma("inline=forced")
+#else
+#define BROTLI_INLINE BROTLI_MAYBE_INLINE
+#endif
+
+#if BROTLI_GNUC_HAS_ATTRIBUTE(noinline, 4, 0, 0) ||                            \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0) ||                                    \
+    BROTLI_SUNPRO_VERSION_CHECK(5, 11, 0) ||                                   \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) ||                                       \
+    BROTLI_IBM_VERSION_CHECK(10, 1, 0) ||                                      \
+    BROTLI_TI_VERSION_CHECK(8, 0, 0) ||                                        \
+    (BROTLI_TI_VERSION_CHECK(7, 3, 0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__))
+#define BROTLI_NOINLINE __attribute__((__noinline__))
+#elif BROTLI_MSVC_VERSION_CHECK(13, 10, 0)
+#define BROTLI_NOINLINE __declspec(noinline)
+#elif BROTLI_PGI_VERSION_CHECK(10, 2, 0)
+#define BROTLI_NOINLINE _Pragma("noinline")
+#elif BROTLI_TI_VERSION_CHECK(6, 0, 0) && defined(__cplusplus)
+#define BROTLI_NOINLINE _Pragma("FUNC_CANNOT_INLINE;")
+#elif BROTLI_IAR_VERSION_CHECK(8, 0, 0)
+#define BROTLI_NOINLINE _Pragma("inline=never")
+#else
+#define BROTLI_NOINLINE
+#endif
+
+/* BROTLI_INTERNAL could be defined to override visibility, e.g. for tests. */
+#if !defined(BROTLI_INTERNAL)
+#if defined(_WIN32) || defined(__CYGWIN__)
+#define BROTLI_INTERNAL
+#elif BROTLI_GNUC_VERSION_CHECK(3, 3, 0) ||                         \
+    BROTLI_TI_VERSION_CHECK(8, 0, 0) ||                             \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0) ||                         \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) ||                            \
+    BROTLI_IBM_VERSION_CHECK(13, 1, 0) ||                           \
+    BROTLI_SUNPRO_VERSION_CHECK(5, 11, 0) ||                        \
+    (BROTLI_TI_VERSION_CHECK(7, 3, 0) &&                            \
+     defined(__TI_GNU_ATTRIBUTE_SUPPORT__) && defined(__TI_EABI__))
+#define BROTLI_INTERNAL __attribute__ ((visibility ("hidden")))
+#else
+#define BROTLI_INTERNAL
+#endif
+#endif
+
+/* <<< <<< <<< end of hedley macros. */
+
+#if BROTLI_GNUC_HAS_ATTRIBUTE(unused, 2, 7, 0) || \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0)
+#define BROTLI_UNUSED_FUNCTION static BROTLI_INLINE __attribute__ ((unused))
+#else
+#define BROTLI_UNUSED_FUNCTION static BROTLI_INLINE
+#endif
+
+#if BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0)
+#define BROTLI_ALIGNED(N) __attribute__((aligned(N)))
+#else
+#define BROTLI_ALIGNED(N)
+#endif
+
+#if (defined(__ARM_ARCH) && (__ARM_ARCH == 7)) || \
+    (defined(M_ARM) && (M_ARM == 7))
+#define BROTLI_TARGET_ARMV7
+#endif  /* ARMv7 */
+
+#if (defined(__ARM_ARCH) && (__ARM_ARCH == 8)) || \
+    defined(__aarch64__) || defined(__ARM64_ARCH_8__)
+#define BROTLI_TARGET_ARMV8_ANY
+
+#if defined(__ARM_32BIT_STATE)
+#define BROTLI_TARGET_ARMV8_32
+#elif defined(__ARM_64BIT_STATE)
+#define BROTLI_TARGET_ARMV8_64
+#endif
+
+#endif  /* ARMv8 */
+
+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+#define BROTLI_TARGET_NEON
+#endif
+
+#if defined(__i386) || defined(_M_IX86)
+#define BROTLI_TARGET_X86
+#endif
+
+#if defined(__x86_64__) || defined(_M_X64)
+#define BROTLI_TARGET_X64
+#endif
+
+#if defined(__PPC64__)
+#define BROTLI_TARGET_POWERPC64
+#endif
+
+#if defined(__riscv) && defined(__riscv_xlen) && __riscv_xlen == 64
+#define BROTLI_TARGET_RISCV64
+#endif
+
+#if defined(BROTLI_BUILD_64_BIT)  /* explicit build options are checked first */
+#define BROTLI_64_BITS 1
+#elif defined(BROTLI_BUILD_32_BIT)
+#define BROTLI_64_BITS 0
+#elif defined(BROTLI_TARGET_X64) || defined(BROTLI_TARGET_ARMV8_64) || \
+    defined(BROTLI_TARGET_POWERPC64) || defined(BROTLI_TARGET_RISCV64)
+#define BROTLI_64_BITS 1  /* auto-detected 64-bit target */
+#else
+#define BROTLI_64_BITS 0  /* any other target is treated as 32-bit */
+#endif
+
+#if (BROTLI_64_BITS)  /* brotli_reg_t follows the width chosen above */
+#define brotli_reg_t uint64_t
+#else
+#define brotli_reg_t uint32_t
+#endif
+
+#if defined(BROTLI_BUILD_BIG_ENDIAN)  /* explicit build options are checked first */
+#define BROTLI_BIG_ENDIAN 1
+#elif defined(BROTLI_BUILD_LITTLE_ENDIAN)
+#define BROTLI_LITTLE_ENDIAN 1
+#elif defined(BROTLI_BUILD_ENDIAN_NEUTRAL)
+/* Just break the elif chain; both macros then default to 0 below. */
+#elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#define BROTLI_LITTLE_ENDIAN 1
+#elif defined(_WIN32) || defined(BROTLI_TARGET_X64)
+/* Win32 & x64 can currently always be assumed to be little endian */
+#define BROTLI_LITTLE_ENDIAN 1
+#elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#define BROTLI_BIG_ENDIAN 1
+#elif defined(BROTLI_X_BYTE_ORDER)  /* last resort: values taken from the OS endian header */
+#if BROTLI_X_BYTE_ORDER == BROTLI_X_LITTLE_ENDIAN
+#define BROTLI_LITTLE_ENDIAN 1
+#elif BROTLI_X_BYTE_ORDER == BROTLI_X_BIG_ENDIAN
+#define BROTLI_BIG_ENDIAN 1
+#endif
+#endif  /* BROTLI_X_BYTE_ORDER */
+
+#if !defined(BROTLI_LITTLE_ENDIAN)  /* make the macro usable in plain #if tests */
+#define BROTLI_LITTLE_ENDIAN 0
+#endif
+
+#if !defined(BROTLI_BIG_ENDIAN)  /* make the macro usable in plain #if tests */
+#define BROTLI_BIG_ENDIAN 0
+#endif
+
+#if defined(BROTLI_X_BYTE_ORDER)
+#undef BROTLI_X_BYTE_ORDER
+#undef BROTLI_X_LITTLE_ENDIAN
+#undef BROTLI_X_BIG_ENDIAN
+#endif
+
+#if defined(BROTLI_BUILD_PORTABLE)
+#define BROTLI_ALIGNED_READ (!!1)  /* portable build: always use the memcpy accessors */
+#elif defined(BROTLI_TARGET_X86) || defined(BROTLI_TARGET_X64) || \
+    defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_ANY) || \
+    defined(BROTLI_TARGET_RISCV64)
+/* Allow unaligned read only for white-listed CPUs. */
+#define BROTLI_ALIGNED_READ (!!0)
+#else
+#define BROTLI_ALIGNED_READ (!!1)  /* CPU not white-listed: use the memcpy accessors */
+#endif
+
+#if BROTLI_ALIGNED_READ
+/* Portable unaligned memory access: read / write values via memcpy. */
+static BROTLI_INLINE uint16_t BrotliUnalignedRead16(const void* p) {
+  uint16_t value;  /* filled via memcpy so p needs no particular alignment */
+  memcpy(&value, p, sizeof(uint16_t));
+  return value;
+}
+static BROTLI_INLINE uint32_t BrotliUnalignedRead32(const void* p) {
+  uint32_t value;  /* filled via memcpy so p needs no particular alignment */
+  memcpy(&value, p, sizeof(uint32_t));
+  return value;
+}
+static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) {
+  uint64_t value;  /* filled via memcpy so p needs no particular alignment */
+  memcpy(&value, p, sizeof(uint64_t));
+  return value;
+}
+static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) {
+  memcpy(p, &v, sizeof(uint64_t));  /* host-order copy of v's 8 bytes to p */
+}
+#else  /* BROTLI_ALIGNED_READ */
+/* Unaligned memory access is allowed: just cast pointer to requested type. */
+#if defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || \
+    defined(MEMORY_SANITIZER)
+/* Consider we have an unaligned load/store of 4 bytes from address 0x...05.
+   AddressSanitizer will treat it as a 3-byte access to the range 05:07 and
+   will miss a bug if 08 is the first unaddressable byte.
+   ThreadSanitizer will also treat this as a 3-byte access to 05:07 and will
+   miss a race between this access and some other accesses to 08.
+   MemorySanitizer will correctly propagate the shadow on unaligned stores
+   and correctly report bugs on unaligned loads, but it may not properly
+   update and report the origin of the uninitialized memory.
+   For all three tools, replacing an unaligned access with a tool-specific
+   callback solves the problem. */
+#if defined(__cplusplus)
+extern "C" {
+#endif  /* __cplusplus */
+  uint16_t __sanitizer_unaligned_load16(const void* p);
+  uint32_t __sanitizer_unaligned_load32(const void* p);
+  uint64_t __sanitizer_unaligned_load64(const void* p);
+  void __sanitizer_unaligned_store64(void* p, uint64_t v);
+#if defined(__cplusplus)
+}  /* extern "C" */
+#endif  /* __cplusplus */
+#define BrotliUnalignedRead16 __sanitizer_unaligned_load16
+#define BrotliUnalignedRead32 __sanitizer_unaligned_load32
+#define BrotliUnalignedRead64 __sanitizer_unaligned_load64
+#define BrotliUnalignedWrite64 __sanitizer_unaligned_store64
+#else
+static BROTLI_INLINE uint16_t BrotliUnalignedRead16(const void* p) {
+  return *(const uint16_t*)p;
+}
+static BROTLI_INLINE uint32_t BrotliUnalignedRead32(const void* p) {
+  return *(const uint32_t*)p;
+}
+#if (BROTLI_64_BITS)
+static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) {
+  return *(const uint64_t*)p;
+}
+static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) {
+  *(uint64_t*)p = v;
+}
+#else  /* BROTLI_64_BITS */
+/* Avoid emitting LDRD / STRD, which require properly aligned address. */
+/* If __attribute__(aligned) is available, use that. Otherwise, memcpy. */
+
+#if BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0)
+typedef BROTLI_ALIGNED(1) uint64_t brotli_unaligned_uint64_t;
+
+static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) {
+  return (uint64_t) ((brotli_unaligned_uint64_t*) p)[0];
+}
+static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) {
+  brotli_unaligned_uint64_t* dwords = (brotli_unaligned_uint64_t*) p;
+  dwords[0] = (brotli_unaligned_uint64_t) v;
+}
+#else /* BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0) */
+static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) {
+  uint64_t v;
+  memcpy(&v, p, sizeof(uint64_t));
+  return v;
+}
+
+static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) {
+  memcpy(p, &v, sizeof(uint64_t));
+}
+#endif  /* BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0) */
+#endif  /* BROTLI_64_BITS */
+#endif  /* ASAN / TSAN / MSAN */
+#endif  /* BROTLI_ALIGNED_READ */
+
+#if BROTLI_LITTLE_ENDIAN
+/* Straight endianness. Just read / write values. */
+#define BROTLI_UNALIGNED_LOAD16LE BrotliUnalignedRead16
+#define BROTLI_UNALIGNED_LOAD32LE BrotliUnalignedRead32
+#define BROTLI_UNALIGNED_LOAD64LE BrotliUnalignedRead64
+#define BROTLI_UNALIGNED_STORE64LE BrotliUnalignedWrite64
+#elif BROTLI_BIG_ENDIAN  /* BROTLI_LITTLE_ENDIAN */
+/* Explain compiler to byte-swap values. */
+#define BROTLI_BSWAP16_(V) ((uint16_t)( \
+  (((V) & 0xFFU) << 8) | \
+  (((V) >> 8) & 0xFFU)))
+static BROTLI_INLINE uint16_t BROTLI_UNALIGNED_LOAD16LE(const void* p) {
+  uint16_t value = BrotliUnalignedRead16(p);
+  return BROTLI_BSWAP16_(value);
+}
+#define BROTLI_BSWAP32_(V) ( \
+  (((V) & 0xFFU) << 24) | (((V) & 0xFF00U) << 8) | \
+  (((V) >> 8) & 0xFF00U) | (((V) >> 24) & 0xFFU))
+static BROTLI_INLINE uint32_t BROTLI_UNALIGNED_LOAD32LE(const void* p) {
+  uint32_t value = BrotliUnalignedRead32(p);
+  return BROTLI_BSWAP32_(value);
+}
+#define BROTLI_BSWAP64_(V) ( \
+  (((V) & 0xFFU) << 56) | (((V) & 0xFF00U) << 40) | \
+  (((V) & 0xFF0000U) << 24) | (((V) & 0xFF000000U) << 8) | \
+  (((V) >> 8) & 0xFF000000U) | (((V) >> 24) & 0xFF0000U) | \
+  (((V) >> 40) & 0xFF00U) | (((V) >> 56) & 0xFFU))
+static BROTLI_INLINE uint64_t BROTLI_UNALIGNED_LOAD64LE(const void* p) {
+  uint64_t value = BrotliUnalignedRead64(p);
+  return BROTLI_BSWAP64_(value);
+}
+static BROTLI_INLINE void BROTLI_UNALIGNED_STORE64LE(void* p, uint64_t v) {
+  uint64_t value = BROTLI_BSWAP64_(v);
+  BrotliUnalignedWrite64(p, value);
+}
+#else  /* BROTLI_LITTLE_ENDIAN */
+/* Read / store values byte-wise; hopefully compiler will understand. */
+static BROTLI_INLINE uint16_t BROTLI_UNALIGNED_LOAD16LE(const void* p) {
+  const uint8_t* bytes = (const uint8_t*)p;  /* bytes[0] is the low byte */
+  return (uint16_t)((bytes[1] << 8) | bytes[0]);
+}
+static BROTLI_INLINE uint32_t BROTLI_UNALIGNED_LOAD32LE(const void* p) {
+  const uint8_t* bytes = (const uint8_t*)p;
+  /* Assemble the two 16-bit halves first, then join them; bytes[0] is the
+     least significant byte of the result. */
+  uint32_t low = (uint32_t)bytes[0] | ((uint32_t)bytes[1] << 8);
+  uint32_t high = (uint32_t)bytes[2] | ((uint32_t)bytes[3] << 8);
+  return low | (high << 16);
+}
+static BROTLI_INLINE uint64_t BROTLI_UNALIGNED_LOAD64LE(const void* p) {
+  const uint8_t* bytes = (const uint8_t*)p;
+  /* bytes[0..3] build the low 32-bit word and bytes[4..7] the high one; in
+     each word the lowest-addressed byte is the least significant. Every
+     byte is widened before shifting so no bits are lost. */
+  uint32_t low = (uint32_t)bytes[0] | ((uint32_t)bytes[1] << 8) |
+                 ((uint32_t)bytes[2] << 16) | ((uint32_t)bytes[3] << 24);
+  uint32_t high = (uint32_t)bytes[4] | ((uint32_t)bytes[5] << 8) |
+                  ((uint32_t)bytes[6] << 16) | ((uint32_t)bytes[7] << 24);
+
+  return ((uint64_t)high << 32) | (uint64_t)low;
+}
+static BROTLI_INLINE void BROTLI_UNALIGNED_STORE64LE(void* p, uint64_t v) {
+  uint8_t* bytes = (uint8_t*)p;
+  int i;
+
+  /* Write the eight bytes of v in little-endian order: output byte i
+     receives bits [8*i .. 8*i+7] of v, so the least significant byte lands
+     at the lowest address regardless of the host byte order. */
+  for (i = 0; i < 8; ++i) {
+    bytes[i] = (uint8_t)(v >> (8 * i));
+  }
+}
+#endif  /* BROTLI_LITTLE_ENDIAN */
+
+/* BROTLI_IS_CONSTANT macros returns true for compile-time constants. */
+#if BROTLI_GNUC_HAS_BUILTIN(__builtin_constant_p, 3, 0, 1) || \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0)
+#define BROTLI_IS_CONSTANT(x) (!!__builtin_constant_p(x))
+#else
+#define BROTLI_IS_CONSTANT(x) (!!0)
+#endif
+
+#if defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_ANY)
+#define BROTLI_HAS_UBFX (!!1)
+#else
+#define BROTLI_HAS_UBFX (!!0)
+#endif
+
+#if defined(BROTLI_ENABLE_LOG)
+#define BROTLI_DCHECK(x) assert(x)
+#define BROTLI_LOG(x) printf x
+#else
+#define BROTLI_DCHECK(x)
+#define BROTLI_LOG(x)
+#endif
+
+#if defined(BROTLI_DEBUG) || defined(BROTLI_ENABLE_LOG)
+static BROTLI_INLINE void BrotliDump(const char* f, int l, const char* fn) {  /* file, line, function */
+  fprintf(stderr, "%s:%d (%s)\n", f, l, fn);  /* one "file:line (function)" record per call */
+  fflush(stderr);  /* flush right away so the record is not left in a buffer */
+}
+#define BROTLI_DUMP() BrotliDump(__FILE__, __LINE__, __FUNCTION__)
+#else
+#define BROTLI_DUMP() (void)(0)
+#endif
+
+/* TODO: add appropriate icc/sunpro/arm/ibm/ti checks. */
+#if (BROTLI_GNUC_VERSION_CHECK(3, 0, 0) || defined(__llvm__)) && \
+    !defined(BROTLI_BUILD_NO_RBIT)
+#if defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_ANY)
+/* TODO: detect ARMv6T2 and enable this code for it. */
+static BROTLI_INLINE brotli_reg_t BrotliRBit(brotli_reg_t input) {  /* wraps the ARM "rbit" instruction */
+  brotli_reg_t output;
+  __asm__("rbit %0, %1\n" : "=r"(output) : "r"(input));  /* %0 = output register, %1 = input register */
+  return output;
+}
+#define BROTLI_RBIT(x) BrotliRBit(x)
+#endif  /* armv7 / armv8 */
+#endif  /* gcc || clang */
+#if !defined(BROTLI_RBIT)
+static BROTLI_INLINE void BrotliRBit(void) { /* Should break build if used. */ }
+#endif  /* BROTLI_RBIT */
+
+#define BROTLI_REPEAT(N, X) {       \
+  if (((N) & 1) != 0) {X;}          \
+  if (((N) & 2) != 0) {X; X;}       \
+  if (((N) & 4) != 0) {X; X; X; X;} \
+}  /* runs X (N & 7) times; N is parenthesized so expression arguments bind correctly */
+
+#define BROTLI_UNUSED(X) (void)(X)
+
+#define BROTLI_MIN_MAX(T)                                                      \
+  static BROTLI_INLINE T brotli_min_ ## T (T a, T b) { return a < b ? a : b; } \
+  static BROTLI_INLINE T brotli_max_ ## T (T a, T b) { return a > b ? a : b; }
+BROTLI_MIN_MAX(double) BROTLI_MIN_MAX(float) BROTLI_MIN_MAX(int)
+BROTLI_MIN_MAX(size_t) BROTLI_MIN_MAX(uint32_t) BROTLI_MIN_MAX(uint8_t)
+#undef BROTLI_MIN_MAX
+#define BROTLI_MIN(T, A, B) (brotli_min_ ## T((A), (B)))
+#define BROTLI_MAX(T, A, B) (brotli_max_ ## T((A), (B)))
+
+#define BROTLI_SWAP(T, A, I, J) { \
+  T __brotli_swap_tmp = (A)[(I)]; \
+  (A)[(I)] = (A)[(J)];            \
+  (A)[(J)] = __brotli_swap_tmp;   \
+}
+
+/* Default brotli_alloc_func: forwards to malloc(); the opaque pointer is ignored. */
+static void* BrotliDefaultAllocFunc(void* opaque, size_t size) {
+  BROTLI_UNUSED(opaque);
+  return malloc(size);
+}
+
+/* Default brotli_free_func: forwards to free(); the opaque pointer is ignored. */
+static void BrotliDefaultFreeFunc(void* opaque, void* address) {
+  BROTLI_UNUSED(opaque);
+  free(address);
+}
+
+BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) {  /* only takes addresses; calls nothing */
+  BROTLI_UNUSED(&BrotliSuppressUnusedFunctions);  /* includes itself */
+  BROTLI_UNUSED(&BrotliUnalignedRead16);
+  BROTLI_UNUSED(&BrotliUnalignedRead32);
+  BROTLI_UNUSED(&BrotliUnalignedRead64);
+  BROTLI_UNUSED(&BrotliUnalignedWrite64);
+  BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD16LE);
+  BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD32LE);
+  BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD64LE);
+  BROTLI_UNUSED(&BROTLI_UNALIGNED_STORE64LE);
+  BROTLI_UNUSED(&BrotliRBit);  /* either the real ARM helper or the empty stub */
+  BROTLI_UNUSED(&brotli_min_double);
+  BROTLI_UNUSED(&brotli_max_double);
+  BROTLI_UNUSED(&brotli_min_float);
+  BROTLI_UNUSED(&brotli_max_float);
+  BROTLI_UNUSED(&brotli_min_int);
+  BROTLI_UNUSED(&brotli_max_int);
+  BROTLI_UNUSED(&brotli_min_size_t);
+  BROTLI_UNUSED(&brotli_max_size_t);
+  BROTLI_UNUSED(&brotli_min_uint32_t);
+  BROTLI_UNUSED(&brotli_max_uint32_t);
+  BROTLI_UNUSED(&brotli_min_uint8_t);
+  BROTLI_UNUSED(&brotli_max_uint8_t);
+  BROTLI_UNUSED(&BrotliDefaultAllocFunc);
+  BROTLI_UNUSED(&BrotliDefaultFreeFunc);
+#if defined(BROTLI_DEBUG) || defined(BROTLI_ENABLE_LOG)  /* same guard as BrotliDump's definition */
+  BROTLI_UNUSED(&BrotliDump);
+#endif
+}
+
+#endif  /* BROTLI_COMMON_PLATFORM_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/transform.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/transform.c
new file mode 100755
index 0000000000..4395c5e992
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/transform.c
@@ -0,0 +1,235 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "./transform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* RFC 7932 transforms string data: a sequence of length-prefixed strings. */
+static const char kPrefixSuffix[218] =
+      "\1 \2, \10 of the \4 of \2s \1.\5 and \4 "
+/* 0x  _0 _2  __5        _E    _3  _6 _8     _E */
+      "in \1\"\4 to \2\">\1\n\2. \1]\5 for \3 a \6 "
+/* 2x     _3_ _5    _A_  _D_ _F  _2 _4     _A   _E */
+      "that \1\'\6 with \6 from \4 by \1(\6. T"
+/* 4x       _5_ _7      _E      _5    _A _C */
+      "he \4 on \4 as \4 is \4ing \2\n\t\1:\3ed "
+/* 6x     _3    _8    _D    _2    _7_ _ _A _C */
+      "\2=\"\4 at \3ly \1,\2=\'\5.com/\7. This \5"
+/* 8x  _0 _ _3    _8   _C _E _ _1     _7       _F */
+      " not \3er \3al \4ful \4ive \5less \4es"
+/* Ax       _5   _9   _D    _2    _7     _D */
+      "t \4ize \2\xc2\xa0\4ous \5 the \2e \0";
+/* Cx    _2    _7___ ___ _A    _F     _5  _8 */
+
+static const uint16_t kPrefixSuffixMap[50] = {  /* string id -> offset in kPrefixSuffix */
+  0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25,
+  0x2A, 0x2D, 0x2F, 0x32, 0x34, 0x3A, 0x3E, 0x45, 0x47, 0x4E,
+  0x55, 0x5A, 0x5C, 0x63, 0x68, 0x6D, 0x72, 0x77, 0x7A, 0x7C,
+  0x80, 0x83, 0x88, 0x8C, 0x8E, 0x91, 0x97, 0x9F, 0xA5, 0xA9,
+  0xAD, 0xB2, 0xB7, 0xBD, 0xC2, 0xC7, 0xCA, 0xCF, 0xD5, 0xD8
+};
+
+/* RFC 7932 transforms: [prefix_id, transform type, suffix_id] triplets. */
+static const uint8_t kTransformsData[] = {
+  49, BROTLI_TRANSFORM_IDENTITY, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 0,
+   0, BROTLI_TRANSFORM_IDENTITY, 0,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_1, 49,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
+  49, BROTLI_TRANSFORM_IDENTITY, 47,
+   0, BROTLI_TRANSFORM_IDENTITY, 49,
+   4, BROTLI_TRANSFORM_IDENTITY, 0,
+  49, BROTLI_TRANSFORM_IDENTITY, 3,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 6,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_2, 49,
+  49, BROTLI_TRANSFORM_OMIT_LAST_1, 49,
+   1, BROTLI_TRANSFORM_IDENTITY, 0,
+  49, BROTLI_TRANSFORM_IDENTITY, 1,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
+  49, BROTLI_TRANSFORM_IDENTITY, 7,
+  49, BROTLI_TRANSFORM_IDENTITY, 9,
+  48, BROTLI_TRANSFORM_IDENTITY, 0,
+  49, BROTLI_TRANSFORM_IDENTITY, 8,
+  49, BROTLI_TRANSFORM_IDENTITY, 5,
+  49, BROTLI_TRANSFORM_IDENTITY, 10,
+  49, BROTLI_TRANSFORM_IDENTITY, 11,
+  49, BROTLI_TRANSFORM_OMIT_LAST_3, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 13,
+  49, BROTLI_TRANSFORM_IDENTITY, 14,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_3, 49,
+  49, BROTLI_TRANSFORM_OMIT_LAST_2, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 15,
+  49, BROTLI_TRANSFORM_IDENTITY, 16,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 12,
+   5, BROTLI_TRANSFORM_IDENTITY, 49,
+   0, BROTLI_TRANSFORM_IDENTITY, 1,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_4, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 18,
+  49, BROTLI_TRANSFORM_IDENTITY, 17,
+  49, BROTLI_TRANSFORM_IDENTITY, 19,
+  49, BROTLI_TRANSFORM_IDENTITY, 20,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_5, 49,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_6, 49,
+  47, BROTLI_TRANSFORM_IDENTITY, 49,
+  49, BROTLI_TRANSFORM_OMIT_LAST_4, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 22,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 23,
+  49, BROTLI_TRANSFORM_IDENTITY, 24,
+  49, BROTLI_TRANSFORM_IDENTITY, 25,
+  49, BROTLI_TRANSFORM_OMIT_LAST_7, 49,
+  49, BROTLI_TRANSFORM_OMIT_LAST_1, 26,
+  49, BROTLI_TRANSFORM_IDENTITY, 27,
+  49, BROTLI_TRANSFORM_IDENTITY, 28,
+   0, BROTLI_TRANSFORM_IDENTITY, 12,
+  49, BROTLI_TRANSFORM_IDENTITY, 29,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_9, 49,
+  49, BROTLI_TRANSFORM_OMIT_FIRST_7, 49,
+  49, BROTLI_TRANSFORM_OMIT_LAST_6, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 21,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
+  49, BROTLI_TRANSFORM_OMIT_LAST_8, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 31,
+  49, BROTLI_TRANSFORM_IDENTITY, 32,
+  47, BROTLI_TRANSFORM_IDENTITY, 3,
+  49, BROTLI_TRANSFORM_OMIT_LAST_5, 49,
+  49, BROTLI_TRANSFORM_OMIT_LAST_9, 49,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 8,
+   5, BROTLI_TRANSFORM_IDENTITY, 21,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 10,
+  49, BROTLI_TRANSFORM_IDENTITY, 30,
+   0, BROTLI_TRANSFORM_IDENTITY, 5,
+  35, BROTLI_TRANSFORM_IDENTITY, 49,
+  47, BROTLI_TRANSFORM_IDENTITY, 2,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 17,
+  49, BROTLI_TRANSFORM_IDENTITY, 36,
+  49, BROTLI_TRANSFORM_IDENTITY, 33,
+   5, BROTLI_TRANSFORM_IDENTITY, 0,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 21,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
+  49, BROTLI_TRANSFORM_IDENTITY, 37,
+   0, BROTLI_TRANSFORM_IDENTITY, 30,
+  49, BROTLI_TRANSFORM_IDENTITY, 38,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
+  49, BROTLI_TRANSFORM_IDENTITY, 39,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
+  49, BROTLI_TRANSFORM_IDENTITY, 34,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 8,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
+   0, BROTLI_TRANSFORM_IDENTITY, 21,
+  49, BROTLI_TRANSFORM_IDENTITY, 40,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
+  49, BROTLI_TRANSFORM_IDENTITY, 41,
+  49, BROTLI_TRANSFORM_IDENTITY, 42,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 17,
+  49, BROTLI_TRANSFORM_IDENTITY, 43,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 10,
+   0, BROTLI_TRANSFORM_IDENTITY, 34,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
+  49, BROTLI_TRANSFORM_IDENTITY, 44,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
+  45, BROTLI_TRANSFORM_IDENTITY, 49,
+   0, BROTLI_TRANSFORM_IDENTITY, 33,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
+  49, BROTLI_TRANSFORM_IDENTITY, 46,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
+  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 33,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 21,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
+  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
+   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
+   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
+};
+
+static const BrotliTransforms kBrotliTransforms = {  /* read-only built-in table */
+  sizeof(kPrefixSuffix),  /* prefix_suffix_size */
+  (const uint8_t*)kPrefixSuffix,  /* prefix_suffix */
+  kPrefixSuffixMap,  /* prefix_suffix_map */
+  sizeof(kTransformsData) / (3 * sizeof(kTransformsData[0])),  /* num_transforms */
+  kTransformsData,  /* transforms */
+  {0, 12, 27, 23, 42, 63, 56, 48, 59, 64}  /* cutOffTransforms */
+};
+
+const BrotliTransforms* BrotliGetTransforms(void) {
+  return &kBrotliTransforms;  /* the file-local table built from the data above */
+}
+
+static int ToUpperCase(uint8_t* p) {
+  const uint8_t lead = p[0];
+  /* Lead byte >= 0xE0: three-byte step with an arbitrary third-byte tweak. */
+  if (lead >= 0xE0) {
+    p[2] ^= 5;
+    return 3;
+  }
+  /* Lead byte in [0xC0, 0xE0): two-byte step, simplified UTF-8 model. */
+  if (lead >= 0xC0) {
+    p[1] ^= 32;
+    return 2;
+  }
+  /* Single byte: only ASCII 'a'..'z' changes; returns the bytes consumed. */
+  if (lead >= 'a' && lead <= 'z') p[0] = (uint8_t)(lead ^ 32);
+  return 1;
+}
+
+int BrotliTransformDictionaryWord(uint8_t* dst, const uint8_t* word, int len,
+    const BrotliTransforms* transforms, int transform_idx) {
+  /* Writes prefix + transformed |word| + suffix to |dst|; returns the number of
+     bytes written. Prefix and suffix records begin with their length byte. */
+  int idx = 0;
+  const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, transform_idx);
+  const int t = BROTLI_TRANSFORM_TYPE(transforms, transform_idx);
+  const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, transform_idx);
+  int prefix_len = *prefix++;
+  int i = 0;
+  while (prefix_len--) { dst[idx++] = *prefix++; }
+  if (t <= BROTLI_TRANSFORM_OMIT_LAST_9) {
+    len -= t;  /* cut |t| bytes from the end; BROTLI_TRANSFORM_IDENTITY is 0 */
+  } else if (t >= BROTLI_TRANSFORM_OMIT_FIRST_1
+      && t <= BROTLI_TRANSFORM_OMIT_FIRST_9) {
+    /* Cut 1..9 bytes from the start of the word. */
+    int skip = t - (BROTLI_TRANSFORM_OMIT_FIRST_1 - 1);
+    word += skip;
+    len -= skip;
+  }
+  while (i < len) { dst[idx++] = word[i++]; }
+  if (t == BROTLI_TRANSFORM_UPPERCASE_FIRST) {
+    /* Guard: with an empty word there is no copied byte to uppercase, and
+       ToUpperCase() would otherwise touch bytes past the written output. */
+    if (len > 0) ToUpperCase(&dst[idx - len]);
+  } else if (t == BROTLI_TRANSFORM_UPPERCASE_ALL) {
+    uint8_t* uppercase = &dst[idx - len];
+    while (len > 0) {
+      int step = ToUpperCase(uppercase);
+      uppercase += step;
+      len -= step;
+    }
+  }
+  {
+    int suffix_len = *suffix++;
+    while (suffix_len--) { dst[idx++] = *suffix++; }
+    return idx;
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/transform.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/transform.h
new file mode 100755
index 0000000000..456c12db9b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/transform.h
@@ -0,0 +1,80 @@
+/* transforms is a part of ABI, but not API.
+
+   It means that there are some functions that are supposed to be in "common"
+   library, but header itself is not placed into include/brotli. This way,
+   aforementioned functions will be available only to brotli internals.
+ */
+
+#ifndef BROTLI_COMMON_TRANSFORM_H_
+#define BROTLI_COMMON_TRANSFORM_H_
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+enum BrotliWordTransformType {
+  BROTLI_TRANSFORM_IDENTITY = 0,  /* word is copied unchanged */
+  BROTLI_TRANSFORM_OMIT_LAST_1 = 1,  /* OMIT_LAST_n == n: drops n trailing bytes */
+  BROTLI_TRANSFORM_OMIT_LAST_2 = 2,
+  BROTLI_TRANSFORM_OMIT_LAST_3 = 3,
+  BROTLI_TRANSFORM_OMIT_LAST_4 = 4,
+  BROTLI_TRANSFORM_OMIT_LAST_5 = 5,
+  BROTLI_TRANSFORM_OMIT_LAST_6 = 6,
+  BROTLI_TRANSFORM_OMIT_LAST_7 = 7,
+  BROTLI_TRANSFORM_OMIT_LAST_8 = 8,
+  BROTLI_TRANSFORM_OMIT_LAST_9 = 9,
+  BROTLI_TRANSFORM_UPPERCASE_FIRST = 10,  /* uppercases the first character */
+  BROTLI_TRANSFORM_UPPERCASE_ALL = 11,  /* uppercases every character */
+  BROTLI_TRANSFORM_OMIT_FIRST_1 = 12,  /* OMIT_FIRST_n == 11 + n: drops n leading bytes */
+  BROTLI_TRANSFORM_OMIT_FIRST_2 = 13,
+  BROTLI_TRANSFORM_OMIT_FIRST_3 = 14,
+  BROTLI_TRANSFORM_OMIT_FIRST_4 = 15,
+  BROTLI_TRANSFORM_OMIT_FIRST_5 = 16,
+  BROTLI_TRANSFORM_OMIT_FIRST_6 = 17,
+  BROTLI_TRANSFORM_OMIT_FIRST_7 = 18,
+  BROTLI_TRANSFORM_OMIT_FIRST_8 = 19,
+  BROTLI_TRANSFORM_OMIT_FIRST_9 = 20,
+  BROTLI_NUM_TRANSFORM_TYPES  /* Counts transforms, not a transform itself. */
+};
+
+#define BROTLI_TRANSFORMS_MAX_CUT_OFF BROTLI_TRANSFORM_OMIT_LAST_9  /* largest OMIT_LAST_# type */
+
+typedef struct BrotliTransforms {
+  uint16_t prefix_suffix_size;  /* byte size of |prefix_suffix| */
+  /* Last character must be null, so prefix_suffix_size must be at least 1. */
+  const uint8_t* prefix_suffix;
+  const uint16_t* prefix_suffix_map;  /* prefix / suffix id -> offset in |prefix_suffix| */
+  uint32_t num_transforms;  /* number of triplets in |transforms| */
+  /* Each entry is a [prefix_id, transform, suffix_id] triplet. */
+  const uint8_t* transforms;
+  /* Indices of transforms like ["", BROTLI_TRANSFORM_OMIT_LAST_#, ""].
+     0-th element corresponds to ["", BROTLI_TRANSFORM_IDENTITY, ""].
+     -1, if cut-off transform does not exist. */
+  int16_t cutOffTransforms[BROTLI_TRANSFORMS_MAX_CUT_OFF + 1];
+} BrotliTransforms;
+
+/* T is BrotliTransforms*, I is a transform index; result is uint8_t. */
+#define BROTLI_TRANSFORM_PREFIX_ID(T, I) ((T)->transforms[((I) * 3) + 0])
+#define BROTLI_TRANSFORM_TYPE(T, I)      ((T)->transforms[((I) * 3) + 1])
+#define BROTLI_TRANSFORM_SUFFIX_ID(T, I) ((T)->transforms[((I) * 3) + 2])
+
+/* T is BrotliTransforms*; result is const uint8_t* to a length-prefixed string. */
+#define BROTLI_TRANSFORM_PREFIX(T, I) (&(T)->prefix_suffix[ \
+    (T)->prefix_suffix_map[BROTLI_TRANSFORM_PREFIX_ID(T, I)]])
+#define BROTLI_TRANSFORM_SUFFIX(T, I) (&(T)->prefix_suffix[ \
+    (T)->prefix_suffix_map[BROTLI_TRANSFORM_SUFFIX_ID(T, I)]])
+
+BROTLI_COMMON_API const BrotliTransforms* BrotliGetTransforms(void);  /* built-in table */
+
+BROTLI_COMMON_API int BrotliTransformDictionaryWord(
+    uint8_t* dst, const uint8_t* word, int len,
+    const BrotliTransforms* transforms, int transform_idx);  /* returns bytes written to |dst| */
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_COMMON_TRANSFORM_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/version.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/version.h
new file mode 100755
index 0000000000..0d0d0c7967
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/common/version.h
@@ -0,0 +1,26 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Version definition. */
+
+#ifndef BROTLI_COMMON_VERSION_H_
+#define BROTLI_COMMON_VERSION_H_
+
+/* This macro should only be used when library is compiled together with client.
+   If library is dynamically linked, use BrotliDecoderVersion and
+   BrotliEncoderVersion methods. */
+
+/* Semantic version, calculated as (MAJOR << 24) | (MINOR << 12) | PATCH */
+#define BROTLI_VERSION 0x1000007  /* MAJOR 1, MINOR 0, PATCH 7 */
+
+/* This macro is used by build system to produce Libtool-friendly soname. See
+   https://www.gnu.org/software/libtool/manual/html_node/Libtool-versioning.html
+ */
+
+/* ABI version, calculated as (CURRENT << 24) | (REVISION << 12) | AGE */
+#define BROTLI_ABI_VERSION 0x1007000  /* CURRENT 1, REVISION 7, AGE 0 */
+
+#endif  /* BROTLI_COMMON_VERSION_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/bit_reader.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/bit_reader.c
new file mode 100755
index 0000000000..722fd906dd
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/bit_reader.c
@@ -0,0 +1,48 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Bit reading helpers */
+
+#include "./bit_reader.h"
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+void BrotliInitBitReader(BrotliBitReader* const br) {
+  br->val_ = 0;  /* next_in and avail_in are not touched here */
+  br->bit_pos_ = sizeof(br->val_) << 3;  /* bit width of val_; presumably means "nothing buffered" */
+}
+
+BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br) {
+  /* Low address bits that must be zero for an aligned read; the mask is 0
+     (no alignment requirement) when BROTLI_ALIGNED_READ is off.
+     Re-aligning after an unaligned BrotliFillWindow would overflow the
+     accumulator; after BrotliSafeReadBits there is enough room left. */
+  const size_t aligned_read_mask =
+      BROTLI_ALIGNED_READ ? (sizeof(br->val_) >> 1) - 1 : 0;
+
+  /* The accumulator must hold at least one bit on return. */
+  if (BrotliGetAvailableBits(br) == 0 && !BrotliPullByte(br)) {
+    return BROTLI_FALSE;
+  }
+
+  /* Pull single bytes until |next_in| is aligned. */
+  while ((((size_t)br->next_in) & aligned_read_mask) != 0) {
+    /* Running out of input here is fine: alignment no longer matters. */
+    if (!BrotliPullByte(br)) {
+      break;
+    }
+  }
+  return BROTLI_TRUE;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/bit_reader.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/bit_reader.h
new file mode 100755
index 0000000000..c06e91419f
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/bit_reader.h
@@ -0,0 +1,309 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Bit reading helpers */
+
+#ifndef BROTLI_DEC_BIT_READER_H_
+#define BROTLI_DEC_BIT_READER_H_
+
+#include <string.h>  /* memcpy */
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_SHORT_FILL_BIT_WINDOW_READ (sizeof(brotli_reg_t) >> 1)  /* bytes: half a register */
+
+static const uint32_t kBitMask[33] = {  0x00000000,  /* kBitMask[n]: the n lowest bits set */
+    0x00000001, 0x00000003, 0x00000007, 0x0000000F,
+    0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
+    0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,
+    0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF,
+    0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF,
+    0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF,
+    0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF,
+    0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
+};
+
+static BROTLI_INLINE uint32_t BitMask(uint32_t n) {  /* mask with the n lowest bits set */
+  if (BROTLI_IS_CONSTANT(n) || BROTLI_HAS_UBFX) {
+    /* Masking with this expression turns to a single
+       "Unsigned Bit Field Extract" UBFX instruction on ARM. */
+    return ~((0xFFFFFFFFu) << n);
+  } else {
+    return kBitMask[n];  /* table lookup; valid for n in 0..32 */
+  }
+}
+
+typedef struct {
+  brotli_reg_t val_;       /* pre-fetched bits */
+  uint32_t bit_pos_;       /* current bit-reading position in val_ */
+  const uint8_t* next_in;  /* the byte we're reading from */
+  size_t avail_in;         /* bytes still readable at next_in */
+} BrotliBitReader;
+
+typedef struct {  /* snapshot holding the same four fields as BrotliBitReader */
+  brotli_reg_t val_;
+  uint32_t bit_pos_;
+  const uint8_t* next_in;
+  size_t avail_in;
+} BrotliBitReaderState;
+
+/* Initializes the BrotliBitReader fields. */
+BROTLI_INTERNAL void BrotliInitBitReader(BrotliBitReader* const br);
+
+/* Ensures that accumulator is not empty.
+   May consume up to sizeof(brotli_reg_t) - 1 bytes of input.
+   Returns BROTLI_FALSE if data is required but there is no input available.
+   For BROTLI_ALIGNED_READ this function also prepares bit reader for aligned
+   reading. */
+BROTLI_INTERNAL BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br);
+
+static BROTLI_INLINE void BrotliBitReaderSaveState(
+    BrotliBitReader* const from, BrotliBitReaderState* to) {  /* copies all four fields */
+  to->val_ = from->val_;
+  to->bit_pos_ = from->bit_pos_;
+  to->next_in = from->next_in;
+  to->avail_in = from->avail_in;
+}
+
+static BROTLI_INLINE void BrotliBitReaderRestoreState(
+    BrotliBitReader* const to, BrotliBitReaderState* from) {  /* inverse of SaveState */
+  to->val_ = from->val_;
+  to->bit_pos_ = from->bit_pos_;
+  to->next_in = from->next_in;
+  to->avail_in = from->avail_in;
+}
+
+static BROTLI_INLINE uint32_t BrotliGetAvailableBits(
+    const BrotliBitReader* br) {
+  return (BROTLI_64_BITS ? 64 : 32) - br->bit_pos_;  /* 64 or 32 minus the read position */
+}
+
+/* Unread input bytes: |avail_in| plus the whole bytes still held in val_. */
+static BROTLI_INLINE size_t BrotliGetRemainingBytes(BrotliBitReader* br) {
+  const size_t buffered_bytes = BrotliGetAvailableBits(br) / 8u;
+  return br->avail_in + buffered_bytes;
+}
+
+/* Checks if there is at least |num| bytes left in the input ring-buffer
+   (excluding the bits remaining in br->val_); BROTLI_TRUE when avail_in >= num. */
+static BROTLI_INLINE BROTLI_BOOL BrotliCheckInputAmount(
+    BrotliBitReader* const br, size_t num) {
+  return TO_BROTLI_BOOL(br->avail_in >= num);
+}
+
+/* Guarantees that there are at least |n_bits| + 1 bits in accumulator.
+   Precondition: accumulator contains at least 1 bit; |avail_in| is NOT checked.
+   |n_bits| should be in the range [1..24] for regular build. For portable
+   non-64-bit little-endian build only 16 bits are safe to request. */
+static BROTLI_INLINE void BrotliFillBitWindow(
+    BrotliBitReader* const br, uint32_t n_bits) {
+#if (BROTLI_64_BITS)
+  if (!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 8)) {
+    if (br->bit_pos_ >= 56) {
+      br->val_ >>= 56;
+      br->bit_pos_ ^= 56;  /* here same as -= 56 because of the if condition */
+      br->val_ |= BROTLI_UNALIGNED_LOAD64LE(br->next_in) << 8;
+      br->avail_in -= 7;  /* the shift by 8 keeps only 7 of the 8 loaded bytes */
+      br->next_in += 7;
+    }
+  } else if (
+      !BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 16)) {
+    if (br->bit_pos_ >= 48) {
+      br->val_ >>= 48;
+      br->bit_pos_ ^= 48;  /* here same as -= 48 because of the if condition */
+      br->val_ |= BROTLI_UNALIGNED_LOAD64LE(br->next_in) << 16;
+      br->avail_in -= 6;  /* the shift by 16 keeps only 6 of the 8 loaded bytes */
+      br->next_in += 6;
+    }
+  } else {
+    if (br->bit_pos_ >= 32) {
+      br->val_ >>= 32;
+      br->bit_pos_ ^= 32;  /* here same as -= 32 because of the if condition */
+      br->val_ |= ((uint64_t)BROTLI_UNALIGNED_LOAD32LE(br->next_in)) << 32;
+      br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ;
+      br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ;
+    }
+  }
+#else
+  if (!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 8)) {
+    if (br->bit_pos_ >= 24) {
+      br->val_ >>= 24;
+      br->bit_pos_ ^= 24;  /* here same as -= 24 because of the if condition */
+      br->val_ |= BROTLI_UNALIGNED_LOAD32LE(br->next_in) << 8;
+      br->avail_in -= 3;  /* the shift by 8 keeps only 3 of the 4 loaded bytes */
+      br->next_in += 3;
+    }
+  } else {
+    if (br->bit_pos_ >= 16) {
+      br->val_ >>= 16;
+      br->bit_pos_ ^= 16;  /* here same as -= 16 because of the if condition */
+      br->val_ |= ((uint32_t)BROTLI_UNALIGNED_LOAD16LE(br->next_in)) << 16;
+      br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ;
+      br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ;
+    }
+  }
+#endif
+}
+
+/* Mostly like BrotliFillBitWindow, but guarantees only 16 bits and reads no
+   more than BROTLI_SHORT_FILL_BIT_WINDOW_READ bytes of input. */
+static BROTLI_INLINE void BrotliFillBitWindow16(BrotliBitReader* const br) {
+  BrotliFillBitWindow(br, 17);  /* 17 fails both "<= 8" and "<= 16" tests, selecting the short read */
+}
+
+/* Tries to pull one byte of input to accumulator.
+   Returns BROTLI_FALSE if there is no input available. */
+static BROTLI_INLINE BROTLI_BOOL BrotliPullByte(BrotliBitReader* const br) {
+  if (br->avail_in == 0) {
+    return BROTLI_FALSE;
+  }
+  br->val_ >>= 8;  /* drop the 8 lowest bits to make room at the top */
+#if (BROTLI_64_BITS)
+  br->val_ |= ((uint64_t)*br->next_in) << 56;  /* new byte enters at the top */
+#else
+  br->val_ |= ((uint32_t)*br->next_in) << 24;  /* new byte enters at the top */
+#endif
+  br->bit_pos_ -= 8;
+  --br->avail_in;
+  ++br->next_in;
+  return BROTLI_TRUE;
+}
+
+/* Returns currently available bits.
+   The number of valid bits could be calculated by BrotliGetAvailableBits. */
+static BROTLI_INLINE brotli_reg_t BrotliGetBitsUnmasked(
+    BrotliBitReader* const br) {
+  return br->val_ >> br->bit_pos_;  /* shifts out the bits below the read position */
+}
+
+/* Like BrotliGetBits, but does not mask the result.
+   The result contains at least 16 valid bits. */
+static BROTLI_INLINE uint32_t BrotliGet16BitsUnmasked(
+    BrotliBitReader* const br) {
+  BrotliFillBitWindow(br, 16);  /* refill does not check avail_in */
+  return (uint32_t)BrotliGetBitsUnmasked(br);
+}
+
+/* Returns the specified number of bits from |br| without advancing bit
+   position. */
+static BROTLI_INLINE uint32_t BrotliGetBits(
+    BrotliBitReader* const br, uint32_t n_bits) {
+  BrotliFillBitWindow(br, n_bits);  /* may consume input; refill does not check avail_in */
+  return (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
+}
+
+/* Tries to peek the specified amount of bits. Returns BROTLI_FALSE, if there
+   is not enough input. */
+static BROTLI_INLINE BROTLI_BOOL BrotliSafeGetBits(
+    BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  while (BrotliGetAvailableBits(br) < n_bits) {
+    if (!BrotliPullByte(br)) {  /* bytes pulled so far stay in the accumulator */
+      return BROTLI_FALSE;  /* |*val| is left unwritten */
+    }
+  }
+  *val = (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
+  return BROTLI_TRUE;
+}
+
+/* Advances the bit pos by |n_bits|. No input is read and nothing is checked. */
+static BROTLI_INLINE void BrotliDropBits(
+    BrotliBitReader* const br, uint32_t n_bits) {
+  br->bit_pos_ += n_bits;
+}
+
+static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) {
+  uint32_t unused_bytes = BrotliGetAvailableBits(br) >> 3;  /* whole unread bytes in val_ */
+  uint32_t unused_bits = unused_bytes << 3;
+  br->avail_in += unused_bytes;  /* hand those bytes back to the input */
+  br->next_in -= unused_bytes;
+  if (unused_bits == sizeof(br->val_) << 3) {
+    br->val_ = 0;  /* avoids shifting by the full width of val_ */
+  } else {
+    br->val_ <<= unused_bits;
+  }
+  br->bit_pos_ += unused_bits;
+}
+
+/* Stores the next |n_bits| bits of |br| in |*val| and advances the bit pos.
+   Precondition: accumulator MUST contain at least |n_bits|; no input is pulled. */
+static BROTLI_INLINE void BrotliTakeBits(
+  BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  *val = (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
+  BROTLI_LOG(("[BrotliReadBits]  %d %d %d val: %6x\n",
+      (int)br->avail_in, (int)br->bit_pos_, (int)n_bits, (int)*val));
+  BrotliDropBits(br, n_bits);
+}
+
+/* Reads the specified number of bits from |br| and advances the bit pos.
+   Assumes that there is enough input to perform BrotliFillBitWindow. */
+static BROTLI_INLINE uint32_t BrotliReadBits(
+    BrotliBitReader* const br, uint32_t n_bits) {
+  if (BROTLI_64_BITS || (n_bits <= 16)) {
+    uint32_t val;
+    BrotliFillBitWindow(br, n_bits);
+    BrotliTakeBits(br, n_bits, &val);
+    return val;
+  } else {  /* non-64-bit build and n_bits > 16: read in two steps */
+    uint32_t low_val;
+    uint32_t high_val;
+    BrotliFillBitWindow(br, 16);
+    BrotliTakeBits(br, 16, &low_val);  /* low 16 bits first */
+    BrotliFillBitWindow(br, 8);
+    BrotliTakeBits(br, n_bits - 16, &high_val);  /* then the remaining high bits */
+    return low_val | (high_val << 16);
+  }
+}
+
+/* Tries to read the specified amount of bits. Returns BROTLI_FALSE, if there
+   is not enough input. |n_bits| MUST be positive. */
+static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits(
+    BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  while (BrotliGetAvailableBits(br) < n_bits) {
+    if (!BrotliPullByte(br)) {  /* bytes pulled so far stay in the accumulator */
+      return BROTLI_FALSE;  /* nothing is consumed and |*val| is left unwritten */
+    }
+  }
+  BrotliTakeBits(br, n_bits, val);
+  return BROTLI_TRUE;
+}
+
+/* Skips to the next byte boundary of the input stream. Returns BROTLI_TRUE
+   only when every skipped padding bit was zero. */
+static BROTLI_INLINE BROTLI_BOOL BrotliJumpToByteBoundary(BrotliBitReader* br) {
+  const uint32_t tail_bits = BrotliGetAvailableBits(br) % 8u;
+  uint32_t skipped = 0;
+  if (tail_bits > 0) {
+    BrotliTakeBits(br, tail_bits, &skipped);
+  }
+  return TO_BROTLI_BOOL(skipped == 0);
+}
+
+/* Moves |num| pending input bytes to |dest|: first the whole bytes still held
+   in the accumulator, then the rest straight from |next_in|. |num| may not
+   exceed BrotliGetRemainingBytes. The bit reader must be warmed up again. */
+static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest,
+                                          BrotliBitReader* br, size_t num) {
+  /* Drain the accumulator one byte at a time. */
+  for (; num > 0 && BrotliGetAvailableBits(br) >= 8; --num) {
+    *dest++ = (uint8_t)BrotliGetBitsUnmasked(br);
+    BrotliDropBits(br, 8);
+  }
+  /* Whatever is left comes directly from the input buffer. */
+  memcpy(dest, br->next_in, num);
+  br->next_in += num;
+  br->avail_in -= num;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_DEC_BIT_READER_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/decode.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/decode.c
new file mode 100755
index 0000000000..08bd76ca16
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/decode.c
@@ -0,0 +1,2506 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include <brotli/decode.h>
+
+#include <stdlib.h>  /* free, malloc */
+#include <string.h>  /* memcpy, memset */
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include "../common/transform.h"
+#include "../common/version.h"
+#include "./bit_reader.h"
+#include "./huffman.h"
+#include "./prefix.h"
+#include "./state.h"
+
+#if defined(BROTLI_TARGET_NEON)
+#include <arm_neon.h>
+#endif
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_FAILURE(CODE) (BROTLI_DUMP(), CODE)  /* runs BROTLI_DUMP(), then yields CODE */
+
+#define BROTLI_LOG_UINT(name)                                       \
+  BROTLI_LOG(("[%s] %s = %lu\n", __func__, #name, (unsigned long)(name)))
+#define BROTLI_LOG_ARRAY_INDEX(array_name, idx)                     \
+  BROTLI_LOG(("[%s] %s[%lu] = %lu\n", __func__, #array_name,        \
+         (unsigned long)(idx), (unsigned long)array_name[idx]))
+
+#define HUFFMAN_TABLE_BITS 8U
+#define HUFFMAN_TABLE_MASK 0xFF
+
+/* We need the slack region for the following reasons:
+    - doing up to two 16-byte copies for fast backward copying
+    - inserting transformed dictionary word (5 prefix + 24 base + 8 suffix) */
+static const uint32_t kRingBufferWriteAheadSlack = 42;
+
+static const uint8_t kCodeLengthCodeOrder[BROTLI_CODE_LENGTH_CODES] = {
+  1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+};
+
+/* Static prefix code for the complex code length code lengths. */
+static const uint8_t kCodeLengthPrefixLength[16] = {
+  2, 2, 2, 3, 2, 2, 2, 4, 2, 2, 2, 3, 2, 2, 2, 4,
+};
+
+static const uint8_t kCodeLengthPrefixValue[16] = {
+  0, 4, 3, 2, 0, 4, 3, 1, 0, 4, 3, 2, 0, 4, 3, 5,
+};
+
+BROTLI_BOOL BrotliDecoderSetParameter(
+    BrotliDecoderState* state, BrotliDecoderParameter p, uint32_t value) {
+  if (state->state != BROTLI_STATE_UNINITED) return BROTLI_FALSE;  /* only while uninited */
+  switch (p) {
+    case BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION:
+      state->canny_ringbuffer_allocation = !!value ? 0 : 1;  /* non-zero turns it off */
+      return BROTLI_TRUE;
+
+    case BROTLI_DECODER_PARAM_LARGE_WINDOW:
+      state->large_window = TO_BROTLI_BOOL(!!value);  /* non-zero turns it on */
+      return BROTLI_TRUE;
+
+    default: return BROTLI_FALSE;  /* unknown parameter */
+  }
+}
+
+BrotliDecoderState* BrotliDecoderCreateInstance(
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+  BrotliDecoderState* state = 0;  /* stays 0 if only one callback is supplied */
+  if (!alloc_func && !free_func) {
+    state = (BrotliDecoderState*)malloc(sizeof(BrotliDecoderState));
+  } else if (alloc_func && free_func) {
+    state = (BrotliDecoderState*)alloc_func(opaque, sizeof(BrotliDecoderState));
+  }
+  if (state == 0) {  /* allocation failed or mismatched callbacks */
+    BROTLI_DUMP();
+    return 0;
+  }
+  if (!BrotliDecoderStateInit(state, alloc_func, free_func, opaque)) {
+    BROTLI_DUMP();
+    if (!alloc_func && !free_func) {  /* release with the matching deallocator */
+      free(state);
+    } else if (alloc_func && free_func) {
+      free_func(opaque, state);
+    }
+    return 0;
+  }
+  return state;
+}
+
+/* Cleans up |state| and releases it through its own free_func. */
+void BrotliDecoderDestroyInstance(BrotliDecoderState* state) {
+  /* A NULL |state| is accepted and ignored. */
+  if (state) {
+    /* Fetch the deallocator before cleanup; it is stored inside |state|. */
+    void* opaque = state->memory_manager_opaque;
+    brotli_free_func release = state->free_func;
+    BrotliDecoderStateCleanup(state);
+    release(opaque, state);
+  }
+}
+
+/* Stores |e| in s->error_code and maps it to a BrotliDecoderResult. */
+static BROTLI_NOINLINE BrotliDecoderResult SaveErrorCode(
+    BrotliDecoderState* s, BrotliDecoderErrorCode e) {
+  s->error_code = (int)e;
+  switch (e) {
+    case BROTLI_DECODER_SUCCESS:
+      return BROTLI_DECODER_RESULT_SUCCESS;
+
+    case BROTLI_DECODER_NEEDS_MORE_INPUT:
+      return BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
+
+    case BROTLI_DECODER_NEEDS_MORE_OUTPUT:
+      return BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT;
+
+    default:  /* every other code is reported as an error */
+      return BROTLI_DECODER_RESULT_ERROR;
+  }
+}
+
+/* Decodes WBITS by reading 1 - 7 bits, or 0x11 for "Large Window Brotli".
+   Precondition: bit-reader accumulator has at least 8 bits. */
+static BrotliDecoderErrorCode DecodeWindowBits(BrotliDecoderState* s,
+                                               BrotliBitReader* br) {
+  uint32_t n;
+  BROTLI_BOOL large_window = s->large_window;  /* was large-window decoding enabled? */
+  s->large_window = BROTLI_FALSE;  /* set again below only if the stream selects it */
+  BrotliTakeBits(br, 1, &n);
+  if (n == 0) {
+    s->window_bits = 16;
+    return BROTLI_DECODER_SUCCESS;
+  }
+  BrotliTakeBits(br, 3, &n);
+  if (n != 0) {
+    s->window_bits = 17 + n;  /* 18..24 */
+    return BROTLI_DECODER_SUCCESS;
+  }
+  BrotliTakeBits(br, 3, &n);
+  if (n == 1) {  /* accepted only as the large-window marker */
+    if (large_window) {
+      BrotliTakeBits(br, 1, &n);
+      if (n == 1) {
+        return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS);
+      }
+      s->large_window = BROTLI_TRUE;  /* window_bits is not assigned on this path */
+      return BROTLI_DECODER_SUCCESS;
+    } else {
+      return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS);
+    }
+  }
+  if (n != 0) {
+    s->window_bits = 8 + n;  /* 10..15 */
+    return BROTLI_DECODER_SUCCESS;
+  }
+  s->window_bits = 17;
+  return BROTLI_DECODER_SUCCESS;
+}
+
+static BROTLI_INLINE void memmove16(uint8_t* dst, uint8_t* src) {  /* copies exactly 16 bytes */
+#if defined(BROTLI_TARGET_NEON)
+  vst1q_u8(dst, vld1q_u8(src));  /* 16-byte vector load, then store */
+#else
+  uint32_t buffer[4];  /* 16-byte staging buffer */
+  memcpy(buffer, src, 16);  /* all of src is read before dst is written */
+  memcpy(dst, buffer, 16);
+#endif
+}
+
+/* Decodes a number in the range [0..255], by reading 1 - 11 bits. */
+static BROTLI_NOINLINE BrotliDecoderErrorCode DecodeVarLenUint8(
+    BrotliDecoderState* s, BrotliBitReader* br, uint32_t* value) {
+  uint32_t bits;
+  switch (s->substate_decode_uint8) {  /* resumable: the substate records where to continue */
+    case BROTLI_STATE_DECODE_UINT8_NONE:
+      if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(br, 1, &bits))) {
+        return BROTLI_DECODER_NEEDS_MORE_INPUT;  /* substate stays NONE */
+      }
+      if (bits == 0) {
+        *value = 0;
+        return BROTLI_DECODER_SUCCESS;
+      }
+    /* Fall through. */
+
+    case BROTLI_STATE_DECODE_UINT8_SHORT:
+      if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(br, 3, &bits))) {
+        s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_SHORT;
+        return BROTLI_DECODER_NEEDS_MORE_INPUT;  /* next call resumes at SHORT */
+      }
+      if (bits == 0) {
+        *value = 1;
+        s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
+        return BROTLI_DECODER_SUCCESS;
+      }
+      /* Use output value as a temporary storage. It MUST be persisted. */
+      *value = bits;  /* 1..7: number of extra bits to read */
+    /* Fall through. */
+
+    case BROTLI_STATE_DECODE_UINT8_LONG:
+      if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(br, *value, &bits))) {
+        s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_LONG;
+        return BROTLI_DECODER_NEEDS_MORE_INPUT;  /* next call resumes at LONG */
+      }
+      *value = (1U << *value) + bits;  /* with 1..7 extra bits the result is in [2..255] */
+      s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
+      return BROTLI_DECODER_SUCCESS;
+
+    default:
+      return
+          BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);
+  }
+}
+
+/* Decodes metablock length and flags (2 - 31 bits); resumes after a stall. */
+static BrotliDecoderErrorCode BROTLI_NOINLINE DecodeMetaBlockLength(
+    BrotliDecoderState* s, BrotliBitReader* br) {
+  uint32_t bits;
+  int i;
+  for (;;) {
+    switch (s->substate_metablock_header) {
+      case BROTLI_STATE_METABLOCK_HEADER_NONE:  /* 1 bit: is-last flag. */
+        if (!BrotliSafeReadBits(br, 1, &bits)) {
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        s->is_last_metablock = bits ? 1 : 0;
+        s->meta_block_remaining_len = 0;
+        s->is_uncompressed = 0;
+        s->is_metadata = 0;
+        if (!s->is_last_metablock) {
+          s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NIBBLES;
+          break;  /* Back to the switch; skips the EMPTY state. */
+        }
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_EMPTY;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER_EMPTY:  /* Last metablock only. */
+        if (!BrotliSafeReadBits(br, 1, &bits)) {
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        if (bits) {  /* Empty last metablock: remaining length stays 0. */
+          s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+          return BROTLI_DECODER_SUCCESS;
+        }
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NIBBLES;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER_NIBBLES:  /* 2 bits: nibbles - 4. */
+        if (!BrotliSafeReadBits(br, 2, &bits)) {
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        s->size_nibbles = (uint8_t)(bits + 4);
+        s->loop_counter = 0;
+        if (bits == 3) {  /* Code 3 switches to the metadata path. */
+          s->is_metadata = 1;
+          s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_RESERVED;
+          break;
+        }
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_SIZE;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER_SIZE:  /* Nibbles, lowest first. */
+        i = s->loop_counter;
+        for (; i < (int)s->size_nibbles; ++i) {
+          if (!BrotliSafeReadBits(br, 4, &bits)) {
+            s->loop_counter = i;  /* Resume at this nibble. */
+            return BROTLI_DECODER_NEEDS_MORE_INPUT;
+          }
+          if (i + 1 == s->size_nibbles && s->size_nibbles > 4 && bits == 0) {
+            return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_NIBBLE);
+          }
+          s->meta_block_remaining_len |= (int)(bits << (i * 4));
+        }
+        s->substate_metablock_header =
+            BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED:
+        if (!s->is_last_metablock) {  /* Last metablock has no such bit. */
+          if (!BrotliSafeReadBits(br, 1, &bits)) {
+            return BROTLI_DECODER_NEEDS_MORE_INPUT;
+          }
+          s->is_uncompressed = bits ? 1 : 0;
+        }
+        ++s->meta_block_remaining_len;  /* The stream stores length - 1. */
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+        return BROTLI_DECODER_SUCCESS;
+
+      case BROTLI_STATE_METABLOCK_HEADER_RESERVED:  /* 1 bit, must be 0. */
+        if (!BrotliSafeReadBits(br, 1, &bits)) {
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        if (bits != 0) {
+          return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_RESERVED);
+        }
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_BYTES;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER_BYTES:  /* 2 bits: length bytes. */
+        if (!BrotliSafeReadBits(br, 2, &bits)) {
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        if (bits == 0) {  /* No length bytes: remaining length stays 0. */
+          s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+          return BROTLI_DECODER_SUCCESS;
+        }
+        s->size_nibbles = (uint8_t)bits;  /* Reused as a byte count, 1..3. */
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_METADATA;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER_METADATA:  /* Bytes, lowest first. */
+        i = s->loop_counter;
+        for (; i < (int)s->size_nibbles; ++i) {
+          if (!BrotliSafeReadBits(br, 8, &bits)) {
+            s->loop_counter = i;  /* Resume at this byte. */
+            return BROTLI_DECODER_NEEDS_MORE_INPUT;
+          }
+          if (i + 1 == s->size_nibbles && s->size_nibbles > 1 && bits == 0) {
+            return BROTLI_FAILURE(
+                BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_META_NIBBLE);
+          }
+          s->meta_block_remaining_len |= (int)(bits << (i * 8));
+        }
+        ++s->meta_block_remaining_len;  /* The stream stores length - 1. */
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+        return BROTLI_DECODER_SUCCESS;
+
+      default:  /* Substate holds none of the known values. */
+        return
+            BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);
+    }
+  }
+}
+
+/* Decodes one Huffman symbol from the already-fetched `bits`.
+   Nothing is pulled into the bit reader here, BUT the bits that make up the
+   decoded symbol are dropped from it. Returns the decoded symbol value.
+   bits MUST contain at least 15 (BROTLI_HUFFMAN_MAX_CODE_LENGTH) valid bits. */
+static BROTLI_INLINE uint32_t DecodeSymbol(uint32_t bits,
+                                           const HuffmanCode* table,
+                                           BrotliBitReader* br) {
+  BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table);
+  BROTLI_HC_ADJUST_TABLE_INDEX(table, bits & HUFFMAN_TABLE_MASK);
+  if (BROTLI_HC_FAST_LOAD_BITS(table) > HUFFMAN_TABLE_BITS) {  /* 2nd level. */
+    uint32_t nbits = BROTLI_HC_FAST_LOAD_BITS(table) - HUFFMAN_TABLE_BITS;
+    BrotliDropBits(br, HUFFMAN_TABLE_BITS);
+    BROTLI_HC_ADJUST_TABLE_INDEX(table,
+        BROTLI_HC_FAST_LOAD_VALUE(table) +
+        ((bits >> HUFFMAN_TABLE_BITS) & BitMask(nbits)));
+  }
+  BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(table));  /* Final entry. */
+  return BROTLI_HC_FAST_LOAD_VALUE(table);
+}
+
+/* Decodes the next Huffman symbol: peeks 16 bits, drops 0 - 15 of them. */
+static BROTLI_INLINE uint32_t ReadSymbol(const HuffmanCode* table,
+                                         BrotliBitReader* br) {
+  const uint32_t peeked = BrotliGet16BitsUnmasked(br);
+  return DecodeSymbol(peeked, table, br);
+}
+
+/* Same as DecodeSymbol, for when fewer than 15 bits of input are known to be
+   available. Drops nothing unless a complete symbol could be decoded. */
+static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol(
+    const HuffmanCode* table, BrotliBitReader* br, uint32_t* result) {
+  uint32_t val;
+  uint32_t available_bits = BrotliGetAvailableBits(br);
+  BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table);
+  if (available_bits == 0) {
+    if (BROTLI_HC_FAST_LOAD_BITS(table) == 0) {  /* Zero-length code. */
+      *result = BROTLI_HC_FAST_LOAD_VALUE(table);
+      return BROTLI_TRUE;
+    }
+    return BROTLI_FALSE;  /* No valid bits at all. */
+  }
+  val = (uint32_t)BrotliGetBitsUnmasked(br);
+  BROTLI_HC_ADJUST_TABLE_INDEX(table, val & HUFFMAN_TABLE_MASK);
+  if (BROTLI_HC_FAST_LOAD_BITS(table) <= HUFFMAN_TABLE_BITS) {
+    if (BROTLI_HC_FAST_LOAD_BITS(table) <= available_bits) {
+      BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(table));
+      *result = BROTLI_HC_FAST_LOAD_VALUE(table);
+      return BROTLI_TRUE;
+    } else {
+      return BROTLI_FALSE;  /* Not enough bits for the first level. */
+    }
+  }
+  if (available_bits <= HUFFMAN_TABLE_BITS) {
+    return BROTLI_FALSE;  /* Not enough bits to move to the second level. */
+  }
+
+  /* Advance the local copies only; the bit reader is not touched yet. */
+  val = (val & BitMask(BROTLI_HC_FAST_LOAD_BITS(table))) >> HUFFMAN_TABLE_BITS;
+  available_bits -= HUFFMAN_TABLE_BITS;
+  BROTLI_HC_ADJUST_TABLE_INDEX(table, BROTLI_HC_FAST_LOAD_VALUE(table) + val);
+  if (available_bits < BROTLI_HC_FAST_LOAD_BITS(table)) {
+    return BROTLI_FALSE;  /* Not enough bits for the second level. */
+  }
+
+  BrotliDropBits(br, HUFFMAN_TABLE_BITS + BROTLI_HC_FAST_LOAD_BITS(table));
+  *result = BROTLI_HC_FAST_LOAD_VALUE(table);
+  return BROTLI_TRUE;
+}
+
+static BROTLI_INLINE BROTLI_BOOL SafeReadSymbol(
+    const HuffmanCode* table, BrotliBitReader* br, uint32_t* result) {
+  uint32_t peeked;  /* Next 15 bits, when that many can be peeked. */
+  if (BROTLI_PREDICT_FALSE(!BrotliSafeGetBits(br, 15, &peeked))) {
+    return SafeDecodeSymbol(table, br, result);
+  }
+  *result = DecodeSymbol(peeked, table, br);
+  return BROTLI_TRUE;
+}
+
+/* First-level Huffman look-up on peeked bits (drops none); fills bits/value. */
+static BROTLI_INLINE void PreloadSymbol(int safe,
+                                        const HuffmanCode* table,
+                                        BrotliBitReader* br,
+                                        uint32_t* bits,
+                                        uint32_t* value) {
+  if (safe) {  /* Safe mode: no look-up, *bits and *value stay untouched. */
+    return;
+  }
+  BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table);
+  BROTLI_HC_ADJUST_TABLE_INDEX(table, BrotliGetBits(br, HUFFMAN_TABLE_BITS));
+  *bits = BROTLI_HC_FAST_LOAD_BITS(table);  /* Code length of the entry. */
+  *value = BROTLI_HC_FAST_LOAD_VALUE(table);  /* Symbol or 2nd-level offset. */
+}
+
+/* Decodes the next Huffman code using data prepared by PreloadSymbol, then
+   preloads the following one. Reads 0 - 15 bits; peeks at the bits after. */
+static BROTLI_INLINE uint32_t ReadPreloadedSymbol(const HuffmanCode* table,
+                                                  BrotliBitReader* br,
+                                                  uint32_t* bits,
+                                                  uint32_t* value) {
+  uint32_t result = *value;  /* Already final for a first-level code. */
+  if (BROTLI_PREDICT_FALSE(*bits > HUFFMAN_TABLE_BITS)) {
+    uint32_t val = BrotliGet16BitsUnmasked(br);
+    const HuffmanCode* ext = table + (val & HUFFMAN_TABLE_MASK) + *value;
+    uint32_t mask = BitMask((*bits - HUFFMAN_TABLE_BITS));
+    BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(ext);
+    BrotliDropBits(br, HUFFMAN_TABLE_BITS);  /* First-level part. */
+    BROTLI_HC_ADJUST_TABLE_INDEX(ext, (val >> HUFFMAN_TABLE_BITS) & mask);
+    BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(ext));  /* Second-level part. */
+    result = BROTLI_HC_FAST_LOAD_VALUE(ext);
+  } else {
+    BrotliDropBits(br, *bits);
+  }
+  PreloadSymbol(0, table, br, bits, value);  /* Refill for the next call. */
+  return result;
+}
+
+static BROTLI_INLINE uint32_t Log2Floor(uint32_t x) {
+  /* Counts significant bits of x: 0 for x == 0, else floor(log2(x)) + 1. */
+  uint32_t width;
+  for (width = 0; x != 0; x >>= 1) {
+    ++width;
+  }
+  return width;
+}
+
+/* Reads the (s->symbol + 1) symbol values of a simple prefix code.
+   That is 1..4 symbols of 1..11 bits each (1..44 bits in total).
+   Fails if a symbol is out of range or appears twice. */
+static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols(
+    uint32_t alphabet_size, uint32_t max_symbol, BrotliDecoderState* s) {
+  const uint32_t last = s->symbol;  /* Index of the final symbol, 0..3. */
+  const uint32_t width = Log2Floor(alphabet_size - 1);  /* Bits per symbol. */
+  BrotliBitReader* br = &s->br;
+  uint32_t i;
+  uint32_t k;
+  /* Resume from wherever a previous, input-starved call stopped. */
+  for (i = s->sub_loop_counter; i <= last; ++i) {
+    uint32_t v;
+    if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(br, width, &v))) {
+      s->sub_loop_counter = i;
+      s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_READ;
+      return BROTLI_DECODER_NEEDS_MORE_INPUT;
+    }
+    if (v >= max_symbol) {
+      return
+          BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_ALPHABET);
+    }
+    s->symbols_lists_array[i] = (uint16_t)v;
+    BROTLI_LOG_UINT(s->symbols_lists_array[i]);
+  }
+
+  /* Pairwise comparison; at most 6 pairs, so quadratic cost is irrelevant. */
+  for (i = 0; i < last; ++i) {
+    for (k = i + 1; k <= last; ++k) {
+      if (s->symbols_lists_array[k] == s->symbols_lists_array[i]) {
+        return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_SAME);
+      }
+    }
+  }
+
+  return BROTLI_DECODER_SUCCESS;
+}
+
+/* Handles one explicitly coded code length (0..15) for the current symbol:
+    A) cancel any repeat run in progress
+    B) for a non-zero length: link the symbol into that length's index-chain,
+       remember the length, shrink the Huffman space, bump the histogram
+    C) move on to the next symbol */
+static BROTLI_INLINE void ProcessSingleCodeLength(uint32_t code_len,
+    uint32_t* symbol, uint32_t* repeat, uint32_t* space,
+    uint32_t* prev_code_len, uint16_t* symbol_lists,
+    uint16_t* code_length_histo, int* next_symbol) {
+  *repeat = 0;
+  if (code_len != 0) {  /* code_len == 1..15 */
+    int* chain_tail = &next_symbol[code_len];
+    symbol_lists[*chain_tail] = (uint16_t)(*symbol);
+    *chain_tail = (int)(*symbol);
+    *prev_code_len = code_len;
+    *space -= 32768U >> code_len;
+    ++code_length_histo[code_len];
+    BROTLI_LOG(("[ReadHuffmanCode] code_length[%d] = %d\n",
+        (int)*symbol, (int)code_len));
+  }
+  *symbol += 1;
+}
+
+/* Process repeated symbol code length.
+    A) Check if it is the extension of previous repeat sequence; if the decoded
+       value is not BROTLI_REPEAT_PREVIOUS_CODE_LENGTH, then it is a new
+       symbol-skip
+    B) Update repeat variable
+    C) Check if operation is feasible (fits alphabet)
+    D) For each symbol do the same operations as in ProcessSingleCodeLength
+
+   PRECONDITION: code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH or
+                 code_len == BROTLI_REPEAT_ZERO_CODE_LENGTH */
+static BROTLI_INLINE void ProcessRepeatedCodeLength(uint32_t code_len,
+    uint32_t repeat_delta, uint32_t alphabet_size, uint32_t* symbol,
+    uint32_t* repeat, uint32_t* space, uint32_t* prev_code_len,
+    uint32_t* repeat_code_len, uint16_t* symbol_lists,
+    uint16_t* code_length_histo, int* next_symbol) {
+  uint32_t old_repeat;
+  uint32_t extra_bits = 3;  /* for BROTLI_REPEAT_ZERO_CODE_LENGTH */
+  uint32_t new_len = 0;  /* for BROTLI_REPEAT_ZERO_CODE_LENGTH */
+  if (code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) {
+    new_len = *prev_code_len;
+    extra_bits = 2;
+  }
+  if (*repeat_code_len != new_len) {  /* Different length: start a new run. */
+    *repeat = 0;
+    *repeat_code_len = new_len;
+  }
+  old_repeat = *repeat;
+  if (*repeat > 0) {  /* Extending a run: rescale the count so far. */
+    *repeat -= 2;
+    *repeat <<= extra_bits;
+  }
+  *repeat += repeat_delta + 3U;
+  repeat_delta = *repeat - old_repeat;  /* Symbols added by this code only. */
+  if (*symbol + repeat_delta > alphabet_size) {
+    BROTLI_DUMP();
+    *symbol = alphabet_size;  /* Makes the callers' read loops terminate. */
+    *space = 0xFFFFF;  /* Non-zero, so the space == 0 check in callers fails. */
+    return;
+  }
+  BROTLI_LOG(("[ReadHuffmanCode] code_length[%d..%d] = %d\n",
+      (int)*symbol, (int)(*symbol + repeat_delta - 1), (int)*repeat_code_len));
+  if (*repeat_code_len != 0) {
+    unsigned last = *symbol + repeat_delta;
+    int next = next_symbol[*repeat_code_len];
+    do {  /* Append each new symbol to this length's index-chain. */
+      symbol_lists[next] = (uint16_t)*symbol;
+      next = (int)*symbol;
+    } while (++(*symbol) != last);
+    next_symbol[*repeat_code_len] = next;
+    *space -= repeat_delta << (15 - *repeat_code_len);
+    code_length_histo[*repeat_code_len] =
+        (uint16_t)(code_length_histo[*repeat_code_len] + repeat_delta);
+  } else {  /* Run of zero lengths: just skip the symbols. */
+    *symbol += repeat_delta;
+  }
+}
+
+/* Reads and decodes symbol codelengths; bails out when input runs short. */
+static BrotliDecoderErrorCode ReadSymbolCodeLengths(
+    uint32_t alphabet_size, BrotliDecoderState* s) {
+  BrotliBitReader* br = &s->br;
+  uint32_t symbol = s->symbol;  /* Work on local copies of the state. */
+  uint32_t repeat = s->repeat;
+  uint32_t space = s->space;
+  uint32_t prev_code_len = s->prev_code_len;
+  uint32_t repeat_code_len = s->repeat_code_len;
+  uint16_t* symbol_lists = s->symbol_lists;
+  uint16_t* code_length_histo = s->code_length_histo;
+  int* next_symbol = s->next_symbol;
+  if (!BrotliWarmupBitReader(br)) {
+    return BROTLI_DECODER_NEEDS_MORE_INPUT;
+  }
+  while (symbol < alphabet_size && space > 0) {
+    const HuffmanCode* p = s->table;
+    uint32_t code_len;
+    BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(p);
+    if (!BrotliCheckInputAmount(br, BROTLI_SHORT_FILL_BIT_WINDOW_READ)) {
+      s->symbol = symbol;  /* Write progress back so a later call resumes. */
+      s->repeat = repeat;
+      s->prev_code_len = prev_code_len;
+      s->repeat_code_len = repeat_code_len;
+      s->space = space;
+      return BROTLI_DECODER_NEEDS_MORE_INPUT;
+    }
+    BrotliFillBitWindow16(br);
+    BROTLI_HC_ADJUST_TABLE_INDEX(p, BrotliGetBitsUnmasked(br) &
+        BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH));
+    BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(p));  /* Use 1..5 bits. */
+    code_len = BROTLI_HC_FAST_LOAD_VALUE(p);  /* code_len == 0..17 */
+    if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) {
+      ProcessSingleCodeLength(code_len, &symbol, &repeat, &space,
+          &prev_code_len, symbol_lists, code_length_histo, next_symbol);
+    } else {  /* code_len == 16..17, extra_bits == 2..3 */
+      uint32_t extra_bits =
+          (code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) ? 2 : 3;
+      uint32_t repeat_delta =
+          (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(extra_bits);
+      BrotliDropBits(br, extra_bits);
+      ProcessRepeatedCodeLength(code_len, repeat_delta, alphabet_size,
+          &symbol, &repeat, &space, &prev_code_len, &repeat_code_len,
+          symbol_lists, code_length_histo, next_symbol);
+    }
+  }
+  s->space = space;  /* On completion only `space` is written back. */
+  return BROTLI_DECODER_SUCCESS;
+}
+
+static BrotliDecoderErrorCode SafeReadSymbolCodeLengths(
+    uint32_t alphabet_size, BrotliDecoderState* s) {
+  BrotliBitReader* br = &s->br;
+  BROTLI_BOOL get_byte = BROTLI_FALSE;  /* Set when one more byte is needed. */
+  while (s->symbol < alphabet_size && s->space > 0) {
+    const HuffmanCode* p = s->table;
+    uint32_t code_len;
+    uint32_t available_bits;
+    uint32_t bits = 0;
+    BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(p);
+    if (get_byte && !BrotliPullByte(br)) return BROTLI_DECODER_NEEDS_MORE_INPUT;
+    get_byte = BROTLI_FALSE;
+    available_bits = BrotliGetAvailableBits(br);
+    if (available_bits != 0) {
+      bits = (uint32_t)BrotliGetBitsUnmasked(br);
+    }
+    BROTLI_HC_ADJUST_TABLE_INDEX(p,
+        bits & BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH));
+    if (BROTLI_HC_FAST_LOAD_BITS(p) > available_bits) {
+      get_byte = BROTLI_TRUE;  /* Code is incomplete; nothing dropped yet. */
+      continue;
+    }
+    code_len = BROTLI_HC_FAST_LOAD_VALUE(p);  /* code_len == 0..17 */
+    if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) {
+      BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(p));
+      ProcessSingleCodeLength(code_len, &s->symbol, &s->repeat, &s->space,
+          &s->prev_code_len, s->symbol_lists, s->code_length_histo,
+          s->next_symbol);
+    } else {  /* code_len == 16..17, extra_bits == 2..3 */
+      uint32_t extra_bits = code_len - 14U;
+      uint32_t repeat_delta = (bits >> BROTLI_HC_FAST_LOAD_BITS(p)) &
+          BitMask(extra_bits);
+      if (available_bits < BROTLI_HC_FAST_LOAD_BITS(p) + extra_bits) {
+        get_byte = BROTLI_TRUE;  /* Extra bits missing; retry whole code. */
+        continue;
+      }
+      BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(p) + extra_bits);
+      ProcessRepeatedCodeLength(code_len, repeat_delta, alphabet_size,
+          &s->symbol, &s->repeat, &s->space, &s->prev_code_len,
+          &s->repeat_code_len, s->symbol_lists, s->code_length_histo,
+          s->next_symbol);
+    }
+  }
+  return BROTLI_DECODER_SUCCESS;
+}
+
+/* Reads and decodes 15..18 codes using static prefix code.
+   Each code is 2..4 bits long. In total 30..72 bits are used. */
+static BrotliDecoderErrorCode ReadCodeLengthCodeLengths(BrotliDecoderState* s) {
+  BrotliBitReader* br = &s->br;
+  uint32_t num_codes = s->repeat;  /* s->repeat doubles as the code count. */
+  unsigned space = s->space;
+  uint32_t i = s->sub_loop_counter;  /* Start or resume position. */
+  for (; i < BROTLI_CODE_LENGTH_CODES; ++i) {
+    const uint8_t code_len_idx = kCodeLengthCodeOrder[i];
+    uint32_t ix;
+    uint32_t v;
+    if (BROTLI_PREDICT_FALSE(!BrotliSafeGetBits(br, 4, &ix))) {
+      uint32_t available_bits = BrotliGetAvailableBits(br);
+      if (available_bits != 0) {
+        ix = BrotliGetBitsUnmasked(br) & 0xF;
+      } else {
+        ix = 0;
+      }
+      if (kCodeLengthPrefixLength[ix] > available_bits) {
+        s->sub_loop_counter = i;  /* Save progress; nothing dropped yet. */
+        s->repeat = num_codes;
+        s->space = space;
+        s->substate_huffman = BROTLI_STATE_HUFFMAN_COMPLEX;
+        return BROTLI_DECODER_NEEDS_MORE_INPUT;
+      }
+    }
+    v = kCodeLengthPrefixValue[ix];
+    BrotliDropBits(br, kCodeLengthPrefixLength[ix]);
+    s->code_length_code_lengths[code_len_idx] = (uint8_t)v;
+    BROTLI_LOG_ARRAY_INDEX(s->code_length_code_lengths, code_len_idx);
+    if (v != 0) {
+      space = space - (32U >> v);
+      ++num_codes;
+      ++s->code_length_histo[v];
+      if (space - 1U >= 32U) {
+        /* space is 0 or wrapped around. */
+        break;
+      }
+    }
+  }
+  if (!(num_codes == 1 || space == 0)) {  /* One lone code may leave space. */
+    return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_CL_SPACE);
+  }
+  return BROTLI_DECODER_SUCCESS;
+}
+
+/* Decodes the Huffman tables.
+   There are 2 scenarios:
+    A) Huffman code contains only few symbols (1..4). Those symbols are read
+       directly; their code lengths are defined by the number of symbols.
+       For this scenario 4 - 49 bits will be read.
+
+    B) 2-phase decoding:
+    B.1) Small Huffman table is decoded; it is specified with code lengths
+         encoded with predefined entropy code. 32 - 74 bits are used.
+    B.2) Decoded table is used to decode code lengths of symbols in resulting
+         Huffman table. In worst case 3520 bits are read. */
+static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
+                                              uint32_t max_symbol,
+                                              HuffmanCode* table,
+                                              uint32_t* opt_table_size,
+                                              BrotliDecoderState* s) {
+  BrotliBitReader* br = &s->br;
+  /* Unnecessary masking, but might be good for safety. */
+  alphabet_size &= 0x7FF;
+  /* State machine. */
+  for (;;) {
+    switch (s->substate_huffman) {
+      case BROTLI_STATE_HUFFMAN_NONE:  /* 2-bit selector. */
+        if (!BrotliSafeReadBits(br, 2, &s->sub_loop_counter)) {
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        BROTLI_LOG_UINT(s->sub_loop_counter);
+        /* The value is used as follows:
+           1 for simple code;
+           0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */
+        if (s->sub_loop_counter != 1) {
+          s->space = 32;
+          s->repeat = 0;  /* num_codes */
+          memset(&s->code_length_histo[0], 0, sizeof(s->code_length_histo[0]) *
+              (BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH + 1));
+          memset(&s->code_length_code_lengths[0], 0,
+              sizeof(s->code_length_code_lengths));
+          s->substate_huffman = BROTLI_STATE_HUFFMAN_COMPLEX;
+          continue;  /* Re-dispatch straight to the COMPLEX case. */
+        }
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_SIMPLE_SIZE:
+        /* Read symbols, codes & code lengths directly. */
+        if (!BrotliSafeReadBits(br, 2, &s->symbol)) {  /* num_symbols */
+          s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_SIZE;
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        s->sub_loop_counter = 0;  /* Index of the next symbol to read. */
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_SIMPLE_READ: {
+        BrotliDecoderErrorCode result =
+            ReadSimpleHuffmanSymbols(alphabet_size, max_symbol, s);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          return result;
+        }
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_SIMPLE_BUILD: {
+        uint32_t table_size;
+        if (s->symbol == 3) {  /* Only this count carries one extra bit. */
+          uint32_t bits;
+          if (!BrotliSafeReadBits(br, 1, &bits)) {
+            s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_BUILD;
+            return BROTLI_DECODER_NEEDS_MORE_INPUT;
+          }
+          s->symbol += bits;  /* Stays 3 or becomes 4 for the builder. */
+        }
+        BROTLI_LOG_UINT(s->symbol);
+        table_size = BrotliBuildSimpleHuffmanTable(
+            table, HUFFMAN_TABLE_BITS, s->symbols_lists_array, s->symbol);
+        if (opt_table_size) {
+          *opt_table_size = table_size;
+        }
+        s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
+        return BROTLI_DECODER_SUCCESS;
+      }
+
+      /* Decode Huffman-coded code lengths. */
+      case BROTLI_STATE_HUFFMAN_COMPLEX: {
+        uint32_t i;
+        BrotliDecoderErrorCode result = ReadCodeLengthCodeLengths(s);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          return result;
+        }
+        BrotliBuildCodeLengthsHuffmanTable(s->table,
+                                           s->code_length_code_lengths,
+                                           s->code_length_histo);
+        memset(&s->code_length_histo[0], 0, sizeof(s->code_length_histo));
+        for (i = 0; i <= BROTLI_HUFFMAN_MAX_CODE_LENGTH; ++i) {
+          s->next_symbol[i] = (int)i - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);
+          s->symbol_lists[s->next_symbol[i]] = 0xFFFF;  /* Negative index. */
+        }
+
+        s->symbol = 0;
+        s->prev_code_len = BROTLI_INITIAL_REPEATED_CODE_LENGTH;
+        s->repeat = 0;
+        s->repeat_code_len = 0;
+        s->space = 32768;
+        s->substate_huffman = BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS: {
+        uint32_t table_size;
+        BrotliDecoderErrorCode result = ReadSymbolCodeLengths(max_symbol, s);
+        if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) {
+          result = SafeReadSymbolCodeLengths(max_symbol, s);  /* Slow path. */
+        }
+        if (result != BROTLI_DECODER_SUCCESS) {
+          return result;
+        }
+
+        if (s->space != 0) {  /* Code lengths must use up the space exactly. */
+          BROTLI_LOG(("[ReadHuffmanCode] space = %d\n", (int)s->space));
+          return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_HUFFMAN_SPACE);
+        }
+        table_size = BrotliBuildHuffmanTable(
+            table, HUFFMAN_TABLE_BITS, s->symbol_lists, s->code_length_histo);
+        if (opt_table_size) {
+          *opt_table_size = table_size;
+        }
+        s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
+        return BROTLI_DECODER_SUCCESS;
+      }
+
+      default:  /* Substate holds none of the known values. */
+        return
+            BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);
+    }
+  }
+}
+
+/* Decodes a block length without input checks: a prefix symbol followed by
+   its extra bits, 3..39 bits in total. */
+static BROTLI_INLINE uint32_t ReadBlockLength(const HuffmanCode* table,
+                                              BrotliBitReader* br) {
+  const uint32_t code = ReadSymbol(table, br);
+  const uint32_t extra = kBlockLengthPrefixCode[code].nbits;  /* 2..24 */
+  const uint32_t base = kBlockLengthPrefixCode[code].offset;
+  return base + BrotliReadBits(br, extra);
+}
+
+/* Resumable counterpart of ReadBlockLength.
+   WARNING: if state is not BROTLI_STATE_READ_BLOCK_LENGTH_NONE, then
+   reading can't be continued with ReadBlockLength. */
+static BROTLI_INLINE BROTLI_BOOL SafeReadBlockLength(
+    BrotliDecoderState* s, uint32_t* result, const HuffmanCode* table,
+    BrotliBitReader* br) {
+  uint32_t index;
+  uint32_t nbits;
+  uint32_t bits;
+  if (s->substate_read_block_length != BROTLI_STATE_READ_BLOCK_LENGTH_NONE) {
+    /* The prefix symbol was decoded by an earlier call; only bits remain. */
+    index = s->block_length_index;
+  } else if (!SafeReadSymbol(table, br, &index)) {
+    return BROTLI_FALSE;  /* Substate stays NONE: retry from scratch. */
+  }
+  nbits = kBlockLengthPrefixCode[index].nbits;  /* nbits == 2..24 */
+  if (!BrotliSafeReadBits(br, nbits, &bits)) {
+    /* Remember the symbol so that the next call skips straight to here. */
+    s->block_length_index = index;
+    s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX;
+    return BROTLI_FALSE;
+  }
+  *result = kBlockLengthPrefixCode[index].offset + bits;
+  s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_NONE;
+  return BROTLI_TRUE;
+}
+
+/* Transform:
+    1) initialize list L with values 0, 1,... 255
+    2) For each input element X:
+    2.1) let Y = L[X]
+    2.2) remove X-th element from L
+    2.3) prepend Y to L
+    2.4) append Y to output
+
+   In most cases max(Y) <= 7, so most of L remains intact.
+   To reduce the cost of initialization, we reuse L, remember the upper bound
+   of Y values, and reinitialize only first elements in L.
+
+   Most of input values are 0 and 1. To reduce number of branches, we replace
+   inner for loop with do-while. The transform rewrites v in place. */
+static BROTLI_NOINLINE void InverseMoveToFrontTransform(
+    uint8_t* v, uint32_t v_len, BrotliDecoderState* state) {
+  /* Reinitialize elements that could have been changed. */
+  uint32_t i = 1;
+  uint32_t upper_bound = state->mtf_upper_bound;
+  uint32_t* mtf = &state->mtf[1];  /* Make mtf[-1] addressable. */
+  uint8_t* mtf_u8 = (uint8_t*)mtf;
+  /* Load endian-aware constant. */
+  const uint8_t b0123[4] = {0, 1, 2, 3};
+  uint32_t pattern;
+  memcpy(&pattern, &b0123, 4);
+
+  /* Initialize list using a pattern of 4 consecutive values. */
+  mtf[0] = pattern;
+  do {
+    pattern += 0x04040404;  /* Advance all 4 values by 4. */
+    mtf[i] = pattern;
+    i++;
+  } while (i <= upper_bound);
+
+  /* Transform the input. */
+  upper_bound = 0;
+  for (i = 0; i < v_len; ++i) {
+    int index = v[i];
+    uint8_t value = mtf_u8[index];
+    upper_bound |= v[i];  /* OR of all indices bounds the touched range. */
+    v[i] = value;
+    mtf_u8[-1] = value;  /* Staged so the last shift below moves it to [0]. */
+    do {
+      index--;
+      mtf_u8[index + 1] = mtf_u8[index];
+    } while (index >= 0);
+  }
+  /* Remember amount of elements to be reinitialized. */
+  state->mtf_upper_bound = upper_bound >> 2;  /* In 4-value words of mtf. */
+}
+
+/* Decodes every Huffman table of the group, one ReadHuffmanCode call each. */
+static BrotliDecoderErrorCode HuffmanTreeGroupDecode(
+    HuffmanTreeGroup* group, BrotliDecoderState* s) {
+  BrotliDecoderErrorCode status;
+  uint32_t table_size;
+  if (s->substate_tree_group != BROTLI_STATE_TREE_GROUP_LOOP) {
+    /* First entry for this group: start at the head of its code storage. */
+    s->next = group->codes;
+    s->htree_index = 0;
+    s->substate_tree_group = BROTLI_STATE_TREE_GROUP_LOOP;
+  }
+  for (; s->htree_index < group->num_htrees; ++s->htree_index) {
+    status = ReadHuffmanCode(group->alphabet_size, group->max_symbol,
+                             s->next, &table_size, s);
+    if (status != BROTLI_DECODER_SUCCESS) return status;
+    group->htrees[s->htree_index] = s->next;
+    s->next += table_size;
+  }
+  s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE;
+  return BROTLI_DECODER_SUCCESS;
+}
+
+/* Decodes a context map.
+   Decoding is done in 4 phases:
+    1) Read auxiliary information (6..16 bits) and allocate memory.
+       In case of trivial context map, decoding is finished at this phase.
+    2) Decode Huffman table using ReadHuffmanCode function.
+       This table will be used for reading context map items.
+    3) Read context map items; "0" values could be run-length encoded.
+    4) Optionally, apply InverseMoveToFront transform to the resulting map.
+
+   The function is a resumable state machine: the current phase is kept in
+   s->substate_context_map, and the position inside phase 3 is kept in
+   s->context_index and s->code (0xFFFF means "no run-length code pending").
+
+   context_map_size: number of entries to allocate and fill.
+   num_htrees:       out; receives the decoded value plus one.
+   context_map_arg:  out; receives the buffer obtained from
+                     BROTLI_DECODER_ALLOC.
+
+   Returns BROTLI_DECODER_SUCCESS when the map is complete,
+   BROTLI_DECODER_NEEDS_MORE_INPUT when a bit-reader call cannot be
+   satisfied, or an error code: allocation failure, a zero run that would
+   exceed context_map_size, an unexpected sub-state, or whatever
+   DecodeVarLenUint8 / ReadHuffmanCode reported. */
+static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
+                                               uint32_t* num_htrees,
+                                               uint8_t** context_map_arg,
+                                               BrotliDecoderState* s) {
+  BrotliBitReader* br = &s->br;
+  BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS;
+
+  switch ((int)s->substate_context_map) {
+    case BROTLI_STATE_CONTEXT_MAP_NONE:
+      result = DecodeVarLenUint8(s, br, num_htrees);
+      if (result != BROTLI_DECODER_SUCCESS) {
+        return result;
+      }
+      (*num_htrees)++;  /* Stored result is the decoded value + 1. */
+      s->context_index = 0;
+      BROTLI_LOG_UINT(context_map_size);
+      BROTLI_LOG_UINT(*num_htrees);
+      *context_map_arg =
+          (uint8_t*)BROTLI_DECODER_ALLOC(s, (size_t)context_map_size);
+      if (*context_map_arg == 0) {
+        return BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_CONTEXT_MAP);
+      }
+      if (*num_htrees <= 1) {  /* Trivial map: every entry is 0. */
+        memset(*context_map_arg, 0, (size_t)context_map_size);
+        return BROTLI_DECODER_SUCCESS;
+      }
+      s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_READ_PREFIX;
+    /* Fall through. */
+
+    case BROTLI_STATE_CONTEXT_MAP_READ_PREFIX: {
+      uint32_t bits;
+      /* In next stage ReadHuffmanCode uses at least 4 bits, so it is safe
+         to peek 4 bits ahead. */
+      if (!BrotliSafeGetBits(br, 5, &bits)) {
+        return BROTLI_DECODER_NEEDS_MORE_INPUT;
+      }
+      if ((bits & 1) != 0) { /* Use RLE for zeros. */
+        s->max_run_length_prefix = (bits >> 1) + 1;
+        BrotliDropBits(br, 5);
+      } else {
+        s->max_run_length_prefix = 0;
+        BrotliDropBits(br, 1);  /* Only the flag bit is consumed. */
+      }
+      BROTLI_LOG_UINT(s->max_run_length_prefix);
+      s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_HUFFMAN;
+    }
+    /* Fall through. */
+
+    case BROTLI_STATE_CONTEXT_MAP_HUFFMAN: {
+      uint32_t alphabet_size = *num_htrees + s->max_run_length_prefix;
+      result = ReadHuffmanCode(alphabet_size, alphabet_size,
+                               s->context_map_table, NULL, s);
+      if (result != BROTLI_DECODER_SUCCESS) return result;
+      s->code = 0xFFFF;  /* 0xFFFF: no run-length code is pending. */
+      s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_DECODE;
+    }
+    /* Fall through. */
+
+    case BROTLI_STATE_CONTEXT_MAP_DECODE: {
+      uint32_t context_index = s->context_index;
+      uint32_t max_run_length_prefix = s->max_run_length_prefix;
+      uint8_t* context_map = *context_map_arg;
+      uint32_t code = s->code;
+      BROTLI_BOOL skip_preamble = (code != 0xFFFF);  /* Resume inside RLE. */
+      while (context_index < context_map_size || skip_preamble) {
+        if (!skip_preamble) {
+          if (!SafeReadSymbol(s->context_map_table, br, &code)) {
+            s->code = 0xFFFF;
+            s->context_index = context_index;
+            return BROTLI_DECODER_NEEDS_MORE_INPUT;
+          }
+          BROTLI_LOG_UINT(code);
+
+          if (code == 0) {
+            context_map[context_index++] = 0;
+            continue;
+          }
+          if (code > max_run_length_prefix) {  /* Plain (non-RLE) symbol. */
+            context_map[context_index++] =
+                (uint8_t)(code - max_run_length_prefix);
+            continue;
+          }
+        } else {
+          skip_preamble = BROTLI_FALSE;
+        }
+        /* RLE sub-stage. */
+        {
+          uint32_t reps;
+          if (!BrotliSafeReadBits(br, code, &reps)) {
+            s->code = code;  /* Remember the code so the run can resume. */
+            s->context_index = context_index;
+            return BROTLI_DECODER_NEEDS_MORE_INPUT;
+          }
+          reps += 1U << code;  /* Zero run length: (1 << code) + extra bits. */
+          BROTLI_LOG_UINT(reps);
+          if (context_index + reps > context_map_size) {
+            return
+                BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_CONTEXT_MAP_REPEAT);
+          }
+          do {
+            context_map[context_index++] = 0;
+          } while (--reps);
+        }
+      }
+    }
+    /* Fall through. */
+
+    case BROTLI_STATE_CONTEXT_MAP_TRANSFORM: {
+      uint32_t bits;
+      if (!BrotliSafeReadBits(br, 1, &bits)) {
+        s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_TRANSFORM;
+        return BROTLI_DECODER_NEEDS_MORE_INPUT;
+      }
+      if (bits != 0) {
+        InverseMoveToFrontTransform(*context_map_arg, context_map_size, s);
+      }
+      s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
+      return BROTLI_DECODER_SUCCESS;
+    }
+
+    default:
+      return
+          BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);
+  }
+}
+
+/* Reads one block-switch command for the given tree type: a block type
+   symbol followed by a block length, then updates the two-entry block type
+   ring-buffer that belongs to that tree type.
+   Reads 3..54 bits.
+
+   safe:      non-zero selects the bit-reader calls that can fail on short
+              input; zero selects the unchecked ones.
+   tree_type: index used to pick the type / length trees, the block_length
+              slot and the ring-buffer pair.
+
+   Returns BROTLI_FALSE when there is at most one block type, or (safe mode
+   only) when a read fails; if the length read fails the bit reader is
+   restored to the state saved before the type symbol was read.
+   Returns BROTLI_TRUE once the ring-buffer has been updated. */
+static BROTLI_INLINE BROTLI_BOOL DecodeBlockTypeAndLength(
+    int safe, BrotliDecoderState* s, int tree_type) {
+  BrotliBitReader* br = &s->br;
+  uint32_t type_count = s->num_block_types[tree_type];
+  const HuffmanCode* type_codes =
+      &s->block_type_trees[tree_type * BROTLI_HUFFMAN_MAX_SIZE_258];
+  const HuffmanCode* length_codes =
+      &s->block_len_trees[tree_type * BROTLI_HUFFMAN_MAX_SIZE_26];
+  uint32_t* history = &s->block_type_rb[tree_type * 2];
+  uint32_t symbol;
+  uint32_t next_type;
+
+  if (type_count <= 1) return BROTLI_FALSE;
+
+  /* Read 0..15 + 3..39 bits. */
+  if (safe) {
+    BrotliBitReaderState memento;
+    BrotliBitReaderSaveState(br, &memento);
+    if (!SafeReadSymbol(type_codes, br, &symbol)) return BROTLI_FALSE;
+    if (!SafeReadBlockLength(
+            s, &s->block_length[tree_type], length_codes, br)) {
+      /* Undo the partially consumed command so it can be re-read later. */
+      s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_NONE;
+      BrotliBitReaderRestoreState(br, &memento);
+      return BROTLI_FALSE;
+    }
+  } else {
+    symbol = ReadSymbol(type_codes, br);
+    s->block_length[tree_type] = ReadBlockLength(length_codes, br);
+  }
+
+  /* Symbol 0 repeats the second-to-last type, symbol 1 means "last type
+     plus one", any other symbol encodes the type directly (offset by 2). */
+  switch (symbol) {
+    case 0:
+      next_type = history[0];
+      break;
+    case 1:
+      next_type = history[1] + 1;
+      break;
+    default:
+      next_type = symbol - 2;
+      break;
+  }
+  if (next_type >= type_count) {
+    next_type -= type_count;
+  }
+  history[0] = history[1];
+  history[1] = next_type;
+  return BROTLI_TRUE;
+}
+
+/* Rebuilds the s->trivial_literal_contexts bit set.
+   Bit b (word b >> 5, position b & 31) is set when all
+   (1 << BROTLI_LITERAL_CONTEXT_BITS) context map entries of literal block
+   type b hold the same value. */
+static BROTLI_INLINE void DetectTrivialLiteralBlockTypes(
+    BrotliDecoderState* s) {
+  const size_t contexts_per_type = 1u << BROTLI_LITERAL_CONTEXT_BITS;
+  size_t word;
+  size_t block_type;
+  for (word = 0; word < 8; ++word) {
+    s->trivial_literal_contexts[word] = 0;
+  }
+  for (block_type = 0; block_type < s->num_block_types[0]; ++block_type) {
+    const size_t base = block_type << BROTLI_LITERAL_CONTEXT_BITS;
+    const size_t first = s->context_map[base];
+    size_t mismatch = 0;
+    size_t j;
+    /* Accumulate the XOR of every entry against the first one; the result
+       stays zero only if all entries are equal. */
+    for (j = 0; j < contexts_per_type; ++j) {
+      mismatch |= s->context_map[base + j] ^ first;
+    }
+    if (mismatch == 0) {
+      s->trivial_literal_contexts[block_type >> 5] |= 1u << (block_type & 31);
+    }
+  }
+}
+
+/* Refreshes the cached literal-decoding fields for the current literal block
+   type (s->block_type_rb[1]): the context map slice, the trivial-context
+   flag, the Huffman tree selected by the first slice entry, and the context
+   lookup table for the block type's context mode. */
+static BROTLI_INLINE void PrepareLiteralDecoding(BrotliDecoderState* s) {
+  const uint32_t block_type = s->block_type_rb[1];
+  const size_t trivial_word = s->trivial_literal_contexts[block_type >> 5];
+  const uint8_t context_mode = s->context_modes[block_type] & 3;
+  s->context_map_slice =
+      s->context_map + (block_type << BROTLI_LITERAL_CONTEXT_BITS);
+  s->trivial_literal_context = (trivial_word >> (block_type & 31)) & 1;
+  s->literal_htree = s->literal_hgroup.htrees[s->context_map_slice[0]];
+  s->context_lookup = BROTLI_CONTEXT_LUT(context_mode);
+}
+
+/* Literal block switch (tree type 0): decodes the next block type and
+   length and, on success, refreshes the literal-decoding state.
+   Reads 3..54 bits.
+   Returns BROTLI_TRUE if the switch was decoded, BROTLI_FALSE otherwise. */
+static BROTLI_INLINE BROTLI_BOOL DecodeLiteralBlockSwitchInternal(
+    int safe, BrotliDecoderState* s) {
+  if (DecodeBlockTypeAndLength(safe, s, 0)) {
+    PrepareLiteralDecoding(s);
+    return BROTLI_TRUE;
+  }
+  return BROTLI_FALSE;
+}
+
+/* Literal block switch using the unchecked readers (safe == 0).
+   The outcome of the internal helper is intentionally discarded. */
+static void BROTLI_NOINLINE DecodeLiteralBlockSwitch(BrotliDecoderState* s) {
+  const int safe = 0;
+  (void)DecodeLiteralBlockSwitchInternal(safe, s);
+}
+
+/* Literal block switch using the checked readers (safe == 1).
+   Returns BROTLI_FALSE when the switch could not be decoded. */
+static BROTLI_BOOL BROTLI_NOINLINE SafeDecodeLiteralBlockSwitch(
+    BrotliDecoderState* s) {
+  const int safe = 1;
+  return DecodeLiteralBlockSwitchInternal(safe, s);
+}
+
+/* Block switch for insert/copy length (tree type 1).
+   Reads 3..54 bits.
+   On success selects the command Huffman tree indexed by the new block type
+   (s->block_type_rb[3]) as s->htree_command and returns BROTLI_TRUE;
+   otherwise returns BROTLI_FALSE without touching s->htree_command. */
+static BROTLI_INLINE BROTLI_BOOL DecodeCommandBlockSwitchInternal(
+    int safe, BrotliDecoderState* s) {
+  if (DecodeBlockTypeAndLength(safe, s, 1)) {
+    s->htree_command = s->insert_copy_hgroup.htrees[s->block_type_rb[3]];
+    return BROTLI_TRUE;
+  }
+  return BROTLI_FALSE;
+}
+
+/* Command block switch using the unchecked readers (safe == 0).
+   The outcome of the internal helper is intentionally discarded. */
+static void BROTLI_NOINLINE DecodeCommandBlockSwitch(BrotliDecoderState* s) {
+  const int safe = 0;
+  (void)DecodeCommandBlockSwitchInternal(safe, s);
+}
+
+/* Command block switch using the checked readers (safe == 1).
+   Returns BROTLI_FALSE when the switch could not be decoded. */
+static BROTLI_BOOL BROTLI_NOINLINE SafeDecodeCommandBlockSwitch(
+    BrotliDecoderState* s) {
+  const int safe = 1;
+  return DecodeCommandBlockSwitchInternal(safe, s);
+}
+
+/* Block switch for distance codes (tree type 2).
+   Reads 3..54 bits.
+   On success re-points s->dist_context_map_slice at the slice of the new
+   block type (s->block_type_rb[5]), reloads s->dist_htree_index for the
+   current s->distance_context and returns BROTLI_TRUE; otherwise returns
+   BROTLI_FALSE and changes neither field. */
+static BROTLI_INLINE BROTLI_BOOL DecodeDistanceBlockSwitchInternal(
+    int safe, BrotliDecoderState* s) {
+  if (DecodeBlockTypeAndLength(safe, s, 2)) {
+    const uint32_t block_type = s->block_type_rb[5];
+    s->dist_context_map_slice =
+        s->dist_context_map + (block_type << BROTLI_DISTANCE_CONTEXT_BITS);
+    s->dist_htree_index = s->dist_context_map_slice[s->distance_context];
+    return BROTLI_TRUE;
+  }
+  return BROTLI_FALSE;
+}
+
+/* Distance block switch using the unchecked readers (safe == 0).
+   The outcome of the internal helper is intentionally discarded. */
+static void BROTLI_NOINLINE DecodeDistanceBlockSwitch(BrotliDecoderState* s) {
+  const int safe = 0;
+  (void)DecodeDistanceBlockSwitchInternal(safe, s);
+}
+
+/* Distance block switch using the checked readers (safe == 1).
+   Returns BROTLI_FALSE when the switch could not be decoded. */
+static BROTLI_BOOL BROTLI_NOINLINE SafeDecodeDistanceBlockSwitch(
+    BrotliDecoderState* s) {
+  const int safe = 1;
+  return DecodeDistanceBlockSwitchInternal(safe, s);
+}
+
+/* Returns how many bytes have been produced into the ring-buffer but not yet
+   accounted for in s->partial_pos_out, computed as
+   rb_roundtrips * ringbuffer_size + pos - partial_pos_out.
+   When |wrap| is set, a pos beyond ringbuffer_size is clamped to it. */
+static size_t UnwrittenBytes(const BrotliDecoderState* s, BROTLI_BOOL wrap) {
+  size_t pos = (size_t)(s->pos);
+  size_t produced;
+  if (wrap && s->pos > s->ringbuffer_size) {
+    pos = (size_t)s->ringbuffer_size;
+  }
+  produced = (s->rb_roundtrips * (size_t)s->ringbuffer_size) + pos;
+  return produced - s->partial_pos_out;
+}
+
+/* Dumps pending ring-buffer bytes to the caller's output.
+
+   available_out: in/out; decreased by the number of bytes pushed.
+   next_out:      optional. If it points to a null pointer, that pointer is
+                  aimed at the pending data inside the ring-buffer instead
+                  of copying; otherwise the data is copied and the pointer
+                  advanced.
+   total_out:     optional; receives the updated s->partial_pos_out.
+   force:         request BROTLI_DECODER_NEEDS_MORE_OUTPUT whenever data is
+                  left over, regardless of ring-buffer size.
+
+   Returns BROTLI_DECODER_NEEDS_MORE_OUTPUT only if there is more output to
+   push and either ring-buffer is as big as window size, or |force| is true.
+   Returns BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_1 (before pushing
+   anything) if s->meta_block_remaining_len is negative.
+   Otherwise returns BROTLI_DECODER_SUCCESS. */
+static BrotliDecoderErrorCode BROTLI_NOINLINE WriteRingBuffer(
+    BrotliDecoderState* s, size_t* available_out, uint8_t** next_out,
+    size_t* total_out, BROTLI_BOOL force) {
+  uint8_t* src =
+      s->ringbuffer + (s->partial_pos_out & (size_t)s->ringbuffer_mask);
+  const size_t to_write = UnwrittenBytes(s, BROTLI_TRUE);
+  const size_t num_written =
+      (*available_out > to_write) ? to_write : *available_out;
+  int at_max_size;
+
+  if (s->meta_block_remaining_len < 0) {
+    return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_1);
+  }
+
+  if (next_out) {
+    if (!*next_out) {
+      /* Zero-copy mode: expose the ring-buffer contents directly. */
+      *next_out = src;
+    } else {
+      memcpy(*next_out, src, num_written);
+      *next_out += num_written;
+    }
+  }
+  *available_out -= num_written;
+  BROTLI_LOG_UINT(to_write);
+  BROTLI_LOG_UINT(num_written);
+  s->partial_pos_out += num_written;
+  if (total_out) {
+    *total_out = s->partial_pos_out;
+  }
+
+  at_max_size = (s->ringbuffer_size == (1 << s->window_bits));
+  if (num_written < to_write) {
+    return (at_max_size || force) ? BROTLI_DECODER_NEEDS_MORE_OUTPUT
+                                  : BROTLI_DECODER_SUCCESS;
+  }
+  /* Wrap ring buffer only if it has reached its maximal size. */
+  if (at_max_size && s->pos >= s->ringbuffer_size) {
+    s->pos -= s->ringbuffer_size;
+    s->rb_roundtrips++;
+    s->should_wrap_ringbuffer = (s->pos != 0) ? 1 : 0;
+  }
+  return BROTLI_DECODER_SUCCESS;
+}
+
+/* If a wrap is pending (s->should_wrap_ringbuffer), copies the s->pos bytes
+   located at s->ringbuffer_end back to the start of the ring-buffer and
+   clears the flag. Does nothing otherwise. */
+static void BROTLI_NOINLINE WrapRingBuffer(BrotliDecoderState* s) {
+  if (!s->should_wrap_ringbuffer) return;
+  memcpy(s->ringbuffer, s->ringbuffer_end, (size_t)s->pos);
+  s->should_wrap_ringbuffer = 0;
+}
+
+/* Allocates (or grows) the ring-buffer to s->new_ringbuffer_size bytes plus
+   kRingBufferWriteAheadSlack.
+
+   s->ringbuffer_size MUST be updated by BrotliCalculateRingBufferSize before
+   this function is called.
+
+   Last two bytes of ring-buffer are initialized to 0, so context calculation
+   could be done uniformly for the first two and all other positions.
+
+   When a previous buffer exists, its first s->pos bytes are carried over and
+   the old buffer is released.
+
+   Returns BROTLI_TRUE on success or when the size is already as requested;
+   BROTLI_FALSE if allocation fails, in which case the state still refers to
+   the previous buffer. */
+static BROTLI_BOOL BROTLI_NOINLINE BrotliEnsureRingBuffer(
+    BrotliDecoderState* s) {
+  uint8_t* previous = s->ringbuffer;
+  uint8_t* replacement;
+  if (s->ringbuffer_size == s->new_ringbuffer_size) {
+    return BROTLI_TRUE;
+  }
+
+  replacement = (uint8_t*)BROTLI_DECODER_ALLOC(s,
+      (size_t)(s->new_ringbuffer_size) + kRingBufferWriteAheadSlack);
+  if (replacement == 0) {
+    /* Keep using the previous buffer. */
+    return BROTLI_FALSE;
+  }
+  replacement[s->new_ringbuffer_size - 2] = 0;
+  replacement[s->new_ringbuffer_size - 1] = 0;
+
+  if (previous) {
+    memcpy(replacement, previous, (size_t)s->pos);
+    BROTLI_DECODER_FREE(s, previous);
+  }
+
+  s->ringbuffer = replacement;
+  s->ringbuffer_size = s->new_ringbuffer_size;
+  s->ringbuffer_mask = s->new_ringbuffer_size - 1;
+  s->ringbuffer_end = s->ringbuffer + s->ringbuffer_size;
+
+  return BROTLI_TRUE;
+}
+
+static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput(
+    size_t* available_out, uint8_t** next_out, size_t* total_out,
+    BrotliDecoderState* s) {
+  /* Moves the bytes of an uncompressed meta-block from the bit reader into
+     the ring-buffer, flushing the ring-buffer through WriteRingBuffer each
+     time s->pos reaches the window size (1 << s->window_bits).
+
+     Progress is tracked in s->substate_uncompressed (NONE = copy input,
+     WRITE = flush output), s->pos and s->meta_block_remaining_len.
+
+     Returns BROTLI_DECODER_SUCCESS when no meta-block bytes remain,
+     BROTLI_DECODER_NEEDS_MORE_INPUT when bytes remain but the window is not
+     yet full, any non-success result of WriteRingBuffer, or
+     BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_1 if the ring-buffer cannot be
+     allocated.
+
+     TODO: avoid allocation for single uncompressed block. */
+  if (!BrotliEnsureRingBuffer(s)) {
+    return BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_1);
+  }
+
+  /* State machine */
+  for (;;) {
+    switch (s->substate_uncompressed) {
+      case BROTLI_STATE_UNCOMPRESSED_NONE: {
+        int nbytes = (int)BrotliGetRemainingBytes(&s->br);
+        if (nbytes > s->meta_block_remaining_len) {
+          nbytes = s->meta_block_remaining_len;
+        }
+        if (s->pos + nbytes > s->ringbuffer_size) {  /* Clamp to free space. */
+          nbytes = s->ringbuffer_size - s->pos;
+        }
+        /* Copy remaining bytes from s->br.buf_ to ring-buffer. */
+        BrotliCopyBytes(&s->ringbuffer[s->pos], &s->br, (size_t)nbytes);
+        s->pos += nbytes;
+        s->meta_block_remaining_len -= nbytes;
+        if (s->pos < 1 << s->window_bits) {  /* Window is not full yet. */
+          if (s->meta_block_remaining_len == 0) {
+            return BROTLI_DECODER_SUCCESS;
+          }
+          return BROTLI_DECODER_NEEDS_MORE_INPUT;
+        }
+        s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_WRITE;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_UNCOMPRESSED_WRITE: {
+        BrotliDecoderErrorCode result;
+        result = WriteRingBuffer(
+            s, available_out, next_out, total_out, BROTLI_FALSE);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          return result;  /* Sub-state stays WRITE, so the flush is retried. */
+        }
+        if (s->ringbuffer_size == 1 << s->window_bits) {
+          s->max_distance = s->max_backward_distance;
+        }
+        s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_NONE;
+        break;  /* Leaves the switch only; the for-loop continues. */
+      }
+    }
+  }
+  BROTLI_DCHECK(0);  /* Unreachable */
+}
+
+/* Calculates the smallest feasible ring buffer and stores it in
+   s->new_ringbuffer_size.
+
+   If we know the data size is small, do not allocate more ring buffer
+   size than needed to reduce memory usage.
+
+   When this method is called, metablock size and flags MUST be decoded.
+
+   s->new_ringbuffer_size is left untouched when the ring buffer already has
+   the full window size, or when the current block is a metadata block. */
+static void BROTLI_NOINLINE BrotliCalculateRingBufferSize(
+    BrotliDecoderState* s) {
+  const int window_size = 1 << s->window_bits;
+  int candidate = window_size;
+  int lower_bound;
+  int output_size;
+
+  /* If maximum is already reached, no further extension is required.
+     Metadata blocks do not touch the ring buffer either. */
+  if (s->ringbuffer_size == window_size || s->is_metadata) {
+    return;
+  }
+
+  /* Bytes already held in an existing ring buffer plus the bytes still
+     expected from this meta-block. */
+  output_size = (s->ringbuffer ? s->pos : 0) + s->meta_block_remaining_len;
+
+  /* Never go below the current size, nor below 1024 for a fresh buffer. */
+  lower_bound = s->ringbuffer_size ? s->ringbuffer_size : 1024;
+  if (lower_bound < output_size) {
+    lower_bound = output_size;
+  }
+
+  if (s->canny_ringbuffer_allocation) {
+    /* Reduce ring buffer size to save memory when server is unscrupulous.
+       In worst case memory usage might be 1.5x bigger for a short period of
+       ring buffer reallocation. */
+    while ((candidate >> 1) >= lower_bound) {
+      candidate >>= 1;
+    }
+  }
+
+  s->new_ringbuffer_size = candidate;
+}
+
+/* Reads 1..256 2-bit context modes, one per literal block type, into
+   s->context_modes.
+   Resumable: the index of the next mode to read is taken from, and on
+   shortage of input saved to, s->loop_counter.
+   Returns BROTLI_DECODER_NEEDS_MORE_INPUT or BROTLI_DECODER_SUCCESS. */
+static BrotliDecoderErrorCode ReadContextModes(BrotliDecoderState* s) {
+  BrotliBitReader* br = &s->br;
+  const int count = (int)s->num_block_types[0];
+  int i;
+
+  for (i = s->loop_counter; i < count; ++i) {
+    uint32_t bits;
+    if (!BrotliSafeReadBits(br, 2, &bits)) {
+      s->loop_counter = i;
+      return BROTLI_DECODER_NEEDS_MORE_INPUT;
+    }
+    s->context_modes[i] = (uint8_t)bits;
+    BROTLI_LOG_ARRAY_INDEX(s->context_modes, i);
+  }
+  return BROTLI_DECODER_SUCCESS;
+}
+
+/* Replaces the short distance code held in s->distance_code with an actual
+   distance derived from the ring-buffer of recent distances (s->dist_rb).
+
+   Code 0 steps s->dist_rb_idx back by one, takes the entry found there and
+   sets s->distance_context to 1. Every other code picks an entry relative
+   to s->dist_rb_idx and adds or subtracts a small offset; a result that is
+   not positive is replaced by 0x7FFFFFFF. */
+static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliDecoderState* s) {
+  /* kIndexOffsets has 2-bit values from LSB:
+      3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2 */
+  const uint32_t kIndexOffsets = 0xAAAFFF1B;
+  /* kValueOffsets has 2-bit values from LSB:
+     -0, 0,-0, 0,-1, 1,-2, 2,-3, 3,-1, 1,-2, 2,-3, 3 */
+  const uint32_t kValueOffsets = 0xFA5FA500;
+  int shift;
+  int slot;
+  int delta;
+
+  if (s->distance_code == 0) {
+    --s->dist_rb_idx;
+    s->distance_code = s->dist_rb[s->dist_rb_idx & 3];
+    /* Compensate double distance-ring-buffer roll for dictionary items. */
+    s->distance_context = 1;
+    return;
+  }
+
+  /* Each code owns a 2-bit field, hence the doubling. */
+  shift = s->distance_code << 1;
+  slot = (s->dist_rb_idx + (int)(kIndexOffsets >> shift)) & 0x3;
+  delta = (int)(kValueOffsets >> shift) & 0x3;
+  s->distance_code = s->dist_rb[slot];
+  if ((shift & 0x3) != 0) {
+    s->distance_code += delta;
+  } else {
+    s->distance_code -= delta;
+    if (s->distance_code <= 0) {
+      /* A huge distance will cause a BROTLI_FAILURE() soon.
+         This is a little faster than failing here. */
+      s->distance_code = 0x7FFFFFFF;
+    }
+  }
+}
+
+/* Like BrotliSafeReadBits, but a request for zero bits succeeds immediately
+   with *val set to 0 and without calling into the bit reader. */
+static BROTLI_INLINE BROTLI_BOOL SafeReadBits(
+    BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  if (n_bits == 0) {
+    *val = 0;
+    return BROTLI_TRUE;
+  }
+  return BrotliSafeReadBits(br, n_bits, val);
+}
+
+/* Precondition: s->distance_code < 0.
+
+   Reads one distance symbol with the Huffman tree selected by
+   s->dist_htree_index and leaves the decoded value in s->distance_code.
+   s->distance_context is reset to 0 before the symbol is interpreted.
+
+   Symbols 0..15 are resolved through TakeDistanceFromRingBuffer. Symbols at
+   or above s->num_direct_distance_codes are followed by extra bits that are
+   combined with the symbol according to s->distance_postfix_bits /
+   s->distance_postfix_mask. Symbols outside the 0..15 range are finally
+   rebased by BROTLI_NUM_DISTANCE_SHORT_CODES - 1.
+
+   safe: non-zero selects the readers that can fail on short input.
+
+   Returns BROTLI_TRUE on success, after decrementing s->block_length[2].
+   Returns BROTLI_FALSE only in safe mode, when the symbol or its extra bits
+   cannot be read; if the extra bits are missing, s->distance_code is set
+   back to -1 and the bit reader is restored to the saved state. */
+static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal(
+    int safe, BrotliDecoderState* s, BrotliBitReader* br) {
+  int distval;
+  BrotliBitReaderState memento;
+  HuffmanCode* distance_tree = s->distance_hgroup.htrees[s->dist_htree_index];
+  if (!safe) {
+    s->distance_code = (int)ReadSymbol(distance_tree, br);
+  } else {
+    uint32_t code;
+    BrotliBitReaderSaveState(br, &memento);
+    if (!SafeReadSymbol(distance_tree, br, &code)) {
+      return BROTLI_FALSE;
+    }
+    s->distance_code = (int)code;
+  }
+  /* Convert the distance code to the actual distance by possibly
+     looking up past distances from the s->ringbuffer. */
+  s->distance_context = 0;
+  if ((s->distance_code & ~0xF) == 0) {  /* Symbol is in 0..15. */
+    TakeDistanceFromRingBuffer(s);
+    --s->block_length[2];
+    return BROTLI_TRUE;
+  }
+  distval = s->distance_code - (int)s->num_direct_distance_codes;
+  if (distval >= 0) {  /* Symbol carries extra bits. */
+    uint32_t nbits;
+    int postfix;
+    int offset;
+    if (!safe && (s->distance_postfix_bits == 0)) {
+      nbits = ((uint32_t)distval >> 1) + 1;
+      offset = ((2 + (distval & 1)) << nbits) - 4;
+      s->distance_code = (int)s->num_direct_distance_codes + offset +
+                         (int)BrotliReadBits(br, nbits);
+    } else {
+      /* This branch also works well when s->distance_postfix_bits == 0. */
+      uint32_t bits;
+      postfix = distval & s->distance_postfix_mask;
+      distval >>= s->distance_postfix_bits;
+      nbits = ((uint32_t)distval >> 1) + 1;
+      if (safe) {
+        if (!SafeReadBits(br, nbits, &bits)) {
+          s->distance_code = -1;  /* Restore precondition. */
+          BrotliBitReaderRestoreState(br, &memento);
+          return BROTLI_FALSE;
+        }
+      } else {
+        bits = BrotliReadBits(br, nbits);
+      }
+      offset = ((2 + (distval & 1)) << nbits) - 4;
+      s->distance_code = (int)s->num_direct_distance_codes +
+          ((offset + (int)bits) << s->distance_postfix_bits) + postfix;
+    }
+  }
+  s->distance_code = s->distance_code - BROTLI_NUM_DISTANCE_SHORT_CODES + 1;
+  --s->block_length[2];
+  return BROTLI_TRUE;
+}
+
+/* Reads a distance with the unchecked readers (safe == 0).
+   The outcome of the internal helper is intentionally discarded. */
+static BROTLI_INLINE void ReadDistance(
+    BrotliDecoderState* s, BrotliBitReader* br) {
+  const int safe = 0;
+  (void)ReadDistanceInternal(safe, s, br);
+}
+
+/* Reads a distance with the checked readers (safe == 1).
+   Returns BROTLI_FALSE when the distance could not be read. */
+static BROTLI_INLINE BROTLI_BOOL SafeReadDistance(
+    BrotliDecoderState* s, BrotliBitReader* br) {
+  const int safe = 1;
+  return ReadDistanceInternal(safe, s, br);
+}
+
+/* Reads one insert-and-copy command symbol with s->htree_command, looks it
+   up in kCmdLut and reads the extra bits of the insert and copy lengths.
+
+   Updates s->distance_code, s->distance_context and s->dist_htree_index from
+   the table entry, stores the copy length in s->copy_length, the insert
+   length in *insert_length, and decrements s->block_length[1].
+
+   safe: non-zero selects the readers that can fail on short input.
+
+   Returns BROTLI_TRUE on success. Returns BROTLI_FALSE only in safe mode:
+   if the symbol cannot be read nothing is modified; if the extra bits
+   cannot be read the bit reader is restored, but the fields taken from the
+   table entry and *insert_length have already been overwritten. */
+static BROTLI_INLINE BROTLI_BOOL ReadCommandInternal(
+    int safe, BrotliDecoderState* s, BrotliBitReader* br, int* insert_length) {
+  BrotliBitReaderState memento;
+  CmdLutElement entry;
+  uint32_t symbol;
+  uint32_t insert_extra = 0;
+  uint32_t copy_extra;
+
+  if (safe) {
+    BrotliBitReaderSaveState(br, &memento);
+    if (!SafeReadSymbol(s->htree_command, br, &symbol)) {
+      return BROTLI_FALSE;
+    }
+  } else {
+    symbol = ReadSymbol(s->htree_command, br);
+  }
+
+  entry = kCmdLut[symbol];
+  s->distance_code = entry.distance_code;
+  s->distance_context = entry.context;
+  s->dist_htree_index = s->dist_context_map_slice[s->distance_context];
+  *insert_length = entry.insert_len_offset;
+
+  if (safe) {
+    if (!SafeReadBits(br, entry.insert_len_extra_bits, &insert_extra) ||
+        !SafeReadBits(br, entry.copy_len_extra_bits, &copy_extra)) {
+      /* Rewind to before the command symbol so the command is re-read. */
+      BrotliBitReaderRestoreState(br, &memento);
+      return BROTLI_FALSE;
+    }
+  } else {
+    if (BROTLI_PREDICT_FALSE(entry.insert_len_extra_bits != 0)) {
+      insert_extra = BrotliReadBits(br, entry.insert_len_extra_bits);
+    }
+    copy_extra = BrotliReadBits(br, entry.copy_len_extra_bits);
+  }
+
+  s->copy_length = (int)copy_extra + entry.copy_len_offset;
+  --s->block_length[1];
+  *insert_length += (int)insert_extra;
+  return BROTLI_TRUE;
+}
+
+/* Reads a command with the unchecked readers (safe == 0).
+   The outcome of the internal helper is intentionally discarded. */
+static BROTLI_INLINE void ReadCommand(
+    BrotliDecoderState* s, BrotliBitReader* br, int* insert_length) {
+  const int safe = 0;
+  (void)ReadCommandInternal(safe, s, br, insert_length);
+}
+
+/* Reads a command with the checked readers (safe == 1).
+   Returns BROTLI_FALSE when the command could not be read. */
+static BROTLI_INLINE BROTLI_BOOL SafeReadCommand(
+    BrotliDecoderState* s, BrotliBitReader* br, int* insert_length) {
+  const int safe = 1;
+  return ReadCommandInternal(safe, s, br, insert_length);
+}
+
+/* Input-availability check used by the command loop.
+   In safe mode the check is skipped and BROTLI_TRUE is returned; otherwise
+   the result of BrotliCheckInputAmount(br, num) is returned. */
+static BROTLI_INLINE BROTLI_BOOL CheckInputAmount(
+    int safe, BrotliBitReader* const br, size_t num) {
+  if (!safe) {
+    return BrotliCheckInputAmount(br, num);
+  }
+  return BROTLI_TRUE;
+}
+
+#define BROTLI_SAFE(METHOD)                       \
+  { /* Needs |safe| and |result| in scope. */     \
+    if (safe) {                                   \
+      if (!Safe##METHOD) {                        \
+        result = BROTLI_DECODER_NEEDS_MORE_INPUT; \
+        goto saveStateAndReturn;                  \
+      }                                           \
+    } else {                                      \
+      METHOD;                                     \
+    }                                             \
+  }
+/* Shared body of ProcessCommands (safe == 0) and SafeProcessCommands
+   (safe != 0): decodes insert-and-copy commands and writes the resulting
+   bytes into s->ringbuffer.
+
+   Execution resumes at the label that matches s->state (COMMAND_BEGIN,
+   COMMAND_INNER, COMMAND_POST_DECODE_LITERALS or COMMAND_POST_WRAP_COPY);
+   any other state yields BROTLI_DECODER_ERROR_UNREACHABLE.
+
+   With safe == 0 the amount of buffered input is verified with
+   CheckInputAmount and the unchecked readers are used. With safe != 0 that
+   check always passes and every read goes through a Safe* function; when
+   one of them fails, |result| becomes BROTLI_DECODER_NEEDS_MORE_INPUT and
+   control jumps to saveStateAndReturn (see BROTLI_SAFE above).
+
+   Every exit through saveStateAndReturn stores the locals |pos| and |i|
+   back into s->pos and s->loop_counter. Format errors return directly via
+   BROTLI_FAILURE and skip that write-back.
+
+   Returns BROTLI_DECODER_SUCCESS after moving s->state to one of
+   COMMAND_INNER_WRITE, COMMAND_POST_WRITE_1, COMMAND_POST_WRITE_2 or
+   METABLOCK_DONE; BROTLI_DECODER_NEEDS_MORE_INPUT; or an error code. */
+static BROTLI_INLINE BrotliDecoderErrorCode ProcessCommandsInternal(
+    int safe, BrotliDecoderState* s) {
+  int pos = s->pos;  /* Working copy; stored back at saveStateAndReturn. */
+  int i = s->loop_counter;  /* Insert length first, later the copy length. */
+  BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS;
+  BrotliBitReader* br = &s->br;
+
+  if (!CheckInputAmount(safe, br, 28)) {
+    result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+    goto saveStateAndReturn;
+  }
+  if (!safe) {
+    BROTLI_UNUSED(BrotliWarmupBitReader(br));
+  }
+
+  /* Jump into state machine. */
+  if (s->state == BROTLI_STATE_COMMAND_BEGIN) {
+    goto CommandBegin;
+  } else if (s->state == BROTLI_STATE_COMMAND_INNER) {
+    goto CommandInner;
+  } else if (s->state == BROTLI_STATE_COMMAND_POST_DECODE_LITERALS) {
+    goto CommandPostDecodeLiterals;
+  } else if (s->state == BROTLI_STATE_COMMAND_POST_WRAP_COPY) {
+    goto CommandPostWrapCopy;
+  } else {
+    return BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);
+  }
+
+CommandBegin:
+  if (safe) {
+    s->state = BROTLI_STATE_COMMAND_BEGIN;
+  }
+  if (!CheckInputAmount(safe, br, 28)) {  /* 156 bits + 7 bytes */
+    s->state = BROTLI_STATE_COMMAND_BEGIN;
+    result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+    goto saveStateAndReturn;
+  }
+  if (BROTLI_PREDICT_FALSE(s->block_length[1] == 0)) {
+    BROTLI_SAFE(DecodeCommandBlockSwitch(s));
+    goto CommandBegin;
+  }
+  /* Read the insert/copy length in the command. */
+  BROTLI_SAFE(ReadCommand(s, br, &i));
+  BROTLI_LOG(("[ProcessCommandsInternal] pos = %d insert = %d copy = %d\n",
+              pos, i, s->copy_length));
+  if (i == 0) {  /* No literals to insert for this command. */
+    goto CommandPostDecodeLiterals;
+  }
+  s->meta_block_remaining_len -= i;
+
+CommandInner:
+  if (safe) {
+    s->state = BROTLI_STATE_COMMAND_INNER;
+  }
+  /* Read the literals in the command. */
+  if (s->trivial_literal_context) {
+    uint32_t bits;
+    uint32_t value;
+    PreloadSymbol(safe, s->literal_htree, br, &bits, &value);
+    do {
+      if (!CheckInputAmount(safe, br, 28)) {  /* 162 bits + 7 bytes */
+        s->state = BROTLI_STATE_COMMAND_INNER;
+        result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+        goto saveStateAndReturn;
+      }
+      if (BROTLI_PREDICT_FALSE(s->block_length[0] == 0)) {
+        BROTLI_SAFE(DecodeLiteralBlockSwitch(s));
+        PreloadSymbol(safe, s->literal_htree, br, &bits, &value);
+        if (!s->trivial_literal_context) goto CommandInner;
+      }
+      if (!safe) {
+        s->ringbuffer[pos] =
+            (uint8_t)ReadPreloadedSymbol(s->literal_htree, br, &bits, &value);
+      } else {
+        uint32_t literal;
+        if (!SafeReadSymbol(s->literal_htree, br, &literal)) {
+          result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+          goto saveStateAndReturn;
+        }
+        s->ringbuffer[pos] = (uint8_t)literal;
+      }
+      --s->block_length[0];
+      BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos);
+      ++pos;
+      if (BROTLI_PREDICT_FALSE(pos == s->ringbuffer_size)) {
+        s->state = BROTLI_STATE_COMMAND_INNER_WRITE;
+        --i;  /* Count the literal just written before leaving the loop. */
+        goto saveStateAndReturn;
+      }
+    } while (--i != 0);
+  } else {
+    uint8_t p1 = s->ringbuffer[(pos - 1) & s->ringbuffer_mask];
+    uint8_t p2 = s->ringbuffer[(pos - 2) & s->ringbuffer_mask];
+    do {
+      const HuffmanCode* hc;
+      uint8_t context;
+      if (!CheckInputAmount(safe, br, 28)) {  /* 162 bits + 7 bytes */
+        s->state = BROTLI_STATE_COMMAND_INNER;
+        result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+        goto saveStateAndReturn;
+      }
+      if (BROTLI_PREDICT_FALSE(s->block_length[0] == 0)) {
+        BROTLI_SAFE(DecodeLiteralBlockSwitch(s));
+        if (s->trivial_literal_context) goto CommandInner;
+      }
+      context = BROTLI_CONTEXT(p1, p2, s->context_lookup);
+      BROTLI_LOG_UINT(context);
+      hc = s->literal_hgroup.htrees[s->context_map_slice[context]];
+      p2 = p1;
+      if (!safe) {
+        p1 = (uint8_t)ReadSymbol(hc, br);
+      } else {
+        uint32_t literal;
+        if (!SafeReadSymbol(hc, br, &literal)) {
+          result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+          goto saveStateAndReturn;
+        }
+        p1 = (uint8_t)literal;
+      }
+      s->ringbuffer[pos] = p1;
+      --s->block_length[0];
+      BROTLI_LOG_UINT(s->context_map_slice[context]);
+      BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos & s->ringbuffer_mask);
+      ++pos;
+      if (BROTLI_PREDICT_FALSE(pos == s->ringbuffer_size)) {
+        s->state = BROTLI_STATE_COMMAND_INNER_WRITE;
+        --i;  /* Count the literal just written before leaving the loop. */
+        goto saveStateAndReturn;
+      }
+    } while (--i != 0);
+  }
+  BROTLI_LOG_UINT(s->meta_block_remaining_len);
+  if (BROTLI_PREDICT_FALSE(s->meta_block_remaining_len <= 0)) {
+    s->state = BROTLI_STATE_METABLOCK_DONE;
+    goto saveStateAndReturn;
+  }
+
+CommandPostDecodeLiterals:
+  if (safe) {
+    s->state = BROTLI_STATE_COMMAND_POST_DECODE_LITERALS;
+  }
+  if (s->distance_code >= 0) {
+    /* Implicit distance case. */
+    s->distance_context = s->distance_code ? 0 : 1;
+    --s->dist_rb_idx;
+    s->distance_code = s->dist_rb[s->dist_rb_idx & 3];
+  } else {
+    /* Read distance code in the command, unless it was implicitly zero. */
+    if (BROTLI_PREDICT_FALSE(s->block_length[2] == 0)) {
+      BROTLI_SAFE(DecodeDistanceBlockSwitch(s));
+    }
+    BROTLI_SAFE(ReadDistance(s, br));
+  }
+  BROTLI_LOG(("[ProcessCommandsInternal] pos = %d distance = %d\n",
+              pos, s->distance_code));
+  if (s->max_distance != s->max_backward_distance) {
+    s->max_distance =
+        (pos < s->max_backward_distance) ? pos : s->max_backward_distance;
+  }
+  i = s->copy_length;  /* From here on |i| counts bytes left to copy. */
+  /* Apply copy of LZ77 back-reference, or static dictionary reference if
+     the distance is larger than the max LZ77 distance */
+  if (s->distance_code > s->max_distance) {
+    /* The maximum allowed distance is BROTLI_MAX_ALLOWED_DISTANCE = 0x7FFFFFFC.
+       With this choice, no signed overflow can occur after decoding
+       a special distance code (e.g., after adding 3 to the last distance). */
+    if (s->distance_code > BROTLI_MAX_ALLOWED_DISTANCE) {
+      BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
+          "len: %d bytes left: %d\n",
+          pos, s->distance_code, i, s->meta_block_remaining_len));
+      return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_DISTANCE);
+    }
+    if (i >= BROTLI_MIN_DICTIONARY_WORD_LENGTH &&
+        i <= BROTLI_MAX_DICTIONARY_WORD_LENGTH) {
+      int address = s->distance_code - s->max_distance - 1;
+      const BrotliDictionary* words = s->dictionary;
+      const BrotliTransforms* transforms = s->transforms;
+      int offset = (int)s->dictionary->offsets_by_length[i];
+      uint32_t shift = s->dictionary->size_bits_by_length[i];
+
+      int mask = (int)BitMask(shift);
+      int word_idx = address & mask;  /* Low |shift| bits select the word. */
+      int transform_idx = address >> shift;  /* The rest: the transform. */
+      /* Compensate double distance-ring-buffer roll. */
+      s->dist_rb_idx += s->distance_context;
+      offset += word_idx * i;
+      if (BROTLI_PREDICT_FALSE(!words->data)) {
+        return BROTLI_FAILURE(BROTLI_DECODER_ERROR_DICTIONARY_NOT_SET);
+      }
+      if (transform_idx < (int)transforms->num_transforms) {
+        const uint8_t* word = &words->data[offset];
+        int len = i;
+        if (transform_idx == transforms->cutOffTransforms[0]) {
+          memcpy(&s->ringbuffer[pos], word, (size_t)len);
+          BROTLI_LOG(("[ProcessCommandsInternal] dictionary word: [%.*s]\n",
+                      len, word));
+        } else {
+          len = BrotliTransformDictionaryWord(&s->ringbuffer[pos], word, len,
+              transforms, transform_idx);
+          BROTLI_LOG(("[ProcessCommandsInternal] dictionary word: [%.*s],"
+                      " transform_idx = %d, transformed: [%.*s]\n",
+                      i, word, transform_idx, len, &s->ringbuffer[pos]));
+        }
+        pos += len;
+        s->meta_block_remaining_len -= len;
+        if (pos >= s->ringbuffer_size) {
+          s->state = BROTLI_STATE_COMMAND_POST_WRITE_1;
+          goto saveStateAndReturn;
+        }
+      } else {
+        BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
+            "len: %d bytes left: %d\n",
+            pos, s->distance_code, i, s->meta_block_remaining_len));
+        return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_TRANSFORM);
+      }
+    } else {
+      BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
+          "len: %d bytes left: %d\n",
+          pos, s->distance_code, i, s->meta_block_remaining_len));
+      return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_DICTIONARY);
+    }
+  } else {
+    int src_start = (pos - s->distance_code) & s->ringbuffer_mask;
+    uint8_t* copy_dst = &s->ringbuffer[pos];
+    uint8_t* copy_src = &s->ringbuffer[src_start];
+    int dst_end = pos + i;
+    int src_end = src_start + i;
+    /* Update the recent distances cache. */
+    s->dist_rb[s->dist_rb_idx & 3] = s->distance_code;
+    ++s->dist_rb_idx;
+    s->meta_block_remaining_len -= i;
+    /* There are 32+ bytes of slack in the ring-buffer allocation.
+       Also, we have 16 short codes, that make these 16 bytes irrelevant
+       in the ring-buffer. Let's copy over them as a first guess. */
+    memmove16(copy_dst, copy_src);
+    if (src_end > pos && dst_end > src_start) {
+      /* Regions intersect. */
+      goto CommandPostWrapCopy;
+    }
+    if (dst_end >= s->ringbuffer_size || src_end >= s->ringbuffer_size) {
+      /* At least one region wraps. */
+      goto CommandPostWrapCopy;
+    }
+    pos += i;
+    if (i > 16) {  /* The first 16 bytes were copied by memmove16 above. */
+      if (i > 32) {
+        memcpy(copy_dst + 16, copy_src + 16, (size_t)(i - 16));
+      } else {
+        /* This branch covers about 45% cases.
+           Fixed size short copy allows more compiler optimizations. */
+        memmove16(copy_dst + 16, copy_src + 16);
+      }
+    }
+  }
+  BROTLI_LOG_UINT(s->meta_block_remaining_len);
+  if (s->meta_block_remaining_len <= 0) {
+    /* Next metablock, if any. */
+    s->state = BROTLI_STATE_METABLOCK_DONE;
+    goto saveStateAndReturn;
+  } else {
+    goto CommandBegin;
+  }
+CommandPostWrapCopy:
+  {
+    int wrap_guard = s->ringbuffer_size - pos;  /* Bytes until buffer end. */
+    while (--i >= 0) {
+      s->ringbuffer[pos] =
+          s->ringbuffer[(pos - s->distance_code) & s->ringbuffer_mask];
+      ++pos;
+      if (BROTLI_PREDICT_FALSE(--wrap_guard == 0)) {
+        s->state = BROTLI_STATE_COMMAND_POST_WRITE_2;
+        goto saveStateAndReturn;
+      }
+    }
+  }
+  if (s->meta_block_remaining_len <= 0) {
+    /* Next metablock, if any. */
+    s->state = BROTLI_STATE_METABLOCK_DONE;
+    goto saveStateAndReturn;
+  } else {
+    goto CommandBegin;
+  }
+
+saveStateAndReturn:
+  s->pos = pos;
+  s->loop_counter = i;
+  return result;
+}
+
+#undef BROTLI_SAFE
+
+static BROTLI_NOINLINE BrotliDecoderErrorCode ProcessCommands(
+    BrotliDecoderState* s) {  /* Wrapper tried first by BrotliDecoderDecompressStream. */
+  return ProcessCommandsInternal(0, s);  /* Mode argument 0; SafeProcessCommands passes 1. */
+}
+
+static BROTLI_NOINLINE BrotliDecoderErrorCode SafeProcessCommands(
+    BrotliDecoderState* s) {  /* Wrapper used as the retry when ProcessCommands needs more input. */
+  return ProcessCommandsInternal(1, s);  /* Mode argument 1; ProcessCommands passes 0. */
+}
+
+/* Largest number of distance symbols whose distances all stay within
+   BROTLI_MAX_ALLOWED_DISTANCE for the given ndirect / npostfix pair. */
+static uint32_t BrotliMaxDistanceSymbol(uint32_t ndirect, uint32_t npostfix) {
+  static const uint32_t kBound[BROTLI_MAX_NPOSTFIX + 1] = {0, 4, 12, 28};
+  static const uint32_t kDiff[BROTLI_MAX_NPOSTFIX + 1] = {73, 126, 228, 424};
+  const uint32_t lo = kBound[npostfix];
+  const uint32_t hi = lo + (1U << npostfix);
+  uint32_t pivot;
+  /* Pick the ndirect-dependent term, then add the per-npostfix offset. */
+  if (ndirect < lo) pivot = ndirect + (1U << npostfix);
+  else if (ndirect > hi) pivot = ndirect;
+  else pivot = hi;
+  return pivot + kDiff[npostfix];
+}
+
+BrotliDecoderResult BrotliDecoderDecompress(
+    size_t encoded_size, const uint8_t* encoded_buffer, size_t* decoded_size,
+    uint8_t* decoded_buffer) {
+  /* One-shot decode: run the streaming decoder once over the whole input and
+     report every outcome other than success as a plain error. */
+  BrotliDecoderState state;
+  BrotliDecoderResult status;
+  const uint8_t* in_cursor = encoded_buffer;
+  uint8_t* out_cursor = decoded_buffer;
+  size_t in_left = encoded_size;
+  size_t out_left = *decoded_size;
+  size_t produced = 0;
+  if (!BrotliDecoderStateInit(&state, 0, 0, 0)) {
+    return BROTLI_DECODER_RESULT_ERROR;
+  }
+  status = BrotliDecoderDecompressStream(
+      &state, &in_left, &in_cursor, &out_left, &out_cursor, &produced);
+  *decoded_size = produced;
+  BrotliDecoderStateCleanup(&state);
+  return (status == BROTLI_DECODER_RESULT_SUCCESS) ? status
+                                                   : BROTLI_DECODER_RESULT_ERROR;
+}
+
+/* Streaming decoder entry point. Invariant: input is never overconsumed:
+    - invalid input implies that the whole stream is invalid -> any amount of
+      input could be read and discarded
+    - when result is "needs more input", then at least one more byte is REQUIRED
+      to complete decoding; all input data MUST be consumed by decoder, so
+      client could swap the input buffer
+    - when result is "needs more output" decoder MUST ensure that it doesn't
+      hold more than 7 bits in bit reader; this saves client from swapping input
+      buffer ahead of time
+    - when result is "success" decoder MUST return all unused data back to input
+      buffer; this is possible because the invariant is held on entry */
+BrotliDecoderResult BrotliDecoderDecompressStream(
+    BrotliDecoderState* s, size_t* available_in, const uint8_t** next_in,
+    size_t* available_out, uint8_t** next_out, size_t* total_out) {
+  BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS;
+  BrotliBitReader* br = &s->br;
+  /* Ensure that |total_out| is set, even if no data will ever be pushed out. */
+  if (total_out) {
+    *total_out = s->partial_pos_out;
+  }
+  /* Do not try to process further in a case of unrecoverable error. */
+  if ((int)s->error_code < 0) {
+    return BROTLI_DECODER_RESULT_ERROR;
+  }
+  if (*available_out && (!next_out || !*next_out)) {
+    return SaveErrorCode(
+        s, BROTLI_FAILURE(BROTLI_DECODER_ERROR_INVALID_ARGUMENTS));
+  }
+  if (!*available_out) next_out = 0;  /* No output space: pass a null next_out to the writers. */
+  if (s->buffer_length == 0) {  /* Just connect bit reader to input stream. */
+    br->avail_in = *available_in;
+    br->next_in = *next_in;
+  } else {
+    /* At least one byte of input is required. More than one byte of input may
+       be required to complete the transaction -> reading more data must be
+       done in a loop -> do it in a main loop. */
+    result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+    br->next_in = &s->buffer.u8[0];
+  }
+  /* State machine */
+  for (;;) {
+    if (result != BROTLI_DECODER_SUCCESS) {
+      /* Error, needs more input/output. */
+      if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) {
+        if (s->ringbuffer != 0) {  /* Pro-actively push output. */
+          BrotliDecoderErrorCode intermediate_result = WriteRingBuffer(s,
+              available_out, next_out, total_out, BROTLI_TRUE);
+          /* WriteRingBuffer checks s->meta_block_remaining_len validity. */
+          if ((int)intermediate_result < 0) {
+            result = intermediate_result;
+            break;
+          }
+        }
+        if (s->buffer_length != 0) {  /* Used with internal buffer. */
+          if (br->avail_in == 0) {
+            /* Successfully finished read transaction.
+               Accumulator contains less than 8 bits, because internal buffer
+               is expanded byte-by-byte until it is enough to complete read. */
+            s->buffer_length = 0;
+            /* Switch to input stream and restart. */
+            result = BROTLI_DECODER_SUCCESS;
+            br->avail_in = *available_in;
+            br->next_in = *next_in;
+            continue;
+          } else if (*available_in != 0) {
+            /* Not enough data in buffer, but can take one more byte from
+               input stream. */
+            result = BROTLI_DECODER_SUCCESS;
+            s->buffer.u8[s->buffer_length] = **next_in;
+            s->buffer_length++;
+            br->avail_in = s->buffer_length;
+            (*next_in)++;
+            (*available_in)--;
+            /* Retry with more data in buffer. */
+            continue;
+          }
+          /* Can't finish reading and no more input. */
+          break;
+        } else {  /* Input stream doesn't contain enough input. */
+          /* Copy tail to internal buffer and return. */
+          *next_in = br->next_in;
+          *available_in = br->avail_in;
+          while (*available_in) {
+            s->buffer.u8[s->buffer_length] = **next_in;
+            s->buffer_length++;
+            (*next_in)++;
+            (*available_in)--;
+          }
+          break;
+        }
+        /* Unreachable: each branch above ends in break or continue. */
+      }
+
+      /* Fail or needs more output. */
+
+      if (s->buffer_length != 0) {
+        /* Just consumed the buffered input and produced some output. Otherwise
+           it would result in "needs more input". Reset internal buffer. */
+        s->buffer_length = 0;
+      } else {
+        /* Using input stream in last iteration. When decoder switches to input
+           stream it has less than 8 bits in accumulator, so it is safe to
+           return unused accumulator bits there. */
+        BrotliBitReaderUnload(br);
+        *available_in = br->avail_in;
+        *next_in = br->next_in;
+      }
+      break;
+    }
+    switch (s->state) {
+      case BROTLI_STATE_UNINITED:
+        /* Prepare for the first read. */
+        if (!BrotliWarmupBitReader(br)) {
+          result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+          break;
+        }
+        /* Decode window size. */
+        result = DecodeWindowBits(s, br);  /* Reads 1..8 bits. */
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        if (s->large_window) {
+          s->state = BROTLI_STATE_LARGE_WINDOW_BITS;
+          break;
+        }
+        s->state = BROTLI_STATE_INITIALIZE;
+        break;
+
+      case BROTLI_STATE_LARGE_WINDOW_BITS:
+        if (!BrotliSafeReadBits(br, 6, &s->window_bits)) {
+          result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+          break;
+        }
+        if (s->window_bits < BROTLI_LARGE_MIN_WBITS ||
+            s->window_bits > BROTLI_LARGE_MAX_WBITS) {
+          result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS);
+          break;
+        }
+        s->state = BROTLI_STATE_INITIALIZE;
+      /* Fall through. */
+
+      case BROTLI_STATE_INITIALIZE:
+        BROTLI_LOG_UINT(s->window_bits);
+        /* Maximum distance, see section 9.1. of the spec. */
+        s->max_backward_distance = (1 << s->window_bits) - BROTLI_WINDOW_GAP;
+
+        /* Allocate memory for both block_type_trees and block_len_trees. */
+        s->block_type_trees = (HuffmanCode*)BROTLI_DECODER_ALLOC(s,
+            sizeof(HuffmanCode) * 3 *
+                (BROTLI_HUFFMAN_MAX_SIZE_258 + BROTLI_HUFFMAN_MAX_SIZE_26));
+        if (s->block_type_trees == 0) {
+          result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_BLOCK_TYPE_TREES);
+          break;
+        }
+        s->block_len_trees =
+            s->block_type_trees + 3 * BROTLI_HUFFMAN_MAX_SIZE_258;
+
+        s->state = BROTLI_STATE_METABLOCK_BEGIN;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_BEGIN:
+        BrotliDecoderStateMetablockBegin(s);
+        BROTLI_LOG_UINT(s->pos);
+        s->state = BROTLI_STATE_METABLOCK_HEADER;
+      /* Fall through. */
+
+      case BROTLI_STATE_METABLOCK_HEADER:
+        result = DecodeMetaBlockLength(s, br);  /* Reads 2 - 31 bits. */
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        BROTLI_LOG_UINT(s->is_last_metablock);
+        BROTLI_LOG_UINT(s->meta_block_remaining_len);
+        BROTLI_LOG_UINT(s->is_metadata);
+        BROTLI_LOG_UINT(s->is_uncompressed);
+        if (s->is_metadata || s->is_uncompressed) {
+          if (!BrotliJumpToByteBoundary(br)) {
+            result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_PADDING_1);
+            break;
+          }
+        }
+        if (s->is_metadata) {
+          s->state = BROTLI_STATE_METADATA;
+          break;
+        }
+        if (s->meta_block_remaining_len == 0) {
+          s->state = BROTLI_STATE_METABLOCK_DONE;
+          break;
+        }
+        BrotliCalculateRingBufferSize(s);
+        if (s->is_uncompressed) {
+          s->state = BROTLI_STATE_UNCOMPRESSED;
+          break;
+        }
+        s->loop_counter = 0;
+        s->state = BROTLI_STATE_HUFFMAN_CODE_0;
+        break;
+
+      case BROTLI_STATE_UNCOMPRESSED: {
+        result = CopyUncompressedBlockToOutput(
+            available_out, next_out, total_out, s);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        s->state = BROTLI_STATE_METABLOCK_DONE;
+        break;
+      }
+
+      case BROTLI_STATE_METADATA:
+        for (; s->meta_block_remaining_len > 0; --s->meta_block_remaining_len) {
+          uint32_t bits;
+          /* Read one byte and ignore it. */
+          if (!BrotliSafeReadBits(br, 8, &bits)) {
+            result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+            break;
+          }
+        }
+        if (result == BROTLI_DECODER_SUCCESS) {
+          s->state = BROTLI_STATE_METABLOCK_DONE;
+        }
+        break;
+
+      case BROTLI_STATE_HUFFMAN_CODE_0:
+        if (s->loop_counter >= 3) {
+          s->state = BROTLI_STATE_METABLOCK_HEADER_2;
+          break;
+        }
+        /* Reads 1..11 bits. */
+        result = DecodeVarLenUint8(s, br, &s->num_block_types[s->loop_counter]);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        s->num_block_types[s->loop_counter]++;
+        BROTLI_LOG_UINT(s->num_block_types[s->loop_counter]);
+        if (s->num_block_types[s->loop_counter] < 2) {
+          s->loop_counter++;
+          break;
+        }
+        s->state = BROTLI_STATE_HUFFMAN_CODE_1;
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_CODE_1: {
+        uint32_t alphabet_size = s->num_block_types[s->loop_counter] + 2;
+        int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_258;
+        result = ReadHuffmanCode(alphabet_size, alphabet_size,
+            &s->block_type_trees[tree_offset], NULL, s);
+        if (result != BROTLI_DECODER_SUCCESS) break;
+        s->state = BROTLI_STATE_HUFFMAN_CODE_2;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_CODE_2: {
+        uint32_t alphabet_size = BROTLI_NUM_BLOCK_LEN_SYMBOLS;
+        int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_26;
+        result = ReadHuffmanCode(alphabet_size, alphabet_size,
+            &s->block_len_trees[tree_offset], NULL, s);
+        if (result != BROTLI_DECODER_SUCCESS) break;
+        s->state = BROTLI_STATE_HUFFMAN_CODE_3;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_HUFFMAN_CODE_3: {
+        int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_26;
+        if (!SafeReadBlockLength(s, &s->block_length[s->loop_counter],
+            &s->block_len_trees[tree_offset], br)) {
+          result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+          break;
+        }
+        BROTLI_LOG_UINT(s->block_length[s->loop_counter]);
+        s->loop_counter++;
+        s->state = BROTLI_STATE_HUFFMAN_CODE_0;
+        break;
+      }
+
+      case BROTLI_STATE_METABLOCK_HEADER_2: {
+        uint32_t bits;
+        if (!BrotliSafeReadBits(br, 6, &bits)) {
+          result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+          break;
+        }
+        s->distance_postfix_bits = bits & BitMask(2);
+        bits >>= 2;
+        s->num_direct_distance_codes = BROTLI_NUM_DISTANCE_SHORT_CODES +
+            (bits << s->distance_postfix_bits);
+        BROTLI_LOG_UINT(s->num_direct_distance_codes);
+        BROTLI_LOG_UINT(s->distance_postfix_bits);
+        s->distance_postfix_mask = (int)BitMask(s->distance_postfix_bits);
+        s->context_modes =
+            (uint8_t*)BROTLI_DECODER_ALLOC(s, (size_t)s->num_block_types[0]);
+        if (s->context_modes == 0) {
+          result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_CONTEXT_MODES);
+          break;
+        }
+        s->loop_counter = 0;
+        s->state = BROTLI_STATE_CONTEXT_MODES;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_CONTEXT_MODES:
+        result = ReadContextModes(s);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        s->state = BROTLI_STATE_CONTEXT_MAP_1;
+      /* Fall through. */
+
+      case BROTLI_STATE_CONTEXT_MAP_1:
+        result = DecodeContextMap(
+            s->num_block_types[0] << BROTLI_LITERAL_CONTEXT_BITS,
+            &s->num_literal_htrees, &s->context_map, s);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        DetectTrivialLiteralBlockTypes(s);
+        s->state = BROTLI_STATE_CONTEXT_MAP_2;
+      /* Fall through. */
+
+      case BROTLI_STATE_CONTEXT_MAP_2: {
+        uint32_t num_direct_codes =
+            s->num_direct_distance_codes - BROTLI_NUM_DISTANCE_SHORT_CODES;
+        uint32_t num_distance_codes = BROTLI_DISTANCE_ALPHABET_SIZE(
+            s->distance_postfix_bits, num_direct_codes,
+            (s->large_window ? BROTLI_LARGE_MAX_DISTANCE_BITS :
+                               BROTLI_MAX_DISTANCE_BITS));
+        uint32_t max_distance_symbol = (s->large_window ?
+            BrotliMaxDistanceSymbol(
+                num_direct_codes, s->distance_postfix_bits) :
+            num_distance_codes);
+        BROTLI_BOOL allocation_success = BROTLI_TRUE;
+        result = DecodeContextMap(
+            s->num_block_types[2] << BROTLI_DISTANCE_CONTEXT_BITS,
+            &s->num_dist_htrees, &s->dist_context_map, s);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
+            s, &s->literal_hgroup, BROTLI_NUM_LITERAL_SYMBOLS,
+            BROTLI_NUM_LITERAL_SYMBOLS, s->num_literal_htrees);
+        allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
+            s, &s->insert_copy_hgroup, BROTLI_NUM_COMMAND_SYMBOLS,
+            BROTLI_NUM_COMMAND_SYMBOLS, s->num_block_types[1]);
+        allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
+            s, &s->distance_hgroup, num_distance_codes,
+            max_distance_symbol, s->num_dist_htrees);
+        if (!allocation_success) {
+          return SaveErrorCode(s,
+              BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS));
+        }
+        s->loop_counter = 0;
+        s->state = BROTLI_STATE_TREE_GROUP;
+      }
+      /* Fall through. */
+
+      case BROTLI_STATE_TREE_GROUP: {
+        HuffmanTreeGroup* hgroup = NULL;
+        switch (s->loop_counter) {
+          case 0: hgroup = &s->literal_hgroup; break;
+          case 1: hgroup = &s->insert_copy_hgroup; break;
+          case 2: hgroup = &s->distance_hgroup; break;
+          default: return SaveErrorCode(s, BROTLI_FAILURE(
+              BROTLI_DECODER_ERROR_UNREACHABLE));
+        }
+        result = HuffmanTreeGroupDecode(hgroup, s);
+        if (result != BROTLI_DECODER_SUCCESS) break;
+        s->loop_counter++;
+        if (s->loop_counter >= 3) {
+          PrepareLiteralDecoding(s);
+          s->dist_context_map_slice = s->dist_context_map;
+          s->htree_command = s->insert_copy_hgroup.htrees[0];
+          if (!BrotliEnsureRingBuffer(s)) {
+            result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_2);
+            break;
+          }
+          s->state = BROTLI_STATE_COMMAND_BEGIN;
+        }
+        break;
+      }
+
+      case BROTLI_STATE_COMMAND_BEGIN:
+      /* Fall through. */
+      case BROTLI_STATE_COMMAND_INNER:
+      /* Fall through. */
+      case BROTLI_STATE_COMMAND_POST_DECODE_LITERALS:
+      /* Fall through. */
+      case BROTLI_STATE_COMMAND_POST_WRAP_COPY:
+        result = ProcessCommands(s);
+        if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) {  /* Retry with the Safe variant. */
+          result = SafeProcessCommands(s);
+        }
+        break;
+
+      case BROTLI_STATE_COMMAND_INNER_WRITE:
+      /* Fall through. */
+      case BROTLI_STATE_COMMAND_POST_WRITE_1:
+      /* Fall through. */
+      case BROTLI_STATE_COMMAND_POST_WRITE_2:
+        result = WriteRingBuffer(
+            s, available_out, next_out, total_out, BROTLI_FALSE);
+        if (result != BROTLI_DECODER_SUCCESS) {
+          break;
+        }
+        WrapRingBuffer(s);
+        if (s->ringbuffer_size == 1 << s->window_bits) {  /* Ring buffer is at full window size. */
+          s->max_distance = s->max_backward_distance;
+        }
+        if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_1) {
+          if (s->meta_block_remaining_len == 0) {
+            /* Next metablock, if any. */
+            s->state = BROTLI_STATE_METABLOCK_DONE;
+          } else {
+            s->state = BROTLI_STATE_COMMAND_BEGIN;
+          }
+          break;
+        } else if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_2) {
+          s->state = BROTLI_STATE_COMMAND_POST_WRAP_COPY;
+        } else {  /* BROTLI_STATE_COMMAND_INNER_WRITE */
+          if (s->loop_counter == 0) {
+            if (s->meta_block_remaining_len == 0) {
+              s->state = BROTLI_STATE_METABLOCK_DONE;
+            } else {
+              s->state = BROTLI_STATE_COMMAND_POST_DECODE_LITERALS;
+            }
+            break;
+          }
+          s->state = BROTLI_STATE_COMMAND_INNER;
+        }
+        break;
+
+      case BROTLI_STATE_METABLOCK_DONE:
+        if (s->meta_block_remaining_len < 0) {
+          result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_2);
+          break;
+        }
+        BrotliDecoderStateCleanupAfterMetablock(s);
+        if (!s->is_last_metablock) {
+          s->state = BROTLI_STATE_METABLOCK_BEGIN;
+          break;
+        }
+        if (!BrotliJumpToByteBoundary(br)) {
+          result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_PADDING_2);
+          break;
+        }
+        if (s->buffer_length == 0) {
+          BrotliBitReaderUnload(br);
+          *available_in = br->avail_in;
+          *next_in = br->next_in;
+        }
+        s->state = BROTLI_STATE_DONE;
+      /* Fall through. */
+
+      case BROTLI_STATE_DONE:
+        if (s->ringbuffer != 0) {
+          result = WriteRingBuffer(
+              s, available_out, next_out, total_out, BROTLI_TRUE);
+          if (result != BROTLI_DECODER_SUCCESS) {
+            break;
+          }
+        }
+        return SaveErrorCode(s, result);
+    }
+  }
+  return SaveErrorCode(s, result);
+}
+
+BROTLI_BOOL BrotliDecoderHasMoreOutput(const BrotliDecoderState* s) {
+  int pending = 0;  /* Stays 0 when there is no ring buffer. */
+  if ((int)s->error_code < 0) {  /* Output after a hard error is unusable. */
+    return BROTLI_FALSE;
+  }
+  if (s->ringbuffer != 0) pending = (UnwrittenBytes(s, BROTLI_FALSE) != 0);
+  return TO_BROTLI_BOOL(pending);
+}
+
+const uint8_t* BrotliDecoderTakeOutput(BrotliDecoderState* s, size_t* size) {
+  /* Returns pending output, its length in *size (0 on entry: up to 1 << 24). */
+  const size_t wanted = *size ? *size : 1u << 24;
+  size_t room = wanted;
+  uint8_t* chunk = 0;
+  BrotliDecoderErrorCode rc;
+  if ((int)s->error_code < 0 || s->ringbuffer == 0) {
+    *size = 0;
+    return 0;
+  }
+  WrapRingBuffer(s);
+  rc = WriteRingBuffer(s, &room, &chunk, 0, BROTLI_TRUE);
+  switch (rc) {
+    case BROTLI_DECODER_SUCCESS:
+    case BROTLI_DECODER_NEEDS_MORE_OUTPUT:
+      *size = wanted - room;  /* Budget consumed by WriteRingBuffer. */
+      return chunk;
+    default: break;
+  }
+  /* Any other code: record failures as a safeguard and report no output. */
+  if ((int)rc < 0) SaveErrorCode(s, rc);
+  *size = 0;
+  return 0;
+}
+
+BROTLI_BOOL BrotliDecoderIsUsed(const BrotliDecoderState* s) {  /* True once state has left UNINITED or the bit reader holds bits. */
+  return TO_BROTLI_BOOL(s->state != BROTLI_STATE_UNINITED ||
+      BrotliGetAvailableBits(&s->br) != 0);
+}
+
+BROTLI_BOOL BrotliDecoderIsFinished(const BrotliDecoderState* s) {  /* True when state is DONE and no output is left to hand out. */
+  return TO_BROTLI_BOOL(s->state == BROTLI_STATE_DONE) &&
+      !BrotliDecoderHasMoreOutput(s);
+}
+
+BrotliDecoderErrorCode BrotliDecoderGetErrorCode(const BrotliDecoderState* s) {
+  return (BrotliDecoderErrorCode)s->error_code;  /* Stored code, cast back to the enum type. */
+}
+
+const char* BrotliDecoderErrorString(BrotliDecoderErrorCode c) {
+  switch (c) {  /* One case per entry of BROTLI_DECODER_ERROR_CODES_LIST, generated below. */
+#define BROTLI_ERROR_CODE_CASE_(PREFIX, NAME, CODE) \
+    case BROTLI_DECODER ## PREFIX ## NAME: return #NAME;
+#define BROTLI_NOTHING_
+    BROTLI_DECODER_ERROR_CODES_LIST(BROTLI_ERROR_CODE_CASE_, BROTLI_NOTHING_)
+#undef BROTLI_ERROR_CODE_CASE_
+#undef BROTLI_NOTHING_
+    default: return "INVALID";  /* Code not matched by any generated case. */
+  }
+}
+
+uint32_t BrotliDecoderVersion(void) {  /* (void): a true prototype, not an unspecified parameter list. */
+  return BROTLI_VERSION;  /* Value of the BROTLI_VERSION macro. */
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/huffman.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/huffman.c
new file mode 100755
index 0000000000..30c40d33f2
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/huffman.c
@@ -0,0 +1,339 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Utilities for building Huffman decoding tables. */
+
+#include "./huffman.h"
+
+#include <string.h>  /* memcpy, memset */
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_REVERSE_BITS_MAX 8  /* Number of bits reversed by one BrotliReverseBits() call. */
+
+#if defined(BROTLI_RBIT)
+#define BROTLI_REVERSE_BITS_BASE \
+  ((sizeof(brotli_reg_t) << 3) - BROTLI_REVERSE_BITS_MAX)
+#else  /* No BROTLI_RBIT: reverse 8 bits through a read-only lookup table. */
+#define BROTLI_REVERSE_BITS_BASE 0  /* Table path: keys live in the low bits. */
+static const uint8_t kReverseBits[1 << BROTLI_REVERSE_BITS_MAX] = {
+  0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0,
+  0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
+  0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
+  0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
+  0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4,
+  0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
+  0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC,
+  0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
+  0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
+  0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
+  0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA,
+  0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
+  0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6,
+  0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
+  0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
+  0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
+  0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1,
+  0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
+  0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9,
+  0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
+  0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
+  0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
+  0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED,
+  0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
+  0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3,
+  0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
+  0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
+  0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
+  0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7,
+  0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
+  0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF,
+  0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
+};
+#endif  /* BROTLI_RBIT */
+
+#define BROTLI_REVERSE_BITS_LOWEST \
+  ((brotli_reg_t)1 << (BROTLI_REVERSE_BITS_MAX - 1 + BROTLI_REVERSE_BITS_BASE))
+
+/* Returns reverse(num >> BROTLI_REVERSE_BITS_BASE, BROTLI_REVERSE_BITS_MAX),
+   where reverse(value, len) is the bit-wise reversal of the len least
+   significant bits of value. */
+static BROTLI_INLINE brotli_reg_t BrotliReverseBits(brotli_reg_t num) {
+#if defined(BROTLI_RBIT)
+  return BROTLI_RBIT(num);  /* Delegates to the platform-provided macro. */
+#else
+  return kReverseBits[num];  /* Lookup in the 1 << BROTLI_REVERSE_BITS_MAX entry table. */
+#endif
+}
+
+/* Writes |code| to table[end - step], table[end - 2*step], ..., table[0].
+   Assumes that |end| is an integer multiple of |step|. */
+static BROTLI_INLINE void ReplicateValue(HuffmanCode* table, int step,
+                                         int end, HuffmanCode code) {
+  for (;;) {
+    end -= step;
+    table[end] = code;
+    if (end <= 0) break;
+  }
+}
+
+/* Returns the bit width of the next 2nd level table. |count| is the remaining
+   per-length symbol histogram and |len| is the code length of the symbol
+   about to be processed. */
+static BROTLI_INLINE int NextTableBitSize(const uint16_t* const count,
+                                          int len, int root_bits) {
+  int room = 1 << (len - root_bits);
+  /* Widen by one bit per step until the codes of the current length use up
+     the remaining room, or the maximum code length is reached. */
+  for (; len < BROTLI_HUFFMAN_MAX_CODE_LENGTH; ++len, room <<= 1) {
+    room -= count[len];
+    if (room <= 0) break;
+  }
+  return len - root_bits;
+}
+
+void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table,
+                                        const uint8_t* const code_lengths,
+                                        uint16_t* count) {  /* Fills |table| from |code_lengths|; count[n] = number of symbols of length n. */
+  HuffmanCode code;       /* current table entry */
+  int symbol;             /* symbol index in original or sorted table */
+  brotli_reg_t key;       /* prefix code */
+  brotli_reg_t key_step;  /* prefix code addend */
+  int step;               /* step size to replicate values in current table */
+  int table_size;         /* size of current table */
+  int sorted[BROTLI_CODE_LENGTH_CODES];  /* symbols sorted by code length */
+  /* offsets in sorted table for each length */
+  int offset[BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH + 1];
+  int bits;
+  int bits_count;
+  BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH <=
+                BROTLI_REVERSE_BITS_MAX);
+
+  /* Generate offsets into sorted symbol table by code length. */
+  symbol = -1;
+  bits = 1;
+  BROTLI_REPEAT(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH, {
+    symbol += count[bits];
+    offset[bits] = symbol;
+    bits++;
+  });
+  /* Symbols with code length 0 are placed after all other symbols. */
+  offset[0] = BROTLI_CODE_LENGTH_CODES - 1;
+
+  /* Sort symbols by length, by symbol order within each length. */
+  symbol = BROTLI_CODE_LENGTH_CODES;
+  do {  /* Unrolled by 6: assumes BROTLI_CODE_LENGTH_CODES is a multiple of 6 - TODO confirm. */
+    BROTLI_REPEAT(6, {
+      symbol--;
+      sorted[offset[code_lengths[symbol]]--] = symbol;
+    });
+  } while (symbol != 0);
+
+  table_size = 1 << BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH;
+
+  /* Special case: all symbols but one have 0 code length. */
+  if (offset[0] == 0) {
+    code = ConstructHuffmanCode(0, (uint16_t)sorted[0]);
+    for (key = 0; key < (brotli_reg_t)table_size; ++key) {
+      table[key] = code;
+    }
+    return;
+  }
+
+  /* Fill in table. */
+  key = 0;
+  key_step = BROTLI_REVERSE_BITS_LOWEST;
+  symbol = 0;
+  bits = 1;
+  step = 2;
+  do {  /* One pass per code length, shortest first. */
+    for (bits_count = count[bits]; bits_count != 0; --bits_count) {
+      code = ConstructHuffmanCode((uint8_t)bits, (uint16_t)sorted[symbol++]);
+      ReplicateValue(&table[BrotliReverseBits(key)], step, table_size, code);
+      key += key_step;
+    }
+    step <<= 1;
+    key_step >>= 1;
+  } while (++bits <= BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH);
+}
+
+uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
+                                 int root_bits,
+                                 const uint16_t* const symbol_lists,
+                                 uint16_t* count) {  /* Fills the root and 2nd level tables; returns total entries used. */
+  HuffmanCode code;       /* current table entry */
+  HuffmanCode* table;     /* next available space in table */
+  int len;                /* current code length */
+  int symbol;             /* symbol index in original or sorted table */
+  brotli_reg_t key;       /* prefix code */
+  brotli_reg_t key_step;  /* prefix code addend */
+  brotli_reg_t sub_key;   /* 2nd level table prefix code */
+  brotli_reg_t sub_key_step;  /* 2nd level table prefix code addend */
+  int step;               /* step size to replicate values in current table */
+  int table_bits;         /* key length of current table */
+  int table_size;         /* size of current table */
+  int total_size;         /* sum of root table size and 2nd level table sizes */
+  int max_length = -1;
+  int bits;
+  int bits_count;
+
+  BROTLI_DCHECK(root_bits <= BROTLI_REVERSE_BITS_MAX);
+  BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH - root_bits <=
+                BROTLI_REVERSE_BITS_MAX);
+
+  while (symbol_lists[max_length] == 0xFFFF) max_length--;  /* NOTE(review): negative indices - presumably symbol_lists points inside a larger array; confirm at caller. */
+  max_length += BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1;
+
+  table = root_table;
+  table_bits = root_bits;
+  table_size = 1 << table_bits;
+  total_size = table_size;
+
+  /* Fill in the root table. Reduce the table size if possible,
+     and create the repetitions by memcpy. */
+  if (table_bits > max_length) {
+    table_bits = max_length;
+    table_size = 1 << table_bits;
+  }
+  key = 0;
+  key_step = BROTLI_REVERSE_BITS_LOWEST;
+  bits = 1;
+  step = 2;
+  do {
+    symbol = bits - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);
+    for (bits_count = count[bits]; bits_count != 0; --bits_count) {
+      symbol = symbol_lists[symbol];
+      code = ConstructHuffmanCode((uint8_t)bits, (uint16_t)symbol);
+      ReplicateValue(&table[BrotliReverseBits(key)], step, table_size, code);
+      key += key_step;
+    }
+    step <<= 1;
+    key_step >>= 1;
+  } while (++bits <= table_bits);
+
+  /* If root_bits != table_bits then replicate to fill the remaining slots. */
+  while (total_size != table_size) {
+    memcpy(&table[table_size], &table[0],
+           (size_t)table_size * sizeof(table[0]));
+    table_size <<= 1;
+  }
+
+  /* Fill in 2nd level tables and add pointers to root table. */
+  key_step = BROTLI_REVERSE_BITS_LOWEST >> (root_bits - 1);
+  sub_key = (BROTLI_REVERSE_BITS_LOWEST << 1);
+  sub_key_step = BROTLI_REVERSE_BITS_LOWEST;
+  for (len = root_bits + 1, step = 2; len <= max_length; ++len) {
+    symbol = len - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);
+    for (; count[len] != 0; --count[len]) {  /* Counts |count[len]| down to zero in place. */
+      if (sub_key == (BROTLI_REVERSE_BITS_LOWEST << 1U)) {
+        table += table_size;
+        table_bits = NextTableBitSize(count, len, root_bits);
+        table_size = 1 << table_bits;
+        total_size += table_size;
+        sub_key = BrotliReverseBits(key);
+        key += key_step;
+        root_table[sub_key] = ConstructHuffmanCode(
+            (uint8_t)(table_bits + root_bits),
+            (uint16_t)(((size_t)(table - root_table)) - sub_key));
+        sub_key = 0;
+      }
+      symbol = symbol_lists[symbol];
+      code = ConstructHuffmanCode((uint8_t)(len - root_bits), (uint16_t)symbol);
+      ReplicateValue(
+          &table[BrotliReverseBits(sub_key)], step, table_size, code);
+      sub_key += sub_key_step;
+    }
+    step <<= 1;
+    sub_key_step >>= 1;
+  }
+  return (uint32_t)total_size;
+}
+
+uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
+                                       int root_bits,
+                                       uint16_t* val,
+                                       uint32_t num_symbols) {  /* Builds a table for a code of at most four symbols; returns 1 << root_bits. */
+  uint32_t table_size = 1;
+  const uint32_t goal_size = 1U << root_bits;
+  switch (num_symbols) {  /* NOTE(review): a shape selector, not a literal count - cases 3 and 4 both read val[0..3]; confirm with caller. */
+    case 0:  /* One symbol, 0-bit code. */
+      table[0] = ConstructHuffmanCode(0, val[0]);
+      break;
+    case 1:  /* Two symbols, 1 bit each, smaller value first. */
+      if (val[1] > val[0]) {
+        table[0] = ConstructHuffmanCode(1, val[0]);
+        table[1] = ConstructHuffmanCode(1, val[1]);
+      } else {
+        table[0] = ConstructHuffmanCode(1, val[1]);
+        table[1] = ConstructHuffmanCode(1, val[0]);
+      }
+      table_size = 2;
+      break;
+    case 2:  /* Three symbols: val[0] gets 1 bit, val[1] and val[2] get 2 bits. */
+      table[0] = ConstructHuffmanCode(1, val[0]);
+      table[2] = ConstructHuffmanCode(1, val[0]);
+      if (val[2] > val[1]) {
+        table[1] = ConstructHuffmanCode(2, val[1]);
+        table[3] = ConstructHuffmanCode(2, val[2]);
+      } else {
+        table[1] = ConstructHuffmanCode(2, val[2]);
+        table[3] = ConstructHuffmanCode(2, val[1]);
+      }
+      table_size = 4;
+      break;
+    case 3: {  /* Four symbols, 2 bits each; val[0..3] is sorted in place first. */
+      int i, k;
+      for (i = 0; i < 3; ++i) {
+        for (k = i + 1; k < 4; ++k) {
+          if (val[k] < val[i]) {
+            uint16_t t = val[k];
+            val[k] = val[i];
+            val[i] = t;
+          }
+        }
+      }
+      table[0] = ConstructHuffmanCode(2, val[0]);
+      table[2] = ConstructHuffmanCode(2, val[1]);
+      table[1] = ConstructHuffmanCode(2, val[2]);
+      table[3] = ConstructHuffmanCode(2, val[3]);
+      table_size = 4;
+      break;
+    }
+    case 4: {  /* Four symbols with lengths 1, 2, 3, 3; val[2] and val[3] are ordered in place. */
+      if (val[3] < val[2]) {
+        uint16_t t = val[3];
+        val[3] = val[2];
+        val[2] = t;
+      }
+      table[0] = ConstructHuffmanCode(1, val[0]);
+      table[1] = ConstructHuffmanCode(2, val[1]);
+      table[2] = ConstructHuffmanCode(1, val[0]);
+      table[3] = ConstructHuffmanCode(3, val[2]);
+      table[4] = ConstructHuffmanCode(1, val[0]);
+      table[5] = ConstructHuffmanCode(2, val[1]);
+      table[6] = ConstructHuffmanCode(1, val[0]);
+      table[7] = ConstructHuffmanCode(3, val[3]);
+      table_size = 8;
+      break;
+    }
+  }
+  while (table_size != goal_size) {  /* Double the filled prefix until goal_size entries exist. */
+    memcpy(&table[table_size], &table[0],
+           (size_t)table_size * sizeof(table[0]));
+    table_size <<= 1;
+  }
+  return goal_size;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/huffman.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/huffman.h
new file mode 100755
index 0000000000..b9f0716c16
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/huffman.h
@@ -0,0 +1,127 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Utilities for building Huffman decoding tables. */
+
+#ifndef BROTLI_DEC_HUFFMAN_H_
+#define BROTLI_DEC_HUFFMAN_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_HUFFMAN_MAX_CODE_LENGTH 15
+
+/* kMaxHuffmanTableSize[index] is the maximum possible Huffman table size for
+   an alphabet size of (index * 32), max code length 15 and root table bits 8. */
+static const uint16_t kMaxHuffmanTableSize[] = {
+  256, 402, 436, 468, 500, 534, 566, 598, 630, 662, 694, 726, 758, 790, 822,
+  854, 886, 920, 952, 984, 1016, 1048, 1080, 1112, 1144, 1176, 1208, 1240, 1272,
+  1304, 1336, 1368, 1400, 1432, 1464, 1496, 1528};
+/* BROTLI_NUM_BLOCK_LEN_SYMBOLS == 26 */
+#define BROTLI_HUFFMAN_MAX_SIZE_26 396
+/* BROTLI_MAX_BLOCK_TYPE_SYMBOLS == 258 */
+#define BROTLI_HUFFMAN_MAX_SIZE_258 632
+/* BROTLI_MAX_CONTEXT_MAP_SYMBOLS == 272 */
+#define BROTLI_HUFFMAN_MAX_SIZE_272 646
+
+#define BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH 5
+
+#if ((defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_32)) && \
+  BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0))
+#define BROTLI_HUFFMAN_CODE_FAST_LOAD
+#endif
+
+#if !defined(BROTLI_HUFFMAN_CODE_FAST_LOAD)
+/* Table entry used when BROTLI_HUFFMAN_CODE_FAST_LOAD is not defined. Do not
+ * create this struct directly - use the ConstructHuffmanCode constructor below! */
+typedef struct {
+  uint8_t bits;    /* number of bits used for this symbol */
+  uint16_t value;  /* symbol value or table offset */
+} HuffmanCode;
+
+static BROTLI_INLINE HuffmanCode ConstructHuffmanCode(const uint8_t bits,
+    const uint16_t value) {
+  HuffmanCode entry;    /* struct form of a table entry */
+  entry.value = value;  /* symbol value or table offset */
+  entry.bits = bits;    /* number of bits used for this symbol */
+  return entry;
+}
+
+/* Please use the following macros to optimize HuffmanCode accesses in hot
+ * paths.
+ *
+ * For example, assuming |table| contains a HuffmanCode pointer:
+ *
+ *   BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table);
+ *   BROTLI_HC_ADJUST_TABLE_INDEX(table, index_into_table);
+ *   *bits = BROTLI_HC_FAST_LOAD_BITS(table);
+ *   *value = BROTLI_HC_FAST_LOAD_VALUE(table);
+ *   BROTLI_HC_ADJUST_TABLE_INDEX(table, offset);
+ *   *bits2 = BROTLI_HC_FAST_LOAD_BITS(table);
+ *   *value2 = BROTLI_HC_FAST_LOAD_VALUE(table);
+ *
+ */
+
+#define BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(H)
+#define BROTLI_HC_ADJUST_TABLE_INDEX(H, V) H += (V)
+
+/* These must be given a HuffmanCode pointer! */
+#define BROTLI_HC_FAST_LOAD_BITS(H) (H->bits)
+#define BROTLI_HC_FAST_LOAD_VALUE(H) (H->value)
+
+#else /* BROTLI_HUFFMAN_CODE_FAST_LOAD */
+
+typedef BROTLI_ALIGNED(4) uint32_t HuffmanCode;
+
+static BROTLI_INLINE HuffmanCode ConstructHuffmanCode(const uint8_t bits,
+    const uint16_t value) {  /* value -> bits 31..16, bits -> bits 7..0 */
+  return ((uint32_t)value << 16) | (uint32_t)bits;  /* unsigned: no int-shift UB */
+}
+
+#define BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(H) uint32_t __fastload_##H = (*H)
+#define BROTLI_HC_ADJUST_TABLE_INDEX(H, V) H += (V); __fastload_##H = (*H)
+
+/* These must be given a HuffmanCode pointer! */
+#define BROTLI_HC_FAST_LOAD_BITS(H) ((__fastload_##H) & 0xFF)
+#define BROTLI_HC_FAST_LOAD_VALUE(H) ((__fastload_##H) >> 16)
+#endif /* BROTLI_HUFFMAN_CODE_FAST_LOAD */
+
+/* Builds Huffman lookup table assuming code lengths are in symbol order. */
+BROTLI_INTERNAL void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* root_table,
+    const uint8_t* const code_lengths, uint16_t* count);
+
+/* Builds Huffman lookup table assuming code lengths are in symbol order.
+   Returns size of resulting table. */
+BROTLI_INTERNAL uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
+    int root_bits, const uint16_t* const symbol_lists, uint16_t* count_arg);
+
+/* Builds a simple Huffman table. The |num_symbols| parameter is to be
+   interpreted as follows: 0 means 1 symbol, 1 means 2 symbols,
+   2 means 3 symbols, 3 means 4 symbols with lengths [2, 2, 2, 2],
+   4 means 4 symbols with lengths [1, 2, 3, 3]. */
+BROTLI_INTERNAL uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
+    int root_bits, uint16_t* symbols, uint32_t num_symbols);
+
+/* Contains a collection of Huffman trees with the same alphabet size. */
+/* max_symbol is needed due to simple codes since log2(alphabet_size) could be
+   greater than log2(max_symbol). */
+typedef struct {
+  HuffmanCode** htrees;    /* presumably one table pointer per tree; confirm */
+  HuffmanCode* codes;      /* presumably backing storage for the tables; confirm */
+  uint16_t alphabet_size;  /* alphabet size shared by every tree in the group */
+  uint16_t max_symbol;     /* see note above; can be smaller than alphabet_size */
+  uint16_t num_htrees;     /* presumably the number of trees in htrees; confirm */
+} HuffmanTreeGroup;
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_DEC_HUFFMAN_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/prefix.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/prefix.h
new file mode 100755
index 0000000000..3ea062d84a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/prefix.h
@@ -0,0 +1,750 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Lookup tables to map prefix codes to value ranges. This is used during
+   decoding of the block lengths, literal insertion lengths and copy lengths. */
+
+#ifndef BROTLI_DEC_PREFIX_H_
+#define BROTLI_DEC_PREFIX_H_
+
+#include "../common/constants.h"
+#include <brotli/types.h>
+
+/* Represents the range of values belonging to a prefix code:
+   [offset, offset + 2^nbits) */
+struct PrefixCodeRange {
+  uint16_t offset;  /* first value of the range */
+  uint8_t nbits;    /* the range holds 2^nbits values */
+};
+
+static const struct PrefixCodeRange
+    kBlockLengthPrefixCode[BROTLI_NUM_BLOCK_LEN_SYMBOLS] = {  /* {offset, nbits} */
+  {   1,  2}, {    5,  2}, {  9,   2}, {  13,  2},
+  {  17,  3}, {   25,  3}, {  33,  3}, {  41,  3},
+  {  49,  4}, {   65,  4}, {  81,  4}, {  97,  4},
+  { 113,  5}, {  145,  5}, { 177,  5}, { 209,  5},
+  { 241,  6}, {  305,  6}, { 369,  7}, { 497,  8},
+  { 753,  9}, { 1265, 10}, {2289, 11}, {4337, 12},
+  {8433, 13}, {16625, 24}
+};
+
+typedef struct CmdLutElement {
+  uint8_t insert_len_extra_bits;  /* presumably extra-bit count for insert length */
+  uint8_t copy_len_extra_bits;    /* presumably extra-bit count for copy length */
+  int8_t distance_code;           /* signed; appears to be 0 or -1 in kCmdLut - confirm */
+  uint8_t context;                /* TODO confirm meaning against the decoder */
+  uint16_t insert_len_offset;     /* presumably base value of the insert length */
+  uint16_t copy_len_offset;       /* presumably base value of the copy length */
+} CmdLutElement;
+
+static const CmdLutElement kCmdLut[BROTLI_NUM_COMMAND_SYMBOLS] = {
+  { 0x00, 0x00, 0, 0x00, 0x0000, 0x0002 },
+  { 0x00, 0x00, 0, 0x01, 0x0000, 0x0003 },
+  { 0x00, 0x00, 0, 0x02, 0x0000, 0x0004 },
+  { 0x00, 0x00, 0, 0x03, 0x0000, 0x0005 },
+  { 0x00, 0x00, 0, 0x03, 0x0000, 0x0006 },
+  { 0x00, 0x00, 0, 0x03, 0x0000, 0x0007 },
+  { 0x00, 0x00, 0, 0x03, 0x0000, 0x0008 },
+  { 0x00, 0x00, 0, 0x03, 0x0000, 0x0009 },
+  { 0x00, 0x00, 0, 0x00, 0x0001, 0x0002 },
+  { 0x00, 0x00, 0, 0x01, 0x0001, 0x0003 },
+  { 0x00, 0x00, 0, 0x02, 0x0001, 0x0004 },
+  { 0x00, 0x00, 0, 0x03, 0x0001, 0x0005 },
+  { 0x00, 0x00, 0, 0x03, 0x0001, 0x0006 },
+  { 0x00, 0x00, 0, 0x03, 0x0001, 0x0007 },
+  { 0x00, 0x00, 0, 0x03, 0x0001, 0x0008 },
+  { 0x00, 0x00, 0, 0x03, 0x0001, 0x0009 },
+  { 0x00, 0x00, 0, 0x00, 0x0002, 0x0002 },
+  { 0x00, 0x00, 0, 0x01, 0x0002, 0x0003 },
+  { 0x00, 0x00, 0, 0x02, 0x0002, 0x0004 },
+  { 0x00, 0x00, 0, 0x03, 0x0002, 0x0005 },
+  { 0x00, 0x00, 0, 0x03, 0x0002, 0x0006 },
+  { 0x00, 0x00, 0, 0x03, 0x0002, 0x0007 },
+  { 0x00, 0x00, 0, 0x03, 0x0002, 0x0008 },
+  { 0x00, 0x00, 0, 0x03, 0x0002, 0x0009 },
+  { 0x00, 0x00, 0, 0x00, 0x0003, 0x0002 },
+  { 0x00, 0x00, 0, 0x01, 0x0003, 0x0003 },
+  { 0x00, 0x00, 0, 0x02, 0x0003, 0x0004 },
+  { 0x00, 0x00, 0, 0x03, 0x0003, 0x0005 },
+  { 0x00, 0x00, 0, 0x03, 0x0003, 0x0006 },
+  { 0x00, 0x00, 0, 0x03, 0x0003, 0x0007 },
+  { 0x00, 0x00, 0, 0x03, 0x0003, 0x0008 },
+  { 0x00, 0x00, 0, 0x03, 0x0003, 0x0009 },
+  { 0x00, 0x00, 0, 0x00, 0x0004, 0x0002 },
+  { 0x00, 0x00, 0, 0x01, 0x0004, 0x0003 },
+  { 0x00, 0x00, 0, 0x02, 0x0004, 0x0004 },
+  { 0x00, 0x00, 0, 0x03, 0x0004, 0x0005 },
+  { 0x00, 0x00, 0, 0x03, 0x0004, 0x0006 },
+  { 0x00, 0x00, 0, 0x03, 0x0004, 0x0007 },
+  { 0x00, 0x00, 0, 0x03, 0x0004, 0x0008 },
+  { 0x00, 0x00, 0, 0x03, 0x0004, 0x0009 },
+  { 0x00, 0x00, 0, 0x00, 0x0005, 0x0002 },
+  { 0x00, 0x00, 0, 0x01, 0x0005, 0x0003 },
+  { 0x00, 0x00, 0, 0x02, 0x0005, 0x0004 },
+  { 0x00, 0x00, 0, 0x03, 0x0005, 0x0005 },
+  { 0x00, 0x00, 0, 0x03, 0x0005, 0x0006 },
+  { 0x00, 0x00, 0, 0x03, 0x0005, 0x0007 },
+  { 0x00, 0x00, 0, 0x03, 0x0005, 0x0008 },
+  { 0x00, 0x00, 0, 0x03, 0x0005, 0x0009 },
+  { 0x01, 0x00, 0, 0x00, 0x0006, 0x0002 },
+  { 0x01, 0x00, 0, 0x01, 0x0006, 0x0003 },
+  { 0x01, 0x00, 0, 0x02, 0x0006, 0x0004 },
+  { 0x01, 0x00, 0, 0x03, 0x0006, 0x0005 },
+  { 0x01, 0x00, 0, 0x03, 0x0006, 0x0006 },
+  { 0x01, 0x00, 0, 0x03, 0x0006, 0x0007 },
+  { 0x01, 0x00, 0, 0x03, 0x0006, 0x0008 },
+  { 0x01, 0x00, 0, 0x03, 0x0006, 0x0009 },
+  { 0x01, 0x00, 0, 0x00, 0x0008, 0x0002 },
+  { 0x01, 0x00, 0, 0x01, 0x0008, 0x0003 },
+  { 0x01, 0x00, 0, 0x02, 0x0008, 0x0004 },
+  { 0x01, 0x00, 0, 0x03, 0x0008, 0x0005 },
+  { 0x01, 0x00, 0, 0x03, 0x0008, 0x0006 },
+  { 0x01, 0x00, 0, 0x03, 0x0008, 0x0007 },
+  { 0x01, 0x00, 0, 0x03, 0x0008, 0x0008 },
+  { 0x01, 0x00, 0, 0x03, 0x0008, 0x0009 },
+  { 0x00, 0x01, 0, 0x03, 0x0000, 0x000a },
+  { 0x00, 0x01, 0, 0x03, 0x0000, 0x000c },
+  { 0x00, 0x02, 0, 0x03, 0x0000, 0x000e },
+  { 0x00, 0x02, 0, 0x03, 0x0000, 0x0012 },
+  { 0x00, 0x03, 0, 0x03, 0x0000, 0x0016 },
+  { 0x00, 0x03, 0, 0x03, 0x0000, 0x001e },
+  { 0x00, 0x04, 0, 0x03, 0x0000, 0x0026 },
+  { 0x00, 0x04, 0, 0x03, 0x0000, 0x0036 },
+  { 0x00, 0x01, 0, 0x03, 0x0001, 0x000a },
+  { 0x00, 0x01, 0, 0x03, 0x0001, 0x000c },
+  { 0x00, 0x02, 0, 0x03, 0x0001, 0x000e },
+  { 0x00, 0x02, 0, 0x03, 0x0001, 0x0012 },
+  { 0x00, 0x03, 0, 0x03, 0x0001, 0x0016 },
+  { 0x00, 0x03, 0, 0x03, 0x0001, 0x001e },
+  { 0x00, 0x04, 0, 0x03, 0x0001, 0x0026 },
+  { 0x00, 0x04, 0, 0x03, 0x0001, 0x0036 },
+  { 0x00, 0x01, 0, 0x03, 0x0002, 0x000a },
+  { 0x00, 0x01, 0, 0x03, 0x0002, 0x000c },
+  { 0x00, 0x02, 0, 0x03, 0x0002, 0x000e },
+  { 0x00, 0x02, 0, 0x03, 0x0002, 0x0012 },
+  { 0x00, 0x03, 0, 0x03, 0x0002, 0x0016 },
+  { 0x00, 0x03, 0, 0x03, 0x0002, 0x001e },
+  { 0x00, 0x04, 0, 0x03, 0x0002, 0x0026 },
+  { 0x00, 0x04, 0, 0x03, 0x0002, 0x0036 },
+  { 0x00, 0x01, 0, 0x03, 0x0003, 0x000a },
+  { 0x00, 0x01, 0, 0x03, 0x0003, 0x000c },
+  { 0x00, 0x02, 0, 0x03, 0x0003, 0x000e },
+  { 0x00, 0x02, 0, 0x03, 0x0003, 0x0012 },
+  { 0x00, 0x03, 0, 0x03, 0x0003, 0x0016 },
+  { 0x00, 0x03, 0, 0x03, 0x0003, 0x001e },
+  { 0x00, 0x04, 0, 0x03, 0x0003, 0x0026 },
+  { 0x00, 0x04, 0, 0x03, 0x0003, 0x0036 },
+  { 0x00, 0x01, 0, 0x03, 0x0004, 0x000a },
+  { 0x00, 0x01, 0, 0x03, 0x0004, 0x000c },
+  { 0x00, 0x02, 0, 0x03, 0x0004, 0x000e },
+  { 0x00, 0x02, 0, 0x03, 0x0004, 0x0012 },
+  { 0x00, 0x03, 0, 0x03, 0x0004, 0x0016 },
+  { 0x00, 0x03, 0, 0x03, 0x0004, 0x001e },
+  { 0x00, 0x04, 0, 0x03, 0x0004, 0x0026 },
+  { 0x00, 0x04, 0, 0x03, 0x0004, 0x0036 },
+  { 0x00, 0x01, 0, 0x03, 0x0005, 0x000a },
+  { 0x00, 0x01, 0, 0x03, 0x0005, 0x000c },
+  { 0x00, 0x02, 0, 0x03, 0x0005, 0x000e },
+  { 0x00, 0x02, 0, 0x03, 0x0005, 0x0012 },
+  { 0x00, 0x03, 0, 0x03, 0x0005, 0x0016 },
+  { 0x00, 0x03, 0, 0x03, 0x0005, 0x001e },
+  { 0x00, 0x04, 0, 0x03, 0x0005, 0x0026 },
+  { 0x00, 0x04, 0, 0x03, 0x0005, 0x0036 },
+  { 0x01, 0x01, 0, 0x03, 0x0006, 0x000a },
+  { 0x01, 0x01, 0, 0x03, 0x0006, 0x000c },
+  { 0x01, 0x02, 0, 0x03, 0x0006, 0x000e },
+  { 0x01, 0x02, 0, 0x03, 0x0006, 0x0012 },
+  { 0x01, 0x03, 0, 0x03, 0x0006, 0x0016 },
+  { 0x01, 0x03, 0, 0x03, 0x0006, 0x001e },
+  { 0x01, 0x04, 0, 0x03, 0x0006, 0x0026 },
+  { 0x01, 0x04, 0, 0x03, 0x0006, 0x0036 },
+  { 0x01, 0x01, 0, 0x03, 0x0008, 0x000a },
+  { 0x01, 0x01, 0, 0x03, 0x0008, 0x000c },
+  { 0x01, 0x02, 0, 0x03, 0x0008, 0x000e },
+  { 0x01, 0x02, 0, 0x03, 0x0008, 0x0012 },
+  { 0x01, 0x03, 0, 0x03, 0x0008, 0x0016 },
+  { 0x01, 0x03, 0, 0x03, 0x0008, 0x001e },
+  { 0x01, 0x04, 0, 0x03, 0x0008, 0x0026 },
+  { 0x01, 0x04, 0, 0x03, 0x0008, 0x0036 },
+  { 0x00, 0x00, -1, 0x00, 0x0000, 0x0002 },
+  { 0x00, 0x00, -1, 0x01, 0x0000, 0x0003 },
+  { 0x00, 0x00, -1, 0x02, 0x0000, 0x0004 },
+  { 0x00, 0x00, -1, 0x03, 0x0000, 0x0005 },
+  { 0x00, 0x00, -1, 0x03, 0x0000, 0x0006 },
+  { 0x00, 0x00, -1, 0x03, 0x0000, 0x0007 },
+  { 0x00, 0x00, -1, 0x03, 0x0000, 0x0008 },
+  { 0x00, 0x00, -1, 0x03, 0x0000, 0x0009 },
+  { 0x00, 0x00, -1, 0x00, 0x0001, 0x0002 },
+  { 0x00, 0x00, -1, 0x01, 0x0001, 0x0003 },
+  { 0x00, 0x00, -1, 0x02, 0x0001, 0x0004 },
+  { 0x00, 0x00, -1, 0x03, 0x0001, 0x0005 },
+  { 0x00, 0x00, -1, 0x03, 0x0001, 0x0006 },
+  { 0x00, 0x00, -1, 0x03, 0x0001, 0x0007 },
+  { 0x00, 0x00, -1, 0x03, 0x0001, 0x0008 },
+  { 0x00, 0x00, -1, 0x03, 0x0001, 0x0009 },
+  { 0x00, 0x00, -1, 0x00, 0x0002, 0x0002 },
+  { 0x00, 0x00, -1, 0x01, 0x0002, 0x0003 },
+  { 0x00, 0x00, -1, 0x02, 0x0002, 0x0004 },
+  { 0x00, 0x00, -1, 0x03, 0x0002, 0x0005 },
+  { 0x00, 0x00, -1, 0x03, 0x0002, 0x0006 },
+  { 0x00, 0x00, -1, 0x03, 0x0002, 0x0007 },
+  { 0x00, 0x00, -1, 0x03, 0x0002, 0x0008 },
+  { 0x00, 0x00, -1, 0x03, 0x0002, 0x0009 },
+  { 0x00, 0x00, -1, 0x00, 0x0003, 0x0002 },
+  { 0x00, 0x00, -1, 0x01, 0x0003, 0x0003 },
+  { 0x00, 0x00, -1, 0x02, 0x0003, 0x0004 },
+  { 0x00, 0x00, -1, 0x03, 0x0003, 0x0005 },
+  { 0x00, 0x00, -1, 0x03, 0x0003, 0x0006 },
+  { 0x00, 0x00, -1, 0x03, 0x0003, 0x0007 },
+  { 0x00, 0x00, -1, 0x03, 0x0003, 0x0008 },
+  { 0x00, 0x00, -1, 0x03, 0x0003, 0x0009 },
+  { 0x00, 0x00, -1, 0x00, 0x0004, 0x0002 },
+  { 0x00, 0x00, -1, 0x01, 0x0004, 0x0003 },
+  { 0x00, 0x00, -1, 0x02, 0x0004, 0x0004 },
+  { 0x00, 0x00, -1, 0x03, 0x0004, 0x0005 },
+  { 0x00, 0x00, -1, 0x03, 0x0004, 0x0006 },
+  { 0x00, 0x00, -1, 0x03, 0x0004, 0x0007 },
+  { 0x00, 0x00, -1, 0x03, 0x0004, 0x0008 },
+  { 0x00, 0x00, -1, 0x03, 0x0004, 0x0009 },
+  { 0x00, 0x00, -1, 0x00, 0x0005, 0x0002 },
+  { 0x00, 0x00, -1, 0x01, 0x0005, 0x0003 },
+  { 0x00, 0x00, -1, 0x02, 0x0005, 0x0004 },
+  { 0x00, 0x00, -1, 0x03, 0x0005, 0x0005 },
+  { 0x00, 0x00, -1, 0x03, 0x0005, 0x0006 },
+  { 0x00, 0x00, -1, 0x03, 0x0005, 0x0007 },
+  { 0x00, 0x00, -1, 0x03, 0x0005, 0x0008 },
+  { 0x00, 0x00, -1, 0x03, 0x0005, 0x0009 },
+  { 0x01, 0x00, -1, 0x00, 0x0006, 0x0002 },
+  { 0x01, 0x00, -1, 0x01, 0x0006, 0x0003 },
+  { 0x01, 0x00, -1, 0x02, 0x0006, 0x0004 },
+  { 0x01, 0x00, -1, 0x03, 0x0006, 0x0005 },
+  { 0x01, 0x00, -1, 0x03, 0x0006, 0x0006 },
+  { 0x01, 0x00, -1, 0x03, 0x0006, 0x0007 },
+  { 0x01, 0x00, -1, 0x03, 0x0006, 0x0008 },
+  { 0x01, 0x00, -1, 0x03, 0x0006, 0x0009 },
+  { 0x01, 0x00, -1, 0x00, 0x0008, 0x0002 },
+  { 0x01, 0x00, -1, 0x01, 0x0008, 0x0003 },
+  { 0x01, 0x00, -1, 0x02, 0x0008, 0x0004 },
+  { 0x01, 0x00, -1, 0x03, 0x0008, 0x0005 },
+  { 0x01, 0x00, -1, 0x03, 0x0008, 0x0006 },
+  { 0x01, 0x00, -1, 0x03, 0x0008, 0x0007 },
+  { 0x01, 0x00, -1, 0x03, 0x0008, 0x0008 },
+  { 0x01, 0x00, -1, 0x03, 0x0008, 0x0009 },
+  { 0x00, 0x01, -1, 0x03, 0x0000, 0x000a },
+  { 0x00, 0x01, -1, 0x03, 0x0000, 0x000c },
+  { 0x00, 0x02, -1, 0x03, 0x0000, 0x000e },
+  { 0x00, 0x02, -1, 0x03, 0x0000, 0x0012 },
+  { 0x00, 0x03, -1, 0x03, 0x0000, 0x0016 },
+  { 0x00, 0x03, -1, 0x03, 0x0000, 0x001e },
+  { 0x00, 0x04, -1, 0x03, 0x0000, 0x0026 },
+  { 0x00, 0x04, -1, 0x03, 0x0000, 0x0036 },
+  { 0x00, 0x01, -1, 0x03, 0x0001, 0x000a },
+  { 0x00, 0x01, -1, 0x03, 0x0001, 0x000c },
+  { 0x00, 0x02, -1, 0x03, 0x0001, 0x000e },
+  { 0x00, 0x02, -1, 0x03, 0x0001, 0x0012 },
+  { 0x00, 0x03, -1, 0x03, 0x0001, 0x0016 },
+  { 0x00, 0x03, -1, 0x03, 0x0001, 0x001e },
+  { 0x00, 0x04, -1, 0x03, 0x0001, 0x0026 },
+  { 0x00, 0x04, -1, 0x03, 0x0001, 0x0036 },
+  { 0x00, 0x01, -1, 0x03, 0x0002, 0x000a },
+  { 0x00, 0x01, -1, 0x03, 0x0002, 0x000c },
+  { 0x00, 0x02, -1, 0x03, 0x0002, 0x000e },
+  { 0x00, 0x02, -1, 0x03, 0x0002, 0x0012 },
+  { 0x00, 0x03, -1, 0x03, 0x0002, 0x0016 },
+  { 0x00, 0x03, -1, 0x03, 0x0002, 0x001e },
+  { 0x00, 0x04, -1, 0x03, 0x0002, 0x0026 },
+  { 0x00, 0x04, -1, 0x03, 0x0002, 0x0036 },
+  { 0x00, 0x01, -1, 0x03, 0x0003, 0x000a },
+  { 0x00, 0x01, -1, 0x03, 0x0003, 0x000c },
+  { 0x00, 0x02, -1, 0x03, 0x0003, 0x000e },
+  { 0x00, 0x02, -1, 0x03, 0x0003, 0x0012 },
+  { 0x00, 0x03, -1, 0x03, 0x0003, 0x0016 },
+  { 0x00, 0x03, -1, 0x03, 0x0003, 0x001e },
+  { 0x00, 0x04, -1, 0x03, 0x0003, 0x0026 },
+  { 0x00, 0x04, -1, 0x03, 0x0003, 0x0036 },
+  { 0x00, 0x01, -1, 0x03, 0x0004, 0x000a },
+  { 0x00, 0x01, -1, 0x03, 0x0004, 0x000c },
+  { 0x00, 0x02, -1, 0x03, 0x0004, 0x000e },
+  { 0x00, 0x02, -1, 0x03, 0x0004, 0x0012 },
+  { 0x00, 0x03, -1, 0x03, 0x0004, 0x0016 },
+  { 0x00, 0x03, -1, 0x03, 0x0004, 0x001e },
+  { 0x00, 0x04, -1, 0x03, 0x0004, 0x0026 },
+  { 0x00, 0x04, -1, 0x03, 0x0004, 0x0036 },
+  { 0x00, 0x01, -1, 0x03, 0x0005, 0x000a },
+  { 0x00, 0x01, -1, 0x03, 0x0005, 0x000c },
+  { 0x00, 0x02, -1, 0x03, 0x0005, 0x000e },
+  { 0x00, 0x02, -1, 0x03, 0x0005, 0x0012 },
+  { 0x00, 0x03, -1, 0x03, 0x0005, 0x0016 },
+  { 0x00, 0x03, -1, 0x03, 0x0005, 0x001e },
+  { 0x00, 0x04, -1, 0x03, 0x0005, 0x0026 },
+  { 0x00, 0x04, -1, 0x03, 0x0005, 0x0036 },
+  { 0x01, 0x01, -1, 0x03, 0x0006, 0x000a },
+  { 0x01, 0x01, -1, 0x03, 0x0006, 0x000c },
+  { 0x01, 0x02, -1, 0x03, 0x0006, 0x000e },
+  { 0x01, 0x02, -1, 0x03, 0x0006, 0x0012 },
+  { 0x01, 0x03, -1, 0x03, 0x0006, 0x0016 },
+  { 0x01, 0x03, -1, 0x03, 0x0006, 0x001e },
+  { 0x01, 0x04, -1, 0x03, 0x0006, 0x0026 },
+  { 0x01, 0x04, -1, 0x03, 0x0006, 0x0036 },
+  { 0x01, 0x01, -1, 0x03, 0x0008, 0x000a },
+  { 0x01, 0x01, -1, 0x03, 0x0008, 0x000c },
+  { 0x01, 0x02, -1, 0x03, 0x0008, 0x000e },
+  { 0x01, 0x02, -1, 0x03, 0x0008, 0x0012 },
+  { 0x01, 0x03, -1, 0x03, 0x0008, 0x0016 },
+  { 0x01, 0x03, -1, 0x03, 0x0008, 0x001e },
+  { 0x01, 0x04, -1, 0x03, 0x0008, 0x0026 },
+  { 0x01, 0x04, -1, 0x03, 0x0008, 0x0036 },
+  { 0x02, 0x00, -1, 0x00, 0x000a, 0x0002 },
+  { 0x02, 0x00, -1, 0x01, 0x000a, 0x0003 },
+  { 0x02, 0x00, -1, 0x02, 0x000a, 0x0004 },
+  { 0x02, 0x00, -1, 0x03, 0x000a, 0x0005 },
+  { 0x02, 0x00, -1, 0x03, 0x000a, 0x0006 },
+  { 0x02, 0x00, -1, 0x03, 0x000a, 0x0007 },
+  { 0x02, 0x00, -1, 0x03, 0x000a, 0x0008 },
+  { 0x02, 0x00, -1, 0x03, 0x000a, 0x0009 },
+  { 0x02, 0x00, -1, 0x00, 0x000e, 0x0002 },
+  { 0x02, 0x00, -1, 0x01, 0x000e, 0x0003 },
+  { 0x02, 0x00, -1, 0x02, 0x000e, 0x0004 },
+  { 0x02, 0x00, -1, 0x03, 0x000e, 0x0005 },
+  { 0x02, 0x00, -1, 0x03, 0x000e, 0x0006 },
+  { 0x02, 0x00, -1, 0x03, 0x000e, 0x0007 },
+  { 0x02, 0x00, -1, 0x03, 0x000e, 0x0008 },
+  { 0x02, 0x00, -1, 0x03, 0x000e, 0x0009 },
+  { 0x03, 0x00, -1, 0x00, 0x0012, 0x0002 },
+  { 0x03, 0x00, -1, 0x01, 0x0012, 0x0003 },
+  { 0x03, 0x00, -1, 0x02, 0x0012, 0x0004 },
+  { 0x03, 0x00, -1, 0x03, 0x0012, 0x0005 },
+  { 0x03, 0x00, -1, 0x03, 0x0012, 0x0006 },
+  { 0x03, 0x00, -1, 0x03, 0x0012, 0x0007 },
+  { 0x03, 0x00, -1, 0x03, 0x0012, 0x0008 },
+  { 0x03, 0x00, -1, 0x03, 0x0012, 0x0009 },
+  { 0x03, 0x00, -1, 0x00, 0x001a, 0x0002 },
+  { 0x03, 0x00, -1, 0x01, 0x001a, 0x0003 },
+  { 0x03, 0x00, -1, 0x02, 0x001a, 0x0004 },
+  { 0x03, 0x00, -1, 0x03, 0x001a, 0x0005 },
+  { 0x03, 0x00, -1, 0x03, 0x001a, 0x0006 },
+  { 0x03, 0x00, -1, 0x03, 0x001a, 0x0007 },
+  { 0x03, 0x00, -1, 0x03, 0x001a, 0x0008 },
+  { 0x03, 0x00, -1, 0x03, 0x001a, 0x0009 },
+  { 0x04, 0x00, -1, 0x00, 0x0022, 0x0002 },
+  { 0x04, 0x00, -1, 0x01, 0x0022, 0x0003 },
+  { 0x04, 0x00, -1, 0x02, 0x0022, 0x0004 },
+  { 0x04, 0x00, -1, 0x03, 0x0022, 0x0005 },
+  { 0x04, 0x00, -1, 0x03, 0x0022, 0x0006 },
+  { 0x04, 0x00, -1, 0x03, 0x0022, 0x0007 },
+  { 0x04, 0x00, -1, 0x03, 0x0022, 0x0008 },
+  { 0x04, 0x00, -1, 0x03, 0x0022, 0x0009 },
+  { 0x04, 0x00, -1, 0x00, 0x0032, 0x0002 },
+  { 0x04, 0x00, -1, 0x01, 0x0032, 0x0003 },
+  { 0x04, 0x00, -1, 0x02, 0x0032, 0x0004 },
+  { 0x04, 0x00, -1, 0x03, 0x0032, 0x0005 },
+  { 0x04, 0x00, -1, 0x03, 0x0032, 0x0006 },
+  { 0x04, 0x00, -1, 0x03, 0x0032, 0x0007 },
+  { 0x04, 0x00, -1, 0x03, 0x0032, 0x0008 },
+  { 0x04, 0x00, -1, 0x03, 0x0032, 0x0009 },
+  { 0x05, 0x00, -1, 0x00, 0x0042, 0x0002 },
+  { 0x05, 0x00, -1, 0x01, 0x0042, 0x0003 },
+  { 0x05, 0x00, -1, 0x02, 0x0042, 0x0004 },
+  { 0x05, 0x00, -1, 0x03, 0x0042, 0x0005 },
+  { 0x05, 0x00, -1, 0x03, 0x0042, 0x0006 },
+  { 0x05, 0x00, -1, 0x03, 0x0042, 0x0007 },
+  { 0x05, 0x00, -1, 0x03, 0x0042, 0x0008 },
+  { 0x05, 0x00, -1, 0x03, 0x0042, 0x0009 },
+  { 0x05, 0x00, -1, 0x00, 0x0062, 0x0002 },
+  { 0x05, 0x00, -1, 0x01, 0x0062, 0x0003 },
+  { 0x05, 0x00, -1, 0x02, 0x0062, 0x0004 },
+  { 0x05, 0x00, -1, 0x03, 0x0062, 0x0005 },
+  { 0x05, 0x00, -1, 0x03, 0x0062, 0x0006 },
+  { 0x05, 0x00, -1, 0x03, 0x0062, 0x0007 },
+  { 0x05, 0x00, -1, 0x03, 0x0062, 0x0008 },
+  { 0x05, 0x00, -1, 0x03, 0x0062, 0x0009 },
+  { 0x02, 0x01, -1, 0x03, 0x000a, 0x000a },
+  { 0x02, 0x01, -1, 0x03, 0x000a, 0x000c },
+  { 0x02, 0x02, -1, 0x03, 0x000a, 0x000e },
+  { 0x02, 0x02, -1, 0x03, 0x000a, 0x0012 },
+  { 0x02, 0x03, -1, 0x03, 0x000a, 0x0016 },
+  { 0x02, 0x03, -1, 0x03, 0x000a, 0x001e },
+  { 0x02, 0x04, -1, 0x03, 0x000a, 0x0026 },
+  { 0x02, 0x04, -1, 0x03, 0x000a, 0x0036 },
+  { 0x02, 0x01, -1, 0x03, 0x000e, 0x000a },
+  { 0x02, 0x01, -1, 0x03, 0x000e, 0x000c },
+  { 0x02, 0x02, -1, 0x03, 0x000e, 0x000e },
+  { 0x02, 0x02, -1, 0x03, 0x000e, 0x0012 },
+  { 0x02, 0x03, -1, 0x03, 0x000e, 0x0016 },
+  { 0x02, 0x03, -1, 0x03, 0x000e, 0x001e },
+  { 0x02, 0x04, -1, 0x03, 0x000e, 0x0026 },
+  { 0x02, 0x04, -1, 0x03, 0x000e, 0x0036 },
+  { 0x03, 0x01, -1, 0x03, 0x0012, 0x000a },
+  { 0x03, 0x01, -1, 0x03, 0x0012, 0x000c },
+  { 0x03, 0x02, -1, 0x03, 0x0012, 0x000e },
+  { 0x03, 0x02, -1, 0x03, 0x0012, 0x0012 },
+  { 0x03, 0x03, -1, 0x03, 0x0012, 0x0016 },
+  { 0x03, 0x03, -1, 0x03, 0x0012, 0x001e },
+  { 0x03, 0x04, -1, 0x03, 0x0012, 0x0026 },
+  { 0x03, 0x04, -1, 0x03, 0x0012, 0x0036 },
+  { 0x03, 0x01, -1, 0x03, 0x001a, 0x000a },
+  { 0x03, 0x01, -1, 0x03, 0x001a, 0x000c },
+  { 0x03, 0x02, -1, 0x03, 0x001a, 0x000e },
+  { 0x03, 0x02, -1, 0x03, 0x001a, 0x0012 },
+  { 0x03, 0x03, -1, 0x03, 0x001a, 0x0016 },
+  { 0x03, 0x03, -1, 0x03, 0x001a, 0x001e },
+  { 0x03, 0x04, -1, 0x03, 0x001a, 0x0026 },
+  { 0x03, 0x04, -1, 0x03, 0x001a, 0x0036 },
+  { 0x04, 0x01, -1, 0x03, 0x0022, 0x000a },
+  { 0x04, 0x01, -1, 0x03, 0x0022, 0x000c },
+  { 0x04, 0x02, -1, 0x03, 0x0022, 0x000e },
+  { 0x04, 0x02, -1, 0x03, 0x0022, 0x0012 },
+  { 0x04, 0x03, -1, 0x03, 0x0022, 0x0016 },
+  { 0x04, 0x03, -1, 0x03, 0x0022, 0x001e },
+  { 0x04, 0x04, -1, 0x03, 0x0022, 0x0026 },
+  { 0x04, 0x04, -1, 0x03, 0x0022, 0x0036 },
+  { 0x04, 0x01, -1, 0x03, 0x0032, 0x000a },
+  { 0x04, 0x01, -1, 0x03, 0x0032, 0x000c },
+  { 0x04, 0x02, -1, 0x03, 0x0032, 0x000e },
+  { 0x04, 0x02, -1, 0x03, 0x0032, 0x0012 },
+  { 0x04, 0x03, -1, 0x03, 0x0032, 0x0016 },
+  { 0x04, 0x03, -1, 0x03, 0x0032, 0x001e },
+  { 0x04, 0x04, -1, 0x03, 0x0032, 0x0026 },
+  { 0x04, 0x04, -1, 0x03, 0x0032, 0x0036 },
+  { 0x05, 0x01, -1, 0x03, 0x0042, 0x000a },
+  { 0x05, 0x01, -1, 0x03, 0x0042, 0x000c },
+  { 0x05, 0x02, -1, 0x03, 0x0042, 0x000e },
+  { 0x05, 0x02, -1, 0x03, 0x0042, 0x0012 },
+  { 0x05, 0x03, -1, 0x03, 0x0042, 0x0016 },
+  { 0x05, 0x03, -1, 0x03, 0x0042, 0x001e },
+  { 0x05, 0x04, -1, 0x03, 0x0042, 0x0026 },
+  { 0x05, 0x04, -1, 0x03, 0x0042, 0x0036 },
+  { 0x05, 0x01, -1, 0x03, 0x0062, 0x000a },
+  { 0x05, 0x01, -1, 0x03, 0x0062, 0x000c },
+  { 0x05, 0x02, -1, 0x03, 0x0062, 0x000e },
+  { 0x05, 0x02, -1, 0x03, 0x0062, 0x0012 },
+  { 0x05, 0x03, -1, 0x03, 0x0062, 0x0016 },
+  { 0x05, 0x03, -1, 0x03, 0x0062, 0x001e },
+  { 0x05, 0x04, -1, 0x03, 0x0062, 0x0026 },
+  { 0x05, 0x04, -1, 0x03, 0x0062, 0x0036 },
+  { 0x00, 0x05, -1, 0x03, 0x0000, 0x0046 },
+  { 0x00, 0x05, -1, 0x03, 0x0000, 0x0066 },
+  { 0x00, 0x06, -1, 0x03, 0x0000, 0x0086 },
+  { 0x00, 0x07, -1, 0x03, 0x0000, 0x00c6 },
+  { 0x00, 0x08, -1, 0x03, 0x0000, 0x0146 },
+  { 0x00, 0x09, -1, 0x03, 0x0000, 0x0246 },
+  { 0x00, 0x0a, -1, 0x03, 0x0000, 0x0446 },
+  { 0x00, 0x18, -1, 0x03, 0x0000, 0x0846 },
+  { 0x00, 0x05, -1, 0x03, 0x0001, 0x0046 },
+  { 0x00, 0x05, -1, 0x03, 0x0001, 0x0066 },
+  { 0x00, 0x06, -1, 0x03, 0x0001, 0x0086 },
+  { 0x00, 0x07, -1, 0x03, 0x0001, 0x00c6 },
+  { 0x00, 0x08, -1, 0x03, 0x0001, 0x0146 },
+  { 0x00, 0x09, -1, 0x03, 0x0001, 0x0246 },
+  { 0x00, 0x0a, -1, 0x03, 0x0001, 0x0446 },
+  { 0x00, 0x18, -1, 0x03, 0x0001, 0x0846 },
+  { 0x00, 0x05, -1, 0x03, 0x0002, 0x0046 },
+  { 0x00, 0x05, -1, 0x03, 0x0002, 0x0066 },
+  { 0x00, 0x06, -1, 0x03, 0x0002, 0x0086 },
+  { 0x00, 0x07, -1, 0x03, 0x0002, 0x00c6 },
+  { 0x00, 0x08, -1, 0x03, 0x0002, 0x0146 },
+  { 0x00, 0x09, -1, 0x03, 0x0002, 0x0246 },
+  { 0x00, 0x0a, -1, 0x03, 0x0002, 0x0446 },
+  { 0x00, 0x18, -1, 0x03, 0x0002, 0x0846 },
+  { 0x00, 0x05, -1, 0x03, 0x0003, 0x0046 },
+  { 0x00, 0x05, -1, 0x03, 0x0003, 0x0066 },
+  { 0x00, 0x06, -1, 0x03, 0x0003, 0x0086 },
+  { 0x00, 0x07, -1, 0x03, 0x0003, 0x00c6 },
+  { 0x00, 0x08, -1, 0x03, 0x0003, 0x0146 },
+  { 0x00, 0x09, -1, 0x03, 0x0003, 0x0246 },
+  { 0x00, 0x0a, -1, 0x03, 0x0003, 0x0446 },
+  { 0x00, 0x18, -1, 0x03, 0x0003, 0x0846 },
+  { 0x00, 0x05, -1, 0x03, 0x0004, 0x0046 },
+  { 0x00, 0x05, -1, 0x03, 0x0004, 0x0066 },
+  { 0x00, 0x06, -1, 0x03, 0x0004, 0x0086 },
+  { 0x00, 0x07, -1, 0x03, 0x0004, 0x00c6 },
+  { 0x00, 0x08, -1, 0x03, 0x0004, 0x0146 },
+  { 0x00, 0x09, -1, 0x03, 0x0004, 0x0246 },
+  { 0x00, 0x0a, -1, 0x03, 0x0004, 0x0446 },
+  { 0x00, 0x18, -1, 0x03, 0x0004, 0x0846 },
+  { 0x00, 0x05, -1, 0x03, 0x0005, 0x0046 },
+  { 0x00, 0x05, -1, 0x03, 0x0005, 0x0066 },
+  { 0x00, 0x06, -1, 0x03, 0x0005, 0x0086 },
+  { 0x00, 0x07, -1, 0x03, 0x0005, 0x00c6 },
+  { 0x00, 0x08, -1, 0x03, 0x0005, 0x0146 },
+  { 0x00, 0x09, -1, 0x03, 0x0005, 0x0246 },
+  { 0x00, 0x0a, -1, 0x03, 0x0005, 0x0446 },
+  { 0x00, 0x18, -1, 0x03, 0x0005, 0x0846 },
+  { 0x01, 0x05, -1, 0x03, 0x0006, 0x0046 },
+  { 0x01, 0x05, -1, 0x03, 0x0006, 0x0066 },
+  { 0x01, 0x06, -1, 0x03, 0x0006, 0x0086 },
+  { 0x01, 0x07, -1, 0x03, 0x0006, 0x00c6 },
+  { 0x01, 0x08, -1, 0x03, 0x0006, 0x0146 },
+  { 0x01, 0x09, -1, 0x03, 0x0006, 0x0246 },
+  { 0x01, 0x0a, -1, 0x03, 0x0006, 0x0446 },
+  { 0x01, 0x18, -1, 0x03, 0x0006, 0x0846 },
+  { 0x01, 0x05, -1, 0x03, 0x0008, 0x0046 },
+  { 0x01, 0x05, -1, 0x03, 0x0008, 0x0066 },
+  { 0x01, 0x06, -1, 0x03, 0x0008, 0x0086 },
+  { 0x01, 0x07, -1, 0x03, 0x0008, 0x00c6 },
+  { 0x01, 0x08, -1, 0x03, 0x0008, 0x0146 },
+  { 0x01, 0x09, -1, 0x03, 0x0008, 0x0246 },
+  { 0x01, 0x0a, -1, 0x03, 0x0008, 0x0446 },
+  { 0x01, 0x18, -1, 0x03, 0x0008, 0x0846 },
+  { 0x06, 0x00, -1, 0x00, 0x0082, 0x0002 },
+  { 0x06, 0x00, -1, 0x01, 0x0082, 0x0003 },
+  { 0x06, 0x00, -1, 0x02, 0x0082, 0x0004 },
+  { 0x06, 0x00, -1, 0x03, 0x0082, 0x0005 },
+  { 0x06, 0x00, -1, 0x03, 0x0082, 0x0006 },
+  { 0x06, 0x00, -1, 0x03, 0x0082, 0x0007 },
+  { 0x06, 0x00, -1, 0x03, 0x0082, 0x0008 },
+  { 0x06, 0x00, -1, 0x03, 0x0082, 0x0009 },
+  { 0x07, 0x00, -1, 0x00, 0x00c2, 0x0002 },
+  { 0x07, 0x00, -1, 0x01, 0x00c2, 0x0003 },
+  { 0x07, 0x00, -1, 0x02, 0x00c2, 0x0004 },
+  { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0005 },
+  { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0006 },
+  { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0007 },
+  { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0008 },
+  { 0x07, 0x00, -1, 0x03, 0x00c2, 0x0009 },
+  { 0x08, 0x00, -1, 0x00, 0x0142, 0x0002 },
+  { 0x08, 0x00, -1, 0x01, 0x0142, 0x0003 },
+  { 0x08, 0x00, -1, 0x02, 0x0142, 0x0004 },
+  { 0x08, 0x00, -1, 0x03, 0x0142, 0x0005 },
+  { 0x08, 0x00, -1, 0x03, 0x0142, 0x0006 },
+  { 0x08, 0x00, -1, 0x03, 0x0142, 0x0007 },
+  { 0x08, 0x00, -1, 0x03, 0x0142, 0x0008 },
+  { 0x08, 0x00, -1, 0x03, 0x0142, 0x0009 },
+  { 0x09, 0x00, -1, 0x00, 0x0242, 0x0002 },
+  { 0x09, 0x00, -1, 0x01, 0x0242, 0x0003 },
+  { 0x09, 0x00, -1, 0x02, 0x0242, 0x0004 },
+  { 0x09, 0x00, -1, 0x03, 0x0242, 0x0005 },
+  { 0x09, 0x00, -1, 0x03, 0x0242, 0x0006 },
+  { 0x09, 0x00, -1, 0x03, 0x0242, 0x0007 },
+  { 0x09, 0x00, -1, 0x03, 0x0242, 0x0008 },
+  { 0x09, 0x00, -1, 0x03, 0x0242, 0x0009 },
+  { 0x0a, 0x00, -1, 0x00, 0x0442, 0x0002 },
+  { 0x0a, 0x00, -1, 0x01, 0x0442, 0x0003 },
+  { 0x0a, 0x00, -1, 0x02, 0x0442, 0x0004 },
+  { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0005 },
+  { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0006 },
+  { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0007 },
+  { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0008 },
+  { 0x0a, 0x00, -1, 0x03, 0x0442, 0x0009 },
+  { 0x0c, 0x00, -1, 0x00, 0x0842, 0x0002 },
+  { 0x0c, 0x00, -1, 0x01, 0x0842, 0x0003 },
+  { 0x0c, 0x00, -1, 0x02, 0x0842, 0x0004 },
+  { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0005 },
+  { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0006 },
+  { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0007 },
+  { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0008 },
+  { 0x0c, 0x00, -1, 0x03, 0x0842, 0x0009 },
+  { 0x0e, 0x00, -1, 0x00, 0x1842, 0x0002 },
+  { 0x0e, 0x00, -1, 0x01, 0x1842, 0x0003 },
+  { 0x0e, 0x00, -1, 0x02, 0x1842, 0x0004 },
+  { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0005 },
+  { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0006 },
+  { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0007 },
+  { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0008 },
+  { 0x0e, 0x00, -1, 0x03, 0x1842, 0x0009 },
+  { 0x18, 0x00, -1, 0x00, 0x5842, 0x0002 },
+  { 0x18, 0x00, -1, 0x01, 0x5842, 0x0003 },
+  { 0x18, 0x00, -1, 0x02, 0x5842, 0x0004 },
+  { 0x18, 0x00, -1, 0x03, 0x5842, 0x0005 },
+  { 0x18, 0x00, -1, 0x03, 0x5842, 0x0006 },
+  { 0x18, 0x00, -1, 0x03, 0x5842, 0x0007 },
+  { 0x18, 0x00, -1, 0x03, 0x5842, 0x0008 },
+  { 0x18, 0x00, -1, 0x03, 0x5842, 0x0009 },
+  { 0x02, 0x05, -1, 0x03, 0x000a, 0x0046 },
+  { 0x02, 0x05, -1, 0x03, 0x000a, 0x0066 },
+  { 0x02, 0x06, -1, 0x03, 0x000a, 0x0086 },
+  { 0x02, 0x07, -1, 0x03, 0x000a, 0x00c6 },
+  { 0x02, 0x08, -1, 0x03, 0x000a, 0x0146 },
+  { 0x02, 0x09, -1, 0x03, 0x000a, 0x0246 },
+  { 0x02, 0x0a, -1, 0x03, 0x000a, 0x0446 },
+  { 0x02, 0x18, -1, 0x03, 0x000a, 0x0846 },
+  { 0x02, 0x05, -1, 0x03, 0x000e, 0x0046 },
+  { 0x02, 0x05, -1, 0x03, 0x000e, 0x0066 },
+  { 0x02, 0x06, -1, 0x03, 0x000e, 0x0086 },
+  { 0x02, 0x07, -1, 0x03, 0x000e, 0x00c6 },
+  { 0x02, 0x08, -1, 0x03, 0x000e, 0x0146 },
+  { 0x02, 0x09, -1, 0x03, 0x000e, 0x0246 },
+  { 0x02, 0x0a, -1, 0x03, 0x000e, 0x0446 },
+  { 0x02, 0x18, -1, 0x03, 0x000e, 0x0846 },
+  { 0x03, 0x05, -1, 0x03, 0x0012, 0x0046 },
+  { 0x03, 0x05, -1, 0x03, 0x0012, 0x0066 },
+  { 0x03, 0x06, -1, 0x03, 0x0012, 0x0086 },
+  { 0x03, 0x07, -1, 0x03, 0x0012, 0x00c6 },
+  { 0x03, 0x08, -1, 0x03, 0x0012, 0x0146 },
+  { 0x03, 0x09, -1, 0x03, 0x0012, 0x0246 },
+  { 0x03, 0x0a, -1, 0x03, 0x0012, 0x0446 },
+  { 0x03, 0x18, -1, 0x03, 0x0012, 0x0846 },
+  { 0x03, 0x05, -1, 0x03, 0x001a, 0x0046 },
+  { 0x03, 0x05, -1, 0x03, 0x001a, 0x0066 },
+  { 0x03, 0x06, -1, 0x03, 0x001a, 0x0086 },
+  { 0x03, 0x07, -1, 0x03, 0x001a, 0x00c6 },
+  { 0x03, 0x08, -1, 0x03, 0x001a, 0x0146 },
+  { 0x03, 0x09, -1, 0x03, 0x001a, 0x0246 },
+  { 0x03, 0x0a, -1, 0x03, 0x001a, 0x0446 },
+  { 0x03, 0x18, -1, 0x03, 0x001a, 0x0846 },
+  { 0x04, 0x05, -1, 0x03, 0x0022, 0x0046 },
+  { 0x04, 0x05, -1, 0x03, 0x0022, 0x0066 },
+  { 0x04, 0x06, -1, 0x03, 0x0022, 0x0086 },
+  { 0x04, 0x07, -1, 0x03, 0x0022, 0x00c6 },
+  { 0x04, 0x08, -1, 0x03, 0x0022, 0x0146 },
+  { 0x04, 0x09, -1, 0x03, 0x0022, 0x0246 },
+  { 0x04, 0x0a, -1, 0x03, 0x0022, 0x0446 },
+  { 0x04, 0x18, -1, 0x03, 0x0022, 0x0846 },
+  { 0x04, 0x05, -1, 0x03, 0x0032, 0x0046 },
+  { 0x04, 0x05, -1, 0x03, 0x0032, 0x0066 },
+  { 0x04, 0x06, -1, 0x03, 0x0032, 0x0086 },
+  { 0x04, 0x07, -1, 0x03, 0x0032, 0x00c6 },
+  { 0x04, 0x08, -1, 0x03, 0x0032, 0x0146 },
+  { 0x04, 0x09, -1, 0x03, 0x0032, 0x0246 },
+  { 0x04, 0x0a, -1, 0x03, 0x0032, 0x0446 },
+  { 0x04, 0x18, -1, 0x03, 0x0032, 0x0846 },
+  { 0x05, 0x05, -1, 0x03, 0x0042, 0x0046 },
+  { 0x05, 0x05, -1, 0x03, 0x0042, 0x0066 },
+  { 0x05, 0x06, -1, 0x03, 0x0042, 0x0086 },
+  { 0x05, 0x07, -1, 0x03, 0x0042, 0x00c6 },
+  { 0x05, 0x08, -1, 0x03, 0x0042, 0x0146 },
+  { 0x05, 0x09, -1, 0x03, 0x0042, 0x0246 },
+  { 0x05, 0x0a, -1, 0x03, 0x0042, 0x0446 },
+  { 0x05, 0x18, -1, 0x03, 0x0042, 0x0846 },
+  { 0x05, 0x05, -1, 0x03, 0x0062, 0x0046 },
+  { 0x05, 0x05, -1, 0x03, 0x0062, 0x0066 },
+  { 0x05, 0x06, -1, 0x03, 0x0062, 0x0086 },
+  { 0x05, 0x07, -1, 0x03, 0x0062, 0x00c6 },
+  { 0x05, 0x08, -1, 0x03, 0x0062, 0x0146 },
+  { 0x05, 0x09, -1, 0x03, 0x0062, 0x0246 },
+  { 0x05, 0x0a, -1, 0x03, 0x0062, 0x0446 },
+  { 0x05, 0x18, -1, 0x03, 0x0062, 0x0846 },
+  { 0x06, 0x01, -1, 0x03, 0x0082, 0x000a },
+  { 0x06, 0x01, -1, 0x03, 0x0082, 0x000c },
+  { 0x06, 0x02, -1, 0x03, 0x0082, 0x000e },
+  { 0x06, 0x02, -1, 0x03, 0x0082, 0x0012 },
+  { 0x06, 0x03, -1, 0x03, 0x0082, 0x0016 },
+  { 0x06, 0x03, -1, 0x03, 0x0082, 0x001e },
+  { 0x06, 0x04, -1, 0x03, 0x0082, 0x0026 },
+  { 0x06, 0x04, -1, 0x03, 0x0082, 0x0036 },
+  { 0x07, 0x01, -1, 0x03, 0x00c2, 0x000a },
+  { 0x07, 0x01, -1, 0x03, 0x00c2, 0x000c },
+  { 0x07, 0x02, -1, 0x03, 0x00c2, 0x000e },
+  { 0x07, 0x02, -1, 0x03, 0x00c2, 0x0012 },
+  { 0x07, 0x03, -1, 0x03, 0x00c2, 0x0016 },
+  { 0x07, 0x03, -1, 0x03, 0x00c2, 0x001e },
+  { 0x07, 0x04, -1, 0x03, 0x00c2, 0x0026 },
+  { 0x07, 0x04, -1, 0x03, 0x00c2, 0x0036 },
+  { 0x08, 0x01, -1, 0x03, 0x0142, 0x000a },
+  { 0x08, 0x01, -1, 0x03, 0x0142, 0x000c },
+  { 0x08, 0x02, -1, 0x03, 0x0142, 0x000e },
+  { 0x08, 0x02, -1, 0x03, 0x0142, 0x0012 },
+  { 0x08, 0x03, -1, 0x03, 0x0142, 0x0016 },
+  { 0x08, 0x03, -1, 0x03, 0x0142, 0x001e },
+  { 0x08, 0x04, -1, 0x03, 0x0142, 0x0026 },
+  { 0x08, 0x04, -1, 0x03, 0x0142, 0x0036 },
+  { 0x09, 0x01, -1, 0x03, 0x0242, 0x000a },
+  { 0x09, 0x01, -1, 0x03, 0x0242, 0x000c },
+  { 0x09, 0x02, -1, 0x03, 0x0242, 0x000e },
+  { 0x09, 0x02, -1, 0x03, 0x0242, 0x0012 },
+  { 0x09, 0x03, -1, 0x03, 0x0242, 0x0016 },
+  { 0x09, 0x03, -1, 0x03, 0x0242, 0x001e },
+  { 0x09, 0x04, -1, 0x03, 0x0242, 0x0026 },
+  { 0x09, 0x04, -1, 0x03, 0x0242, 0x0036 },
+  { 0x0a, 0x01, -1, 0x03, 0x0442, 0x000a },
+  { 0x0a, 0x01, -1, 0x03, 0x0442, 0x000c },
+  { 0x0a, 0x02, -1, 0x03, 0x0442, 0x000e },
+  { 0x0a, 0x02, -1, 0x03, 0x0442, 0x0012 },
+  { 0x0a, 0x03, -1, 0x03, 0x0442, 0x0016 },
+  { 0x0a, 0x03, -1, 0x03, 0x0442, 0x001e },
+  { 0x0a, 0x04, -1, 0x03, 0x0442, 0x0026 },
+  { 0x0a, 0x04, -1, 0x03, 0x0442, 0x0036 },
+  { 0x0c, 0x01, -1, 0x03, 0x0842, 0x000a },
+  { 0x0c, 0x01, -1, 0x03, 0x0842, 0x000c },
+  { 0x0c, 0x02, -1, 0x03, 0x0842, 0x000e },
+  { 0x0c, 0x02, -1, 0x03, 0x0842, 0x0012 },
+  { 0x0c, 0x03, -1, 0x03, 0x0842, 0x0016 },
+  { 0x0c, 0x03, -1, 0x03, 0x0842, 0x001e },
+  { 0x0c, 0x04, -1, 0x03, 0x0842, 0x0026 },
+  { 0x0c, 0x04, -1, 0x03, 0x0842, 0x0036 },
+  { 0x0e, 0x01, -1, 0x03, 0x1842, 0x000a },
+  { 0x0e, 0x01, -1, 0x03, 0x1842, 0x000c },
+  { 0x0e, 0x02, -1, 0x03, 0x1842, 0x000e },
+  { 0x0e, 0x02, -1, 0x03, 0x1842, 0x0012 },
+  { 0x0e, 0x03, -1, 0x03, 0x1842, 0x0016 },
+  { 0x0e, 0x03, -1, 0x03, 0x1842, 0x001e },
+  { 0x0e, 0x04, -1, 0x03, 0x1842, 0x0026 },
+  { 0x0e, 0x04, -1, 0x03, 0x1842, 0x0036 },
+  { 0x18, 0x01, -1, 0x03, 0x5842, 0x000a },
+  { 0x18, 0x01, -1, 0x03, 0x5842, 0x000c },
+  { 0x18, 0x02, -1, 0x03, 0x5842, 0x000e },
+  { 0x18, 0x02, -1, 0x03, 0x5842, 0x0012 },
+  { 0x18, 0x03, -1, 0x03, 0x5842, 0x0016 },
+  { 0x18, 0x03, -1, 0x03, 0x5842, 0x001e },
+  { 0x18, 0x04, -1, 0x03, 0x5842, 0x0026 },
+  { 0x18, 0x04, -1, 0x03, 0x5842, 0x0036 },
+  { 0x06, 0x05, -1, 0x03, 0x0082, 0x0046 },
+  { 0x06, 0x05, -1, 0x03, 0x0082, 0x0066 },
+  { 0x06, 0x06, -1, 0x03, 0x0082, 0x0086 },
+  { 0x06, 0x07, -1, 0x03, 0x0082, 0x00c6 },
+  { 0x06, 0x08, -1, 0x03, 0x0082, 0x0146 },
+  { 0x06, 0x09, -1, 0x03, 0x0082, 0x0246 },
+  { 0x06, 0x0a, -1, 0x03, 0x0082, 0x0446 },
+  { 0x06, 0x18, -1, 0x03, 0x0082, 0x0846 },
+  { 0x07, 0x05, -1, 0x03, 0x00c2, 0x0046 },
+  { 0x07, 0x05, -1, 0x03, 0x00c2, 0x0066 },
+  { 0x07, 0x06, -1, 0x03, 0x00c2, 0x0086 },
+  { 0x07, 0x07, -1, 0x03, 0x00c2, 0x00c6 },
+  { 0x07, 0x08, -1, 0x03, 0x00c2, 0x0146 },
+  { 0x07, 0x09, -1, 0x03, 0x00c2, 0x0246 },
+  { 0x07, 0x0a, -1, 0x03, 0x00c2, 0x0446 },
+  { 0x07, 0x18, -1, 0x03, 0x00c2, 0x0846 },
+  { 0x08, 0x05, -1, 0x03, 0x0142, 0x0046 },
+  { 0x08, 0x05, -1, 0x03, 0x0142, 0x0066 },
+  { 0x08, 0x06, -1, 0x03, 0x0142, 0x0086 },
+  { 0x08, 0x07, -1, 0x03, 0x0142, 0x00c6 },
+  { 0x08, 0x08, -1, 0x03, 0x0142, 0x0146 },
+  { 0x08, 0x09, -1, 0x03, 0x0142, 0x0246 },
+  { 0x08, 0x0a, -1, 0x03, 0x0142, 0x0446 },
+  { 0x08, 0x18, -1, 0x03, 0x0142, 0x0846 },
+  { 0x09, 0x05, -1, 0x03, 0x0242, 0x0046 },
+  { 0x09, 0x05, -1, 0x03, 0x0242, 0x0066 },
+  { 0x09, 0x06, -1, 0x03, 0x0242, 0x0086 },
+  { 0x09, 0x07, -1, 0x03, 0x0242, 0x00c6 },
+  { 0x09, 0x08, -1, 0x03, 0x0242, 0x0146 },
+  { 0x09, 0x09, -1, 0x03, 0x0242, 0x0246 },
+  { 0x09, 0x0a, -1, 0x03, 0x0242, 0x0446 },
+  { 0x09, 0x18, -1, 0x03, 0x0242, 0x0846 },
+  { 0x0a, 0x05, -1, 0x03, 0x0442, 0x0046 },
+  { 0x0a, 0x05, -1, 0x03, 0x0442, 0x0066 },
+  { 0x0a, 0x06, -1, 0x03, 0x0442, 0x0086 },
+  { 0x0a, 0x07, -1, 0x03, 0x0442, 0x00c6 },
+  { 0x0a, 0x08, -1, 0x03, 0x0442, 0x0146 },
+  { 0x0a, 0x09, -1, 0x03, 0x0442, 0x0246 },
+  { 0x0a, 0x0a, -1, 0x03, 0x0442, 0x0446 },
+  { 0x0a, 0x18, -1, 0x03, 0x0442, 0x0846 },
+  { 0x0c, 0x05, -1, 0x03, 0x0842, 0x0046 },
+  { 0x0c, 0x05, -1, 0x03, 0x0842, 0x0066 },
+  { 0x0c, 0x06, -1, 0x03, 0x0842, 0x0086 },
+  { 0x0c, 0x07, -1, 0x03, 0x0842, 0x00c6 },
+  { 0x0c, 0x08, -1, 0x03, 0x0842, 0x0146 },
+  { 0x0c, 0x09, -1, 0x03, 0x0842, 0x0246 },
+  { 0x0c, 0x0a, -1, 0x03, 0x0842, 0x0446 },
+  { 0x0c, 0x18, -1, 0x03, 0x0842, 0x0846 },
+  { 0x0e, 0x05, -1, 0x03, 0x1842, 0x0046 },
+  { 0x0e, 0x05, -1, 0x03, 0x1842, 0x0066 },
+  { 0x0e, 0x06, -1, 0x03, 0x1842, 0x0086 },
+  { 0x0e, 0x07, -1, 0x03, 0x1842, 0x00c6 },
+  { 0x0e, 0x08, -1, 0x03, 0x1842, 0x0146 },
+  { 0x0e, 0x09, -1, 0x03, 0x1842, 0x0246 },
+  { 0x0e, 0x0a, -1, 0x03, 0x1842, 0x0446 },
+  { 0x0e, 0x18, -1, 0x03, 0x1842, 0x0846 },
+  { 0x18, 0x05, -1, 0x03, 0x5842, 0x0046 },
+  { 0x18, 0x05, -1, 0x03, 0x5842, 0x0066 },
+  { 0x18, 0x06, -1, 0x03, 0x5842, 0x0086 },
+  { 0x18, 0x07, -1, 0x03, 0x5842, 0x00c6 },
+  { 0x18, 0x08, -1, 0x03, 0x5842, 0x0146 },
+  { 0x18, 0x09, -1, 0x03, 0x5842, 0x0246 },
+  { 0x18, 0x0a, -1, 0x03, 0x5842, 0x0446 },
+  { 0x18, 0x18, -1, 0x03, 0x5842, 0x0846 },
+};
+
+#endif  /* BROTLI_DEC_PREFIX_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/state.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/state.c
new file mode 100755
index 0000000000..e0b37c2dcd
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/state.c
@@ -0,0 +1,164 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "./state.h"
+
+#include <stdlib.h>  /* free, malloc */
+
+#include <brotli/types.h>
+#include "./huffman.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+  /* Installs the memory callbacks (the defaults when |alloc_func| is NULL, in
+     which case |free_func| and |opaque| are ignored) and resets the decoder
+     fields assigned below. Always returns BROTLI_TRUE. */
+  static const int kInitialDistRb[4] = { 16, 15, 11, 4 };
+  int i;
+
+  s->alloc_func = alloc_func ? alloc_func : BrotliDefaultAllocFunc;
+  s->free_func = alloc_func ? free_func : BrotliDefaultFreeFunc;
+  s->memory_manager_opaque = alloc_func ? opaque : 0;
+
+  /* Error status, bit reader and the top-level state machine. */
+  s->error_code = 0; /* BROTLI_DECODER_NO_ERROR */
+  BrotliInitBitReader(&s->br);
+  s->state = BROTLI_STATE_UNINITED;
+  s->large_window = 0;
+
+  /* Every nested sub-state starts at its NONE value. */
+  s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+  s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE;
+  s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
+  s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_NONE;
+  s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
+  s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
+  s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_NONE;
+
+  /* Counters and output bookkeeping. */
+  s->buffer_length = 0;
+  s->loop_counter = 0;
+  s->sub_loop_counter = 0;
+  s->pos = 0;
+  s->rb_roundtrips = 0;
+  s->partial_pos_out = 0;
+
+  /* The ring buffer starts out unallocated. */
+  s->ringbuffer = NULL;
+  s->ringbuffer_size = 0;
+  s->new_ringbuffer_size = 0;
+  s->ringbuffer_mask = 0;
+
+  /* Table pointers start out NULL. */
+  s->block_type_trees = NULL;
+  s->block_len_trees = NULL;
+  s->context_map = NULL;
+  s->context_modes = NULL;
+  s->dist_context_map = NULL;
+  s->context_map_slice = NULL;
+  s->dist_context_map_slice = NULL;
+  s->literal_hgroup.codes = NULL;
+  s->literal_hgroup.htrees = NULL;
+  s->insert_copy_hgroup.codes = NULL;
+  s->insert_copy_hgroup.htrees = NULL;
+  s->distance_hgroup.codes = NULL;
+  s->distance_hgroup.htrees = NULL;
+
+  /* Single-bit flags. */
+  s->is_last_metablock = 0;
+  s->is_uncompressed = 0;
+  s->is_metadata = 0;
+  s->should_wrap_ringbuffer = 0;
+  s->canny_ringbuffer_allocation = 1;
+
+  /* Window parameters and the initial contents of dist_rb. */
+  s->window_bits = 0;
+  s->max_distance = 0;
+  for (i = 0; i < 4; ++i) s->dist_rb[i] = kInitialDistRb[i];
+  s->dist_rb_idx = 0;
+
+  /* Point past the first BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1 slots so that
+     small negative indexes stay inside symbols_lists_array. */
+  s->symbol_lists = &s->symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1];
+
+  s->mtf_upper_bound = 63;
+  s->dictionary = BrotliGetDictionary();
+  s->transforms = BrotliGetTransforms();
+
+  return BROTLI_TRUE;
+}
+
+void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s) {
+  /* Resets the per-meta-block fields: remaining length, block length / type
+     bookkeeping, context-map pointers and Huffman tree group pointers.
+     Pointers are only overwritten with NULL; no memory is released. */
+  int i;
+  s->meta_block_remaining_len = 0;
+  for (i = 0; i < 3; ++i) {
+    s->block_length[i] = 1U << 24;
+    s->num_block_types[i] = 1;
+    /* The six block_type_rb slots alternate 1, 0. */
+    s->block_type_rb[2 * i] = 1;
+    s->block_type_rb[2 * i + 1] = 0;
+  }
+  s->dist_htree_index = 0;
+  s->context_lookup = NULL;
+  s->literal_htree = NULL;
+  /* Forget the table pointers; nothing is freed here. */
+  s->context_map = NULL;
+  s->context_modes = NULL;
+  s->dist_context_map = NULL;
+  s->context_map_slice = NULL;
+  s->dist_context_map_slice = NULL;
+  s->literal_hgroup.codes = NULL;
+  s->literal_hgroup.htrees = NULL;
+  s->insert_copy_hgroup.codes = NULL;
+  s->insert_copy_hgroup.htrees = NULL;
+  s->distance_hgroup.codes = NULL;
+  s->distance_hgroup.htrees = NULL;
+}
+
+void BrotliDecoderStateCleanupAfterMetablock(BrotliDecoderState* s) {
+  BROTLI_DECODER_FREE(s, s->context_modes);  /* s->free_func, then NULLed */
+  BROTLI_DECODER_FREE(s, s->context_map);
+  BROTLI_DECODER_FREE(s, s->dist_context_map);
+  BROTLI_DECODER_FREE(s, s->literal_hgroup.htrees);  /* codes not freed here */
+  BROTLI_DECODER_FREE(s, s->insert_copy_hgroup.htrees);
+  BROTLI_DECODER_FREE(s, s->distance_hgroup.htrees);
+}
+
+void BrotliDecoderStateCleanup(BrotliDecoderState* s) {
+  BrotliDecoderStateCleanupAfterMetablock(s);
+  /* NOTE(review): block_len_trees is not freed here; confirm it is not owned. */
+  BROTLI_DECODER_FREE(s, s->ringbuffer);
+  BROTLI_DECODER_FREE(s, s->block_type_trees);
+}
+
+BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s,
+    HuffmanTreeGroup* group, uint32_t alphabet_size, uint32_t max_symbol,
+    uint32_t ntrees) {
+  /* Pack two allocations into one: |ntrees| table pointers, then the codes. */
+  const size_t max_table_size = kMaxHuffmanTableSize[(alphabet_size + 31) >> 5];
+  const size_t code_size = sizeof(HuffmanCode) * ntrees * max_table_size;
+  const size_t htree_size = sizeof(HuffmanCode*) * ntrees;
+  /* Pointer alignment is, hopefully, wider than sizeof(HuffmanCode). */
+  HuffmanCode** p = (HuffmanCode**)BROTLI_DECODER_ALLOC(s,
+      code_size + htree_size);
+  group->alphabet_size = (uint16_t)alphabet_size;
+  group->max_symbol = (uint16_t)max_symbol;
+  group->num_htrees = (uint16_t)ntrees;
+  group->htrees = p;
+  group->codes = p ? (HuffmanCode*)(&p[ntrees]) : NULL;  /* NULL if alloc failed */
+  return !!p;  /* zero when the allocation failed */
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/state.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/state.h
new file mode 100755
index 0000000000..d28b63920e
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/dec/state.h
@@ -0,0 +1,258 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Brotli state for partial streaming decoding. */
+
+#ifndef BROTLI_DEC_STATE_H_
+#define BROTLI_DEC_STATE_H_
+
+#include "../common/constants.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include "../common/transform.h"
+#include <brotli/types.h>
+#include "./bit_reader.h"
+#include "./huffman.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef enum {  /* type of BrotliDecoderStateStruct.state */
+  BROTLI_STATE_UNINITED,  /* value assigned by BrotliDecoderStateInit */
+  BROTLI_STATE_LARGE_WINDOW_BITS,
+  BROTLI_STATE_INITIALIZE,
+  BROTLI_STATE_METABLOCK_BEGIN,
+  BROTLI_STATE_METABLOCK_HEADER,
+  BROTLI_STATE_METABLOCK_HEADER_2,
+  BROTLI_STATE_CONTEXT_MODES,
+  BROTLI_STATE_COMMAND_BEGIN,
+  BROTLI_STATE_COMMAND_INNER,
+  BROTLI_STATE_COMMAND_POST_DECODE_LITERALS,
+  BROTLI_STATE_COMMAND_POST_WRAP_COPY,
+  BROTLI_STATE_UNCOMPRESSED,
+  BROTLI_STATE_METADATA,
+  BROTLI_STATE_COMMAND_INNER_WRITE,
+  BROTLI_STATE_METABLOCK_DONE,
+  BROTLI_STATE_COMMAND_POST_WRITE_1,
+  BROTLI_STATE_COMMAND_POST_WRITE_2,
+  BROTLI_STATE_HUFFMAN_CODE_0,
+  BROTLI_STATE_HUFFMAN_CODE_1,
+  BROTLI_STATE_HUFFMAN_CODE_2,
+  BROTLI_STATE_HUFFMAN_CODE_3,
+  BROTLI_STATE_CONTEXT_MAP_1,
+  BROTLI_STATE_CONTEXT_MAP_2,
+  BROTLI_STATE_TREE_GROUP,
+  BROTLI_STATE_DONE
+} BrotliRunningState;
+
+typedef enum {  /* type of substate_metablock_header; init value is *_NONE */
+  BROTLI_STATE_METABLOCK_HEADER_NONE,
+  BROTLI_STATE_METABLOCK_HEADER_EMPTY,
+  BROTLI_STATE_METABLOCK_HEADER_NIBBLES,
+  BROTLI_STATE_METABLOCK_HEADER_SIZE,
+  BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED,
+  BROTLI_STATE_METABLOCK_HEADER_RESERVED,
+  BROTLI_STATE_METABLOCK_HEADER_BYTES,
+  BROTLI_STATE_METABLOCK_HEADER_METADATA
+} BrotliRunningMetablockHeaderState;
+
+typedef enum {  /* type of substate_uncompressed; init value is *_NONE */
+  BROTLI_STATE_UNCOMPRESSED_NONE,
+  BROTLI_STATE_UNCOMPRESSED_WRITE
+} BrotliRunningUncompressedState;
+
+typedef enum {  /* type of substate_tree_group; init value is *_NONE */
+  BROTLI_STATE_TREE_GROUP_NONE,
+  BROTLI_STATE_TREE_GROUP_LOOP
+} BrotliRunningTreeGroupState;
+
+typedef enum {  /* type of substate_context_map; init value is *_NONE */
+  BROTLI_STATE_CONTEXT_MAP_NONE,
+  BROTLI_STATE_CONTEXT_MAP_READ_PREFIX,
+  BROTLI_STATE_CONTEXT_MAP_HUFFMAN,
+  BROTLI_STATE_CONTEXT_MAP_DECODE,
+  BROTLI_STATE_CONTEXT_MAP_TRANSFORM
+} BrotliRunningContextMapState;
+
+typedef enum {  /* type of substate_huffman; init value is *_NONE */
+  BROTLI_STATE_HUFFMAN_NONE,
+  BROTLI_STATE_HUFFMAN_SIMPLE_SIZE,
+  BROTLI_STATE_HUFFMAN_SIMPLE_READ,
+  BROTLI_STATE_HUFFMAN_SIMPLE_BUILD,
+  BROTLI_STATE_HUFFMAN_COMPLEX,
+  BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS
+} BrotliRunningHuffmanState;
+
+typedef enum {  /* type of substate_decode_uint8; init value is *_NONE */
+  BROTLI_STATE_DECODE_UINT8_NONE,
+  BROTLI_STATE_DECODE_UINT8_SHORT,
+  BROTLI_STATE_DECODE_UINT8_LONG
+} BrotliRunningDecodeUint8State;
+
+typedef enum {  /* type of substate_read_block_length; init value is *_NONE */
+  BROTLI_STATE_READ_BLOCK_LENGTH_NONE,
+  BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX
+} BrotliRunningReadBlockLengthState;
+
+struct BrotliDecoderStateStruct {
+  BrotliRunningState state;  /* BrotliDecoderStateInit: BROTLI_STATE_UNINITED */
+
+  /* This counter is reused for several disjoint loops. */
+  int loop_counter;
+
+  BrotliBitReader br;
+
+  brotli_alloc_func alloc_func;  /* invoked by BROTLI_DECODER_ALLOC */
+  brotli_free_func free_func;  /* invoked by BROTLI_DECODER_FREE */
+  void* memory_manager_opaque;  /* first argument passed to both callbacks */
+
+  /* Temporary storage for remaining input. */
+  union {
+    uint64_t u64;
+    uint8_t u8[8];
+  } buffer;
+  uint32_t buffer_length;
+
+  int pos;
+  int max_backward_distance;
+  int max_distance;
+  int ringbuffer_size;
+  int ringbuffer_mask;
+  /* This ring buffer holds a few past copy distances that will be used by
+     some special distance codes. */
+  int dist_rb_idx;
+  int dist_rb[4];  /* BrotliDecoderStateInit: { 16, 15, 11, 4 } */
+  int error_code;  /* 0 is BROTLI_DECODER_NO_ERROR */
+  uint32_t sub_loop_counter;
+  uint8_t* ringbuffer;
+  uint8_t* ringbuffer_end;
+  HuffmanCode* htree_command;
+  const uint8_t* context_lookup;
+  uint8_t* context_map_slice;
+  uint8_t* dist_context_map_slice;
+
+  HuffmanTreeGroup literal_hgroup;
+  HuffmanTreeGroup insert_copy_hgroup;
+  HuffmanTreeGroup distance_hgroup;
+  HuffmanCode* block_type_trees;
+  HuffmanCode* block_len_trees;  /* not freed by BrotliDecoderStateCleanup */
+  /* This is true if the literal context map histogram type always matches the
+     block type. It is then not needed to keep the context (faster decoding). */
+  int trivial_literal_context;
+  /* Distance context is valid after the command is decoded and before the
+     distance is computed. After that it is used as a temporary variable. */
+  int distance_context;
+  int meta_block_remaining_len;
+  uint32_t block_length_index;
+  uint32_t block_length[3];  /* BrotliDecoderStateMetablockBegin: 1 << 24 each */
+  uint32_t num_block_types[3];
+  uint32_t block_type_rb[6];
+  uint32_t distance_postfix_bits;
+  uint32_t num_direct_distance_codes;
+  int distance_postfix_mask;
+  uint32_t num_dist_htrees;
+  uint8_t* dist_context_map;
+  HuffmanCode* literal_htree;
+  uint8_t dist_htree_index;
+  uint32_t repeat_code_len;
+  uint32_t prev_code_len;
+
+  int copy_length;
+  int distance_code;
+
+  /* For partial write operations. */
+  size_t rb_roundtrips;  /* how many times we went around the ring-buffer */
+  size_t partial_pos_out;  /* how much output to the user in total */
+
+  /* For ReadHuffmanCode. */
+  uint32_t symbol;
+  uint32_t repeat;
+  uint32_t space;
+
+  HuffmanCode table[32];
+  /* List of heads of symbol chains. */
+  uint16_t* symbol_lists;
+  /* Storage for symbol_lists. */
+  uint16_t symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1 +
+                               BROTLI_NUM_COMMAND_SYMBOLS];
+  /* Tails of symbol chains. */
+  int next_symbol[32];
+  uint8_t code_length_code_lengths[BROTLI_CODE_LENGTH_CODES];
+  /* Population counts for the code lengths. */
+  uint16_t code_length_histo[16];
+
+  /* For HuffmanTreeGroupDecode. */
+  int htree_index;
+  HuffmanCode* next;
+
+  /* For DecodeContextMap. */
+  uint32_t context_index;
+  uint32_t max_run_length_prefix;
+  uint32_t code;
+  HuffmanCode context_map_table[BROTLI_HUFFMAN_MAX_SIZE_272];
+
+  /* For InverseMoveToFrontTransform. */
+  uint32_t mtf_upper_bound;  /* BrotliDecoderStateInit: 63 */
+  uint32_t mtf[64 + 1];
+
+  /* Less used attributes are at the end of this struct. */
+
+  /* States inside function calls. */
+  BrotliRunningMetablockHeaderState substate_metablock_header;
+  BrotliRunningTreeGroupState substate_tree_group;
+  BrotliRunningContextMapState substate_context_map;
+  BrotliRunningUncompressedState substate_uncompressed;
+  BrotliRunningHuffmanState substate_huffman;
+  BrotliRunningDecodeUint8State substate_decode_uint8;
+  BrotliRunningReadBlockLengthState substate_read_block_length;
+
+  unsigned int is_last_metablock : 1;
+  unsigned int is_uncompressed : 1;
+  unsigned int is_metadata : 1;
+  unsigned int should_wrap_ringbuffer : 1;
+  unsigned int canny_ringbuffer_allocation : 1;  /* BrotliDecoderStateInit: 1 */
+  unsigned int large_window : 1;
+  unsigned int size_nibbles : 8;
+  uint32_t window_bits;
+
+  int new_ringbuffer_size;
+
+  uint32_t num_literal_htrees;
+  uint8_t* context_map;
+  uint8_t* context_modes;
+
+  const BrotliDictionary* dictionary;
+  const BrotliTransforms* transforms;
+
+  uint32_t trivial_literal_contexts[8];  /* 256 bits */
+};
+
+typedef struct BrotliDecoderStateStruct BrotliDecoderStateInternal;
+#define BrotliDecoderState BrotliDecoderStateInternal
+
+BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
+BROTLI_INTERNAL void BrotliDecoderStateCleanup(BrotliDecoderState* s);
+BROTLI_INTERNAL void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s);
+BROTLI_INTERNAL void BrotliDecoderStateCleanupAfterMetablock(
+    BrotliDecoderState* s);
+BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(
+    BrotliDecoderState* s, HuffmanTreeGroup* group, uint32_t alphabet_size,
+    uint32_t max_symbol, uint32_t ntrees);
+
+/* Memory helpers routed through S's callbacks; FREE also resets X to NULL. */
+#define BROTLI_DECODER_ALLOC(S, L) \
+  (S)->alloc_func((S)->memory_manager_opaque, (L))
+#define BROTLI_DECODER_FREE(S, X) {                \
+  (S)->free_func((S)->memory_manager_opaque, (X)); \
+  (X) = NULL; }
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_DEC_STATE_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references.c
new file mode 100755
index 0000000000..cd023d9b41
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references.c
@@ -0,0 +1,144 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function to find backward reference copies. */
+
+#include "./backward_references.h"
+
+#include "../common/constants.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./command.h"
+#include "./dictionary_hash.h"
+#include "./memory.h"
+#include "./quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
+                                                size_t max_distance,
+                                                const int* dist_cache) {
+  /* Returns a short code (0..15) when |distance| is within |max_distance| and
+     equals a |dist_cache| entry or lies within 3 of entry 0 or 1; otherwise
+     returns distance + BROTLI_NUM_DISTANCE_SHORT_CODES - 1. */
+  const size_t long_code = distance + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
+  size_t delta0;
+  size_t delta1;
+
+  if (distance > max_distance) return long_code;
+  if (distance == (size_t)dist_cache[0]) return 0;
+  if (distance == (size_t)dist_cache[1]) return 1;
+  /* The +3 bias maps a difference of -3..3 to nibble 0..6 of the packed
+     tables; unsigned wrap-around sends anything smaller far above 6. */
+  delta0 = distance + 3 - (size_t)dist_cache[0];
+  delta1 = distance + 3 - (size_t)dist_cache[1];
+  if (delta0 < 7) return (0x9750468 >> (4 * delta0)) & 0xF;
+  if (delta1 < 7) return (0xFDB1ACE >> (4 * delta1)) & 0xF;
+  if (distance == (size_t)dist_cache[2]) return 2;
+  if (distance == (size_t)dist_cache[3]) return 3;
+  return long_code;
+}
+
+#define EXPAND_CAT(a, b) CAT(a, b)  /* expands a and b, then pastes them */
+#define CAT(a, b) a ## b  /* raw token paste; arguments are not expanded */
+#define FN(X) EXPAND_CAT(X, HASHER())  /* X suffixed with current HASHER() */
+#define EXPORT_FN(X) EXPAND_CAT(X, EXPAND_CAT(PREFIX(), HASHER()))
+
+#define PREFIX() N  /* EXPORT_FN(Foo) becomes FooNH2 while HASHER() is H2 */
+
+#define HASHER() H2
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H3
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H4
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H5
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H6
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H40
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H41
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H42
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H54
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H35
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H55
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H65
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#undef PREFIX
+
+#undef EXPORT_FN
+#undef FN
+#undef CAT
+#undef EXPAND_CAT
+
+void BrotliCreateBackwardReferences(
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ringbuffer_mask, const BrotliEncoderParams* params,
+    HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals) {
+  switch (params->hasher.type) {  /* forward to CreateBackwardReferencesNH<type> */
+#define CASE_(N)                                                  \
+    case N:                                                       \
+      CreateBackwardReferencesNH ## N(                            \
+          num_bytes, position, ringbuffer,                        \
+          ringbuffer_mask, params, hasher, dist_cache,            \
+          last_insert_len, commands, num_commands, num_literals); \
+      return;
+    FOR_GENERIC_HASHERS(CASE_)  /* defined elsewhere; presumably one case per id */
+#undef CASE_
+    default:  /* any other hasher type: nothing is called, outputs untouched */
+      break;
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references.h
new file mode 100755
index 0000000000..3a4146647c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references.h
@@ -0,0 +1,38 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function to find backward reference copies. */
+
+#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
+#define BROTLI_ENC_BACKWARD_REFERENCES_H_
+
+#include "../common/constants.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./command.h"
+#include "./hash.h"
+#include "./quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* "commands" points to the next output command to write to, "*num_commands" is
+   initially the total amount of commands output by previous
+   CreateBackwardReferences calls, and must be incremented by the amount written
+   by this call. */
+BROTLI_INTERNAL void BrotliCreateBackwardReferences(
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ringbuffer_mask, const BrotliEncoderParams* params,
+    HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_BACKWARD_REFERENCES_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references_hq.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references_hq.c
new file mode 100755
index 0000000000..96b0e708de
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references_hq.c
@@ -0,0 +1,825 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function to find backward reference copies. */
+
+#include "./backward_references_hq.h"
+
+#include <string.h>  /* memcpy, memset */
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./command.h"
+#include "./fast_log.h"
+#include "./find_match_length.h"
+#include "./literal_cost.h"
+#include "./memory.h"
+#include "./params.h"
+#include "./prefix.h"
+#include "./quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE 544
+
+static const float kInfinity = 1.7e38f;  /* ~= 2 ^ 127 */
+
+static const uint32_t kDistanceCacheIndex[] = {  /* 16 entries, values 0..3 */
+  0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
+};
+static const int kDistanceCacheOffset[] = {  /* 16 entries, values -3..3 */
+  0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
+};  /* NOTE(review): presumably paired by position with kDistanceCacheIndex */
+
+void BrotliInitZopfliNodes(ZopfliNode* array, size_t length) {
+  /* Fills |array| with copies of a stub: length 1, cost kInfinity, rest 0. */
+  ZopfliNode blank;
+  blank.u.cost = kInfinity;
+  blank.dcode_insert_length = 0;
+  blank.distance = 0;
+  blank.length = 1;
+  while (length-- > 0) *array++ = blank;
+}
+
+static BROTLI_INLINE uint32_t ZopfliNodeCopyLength(const ZopfliNode* self) {
+  return self->length & ((1u << 25) - 1u);  /* keep the low 25 bits of length */
+}
+
+static BROTLI_INLINE uint32_t ZopfliNodeLengthCode(const ZopfliNode* self) {
+  /* Bits 25 and up of |length| hold a modifier subtracted from copy + 9. */
+  return ZopfliNodeCopyLength(self) + 9u - (uint32_t)(self->length >> 25);
+}
+
+static BROTLI_INLINE uint32_t ZopfliNodeCopyDistance(const ZopfliNode* self) {
+  return self->distance;  /* stored as-is; no masking or decoding */
+}
+
+static BROTLI_INLINE uint32_t ZopfliNodeDistanceCode(const ZopfliNode* self) {
+  /* Bits 27 and up of dcode_insert_length hold (short code + 1); 0 = none. */
+  const uint32_t tagged = self->dcode_insert_length >> 27;
+  if (tagged != 0) return tagged - 1;
+  return ZopfliNodeCopyDistance(self) + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
+}
+
+static BROTLI_INLINE uint32_t ZopfliNodeCommandLength(const ZopfliNode* self) {
+  return (self->dcode_insert_length & 0x7FFFFFF) + ZopfliNodeCopyLength(self);
+}  /* low 27 bits of dcode_insert_length plus the copy length */
+
+/* Histogram based cost model for zopflification. */
+typedef struct ZopfliCostModel {
+  /* The insert and copy length symbols. */
+  float cost_cmd_[BROTLI_NUM_COMMAND_SYMBOLS];
+  float* cost_dist_;  /* InitZopfliCostModel: dist->alphabet_size floats */
+  uint32_t distance_histogram_size;  /* alphabet size, capped at 544 */
+  /* Cumulative costs of literals per position in the stream. */
+  float* literal_costs_;  /* InitZopfliCostModel: num_bytes_ + 2 floats */
+  float min_cost_cmd_;  /* written together with cost_cmd_ */
+  size_t num_bytes_;
+} ZopfliCostModel;
+
+static void InitZopfliCostModel(
+    MemoryManager* m, ZopfliCostModel* self, const BrotliDistanceParams* dist,
+    size_t num_bytes) {
+  /* Allocates literal_costs_ (num_bytes + 2) and cost_dist_ (alphabet size). */
+  const uint32_t alphabet_size = dist->alphabet_size;
+  self->num_bytes_ = num_bytes;
+  self->literal_costs_ = BROTLI_ALLOC(m, float, num_bytes + 2);
+  self->cost_dist_ = BROTLI_ALLOC(m, float, dist->alphabet_size);
+  self->distance_histogram_size =  /* alphabet size, capped */
+      alphabet_size > BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE ?
+      BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE : alphabet_size;
+  if (BROTLI_IS_OOM(m)) return;
+}
+
+static void CleanupZopfliCostModel(MemoryManager* m, ZopfliCostModel* self) {
+  BROTLI_FREE(m, self->literal_costs_);  /* allocated in InitZopfliCostModel */
+  BROTLI_FREE(m, self->cost_dist_);  /* allocated in InitZopfliCostModel */
+}
+
+static void SetCost(const uint32_t* histogram, size_t histogram_size,
+                    BROTLI_BOOL literal_histogram, float* cost) {
+  /* Turns symbol counts into estimated bit costs. A symbol that occurs costs
+     FastLog2(total) - FastLog2(count), but at least 1. A symbol that never
+     occurs costs FastLog2(total') + 2, where total' is the total plus, unless
+     |literal_histogram| is set, one extra count per unused symbol. */
+  size_t total = 0;
+  size_t unused = 0;
+  size_t k;
+  float log2_total;
+  float unused_cost;
+
+  for (k = 0; k < histogram_size; ++k) {
+    total += histogram[k];
+    if (histogram[k] == 0) ++unused;
+  }
+  log2_total = (float)FastLog2(total);
+  unused_cost =
+      (float)FastLog2(literal_histogram ? total : total + unused) + 2;
+
+  for (k = 0; k < histogram_size; ++k) {
+    const uint32_t count = histogram[k];
+    if (count != 0) {
+      /* Shannon bits for this symbol; cannot be coded with less than 1 bit. */
+      cost[k] = log2_total - (float)FastLog2(count);
+      if (cost[k] < 1) cost[k] = 1;
+    } else {
+      cost[k] = unused_cost;
+    }
+  }
+}
+
+static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
+                                           size_t position,
+                                           const uint8_t* ringbuffer,
+                                           size_t ringbuffer_mask,
+                                           const Command* commands,
+                                           size_t num_commands,
+                                           size_t last_insert_len) {
+  uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS];
+  uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS];
+  uint32_t histogram_dist[BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE];
+  float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS];
+  size_t pos = position - last_insert_len;
+  float min_cost_cmd = kInfinity;
+  size_t i;
+  float* cost_cmd = self->cost_cmd_;
+  /* Re-derives all cost tables from the symbol statistics of |commands|. */
+  memset(histogram_literal, 0, sizeof(histogram_literal));
+  memset(histogram_cmd, 0, sizeof(histogram_cmd));
+  memset(histogram_dist, 0, sizeof(histogram_dist));
+  /* Tally command, distance and literal symbols while walking the input. */
+  for (i = 0; i < num_commands; i++) {
+    size_t inslength = commands[i].insert_len_;
+    size_t copylength = CommandCopyLen(&commands[i]);
+    size_t distcode = commands[i].dist_prefix_ & 0x3FF;  /* low 10 bits */
+    size_t cmdcode = commands[i].cmd_prefix_;
+    size_t j;
+
+    histogram_cmd[cmdcode]++;
+    if (cmdcode >= 128) histogram_dist[distcode]++;
+    /* NOTE(review): distcode <= 0x3FF but histogram_dist has 544 slots. */
+    for (j = 0; j < inslength; j++) {
+      histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
+    }
+    /* Advance past this command's inserted bytes and its copy. */
+    pos += inslength + copylength;
+  }
+  /* Only the literal histogram skips SetCost's unused-symbol adjustment. */
+  SetCost(histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, BROTLI_TRUE,
+          cost_literal);
+  SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, BROTLI_FALSE,
+          cost_cmd);
+  SetCost(histogram_dist, self->distance_histogram_size, BROTLI_FALSE,
+          self->cost_dist_);
+  /* Smallest entry of cost_cmd. */
+  for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
+    min_cost_cmd = BROTLI_MIN(float, min_cost_cmd, cost_cmd[i]);
+  }
+  self->min_cost_cmd_ = min_cost_cmd;
+  /* literal_costs[k]: summed cost of the k bytes starting at |position|. */
+  {
+    float* literal_costs = self->literal_costs_;
+    float literal_carry = 0.0;  /* part of the last addend lost to rounding */
+    size_t num_bytes = self->num_bytes_;
+    literal_costs[0] = 0.0;
+    for (i = 0; i < num_bytes; ++i) {
+      literal_carry +=
+          cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
+      literal_costs[i + 1] = literal_costs[i] + literal_carry;
+      literal_carry -= literal_costs[i + 1] - literal_costs[i];
+    }
+  }
+}
+
+static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self,
+                                               size_t position,
+                                               const uint8_t* ringbuffer,
+                                               size_t ringbuffer_mask) {
+  float* const prefix = self->literal_costs_;
+  const size_t num_bytes = self->num_bytes_;
+  float carry = 0.0;
+  size_t i;
+  /* Per-byte estimates are written to prefix[1..], then summed in place. */
+  BrotliEstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask,
+                                    ringbuffer, prefix + 1);
+  prefix[0] = 0.0;
+  for (i = 0; i < num_bytes; ++i) {
+    carry += prefix[i + 1];
+    prefix[i + 1] = prefix[i] + carry;
+    carry -= prefix[i + 1] - prefix[i];
+  }
+  /* Command and distance costs depend only on the symbol index. */
+  for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
+    self->cost_cmd_[i] = (float)FastLog2(11 + (uint32_t)i);
+  }
+  for (i = 0; i < self->distance_histogram_size; ++i) {
+    self->cost_dist_[i] = (float)FastLog2(20 + (uint32_t)i);
+  }
+  self->min_cost_cmd_ = (float)FastLog2(11);
+}
+
+static BROTLI_INLINE float ZopfliCostModelGetCommandCost(
+    const ZopfliCostModel* self, uint16_t cmdcode) {
+  return self->cost_cmd_[cmdcode];  /* Modelled cost of command |cmdcode|. */
+}
+
+static BROTLI_INLINE float ZopfliCostModelGetDistanceCost(
+    const ZopfliCostModel* self, size_t distcode) {
+  return self->cost_dist_[distcode];  /* Modelled cost of |distcode|. */
+}
+
+static BROTLI_INLINE float ZopfliCostModelGetLiteralCosts(
+    const ZopfliCostModel* self, size_t from, size_t to) {
+  return self->literal_costs_[to] - self->literal_costs_[from];
+}  /* Difference of prefix sums: literal cost of bytes [from, to). */
+
+static BROTLI_INLINE float ZopfliCostModelGetMinCostCmd(
+    const ZopfliCostModel* self) {
+  return self->min_cost_cmd_;  /* Cheapest command cost in the model. */
+}
+
+/* REQUIRES: len >= 2 and start_pos <= pos. */
+/* REQUIRES: cost < kInfinity and nodes[start_pos].cost < kInfinity. */
+/* Writes the node at pos + len; maintains the "ZopfliNode array invariant". */
+static BROTLI_INLINE void UpdateZopfliNode(ZopfliNode* nodes, size_t pos,
+    size_t start_pos, size_t len, size_t len_code, size_t dist,
+    size_t short_code, float cost) {
+  ZopfliNode* const target = nodes + (pos + len);
+  const size_t insert_len = pos - start_pos;
+  target->u.cost = cost;
+  target->distance = (uint32_t)dist;
+  target->length = (uint32_t)(len | ((len + 9u - len_code) << 25));
+  target->dcode_insert_length = (uint32_t)((short_code << 27) | insert_len);
+}
+
+typedef struct PosData {
+  size_t pos;             /* Candidate command start position. */
+  int distance_cache[4];  /* Last four distances in effect at |pos|. */
+  float costdiff;         /* |cost| minus the literal cost of bytes [0, pos). */
+  float cost;             /* Cost of the node at |pos| when it was queued. */
+} PosData;
+
+/* Maintains the 8 smallest cost differences together with their positions. */
+typedef struct StartPosQueue {
+  PosData q_[8];  /* Ring of entries, kept sorted by |costdiff|. */
+  size_t idx_;    /* Total number of pushes so far. */
+} StartPosQueue;
+
+static BROTLI_INLINE void InitStartPosQueue(StartPosQueue* self) {
+  self->idx_ = 0;  /* Empty queue: nothing has been pushed yet. */
+}
+
+static size_t StartPosQueueSize(const StartPosQueue* self) {
+  return (self->idx_ < 8) ? self->idx_ : 8;  /* At most 8 entries are held. */
+}
+
+static void StartPosQueuePush(StartPosQueue* self, const PosData* posdata) {
+  PosData* const slots = self->q_;
+  size_t cur = ~(self->idx_++) & 7;
+  size_t remaining = StartPosQueueSize(self);
+  slots[cur] = *posdata;
+  /* One bubbling pass restores the sorted order: with |remaining| items at
+     most |remaining - 1| adjacent comparisons / swaps are required. */
+  while (remaining-- > 1) {
+    const size_t after = (cur + 1) & 7;
+    if (slots[cur].costdiff > slots[after].costdiff) {
+      BROTLI_SWAP(PosData, slots, cur, after);
+    }
+    cur = after;
+  }
+}
+
+static const PosData* StartPosQueueAt(const StartPosQueue* self, size_t k) {
+  return self->q_ + ((k - self->idx_) & 7);  /* k = 0: smallest costdiff. */
+}
+
+/* Returns the shortest copy length, starting at |pos|, that could still lower
+   the cost of some later position. */
+static size_t ComputeMinimumCopyLength(const float start_cost,
+                                       const ZopfliNode* nodes,
+                                       const size_t num_bytes,
+                                       const size_t pos) {
+  /* Lower bound on the cost of reaching any future position from here. */
+  float floor_cost = start_cost;
+  size_t bucket_width = 4;
+  size_t bucket_start = 10;
+  size_t len;
+  for (len = 2; pos + len <= num_bytes; ) {
+    /* While (pos + len) is already reached at no more than the bound, there
+       is no point in looking for lengths <= len; stop at the first miss. */
+    if (!(nodes[pos + len].u.cost <= floor_cost)) {
+      break;
+    }
+    if (++len != bucket_start) continue;
+    /* Entered the next copy length code bucket, which costs one more extra
+       bit, so the bound rises by one. */
+    floor_cost += 1.0f;
+    bucket_start += bucket_width;
+    bucket_width *= 2;
+  }
+  return len;
+}
+
+/* REQUIRES: nodes[pos].cost < kInfinity
+   REQUIRES: nodes[0..pos] satisfies the "ZopfliNode array invariant". */
+static uint32_t ComputeDistanceShortcut(const size_t block_start,
+                                        const size_t pos,
+                                        const size_t max_backward_limit,
+                                        const size_t gap,
+                                        const ZopfliNode* nodes) {
+  const ZopfliNode* const node = &nodes[pos];
+  const size_t copy_len = ZopfliNodeCopyLength(node);
+  const size_t insert_len = node->dcode_insert_length & 0x7FFFFFF;
+  const size_t distance = ZopfliNodeCopyDistance(node);
+  /* Position 0 has no command behind it, hence no shortcut. */
+  if (pos == 0) return 0;
+  /* The command ends at |block_start + pos|, so its copy part starts at
+     |block_start + pos - copy_len|. Distances greater than that, or greater
+     than |max_backward_limit| + |gap|, are static dictionary references and
+     do not update the last distances; neither does distance code 0 (last
+     distance). Such commands pass on the shortcut of their start node. */
+  if (distance + copy_len > block_start + pos + gap ||
+      distance > max_backward_limit + gap ||
+      !(ZopfliNodeDistanceCode(node) > 0)) {
+    return nodes[pos - copy_len - insert_len].u.shortcut;
+  }
+  return (uint32_t)pos;
+}
+
+/* Fills in dist_cache[0..3] with the last four distances (as defined by
+   Section 4. of the Spec) that would be used at (block_start + pos) if we
+   used the shortest path of commands from block_start, computed from
+   nodes[0..pos]. The last four distances at block_start are in
+   starting_dist_cache[0..3].
+   REQUIRES: nodes[pos].cost < kInfinity
+   REQUIRES: nodes[0..pos] satisfies the "ZopfliNode array invariant". */
+static void ComputeDistanceCache(const size_t pos,
+                                 const int* starting_dist_cache,
+                                 const ZopfliNode* nodes,
+                                 int* dist_cache) {
+  size_t filled = 0;
+  size_t node_ix;
+  /* Follow the shortcut chain backwards, most recent distance first. */
+  for (node_ix = nodes[pos].u.shortcut; filled < 4 && node_ix > 0; ) {
+    const ZopfliNode* const node = &nodes[node_ix];
+    const size_t command_len = (node->dcode_insert_length & 0x7FFFFFF) +
+        ZopfliNodeCopyLength(node);
+    dist_cache[filled++] = (int)ZopfliNodeCopyDistance(node);
+    /* Because of prerequisite, node_ix >= command_len >= 2. */
+    node_ix = nodes[node_ix - command_len].u.shortcut;
+  }
+  /* Fill what is left from the distances in effect at the block start. */
+  while (filled < 4) dist_cache[filled++] = *starting_dist_cache++;
+}
+
+/* Maintains "ZopfliNode array invariant" and pushes the node at |pos| to the
+   queue, if it is eligible. */
+static void EvaluateNode(
+    const size_t block_start, const size_t pos, const size_t max_backward_limit,
+    const size_t gap, const int* starting_dist_cache,
+    const ZopfliCostModel* model, StartPosQueue* queue, ZopfliNode* nodes) {
+  /* Save the cost: writing |u.shortcut| below overwrites it (same union). */
+  const float cost_here = nodes[pos].u.cost;
+  PosData candidate;
+  nodes[pos].u.shortcut = ComputeDistanceShortcut(
+      block_start, pos, max_backward_limit, gap, nodes);
+  /* Eligible only if no costlier than the literal cost of bytes [0, pos). */
+  if (!(cost_here <= ZopfliCostModelGetLiteralCosts(model, 0, pos))) return;
+  candidate.pos = pos;
+  candidate.cost = cost_here;
+  candidate.costdiff =
+      cost_here - ZopfliCostModelGetLiteralCosts(model, 0, pos);
+  ComputeDistanceCache(
+      pos, starting_dist_cache, nodes, candidate.distance_cache);
+  StartPosQueuePush(queue, &candidate);
+}
+
+/* Updates nodes reachable from |pos|. Returns the longest improving copy. */
+static size_t UpdateNodes(
+    const size_t num_bytes, const size_t block_start, const size_t pos,
+    const uint8_t* ringbuffer, const size_t ringbuffer_mask,
+    const BrotliEncoderParams* params, const size_t max_backward_limit,
+    const int* starting_dist_cache, const size_t num_matches,
+    const BackwardMatch* matches, const ZopfliCostModel* model,
+    StartPosQueue* queue, ZopfliNode* nodes) {
+  const size_t cur_ix = block_start + pos;
+  const size_t cur_ix_masked = cur_ix & ringbuffer_mask;
+  const size_t max_distance = BROTLI_MIN(size_t, cur_ix, max_backward_limit);
+  const size_t max_len = num_bytes - pos;
+  const size_t max_zopfli_len = MaxZopfliLen(params);
+  const size_t max_iters = MaxZopfliCandidates(params);
+  size_t min_len;
+  size_t result = 0;
+  size_t k;
+  size_t gap = 0;
+  /* Finalize the node at |pos| and offer it as a command start position. */
+  EvaluateNode(block_start, pos, max_backward_limit, gap, starting_dist_cache,
+      model, queue, nodes);
+  /* Copies shorter than |min_len| cannot improve any node; skip them. */
+  {
+    const PosData* posdata = StartPosQueueAt(queue, 0);
+    float min_cost = (posdata->cost + ZopfliCostModelGetMinCostCmd(model) +
+        ZopfliCostModelGetLiteralCosts(model, posdata->pos, pos));
+    min_len = ComputeMinimumCopyLength(min_cost, nodes, num_bytes, pos);
+  }
+
+  /* Go over the command starting positions in order of increasing cost
+     difference. */
+  for (k = 0; k < max_iters && k < StartPosQueueSize(queue); ++k) {
+    const PosData* posdata = StartPosQueueAt(queue, k);
+    const size_t start = posdata->pos;
+    const uint16_t inscode = GetInsertLengthCode(pos - start);
+    const float start_costdiff = posdata->costdiff;
+    const float base_cost = start_costdiff + (float)GetInsertExtra(inscode) +
+        ZopfliCostModelGetLiteralCosts(model, 0, pos);
+
+    /* Look for last distance matches using the distance cache from this
+       starting position. */
+    size_t best_len = min_len - 1;
+    size_t j = 0;
+    for (; j < BROTLI_NUM_DISTANCE_SHORT_CODES && best_len < max_len; ++j) {
+      const size_t idx = kDistanceCacheIndex[j];
+      const size_t backward =
+          (size_t)(posdata->distance_cache[idx] + kDistanceCacheOffset[j]);
+      size_t prev_ix = cur_ix - backward;
+      size_t len = 0;
+      uint8_t continuation;
+      /* Check the bound first; only then read the byte it guards. */
+      if (cur_ix_masked + best_len > ringbuffer_mask) break;
+      continuation = ringbuffer[cur_ix_masked + best_len];
+      if (BROTLI_PREDICT_FALSE(backward > max_distance + gap)) {
+        /* Word dictionary -> ignore. */
+        continue;
+      }
+      if (backward <= max_distance) {
+        /* Regular backward reference. */
+        if (prev_ix >= cur_ix) {
+          continue;
+        }
+
+        prev_ix &= ringbuffer_mask;
+        if (prev_ix + best_len > ringbuffer_mask ||
+            continuation != ringbuffer[prev_ix + best_len]) {
+          continue;
+        }
+        len = FindMatchLengthWithLimit(&ringbuffer[prev_ix],
+                                       &ringbuffer[cur_ix_masked],
+                                       max_len);
+      } else {
+        continue;
+      }
+      {
+        const float dist_cost = base_cost +
+            ZopfliCostModelGetDistanceCost(model, j);
+        size_t l;
+        for (l = best_len + 1; l <= len; ++l) {
+          const uint16_t copycode = GetCopyLengthCode(l);
+          const uint16_t cmdcode =
+              CombineLengthCodes(inscode, copycode, j == 0);
+          const float cost = (cmdcode < 128 ? base_cost : dist_cost) +
+              (float)GetCopyExtra(copycode) +
+              ZopfliCostModelGetCommandCost(model, cmdcode);
+          if (cost < nodes[pos + l].u.cost) {
+            UpdateZopfliNode(nodes, pos, start, l, l, backward, j + 1, cost);
+            result = BROTLI_MAX(size_t, result, l);
+          }
+          best_len = l;
+        }
+      }
+    }
+
+    /* At higher iterations look only for new last distance matches, since
+       looking only for new command start positions with the same distances
+       does not help much. */
+    if (k >= 2) continue;
+
+    {
+      /* Loop through all possible copy lengths at this position. */
+      size_t len = min_len;
+      for (j = 0; j < num_matches; ++j) {
+        BackwardMatch match = matches[j];
+        size_t dist = match.distance;
+        BROTLI_BOOL is_dictionary_match =
+            TO_BROTLI_BOOL(dist > max_distance + gap);
+        /* We already tried all possible last distance matches, so we can use
+           normal distance code here. */
+        size_t dist_code = dist + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
+        uint16_t dist_symbol;
+        uint32_t distextra;
+        uint32_t distnumextra;
+        float dist_cost;
+        size_t max_match_len;
+        PrefixEncodeCopyDistance(
+            dist_code, params->dist.num_direct_distance_codes,
+            params->dist.distance_postfix_bits, &dist_symbol, &distextra);
+        distnumextra = dist_symbol >> 10;
+        dist_cost = base_cost + (float)distnumextra +
+            ZopfliCostModelGetDistanceCost(model, dist_symbol & 0x3FF);
+
+        /* Try all copy lengths up until the maximum copy length corresponding
+           to this distance. If the distance refers to the static dictionary, or
+           the maximum length is long enough, try only one maximum length. */
+        max_match_len = BackwardMatchLength(&match);
+        if (len < max_match_len &&
+            (is_dictionary_match || max_match_len > max_zopfli_len)) {
+          len = max_match_len;
+        }
+        for (; len <= max_match_len; ++len) {
+          const size_t len_code =
+              is_dictionary_match ? BackwardMatchLengthCode(&match) : len;
+          const uint16_t copycode = GetCopyLengthCode(len_code);
+          const uint16_t cmdcode = CombineLengthCodes(inscode, copycode, 0);
+          const float cost = dist_cost + (float)GetCopyExtra(copycode) +
+              ZopfliCostModelGetCommandCost(model, cmdcode);
+          if (cost < nodes[pos + len].u.cost) {
+            UpdateZopfliNode(nodes, pos, start, len, len_code, dist, 0, cost);
+            result = BROTLI_MAX(size_t, result, len);
+          }
+        }
+      }
+    }
+  }
+  return result;
+}
+
+static size_t ComputeShortestPathFromNodes(size_t num_bytes,
+    ZopfliNode* nodes) {
+  size_t count = 0;
+  size_t at = num_bytes;
+  /* Back up over trailing nodes with length 1 and no insert length. */
+  while (nodes[at].length == 1 &&
+      (nodes[at].dcode_insert_length & 0x7FFFFFF) == 0) --at;
+  nodes[at].u.next = BROTLI_UINT32_MAX;  /* Marks the end of the path. */
+  for (; at != 0; ++count) {
+    const size_t step = ZopfliNodeCommandLength(&nodes[at]);
+    at -= step;
+    nodes[at].u.next = (uint32_t)step;  /* Length of the command from here. */
+  }
+  return count;
+}
+
+/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
+void BrotliZopfliCreateCommands(const size_t num_bytes,
+    const size_t block_start, const ZopfliNode* nodes, int* dist_cache,
+    size_t* last_insert_len, const BrotliEncoderParams* params,
+    Command* commands, size_t* num_literals) {
+  const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
+  size_t pos = 0;
+  uint32_t offset = nodes[0].u.next;  /* Length of the first command. */
+  size_t i;
+  size_t gap = 0;
+  for (i = 0; offset != BROTLI_UINT32_MAX; i++) {
+    const ZopfliNode* next = &nodes[pos + offset];
+    size_t copy_length = ZopfliNodeCopyLength(next);
+    size_t insert_length = next->dcode_insert_length & 0x7FFFFFF;
+    pos += insert_length;
+    offset = next->u.next;
+    if (i == 0) {
+      /* The first command also takes the literals pending on entry. */
+      insert_length += *last_insert_len;
+      *last_insert_len = 0;
+    }
+    {
+      size_t distance = ZopfliNodeCopyDistance(next);
+      size_t len_code = ZopfliNodeLengthCode(next);
+      size_t max_distance =
+          BROTLI_MIN(size_t, block_start + pos, max_backward_limit);
+      BROTLI_BOOL is_dictionary = TO_BROTLI_BOOL(distance > max_distance + gap);
+      size_t dist_code = ZopfliNodeDistanceCode(next);
+      InitCommand(&commands[i], &params->dist, insert_length,
+          copy_length, (int)len_code - (int)copy_length, dist_code);
+      /* Dictionary matches and distance code 0 keep |dist_cache| intact. */
+      if (!is_dictionary && dist_code > 0) {
+        dist_cache[3] = dist_cache[2];
+        dist_cache[2] = dist_cache[1];
+        dist_cache[1] = dist_cache[0];
+        dist_cache[0] = (int)distance;
+      }
+    }
+
+    *num_literals += insert_length;
+    pos += copy_length;
+  }
+  *last_insert_len += num_bytes - pos;  /* Bytes after the last command. */
+}
+
+static size_t ZopfliIterate(size_t num_bytes, size_t position,
+    const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    const BrotliEncoderParams* params, const size_t gap, const int* dist_cache,
+    const ZopfliCostModel* model, const uint32_t* num_matches,
+    const BackwardMatch* matches, ZopfliNode* nodes) {
+  const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
+  const size_t max_zopfli_len = MaxZopfliLen(params);
+  StartPosQueue queue;
+  size_t cur_match_pos = 0;  /* Start of position |i|'s run in |matches|. */
+  size_t i;
+  nodes[0].length = 0;
+  nodes[0].u.cost = 0;  /* The block start is reached at zero cost. */
+  InitStartPosQueue(&queue);
+  for (i = 0; i + 3 < num_bytes; i++) {
+    size_t skip = UpdateNodes(num_bytes, position, i, ringbuffer,
+        ringbuffer_mask, params, max_backward_limit, dist_cache,
+        num_matches[i], &matches[cur_match_pos], model, &queue, nodes);
+    if (skip < BROTLI_LONG_COPY_QUICK_STEP) skip = 0;
+    cur_match_pos += num_matches[i];
+    if (num_matches[i] == 1 &&
+        BackwardMatchLength(&matches[cur_match_pos - 1]) > max_zopfli_len) {
+      skip = BROTLI_MAX(size_t,
+          BackwardMatchLength(&matches[cur_match_pos - 1]), skip);
+    }
+    if (skip > 1) {
+      skip--;
+      while (skip) {  /* Skipped positions get EvaluateNode only. */
+        i++;
+        if (i + 3 >= num_bytes) break;
+        EvaluateNode(position, i, max_backward_limit, gap,
+            dist_cache, model, &queue, nodes);
+        cur_match_pos += num_matches[i];
+        skip--;
+      }
+    }
+  }
+  return ComputeShortestPathFromNodes(num_bytes, nodes);  /* Command count. */
+}
+
+/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
+size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    const BrotliEncoderParams* params,
+    const int* dist_cache, HasherHandle hasher, ZopfliNode* nodes) {
+  const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
+  const size_t max_zopfli_len = MaxZopfliLen(params);
+  ZopfliCostModel model;
+  StartPosQueue queue;
+  BackwardMatch matches[2 * (MAX_NUM_MATCHES_H10 + 64)];
+  const size_t store_end = num_bytes >= StoreLookaheadH10() ?
+      position + num_bytes - StoreLookaheadH10() + 1 : position;
+  size_t i;
+  size_t gap = 0;
+  size_t lz_matches_offset = 0;
+  nodes[0].length = 0;
+  nodes[0].u.cost = 0;  /* The block start is reached at zero cost. */
+  InitZopfliCostModel(m, &model, &params->dist, num_bytes);
+  if (BROTLI_IS_OOM(m)) return 0;
+  ZopfliCostModelSetFromLiteralCosts(
+      &model, position, ringbuffer, ringbuffer_mask);
+  InitStartPosQueue(&queue);
+  for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; i++) {
+    const size_t pos = position + i;
+    const size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
+    size_t skip;
+    size_t num_matches;
+    num_matches = FindAllMatchesH10(hasher,
+        &params->dictionary,
+        ringbuffer, ringbuffer_mask, pos, num_bytes - i, max_distance,
+        gap, params, &matches[lz_matches_offset]);
+    if (num_matches > 0 &&
+        BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) {
+      matches[0] = matches[num_matches - 1];
+      num_matches = 1;  /* Keep only that last, over-long match. */
+    }
+    skip = UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
+        params, max_backward_limit, dist_cache, num_matches, matches, &model,
+        &queue, nodes);
+    if (skip < BROTLI_LONG_COPY_QUICK_STEP) skip = 0;
+    if (num_matches == 1 && BackwardMatchLength(&matches[0]) > max_zopfli_len) {
+      skip = BROTLI_MAX(size_t, BackwardMatchLength(&matches[0]), skip);
+    }
+    if (skip > 1) {
+      /* Add the tail of the copy to the hasher. */
+      StoreRangeH10(hasher, ringbuffer, ringbuffer_mask, pos + 1, BROTLI_MIN(
+          size_t, pos + skip, store_end));
+      skip--;
+      while (skip) {  /* Skipped positions get EvaluateNode only. */
+        i++;
+        if (i + HashTypeLengthH10() - 1 >= num_bytes) break;
+        EvaluateNode(position, i, max_backward_limit, gap,
+            dist_cache, &model, &queue, nodes);
+        skip--;
+      }
+    }
+  }
+  CleanupZopfliCostModel(m, &model);
+  return ComputeShortestPathFromNodes(num_bytes, nodes);  /* Command count. */
+}
+
+void BrotliCreateZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    const BrotliEncoderParams* params,
+    HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals) {
+  const size_t node_count = num_bytes + 1;  /* Nodes 0..num_bytes. */
+  ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, node_count);
+  if (BROTLI_IS_OOM(m)) return;
+  BrotliInitZopfliNodes(nodes, node_count);
+  /* Find the cheapest path first, then turn it into commands. */
+  *num_commands += BrotliZopfliComputeShortestPath(m, num_bytes, position,
+      ringbuffer, ringbuffer_mask, params, dist_cache, hasher, nodes);
+  if (BROTLI_IS_OOM(m)) return;
+  BrotliZopfliCreateCommands(num_bytes, position, nodes, dist_cache,
+      last_insert_len, params, commands, num_literals);
+  BROTLI_FREE(m, nodes);
+}
+
+void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    const BrotliEncoderParams* params,
+    HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals) {
+  const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
+  uint32_t* num_matches = BROTLI_ALLOC(m, uint32_t, num_bytes);
+  size_t matches_size = 4 * num_bytes;  /* Initial capacity, grown below. */
+  const size_t store_end = num_bytes >= StoreLookaheadH10() ?
+      position + num_bytes - StoreLookaheadH10() + 1 : position;
+  size_t cur_match_pos = 0;
+  size_t i;
+  size_t orig_num_literals;
+  size_t orig_last_insert_len;
+  int orig_dist_cache[4];
+  size_t orig_num_commands;
+  ZopfliCostModel model;
+  ZopfliNode* nodes;
+  BackwardMatch* matches = BROTLI_ALLOC(m, BackwardMatch, matches_size);
+  size_t gap = 0;
+  size_t shadow_matches = 0;
+  if (BROTLI_IS_OOM(m)) return;
+  for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; ++i) {
+    const size_t pos = position + i;
+    size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
+    size_t max_length = num_bytes - i;
+    size_t num_found_matches;
+    size_t cur_match_end;
+    size_t j;
+    /* Ensure that we have enough free slots. */
+    BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size,
+        cur_match_pos + MAX_NUM_MATCHES_H10 + shadow_matches);
+    if (BROTLI_IS_OOM(m)) return;
+    num_found_matches = FindAllMatchesH10(hasher,
+        &params->dictionary,
+        ringbuffer, ringbuffer_mask, pos, max_length,
+        max_distance, gap, params,
+        &matches[cur_match_pos + shadow_matches]);
+    cur_match_end = cur_match_pos + num_found_matches;
+    for (j = cur_match_pos; j + 1 < cur_match_end; ++j) {
+      BROTLI_DCHECK(BackwardMatchLength(&matches[j]) <=
+          BackwardMatchLength(&matches[j + 1]));
+    }
+    num_matches[i] = (uint32_t)num_found_matches;
+    if (num_found_matches > 0) {
+      const size_t match_len = BackwardMatchLength(&matches[cur_match_end - 1]);
+      if (match_len > MAX_ZOPFLI_LEN_QUALITY_11) {
+        const size_t skip = match_len - 1;
+        matches[cur_match_pos++] = matches[cur_match_end - 1];
+        num_matches[i] = 1;  /* Only the last match is kept here. */
+        /* Add the tail of the copy to the hasher. */
+        StoreRangeH10(hasher, ringbuffer, ringbuffer_mask, pos + 1,
+                      BROTLI_MIN(size_t, pos + match_len, store_end));
+        memset(&num_matches[i + 1], 0, skip * sizeof(num_matches[0]));
+        i += skip;
+      } else {
+        cur_match_pos = cur_match_end;
+      }
+    }
+  }
+  orig_num_literals = *num_literals;
+  orig_last_insert_len = *last_insert_len;
+  memcpy(orig_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
+  orig_num_commands = *num_commands;
+  nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
+  if (BROTLI_IS_OOM(m)) return;
+  InitZopfliCostModel(m, &model, &params->dist, num_bytes);
+  if (BROTLI_IS_OOM(m)) return;
+  for (i = 0; i < 2; i++) {
+    BrotliInitZopfliNodes(nodes, num_bytes + 1);
+    if (i == 0) {  /* First pass: cost model from literal estimates. */
+      ZopfliCostModelSetFromLiteralCosts(
+          &model, position, ringbuffer, ringbuffer_mask);
+    } else {  /* Second pass: cost model from the first pass's commands. */
+      ZopfliCostModelSetFromCommands(&model, position, ringbuffer,
+          ringbuffer_mask, commands, *num_commands - orig_num_commands,
+          orig_last_insert_len);
+    }
+    *num_commands = orig_num_commands;  /* Each pass restarts from the */
+    *num_literals = orig_num_literals;  /* state saved before the loop. */
+    *last_insert_len = orig_last_insert_len;
+    memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
+    *num_commands += ZopfliIterate(num_bytes, position, ringbuffer,
+        ringbuffer_mask, params, gap, dist_cache, &model, num_matches, matches,
+        nodes);
+    BrotliZopfliCreateCommands(num_bytes, position, nodes, dist_cache,
+        last_insert_len, params, commands, num_literals);
+  }
+  CleanupZopfliCostModel(m, &model);
+  BROTLI_FREE(m, nodes);
+  BROTLI_FREE(m, matches);
+  BROTLI_FREE(m, num_matches);
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references_hq.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references_hq.h
new file mode 100755
index 0000000000..1e4275d403
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references_hq.h
@@ -0,0 +1,92 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function to find backward reference copies. */
+
+#ifndef BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
+#define BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
+
+#include "../common/constants.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./command.h"
+#include "./hash.h"
+#include "./memory.h"
+#include "./quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+BROTLI_INTERNAL void BrotliCreateZopfliBackwardReferences(MemoryManager* m,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ringbuffer_mask, const BrotliEncoderParams* params,
+    HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals);
+
+BROTLI_INTERNAL void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ringbuffer_mask, const BrotliEncoderParams* params,
+    HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals);
+
+typedef struct ZopfliNode {
+  /* Best length to get up to this byte (not including this byte itself);
+     the highest 7 bits are used to reconstruct the length code. */
+  uint32_t length;
+  /* Distance associated with the length. */
+  uint32_t distance;
+  /* Number of literal inserts before this copy; highest 5 bits contain
+     distance short code + 1 (or zero if no short code). */
+  uint32_t dcode_insert_length;
+
+  /* This union holds information used by dynamic-programming. During forward
+     pass |cost| is used to store the goal function. When node is processed its
+     |cost| is invalidated in favor of |shortcut|. On path back-tracing pass
+     |next| is assigned the offset to next node on the path. */
+  union {
+    /* Smallest cost to get to this byte from the beginning, as found so far. */
+    float cost;
+    /* Offset to the next node on the path. Equal to command_length() of the
+       next node on the path. For the last node equals BROTLI_UINT32_MAX. */
+    uint32_t next;
+    /* Node position that provides next distance for distance cache. */
+    uint32_t shortcut;
+  } u;
+} ZopfliNode;
+
+BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length);
+
+/* Computes the shortest path of commands from position to at most
+   position + num_bytes.
+
+   Returns the number of commands found. Starting at nodes[0], |u.next| is the
+   length of the next command (copy length plus insert length) on the path.
+   Note that the sum of the lengths of all commands can be less than num_bytes.
+
+   On return, the nodes[0..num_bytes] array will have the following
+   "ZopfliNode array invariant":
+   For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
+     (1) nodes[i].copy_length() >= 2
+     (2) nodes[i].command_length() <= i and
+     (3) nodes[i - nodes[i].command_length()].cost < kInfinity */
+BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(
+    MemoryManager* m, size_t num_bytes,
+    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    const BrotliEncoderParams* params,
+    const int* dist_cache, HasherHandle hasher, ZopfliNode* nodes);
+
+BROTLI_INTERNAL void BrotliZopfliCreateCommands(
+    const size_t num_bytes, const size_t block_start, const ZopfliNode* nodes,
+    int* dist_cache, size_t* last_insert_len, const BrotliEncoderParams* params,
+    Command* commands, size_t* num_literals);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references_inc.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references_inc.h
new file mode 100755
index 0000000000..c18cdb00cb
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/backward_references_inc.h
@@ -0,0 +1,153 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: EXPORT_FN, FN */
+/* Scans [position, position + num_bytes) and emits one Command per match. */
+static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
+    size_t num_bytes, size_t position,
+    const uint8_t* ringbuffer, size_t ringbuffer_mask,
+    const BrotliEncoderParams* params,
+    HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
+    Command* commands, size_t* num_commands, size_t* num_literals) {
+  /* Set maximum distance, see section 9.1. of the spec. */
+  const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
+
+  const Command* const orig_commands = commands;  /* to count new commands */
+  size_t insert_length = *last_insert_len;  /* literals pending before a copy */
+  const size_t pos_end = position + num_bytes;
+  const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
+      position + num_bytes - FN(StoreLookahead)() + 1 : position;
+
+  /* Thresholds for the skip-ahead heuristic applied to incompressible data. */
+  const size_t random_heuristics_window_size =
+      LiteralSpreeLengthForSparseSearch(params);
+  size_t apply_random_heuristics = position + random_heuristics_window_size;
+  const size_t gap = 0;
+
+  /* Minimum score to accept a backward reference. */
+  const score_t kMinScore = BROTLI_SCORE_BASE + 100;
+
+  FN(PrepareDistanceCache)(hasher, dist_cache);
+
+  while (position + FN(HashTypeLength)() < pos_end) {
+    size_t max_length = pos_end - position;
+    size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
+    HasherSearchResult sr;
+    sr.len = 0;
+    sr.len_code_delta = 0;
+    sr.distance = 0;
+    sr.score = kMinScore;
+    FN(FindLongestMatch)(hasher, &params->dictionary,
+        ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
+        max_distance, gap, params->dist.max_distance, &sr);
+    if (sr.score > kMinScore) {
+      /* Found a match. Let's look for something even better ahead. */
+      int delayed_backward_references_in_row = 0;
+      --max_length;
+      for (;; --max_length) {
+        const score_t cost_diff_lazy = 175;  /* margin a later match must win by */
+        HasherSearchResult sr2;
+        sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
+            BROTLI_MIN(size_t, sr.len - 1, max_length) : 0;
+        sr2.len_code_delta = 0;
+        sr2.distance = 0;
+        sr2.score = kMinScore;
+        max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
+        FN(FindLongestMatch)(hasher,
+            &params->dictionary,
+            ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
+            max_distance, gap, params->dist.max_distance, &sr2);
+        if (sr2.score >= sr.score + cost_diff_lazy) {
+          /* Ok, let's just write one byte for now and start a match from the
+             next byte. */
+          ++position;
+          ++insert_length;
+          sr = sr2;
+          if (++delayed_backward_references_in_row < 4 &&
+              position + FN(HashTypeLength)() < pos_end) {
+            continue;  /* at most 4 consecutive one-byte deferrals */
+          }
+        }
+        break;
+      }
+      apply_random_heuristics =
+          position + 2 * sr.len + random_heuristics_window_size;
+      max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
+      {
+        /* The first 16 codes are special short-codes,
+           and the minimum offset is 1. */
+        size_t distance_code = ComputeDistanceCode(
+            sr.distance, max_distance + gap, dist_cache);
+        if ((sr.distance <= (max_distance + gap)) && distance_code > 0) {
+          dist_cache[3] = dist_cache[2];  /* shift; sr.distance goes to slot 0 */
+          dist_cache[2] = dist_cache[1];
+          dist_cache[1] = dist_cache[0];
+          dist_cache[0] = (int)sr.distance;
+          FN(PrepareDistanceCache)(hasher, dist_cache);
+        }
+        InitCommand(commands++, &params->dist, insert_length,
+            sr.len, sr.len_code_delta, distance_code);
+      }
+      *num_literals += insert_length;  /* literals emitted ahead of this copy */
+      insert_length = 0;
+      /* Put the hash keys into the table, if there are enough bytes left.
+         Depending on the hasher implementation, it can push all positions
+         in the given range or only a subset of them.
+         Avoid hash poisoning with RLE data. */
+      {
+        size_t range_start = position + 2;
+        size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
+        if (sr.distance < (sr.len >> 2)) {
+          range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
+              range_start, position + sr.len - (sr.distance << 2)));
+        }
+        FN(StoreRange)(hasher, ringbuffer, ringbuffer_mask, range_start,
+                       range_end);
+      }
+      position += sr.len;
+    } else {
+      ++insert_length;
+      ++position;
+      /* If we have not seen matches for a long time, we can skip some
+         match lookups. Unsuccessful match lookups are very very expensive
+         and this kind of a heuristic speeds up compression quite
+         a lot. */
+      if (position > apply_random_heuristics) {
+        /* Going through incompressible data, jump. */
+        if (position >
+            apply_random_heuristics + 4 * random_heuristics_window_size) {
+          /* It is quite a long time since we saw a copy, so we assume
+             that this data is not compressible, and store hashes less
+             often. Hashes of incompressible data are less likely to
+             turn out to be useful in the future, too, so we store fewer
+             of them to avoid flooding good compressible data out of the
+             hash table. */
+          const size_t kMargin =
+              BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
+          size_t pos_jump =
+              BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
+          for (; position < pos_jump; position += 4) {
+            FN(Store)(hasher, ringbuffer, ringbuffer_mask, position);
+            insert_length += 4;
+          }
+        } else {
+          const size_t kMargin =
+              BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
+          size_t pos_jump =
+              BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
+          for (; position < pos_jump; position += 2) {
+            FN(Store)(hasher, ringbuffer, ringbuffer_mask, position);
+            insert_length += 2;
+          }
+        }
+      }
+    }
+  }
+  insert_length += pos_end - position;  /* unmatched tail stays pending */
+  *last_insert_len = insert_length;
+  *num_commands += (size_t)(commands - orig_commands);
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/bit_cost.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/bit_cost.c
new file mode 100755
index 0000000000..1f3f7ad5c9
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/bit_cost.c
@@ -0,0 +1,35 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions to estimate the bit cost of Huffman trees. */
+
+#include "./bit_cost.h"
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./fast_log.h"
+#include "./histogram.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define FN(X) X ## Literal  /* yields BrotliPopulationCostLiteral */
+#include "./bit_cost_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Command  /* yields BrotliPopulationCostCommand */
+#include "./bit_cost_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Distance  /* yields BrotliPopulationCostDistance */
+#include "./bit_cost_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/bit_cost.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/bit_cost.h
new file mode 100755
index 0000000000..6586469e62
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/bit_cost.h
@@ -0,0 +1,63 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions to estimate the bit cost of Huffman trees. */
+
+#ifndef BROTLI_ENC_BIT_COST_H_
+#define BROTLI_ENC_BIT_COST_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./fast_log.h"
+#include "./histogram.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+/* Returns sum*FastLog2(sum) - SUM(p*FastLog2(p)) over population[0..size). */
+static BROTLI_INLINE double ShannonEntropy(
+    const uint32_t* population, size_t size, size_t* total) {
+  size_t sum = 0;
+  double retval = 0;
+  const uint32_t* population_end = population + size;
+  size_t p;
+  if (size & 1) {  /* odd size: enter the two-per-pass loop at its 2nd half */
+    goto odd_number_of_elements_left;
+  }
+  while (population < population_end) {  /* consumes two counts per pass */
+    p = *population++;
+    sum += p;
+    retval -= (double)p * FastLog2(p);
+ odd_number_of_elements_left:
+    p = *population++;
+    sum += p;
+    retval -= (double)p * FastLog2(p);
+  }
+  if (sum) retval += (double)sum * FastLog2(sum);  /* skipped when sum is 0 */
+  *total = sum;  /* sum of all counts, reported to the caller */
+  return retval;
+}
+
+static BROTLI_INLINE double BitsEntropy(
+    const uint32_t* population, size_t size) {
+  /* ShannonEntropy() of the counts, raised to the total count when smaller:
+     at least one bit per literal is needed. */
+  size_t symbol_total;
+  const double entropy = ShannonEntropy(population, size, &symbol_total);
+  const double floor_bits = (double)symbol_total;
+  if (entropy < floor_bits) return floor_bits;
+  return entropy;
+}
+
+BROTLI_INTERNAL double BrotliPopulationCostLiteral(const HistogramLiteral*);
+BROTLI_INTERNAL double BrotliPopulationCostCommand(const HistogramCommand*);
+BROTLI_INTERNAL double BrotliPopulationCostDistance(const HistogramDistance*);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_BIT_COST_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/bit_cost_inc.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/bit_cost_inc.h
new file mode 100755
index 0000000000..453c226042
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/bit_cost_inc.h
@@ -0,0 +1,127 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+#define HistogramType FN(Histogram)
+/* Estimates the bit cost of coding *histogram: code description plus data. */
+double FN(BrotliPopulationCost)(const HistogramType* histogram) {
+  static const double kOneSymbolHistogramCost = 12;
+  static const double kTwoSymbolHistogramCost = 20;
+  static const double kThreeSymbolHistogramCost = 28;
+  static const double kFourSymbolHistogramCost = 37;
+  const size_t data_size = FN(HistogramDataSize)();
+  int count = 0;  /* number of nonzero symbols found, capped at 5 */
+  size_t s[5];  /* indices of the first (up to 5) nonzero symbols */
+  double bits = 0.0;
+  size_t i;
+  if (histogram->total_count_ == 0) {  /* empty: charged like one symbol */
+    return kOneSymbolHistogramCost;
+  }
+  for (i = 0; i < data_size; ++i) {
+    if (histogram->data_[i] > 0) {
+      s[count] = i;
+      ++count;
+      if (count > 4) break;  /* 5+ symbols: handled by the general path */
+    }
+  }
+  if (count == 1) {
+    return kOneSymbolHistogramCost;
+  }
+  if (count == 2) {
+    return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
+  }
+  if (count == 3) {
+    const uint32_t histo0 = histogram->data_[s[0]];
+    const uint32_t histo1 = histogram->data_[s[1]];
+    const uint32_t histo2 = histogram->data_[s[2]];
+    const uint32_t histomax =
+        BROTLI_MAX(uint32_t, histo0, BROTLI_MAX(uint32_t, histo1, histo2));
+    return (kThreeSymbolHistogramCost +
+            2 * (histo0 + histo1 + histo2) - histomax);
+  }
+  if (count == 4) {
+    uint32_t histo[4];
+    uint32_t h23;
+    uint32_t histomax;
+    for (i = 0; i < 4; ++i) {
+      histo[i] = histogram->data_[s[i]];
+    }
+    /* Sort histo[] into descending order. */
+    for (i = 0; i < 4; ++i) {
+      size_t j;
+      for (j = i + 1; j < 4; ++j) {
+        if (histo[j] > histo[i]) {
+          BROTLI_SWAP(uint32_t, histo, j, i);
+        }
+      }
+    }
+    h23 = histo[2] + histo[3];  /* the two smallest counts combined */
+    histomax = BROTLI_MAX(uint32_t, h23, histo[0]);
+    return (kFourSymbolHistogramCost +
+            3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
+  }
+
+  {
+    /* In this loop we compute the entropy of the histogram and simultaneously
+       build a simplified histogram of the code length codes where we use the
+       zero repeat code 17, but we don't use the non-zero repeat code 16. */
+    size_t max_depth = 1;
+    uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
+    const double log2total = FastLog2(histogram->total_count_);
+    for (i = 0; i < data_size;) {
+      if (histogram->data_[i] > 0) {
+        /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
+                                    = log2(total_count) - log2(count(symbol)) */
+        double log2p = log2total - FastLog2(histogram->data_[i]);
+        /* Approximate the bit depth by round(-log2(P(symbol))) */
+        size_t depth = (size_t)(log2p + 0.5);
+        bits += histogram->data_[i] * log2p;
+        if (depth > 15) {  /* clamp the approximated depth to 15 */
+          depth = 15;
+        }
+        if (depth > max_depth) {
+          max_depth = depth;
+        }
+        ++depth_histo[depth];
+        ++i;
+      } else {
+        /* Compute the run length of zeros and add the appropriate number of 0
+           and 17 code length codes to the code length code histogram. */
+        uint32_t reps = 1;
+        size_t k;
+        for (k = i + 1; k < data_size && histogram->data_[k] == 0; ++k) {
+          ++reps;
+        }
+        i += reps;
+        if (i == data_size) {
+          /* Don't add any cost for the last zero run, since these are encoded
+             only implicitly. */
+          break;
+        }
+        if (reps < 3) {
+          depth_histo[0] += reps;
+        } else {
+          reps -= 2;
+          while (reps > 0) {
+            ++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
+            /* Add the 3 extra bits for the 17 code length code. */
+            bits += 3;
+            reps >>= 3;
+          }
+        }
+      }
+    }
+    /* Add the estimated encoding cost of the code length code histogram. */
+    bits += (double)(18 + 2 * max_depth);
+    /* Add the entropy of the code length code histogram. */
+    bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
+  }
+  return bits;
+}
+
+#undef HistogramType
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/block_encoder_inc.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/block_encoder_inc.h
new file mode 100755
index 0000000000..8cbd5eac67
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/block_encoder_inc.h
@@ -0,0 +1,34 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2014 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+#define HistogramType FN(Histogram)
+
+/* Builds a Huffman code for every histogram, writes it to the bit stream and
+   records its depths/bits in the tables allocated on `self`. */
+static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self,
+    const HistogramType* histograms, const size_t histograms_size,
+    const size_t alphabet_size, HuffmanTree* tree,
+    size_t* storage_ix, uint8_t* storage) {
+  const size_t code_len = self->histogram_length_;
+  const size_t table_size = histograms_size * code_len;
+  size_t type;
+  size_t base = 0;
+  self->depths_ = BROTLI_ALLOC(m, uint8_t, table_size);
+  self->bits_ = BROTLI_ALLOC(m, uint16_t, table_size);
+  if (BROTLI_IS_OOM(m)) return;
+
+  /* Histogram `type` owns the table slice [base, base + code_len). */
+  for (type = 0; type < histograms_size; ++type, base += code_len) {
+    BuildAndStoreHuffmanTree(histograms[type].data_, code_len,
+        alphabet_size, tree, self->depths_ + base, self->bits_ + base,
+        storage_ix, storage);
+  }
+}
+
+#undef HistogramType
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/block_splitter.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/block_splitter.c
new file mode 100755
index 0000000000..d308eca59d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/block_splitter.c
@@ -0,0 +1,194 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Block split point selection utilities. */
+
+#include "./block_splitter.h"
+
+#include <string.h>  /* memcpy, memset */
+
+#include "../common/platform.h"
+#include "./bit_cost.h"
+#include "./cluster.h"
+#include "./command.h"
+#include "./fast_log.h"
+#include "./histogram.h"
+#include "./memory.h"
+#include "./quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static const size_t kMaxLiteralHistograms = 100;
+static const size_t kMaxCommandHistograms = 50;  /* also used for distances */
+static const double kLiteralBlockSwitchCost = 28.1;
+static const double kCommandBlockSwitchCost = 13.5;
+static const double kDistanceBlockSwitchCost = 14.6;
+static const size_t kLiteralStrideLength = 70;
+static const size_t kCommandStrideLength = 40;  /* also used for distances */
+static const size_t kSymbolsPerLiteralHistogram = 544;
+static const size_t kSymbolsPerCommandHistogram = 530;
+static const size_t kSymbolsPerDistanceHistogram = 544;
+static const size_t kMinLengthForBlockSplitting = 128;
+static const size_t kIterMulForRefining = 2;  /* see RefineEntropyCodes */
+static const size_t kMinItersForRefining = 100;  /* see RefineEntropyCodes */
+
+static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
+  /* Sum of insert_len_ over cmds[0..num_commands). */
+  size_t literal_total = 0;
+  size_t remaining = num_commands;
+  while (remaining-- > 0) {
+    literal_total += cmds[remaining].insert_len_;
+  }
+  return literal_total;
+}
+
+static void CopyLiteralsToByteArray(const Command* cmds,
+                                    const size_t num_commands,
+                                    const uint8_t* data,
+                                    const size_t offset,
+                                    const size_t mask,
+                                    uint8_t* literals) {
+  /* Gathers each command's inserted bytes from `data` (indexed modulo
+     mask + 1) into the flat array `literals`, splitting wrapping runs. */
+  size_t written = 0;
+  size_t src = offset & mask;
+  size_t n;
+  for (n = 0; n < num_commands; ++n) {
+    size_t left = cmds[n].insert_len_;
+    if (src + left > mask) {
+      const size_t until_wrap = mask + 1 - src;
+      memcpy(literals + written, data + src, until_wrap);
+      written += until_wrap;
+      left -= until_wrap;
+      src = 0;
+    }
+    if (left > 0) memcpy(literals + written, data + src, left);
+    written += left;
+    src = (src + left + CommandCopyLen(&cmds[n])) & mask;
+  }
+}
+
+static BROTLI_INLINE uint32_t MyRand(uint32_t* seed) {
+  /* Advances *seed in place and returns the new value.
+     Initial seed should be 7. In this case, loop length is (1 << 29). */
+  return *seed = *seed * 16807U;
+}
+
+static BROTLI_INLINE double BitCost(size_t count) {
+  return count != 0 ? FastLog2(count) : -2.0;  /* a zero count scores -2 */
+}
+
+#define HISTOGRAMS_PER_BATCH 64
+#define CLUSTERS_PER_BATCH 16
+
+#define FN(X) X ## Literal
+#define DataType uint8_t
+/* NOLINTNEXTLINE(build/include) */
+#include "./block_splitter_inc.h"
+#undef DataType
+#undef FN
+
+#define FN(X) X ## Command
+#define DataType uint16_t
+/* NOLINTNEXTLINE(build/include) */
+#include "./block_splitter_inc.h"
+#undef FN  /* DataType (uint16_t) stays defined for the Distance pass below */
+
+#define FN(X) X ## Distance
+/* NOLINTNEXTLINE(build/include) */
+#include "./block_splitter_inc.h"
+#undef DataType
+#undef FN
+
+void BrotliInitBlockSplit(BlockSplit* self) {
+  /* Resets *self to the empty state: null arrays, zero counts and zero
+     allocation sizes. */
+  self->types = NULL;
+  self->lengths = NULL;
+  self->types_alloc_size = self->lengths_alloc_size = 0;
+  self->num_types = self->num_blocks = 0;
+}
+
+void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
+  BROTLI_FREE(m, self->types);  /* both arrays are released through m */
+  BROTLI_FREE(m, self->lengths);
+}
+/* Fills the literal, insert-and-copy and distance block splits for cmds. */
+void BrotliSplitBlock(MemoryManager* m,
+                      const Command* cmds,
+                      const size_t num_commands,
+                      const uint8_t* data,
+                      const size_t pos,
+                      const size_t mask,
+                      const BrotliEncoderParams* params,
+                      BlockSplit* literal_split,
+                      BlockSplit* insert_and_copy_split,
+                      BlockSplit* dist_split) {
+  {
+    size_t literals_count = CountLiterals(cmds, num_commands);
+    uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
+    if (BROTLI_IS_OOM(m)) return;
+    /* Create a continuous array of literals. */
+    CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
+    /* Create the block split on the array of literals.
+       Literal histograms have alphabet size 256. */
+    SplitByteVectorLiteral(
+        m, literals, literals_count,
+        kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
+        kLiteralStrideLength, kLiteralBlockSwitchCost, params,
+        literal_split);
+    if (BROTLI_IS_OOM(m)) return;  /* NOTE(review): skips the free below; confirm m reclaims */
+    BROTLI_FREE(m, literals);
+  }
+
+  {
+    /* Compute prefix codes for commands. */
+    uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
+    size_t i;
+    if (BROTLI_IS_OOM(m)) return;
+    for (i = 0; i < num_commands; ++i) {
+      insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
+    }
+    /* Create the block split on the array of command prefixes. */
+    SplitByteVectorCommand(
+        m, insert_and_copy_codes, num_commands,
+        kSymbolsPerCommandHistogram, kMaxCommandHistograms,
+        kCommandStrideLength, kCommandBlockSwitchCost, params,
+        insert_and_copy_split);
+    if (BROTLI_IS_OOM(m)) return;  /* NOTE(review): skips the free below; confirm m reclaims */
+    /* TODO: reuse for distances? */
+    BROTLI_FREE(m, insert_and_copy_codes);
+  }
+
+  {
+    /* Create a continuous array of distance prefixes. */
+    uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
+    size_t j = 0;  /* number of distance prefixes collected so far */
+    size_t i;
+    if (BROTLI_IS_OOM(m)) return;
+    for (i = 0; i < num_commands; ++i) {
+      const Command* cmd = &cmds[i];
+      if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {  /* others are skipped */
+        distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF;  /* low 10 bits only */
+      }
+    }
+    /* Create the block split on the array of distance prefixes. */
+    SplitByteVectorDistance(
+        m, distance_prefixes, j,
+        kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
+        kCommandStrideLength, kDistanceBlockSwitchCost, params,
+        dist_split);
+    if (BROTLI_IS_OOM(m)) return;  /* NOTE(review): skips the free below; confirm m reclaims */
+    BROTLI_FREE(m, distance_prefixes);
+  }
+}
+
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/block_splitter.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/block_splitter.h
new file mode 100755
index 0000000000..a5e006c4b3
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/block_splitter.h
@@ -0,0 +1,51 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Block split point selection utilities. */
+
+#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
+#define BROTLI_ENC_BLOCK_SPLITTER_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./command.h"
+#include "./memory.h"
+#include "./quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct BlockSplit {
+  size_t num_types;  /* Number of distinct types */
+  size_t num_blocks;  /* Number of values in types and lengths */
+  uint8_t* types;
+  uint32_t* lengths;
+
+  size_t types_alloc_size;  /* presumably capacity of types -- TODO confirm */
+  size_t lengths_alloc_size;  /* presumably capacity of lengths -- TODO confirm */
+} BlockSplit;
+
+BROTLI_INTERNAL void BrotliInitBlockSplit(BlockSplit* self);
+BROTLI_INTERNAL void BrotliDestroyBlockSplit(MemoryManager* m,
+                                             BlockSplit* self);
+
+BROTLI_INTERNAL void BrotliSplitBlock(MemoryManager* m,
+                                      const Command* cmds,
+                                      const size_t num_commands,
+                                      const uint8_t* data,
+                                      const size_t offset,
+                                      const size_t mask,
+                                      const BrotliEncoderParams* params,
+                                      BlockSplit* literal_split,
+                                      BlockSplit* insert_and_copy_split,
+                                      BlockSplit* dist_split);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_BLOCK_SPLITTER_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/block_splitter_inc.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/block_splitter_inc.h
new file mode 100755
index 0000000000..023712b84d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/block_splitter_inc.h
@@ -0,0 +1,431 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, DataType */
+
+#define HistogramType FN(Histogram)
+/* Seeds each histogram with `stride` symbols taken from its share of data. */
+static void FN(InitialEntropyCodes)(const DataType* data, size_t length,
+                                    size_t stride,
+                                    size_t num_histograms,
+                                    HistogramType* histograms) {
+  uint32_t seed = 7;  /* fixed seed, so the sampled positions are repeatable */
+  size_t block_length = length / num_histograms;
+  size_t i;
+  FN(ClearHistograms)(histograms, num_histograms);
+  for (i = 0; i < num_histograms; ++i) {
+    size_t pos = length * i / num_histograms;  /* start of the i-th slice */
+    if (i != 0) {
+      pos += MyRand(&seed) % block_length;  /* assumes block_length != 0 -- TODO confirm */
+    }
+    if (pos + stride >= length) {
+      pos = length - stride - 1;  /* assumes length > stride -- TODO confirm */
+    }
+    FN(HistogramAddVector)(&histograms[i], data + pos, stride);
+  }
+}
+
+static void FN(RandomSample)(uint32_t* seed,
+                             const DataType* data,
+                             size_t length,
+                             size_t stride,
+                             HistogramType* sample) {
+  /* Adds min(stride, length) symbols to sample; random start if they fit. */
+  if (stride < length) {
+    const size_t start = MyRand(seed) % (length - stride + 1);
+    FN(HistogramAddVector)(sample, data + start, stride);
+  } else {
+    FN(HistogramAddVector)(sample, data, length);
+  }
+}
+
+static void FN(RefineEntropyCodes)(const DataType* data, size_t length,
+                                   size_t stride,
+                                   size_t num_histograms,
+                                   HistogramType* histograms) {
+  size_t total = kIterMulForRefining * length / stride + kMinItersForRefining;
+  size_t target = 0;  /* histogram receiving the next sample, in rotation */
+  uint32_t seed = 7;
+  total = ((total + num_histograms - 1) / num_histograms) * num_histograms;
+  for (; total != 0; --total) {  /* total is a multiple of num_histograms */
+    HistogramType sample;
+    FN(HistogramClear)(&sample);
+    FN(RandomSample)(&seed, data, length, stride, &sample);
+    FN(HistogramAddHistogram)(&histograms[target], &sample);
+    if (++target == num_histograms) target = 0;
+  }
+}
+
+/* Assigns a block id from the range [0, num_histograms) to each data element
+   in data[0..length) and fills in block_id[0..length) with the assigned values.
+   Returns the number of blocks, i.e. one plus the number of block switches. */
+static size_t FN(FindBlocks)(const DataType* data, const size_t length,
+                             const double block_switch_bitcost,
+                             const size_t num_histograms,
+                             const HistogramType* histograms,
+                             double* insert_cost,
+                             double* cost,
+                             uint8_t* switch_signal,
+                             uint8_t* block_id) {
+  const size_t data_size = FN(HistogramDataSize)();
+  const size_t bitmaplen = (num_histograms + 7) >> 3;  /* bytes per position */
+  size_t num_blocks = 1;
+  size_t i;
+  size_t j;
+  BROTLI_DCHECK(num_histograms <= 256);
+  if (num_histograms <= 1) {  /* nothing to choose between: one block, id 0 */
+    for (i = 0; i < length; ++i) {
+      block_id[i] = 0;
+    }
+    return 1;
+  }
+  memset(insert_cost, 0, sizeof(insert_cost[0]) * data_size * num_histograms);
+  for (i = 0; i < num_histograms; ++i) {  /* stash FastLog2(total) in row 0 */
+    insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_);
+  }
+  for (i = data_size; i != 0;) {  /* descending: row 0 is overwritten last */
+    --i;
+    for (j = 0; j < num_histograms; ++j) {
+      insert_cost[i * num_histograms + j] =
+          insert_cost[j] - BitCost(histograms[j].data_[i]);
+    }
+  }
+  memset(cost, 0, sizeof(cost[0]) * num_histograms);
+  memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
+  /* After each iteration of this loop, cost[k] will contain the difference
+     between the minimum cost of arriving at the current byte position using
+     entropy code k, and the minimum cost of arriving at the current byte
+     position. This difference is capped at the block switch cost, and if it
+     reaches block switch cost, it means that when we trace back from the last
+     position, we need to switch here. */
+  for (i = 0; i < length; ++i) {
+    const size_t byte_ix = i;
+    size_t ix = byte_ix * bitmaplen;
+    size_t insert_cost_ix = data[byte_ix] * num_histograms;
+    double min_cost = 1e99;
+    double block_switch_cost = block_switch_bitcost;
+    size_t k;
+    for (k = 0; k < num_histograms; ++k) {
+      /* We are coding the symbol in data[byte_ix] with entropy code k. */
+      cost[k] += insert_cost[insert_cost_ix + k];
+      if (cost[k] < min_cost) {
+        min_cost = cost[k];
+        block_id[byte_ix] = (uint8_t)k;
+      }
+    }
+    /* Cheaper switches (hence more blocks) within the first 2000 positions. */
+    if (byte_ix < 2000) {
+      block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000;
+    }
+    for (k = 0; k < num_histograms; ++k) {
+      cost[k] -= min_cost;
+      if (cost[k] >= block_switch_cost) {
+        const uint8_t mask = (uint8_t)(1u << (k & 7));
+        cost[k] = block_switch_cost;
+        BROTLI_DCHECK((k >> 3) < bitmaplen);
+        switch_signal[ix + (k >> 3)] |= mask;  /* set bit k for this position */
+      }
+    }
+  }
+  {  /* Trace back from the last position and switch at the marked places. */
+    size_t byte_ix = length - 1;  /* assumes length > 0 -- TODO confirm callers */
+    size_t ix = byte_ix * bitmaplen;
+    uint8_t cur_id = block_id[byte_ix];
+    while (byte_ix > 0) {
+      const uint8_t mask = (uint8_t)(1u << (cur_id & 7));
+      BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmaplen);
+      --byte_ix;
+      ix -= bitmaplen;
+      if (switch_signal[ix + (cur_id >> 3)] & mask) {
+        if (cur_id != block_id[byte_ix]) {
+          cur_id = block_id[byte_ix];
+          ++num_blocks;
+        }
+      }
+      block_id[byte_ix] = cur_id;
+    }
+  }
+  return num_blocks;
+}
+
+static size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length,
+                                uint16_t* new_id, const size_t num_histograms) {
+  /* Renumbers ids in order of first appearance; returns how many are used. */
+  static const uint16_t kInvalidId = 256;
+  uint16_t used = 0;
+  size_t n;
+  for (n = 0; n < num_histograms; ++n) new_id[n] = kInvalidId;
+  /* Pass 1: give every id that occurs the next unused dense id. */
+  for (n = 0; n < length; ++n) {
+    const uint8_t old_id = block_ids[n];
+    BROTLI_DCHECK(old_id < num_histograms);
+    if (new_id[old_id] == kInvalidId) new_id[old_id] = used++;
+  }
+  /* Pass 2: rewrite block_ids in place through the mapping. */
+  for (n = 0; n < length; ++n) {
+    block_ids[n] = (uint8_t)new_id[block_ids[n]];
+    BROTLI_DCHECK(block_ids[n] < num_histograms);
+  }
+  BROTLI_DCHECK(used <= num_histograms);
+  return used;
+}
+
+static void FN(BuildBlockHistograms)(const DataType* data, const size_t length,
+                                     const uint8_t* block_ids,
+                                     const size_t num_histograms,
+                                     HistogramType* histograms) {
+  size_t remaining;  /* symbols still to be added to their block's histogram */
+  FN(ClearHistograms)(histograms, num_histograms);
+  for (remaining = length; remaining != 0; --remaining, ++data, ++block_ids) {
+    FN(HistogramAdd)(&histograms[*block_ids], *data);
+  }
+}
+/* Merges the runs of equal ids in |block_ids| into clusters, fills |split|. */
+static void FN(ClusterBlocks)(MemoryManager* m,
+                              const DataType* data, const size_t length,
+                              const size_t num_blocks,
+                              uint8_t* block_ids,
+                              BlockSplit* split) {
+  uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks);
+  uint32_t* block_lengths = BROTLI_ALLOC(m, uint32_t, num_blocks);
+  const size_t expected_num_clusters = CLUSTERS_PER_BATCH *
+      (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH;
+  size_t all_histograms_size = 0;
+  size_t all_histograms_capacity = expected_num_clusters;
+  HistogramType* all_histograms =
+      BROTLI_ALLOC(m, HistogramType, all_histograms_capacity);
+  size_t cluster_size_size = 0;
+  size_t cluster_size_capacity = expected_num_clusters;
+  uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity);
+  size_t num_clusters = 0;
+  HistogramType* histograms = BROTLI_ALLOC(m, HistogramType,
+      BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH));
+  size_t max_num_pairs =
+      HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2;
+  size_t pairs_capacity = max_num_pairs + 1;
+  HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity);
+  size_t pos = 0;  /* Read cursor into |data|. */
+  uint32_t* clusters;
+  size_t num_final_clusters;
+  static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
+  uint32_t* new_index;
+  size_t i;
+  uint32_t sizes[HISTOGRAMS_PER_BATCH] = { 0 };
+  uint32_t new_clusters[HISTOGRAMS_PER_BATCH] = { 0 };
+  uint32_t symbols[HISTOGRAMS_PER_BATCH] = { 0 };
+  uint32_t remap[HISTOGRAMS_PER_BATCH] = { 0 };
+
+  if (BROTLI_IS_OOM(m)) return;
+
+  memset(block_lengths, 0, num_blocks * sizeof(uint32_t));
+  /* Measure the length of every run of identical block ids. */
+  {
+    size_t block_idx = 0;
+    for (i = 0; i < length; ++i) {
+      BROTLI_DCHECK(block_idx < num_blocks);
+      ++block_lengths[block_idx];
+      if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
+        ++block_idx;
+      }
+    }
+    BROTLI_DCHECK(block_idx == num_blocks);
+  }
+  /* Stage 1: combine the blocks of each batch among themselves. */
+  for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) {
+    const size_t num_to_combine =
+        BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH);
+    size_t num_new_clusters;
+    size_t j;
+    for (j = 0; j < num_to_combine; ++j) {
+      size_t k;
+      FN(HistogramClear)(&histograms[j]);
+      for (k = 0; k < block_lengths[i + j]; ++k) {
+        FN(HistogramAdd)(&histograms[j], data[pos++]);
+      }
+      histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]);
+      new_clusters[j] = (uint32_t)j;
+      symbols[j] = (uint32_t)j;
+      sizes[j] = 1;
+    }
+    num_new_clusters = FN(BrotliHistogramCombine)(
+        histograms, sizes, symbols, new_clusters, pairs, num_to_combine,
+        num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs);
+    BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms,
+        all_histograms_capacity, all_histograms_size + num_new_clusters);
+    BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size,
+        cluster_size_capacity, cluster_size_size + num_new_clusters);
+    if (BROTLI_IS_OOM(m)) return;
+    for (j = 0; j < num_new_clusters; ++j) {  /* Keep the survivors. */
+      all_histograms[all_histograms_size++] = histograms[new_clusters[j]];
+      cluster_size[cluster_size_size++] = sizes[new_clusters[j]];
+      remap[new_clusters[j]] = (uint32_t)j;
+    }
+    for (j = 0; j < num_to_combine; ++j) {  /* Batch-local id -> global id. */
+      histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]];
+    }
+    num_clusters += num_new_clusters;
+    BROTLI_DCHECK(num_clusters == cluster_size_size);
+    BROTLI_DCHECK(num_clusters == all_histograms_size);
+  }
+  BROTLI_FREE(m, histograms);
+  /* Make sure |pairs| is big enough for the combine over all clusters. */
+  max_num_pairs =
+      BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters);
+  if (pairs_capacity < max_num_pairs + 1) {
+    BROTLI_FREE(m, pairs);
+    pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1);
+    if (BROTLI_IS_OOM(m)) return;
+  }
+  /* Stage 2: combine the per-batch clusters with each other. */
+  clusters = BROTLI_ALLOC(m, uint32_t, num_clusters);
+  if (BROTLI_IS_OOM(m)) return;
+  for (i = 0; i < num_clusters; ++i) {
+    clusters[i] = (uint32_t)i;
+  }
+  num_final_clusters = FN(BrotliHistogramCombine)(
+      all_histograms, cluster_size, histogram_symbols, clusters, pairs,
+      num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES,
+      max_num_pairs);
+  BROTLI_FREE(m, pairs);
+  BROTLI_FREE(m, cluster_size);
+  /* Stage 3: move each block to its cheapest cluster; number by first use. */
+  new_index = BROTLI_ALLOC(m, uint32_t, num_clusters);
+  if (BROTLI_IS_OOM(m)) return;
+  for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex;
+  pos = 0;
+  {
+    uint32_t next_index = 0;
+    for (i = 0; i < num_blocks; ++i) {
+      HistogramType histo;
+      size_t j;
+      uint32_t best_out;
+      double best_bits;
+      FN(HistogramClear)(&histo);
+      for (j = 0; j < block_lengths[i]; ++j) {
+        FN(HistogramAdd)(&histo, data[pos++]);
+      }
+      best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1];
+      best_bits =
+          FN(BrotliHistogramBitCostDistance)(&histo, &all_histograms[best_out]);
+      for (j = 0; j < num_final_clusters; ++j) {
+        const double cur_bits = FN(BrotliHistogramBitCostDistance)(
+            &histo, &all_histograms[clusters[j]]);
+        if (cur_bits < best_bits) {
+          best_bits = cur_bits;
+          best_out = clusters[j];
+        }
+      }
+      histogram_symbols[i] = best_out;
+      if (new_index[best_out] == kInvalidIndex) {
+        new_index[best_out] = next_index++;
+      }
+    }
+  }
+  BROTLI_FREE(m, clusters);
+  BROTLI_FREE(m, all_histograms);
+  BROTLI_ENSURE_CAPACITY(
+      m, uint8_t, split->types, split->types_alloc_size, num_blocks);
+  BROTLI_ENSURE_CAPACITY(
+      m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks);
+  if (BROTLI_IS_OOM(m)) return;
+  {  /* Emit |split|; neighbours with the same cluster become one block. */
+    uint32_t cur_length = 0;
+    size_t block_idx = 0;
+    uint8_t max_type = 0;
+    for (i = 0; i < num_blocks; ++i) {
+      cur_length += block_lengths[i];
+      if (i + 1 == num_blocks ||
+          histogram_symbols[i] != histogram_symbols[i + 1]) {
+        const uint8_t id = (uint8_t)new_index[histogram_symbols[i]];
+        split->types[block_idx] = id;
+        split->lengths[block_idx] = cur_length;
+        max_type = BROTLI_MAX(uint8_t, max_type, id);
+        cur_length = 0;
+        ++block_idx;
+      }
+    }
+    split->num_blocks = block_idx;
+    split->num_types = (size_t)max_type + 1;
+  }
+  BROTLI_FREE(m, new_index);
+  BROTLI_FREE(m, block_lengths);
+  BROTLI_FREE(m, histogram_symbols);
+}
+/* Block-splits |data|: fits entropy codes, finds blocks, clusters them. */
+static void FN(SplitByteVector)(MemoryManager* m,
+                                const DataType* data, const size_t length,
+                                const size_t literals_per_histogram,
+                                const size_t max_histograms,
+                                const size_t sampling_stride_length,
+                                const double block_switch_cost,
+                                const BrotliEncoderParams* params,
+                                BlockSplit* split) {
+  const size_t data_size = FN(HistogramDataSize)();
+  size_t num_histograms = length / literals_per_histogram + 1;
+  HistogramType* histograms;
+  if (num_histograms > max_histograms) {
+    num_histograms = max_histograms;
+  }
+  if (length == 0) {  /* Nothing to split: one type, no block added. */
+    split->num_types = 1;
+    return;
+  } else if (length < kMinLengthForBlockSplitting) {  /* One block. */
+    BROTLI_ENSURE_CAPACITY(m, uint8_t,
+        split->types, split->types_alloc_size, split->num_blocks + 1);
+    BROTLI_ENSURE_CAPACITY(m, uint32_t,
+        split->lengths, split->lengths_alloc_size, split->num_blocks + 1);
+    if (BROTLI_IS_OOM(m)) return;
+    split->num_types = 1;
+    split->types[split->num_blocks] = 0;
+    split->lengths[split->num_blocks] = (uint32_t)length;
+    split->num_blocks++;
+    return;
+  }
+  histograms = BROTLI_ALLOC(m, HistogramType, num_histograms);
+  if (BROTLI_IS_OOM(m)) return;
+  /* Find good entropy codes. */
+  FN(InitialEntropyCodes)(data, length,
+                          sampling_stride_length,
+                          num_histograms, histograms);
+  FN(RefineEntropyCodes)(data, length,
+                         sampling_stride_length,
+                         num_histograms, histograms);
+  {
+    /* Find a good path through literals with the good entropy codes. */
+    uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length);
+    size_t num_blocks = 0;
+    const size_t bitmaplen = (num_histograms + 7) >> 3;  /* 1 bit per code. */
+    double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms);
+    double* cost = BROTLI_ALLOC(m, double, num_histograms);
+    uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen);
+    uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms);
+    const size_t iters = params->quality < HQ_ZOPFLIFICATION_QUALITY ? 3 : 10;
+    size_t i;
+    if (BROTLI_IS_OOM(m)) return;
+    for (i = 0; i < iters; ++i) {  /* Alternate assignment and refit. */
+      num_blocks = FN(FindBlocks)(data, length,
+                                  block_switch_cost,
+                                  num_histograms, histograms,
+                                  insert_cost, cost, switch_signal,
+                                  block_ids);
+      num_histograms = FN(RemapBlockIds)(block_ids, length,
+                                         new_id, num_histograms);
+      FN(BuildBlockHistograms)(data, length, block_ids,
+                               num_histograms, histograms);
+    }
+    BROTLI_FREE(m, insert_cost);
+    BROTLI_FREE(m, cost);
+    BROTLI_FREE(m, switch_signal);
+    BROTLI_FREE(m, new_id);
+    BROTLI_FREE(m, histograms);
+    FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split);
+    if (BROTLI_IS_OOM(m)) return;
+    BROTLI_FREE(m, block_ids);
+  }
+}
+
+#undef HistogramType
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/brotli_bit_stream.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/brotli_bit_stream.c
new file mode 100755
index 0000000000..aaf2dad7db
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/brotli_bit_stream.c
@@ -0,0 +1,1331 @@
+/* Copyright 2014 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Brotli bit stream functions to support the low level format. There are no
+   compression algorithms here, just the right ordering of bits to match the
+   specs. */
+
+#include "./brotli_bit_stream.h"
+
+#include <string.h>  /* memcpy, memset */
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./entropy_encode.h"
+#include "./entropy_encode_static.h"
+#include "./fast_log.h"
+#include "./histogram.h"
+#include "./memory.h"
+#include "./write_bits.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define MAX_HUFFMAN_TREE_SIZE (2 * BROTLI_NUM_COMMAND_SYMBOLS + 1)
+/* The maximum size of Huffman dictionary for distances assuming that
+   NPOSTFIX = 0 and NDIRECT = 0. */
+#define MAX_SIMPLE_DISTANCE_ALPHABET_SIZE \
+  BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_LARGE_MAX_DISTANCE_BITS)
+/* MAX_SIMPLE_DISTANCE_ALPHABET_SIZE == 140 */
+
+/* Represents the range of values belonging to a prefix code:
+   [offset, offset + 2^nbits); the position inside it is sent as extra bits. */
+typedef struct PrefixCodeRange {
+  uint32_t offset;  /* Smallest value of the range. */
+  uint32_t nbits;   /* Number of extra bits that select a value in it. */
+} PrefixCodeRange;
+
+static const PrefixCodeRange
+    kBlockLengthPrefixCode[BROTLI_NUM_BLOCK_LEN_SYMBOLS] = {
+  { 1, 2}, { 5, 2}, { 9, 2}, {13, 2}, {17, 3}, { 25, 3}, { 33, 3},
+  {41, 3}, {49, 4}, {65, 4}, {81, 4}, {97, 4}, {113, 5}, {145, 5},
+  {177, 5}, { 209,  5}, { 241,  6}, { 305,  6}, { 369,  7}, {  497,  8},
+  {753, 9}, {1265, 10}, {2289, 11}, {4337, 12}, {8433, 13}, {16625, 24}
+};
+/* Finds the kBlockLengthPrefixCode entry whose range contains |len|. */
+static BROTLI_INLINE uint32_t BlockLengthPrefixCode(uint32_t len) {
+  uint32_t idx = (len < 177) ? (len < 41 ? 0 : 7) : (len < 753 ? 14 : 20);
+  const uint32_t last = BROTLI_NUM_BLOCK_LEN_SYMBOLS - 1;
+  for (; idx < last && kBlockLengthPrefixCode[idx + 1].offset <= len; ++idx) {}
+  return idx;
+}
+/* Splits |len| into its prefix code, extra-bit count and extra-bit value. */
+static BROTLI_INLINE void GetBlockLengthPrefixCode(uint32_t len, size_t* code,
+    uint32_t* n_extra, uint32_t* extra) {
+  const size_t idx = (*code = BlockLengthPrefixCode(len));
+  *n_extra = kBlockLengthPrefixCode[idx].nbits;
+  *extra = len - kBlockLengthPrefixCode[idx].offset;
+}
+/* Remembers the two most recently coded block types. */
+typedef struct BlockTypeCodeCalculator {
+  size_t last_type;
+  size_t second_last_type;
+} BlockTypeCodeCalculator;
+/* Puts the calculator into its start state: last type 1, second-last 0. */
+static void InitBlockTypeCodeCalculator(BlockTypeCodeCalculator* self) {
+  self->second_last_type = 0;
+  self->last_type = 1;
+}
+/* Code for |type|: 1 if last + 1, else 0 if second-last, else type + 2. */
+static BROTLI_INLINE size_t NextBlockTypeCode(
+    BlockTypeCodeCalculator* calculator, uint8_t type) {
+  const size_t older = calculator->second_last_type;
+  const size_t newer = calculator->last_type;
+  calculator->second_last_type = newer;
+  calculator->last_type = type;
+  return (type == newer + 1) ? 1u : (type == older) ? 0u : type + 2u;
+}
+
+/* Computes the MLEN fields: |bits| is length - 1, |numbits| its width and
+   |nibblesbits| the 2 bits that encode MNIBBLES (0-3).
+   REQUIRES: length > 0, length <= (1 << 24) */
+static void BrotliEncodeMlen(size_t length, uint64_t* bits,
+                             size_t* numbits, uint64_t* nibblesbits) {
+  size_t lg = (length == 1) ? 1 : Log2FloorNonZero((uint32_t)(length - 1)) + 1;
+  size_t mnibbles = (lg < 16 ? 16 : (lg + 3)) / 4;  /* 4, 5 or 6. */
+  BROTLI_DCHECK(length > 0);
+  BROTLI_DCHECK(length <= (1 << 24));
+  BROTLI_DCHECK(lg <= 24);
+  *nibblesbits = mnibbles - 4;
+  *numbits = mnibbles * 4;
+  *bits = length - 1;
+}
+/* Writes the insert and copy length extra bits of |cmd| with a single call. */
+static BROTLI_INLINE void StoreCommandExtra(
+    const Command* cmd, size_t* storage_ix, uint8_t* storage) {
+  uint32_t copylen_code = CommandCopyLenCode(cmd);
+  uint16_t inscode = GetInsertLengthCode(cmd->insert_len_);
+  uint16_t copycode = GetCopyLengthCode(copylen_code);
+  uint32_t insnumextra = GetInsertExtra(inscode);
+  uint64_t insextraval = cmd->insert_len_ - GetInsertBase(inscode);
+  uint64_t copyextraval = copylen_code - GetCopyBase(copycode);
+  uint64_t bits = (copyextraval << insnumextra) | insextraval;  /* ins low. */
+  BrotliWriteBits(
+      insnumextra + GetCopyExtra(copycode), bits, storage_ix, storage);
+}
+
+/* Stores almost everything needed to encode each block switch command:
+   type history plus (presumably) code depths/bits; confirm at the users. */
+typedef struct BlockSplitCode {
+  BlockTypeCodeCalculator type_code_calculator;
+  uint8_t type_depths[BROTLI_MAX_BLOCK_TYPE_SYMBOLS];
+  uint16_t type_bits[BROTLI_MAX_BLOCK_TYPE_SYMBOLS];
+  uint8_t length_depths[BROTLI_NUM_BLOCK_LEN_SYMBOLS];
+  uint16_t length_bits[BROTLI_NUM_BLOCK_LEN_SYMBOLS];
+} BlockSplitCode;
+
+/* Stores a number between 0 and 255. */
+static void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage) {
+  if (n == 0) {
+    BrotliWriteBits(1, 0, storage_ix, storage);  /* Zero is a single 0 bit. */
+  } else {
+    size_t nbits = Log2FloorNonZero(n);
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* Non-zero marker. */
+    BrotliWriteBits(3, nbits, storage_ix, storage);  /* Width of the rest. */
+    BrotliWriteBits(nbits, n - ((size_t)1 << nbits), storage_ix, storage);
+  }
+}
+
+/* Stores the compressed meta-block header.
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
+static void StoreCompressedMetaBlockHeader(BROTLI_BOOL is_final_block,
+                                           size_t length,
+                                           size_t* storage_ix,
+                                           uint8_t* storage) {
+  uint64_t lenbits;
+  size_t nlenbits;
+  uint64_t nibblesbits;
+  /* Layout: ISLAST, [ISEMPTY], MNIBBLES, MLEN - 1, [ISUNCOMPRESSED]. */
+  /* Write ISLAST bit. */
+  BrotliWriteBits(1, (uint64_t)is_final_block, storage_ix, storage);
+  /* Write ISEMPTY bit; it is only present in a final block. */
+  if (is_final_block) {
+    BrotliWriteBits(1, 0, storage_ix, storage);
+  }
+
+  BrotliEncodeMlen(length, &lenbits, &nlenbits, &nibblesbits);
+  BrotliWriteBits(2, nibblesbits, storage_ix, storage);
+  BrotliWriteBits(nlenbits, lenbits, storage_ix, storage);
+
+  if (!is_final_block) {
+    /* Write ISUNCOMPRESSED bit. */
+    BrotliWriteBits(1, 0, storage_ix, storage);
+  }
+}
+
+/* Stores the uncompressed meta-block header.
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
+static void BrotliStoreUncompressedMetaBlockHeader(size_t length,
+                                                   size_t* storage_ix,
+                                                   uint8_t* storage) {
+  uint64_t lenbits;
+  size_t nlenbits;
+  uint64_t nibblesbits;
+  /* Layout: ISLAST = 0, MNIBBLES, MLEN - 1, ISUNCOMPRESSED = 1. */
+  /* Write ISLAST bit.
+     Uncompressed block cannot be the last one, so set to 0. */
+  BrotliWriteBits(1, 0, storage_ix, storage);
+  BrotliEncodeMlen(length, &lenbits, &nlenbits, &nibblesbits);
+  BrotliWriteBits(2, nibblesbits, storage_ix, storage);
+  BrotliWriteBits(nlenbits, lenbits, storage_ix, storage);
+  /* Write ISUNCOMPRESSED bit. */
+  BrotliWriteBits(1, 1, storage_ix, storage);
+}
+/* Stores the code lengths of the code length alphabet, in kStorageOrder. */
+static void BrotliStoreHuffmanTreeOfHuffmanTreeToBitMask(
+    const int num_codes, const uint8_t* code_length_bitdepth,
+    size_t* storage_ix, uint8_t* storage) {
+  static const uint8_t kStorageOrder[BROTLI_CODE_LENGTH_CODES] = {
+    1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15
+  };
+  /* The bit lengths of the Huffman code over the code length alphabet
+     are compressed with the following static Huffman code:
+       Symbol   Code
+       ------   ----
+       0          00
+       1        1110
+       2         110
+       3          01
+       4          10
+       5        1111 */
+  static const uint8_t kHuffmanBitLengthHuffmanCodeSymbols[6] = {
+     0, 7, 3, 2, 1, 15
+  };
+  static const uint8_t kHuffmanBitLengthHuffmanCodeBitLengths[6] = {
+    2, 4, 3, 2, 2, 4
+  };
+
+  size_t skip_some = 0;  /* skips none. */
+
+  /* Throw away trailing zeros: */
+  size_t codes_to_store = BROTLI_CODE_LENGTH_CODES;
+  if (num_codes > 1) {  /* Else nothing is trimmed. */
+    for (; codes_to_store > 0; --codes_to_store) {
+      if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
+        break;
+      }
+    }
+  }
+  if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
+      code_length_bitdepth[kStorageOrder[1]] == 0) {
+    skip_some = 2;  /* skips two. */
+    if (code_length_bitdepth[kStorageOrder[2]] == 0) {
+      skip_some = 3;  /* skips three. */
+    }
+  }
+  BrotliWriteBits(2, skip_some, storage_ix, storage);  /* 0, 2 or 3. */
+  {
+    size_t i;
+    for (i = skip_some; i < codes_to_store; ++i) {
+      size_t l = code_length_bitdepth[kStorageOrder[i]];
+      BrotliWriteBits(kHuffmanBitLengthHuffmanCodeBitLengths[l],
+          kHuffmanBitLengthHuffmanCodeSymbols[l], storage_ix, storage);
+    }
+  }
+}
+/* Writes every entry of |huffman_tree| and the extra bits of repeat codes. */
+static void BrotliStoreHuffmanTreeToBitMask(
+    const size_t huffman_tree_size, const uint8_t* huffman_tree,
+    const uint8_t* huffman_tree_extra_bits, const uint8_t* code_length_bitdepth,
+    const uint16_t* code_length_bitdepth_symbols,
+    size_t* BROTLI_RESTRICT storage_ix, uint8_t* BROTLI_RESTRICT storage) {
+  size_t i;
+  for (i = 0; i < huffman_tree_size; ++i) {
+    size_t ix = huffman_tree[i];  /* Code length symbol to emit. */
+    BrotliWriteBits(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix],
+                    storage_ix, storage);
+    /* Extra bits */
+    switch (ix) {
+      case BROTLI_REPEAT_PREVIOUS_CODE_LENGTH:
+        BrotliWriteBits(2, huffman_tree_extra_bits[i], storage_ix, storage);
+        break;
+      case BROTLI_REPEAT_ZERO_CODE_LENGTH:
+        BrotliWriteBits(3, huffman_tree_extra_bits[i], storage_ix, storage);
+        break;
+    }
+  }
+}
+/* Stores a simple prefix code; sorts |symbols| by depth in place first. */
+static void StoreSimpleHuffmanTree(const uint8_t* depths,
+                                   size_t symbols[4],
+                                   size_t num_symbols,
+                                   size_t max_bits,
+                                   size_t* storage_ix, uint8_t* storage) {
+  /* value of 1 indicates a simple Huffman code */
+  BrotliWriteBits(2, 1, storage_ix, storage);
+  BrotliWriteBits(2, num_symbols - 1, storage_ix, storage);  /* NSYM - 1 */
+
+  {
+    /* Sort */
+    size_t i;
+    for (i = 0; i < num_symbols; i++) {
+      size_t j;
+      for (j = i + 1; j < num_symbols; j++) {
+        if (depths[symbols[j]] < depths[symbols[i]]) {
+          BROTLI_SWAP(size_t, symbols, j, i);
+        }
+      }
+    }
+  }
+
+  if (num_symbols == 2) {
+    BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+    BrotliWriteBits(max_bits, symbols[1], storage_ix, storage);
+  } else if (num_symbols == 3) {
+    BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+    BrotliWriteBits(max_bits, symbols[1], storage_ix, storage);
+    BrotliWriteBits(max_bits, symbols[2], storage_ix, storage);
+  } else {  /* Every other count is written as four symbols. */
+    BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+    BrotliWriteBits(max_bits, symbols[1], storage_ix, storage);
+    BrotliWriteBits(max_bits, symbols[2], storage_ix, storage);
+    BrotliWriteBits(max_bits, symbols[3], storage_ix, storage);
+    /* tree-select */
+    BrotliWriteBits(1, depths[symbols[0]] == 1 ? 1 : 0, storage_ix, storage);
+  }
+}
+
+/* Stores the code lengths |depths| of an alphabet of |num| symbols: first
+   the code over the code length alphabet, then the coded lengths. */
+void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
+                            HuffmanTree* tree,
+                            size_t* storage_ix, uint8_t* storage) {
+  /* Write the Huffman tree into the brotli-representation.
+     The command alphabet is the largest, so this allocation will fit all
+     alphabets. */
+  uint8_t huffman_tree[BROTLI_NUM_COMMAND_SYMBOLS];
+  uint8_t huffman_tree_extra_bits[BROTLI_NUM_COMMAND_SYMBOLS];
+  size_t huffman_tree_size = 0;
+  uint8_t code_length_bitdepth[BROTLI_CODE_LENGTH_CODES] = { 0 };
+  uint16_t code_length_bitdepth_symbols[BROTLI_CODE_LENGTH_CODES];
+  uint32_t huffman_tree_histogram[BROTLI_CODE_LENGTH_CODES] = { 0 };
+  size_t i;
+  int num_codes = 0;  /* Distinct code length symbols in use, capped at 2. */
+  size_t code = 0;    /* The first code length symbol in use. */
+
+  BROTLI_DCHECK(num <= BROTLI_NUM_COMMAND_SYMBOLS);
+
+  BrotliWriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree,
+                         huffman_tree_extra_bits);
+
+  /* Calculate the statistics of the Huffman tree in brotli-representation. */
+  for (i = 0; i < huffman_tree_size; ++i) {
+    ++huffman_tree_histogram[huffman_tree[i]];
+  }
+
+  for (i = 0; i < BROTLI_CODE_LENGTH_CODES; ++i) {
+    if (huffman_tree_histogram[i]) {
+      if (num_codes == 0) {
+        code = i;
+        num_codes = 1;
+      } else if (num_codes == 1) {
+        num_codes = 2;
+        break;
+      }
+    }
+  }
+
+  /* Calculate a second Huffman tree, which is used to compress the
+     brotli-representation of the earlier Huffman tree. */
+  BrotliCreateHuffmanTree(huffman_tree_histogram, BROTLI_CODE_LENGTH_CODES,
+                          5, tree, code_length_bitdepth);
+  BrotliConvertBitDepthsToSymbols(code_length_bitdepth,
+                                  BROTLI_CODE_LENGTH_CODES,
+                                  code_length_bitdepth_symbols);
+
+  /* Now, we have all the data, let's start storing it */
+  BrotliStoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth,
+                                               storage_ix, storage);
+
+  if (num_codes == 1) {  /* A lone symbol is then written with 0 bits. */
+    code_length_bitdepth[code] = 0;
+  }
+
+  /* Store the real Huffman tree now. */
+  BrotliStoreHuffmanTreeToBitMask(huffman_tree_size,
+                                  huffman_tree,
+                                  huffman_tree_extra_bits,
+                                  code_length_bitdepth,
+                                  code_length_bitdepth_symbols,
+                                  storage_ix, storage);
+}
+
+/* Builds a Huffman tree from histogram[0:length] into depth[0:length] and
+   bits[0:length] and stores the encoded tree to the bit stream. */
+static void BuildAndStoreHuffmanTree(const uint32_t* histogram,
+                                     const size_t histogram_length,
+                                     const size_t alphabet_size,
+                                     HuffmanTree* tree,
+                                     uint8_t* depth,
+                                     uint16_t* bits,
+                                     size_t* storage_ix,
+                                     uint8_t* storage) {
+  size_t count = 0;  /* Used symbols found; only compared against 1 and 4. */
+  size_t s4[4] = { 0 };  /* The first (up to) four used symbols. */
+  size_t i;
+  size_t max_bits = 0;
+  for (i = 0; i < histogram_length; i++) {
+    if (histogram[i]) {
+      if (count < 4) {
+        s4[count] = i;
+      } else if (count > 4) {  /* More than five: stop scanning. */
+        break;
+      }
+      count++;
+    }
+  }
+  /* max_bits becomes the bit width of the largest index, alphabet_size - 1. */
+  {
+    size_t max_bits_counter = alphabet_size - 1;
+    while (max_bits_counter) {
+      max_bits_counter >>= 1;
+      ++max_bits;
+    }
+  }
+
+  if (count <= 1) {  /* Only s4[0] is stored; its code has zero length. */
+    BrotliWriteBits(4, 1, storage_ix, storage);
+    BrotliWriteBits(max_bits, s4[0], storage_ix, storage);
+    depth[s4[0]] = 0;
+    bits[s4[0]] = 0;
+    return;
+  }
+
+  memset(depth, 0, histogram_length * sizeof(depth[0]));
+  BrotliCreateHuffmanTree(histogram, histogram_length, 15, tree, depth);
+  BrotliConvertBitDepthsToSymbols(depth, histogram_length, bits);
+
+  if (count <= 4) {
+    StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage);
+  } else {
+    BrotliStoreHuffmanTree(depth, histogram_length, tree, storage_ix, storage);
+  }
+}
+/* Comparator: true when |v0| has a strictly smaller total count than |v1|. */
+static BROTLI_INLINE BROTLI_BOOL SortHuffmanTree(
+    const HuffmanTree* v0, const HuffmanTree* v1) {
+  return TO_BROTLI_BOOL(v1->total_count_ > v0->total_count_);
+}
+/* Builds a Huffman code from |histogram|, fills depth/bits and stores it. */
+void BrotliBuildAndStoreHuffmanTreeFast(MemoryManager* m,
+                                        const uint32_t* histogram,
+                                        const size_t histogram_total,
+                                        const size_t max_bits,
+                                        uint8_t* depth, uint16_t* bits,
+                                        size_t* storage_ix,
+                                        uint8_t* storage) {
+  size_t count = 0;
+  size_t symbols[4] = { 0 };
+  size_t length = 0;
+  size_t total = histogram_total;
+  while (total != 0) {  /* Scan until every counted occurrence is seen. */
+    if (histogram[length]) {
+      if (count < 4) {
+        symbols[count] = length;
+      }
+      ++count;
+      total -= histogram[length];
+    }
+    ++length;
+  }
+  /* |length| is now one past the last used symbol. */
+  if (count <= 1) {
+    BrotliWriteBits(4, 1, storage_ix, storage);
+    BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+    depth[symbols[0]] = 0;
+    bits[symbols[0]] = 0;
+    return;
+  }
+
+  memset(depth, 0, length * sizeof(depth[0]));
+  {
+    const size_t max_tree_size = 2 * length + 1;
+    HuffmanTree* tree = BROTLI_ALLOC(m, HuffmanTree, max_tree_size);
+    uint32_t count_limit;
+    if (BROTLI_IS_OOM(m)) return;
+    for (count_limit = 1; ; count_limit *= 2) {  /* Floor for leaf counts. */
+      HuffmanTree* node = tree;
+      size_t l;
+      for (l = length; l != 0;) {  /* Leaves, highest symbol first. */
+        --l;
+        if (histogram[l]) {
+          if (BROTLI_PREDICT_TRUE(histogram[l] >= count_limit)) {
+            InitHuffmanTree(node, histogram[l], -1, (int16_t)l);
+          } else {
+            InitHuffmanTree(node, count_limit, -1, (int16_t)l);
+          }
+          ++node;
+        }
+      }
+      {
+        const int n = (int)(node - tree);
+        HuffmanTree sentinel;
+        int i = 0;      /* Points to the next leaf node. */
+        int j = n + 1;  /* Points to the next non-leaf node. */
+        int k;
+
+        SortHuffmanTreeItems(tree, (size_t)n, SortHuffmanTree);
+        /* The nodes are:
+           [0, n): the sorted leaf nodes that we start with.
+           [n]: we add a sentinel here.
+           [n + 1, 2n): new parent nodes are added here, starting from
+                        (n+1). These are naturally in ascending order.
+           [2n]: we add a sentinel at the end as well.
+           There will be (2n+1) elements at the end. */
+        InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1);
+        *node++ = sentinel;
+        *node++ = sentinel;
+        /* Each round joins the two nodes with the smallest counts. */
+        for (k = n - 1; k > 0; --k) {
+          int left, right;
+          if (tree[i].total_count_ <= tree[j].total_count_) {
+            left = i;
+            ++i;
+          } else {
+            left = j;
+            ++j;
+          }
+          if (tree[i].total_count_ <= tree[j].total_count_) {
+            right = i;
+            ++i;
+          } else {
+            right = j;
+            ++j;
+          }
+          /* The sentinel node becomes the parent node. */
+          node[-1].total_count_ =
+              tree[left].total_count_ + tree[right].total_count_;
+          node[-1].index_left_ = (int16_t)left;
+          node[-1].index_right_or_value_ = (int16_t)right;
+          /* Add back the last sentinel node. */
+          *node++ = sentinel;
+        }
+        if (BrotliSetDepth(2 * n - 1, tree, depth, 14)) {
+          /* We need to pack the Huffman tree in 14 bits. If this was not
+             successful, add fake entities to the lowest values and retry. */
+          break;
+        }
+      }
+    }
+    BROTLI_FREE(m, tree);
+  }
+  BrotliConvertBitDepthsToSymbols(depth, length, bits);
+  if (count <= 4) {
+    size_t i;
+    /* value of 1 indicates a simple Huffman code */
+    BrotliWriteBits(2, 1, storage_ix, storage);
+    BrotliWriteBits(2, count - 1, storage_ix, storage);  /* NSYM - 1 */
+
+    /* Sort */
+    for (i = 0; i < count; i++) {
+      size_t j;
+      for (j = i + 1; j < count; j++) {
+        if (depth[symbols[j]] < depth[symbols[i]]) {
+          BROTLI_SWAP(size_t, symbols, j, i);
+        }
+      }
+    }
+
+    if (count == 2) {
+      BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+      BrotliWriteBits(max_bits, symbols[1], storage_ix, storage);
+    } else if (count == 3) {
+      BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+      BrotliWriteBits(max_bits, symbols[1], storage_ix, storage);
+      BrotliWriteBits(max_bits, symbols[2], storage_ix, storage);
+    } else {
+      BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+      BrotliWriteBits(max_bits, symbols[1], storage_ix, storage);
+      BrotliWriteBits(max_bits, symbols[2], storage_ix, storage);
+      BrotliWriteBits(max_bits, symbols[3], storage_ix, storage);
+      /* tree-select */
+      BrotliWriteBits(1, depth[symbols[0]] == 1 ? 1 : 0, storage_ix, storage);
+    }
+  } else {
+    uint8_t previous_value = 8;  /* Initial "previous" code length. */
+    size_t i;
+    /* Complex Huffman Tree */
+    StoreStaticCodeLengthCode(storage_ix, storage);
+
+    /* Actual RLE coding. */
+    for (i = 0; i < length;) {
+      const uint8_t value = depth[i];
+      size_t reps = 1;  /* Length of the run of |value| that starts at i. */
+      size_t k;
+      for (k = i + 1; k < length && depth[k] == value; ++k) {
+        ++reps;
+      }
+      i += reps;
+      if (value == 0) {
+        BrotliWriteBits(kZeroRepsDepth[reps], kZeroRepsBits[reps],
+                        storage_ix, storage);
+      } else {
+        if (previous_value != value) {  /* Write the new length once. */
+          BrotliWriteBits(kCodeLengthDepth[value], kCodeLengthBits[value],
+                          storage_ix, storage);
+          --reps;
+        }
+        if (reps < 3) {  /* Short remainder: write it out literally. */
+          while (reps != 0) {
+            reps--;
+            BrotliWriteBits(kCodeLengthDepth[value], kCodeLengthBits[value],
+                            storage_ix, storage);
+          }
+        } else {
+          reps -= 3;
+          BrotliWriteBits(kNonZeroRepsDepth[reps], kNonZeroRepsBits[reps],
+                          storage_ix, storage);
+        }
+        previous_value = value;
+      }
+    }
+  }
+}
+/* Returns the first index of |value| in |v|, or |v_size| if it is absent. */
+static size_t IndexOf(const uint8_t* v, size_t v_size, uint8_t value) {
+  size_t pos = 0;
+  while (pos < v_size && v[pos] != value) {
+    ++pos;
+  }
+  return pos;
+}
+/* Moves v[index] to v[0], shifting v[0..index) one slot towards the back. */
+static void MoveToFront(uint8_t* v, size_t index) {
+  const uint8_t moved = v[index];
+  size_t pos = index;
+  for (; pos != 0; --pos) {
+    v[pos] = v[pos - 1];
+  }
+  v[0] = moved;
+}
+/* Writes to |v_out| the move-to-front index of every value in |v_in|. */
+static void MoveToFrontTransform(const uint32_t* BROTLI_RESTRICT v_in,
+                                 const size_t v_size,
+                                 uint32_t* v_out) {
+  size_t i;
+  uint8_t mtf[256];  /* Current list order; only [0, max_value] is used. */
+  uint32_t max_value;
+  if (v_size == 0) {
+    return;
+  }
+  max_value = v_in[0];
+  for (i = 1; i < v_size; ++i) {
+    if (v_in[i] > max_value) max_value = v_in[i];
+  }
+  BROTLI_DCHECK(max_value < 256u);
+  for (i = 0; i <= max_value; ++i) {  /* Start from the identity order. */
+    mtf[i] = (uint8_t)i;
+  }
+  {
+    size_t mtf_size = max_value + 1;
+    for (i = 0; i < v_size; ++i) {
+      size_t index = IndexOf(mtf, mtf_size, (uint8_t)v_in[i]);
+      BROTLI_DCHECK(index < mtf_size);
+      v_out[i] = (uint32_t)index;
+      MoveToFront(mtf, index);
+    }
+  }
+}
+
+/* Finds runs of zeros in v[0..in_size) and replaces them, in place, with a
+   prefix code of the run length plus extra bits (the lower 9 bits hold the
+   prefix code, the rest the extra bits). Non-zero values in v[] are shifted by
+   the final *max_run_length_prefix, which is never raised above its initial
+   value. On return *out_size is the new length of v. The prefix code of run
+   length L is simply Log2Floor(L), which is also its number of extra bits. */
+static void RunLengthCodeZeros(const size_t in_size,
+    uint32_t* BROTLI_RESTRICT v, size_t* BROTLI_RESTRICT out_size,
+    uint32_t* BROTLI_RESTRICT max_run_length_prefix) {
+  uint32_t max_reps = 0;
+  size_t i;
+  uint32_t max_prefix;
+  for (i = 0; i < in_size;) {  /* pass 1: find the longest run of zeros */
+    uint32_t reps = 0;
+    for (; i < in_size && v[i] != 0; ++i) ;  /* skip non-zero values */
+    for (; i < in_size && v[i] == 0; ++i) {
+      ++reps;
+    }
+    max_reps = BROTLI_MAX(uint32_t, reps, max_reps);
+  }
+  max_prefix = max_reps > 0 ? Log2FloorNonZero(max_reps) : 0;
+  max_prefix = BROTLI_MIN(uint32_t, max_prefix, *max_run_length_prefix);
+  *max_run_length_prefix = max_prefix;  /* report the prefix actually used */
+  *out_size = 0;
+  for (i = 0; i < in_size;) {  /* pass 2: rewrite v in place */
+    BROTLI_DCHECK(*out_size <= i);  /* the write cursor never passes the read cursor */
+    if (v[i] != 0) {
+      v[*out_size] = v[i] + *max_run_length_prefix;
+      ++i;
+      ++(*out_size);
+    } else {
+      uint32_t reps = 1;
+      size_t k;
+      for (k = i + 1; k < in_size && v[k] == 0; ++k) {
+        ++reps;
+      }
+      i += reps;
+      while (reps != 0) {
+        if (reps < (2u << max_prefix)) {  /* the rest of the run fits one code */
+          uint32_t run_length_prefix = Log2FloorNonZero(reps);
+          const uint32_t extra_bits = reps - (1u << run_length_prefix);
+          v[*out_size] = run_length_prefix + (extra_bits << 9);
+          ++(*out_size);
+          break;
+        } else {  /* emit the largest code: (2 << max_prefix) - 1 zeros */
+          const uint32_t extra_bits = (1u << max_prefix) - 1u;
+          v[*out_size] = max_prefix + (extra_bits << 9);
+          reps -= (2u << max_prefix) - 1u;
+          ++(*out_size);
+        }
+      }
+    }
+  }
+}
+
+#define SYMBOL_BITS 9
+
+static void EncodeContextMap(MemoryManager* m,
+                             const uint32_t* context_map,
+                             size_t context_map_size,
+                             size_t num_clusters,
+                             HuffmanTree* tree,
+                             size_t* storage_ix, uint8_t* storage) {
+  size_t i;
+  uint32_t* rle_symbols;  /* scratch: MTF output, then run-length coded in place */
+  uint32_t max_run_length_prefix = 6;  /* upper bound; lowered by RunLengthCodeZeros */
+  size_t num_rle_symbols = 0;
+  uint32_t histogram[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+  static const uint32_t kSymbolMask = (1u << SYMBOL_BITS) - 1u;
+  uint8_t depths[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+  uint16_t bits[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+  /* Output: cluster count, RLE parameters, prefix code, coded symbols, IMTF bit. */
+  StoreVarLenUint8(num_clusters - 1, storage_ix, storage);
+  /* With a single cluster only the count is written. */
+  if (num_clusters == 1) {
+    return;
+  }
+
+  rle_symbols = BROTLI_ALLOC(m, uint32_t, context_map_size);
+  if (BROTLI_IS_OOM(m)) return;
+  MoveToFrontTransform(context_map, context_map_size, rle_symbols);
+  RunLengthCodeZeros(context_map_size, rle_symbols,
+                     &num_rle_symbols, &max_run_length_prefix);
+  memset(histogram, 0, sizeof(histogram));
+  for (i = 0; i < num_rle_symbols; ++i) {
+    ++histogram[rle_symbols[i] & kSymbolMask];  /* count symbols, ignoring extra bits */
+  }
+  {
+    BROTLI_BOOL use_rle = TO_BROTLI_BOOL(max_run_length_prefix > 0);
+    BrotliWriteBits(1, (uint64_t)use_rle, storage_ix, storage);
+    if (use_rle) {
+      BrotliWriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
+    }
+  }
+  BuildAndStoreHuffmanTree(histogram, num_clusters + max_run_length_prefix,
+                           num_clusters + max_run_length_prefix,
+                           tree, depths, bits, storage_ix, storage);
+  for (i = 0; i < num_rle_symbols; ++i) {
+    const uint32_t rle_symbol = rle_symbols[i] & kSymbolMask;
+    const uint32_t extra_bits_val = rle_symbols[i] >> SYMBOL_BITS;
+    BrotliWriteBits(depths[rle_symbol], bits[rle_symbol], storage_ix, storage);
+    if (rle_symbol > 0 && rle_symbol <= max_run_length_prefix) {
+      BrotliWriteBits(rle_symbol, extra_bits_val, storage_ix, storage);  /* run-length code p carries p extra bits */
+    }
+  }
+  BrotliWriteBits(1, 1, storage_ix, storage);  /* use move-to-front */
+  BROTLI_FREE(m, rle_symbols);
+}
+
+/* Stores a block switch: type code (not for the first block), then length. */
+static BROTLI_INLINE void StoreBlockSwitch(BlockSplitCode* code,
+                                           const uint32_t block_len,
+                                           const uint8_t block_type,
+                                           BROTLI_BOOL is_first_block,
+                                           size_t* storage_ix,
+                                           uint8_t* storage) {
+  size_t typecode = NextBlockTypeCode(&code->type_code_calculator, block_type);
+  size_t lencode;
+  uint32_t len_nextra;
+  uint32_t len_extra;
+  if (!is_first_block) {
+    BrotliWriteBits(code->type_depths[typecode], code->type_bits[typecode],
+                    storage_ix, storage);
+  }
+  GetBlockLengthPrefixCode(block_len, &lencode, &len_nextra, &len_extra);
+  /* Block length: the prefix code symbol, then len_nextra extra bits. */
+  BrotliWriteBits(code->length_depths[lencode], code->length_bits[lencode],
+                  storage_ix, storage);
+  BrotliWriteBits(len_nextra, len_extra, storage_ix, storage);
+}
+
+/* Builds the block-type and block-length codes of the given block split into
+   *code and stores them, with the first block's length, to the bit stream. */
+static void BuildAndStoreBlockSplitCode(const uint8_t* types,
+                                        const uint32_t* lengths,
+                                        const size_t num_blocks,
+                                        const size_t num_types,
+                                        HuffmanTree* tree,
+                                        BlockSplitCode* code,
+                                        size_t* storage_ix,
+                                        uint8_t* storage) {
+  uint32_t type_histo[BROTLI_MAX_BLOCK_TYPE_SYMBOLS];
+  uint32_t length_histo[BROTLI_NUM_BLOCK_LEN_SYMBOLS];
+  size_t i;
+  BlockTypeCodeCalculator type_code_calculator;  /* local: used only to build type_histo */
+  memset(type_histo, 0, (num_types + 2) * sizeof(type_histo[0]));
+  memset(length_histo, 0, sizeof(length_histo));
+  InitBlockTypeCodeCalculator(&type_code_calculator);
+  for (i = 0; i < num_blocks; ++i) {
+    size_t type_code = NextBlockTypeCode(&type_code_calculator, types[i]);
+    if (i != 0) ++type_histo[type_code];  /* the first block's type code is never written */
+    ++length_histo[BlockLengthPrefixCode(lengths[i])];
+  }
+  StoreVarLenUint8(num_types - 1, storage_ix, storage);
+  if (num_types > 1) {  /* TODO: else? could StoreBlockSwitch occur? */
+    BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, num_types + 2, tree,
+                             &code->type_depths[0], &code->type_bits[0],
+                             storage_ix, storage);
+    BuildAndStoreHuffmanTree(&length_histo[0], BROTLI_NUM_BLOCK_LEN_SYMBOLS,
+                             BROTLI_NUM_BLOCK_LEN_SYMBOLS,
+                             tree, &code->length_depths[0],
+                             &code->length_bits[0], storage_ix, storage);
+    StoreBlockSwitch(code, lengths[0], types[0], 1, storage_ix, storage);  /* is_first_block = 1: length only */
+  }
+}
+
+/* Stores a context map where the histogram type is always the block type. */
+static void StoreTrivialContextMap(size_t num_types,
+                                   size_t context_bits,
+                                   HuffmanTree* tree,
+                                   size_t* storage_ix,
+                                   uint8_t* storage) {
+  StoreVarLenUint8(num_types - 1, storage_ix, storage);
+  if (num_types > 1) {  /* with a single type only the count is written */
+    size_t repeat_code = context_bits - 1u;  /* the one run-length symbol used */
+    size_t repeat_bits = (1u << repeat_code) - 1u;  /* all-ones extra bits for it */
+    size_t alphabet_size = num_types + repeat_code;
+    uint32_t histogram[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+    uint8_t depths[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+    uint16_t bits[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+    size_t i;
+    memset(histogram, 0, alphabet_size * sizeof(histogram[0]));
+    /* Write RLEMAX. */
+    BrotliWriteBits(1, 1, storage_ix, storage);
+    BrotliWriteBits(4, repeat_code - 1, storage_ix, storage);
+    histogram[repeat_code] = (uint32_t)num_types;  /* one run is written per type */
+    histogram[0] = 1;
+    for (i = context_bits; i < alphabet_size; ++i) {
+      histogram[i] = 1;  /* each remaining value symbol is written once */
+    }
+    BuildAndStoreHuffmanTree(histogram, alphabet_size, alphabet_size,
+                             tree, depths, bits, storage_ix, storage);
+    for (i = 0; i < num_types; ++i) {
+      size_t code = (i == 0 ? 0 : i + context_bits - 1);
+      BrotliWriteBits(depths[code], bits[code], storage_ix, storage);
+      BrotliWriteBits(
+          depths[repeat_code], bits[repeat_code], storage_ix, storage);
+      BrotliWriteBits(repeat_code, repeat_bits, storage_ix, storage);
+    }
+    /* Write IMTF (inverse-move-to-front) bit. */
+    BrotliWriteBits(1, 1, storage_ix, storage);
+  }
+}
+
+/* Manages the encoding of one block category (literal, command or distance). */
+typedef struct BlockEncoder {
+  size_t histogram_length_;  /* symbols per histogram; stride into depths_/bits_ */
+  size_t num_block_types_;
+  const uint8_t* block_types_;  /* Not owned. */
+  const uint32_t* block_lengths_;  /* Not owned. */
+  size_t num_blocks_;
+  BlockSplitCode block_split_code_;
+  size_t block_ix_;  /* index of the block currently being written */
+  size_t block_len_;  /* symbols left in the current block */
+  size_t entropy_ix_;  /* offset selected by the current block type */
+  uint8_t* depths_;  /* freed by CleanupBlockEncoder */
+  uint16_t* bits_;  /* freed by CleanupBlockEncoder */
+} BlockEncoder;
+
+static void InitBlockEncoder(BlockEncoder* self, size_t histogram_length,
+    size_t num_block_types, const uint8_t* block_types,
+    const uint32_t* block_lengths, const size_t num_blocks) {  /* borrows the arrays, no copy */
+  self->histogram_length_ = histogram_length;
+  self->num_block_types_ = num_block_types;
+  self->block_types_ = block_types;
+  self->block_lengths_ = block_lengths;
+  self->num_blocks_ = num_blocks;
+  InitBlockTypeCodeCalculator(&self->block_split_code_.type_code_calculator);
+  self->block_ix_ = 0;
+  self->block_len_ = num_blocks == 0 ? 0 : block_lengths[0];  /* start inside block 0 */
+  self->entropy_ix_ = 0;
+  self->depths_ = 0;  /* no code tables yet; safe for CleanupBlockEncoder */
+  self->bits_ = 0;
+}
+
+static void CleanupBlockEncoder(MemoryManager* m, BlockEncoder* self) {  /* releases the code tables */
+  BROTLI_FREE(m, self->depths_);
+  BROTLI_FREE(m, self->bits_);
+}
+
+/* Creates entropy codes of block lengths and block types and stores them
+   to the bit stream; forwards this encoder's fields to BuildAndStoreBlockSplitCode. */
+static void BuildAndStoreBlockSwitchEntropyCodes(BlockEncoder* self,
+    HuffmanTree* tree, size_t* storage_ix, uint8_t* storage) {
+  BuildAndStoreBlockSplitCode(self->block_types_, self->block_lengths_,
+      self->num_blocks_, self->num_block_types_, tree, &self->block_split_code_,
+      storage_ix, storage);
+}
+
+/* Stores the next symbol with the entropy code of the current block type.
+   Updates the block type and block length at block boundaries. */
+static void StoreSymbol(BlockEncoder* self, size_t symbol, size_t* storage_ix,
+    uint8_t* storage) {
+  if (self->block_len_ == 0) {  /* current block exhausted: switch to the next */
+    size_t block_ix = ++self->block_ix_;  /* NOTE(review): not checked against num_blocks_ */
+    uint32_t block_len = self->block_lengths_[block_ix];
+    uint8_t block_type = self->block_types_[block_ix];
+    self->block_len_ = block_len;
+    self->entropy_ix_ = block_type * self->histogram_length_;
+    StoreBlockSwitch(&self->block_split_code_, block_len, block_type, 0,
+        storage_ix, storage);
+  }
+  --self->block_len_;
+  {
+    size_t ix = self->entropy_ix_ + symbol;  /* slot in the per-type code tables */
+    BrotliWriteBits(self->depths_[ix], self->bits_[ix], storage_ix, storage);
+  }
+}
+
+/* Stores the next symbol with the entropy code of the current block type and
+   context value.
+   Updates the block type and block length at block boundaries. */
+static void StoreSymbolWithContext(BlockEncoder* self, size_t symbol,
+    size_t context, const uint32_t* context_map, size_t* storage_ix,
+    uint8_t* storage, const size_t context_bits) {
+  if (self->block_len_ == 0) {  /* current block exhausted: switch to the next */
+    size_t block_ix = ++self->block_ix_;  /* NOTE(review): not checked against num_blocks_ */
+    uint32_t block_len = self->block_lengths_[block_ix];
+    uint8_t block_type = self->block_types_[block_ix];
+    self->block_len_ = block_len;
+    self->entropy_ix_ = (size_t)block_type << context_bits;  /* here an offset into context_map */
+    StoreBlockSwitch(&self->block_split_code_, block_len, block_type, 0,
+        storage_ix, storage);
+  }
+  --self->block_len_;
+  {
+    size_t histo_ix = context_map[self->entropy_ix_ + context];  /* (block type, context) -> histogram */
+    size_t ix = histo_ix * self->histogram_length_ + symbol;
+    BrotliWriteBits(self->depths_[ix], self->bits_[ix], storage_ix, storage);
+  }
+}
+
+#define FN(X) X ## Literal
+/* NOLINTNEXTLINE(build/include) */
+#include "./block_encoder_inc.h"
+#undef FN
+
+#define FN(X) X ## Command
+/* NOLINTNEXTLINE(build/include) */
+#include "./block_encoder_inc.h"
+#undef FN
+
+#define FN(X) X ## Distance
+/* NOLINTNEXTLINE(build/include) */
+#include "./block_encoder_inc.h"
+#undef FN
+
+static void JumpToByteBoundary(size_t* storage_ix, uint8_t* storage) {  /* pads *storage_ix up to the next byte */
+  *storage_ix = (*storage_ix + 7u) & ~(size_t)7u;  /* size_t-wide mask: plain ~7u is 32-bit and would clear bits 32+ */
+  storage[*storage_ix >> 3] = 0;  /* zero the byte the next write starts in */
+}
+
+void BrotliStoreMetaBlock(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last,
+    const BrotliEncoderParams* params, ContextType literal_context_mode,
+    const Command* commands, size_t n_commands, const MetaBlockSplit* mb,
+    size_t* storage_ix, uint8_t* storage) {
+  /* Emits header, block-split codes, context maps, prefix codes, then data. */
+  size_t pos = start_pos;
+  size_t i;
+  uint32_t num_distance_symbols = params->dist.alphabet_size;
+  uint32_t num_effective_distance_symbols = num_distance_symbols;
+  HuffmanTree* tree;
+  ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
+  BlockEncoder literal_enc;
+  BlockEncoder command_enc;
+  BlockEncoder distance_enc;
+  const BrotliDistanceParams* dist = &params->dist;
+  if (params->large_window &&
+      num_effective_distance_symbols > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) {
+    num_effective_distance_symbols = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS;
+  }
+
+  StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
+
+  tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE);
+  if (BROTLI_IS_OOM(m)) return;
+  InitBlockEncoder(&literal_enc, BROTLI_NUM_LITERAL_SYMBOLS,
+      mb->literal_split.num_types, mb->literal_split.types,
+      mb->literal_split.lengths, mb->literal_split.num_blocks);
+  InitBlockEncoder(&command_enc, BROTLI_NUM_COMMAND_SYMBOLS,
+      mb->command_split.num_types, mb->command_split.types,
+      mb->command_split.lengths, mb->command_split.num_blocks);
+  InitBlockEncoder(&distance_enc, num_effective_distance_symbols,
+      mb->distance_split.num_types, mb->distance_split.types,
+      mb->distance_split.lengths, mb->distance_split.num_blocks);
+  /* Block-split codes are written in the order literal, command, distance. */
+  BuildAndStoreBlockSwitchEntropyCodes(&literal_enc, tree, storage_ix, storage);
+  BuildAndStoreBlockSwitchEntropyCodes(&command_enc, tree, storage_ix, storage);
+  BuildAndStoreBlockSwitchEntropyCodes(
+      &distance_enc, tree, storage_ix, storage);
+  /* Distance parameters, then one 2-bit context mode per literal block type. */
+  BrotliWriteBits(2, dist->distance_postfix_bits, storage_ix, storage);
+  BrotliWriteBits(
+      4, dist->num_direct_distance_codes >> dist->distance_postfix_bits,
+      storage_ix, storage);
+  for (i = 0; i < mb->literal_split.num_types; ++i) {
+    BrotliWriteBits(2, literal_context_mode, storage_ix, storage);
+  }
+
+  if (mb->literal_context_map_size == 0) {
+    StoreTrivialContextMap(mb->literal_histograms_size,
+        BROTLI_LITERAL_CONTEXT_BITS, tree, storage_ix, storage);
+  } else {
+    EncodeContextMap(m,
+        mb->literal_context_map, mb->literal_context_map_size,
+        mb->literal_histograms_size, tree, storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;  /* NOTE(review): tree is not freed on this path; presumably reclaimed via m - confirm */
+  }
+
+  if (mb->distance_context_map_size == 0) {
+    StoreTrivialContextMap(mb->distance_histograms_size,
+        BROTLI_DISTANCE_CONTEXT_BITS, tree, storage_ix, storage);
+  } else {
+    EncodeContextMap(m,
+        mb->distance_context_map, mb->distance_context_map_size,
+        mb->distance_histograms_size, tree, storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+  }
+
+  BuildAndStoreEntropyCodesLiteral(m, &literal_enc, mb->literal_histograms,
+      mb->literal_histograms_size, BROTLI_NUM_LITERAL_SYMBOLS, tree,
+      storage_ix, storage);
+  if (BROTLI_IS_OOM(m)) return;
+  BuildAndStoreEntropyCodesCommand(m, &command_enc, mb->command_histograms,
+      mb->command_histograms_size, BROTLI_NUM_COMMAND_SYMBOLS, tree,
+      storage_ix, storage);
+  if (BROTLI_IS_OOM(m)) return;
+  BuildAndStoreEntropyCodesDistance(m, &distance_enc, mb->distance_histograms,
+      mb->distance_histograms_size, num_distance_symbols, tree,
+      storage_ix, storage);
+  if (BROTLI_IS_OOM(m)) return;
+  BROTLI_FREE(m, tree);
+  /* Data: per command its code and extra bits, its literals, then its distance. */
+  for (i = 0; i < n_commands; ++i) {
+    const Command cmd = commands[i];
+    size_t cmd_code = cmd.cmd_prefix_;
+    StoreSymbol(&command_enc, cmd_code, storage_ix, storage);
+    StoreCommandExtra(&cmd, storage_ix, storage);
+    if (mb->literal_context_map_size == 0) {
+      size_t j;
+      for (j = cmd.insert_len_; j != 0; --j) {
+        StoreSymbol(&literal_enc, input[pos & mask], storage_ix, storage);
+        ++pos;
+      }
+    } else {
+      size_t j;
+      for (j = cmd.insert_len_; j != 0; --j) {
+        size_t context =
+            BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut);
+        uint8_t literal = input[pos & mask];
+        StoreSymbolWithContext(&literal_enc, literal, context,
+            mb->literal_context_map, storage_ix, storage,
+            BROTLI_LITERAL_CONTEXT_BITS);
+        prev_byte2 = prev_byte;
+        prev_byte = literal;
+        ++pos;
+      }
+    }
+    pos += CommandCopyLen(&cmd);
+    if (CommandCopyLen(&cmd)) {
+      prev_byte2 = input[(pos - 2) & mask];  /* context now comes from the end of the copy */
+      prev_byte = input[(pos - 1) & mask];
+      if (cmd.cmd_prefix_ >= 128) {  /* only these command codes are followed by a distance symbol */
+        size_t dist_code = cmd.dist_prefix_ & 0x3FF;  /* low 10 bits: distance symbol */
+        uint32_t distnumextra = cmd.dist_prefix_ >> 10;  /* upper bits: extra-bit count */
+        uint64_t distextra = cmd.dist_extra_;
+        if (mb->distance_context_map_size == 0) {
+          StoreSymbol(&distance_enc, dist_code, storage_ix, storage);
+        } else {
+          size_t context = CommandDistanceContext(&cmd);
+          StoreSymbolWithContext(&distance_enc, dist_code, context,
+              mb->distance_context_map, storage_ix, storage,
+              BROTLI_DISTANCE_CONTEXT_BITS);
+        }
+        BrotliWriteBits(distnumextra, distextra, storage_ix, storage);
+      }
+    }
+  }
+  CleanupBlockEncoder(m, &distance_enc);
+  CleanupBlockEncoder(m, &command_enc);
+  CleanupBlockEncoder(m, &literal_enc);
+  if (is_last) {
+    JumpToByteBoundary(storage_ix, storage);
+  }
+}
+
+static void BuildHistograms(const uint8_t* input,
+                            size_t start_pos,
+                            size_t mask,
+                            const Command* commands,
+                            size_t n_commands,
+                            HistogramLiteral* lit_histo,
+                            HistogramCommand* cmd_histo,
+                            HistogramDistance* dist_histo) {  /* adds to the three histograms; does not clear them */
+  size_t pos = start_pos;
+  size_t i;
+  for (i = 0; i < n_commands; ++i) {
+    const Command cmd = commands[i];
+    size_t j;
+    HistogramAddCommand(cmd_histo, cmd.cmd_prefix_);
+    for (j = cmd.insert_len_; j != 0; --j) {
+      HistogramAddLiteral(lit_histo, input[pos & mask]);  /* input is indexed modulo mask + 1 */
+      ++pos;
+    }
+    pos += CommandCopyLen(&cmd);  /* skip the copied bytes; they are not literals */
+    if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) {
+      HistogramAddDistance(dist_histo, cmd.dist_prefix_ & 0x3FF);  /* low 10 bits: distance symbol */
+    }
+  }
+}
+
+static void StoreDataWithHuffmanCodes(const uint8_t* input,
+                                      size_t start_pos,
+                                      size_t mask,
+                                      const Command* commands,
+                                      size_t n_commands,
+                                      const uint8_t* lit_depth,
+                                      const uint16_t* lit_bits,
+                                      const uint8_t* cmd_depth,
+                                      const uint16_t* cmd_bits,
+                                      const uint8_t* dist_depth,
+                                      const uint16_t* dist_bits,
+                                      size_t* storage_ix,
+                                      uint8_t* storage) {  /* writes all commands using one code per category */
+  size_t pos = start_pos;
+  size_t i;
+  for (i = 0; i < n_commands; ++i) {
+    const Command cmd = commands[i];
+    const size_t cmd_code = cmd.cmd_prefix_;
+    size_t j;
+    BrotliWriteBits(
+        cmd_depth[cmd_code], cmd_bits[cmd_code], storage_ix, storage);
+    StoreCommandExtra(&cmd, storage_ix, storage);
+    for (j = cmd.insert_len_; j != 0; --j) {
+      const uint8_t literal = input[pos & mask];  /* input is indexed modulo mask + 1 */
+      BrotliWriteBits(
+          lit_depth[literal], lit_bits[literal], storage_ix, storage);
+      ++pos;
+    }
+    pos += CommandCopyLen(&cmd);
+    if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) {
+      const size_t dist_code = cmd.dist_prefix_ & 0x3FF;  /* low 10 bits: distance symbol */
+      const uint32_t distnumextra = cmd.dist_prefix_ >> 10;  /* upper bits: extra-bit count */
+      const uint32_t distextra = cmd.dist_extra_;
+      BrotliWriteBits(dist_depth[dist_code], dist_bits[dist_code],
+                      storage_ix, storage);
+      BrotliWriteBits(distnumextra, distextra, storage_ix, storage);
+    }
+  }
+}
+
+void BrotliStoreMetaBlockTrivial(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+    const Command* commands, size_t n_commands,
+    size_t* storage_ix, uint8_t* storage) {
+  HistogramLiteral lit_histo;
+  HistogramCommand cmd_histo;
+  HistogramDistance dist_histo;
+  uint8_t lit_depth[BROTLI_NUM_LITERAL_SYMBOLS];
+  uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS];
+  uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS];
+  uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS];
+  uint8_t dist_depth[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
+  uint16_t dist_bits[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
+  HuffmanTree* tree;
+  uint32_t num_distance_symbols = params->dist.alphabet_size;
+  /* One histogram and one prefix code per category; no block splitting. */
+  StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
+
+  HistogramClearLiteral(&lit_histo);
+  HistogramClearCommand(&cmd_histo);
+  HistogramClearDistance(&dist_histo);
+
+  BuildHistograms(input, start_pos, mask, commands, n_commands,
+                  &lit_histo, &cmd_histo, &dist_histo);
+  /* NOTE(review): presumably the block-split/context header fields, all zero - confirm. */
+  BrotliWriteBits(13, 0, storage_ix, storage);
+
+  tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE);
+  if (BROTLI_IS_OOM(m)) return;
+  BuildAndStoreHuffmanTree(lit_histo.data_, BROTLI_NUM_LITERAL_SYMBOLS,
+                           BROTLI_NUM_LITERAL_SYMBOLS, tree,
+                           lit_depth, lit_bits,
+                           storage_ix, storage);
+  BuildAndStoreHuffmanTree(cmd_histo.data_, BROTLI_NUM_COMMAND_SYMBOLS,
+                           BROTLI_NUM_COMMAND_SYMBOLS, tree,
+                           cmd_depth, cmd_bits,
+                           storage_ix, storage);
+  BuildAndStoreHuffmanTree(dist_histo.data_, MAX_SIMPLE_DISTANCE_ALPHABET_SIZE,
+                           num_distance_symbols, tree,
+                           dist_depth, dist_bits,
+                           storage_ix, storage);
+  BROTLI_FREE(m, tree);  /* scratch for tree construction only */
+  StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
+                            n_commands, lit_depth, lit_bits,
+                            cmd_depth, cmd_bits,
+                            dist_depth, dist_bits,
+                            storage_ix, storage);
+  if (is_last) {
+    JumpToByteBoundary(storage_ix, storage);
+  }
+}
+
+void BrotliStoreMetaBlockFast(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+    const Command* commands, size_t n_commands,
+    size_t* storage_ix, uint8_t* storage) {
+  uint32_t num_distance_symbols = params->dist.alphabet_size;
+  uint32_t distance_alphabet_bits =
+      Log2FloorNonZero(num_distance_symbols - 1) + 1;  /* passed as max_bits for the distance code */
+
+  StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
+  /* NOTE(review): presumably the block-split/context header fields, all zero - confirm. */
+  BrotliWriteBits(13, 0, storage_ix, storage);
+  /* Few commands: build only the literal code, reuse static cmd/dist codes. */
+  if (n_commands <= 128) {
+    uint32_t histogram[BROTLI_NUM_LITERAL_SYMBOLS] = { 0 };
+    size_t pos = start_pos;
+    size_t num_literals = 0;
+    size_t i;
+    uint8_t lit_depth[BROTLI_NUM_LITERAL_SYMBOLS];
+    uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS];
+    for (i = 0; i < n_commands; ++i) {
+      const Command cmd = commands[i];
+      size_t j;
+      for (j = cmd.insert_len_; j != 0; --j) {
+        ++histogram[input[pos & mask]];
+        ++pos;
+      }
+      num_literals += cmd.insert_len_;
+      pos += CommandCopyLen(&cmd);
+    }
+    BrotliBuildAndStoreHuffmanTreeFast(m, histogram, num_literals,
+                                       /* max_bits = */ 8,
+                                       lit_depth, lit_bits,
+                                       storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+    StoreStaticCommandHuffmanTree(storage_ix, storage);
+    StoreStaticDistanceHuffmanTree(storage_ix, storage);
+    StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
+                              n_commands, lit_depth, lit_bits,
+                              kStaticCommandCodeDepth,
+                              kStaticCommandCodeBits,
+                              kStaticDistanceCodeDepth,
+                              kStaticDistanceCodeBits,
+                              storage_ix, storage);
+  } else {  /* many commands: build all three codes from this meta-block's histograms */
+    HistogramLiteral lit_histo;
+    HistogramCommand cmd_histo;
+    HistogramDistance dist_histo;
+    uint8_t lit_depth[BROTLI_NUM_LITERAL_SYMBOLS];
+    uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS];
+    uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS];
+    uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS];
+    uint8_t dist_depth[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
+    uint16_t dist_bits[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
+    HistogramClearLiteral(&lit_histo);
+    HistogramClearCommand(&cmd_histo);
+    HistogramClearDistance(&dist_histo);
+    BuildHistograms(input, start_pos, mask, commands, n_commands,
+                    &lit_histo, &cmd_histo, &dist_histo);
+    BrotliBuildAndStoreHuffmanTreeFast(m, lit_histo.data_,
+                                       lit_histo.total_count_,
+                                       /* max_bits = */ 8,
+                                       lit_depth, lit_bits,
+                                       storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+    BrotliBuildAndStoreHuffmanTreeFast(m, cmd_histo.data_,
+                                       cmd_histo.total_count_,
+                                       /* max_bits = */ 10,
+                                       cmd_depth, cmd_bits,
+                                       storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+    BrotliBuildAndStoreHuffmanTreeFast(m, dist_histo.data_,
+                                       dist_histo.total_count_,
+                                       /* max_bits = */
+                                       distance_alphabet_bits,
+                                       dist_depth, dist_bits,
+                                       storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+    StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
+                              n_commands, lit_depth, lit_bits,
+                              cmd_depth, cmd_bits,
+                              dist_depth, dist_bits,
+                              storage_ix, storage);
+  }
+
+  if (is_last) {
+    JumpToByteBoundary(storage_ix, storage);
+  }
+}
+
+/* This is for storing uncompressed blocks (simple raw storage of
+   bytes-as-bytes). */
+void BrotliStoreUncompressedMetaBlock(BROTLI_BOOL is_final_block,
+                                      const uint8_t* BROTLI_RESTRICT input,
+                                      size_t position, size_t mask,
+                                      size_t len,
+                                      size_t* BROTLI_RESTRICT storage_ix,
+                                      uint8_t* BROTLI_RESTRICT storage) {
+  size_t masked_pos = position & mask;
+  BrotliStoreUncompressedMetaBlockHeader(len, storage_ix, storage);
+  JumpToByteBoundary(storage_ix, storage);  /* raw bytes are copied at a byte-aligned position */
+  /* input is read as a circular buffer of mask + 1 bytes: split a wrapping copy. */
+  if (masked_pos + len > mask + 1) {
+    size_t len1 = mask + 1 - masked_pos;  /* bytes up to the end of the buffer */
+    memcpy(&storage[*storage_ix >> 3], &input[masked_pos], len1);
+    *storage_ix += len1 << 3;
+    len -= len1;
+    masked_pos = 0;  /* continue from the start of the buffer */
+  }
+  memcpy(&storage[*storage_ix >> 3], &input[masked_pos], len);
+  *storage_ix += len << 3;
+
+  /* We need to clear the next 4 bytes to continue to be
+     compatible with BrotliWriteBits. */
+  BrotliWriteBitsPrepareStorage(*storage_ix, storage);
+
+  /* Since the uncompressed block itself may not be the final block, add an
+     empty one after this. */
+  if (is_final_block) {
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
+    JumpToByteBoundary(storage_ix, storage);
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/brotli_bit_stream.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/brotli_bit_stream.h
new file mode 100755
index 0000000000..2ed703bf79
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/brotli_bit_stream.h
@@ -0,0 +1,84 @@
+/* Copyright 2014 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions to convert brotli-related data structures into the
+   brotli bit stream. The functions here operate under
+   assumption that there is enough space in the storage, i.e., there are
+   no out-of-range checks anywhere.
+
+   These functions do bit addressing into a byte array. The byte array
+   is called "storage" and the index to the bit is called storage_ix
+   in function arguments. */
+
+#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
+#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
+
+#include "../common/context.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./command.h"
+#include "./entropy_encode.h"
+#include "./memory.h"
+#include "./metablock.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* All Store functions here will use a storage_ix, which is always the bit
+   position for the current storage. */
+
+BROTLI_INTERNAL void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
+    HuffmanTree* tree, size_t* storage_ix, uint8_t* storage);
+
+BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast(
+    MemoryManager* m, const uint32_t* histogram, const size_t histogram_total,
+    const size_t max_bits, uint8_t* depth, uint16_t* bits, size_t* storage_ix,
+    uint8_t* storage);
+
+/* REQUIRES: length > 0 */
+/* REQUIRES: length <= (1 << 24) */
+BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last,
+    const BrotliEncoderParams* params, ContextType literal_context_mode,
+    const Command* commands, size_t n_commands, const MetaBlockSplit* mb,
+    size_t* storage_ix, uint8_t* storage);
+
+/* Stores the meta-block without doing any block splitting, just collects
+   one histogram per block category and uses that for entropy coding.
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
+BROTLI_INTERNAL void BrotliStoreMetaBlockTrivial(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+    const Command* commands, size_t n_commands,
+    size_t* storage_ix, uint8_t* storage);
+
+/* Same as above, but uses static prefix codes for histograms with only a few
+   symbols, and uses static code length prefix codes for all other histograms.
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
+BROTLI_INTERNAL void BrotliStoreMetaBlockFast(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+    const Command* commands, size_t n_commands,
+    size_t* storage_ix, uint8_t* storage);
+
+/* This is for storing uncompressed blocks (simple raw storage of
+   bytes-as-bytes).
+   REQUIRES: len > 0
+   REQUIRES: len <= (1 << 24) */
+BROTLI_INTERNAL void BrotliStoreUncompressedMetaBlock(
+    BROTLI_BOOL is_final_block, const uint8_t* BROTLI_RESTRICT input,
+    size_t position, size_t mask, size_t len,
+    size_t* BROTLI_RESTRICT storage_ix, uint8_t* BROTLI_RESTRICT storage);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_BROTLI_BIT_STREAM_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/cluster.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/cluster.c
new file mode 100755
index 0000000000..a20dfd385f
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/cluster.c
@@ -0,0 +1,56 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions for clustering similar histograms together. */
+
+#include "./cluster.h"
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./bit_cost.h"  /* BrotliPopulationCost */
+#include "./fast_log.h"
+#include "./histogram.h"
+#include "./memory.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static BROTLI_INLINE BROTLI_BOOL HistogramPairIsLess(
+    const HistogramPair* p1, const HistogramPair* p2) {  /* ordering predicate for the pairs queue */
+  if (p1->cost_diff != p2->cost_diff) {
+    return TO_BROTLI_BOOL(p1->cost_diff > p2->cost_diff);  /* the larger cost_diff compares as "less" */
+  }  /* equal cost_diff: fall back to comparing the index gaps (idx2 - idx1) */
+  return TO_BROTLI_BOOL((p1->idx2 - p1->idx1) > (p2->idx2 - p2->idx1));
+}
+
+/* Returns entropy reduction of the context map when we combine two clusters. */
+static BROTLI_INLINE double ClusterCostDiff(size_t size_a, size_t size_b) {
+  size_t size_c = size_a + size_b;  /* size of the combined cluster */
+  return (double)size_a * FastLog2(size_a) +  /* sum of the two separate terms ... */
+    (double)size_b * FastLog2(size_b) -
+    (double)size_c * FastLog2(size_c);  /* ... minus the same term for the union */
+}
+
+#define CODE(X) X  /* keep the function bodies: each include below emits definitions */
+
+#define FN(X) X ## Literal  /* every FN(name) in cluster_inc.h becomes nameLiteral */
+#include "./cluster_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Command  /* same template, names suffixed with Command */
+#include "./cluster_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Distance  /* same template, names suffixed with Distance */
+#include "./cluster_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#undef CODE
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/cluster.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/cluster.h
new file mode 100755
index 0000000000..bb26124d24
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/cluster.h
@@ -0,0 +1,48 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions for clustering similar histograms together. */
+
+#ifndef BROTLI_ENC_CLUSTER_H_
+#define BROTLI_ENC_CLUSTER_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./histogram.h"
+#include "./memory.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct HistogramPair {  /* one candidate merge of two histograms */
+  uint32_t idx1;  /* index of the first histogram of the pair */
+  uint32_t idx2;  /* index of the second histogram of the pair */
+  double cost_combo;  /* bit cost recorded for the combined histogram */
+  double cost_diff;  /* cost change of the merge; used as the queue ordering key */
+} HistogramPair;
+
+#define CODE(X) /* Declaration */;  /* drop the body X so only prototypes remain */
+
+#define FN(X) X ## Literal  /* prototypes with the Literal suffix */
+#include "./cluster_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Command  /* prototypes with the Command suffix */
+#include "./cluster_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Distance  /* prototypes with the Distance suffix */
+#include "./cluster_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#undef CODE
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_CLUSTER_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/cluster_inc.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/cluster_inc.h
new file mode 100755
index 0000000000..22ecb3cca8
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/cluster_inc.h
@@ -0,0 +1,317 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, CODE */
+
+#define HistogramType FN(Histogram)
+
+/* Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
+   it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue. */
+BROTLI_INTERNAL void FN(BrotliCompareAndPushToQueue)(
+    const HistogramType* out, const uint32_t* cluster_size, uint32_t idx1,
+    uint32_t idx2, size_t max_num_pairs, HistogramPair* pairs,
+    size_t* num_pairs) CODE({
+  BROTLI_BOOL is_good_pair = BROTLI_FALSE;
+  HistogramPair p;
+  p.idx1 = p.idx2 = 0;
+  p.cost_diff = p.cost_combo = 0;
+  if (idx1 == idx2) {  /* a histogram is never paired with itself */
+    return;
+  }
+  if (idx2 < idx1) {  /* normalize the pair so that idx1 < idx2 */
+    uint32_t t = idx2;
+    idx2 = idx1;
+    idx1 = t;
+  }
+  p.idx1 = idx1;
+  p.idx2 = idx2;
+  p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
+  p.cost_diff -= out[idx1].bit_cost_;
+  p.cost_diff -= out[idx2].bit_cost_;  /* cost_combo is added below if the pair is kept */
+
+  if (out[idx1].total_count_ == 0) {  /* idx1 is empty: combined cost is idx2's cost */
+    p.cost_combo = out[idx2].bit_cost_;
+    is_good_pair = BROTLI_TRUE;
+  } else if (out[idx2].total_count_ == 0) {  /* idx2 is empty: combined cost is idx1's cost */
+    p.cost_combo = out[idx1].bit_cost_;
+    is_good_pair = BROTLI_TRUE;
+  } else {
+    double threshold = *num_pairs == 0 ? 1e99 :  /* empty queue: effectively no limit */
+        BROTLI_MAX(double, 0.0, pairs[0].cost_diff);
+    HistogramType combo = out[idx1];  /* merge into a local copy; out[] is not modified */
+    double cost_combo;
+    FN(HistogramAddHistogram)(&combo, &out[idx2]);
+    cost_combo = FN(BrotliPopulationCost)(&combo);
+    if (cost_combo < threshold - p.cost_diff) {  /* i.e. the final cost_diff is below threshold */
+      p.cost_combo = cost_combo;
+      is_good_pair = BROTLI_TRUE;
+    }
+  }
+  if (is_good_pair) {
+    p.cost_diff += p.cost_combo;
+    if (*num_pairs > 0 && HistogramPairIsLess(&pairs[0], &p)) {
+      /* Replace the top of the queue if needed. */
+      if (*num_pairs < max_num_pairs) {  /* room left: old top moves to the tail */
+        pairs[*num_pairs] = pairs[0];
+        ++(*num_pairs);
+      }
+      pairs[0] = p;  /* when the queue is full the old top is overwritten here */
+    } else if (*num_pairs < max_num_pairs) {  /* not better than the top: append if room */
+      pairs[*num_pairs] = p;
+      ++(*num_pairs);
+    }
+  }
+})
+
+BROTLI_INTERNAL size_t FN(BrotliHistogramCombine)(HistogramType* out,
+                                                  uint32_t* cluster_size,
+                                                  uint32_t* symbols,
+                                                  uint32_t* clusters,
+                                                  HistogramPair* pairs,
+                                                  size_t num_clusters,
+                                                  size_t symbols_size,
+                                                  size_t max_clusters,
+                                                  size_t max_num_pairs) CODE({
+  double cost_diff_threshold = 0.0;  /* phase 1: only merge pairs with cost_diff < 0 */
+  size_t min_cluster_size = 1;  /* lower bound on num_clusters for the merge loop */
+  size_t num_pairs = 0;
+
+  {
+    /* We maintain a vector of histogram pairs, with the property that the pair
+       with the maximum bit cost reduction is the first. */
+    size_t idx1;
+    for (idx1 = 0; idx1 < num_clusters; ++idx1) {
+      size_t idx2;
+      for (idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
+        FN(BrotliCompareAndPushToQueue)(out, cluster_size, clusters[idx1],
+            clusters[idx2], max_num_pairs, &pairs[0], &num_pairs);
+      }
+    }
+  }
+
+  while (num_clusters > min_cluster_size) {
+    uint32_t best_idx1;
+    uint32_t best_idx2;
+    size_t i;
+    if (pairs[0].cost_diff >= cost_diff_threshold) {  /* best pair no longer passes the threshold */
+      cost_diff_threshold = 1e99;  /* phase 2: accept any best pair ... */
+      min_cluster_size = max_clusters;  /* ... but stop once num_clusters <= max_clusters */
+      continue;
+    }
+    /* Take the best pair from the top of heap. */
+    best_idx1 = pairs[0].idx1;
+    best_idx2 = pairs[0].idx2;
+    FN(HistogramAddHistogram)(&out[best_idx1], &out[best_idx2]);  /* merge idx2 into idx1 */
+    out[best_idx1].bit_cost_ = pairs[0].cost_combo;
+    cluster_size[best_idx1] += cluster_size[best_idx2];
+    for (i = 0; i < symbols_size; ++i) {  /* redirect symbols from the absorbed cluster */
+      if (symbols[i] == best_idx2) {
+        symbols[i] = best_idx1;
+      }
+    }
+    for (i = 0; i < num_clusters; ++i) {
+      if (clusters[i] == best_idx2) {
+        memmove(&clusters[i], &clusters[i + 1],  /* close the gap, keeping the order */
+                (num_clusters - i - 1) * sizeof(clusters[0]));
+        break;
+      }
+    }
+    --num_clusters;
+    {
+      /* Remove pairs intersecting the just combined best pair. */
+      size_t copy_to_idx = 0;  /* write cursor for the in-place compaction */
+      for (i = 0; i < num_pairs; ++i) {
+        HistogramPair* p = &pairs[i];
+        if (p->idx1 == best_idx1 || p->idx2 == best_idx1 ||
+            p->idx1 == best_idx2 || p->idx2 == best_idx2) {
+          /* Remove invalid pair from the queue. */
+          continue;
+        }
+        if (HistogramPairIsLess(&pairs[0], p)) {
+          /* Replace the top of the queue if needed. */
+          HistogramPair front = pairs[0];
+          pairs[0] = *p;
+          pairs[copy_to_idx] = front;
+        } else {
+          pairs[copy_to_idx] = *p;
+        }
+        ++copy_to_idx;
+      }
+      num_pairs = copy_to_idx;
+    }
+
+    /* Push new pairs formed with the combined histogram to the heap. */
+    for (i = 0; i < num_clusters; ++i) {
+      FN(BrotliCompareAndPushToQueue)(out, cluster_size, best_idx1, clusters[i],
+                                      max_num_pairs, &pairs[0], &num_pairs);
+    }
+  }
+  return num_clusters;  /* number of entries left in clusters[] */
+})
+
+/* What is the bit cost of moving histogram from cur_symbol to candidate. */
+BROTLI_INTERNAL double FN(BrotliHistogramBitCostDistance)(
+    const HistogramType* histogram, const HistogramType* candidate) CODE({
+  if (histogram->total_count_ == 0) {  /* an empty histogram adds no cost */
+    return 0.0;
+  } else {
+    HistogramType tmp = *histogram;  /* local copy; neither argument is modified */
+    FN(HistogramAddHistogram)(&tmp, candidate);
+    return FN(BrotliPopulationCost)(&tmp) - candidate->bit_cost_;  /* combined cost minus candidate's stored cost */
+  }
+})
+
+/* Find the best 'out' histogram for each of the 'in' histograms.
+   When called, clusters[0..num_clusters) contains the unique values from
+   symbols[0..in_size), but this property is not preserved in this function.
+   Note: we assume that out[]->bit_cost_ is already up-to-date. */
+BROTLI_INTERNAL void FN(BrotliHistogramRemap)(const HistogramType* in,
+    size_t in_size, const uint32_t* clusters, size_t num_clusters,
+    HistogramType* out, uint32_t* symbols) CODE({
+  size_t i;
+  for (i = 0; i < in_size; ++i) {
+    uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];  /* start from the previous input's result */
+    double best_bits =
+        FN(BrotliHistogramBitCostDistance)(&in[i], &out[best_out]);
+    size_t j;
+    for (j = 0; j < num_clusters; ++j) {
+      const double cur_bits =
+          FN(BrotliHistogramBitCostDistance)(&in[i], &out[clusters[j]]);
+      if (cur_bits < best_bits) {  /* strict: ties keep the earlier choice */
+        best_bits = cur_bits;
+        best_out = clusters[j];
+      }
+    }
+    symbols[i] = best_out;
+  }
+
+  /* Recompute each out[] histogram from the in[] histograms and new symbols[]. */
+  for (i = 0; i < num_clusters; ++i) {
+    FN(HistogramClear)(&out[clusters[i]]);
+  }
+  for (i = 0; i < in_size; ++i) {
+    FN(HistogramAddHistogram)(&out[symbols[i]], &in[i]);
+  }
+})
+
+/* Reorders elements of the out[0..length) array and changes values in
+   symbols[0..length) array in the following way:
+     * when called, symbols[] contains indexes into out[], and has N unique
+       values (possibly N < length)
+     * on return, symbols'[i] = f(symbols[i]) and
+                  out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
+       where f is a bijection between the range of symbols[] and [0..N), and
+       the first occurrences of values in symbols'[i] come in consecutive
+       increasing order.
+   Returns N, the number of unique values in symbols[]. */
+BROTLI_INTERNAL size_t FN(BrotliHistogramReindex)(MemoryManager* m,
+    HistogramType* out, uint32_t* symbols, size_t length) CODE({
+  static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;  /* marks "no new index assigned yet" */
+  uint32_t* new_index = BROTLI_ALLOC(m, uint32_t, length);  /* old index -> new index */
+  uint32_t next_index;
+  HistogramType* tmp;
+  size_t i;
+  if (BROTLI_IS_OOM(m)) return 0;
+  for (i = 0; i < length; ++i) {
+      new_index[i] = kInvalidIndex;
+  }
+  next_index = 0;
+  for (i = 0; i < length; ++i) {  /* pass 1: number the values in order of first occurrence */
+    if (new_index[symbols[i]] == kInvalidIndex) {
+      new_index[symbols[i]] = next_index;
+      ++next_index;
+    }
+  }
+  /* TODO: by using idea of "cycle-sort" we can avoid allocation of
+     tmp and reduce the number of copying by the factor of 2. */
+  tmp = BROTLI_ALLOC(m, HistogramType, next_index);
+  if (BROTLI_IS_OOM(m)) return 0;  /* NOTE(review): new_index is not freed on this path - confirm the MemoryManager reclaims it */
+  next_index = 0;
+  for (i = 0; i < length; ++i) {  /* pass 2: gather histograms in new order, rewrite symbols[] */
+    if (new_index[symbols[i]] == next_index) {
+      tmp[next_index] = out[symbols[i]];
+      ++next_index;
+    }
+    symbols[i] = new_index[symbols[i]];
+  }
+  BROTLI_FREE(m, new_index);
+  for (i = 0; i < next_index; ++i) {  /* copy the reordered histograms back into out[] */
+    out[i] = tmp[i];
+  }
+  BROTLI_FREE(m, tmp);
+  return next_index;
+})
+
+BROTLI_INTERNAL void FN(BrotliClusterHistograms)(
+    MemoryManager* m, const HistogramType* in, const size_t in_size,
+    size_t max_histograms, HistogramType* out, size_t* out_size,
+    uint32_t* histogram_symbols) CODE({
+  uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, in_size);
+  uint32_t* clusters = BROTLI_ALLOC(m, uint32_t, in_size);
+  size_t num_clusters = 0;
+  const size_t max_input_histograms = 64;  /* batch size of the first clustering pass */
+  size_t pairs_capacity = max_input_histograms * max_input_histograms / 2;
+  /* For the first pass of clustering, we allow all pairs. */
+  HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity + 1);
+  size_t i;
+
+  if (BROTLI_IS_OOM(m)) return;  /* NOTE(review): nothing allocated above is freed here - confirm MemoryManager cleanup */
+
+  for (i = 0; i < in_size; ++i) {
+    cluster_size[i] = 1;
+  }
+
+  for (i = 0; i < in_size; ++i) {  /* start with one cluster per input histogram */
+    out[i] = in[i];
+    out[i].bit_cost_ = FN(BrotliPopulationCost)(&in[i]);
+    histogram_symbols[i] = (uint32_t)i;
+  }
+
+  for (i = 0; i < in_size; i += max_input_histograms) {  /* pass 1: combine within each batch */
+    size_t num_to_combine =
+        BROTLI_MIN(size_t, in_size - i, max_input_histograms);
+    size_t num_new_clusters;
+    size_t j;
+    for (j = 0; j < num_to_combine; ++j) {
+      clusters[num_clusters + j] = (uint32_t)(i + j);
+    }
+    num_new_clusters =
+        FN(BrotliHistogramCombine)(out, cluster_size,
+                                   &histogram_symbols[i],
+                                   &clusters[num_clusters], pairs,
+                                   num_to_combine, num_to_combine,
+                                   max_histograms, pairs_capacity);
+    num_clusters += num_new_clusters;  /* surviving clusters stay packed at the front of clusters[] */
+  }
+
+  {
+    /* For the second pass, we limit the total number of histogram pairs.
+       After this limit is reached, we only keep searching for the best pair. */
+    size_t max_num_pairs = BROTLI_MIN(size_t,
+        64 * num_clusters, (num_clusters / 2) * num_clusters);
+    BROTLI_ENSURE_CAPACITY(
+        m, HistogramPair, pairs, pairs_capacity, max_num_pairs + 1);
+    if (BROTLI_IS_OOM(m)) return;
+
+    /* Collapse similar histograms. */
+    num_clusters = FN(BrotliHistogramCombine)(out, cluster_size,
+                                              histogram_symbols, clusters,
+                                              pairs, num_clusters, in_size,
+                                              max_histograms, max_num_pairs);
+  }
+  BROTLI_FREE(m, pairs);
+  BROTLI_FREE(m, cluster_size);
+  /* Find the optimal map from original histograms to the final ones. */
+  FN(BrotliHistogramRemap)(in, in_size, clusters, num_clusters,
+                           out, histogram_symbols);
+  BROTLI_FREE(m, clusters);
+  /* Convert the context map to a canonical form. */
+  *out_size = FN(BrotliHistogramReindex)(m, out, histogram_symbols, in_size);
+  if (BROTLI_IS_OOM(m)) return;
+})
+
+#undef HistogramType
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/command.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/command.h
new file mode 100755
index 0000000000..1aac85689b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/command.h
@@ -0,0 +1,190 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* This class models a sequence of literals and a backward reference copy. */
+
+#ifndef BROTLI_ENC_COMMAND_H_
+#define BROTLI_ENC_COMMAND_H_
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./fast_log.h"
+#include "./params.h"
+#include "./prefix.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static uint32_t kInsBase[] =   { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50,
+    66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };  /* 24 entries, read via GetInsertBase() */
+static uint32_t kInsExtra[] =  { 0, 0, 0, 0, 0, 0, 1, 1,  2,  2,  3,  3,  4,  4,
+    5,   5,   6,   7,   8,   9,   10,   12,   14,    24 };  /* 24 entries, read via GetInsertExtra() */
+static uint32_t kCopyBase[] =  { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30,
+    38, 54,  70, 102, 134, 198, 326,   582, 1094,  2118 };  /* 24 entries, read via GetCopyBase() */
+static uint32_t kCopyExtra[] = { 0, 0, 0, 0, 0, 0, 0, 0,  1,  1,  2,  2,  3,  3,
+     4,  4,   5,   5,   6,   7,   8,     9,   10,    24 };  /* 24 entries, read via GetCopyExtra() */
+
+static BROTLI_INLINE uint16_t GetInsertLengthCode(size_t insertlen) {  /* maps an insert length to a code in 0..23 */
+  if (insertlen < 6) {  /* lengths 0..5 are their own code */
+    return (uint16_t)insertlen;
+  } else if (insertlen < 130) {
+    uint32_t nbits = Log2FloorNonZero(insertlen - 2) - 1u;
+    return (uint16_t)((nbits << 1) + ((insertlen - 2) >> nbits) + 2);
+  } else if (insertlen < 2114) {
+    return (uint16_t)(Log2FloorNonZero(insertlen - 66) + 10);
+  } else if (insertlen < 6210) {  /* the last three codes cover fixed ranges */
+    return 21u;
+  } else if (insertlen < 22594) {
+    return 22u;
+  } else {
+    return 23u;
+  }
+}
+
+static BROTLI_INLINE uint16_t GetCopyLengthCode(size_t copylen) {  /* maps a copy length to its copy length code */
+  if (copylen < 10) {
+    return (uint16_t)(copylen - 2);  /* NOTE(review): wraps for copylen < 2 - presumably callers pass >= 2; confirm */
+  } else if (copylen < 134) {
+    uint32_t nbits = Log2FloorNonZero(copylen - 6) - 1u;
+    return (uint16_t)((nbits << 1) + ((copylen - 6) >> nbits) + 4);
+  } else if (copylen < 2118) {
+    return (uint16_t)(Log2FloorNonZero(copylen - 70) + 12);
+  } else {  /* everything from 2118 up shares the last code */
+    return 23u;
+  }
+}
+
+static BROTLI_INLINE uint16_t CombineLengthCodes(
+    uint16_t inscode, uint16_t copycode, BROTLI_BOOL use_last_distance) {
+  uint16_t bits64 =
+      (uint16_t)((copycode & 0x7u) | ((inscode & 0x7u) << 3u));  /* low 3 bits of each code, 6 bits total */
+  if (use_last_distance && inscode < 8u && copycode < 16u) {
+    return (copycode < 8u) ? bits64 : (bits64 | 64u);  /* results in 0..127 */
+  } else {
+    /* Specification: 5 Encoding of ... (last table) */
+    /* offset = 2 * index, where index is in range [0..8] */
+    uint32_t offset = 2u * ((copycode >> 3u) + 3u * (inscode >> 3u));
+    /* All values in specification are K * 64,
+       where   K = [2, 3, 6, 4, 5, 8, 7, 9, 10],
+           i + 1 = [1, 2, 3, 4, 5, 6, 7, 8,  9],
+       K - i - 1 = [1, 1, 3, 0, 0, 2, 0, 1,  2] = D.
+       All values in D require only 2 bits to encode.
+       Magic constant is shifted 6 bits left, to avoid final multiplication. */
+    offset = (offset << 5u) + 0x40u + ((0x520D40u >> offset) & 0xC0u);
+    return (uint16_t)(offset | bits64);  /* block offset combined with the 6 low bits */
+  }
+}
+
+static BROTLI_INLINE void GetLengthCode(size_t insertlen, size_t copylen,
+                                        BROTLI_BOOL use_last_distance,
+                                        uint16_t* code) {  /* *code receives the combined command prefix */
+  uint16_t inscode = GetInsertLengthCode(insertlen);
+  uint16_t copycode = GetCopyLengthCode(copylen);
+  *code = CombineLengthCodes(inscode, copycode, use_last_distance);
+}
+
+static BROTLI_INLINE uint32_t GetInsertBase(uint16_t inscode) {  /* no bounds check: inscode must be a valid kInsBase index */
+  return kInsBase[inscode];
+}
+
+static BROTLI_INLINE uint32_t GetInsertExtra(uint16_t inscode) {  /* no bounds check: inscode must be a valid kInsExtra index */
+  return kInsExtra[inscode];
+}
+
+static BROTLI_INLINE uint32_t GetCopyBase(uint16_t copycode) {  /* no bounds check: copycode must be a valid kCopyBase index */
+  return kCopyBase[copycode];
+}
+
+static BROTLI_INLINE uint32_t GetCopyExtra(uint16_t copycode) {  /* no bounds check: copycode must be a valid kCopyExtra index */
+  return kCopyExtra[copycode];
+}
+
+typedef struct Command {
+  uint32_t insert_len_;  /* insert length, set by InitCommand / InitInsertCommand */
+  /* Stores copy_len in low 25 bits and copy_code - copy_len in high 7 bit. */
+  uint32_t copy_len_;
+  /* Stores distance extra bits. */
+  uint32_t dist_extra_;
+  uint16_t cmd_prefix_;  /* combined insert/copy length code from GetLengthCode() */
+  /* Stores distance code in low 10 bits
+     and number of extra bits in high 6 bits. */
+  uint16_t dist_prefix_;
+} Command;
+
+/* distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1. */
+static BROTLI_INLINE void InitCommand(Command* self,
+    const BrotliDistanceParams* dist, size_t insertlen,
+    size_t copylen, int copylen_code_delta, size_t distance_code) {
+  /* Don't rely on signed int representation, use honest casts. */
+  uint32_t delta = (uint8_t)((int8_t)copylen_code_delta);
+  self->insert_len_ = (uint32_t)insertlen;
+  self->copy_len_ = (uint32_t)(copylen | (delta << 25));  /* delta goes into the bits above the low 25 */
+  /* The distance prefix and extra bits are computed here using the
+     num_direct_distance_codes and distance_postfix_bits values taken from
+     *dist, and are written to dist_prefix_ and dist_extra_. */
+  PrefixEncodeCopyDistance(
+      distance_code, dist->num_direct_distance_codes,
+      dist->distance_postfix_bits, &self->dist_prefix_, &self->dist_extra_);
+  GetLengthCode(
+      insertlen, (size_t)((int)copylen + copylen_code_delta),
+      TO_BROTLI_BOOL((self->dist_prefix_ & 0x3FF) == 0), &self->cmd_prefix_);  /* distance code 0 selects the use_last_distance form */
+}
+
+static BROTLI_INLINE void InitInsertCommand(Command* self, size_t insertlen) {  /* command that only inserts literals */
+  self->insert_len_ = (uint32_t)insertlen;
+  self->copy_len_ = 4 << 25;  /* low 25 bits (copy length) are 0; the code delta field holds 4 */
+  self->dist_extra_ = 0;
+  self->dist_prefix_ = BROTLI_NUM_DISTANCE_SHORT_CODES;
+  GetLengthCode(insertlen, 4, BROTLI_FALSE, &self->cmd_prefix_);  /* prefix is computed for copy length 4 */
+}
+
+static BROTLI_INLINE uint32_t CommandRestoreDistanceCode(
+    const Command* self, const BrotliDistanceParams* dist) {  /* rebuilds a distance code from dist_prefix_ / dist_extra_ */
+  if ((self->dist_prefix_ & 0x3FFu) <
+      BROTLI_NUM_DISTANCE_SHORT_CODES + dist->num_direct_distance_codes) {
+    return self->dist_prefix_ & 0x3FFu;  /* short or direct code: the low 10 bits are returned as-is */
+  } else {
+    uint32_t dcode = self->dist_prefix_ & 0x3FFu;  /* low 10 bits */
+    uint32_t nbits = self->dist_prefix_ >> 10;  /* high 6 bits */
+    uint32_t extra = self->dist_extra_;
+    uint32_t postfix_mask = (1U << dist->distance_postfix_bits) - 1U;
+    uint32_t hcode = (dcode - dist->num_direct_distance_codes -
+        BROTLI_NUM_DISTANCE_SHORT_CODES) >>
+        dist->distance_postfix_bits;
+    uint32_t lcode = (dcode - dist->num_direct_distance_codes -
+        BROTLI_NUM_DISTANCE_SHORT_CODES) & postfix_mask;  /* the postfix bits of the code */
+    uint32_t offset = ((2U + (hcode & 1U)) << nbits) - 4U;
+    return ((offset + extra) << dist->distance_postfix_bits) + lcode +
+        dist->num_direct_distance_codes + BROTLI_NUM_DISTANCE_SHORT_CODES;
+  }
+}
+
+static BROTLI_INLINE uint32_t CommandDistanceContext(const Command* self) {  /* returns a value in 0..3 */
+  uint32_t r = self->cmd_prefix_ >> 6;  /* which block of 64 prefix values */
+  uint32_t c = self->cmd_prefix_ & 7;  /* low 3 bits of the prefix */
+  if ((r == 0 || r == 2 || r == 4 || r == 7) && (c <= 2)) {
+    return c;
+  }
+  return 3;  /* every other prefix shares context 3 */
+}
+
+static BROTLI_INLINE uint32_t CommandCopyLen(const Command* self) {
+  return self->copy_len_ & 0x1FFFFFF;  /* low 25 bits */
+}
+
+static BROTLI_INLINE uint32_t CommandCopyLenCode(const Command* self) {  /* copy length plus the stored signed delta */
+  uint32_t modifier = self->copy_len_ >> 25;  /* the 7 bits above the copy length */
+  int32_t delta = (int8_t)((uint8_t)(modifier | ((modifier & 0x40) << 1)));  /* copy bit 6 into bit 7: sign-extend 7 -> 8 bits */
+  return (uint32_t)((int32_t)(self->copy_len_ & 0x1FFFFFF) + delta);
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_COMMAND_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/compress_fragment.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/compress_fragment.c
new file mode 100755
index 0000000000..9e50b2098a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/compress_fragment.c
@@ -0,0 +1,790 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function for fast encoding of an input fragment, independently from the input
+   history. This function uses one-pass processing: when we find a backward
+   match, we immediately emit the corresponding command and literal codes to
+   the bit stream.
+
+   Adapted from the CompressFragment() function in
+   https://github.com/google/snappy/blob/master/snappy.cc */
+
+#include "./compress_fragment.h"
+
+#include <string.h>  /* memcmp, memcpy, memset */
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./brotli_bit_stream.h"
+#include "./entropy_encode.h"
+#include "./fast_log.h"
+#include "./find_match_length.h"
+#include "./memory.h"
+#include "./write_bits.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
+
+/* kHashMul32 multiplier has these properties:
+   * The multiplier must be odd. Otherwise we may lose the highest bit.
+   * No long streaks of ones or zeros.
+   * There is no effort to ensure that it is a prime, the oddity is enough
+     for this use.
+   * The number has been tuned heuristically against compression benchmarks. */
+static const uint32_t kHashMul32 = 0x1E35A7BD;
+
+static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {  /* reads 8 bytes starting at p */
+  const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32;  /* << 24 discards the top 24 bits of the loaded word */
+  return (uint32_t)(h >> shift);  /* caller picks the table width through shift */
+}
+
+static BROTLI_INLINE uint32_t HashBytesAtOffset(
+    uint64_t v, int offset, size_t shift) {  /* same arithmetic as Hash(), on v shifted right by offset bytes */
+  BROTLI_DCHECK(offset >= 0);
+  BROTLI_DCHECK(offset <= 3);
+  {
+    const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;  /* drop offset low bytes, then the top 24 bits */
+    return (uint32_t)(h >> shift);
+  }
+}
+
+static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {  /* TRUE when the first 5 bytes are equal */
+  return TO_BROTLI_BOOL(
+      BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2) &&  /* bytes 0..3 in one 32-bit compare */
+      p1[4] == p2[4]);  /* byte 4 */
+}
+
+/* Builds a literal prefix code into "depths" and "bits" based on the statistics
+   of the "input" string and stores it into the bit stream.
+   Note that the prefix code here is built from the pre-LZ77 input, therefore
+   we can only approximate the statistics of the actual literal stream.
+   Moreover, for long inputs we build a histogram from a sample of the input
+   and thus have to assign a non-zero depth for each literal.
+   Returns estimated compression ratio millibytes/char for encoding given input
+   with generated code. */
+static size_t BuildAndStoreLiteralPrefixCode(MemoryManager* m,
+                                             const uint8_t* input,
+                                             const size_t input_size,
+                                             uint8_t depths[256],
+                                             uint16_t bits[256],
+                                             size_t* storage_ix,
+                                             uint8_t* storage) {
+  uint32_t histogram[256] = { 0 };
+  size_t histogram_total;
+  size_t i;
+  if (input_size < (1 << 15)) {  /* short input: count every byte */
+    for (i = 0; i < input_size; ++i) {
+      ++histogram[input[i]];
+    }
+    histogram_total = input_size;
+    for (i = 0; i < 256; ++i) {
+      /* We weigh the first 11 samples with weight 3 to account for the
+         balancing effect of the LZ77 phase on the histogram. */
+      const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
+      histogram[i] += adjust;
+      histogram_total += adjust;
+    }
+  } else {
+    static const size_t kSampleRate = 29;  /* long input: count every 29th byte only */
+    for (i = 0; i < input_size; i += kSampleRate) {
+      ++histogram[input[i]];
+    }
+    histogram_total = (input_size + kSampleRate - 1) / kSampleRate;  /* number of samples taken */
+    for (i = 0; i < 256; ++i) {
+      /* We add 1 to each population count to avoid 0 bit depths (since this is
+         only a sample and we don't know if the symbol appears or not), and we
+         weigh the first 11 samples with weight 3 to account for the balancing
+         effect of the LZ77 phase on the histogram (more frequent symbols are
+         more likely to be in backward references instead as literals). */
+      const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
+      histogram[i] += adjust;
+      histogram_total += adjust;
+    }
+  }
+  BrotliBuildAndStoreHuffmanTreeFast(m, histogram, histogram_total,
+                                     /* max_bits = */ 8,
+                                     depths, bits, storage_ix, storage);
+  if (BROTLI_IS_OOM(m)) return 0;
+  {
+    size_t literal_ratio = 0;  /* sum of count * code length, i.e. total bits */
+    for (i = 0; i < 256; ++i) {
+      if (histogram[i]) literal_ratio += histogram[i] * depths[i];
+    }
+    /* Estimated encoding ratio, millibytes per symbol. */
+    return (literal_ratio * 125) / histogram_total;  /* 125 = 1000 / 8 converts bits to millibytes */
+  }
+}
+
+/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
+   "bits" based on "histogram" and stores it into the bit stream. */
+static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
+    uint8_t depth[128], uint16_t bits[128], size_t* storage_ix,
+    uint8_t* storage) {
+  /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
+  HuffmanTree tree[129];
+  uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
+  uint16_t cmd_bits[64];
+
+  BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);  /* entries 0..63: commands */
+  BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);  /* entries 64..127: distances */
+  /* We have to jump through a few hoops here in order to compute
+     the command bits because the symbols are in a different order than in
+     the full alphabet. This looks complicated, but having the symbols
+     in this order in the command bits saves a few branches in the Emit*
+     functions. */
+  memcpy(cmd_depth, depth, 24);  /* depth entries are 1 byte each: sizes are entry counts */
+  memcpy(cmd_depth + 24, depth + 40, 8);
+  memcpy(cmd_depth + 32, depth + 24, 8);
+  memcpy(cmd_depth + 40, depth + 48, 8);
+  memcpy(cmd_depth + 48, depth + 32, 8);
+  memcpy(cmd_depth + 56, depth + 56, 8);
+  BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
+  memcpy(bits, cmd_bits, 48);  /* bits entries are uint16_t: 48 bytes = 24 codes, 16 bytes = 8 codes */
+  memcpy(bits + 24, cmd_bits + 32, 16);
+  memcpy(bits + 32, cmd_bits + 48, 16);
+  memcpy(bits + 40, cmd_bits + 24, 16);
+  memcpy(bits + 48, cmd_bits + 40, 16);
+  memcpy(bits + 56, cmd_bits + 56, 16);
+  BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
+  {
+    /* Create the bit length array for the full command alphabet. */
+    size_t i;
+    memset(cmd_depth, 0, 64);  /* only 64 first values were used */
+    memcpy(cmd_depth, depth, 8);
+    memcpy(cmd_depth + 64, depth + 8, 8);
+    memcpy(cmd_depth + 128, depth + 16, 8);
+    memcpy(cmd_depth + 192, depth + 24, 8);
+    memcpy(cmd_depth + 384, depth + 32, 8);
+    for (i = 0; i < 8; ++i) {  /* these three groups are scattered with a stride of 8 */
+      cmd_depth[128 + 8 * i] = depth[40 + i];
+      cmd_depth[256 + 8 * i] = depth[48 + i];
+      cmd_depth[448 + 8 * i] = depth[56 + i];
+    }
+    BrotliStoreHuffmanTree(
+        cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
+  }
+  BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);  /* then the 64 distance code lengths */
+}
+
+/* REQUIRES: insertlen < 6210 */
+static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
+                                        const uint8_t depth[128],
+                                        const uint16_t bits[128],
+                                        uint32_t histo[128],
+                                        size_t* storage_ix,
+                                        uint8_t* storage) {
+  if (insertlen < 6) {
+    const size_t code = insertlen + 40;  /* indices 40..45; no extra bits */
+    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+    ++histo[code];  /* every branch also counts the code it emitted */
+  } else if (insertlen < 130) {
+    const size_t tail = insertlen - 2;
+    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
+    const size_t prefix = tail >> nbits;
+    const size_t inscode = (nbits << 1) + prefix + 42;
+    BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
+    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);  /* low nbits of tail as extra bits */
+    ++histo[inscode];
+  } else if (insertlen < 2114) {
+    const size_t tail = insertlen - 66;
+    const uint32_t nbits = Log2FloorNonZero(tail);
+    const size_t code = nbits + 50;
+    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);  /* tail without its top set bit */
+    ++histo[code];
+  } else {
+    BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
+    BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);  /* 12 extra bits hold insertlen - 2114 */
+    ++histo[61];
+  }
+}
+
+static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,  /* NOTE(review): presumably REQUIRES insertlen >= 6210 - confirm */
+                                            const uint8_t depth[128],
+                                            const uint16_t bits[128],
+                                            uint32_t histo[128],
+                                            size_t* storage_ix,
+                                            uint8_t* storage) {
+  if (insertlen < 22594) {
+    BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
+    BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);  /* 14 extra bits hold insertlen - 6210 */
+    ++histo[62];
+  } else {
+    BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
+    BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);  /* 24 extra bits hold insertlen - 22594 */
+    ++histo[63];
+  }
+}
+
+static BROTLI_INLINE void EmitCopyLen(size_t copylen,  /* match length */
+                                      const uint8_t depth[128],
+                                      const uint16_t bits[128],
+                                      uint32_t histo[128],  /* code counts */
+                                      size_t* storage_ix,
+                                      uint8_t* storage) {
+  if (copylen < 10) {  /* code is copylen + 14, no extra bits */
+    BrotliWriteBits(
+        depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
+    ++histo[copylen + 14];
+  } else if (copylen < 134) {  /* prefix code, then nbits extra bits */
+    const size_t tail = copylen - 6;
+    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
+    const size_t prefix = tail >> nbits;  /* leading bits of tail */
+    const size_t code = (nbits << 1) + prefix + 20;
+    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
+    ++histo[code];
+  } else if (copylen < 2118) {  /* prefix code, then nbits extra bits */
+    const size_t tail = copylen - 70;
+    const uint32_t nbits = Log2FloorNonZero(tail);
+    const size_t code = nbits + 28;
+    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
+    ++histo[code];
+  } else {  /* copylen >= 2118: code 39 with 24 extra bits */
+    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
+    BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
+    ++histo[39];
+  }
+}
+
+static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,  /* >= 5 */
+                                                  const uint8_t depth[128],
+                                                  const uint16_t bits[128],
+                                                  uint32_t histo[128],
+                                                  size_t* storage_ix,
+                                                  uint8_t* storage) {
+  if (copylen < 12) {  /* code is copylen - 4, no extra bits */
+    BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
+    ++histo[copylen - 4];
+  } else if (copylen < 72) {  /* prefix code, then nbits extra bits */
+    const size_t tail = copylen - 8;
+    const uint32_t nbits = Log2FloorNonZero(tail) - 1;
+    const size_t prefix = tail >> nbits;  /* leading bits of tail */
+    const size_t code = (nbits << 1) + prefix + 4;
+    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
+    ++histo[code];
+  } else if (copylen < 136) {  /* code, 5 extra bits, then code 64 */
+    const size_t tail = copylen - 8;
+    const size_t code = (tail >> 5) + 30;
+    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+    BrotliWriteBits(5, tail & 31, storage_ix, storage);
+    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
+    ++histo[code];
+    ++histo[64];  /* both emitted codes are counted */
+  } else if (copylen < 2120) {  /* code, nbits extra bits, then code 64 */
+    const size_t tail = copylen - 72;
+    const uint32_t nbits = Log2FloorNonZero(tail);
+    const size_t code = nbits + 28;
+    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
+    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
+    ++histo[code];
+    ++histo[64];  /* both emitted codes are counted */
+  } else {  /* copylen >= 2120: code 39, 24 extra bits, then code 64 */
+    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
+    BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
+    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
+    ++histo[39];
+    ++histo[64];  /* both emitted codes are counted */
+  }
+}
+
+static BROTLI_INLINE void EmitDistance(size_t distance,  /* copy distance */
+                                       const uint8_t depth[128],
+                                       const uint16_t bits[128],
+                                       uint32_t histo[128],  /* code counts */
+                                       size_t* storage_ix, uint8_t* storage) {
+  const size_t d = distance + 3;
+  const uint32_t nbits = Log2FloorNonZero(d) - 1u;  /* extra-bit count */
+  const size_t prefix = (d >> nbits) & 1;  /* presumably bit below d's top bit */
+  const size_t offset = (2 + prefix) << nbits;  /* subtracted from d below */
+  const size_t distcode = 2 * (nbits - 1) + prefix + 80;  /* codes from 80 up */
+  BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
+  BrotliWriteBits(nbits, d - offset, storage_ix, storage);  /* extra bits */
+  ++histo[distcode];
+}
+
+static BROTLI_INLINE void EmitLiterals(
+    const uint8_t* input, const size_t len, const uint8_t depth[256],
+    const uint16_t bits[256], size_t* storage_ix, uint8_t* storage) {
+  /* Write every byte of the literal run with its own prefix code. */
+  size_t remaining;
+  for (remaining = len; remaining != 0; --remaining) {
+    const uint8_t lit = *input++;
+    BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
+  }
+}
+
+/* Writes a non-final meta-block header. REQUIRES: len <= 1 << 24. */
+static void BrotliStoreMetaBlockHeader(
+    size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
+    uint8_t* storage) {
+  size_t nibbles = 6;  /* 4-bit groups used to store len - 1 */
+  /* ISLAST: always written as 0 here */
+  BrotliWriteBits(1, 0, storage_ix, storage);
+  if (len <= (1U << 16)) {
+    nibbles = 4;
+  } else if (len <= (1U << 20)) {
+    nibbles = 5;
+  }
+  BrotliWriteBits(2, nibbles - 4, storage_ix, storage);  /* nibble count - 4 */
+  BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);  /* len - 1 */
+  /* ISUNCOMPRESSED */
+  BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
+}
+
+static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
+    uint8_t* array) {  /* overwrites n_bits bits of array at bit position pos */
+  while (n_bits > 0) {  /* one destination byte per iteration */
+    size_t byte_pos = pos >> 3;
+    size_t n_unchanged_bits = pos & 7;  /* low bits of the byte to preserve */
+    size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits);
+    size_t total_bits = n_unchanged_bits + n_changed_bits;
+    uint32_t mask =  /* selects the bits of the byte that must survive */
+        (~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);
+    uint32_t unchanged_bits = array[byte_pos] & mask;
+    uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
+    array[byte_pos] =
+        (uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
+    n_bits -= n_changed_bits;
+    bits >>= n_changed_bits;  /* consume the bits just stored */
+    pos += n_changed_bits;
+  }
+}
+
+/* Moves the write position back and clears the bits from there in its byte. */
+static void RewindBitPosition(const size_t new_storage_ix,
+                              size_t* storage_ix, uint8_t* storage) {
+  uint8_t* const last_byte = &storage[new_storage_ix >> 3];
+  *last_byte &= (uint8_t)((1u << (new_storage_ix & 7)) - 1);
+  *storage_ix = new_storage_ix;
+}
+
+static BROTLI_BOOL ShouldMergeBlock(  /* keep using the "depths" code? */
+    const uint8_t* data, size_t len, const uint8_t* depths) {
+  size_t histo[256] = { 0 };  /* byte counts of the sampled positions */
+  static const size_t kSampleRate = 43;  /* inspect every 43rd byte */
+  size_t i;
+  for (i = 0; i < len; i += kSampleRate) {
+    ++histo[data[i]];
+  }
+  {
+    const size_t total = (len + kSampleRate - 1) / kSampleRate;  /* samples */
+    double r = (FastLog2(total) + 0.5) * (double)total + 200;
+    for (i = 0; i < 256; ++i) {  /* subtract depth- and count-based terms */
+      r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
+    }
+    return TO_BROTLI_BOOL(r >= 0.0);  /* merge while the balance is >= 0 */
+  }
+}
+
+/* Acceptable loss for uncompressible speedup is 2% */
+#define MIN_RATIO 980
+
+static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
+    const uint8_t* metablock_start, const uint8_t* next_emit,
+    const size_t insertlen, const size_t literal_ratio) {
+  /* Bytes of this meta-block that lie before "next_emit". */
+  const size_t consumed = (size_t)(next_emit - metablock_start);
+  /* Only consider raw storage when the pending literal run is at least
+     50 times longer than what was consumed so far. */
+  if (consumed * 50 > insertlen) return BROTLI_FALSE;
+  return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
+}
+
+static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
+                                      const size_t storage_ix_start,
+                                      size_t* storage_ix, uint8_t* storage) {
+  const size_t len = (size_t)(end - begin);  /* bytes stored verbatim */
+  RewindBitPosition(storage_ix_start, storage_ix, storage);  /* drop output */
+  BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
+  *storage_ix = (*storage_ix + 7u) & ~(size_t)7;  /* size_t-wide byte align */
+  memcpy(&storage[*storage_ix >> 3], begin, len);
+  *storage_ix += len << 3;
+  storage[*storage_ix >> 3] = 0;  /* zero the byte at the new write position */
+}
+
+static const uint32_t kCmdHistoSeed[128] = {  /* initial cmd_histo counts */
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 0, 0, 0, 0,
+};
+
+static BROTLI_INLINE void BrotliCompressFragmentFastImpl(  /* one-pass core */
+    MemoryManager* m, const uint8_t* input, size_t input_size,
+    BROTLI_BOOL is_last, int* table, size_t table_bits, uint8_t cmd_depth[128],
+    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
+    size_t* storage_ix, uint8_t* storage) {
+  uint32_t cmd_histo[128];  /* command/distance code counts of this block */
+  const uint8_t* ip_end;
+
+  /* "next_emit" is a pointer to the first byte that is not covered by a
+     previous copy. Bytes between "next_emit" and the start of the next copy or
+     the end of the input will be emitted as literal bytes. */
+  const uint8_t* next_emit = input;
+  /* Save the start of the first block for position and distance computations.
+  */
+  const uint8_t* base_ip = input;
+
+  static const size_t kFirstBlockSize = 3 << 15;
+  static const size_t kMergeBlockSize = 1 << 16;
+
+  const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
+  const size_t kMinMatchLen = 5;
+
+  const uint8_t* metablock_start = input;
+  size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
+  size_t total_block_size = block_size;
+  /* Save the bit position of the MLEN field of the meta-block header, so that
+     we can update it later if we decide to extend this meta-block. */
+  size_t mlen_storage_ix = *storage_ix + 3;
+
+  uint8_t lit_depth[256];
+  uint16_t lit_bits[256];
+
+  size_t literal_ratio;
+
+  const uint8_t* ip;
+  int last_distance;
+
+  const size_t shift = 64u - table_bits;  /* shift argument passed to Hash() */
+
+  BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
+  /* No block splits, no contexts. */
+  BrotliWriteBits(13, 0, storage_ix, storage);
+
+  literal_ratio = BuildAndStoreLiteralPrefixCode(
+      m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
+  if (BROTLI_IS_OOM(m)) return;
+
+  {
+    /* Store the pre-compressed command and distance prefix codes. */
+    size_t i;
+    for (i = 0; i + 7 < *cmd_code_numbits; i += 8) {
+      BrotliWriteBits(8, cmd_code[i >> 3], storage_ix, storage);
+    }
+  }
+  BrotliWriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
+                  storage_ix, storage);
+
+ emit_commands:
+  /* Initialize the command and distance histograms. We will gather
+     statistics of command and distance codes during the processing
+     of this block and use it to update the command and distance
+     prefix codes for the next block. */
+  memcpy(cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));
+
+  /* "ip" is the input pointer. */
+  ip = input;
+  last_distance = -1;  /* ip - (-1) lies ahead of ip, so it is rejected below */
+  ip_end = input + block_size;
+
+  if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
+    /* For the last block, we need to keep a 16-byte margin so that we can be
+       sure that all distances are at most window size - 16.
+       For all other blocks, we only need to keep a margin of 5 bytes so that
+       we don't go over the block size with a copy. */
+    const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
+                                        input_size - kInputMarginBytes);
+    const uint8_t* ip_limit = input + len_limit;
+
+    uint32_t next_hash;
+    for (next_hash = Hash(++ip, shift); ; ) {
+      /* Step 1: Scan forward in the input looking for a 5-byte-long match.
+         If we get close to exhausting the input then goto emit_remainder.
+
+         Heuristic match skipping: If 32 bytes are scanned with no matches
+         found, start looking only at every other byte. If 32 more bytes are
+         scanned, look at every third byte, etc.. When a match is found,
+         immediately go back to looking at every byte. This is a small loss
+         (~5% performance, ~0.1% density) for compressible data due to more
+         bookkeeping, but for non-compressible data (such as JPEG) it's a huge
+         win since the compressor quickly "realizes" the data is incompressible
+         and doesn't bother looking for matches everywhere.
+
+         The "skip" variable keeps track of how many bytes there are since the
+         last match; dividing it by 32 (i.e. right-shifting by five) gives the
+         number of bytes to move ahead for each iteration. */
+      uint32_t skip = 32;
+
+      const uint8_t* next_ip = ip;
+      const uint8_t* candidate;
+      BROTLI_DCHECK(next_emit < ip);
+trawl:
+      do {
+        uint32_t hash = next_hash;
+        uint32_t bytes_between_hash_lookups = skip++ >> 5;
+        BROTLI_DCHECK(hash == Hash(next_ip, shift));
+        ip = next_ip;
+        next_ip = ip + bytes_between_hash_lookups;
+        if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
+          goto emit_remainder;
+        }
+        next_hash = Hash(next_ip, shift);
+        candidate = ip - last_distance;  /* try the previous distance first */
+        if (IsMatch(ip, candidate)) {
+          if (BROTLI_PREDICT_TRUE(candidate < ip)) {
+            table[hash] = (int)(ip - base_ip);
+            break;
+          }
+        }
+        candidate = base_ip + table[hash];  /* otherwise use the hash table */
+        BROTLI_DCHECK(candidate >= base_ip);
+        BROTLI_DCHECK(candidate < ip);
+
+        table[hash] = (int)(ip - base_ip);
+      } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));
+
+      /* Check copy distance. If candidate is not feasible, continue search.
+         Checking is done outside of hot loop to reduce overhead. */
+      if (ip - candidate > MAX_DISTANCE) goto trawl;
+
+      /* Step 2: Emit the found match together with the literal bytes from
+         "next_emit" to the bit stream, and then see if we can find a next match
+         immediately afterwards. Repeat until we find no match for the input
+         without emitting some literal bytes. */
+
+      {
+        /* We have a 5-byte match at ip, and we need to emit bytes in
+           [next_emit, ip). */
+        const uint8_t* base = ip;
+        size_t matched = 5 + FindMatchLengthWithLimit(
+            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
+        int distance = (int)(base - candidate);  /* > 0 */
+        size_t insert = (size_t)(base - next_emit);
+        ip += matched;
+        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
+        if (BROTLI_PREDICT_TRUE(insert < 6210)) {
+          EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
+                        storage_ix, storage);
+        } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
+                                             literal_ratio)) {
+          EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
+                                    storage_ix, storage);
+          input_size -= (size_t)(base - input);
+          input = base;
+          next_emit = input;
+          goto next_block;
+        } else {
+          EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
+                            storage_ix, storage);
+        }
+        EmitLiterals(next_emit, insert, lit_depth, lit_bits,
+                     storage_ix, storage);
+        if (distance == last_distance) {
+          BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
+          ++cmd_histo[64];
+        } else {
+          EmitDistance((size_t)distance, cmd_depth, cmd_bits,
+                       cmd_histo, storage_ix, storage);
+          last_distance = distance;
+        }
+        EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
+                                storage_ix, storage);
+
+        next_emit = ip;
+        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
+          goto emit_remainder;
+        }
+        /* We could immediately start working at ip now, but to improve
+           compression we first update "table" with the hashes of some positions
+           within the last copy. */
+        {
+          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
+          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
+          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
+          table[prev_hash] = (int)(ip - base_ip - 3);
+          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
+          table[prev_hash] = (int)(ip - base_ip - 2);
+          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
+          table[prev_hash] = (int)(ip - base_ip - 1);
+
+          candidate = base_ip + table[cur_hash];
+          table[cur_hash] = (int)(ip - base_ip);
+        }
+      }
+
+      while (IsMatch(ip, candidate)) {
+        /* We have a 5-byte match at ip, and no need to emit any literal bytes
+           prior to ip. */
+        const uint8_t* base = ip;
+        size_t matched = 5 + FindMatchLengthWithLimit(
+            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
+        if (ip - candidate > MAX_DISTANCE) break;
+        ip += matched;
+        last_distance = (int)(base - candidate);  /* > 0 */
+        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
+        EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
+                    storage_ix, storage);
+        EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
+                     cmd_histo, storage_ix, storage);
+
+        next_emit = ip;
+        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
+          goto emit_remainder;
+        }
+        /* We could immediately start working at ip now, but to improve
+           compression we first update "table" with the hashes of some positions
+           within the last copy. */
+        {
+          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
+          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
+          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
+          table[prev_hash] = (int)(ip - base_ip - 3);
+          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
+          table[prev_hash] = (int)(ip - base_ip - 2);
+          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
+          table[prev_hash] = (int)(ip - base_ip - 1);
+
+          candidate = base_ip + table[cur_hash];
+          table[cur_hash] = (int)(ip - base_ip);
+        }
+      }
+
+      next_hash = Hash(++ip, shift);
+    }
+  }
+
+ emit_remainder:
+  BROTLI_DCHECK(next_emit <= ip_end);
+  input += block_size;  /* advance past the block that was just scanned */
+  input_size -= block_size;
+  block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);
+
+  /* Decide if we want to continue this meta-block instead of emitting the
+     last insert-only command. */
+  if (input_size > 0 &&
+      total_block_size + block_size <= (1 << 20) &&
+      ShouldMergeBlock(input, block_size, lit_depth)) {
+    BROTLI_DCHECK(total_block_size > (1 << 16));
+    /* Update the size of the current meta-block and continue emitting commands.
+       We can do this because the current size and the new size both have 5
+       nibbles. */
+    total_block_size += block_size;
+    UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
+    goto emit_commands;
+  }
+
+  /* Emit the remaining bytes as literals. */
+  if (next_emit < ip_end) {
+    const size_t insert = (size_t)(ip_end - next_emit);
+    if (BROTLI_PREDICT_TRUE(insert < 6210)) {
+      EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
+                    storage_ix, storage);
+      EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
+    } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
+                                         literal_ratio)) {
+      EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
+                                storage_ix, storage);
+    } else {
+      EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
+                        storage_ix, storage);
+      EmitLiterals(next_emit, insert, lit_depth, lit_bits,
+                   storage_ix, storage);
+    }
+  }
+  next_emit = ip_end;
+
+next_block:
+  /* If we have more data, write a new meta-block header and prefix codes and
+     then continue emitting commands. */
+  if (input_size > 0) {
+    metablock_start = input;
+    block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
+    total_block_size = block_size;
+    /* Save the bit position of the MLEN field of the meta-block header, so that
+       we can update it later if we decide to extend this meta-block. */
+    mlen_storage_ix = *storage_ix + 3;
+    BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
+    /* No block splits, no contexts. */
+    BrotliWriteBits(13, 0, storage_ix, storage);
+    literal_ratio = BuildAndStoreLiteralPrefixCode(
+        m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
+                                   storage_ix, storage);
+    goto emit_commands;
+  }
+
+  if (!is_last) {
+    /* If this is not the last block, update the command and distance prefix
+       codes for the next block and store the compressed forms. */
+    cmd_code[0] = 0;  /* reset the caller's buffer before it is rewritten */
+    *cmd_code_numbits = 0;
+    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
+                                   cmd_code_numbits, cmd_code);
+  }
+}
+
+#define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)  /* table_bits handled */
+
+#define BAKE_METHOD_PARAM_(B) \
+static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B(             \
+    MemoryManager* m, const uint8_t* input, size_t input_size,               \
+    BROTLI_BOOL is_last, int* table, uint8_t cmd_depth[128],                 \
+    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,     \
+    size_t* storage_ix, uint8_t* storage) {                                  \
+  BrotliCompressFragmentFastImpl(m, input, input_size, is_last, table, B,    \
+      cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix, storage); \
+}
+FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)  /* defines ...FastImpl9/11/13/15 */
+#undef BAKE_METHOD_PARAM_
+
+void BrotliCompressFragmentFast(  /* contract: see compress_fragment.h */
+    MemoryManager* m, const uint8_t* input, size_t input_size,
+    BROTLI_BOOL is_last, int* table, size_t table_size, uint8_t cmd_depth[128],
+    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
+    size_t* storage_ix, uint8_t* storage) {
+  const size_t initial_storage_ix = *storage_ix;  /* kept for the rewrite */
+  const size_t table_bits = Log2FloorNonZero(table_size);
+
+  if (input_size == 0) {
+    BROTLI_DCHECK(is_last);
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
+    *storage_ix = (*storage_ix + 7u) & ~(size_t)7;  /* size_t-wide align */
+    return;
+  }
+
+  switch (table_bits) {  /* dispatch to the variant baked for this size */
+#define CASE_(B)                                                     \
+    case B:                                                          \
+      BrotliCompressFragmentFastImpl ## B(                           \
+          m, input, input_size, is_last, table, cmd_depth, cmd_bits, \
+          cmd_code_numbits, cmd_code, storage_ix, storage);          \
+      break;
+    FOR_TABLE_BITS_(CASE_)
+#undef CASE_
+    default: BROTLI_DCHECK(0); break;  /* unsupported table size */
+  }
+
+  /* If output is larger than single uncompressed block, rewrite it. */
+  if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
+    EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
+                              storage_ix, storage);
+  }
+
+  if (is_last) {
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
+    *storage_ix = (*storage_ix + 7u) & ~(size_t)7;  /* size_t-wide align */
+  }
+}
+
+#undef FOR_TABLE_BITS_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/compress_fragment.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/compress_fragment.h
new file mode 100755
index 0000000000..80007f5dca
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/compress_fragment.h
@@ -0,0 +1,61 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function for fast encoding of an input fragment, independently from the input
+   history. This function uses one-pass processing: when we find a backward
+   match, we immediately emit the corresponding command and literal codes to
+   the bit stream. */
+
+#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_
+#define BROTLI_ENC_COMPRESS_FRAGMENT_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./memory.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Compresses the "input" bytes into the "*storage" buffer as one or more
+   complete meta-blocks, and updates the "*storage_ix" bit position.
+
+   If "is_last" is 1, emits an additional empty last meta-block.
+
+   "cmd_depth" and "cmd_bits" contain the command and distance prefix codes
+   (see comment in encode.h) used for the encoding of this input fragment.
+   If "is_last" is 0, they are updated to reflect the statistics
+   of this input fragment, to be used for the encoding of the next fragment.
+
+   "*cmd_code_numbits" is the number of bits of the compressed representation
+   of the command and distance prefix codes, and "cmd_code" is an array of
+   at least "(*cmd_code_numbits + 7) >> 3" size that contains the compressed
+   command and distance prefix codes. If "is_last" is 0, these are also
+   updated to represent the updated "cmd_depth" and "cmd_bits".
+
+   REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
+   REQUIRES: "input_size" is at most the maximal meta-block size (1 << 24).
+   REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
+   REQUIRES: "table_size" is an odd power of two (1 << 9, 11, 13 or 15).
+   OUTPUT: maximal copy distance <= |input_size|
+   OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
+BROTLI_INTERNAL void BrotliCompressFragmentFast(MemoryManager* m,
+                                                const uint8_t* input,
+                                                size_t input_size,
+                                                BROTLI_BOOL is_last,
+                                                int* table, size_t table_size,
+                                                uint8_t cmd_depth[128],
+                                                uint16_t cmd_bits[128],
+                                                size_t* cmd_code_numbits,
+                                                uint8_t* cmd_code,
+                                                size_t* storage_ix,
+                                                uint8_t* storage);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_COMPRESS_FRAGMENT_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/compress_fragment_two_pass.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/compress_fragment_two_pass.c
new file mode 100755
index 0000000000..f8a5606384
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/compress_fragment_two_pass.c
@@ -0,0 +1,645 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function for fast encoding of an input fragment, independently from the input
+   history. This function uses two-pass processing: in the first pass we save
+   the found backward matches and literal bytes into a buffer, and in the
+   second pass we emit them into the bit stream using prefix codes built based
+   on the actual command and literal byte histograms. */
+
+#include "./compress_fragment_two_pass.h"
+
+#include <string.h>  /* memcmp, memcpy, memset */
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./bit_cost.h"
+#include "./brotli_bit_stream.h"
+#include "./entropy_encode.h"
+#include "./fast_log.h"
+#include "./find_match_length.h"
+#include "./memory.h"
+#include "./write_bits.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
+
+/* kHashMul32 multiplier has these properties:
+   * The multiplier must be odd. Otherwise we may lose the highest bit.
+   * No long streaks of ones or zeros.
+   * There is no effort to ensure that it is a prime, the oddity is enough
+     for this use.
+   * The number has been tuned heuristically against compression benchmarks. */
+static const uint32_t kHashMul32 = 0x1E35A7BD;
+
+/* Hashes the first "length" bytes at "p"; keeps the top (64 - shift) bits. */
+static BROTLI_INLINE uint32_t Hash(
+    const uint8_t* p, size_t shift, size_t length) {
+  const uint64_t window = BROTLI_UNALIGNED_LOAD64LE(p) << ((8 - length) * 8);
+  return (uint32_t)((window * kHashMul32) >> shift);
+}
+
+/* Same as Hash(), but over the bytes of the preloaded little-endian word "v"
+   starting "offset" bytes in. REQUIRES: offset + length <= 8. */
+static BROTLI_INLINE uint32_t HashBytesAtOffset(
+    uint64_t v, size_t offset, size_t shift, size_t length) {
+  BROTLI_DCHECK(offset <= 8 - length);
+  return (uint32_t)(
+      (((v >> (8 * offset)) << ((8 - length) * 8)) * kHashMul32) >> shift);
+}
+
+/* Compares "length" bytes at "p1" and "p2"; any length but 4 is taken as 6. */
+static BROTLI_INLINE BROTLI_BOOL IsMatch(
+    const uint8_t* p1, const uint8_t* p2, size_t length) {
+  if (BrotliUnalignedRead32(p1) != BrotliUnalignedRead32(p2)) {
+    return BROTLI_FALSE;
+  }
+  return TO_BROTLI_BOOL(length == 4 || (p1[4] == p2[4] && p1[5] == p2[5]));
+}
+
+/* Builds two 64-symbol prefix codes from "histogram" (commands in [0, 64),
+   distances in [64, 128)), fills "depth"/"bits" and writes both codes out. */
+static void BuildAndStoreCommandPrefixCode(
+    const uint32_t histogram[128],
+    uint8_t depth[128], uint16_t bits[128],
+    size_t* storage_ix, uint8_t* storage) {
+  /* 2 * 64 + 1 nodes suffice for a 64-symbol tree; both codes reuse them. */
+  HuffmanTree tree[129];
+  uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
+  uint16_t cmd_bits[64];
+  BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
+  BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
+  /* We have to jump through a few hoops here in order to compute
+     the command bits because the symbols are in a different order than in
+     the full alphabet. This looks complicated, but having the symbols
+     in this order in the command bits saves a few branches in the Emit*
+     functions. */
+  memcpy(cmd_depth, depth + 24, 24);
+  memcpy(cmd_depth + 24, depth, 8);
+  memcpy(cmd_depth + 32, depth + 48, 8);
+  memcpy(cmd_depth + 40, depth + 8, 8);
+  memcpy(cmd_depth + 48, depth + 56, 8);
+  memcpy(cmd_depth + 56, depth + 16, 8);
+  BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
+  memcpy(bits, cmd_bits + 24, 16);  /* sizes are in bytes: 16 = 8 entries */
+  memcpy(bits + 8, cmd_bits + 40, 16);
+  memcpy(bits + 16, cmd_bits + 56, 16);
+  memcpy(bits + 24, cmd_bits, 48);  /* 48 bytes = 24 entries */
+  memcpy(bits + 48, cmd_bits + 32, 16);
+  memcpy(bits + 56, cmd_bits + 48, 16);
+  BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
+  {
+    /* Create the bit length array for the full command alphabet. */
+    size_t i;
+    memset(cmd_depth, 0, 64);  /* only 64 first values were used */
+    memcpy(cmd_depth, depth + 24, 8);
+    memcpy(cmd_depth + 64, depth + 32, 8);
+    memcpy(cmd_depth + 128, depth + 40, 8);
+    memcpy(cmd_depth + 192, depth + 48, 8);
+    memcpy(cmd_depth + 384, depth + 56, 8);
+    for (i = 0; i < 8; ++i) {  /* these 24 depths land at a stride of 8 */
+      cmd_depth[128 + 8 * i] = depth[i];
+      cmd_depth[256 + 8 * i] = depth[8 + i];
+      cmd_depth[448 + 8 * i] = depth[16 + i];
+    }
+    BrotliStoreHuffmanTree(
+        cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
+  }
+  BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
+}
+
+/* Emits one command word: insert-length code | (extra-bit value << 8). */
+static BROTLI_INLINE void EmitInsertLen(
+    uint32_t insertlen, uint32_t** commands) {
+  uint32_t code;
+  uint32_t extra = 0;
+  if (insertlen < 6) {
+    code = insertlen;
+  } else if (insertlen < 130) {
+    const uint32_t tail = insertlen - 2;
+    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
+    code = (nbits << 1) + (tail >> nbits) + 2;
+    extra = tail & ((1u << nbits) - 1u);
+  } else if (insertlen < 2114) {
+    const uint32_t tail = insertlen - 66;
+    const uint32_t nbits = Log2FloorNonZero(tail);
+    code = nbits + 10;
+    extra = tail - (1u << nbits);
+  } else if (insertlen < 6210) {
+    code = 21;
+    extra = insertlen - 2114;
+  } else if (insertlen < 22594) {
+    code = 22;
+    extra = insertlen - 6210;
+  } else {
+    code = 23;
+    extra = insertlen - 22594;
+  }
+  *(*commands)++ = code | (extra << 8);
+}
+
+/* Emits one command word: copy-length code | (extra-bit value << 8). */
+static BROTLI_INLINE void EmitCopyLen(size_t copylen, uint32_t** commands) {
+  size_t code;
+  size_t extra = 0;
+  if (copylen < 10) {
+    code = copylen + 38;
+  } else if (copylen < 134) {
+    const size_t tail = copylen - 6;
+    const size_t nbits = Log2FloorNonZero(tail) - 1;
+    code = (nbits << 1) + (tail >> nbits) + 44;
+    extra = tail & (((size_t)1 << nbits) - 1);
+  } else if (copylen < 2118) {
+    const size_t tail = copylen - 70;
+    const size_t nbits = Log2FloorNonZero(tail);
+    code = nbits + 52;
+    extra = tail - ((size_t)1 << nbits);
+  } else {
+    code = 63;
+    extra = copylen - 2118;
+  }
+  *(*commands)++ = (uint32_t)(code | (extra << 8));
+}
+
+/* Emits the copy-length command word for "copylen" (code in the low 8 bits,
+   extra-bit value above). For lengths of 72 and more a second word, 64, is
+   appended after it. */
+static BROTLI_INLINE void EmitCopyLenLastDistance(
+    size_t copylen, uint32_t** commands) {
+  size_t code;
+  size_t extra = 0;
+  if (copylen < 12) {
+    /* Direct codes, no extra bits. */
+    code = copylen + 20;
+  } else if (copylen < 72) {
+    /* Two codes per power of two; "nbits" extra bits each. */
+    const size_t tail = copylen - 8;
+    const size_t nbits = Log2FloorNonZero(tail) - 1;
+    code = (nbits << 1) + (tail >> nbits) + 28;
+    extra = tail & (((size_t)1 << nbits) - 1);
+  } else if (copylen < 136) {
+    /* Fixed 5 extra bits. */
+    const size_t tail = copylen - 8;
+    code = (tail >> 5) + 54;
+    extra = tail & 31;
+  } else if (copylen < 2120) {
+    /* One code per power of two; "nbits" extra bits. */
+    const size_t tail = copylen - 72;
+    const size_t nbits = Log2FloorNonZero(tail);
+    code = nbits + 52;
+    extra = tail - ((size_t)1 << nbits);
+  } else {
+    /* Everything longer shares code 63. */
+    code = 63;
+    extra = copylen - 2120;
+  }
+  *(*commands)++ = (uint32_t)(code | (extra << 8));
+  if (copylen >= 72) {
+    /* 64 is also what CreateCommands emits for a repeated distance. */
+    *(*commands)++ = 64;
+  }
+}
+
+/* Appends one distance command word to "*commands": the distance code sits in
+   the low 8 bits and the extra-bit value in the bits above. */
+static BROTLI_INLINE void EmitDistance(uint32_t distance, uint32_t** commands) {
+  const uint32_t biased = distance + 3;
+  const uint32_t nbits = Log2FloorNonZero(biased) - 1;
+  const uint32_t prefix = (biased >> nbits) & 1;
+  const uint32_t code = 2 * (nbits - 1) + prefix + 80;
+  const uint32_t range_start = (2 + prefix) << nbits;
+  *(*commands)++ = code | ((biased - range_start) << 8);
+}
+
+/* Writes a non-final meta-block header: ISLAST = 0, the nibble count and
+   length of the block, then the ISUNCOMPRESSED bit.
+   REQUIRES: len <= 1 << 24. */
+static void BrotliStoreMetaBlockHeader(
+    size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
+    uint8_t* storage) {
+  /* 4 nibbles up to 1 << 16, 5 up to 1 << 20, otherwise 6. */
+  const size_t nibbles =
+      (len <= (1U << 16)) ? 4 : ((len <= (1U << 20)) ? 5 : 6);
+  /* ISLAST */
+  BrotliWriteBits(1, 0, storage_ix, storage);
+  /* Nibble count, stored as (nibbles - 4), followed by "len - 1". */
+  BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
+  BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
+  /* ISUNCOMPRESSED */
+  BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
+}
+
+/* First pass: scans "input[0..block_size)" for backward matches and appends
+   the resulting command words to "*commands" and literals to "*literals". */
+static BROTLI_INLINE void CreateCommands(const uint8_t* input,
+    size_t block_size, size_t input_size, const uint8_t* base_ip, int* table,
+    size_t table_bits, size_t min_match,
+    uint8_t** literals, uint32_t** commands) {
+  const uint8_t* ip = input;  /* "ip" is the input pointer. */
+  const size_t shift = 64u - table_bits;
+  const uint8_t* ip_end = input + block_size;
+  /* "next_emit" is a pointer to the first byte that is not covered by a
+     previous copy. Bytes between "next_emit" and the start of the next copy or
+     the end of the input will be emitted as literal bytes. */
+  const uint8_t* next_emit = input;
+  int last_distance = -1;
+  const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
+
+  if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
+    /* For the last block, we need to keep a 16 bytes margin so that we can be
+       sure that all distances are at most window size - 16.
+       For all other blocks, we only need to keep a margin of 5 bytes so that
+       we don't go over the block size with a copy. */
+    const size_t len_limit = BROTLI_MIN(size_t, block_size - min_match,
+                                        input_size - kInputMarginBytes);
+    const uint8_t* ip_limit = input + len_limit;
+
+    uint32_t next_hash;
+    for (next_hash = Hash(++ip, shift, min_match); ; ) {
+      /* Step 1: Scan forward in the input looking for a min_match-byte match.
+         If we get close to exhausting the input then goto emit_remainder.
+
+         Heuristic match skipping: If 32 bytes are scanned with no matches
+         found, start looking only at every other byte. If 32 more bytes are
+         scanned, look at every third byte, etc.. When a match is found,
+         immediately go back to looking at every byte. This is a small loss
+         (~5% performance, ~0.1% density) for compressible data due to more
+         bookkeeping, but for non-compressible data (such as JPEG) it's a huge
+         win since the compressor quickly "realizes" the data is incompressible
+         and doesn't bother looking for matches everywhere.
+
+         The "skip" variable keeps track of how many bytes there are since the
+         last match; dividing it by 32 (ie. right-shifting by five) gives the
+         number of bytes to move ahead for each iteration. */
+      uint32_t skip = 32;
+
+      const uint8_t* next_ip = ip;
+      const uint8_t* candidate;
+
+      BROTLI_DCHECK(next_emit < ip);
+trawl:
+      do {
+        uint32_t hash = next_hash;
+        uint32_t bytes_between_hash_lookups = skip++ >> 5;
+        ip = next_ip;
+        BROTLI_DCHECK(hash == Hash(ip, shift, min_match));
+        next_ip = ip + bytes_between_hash_lookups;
+        if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
+          goto emit_remainder;
+        }
+        next_hash = Hash(next_ip, shift, min_match);
+        candidate = ip - last_distance;
+        if (IsMatch(ip, candidate, min_match)) {
+          if (BROTLI_PREDICT_TRUE(candidate < ip)) {
+            table[hash] = (int)(ip - base_ip);
+            break;
+          }
+        }
+        candidate = base_ip + table[hash];
+        BROTLI_DCHECK(candidate >= base_ip);
+        BROTLI_DCHECK(candidate < ip);
+
+        table[hash] = (int)(ip - base_ip);
+      } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate, min_match)));
+
+      /* Check copy distance. If candidate is not feasible, continue search.
+         Checking is done outside of hot loop to reduce overhead. */
+      if (ip - candidate > MAX_DISTANCE) goto trawl;
+
+      /* Step 2: Emit the found match together with the literal bytes from
+         "next_emit", and then see if we can find a next match immediately
+         afterwards. Repeat until we find no match for the input
+         without emitting some literal bytes. */
+
+      {
+        /* We have a min_match-byte match at ip, and we need to emit bytes in
+           [next_emit, ip). */
+        const uint8_t* base = ip;
+        size_t matched = min_match + FindMatchLengthWithLimit(
+            candidate + min_match, ip + min_match,
+            (size_t)(ip_end - ip) - min_match);
+        int distance = (int)(base - candidate);  /* > 0 */
+        int insert = (int)(base - next_emit);
+        ip += matched;
+        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
+        EmitInsertLen((uint32_t)insert, commands);
+        memcpy(*literals, next_emit, (size_t)insert);
+        *literals += insert;
+        if (distance == last_distance) {
+          **commands = 64;
+          ++(*commands);
+        } else {
+          EmitDistance((uint32_t)distance, commands);
+          last_distance = distance;
+        }
+        EmitCopyLenLastDistance(matched, commands);
+
+        next_emit = ip;
+        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
+          goto emit_remainder;
+        }
+        {
+          /* Before resuming at ip, seed "table" with positions inside the
+             copy just emitted. Each slot must be keyed by the hash of the
+             bytes at the very position stored in it (the last one is ip). */
+          uint64_t input_bytes;
+          uint32_t cur_hash;
+          uint32_t prev_hash;
+          if (min_match == 4) {
+            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
+            cur_hash = HashBytesAtOffset(input_bytes, 3, shift, min_match);
+            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 3);
+            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 2);
+            prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 1);
+          } else {
+            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5);
+            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 5);
+            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 4);
+            prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 3);
+            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2);
+            cur_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
+            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 2);
+            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 1);
+          }
+
+          candidate = base_ip + table[cur_hash];
+          table[cur_hash] = (int)(ip - base_ip);
+        }
+      }
+
+      while (ip - candidate <= MAX_DISTANCE &&
+          IsMatch(ip, candidate, min_match)) {
+        /* We have a min_match-byte match at ip, and no need to emit any
+           literal bytes prior to ip. */
+        const uint8_t* base = ip;
+        size_t matched = min_match + FindMatchLengthWithLimit(
+            candidate + min_match, ip + min_match,
+            (size_t)(ip_end - ip) - min_match);
+        ip += matched;
+        last_distance = (int)(base - candidate);  /* > 0 */
+        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
+        EmitCopyLen(matched, commands);
+        EmitDistance((uint32_t)last_distance, commands);
+
+        next_emit = ip;
+        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
+          goto emit_remainder;
+        }
+        {
+          /* We could immediately start working at ip now, but to improve
+             compression we first update "table" with the hashes of some
+             positions within the last copy. */
+          uint64_t input_bytes;
+          uint32_t cur_hash;
+          uint32_t prev_hash;
+          if (min_match == 4) {
+            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
+            cur_hash = HashBytesAtOffset(input_bytes, 3, shift, min_match);
+            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 3);
+            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 2);
+            prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 1);
+          } else {
+            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5);
+            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 5);
+            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 4);
+            prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 3);
+            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2);
+            cur_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
+            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 2);
+            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
+            table[prev_hash] = (int)(ip - base_ip - 1);
+          }
+
+          candidate = base_ip + table[cur_hash];
+          table[cur_hash] = (int)(ip - base_ip);
+        }
+      }
+
+      next_hash = Hash(++ip, shift, min_match);
+    }
+  }
+
+emit_remainder:
+  BROTLI_DCHECK(next_emit <= ip_end);
+  /* Emit the remaining bytes as literals. */
+  if (next_emit < ip_end) {
+    const uint32_t insert = (uint32_t)(ip_end - next_emit);
+    EmitInsertLen(insert, commands);
+    memcpy(*literals, next_emit, insert);
+    *literals += insert;
+  }
+}
+
+static void StoreCommands(MemoryManager* m,
+                          const uint8_t* literals, const size_t num_literals,
+                          const uint32_t* commands, const size_t num_commands,
+                          size_t* storage_ix, uint8_t* storage) {
+  static const uint32_t kNumExtraBits[128] = {  /* extra-bit count per code */
+    0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24,
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
+    9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
+    17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24,
+  };
+  static const uint32_t kInsertOffset[24] = {  /* base length per insert code */
+    0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578,
+    1090, 2114, 6210, 22594,
+  };
+  /* Second pass: literal code, then command/distance codes, then the data. */
+  uint8_t lit_depths[256];
+  uint16_t lit_bits[256];
+  uint32_t lit_histo[256] = { 0 };
+  uint8_t cmd_depths[128] = { 0 };
+  uint16_t cmd_bits[128] = { 0 };
+  uint32_t cmd_histo[128] = { 0 };
+  size_t i;
+  for (i = 0; i < num_literals; ++i) {
+    ++lit_histo[literals[i]];
+  }
+  BrotliBuildAndStoreHuffmanTreeFast(m, lit_histo, num_literals,
+                                     /* max_bits = */ 8,
+                                     lit_depths, lit_bits,
+                                     storage_ix, storage);
+  if (BROTLI_IS_OOM(m)) return;
+  /* Count how often each code (the low 8 bits of a command word) occurs. */
+  for (i = 0; i < num_commands; ++i) {
+    const uint32_t code = commands[i] & 0xFF;
+    BROTLI_DCHECK(code < 128);
+    ++cmd_histo[code];
+  }
+  cmd_histo[1] += 1;  /* NOTE(review): these four bumps presumably keep   */
+  cmd_histo[2] += 1;  /* both prefix codes from degenerating to a single  */
+  cmd_histo[64] += 1;  /* symbol - confirm against upstream brotli.       */
+  cmd_histo[84] += 1;
+  BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depths, cmd_bits,
+                                 storage_ix, storage);
+  /* Write every command: prefix code, extra bits, then any literals. */
+  for (i = 0; i < num_commands; ++i) {
+    const uint32_t cmd = commands[i];
+    const uint32_t code = cmd & 0xFF;
+    const uint32_t extra = cmd >> 8;
+    BROTLI_DCHECK(code < 128);
+    BrotliWriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
+    BrotliWriteBits(kNumExtraBits[code], extra, storage_ix, storage);
+    if (code < 24) {  /* insert code: that many literals follow */
+      const uint32_t insert = kInsertOffset[code] + extra;
+      uint32_t j;
+      for (j = 0; j < insert; ++j) {
+        const uint8_t lit = *literals;
+        BrotliWriteBits(lit_depths[lit], lit_bits[lit], storage_ix, storage);
+        ++literals;
+      }
+    }
+  }
+}
+
+/* Acceptable loss for uncompressible speedup is 2% */
+#define MIN_RATIO 0.98
+#define SAMPLE_RATE 43
+
+/* Decides whether a block is worth entropy coding: yes when matches removed
+   enough literals, otherwise only if a sampled byte histogram looks cheap. */
+static BROTLI_BOOL ShouldCompress(
+    const uint8_t* input, size_t input_size, size_t num_literals) {
+  const double corpus_size = (double)input_size;
+  const double bit_budget = corpus_size * 8 * MIN_RATIO / SAMPLE_RATE;
+  uint32_t sampled_histo[256] = { 0 };
+  size_t pos;
+  if (num_literals < MIN_RATIO * corpus_size) return BROTLI_TRUE;
+  /* Sample every SAMPLE_RATE-th input byte. */
+  for (pos = 0; pos < input_size; pos += SAMPLE_RATE) {
+    ++sampled_histo[input[pos]];
+  }
+  return TO_BROTLI_BOOL(BitsEntropy(sampled_histo, 256) < bit_budget);
+}
+
+/* Moves "*storage_ix" back to "new_storage_ix" and clears the bits of that
+   byte at and above the new position; later bytes are left untouched. */
+static void RewindBitPosition(const size_t new_storage_ix,
+                              size_t* storage_ix, uint8_t* storage) {
+  storage[new_storage_ix >> 3] &= (uint8_t)((1u << (new_storage_ix & 7)) - 1);
+  *storage_ix = new_storage_ix;
+}
+
+static void EmitUncompressedMetaBlock(const uint8_t* input, size_t input_size,
+                                      size_t* storage_ix, uint8_t* storage) {
+  BrotliStoreMetaBlockHeader(input_size, 1, storage_ix, storage);  /* raw */
+  *storage_ix = (*storage_ix + 7u) & ~(size_t)7;  /* size_t-wide byte align */
+  memcpy(&storage[*storage_ix >> 3], input, input_size);  /* verbatim bytes */
+  *storage_ix += input_size << 3;
+  storage[*storage_ix >> 3] = 0;  /* zero the byte following the payload */
+}
+
+/* Compresses "input" block by block (kCompressFragmentTwoPassBlockSize bytes
+   at most per block), emitting one meta-block per block. "is_last" is unused
+   here; the caller writes the final empty meta-block. */
+static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl(
+    MemoryManager* m, const uint8_t* input, size_t input_size,
+    BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
+    int* table, size_t table_bits, size_t min_match,
+    size_t* storage_ix, uint8_t* storage) {
+  /* Positions and distances are measured from the start of the first block. */
+  const uint8_t* const base_ip = input;
+  size_t block_size;
+  BROTLI_UNUSED(is_last);
+
+  for (; input_size > 0; input += block_size, input_size -= block_size) {
+    uint32_t* commands_end = command_buf;
+    uint8_t* literals_end = literal_buf;
+    size_t num_literals;
+    block_size =
+        BROTLI_MIN(size_t, input_size, kCompressFragmentTwoPassBlockSize);
+    /* First pass: collect commands and literals into the scratch buffers. */
+    CreateCommands(input, block_size, input_size, base_ip, table,
+                   table_bits, min_match, &literals_end, &commands_end);
+    num_literals = (size_t)(literals_end - literal_buf);
+    if (!ShouldCompress(input, block_size, num_literals)) {
+      /* Few backward references and close to 8 bits of entropy per byte:
+         store the block raw, which is about 3x faster on such data. */
+      EmitUncompressedMetaBlock(input, block_size, storage_ix, storage);
+      continue;
+    }
+    /* Second pass: header, then "no block splits, no contexts", then data. */
+    BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
+    BrotliWriteBits(13, 0, storage_ix, storage);
+    StoreCommands(m, literal_buf, num_literals, command_buf,
+                  (size_t)(commands_end - command_buf), storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+  }
+}
+
+#define FOR_TABLE_BITS_(X) \
+  X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17)
+/* One NOINLINE wrapper per table_bits B; min_match is 4 for B <= 15, else 6. */
+#define BAKE_METHOD_PARAM_(B)                                                  \
+static BROTLI_NOINLINE void BrotliCompressFragmentTwoPassImpl ## B(            \
+    MemoryManager* m, const uint8_t* input, size_t input_size,                 \
+    BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,          \
+    int* table, size_t* storage_ix, uint8_t* storage) {                        \
+  size_t min_match = (B <= 15) ? 4 : 6;                                        \
+  BrotliCompressFragmentTwoPassImpl(m, input, input_size, is_last, command_buf,\
+      literal_buf, table, B, min_match, storage_ix, storage);                  \
+}
+FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
+#undef BAKE_METHOD_PARAM_
+
+void BrotliCompressFragmentTwoPass(
+    MemoryManager* m, const uint8_t* input, size_t input_size,
+    BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
+    int* table, size_t table_size, size_t* storage_ix, uint8_t* storage) {
+  const size_t initial_storage_ix = *storage_ix;
+  const size_t table_bits = Log2FloorNonZero(table_size);
+  switch (table_bits) {  /* dispatch to the variant baked for this table size */
+#define CASE_(B)                                      \
+    case B:                                           \
+      BrotliCompressFragmentTwoPassImpl ## B(         \
+          m, input, input_size, is_last, command_buf, \
+          literal_buf, table, storage_ix, storage);   \
+      break;
+    FOR_TABLE_BITS_(CASE_)
+#undef CASE_
+    default: BROTLI_DCHECK(0); break;  /* only 8..17 table bits are baked */
+  }
+
+  /* If output is larger than single uncompressed block, rewrite it. */
+  if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
+    RewindBitPosition(initial_storage_ix, storage_ix, storage);
+    EmitUncompressedMetaBlock(input, input_size, storage_ix, storage);
+  }
+
+  if (is_last) {
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
+    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
+    *storage_ix = (*storage_ix + 7u) & ~(size_t)7;  /* size_t-wide byte align */
+  }
+}
+
+#undef FOR_TABLE_BITS_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/compress_fragment_two_pass.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/compress_fragment_two_pass.h
new file mode 100755
index 0000000000..928677df42
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/compress_fragment_two_pass.h
@@ -0,0 +1,54 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function for fast encoding of an input fragment, independently from the input
+   history. This function uses two-pass processing: in the first pass we save
+   the found backward matches and literal bytes into a buffer, and in the
+   second pass we emit them into the bit stream using prefix codes built based
+   on the actual command and literal byte histograms. */
+
+#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
+#define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./memory.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
+
+/* Compresses "input" string to the "*storage" buffer as one or more complete
+   meta-blocks, and updates the "*storage_ix" bit position.
+
+   If "is_last" is 1, emits an additional empty last meta-block.
+
+   REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
+   REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
+   REQUIRES: "command_buf" and "literal_buf" point to at least
+              kCompressFragmentTwoPassBlockSize long arrays.
+   REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
+   REQUIRES: "table_size" is a power of two from 1 << 8 to 1 << 17 inclusive
+   OUTPUT: maximal copy distance <= |input_size|
+   OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
+BROTLI_INTERNAL void BrotliCompressFragmentTwoPass(MemoryManager* m,
+                                                   const uint8_t* input,
+                                                   size_t input_size,
+                                                   BROTLI_BOOL is_last,
+                                                   uint32_t* command_buf,
+                                                   uint8_t* literal_buf,
+                                                   int* table,
+                                                   size_t table_size,
+                                                   size_t* storage_ix,
+                                                   uint8_t* storage);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/dictionary_hash.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/dictionary_hash.c
new file mode 100755
index 0000000000..3677d7ddb4
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/dictionary_hash.c
@@ -0,0 +1,1120 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Hash table on the 4-byte prefixes of static dictionary words. */
+
+#include "../common/platform.h"
+#include "./dictionary_hash.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+BROTLI_INTERNAL const uint16_t kStaticDictionaryHash[32768] = {
+32072,0,0,0,0,0,0,0,0,21860,0,0,0,0,0,0,0,40486,0,0,0,0,0,45798,0,0,0,0,0,0,1292
+,0,0,0,0,4964,278,23717,0,19972,0,0,0,0,0,0,0,0,0,0,0,0,2126,16102,0,0,0,14437,0
+,0,0,0,0,0,0,26727,2253,0,0,17252,0,0,0,0,0,0,0,0,0,3622,0,0,0,0,22984,0,0,0,0,0
+,0,16647,0,34247,0,0,0,0,0,48486,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2511,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19532,0,0,24004,0,0,0,9828,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,30853,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,31974,0,0,0,0,0,0,0,0,20650,2404,0,20773,1677,9031,0,6404,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,51879,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6761,7206
+,0,0,21992,22983,0,0,3529,0,1864,0,0,0,0,0,0,11046,0,0,9641,0,0,0,6507,0,0,36934
+,21576,62375,0,0,0,0,0,0,0,0,0,8294,0,0,0,0,0,0,0,40807,0,0,0,39398,8136,0,0,0,0
+,0,0,0,8875,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7941,0,0,25609,0,0,0,936,
+3716,3213,15687,0,0,0,0,0,52519,0,17381,0,0,0,0,1320,5797,0,21029,0,0,6472,807,0
+,0,0,0,0,0,0,0,0,0,13545,0,0,0,3624,0,0,0,29674,30820,0,31237,0,6596,0,0,0,0,0,0
+,0,0,0,64070,0,0,0,0,0,0,0,0,0,0,0,22278,0,37446,0,0,0,0,7240,423,0,24612,21705,
+17636,0,0,0,0,0,0,1833,0,0,0,328,6021,0,0,0,19974,0,0,0,0,0,0,0,0,0,62119,4178,0
+,0,0,0,12100,8617,0,0,16900,0,36678,0,0,0,35366,0,51718,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,20998,0,62086,0,0,0,0,0,5542,0,0,0,0,0,0,0,0,0,0,0,14629,10952,25927,0,0,0
+,0,19849,0,0,0,0,0,0,0,30952,3046,14314,12998,0,0,0,15268,0,40582,30216,62118,0,
+0,0,20132,0,0,0,0,0,12005,0,0,0,52358,0,0,0,0,24778,0,44,33095,0,0,0,0,0,26372,0
+,0,0,0,0,3781,0,0,17928,9479,0,0,0,0,0,0,0,0,32297,28613,0,0,0,0,0,0,0,0,0,0,0,0
+,0,47174,11723,0,0,0,0,0,0,0,0,0,2536,55143,0,0,6410,0,0,0,0,0,0,0,0,56294,11914
+,0,529,0,30184,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8261,0,0,28808,58854,22633,
+965,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,64135,0,0,331,3684,0,1605,0,0,0,0,0,0,
+0,0,0,0,16650,37,0,23622,3144,15429,0,0,0,0,0,0,0,0,0,0,22443,69,0,0,0,0,0,0,0,0
+,17832,0,0,0,0,0,0,0,0,0,11113,0,0,0,0,18309,0,0,0,0,0,0,0,0,0,26630,0,0,25512,
+25895,0,0,0,0,0,0,0,0,0,0,0,16901,0,0,0,27558,0,0,9418,0,0,0,3508,0,0,0,0,0,0,0,
+0,37990,9289,8517,0,0,0,0,1578,1604,23944,0,0,14916,12781,0,0,0,0,0,0,0,12105,0,
+16617,0,0,0,0,0,0,0,0,0,0,0,0,21348,11240,28870,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,5772,0,0,27812,0,0,0,0,0,0,0,8324,0,0,0,0,0,0,0,0,0,0,16748,1157,0,0,18794,
+16324,25898,935,8333,0,0,0,0,0,0,0,0,18246,0,18086,0,46854,0,0,0,0,0,0,339,0,0,
+25188,12780,12166,6409,0,0,0,0,16516,0,27012,28395,0,0,0,0,0,0,0,1420,0,0,0,9768
+,52967,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25163,324,0,0,0,0,0,0,0,0,0,64998,0,0,0,0,0,
+21893,0,0,0,0,0,47366,0,0,0,870,0,0,0,12646,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,26020,16360,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1809,0,0,0,6601,15878,0,0,0,0,0,
+29092,0,28516,0,0,0,0,0,0,0,0,0,21988,0,0,0,42950,0,0,0,0,0,0,0,0,0,0,5133,1318,
+0,0,0,0,0,0,0,0,0,0,0,54982,24904,0,0,0,0,0,0,0,0,0,0,51526,0,0,0,0,0,3685,0,0,0
+,0,10062,9412,0,0,0,31460,5708,6181,0,0,0,0,0,0,0,0,0,5575,0,0,0,0,0,0,0,0,0,0,
+27144,57478,0,0,0,0,0,0,7084,0,21993,53126,0,0,0,0,8397,0,0,5733,0,0,0,0,0,2116,
+0,24742,0,11271,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1133,0,4873,0,0,38310,0,0,0,0,0,
+0,0,0,0,0,0,0,17932,0,0,18053,0,0,0,25510,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17798,0,
+26214,0,0,0,0,0,0,0,0,23016,17415,20392,164,0,0,0,0,0,0,0,0,0,0,0,3239,0,46119,0
+,0,0,28580,0,0,0,0,0,0,0,0,0,7621,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,41478,0,0,31016,
+55334,10056,1924,0,0,0,0,0,36614,0,36711,0,0,0,0,0,0,0,0,0,0,13994,59303,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,26501,0,5639,0,0,0,0,0,0,13897,1253,0,0,0,0,0,5095,0,0,0,
+28869,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8646,0,0,0,0,25641,17796,0,0,0,0,0,0,0,
+13316,620,6309,11819,0,0,0,0,0,0,0,0,0,904,1095,0,24229,0,0,28744,49703,0,23077,
+0,0,0,0,32392,0,0,0,0,35271,0,28740,5866,0,0,0,0,0,0,0,4361,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7917,8869,0,0,0,13924,0,0,0,0,
+0,41958,0,0,0,0,0,0,6766,13989,0,0,0,903,0,0,24010,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,64390,0,22468,0,25861,0,0,0,0,23656,5317,0,0,0,0,0,0,23017,5445,16009
+,0,0,0,0,0,0,0,0,48006,10473,0,0,14404,0,0,0,42183,0,0,0,51270,0,0,10602,24132,0
+,0,0,0,0,43782,0,0,17834,0,0,0,25576,27205,0,0,0,0,0,0,0,0,29066,0,0,0,0,0,626,
+1988,14700,0,0,0,0,0,0,0,0,0,0,0,0,57670,0,0,0,0,0,0,0,0,0,44710,0,0,0,0,3848,
+7623,0,0,0,0,0,0,0,0,0,0,0,42374,0,0,0,0,0,0,0,0,19272,6436,0,0,5256,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,19685,0,0,0,0,0,0,0,0,0,0,0,0,0,39783,0,0,0,0,30984,0,0,0,0,0,0
+,28230,0,0,0,29028,10538,3205,0,0,0,0,0,0,0,0,0,0,0,5636,840,295,0,0,8488,8198,0
+,0,0,0,0,0,0,0,0,20580,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4074,19526,0,0,0,0,
+31144,64038,0,0,0,0,0,0,16716,0,0,0,0,0,0,0,0,0,0,0,17706,0,0,0,0,0,0,50630,0,
+50503,0,0,0,0,0,0,0,0,0,0,0,25446,0,0,0,13831,0,0,0,0,0,0,2696,4039,0,0,0,0,
+25288,0,12076,2054,0,48934,0,0,0,0,16969,59431,17259,35335,0,0,0,0,0,0,0,0,0,0,0
+,0,31275,0,0,0,1097,0,0,0,0,0,0,0,0,0,0,0,776,839,0,0,29386,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,5864,12134,0,0,0,0,0,0,0,25349,0,0,0,0,0,0,0,0,0,61447
+,0,0,0,0,0,0,0,0,0,24678,0,0,0,63335,0,28836,8142,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4494,0,0,0,0,0,14088,1188,0,16260,0,0,0,
+16421,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,276,0,0,17060,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24076,29445,0,33543,0,4901,0
+,0,12522,0,0,62471,0,0,0,0,0,0,0,0,0,0,4046,0,0,0,0,20486,0,15460,2217,51719,0,0
+,0,0,0,23495,0,0,0,0,0,0,15370,0,15849,0,15113,0,0,0,0,0,0,0,0,27972,7337,0,0,0,
+0,30342,0,0,0,0,0,0,0,0,32299,23940,0,17766,0,0,0,0,0,0,6184,0,20904,0,0,0,0,0,0
+,0,0,0,0,31492,0,0,0,5509,0,0,0,0,0,0,0,0,2669,50182,0,0,12299,0,0,0,0,0,0,0,
+5257,28167,0,0,0,0,0,0,0,0,0,0,0,11750,3890,0,0,26500,0,0,0,0,0,0,0,49318,0,0,0,
+0,0,0,0,10981,0,0,0,0,0,0,0,0,17961,1831,0,0,0,0,0,0,0,29638,0,0,0,0,26473,0,
+6216,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,711,0,0,0,0,0,0,0,0,0,0,28683,39975
+,0,0,0,0,0,51654,0,0,0,27527,0,0,0,0,0,0,0,0,30859,3268,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28772,0,18212,0,0,0,0,25448,65446,0,0,0,0,
+0,0,3337,1670,0,0,0,0,0,19332,0,0,0,0,24936,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1043,0
+,0,0,0,15814,0,21670,0,0,0,0,0,0,0,16263,0,0,0,0,0,0,0,0,0,32454,0,30630,0,0,
+20170,9926,0,0,0,18247,0,0,14376,0,2056,17191,0,0,0,0,0,0,0,7812,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,22474,52806,1588,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10825,0,
+0,0,0,40934,0,0,0,0,0,0,0,28677,0,0,5714,0,0,0,0,0,0,0,0,0,0,0,0,0,25865,22246,0
+,0,0,0,17256,35751,0,0,0,0,0,0,0,0,8236,0,32108,0,0,0,43,14342,0,16517,0,0,30732
+,0,4012,133,0,40583,971,23942,0,0,27275,0,0,0,204,0,0,27140,7564,44327,27592,
+57958,0,0,0,0,22344,25701,0,0,0,0,0,0,0,19524,31755,0,0,28102,0,59111,0,0,0,0,0,
+0,0,12261,0,44934,0,0,0,0,31560,0,11114,0,0,0,0,0,0,0,0,0,0,0,18953,18311,0,
+45159,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2059,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+19399,0,0,0,0,0,0,0,0,0,0,0,0,0,58534,0,0,0,0,0,0,0,0,0,0,0,0,22411,23943,0,0,0,
+0,0,0,11690,0,0,4069,0,0,2668,6342,0,0,0,0,0,0,27658,1766,0,0,0,0,23240,56070,0,
+0,0,0,0,0,0,0,0,0,0,0,0,34119,0,24453,0,0,0,0,21867,0,17610,9894,0,0,27976,38790
+,0,0,0,43654,0,31559,12202,23142,0,0,0,50343,0,0,0,0,0,32806,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,49895,0,0,0,0,15786,4263,0,0,0,0,4746,3814,0,0,0,0,0,0,17192,
+453,17323,0,20328,4036,0,0,0,15844,0,0,0,0,27561,31940,32296,0,0,0,0,0,0,0,11499
+,11782,0,0,0,0,9738,50471,0,0,0,0,0,35430,0,0,0,0,0,29734,0,0,0,36551,0,0,0,0,
+9257,5606,0,13829,0,7015,0,0,0,0,0,25127,0,0,19051,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2572,0,0,0,0,0,0,29797,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,42342,0,0,0,0,9293,
+0,17896,56038,4077,0,0,0,29899,37351,0,30823,0,8326,0,0,0,18342,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18569,54054,0,0,0,0,0,0,0,0,0,37254,0,0,31433,
+61510,0,2022,0,0,0,0,0,25381,0,0,0,0,0,0,0,0,0,0,0,0,0,2149,25289,0,0,0,0,0,0,0,
+0,0,0,12516,14185,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8676,0,0,0,0,0
+,0,0,0,0,0,0,36486,0,0,0,0,10889,9607,0,28711,0,0,0,0,0,0,0,0,0,0,28490,0,0,0,0,
+26181,10283,1701,0,0,0,0,0,0,0,0,0,14980,0,7783,0,27846,0,0,0,56486,3892,0,0,0,
+5770,16583,0,26309,13422,20292,0,0,0,0,0,0,0,0,0,28742,0,0,0,0,14536,1158,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25801,0,0,0,0,0,0,0,0,0,0,0,0,42438,0,3332,0,0
+,0,0,0,0,0,0,0,8327,0,0,0,0,0,0,0,0,0,0,0,0,17353,1447,0,0,8427,48518,1359,0,0,0
+,0,0,14986,0,32168,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9230,2791,0,0,0,0,0,0,0,0,
+16073,31623,4269,0,0,0,0,0,0,4519,0,0,27912,58950,0,0,0,0,0,0,0,0,8361,19812,0,0
+,0,0,6056,7877,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21701,0,0,0,0,0,0,0,0,0,0,0
+,0,9128,1125,0,16548,0,0,0,0,0,0,0,0,0,0,17292,6854,21352,0,2380,0,0,4007,0,0,0,
+0,0,24357,4202,0,0,0,0,0,0,0,0,0,0,0,0,0,10664,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,42823,3022,0,0,0,0,0,0,0,0,0,0,0,0,14373,0,20677,3304,2759,20522,64903,0,
+0,0,38,0,0,0,0,0,0,0,0,0,0,0,27814,2802,8870,3758,1255,0,0,0,0,0,0,0,0,30027,
+9510,0,0,0,0,17864,14855,0,0,0,0,0,0,0,0,0,0,23404,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+51462,0,0,0,0,0,0,0,45734,0,0,23467,32327,0,0,10826,52999,0,0,0,33222,31336,
+64326,0,0,0,0,0,0,0,32166,0,0,3891,0,0,0,7017,645,0,0,0,0,0,0,27915,46087,0,0,0,
+21863,0,34246,0,0,16715,0,0,0,0,14052,21416,0,0,0,0,0,0,0,0,39846,0,0,0,0,0,
+38982,0,0,17512,7460,0,0,0,0,0,0,0,0,0,15428,0,0,0,0,0,0,0,28356,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,25445,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11879,0,0,0,0,0,0
+,0,0,0,0,0,0,0,19911,0,20007,0,0,0,10855,943,0,0,10821,0,0,0,0,4170,0,0,0,0,0,0,
+0,0,0,9836,0,0,0,0,0,0,0,0,0,0,65415,0,0,0,0,0,0,0,0,9865,24646,0,0,0,0,0,40519,
+0,0,0,0,0,0,0,0,0,12804,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22091,23655,0,0,0,0,0,0,
+0,31686,0,0,0,58599,0,0,0,0,0,0,0,0,0,0,0,0,0,19620,0,0,0,0,0,0,0,0,0,0,0,0,0,
+24421,0,28100,0,0,0,31268,0,3204,0,0,0,0,0,0,0,0,0,14822,0,0,0,0,19947,10182,0,0
+,9480,14821,4398,0,0,14532,0,0,0,48871,1873,0,0,0,0,0,0,0,589,1541,0,0,0,0,0,
+23333,0,0,0,14149,0,0,0,0,1296,14374,0,27300,0,0,0,0,0,0,7276,0,0,0,0,0,0,47718,
+0,0,0,0,0,0,0,0,0,0,5164,1765,0,14405,0,37574,1994,0,6636,0,0,0,0,0,0,0,0,27815,
+0,0,0,0,2568,6820,0,0,0,0,0,0,0,0,0,0,11336,26247,0,0,23912,0,0,0,30536,0,0,
+34342,0,17799,0,0,0,22149,0,6118,0,25732,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,26600,5190,0,0,1142,0,0,0,0,0,0,0,0,39527,0,0,0,0,0,39494,0,0,0,0,0,0,0
+,0,0,0,3085,0,0,0,0,0,0,0,4786,0,0,0,28873,6532,0,0,26664,0,9193,11719,0,0,0,0,0
+,0,31752,64646,0,0,0,0,0,0,0,0,0,0,0,11397,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25094,0
+,0,18153,20167,0,0,0,17254,0,0,878,0,0,0,0,0,0,0,0,0,0,24166,0,0,0,0,0,0,0,0,0,0
+,0,0,26059,0,0,0,0,0,0,0,0,0,0,0,0,0,31592,0,0,8167,24362,6212,0,34758,0,0,0,0,0
+,0,32520,0,0,44679,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17989,8681,29222,0,0,0,
+0,0,0,0,0,10251,4902,1452,15207,0,0,0,0,0,0,0,22822,0,10469,0,0,0,0,0,0,19337,
+17670,107,11494,0,0,0,0,27305,2565,0,0,0,0,0,0,0,64518,200,28389,0,0,0,0,31208,0
+,30762,0,0,0,0,0,29321,60518,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3209,3237,
+12490,22663,0,0,0,18789,31464,16391,0,0,0,0,0,0,0,0,0,0,0,20646,0,0,0,27238,0,0,
+0,0,0,15940,4488,6951,0,0,0,46342,0,0,0,0,0,0,0,0,0,28965,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,20584,3367,0,25350,0,0,0,0,0,0,0,0,0,0,0,0,1814,0,0,0,0,0,0,0,0,0,0,17125,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,55943,0,0,0,0,0,24133,0,0,0,0,0,0,0,0,0,0,0,0,2929
+,0,0,50086,0,2918,25356,30052,115,11846,0,0,0,0,3056,0,0,0,0,17639,239,19815,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36806,0,0,0,0,0,0,0,0,0,0,0,0,0,21479,0,0,0,0,0,
+28420,11786,4772,0,0,3368,36295,0,31463,0,0,14665,996,0,20582,0,0,0,9988,0,23685
+,0,0,0,52551,0,0,0,0,0,0,0,7556,0,0,0,0,0,0,0,1895,2186,0,0,0,0,0,27755,25447,0,
+0,0,0,31052,63270,0,0,0,0,0,0,0,36742,0,24804,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,31048,0,0,0,0,0,0,0,0,0,21290,2276,0,0,0,0,26475,0,0,0,0,0,0,0,0,0
+,0,15332,0,0,0,0,0,0,0,0,3176,19431,0,0,0,0,0,0,0,62726,0,0,0,25380,0,0,27883,
+1316,0,0,7724,3015,0,0,0,0,6697,0,0,47910,0,0,0,0,0,0,0,0,0,3141,0,0,0,14820,0,0
+,0,0,9326,0,0,0,0,0,0,0,0,0,0,31493,0,0,0,0,0,6566,0,0,0,0,0,0,6569,1348,0,25638
+,0,0,0,0,0,20324,0,0,17067,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11876,0,41030,0,0,0,26405
+,0,0,0,0,0,0,0,0,0,11431,28137,14950,0,10151,0,0,0,0,0,0,0,29574,0,0,0,0,27176,
+57446,0,0,0,0,28650,57574,1387,0,0,0,0,0,0,0,0,0,0,58247,0,0,0,0,0,0,0,16805,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3526,0,15781,0,5572,13352,0,0,0,0,0,18665
+,23463,0,0,0,0,0,0,15405,6885,0,0,0,0,15272,0,0,0,0,0,0,0,0,9861,0,0,0,0,0,0,0,0
+,9512,4037,0,0,11563,49639,0,0,0,0,0,0,27880,57830,0,0,0,0,0,41831,0,21924,0,0,0
+,0,0,0,0,25509,0,27462,0,18085,0,0,0,0,0,0,0,0,0,0,0,0,13898,8068,26441,0,0,0,0,
+0,0,25316,0,0,0,0,16298,7397,5706,19239,0,0,0,0,0,0,0,0,1392,50919,0,0,0,0,0,
+53863,0,0,0,0,1451,0,0,0,0,0,0,0,0,0,0,35847,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,17801,15813,0,12740,0,0,0,32967,0,0,0,0,0,0,5389,0,0,0,0,0,0,0,0,0,0,31143,0,
+20548,0,0,0,0,0,0,0,0,0,51686,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+12109,19015,0,34983,0,21732,3600,0,0,0,0,47750,17288,43975,22857,47559,0,0,0,0,
+26408,48358,0,0,0,0,0,0,0,0,0,0,0,0,0,30470,0,0,23560,4581,0,22404,0,49286,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,49831,0,0,0,27525,31691,7,0,0,25835,0,0,0,0,0,
+4201,16485,0,20676,0,0,0,0,3753,23303,16264,3878,0,0,0,0,0,0,11434,0,0,0,0,0,0,
+7589,0,0,0,0,0,0,0,0,0,57095,0,0,0,0,0,0,0,0,0,0,0,22820,11146,49158,0,23623,0,0
+,0,0,0,0,0,13893,0,0,0,0,0,0,11722,60071,1258,0,0,0,0,0,0,18564,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,27945,0,0,0,0,5479,0,20006,17608,3431,10988,30180,0,0,0,0,0,0,0,
+24581,14,0,0,0,0,0,0,25572,0,0,0,28612,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,53543,0
+,0,0,0,0,0,0,0,0,0,0,33670,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8710,0,14116,0,0,116,
+292,0,0,0,37831,0,43078,0,0,0,0,0,0,0,0,21832,0,0,32134,783,0,0,30982,0,0,0,68,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5932,0,0,0,18505,
+15175,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3630,16965,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,17797,0,0,0,0,0,0,520,42150,0,0,3122,0,0,0,22506,0,0,0,0,0,0,0,0,28550,0,
+0,0,50278,0,0,13641,5958,0,35238,0,0,0,0,0,0,0,0,29993,18724,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,20619,9319,0,0,0,0,23977,0,5193,0,0,12196,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,24390,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20105,677,0,0,0,0
+,0,0,0,0,29419,0,0,0,0,0,0,0,0,0,20266,0,0,0,0,10631,0,0,0,0,0,0,0,0,0,47655,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26628,12744,0,20648,0,0,0,432,0,0
+,0,0,0,0,0,0,0,0,646,0,25604,0,0,0,0,0,0,0,0,0,0,0,0,0,63782,0,0,0,0,24616,0,0,0
+,21291,0,0,0,0,0,0,0,0,0,0,45638,0,0,0,0,1931,0,0,0,20521,59975,0,20614,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,56231,0,0,0,0,0,29991,0,52871,0,20934,0,0,0,0,0,0,0,16871,
+0,0,0,0,0,0,0,0,0,0,0,0,0,7237,0,0,0,0,0,47558,0,0,0,0,0,0,0,0,0,0,0,10406,0,0,0
+,0,0,0,0,43046,0,0,2930,0,12936,0,0,0,0,0,0,0,0,0,0,0,0,31141,0,0,0,0,0,0,0,
+37639,0,17572,0,0,0,0,0,0,0,0,0,0,31240,0,0,0,0,0,688,0,0,0,0,0,1648,0,0,0,0,
+10055,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,146,0,0,0,0,0,0,0,0,0,6345,199,0,34982,0,0
+,0,0,0,0,0,0,0,0,0,0,0,56839,0,0,0,0,0,48902,0,13412,0,0,0,0,0,0,0,0,2441,4420,0
+,0,0,0,20428,933,0,0,0,0,0,0,0,45383,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,54726,0,0,0,0,0,0,0,0,0,0,0,0,17036,741,0,0,0,0,0,0,0,27589,0,0,30282
+,18950,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2248,0,0,0,0,0,0,0,0,0,25993,0,0,0,
+2443,0,0,31622,0,14150,0,0,0,28679,0,0,0,0,0,0,15464,0,0,0,0,54694,0,0,0,0,0,0,
+3827,0,0,0,3756,0,9897,0,0,0,0,0,19082,31239,0,0,0,0,0,0,0,0,0,0,0,24580,0,0,0,0
+,0,0,0,0,0,16580,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27625,0,0,0,784,4647,32652,0,0,
+63494,0,0,0,0,0,0,0,21062,0,0,0,0,0,0,0,0,0,0,3404,58470,0,32325,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,18634,2789,0,0,0,0,0,0,0,8548,0,0,0,22501,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,15881,0,0,0,0,35879,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7978,17956,0,0,0,
+0,0,0,0,24324,0,0,4937,0,0,0,8168,0,13420,10340,0,0,0,0,0,11780,0,0,0,0,0,0,0,0,
+0,0,16712,0,0,0,0,0,0,0,17640,17991,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,2953,0,0,0,0,0,0,0,9100,16806,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,30667,0,0,
+19013,0,0,0,0,0,0,205,15334,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1969,0,0,0,0,0,0,0,26248
+,52518,0,49798,0,0,0,0,0,0,0,9668,0,0,0,0,0,4742,0,0,21641,0,0,0,0,0,0,53574,0,0
+,0,0,0,0,5707,0,0,0,0,0,0,0,3018,12454,0,0,0,0,2920,262,0,0,0,0,0,0,0,0,0,0,3593
+,0,0,0,0,0,0,0,0,0,0,23910,0,0,0,0,0,0,0,55879,0,0,0,0,0,775,0,43270,5066,48967,
+0,0,22986,4165,8971,44838,0,0,0,0,0,62279,272,0,0,0,0,51430,0,0,0,0,0,0,28234,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13349,0,0,0,51111,20265,13861,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,585,7494,0,0,0,0,0,0,0,0,21768,62407,0,0,0,0,7979,166,0,
+0,0,0,0,0,0,0,0,38918,0,56742,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16296,5767,0,0,0,0,0,0
+,0,32068,0,0,0,0,0,0,0,0,0,0,0,0,0,29796,0,0,0,0,0,0,0,0,23916,30183,0,58791,0,0
+,0,0,0,0,0,20518,0,0,0,0,8969,0,0,0,183,0,0,0,0,0,2314,17445,0,0,0,0,0,0,0,0,0,
+23748,0,0,8139,4839,27914,0,0,0,0,0,0,0,0,0,0,0,0,29478,0,0,16552,26663,0,53767,
+0,0,13960,8039,18696,0,0,0,0,0,0,0,0,0,0,0,782,16005,0,0,0,0,0,0,0,0,6258,56806,
+16456,12455,0,0,0,0,0,0,0,23780,0,0,0,0,0,0,9355,0,0,0,7273,41063,24780,57766,0,
+0,0,0,0,0,0,0,0,0,3820,2597,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29225,61126,0,0,0,58439,
+15691,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37190,22408,967,0,0,0,
+23078,26858,0,0,0,19753,0,0,0,0,0,0,0,0,0,5416,13702,0,0,0,0,0,52742,20394,38567
+,0,0,0,51079,0,0,136,8516,0,0,0,0,0,0,0,0,0,0,0,27588,0,0,0,0,0,0,0,0,0,0,531,0,
+0,0,0,0,0,0,0,0,8936,5031,12520,19334,0,0,22827,30247,28074,31140,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,27497,18148,20104,59079,0,0,0,0,0,0,0,0,0,24389,0,0,6125,0,0,0,0,
+9541,0,0,24553,29095,0,0,0,0,0,0,0,25444,0,0,9643,0,0,63047,0,0,0,0,0,0,0,0,0,
+39558,0,0,0,0,0,0,20620,11815,499,0,5128,2278,0,0,0,0,0,46310,0,0,0,0,0,0,0,0,
+23530,40166,2440,0,0,0,0,0,0,0,0,0,0,15174,0,0,0,0,0,0,0,0,0,0,26922,0,0,0,0,0,0
+,0,0,0,0,26758,0,0,0,0,0,51911,0,0,23532,0,0,0,0,51238,25737,44486,12622,0,0,0,0
+,0,0,3078,0,9253,0,0,1128,22023,0,0,0,21350,0,16420,0,0,0,0,0,0,0,65094,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22532,0,48774,0,34503,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,9797,0,0,0,0,0,0,0,13797,0,38279,0,0,1738,0,489,46343,0,45382,0,0,0,0,0,0,
+0,0,0,29030,0,0,0,0,0,0,6220,56550,0,0,0,0,0,26885,0,28806,0,0,0,0,0,0,0,0,0,0,0
+,45958,0,0,0,0,20553,49927,0,0,0,0,0,0,3019,12358,0,0,0,0,0,0,0,0,0,0,26571,
+13319,0,0,653,23399,0,0,0,0,0,0,0,0,22316,0,0,21188,0,0,0,0,0,0,0,0,0,27556,0,0,
+0,0,0,0,0,27878,21483,27653,0,29701,237,0,10632,0,0,0,0,33766,0,0,0,0,0,0,31563,
+0,0,0,0,0,1416,2439,0,0,0,0,0,0,0,0,0,0,9611,0,0,0,0,0,0,0,5611,16581,26601,
+35462,0,0,0,26756,0,59271,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26984,57734,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,7882,0,0,0,19528,6469,0,0,1161,0,0,0,7688,20935,425,0,
+0,0,0,0,0,0,0,12519,0,12902,0,0,0,0,0,0,0,0,0,0,2411,0,11725,26086,0,0,20201,0,0
+,0,0,0,0,0,0,11045,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,30471,0,0,0,0,0,0,0,0,0,0,0,
+21541,1141,21190,0,9188,0,0,0,0,0,0,0,0,0,0,0,0,0,0,184,1093,0,0,0,0,0,0,0,0,
+4842,0,13672,0,0,12230,0,0,0,10532,0,0,8937,0,0,0,0,0,0,0,0,0,0,28996,0,0,11720,
+26982,0,46182,0,43911,31754,0,1160,3940,0,20772,0,0,0,0,0,24549,0,32582,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,31845,0,0,0,0,0,0,0,2310,11788,0,0,43047,0,0,0,18853,0
+,0,0,0,0,0,0,0,0,63622,0,0,7048,17318,0,0,0,21957,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,1039,6279,0,0,0,0,0,0,0,0,0,0,0,0,0,12197,0,0,0,0,0,0,0,0,0,
+46470,0,0,24,19719,0,0,0,0,0,0,0,0,0,39335,0,0,0,0,0,0,0,0,0,0,21353,3846,0,0,0,
+0,0,0,0,36679,0,0,0,0,0,0,0,0,0,0,0,11268,0,0,0,0,0,9382,0,0,0,0,0,0,0,0,0,0,0,0
+,0,29926,0,33606,0,4708,2828,0,0,29543,0,0,0,0,0,29893,0,0,0,0,0,0,0,0,3663,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10920,7111,0,0,0,0,0,0,0,0,0,0
+,9384,0,0,0,0,0,0,0,0,0,0,0,0,20388,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37094,0,0,0,
+27110,0,0,0,0,0,0,21865,0,27753,30214,0,0,0,0,0,57895,0,0,0,0,0,0,0,0,0,0,12648,
+5446,0,0,0,0,0,0,0,0,0,0,19784,17124,0,52007,0,0,0,0,0,0,0,0,758,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,24900,0,0,0,0,0,1476,0,65031,0,0,1205,46663,0,30023,11625,
+1094,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10058,0,0,0,0,0,0,28455,0,0,0,0,0,0,0,0,0,0,0,
+14788,0,0,0,0,16808,0,0,742,0,0,0,0,0,0,0,0,0,0,0,21636,0,0,0,0,0,0,0,0,0,0,0,0,
+15944,23207,0,0,0,0,247,0,0,0,0,24743,0,0,0,5252,0,0,0,0,0,0,0,0,29961,18660,
+21099,46791,0,7045,0,0,0,0,25707,0,0,17412,3828,0,0,0,0,0,0,0,0,0,0,0,5803,5637,
+0,38151,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60103,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,853,0,0,0,0,0,0,30215,0,0,0,0,0,0,0,8741,0,0,0,0,0,27366,0,0,0,0,171,
+4070,0,0,0,0,0,0,0,0,24073,7366,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2184,5189,0,
+20932,1545,4996,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7684,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6313,0,0,0,0,0,0,0,30826,0,0,0,0,0,0,
+0,0,0,0,27463,0,0,0,0,0,0,0,0,0,0,21640,63303,0,0,3275,31111,0,0,0,0,0,0,0,11556
+,0,14756,0,0,0,15108,0,0,0,0,0,0,0,0,0,0,0,0,0,0,23914,28966,0,0,0,4965,0,0,0,0,
+0,0,0,0,0,0,10216,5223,0,0,0,0,0,0,0,0,0,27142,0,0,1173,20198,0,0,0,0,0,56614,0,
+0,0,0,0,4612,0,0,0,0,0,0,0,0,0,0,11822,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17769,7910,
+0,0,31880,0,0,6055,0,0,0,0,0,0,0,0,0,0,8970,0,0,0,0,0,0,0,0,0,0,0,16840,23879,0,
+0,11051,0,0,0,32552,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20842,13701,0,0,0,37191,7373,
+10471,17482,25348,0,0,0,38502,0,0,0,0,0,0,0,0,0,21509,6058,0,0,0,0,0,0,3173,0,0,
+0,9543,0,0,0,0,0,0,17768,12708,0,0,0,0,0,37030,0,0,0,0,0,0,0,0,0,0,12748,48743,0
+,11718,0,0,25194,0,0,0,9033,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5028,0,30118,0,0,0,0,0,
+42759,0,0,3720,0,0,0,0,0,0,25190,0,0,0,0,0,0,0,0,0,0,5450,5125,0,58086,0,0,0,0,0
+,27716,0,0,0,0,0,0,0,0,0,22052,0,0,0,0,26249,0,15947,3460,0,0,0,35814,0,0,0,7813
+,19500,32167,0,18597,0,0,0,0,0,28644,0,0,0,60743,0,0,0,0,0,29636,0,0,0,0,0,0,0,0
+,0,0,0,0,0,17220,15885,9414,9642,0,0,0,593,0,0,24228,0,0,0,0,0,40422,0,26244,0,
+23109,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,64902,0,0,0,0,3979,60007,0,0,0,28199
+,0,0,0,43142,0,0,0,0,0,0,0,29158,0,30532,0,0,0,0,13256,0,0,0,0,16549,0,0,0,0,0,
+26116,0,0,0,0,0,0,0,0,22825,0,0,0,0,0,0,0,1065,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,18985,4805,0,0,0,0,0,17702,0,0,0,0,0,0,0,0,0,0,3468,0,0,0,0,13447,0,0,0
+,0,0,0,0,0,0,0,0,56871,0,0,1776,15780,0,0,2603,0,10280,31366,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,11592,3591,0,2372,0,0,0,0,0,0,0,20004,0,0,0,0,0,0,12072,518,0,0,1960,
+8999,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7178,32999,0,0,0,0,0,0,1641,0,0,0,0,0,0,0,6764,
+9893,490,4005,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25258,5541,0,14053,306,20743,0,0,
+9422,0,0,0,0,0,0,0,11977,260,0,35175,0,0,0,0,0,0,0,18405,0,0,0,16582,0,0,0,22470
+,0,0,0,0,0,0,2792,0,0,0,14026,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14858,3909,0,0,0
+,57671,0,0,0,0,0,0,15979,0,0,0,2794,15239,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26884,
+9070,0,0,0,0,51846,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19499,37127,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,19205,10350,11910,0,0,0,0,15083,23108,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,169,0,0,0,0,0,0,0,0,0,0,0,15274,41735,0,56774,0,0,2825,0,14025,
+389,0,0,0,0,0,0,0,0,21482,31910,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20456,710,0,0,25032,
+21797,0,0,0,0,0,0,0,0,0,0,32427,21252,0,30150,0,43174,0,0,0,0,0,0,0,0,0,0,0,0,
+11403,0,0,1029,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6892,9252,0,63206,
+3496,14406,0,0,0,0,0,0,0,0,0,0,22568,0,0,21253,0,0,0,0,0,0,0,39623,0,0,10189,0,0
+,0,0,0,0,0,0,0,0,0,0,0,30729,59910,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3305,0,0,0,0,0,0,
+0,0,0,7660,24871,0,838,0,0,0,0,0,0,0,0,0,0,0,0,12013,13252,0,551,0,0,0,43207,0,
+30567,0,0,0,0,0,0,0,0,28394,30724,0,0,0,0,0,0,0,0,0,0,22665,22725,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,29414,0,0,0,0,16074,8966,245,1445,0,0,0,0,24872,0,0,0,0,
+13124,0,35527,0,0,0,0,0,0,13259,10917,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+25191,0,0,0,13956,0,0,0,0,0,0,0,54631,19625,12070,3083,0,0,0,0,14436,0,0,0,0,0,0
+,0,0,0,0,0,0,0,21766,0,15463,29322,0,0,0,0,0,0,29990,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,23653,0,0,0,0,0,0,0,0,2643,0,0,21223,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,4114,0,0,0,0,0,0,0,0,34790,0,0,0,0,0,0,0,16103,0,0,0,0,0,0,297,3620,3338,
+10372,0,14727,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29924,22473,13895,
+15529,32455,30378,13540,0,28807,0,0,0,0,0,0,0,64582,18380,0,0,0,0,0,0,0,0,0,0,0,
+0,38598,0,0,0,0,0,0,0,0,1236,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32710,0,0,0,0,4590,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,64935,0,0,0,0,0,0,0,0,0,0,0,0,16744,0,0,
+0,0,0,0,20005,0,0,13608,1191,0,0,0,62183,0,0,0,0,0,24484,0,0,0,0,0,0,0,0,0,0,
+17643,0,0,0,0,0,0,0,0,0,0,0,0,5380,0,0,32328,0,0,63814,0,0,0,2919,0,0,0,0,17034,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,60295,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,7690,486,0,0,0,39270,0,49094,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12555,0,0,0,0,0,0,0,0
+,0,0,0,0,20967,17993,12647,0,0,0,16036,32616,0,0,0,0,16294,8555,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,35174,0,0,0,0,0,0,30346,0,0,0,0,0,0,0,
+14797,3652,0,0,8268,12934,0,54950,0,0,0,0,2632,33959,0,23175,0,0,0,0,0,36262,0,0
+,0,0,0,0,32684,26918,0,32676,0,0,0,0,0,0,0,0,0,0,15625,11943,1206,0,0,0,0,18052,
+0,0,0,0,0,16422,0,0,0,26404,0,0,28777,0,0,24902,0,0,408,45351,0,35719,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,3658,17446,0,165,0,0,0,0,0,0,0,6151,0,0,24424,0,0,0,0,0,0,0,
+24170,24293,0,0,0,0,0,0,0,0,0,11847,0,39591,0,0,0,0,0,0,9549,2788,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1010,0,0,0,0,0,0,26055,31724,0,24233,
+1828,0,0,0,0,0,0,0,0,0,17284,0,0,0,0,19464,0,0,0,0,0,0,0,0,32452,0,0,0,28871,0,0
+,0,0,17704,53383,0,0,0,0,0,0,0,0,0,17892,1938,0,0,0,0,0,16362,0,0,21605,0,0,5003
+,0,0,0,0,0,0,22693,0,22342,0,0,0,55846,0,0,0,0,0,0,0,0,0,22853,0,0,0,0,0,0,0,0,
+6600,263,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24836,0,0,0,0,0,0,0,0,0,
+40711,0,0,0,0,0,33894,0,0,0,0,0,0,13000,0,0,0,0,0,0,0,0,0,0,30308,0,0,0,0,0,0,
+5386,0,0,0,0,0,0,27844,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17740,0,0,0,0
+,25093,29064,0,0,0,0,0,0,0,12680,11462,0,0,0,0,0,0,0,0,84,7303,0,0,0,0,0,0,0,0,0
+,0,0,27044,457,0,22924,58246,19016,0,2606,45703,0,5157,0,25028,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,2065,0,0,0,0,0,31946,0,0,0,0,0,0,0,0,0,0,0,0,33382,0,
+47878,0,0,0,0,0,0,0,0,25004,0,0,0,0,0,0,0,26153,35654,0,58055,30668,0,0,0,0,
+25988,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4456,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,7560,20583,0,0,0,0,0,0,0,0,0,37510,0,0,0,0,0,0,0,0,0,42822,0,0,0,0,0,0,0,0,
+0,0,0,1733,0,0,0,8196,0,0,11241,0,30572,60326,0,15013,0,0,0,40646,0,23812,0,
+10022,0,0,0,0,0,0,0,0,12874,31015,0,0,0,0,0,0,1608,0,0,0,0,18308,0,0,0,0,27114,0
+,0,0,0,0,0,0,7944,1382,0,11813,0,0,0,0,0,0,0,0,0,0,0,0,0,24517,0,11621,0,0,0,0,0
+,0,0,0,0,0,0,21702,0,0,13100,8262,2644,7973,0,0,0,0,0,0,0,0,0,0,0,0,1033,12581,0
+,25221,0,0,0,40998,16301,62983,0,0,0,0,1263,9318,0,0,0,18854,0,0,1741,33895,0,0,
+0,0,0,0,26377,0,0,0,0,0,0,0,0,0,0,32165,0,51143,0,0,0,0,0,29412,0,0,0,0,0,0,0,0,
+1674,4230,0,0,0,0,0,10502,0,0,0,0,5545,0,0,0,0,0,2099,45158,0,0,0,0,0,0,0,0,0,0,
+14157,0,26955,0,0,0,0,0,0,0,0,0,17096,0,0,0,0,0,0,0,0,0,0,0,0,0,27050,6726,0,0,0
+,0,0,0,0,0,28554,0,0,7142,0,0,0,0,16936,0,0,0,25833,0,4399,6980,0,46214,0,0,0,0,
+0,10630,21164,0,0,0,0,0,0,0,2446,48551,0,0,0,0,0,0,0,0,0,0,0,13381,0,0,0,0,0,0,0
+,0,15400,12135,0,0,0,0,0,4774,586,0,0,0,0,0,0,0,0,23751,9736,4548,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25577,29607,6250,1637,0,0,0,0,
+22024,0,0,0,0,22308,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37414,24044,0,0,0,14474,29735,
+0,7077,0,45990,0,0,0,0,30568,40039,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+6150,0,4228,0,0,0,0,0,27687,0,0,0,0,0,0,0,24548,21513,1350,0,0,0,33607,0,0,0,0,0
+,0,0,0,11784,1414,0,0,0,0,0,0,0,18244,940,0,0,0,0,0,0,7270,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,16709,0,0,0,0,0,0,0,48935,0,0,0,0,0,0,23660,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,53350,0,0,0,0,0,0,4236,16358,0,4422,6665,32644,0,0,744,18084,0,11014,0,0,0,0,0,
+29508,0,0,0,0,0,0,0,7686,0,0,13289,5478,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,12872,0,0,24134,1005,22916,0,31429,23400,0,0,0,0,0,0,0,28424,0,0,0,
+25706,27109,0,0,26345,0,0,0,0,0,0,25126,0,0,88,0,0,0,0,0,0,0,17032,0,0,21799,0,0
+,10060,0,12296,21892,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20777,14311,0,58182,
+32232,0,10282,0,2121,11527,0,0,0,12325,0,0,0,0,0,0,0,28804,2344,8133,0,0,0,0,
+21864,62695,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2771,0,0,
+23204,0,0,0,0,0,6278,0,0,0,0,0,26597,0,0,0,0,23144,0,0,0,0,0,31816,20070,0,0,0,0
+,0,0,0,0,0,0,24456,2118,0,0,0,0,6570,1156,0,0,0,0,0,0,0,30406,0,0,0,28388,3572,0
+,0,26599,12426,5286,0,0,0,0,0,4967,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24970,24167,0,0,0
+,0,28745,4678,0,0,0,0,0,0,0,1444,236,0,0,0,0,0,0,0,0,19428,0,0,0,0,0,0,2092,0,0,
+0,0,0,0,0,0,0,2827,0,0,0,0,0,19881,19204,0,11749,0,0,0,0,0,0,0,17958,0,17894,0,
+18726,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9190,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,21510,5033,0,0,0,0,22855,0,0,0,0,0,14598,0,29605,0,0,0,0,0,0,0,0,
+617,0,0,0,0,47142,0,0,0,0,0,0,0,0,0,0,3627,0,0,0,0,0,0,0,0,0,0,0,0,0,2225,14823,
+0,0,2637,6182,78,15078,0,0,0,0,20264,0,0,0,0,0,0,36743,4140,44551,17352,25703,0,
+0,0,0,0,0,0,0,0,0,0,0,14024,0,0,0,0,0,0,28004,0,0,0,0,0,7588,0,0,0,0,0,0,0,2087,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18028,0,0,0,300,14212,0,0,1386,40327,0,0,0,0
+,0,0,31082,0,0,22374,0,0,0,0,0,35718,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+26532,7756,0,0,18982,0,0,0,0,0,0,0,0,6440,1159,7180,0,0,0,0,0,0,45766,0,57798,0,
+16740,0,0,6802,60454,0,0,0,26470,0,0,0,0,0,65382,4362,7750,0,0,0,0,0,0,9096,4743
+,334,0,0,0,0,0,0,39974,0,0,0,25828,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3027,0,0,
+0,15816,0,0,0,0,0,0,0,0,48327,0,0,0,0,0,0,0,0,0,0,16168,41799,0,0,24458,8581,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12292,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,54503,0,0,0,0,5097,30852,18664,0,0,0,0,0,0,16484,0,0,27337,0,0,0,
+0,0,0,0,0,0,0,0,0,35942,0,0,0,0,0,0,0,4356,0,0,0,0,0,57030,0,0,1417,41191,0,0,0,
+0,0,23429,0,0,0,0,10024,21735,0,0,10126,0,0,0,0,19046,0,0,0,0,0,0,24105,4710,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4394,0,0,0,0,13253,0,56391,0,0,0,0,0,0,0,0,0,0,
+0,19174,0,0,0,0,0,0,0,0,0,55974,0,0,0,52070,0,15620,0,0,0,0,0,2660,0,0,0,0,21644
+,0,0,52455,0,0,0,0,0,0,0,0,0,8902,0,0,0,0,0,0,3116,0,464,34726,0,0,0,0,0,0,25003
+,12423,0,27172,1896,7335,0,0,0,0,0,35686,0,0,0,0,3472,0,0,0,0,22406,0,0,0,0,0,0,
+0,0,0,45254,0,0,0,0,0,0,0,0,0,0,0,0,0,21124,23594,33127,0,0,0,0,0,0,16684,22087,
+0,0,0,0,0,0,0,0,0,0,0,0,8714,0,0,0,0,0,0,0,0,0,0,55814,0,0,0,0,0,0,4109,23460,0,
+0,8874,0,0,0,0,0,0,0,0,0,147,0,0,0,0,0,0,0,0,0,0,0,0,0,29960,63398,1302,0,0,0,0,
+0,0,0,0,24806,0,0,0,0,0,0,0,0,0,9799,0,0,0,0,0,0,0,31333,0,0,0,0,0,19557,0,0,0,0
+,0,5701,0,0,0,63014,0,0,0,0,0,0,0,21254,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12484,0,0,
+0,48326,0,0,0,0,0,0,0,0,0,0,0,15783,0,0,1202,0,0,0,0,23174,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,3086,49191,0,0,5387,15141,0,0,0,3365,0,0,0,0,20076,14021,
+0,0,0,0,0,0,0,0,0,0,376,40198,0,0,0,52039,0,24932,0,0,0,0,808,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,9860,0,0,0,0,0,23719,0,21476,0,0,0,0,20776,4807,0,0,3177,16678,0,0,110
+,10853,0,0,0,17382,0,0,0,0,0,0,0,0,0,43462,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,7500,4966,0,0,0,0,0,0,0,52102,0,24516,0,0,0,0,0,0,0,0,0,0,0,0,0,26535,0,0
+,0,46247,0,0,0,15557,0,0,0,0,76,52327,0,0,0,0,17866,0,0,0,0,0,0,0,0,0,0,46758,0,
+0,0,0,0,19173,0,0,0,0,0,0,0,0,0,44038,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2985,0
+,0,0,0,0,0,14310,0,0,2125,45831,0,0,0,0,0,0,9838,0,13227,19492,0,0,0,29764,0,0,0
+,0,686,30053,0,0,0,0,0,30789,139,20837,0,0,0,0,502,18533,0,0,0,0,0,19111,0,0,0,0
+,0,31396,0,0,0,17444,0,0,0,0,0,0,0,49862,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25610,550,0
+,0,561,0,29034,0,0,0,3528,0,0,0,1715,14661,18,63463,0,0,0,0,0,0,0,0,0,0,14186,0,
+0,0,0,0,0,0,0,0,0,0,29578,59014,0,39430,0,0,0,0,2250,16612,0,31780,0,0,0,0,0,0,
+462,16967,0,29029,0,0,0,0,0,23462,0,0,0,0,0,0,0,0,1768,0,6025,16998,1804,0,0,
+54182,0,0,0,0,0,0,0,0,14124,0,6154,29702,0,0,0,0,0,7716,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,48807,0,8292,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16389,5933,0,
+14857,51303,0,0,0,0,0,0,0,0,0,0,0,35623,9097,23047,0,0,23112,0,0,0,0,0,438,0,0,0
+,0,0,0,0,151,9254,1390,0,0,0,0,0,0,54215,0,0,0,0,6187,0,0,0,0,13095,0,0,0,0,0,0,
+0,0,0,0,0,0,9866,0,0,59622,0,0,0,0,0,0,0,0,0,25286,0,0,23848,32069,0,0,0,0,0,0,0
+,0,0,9255,2187,15270,437,0,0,0,0,0,0,0,0,0,0,19493,0,0,0,0,0,0,0,0,0,0,0,11748,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16902,0,0,0,0,0,0,0,0,0,22212,1865,17543,0,
+0,0,0,0,0,21996,0,0,0,0,55975,0,0,0,0,0,0,0,0,32138,21156,0,0,0,0,0,0,14249,0,0,
+0,2388,0,0,0,0,6823,0,0,0,0,0,0,0,0,0,0,0,0,0,26694,0,0,6059,53511,0,0,0,0,0,0,0
+,49542,6159,0,0,0,0,0,0,0,0,0,0,0,0,0,1036,24036,0,2501,0,0,0,0,0,0,17419,51271,
+3377,15142,0,0,0,0,0,0,5007,62374,0,56935,0,0,0,0,0,0,0,0,0,0,0,24422,0,0,0,0,0,
+0,0,0,942,0,0,0,0,0,0,0,0,0,0,28263,0,0,0,0,0,0,0,15622,0,19749,0,0,1611,0,22219
+,48583,25129,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17476,0,0,0,0,0,0,0,0,0,0,
+721,0,0,0,0,32518,0,0,0,18469,0,0,0,0,0,0,5896,29927,3657,23046,0,0,3214,0,0,0,0
+,0,0,0,0,0,112,0,0,0,0,0,3048,455,0,31012,0,0,0,0,0,0,0,23270,0,32677,0,0,0,0,0,
+38086,0,0,0,0,0,0,0,0,0,0,0,0,0,4900,0,0,0,0,0,0,0,0,0,25541,0,18788,0,0,22248,
+1351,0,61734,4524,30629,0,14887,242,29063,0,0,14408,4741,0,0,0,37318,0,0,0,0,0,0
+,0,0,0,0,0,0,8106,0,32107,0,0,0,0,0,0,0,0,0,0,0,1481,0,0,28132,0,25798,0,59783,0
+,0,0,0,0,59078,0,0,0,23366,0,0,0,0,0,0,0,30887,0,0,0,0,16200,0,0,0,335,0,0,0,714
+,0,0,0,0,0,0,0,0,0,0,0,0,0,30730,9478,0,0,0,0,0,0,0,0,0,0,0,18790,0,0,0,0,663,0,
+0,0,1034,31431,0,0,0,0,0,0,0,0,0,0,30120,0,0,0,0,13925,0,0,0,0,0,0,2280,13414,0,
+0,0,0,0,0,22028,23687,3017,11047,0,0,21738,18630,0,0,0,0,0,0,0,30246,0,0,0,0,0,0
+,0,0,0,0,0,0,17257,0,21896,63783,0,0,0,21094,0,18662,0,25700,0,22533,0,0,0,0,0,
+6341,5800,11111,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15399,
+12970,6501,0,0,3179,26438,0,0,0,0,0,0,0,15750,0,13062,0,0,0,0,0,0,0,0,0,0,142,0,
+0,0,0,21284,11177,4391,0,0,0,0,19595,40647,0,0,0,0,0,11877,0,0,0,26439,0,0,0,0,
+695,49126,27467,11972,0,0,0,0,0,0,9961,0,0,0,31722,62982,0,0,0,0,15817,52710,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24614,0,0,0,0,0,20550,0,0,5034,3942,0,0,0,
+45927,0,0,0,0,0,0,0,0,0,0,2548,0,0,0,0,0,0,45606,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,3405,12582,15563,54087,0,0,0,0,0,0,0,0,0,0,0,0,24202,5893,0,0,0,
+44230,0,0,0,5605,0,47782,0,32230,0,0,0,0,0,0,0,0,0,0,0,7014,0,0,0,0,16488,3175,0
+,27237,0,0,0,0,0,40902,0,0,0,0,0,0,0,32004,31434,0,24392,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,29130,58214,0,0,0,0,0,0,29002,0,0,0,0,0,0,0,0,0,0,55366,0,0,0,0,0,0,0
+,0,0,0,0,37926,0,0,0,0,0,0,0,0,1290,0,0,0,4713,0,0,0,0,0,0,0,0,0,0,0,0,0,20812,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1803,966,0,17700,0,0,654,19109,0,51655,0,0,0,0,0,
+10470,1584,0,0,0,0,0,0,0,2506,0,0,25159,4303,0,0,0,395,15879,0,0,0,0,0,0,0,0,0,0
+,1352,6535,0,19652,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4722,7909,0,0,0,0
+,30152,0,0,64742,0,0,0,0,0,0,2153,9125,0,0,279,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,41894,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,1328,17030,0,0,0,0,0,0,0,54151,0,0,0,0,1775,54535,0,0,0,0,31624,0,0,0,
+7150,0,0,0,0,0,0,0,1840,35943,0,0,0,0,0,56455,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+64486,0,0,0,51174,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4134
+,0,0,0,0,0,0,0,0,0,0,0,17092,0,0,0,0,0,0,0,0,0,0,0,0,12,16134,19883,39943,10281,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,44711,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+14125,2407,0,0,0,0,0,0,0,0,0,0,26921,0,0,0,0,0,22188,0,20810,10053,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29220,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28170,0,15208,0,0
+,32517,5736,19271,3562,10534,0,0,0,59655,0,0,0,0,27084,60422,0,0,24969,0,0,0,
+2636,0,0,0,0,26277,0,0,0,0,0,0,0,0,0,0,0,30596,3594,0,0,0,8362,14565,0,0,0,0,0,0
+,10793,12326,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5906,59686,0,0,23081,517,0,15556,0,0,0,
+8486,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19877
+,0,0,0,0,0,0,0,0,7497,0,0,26085,0,0,23784,63591,6568,6310,0,0,0,0,0,0,0,0,0,
+10054,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7018,14470,18858,0,5641,10660,0,0,0,0,0,0,0,
+35526,1515,0,0,0,0,0,0,0,0,0,0,0,27656,0,0,9606,0,39590,0,0,0,0,0,0,0,0,0,0,0,
+53926,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,232,4327,12649,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,20199,0,0,0,0,0,0,26730,0,0,0,19400,14695,0,31334,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19589,0,0,0,0,0,0,0,0,5064,11908,0,27333,0,
+0,0,0,0,0,0,47751,0,0,0,26662,0,0,0,0,0,0,0,55655,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,6245,0,0,0,0,0,0,0,0,23368,63911,0,0,0,0,0,0,0,0,1974,0,0,0,0,0,0,0,
+8520,24037,0,0,0,0,0,0,0,26279,0,0,0,22886,0,0,0,27782,0,30694,0,0,0,0,0,0,0,0,0
+,0,0,33703,0,0,0,30405,0,34598,0,51047,0,0,0,0,1908,0,0,0,0,0,0,0,0,0,0,1511,
+21897,0,0,0,0,0,0,51398,0,24870,0,32647,0,0,0,35015,0,0,0,0,0,0,0,11204,0,0,0,0,
+0,0,7758,57991,0,0,0,30949,0,0,22,15140,9162,0,0,0,0,0,0,25540,20136,7108,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16427,10789,9805,0,0,0,0,0,0,0,0,0,4680,0,0,52679,
+0,0,0,0,0,14884,0,0,0,16804,0,0,0,0,0,0,9578,5287,0,0,0,0,0,0,0,34054,0,0,0,0,0,
+19076,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7627,55719,0,39463,14446,58374,0,0,0,0,23465,
+15845,0,0,0,0,0,38534,0,0,0,17893,10922,0,7176,678,0,0,0,0,0,0,0,0,3113,46279,0,
+0,0,0,0,0,0,23334,0,0,18088,23268,0,62342,0,0,0,16613,0,0,0,0,0,0,0,0,0,38182,0,
+0,0,0,0,0,25292,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10344,71,6446,0,0,1893,0,0,
+1106,0,28680,30756,0,41126,0,0,1492,0,15341,0,0,0,0,17575,0,21220,0,0,0,0,0,
+25060,2088,21828,0,0,0,0,0,358,0,0,0,0,0,16708,0,0,0,1668,0,0,0,0,0,12260,0,0,0,
+0,0,0,0,0,4078,0,0,0,0,0,12713,6215,0,0,20329,0,0,0,0,0,0,0,0,0,0,31204,0,0,0,0,
+0,0,0,0,0,0,3732,0,1646,0,0,27460,0,34406,17128,14341,0,0,0,0,0,19527,0,0,0,0,0,
+0,0,0,0,0,6120,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8551,21546,10212,3020,
+2951,0,17638,0,0,6985,44999,2218,8197,0,0,30472,63366,0,26660,0,0,0,0,0,0,0,0,0,
+0,0,0,1265,0,0,0,0,0,0,0,2610,0,0,0,11278,20295,0,0,0,0,0,19780,0,0,0,0,0,0,2353
+,10852,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5421,24292,0,0,0,0,0,0,0,0,0,0,0,0,0,34407,0,
+0,0,0,0,0,15432,20774,0,0,0,0,0,0,0,0,12360,10757,0,0,0,33126,0,0,0,0,0,0,0,0,0,
+0,0,29573,0,2343,0,0,0,0,0,63079,0,0,0,0,0,0,0,0,0,43015,0,16038,0,0,0,0,0,0,0,0
+,1480,25573,0,0,0,0,0,0,0,8839,0,0,0,0,0,0,0,24645,0,0,0,0,0,0,0,0,0,0,0,0,0,
+5063,0,0,0,0,0,45830,0,0,0,0,0,0,0,0,0,0,823,0,0,64039,0,0,0,0,0,0,0,0,0,0,0,0,0
+,15300,0,0,0,0,0,0,2924,46759,6760,19268,0,0,0,0,0,0,0,0,0,34182,0,0,3977,18149,
+0,0,0,32199,0,0,0,0,0,0,0,0,0,23524,25994,0,0,10343,0,0,0,9733,0,0,0,0,0,0,0,0,0
+,4740,0,0,0,0,0,0,0,0,0,16741,0,0,4626,23367,0,0,31400,0,0,3557,0,0,4234,0,0,0,0
+,0,0,28486,0,0,0,0,0,14213,0,57191,0,0,0,0,0,0,0,0,0,0,240,0,0,0,0,65318,29832,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29989,0,31846,0,0,8170,0,0,4421,27626,30884,0
+,0,20204,0,0,0,0,44614,534,20868,0,0,0,0,0,0,0,0,0,0,0,0,0,28710,0,10277,0,0,0,0
+,0,29511,0,19813,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27020,0,0,0,0,0,0,53094
+,0,35207,0,0,0,37542,0,61766,8584,8037,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12488
+,22757,0,0,0,0,0,0,0,0,0,0,0,0,0,23814,0,0,0,0,0,0,0,0,0,19973,0,0,0,63943,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36006,0,0,0,19012,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,8580,0,0,0,0,0,0,0,18021,0,0,0,0,0,0,0,0,80,1254,0,0,0,42630,0,0,0,0,0,
+0,0,16262,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2124,25479,0,0,0,0,16873,0,0,0,0,3142,
+0,0,18443,0,0,0,0,0,3917,0,8841,1190,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,20645,0,0,0,0,0,0,0,0,0,0,0,9284,0,0,24394,41351,0,0,0,42087,0,62566,0,0,0,0,
+0,0,0,0,0,0,6728,4199,0,0,0,0,25515,0,1231,0,374,15623,0,29956,0,14118,0,0,0,0,0
+,19047,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31718,0,0,0,0,0,0,0,0,0,0,0,20900,0,16743,0
+,0,0,28902,0,0,0,0,0,0,0,0,0,0,0,0,2578,0,0,0,0,0,0,0,0,0,13838,0,0,10052,0,0,0,
+0,7432,43783,17097,0,0,0,0,0,873,0,0,0,398,0,0,0,0,0,0,0,0,0,8459,23559,0,53030,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,35750,0,4071,0,0,0,38662,0,41414,0,0,0,0,11656,0,0
+,0,0,0,4011,42695,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25353,0,0,0,0,0,0,0,27177,22372,0,
+0,0,0,0,30980,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,46278,3976,12711
+,0,0,0,0,0,0,0,0,0,0,0,20517,0,0,0,0,0,0,0,0,0,0,0,0,4072,11078,0,0,16553,2405,0
+,0,0,0,0,0,0,0,2670,0,0,0,0,0,0,32998,0,0,0,0,0,0,0,47046,0,30533,0,0,11050,9734
+,13129,0,0,0,0,23494,0,0,0,0,0,58310,0,0,0,57543,0,0,0,0,0,0,0,0,0,0,0,0,0,454,0
+,0,0,0,0,0,5163,59687,2220,0,0,0,0,0,0,29510,0,0,0,0,0,0,0,0,0,0,0,17316,0,20069
+,0,0,0,0,0,0,0,0,0,5319,0,0,0,0,0,0,0,0,0,27174,0,0,0,0,0,0,0,22949,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,19208,0,0,0,0,0,0,20933,0,0,6026,8742,0,0,0,17380,0,13127,2797,0
+,0,30116,0,0,5963,8004,0,57126,0,0,0,0,0,42854,14792,30759,0,24964,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,16933,0,0,0,0,0,0,15176,40839,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+788,30341,0,0,0,0,21036,24102,0,0,0,0,30123,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+22597,31531,26789,0,59559,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9352,29863,0,0,0,0,0,0,0,0
+,0,24551,0,0,0,0,0,0,0,20516,0,0,0,39462,3665,0,28265,0,8778,64262,0,57414,9132,
+0,0,18276,0,0,0,0,0,0,0,0,0,0,26344,30725,524,19751,0,13796,0,0,0,0,0,0,0,0,0,0,
+18155,0,12841,0,74,24998,13579,1061,0,64199,0,0,8776,0,0,60231,0,25412,0,0,0,
+59143,0,0,0,0,0,0,14344,1510,0,0,0,38374,0,0,0,0,0,0,0,0,13353,0,0,0,0,0,0,0,0,0
+,0,9446,0,0,0,0,0,0,0,32613,0,0,0,0,0,0,0,0,0,0,0,0,0,19844,0,0,0,0,14859,0,0,0,
+0,6662,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14154,0,29770,0,0,0,0,0,
+16520,2182,0,0,0,0,0,36102,3340,0,0,0,0,0,0,0,0,25189,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,15720,0,0,0,0,0,0,22758,0,0,304,0,3243,14117,0,0,0,0,0,0,0,0,0,0,5130,
+12679,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21733,10441,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,36103,0,0,0,0,0,23590,0,57479,0,0,0,0,0,0,0,0,0,0,0,0,10824,
+18372,0,0,0,0,0,35078,15722,12967,0,0,0,0,0,34599,0,0,0,0,0,0,0,0,0,0,0,0,0,
+53639,0,38630,0,0,0,0,0,0,31017,11333,0,0,0,0,19144,0,9513,0,0,0,0,0,0,0,0,56711
+,24042,0,1197,0,0,58502,0,0,0,0,0,0,0,0,0,8230,6121,18628,0,0,0,0,0,0,25290,0,0,
+0,0,0,1514,0,0,0,0,0,0,0,14378,9798,32363,0,0,0,0,0,9577,0,0,0,0,0,0,26788,0,0,0
+,0,0,0,330,10533,0,0,0,0,0,42246,0,0,0,0,0,0,5074,21028,0,38119,0,0,0,0,0,0,248,
+0,31176,62054,0,53287,0,0,0,0,271,0,0,0,0,0,0,0,0,0,0,0,0,0,9224,2117,0,0,0,0,
+15818,5607,0,52582,0,0,0,0,0,0,0,0,18248,24005,23018,0,0,0,0,0,0,0,0,0,0,0,0,0,
+427,0,0,39910,0,0,7080,11399,0,0,0,0,0,0,0,0,0,0,22220,57894,0,0,0,0,0,0,0,13156
+,0,1413,1007,0,0,0,0,21415,0,21543,0,0,0,0,0,0,0,0,0,41702,22538,9573,0,0,0,8806
+,0,0,6920,56359,0,0,0,0,0,0,0,0,0,0,0,42215,0,0,13708,0,0,0,0,0,0,0,0,0,0,16453,
+0,0,0,0,0,0,1582,1764,3282,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11653,0,0,0,0,12139,0,
+29482,31076,1673,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,40262,0,0,0,33862,0
+,0,0,0,0,20996,0,0,0,0,0,4615,0,0,0,0,0,0,0,0,0,0,0,43943,333,19367,0,0,0,0,0,0,
+0,26821,0,32389,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4936,11687,0,0,0,0,0,0,0,0,0,10885,0
+,0,0,0,0,25926,0,0,0,0,0,0,15851,0,0,0,0,0,0,0,0,0,8360,0,17130,7942,0,11460,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,18150,14248,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+22310,0,0,0,42758,0,0,0,0,0,0,0,0,29354,5574,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,31109,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11236,0,0,0,0,0,0,0,0
+,0,0,0,0,0,9156,0,0,1801,14023,0,0,0,0,0,62406,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+23620,0,0,0,0,0,0,0,0,0,0,31018,65510,0,0,0,0,0,0,0,26182,0,0,0,0,0,0,0,27717,0,
+0,0,0,0,0,0,46950,0,0,0,0,0,0,0,0,0,0,0,0,0,31108,0,11366,0,0,0,3717,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8552,6054,3339,0,0,0,0,51622,0,
+0,0,0,0,0,0,3718,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28358,0,2756,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1462,0,0,27622,0,0,0,0,0,0,0,62502,14410,56743,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,12206,0,0,0,0,0,0,0,0,0,0,0,0,36550,0,38054,0,0,0,
+21221,0,0,0,0,0,0,0,27077,0,0,16906,0,12587,12101,0,0,0,0,0,0,10414,28775,21769,
+60167,0,56646,0,0,0,0,0,20740,0,0,0,0,0,0,5931,5351,0,65478,0,0,0,0,0,0,7977,
+52647,0,4868,0,0,0,55463,0,0,0,0,0,32197,0,0,0,0,0,13445,0,0,0,26631,0,0,0,0,0,
+11237,0,0,0,0,209,1285,0,0,1928,0,0,0,0,43334,23849,23172,0,0,0,0,0,0,0,0,24712,
+62439,8811,3463,20457,0,0,0,0,0,0,0,0,0,16008,56263,0,0,0,0,0,0,0,0,0,0,0,60358,
+22761,6565,0,0,30888,27686,0,0,0,17093,0,0,0,0,22121,0,0,0,7593,14182,0,28103,0,
+0,0,0,0,45126,0,0,0,0,0,0,0,0,0,0,0,0,0,31844,0,0,0,0,0,0,0,0,0,0,0,0,0,18500,0,
+0,0,0,28202,0,0,0,0,0,0,0,0,26308,0,29541,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+29572,0,0,0,21285,0,0,0,0,0,60839,0,0,0,0,0,30407,15949,2981,0,0,0,46439,0,0,0,0
+,0,23911,26505,25222,12811,5895,0,6343,0,0,0,0,0,0,0,0,0,0,0,31815,0,0,0,0,0,0,0
+,0,19688,10245,0,0,0,31301,26985,28964,0,0,0,0,0,0,0,0,27208,31172,0,0,0,0,216,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16292,0,0,0,0,0,8743,0,0,0,0,0,0,0,0,
+0,0,0,0,0,6438,0,0,0,33319,0,0,0,33286,0,0,0,0,0,0,0,0,0,22181,7499,24774,0,
+10756,0,44775,724,0,25768,25669,24873,5349,25257,0,0,54566,0,0,0,0,0,0,0,0,0,0,0
+,327,439,357,0,0,6536,8452,0,0,1802,0,0,61350,0,15045,0,0,0,0,0,0,0,0,0,0,0,
+38343,0,0,0,0,0,0,0,0,0,0,32491,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+22885,0,0,32073,0,0,0,9546,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27748,0,0,23176,0,0,0
+,0,0,0,0,0,0,0,0,0,24583,0,0,0,0,0,34118,0,0,0,0,2158,0,5586,30340,0,0,0,0,0,0,0
+,0,0,0,0,0,0,24452,0,0,0,0,2409,4390,0,24196,0,0,0,0,0,0,32264,26948,20587,0,0,0
+,2155,0,0,0,0,0,0,0,0,0,0,0,4328,26276,0,0,0,0,0,0,0,0,23564,0,12458,11367,0,0,
+25162,0,0,0,0,0,0,65414,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32966,0,0,0,
+34662,0,0,0,0,0,39238,0,0,0,0,11400,10214,266,12452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,15173,0,0,0,13668,0,13222,0,23364,0,0,0,0,0,11941,0,0,0,0,0,0,0,0,0,
+25575,0,0,0,57383,0,0,0,10308,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,2865,9287,75,0,0,0,0,0,0,0,0,0,0,21508,22380,59526,0,0,0,23589,0,0,0,51590
+,0,0,0,0,0,0,0,0,0,0,0,4645,3980,28295,0,0,0,0,0,12388,0,0,0,0,0,0,0,0,0,0,0,
+21734,0,17607,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,41767,0,0,0,0,0,0,0,18436,0,0,0,0,0,
+0,0,21958,0,19430,0,0,1204,0,0,0,0,0,0,0,0,0,3240,55239,0,0,0,0,0,30660,0,0,0,
+28901,0,0,0,0,4716,0,0,0,0,0,0,0,0,0,0,0,11754,0,0,0,0,22086,0,22564,8749,0,0,
+28391,0,0,0,0,0,0,0,0,0,0,0,2886,0,0,0,0,0,0,0,29062,0,0,0,0,0,0,0,40358,0,0,
+15916,39526,0,13735,0,0,0,0,28938,0,407,4006,0,0,0,26916,0,0,0,0,0,27526,30280,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24586,0,24649,5126,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8684,0,0,0,0,0,0,0,23019,0,22377,18599,0
+,0,0,0,0,0,0,0,0,0,27593,9735,0,20196,0,0,0,0,28168,48423,0,0,0,0,0,31399,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,13892,0,0,0,0,0,17606,0,0,15242,29767,26378,17701,0,0,
+14472,0,4840,0,0,0,0,0,0,24708,0,9349,4330,0,0,0,0,0,0,0,16137,0,0,34854,0,0,0,0
+,0,0,0,0,0,0,0,25063,0,0,0,0,0,0,6603,12583,0,0,0,0,0,0,0,0,7433,29188,0,0,0,0,0
+,31270,0,0,22920,3143,0,0,0,0,0,23461,0,0,0,0,0,0,0,0,618,0,0,0,0,21381,0,11524,
+0,0,0,0,0,0,21004,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,312,23239,0,0,0,0,0,0,0,0,0,0,0,
+0,2313,0,0,40614,0,0,14825,0,0,0,0,0,0,46535,0,41190,7853,0,31656,0,0,0,0,0,0,0,
+0,0,3433,5255,0,0,0,0,0,0,0,33958,0,0,0,0,72,15493,0,0,0,0,0,0,0,36070,0,0,0,0,0
+,0,0,14724,0,0,0,0,0,29828,0,0,0,0,0,0,0,18822,20008,0,0,0,0,2438,2952,0,0,0,0,0
+,0,0,0,0,0,0,0,0,3342,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24420,0,19908,0,0,0,8101,0,
+17479,0,0,3530,0,8202,29319,0,0,1132,6789,0,0,23881,0,0,0,4810,0,0,46918,0,0,0,
+41574,0,0,0,0,0,0,0,0,0,48582,0,0,0,0,0,0,0,0,0,0,0,0,0,39334,0,0,0,26117,0,0,0,
+0,0,0,5100,0,0,0,0,0,23496,27813,4045,54918,0,0,0,0,0,0,6473,7428,0,0,0,0,6792,0
+,0,0,0,0,3560,32103,0,0,0,0,0,0,0,0,0,0,0,54790,0,0,6926,0,0,0,0,16518,0,0,0,0,0
+,20806,0,0,0,0,1841,3174,0,0,0,0,9612,18374,0,0,0,0,32744,0,0,9671,0,59879,0,
+23300,8073,0,0,14758,0,0,0,10342,0,0,0,0,0,0,24808,14759,0,0,0,0,0,0,5515,0,0,
+14852,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2354,23271,0,32740,0,0,0,0,0,0,0,0
+,0,0,18472,0,0,0,0,0,0,0,0,33190,0,0,0,0,0,0,0,0,8972,21669,0,0,0,0,0,0,0,0,0,0,
+0,25574,0,0,0,0,5096,0,14283,55367,0,0,0,0,0,0,0,0,0,12644,0,0,0,0,4651,0,0,0,0,
+0,0,0,661,0,0,13638,19466,0,0,0,0,0,31273,0,8010,0,0,0,0,0,3211,0,0,0,0,63430,0,
+0,0,0,0,15237,0,0,0,0,0,0,19018,2437,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14312,0
+,0,0,0,16836,0,0,471,35975,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,6023,0,0,0,0,0,0,0,0,11593,9639,0,0,0,55783,0,5700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27908,0,26598,0,0,6667,6470,0,0,0,0,0,0,0,
+62534,0,0,0,0,16522,27911,0,0,10025,7172,0,0,779,0,360,17477,0,0,0,61991,7752,
+7717,1494,0,0,0,26569,40742,0,0,0,0,0,0,0,26406,10474,32196,0,0,0,0,0,50567,
+16521,11716,0,0,0,0,0,55558,0,0,0,0,0,0,0,0,0,0,0,61926,0,26436,0,0,0,0,4459,
+10598,0,0,0,0,0,0,0,0,0,0,0,9223,0,29318,0,0,0,0,0,60423,0,0,0,0,0,0,0,47078,0,
+50246,0,12612,0,0,0,0,0,0,0,61799,0,55015,0,21060,7309,0,0,0,0,0,11976,0,0,0,0,
+23527,0,0,0,0,0,0,10347,15942,0,34023,0,0,0,0,4969,0,0,0,0,0,0,0,0,28997,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36454,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3466,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19716,
+28872,0,0,0,0,0,22152,0,0,0,0,0,0,26342,0,0,0,9764,0,0,0,0,0,0,0,21798,0,0,0,0,
+13,6853,32136,0,0,0,0,0,0,0,750,0,0,54502,0,0,0,0,0,0,0,0,0,46183,0,0,625,22854,
+0,0,0,0,2061,23588,0,0,11049,56262,0,0,18538,1509,0,0,17258,4453,0,0,0,0,12429,0
+,0,0,0,8102,0,0,0,0,0,0,8074,0,23852,0,0,0,0,0,0,0,0,0,0,0,16136,3428,0,27876,0,
+0,0,7332,0,0,0,0,0,28900,0,0,2284,0,0,17573,201,1508,0,0,0,0,0,0,0,0,0,31365,
+27688,22565,0,0,0,5159,0,0,0,0,4584,42599,0,0,0,44422,1068,23173,0,0,0,613,0,0,0
+,12645,0,0,0,0,0,27076,6732,0,0,0,3913,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,22244,29992,15911,0,0,0,0,0,22982,0,0,0,0,0,0,0,50598,0,0
+,0,0,0,0,5161,1574,0,0,0,0,0,0,0,0,0,19108,0,0,0,35014,0,0,0,25956,29067,0,0,0,0
+,0,0,0,0,0,0,47079,0,0,0,0,0,0,0,0,0,0,1356,61927,0,0,0,64455,2122,64231,0,0,
+18763,0,0,0,0,0,0,0,0,0,907,34471,0,0,0,39078,0,0,1995,0,0,0,0,0,0,0,0,0,0,56518
+,0,0,0,0,0,0,0,0,0,0,0,0,822,0,15978,44423,0,0,3112,325,0,0,0,0,0,15397,0,0,0,0,
+0,0,0,0,0,0,1193,4294,4968,15559,0,46150,0,0,0,0,0,18917,0,0,0,0,0,0,9928,37543,
+0,0,0,0,13097,36999,0,0,0,15430,0,0,8424,29639,0,0,0,0,0,0,0,0,0,0,0,0,0,25734,0
+,0,0,0,0,0,0,0,0,0,0,0,0,40487,0,13284,0,11141,0,0,0,32388,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5298,57702,0,0,0,0,0,0,0,13060,0,0,0,0,0,0,
+8233,42278,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36422,0,0,0,7972,0,0,0,
+18437,0,0,0,0,7406,0,0,0,9225,0,0,0,0,0,0,0,0,0,13865,47591,18220,53703,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2796,0,0,0,0,0,24940,17223,0,0,0
+,13221,0,0,0,0,0,0,0,0,0,0,15848,0,0,0,0,0,6122,1735,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,16968,18151,0,0,0,0,0,47494,0,0,0,0,0,0,26089,19494,0,0,0,0,0,15494,0,0
+,0,0,0,0,0,0,0,0,28809,0,0,0,0,42727,0,55174,0,0,0,0,0,0,0,0,0,0,0,20485,0,0,0,0
+,0,0,0,0,0,0,0,58598,0,0,0,0,0,0,0,0,0,0,0,0,0,15172,0,0,0,0,0,0,0,0,0,35302,0,
+48135,20972,33094,0,0,0,0,0,9765,0,0,0,0,0,0,0,0,0,39559,0,0,13736,6950,0,0,0,0,
+23658,8903,0,0,0,0,0,0,0,22662,0,0,0,0,0,58886,7468,0,0,0,0,0,0,0,0,64550,0,0,0,
+0,0,47622,0,0,0,50886,0,0,0,0,0,57606,912,0,0,0,0,0,0,0,0,0,1449,0,1169,0,718,
+46151,12104,0,0,0,0,0,0,48230,0,0,0,0,0,0,0,0,0,0,1259,0,0,33734,23208,62567,0,
+65158,0,0,0,0,0,0,0,0,0,0,28684,59878,0,0,0,0,0,0,0,0,0,0,25769,0,0,0,0,65479,0,
+0,0,0,555,22789,0,19748,1769,10246,8680,0,0,0,0,0,0,0,0,0,14250,0,5899,3303,0,0,
+0,0,0,0,0,0,21097,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21638,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,10795,0,0,0,16204,0,0,0,0,0,26986,2469,0,14660,0,0,0,0,0,45447,
+12234,3494,4555,10566,0,0,0,0,0,0,0,0,0,0,0,0,2801,0,0,0,15755,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,39654,0,0,0,0,0,0,6763,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+33574,0,10279,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,63527,0,0,3912,0,0,7492,0,0,0,35142,
+0,0,0,0,0,0,17576,8103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16713,4198,0,0,4782,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,16228,0,0,0,0,25961,20166,0,0,0,10980,0,0,0,0,0,14340,
+18922,14567,0,44199,0,0,0,0,0,0,0,18406,0,0,0,0,0,37606,0,0,0,0,0,0,0,0,0,20902,
+0,0,0,56358,0,38342,0,0,0,0,9514,36071,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21700,0,0,
+5266,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1134,0,1453,0,0,0,0,0,3882,0,0,0,
+0,0,0,0,0,4004,0,0,0,51910,0,0,0,0,0,23076,4648,0,0,0,31051,25351,0,0,0,22884,0,
+0,0,0,0,63975,0,0,2376,16997,0,0,2096,0,0,0,3373,7046,0,0,0,0,0,0,0,30726,0,0,0,
+0,20,0,13707,614,0,0,12840,3079,0,0,0,0,0,51046,3729,0,32680,0,0,0,0,0,24008,
+62759,0,0,4745,0,0,0,0,0,0,0,0,0,0,0,0,0,2414,0,0,44262,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,24937,0,0,0,0,0,0,0,0,19140,0,13575,0,0,0,0,0,0,0,39110,0,0,0,28036,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,4261,0,0,0,0,5992,0,264,0,0,0,0,0,0,0,13739,0,21928,0,
+0,0,0,0,0,0,0,0,0,0,4232,15110,0,0,0,0,0,0,0,0,0,30022,0,0,27977,0,0,0,0,0,24776
+,0,0,0,0,0,2962,0,0,0,0,0,0,26564,22441,0,0,0,0,0,13640,11205,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,19305,1894,0,0,0,0,0,0,0,0,0,0,9389,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14119,
+5224,135,0,0,0,0,0,0,0,0,0,25796,0,0,0,0,0,0,7470,0,0,0,0,63815,0,55654,0,0,
+12584,0,1524,33223,0,0,0,9895,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11624,
+0,0,0,5614,0,0,0,0,0,0,0,21320,0,0,53607,0,51206,0,0,0,25863,0,0,0,0,0,0,0,0,0,0
+,0,8964,1740,0,0,0,0,0,0,0,0,13476,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7975,0,
+0,3306,8134,0,8389,48,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25766,0,0,0,0,0,0,0,0,0,
+52166,0,0,0,0,0,0,0,0,0,0,0,0,0,21477,31112,31652,0,0,0,0,0,0,0,28452,0,0,0,
+44231,0,0,0,0,0,0,0,24805,0,0,0,0,0,0,0,0,0,0,12428,6471,0,0,0,0,525,17926,0,0,0
+,26919,0,0,18120,0,0,0,30024,0,0,0,0,0,0,0,0,0,0,29189,0,0,0,43559,0,0,0,0,0,0,
+19787,7557,0,59334,0,0,10184,6085,0,44039,0,0,0,0,0,0,0,11175,0,0,0,0,30440,
+63110,0,0,0,0,0,0,11017,0,0,0,0,0,0,0,0,27204,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,29126,0,0,0,0,0,0,0,0,0,0,0,0,0,0,622,0,5226,2727,0,15588,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4650,0,2675,0,0,32420,0,0,0,61511,0,0,
+5419,17829,2123,0,0,0,0,0,0,0,0,0,0,0,0,0,0,38183,2640,0,11274,14533,1842,0,0,
+42663,12681,3430,0,11845,0,0,0,0,0,0,0,0,0,6533,0,0,0,0,0,54598,0,0,0,0,0,0,0,0,
+0,0,0,0,12616,38535,0,0,0,0,0,32229,0,0,0,54279,0,48614,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,31401,0,0,0,0,34310,0,0,0,22788,0,52134,0,0,0,0,0,0,0,23302,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,40678,0,0,0,51463,535,0,0,0,0,15525,0,0,0,0,0,0,4904,869,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,63718,0,0,0,0,0,0,0,0,1678,0,692,0,0,0,
+0,0,0,0,0,0,26216,0,0,0,0,0,29355,0,0,0,0,25095,0,0,0,0,4335,0,0,0,0,0,14538,0,0
+,0,0,0,0,0,0,0,27273,55014,0,0,0,0,0,27271,0,0,0,0,0,30468,0,0,0,0,18186,0,0,0,0
+,0,14345,0,0,0,2152,0,0,0,0,0,0,0,0,0,0,0,0,0,0,58438,21034,0,23339,21318,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,21412,0,0,0,0,0,0,0,12869,0,0,4875,0,0,0,0,29191,0,0,0,0
+,0,0,1640,10247,0,14244,0,0,0,0,9867,0,0,0,0,0,12363,0,0,7653,0,0,4168,2663,0,
+4580,0,11143,0,0,0,0,0,0,0,30662,0,0,0,0,0,6724,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,13764,0,0,0,0,0,0,0,0,0,0,0,0,234,6821,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,25639,0,0,0,0,0,0,0,0,0,29958,0,3461,0,0,0,0,0,0,0,0,0,28324,
+18795,7013,12746,11655,0,37287,0,0,10953,7718,9705,0,0,0,0,0,0,0,0,0,0,46534,0,0
+,0,0,0,0,0,0,0,0,8137,17988,0,25156,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,41415,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15784,6918,0,0,0,0,7019,10919,0,0,0,
+0,0,0,0,0,0,0,4171,55495,4940,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22440,19333,0,0,
+28136,0,6249,21317,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,53414,0,0,0,57318,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,39303,0,0,0,0,0,19940,0,0,0,0,0,0,0,25543,0,0,0
+,0,0,0,0,0,0,0,0,0,2698,3911,0,0,0,26790,0,0,0,0,0,0,32424,0,0,18470,0,0,0,14726
+,29834,0,0,0,0,0,0,0,0,0,0,0,1000,4197,0,0,0,19366,0,0,0,39878,0,0,0,0,2185,8901
+,5288,9829,25000,0,0,0,0,0,0,1062,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,35622,0,
+0,23048,62503,6506,0,0,0,0,0,0,0,13609,10438,0,0,0,0,0,0,0,0,0,0,7723,42119,0,0,
+0,0,0,13317,0,0,0,41606,0,27111,0,0,21194,11461,0,0,0,0,26856,58342,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,20940,48710,0,0,0,0,0,0,5227,0,0,0,0,0,10061,31300,0,0,0,19236,0
+,0,0,0,0,30277,13896,0,0,0,12876,13159,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,428,
+46951,13134,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15462,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,21668,0,0,0,0,0,0,0,0,0,0,0,0,2249,0,0,0,0,44967,0,0,0,0
+,0,0,3465,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24868,0,0,0,0,0,23909,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,2190,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16164,0,
+10437,0,0,5263,20102,20938,0,0,0,1192,1030,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,21385,4870,0,0,0,0,0,0,0,0,0,0,0,18596,0,0,0,0,1422,4038,2858,0,0,0,0,0,0,0,0
+,48998,0,0,0,0,0,0,0,0,6508,37350,0,0,0,0,0,0,0,0,17001,39431,0,0,0,0,0,30182,0,
+21445,7403,28164,0,51750,0,0,0,62631,0,0,0,0,0,0,0,31206,0,0,0,0,0,0,0,0,0,0,0,
+7751,0,0,0,0,0,0,0,0,0,13477,0,0,456,26693,0,0,0,0,0,0,0,0,0,0,0,0,14890,0,0,0,0
+,0,26697,22022,13225,27364,0,0,0,18884,0,0,0,0,0,0,0,0,3659,0,0,0,0,0,0,0,0,0,0,
+0,1448,5413,0,0,0,0,0,0,0,0,0,0,0,6340,0,0,18091,18725,0,0,0,0,0,0,0,0,0,0,0,0,0
+,22118,0,0,0,18981,0,0,0,0,0,0,0,0,0,29223,3724,0,0,0,0,43526,0,0,0,25668,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21545,9862,0,22692,32201,60646,0,7300,0,0,
+0,58887,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19460,0,0,0,0,0,0,0,0,0,50342,0,
+65255,4360,17286,0,0,0,0,0,0,0,28708,0,0,30025,60102,0,0,0,0,0,0,0,0,0,47014,0,
+31973,0,9572,0,0,0,0,0,0,0,18501,0,0,0,0,0,14597,0,0,0,53735,5228,22183,0,0,0,0,
+0,0,1554,24164,0,0,0,0,0,0,0,0,0,0,0,0,10827,0,0,0,0,34918,0,0,0,0,22252,0,0,
+46855,0,0,0,0,0,31207,0,0,10733,0,0,63334,0,0,0,0,8616,50119,20169,12678,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,58087,20298,5,0,0,30920,0,0,0,0,0,0,0,296,13190,0
+,30663,0,0,18536,12228,0,6788,0,0,0,0,30890,21796,0,0,526,0,0,0,0,0,0,0,0,0,0,0,
+0,20965,0,0,0,0,2161,0,0,0,0,0,0,24038,0,0,0,0,13544,7398,0,0,32522,9605,0,0,0,0
+,3208,7590,0,0,0,43846,0,0,0,38663,0,0,0,0,0,39014,4142,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,6373,0,0,13676,0,0,0,0,30374,21288,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22791,0,0,0,0,0,37958,0,0,0,0,0,0,0,0,0,0,9452,
+9990,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4841,0,0,0,0,18820,152,0,0,0,0,0,13260,3334,0,0
+,24234,8422,0,17957,0,0,0,10244,0,0,0,0,0,0,0,0,0,0,0,7204,0,0,0,0,1201,26151,0,
+31173,0,0,0,0,0,0,0,0,0,0,0,0,0,64838,4203,7525,521,0,18888,37031,0,0,0,0,0,0,0,
+0,7082,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4490,12487,0,0,0,0,0,0,0,36615,0,0,
+0,14854,0,0,0,0,0,0,0,0,0,0,0,0,6539,13029,9704,38983,0,0,0,0,168,10405,0,0,0,0,
+394,25607,0,57063,0,0,0,0,0,0,0,0,0,0,16141,19878,0,0,0,0,0,0,0,0,0,29446,0,
+12036,0,0,0,0,0,6982,18572,0,24584,14535,0,0,0,0,0,0,0,0,0,0,16,0,21642,0,0,0,0,
+0,0,5254,0,0,0,0,0,0,0,0,1622,0,0,0,0,0,0,0,0,0,0,0,0,0,3853,9126,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,7241,10982,0,0,0,0,0,0,0,0,0,0,0,0,0,0,950,0,0,57990,0,0,277,0
+,0,0,694,36007,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,42470,0,0,0,0,18409,
+51142,0,0,0,0,0,0,0,0,0,28646,0,0,0,30693,0,0,0,0,0,56295,5544,0,0,0,0,8518,8366
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,45670,0,0,
+9608,33062,0,0,0,0,0,0,0,0,0,0,0,18694,0,0,0,0,1672,23493,0,0,6955,7655,0,36134,
+0,0,0,0,0,0,0,0,23432,647,0,0,0,0,0,0,0,0,0,0,0,13382,0,0,0,19621,0,0,0,0,0,0,0,
+0,0,20228,0,0,2728,31495,0,0,0,0,29096,22213,235,35495,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,5348,0,0,0,0,8968,1989,0,0,1066,0,0,11492,5965,31367,0,0,0,0,0,0,0,0,0,0,0,
+18727,0,0,0,6757,0,0,10765,4646,0,36166,0,27943,0,0,26888,8420,0,0,0,0,0,0,0,0,0
+,29316,0,0,0,0,0,0,0,0,0,0,4975,0,0,0,14762,3111,0,0,0,0,0,43399,0,0,0,0,0,0,0,
+18980,0,0,0,0,0,44550,0,0,0,0,4051,0,0,0,0,37734,0,0,0,0,0,5188,0,0,0,0,0,24486,
+0,5989,0,41159,0,0,0,0,0,0,0,0,0,20326,0,0,747,6884,0,0,0,41798,0,0,3117,22919,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21032,0,0,0,0,0,0,0,0,9574,0,0,0,0,0,0,
+4302,0,0,0,0,0,0,0,0,0,0,0,21068,34630,0,0,0,0,0,64071,0,0,0,0,0,0,26667,7943,0,
+0,0,0,0,52934,0,0,17002,0,0,0,0,0,0,20294,0,0,0,0,0,0,0,0,0,27301,18347,7974,0,0
+,0,0,0,0,0,0,16874,0,0,0,0,45414,0,0,0,0,0,0,648,1575,0,0,0,31749,0,0,0,23301,0,
+0,0,0,0,0,0,0,0,0,0,0,15912,50535,0,0,0,0,1993,8582,0,0,0,0,0,0,0,38438,0,0,0,0,
+0,0,0,0,0,0,0,0,15850,6183,0,0,0,0,3402,0,0,27494,0,0,749,0,0,0,0,0,0,0,26025,
+29606,0,0,7144,19622,30504,0,0,0,0,0,0,0,0,21316,0,0,0,0,0,0,0,0,0,21444,0,0,
+1289,6919,0,0,0,0,0,0,8299,0,0,0,14090,35655,0,0,0,0,0,0,0,0,2377,15206,0,0,6028
+,4452,0,25508,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,50310,0,0,0,0,1269,0,0,0,0,0,0,0,0,0
+,0,0,0,51014,0,0,0,0,0,0,0,9286,0,7429,0,0,28393,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,16680,452,0,0,0,0,0,23718,0,0,0,31750,0,0,0,0,0,0,0,0,3568,0,0,13604,0,0,
+0,0,0,25255,0,0,0,50982,0,56582,0,0,7467,0,0,0,0,30181,0,0,0,0,0,0,0,30564,7208,
+7845,0,0,0,0,0,0,7726,0,0,0,0,62182,0,0,0,41094,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,22695,0,0,0,0,0,0,17736,0,0,0,0,0,0,0,0,50054,0,0,0,14180,0,0,0,0,
+0,0,8974,0,0,0,0,0,0,0,0,23332,0,0,0,11140,0,0,0,0,0,0,0,24262,27145,9540,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,26537,45510,6062,3879,0,0,20233,25991,0,0,17803,0,0,0,0,0,
+13962,5508,16971,27013,7437,31494,0,0,0,0,0,0,0,0,0,0,4714,0,0,0,0,0,0,0,0,17189
+,0,0,0,0,0,27492,0,0,26953,0,0,0,0,0,0,0,0,0,0,41319,0,0,0,0,0,0,0,0,0,0,0,47430
+,19596,12549,0,0,0,8390,1006,0,0,0,0,0,0,0,0,0,0,24100,17577,4,0,0,0,0,0,22277,0
+,0,0,0,0,0,0,26692,0,0,0,0,0,24676,0,0,0,0,0,0,0,0,0,0,0,0,0,29477,0,0,0,21573,0
+,0,0,0,0,0,0,0,0,0,9864,14214,0,0,0,0,0,0,25771,5766,0,0,8909,8679,0,0,6861,
+16166,0,38887,0,0,0,0,0,0,12392,8678,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+52646,1354,2950,0,14692,0,0,10572,49830,0,0,0,0,0,0,0,0,3626,582,0,0,0,55750,0,0
+,0,30885,0,0,0,0,0,0,0,0,0,0,0,0,0,5830,0,0,2090,0,0,0,0,0,0,0,0,0,0,0,0,31142,0
+,0,0,0,0,10503,0,0,18825,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+57158,0,0,30792,63526,0,0,0,9863,16267,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,18824,0,0,0,0,0,0,19653,25388,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9292,0,0,0,0,0,0,0,0,0,0,36358,0,0,0,0,0,0,0,0,
+0,0,25480,23015,0,0,10440,6725,0,0,0,22436,24265,15109,0,0,0,62311,8906,34534,0,
+0,0,0,0,0,15913,1319,0,0,20296,1477,30760,0,25928,16772,0,0,1069,0,0,0,0,0,0,0,0
+,0,0,0,0,17029,0,31909,0,0,0,0,0,0,0,0,0,0,0,41638,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,41542,0,21478,0,0,0,9796,0,0,0,0,0,0,0,0,0,0,22187,58343,0,0,0,24295,0,
+0,0,0,0,61831,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2833,5829,0,0,0,62855,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,16676,0,0,0,0,0,0,13577,27431,0,0,0,0,21480,10501,0,16932,
+0,0,0,0,0,22918,0,48294,2574,2150,0,0,0,0,1897,4518,0,0,0,0,0,0,25064,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,47942,0,0,0,0,10990,13767,
+25705,37863,21672,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,43430,1712,0,0,0,0,
+18886,0,0,0,0,0,0,0,0,0,0,0,0,0,10535,0,0,0,0,0,0,0,15012,0,0,0,0,0,0,0,0,0,0,
+14734,0,0,55782,0,0,30824,10886,0,0,0,0,0,51302,0,0,8012,0,0,0,0,0,20680,6981,0,
+57415,11,0,0,18277,0,14564,0,0,0,32390,0,0,0,0,0,0,0,0,0,0,0,0,19113,5158,0,
+11172,0,16774,0,0,0,0,0,0,0,0,0,0,0,0,10315,13830,0,0,0,0,0,0,10410,7141,0,0,0,0
+,0,18116,0,0,0,44615,15403,13958,0,1540,14632,19525,24201,19781,0,0,0,24165,0,
+38951,0,0,0,0,0,6308,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17416,15749,3438,13255,0,0,0,0,
+0,0,0,0,0,0,0,32228,0,0,0,0,176,0,0,50566,0,0,0,0,0,0,0,0,0,0,0,0,0,21540,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,58982,0,0,0,0,0,0,0,0,0,5284,0,0,0,0,0,0,25897,28326,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15945,0,0,0,9804,293,0,0,0,0,0,0,0,13988,23082,4677
+,0,0,0,0,0,0,0,0,0,0,0,5670,0,0,0,0,0,0,0,44070,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,6405,0,30692,0,0,0,61702,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,45926,0,15398,0,0,0,0,0,0,4554,2692,0,0,0,0,0,32485,0,0,0,10084,0,0,0,0,0,0,
+24297,0,0,0,0,0,0,0,0,22790,0,0,0,55110,0,0,0,0,0,0,0,0,0,0,0,0,7112,0,31530,
+45255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,40743,17226,22599,0,
+0,0,0,0,0,0,0,3695,0,0,0,0,0,0,0,0,0,0,56999,0,0,0,0,0,13799,3114,21287,1353,
+7591,0,0,0,8455,0,0,6824,0,0,0,0,0,0,0,14569,0,0,0,29000,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,19979,0,18376,0,0,0,0,0,0,0,0,0,0,0,0,11332,0,0,0,49863,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,2191,7527,23148,58022,0,0,0,30631,0,26565,0,0,0,0,0,0,0,0,0,0,0,0,754
+,0,0,15877,0,0,0,0,0,0,0,17510,7657,2821,0,0,0,0,0,0,0,0,0,41927,0,0,0,0,0,0,0,0
+,2569,34439,0,0,3790,0,0,0,15339,8775,0,0,0,0,0,0,0,0,0,0,0,15908,0,0,21419,8359
+,0,0,0,0,424,0,0,0,0,0,0,25318,8008,20551,0,0,0,45735,30058,30372,0,0,0,0,0,0,0,
+0,0,0,0,0,0,26180,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31432,10567,0,0,0,0,
+17450,0,0,0,0,0,0,30310,0,38022,0,0,0,0,0,28932,0,0,0,0,0,43910,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,22180,12075,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22247,0,0,22826,
+12359,0,0,0,0,4105,50407,0,0,0,0,0,0,13581,28583,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,28936,0,0,0,0,0,17673,10310,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,905,57862,
+1580,0,0,0,0,58630,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13479,0,0,
+14153,13286,0,0,9259,0,0,0,0,0,6606,3524,0,0,0,0,0,6567,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,631,49255,0,0,0,0,0,42886,0,38215,0,0,0,0,0,0,0,0,17580,
+0,0,0,0,0,0,0,0,55046,0,0,0,0,0,0,0,0,0,10213,0,0,0,0,3604,37767,0,0,0,0,0,0,0,0
+,0,0,0,30950,0,0,0,0,0,0,0,0,0,62087,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,23528,0
+,0,0,0,0,0,0,0,0,28715,4229,0,0,0,0,0,0,0,0,0,0,1226,26820,0,0,0,12133,6984,261,
+21130,32548,0,0,0,0,0,0,3565,12390,20713,28071,0,0,1706,25287,0,0,0,0,0,0,0,0,
+14670,0,0,0,0,0,0,30534,0,0,0,12615,0,43750,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28228,
+0,0,0,0,0,0,0,45095,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1224,3975,10954,6375,0,0,0,0
+,0,0,0,0,0,0,23180,20100,0,0,0,0,25736,8519,0,0,0,0,0,6663,0,2534,0,0,0,0,0,0,0,
+0,23720,0,0,0,0,0,0,0,0,0,0,19398,0,47814,26281,49702,0,0,4332,12965,0,0,5704,
+3206,0,0,0,0,0,0,0,0,0,0,0,0,0,15396,0,0,0,44102,0,0,0,0,0,0,0,0,0,0,0,0,0,25317
+,1064,39271,27433,0,14952,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14308,0,0,0,0,0,0,2763
+,4100,0,0,0,0,0,0,18792,0,0,0,22154,32583,0,6244,0,0,0,0,0,0,0,49478,0,0,0,0,0,0
+,0,0,0,0,0,21894,0,0,11048,0,0,0,0,0,0,11685,0,53862,0,0,15114,0,13870,0,0,0,0,0
+,0,0,919,0,0,0,31916,0,22570,101,0,0,0,0,0,0,0,0,0,7333,0,0,0,0,3272,0,0,0,0,
+27718,32712,0,0,0,0,0,0,0,0,0,1782,0,3688,0,0,0,0,29862,0,0,0,0,0,0,0,22469,0,0,
+0,0,0,0,0,55302,850,15492,0,0,0,5927,19786,13350,0,25702,0,0,0,0,0,0,0,0,0,0,0,
+40390,0,0,0,0,0,0,0,0,0,0,0,0,0,20260,0,0,0,0,0,0,0,0,0,0,0,0,0,15335,8394,0,0,0
+,0,0,0,26566,0,0,0,0,843,2245,0,0,0,0,0,0,0,0,6959,0,20488,1638,0,0,11533,50759,
+0,0,0,0,0,20871,0,0,0,0,0,24519,0,0,0,0,9544,23591,0,0,0,0,0,0,0,0,0,0,20969,
+7109,29001,0,0,32422,31720,64294,0,0,0,0,16106,0,0,0,6930,4933,0,0,0,22917,0,
+27015,0,0,0,0,19880,8070,0,0,0,0,23945,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3310,0,87,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18439,0,0,0,20742,0,0,0,10597,0,0,0,0,0,0,
+0,0,0,0,0,0,20236,0,0,0,16584,3429,0,0,0,0,0,0,0,0,27241,0,0,0,0,16132,0,0,0,0,0
+,0,0,0,0,0,0,0,244,28261,0,0,0,0,0,0,0,29509,0,0,0,0,0,0,0,0,2921,31781,0,0,0,0,
+0,0,6408,4196,344,0,0,0,0,0,0,0,0,0,0,0,0,0,11689,45863,0,0,0,0,906,3301,0,0,
+25544,32421,0,0,0,0,0,0,0,0,1260,61607,0,27302,0,0,8682,16614,0,0,0,0,10830,0,0,
+9604,15049,13413,0,0,0,0,0,0,26761,0,0,0,0,0,0,61990,0,0,0,0,0,12580,0,0,11432,0
+,0,0,0,0,22507,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12389,0,0,0,0,0,0,
+2408,22661,14507,43239,0,9700,0,0,24714,0,0,0,0,0,0,0,0,23972,0,0,0,0,0,0,0,0,0,
+0,0,0,0,34086,0,0,22955,7238,0,0,0,0,0,28485,13806,20038,0,0,0,0,22602,0,0,0,
+1645,22340,0,0,0,0,0,0,0,0,0,0,0,26502,0,0,554,0,0,0,0,61735,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,2694,0,0,0,0,0,0,0,0,0,0,0,0,0,0,883,27879,15948,0,3242,57382,0,0
+,0,0,0,0,13930,0,0,0,0,0,30922,0,4137,52615,0,0,0,0,0,0,0,0,0,0,0,0,0,31911,
+16072,0,0,0,0,0,0,0,0,0,0,0,0,26340,0,61671,0,0,0,0,3145,56199,0,0,0,0,0,0,0,0,0
+,0,0,0,280,0,5131,33479,0,15751,0,0,0,0,0,0,4136,1446,0,0,0,0,0,0,11304,17863,0,
+0,0,0,0,25125,0,0,0,36646,6057,0,0,0,855,11301,0,0,0,0,0,64774,0,0,0,19397,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,1040,27367,0,0,0,0,0,0,0,0,0,0,0,0,0,64358,0,0,178,
+132,0,0,14763,24455,0,0,0,46374,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,46246,0,0,0,37382,
+0,0,0,7462,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8166,0,0,6921,0,0,0,9163,0,0,0,119,
+0,0,0,23146,17156,0,0,0,0,0,9127,0,0,0,17927,0,0,0,0,0,22084,0,0,0,0,0,39879,0,0
+,2035,0,1067,0,0,0,0,0,16652,59591,0,0,0,0,0,0,0,0,0,0,0,0,20171,0,0,0,0,17733,0
+,0,0,0,0,32037,0,0,0,0,0,14277,0,0,0,0,0,0,0,42022,0,0,26793,20358,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,8907,0,0,0,0,0,0,0,0,27780,0,0,0,0,32330,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,39399,0,9732,0,16199,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,854,0,2984,45063,25418,26980,22539,0,9133,3653,15528,28743,4649,0,616,
+65127,0,0,0,61863,0,0,0,0,0,0,0,55303,0,0,0,0,0,0,0,0,23880,0,0,0,0,0,0,0,31848,
+62854,0,0,0,0,0,0,0,0,0,0,0,49606,0,27974,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,32580,0,0,0,26052,4043,0,0,40454,0,0,26056,30565,0,0,0,0,0,0,0,31398,0,0,0,0
+,0,0,0,0,29288,1797,0,0,3220,0,0,0,0,0,0,0,0,0,20427,0,0,0,0,23621,0,0,0,0,0,0,0
+,0,0,24261,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,35591,0,0,6862,0,0,0,4265,0,6285,
+5383,0,0,0,0,0,36870,0,39847,0,0,17224,5414,27882,58118,0,0,13224,4262,0,0,0,
+31302,0,0,1388,2982,11881,0,0,0,0,0,0,16837,809,0,24140,10724,0,0,0,0,5835,0,0,0
+,0,0,0,0,0,0,0,0,1256,19237,0,0,0,0,0,0,0,5796,11848,0,0,52870,11464,0,0,0,0,0,0
+,0,0,0,5645,9158,0,25223,0,0,0,0,0,39142,24968,8135,32104,28678,0,0,0,46311,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,23820,0,0,0,0,0,4050,0,1323,25220,0,0,0,20133,0,
+0,0,0,0,0,0,0,0,0,0,9381,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,13444,1198,60806,0,0,0,0,17356,50247,30632,0,0,0,0,0,0,0,11944,
+999,0,0,0,0,4010,10404,0,0,0,0,0,0,10346,0,0,49510,0,0,0,0,0,0,0,0,0,0,0,0,0,
+38919,0,0,0,0,0,0,6351,60966,20137,487,0,0,0,0,0,0,655,2406,17387,43303,0,0,0,
+17063,0,0,213,0,0,0,0,0,0,17221,0,0,0,0,0,0,0,0,0,0,0,10820,0,0,0,0,369,6,0,0,
+9098,21093,0,31653,0,0,0,0,0,0,0,0,0,27143,0,0,16234,0,0,0,0,0,0,0,0,6020,31723,
+28293,0,0,0,0,1936,30695,0,0,0,0,0,52902,0,0,29512,10791,0,20420,0,0,16010,0,0,0
+,0,0,0,0,0,0,0,0,5324,0,0,0,0,0,0,0,0,0,0,0,0,13383,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,24328,0,0,0,0,0,0,40870,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,24648,0,0,0,0,0,0,0,7786,2852,0,0,0,0,0,0,0,0,0,44678,0,17925,0,0,105,53062,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18762,0,0,40679,0,0,0,16165,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,20390,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,62310,1322,
+14247,0,0,0,0,0,0,0,0,1832,6052,0,0,11882,0,0,0,0,17668,0,28262,0,29542,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28007,0,0,0,57223,
+1585,0,0,0,0,0,0,0,0,0,0,0,21162,0,0,62247,0,0,0,0,0,25414,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,36326,0,0,0,23845,0,0,0,0,0,0,0,2693,0,0,0,0,0,0,0,
+13125,0,31236,0,0,0,0,0,22502,0,0,0,0,0,0,5994,10309,0,0,0,7269,0,0,0,0,17929,0,
+1011,44647,0,0,0,0,0,14919,0,0,0,0,20586,5350,0,0,0,45702,0,13189,0,0,0,0,0,0,0,
+0,0,35782,17992,0,0,0,0,0,8203,0,0,0,0,0,0,56678,0,0,0,0,0,38087,4233,0,2127,0,0
+,0,0,0,0,0,0,0,0,10148,0,0,0,2021,0,0,0,0,0,0,0,47206,0,0,0,0,0,0,0,9220,0,0,0,0
+,19465,0,0,0,0,0,0,0,0,39206,0,38055,0,0,0,0,0,0,0,46982,0,0,0,0,0,22054,3850,0,
+0,0,0,0,0,0,55,0,10542,0,0,0,0,7239,0,0,0,0,0,59367,0,0,14761,0,0,0,0,43079,0,0,
+0,0,0,2726,0,0,9582,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37478,0,
+0,0,31364,0,0,0,0,0,0,0,0,20393,8933,0,0,0,0,0,9380,0,0,0,0,16905,549,0,0,0,0,
+182,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1558,0,0,0,0,0,0,0,19242,0
+,0,0,0,0,0,24933,0,6276,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,42310,23595,
+24068,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13191,6158,2567,0,0,268,47047,0,0,0,0,0,0,0,
+0,0,27940,0,0,0,0,0,26726,0,0,0,0,8200,1222,31562,0,0,0,0,0,0,0,0,0,2922,8231,
+8904,29157,0,0,0,0,0,0,0,0,23976,4836,0,0,0,0,0,0,0,0,0,0,0,0,31658,0,0,31685,0,
+0,2889,6213,0,0,0,0,0,13605,0,0,0,0,0,24772,0,0,0,0,0,0,0,0,0,0,20684,26468,
+24075,0,0,0,21193,0,715,679,0,0,0,0,0,0,3050,7654,0,0,0,13798,0,0,0,0,15,27973,0
+,0,8491,2086,0,0,0,43206,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60391,0,0,0,0,0,0
+,0,25892,0,22276,0,34374,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20165,0,0,25672,0
+,0,0,1811,24839,0,31044,0,0,25513,0,0,0,0,0,12810,0,0,62438,0,0,1325,0,364,3782,
+0,0,0,0,0,0,0,0,8042,19687,0,0,0,33415,0,0,0,0,0,0,0,0,0,0,0,7205,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,11844,0,0,0,0,3341,1543,6698,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,29766,0,0,0,0,0,0,0,0,0,0,0,41158,0,24294,0,3844,12329,
+0,0,0,13738,0,0,0,0,0,0,0,0,26245,0,0,0,0,0,0,6378,0,343,4838,0,0,0,24358,11688,
+0,0,0,0,0,0,0,0,0,1489,34759,0,0,0,0,363,51974,1878,11013,0,0,32265,59782,0,0,0,
+28421,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22756,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14089,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,29257,61222,0,30661,0,28327,0,0,0,0,0,0,0,0,0,0,0,0,0,
+27108,8843,0,9673,2084,0,0,0,16327,0,48455,0,0,0,0,0,0,4876,9316,0,0,0,0,0,0,0,0
+,0,0,0,0,9035,18852,0,0,0,0,0,0,0,0,0,0,0,0,0,4164,0,0,14827,1349,0,0,0,0,0,
+11909,0,0,0,0,0,0,0,0,0,21765,0,0,0,0,0,0,0,0,31272,63910,0,0,0,25924,0,0,0,0,0,
+0,0,0,0,44487,0,0,0,20612,0,0,27754,31428,0,0,0,0,0,0,0,17287,0,3943,0,0,0,63302
+,0,0,0,0,25256,19942,0,55142,0,39046,0,0,0,0,0,0,0,15367,0,0,0,0,0,0,0,0,0,0,0,
+28422,0,0,0,0,0,0,0,0,0,0,0,0,9576,63847,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,25226,5734,0,0,0,0,0,0,13801,4997,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,43942,1270,2566,6284,0,0,16230,0,0,0,20678,0,0,0,0,0,38855,0,0,0,0,
+29643,0,0,0,41,3655,0,0,0,0,0,14276,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,15686,0,0,0,15718,0,50694,0,0,16232,8007,0,0,0,5060,
+329,11591,51,0,0,0,0,0,0,0,0,0,13065,7302,27530,15366,0,24934,0,0,0,17828,0,0,
+4552,6311,0,0,0,0,0,0,0,0,0,0,0,47686,368,12103,10122,33830,0,0,599,18534,9579,
+49479,0,5668,0,0,0,0,0,0,0,0,0,13157,0,0,0,0,23274,14055,0,0,0,0,0,48903,0,0,0,0
+,0,0,1871,0,15434,0,0,0,16174,62470,0,0,0,0,0,0,0,0,0,0,0,7749,0,0,0,0,0,0,0,
+30501,0,0,0,0,25675,0,0,0,0,0,0,0,0,0,0,9285,0,0,25323,1669,0,0,0,0,0,0,0,0,0,0,
+0,3588,0,0,0,0,0,32902,0,0,4426,0,0,0,0,57959,0,0,0,0,0,0,29898,58278,0,0,0,0,
+11880,1220,0,0,0,41479,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,23141,0,0,0,0,0,0,0,0,0,0,0
+,0,0,42566,0,0,0,0,0,0,0,40167,9484,3493,0,0,0,0,0,21126,0,0,0,0,8649,18918,0,0,
+0,0,0,0,0,34886,2601,0,0,0,0,12518,0,0,0,0,7976,10311,0,0,0,0,0,0,0,45190,0,0,0,
+0,0,0,0,0,0,0,16842,20229,0,0,0,0,0,0,7528,4614,0,0,0,0,0,30086,0,0,0,1671,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,9896,6277,0,0,0,0,0,61191,0,41287,0,21956,0,0,20010,0,0,0
+,0,0,0,0,0,0,0,0,13195,0,0,0,0,1381,0,0,0,0,365,30951,24268,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,7044,0,0,0,0,0,0,0,0,0,0,27944,359,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,28487,0,0,77,0,0,0,0,0,0,0,0,56775,12586,8421,0,0,0,0,0,0,26185,14599,0,0,
+8040,5702,12585,3109,0,0,0,0,0,21574,5388,0,0,0,0,0,0,0,5106,52454,0,0,0,0,0,0,0
+,0,1907,29895,0,6116,0,0,0,0,11081,5285,0,28069,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4104
+,0,0,0,0,0,0,41511,0,0,0,0,0,0,5262,0,0,0,503,4231,7720,34343,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7304,10374,1718,0,0,29127,0,0,0,0,0,0,0,0,23497,
+22567,6952,2340,0,0,0,0,0,0,0,0,20360,12453,0,45094,0,0,0,0,0,28582,0,0,0,0,680,
+0,0,0,0,0,0,0,0,0,0,0,0,0,23084,0,0,0,30696,0,0,0,0,45862,0,0,0,0,0,0,0,0,0,0,0,
+0,5580,6053,0,0,0,0,0,0,0,0,0,0,712,70,0,0,26091,11335,0,0,0,0,13612,0,13160,
+1926,435,51559,0,0,0,0,0,0,0,0,0,0,0,0,0,47302,19083,0,0,12742,0,1607,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,6155,37095,0,0,0,0,0,18948,7146,0,0,0,0,0,0,0,7848,
+2055,0,0,0,0,8910,0,19336,0,0,48070,8490,0,0,0,0,0,0,0,9932,56423,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,4133,0,0,0,0,0,0,0,0,0,43398,0,0,0,0,0,0,0,0,16173,0,0,0,0,0
+,0,0,32011,0,0,30918,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26854,0,0,0,0,0,0,0,0,0,0,0,0
+,0,20389,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18889,0,0,0,0,8965,0,0,0,44358,0,0,0,8997,0
+,34055,0,0,0,0,0,29350,0,0,501,17767,0,0,32457,60262,0,0,0,30886,0,0,3757,1063,0
+,0,0,25637,0,0,0,0,0,28068,0,26374,0,0,0,0,0,0,0,0,0,0,0,11684,0,0,0,0,0,0,24779
+,229,0,13766,0,0,7402,11525,0,0,0,0,0,0,0,0,26313,23686,0,0,29736,47527,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27268,0,0,0,0,553,0,0,0,0,0,0,32038,0,0,0,0,1135,
+26596,0,0,12300,14631,0,0,0,43238,0,871,0,0,31496,0,8457,17669,0,12836,0,0,0,
+22726,0,38758,0,0,375,6564,0,0,0,0,0,0,0,0,0,0,0,0,170,18535,0,22948,0,0,32360,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,25764,0,0,0,0,0,0,0,0,0,0,0,15652,0,0,0,32774,0,0,0,0
+,0,0,0,0,0,28551,0,0,0,0,0,0,0,0,0,0,0,0,15145,0,0,0,21100,27654,0,0,0,0,0,0,
+4874,26215,0,1639,0,0,0,0,0,0,0,0,0,0,4169,0,0,0,0,0,7336,0,0,0,0,21572,0,0,0,0,
+0,0,0,0,0,24644,1675,2533,0,0,0,53318,0,13094,0,0,0,0,0,0,0,6246,0,22020,0,0,0,0
+,0,0,0,0,0,0,0,28453,5576,5124,0,0,0,0,0,0,0,0,0,0,0,0,0,27910,0,29382,18216,
+8583,0,0,0,39174,0,43558,0,0,0,0,0,0,0,0,0,11973,0,0,0,0,0,23397,0,0,0,0,6091,0,
+0,0,0,0,0,0,6474,16197,14217,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,26728,0,567,48839,0,0,0,0,0,15271,0,0,31818,43974,2450,0,0,0,0,0,0,0,
+11368,9191,0,44454,0,0,0,0,0,0,14568,12293,0,0,0,8453,0,0,0,0,0,0,0,0,0,0,0,0,
+32040,0,0,0,0,0,0,0,0,0,0,0,0,902,0,0,0,27236,5612,11495,0,0,0,0,0,0,0,0,9194,
+23684,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27430,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+26217,44870,0,0,0,0,0,0,5581,7173,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+52775,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20968,18340,0,0,0,0,0,0,0,0,4107,11239
+,0,0,0,0,0,0,0,0,0,0,0,29381,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21990,0,0,0,0
+,0,0,0,0,0,48806,0,0,0,32292,0,0,0,0,0,0,0,10884,0,0,0,0,0,0,0,0,0,0,27562,0,
+5643,0,0,0,0,0,0,0,0,0,3089,31525,0,19684,0,0,0,0,0,0,0,61415,0,0,0,0,0,36198,0,
+0,0,0,0,0,0,7908,0,0,0,0,872,743,0,0,0,0,0,0,0,0,1229,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,32484,0,0,0,0,0,34822,0,0,0,0,0,50726,0,0,0,0,7274,0,0,0,15304,
+11526,0,0,0,3047,0,0,0,0,0,0,22376,0,0,0,846,0,0,0,0,35815,0,0,0,23652,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,23721,2148,0,0,0,0,0,0,14856,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,1358,0,3082,0,0,0,848,10949,0,0,0,0,0,0,6504,0,0,14372,0,0,0,0,0,0
+,0,0,0,0,8201,9958,0,0,0,0,0,0,24266,0,0,0,0,0,0,0,0,26469,0,0,0,0,18604,2053,0,
+33511,0,0,0,0,0,9222,0,0,0,0,0,44006,0,0,0,0,0,0,0,0,0,0,0,41895,0,0,0,0,0,0,0,0
+,12044,390,0,0,0,0,0,4935,0,48646,0,56102,3052,16070,0,0,0,0,0,0,0,8612,9320,
+38311,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,500,0,0,0,0,42918,0,
+32550,0,0,0,0,0,0,0,0,27434,57926,17064,0,0,46502,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,26760,6756,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,624,0,5000,0,0,0,0,32293,0,
+0,0,0,0,0,0,0,0,0,0,0,0,26246,0,0,0,0,0,0,0,0,23,7301,0,0,0,36199,0,40838,0,0,0,
+0,0,0,0,0,0,0,0,0,27178,57350,0,0,12457,9317,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16810,0,0,0,14510,0,0,0,0,21319,0,0,0,13508,17,
+11365,0,0,0,0,5291,0,8329,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27685,0,0,0,0,0,0,0,
+52006,0,0,0,7493,0,44263,0,0,0,0,0,0,0,0,9800,0,0,0,25676,61478,0,0,0,0,0,0,5773
+,0,0,0,0,41991,26057,0,0,0,0,0,0,0,0,0,0,22629,0,0,0,47783,362,1959,23468,0,0,0,
+10921,0,0,0,3150,0,0,0,0,0,0,0,0,0,0,0,32456,0,0,0,0,0,0,0,4559,3270,0,0,983,0,0
+,26343,0,33446,0,0,0,61767,0,48390,0,0,0,0,0,0,0,2790,0,39782,7849,0,0,0,0,0,0,0
+,1544,2183,0,0,0,0,0,0,0,0,4040,2471,20009,30020,0,0,11242,0,0,0,5578,53382,0,
+22631,0,0,0,0,0,0,0,0,0,0,0,12901,0,0,0,0,0,0,0,0,0,0,0,0,215,0,0,9030,0,0,0,0,0
+,0,265,1412,0,0,11626,3687,0,0,0,0,0,0,0,0,0,0,0,0,17449,24359,0,0,26729,40134,0
+,0,0,0,29768,61958,0,0,0,0,0,0,0,0,20908,0,0,0,0,0,11016,0,0,47462,21547,5926,0,
+0,14728,2983,24104,15301,0,0,0,0,0,32645,0,0,0,0,0,0,0,3300,0,0,0,15972,0,0,0,0,
+0,0,6634,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3076,0,30983,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,106,0,0,0,0,12775,0,0,0,0,7177,18022,0,0,0,0,0,22534,0,0,0,0,0,0,0,0,0,0,0,
+49894,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27560,0,0,0,0,30278,10668,23877,0,
+0,0,0,0,0,0,29124,0,0,0,0,0,0,0,0,0,0,0,0,0,20870,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,44582,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,48454,0,0,0,0,6442,0,16330,22951,0,0
+,16904,0,0,0,0,644,0,0,0,40038,0,0,0,37222,0,0,0,9830,0,0,0,0,0,34919,0,0,0,0,0,
+0,0,0,0,0,0,13733,0,0,0,28196,0,0,0,0,0,19876,0,0,0,0,0,0,0,23558,0,11142,0,
+27781,0,0,0,0,13864,0,0,0,24682,47847,0,0,0,0,6890,0,0,0,0,0,3981,0,0,0,0,0,0,0,
+0,0,0,0,1772,0,0,0,0,0,0,0,3603,1991,0,27396,8652,0,18312,0,0,0,0,30054,0,0,0,0,
+0,0,0,11270,0,0,0,0,0,0,0,0,0,20708,0,0,0,0,338,0,0,0,0,0,0,0,7050,0,0,0,0,0,0,0
+,0,0,0,0,0,0,14862,0,0,3492,0,0,0,55878,0,0,0,16486,0,0,0,18119,0,0,0,0,0,0,2154
+,1284,0,0,23113,31751,0,0,29547,0,0,0,0,0,0,36647,0,0,0,0,0,0,0,0,0,0,0,18183,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,7913,0,0,0,0,20644,0,0,10508,0,0,0,0,0,0,0,0,0,0,0,0,
+43622,0,0,0,0,0,40966,0,0,0,0,0,0,0,0,0,0,246,901,11529,5191,0,0,0,0,0,0,0,0,0,0
+,0,24454,0,0,26665,27590,0,27397,0,0,0,0,0,0,23562,2949,0,0,30344,62214,0,47334,
+2026,18885,0,0,0,48678,0,0,0,22694,0,0,1972,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,15465,0,0,0,0,38822,0,0,0,0,945,32708,0,54791,0,14918,0,0,0,0,0,0,
+0,0,0,0,0,0,0,23396,0,0,0,0,0,0,5486,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7722,0,0,
+0,0,0,0,0,0,0,0,0,30856,64166,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,35206,0,0,0,30535,0,
+0,0,0,0,0,0,0,0,62663,0,0,1096,17574,31820,0,0,14375,4402,27207,0,0,21448,4676,0
+,0,0,0,16585,5094,0,0,0,0,0,0,0,0,4845,0,0,32870,0,0,0,0,0,0,0,0,0,0,31466,0,0,0
+,0,31783,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4522,16039,0,0,0,0,0,0,0,0,
+0,14469,0,0,0,0,0,0,0,0,0,0,7464,4773,0,0,0,0,0,0,0,0,18636,0,0,0,25640,0,0,0,0,
+0,0,2244,0,0,11818,0,1168,0,0,0,0,0,0,0,6540,23079,13770,7719,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,58150,528,
+34502,32682,0,0,12997,0,0,0,0,0,2214,0,0,0,0,0,58567,0,0,0,26375,0,0,0,0,0,0,0,0
+,0,26437,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26121,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,32005,22952,59047,0,13543,0,0,0,0,0,0,0,0,16328,0,0,33542,0,0,0,19782,0,0,0,
+16644,0,0,0,0,31688,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10276,0,0,0,
+0,0,0,0,36327,0,0,29480,0,0,0,777,12709,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27398,0,0,
+0,4455,9037,31397,0,9221,0,0,0,60487,20840,1796,0,0,0,0,0,0,0,0,8364,0,0,0,0,0,0
+,0,0,0,0,0,19752,44902,0,38566,0,0,18027,0,0,0,0,0,0,0,0,10662,0,0,0,0,0,11812,0
+,0,0,0,0,0,0,0,0,19910,0,0,0,45030,0,0,0,0,0,0,0,0,0,0,0,0,19978,5127,0,11620,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,19817,0,5579,9350,0,0,21002,19718,0,0,0,21926,0,0,0,0,0,0,0,0,0,0,0,0,0,20711,
+0,0,0,20197,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,40550,0,0,0,57510,0,0,0,53895,
+0,0,15017,0,17000,39367,2347,0,0,0,0,0,0,0,0,0,8588,0,0,0,0,0,3273,17862,3498,
+2085,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19048,0,0,0,0,0,11978,58631,0,0,0,0
+,0,0,523,0,12969,198,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28197,0,47846,0,0,0,0,0,0
+,0,0,0,4549,0,0,0,0,0,0,0,0,0,0,687,14917,748,8229,0,0,0,0,0,0,2476,12935,0,0,0,
+0,0,0,22792,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27528,59142,0,0,20876,20134,0,0,0,
+0,440,12068,0,58951,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,48038,0,0,0,60999,0,0,0,0,
+0,0,0,0,0,0,0,0,0,15716,7498,5476,0,0,0,0,20202,37959,0,0,0,0,0,0,0,0,0,0,0,0,
+29801,0,5451,0,0,0,0,0,0,0,0,0,0,50790,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24485,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13573,0,0,22856,0,0,0,0,21927,0,0,0,0,0
+,0,9130,0,0,0,0,0,0,13732,0,0,0,0,0,0,0,0,0,0,2282,583,0,0,0,0,0,0,0,0,0,0,3726,
+26503,0,0,0,0,0,0,9258,0,0,0,0,0,0,0,0,21604,0,0,0,45574,0,0,0,0,0,20710,0,0,0,
+42694,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1163,6694,0,0,0,0,0,0,0,10948,0,0,0,29700,0,0,
+0,0,0,58823,3796,27399,20939,10180,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+19,29287,28649,14534,0,0,16428,45607,0,0,0,0,0,0,25322,0,4908,0,0,0,0,0,0,25476,
+29097,14246,11053,0,0,0,0,0,0,0,0,18502,0,0,0,44390,0,0,0,17765,0,0,0,0,0,0,
+24520,0,0,0,0,0,0,0,0,0,0,17319,0,0,0,0,0,0,0,0,0,0,0,0,0,28166,0,0,0,0,0,48198,
+0,0,31467,0,24585,0,0,0,0,18692,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,23596,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,7236,968,13637,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3763,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14791,0,0,0,12324,0,12741,0,0,0
+,0,0,0,0,0,0,11108,0,0,0,0,4009,40295,20616,4357,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+15015,0,0,0,0,0,43751,0,0,0,0,0,0,0,0,0,0,0,23013,0,0,0,0,0,0,0,0,0,0,0,0,0,
+45542,0,0,0,0,0,0,0,0,0,23974,0,0,0,0,17480,20647,0,0,0,0,0,0,8876,0,0,40806,0,0
+,0,0,0,0,0,14502,17160,17764,0,0,31594,35431,0,0,2890,0,0,0,0,0,0,0,0,27524,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8228,0,56583,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,34278,0,0,0,0,0,0,0,0,0,0,0,0,0,2662,0,26724,0,0,0,0,0,0,0,64198,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22281,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3049,
+54983,0,0,0,0,0,0,0,837,0,17604,0,0,0,0,0,28838,0,0,0,0,0,0,26312,0,0,3910,0,0,0
+,25830,0,0,0,0,0,8391,0,19845,19240,1092,0,0,5449,0,0,0,0,17188,0,0,0,0,0,0,0,0,
+0,10629,0,0,6671,61094,5832,8358,0,0,0,55078,0,0,0,0,0,29860,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,51494,0,28647,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25989,0,0,30153,61318
+,0,0,0,0,0,0,0,24903,0,0,0,4388,0,42054,0,0,0,0,0,0,0,53158,0,0,0,0,0,0,0,50918,
+0,0,0,0,0,0,26251,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5929,2853,0,37126,
+7372,197,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2027,934,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,55686,0,0,5672,5447,0,62758,0,0,0,0,0,0,0,0,0,0,0,0,2923,0,556,1415,
+0,0,0,0,0,0,0,0,0,8645,0,9477,0,0,0,0,0,0,0,48742,0,0,0,0,0,0,0,0,0,0,24235,228,
+0,0,0,0,0,0,0,0,0,0,16970,18823,0,0,0,0,0,0,0,0,0,25158,0,0,0,0,0,18567,20072,
+2823,14313,1830,0,0,0,0,0,0,0,0,27048,23526,0,0,0,0,0,997,492,0,14730,16677,396,
+13574,0,0,0,41671,0,0,0,0,0,0,0,19045,0,0,0,421,17545,3110,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,47111,14475,56551,0,0,0,0,0,0,0,0,0,0,3697,0,0,0,0,0,0,49382,0,35559,0,
+0,0,0,40,0,11496,15621,0,8550,0,0,0,63462,0,0,0,0,0,0,0,36966,0,50406,0,46022,
+1001,0,0,12069,3249,0,0,0,0,0,0,0,0,0,0,0,0,0,15241,0,0,0,0,0,0,0,0,64743,0,0,0,
+0,0,58759,0,0,0,0,1136,26981,0,0,0,0,0,0,0,17732,0,0,0,17157,20011,6629,0,43879,
+0,0,0,13572,25128,10759,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28676,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,875,24007,0,0,0,0,7628,0,0,0,0,0,12268,0,0,0,0,0,0,0,0,19300
+,23210,356,0,0,0,0,0,0,0,0,0,15236,0,0,0,0,0,49670,0,0,0,0,0,0,0,21764,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,13931,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,45799,0,0,436,3589,
+0,0,11402,0,0,0,0,0,0,0,0,0,0,62822,0,0,0,39814,588,0,0,0,0,0,0,27750,0,0,0,0,0,
+0,1609,22660,2346,18951,0,16068,0,0,0,0,0,0,5162,11110,0,0,0,0,15048,1060,0,7879
+,18280,326,0,14886,19656,0,7594,0,0,0,0,0,781,581,0,16198,0,0,0,0,0,0,1078,9892,
+0,0,0,0,0,0,0,0,0,0,4489,0,0,0,0,33798,0,0,0,54534,0,0,0,0,0,0,0,33158,0,0,0,0,0
+,0,0,0,0,42086,13834,2757,8456,16773,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3434,0,0,0,
+0,0,3946,29668,0,0,30634,36775,0,0,0,0,0,24901,0,16069,6280,0,0,0,0,41990,0,0,0,
+0,0,0,0,27365,0,0,0,0,0,0,0,0,0,0,1450,44807,0,0,0,32100,0,0,0,0,0,35110,0,0,0,0
+,0,0,0,0,17448,19591,0,0,0,0,0,0,0,0,0,0,0,0,1739,0,0,0,0,5511,0,0,0,32934,0,0,0
+,0,0,0,0,0,0,18180,0,0,0,23428,19754,0,0,31174,3021,31655,23464,0,0,0,0,0,0,
+57255,0,0,21292,64487,0,0,0,0,0,0,25802,9189,0,0,0,0,0,49254,0,0,0,0,0,0,0,0,0,0
+,5837,50023,0,0,0,0,0,0,0,0,0,15495,0,0,0,0,0,51942,0,0,0,0,0,0,0,0,28104,58662,
+0,50214,0,0,0,0,0,0,0,0,2988,0,22888,31812,0,0,0,0,0,2020,0,18916,0,0,0,0,0,0,0,
+23973,0,0,0,0,17516,11717,0,0,0,55911,0,0,0,0,0,0,0,2855,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,46822,0,24710,28586,0,0,0,1556,0,0,30117,0,0,22090,57127,3403,14087,0
+,0,0,0,0,0,0,0,0,0,1041,0,10633,6916,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27269,0,0,13322,18055,0,29380,0,56454,0,0,120
+,0,0,8773,0,0,0,0,16040,0,0,0,0,0,0,0,27242,23781,0,1572,0,28134,0,0,1512,0,0,0,
+0,0,0,27684,0,38470,0,0,0,0,0,0,1513,8709,0,0,0,0,0,0,0,0,0,0,0,46566,0,0,0,0,
+28521,61159,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24356,0,0,0,0,0,0,0,0,0,13028,0,
+5863,0,0,15693,0,0,0,0,0,0,0,1131,23398,0,0,0,0,0,0,0,26212,0,0,0,0,0,0,0,0,0,0,
+0,0,0,18404,0,0,0,0,1457,26183,0,0,2475,7110,0,0,0,0,27180,60166,0,0,0,20262,0,
+41862,0,0,0,0,0,0,0,0,0,0,2762,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26148,0,0,0,0,0,0,0,0
+,0,28229,0,0,0,29254,0,0,0,0,0,0,0,0,0,0,27690,0,0,13636,12776,1862,0,0,0,0,0,0,
+17225,3271,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28039,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,4457,18117,0,2023,402,0,0,0,0,0,0,0,0,0,0,0,0,0,104,3654,0,0,
+0,0,0,0,0,0,18440,0,0,0,0,0,0,0,0,29861,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,22150,0,0,0,0,0,0,0,0,0,0,0,0,24074,0,0,0,0,0,0,0,0,12004,0,32358,
+0,0,3081,0,0,0,0,0,0,0,0,0,4749,0,0,0,0,0,0,0,0,0,0,0,10792,1799,21322,0,7880,
+12613,0,0,0,0,0,0,0,0,13993,0,0,0,16202,0,0,0,0,32102,0,37223,0,10500,0,0,0,0,0,
+0,0,0,32008,0,0,0,0,0,23816,3236,0,0,0,0,0,23237,0,0,5642,0,4684,294,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,26852,0,0,0,0,0,0,7148,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,7890,61798,939,0,0,56679,0,0,0,0,0,27078,202,5029,0,0,0,0,0,
+0,0,0,0,28005,0,0,15273,24741,5676,20452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,55910,0,0
+,0,0,5069,27942,0,21092,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12517,0,0,0,0,0,0,
+0,0,0,0,0,0,21384,28260,0,2502,20108,0,0,0,0,0,0,0,0,0,0,0,0,46726,0,30790,0,0,0
+,0,0,14725,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1099,6372,0,0,0,12422,15182,0,8683,0,
+10665,19462,0,0,0,0,0,0,1590,0,31628,0,22632,19750,0,0,0,0,0,0,0,24198,0,0,0,0,0
+,50662,0,0,0,0,0,0,0,0,0,0,9131,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11015,0,0,0,0,0,0,0,
+0,16490,54695,0,0,0,0,0,0,0,0,12937,0,0,0,0,16004,0,0,0,0,0,0,0,0,0,2181,6923,0,
+0,0,0,0,0,0,15624,11302,0,0,5673,7559,0,0,14668,15684,0,0,0,0,0,0,24204,48134,0,
+24230,0,55527,0,0,3464,19141,0,0,0,0};
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/dictionary_hash.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/dictionary_hash.h
new file mode 100755
index 0000000000..b3bb9599f4
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/dictionary_hash.h
@@ -0,0 +1,24 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Hash table on the 4-byte prefixes of static dictionary words. */
+
+#ifndef BROTLI_ENC_DICTIONARY_HASH_H_
+#define BROTLI_ENC_DICTIONARY_HASH_H_
+
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+extern const uint16_t kStaticDictionaryHash[32768];
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_DICTIONARY_HASH_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/encode.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/encode.c
new file mode 100755
index 0000000000..141e70aa2a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/encode.c
@@ -0,0 +1,1862 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Implementation of Brotli compressor. */
+
+#include <brotli/encode.h>
+
+#include <stdlib.h>  /* free, malloc */
+#include <string.h>  /* memcpy, memset */
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/platform.h"
+#include "../common/version.h"
+#include "./backward_references.h"
+#include "./backward_references_hq.h"
+#include "./bit_cost.h"
+#include "./brotli_bit_stream.h"
+#include "./compress_fragment.h"
+#include "./compress_fragment_two_pass.h"
+#include "./encoder_dict.h"
+#include "./entropy_encode.h"
+#include "./fast_log.h"
+#include "./hash.h"
+#include "./histogram.h"
+#include "./memory.h"
+#include "./metablock.h"
+#include "./prefix.h"
+#include "./quality.h"
+#include "./ringbuffer.h"
+#include "./utf8_util.h"
+#include "./write_bits.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));  /* src: real array */
+
+typedef enum BrotliEncoderStreamState {  /* Values held by stream_state_. */
+  /* Default state. */
+  BROTLI_STREAM_PROCESSING = 0,
+  /* Intermediate state; after next block is emitted, byte-padding should be
+     performed before getting back to default state. */
+  BROTLI_STREAM_FLUSH_REQUESTED = 1,
+  /* Last metablock was produced; no more input is acceptable. */
+  BROTLI_STREAM_FINISHED = 2,
+  /* Flushing compressed block and writing meta-data block header. */
+  BROTLI_STREAM_METADATA_HEAD = 3,
+  /* Writing metadata block body. */
+  BROTLI_STREAM_METADATA_BODY = 4
+} BrotliEncoderStreamState;
+
+typedef struct BrotliEncoderStateStruct {
+  BrotliEncoderParams params;  /* Not settable once is_initialized_ is set. */
+
+  MemoryManager memory_manager_;
+
+  HasherHandle hasher_;
+  uint64_t input_pos_;  /* Total input bytes copied into ringbuffer_. */
+  RingBuffer ringbuffer_;
+  size_t cmd_alloc_size_;
+  Command* commands_;
+  size_t num_commands_;
+  size_t num_literals_;
+  size_t last_insert_len_;
+  uint64_t last_flush_pos_;
+  uint64_t last_processed_pos_;  /* input_pos_ minus this = unprocessed. */
+  int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES];
+  int saved_dist_cache_[4];  /* Copy of dist_cache_[0..3] kept for restoring. */
+  uint16_t last_bytes_;  /* Seeded with the stream header bits. */
+  uint8_t last_bytes_bits_;  /* Number of bits held in last_bytes_. */
+  uint8_t prev_byte_;
+  uint8_t prev_byte2_;
+  size_t storage_size_;  /* Size of storage_, see GetBrotliStorage. */
+  uint8_t* storage_;
+  /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */
+  int small_table_[1 << 10];  /* 4KiB */
+  int* large_table_;          /* Allocated only when needed */
+  size_t large_table_size_;  /* Entry count of large_table_. */
+  /* Command and distance prefix codes (each 64 symbols, stored back-to-back)
+     used for the next block in FAST_ONE_PASS_COMPRESSION_QUALITY. The command
+     prefix code is over a smaller alphabet with the following 64 symbols:
+        0 - 15: insert length code 0, copy length code 0 - 15, same distance
+       16 - 39: insert length code 0, copy length code 0 - 23
+       40 - 63: insert length code 0 - 23, copy length code 0
+     Note that symbols 16 and 40 represent the same code in the full alphabet,
+     but we do not use either of them in FAST_ONE_PASS_COMPRESSION_QUALITY. */
+  uint8_t cmd_depths_[128];
+  uint16_t cmd_bits_[128];
+  /* The compressed form of the command and distance prefix codes for the next
+     block in FAST_ONE_PASS_COMPRESSION_QUALITY. */
+  uint8_t cmd_code_[512];
+  size_t cmd_code_numbits_;  /* Length of cmd_code_ content, in bits. */
+  /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */
+  uint32_t* command_buf_;
+  uint8_t* literal_buf_;
+
+  uint8_t* next_out_;
+  size_t available_out_;
+  size_t total_out_;
+  /* Temporary buffer for padding flush bits or metadata block header / body. */
+  union {
+    uint64_t u64[2];
+    uint8_t u8[16];
+  } tiny_buf_;
+  uint32_t remaining_metadata_bytes_;
+  BrotliEncoderStreamState stream_state_;
+
+  BROTLI_BOOL is_last_block_emitted_;
+  BROTLI_BOOL is_initialized_;  /* Set at the end of EnsureInitialized. */
+} BrotliEncoderStateStruct;
+
+static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s);
+
+/* Size in bytes of one input block, i.e. 2^lgblock. */
+static size_t InputBlockSize(BrotliEncoderState* s) {
+  return ((size_t)1) << s->params.lgblock;
+}
+/* Distance between the input position and the last processed position. */
+static uint64_t UnprocessedInputSize(BrotliEncoderState* s) {
+  return s->input_pos_ - s->last_processed_pos_;
+}
+/* Bytes still missing to fill one input block; 0 once a full block waits. */
+static size_t RemainingInputBlockSize(BrotliEncoderState* s) {
+  const size_t capacity = InputBlockSize(s);
+  const uint64_t pending = UnprocessedInputSize(s);
+  return (pending >= capacity) ? 0 : capacity - (size_t)pending;
+}
+
+/* Stores |value| as encoder parameter |p|. Returns BROTLI_FALSE when the
+   encoder is already initialized, when |p| is unknown, or when |value| is not
+   0 or 1 for BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING. */
+BROTLI_BOOL BrotliEncoderSetParameter(
+    BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value) {
+  BrotliEncoderParams* params = &state->params;
+
+  /* Changing parameters on the fly is not implemented yet. */
+  if (state->is_initialized_) return BROTLI_FALSE;
+  /* TODO: Validate/clamp parameters here. */
+  switch (p) {
+    case BROTLI_PARAM_MODE:
+      params->mode = (BrotliEncoderMode)value;
+      break;
+    case BROTLI_PARAM_QUALITY:
+      params->quality = (int)value;
+      break;
+    case BROTLI_PARAM_LGWIN:
+      params->lgwin = (int)value;
+      break;
+    case BROTLI_PARAM_LGBLOCK:
+      params->lgblock = (int)value;
+      break;
+    case BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING:
+      /* Only the values 0 and 1 are accepted. */
+      if (value > 1) return BROTLI_FALSE;
+      params->disable_literal_context_modeling = TO_BROTLI_BOOL(!!value);
+      break;
+    case BROTLI_PARAM_SIZE_HINT:
+      params->size_hint = value;
+      break;
+    case BROTLI_PARAM_LARGE_WINDOW:
+      params->large_window = TO_BROTLI_BOOL(!!value);
+      break;
+    case BROTLI_PARAM_NPOSTFIX:
+      params->dist.distance_postfix_bits = value;
+      break;
+    case BROTLI_PARAM_NDIRECT:
+      params->dist.num_direct_distance_codes = value;
+      break;
+    default:
+      return BROTLI_FALSE;
+  }
+  /* Every case that reaches this point stored the value. */
+  return BROTLI_TRUE;
+}
+
+/* Maps a 64-bit input position onto a 32-bit ring-buffer position while
+   keeping the "not-a-first-lap" property of positions past the first lap. */
+static uint32_t WrapPosition(uint64_t position) {
+  const uint64_t gb = position >> 30;
+  uint32_t lap;
+  /* The first 3GiB are continuous, so those positions pass through. */
+  if (gb <= 2) return (uint32_t)position;
+  /* Afterwards wrap every 2GiB: the top two bits alternate between 1 and 2. */
+  lap = (uint32_t)((gb - 1) & 1) + 1;
+  return ((uint32_t)position & ((1u << 30) - 1)) | (lap << 30);
+}
+
+/* Returns s->storage_, first regrowing it to |size| bytes if it is smaller. */
+static uint8_t* GetBrotliStorage(BrotliEncoderState* s, size_t size) {
+  MemoryManager* m = &s->memory_manager_;
+  if (size <= s->storage_size_) return s->storage_;
+  BROTLI_FREE(m, s->storage_);  /* Old contents are not carried over. */
+  s->storage_ = BROTLI_ALLOC(m, uint8_t, size);
+  if (BROTLI_IS_OOM(m)) return NULL;  /* storage_size_ keeps its old value. */
+  s->storage_size_ = size;
+  return s->storage_;
+}
+
+/* Smallest power of two >= 256 that reaches min(max_table_size, input_size). */
+static size_t HashTableSize(size_t max_table_size, size_t input_size) {
+  size_t cap = (input_size < max_table_size) ? input_size : max_table_size;
+  size_t htsize;
+  for (htsize = 256; htsize < cap; htsize <<= 1) {}
+  return htsize;
+}
+
+/* Returns a zero-filled hash table and stores its entry count in *table_size.
+   Uses s->small_table_ when it is big enough, otherwise s->large_table_
+   (regrown on demand). Returns 0 when the memory manager reports OOM. */
+static int* GetHashTable(BrotliEncoderState* s, int quality,
+                         size_t input_size, size_t* table_size) {
+  /* Use smaller hash table when input.size() is smaller, since we
+     fill the table, incurring O(hash table size) overhead for
+     compression, and if the input is short, we won't need that
+     many hash table entries anyway. */
+  MemoryManager* m = &s->memory_manager_;
+  const size_t small_capacity =
+      sizeof(s->small_table_) / sizeof(s->small_table_[0]);
+  const size_t max_table_size = MaxHashTableSize(quality);
+  size_t htsize = HashTableSize(max_table_size, input_size);
+  int* table = s->small_table_;
+  BROTLI_DCHECK(max_table_size >= 256);
+  /* Only odd shifts are supported by fast-one-pass. */
+  if (quality == FAST_ONE_PASS_COMPRESSION_QUALITY &&
+      (htsize & 0xAAAAA) == 0) {
+    htsize <<= 1;
+  }
+  if (htsize > small_capacity) {
+    if (htsize > s->large_table_size_) {
+      s->large_table_size_ = htsize;
+      BROTLI_FREE(m, s->large_table_);
+      s->large_table_ = BROTLI_ALLOC(m, int, htsize);
+      if (BROTLI_IS_OOM(m)) return 0;
+    }
+    table = s->large_table_;
+  }
+  *table_size = htsize;
+  memset(table, 0, htsize * sizeof(*table));
+  return table;
+}
+
+/* Computes the stream-header bits announcing window size |lgwin|: the bit
+   pattern is stored in *last_bytes and its bit length in *last_bytes_bits. */
+static void EncodeWindowBits(int lgwin, BROTLI_BOOL large_window,
+    uint16_t* last_bytes, uint8_t* last_bytes_bits) {
+  if (large_window) {
+    *last_bytes = (uint16_t)(((lgwin & 0x3F) << 8) | 0x11);
+    *last_bytes_bits = 14;
+  } else if (lgwin == 16) {
+    *last_bytes = 0;
+    *last_bytes_bits = 1;
+  } else if (lgwin == 17) {
+    *last_bytes = 1;
+    *last_bytes_bits = 7;
+  } else if (lgwin > 17) {
+    *last_bytes = (uint16_t)(((lgwin - 17) << 1) | 0x01);
+    *last_bytes_bits = 4;
+  } else {
+    *last_bytes = (uint16_t)(((lgwin - 8) << 4) | 0x01);
+    *last_bytes_bits = 7;
+  }
+}
+
+/* Initializes the command and distance prefix codes for the first block. */
+static void InitCommandPrefixCodes(uint8_t cmd_depths[128],
+                                   uint16_t cmd_bits[128],
+                                   uint8_t cmd_code[512],
+                                   size_t* cmd_code_numbits) {
+  static const uint8_t kDefaultCommandDepths[128] = {
+    0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
+    0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7,
+    7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6,
+    7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,
+    4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  };  /* 124 initializers: entries 124..127 are implicitly 0. */
+  static const uint16_t kDefaultCommandBits[128] = {
+    0,   0,   8,   9,   3,  35,   7,   71,
+    39, 103,  23,  47, 175, 111, 239,   31,
+    0,   0,   0,   4,  12,   2,  10,    6,
+    13,  29,  11,  43,  27,  59,  87,   55,
+    15,  79, 319, 831, 191, 703, 447,  959,
+    0,  14,   1,  25,   5,  21,  19,   51,
+    119, 159,  95, 223, 479, 991,  63,  575,
+    127, 639, 383, 895, 255, 767, 511, 1023,
+    14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12,
+    2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255,
+    767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095,
+  };  /* 124 initializers: entries 124..127 are implicitly 0. */
+  static const uint8_t kDefaultCommandCode[] = {
+    0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,
+    0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,
+    0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa,
+    0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94,
+    0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00,
+  };  /* 57 bytes. */
+  static const size_t kDefaultCommandCodeNumBits = 448;  /* = 56 * 8 bits. */
+  COPY_ARRAY(cmd_depths, kDefaultCommandDepths);  /* Copies all 128 entries. */
+  COPY_ARRAY(cmd_bits, kDefaultCommandBits);
+
+  /* Initialize the pre-compressed form of the command and distance prefix
+     codes. */
+  COPY_ARRAY(cmd_code, kDefaultCommandCode);  /* Rest of cmd_code untouched. */
+  *cmd_code_numbits = kDefaultCommandCodeNumBits;
+}
+
+/* Decide about the context map based on how well the UTF8-prefix of the
+   previous byte predicts the next byte. The prediction ability is
+   calculated as Shannon entropy. Here we need Shannon entropy instead of
+   'BitsEntropy' since the prefix will be encoded with the remaining 6 bits
+   of the following byte, and BitsEntropy will assume that symbol to be
+   stored alone using Huffman coding. |bigram_histo| holds 9 counters,
+   indexed as 3 * previous prefix class + current prefix class. */
+static void ChooseContextMap(int quality,
+                             uint32_t* bigram_histo,
+                             size_t* num_literal_contexts,
+                             const uint32_t** literal_context_map) {
+  static const uint32_t kStaticContextMapContinuation[64] = {
+    1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  };
+  static const uint32_t kStaticContextMapSimpleUTF8[64] = {
+    0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  };
+
+  uint32_t monogram_histo[3] = { 0 };
+  uint32_t two_prefix_histo[6] = { 0 };
+  size_t total;
+  size_t i;
+  size_t dummy;  /* Ignored out-parameter of ShannonEntropy. */
+  double entropy[4];  /* [0]: 1/total scale; [1..3]: cost of 1..3 contexts. */
+  for (i = 0; i < 9; ++i) {
+    monogram_histo[i % 3] += bigram_histo[i];
+    two_prefix_histo[i % 6] += bigram_histo[i];
+  }
+  entropy[1] = ShannonEntropy(monogram_histo, 3, &dummy);
+  entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
+                ShannonEntropy(two_prefix_histo + 3, 3, &dummy));
+  entropy[3] = 0;
+  for (i = 0; i < 3; ++i) {
+    entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &dummy);
+  }
+
+  total = monogram_histo[0] + monogram_histo[1] + monogram_histo[2];
+  BROTLI_DCHECK(total != 0);
+  entropy[0] = 1.0 / (double)total;  /* Turns the sums into per-symbol values. */
+  entropy[1] *= entropy[0];
+  entropy[2] *= entropy[0];
+  entropy[3] *= entropy[0];
+
+  if (quality < MIN_QUALITY_FOR_HQ_CONTEXT_MODELING) {
+    /* 3 context models is a bit slower, don't use it at lower qualities. */
+    entropy[3] = entropy[1] * 10;
+  }
+  /* If expected savings by symbol are less than 0.2 bits, skip the
+     context modeling -- in exchange for faster decoding speed. */
+  if (entropy[1] - entropy[2] < 0.2 &&
+      entropy[1] - entropy[3] < 0.2) {
+    *num_literal_contexts = 1;  /* *literal_context_map is left untouched. */
+  } else if (entropy[2] - entropy[3] < 0.02) {
+    *num_literal_contexts = 2;
+    *literal_context_map = kStaticContextMapSimpleUTF8;
+  } else {
+    *num_literal_contexts = 3;
+    *literal_context_map = kStaticContextMapContinuation;
+  }
+}
+
+/* Decide if we want to use a more complex static context map containing 13
+   context values, based on the entropy reduction of histograms over the first
+   5 bits of literals. Sets the two outputs only when returning BROTLI_TRUE. */
+static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,
+    size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
+    size_t* num_literal_contexts, const uint32_t** literal_context_map) {
+  static const uint32_t kStaticContextMapComplexUTF8[64] = {
+    11, 11, 12, 12, /* 0 special */
+    0, 0, 0, 0, /* 4 lf */
+    1, 1, 9, 9, /* 8 space */
+    2, 2, 2, 2, /* !, first after space/lf and after something else. */
+    1, 1, 1, 1, /* " */
+    8, 3, 3, 3, /* % */
+    1, 1, 1, 1, /* ({[ */
+    2, 2, 2, 2, /* }]) */
+    8, 4, 4, 4, /* :; */
+    8, 7, 4, 4, /* . */
+    8, 0, 0, 0, /* > */
+    3, 3, 3, 3, /* [0..9] */
+    5, 5, 10, 5, /* [A-Z] */
+    5, 5, 10, 5,
+    6, 6, 6, 6, /* [a-z] */
+    6, 6, 6, 6,
+  };
+  BROTLI_UNUSED(quality);  /* |quality| does not influence the decision. */
+  /* Try the more complex static context map only for long data. */
+  if (size_hint < (1 << 20)) {  /* i.e. size_hint below 2^20 bytes */
+    return BROTLI_FALSE;
+  } else {
+    const size_t end_pos = start_pos + length;
+    /* To make entropy calculations faster and to fit on the stack, we collect
+       histograms over the 5 most significant bits of literals. One histogram
+       without context and 13 additional histograms for each context value. */
+    uint32_t combined_histo[32] = { 0 };
+    uint32_t context_histo[13][32] = { { 0 } };
+    uint32_t total = 0;
+    double entropy[3];  /* [0]: 1/total; [1]: no context; [2]: 13 contexts. */
+    size_t dummy;  /* Ignored out-parameter of ShannonEntropy. */
+    size_t i;
+    ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8);
+    for (; start_pos + 64 <= end_pos; start_pos += 4096) {
+      const size_t stride_end_pos = start_pos + 64;
+      uint8_t prev2 = input[start_pos & mask];
+      uint8_t prev1 = input[(start_pos + 1) & mask];
+      size_t pos;
+      /* To make the analysis of the data faster we only examine 64 byte long
+         strides at every 4kB intervals. */
+      for (pos = start_pos + 2; pos < stride_end_pos; ++pos) {
+        const uint8_t literal = input[pos & mask];
+        const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[
+            BROTLI_CONTEXT(prev1, prev2, utf8_lut)];
+        ++total;
+        ++combined_histo[literal >> 3];  /* Bucket by the top 5 bits. */
+        ++context_histo[context][literal >> 3];
+        prev2 = prev1;
+        prev1 = literal;
+      }
+    }
+    entropy[1] = ShannonEntropy(combined_histo, 32, &dummy);
+    entropy[2] = 0;
+    for (i = 0; i < 13; ++i) {
+      entropy[2] += ShannonEntropy(&context_histo[i][0], 32, &dummy);
+    }
+    entropy[0] = 1.0 / (double)total;  /* total > 0 requires length >= 64. */
+    entropy[1] *= entropy[0];
+    entropy[2] *= entropy[0];
+    /* The triggering heuristics below were tuned by compressing the individual
+       files of the silesia corpus. If we skip this kind of context modeling
+       for not very well compressible input (i.e. entropy using context modeling
+       is 60% of maximal entropy) or if expected savings by symbol are less
+       than 0.2 bits, then in every case when it triggers, the final compression
+       ratio is improved. Note however that this heuristics might be too strict
+       for some cases and could be tuned further. */
+    if (entropy[2] > 3.0 || entropy[1] - entropy[2] < 0.2) {
+      return BROTLI_FALSE;
+    } else {
+      *num_literal_contexts = 13;
+      *literal_context_map = kStaticContextMapComplexUTF8;
+      return BROTLI_TRUE;
+    }
+  }
+}
+
+/* Chooses *num_literal_contexts / *literal_context_map for a block; both are
+   left untouched when quality is too low or the block is under 64 bytes. */
+static void DecideOverLiteralContextModeling(const uint8_t* input,
+    size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
+    size_t* num_literal_contexts, const uint32_t** literal_context_map) {
+  static const int lut[4] = { 0, 0, 1, 2 };
+  uint32_t bigram_prefix_histo[9] = { 0 };
+  const size_t end_pos = start_pos + length;
+  if (quality < MIN_QUALITY_FOR_CONTEXT_MODELING || length < 64) return;
+  if (ShouldUseComplexStaticContextMap(
+      input, start_pos, length, mask, quality, size_hint,
+      num_literal_contexts, literal_context_map)) {
+    return;  /* Context map was already set, nothing else to do. */
+  }
+  /* Gather bi-gram data of the UTF8 byte prefixes, examining only 64 byte
+     long strides at every 4kB intervals to keep the analysis fast. */
+  while (start_pos + 64 <= end_pos) {
+    const size_t stride_end_pos = start_pos + 64;
+    size_t pos = start_pos;
+    int prev = lut[input[pos & mask] >> 6] * 3;
+    for (++pos; pos < stride_end_pos; ++pos) {
+      const int cur = lut[input[pos & mask] >> 6];
+      ++bigram_prefix_histo[prev + cur];
+      prev = cur * 3;
+    }
+    start_pos += 4096;
+  }
+  ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
+                   literal_context_map);
+}
+
+/* Heuristic: BROTLI_FALSE for tiny blocks and for blocks with few commands,
+   almost only literals and a high sampled literal entropy; else BROTLI_TRUE. */
+static BROTLI_BOOL ShouldCompress(
+    const uint8_t* data, const size_t mask, const uint64_t last_flush_pos,
+    const size_t bytes, const size_t num_literals, const size_t num_commands) {
+  static const uint32_t kSampleRate = 13;
+  static const double kMinEntropy = 7.92;
+  uint32_t literal_histo[256] = { 0 };
+  uint32_t pos = (uint32_t)last_flush_pos;
+  size_t samples_left;
+  /* TODO: find more precise minimal block overhead. */
+  if (bytes <= 2) return BROTLI_FALSE;
+  if (num_commands >= (bytes >> 8) + 2) return BROTLI_TRUE;
+  if (!(num_literals > 0.99 * (double)bytes)) return BROTLI_TRUE;
+  /* Build a histogram from every kSampleRate-th byte of the block. */
+  samples_left = (bytes + kSampleRate - 1) / kSampleRate;
+  for (; samples_left != 0; --samples_left) {
+    ++literal_histo[data[pos & mask]];
+    pos += kSampleRate;
+  }
+  if (BitsEntropy(literal_histo, 256) >
+      (double)bytes * kMinEntropy / kSampleRate) {
+    return BROTLI_FALSE;
+  }
+  return BROTLI_TRUE;
+}
+
+/* Chooses the literal context mode for a metablock: CONTEXT_SIGNED only at
+   the highest qualities when the data is not mostly UTF8, else CONTEXT_UTF8. */
+static ContextType ChooseContextMode(const BrotliEncoderParams* params,
+    const uint8_t* data, const size_t pos, const size_t mask,
+    const size_t length) {
+  /* The UTF8 check is only computed for the highest qualities. */
+  if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
+    return CONTEXT_UTF8;
+  }
+  return BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio)
+      ? CONTEXT_UTF8 : CONTEXT_SIGNED;
+}
+
+static void WriteMetaBlockInternal(MemoryManager* m,
+                                   const uint8_t* data,
+                                   const size_t mask,
+                                   const uint64_t last_flush_pos,
+                                   const size_t bytes,
+                                   const BROTLI_BOOL is_last,
+                                   ContextType literal_context_mode,
+                                   const BrotliEncoderParams* params,
+                                   const uint8_t prev_byte,
+                                   const uint8_t prev_byte2,
+                                   const size_t num_literals,
+                                   const size_t num_commands,
+                                   Command* commands,
+                                   const int* saved_dist_cache,
+                                   int* dist_cache,
+                                   size_t* storage_ix,  /* in/out: bit index */
+                                   uint8_t* storage) {
+  const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos);
+  uint16_t last_bytes;
+  uint8_t last_bytes_bits;
+  ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
+  BrotliEncoderParams block_params = *params;  /* Local copy for builders. */
+
+  if (bytes == 0) {
+    /* Write the ISLAST and ISEMPTY bits, then pad to a byte boundary. */
+    BrotliWriteBits(2, 3, storage_ix, storage);
+    *storage_ix = (*storage_ix + 7u) & ~7u;  /* NOTE(review): 32-bit mask. */
+    return;
+  }
+
+  if (!ShouldCompress(data, mask, last_flush_pos, bytes,
+                      num_literals, num_commands)) {
+    /* Restore the distance cache, as its last update by
+       CreateBackwardReferences is now unused. */
+    memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
+    BrotliStoreUncompressedMetaBlock(is_last, data,
+                                     wrapped_last_flush_pos, mask, bytes,
+                                     storage_ix, storage);
+    return;
+  }
+
+  BROTLI_DCHECK(*storage_ix <= 14);  /* Pending bits fit in storage[0..1]. */
+  last_bytes = (uint16_t)((storage[1] << 8) | storage[0]);
+  last_bytes_bits = (uint8_t)(*storage_ix);
+  if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
+    BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos,
+                             bytes, mask, is_last, params,
+                             commands, num_commands,
+                             storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+  } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
+    BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos,
+                                bytes, mask, is_last, params,
+                                commands, num_commands,
+                                storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+  } else {
+    MetaBlockSplit mb;
+    InitMetaBlockSplit(&mb);
+    if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
+      size_t num_literal_contexts = 1;
+      const uint32_t* literal_context_map = NULL;
+      if (!params->disable_literal_context_modeling) {
+        DecideOverLiteralContextModeling(
+            data, wrapped_last_flush_pos, bytes, mask, params->quality,
+            params->size_hint, &num_literal_contexts,
+            &literal_context_map);
+      }
+      BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,
+          prev_byte, prev_byte2, literal_context_lut, num_literal_contexts,
+          literal_context_map, commands, num_commands, &mb);
+      if (BROTLI_IS_OOM(m)) return;
+    } else {
+      BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, &block_params,
+                           prev_byte, prev_byte2,
+                           commands, num_commands,
+                           literal_context_mode,
+                           &mb);
+      if (BROTLI_IS_OOM(m)) return;
+    }
+    if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) {
+      /* The number of distance symbols effectively used for distance
+         histograms. It might be less than distance alphabet size
+         for "Large Window Brotli" (32-bit). */
+      uint32_t num_effective_dist_codes = block_params.dist.alphabet_size;
+      if (num_effective_dist_codes > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) {
+        num_effective_dist_codes = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS;
+      }
+      BrotliOptimizeHistograms(num_effective_dist_codes, &mb);
+    }
+    BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask,
+                         prev_byte, prev_byte2,
+                         is_last,
+                         &block_params,
+                         literal_context_mode,
+                         commands, num_commands,
+                         &mb,
+                         storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return;
+    DestroyMetaBlockSplit(m, &mb);
+  }
+  if (bytes + 4 < (*storage_ix >> 3)) {
+    /* Output exceeds input + 4 bytes: undo it and store the block raw.
+       Restore the distance cache and last byte. */
+    memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
+    storage[0] = (uint8_t)last_bytes;
+    storage[1] = (uint8_t)(last_bytes >> 8);
+    *storage_ix = last_bytes_bits;
+    BrotliStoreUncompressedMetaBlock(is_last, data,
+                                     wrapped_last_flush_pos, mask,
+                                     bytes, storage_ix, storage);
+  }
+}
+
+/* Chooses NPOSTFIX / NDIRECT and hands them to BrotliInitDistanceParams.
+   Below MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS, or when the requested pair
+   is invalid, both values fall back to 0. */
+static void ChooseDistanceParams(BrotliEncoderParams* params) {
+  uint32_t npostfix = 0;
+  uint32_t ndirect = 0;
+  if (params->quality >= MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS) {
+    if (params->mode == BROTLI_MODE_FONT) {
+      npostfix = 1;
+      ndirect = 12;
+    } else {
+      npostfix = params->dist.distance_postfix_bits;
+      ndirect = params->dist.num_direct_distance_codes;
+    }
+    /* The range checks come first and short-circuit the shifts: |npostfix| is
+       caller-supplied and a shift count >= 32 would be undefined behavior. */
+    if (npostfix > BROTLI_MAX_NPOSTFIX ||
+        ndirect > BROTLI_MAX_NDIRECT ||
+        (((ndirect >> npostfix) & 0x0F) << npostfix) != ndirect) {
+      npostfix = 0;
+      ndirect = 0;
+    }
+  }
+  BrotliInitDistanceParams(params, npostfix, ndirect);
+}
+
+/* Finalizes the encoder setup on first use: sanitizes the parameters, sets
+   up the ring buffer and seeds last_bytes_ with the stream header.
+   Returns BROTLI_FALSE iff the memory manager is in the OOM state. */
+static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
+  BrotliEncoderParams* params = &s->params;
+  int lgwin;
+  if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE;
+  if (s->is_initialized_) return BROTLI_TRUE;
+
+  s->last_bytes_bits_ = 0;
+  s->last_bytes_ = 0;
+  s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
+  SanitizeParams(params);
+  params->lgblock = ComputeLgBlock(params);
+  ChooseDistanceParams(params);
+  RingBufferSetup(params, &s->ringbuffer_);
+
+  /* Initialize last byte with stream header. The two fast modes announce a
+     window of at least 18 bits. */
+  lgwin = params->lgwin;
+  if (params->quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
+      params->quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
+    lgwin = BROTLI_MAX(int, lgwin, 18);
+  }
+  EncodeWindowBits(lgwin, params->large_window,
+                   &s->last_bytes_, &s->last_bytes_bits_);
+  if (params->quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
+    InitCommandPrefixCodes(s->cmd_depths_, s->cmd_bits_,
+                           s->cmd_code_, &s->cmd_code_numbits_);
+  }
+  s->is_initialized_ = BROTLI_TRUE;
+  return BROTLI_TRUE;
+}
+
+static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
+  BrotliInitEncoderDictionary(&params->dictionary);
+  params->quality = BROTLI_DEFAULT_QUALITY;  /* Library defaults. */
+  params->lgwin = BROTLI_DEFAULT_WINDOW;
+  params->mode = BROTLI_DEFAULT_MODE;
+  params->lgblock = 0;  /* Recomputed by EnsureInitialized. */
+  params->size_hint = 0;
+  params->disable_literal_context_modeling = BROTLI_FALSE;
+  params->large_window = BROTLI_FALSE;
+  params->dist.num_direct_distance_codes = 0;  /* NDIRECT */
+  params->dist.distance_postfix_bits = 0;      /* NPOSTFIX */
+  params->dist.max_distance = BROTLI_MAX_DISTANCE;
+  params->dist.alphabet_size =
+      BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS);
+}
+
+/* Puts an encoder state into its initial condition: default params, no
+   buffers allocated, all counters at zero, is_initialized_ still false
+   (that flag is raised later by EnsureInitialized). */
+static void BrotliEncoderInitState(BrotliEncoderState* s) {
+  static const int kInitialDistCache[4] = { 4, 11, 15, 16 };
+  BrotliEncoderInitParams(&s->params);
+  /* Stream positions and per-block counters. */
+  s->input_pos_ = 0;
+  s->last_flush_pos_ = 0;
+  s->last_processed_pos_ = 0;
+  s->num_commands_ = 0;
+  s->num_literals_ = 0;
+  s->last_insert_len_ = 0;
+  s->prev_byte_ = 0;
+  s->prev_byte2_ = 0;
+  /* Buffers and tables: nothing is allocated yet. */
+  s->storage_ = 0;
+  s->storage_size_ = 0;
+  s->hasher_ = NULL;
+  s->large_table_ = NULL;
+  s->large_table_size_ = 0;
+  s->command_buf_ = NULL;
+  s->literal_buf_ = NULL;
+  s->commands_ = 0;
+  s->cmd_alloc_size_ = 0;
+  s->cmd_code_numbits_ = 0;
+  /* Output cursor and stream status. */
+  s->next_out_ = NULL;
+  s->available_out_ = 0;
+  s->total_out_ = 0;
+  s->stream_state_ = BROTLI_STREAM_PROCESSING;
+  s->is_last_block_emitted_ = BROTLI_FALSE;
+  s->is_initialized_ = BROTLI_FALSE;
+  RingBufferInit(&s->ringbuffer_);
+  /* Initialize distance cache, and save a copy of it in case we need to
+     restore it for emitting an uncompressed block. */
+  memcpy(s->dist_cache_, kInitialDistCache, sizeof(kInitialDistCache));
+  memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
+}
+
+/* Allocates and initializes an encoder. |alloc_func| and |free_func| must be
+   both NULL (malloc is used) or both set; a mixed pair, like a failed
+   allocation, yields 0. */
+BrotliEncoderState* BrotliEncoderCreateInstance(
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+  const size_t state_size = sizeof(BrotliEncoderState);
+  BrotliEncoderState* state;
+  /* Reject a half-specified allocator pair. */
+  if ((alloc_func == 0) != (free_func == 0)) return 0;
+  state = (BrotliEncoderState*)(alloc_func ? alloc_func(opaque, state_size)
+                                           : malloc(state_size));
+  if (!state) return 0;
+  BrotliInitMemoryManager(
+      &state->memory_manager_, alloc_func, free_func, opaque);
+  BrotliEncoderInitState(state);
+  return state;
+}
+
+static void BrotliEncoderCleanupState(BrotliEncoderState* s) {
+  MemoryManager* m = &s->memory_manager_;
+  if (!BROTLI_IS_OOM(m)) {  /* Normal path: release each buffer of |s|. */
+    BROTLI_FREE(m, s->storage_);
+    BROTLI_FREE(m, s->commands_);
+    RingBufferFree(m, &s->ringbuffer_);
+    DestroyHasher(m, &s->hasher_);
+    BROTLI_FREE(m, s->large_table_);
+    BROTLI_FREE(m, s->command_buf_);
+    BROTLI_FREE(m, s->literal_buf_);
+  } else {  /* OOM state: wipe out the whole memory manager instead. */
+    BrotliWipeOutMemoryManager(m);
+  }
+}
+
+/* Deinitializes and frees BrotliEncoderState instance. Passing a null
+   |state| is a no-op. */
+void BrotliEncoderDestroyInstance(BrotliEncoderState* state) {
+  brotli_free_func free_func;
+  void* opaque;
+  if (state == 0) return;
+  /* Read the deallocator before cleanup, which may wipe the memory manager. */
+  free_func = state->memory_manager_.free_func;
+  opaque = state->memory_manager_.opaque;
+  BrotliEncoderCleanupState(state);
+  free_func(opaque, state);
+}
+
+/*
+   Appends |input_size| bytes from |input_buffer| to the encoder's internal
+   ring buffer and advances the input position by the same amount. Nothing is
+   compressed here; the function may be called several times before
+   WriteBrotliData() processes the accumulated input. At most
+   input_block_size() bytes may be accumulated, otherwise the next
+   WriteBrotliData() will fail.
+   If the memory manager reports out-of-memory after the write, the function
+   returns without advancing the input position.
+ */
+static void CopyInputToRingBuffer(BrotliEncoderState* s,
+                                  const size_t input_size,
+                                  const uint8_t* input_buffer) {
+  MemoryManager* m = &s->memory_manager_;
+  RingBuffer* rb = &s->ringbuffer_;
+
+  RingBufferWrite(m, input_buffer, input_size, rb);
+  if (BROTLI_IS_OOM(m)) return;
+  s->input_pos_ += input_size;
+
+  /* Short version: zero the 7 bytes that follow the data written so far, so
+     that hashing never depends on uninitialized memory. This keeps the
+     compression deterministic and silences uninitialized-memory warnings in
+     Valgrind. The output would be valid even without it, just not
+     reproducible.
+
+     Longer version: the compressor keeps short (at most 8 bytes) substrings
+     of the already-read input in a hash table and finds repetitions by
+     looking them up. Every hit is verified against the ring buffer, so a
+     corrupt table can only hurt the compression ratio, never validity.
+     Hashing reads the data with a LOAD64 for speed, which can reach 7 bytes
+     past the last byte written; near the end of the written data there are
+     not enough real bytes for a full-length substring, so those positions
+     are padded with zeros.
+
+     The padding is applied only while the ring buffer is being filled for
+     the first time. Once the position has gone around, the region after the
+     write position (including the `tail' that mirrors the beginning of the
+     buffer) already holds initialized data, and zeroing it would clobber
+     live content. */
+  if (rb->pos_ > rb->mask_) return;
+  memset(rb->buffer_ + rb->pos_, 0, 7);
+}
+
+/* Declares all input received so far as processed.
+   Returns BROTLI_TRUE when the wrapped input position is smaller than the
+   wrapped position that was recorded before the update, i.e. when position
+   wrapping occurred. */
+static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) {
+  const uint32_t before = WrapPosition(s->last_processed_pos_);
+  const uint32_t after = WrapPosition(s->input_pos_);
+  const BROTLI_BOOL wrapped = TO_BROTLI_BOOL(after < before);
+  s->last_processed_pos_ = s->input_pos_;
+  return wrapped;
+}
+
+/* Tries to lengthen the copy part of the most recently stored command with
+   bytes taken from the start of the not-yet-processed input.
+   For every byte that matches the byte |dist_cache_[0]| positions earlier in
+   the ring buffer, the command's copy length grows by one, |*bytes| shrinks by
+   one and |*wrapped_last_processed_pos| advances by one. The command prefix
+   is then recomputed. Nothing happens when the command's restored distance
+   code is neither a short code nor the code that corresponds to
+   |dist_cache_[0]|. */
+static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
+                              uint32_t* wrapped_last_processed_pos) {
+  Command* cmd = &s->commands_[s->num_commands_ - 1];
+  const uint8_t* ring = s->ringbuffer_.buffer_;
+  const uint32_t ring_mask = s->ringbuffer_.mask_;
+  const uint64_t window_limit =
+      (((uint64_t)1) << s->params.lgwin) - BROTLI_WINDOW_GAP;
+  const uint64_t copy_len = cmd->copy_len_ & 0x1FFFFFF;
+  const uint64_t copy_start = s->last_processed_pos_ - copy_len;
+  uint64_t reachable = window_limit;
+  const uint64_t dist = (uint64_t)s->dist_cache_[0];
+  const uint32_t dist_code = CommandRestoreDistanceCode(cmd, &s->params.dist);
+
+  /* A backward reference can reach neither past the window limit nor past
+     the position where the last copy started. */
+  if (copy_start < window_limit) reachable = copy_start;
+
+  if (dist_code >= BROTLI_NUM_DISTANCE_SHORT_CODES &&
+      dist_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) != dist) {
+    return;
+  }
+
+  if (dist <= reachable) {
+    for (; *bytes != 0; --*bytes, ++*wrapped_last_processed_pos) {
+      const uint32_t pos = *wrapped_last_processed_pos;
+      if (ring[pos & ring_mask] != ring[(pos - dist) & ring_mask]) break;
+      cmd->copy_len_++;
+    }
+  }
+
+  {
+    /* The copy length is at most the metablock size, and thus expressible.
+       The value passed on is the sum of the low 25 bits of copy_len_ and of
+       the bits stored above them. */
+    const size_t coded_copy_len = (size_t)((int)(cmd->copy_len_ & 0x1FFFFFF) +
+                                           (int)(cmd->copy_len_ >> 25));
+    const BROTLI_BOOL zero_dist_prefix =
+        TO_BROTLI_BOOL((cmd->dist_prefix_ & 0x3FF) == 0);
+    GetLengthCode(cmd->insert_len_, coded_copy_len, zero_dist_prefix,
+                  &cmd->cmd_prefix_);
+  }
+}
+
+/*
+   Processes the accumulated input data and sets |*out_size| to the length of
+   the new output meta-block, or to zero if no new output meta-block has been
+   created (in this case the processed input data is buffered internally).
+   If |*out_size| is positive, |*output| points to the start of the output
+   data; |*output| is not written when |*out_size| is set to zero.
+   If |is_last| or |force_flush| is BROTLI_TRUE, an output meta-block is
+   created, except that without |is_last| nothing is emitted when there is no
+   input that has not been flushed yet. However, until |is_last| is BROTLI_TRUE
+   encoder may retain up to 7 bits of the last byte of output. To force encoder
+   to dump the remaining bits use WriteMetadata() to append an empty meta-data
+   block.
+
+   Parameters:
+     s            encoder state; its ring buffer holds the input to process.
+     is_last      BROTLI_TRUE if this is the final block of the stream.
+     force_flush  BROTLI_TRUE to emit a meta-block even if more input could
+                  still be merged into it.
+     out_size     receives the number of complete output bytes produced.
+     output       receives a pointer to the produced bytes (encoder-owned
+                  storage obtained from GetBrotliStorage()).
+
+   Returns BROTLI_FALSE if the size of the input data is larger than
+   input_block_size(), if a block has already been submitted with |is_last|
+   set, or if the memory manager reports an out-of-memory condition.
+ */
+static BROTLI_BOOL EncodeData(
+    BrotliEncoderState* s, const BROTLI_BOOL is_last,
+    const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) {
+  const uint64_t delta = UnprocessedInputSize(s);
+  uint32_t bytes = (uint32_t)delta;
+  uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
+  uint8_t* data;
+  uint32_t mask;
+  MemoryManager* m = &s->memory_manager_;
+  ContextType literal_context_mode;
+
+  data = s->ringbuffer_.buffer_;
+  mask = s->ringbuffer_.mask_;
+
+  /* Adding more blocks after "last" block is forbidden.
+     Note that the flag is raised before the input size is validated below. */
+  if (s->is_last_block_emitted_) return BROTLI_FALSE;
+  if (is_last) s->is_last_block_emitted_ = BROTLI_TRUE;
+
+  if (delta > InputBlockSize(s)) {
+    return BROTLI_FALSE;
+  }
+  if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY &&
+      !s->command_buf_) {
+    s->command_buf_ =
+        BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
+    s->literal_buf_ =
+        BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+  }
+
+  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
+      s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
+    uint8_t* storage;
+    size_t storage_ix = s->last_bytes_bits_;  /* resume after pending bits */
+    size_t table_size;
+    int* table;
+
+    if (delta == 0 && !is_last) {
+      /* We have no new input data and we don't have to finish the stream, so
+         nothing to do. */
+      *out_size = 0;
+      return BROTLI_TRUE;
+    }
+    storage = GetBrotliStorage(s, 2 * bytes + 503);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    storage[0] = (uint8_t)s->last_bytes_;
+    storage[1] = (uint8_t)(s->last_bytes_ >> 8);
+    table = GetHashTable(s, s->params.quality, bytes, &table_size);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
+      BrotliCompressFragmentFast(
+          m, &data[wrapped_last_processed_pos & mask],
+          bytes, is_last,
+          table, table_size,
+          s->cmd_depths_, s->cmd_bits_,
+          &s->cmd_code_numbits_, s->cmd_code_,
+          &storage_ix, storage);
+      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    } else {
+      BrotliCompressFragmentTwoPass(
+          m, &data[wrapped_last_processed_pos & mask],
+          bytes, is_last,
+          s->command_buf_, s->literal_buf_,
+          table, table_size,
+          &storage_ix, storage);
+      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    }
+    s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
+    s->last_bytes_bits_ = storage_ix & 7u;  /* bits of the incomplete byte */
+    UpdateLastProcessedPos(s);
+    *output = &storage[0];
+    *out_size = storage_ix >> 3;  /* whole bytes only */
+    return BROTLI_TRUE;
+  }
+
+  {
+    /* Theoretical max number of commands is 1 per 2 bytes.
+       Grow the command array if this block could overflow it. */
+    size_t newsize = s->num_commands_ + bytes / 2 + 1;
+    if (newsize > s->cmd_alloc_size_) {
+      Command* new_commands;
+      /* Reserve a bit more memory to allow merging with a next block
+         without reallocation: that would impact speed. */
+      newsize += (bytes / 4) + 16;
+      s->cmd_alloc_size_ = newsize;
+      new_commands = BROTLI_ALLOC(m, Command, newsize);
+      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+      if (s->commands_) {
+        memcpy(new_commands, s->commands_, sizeof(Command) * s->num_commands_);
+        BROTLI_FREE(m, s->commands_);
+      }
+      s->commands_ = new_commands;
+    }
+  }
+
+  InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params,
+      wrapped_last_processed_pos, bytes, is_last);
+
+  literal_context_mode = ChooseContextMode(
+      &s->params, data, WrapPosition(s->last_flush_pos_),
+      mask, (size_t)(s->input_pos_ - s->last_flush_pos_));
+
+  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+
+  if (s->num_commands_ && s->last_insert_len_ == 0) {
+    ExtendLastCommand(s, &bytes, &wrapped_last_processed_pos);
+  }
+
+  if (s->params.quality == ZOPFLIFICATION_QUALITY) {
+    BROTLI_DCHECK(s->params.hasher.type == 10);
+    BrotliCreateZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
+        data, mask, &s->params, s->hasher_, s->dist_cache_,
+        &s->last_insert_len_, &s->commands_[s->num_commands_],
+        &s->num_commands_, &s->num_literals_);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+  } else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) {
+    BROTLI_DCHECK(s->params.hasher.type == 10);
+    BrotliCreateHqZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
+        data, mask, &s->params, s->hasher_, s->dist_cache_,
+        &s->last_insert_len_, &s->commands_[s->num_commands_],
+        &s->num_commands_, &s->num_literals_);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+  } else {
+    BrotliCreateBackwardReferences(bytes, wrapped_last_processed_pos,
+        data, mask, &s->params, s->hasher_, s->dist_cache_,
+        &s->last_insert_len_, &s->commands_[s->num_commands_],
+        &s->num_commands_, &s->num_literals_);
+  }
+
+  {
+    const size_t max_length = MaxMetablockSize(&s->params);
+    const size_t max_literals = max_length / 8;
+    const size_t max_commands = max_length / 8;
+    const size_t processed_bytes = (size_t)(s->input_pos_ - s->last_flush_pos_);
+    /* If maximal possible additional block doesn't fit metablock, flush now. */
+    /* TODO: Postpone decision until next block arrives? */
+    const BROTLI_BOOL next_input_fits_metablock = TO_BROTLI_BOOL(
+        processed_bytes + InputBlockSize(s) <= max_length);
+    /* If block splitting is not used, then flush as soon as there is some
+       amount of commands / literals produced. */
+    const BROTLI_BOOL should_flush = TO_BROTLI_BOOL(
+        s->params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT &&
+        s->num_literals_ + s->num_commands_ >= MAX_NUM_DELAYED_SYMBOLS);
+    if (!is_last && !force_flush && !should_flush &&
+        next_input_fits_metablock &&
+        s->num_literals_ < max_literals &&
+        s->num_commands_ < max_commands) {
+      /* Merge with next input block. Everything will happen later. */
+      if (UpdateLastProcessedPos(s)) {
+        HasherReset(s->hasher_);
+      }
+      *out_size = 0;
+      return BROTLI_TRUE;
+    }
+  }
+
+  /* Create the last insert-only command. */
+  if (s->last_insert_len_ > 0) {
+    InitInsertCommand(&s->commands_[s->num_commands_++], s->last_insert_len_);
+    s->num_literals_ += s->last_insert_len_;
+    s->last_insert_len_ = 0;
+  }
+
+  if (!is_last && s->input_pos_ == s->last_flush_pos_) {
+    /* We have no new input data and we don't have to finish the stream, so
+       nothing to do. */
+    *out_size = 0;
+    return BROTLI_TRUE;
+  }
+  BROTLI_DCHECK(s->input_pos_ >= s->last_flush_pos_);
+  BROTLI_DCHECK(s->input_pos_ > s->last_flush_pos_ || is_last);
+  BROTLI_DCHECK(s->input_pos_ - s->last_flush_pos_ <= 1u << 24);
+  {
+    const uint32_t metablock_size =
+        (uint32_t)(s->input_pos_ - s->last_flush_pos_);
+    uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 503);
+    size_t storage_ix = s->last_bytes_bits_;  /* resume after pending bits */
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    storage[0] = (uint8_t)s->last_bytes_;
+    storage[1] = (uint8_t)(s->last_bytes_ >> 8);
+    WriteMetaBlockInternal(
+        m, data, mask, s->last_flush_pos_, metablock_size, is_last,
+        literal_context_mode, &s->params, s->prev_byte_, s->prev_byte2_,
+        s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_,
+        s->dist_cache_, &storage_ix, storage);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
+    s->last_bytes_bits_ = storage_ix & 7u;  /* bits of the incomplete byte */
+    s->last_flush_pos_ = s->input_pos_;
+    if (UpdateLastProcessedPos(s)) {
+      HasherReset(s->hasher_);
+    }
+    if (s->last_flush_pos_ > 0) {
+      s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask];
+    }
+    if (s->last_flush_pos_ > 1) {
+      s->prev_byte2_ = data[(uint32_t)(s->last_flush_pos_ - 2) & mask];
+    }
+    s->num_commands_ = 0;
+    s->num_literals_ = 0;
+    /* Save the state of the distance cache in case we need to restore it for
+       emitting an uncompressed block. */
+    memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
+    *output = &storage[0];
+    *out_size = storage_ix >> 3;  /* whole bytes only */
+    return BROTLI_TRUE;
+  }
+}
+
+/* Flushes the bits still pending in the encoder state into |header| and
+   appends the header of a metadata block of |block_size| bytes. The pending
+   bit state of |s| is cleared.
+   Returns the number of bytes produced (the bit count rounded up).
+   REQUIRED: |header| should be 8-byte aligned and at least 16 bytes long.
+   REQUIRED: |block_size| <= (1 << 24). */
+static size_t WriteMetadataHeader(
+    BrotliEncoderState* s, const size_t block_size, uint8_t* header) {
+  size_t bit_pos = s->last_bytes_bits_;
+  /* Number of bytes needed to store (block_size - 1); stays zero for block
+     sizes 0 and 1. */
+  uint32_t size_bytes = 0;
+  if (block_size > 1) {
+    const uint32_t size_bits =
+        Log2FloorNonZero((uint32_t)block_size - 1) + 1;
+    size_bytes = (size_bits + 7) / 8;
+  }
+
+  /* Start the header with the bits left over from the previous output. */
+  header[0] = (uint8_t)s->last_bytes_;
+  header[1] = (uint8_t)(s->last_bytes_ >> 8);
+  s->last_bytes_ = 0;
+  s->last_bytes_bits_ = 0;
+
+  BrotliWriteBits(1, 0, &bit_pos, header);
+  BrotliWriteBits(2, 3, &bit_pos, header);
+  BrotliWriteBits(1, 0, &bit_pos, header);
+  BrotliWriteBits(2, size_bytes, &bit_pos, header);
+  if (block_size != 0) {
+    BrotliWriteBits(8 * size_bytes, block_size - 1, &bit_pos, header);
+  }
+  return (bit_pos + 7u) >> 3;
+}
+/* One-shot compression of a complete input buffer at quality 10.
+   Works without a BrotliEncoderState: it sets up its own encoder parameters
+   and its own memory manager (BrotliInitMemoryManager(m, 0, 0, 0)), builds
+   commands per block with the Zopfli shortest-path search and emits one
+   meta-block per outer loop iteration.
+
+   lgwin           window size (log2) to use and to encode in the stream header.
+   input_size      number of bytes available at |input_buffer|.
+   input_buffer    the data to compress; indexed directly with |mask|.
+   encoded_size    in: capacity of |encoded_buffer|; out: total size of all
+                   meta-blocks produced, including one that did not fit.
+                   Not updated when the function fails with out-of-memory.
+   encoded_buffer  receives the meta-blocks that fit into the capacity.
+
+   Returns BROTLI_TRUE on success; BROTLI_FALSE if the output did not fit into
+   the given capacity or if an allocation failed. */
+static BROTLI_BOOL BrotliCompressBufferQuality10(
+    int lgwin, size_t input_size, const uint8_t* input_buffer,
+    size_t* encoded_size, uint8_t* encoded_buffer) {
+  MemoryManager memory_manager;
+  MemoryManager* m = &memory_manager;
+
+  const size_t mask = BROTLI_SIZE_MAX >> 1;
+  int dist_cache[4] = { 4, 11, 15, 16 };
+  int saved_dist_cache[4] = { 4, 11, 15, 16 };
+  BROTLI_BOOL ok = BROTLI_TRUE;
+  const size_t max_out_size = *encoded_size;  /* output capacity on entry */
+  size_t total_out_size = 0;
+  uint16_t last_bytes;
+  uint8_t last_bytes_bits;
+  HasherHandle hasher = NULL;
+
+  const size_t hasher_eff_size = BROTLI_MIN(size_t,
+      input_size, BROTLI_MAX_BACKWARD_LIMIT(lgwin) + BROTLI_WINDOW_GAP);
+
+  BrotliEncoderParams params;
+
+  const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1);
+  size_t max_block_size;
+  const size_t max_metablock_size = (size_t)1 << lgmetablock;
+  const size_t max_literals_per_metablock = max_metablock_size / 8;
+  const size_t max_commands_per_metablock = max_metablock_size / 8;
+  size_t metablock_start = 0;
+  uint8_t prev_byte = 0;
+  uint8_t prev_byte2 = 0;
+
+  BrotliEncoderInitParams(&params);
+  params.quality = 10;
+  params.lgwin = lgwin;
+  if (lgwin > BROTLI_MAX_WINDOW_BITS) {
+    params.large_window = BROTLI_TRUE;
+  }
+  SanitizeParams(&params);
+  params.lgblock = ComputeLgBlock(&params);
+  ChooseDistanceParams(&params);
+  max_block_size = (size_t)1 << params.lgblock;
+
+  BrotliInitMemoryManager(m, 0, 0, 0);
+
+  BROTLI_DCHECK(input_size <= mask + 1);
+  EncodeWindowBits(lgwin, params.large_window, &last_bytes, &last_bytes_bits);
+  InitOrStitchToPreviousBlock(m, &hasher, input_buffer, mask, &params,
+      0, hasher_eff_size, BROTLI_TRUE);
+  if (BROTLI_IS_OOM(m)) goto oom;
+
+  while (ok && metablock_start < input_size) {
+    const size_t metablock_end =
+        BROTLI_MIN(size_t, input_size, metablock_start + max_metablock_size);
+    const size_t expected_num_commands =
+        (metablock_end - metablock_start) / 12 + 16;
+    Command* commands = 0;
+    size_t num_commands = 0;
+    size_t last_insert_len = 0;
+    size_t num_literals = 0;
+    size_t metablock_size = 0;
+    size_t cmd_alloc_size = 0;
+    BROTLI_BOOL is_last;
+    uint8_t* storage;
+    size_t storage_ix;
+
+    ContextType literal_context_mode = ChooseContextMode(&params,
+        input_buffer, metablock_start, mask, metablock_end - metablock_start);
+
+    size_t block_start;
+    for (block_start = metablock_start; block_start < metablock_end; ) {
+      size_t block_size =
+          BROTLI_MIN(size_t, metablock_end - block_start, max_block_size);
+      ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, block_size + 1);
+      size_t path_size;
+      size_t new_cmd_alloc_size;
+      if (BROTLI_IS_OOM(m)) goto oom;
+      BrotliInitZopfliNodes(nodes, block_size + 1);
+      StitchToPreviousBlockH10(hasher, block_size, block_start,
+                               input_buffer, mask);
+      path_size = BrotliZopfliComputeShortestPath(m, block_size, block_start,
+          input_buffer, mask, &params, dist_cache, hasher,
+          nodes);
+      if (BROTLI_IS_OOM(m)) goto oom;
+      /* We allocate a command buffer in the first iteration of this loop that
+         will be likely big enough for the whole metablock, so that for most
+         inputs we will not have to reallocate in later iterations. We do the
+         allocation here and not before the loop, because if the input is small,
+         this will be allocated after the Zopfli cost model is freed, so this
+         will not increase peak memory usage.
+         TODO: If the first allocation is too small, increase command
+         buffer size exponentially. */
+      new_cmd_alloc_size = BROTLI_MAX(size_t, expected_num_commands,
+                                      num_commands + path_size + 1);
+      if (cmd_alloc_size != new_cmd_alloc_size) {
+        Command* new_commands = BROTLI_ALLOC(m, Command, new_cmd_alloc_size);
+        if (BROTLI_IS_OOM(m)) goto oom;
+        cmd_alloc_size = new_cmd_alloc_size;
+        if (commands) {
+          memcpy(new_commands, commands, sizeof(Command) * num_commands);
+          BROTLI_FREE(m, commands);
+        }
+        commands = new_commands;
+      }
+      BrotliZopfliCreateCommands(block_size, block_start, &nodes[0], dist_cache,
+          &last_insert_len, &params, &commands[num_commands], &num_literals);
+      num_commands += path_size;
+      block_start += block_size;
+      metablock_size += block_size;
+      BROTLI_FREE(m, nodes);
+      if (num_literals > max_literals_per_metablock ||
+          num_commands > max_commands_per_metablock) {
+        break;  /* close the meta-block early */
+      }
+    }
+
+    if (last_insert_len > 0) {
+      InitInsertCommand(&commands[num_commands++], last_insert_len);
+      num_literals += last_insert_len;
+    }
+
+    is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size);
+    storage = NULL;
+    storage_ix = last_bytes_bits;  /* resume after the pending bits */
+
+    if (metablock_size == 0) {
+      /* Write the ISLAST and ISEMPTY bits. */
+      storage = BROTLI_ALLOC(m, uint8_t, 16);
+      if (BROTLI_IS_OOM(m)) goto oom;
+      storage[0] = (uint8_t)last_bytes;
+      storage[1] = (uint8_t)(last_bytes >> 8);
+      BrotliWriteBits(2, 3, &storage_ix, storage);
+      storage_ix = (storage_ix + 7u) & ~7u;  /* round up to a whole byte */
+    } else if (!ShouldCompress(input_buffer, mask, metablock_start,
+                               metablock_size, num_literals, num_commands)) {
+      /* Restore the distance cache, as its last update by
+         CreateBackwardReferences is now unused. */
+      memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
+      storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16);
+      if (BROTLI_IS_OOM(m)) goto oom;
+      storage[0] = (uint8_t)last_bytes;
+      storage[1] = (uint8_t)(last_bytes >> 8);
+      BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
+                                       metablock_start, mask, metablock_size,
+                                       &storage_ix, storage);
+    } else {
+      MetaBlockSplit mb;
+      BrotliEncoderParams block_params = params;
+      InitMetaBlockSplit(&mb);
+      BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask,
+                           &block_params,
+                           prev_byte, prev_byte2,
+                           commands, num_commands,
+                           literal_context_mode,
+                           &mb);
+      if (BROTLI_IS_OOM(m)) goto oom;
+      {
+        /* The number of distance symbols effectively used for distance
+           histograms. It might be less than distance alphabet size
+           for "Large Window Brotli" (32-bit). */
+        uint32_t num_effective_dist_codes = block_params.dist.alphabet_size;
+        if (num_effective_dist_codes > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) {
+          num_effective_dist_codes = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS;
+        }
+        BrotliOptimizeHistograms(num_effective_dist_codes, &mb);
+      }
+      storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 503);
+      if (BROTLI_IS_OOM(m)) goto oom;
+      storage[0] = (uint8_t)last_bytes;
+      storage[1] = (uint8_t)(last_bytes >> 8);
+      BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size,
+                           mask, prev_byte, prev_byte2,
+                           is_last,
+                           &block_params,
+                           literal_context_mode,
+                           commands, num_commands,
+                           &mb,
+                           &storage_ix, storage);
+      if (BROTLI_IS_OOM(m)) goto oom;
+      if (metablock_size + 4 < (storage_ix >> 3)) {
+        /* The compressed form came out larger than the raw data plus a small
+           margin, so store the meta-block uncompressed instead.
+           Restore the distance cache and last byte. */
+        memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
+        storage[0] = (uint8_t)last_bytes;
+        storage[1] = (uint8_t)(last_bytes >> 8);
+        storage_ix = last_bytes_bits;
+        BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
+                                         metablock_start, mask,
+                                         metablock_size, &storage_ix, storage);
+      }
+      DestroyMetaBlockSplit(m, &mb);
+    }
+    last_bytes = (uint16_t)(storage[storage_ix >> 3]);
+    last_bytes_bits = storage_ix & 7u;  /* bits of the incomplete byte */
+    metablock_start += metablock_size;
+    if (metablock_start < input_size) {
+      prev_byte = input_buffer[metablock_start - 1];
+      prev_byte2 = input_buffer[metablock_start - 2];
+    }
+    /* Save the state of the distance cache in case we need to restore it for
+       emitting an uncompressed block. */
+    memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
+
+    {
+      const size_t out_size = storage_ix >> 3;  /* whole bytes only */
+      total_out_size += out_size;
+      if (total_out_size <= max_out_size) {
+        memcpy(encoded_buffer, storage, out_size);
+        encoded_buffer += out_size;
+      } else {
+        ok = BROTLI_FALSE;  /* does not fit; this also ends the outer loop */
+      }
+    }
+    BROTLI_FREE(m, storage);
+    BROTLI_FREE(m, commands);
+  }
+
+  *encoded_size = total_out_size;
+  DestroyHasher(m, &hasher);
+  return ok;
+
+oom:
+  BrotliWipeOutMemoryManager(m);
+  return BROTLI_FALSE;
+}
+
+/* Returns an upper bound for the size of the stream produced for
+   |input_size| bytes of input: 2 for empty input, and 0 when the bound
+   cannot be represented in size_t. */
+size_t BrotliEncoderMaxCompressedSize(size_t input_size) {
+  /* [window bits / empty metadata] + N * [uncompressed] + [last empty],
+     where N is the number of complete 16 KiB pieces of the input and each
+     of them is charged 4 bytes. */
+  const size_t fixed_overhead = 2 + 3 + 1;
+  size_t bound;
+  if (input_size == 0) return 2;
+  bound = input_size + fixed_overhead + 4 * (input_size >> 14);
+  if (bound < input_size) return 0;  /* the sum wrapped around */
+  return bound;
+}
+
+/* Builds a brotli stream that carries |input| verbatim (uncompressed
+   meta-blocks) with the minimal window size.
+   |output| must provide at least BrotliEncoderMaxCompressedSize(input_size)
+   addressable bytes.
+   Returns the number of bytes written to |output|. */
+static size_t MakeUncompressedStream(
+    const uint8_t* input, size_t input_size, uint8_t* output) {
+  const uint32_t max_chunk = 1u << 24;
+  uint8_t* out = output;
+  size_t remaining;
+  if (input_size == 0) {
+    output[0] = 6;
+    return 1;
+  }
+  *out++ = 0x21;  /* window bits = 10, is_last = false */
+  *out++ = 0x03;  /* empty metadata, padding */
+  for (remaining = input_size; remaining > 0; ) {
+    const uint32_t chunk_size =
+        (remaining > max_chunk) ? max_chunk : (uint32_t)remaining;
+    uint32_t nibbles;
+    uint32_t header;
+    /* Larger chunks need extra nibbles for the length field. */
+    if (chunk_size > (1u << 20)) {
+      nibbles = 2;
+    } else if (chunk_size > (1u << 16)) {
+      nibbles = 1;
+    } else {
+      nibbles = 0;
+    }
+    /* From the lowest bit up: one zero bit, the 2-bit nibble selector,
+       (chunk_size - 1), and a single set bit right above the length field. */
+    header = (1u << (19 + 4 * nibbles)) | ((chunk_size - 1) << 3) |
+             (nibbles << 1);
+    *out++ = (uint8_t)header;
+    *out++ = (uint8_t)(header >> 8);
+    *out++ = (uint8_t)(header >> 16);
+    if (nibbles == 2) *out++ = (uint8_t)(header >> 24);
+    memcpy(out, input, chunk_size);
+    out += chunk_size;
+    input += chunk_size;
+    remaining -= chunk_size;
+  }
+  *out++ = 3;
+  return (size_t)(out - output);
+}
+
+/* Compresses |input_size| bytes from |input_buffer| into |encoded_buffer| in
+   a single call.
+   On entry |*encoded_size| is the capacity of |encoded_buffer|; on success it
+   is set to the number of bytes written. Quality 10 goes through the
+   dedicated whole-buffer path, every other quality through a temporary
+   streaming encoder instance. If compression fails, or its result exceeds
+   the bound given by BrotliEncoderMaxCompressedSize(), the input is stored
+   as an uncompressed stream instead, provided the capacity is at least that
+   bound.
+   Returns BROTLI_FALSE for a zero capacity, when the encoder instance cannot
+   be created, or when neither compression nor the uncompressed fallback is
+   possible (|*encoded_size| is set to 0 in the latter case). */
+BROTLI_BOOL BrotliEncoderCompress(
+    int quality, int lgwin, BrotliEncoderMode mode, size_t input_size,
+    const uint8_t* input_buffer, size_t* encoded_size,
+    uint8_t* encoded_buffer) {
+  const size_t capacity = *encoded_size;
+  const size_t size_bound = BrotliEncoderMaxCompressedSize(input_size);
+  BROTLI_BOOL compressed;
+
+  if (capacity == 0) {
+    /* Output buffer needs at least one byte. */
+    return BROTLI_FALSE;
+  }
+  if (input_size == 0) {
+    /* Handle the special case of empty input. */
+    *encoded_size = 1;
+    *encoded_buffer = 6;
+    return BROTLI_TRUE;
+  }
+
+  if (quality == 10) {
+    /* TODO: Implement this direct path for all quality levels. */
+    const int lg_win = BROTLI_MIN(int, BROTLI_LARGE_MAX_WINDOW_BITS,
+                                       BROTLI_MAX(int, 16, lgwin));
+    compressed = BrotliCompressBufferQuality10(lg_win, input_size,
+                                               input_buffer, encoded_size,
+                                               encoded_buffer);
+  } else {
+    BrotliEncoderState* s = BrotliEncoderCreateInstance(0, 0, 0);
+    size_t available_in = input_size;
+    const uint8_t* next_in = input_buffer;
+    size_t available_out = *encoded_size;
+    uint8_t* next_out = encoded_buffer;
+    size_t total_out = 0;
+    if (!s) {
+      return BROTLI_FALSE;
+    }
+    BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);
+    BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
+    BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode);
+    BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size);
+    if (lgwin > BROTLI_MAX_WINDOW_BITS) {
+      BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, BROTLI_TRUE);
+    }
+    compressed = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,
+        &available_in, &next_in, &available_out, &next_out, &total_out);
+    /* A stream that did not reach its finished state counts as a failure. */
+    if (!BrotliEncoderIsFinished(s)) compressed = 0;
+    *encoded_size = total_out;
+    BrotliEncoderDestroyInstance(s);
+  }
+
+  if (compressed && (size_bound == 0 || *encoded_size <= size_bound)) {
+    return BROTLI_TRUE;
+  }
+
+  /* Fallback: store the input uncompressed if the caller's buffer is large
+     enough for the worst case. */
+  *encoded_size = 0;
+  if (size_bound == 0) return BROTLI_FALSE;
+  if (capacity < size_bound) return BROTLI_FALSE;
+  *encoded_size =
+      MakeUncompressedStream(input_buffer, input_size, encoded_buffer);
+  return BROTLI_TRUE;
+}
+
+/* Appends a 6-bit padding block header after the bits still pending in the
+   encoder state and queues the resulting bytes as output. The pending bit
+   state of |s| is cleared. */
+static void InjectBytePaddingBlock(BrotliEncoderState* s) {
+  const size_t pending_bits = s->last_bytes_bits_;
+  /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */
+  const uint32_t seal = (uint32_t)s->last_bytes_ | (0x6u << pending_bits);
+  const size_t seal_bits = pending_bits + 6;
+  uint8_t* dst;
+
+  s->last_bytes_ = 0;
+  s->last_bytes_bits_ = 0;
+
+  /* If we have already created storage, then append to it.
+     Storage is valid until next block is being compressed.
+     Otherwise the small buffer embedded in the state becomes the output. */
+  if (!s->next_out_) {
+    dst = s->tiny_buf_.u8;
+    s->next_out_ = dst;
+  } else {
+    dst = s->next_out_ + s->available_out_;
+  }
+
+  dst[0] = (uint8_t)seal;
+  if (seal_bits > 8) dst[1] = (uint8_t)(seal >> 8);
+  if (seal_bits > 16) dst[2] = (uint8_t)(seal >> 16);
+  s->available_out_ += (seal_bits + 7) >> 3;
+}
+
+/* Performs one step of output handling: either injects the byte padding
+   block required by a pending flush request, or copies as much buffered
+   output as fits into the caller's buffer, updating |*next_out|,
+   |*available_out| and, if |total_out| is non-null, |*total_out|.
+   Returns BROTLI_FALSE if neither was done. */
+static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s,
+    size_t* available_out, uint8_t** next_out, size_t* total_out) {
+  size_t chunk;
+
+  if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
+      s->last_bytes_bits_ != 0) {
+    InjectBytePaddingBlock(s);
+    return BROTLI_TRUE;
+  }
+
+  /* Nothing buffered, or no room at the destination. */
+  if (s->available_out_ == 0 || *available_out == 0) return BROTLI_FALSE;
+
+  chunk = BROTLI_MIN(size_t, s->available_out_, *available_out);
+  memcpy(*next_out, s->next_out_, chunk);
+  *next_out += chunk;
+  *available_out -= chunk;
+  s->next_out_ += chunk;
+  s->available_out_ -= chunk;
+  s->total_out_ += chunk;
+  if (total_out) *total_out = s->total_out_;
+  return BROTLI_TRUE;
+}
+
+/* Ends a pending flush once all buffered output has been handed out: the
+   stream goes back to the processing state and the output pointer is
+   cleared. */
+static void CheckFlushComplete(BrotliEncoderState* s) {
+  if (s->stream_state_ != BROTLI_STREAM_FLUSH_REQUESTED) return;
+  if (s->available_out_ != 0) return;
+  s->stream_state_ = BROTLI_STREAM_PROCESSING;
+  s->next_out_ = 0;
+}
+
+static BROTLI_BOOL BrotliEncoderCompressStreamFast(
+    BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
+    const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
+    size_t* total_out) {
+  const size_t block_size_limit = (size_t)1 << s->params.lgwin;
+  const size_t buf_size = BROTLI_MIN(size_t, kCompressFragmentTwoPassBlockSize,
+      BROTLI_MIN(size_t, *available_in, block_size_limit));
+  uint32_t* tmp_command_buf = NULL;
+  uint32_t* command_buf = NULL;
+  uint8_t* tmp_literal_buf = NULL;
+  uint8_t* literal_buf = NULL;
+  MemoryManager* m = &s->memory_manager_;
+  if (s->params.quality != FAST_ONE_PASS_COMPRESSION_QUALITY &&
+      s->params.quality != FAST_TWO_PASS_COMPRESSION_QUALITY) {
+    return BROTLI_FALSE;
+  }
+  if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
+    if (!s->command_buf_ && buf_size == kCompressFragmentTwoPassBlockSize) {
+      s->command_buf_ =
+          BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
+      s->literal_buf_ =
+          BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
+      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    }
+    if (s->command_buf_) {
+      command_buf = s->command_buf_;
+      literal_buf = s->literal_buf_;
+    } else {
+      tmp_command_buf = BROTLI_ALLOC(m, uint32_t, buf_size);
+      tmp_literal_buf = BROTLI_ALLOC(m, uint8_t, buf_size);
+      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+      command_buf = tmp_command_buf;
+      literal_buf = tmp_literal_buf;
+    }
+  }
+
+  while (BROTLI_TRUE) {
+    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
+      continue;
+    }
+
+    /* Compress block only when internal output buffer is empty, stream is not
+       finished, there is no pending flush request, and there is either
+       additional input or pending operation. */
+    if (s->available_out_ == 0 &&
+        s->stream_state_ == BROTLI_STREAM_PROCESSING &&
+        (*available_in != 0 || op != BROTLI_OPERATION_PROCESS)) {
+      size_t block_size = BROTLI_MIN(size_t, block_size_limit, *available_in);
+      BROTLI_BOOL is_last =
+          (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH);
+      BROTLI_BOOL force_flush =
+          (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH);
+      size_t max_out_size = 2 * block_size + 503;
+      BROTLI_BOOL inplace = BROTLI_TRUE;
+      uint8_t* storage = NULL;
+      size_t storage_ix = s->last_bytes_bits_;
+      size_t table_size;
+      int* table;
+
+      if (force_flush && block_size == 0) {
+        s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
+        continue;
+      }
+      if (max_out_size <= *available_out) {
+        storage = *next_out;
+      } else {
+        inplace = BROTLI_FALSE;
+        storage = GetBrotliStorage(s, max_out_size);
+        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+      }
+      storage[0] = (uint8_t)s->last_bytes_;
+      storage[1] = (uint8_t)(s->last_bytes_ >> 8);
+      table = GetHashTable(s, s->params.quality, block_size, &table_size);
+      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+
+      if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
+        BrotliCompressFragmentFast(m, *next_in, block_size, is_last, table,
+            table_size, s->cmd_depths_, s->cmd_bits_, &s->cmd_code_numbits_,
+            s->cmd_code_, &storage_ix, storage);
+        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+      } else {
+        BrotliCompressFragmentTwoPass(m, *next_in, block_size, is_last,
+            command_buf, literal_buf, table, table_size,
+            &storage_ix, storage);
+        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+      }
+      *next_in += block_size;
+      *available_in -= block_size;
+      if (inplace) {
+        size_t out_bytes = storage_ix >> 3;
+        BROTLI_DCHECK(out_bytes <= *available_out);
+        BROTLI_DCHECK((storage_ix & 7) == 0 || out_bytes < *available_out);
+        *next_out += out_bytes;
+        *available_out -= out_bytes;
+        s->total_out_ += out_bytes;
+        if (total_out) *total_out = s->total_out_;
+      } else {
+        size_t out_bytes = storage_ix >> 3;
+        s->next_out_ = storage;
+        s->available_out_ = out_bytes;
+      }
+      s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
+      s->last_bytes_bits_ = storage_ix & 7u;
+
+      if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
+      if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
+      continue;
+    }
+    break;
+  }
+  BROTLI_FREE(m, tmp_command_buf);
+  BROTLI_FREE(m, tmp_literal_buf);
+  CheckFlushComplete(s);
+  return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL ProcessMetadata(  /* Emits input as a metadata block. */
+    BrotliEncoderState* s, size_t* available_in, const uint8_t** next_in,
+    size_t* available_out, uint8_t** next_out, size_t* total_out) {
+  if (*available_in > (1u << 24)) return BROTLI_FALSE;  /* Over 16 MiB. */
+  /* Switch to metadata block workflow, if required. */
+  if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
+    s->remaining_metadata_bytes_ = (uint32_t)*available_in;
+    s->stream_state_ = BROTLI_STREAM_METADATA_HEAD;
+  }
+  if (s->stream_state_ != BROTLI_STREAM_METADATA_HEAD &&
+      s->stream_state_ != BROTLI_STREAM_METADATA_BODY) {
+    return BROTLI_FALSE;  /* No other state may start or continue a block. */
+  }
+  /* Each iteration takes one step; "break" leaves when no step is possible. */
+  while (BROTLI_TRUE) {
+    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
+      continue;
+    }
+    if (s->available_out_ != 0) break;  /* Internal output still pending. */
+    /* Input position is ahead of the last flush: EncodeData with force_flush. */
+    if (s->input_pos_ != s->last_flush_pos_) {
+      BROTLI_BOOL result = EncodeData(s, BROTLI_FALSE, BROTLI_TRUE,
+          &s->available_out_, &s->next_out_);
+      if (!result) return BROTLI_FALSE;
+      continue;
+    }
+
+    if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD) {
+      s->next_out_ = s->tiny_buf_.u8;  /* Header is staged in tiny_buf_. */
+      s->available_out_ =
+          WriteMetadataHeader(s, s->remaining_metadata_bytes_, s->next_out_);
+      s->stream_state_ = BROTLI_STREAM_METADATA_BODY;
+      continue;
+    } else {
+      /* Exit workflow only when there is no more input and no more output.
+         Otherwise client may continue producing empty metadata blocks. */
+      if (s->remaining_metadata_bytes_ == 0) {
+        s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;  /* None pending. */
+        s->stream_state_ = BROTLI_STREAM_PROCESSING;
+        break;
+      }
+      if (*available_out) {
+        /* Directly copy input to output. */
+        uint32_t copy = (uint32_t)BROTLI_MIN(
+            size_t, s->remaining_metadata_bytes_, *available_out);
+        memcpy(*next_out, *next_in, copy);
+        *next_in += copy;
+        *available_in -= copy;
+        s->remaining_metadata_bytes_ -= copy;
+        *next_out += copy;
+        *available_out -= copy;
+      } else {
+        /* This guarantees progress in "TakeOutput" workflow. */
+        uint32_t copy = BROTLI_MIN(uint32_t, s->remaining_metadata_bytes_, 16);
+        s->next_out_ = s->tiny_buf_.u8;
+        memcpy(s->next_out_, *next_in, copy);
+        *next_in += copy;
+        *available_in -= copy;
+        s->remaining_metadata_bytes_ -= copy;
+        s->available_out_ = copy;  /* Staged bytes await the next output push. */
+      }
+      continue;
+    }
+  }
+
+  return BROTLI_TRUE;
+}
+
+static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) {
+  if (s->params.size_hint == 0) {  /* Only fill in a hint that is still 0. */
+    uint64_t delta = UnprocessedInputSize(s);
+    uint64_t tail = available_in;
+    uint32_t limit = 1u << 30;  /* The stored hint never exceeds 1 GiB. */
+    uint32_t total;
+    if ((delta >= limit) || (tail >= limit) || ((delta + tail) >= limit)) {
+      total = limit;  /* Saturate instead of truncating the 64-bit sum. */
+    } else {
+      total = (uint32_t)(delta + tail);
+    }
+    s->params.size_hint = total;
+  }
+}
+
+BROTLI_BOOL BrotliEncoderCompressStream(  /* One streaming compression call. */
+    BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
+    const uint8_t** next_in, size_t* available_out,uint8_t** next_out,
+    size_t* total_out) {
+  if (!EnsureInitialized(s)) return BROTLI_FALSE;
+
+  /* Unfinished metadata block; check requirements. */
+  if (s->remaining_metadata_bytes_ != BROTLI_UINT32_MAX) {
+    if (*available_in != s->remaining_metadata_bytes_) return BROTLI_FALSE;
+    if (op != BROTLI_OPERATION_EMIT_METADATA) return BROTLI_FALSE;
+  }
+
+  if (op == BROTLI_OPERATION_EMIT_METADATA) {
+    UpdateSizeHint(s, 0);  /* First data metablock might be emitted here. */
+    return ProcessMetadata(
+        s, available_in, next_in, available_out, next_out, total_out);
+  }
+  /* From here on op is not EMIT_METADATA, so metadata states are an error. */
+  if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD ||
+      s->stream_state_ == BROTLI_STREAM_METADATA_BODY) {
+    return BROTLI_FALSE;
+  }
+  /* Outside the PROCESSING state no new input is accepted. */
+  if (s->stream_state_ != BROTLI_STREAM_PROCESSING && *available_in != 0) {
+    return BROTLI_FALSE;
+  }
+  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
+      s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
+    return BrotliEncoderCompressStreamFast(s, op, available_in, next_in,
+        available_out, next_out, total_out);
+  }  /* The two fast qualities never reach the ring-buffer loop below. */
+  while (BROTLI_TRUE) {
+    size_t remaining_block_size = RemainingInputBlockSize(s);
+    /* Step 1: copy input into the ring buffer while the block accepts more. */
+    if (remaining_block_size != 0 && *available_in != 0) {
+      size_t copy_input_size =
+          BROTLI_MIN(size_t, remaining_block_size, *available_in);
+      CopyInputToRingBuffer(s, copy_input_size, *next_in);
+      *next_in += copy_input_size;
+      *available_in -= copy_input_size;
+      continue;
+    }
+
+    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
+      continue;
+    }
+
+    /* Compress data only when internal output buffer is empty, stream is not
+       finished and there is no pending flush request. */
+    if (s->available_out_ == 0 &&
+        s->stream_state_ == BROTLI_STREAM_PROCESSING) {
+      if (remaining_block_size == 0 || op != BROTLI_OPERATION_PROCESS) {
+        BROTLI_BOOL is_last = TO_BROTLI_BOOL(
+            (*available_in == 0) && op == BROTLI_OPERATION_FINISH);
+        BROTLI_BOOL force_flush = TO_BROTLI_BOOL(
+            (*available_in == 0) && op == BROTLI_OPERATION_FLUSH);
+        BROTLI_BOOL result;
+        UpdateSizeHint(s, *available_in);
+        result = EncodeData(s, is_last, force_flush,
+            &s->available_out_, &s->next_out_);
+        if (!result) return BROTLI_FALSE;
+        if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
+        if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
+        continue;
+      }
+    }
+    break;  /* Nothing more can be done in this call. */
+  }
+  CheckFlushComplete(s);
+  return BROTLI_TRUE;
+}
+
+BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* s) {
+  return TO_BROTLI_BOOL(s->stream_state_ == BROTLI_STREAM_FINISHED &&
+      !BrotliEncoderHasMoreOutput(s));  /* Finished AND fully drained. */
+}
+
+BROTLI_BOOL BrotliEncoderHasMoreOutput(BrotliEncoderState* s) {
+  return TO_BROTLI_BOOL(s->available_out_ != 0);  /* Internal bytes pending? */
+}
+
+const uint8_t* BrotliEncoderTakeOutput(BrotliEncoderState* s, size_t* size) {
+  size_t consumed_size = s->available_out_;  /* Default: take everything. */
+  uint8_t* result = s->next_out_;  /* Start of the pending internal output. */
+  if (*size) {  /* A non-zero *size caps how much is taken. */
+    consumed_size = BROTLI_MIN(size_t, *size, s->available_out_);
+  }
+  if (consumed_size) {
+    s->next_out_ += consumed_size;
+    s->available_out_ -= consumed_size;
+    s->total_out_ += consumed_size;
+    CheckFlushComplete(s);
+    *size = consumed_size;  /* Report the number of bytes handed out. */
+  } else {
+    *size = 0;
+    result = 0;  /* Null pointer when there is nothing to take. */
+  }
+  return result;
+}
+
+uint32_t BrotliEncoderVersion(void) {
+  return BROTLI_VERSION;  /* Returned unchanged as a 32-bit value. */
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/encoder_dict.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/encoder_dict.c
new file mode 100755
index 0000000000..8b2f6ad4a4
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/encoder_dict.c
@@ -0,0 +1,32 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "./encoder_dict.h"
+
+#include "../common/dictionary.h"
+#include "../common/transform.h"
+#include "./dictionary_hash.h"
+#include "./hash.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict) {
+  dict->words = BrotliGetDictionary();
+  /* Lookup tables taken from the static dictionary constants. */
+  dict->hash_table = kStaticDictionaryHash;
+  dict->buckets = kStaticDictionaryBuckets;
+  dict->dict_words = kStaticDictionaryWords;
+  /* Transform cut-off constants. */
+  dict->cutoffTransformsCount = kCutoffTransformsCount;
+  dict->cutoffTransforms = kCutoffTransforms;
+
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/encoder_dict.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/encoder_dict.h
new file mode 100755
index 0000000000..3cb6b0ac15
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/encoder_dict.h
@@ -0,0 +1,41 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#ifndef BROTLI_ENC_ENCODER_DICT_H_
+#define BROTLI_ENC_ENCODER_DICT_H_
+
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./static_dict_lut.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Dictionary data (words and transforms) for 1 possible context */
+typedef struct BrotliEncoderDictionary {
+  const BrotliDictionary* words;  /* Set from BrotliGetDictionary() on init. */
+
+  /* cut off for fast encoder */
+  uint32_t cutoffTransformsCount;
+  uint64_t cutoffTransforms;
+
+  /* from dictionary_hash.h, for fast encoder */
+  const uint16_t* hash_table;
+
+  /* from static_dict_lut.h, for slow encoder */
+  const uint16_t* buckets;
+  const DictWord* dict_words;
+} BrotliEncoderDictionary;
+
+BROTLI_INTERNAL void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_ENCODER_DICT_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/entropy_encode.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/entropy_encode.c
new file mode 100755
index 0000000000..97f9dfb82a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/entropy_encode.c
@@ -0,0 +1,501 @@
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Entropy encoding (Huffman) utilities. */
+
+#include "./entropy_encode.h"
+
+#include <string.h>  /* memset */
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+BROTLI_BOOL BrotliSetDepth(  /* Writes leaf depths; false if too deep. */
+    int p0, HuffmanTree* pool, uint8_t* depth, int max_depth) {
+  int stack[16];  /* Per level: right child still to visit, or -1. */
+  int level = 0;
+  int p = p0;  /* Node currently visited; the walk starts at root p0. */
+  BROTLI_DCHECK(max_depth <= 15);  /* Keeps stack[level] within 16 slots. */
+  stack[0] = -1;
+  while (BROTLI_TRUE) {
+    if (pool[p].index_left_ >= 0) {  /* Internal node: go left first. */
+      level++;
+      if (level > max_depth) return BROTLI_FALSE;
+      stack[level] = pool[p].index_right_or_value_;  /* Right child for later. */
+      p = pool[p].index_left_;
+      continue;
+    } else {  /* Leaf: index_right_or_value_ is the symbol. */
+      depth[pool[p].index_right_or_value_] = (uint8_t)level;
+    }
+    while (level >= 0 && stack[level] == -1) level--;  /* Backtrack. */
+    if (level < 0) return BROTLI_TRUE;  /* Whole tree visited. */
+    p = stack[level];
+    stack[level] = -1;
+  }
+}
+
+/* Comparator: smaller count first; on ties, larger value index first. */
+static BROTLI_INLINE BROTLI_BOOL SortHuffmanTree(
+    const HuffmanTree* v0, const HuffmanTree* v1) {
+  if (v0->total_count_ != v1->total_count_) {
+    return TO_BROTLI_BOOL(v0->total_count_ < v1->total_count_);
+  }
+  return TO_BROTLI_BOOL(v0->index_right_or_value_ > v1->index_right_or_value_);
+}
+
+/* This function will create a Huffman tree.
+   data[0..length) holds population counts; depth[] receives the bit depths.
+   The catch here is that the tree cannot be arbitrarily deep.
+   Brotli specifies a maximum depth of 15 bits for "code trees"
+   and 7 bits for "code length code trees."
+   tree_limit is that maximum; tree[] is scratch of >= 2 * length + 1 nodes.
+   count_limit is the value that is to be faked as the minimum value
+   and this minimum value is raised until the tree matches the
+   maximum length requirement.
+   If all counts are zero, depth[] is left untouched and nothing is built.
+   This algorithm is not of excellent performance for very long data blocks,
+   especially when population counts are longer than 2**tree_limit, but
+   we are not planning to use this with extremely long blocks.
+
+   See http://en.wikipedia.org/wiki/Huffman_coding */
+void BrotliCreateHuffmanTree(const uint32_t* data,
+                             const size_t length,
+                             const int tree_limit,
+                             HuffmanTree* tree,
+                             uint8_t* depth) {
+  uint32_t count_limit;
+  HuffmanTree sentinel;
+  InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1);
+  /* For block sizes below 64 kB, we never need to do a second iteration
+     of this loop. Probably all of our block sizes will be smaller than
+     that, so this loop is mostly of academic interest. If we actually
+     would need this, we would be better off with the Katajainen algorithm. */
+  for (count_limit = 1; ; count_limit *= 2) {
+    size_t n = 0;
+    size_t i;
+    size_t j;
+    size_t k;
+    for (i = length; i != 0;) {
+      --i;
+      if (data[i]) {
+        const uint32_t count = BROTLI_MAX(uint32_t, data[i], count_limit);
+        InitHuffmanTree(&tree[n++], count, -1, (int16_t)i);
+      }
+    }
+
+    if (n <= 1) {
+      /* n == 0 must stop here too: "n - 1" below would wrap around. */
+      if (n == 1) depth[tree[0].index_right_or_value_] = 1;  /* One element. */
+      break;
+    }
+    SortHuffmanTreeItems(tree, n, SortHuffmanTree);
+
+    /* The nodes are:
+       [0, n): the sorted leaf nodes that we start with.
+       [n]: we add a sentinel here.
+       [n + 1, 2n): new parent nodes are added here, starting from
+                    (n+1). These are naturally in ascending order.
+       [2n]: we add a sentinel at the end as well.
+       There will be (2n+1) elements at the end. */
+    tree[n] = sentinel;
+    tree[n + 1] = sentinel;
+
+    i = 0;      /* Points to the next leaf node. */
+    j = n + 1;  /* Points to the next non-leaf node. */
+    for (k = n - 1; k != 0; --k) {
+      size_t left, right;
+      if (tree[i].total_count_ <= tree[j].total_count_) {
+        left = i;
+        ++i;
+      } else {
+        left = j;
+        ++j;
+      }
+      if (tree[i].total_count_ <= tree[j].total_count_) {
+        right = i;
+        ++i;
+      } else {
+        right = j;
+        ++j;
+      }
+
+      {
+        /* The sentinel node becomes the parent node. */
+        size_t j_end = 2 * n - k;
+        tree[j_end].total_count_ =
+            tree[left].total_count_ + tree[right].total_count_;
+        tree[j_end].index_left_ = (int16_t)left;
+        tree[j_end].index_right_or_value_ = (int16_t)right;
+
+        /* Add back the last sentinel node. */
+        tree[j_end + 1] = sentinel;
+      }
+    }
+    if (BrotliSetDepth((int)(2 * n - 1), &tree[0], depth, tree_limit)) {
+      /* We need to pack the Huffman tree in tree_limit bits. If this was not
+         successful, add fake entities to the lowest values and retry. */
+      break;
+    }
+  }
+}
+
+static void Reverse(uint8_t* v, size_t start, size_t end) {
+  --end;  /* Reverses v[start..end) in place; end now indexes the last item. */
+  while (start < end) {
+    uint8_t tmp = v[start];
+    v[start] = v[end];
+    v[end] = tmp;
+    ++start;
+    --end;
+  }
+}
+
+static void BrotliWriteHuffmanTreeRepetitions(  /* Appends a non-zero run. */
+    const uint8_t previous_value,
+    const uint8_t value,
+    size_t repetitions,
+    size_t* tree_size,
+    uint8_t* tree,
+    uint8_t* extra_bits_data) {
+  BROTLI_DCHECK(repetitions > 0);
+  if (previous_value != value) {  /* New value: write it literally once. */
+    tree[*tree_size] = value;
+    extra_bits_data[*tree_size] = 0;
+    ++(*tree_size);
+    --repetitions;
+  }
+  if (repetitions == 7) {  /* 7 needs two repeat codes; literal + 6 needs one. */
+    tree[*tree_size] = value;
+    extra_bits_data[*tree_size] = 0;
+    ++(*tree_size);
+    --repetitions;
+  }
+  if (repetitions < 3) {  /* Too few for a repeat code: write literally. */
+    size_t i;
+    for (i = 0; i < repetitions; ++i) {
+      tree[*tree_size] = value;
+      extra_bits_data[*tree_size] = 0;
+      ++(*tree_size);
+    }
+  } else {
+    size_t start = *tree_size;
+    repetitions -= 3;  /* Extra bits 0 on a single code stand for 3 repeats. */
+    while (BROTLI_TRUE) {  /* 2 extra bits per code, low-order part first. */
+      tree[*tree_size] = BROTLI_REPEAT_PREVIOUS_CODE_LENGTH;
+      extra_bits_data[*tree_size] = repetitions & 0x3;
+      ++(*tree_size);
+      repetitions >>= 2;
+      if (repetitions == 0) {
+        break;
+      }
+      --repetitions;
+    }
+    Reverse(tree, start, *tree_size);  /* High-order codes go first. */
+    Reverse(extra_bits_data, start, *tree_size);
+  }
+}
+
+static void BrotliWriteHuffmanTreeRepetitionsZeros(  /* Appends a zero run. */
+    size_t repetitions,
+    size_t* tree_size,
+    uint8_t* tree,
+    uint8_t* extra_bits_data) {
+  if (repetitions == 11) {  /* 11 needs two repeat codes; literal + 10 one. */
+    tree[*tree_size] = 0;
+    extra_bits_data[*tree_size] = 0;
+    ++(*tree_size);
+    --repetitions;
+  }
+  if (repetitions < 3) {  /* Too few for a repeat code: write literal zeros. */
+    size_t i;
+    for (i = 0; i < repetitions; ++i) {
+      tree[*tree_size] = 0;
+      extra_bits_data[*tree_size] = 0;
+      ++(*tree_size);
+    }
+  } else {
+    size_t start = *tree_size;
+    repetitions -= 3;  /* Extra bits 0 on a single code stand for 3 zeros. */
+    while (BROTLI_TRUE) {  /* 3 extra bits per code, low-order part first. */
+      tree[*tree_size] = BROTLI_REPEAT_ZERO_CODE_LENGTH;
+      extra_bits_data[*tree_size] = repetitions & 0x7;
+      ++(*tree_size);
+      repetitions >>= 3;
+      if (repetitions == 0) {
+        break;
+      }
+      --repetitions;
+    }
+    Reverse(tree, start, *tree_size);  /* High-order codes go first. */
+    Reverse(extra_bits_data, start, *tree_size);
+  }
+}
+
+void BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
+                                       uint8_t* good_for_rle) {
+  size_t nonzero_count = 0;
+  size_t stride;
+  size_t limit;
+  size_t sum;
+  const size_t streak_limit = 1240;  /* About 4.84 in 24.8 fixed point. */
+  /* Let's make the Huffman code more compatible with RLE encoding. */
+  size_t i;
+  for (i = 0; i < length; i++) {
+    if (counts[i]) {
+      ++nonzero_count;
+    }
+  }
+  if (nonzero_count < 16) {
+    return;  /* Fewer than 16 used symbols: counts are left unchanged. */
+  }
+  while (length != 0 && counts[length - 1] == 0) {
+    --length;
+  }
+  if (length == 0) {
+    return;  /* All zeros. */
+  }
+  /* Now counts[0..length - 1] does not have trailing zeros. */
+  {
+    size_t nonzeros = 0;
+    uint32_t smallest_nonzero = 1 << 30;
+    for (i = 0; i < length; ++i) {
+      if (counts[i] != 0) {
+        ++nonzeros;
+        if (smallest_nonzero > counts[i]) {
+          smallest_nonzero = counts[i];
+        }
+      }
+    }
+    if (nonzeros < 5) {
+      /* Small histogram will model it well. */
+      return;
+    }
+    if (smallest_nonzero < 4) {
+      size_t zeros = length - nonzeros;
+      if (zeros < 6) {
+        for (i = 1; i < length - 1; ++i) {
+          if (counts[i - 1] != 0 && counts[i] == 0 && counts[i + 1] != 0) {
+            counts[i] = 1;  /* Fill a lone zero between two non-zeros. */
+          }
+        }
+      }
+    }
+    if (nonzeros < 28) {
+      return;  /* Only the lone-zero fill above is applied in this case. */
+    }
+  }
+  /* 2) Let's mark all population counts that already can be encoded
+     with an RLE code. */
+  memset(good_for_rle, 0, length);
+  {
+    /* Let's not spoil any of the existing good RLE codes.
+       Mark any run of 0's that is at least 5 long as good_for_rle.
+       Mark any run of equal non-0's that is at least 7 long as good_for_rle. */
+    uint32_t symbol = counts[0];
+    size_t step = 0;
+    for (i = 0; i <= length; ++i) {
+      if (i == length || counts[i] != symbol) {
+        if ((symbol == 0 && step >= 5) ||
+            (symbol != 0 && step >= 7)) {
+          size_t k;
+          for (k = 0; k < step; ++k) {
+            good_for_rle[i - k - 1] = 1;
+          }
+        }
+        step = 1;
+        if (i != length) {
+          symbol = counts[i];
+        }
+      } else {
+        ++step;
+      }
+    }
+  }
+  /* 3) Let's replace those population counts that lead to more RLE codes.
+     Math here is in 24.8 fixed point representation. */
+  stride = 0;
+  limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
+  sum = 0;
+  for (i = 0; i <= length; ++i) {
+    if (i == length || good_for_rle[i] ||
+        (i != 0 && good_for_rle[i - 1]) ||
+        (256 * counts[i] - limit + streak_limit) >= 2 * streak_limit) {
+      if (stride >= 4 || (stride >= 3 && sum == 0)) {
+        size_t k;
+        /* The stride must end, collapse what we have, if we have enough (4). */
+        size_t count = (sum + stride / 2) / stride;  /* Rounded mean. */
+        if (count == 0) {
+          count = 1;
+        }
+        if (sum == 0) {
+          /* Don't make an all zeros stride to be upgraded to ones. */
+          count = 0;
+        }
+        for (k = 0; k < stride; ++k) {
+          /* We don't want to change value at counts[i],
+             that is already belonging to the next stride. Thus - 1. */
+          counts[i - k - 1] = (uint32_t)count;
+        }
+      }
+      stride = 0;
+      sum = 0;
+      if (i < length - 2) {
+        /* All interesting strides have a count of at least 4, */
+        /* at least when non-zeros. */
+        limit = 256 * (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 420;
+      } else if (i < length) {
+        limit = 256 * counts[i];
+      } else {
+        limit = 0;
+      }
+    }
+    ++stride;
+    if (i != length) {
+      sum += counts[i];
+      if (stride >= 4) {
+        limit = (256 * sum + stride / 2) / stride;  /* Running mean, 24.8. */
+      }
+      if (stride == 4) {
+        limit += 120;
+      }
+    }
+  }
+}
+
+static void DecideOverRleUse(const uint8_t* depth, const size_t length,
+                             BROTLI_BOOL* use_rle_for_non_zero,
+                             BROTLI_BOOL* use_rle_for_zero) {
+  size_t total_reps_zero = 0;
+  size_t total_reps_non_zero = 0;
+  size_t count_reps_zero = 1;  /* Both run counters start at 1, not 0. */
+  size_t count_reps_non_zero = 1;
+  size_t i;
+  for (i = 0; i < length;) {  /* One iteration per run of equal depths. */
+    const uint8_t value = depth[i];
+    size_t reps = 1;
+    size_t k;
+    for (k = i + 1; k < length && depth[k] == value; ++k) {
+      ++reps;
+    }
+    if (reps >= 3 && value == 0) {  /* Zero runs count from length 3. */
+      total_reps_zero += reps;
+      ++count_reps_zero;
+    }
+    if (reps >= 4 && value != 0) {  /* Non-zero runs count from length 4. */
+      total_reps_non_zero += reps;
+      ++count_reps_non_zero;
+    }
+    i += reps;
+  }
+  *use_rle_for_non_zero =
+      TO_BROTLI_BOOL(total_reps_non_zero > count_reps_non_zero * 2);
+  *use_rle_for_zero = TO_BROTLI_BOOL(total_reps_zero > count_reps_zero * 2);
+}
+
+void BrotliWriteHuffmanTree(const uint8_t* depth,
+                            size_t length,
+                            size_t* tree_size,
+                            uint8_t* tree,
+                            uint8_t* extra_bits_data) {
+  uint8_t previous_value = BROTLI_INITIAL_REPEATED_CODE_LENGTH;
+  size_t i;
+  BROTLI_BOOL use_rle_for_non_zero = BROTLI_FALSE;
+  BROTLI_BOOL use_rle_for_zero = BROTLI_FALSE;
+  /* Output is appended to tree[] / extra_bits_data[] starting at *tree_size. */
+  /* Throw away trailing zeros. */
+  size_t new_length = length;
+  for (i = 0; i < length; ++i) {
+    if (depth[length - i - 1] == 0) {
+      --new_length;
+    } else {
+      break;
+    }
+  }
+
+  /* First gather statistics on if it is a good idea to do RLE. */
+  if (length > 50) {  /* Tests the original length, not new_length. */
+    /* Find RLE coding for longer codes.
+       Shorter codes seem not to benefit from RLE. */
+    DecideOverRleUse(depth, new_length,
+                     &use_rle_for_non_zero, &use_rle_for_zero);
+  }
+
+  /* Actual RLE coding. */
+  for (i = 0; i < new_length;) {
+    const uint8_t value = depth[i];
+    size_t reps = 1;  /* Stays 1 when RLE is disabled for this kind of value. */
+    if ((value != 0 && use_rle_for_non_zero) ||
+        (value == 0 && use_rle_for_zero)) {
+      size_t k;
+      for (k = i + 1; k < new_length && depth[k] == value; ++k) {
+        ++reps;
+      }
+    }
+    if (value == 0) {
+      BrotliWriteHuffmanTreeRepetitionsZeros(
+          reps, tree_size, tree, extra_bits_data);
+    } else {
+      BrotliWriteHuffmanTreeRepetitions(previous_value,
+                                        value, reps, tree_size,
+                                        tree, extra_bits_data);
+      previous_value = value;  /* Only non-zero values update this. */
+    }
+    i += reps;
+  }
+}
+
+static uint16_t BrotliReverseBits(size_t num_bits, uint16_t bits) {
+  static const size_t kLut[16] = {  /* Pre-reversed 4-bit values. */
+    0x00, 0x08, 0x04, 0x0C, 0x02, 0x0A, 0x06, 0x0E,
+    0x01, 0x09, 0x05, 0x0D, 0x03, 0x0B, 0x07, 0x0F
+  };
+  size_t retval = kLut[bits & 0x0F];  /* Reverse the lowest nibble. */
+  size_t i;
+  for (i = 4; i < num_bits; i += 4) {  /* One further nibble per iteration. */
+    retval <<= 4;
+    bits = (uint16_t)(bits >> 4);
+    retval |= kLut[bits & 0x0F];
+  }
+  retval >>= ((0 - num_bits) & 0x03);  /* Drop padding if num_bits % 4 != 0. */
+  return (uint16_t)retval;  /* The low num_bits of "bits", reversed. */
+}
+
+/* 0..15 are values for bits */
+#define MAX_HUFFMAN_BITS 16
+
+void BrotliConvertBitDepthsToSymbols(const uint8_t* depth,
+                                     size_t len,
+                                     uint16_t* bits) {
+  /* In Brotli, all bit depths are [1..15]
+     0 bit depth means that the symbol does not exist. */
+  uint16_t bl_count[MAX_HUFFMAN_BITS] = { 0 };  /* Symbols per bit depth. */
+  uint16_t next_code[MAX_HUFFMAN_BITS];  /* Next unused code per bit depth. */
+  size_t i;
+  int code = 0;
+  for (i = 0; i < len; ++i) {
+    ++bl_count[depth[i]];
+  }
+  bl_count[0] = 0;  /* Absent symbols must not consume code space. */
+  next_code[0] = 0;
+  for (i = 1; i < MAX_HUFFMAN_BITS; ++i) {
+    code = (code + bl_count[i - 1]) << 1;  /* First code of depth i. */
+    next_code[i] = (uint16_t)code;
+  }
+  for (i = 0; i < len; ++i) {
+    if (depth[i]) {  /* bits[i] is not written for absent symbols. */
+      bits[i] = BrotliReverseBits(depth[i], next_code[depth[i]]++);
+    }
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/entropy_encode.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/entropy_encode.h
new file mode 100755
index 0000000000..f23d9c379d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/entropy_encode.h
@@ -0,0 +1,122 @@
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Entropy encoding (Huffman) utilities. */
+
+#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
+#define BROTLI_ENC_ENTROPY_ENCODE_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* A node of a Huffman tree. */
+typedef struct HuffmanTree {
+  uint32_t total_count_;  /* Population count of the node. */
+  int16_t index_left_;  /* Left child index; negative for a leaf. */
+  int16_t index_right_or_value_;  /* Right child index, or a leaf's symbol. */
+} HuffmanTree;
+
+static BROTLI_INLINE void InitHuffmanTree(HuffmanTree* self, uint32_t count,
+    int16_t left, int16_t right) {  /* Plain field-by-field initializer. */
+  self->total_count_ = count;
+  self->index_left_ = left;
+  self->index_right_or_value_ = right;
+}
+
+/* Returns 1 if assignment of depths succeeded, otherwise 0. */
+BROTLI_INTERNAL BROTLI_BOOL BrotliSetDepth(
+    int p, HuffmanTree* pool, uint8_t* depth, int max_depth);
+
+/* This function will create a Huffman tree.
+
+   The (data,length) contains the population counts.
+   The tree_limit is the maximum bit depth of the Huffman codes.
+
+   The depth contains the tree, i.e., how many bits are used for
+   the symbol.
+
+   The actual Huffman tree is constructed in the tree[] array, which has to
+   be at least 2 * length + 1 long.
+
+   See http://en.wikipedia.org/wiki/Huffman_coding */
+BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t* data,
+                                             const size_t length,
+                                             const int tree_limit,
+                                             HuffmanTree* tree,
+                                             uint8_t* depth);
+
+/* Change the population counts in a way that the consequent
+   Huffman tree compression, especially its RLE-part will be more
+   likely to compress this data more efficiently.
+
+   length contains the size of the histogram.
+   counts contains the population counts.
+   good_for_rle is a buffer of at least length size */
+BROTLI_INTERNAL void BrotliOptimizeHuffmanCountsForRle(
+    size_t length, uint32_t* counts, uint8_t* good_for_rle);
+
+/* Write a Huffman tree from bit depths into the bit-stream representation
+   of a Huffman tree. The generated Huffman tree is to be compressed once
+   more using a Huffman tree */
+BROTLI_INTERNAL void BrotliWriteHuffmanTree(const uint8_t* depth,
+                                            size_t num,
+                                            size_t* tree_size,
+                                            uint8_t* tree,
+                                            uint8_t* extra_bits_data);
+
+/* Get the actual bit values for a tree of bit depths. */
+BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t* depth,
+                                                     size_t len,
+                                                     uint16_t* bits);
+
+/* Input size optimized Shell sort. */
+typedef BROTLI_BOOL (*HuffmanTreeComparator)(
+    const HuffmanTree*, const HuffmanTree*);
+static BROTLI_INLINE void SortHuffmanTreeItems(HuffmanTree* items,
+    const size_t n, HuffmanTreeComparator comparator) {
+  static const size_t gaps[] = {132, 57, 23, 10, 4, 1};  /* Largest first. */
+  if (n < 13) {
+    /* Insertion sort. */
+    size_t i;
+    for (i = 1; i < n; ++i) {
+      HuffmanTree tmp = items[i];
+      size_t k = i;  /* Slot where tmp will finally be stored. */
+      size_t j = i - 1;
+      while (comparator(&tmp, &items[j])) {
+        items[k] = items[j];
+        k = j;
+        if (!j--) break;  /* Stop at index 0 before j would be read again. */
+      }
+      items[k] = tmp;
+    }
+    return;
+  } else {
+    /* Shell sort. */
+    int g = n < 57 ? 2 : 0;  /* For n < 57 start at gap 23, skipping 132, 57. */
+    for (; g < 6; ++g) {
+      size_t gap = gaps[g];
+      size_t i;
+      for (i = gap; i < n; ++i) {
+        size_t j = i;
+        HuffmanTree tmp = items[i];
+        for (; j >= gap && comparator(&tmp, &items[j - gap]); j -= gap) {
+          items[j] = items[j - gap];
+        }
+        items[j] = tmp;
+      }
+    }
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_ENTROPY_ENCODE_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/entropy_encode_static.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/entropy_encode_static.h
new file mode 100755
index 0000000000..62b99a954c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/entropy_encode_static.h
@@ -0,0 +1,539 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Static entropy codes used for faster meta-block encoding. */
+
+#ifndef BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
+#define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./write_bits.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static const uint8_t kCodeLengthDepth[18] = {
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 0, 4, 4,
+};
+
+static const uint8_t kStaticCommandCodeDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+};
+
+static const uint8_t kStaticDistanceCodeDepth[64] = {
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+};
+
+static const uint32_t kCodeLengthBits[18] = {
+  0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 15, 31, 0, 11, 7,
+};
+
+static BROTLI_INLINE void StoreStaticCodeLengthCode(size_t* storage_ix,
+                                                    uint8_t* storage) {
+  const uint64_t pattern = BROTLI_MAKE_UINT64_T(0x0000FFu, 0x55555554u);
+  BrotliWriteBits(40, pattern, storage_ix, storage); /* one 40-bit write */
+}
+
+static const uint64_t kZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
+  0x00000000, 0x00000000, 0x00000000, 0x00000007, 0x00000017, 0x00000027,
+  0x00000037, 0x00000047, 0x00000057, 0x00000067, 0x00000077, 0x00000770,
+  0x00000b87, 0x00001387, 0x00001b87, 0x00002387, 0x00002b87, 0x00003387,
+  0x00003b87, 0x00000397, 0x00000b97, 0x00001397, 0x00001b97, 0x00002397,
+  0x00002b97, 0x00003397, 0x00003b97, 0x000003a7, 0x00000ba7, 0x000013a7,
+  0x00001ba7, 0x000023a7, 0x00002ba7, 0x000033a7, 0x00003ba7, 0x000003b7,
+  0x00000bb7, 0x000013b7, 0x00001bb7, 0x000023b7, 0x00002bb7, 0x000033b7,
+  0x00003bb7, 0x000003c7, 0x00000bc7, 0x000013c7, 0x00001bc7, 0x000023c7,
+  0x00002bc7, 0x000033c7, 0x00003bc7, 0x000003d7, 0x00000bd7, 0x000013d7,
+  0x00001bd7, 0x000023d7, 0x00002bd7, 0x000033d7, 0x00003bd7, 0x000003e7,
+  0x00000be7, 0x000013e7, 0x00001be7, 0x000023e7, 0x00002be7, 0x000033e7,
+  0x00003be7, 0x000003f7, 0x00000bf7, 0x000013f7, 0x00001bf7, 0x000023f7,
+  0x00002bf7, 0x000033f7, 0x00003bf7, 0x0001c387, 0x0005c387, 0x0009c387,
+  0x000dc387, 0x0011c387, 0x0015c387, 0x0019c387, 0x001dc387, 0x0001cb87,
+  0x0005cb87, 0x0009cb87, 0x000dcb87, 0x0011cb87, 0x0015cb87, 0x0019cb87,
+  0x001dcb87, 0x0001d387, 0x0005d387, 0x0009d387, 0x000dd387, 0x0011d387,
+  0x0015d387, 0x0019d387, 0x001dd387, 0x0001db87, 0x0005db87, 0x0009db87,
+  0x000ddb87, 0x0011db87, 0x0015db87, 0x0019db87, 0x001ddb87, 0x0001e387,
+  0x0005e387, 0x0009e387, 0x000de387, 0x0011e387, 0x0015e387, 0x0019e387,
+  0x001de387, 0x0001eb87, 0x0005eb87, 0x0009eb87, 0x000deb87, 0x0011eb87,
+  0x0015eb87, 0x0019eb87, 0x001deb87, 0x0001f387, 0x0005f387, 0x0009f387,
+  0x000df387, 0x0011f387, 0x0015f387, 0x0019f387, 0x001df387, 0x0001fb87,
+  0x0005fb87, 0x0009fb87, 0x000dfb87, 0x0011fb87, 0x0015fb87, 0x0019fb87,
+  0x001dfb87, 0x0001c397, 0x0005c397, 0x0009c397, 0x000dc397, 0x0011c397,
+  0x0015c397, 0x0019c397, 0x001dc397, 0x0001cb97, 0x0005cb97, 0x0009cb97,
+  0x000dcb97, 0x0011cb97, 0x0015cb97, 0x0019cb97, 0x001dcb97, 0x0001d397,
+  0x0005d397, 0x0009d397, 0x000dd397, 0x0011d397, 0x0015d397, 0x0019d397,
+  0x001dd397, 0x0001db97, 0x0005db97, 0x0009db97, 0x000ddb97, 0x0011db97,
+  0x0015db97, 0x0019db97, 0x001ddb97, 0x0001e397, 0x0005e397, 0x0009e397,
+  0x000de397, 0x0011e397, 0x0015e397, 0x0019e397, 0x001de397, 0x0001eb97,
+  0x0005eb97, 0x0009eb97, 0x000deb97, 0x0011eb97, 0x0015eb97, 0x0019eb97,
+  0x001deb97, 0x0001f397, 0x0005f397, 0x0009f397, 0x000df397, 0x0011f397,
+  0x0015f397, 0x0019f397, 0x001df397, 0x0001fb97, 0x0005fb97, 0x0009fb97,
+  0x000dfb97, 0x0011fb97, 0x0015fb97, 0x0019fb97, 0x001dfb97, 0x0001c3a7,
+  0x0005c3a7, 0x0009c3a7, 0x000dc3a7, 0x0011c3a7, 0x0015c3a7, 0x0019c3a7,
+  0x001dc3a7, 0x0001cba7, 0x0005cba7, 0x0009cba7, 0x000dcba7, 0x0011cba7,
+  0x0015cba7, 0x0019cba7, 0x001dcba7, 0x0001d3a7, 0x0005d3a7, 0x0009d3a7,
+  0x000dd3a7, 0x0011d3a7, 0x0015d3a7, 0x0019d3a7, 0x001dd3a7, 0x0001dba7,
+  0x0005dba7, 0x0009dba7, 0x000ddba7, 0x0011dba7, 0x0015dba7, 0x0019dba7,
+  0x001ddba7, 0x0001e3a7, 0x0005e3a7, 0x0009e3a7, 0x000de3a7, 0x0011e3a7,
+  0x0015e3a7, 0x0019e3a7, 0x001de3a7, 0x0001eba7, 0x0005eba7, 0x0009eba7,
+  0x000deba7, 0x0011eba7, 0x0015eba7, 0x0019eba7, 0x001deba7, 0x0001f3a7,
+  0x0005f3a7, 0x0009f3a7, 0x000df3a7, 0x0011f3a7, 0x0015f3a7, 0x0019f3a7,
+  0x001df3a7, 0x0001fba7, 0x0005fba7, 0x0009fba7, 0x000dfba7, 0x0011fba7,
+  0x0015fba7, 0x0019fba7, 0x001dfba7, 0x0001c3b7, 0x0005c3b7, 0x0009c3b7,
+  0x000dc3b7, 0x0011c3b7, 0x0015c3b7, 0x0019c3b7, 0x001dc3b7, 0x0001cbb7,
+  0x0005cbb7, 0x0009cbb7, 0x000dcbb7, 0x0011cbb7, 0x0015cbb7, 0x0019cbb7,
+  0x001dcbb7, 0x0001d3b7, 0x0005d3b7, 0x0009d3b7, 0x000dd3b7, 0x0011d3b7,
+  0x0015d3b7, 0x0019d3b7, 0x001dd3b7, 0x0001dbb7, 0x0005dbb7, 0x0009dbb7,
+  0x000ddbb7, 0x0011dbb7, 0x0015dbb7, 0x0019dbb7, 0x001ddbb7, 0x0001e3b7,
+  0x0005e3b7, 0x0009e3b7, 0x000de3b7, 0x0011e3b7, 0x0015e3b7, 0x0019e3b7,
+  0x001de3b7, 0x0001ebb7, 0x0005ebb7, 0x0009ebb7, 0x000debb7, 0x0011ebb7,
+  0x0015ebb7, 0x0019ebb7, 0x001debb7, 0x0001f3b7, 0x0005f3b7, 0x0009f3b7,
+  0x000df3b7, 0x0011f3b7, 0x0015f3b7, 0x0019f3b7, 0x001df3b7, 0x0001fbb7,
+  0x0005fbb7, 0x0009fbb7, 0x000dfbb7, 0x0011fbb7, 0x0015fbb7, 0x0019fbb7,
+  0x001dfbb7, 0x0001c3c7, 0x0005c3c7, 0x0009c3c7, 0x000dc3c7, 0x0011c3c7,
+  0x0015c3c7, 0x0019c3c7, 0x001dc3c7, 0x0001cbc7, 0x0005cbc7, 0x0009cbc7,
+  0x000dcbc7, 0x0011cbc7, 0x0015cbc7, 0x0019cbc7, 0x001dcbc7, 0x0001d3c7,
+  0x0005d3c7, 0x0009d3c7, 0x000dd3c7, 0x0011d3c7, 0x0015d3c7, 0x0019d3c7,
+  0x001dd3c7, 0x0001dbc7, 0x0005dbc7, 0x0009dbc7, 0x000ddbc7, 0x0011dbc7,
+  0x0015dbc7, 0x0019dbc7, 0x001ddbc7, 0x0001e3c7, 0x0005e3c7, 0x0009e3c7,
+  0x000de3c7, 0x0011e3c7, 0x0015e3c7, 0x0019e3c7, 0x001de3c7, 0x0001ebc7,
+  0x0005ebc7, 0x0009ebc7, 0x000debc7, 0x0011ebc7, 0x0015ebc7, 0x0019ebc7,
+  0x001debc7, 0x0001f3c7, 0x0005f3c7, 0x0009f3c7, 0x000df3c7, 0x0011f3c7,
+  0x0015f3c7, 0x0019f3c7, 0x001df3c7, 0x0001fbc7, 0x0005fbc7, 0x0009fbc7,
+  0x000dfbc7, 0x0011fbc7, 0x0015fbc7, 0x0019fbc7, 0x001dfbc7, 0x0001c3d7,
+  0x0005c3d7, 0x0009c3d7, 0x000dc3d7, 0x0011c3d7, 0x0015c3d7, 0x0019c3d7,
+  0x001dc3d7, 0x0001cbd7, 0x0005cbd7, 0x0009cbd7, 0x000dcbd7, 0x0011cbd7,
+  0x0015cbd7, 0x0019cbd7, 0x001dcbd7, 0x0001d3d7, 0x0005d3d7, 0x0009d3d7,
+  0x000dd3d7, 0x0011d3d7, 0x0015d3d7, 0x0019d3d7, 0x001dd3d7, 0x0001dbd7,
+  0x0005dbd7, 0x0009dbd7, 0x000ddbd7, 0x0011dbd7, 0x0015dbd7, 0x0019dbd7,
+  0x001ddbd7, 0x0001e3d7, 0x0005e3d7, 0x0009e3d7, 0x000de3d7, 0x0011e3d7,
+  0x0015e3d7, 0x0019e3d7, 0x001de3d7, 0x0001ebd7, 0x0005ebd7, 0x0009ebd7,
+  0x000debd7, 0x0011ebd7, 0x0015ebd7, 0x0019ebd7, 0x001debd7, 0x0001f3d7,
+  0x0005f3d7, 0x0009f3d7, 0x000df3d7, 0x0011f3d7, 0x0015f3d7, 0x0019f3d7,
+  0x001df3d7, 0x0001fbd7, 0x0005fbd7, 0x0009fbd7, 0x000dfbd7, 0x0011fbd7,
+  0x0015fbd7, 0x0019fbd7, 0x001dfbd7, 0x0001c3e7, 0x0005c3e7, 0x0009c3e7,
+  0x000dc3e7, 0x0011c3e7, 0x0015c3e7, 0x0019c3e7, 0x001dc3e7, 0x0001cbe7,
+  0x0005cbe7, 0x0009cbe7, 0x000dcbe7, 0x0011cbe7, 0x0015cbe7, 0x0019cbe7,
+  0x001dcbe7, 0x0001d3e7, 0x0005d3e7, 0x0009d3e7, 0x000dd3e7, 0x0011d3e7,
+  0x0015d3e7, 0x0019d3e7, 0x001dd3e7, 0x0001dbe7, 0x0005dbe7, 0x0009dbe7,
+  0x000ddbe7, 0x0011dbe7, 0x0015dbe7, 0x0019dbe7, 0x001ddbe7, 0x0001e3e7,
+  0x0005e3e7, 0x0009e3e7, 0x000de3e7, 0x0011e3e7, 0x0015e3e7, 0x0019e3e7,
+  0x001de3e7, 0x0001ebe7, 0x0005ebe7, 0x0009ebe7, 0x000debe7, 0x0011ebe7,
+  0x0015ebe7, 0x0019ebe7, 0x001debe7, 0x0001f3e7, 0x0005f3e7, 0x0009f3e7,
+  0x000df3e7, 0x0011f3e7, 0x0015f3e7, 0x0019f3e7, 0x001df3e7, 0x0001fbe7,
+  0x0005fbe7, 0x0009fbe7, 0x000dfbe7, 0x0011fbe7, 0x0015fbe7, 0x0019fbe7,
+  0x001dfbe7, 0x0001c3f7, 0x0005c3f7, 0x0009c3f7, 0x000dc3f7, 0x0011c3f7,
+  0x0015c3f7, 0x0019c3f7, 0x001dc3f7, 0x0001cbf7, 0x0005cbf7, 0x0009cbf7,
+  0x000dcbf7, 0x0011cbf7, 0x0015cbf7, 0x0019cbf7, 0x001dcbf7, 0x0001d3f7,
+  0x0005d3f7, 0x0009d3f7, 0x000dd3f7, 0x0011d3f7, 0x0015d3f7, 0x0019d3f7,
+  0x001dd3f7, 0x0001dbf7, 0x0005dbf7, 0x0009dbf7, 0x000ddbf7, 0x0011dbf7,
+  0x0015dbf7, 0x0019dbf7, 0x001ddbf7, 0x0001e3f7, 0x0005e3f7, 0x0009e3f7,
+  0x000de3f7, 0x0011e3f7, 0x0015e3f7, 0x0019e3f7, 0x001de3f7, 0x0001ebf7,
+  0x0005ebf7, 0x0009ebf7, 0x000debf7, 0x0011ebf7, 0x0015ebf7, 0x0019ebf7,
+  0x001debf7, 0x0001f3f7, 0x0005f3f7, 0x0009f3f7, 0x000df3f7, 0x0011f3f7,
+  0x0015f3f7, 0x0019f3f7, 0x001df3f7, 0x0001fbf7, 0x0005fbf7, 0x0009fbf7,
+  0x000dfbf7, 0x0011fbf7, 0x0015fbf7, 0x0019fbf7, 0x001dfbf7, 0x00e1c387,
+  0x02e1c387, 0x04e1c387, 0x06e1c387, 0x08e1c387, 0x0ae1c387, 0x0ce1c387,
+  0x0ee1c387, 0x00e5c387, 0x02e5c387, 0x04e5c387, 0x06e5c387, 0x08e5c387,
+  0x0ae5c387, 0x0ce5c387, 0x0ee5c387, 0x00e9c387, 0x02e9c387, 0x04e9c387,
+  0x06e9c387, 0x08e9c387, 0x0ae9c387, 0x0ce9c387, 0x0ee9c387, 0x00edc387,
+  0x02edc387, 0x04edc387, 0x06edc387, 0x08edc387, 0x0aedc387, 0x0cedc387,
+  0x0eedc387, 0x00f1c387, 0x02f1c387, 0x04f1c387, 0x06f1c387, 0x08f1c387,
+  0x0af1c387, 0x0cf1c387, 0x0ef1c387, 0x00f5c387, 0x02f5c387, 0x04f5c387,
+  0x06f5c387, 0x08f5c387, 0x0af5c387, 0x0cf5c387, 0x0ef5c387, 0x00f9c387,
+  0x02f9c387, 0x04f9c387, 0x06f9c387, 0x08f9c387, 0x0af9c387, 0x0cf9c387,
+  0x0ef9c387, 0x00fdc387, 0x02fdc387, 0x04fdc387, 0x06fdc387, 0x08fdc387,
+  0x0afdc387, 0x0cfdc387, 0x0efdc387, 0x00e1cb87, 0x02e1cb87, 0x04e1cb87,
+  0x06e1cb87, 0x08e1cb87, 0x0ae1cb87, 0x0ce1cb87, 0x0ee1cb87, 0x00e5cb87,
+  0x02e5cb87, 0x04e5cb87, 0x06e5cb87, 0x08e5cb87, 0x0ae5cb87, 0x0ce5cb87,
+  0x0ee5cb87, 0x00e9cb87, 0x02e9cb87, 0x04e9cb87, 0x06e9cb87, 0x08e9cb87,
+  0x0ae9cb87, 0x0ce9cb87, 0x0ee9cb87, 0x00edcb87, 0x02edcb87, 0x04edcb87,
+  0x06edcb87, 0x08edcb87, 0x0aedcb87, 0x0cedcb87, 0x0eedcb87, 0x00f1cb87,
+  0x02f1cb87, 0x04f1cb87, 0x06f1cb87, 0x08f1cb87, 0x0af1cb87, 0x0cf1cb87,
+  0x0ef1cb87, 0x00f5cb87, 0x02f5cb87, 0x04f5cb87, 0x06f5cb87, 0x08f5cb87,
+  0x0af5cb87, 0x0cf5cb87, 0x0ef5cb87, 0x00f9cb87, 0x02f9cb87, 0x04f9cb87,
+  0x06f9cb87, 0x08f9cb87,
+};
+
+static const uint32_t kZeroRepsDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
+   0,  4,  8,  7,  7,  7,  7,  7,  7,  7,  7, 11, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+  28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+};
+
+static const uint64_t kNonZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
+  0x0000000b, 0x0000001b, 0x0000002b, 0x0000003b, 0x000002cb, 0x000006cb,
+  0x00000acb, 0x00000ecb, 0x000002db, 0x000006db, 0x00000adb, 0x00000edb,
+  0x000002eb, 0x000006eb, 0x00000aeb, 0x00000eeb, 0x000002fb, 0x000006fb,
+  0x00000afb, 0x00000efb, 0x0000b2cb, 0x0001b2cb, 0x0002b2cb, 0x0003b2cb,
+  0x0000b6cb, 0x0001b6cb, 0x0002b6cb, 0x0003b6cb, 0x0000bacb, 0x0001bacb,
+  0x0002bacb, 0x0003bacb, 0x0000becb, 0x0001becb, 0x0002becb, 0x0003becb,
+  0x0000b2db, 0x0001b2db, 0x0002b2db, 0x0003b2db, 0x0000b6db, 0x0001b6db,
+  0x0002b6db, 0x0003b6db, 0x0000badb, 0x0001badb, 0x0002badb, 0x0003badb,
+  0x0000bedb, 0x0001bedb, 0x0002bedb, 0x0003bedb, 0x0000b2eb, 0x0001b2eb,
+  0x0002b2eb, 0x0003b2eb, 0x0000b6eb, 0x0001b6eb, 0x0002b6eb, 0x0003b6eb,
+  0x0000baeb, 0x0001baeb, 0x0002baeb, 0x0003baeb, 0x0000beeb, 0x0001beeb,
+  0x0002beeb, 0x0003beeb, 0x0000b2fb, 0x0001b2fb, 0x0002b2fb, 0x0003b2fb,
+  0x0000b6fb, 0x0001b6fb, 0x0002b6fb, 0x0003b6fb, 0x0000bafb, 0x0001bafb,
+  0x0002bafb, 0x0003bafb, 0x0000befb, 0x0001befb, 0x0002befb, 0x0003befb,
+  0x002cb2cb, 0x006cb2cb, 0x00acb2cb, 0x00ecb2cb, 0x002db2cb, 0x006db2cb,
+  0x00adb2cb, 0x00edb2cb, 0x002eb2cb, 0x006eb2cb, 0x00aeb2cb, 0x00eeb2cb,
+  0x002fb2cb, 0x006fb2cb, 0x00afb2cb, 0x00efb2cb, 0x002cb6cb, 0x006cb6cb,
+  0x00acb6cb, 0x00ecb6cb, 0x002db6cb, 0x006db6cb, 0x00adb6cb, 0x00edb6cb,
+  0x002eb6cb, 0x006eb6cb, 0x00aeb6cb, 0x00eeb6cb, 0x002fb6cb, 0x006fb6cb,
+  0x00afb6cb, 0x00efb6cb, 0x002cbacb, 0x006cbacb, 0x00acbacb, 0x00ecbacb,
+  0x002dbacb, 0x006dbacb, 0x00adbacb, 0x00edbacb, 0x002ebacb, 0x006ebacb,
+  0x00aebacb, 0x00eebacb, 0x002fbacb, 0x006fbacb, 0x00afbacb, 0x00efbacb,
+  0x002cbecb, 0x006cbecb, 0x00acbecb, 0x00ecbecb, 0x002dbecb, 0x006dbecb,
+  0x00adbecb, 0x00edbecb, 0x002ebecb, 0x006ebecb, 0x00aebecb, 0x00eebecb,
+  0x002fbecb, 0x006fbecb, 0x00afbecb, 0x00efbecb, 0x002cb2db, 0x006cb2db,
+  0x00acb2db, 0x00ecb2db, 0x002db2db, 0x006db2db, 0x00adb2db, 0x00edb2db,
+  0x002eb2db, 0x006eb2db, 0x00aeb2db, 0x00eeb2db, 0x002fb2db, 0x006fb2db,
+  0x00afb2db, 0x00efb2db, 0x002cb6db, 0x006cb6db, 0x00acb6db, 0x00ecb6db,
+  0x002db6db, 0x006db6db, 0x00adb6db, 0x00edb6db, 0x002eb6db, 0x006eb6db,
+  0x00aeb6db, 0x00eeb6db, 0x002fb6db, 0x006fb6db, 0x00afb6db, 0x00efb6db,
+  0x002cbadb, 0x006cbadb, 0x00acbadb, 0x00ecbadb, 0x002dbadb, 0x006dbadb,
+  0x00adbadb, 0x00edbadb, 0x002ebadb, 0x006ebadb, 0x00aebadb, 0x00eebadb,
+  0x002fbadb, 0x006fbadb, 0x00afbadb, 0x00efbadb, 0x002cbedb, 0x006cbedb,
+  0x00acbedb, 0x00ecbedb, 0x002dbedb, 0x006dbedb, 0x00adbedb, 0x00edbedb,
+  0x002ebedb, 0x006ebedb, 0x00aebedb, 0x00eebedb, 0x002fbedb, 0x006fbedb,
+  0x00afbedb, 0x00efbedb, 0x002cb2eb, 0x006cb2eb, 0x00acb2eb, 0x00ecb2eb,
+  0x002db2eb, 0x006db2eb, 0x00adb2eb, 0x00edb2eb, 0x002eb2eb, 0x006eb2eb,
+  0x00aeb2eb, 0x00eeb2eb, 0x002fb2eb, 0x006fb2eb, 0x00afb2eb, 0x00efb2eb,
+  0x002cb6eb, 0x006cb6eb, 0x00acb6eb, 0x00ecb6eb, 0x002db6eb, 0x006db6eb,
+  0x00adb6eb, 0x00edb6eb, 0x002eb6eb, 0x006eb6eb, 0x00aeb6eb, 0x00eeb6eb,
+  0x002fb6eb, 0x006fb6eb, 0x00afb6eb, 0x00efb6eb, 0x002cbaeb, 0x006cbaeb,
+  0x00acbaeb, 0x00ecbaeb, 0x002dbaeb, 0x006dbaeb, 0x00adbaeb, 0x00edbaeb,
+  0x002ebaeb, 0x006ebaeb, 0x00aebaeb, 0x00eebaeb, 0x002fbaeb, 0x006fbaeb,
+  0x00afbaeb, 0x00efbaeb, 0x002cbeeb, 0x006cbeeb, 0x00acbeeb, 0x00ecbeeb,
+  0x002dbeeb, 0x006dbeeb, 0x00adbeeb, 0x00edbeeb, 0x002ebeeb, 0x006ebeeb,
+  0x00aebeeb, 0x00eebeeb, 0x002fbeeb, 0x006fbeeb, 0x00afbeeb, 0x00efbeeb,
+  0x002cb2fb, 0x006cb2fb, 0x00acb2fb, 0x00ecb2fb, 0x002db2fb, 0x006db2fb,
+  0x00adb2fb, 0x00edb2fb, 0x002eb2fb, 0x006eb2fb, 0x00aeb2fb, 0x00eeb2fb,
+  0x002fb2fb, 0x006fb2fb, 0x00afb2fb, 0x00efb2fb, 0x002cb6fb, 0x006cb6fb,
+  0x00acb6fb, 0x00ecb6fb, 0x002db6fb, 0x006db6fb, 0x00adb6fb, 0x00edb6fb,
+  0x002eb6fb, 0x006eb6fb, 0x00aeb6fb, 0x00eeb6fb, 0x002fb6fb, 0x006fb6fb,
+  0x00afb6fb, 0x00efb6fb, 0x002cbafb, 0x006cbafb, 0x00acbafb, 0x00ecbafb,
+  0x002dbafb, 0x006dbafb, 0x00adbafb, 0x00edbafb, 0x002ebafb, 0x006ebafb,
+  0x00aebafb, 0x00eebafb, 0x002fbafb, 0x006fbafb, 0x00afbafb, 0x00efbafb,
+  0x002cbefb, 0x006cbefb, 0x00acbefb, 0x00ecbefb, 0x002dbefb, 0x006dbefb,
+  0x00adbefb, 0x00edbefb, 0x002ebefb, 0x006ebefb, 0x00aebefb, 0x00eebefb,
+  0x002fbefb, 0x006fbefb, 0x00afbefb, 0x00efbefb, 0x0b2cb2cb, 0x1b2cb2cb,
+  0x2b2cb2cb, 0x3b2cb2cb, 0x0b6cb2cb, 0x1b6cb2cb, 0x2b6cb2cb, 0x3b6cb2cb,
+  0x0bacb2cb, 0x1bacb2cb, 0x2bacb2cb, 0x3bacb2cb, 0x0becb2cb, 0x1becb2cb,
+  0x2becb2cb, 0x3becb2cb, 0x0b2db2cb, 0x1b2db2cb, 0x2b2db2cb, 0x3b2db2cb,
+  0x0b6db2cb, 0x1b6db2cb, 0x2b6db2cb, 0x3b6db2cb, 0x0badb2cb, 0x1badb2cb,
+  0x2badb2cb, 0x3badb2cb, 0x0bedb2cb, 0x1bedb2cb, 0x2bedb2cb, 0x3bedb2cb,
+  0x0b2eb2cb, 0x1b2eb2cb, 0x2b2eb2cb, 0x3b2eb2cb, 0x0b6eb2cb, 0x1b6eb2cb,
+  0x2b6eb2cb, 0x3b6eb2cb, 0x0baeb2cb, 0x1baeb2cb, 0x2baeb2cb, 0x3baeb2cb,
+  0x0beeb2cb, 0x1beeb2cb, 0x2beeb2cb, 0x3beeb2cb, 0x0b2fb2cb, 0x1b2fb2cb,
+  0x2b2fb2cb, 0x3b2fb2cb, 0x0b6fb2cb, 0x1b6fb2cb, 0x2b6fb2cb, 0x3b6fb2cb,
+  0x0bafb2cb, 0x1bafb2cb, 0x2bafb2cb, 0x3bafb2cb, 0x0befb2cb, 0x1befb2cb,
+  0x2befb2cb, 0x3befb2cb, 0x0b2cb6cb, 0x1b2cb6cb, 0x2b2cb6cb, 0x3b2cb6cb,
+  0x0b6cb6cb, 0x1b6cb6cb, 0x2b6cb6cb, 0x3b6cb6cb, 0x0bacb6cb, 0x1bacb6cb,
+  0x2bacb6cb, 0x3bacb6cb, 0x0becb6cb, 0x1becb6cb, 0x2becb6cb, 0x3becb6cb,
+  0x0b2db6cb, 0x1b2db6cb, 0x2b2db6cb, 0x3b2db6cb, 0x0b6db6cb, 0x1b6db6cb,
+  0x2b6db6cb, 0x3b6db6cb, 0x0badb6cb, 0x1badb6cb, 0x2badb6cb, 0x3badb6cb,
+  0x0bedb6cb, 0x1bedb6cb, 0x2bedb6cb, 0x3bedb6cb, 0x0b2eb6cb, 0x1b2eb6cb,
+  0x2b2eb6cb, 0x3b2eb6cb, 0x0b6eb6cb, 0x1b6eb6cb, 0x2b6eb6cb, 0x3b6eb6cb,
+  0x0baeb6cb, 0x1baeb6cb, 0x2baeb6cb, 0x3baeb6cb, 0x0beeb6cb, 0x1beeb6cb,
+  0x2beeb6cb, 0x3beeb6cb, 0x0b2fb6cb, 0x1b2fb6cb, 0x2b2fb6cb, 0x3b2fb6cb,
+  0x0b6fb6cb, 0x1b6fb6cb, 0x2b6fb6cb, 0x3b6fb6cb, 0x0bafb6cb, 0x1bafb6cb,
+  0x2bafb6cb, 0x3bafb6cb, 0x0befb6cb, 0x1befb6cb, 0x2befb6cb, 0x3befb6cb,
+  0x0b2cbacb, 0x1b2cbacb, 0x2b2cbacb, 0x3b2cbacb, 0x0b6cbacb, 0x1b6cbacb,
+  0x2b6cbacb, 0x3b6cbacb, 0x0bacbacb, 0x1bacbacb, 0x2bacbacb, 0x3bacbacb,
+  0x0becbacb, 0x1becbacb, 0x2becbacb, 0x3becbacb, 0x0b2dbacb, 0x1b2dbacb,
+  0x2b2dbacb, 0x3b2dbacb, 0x0b6dbacb, 0x1b6dbacb, 0x2b6dbacb, 0x3b6dbacb,
+  0x0badbacb, 0x1badbacb, 0x2badbacb, 0x3badbacb, 0x0bedbacb, 0x1bedbacb,
+  0x2bedbacb, 0x3bedbacb, 0x0b2ebacb, 0x1b2ebacb, 0x2b2ebacb, 0x3b2ebacb,
+  0x0b6ebacb, 0x1b6ebacb, 0x2b6ebacb, 0x3b6ebacb, 0x0baebacb, 0x1baebacb,
+  0x2baebacb, 0x3baebacb, 0x0beebacb, 0x1beebacb, 0x2beebacb, 0x3beebacb,
+  0x0b2fbacb, 0x1b2fbacb, 0x2b2fbacb, 0x3b2fbacb, 0x0b6fbacb, 0x1b6fbacb,
+  0x2b6fbacb, 0x3b6fbacb, 0x0bafbacb, 0x1bafbacb, 0x2bafbacb, 0x3bafbacb,
+  0x0befbacb, 0x1befbacb, 0x2befbacb, 0x3befbacb, 0x0b2cbecb, 0x1b2cbecb,
+  0x2b2cbecb, 0x3b2cbecb, 0x0b6cbecb, 0x1b6cbecb, 0x2b6cbecb, 0x3b6cbecb,
+  0x0bacbecb, 0x1bacbecb, 0x2bacbecb, 0x3bacbecb, 0x0becbecb, 0x1becbecb,
+  0x2becbecb, 0x3becbecb, 0x0b2dbecb, 0x1b2dbecb, 0x2b2dbecb, 0x3b2dbecb,
+  0x0b6dbecb, 0x1b6dbecb, 0x2b6dbecb, 0x3b6dbecb, 0x0badbecb, 0x1badbecb,
+  0x2badbecb, 0x3badbecb, 0x0bedbecb, 0x1bedbecb, 0x2bedbecb, 0x3bedbecb,
+  0x0b2ebecb, 0x1b2ebecb, 0x2b2ebecb, 0x3b2ebecb, 0x0b6ebecb, 0x1b6ebecb,
+  0x2b6ebecb, 0x3b6ebecb, 0x0baebecb, 0x1baebecb, 0x2baebecb, 0x3baebecb,
+  0x0beebecb, 0x1beebecb, 0x2beebecb, 0x3beebecb, 0x0b2fbecb, 0x1b2fbecb,
+  0x2b2fbecb, 0x3b2fbecb, 0x0b6fbecb, 0x1b6fbecb, 0x2b6fbecb, 0x3b6fbecb,
+  0x0bafbecb, 0x1bafbecb, 0x2bafbecb, 0x3bafbecb, 0x0befbecb, 0x1befbecb,
+  0x2befbecb, 0x3befbecb, 0x0b2cb2db, 0x1b2cb2db, 0x2b2cb2db, 0x3b2cb2db,
+  0x0b6cb2db, 0x1b6cb2db, 0x2b6cb2db, 0x3b6cb2db, 0x0bacb2db, 0x1bacb2db,
+  0x2bacb2db, 0x3bacb2db, 0x0becb2db, 0x1becb2db, 0x2becb2db, 0x3becb2db,
+  0x0b2db2db, 0x1b2db2db, 0x2b2db2db, 0x3b2db2db, 0x0b6db2db, 0x1b6db2db,
+  0x2b6db2db, 0x3b6db2db, 0x0badb2db, 0x1badb2db, 0x2badb2db, 0x3badb2db,
+  0x0bedb2db, 0x1bedb2db, 0x2bedb2db, 0x3bedb2db, 0x0b2eb2db, 0x1b2eb2db,
+  0x2b2eb2db, 0x3b2eb2db, 0x0b6eb2db, 0x1b6eb2db, 0x2b6eb2db, 0x3b6eb2db,
+  0x0baeb2db, 0x1baeb2db, 0x2baeb2db, 0x3baeb2db, 0x0beeb2db, 0x1beeb2db,
+  0x2beeb2db, 0x3beeb2db, 0x0b2fb2db, 0x1b2fb2db, 0x2b2fb2db, 0x3b2fb2db,
+  0x0b6fb2db, 0x1b6fb2db, 0x2b6fb2db, 0x3b6fb2db, 0x0bafb2db, 0x1bafb2db,
+  0x2bafb2db, 0x3bafb2db, 0x0befb2db, 0x1befb2db, 0x2befb2db, 0x3befb2db,
+  0x0b2cb6db, 0x1b2cb6db, 0x2b2cb6db, 0x3b2cb6db, 0x0b6cb6db, 0x1b6cb6db,
+  0x2b6cb6db, 0x3b6cb6db, 0x0bacb6db, 0x1bacb6db, 0x2bacb6db, 0x3bacb6db,
+  0x0becb6db, 0x1becb6db, 0x2becb6db, 0x3becb6db, 0x0b2db6db, 0x1b2db6db,
+  0x2b2db6db, 0x3b2db6db, 0x0b6db6db, 0x1b6db6db, 0x2b6db6db, 0x3b6db6db,
+  0x0badb6db, 0x1badb6db, 0x2badb6db, 0x3badb6db, 0x0bedb6db, 0x1bedb6db,
+  0x2bedb6db, 0x3bedb6db, 0x0b2eb6db, 0x1b2eb6db, 0x2b2eb6db, 0x3b2eb6db,
+  0x0b6eb6db, 0x1b6eb6db, 0x2b6eb6db, 0x3b6eb6db, 0x0baeb6db, 0x1baeb6db,
+  0x2baeb6db, 0x3baeb6db,
+};
+
+static const uint32_t kNonZeroRepsDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
+   6,  6,  6,  6, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+  18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+  18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+  18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+  18, 18, 18, 18, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+  24, 24, 24, 24, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+  30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+};
+
+static const uint16_t kStaticCommandCodeBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
+    0,  256,  128,  384,   64,  320,  192,  448,
+   32,  288,  160,  416,   96,  352,  224,  480,
+   16,  272,  144,  400,   80,  336,  208,  464,
+   48,  304,  176,  432,  112,  368,  240,  496,
+    8,  264,  136,  392,   72,  328,  200,  456,
+   40,  296,  168,  424,  104,  360,  232,  488,
+   24,  280,  152,  408,   88,  344,  216,  472,
+   56,  312,  184,  440,  120,  376,  248,  504,
+    4,  260,  132,  388,   68,  324,  196,  452,
+   36,  292,  164,  420,  100,  356,  228,  484,
+   20,  276,  148,  404,   84,  340,  212,  468,
+   52,  308,  180,  436,  116,  372,  244,  500,
+   12,  268,  140,  396,   76,  332,  204,  460,
+   44,  300,  172,  428,  108,  364,  236,  492,
+   28,  284,  156,  412,   92,  348,  220,  476,
+   60,  316,  188,  444,  124,  380,  252,  508,
+    2,  258,  130,  386,   66,  322,  194,  450,
+   34,  290,  162,  418,   98,  354,  226,  482,
+   18,  274,  146,  402,   82,  338,  210,  466,
+   50,  306,  178,  434,  114,  370,  242,  498,
+   10,  266,  138,  394,   74,  330,  202,  458,
+   42,  298,  170,  426,  106,  362,  234,  490,
+   26,  282,  154,  410,   90,  346,  218,  474,
+   58,  314,  186,  442,  122,  378,  250,  506,
+    6,  262,  134,  390,   70,  326,  198,  454,
+   38,  294,  166,  422,  102,  358,  230,  486,
+   22,  278,  150,  406,   86,  342,  214,  470,
+   54,  310,  182,  438,  118,  374,  246,  502,
+   14,  270,  142,  398,   78,  334,  206,  462,
+   46,  302,  174,  430,  110,  366,  238,  494,
+   30,  286,  158,  414,   94,  350,  222,  478,
+   62,  318,  190,  446,  126,  382,  254,  510,
+    1,  257,  129,  385,   65,  321,  193,  449,
+   33,  289,  161,  417,   97,  353,  225,  481,
+   17,  273,  145,  401,   81,  337,  209,  465,
+   49,  305,  177,  433,  113,  369,  241,  497,
+    9,  265,  137,  393,   73,  329,  201,  457,
+   41,  297,  169,  425,  105,  361,  233,  489,
+   25,  281,  153,  409,   89,  345,  217,  473,
+   57,  313,  185,  441,  121,  377,  249,  505,
+    5,  261,  133,  389,   69,  325,  197,  453,
+   37,  293,  165,  421,  101,  357,  229,  485,
+   21,  277,  149,  405,   85,  341,  213,  469,
+   53,  309,  181,  437,  117,  373,  245,  501,
+   13,  269,  141,  397,   77,  333,  205,  461,
+   45,  301,  173,  429,  109,  365,  237,  493,
+   29,  285,  157,  413,   93,  349,  221,  477,
+   61,  317,  189,  445,  125,  381,  253,  509,
+    3,  259,  131,  387,   67,  323,  195,  451,
+   35,  291,  163,  419,   99,  355,  227,  483,
+   19,  275,  147,  403,   83,  339,  211,  467,
+   51,  307,  179,  435,  115,  371,  243,  499,
+   11,  267,  139,  395,   75,  331,  203,  459,
+   43,  299,  171,  427,  107,  363,  235,  491,
+   27,  283,  155,  411,   91,  347,  219,  475,
+   59,  315,  187,  443,  123,  379,  251,  507,
+    7, 1031,  519, 1543,  263, 1287,  775, 1799,
+  135, 1159,  647, 1671,  391, 1415,  903, 1927,
+   71, 1095,  583, 1607,  327, 1351,  839, 1863,
+  199, 1223,  711, 1735,  455, 1479,  967, 1991,
+   39, 1063,  551, 1575,  295, 1319,  807, 1831,
+  167, 1191,  679, 1703,  423, 1447,  935, 1959,
+  103, 1127,  615, 1639,  359, 1383,  871, 1895,
+  231, 1255,  743, 1767,  487, 1511,  999, 2023,
+   23, 1047,  535, 1559,  279, 1303,  791, 1815,
+  151, 1175,  663, 1687,  407, 1431,  919, 1943,
+   87, 1111,  599, 1623,  343, 1367,  855, 1879,
+  215, 1239,  727, 1751,  471, 1495,  983, 2007,
+   55, 1079,  567, 1591,  311, 1335,  823, 1847,
+  183, 1207,  695, 1719,  439, 1463,  951, 1975,
+  119, 1143,  631, 1655,  375, 1399,  887, 1911,
+  247, 1271,  759, 1783,  503, 1527, 1015, 2039,
+   15, 1039,  527, 1551,  271, 1295,  783, 1807,
+  143, 1167,  655, 1679,  399, 1423,  911, 1935,
+   79, 1103,  591, 1615,  335, 1359,  847, 1871,
+  207, 1231,  719, 1743,  463, 1487,  975, 1999,
+   47, 1071,  559, 1583,  303, 1327,  815, 1839,
+  175, 1199,  687, 1711,  431, 1455,  943, 1967,
+  111, 1135,  623, 1647,  367, 1391,  879, 1903,
+  239, 1263,  751, 1775,  495, 1519, 1007, 2031,
+   31, 1055,  543, 1567,  287, 1311,  799, 1823,
+  159, 1183,  671, 1695,  415, 1439,  927, 1951,
+   95, 1119,  607, 1631,  351, 1375,  863, 1887,
+  223, 1247,  735, 1759,  479, 1503,  991, 2015,
+   63, 1087,  575, 1599,  319, 1343,  831, 1855,
+  191, 1215,  703, 1727,  447, 1471,  959, 1983,
+  127, 1151,  639, 1663,  383, 1407,  895, 1919,
+  255, 1279,  767, 1791,  511, 1535, 1023, 2047,
+};
+
+static BROTLI_INLINE void StoreStaticCommandHuffmanTree(size_t* storage_ix,
+                                                        uint8_t* storage) {
+  const uint64_t head = BROTLI_MAKE_UINT64_T(0x926244U, 0x16307003U);
+  BrotliWriteBits(56, head, storage_ix, storage);       /* first 56 bits */
+  BrotliWriteBits(3, 0x00000000U, storage_ix, storage); /* then 3 zero bits */
+}
+
+static const uint16_t kStaticDistanceCodeBits[64] = {
+   0, 32, 16, 48,  8, 40, 24, 56,  4, 36, 20, 52, 12, 44, 28, 60,
+   2, 34, 18, 50, 10, 42, 26, 58,  6, 38, 22, 54, 14, 46, 30, 62,
+   1, 33, 17, 49,  9, 41, 25, 57,  5, 37, 21, 53, 13, 45, 29, 61,
+   3, 35, 19, 51, 11, 43, 27, 59,  7, 39, 23, 55, 15, 47, 31, 63,
+};
+
+static BROTLI_INLINE void StoreStaticDistanceHuffmanTree(
+    size_t* storage_ix, uint8_t* storage) {
+  BrotliWriteBits(28, 0x0369DC03u, storage_ix, storage);  /* one 28-bit write */
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/fast_log.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/fast_log.h
new file mode 100755
index 0000000000..cade1235ad
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/fast_log.h
@@ -0,0 +1,147 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Utilities for fast computation of logarithms. */
+
+#ifndef BROTLI_ENC_FAST_LOG_H_
+#define BROTLI_ENC_FAST_LOG_H_
+
+#include <math.h>
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static BROTLI_INLINE uint32_t Log2FloorNonZero(size_t n) {
+  /* floor(log2(n)) for n != 0. TODO: generalize and move to platform.h */
+#if BROTLI_GNUC_HAS_BUILTIN(__builtin_clz, 3, 4, 0) || \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0)
+  return 63u ^ (uint32_t)__builtin_clzll((unsigned long long)n);  /* 64-bit n */
+#else
+  uint32_t result = 0;
+  while (n >>= 1) result++;
+  return result;
+#endif
+}
+
+/* A lookup table for small values of log2(int) to be used in entropy
+   computation.
+
+   ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]]) */
+static const float kLog2Table[] = {
+  0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
+  1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
+  2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
+  3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
+  3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
+  3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
+  4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
+  4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
+  4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
+  4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
+  4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
+  5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
+  5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
+  5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
+  5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
+  5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
+  5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
+  5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
+  5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
+  5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
+  5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
+  5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
+  6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
+  6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
+  6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
+  6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
+  6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
+  6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
+  6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
+  6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
+  6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
+  6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
+  6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
+  6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
+  6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
+  6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
+  6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
+  6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
+  6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
+  6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
+  6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
+  6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
+  6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
+  7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
+  7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
+  7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
+  7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
+  7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
+  7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
+  7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
+  7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
+  7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
+  7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
+  7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
+  7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
+  7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
+  7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
+  7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
+  7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
+  7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
+  7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
+  7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
+  7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
+  7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
+  7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
+  7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
+  7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
+  7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
+  7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
+  7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
+  7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
+  7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
+  7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
+  7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
+  7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
+  7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
+  7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
+  7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
+  7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
+  7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
+  7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
+  7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
+  7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
+  7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
+  7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
+  7.9943534368588578f
+};
+
+#define LOG_2_INV 1.4426950408889634
+
+/* log2(v): table lookup for small v (so FastLog2(0) == 0), libm otherwise. */
+static BROTLI_INLINE double FastLog2(size_t v) {
+  const size_t kTableSize = sizeof(kLog2Table) / sizeof(kLog2Table[0]);
+  if (v >= kTableSize) {
+#if (defined(_MSC_VER) && _MSC_VER <= 1700) || \
+    (defined(__ANDROID_API__) && __ANDROID_API__ < 18)
+    /* Visual Studio 2012 and Android API < 18 lack log2(): rescale log(). */
+    return log((double)v) * LOG_2_INV;
+#else
+    return log2((double)v);
+#endif
+  }
+  return kLog2Table[v];
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_FAST_LOG_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/find_match_length.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/find_match_length.h
new file mode 100755
index 0000000000..bc428cffda
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/find_match_length.h
@@ -0,0 +1,80 @@
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Function to find maximal matching prefixes of strings. */
+
+#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
+#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Separate implementation for little-endian 64-bit targets, for speed. */
+#if defined(__GNUC__) && defined(_LP64) && defined(BROTLI_LITTLE_ENDIAN)
+
+static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
+                                                     const uint8_t* s2,
+                                                     size_t limit) {
+  /* Returns how many leading bytes of s1 and s2 agree, examining at most
+     limit bytes. Whole 64-bit words are compared first; the remaining
+     (limit & 7) bytes are compared one at a time. */
+  size_t matched = 0;
+  size_t words_left = limit >> 3;
+  size_t bytes_left = limit & 7;
+  while (BROTLI_PREDICT_TRUE(words_left != 0)) {
+    const uint64_t diff = BROTLI_UNALIGNED_LOAD64LE(s2 + matched) ^
+        BROTLI_UNALIGNED_LOAD64LE(s1 + matched);
+    if (BROTLI_PREDICT_TRUE(diff != 0)) {
+      /* In a little-endian load the lowest set bit of the XOR belongs to the
+         first differing byte, so ctz / 8 counts the bytes that still match. */
+      return matched + ((size_t)__builtin_ctzll(diff) >> 3);
+    }
+    matched += 8;
+    --words_left;
+  }
+  while (bytes_left != 0) {
+    if (BROTLI_PREDICT_FALSE(s1[matched] != s2[matched])) {
+      return matched;
+    }
+    ++matched;
+    --bytes_left;
+  }
+  return matched;
+}
+#else
+static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
+                                                     const uint8_t* s2,
+                                                     size_t limit) {
+  /* Returns the length of the common prefix of s1 and s2, examining at most
+     limit bytes of each. Used when the 64-bit little-endian GCC path is not
+     available. */
+  size_t matched = 0;
+  /* Compare 32 bits at a time while at least 4 bytes remain. The bound is
+     taken on lengths (matched never exceeds limit) rather than on
+     |s2 + limit - 4|: that pointer lies before s2 when limit < 4, and merely
+     forming it is undefined behavior. */
+  while (limit - matched >= 4 &&
+         BrotliUnalignedRead32(s2 + matched) ==
+         BrotliUnalignedRead32(s1 + matched)) {
+    matched += 4;
+  }
+  /* Then advance byte by byte to the first mismatch or to the limit. */
+  while (matched < limit && s1[matched] == s2[matched]) {
+    ++matched;
+  }
+  return matched;
+}
+#endif
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_FIND_MATCH_LENGTH_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash.h
new file mode 100755
index 0000000000..8c5a7bb5ad
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash.h
@@ -0,0 +1,498 @@
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data. */
+
+#ifndef BROTLI_ENC_HASH_H_
+#define BROTLI_ENC_HASH_H_
+
+#include <string.h>  /* memcmp, memset */
+
+#include "../common/constants.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./encoder_dict.h"
+#include "./fast_log.h"
+#include "./find_match_length.h"
+#include "./memory.h"
+#include "./quality.h"
+#include "./static_dict.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Pointer to hasher data.
+ *
+ * Excluding initialization and destruction, hasher can be passed as
+ * HasherHandle by value.
+ *
+ * Typically hasher data consists of 3 sections:
+ * * HasherCommon structure
+ * * private structured hasher data, depending on hasher type
+ * * private dynamic hasher data, depending on hasher type and parameters
+ *
+ * Using "define" instead of "typedef", because on MSVC __restrict does not work
+ * on typedef pointer types. */
+#define HasherHandle uint8_t*
+
+typedef struct {  /* header shared by every hasher type */
+  BrotliHasherParams params;
+
+  /* False if hasher needs to be "prepared" before use. */
+  BROTLI_BOOL is_prepared_;
+  /* Static dictionary probe/match counters (see SearchInStaticDictionary). */
+  size_t dict_num_lookups;
+  size_t dict_num_matches;
+} HasherCommon;
+
+static BROTLI_INLINE HasherCommon* GetHasherCommon(HasherHandle handle) {
+  return (HasherCommon*)handle;  /* the handle points at the common header */
+}
+
+#define score_t size_t
+
+static const uint32_t kCutoffTransformsCount = 10;
+/*   0,  12,   27,    23,    42,    63,    56,    48,    59,    64 */
+/* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
+static const uint64_t kCutoffTransforms =
+    BROTLI_MAKE_UINT64_T(0x071B520A, 0xDA2D3200);
+
+typedef struct HasherSearchResult {
+  size_t len;  /* number of bytes matched */
+  size_t distance;  /* backward distance of the match */
+  score_t score;  /* score of the match; lower-scoring candidates lose */
+  int len_code_delta; /* == len_code - len */
+} HasherSearchResult;
+
+/* kHashMul32 multiplier has these properties:
+   * The multiplier must be odd. Otherwise we may lose the highest bit.
+   * No long streaks of ones or zeros.
+   * There is no effort to ensure that it is a prime, the oddity is enough
+     for this use.
+   * The number has been tuned heuristically against compression benchmarks. */
+static const uint32_t kHashMul32 = 0x1E35A7BD;
+static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD);
+static const uint64_t kHashMul64Long =
+    BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
+
+/* Maps the 4 bytes at |data| to a 14-bit hash value. */
+static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
+  const uint32_t kShift = 32 - 14;  /* keep the well-mixed high bits */
+  const uint32_t product = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
+  return product >> kShift;
+}
+
+static BROTLI_INLINE void PrepareDistanceCache(
+    int* BROTLI_RESTRICT distance_cache, const int num_distances) {
+  /* Fills the derived cache slots: 4..9 hold distance_cache[0] -1, +1, -2, +2,
+     -3, +3 (when num_distances > 4); 10..15 hold the same offsets applied to
+     distance_cache[1] (when num_distances > 10). */
+  int anchor;
+  int delta;
+  if (num_distances <= 4) return;
+  anchor = distance_cache[0];
+  for (delta = 1; delta <= 3; ++delta) {
+    distance_cache[2 + 2 * delta] = anchor - delta;
+    distance_cache[3 + 2 * delta] = anchor + delta;
+  }
+  if (num_distances <= 10) return;
+  /* Same -1/+1/-2/+2/-3/+3 pattern around distance_cache[1]. */
+  anchor = distance_cache[1];
+  for (delta = 1; delta <= 3; ++delta) {
+    distance_cache[8 + 2 * delta] = anchor - delta;
+    distance_cache[9 + 2 * delta] = anchor + delta;
+  }
+}
+
+#define BROTLI_LITERAL_BYTE_SCORE 135
+#define BROTLI_DISTANCE_BIT_PENALTY 30
+/* Score must be positive after applying maximal penalty. */
+#define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))
+
+/* Scores a candidate backward reference; a higher score is a better choice.
+
+   The longest reference is usually preferred, but not always: a reference
+   that lies further back is normally coded with more bits. That cost is
+   approximated here by log2(distance), so that a longer match can be
+   discarded when it is only slightly longer and the extra bits needed to
+   encode it outweigh the literals it saves.
+
+   The score adds BROTLI_LITERAL_BYTE_SCORE for every copied byte to
+   BROTLI_SCORE_BASE and subtracts BROTLI_DISTANCE_BIT_PENALTY for every bit
+   of floor(log2(backward_reference_offset)). References that reuse one of
+   the last distances have their own heuristic constants; see
+   BackwardReferenceScoreUsingLastDistance.
+
+   backward_reference_offset MUST be positive. */
+static BROTLI_INLINE score_t BackwardReferenceScore(
+    size_t copy_length, size_t backward_reference_offset) {
+  const uint32_t distance_bits = Log2FloorNonZero(backward_reference_offset);
+  return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length -
+      BROTLI_DISTANCE_BIT_PENALTY * distance_bits;
+}
+
+static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(
+    size_t copy_length) {
+  const score_t copied = BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length;
+  return copied + BROTLI_SCORE_BASE + 15;  /* flat bonus, no log2 penalty */
+}
+
+static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance(
+    size_t distance_short_code) {  /* 39 + even extra packed in 0x1CA10 */
+  return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE);
+}
+
+static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
+    const BrotliEncoderDictionary* dictionary, size_t item,
+    const uint8_t* data, size_t max_length, size_t max_backward,
+    size_t max_distance, HasherSearchResult* out) {
+  /* Tests one static dictionary hash-table item against |data|. Returns
+     BROTLI_TRUE and records the match in |out| on success; returns
+     BROTLI_FALSE with |out| untouched when the word is longer than
+     max_length, the match is empty or too short, the distance would exceed
+     max_distance, or the score is lower than out->score. */
+  const size_t len = item & 0x1F;     /* low 5 bits: word length */
+  const size_t word_idx = item >> 5;  /* remaining bits: word index */
+  const size_t offset =
+      dictionary->words->offsets_by_length[len] + len * word_idx;
+  size_t matchlen;
+  size_t cut;
+  size_t transform_id;
+  size_t backward;
+  score_t score;
+  if (len > max_length) {
+    return BROTLI_FALSE;
+  }
+  matchlen =
+      FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len);
+  if (matchlen == 0 || matchlen + dictionary->cutoffTransformsCount <= len) {
+    return BROTLI_FALSE;
+  }
+  /* The unmatched tail length picks a transform; transform and word index
+     are packed into a distance that lies beyond max_backward. */
+  cut = len - matchlen;
+  transform_id = (cut << 2) +
+      (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
+  backward = max_backward + 1 + word_idx +
+      (transform_id << dictionary->words->size_bits_by_length[len]);
+  if (backward > max_distance) return BROTLI_FALSE;
+  score = BackwardReferenceScore(matchlen, backward);
+  if (score < out->score) return BROTLI_FALSE;
+  out->len = matchlen;
+  out->len_code_delta = (int)len - (int)matchlen;
+  out->distance = backward;
+  out->score = score;
+  return BROTLI_TRUE;
+}
+
+static BROTLI_INLINE void SearchInStaticDictionary(
+    const BrotliEncoderDictionary* dictionary,
+    HasherHandle handle, const uint8_t* data, size_t max_length,
+    size_t max_backward, size_t max_distance,
+    HasherSearchResult* out, BROTLI_BOOL shallow) {
+  /* Probes one (shallow) or two adjacent hash_table slots for |data|, unless
+     fewer than 1 in 128 earlier lookups led to a match. */
+  HasherCommon* common = GetHasherCommon(handle);
+  const size_t num_probes = shallow ? 1u : 2u;
+  size_t slot;
+  size_t probe;
+  if (common->dict_num_matches < (common->dict_num_lookups >> 7)) {
+    return;
+  }
+  slot = Hash14(data) << 1;
+  for (probe = 0; probe < num_probes; ++probe) {
+    const size_t item = dictionary->hash_table[slot + probe];
+    common->dict_num_lookups++;
+    if (item == 0) continue;
+    if (TestStaticDictionaryItem(dictionary, item, data, max_length,
+                                 max_backward, max_distance, out)) {
+      common->dict_num_matches++;
+    }
+  }
+}
+
+typedef struct BackwardMatch {
+  uint32_t distance;
+  uint32_t length_and_code;  /* (length << 5) | length code, 0 if == length */
+} BackwardMatch;
+
+static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,
+    size_t dist, size_t len) {
+  self->distance = (uint32_t)dist;
+  self->length_and_code = (uint32_t)(len << 5);  /* code bits 0: code == len */
+}
+
+static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,
+    size_t dist, size_t len, size_t len_code) {
+  const size_t code_bits = (len_code == len) ? 0 : len_code;  /* 0: == len */
+  self->distance = (uint32_t)dist;
+  self->length_and_code = (uint32_t)((len << 5) | code_bits);
+}
+
+static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {
+  return self->length_and_code >> 5;  /* drop the 5 length-code bits */
+}
+
+static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
+  const size_t stored = self->length_and_code & 31;  /* 0: use the length */
+  return stored != 0 ? stored : BackwardMatchLength(self);
+}
+
+#define EXPAND_CAT(a, b) CAT(a, b)
+#define CAT(a, b) a ## b
+#define FN(X) EXPAND_CAT(X, HASHER())
+
+#define HASHER() H10
+#define BUCKET_BITS 17
+#define MAX_TREE_SEARCH_DEPTH 64
+#define MAX_TREE_COMP_LENGTH 128
+#include "./hash_to_binary_tree_inc.h"  /* NOLINT(build/include) */
+#undef MAX_TREE_SEARCH_DEPTH
+#undef MAX_TREE_COMP_LENGTH
+#undef BUCKET_BITS
+#undef HASHER
+/* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */
+#define MAX_NUM_MATCHES_H10 128
+
+/* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression
+   a little faster (0.5% - 1%) and it compresses 0.15% better on small text
+   and HTML inputs. */
+
+#define HASHER() H2
+#define BUCKET_BITS 16
+#define BUCKET_SWEEP 1
+#define HASH_LEN 5
+#define USE_DICTIONARY 1
+#include "./hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
+#undef BUCKET_SWEEP
+#undef USE_DICTIONARY
+#undef HASHER
+
+#define HASHER() H3
+#define BUCKET_SWEEP 2
+#define USE_DICTIONARY 0
+#include "./hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
+#undef USE_DICTIONARY
+#undef BUCKET_SWEEP
+#undef BUCKET_BITS
+#undef HASHER
+
+#define HASHER() H4
+#define BUCKET_BITS 17
+#define BUCKET_SWEEP 4
+#define USE_DICTIONARY 1
+#include "./hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
+#undef USE_DICTIONARY
+#undef HASH_LEN
+#undef BUCKET_SWEEP
+#undef BUCKET_BITS
+#undef HASHER
+
+#define HASHER() H5
+#include "./hash_longest_match_inc.h"  /* NOLINT(build/include) */
+#undef HASHER
+
+#define HASHER() H6
+#include "./hash_longest_match64_inc.h"  /* NOLINT(build/include) */
+#undef HASHER
+
+#define BUCKET_BITS 15
+
+#define NUM_LAST_DISTANCES_TO_CHECK 4
+#define NUM_BANKS 1
+#define BANK_BITS 16
+#define HASHER() H40
+#include "./hash_forgetful_chain_inc.h"  /* NOLINT(build/include) */
+#undef HASHER
+#undef NUM_LAST_DISTANCES_TO_CHECK
+
+#define NUM_LAST_DISTANCES_TO_CHECK 10
+#define HASHER() H41
+#include "./hash_forgetful_chain_inc.h"  /* NOLINT(build/include) */
+#undef HASHER
+#undef NUM_LAST_DISTANCES_TO_CHECK
+#undef NUM_BANKS
+#undef BANK_BITS
+
+#define NUM_LAST_DISTANCES_TO_CHECK 16
+#define NUM_BANKS 512
+#define BANK_BITS 9
+#define HASHER() H42
+#include "./hash_forgetful_chain_inc.h"  /* NOLINT(build/include) */
+#undef HASHER
+#undef NUM_LAST_DISTANCES_TO_CHECK
+#undef NUM_BANKS
+#undef BANK_BITS
+
+#undef BUCKET_BITS
+
+#define HASHER() H54
+#define BUCKET_BITS 20
+#define BUCKET_SWEEP 4
+#define HASH_LEN 7
+#define USE_DICTIONARY 0
+#include "./hash_longest_match_quickly_inc.h"  /* NOLINT(build/include) */
+#undef USE_DICTIONARY
+#undef HASH_LEN
+#undef BUCKET_SWEEP
+#undef BUCKET_BITS
+#undef HASHER
+
+/* fast large window hashers */
+
+#define HASHER() HROLLING_FAST
+#define CHUNKLEN 32
+#define JUMP 4
+#define NUMBUCKETS 16777216
+#define MASK ((NUMBUCKETS * 64) - 1)
+#include "./hash_rolling_inc.h"  /* NOLINT(build/include) */
+#undef JUMP
+#undef HASHER
+
+
+#define HASHER() HROLLING
+#define JUMP 1
+#include "./hash_rolling_inc.h"  /* NOLINT(build/include) */
+#undef MASK
+#undef NUMBUCKETS
+#undef JUMP
+#undef CHUNKLEN
+#undef HASHER
+
+#define HASHER() H35
+#define HASHER_A H3
+#define HASHER_B HROLLING_FAST
+#include "./hash_composite_inc.h"  /* NOLINT(build/include) */
+#undef HASHER_A
+#undef HASHER_B
+#undef HASHER
+
+#define HASHER() H55
+#define HASHER_A H54
+#define HASHER_B HROLLING_FAST
+#include "./hash_composite_inc.h"  /* NOLINT(build/include) */
+#undef HASHER_A
+#undef HASHER_B
+#undef HASHER
+
+#define HASHER() H65
+#define HASHER_A H6
+#define HASHER_B HROLLING
+#include "./hash_composite_inc.h"  /* NOLINT(build/include) */
+#undef HASHER_A
+#undef HASHER_B
+#undef HASHER
+
+#undef FN
+#undef CAT
+#undef EXPAND_CAT
+
+#define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)\
+                               H(35) H(55) H(65)
+#define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
+
+static BROTLI_INLINE void DestroyHasher(
+    MemoryManager* m, HasherHandle* handle) {
+  /* Nothing to release when no hasher was ever allocated. */
+  if (*handle != NULL) BROTLI_FREE(m, *handle);
+}
+
+static BROTLI_INLINE void HasherReset(HasherHandle handle) {
+  /* Marks an existing hasher as needing preparation; NULL is a no-op. */
+  if (handle != NULL) GetHasherCommon(handle)->is_prepared_ = BROTLI_FALSE;
+}
+
+static BROTLI_INLINE size_t HasherSize(const BrotliEncoderParams* params,
+    BROTLI_BOOL one_shot, const size_t input_size) {
+  size_t result = sizeof(HasherCommon);  /* common header comes first */
+  switch (params->hasher.type) {
+#define SIZE_(N)                                                         \
+    case N:                                                              \
+      result += HashMemAllocInBytesH ## N(params, one_shot, input_size); \
+      break;
+    FOR_ALL_HASHERS(SIZE_)  /* one case per hasher type */
+#undef SIZE_
+    default:  /* unlisted type: header size only */
+      break;
+  }
+  return result;
+}
+
+static BROTLI_INLINE void HasherSetup(MemoryManager* m, HasherHandle* handle,
+    BrotliEncoderParams* params, const uint8_t* data, size_t position,
+    size_t input_size, BROTLI_BOOL is_last) {
+  /* Allocates *handle on first use (no-op on OOM), then prepares the hasher. */
+  HasherHandle self = NULL;
+  HasherCommon* common = NULL;
+  BROTLI_BOOL one_shot = (position == 0 && is_last);
+  if (*handle == NULL) {
+    size_t alloc_size;
+    ChooseHasher(params, &params->hasher);
+    alloc_size = HasherSize(params, one_shot, input_size);
+    self = BROTLI_ALLOC(m, uint8_t, alloc_size);
+    if (BROTLI_IS_OOM(m)) return;
+    *handle = self;
+    common = GetHasherCommon(self);
+    common->params = params->hasher;
+    common->dict_num_lookups = 0;  /* defined even if position != 0 */
+    common->dict_num_matches = 0;
+    switch (common->params.type) {
+#define INITIALIZE_(N)                     \
+      case N:                              \
+        InitializeH ## N(*handle, params); \
+        break;
+      FOR_ALL_HASHERS(INITIALIZE_)
+#undef INITIALIZE_
+      default: break;
+    }
+    HasherReset(*handle);
+  }
+  common = GetHasherCommon(*handle);
+  if (!common->is_prepared_) {
+    switch (common->params.type) {
+#define PREPARE_(N)                                         \
+      case N:                                               \
+        PrepareH ## N(*handle, one_shot, input_size, data); \
+        break;
+      FOR_ALL_HASHERS(PREPARE_)
+#undef PREPARE_
+      default: break;
+    }
+    if (position == 0) {
+      common->dict_num_lookups = 0;
+      common->dict_num_matches = 0;
+    }
+    common->is_prepared_ = BROTLI_TRUE;
+  }
+}
+
+static BROTLI_INLINE void InitOrStitchToPreviousBlock(
+    MemoryManager* m, HasherHandle* handle, const uint8_t* data, size_t mask,
+    BrotliEncoderParams* params, size_t position, size_t input_size,
+    BROTLI_BOOL is_last) {
+  HasherHandle self;  /* assigned once HasherSetup has run */
+  HasherSetup(m, handle, params, data, position, input_size, is_last);
+  if (BROTLI_IS_OOM(m)) return;  /* setup could not allocate the hasher */
+  self = *handle;
+  switch (GetHasherCommon(self)->params.type) {
+#define INIT_(N)                                                           \
+    case N:                                                                \
+      StitchToPreviousBlockH ## N(self, input_size, position, data, mask); \
+    break;
+    FOR_ALL_HASHERS(INIT_)  /* dispatch on the selected hasher type */
+#undef INIT_
+    default: break;
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_HASH_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_composite_inc.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_composite_inc.h
new file mode 100755
index 0000000000..b266aa2f8d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_composite_inc.h
@@ -0,0 +1,136 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2018 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, HASHER_A, HASHER_B */
+
+/* Composite hasher: This hasher combines two other hashers, HASHER_A
+   and HASHER_B. */
+
+#define HashComposite HASHER()
+
+#define FN_A(X) EXPAND_CAT(X, HASHER_A)
+#define FN_B(X) EXPAND_CAT(X, HASHER_B)
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) {
+  const size_t len_a = FN_A(HashTypeLength)();
+  const size_t len_b = FN_B(HashTypeLength)();
+  return len_a < len_b ? len_b : len_a;  /* the larger requirement wins */
+}
+
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
+  const size_t ahead_a = FN_A(StoreLookahead)();
+  const size_t ahead_b = FN_B(StoreLookahead)();
+  return ahead_a < ahead_b ? ahead_b : ahead_a;  /* the larger lookahead */
+}
+
+typedef struct HashComposite {
+  HasherHandle ha;  /* HASHER_A state; 0 until FN(Prepare) sets it up */
+  HasherHandle hb;  /* HASHER_B state, placed after HASHER_A's data */
+  const BrotliEncoderParams* params;  /* remembered by FN(Initialize) */
+} HashComposite;
+
+static BROTLI_INLINE HashComposite* FN(Self)(HasherHandle handle) {
+  return (HashComposite*)&(GetHasherCommon(handle)[1]);  /* past the header */
+}
+
+static void FN(Initialize)(
+    HasherHandle handle, const BrotliEncoderParams* params) {
+  HashComposite* self = FN(Self)(handle);
+  self->ha = 0;  /* 0 tells FN(Prepare) that the sub-hashers are not set up */
+  self->hb = 0;
+  self->params = params;
+  /* TODO: Initialization of the sub-hashers is deferred to Prepare (and params
+     remembered here) because we don't get the one_shot and input_size params
+     here that are needed to know the memory size of them. Instead provide
+     those params to all hashers FN(Initialize) */
+}
+
+static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* data) {
+  HashComposite* self = FN(Self)(handle);
+  if (!self->ha) {  /* first call: carve both sub-hashers out of the block */
+    HasherCommon* common_a;
+    HasherCommon* common_b;
+    /* HASHER_A starts right after this hasher's header and HashComposite. */
+    self->ha = handle + sizeof(HasherCommon) + sizeof(HashComposite);
+    common_a = (HasherCommon*)self->ha;
+    common_a->params = self->params->hasher;
+    common_a->is_prepared_ = BROTLI_FALSE;
+    common_a->dict_num_lookups = 0;
+    common_a->dict_num_matches = 0;
+    FN_A(Initialize)(self->ha, self->params);
+    /* HASHER_B follows HASHER_A's header and its HashMemAllocInBytes bytes. */
+    self->hb = self->ha + sizeof(HasherCommon) + FN_A(HashMemAllocInBytes)(
+        self->params, one_shot, input_size);
+    common_b = (HasherCommon*)self->hb;
+    common_b->params = self->params->hasher;
+    common_b->is_prepared_ = BROTLI_FALSE;
+    common_b->dict_num_lookups = 0;
+    common_b->dict_num_matches = 0;
+    FN_B(Initialize)(self->hb, self->params);
+  }
+  FN_A(Prepare)(self->ha, one_shot, input_size, data);  /* on every call */
+  FN_B(Prepare)(self->hb, one_shot, input_size, data);
+}
+
+static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size) {  /* struct + 2 sub-hasher headers + both sub-hashers */
+  return sizeof(HashComposite) + 2 * sizeof(HasherCommon) +
+      FN_A(HashMemAllocInBytes)(params, one_shot, input_size) +
+      FN_B(HashMemAllocInBytes)(params, one_shot, input_size);
+}
+
+static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
+  HashComposite* self = FN(Self)(handle);  /* forwarded to both sub-hashers */
+  FN_A(Store)(self->ha, data, mask, ix);
+  FN_B(Store)(self->hb, data, mask, ix);
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
+    const uint8_t* data, const size_t mask, const size_t ix_start,
+    const size_t ix_end) {
+  HashComposite* self = FN(Self)(handle);  /* forwarded to both sub-hashers */
+  FN_A(StoreRange)(self->ha, data, mask, ix_start, ix_end);
+  FN_B(StoreRange)(self->hb, data, mask, ix_start, ix_end);
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ring_buffer_mask) {
+  HashComposite* self = FN(Self)(handle);  /* forwarded to both sub-hashers */
+  FN_A(StitchToPreviousBlock)(self->ha, num_bytes, position, ringbuffer,
+      ring_buffer_mask);
+  FN_B(StitchToPreviousBlock)(self->hb, num_bytes, position, ringbuffer,
+      ring_buffer_mask);
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(
+    HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
+  HashComposite* self = FN(Self)(handle);  /* forwarded to both sub-hashers */
+  FN_A(PrepareDistanceCache)(self->ha, distance_cache);
+  FN_B(PrepareDistanceCache)(self->hb, distance_cache);
+}
+
+static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
+    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
+    const size_t max_length, const size_t max_backward,
+    const size_t gap, const size_t max_distance,
+    HasherSearchResult* BROTLI_RESTRICT out) {
+  HashComposite* self = FN(Self)(handle);  /* both searches share |out| */
+  FN_A(FindLongestMatch)(self->ha, dictionary, data, ring_buffer_mask,
+      distance_cache, cur_ix, max_length, max_backward, gap,
+      max_distance, out);
+  FN_B(FindLongestMatch)(self->hb, dictionary, data, ring_buffer_mask,
+      distance_cache, cur_ix, max_length, max_backward, gap,
+      max_distance, out);  /* runs second, starting from HASHER_A's result */
+}
+
+#undef HashComposite
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_forgetful_chain_inc.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_forgetful_chain_inc.h
new file mode 100755
index 0000000000..41cb3ff03a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_forgetful_chain_inc.h
@@ -0,0 +1,254 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, BUCKET_BITS, NUM_BANKS, BANK_BITS,
+                        NUM_LAST_DISTANCES_TO_CHECK */
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data.
+
+   Hashes are stored in chains which are bucketed to groups. Group of chains
+   share a storage "bank". When more than "bank size" chain nodes are added,
+   oldest nodes are replaced; this way several chains may share a tail. */
+
+#define HashForgetfulChain HASHER()  /* struct name is produced by HASHER() */
+
+#define BANK_SIZE (1 << BANK_BITS)  /* Number of slots in one FN(Bank). */
+
+/* Number of hash buckets; sizes the addr[] and head[] arrays. */
+#define BUCKET_SIZE (1 << BUCKET_BITS)
+
+#define CAPPED_CHAINS 0  /* 0: FN(Store) saturates long deltas to 0xFFFF. */
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }  /* FN(HashBytes) loads 4 bytes */
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }  /* presumably the 4 bytes FN(Store) reads at ix - confirm */
+
+/* Maps the 4 bytes at |data| to the index of the bucket they belong to. */
+static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* data) {
+  const uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
+  /* The higher bits contain more mixture from the multiplication,
+     so we take our results from there. */
+  return h >> (32 - BUCKET_BITS);  /* keeps the top BUCKET_BITS bits of h */
+}
+
+typedef struct FN(Slot) {  /* One chain node; filled in by FN(Store). */
+  uint16_t delta;  /* distance back to the bucket's previous position, capped */
+  uint16_t next;   /* slot index (same bank) of the previous chain head */
+} FN(Slot);
+
+typedef struct FN(Bank) {  /* Slot pool that FN(Store) reuses cyclically. */
+  FN(Slot) slots[BANK_SIZE];
+} FN(Bank);
+
+typedef struct HashForgetfulChain {
+  uint32_t addr[BUCKET_SIZE];  /* last position stored for each bucket */
+  uint16_t head[BUCKET_SIZE];  /* slot index of each bucket's newest node */
+  /* Low 8 bits of the key, indexed by the low 16 bits of the position; used
+     for quick rejection of "distance cache" candidates. */
+  uint8_t tiny_hash[65536];
+  FN(Bank) banks[NUM_BANKS];
+  uint16_t free_slot_idx[NUM_BANKS];  /* per-bank running slot counter */
+  size_t max_hops;  /* chain nodes visited at most per FindLongestMatch call */
+} HashForgetfulChain;
+
+static BROTLI_INLINE HashForgetfulChain* FN(Self)(HasherHandle handle) {
+  return (HashForgetfulChain*)&(GetHasherCommon(handle)[1]);  /* address one HasherCommon past the common header */
+}
+
+static void FN(Initialize)(
+    HasherHandle handle, const BrotliEncoderParams* params) {
+  FN(Self)(handle)->max_hops =  /* the only field set here */
+      (params->quality > 6 ? 7u : 8u) << (params->quality - 4);  /* NOTE(review): shift count assumes quality >= 4 - confirm */
+}
+
+static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* data) {
+  HashForgetfulChain* self = FN(Self)(handle);
+  /* Partial preparation is 100 times slower (per bucket), hence the cap. */
+  size_t partial_prepare_threshold = BUCKET_SIZE >> 6;
+  if (one_shot && input_size <= partial_prepare_threshold) {
+    size_t i;
+    for (i = 0; i < input_size; ++i) {
+      size_t bucket = FN(HashBytes)(&data[i]);  /* NOTE(review): loads 4 bytes at data[i]; confirm slack past input_size */
+      /* Same |addr| sentinel as the bulk branch below. */
+      self->addr[bucket] = 0xCCCCCCCC;
+      self->head[bucket] = 0xCCCC;  /* NOTE(review): bulk branch uses 0 here */
+    }
+  } else {
+    /* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position
+       processed by hasher never reaches 3GB + 64M; this makes every new chain
+       terminate after its first node. */
+    memset(self->addr, 0xCC, sizeof(self->addr));
+    memset(self->head, 0, sizeof(self->head));
+  }
+  memset(self->tiny_hash, 0, sizeof(self->tiny_hash));  /* cleared in full in both cases */
+  memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));
+}
+
+static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size) {
+  BROTLI_UNUSED(params);
+  BROTLI_UNUSED(one_shot);
+  BROTLI_UNUSED(input_size);
+  return sizeof(HashForgetfulChain);  /* fixed size; all arguments are ignored */
+}
+
+/* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
+   node to corresponding chain; also update tiny_hash for current position. */
+static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
+  HashForgetfulChain* self = FN(Self)(handle);
+  const size_t key = FN(HashBytes)(&data[ix & mask]);
+  const size_t bank = key & (NUM_BANKS - 1);  /* key masked to a bank index */
+  const size_t idx = self->free_slot_idx[bank]++ & (BANK_SIZE - 1);  /* next slot, wrapping within the bank */
+  size_t delta = ix - self->addr[key];  /* distance to the bucket's last entry */
+  self->tiny_hash[(uint16_t)ix] = (uint8_t)key;
+  if (delta > 0xFFFF) delta = CAPPED_CHAINS ? 0 : 0xFFFF;  /* fit 16-bit field */
+  self->banks[bank].slots[idx].delta = (uint16_t)delta;
+  self->banks[bank].slots[idx].next = self->head[key];  /* link to old head */
+  self->addr[key] = (uint32_t)ix;
+  self->head[key] = (uint16_t)idx;  /* the new node becomes the chain head */
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
+    const uint8_t* data, const size_t mask, const size_t ix_start,
+    const size_t ix_end) {  /* Stores each position in [ix_start, ix_end). */
+  size_t i;
+  for (i = ix_start; i < ix_end; ++i) {
+    FN(Store)(handle, data, mask, i);
+  }
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ring_buffer_mask) {
+  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {  /* i.e. num_bytes >= 3 */
+    /* Prepare the hashes for three last bytes of the last write.
+       These could not be calculated before, since they require knowledge
+       of both the previous and the current block. */
+    FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 3);
+    FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 2);
+    FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 1);
+  }
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(
+    HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
+  BROTLI_UNUSED(handle);  /* the entry count is a template parameter instead */
+  PrepareDistanceCache(distance_cache, NUM_LAST_DISTANCES_TO_CHECK);
+}
+
+/* Finds the longest backward match of &data[cur_ix] up to the length of
+   max_length and stores the position cur_ix in the hash table.
+
+   REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
+             values; if this method is invoked repeatedly with the same distance
+             cache values, it is enough to invoke FN(PrepareDistanceCache) once.
+
+   Does not look for matches longer than max_length.
+   Does not look for matches further away than max_backward.
+   Writes the best match into |out|.
+   |out|->score is updated only if a better match is found. */
+static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
+    const int* BROTLI_RESTRICT distance_cache,
+    const size_t cur_ix, const size_t max_length, const size_t max_backward,
+    const size_t gap, const size_t max_distance,
+    HasherSearchResult* BROTLI_RESTRICT out) {
+  HashForgetfulChain* self = FN(Self)(handle);
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  /* Don't accept a short copy from far away. */
+  score_t min_score = out->score;  /* score on entry; compared again at the end */
+  score_t best_score = out->score;
+  size_t best_len = out->len;
+  size_t i;
+  const size_t key = FN(HashBytes)(&data[cur_ix_masked]);
+  const uint8_t tiny_hash = (uint8_t)(key);  /* same truncation as in FN(Store) */
+  out->len = 0;  /* reset unconditionally, unlike |out|->score */
+  out->len_code_delta = 0;
+  /* Try last distance first. */
+  for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {
+    const size_t backward = (size_t)distance_cache[i];
+    size_t prev_ix = (cur_ix - backward);
+    /* For distance code 0 we want to consider 2-byte matches. */
+    if (i > 0 && self->tiny_hash[(uint16_t)prev_ix] != tiny_hash) continue;
+    if (prev_ix >= cur_ix || backward > max_backward) {  /* also rejects backward == 0 and wrap-around */
+      continue;
+    }
+    prev_ix &= ring_buffer_mask;
+    {
+      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                  &data[cur_ix_masked],
+                                                  max_length);
+      if (len >= 2) {
+        score_t score = BackwardReferenceScoreUsingLastDistance(len);
+        if (best_score < score) {
+          if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
+          if (best_score < score) {  /* re-checked after the penalty */
+            best_score = score;
+            best_len = len;
+            out->len = best_len;
+            out->distance = backward;
+            out->score = best_score;
+          }
+        }
+      }
+    }
+  }
+  {
+    const size_t bank = key & (NUM_BANKS - 1);
+    size_t backward = 0;
+    size_t hops = self->max_hops;
+    size_t delta = cur_ix - self->addr[key];
+    size_t slot = self->head[key];
+    while (hops--) {
+      size_t prev_ix;
+      size_t last = slot;
+      backward += delta;  /* deltas accumulate along the chain */
+      if (backward > max_backward || (CAPPED_CHAINS && !delta)) break;
+      prev_ix = (cur_ix - backward) & ring_buffer_mask;
+      slot = self->banks[bank].slots[last].next;  /* advance before any continue */
+      delta = self->banks[bank].slots[last].delta;
+      if (cur_ix_masked + best_len > ring_buffer_mask ||
+          prev_ix + best_len > ring_buffer_mask ||
+          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+        continue;  /* candidate differs at offset best_len or would run past the mask */
+      }
+      {
+        const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                    &data[cur_ix_masked],
+                                                    max_length);
+        if (len >= 4) {
+          /* Matches shorter than 4 bytes are not scored here; skipping them
+             also saves a few unnecessary binary logarithms in backward
+             reference score, since we are not interested in such matches. */
+          score_t score = BackwardReferenceScore(len, backward);
+          if (best_score < score) {
+            best_score = score;
+            best_len = len;
+            out->len = best_len;
+            out->distance = backward;
+            out->score = best_score;
+          }
+        }
+      }
+    }
+    FN(Store)(handle, data, ring_buffer_mask, cur_ix);  /* record cur_ix only after searching */
+  }
+  if (out->score == min_score) {  /* score unchanged: fall back to the dictionary */
+    SearchInStaticDictionary(dictionary,
+        handle, &data[cur_ix_masked], max_length, max_backward + gap,
+        max_distance, out, BROTLI_FALSE);
+  }
+}
+
+#undef BANK_SIZE  /* Drop the helper macros defined at the top of this header. */
+#undef BUCKET_SIZE
+#undef CAPPED_CHAINS
+
+#undef HashForgetfulChain
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_longest_match64_inc.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_longest_match64_inc.h
new file mode 100755
index 0000000000..cb953a644f
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_longest_match64_inc.h
@@ -0,0 +1,267 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data.
+
+   This is a hash map of fixed size (bucket_size_) to a ring buffer of
+   fixed size (block_size_). The ring buffer contains the last block_size_
+   index positions of the given hash key in the compressed data. */
+
+#define HashLongestMatch HASHER()  /* struct name is produced by HASHER() */
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }  /* FN(HashBytes) loads 8 bytes */
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }  /* presumably the 8 bytes FN(Store) reads at ix - confirm */
+
+/* Hashes the 8 bytes at |data|, masked by |mask|, into a bucket index. */
+static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t* data,
+                                            const uint64_t mask,
+                                            const int shift) {
+  const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(data) & mask) * kHashMul64Long;
+  /* The higher bits contain more mixture from the multiplication,
+     so we take our results from there. */
+  return (uint32_t)(h >> shift);  /* right shift by |shift|, then truncated */
+}
+
+typedef struct HashLongestMatch {
+  /* Number of hash buckets. */
+  size_t bucket_size_;
+  /* Only block_size_ newest backward references are kept,
+     and the older are forgotten. */
+  size_t block_size_;
+  /* Right-shift for computing hash bucket index from hash value. */
+  int hash_shift_;
+  /* Mask keeping the low 8 * hash_len bits of the 8 loaded input bytes. */
+  uint64_t hash_mask_;
+  /* Mask for accessing entries in a block (in a ring-buffer manner). */
+  uint32_t block_mask_;
+
+  /* --- Dynamic size members --- */
+
+  /* Count of stores into each bucket (16-bit, so it wraps around). */
+  /* uint16_t num[bucket_size]; */
+
+  /* Buckets containing block_size_ of backward references. */
+  /* uint32_t buckets[bucket_size * block_size]; */
+} HashLongestMatch;
+
+static BROTLI_INLINE HashLongestMatch* FN(Self)(HasherHandle handle) {
+  return (HashLongestMatch*)&(GetHasherCommon(handle)[1]);  /* address one HasherCommon past the common header */
+}
+
+static BROTLI_INLINE uint16_t* FN(Num)(HashLongestMatch* self) {
+  return (uint16_t*)(&self[1]);  /* counters start right after the struct */
+}
+
+static BROTLI_INLINE uint32_t* FN(Buckets)(HashLongestMatch* self) {
+  return (uint32_t*)(&FN(Num)(self)[self->bucket_size_]);  /* right after the bucket_size_ counters */
+}
+
+static void FN(Initialize)(
+    HasherHandle handle, const BrotliEncoderParams* params) {
+  HasherCommon* common = GetHasherCommon(handle);
+  HashLongestMatch* self = FN(Self)(handle);
+  BROTLI_UNUSED(params);  /* settings are read from common->params instead */
+  self->hash_shift_ = 64 - common->params.bucket_bits;  /* keep the top bucket_bits bits */
+  self->hash_mask_ = (~((uint64_t)0U)) >> (64 - 8 * common->params.hash_len);  /* low 8 * hash_len bits set */
+  self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
+  self->block_size_ = (size_t)1 << common->params.block_bits;
+  self->block_mask_ = (uint32_t)(self->block_size_ - 1);  /* block_size_ is a power of two */
+}
+
+static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* data) {
+  HashLongestMatch* self = FN(Self)(handle);
+  uint16_t* num = FN(Num)(self);
+  /* Partial preparation is 100 times slower (per bucket), hence the cap. */
+  size_t partial_prepare_threshold = self->bucket_size_ >> 6;
+  if (one_shot && input_size <= partial_prepare_threshold) {
+    size_t i;
+    for (i = 0; i < input_size; ++i) {
+      const uint32_t key = FN(HashBytes)(&data[i], self->hash_mask_,
+                                         self->hash_shift_);  /* NOTE(review): loads 8 bytes at data[i]; confirm slack past input_size */
+      num[key] = 0;  /* only the counter is reset; bucket contents are kept */
+    }
+  } else {
+    memset(num, 0, self->bucket_size_ * sizeof(num[0]));  /* reset every counter */
+  }
+}
+
+static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size) {
+  size_t bucket_size = (size_t)1 << params->hasher.bucket_bits;
+  size_t block_size = (size_t)1 << params->hasher.block_bits;
+  BROTLI_UNUSED(one_shot);
+  BROTLI_UNUSED(input_size);
+  return sizeof(HashLongestMatch) + bucket_size * (2 + 4 * block_size);  /* per bucket: one 2-byte counter + block_size 4-byte entries */
+}
+
+/* Look at 8 bytes at &data[ix & mask], masked by hash_mask_.
+   Compute a hash from these, and store the value of ix at that position. */
+static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data,
+    const size_t mask, const size_t ix) {
+  HashLongestMatch* self = FN(Self)(handle);
+  uint16_t* num = FN(Num)(self);
+  const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_mask_,
+                                     self->hash_shift_);
+  const size_t minor_ix = num[key] & self->block_mask_;  /* position inside the bucket's ring */
+  const size_t offset =
+      minor_ix + (key << GetHasherCommon(handle)->params.block_bits);  /* bucket start + ring position */
+  FN(Buckets)(self)[offset] = (uint32_t)ix;
+  ++num[key];
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
+    const uint8_t* data, const size_t mask, const size_t ix_start,
+    const size_t ix_end) {  /* Stores each position in [ix_start, ix_end). */
+  size_t i;
+  for (i = ix_start; i < ix_end; ++i) {
+    FN(Store)(handle, data, mask, i);
+  }
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ringbuffer_mask) {
+  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {  /* i.e. num_bytes >= 7 */
+    /* Prepare the hashes for three last bytes of the last write.
+       These could not be calculated before, since they require knowledge
+       of both the previous and the current block. */
+    FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 3);
+    FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 2);
+    FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 1);
+  }
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(
+    HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
+  PrepareDistanceCache(distance_cache,
+      GetHasherCommon(handle)->params.num_last_distances_to_check);  /* count is a runtime hasher parameter */
+}
+
+/* Finds the longest backward match of &data[cur_ix] up to the length of
+   max_length and stores the position cur_ix in the hash table.
+
+   REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
+             values; if this method is invoked repeatedly with the same distance
+             cache values, it is enough to invoke FN(PrepareDistanceCache) once.
+
+   Does not look for matches longer than max_length.
+   Does not look for matches further away than max_backward.
+   Writes the best match into |out|.
+   |out|->score is updated only if a better match is found. */
+static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
+    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
+    const size_t max_length, const size_t max_backward,
+    const size_t gap, const size_t max_distance,
+    HasherSearchResult* BROTLI_RESTRICT out) {
+  HasherCommon* common = GetHasherCommon(handle);
+  HashLongestMatch* self = FN(Self)(handle);
+  uint16_t* num = FN(Num)(self);
+  uint32_t* buckets = FN(Buckets)(self);
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  /* Don't accept a short copy from far away. */
+  score_t min_score = out->score;  /* score on entry; compared again at the end */
+  score_t best_score = out->score;
+  size_t best_len = out->len;
+  size_t i;
+  out->len = 0;  /* reset unconditionally, unlike |out|->score */
+  out->len_code_delta = 0;
+  /* Try last distance first. */
+  for (i = 0; i < (size_t)common->params.num_last_distances_to_check; ++i) {
+    const size_t backward = (size_t)distance_cache[i];
+    size_t prev_ix = (size_t)(cur_ix - backward);
+    if (prev_ix >= cur_ix) {  /* backward == 0, or the subtraction wrapped */
+      continue;
+    }
+    if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
+      continue;
+    }
+    prev_ix &= ring_buffer_mask;
+
+    if (cur_ix_masked + best_len > ring_buffer_mask ||
+        prev_ix + best_len > ring_buffer_mask ||
+        data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+      continue;  /* candidate differs at offset best_len or would run past the mask */
+    }
+    {
+      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                  &data[cur_ix_masked],
+                                                  max_length);
+      if (len >= 3 || (len == 2 && i < 2)) {
+        /* 2-byte matches are scored only for the first two cache entries;
+           skipping the rest saves a few unnecessary binary logarithms in
+           backward reference score. */
+        score_t score = BackwardReferenceScoreUsingLastDistance(len);
+        if (best_score < score) {
+          if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
+          if (best_score < score) {  /* re-checked after the penalty */
+            best_score = score;
+            best_len = len;
+            out->len = best_len;
+            out->distance = backward;
+            out->score = best_score;
+          }
+        }
+      }
+    }
+  }
+  {
+    const uint32_t key = FN(HashBytes)(
+        &data[cur_ix_masked], self->hash_mask_, self->hash_shift_);
+    uint32_t* BROTLI_RESTRICT bucket =
+        &buckets[key << common->params.block_bits];  /* first entry of this key's block */
+    const size_t down =
+        (num[key] > self->block_size_) ?
+        (num[key] - self->block_size_) : 0u;  /* visit at most block_size_ entries */
+    for (i = num[key]; i > down;) {  /* newest entry first */
+      size_t prev_ix = bucket[--i & self->block_mask_];
+      const size_t backward = cur_ix - prev_ix;
+      if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
+        break;  /* stops scanning this bucket entirely */
+      }
+      prev_ix &= ring_buffer_mask;
+      if (cur_ix_masked + best_len > ring_buffer_mask ||
+          prev_ix + best_len > ring_buffer_mask ||
+          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+        continue;
+      }
+      {
+        const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                    &data[cur_ix_masked],
+                                                    max_length);
+        if (len >= 4) {
+          /* Matches shorter than 4 bytes are not scored here; skipping them
+             also saves a few unnecessary binary logarithms in backward
+             reference score, since we are not interested in such matches. */
+          score_t score = BackwardReferenceScore(len, backward);
+          if (best_score < score) {
+            best_score = score;
+            best_len = len;
+            out->len = best_len;
+            out->distance = backward;
+            out->score = best_score;
+          }
+        }
+      }
+    }
+    bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix;  /* same write FN(Store) would do for cur_ix */
+    ++num[key];
+  }
+  if (min_score == out->score) {  /* score unchanged: fall back to the dictionary */
+    SearchInStaticDictionary(dictionary,
+        handle, &data[cur_ix_masked], max_length, max_backward + gap,
+        max_distance, out, BROTLI_FALSE);
+  }
+}
+
+#undef HashLongestMatch  /* pairs with the #define at the top of this header */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_longest_match_inc.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_longest_match_inc.h
new file mode 100755
index 0000000000..457f5a9ed2
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_longest_match_inc.h
@@ -0,0 +1,259 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data.
+
+   This is a hash map of fixed size (bucket_size_) to a ring buffer of
+   fixed size (block_size_). The ring buffer contains the last block_size_
+   index positions of the given hash key in the compressed data. */
+
+#define HashLongestMatch HASHER()  /* struct name is produced by HASHER() */
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }  /* FN(HashBytes) loads 4 bytes */
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }  /* presumably the 4 bytes FN(Store) reads at ix - confirm */
+
+/* Hashes the 4 bytes at |data| into a bucket index, using a right shift. */
+static uint32_t FN(HashBytes)(const uint8_t* data, const int shift) {
+  uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
+  /* The higher bits contain more mixture from the multiplication,
+     so we take our results from there. */
+  return (uint32_t)(h >> shift);
+}
+
+typedef struct HashLongestMatch {
+  /* Number of hash buckets. */
+  size_t bucket_size_;
+  /* Only block_size_ newest backward references are kept,
+     and the older are forgotten. */
+  size_t block_size_;
+  /* Right-shift for computing hash bucket index from hash value. */
+  int hash_shift_;
+  /* Mask for accessing entries in a block (in a ring-buffer manner). */
+  uint32_t block_mask_;
+
+  /* --- Dynamic size members --- */
+
+  /* Count of stores into each bucket (16-bit, so it wraps around). */
+  /* uint16_t num[bucket_size]; */
+
+  /* Buckets containing block_size_ of backward references. */
+  /* uint32_t buckets[bucket_size * block_size]; */
+} HashLongestMatch;
+
+static BROTLI_INLINE HashLongestMatch* FN(Self)(HasherHandle handle) {
+  return (HashLongestMatch*)&(GetHasherCommon(handle)[1]);  /* address one HasherCommon past the common header */
+}
+
+static BROTLI_INLINE uint16_t* FN(Num)(HashLongestMatch* self) {
+  return (uint16_t*)(&self[1]);  /* counters start right after the struct */
+}
+
+static BROTLI_INLINE uint32_t* FN(Buckets)(HashLongestMatch* self) {
+  return (uint32_t*)(&FN(Num)(self)[self->bucket_size_]);  /* right after the bucket_size_ counters */
+}
+
+static void FN(Initialize)(
+    HasherHandle handle, const BrotliEncoderParams* params) {
+  HasherCommon* common = GetHasherCommon(handle);
+  HashLongestMatch* self = FN(Self)(handle);
+  BROTLI_UNUSED(params);  /* settings are read from common->params instead */
+  self->hash_shift_ = 32 - common->params.bucket_bits;  /* keep the top bucket_bits bits */
+  self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
+  self->block_size_ = (size_t)1 << common->params.block_bits;
+  self->block_mask_ = (uint32_t)(self->block_size_ - 1);  /* block_size_ is a power of two */
+}
+
+static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* data) {
+  HashLongestMatch* self = FN(Self)(handle);
+  uint16_t* num = FN(Num)(self);
+  /* Partial preparation is 100 times slower (per bucket), hence the cap. */
+  size_t partial_prepare_threshold = self->bucket_size_ >> 6;
+  if (one_shot && input_size <= partial_prepare_threshold) {
+    size_t i;
+    for (i = 0; i < input_size; ++i) {
+      const uint32_t key = FN(HashBytes)(&data[i], self->hash_shift_);  /* NOTE(review): loads 4 bytes at data[i]; confirm slack past input_size */
+      num[key] = 0;  /* only the counter is reset; bucket contents are kept */
+    }
+  } else {
+    memset(num, 0, self->bucket_size_ * sizeof(num[0]));  /* reset every counter */
+  }
+}
+
+static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size) {
+  size_t bucket_size = (size_t)1 << params->hasher.bucket_bits;
+  size_t block_size = (size_t)1 << params->hasher.block_bits;
+  BROTLI_UNUSED(one_shot);
+  BROTLI_UNUSED(input_size);
+  return sizeof(HashLongestMatch) + bucket_size * (2 + 4 * block_size);  /* per bucket: one 2-byte counter + block_size 4-byte entries */
+}
+
+/* Look at 4 bytes at &data[ix & mask].
+   Compute a hash from these, and store the value of ix at that position. */
+static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data,
+    const size_t mask, const size_t ix) {
+  HashLongestMatch* self = FN(Self)(handle);
+  uint16_t* num = FN(Num)(self);
+  const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_shift_);
+  const size_t minor_ix = num[key] & self->block_mask_;  /* position inside the bucket's ring */
+  const size_t offset =
+      minor_ix + (key << GetHasherCommon(handle)->params.block_bits);  /* bucket start + ring position */
+  FN(Buckets)(self)[offset] = (uint32_t)ix;
+  ++num[key];
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
+    const uint8_t* data, const size_t mask, const size_t ix_start,
+    const size_t ix_end) {  /* Stores each position in [ix_start, ix_end). */
+  size_t i;
+  for (i = ix_start; i < ix_end; ++i) {
+    FN(Store)(handle, data, mask, i);
+  }
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ringbuffer_mask) {
+  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {  /* i.e. num_bytes >= 3 */
+    /* Prepare the hashes for three last bytes of the last write.
+       These could not be calculated before, since they require knowledge
+       of both the previous and the current block. */
+    FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 3);
+    FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 2);
+    FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 1);
+  }
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(
+    HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
+  PrepareDistanceCache(distance_cache,
+      GetHasherCommon(handle)->params.num_last_distances_to_check);  /* count is a runtime hasher parameter */
+}
+
+/* Finds the longest backward match of &data[cur_ix] up to the length of
+   max_length and stores the position cur_ix in the hash table.
+
+   REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
+             values; if this method is invoked repeatedly with the same distance
+             cache values, it is enough to invoke FN(PrepareDistanceCache) once.
+
+   Does not look for matches longer than max_length.
+   Does not look for matches further away than max_backward.
+   Writes the best match into |out|.
+   |out|->score is updated only if a better match is found. */
+static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
+    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
+    const size_t max_length, const size_t max_backward,
+    const size_t gap, const size_t max_distance,
+    HasherSearchResult* BROTLI_RESTRICT out) {
+  HasherCommon* common = GetHasherCommon(handle);
+  HashLongestMatch* self = FN(Self)(handle);
+  uint16_t* num = FN(Num)(self);
+  uint32_t* buckets = FN(Buckets)(self);
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  /* Don't accept a short copy from far away. */
+  score_t min_score = out->score;  /* score on entry; compared again at the end */
+  score_t best_score = out->score;
+  size_t best_len = out->len;
+  size_t i;
+  out->len = 0;  /* reset unconditionally, unlike |out|->score */
+  out->len_code_delta = 0;
+  /* Try last distance first. */
+  for (i = 0; i < (size_t)common->params.num_last_distances_to_check; ++i) {
+    const size_t backward = (size_t)distance_cache[i];
+    size_t prev_ix = (size_t)(cur_ix - backward);
+    if (prev_ix >= cur_ix) {  /* backward == 0, or the subtraction wrapped */
+      continue;
+    }
+    if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
+      continue;
+    }
+    prev_ix &= ring_buffer_mask;
+
+    if (cur_ix_masked + best_len > ring_buffer_mask ||
+        prev_ix + best_len > ring_buffer_mask ||
+        data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+      continue;  /* candidate differs at offset best_len or would run past the mask */
+    }
+    {
+      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                  &data[cur_ix_masked],
+                                                  max_length);
+      if (len >= 3 || (len == 2 && i < 2)) {
+        /* 2-byte matches are scored only for the first two cache entries;
+           skipping the rest saves a few unnecessary binary logarithms in
+           backward reference score. */
+        score_t score = BackwardReferenceScoreUsingLastDistance(len);
+        if (best_score < score) {
+          if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
+          if (best_score < score) {  /* re-checked after the penalty */
+            best_score = score;
+            best_len = len;
+            out->len = best_len;
+            out->distance = backward;
+            out->score = best_score;
+          }
+        }
+      }
+    }
+  }
+  {
+    const uint32_t key =
+        FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
+    uint32_t* BROTLI_RESTRICT bucket =
+        &buckets[key << common->params.block_bits];  /* first entry of this key's block */
+    const size_t down =
+        (num[key] > self->block_size_) ? (num[key] - self->block_size_) : 0u;  /* visit at most block_size_ entries */
+    for (i = num[key]; i > down;) {  /* newest entry first */
+      size_t prev_ix = bucket[--i & self->block_mask_];
+      const size_t backward = cur_ix - prev_ix;
+      if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
+        break;  /* stops scanning this bucket entirely */
+      }
+      prev_ix &= ring_buffer_mask;
+      if (cur_ix_masked + best_len > ring_buffer_mask ||
+          prev_ix + best_len > ring_buffer_mask ||
+          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+        continue;
+      }
+      {
+        const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                                    &data[cur_ix_masked],
+                                                    max_length);
+        if (len >= 4) {
+          /* Matches shorter than 4 bytes are not scored here; skipping them
+             also saves a few unnecessary binary logarithms in backward
+             reference score, since we are not interested in such matches. */
+          score_t score = BackwardReferenceScore(len, backward);
+          if (best_score < score) {
+            best_score = score;
+            best_len = len;
+            out->len = best_len;
+            out->distance = backward;
+            out->score = best_score;
+          }
+        }
+      }
+    }
+    bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix;  /* same write FN(Store) would do for cur_ix */
+    ++num[key];
+  }
+  if (min_score == out->score) {  /* score unchanged: fall back to the dictionary */
+    SearchInStaticDictionary(dictionary,
+        handle, &data[cur_ix_masked], max_length, max_backward + gap,
+        max_distance, out, BROTLI_FALSE);
+  }
+}
+
+#undef HashLongestMatch  /* pairs with the #define at the top of this header */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_longest_match_quickly_inc.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_longest_match_quickly_inc.h
new file mode 100755
index 0000000000..a7b9639feb
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_longest_match_quickly_inc.h
@@ -0,0 +1,235 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, BUCKET_BITS, BUCKET_SWEEP, HASH_LEN,
+                        USE_DICTIONARY
+ */
+
+#define HashLongestMatchQuickly HASHER()
+
+#define BUCKET_SIZE (1 << BUCKET_BITS)
+
+#define HASH_MAP_SIZE (4 << BUCKET_BITS)
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }  /* Equals the size of the 8-byte load done by FN(HashBytes). */
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }  /* NOTE(review): presumably the bytes FN(Store) needs at ix - confirm with callers. */
+
+/* Maps the bytes at |data| to a bucket index in [0, BUCKET_SIZE).
+   An 8-byte little-endian load is performed; the left shift keeps only the
+   first HASH_LEN of those bytes before they are multiplied by kHashMul64. */
+static uint32_t FN(HashBytes)(const uint8_t* data) {
+  const uint64_t word = BROTLI_UNALIGNED_LOAD64LE(data) << (64 - 8 * HASH_LEN);
+  const uint64_t mixed = word * kHashMul64;
+  /* Only the top BUCKET_BITS bits are kept: the high end of the product is
+     where the multiplication has mixed the input the most. */
+  return (uint32_t)(mixed >> (64 - BUCKET_BITS));
+}
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+   help create backward references to previous data.
+
+   This is a hash map of fixed size: a key is in [0, BUCKET_SIZE) and its
+   values live in the BUCKET_SWEEP consecutive slots starting at that key. */
+typedef struct HashLongestMatchQuickly {
+  uint32_t buckets_[BUCKET_SIZE + BUCKET_SWEEP];  /* Positions; the extra BUCKET_SWEEP slots pad the sweep of the largest key. */
+} HashLongestMatchQuickly;
+
+static BROTLI_INLINE HashLongestMatchQuickly* FN(Self)(HasherHandle handle) {
+  return (HashLongestMatchQuickly*)&(GetHasherCommon(handle)[1]);  /* Address just past the first object GetHasherCommon(handle) points to. */
+}
+
+static void FN(Initialize)(  /* No-op for this hasher: both arguments are ignored. */
+    HasherHandle handle, const BrotliEncoderParams* params) {
+  BROTLI_UNUSED(handle);
+  BROTLI_UNUSED(params);
+}
+
+static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,  /* Zeroes the bucket slots before hashing starts. */
+    size_t input_size, const uint8_t* data) {
+  HashLongestMatchQuickly* self = FN(Self)(handle);
+  /* Partial preparation is 100 times slower (per socket). */
+  size_t partial_prepare_threshold = HASH_MAP_SIZE >> 7;  /* HASH_MAP_SIZE / 128. */
+  if (one_shot && input_size <= partial_prepare_threshold) {
+    size_t i;
+    for (i = 0; i < input_size; ++i) {
+      const uint32_t key = FN(HashBytes)(&data[i]);  /* NOTE(review): FN(HashBytes) loads 8 bytes, so this reads up to 7 bytes past data[input_size - 1]; confirm the caller guarantees that. */
+      memset(&self->buckets_[key], 0, BUCKET_SWEEP * sizeof(self->buckets_[0]));  /* Zero only the sweep of this key. */
+    }
+  } else {
+    /* It is not strictly necessary to fill this buffer here, but
+       not filling will make the results of the compression stochastic
+       (but correct). This is because random data would cause the
+       system to find accidentally good backward references here and there. */
+    memset(&self->buckets_[0], 0, sizeof(self->buckets_));
+  }
+}
+
+static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size) {
+  BROTLI_UNUSED(params);
+  BROTLI_UNUSED(one_shot);
+  BROTLI_UNUSED(input_size);
+  return sizeof(HashLongestMatchQuickly);  /* Fixed size: the struct holds only the bucket array; no argument affects it. */
+}
+
+/* Look at HASH_LEN bytes at &data[ix & mask] (via the 8-byte load in
+   FN(HashBytes)). Compute a hash key from these, and store ix in one of the
+   BUCKET_SWEEP slots buckets_[key .. key + BUCKET_SWEEP - 1]. */
+static BROTLI_INLINE void FN(Store)(HasherHandle handle,
+    const uint8_t* data, const size_t mask, const size_t ix) {
+  const uint32_t key = FN(HashBytes)(&data[ix & mask]);
+  /* Wiggle the value with the bucket sweep range. */
+  const uint32_t off = (ix >> 3) % BUCKET_SWEEP;  /* The chosen slot changes every 8 positions. */
+  FN(Self)(handle)->buckets_[key + off] = (uint32_t)ix;  /* Only the low 32 bits of ix are kept. */
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
+    const uint8_t* data, const size_t mask, const size_t ix_start,
+    const size_t ix_end) {
+  size_t pos = ix_start;  /* Stores every position in [ix_start, ix_end). */
+  while (pos < ix_end) {
+    FN(Store)(handle, data, mask, pos++);
+  }
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(
+    HasherHandle handle, size_t num_bytes, size_t position,
+    const uint8_t* ringbuffer, size_t ringbuffer_mask) {
+  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {  /* position >= 3 keeps position - 3 from wrapping around. */
+    /* Prepare the hashes for the last three bytes of the last write.
+       These could not be calculated before, since they require knowledge
+       of both the previous and the current block. */
+    FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 3);
+    FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 2);
+    FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 1);
+  }
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(  /* No-op for this hasher: both arguments are ignored. */
+    HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
+  BROTLI_UNUSED(handle);
+  BROTLI_UNUSED(distance_cache);
+}
+
+/* Finds a longest backward match of &data[cur_ix & ring_buffer_mask]
+   up to the length of max_length and stores the position cur_ix in the
+   hash table. Candidates are tried in this order: distance_cache[0], the
+   bucket slots of the current key and, when USE_DICTIONARY is set and the
+   score did not improve, the static dictionary.
+   Does not look for matches longer than max_length or further away than
+   max_backward. Writes the best match into |out|.
+   |out|->score is updated only if a better match is found. */
+static BROTLI_INLINE void FN(FindLongestMatch)(
+    HasherHandle handle, const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data,
+    const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
+    const size_t cur_ix, const size_t max_length, const size_t max_backward,
+    const size_t gap, const size_t max_distance,
+    HasherSearchResult* BROTLI_RESTRICT out) {
+  HashLongestMatchQuickly* self = FN(Self)(handle);
+  const size_t best_len_in = out->len;
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
+  int compare_char = data[cur_ix_masked + best_len_in];  /* Quick filter: candidates that differ at this byte are skipped. */
+  score_t min_score = out->score;  /* Score on entry, used to detect "nothing improved" at the end. */
+  score_t best_score = out->score;
+  size_t best_len = best_len_in;
+  size_t cached_backward = (size_t)distance_cache[0];
+  size_t prev_ix = cur_ix - cached_backward;
+  out->len_code_delta = 0;
+  if (prev_ix < cur_ix) {  /* False for a zero distance and for distances larger than cur_ix (the subtraction wraps). */
+    prev_ix &= (uint32_t)ring_buffer_mask;
+    if (compare_char == data[prev_ix + best_len]) {
+      size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+                                            &data[cur_ix_masked],
+                                            max_length);
+      if (len >= 4) {
+        const score_t score = BackwardReferenceScoreUsingLastDistance(len);
+        if (best_score < score) {
+          best_score = score;
+          best_len = len;
+          out->len = len;
+          out->distance = cached_backward;
+          out->score = best_score;
+          compare_char = data[cur_ix_masked + best_len];
+          if (BUCKET_SWEEP == 1) {  /* Single-slot table: record cur_ix and skip the bucket lookup entirely. */
+            self->buckets_[key] = (uint32_t)cur_ix;
+            return;
+          }
+        }
+      }
+    }
+  }
+  if (BUCKET_SWEEP == 1) {
+    size_t backward;
+    size_t len;
+    /* Only one to look for, don't bother to prepare for a loop. */
+    prev_ix = self->buckets_[key];
+    self->buckets_[key] = (uint32_t)cur_ix;  /* The slot is overwritten before the old candidate is validated. */
+    backward = cur_ix - prev_ix;
+    prev_ix &= (uint32_t)ring_buffer_mask;
+    if (compare_char != data[prev_ix + best_len_in]) {
+      return;  /* The early returns in this branch skip the dictionary search below. */
+    }
+    if (BROTLI_PREDICT_FALSE(backward == 0 || backward > max_backward)) {
+      return;
+    }
+    len = FindMatchLengthWithLimit(&data[prev_ix],
+                                   &data[cur_ix_masked],
+                                   max_length);
+    if (len >= 4) {
+      const score_t score = BackwardReferenceScore(len, backward);
+      if (best_score < score) {
+        out->len = len;
+        out->distance = backward;
+        out->score = score;
+        return;
+      }
+    }
+  } else {
+    uint32_t* bucket = self->buckets_ + key;
+    int i;
+    prev_ix = *bucket++;
+    for (i = 0; i < BUCKET_SWEEP; ++i, prev_ix = *bucket++) {  /* The increment also runs after the last pass and loads slot key + BUCKET_SWEEP, which is still inside buckets_. */
+      const size_t backward = cur_ix - prev_ix;
+      size_t len;
+      prev_ix &= (uint32_t)ring_buffer_mask;
+      if (compare_char != data[prev_ix + best_len]) {
+        continue;
+      }
+      if (BROTLI_PREDICT_FALSE(backward == 0 || backward > max_backward)) {
+        continue;
+      }
+      len = FindMatchLengthWithLimit(&data[prev_ix],
+                                     &data[cur_ix_masked],
+                                     max_length);
+      if (len >= 4) {
+        const score_t score = BackwardReferenceScore(len, backward);
+        if (best_score < score) {
+          best_score = score;
+          best_len = len;
+          out->len = best_len;
+          out->distance = backward;
+          out->score = score;
+          compare_char = data[cur_ix_masked + best_len];  /* Later candidates must now match at the new best length. */
+        }
+      }
+    }
+  }
+  if (USE_DICTIONARY && min_score == out->score) {  /* The dictionary is consulted only if nothing above improved the score. */
+    SearchInStaticDictionary(dictionary,
+        handle, &data[cur_ix_masked], max_length, max_backward + gap,
+        max_distance, out, BROTLI_TRUE);
+  }
+  self->buckets_[key + ((cur_ix >> 3) % BUCKET_SWEEP)] = (uint32_t)cur_ix;  /* Same slot choice as FN(Store). */
+}
+
+#undef HASH_MAP_SIZE
+#undef BUCKET_SIZE
+
+#undef HashLongestMatchQuickly
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_rolling_inc.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_rolling_inc.h
new file mode 100755
index 0000000000..17f8a408e2
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_rolling_inc.h
@@ -0,0 +1,216 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2018 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */
+/* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */
+/* JUMP = skip bytes for speedup */
+
+/* Rolling hash for long distance long string matches. Stores one position
+   per bucket, bucket key is computed over a long region. */
+
+#define HashRolling HASHER()
+
+static const uint32_t FN(kRollingHashMul32) = 69069;  /* Copied into self->factor by FN(Initialize). */
+static const uint32_t FN(kInvalidPos) = 0xffffffff;  /* Table value that marks an empty bucket. */
+
+/* This hasher uses a longer forward length, but returning a higher value here
+   will hurt compression by the main hasher when combined with a composite
+   hasher. FN(FindLongestMatch) checks max_length against CHUNKLEN instead. */
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
+
+/* Computes a code from a single byte. A lookup table of 256 values could be
+   used, but simply adding 1 works about as well. */
+static uint32_t FN(HashByte)(uint8_t byte) {
+  return (uint32_t)byte + 1u;  /* Result is in [1, 256], so it is never zero. */
+}
+
+static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add,
+                                               uint32_t factor) {
+  return (uint32_t)(factor * state + FN(HashByte)(add));  /* Appends |add| to the hash; the result is truncated to 32 bits. */
+}
+
+static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add,
+                                        uint8_t rem, uint32_t factor,
+                                        uint32_t factor_remove) {
+  return (uint32_t)(factor * state +  /* One rolling step: scale the state, add the code of |add| ... */
+      FN(HashByte)(add) - factor_remove * FN(HashByte)(rem));  /* ... and subtract the code of |rem| scaled by factor_remove. */
+}
+
+typedef struct HashRolling {
+  uint32_t state;  /* Current rolling hash value. */
+  uint32_t* table;  /* NUMBUCKETS positions; an entry is kInvalidPos while empty. */
+  size_t next_ix;  /* Next position FN(FindLongestMatch) will hash. */
+
+  uint32_t chunk_len;  /* NOTE(review): not read or written by any function in this header. */
+  uint32_t factor;  /* Multiplier per rolling step, set from kRollingHashMul32. */
+  uint32_t factor_remove;  /* Product of |factor| over the steps of one chunk (see FN(Initialize)). */
+} HashRolling;
+
+static BROTLI_INLINE HashRolling* FN(Self)(HasherHandle handle) {
+  return (HashRolling*)&(GetHasherCommon(handle)[1]);  /* Address just past the first object GetHasherCommon(handle) points to. */
+}
+
+static void FN(Initialize)(  /* Resets the state, computes the factors and empties the table. */
+    HasherHandle handle, const BrotliEncoderParams* params) {
+  HashRolling* self = FN(Self)(handle);
+  size_t i;
+  self->state = 0;
+  self->next_ix = 0;
+
+  self->factor = FN(kRollingHashMul32);
+
+  /* Compute the factor of the oldest byte to remove: factor**steps modulo
+     2**32 (the multiplications rely on 32-bit overflow) */
+  self->factor_remove = 1;
+  for (i = 0; i < CHUNKLEN; i += JUMP) {  /* One multiplication per byte that FN(Prepare) hashes. */
+    self->factor_remove *= self->factor;
+  }
+
+  self->table = (uint32_t*)((HasherHandle)self + sizeof(HashRolling));  /* Table follows the struct; assumes HasherHandle is a byte pointer - TODO confirm. */
+  for (i = 0; i < NUMBUCKETS; i++) {
+    self->table[i] = FN(kInvalidPos);
+  }
+
+  BROTLI_UNUSED(params);
+}
+
+static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,  /* Seeds self->state from the first chunk of |data|. */
+    size_t input_size, const uint8_t* data) {
+  HashRolling* self = FN(Self)(handle);
+  size_t i;
+  /* Too small size, cannot use this hasher. */
+  if (input_size < CHUNKLEN) return;  /* self->state is left unchanged in this case. */
+  self->state = 0;
+  for (i = 0; i < CHUNKLEN; i += JUMP) {  /* Hash every JUMP-th byte of the first CHUNKLEN bytes. */
+    self->state = FN(HashRollingFunctionInitial)(
+        self->state, data[i], self->factor);
+  }
+  BROTLI_UNUSED(one_shot);
+}
+
+static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size) {
+  BROTLI_UNUSED(params);  /* The size is fixed; none of the arguments affect it. */
+  BROTLI_UNUSED(one_shot);
+  BROTLI_UNUSED(input_size);
+  return sizeof(HashRolling) + NUMBUCKETS * sizeof(uint32_t);  /* The struct plus the NUMBUCKETS-entry table placed after it. */
+}
+
+static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,  /* No-op: the table is written only by FN(FindLongestMatch). */
+    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
+  BROTLI_UNUSED(handle);
+  BROTLI_UNUSED(data);
+  BROTLI_UNUSED(mask);
+  BROTLI_UNUSED(ix);
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,  /* No-op: the table is written only by FN(FindLongestMatch). */
+    const uint8_t* data, const size_t mask, const size_t ix_start,
+    const size_t ix_end) {
+  BROTLI_UNUSED(handle);
+  BROTLI_UNUSED(data);
+  BROTLI_UNUSED(mask);
+  BROTLI_UNUSED(ix_start);
+  BROTLI_UNUSED(ix_end);
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ring_buffer_mask) {
+  /* Re-seeds the hasher by calling FN(Prepare) on the data that starts at
+     the first JUMP-aligned position at or after |position|. */
+  HashRolling* self = FN(Self)(handle);
+  const size_t misalign = position & (JUMP - 1);
+  size_t available = num_bytes;
+  size_t position_masked;
+  if (misalign != 0) {
+    const size_t skip = JUMP - misalign;
+    available = (available < skip) ? 0 : (available - skip);
+    position += skip;
+  }
+  position_masked = position & ring_buffer_mask;
+  /* wrapping around ringbuffer not handled. */
+  if (ring_buffer_mask - position_masked < available) {
+    available = ring_buffer_mask - position_masked;
+  }
+
+  FN(Prepare)(handle, BROTLI_FALSE, available, ringbuffer + position_masked);
+  self->next_ix = position;
+  BROTLI_UNUSED(num_bytes);
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(  /* No-op for this hasher: both arguments are ignored. */
+    HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
+  BROTLI_UNUSED(handle);
+  BROTLI_UNUSED(distance_cache);
+}
+
+static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,  /* Rolls the hash up to cur_ix, inserting positions, and reports a match at cur_ix if it beats |out|. */
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
+    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
+    const size_t max_length, const size_t max_backward,
+    const size_t gap, const size_t max_distance,
+    HasherSearchResult* BROTLI_RESTRICT out) {
+  HashRolling* self = FN(Self)(handle);
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  size_t pos = self->next_ix;  /* Assigned again by the for loop below. */
+
+  if ((cur_ix & (JUMP - 1)) != 0) return;  /* Only JUMP-aligned positions are searched. */
+
+  /* Not enough lookahead */
+  if (max_length < CHUNKLEN) return;
+
+  for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) {  /* Catch up over every pending position, ending at cur_ix. */
+    uint32_t code = self->state & MASK;  /* Taken before the state is advanced for this step. */
+
+    uint8_t rem = data[pos & ring_buffer_mask];
+    uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask];
+    size_t found_ix = FN(kInvalidPos);
+
+    self->state = FN(HashRollingFunction)(
+        self->state, add, rem, self->factor, self->factor_remove);
+
+    if (code < NUMBUCKETS) {  /* Codes >= NUMBUCKETS are neither stored nor looked up. */
+      found_ix = self->table[code];
+      self->table[code] = (uint32_t)pos;
+      if (pos == cur_ix && found_ix != FN(kInvalidPos)) {  /* Earlier positions are only inserted; a match is evaluated at cur_ix alone. */
+        /* The cast to 32-bit makes backward distances up to 4GB work even
+           if cur_ix is above 4GB, despite using 32-bit values in the table. */
+        size_t backward = (uint32_t)(cur_ix - found_ix);
+        if (backward <= max_backward) {
+          const size_t found_ix_masked = found_ix & ring_buffer_mask;
+          const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked],
+                                                      &data[cur_ix_masked],
+                                                      max_length);
+          if (len >= 4 && len > out->len) {  /* Must be strictly longer than the match already in |out|. */
+            score_t score = BackwardReferenceScore(len, backward);
+            if (score > out->score) {
+              out->len = len;
+              out->distance = backward;
+              out->score = score;
+              out->len_code_delta = 0;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  self->next_ix = cur_ix + JUMP;
+
+  /* NOTE: this hasher does not search in the dictionary. It is used as
+     backup-hasher, the main hasher already searches in it. */
+  BROTLI_UNUSED(dictionary);
+  BROTLI_UNUSED(distance_cache);
+  BROTLI_UNUSED(gap);
+  BROTLI_UNUSED(max_distance);
+}
+
+#undef HashRolling
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_to_binary_tree_inc.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_to_binary_tree_inc.h
new file mode 100755
index 0000000000..7fb0356f55
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/hash_to_binary_tree_inc.h
@@ -0,0 +1,328 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, BUCKET_BITS, MAX_TREE_COMP_LENGTH,
+                        MAX_TREE_SEARCH_DEPTH */
+
+/* A (forgetful) hash table where each hash bucket contains a binary tree of
+   sequences whose first 4 bytes share the same hash code.
+   Each sequence is MAX_TREE_COMP_LENGTH long and is identified by its starting
+   position in the input data. The binary tree is sorted by the lexicographic
+   order of the sequences, and it is also a max-heap with respect to the
+   starting positions. */
+
+#define HashToBinaryTree HASHER()
+
+#define BUCKET_SIZE (1 << BUCKET_BITS)
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }  /* FN(HashBytes) hashes a 4-byte load. */
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
+  return MAX_TREE_COMP_LENGTH;  /* FN(Store) compares up to this many bytes starting at ix. */
+}
+
+static uint32_t FN(HashBytes)(const uint8_t* data) {
+  uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
+  /* The higher bits contain more mixture from the multiplication,
+     so we take our results from there. */
+  return h >> (32 - BUCKET_BITS);  /* Keeps the top BUCKET_BITS bits, so the key is < BUCKET_SIZE. */
+}
+
+typedef struct HashToBinaryTree {
+  /* The window size minus 1 */
+  size_t window_mask_;
+
+  /* Hash table that maps the 4-byte hashes of the sequence to the last
+     position where this hash was found, which is the root of the binary
+     tree of sequences that share this hash bucket. */
+  uint32_t buckets_[BUCKET_SIZE];
+
+  /* A position used to mark a non-existent sequence, i.e. a tree is empty if
+     its root is at invalid_pos_ and a node is a leaf if both its children
+     are at invalid_pos_. */
+  uint32_t invalid_pos_;
+
+  /* --- Dynamic size members --- */
+
+  /* The union of the binary trees of each hash bucket. The root of the tree
+     corresponding to a hash is a sequence starting at buckets_[hash] and
+     the left and right children of a sequence starting at pos are
+     forest[2 * (pos & window_mask_)] and forest[2 * (pos & window_mask_) + 1]. */
+  /* uint32_t forest[2 * num_nodes], located right after this struct */
+} HashToBinaryTree;
+
+static BROTLI_INLINE HashToBinaryTree* FN(Self)(HasherHandle handle) {
+  return (HashToBinaryTree*)&(GetHasherCommon(handle)[1]);  /* Address just past the first object GetHasherCommon(handle) points to. */
+}
+
+static BROTLI_INLINE uint32_t* FN(Forest)(HashToBinaryTree* self) {
+  return (uint32_t*)(&self[1]);  /* The forest array starts immediately after the struct. */
+}
+
+static void FN(Initialize)(  /* Derives the window mask and the empty-node marker from params->lgwin. */
+    HasherHandle handle, const BrotliEncoderParams* params) {
+  HashToBinaryTree* self = FN(Self)(handle);
+  self->window_mask_ = (1u << params->lgwin) - 1u;
+  self->invalid_pos_ = (uint32_t)(0 - self->window_mask_);  /* I.e. -window_mask_ reduced modulo 2**32. */
+}
+
+static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
+    size_t input_size, const uint8_t* data) {
+  HashToBinaryTree* self = FN(Self)(handle);
+  const uint32_t empty_root = self->invalid_pos_;
+  uint32_t slot = 0;
+  BROTLI_UNUSED(one_shot);
+  BROTLI_UNUSED(input_size);
+  BROTLI_UNUSED(data);
+  while (slot < BUCKET_SIZE) {
+    self->buckets_[slot++] = empty_root;  /* Every bucket starts out as an empty tree. */
+  }
+}
+
+static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
+    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+    size_t input_size) {
+  size_t num_nodes = (size_t)1 << params->lgwin;  /* One node per position of the window by default. */
+  if (one_shot && input_size < num_nodes) {
+    num_nodes = input_size;  /* A one-shot input shorter than the window gets one node per input byte. */
+  }
+  return sizeof(HashToBinaryTree) + 2 * sizeof(uint32_t) * num_nodes;  /* The struct plus two child links per node. */
+}
+
+static BROTLI_INLINE size_t FN(LeftChildIndex)(HashToBinaryTree* self,
+    const size_t pos) {
+  return 2 * (pos & self->window_mask_);  /* Forest index of the left-child link of the node for |pos|. */
+}
+
+static BROTLI_INLINE size_t FN(RightChildIndex)(HashToBinaryTree* self,
+    const size_t pos) {
+  return 2 * (pos & self->window_mask_) + 1;  /* Forest index of the right-child link; always FN(LeftChildIndex) + 1. */
+}
+
+/* Stores the hash of the next 4 bytes and in a single tree-traversal, the
+   hash bucket's binary tree is searched for matches and is re-rooted at the
+   current position.
+
+   If fewer than MAX_TREE_COMP_LENGTH bytes are available, the hash bucket of
+   the current position is searched for matches, but the state of the hash
+   table is not changed, since we can not know the final sorting order of the
+   current (incomplete) sequence.
+
+   This function must be called with increasing cur_ix positions. */
+static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
+    HashToBinaryTree* self, const uint8_t* const BROTLI_RESTRICT data,
+    const size_t cur_ix, const size_t ring_buffer_mask, const size_t max_length,
+    const size_t max_backward, size_t* const BROTLI_RESTRICT best_len,
+    BackwardMatch* BROTLI_RESTRICT matches) {
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  const size_t max_comp_len =
+      BROTLI_MIN(size_t, max_length, MAX_TREE_COMP_LENGTH);
+  const BROTLI_BOOL should_reroot_tree =
+      TO_BROTLI_BOOL(max_length >= MAX_TREE_COMP_LENGTH);
+  const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
+  uint32_t* forest = FN(Forest)(self);
+  size_t prev_ix = self->buckets_[key];
+  /* The forest index of the rightmost node of the left subtree of the new
+     root, updated as we traverse and re-root the tree of the hash bucket. */
+  size_t node_left = FN(LeftChildIndex)(self, cur_ix);
+  /* The forest index of the leftmost node of the right subtree of the new
+     root, updated as we traverse and re-root the tree of the hash bucket. */
+  size_t node_right = FN(RightChildIndex)(self, cur_ix);
+  /* The match length of the rightmost node of the left subtree of the new
+     root, updated as we traverse and re-root the tree of the hash bucket. */
+  size_t best_len_left = 0;
+  /* The match length of the leftmost node of the right subtree of the new
+     root, updated as we traverse and re-root the tree of the hash bucket. */
+  size_t best_len_right = 0;
+  size_t depth_remaining;
+  if (should_reroot_tree) {
+    self->buckets_[key] = (uint32_t)cur_ix;  /* cur_ix becomes the new root of this bucket's tree. */
+  }
+  for (depth_remaining = MAX_TREE_SEARCH_DEPTH; ; --depth_remaining) {
+    const size_t backward = cur_ix - prev_ix;
+    const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
+    if (backward == 0 || backward > max_backward || depth_remaining == 0) {  /* Candidate unusable or depth budget spent: close both open links. */
+      if (should_reroot_tree) {
+        forest[node_left] = self->invalid_pos_;
+        forest[node_right] = self->invalid_pos_;
+      }
+      break;
+    }
+    {
+      const size_t cur_len = BROTLI_MIN(size_t, best_len_left, best_len_right);  /* The first cur_len bytes are not compared again. */
+      size_t len;
+      BROTLI_DCHECK(cur_len <= MAX_TREE_COMP_LENGTH);
+      len = cur_len +
+          FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],
+                                   &data[prev_ix_masked + cur_len],
+                                   max_length - cur_len);
+      BROTLI_DCHECK(
+          0 == memcmp(&data[cur_ix_masked], &data[prev_ix_masked], len));
+      if (matches && len > *best_len) {  /* matches == NULL (as passed by FN(Store)) means insert only. */
+        *best_len = len;
+        InitBackwardMatch(matches++, backward, len);
+      }
+      if (len >= max_comp_len) {  /* Match reached the compare limit: the new root takes over prev_ix's children. */
+        if (should_reroot_tree) {
+          forest[node_left] = forest[FN(LeftChildIndex)(self, prev_ix)];
+          forest[node_right] = forest[FN(RightChildIndex)(self, prev_ix)];
+        }
+        break;
+      }
+      if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {  /* The current sequence sorts after prev_ix: prev_ix joins the left side. */
+        best_len_left = len;
+        if (should_reroot_tree) {
+          forest[node_left] = (uint32_t)prev_ix;
+        }
+        node_left = FN(RightChildIndex)(self, prev_ix);
+        prev_ix = forest[node_left];
+      } else {
+        best_len_right = len;
+        if (should_reroot_tree) {
+          forest[node_right] = (uint32_t)prev_ix;
+        }
+        node_right = FN(LeftChildIndex)(self, prev_ix);
+        prev_ix = forest[node_right];
+      }
+    }
+  }
+  return matches;  /* One past the last match written; unchanged if none were written. */
+}
+
+/* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
+   length of max_length and stores the position cur_ix in the hash table.
+
+   Returns the number of matches found, and stores the found matches in
+   matches[0] to matches[count - 1]. The matches will be
+   sorted by strictly increasing length and (non-strictly) increasing
+   distance. */
+static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle,
+    const BrotliEncoderDictionary* dictionary, const uint8_t* data,
+    const size_t ring_buffer_mask, const size_t cur_ix,
+    const size_t max_length, const size_t max_backward,
+    const size_t gap, const BrotliEncoderParams* params,
+    BackwardMatch* matches) {
+  BackwardMatch* const orig_matches = matches;
+  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  size_t best_len = 1;
+  const size_t short_match_max_backward =
+      params->quality != HQ_ZOPFLIFICATION_QUALITY ? 16 : 64;  /* How far back the linear scan below looks. */
+  size_t stop = cur_ix - short_match_max_backward;
+  uint32_t dict_matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
+  size_t i;
+  if (cur_ix < short_match_max_backward) { stop = 0; }  /* Undo the wrap-around of the subtraction above. */
+  for (i = cur_ix - 1; i > stop && best_len <= 2; --i) {  /* Linear scan of the nearest positions; ends once a match longer than 2 is found. */
+    size_t prev_ix = i;
+    const size_t backward = cur_ix - prev_ix;
+    if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
+      break;
+    }
+    prev_ix &= ring_buffer_mask;
+    if (data[cur_ix_masked] != data[prev_ix] ||
+        data[cur_ix_masked + 1] != data[prev_ix + 1]) {
+      continue;
+    }
+    {
+      const size_t len =
+          FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
+                                   max_length);
+      if (len > best_len) {
+        best_len = len;
+        InitBackwardMatch(matches++, backward, len);
+      }
+    }
+  }
+  if (best_len < max_length) {  /* The tree search (and the insertion of cur_ix) is skipped if the scan already reached max_length. */
+    matches = FN(StoreAndFindMatches)(FN(Self)(handle), data, cur_ix,
+        ring_buffer_mask, max_length, max_backward, &best_len, matches);
+  }
+  for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) {
+    dict_matches[i] = kInvalidMatch;
+  }
+  {
+    size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1);  /* Only dictionary matches longer than the best one so far are added. */
+    if (BrotliFindAllStaticDictionaryMatches(dictionary,
+        &data[cur_ix_masked], minlen, max_length, &dict_matches[0])) {
+      size_t maxlen = BROTLI_MIN(
+          size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length);
+      size_t l;
+      for (l = minlen; l <= maxlen; ++l) {
+        uint32_t dict_id = dict_matches[l];
+        if (dict_id < kInvalidMatch) {
+          size_t distance = max_backward + gap + (dict_id >> 5) + 1;  /* Dictionary references use distances beyond max_backward + gap. */
+          if (distance <= params->dist.max_distance) {
+            InitDictionaryBackwardMatch(matches++, distance, l, dict_id & 31);
+          }
+        }
+      }
+    }
+  }
+  return (size_t)(matches - orig_matches);
+}
+
+/* Stores the hash of the next 4 bytes and re-roots the binary tree at the
+   current sequence, without returning any matches.
+   REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */
+static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data,
+    const size_t mask, const size_t ix) {
+  HashToBinaryTree* self = FN(Self)(handle);
+  /* Maximum distance is window size - 16, see section 9.1. of the spec. */
+  const size_t max_backward = self->window_mask_ - BROTLI_WINDOW_GAP + 1;  /* I.e. window size minus BROTLI_WINDOW_GAP. */
+  FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH,
+      max_backward, NULL, NULL);  /* NULL outputs: the tree is updated but no matches are collected. */
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,  /* Stores the tail of the range densely and a long head only sparsely. */
+    const uint8_t* data, const size_t mask, const size_t ix_start,
+    const size_t ix_end) {
+  size_t i = ix_start;  /* Start of the tail that is stored position by position. */
+  size_t j = ix_start;  /* Cursor for the sparse pass over the head. */
+  if (ix_start + 63 <= ix_end) {
+    i = ix_end - 63;  /* Only the last 63 positions are stored densely. */
+  }
+  if (ix_start + 512 <= i) {
+    for (; j < i; j += 8) {  /* Head of at least 512 positions: store every 8th one. */
+      FN(Store)(handle, data, mask, j);
+    }
+  }  /* A shorter head is not stored at all. */
+  for (; i < ix_end; ++i) {
+    FN(Store)(handle, data, mask, i);
+  }
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
+    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+    size_t ringbuffer_mask) {
+  HashToBinaryTree* self = FN(Self)(handle);
+  if (num_bytes >= FN(HashTypeLength)() - 1 &&
+      position >= MAX_TREE_COMP_LENGTH) {  /* Keeps i_start below from wrapping around. */
+    /* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher.
+       These could not be calculated before, since they require knowledge
+       of both the previous and the current block. */
+    const size_t i_start = position - MAX_TREE_COMP_LENGTH + 1;
+    const size_t i_end = BROTLI_MIN(size_t, position, i_start + num_bytes);  /* Never reaches |position| itself. */
+    size_t i;
+    for (i = i_start; i < i_end; ++i) {
+      /* Maximum distance is window size - 16, see section 9.1. of the spec.
+         Furthermore, we have to make sure that we don't look further back
+         from the start of the next block than the window size, otherwise we
+         could access already overwritten areas of the ring-buffer. */
+      const size_t max_backward =
+          self->window_mask_ - BROTLI_MAX(size_t,
+                                          BROTLI_WINDOW_GAP - 1,
+                                          position - i);
+      /* We know that i + MAX_TREE_COMP_LENGTH <= position + num_bytes, i.e. the
+         end of the current block and that we have at least
+         MAX_TREE_COMP_LENGTH tail in the ring-buffer. */
+      FN(StoreAndFindMatches)(self, ringbuffer, i, ringbuffer_mask,
+          MAX_TREE_COMP_LENGTH, max_backward, NULL, NULL);  /* Insert only; no matches are collected. */
+    }
+  }
+}
+
+#undef BUCKET_SIZE
+
+#undef HashToBinaryTree
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/histogram.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/histogram.c
new file mode 100755
index 0000000000..6da2ff6bb4
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/histogram.c
@@ -0,0 +1,100 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Build per-context histograms of literals, commands and distance codes. */
+
+#include "./histogram.h"
+
+#include "../common/context.h"
+#include "./block_splitter.h"
+#include "./command.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct BlockSplitIterator {
+  const BlockSplit* split_;  /* Not owned. */
+  size_t idx_;     /* Index of the current block in types[] / lengths[]. */
+  size_t type_;    /* Type of the current block (0 for the first block). */
+  size_t length_;  /* Entries left in the current block. */
+} BlockSplitIterator;
+
+static void InitBlockSplitIterator(BlockSplitIterator* self,
+    const BlockSplit* split) {
+  /* Begin at block 0 with type 0; a split without lengths starts empty. */
+  self->split_ = split;
+  self->idx_ = self->type_ = 0;
+  self->length_ = !split->lengths ? 0 : split->lengths[0];
+}
+
+static void BlockSplitIteratorNext(BlockSplitIterator* self) {
+  if (!self->length_) {  /* Current block is used up; move to the next. */
+    const size_t next = ++self->idx_;
+    self->type_ = self->split_->types[next];
+    self->length_ = self->split_->lengths[next];
+  }
+  self->length_--;  /* Consume one entry of the current block. */
+}
+
+void BrotliBuildHistogramsWithContext(
+    const Command* cmds, const size_t num_commands,
+    const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
+    const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t start_pos,
+    size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
+    const ContextType* context_modes, HistogramLiteral* literal_histograms,
+    HistogramCommand* insert_and_copy_histograms,
+    HistogramDistance* copy_dist_histograms) {
+  size_t pos = start_pos;  /* Ring-buffer position of the next byte. */
+  BlockSplitIterator literal_it;
+  BlockSplitIterator insert_and_copy_it;
+  BlockSplitIterator dist_it;
+  size_t i;
+  /* Adds each command, literal and distance symbol to its histogram. */
+  InitBlockSplitIterator(&literal_it, literal_split);
+  InitBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split);
+  InitBlockSplitIterator(&dist_it, dist_split);
+  for (i = 0; i < num_commands; ++i) {
+    const Command* cmd = &cmds[i];
+    size_t j;
+    BlockSplitIteratorNext(&insert_and_copy_it);
+    HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_],
+        cmd->cmd_prefix_);
+    /* TODO: unwrap iterator blocks. */
+    for (j = cmd->insert_len_; j != 0; --j) {  /* One pass per literal. */
+      size_t context;
+      BlockSplitIteratorNext(&literal_it);
+      context = literal_it.type_;
+      if (context_modes) {  /* Refine the index by the byte context. */
+        ContextLut lut = BROTLI_CONTEXT_LUT(context_modes[context]);
+        context = (context << BROTLI_LITERAL_CONTEXT_BITS) +
+            BROTLI_CONTEXT(prev_byte, prev_byte2, lut);
+      }
+      HistogramAddLiteral(&literal_histograms[context],
+          ringbuffer[pos & mask]);
+      prev_byte2 = prev_byte;
+      prev_byte = ringbuffer[pos & mask];
+      ++pos;
+    }
+    pos += CommandCopyLen(cmd);  /* Skip over the copied bytes. */
+    if (CommandCopyLen(cmd)) {  /* Reload the two bytes just before pos. */
+      prev_byte2 = ringbuffer[(pos - 2) & mask];
+      prev_byte = ringbuffer[(pos - 1) & mask];
+      if (cmd->cmd_prefix_ >= 128) {  /* Prefixes below 128 add no distance. */
+        size_t context;
+        BlockSplitIteratorNext(&dist_it);
+        context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) +
+            CommandDistanceContext(cmd);
+        HistogramAddDistance(&copy_dist_histograms[context],
+            cmd->dist_prefix_ & 0x3FF);  /* Low 10 bits only. */
+      }
+    }
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/histogram.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/histogram.h
new file mode 100755
index 0000000000..42af3c3f9d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/histogram.h
@@ -0,0 +1,63 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Models the histograms of literals, commands and distance codes. */
+
+#ifndef BROTLI_ENC_HISTOGRAM_H_
+#define BROTLI_ENC_HISTOGRAM_H_
+
+#include <string.h>  /* memset */
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./block_splitter.h"
+#include "./command.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* The distance symbols effectively used by "Large Window Brotli" (32-bit). */
+#define BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS 544
+
+#define FN(X) X ## Literal  /* Names become e.g. HistogramLiteral. */
+#define DATA_SIZE BROTLI_NUM_LITERAL_SYMBOLS
+#define DataType uint8_t
+#include "./histogram_inc.h"  /* NOLINT(build/include) */
+#undef DataType
+#undef DATA_SIZE
+#undef FN
+
+#define FN(X) X ## Command
+#define DataType uint16_t  /* Stays defined for the Distance section. */
+#define DATA_SIZE BROTLI_NUM_COMMAND_SYMBOLS
+#include "./histogram_inc.h"  /* NOLINT(build/include) */
+#undef DATA_SIZE
+#undef FN
+
+#define FN(X) X ## Distance
+#define DATA_SIZE BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS
+#include "./histogram_inc.h"  /* NOLINT(build/include) */
+#undef DataType  /* Was defined in the Command section above. */
+#undef DATA_SIZE
+#undef FN
+
+BROTLI_INTERNAL void BrotliBuildHistogramsWithContext(
+    const Command* cmds, const size_t num_commands,
+    const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
+    const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t pos,
+    size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
+    const ContextType* context_modes, HistogramLiteral* literal_histograms,
+    HistogramCommand* insert_and_copy_histograms,
+    HistogramDistance* copy_dist_histograms);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_HISTOGRAM_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/histogram_inc.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/histogram_inc.h
new file mode 100755
index 0000000000..50eaf7468d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/histogram_inc.h
@@ -0,0 +1,51 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, DATA_SIZE, DataType */
+
+/* A simple container for histograms of data in blocks. */
+
+typedef struct FN(Histogram) {
+  uint32_t data_[DATA_SIZE];  /* Occurrence count per symbol. */
+  size_t total_count_;        /* Number of symbols added. */
+  double bit_cost_;           /* HUGE_VAL after HistogramClear. */
+} FN(Histogram);
+
+static BROTLI_INLINE void FN(HistogramClear)(FN(Histogram)* self) {
+  self->bit_cost_ = HUGE_VAL;
+  self->total_count_ = 0;
+  memset(self->data_, 0, sizeof(self->data_));
+}
+
+static BROTLI_INLINE void FN(ClearHistograms)(
+    FN(Histogram)* array, size_t length) {
+  /* Clears `length` consecutive histograms, starting at `array`. */
+  for (; length != 0; --length) FN(HistogramClear)(array++);
+}
+
+static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {
+  self->total_count_ += 1;
+  self->data_[val] += 1;
+}
+
+static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self,
+    const DataType* p, size_t n) {
+  /* Counts the `n` symbols stored at `p`, in order. */
+  self->total_count_ += n;
+  for (; n != 0; --n) ++self->data_[*p++];
+}
+
+static BROTLI_INLINE void FN(HistogramAddHistogram)(FN(Histogram)* self,
+    const FN(Histogram)* v) {
+  size_t k;
+  /* Adds the per-symbol counts of `v` to those of `self`, then the
+     total count; `bit_cost_` is left as it is. */
+  for (k = 0; k != DATA_SIZE; ++k) self->data_[k] += v->data_[k];
+  self->total_count_ += v->total_count_;
+}
+
+static BROTLI_INLINE size_t FN(HistogramDataSize)(void) { return DATA_SIZE; }
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/literal_cost.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/literal_cost.c
new file mode 100755
index 0000000000..c231100e34
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/literal_cost.c
@@ -0,0 +1,175 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Literal cost model to allow backward reference replacement to be efficient.
+*/
+
+#include "./literal_cost.h"
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./fast_log.h"
+#include "./utf8_util.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
+  /* Returns which byte of a UTF-8 sequence is expected after `c`: 0 for
+     'Byte 1', 1 for 'Byte 2', 2 for 'Byte 3'. The result never exceeds
+     `clamp`. `last` is the byte that preceded `c`. */
+  if (c < 128) {
+    return 0;  /* Next one is the 'Byte 1' again. */
+  }
+  if (c >= 192) {
+    return BROTLI_MIN(size_t, 1, clamp);  /* 'Byte 2' is next. */
+  }
+  /* `c` is in [128, 192): `last` tells whether the sequence ends here
+     (two or three byte coding completed) or 'Byte 3' is still to come. */
+  return (last < 0xE0) ? 0 : BROTLI_MIN(size_t, 2, clamp);
+}
+
+static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
+                                        const uint8_t* data) {
+  /* Tallies, over `len` bytes starting at `pos`, how often UTF8Position
+     reports each position (0, 1 or 2), then picks the modeling level. */
+  size_t counts[3] = { 0 };
+  size_t max_utf8 = 1;  /* should be 2, but 1 compresses better. */
+  size_t prev = 0;
+  size_t offset = 0;
+  while (offset < len) {
+    const size_t cur = data[(pos + offset) & mask];
+    counts[UTF8Position(prev, cur, 2)]++;
+    prev = cur;
+    offset++;
+  }
+  if (counts[2] < 500) max_utf8 = 1;  /* Too few 'Byte 3' positions. */
+  /* Hardly any multi-byte positions at all: use level 0. */
+  if (counts[1] + counts[2] < 25) max_utf8 = 0;
+  return max_utf8;
+}
+
+static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
+                                            const uint8_t* data, float* cost) {
+  /* max_utf8 is 0 (normal ASCII single byte modeling),
+     1 (for 2-byte UTF-8 modeling), or 2 (for 3-byte UTF-8 modeling). */
+  const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
+  size_t histogram[3][256] = { { 0 } };  /* [UTF-8 position][byte value] */
+  size_t window_half = 495;  /* Reach of the window on either side. */
+  size_t in_window = BROTLI_MIN(size_t, window_half, len);
+  size_t in_window_utf8[3] = { 0 };  /* Window bytes per UTF-8 position. */
+  /* Writes one cost estimate per input byte into cost[0..len). */
+  size_t i;
+  {  /* Bootstrap histograms. */
+    size_t last_c = 0;
+    size_t utf8_pos = 0;
+    for (i = 0; i < in_window; ++i) {
+      size_t c = data[(pos + i) & mask];
+      ++histogram[utf8_pos][c];
+      ++in_window_utf8[utf8_pos];
+      utf8_pos = UTF8Position(last_c, c, max_utf8);
+      last_c = c;
+    }
+  }
+
+  /* Compute bit costs with sliding window. */
+  for (i = 0; i < len; ++i) {
+    if (i >= window_half) {
+      /* Remove a byte in the past. */
+      size_t c =
+          i < window_half + 1 ? 0 : data[(pos + i - window_half - 1) & mask];
+      size_t last_c =
+          i < window_half + 2 ? 0 : data[(pos + i - window_half - 2) & mask];
+      size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
+      --histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
+      --in_window_utf8[utf8_pos2];
+    }
+    if (i + window_half < len) {
+      /* Add a byte in the future. */
+      size_t c = data[(pos + i + window_half - 1) & mask];
+      size_t last_c = data[(pos + i + window_half - 2) & mask];
+      size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
+      ++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
+      ++in_window_utf8[utf8_pos2];
+    }
+    {  /* Cost of the literal at offset i. */
+      size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
+      size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
+      size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
+      size_t masked_pos = (pos + i) & mask;
+      size_t histo = histogram[utf8_pos][data[masked_pos]];
+      double lit_cost;
+      if (histo == 0) {  /* Keep the argument of FastLog2 non-zero. */
+        histo = 1;
+      }
+      lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
+      lit_cost += 0.02905;
+      if (lit_cost < 1.0) {  /* Pull small costs halfway towards 1.0. */
+        lit_cost *= 0.5;
+        lit_cost += 0.5;
+      }
+      /* Make the first bytes more expensive -- seems to help, not sure why.
+         Perhaps because the entropy source is changing its properties
+         rapidly in the beginning of the file, perhaps because the beginning
+         of the data is a statistical "anomaly". */
+      if (i < 2000) {
+        lit_cost += 0.7 - ((double)(2000 - i) / 2000.0 * 0.35);
+      }
+      cost[i] = (float)lit_cost;
+    }
+  }
+}
+
+void BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
+                                       const uint8_t* data, float* cost) {
+  if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
+    /* Mostly UTF-8 input goes to the UTF-8 aware estimator. */
+    EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
+  } else {
+    /* Byte counts inside a window that trails and leads the current
+       position by about `half` bytes. */
+    size_t counts[256] = { 0 };
+    const size_t half = 2000;
+    size_t window_size = BROTLI_MIN(size_t, half, len);
+    size_t k;
+
+    /* Prime the counts with the first `window_size` bytes. */
+    for (k = 0; k < window_size; ++k) {
+      ++counts[data[(pos + k) & mask]];
+    }
+
+    /* Slide the window over the input, one literal per step. */
+    for (k = 0; k < len; ++k) {
+      size_t seen;
+      double bits;
+      if (k >= half) {
+        /* The byte `half` positions back leaves the window. */
+        --counts[data[(pos + k - half) & mask]];
+        --window_size;
+      }
+      if (k + half < len) {
+        /* The byte `half` positions ahead enters the window. */
+        ++counts[data[(pos + k + half) & mask]];
+        ++window_size;
+      }
+      seen = counts[data[(pos + k) & mask]];
+      if (seen == 0) seen = 1;
+      /* log2(window size) - log2(occurrences), plus a small constant. */
+      bits = FastLog2(window_size) - FastLog2(seen);
+      bits += 0.029;
+      if (bits < 1.0) {
+        bits *= 0.5;
+        bits += 0.5;
+      }
+      cost[k] = (float)bits;
+    }
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/literal_cost.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/literal_cost.h
new file mode 100755
index 0000000000..8f53f39d3f
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/literal_cost.h
@@ -0,0 +1,30 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Literal cost model to allow backward reference replacement to be efficient.
+*/
+
+#ifndef BROTLI_ENC_LITERAL_COST_H_
+#define BROTLI_ENC_LITERAL_COST_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Estimates how many bits the literals in the interval [pos, pos + len) in the
+   ring-buffer (data, mask) will take entropy coded and writes these estimates
+   to the cost[0..len) array. */
+BROTLI_INTERNAL void BrotliEstimateBitCostsForLiterals(
+    size_t pos, size_t len, size_t mask, const uint8_t* data, float* cost);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_LITERAL_COST_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/memory.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/memory.c
new file mode 100755
index 0000000000..f6ed7e3cb7
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/memory.c
@@ -0,0 +1,170 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Memory manager for the encoder: allocation and release through the
+   user-supplied callbacks, with optional tracking of live pointers. */
+
+#include "./memory.h"
+
+#include <stdlib.h>  /* exit, free, malloc */
+#include <string.h>  /* memcpy */
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define MAX_PERM_ALLOCATED 128  /* Capacity of the perm-allocated list. */
+#define MAX_NEW_ALLOCATED 64    /* Capacity of the new-allocated list. */
+#define MAX_NEW_FREED 64        /* Capacity of the new-freed list. */
+
+#define PERM_ALLOCATED_OFFSET 0  /* List start offsets in m->pointers. */
+#define NEW_ALLOCATED_OFFSET MAX_PERM_ALLOCATED
+#define NEW_FREED_OFFSET (MAX_PERM_ALLOCATED + MAX_NEW_ALLOCATED)
+
+void BrotliInitMemoryManager(
+    MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
+    void* opaque) {
+  if (alloc_func) {  /* Caller-provided callbacks and opaque pointer. */
+    m->alloc_func = alloc_func;
+    m->free_func = free_func;
+    m->opaque = opaque;
+  } else {  /* No allocator given: fall back to the defaults. */
+    m->alloc_func = BrotliDefaultAllocFunc;
+    m->free_func = BrotliDefaultFreeFunc;
+    m->opaque = 0;
+  }
+#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
+  m->new_freed = 0;
+  m->new_allocated = 0;
+  m->perm_allocated = 0;
+  m->is_oom = BROTLI_FALSE;
+#endif  /* BROTLI_ENCODER_EXIT_ON_OOM */
+}
+
+#if defined(BROTLI_ENCODER_EXIT_ON_OOM)
+
+void* BrotliAllocate(MemoryManager* m, size_t n) {
+  void* const block = m->alloc_func(m->opaque, n);
+  if (block == NULL) exit(EXIT_FAILURE);  /* Out of memory ends the process. */
+  return block;
+}
+
+void BrotliFree(MemoryManager* m, void* p) {
+  m->free_func(m->opaque, p);  /* Passed straight through, no tracking. */
+}
+
+void BrotliWipeOutMemoryManager(MemoryManager* m) {
+  BROTLI_UNUSED(m);  /* No pointers are recorded in this mode. */
+}
+
+#else  /* BROTLI_ENCODER_EXIT_ON_OOM */
+
+static void SortPointers(void** items, const size_t n) {
+  /* Shell sort over a fixed gap sequence; the final gap of 1 finishes it. */
+  static const size_t kGaps[] = {23, 10, 4, 1};
+  size_t pass, cur, slot;
+  for (pass = 0; pass < sizeof(kGaps) / sizeof(kGaps[0]); ++pass) {
+    const size_t step = kGaps[pass];
+    for (cur = step; cur < n; ++cur) {
+      void* const moving = items[cur];
+      /* Shift larger entries up by `step` until `moving` fits. */
+      for (slot = cur; slot >= step && moving < items[slot - step];
+           slot -= step) {
+        items[slot] = items[slot - step];
+      }
+      items[slot] = moving;
+    }
+  }
+}
+
+static size_t Annihilate(void** a, size_t a_len, void** b, size_t b_len) {
+  /* Walks two ascending arrays in step. A pointer found in both is dropped
+     from both; the rest is compacted to the front. Returns the drop count. */
+  size_t a_in = 0, a_out = 0;
+  size_t b_in = 0, b_out = 0;
+  size_t dropped = 0;
+  while (a_in < a_len && b_in < b_len) {
+    if (a[a_in] == b[b_in]) {
+      ++dropped;
+      ++a_in;
+      ++b_in;
+    } else if (a[a_in] < b[b_in]) {
+      a[a_out++] = a[a_in++];
+    } else {
+      b[b_out++] = b[b_in++];
+    }
+  }
+  for (; a_in < a_len; ++a_in) a[a_out++] = a[a_in];
+  for (; b_in < b_len; ++b_in) b[b_out++] = b[b_in];
+  return dropped;
+}
+
+static void CollectGarbagePointers(MemoryManager* m) {
+  size_t annihilated;  /* Alloc/free record pairs cancelled per pass. */
+  SortPointers(m->pointers + NEW_ALLOCATED_OFFSET, m->new_allocated);
+  SortPointers(m->pointers + NEW_FREED_OFFSET, m->new_freed);
+  annihilated = Annihilate(
+      m->pointers + NEW_ALLOCATED_OFFSET, m->new_allocated,
+      m->pointers + NEW_FREED_OFFSET, m->new_freed);
+  m->new_allocated -= annihilated;
+  m->new_freed -= annihilated;
+
+  if (m->new_freed != 0) {  /* Cancel left-over frees against perm list. */
+    annihilated = Annihilate(
+        m->pointers + PERM_ALLOCATED_OFFSET, m->perm_allocated,
+        m->pointers + NEW_FREED_OFFSET, m->new_freed);
+    m->perm_allocated -= annihilated;
+    m->new_freed -= annihilated;
+    BROTLI_DCHECK(m->new_freed == 0);
+  }
+
+  if (m->new_allocated != 0) {  /* Move surviving records to perm list. */
+    BROTLI_DCHECK(m->perm_allocated + m->new_allocated <= MAX_PERM_ALLOCATED);
+    memcpy(m->pointers + PERM_ALLOCATED_OFFSET + m->perm_allocated,
+           m->pointers + NEW_ALLOCATED_OFFSET,
+           sizeof(void*) * m->new_allocated);
+    m->perm_allocated += m->new_allocated;
+    m->new_allocated = 0;  /* The new-allocated list is empty again. */
+    SortPointers(m->pointers + PERM_ALLOCATED_OFFSET, m->perm_allocated);
+  }
+}
+
+void* BrotliAllocate(MemoryManager* m, size_t n) {
+  void* const block = m->alloc_func(m->opaque, n);
+  if (block != NULL) {  /* Record it; compact first if the list is full. */
+    if (m->new_allocated == MAX_NEW_ALLOCATED) CollectGarbagePointers(m);
+    m->pointers[NEW_ALLOCATED_OFFSET + m->new_allocated++] = block;
+  } else {
+    m->is_oom = BROTLI_TRUE;  /* The failure stays latched in is_oom. */
+  }
+  return block;
+}
+
+void BrotliFree(MemoryManager* m, void* p) {
+  if (p == NULL) return;  /* NULL is neither freed nor recorded. */
+  m->free_func(m->opaque, p);
+  if (m->new_freed == MAX_NEW_FREED) CollectGarbagePointers(m);
+  m->pointers[NEW_FREED_OFFSET + m->new_freed++] = p;
+}
+
+void BrotliWipeOutMemoryManager(MemoryManager* m) {
+  size_t k = 0;
+  CollectGarbagePointers(m);
+  /* Every pointer not yet freed now sits in the perm-allocated list. */
+  while (k < m->perm_allocated) {
+    m->free_func(m->opaque, m->pointers[PERM_ALLOCATED_OFFSET + k++]);
+  }
+  m->perm_allocated = 0;
+}
+
+#endif  /* BROTLI_ENCODER_EXIT_ON_OOM */
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/memory.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/memory.h
new file mode 100755
index 0000000000..ab928d019b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/memory.h
@@ -0,0 +1,102 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Macros for memory management. */
+
+#ifndef BROTLI_ENC_MEMORY_H_
+#define BROTLI_ENC_MEMORY_H_
+
+#include <string.h>  /* memcpy */
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#if !defined(BROTLI_ENCODER_CLEANUP_ON_OOM) && \
+    !defined(BROTLI_ENCODER_EXIT_ON_OOM)
+#define BROTLI_ENCODER_EXIT_ON_OOM
+#endif
+
+typedef struct MemoryManager {
+  brotli_alloc_func alloc_func;
+  brotli_free_func free_func;
+  void* opaque;  /* Passed as first argument to alloc_func / free_func. */
+#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
+  BROTLI_BOOL is_oom;     /* Set once an allocation has failed. */
+  size_t perm_allocated;  /* Used entries of the three lists in pointers[]. */
+  size_t new_allocated;
+  size_t new_freed;
+  void* pointers[256];    /* Allocation / free records; see memory.c. */
+#endif  /* BROTLI_ENCODER_EXIT_ON_OOM */
+} MemoryManager;
+
+BROTLI_INTERNAL void BrotliInitMemoryManager(
+    MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
+    void* opaque);
+
+BROTLI_INTERNAL void* BrotliAllocate(MemoryManager* m, size_t n);
+#define BROTLI_ALLOC(M, T, N)                               \
+  ((N) > 0 ? ((T*)BrotliAllocate((M), (N) * sizeof(T))) : NULL)
+
+BROTLI_INTERNAL void BrotliFree(MemoryManager* m, void* p);
+#define BROTLI_FREE(M, P) { \
+  BrotliFree((M), (P));     \
+  P = NULL;                 \
+}
+
+#if defined(BROTLI_ENCODER_EXIT_ON_OOM)
+#define BROTLI_IS_OOM(M) (!!0)
+#else  /* BROTLI_ENCODER_EXIT_ON_OOM */
+#define BROTLI_IS_OOM(M) (!!(M)->is_oom)
+#endif  /* BROTLI_ENCODER_EXIT_ON_OOM */
+
+BROTLI_INTERNAL void BrotliWipeOutMemoryManager(MemoryManager* m);
+
+/*
+Dynamically grows array capacity to at least the requested size.
+M: MemoryManager
+T: data type
+A: array (lvalue; replaced by the new allocation, the old one is freed)
+C: capacity (lvalue; grows by doubling, starts at R when it is 0)
+R: requested size (evaluated more than once)
+*/
+#define BROTLI_ENSURE_CAPACITY(M, T, A, C, R) {  \
+  if (C < (R)) {                                 \
+    size_t _new_size = (C == 0) ? (R) : C;       \
+    T* new_array;                                \
+    while (_new_size < (R)) _new_size *= 2;      \
+    new_array = BROTLI_ALLOC((M), T, _new_size); \
+    if (!BROTLI_IS_OOM(M) && C != 0)             \
+      memcpy(new_array, A, C * sizeof(T));       \
+    BROTLI_FREE((M), A);                         \
+    A = new_array;                               \
+    C = _new_size;                               \
+  }                                              \
+}
+
+/*
+Appends value and dynamically grows array capacity when needed.
+M: MemoryManager
+T: data type
+A: array
+C: array capacity
+S: array size (lvalue; incremented before the capacity check)
+V: value to append (stored at index S - 1 after the increment)
+*/
+#define BROTLI_ENSURE_CAPACITY_APPEND(M, T, A, C, S, V) { \
+  (S)++;                                                  \
+  BROTLI_ENSURE_CAPACITY(M, T, A, C, S);                  \
+  A[(S) - 1] = (V);                                       \
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_MEMORY_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/metablock.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/metablock.c
new file mode 100755
index 0000000000..4e80044f31
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/metablock.c
@@ -0,0 +1,667 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Algorithms for distributing the literals and commands of a metablock between
+   block types and contexts. */
+
+#include "./metablock.h"
+
+#include "../common/constants.h"
+#include "../common/context.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./bit_cost.h"
+#include "./block_splitter.h"
+#include "./cluster.h"
+#include "./entropy_encode.h"
+#include "./histogram.h"
+#include "./memory.h"
+#include "./quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+void BrotliInitDistanceParams(BrotliEncoderParams* params,
+    uint32_t npostfix, uint32_t ndirect) {
+  BrotliDistanceParams* dist = &params->dist;
+  uint32_t num_symbols, farthest;
+
+  dist->distance_postfix_bits = npostfix;
+  dist->num_direct_distance_codes = ndirect;
+
+  if (!params->large_window) {
+    num_symbols = BROTLI_DISTANCE_ALPHABET_SIZE(
+        npostfix, ndirect, BROTLI_MAX_DISTANCE_BITS);
+    farthest = ndirect + (1U << (BROTLI_MAX_DISTANCE_BITS + npostfix + 2)) -
+        (1U << (npostfix + 2));
+  } else {
+    static const uint32_t bound[BROTLI_MAX_NPOSTFIX + 1] = {0, 4, 12, 28};
+    const uint32_t threshold = bound[npostfix];
+    const uint32_t postfix = 1U << npostfix;
+    num_symbols = BROTLI_DISTANCE_ALPHABET_SIZE(
+        npostfix, ndirect, BROTLI_LARGE_MAX_DISTANCE_BITS);
+    /* Pick the maximum so that no usable distance symbol, with all its extra
+       bits set, encodes more than BROTLI_MAX_ALLOWED_DISTANCE. */
+    if (ndirect < threshold) {
+      farthest = BROTLI_MAX_ALLOWED_DISTANCE - (threshold - ndirect);
+    } else if (ndirect >= threshold + postfix) {
+      farthest = (3U << 29) - 4 + (ndirect - threshold);
+    } else {
+      farthest = BROTLI_MAX_ALLOWED_DISTANCE;
+    }
+  }
+
+  dist->alphabet_size = num_symbols;
+  dist->max_distance = farthest;
+}
+
+static void RecomputeDistancePrefixes(Command* cmds,
+                                      size_t num_commands,
+                                      const BrotliDistanceParams* orig_params,
+                                      const BrotliDistanceParams* new_params) {
+  size_t k;
+
+  if (orig_params->num_direct_distance_codes ==
+          new_params->num_direct_distance_codes &&
+      orig_params->distance_postfix_bits ==
+          new_params->distance_postfix_bits) {
+    return;  /* Same encoding parameters: stored prefixes remain valid. */
+  }
+
+  for (k = 0; k < num_commands; ++k) {
+    Command* cmd = cmds + k;
+    if (!CommandCopyLen(cmd) || cmd->cmd_prefix_ < 128) continue;
+    PrefixEncodeCopyDistance(CommandRestoreDistanceCode(cmd, orig_params),
+                             new_params->num_direct_distance_codes,
+                             new_params->distance_postfix_bits,
+                             &cmd->dist_prefix_,
+                             &cmd->dist_extra_);
+  }
+}
+
+static BROTLI_BOOL ComputeDistanceCost(const Command* cmds,
+                                       size_t num_commands,
+                                       const BrotliDistanceParams* orig_params,
+                                       const BrotliDistanceParams* new_params,
+                                       double* cost) {
+  /* Non-zero when both parameter sets encode distances identically, so the
+     prefixes already stored in the commands can be reused as they are. */
+  const int same_params =
+      orig_params->distance_postfix_bits ==
+          new_params->distance_postfix_bits &&
+      orig_params->num_direct_distance_codes ==
+          new_params->num_direct_distance_codes;
+  const Command* cmd = cmds;
+  const Command* const end = cmds + num_commands;
+  double extra_bits = 0.0;
+  HistogramDistance histo;
+  HistogramClearDistance(&histo);
+
+  for (; cmd != end; ++cmd) {
+    uint16_t prefix;
+    uint32_t unused_extra;
+    if (!CommandCopyLen(cmd) || cmd->cmd_prefix_ < 128) continue;
+    if (same_params) {
+      prefix = cmd->dist_prefix_;
+    } else {
+      const uint32_t distance = CommandRestoreDistanceCode(cmd, orig_params);
+      /* The candidate parameters cannot represent this distance. */
+      if (distance > new_params->max_distance) return BROTLI_FALSE;
+      PrefixEncodeCopyDistance(distance,
+                               new_params->num_direct_distance_codes,
+                               new_params->distance_postfix_bits,
+                               &prefix,
+                               &unused_extra);
+    }
+    /* Low 10 bits: histogram symbol; the bits above add to extra_bits. */
+    HistogramAddDistance(&histo, prefix & 0x3FF);
+    extra_bits += prefix >> 10;
+  }
+
+  /* `*cost` is written only on success. */
+  *cost = BrotliPopulationCostDistance(&histo) + extra_bits;
+  return BROTLI_TRUE;
+}
+
+void BrotliBuildMetaBlock(MemoryManager* m,
+                          const uint8_t* ringbuffer,
+                          const size_t pos,
+                          const size_t mask,
+                          BrotliEncoderParams* params,
+                          uint8_t prev_byte,
+                          uint8_t prev_byte2,
+                          Command* cmds,
+                          size_t num_commands,
+                          ContextType literal_context_mode,
+                          MetaBlockSplit* mb) {
+  /* Histogram ids need to fit in one byte. */
+  static const size_t kMaxNumberOfHistograms = 256;
+  HistogramDistance* distance_histograms;
+  HistogramLiteral* literal_histograms;
+  ContextType* literal_context_modes = NULL;
+  size_t literal_histograms_size;
+  size_t distance_histograms_size;
+  size_t i;
+  size_t literal_context_multiplier = 1;
+  uint32_t npostfix;
+  uint32_t ndirect_msb = 0;
+  BROTLI_BOOL check_orig = BROTLI_TRUE;
+  double best_dist_cost = 1e99;
+  BrotliEncoderParams orig_params = *params;
+  BrotliEncoderParams new_params = *params;
+
+  for (npostfix = 0; npostfix <= BROTLI_MAX_NPOSTFIX; npostfix++) {
+    for (; ndirect_msb < 16; ndirect_msb++) {
+      uint32_t ndirect = ndirect_msb << npostfix;
+      BROTLI_BOOL skip;
+      double dist_cost;
+      BrotliInitDistanceParams(&new_params, npostfix, ndirect);
+      if (npostfix == orig_params.dist.distance_postfix_bits &&
+          ndirect == orig_params.dist.num_direct_distance_codes) {
+        check_orig = BROTLI_FALSE;
+      }
+      skip = !ComputeDistanceCost(
+          cmds, num_commands,
+          &orig_params.dist, &new_params.dist, &dist_cost);
+      if (skip || (dist_cost > best_dist_cost)) {
+        break;
+      }
+      best_dist_cost = dist_cost;
+      params->dist = new_params.dist;
+    }
+    if (ndirect_msb > 0) ndirect_msb--;
+    ndirect_msb /= 2;
+  }
+  if (check_orig) {
+    double dist_cost;
+    ComputeDistanceCost(cmds, num_commands,
+                        &orig_params.dist, &orig_params.dist, &dist_cost);
+    if (dist_cost < best_dist_cost) {
+      /* NB: currently unused; uncomment when more param tuning is added. */
+      /* best_dist_cost = dist_cost; */
+      params->dist = orig_params.dist;
+    }
+  }
+  RecomputeDistancePrefixes(cmds, num_commands,
+                            &orig_params.dist, &params->dist);
+
+  BrotliSplitBlock(m, cmds, num_commands,
+                   ringbuffer, pos, mask, params,
+                   &mb->literal_split,
+                   &mb->command_split,
+                   &mb->distance_split);
+  if (BROTLI_IS_OOM(m)) return;
+
+  if (!params->disable_literal_context_modeling) {
+    literal_context_multiplier = 1 << BROTLI_LITERAL_CONTEXT_BITS;
+    literal_context_modes =
+        BROTLI_ALLOC(m, ContextType, mb->literal_split.num_types);
+    if (BROTLI_IS_OOM(m)) return;
+    for (i = 0; i < mb->literal_split.num_types; ++i) {
+      literal_context_modes[i] = literal_context_mode;
+    }
+  }
+
+  literal_histograms_size =
+      mb->literal_split.num_types * literal_context_multiplier;
+  literal_histograms =
+      BROTLI_ALLOC(m, HistogramLiteral, literal_histograms_size);
+  if (BROTLI_IS_OOM(m)) return;
+  ClearHistogramsLiteral(literal_histograms, literal_histograms_size);
+
+  distance_histograms_size =
+      mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
+  distance_histograms =
+      BROTLI_ALLOC(m, HistogramDistance, distance_histograms_size);
+  if (BROTLI_IS_OOM(m)) return;
+  ClearHistogramsDistance(distance_histograms, distance_histograms_size);
+
+  BROTLI_DCHECK(mb->command_histograms == 0);
+  mb->command_histograms_size = mb->command_split.num_types;
+  mb->command_histograms =
+      BROTLI_ALLOC(m, HistogramCommand, mb->command_histograms_size);
+  if (BROTLI_IS_OOM(m)) return;
+  ClearHistogramsCommand(mb->command_histograms, mb->command_histograms_size);
+
+  BrotliBuildHistogramsWithContext(cmds, num_commands,
+      &mb->literal_split, &mb->command_split, &mb->distance_split,
+      ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes,
+      literal_histograms, mb->command_histograms, distance_histograms);
+  BROTLI_FREE(m, literal_context_modes);
+
+  BROTLI_DCHECK(mb->literal_context_map == 0);
+  mb->literal_context_map_size =
+      mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
+  mb->literal_context_map =
+      BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
+  if (BROTLI_IS_OOM(m)) return;
+
+  BROTLI_DCHECK(mb->literal_histograms == 0);
+  mb->literal_histograms_size = mb->literal_context_map_size;
+  mb->literal_histograms =
+      BROTLI_ALLOC(m, HistogramLiteral, mb->literal_histograms_size);
+  if (BROTLI_IS_OOM(m)) return;
+
+  BrotliClusterHistogramsLiteral(m, literal_histograms, literal_histograms_size,
+      kMaxNumberOfHistograms, mb->literal_histograms,
+      &mb->literal_histograms_size, mb->literal_context_map);
+  if (BROTLI_IS_OOM(m)) return;
+  BROTLI_FREE(m, literal_histograms);
+
+  if (params->disable_literal_context_modeling) {
+    /* Distribute assignment to all contexts. */
+    for (i = mb->literal_split.num_types; i != 0;) {
+      size_t j = 0;
+      i--;
+      for (; j < (1 << BROTLI_LITERAL_CONTEXT_BITS); j++) {
+        mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
+            mb->literal_context_map[i];
+      }
+    }
+  }
+
+  BROTLI_DCHECK(mb->distance_context_map == 0);
+  mb->distance_context_map_size =
+      mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
+  mb->distance_context_map =
+      BROTLI_ALLOC(m, uint32_t, mb->distance_context_map_size);
+  if (BROTLI_IS_OOM(m)) return;
+
+  BROTLI_DCHECK(mb->distance_histograms == 0);
+  mb->distance_histograms_size = mb->distance_context_map_size;
+  mb->distance_histograms =
+      BROTLI_ALLOC(m, HistogramDistance, mb->distance_histograms_size);
+  if (BROTLI_IS_OOM(m)) return;
+
+  BrotliClusterHistogramsDistance(m, distance_histograms,
+                                  mb->distance_context_map_size,
+                                  kMaxNumberOfHistograms,
+                                  mb->distance_histograms,
+                                  &mb->distance_histograms_size,
+                                  mb->distance_context_map);
+  if (BROTLI_IS_OOM(m)) return;
+  BROTLI_FREE(m, distance_histograms);
+}
+
+#define FN(X) X ## Literal  /* Instantiates BlockSplitterLiteral. */
+#include "./metablock_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Command  /* Instantiates BlockSplitterCommand. */
+#include "./metablock_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Distance  /* Instantiates BlockSplitterDistance. */
+#include "./metablock_inc.h"  /* NOLINT(build/include) */
+#undef FN
+
+#define BROTLI_MAX_STATIC_CONTEXTS 13  /* Largest supported num_contexts. */
+
+/* Greedy block splitter that keeps num_contexts_ histograms per block type
+   (one per context bucket). Hard-wired to HistogramLiteral. */
+typedef struct ContextBlockSplitter {
+  /* Alphabet size of particular block category. */
+  size_t alphabet_size_;
+  size_t num_contexts_;  /* Histograms (context buckets) per block type. */
+  size_t max_block_types_;  /* New block types stop at this count. */
+  /* We collect at least this many symbols for each block. */
+  size_t min_block_size_;
+  /* We merge histograms A and B if
+       entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
+     where A is the current histogram and B is the histogram of the last or the
+     second last block type. */
+  double split_threshold_;
+
+  size_t num_blocks_;  /* Number of blocks written to split_ so far. */
+  BlockSplit* split_;  /* not owned */
+  HistogramLiteral* histograms_;  /* not owned */
+  size_t* histograms_size_;  /* not owned */
+
+  /* The number of symbols that we want to collect before deciding on whether
+     or not to merge the block with a previous one or emit a new block. */
+  size_t target_block_size_;
+  /* The number of symbols in the current histogram. */
+  size_t block_size_;
+  /* Offset in histograms_ of the first histogram of the current block. */
+  size_t curr_histogram_ix_;
+  /* Same, for the last ([0]) and second last ([1]) block types. */
+  size_t last_histogram_ix_[2];
+  /* Entropies; context i: [i] last type, [num_contexts_ + i] second last. */
+  double last_entropy_[2 * BROTLI_MAX_STATIC_CONTEXTS];
+  /* The number of times we merged the current block with the last one. */
+  size_t merge_last_count_;
+} ContextBlockSplitter;
+
+/* Resets `self`, grows split->types/lengths to max_num_blocks entries and
+   allocates *histograms (max_num_types * num_contexts entries) through `m`. */
+static void InitContextBlockSplitter(
+    MemoryManager* m, ContextBlockSplitter* self, size_t alphabet_size,
+    size_t num_contexts, size_t min_block_size, double split_threshold,
+    size_t num_symbols, BlockSplit* split, HistogramLiteral** histograms,
+    size_t* histograms_size) {
+  size_t max_num_blocks = num_symbols / min_block_size + 1;
+  size_t max_num_types;
+  BROTLI_DCHECK(num_contexts <= BROTLI_MAX_STATIC_CONTEXTS);
+  self->alphabet_size_ = alphabet_size;
+  self->num_contexts_ = num_contexts;
+  self->max_block_types_ = BROTLI_MAX_NUMBER_OF_BLOCK_TYPES / num_contexts;
+  self->min_block_size_ = min_block_size;
+  self->split_threshold_ = split_threshold;
+  self->num_blocks_ = 0;
+  self->split_ = split;
+  self->histograms_size_ = histograms_size;
+  self->target_block_size_ = min_block_size;
+  self->block_size_ = 0;
+  self->curr_histogram_ix_ = 0;
+  self->merge_last_count_ = 0;
+
+  /* We have to allocate one more histogram than the maximum number of block
+     types for the current histogram when the meta-block is too big. */
+  max_num_types =
+      BROTLI_MIN(size_t, max_num_blocks, self->max_block_types_ + 1);
+  BROTLI_ENSURE_CAPACITY(m, uint8_t,
+      split->types, split->types_alloc_size, max_num_blocks);
+  BROTLI_ENSURE_CAPACITY(m, uint32_t,
+      split->lengths, split->lengths_alloc_size, max_num_blocks);
+  if (BROTLI_IS_OOM(m)) return;
+  split->num_blocks = max_num_blocks;
+  BROTLI_DCHECK(*histograms == 0);
+  *histograms_size = max_num_types * num_contexts;
+  *histograms = BROTLI_ALLOC(m, HistogramLiteral, *histograms_size);
+  self->histograms_ = *histograms;
+  if (BROTLI_IS_OOM(m)) return;
+  /* Clear only current histogram. */
+  ClearHistogramsLiteral(&self->histograms_[0], num_contexts);
+  self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
+}
+
+/* Closes the current block (length raised to min_block_size_) and either:
+     (1) emits the current block with a new block type;
+     (2) emits the current block with the type of the second last block;
+     (3) merges the current block with the last block. */
+static void ContextBlockSplitterFinishBlock(
+    ContextBlockSplitter* self, MemoryManager* m, BROTLI_BOOL is_final) {
+  BlockSplit* split = self->split_;
+  const size_t num_contexts = self->num_contexts_;
+  double* last_entropy = self->last_entropy_;
+  HistogramLiteral* histograms = self->histograms_;
+
+  if (self->block_size_ < self->min_block_size_) {
+    self->block_size_ = self->min_block_size_;
+  }
+  if (self->num_blocks_ == 0) {
+    size_t i;
+    /* Create first block. */
+    split->lengths[0] = (uint32_t)self->block_size_;
+    split->types[0] = 0;
+    /* Seed both last_entropy halves with the first block's entropies. */
+    for (i = 0; i < num_contexts; ++i) {
+      last_entropy[i] =
+          BitsEntropy(histograms[i].data_, self->alphabet_size_);
+      last_entropy[num_contexts + i] = last_entropy[i];
+    }
+    ++self->num_blocks_;
+    ++split->num_types;
+    self->curr_histogram_ix_ += num_contexts;
+    if (self->curr_histogram_ix_ < *self->histograms_size_) {
+      ClearHistogramsLiteral(
+          &self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
+    }
+    self->block_size_ = 0;
+  } else if (self->block_size_ > 0) {
+    /* Try merging the set of histograms for the current block type with the
+       respective set of histograms for the last and second last block types.
+       Decide over the split based on the total reduction of entropy across
+       all contexts. */
+    double entropy[BROTLI_MAX_STATIC_CONTEXTS];
+    HistogramLiteral* combined_histo =
+        BROTLI_ALLOC(m, HistogramLiteral, 2 * num_contexts);
+    double combined_entropy[2 * BROTLI_MAX_STATIC_CONTEXTS];
+    double diff[2] = { 0.0 };
+    size_t i;
+    if (BROTLI_IS_OOM(m)) return;
+    for (i = 0; i < num_contexts; ++i) {
+      size_t curr_histo_ix = self->curr_histogram_ix_ + i;
+      size_t j;
+      entropy[i] = BitsEntropy(histograms[curr_histo_ix].data_,
+                               self->alphabet_size_);
+      for (j = 0; j < 2; ++j) {  /* 0: last type, 1: second last type. */
+        size_t jx = j * num_contexts + i;
+        size_t last_histogram_ix = self->last_histogram_ix_[j] + i;
+        combined_histo[jx] = histograms[curr_histo_ix];
+        HistogramAddHistogramLiteral(&combined_histo[jx],
+            &histograms[last_histogram_ix]);
+        combined_entropy[jx] = BitsEntropy(
+            &combined_histo[jx].data_[0], self->alphabet_size_);
+        diff[j] += combined_entropy[jx] - entropy[i] - last_entropy[jx];
+      }
+    }
+    /* diff[j]: entropy increase, summed over contexts, of merging with j. */
+    if (split->num_types < self->max_block_types_ &&
+        diff[0] > self->split_threshold_ &&
+        diff[1] > self->split_threshold_) {
+      /* Create new block. */
+      split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
+      split->types[self->num_blocks_] = (uint8_t)split->num_types;
+      self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
+      self->last_histogram_ix_[0] = split->num_types * num_contexts;
+      for (i = 0; i < num_contexts; ++i) {
+        last_entropy[num_contexts + i] = last_entropy[i];
+        last_entropy[i] = entropy[i];
+      }
+      ++self->num_blocks_;
+      ++split->num_types;
+      self->curr_histogram_ix_ += num_contexts;
+      if (self->curr_histogram_ix_ < *self->histograms_size_) {
+        ClearHistogramsLiteral(
+            &self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
+      }
+      self->block_size_ = 0;
+      self->merge_last_count_ = 0;
+      self->target_block_size_ = self->min_block_size_;
+    } else if (diff[1] < diff[0] - 20.0) {
+      /* Combine this block with second last block. */
+      split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
+      split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
+      BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
+      for (i = 0; i < num_contexts; ++i) {
+        histograms[self->last_histogram_ix_[0] + i] =
+            combined_histo[num_contexts + i];
+        last_entropy[num_contexts + i] = last_entropy[i];
+        last_entropy[i] = combined_entropy[num_contexts + i];
+        HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
+      }
+      ++self->num_blocks_;
+      self->block_size_ = 0;
+      self->merge_last_count_ = 0;
+      self->target_block_size_ = self->min_block_size_;
+    } else {
+      /* Combine this block with last block. */
+      split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
+      for (i = 0; i < num_contexts; ++i) {
+        histograms[self->last_histogram_ix_[0] + i] = combined_histo[i];
+        last_entropy[i] = combined_entropy[i];
+        if (split->num_types == 1) {
+          last_entropy[num_contexts + i] = last_entropy[i];
+        }
+        HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
+      }
+      self->block_size_ = 0;
+      if (++self->merge_last_count_ > 1) {  /* Repeated merges: grow target. */
+        self->target_block_size_ += self->min_block_size_;
+      }
+    }
+    BROTLI_FREE(m, combined_histo);
+  }
+  if (is_final) {  /* Shrink the reported sizes to what was really used. */
+    *self->histograms_size_ = split->num_types * num_contexts;
+    split->num_blocks = self->num_blocks_;
+  }
+}
+
+/* Records `symbol` in the histogram of `context` for the block that is being
+   collected; once the block holds target_block_size_ symbols it is handed to
+   ContextBlockSplitterFinishBlock. OOM is left for the caller to check on m. */
+static void ContextBlockSplitterAddSymbol(
+    ContextBlockSplitter* self, MemoryManager* m,
+    size_t symbol, size_t context) {
+  HistogramLiteral* histo =
+      &self->histograms_[self->curr_histogram_ix_ + context];
+  HistogramAddLiteral(histo, symbol);
+  self->block_size_ += 1;
+  if (self->block_size_ != self->target_block_size_) return;
+  ContextBlockSplitterFinishBlock(self, m, /* is_final = */ BROTLI_FALSE);
+}
+
+/* Builds mb->literal_context_map: entry (type << BROTLI_LITERAL_CONTEXT_BITS)
+   + ctx is set to type * num_contexts + static_context_map[ctx]. */
+static void MapStaticContexts(MemoryManager* m, size_t num_contexts,
+                              const uint32_t* static_context_map,
+                              MetaBlockSplit* mb) {
+  const size_t contexts_per_type = (size_t)1 << BROTLI_LITERAL_CONTEXT_BITS;
+  const size_t num_types = mb->literal_split.num_types;
+  size_t type, ctx;
+  BROTLI_DCHECK(mb->literal_context_map == 0);
+  mb->literal_context_map_size = num_types << BROTLI_LITERAL_CONTEXT_BITS;
+  mb->literal_context_map =
+      BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
+  if (BROTLI_IS_OOM(m)) return;
+  for (type = 0; type < num_types; ++type) {
+    const uint32_t base = (uint32_t)(type * num_contexts);
+    for (ctx = 0; ctx < contexts_per_type; ++ctx) {
+      mb->literal_context_map[type * contexts_per_type + ctx] =
+          base + static_context_map[ctx];
+    }
+  }
+}
+/* Greedy split of literals, commands and distances into block types in mb. */
+static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal(
+    MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
+    uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
+    const size_t num_contexts, const uint32_t* static_context_map,
+    const Command* commands, size_t n_commands, MetaBlockSplit* mb) {
+  union {  /* plain when num_contexts == 1, ctx otherwise. */
+    BlockSplitterLiteral plain;
+    ContextBlockSplitter ctx;
+  } lit_blocks;
+  BlockSplitterCommand cmd_blocks;
+  BlockSplitterDistance dist_blocks;
+  size_t num_literals = 0;
+  size_t i;
+  for (i = 0; i < n_commands; ++i) {
+    num_literals += commands[i].insert_len_;
+  }
+  /* Set up one greedy splitter per category (literal, command, distance). */
+  if (num_contexts == 1) {
+    InitBlockSplitterLiteral(m, &lit_blocks.plain, 256, 512, 400.0,
+        num_literals, &mb->literal_split, &mb->literal_histograms,
+        &mb->literal_histograms_size);
+  } else {
+    InitContextBlockSplitter(m, &lit_blocks.ctx, 256, num_contexts, 512, 400.0,
+        num_literals, &mb->literal_split, &mb->literal_histograms,
+        &mb->literal_histograms_size);
+  }
+  if (BROTLI_IS_OOM(m)) return;
+  InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024,
+      500.0, n_commands, &mb->command_split, &mb->command_histograms,
+      &mb->command_histograms_size);
+  if (BROTLI_IS_OOM(m)) return;
+  InitBlockSplitterDistance(m, &dist_blocks, 64, 512, 100.0, n_commands,
+      &mb->distance_split, &mb->distance_histograms,
+      &mb->distance_histograms_size);
+  if (BROTLI_IS_OOM(m)) return;
+  /* Feed every command, its literals and its distance to the splitters. */
+  for (i = 0; i < n_commands; ++i) {
+    const Command cmd = commands[i];
+    size_t j;
+    BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_);
+    for (j = cmd.insert_len_; j != 0; --j) {
+      uint8_t literal = ringbuffer[pos & mask];
+      if (num_contexts == 1) {
+        BlockSplitterAddSymbolLiteral(&lit_blocks.plain, literal);
+      } else {
+        size_t context =
+            BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut);
+        ContextBlockSplitterAddSymbol(&lit_blocks.ctx, m, literal,
+                                      static_context_map[context]);
+        if (BROTLI_IS_OOM(m)) return;
+      }
+      prev_byte2 = prev_byte;
+      prev_byte = literal;
+      ++pos;
+    }
+    pos += CommandCopyLen(&cmd);  /* Skip over the copied bytes. */
+    if (CommandCopyLen(&cmd)) {
+      prev_byte2 = ringbuffer[(pos - 2) & mask];  /* Last two copied bytes */
+      prev_byte = ringbuffer[(pos - 1) & mask];   /* become the context. */
+      if (cmd.cmd_prefix_ >= 128) {  /* presumably: explicit distance; confirm */
+        BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_ & 0x3FF);
+      }
+    }
+  }
+  /* Flush the block still being collected in each category. */
+  if (num_contexts == 1) {
+    BlockSplitterFinishBlockLiteral(
+        &lit_blocks.plain, /* is_final = */ BROTLI_TRUE);
+  } else {
+    ContextBlockSplitterFinishBlock(
+        &lit_blocks.ctx, m, /* is_final = */ BROTLI_TRUE);
+    if (BROTLI_IS_OOM(m)) return;
+  }
+  BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ BROTLI_TRUE);
+  BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ BROTLI_TRUE);
+  /* With several contexts, also build mb->literal_context_map. */
+  if (num_contexts > 1) {
+    MapStaticContexts(m, num_contexts, static_context_map, mb);
+  }
+}
+/* Public greedy entry point; forwards to the Internal body by num_contexts. */
+void BrotliBuildMetaBlockGreedy(MemoryManager* m,
+                                const uint8_t* ringbuffer,
+                                size_t pos,
+                                size_t mask,
+                                uint8_t prev_byte,
+                                uint8_t prev_byte2,
+                                ContextLut literal_context_lut,
+                                size_t num_contexts,
+                                const uint32_t* static_context_map,
+                                const Command* commands,
+                                size_t n_commands,
+                                MetaBlockSplit* mb) {
+  if (num_contexts == 1) {  /* Constant args; presumably aids inlining. */
+    BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
+        prev_byte2, literal_context_lut, 1, NULL, commands, n_commands, mb);
+  } else {
+    BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
+        prev_byte2, literal_context_lut, num_contexts, static_context_map,
+        commands, n_commands, mb);
+  }
+}
+
+/* Applies BrotliOptimizeHuffmanCountsForRle to every histogram in mb. */
+void BrotliOptimizeHistograms(uint32_t num_distance_codes,
+                              MetaBlockSplit* mb) {
+  uint8_t rle_buf[BROTLI_NUM_COMMAND_SYMBOLS];
+  size_t k;
+  for (k = 0; k < mb->literal_histograms_size; ++k) {
+    HistogramLiteral* h = &mb->literal_histograms[k];
+    BrotliOptimizeHuffmanCountsForRle(256, h->data_, rle_buf);
+  }
+  for (k = 0; k < mb->command_histograms_size; ++k) {
+    HistogramCommand* h = &mb->command_histograms[k];
+    BrotliOptimizeHuffmanCountsForRle(
+        BROTLI_NUM_COMMAND_SYMBOLS, h->data_, rle_buf);
+  }
+  for (k = 0; k < mb->distance_histograms_size; ++k) {
+    HistogramDistance* h = &mb->distance_histograms[k];
+    BrotliOptimizeHuffmanCountsForRle(num_distance_codes, h->data_, rle_buf);
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/metablock.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/metablock.h
new file mode 100755
index 0000000000..334a79a443
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/metablock.h
@@ -0,0 +1,105 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Algorithms for distributing the literals and commands of a metablock between
+   block types and contexts. */
+
+#ifndef BROTLI_ENC_METABLOCK_H_
+#define BROTLI_ENC_METABLOCK_H_
+
+#include "../common/context.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./block_splitter.h"
+#include "./command.h"
+#include "./histogram.h"
+#include "./memory.h"
+#include "./quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct MetaBlockSplit {  /* Arrays freed by DestroyMetaBlockSplit. */
+  BlockSplit literal_split;
+  BlockSplit command_split;
+  BlockSplit distance_split;
+  uint32_t* literal_context_map;  /* Indexed by block type and context. */
+  size_t literal_context_map_size;
+  uint32_t* distance_context_map;  /* Sized num_types << context bits. */
+  size_t distance_context_map_size;
+  HistogramLiteral* literal_histograms;
+  size_t literal_histograms_size;
+  HistogramCommand* command_histograms;
+  size_t command_histograms_size;
+  HistogramDistance* distance_histograms;
+  size_t distance_histograms_size;
+} MetaBlockSplit;
+
+/* Puts `mb` into its empty state: the three block splits are initialized,
+   every context-map and histogram pointer is 0 and every size is 0. */
+static BROTLI_INLINE void InitMetaBlockSplit(MetaBlockSplit* mb) {
+  BrotliInitBlockSplit(&mb->literal_split);
+  BrotliInitBlockSplit(&mb->command_split);
+  BrotliInitBlockSplit(&mb->distance_split);
+  mb->literal_context_map = 0;
+  mb->distance_context_map = 0;
+  mb->literal_histograms = 0;
+  mb->command_histograms = 0;
+  mb->distance_histograms = 0;
+  mb->literal_context_map_size = mb->distance_context_map_size = 0;
+  mb->literal_histograms_size = mb->command_histograms_size = 0;
+  mb->distance_histograms_size = 0;
+}
+/* Frees the three block splits and the five arrays of mb through m. */
+static BROTLI_INLINE void DestroyMetaBlockSplit(
+    MemoryManager* m, MetaBlockSplit* mb) {
+  BrotliDestroyBlockSplit(m, &mb->literal_split);
+  BrotliDestroyBlockSplit(m, &mb->command_split);
+  BrotliDestroyBlockSplit(m, &mb->distance_split);
+  BROTLI_FREE(m, mb->literal_context_map);
+  BROTLI_FREE(m, mb->distance_context_map);
+  BROTLI_FREE(m, mb->literal_histograms);
+  BROTLI_FREE(m, mb->command_histograms);
+  BROTLI_FREE(m, mb->distance_histograms);
+}
+
+/* Uses the slow shortest-path block splitter and does context clustering.
+   The distance parameters are dynamically selected based on the commands
+   which get recomputed under the new distance parameters. The new distance
+   parameters are stored into *params. */
+BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
+                                          const uint8_t* ringbuffer,
+                                          const size_t pos,
+                                          const size_t mask,
+                                          BrotliEncoderParams* params,
+                                          uint8_t prev_byte,
+                                          uint8_t prev_byte2,
+                                          Command* cmds,
+                                          size_t num_commands,
+                                          ContextType literal_context_mode,
+                                          MetaBlockSplit* mb);
+
+/* Uses a fast greedy block splitter that tries to merge current block with the
+   last or the second last block and uses a static context clustering which
+   is the same for all block types. */
+BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(
+    MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
+    uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
+    size_t num_contexts, const uint32_t* static_context_map,
+    const Command* commands, size_t n_commands, MetaBlockSplit* mb);
+
+BROTLI_INTERNAL void BrotliOptimizeHistograms(uint32_t num_distance_codes,
+                                              MetaBlockSplit* mb);
+
+BROTLI_INTERNAL void BrotliInitDistanceParams(BrotliEncoderParams* params,
+    uint32_t npostfix, uint32_t ndirect);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_METABLOCK_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/metablock_inc.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/metablock_inc.h
new file mode 100755
index 0000000000..dcc9d3c4a6
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/metablock_inc.h
@@ -0,0 +1,183 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+#define HistogramType FN(Histogram)
+
+/* Greedy block splitter for one block category; FN() picks which one
+   (Literal, Command or Distance) this inclusion of the header generates. */
+typedef struct FN(BlockSplitter) {
+  /* Alphabet size of particular block category. */
+  size_t alphabet_size_;
+  /* We collect at least this many symbols for each block. */
+  size_t min_block_size_;
+  /* We merge histograms A and B if
+       entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
+     where A is the current histogram and B is the histogram of the last or the
+     second last block type. */
+  double split_threshold_;
+
+  size_t num_blocks_;  /* Number of blocks written to split_ so far. */
+  BlockSplit* split_;  /* not owned */
+  HistogramType* histograms_;  /* not owned */
+  size_t* histograms_size_;  /* not owned */
+
+  /* The number of symbols that we want to collect before deciding on whether
+     or not to merge the block with a previous one or emit a new block. */
+  size_t target_block_size_;
+  /* The number of symbols in the current histogram. */
+  size_t block_size_;
+  /* Index in histograms_ of the histogram being filled. */
+  size_t curr_histogram_ix_;
+  /* Indices of the histograms of the last ([0]) and second last ([1]) types. */
+  size_t last_histogram_ix_[2];
+  /* Entropy of the last ([0]) and second last ([1]) block types. */
+  double last_entropy_[2];
+  /* The number of times we merged the current block with the last one. */
+  size_t merge_last_count_;
+} FN(BlockSplitter);
+
+/* Resets `self`, grows split->types/lengths to the maximum block count and
+   allocates *histograms through `m`; returns early if `m` reports OOM. */
+static void FN(InitBlockSplitter)(
+    MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size,
+    size_t min_block_size, double split_threshold, size_t num_symbols,
+    BlockSplit* split, HistogramType** histograms, size_t* histograms_size) {
+  const size_t block_limit = num_symbols / min_block_size + 1;
+  /* One histogram beyond the block-type limit is reserved for the block that
+     is still being collected when the meta-block is too big. */
+  const size_t type_limit =
+      BROTLI_MIN(size_t, block_limit, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1);
+  self->split_ = split;
+  self->histograms_size_ = histograms_size;
+  self->alphabet_size_ = alphabet_size;
+  self->split_threshold_ = split_threshold;
+  self->min_block_size_ = self->target_block_size_ = min_block_size;
+  self->num_blocks_ = self->block_size_ = 0;
+  self->curr_histogram_ix_ = self->merge_last_count_ = 0;
+  BROTLI_ENSURE_CAPACITY(m, uint8_t,
+      split->types, split->types_alloc_size, block_limit);
+  BROTLI_ENSURE_CAPACITY(m, uint32_t,
+      split->lengths, split->lengths_alloc_size, block_limit);
+  if (BROTLI_IS_OOM(m)) return;
+  split->num_blocks = block_limit;
+  BROTLI_DCHECK(*histograms == 0);
+  *histograms_size = type_limit;
+  *histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size);
+  self->histograms_ = *histograms;
+  if (BROTLI_IS_OOM(m)) return;
+  /* Only the first histogram is in use so far; clear just that one. */
+  FN(HistogramClear)(&self->histograms_[0]);
+  self->last_histogram_ix_[0] = 0;
+  self->last_histogram_ix_[1] = 0;
+}
+
+/* Closes the current block (length raised to min_block_size_) and either:
+     (1) emits the current block with a new block type;
+     (2) emits the current block with the type of the second last block;
+     (3) merges the current block with the last block. */
+static void FN(BlockSplitterFinishBlock)(
+    FN(BlockSplitter)* self, BROTLI_BOOL is_final) {
+  BlockSplit* split = self->split_;
+  double* last_entropy = self->last_entropy_;
+  HistogramType* histograms = self->histograms_;
+  self->block_size_ =
+      BROTLI_MAX(size_t, self->block_size_, self->min_block_size_);
+  if (self->num_blocks_ == 0) {
+    /* Create first block. */
+    split->lengths[0] = (uint32_t)self->block_size_;
+    split->types[0] = 0;
+    last_entropy[0] =
+        BitsEntropy(histograms[0].data_, self->alphabet_size_);
+    last_entropy[1] = last_entropy[0];
+    ++self->num_blocks_;
+    ++split->num_types;
+    ++self->curr_histogram_ix_;
+    if (self->curr_histogram_ix_ < *self->histograms_size_)
+      FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
+    self->block_size_ = 0;
+  } else if (self->block_size_ > 0) {
+    double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_,
+                                 self->alphabet_size_);
+    HistogramType combined_histo[2];
+    double combined_entropy[2];
+    double diff[2];
+    size_t j;
+    for (j = 0; j < 2; ++j) {  /* 0: last type, 1: second last type. */
+      size_t last_histogram_ix = self->last_histogram_ix_[j];
+      combined_histo[j] = histograms[self->curr_histogram_ix_];
+      FN(HistogramAddHistogram)(&combined_histo[j],
+          &histograms[last_histogram_ix]);
+      combined_entropy[j] = BitsEntropy(
+          &combined_histo[j].data_[0], self->alphabet_size_);
+      diff[j] = combined_entropy[j] - entropy - last_entropy[j];
+    }
+    /* diff[j]: entropy increase caused by merging with block type j. */
+    if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES &&
+        diff[0] > self->split_threshold_ &&
+        diff[1] > self->split_threshold_) {
+      /* Create new block. */
+      split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
+      split->types[self->num_blocks_] = (uint8_t)split->num_types;
+      self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
+      self->last_histogram_ix_[0] = (uint8_t)split->num_types;
+      last_entropy[1] = last_entropy[0];
+      last_entropy[0] = entropy;
+      ++self->num_blocks_;
+      ++split->num_types;
+      ++self->curr_histogram_ix_;
+      if (self->curr_histogram_ix_ < *self->histograms_size_)
+        FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
+      self->block_size_ = 0;
+      self->merge_last_count_ = 0;
+      self->target_block_size_ = self->min_block_size_;
+    } else if (diff[1] < diff[0] - 20.0) {
+      /* Combine this block with second last block. */
+      split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
+      split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
+      BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
+      histograms[self->last_histogram_ix_[0]] = combined_histo[1];
+      last_entropy[1] = last_entropy[0];
+      last_entropy[0] = combined_entropy[1];
+      ++self->num_blocks_;
+      self->block_size_ = 0;
+      FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
+      self->merge_last_count_ = 0;
+      self->target_block_size_ = self->min_block_size_;
+    } else {
+      /* Combine this block with last block. */
+      split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
+      histograms[self->last_histogram_ix_[0]] = combined_histo[0];
+      last_entropy[0] = combined_entropy[0];
+      if (split->num_types == 1) {
+        last_entropy[1] = last_entropy[0];
+      }
+      self->block_size_ = 0;
+      FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
+      if (++self->merge_last_count_ > 1) {  /* Repeated merges: grow target. */
+        self->target_block_size_ += self->min_block_size_;
+      }
+    }
+  }
+  if (is_final) {  /* Shrink the reported sizes to what was really used. */
+    *self->histograms_size_ = split->num_types;
+    split->num_blocks = self->num_blocks_;
+  }
+}
+
+/* Counts `symbol` in the histogram currently being filled and, as soon as the
+   block holds target_block_size_ symbols, hands it to BlockSplitterFinishBlock. */
+static void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) {
+  HistogramType* current = &self->histograms_[self->curr_histogram_ix_];
+  FN(HistogramAdd)(current, symbol);
+  self->block_size_ += 1;
+  if (self->block_size_ != self->target_block_size_) return;
+  FN(BlockSplitterFinishBlock)(self, /* is_final = */ BROTLI_FALSE);
+}
+
+#undef HistogramType
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/params.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/params.h
new file mode 100755
index 0000000000..6ecf1d3f99
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/params.h
@@ -0,0 +1,44 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Parameters for the Brotli encoder with chosen quality levels. */
+
+#ifndef BROTLI_ENC_PARAMS_H_
+#define BROTLI_ENC_PARAMS_H_
+
+#include <brotli/encode.h>
+#include "./encoder_dict.h"
+
+typedef struct BrotliHasherParams {  /* BrotliEncoderParams.hasher */
+  int type;  /* presumably the hasher variant id; TODO confirm */
+  int bucket_bits;
+  int block_bits;
+  int hash_len;
+  int num_last_distances_to_check;
+} BrotliHasherParams;
+
+typedef struct BrotliDistanceParams {  /* BrotliEncoderParams.dist */
+  uint32_t distance_postfix_bits;  /* presumably npostfix; TODO confirm */
+  uint32_t num_direct_distance_codes;  /* presumably ndirect; TODO confirm */
+  uint32_t alphabet_size;
+  size_t max_distance;
+} BrotliDistanceParams;
+
+/* Encoding parameters; BrotliBuildMetaBlock reads them and rewrites `dist`. */
+typedef struct BrotliEncoderParams {
+  BrotliEncoderMode mode;
+  int quality;
+  int lgwin;
+  int lgblock;
+  size_t size_hint;
+  BROTLI_BOOL disable_literal_context_modeling;
+  BROTLI_BOOL large_window;
+  BrotliHasherParams hasher;
+  BrotliDistanceParams dist;  /* Updated by BrotliBuildMetaBlock. */
+  BrotliEncoderDictionary dictionary;
+} BrotliEncoderParams;
+
+#endif  /* BROTLI_ENC_PARAMS_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/prefix.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/prefix.h
new file mode 100755
index 0000000000..fd359a478d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/prefix.h
@@ -0,0 +1,53 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions for encoding integers into prefix codes, the number of extra
+   bits, and the actual values of the extra bits. */
+
+#ifndef BROTLI_ENC_PREFIX_H_
+#define BROTLI_ENC_PREFIX_H_
+
+#include "../common/constants.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./fast_log.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Maps the intermediate distance_code (one of the special codes, or the actual
+   distance increased by BROTLI_NUM_DISTANCE_SHORT_CODES - 1) to *code, which
+   holds the extra-bit count from bit 10 up, and to the *extra_bits value. */
+static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
+                                                   size_t num_direct_codes,
+                                                   size_t postfix_bits,
+                                                   uint16_t* code,
+                                                   uint32_t* extra_bits) {
+  const size_t first_long_code =
+      BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes;
+  size_t d, bucket, half, low, n_extra, sym;
+  if (distance_code < first_long_code) {
+    /* Short and direct codes are emitted verbatim with no extra bits. */
+    *code = (uint16_t)distance_code;
+    *extra_bits = 0;
+    return;
+  }
+  d = ((size_t)1 << (postfix_bits + 2u)) + (distance_code - first_long_code);
+  bucket = Log2FloorNonZero(d) - 1;
+  half = (d >> bucket) & 1;
+  low = d & ((1u << postfix_bits) - 1);
+  n_extra = bucket - postfix_bits;
+  sym = first_long_code + ((2 * (n_extra - 1) + half) << postfix_bits) + low;
+  *code = (uint16_t)((n_extra << 10) | sym);
+  *extra_bits = (uint32_t)((d - ((2 + half) << bucket)) >> postfix_bits);
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_PREFIX_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/quality.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/quality.h
new file mode 100755
index 0000000000..5f4d034503
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/quality.h
@@ -0,0 +1,165 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Constants and formulas that affect speed-ratio trade-offs and thus define
+   quality levels. */
+
+#ifndef BROTLI_ENC_QUALITY_H_
+#define BROTLI_ENC_QUALITY_H_
+
+#include "../common/platform.h"
+#include <brotli/encode.h>
+#include "./params.h"
+
+#define FAST_ONE_PASS_COMPRESSION_QUALITY 0
+#define FAST_TWO_PASS_COMPRESSION_QUALITY 1
+#define ZOPFLIFICATION_QUALITY 10
+#define HQ_ZOPFLIFICATION_QUALITY 11
+/* Inclusive quality thresholds at which encoder features switch. */
+#define MAX_QUALITY_FOR_STATIC_ENTROPY_CODES 2
+#define MIN_QUALITY_FOR_BLOCK_SPLIT 4
+#define MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS 4
+#define MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS 4
+#define MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH 5
+#define MIN_QUALITY_FOR_CONTEXT_MODELING 5
+#define MIN_QUALITY_FOR_HQ_CONTEXT_MODELING 7
+#define MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING 10
+
+/* For quality below MIN_QUALITY_FOR_BLOCK_SPLIT there is no block splitting,
+   so we buffer at most this many literals and commands. */
+#define MAX_NUM_DELAYED_SYMBOLS 0x2FFF
+
+/* Hash-table size cap: 2^15 for one-pass quality (0), 2^17 otherwise. */
+static BROTLI_INLINE size_t MaxHashTableSize(int quality) {
+  return (size_t)1 << (quality != FAST_ONE_PASS_COMPRESSION_QUALITY ? 17 : 15);
+}
+
+/* The maximum length for which the zopflification uses distinct distances. */
+#define MAX_ZOPFLI_LEN_QUALITY_10 150  /* MaxZopfliLen() for quality <= 10 */
+#define MAX_ZOPFLI_LEN_QUALITY_11 325  /* MaxZopfliLen() for quality > 10 */
+
+/* Do not thoroughly search when a long copy is found. */
+#define BROTLI_LONG_COPY_QUICK_STEP 16384
+
+static BROTLI_INLINE size_t MaxZopfliLen(const BrotliEncoderParams* params) {
+  /* Only qualities above 10 get the longer limit. */
+  if (params->quality > 10) return MAX_ZOPFLI_LEN_QUALITY_11;
+  return MAX_ZOPFLI_LEN_QUALITY_10;
+}
+
+/* Count of best candidates evaluated when expanding a Zopfli chain. */
+static BROTLI_INLINE size_t MaxZopfliCandidates(
+    const BrotliEncoderParams* params) {
+  return (size_t)(params->quality > 10 ? 5 : 1);
+}
+
+static BROTLI_INLINE void SanitizeParams(BrotliEncoderParams* params) {
+  int q = params->quality, w = params->lgwin, w_max = BROTLI_MAX_WINDOW_BITS;
+  if (q < BROTLI_MIN_QUALITY) q = BROTLI_MIN_QUALITY;  /* clamp quality */
+  if (q > BROTLI_MAX_QUALITY) q = BROTLI_MAX_QUALITY;
+  params->quality = q;
+  /* Qualities that use static entropy codes never keep the large window. */
+  if (q <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
+    params->large_window = BROTLI_FALSE;
+  }
+  if (params->large_window) w_max = BROTLI_LARGE_MAX_WINDOW_BITS;
+  /* Clamp lgwin to [BROTLI_MIN_WINDOW_BITS, w_max]; the lower bound wins. */
+  params->lgwin = w < BROTLI_MIN_WINDOW_BITS ? BROTLI_MIN_WINDOW_BITS :
+      (w > w_max ? w_max : w);
+}
+
+/* Returns the log2 input block size to use for these parameters. */
+static BROTLI_INLINE int ComputeLgBlock(const BrotliEncoderParams* params) {
+  const int quality = params->quality;
+  const int requested = params->lgblock;
+  /* The two fast modes use a block as large as the window. */
+  if (quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
+      quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
+    return params->lgwin;
+  }
+  /* Below the block-split threshold the size is fixed. */
+  if (quality < MIN_QUALITY_FOR_BLOCK_SPLIT) return 14;
+  if (requested != 0) {  /* explicit request: clamp to the supported range */
+    return BROTLI_MIN(int, BROTLI_MAX_INPUT_BLOCK_BITS,
+        BROTLI_MAX(int, BROTLI_MIN_INPUT_BLOCK_BITS, requested));
+  }
+  /* Default is 16; quality >= 9 may raise it to min(18, lgwin). */
+  return (quality >= 9 && params->lgwin > 16) ?
+      BROTLI_MIN(int, 18, params->lgwin) : 16;
+}
+
+/* Returns log2 of the size of the main ring-buffer area: one bit more than
+   the larger of lgwin and lgblock, so that a newly added block fits completely
+   while lgwin bits of history remain, and the copy tail stays smaller than
+   the ring buffer. */
+static BROTLI_INLINE int ComputeRbBits(const BrotliEncoderParams* params) {
+  const int widest = BROTLI_MAX(int, params->lgwin, params->lgblock);
+  return widest + 1;
+}
+
+static BROTLI_INLINE size_t MaxMetablockSize(
+    const BrotliEncoderParams* params) {
+  /* 2^bits bytes, with bits capped at BROTLI_MAX_INPUT_BLOCK_BITS. */
+  const int rb_bits = ComputeRbBits(params);
+  return (size_t)1 << BROTLI_MIN(int, rb_bits, BROTLI_MAX_INPUT_BLOCK_BITS);
+}
+
+/* While searching for backward references, a long run without matches lets
+   us skip some match lookups. Unsuccessful lookups are very expensive, so
+   this heuristic speeds up compression considerably.
+   At first, 8-byte strides are taken and every second byte is put to the
+   hasher; after 4x more literals, 16-byte strides are taken and every 4-th
+   byte is put to the hasher. Applied only to qualities 2 to 9. */
+static BROTLI_INLINE size_t LiteralSpreeLengthForSparseSearch(
+    const BrotliEncoderParams* params) {
+  return params->quality >= 9 ? 512 : 64;
+}
+
+static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params,
+                                       BrotliHasherParams* hparams) {
+  const int q = params->quality;
+  const int lgwin = params->lgwin;
+  const int big_input = params->size_hint >= (1 << 20);
+  /* Shared by the type 5 and type 6 settings below. */
+  const int last_distances = q < 7 ? 4 : q < 9 ? 10 : 16;
+  if (q > 9) {
+    hparams->type = 10;
+  } else if (q < 5) {
+    /* Quality 4 with a size hint of at least 1 MiB gets its own hasher. */
+    hparams->type = (q == 4 && big_input) ? 54 : q;
+  } else if (lgwin <= 16) {
+    hparams->type = q < 7 ? 40 : q < 9 ? 41 : 42;
+  } else {
+    /* Type 6 needs a size hint of at least 1 MiB and lgwin >= 19. */
+    const int use_h6 = big_input && lgwin >= 19;
+    hparams->type = use_h6 ? 6 : 5;
+    hparams->block_bits = q - 1;
+    if (use_h6) {
+      hparams->bucket_bits = 15;
+      hparams->hash_len = 5;
+    } else {
+      hparams->bucket_bits = q < 7 ? 14 : 15;
+    }
+    hparams->num_last_distances_to_check = last_distances;
+  }
+
+  if (lgwin > 24) {
+    /* Large window brotli swaps in different hashers. Qualities <= 2 are
+       left alone: these are too fast for large window. Qualities >= 10 are
+       left alone too: their hasher already works well with large window.
+       The replacements are:
+       H3 --> H35 (quality 3), H54 --> H55 (quality 4 with size hint
+       >= 1 MiB), H6 --> H65 (qualities 5 to 9 that selected H6). */
+    switch (hparams->type) {
+      case 3: hparams->type = 35; break;
+      case 54: hparams->type = 55; break;
+      case 6: hparams->type = 65; break;
+      default: break;
+    }
+  }
+}
+
+#endif  /* BROTLI_ENC_QUALITY_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/ringbuffer.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/ringbuffer.h
new file mode 100755
index 0000000000..86079a89d3
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/ringbuffer.h
@@ -0,0 +1,164 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Sliding window over the input data. */
+
+#ifndef BROTLI_ENC_RINGBUFFER_H_
+#define BROTLI_ENC_RINGBUFFER_H_
+
+#include <string.h>  /* memcpy */
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./memory.h"
+#include "./quality.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
+   data in a circular manner: writing a byte writes it to:
+     `position() % (1 << window_bits)'.
+   For convenience, the RingBuffer array contains another copy of the
+   first `1 << tail_bits' bytes:
+     buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits),
+   and another copy of the last two bytes:
+     buffer_[-1] == buffer_[(1 << window_bits) - 1] and
+     buffer_[-2] == buffer_[(1 << window_bits) - 2]. */
+typedef struct RingBuffer {
+  /* Set once by RingBufferSetup(); the full size is size_ + tail_size_. */
+  const uint32_t size_;  /* 1 << window_bits */
+  const uint32_t mask_;  /* size_ - 1 */
+  const uint32_t tail_size_;  /* 1 << tail_bits */
+  const uint32_t total_size_;  /* size_ + tail_size_ */
+
+  uint32_t cur_size_;  /* buflen of the latest RingBufferInitBuffer() call */
+  /* Position to write in the ring buffer; bit 31 is the not-first-lap flag. */
+  uint32_t pos_;
+  /* The actual ring buffer containing the copy of the last two bytes, the data,
+     and the copy of the beginning as a tail. */
+  uint8_t* data_;
+  /* The start of the ring-buffer: data_ + 2. */
+  uint8_t* buffer_;
+} RingBuffer;
+
+static BROTLI_INLINE void RingBufferInit(RingBuffer* rb) {
+  /* Start empty with no storage; RingBufferWrite allocates on demand. */
+  rb->data_ = 0;
+  rb->buffer_ = 0;
+  rb->cur_size_ = rb->pos_ = 0;
+}
+
+static BROTLI_INLINE void RingBufferSetup(
+    const BrotliEncoderParams* params, RingBuffer* rb) {
+  const uint32_t ring_size = 1u << ComputeRbBits(params);
+  const uint32_t tail_size = 1u << params->lgblock;
+  *(uint32_t*)&rb->size_ = ring_size;  /* fields are const: write via cast */
+  *(uint32_t*)&rb->mask_ = ring_size - 1;
+  *(uint32_t*)&rb->tail_size_ = tail_size;
+  *(uint32_t*)&rb->total_size_ = ring_size + tail_size;
+}
+
+static BROTLI_INLINE void RingBufferFree(MemoryManager* m, RingBuffer* rb) {
+  BROTLI_FREE(m, rb->data_);  /* buffer_ points into data_, so free data_ only */
+}
+
+/* Allocates or re-allocates data_ for the given length plus a slack region
+   before and after it. Fills the slack regions with zeros. */
+static BROTLI_INLINE void RingBufferInitBuffer(
+    MemoryManager* m, const uint32_t buflen, RingBuffer* rb) {
+  static const size_t kSlackForEightByteHashingEverywhere = 7;
+  uint8_t* fresh = BROTLI_ALLOC(
+      m, uint8_t, 2 + buflen + kSlackForEightByteHashingEverywhere);
+  if (BROTLI_IS_OOM(m)) return;
+  if (rb->data_ != 0) {
+    /* Carry over everything held so far, slack bytes included. */
+    memcpy(fresh, rb->data_,
+        2 + rb->cur_size_ + kSlackForEightByteHashingEverywhere);
+    BROTLI_FREE(m, rb->data_);
+  }
+  rb->data_ = fresh;
+  rb->buffer_ = fresh + 2;
+  rb->cur_size_ = buflen;
+  /* Zero the two leading bytes and the trailing slack. */
+  fresh[0] = 0;
+  fresh[1] = 0;
+  memset(rb->buffer_ + buflen, 0, kSlackForEightByteHashingEverywhere);
+}
+
+static BROTLI_INLINE void RingBufferWriteTail(
+    const uint8_t* bytes, size_t n, RingBuffer* rb) {
+  const size_t at = rb->pos_ & rb->mask_;
+  size_t room;
+  /* Only writes that start inside the first tail_size_ bytes are mirrored. */
+  if (!BROTLI_PREDICT_FALSE(at < rb->tail_size_)) return;
+  /* Copy into the mirror area that follows the main ring. */
+  room = rb->tail_size_ - at;
+  memcpy(rb->buffer_ + rb->size_ + at, bytes, BROTLI_MIN(size_t, n, room));
+}
+
+/* Push n bytes into the ring buffer, allocating its storage on demand. */
+static BROTLI_INLINE void RingBufferWrite(
+    MemoryManager* m, const uint8_t* bytes, size_t n, RingBuffer* rb) {
+  if (rb->pos_ == 0 && n < rb->tail_size_) {
+    /* Special case for the first write: to process the first block, we don't
+       need to allocate the whole ring-buffer and we don't need the tail
+       either. However, we do this memory usage optimization only if the
+       first write is less than the tail size, which is also the input block
+       size, otherwise it is likely that other blocks will follow and we
+       will need to reallocate to the full size anyway. */
+    rb->pos_ = (uint32_t)n;
+    RingBufferInitBuffer(m, rb->pos_, rb);
+    if (BROTLI_IS_OOM(m)) return;
+    memcpy(rb->buffer_, bytes, n);
+    return;
+  }
+  if (rb->cur_size_ < rb->total_size_) {
+    /* Lazily allocate the full buffer. */
+    RingBufferInitBuffer(m, rb->total_size_, rb);
+    if (BROTLI_IS_OOM(m)) return;
+    /* Initialize the last two bytes to zero, so that we don't have to worry
+       later when we copy the last two bytes to buffer_[-2] and buffer_[-1]. */
+    rb->buffer_[rb->size_ - 2] = 0;
+    rb->buffer_[rb->size_ - 1] = 0;
+  }
+  {
+    const size_t masked_pos = rb->pos_ & rb->mask_;
+    /* NOTE(review): the original sentence was cut off; presumably writes are
+       short enough to wrap around the ring at most once -- confirm. */
+    RingBufferWriteTail(bytes, n, rb);
+    if (BROTLI_PREDICT_TRUE(masked_pos + n <= rb->size_)) {
+      /* A single write fits. */
+      memcpy(&rb->buffer_[masked_pos], bytes, n);
+    } else {
+      /* Split into two writes.
+         Copy into the end of the buffer, including the tail buffer. */
+      memcpy(&rb->buffer_[masked_pos], bytes,
+             BROTLI_MIN(size_t, n, rb->total_size_ - masked_pos));
+      /* Copy the part past size_ into the beginning of the buffer. */
+      memcpy(&rb->buffer_[0], bytes + (rb->size_ - masked_pos),
+             n - (rb->size_ - masked_pos));
+    }
+  }
+  {
+    BROTLI_BOOL not_first_lap = (rb->pos_ & (1u << 31)) != 0;
+    uint32_t rb_pos_mask = (1u << 31) - 1;
+    rb->buffer_[-2] = rb->buffer_[rb->size_ - 2];  /* refresh the 2-byte copy */
+    rb->buffer_[-1] = rb->buffer_[rb->size_ - 1];
+    rb->pos_ = (rb->pos_ & rb_pos_mask) + (uint32_t)(n & rb_pos_mask);
+    if (not_first_lap) {
+      /* Wrap, but preserve not-a-first-lap feature. */
+      rb->pos_ |= 1u << 31;
+    }
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_RINGBUFFER_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/static_dict.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/static_dict.c
new file mode 100755
index 0000000000..7299ab7203
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/static_dict.c
@@ -0,0 +1,486 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "./static_dict.h"
+
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include "../common/transform.h"
+#include "./encoder_dict.h"
+#include "./find_match_length.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static BROTLI_INLINE uint32_t Hash(const uint8_t* data) {
+  /* Multiplicative hash of four input bytes; the high bits are mixed best by
+     the multiplication, so the top kDictNumBits bits form the result. */
+  const uint32_t word = BROTLI_UNALIGNED_LOAD32LE(data);
+  return (uint32_t)(word * kDictHashMul32) >> (32 - kDictNumBits);
+}
+
+static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
+                                   uint32_t* matches) {
+  const uint32_t packed = (uint32_t)((distance << 5) + len_code);
+  if (packed < matches[len]) matches[len] = packed;  /* keep the minimum */
+}
+
+static BROTLI_INLINE size_t DictMatchLength(const BrotliDictionary* dictionary,
+                                            const uint8_t* data,
+                                            size_t id,
+                                            size_t len,
+                                            size_t maxlen) {
+  const uint8_t* word =  /* word number id among the words of length len */
+      &dictionary->data[dictionary->offsets_by_length[len] + len * id];
+  return FindMatchLengthWithLimit(word, data, BROTLI_MIN(size_t, len, maxlen));
+}
+
+/* Tells whether data starts with dictionary word w after w's transform
+   (0: identity, 10: uppercase first, anything else: uppercase all).
+   Words longer than max_length never match. */
+static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
+    DictWord w, const uint8_t* data, size_t max_length) {
+  const uint8_t* dict;
+  size_t i;
+  if (w.len > max_length) return BROTLI_FALSE;
+  dict = &dictionary->data[dictionary->offsets_by_length[w.len] +
+      (size_t)w.len * (size_t)w.idx];
+  if (w.transform == 0) {
+    /* Match against the base dictionary word. */
+    return TO_BROTLI_BOOL(
+        FindMatchLengthWithLimit(dict, data, w.len) == w.len);
+  }
+  if (w.transform == 10) {
+    /* Uppercase-first transform: the first dictionary byte must be an ASCII
+       lowercase letter whose uppercase form is data[0]; the remaining
+       bytes are compared unchanged. */
+    if (dict[0] < 'a' || dict[0] > 'z') return BROTLI_FALSE;
+    if ((dict[0] ^ 32) != data[0]) return BROTLI_FALSE;
+    return TO_BROTLI_BOOL(
+        FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
+        w.len - 1u);
+  }
+  /* Uppercase-all transform: ASCII lowercase dictionary bytes are compared
+     in uppercase, every other byte unchanged. */
+  for (i = 0; i < w.len; ++i) {
+    const int is_lower = dict[i] >= 'a' && dict[i] <= 'z';
+    const int expected = is_lower ? (dict[i] ^ 32) : dict[i];
+    if (expected != data[i]) return BROTLI_FALSE;
+  }
+  return BROTLI_TRUE;
+}
+
+BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
+    const BrotliEncoderDictionary* dictionary, const uint8_t* data,
+    size_t min_length, size_t max_length, uint32_t* matches) {
+  BROTLI_BOOL has_found_match = BROTLI_FALSE;
+  {  /* Words that start directly at data[0]. */
+    size_t offset = dictionary->buckets[Hash(data)];
+    BROTLI_BOOL end = !offset;  /* offset 0 means the bucket is empty */
+    while (!end) {
+      DictWord w = dictionary->dict_words[offset++];
+      const size_t l = w.len & 0x1F;  /* low 5 bits: word length */
+      const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
+      const size_t id = w.idx;
+      end = !!(w.len & 0x80);  /* bit 7 marks the last entry of the bucket */
+      w.len = (uint8_t)l;
+      if (w.transform == 0) {
+        const size_t matchlen =
+            DictMatchLength(dictionary->words, data, id, l, max_length);
+        const uint8_t* s;
+        size_t minlen;
+        size_t maxlen;
+        size_t len;
+        /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
+        if (matchlen == l) {
+          AddMatch(id, l, l, matches);
+          has_found_match = BROTLI_TRUE;
+        }
+        /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
+                      "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
+        if (matchlen >= l - 1) {
+          AddMatch(id + 12 * n, l - 1, l, matches);
+          if (l + 2 < max_length &&
+              data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
+              data[l + 2] == ' ') {
+            AddMatch(id + 49 * n, l + 3, l, matches);
+          }
+          has_found_match = BROTLI_TRUE;
+        }
+        /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
+        minlen = min_length;
+        if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
+        maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
+        for (len = minlen; len <= maxlen; ++len) {
+          size_t cut = l - len;
+          size_t transform_id = (cut << 2) +
+              (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
+          AddMatch(id + transform_id * n, len, l, matches);
+          has_found_match = BROTLI_TRUE;
+        }
+        if (matchlen < l || l + 6 >= max_length) {
+          continue;  /* the suffix checks below read up to s[6] */
+        }
+        s = &data[l];
+        /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
+        if (s[0] == ' ') {
+          AddMatch(id + n, l + 1, l, matches);
+          if (s[1] == 'a') {
+            if (s[2] == ' ') {
+              AddMatch(id + 28 * n, l + 3, l, matches);
+            } else if (s[2] == 's') {
+              if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
+            } else if (s[2] == 't') {
+              if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
+            } else if (s[2] == 'n') {
+              if (s[3] == 'd' && s[4] == ' ') {
+                AddMatch(id + 10 * n, l + 5, l, matches);
+              }
+            }
+          } else if (s[1] == 'b') {
+            if (s[2] == 'y' && s[3] == ' ') {
+              AddMatch(id + 38 * n, l + 4, l, matches);
+            }
+          } else if (s[1] == 'i') {
+            if (s[2] == 'n') {
+              if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
+            } else if (s[2] == 's') {
+              if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
+            }
+          } else if (s[1] == 'f') {
+            if (s[2] == 'o') {
+              if (s[3] == 'r' && s[4] == ' ') {
+                AddMatch(id + 25 * n, l + 5, l, matches);
+              }
+            } else if (s[2] == 'r') {
+              if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
+                AddMatch(id + 37 * n, l + 6, l, matches);
+              }
+            }
+          } else if (s[1] == 'o') {
+            if (s[2] == 'f') {
+              if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
+            } else if (s[2] == 'n') {
+              if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
+            }
+          } else if (s[1] == 'n') {
+            if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
+              AddMatch(id + 80 * n, l + 5, l, matches);
+            }
+          } else if (s[1] == 't') {
+            if (s[2] == 'h') {
+              if (s[3] == 'e') {
+                if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
+              } else if (s[3] == 'a') {
+                if (s[4] == 't' && s[5] == ' ') {
+                  AddMatch(id + 29 * n, l + 6, l, matches);
+                }
+              }
+            } else if (s[2] == 'o') {
+              if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
+            }
+          } else if (s[1] == 'w') {
+            if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
+              AddMatch(id + 35 * n, l + 6, l, matches);
+            }
+          }
+        } else if (s[0] == '"') {
+          AddMatch(id + 19 * n, l + 1, l, matches);
+          if (s[1] == '>') {
+            AddMatch(id + 21 * n, l + 2, l, matches);
+          }
+        } else if (s[0] == '.') {
+          AddMatch(id + 20 * n, l + 1, l, matches);
+          if (s[1] == ' ') {
+            AddMatch(id + 31 * n, l + 2, l, matches);
+            if (s[2] == 'T' && s[3] == 'h') {
+              if (s[4] == 'e') {
+                if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
+              } else if (s[4] == 'i') {
+                if (s[5] == 's' && s[6] == ' ') {
+                  AddMatch(id + 75 * n, l + 7, l, matches);
+                }
+              }
+            }
+          }
+        } else if (s[0] == ',') {
+          AddMatch(id + 76 * n, l + 1, l, matches);
+          if (s[1] == ' ') {
+            AddMatch(id + 14 * n, l + 2, l, matches);
+          }
+        } else if (s[0] == '\n') {
+          AddMatch(id + 22 * n, l + 1, l, matches);
+          if (s[1] == '\t') {
+            AddMatch(id + 50 * n, l + 2, l, matches);
+          }
+        } else if (s[0] == ']') {
+          AddMatch(id + 24 * n, l + 1, l, matches);
+        } else if (s[0] == '\'') {
+          AddMatch(id + 36 * n, l + 1, l, matches);
+        } else if (s[0] == ':') {
+          AddMatch(id + 51 * n, l + 1, l, matches);
+        } else if (s[0] == '(') {
+          AddMatch(id + 57 * n, l + 1, l, matches);
+        } else if (s[0] == '=') {
+          if (s[1] == '"') {
+            AddMatch(id + 70 * n, l + 2, l, matches);
+          } else if (s[1] == '\'') {
+            AddMatch(id + 86 * n, l + 2, l, matches);
+          }
+        } else if (s[0] == 'a') {
+          if (s[1] == 'l' && s[2] == ' ') {
+            AddMatch(id + 84 * n, l + 3, l, matches);
+          }
+        } else if (s[0] == 'e') {
+          if (s[1] == 'd') {
+            if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
+          } else if (s[1] == 'r') {
+            if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
+          } else if (s[1] == 's') {
+            if (s[2] == 't' && s[3] == ' ') {
+              AddMatch(id + 95 * n, l + 4, l, matches);
+            }
+          }
+        } else if (s[0] == 'f') {
+          if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
+            AddMatch(id + 90 * n, l + 4, l, matches);
+          }
+        } else if (s[0] == 'i') {
+          if (s[1] == 'v') {
+            if (s[2] == 'e' && s[3] == ' ') {
+              AddMatch(id + 92 * n, l + 4, l, matches);
+            }
+          } else if (s[1] == 'z') {
+            if (s[2] == 'e' && s[3] == ' ') {
+              AddMatch(id + 100 * n, l + 4, l, matches);
+            }
+          }
+        } else if (s[0] == 'l') {
+          if (s[1] == 'e') {
+            if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
+              AddMatch(id + 93 * n, l + 5, l, matches);
+            }
+          } else if (s[1] == 'y') {
+            if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
+          }
+        } else if (s[0] == 'o') {
+          if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
+            AddMatch(id + 106 * n, l + 4, l, matches);
+          }
+        }
+      } else {
+        /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
+               is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
+           transform. */
+        const BROTLI_BOOL is_all_caps =
+            TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
+        const uint8_t* s;
+        if (!IsMatch(dictionary->words, w, data, max_length)) {
+          continue;
+        }
+        /* Transform "" + kUppercase{First,All} + "" */
+        AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);
+        has_found_match = BROTLI_TRUE;
+        if (l + 1 >= max_length) {
+          continue;  /* the suffix checks below read up to s[1] */
+        }
+        /* Transforms "" + kUppercase{First,All} + <suffix> */
+        s = &data[l];
+        if (s[0] == ' ') {
+          AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);
+        } else if (s[0] == '"') {
+          AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);
+          if (s[1] == '>') {
+            AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);
+          }
+        } else if (s[0] == '.') {
+          AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);
+          if (s[1] == ' ') {
+            AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);
+          }
+        } else if (s[0] == ',') {
+          AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);
+          if (s[1] == ' ') {
+            AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);
+          }
+        } else if (s[0] == '\'') {
+          AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);
+        } else if (s[0] == '(') {
+          AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);
+        } else if (s[0] == '=') {
+          if (s[1] == '"') {
+            AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);
+          } else if (s[1] == '\'') {
+            AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);
+          }
+        }
+      }
+    }
+  }
+  /* Transforms with prefixes " " and "." */
+  if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
+    BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
+    size_t offset = dictionary->buckets[Hash(&data[1])];
+    BROTLI_BOOL end = !offset;
+    while (!end) {
+      DictWord w = dictionary->dict_words[offset++];
+      const size_t l = w.len & 0x1F;
+      const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
+      const size_t id = w.idx;
+      end = !!(w.len & 0x80);
+      w.len = (uint8_t)l;
+      if (w.transform == 0) {
+        const uint8_t* s;
+        if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
+          continue;
+        }
+        /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
+                      "." + BROTLI_TRANSFORM_IDENTITY + "" */
+        AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
+        has_found_match = BROTLI_TRUE;
+        if (l + 2 >= max_length) {
+          continue;  /* the suffix checks below read up to data[l + 2] */
+        }
+        /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
+                      "." + BROTLI_TRANSFORM_IDENTITY + <suffix>
+        */
+        s = &data[l + 1];
+        if (s[0] == ' ') {
+          AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
+        } else if (s[0] == '(') {
+          AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
+        } else if (is_space) {
+          if (s[0] == ',') {
+            AddMatch(id + 103 * n, l + 2, l, matches);
+            if (s[1] == ' ') {
+              AddMatch(id + 33 * n, l + 3, l, matches);
+            }
+          } else if (s[0] == '.') {
+            AddMatch(id + 71 * n, l + 2, l, matches);
+            if (s[1] == ' ') {
+              AddMatch(id + 52 * n, l + 3, l, matches);
+            }
+          } else if (s[0] == '=') {
+            if (s[1] == '"') {
+              AddMatch(id + 81 * n, l + 3, l, matches);
+            } else if (s[1] == '\'') {
+              AddMatch(id + 98 * n, l + 3, l, matches);
+            }
+          }
+        }
+      } else if (is_space) {
+        /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
+               is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
+           transform. */
+        const BROTLI_BOOL is_all_caps =
+            TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
+        const uint8_t* s;
+        if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
+          continue;
+        }
+        /* Transforms " " + kUppercase{First,All} + "" */
+        AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);
+        has_found_match = BROTLI_TRUE;
+        if (l + 2 >= max_length) {
+          continue;  /* the suffix checks below read up to data[l + 2] */
+        }
+        /* Transforms " " + kUppercase{First,All} + <suffix> */
+        s = &data[l + 1];
+        if (s[0] == ' ') {
+          AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);
+        } else if (s[0] == ',') {
+          if (!is_all_caps) {
+            AddMatch(id + 109 * n, l + 2, l, matches);
+          }
+          if (s[1] == ' ') {
+            AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);
+          }
+        } else if (s[0] == '.') {
+          AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);
+          if (s[1] == ' ') {
+            AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);
+          }
+        } else if (s[0] == '=') {
+          if (s[1] == '"') {
+            AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);
+          } else if (s[1] == '\'') {
+            AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);
+          }
+        }
+      }
+    }
+  }
+  if (max_length >= 6) {
+    /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
+    if ((data[1] == ' ' &&
+         (data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
+        (data[0] == 0xC2 && data[1] == 0xA0)) {
+      size_t offset = dictionary->buckets[Hash(&data[2])];
+      BROTLI_BOOL end = !offset;
+      while (!end) {
+        DictWord w = dictionary->dict_words[offset++];
+        const size_t l = w.len & 0x1F;
+        const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
+        const size_t id = w.idx;
+        end = !!(w.len & 0x80);
+        w.len = (uint8_t)l;
+        if (w.transform == 0 &&
+            IsMatch(dictionary->words, w, &data[2], max_length - 2)) {
+          if (data[0] == 0xC2) {
+            AddMatch(id + 102 * n, l + 2, l, matches);
+            has_found_match = BROTLI_TRUE;
+          } else if (l + 2 < max_length && data[l + 2] == ' ') {
+            size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
+            AddMatch(id + t * n, l + 3, l, matches);
+            has_found_match = BROTLI_TRUE;
+          }
+        }
+      }
+    }
+  }
+  if (max_length >= 9) {
+    /* Transforms with prefixes " the " and ".com/" */
+    if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
+         data[3] == 'e' && data[4] == ' ') ||
+        (data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
+         data[3] == 'm' && data[4] == '/')) {
+      size_t offset = dictionary->buckets[Hash(&data[5])];
+      BROTLI_BOOL end = !offset;
+      while (!end) {
+        DictWord w = dictionary->dict_words[offset++];
+        const size_t l = w.len & 0x1F;
+        const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
+        const size_t id = w.idx;
+        end = !!(w.len & 0x80);
+        w.len = (uint8_t)l;
+        if (w.transform == 0 &&
+            IsMatch(dictionary->words, w, &data[5], max_length - 5)) {
+          AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
+          has_found_match = BROTLI_TRUE;
+          if (l + 5 < max_length) {
+            const uint8_t* s = &data[l + 5];
+            if (data[0] == ' ') {
+              if (l + 8 < max_length &&
+                  s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
+                AddMatch(id + 62 * n, l + 9, l, matches);
+                if (l + 12 < max_length &&
+                    s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
+                  AddMatch(id + 73 * n, l + 13, l, matches);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return has_found_match;  /* BROTLI_TRUE once any AddMatch group has fired */
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/static_dict.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/static_dict.h
new file mode 100755
index 0000000000..6b5d4eb0c9
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/static_dict.h
@@ -0,0 +1,40 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Class to model the static dictionary. */
+
+#ifndef BROTLI_ENC_STATIC_DICT_H_
+#define BROTLI_ENC_STATIC_DICT_H_
+
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./encoder_dict.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37  /* matches[] needs this + 1 slots */
+static const uint32_t kInvalidMatch = 0xFFFFFFF;  /* 7 hex digits = 28 set bits, not UINT32_MAX */
+
+/* Matches |data| against static dictionary words (plain and transformed) and,
+   for each length l with a match, updates matches[l] to be the minimum possible
+   (distance << 5) + len_code. Returns 1 if matches have been found, otherwise 0.
+   NOTE(review): |max_length| appears to bound the bytes readable at |data|
+   (the implementation checks it before indexing) -- confirm with callers.
+   Prerequisites: |matches| has at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN
+   + 1 elements, all initialized to kInvalidMatch. */
+BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
+    const BrotliEncoderDictionary* dictionary,
+    const uint8_t* data, size_t min_length, size_t max_length,
+    uint32_t* matches);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_STATIC_DICT_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/static_dict_lut.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/static_dict_lut.h
new file mode 100755
index 0000000000..e299cda6d8
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/static_dict_lut.h
@@ -0,0 +1,5864 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Lookup table for static dictionary and transforms. */
+
+#ifndef BROTLI_ENC_STATIC_DICT_LUT_H_
+#define BROTLI_ENC_STATIC_DICT_LUT_H_
+
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct DictWord {
+  /* Low 5 bits (0x1F): word length. Highest bit (0x80): end of bucket. */
+  uint8_t len;
+  uint8_t transform;  /* transform tag; 0 presumably means "no transform" -- confirm */
+  uint16_t idx;  /* base word id; matchers add (transform id) * (1 << size_bits) */
+} DictWord;
+
+static const int kDictNumBits = 15;  /* 1 << 15 == 32768, the entry count of kStaticDictionaryBuckets */
+static const uint32_t kDictHashMul32 = 0x1E35A7BD;  /* presumably the bucket hash multiplier -- confirm */
+
+static const uint16_t kStaticDictionaryBuckets[32768] = {
+1,0,0,0,0,0,0,0,0,3,6,0,0,0,0,0,20,0,0,0,21,0,22,0,0,0,0,0,0,0,0,23,0,0,25,0,29,
+0,53,0,0,0,0,0,0,55,0,0,0,0,0,0,61,76,0,0,0,94,0,0,0,0,0,0,96,0,97,0,98,0,0,0,0,
+0,0,0,99,101,106,108,0,0,0,0,0,110,0,111,112,0,113,118,124,0,0,0,0,0,125,128,0,0
+,0,0,129,0,0,131,0,0,0,0,0,0,132,0,0,135,0,0,0,137,0,0,0,0,0,138,139,0,0,0,0,0,0
+,0,142,143,144,0,0,0,0,0,145,0,0,0,146,149,151,152,0,0,153,0,0,0,0,0,0,0,0,0,0,0
+,0,0,0,0,154,0,0,0,0,0,0,155,0,0,0,0,160,182,0,0,0,0,0,0,183,0,0,0,188,189,0,0,
+192,0,0,0,0,0,0,194,0,0,0,0,0,0,0,0,197,202,209,0,0,210,0,224,0,0,0,225,0,0,0,0,
+0,0,0,0,0,0,231,0,0,0,232,0,240,0,0,242,0,0,0,0,0,0,0,0,0,0,0,244,0,0,0,246,0,0,
+249,251,253,0,0,0,0,0,258,0,0,261,263,0,0,0,267,0,0,268,0,269,0,0,0,0,0,0,0,0,0,
+271,0,0,0,0,0,0,272,0,273,0,277,0,278,286,0,0,0,0,287,0,289,290,291,0,0,0,295,0,
+0,296,297,0,0,0,0,0,0,0,0,0,0,298,0,0,0,299,0,0,305,0,324,0,0,0,0,0,327,0,328,
+329,0,0,0,0,336,0,0,340,0,341,342,343,0,0,346,0,348,0,0,0,0,0,0,349,351,0,0,355,
+0,363,0,364,0,368,369,0,370,0,0,0,0,0,0,0,372,0,0,0,0,0,0,0,0,0,0,0,373,0,375,0,
+0,0,0,376,377,0,0,394,395,396,0,0,398,0,0,0,0,400,0,0,408,0,0,0,0,420,0,0,0,0,0,
+0,421,0,0,422,423,0,0,429,435,436,442,0,0,443,0,444,445,453,456,0,457,0,0,0,0,0,
+458,0,0,0,459,0,0,0,460,0,462,463,465,0,0,0,0,0,0,466,469,0,0,0,0,0,0,470,0,0,0,
+474,0,476,0,0,0,0,483,0,485,0,0,0,486,0,0,488,491,492,0,0,497,499,500,0,501,0,0,
+0,505,0,0,506,0,0,0,507,0,0,0,509,0,0,0,0,511,512,519,0,0,0,0,0,0,529,530,0,0,0,
+534,0,0,0,0,543,0,0,0,0,0,0,0,0,0,553,0,0,0,0,557,560,0,0,0,0,0,0,561,0,564,0,0,
+0,0,0,0,565,566,0,575,0,619,0,620,0,0,623,624,0,0,0,625,0,0,626,627,0,0,628,0,0,
+0,0,630,0,631,0,0,0,0,0,0,0,0,0,641,0,0,0,0,643,656,668,0,0,0,673,0,0,0,674,0,0,
+0,0,0,0,0,0,682,0,687,0,690,0,693,699,700,0,0,0,0,0,0,704,705,0,0,0,0,707,710,0,
+711,0,0,0,0,726,0,0,729,0,0,0,730,731,0,0,0,0,0,752,0,0,0,762,0,763,0,0,767,0,0,
+0,770,774,0,0,775,0,0,0,0,0,0,0,0,0,0,776,0,0,0,777,783,0,0,0,785,788,0,0,0,0,
+790,0,0,0,793,0,0,0,0,794,0,0,804,819,821,0,827,0,0,0,834,0,0,835,0,0,0,841,0,
+844,0,850,851,859,0,860,0,0,0,0,0,0,0,874,0,876,0,877,890,0,0,0,0,0,0,0,0,893,
+894,898,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,899,0,0,0,900,904,906,0,0,0,907,0,908,909,
+0,910,0,0,0,0,911,0,0,0,0,0,916,0,0,0,922,925,0,930,0,934,0,0,0,0,0,943,0,0,944,
+0,953,954,0,0,0,0,0,0,955,0,962,963,0,0,976,0,0,977,978,979,980,0,981,0,0,0,0,
+984,0,0,985,0,0,987,989,991,0,0,0,0,0,0,0,0,0,992,0,0,0,993,0,0,0,0,0,0,996,0,0,
+0,1000,0,0,0,0,0,1002,0,0,0,0,1005,1007,0,0,0,1009,0,0,0,1010,0,0,0,0,0,0,1011,0
+,1012,0,0,0,0,1014,1016,0,0,0,1020,0,1021,0,0,0,0,1022,0,0,0,1024,0,0,0,0,0,0,
+1025,0,0,1026,1027,0,0,0,0,0,1031,0,1033,0,0,0,0,1034,0,0,0,1037,1040,0,0,0,1042
+,1043,0,0,1053,0,1054,0,0,1057,0,0,0,1058,0,0,1060,0,0,0,0,0,0,0,1061,0,0,1062,0
+,0,0,0,1063,0,0,0,0,1064,0,0,0,0,0,1065,0,0,0,0,1066,1067,0,0,0,1069,1070,1072,0
+,0,0,0,0,0,1073,0,1075,0,0,0,0,0,0,1080,1084,0,0,0,0,1088,0,0,0,0,0,0,1094,0,
+1095,0,1107,0,0,0,1112,1114,0,1119,0,1122,0,0,1126,0,1129,0,1130,0,0,0,0,0,1132,
+0,0,0,0,0,0,1144,0,0,1145,1146,0,1148,1149,0,0,1150,1151,0,0,0,0,1152,0,1153,0,0
+,0,0,0,1154,0,1163,0,0,0,1164,0,0,0,0,0,1165,0,1167,0,1170,0,0,0,0,0,1171,1172,0
+,0,0,0,0,0,0,0,1173,1175,1177,0,1186,0,0,0,0,0,0,0,0,0,0,1195,0,0,1221,0,0,1224,
+0,0,1227,0,0,0,0,0,1228,1229,0,0,1230,0,0,0,0,0,0,0,0,0,1231,0,0,0,1233,0,0,1243
+,1244,1246,1248,0,0,0,0,1254,1255,1258,1259,0,0,0,1260,0,0,1261,0,0,0,1262,1264,
+0,0,1265,0,0,0,0,0,0,0,0,0,0,0,0,1266,0,1267,0,0,0,0,1273,1274,1276,1289,0,0,
+1291,1292,1293,0,0,1294,1295,1296,0,0,0,0,1302,0,1304,0,0,0,0,0,0,0,0,0,1311,
+1312,0,1314,0,1316,1320,1321,0,0,0,0,0,0,0,1322,1323,1324,0,1335,0,1336,0,0,0,0,
+1341,1342,0,1346,0,1357,0,0,0,1358,1360,0,0,0,0,0,0,1361,0,0,0,1362,1365,0,1366,
+0,0,0,0,0,0,0,1379,0,0,0,0,0,0,0,0,0,0,0,0,1386,0,1388,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,1395,0,0,0,0,1403,0,1405,0,0,1407,0,0,0,0,0,1408,1409,0,1410,0,0,0,1412,1413,
+1416,0,0,1429,1451,0,0,1454,0,0,0,0,0,0,0,1455,0,0,0,0,0,0,0,1456,0,0,0,0,1459,
+1460,1461,1475,0,0,0,0,0,0,1477,0,1480,0,1481,0,0,1486,0,0,1495,0,0,0,1496,0,0,
+1498,1499,1501,1520,1521,0,0,0,1526,0,0,0,0,1528,1529,0,1533,1536,0,0,0,1537,
+1538,1549,0,1550,1558,1559,1572,0,1573,0,0,0,0,0,0,0,0,0,1575,0,0,0,0,0,1579,0,
+1599,0,1603,0,1604,0,1605,0,0,0,0,0,1608,1610,0,0,0,0,1611,0,1615,0,1616,1618,0,
+1619,0,0,1622,0,0,0,0,1634,0,0,0,1635,0,0,0,1641,0,0,0,0,0,0,0,0,0,1643,0,0,0,
+1650,0,0,1652,0,0,0,0,0,1653,0,0,0,1654,0,0,0,0,1655,0,1662,0,0,1663,1664,0,0,
+1668,0,0,1669,1670,0,1672,1673,0,0,0,0,0,1674,0,0,0,1675,1676,1680,0,1682,0,0,
+1687,0,0,0,0,0,1704,0,0,1705,0,0,1721,0,0,0,0,1734,1735,0,0,0,0,1737,0,0,0,0,
+1739,0,0,1740,0,0,0,0,0,0,0,0,0,0,1741,1743,0,0,0,0,1745,0,0,0,1749,0,0,0,1751,0
+,0,0,0,0,0,1760,0,0,0,0,1765,0,0,0,0,0,1784,0,1785,1787,0,0,0,0,1788,1789,0,0,0,
+0,1790,1791,1793,0,1798,1799,0,0,0,0,1801,0,1803,1805,0,0,0,1806,1811,0,1812,
+1814,0,1821,0,0,0,0,0,1822,1833,0,0,0,0,0,0,1848,0,0,0,0,0,0,1857,0,0,0,1859,0,0
+,0,0,1861,0,0,0,0,0,0,0,1866,0,1921,1925,0,0,0,1929,1930,0,0,0,0,0,0,0,0,0,1931,
+0,0,0,0,1932,0,0,0,1934,0,0,0,0,0,0,0,0,1946,0,0,1948,0,0,0,0,1950,0,1957,0,1958
+,0,0,0,0,0,1965,1967,0,0,0,0,1968,0,1969,0,1971,1972,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+,0,1973,0,0,0,0,1975,0,0,0,0,1976,1979,0,1982,0,0,0,0,1984,1988,0,0,0,0,1990,
+2004,2008,0,0,0,2012,2013,0,0,0,0,0,0,0,0,0,0,2015,0,2016,2017,0,0,0,0,2021,0,0,
+2025,0,0,0,0,0,2029,2036,2040,0,2042,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2043,0,0,0,0,0,
+2045,0,0,0,0,0,0,0,2046,2047,0,2048,2049,0,2059,0,0,2063,0,2064,2065,0,0,2066,0,
+0,0,0,0,0,2069,0,0,0,0,2070,0,2071,0,2072,0,0,0,0,2080,2082,2083,0,0,0,0,0,2085,
+0,2086,2088,2089,2105,0,0,0,0,2107,0,0,2116,2117,0,2120,0,0,2122,0,0,0,0,0,2123,
+0,0,2125,2127,2128,0,0,0,2130,0,0,0,2137,2139,2140,2141,0,0,0,0,0,0,0,0,0,2144,
+2145,0,0,2146,2149,0,0,0,0,2150,0,0,2151,2158,0,2159,0,2160,0,0,0,0,0,0,2161,
+2162,0,0,2194,2202,0,0,0,0,0,0,2205,2217,0,2220,0,2221,0,2222,2224,0,0,0,0,2237,
+0,0,0,0,0,2238,0,2239,2241,0,0,2242,0,0,0,0,0,2243,0,0,0,0,0,0,2252,0,0,2253,0,0
+,0,2257,2258,0,0,0,2260,0,0,0,0,0,0,0,2262,0,2264,0,0,0,0,0,2269,2270,0,0,0,0,0,
+0,0,0,0,2271,0,2273,0,0,0,0,2277,0,0,0,0,2278,0,0,0,0,2279,0,2280,0,2283,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2287,0,0,0,0,0,0,0,2289,2290,0,0,0,0,2291,0,2292,0,
+0,0,2293,2295,2296,0,0,0,0,0,0,0,2298,0,0,0,0,0,2303,0,2305,0,0,2306,0,2307,0,0,
+0,0,0,0,0,0,0,0,0,0,2313,2314,2315,2316,0,0,2318,0,2319,0,2322,0,0,2323,0,2324,0
+,2326,0,0,0,0,0,0,0,2335,0,2336,2338,2339,0,2340,0,0,0,2355,0,2375,0,2382,2386,0
+,2387,0,0,2394,0,0,0,0,2395,0,2397,0,0,0,0,0,2398,0,0,0,0,0,0,0,2399,2402,2404,
+2408,2411,0,0,0,2413,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2415,0,0,2416,2417,2419,0,2420,
+0,0,0,0,0,2425,0,0,0,2426,0,0,0,0,0,0,0,0,0,0,0,0,2427,2428,0,2429,0,0,2430,2434
+,0,2436,0,0,0,0,0,0,2441,2442,0,2445,0,0,2446,2457,0,2459,0,0,2462,0,2464,0,2477
+,0,2478,2486,0,0,0,2491,0,0,2493,0,0,2494,0,2495,0,2513,2523,0,0,0,0,2524,0,0,0,
+0,0,0,2528,2529,2530,0,0,2531,0,2533,0,0,2534,2535,0,2536,2537,0,2538,0,2539,
+2540,0,0,0,2545,2546,0,0,0,0,0,0,0,2548,0,0,2549,0,2550,2555,0,0,0,0,0,2557,0,
+2560,0,0,0,0,0,0,0,0,0,0,0,2561,0,2576,0,0,0,0,0,0,0,0,0,2577,2578,0,0,0,2579,0,
+0,0,0,0,0,0,2580,0,0,0,0,2581,0,0,0,0,2583,0,2584,0,2588,2590,0,0,0,2591,0,0,0,0
+,2593,2594,0,2595,0,2601,2602,0,0,2603,0,2605,0,0,0,2606,2607,2611,0,2615,0,0,0,
+2617,0,0,0,0,0,0,0,0,0,0,0,0,0,2619,0,0,2620,0,0,0,2621,0,2623,0,2625,0,0,2628,
+2629,0,0,2635,2636,2637,0,0,2639,0,0,0,2642,0,0,0,0,2643,0,2644,0,2649,0,0,0,0,0
+,0,2655,2656,0,0,2657,0,0,0,0,0,2658,0,0,0,0,0,2659,0,0,0,0,2664,2685,0,2687,0,
+2688,0,0,2689,0,0,2694,0,2695,0,0,2698,0,2701,2706,0,0,0,2707,0,2709,2710,2711,0
+,0,0,2720,2730,2735,0,0,0,0,2738,2740,0,0,0,0,2747,0,0,0,0,0,0,2748,0,0,2749,0,0
+,0,0,0,2750,0,0,2752,2754,0,0,0,0,0,2758,0,0,0,0,2762,0,0,0,0,2763,0,0,0,0,0,0,0
+,2764,2767,0,0,0,0,2768,0,0,2770,0,0,0,0,0,0,0,2771,0,0,0,0,0,0,0,0,0,2772,0,0,0
+,0,0,2773,2776,0,0,2783,0,0,2784,0,2789,0,2790,0,0,0,2792,0,0,0,0,0,0,0,0,0,0,
+2793,2795,0,0,0,0,0,0,2796,0,0,0,0,0,0,2797,2799,0,0,0,0,2803,0,0,0,0,2806,0,
+2807,2808,2817,2819,0,0,0,0,0,2821,0,0,0,0,2822,2823,0,0,0,0,0,0,0,2824,0,0,2828
+,0,2834,0,0,0,0,0,0,2836,0,2838,0,0,2839,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2841,
+0,0,0,2842,0,0,0,0,0,2843,2844,0,0,0,0,2846,0,0,2847,0,2849,0,2853,0,0,0,0,0,
+2857,0,0,0,0,2858,0,2859,0,0,2860,0,2862,2868,0,0,0,0,2875,0,2876,0,0,2877,2878,
+2884,2889,2890,0,0,2891,0,0,2892,0,0,0,2906,2912,0,2913,0,0,0,0,0,0,0,0,2916,0,
+2934,0,0,0,0,0,2935,0,0,0,0,2939,0,2940,0,0,0,0,0,0,0,2941,0,0,0,2946,0,2949,0,0
+,2950,2954,2955,0,0,0,2959,2961,0,0,2962,0,2963,0,0,0,0,0,0,2964,2965,2966,2967,
+0,0,0,0,0,0,0,2969,0,0,0,0,0,2970,2975,0,2982,2983,2984,0,0,0,0,0,2989,0,0,2990,
+0,0,0,0,0,0,0,2991,0,0,0,0,0,0,0,0,2998,0,3000,3001,0,0,3002,0,0,0,3003,0,0,3012
+,0,0,3022,0,0,3024,0,0,3025,3027,0,0,0,3030,0,0,0,0,3034,3035,0,0,3036,0,3039,0,
+3049,0,0,3050,0,0,0,0,0,0,3051,0,3053,0,0,0,0,3057,0,3058,0,0,0,0,0,0,0,0,3063,0
+,0,3073,3074,3078,3079,0,3080,3086,0,0,0,0,0,0,0,0,3087,0,3092,0,3095,0,3099,0,0
+,0,3100,0,3101,3102,0,3122,0,0,0,3124,0,3125,0,0,0,0,0,0,3132,3134,0,0,3136,0,0,
+0,0,0,0,0,3147,0,0,3149,0,0,0,0,0,3150,3151,3152,0,0,0,0,3158,0,0,3160,0,0,3161,
+0,0,3162,0,3163,3166,3168,0,0,3169,3170,0,0,3171,0,0,0,0,0,0,0,3182,0,3184,0,0,
+3188,0,0,3194,0,0,0,0,0,0,3204,0,0,0,0,3209,0,0,0,0,0,0,0,0,0,0,0,3216,3217,0,0,
+0,0,0,0,0,3219,0,0,3220,3222,0,3223,0,0,0,0,3224,0,3225,3226,0,3228,3233,0,3239,
+3241,3242,0,0,3251,3252,3253,3255,0,0,0,0,0,0,0,0,3260,0,0,3261,0,0,0,3267,0,0,0
+,0,0,0,0,0,3271,0,0,0,3278,0,3282,0,0,0,3284,0,0,0,3285,3286,0,0,0,0,0,0,0,3287,
+3292,0,0,0,0,3294,3296,0,0,3299,3300,3301,0,3302,0,0,0,0,0,3304,3306,0,0,0,0,0,0
+,3308,0,0,0,0,0,0,0,0,0,3311,0,0,0,0,0,0,0,0,3312,3314,3315,0,3318,0,0,0,0,0,0,0
+,0,3319,0,0,0,0,0,3321,0,0,0,0,0,0,0,0,0,3322,0,0,3324,3325,0,0,3326,0,0,3328,
+3329,3331,0,0,3335,0,0,3337,0,3338,0,0,0,0,3343,3347,0,0,0,3348,0,0,3351,0,0,0,0
+,0,0,3354,0,0,0,0,0,0,0,0,0,0,3355,0,0,3365,3366,3367,0,0,0,0,0,0,3368,3369,0,
+3370,0,0,3373,0,0,3376,0,0,3377,0,3379,3387,0,0,0,0,0,3390,0,0,0,0,0,0,0,3402,0,
+3403,3436,3437,3439,0,0,3441,0,0,0,3442,0,0,3449,0,0,0,3450,0,0,0,0,0,0,0,3451,0
+,0,3452,0,3453,3456,0,3457,0,0,3458,0,3459,0,0,0,0,0,0,0,0,0,3460,0,0,3469,3470,
+0,0,3475,0,0,0,3480,3487,3489,0,3490,0,0,3491,3499,0,3500,0,0,3501,0,0,0,3502,0,
+3514,0,0,0,3516,3517,0,0,0,3518,0,0,0,0,3520,3521,3522,0,0,3526,3530,0,0,0,0,
+3531,0,0,0,0,3536,0,0,0,0,0,0,0,3539,3541,0,0,3542,3544,0,3547,3548,0,0,3550,0,
+3553,0,0,0,0,0,0,0,3554,0,3555,0,3558,0,3559,0,0,0,0,0,0,0,0,3563,0,3581,0,0,0,
+3599,0,0,0,3600,0,3601,0,3602,3603,0,0,3606,3608,0,3610,3611,0,0,0,0,0,0,0,0,0,
+3612,3616,3619,0,0,0,0,0,0,0,0,0,0,0,0,0,3624,3628,0,3629,3634,3635,0,0,0,0,0,0,
+3636,0,3637,0,0,3638,3651,0,0,0,0,0,0,3652,3653,0,0,0,0,3656,3657,0,0,0,0,0,3658
+,0,0,0,0,3659,0,3661,3663,3664,0,3665,0,3692,0,0,0,3694,3696,0,0,0,0,0,0,0,0,0,0
+,0,0,3698,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3700,0,0,3701,0,0,0,3708,3709,0,0,0,3711
+,3712,0,0,0,0,0,3723,0,3724,3725,0,0,3726,0,0,0,0,0,0,3728,3729,0,3734,3735,3737
+,0,0,0,3743,0,3745,0,0,3746,0,0,3747,3748,0,3757,0,3759,3766,3767,0,3768,0,0,0,0
+,3769,0,0,3771,0,3774,0,0,0,0,0,0,3775,0,0,0,0,0,0,3776,0,3777,3786,0,3788,3789,
+0,0,0,0,0,0,0,0,0,3791,0,3811,0,0,0,0,0,3814,3815,3816,3820,0,0,0,0,0,0,0,3821,0
+,0,3825,0,0,0,0,3835,0,0,3848,3849,0,0,0,0,3850,3851,3853,0,0,0,0,3859,0,3860,
+3862,0,0,0,0,0,3863,0,0,0,0,0,0,0,0,3873,0,3874,0,3875,3886,0,3887,0,0,0,0,3892,
+3913,0,3914,0,0,0,3925,3931,0,0,0,0,3934,3941,3942,0,0,0,0,3943,0,0,0,3944,0,0,0
+,0,0,3945,0,3947,0,0,0,3956,3957,0,0,0,0,0,0,0,0,0,3958,0,3959,3965,0,0,0,0,3966
+,0,0,0,3967,0,0,0,3968,3974,0,0,0,0,0,3975,3977,3978,0,0,0,0,3980,0,3985,0,0,0,0
+,0,0,0,0,3986,4011,0,0,4017,0,0,0,0,0,0,0,0,0,0,0,4018,0,0,0,0,4019,0,4023,0,0,0
+,4027,4028,0,0,0,0,0,0,0,0,4031,4034,0,0,4035,4037,4039,4040,0,0,0,0,0,4059,0,
+4060,4061,0,4062,4063,4066,0,0,4072,0,0,0,0,0,0,0,0,0,0,0,0,0,4088,0,0,0,0,0,
+4091,0,0,0,0,4094,4095,0,0,4096,0,0,0,0,0,4098,4099,0,0,0,4101,0,4104,0,0,0,4105
+,4108,0,4113,0,0,4115,4116,0,4126,0,0,4127,0,0,0,0,0,0,0,4128,4132,4133,0,4134,0
+,0,0,4137,0,0,4141,0,0,0,0,4144,4146,4147,0,0,0,0,4148,0,0,4311,0,0,0,4314,4329,
+0,4331,4332,0,4333,0,4334,0,0,0,4335,0,4336,0,0,0,4337,0,0,0,4342,4345,4346,4350
+,0,4351,4352,0,4354,4355,0,0,4364,0,0,0,0,4369,0,0,0,4373,0,4374,0,0,0,0,4377,0,
+0,0,0,4378,0,0,0,4380,0,0,0,4381,4382,0,0,0,0,0,0,0,4384,0,0,0,0,4385,0,0,0,4386
+,0,0,0,4391,4398,0,0,0,0,4407,4409,0,0,0,0,4410,0,0,4411,0,4414,4415,4418,0,4427
+,4428,4430,0,4431,0,4448,0,0,0,0,0,4449,0,0,0,4451,4452,0,4453,4454,0,4456,0,0,0
+,0,0,0,0,4459,0,4463,0,0,0,0,0,4466,0,4467,0,4469,0,0,0,0,0,0,0,0,0,0,0,0,0,4470
+,4471,0,4473,0,0,4475,0,0,0,0,4477,4478,0,0,0,4479,4481,0,4482,0,4484,0,0,0,0,0,
+0,0,4486,0,0,4488,0,0,4497,0,4508,0,0,4510,4511,0,4520,4523,0,4524,0,4525,0,4527
+,0,0,4528,0,0,0,0,4530,0,4531,0,0,4532,0,0,0,4533,0,0,0,0,0,4535,0,0,0,4536,0,0,
+0,0,0,4541,4543,4544,4545,4547,0,4548,0,0,0,0,4550,4551,0,4553,0,0,0,0,4562,0,0,
+4571,0,0,0,4574,0,0,0,4575,0,4576,0,4577,0,0,0,4581,0,0,0,0,0,4582,0,0,4586,0,0,
+0,4588,0,0,4597,0,4598,0,0,0,0,4616,4617,0,4618,0,0,0,0,4619,0,4620,0,0,4621,0,
+4624,0,0,0,0,0,4625,0,0,0,0,4657,0,4659,0,4667,0,0,0,4668,4670,0,4672,0,0,0,0,0,
+4673,4676,0,0,0,0,4687,0,0,0,0,4697,0,0,0,0,4699,0,4701,0,0,0,0,4702,0,0,4706,0,
+0,4713,0,0,0,4714,4715,4716,0,0,0,0,0,0,0,0,0,0,0,0,4717,0,0,4720,0,4721,4729,
+4735,0,0,0,4737,0,0,0,4739,0,0,0,4740,0,0,0,4741,0,0,0,0,0,4742,0,4745,4746,4747
+,0,0,0,0,0,0,0,0,4748,0,0,0,4749,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4751,
+4786,0,4787,0,4788,4796,0,0,4797,4798,0,4799,4806,4807,0,0,0,0,4809,4810,0,0,0,0
+,0,0,4811,0,0,0,0,0,4812,0,4813,0,0,4815,0,4821,4822,0,0,0,0,4823,0,0,0,0,0,0,0,
+0,0,0,4824,0,0,0,0,4826,0,0,0,4828,0,4829,0,0,0,4843,0,0,4847,0,4853,4855,4858,0
+,0,0,0,0,4859,0,4864,0,0,4879,0,0,0,0,4880,0,0,0,0,4881,0,4882,0,0,0,0,0,0,0,0,0
+,4883,0,0,0,0,4884,0,0,0,0,0,4886,4887,4888,4894,4896,0,4902,0,0,4905,0,0,4915,0
+,0,0,0,0,0,0,4916,4917,4919,4921,0,0,0,0,0,4926,0,0,0,0,4927,0,0,0,0,0,0,0,0,
+4929,0,4930,4931,0,4938,0,4952,0,4953,4957,4960,4964,0,0,0,0,0,0,0,5019,5020,
+5022,0,0,0,0,0,5023,0,0,0,5024,0,0,0,5025,0,0,0,0,5028,0,0,0,0,5029,5030,5031,0,
+5033,0,0,0,0,0,0,0,0,0,5034,5035,0,5036,0,0,5037,0,0,0,0,5038,0,0,5039,0,0,0,
+5041,5042,0,0,0,0,5044,5049,5054,0,5055,0,5057,0,0,0,5060,0,0,0,0,0,5063,0,5064,
+5065,0,5067,0,0,0,5068,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5076,0,0,0,0,0,0,
+0,5077,0,0,5078,5080,0,0,5083,0,0,0,0,0,0,0,0,5085,0,0,0,0,0,0,5098,5099,5101,
+5105,5107,0,5108,0,5109,0,0,0,0,0,0,0,5110,0,0,0,0,0,5117,5118,0,5121,0,5122,0,0
+,5130,0,0,0,5137,0,0,0,5148,0,0,0,0,0,0,0,5151,5154,0,0,0,5155,0,0,5156,5159,
+5161,0,0,0,0,5162,0,0,0,0,5163,5164,0,5166,0,0,0,0,0,0,0,0,0,0,5167,0,0,0,5172,0
+,0,0,0,0,0,5178,5179,0,0,5190,0,0,5191,5192,5194,0,0,5198,5201,0,0,0,0,0,5203,0,
+5206,5209,0,0,0,0,0,0,5213,0,5214,5216,0,0,0,0,0,5217,0,0,0,0,0,0,0,0,5218,5219,
+0,5231,0,0,5244,5249,0,5254,0,5255,0,0,5257,0,0,0,0,0,5258,0,5260,5270,0,5277,0,
+0,0,0,0,0,5280,5281,5282,5283,0,0,0,0,0,5284,0,5285,0,0,0,0,0,5287,5288,0,0,0,0,
+0,0,0,0,0,0,5289,5291,0,0,5294,0,0,5295,0,0,0,0,0,0,0,5304,0,0,5306,5307,5308,0,
+5309,0,0,5310,0,0,0,0,5311,5312,0,5313,0,0,0,0,0,5316,0,0,0,5317,0,0,0,0,0,0,0,0
+,0,5325,0,0,0,0,0,0,5326,0,5327,5329,0,5332,0,0,0,0,5338,0,0,0,0,0,0,0,0,5340,0,
+0,5341,0,0,0,5342,0,5343,5344,0,0,5345,0,0,0,0,0,0,5347,5348,0,0,0,0,0,0,0,0,0,
+5349,0,5350,0,5354,0,0,0,0,5358,0,0,5359,0,0,5361,0,0,5365,0,5367,0,5373,0,0,0,
+5379,0,0,0,5380,0,0,0,5382,0,5384,0,0,0,0,0,0,5385,0,0,0,0,5387,0,0,0,0,0,0,5388
+,5390,5393,0,0,0,0,0,0,0,0,0,0,0,5396,0,0,0,0,5397,5402,0,0,0,0,0,5403,0,0,0,
+5404,5405,0,0,0,0,0,0,0,0,0,0,0,0,5406,0,0,0,0,5410,0,0,5411,0,5415,0,0,0,0,5416
+,5434,0,0,0,0,0,0,0,0,0,0,0,5438,0,5440,0,0,0,0,0,0,5441,5442,0,0,0,5443,5444,
+5447,0,0,5448,5449,5451,0,0,0,5456,5457,0,0,0,5459,0,0,0,5461,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,5464,0,5466,0,0,5467,0,5470,0,0,5473,0,0,5474,0,0,5476,0,0,0,0,0,0,0,0
+,0,0,0,5477,0,0,0,0,0,0,0,5484,0,0,5485,5486,0,0,0,0,0,5488,0,0,0,0,0,0,0,5489,0
+,0,0,0,0,5507,0,0,0,5510,0,5511,0,0,5512,0,0,0,5513,0,5515,0,0,5516,5517,0,5518,
+0,0,5522,0,0,0,0,0,5534,5535,0,0,5536,0,5538,0,0,5543,0,5544,0,0,5545,0,5547,0,
+5557,0,0,5558,0,5560,5567,0,0,0,0,5568,0,0,0,5571,5573,0,5574,0,5575,0,0,0,0,
+5577,0,0,5598,0,0,0,0,0,0,0,0,0,5600,5609,0,0,0,0,5610,0,0,5612,0,5624,0,5625,0,
+0,0,5629,0,5641,0,5642,5643,0,0,0,0,0,0,5651,0,0,0,5652,5653,0,5661,5662,5678,0,
+5679,0,0,0,0,5685,5686,0,0,0,0,0,5690,5692,0,5703,0,0,0,0,0,5706,0,0,0,0,5707,0,
+0,0,0,0,0,5708,0,0,5709,0,5710,0,0,0,5712,0,5733,0,5734,5735,0,0,5744,5751,0,0,0
+,0,0,0,0,0,0,0,0,0,5752,0,5754,0,0,0,0,0,0,5757,5758,0,5760,5761,0,0,0,0,5763,
+5764,5765,0,5766,0,5767,5768,0,5770,0,0,0,0,5776,5780,0,0,0,0,5782,0,0,0,0,5784,
+0,0,5788,0,0,0,0,0,0,0,0,0,0,0,5797,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5799,0,0,5801,
+0,0,0,5811,0,0,0,0,0,0,5816,0,0,5827,0,0,0,0,0,0,0,0,5830,5831,0,0,5832,0,0,5833
+,0,5835,5844,5845,0,5846,0,0,0,0,0,5850,0,0,0,0,0,5852,0,5855,5857,0,0,5859,0,
+5861,0,0,5863,0,5865,0,0,0,5873,5875,0,0,0,5877,0,5879,0,0,0,5888,0,0,5889,5891,
+0,5894,0,0,0,0,0,0,5895,0,5897,0,0,0,0,0,0,5907,0,5911,0,0,5912,0,5913,5922,5924
+,0,5927,5928,0,0,0,0,5929,5930,0,5933,0,0,0,0,5949,0,0,5951,0,0,0,0,0,0,0,0,5953
+,0,0,5954,0,5959,5960,5961,0,5964,0,0,0,5976,5978,5987,5990,0,0,0,0,0,5991,0,
+5992,0,0,0,5994,5995,0,0,5996,0,0,6001,6003,0,0,0,0,6007,0,0,0,0,0,6008,0,0,6009
+,0,6010,0,0,0,6011,6015,0,6017,0,6019,0,6023,0,0,0,0,0,0,0,6025,0,0,0,0,0,0,0,0,
+0,0,6026,0,6030,0,0,6032,0,0,0,6033,6038,6040,0,0,0,6041,6045,0,0,6046,0,0,6053,
+0,0,6054,0,6055,0,0,0,0,0,0,6057,0,6063,0,0,0,6064,0,6066,6071,6072,0,0,0,0,0,0,
+6075,6076,0,0,6077,0,0,0,0,0,0,0,0,0,6078,6079,0,0,0,0,0,0,0,0,6080,0,6083,0,0,0
+,0,0,6084,0,0,6088,0,6089,0,0,6093,6105,0,0,6107,0,6110,0,0,0,6111,6125,6126,0,0
+,0,6129,0,0,0,0,6130,0,0,0,6131,6134,0,0,0,0,0,0,6142,0,0,0,0,0,6144,0,0,6146,
+6151,6153,0,6156,0,6163,0,6180,6181,0,0,0,0,0,6182,0,0,0,0,6184,6195,0,0,6206,0,
+6208,0,0,6212,6213,6214,0,6215,0,0,0,6228,0,0,0,6234,0,0,0,0,0,0,6235,6240,0,
+6242,6243,6244,0,6250,6255,0,0,0,0,0,6257,0,0,0,6258,6278,0,6284,0,0,0,6285,0,0,
+0,0,0,0,0,0,6286,0,0,0,6320,0,0,6322,6332,0,0,0,0,0,0,0,0,6334,0,0,0,0,0,0,0,
+6335,0,0,6337,0,6338,0,6339,6340,0,0,6356,6357,6369,0,0,0,6370,6371,6372,0,6373,
+0,0,0,0,0,6376,0,0,0,0,0,6382,6383,6384,0,0,0,0,6386,0,6389,6397,6400,6411,0,
+6414,0,0,0,0,0,0,0,6415,6416,0,0,0,0,0,0,6417,0,0,0,0,6418,0,0,0,0,0,0,0,6420,0,
+6421,6423,6425,0,6429,6430,0,6433,6438,0,0,0,0,0,0,0,0,0,0,6439,6440,0,0,6441,0,
+0,6444,0,0,0,0,6446,0,0,0,0,6447,6448,0,0,6450,0,0,0,6454,0,0,6455,0,6461,0,0,0,
+0,0,0,6462,0,0,6463,0,6464,0,6465,6467,0,0,0,6468,0,6479,6480,0,0,0,0,0,0,0,6481
+,0,0,6485,6487,0,0,0,0,0,0,6493,0,0,0,0,0,0,0,0,6494,6495,6496,0,0,0,0,0,6498,0,
+0,0,6507,6508,0,0,0,0,0,0,0,0,0,0,6511,6512,0,0,0,0,6513,0,0,0,6514,0,0,0,0,0,
+6516,0,0,6517,6518,0,0,0,6519,6520,6521,0,6523,0,0,0,0,6524,6528,0,6530,0,0,6532
+,0,6578,0,0,0,6583,0,6584,0,0,0,6587,0,0,0,6590,0,6591,0,0,0,0,0,6592,0,0,0,0,
+6593,6594,0,0,0,0,0,6599,6600,0,0,6601,6602,6604,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+6608,0,0,0,0,0,0,0,0,6610,6611,0,6615,0,6616,6618,6620,0,6637,0,0,0,0,6639,0,0,0
+,0,6641,0,6642,0,0,0,6647,0,6660,6663,0,6664,0,6666,6669,0,6675,6676,6677,0,0,0,
+0,0,0,0,0,0,6678,0,0,0,6679,0,6680,0,0,0,0,0,0,0,6693,0,0,0,0,0,0,0,0,0,6704,
+6705,6706,0,0,6711,6713,0,0,0,0,0,6716,0,0,0,6717,0,6719,6724,0,0,0,0,0,0,0,0,
+6725,6726,0,0,0,0,0,6728,6729,6735,0,6737,6742,0,0,6743,6750,0,6751,0,0,6752,
+6753,0,0,0,0,0,0,6754,0,0,0,0,0,6756,0,0,0,0,0,0,6763,0,0,6764,6765,0,0,0,6770,0
+,0,0,6776,6780,0,6781,0,0,0,6783,0,6784,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+6785,0,0,0,6792,0,0,0,6793,0,0,6802,0,0,0,0,0,6803,0,0,0,6804,0,0,0,6812,0,0,
+6823,0,6824,6839,0,0,0,0,6852,0,0,6854,0,6856,6857,0,0,0,0,0,0,0,0,0,6867,0,6868
+,6870,6872,0,0,0,6873,6874,0,0,0,0,0,6875,0,0,6877,0,0,0,0,0,0,0,6878,0,0,0,6879
+,0,6880,0,0,0,0,0,0,0,0,0,0,6887,0,6888,6891,6893,0,6895,0,0,0,0,0,0,0,0,6899,0,
+0,0,0,6901,0,0,0,0,6910,0,6911,0,0,6912,0,0,6913,6914,0,0,0,6915,0,0,0,6916,6919
+,0,0,0,0,0,0,6924,0,6925,0,0,0,6926,6927,6928,0,6929,0,6930,0,0,6931,6935,0,6936
+,0,0,0,0,6939,6940,6941,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6942,6948,6949,0,0,0,0,0,0
+,0,6952,6954,6963,6965,6966,0,0,6967,6968,0,0,0,0,0,0,0,0,0,6969,0,0,6970,6979,0
+,0,6980,0,0,6983,0,0,0,0,0,6984,0,0,0,0,0,0,0,6988,6990,6992,0,0,0,0,0,0,0,6995,
+0,0,0,7012,0,0,0,0,0,0,0,0,0,7019,0,0,0,0,0,0,0,0,7021,0,0,7022,7023,7028,0,7030
+,7033,0,0,0,0,0,0,7038,0,0,0,0,0,0,0,0,0,0,7039,0,0,0,0,0,7046,0,7047,0,0,0,0,0,
+0,0,0,0,0,0,7048,7052,0,0,0,0,0,7054,0,7060,0,0,0,0,7061,0,7065,0,0,0,0,7067,
+7069,0,7070,7071,7072,0,0,7078,0,7080,7081,0,7083,0,0,0,7084,7087,7088,0,0,7090,
+0,7093,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7107,0,0,7108,0,0,0,0,0,0,0,0,7110,0,7114,0
+,0,0,0,0,0,0,7115,0,7116,0,0,0,0,0,7117,0,0,7118,0,0,7124,0,7125,0,0,7126,0,0,0,
+0,7128,0,0,0,0,0,7129,0,7130,0,7132,7133,0,0,7134,0,0,7139,0,7148,7150,0,0,0,0,
+7152,0,0,0,7153,7156,7157,0,0,0,0,0,7158,0,0,0,0,0,0,0,0,0,0,7163,7165,7169,0,
+7171,0,0,0,0,0,0,0,0,0,7172,0,7173,7181,0,0,0,0,0,7182,7185,0,0,0,0,7187,0,7201,
+7204,0,0,0,0,0,7206,7207,0,0,0,0,7211,7216,0,7218,0,0,0,0,7226,7228,7230,7232,
+7233,7235,7237,0,0,0,0,7238,7241,0,7242,0,0,7247,0,0,0,7266,0,0,0,0,0,0,0,7289,0
+,0,7290,7291,0,0,7292,0,7297,0,0,0,0,0,0,0,0,0,0,7300,0,7301,0,0,0,0,0,0,0,0,0,0
+,0,0,7302,0,0,0,0,7305,0,0,0,0,7307,0,7308,0,7310,0,7335,0,0,0,0,0,0,0,7337,0,
+7343,7347,0,0,0,0,0,7348,0,7349,7350,7352,7354,0,0,0,0,7357,0,7358,7366,0,7367,
+7368,0,0,7373,0,0,0,7374,0,0,0,0,0,0,0,7376,0,0,0,7377,0,0,0,0,0,7378,0,7379,
+7380,0,0,0,0,0,7383,0,0,7386,0,0,0,0,7398,0,0,0,7399,7400,0,7401,0,0,0,0,0,0,0,
+7402,0,0,0,0,0,7405,0,0,0,0,0,7406,0,0,0,0,0,0,0,0,7421,7427,7429,0,0,0,7435,0,0
+,7436,0,0,0,7437,0,0,0,0,0,0,7438,7443,0,7446,0,7448,0,0,0,0,0,0,0,0,0,0,7456,0,
+0,0,0,0,7457,0,0,7461,0,0,0,0,0,7462,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7463,7466,7472,
+0,7476,0,0,7490,0,7491,0,0,7493,0,0,0,7498,7499,0,0,7508,0,0,0,0,0,7512,0,0,0,
+7513,7514,7516,0,0,0,0,7518,0,0,7519,7521,7522,0,0,0,7526,0,0,7529,0,0,7531,0,
+7536,0,7538,0,7539,0,0,7541,7542,7546,0,0,0,0,0,7547,0,7548,0,0,0,0,0,7550,0,0,
+7552,7553,0,0,0,0,0,0,0,0,0,0,7554,7563,0,7573,0,0,0,0,0,0,7574,7576,0,7578,7581
+,7583,0,0,0,7584,0,7587,0,0,0,0,0,7589,0,0,0,7594,0,0,7595,0,0,7600,7602,7610,0,
+0,0,0,0,7612,0,7613,7614,0,0,7615,0,0,7616,0,7620,0,7621,7622,0,7623,0,0,0,0,
+7626,0,0,0,0,7627,7629,7631,0,0,7633,0,0,0,0,0,7639,0,7640,7642,0,0,7643,0,0,0,0
+,7644,0,0,0,0,0,0,0,7645,0,0,0,0,0,7661,7662,7663,7665,0,7666,0,7667,0,7684,7688
+,7690,0,7691,0,0,0,0,0,0,7692,0,0,7700,0,7707,0,7708,0,7709,0,7721,0,0,0,7722,0,
+7724,0,0,0,0,0,0,7729,7731,0,7732,0,7733,7735,0,0,0,0,0,0,0,7739,0,0,7741,7745,0
+,7748,0,0,0,7751,0,0,0,7752,0,0,0,0,0,0,0,7753,0,0,7756,0,7757,0,7759,0,7760,0,0
+,0,0,7761,7768,0,0,7769,0,0,7770,0,0,7771,0,0,7772,0,0,7773,0,0,0,0,0,7778,7783,
+0,0,0,0,0,7784,7785,0,7790,0,0,0,0,7792,0,7798,0,0,0,0,0,7799,0,7810,0,0,7813,0,
+7814,0,7816,0,7818,7824,7825,7826,0,7828,7830,0,0,0,7840,0,7842,0,7843,0,0,0,0,
+7844,0,0,0,0,0,0,0,7846,0,0,0,0,0,7856,7857,7858,7862,0,7865,0,0,7866,0,0,7913,0
+,0,0,0,7914,0,0,7915,7917,7918,7919,0,7920,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7921,
+7922,0,7924,0,0,7925,0,0,7927,0,7930,7935,0,0,7937,0,0,0,0,0,0,7939,0,7940,0,0,0
+,0,0,7941,0,0,0,0,7945,0,0,0,0,7949,0,0,0,0,0,0,0,0,7950,0,7953,0,0,0,0,0,0,0,
+7968,0,0,0,0,7969,7972,7992,0,7993,0,0,0,0,0,0,0,0,0,0,0,7994,0,0,0,0,8007,8008,
+0,0,0,0,0,0,0,0,0,0,0,0,8010,0,0,0,8012,0,0,0,0,0,0,0,0,8018,0,8028,8029,0,0,
+8030,0,0,8032,8033,0,0,8034,8036,0,0,0,0,0,0,0,0,0,0,8037,0,0,0,8043,8052,8059,
+8060,0,0,8061,0,0,0,8062,0,8063,0,8064,0,8066,8068,0,0,0,8080,8081,0,8089,0,0,0,
+0,0,8092,0,0,0,0,0,0,8093,8110,0,0,0,0,0,0,0,8111,0,0,0,0,0,8112,8115,0,8117,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8120,8121,8122,8128,8129,8130,8131,0,0,8139,0,0,
+8144,0,0,0,0,8145,8146,8153,0,0,0,0,0,0,0,0,8154,0,8157,8160,8162,0,8164,8165,0,
+0,0,0,8166,8167,0,0,8179,0,0,0,8185,0,0,0,8186,0,0,8187,0,0,0,8188,0,0,0,0,0,
+8204,0,0,0,0,8210,0,0,0,0,0,8213,0,8214,0,0,8215,0,0,0,0,0,0,8218,0,0,0,0,0,0,0,
+0,0,8219,0,8221,0,0,8222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8225,0,0,0,8233,0,0,
+8242,0,0,0,0,0,0,0,0,0,0,0,8247,0,8248,8252,0,8256,8257,0,0,8261,0,8264,8265,0,0
+,0,0,8267,0,0,0,8269,0,0,0,0,0,0,0,0,0,8270,0,0,0,8278,0,8279,8283,0,0,8285,8286
+,8289,8292,0,0,0,0,8293,8295,8299,8300,8301,0,0,0,0,0,0,8304,8307,0,0,0,0,0,0,0,
+8321,0,0,0,8322,8323,8325,8326,8327,0,0,8332,8338,0,0,8340,0,0,0,0,0,8350,0,0,
+8351,0,8354,8355,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8360,8372,0,0,0,0,0,0,0,0,8377,0,0,
+0,0,8380,0,0,0,8383,0,8384,0,0,0,0,8386,8392,0,0,8394,0,0,0,0,0,0,0,8396,8397,0,
+8398,0,8399,0,0,0,0,0,8400,0,8401,8410,8411,0,8412,8413,8422,0,0,0,0,8423,0,0,0,
+0,8424,0,0,8425,0,0,0,0,0,0,0,8441,8442,0,0,0,0,0,0,8443,0,0,8444,0,8447,0,0,0,0
+,8451,0,8458,0,8462,0,0,8468,0,8469,0,0,0,8470,0,8473,8479,8480,0,0,0,0,8481,
+8483,0,0,0,0,0,0,0,0,0,8484,0,0,8490,0,0,0,0,0,0,8491,8493,8494,0,8528,0,0,0,0,0
+,0,0,8530,0,0,0,0,0,0,0,0,8534,8538,8540,0,0,8541,0,0,8545,0,8557,0,0,8569,8570,
+0,0,8571,8574,8575,8579,0,8583,0,0,0,0,8591,0,0,0,0,0,0,0,0,8606,0,8607,0,0,0,0,
+0,0,0,0,0,8608,0,0,8609,0,0,0,8610,0,0,0,8611,0,0,8613,8617,8621,0,0,8622,0,8623
+,0,8624,8625,0,0,0,0,0,0,0,0,0,8637,8638,8639,8650,0,0,0,0,8652,8654,8655,0,0,0,
+0,0,0,0,0,0,0,8656,0,0,0,0,0,8657,0,0,0,0,0,0,0,0,0,8658,0,0,8659,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,8660,0,0,0,0,0,0,8661,8663,8664,0,0,0,0,8665,0,8669,0,
+0,0,0,0,0,0,8671,8674,0,8684,0,8686,0,0,0,8689,0,0,0,8690,0,8706,0,0,0,0,0,0,0,0
+,0,0,0,8710,0,8711,8713,8714,8724,8727,8728,8733,8736,0,8737,8739,0,0,0,0,8742,
+8743,8745,8754,0,0,0,0,8756,0,0,0,0,0,0,8757,8760,0,0,0,0,0,8762,8763,8764,0,
+8766,8769,8770,8773,0,8774,0,8779,0,0,0,0,8780,0,0,8781,0,0,8783,0,0,0,0,0,0,0,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8784,0,0,0,0,0,0,0,0,8785,0,0,0,0,8786,0,0,0,0,8788
+,8790,0,0,0,8803,0,8813,8814,0,0,0,0,0,8815,8816,0,0,0,0,8818,0,0,0,0,8822,8828,
+8829,0,8831,0,0,0,0,8833,0,0,0,8834,0,0,0,8835,0,8836,0,0,0,8837,0,0,0,0,0,0,
+8838,8839,0,0,0,0,0,0,0,0,0,0,0,8840,0,0,0,8841,0,8842,0,0,0,8846,0,0,0,0,0,0,0,
+8847,0,8848,0,0,8864,0,0,8866,0,0,8870,8872,0,0,8873,8874,0,0,0,0,0,0,8875,0,
+8876,0,0,0,0,8896,8900,0,0,0,0,8901,0,0,0,0,0,8904,0,8907,0,0,0,0,8911,8912,8913
+,0,0,0,8914,0,8915,0,0,0,0,0,0,0,0,0,0,0,0,8916,0,0,0,8929,0,0,0,0,0,0,0,0,0,0,
+8930,0,8932,0,8943,0,0,0,8945,8947,0,0,0,0,8949,0,8950,0,8954,8957,0,0,8970,0,0,
+0,0,8971,0,8996,0,0,0,0,8997,9000,0,0,0,0,9001,9002,0,9004,9009,9024,0,0,0,0,0,0
+,0,0,0,0,0,0,9027,9082,0,0,9083,9089,0,0,0,0,0,0,9090,0,0,0,9092,0,0,9093,0,9095
+,0,0,9096,9097,9101,9102,0,0,0,0,0,0,0,0,9112,0,0,0,0,0,0,9114,0,0,9120,0,9121,
+9122,0,0,0,9123,9124,0,0,9125,0,0,9126,0,9127,0,0,9129,9131,0,0,0,9132,0,0,9136,
+0,9144,0,0,9148,0,0,0,0,0,0,9149,0,9152,9163,0,0,9165,0,0,0,0,0,0,0,0,0,0,0,0,0,
+9166,0,9169,0,0,0,0,0,0,0,9170,0,0,0,0,9172,0,9174,9175,9176,0,9177,0,0,0,0,0,0,
+0,0,9186,0,9187,0,0,0,9188,9189,0,0,9190,0,0,0,0,9191,0,0,0,9193,0,0,0,0,9197,
+9198,0,0,0,9208,9211,0,0,0,0,9216,9217,0,9220,0,0,0,0,9221,9222,9223,0,9224,9225
+,0,0,9227,0,9228,9229,0,0,9230,0,9232,0,9233,0,0,0,0,0,9234,9235,0,0,9237,0,0,0,
+0,0,0,0,0,9238,9240,0,0,9241,0,0,0,0,9244,0,0,0,0,9247,0,0,0,0,0,0,0,0,0,0,9248,
+0,0,0,9249,0,0,0,0,0,9250,0,0,0,0,9251,0,0,9252,9255,0,0,0,9256,0,0,0,0,0,0,0,
+9257,0,0,9258,0,0,0,0,0,0,9259,0,0,0,0,0,9262,9263,0,0,9265,9266,0,0,0,0,0,0,0,0
+,9268,9271,0,0,0,0,0,0,0,0,0,9273,0,0,0,9276,9277,9279,0,0,0,0,0,0,0,9280,0,0,
+9293,0,0,0,0,0,9297,9301,0,0,0,0,0,0,0,0,0,0,0,9308,9309,9313,9321,9322,0,9326,
+9327,0,0,9477,0,9479,0,0,0,0,9482,0,0,0,9483,0,9484,0,0,0,0,0,0,0,0,0,9485,0,0,
+9486,0,0,0,9489,0,0,0,0,9490,9491,0,0,0,0,9493,0,9495,9496,0,0,0,0,0,0,0,0,9500,
+0,9502,0,0,0,0,0,9504,9507,0,9509,0,9511,0,0,9513,0,0,0,0,0,0,0,0,9515,0,0,0,0,0
+,0,9516,9517,0,0,0,0,9532,0,0,9533,0,0,9538,0,9539,9540,0,0,0,0,9541,0,0,0,9542,
+0,0,0,0,0,0,0,0,9544,9545,0,9546,0,0,0,0,0,0,9547,9548,0,0,0,9550,0,9557,0,9558,
+0,9561,0,9563,9570,0,9572,9574,9575,0,0,0,9577,9592,0,0,9596,0,0,0,9598,0,9600,0
+,9601,0,0,0,0,0,0,9608,0,9638,9639,0,0,0,0,0,0,0,9641,0,0,9643,9644,9645,9646,0,
+0,0,9648,0,0,0,0,0,0,0,9650,9654,0,0,0,0,0,0,0,0,9655,0,0,0,0,0,9656,0,9657,0,0,
+0,0,9658,0,0,9659,0,0,9664,0,0,9665,0,9667,9669,0,0,0,0,0,0,0,0,0,0,0,0,9671,0,
+9673,9681,0,0,0,0,9682,9683,9684,0,0,0,0,9686,9698,0,0,9700,9701,9702,0,9703,
+9717,0,0,0,0,9718,0,9726,0,0,0,0,9727,0,0,0,9728,0,9742,0,9744,0,0,0,9750,0,9754
+,9755,0,0,0,0,0,9756,0,9757,9768,0,9769,0,0,0,9770,9771,0,9773,0,9774,0,9775,0,0
+,0,9776,9777,9784,0,0,0,9786,0,9789,0,0,0,0,9793,9794,0,0,0,9808,0,0,0,0,0,9811,
+0,0,0,0,0,0,0,0,0,0,0,0,9812,0,9820,0,9823,0,9828,0,0,0,0,9830,0,0,9833,9836,0,0
+,0,9840,0,0,0,9841,0,0,9842,0,9845,0,0,0,9847,9848,0,0,9855,0,0,0,0,0,0,9856,
+9863,9865,0,0,0,0,0,0,0,0,9866,9867,9868,9873,9875,0,0,0,0,0,0,9880,0,9886,0,0,0
+,9887,0,0,9891,0,0,0,0,0,0,0,9906,9907,9908,0,0,0,9909,0,0,0,0,0,0,9910,0,0,0,0,
+9913,0,0,0,0,9914,0,0,0,0,0,9922,0,0,0,0,9923,9925,0,0,0,0,0,0,9930,0,0,0,9931,0
+,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9932,0,9939,0,0,9940,9962,9966,0,9969,9970,0,0,9974
+,0,9979,9981,9982,0,0,0,9985,0,0,0,0,0,0,9987,0,0,0,0,0,0,0,9988,9993,0,0,9994,0
+,0,0,9997,0,10004,0,0,0,0,0,10007,10019,10020,10022,0,0,0,10031,0,0,0,0,0,10032,
+0,0,10034,0,10036,0,0,0,0,10038,0,10039,10040,10041,10042,0,0,0,0,0,10043,0,0,0,
+0,0,10045,10054,0,0,0,0,10055,0,0,10057,10058,0,0,0,0,0,0,10059,0,0,0,0,0,0,0,
+10060,0,0,0,0,0,0,0,10063,0,10066,0,0,0,10070,0,10072,0,0,10076,10077,0,0,10084,
+0,10087,10090,10091,0,0,0,10094,10097,0,0,0,0,0,0,10098,0,0,0,0,0,0,10103,0,
+10104,0,10108,0,0,0,0,0,0,0,0,10120,0,0,0,10122,0,0,10125,0,0,0,0,10127,10128,0,
+0,10134,0,10135,10136,0,10137,0,0,10147,0,10149,10150,0,0,10156,0,10158,10159,
+10160,10168,0,0,10171,0,10173,0,0,0,10176,0,0,0,0,10177,0,0,0,0,10178,0,0,0,0,
+10194,0,10202,0,0,10203,10204,0,10205,10206,0,10207,0,0,0,0,10209,0,0,0,0,0,0,0,
+10213,0,0,0,0,0,0,10217,0,10229,0,10230,10231,0,0,10232,0,0,10237,10238,10244,0,
+0,0,0,0,10250,0,10252,0,0,0,0,0,0,10255,0,0,10257,0,0,0,0,0,0,10258,0,10259,0,0,
+0,0,0,0,0,0,10260,0,0,0,0,0,0,0,10284,10288,10289,0,0,0,10290,0,10296,0,0,0,0,0,
+10297,0,0,0,0,0,0,10298,0,0,0,0,10299,10303,0,0,0,0,0,10306,0,0,0,10307,0,10308,
+0,0,0,0,10311,0,0,0,0,0,0,0,10315,10317,0,0,0,10318,10319,0,10321,0,10326,0,
+10328,0,0,0,0,10329,0,0,10331,0,10332,0,0,0,0,0,0,10334,0,0,10335,10338,0,0,0,0,
+0,10339,10349,0,0,0,0,0,0,10351,0,10353,0,0,0,0,0,0,10362,0,10368,0,10369,0,0,0,
+10372,10373,0,0,0,0,0,10374,0,0,0,10375,0,10376,0,0,10386,10388,10390,0,0,0,0,0,
+0,0,10391,0,0,10392,10394,0,0,10396,0,10397,0,10403,0,0,0,0,0,0,0,0,10404,0,
+10405,10410,0,0,10411,0,10412,0,0,0,0,0,0,0,10421,10422,10423,0,0,0,0,0,0,0,0,0,
+10425,0,0,10427,0,0,10430,0,0,0,0,0,10432,0,10433,10434,0,0,0,0,10436,10437,0,
+10438,0,10439,0,10444,10446,0,0,0,0,0,10448,0,0,0,0,0,10449,0,0,0,0,0,0,0,10451,
+0,10453,0,0,0,10454,10457,0,0,10459,0,10469,0,0,0,0,0,10472,10481,0,0,0,0,0,
+10482,10483,0,10492,0,0,0,0,0,0,0,0,0,0,10499,0,0,0,10502,0,0,10510,0,10521,
+10524,0,0,10525,10526,10528,0,0,0,0,0,0,0,0,10530,0,0,0,0,10533,0,10534,0,0,0,0,
+0,0,0,0,0,0,10535,10536,0,0,10544,0,10553,10556,0,10557,10559,0,0,0,0,0,10562,
+10563,10564,0,10565,0,0,0,10566,0,10567,0,0,0,0,10575,0,0,10576,0,10578,0,0,0,0,
+0,0,0,0,0,0,10585,10586,10587,10589,0,10590,0,0,10594,0,0,0,0,0,10598,0,0,10601,
+0,0,0,10602,0,10603,0,10604,0,10605,0,0,10607,0,10626,0,10627,0,0,0,0,0,10629,
+10630,10631,0,0,0,10646,0,0,0,10647,0,10650,0,10651,0,0,0,10652,10653,10655,0,
+10658,0,0,10659,0,10667,0,0,0,0,10669,0,0,0,0,0,0,0,0,0,10670,0,0,0,10671,0,0,0,
+0,10672,10673,0,10674,0,0,0,10676,0,0,0,0,0,0,10678,0,10682,0,0,10692,0,10697,0,
+0,0,0,10698,0,0,0,10700,0,0,0,0,0,10703,0,10704,0,0,0,0,0,0,0,10705,0,10715,
+10718,10720,0,0,10722,0,0,0,0,0,0,0,0,10723,0,0,0,0,10726,0,0,0,0,0,10727,10730,
+10743,0,0,0,0,0,0,10744,0,0,10745,0,0,0,0,0,0,10748,0,0,0,0,10750,0,0,10752,
+10753,0,0,0,10756,0,0,0,0,0,0,10758,0,0,0,10759,0,10769,0,0,10772,0,0,0,0,0,0,
+10773,0,0,0,10777,0,0,10779,0,0,0,0,0,0,0,0,10780,10784,0,0,0,10789,0,0,0,10791,
+0,0,0,0,0,0,0,0,0,10795,0,0,10796,0,10808,0,10809,0,0,0,10810,0,0,0,10812,0,0,
+10814,0,0,0,0,0,0,0,0,0,10815,0,0,0,0,10816,10817,0,0,0,0,10819,0,10820,0,0,0,0,
+10821,10822,10823,0,10826,10849,0,0,0,0,10850,0,0,10852,0,10853,0,0,10856,0,0,
+10857,10858,10859,10860,0,0,0,0,0,0,10863,0,10866,10867,10872,10890,0,0,10891,
+10892,0,0,0,0,0,10893,0,0,0,10896,10899,0,0,10900,10902,0,0,0,0,0,10903,0,0,0,0,
+0,0,0,0,0,0,0,0,10905,0,10906,0,0,0,0,10908,10911,0,10912,0,0,10916,0,0,0,0,0,
+10917,0,10918,0,0,0,10923,0,0,0,0,0,10924,0,0,10928,10929,0,0,10930,0,0,0,10932,
+0,0,0,0,10939,0,0,10945,0,0,0,10947,0,0,10948,0,0,0,0,0,0,0,0,0,0,0,0,10958,0,
+10960,10962,0,0,10964,0,0,0,10966,0,0,0,0,0,0,0,0,0,0,10967,0,0,0,10968,0,0,0,
+10973,0,0,0,0,0,10975,0,0,0,10976,10978,0,0,10982,10984,10987,0,0,10988,0,10989,
+0,0,10991,0,0,0,0,10992,0,0,0,10993,0,10995,0,0,0,10996,10997,0,0,0,10998,0,
+10999,0,11001,0,0,0,0,0,0,11010,11012,0,11013,11016,11017,0,0,11019,11020,11021,
+0,0,0,0,0,0,0,0,0,0,0,0,11022,0,0,11023,11029,0,0,0,0,11031,0,0,0,11034,0,0,0,0,
+11055,0,0,0,0,0,11056,11060,0,0,0,0,0,0,11061,0,0,11064,11065,0,11066,0,11069,0,
+11085,0,0,0,0,0,11086,0,0,0,11088,0,0,0,11094,0,0,0,11095,11096,0,0,0,0,0,0,
+11097,11098,0,0,0,0,0,0,11099,0,0,11102,11108,0,0,0,11109,0,11114,11119,0,11131,
+0,0,0,11142,0,0,11143,0,11146,0,11147,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11148,0,
+11149,11152,11153,11154,0,11156,0,11157,0,0,0,11158,0,0,11159,11160,0,0,0,0,0,0,
+0,0,0,0,0,0,11163,0,0,11164,11166,0,0,0,11172,11174,0,0,0,11176,0,0,0,0,0,11182,
+11183,0,0,0,11184,11187,0,0,11188,11189,0,0,0,0,0,0,11194,0,0,0,0,0,0,0,11200,
+11202,0,0,0,0,0,0,11203,0,11204,0,0,0,0,0,11205,0,0,0,11206,0,11207,0,0,11209,0,
+11211,0,11214,0,0,11231,0,0,0,11293,11295,0,0,11296,11297,11302,0,0,0,11307,0,0,
+0,0,11309,11310,0,11311,0,0,0,11313,0,11314,0,0,0,0,11334,0,11338,0,0,0,11339,0,
+0,0,0,0,11340,0,11341,11342,0,11344,0,11345,0,0,0,11348,11349,0,0,11350,0,0,0,
+11355,0,0,0,0,0,0,11356,0,11357,11370,0,0,11371,0,11374,11376,0,0,0,11377,0,0,
+11378,11383,0,11386,11399,0,11400,11406,0,0,0,11408,0,0,11409,11412,0,0,0,0,
+11417,0,0,0,11418,0,11421,0,11426,11429,0,0,0,0,0,11430,0,11437,0,11438,0,0,0,0,
+0,11440,11453,0,0,0,0,0,0,11454,0,0,0,0,11455,0,0,11456,11460,11461,11463,0,
+11469,0,11473,0,0,0,0,11474,0,0,0,11475,0,11476,11477,11480,0,0,0,0,11481,0,0,
+11484,0,0,11487,0,0,0,0,0,0,0,0,0,0,11497,0,0,11502,0,11509,0,0,11510,11511,
+11513,0,0,0,0,0,0,0,0,0,0,11515,0,0,0,0,11516,0,11520,11521,0,0,0,0,0,0,0,0,0,0,
+0,11529,11530,11531,11534,0,0,11543,0,0,0,0,0,11547,0,11548,0,0,0,0,0,11552,
+11556,0,11557,0,0,11559,0,11560,0,0,0,0,0,0,11561,0,0,11563,11564,0,11565,0,0,0,
+0,11567,0,0,0,11569,0,11574,0,11575,0,0,0,11577,0,11578,0,0,0,11580,11581,0,0,0,
+11582,11584,0,0,0,0,0,0,0,11587,0,11588,11591,0,11595,0,0,0,0,0,0,0,0,11596,0,
+11597,0,0,0,0,11598,11601,0,0,0,11602,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11603,
+11604,0,11606,0,0,11608,0,0,0,0,11610,0,0,11611,0,0,0,0,11613,0,11622,0,0,0,
+11623,0,0,0,0,11625,0,0,11626,11627,11628,11630,0,0,0,0,0,0,11639,0,0,11646,0,
+11648,11649,0,11650,0,0,0,0,0,0,0,0,0,11651,0,0,11652,11653,11656,0,0,11677,
+11679,0,0,0,0,11680,0,0,11681,0,11685,0,0,0,0,0,0,0,0,11688,0,0,0,11716,0,11719,
+0,0,0,0,0,11721,0,0,11724,11743,0,0,0,0,0,0,0,0,11745,11748,11750,0,0,0,0,0,
+11751,0,0,0,11752,11754,0,11755,0,0,0,0,0,0,0,11759,0,0,0,0,0,0,11760,0,0,0,
+11761,0,0,0,0,0,0,11766,11767,0,11772,11773,0,11774,0,0,11775,0,11777,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,11778,11780,0,0,0,0,0,0,0,11783,0,11784,0,0,0,11785,
+0,0,0,11786,0,0,0,0,11788,0,0,11789,11791,11792,0,0,0,0,11795,11834,11835,11836,
+0,0,11837,0,0,0,11838,0,0,11846,11851,0,11852,0,11869,0,0,0,11871,0,0,0,11872,
+11874,0,0,0,0,0,0,11875,0,11876,11877,0,0,0,0,0,0,0,0,0,0,11883,0,0,0,0,0,0,0,
+11884,0,11885,0,11886,0,0,11887,0,11894,11895,11897,11909,11910,0,11912,11918,0,
+0,11920,0,11922,11924,11927,11928,0,0,0,0,11929,0,11934,0,0,0,0,0,11941,11943,
+11944,0,11945,0,0,0,0,11948,11949,0,0,0,0,11953,0,11954,0,11955,0,11956,0,0,0,0,
+0,11957,0,0,11959,0,0,0,0,0,0,0,0,11961,0,0,0,0,0,11978,0,0,0,11979,11980,11986,
+11987,0,11992,0,0,0,0,0,11993,0,0,0,11994,0,11999,12004,12005,12006,0,0,0,0,0,
+12011,0,0,12012,12014,0,0,12015,0,0,12019,12028,0,0,12029,0,0,12032,12033,0,0,0,
+0,12034,0,12041,12043,0,0,12044,0,0,0,0,0,0,0,12046,0,0,0,0,0,0,0,12054,12055,0,
+12056,0,0,0,12060,12064,0,0,0,0,0,12065,12067,12068,0,0,0,0,0,0,0,0,12074,0,0,0,
+12075,12076,0,0,0,12079,0,12081,12086,12087,0,0,12088,0,0,0,0,12089,0,12092,0,0,
+0,0,12097,0,0,0,0,0,0,0,0,12098,0,0,0,0,0,0,0,0,0,0,0,0,0,12102,12103,12104,
+12111,0,0,12114,12116,0,0,0,12118,0,0,0,12119,12120,12128,0,0,0,0,12130,0,0,0,0,
+0,0,12131,0,0,0,12132,12134,0,0,0,0,12137,0,12139,0,12141,0,0,12142,0,0,0,12144,
+0,0,0,0,0,12145,0,12148,0,12153,0,0,0,0,12154,12171,12173,0,0,0,12175,0,0,0,0,
+12178,0,0,0,0,0,0,0,12183,0,0,0,0,0,0,0,0,12184,0,0,0,12186,0,0,0,0,0,12187,
+12188,0,0,12189,0,12196,0,12197,0,0,12198,0,12201,0,0,0,0,12203,0,12209,0,0,0,0,
+12210,12211,12212,12213,0,12217,12218,0,0,0,0,0,0,0,0,0,12222,0,0,0,0,0,0,0,
+12223,0,0,12229,0,0,0,0,12233,0,0,0,0,12234,0,0,12236,12242,0,0,0,12243,0,0,0,
+12244,12253,0,12254,12256,0,12257,0,0,12275,0,0,0,0,0,12277,0,0,0,0,0,12278,0,
+12289,0,0,12290,0,12292,12293,0,0,12294,0,12295,0,0,12296,0,12297,0,12298,0,0,0,
+0,12301,0,0,0,0,0,0,0,0,0,0,0,0,0,12309,0,12338,12340,0,0,0,0,12341,0,0,0,0,0,0,
+0,0,12342,12343,0,12344,0,0,0,0,0,0,0,0,0,12345,0,0,0,0,0,0,0,0,12346,0,0,0,0,
+12348,0,0,0,0,0,0,0,0,0,0,0,0,12350,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12351,0,12355,
+12356,12357,0,0,12367,12370,12371,0,0,0,0,0,12372,12376,0,0,0,0,0,0,0,0,12379,0,
+12382,0,12383,0,0,12384,0,0,0,0,12393,0,0,12394,0,0,0,0,12398,12403,0,0,12404,0,
+0,0,0,0,0,0,0,0,0,0,0,0,12410,0,0,0,12411,0,0,0,12412,0,0,0,0,12420,0,12421,0,0,
+0,0,0,12423,0,12425,12429,0,0,0,12431,12432,0,0,0,0,0,0,0,0,0,0,0,0,12434,0,0,0,
+0,0,12435,12436,0,0,0,0,0,0,0,0,12437,0,0,0,0,0,12438,0,0,0,0,0,0,0,0,12445,0,0,
+0,12450,12451,0,0,0,0,0,0,0,0,12452,12475,0,0,12493,12494,0,0,0,12495,0,0,0,0,
+12496,12502,12509,0,0,0,0,12510,0,12512,12513,0,0,0,0,12514,0,0,0,12515,0,12520,
+0,0,0,12524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12527,0,0,0,12528,0,0,0,12529,0,0,0,
+0,0,12530,0,12535,0,0,12536,0,12538,0,0,0,0,0,0,0,0,0,0,0,0,12540,0,12548,0,0,0,
+0,0,12550,0,0,0,12551,12552,0,0,0,12554,0,0,0,0,0,0,0,0,12555,0,0,12562,0,12565,
+0,12566,0,0,0,0,0,0,0,0,0,0,0,0,12569,0,0,0,12571,12574,0,0,0,0,0,0,0,12577,0,0,
+0,0,0,0,0,12578,12579,12603,0,12608,0,0,12611,0,12612,0,12615,0,12625,0,0,0,0,
+12627,12646,0,12648,0,0,12657,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12670,0,0,12671,0,
+12673,12677,0,0,0,0,0,0,0,0,0,0,0,12679,0,12681,0,12682,12693,0,12694,0,12697,0,
+12701,0,0,0,12703,12704,0,0,0,0,12707,12737,0,0,12739,0,0,12740,0,0,12742,12743,
+0,0,0,0,0,0,0,0,0,12745,0,12746,12747,0,12748,0,0,12759,12767,0,0,0,0,12773,0,
+12774,12778,0,0,0,0,0,0,0,12779,0,0,0,0,0,12780,12793,0,12824,0,12825,0,12836,0,
+0,0,0,12839,0,12842,0,0,0,0,0,0,0,0,0,0,0,0,12843,12845,0,12846,0,0,0,0,12847,0,
+0,12850,12852,12853,0,0,0,12854,0,0,0,12855,0,12856,0,12858,0,0,12859,0,12862,0,
+12863,0,0,12866,0,12869,12872,12873,0,0,0,0,0,0,0,0,0,12875,0,12877,0,0,12878,0,
+0,0,0,0,0,0,0,0,12884,12885,12888,0,12889,0,0,0,0,12893,0,0,0,12895,12896,12898,
+0,0,0,0,0,0,0,12902,0,12909,12910,0,12926,0,12928,0,0,0,12929,0,12930,0,0,0,0,
+12931,0,12932,12933,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12934,0,12942,0,0,0,0,12944,
+0,0,0,0,0,0,0,0,12946,0,0,12948,0,0,12949,0,0,0,0,12950,0,0,0,0,12951,0,12952,0,
+12953,0,0,0,12954,12958,12959,0,0,0,0,0,12960,12964,0,0,0,0,0,12966,0,0,0,0,0,0,
+0,0,12970,0,12971,0,0,0,0,0,0,12972,0,0,12982,0,0,0,12984,12985,0,12986,12996,
+12997,13001,13002,0,0,0,0,13004,0,0,13005,0,0,13007,13009,0,13017,0,0,0,13020,0,
+13021,0,0,0,0,0,0,0,0,0,0,13022,0,0,0,0,0,0,0,0,13024,13027,0,0,0,0,0,13028,0,0,
+13029,0,0,0,0,0,0,0,13032,0,13037,0,0,0,0,0,0,13040,0,0,13041,0,0,0,13043,13044,
+13046,0,0,0,0,13047,0,0,0,0,0,0,0,13049,13054,0,13056,0,0,13060,13061,0,0,0,0,0,
+13067,0,0,13068,0,13071,0,0,0,0,0,13077,13078,0,0,0,0,0,13079,13080,13081,0,
+13082,0,0,0,13085,0,0,0,0,0,0,0,13086,0,13087,13088,0,0,0,0,0,13094,0,13099,0,
+13100,0,0,0,13101,0,13125,13126,13128,13129,0,0,13130,0,13131,0,0,0,0,0,0,13134,
+0,0,0,0,0,0,0,0,0,0,0,13150,0,13168,0,0,0,0,0,0,0,0,0,13169,0,0,13170,0,0,0,0,
+13174,0,0,0,13176,0,0,0,0,0,13177,0,13178,13183,13187,0,0,0,13189,0,0,13190,0,0,
+13191,0,0,13206,0,0,0,13207,0,0,0,0,0,0,0,0,0,0,13212,0,0,13219,13232,0,0,0,
+13241,0,13249,13253,0,0,0,0,0,13255,13259,0,13260,13261,0,13262,0,13272,0,0,0,0,
+13276,0,0,0,0,13277,13299,0,0,13301,13302,0,0,13303,0,0,13305,0,13310,0,0,0,
+13311,0,0,0,0,13325,0,13328,0,0,0,13329,0,0,0,0,0,0,13330,0,0,13331,0,13335,0,0,
+13342,0,0,0,0,0,13343,0,13354,0,13362,0,13366,13367,13369,0,0,13371,13372,0,
+13373,13374,0,13376,0,13380,13381,13386,0,13387,13388,0,13389,13391,13395,0,0,0,
+0,0,13401,13409,0,13410,0,0,0,0,13420,0,0,0,0,0,13422,0,0,0,0,13423,0,0,0,0,
+13425,0,0,0,0,0,13427,0,0,0,13428,0,0,13430,13438,0,13439,0,13445,0,13448,13449,
+0,0,0,0,0,0,13451,0,13457,0,0,0,0,13458,13459,0,13460,0,0,0,0,13464,13465,13466,
+13470,0,13471,13472,13474,13475,0,13476,0,0,13478,13479,0,13481,0,0,0,0,13487,0,
+13490,0,13493,0,0,13494,0,0,13495,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13496,13497,0,
+13500,0,0,13516,13522,0,0,13525,13528,0,0,0,13530,13535,0,13537,13539,0,13540,0,
+13543,0,13544,0,0,0,0,0,0,13545,0,0,0,0,0,0,13547,0,0,0,13549,13555,0,0,0,13556,
+13557,0,0,0,0,0,0,0,13558,0,13563,0,0,0,0,13564,0,0,0,0,0,0,0,0,13566,0,0,0,0,0,
+0,13569,0,0,13571,0,0,0,0,13573,0,0,0,0,0,0,13578,0,0,0,0,0,0,0,0,0,0,13581,0,
+13586,0,13595,0,13600,0,0,0,0,0,0,0,0,13601,13603,0,13604,13605,13606,13607,0,0,
+13617,13618,0,0,0,0,0,0,0,13623,0,13625,13627,0,0,0,0,0,0,0,0,13629,0,0,0,13634,
+0,0,0,13638,0,0,0,0,0,0,0,0,13654,0,0,0,0,0,0,0,0,0,0,13656,0,13659,0,0,13660,0,
+0,13662,0,0,0,13663,0,13664,0,0,0,0,0,13668,0,13669,13671,0,0,13672,0,0,0,0,0,0,
+13675,13685,0,13686,0,0,0,13687,0,0,0,13692,13694,13697,0,0,0,13702,0,0,0,0,0,
+13705,0,0,0,0,13707,0,0,0,13714,0,0,0,0,0,0,0,0,0,13715,0,13716,13717,0,0,13719,
+13724,13730,13731,0,0,0,0,0,0,0,0,13732,0,0,0,0,0,0,0,13734,0,13736,0,0,13737,
+13738,13747,0,13751,0,0,13752,0,0,0,13753,0,13757,0,0,13762,13763,0,13764,13765,
+0,13766,0,0,13767,0,0,0,13768,0,0,0,0,0,0,0,13769,0,0,13772,0,13775,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,13776,13778,13787,0,0,0,13797,0,13798,0,13801,0,13804,
+13806,0,0,0,0,13816,13817,0,0,0,0,0,0,0,0,0,0,0,0,0,13834,0,13836,0,0,13838,0,0,
+13839,0,13840,0,0,0,0,13842,0,0,0,0,0,0,13843,0,0,0,0,0,0,0,0,0,13845,0,0,0,0,0,
+13858,0,0,13860,0,0,13861,0,0,13862,13863,0,13868,0,13869,13870,0,0,0,0,0,0,0,0,
+0,0,13872,0,0,0,0,13873,13878,0,0,0,0,0,0,0,0,0,0,13886,0,13888,13889,13890,0,0,
+13891,13894,0,13897,13899,13900,13904,0,0,13906,0,0,0,13909,0,0,0,13910,0,0,0,
+13911,0,0,0,0,0,13912,13917,0,0,0,0,13918,0,13919,0,0,13920,0,0,0,13921,0,0,
+13922,0,0,0,0,0,0,0,13924,0,13927,0,0,0,0,0,13932,0,13933,0,13934,0,0,13935,0,
+13944,0,0,0,13954,0,0,13955,0,0,0,0,13956,0,13957,0,13967,13969,0,0,0,0,0,0,0,0,
+0,0,0,0,13970,13990,0,13991,13994,0,13995,0,0,0,0,13996,0,0,13999,0,0,0,14018,0,
+14019,0,14021,0,0,0,0,0,0,14041,0,0,0,0,0,0,0,0,14043,0,0,0,0,14046,0,0,0,14048,
+14049,0,0,0,0,0,0,0,0,0,0,14051,0,0,14052,14056,0,14063,0,14064,14066,0,0,14067,
+0,0,0,0,0,0,0,0,0,14068,0,0,0,14072,0,14074,14075,0,14076,14079,14085,14086,
+14087,14093,0,0,0,0,14095,0,0,0,0,0,0,14096,14097,0,0,0,0,0,0,0,14098,0,14102,0,
+0,0,0,0,14103,0,0,0,14104,0,0,14105,0,0,0,14107,14108,0,0,14109,0,0,0,0,0,0,0,0,
+14117,0,0,0,0,14118,0,0,0,0,14119,0,0,14120,0,0,14121,0,14122,14127,0,14128,
+14136,0,0,14138,0,14140,0,0,0,14141,14142,0,0,0,0,14146,0,0,14149,0,14151,0,0,0,
+14152,0,0,14153,0,0,0,0,0,0,0,0,0,14154,0,14156,14157,0,0,14159,0,14161,0,0,0,0,
+14162,0,0,0,0,0,0,14163,0,0,14173,0,0,0,0,0,0,14174,0,0,14176,0,0,14178,0,0,
+14179,14181,0,0,14182,14185,14187,0,14190,0,0,14197,0,0,0,0,0,0,0,0,0,0,0,0,
+14198,0,0,0,0,0,0,14199,14200,0,0,0,14204,0,0,14208,0,0,0,0,0,0,0,0,0,0,0,14231,
+0,0,0,0,0,0,0,0,0,14234,0,0,14235,0,0,0,14240,14241,0,0,0,14246,0,0,0,14247,0,
+14250,0,0,14251,0,0,14254,0,0,14256,0,0,0,14260,0,14261,0,0,0,0,14262,14267,
+14269,0,0,14277,0,0,14278,0,14279,14282,0,0,0,14283,0,0,0,14284,14285,0,0,0,0,
+14286,0,0,0,14288,0,0,0,14289,0,14290,0,14293,14301,14302,14304,14305,0,14307,0,
+14308,14309,0,0,0,0,0,0,0,0,0,0,0,14311,14312,0,0,14317,0,0,0,0,0,0,0,14318,0,0,
+0,0,14320,0,0,0,0,14321,14322,0,0,0,0,0,14326,14329,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+14330,14331,0,0,0,0,14332,0,0,0,14333,0,0,14337,14340,0,14341,0,0,14342,0,14345,
+14346,0,0,14347,0,14362,0,0,0,0,0,14364,14365,14371,0,14373,0,0,14374,0,14379,0,
+14400,0,0,0,0,0,14401,0,0,14405,0,14406,0,14408,14409,0,0,0,14417,0,0,14424,0,0,
+0,0,0,0,0,0,0,14430,0,0,0,14431,0,0,14435,0,14440,0,0,0,0,0,0,14442,0,0,14443,0,
+0,0,0,0,14446,0,0,0,0,0,0,0,14454,0,14457,0,14460,0,0,14466,0,0,0,0,0,14467,0,0,
+0,0,0,0,14469,0,14477,0,0,0,0,0,0,14478,14482,0,0,0,14483,0,0,0,14485,14486,0,0,
+0,14487,14488,14489,14492,14493,14494,14495,14496,14497,0,14499,0,14501,0,0,0,0,
+0,0,0,0,0,0,14502,0,14507,14512,14513,14514,0,0,0,0,0,0,0,0,0,0,0,14515,14526,
+14530,0,14537,0,14544,0,14547,0,0,14548,14550,14551,0,0,14552,0,0,0,14553,0,
+14554,0,0,0,0,14556,14564,0,0,14565,14566,0,0,0,0,0,0,14568,0,0,14569,0,0,0,
+14571,14576,0,0,14577,14578,14579,0,0,14580,0,0,0,0,14582,0,0,0,0,0,0,0,0,0,0,0,
+0,14583,0,0,0,0,0,14587,0,14588,0,0,14600,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,14601,0,0,14604,14605,14611,0,14613,0,0,0,0,14615,0,0,0,0,0,0,14627,0,14628,0,
+0,0,0,14631,0,14633,14634,0,0,0,0,14635,0,0,0,0,0,0,0,0,14636,0,0,14639,14642,0,
+0,0,0,14644,0,0,0,0,14645,14646,0,14653,0,0,14654,0,14658,0,14661,0,0,0,14665,0,
+0,0,14668,0,0,0,0,0,0,0,0,0,14669,0,0,14670,0,0,0,14680,0,0,14681,0,0,0,0,0,
+14682,14683,0,0,0,0,14686,0,0,0,0,14687,14697,0,0,0,0,14699,14705,14711,0,0,0,0,
+0,0,0,0,0,0,14712,0,0,0,14713,0,0,0,0,14719,0,14720,14721,14726,0,0,0,14728,
+14729,0,0,0,0,14731,0,0,0,0,0,0,0,14733,14736,14737,0,0,14740,14742,0,0,0,14744,
+14753,0,0,0,0,14755,14758,14760,0,0,0,0,0,14761,14762,14765,14771,0,14772,0,
+14773,14774,0,0,14775,0,0,14776,0,0,0,0,14777,0,14779,0,0,14782,0,0,14785,14786,
+14788,0,0,0,0,0,14795,0,0,0,0,0,0,14798,0,14803,14804,14806,0,0,0,14809,0,0,0,0,
+0,0,14810,0,0,0,0,14811,0,14812,0,0,0,0,0,14815,0,0,0,0,0,0,0,0,14816,0,14818,0,
+0,0,0,0,0,14819,0,14820,0,14823,0,0,0,14824,0,0,14826,14827,0,0,0,0,0,0,0,0,0,0,
+0,0,14830,0,0,0,0,0,14833,0,14845,0,0,0,0,0,14846,0,0,14847,14871,0,14873,0,
+14876,0,14877,14878,14880,0,0,0,0,0,14881,0,14882,14894,0,0,0,0,14895,0,14907,0,
+14908,0,0,0,0,0,0,0,14911,0,0,0,0,14920,0,0,14931,0,14932,14934,14935,0,0,14936,
+0,14945,0,0,0,0,0,0,0,14947,0,0,14948,14949,14951,0,0,14952,0,0,0,14964,14973,0,
+0,14990,0,0,0,0,14995,0,0,14998,15001,0,0,15002,15020,0,0,0,0,0,0,15021,0,15022,
+0,0,0,0,15023,0,0,15025,15029,15033,0,0,0,15034,0,0,0,15035,0,0,0,0,0,15043,
+15044,0,0,0,15045,15046,15048,15050,0,15065,0,0,0,0,15066,0,0,15075,15082,15084,
+0,0,15085,15086,0,0,0,0,0,0,0,0,15088,0,0,0,15089,0,0,0,0,15094,0,15096,0,15097,
+0,15100,0,0,15102,0,0,0,0,0,0,0,0,15105,0,0,15106,0,15109,15113,0,0,0,15115,0,
+15118,0,0,0,0,0,0,15119,0,0,15120,0,0,0,0,0,15123,15129,0,0,0,15130,0,15131,0,0,
+15134,0,15135,0,0,0,15137,15138,0,0,0,0,0,0,15139,0,0,0,0,0,15140,0,0,15154,
+15162,0,15169,15170,0,15175,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15177,0,15178,15179,0,
+0,0,0,0,15183,0,0,0,0,0,0,0,0,0,0,0,0,15185,15187,0,15194,15195,15196,0,0,0,0,0,
+0,0,15204,0,0,0,0,15206,0,0,0,0,0,15207,0,0,0,0,0,0,0,0,0,15213,0,15214,0,0,0,0,
+0,0,0,15232,0,0,0,0,15234,0,15238,15240,0,15248,0,0,0,0,15250,15251,0,0,0,0,0,0,
+0,15252,0,0,0,15255,15262,15266,0,0,0,15267,0,0,0,15277,15279,0,0,0,15280,15281,
+15282,0,0,0,0,0,15285,0,0,0,0,15289,0,0,15291,0,0,0,0,0,0,0,15296,15297,0,0,
+15304,0,0,0,0,15306,0,0,0,0,0,0,15307,15308,0,15309,0,0,15311,0,0,15312,15313,0,
+0,0,0,0,0,0,0,0,0,0,0,15314,15317,0,0,0,15318,15319,0,0,0,0,15320,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,15321,0,0,0,0,0,15324,0,15325,15326,0,15330,0,0,0,0,15334,0,
+15335,0,15341,0,0,15342,0,0,15343,15344,0,0,0,0,15345,0,0,0,0,15347,0,0,15348,
+15349,15350,0,15356,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15357,0,15358,0,0,0,0,0,0,0,
+15359,15360,15364,0,15380,0,0,0,0,0,15392,0,0,15393,0,15395,0,0,0,0,0,0,0,0,
+15396,0,0,15397,15398,0,0,0,0,0,0,0,0,0,15399,0,15400,0,0,0,15402,0,15405,15410,
+0,0,0,0,15411,0,0,0,15412,0,15416,0,0,0,0,0,0,0,15428,0,15435,0,0,15438,0,0,0,0,
+15439,0,0,0,15440,0,0,0,15441,15449,15451,0,0,0,0,0,0,0,15452,0,0,15455,0,0,0,
+15456,0,0,15458,0,15460,15461,0,0,0,0,0,15462,15464,0,15465,0,0,15466,0,0,15467,
+0,0,0,0,0,15468,0,0,0,0,15481,0,0,15484,0,15485,15486,0,0,0,15487,0,0,0,0,0,
+15488,0,15492,15498,0,0,0,15499,0,0,0,15500,0,15501,0,0,15512,0,15522,0,0,0,
+15524,0,15525,15526,0,0,15527,0,0,15545,15546,0,15548,15552,0,15553,0,0,0,15554,
+0,15555,0,15557,15565,15573,15577,15578,0,15582,0,15583,0,0,0,0,0,0,0,0,0,0,0,0,
+0,15586,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15588,0,0,0,0,0,15589,0,0,0,0,0,0,0,15593,
+15594,0,0,0,0,15595,0,0,0,0,0,0,15596,0,0,0,15597,0,0,0,0,15600,0,0,15601,0,0,0,
+0,15602,15603,0,0,0,0,0,0,15604,0,15609,0,0,15612,0,0,15613,0,0,15615,15617,
+15618,0,0,15620,0,15636,15637,0,0,15649,0,0,0,0,0,0,0,15650,0,0,15651,0,0,0,
+15656,0,15658,0,0,0,15664,0,0,15665,0,0,15668,0,0,0,0,0,15669,0,0,15674,0,0,
+15675,0,0,0,0,15676,0,0,0,0,0,0,0,0,0,0,0,15677,0,0,0,0,15678,0,0,0,0,0,15679,0,
+0,15681,0,15686,0,0,0,0,15687,0,15688,0,0,15690,0,0,0,15697,0,15699,15700,0,0,0,
+0,0,0,0,0,0,15701,0,15702,15703,0,15704,0,15705,0,15707,0,15709,0,15712,15716,0,
+15717,0,15718,15720,0,0,0,0,0,15724,0,0,0,15725,0,15726,0,0,0,15740,0,15745,
+15746,0,0,15747,0,15748,0,0,0,0,0,15749,0,0,0,15752,0,15753,0,0,0,0,0,0,15759,0,
+0,0,15765,0,0,0,0,0,0,0,0,0,15767,0,0,0,15771,0,0,15784,0,0,0,0,15785,15790,
+15791,0,0,15792,0,0,0,15807,0,15811,0,0,0,0,0,0,0,0,0,0,0,0,15818,0,0,0,15819,0,
+0,0,0,15821,0,0,0,0,0,15822,15824,0,0,15827,0,0,15829,15831,0,15832,0,0,15833,0,
+15835,15838,15839,15843,0,0,0,0,0,0,0,0,0,0,0,15844,0,0,0,0,15845,15851,15856,0,
+0,0,0,0,0,0,15858,15860,0,15861,0,0,0,15864,0,0,0,0,15865,0,0,0,0,0,0,15866,0,
+15872,0,0,15876,0,0,0,0,15877,15878,15883,15885,0,0,15888,0,0,0,0,0,15889,15890,
+0,0,0,0,0,0,0,0,15892,0,0,0,0,0,0,0,15893,0,0,15894,0,0,0,15895,0,15896,15897,0,
+15898,15901,15902,0,15911,15915,0,15916,0,15924,15935,0,15937,0,0,0,0,0,15950,0,
+0,0,0,0,0,0,15958,0,0,0,15961,0,0,15966,0,15967,0,0,15977,0,0,15978,0,0,15981,
+15982,15983,0,0,0,0,0,0,0,15986,0,0,0,15990,0,15991,15995,15998,0,15999,0,16000,
+0,0,0,0,16008,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16009,16011,0,16013,0,0,0,0,
+0,0,0,0,16014,0,0,16015,16023,16024,16025,0,0,16026,0,16030,0,16032,0,16033,0,0,
+0,0,0,0,16035,16036,16037,0,0,0,0,0,16039,0,0,0,0,16041,0,0,0,0,0,16043,16044,0,
+0,16047,0,0,0,16048,0,0,16049,16050,16052,0,0,0,0,0,16055,0,0,0,0,0,0,0,0,16056,
+0,0,0,0,0,0,0,16058,16060,16061,0,0,16063,0,0,16064,0,0,0,16067,16068,0,0,16069,
+16078,0,0,0,16079,0,0,0,16080,0,16081,0,0,0,16088,0,0,0,0,0,0,0,0,0,0,0,16089,
+16093,0,16097,0,16103,0,16104,16105,0,0,16256,0,0,16259,0,0,0,0,0,0,0,16260,
+16261,0,0,16262,0,0,16263,0,16268,0,0,0,0,0,0,0,16269,0,0,16270,16273,0,16274,0,
+0,0,0,16275,16276,16277,16280,0,0,0,16281,16284,0,0,0,16286,0,16289,0,0,0,0,0,0,
+0,0,0,16290,0,0,0,0,16291,0,0,0,0,0,0,0,16292,0,0,0,0,0,0,0,0,16293,16295,16297,
+0,16302,0,16304,0,16305,0,16306,0,0,0,0,0,0,0,0,0,0,0,0,16307,16308,16312,0,0,0,
+0,0,0,16313,16315,0,16318,0,0,0,16321,0,0,0,0,0,0,0,16326,16333,16336,0,0,0,0,
+16337,16340,0,0,0,0,0,16345,0,0,16346,0,0,0,0,0,0,0,0,0,16347,0,0,16348,0,0,0,0,
+16349,0,0,0,16350,0,16357,0,0,0,0,16359,16360,0,0,0,0,16362,16363,16364,16365,0,
+0,16366,0,0,0,0,16367,16368,0,16369,16374,0,0,0,0,0,0,0,16376,0,0,0,0,16378,
+16379,0,16380,0,0,0,16381,16383,0,0,0,0,0,16390,0,0,0,16399,0,16402,16404,16406,
+16407,0,0,0,16409,16411,0,0,0,0,16412,0,16413,16415,16423,0,0,0,0,0,16424,0,0,0,
+16428,16434,16435,16449,0,16450,16451,0,0,0,16453,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+16454,0,0,16456,16458,0,0,16459,0,0,16460,0,0,0,0,16462,0,16463,0,0,16466,0,0,0,
+0,0,16479,0,0,16480,0,16481,16484,0,0,0,0,0,0,0,0,0,0,16485,0,0,0,0,0,0,16489,0,
+0,0,0,0,16491,0,0,16498,0,0,16503,0,16505,0,0,0,0,0,0,0,0,16506,0,0,0,16508,
+16509,0,0,0,0,0,0,0,0,16511,16513,0,0,0,16516,0,16517,0,16519,0,16529,0,0,16531,
+0,0,0,0,0,0,16534,0,0,16541,16542,0,0,0,0,0,0,0,0,0,16543,16547,16548,0,0,0,
+16551,0,16552,0,0,0,16553,0,0,16558,0,0,16562,16565,0,0,0,16570,0,0,0,16573,
+16585,0,0,0,16586,16587,16595,0,16596,0,16598,0,0,0,16600,0,0,0,0,0,0,0,0,0,0,0,
+0,0,16601,0,0,0,0,16603,0,0,0,0,0,0,0,16604,16612,0,0,0,0,16613,0,16618,0,0,0,
+16640,0,0,16641,0,0,0,0,0,0,16645,0,0,0,0,16646,0,0,0,0,0,0,16651,0,0,0,0,16653,
+16654,0,0,0,16655,0,0,16656,16667,0,0,0,0,16671,0,16672,0,0,0,16673,0,0,0,0,0,
+16676,0,16686,0,0,0,0,16689,0,16690,0,16692,0,16693,0,16694,0,16696,0,0,0,16705,
+0,0,0,0,0,0,16707,0,0,0,16709,0,0,0,0,16711,0,16712,16713,0,0,0,16715,0,0,0,0,
+16716,0,0,0,0,0,0,0,0,0,16718,16724,0,0,16726,16727,0,0,0,0,0,0,0,16728,0,16729,
+0,0,16730,0,0,0,0,0,16731,0,0,0,16732,0,0,0,0,16734,16738,0,0,0,0,0,0,0,0,16743,
+0,0,16745,0,0,0,0,0,16749,0,16752,0,0,0,0,16756,0,0,16758,0,16759,0,0,0,0,0,
+16760,0,0,0,0,0,0,0,16762,0,16769,0,16770,0,16772,0,0,0,16777,16780,0,0,0,0,0,0,
+16781,0,0,16782,0,16784,0,0,16785,16787,16792,0,0,16794,0,0,0,16798,0,0,16809,0,
+0,16814,16816,16817,0,16819,0,0,0,0,0,0,0,0,0,0,16820,0,0,16836,16839,0,0,16841,
+16851,16857,0,0,16858,16859,0,0,16860,0,0,0,0,0,0,0,0,16862,0,16863,0,0,0,0,0,0,
+0,16864,0,0,0,0,0,0,0,16876,0,16881,16882,0,16885,16886,0,16887,0,0,0,16889,
+16891,0,0,0,0,0,16894,16895,0,0,0,0,0,0,0,0,0,0,0,16897,0,16898,0,0,0,0,0,16913,
+0,0,16924,16925,16926,0,0,16927,0,0,0,16937,16938,0,0,0,16940,16941,0,0,0,16942,
+16945,0,16946,16949,16950,0,0,0,16952,16955,0,0,0,16965,0,16969,0,0,16975,0,0,
+16976,0,0,0,0,16978,0,0,16981,0,16983,16989,0,0,0,0,16990,0,0,16991,0,0,0,16993,
+0,16994,16996,17000,0,0,0,0,0,17002,17004,0,17006,0,0,17007,0,0,0,0,17008,17013,
+17014,0,0,0,0,0,0,0,0,0,17021,0,17031,0,0,0,0,0,17033,17036,0,17038,0,0,17039,0,
+17045,0,0,17046,17047,0,0,0,0,17048,0,17049,17050,0,17051,17053,0,17054,0,17055,
+0,0,0,0,0,17063,0,0,17064,0,0,0,0,0,0,0,17065,0,0,17068,0,0,0,0,0,17072,0,0,0,0,
+0,0,17073,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17074,0,17080,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,17081,17083,17084,0,0,0,17085,0,0,0,0,17092,0,0,0,0,0,0,0,
+0,0,17093,0,17095,17102,0,0,0,0,0,0,17103,0,0,17105,0,17107,0,0,0,0,17114,0,0,0,
+0,0,17115,17125,17127,0,0,17128,0,0,0,17129,17130,0,17131,0,0,0,0,0,17132,17135,
+17145,0,0,0,0,0,0,0,0,17146,0,17147,0,17148,0,0,0,0,0,0,17149,17150,0,17151,
+17153,0,17155,0,0,0,0,17163,17171,0,17174,0,0,0,0,17179,0,0,17182,17185,0,0,0,0,
+0,17186,0,0,17188,0,0,0,0,0,0,0,17189,17191,0,17194,0,0,0,0,0,0,0,0,0,17195,
+17196,17203,17204,0,0,17205,17217,0,0,0,0,0,17218,0,0,0,0,17219,0,17220,0,17221,
+0,0,17230,0,0,0,0,0,17236,0,17238,17239,0,0,0,17241,17244,0,0,17245,0,17248,0,0,
+17251,0,17252,0,0,17264,0,17266,0,0,0,17268,0,0,0,0,17271,17272,0,17273,0,17295,
+0,17302,0,17305,0,0,0,17306,0,0,0,0,0,0,0,17308,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+17309,0,17310,17313,0,0,0,0,17314,17315,0,17317,0,0,0,0,17318,0,0,0,0,0,0,0,
+17320,0,0,0,0,0,0,17334,0,17344,17348,0,0,0,17350,17351,0,0,17353,0,0,17354,0,0,
+0,0,0,0,0,0,0,17355,0,0,0,0,0,0,17356,17357,0,0,17359,0,0,0,17371,0,17372,0,0,0,
+17393,0,0,0,0,17394,0,0,0,0,0,17395,0,0,17399,0,0,0,17401,17417,0,17418,0,17419,
+0,0,0,0,0,17422,17423,0,0,0,0,0,17424,0,0,0,0,0,17428,17429,17433,0,0,0,17437,0,
+0,17441,0,0,17442,0,0,17453,0,0,0,0,0,0,0,0,17454,17456,17462,0,0,17466,0,0,
+17468,0,0,17469,0,0,0,0,17470,0,17475,0,0,0,0,0,17479,0,0,0,17483,17484,0,17485,
+0,17486,0,17491,17492,0,0,17493,0,17494,17495,0,0,0,17496,0,0,0,17497,0,0,0,
+17502,0,0,0,0,0,17503,0,17505,0,17507,0,0,0,17512,17513,17514,0,0,17515,0,0,0,
+17519,0,0,0,17522,0,0,17523,0,0,0,0,0,0,0,0,0,17527,0,0,0,17528,0,0,0,17534,0,0,
+0,0,17536,0,0,0,17539,0,17540,17543,17549,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17556,
+0,0,17558,0,17559,0,0,17560,0,0,0,17563,0,0,0,0,0,0,17564,0,0,17565,17566,0,
+17567,0,0,0,0,0,0,17569,17570,0,17575,0,0,0,0,0,0,0,0,0,0,0,17581,0,0,0,17582,
+17583,0,17586,0,0,17587,0,0,0,0,0,0,0,17588,0,0,0,0,17596,17597,0,0,17598,17600,
+0,0,0,0,0,0,17601,0,0,0,17604,0,0,17605,0,0,17607,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,17612,0,0,17618,0,17621,17622,0,0,0,0,17623,0,0,17624,0,0,17630,0,0,
+17631,17633,17634,0,0,0,0,0,0,0,17635,0,0,17636,0,0,17637,0,17638,0,17640,0,0,0,
+0,0,0,0,0,0,0,17641,0,0,0,0,0,0,0,0,0,0,17643,0,0,0,0,17645,0,0,0,0,0,0,0,0,
+17646,17662,0,0,0,0,0,0,0,0,0,17663,17664,0,17665,17666,0,0,0,17669,17671,17673,
+0,17679,0,0,0,0,0,0,0,17684,0,0,0,17686,0,17714,0,0,17720,17722,17726,0,0,17728,
+0,0,17729,0,0,0,17732,0,17733,0,17734,0,0,0,17735,0,0,0,0,17737,0,0,0,0,17739,0,
+0,0,17741,17742,0,0,0,0,17743,17744,17745,0,0,0,17749,0,17750,17751,17752,17754,
+17761,17762,0,17763,0,17766,0,17772,0,0,0,0,0,17775,0,0,0,0,0,0,0,17776,0,0,
+17777,0,0,17778,17779,0,17782,17783,0,0,0,0,0,0,0,0,0,0,17784,0,0,0,0,0,0,0,
+17821,0,0,0,17822,0,0,0,17823,17825,0,0,0,0,0,17826,17831,17832,17833,0,0,17845,
+0,0,0,17846,0,0,0,17848,17850,17854,0,17855,0,0,17859,0,0,0,0,0,0,17860,17861,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17870,17871,0,0,0,0,0,0,17872,0,0,0,17879,0,
+0,0,17881,17883,0,17884,0,17885,0,0,17886,0,0,17887,17891,17953,0,0,0,0,17954,0,
+0,17955,0,17968,0,0,17972,0,0,0,0,0,17974,0,0,0,0,17976,17978,0,0,17983,0,0,0,0,
+18003,0,0,0,0,0,18007,0,0,0,0,0,18009,0,0,0,0,0,0,0,18010,0,0,0,0,0,0,18012,0,0,
+18014,0,0,0,18015,0,0,0,18016,0,18017,0,0,0,18030,0,0,0,0,0,0,0,18031,0,0,18036,
+18037,18038,0,0,18049,18056,0,18057,18058,0,18059,0,0,0,0,0,0,0,0,18062,0,0,0,0,
+18064,0,0,0,0,0,0,0,0,18067,0,0,0,18068,0,0,18075,0,0,18078,18093,18094,0,0,0,0,
+0,0,0,0,18097,0,0,0,0,0,18098,18100,0,0,0,18108,0,18111,0,0,18112,0,18113,0,0,
+18115,18116,0,18118,0,0,0,0,18121,0,0,0,0,18123,0,0,0,0,0,0,0,0,0,18124,0,0,0,0,
+18125,18126,0,18127,0,0,18128,18135,0,0,0,0,0,0,0,0,0,18150,0,0,0,0,0,18151,
+18152,0,0,18156,18164,0,18166,18171,0,0,0,0,0,0,0,0,0,18172,18183,0,18184,0,0,0,
+0,18185,0,18187,0,0,0,0,0,18188,0,0,0,0,0,0,0,0,18189,0,0,18190,0,0,18191,18192,
+0,0,18194,18195,18196,0,0,0,18197,0,18203,0,18204,0,0,0,0,18205,0,0,0,18207,
+18208,0,0,18214,0,0,0,18215,18216,0,0,0,18220,0,0,18222,0,0,0,0,0,18223,0,18225,
+18231,0,18234,0,18235,0,0,0,0,18240,0,0,18241,18242,0,0,0,0,0,18243,18251,0,
+18253,0,18254,0,0,0,18266,0,0,0,0,0,0,18269,18270,18271,18273,18281,0,0,0,0,0,0,
+0,0,0,0,0,0,18282,0,18283,0,18284,0,0,0,0,0,0,18285,0,18287,18289,0,0,18290,0,0,
+0,0,18308,0,0,0,18310,0,0,0,0,0,0,0,0,0,0,0,0,18311,0,18312,18313,0,18315,0,0,
+18316,18320,0,18331,0,18332,0,18336,0,0,0,0,18337,0,18340,0,0,0,0,0,0,0,0,0,
+18341,0,18344,18345,0,18346,0,0,0,0,0,18348,0,18351,0,0,18356,0,0,0,0,0,0,18357,
+0,0,0,0,0,18367,0,0,0,18368,0,18369,0,18370,18371,0,0,0,18437,18444,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,18445,18450,0,0,0,0,18451,0,18452,0,0,0,18453,0,0,0,0,0,18455,0,
+0,0,18456,0,18457,0,18460,0,0,18461,0,0,0,0,0,0,0,0,18466,0,0,18467,0,0,0,0,
+18473,0,0,0,18476,0,18477,0,0,0,18478,18479,18480,0,0,0,18485,0,0,0,18486,0,0,0,
+0,0,0,18488,18490,0,0,0,0,0,0,18491,0,0,0,0,0,18495,0,0,18496,0,0,0,0,0,0,18505,
+0,18521,0,18522,18523,0,0,0,18525,18526,0,0,0,0,0,18527,0,0,0,0,18532,18533,0,
+18534,0,0,0,0,0,0,18535,18537,0,18538,0,0,0,0,0,0,18540,18541,18542,18543,0,
+18546,0,0,0,0,18553,18556,0,0,18558,0,0,18569,18571,0,0,0,18572,0,18574,0,0,0,0,
+18586,0,0,0,0,0,18588,0,0,18589,0,0,0,0,0,0,18590,0,18592,0,0,0,0,18594,0,0,0,
+18596,0,0,18597,18598,0,0,18601,0,0,0,0,18602,0,0,0,18603,18604,0,18605,0,0,0,0,
+18608,0,0,18611,0,0,0,0,0,0,0,0,0,18612,0,18616,0,0,18617,18619,0,0,0,18628,0,0,
+0,18629,0,0,18630,0,0,0,0,0,0,0,18631,0,18632,0,0,18635,18637,0,0,0,0,0,0,18641,
+18643,18648,0,18652,0,0,18653,0,18655,18656,0,0,0,18657,0,0,18666,18674,0,0,0,0,
+18677,18684,18685,0,0,18686,0,0,18690,0,0,0,0,0,0,0,18695,18696,0,0,0,0,0,0,0,0,
+0,0,18697,0,0,18700,0,0,0,0,0,0,18702,0,18708,0,0,18709,0,18710,0,0,18711,0,
+18714,0,0,18718,0,0,0,0,0,0,18719,0,0,18722,0,18726,0,0,0,0,0,0,0,0,0,0,0,0,0,
+18731,0,0,0,0,0,18739,18741,0,0,18742,0,18743,18744,18746,18748,0,18752,18753,0,
+0,18754,18763,0,18765,0,0,0,18766,0,0,0,18769,0,0,0,0,0,18773,18778,18779,18781,
+0,0,18784,18787,0,18788,0,18793,0,0,0,0,0,0,18795,0,0,18800,0,0,0,0,0,18801,
+18804,0,0,0,0,0,0,0,18806,0,0,0,18811,18815,18816,0,0,0,0,18825,0,0,18827,18829,
+0,0,18830,0,0,0,0,18831,0,0,18832,0,0,0,0,18833,0,18840,0,18841,0,18842,0,0,0,0,
+18843,0,18844,0,0,0,0,0,0,18845,18846,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+18848,0,0,0,18853,18860,0,0,18862,18866,0,0,18867,18869,0,0,18874,18881,18891,0,
+0,0,0,0,0,0,0,0,0,18892,0,0,0,0,0,0,0,0,18895,0,18896,0,0,0,18900,0,0,0,18901,0,
+18902,18915,18916,0,0,0,0,0,0,0,0,18919,0,0,0,0,0,18920,0,0,0,18921,18929,0,0,0,
+0,18930,0,0,0,0,0,0,18932,0,0,0,0,18934,18942,0,0,0,18951,18957,0,0,0,0,18958,0,
+0,0,0,18959,18960,0,0,18961,0,0,18962,0,0,0,0,18963,18964,0,0,0,18965,0,18967,0,
+0,0,0,0,0,0,0,0,18968,0,18969,0,18970,18973,18976,0,0,0,0,0,0,18977,0,0,0,18981,
+0,0,0,18990,0,18998,0,0,0,0,0,18999,19003,0,0,19005,0,0,0,19006,0,0,0,0,0,0,
+19008,19011,0,0,19018,0,0,19019,0,19024,0,19031,19032,0,19039,0,19041,19050,0,0,
+0,19051,19055,19056,0,19059,19063,19064,0,0,19088,0,0,0,19093,19094,0,0,0,0,
+19095,0,19096,0,0,0,19097,0,0,19098,0,19099,19100,0,0,19103,0,0,0,0,0,0,0,19111,
+0,0,0,0,0,0,19112,0,0,0,19116,19117,0,19121,19122,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,19123,19124,0,0,0,0,0,0,0,19125,19126,0,19128,0,0,0,0,0,0,0,0,0,0,
+19129,19130,19131,19132,0,0,19146,0,0,19147,19156,19158,0,0,0,0,0,0,0,0,19182,
+19185,0,0,19187,0,0,0,19193,0,0,0,0,0,19194,0,19197,0,0,0,0,19198,0,0,0,0,0,0,0,
+0,0,0,19202,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19203,0,19205,19210,
+0,0,0,19213,0,19218,0,0,0,19223,19229,0,0,19230,0,0,19231,19232,19233,19239,0,0,
+0,0,0,19240,0,19248,19249,0,0,0,0,19254,0,19256,19258,19259,0,0,19261,0,19266,0,
+0,0,19272,0,19278,19281,19282,0,0,0,0,0,0,0,0,0,0,0,0,19283,0,0,19284,0,0,19285,
+19287,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19288,19291,0,19292,0,0,0,0,19297,0,19298,0,0,
+0,0,19302,19303,0,0,0,0,19304,19305,0,0,0,0,19314,0,0,19315,0,0,19321,0,0,0,0,0,
+0,0,19322,0,19333,0,19334,19335,0,19336,19337,0,0,0,0,0,0,0,0,0,0,0,19346,0,0,
+19353,0,19354,19362,0,19366,19367,0,0,19369,0,19375,0,19377,19380,19388,0,0,0,0,
+0,19389,19390,0,0,0,0,19392,0,0,0,0,0,19402,0,0,0,0,0,0,0,0,19412,0,0,19413,
+19422,0,19424,0,0,0,19425,0,0,0,19428,0,0,0,0,19431,0,0,0,0,0,19432,0,0,0,0,0,
+19448,19459,0,0,19461,0,19462,19463,0,19467,19474,19482,0,0,0,0,19494,0,0,0,0,
+19501,0,0,0,0,0,0,0,0,0,0,19502,19504,0,0,0,0,0,0,0,19505,0,0,0,0,19506,19507,0,
+0,0,19508,0,0,19511,0,0,19514,0,19515,0,19516,0,19518,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,19530,0,19537,19538,0,19543,19546,0,19547,19551,0,0,0,0,0,0,19552,
+19553,0,0,0,0,0,0,0,0,0,0,0,0,19555,0,0,19556,0,0,0,0,0,0,0,0,0,0,0,0,19560,
+19561,0,0,19562,0,0,0,0,0,0,19565,19567,0,19568,0,0,0,19569,19570,0,19578,0,0,0,
+0,19580,0,0,0,0,19581,19584,0,0,0,0,0,0,0,19585,19586,0,0,0,19587,19588,0,19589,
+0,0,0,0,0,0,19592,19593,19599,0,19600,0,0,19604,0,0,19605,0,19606,19608,19610,0,
+19613,19614,0,0,0,0,0,0,19616,19617,0,0,19618,0,0,19619,0,0,0,19620,19621,19631,
+0,0,19632,19634,19636,0,19643,0,0,19644,19658,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,19659,0,0,0,0,0,0,0,0,0,0,0,19675,19677,0,0,0,0,19679,0,19683,0,19684,0,0,
+0,0,0,0,19687,0,0,0,0,0,0,0,0,19688,19689,19692,0,0,0,0,0,0,0,19695,19697,0,0,0,
+0,0,19698,19699,0,0,19700,0,19702,0,0,19703,0,0,0,0,0,0,19704,19708,0,19710,0,
+19713,0,0,0,19715,0,0,0,0,19718,0,0,0,0,0,0,0,19720,0,19722,0,0,19725,0,0,0,0,0,
+0,0,0,0,0,0,0,0,19730,0,0,0,0,0,19731,0,19734,19735,19739,0,0,19740,0,19741,0,0,
+0,19746,0,0,19747,0,19771,0,0,0,0,0,0,0,0,19772,19775,0,0,0,0,0,0,19778,0,0,0,0,
+0,19779,0,0,19780,19790,0,19791,0,0,19792,0,0,0,19793,0,0,19796,19797,0,0,0,
+19799,0,0,0,19801,0,0,0,0,19803,0,19804,0,19805,0,0,19807,0,0,0,19808,0,0,0,0,0,
+0,19809,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19816,0,19821,0,19822,19830,19831,0,0,
+0,19833,0,0,0,0,0,0,0,0,0,0,19838,0,0,0,0,19839,0,0,19843,0,0,0,0,19845,0,0,0,0,
+19847,0,0,19848,0,19849,0,0,0,0,0,0,0,19851,0,0,0,19854,0,0,0,0,0,0,0,0,0,19864,
+0,19865,0,19866,0,0,0,0,0,0,0,19868,0,0,19870,0,0,19871,0,0,19872,19873,19875,0,
+19880,19882,19884,0,0,19885,19886,19888,0,0,0,0,0,0,0,0,0,0,0,0,19890,19892,
+19893,0,0,19894,0,0,0,19895,0,19896,19902,0,0,19903,0,0,19905,0,0,0,19906,0,
+19908,0,19909,19911,0,0,0,19913,19920,0,19938,19939,19940,0,0,0,0,0,0,0,19942,0,
+19943,0,19945,0,0,0,19951,19952,19954,19960,0,19965,0,19971,0,0,0,0,0,19975,0,
+19976,0,19990,0,0,19991,0,19993,0,19995,0,0,0,19998,19999,20001,0,20003,20005,0,
+20011,20012,0,0,0,0,0,0,20014,0,20020,0,0,0,0,20021,0,0,0,0,0,20023,20024,0,0,0,
+0,0,20025,0,0,20027,0,0,20029,0,0,20032,0,0,0,0,20044,20045,0,20048,20049,0,0,
+20050,0,20052,0,0,20054,20057,0,0,0,0,0,0,0,0,0,20059,0,0,20061,0,20062,0,20064,
+0,0,20066,0,0,20067,0,0,0,0,20069,0,0,0,0,0,0,20070,20071,0,0,0,0,0,0,0,0,0,0,0,
+20072,0,0,20073,20074,0,0,0,0,0,20075,0,20078,0,0,0,0,20080,0,20081,0,0,0,0,0,0,
+20095,0,20098,0,0,0,0,0,0,0,20107,0,0,0,0,0,0,0,0,20112,0,0,0,20113,20114,0,0,0,
+20115,20123,20124,0,0,0,20131,20133,20134,0,0,0,0,20136,0,0,20137,20138,20150,0,
+20152,0,0,0,20153,0,0,20154,0,0,0,20158,0,20163,0,0,20164,0,0,0,0,0,0,0,20166,0,
+20168,0,20170,0,20175,0,0,20178,0,0,0,0,20223,0,0,0,0,20224,0,20226,0,0,20230,0,
+20231,0,0,0,0,20232,0,0,20233,20234,0,20244,0,20247,0,0,0,0,0,0,20249,0,0,0,
+20250,0,0,0,0,20251,0,20253,0,20254,0,0,0,0,20256,0,0,20264,0,0,0,0,20266,0,0,0,
+20278,0,0,20279,20282,0,0,0,0,0,20283,0,20284,0,20285,0,20287,20290,0,0,0,0,
+20292,0,0,0,0,20293,20297,0,0,0,0,0,0,20299,0,20300,20303,0,0,0,0,0,0,20307,0,0,
+20308,0,20309,0,20310,0,0,0,0,0,0,20312,0,0,0,20314,0,0,0,0,20315,20316,0,20322,
+0,0,0,0,0,0,20339,0,0,0,20342,0,0,0,0,20352,0,0,0,0,0,0,0,0,0,0,20362,0,0,20365,
+0,20375,20377,0,0,0,0,0,0,0,0,0,0,0,20378,20379,0,20380,0,0,20381,0,20382,0,
+20383,0,20388,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20390,20392,20393,0,0,20395,0,0,0,0,0,
+20396,0,0,0,0,0,0,0,0,20398,20415,0,0,0,20417,0,0,20420,0,0,20426,20428,0,20431,
+0,0,20432,0,20433,20434,20435,0,0,0,0,20440,0,0,0,0,0,20442,0,20443,0,20446,0,0,
+0,0,20448,0,20451,0,0,0,0,0,0,0,0,0,20452,20453,0,0,20454,0,0,0,0,0,0,20457,0,
+20458,0,0,0,20465,0,0,0,0,0,20469,0,0,0,20473,0,20476,0,0,0,0,0,0,0,0,20477,0,0,
+20485,0,0,20486,0,0,20487,0,20496,0,20497,0,0,20498,0,0,0,0,0,0,0,0,0,0,20499,
+20500,0,20501,0,0,0,0,0,20520,20527,0,20529,0,0,0,0,20539,0,0,20540,0,0,0,20543,
+0,0,0,20546,0,0,0,0,0,20548,0,0,20563,0,0,20564,0,20566,0,0,0,0,0,20589,0,0,0,0,
+20590,0,0,20593,20594,0,0,0,0,20595,0,20597,20598,0,0,0,20618,20620,0,0,0,0,
+20621,0,0,0,0,20627,0,0,0,0,0,20628,0,0,0,20629,0,20630,0,0,20639,0,0,0,0,0,
+20707,0,0,20709,0,0,0,20713,20714,0,0,0,0,0,20724,20725,0,0,0,0,20726,20728,
+20729,0,20733,0,20734,0,20735,20736,0,20737,0,0,20744,0,20745,0,20748,0,0,20749,
+0,0,0,0,0,0,0,0,20750,0,0,0,0,20754,0,0,0,20761,0,0,20763,0,0,0,0,0,0,0,20766,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,20767,0,0,0,0,20768,0,20769,20777,0,0,0,0,0,0,20785,0,
+0,0,20786,20795,20801,0,20802,0,20807,0,0,20808,0,0,20810,0,0,20811,0,20812,0,0,
+0,0,0,20813,0,0,20818,20820,20821,0,0,0,20822,0,20823,0,0,0,20826,0,0,0,0,0,0,0,
+20829,20830,20831,0,20832,20836,0,0,20839,0,0,20840,20842,0,20843,0,20844,0,
+20854,0,0,0,20855,0,0,0,0,20856,0,0,0,20869,0,0,20871,0,0,0,0,0,0,0,20873,0,0,0,
+0,0,20876,0,0,0,0,0,20880,0,0,20882,0,0,0,0,20883,20884,0,0,20890,0,0,0,0,0,0,0,
+0,0,20891,0,0,0,0,0,20905,0,20906,20910,0,0,20912,20915,0,0,0,0,0,20916,0,20917,
+0,20919,20920,20922,0,20927,0,20928,20929,20930,0,0,20935,0,0,20939,0,0,20941,0,
+0,0,20943,0,0,0,20946,20947,0,0,0,0,0,20950,0,20954,0,0,20955,20964,0,0,20967,0,
+0,0,0,0,20973,20975,0,0,0,20984,0,20987,20988,0,0,0,0,0,20989,0,0,0,20995,0,
+20998,0,20999,0,0,0,0,21000,21001,0,0,0,0,21008,0,21010,0,21016,0,0,0,21017,
+21018,0,0,0,0,0,21021,21026,21027,21028,0,0,21029,0,0,0,0,0,21030,0,0,0,0,0,0,0,
+0,0,0,0,0,0,21031,21032,0,0,0,0,0,21037,0,0,21038,0,0,0,0,0,0,0,0,0,21039,0,
+21041,0,21046,21047,0,0,0,21049,21053,0,0,21057,21064,21065,0,0,21066,21067,0,0,
+0,21069,0,0,0,21071,21072,0,0,21073,0,21074,0,0,21078,0,0,0,0,21079,0,0,21080,
+21081,0,0,21086,21087,0,21089,0,0,0,0,0,0,0,21091,0,21093,0,21094,0,0,0,0,0,0,0,
+0,21095,0,0,0,0,0,21096,0,21098,0,0,0,0,0,0,0,21099,0,0,21100,21101,21102,0,0,0,
+0,0,21103,0,21104,0,0,0,0,0,21105,21108,21109,0,0,21112,21113,0,0,0,0,0,0,21115,
+21122,21123,0,0,0,0,0,21125,0,0,0,0,0,0,0,0,21129,21131,0,0,21134,0,0,0,21137,
+21142,0,21143,0,0,21144,0,21145,21146,0,21152,21154,21155,21156,0,0,0,21160,0,0,
+0,0,0,0,21161,0,21164,0,21166,0,0,0,0,21170,0,0,0,0,21171,0,0,21172,0,21174,0,
+21175,0,0,0,0,0,21176,21179,21188,0,0,0,21189,0,0,21190,0,0,0,21192,0,0,21193,0,
+0,0,21198,0,21212,0,0,21213,0,0,0,0,0,0,21215,21216,0,0,21223,21225,0,21226,0,0,
+0,0,21227,21228,0,0,21229,0,0,0,0,21230,21236,0,0,0,0,0,0,0,0,0,0,0,0,0,21237,0,
+0,21238,21239,0,0,0,0,21256,0,0,0,0,0,21257,0,0,0,0,0,0,0,21259,0,0,0,21263,0,
+21272,0,21274,0,21282,0,0,0,0,0,0,0,0,21283,0,0,0,0,0,0,0,0,21294,0,0,21297,0,0,
+0,0,21298,0,0,0,21299,0,21300,21302,0,21316,0,21318,21322,21323,0,21324,0,21326,
+0,0,0,21327,21328,0,0,0,21352,0,0,21354,21361,0,0,0,0,0,0,0,0,0,0,0,0,0,21362,0,
+0,0,21363,0,0,0,0,0,0,0,0,0,21366,0,0,21367,21372,21374,0,0,0,21375,21377,0,
+21378,0,0,0,21380,0,0,0,0,0,0,0,0,0,0,21381,0,0,0,0,0,0,21382,0,21383,0,0,21384,
+0,0,21385,0,0,0,0,21389,21390,0,0,0,0,0,0,0,0,0,0,0,0,0,21397,21398,0,0,0,0,0,0,
+0,0,0,0,21399,0,21400,0,0,0,0,21402,0,0,0,21403,21404,0,21405,21406,0,0,0,21407,
+0,0,0,0,0,0,0,0,0,0,0,0,21408,0,0,0,0,21409,0,21421,0,21422,0,0,0,21425,21428,0,
+0,0,0,21429,0,0,0,0,0,21433,0,0,0,0,0,0,0,0,0,0,21434,0,21443,0,21444,21449,0,
+21452,0,21453,21454,0,0,0,21457,0,0,21458,0,0,0,21460,21461,0,0,21464,0,0,0,
+21473,21478,0,0,21479,0,0,21481,21483,0,0,0,0,0,0,0,0,21484,0,0,21485,21486,0,0,
+21488,0,0,0,0,0,0,21523,0,0,21525,0,0,0,0,0,0,0,21526,0,0,0,0,0,0,21529,21530,0,
+0,21531,0,0,21533,0,0,21539,21564,0,21567,0,0,0,0,0,0,0,0,21575,0,0,0,0,21577,0,
+0,0,0,0,21591,0,0,21604,0,0,0,0,0,0,0,0,0,21605,0,21606,0,0,21617,21618,21619,
+21620,0,0,0,0,0,0,0,0,0,0,0,0,0,21623,0,0,0,0,21631,0,21635,0,0,0,0,21639,21646,
+21653,21662,0,0,21663,21664,0,21666,0,0,21667,0,21670,21672,21673,0,21674,21683,
+0,0,0,0,0,21684,0,21694,0,0,0,0,21695,21700,0,21703,0,21704,0,0,21709,0,0,0,
+21710,0,0,0,0,0,0,0,0,21711,0,0,0,21712,0,21717,0,21730,0,0,0,21731,21733,0,0,0,
+0,21737,21741,21742,0,21747,0,0,0,21749,0,0,0,0,0,0,0,0,0,0,0,0,0,21750,0,0,0,0,
+0,21752,0,0,0,0,21753,0,0,0,0,0,0,21755,21756,0,21757,0,0,0,0,0,0,21760,0,0,
+21763,0,0,0,0,0,0,0,0,0,21764,0,0,21766,0,0,21767,0,0,0,0,0,0,0,0,0,21773,0,
+21774,0,0,21775,0,0,0,0,21776,0,0,21777,0,0,0,0,0,0,0,0,0,21780,21787,21788,
+21791,0,0,0,21797,0,0,0,0,0,21805,0,0,0,0,21806,0,21807,21809,0,21810,21811,0,
+21817,21819,21820,0,21823,0,21824,0,0,21825,0,0,21826,21832,0,0,0,0,0,21833,
+21848,21849,0,0,21867,21870,21871,21873,0,0,0,21874,0,0,0,0,0,0,0,0,0,21875,0,
+21878,0,0,0,21879,0,21881,21886,0,0,0,0,21887,0,0,21888,21894,21895,21897,0,
+21901,0,21904,0,0,21906,0,0,0,21909,21910,21911,0,0,21912,0,0,21913,21914,21915,
+0,21919,0,0,0,0,0,0,0,21921,0,0,21922,21933,21939,0,0,0,0,0,0,0,0,0,0,0,21944,0,
+0,0,0,0,21945,0,21947,0,0,0,0,0,0,0,0,0,0,21949,0,0,0,21950,0,0,0,0,0,0,0,0,0,0,
+0,0,0,21951,0,21952,0,0,0,0,0,0,0,0,0,21954,21957,0,0,0,0,21958,0,21959,0,0,0,0,
+0,0,21962,21963,0,0,0,0,0,0,0,0,21964,21965,0,0,21969,21970,0,0,0,21974,0,0,
+21980,21981,0,21982,0,0,0,0,0,21985,0,21988,0,21992,0,21999,0,0,0,0,0,0,22001,0,
+22002,0,0,0,0,0,0,22003,0,0,0,0,0,22004,0,0,0,22008,0,22009,22015,0,0,22016,0,0,
+0,22017,22019,0,0,0,0,0,0,0,0,0,22020,0,0,0,0,0,0,0,0,0,0,22021,22037,0,22039,0,
+0,0,22040,0,0,0,22048,22049,0,0,22053,22055,22056,22059,0,0,22060,22061,0,0,
+22064,0,0,0,0,22066,0,0,0,0,0,0,0,22073,0,0,0,22074,22075,0,0,0,0,0,0,0,22076,0,
+0,0,0,22077,22084,22099,0,0,0,0,0,0,0,22104,0,0,22107,0,22108,0,22109,0,22110,0,
+0,0,0,0,0,0,22111,22119,0,22120,22122,0,0,0,0,22125,0,0,0,22128,22129,0,0,0,0,0,
+0,22141,0,0,0,22142,0,0,22144,22146,0,22148,22149,22151,22154,0,0,0,22162,0,0,0,
+0,22164,22177,0,0,0,0,22179,0,22182,22183,0,0,22184,22188,0,0,0,0,0,0,0,0,22190,
+0,22194,22201,0,0,22208,0,22209,0,22212,0,0,22215,0,22223,22231,0,0,22232,0,
+22234,0,0,22235,22236,0,22237,0,22240,0,0,0,0,0,22241,0,0,0,22242,22246,22247,0,
+0,0,22259,22268,0,22269,0,0,0,0,0,0,0,22270,0,0,0,0,22271,0,22272,0,22277,0,0,0,
+0,0,22278,22280,22283,22286,0,0,22287,22289,0,0,22290,0,22293,0,0,0,0,0,0,0,0,0,
+0,22295,0,22301,22302,0,0,0,22305,0,22308,0,0,0,0,0,0,0,0,0,0,22315,0,0,0,22317,
+0,22334,0,0,0,22335,0,0,0,0,0,22336,0,22338,22344,0,22347,22349,0,22350,0,0,0,0,
+0,0,0,22357,0,0,0,0,0,22358,0,0,0,0,0,0,0,0,0,0,22359,22360,0,0,0,0,0,0,0,0,
+22361,22366,0,0,22369,0,22370,22373,0,0,0,0,0,22375,0,22377,0,0,0,0,0,22378,0,0,
+0,0,22381,0,0,0,0,22382,0,22383,0,0,0,0,0,0,0,0,0,22391,0,0,22392,22395,22396,
+22402,0,0,0,0,0,0,0,0,0,0,0,0,0,22405,0,0,22406,0,0,22408,0,0,22409,22410,0,0,0,
+0,0,0,22424,0,0,0,0,22426,0,0,0,22427,0,22428,0,22432,0,22435,22442,22443,0,0,0,
+0,22444,0,0,0,0,0,22446,0,22454,0,22455,0,0,0,22465,0,22470,0,22471,0,0,0,0,
+22472,22473,0,22487,0,0,0,22488,0,0,0,0,22489,0,0,22499,0,0,0,0,0,0,22514,0,0,
+22515,0,0,0,0,0,0,0,22516,0,0,0,22517,22520,0,0,0,22534,0,0,22535,0,0,22536,0,
+22540,22553,0,22555,0,0,0,0,22561,0,0,22562,0,0,0,0,0,0,0,0,0,0,0,22566,0,0,0,0,
+22567,22568,0,0,22575,0,22579,0,22582,22583,22585,0,0,0,0,0,22586,0,0,22587,0,0,
+22590,0,0,0,0,0,22591,0,22592,0,0,0,0,0,22593,0,22602,0,0,22604,0,0,22609,0,0,
+22618,0,0,0,0,0,0,22619,0,22624,22625,0,0,22638,0,0,0,0,0,22639,0,0,22640,0,0,0,
+0,0,0,0,22644,0,22645,22647,0,0,0,0,22652,22653,0,0,0,22654,0,22655,0,0,0,22656,
+0,0,0,0,0,0,0,0,0,0,22673,22675,22676,0,0,22678,22679,0,22691,0,0,0,0,0,0,0,
+22693,0,0,22696,0,22699,22707,22708,0,0,0,0,0,0,0,0,22718,0,22719,0,0,0,0,22723,
+0,0,0,22724,22725,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22726,22728,0,0,0,0,0,0,0,0,22729,
+0,0,22731,0,0,0,0,22732,22735,22736,0,0,0,0,22739,0,22749,0,0,22751,0,0,0,0,0,0,
+0,0,0,0,0,22758,0,0,0,0,0,22760,0,0,0,0,0,22764,22765,22766,0,22768,0,0,0,0,0,
+22769,22770,0,0,0,0,0,0,22771,0,0,22772,22775,0,22776,22777,22780,0,0,22782,
+22784,0,22787,0,22789,22796,0,0,0,0,0,22798,0,0,0,0,0,0,22802,0,22803,22804,0,0,
+0,0,0,0,0,0,0,0,22805,0,0,22810,22811,22814,22816,0,22825,22826,0,22831,22833,0,
+0,0,0,0,0,0,0,0,22834,0,22836,22838,0,22839,0,0,0,0,0,22840,0,22847,0,0,0,0,0,
+22856,22857,0,22858,22859,0,0,22862,0,0,22864,0,0,0,0,22865,0,0,0,0,0,0,0,0,0,0,
+0,22866,0,22867,22868,0,0,0,0,22869,0,22871,0,22872,0,22873,22881,22882,22884,
+22885,0,0,0,0,0,0,0,22886,22887,0,22894,0,22895,0,0,0,22900,0,22901,0,0,0,0,
+22904,0,0,0,0,22905,22907,0,0,0,22915,22917,0,0,22918,0,0,0,22920,0,0,0,22929,
+22930,0,0,0,22941,22942,0,0,0,22943,0,0,0,22944,0,0,0,0,0,0,0,22946,0,22947,0,0,
+22954,0,22956,0,0,22962,0,0,0,0,0,0,0,22963,0,0,22964,0,0,0,0,0,0,0,22965,0,
+22968,0,0,0,22969,0,0,0,0,0,22970,0,22971,0,0,0,0,0,22978,0,0,22979,0,22987,0,0,
+22989,0,0,0,0,0,0,22990,0,23005,0,0,0,0,0,0,0,23006,23007,23008,0,0,23023,23024,
+23029,0,0,0,0,23030,0,0,0,0,0,23032,0,0,0,0,0,23035,0,0,0,0,23038,0,0,0,23048,0,
+23049,23052,23053,23060,23061,0,23063,0,0,0,0,23067,23068,0,0,0,23069,23073,0,0,
+0,23127,0,23128,0,0,0,0,0,23129,0,23138,23141,0,23149,0,0,23150,0,0,0,23152,0,0,
+0,0,0,0,0,0,23154,0,0,0,0,23157,23159,23160,0,0,0,0,0,0,0,0,0,0,0,0,23180,0,0,0,
+0,23181,0,0,23188,0,23189,0,0,0,0,0,0,0,0,0,0,0,0,23195,0,0,23196,23199,0,0,0,0,
+0,0,0,0,0,23202,0,23204,0,23207,0,23209,23210,0,0,0,0,0,0,23227,23229,0,0,23230,
+23234,23238,0,0,0,23245,23246,23248,0,0,0,0,23249,23254,0,0,0,23265,0,0,0,0,0,0,
+0,23268,0,23276,0,0,0,0,23277,0,23297,0,23298,0,0,0,0,23299,0,23302,0,0,23303,
+23312,0,0,23314,0,23320,0,0,0,0,23324,0,23325,0,23328,0,23334,0,0,0,23337,0,0,0,
+0,23343,23344,23346,0,23348,0,0,0,0,0,0,0,0,23353,0,0,0,0,23355,0,23356,23358,0,
+0,0,23359,23360,0,23361,0,23367,0,23369,0,0,23373,0,23378,23379,0,23382,23383,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,23387,0,0,0,0,0,0,23388,23390,0,0,23393,23398,0,0,0,
+23399,0,0,0,23400,0,0,0,0,23401,0,0,0,23415,0,0,0,0,0,0,0,0,23416,0,23422,0,
+23443,23444,0,0,0,0,23448,0,23454,0,0,0,0,0,0,23456,0,0,23458,23464,0,0,0,0,0,0,
+23465,0,0,0,23470,23471,0,0,23472,0,0,0,23473,23496,0,0,0,0,0,0,0,0,23497,0,
+23499,0,0,23502,0,0,23503,0,0,23513,0,0,23515,0,0,0,23517,0,0,0,0,23518,23519,
+23521,23524,0,23525,23528,23539,0,0,0,0,0,23541,0,0,23544,0,0,23556,0,0,23557,0,
+0,0,0,0,0,0,0,0,0,0,0,0,23559,0,23560,0,0,23561,0,0,23566,0,0,0,0,0,23568,23569,
+23570,0,0,0,0,23571,0,23574,0,0,0,0,0,0,0,0,0,0,0,23575,0,23579,0,0,23581,0,0,0,
+0,0,0,23587,0,0,0,0,0,0,0,23596,23598,0,0,0,0,23602,23606,0,0,23607,0,23608,0,0,
+0,23614,23616,0,0,0,0,0,23618,0,0,23619,0,0,0,0,23621,23626,0,23627,0,0,0,0,0,0,
+0,23629,0,23630,0,0,0,0,23634,0,23636,0,0,0,0,0,0,23638,0,0,0,0,23640,23667,0,
+23669,0,0,0,23681,0,0,0,0,0,0,0,23682,0,23683,0,0,0,0,0,23684,0,0,0,23685,23689,
+0,23693,23694,23700,0,23702,0,23709,0,0,0,0,0,0,0,23712,0,0,0,0,0,23714,0,0,
+23715,0,0,0,0,23718,0,0,23720,0,0,0,0,23722,0,0,0,23726,23729,0,23741,23746,0,
+23748,0,0,0,0,23749,0,0,0,0,0,23750,0,0,0,0,23751,0,23753,0,0,0,0,23757,23765,0,
+0,0,23770,0,0,0,0,0,0,0,23771,0,23772,23781,0,0,23796,0,0,0,0,23798,0,23799,0,0,
+0,23802,0,0,23806,0,23807,0,0,23808,0,23809,0,23819,0,0,0,23821,0,23827,0,0,0,
+23829,0,0,0,0,0,0,0,23830,0,0,0,0,0,0,23832,23833,23834,23835,0,0,0,0,23837,
+23838,0,0,0,0,0,23846,0,0,0,0,0,0,23847,0,0,0,0,0,23879,23881,0,0,23882,23883,
+23895,0,23899,0,0,0,0,23901,0,0,0,0,0,0,23902,0,0,0,0,0,23903,23905,0,23906,0,
+23907,23918,23919,23920,0,23922,0,23924,0,23927,0,23934,0,23937,23941,0,23942,
+23946,0,0,0,0,0,23955,23956,23958,0,0,0,0,0,0,23959,0,23962,23965,0,23966,0,0,0,
+0,23967,23968,0,0,23973,0,0,23974,0,0,0,0,23975,0,23976,0,0,0,0,0,0,0,0,0,0,0,0,
+0,23977,0,0,0,0,0,0,0,0,23980,0,0,23984,0,23985,0,0,23987,0,0,23988,23990,23991,
+0,0,0,0,0,0,23992,0,0,0,0,0,0,0,0,23994,0,0,0,23998,0,0,0,0,0,0,0,0,0,23999,0,0,
+24003,0,24004,0,24006,0,0,0,24007,0,0,24008,0,0,0,0,0,0,0,24009,0,0,24010,0,0,
+24011,0,0,24013,24014,0,0,24015,24016,24027,0,24028,24029,0,24030,0,0,0,0,0,
+24033,24034,0,24035,0,0,24036,0,0,24044,0,24048,24049,24063,24067,0,24068,24070,
+0,0,24071,24078,24087,0,24090,0,0,0,24095,0,24098,24101,24104,24106,0,24107,0,0,
+0,24108,0,0,0,0,24110,24111,0,24113,0,0,24115,24120,0,0,0,0,0,0,24124,0,24125,0,
+24126,0,24127,0,0,0,0,0,24135,0,0,24136,0,24137,24142,0,0,0,24146,0,0,24147,
+24149,24154,0,24163,0,0,0,24165,24166,24167,0,0,0,0,0,0,0,0,0,0,24169,24170,
+24175,0,0,0,24178,0,0,24179,0,0,24181,0,24184,24197,0,24201,24204,0,0,0,0,0,0,
+24206,24212,24220,0,0,0,24224,0,0,0,0,0,0,0,0,24226,0,24234,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,24235,0,24236,0,0,0,0,0,24239,24240,24241,0,0,24248,0,0,24249,0,
+24251,0,0,0,0,0,0,24253,0,24268,0,0,0,24269,0,24271,24272,0,0,0,0,24273,0,0,
+24274,0,0,24279,0,0,0,0,0,0,0,24280,0,24293,24294,0,0,0,0,0,0,24296,0,0,24323,0,
+0,0,24329,24330,24331,24339,0,24351,0,0,24369,24370,0,0,0,24371,0,0,0,0,24372,
+24373,24374,0,0,0,0,0,24378,0,0,0,0,24379,0,24381,0,24383,24389,0,24390,0,0,
+24394,24395,24400,0,0,0,24401,24402,0,24406,0,0,0,24411,0,0,0,24415,0,24416,0,0,
+0,0,0,24417,0,24419,0,24422,0,24423,24428,0,24435,0,0,0,24439,0,0,0,24440,24442,
+24446,0,0,0,24447,24448,24449,24452,0,0,0,0,24453,24457,0,0,24458,24459,24460,0,
+24465,0,0,0,0,0,0,0,24470,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24471,0,24473,
+24474,24475,24476,0,24478,0,0,0,0,24480,0,0,0,0,0,0,0,0,0,0,24481,0,0,0,0,0,0,0,
+0,0,0,24482,24485,0,0,0,0,24486,0,0,0,24488,0,0,0,24494,0,0,0,0,24497,0,0,24498,
+0,0,0,24499,24506,0,0,0,24507,0,0,24511,0,0,24513,24514,0,0,0,0,0,24517,0,24518,
+0,24520,0,24521,24524,24525,0,0,0,0,0,24527,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24528,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24537,24539,0,24540,0,0,0,24548,0,0,0,0,0,24549,
+24550,0,0,0,24553,24554,0,24555,0,24556,0,24558,0,0,0,0,0,24560,0,0,0,24561,0,0,
+0,0,0,24562,0,0,0,0,0,0,0,0,0,0,0,0,0,24567,0,0,0,0,0,24569,0,0,0,24574,0,24575,
+0,0,0,0,0,0,0,0,0,0,0,24577,24581,0,24584,0,0,0,0,0,24585,0,0,0,0,0,24586,0,0,
+24587,0,24588,0,0,0,0,0,0,0,0,0,0,24590,24591,0,0,0,0,24592,0,0,0,0,0,0,0,24594,
+0,0,0,0,0,0,0,24596,24597,0,0,0,0,24602,24603,0,0,0,0,24604,0,0,24605,0,24610,0,
+0,24611,0,0,0,0,24612,24615,24616,24624,0,0,0,24627,0,24638,24639,0,0,0,0,24640,
+0,0,0,24655,24656,24657,0,0,0,0,0,0,0,0,24662,0,24663,24664,0,0,0,0,0,24665,0,0,
+0,0,24667,0,0,0,0,0,0,24668,24669,0,24670,24674,0,0,0,24675,0,24678,0,0,24679,0,
+0,0,24681,0,24683,0,0,0,0,24684,0,24685,0,0,24686,0,0,24688,24689,0,0,0,0,24690,
+24691,0,0,0,0,0,0,0,24697,0,24698,0,0,0,0,0,0,0,0,24709,0,0,0,0,0,24710,0,24712,
+0,0,0,0,0,0,24713,24714,0,24715,0,24716,24718,0,24719,0,0,0,0,24720,0,0,24725,0,
+0,24738,0,24749,24750,0,0,0,24752,0,0,0,24753,0,0,0,24758,0,0,0,0,0,24762,0,
+24763,0,0,0,0,0,0,0,24764,0,0,0,0,0,24765,24767,24768,0,24772,0,0,0,0,24773,0,0,
+0,0,24777,0,0,0,0,0,24785,0,24786,24788,0,0,0,24789,0,0,0,0,24794,24798,0,24799,
+24800,0,0,0,24803,0,24804,24806,0,24807,0,0,0,24810,0,0,0,0,0,0,24827,24828,0,
+24835,0,0,0,0,0,0,24836,0,0,0,0,0,24839,0,24843,24844,0,0,0,0,0,0,0,0,0,0,24847,
+0,0,24848,0,0,0,0,0,0,24849,0,24850,24851,0,0,0,24852,0,24853,0,0,0,0,0,0,0,0,0,
+24854,0,24855,0,0,24868,0,0,0,24883,0,0,0,24884,0,24895,24897,0,0,0,0,0,24899,0,
+0,0,0,0,24900,0,24913,0,0,0,0,0,0,24914,0,0,24917,24930,24931,0,0,0,24932,0,0,
+24939,0,0,24942,0,0,0,0,0,0,0,0,0,24945,24950,0,24951,0,0,24953,0,0,0,24954,0,
+24959,0,0,0,24961,0,0,24962,0,24964,24968,24970,24972,0,0,0,0,0,24976,0,0,0,
+24977,0,24982,0,0,24983,0,0,24984,0,0,0,24993,0,0,0,24994,0,0,25001,0,0,0,25003,
+0,0,25018,0,0,25023,0,0,0,25034,0,0,25035,25036,0,25037,0,0,0,0,0,0,0,25039,0,0,
+0,0,0,25040,0,0,0,0,0,0,0,25042,0,0,25043,25045,0,0,0,0,0,0,25049,0,0,25051,0,
+25052,25053,0,0,25054,0,0,0,25055,0,0,0,0,25057,25059,0,0,25060,25064,0,25065,
+25069,25070,0,0,0,0,25072,0,25073,0,25090,0,0,25092,25093,25101,0,0,0,0,0,0,
+25105,25108,0,0,25113,0,0,25115,25116,0,0,0,0,0,0,25117,0,0,0,25120,25121,0,0,0,
+0,0,0,0,25125,0,0,0,25126,0,25130,25134,0,25139,0,25143,0,0,0,25151,0,25161,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25163,0,0,0,0,0,0,0,25174,0,25175,0,25207,0,0,
+0,25209,0,0,0,0,25213,0,25219,0,25223,0,25225,0,0,0,25227,0,0,0,25228,0,0,0,
+25229,0,0,0,0,0,0,0,25231,25233,0,0,0,0,25237,25239,0,0,0,25243,0,0,0,25252,0,
+25257,25258,0,0,0,0,25260,25265,0,25268,0,0,25273,25324,0,25325,0,25326,0,0,0,0,
+0,0,0,0,25327,0,0,0,0,0,25328,0,0,0,0,0,0,25332,0,0,0,25333,0,0,0,25336,25337,
+25338,0,0,25343,0,25350,0,0,0,0,0,0,0,25352,0,25354,0,25375,0,25379,0,0,0,0,
+25384,0,0,0,0,0,0,0,0,0,25386,0,25388,0,25390,0,0,25399,0,0,25401,0,0,0,25402,0,
+0,0,25407,0,0,0,0,0,0,0,0,0,0,0,25413,25415,0,0,25417,0,0,0,0,0,0,0,25419,0,0,0,
+25421,0,0,0,25424,0,0,0,0,25433,0,0,0,0,0,0,0,0,0,25435,0,0,0,0,0,0,25436,0,0,0,
+25437,0,0,25440,0,0,0,0,0,0,25442,0,0,25443,0,25446,0,0,25449,0,0,0,25450,0,0,0,
+0,25452,0,25453,25454,25455,0,0,0,25456,0,25457,0,0,0,25459,0,25461,0,25468,0,0,
+0,0,0,0,0,0,25469,0,0,0,0,0,25471,0,0,0,0,0,25474,0,0,0,0,0,0,0,0,25475,0,0,0,0,
+25477,0,0,0,0,25483,0,0,0,0,0,25484,0,0,0,0,0,0,0,0,0,0,0,0,25485,0,25497,0,0,
+25498,0,25504,0,25510,0,25512,0,0,25513,25514,0,0,0,0,0,0,25517,25518,25519,0,
+25520,0,0,0,0,0,0,0,25521,0,25522,25527,25534,0,25536,0,25537,0,0,25548,25550,0,
+0,25551,0,25552,0,0,0,0,0,25554,0,25555,0,25556,25557,25568,0,0,0,25570,25571,0,
+0,0,0,0,0,25574,0,0,0,0,25579,0,0,0,25581,0,0,0,25582,0,0,0,0,0,0,0,0,0,25588,0,
+0,0,0,25589,0,0,0,0,25590,0,25591,25592,25593,0,25594,0,0,0,25596,0,25597,25615,
+0,0,0,0,0,25618,0,0,0,0,25619,25623,0,0,25629,0,0,25631,0,0,0,25635,25636,0,0,
+25649,0,0,0,0,25654,0,0,0,25661,25663,0,0,25671,0,0,25678,25698,0,25699,25702,
+25703,0,0,0,0,0,0,0,0,25704,0,0,0,0,0,25706,0,0,25710,0,25711,0,25712,0,25715,
+25716,25717,0,0,25718,25728,25732,0,0,0,25734,0,0,0,0,0,0,0,0,0,25737,0,0,25739,
+0,0,0,25740,0,25741,25745,0,25746,0,25748,25772,25778,0,0,0,0,0,25780,0,0,0,0,
+25781,0,25782,25784,25785,0,0,0,25789,0,0,0,0,0,0,25797,25801,0,0,0,25808,25809,
+0,0,25811,25814,25815,0,0,25817,0,0,0,0,0,0,0,0,25820,0,0,0,0,25832,25833,0,0,0,
+25846,0,0,0,25847,25848,0,0,0,0,0,0,0,0,0,25849,25850,0,0,25851,0,0,25852,0,
+25862,0,0,0,25863,25865,0,0,0,0,0,0,0,25867,25868,0,25869,25874,0,25875,0,25876,
+25877,0,0,0,0,25878,25902,0,0,0,0,0,0,0,25903,25904,25905,0,0,0,25908,25909,0,0,
+0,0,25910,0,0,0,0,0,0,0,25912,0,25913,0,0,0,0,0,0,0,0,25914,0,0,25916,0,0,0,0,0,
+25917,25927,0,0,0,0,25928,0,0,25930,0,0,0,25933,0,0,25938,25942,0,0,0,0,0,0,0,
+25945,0,25950,0,25956,0,0,25961,25962,0,0,25963,0,25964,25965,25966,0,0,0,0,0,
+25967,0,0,0,0,25968,0,0,0,25969,25971,0,0,0,0,0,25973,25975,0,0,0,0,0,0,0,25978,
+0,25981,0,0,0,25982,0,0,0,25984,0,0,0,0,0,0,0,25993,0,0,0,0,0,0,0,0,0,0,0,0,0,
+26002,0,0,0,26005,0,0,0,26006,26007,0,0,26014,26015,26016,0,0,0,0,0,0,26017,
+26018,26020,0,26022,26023,0,0,0,26024,26028,0,26029,26033,26034,26044,0,0,0,0,0,
+26046,0,0,26047,0,0,26049,0,26050,0,26051,0,0,0,0,0,26053,0,0,0,0,26054,26059,0,
+0,0,0,0,0,26060,0,26066,0,0,0,0,0,0,0,0,0,0,0,0,26067,0,26069,0,0,26071,0,0,0,
+26073,0,26074,26077,0,0,0,0,26078,0,0,0,26079,0,26090,0,0,26094,0,0,0,0,0,0,0,0,
+26095,0,0,0,0,0,0,0,0,0,0,0,26096,26101,0,26107,26122,0,26124,0,0,26125,0,0,0,0,
+0,0,26136,26141,26155,0,0,0,0,0,0,0,0,0,26164,26166,0,0,0,26167,0,26170,26171,0,
+0,26172,0,0,26174,0,0,0,0,0,0,0,0,0,0,0,0,0,26175,0,0,0,26176,26177,0,26321,
+26322,0,26323,0,0,26324,0,0,0,0,0,0,0,26325,0,26331,0,0,0,0,0,0,26335,0,0,0,
+26350,0,0,0,26379,0,0,26382,26383,26385,0,0,26392,26406,0,0,0,0,26411,0,0,0,0,0,
+26412,0,0,26420,0,0,26423,0,26424,26426,26432,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+26435,0,26436,0,0,0,0,0,26441,0,26444,0,0,0,26446,0,0,0,0,26447,0,0,0,0,26449,0,
+26450,26452,0,26453,26454,0,0,0,26455,0,0,0,26456,0,0,26458,0,0,26460,0,26463,0,
+0,0,0,0,0,0,0,26464,26470,0,0,0,0,0,0,0,0,0,26473,0,0,26474,0,0,0,0,0,0,0,26475,
+0,0,0,0,0,0,0,26477,0,26485,0,0,26486,0,26487,0,0,26488,26493,26494,0,0,26495,0,
+26497,26504,26506,0,0,0,0,0,26507,0,0,0,0,0,26509,0,0,26510,0,0,0,0,0,0,0,0,0,0,
+0,0,0,26512,0,26513,26515,0,0,0,26518,0,0,0,26519,0,26524,26526,0,0,0,26527,0,
+26532,0,26533,26537,26558,0,0,0,26559,0,0,0,26571,0,0,26573,0,26588,0,26593,0,0,
+0,0,0,0,26603,0,26604,0,0,0,0,0,0,0,0,0,0,26606,0,0,0,0,0,0,0,26607,26609,26611,
+26614,0,0,0,26616,26620,0,26621,0,0,0,0,0,26627,0,26629,0,0,26630,0,0,26632,
+26643,0,0,0,26644,0,0,0,0,0,0,0,0,0,26646,26647,0,0,0,26650,0,0,26656,0,0,0,0,
+26663,26670,26671,0,0,0,26685,26686,26687,0,26689,0,0,0,0,26744,0,26745,0,26747,
+26748,0,26749,26750,26751,0,0,0,0,26752,26755,0,0,0,26756,26769,0,0,0,26774,0,0,
+0,0,0,26775,0,26777,26778,0,26786,0,0,0,26787,0,0,0,0,0,0,0,0,0,0,0,0,0,26788,0,
+0,26789,0,0,0,0,0,26791,0,26792,26793,0,0,0,26794,0,26797,26798,0,0,0,26800,0,0,
+26803,0,26804,0,0,0,0,0,0,0,0,0,26805,0,0,26808,0,0,26809,0,0,0,0,0,0,0,26812,0,
+26825,0,0,0,0,0,0,0,26826,0,0,26827,26829,26834,0,0,0,0,26835,0,0,26849,0,26851,
+0,0,0,0,0,0,0,0,0,26852,0,26853,26857,0,26858,0,26859,0,0,0,0,0,0,0,26876,0,
+26878,26882,26883,0,0,0,0,26890,26894,0,0,0,0,26895,26896,0,0,0,0,0,26900,0,0,0,
+0,0,0,0,26911,26913,26914,26915,26916,26919,0,0,0,26921,26922,0,0,26925,0,0,0,
+26928,0,0,26929,26930,0,0,0,26931,0,26932,0,0,0,0,0,26933,0,0,0,0,0,0,26937,0,0,
+26943,0,0,26944,0,0,0,26946,0,0,0,0,0,0,0,26956,0,26958,0,0,26963,0,0,0,0,0,0,0,
+26965,0,26969,26970,26972,0,0,0,0,0,26973,0,26974,0,26978,0,26980,0,0,0,0,0,0,
+26982,0,26986,26987,0,26990,0,0,0,0,27003,27006,0,0,27007,27010,27012,27013,0,0,
+0,0,0,0,0,0,27014,27015,27018,0,27019,0,0,0,0,0,27025,0,0,0,27026,0,0,0,0,27029,
+27030,27031,27034,0,0,27036,27037,0,0,0,27038,27042,0,0,0,27044,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,27045,0,0,0,0,0,0,0,27046,0,0,0,0,0,0,0,27047,27049,0,27050,0,0,0,
+27051,27052,0,27055,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27056,27058,27059,0,
+27061,0,27064,0,0,0,0,0,27069,0,0,27070,0,0,0,0,0,0,0,27072,0,0,0,0,0,0,0,0,
+27076,0,0,0,0,0,27078,0,27079,0,0,0,27081,0,0,0,0,0,0,27082,0,27083,27086,0,0,0,
+0,27087,0,0,0,0,0,27088,27090,0,27094,0,0,27095,0,27099,27102,0,0,0,27103,0,0,0,
+0,27105,0,0,0,27106,0,0,0,0,0,0,27107,0,0,0,0,27108,27117,0,0,0,0,27118,0,0,
+27124,0,27126,0,0,27130,27131,0,0,0,0,0,0,27147,0,0,0,0,27148,27149,0,0,0,0,
+27150,27151,0,27152,0,27159,0,0,0,27164,0,0,0,0,0,0,0,27175,0,27189,0,0,27191,0,
+27193,0,27195,0,27198,0,0,0,0,0,27200,0,0,0,0,27202,0,0,0,0,27203,0,0,27204,0,0,
+27206,0,27207,0,0,0,0,27209,0,0,0,27213,0,0,27216,27219,27220,27222,27223,0,
+27224,0,27225,27226,0,0,27233,0,0,0,0,27235,0,27237,0,27238,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,27239,0,27242,27243,0,27250,0,0,0,27251,0,27253,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,27254,27255,27258,0,0,0,27259,0,0,0,0,0,0,27267,0,27276,27278,
+0,0,0,0,0,0,0,0,0,27296,27297,27301,0,0,0,0,0,0,27302,0,0,0,0,0,0,27312,27313,0,
+0,0,0,0,27318,0,27320,0,27329,0,27330,27331,0,27332,0,0,0,0,27340,0,0,0,27348,0,
+0,0,0,0,0,27350,0,27351,0,0,0,0,27355,0,0,27358,27359,27361,0,0,0,27365,0,27367,
+0,27376,27378,0,0,27379,0,0,0,0,0,0,27396,0,27397,27404,0,0,0,0,0,27408,0,0,0,0,
+27453,0,0,0,27456,0,0,0,27458,0,0,0,0,0,0,0,27459,0,0,0,27460,0,0,27461,0,27465,
+27467,0,0,27469,0,27470,0,27471,0,27477,27482,0,0,0,0,0,0,27484,0,0,0,0,0,0,
+27485,0,0,0,0,0,27493,0,27494,27502,0,0,0,0,0,0,0,0,0,0,0,0,27511,27532,0,0,0,
+27533,27545,0,0,0,27546,0,0,0,0,0,0,0,0,0,0,27547,0,0,27549,27550,0,27551,0,0,0,
+0,0,0,0,27555,0,0,27571,0,27573,27574,27575,27577,0,27578,0,0,27579,27585,0,0,0,
+0,0,27586,0,0,27588,27589,0,0,0,0,27596,0,0,27600,0,0,0,0,0,0,0,0,0,0,0,27608,0,
+0,0,0,0,0,0,0,0,0,0,27610,0,0,0,27618,0,0,27620,0,0,0,27631,0,0,27632,27634,0,
+27636,27638,0,0,0,27643,0,27644,27649,0,0,0,0,0,0,0,0,0,0,0,0,0,27651,27660,0,
+27661,0,0,0,0,0,0,0,27662,0,0,27664,0,27665,0,0,0,27669,0,27671,0,0,0,27673,
+27674,0,0,0,27682,0,0,0,27711,0,27712,27713,27719,27720,0,0,27728,0,27729,0,0,0,
+0,0,0,0,0,0,27731,0,0,27732,0,27733,0,27738,0,0,0,27742,0,0,0,27743,27744,0,0,0,
+0,0,0,27745,27746,0,0,0,27747,27748,27751,27752,0,0,0,27768,27770,0,0,0,27774,
+27775,0,27776,27777,0,0,27781,0,27784,0,27786,0,0,27791,0,27792,27793,27804,0,
+27812,27813,0,0,0,0,0,0,0,0,27814,0,27825,0,27827,0,0,0,0,27828,27861,27862,0,0,
+0,27864,0,0,0,27865,27884,0,27889,0,0,0,0,0,27890,0,27891,0,0,0,27892,0,0,0,0,0,
+27897,27898,0,0,27899,0,0,0,27901,27905,0,0,27920,0,0,27921,0,27922,0,0,0,27931,
+27934,0,0,0,0,0,0,0,0,0,0,27941,0,27942,0,27945,0,27947,27954,0,0,0,0,27960,
+27963,0,0,0,0,0,0,0,0,27964,27965,0,0,0,27967,0,27969,27975,0,27976,27977,0,
+27981,0,27983,28051,28052,0,0,0,0,0,28056,0,0,0,0,0,0,28058,28059,0,0,28061,0,0,
+0,0,0,0,0,28063,0,0,0,0,0,0,28066,0,0,0,0,0,0,28069,28070,28072,0,28073,0,0,
+28074,0,0,0,0,28075,0,0,0,0,0,0,0,28078,0,0,0,0,28085,0,0,0,0,28086,0,0,0,0,0,0,
+28088,0,0,0,0,0,0,0,0,28090,0,28097,28114,28115,0,0,0,0,0,0,0,28116,0,0,0,0,0,
+28118,0,28129,0,28131,0,0,28135,0,0,0,28140,28141,0,0,0,28146,0,0,0,0,28152,0,0,
+0,0,28155,28157,28161,0,0,0,0,28166,0,28167,0,0,0,0,0,0,0,0,0,0,0,28172,0,0,0,0,
+0,0,28173,0,0,28175,0,0,0,0,0,0,0,0,0,28178,28188,0,28190,0,0,0,0,0,28191,0,
+28193,28206,0,0,28207,28209,0,28211,0,28213,0,0,0,28215,28216,28217,0,28222,0,
+28223,28225,0,0,0,28226,0,28227,28229,28232,0,0,0,0,0,0,0,0,0,28235,0,28241,0,0,
+28242,0,0,0,0,28243,0,0,0,28245,0,0,0,28248,28250,0,28251,28252,0,0,0,0,0,0,
+28253,0,0,28254,28255,0,0,28256,0,0,28258,0,0,0,0,0,28259,0,0,28260,0,0,28261,0,
+0,0,0,28262,28263,0,0,28264,0,0,0,28266,0,28268,28269,0,28270,28272,28274,0,
+28277,28278,0,0,0,28279,0,28280,28281,28283,0,28292,0,28294,0,28297,0,0,0,0,
+28299,0,0,0,0,0,28300,0,0,0,0,0,0,0,28301,0,0,0,0,0,0,0,0,0,0,0,0,0,28302,28303,
+0,0,0,0,28304,0,0,28305,0,28312,0,28313,28314,0,0,0,0,0,0,28315,0,0,0,28320,
+28321,0,0,28328,0,0,0,28329,28338,0,28339,0,0,28344,0,0,0,0,0,0,0,0,28347,0,0,0,
+0,0,0,0,0,28348,0,0,0,0,0,28411,0,28412,28413,0,28416,0,0,0,28420,0,0,0,0,0,
+28421,0,0,0,0,28423,0,0,0,28424,0,0,28428,0,0,0,0,0,28429,0,0,0,28431,28434,0,
+28458,0,0,0,0,0,0,0,0,0,0,0,28464,0,0,0,0,28465,0,28467,0,0,0,0,0,0,28471,0,0,0,
+0,28474,0,28480,0,28481,0,0,28485,0,0,0,0,28486,28488,0,0,28489,0,0,0,0,28492,0,
+0,0,28495,0,28497,0,28499,0,0,0,0,28500,0,0,28502,28503,0,0,0,28508,0,0,0,28510,
+0,0,28512,28513,28514,28521,0,28526,0,28527,28528,0,0,0,0,28529,0,0,28532,0,0,
+28537,28538,0,0,0,28539,0,28548,0,28553,28554,0,0,0,0,0,0,0,0,0,0,0,0,28560,
+28563,0,0,28564,0,0,0,0,28565,0,0,0,0,0,0,0,28566,28568,0,0,0,0,0,0,28569,0,0,0,
+28570,0,28572,28573,0,0,0,0,28575,0,0,0,0,28576,28581,28588,0,0,28589,0,0,0,
+28590,28595,0,28598,0,0,28601,0,0,28605,0,0,0,0,28614,28615,28619,0,0,0,0,0,0,
+28620,0,28626,0,0,28628,0,28631,0,28632,0,0,0,0,0,0,28635,0,0,0,28637,28638,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28639,0,28643,0,0,28652,0,0,0,28662,0,
+28670,28671,0,0,0,0,0,0,0,0,0,28672,28673,28675,28676,0,0,0,0,0,0,0,28691,0,0,0,
+28695,0,0,0,28696,0,28697,28698,0,28705,0,28707,28708,28710,0,0,0,0,0,0,0,28711,
+28728,0,0,0,28736,0,0,0,28737,0,0,0,0,0,0,0,0,0,28738,0,28739,0,28741,0,0,28742,
+0,0,0,0,0,0,0,0,0,0,0,28745,0,0,0,0,0,0,28749,28750,28752,28754,28756,0,28757,0,
+0,0,0,28759,28760,0,0,0,0,0,0,28762,0,0,0,28764,0,0,0,0,0,0,28766,0,28767,28768,
+0,0,0,0,28769,28770,0,0,0,0,0,0,0,0,0,0,0,0,0,28771,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,28772,0,28773,0,28782,0,0,0,0,0,0,28784,0,28785,0,28786,0,0,0,28787,0,0,0,
+28797,0,0,0,0,0,0,28799,0,0,28801,0,0,0,0,28802,0,28805,0,0,28806,0,0,28807,0,0,
+0,0,0,0,0,28808,0,0,0,0,0,28810,28812,0,0,28816,28819,0,0,28821,0,28826,0,0,0,
+28842,28852,0,0,28853,0,28854,28855,0,0,0,28857,0,0,0,28858,0,28867,28868,28869,
+0,0,0,28874,28880,28882,28890,28892,0,0,0,0,0,0,0,28895,0,0,0,28898,28899,0,0,0,
+28900,0,0,28904,0,28906,0,0,0,0,28907,0,0,0,0,0,0,28908,0,0,0,28910,0,28914,0,0,
+0,0,0,0,0,28915,28916,28919,0,0,28920,0,28921,0,0,0,0,0,0,0,0,28924,0,0,0,0,
+28926,28929,0,0,0,28930,0,28936,0,28939,0,0,0,0,28942,0,0,0,0,0,0,28956,0,0,0,
+28966,0,0,0,0,28967,0,0,0,0,0,0,0,0,0,28968,0,28971,0,28975,28976,0,28982,28983,
+0,0,28984,28989,28996,28997,28998,0,0,0,0,0,0,28999,0,0,0,0,0,29000,0,29001,0,0,
+0,29009,0,0,29011,0,0,29021,0,0,0,0,29024,0,29025,0,0,0,0,0,29026,0,0,0,29036,0,
+0,0,29037,0,0,0,0,29038,0,29045,0,29047,0,0,0,0,0,0,0,0,0,29051,0,0,0,29054,
+29056,29062,0,29070,29082,0,0,0,29083,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29084,0,0,
+0,0,29085,29088,0,0,0,0,0,0,0,29090,29097,0,0,0,29103,0,0,0,0,0,0,0,0,29105,0,0,
+0,0,0,29107,0,29109,0,0,0,29115,0,0,29120,0,0,29138,29140,0,0,0,0,0,0,0,0,0,
+29152,0,29160,29174,0,29176,0,0,29180,0,29181,0,0,0,0,0,0,0,0,29228,0,0,29229,0,
+0,29230,0,0,0,0,0,0,0,0,0,0,29234,0,0,0,29241,0,29245,0,29248,0,29250,29256,
+29280,0,29282,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29285,0,0,29286,29291,29292,0,0,0,0,
+29294,0,29295,0,0,0,0,0,29296,29297,29298,29300,0,29302,0,0,29304,29307,0,29312,
+0,0,0,29322,0,0,29323,0,0,29324,29326,29328,0,29335,0,0,0,0,0,0,0,29338,29339,0,
+0,0,0,0,29341,29343,0,0,0,0,29344,0,0,0,0,0,29345,0,0,0,0,29346,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,29347,29348,29349,0,0,29354,0,0,29355,0,0,0,0,0,0,0,0,29357,0,0,
+0,0,29364,0,29365,0,0,0,0,0,0,0,29366,0,0,29368,0,0,0,0,0,0,0,0,29378,0,29381,0,
+0,0,0,0,0,0,0,29386,0,0,0,0,0,0,29389,0,0,0,29390,0,0,29391,29397,0,29398,29412,
+29414,29418,29419,0,0,0,0,0,0,0,29420,0,0,0,0,0,0,0,29423,0,0,0,29435,0,0,0,
+29437,0,0,29439,0,29441,0,0,0,0,29443,0,29446,29450,29452,0,0,0,0,0,29456,0,0,0,
+0,0,29461,0,0,0,29464,0,0,0,0,0,0,0,0,29468,0,29473,0,0,0,29486,0,0,0,29490,0,0,
+0,29491,29492,0,0,29497,0,0,0,29498,0,29499,0,29502,29505,0,29509,0,0,0,29510,0,
+0,0,29512,0,0,0,29516,0,0,0,0,0,0,0,0,29518,0,29519,0,0,0,0,0,29520,29521,29529,
+0,0,0,0,0,0,0,0,29530,0,0,29531,29538,0,29540,0,0,0,29542,0,29543,29544,29547,0,
+0,29548,0,0,0,29549,0,0,0,29550,0,0,29552,0,0,0,0,29558,29561,0,29562,29564,0,0,
+29565,0,0,29566,0,0,0,0,0,0,0,0,0,0,29578,29584,29586,29591,0,0,0,0,29593,29594,
+0,0,29597,0,0,29613,0,29614,0,29615,0,0,0,0,29616,29617,0,0,29625,0,0,0,29632,0,
+0,0,0,0,0,0,29633,0,0,0,0,0,29634,29635,29637,0,29638,0,29641,29643,0,0,0,0,0,0,
+29644,0,29645,0,29649,0,0,0,29650,0,29653,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29656,
+29659,0,0,29660,0,0,0,29661,0,0,0,0,0,29664,0,0,0,29671,29673,0,0,0,0,0,0,0,
+29675,0,29677,29679,0,0,29684,0,0,0,0,0,29685,0,0,0,29687,0,0,0,29688,0,29689,
+29690,29700,0,29701,0,0,0,29702,0,29706,0,0,0,0,0,0,0,29720,0,29721,0,29727,0,
+29733,29734,0,29750,29761,0,29763,0,0,0,0,0,29764,0,0,29765,0,0,0,29771,0,0,0,0,
+0,0,0,0,0,0,0,0,29772,0,0,0,29773,29774,29775,0,0,0,0,0,0,0,0,0,0,0,29822,0,0,0,
+29824,0,29825,0,0,0,0,0,29827,0,0,0,0,0,0,0,0,29829,0,29832,29834,0,0,29835,0,0,
+29837,29838,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29843,0,0,0,0,29844,29845,0,0,0,
+0,0,0,0,0,0,29849,0,0,29869,29872,29890,29905,0,0,0,0,0,29907,29921,0,29922,0,0,
+29923,29926,29944,29946,0,0,0,0,0,0,0,29947,29948,0,0,0,29951,0,0,0,0,0,29953,0,
+0,29956,0,29957,0,0,29962,0,0,0,0,29971,0,0,0,29972,0,0,0,0,0,29978,0,29979,
+29992,30007,30008,30010,0,0,0,30013,0,0,0,0,30014,30016,0,0,0,0,0,0,0,0,0,0,0,
+30017,0,0,0,0,0,30023,30031,0,0,30033,0,0,0,0,0,0,0,0,0,0,30034,0,30038,0,30039,
+0,30040,0,0,0,0,0,0,30067,30068,0,0,0,30069,0,30072,0,0,0,30073,0,0,0,0,30075,0,
+0,0,0,0,0,30079,0,0,30080,0,0,0,0,0,30082,0,0,0,0,0,0,0,0,0,0,0,30084,30090,0,0,
+30091,0,0,0,0,30098,30118,0,30119,0,30121,30130,0,0,0,0,0,0,0,0,0,0,0,0,0,30131,
+30132,30133,0,0,0,0,0,0,30135,0,0,0,0,0,0,0,0,0,0,0,30136,0,0,30137,30138,0,0,0,
+30139,30146,0,0,0,0,0,30147,0,0,30148,30151,0,0,0,30168,0,30172,30173,0,0,0,0,0,
+0,0,0,30180,30181,0,30192,0,0,0,0,0,0,0,30194,30196,0,0,30199,0,0,30202,0,0,0,0,
+30203,0,0,0,0,0,0,0,0,0,0,30213,0,0,0,30216,0,0,30217,0,0,0,30218,0,0,0,0,30219,
+0,30220,0,30222,30227,0,0,0,0,0,30231,0,0,30233,30235,0,0,0,0,30238,0,30240,
+30243,30245,0,30250,30252,0,0,0,30269,0,0,30271,30272,0,0,0,30278,30280,0,0,
+30282,0,30284,0,30294,0,0,0,0,30295,30296,0,0,0,0,0,30298,30299,30302,30304,
+30306,0,0,0,0,0,0,30316,30317,0,0,0,30318,0,0,0,30319,0,30320,30322,30326,0,0,0,
+0,0,30327,0,30332,30348,30349,0,0,30356,0,0,0,0,0,0,0,0,30357,0,30358,0,30359,
+30360,0,0,30365,30366,30378,0,0,0,0,30379,0,0,30381,0,30385,0,30388,30397,0,0,0,
+30401,0,0,0,0,30403,0,0,0,0,0,30404,0,0,30405,0,30406,30408,0,30409,0,30410,0,0,
+0,30417,0,0,30418,30419,0,30420,0,30424,0,0,0,30427,30430,30432,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,30433,0,0,0,0,0,0,0,30436,0,30437,30438,0,30441,30442,0,0,
+0,30445,0,0,0,0,30452,30456,30457,0,0,0,30458,0,30464,0,0,0,0,0,0,30467,0,30469,
+0,0,0,0,0,30477,0,0,30484,0,0,0,0,0,30485,0,0,0,0,0,30486,30487,30497,30498,0,0,
+0,0,0,0,0,0,0,0,30505,0,30508,0,0,0,30509,30510,0,30514,30516,0,0,0,0,0,0,0,0,0,
+0,0,30523,0,30524,0,30525,0,0,0,0,30537,0,0,30538,0,0,0,0,0,30553,0,0,30555,
+30556,30558,30559,30560,0,0,30561,0,30562,0,0,0,0,0,0,0,0,30563,30570,30571,0,
+30586,30587,0,0,30590,0,0,30594,0,0,0,0,30611,30612,30623,30634,0,0,30636,30640,
+30655,30656,0,30657,0,0,30658,30669,0,30670,0,30676,30678,0,0,0,0,0,0,0,30679,0,
+0,0,0,0,0,0,0,0,0,0,30695,0,0,30698,0,0,0,0,30700,0,0,0,0,30701,0,30702,30703,0,
+0,0,0,30707,0,0,0,30709,0,0,30710,30719,30729,0,0,0,0,0,0,0,0,0,30731,0,0,30733,
+0,0,0,30734,0,0,0,0,0,30736,30737,0,0,0,30740,0,0,0,30743,0,30746,0,30747,30748,
+0,0,30751,30752,30753,0,0,0,30754,0,0,30760,0,0,0,0,0,0,0,30763,0,30764,0,0,
+30766,0,30769,30770,30771,30774,30777,0,0,30779,30780,30781,0,0,0,0,30790,0,0,0,
+30792,0,0,0,0,30810,0,0,0,0,0,0,0,30812,30819,0,0,30823,30824,0,30825,0,30827,0,
+0,0,0,0,0,30828,0,0,30830,0,0,0,30834,0,30835,0,30837,30838,0,30845,0,0,0,0,0,
+30846,30847,0,0,30849,0,30851,0,0,0,0,0,30852,30858,0,0,30859,0,30865,0,0,30866,
+0,0,30868,0,0,30869,0,0,0,30881,30883,0,0,0,0,0,30889,0,30891,0,0,0,0,30894,0,
+30895,0,30897,0,30898,0,0,0,30904,30906,0,30909,0,0,0,0,0,0,30910,0,0,0,30915,
+30933,30942,0,0,0,0,30943,0,0,30945,0,0,0,0,0,0,30946,0,0,30947,0,0,30955,30956,
+0,0,30960,0,0,30961,30962,30966,0,0,30969,30974,0,0,0,30976,0,0,30977,0,30978,
+30982,0,0,0,0,0,0,0,30994,30995,30998,0,31000,0,0,31001,0,0,31003,31005,0,0,
+31006,31011,0,0,31014,0,31016,0,0,0,0,31018,0,0,31020,31023,31024,31025,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,31027,31028,31029,0,0,0,0,0,0,31032,0,0,0,0,0,0,0,0,0,0,0,
+31036,31037,31038,0,0,0,31041,31043,31045,0,31047,0,0,0,31048,0,31049,0,0,0,
+31053,31054,31055,0,0,31063,0,0,0,0,0,31066,0,31068,31071,0,0,0,31072,31073,0,0,
+0,0,31075,0,0,31076,0,0,0,31077,31079,0,31080,0,0,0,0,0,0,0,0,0,0,31087,0,31142,
+0,31144,0,0,31145,31146,31147,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31149,0,31151,31152,0,
+0,0,0,0,0,0,31162,31171,31174,31175,0,0,0,31176,0,0,0,0,0,0,0,31179,0,0,0,31186,
+0,0,0,31192,31195,0,0,31196,0,0,0,0,0,0,0,0,31198,0,0,0,0,0,31199,0,0,0,31205,0,
+0,0,0,31211,31215,0,0,0,0,31231,0,31232,0,0,0,0,0,0,0,0,0,0,31233,31236,31253,0,
+31254,0,0,0,0,0,0,31255,0,0,31257,0,0,0,0,0,0,0,0,0,31258,31259,0,0,31260,0,
+31261,0,0,0,0,0,31262,31263,0,0,31264,0,31266,0,31267,0,0,0,0,0,31281,0,31282,0,
+31284,0,0,31285,31287,31288,0,0,31290,0,0,0,31292,31295,0,31299,0,31300,0,0,0,0,
+0,31302,0,0,0,0,31303,0,0,0,0,0,0,31304,0,0,0,0,0,31305,31308,31309,31315,0,
+31317,0,0,0,0,0,31323,0,31324,0,0,0,0,0,31325,31327,0,0,31331,0,0,0,0,0,31333,0,
+0,0,0,0,31336,0,0,31337,0,0,0,0,0,0,31338,0,0,0,0,0,0,0,0,0,0,0,0,31339,0,0,0,0,
+0,0,0,31342,0,0,0,0,31345,0,0,0,0,0,0,0,0,31347,0,0,0,0,0,0,31348,0,0,31350,
+31351,0,31352,0,0,31354,0,0,0,0,31355,0,0,31356,0,0,0,0,0,0,0,0,0,0,31363,0,
+31372,0,0,31373,0,0,0,0,0,0,0,0,0,31376,0,31388,0,31389,0,31392,0,31401,0,31405,
+31407,31408,0,31409,0,0,0,0,0,0,31413,31415,0,0,0,31416,31418,0,0,0,0,0,0,31422,
+31423,0,0,31424,0,31425,31432,0,0,0,0,0,0,0,0,0,31433,0,0,0,0,0,0,0,0,31434,0,0,
+0,0,0,0,31435,0,0,0,0,31438,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31442,0,31444,0,
+31448,0,0,31451,0,0,0,0,31452,0,31461,31465,0,0,31466,0,0,31467,0,0,31468,0,0,0,
+31469,31473,0,31476,0,0,0,0,31489,31490,0,0,0,0,0,0,0,31492,31493,31494,0,0,0,0,
+31501,31504,31505,0,0,0,0,0,0,0,0,0,31509,0,0,0,0,31510,0,0,31511,0,0,31513,0,0,
+0,0,0,0,0,0,0,31514,0,31522,31536,31539,31540,0,31541,0,0,0,0,0,0,31546,31553,
+31559,0,0,0,31560,31561,31562,0,0,31564,31567,0,31569,0,0,0,31570,0,0,0,0,31571,
+0,0,0,0,0,0,31572,31574,31580,31581,0,0,31582,31584,31585,31586,31595,0,31596,0,
+0,0,0,31597,0,31599,0,31600,31601,0,0,31603,31604,0,0,31608,31610,0,0,0,31611,0,
+31615,0,0,0,0,31616,0,0,0,0,0,0,31617,0,0,0,0,0,31618,0,0,0,0,0,0,31621,0,0,0,0,
+0,0,0,0,0,31622,31625,0,0,0,0,31627,0,31641,0,0,31642,0,0,31643,0,0,0,0,0,0,0,0,
+0,31644,0,31646,0,0,0,0,31648,0,0,0,31652,0,0,0,31657,0,0,31676,0,0,0,0,0,0,0,
+31689,31691,31692,0,31694,0,0,0,31696,0,31702,0,31703,0};
+
+static const DictWord kStaticDictionaryWords[31705] = {
+{0,0,0},{8,0,1002},{136,0,1015},{4,0,683},{4,10,325},{138,10,125},{7,11,572},{9,
+11,592},{11,11,680},{11,11,842},{11,11,924},{12,11,356},{12,11,550},{13,11,317},
+{13,11,370},{13,11,469},{13,11,471},{14,11,397},{18,11,69},{146,11,145},{134,0,
+1265},{136,11,534},{134,0,1431},{11,0,138},{140,0,40},{4,0,155},{7,0,1689},{4,10
+,718},{135,10,1216},{4,0,245},{5,0,151},{5,0,741},{6,0,1147},{7,0,498},{7,0,870}
+,{7,0,1542},{12,0,213},{14,0,36},{14,0,391},{17,0,111},{18,0,6},{18,0,46},{18,0,
+151},{19,0,36},{20,0,32},{20,0,56},{20,0,69},{20,0,102},{21,0,4},{22,0,8},{22,0,
+10},{22,0,14},{150,0,31},{4,0,624},{135,0,1752},{5,10,124},{5,10,144},{6,10,548}
+,{7,10,15},{7,10,153},{137,10,629},{6,0,503},{9,0,586},{13,0,468},{14,0,66},{16,
+0,58},{7,10,1531},{8,10,416},{9,10,275},{10,10,100},{11,10,658},{11,10,979},{12,
+10,86},{14,10,207},{15,10,20},{143,10,25},{5,0,603},{7,0,1212},{9,0,565},{14,0,
+301},{5,10,915},{6,10,1783},{7,10,211},{7,10,1353},{9,10,83},{10,10,376},{10,10,
+431},{11,10,543},{12,10,664},{13,10,280},{13,10,428},{14,10,128},{17,10,52},{145
+,10,81},{4,0,492},{133,0,451},{135,0,835},{141,0,70},{132,0,539},{7,11,748},{139
+,11,700},{7,11,1517},{11,11,597},{14,11,76},{14,11,335},{148,11,33},{6,0,113},{
+135,0,436},{4,10,338},{133,10,400},{136,0,718},{133,11,127},{133,11,418},{6,0,
+1505},{7,0,520},{6,11,198},{11,10,892},{140,11,83},{4,10,221},{5,10,659},{5,10,
+989},{7,10,697},{7,10,1211},{138,10,284},{135,0,1070},{5,11,276},{6,11,55},{135,
+11,1369},{134,0,1515},{6,11,1752},{136,11,726},{138,10,507},{15,0,78},{4,10,188}
+,{135,10,805},{5,10,884},{139,10,991},{133,11,764},{134,10,1653},{6,11,309},{7,
+11,331},{138,11,550},{135,11,1861},{132,11,348},{135,11,986},{135,11,1573},{12,0
+,610},{13,0,431},{144,0,59},{9,11,799},{140,10,166},{134,0,1530},{132,0,750},{
+132,0,307},{133,0,964},{6,11,194},{7,11,133},{10,11,493},{10,11,570},{139,11,664
+},{5,11,24},{5,11,569},{6,11,3},{6,11,119},{6,11,143},{6,11,440},{7,11,295},{7,
+11,599},{7,11,1686},{7,11,1854},{8,11,424},{9,11,43},{9,11,584},{9,11,760},{10,
+11,148},{10,11,328},{11,11,159},{11,11,253},{11,11,506},{12,11,487},{12,11,531},
+{144,11,33},{136,10,760},{5,11,14},{5,11,892},{6,11,283},{7,11,234},{136,11,537}
+,{135,11,1251},{4,11,126},{8,11,635},{147,11,34},{4,11,316},{135,11,1561},{6,0,
+999},{6,0,1310},{137,11,861},{4,11,64},{5,11,352},{5,11,720},{6,11,368},{139,11,
+359},{4,0,75},{5,0,180},{6,0,500},{7,0,58},{7,0,710},{10,0,645},{136,10,770},{
+133,0,649},{6,0,276},{7,0,282},{7,0,879},{7,0,924},{8,0,459},{9,0,599},{9,0,754}
+,{11,0,574},{12,0,128},{12,0,494},{13,0,52},{13,0,301},{15,0,30},{143,0,132},{
+132,0,200},{4,10,89},{5,10,489},{6,10,315},{7,10,553},{7,10,1745},{138,10,243},{
+135,11,1050},{7,0,1621},{6,10,1658},{9,10,3},{10,10,154},{11,10,641},{13,10,85},
+{13,10,201},{141,10,346},{6,11,175},{137,11,289},{5,11,432},{133,11,913},{6,0,
+225},{137,0,211},{7,0,718},{8,0,687},{139,0,374},{4,10,166},{133,10,505},{9,0,
+110},{134,10,1670},{8,0,58},{9,0,724},{11,0,809},{13,0,113},{145,0,72},{6,0,345}
+,{7,0,1247},{144,11,82},{5,11,931},{134,11,1698},{8,0,767},{8,0,803},{9,0,301},{
+137,0,903},{139,0,203},{134,0,1154},{7,0,1949},{136,0,674},{134,0,259},{135,0,
+1275},{5,11,774},{6,11,1637},{6,11,1686},{134,11,1751},{134,0,1231},{7,10,445},{
+8,10,307},{8,10,704},{10,10,41},{10,10,439},{11,10,237},{11,10,622},{140,10,201}
+,{136,0,254},{6,11,260},{135,11,1484},{139,0,277},{135,10,1977},{4,10,189},{5,10
+,713},{6,11,573},{136,10,57},{138,10,371},{132,10,552},{134,11,344},{133,0,248},
+{9,0,800},{10,0,693},{11,0,482},{11,0,734},{11,0,789},{134,11,240},{4,0,116},{5,
+0,95},{5,0,445},{7,0,1688},{8,0,29},{9,0,272},{11,0,509},{11,0,915},{4,11,292},{
+4,11,736},{5,11,871},{6,11,171},{6,11,1689},{7,11,1324},{7,11,1944},{9,11,415},{
+9,11,580},{14,11,230},{146,11,68},{7,0,490},{13,0,100},{143,0,75},{135,0,1641},{
+133,0,543},{7,11,209},{8,11,661},{10,11,42},{11,11,58},{12,11,58},{12,11,118},{
+141,11,32},{5,0,181},{8,0,41},{6,11,63},{135,11,920},{133,0,657},{133,11,793},{
+138,0,709},{7,0,25},{8,0,202},{138,0,536},{5,11,665},{135,10,1788},{145,10,49},{
+9,0,423},{140,0,89},{5,11,67},{6,11,62},{6,11,374},{135,11,1391},{8,0,113},{9,0,
+877},{10,0,554},{11,0,83},{12,0,136},{19,0,109},{9,11,790},{140,11,47},{138,10,
+661},{4,0,963},{10,0,927},{14,0,442},{135,10,1945},{133,0,976},{132,0,206},{4,11
+,391},{135,11,1169},{134,0,2002},{6,0,696},{134,0,1008},{134,0,1170},{132,11,271
+},{7,0,13},{8,0,226},{10,0,537},{11,0,570},{11,0,605},{11,0,799},{11,0,804},{12,
+0,85},{12,0,516},{12,0,623},{13,0,112},{13,0,361},{14,0,77},{14,0,78},{17,0,28},
+{19,0,110},{140,11,314},{132,0,769},{134,0,1544},{4,0,551},{137,0,678},{5,10,84}
+,{134,10,163},{9,0,57},{9,0,459},{10,0,425},{11,0,119},{12,0,184},{12,0,371},{13
+,0,358},{145,0,51},{5,0,188},{5,0,814},{8,0,10},{9,0,421},{9,0,729},{10,0,609},{
+11,0,689},{4,11,253},{5,10,410},{5,11,544},{7,11,300},{137,11,340},{134,0,624},{
+138,11,321},{135,0,1941},{18,0,130},{5,10,322},{8,10,186},{9,10,262},{10,10,187}
+,{142,10,208},{5,11,53},{5,11,541},{6,11,94},{6,11,499},{7,11,230},{139,11,321},
+{133,10,227},{4,0,378},{4,11,920},{5,11,25},{5,11,790},{6,11,457},{135,11,853},{
+137,0,269},{132,0,528},{134,0,1146},{7,10,1395},{8,10,486},{9,10,236},{9,10,878}
+,{10,10,218},{11,10,95},{19,10,17},{147,10,31},{7,10,2043},{8,10,672},{141,10,
+448},{134,0,1105},{134,0,1616},{134,11,1765},{140,11,163},{5,10,412},{133,11,822
+},{132,11,634},{6,0,656},{134,11,1730},{134,0,1940},{5,0,104},{6,0,173},{135,0,
+1631},{136,10,562},{6,11,36},{7,11,658},{8,11,454},{147,11,86},{5,0,457},{134,10
+,1771},{7,0,810},{8,0,138},{8,0,342},{9,0,84},{10,0,193},{11,0,883},{140,0,359},
+{9,0,620},{135,10,1190},{137,10,132},{7,11,975},{137,11,789},{6,0,95},{6,0,1934}
+,{136,0,967},{141,11,335},{6,0,406},{10,0,409},{10,0,447},{11,0,44},{140,0,100},
+{4,10,317},{135,10,1279},{132,0,477},{134,0,1268},{6,0,1941},{8,0,944},{5,10,63}
+,{133,10,509},{132,0,629},{132,11,104},{4,0,246},{133,0,375},{6,0,1636},{132,10,
+288},{135,11,1614},{9,0,49},{10,0,774},{8,10,89},{8,10,620},{11,10,628},{12,10,
+322},{143,10,124},{4,0,282},{7,0,1034},{11,0,398},{11,0,634},{12,0,1},{12,0,79},
+{12,0,544},{14,0,237},{17,0,10},{146,0,20},{132,0,824},{7,11,45},{9,11,542},{9,
+11,566},{138,11,728},{5,0,118},{5,0,499},{6,0,476},{6,0,665},{6,0,1176},{6,0,
+1196},{7,0,600},{7,0,888},{135,0,1096},{7,0,296},{7,0,596},{8,0,560},{8,0,586},{
+9,0,612},{11,0,304},{12,0,46},{13,0,89},{14,0,112},{145,0,122},{5,0,894},{6,0,
+1772},{9,0,1009},{138,10,120},{5,11,533},{7,11,755},{138,11,780},{151,10,1},{6,0
+,1474},{7,11,87},{142,11,288},{139,0,366},{137,10,461},{7,11,988},{7,11,1939},{9
+,11,64},{9,11,502},{12,11,7},{12,11,34},{13,11,12},{13,11,234},{147,11,77},{7,0,
+1599},{7,0,1723},{8,0,79},{8,0,106},{8,0,190},{8,0,302},{8,0,383},{8,0,713},{9,0
+,119},{9,0,233},{9,0,419},{9,0,471},{10,0,181},{10,0,406},{11,0,57},{11,0,85},{
+11,0,120},{11,0,177},{11,0,296},{11,0,382},{11,0,454},{11,0,758},{11,0,999},{12,
+0,27},{12,0,98},{12,0,131},{12,0,245},{12,0,312},{12,0,446},{12,0,454},{13,0,25}
+,{13,0,98},{13,0,426},{13,0,508},{14,0,70},{14,0,163},{14,0,272},{14,0,277},{14,
+0,370},{15,0,95},{15,0,138},{15,0,167},{17,0,38},{148,0,96},{135,10,1346},{10,0,
+200},{19,0,2},{151,0,22},{135,11,141},{134,10,85},{134,0,1759},{138,0,372},{145,
+0,16},{8,0,943},{132,11,619},{139,11,88},{5,11,246},{8,11,189},{9,11,355},{9,11,
+512},{10,11,124},{10,11,453},{11,11,143},{11,11,416},{11,11,859},{141,11,341},{5
+,0,258},{134,0,719},{6,0,1798},{6,0,1839},{8,0,900},{10,0,874},{10,0,886},{12,0,
+698},{12,0,732},{12,0,770},{16,0,106},{18,0,163},{18,0,170},{18,0,171},{152,0,20
+},{9,0,707},{11,0,326},{11,0,339},{12,0,423},{12,0,502},{20,0,62},{9,11,707},{11
+,11,326},{11,11,339},{12,11,423},{12,11,502},{148,11,62},{5,0,30},{7,0,495},{8,0
+,134},{9,0,788},{140,0,438},{133,11,678},{5,10,279},{6,10,235},{7,10,468},{8,10,
+446},{9,10,637},{10,10,717},{11,10,738},{140,10,514},{5,11,35},{6,11,287},{7,11,
+862},{7,11,1886},{138,11,179},{7,0,1948},{7,0,2004},{132,11,517},{5,10,17},{6,10
+,371},{137,10,528},{4,0,115},{5,0,669},{6,0,407},{8,0,311},{11,0,10},{141,0,5},{
+137,0,381},{5,0,50},{6,0,439},{7,0,780},{135,0,1040},{136,11,667},{11,11,403},{
+146,11,83},{5,0,1},{6,0,81},{138,0,520},{134,0,738},{5,0,482},{8,0,98},{9,0,172}
+,{10,0,360},{10,0,700},{10,0,822},{11,0,302},{11,0,778},{12,0,50},{12,0,127},{12
+,0,396},{13,0,62},{13,0,328},{14,0,122},{147,0,72},{9,11,157},{10,11,131},{140,
+11,72},{135,11,714},{135,11,539},{5,0,2},{6,0,512},{7,0,797},{7,0,1494},{8,0,253
+},{8,0,589},{9,0,77},{10,0,1},{10,0,129},{10,0,225},{11,0,118},{11,0,226},{11,0,
+251},{11,0,430},{11,0,701},{11,0,974},{11,0,982},{12,0,64},{12,0,260},{12,0,488}
+,{140,0,690},{5,11,394},{7,11,367},{7,11,487},{7,11,857},{7,11,1713},{8,11,246},
+{9,11,537},{10,11,165},{12,11,219},{140,11,561},{136,0,557},{5,10,779},{5,10,807
+},{6,10,1655},{134,10,1676},{4,10,196},{5,10,558},{133,10,949},{11,11,827},{12,
+11,56},{14,11,34},{143,11,148},{137,0,347},{133,0,572},{134,0,832},{4,0,12},{7,0
+,504},{7,0,522},{7,0,809},{8,0,797},{141,0,88},{4,10,752},{133,11,449},{7,11,86}
+,{8,11,103},{145,11,69},{7,11,2028},{138,11,641},{5,0,528},{6,11,1},{142,11,2},{
+134,0,861},{10,0,294},{4,10,227},{5,10,159},{5,10,409},{7,10,80},{10,10,479},{12
+,10,418},{14,10,50},{14,10,249},{142,10,295},{7,10,1470},{8,10,66},{8,10,137},{8
+,10,761},{9,10,638},{11,10,80},{11,10,212},{11,10,368},{11,10,418},{12,10,8},{13
+,10,15},{16,10,61},{17,10,59},{19,10,28},{148,10,84},{20,0,109},{135,11,1148},{6
+,11,277},{7,11,1274},{7,11,1386},{7,11,1392},{12,11,129},{146,11,87},{6,11,187},
+{7,11,39},{7,11,1203},{8,11,380},{8,11,542},{14,11,117},{149,11,28},{134,0,1187}
+,{5,0,266},{9,0,290},{9,0,364},{10,0,293},{11,0,606},{142,0,45},{6,11,297},{7,11
+,793},{139,11,938},{4,0,50},{6,0,594},{9,0,121},{10,0,49},{10,0,412},{139,0,834}
+,{136,0,748},{7,11,464},{8,11,438},{11,11,105},{11,11,363},{12,11,231},{14,11,
+386},{15,11,102},{148,11,75},{132,0,466},{13,0,399},{14,0,337},{6,10,38},{7,10,
+1220},{8,10,185},{8,10,256},{9,10,22},{9,10,331},{10,10,738},{11,10,205},{11,10,
+540},{11,10,746},{13,10,465},{142,10,194},{9,0,378},{141,0,162},{137,0,519},{4,
+10,159},{6,10,115},{7,10,252},{7,10,257},{7,10,1928},{8,10,69},{9,10,384},{10,10
+,91},{10,10,615},{12,10,375},{14,10,235},{18,10,117},{147,10,123},{5,11,604},{5,
+10,911},{136,10,278},{132,0,667},{8,0,351},{9,0,322},{4,10,151},{135,10,1567},{
+134,0,902},{133,10,990},{12,0,180},{5,10,194},{7,10,1662},{137,10,90},{4,0,869},
+{134,0,1996},{134,0,813},{133,10,425},{137,11,761},{132,0,260},{133,10,971},{5,
+11,20},{6,11,298},{7,11,659},{7,11,1366},{137,11,219},{4,0,39},{5,0,36},{7,0,
+1843},{8,0,407},{11,0,144},{140,0,523},{4,0,510},{10,0,587},{139,10,752},{7,0,29
+},{7,0,66},{7,0,1980},{10,0,487},{138,0,809},{13,0,260},{14,0,82},{18,0,63},{137
+,10,662},{5,10,72},{6,10,264},{7,10,21},{7,10,46},{7,10,2013},{8,10,215},{8,10,
+513},{10,10,266},{139,10,22},{134,0,570},{6,0,565},{7,0,1667},{4,11,439},{10,10,
+95},{11,10,603},{12,11,242},{13,10,443},{14,10,160},{143,10,4},{134,0,1464},{134
+,10,431},{9,0,372},{15,0,2},{19,0,10},{19,0,18},{5,10,874},{6,10,1677},{143,10,0
+},{132,0,787},{6,0,380},{12,0,399},{21,0,19},{7,10,939},{7,10,1172},{7,10,1671},
+{9,10,540},{10,10,696},{11,10,265},{11,10,732},{11,10,928},{11,10,937},{141,10,
+438},{137,0,200},{132,11,233},{132,0,516},{134,11,577},{132,0,844},{11,0,887},{
+14,0,365},{142,0,375},{132,11,482},{8,0,821},{140,0,44},{7,0,1655},{136,0,305},{
+5,10,682},{135,10,1887},{135,11,346},{132,10,696},{4,0,10},{7,0,917},{139,0,786}
+,{5,11,795},{6,11,1741},{8,11,417},{137,11,782},{4,0,1016},{134,0,2031},{5,0,684
+},{4,10,726},{133,10,630},{6,0,1021},{134,0,1480},{8,10,802},{136,10,838},{134,0
+,27},{134,0,395},{135,11,622},{7,11,625},{135,11,1750},{4,11,203},{135,11,1936},
+{6,10,118},{7,10,215},{7,10,1521},{140,10,11},{132,0,813},{136,0,511},{7,10,615}
+,{138,10,251},{135,10,1044},{145,0,56},{133,10,225},{6,0,342},{6,0,496},{8,0,275
+},{137,0,206},{4,0,909},{133,0,940},{132,0,891},{7,11,311},{9,11,308},{140,11,
+255},{4,10,370},{5,10,756},{135,10,1326},{4,0,687},{134,0,1596},{134,0,1342},{6,
+10,1662},{7,10,48},{8,10,771},{10,10,116},{13,10,104},{14,10,105},{14,10,184},{
+15,10,168},{19,10,92},{148,10,68},{138,10,209},{4,11,400},{5,11,267},{135,11,232
+},{151,11,12},{6,0,41},{141,0,160},{141,11,314},{134,0,1718},{136,0,778},{142,11
+,261},{134,0,1610},{133,0,115},{132,0,294},{14,0,314},{132,10,120},{132,0,983},{
+5,0,193},{140,0,178},{138,10,429},{5,10,820},{135,10,931},{6,0,994},{6,0,1051},{
+6,0,1439},{7,0,174},{133,11,732},{4,11,100},{7,11,679},{8,11,313},{138,10,199},{
+6,10,151},{6,10,1675},{7,10,383},{151,10,10},{6,0,1796},{8,0,848},{8,0,867},{8,0
+,907},{10,0,855},{140,0,703},{140,0,221},{4,0,122},{5,0,796},{5,0,952},{6,0,1660
+},{6,0,1671},{8,0,567},{9,0,687},{9,0,742},{10,0,686},{11,0,682},{11,0,909},{140
+,0,281},{5,11,362},{5,11,443},{6,11,318},{7,11,1019},{139,11,623},{5,11,463},{
+136,11,296},{11,0,583},{13,0,262},{6,10,1624},{12,10,422},{142,10,360},{5,0,179}
+,{7,0,1095},{135,0,1213},{4,10,43},{4,11,454},{5,10,344},{133,10,357},{4,0,66},{
+7,0,722},{135,0,904},{134,0,773},{7,0,352},{133,10,888},{5,11,48},{5,11,404},{6,
+11,557},{7,11,458},{8,11,597},{10,11,455},{10,11,606},{11,11,49},{11,11,548},{12
+,11,476},{13,11,18},{141,11,450},{134,11,418},{132,10,711},{5,11,442},{135,11,
+1984},{141,0,35},{137,0,152},{134,0,1197},{135,11,1093},{137,11,203},{137,10,440
+},{10,0,592},{10,0,753},{12,0,317},{12,0,355},{12,0,465},{12,0,469},{12,0,560},{
+12,0,578},{141,0,243},{133,0,564},{134,0,797},{5,10,958},{133,10,987},{5,11,55},
+{7,11,376},{140,11,161},{133,11,450},{134,0,556},{134,0,819},{11,10,276},{142,10
+,293},{7,0,544},{138,0,61},{8,0,719},{4,10,65},{5,10,479},{5,10,1004},{7,10,1913
+},{8,10,317},{9,10,302},{10,10,612},{141,10,22},{4,0,5},{5,0,498},{8,0,637},{9,0
+,521},{4,11,213},{4,10,261},{7,11,223},{7,10,510},{136,11,80},{5,0,927},{7,0,101
+},{4,10,291},{7,11,381},{7,11,806},{7,11,820},{8,11,354},{8,11,437},{8,11,787},{
+9,10,515},{9,11,657},{10,11,58},{10,11,339},{10,11,749},{11,11,914},{12,10,152},
+{12,11,162},{12,10,443},{13,11,75},{13,10,392},{14,11,106},{14,11,198},{14,11,
+320},{14,10,357},{14,11,413},{146,11,43},{6,0,1153},{7,0,1441},{136,11,747},{4,0
+,893},{5,0,780},{133,0,893},{138,11,654},{133,11,692},{133,0,238},{134,11,191},{
+4,10,130},{135,10,843},{6,0,1296},{5,10,42},{5,10,879},{7,10,245},{7,10,324},{7,
+10,1532},{11,10,463},{11,10,472},{13,10,363},{144,10,52},{134,0,1729},{6,0,1999}
+,{136,0,969},{4,10,134},{133,10,372},{4,0,60},{7,0,941},{7,0,1800},{8,0,314},{9,
+0,700},{139,0,487},{134,0,1144},{6,11,162},{7,11,1960},{136,11,831},{132,11,706}
+,{135,0,1147},{138,11,426},{138,11,89},{7,0,1853},{138,0,437},{136,0,419},{135,
+10,1634},{133,0,828},{5,0,806},{7,0,176},{7,0,178},{7,0,1240},{7,0,1976},{132,10
+,644},{135,11,1877},{5,11,420},{135,11,1449},{4,0,51},{5,0,39},{6,0,4},{7,0,591}
+,{7,0,849},{7,0,951},{7,0,1613},{7,0,1760},{7,0,1988},{9,0,434},{10,0,754},{11,0
+,25},{139,0,37},{10,11,57},{138,11,277},{135,10,540},{132,11,204},{135,0,159},{
+139,11,231},{133,0,902},{7,0,928},{7,11,366},{9,11,287},{12,11,199},{12,11,556},
+{140,11,577},{6,10,623},{136,10,789},{4,10,908},{5,10,359},{5,10,508},{6,10,1723
+},{7,10,343},{7,10,1996},{135,10,2026},{134,0,270},{4,10,341},{135,10,480},{5,11
+,356},{135,11,224},{11,11,588},{11,11,864},{11,11,968},{143,11,160},{132,0,556},
+{137,0,801},{132,0,416},{142,0,372},{5,0,152},{5,0,197},{7,0,340},{7,0,867},{10,
+0,548},{10,0,581},{11,0,6},{12,0,3},{12,0,19},{14,0,110},{142,0,289},{139,0,369}
+,{7,11,630},{9,11,567},{11,11,150},{11,11,444},{141,11,119},{134,11,539},{7,10,
+1995},{8,10,299},{11,10,890},{140,10,674},{7,0,34},{7,0,190},{8,0,28},{8,0,141},
+{8,0,444},{8,0,811},{9,0,468},{11,0,334},{12,0,24},{12,0,386},{140,0,576},{133,0
+,757},{7,0,1553},{136,0,898},{133,0,721},{136,0,1012},{4,0,789},{5,0,647},{135,0
+,1102},{132,0,898},{10,0,183},{4,10,238},{5,10,503},{6,10,179},{7,10,2003},{8,10
+,381},{8,10,473},{9,10,149},{10,10,788},{15,10,45},{15,10,86},{20,10,110},{150,
+10,57},{9,0,136},{19,0,107},{4,10,121},{5,10,156},{5,10,349},{10,10,605},{142,10
+,342},{4,11,235},{135,11,255},{4,11,194},{5,11,584},{6,11,384},{7,11,583},{10,11
+,761},{11,11,760},{139,11,851},{6,10,80},{6,10,1694},{7,10,173},{7,10,1974},{9,
+10,547},{10,10,730},{14,10,18},{150,10,39},{4,10,923},{134,10,1711},{5,0,277},{
+141,0,247},{132,0,435},{133,11,562},{134,0,1311},{5,11,191},{137,11,271},{132,10
+,595},{7,11,1537},{14,11,96},{143,11,73},{5,0,437},{7,0,502},{7,0,519},{7,0,1122
+},{7,0,1751},{14,0,211},{6,10,459},{7,10,1753},{7,10,1805},{8,10,658},{9,10,1},{
+11,10,959},{141,10,446},{6,0,814},{4,11,470},{5,11,473},{6,11,153},{7,11,1503},{
+7,11,1923},{10,11,701},{11,11,132},{11,11,168},{11,11,227},{11,11,320},{11,11,
+436},{11,11,525},{11,11,855},{12,11,41},{12,11,286},{13,11,103},{13,11,284},{14,
+11,255},{14,11,262},{15,11,117},{143,11,127},{5,0,265},{6,0,212},{135,0,28},{138
+,0,750},{133,11,327},{6,11,552},{7,11,1754},{137,11,604},{134,0,2012},{132,0,702
+},{5,11,80},{6,11,405},{7,11,403},{7,11,1502},{7,11,1626},{8,11,456},{9,11,487},
+{9,11,853},{9,11,889},{10,11,309},{11,11,721},{11,11,994},{12,11,430},{141,11,
+165},{5,0,808},{135,0,2045},{5,0,166},{8,0,739},{140,0,511},{134,10,490},{4,11,
+453},{5,11,887},{6,11,535},{8,11,6},{136,11,543},{4,0,119},{5,0,170},{5,0,447},{
+7,0,1708},{7,0,1889},{9,0,357},{9,0,719},{12,0,486},{140,0,596},{137,0,500},{7,
+10,250},{136,10,507},{132,10,158},{6,0,809},{134,0,1500},{9,0,327},{11,0,350},{
+11,0,831},{13,0,352},{4,10,140},{7,10,362},{8,10,209},{9,10,10},{9,10,503},{9,10
+,614},{10,10,689},{11,10,327},{11,10,725},{12,10,252},{12,10,583},{13,10,192},{
+14,10,269},{14,10,356},{148,10,50},{135,11,741},{4,0,450},{7,0,1158},{19,10,1},{
+19,10,26},{150,10,9},{6,0,597},{135,0,1318},{134,0,1602},{6,10,228},{7,10,1341},
+{9,10,408},{138,10,343},{7,0,1375},{7,0,1466},{138,0,331},{132,0,754},{132,10,
+557},{5,11,101},{6,11,88},{6,11,543},{7,11,1677},{9,11,100},{10,11,677},{14,11,
+169},{14,11,302},{14,11,313},{15,11,48},{143,11,84},{134,0,1368},{4,11,310},{9,
+11,795},{10,11,733},{11,11,451},{12,11,249},{14,11,115},{14,11,286},{143,11,100}
+,{132,10,548},{10,0,557},{7,10,197},{8,10,142},{8,10,325},{9,10,150},{9,10,596},
+{10,10,353},{11,10,74},{11,10,315},{12,10,662},{12,10,681},{14,10,423},{143,10,
+141},{133,11,587},{5,0,850},{136,0,799},{10,0,908},{12,0,701},{12,0,757},{142,0,
+466},{4,0,62},{5,0,275},{18,0,19},{6,10,399},{6,10,579},{7,10,692},{7,10,846},{7
+,10,1015},{7,10,1799},{8,10,403},{9,10,394},{10,10,133},{12,10,4},{12,10,297},{
+12,10,452},{16,10,81},{18,10,25},{21,10,14},{22,10,12},{151,10,18},{12,0,459},{7
+,10,1546},{11,10,299},{142,10,407},{132,10,177},{132,11,498},{7,11,217},{8,11,
+140},{138,11,610},{5,10,411},{135,10,653},{134,0,1802},{7,10,439},{10,10,727},{
+11,10,260},{139,10,684},{133,11,905},{11,11,580},{142,11,201},{134,0,1397},{5,10
+,208},{7,10,753},{135,10,1528},{7,0,238},{7,0,2033},{8,0,120},{8,0,188},{8,0,659
+},{9,0,598},{10,0,466},{12,0,342},{12,0,588},{13,0,503},{14,0,246},{143,0,92},{
+135,11,1041},{4,11,456},{7,11,105},{7,11,358},{7,11,1637},{8,11,643},{139,11,483
+},{6,0,1318},{134,0,1324},{4,0,201},{7,0,1744},{8,0,602},{11,0,247},{11,0,826},{
+17,0,65},{133,10,242},{8,0,164},{146,0,62},{133,10,953},{139,10,802},{133,0,615}
+,{7,11,1566},{8,11,269},{9,11,212},{9,11,718},{14,11,15},{14,11,132},{142,11,227
+},{133,10,290},{132,10,380},{5,10,52},{7,10,277},{9,10,368},{139,10,791},{135,0,
+1243},{133,11,539},{11,11,919},{141,11,409},{136,0,968},{133,11,470},{134,0,882}
+,{132,0,907},{5,0,100},{10,0,329},{12,0,416},{149,0,29},{10,10,138},{139,10,476}
+,{5,10,725},{5,10,727},{6,11,91},{7,11,435},{135,10,1811},{4,11,16},{5,11,316},{
+5,11,842},{6,11,370},{6,11,1778},{8,11,166},{11,11,812},{12,11,206},{12,11,351},
+{14,11,418},{16,11,15},{16,11,34},{18,11,3},{19,11,3},{19,11,7},{20,11,4},{149,
+11,21},{132,0,176},{5,0,636},{5,0,998},{7,0,9},{7,0,1508},{8,0,26},{9,0,317},{9,
+0,358},{10,0,210},{10,0,292},{10,0,533},{11,0,555},{12,0,526},{12,0,607},{13,0,
+263},{13,0,459},{142,0,271},{6,0,256},{8,0,265},{4,10,38},{7,10,307},{7,10,999},
+{7,10,1481},{7,10,1732},{7,10,1738},{9,10,414},{11,10,316},{12,10,52},{13,10,420
+},{147,10,100},{135,10,1296},{4,11,611},{133,11,606},{4,0,643},{142,11,21},{133,
+11,715},{133,10,723},{6,0,610},{135,11,597},{10,0,127},{141,0,27},{6,0,1995},{6,
+0,2001},{8,0,119},{136,0,973},{4,11,149},{138,11,368},{12,0,522},{4,11,154},{5,
+10,109},{6,10,1784},{7,11,1134},{7,10,1895},{8,11,105},{12,10,296},{140,10,302},
+{4,11,31},{6,11,429},{7,11,962},{9,11,458},{139,11,691},{10,0,553},{11,0,876},{
+13,0,193},{13,0,423},{14,0,166},{19,0,84},{4,11,312},{5,10,216},{7,10,1879},{9,
+10,141},{9,10,270},{9,10,679},{10,10,159},{11,10,197},{12,10,538},{12,10,559},{
+14,10,144},{14,10,167},{143,10,67},{134,0,1582},{7,0,1578},{135,11,1578},{137,10
+,81},{132,11,236},{134,10,391},{134,0,795},{7,10,322},{136,10,249},{5,11,836},{5
+,11,857},{6,11,1680},{7,11,59},{147,11,53},{135,0,432},{10,11,68},{139,11,494},{
+4,11,81},{139,11,867},{7,0,126},{136,0,84},{142,11,280},{5,11,282},{8,11,650},{9
+,11,295},{9,11,907},{138,11,443},{136,0,790},{5,10,632},{138,10,526},{6,0,64},{
+12,0,377},{13,0,309},{14,0,141},{14,0,429},{14,11,141},{142,11,429},{134,0,1529}
+,{6,0,321},{7,0,1857},{9,0,530},{19,0,99},{7,10,948},{7,10,1042},{8,10,235},{8,
+10,461},{9,10,453},{10,10,354},{145,10,77},{7,0,1104},{11,0,269},{11,0,539},{11,
+0,627},{11,0,706},{11,0,975},{12,0,248},{12,0,434},{12,0,600},{12,0,622},{13,0,
+297},{13,0,485},{14,0,69},{14,0,409},{143,0,108},{4,10,362},{7,10,52},{7,10,303}
+,{10,11,70},{12,11,26},{14,11,17},{14,11,178},{15,11,34},{149,11,12},{11,0,977},
+{141,0,507},{9,0,34},{139,0,484},{5,10,196},{6,10,486},{7,10,212},{8,10,309},{
+136,10,346},{6,0,1700},{7,0,26},{7,0,293},{7,0,382},{7,0,1026},{7,0,1087},{7,0,
+2027},{8,0,24},{8,0,114},{8,0,252},{8,0,727},{8,0,729},{9,0,30},{9,0,199},{9,0,
+231},{9,0,251},{9,0,334},{9,0,361},{9,0,712},{10,0,55},{10,0,60},{10,0,232},{10,
+0,332},{10,0,384},{10,0,396},{10,0,504},{10,0,542},{10,0,652},{11,0,20},{11,0,48
+},{11,0,207},{11,0,291},{11,0,298},{11,0,342},{11,0,365},{11,0,394},{11,0,620},{
+11,0,705},{11,0,1017},{12,0,123},{12,0,340},{12,0,406},{12,0,643},{13,0,61},{13,
+0,269},{13,0,311},{13,0,319},{13,0,486},{14,0,234},{15,0,62},{15,0,85},{16,0,71}
+,{18,0,119},{20,0,105},{135,10,1912},{4,11,71},{5,11,376},{7,11,119},{138,11,665
+},{10,0,918},{10,0,926},{4,10,686},{136,11,55},{138,10,625},{136,10,706},{132,11
+,479},{4,10,30},{133,10,43},{6,0,379},{7,0,270},{8,0,176},{8,0,183},{9,0,432},{9
+,0,661},{12,0,247},{12,0,617},{18,0,125},{7,11,607},{8,11,99},{152,11,4},{5,0,
+792},{133,0,900},{4,11,612},{133,11,561},{4,11,41},{4,10,220},{5,11,74},{7,10,
+1535},{7,11,1627},{11,11,871},{140,11,619},{135,0,1920},{7,11,94},{11,11,329},{
+11,11,965},{12,11,241},{14,11,354},{15,11,22},{148,11,63},{9,11,209},{137,11,300
+},{134,0,771},{135,0,1979},{4,0,901},{133,0,776},{142,0,254},{133,11,98},{9,11,
+16},{141,11,386},{133,11,984},{4,11,182},{6,11,205},{135,11,220},{7,10,1725},{7,
+10,1774},{138,10,393},{5,10,263},{134,10,414},{4,11,42},{9,11,205},{9,11,786},{
+138,11,659},{14,0,140},{148,0,41},{8,0,440},{10,0,359},{6,10,178},{6,11,289},{6,
+10,1750},{7,11,1670},{9,10,690},{10,10,155},{10,10,373},{11,10,698},{12,11,57},{
+13,10,155},{20,10,93},{151,11,4},{4,0,37},{5,0,334},{7,0,1253},{151,11,25},{4,0,
+508},{4,11,635},{5,10,97},{137,10,393},{139,11,533},{4,0,640},{133,0,513},{134,
+10,1639},{132,11,371},{4,11,272},{7,11,836},{7,11,1651},{145,11,89},{5,11,825},{
+6,11,444},{6,11,1640},{136,11,308},{4,10,191},{7,10,934},{8,10,647},{145,10,97},
+{12,0,246},{15,0,162},{19,0,64},{20,0,8},{20,0,95},{22,0,24},{152,0,17},{4,0,533
+},{5,10,165},{9,10,346},{138,10,655},{5,11,737},{139,10,885},{133,10,877},{8,10,
+128},{139,10,179},{137,11,307},{140,0,752},{133,0,920},{135,0,1048},{5,0,153},{6
+,0,580},{6,10,1663},{7,10,132},{7,10,1154},{7,10,1415},{7,10,1507},{12,10,493},{
+15,10,105},{151,10,15},{5,10,459},{7,10,1073},{8,10,241},{136,10,334},{138,0,391
+},{135,0,1952},{133,11,525},{8,11,641},{11,11,388},{140,11,580},{142,0,126},{134
+,0,640},{132,0,483},{7,0,1616},{9,0,69},{6,10,324},{6,10,520},{7,10,338},{7,10,
+1729},{8,10,228},{139,10,750},{5,11,493},{134,11,528},{135,0,734},{4,11,174},{
+135,11,911},{138,0,480},{9,0,495},{146,0,104},{135,10,705},{9,0,472},{4,10,73},{
+6,10,612},{7,10,927},{7,10,1330},{7,10,1822},{8,10,217},{9,10,765},{9,10,766},{
+10,10,408},{11,10,51},{11,10,793},{12,10,266},{15,10,158},{20,10,89},{150,10,32}
+,{7,11,548},{137,11,58},{4,11,32},{5,11,215},{6,11,269},{7,11,1782},{7,11,1892},
+{10,11,16},{11,11,822},{11,11,954},{141,11,481},{132,0,874},{9,0,229},{5,10,389}
+,{136,10,636},{7,11,1749},{136,11,477},{134,0,948},{5,11,308},{135,11,1088},{4,0
+,748},{139,0,1009},{136,10,21},{6,0,555},{135,0,485},{5,11,126},{8,11,297},{9,11
+,366},{9,11,445},{12,11,53},{12,11,374},{141,11,492},{7,11,1551},{139,11,361},{
+136,0,193},{136,0,472},{8,0,653},{13,0,93},{147,0,14},{132,0,984},{132,11,175},{
+5,0,172},{6,0,1971},{132,11,685},{149,11,8},{133,11,797},{13,0,83},{5,10,189},{7
+,10,442},{7,10,443},{8,10,281},{12,10,174},{141,10,261},{134,0,1568},{133,11,565
+},{139,0,384},{133,0,260},{7,0,758},{7,0,880},{7,0,1359},{9,0,164},{9,0,167},{10
+,0,156},{10,0,588},{12,0,101},{14,0,48},{15,0,70},{6,10,2},{7,10,1262},{7,10,
+1737},{8,10,22},{8,10,270},{8,10,612},{9,10,312},{9,10,436},{10,10,311},{10,10,
+623},{11,10,72},{11,10,330},{11,10,455},{12,10,321},{12,10,504},{12,10,530},{12,
+10,543},{13,10,17},{13,10,156},{13,10,334},{17,10,60},{148,10,64},{4,11,252},{7,
+11,1068},{10,11,434},{11,11,228},{11,11,426},{13,11,231},{18,11,106},{148,11,87}
+,{7,10,354},{10,10,410},{139,10,815},{6,0,367},{7,10,670},{7,10,1327},{8,10,411}
+,{8,10,435},{9,10,653},{9,10,740},{10,10,385},{11,10,222},{11,10,324},{11,10,829
+},{140,10,611},{7,0,1174},{6,10,166},{135,10,374},{146,0,121},{132,0,828},{5,11,
+231},{138,11,509},{7,11,601},{9,11,277},{9,11,674},{10,11,178},{10,11,257},{10,
+11,418},{11,11,531},{11,11,544},{11,11,585},{12,11,113},{12,11,475},{13,11,99},{
+142,11,428},{134,0,1541},{135,11,1779},{5,0,343},{134,10,398},{135,10,50},{135,
+11,1683},{4,0,440},{7,0,57},{8,0,167},{8,0,375},{9,0,82},{9,0,561},{9,0,744},{10
+,0,620},{137,11,744},{134,0,926},{6,10,517},{7,10,1159},{10,10,621},{139,10,192}
+,{137,0,827},{8,0,194},{136,0,756},{10,10,223},{139,10,645},{7,10,64},{136,10,
+245},{4,11,399},{5,11,119},{5,11,494},{7,11,751},{137,11,556},{132,0,808},{135,0
+,22},{7,10,1763},{140,10,310},{5,0,639},{7,0,1249},{11,0,896},{134,11,584},{134,
+0,1614},{135,0,860},{135,11,1121},{5,10,129},{6,10,61},{135,10,947},{4,0,102},{7
+,0,815},{7,0,1699},{139,0,964},{13,10,505},{141,10,506},{139,10,1000},{132,11,
+679},{132,0,899},{132,0,569},{5,11,694},{137,11,714},{136,0,795},{6,0,2045},{139
+,11,7},{6,0,52},{9,0,104},{9,0,559},{12,0,308},{147,0,87},{4,0,301},{132,0,604},
+{133,10,637},{136,0,779},{5,11,143},{5,11,769},{6,11,1760},{7,11,682},{7,11,1992
+},{136,11,736},{137,10,590},{147,0,32},{137,11,527},{5,10,280},{135,10,1226},{
+134,0,494},{6,0,677},{6,0,682},{134,0,1044},{133,10,281},{135,10,1064},{7,0,508}
+,{133,11,860},{6,11,422},{7,11,0},{7,11,1544},{9,11,577},{11,11,990},{12,11,141}
+,{12,11,453},{13,11,47},{141,11,266},{134,0,1014},{5,11,515},{137,11,131},{134,0
+,957},{132,11,646},{6,0,310},{7,0,1849},{8,0,72},{8,0,272},{8,0,431},{9,0,12},{9
+,0,376},{10,0,563},{10,0,630},{10,0,796},{10,0,810},{11,0,367},{11,0,599},{11,0,
+686},{140,0,672},{7,0,570},{4,11,396},{7,10,120},{7,11,728},{8,10,489},{9,11,117
+},{9,10,319},{10,10,820},{11,10,1004},{12,10,379},{12,10,679},{13,10,117},{13,11
+,202},{13,10,412},{14,10,25},{15,10,52},{15,10,161},{16,10,47},{20,11,51},{149,
+10,2},{6,11,121},{6,11,124},{6,11,357},{7,11,1138},{7,11,1295},{8,11,162},{139,
+11,655},{8,0,449},{4,10,937},{5,10,801},{136,11,449},{139,11,958},{6,0,181},{7,0
+,537},{8,0,64},{9,0,127},{10,0,496},{12,0,510},{141,0,384},{138,11,253},{4,0,244
+},{135,0,233},{133,11,237},{132,10,365},{6,0,1650},{10,0,702},{139,0,245},{5,10,
+7},{139,10,774},{13,0,463},{20,0,49},{13,11,463},{148,11,49},{4,10,734},{5,10,
+662},{134,10,430},{4,10,746},{135,10,1090},{5,10,360},{136,10,237},{137,0,338},{
+143,11,10},{7,11,571},{138,11,366},{134,0,1279},{9,11,513},{10,11,22},{10,11,39}
+,{12,11,122},{140,11,187},{133,0,896},{146,0,178},{134,0,695},{137,0,808},{134,
+11,587},{7,11,107},{7,11,838},{8,11,550},{138,11,401},{7,0,1117},{136,0,539},{4,
+10,277},{5,10,608},{6,10,493},{7,10,457},{140,10,384},{133,11,768},{12,0,257},{7
+,10,27},{135,10,316},{140,0,1003},{4,0,207},{5,0,586},{5,0,676},{6,0,448},{8,0,
+244},{11,0,1},{13,0,3},{16,0,54},{17,0,4},{18,0,13},{133,10,552},{4,10,401},{137
+,10,264},{5,0,516},{7,0,1883},{135,11,1883},{12,0,960},{132,11,894},{5,0,4},{5,0
+,810},{6,0,13},{6,0,538},{6,0,1690},{6,0,1726},{7,0,499},{7,0,1819},{8,0,148},{8
+,0,696},{8,0,791},{12,0,125},{143,0,9},{135,0,1268},{11,0,30},{14,0,315},{9,10,
+543},{10,10,524},{12,10,524},{16,10,18},{20,10,26},{148,10,65},{6,0,748},{4,10,
+205},{5,10,623},{7,10,104},{136,10,519},{11,0,542},{139,0,852},{140,0,6},{132,0,
+848},{7,0,1385},{11,0,582},{11,0,650},{11,0,901},{11,0,949},{12,0,232},{12,0,236
+},{13,0,413},{13,0,501},{18,0,116},{7,10,579},{9,10,41},{9,10,244},{9,10,669},{
+10,10,5},{11,10,861},{11,10,951},{139,10,980},{4,0,945},{6,0,1811},{6,0,1845},{6
+,0,1853},{6,0,1858},{8,0,862},{12,0,782},{12,0,788},{18,0,160},{148,0,117},{132,
+10,717},{4,0,925},{5,0,803},{8,0,698},{138,0,828},{134,0,1416},{132,0,610},{139,
+0,992},{6,0,878},{134,0,1477},{135,0,1847},{138,11,531},{137,11,539},{134,11,272
+},{133,0,383},{134,0,1404},{132,10,489},{4,11,9},{5,11,128},{7,11,368},{11,11,
+480},{148,11,3},{136,0,986},{9,0,660},{138,0,347},{135,10,892},{136,11,682},{7,0
+,572},{9,0,592},{11,0,680},{12,0,356},{140,0,550},{7,0,1411},{138,11,527},{4,11,
+2},{7,11,545},{135,11,894},{137,10,473},{11,0,64},{7,11,481},{7,10,819},{9,10,26
+},{9,10,392},{9,11,792},{10,10,152},{10,10,226},{12,10,276},{12,10,426},{12,10,
+589},{13,10,460},{15,10,97},{19,10,48},{148,10,104},{135,10,51},{136,11,445},{
+136,11,646},{135,0,606},{132,10,674},{6,0,1829},{134,0,1830},{132,10,770},{5,10,
+79},{7,10,1027},{7,10,1477},{139,10,52},{5,11,530},{142,11,113},{134,10,1666},{7
+,0,748},{139,0,700},{134,10,195},{133,10,789},{9,0,87},{10,0,365},{4,10,251},{4,
+10,688},{7,10,513},{135,10,1284},{136,11,111},{133,0,127},{6,0,198},{140,0,83},{
+133,11,556},{133,10,889},{4,10,160},{5,10,330},{7,10,1434},{136,10,174},{5,0,276
+},{6,0,55},{7,0,1369},{138,0,864},{8,11,16},{140,11,568},{6,0,1752},{136,0,726},
+{135,0,1066},{133,0,764},{6,11,186},{137,11,426},{11,0,683},{139,11,683},{6,0,
+309},{7,0,331},{138,0,550},{133,10,374},{6,0,1212},{6,0,1852},{7,0,1062},{8,0,
+874},{8,0,882},{138,0,936},{132,11,585},{134,0,1364},{7,0,986},{133,10,731},{6,0
+,723},{6,0,1408},{138,0,381},{135,0,1573},{134,0,1025},{4,10,626},{5,10,642},{6,
+10,425},{10,10,202},{139,10,141},{4,11,93},{5,11,252},{6,11,229},{7,11,291},{9,
+11,550},{139,11,644},{137,11,749},{137,11,162},{132,11,381},{135,0,1559},{6,0,
+194},{7,0,133},{10,0,493},{10,0,570},{139,0,664},{5,0,24},{5,0,569},{6,0,3},{6,0
+,119},{6,0,143},{6,0,440},{7,0,295},{7,0,599},{7,0,1686},{7,0,1854},{8,0,424},{9
+,0,43},{9,0,584},{9,0,760},{10,0,148},{10,0,328},{11,0,159},{11,0,253},{11,0,506
+},{12,0,487},{140,0,531},{6,0,661},{134,0,1517},{136,10,835},{151,10,17},{5,0,14
+},{5,0,892},{6,0,283},{7,0,234},{136,0,537},{139,0,541},{4,0,126},{8,0,635},{147
+,0,34},{4,0,316},{4,0,495},{135,0,1561},{4,11,187},{5,11,184},{5,11,690},{7,11,
+1869},{138,11,756},{139,11,783},{4,0,998},{137,0,861},{136,0,1009},{139,11,292},
+{5,11,21},{6,11,77},{6,11,157},{7,11,974},{7,11,1301},{7,11,1339},{7,11,1490},{7
+,11,1873},{137,11,628},{7,11,1283},{9,11,227},{9,11,499},{10,11,341},{11,11,325}
+,{11,11,408},{14,11,180},{15,11,144},{18,11,47},{147,11,49},{4,0,64},{5,0,352},{
+5,0,720},{6,0,368},{139,0,359},{5,10,384},{8,10,455},{140,10,48},{5,10,264},{134
+,10,184},{7,0,1577},{10,0,304},{10,0,549},{12,0,365},{13,0,220},{13,0,240},{142,
+0,33},{134,0,1107},{134,0,929},{135,0,1142},{6,0,175},{137,0,289},{5,0,432},{133
+,0,913},{6,0,279},{7,0,219},{5,10,633},{135,10,1323},{7,0,785},{7,10,359},{8,10,
+243},{140,10,175},{139,0,595},{132,10,105},{8,11,398},{9,11,681},{139,11,632},{
+140,0,80},{5,0,931},{134,0,1698},{142,11,241},{134,11,20},{134,0,1323},{11,0,526
+},{11,0,939},{141,0,290},{5,0,774},{6,0,780},{6,0,1637},{6,0,1686},{6,0,1751},{8
+,0,559},{141,0,109},{141,0,127},{7,0,1167},{11,0,934},{13,0,391},{17,0,76},{135,
+11,709},{135,0,963},{6,0,260},{135,0,1484},{134,0,573},{4,10,758},{139,11,941},{
+135,10,1649},{145,11,36},{4,0,292},{137,0,580},{4,0,736},{5,0,871},{6,0,1689},{
+135,0,1944},{7,11,945},{11,11,713},{139,11,744},{134,0,1164},{135,11,937},{6,0,
+1922},{9,0,982},{15,0,173},{15,0,178},{15,0,200},{18,0,189},{18,0,207},{21,0,47}
+,{135,11,1652},{7,0,1695},{139,10,128},{6,0,63},{135,0,920},{133,0,793},{143,11,
+134},{133,10,918},{5,0,67},{6,0,62},{6,0,374},{135,0,1391},{9,0,790},{12,0,47},{
+4,11,579},{5,11,226},{5,11,323},{135,11,960},{10,11,784},{141,11,191},{4,0,391},
+{135,0,1169},{137,0,443},{13,11,232},{146,11,35},{132,10,340},{132,0,271},{137,
+11,313},{5,11,973},{137,11,659},{134,0,1140},{6,11,135},{135,11,1176},{4,0,253},
+{5,0,544},{7,0,300},{137,0,340},{7,0,897},{5,10,985},{7,10,509},{145,10,96},{138
+,11,735},{135,10,1919},{138,0,890},{5,0,818},{134,0,1122},{5,0,53},{5,0,541},{6,
+0,94},{6,0,499},{7,0,230},{139,0,321},{4,0,920},{5,0,25},{5,0,790},{6,0,457},{7,
+0,853},{8,0,788},{142,11,31},{132,10,247},{135,11,314},{132,0,468},{7,0,243},{6,
+10,337},{7,10,494},{8,10,27},{8,10,599},{138,10,153},{4,10,184},{5,10,390},{7,10
+,618},{7,10,1456},{139,10,710},{134,0,870},{134,0,1238},{134,0,1765},{10,0,853},
+{10,0,943},{14,0,437},{14,0,439},{14,0,443},{14,0,446},{14,0,452},{14,0,469},{14
+,0,471},{14,0,473},{16,0,93},{16,0,102},{16,0,110},{148,0,121},{4,0,605},{7,0,
+518},{7,0,1282},{7,0,1918},{10,0,180},{139,0,218},{133,0,822},{4,0,634},{11,0,
+916},{142,0,419},{6,11,281},{7,11,6},{8,11,282},{8,11,480},{8,11,499},{9,11,198}
+,{10,11,143},{10,11,169},{10,11,211},{10,11,417},{10,11,574},{11,11,147},{11,11,
+395},{12,11,75},{12,11,407},{12,11,608},{13,11,500},{142,11,251},{134,0,898},{6,
+0,36},{7,0,658},{8,0,454},{150,11,48},{133,11,674},{135,11,1776},{4,11,419},{10,
+10,227},{11,10,497},{11,10,709},{140,10,415},{6,10,360},{7,10,1664},{136,10,478}
+,{137,0,806},{12,11,508},{14,11,102},{14,11,226},{144,11,57},{135,11,1123},{4,11
+,138},{7,11,1012},{7,11,1280},{137,11,76},{5,11,29},{140,11,638},{136,10,699},{
+134,0,1326},{132,0,104},{135,11,735},{132,10,739},{134,0,1331},{7,0,260},{135,11
+,260},{135,11,1063},{7,0,45},{9,0,542},{9,0,566},{10,0,728},{137,10,869},{4,10,
+67},{5,10,422},{7,10,1037},{7,10,1289},{7,10,1555},{9,10,741},{145,10,108},{139,
+0,263},{134,0,1516},{14,0,146},{15,0,42},{16,0,23},{17,0,86},{146,0,17},{138,0,
+468},{136,0,1005},{4,11,17},{5,11,23},{7,11,995},{11,11,383},{11,11,437},{12,11,
+460},{140,11,532},{7,0,87},{142,0,288},{138,10,96},{135,11,626},{144,10,26},{7,0
+,988},{7,0,1939},{9,0,64},{9,0,502},{12,0,22},{12,0,34},{13,0,12},{13,0,234},{
+147,0,77},{13,0,133},{8,10,203},{11,10,823},{11,10,846},{12,10,482},{13,10,277},
+{13,10,302},{13,10,464},{14,10,205},{142,10,221},{4,10,449},{133,10,718},{135,0,
+141},{6,0,1842},{136,0,872},{8,11,70},{12,11,171},{141,11,272},{4,10,355},{6,10,
+311},{9,10,256},{138,10,404},{132,0,619},{137,0,261},{10,11,233},{10,10,758},{
+139,11,76},{5,0,246},{8,0,189},{9,0,355},{9,0,512},{10,0,124},{10,0,453},{11,0,
+143},{11,0,416},{11,0,859},{141,0,341},{134,11,442},{133,10,827},{5,10,64},{140,
+10,581},{4,10,442},{7,10,1047},{7,10,1352},{135,10,1643},{134,11,1709},{5,0,678}
+,{6,0,305},{7,0,775},{7,0,1065},{133,10,977},{11,11,69},{12,11,105},{12,11,117},
+{13,11,213},{14,11,13},{14,11,62},{14,11,177},{14,11,421},{15,11,19},{146,11,141
+},{137,11,309},{5,0,35},{7,0,862},{7,0,1886},{138,0,179},{136,0,285},{132,0,517}
+,{7,11,976},{9,11,146},{10,11,206},{10,11,596},{13,11,218},{142,11,153},{132,10,
+254},{6,0,214},{12,0,540},{4,10,275},{7,10,1219},{140,10,376},{8,0,667},{11,0,
+403},{146,0,83},{12,0,74},{10,11,648},{11,11,671},{143,11,46},{135,0,125},{134,
+10,1753},{133,0,761},{6,0,912},{4,11,518},{6,10,369},{6,10,502},{7,10,1036},{7,
+11,1136},{8,10,348},{9,10,452},{10,10,26},{11,10,224},{11,10,387},{11,10,772},{
+12,10,95},{12,10,629},{13,10,195},{13,10,207},{13,10,241},{14,10,260},{14,10,270
+},{143,10,140},{10,0,131},{140,0,72},{132,10,269},{5,10,480},{7,10,532},{7,10,
+1197},{7,10,1358},{8,10,291},{11,10,349},{142,10,396},{8,11,689},{137,11,863},{8
+,0,333},{138,0,182},{4,11,18},{7,11,145},{7,11,444},{7,11,1278},{8,11,49},{8,11,
+400},{9,11,71},{9,11,250},{10,11,459},{12,11,160},{144,11,24},{14,11,35},{142,11
+,191},{135,11,1864},{135,0,1338},{148,10,15},{14,0,94},{15,0,65},{16,0,4},{16,0,
+77},{16,0,80},{145,0,5},{12,11,82},{143,11,36},{133,11,1010},{133,0,449},{133,0,
+646},{7,0,86},{8,0,103},{135,10,657},{7,0,2028},{138,0,641},{136,10,533},{134,0,
+1},{139,11,970},{5,11,87},{7,11,313},{7,11,1103},{10,11,112},{10,11,582},{11,11,
+389},{11,11,813},{12,11,385},{13,11,286},{14,11,124},{146,11,108},{6,0,869},{132
+,11,267},{6,0,277},{7,0,1274},{7,0,1386},{146,0,87},{6,0,187},{7,0,39},{7,0,1203
+},{8,0,380},{14,0,117},{149,0,28},{4,10,211},{4,10,332},{5,10,335},{6,10,238},{7
+,10,269},{7,10,811},{7,10,1797},{8,10,836},{9,10,507},{141,10,242},{4,0,785},{5,
+0,368},{6,0,297},{7,0,793},{139,0,938},{7,0,464},{8,0,558},{11,0,105},{12,0,231}
+,{14,0,386},{15,0,102},{148,0,75},{133,10,1009},{8,0,877},{140,0,731},{139,11,
+289},{10,11,249},{139,11,209},{132,11,561},{134,0,1608},{132,11,760},{134,0,1429
+},{9,11,154},{140,11,485},{5,10,228},{6,10,203},{7,10,156},{8,10,347},{137,10,
+265},{7,0,1010},{11,0,733},{11,0,759},{13,0,34},{14,0,427},{146,0,45},{7,10,1131
+},{135,10,1468},{136,11,255},{7,0,1656},{9,0,369},{10,0,338},{10,0,490},{11,0,
+154},{11,0,545},{11,0,775},{13,0,77},{141,0,274},{133,11,621},{134,0,1038},{4,11
+,368},{135,11,641},{6,0,2010},{8,0,979},{8,0,985},{10,0,951},{138,0,1011},{134,0
+,1005},{19,0,121},{5,10,291},{5,10,318},{7,10,765},{9,10,389},{140,10,548},{5,0,
+20},{6,0,298},{7,0,659},{137,0,219},{7,0,1440},{11,0,854},{11,0,872},{11,0,921},
+{12,0,551},{13,0,472},{142,0,367},{5,0,490},{6,0,615},{6,0,620},{135,0,683},{6,0
+,1070},{134,0,1597},{139,0,522},{132,0,439},{136,0,669},{6,0,766},{6,0,1143},{6,
+0,1245},{10,10,525},{139,10,82},{9,11,92},{147,11,91},{6,0,668},{134,0,1218},{6,
+11,525},{9,11,876},{140,11,284},{132,0,233},{136,0,547},{132,10,422},{5,10,355},
+{145,10,0},{6,11,300},{135,11,1515},{4,0,482},{137,10,905},{4,0,886},{7,0,346},{
+133,11,594},{133,10,865},{5,10,914},{134,10,1625},{135,0,334},{5,0,795},{6,0,
+1741},{133,10,234},{135,10,1383},{6,11,1641},{136,11,820},{135,0,371},{7,11,1313
+},{138,11,660},{135,10,1312},{135,0,622},{7,0,625},{135,0,1750},{135,0,339},{4,0
+,203},{135,0,1936},{15,0,29},{16,0,38},{15,11,29},{144,11,38},{5,0,338},{135,0,
+1256},{135,10,1493},{10,0,130},{6,10,421},{7,10,61},{7,10,1540},{138,10,501},{6,
+11,389},{7,11,149},{9,11,142},{138,11,94},{137,10,341},{11,0,678},{12,0,307},{
+142,10,98},{6,11,8},{7,11,1881},{136,11,91},{135,0,2044},{6,0,770},{6,0,802},{6,
+0,812},{7,0,311},{9,0,308},{12,0,255},{6,10,102},{7,10,72},{15,10,142},{147,10,
+67},{151,10,30},{135,10,823},{135,0,1266},{135,11,1746},{135,10,1870},{4,0,400},
+{5,0,267},{135,0,232},{7,11,24},{11,11,542},{139,11,852},{135,11,1739},{4,11,503
+},{135,11,1661},{5,11,130},{7,11,1314},{9,11,610},{10,11,718},{11,11,601},{11,11
+,819},{11,11,946},{140,11,536},{10,11,149},{11,11,280},{142,11,336},{7,0,739},{
+11,0,690},{7,11,1946},{8,10,48},{8,10,88},{8,10,582},{8,10,681},{9,10,373},{9,10
+,864},{11,10,157},{11,10,843},{148,10,27},{134,0,990},{4,10,88},{5,10,137},{5,10
+,174},{5,10,777},{6,10,1664},{6,10,1725},{7,10,77},{7,10,426},{7,10,1317},{7,10,
+1355},{8,10,126},{8,10,563},{9,10,523},{9,10,750},{10,10,310},{10,10,836},{11,10
+,42},{11,10,318},{11,10,731},{12,10,68},{12,10,92},{12,10,507},{12,10,692},{13,
+10,81},{13,10,238},{13,10,374},{14,10,436},{18,10,138},{19,10,78},{19,10,111},{
+20,10,55},{20,10,77},{148,10,92},{141,10,418},{7,0,1831},{132,10,938},{6,0,776},
+{134,0,915},{138,10,351},{5,11,348},{6,11,522},{6,10,1668},{7,10,1499},{8,10,117
+},{9,10,314},{138,10,174},{135,10,707},{132,0,613},{133,10,403},{132,11,392},{5,
+11,433},{9,11,633},{139,11,629},{133,0,763},{132,0,878},{132,0,977},{132,0,100},
+{6,0,463},{4,10,44},{5,10,311},{7,10,639},{7,10,762},{7,10,1827},{9,10,8},{9,10,
+462},{148,10,83},{134,11,234},{4,10,346},{7,10,115},{9,10,180},{9,10,456},{138,
+10,363},{5,0,362},{5,0,443},{6,0,318},{7,0,1019},{139,0,623},{5,0,463},{8,0,296}
+,{7,11,140},{7,11,1950},{8,11,680},{11,11,817},{147,11,88},{7,11,1222},{138,11,
+386},{142,0,137},{132,0,454},{7,0,1914},{6,11,5},{7,10,1051},{9,10,545},{11,11,
+249},{12,11,313},{16,11,66},{145,11,26},{135,0,1527},{145,0,58},{148,11,59},{5,0
+,48},{5,0,404},{6,0,557},{7,0,458},{8,0,597},{10,0,455},{10,0,606},{11,0,49},{11
+,0,548},{12,0,476},{13,0,18},{141,0,450},{5,11,963},{134,11,1773},{133,0,729},{
+138,11,586},{5,0,442},{135,0,1984},{134,0,449},{144,0,40},{4,0,853},{7,11,180},{
+8,11,509},{136,11,792},{6,10,185},{7,10,1899},{9,10,875},{139,10,673},{134,11,
+524},{12,0,227},{4,10,327},{5,10,478},{7,10,1332},{136,10,753},{6,0,1491},{5,10,
+1020},{133,10,1022},{4,10,103},{133,10,401},{132,11,931},{4,10,499},{135,10,1421
+},{5,0,55},{7,0,376},{140,0,161},{133,0,450},{6,0,1174},{134,0,1562},{10,0,62},{
+13,0,400},{135,11,1837},{140,0,207},{135,0,869},{4,11,773},{5,11,618},{137,11,
+756},{132,10,96},{4,0,213},{7,0,223},{8,0,80},{135,10,968},{4,11,90},{5,11,337},
+{5,11,545},{7,11,754},{9,11,186},{10,11,72},{10,11,782},{11,11,513},{11,11,577},
+{11,11,610},{11,11,889},{11,11,961},{12,11,354},{12,11,362},{12,11,461},{12,11,
+595},{13,11,79},{143,11,121},{7,0,381},{7,0,806},{7,0,820},{8,0,354},{8,0,437},{
+8,0,787},{9,0,657},{10,0,58},{10,0,339},{10,0,749},{11,0,914},{12,0,162},{13,0,
+75},{14,0,106},{14,0,198},{14,0,320},{14,0,413},{146,0,43},{136,0,747},{136,0,
+954},{134,0,1073},{135,0,556},{7,11,151},{9,11,329},{139,11,254},{5,0,692},{134,
+0,1395},{6,10,563},{137,10,224},{134,0,191},{132,0,804},{9,11,187},{10,11,36},{
+17,11,44},{146,11,64},{7,11,165},{7,11,919},{136,11,517},{4,11,506},{5,11,295},{
+7,11,1680},{15,11,14},{144,11,5},{4,0,706},{6,0,162},{7,0,1960},{136,0,831},{135
+,11,1376},{7,11,987},{9,11,688},{10,11,522},{11,11,788},{140,11,566},{150,0,35},
+{138,0,426},{135,0,1235},{135,11,1741},{7,11,389},{7,11,700},{7,11,940},{8,11,
+514},{9,11,116},{9,11,535},{10,11,118},{11,11,107},{11,11,148},{11,11,922},{12,
+11,254},{12,11,421},{142,11,238},{134,0,1234},{132,11,743},{4,10,910},{5,10,832}
+,{135,11,1335},{141,0,96},{135,11,185},{146,0,149},{4,0,204},{137,0,902},{4,11,
+784},{133,11,745},{136,0,833},{136,0,949},{7,0,366},{9,0,287},{12,0,199},{12,0,
+556},{12,0,577},{5,11,81},{7,11,146},{7,11,1342},{7,11,1446},{8,11,53},{8,11,561
+},{8,11,694},{8,11,754},{9,11,97},{9,11,115},{9,11,894},{10,11,462},{10,11,813},
+{11,11,230},{11,11,657},{11,11,699},{11,11,748},{12,11,119},{12,11,200},{12,11,
+283},{14,11,273},{145,11,15},{5,11,408},{137,11,747},{9,11,498},{140,11,181},{6,
+0,2020},{136,0,992},{5,0,356},{135,0,224},{134,0,784},{7,0,630},{9,0,567},{11,0,
+150},{11,0,444},{13,0,119},{8,10,528},{137,10,348},{134,0,539},{4,10,20},{133,10
+,616},{142,0,27},{7,11,30},{8,11,86},{8,11,315},{8,11,700},{9,11,576},{9,11,858}
+,{11,11,310},{11,11,888},{11,11,904},{12,11,361},{141,11,248},{138,11,839},{134,
+0,755},{134,0,1063},{7,10,1091},{135,10,1765},{134,11,428},{7,11,524},{8,11,169}
+,{8,11,234},{9,11,480},{138,11,646},{139,0,814},{7,11,1462},{139,11,659},{4,10,
+26},{5,10,429},{6,10,245},{7,10,704},{7,10,1379},{135,10,1474},{7,11,1205},{138,
+11,637},{139,11,803},{132,10,621},{136,0,987},{4,11,266},{8,11,4},{9,11,39},{10,
+11,166},{11,11,918},{12,11,635},{20,11,10},{22,11,27},{150,11,43},{4,0,235},{135
+,0,255},{4,0,194},{5,0,584},{6,0,384},{7,0,583},{10,0,761},{11,0,760},{139,0,851
+},{133,10,542},{134,0,1086},{133,10,868},{8,0,1016},{136,0,1018},{7,0,1396},{7,
+11,1396},{136,10,433},{135,10,1495},{138,10,215},{141,10,124},{7,11,157},{8,11,
+279},{9,11,759},{16,11,31},{16,11,39},{16,11,75},{18,11,24},{20,11,42},{152,11,1
+},{5,0,562},{134,11,604},{134,0,913},{5,0,191},{137,0,271},{4,0,470},{6,0,153},{
+7,0,1503},{7,0,1923},{10,0,701},{11,0,132},{11,0,227},{11,0,320},{11,0,436},{11,
+0,525},{11,0,855},{11,0,873},{12,0,41},{12,0,286},{13,0,103},{13,0,284},{14,0,
+255},{14,0,262},{15,0,117},{143,0,127},{7,0,475},{12,0,45},{147,10,112},{132,11,
+567},{137,11,859},{6,0,713},{6,0,969},{6,0,1290},{134,0,1551},{133,0,327},{6,0,
+552},{6,0,1292},{7,0,1754},{137,0,604},{4,0,223},{6,0,359},{11,0,3},{13,0,108},{
+14,0,89},{16,0,22},{5,11,762},{7,11,1880},{9,11,680},{139,11,798},{5,0,80},{6,0,
+405},{7,0,403},{7,0,1502},{8,0,456},{9,0,487},{9,0,853},{9,0,889},{10,0,309},{11
+,0,721},{11,0,994},{12,0,430},{141,0,165},{133,11,298},{132,10,647},{134,0,2016}
+,{18,10,10},{146,11,10},{4,0,453},{5,0,887},{6,0,535},{8,0,6},{8,0,543},{136,0,
+826},{136,0,975},{10,0,961},{138,0,962},{138,10,220},{6,0,1891},{6,0,1893},{9,0,
+916},{9,0,965},{9,0,972},{12,0,801},{12,0,859},{12,0,883},{15,0,226},{149,0,51},
+{132,10,109},{135,11,267},{7,11,92},{7,11,182},{8,11,453},{9,11,204},{11,11,950}
+,{12,11,94},{12,11,644},{16,11,20},{16,11,70},{16,11,90},{147,11,55},{134,10,
+1746},{6,11,71},{7,11,845},{7,11,1308},{8,11,160},{137,11,318},{5,0,101},{6,0,88
+},{7,0,263},{7,0,628},{7,0,1677},{8,0,349},{9,0,100},{10,0,677},{14,0,169},{14,0
+,302},{14,0,313},{15,0,48},{15,0,84},{7,11,237},{8,11,664},{9,11,42},{9,11,266},
+{9,11,380},{9,11,645},{10,11,177},{138,11,276},{138,11,69},{4,0,310},{7,0,708},{
+7,0,996},{9,0,795},{10,0,390},{10,0,733},{11,0,451},{12,0,249},{14,0,115},{14,0,
+286},{143,0,100},{5,0,587},{4,10,40},{10,10,67},{11,10,117},{11,10,768},{139,10,
+935},{6,0,1942},{7,0,512},{136,0,983},{7,10,992},{8,10,301},{9,10,722},{12,10,63
+},{13,10,29},{14,10,161},{143,10,18},{136,11,76},{139,10,923},{134,0,645},{134,0
+,851},{4,0,498},{132,11,293},{7,0,217},{8,0,140},{10,0,610},{14,11,352},{17,11,
+53},{18,11,146},{18,11,152},{19,11,11},{150,11,54},{134,0,1448},{138,11,841},{
+133,0,905},{4,11,605},{7,11,518},{7,11,1282},{7,11,1918},{10,11,180},{139,11,218
+},{139,11,917},{135,10,825},{140,10,328},{4,0,456},{7,0,105},{7,0,358},{7,0,1637
+},{8,0,643},{139,0,483},{134,0,792},{6,11,96},{135,11,1426},{137,11,691},{4,11,
+651},{133,11,289},{7,11,688},{8,11,35},{9,11,511},{10,11,767},{147,11,118},{150,
+0,56},{5,0,243},{5,0,535},{6,10,204},{10,10,320},{10,10,583},{13,10,502},{14,10,
+72},{14,10,274},{14,10,312},{14,10,344},{15,10,159},{16,10,62},{16,10,69},{17,10
+,30},{18,10,42},{18,10,53},{18,10,84},{18,10,140},{19,10,68},{19,10,85},{20,10,5
+},{20,10,45},{20,10,101},{22,10,7},{150,10,20},{4,10,558},{6,10,390},{7,10,162},
+{7,10,689},{9,10,360},{138,10,653},{146,11,23},{135,0,1748},{5,10,856},{6,10,
+1672},{6,10,1757},{134,10,1781},{5,0,539},{5,0,754},{6,0,876},{132,11,704},{135,
+11,1078},{5,10,92},{10,10,736},{140,10,102},{17,0,91},{5,10,590},{137,10,213},{
+134,0,1565},{6,0,91},{135,0,435},{4,0,939},{140,0,792},{134,0,1399},{4,0,16},{5,
+0,316},{5,0,842},{6,0,370},{6,0,1778},{8,0,166},{11,0,812},{12,0,206},{12,0,351}
+,{14,0,418},{16,0,15},{16,0,34},{18,0,3},{19,0,3},{19,0,7},{20,0,4},{21,0,21},{4
+,11,720},{133,11,306},{144,0,95},{133,11,431},{132,11,234},{135,0,551},{4,0,999}
+,{6,0,1966},{134,0,2042},{7,0,619},{10,0,547},{11,0,122},{12,0,601},{15,0,7},{
+148,0,20},{5,11,464},{6,11,236},{7,11,276},{7,11,696},{7,11,914},{7,11,1108},{7,
+11,1448},{9,11,15},{9,11,564},{10,11,14},{12,11,565},{13,11,449},{14,11,53},{15,
+11,13},{16,11,64},{145,11,41},{6,0,884},{6,0,1019},{134,0,1150},{6,11,1767},{12,
+11,194},{145,11,107},{136,10,503},{133,11,840},{7,0,671},{134,10,466},{132,0,888
+},{4,0,149},{138,0,368},{4,0,154},{7,0,1134},{136,0,105},{135,0,983},{9,11,642},
+{11,11,236},{142,11,193},{4,0,31},{6,0,429},{7,0,962},{9,0,458},{139,0,691},{6,0
+,643},{134,0,1102},{132,0,312},{4,11,68},{5,11,634},{6,11,386},{7,11,794},{8,11,
+273},{9,11,563},{10,11,105},{10,11,171},{11,11,94},{139,11,354},{133,0,740},{135
+,0,1642},{4,11,95},{7,11,416},{8,11,211},{139,11,830},{132,0,236},{138,10,241},{
+7,11,731},{13,11,20},{143,11,11},{5,0,836},{5,0,857},{6,0,1680},{135,0,59},{10,0
+,68},{11,0,494},{152,11,6},{4,0,81},{139,0,867},{135,0,795},{133,11,689},{4,0,
+1001},{5,0,282},{6,0,1932},{6,0,1977},{6,0,1987},{6,0,1992},{8,0,650},{8,0,919},
+{8,0,920},{8,0,923},{8,0,926},{8,0,927},{8,0,931},{8,0,939},{8,0,947},{8,0,956},
+{8,0,997},{9,0,907},{10,0,950},{10,0,953},{10,0,954},{10,0,956},{10,0,958},{10,0
+,959},{10,0,964},{10,0,970},{10,0,972},{10,0,973},{10,0,975},{10,0,976},{10,0,
+980},{10,0,981},{10,0,984},{10,0,988},{10,0,990},{10,0,995},{10,0,999},{10,0,
+1002},{10,0,1003},{10,0,1005},{10,0,1006},{10,0,1008},{10,0,1009},{10,0,1012},{
+10,0,1014},{10,0,1015},{10,0,1019},{10,0,1020},{10,0,1022},{12,0,959},{12,0,961}
+,{12,0,962},{12,0,963},{12,0,964},{12,0,965},{12,0,967},{12,0,968},{12,0,969},{
+12,0,970},{12,0,971},{12,0,972},{12,0,973},{12,0,974},{12,0,975},{12,0,976},{12,
+0,977},{12,0,979},{12,0,981},{12,0,982},{12,0,983},{12,0,984},{12,0,985},{12,0,
+986},{12,0,987},{12,0,989},{12,0,990},{12,0,992},{12,0,993},{12,0,995},{12,0,998
+},{12,0,999},{12,0,1000},{12,0,1001},{12,0,1002},{12,0,1004},{12,0,1005},{12,0,
+1006},{12,0,1007},{12,0,1008},{12,0,1009},{12,0,1010},{12,0,1011},{12,0,1012},{
+12,0,1014},{12,0,1015},{12,0,1016},{12,0,1017},{12,0,1018},{12,0,1019},{12,0,
+1022},{12,0,1023},{14,0,475},{14,0,477},{14,0,478},{14,0,479},{14,0,480},{14,0,
+482},{14,0,483},{14,0,484},{14,0,485},{14,0,486},{14,0,487},{14,0,488},{14,0,489
+},{14,0,490},{14,0,491},{14,0,492},{14,0,493},{14,0,494},{14,0,495},{14,0,496},{
+14,0,497},{14,0,498},{14,0,499},{14,0,500},{14,0,501},{14,0,502},{14,0,503},{14,
+0,504},{14,0,506},{14,0,507},{14,0,508},{14,0,509},{14,0,510},{14,0,511},{16,0,
+113},{16,0,114},{16,0,115},{16,0,117},{16,0,118},{16,0,119},{16,0,121},{16,0,122
+},{16,0,123},{16,0,124},{16,0,125},{16,0,126},{16,0,127},{18,0,242},{18,0,243},{
+18,0,244},{18,0,245},{18,0,248},{18,0,249},{18,0,250},{18,0,251},{18,0,252},{18,
+0,253},{18,0,254},{18,0,255},{20,0,125},{20,0,126},{148,0,127},{7,11,1717},{7,11
+,1769},{138,11,546},{7,11,1127},{7,11,1572},{10,11,297},{10,11,422},{11,11,764},
+{11,11,810},{12,11,264},{13,11,102},{13,11,300},{13,11,484},{14,11,147},{14,11,
+229},{17,11,71},{18,11,118},{147,11,120},{6,0,1148},{134,0,1586},{132,0,775},{
+135,10,954},{133,11,864},{133,11,928},{138,11,189},{135,10,1958},{6,10,549},{8,
+10,34},{8,10,283},{9,10,165},{138,10,475},{5,10,652},{5,10,701},{135,10,449},{
+135,11,695},{4,10,655},{7,10,850},{17,10,75},{146,10,137},{140,11,682},{133,11,
+523},{8,0,970},{136,10,670},{136,11,555},{7,11,76},{8,11,44},{9,11,884},{10,11,
+580},{11,11,399},{11,11,894},{15,11,122},{18,11,144},{147,11,61},{6,10,159},{6,
+10,364},{7,10,516},{7,10,1439},{137,10,518},{4,0,71},{5,0,376},{7,0,119},{138,0,
+665},{141,10,151},{11,0,827},{14,0,34},{143,0,148},{133,11,518},{4,0,479},{135,
+11,1787},{135,11,1852},{135,10,993},{7,0,607},{136,0,99},{134,0,1960},{132,0,793
+},{4,0,41},{5,0,74},{7,0,1627},{11,0,871},{140,0,619},{7,0,94},{11,0,329},{11,0,
+965},{12,0,241},{14,0,354},{15,0,22},{148,0,63},{7,10,501},{9,10,111},{10,10,141
+},{11,10,332},{13,10,43},{13,10,429},{14,10,130},{14,10,415},{145,10,102},{9,0,
+209},{137,0,300},{134,0,1497},{138,11,255},{4,11,934},{5,11,138},{136,11,610},{
+133,0,98},{6,0,1316},{10,11,804},{138,11,832},{8,11,96},{9,11,36},{10,11,607},{
+11,11,423},{11,11,442},{12,11,309},{14,11,199},{15,11,90},{145,11,110},{132,0,
+463},{5,10,149},{136,10,233},{133,10,935},{4,11,652},{8,11,320},{9,11,13},{9,11,
+398},{9,11,727},{10,11,75},{10,11,184},{10,11,230},{10,11,564},{10,11,569},{11,
+11,973},{12,11,70},{12,11,189},{13,11,57},{13,11,257},{22,11,6},{150,11,16},{142
+,0,291},{12,10,582},{146,10,131},{136,10,801},{133,0,984},{145,11,116},{4,11,692
+},{133,11,321},{4,0,182},{6,0,205},{135,0,220},{4,0,42},{9,0,205},{9,0,786},{138
+,0,659},{6,0,801},{11,11,130},{140,11,609},{132,0,635},{5,11,345},{135,11,1016},
+{139,0,533},{132,0,371},{4,0,272},{135,0,836},{6,0,1282},{135,11,1100},{5,0,825}
+,{134,0,1640},{135,11,1325},{133,11,673},{4,11,287},{133,11,1018},{135,0,357},{6
+,0,467},{137,0,879},{7,0,317},{135,0,569},{6,0,924},{134,0,1588},{5,11,34},{5,10
+,406},{10,11,724},{12,11,444},{13,11,354},{18,11,32},{23,11,24},{23,11,31},{152,
+11,5},{6,0,1795},{6,0,1835},{6,0,1836},{6,0,1856},{8,0,844},{8,0,849},{8,0,854},
+{8,0,870},{8,0,887},{10,0,852},{138,0,942},{6,10,69},{135,10,117},{137,0,307},{4
+,0,944},{6,0,1799},{6,0,1825},{10,0,848},{10,0,875},{10,0,895},{10,0,899},{10,0,
+902},{140,0,773},{11,0,43},{13,0,72},{141,0,142},{135,10,1830},{134,11,382},{4,
+10,432},{135,10,824},{132,11,329},{7,0,1820},{139,11,124},{133,10,826},{133,0,
+525},{132,11,906},{7,11,1940},{136,11,366},{138,11,10},{4,11,123},{4,11,649},{5,
+11,605},{7,11,1509},{136,11,36},{6,0,110},{135,0,1681},{133,0,493},{133,11,767},
+{4,0,174},{135,0,911},{138,11,786},{8,0,417},{137,0,782},{133,10,1000},{7,0,733}
+,{137,0,583},{4,10,297},{6,10,529},{7,10,152},{7,10,713},{7,10,1845},{8,10,710},
+{8,10,717},{12,10,639},{140,10,685},{4,0,32},{5,0,215},{6,0,269},{7,0,1782},{7,0
+,1892},{10,0,16},{11,0,822},{11,0,954},{141,0,481},{4,11,273},{5,11,658},{133,11
+,995},{136,0,477},{134,11,72},{135,11,1345},{5,0,308},{7,0,1088},{4,10,520},{135
+,10,575},{133,11,589},{5,0,126},{8,0,297},{9,0,366},{140,0,374},{7,0,1551},{139,
+0,361},{5,11,117},{6,11,514},{6,11,541},{7,11,1164},{7,11,1436},{8,11,220},{8,11
+,648},{10,11,688},{139,11,560},{133,11,686},{4,0,946},{6,0,1807},{8,0,871},{10,0
+,854},{10,0,870},{10,0,888},{10,0,897},{10,0,920},{12,0,722},{12,0,761},{12,0,
+763},{12,0,764},{14,0,454},{14,0,465},{16,0,107},{18,0,167},{18,0,168},{146,0,
+172},{132,0,175},{135,0,1307},{132,0,685},{135,11,1834},{133,0,797},{6,0,745},{6
+,0,858},{134,0,963},{133,0,565},{5,10,397},{6,10,154},{7,11,196},{7,10,676},{8,
+10,443},{8,10,609},{9,10,24},{9,10,325},{10,10,35},{10,11,765},{11,11,347},{11,
+10,535},{11,11,552},{11,11,576},{11,10,672},{11,11,790},{11,10,1018},{12,11,263}
+,{12,10,637},{13,11,246},{13,11,270},{13,11,395},{14,11,74},{14,11,176},{14,11,
+190},{14,11,398},{14,11,412},{15,11,32},{15,11,63},{16,10,30},{16,11,88},{147,11
+,105},{13,11,84},{141,11,122},{4,0,252},{7,0,1068},{10,0,434},{11,0,228},{11,0,
+426},{13,0,231},{18,0,106},{148,0,87},{137,0,826},{4,11,589},{139,11,282},{5,11,
+381},{135,11,1792},{132,0,791},{5,0,231},{10,0,509},{133,10,981},{7,0,601},{9,0,
+277},{9,0,674},{10,0,178},{10,0,418},{10,0,571},{11,0,531},{12,0,113},{12,0,475}
+,{13,0,99},{142,0,428},{4,10,56},{7,11,616},{7,10,1791},{8,10,607},{8,10,651},{
+10,11,413},{11,10,465},{11,10,835},{12,10,337},{141,10,480},{7,0,1591},{144,0,43
+},{9,10,158},{138,10,411},{135,0,1683},{8,0,289},{11,0,45},{12,0,278},{140,0,537
+},{6,11,120},{7,11,1188},{7,11,1710},{8,11,286},{9,11,667},{11,11,592},{139,11,
+730},{136,10,617},{135,0,1120},{135,11,1146},{139,10,563},{4,11,352},{4,10,369},
+{135,11,687},{143,11,38},{4,0,399},{5,0,119},{5,0,494},{7,0,751},{9,0,556},{14,
+11,179},{15,11,151},{150,11,11},{4,11,192},{5,11,49},{6,11,200},{6,11,293},{6,11
+,1696},{135,11,488},{4,0,398},{133,0,660},{7,0,1030},{134,10,622},{135,11,595},{
+141,0,168},{132,11,147},{7,0,973},{10,10,624},{142,10,279},{132,10,363},{132,0,
+642},{133,11,934},{134,0,1615},{7,11,505},{135,11,523},{7,0,594},{7,0,851},{7,0,
+1858},{9,0,411},{9,0,574},{9,0,666},{9,0,737},{10,0,346},{10,0,712},{11,0,246},{
+11,0,432},{11,0,517},{11,0,647},{11,0,679},{11,0,727},{12,0,304},{12,0,305},{12,
+0,323},{12,0,483},{12,0,572},{12,0,593},{12,0,602},{13,0,95},{13,0,101},{13,0,
+171},{13,0,315},{13,0,378},{13,0,425},{13,0,475},{14,0,63},{14,0,380},{14,0,384}
+,{15,0,133},{18,0,112},{148,0,72},{135,0,1093},{132,0,679},{8,0,913},{10,0,903},
+{10,0,915},{12,0,648},{12,0,649},{14,0,455},{16,0,112},{138,11,438},{137,0,203},
+{134,10,292},{134,0,1492},{7,0,1374},{8,0,540},{5,10,177},{6,10,616},{7,10,827},
+{9,10,525},{138,10,656},{135,0,1486},{9,0,714},{138,10,31},{136,0,825},{134,0,
+1511},{132,11,637},{134,0,952},{4,10,161},{133,10,631},{5,0,143},{5,0,769},{6,0,
+1760},{7,0,682},{7,0,1992},{136,0,736},{132,0,700},{134,0,1540},{132,11,777},{9,
+11,867},{138,11,837},{7,0,1557},{135,10,1684},{133,0,860},{6,0,422},{7,0,0},{7,0
+,1544},{9,0,605},{11,0,990},{12,0,235},{12,0,453},{13,0,47},{13,0,266},{9,10,469
+},{9,10,709},{12,10,512},{14,10,65},{145,10,12},{11,0,807},{10,10,229},{11,10,73
+},{139,10,376},{6,11,170},{7,11,1080},{8,11,395},{8,11,487},{11,11,125},{141,11,
+147},{5,0,515},{137,0,131},{7,0,1605},{11,0,962},{146,0,139},{132,0,646},{4,0,
+396},{7,0,728},{9,0,117},{13,0,202},{148,0,51},{6,0,121},{6,0,124},{6,0,357},{7,
+0,1138},{7,0,1295},{8,0,162},{8,0,508},{11,0,655},{4,11,535},{6,10,558},{7,10,
+651},{8,11,618},{9,10,0},{10,10,34},{139,10,1008},{135,11,1245},{138,0,357},{150
+,11,23},{133,0,237},{135,0,1784},{7,10,1832},{138,10,374},{132,0,713},{132,11,46
+},{6,0,1536},{10,0,348},{5,11,811},{6,11,1679},{6,11,1714},{135,11,2032},{11,11,
+182},{142,11,195},{6,0,523},{7,0,738},{7,10,771},{7,10,1731},{9,10,405},{138,10,
+421},{7,11,1458},{9,11,407},{139,11,15},{6,11,34},{7,11,69},{7,11,640},{7,11,
+1089},{8,11,708},{8,11,721},{9,11,363},{9,11,643},{10,11,628},{148,11,98},{133,0
+,434},{135,0,1877},{7,0,571},{138,0,366},{5,10,881},{133,10,885},{9,0,513},{10,0
+,25},{10,0,39},{12,0,122},{140,0,187},{132,0,580},{5,10,142},{134,10,546},{132,
+11,462},{137,0,873},{5,10,466},{11,10,571},{12,10,198},{13,10,283},{14,10,186},{
+15,10,21},{143,10,103},{7,0,171},{4,10,185},{5,10,257},{5,10,839},{5,10,936},{9,
+10,399},{10,10,258},{10,10,395},{10,10,734},{11,10,1014},{12,10,23},{13,10,350},
+{14,10,150},{147,10,6},{134,0,625},{7,0,107},{7,0,838},{8,0,550},{138,0,401},{5,
+11,73},{6,11,23},{134,11,338},{4,0,943},{6,0,1850},{12,0,713},{142,0,434},{11,0,
+588},{11,0,864},{11,0,936},{11,0,968},{12,0,73},{12,0,343},{12,0,394},{13,0,275}
+,{14,0,257},{15,0,160},{7,10,404},{7,10,1377},{7,10,1430},{7,10,2017},{8,10,149}
+,{8,10,239},{8,10,512},{8,10,793},{8,10,818},{9,10,474},{9,10,595},{10,10,122},{
+10,10,565},{10,10,649},{10,10,783},{11,10,239},{11,10,295},{11,10,447},{11,10,
+528},{11,10,639},{11,10,800},{12,10,25},{12,10,157},{12,10,316},{12,10,390},{12,
+10,391},{12,10,395},{12,10,478},{12,10,503},{12,10,592},{12,10,680},{13,10,50},{
+13,10,53},{13,10,132},{13,10,198},{13,10,322},{13,10,415},{13,10,511},{14,10,71}
+,{14,10,395},{15,10,71},{15,10,136},{17,10,123},{18,10,93},{147,10,58},{133,0,
+768},{11,0,103},{142,0,0},{136,10,712},{132,0,799},{132,0,894},{7,11,725},{8,11,
+498},{139,11,268},{135,11,1798},{135,11,773},{141,11,360},{4,10,377},{152,10,13}
+,{135,0,1673},{132,11,583},{134,0,1052},{133,11,220},{140,11,69},{132,11,544},{4
+,10,180},{135,10,1906},{134,0,272},{4,0,441},{134,0,1421},{4,0,9},{5,0,128},{7,0
+,368},{11,0,480},{148,0,3},{5,11,176},{6,11,437},{6,11,564},{11,11,181},{141,11,
+183},{132,10,491},{7,0,1182},{141,11,67},{6,0,1346},{4,10,171},{138,10,234},{4,
+10,586},{7,10,1186},{138,10,631},{136,0,682},{134,0,1004},{15,0,24},{143,11,24},
+{134,0,968},{4,0,2},{6,0,742},{6,0,793},{7,0,545},{7,0,894},{9,10,931},{10,10,
+334},{148,10,71},{136,11,600},{133,10,765},{9,0,769},{140,0,185},{4,11,790},{5,
+11,273},{134,11,394},{7,0,474},{137,0,578},{4,11,135},{6,11,127},{7,11,1185},{7,
+11,1511},{8,11,613},{11,11,5},{12,11,133},{12,11,495},{12,11,586},{14,11,385},{
+15,11,118},{17,11,20},{146,11,98},{133,10,424},{5,0,530},{142,0,113},{6,11,230},
+{7,11,961},{7,11,1085},{136,11,462},{7,11,1954},{137,11,636},{136,10,714},{149,
+11,6},{135,10,685},{9,10,420},{10,10,269},{10,10,285},{10,10,576},{11,10,397},{
+13,10,175},{145,10,90},{132,10,429},{5,0,556},{5,11,162},{136,11,68},{132,11,654
+},{4,11,156},{7,11,998},{7,11,1045},{7,11,1860},{9,11,48},{9,11,692},{11,11,419}
+,{139,11,602},{6,0,1317},{8,0,16},{9,0,825},{12,0,568},{7,11,1276},{8,11,474},{
+137,11,652},{18,0,97},{7,10,18},{7,10,699},{7,10,1966},{8,10,752},{9,10,273},{9,
+10,412},{9,10,703},{10,10,71},{10,10,427},{138,10,508},{10,0,703},{7,11,1454},{
+138,11,703},{4,10,53},{5,10,186},{135,10,752},{134,0,892},{134,0,1571},{8,10,575
+},{10,10,289},{139,10,319},{6,0,186},{137,0,426},{134,0,1101},{132,10,675},{132,
+0,585},{6,0,1870},{137,0,937},{152,11,10},{9,11,197},{10,11,300},{12,11,473},{13
+,11,90},{141,11,405},{4,0,93},{5,0,252},{6,0,229},{7,0,291},{9,0,550},{139,0,644
+},{137,0,749},{9,0,162},{6,10,209},{8,10,468},{9,10,210},{11,10,36},{12,10,28},{
+12,10,630},{13,10,21},{13,10,349},{14,10,7},{145,10,13},{132,0,381},{132,11,606}
+,{4,10,342},{135,10,1179},{7,11,1587},{7,11,1707},{10,11,528},{139,11,504},{12,
+11,39},{13,11,265},{141,11,439},{4,10,928},{133,10,910},{7,10,1838},{7,11,1978},
+{136,11,676},{6,0,762},{6,0,796},{134,0,956},{4,10,318},{4,10,496},{7,10,856},{
+139,10,654},{137,11,242},{4,11,361},{133,11,315},{132,11,461},{132,11,472},{132,
+0,857},{5,0,21},{6,0,77},{6,0,157},{7,0,974},{7,0,1301},{7,0,1339},{7,0,1490},{7
+,0,1873},{9,0,628},{7,10,915},{8,10,247},{147,10,0},{4,10,202},{5,10,382},{6,10,
+454},{7,10,936},{7,10,1803},{8,10,758},{9,10,375},{9,10,895},{10,10,743},{10,10,
+792},{11,10,978},{11,10,1012},{142,10,109},{7,11,617},{10,11,498},{11,11,501},{
+12,11,16},{140,11,150},{7,10,1150},{7,10,1425},{7,10,1453},{10,11,747},{140,10,
+513},{133,11,155},{11,0,919},{141,0,409},{138,10,791},{10,0,633},{139,11,729},{7
+,11,163},{8,11,319},{9,11,402},{10,11,24},{10,11,681},{11,11,200},{11,11,567},{
+12,11,253},{12,11,410},{142,11,219},{5,11,475},{7,11,1780},{9,11,230},{11,11,297
+},{11,11,558},{14,11,322},{147,11,76},{7,0,332},{6,10,445},{137,10,909},{135,11,
+1956},{136,11,274},{134,10,578},{135,0,1489},{135,11,1848},{5,11,944},{134,11,
+1769},{132,11,144},{136,10,766},{4,0,832},{135,10,541},{8,0,398},{9,0,681},{139,
+0,632},{136,0,645},{9,0,791},{10,0,93},{16,0,13},{17,0,23},{18,0,135},{19,0,12},
+{20,0,1},{20,0,12},{148,0,14},{6,11,247},{137,11,555},{134,0,20},{132,0,800},{
+135,0,1841},{139,10,983},{137,10,768},{132,10,584},{141,11,51},{6,0,1993},{4,11,
+620},{138,11,280},{136,0,769},{11,0,290},{11,0,665},{7,11,1810},{11,11,866},{12,
+11,103},{13,11,495},{17,11,67},{147,11,74},{134,0,1426},{139,0,60},{4,10,326},{
+135,10,1770},{7,0,1874},{9,0,641},{132,10,226},{6,0,644},{5,10,426},{8,10,30},{9
+,10,2},{11,10,549},{147,10,122},{5,11,428},{138,11,442},{135,11,1871},{135,0,
+1757},{147,10,117},{135,0,937},{135,0,1652},{6,0,654},{134,0,1476},{133,11,99},{
+135,0,527},{132,10,345},{4,10,385},{4,11,397},{7,10,265},{135,10,587},{4,0,579},
+{5,0,226},{5,0,323},{135,0,960},{134,0,1486},{8,11,502},{144,11,9},{4,10,347},{5
+,10,423},{5,10,996},{135,10,1329},{7,11,727},{146,11,73},{4,11,485},{7,11,353},{
+7,10,1259},{7,11,1523},{9,10,125},{139,10,65},{6,0,325},{5,10,136},{6,11,366},{7
+,11,1384},{7,11,1601},{136,10,644},{138,11,160},{6,0,1345},{137,11,282},{18,0,91
+},{147,0,70},{136,0,404},{4,11,157},{133,11,471},{133,0,973},{6,0,135},{135,0,
+1176},{8,11,116},{11,11,551},{142,11,159},{4,0,549},{4,10,433},{133,10,719},{136
+,0,976},{5,11,160},{7,11,363},{7,11,589},{10,11,170},{141,11,55},{144,0,21},{144
+,0,51},{135,0,314},{135,10,1363},{4,11,108},{7,11,405},{10,11,491},{139,11,498},
+{146,0,4},{4,10,555},{8,10,536},{10,10,288},{139,10,1005},{135,11,1005},{6,0,281
+},{7,0,6},{8,0,282},{8,0,480},{8,0,499},{9,0,198},{10,0,143},{10,0,169},{10,0,
+211},{10,0,417},{10,0,574},{11,0,147},{11,0,395},{12,0,75},{12,0,407},{12,0,608}
+,{13,0,500},{142,0,251},{6,0,1093},{6,0,1405},{9,10,370},{138,10,90},{4,11,926},
+{133,11,983},{135,0,1776},{134,0,1528},{132,0,419},{132,11,538},{6,11,294},{7,11
+,1267},{136,11,624},{135,11,1772},{138,11,301},{4,10,257},{135,10,2031},{4,0,138
+},{7,0,1012},{7,0,1280},{9,0,76},{135,10,1768},{132,11,757},{5,0,29},{140,0,638}
+,{7,11,655},{135,11,1844},{7,0,1418},{6,11,257},{135,11,1522},{8,11,469},{138,11
+,47},{142,11,278},{6,10,83},{6,10,1733},{135,10,1389},{11,11,204},{11,11,243},{
+140,11,293},{135,11,1875},{6,0,1710},{135,0,2038},{137,11,299},{4,0,17},{5,0,23}
+,{7,0,995},{11,0,383},{11,0,437},{12,0,460},{140,0,532},{133,0,862},{137,10,696}
+,{6,0,592},{138,0,946},{138,11,599},{7,10,1718},{9,10,95},{9,10,274},{10,10,279}
+,{10,10,317},{10,10,420},{11,10,303},{11,10,808},{12,10,134},{12,10,367},{13,10,
+149},{13,10,347},{14,10,349},{14,10,406},{18,10,22},{18,10,89},{18,10,122},{147,
+10,47},{8,0,70},{12,0,171},{141,0,272},{133,10,26},{132,10,550},{137,0,812},{10,
+0,233},{139,0,76},{134,0,988},{134,0,442},{136,10,822},{7,0,896},{4,10,902},{5,
+10,809},{134,10,122},{5,11,150},{7,11,106},{8,11,603},{9,11,593},{9,11,634},{10,
+11,44},{10,11,173},{11,11,462},{11,11,515},{13,11,216},{13,11,288},{142,11,400},
+{136,0,483},{135,10,262},{6,0,1709},{133,10,620},{4,10,34},{5,10,574},{7,10,279}
+,{7,10,1624},{136,10,601},{137,10,170},{147,0,119},{12,11,108},{141,11,291},{11,
+0,69},{12,0,105},{12,0,117},{13,0,213},{14,0,13},{14,0,62},{14,0,177},{14,0,421}
+,{15,0,19},{146,0,141},{137,0,309},{11,11,278},{142,11,73},{7,0,608},{7,0,976},{
+9,0,146},{10,0,206},{10,0,596},{13,0,218},{142,0,153},{133,10,332},{6,10,261},{8
+,10,182},{139,10,943},{4,11,493},{144,11,55},{134,10,1721},{132,0,768},{4,10,933
+},{133,10,880},{7,11,555},{7,11,1316},{7,11,1412},{7,11,1839},{9,11,192},{9,11,
+589},{11,11,241},{11,11,676},{11,11,811},{11,11,891},{12,11,140},{12,11,346},{12
+,11,479},{13,11,30},{13,11,49},{13,11,381},{14,11,188},{15,11,150},{16,11,76},{
+18,11,30},{148,11,52},{4,0,518},{135,0,1136},{6,11,568},{7,11,112},{7,11,1804},{
+8,11,362},{8,11,410},{8,11,830},{9,11,514},{11,11,649},{142,11,157},{135,11,673}
+,{8,0,689},{137,0,863},{4,0,18},{7,0,145},{7,0,444},{7,0,1278},{8,0,49},{8,0,400
+},{9,0,71},{9,0,250},{10,0,459},{12,0,160},{16,0,24},{132,11,625},{140,0,1020},{
+4,0,997},{6,0,1946},{6,0,1984},{134,0,1998},{6,11,16},{6,11,158},{7,11,43},{7,11
+,129},{7,11,181},{8,11,276},{8,11,377},{10,11,523},{11,11,816},{12,11,455},{13,
+11,303},{142,11,135},{133,10,812},{134,0,658},{4,11,1},{7,11,1143},{7,11,1463},{
+8,11,61},{9,11,207},{9,11,390},{9,11,467},{139,11,836},{150,11,26},{140,0,106},{
+6,0,1827},{10,0,931},{18,0,166},{20,0,114},{4,10,137},{7,10,1178},{7,11,1319},{
+135,10,1520},{133,0,1010},{4,11,723},{5,11,895},{7,11,1031},{8,11,199},{8,11,340
+},{9,11,153},{9,11,215},{10,11,21},{10,11,59},{10,11,80},{10,11,224},{11,11,229}
+,{11,11,652},{12,11,192},{13,11,146},{142,11,91},{132,11,295},{6,11,619},{7,11,
+898},{7,11,1092},{8,11,485},{18,11,28},{147,11,116},{137,11,51},{6,10,1661},{7,
+10,1975},{7,10,2009},{135,10,2011},{5,11,309},{140,11,211},{5,0,87},{7,0,313},{7
+,0,1103},{10,0,208},{10,0,582},{11,0,389},{11,0,813},{12,0,385},{13,0,286},{14,0
+,124},{146,0,108},{5,11,125},{8,11,77},{138,11,15},{132,0,267},{133,0,703},{137,
+11,155},{133,11,439},{11,11,164},{140,11,76},{9,0,496},{5,10,89},{7,10,1915},{9,
+10,185},{9,10,235},{10,10,64},{10,10,270},{10,10,403},{10,10,469},{10,10,529},{
+10,10,590},{11,10,140},{11,10,860},{13,10,1},{13,10,422},{14,10,341},{14,10,364}
+,{17,10,93},{18,10,113},{19,10,97},{147,10,113},{133,10,695},{135,0,1121},{5,10,
+6},{6,10,183},{7,10,680},{7,10,978},{7,10,1013},{7,10,1055},{12,10,230},{13,10,
+172},{146,10,29},{4,11,8},{7,11,1152},{7,11,1153},{7,11,1715},{9,11,374},{10,11,
+478},{139,11,648},{135,11,1099},{6,10,29},{139,10,63},{4,0,561},{10,0,249},{139,
+0,209},{132,0,760},{7,11,799},{138,11,511},{136,11,87},{9,0,154},{140,0,485},{
+136,0,255},{132,0,323},{140,0,419},{132,10,311},{134,10,1740},{4,0,368},{135,0,
+641},{7,10,170},{8,10,90},{8,10,177},{8,10,415},{11,10,714},{142,10,281},{4,11,
+69},{5,11,122},{9,11,656},{138,11,464},{5,11,849},{134,11,1633},{8,0,522},{142,0
+,328},{11,10,91},{13,10,129},{15,10,101},{145,10,125},{7,0,562},{8,0,551},{4,10,
+494},{6,10,74},{7,10,44},{11,11,499},{12,10,17},{15,10,5},{148,10,11},{4,10,276}
+,{133,10,296},{9,0,92},{147,0,91},{4,10,7},{5,10,90},{5,10,158},{6,10,542},{7,10
+,221},{7,10,1574},{9,10,490},{10,10,540},{11,10,443},{139,10,757},{6,0,525},{6,0
+,1976},{8,0,806},{9,0,876},{140,0,284},{5,11,859},{7,10,588},{7,11,1160},{8,11,
+107},{9,10,175},{9,11,291},{9,11,439},{10,10,530},{10,11,663},{11,11,609},{140,
+11,197},{7,11,168},{13,11,196},{141,11,237},{139,0,958},{133,0,594},{135,10,580}
+,{7,10,88},{136,10,627},{6,0,479},{6,0,562},{7,0,1060},{13,0,6},{5,10,872},{6,10
+,57},{7,10,471},{9,10,447},{137,10,454},{136,11,413},{145,11,19},{4,11,117},{6,
+11,372},{7,11,1905},{142,11,323},{4,11,722},{139,11,471},{17,0,61},{5,10,31},{
+134,10,614},{8,10,330},{140,10,477},{7,10,1200},{138,10,460},{6,10,424},{135,10,
+1866},{6,0,1641},{136,0,820},{6,0,1556},{134,0,1618},{9,11,5},{12,11,216},{12,11
+,294},{12,11,298},{12,11,400},{12,11,518},{13,11,229},{143,11,139},{15,11,155},{
+144,11,79},{4,0,302},{135,0,1766},{5,10,13},{134,10,142},{6,0,148},{7,0,1313},{7
+,10,116},{8,10,322},{8,10,755},{9,10,548},{10,10,714},{11,10,884},{141,10,324},{
+137,0,676},{9,11,88},{139,11,270},{5,11,12},{7,11,375},{137,11,438},{134,0,1674}
+,{7,10,1472},{135,10,1554},{11,0,178},{7,10,1071},{7,10,1541},{7,10,1767},{7,10,
+1806},{11,10,162},{11,10,242},{12,10,605},{15,10,26},{144,10,44},{6,0,389},{7,0,
+149},{9,0,142},{138,0,94},{140,11,71},{145,10,115},{6,0,8},{7,0,1881},{8,0,91},{
+11,11,966},{12,11,287},{13,11,342},{13,11,402},{15,11,110},{143,11,163},{4,11,
+258},{136,11,639},{6,11,22},{7,11,903},{138,11,577},{133,11,681},{135,10,1111},{
+135,11,1286},{9,0,112},{8,10,1},{138,10,326},{5,10,488},{6,10,527},{7,10,489},{7
+,10,1636},{8,10,121},{8,10,144},{8,10,359},{9,10,193},{9,10,241},{9,10,336},{9,
+10,882},{11,10,266},{11,10,372},{11,10,944},{12,10,401},{140,10,641},{4,11,664},
+{133,11,804},{6,0,747},{134,0,1015},{135,0,1746},{9,10,31},{10,10,244},{10,10,
+699},{12,10,149},{141,10,497},{133,10,377},{135,0,24},{6,0,1352},{5,11,32},{145,
+10,101},{7,0,1530},{10,0,158},{13,0,13},{13,0,137},{13,0,258},{14,0,111},{14,0,
+225},{14,0,253},{14,0,304},{14,0,339},{14,0,417},{146,0,33},{4,0,503},{135,0,
+1661},{5,0,130},{6,0,845},{7,0,1314},{9,0,610},{10,0,718},{11,0,601},{11,0,819},
+{11,0,946},{140,0,536},{10,0,149},{11,0,280},{142,0,336},{134,0,1401},{135,0,
+1946},{8,0,663},{144,0,8},{134,0,1607},{135,10,2023},{4,11,289},{7,11,629},{7,11
+,1698},{7,11,1711},{140,11,215},{6,11,450},{136,11,109},{10,0,882},{10,0,883},{
+10,0,914},{138,0,928},{133,10,843},{136,11,705},{132,10,554},{133,10,536},{5,0,
+417},{9,10,79},{11,10,625},{145,10,7},{7,11,1238},{142,11,37},{4,0,392},{135,0,
+1597},{5,0,433},{9,0,633},{11,0,629},{132,10,424},{7,10,336},{136,10,785},{134,
+11,355},{6,0,234},{7,0,769},{9,0,18},{138,0,358},{4,10,896},{134,10,1777},{138,
+11,323},{7,0,140},{7,0,1950},{8,0,680},{11,0,817},{147,0,88},{7,0,1222},{138,0,
+386},{139,11,908},{11,0,249},{12,0,313},{16,0,66},{145,0,26},{134,0,5},{7,10,750
+},{9,10,223},{11,10,27},{11,10,466},{12,10,624},{14,10,265},{146,10,61},{134,11,
+26},{134,0,1216},{5,0,963},{134,0,1773},{4,11,414},{5,11,467},{9,11,654},{10,11,
+451},{12,11,59},{141,11,375},{135,11,17},{4,10,603},{133,10,661},{4,10,11},{6,10
+,128},{7,10,231},{7,10,1533},{138,10,725},{135,11,955},{7,0,180},{8,0,509},{136,
+0,792},{132,10,476},{132,0,1002},{133,11,538},{135,10,1807},{132,0,931},{7,0,943
+},{11,0,614},{140,0,747},{135,0,1837},{9,10,20},{10,10,324},{10,10,807},{139,10,
+488},{134,0,641},{6,11,280},{10,11,502},{11,11,344},{140,11,38},{5,11,45},{7,11,
+1161},{11,11,448},{11,11,880},{13,11,139},{13,11,407},{15,11,16},{17,11,95},{18,
+11,66},{18,11,88},{18,11,123},{149,11,7},{9,0,280},{138,0,134},{22,0,22},{23,0,5
+},{151,0,29},{136,11,777},{4,0,90},{5,0,545},{7,0,754},{9,0,186},{10,0,72},{10,0
+,782},{11,0,577},{11,0,610},{11,0,960},{12,0,354},{12,0,362},{12,0,595},{4,11,
+410},{135,11,521},{135,11,1778},{5,10,112},{6,10,103},{134,10,150},{138,10,356},
+{132,0,742},{7,0,151},{9,0,329},{139,0,254},{8,0,853},{8,0,881},{8,0,911},{8,0,
+912},{10,0,872},{12,0,741},{12,0,742},{152,0,18},{4,11,573},{136,11,655},{6,0,
+921},{134,0,934},{9,0,187},{10,0,36},{11,0,1016},{17,0,44},{146,0,64},{7,0,833},
+{136,0,517},{4,0,506},{5,0,295},{135,0,1680},{4,10,708},{8,10,15},{9,10,50},{9,
+10,386},{11,10,18},{11,10,529},{140,10,228},{7,0,251},{7,0,1701},{8,0,436},{4,10
+,563},{7,10,592},{7,10,637},{7,10,770},{8,10,463},{9,10,60},{9,10,335},{9,10,904
+},{10,10,73},{11,10,434},{12,10,585},{13,10,331},{18,10,110},{148,10,60},{132,10
+,502},{136,0,584},{6,10,347},{138,10,161},{7,0,987},{9,0,688},{10,0,522},{11,0,
+788},{12,0,137},{12,0,566},{14,0,9},{14,0,24},{14,0,64},{7,11,899},{142,11,325},
+{4,0,214},{5,0,500},{5,10,102},{6,10,284},{7,10,1079},{7,10,1423},{7,10,1702},{8
+,10,470},{9,10,554},{9,10,723},{139,10,333},{7,10,246},{135,10,840},{6,10,10},{8
+,10,571},{9,10,739},{143,10,91},{133,10,626},{146,0,195},{134,0,1775},{7,0,389},
+{7,0,700},{7,0,940},{8,0,514},{9,0,116},{9,0,535},{10,0,118},{11,0,107},{11,0,
+148},{11,0,922},{12,0,254},{12,0,421},{142,0,238},{5,10,18},{6,10,526},{13,10,24
+},{13,10,110},{19,10,5},{147,10,44},{132,0,743},{11,0,292},{4,10,309},{5,10,462}
+,{7,10,970},{135,10,1097},{22,10,30},{150,10,33},{139,11,338},{135,11,1598},{7,0
+,1283},{9,0,227},{11,0,325},{11,0,408},{14,0,180},{146,0,47},{4,0,953},{6,0,1805
+},{6,0,1814},{6,0,1862},{140,0,774},{6,11,611},{135,11,1733},{135,11,1464},{5,0,
+81},{7,0,146},{7,0,1342},{8,0,53},{8,0,561},{8,0,694},{8,0,754},{9,0,115},{9,0,
+179},{9,0,894},{10,0,462},{10,0,813},{11,0,230},{11,0,657},{11,0,699},{11,0,748}
+,{12,0,119},{12,0,200},{12,0,283},{142,0,273},{5,0,408},{6,0,789},{6,0,877},{6,0
+,1253},{6,0,1413},{137,0,747},{134,10,1704},{135,11,663},{6,0,1910},{6,0,1915},{
+6,0,1923},{9,0,913},{9,0,928},{9,0,950},{9,0,954},{9,0,978},{9,0,993},{12,0,812}
+,{12,0,819},{12,0,831},{12,0,833},{12,0,838},{12,0,909},{12,0,928},{12,0,931},{
+12,0,950},{15,0,186},{15,0,187},{15,0,195},{15,0,196},{15,0,209},{15,0,215},{15,
+0,236},{15,0,241},{15,0,249},{15,0,253},{18,0,180},{18,0,221},{18,0,224},{18,0,
+227},{18,0,229},{149,0,60},{7,0,1826},{135,0,1938},{11,0,490},{18,0,143},{5,10,
+86},{7,10,743},{9,10,85},{10,10,281},{10,10,432},{12,10,251},{13,10,118},{142,10
+,378},{5,10,524},{133,10,744},{141,11,442},{10,10,107},{140,10,436},{135,11,503}
+,{134,0,1162},{132,10,927},{7,0,30},{8,0,86},{8,0,315},{8,0,700},{9,0,576},{9,0,
+858},{10,0,414},{11,0,310},{11,0,888},{11,0,904},{12,0,361},{13,0,248},{13,0,371
+},{14,0,142},{12,10,670},{146,10,94},{134,0,721},{4,11,113},{5,11,163},{5,11,735
+},{7,11,1009},{7,10,1149},{9,11,9},{9,10,156},{9,11,771},{12,11,90},{13,11,138},
+{13,11,410},{143,11,128},{138,0,839},{133,10,778},{137,0,617},{133,10,502},{8,10
+,196},{10,10,283},{139,10,406},{6,0,428},{7,0,524},{8,0,169},{8,0,234},{9,0,480}
+,{138,0,646},{133,10,855},{134,0,1648},{7,0,1205},{138,0,637},{7,0,1596},{4,11,
+935},{133,11,823},{5,11,269},{7,11,434},{7,11,891},{8,11,339},{9,11,702},{11,11,
+594},{11,11,718},{145,11,100},{7,11,878},{9,11,485},{141,11,264},{4,0,266},{8,0,
+4},{9,0,39},{10,0,166},{11,0,918},{12,0,635},{20,0,10},{22,0,27},{22,0,43},{22,0
+,52},{134,11,1713},{7,10,1400},{9,10,446},{138,10,45},{135,11,900},{132,0,862},{
+134,0,1554},{135,11,1033},{19,0,16},{147,11,16},{135,11,1208},{7,0,157},{136,0,
+279},{6,0,604},{136,0,391},{13,10,455},{15,10,99},{15,10,129},{144,10,68},{135,
+10,172},{7,0,945},{11,0,713},{139,0,744},{4,0,973},{10,0,877},{10,0,937},{10,0,
+938},{140,0,711},{139,0,1022},{132,10,568},{142,11,143},{4,0,567},{9,0,859},{132
+,10,732},{7,0,1846},{136,0,628},{136,10,733},{133,0,762},{4,10,428},{135,10,1789
+},{10,0,784},{13,0,191},{7,10,2015},{140,10,665},{133,0,298},{7,0,633},{7,0,905}
+,{7,0,909},{7,0,1538},{9,0,767},{140,0,636},{138,10,806},{132,0,795},{139,0,301}
+,{135,0,1970},{5,11,625},{135,11,1617},{135,11,275},{7,11,37},{8,11,425},{8,11,
+693},{9,11,720},{10,11,380},{10,11,638},{11,11,273},{11,11,307},{11,11,473},{12,
+11,61},{143,11,43},{135,11,198},{134,0,1236},{7,0,369},{12,0,644},{12,0,645},{
+144,0,90},{19,0,15},{149,0,27},{6,0,71},{7,0,845},{8,0,160},{9,0,318},{6,10,1623
+},{134,10,1681},{134,0,1447},{134,0,1255},{138,0,735},{8,0,76},{132,11,168},{6,
+10,1748},{8,10,715},{9,10,802},{10,10,46},{10,10,819},{13,10,308},{14,10,351},{
+14,10,363},{146,10,67},{135,11,91},{6,0,474},{4,10,63},{133,10,347},{133,10,749}
+,{138,0,841},{133,10,366},{6,0,836},{132,11,225},{135,0,1622},{135,10,89},{140,0
+,735},{134,0,1601},{138,11,145},{6,0,1390},{137,0,804},{142,0,394},{6,11,15},{7,
+11,70},{10,11,240},{147,11,93},{6,0,96},{135,0,1426},{4,0,651},{133,0,289},{7,11
+,956},{7,10,977},{7,11,1157},{7,11,1506},{7,11,1606},{7,11,1615},{7,11,1619},{7,
+11,1736},{7,11,1775},{8,11,590},{9,11,324},{9,11,736},{9,11,774},{9,11,776},{9,
+11,784},{10,11,567},{10,11,708},{11,11,518},{11,11,613},{11,11,695},{11,11,716},
+{11,11,739},{11,11,770},{11,11,771},{11,11,848},{11,11,857},{11,11,931},{11,11,
+947},{12,11,326},{12,11,387},{12,11,484},{12,11,528},{12,11,552},{12,11,613},{13
+,11,189},{13,11,256},{13,11,340},{13,11,432},{13,11,436},{13,11,440},{13,11,454}
+,{14,11,174},{14,11,220},{14,11,284},{14,11,390},{145,11,121},{7,0,688},{8,0,35}
+,{9,0,511},{10,0,767},{147,0,118},{134,0,667},{4,0,513},{5,10,824},{133,10,941},
+{7,10,440},{8,10,230},{139,10,106},{134,0,2034},{135,11,1399},{143,11,66},{135,
+11,1529},{4,11,145},{6,11,176},{7,11,395},{9,11,562},{144,11,28},{132,11,501},{
+132,0,704},{134,0,1524},{7,0,1078},{134,11,464},{6,11,509},{10,11,82},{20,11,91}
+,{151,11,13},{4,0,720},{133,0,306},{133,0,431},{7,0,1196},{4,10,914},{5,10,800},
+{133,10,852},{135,11,1189},{10,0,54},{141,10,115},{7,10,564},{142,10,168},{5,0,
+464},{6,0,236},{7,0,696},{7,0,914},{7,0,1108},{7,0,1448},{9,0,15},{9,0,564},{10,
+0,14},{12,0,565},{13,0,449},{14,0,53},{15,0,13},{16,0,64},{17,0,41},{4,10,918},{
+133,10,876},{6,0,1418},{134,10,1764},{4,10,92},{133,10,274},{134,0,907},{4,11,
+114},{8,10,501},{9,11,492},{13,11,462},{142,11,215},{4,11,77},{5,11,361},{6,11,
+139},{6,11,401},{6,11,404},{7,11,413},{7,11,715},{7,11,1716},{11,11,279},{12,11,
+179},{12,11,258},{13,11,244},{142,11,358},{6,0,1767},{12,0,194},{145,0,107},{134
+,11,1717},{5,10,743},{142,11,329},{4,10,49},{7,10,280},{135,10,1633},{5,0,840},{
+7,11,1061},{8,11,82},{11,11,250},{12,11,420},{141,11,184},{135,11,724},{134,0,
+900},{136,10,47},{134,0,1436},{144,11,0},{6,0,675},{7,0,1008},{7,0,1560},{9,0,
+642},{11,0,236},{14,0,193},{5,10,272},{5,10,908},{5,10,942},{8,10,197},{9,10,47}
+,{11,10,538},{139,10,742},{4,0,68},{5,0,628},{5,0,634},{6,0,386},{7,0,794},{8,0,
+273},{9,0,563},{10,0,105},{10,0,171},{11,0,94},{139,0,354},{135,10,1911},{137,10
+,891},{4,0,95},{6,0,1297},{6,0,1604},{7,0,416},{139,0,830},{6,11,513},{135,11,
+1052},{7,0,731},{13,0,20},{143,0,11},{137,11,899},{10,0,850},{140,0,697},{4,0,
+662},{7,11,1417},{12,11,382},{17,11,48},{152,11,12},{133,0,736},{132,0,861},{4,
+10,407},{132,10,560},{141,10,490},{6,11,545},{7,11,565},{7,11,1669},{10,11,114},
+{11,11,642},{140,11,618},{6,0,871},{134,0,1000},{5,0,864},{10,0,648},{11,0,671},
+{15,0,46},{133,11,5},{133,0,928},{11,0,90},{13,0,7},{4,10,475},{11,10,35},{13,10
+,71},{13,10,177},{142,10,422},{136,0,332},{135,11,192},{134,0,1055},{136,11,763}
+,{11,0,986},{140,0,682},{7,0,76},{8,0,44},{9,0,884},{10,0,580},{11,0,399},{11,0,
+894},{143,0,122},{135,11,1237},{135,10,636},{11,0,300},{6,10,222},{7,10,1620},{8
+,10,409},{137,10,693},{4,11,87},{5,11,250},{10,11,601},{13,11,298},{13,11,353},{
+141,11,376},{5,0,518},{10,0,340},{11,0,175},{149,0,16},{140,0,771},{6,0,1108},{
+137,0,831},{132,0,836},{135,0,1852},{4,0,957},{6,0,1804},{8,0,842},{8,0,843},{8,
+0,851},{8,0,855},{140,0,767},{135,11,814},{4,11,57},{7,11,1195},{7,11,1438},{7,
+11,1548},{7,11,1835},{7,11,1904},{9,11,757},{10,11,604},{139,11,519},{133,10,882
+},{138,0,246},{4,0,934},{5,0,202},{8,0,610},{7,11,1897},{12,11,290},{13,11,80},{
+13,11,437},{145,11,74},{8,0,96},{9,0,36},{10,0,607},{10,0,804},{10,0,832},{11,0,
+423},{11,0,442},{12,0,309},{14,0,199},{15,0,90},{145,0,110},{132,10,426},{7,0,
+654},{8,0,240},{6,10,58},{7,10,745},{7,10,1969},{8,10,675},{9,10,479},{9,10,731}
+,{10,10,330},{10,10,593},{10,10,817},{11,10,32},{11,10,133},{11,10,221},{145,10,
+68},{9,0,13},{9,0,398},{9,0,727},{10,0,75},{10,0,184},{10,0,230},{10,0,564},{10,
+0,569},{11,0,973},{12,0,70},{12,0,189},{13,0,57},{141,0,257},{4,11,209},{135,11,
+902},{7,0,391},{137,10,538},{134,0,403},{6,11,303},{7,11,335},{7,11,1437},{7,11,
+1668},{8,11,553},{8,11,652},{8,11,656},{9,11,558},{11,11,743},{149,11,18},{132,
+11,559},{11,0,75},{142,0,267},{6,0,815},{141,11,2},{141,0,366},{137,0,631},{133,
+11,1017},{5,0,345},{135,0,1016},{133,11,709},{134,11,1745},{133,10,566},{7,0,952
+},{6,10,48},{9,10,139},{10,10,399},{11,10,469},{12,10,634},{141,10,223},{133,0,
+673},{9,0,850},{7,11,8},{136,11,206},{6,0,662},{149,0,35},{4,0,287},{133,0,1018}
+,{6,10,114},{7,10,1224},{7,10,1556},{136,10,3},{8,10,576},{137,10,267},{4,0,884}
+,{5,0,34},{10,0,724},{12,0,444},{13,0,354},{18,0,32},{23,0,24},{23,0,31},{152,0,
+5},{133,10,933},{132,11,776},{138,0,151},{136,0,427},{134,0,382},{132,0,329},{9,
+0,846},{10,0,827},{138,11,33},{9,0,279},{10,0,407},{14,0,84},{22,0,18},{135,11,
+1297},{136,11,406},{132,0,906},{136,0,366},{134,0,843},{134,0,1443},{135,0,1372}
+,{138,0,992},{4,0,123},{5,0,605},{7,0,1509},{136,0,36},{132,0,649},{8,11,175},{
+10,11,168},{138,11,573},{133,0,767},{134,0,1018},{135,11,1305},{12,10,30},{13,10
+,148},{14,10,87},{14,10,182},{16,10,42},{148,10,70},{134,11,607},{4,0,273},{5,0,
+658},{133,0,995},{6,0,72},{139,11,174},{10,0,483},{12,0,368},{7,10,56},{7,10,
+1989},{8,10,337},{8,10,738},{9,10,600},{13,10,447},{142,10,92},{5,11,784},{138,
+10,666},{135,0,1345},{139,11,882},{134,0,1293},{133,0,589},{134,0,1988},{5,0,117
+},{6,0,514},{6,0,541},{7,0,1164},{7,0,1436},{8,0,220},{8,0,648},{10,0,688},{139,
+0,560},{136,0,379},{5,0,686},{7,10,866},{135,10,1163},{132,10,328},{9,11,14},{9,
+11,441},{10,11,306},{139,11,9},{4,10,101},{135,10,1171},{5,10,833},{136,10,744},
+{5,11,161},{7,11,839},{135,11,887},{7,0,196},{10,0,765},{11,0,347},{11,0,552},{
+11,0,790},{12,0,263},{13,0,246},{13,0,270},{13,0,395},{14,0,176},{14,0,190},{14,
+0,398},{14,0,412},{15,0,32},{15,0,63},{16,0,88},{147,0,105},{6,10,9},{6,10,397},
+{7,10,53},{7,10,1742},{10,10,632},{11,10,828},{140,10,146},{5,0,381},{135,0,1792
+},{134,0,1452},{135,11,429},{8,0,367},{10,0,760},{14,0,79},{20,0,17},{152,0,0},{
+7,0,616},{138,0,413},{11,10,417},{12,10,223},{140,10,265},{7,11,1611},{13,11,14}
+,{15,11,44},{19,11,13},{148,11,76},{135,0,1229},{6,0,120},{7,0,1188},{7,0,1710},
+{8,0,286},{9,0,667},{11,0,592},{139,0,730},{135,11,1814},{135,0,1146},{4,10,186}
+,{5,10,157},{8,10,168},{138,10,6},{4,0,352},{135,0,687},{4,0,192},{5,0,49},{6,0,
+200},{6,0,293},{6,0,1696},{135,0,1151},{133,10,875},{5,10,773},{5,10,991},{6,10,
+1635},{134,10,1788},{7,10,111},{136,10,581},{6,0,935},{134,0,1151},{134,0,1050},
+{132,0,650},{132,0,147},{11,0,194},{12,0,62},{12,0,88},{11,11,194},{12,11,62},{
+140,11,88},{6,0,339},{135,0,923},{134,10,1747},{7,11,643},{136,11,236},{133,0,
+934},{7,10,1364},{7,10,1907},{141,10,158},{132,10,659},{4,10,404},{135,10,675},{
+7,11,581},{9,11,644},{137,11,699},{13,0,211},{14,0,133},{14,0,204},{15,0,64},{15
+,0,69},{15,0,114},{16,0,10},{19,0,23},{19,0,35},{19,0,39},{19,0,51},{19,0,71},{
+19,0,75},{152,0,15},{133,10,391},{5,11,54},{135,11,1513},{7,0,222},{8,0,341},{5,
+10,540},{134,10,1697},{134,10,78},{132,11,744},{136,0,293},{137,11,701},{7,11,
+930},{10,11,402},{10,11,476},{13,11,452},{18,11,55},{147,11,104},{132,0,637},{
+133,10,460},{8,11,50},{137,11,624},{132,11,572},{134,0,1159},{4,10,199},{139,10,
+34},{134,0,847},{134,10,388},{6,11,43},{7,11,38},{8,11,248},{9,11,504},{138,11,
+513},{9,0,683},{4,10,511},{6,10,608},{9,10,333},{10,10,602},{11,10,441},{11,10,
+723},{11,10,976},{140,10,357},{9,0,867},{138,0,837},{6,0,944},{135,11,326},{135,
+0,1809},{5,10,938},{7,11,783},{136,10,707},{133,11,766},{133,11,363},{6,0,170},{
+7,0,1080},{8,0,395},{8,0,487},{141,0,147},{6,11,258},{140,11,409},{4,0,535},{8,0
+,618},{5,11,249},{148,11,82},{6,0,1379},{149,11,15},{135,0,1625},{150,0,23},{5,
+11,393},{6,11,378},{7,11,1981},{9,11,32},{9,11,591},{10,11,685},{10,11,741},{142
+,11,382},{133,11,788},{7,11,1968},{10,11,19},{139,11,911},{7,11,1401},{135,11,
+1476},{4,11,61},{5,11,58},{5,11,171},{5,11,635},{5,11,683},{5,11,700},{6,11,291}
+,{6,11,566},{7,11,1650},{11,11,523},{12,11,273},{12,11,303},{15,11,39},{143,11,
+111},{6,10,469},{7,10,1709},{138,10,515},{4,0,778},{134,11,589},{132,0,46},{5,0,
+811},{6,0,1679},{6,0,1714},{135,0,2032},{7,0,1458},{9,0,407},{11,0,15},{12,0,651
+},{149,0,37},{7,0,938},{132,10,500},{6,0,34},{7,0,69},{7,0,1089},{7,0,1281},{8,0
+,708},{8,0,721},{9,0,363},{148,0,98},{10,11,231},{147,11,124},{7,11,726},{152,11
+,9},{5,10,68},{134,10,383},{136,11,583},{4,11,917},{133,11,1005},{11,10,216},{
+139,10,340},{135,11,1675},{8,0,441},{10,0,314},{143,0,3},{132,11,919},{4,10,337}
+,{6,10,353},{7,10,1934},{8,10,488},{137,10,429},{7,0,889},{7,10,1795},{8,10,259}
+,{9,10,135},{9,10,177},{9,10,860},{10,10,825},{11,10,115},{11,10,370},{11,10,405
+},{11,10,604},{12,10,10},{12,10,667},{12,10,669},{13,10,76},{14,10,310},{15,10,
+76},{15,10,147},{148,10,23},{4,10,15},{4,11,255},{5,10,22},{5,11,302},{6,11,132}
+,{6,10,244},{7,10,40},{7,11,128},{7,10,200},{7,11,283},{7,10,906},{7,10,1199},{7
+,11,1299},{9,10,616},{10,11,52},{10,11,514},{10,10,716},{11,10,635},{11,10,801},
+{11,11,925},{12,10,458},{13,11,92},{142,11,309},{132,0,462},{137,11,173},{135,10
+,1735},{8,0,525},{5,10,598},{7,10,791},{8,10,108},{137,10,123},{5,0,73},{6,0,23}
+,{134,0,338},{132,0,676},{132,10,683},{7,0,725},{8,0,498},{139,0,268},{12,0,21},
+{151,0,7},{135,0,773},{4,10,155},{135,10,1689},{4,0,164},{5,0,730},{5,10,151},{5
+,10,741},{6,11,210},{7,10,498},{7,10,870},{7,10,1542},{12,10,213},{14,10,36},{14
+,10,391},{17,10,111},{18,10,6},{18,10,46},{18,10,151},{19,10,36},{20,10,32},{20,
+10,56},{20,10,69},{20,10,102},{21,10,4},{22,10,8},{22,10,10},{22,10,14},{150,10,
+31},{4,10,624},{135,10,1752},{4,0,583},{9,0,936},{15,0,214},{18,0,199},{24,0,26}
+,{134,11,588},{7,0,1462},{11,0,659},{4,11,284},{134,11,223},{133,0,220},{139,0,
+803},{132,0,544},{4,10,492},{133,10,451},{16,0,98},{148,0,119},{4,11,218},{7,11,
+526},{143,11,137},{135,10,835},{4,11,270},{5,11,192},{6,11,332},{7,11,1322},{13,
+11,9},{13,10,70},{14,11,104},{142,11,311},{132,10,539},{140,11,661},{5,0,176},{6
+,0,437},{6,0,564},{11,0,181},{141,0,183},{135,0,1192},{6,10,113},{135,10,436},{
+136,10,718},{135,10,520},{135,0,1878},{140,11,196},{7,11,379},{8,11,481},{137,11
+,377},{5,11,1003},{6,11,149},{137,11,746},{8,11,262},{9,11,627},{10,11,18},{11,
+11,214},{11,11,404},{11,11,457},{11,11,780},{11,11,849},{11,11,913},{13,11,330},
+{13,11,401},{142,11,200},{149,0,26},{136,11,304},{132,11,142},{135,0,944},{4,0,
+790},{5,0,273},{134,0,394},{134,0,855},{4,0,135},{6,0,127},{7,0,1185},{7,0,1511}
+,{8,0,613},{11,0,5},{12,0,336},{12,0,495},{12,0,586},{12,0,660},{12,0,668},{14,0
+,385},{15,0,118},{17,0,20},{146,0,98},{6,0,230},{9,0,752},{18,0,109},{12,10,610}
+,{13,10,431},{144,10,59},{7,0,1954},{135,11,925},{4,11,471},{5,11,51},{6,11,602}
+,{8,11,484},{10,11,195},{140,11,159},{132,10,307},{136,11,688},{132,11,697},{7,
+11,812},{7,11,1261},{7,11,1360},{9,11,632},{140,11,352},{5,0,162},{8,0,68},{133,
+10,964},{4,0,654},{136,11,212},{4,0,156},{7,0,998},{7,0,1045},{7,0,1860},{9,0,48
+},{9,0,692},{11,0,419},{139,0,602},{133,11,221},{4,11,373},{5,11,283},{6,11,480}
+,{135,11,609},{142,11,216},{132,0,240},{6,11,192},{9,11,793},{145,11,55},{4,10,
+75},{5,10,180},{6,10,500},{7,10,58},{7,10,710},{138,10,645},{4,11,132},{5,11,69}
+,{5,10,649},{135,11,1242},{6,10,276},{7,10,282},{7,10,879},{7,10,924},{8,10,459}
+,{9,10,599},{9,10,754},{11,10,574},{12,10,128},{12,10,494},{13,10,52},{13,10,301
+},{15,10,30},{143,10,132},{132,10,200},{4,11,111},{135,11,302},{9,0,197},{10,0,
+300},{12,0,473},{13,0,90},{141,0,405},{132,11,767},{6,11,42},{7,11,1416},{7,11,
+1590},{7,11,2005},{8,11,131},{8,11,466},{9,11,672},{13,11,252},{148,11,103},{8,0
+,958},{8,0,999},{10,0,963},{138,0,1001},{135,10,1621},{135,0,858},{4,0,606},{137
+,11,444},{6,11,44},{136,11,368},{139,11,172},{4,11,570},{133,11,120},{139,11,624
+},{7,0,1978},{8,0,676},{6,10,225},{137,10,211},{7,0,972},{11,0,102},{136,10,687}
+,{6,11,227},{135,11,1589},{8,10,58},{9,10,724},{11,10,809},{13,10,113},{145,10,
+72},{4,0,361},{133,0,315},{132,0,461},{6,10,345},{135,10,1247},{132,0,472},{8,10
+,767},{8,10,803},{9,10,301},{137,10,903},{135,11,1333},{135,11,477},{7,10,1949},
+{136,10,674},{6,0,905},{138,0,747},{133,0,155},{134,10,259},{7,0,163},{8,0,319},
+{9,0,402},{10,0,24},{10,0,681},{11,0,200},{12,0,253},{12,0,410},{142,0,219},{5,0
+,475},{7,0,1780},{9,0,230},{11,0,297},{11,0,558},{14,0,322},{19,0,76},{6,11,1667
+},{7,11,2036},{138,11,600},{136,10,254},{6,0,848},{135,0,1956},{6,11,511},{140,
+11,132},{5,11,568},{6,11,138},{135,11,1293},{6,0,631},{137,0,838},{149,0,36},{4,
+11,565},{8,11,23},{136,11,827},{5,0,944},{134,0,1769},{4,0,144},{6,0,842},{6,0,
+1400},{4,11,922},{133,11,1023},{133,10,248},{9,10,800},{10,10,693},{11,10,482},{
+11,10,734},{139,10,789},{7,11,1002},{139,11,145},{4,10,116},{5,10,95},{5,10,445}
+,{7,10,1688},{8,10,29},{9,10,272},{11,10,509},{139,10,915},{14,0,369},{146,0,72}
+,{135,10,1641},{132,11,740},{133,10,543},{140,11,116},{6,0,247},{9,0,555},{5,10,
+181},{136,10,41},{133,10,657},{136,0,996},{138,10,709},{7,0,189},{8,10,202},{138
+,10,536},{136,11,402},{4,11,716},{141,11,31},{10,0,280},{138,0,797},{9,10,423},{
+140,10,89},{8,10,113},{9,10,877},{10,10,554},{11,10,83},{12,10,136},{147,10,109}
+,{133,10,976},{7,0,746},{132,10,206},{136,0,526},{139,0,345},{136,0,1017},{8,11,
+152},{9,11,53},{9,11,268},{9,11,901},{10,11,518},{10,11,829},{11,11,188},{13,11,
+74},{14,11,46},{15,11,17},{15,11,33},{17,11,40},{18,11,36},{19,11,20},{22,11,1},
+{152,11,2},{133,11,736},{136,11,532},{5,0,428},{138,0,651},{135,11,681},{135,0,
+1162},{7,0,327},{13,0,230},{17,0,113},{8,10,226},{10,10,537},{11,10,570},{11,10,
+605},{11,10,799},{11,10,804},{12,10,85},{12,10,516},{12,10,623},{12,11,677},{13,
+10,361},{14,10,77},{14,10,78},{147,10,110},{4,0,792},{7,0,1717},{10,0,546},{132,
+10,769},{4,11,684},{136,11,384},{132,10,551},{134,0,1203},{9,10,57},{9,10,459},{
+10,10,425},{11,10,119},{12,10,184},{12,10,371},{13,10,358},{145,10,51},{5,0,672}
+,{5,10,814},{8,10,10},{9,10,421},{9,10,729},{10,10,609},{139,10,689},{138,0,189}
+,{134,10,624},{7,11,110},{7,11,188},{8,11,290},{8,11,591},{9,11,382},{9,11,649},
+{11,11,71},{11,11,155},{11,11,313},{12,11,5},{13,11,325},{142,11,287},{133,0,99}
+,{6,0,1053},{135,0,298},{7,11,360},{7,11,425},{9,11,66},{9,11,278},{138,11,644},
+{4,0,397},{136,0,555},{137,10,269},{132,10,528},{4,11,900},{133,11,861},{6,0,
+1157},{5,11,254},{7,11,985},{136,11,73},{7,11,1959},{136,11,683},{12,0,398},{20,
+0,39},{21,0,11},{150,0,41},{4,0,485},{7,0,353},{135,0,1523},{6,0,366},{7,0,1384}
+,{135,0,1601},{138,0,787},{137,0,282},{5,10,104},{6,10,173},{135,10,1631},{139,
+11,146},{4,0,157},{133,0,471},{134,0,941},{132,11,725},{7,0,1336},{8,10,138},{8,
+10,342},{9,10,84},{10,10,193},{11,10,883},{140,10,359},{134,11,196},{136,0,116},
+{133,11,831},{134,0,787},{134,10,95},{6,10,406},{10,10,409},{10,10,447},{11,10,
+44},{140,10,100},{5,0,160},{7,0,363},{7,0,589},{10,0,170},{141,0,55},{134,0,1815
+},{132,0,866},{6,0,889},{6,0,1067},{6,0,1183},{4,11,321},{134,11,569},{5,11,848}
+,{134,11,66},{4,11,36},{6,10,1636},{7,11,1387},{10,11,205},{11,11,755},{141,11,
+271},{132,0,689},{9,0,820},{4,10,282},{7,10,1034},{11,10,398},{11,10,634},{12,10
+,1},{12,10,79},{12,10,544},{14,10,237},{17,10,10},{146,10,20},{4,0,108},{7,0,804
+},{139,0,498},{132,11,887},{6,0,1119},{135,11,620},{6,11,165},{138,11,388},{5,0,
+244},{5,10,499},{6,10,476},{7,10,600},{7,10,888},{135,10,1096},{140,0,609},{135,
+0,1005},{4,0,412},{133,0,581},{4,11,719},{135,11,155},{7,10,296},{7,10,596},{8,
+10,560},{8,10,586},{9,10,612},{11,10,304},{12,10,46},{13,10,89},{14,10,112},{145
+,10,122},{4,0,895},{133,0,772},{142,11,307},{135,0,1898},{4,0,926},{133,0,983},{
+4,11,353},{6,11,146},{6,11,1789},{7,11,288},{7,11,990},{7,11,1348},{9,11,665},{9
+,11,898},{11,11,893},{142,11,212},{132,0,538},{133,11,532},{6,0,294},{7,0,1267},
+{8,0,624},{141,0,496},{7,0,1325},{4,11,45},{135,11,1257},{138,0,301},{9,0,298},{
+12,0,291},{13,0,276},{14,0,6},{17,0,18},{21,0,32},{7,10,1599},{7,10,1723},{8,10,
+79},{8,10,106},{8,10,190},{8,10,302},{8,10,383},{8,10,713},{9,10,119},{9,10,233}
+,{9,10,419},{9,10,471},{10,10,181},{10,10,406},{11,10,57},{11,10,85},{11,10,120}
+,{11,10,177},{11,10,296},{11,10,382},{11,10,454},{11,10,758},{11,10,999},{12,10,
+27},{12,10,131},{12,10,245},{12,10,312},{12,10,446},{12,10,454},{13,10,98},{13,
+10,426},{13,10,508},{14,10,163},{14,10,272},{14,10,277},{14,10,370},{15,10,95},{
+15,10,138},{15,10,167},{17,10,38},{148,10,96},{132,0,757},{134,0,1263},{4,0,820}
+,{134,10,1759},{133,0,722},{136,11,816},{138,10,372},{145,10,16},{134,0,1039},{4
+,0,991},{134,0,2028},{133,10,258},{7,0,1875},{139,0,124},{6,11,559},{6,11,1691},
+{135,11,586},{5,0,324},{7,0,881},{8,10,134},{9,10,788},{140,10,438},{7,11,1823},
+{139,11,693},{6,0,1348},{134,0,1545},{134,0,911},{132,0,954},{8,0,329},{8,0,414}
+,{7,10,1948},{135,10,2004},{5,0,517},{6,10,439},{7,10,780},{135,10,1040},{132,0,
+816},{5,10,1},{6,10,81},{138,10,520},{9,0,713},{10,0,222},{5,10,482},{8,10,98},{
+10,10,700},{10,10,822},{11,10,302},{11,10,778},{12,10,50},{12,10,127},{12,10,396
+},{13,10,62},{13,10,328},{14,10,122},{147,10,72},{137,0,33},{5,10,2},{7,10,1494}
+,{136,10,589},{6,10,512},{7,10,797},{8,10,253},{9,10,77},{10,10,1},{10,11,108},{
+10,10,129},{10,10,225},{11,11,116},{11,10,118},{11,10,226},{11,10,251},{11,10,
+430},{11,10,701},{11,10,974},{11,10,982},{12,10,64},{12,10,260},{12,10,488},{140
+,10,690},{134,11,456},{133,11,925},{5,0,150},{7,0,106},{7,0,774},{8,0,603},{9,0,
+593},{9,0,634},{10,0,44},{10,0,173},{11,0,462},{11,0,515},{13,0,216},{13,0,288},
+{142,0,400},{137,10,347},{5,0,748},{134,0,553},{12,0,108},{141,0,291},{7,0,420},
+{4,10,12},{7,10,522},{7,10,809},{8,10,797},{141,10,88},{6,11,193},{7,11,240},{7,
+11,1682},{10,11,51},{10,11,640},{11,11,410},{13,11,82},{14,11,247},{14,11,331},{
+142,11,377},{133,10,528},{135,0,1777},{4,0,493},{144,0,55},{136,11,633},{139,0,
+81},{6,0,980},{136,0,321},{148,10,109},{5,10,266},{9,10,290},{9,10,364},{10,10,
+293},{11,10,606},{142,10,45},{6,0,568},{7,0,112},{7,0,1804},{8,0,362},{8,0,410},
+{8,0,830},{9,0,514},{11,0,649},{142,0,157},{4,0,74},{6,0,510},{6,10,594},{9,10,
+121},{10,10,49},{10,10,412},{139,10,834},{134,0,838},{136,10,748},{132,10,466},{
+132,0,625},{135,11,1443},{4,11,237},{135,11,514},{9,10,378},{141,10,162},{6,0,16
+},{6,0,158},{7,0,43},{7,0,129},{7,0,181},{8,0,276},{8,0,377},{10,0,523},{11,0,
+816},{12,0,455},{13,0,303},{142,0,135},{135,0,281},{4,0,1},{7,0,1143},{7,0,1463}
+,{8,0,61},{9,0,207},{9,0,390},{9,0,467},{139,0,836},{6,11,392},{7,11,65},{135,11
+,2019},{132,10,667},{4,0,723},{5,0,895},{7,0,1031},{8,0,199},{8,0,340},{9,0,153}
+,{9,0,215},{10,0,21},{10,0,59},{10,0,80},{10,0,224},{10,0,838},{11,0,229},{11,0,
+652},{12,0,192},{13,0,146},{142,0,91},{132,0,295},{137,0,51},{9,11,222},{10,11,
+43},{139,11,900},{5,0,309},{140,0,211},{5,0,125},{8,0,77},{138,0,15},{136,11,604
+},{138,0,789},{5,0,173},{4,10,39},{7,10,1843},{8,10,407},{11,10,144},{140,10,523
+},{138,11,265},{133,0,439},{132,10,510},{7,0,648},{7,0,874},{11,0,164},{12,0,76}
+,{18,0,9},{7,10,1980},{10,10,487},{138,10,809},{12,0,111},{14,0,294},{19,0,45},{
+13,10,260},{146,10,63},{133,11,549},{134,10,570},{4,0,8},{7,0,1152},{7,0,1153},{
+7,0,1715},{9,0,374},{10,0,478},{139,0,648},{135,0,1099},{5,0,575},{6,0,354},{135
+,0,701},{7,11,36},{8,11,201},{136,11,605},{4,10,787},{136,11,156},{6,0,518},{149
+,11,13},{140,11,224},{134,0,702},{132,10,516},{5,11,724},{10,11,305},{11,11,151}
+,{12,11,33},{12,11,121},{12,11,381},{17,11,3},{17,11,27},{17,11,78},{18,11,18},{
+19,11,54},{149,11,5},{8,0,87},{4,11,523},{5,11,638},{11,10,887},{14,10,365},{142
+,10,375},{138,0,438},{136,10,821},{135,11,1908},{6,11,242},{7,11,227},{7,11,1581
+},{8,11,104},{9,11,113},{9,11,220},{9,11,427},{10,11,74},{10,11,239},{11,11,579}
+,{11,11,1023},{13,11,4},{13,11,204},{13,11,316},{18,11,95},{148,11,86},{4,0,69},
+{5,0,122},{5,0,849},{6,0,1633},{9,0,656},{138,0,464},{7,0,1802},{4,10,10},{139,
+10,786},{135,11,861},{139,0,499},{7,0,476},{7,0,1592},{138,0,87},{133,10,684},{4
+,0,840},{134,10,27},{142,0,283},{6,0,1620},{7,11,1328},{136,11,494},{5,0,859},{7
+,0,1160},{8,0,107},{9,0,291},{9,0,439},{10,0,663},{11,0,609},{140,0,197},{7,11,
+1306},{8,11,505},{9,11,482},{10,11,126},{11,11,225},{12,11,347},{12,11,449},{13,
+11,19},{142,11,218},{5,11,268},{10,11,764},{12,11,120},{13,11,39},{145,11,127},{
+145,10,56},{7,11,1672},{10,11,472},{11,11,189},{143,11,51},{6,10,342},{6,10,496}
+,{8,10,275},{137,10,206},{133,0,600},{4,0,117},{6,0,372},{7,0,1905},{142,0,323},
+{4,10,909},{5,10,940},{135,11,1471},{132,10,891},{4,0,722},{139,0,471},{4,11,384
+},{135,11,1022},{132,10,687},{9,0,5},{12,0,216},{12,0,294},{12,0,298},{12,0,400}
+,{12,0,518},{13,0,229},{143,0,139},{135,11,1703},{7,11,1602},{10,11,698},{12,11,
+212},{141,11,307},{6,10,41},{141,10,160},{135,11,1077},{9,11,159},{11,11,28},{
+140,11,603},{4,0,514},{7,0,1304},{138,0,477},{134,0,1774},{9,0,88},{139,0,270},{
+5,0,12},{7,0,375},{9,0,438},{134,10,1718},{132,11,515},{136,10,778},{8,11,632},{
+8,11,697},{137,11,854},{6,0,362},{6,0,997},{146,0,51},{7,0,816},{7,0,1241},{9,0,
+283},{9,0,520},{10,0,213},{10,0,307},{10,0,463},{10,0,671},{10,0,746},{11,0,401}
+,{11,0,794},{12,0,517},{18,0,107},{147,0,115},{133,10,115},{150,11,28},{4,11,136
+},{133,11,551},{142,10,314},{132,0,258},{6,0,22},{7,0,903},{7,0,1963},{8,0,639},
+{138,0,577},{5,0,681},{8,0,782},{13,0,130},{17,0,84},{5,10,193},{140,10,178},{9,
+11,17},{138,11,291},{7,11,1287},{9,11,44},{10,11,552},{10,11,642},{11,11,839},{
+12,11,274},{12,11,275},{12,11,372},{13,11,91},{142,11,125},{135,10,174},{4,0,664
+},{5,0,804},{139,0,1013},{134,0,942},{6,0,1349},{6,0,1353},{6,0,1450},{7,11,1518
+},{139,11,694},{11,0,356},{4,10,122},{5,10,796},{5,10,952},{6,10,1660},{6,10,
+1671},{8,10,567},{9,10,687},{9,10,742},{10,10,686},{11,10,682},{140,10,281},{5,0
+,32},{6,11,147},{7,11,886},{9,11,753},{138,11,268},{5,10,179},{7,10,1095},{135,
+10,1213},{4,10,66},{7,10,722},{135,10,904},{135,10,352},{9,11,245},{138,11,137},
+{4,0,289},{7,0,629},{7,0,1698},{7,0,1711},{12,0,215},{133,11,414},{6,0,1975},{
+135,11,1762},{6,0,450},{136,0,109},{141,10,35},{134,11,599},{136,0,705},{133,0,
+664},{134,11,1749},{11,11,402},{12,11,109},{12,11,431},{13,11,179},{13,11,206},{
+14,11,175},{14,11,217},{16,11,3},{148,11,53},{135,0,1238},{134,11,1627},{132,11,
+488},{13,0,318},{10,10,592},{10,10,753},{12,10,317},{12,10,355},{12,10,465},{12,
+10,469},{12,10,560},{140,10,578},{133,10,564},{132,11,83},{140,11,676},{6,0,1872
+},{6,0,1906},{6,0,1907},{9,0,934},{9,0,956},{9,0,960},{9,0,996},{12,0,794},{12,0
+,876},{12,0,880},{12,0,918},{15,0,230},{18,0,234},{18,0,238},{21,0,38},{149,0,62
+},{134,10,556},{134,11,278},{137,0,103},{7,10,544},{8,10,719},{138,10,61},{4,10,
+5},{5,10,498},{8,10,637},{137,10,521},{7,0,777},{12,0,229},{12,0,239},{15,0,12},
+{12,11,229},{12,11,239},{143,11,12},{6,0,26},{7,11,388},{7,11,644},{139,11,781},
+{7,11,229},{8,11,59},{9,11,190},{9,11,257},{10,11,378},{140,11,191},{133,10,927}
+,{135,10,1441},{4,10,893},{5,10,780},{133,10,893},{4,0,414},{5,0,467},{9,0,654},
+{10,0,451},{12,0,59},{141,0,375},{142,0,173},{135,0,17},{7,0,1350},{133,10,238},
+{135,0,955},{4,0,960},{10,0,887},{12,0,753},{18,0,161},{18,0,162},{152,0,19},{
+136,11,344},{6,10,1729},{137,11,288},{132,11,660},{4,0,217},{5,0,710},{7,0,760},
+{7,0,1926},{9,0,428},{9,0,708},{10,0,254},{10,0,296},{10,0,720},{11,0,109},{11,0
+,255},{12,0,165},{12,0,315},{13,0,107},{13,0,203},{14,0,54},{14,0,99},{14,0,114}
+,{14,0,388},{16,0,85},{17,0,9},{17,0,33},{20,0,25},{20,0,28},{20,0,29},{21,0,9},
+{21,0,10},{21,0,34},{22,0,17},{4,10,60},{7,10,1800},{8,10,314},{9,10,700},{139,
+10,487},{7,11,1035},{138,11,737},{7,11,690},{9,11,217},{9,11,587},{140,11,521},{
+6,0,919},{7,11,706},{7,11,1058},{138,11,538},{7,10,1853},{138,10,437},{136,10,
+419},{6,0,280},{10,0,502},{11,0,344},{140,0,38},{5,0,45},{7,0,1161},{11,0,448},{
+11,0,880},{13,0,139},{13,0,407},{15,0,16},{17,0,95},{18,0,66},{18,0,88},{18,0,
+123},{149,0,7},{11,11,92},{11,11,196},{11,11,409},{11,11,450},{11,11,666},{11,11
+,777},{12,11,262},{13,11,385},{13,11,393},{15,11,115},{16,11,45},{145,11,82},{
+136,0,777},{134,11,1744},{4,0,410},{7,0,521},{133,10,828},{134,0,673},{7,0,1110}
+,{7,0,1778},{7,10,176},{135,10,178},{5,10,806},{7,11,268},{7,10,1976},{136,11,
+569},{4,11,733},{9,11,194},{10,11,92},{11,11,198},{12,11,84},{12,11,87},{13,11,
+128},{144,11,74},{5,0,341},{7,0,1129},{11,0,414},{4,10,51},{6,10,4},{7,10,591},{
+7,10,849},{7,10,951},{7,10,1613},{7,10,1760},{7,10,1988},{9,10,434},{10,10,754},
+{11,10,25},{139,10,37},{133,10,902},{135,10,928},{135,0,787},{132,0,436},{134,10
+,270},{7,0,1587},{135,0,1707},{6,0,377},{7,0,1025},{9,0,613},{145,0,104},{7,11,
+982},{7,11,1361},{10,11,32},{143,11,56},{139,0,96},{132,0,451},{132,10,416},{142
+,10,372},{5,10,152},{5,10,197},{7,11,306},{7,10,340},{7,10,867},{10,10,548},{10,
+10,581},{11,10,6},{12,10,3},{12,10,19},{14,10,110},{142,10,289},{134,0,680},{134
+,11,609},{7,0,483},{7,10,190},{8,10,28},{8,10,141},{8,10,444},{8,10,811},{9,10,
+468},{11,10,334},{12,10,24},{12,10,386},{140,10,576},{10,0,916},{133,10,757},{5,
+10,721},{135,10,1553},{133,11,178},{134,0,937},{132,10,898},{133,0,739},{147,0,
+82},{135,0,663},{146,0,128},{5,10,277},{141,10,247},{134,0,1087},{132,10,435},{6
+,11,381},{7,11,645},{7,11,694},{136,11,546},{7,0,503},{135,0,1885},{6,0,1965},{8
+,0,925},{138,0,955},{4,0,113},{5,0,163},{5,0,735},{7,0,1009},{9,0,9},{9,0,771},{
+12,0,90},{13,0,138},{13,0,410},{143,0,128},{4,0,324},{138,0,104},{7,0,460},{5,10
+,265},{134,10,212},{133,11,105},{7,11,261},{7,11,1107},{7,11,1115},{7,11,1354},{
+7,11,1588},{7,11,1705},{7,11,1902},{9,11,465},{10,11,248},{10,11,349},{10,11,647
+},{11,11,527},{11,11,660},{11,11,669},{12,11,529},{141,11,305},{5,11,438},{9,11,
+694},{12,11,627},{141,11,210},{152,11,11},{4,0,935},{133,0,823},{132,10,702},{5,
+0,269},{7,0,434},{7,0,891},{8,0,339},{9,0,702},{11,0,594},{11,0,718},{17,0,100},
+{5,10,808},{135,10,2045},{7,0,1014},{9,0,485},{141,0,264},{134,0,1713},{7,0,1810
+},{11,0,866},{12,0,103},{13,0,495},{140,11,233},{4,0,423},{10,0,949},{138,0,1013
+},{135,0,900},{8,11,25},{138,11,826},{5,10,166},{8,10,739},{140,10,511},{134,0,
+2018},{7,11,1270},{139,11,612},{4,10,119},{5,10,170},{5,10,447},{7,10,1708},{7,
+10,1889},{9,10,357},{9,10,719},{12,10,486},{140,10,596},{12,0,574},{140,11,574},
+{132,11,308},{6,0,964},{6,0,1206},{134,0,1302},{4,10,450},{135,10,1158},{135,11,
+150},{136,11,649},{14,0,213},{148,0,38},{9,11,45},{9,11,311},{141,11,42},{134,11
+,521},{7,10,1375},{7,10,1466},{138,10,331},{132,10,754},{5,11,339},{7,11,1442},{
+14,11,3},{15,11,41},{147,11,66},{136,11,378},{134,0,1022},{5,10,850},{136,10,799
+},{142,0,143},{135,0,2029},{134,11,1628},{8,0,523},{150,0,34},{5,0,625},{135,0,
+1617},{7,0,275},{7,10,238},{7,10,2033},{8,10,120},{8,10,188},{8,10,659},{9,10,
+598},{10,10,466},{12,10,342},{12,10,588},{13,10,503},{14,10,246},{143,10,92},{7,
+0,37},{8,0,425},{8,0,693},{9,0,720},{10,0,380},{10,0,638},{11,0,273},{11,0,473},
+{12,0,61},{143,0,43},{135,11,829},{135,0,1943},{132,0,765},{5,11,486},{135,11,
+1349},{7,11,1635},{8,11,17},{10,11,217},{138,11,295},{4,10,201},{7,10,1744},{8,
+10,602},{11,10,247},{11,10,826},{145,10,65},{138,11,558},{11,0,551},{142,0,159},
+{8,10,164},{146,10,62},{139,11,176},{132,0,168},{136,0,1010},{134,0,1994},{135,0
+,91},{138,0,532},{135,10,1243},{135,0,1884},{132,10,907},{5,10,100},{10,10,329},
+{12,10,416},{149,10,29},{134,11,447},{132,10,176},{5,10,636},{5,10,998},{7,10,9}
+,{7,10,1508},{8,10,26},{9,10,317},{9,10,358},{10,10,210},{10,10,292},{10,10,533}
+,{11,10,555},{12,10,526},{12,10,607},{13,10,263},{13,10,459},{142,10,271},{4,11,
+609},{135,11,756},{6,0,15},{7,0,70},{10,0,240},{147,0,93},{4,11,930},{133,11,947
+},{134,0,1227},{134,0,1534},{133,11,939},{133,11,962},{5,11,651},{8,11,170},{9,
+11,61},{9,11,63},{10,11,23},{10,11,37},{10,11,834},{11,11,4},{11,11,187},{11,11,
+281},{11,11,503},{11,11,677},{12,11,96},{12,11,130},{12,11,244},{14,11,5},{14,11
+,40},{14,11,162},{14,11,202},{146,11,133},{4,11,406},{5,11,579},{12,11,492},{150
+,11,15},{139,0,392},{6,10,610},{10,10,127},{141,10,27},{7,0,655},{7,0,1844},{136
+,10,119},{4,0,145},{6,0,176},{7,0,395},{137,0,562},{132,0,501},{140,11,145},{136
+,0,1019},{134,0,509},{139,0,267},{6,11,17},{7,11,16},{7,11,1001},{7,11,1982},{9,
+11,886},{10,11,489},{10,11,800},{11,11,782},{12,11,320},{13,11,467},{14,11,145},
+{14,11,387},{143,11,119},{145,11,17},{6,0,1099},{133,11,458},{7,11,1983},{8,11,0
+},{8,11,171},{9,11,120},{9,11,732},{10,11,473},{11,11,656},{11,11,998},{18,11,0}
+,{18,11,2},{147,11,21},{12,11,427},{146,11,38},{10,0,948},{138,0,968},{7,10,126}
+,{136,10,84},{136,10,790},{4,0,114},{9,0,492},{13,0,462},{142,0,215},{6,10,64},{
+12,10,377},{141,10,309},{4,0,77},{5,0,361},{6,0,139},{6,0,401},{6,0,404},{7,0,
+413},{7,0,715},{7,0,1716},{11,0,279},{12,0,179},{12,0,258},{13,0,244},{142,0,358
+},{134,0,1717},{7,0,772},{7,0,1061},{7,0,1647},{8,0,82},{11,0,250},{11,0,607},{
+12,0,311},{12,0,420},{13,0,184},{13,0,367},{7,10,1104},{11,10,269},{11,10,539},{
+11,10,627},{11,10,706},{11,10,975},{12,10,248},{12,10,434},{12,10,600},{12,10,
+622},{13,10,297},{13,10,485},{14,10,69},{14,10,409},{143,10,108},{135,0,724},{4,
+11,512},{4,11,519},{133,11,342},{134,0,1133},{145,11,29},{11,10,977},{141,10,507
+},{6,0,841},{6,0,1042},{6,0,1194},{10,0,993},{140,0,1021},{6,11,31},{7,11,491},{
+7,11,530},{8,11,592},{9,10,34},{11,11,53},{11,10,484},{11,11,779},{12,11,167},{
+12,11,411},{14,11,14},{14,11,136},{15,11,72},{16,11,17},{144,11,72},{4,0,1021},{
+6,0,2037},{133,11,907},{7,0,373},{8,0,335},{8,0,596},{9,0,488},{6,10,1700},{7,10
+,293},{7,10,382},{7,10,1026},{7,10,1087},{7,10,2027},{8,10,252},{8,10,727},{8,10
+,729},{9,10,30},{9,10,199},{9,10,231},{9,10,251},{9,10,334},{9,10,361},{9,10,712
+},{10,10,55},{10,10,60},{10,10,232},{10,10,332},{10,10,384},{10,10,396},{10,10,
+504},{10,10,542},{10,10,652},{11,10,20},{11,10,48},{11,10,207},{11,10,291},{11,
+10,298},{11,10,342},{11,10,365},{11,10,394},{11,10,620},{11,10,705},{11,10,1017}
+,{12,10,123},{12,10,340},{12,10,406},{12,10,643},{13,10,61},{13,10,269},{13,10,
+311},{13,10,319},{13,10,486},{14,10,234},{15,10,62},{15,10,85},{16,10,71},{18,10
+,119},{148,10,105},{150,0,37},{4,11,208},{5,11,106},{6,11,531},{8,11,408},{9,11,
+188},{138,11,572},{132,0,564},{6,0,513},{135,0,1052},{132,0,825},{9,0,899},{140,
+11,441},{134,0,778},{133,11,379},{7,0,1417},{12,0,382},{17,0,48},{152,0,12},{132
+,11,241},{7,0,1116},{6,10,379},{7,10,270},{8,10,176},{8,10,183},{9,10,432},{9,10
+,661},{12,10,247},{12,10,617},{146,10,125},{5,10,792},{133,10,900},{6,0,545},{7,
+0,565},{7,0,1669},{10,0,114},{11,0,642},{140,0,618},{133,0,5},{138,11,7},{132,11
+,259},{135,0,192},{134,0,701},{136,0,763},{135,10,1979},{4,10,901},{133,10,776},
+{10,0,755},{147,0,29},{133,0,759},{4,11,173},{5,11,312},{5,11,512},{135,11,1285}
+,{7,11,1603},{7,11,1691},{9,11,464},{11,11,195},{12,11,279},{12,11,448},{14,11,
+11},{147,11,102},{7,0,370},{7,0,1007},{7,0,1177},{135,0,1565},{135,0,1237},{4,0,
+87},{5,0,250},{141,0,298},{4,11,452},{5,11,583},{5,11,817},{6,11,433},{7,11,593}
+,{7,11,720},{7,11,1378},{8,11,161},{9,11,284},{10,11,313},{139,11,886},{4,11,547
+},{135,11,1409},{136,11,722},{4,10,37},{5,10,334},{135,10,1253},{132,10,508},{12
+,0,107},{146,0,31},{8,11,420},{139,11,193},{135,0,814},{135,11,409},{140,0,991},
+{4,0,57},{7,0,1195},{7,0,1438},{7,0,1548},{7,0,1835},{7,0,1904},{9,0,757},{10,0,
+604},{139,0,519},{132,0,540},{138,11,308},{132,10,533},{136,0,608},{144,11,65},{
+4,0,1014},{134,0,2029},{4,0,209},{7,0,902},{5,11,1002},{136,11,745},{134,0,2030}
+,{6,0,303},{7,0,335},{7,0,1437},{7,0,1668},{8,0,553},{8,0,652},{8,0,656},{9,0,
+558},{11,0,743},{149,0,18},{5,11,575},{6,11,354},{135,11,701},{4,11,239},{6,11,
+477},{7,11,1607},{11,11,68},{139,11,617},{132,0,559},{8,0,527},{18,0,60},{147,0,
+24},{133,10,920},{138,0,511},{133,0,1017},{133,0,675},{138,10,391},{11,0,156},{
+135,10,1952},{138,11,369},{132,11,367},{133,0,709},{6,0,698},{134,0,887},{142,10
+,126},{134,0,1745},{132,10,483},{13,11,299},{142,11,75},{133,0,714},{7,0,8},{136
+,0,206},{138,10,480},{4,11,694},{9,10,495},{146,10,104},{7,11,1248},{11,11,621},
+{139,11,702},{140,11,687},{132,0,776},{139,10,1009},{135,0,1272},{134,0,1059},{8
+,10,653},{13,10,93},{147,10,14},{135,11,213},{136,0,406},{133,10,172},{132,0,947
+},{8,0,175},{10,0,168},{138,0,573},{132,0,870},{6,0,1567},{151,11,28},{134,11,
+472},{5,10,260},{136,11,132},{4,11,751},{11,11,390},{140,11,32},{4,11,409},{133,
+11,78},{12,0,554},{6,11,473},{145,11,105},{133,0,784},{8,0,908},{136,11,306},{
+139,0,882},{6,0,358},{7,0,1393},{8,0,396},{10,0,263},{14,0,154},{16,0,48},{17,0,
+8},{7,11,1759},{8,11,396},{10,11,263},{14,11,154},{16,11,48},{145,11,8},{13,11,
+163},{13,11,180},{18,11,78},{148,11,35},{14,0,32},{18,0,85},{20,0,2},{152,0,16},
+{7,0,228},{10,0,770},{8,10,167},{8,10,375},{9,10,82},{9,10,561},{138,10,620},{
+132,0,845},{9,0,14},{9,0,441},{10,0,306},{139,0,9},{11,0,966},{12,0,287},{13,0,
+342},{13,0,402},{15,0,110},{15,0,163},{8,10,194},{136,10,756},{134,0,1578},{4,0,
+967},{6,0,1820},{6,0,1847},{140,0,716},{136,0,594},{7,0,1428},{7,0,1640},{7,0,
+1867},{9,0,169},{9,0,182},{9,0,367},{9,0,478},{9,0,506},{9,0,551},{9,0,557},{9,0
+,648},{9,0,697},{9,0,705},{9,0,725},{9,0,787},{9,0,794},{10,0,198},{10,0,214},{
+10,0,267},{10,0,275},{10,0,456},{10,0,551},{10,0,561},{10,0,613},{10,0,627},{10,
+0,668},{10,0,675},{10,0,691},{10,0,695},{10,0,707},{10,0,715},{11,0,183},{11,0,
+201},{11,0,244},{11,0,262},{11,0,352},{11,0,439},{11,0,493},{11,0,572},{11,0,591
+},{11,0,608},{11,0,611},{11,0,646},{11,0,674},{11,0,711},{11,0,751},{11,0,761},{
+11,0,776},{11,0,785},{11,0,850},{11,0,853},{11,0,862},{11,0,865},{11,0,868},{11,
+0,875},{11,0,898},{11,0,902},{11,0,903},{11,0,910},{11,0,932},{11,0,942},{11,0,
+957},{11,0,967},{11,0,972},{12,0,148},{12,0,195},{12,0,220},{12,0,237},{12,0,318
+},{12,0,339},{12,0,393},{12,0,445},{12,0,450},{12,0,474},{12,0,505},{12,0,509},{
+12,0,533},{12,0,591},{12,0,594},{12,0,597},{12,0,621},{12,0,633},{12,0,642},{13,
+0,59},{13,0,60},{13,0,145},{13,0,239},{13,0,250},{13,0,329},{13,0,344},{13,0,365
+},{13,0,372},{13,0,387},{13,0,403},{13,0,414},{13,0,456},{13,0,470},{13,0,478},{
+13,0,483},{13,0,489},{14,0,55},{14,0,57},{14,0,81},{14,0,90},{14,0,148},{14,0,
+239},{14,0,266},{14,0,321},{14,0,326},{14,0,327},{14,0,330},{14,0,347},{14,0,355
+},{14,0,401},{14,0,404},{14,0,411},{14,0,414},{14,0,416},{14,0,420},{15,0,61},{
+15,0,74},{15,0,87},{15,0,88},{15,0,94},{15,0,96},{15,0,116},{15,0,149},{15,0,154
+},{16,0,50},{16,0,63},{16,0,73},{17,0,2},{17,0,66},{17,0,92},{17,0,103},{17,0,
+112},{17,0,120},{18,0,50},{18,0,54},{18,0,82},{18,0,86},{18,0,90},{18,0,111},{18
+,0,115},{18,0,156},{19,0,40},{19,0,79},{20,0,78},{21,0,22},{135,11,883},{5,0,161
+},{135,0,839},{4,0,782},{13,11,293},{142,11,56},{133,11,617},{139,11,50},{135,10
+,22},{145,0,64},{5,10,639},{7,10,1249},{139,10,896},{138,0,998},{135,11,2042},{4
+,11,546},{142,11,233},{6,0,1043},{134,0,1574},{134,0,1496},{4,10,102},{7,10,815}
+,{7,10,1699},{139,10,964},{12,0,781},{142,0,461},{4,11,313},{133,11,577},{6,0,
+639},{6,0,1114},{137,0,817},{8,11,184},{141,11,433},{7,0,1814},{135,11,935},{10,
+0,997},{140,0,958},{4,0,812},{137,11,625},{132,10,899},{136,10,795},{5,11,886},{
+6,11,46},{6,11,1790},{7,11,14},{7,11,732},{7,11,1654},{8,11,95},{8,11,327},{8,11
+,616},{10,11,598},{10,11,769},{11,11,134},{11,11,747},{12,11,378},{142,11,97},{
+136,0,139},{6,10,52},{9,10,104},{9,10,559},{12,10,308},{147,10,87},{133,11,1021}
+,{132,10,604},{132,10,301},{136,10,779},{7,0,643},{136,0,236},{132,11,153},{134,
+0,1172},{147,10,32},{133,11,798},{6,0,1338},{132,11,587},{6,11,598},{7,11,42},{8
+,11,695},{10,11,212},{11,11,158},{14,11,196},{145,11,85},{135,10,508},{5,11,957}
+,{5,11,1008},{135,11,249},{4,11,129},{135,11,465},{5,0,54},{7,11,470},{7,11,1057
+},{7,11,1201},{9,11,755},{11,11,906},{140,11,527},{7,11,908},{146,11,7},{5,11,
+148},{136,11,450},{144,11,1},{4,0,256},{135,0,1488},{9,0,351},{6,10,310},{7,10,
+1849},{8,10,72},{8,10,272},{8,10,431},{9,10,12},{10,10,563},{10,10,630},{10,10,
+796},{10,10,810},{11,10,367},{11,10,599},{11,10,686},{140,10,672},{6,0,1885},{6,
+0,1898},{6,0,1899},{140,0,955},{4,0,714},{133,0,469},{6,0,1270},{134,0,1456},{
+132,0,744},{6,0,313},{7,10,537},{8,10,64},{9,10,127},{10,10,496},{12,10,510},{
+141,10,384},{4,11,217},{4,10,244},{5,11,710},{7,10,233},{7,11,1926},{9,11,428},{
+9,11,708},{10,11,254},{10,11,296},{10,11,720},{11,11,109},{11,11,255},{12,11,165
+},{12,11,315},{13,11,107},{13,11,203},{14,11,54},{14,11,99},{14,11,114},{14,11,
+388},{16,11,85},{17,11,9},{17,11,33},{20,11,25},{20,11,28},{20,11,29},{21,11,9},
+{21,11,10},{21,11,34},{150,11,17},{138,0,402},{7,0,969},{146,0,55},{8,0,50},{137
+,0,624},{134,0,1355},{132,0,572},{134,10,1650},{10,10,702},{139,10,245},{10,0,
+847},{142,0,445},{6,0,43},{7,0,38},{8,0,248},{138,0,513},{133,0,369},{137,10,338
+},{133,0,766},{133,0,363},{133,10,896},{8,11,392},{11,11,54},{13,11,173},{13,11,
+294},{148,11,7},{134,0,678},{7,11,1230},{136,11,531},{6,0,258},{140,0,409},{5,0,
+249},{148,0,82},{7,10,1117},{136,10,539},{5,0,393},{6,0,378},{7,0,1981},{9,0,32}
+,{9,0,591},{10,0,685},{10,0,741},{142,0,382},{133,0,788},{134,0,1281},{134,0,
+1295},{7,0,1968},{141,0,509},{4,0,61},{5,0,58},{5,0,171},{5,0,683},{6,0,291},{6,
+0,566},{7,0,1650},{11,0,523},{12,0,273},{12,0,303},{15,0,39},{143,0,111},{6,0,
+706},{134,0,1283},{134,0,589},{135,11,1433},{133,11,435},{7,0,1059},{13,0,54},{5
+,10,4},{5,10,810},{6,10,13},{6,10,538},{6,10,1690},{6,10,1726},{7,10,1819},{8,10
+,148},{8,10,696},{8,10,791},{12,10,125},{143,10,9},{135,10,1268},{5,11,85},{6,11
+,419},{7,11,134},{7,11,305},{7,11,361},{7,11,1337},{8,11,71},{140,11,519},{137,0
+,824},{140,11,688},{5,11,691},{7,11,345},{7,10,1385},{9,11,94},{11,10,582},{11,
+10,650},{11,10,901},{11,10,949},{12,11,169},{12,10,232},{12,10,236},{13,10,413},
+{13,10,501},{146,10,116},{4,0,917},{133,0,1005},{7,0,1598},{5,11,183},{6,11,582}
+,{9,11,344},{10,11,679},{140,11,435},{4,10,925},{5,10,803},{8,10,698},{138,10,
+828},{132,0,919},{135,11,511},{139,10,992},{4,0,255},{5,0,302},{6,0,132},{7,0,
+128},{7,0,283},{7,0,1299},{10,0,52},{10,0,514},{11,0,925},{13,0,92},{142,0,309},
+{134,0,1369},{135,10,1847},{134,0,328},{7,11,1993},{136,11,684},{133,10,383},{
+137,0,173},{134,11,583},{134,0,1411},{19,0,65},{5,11,704},{8,11,357},{10,11,745}
+,{14,11,426},{17,11,94},{147,11,57},{9,10,660},{138,10,347},{4,11,179},{5,11,198
+},{133,11,697},{7,11,347},{7,11,971},{8,11,181},{138,11,711},{141,0,442},{11,0,
+842},{11,0,924},{13,0,317},{13,0,370},{13,0,469},{13,0,471},{14,0,397},{18,0,69}
+,{18,0,145},{7,10,572},{9,10,592},{11,10,680},{12,10,356},{140,10,550},{14,11,19
+},{14,11,28},{144,11,29},{136,0,534},{4,11,243},{5,11,203},{7,11,19},{7,11,71},{
+7,11,113},{10,11,405},{11,11,357},{142,11,240},{6,0,210},{10,0,845},{138,0,862},
+{7,11,1351},{9,11,581},{10,11,639},{11,11,453},{140,11,584},{7,11,1450},{139,11,
+99},{10,0,892},{12,0,719},{144,0,105},{4,0,284},{6,0,223},{134,11,492},{5,11,134
+},{6,11,408},{6,11,495},{135,11,1593},{136,0,529},{137,0,807},{4,0,218},{7,0,526
+},{143,0,137},{6,0,1444},{142,11,4},{132,11,665},{4,0,270},{5,0,192},{6,0,332},{
+7,0,1322},{4,11,248},{7,11,137},{137,11,349},{140,0,661},{7,0,1517},{11,0,597},{
+14,0,76},{14,0,335},{20,0,33},{7,10,748},{139,10,700},{5,11,371},{135,11,563},{
+146,11,57},{133,10,127},{133,0,418},{4,11,374},{7,11,547},{7,11,1700},{7,11,1833
+},{139,11,858},{6,10,198},{140,10,83},{7,11,1812},{13,11,259},{13,11,356},{14,11
+,242},{147,11,114},{7,0,379},{8,0,481},{9,0,377},{5,10,276},{6,10,55},{135,10,
+1369},{138,11,286},{5,0,1003},{6,0,149},{6,10,1752},{136,10,726},{8,0,262},{9,0,
+627},{10,0,18},{11,0,214},{11,0,404},{11,0,457},{11,0,780},{11,0,913},{13,0,401}
+,{14,0,200},{6,11,1647},{7,11,1552},{7,11,2010},{9,11,494},{137,11,509},{135,0,
+742},{136,0,304},{132,0,142},{133,10,764},{6,10,309},{7,10,331},{138,10,550},{
+135,10,1062},{6,11,123},{7,11,214},{7,10,986},{9,11,728},{10,11,157},{11,11,346}
+,{11,11,662},{143,11,106},{135,10,1573},{7,0,925},{137,0,799},{4,0,471},{5,0,51}
+,{6,0,602},{8,0,484},{138,0,195},{136,0,688},{132,0,697},{6,0,1169},{6,0,1241},{
+6,10,194},{7,10,133},{10,10,493},{10,10,570},{139,10,664},{140,0,751},{7,0,929},
+{10,0,452},{11,0,878},{16,0,33},{5,10,24},{5,10,569},{6,10,3},{6,10,119},{6,10,
+143},{6,10,440},{7,10,599},{7,10,1686},{7,10,1854},{8,10,424},{9,10,43},{9,10,
+584},{9,10,760},{10,10,328},{11,10,159},{11,10,253},{12,10,487},{140,10,531},{4,
+11,707},{13,11,106},{18,11,49},{147,11,41},{5,0,221},{5,11,588},{134,11,393},{
+134,0,1437},{6,11,211},{7,11,1690},{11,11,486},{140,11,369},{5,10,14},{5,10,892}
+,{6,10,283},{7,10,234},{136,10,537},{4,0,988},{136,0,955},{135,0,1251},{4,10,126
+},{8,10,635},{147,10,34},{4,10,316},{135,10,1561},{137,10,861},{4,10,64},{5,10,
+352},{5,10,720},{6,10,368},{139,10,359},{134,0,192},{4,0,132},{5,0,69},{135,0,
+1242},{7,10,1577},{10,10,304},{10,10,549},{12,10,365},{13,10,220},{13,10,240},{
+142,10,33},{4,0,111},{7,0,865},{134,11,219},{5,11,582},{6,11,1646},{7,11,99},{7,
+11,1962},{7,11,1986},{8,11,515},{8,11,773},{9,11,23},{9,11,491},{12,11,620},{14,
+11,52},{145,11,50},{132,0,767},{7,11,568},{148,11,21},{6,0,42},{7,0,1416},{7,0,
+2005},{8,0,131},{8,0,466},{9,0,672},{13,0,252},{20,0,103},{133,11,851},{135,0,
+1050},{6,10,175},{137,10,289},{5,10,432},{133,10,913},{6,0,44},{136,0,368},{135,
+11,784},{132,0,570},{133,0,120},{139,10,595},{140,0,29},{6,0,227},{135,0,1589},{
+4,11,98},{7,11,1365},{9,11,422},{9,11,670},{10,11,775},{11,11,210},{13,11,26},{
+13,11,457},{141,11,476},{140,10,80},{5,10,931},{134,10,1698},{133,0,522},{134,0,
+1120},{135,0,1529},{12,0,739},{14,0,448},{142,0,467},{11,10,526},{11,10,939},{
+141,10,290},{5,10,774},{6,10,1637},{6,10,1686},{134,10,1751},{6,0,1667},{135,0,
+2036},{7,10,1167},{11,10,934},{13,10,391},{145,10,76},{137,11,147},{6,10,260},{7
+,10,1484},{11,11,821},{12,11,110},{12,11,153},{18,11,41},{150,11,19},{6,0,511},{
+12,0,132},{134,10,573},{5,0,568},{6,0,138},{135,0,1293},{132,0,1020},{8,0,258},{
+9,0,208},{137,0,359},{4,0,565},{8,0,23},{136,0,827},{134,0,344},{4,0,922},{5,0,
+1023},{13,11,477},{14,11,120},{148,11,61},{134,0,240},{5,11,209},{6,11,30},{11,
+11,56},{139,11,305},{6,0,171},{7,0,1002},{7,0,1324},{9,0,415},{14,0,230},{18,0,
+68},{4,10,292},{4,10,736},{5,10,871},{6,10,1689},{7,10,1944},{137,10,580},{9,11,
+635},{139,11,559},{4,11,150},{5,11,303},{134,11,327},{6,10,63},{135,10,920},{133
+,10,793},{8,11,192},{10,11,78},{10,11,555},{11,11,308},{13,11,359},{147,11,95},{
+135,11,786},{135,11,1712},{136,0,402},{6,0,754},{6,11,1638},{7,11,79},{7,11,496}
+,{9,11,138},{10,11,336},{11,11,12},{12,11,412},{12,11,440},{142,11,305},{4,0,716
+},{141,0,31},{133,0,982},{8,0,691},{8,0,731},{5,10,67},{6,10,62},{6,10,374},{135
+,10,1391},{9,10,790},{140,10,47},{139,11,556},{151,11,1},{7,11,204},{7,11,415},{
+8,11,42},{10,11,85},{11,11,33},{11,11,564},{12,11,571},{149,11,1},{8,0,888},{7,
+11,610},{135,11,1501},{4,10,391},{135,10,1169},{5,0,847},{9,0,840},{138,0,803},{
+137,0,823},{134,0,785},{8,0,152},{9,0,53},{9,0,268},{9,0,901},{10,0,518},{10,0,
+829},{11,0,188},{13,0,74},{14,0,46},{15,0,17},{15,0,33},{17,0,40},{18,0,36},{19,
+0,20},{22,0,1},{152,0,2},{4,11,3},{5,11,247},{5,11,644},{7,11,744},{7,11,1207},{
+7,11,1225},{7,11,1909},{146,11,147},{136,0,532},{135,0,681},{132,10,271},{140,0,
+314},{140,0,677},{4,0,684},{136,0,384},{5,11,285},{9,11,67},{13,11,473},{143,11,
+82},{4,10,253},{5,10,544},{7,10,300},{137,10,340},{7,0,110},{7,0,447},{8,0,290},
+{8,0,591},{9,0,382},{9,0,649},{11,0,71},{11,0,155},{11,0,313},{12,0,5},{13,0,325
+},{142,0,287},{134,0,1818},{136,0,1007},{138,0,321},{7,0,360},{7,0,425},{9,0,66}
+,{9,0,278},{138,0,644},{133,10,818},{5,0,385},{5,10,541},{6,10,94},{6,10,499},{7
+,10,230},{139,10,321},{4,10,920},{5,10,25},{5,10,790},{6,10,457},{7,10,853},{136
+,10,788},{4,0,900},{133,0,861},{5,0,254},{7,0,985},{136,0,73},{7,0,1959},{136,0,
+683},{134,10,1765},{133,10,822},{132,10,634},{4,11,29},{6,11,532},{7,11,1628},{7
+,11,1648},{9,11,303},{9,11,350},{10,11,433},{11,11,97},{11,11,557},{11,11,745},{
+12,11,289},{12,11,335},{12,11,348},{12,11,606},{13,11,116},{13,11,233},{13,11,
+466},{14,11,181},{14,11,209},{14,11,232},{14,11,236},{14,11,300},{16,11,41},{148
+,11,97},{19,0,86},{6,10,36},{7,10,658},{136,10,454},{135,11,1692},{132,0,725},{5
+,11,501},{7,11,1704},{9,11,553},{11,11,520},{12,11,557},{141,11,249},{134,0,196}
+,{133,0,831},{136,0,723},{7,0,1897},{13,0,80},{13,0,437},{145,0,74},{4,0,992},{6
+,0,627},{136,0,994},{135,11,1294},{132,10,104},{5,0,848},{6,0,66},{136,0,764},{4
+,0,36},{7,0,1387},{10,0,205},{139,0,755},{6,0,1046},{134,0,1485},{134,0,950},{
+132,0,887},{14,0,450},{148,0,111},{7,0,620},{7,0,831},{9,10,542},{9,10,566},{138
+,10,728},{6,0,165},{138,0,388},{139,10,263},{4,0,719},{135,0,155},{138,10,468},{
+6,11,453},{144,11,36},{134,11,129},{5,0,533},{7,0,755},{138,0,780},{134,0,1465},
+{4,0,353},{6,0,146},{6,0,1789},{7,0,427},{7,0,990},{7,0,1348},{9,0,665},{9,0,898
+},{11,0,893},{142,0,212},{7,10,87},{142,10,288},{4,0,45},{135,0,1257},{12,0,7},{
+7,10,988},{7,10,1939},{9,10,64},{9,10,502},{12,10,34},{13,10,12},{13,10,234},{
+147,10,77},{4,0,607},{5,11,60},{6,11,504},{7,11,614},{7,11,1155},{140,11,0},{135
+,10,141},{8,11,198},{11,11,29},{140,11,534},{140,0,65},{136,0,816},{132,10,619},
+{139,0,88},{5,10,246},{8,10,189},{9,10,355},{9,10,512},{10,10,124},{10,10,453},{
+11,10,143},{11,10,416},{11,10,859},{141,10,341},{4,11,379},{135,11,1397},{4,0,
+600},{137,0,621},{133,0,367},{134,0,561},{6,0,559},{134,0,1691},{6,0,585},{134,
+11,585},{135,11,1228},{4,11,118},{5,10,678},{6,11,274},{6,11,361},{7,11,75},{141
+,11,441},{135,11,1818},{137,11,841},{5,0,573},{6,0,287},{7,10,862},{7,10,1886},{
+138,10,179},{132,10,517},{140,11,693},{5,11,314},{6,11,221},{7,11,419},{10,11,
+650},{11,11,396},{12,11,156},{13,11,369},{14,11,333},{145,11,47},{140,10,540},{
+136,10,667},{11,10,403},{146,10,83},{6,0,672},{133,10,761},{9,0,157},{10,10,131}
+,{140,10,72},{7,0,714},{134,11,460},{134,0,456},{133,0,925},{5,11,682},{135,11,
+1887},{136,11,510},{136,11,475},{133,11,1016},{9,0,19},{7,11,602},{8,11,179},{10
+,11,781},{140,11,126},{6,11,329},{138,11,111},{6,0,822},{134,0,1473},{144,11,86}
+,{11,0,113},{139,11,113},{5,11,821},{134,11,1687},{133,10,449},{7,0,463},{17,0,
+69},{136,10,103},{7,10,2028},{138,10,641},{6,0,193},{7,0,240},{7,0,1682},{10,0,
+51},{10,0,640},{11,0,410},{13,0,82},{14,0,247},{14,0,331},{142,0,377},{6,0,471},
+{11,0,411},{142,0,2},{5,11,71},{7,11,1407},{9,11,388},{9,11,704},{10,11,261},{10
+,11,619},{11,11,547},{11,11,619},{143,11,157},{136,0,633},{135,0,1148},{6,0,554}
+,{7,0,1392},{12,0,129},{7,10,1274},{7,10,1386},{7,11,2008},{9,11,337},{10,11,517
+},{146,10,87},{7,0,803},{8,0,542},{6,10,187},{7,10,1203},{8,10,380},{14,10,117},
+{149,10,28},{6,10,297},{7,10,793},{139,10,938},{8,0,438},{11,0,363},{7,10,464},{
+11,10,105},{12,10,231},{14,10,386},{15,10,102},{148,10,75},{5,11,16},{6,11,86},{
+6,11,603},{7,11,292},{7,11,561},{8,11,257},{8,11,382},{9,11,721},{9,11,778},{11,
+11,581},{140,11,466},{6,0,717},{4,11,486},{133,11,491},{132,0,875},{132,11,72},{
+6,11,265},{135,11,847},{4,0,237},{135,0,514},{6,0,392},{7,0,65},{135,0,2019},{
+140,11,261},{135,11,922},{137,11,404},{12,0,563},{14,0,101},{18,0,129},{7,10,
+1010},{11,10,733},{11,10,759},{13,10,34},{146,10,45},{7,10,1656},{9,10,369},{10,
+10,338},{10,10,490},{11,10,154},{11,10,545},{11,10,775},{13,10,77},{141,10,274},
+{4,0,444},{10,0,146},{140,0,9},{139,11,163},{7,0,1260},{135,0,1790},{9,0,222},{
+10,0,43},{139,0,900},{137,11,234},{138,0,971},{137,0,761},{134,0,699},{136,11,
+434},{6,0,1116},{7,0,1366},{5,10,20},{6,11,197},{6,10,298},{7,10,659},{8,11,205}
+,{137,10,219},{132,11,490},{11,11,820},{150,11,51},{7,10,1440},{11,10,854},{11,
+10,872},{11,10,921},{12,10,551},{13,10,472},{142,10,367},{140,11,13},{132,0,829}
+,{12,0,242},{132,10,439},{136,10,669},{6,0,593},{6,11,452},{7,11,312},{138,11,
+219},{4,11,333},{9,11,176},{12,11,353},{141,11,187},{7,0,36},{8,0,201},{136,0,
+605},{140,0,224},{132,10,233},{134,0,1430},{134,0,1806},{4,0,523},{133,0,638},{6
+,0,1889},{9,0,958},{9,0,971},{9,0,976},{12,0,796},{12,0,799},{12,0,808},{12,0,
+835},{12,0,836},{12,0,914},{12,0,946},{15,0,216},{15,0,232},{18,0,183},{18,0,187
+},{18,0,194},{18,0,212},{18,0,232},{149,0,49},{132,10,482},{6,0,827},{134,0,1434
+},{135,10,346},{134,0,2043},{6,0,242},{7,0,227},{7,0,1581},{8,0,104},{9,0,113},{
+9,0,220},{9,0,427},{10,0,136},{10,0,239},{11,0,579},{11,0,1023},{13,0,4},{13,0,
+204},{13,0,316},{148,0,86},{134,11,1685},{7,0,148},{8,0,284},{141,0,63},{142,0,
+10},{135,11,584},{134,0,1249},{7,0,861},{135,10,334},{5,10,795},{6,10,1741},{137
+,11,70},{132,0,807},{7,11,135},{8,11,7},{8,11,62},{9,11,243},{10,11,658},{10,11,
+697},{11,11,456},{139,11,756},{9,11,395},{138,11,79},{137,11,108},{147,0,94},{
+136,0,494},{135,11,631},{135,10,622},{7,0,1510},{135,10,1750},{4,10,203},{135,10
+,1936},{7,11,406},{7,11,459},{8,11,606},{139,11,726},{7,0,1306},{8,0,505},{9,0,
+482},{10,0,126},{11,0,225},{12,0,347},{12,0,449},{13,0,19},{14,0,218},{142,0,435
+},{5,0,268},{10,0,764},{12,0,120},{13,0,39},{145,0,127},{142,11,68},{11,10,678},
+{140,10,307},{12,11,268},{12,11,640},{142,11,119},{135,10,2044},{133,11,612},{4,
+11,372},{7,11,482},{8,11,158},{9,11,602},{9,11,615},{10,11,245},{10,11,678},{10,
+11,744},{11,11,248},{139,11,806},{7,10,311},{9,10,308},{140,10,255},{4,0,384},{
+135,0,1022},{5,11,854},{135,11,1991},{135,10,1266},{4,10,400},{5,10,267},{135,10
+,232},{135,0,1703},{9,0,159},{11,0,661},{140,0,603},{4,0,964},{14,0,438},{14,0,
+444},{14,0,456},{22,0,60},{22,0,63},{9,11,106},{9,11,163},{9,11,296},{10,11,167}
+,{10,11,172},{10,11,777},{139,11,16},{136,0,583},{132,0,515},{8,0,632},{8,0,697}
+,{137,0,854},{5,11,195},{135,11,1685},{6,0,1123},{134,0,1365},{134,11,328},{7,11
+,1997},{8,11,730},{139,11,1006},{4,0,136},{133,0,551},{134,0,1782},{7,0,1287},{9
+,0,44},{10,0,552},{10,0,642},{11,0,839},{12,0,274},{12,0,275},{12,0,372},{13,0,
+91},{142,0,125},{5,11,751},{11,11,797},{140,11,203},{133,0,732},{7,0,679},{8,0,
+313},{4,10,100},{135,11,821},{10,0,361},{142,0,316},{134,0,595},{6,0,147},{7,0,
+886},{9,0,753},{138,0,268},{5,10,362},{5,10,443},{6,10,318},{7,10,1019},{139,10,
+623},{5,10,463},{136,10,296},{4,10,454},{5,11,950},{5,11,994},{134,11,351},{138,
+0,137},{5,10,48},{5,10,404},{6,10,557},{7,10,458},{8,10,597},{10,10,455},{10,10,
+606},{11,10,49},{11,10,548},{12,10,476},{13,10,18},{141,10,450},{133,0,414},{135
+,0,1762},{5,11,421},{135,11,47},{5,10,442},{135,10,1984},{134,0,599},{134,0,1749
+},{134,0,1627},{4,0,488},{132,11,350},{137,11,751},{132,0,83},{140,0,676},{133,
+11,967},{7,0,1639},{5,10,55},{140,10,161},{4,11,473},{7,11,623},{8,11,808},{9,11
+,871},{9,11,893},{11,11,38},{11,11,431},{12,11,112},{12,11,217},{12,11,243},{12,
+11,562},{12,11,683},{13,11,141},{13,11,197},{13,11,227},{13,11,406},{13,11,487},
+{14,11,156},{14,11,203},{14,11,224},{14,11,256},{18,11,58},{150,11,0},{133,10,
+450},{7,11,736},{139,11,264},{134,0,278},{4,11,222},{7,11,286},{136,11,629},{135
+,10,869},{140,0,97},{144,0,14},{134,0,1085},{4,10,213},{7,10,223},{136,10,80},{7
+,0,388},{7,0,644},{139,0,781},{132,0,849},{7,0,229},{8,0,59},{9,0,190},{10,0,378
+},{140,0,191},{7,10,381},{7,10,806},{7,10,820},{8,10,354},{8,10,437},{8,10,787},
+{9,10,657},{10,10,58},{10,10,339},{10,10,749},{11,10,914},{12,10,162},{13,10,75}
+,{14,10,106},{14,10,198},{14,10,320},{14,10,413},{146,10,43},{141,11,306},{136,
+10,747},{134,0,1115},{16,0,94},{16,0,108},{136,11,146},{6,0,700},{6,0,817},{134,
+0,1002},{133,10,692},{4,11,465},{135,11,1663},{134,10,191},{6,0,1414},{135,11,
+913},{132,0,660},{7,0,1035},{138,0,737},{6,10,162},{7,10,1960},{136,10,831},{132
+,10,706},{7,0,690},{9,0,217},{9,0,587},{140,0,521},{138,10,426},{135,10,1235},{6
+,11,82},{7,11,138},{7,11,517},{9,11,673},{139,11,238},{138,0,272},{5,11,495},{7,
+11,834},{9,11,733},{139,11,378},{134,0,1744},{132,0,1011},{7,11,828},{142,11,116
+},{4,0,733},{9,0,194},{10,0,92},{11,0,198},{12,0,84},{13,0,128},{133,11,559},{10
+,0,57},{10,0,277},{6,11,21},{6,11,1737},{7,11,1444},{136,11,224},{4,10,204},{137
+,10,902},{136,10,833},{11,0,348},{12,0,99},{18,0,1},{18,0,11},{19,0,4},{7,10,366
+},{9,10,287},{12,10,199},{12,10,556},{140,10,577},{6,0,1981},{136,0,936},{21,0,
+33},{150,0,40},{5,11,519},{138,11,204},{5,10,356},{135,10,224},{134,0,775},{135,
+0,306},{7,10,630},{9,10,567},{11,10,150},{11,10,444},{141,10,119},{5,0,979},{134
+,10,539},{133,0,611},{4,11,402},{135,11,1679},{5,0,178},{7,11,2},{8,11,323},{136
+,11,479},{5,11,59},{135,11,672},{4,0,1010},{6,0,1969},{138,11,237},{133,11,412},
+{146,11,34},{7,11,1740},{146,11,48},{134,0,664},{139,10,814},{4,11,85},{135,11,
+549},{133,11,94},{133,11,457},{132,0,390},{134,0,1510},{4,10,235},{135,10,255},{
+4,10,194},{5,10,584},{6,11,11},{6,10,384},{7,11,187},{7,10,583},{10,10,761},{11,
+10,760},{139,10,851},{4,11,522},{139,11,802},{135,0,493},{10,11,776},{13,11,345}
+,{142,11,425},{146,0,37},{4,11,52},{135,11,661},{134,0,724},{134,0,829},{133,11,
+520},{133,10,562},{4,11,281},{5,11,38},{7,11,194},{7,11,668},{7,11,1893},{137,11
+,397},{5,10,191},{137,10,271},{7,0,1537},{14,0,96},{143,0,73},{5,0,473},{11,0,
+168},{4,10,470},{6,10,153},{7,10,1503},{7,10,1923},{10,10,701},{11,10,132},{11,
+10,227},{11,10,320},{11,10,436},{11,10,525},{11,10,855},{12,10,41},{12,10,286},{
+13,10,103},{13,10,284},{14,10,255},{14,10,262},{15,10,117},{143,10,127},{133,0,
+105},{5,0,438},{9,0,694},{12,0,627},{141,0,210},{133,10,327},{6,10,552},{7,10,
+1754},{137,10,604},{134,0,1256},{152,0,11},{5,11,448},{11,11,98},{139,11,524},{7
+,0,1626},{5,10,80},{6,10,405},{7,10,403},{7,10,1502},{8,10,456},{9,10,487},{9,10
+,853},{9,10,889},{10,10,309},{11,10,721},{11,10,994},{12,10,430},{13,10,165},{14
+,11,16},{146,11,44},{132,0,779},{8,0,25},{138,0,826},{4,10,453},{5,10,887},{6,10
+,535},{8,10,6},{8,10,543},{136,10,826},{137,11,461},{140,11,632},{132,0,308},{
+135,0,741},{132,0,671},{7,0,150},{8,0,649},{136,0,1020},{9,0,99},{6,11,336},{8,
+11,552},{9,11,285},{10,11,99},{139,11,568},{134,0,521},{5,0,339},{14,0,3},{15,0,
+41},{15,0,166},{147,0,66},{6,11,423},{7,11,665},{7,11,1210},{9,11,218},{141,11,
+222},{6,0,543},{5,10,101},{5,11,256},{6,10,88},{7,10,1677},{9,10,100},{10,10,677
+},{14,10,169},{14,10,302},{14,10,313},{15,10,48},{143,10,84},{4,10,310},{7,10,
+708},{7,10,996},{9,10,795},{10,10,390},{10,10,733},{11,10,451},{12,10,249},{14,
+10,115},{14,10,286},{143,10,100},{133,10,587},{13,11,417},{14,11,129},{143,11,15
+},{134,0,1358},{136,11,554},{132,10,498},{7,10,217},{8,10,140},{138,10,610},{135
+,11,989},{135,11,634},{6,0,155},{140,0,234},{135,11,462},{132,11,618},{134,0,
+1628},{132,0,766},{4,11,339},{5,10,905},{135,11,259},{135,0,829},{4,11,759},{141
+,11,169},{7,0,1445},{4,10,456},{7,10,358},{7,10,1637},{8,10,643},{139,10,483},{5
+,0,486},{135,0,1349},{5,11,688},{135,11,712},{7,0,1635},{8,0,17},{10,0,217},{10,
+0,295},{12,0,2},{140,11,2},{138,0,558},{150,10,56},{4,11,278},{5,11,465},{135,11
+,1367},{136,11,482},{133,10,535},{6,0,1362},{6,0,1461},{10,11,274},{10,11,625},{
+139,11,530},{5,0,599},{5,11,336},{6,11,341},{6,11,478},{6,11,1763},{136,11,386},
+{7,10,1748},{137,11,151},{134,0,1376},{133,10,539},{135,11,73},{135,11,1971},{
+139,11,283},{9,0,93},{139,0,474},{6,10,91},{135,10,435},{6,0,447},{5,11,396},{
+134,11,501},{4,10,16},{5,10,316},{5,10,842},{6,10,370},{6,10,1778},{8,10,166},{
+11,10,812},{12,10,206},{12,10,351},{14,10,418},{16,10,15},{16,10,34},{18,10,3},{
+19,10,3},{19,10,7},{20,10,4},{149,10,21},{7,0,577},{7,0,1432},{9,0,475},{9,0,505
+},{9,0,526},{9,0,609},{9,0,689},{9,0,726},{9,0,735},{9,0,738},{10,0,556},{10,0,
+674},{10,0,684},{11,0,89},{11,0,202},{11,0,272},{11,0,380},{11,0,415},{11,0,505}
+,{11,0,537},{11,0,550},{11,0,562},{11,0,640},{11,0,667},{11,0,688},{11,0,847},{
+11,0,927},{11,0,930},{11,0,940},{12,0,144},{12,0,325},{12,0,329},{12,0,389},{12,
+0,403},{12,0,451},{12,0,515},{12,0,604},{12,0,616},{12,0,626},{13,0,66},{13,0,
+131},{13,0,167},{13,0,236},{13,0,368},{13,0,411},{13,0,434},{13,0,453},{13,0,461
+},{13,0,474},{14,0,59},{14,0,60},{14,0,139},{14,0,152},{14,0,276},{14,0,353},{14
+,0,402},{15,0,28},{15,0,81},{15,0,123},{15,0,152},{18,0,136},{148,0,88},{4,11,
+929},{133,11,799},{136,11,46},{142,0,307},{4,0,609},{7,0,756},{9,0,544},{11,0,
+413},{144,0,25},{10,0,687},{7,10,619},{10,10,547},{11,10,122},{140,10,601},{4,0,
+930},{133,0,947},{133,0,939},{142,0,21},{4,11,892},{133,11,770},{133,0,962},{5,0
+,651},{8,0,170},{9,0,61},{9,0,63},{10,0,23},{10,0,37},{10,0,834},{11,0,4},{11,0,
+187},{11,0,281},{11,0,503},{11,0,677},{12,0,96},{12,0,130},{12,0,244},{14,0,5},{
+14,0,40},{14,0,162},{14,0,202},{146,0,133},{4,0,406},{5,0,579},{12,0,492},{150,0
+,15},{135,11,158},{135,0,597},{132,0,981},{132,10,888},{4,10,149},{138,10,368},{
+132,0,545},{4,10,154},{7,10,1134},{136,10,105},{135,11,2001},{134,0,1558},{4,10,
+31},{6,10,429},{7,10,962},{9,10,458},{139,10,691},{132,10,312},{135,10,1642},{6,
+0,17},{6,0,1304},{7,0,16},{7,0,1001},{9,0,886},{10,0,489},{10,0,800},{11,0,782},
+{12,0,320},{13,0,467},{14,0,145},{14,0,387},{143,0,119},{135,0,1982},{17,0,17},{
+7,11,1461},{140,11,91},{4,10,236},{132,11,602},{138,0,907},{136,0,110},{7,0,272}
+,{19,0,53},{5,10,836},{5,10,857},{134,10,1680},{5,0,458},{7,11,1218},{136,11,303
+},{7,0,1983},{8,0,0},{8,0,171},{9,0,120},{9,0,732},{10,0,473},{11,0,656},{11,0,
+998},{18,0,0},{18,0,2},{19,0,21},{10,10,68},{139,10,494},{137,11,662},{4,11,13},
+{5,11,567},{7,11,1498},{9,11,124},{11,11,521},{140,11,405},{4,10,81},{139,10,867
+},{135,11,1006},{7,11,800},{7,11,1783},{138,11,12},{9,0,295},{10,0,443},{5,10,
+282},{8,10,650},{137,10,907},{132,11,735},{4,11,170},{4,10,775},{135,11,323},{6,
+0,1844},{10,0,924},{11,11,844},{12,11,104},{140,11,625},{5,11,304},{7,11,1403},{
+140,11,498},{134,0,1232},{4,0,519},{10,0,70},{12,0,26},{14,0,17},{14,0,178},{15,
+0,34},{149,0,12},{132,0,993},{4,11,148},{133,11,742},{6,0,31},{7,0,491},{7,0,530
+},{8,0,592},{11,0,53},{11,0,779},{12,0,167},{12,0,411},{14,0,14},{14,0,136},{15,
+0,72},{16,0,17},{144,0,72},{133,0,907},{134,0,733},{133,11,111},{4,10,71},{5,10,
+376},{7,10,119},{138,10,665},{136,0,55},{8,0,430},{136,11,430},{4,0,208},{5,0,
+106},{6,0,531},{8,0,408},{9,0,188},{138,0,572},{12,0,56},{11,10,827},{14,10,34},
+{143,10,148},{134,0,1693},{133,11,444},{132,10,479},{140,0,441},{9,0,449},{10,0,
+192},{138,0,740},{134,0,928},{4,0,241},{7,10,607},{136,10,99},{8,11,123},{15,11,
+6},{144,11,7},{6,11,285},{8,11,654},{11,11,749},{12,11,190},{12,11,327},{13,11,
+120},{13,11,121},{13,11,327},{15,11,47},{146,11,40},{4,10,41},{5,10,74},{7,10,
+1627},{11,10,871},{140,10,619},{7,0,1525},{11,10,329},{11,10,965},{12,10,241},{
+14,10,354},{15,10,22},{148,10,63},{132,0,259},{135,11,183},{9,10,209},{137,10,
+300},{5,11,937},{135,11,100},{133,10,98},{4,0,173},{5,0,312},{5,0,512},{135,0,
+1285},{141,0,185},{7,0,1603},{7,0,1691},{9,0,464},{11,0,195},{12,0,279},{12,0,
+448},{14,0,11},{147,0,102},{135,0,1113},{133,10,984},{4,0,452},{5,0,583},{135,0,
+720},{4,0,547},{5,0,817},{6,0,433},{7,0,593},{7,0,1378},{8,0,161},{9,0,284},{10,
+0,313},{139,0,886},{8,0,722},{4,10,182},{6,10,205},{135,10,220},{150,0,13},{4,10
+,42},{9,10,205},{9,10,786},{138,10,659},{6,0,289},{7,0,1670},{12,0,57},{151,0,4}
+,{132,10,635},{14,0,43},{146,0,21},{139,10,533},{135,0,1694},{8,0,420},{139,0,
+193},{135,0,409},{132,10,371},{4,10,272},{135,10,836},{5,10,825},{134,10,1640},{
+5,11,251},{5,11,956},{8,11,268},{9,11,214},{146,11,142},{138,0,308},{6,0,1863},{
+141,11,37},{137,10,879},{7,10,317},{135,10,569},{132,11,294},{134,0,790},{5,0,
+1002},{136,0,745},{5,11,346},{5,11,711},{136,11,390},{135,0,289},{5,0,504},{11,0
+,68},{137,10,307},{4,0,239},{6,0,477},{7,0,1607},{139,0,617},{149,0,13},{133,0,
+609},{133,11,624},{5,11,783},{7,11,1998},{135,11,2047},{133,10,525},{132,0,367},
+{132,11,594},{6,0,528},{133,10,493},{4,10,174},{135,10,911},{8,10,417},{137,10,
+782},{132,0,694},{7,0,548},{137,0,58},{4,10,32},{5,10,215},{6,10,269},{7,10,1782
+},{7,10,1892},{10,10,16},{11,10,822},{11,10,954},{141,10,481},{140,0,687},{7,0,
+1749},{136,10,477},{132,11,569},{133,10,308},{135,10,1088},{4,0,661},{138,0,1004
+},{5,11,37},{6,11,39},{6,11,451},{7,11,218},{7,11,667},{7,11,1166},{7,11,1687},{
+8,11,662},{144,11,2},{9,0,445},{12,0,53},{13,0,492},{5,10,126},{8,10,297},{9,10,
+366},{140,10,374},{7,10,1551},{139,10,361},{148,0,74},{134,11,508},{135,0,213},{
+132,10,175},{132,10,685},{6,0,760},{6,0,834},{134,0,1248},{7,11,453},{7,11,635},
+{7,11,796},{8,11,331},{9,11,328},{9,11,330},{9,11,865},{10,11,119},{10,11,235},{
+11,11,111},{11,11,129},{11,11,240},{12,11,31},{12,11,66},{12,11,222},{12,11,269}
+,{12,11,599},{12,11,689},{13,11,186},{13,11,364},{142,11,345},{7,0,1672},{139,0,
+189},{133,10,797},{133,10,565},{6,0,1548},{6,11,98},{7,11,585},{135,11,702},{9,0
+,968},{15,0,192},{149,0,56},{4,10,252},{6,11,37},{7,11,299},{7,10,1068},{7,11,
+1666},{8,11,195},{8,11,316},{9,11,178},{9,11,276},{9,11,339},{9,11,536},{10,11,
+102},{10,11,362},{10,10,434},{10,11,785},{11,11,55},{11,11,149},{11,10,228},{11,
+10,426},{11,11,773},{13,10,231},{13,11,416},{13,11,419},{14,11,38},{14,11,41},{
+14,11,210},{18,10,106},{148,10,87},{4,0,751},{11,0,390},{140,0,32},{4,0,409},{
+133,0,78},{11,11,458},{12,11,15},{140,11,432},{7,0,1602},{10,0,257},{10,0,698},{
+11,0,544},{11,0,585},{12,0,212},{13,0,307},{5,10,231},{7,10,601},{9,10,277},{9,
+10,674},{10,10,178},{10,10,418},{10,10,509},{11,10,531},{12,10,113},{12,10,475},
+{13,10,99},{142,10,428},{6,0,473},{145,0,105},{6,0,1949},{15,0,156},{133,11,645}
+,{7,10,1591},{144,10,43},{135,0,1779},{135,10,1683},{4,11,290},{135,11,1356},{
+134,0,763},{6,11,70},{7,11,1292},{10,11,762},{139,11,288},{142,0,29},{140,11,428
+},{7,0,883},{7,11,131},{7,11,422},{8,11,210},{140,11,573},{134,0,488},{4,10,399}
+,{5,10,119},{5,10,494},{7,10,751},{137,10,556},{133,0,617},{132,11,936},{139,0,
+50},{7,0,1518},{139,0,694},{137,0,785},{4,0,546},{135,0,2042},{7,11,716},{13,11,
+97},{141,11,251},{132,11,653},{145,0,22},{134,0,1016},{4,0,313},{133,0,577},{136
+,11,657},{8,0,184},{141,0,433},{135,0,935},{6,0,720},{9,0,114},{146,11,80},{12,0
+,186},{12,0,292},{14,0,100},{18,0,70},{7,10,594},{7,10,851},{7,10,1858},{9,10,
+411},{9,10,574},{9,10,666},{9,10,737},{10,10,346},{10,10,712},{11,10,246},{11,10
+,432},{11,10,517},{11,10,647},{11,10,679},{11,10,727},{12,10,304},{12,10,305},{
+12,10,323},{12,10,483},{12,10,572},{12,10,593},{12,10,602},{13,10,95},{13,10,101
+},{13,10,171},{13,10,315},{13,10,378},{13,10,425},{13,10,475},{14,10,63},{14,10,
+380},{14,10,384},{15,10,133},{18,10,112},{148,10,72},{135,10,1093},{135,11,1836}
+,{132,10,679},{137,10,203},{11,0,402},{12,0,109},{12,0,431},{13,0,179},{13,0,206
+},{14,0,217},{16,0,3},{148,0,53},{7,11,1368},{8,11,232},{8,11,361},{10,11,682},{
+138,11,742},{137,10,714},{5,0,886},{6,0,46},{6,0,1790},{7,0,14},{7,0,732},{7,0,
+1654},{8,0,95},{8,0,327},{8,0,616},{9,0,892},{10,0,598},{10,0,769},{11,0,134},{
+11,0,747},{12,0,378},{14,0,97},{137,11,534},{4,0,969},{136,10,825},{137,11,27},{
+6,0,727},{142,11,12},{133,0,1021},{134,0,1190},{134,11,1657},{5,10,143},{5,10,
+769},{6,10,1760},{7,10,682},{7,10,1992},{136,10,736},{132,0,153},{135,11,127},{
+133,0,798},{132,0,587},{6,0,598},{7,0,42},{8,0,695},{10,0,212},{11,0,158},{14,0,
+196},{145,0,85},{133,10,860},{6,0,1929},{134,0,1933},{5,0,957},{5,0,1008},{9,0,
+577},{12,0,141},{6,10,422},{7,10,0},{7,10,1544},{8,11,364},{11,10,990},{12,10,
+453},{13,10,47},{141,10,266},{134,0,1319},{4,0,129},{135,0,465},{7,0,470},{7,0,
+1057},{7,0,1201},{9,0,755},{11,0,906},{140,0,527},{7,0,908},{146,0,7},{5,0,148},
+{136,0,450},{5,10,515},{137,10,131},{7,10,1605},{11,10,962},{146,10,139},{132,10
+,646},{134,0,1166},{4,10,396},{7,10,728},{9,10,117},{13,10,202},{148,10,51},{6,
+10,121},{6,10,124},{6,10,357},{7,10,1138},{7,10,1295},{8,10,162},{139,10,655},{
+14,0,374},{142,11,374},{138,0,253},{139,0,1003},{5,11,909},{9,11,849},{138,11,
+805},{133,10,237},{7,11,525},{7,11,1579},{8,11,497},{136,11,573},{137,0,46},{132
+,0,879},{134,0,806},{135,0,1868},{6,0,1837},{134,0,1846},{6,0,730},{134,0,881},{
+7,0,965},{7,0,1460},{7,0,1604},{7,11,193},{7,11,397},{7,11,1105},{8,11,124},{8,
+11,619},{9,11,305},{10,11,264},{11,11,40},{12,11,349},{13,11,134},{13,11,295},{
+14,11,155},{15,11,120},{146,11,105},{136,0,506},{143,0,10},{4,11,262},{7,11,342}
+,{7,10,571},{7,10,1877},{10,10,366},{141,11,23},{133,11,641},{10,0,22},{9,10,513
+},{10,10,39},{12,10,122},{140,10,187},{135,11,1431},{150,11,49},{4,11,99},{6,11,
+250},{6,11,346},{8,11,127},{138,11,81},{6,0,2014},{8,0,928},{10,0,960},{10,0,979
+},{140,0,996},{134,0,296},{132,11,915},{5,11,75},{9,11,517},{10,11,470},{12,11,
+155},{141,11,224},{137,10,873},{4,0,854},{140,11,18},{134,0,587},{7,10,107},{7,
+10,838},{8,10,550},{138,10,401},{11,0,636},{15,0,145},{17,0,34},{19,0,50},{23,0,
+20},{11,10,588},{11,10,864},{11,10,968},{143,10,160},{135,11,216},{7,0,982},{10,
+0,32},{143,0,56},{133,10,768},{133,11,954},{6,11,304},{7,11,1114},{8,11,418},{10
+,11,345},{11,11,341},{11,11,675},{141,11,40},{9,11,410},{139,11,425},{136,0,941}
+,{5,0,435},{132,10,894},{5,0,85},{6,0,419},{7,0,134},{7,0,305},{7,0,361},{7,0,
+1337},{8,0,71},{140,0,519},{140,0,688},{135,0,740},{5,0,691},{7,0,345},{9,0,94},
+{140,0,169},{5,0,183},{6,0,582},{10,0,679},{140,0,435},{134,11,14},{6,0,945},{
+135,0,511},{134,11,1708},{5,11,113},{6,11,243},{7,11,1865},{11,11,161},{16,11,37
+},{145,11,99},{132,11,274},{137,0,539},{7,0,1993},{8,0,684},{134,10,272},{6,0,
+659},{134,0,982},{4,10,9},{5,10,128},{7,10,368},{11,10,480},{148,10,3},{134,0,
+583},{132,0,803},{133,0,704},{4,0,179},{5,0,198},{133,0,697},{7,0,347},{7,0,971}
+,{8,0,181},{10,0,711},{135,11,166},{136,10,682},{4,10,2},{7,10,545},{7,10,894},{
+136,11,521},{135,0,481},{132,0,243},{5,0,203},{7,0,19},{7,0,71},{7,0,113},{10,0,
+405},{11,0,357},{142,0,240},{5,11,725},{5,11,727},{135,11,1811},{6,0,826},{137,
+11,304},{7,0,1450},{139,0,99},{133,11,654},{134,0,492},{5,0,134},{6,0,408},{6,0,
+495},{7,0,1593},{6,11,273},{10,11,188},{13,11,377},{146,11,77},{9,10,769},{140,
+10,185},{135,11,410},{142,0,4},{4,0,665},{134,11,1785},{4,0,248},{7,0,137},{137,
+0,349},{5,10,530},{142,10,113},{7,0,1270},{139,0,612},{132,11,780},{5,0,371},{
+135,0,563},{135,0,826},{6,0,1535},{23,0,21},{151,0,23},{4,0,374},{7,0,547},{7,0,
+1700},{7,0,1833},{139,0,858},{133,10,556},{7,11,612},{8,11,545},{8,11,568},{8,11
+,642},{9,11,717},{10,11,541},{10,11,763},{11,11,449},{12,11,489},{13,11,153},{13
+,11,296},{14,11,138},{14,11,392},{15,11,50},{16,11,6},{16,11,12},{148,11,9},{9,0
+,311},{141,0,42},{8,10,16},{140,10,568},{6,0,1968},{6,0,2027},{138,0,991},{6,0,
+1647},{7,0,1552},{7,0,2010},{9,0,494},{137,0,509},{133,11,948},{6,10,186},{137,
+10,426},{134,0,769},{134,0,642},{132,10,585},{6,0,123},{7,0,214},{9,0,728},{10,0
+,157},{11,0,346},{11,0,662},{143,0,106},{142,11,381},{135,0,1435},{4,11,532},{5,
+11,706},{135,11,662},{5,11,837},{134,11,1651},{4,10,93},{5,10,252},{6,10,229},{7
+,10,291},{9,10,550},{139,10,644},{148,0,79},{137,10,749},{134,0,1425},{137,10,
+162},{4,11,362},{7,11,52},{7,11,303},{140,11,166},{132,10,381},{4,11,330},{7,11,
+933},{7,11,2012},{136,11,292},{135,11,767},{4,0,707},{5,0,588},{6,0,393},{13,0,
+106},{18,0,49},{147,0,41},{6,0,211},{7,0,1690},{11,0,486},{140,0,369},{137,11,
+883},{4,11,703},{135,11,207},{4,0,187},{5,0,184},{5,0,690},{7,0,1869},{10,0,756}
+,{139,0,783},{132,11,571},{134,0,1382},{5,0,175},{6,10,77},{6,10,157},{7,10,974}
+,{7,10,1301},{7,10,1339},{7,10,1490},{7,10,1873},{137,10,628},{134,0,1493},{5,11
+,873},{133,11,960},{134,0,1007},{12,11,93},{12,11,501},{13,11,362},{14,11,151},{
+15,11,40},{15,11,59},{16,11,46},{17,11,25},{18,11,14},{18,11,134},{19,11,25},{19
+,11,69},{20,11,16},{20,11,19},{20,11,66},{21,11,23},{21,11,25},{150,11,42},{11,
+10,919},{141,10,409},{134,0,219},{5,0,582},{6,0,1646},{7,0,99},{7,0,1962},{7,0,
+1986},{8,0,515},{8,0,773},{9,0,23},{9,0,491},{12,0,620},{142,0,93},{133,0,851},{
+5,11,33},{134,11,470},{135,11,1291},{134,0,1278},{135,11,1882},{135,10,1489},{
+132,0,1000},{138,0,982},{8,0,762},{8,0,812},{137,0,910},{6,11,47},{7,11,90},{7,
+11,664},{7,11,830},{7,11,1380},{7,11,2025},{8,11,448},{136,11,828},{4,0,98},{4,0
+,940},{6,0,1819},{6,0,1834},{6,0,1841},{7,0,1365},{8,0,859},{8,0,897},{8,0,918},
+{9,0,422},{9,0,670},{10,0,775},{10,0,894},{10,0,909},{10,0,910},{10,0,935},{11,0
+,210},{12,0,750},{12,0,755},{13,0,26},{13,0,457},{13,0,476},{16,0,100},{16,0,109
+},{18,0,173},{18,0,175},{8,10,398},{9,10,681},{139,10,632},{9,11,417},{137,11,
+493},{136,10,645},{138,0,906},{134,0,1730},{134,10,20},{133,11,1019},{134,0,1185
+},{10,0,40},{136,10,769},{9,0,147},{134,11,208},{140,0,650},{5,0,209},{6,0,30},{
+11,0,56},{139,0,305},{132,0,553},{138,11,344},{6,11,68},{7,11,398},{7,11,448},{7
+,11,1629},{7,11,1813},{8,11,387},{8,11,442},{9,11,710},{10,11,282},{138,11,722},
+{5,0,597},{14,0,20},{142,11,20},{135,0,1614},{135,10,1757},{4,0,150},{5,0,303},{
+6,0,327},{135,10,937},{16,0,49},{7,10,1652},{144,11,49},{8,0,192},{10,0,78},{141
+,0,359},{135,0,786},{143,0,134},{6,0,1638},{7,0,79},{7,0,496},{9,0,138},{10,0,
+336},{11,0,12},{12,0,412},{12,0,440},{142,0,305},{136,11,491},{4,10,579},{5,10,
+226},{5,10,323},{135,10,960},{7,0,204},{7,0,415},{8,0,42},{10,0,85},{139,0,564},
+{132,0,614},{4,11,403},{5,11,441},{7,11,450},{11,11,101},{12,11,193},{141,11,430
+},{135,11,1927},{135,11,1330},{4,0,3},{5,0,247},{5,0,644},{7,0,744},{7,0,1207},{
+7,0,1225},{7,0,1909},{146,0,147},{136,0,942},{4,0,1019},{134,0,2023},{5,11,679},
+{133,10,973},{5,0,285},{9,0,67},{13,0,473},{143,0,82},{7,11,328},{137,11,326},{
+151,0,8},{6,10,135},{135,10,1176},{135,11,1128},{134,0,1309},{135,11,1796},{135,
+10,314},{4,11,574},{7,11,350},{7,11,1024},{8,11,338},{9,11,677},{10,11,808},{139
+,11,508},{7,11,818},{17,11,14},{17,11,45},{18,11,75},{148,11,18},{146,10,4},{135
+,11,1081},{4,0,29},{6,0,532},{7,0,1628},{7,0,1648},{9,0,350},{10,0,433},{11,0,97
+},{11,0,557},{11,0,745},{12,0,289},{12,0,335},{12,0,348},{12,0,606},{13,0,116},{
+13,0,233},{13,0,466},{14,0,181},{14,0,209},{14,0,232},{14,0,236},{14,0,300},{16,
+0,41},{148,0,97},{7,0,318},{6,10,281},{8,10,282},{8,10,480},{8,10,499},{9,10,198
+},{10,10,143},{10,10,169},{10,10,211},{10,10,417},{10,10,574},{11,10,147},{11,10
+,395},{12,10,75},{12,10,407},{12,10,608},{13,10,500},{142,10,251},{135,11,1676},
+{135,11,2037},{135,0,1692},{5,0,501},{7,0,1704},{9,0,553},{11,0,520},{12,0,557},
+{141,0,249},{6,0,1527},{14,0,324},{15,0,55},{15,0,80},{14,11,324},{15,11,55},{
+143,11,80},{135,10,1776},{8,0,988},{137,11,297},{132,10,419},{142,0,223},{139,11
+,234},{7,0,1123},{12,0,508},{14,0,102},{14,0,226},{144,0,57},{4,10,138},{7,10,
+1012},{7,10,1280},{137,10,76},{7,0,1764},{5,10,29},{140,10,638},{134,0,2015},{
+134,0,1599},{138,11,56},{6,11,306},{7,11,1140},{7,11,1340},{8,11,133},{138,11,
+449},{139,11,1011},{6,10,1710},{135,10,2038},{7,11,1763},{140,11,310},{6,0,129},
+{4,10,17},{5,10,23},{7,10,995},{11,10,383},{11,10,437},{12,10,460},{140,10,532},
+{5,11,329},{136,11,260},{133,10,862},{132,0,534},{6,0,811},{135,0,626},{132,11,
+657},{4,0,25},{5,0,60},{6,0,504},{7,0,614},{7,0,1155},{12,0,0},{152,11,7},{7,0,
+1248},{11,0,621},{139,0,702},{137,0,321},{8,10,70},{12,10,171},{141,10,272},{10,
+10,233},{139,10,76},{4,0,379},{7,0,1397},{134,10,442},{5,11,66},{7,11,1896},{136
+,11,288},{134,11,1643},{134,10,1709},{4,11,21},{5,11,91},{5,11,570},{5,11,648},{
+5,11,750},{5,11,781},{6,11,54},{6,11,112},{6,11,402},{6,11,1732},{7,11,315},{7,
+11,749},{7,11,1347},{7,11,1900},{9,11,78},{9,11,508},{10,11,611},{11,11,510},{11
+,11,728},{13,11,36},{14,11,39},{16,11,83},{17,11,124},{148,11,30},{4,0,118},{6,0
+,274},{6,0,361},{7,0,75},{141,0,441},{10,11,322},{10,11,719},{139,11,407},{147,
+10,119},{12,11,549},{14,11,67},{147,11,60},{11,10,69},{12,10,105},{12,10,117},{
+13,10,213},{14,10,13},{14,10,62},{14,10,177},{14,10,421},{15,10,19},{146,10,141}
+,{9,0,841},{137,10,309},{7,10,608},{7,10,976},{8,11,125},{8,11,369},{8,11,524},{
+9,10,146},{10,10,206},{10,11,486},{10,10,596},{11,11,13},{11,11,381},{11,11,736}
+,{11,11,766},{11,11,845},{13,11,114},{13,10,218},{13,11,292},{14,11,47},{142,10,
+153},{12,0,693},{135,11,759},{5,0,314},{6,0,221},{7,0,419},{10,0,650},{11,0,396}
+,{12,0,156},{13,0,369},{14,0,333},{145,0,47},{6,11,1684},{6,11,1731},{7,11,356},
+{7,11,1932},{8,11,54},{8,11,221},{9,11,225},{9,11,356},{10,11,77},{10,11,446},{
+10,11,731},{12,11,404},{141,11,491},{132,11,375},{4,10,518},{135,10,1136},{4,0,
+913},{4,11,411},{11,11,643},{140,11,115},{4,11,80},{133,11,44},{8,10,689},{137,
+10,863},{138,0,880},{4,10,18},{7,10,145},{7,10,444},{7,10,1278},{8,10,49},{8,10,
+400},{9,10,71},{9,10,250},{10,10,459},{12,10,160},{144,10,24},{136,0,475},{5,0,
+1016},{5,11,299},{135,11,1083},{7,0,602},{8,0,179},{10,0,781},{140,0,126},{6,0,
+329},{138,0,111},{135,0,1864},{4,11,219},{7,11,1761},{137,11,86},{6,0,1888},{6,0
+,1892},{6,0,1901},{6,0,1904},{9,0,953},{9,0,985},{9,0,991},{9,0,1001},{12,0,818}
+,{12,0,846},{12,0,847},{12,0,861},{12,0,862},{12,0,873},{12,0,875},{12,0,877},{
+12,0,879},{12,0,881},{12,0,884},{12,0,903},{12,0,915},{12,0,926},{12,0,939},{15,
+0,182},{15,0,219},{15,0,255},{18,0,191},{18,0,209},{18,0,211},{149,0,41},{5,11,
+328},{135,11,918},{137,0,780},{12,0,82},{143,0,36},{133,10,1010},{5,0,821},{134,
+0,1687},{133,11,514},{132,0,956},{134,0,1180},{10,0,112},{5,10,87},{7,10,313},{7
+,10,1103},{10,10,582},{11,10,389},{11,10,813},{12,10,385},{13,10,286},{14,10,124
+},{146,10,108},{5,0,71},{7,0,1407},{9,0,704},{10,0,261},{10,0,619},{11,0,547},{
+11,0,619},{143,0,157},{4,0,531},{5,0,455},{5,11,301},{6,11,571},{14,11,49},{146,
+11,102},{132,10,267},{6,0,385},{7,0,2008},{9,0,337},{138,0,517},{133,11,726},{
+133,11,364},{4,11,76},{7,11,1550},{9,11,306},{9,11,430},{9,11,663},{10,11,683},{
+11,11,427},{11,11,753},{12,11,334},{12,11,442},{14,11,258},{14,11,366},{143,11,
+131},{6,0,1865},{6,0,1879},{6,0,1881},{6,0,1894},{6,0,1908},{9,0,915},{9,0,926},
+{9,0,940},{9,0,943},{9,0,966},{9,0,980},{9,0,989},{9,0,1005},{9,0,1010},{12,0,
+813},{12,0,817},{12,0,840},{12,0,843},{12,0,855},{12,0,864},{12,0,871},{12,0,872
+},{12,0,899},{12,0,905},{12,0,924},{15,0,171},{15,0,181},{15,0,224},{15,0,235},{
+15,0,251},{146,0,184},{137,11,52},{5,0,16},{6,0,86},{6,0,603},{7,0,292},{7,0,561
+},{8,0,257},{8,0,382},{9,0,721},{9,0,778},{11,0,581},{140,0,466},{4,0,486},{5,0,
+491},{135,10,1121},{4,0,72},{6,0,265},{135,0,1300},{135,11,1183},{10,10,249},{
+139,10,209},{132,10,561},{137,11,519},{4,11,656},{4,10,760},{135,11,779},{9,10,
+154},{140,10,485},{135,11,1793},{135,11,144},{136,10,255},{133,0,621},{4,10,368}
+,{135,10,641},{135,11,1373},{7,11,554},{7,11,605},{141,11,10},{137,0,234},{5,0,
+815},{6,0,1688},{134,0,1755},{5,11,838},{5,11,841},{134,11,1649},{7,0,1987},{7,0
+,2040},{136,0,743},{133,11,1012},{6,0,197},{136,0,205},{6,0,314},{134,11,314},{
+144,11,53},{6,11,251},{7,11,365},{7,11,1357},{7,11,1497},{8,11,154},{141,11,281}
+,{133,11,340},{6,0,452},{7,0,312},{138,0,219},{138,0,589},{4,0,333},{9,0,176},{
+12,0,353},{141,0,187},{9,10,92},{147,10,91},{134,0,1110},{11,0,47},{139,11,495},
+{6,10,525},{8,10,806},{9,10,876},{140,10,284},{8,11,261},{9,11,144},{9,11,466},{
+10,11,370},{12,11,470},{13,11,144},{142,11,348},{137,11,897},{8,0,863},{8,0,864}
+,{8,0,868},{8,0,884},{10,0,866},{10,0,868},{10,0,873},{10,0,911},{10,0,912},{10,
+0,944},{12,0,727},{6,11,248},{9,11,546},{10,11,535},{11,11,681},{141,11,135},{6,
+0,300},{135,0,1515},{134,0,1237},{139,10,958},{133,10,594},{140,11,250},{134,0,
+1685},{134,11,567},{7,0,135},{8,0,7},{8,0,62},{9,0,243},{10,0,658},{10,0,697},{
+11,0,456},{139,0,756},{9,0,395},{138,0,79},{6,10,1641},{136,10,820},{4,10,302},{
+135,10,1766},{134,11,174},{135,10,1313},{135,0,631},{134,10,1674},{134,11,395},{
+138,0,835},{7,0,406},{7,0,459},{8,0,606},{139,0,726},{134,11,617},{134,0,979},{6
+,10,389},{7,10,149},{9,10,142},{138,10,94},{5,11,878},{133,11,972},{6,10,8},{7,
+10,1881},{8,10,91},{136,11,511},{133,0,612},{132,11,351},{4,0,372},{7,0,482},{8,
+0,158},{9,0,602},{9,0,615},{10,0,245},{10,0,678},{10,0,744},{11,0,248},{139,0,
+806},{5,0,854},{135,0,1991},{132,11,286},{135,11,344},{7,11,438},{7,11,627},{7,
+11,1516},{8,11,40},{9,11,56},{9,11,294},{10,11,30},{10,11,259},{11,11,969},{146,
+11,148},{135,0,1492},{5,11,259},{7,11,414},{7,11,854},{142,11,107},{135,10,1746}
+,{6,0,833},{134,0,998},{135,10,24},{6,0,750},{135,0,1739},{4,10,503},{135,10,
+1661},{5,10,130},{7,10,1314},{9,10,610},{10,10,718},{11,10,601},{11,10,819},{11,
+10,946},{140,10,536},{10,10,149},{11,10,280},{142,10,336},{132,11,738},{135,10,
+1946},{5,0,195},{135,0,1685},{7,0,1997},{8,0,730},{139,0,1006},{151,11,17},{133,
+11,866},{14,0,463},{14,0,470},{150,0,61},{5,0,751},{8,0,266},{11,0,578},{4,10,
+392},{135,10,1597},{5,10,433},{9,10,633},{139,10,629},{135,0,821},{6,0,715},{134
+,0,1325},{133,11,116},{6,0,868},{132,11,457},{134,0,959},{6,10,234},{138,11,199}
+,{7,0,1053},{7,10,1950},{8,10,680},{11,10,817},{147,10,88},{7,10,1222},{138,10,
+386},{5,0,950},{5,0,994},{6,0,351},{134,0,1124},{134,0,1081},{7,0,1595},{6,10,5}
+,{11,10,249},{12,10,313},{16,10,66},{145,10,26},{148,0,59},{5,11,527},{6,11,189}
+,{135,11,859},{5,10,963},{6,10,1773},{11,11,104},{11,11,554},{15,11,60},{143,11,
+125},{135,0,47},{137,0,684},{134,11,116},{134,0,1606},{134,0,777},{7,0,1020},{8,
+10,509},{136,10,792},{135,0,1094},{132,0,350},{133,11,487},{4,11,86},{5,11,667},
+{5,11,753},{6,11,316},{6,11,455},{135,11,946},{7,0,1812},{13,0,259},{13,0,356},{
+14,0,242},{147,0,114},{132,10,931},{133,0,967},{4,0,473},{7,0,623},{8,0,808},{9,
+0,871},{9,0,893},{11,0,38},{11,0,431},{12,0,112},{12,0,217},{12,0,243},{12,0,562
+},{12,0,663},{12,0,683},{13,0,141},{13,0,197},{13,0,227},{13,0,406},{13,0,487},{
+14,0,156},{14,0,203},{14,0,224},{14,0,256},{18,0,58},{150,0,0},{138,0,286},{7,10
+,943},{139,10,614},{135,10,1837},{150,11,45},{132,0,798},{4,0,222},{7,0,286},{
+136,0,629},{4,11,79},{7,11,1773},{10,11,450},{11,11,589},{13,11,332},{13,11,493}
+,{14,11,183},{14,11,334},{14,11,362},{14,11,368},{14,11,376},{14,11,379},{19,11,
+90},{19,11,103},{19,11,127},{148,11,90},{5,0,337},{11,0,513},{11,0,889},{11,0,
+961},{12,0,461},{13,0,79},{15,0,121},{4,10,90},{5,10,545},{7,10,754},{9,10,186},
+{10,10,72},{10,10,782},{11,10,577},{11,10,610},{12,10,354},{12,10,362},{140,10,
+595},{141,0,306},{136,0,146},{7,0,1646},{9,10,329},{11,10,254},{141,11,124},{4,0
+,465},{135,0,1663},{132,0,525},{133,11,663},{10,0,299},{18,0,74},{9,10,187},{11,
+10,1016},{145,10,44},{7,0,165},{7,0,919},{4,10,506},{136,10,517},{5,10,295},{135
+,10,1680},{133,11,846},{134,0,1064},{5,11,378},{7,11,1402},{7,11,1414},{8,11,465
+},{9,11,286},{10,11,185},{10,11,562},{10,11,635},{11,11,31},{11,11,393},{12,11,
+456},{13,11,312},{18,11,65},{18,11,96},{147,11,89},{132,0,596},{7,10,987},{9,10,
+688},{10,10,522},{11,10,788},{140,10,566},{6,0,82},{7,0,138},{7,0,517},{7,0,1741
+},{11,0,238},{4,11,648},{134,10,1775},{7,0,1233},{7,10,700},{7,10,940},{8,10,514
+},{9,10,116},{9,10,535},{10,10,118},{11,10,107},{11,10,148},{11,10,922},{12,10,
+254},{12,10,421},{142,10,238},{4,0,962},{6,0,1824},{8,0,894},{12,0,708},{12,0,
+725},{14,0,451},{20,0,94},{22,0,59},{150,0,62},{5,11,945},{6,11,1656},{6,11,1787
+},{7,11,167},{8,11,824},{9,11,391},{10,11,375},{139,11,185},{5,0,495},{7,0,834},
+{9,0,733},{139,0,378},{4,10,743},{135,11,1273},{6,0,1204},{7,11,1645},{8,11,352}
+,{137,11,249},{139,10,292},{133,0,559},{132,11,152},{9,0,499},{10,0,341},{15,0,
+144},{19,0,49},{7,10,1283},{9,10,227},{11,10,325},{11,10,408},{14,10,180},{146,
+10,47},{6,0,21},{6,0,1737},{7,0,1444},{136,0,224},{133,11,1006},{7,0,1446},{9,0,
+97},{17,0,15},{5,10,81},{7,10,146},{7,10,1342},{8,10,53},{8,10,561},{8,10,694},{
+8,10,754},{9,10,115},{9,10,894},{10,10,462},{10,10,813},{11,10,230},{11,10,657},
+{11,10,699},{11,10,748},{12,10,119},{12,10,200},{12,10,283},{142,10,273},{5,10,
+408},{137,10,747},{135,11,431},{135,11,832},{6,0,729},{134,0,953},{4,0,727},{8,0
+,565},{5,11,351},{7,11,264},{136,11,565},{134,0,1948},{5,0,519},{5,11,40},{7,11,
+598},{7,11,1638},{8,11,78},{9,11,166},{9,11,640},{9,11,685},{9,11,773},{11,11,
+215},{13,11,65},{14,11,172},{14,11,317},{145,11,6},{8,11,60},{9,11,343},{139,11,
+769},{137,11,455},{134,0,1193},{140,0,790},{7,11,1951},{8,11,765},{8,11,772},{
+140,11,671},{7,11,108},{8,11,219},{8,11,388},{9,11,639},{9,11,775},{11,11,275},{
+140,11,464},{132,11,468},{7,10,30},{8,10,86},{8,10,315},{8,10,700},{9,10,576},{9
+,10,858},{11,10,310},{11,10,888},{11,10,904},{12,10,361},{141,10,248},{5,11,15},
+{6,11,56},{7,11,1758},{8,11,500},{9,11,730},{11,11,331},{13,11,150},{142,11,282}
+,{4,0,402},{7,0,2},{8,0,323},{136,0,479},{138,10,839},{11,0,580},{142,0,201},{5,
+0,59},{135,0,672},{137,10,617},{146,0,34},{134,11,1886},{4,0,961},{136,0,896},{6
+,0,1285},{5,11,205},{6,11,438},{137,11,711},{134,10,428},{7,10,524},{8,10,169},{
+8,10,234},{9,10,480},{138,10,646},{148,0,46},{141,0,479},{133,11,534},{6,0,2019}
+,{134,10,1648},{4,0,85},{7,0,549},{7,10,1205},{138,10,637},{4,0,663},{5,0,94},{7
+,11,235},{7,11,1475},{15,11,68},{146,11,120},{6,11,443},{9,11,237},{9,11,571},{9
+,11,695},{10,11,139},{11,11,715},{12,11,417},{141,11,421},{132,0,783},{4,0,682},
+{8,0,65},{9,10,39},{10,10,166},{11,10,918},{12,10,635},{20,10,10},{22,10,27},{22
+,10,43},{150,10,52},{6,0,11},{135,0,187},{132,0,522},{4,0,52},{135,0,661},{4,0,
+383},{133,0,520},{135,11,546},{11,0,343},{142,0,127},{4,11,578},{7,10,157},{7,11
+,624},{7,11,916},{8,10,279},{10,11,256},{11,11,87},{139,11,703},{134,10,604},{4,
+0,281},{5,0,38},{7,0,194},{7,0,668},{7,0,1893},{137,0,397},{7,10,945},{11,10,713
+},{139,10,744},{139,10,1022},{9,0,635},{139,0,559},{5,11,923},{7,11,490},{12,11,
+553},{13,11,100},{14,11,118},{143,11,75},{132,0,975},{132,10,567},{137,10,859},{
+7,10,1846},{7,11,1846},{8,10,628},{136,11,628},{148,0,116},{138,11,750},{14,0,51
+},{14,11,51},{15,11,7},{148,11,20},{132,0,858},{134,0,1075},{4,11,924},{133,10,
+762},{136,0,535},{133,0,448},{10,10,784},{141,10,191},{133,10,298},{7,0,610},{
+135,0,1501},{7,10,633},{7,10,905},{7,10,909},{7,10,1538},{9,10,767},{140,10,636}
+,{4,11,265},{7,11,807},{135,11,950},{5,11,93},{12,11,267},{144,11,26},{136,0,191
+},{139,10,301},{135,10,1970},{135,0,267},{4,0,319},{5,0,699},{138,0,673},{6,0,
+336},{7,0,92},{7,0,182},{8,0,453},{8,0,552},{9,0,204},{9,0,285},{10,0,99},{11,0,
+568},{11,0,950},{12,0,94},{16,0,20},{16,0,70},{19,0,55},{12,10,644},{144,10,90},
+{6,0,551},{7,0,1308},{7,10,845},{7,11,994},{8,10,160},{137,10,318},{19,11,1},{19
+,11,26},{150,11,9},{7,0,1406},{9,0,218},{141,0,222},{5,0,256},{138,0,69},{5,11,
+233},{5,11,320},{6,11,140},{7,11,330},{136,11,295},{6,0,1980},{136,0,952},{4,0,
+833},{137,11,678},{133,11,978},{4,11,905},{6,11,1701},{137,11,843},{138,10,735},
+{136,10,76},{17,0,39},{148,0,36},{18,0,81},{146,11,81},{14,0,352},{17,0,53},{18,
+0,146},{18,0,152},{19,0,11},{150,0,54},{135,0,634},{138,10,841},{132,0,618},{4,0
+,339},{7,0,259},{17,0,73},{4,11,275},{140,11,376},{132,11,509},{7,11,273},{139,
+11,377},{4,0,759},{13,0,169},{137,10,804},{6,10,96},{135,10,1426},{4,10,651},{
+133,10,289},{7,0,1075},{8,10,35},{9,10,511},{10,10,767},{147,10,118},{6,0,649},{
+6,0,670},{136,0,482},{5,0,336},{6,0,341},{6,0,478},{6,0,1763},{136,0,386},{5,11,
+802},{7,11,2021},{8,11,805},{14,11,94},{15,11,65},{16,11,4},{16,11,77},{16,11,80
+},{145,11,5},{6,0,1035},{5,11,167},{5,11,899},{6,11,410},{137,11,777},{134,11,
+1705},{5,0,924},{133,0,969},{132,10,704},{135,0,73},{135,11,10},{135,10,1078},{5
+,11,11},{6,11,117},{6,11,485},{7,11,1133},{9,11,582},{9,11,594},{11,11,21},{11,
+11,818},{12,11,535},{141,11,86},{135,0,1971},{4,11,264},{7,11,1067},{8,11,204},{
+8,11,385},{139,11,953},{6,0,1458},{135,0,1344},{5,0,396},{134,0,501},{4,10,720},
+{133,10,306},{4,0,929},{5,0,799},{8,0,46},{8,0,740},{133,10,431},{7,11,646},{7,
+11,1730},{11,11,446},{141,11,178},{7,0,276},{5,10,464},{6,10,236},{7,10,696},{7,
+10,914},{7,10,1108},{7,10,1448},{9,10,15},{9,10,564},{10,10,14},{12,10,565},{13,
+10,449},{14,10,53},{15,10,13},{16,10,64},{145,10,41},{4,0,892},{133,0,770},{6,10
+,1767},{12,10,194},{145,10,107},{135,0,158},{5,10,840},{138,11,608},{134,0,1432}
+,{138,11,250},{8,11,794},{9,11,400},{10,11,298},{142,11,228},{151,0,25},{7,11,
+1131},{135,11,1468},{135,0,2001},{9,10,642},{11,10,236},{142,10,193},{4,10,68},{
+5,10,634},{6,10,386},{7,10,794},{8,10,273},{9,10,563},{10,10,105},{10,10,171},{
+11,10,94},{139,10,354},{136,11,724},{132,0,478},{11,11,512},{13,11,205},{19,11,
+30},{22,11,36},{151,11,19},{7,0,1461},{140,0,91},{6,11,190},{7,11,768},{135,11,
+1170},{4,0,602},{8,0,211},{4,10,95},{7,10,416},{139,10,830},{7,10,731},{13,10,20
+},{143,10,11},{6,0,1068},{135,0,1872},{4,0,13},{5,0,567},{7,0,1498},{9,0,124},{
+11,0,521},{12,0,405},{135,11,1023},{135,0,1006},{132,0,735},{138,0,812},{4,0,170
+},{135,0,323},{6,11,137},{9,11,75},{9,11,253},{10,11,194},{138,11,444},{5,0,304}
+,{7,0,1403},{5,10,864},{10,10,648},{11,10,671},{143,10,46},{135,11,1180},{133,10
+,928},{4,0,148},{133,0,742},{11,10,986},{140,10,682},{133,0,523},{135,11,1743},{
+7,0,730},{18,0,144},{19,0,61},{8,10,44},{9,10,884},{10,10,580},{11,10,399},{11,
+10,894},{143,10,122},{5,11,760},{7,11,542},{8,11,135},{136,11,496},{136,0,981},{
+133,0,111},{10,0,132},{11,0,191},{11,0,358},{139,0,460},{7,11,319},{7,11,355},{7
+,11,763},{10,11,389},{145,11,43},{134,0,890},{134,0,1420},{136,11,557},{133,10,
+518},{133,0,444},{135,0,1787},{135,10,1852},{8,0,123},{15,0,6},{144,0,7},{6,0,
+2041},{10,11,38},{139,11,784},{136,0,932},{5,0,937},{135,0,100},{6,0,995},{4,11,
+58},{5,11,286},{6,11,319},{7,11,402},{7,11,1254},{7,11,1903},{8,11,356},{140,11,
+408},{4,11,389},{9,11,181},{9,11,255},{10,11,8},{10,11,29},{10,11,816},{11,11,
+311},{11,11,561},{12,11,67},{141,11,181},{138,0,255},{5,0,138},{4,10,934},{136,
+10,610},{4,0,965},{10,0,863},{138,0,898},{10,10,804},{138,10,832},{12,0,631},{8,
+10,96},{9,10,36},{10,10,607},{11,10,423},{11,10,442},{12,10,309},{14,10,199},{15
+,10,90},{145,10,110},{134,0,1394},{4,0,652},{8,0,320},{22,0,6},{22,0,16},{9,10,
+13},{9,10,398},{9,10,727},{10,10,75},{10,10,184},{10,10,230},{10,10,564},{10,10,
+569},{11,10,973},{12,10,70},{12,10,189},{13,10,57},{141,10,257},{6,0,897},{134,0
+,1333},{4,0,692},{133,0,321},{133,11,373},{135,0,922},{5,0,619},{133,0,698},{137
+,10,631},{5,10,345},{135,10,1016},{9,0,957},{9,0,1018},{12,0,828},{12,0,844},{12
+,0,897},{12,0,901},{12,0,943},{15,0,180},{18,0,197},{18,0,200},{18,0,213},{18,0,
+214},{146,0,226},{5,0,917},{134,0,1659},{135,0,1100},{134,0,1173},{134,0,1930},{
+5,0,251},{5,0,956},{8,0,268},{9,0,214},{146,0,142},{133,10,673},{137,10,850},{4,
+10,287},{133,10,1018},{132,11,672},{5,0,346},{5,0,711},{8,0,390},{11,11,752},{
+139,11,885},{5,10,34},{10,10,724},{12,10,444},{13,10,354},{18,10,32},{23,10,24},
+{23,10,31},{152,10,5},{4,11,710},{134,11,606},{134,0,744},{134,10,382},{133,11,
+145},{4,10,329},{7,11,884},{140,11,124},{4,11,467},{5,11,405},{134,11,544},{9,10
+,846},{138,10,827},{133,0,624},{9,11,372},{15,11,2},{19,11,10},{147,11,18},{4,11
+,387},{135,11,1288},{5,0,783},{7,0,1998},{135,0,2047},{132,10,906},{136,10,366},
+{135,11,550},{4,10,123},{4,10,649},{5,10,605},{7,10,1509},{136,10,36},{134,0,
+1125},{132,0,594},{133,10,767},{135,11,1227},{136,11,467},{4,11,576},{135,11,
+1263},{4,0,268},{7,0,1534},{135,11,1534},{4,10,273},{5,10,658},{5,11,919},{5,10,
+995},{134,11,1673},{133,0,563},{134,10,72},{135,10,1345},{4,11,82},{5,11,333},{5
+,11,904},{6,11,207},{7,11,325},{7,11,1726},{8,11,101},{10,11,778},{139,11,220},{
+5,0,37},{6,0,39},{6,0,451},{7,0,218},{7,0,667},{7,0,1166},{7,0,1687},{8,0,662},{
+16,0,2},{133,10,589},{134,0,1332},{133,11,903},{134,0,508},{5,10,117},{6,10,514}
+,{6,10,541},{7,10,1164},{7,10,1436},{8,10,220},{8,10,648},{10,10,688},{11,10,560
+},{140,11,147},{6,11,555},{135,11,485},{133,10,686},{7,0,453},{7,0,635},{7,0,796
+},{8,0,331},{9,0,330},{9,0,865},{10,0,119},{10,0,235},{11,0,111},{11,0,129},{11,
+0,240},{12,0,31},{12,0,66},{12,0,222},{12,0,269},{12,0,599},{12,0,684},{12,0,689
+},{12,0,691},{142,0,345},{135,0,1834},{4,11,705},{7,11,615},{138,11,251},{136,11
+,345},{137,0,527},{6,0,98},{7,0,702},{135,0,991},{11,0,576},{14,0,74},{7,10,196}
+,{10,10,765},{11,10,347},{11,10,552},{11,10,790},{12,10,263},{13,10,246},{13,10,
+270},{13,10,395},{14,10,176},{14,10,190},{14,10,398},{14,10,412},{15,10,32},{15,
+10,63},{16,10,88},{147,10,105},{134,11,90},{13,0,84},{141,0,122},{6,0,37},{7,0,
+299},{7,0,1666},{8,0,195},{8,0,316},{9,0,178},{9,0,276},{9,0,339},{9,0,536},{10,
+0,102},{10,0,362},{10,0,785},{11,0,55},{11,0,149},{11,0,773},{13,0,416},{13,0,
+419},{14,0,38},{14,0,41},{142,0,210},{5,10,381},{135,10,1792},{7,11,813},{12,11,
+497},{141,11,56},{7,10,616},{138,10,413},{133,0,645},{6,11,125},{135,11,1277},{
+132,0,290},{6,0,70},{7,0,1292},{10,0,762},{139,0,288},{6,10,120},{7,10,1188},{7,
+10,1710},{8,10,286},{9,10,667},{11,10,592},{139,10,730},{135,11,1784},{7,0,1315}
+,{135,11,1315},{134,0,1955},{135,10,1146},{7,0,131},{7,0,422},{8,0,210},{140,0,
+573},{4,10,352},{135,10,687},{139,0,797},{143,0,38},{14,0,179},{15,0,151},{150,0
+,11},{7,0,488},{4,10,192},{5,10,49},{6,10,200},{6,10,293},{134,10,1696},{132,0,
+936},{135,11,703},{6,11,160},{7,11,1106},{9,11,770},{10,11,618},{11,11,112},{140
+,11,413},{5,0,453},{134,0,441},{135,0,595},{132,10,650},{132,10,147},{6,0,991},{
+6,0,1182},{12,11,271},{145,11,109},{133,10,934},{140,11,221},{132,0,653},{7,0,
+505},{135,0,523},{134,0,903},{135,11,479},{7,11,304},{9,11,646},{9,11,862},{10,
+11,262},{11,11,696},{12,11,208},{15,11,79},{147,11,108},{146,0,80},{135,11,981},
+{142,0,432},{132,0,314},{137,11,152},{7,0,1368},{8,0,232},{8,0,361},{10,0,682},{
+138,0,742},{135,11,1586},{9,0,534},{4,11,434},{11,11,663},{12,11,210},{13,11,166
+},{13,11,310},{14,11,373},{147,11,43},{7,11,1091},{135,11,1765},{6,11,550},{135,
+11,652},{137,0,27},{142,0,12},{4,10,637},{5,11,553},{7,11,766},{138,11,824},{7,
+11,737},{8,11,298},{136,11,452},{7,0,736},{139,0,264},{134,0,1657},{133,11,292},
+{138,11,135},{6,0,844},{134,0,1117},{135,0,127},{9,10,867},{138,10,837},{6,0,
+1184},{134,0,1208},{134,0,1294},{136,0,364},{6,0,1415},{7,0,1334},{11,0,125},{6,
+10,170},{7,11,393},{8,10,395},{8,10,487},{10,11,603},{11,11,206},{141,10,147},{
+137,11,748},{4,11,912},{137,11,232},{4,10,535},{136,10,618},{137,0,792},{7,11,
+1973},{136,11,716},{135,11,98},{5,0,909},{9,0,849},{138,0,805},{4,0,630},{132,0,
+699},{5,11,733},{14,11,103},{150,10,23},{12,11,158},{18,11,8},{19,11,62},{20,11,
+6},{22,11,4},{23,11,2},{151,11,9},{132,0,968},{132,10,778},{132,10,46},{5,10,811
+},{6,10,1679},{6,10,1714},{135,10,2032},{6,0,1446},{7,10,1458},{9,10,407},{139,
+10,15},{7,0,206},{7,0,397},{7,0,621},{7,0,640},{8,0,124},{8,0,619},{9,0,305},{9,
+0,643},{10,0,264},{10,0,628},{11,0,40},{12,0,349},{13,0,134},{13,0,295},{14,0,
+155},{15,0,120},{18,0,105},{6,10,34},{7,10,1089},{8,10,708},{8,10,721},{9,10,363
+},{148,10,98},{4,0,262},{5,0,641},{135,0,342},{137,11,72},{4,0,99},{6,0,250},{6,
+0,346},{8,0,127},{138,0,81},{132,0,915},{5,0,75},{9,0,517},{10,0,470},{12,0,155}
+,{141,0,224},{132,10,462},{11,11,600},{11,11,670},{141,11,245},{142,0,83},{5,10,
+73},{6,10,23},{134,10,338},{6,0,1031},{139,11,923},{7,11,164},{7,11,1571},{9,11,
+107},{140,11,225},{134,0,1470},{133,0,954},{6,0,304},{8,0,418},{10,0,345},{11,0,
+341},{139,0,675},{9,0,410},{139,0,425},{4,11,27},{5,11,484},{5,11,510},{6,11,434
+},{7,11,1000},{7,11,1098},{8,11,2},{136,11,200},{134,0,734},{140,11,257},{7,10,
+725},{8,10,498},{139,10,268},{134,0,1822},{135,0,1798},{135,10,773},{132,11,460}
+,{4,11,932},{133,11,891},{134,0,14},{132,10,583},{7,10,1462},{8,11,625},{139,10,
+659},{5,0,113},{6,0,243},{6,0,1708},{7,0,1865},{11,0,161},{16,0,37},{17,0,99},{
+133,10,220},{134,11,76},{5,11,461},{135,11,1925},{140,0,69},{8,11,92},{137,11,
+221},{139,10,803},{132,10,544},{4,0,274},{134,0,922},{132,0,541},{5,0,627},{6,10
+,437},{6,10,564},{11,10,181},{141,10,183},{135,10,1192},{7,0,166},{132,11,763},{
+133,11,253},{134,0,849},{9,11,73},{10,11,110},{14,11,185},{145,11,119},{5,11,212
+},{12,11,35},{141,11,382},{133,0,717},{137,0,304},{136,0,600},{133,0,654},{6,0,
+273},{10,0,188},{13,0,377},{146,0,77},{4,10,790},{5,10,273},{134,10,394},{132,0,
+543},{135,0,410},{11,0,98},{11,0,524},{141,0,87},{132,0,941},{135,11,1175},{4,0,
+250},{7,0,1612},{11,0,186},{12,0,133},{6,10,127},{7,10,1511},{8,10,613},{12,10,
+495},{12,10,586},{12,10,660},{12,10,668},{14,10,385},{15,10,118},{17,10,20},{146
+,10,98},{6,0,1785},{133,11,816},{134,0,1339},{7,0,961},{7,0,1085},{7,0,1727},{8,
+0,462},{6,10,230},{135,11,1727},{9,0,636},{135,10,1954},{132,0,780},{5,11,869},{
+5,11,968},{6,11,1626},{8,11,734},{136,11,784},{4,11,542},{6,11,1716},{6,11,1727}
+,{7,11,1082},{7,11,1545},{8,11,56},{8,11,118},{8,11,412},{8,11,564},{9,11,888},{
+9,11,908},{10,11,50},{10,11,423},{11,11,685},{11,11,697},{11,11,933},{12,11,299}
+,{13,11,126},{13,11,136},{13,11,170},{141,11,190},{134,11,226},{4,11,232},{9,11,
+202},{10,11,474},{140,11,433},{137,11,500},{5,0,529},{136,10,68},{132,10,654},{4
+,10,156},{7,10,998},{7,10,1045},{7,10,1860},{9,10,48},{9,10,692},{11,10,419},{
+139,10,602},{7,0,1276},{8,0,474},{9,0,652},{6,11,108},{7,11,1003},{7,11,1181},{
+136,11,343},{7,11,1264},{7,11,1678},{11,11,945},{12,11,341},{12,11,471},{140,11,
+569},{134,11,1712},{5,0,948},{12,0,468},{19,0,96},{148,0,24},{4,11,133},{7,11,
+711},{7,11,1298},{7,11,1585},{135,11,1929},{6,0,753},{140,0,657},{139,0,941},{6,
+11,99},{7,11,1808},{145,11,57},{6,11,574},{7,11,428},{7,11,1250},{10,11,669},{11
+,11,485},{11,11,840},{12,11,300},{142,11,250},{4,0,532},{5,0,706},{135,0,662},{5
+,0,837},{6,0,1651},{139,0,985},{7,0,1861},{9,10,197},{10,10,300},{12,10,473},{13
+,10,90},{141,10,405},{137,11,252},{6,11,323},{135,11,1564},{4,0,330},{4,0,863},{
+7,0,933},{7,0,2012},{8,0,292},{7,11,461},{8,11,775},{138,11,435},{132,10,606},{4
+,11,655},{7,11,850},{17,11,75},{146,11,137},{135,0,767},{7,10,1978},{136,10,676}
+,{132,0,641},{135,11,1559},{134,0,1233},{137,0,242},{17,0,114},{4,10,361},{133,
+10,315},{137,0,883},{132,10,461},{138,0,274},{134,0,2008},{134,0,1794},{4,0,703}
+,{135,0,207},{12,0,285},{132,10,472},{132,0,571},{5,0,873},{5,0,960},{8,0,823},{
+9,0,881},{136,11,577},{7,0,617},{10,0,498},{11,0,501},{12,0,16},{140,0,150},{138
+,10,747},{132,0,431},{133,10,155},{11,0,283},{11,0,567},{7,10,163},{8,10,319},{9
+,10,402},{10,10,24},{10,10,681},{11,10,200},{12,10,253},{12,10,410},{142,10,219}
+,{4,11,413},{5,11,677},{8,11,432},{140,11,280},{9,0,401},{5,10,475},{7,10,1780},
+{11,10,297},{11,10,558},{14,10,322},{147,10,76},{6,0,781},{9,0,134},{10,0,2},{10
+,0,27},{10,0,333},{11,0,722},{143,0,1},{5,0,33},{6,0,470},{139,0,424},{135,0,
+2006},{12,0,783},{135,10,1956},{136,0,274},{135,0,1882},{132,0,794},{135,0,1848}
+,{5,10,944},{134,10,1769},{6,0,47},{7,0,90},{7,0,664},{7,0,830},{7,0,1380},{7,0,
+2025},{8,0,448},{136,0,828},{132,10,144},{134,0,1199},{4,11,395},{139,11,762},{
+135,11,1504},{9,0,417},{137,0,493},{9,11,174},{10,11,164},{11,11,440},{11,11,841
+},{143,11,98},{134,11,426},{139,11,1002},{134,0,295},{134,0,816},{6,10,247},{137
+,10,555},{133,0,1019},{4,0,620},{5,11,476},{10,10,280},{138,10,797},{139,0,464},
+{5,11,76},{6,11,458},{6,11,497},{7,11,764},{7,11,868},{9,11,658},{10,11,594},{11
+,11,173},{11,11,566},{12,11,20},{12,11,338},{141,11,200},{134,0,208},{4,11,526},
+{7,11,1029},{135,11,1054},{132,11,636},{6,11,233},{7,11,660},{7,11,1124},{17,11,
+31},{19,11,22},{151,11,14},{10,0,442},{133,10,428},{10,0,930},{140,0,778},{6,0,
+68},{7,0,448},{7,0,1629},{7,0,1769},{7,0,1813},{8,0,442},{8,0,516},{9,0,710},{10
+,0,282},{10,0,722},{7,10,1717},{138,10,546},{134,0,1128},{11,0,844},{12,0,104},{
+140,0,625},{4,11,432},{135,11,824},{138,10,189},{133,0,787},{133,10,99},{4,11,
+279},{7,11,301},{137,11,362},{8,0,491},{4,10,397},{136,10,555},{4,11,178},{133,
+11,399},{134,0,711},{144,0,9},{4,0,403},{5,0,441},{7,0,450},{10,0,840},{11,0,101
+},{12,0,193},{141,0,430},{135,11,1246},{12,10,398},{20,10,39},{21,10,11},{150,10
+,41},{4,10,485},{7,10,353},{135,10,1523},{6,10,366},{7,10,1384},{7,10,1601},{135
+,11,1912},{7,0,396},{10,0,160},{135,11,396},{137,10,282},{134,11,1692},{4,10,157
+},{5,10,471},{6,11,202},{10,11,448},{11,11,208},{12,11,360},{17,11,117},{17,11,
+118},{18,11,27},{148,11,67},{133,0,679},{137,0,326},{136,10,116},{7,11,872},{10,
+11,516},{139,11,167},{132,11,224},{5,11,546},{7,11,35},{8,11,11},{8,11,12},{9,11
+,315},{9,11,533},{10,11,802},{11,11,166},{12,11,525},{142,11,243},{7,0,1128},{
+135,11,1920},{5,11,241},{8,11,242},{9,11,451},{10,11,667},{11,11,598},{140,11,
+429},{6,0,737},{5,10,160},{7,10,363},{7,10,589},{10,10,170},{141,10,55},{135,0,
+1796},{142,11,254},{4,0,574},{7,0,350},{7,0,1024},{8,0,338},{9,0,677},{138,0,808
+},{134,0,1096},{137,11,516},{7,0,405},{10,0,491},{4,10,108},{4,11,366},{139,10,
+498},{11,11,337},{142,11,303},{134,11,1736},{7,0,1081},{140,11,364},{7,10,1005},
+{140,10,609},{7,0,1676},{4,10,895},{133,10,772},{135,0,2037},{6,0,1207},{11,11,
+916},{142,11,419},{14,11,140},{148,11,41},{6,11,331},{136,11,623},{9,0,944},{9,0
+,969},{9,0,1022},{12,0,913},{12,0,936},{15,0,177},{15,0,193},{4,10,926},{133,10,
+983},{5,0,354},{135,11,506},{8,0,598},{9,0,664},{138,0,441},{4,11,640},{133,11,
+513},{137,0,297},{132,10,538},{6,10,294},{7,10,1267},{136,10,624},{7,0,1772},{7,
+11,1888},{8,11,289},{11,11,45},{12,11,278},{140,11,537},{135,10,1325},{138,0,751
+},{141,0,37},{134,0,1828},{132,10,757},{132,11,394},{6,0,257},{135,0,1522},{4,0,
+582},{9,0,191},{135,11,1931},{7,11,574},{7,11,1719},{137,11,145},{132,11,658},{
+10,0,790},{132,11,369},{9,11,781},{10,11,144},{11,11,385},{13,11,161},{13,11,228
+},{13,11,268},{148,11,107},{8,0,469},{10,0,47},{136,11,374},{6,0,306},{7,0,1140}
+,{7,0,1340},{8,0,133},{138,0,449},{139,0,1011},{7,10,1875},{139,10,124},{4,11,
+344},{6,11,498},{139,11,323},{137,0,299},{132,0,837},{133,11,906},{5,0,329},{8,0
+,260},{138,0,10},{134,0,1320},{4,0,657},{146,0,158},{135,0,1191},{152,0,7},{6,0,
+1939},{8,0,974},{138,0,996},{135,0,1665},{11,11,126},{139,11,287},{143,0,8},{14,
+11,149},{14,11,399},{143,11,57},{5,0,66},{7,0,1896},{136,0,288},{7,0,175},{10,0,
+494},{5,10,150},{8,10,603},{9,10,593},{9,10,634},{10,10,173},{11,10,462},{11,10,
+515},{13,10,216},{13,10,288},{142,10,400},{134,0,1643},{136,11,21},{4,0,21},{5,0
+,91},{5,0,648},{5,0,750},{5,0,781},{6,0,54},{6,0,112},{6,0,402},{6,0,1732},{7,0,
+315},{7,0,749},{7,0,1427},{7,0,1900},{9,0,78},{9,0,508},{10,0,611},{10,0,811},{
+11,0,510},{11,0,728},{13,0,36},{14,0,39},{16,0,83},{17,0,124},{148,0,30},{4,0,
+668},{136,0,570},{10,0,322},{10,0,719},{139,0,407},{135,11,1381},{136,11,193},{
+12,10,108},{141,10,291},{132,11,616},{136,11,692},{8,0,125},{8,0,369},{8,0,524},
+{10,0,486},{11,0,13},{11,0,381},{11,0,736},{11,0,766},{11,0,845},{13,0,114},{13,
+0,292},{142,0,47},{134,0,1247},{6,0,1684},{6,0,1731},{7,0,356},{8,0,54},{8,0,221
+},{9,0,225},{9,0,356},{10,0,77},{10,0,446},{10,0,731},{12,0,404},{141,0,491},{
+135,10,1777},{4,11,305},{4,10,493},{144,10,55},{4,0,951},{6,0,1809},{6,0,1849},{
+8,0,846},{8,0,866},{8,0,899},{10,0,896},{12,0,694},{142,0,468},{5,11,214},{7,11,
+603},{8,11,611},{9,11,686},{10,11,88},{11,11,459},{11,11,496},{12,11,463},{12,11
+,590},{13,11,0},{142,11,214},{132,0,411},{4,0,80},{133,0,44},{140,11,74},{143,0,
+31},{7,0,669},{6,10,568},{7,10,1804},{8,10,362},{8,10,410},{8,10,830},{9,10,514}
+,{11,10,649},{142,10,157},{7,0,673},{134,11,1703},{132,10,625},{134,0,1303},{5,0
+,299},{135,0,1083},{138,0,704},{6,0,275},{7,0,408},{6,10,158},{7,10,129},{7,10,
+181},{8,10,276},{8,10,377},{10,10,523},{11,10,816},{12,10,455},{13,10,303},{142,
+10,135},{4,0,219},{7,0,367},{7,0,1713},{7,0,1761},{9,0,86},{9,0,537},{10,0,165},
+{12,0,219},{140,0,561},{8,0,216},{4,10,1},{4,11,737},{6,11,317},{7,10,1143},{7,
+10,1463},{9,10,207},{9,10,390},{9,10,467},{10,11,98},{11,11,294},{11,10,836},{12
+,11,60},{12,11,437},{13,11,64},{13,11,380},{142,11,430},{6,11,1758},{8,11,520},{
+9,11,345},{9,11,403},{142,11,350},{5,11,47},{10,11,242},{138,11,579},{5,11,139},
+{7,11,1168},{138,11,539},{135,0,1319},{4,10,295},{4,10,723},{5,10,895},{7,10,
+1031},{8,10,199},{8,10,340},{9,10,153},{9,10,215},{10,10,21},{10,10,59},{10,10,
+80},{10,10,224},{10,10,838},{11,10,229},{11,10,652},{12,10,192},{13,10,146},{142
+,10,91},{140,0,428},{137,10,51},{133,0,514},{5,10,309},{140,10,211},{6,0,1010},{
+5,10,125},{8,10,77},{138,10,15},{4,0,55},{5,0,301},{6,0,571},{142,0,49},{146,0,
+102},{136,11,370},{4,11,107},{7,11,613},{8,11,358},{8,11,439},{8,11,504},{9,11,
+501},{10,11,383},{139,11,477},{132,11,229},{133,0,364},{133,10,439},{4,11,903},{
+135,11,1816},{11,0,379},{140,10,76},{4,0,76},{4,0,971},{7,0,1550},{9,0,306},{9,0
+,430},{9,0,663},{10,0,683},{10,0,921},{11,0,427},{11,0,753},{12,0,334},{12,0,442
+},{14,0,258},{14,0,366},{143,0,131},{137,0,52},{4,11,47},{6,11,373},{7,11,452},{
+7,11,543},{7,11,1714},{7,11,1856},{9,11,6},{11,11,257},{139,11,391},{4,10,8},{7,
+10,1152},{7,10,1153},{7,10,1715},{9,10,374},{10,10,478},{139,10,648},{4,11,785},
+{133,11,368},{135,10,1099},{135,11,860},{5,11,980},{134,11,1754},{134,0,1258},{6
+,0,1058},{6,0,1359},{7,11,536},{7,11,1331},{136,11,143},{4,0,656},{135,0,779},{
+136,10,87},{5,11,19},{6,11,533},{146,11,126},{7,0,144},{138,10,438},{5,11,395},{
+5,11,951},{134,11,1776},{135,0,1373},{7,0,554},{7,0,605},{141,0,10},{4,10,69},{5
+,10,122},{9,10,656},{138,10,464},{5,10,849},{134,10,1633},{5,0,838},{5,0,841},{
+134,0,1649},{133,0,1012},{139,10,499},{7,10,476},{7,10,1592},{138,10,87},{6,0,
+251},{7,0,365},{7,0,1357},{7,0,1497},{8,0,154},{141,0,281},{132,11,441},{132,11,
+695},{7,11,497},{9,11,387},{147,11,81},{133,0,340},{14,10,283},{142,11,283},{134
+,0,810},{135,11,1894},{139,0,495},{5,11,284},{6,11,49},{6,11,350},{7,11,1},{7,11
+,377},{7,11,1693},{8,11,18},{8,11,678},{9,11,161},{9,11,585},{9,11,671},{9,11,
+839},{11,11,912},{141,11,427},{5,10,859},{7,10,1160},{8,10,107},{9,10,291},{9,10
+,439},{10,10,663},{11,10,609},{140,10,197},{8,0,261},{9,0,144},{9,0,466},{10,0,
+370},{12,0,470},{13,0,144},{142,0,348},{137,0,897},{6,0,248},{9,0,546},{10,0,535
+},{11,0,681},{141,0,135},{4,0,358},{135,0,1496},{134,0,567},{136,0,445},{4,10,
+117},{6,10,372},{7,10,1905},{142,10,323},{4,10,722},{139,10,471},{6,0,697},{134,
+0,996},{7,11,2007},{9,11,101},{9,11,450},{10,11,66},{10,11,842},{11,11,536},{140
+,11,587},{132,0,577},{134,0,1336},{9,10,5},{12,10,216},{12,10,294},{12,10,298},{
+12,10,400},{12,10,518},{13,10,229},{143,10,139},{6,0,174},{138,0,917},{134,10,
+1774},{5,10,12},{7,10,375},{9,10,88},{9,10,438},{11,11,62},{139,10,270},{134,11,
+1766},{6,11,0},{7,11,84},{7,10,816},{7,10,1241},{9,10,283},{9,10,520},{10,10,213
+},{10,10,307},{10,10,463},{10,10,671},{10,10,746},{11,10,401},{11,10,794},{11,11
+,895},{12,10,517},{17,11,11},{18,10,107},{147,10,115},{5,0,878},{133,0,972},{6,
+11,1665},{7,11,256},{7,11,1388},{138,11,499},{4,10,258},{136,10,639},{4,11,22},{
+5,11,10},{6,10,22},{7,11,848},{7,10,903},{7,10,1963},{8,11,97},{138,10,577},{5,
+10,681},{136,10,782},{133,11,481},{132,0,351},{4,10,664},{5,10,804},{139,10,1013
+},{6,11,134},{7,11,437},{7,11,959},{9,11,37},{14,11,285},{14,11,371},{144,11,60}
+,{7,11,486},{8,11,155},{11,11,93},{140,11,164},{132,0,286},{7,0,438},{7,0,627},{
+7,0,1516},{8,0,40},{9,0,56},{9,0,294},{10,0,30},{11,0,969},{11,0,995},{146,0,148
+},{5,11,591},{135,11,337},{134,0,1950},{133,10,32},{138,11,500},{5,11,380},{5,11
+,650},{136,11,310},{4,11,364},{7,11,1156},{7,11,1187},{137,11,409},{4,0,738},{
+134,11,482},{4,11,781},{6,11,487},{7,11,926},{8,11,263},{139,11,500},{135,11,418
+},{6,0,2047},{10,0,969},{4,10,289},{7,10,629},{7,10,1698},{7,10,1711},{140,10,
+215},{6,10,450},{136,10,109},{134,0,818},{136,10,705},{133,0,866},{4,11,94},{135
+,11,1265},{132,11,417},{134,0,1467},{135,10,1238},{4,0,972},{6,0,1851},{134,0,
+1857},{134,0,355},{133,0,116},{132,0,457},{135,11,1411},{4,11,408},{4,11,741},{
+135,11,500},{134,10,26},{142,11,137},{5,0,527},{6,0,189},{7,0,859},{136,0,267},{
+11,0,104},{11,0,554},{15,0,60},{143,0,125},{134,0,1613},{4,10,414},{5,10,467},{9
+,10,654},{10,10,451},{12,10,59},{141,10,375},{135,10,17},{134,0,116},{135,11,541
+},{135,10,955},{6,11,73},{135,11,177},{133,11,576},{134,0,886},{133,0,487},{4,0,
+86},{5,0,667},{5,0,753},{6,0,316},{6,0,455},{135,0,946},{142,11,231},{150,0,45},
+{134,0,863},{134,0,1953},{6,10,280},{10,10,502},{11,10,344},{140,10,38},{4,0,79}
+,{7,0,1773},{10,0,450},{11,0,589},{13,0,332},{13,0,493},{14,0,183},{14,0,334},{
+14,0,362},{14,0,368},{14,0,376},{14,0,379},{19,0,90},{19,0,103},{19,0,127},{148,
+0,90},{5,10,45},{7,10,1161},{11,10,448},{11,10,880},{13,10,139},{13,10,407},{15,
+10,16},{17,10,95},{18,10,66},{18,10,88},{18,10,123},{149,10,7},{136,10,777},{4,
+10,410},{135,10,521},{135,10,1778},{135,11,538},{142,0,381},{133,11,413},{134,0,
+1142},{6,0,1189},{136,11,495},{5,0,663},{6,0,1962},{134,0,2003},{7,11,54},{8,11,
+312},{10,11,191},{10,11,614},{140,11,567},{132,10,436},{133,0,846},{10,0,528},{
+11,0,504},{7,10,1587},{135,10,1707},{5,0,378},{8,0,465},{9,0,286},{10,0,185},{10
+,0,562},{10,0,635},{11,0,31},{11,0,393},{13,0,312},{18,0,65},{18,0,96},{147,0,89
+},{7,0,899},{14,0,325},{6,11,468},{7,11,567},{7,11,1478},{8,11,530},{142,11,290}
+,{7,0,1880},{9,0,680},{139,0,798},{134,0,1770},{132,0,648},{150,11,35},{5,0,945}
+,{6,0,1656},{6,0,1787},{7,0,167},{8,0,824},{9,0,391},{10,0,375},{139,0,185},{6,
+11,484},{135,11,822},{134,0,2046},{7,0,1645},{8,0,352},{137,0,249},{132,0,152},{
+6,0,611},{135,0,1733},{6,11,1724},{135,11,2022},{133,0,1006},{141,11,96},{5,0,
+420},{135,0,1449},{146,11,149},{135,0,832},{135,10,663},{133,0,351},{5,0,40},{7,
+0,598},{7,0,1638},{8,0,78},{9,0,166},{9,0,640},{9,0,685},{9,0,773},{11,0,215},{
+13,0,65},{14,0,172},{14,0,317},{145,0,6},{8,0,60},{9,0,343},{139,0,769},{134,0,
+1354},{132,0,724},{137,0,745},{132,11,474},{7,0,1951},{8,0,765},{8,0,772},{140,0
+,671},{7,0,108},{8,0,219},{8,0,388},{9,0,775},{11,0,275},{140,0,464},{137,0,639}
+,{135,10,503},{133,11,366},{5,0,15},{6,0,56},{7,0,1758},{8,0,500},{9,0,730},{11,
+0,331},{13,0,150},{14,0,282},{5,11,305},{9,11,560},{141,11,208},{4,10,113},{5,10
+,163},{5,10,735},{7,10,1009},{9,10,9},{9,10,771},{12,10,90},{13,10,138},{13,10,
+410},{143,10,128},{4,10,324},{138,10,104},{135,11,466},{142,11,27},{134,0,1886},
+{5,0,205},{6,0,438},{9,0,711},{4,11,480},{6,11,167},{6,11,302},{6,11,1642},{7,11
+,130},{7,11,656},{7,11,837},{7,11,1547},{7,11,1657},{8,11,429},{9,11,228},{10,11
+,643},{13,11,289},{13,11,343},{147,11,101},{134,0,865},{6,0,2025},{136,0,965},{7
+,11,278},{10,11,739},{11,11,708},{141,11,348},{133,0,534},{135,11,1922},{137,0,
+691},{4,10,935},{133,10,823},{6,0,443},{9,0,237},{9,0,571},{9,0,695},{10,0,139},
+{11,0,715},{12,0,417},{141,0,421},{5,10,269},{7,10,434},{7,10,891},{8,10,339},{9
+,10,702},{11,10,594},{11,10,718},{145,10,100},{6,0,1555},{7,0,878},{9,10,485},{
+141,10,264},{134,10,1713},{7,10,1810},{11,10,866},{12,10,103},{141,10,495},{135,
+10,900},{6,0,1410},{9,11,316},{139,11,256},{4,0,995},{135,0,1033},{132,0,578},{
+10,0,881},{12,0,740},{12,0,743},{140,0,759},{132,0,822},{133,0,923},{142,10,143}
+,{135,11,1696},{6,11,363},{7,11,1955},{136,11,725},{132,0,924},{133,0,665},{135,
+10,2029},{135,0,1901},{4,0,265},{6,0,1092},{6,0,1417},{7,0,807},{135,0,950},{5,0
+,93},{12,0,267},{141,0,498},{135,0,1451},{5,11,813},{135,11,2046},{5,10,625},{
+135,10,1617},{135,0,747},{6,0,788},{137,0,828},{7,0,184},{11,0,307},{11,0,400},{
+15,0,130},{5,11,712},{7,11,1855},{8,10,425},{8,10,693},{9,10,720},{10,10,380},{
+10,10,638},{11,11,17},{11,10,473},{12,10,61},{13,11,321},{144,11,67},{135,0,198}
+,{6,11,320},{7,11,781},{7,11,1921},{9,11,55},{10,11,186},{10,11,273},{10,11,664}
+,{10,11,801},{11,11,996},{11,11,997},{13,11,157},{142,11,170},{136,11,271},{135,
+0,994},{7,11,103},{7,11,863},{11,11,184},{14,11,299},{145,11,62},{11,10,551},{
+142,10,159},{5,0,233},{5,0,320},{6,0,140},{8,0,295},{8,0,615},{136,11,615},{133,
+0,978},{4,0,905},{6,0,1701},{137,0,843},{132,10,168},{4,0,974},{8,0,850},{12,0,
+709},{12,0,768},{140,0,786},{135,10,91},{152,0,6},{138,10,532},{135,10,1884},{
+132,0,509},{6,0,1307},{135,0,273},{5,11,77},{7,11,1455},{10,11,843},{19,11,73},{
+150,11,5},{132,11,458},{135,11,1420},{6,11,109},{138,11,382},{6,0,201},{6,11,330
+},{7,10,70},{7,11,1084},{10,10,240},{11,11,142},{147,10,93},{7,0,1041},{140,11,
+328},{133,11,354},{134,0,1040},{133,0,693},{134,0,774},{139,0,234},{132,0,336},{
+7,0,1399},{139,10,392},{20,0,22},{148,11,22},{5,0,802},{7,0,2021},{136,0,805},{5
+,0,167},{5,0,899},{6,0,410},{137,0,777},{137,0,789},{134,0,1705},{7,10,655},{135
+,10,1844},{4,10,145},{6,10,176},{7,10,395},{137,10,562},{132,10,501},{135,0,10},
+{5,0,11},{6,0,117},{6,0,485},{7,0,1133},{9,0,582},{9,0,594},{10,0,82},{11,0,21},
+{11,0,818},{12,0,535},{13,0,86},{20,0,91},{23,0,13},{134,10,509},{4,0,264},{7,0,
+1067},{8,0,204},{8,0,385},{139,0,953},{139,11,737},{138,0,56},{134,0,1917},{133,
+0,470},{10,11,657},{14,11,297},{142,11,361},{135,11,412},{7,0,1198},{7,11,1198},
+{8,11,556},{14,11,123},{14,11,192},{143,11,27},{7,11,1985},{14,11,146},{15,11,42
+},{16,11,23},{17,11,86},{146,11,17},{11,0,1015},{136,11,122},{4,10,114},{9,10,
+492},{13,10,462},{142,10,215},{4,10,77},{5,10,361},{6,10,139},{6,10,401},{6,10,
+404},{7,10,413},{7,10,715},{7,10,1716},{11,10,279},{12,10,179},{12,10,258},{13,
+10,244},{142,10,358},{134,10,1717},{7,10,1061},{8,10,82},{11,10,250},{12,10,420}
+,{141,10,184},{133,0,715},{135,10,724},{9,0,919},{9,0,922},{9,0,927},{9,0,933},{
+9,0,962},{9,0,1000},{9,0,1002},{9,0,1021},{12,0,890},{12,0,907},{12,0,930},{15,0
+,207},{15,0,228},{15,0,238},{149,0,61},{8,0,794},{9,0,400},{10,0,298},{142,0,228
+},{5,11,430},{5,11,932},{6,11,131},{7,11,417},{9,11,522},{11,11,314},{141,11,390
+},{132,0,867},{8,0,724},{132,11,507},{137,11,261},{4,11,343},{133,11,511},{6,0,
+190},{7,0,768},{135,0,1170},{6,10,513},{135,10,1052},{7,11,455},{138,11,591},{
+134,0,1066},{137,10,899},{14,0,67},{147,0,60},{4,0,948},{18,0,174},{146,0,176},{
+135,0,1023},{7,10,1417},{12,10,382},{17,10,48},{152,10,12},{134,11,575},{132,0,
+764},{6,10,545},{7,10,565},{7,10,1669},{10,10,114},{11,10,642},{140,10,618},{6,0
+,137},{9,0,75},{9,0,253},{10,0,194},{138,0,444},{4,0,756},{133,10,5},{8,0,1008},
+{135,10,192},{132,0,842},{11,0,643},{12,0,115},{136,10,763},{139,0,67},{133,10,
+759},{4,0,821},{5,0,760},{7,0,542},{8,0,135},{8,0,496},{135,11,580},{7,10,370},{
+7,10,1007},{7,10,1177},{135,10,1565},{135,10,1237},{140,0,736},{7,0,319},{7,0,
+355},{7,0,763},{10,0,389},{145,0,43},{8,11,333},{138,11,182},{4,10,87},{5,10,250
+},{141,10,298},{138,0,786},{134,0,2044},{8,11,330},{140,11,477},{135,11,1338},{
+132,11,125},{134,0,1030},{134,0,1083},{132,11,721},{135,10,814},{7,11,776},{8,11
+,145},{147,11,56},{134,0,1226},{4,10,57},{7,10,1195},{7,10,1438},{7,10,1548},{7,
+10,1835},{7,10,1904},{9,10,757},{10,10,604},{139,10,519},{7,11,792},{8,11,147},{
+10,11,821},{139,11,1021},{137,11,797},{4,0,58},{5,0,286},{6,0,319},{7,0,402},{7,
+0,1254},{7,0,1903},{8,0,356},{140,0,408},{4,0,389},{4,0,815},{9,0,181},{9,0,255}
+,{10,0,8},{10,0,29},{10,0,816},{11,0,311},{11,0,561},{12,0,67},{141,0,181},{7,11
+,1472},{135,11,1554},{7,11,1071},{7,11,1541},{7,11,1767},{7,11,1806},{7,11,1999}
+,{9,11,248},{10,11,400},{11,11,162},{11,11,178},{11,11,242},{12,11,605},{15,11,
+26},{144,11,44},{5,11,168},{5,11,930},{8,11,74},{9,11,623},{12,11,500},{12,11,
+579},{13,11,41},{143,11,93},{6,11,220},{7,11,1101},{141,11,105},{5,0,474},{7,0,
+507},{4,10,209},{7,11,507},{135,10,902},{132,0,427},{6,0,413},{7,10,335},{7,10,
+1437},{7,10,1668},{8,10,553},{8,10,652},{8,10,656},{9,10,558},{11,10,743},{149,
+10,18},{132,0,730},{6,11,19},{7,11,1413},{139,11,428},{133,0,373},{132,10,559},{
+7,11,96},{8,11,401},{137,11,896},{7,0,799},{7,0,1972},{5,10,1017},{138,10,511},{
+135,0,1793},{7,11,1961},{7,11,1965},{8,11,702},{136,11,750},{8,11,150},{8,11,737
+},{140,11,366},{132,0,322},{133,10,709},{8,11,800},{9,11,148},{9,11,872},{9,11,
+890},{11,11,309},{11,11,1001},{13,11,267},{141,11,323},{134,10,1745},{7,0,290},{
+136,10,206},{7,0,1651},{145,0,89},{139,0,2},{132,0,672},{6,0,1860},{8,0,905},{10
+,0,844},{10,0,846},{10,0,858},{12,0,699},{12,0,746},{140,0,772},{135,11,424},{
+133,11,547},{133,0,737},{5,11,490},{6,11,615},{6,11,620},{135,11,683},{6,0,746},
+{134,0,1612},{132,10,776},{9,11,385},{149,11,17},{133,0,145},{135,10,1272},{7,0,
+884},{140,0,124},{4,0,387},{135,0,1288},{5,11,133},{136,10,406},{136,11,187},{6,
+0,679},{8,11,8},{138,11,0},{135,0,550},{135,11,798},{136,11,685},{7,11,1086},{
+145,11,46},{8,10,175},{10,10,168},{138,10,573},{135,0,1305},{4,0,576},{135,0,
+1263},{6,0,686},{134,0,1563},{134,0,607},{5,0,919},{134,0,1673},{148,0,37},{8,11
+,774},{10,11,670},{140,11,51},{133,10,784},{139,10,882},{4,0,82},{5,0,333},{5,0,
+904},{6,0,207},{7,0,325},{7,0,1726},{8,0,101},{10,0,778},{139,0,220},{135,11,371
+},{132,0,958},{133,0,903},{4,11,127},{5,11,350},{6,11,356},{8,11,426},{9,11,572}
+,{10,11,247},{139,11,312},{140,0,147},{6,11,59},{7,11,885},{9,11,603},{141,11,
+397},{10,0,367},{9,10,14},{9,10,441},{139,10,9},{11,10,966},{12,10,287},{13,10,
+342},{13,10,402},{15,10,110},{143,10,163},{134,0,690},{132,0,705},{9,0,651},{11,
+0,971},{13,0,273},{7,10,1428},{7,10,1640},{7,10,1867},{9,10,169},{9,10,182},{9,
+10,367},{9,10,478},{9,10,506},{9,10,551},{9,10,557},{9,10,648},{9,10,697},{9,10,
+705},{9,10,725},{9,10,787},{9,10,794},{10,10,198},{10,10,214},{10,10,267},{10,10
+,275},{10,10,456},{10,10,551},{10,10,561},{10,10,613},{10,10,627},{10,10,668},{
+10,10,675},{10,10,691},{10,10,695},{10,10,707},{10,10,715},{11,10,183},{11,10,
+201},{11,10,262},{11,10,352},{11,10,439},{11,10,493},{11,10,572},{11,10,591},{11
+,10,608},{11,10,611},{11,10,646},{11,10,674},{11,10,711},{11,10,751},{11,10,761}
+,{11,10,776},{11,10,785},{11,10,850},{11,10,853},{11,10,862},{11,10,865},{11,10,
+868},{11,10,875},{11,10,898},{11,10,902},{11,10,903},{11,10,910},{11,10,932},{11
+,10,942},{11,10,957},{11,10,967},{11,10,972},{12,10,148},{12,10,195},{12,10,220}
+,{12,10,237},{12,10,318},{12,10,339},{12,10,393},{12,10,445},{12,10,450},{12,10,
+474},{12,10,505},{12,10,509},{12,10,533},{12,10,591},{12,10,594},{12,10,597},{12
+,10,621},{12,10,633},{12,10,642},{13,10,59},{13,10,60},{13,10,145},{13,10,239},{
+13,10,250},{13,10,329},{13,10,344},{13,10,365},{13,10,372},{13,10,387},{13,10,
+403},{13,10,414},{13,10,456},{13,10,470},{13,10,478},{13,10,483},{13,10,489},{14
+,10,55},{14,10,57},{14,10,81},{14,10,90},{14,10,148},{14,10,239},{14,10,266},{14
+,10,321},{14,10,326},{14,10,327},{14,10,330},{14,10,347},{14,10,355},{14,10,401}
+,{14,10,404},{14,10,411},{14,10,414},{14,10,416},{14,10,420},{15,10,61},{15,10,
+74},{15,10,87},{15,10,88},{15,10,94},{15,10,96},{15,10,116},{15,10,149},{15,10,
+154},{16,10,50},{16,10,63},{16,10,73},{17,10,2},{17,10,66},{17,10,92},{17,10,103
+},{17,10,112},{17,10,120},{18,10,50},{18,10,54},{18,10,82},{18,10,86},{18,10,90}
+,{18,10,111},{18,10,115},{18,10,156},{19,10,40},{19,10,79},{20,10,78},{149,10,22
+},{7,0,887},{5,10,161},{135,10,839},{142,11,98},{134,0,90},{138,11,356},{135,11,
+441},{6,11,111},{7,11,4},{8,11,163},{8,11,776},{138,11,566},{134,0,908},{134,0,
+1261},{7,0,813},{12,0,497},{141,0,56},{134,0,1235},{135,0,429},{135,11,1994},{
+138,0,904},{6,0,125},{7,0,1277},{137,0,772},{151,0,12},{4,0,841},{5,0,386},{133,
+11,386},{5,11,297},{135,11,1038},{6,0,860},{6,0,1069},{135,11,309},{136,0,946},{
+135,10,1814},{141,11,418},{136,11,363},{10,0,768},{139,0,787},{22,11,30},{150,11
+,33},{6,0,160},{7,0,1106},{9,0,770},{11,0,112},{140,0,413},{11,11,216},{139,11,
+340},{136,10,139},{135,11,1390},{135,11,808},{132,11,280},{12,0,271},{17,0,109},
+{7,10,643},{136,10,236},{140,11,54},{4,11,421},{133,11,548},{11,0,719},{12,0,36}
+,{141,0,337},{7,0,581},{9,0,644},{137,0,699},{11,11,511},{13,11,394},{14,11,298}
+,{14,11,318},{146,11,103},{7,0,304},{9,0,646},{9,0,862},{11,0,696},{12,0,208},{
+15,0,79},{147,0,108},{4,0,631},{7,0,1126},{135,0,1536},{135,11,1527},{8,0,880},{
+10,0,869},{138,0,913},{7,0,1513},{5,10,54},{6,11,254},{9,11,109},{138,11,103},{
+135,0,981},{133,11,729},{132,10,744},{132,0,434},{134,0,550},{7,0,930},{10,0,476
+},{13,0,452},{19,0,104},{6,11,1630},{10,10,402},{146,10,55},{5,0,553},{138,0,824
+},{136,0,452},{8,0,151},{137,10,624},{132,10,572},{132,0,772},{133,11,671},{133,
+0,292},{138,0,135},{132,11,889},{140,11,207},{9,0,504},{6,10,43},{7,10,38},{8,10
+,248},{138,10,513},{6,0,1089},{135,11,1910},{4,11,627},{133,11,775},{135,0,783},
+{133,10,766},{133,10,363},{7,0,387},{135,11,387},{7,0,393},{10,0,603},{11,0,206}
+,{7,11,202},{11,11,362},{11,11,948},{140,11,388},{6,11,507},{7,11,451},{8,11,389
+},{12,11,490},{13,11,16},{13,11,215},{13,11,351},{18,11,132},{147,11,125},{4,0,
+912},{9,0,232},{135,11,841},{6,10,258},{140,10,409},{5,10,249},{148,10,82},{136,
+11,566},{6,0,977},{135,11,1214},{7,0,1973},{136,0,716},{135,0,98},{133,0,733},{5
+,11,912},{134,11,1695},{5,10,393},{6,10,378},{7,10,1981},{9,10,32},{9,10,591},{
+10,10,685},{10,10,741},{142,10,382},{133,10,788},{10,0,19},{11,0,911},{7,10,1968
+},{141,10,509},{5,0,668},{5,11,236},{6,11,572},{8,11,492},{11,11,618},{144,11,56
+},{135,11,1789},{4,0,360},{5,0,635},{5,0,700},{5,10,58},{5,10,171},{5,10,683},{6
+,10,291},{6,10,566},{7,10,1650},{11,10,523},{12,10,273},{12,10,303},{15,10,39},{
+143,10,111},{133,0,901},{134,10,589},{5,11,190},{136,11,318},{140,0,656},{7,0,
+726},{152,0,9},{4,10,917},{133,10,1005},{135,10,1598},{134,11,491},{4,10,919},{
+133,11,434},{137,0,72},{6,0,1269},{6,0,1566},{134,0,1621},{9,0,463},{10,0,595},{
+4,10,255},{5,10,302},{6,10,132},{7,10,128},{7,10,283},{7,10,1299},{10,10,52},{10
+,10,514},{11,10,925},{13,10,92},{142,10,309},{135,0,1454},{134,0,1287},{11,0,600
+},{13,0,245},{137,10,173},{136,0,989},{7,0,164},{7,0,1571},{9,0,107},{140,0,225}
+,{6,0,1061},{141,10,442},{4,0,27},{5,0,484},{5,0,510},{6,0,434},{7,0,1000},{7,0,
+1098},{136,0,2},{7,11,85},{7,11,247},{8,11,585},{10,11,163},{138,11,316},{11,11,
+103},{142,11,0},{134,0,1127},{4,0,460},{134,0,852},{134,10,210},{4,0,932},{133,0
+,891},{6,0,588},{147,11,83},{8,0,625},{4,10,284},{134,10,223},{134,0,76},{8,0,92
+},{137,0,221},{4,11,124},{10,11,457},{11,11,121},{11,11,169},{11,11,422},{11,11,
+870},{12,11,214},{13,11,389},{14,11,187},{143,11,77},{9,11,618},{138,11,482},{4,
+10,218},{7,10,526},{143,10,137},{13,0,9},{14,0,104},{14,0,311},{4,10,270},{5,10,
+192},{6,10,332},{135,10,1322},{140,10,661},{135,11,1193},{6,11,107},{7,11,638},{
+7,11,1632},{137,11,396},{132,0,763},{4,0,622},{5,11,370},{134,11,1756},{133,0,
+253},{135,0,546},{9,0,73},{10,0,110},{14,0,185},{17,0,119},{133,11,204},{7,0,624
+},{7,0,916},{10,0,256},{139,0,87},{7,10,379},{8,10,481},{137,10,377},{5,0,212},{
+12,0,35},{13,0,382},{5,11,970},{134,11,1706},{9,0,746},{5,10,1003},{134,10,149},
+{10,0,150},{11,0,849},{13,0,330},{8,10,262},{9,10,627},{11,10,214},{11,10,404},{
+11,10,457},{11,10,780},{11,10,913},{13,10,401},{142,10,200},{134,0,1466},{135,11
+,3},{6,0,1299},{4,11,35},{5,11,121},{5,11,483},{5,11,685},{6,11,489},{7,11,1204}
+,{136,11,394},{135,10,742},{4,10,142},{136,10,304},{4,11,921},{133,11,1007},{134
+,0,1518},{6,0,1229},{135,0,1175},{133,0,816},{12,0,159},{4,10,471},{4,11,712},{5
+,10,51},{6,10,602},{7,10,925},{8,10,484},{138,10,195},{134,11,1629},{5,0,869},{5
+,0,968},{6,0,1626},{8,0,734},{136,0,784},{4,0,542},{6,0,1716},{6,0,1727},{7,0,
+1082},{7,0,1545},{8,0,56},{8,0,118},{8,0,412},{8,0,564},{9,0,888},{9,0,908},{10,
+0,50},{10,0,423},{11,0,685},{11,0,697},{11,0,933},{12,0,299},{13,0,126},{13,0,
+136},{13,0,170},{13,0,190},{136,10,688},{132,10,697},{4,0,232},{9,0,202},{10,0,
+474},{140,0,433},{136,0,212},{6,0,108},{7,0,1003},{7,0,1181},{8,0,111},{136,0,
+343},{5,10,221},{135,11,1255},{133,11,485},{134,0,1712},{142,0,216},{5,0,643},{6
+,0,516},{4,11,285},{5,11,317},{6,11,301},{7,11,7},{8,11,153},{10,11,766},{11,11,
+468},{12,11,467},{141,11,143},{4,0,133},{7,0,711},{7,0,1298},{135,0,1585},{134,0
+,650},{135,11,512},{6,0,99},{7,0,1808},{145,0,57},{6,0,246},{6,0,574},{7,0,428},
+{9,0,793},{10,0,669},{11,0,485},{11,0,840},{12,0,300},{14,0,250},{145,0,55},{4,
+10,132},{5,10,69},{135,10,1242},{136,0,1023},{7,0,302},{132,10,111},{135,0,1871}
+,{132,0,728},{9,0,252},{132,10,767},{6,0,461},{7,0,1590},{7,10,1416},{7,10,2005}
+,{8,10,131},{8,10,466},{9,10,672},{13,10,252},{148,10,103},{6,0,323},{135,0,1564
+},{7,0,461},{136,0,775},{6,10,44},{136,10,368},{139,0,172},{132,0,464},{4,10,570
+},{133,10,120},{137,11,269},{6,10,227},{135,10,1589},{6,11,1719},{6,11,1735},{7,
+11,2016},{7,11,2020},{8,11,837},{137,11,852},{7,0,727},{146,0,73},{132,0,1023},{
+135,11,852},{135,10,1529},{136,0,577},{138,11,568},{134,0,1037},{8,11,67},{138,
+11,419},{4,0,413},{5,0,677},{8,0,432},{140,0,280},{10,0,600},{6,10,1667},{7,11,
+967},{7,10,2036},{141,11,11},{6,10,511},{140,10,132},{6,0,799},{5,10,568},{6,10,
+138},{135,10,1293},{8,0,159},{4,10,565},{136,10,827},{7,0,646},{7,0,1730},{11,0,
+446},{141,0,178},{4,10,922},{133,10,1023},{135,11,11},{132,0,395},{11,0,145},{
+135,10,1002},{9,0,174},{10,0,164},{11,0,440},{11,0,514},{11,0,841},{15,0,98},{
+149,0,20},{134,0,426},{10,0,608},{139,0,1002},{7,11,320},{8,11,51},{12,11,481},{
+12,11,570},{148,11,106},{9,0,977},{9,0,983},{132,11,445},{138,0,250},{139,0,100}
+,{6,0,1982},{136,10,402},{133,11,239},{4,10,716},{141,10,31},{5,0,476},{7,11,83}
+,{7,11,1990},{8,11,130},{139,11,720},{8,10,691},{136,10,731},{5,11,123},{6,11,
+530},{7,11,348},{135,11,1419},{5,0,76},{6,0,458},{6,0,497},{7,0,868},{9,0,658},{
+10,0,594},{11,0,173},{11,0,566},{12,0,20},{12,0,338},{141,0,200},{9,11,139},{10,
+11,399},{11,11,469},{12,11,634},{141,11,223},{9,10,840},{138,10,803},{133,10,847
+},{11,11,223},{140,11,168},{132,11,210},{8,0,447},{9,10,53},{9,10,268},{9,10,901
+},{10,10,518},{10,10,829},{11,10,188},{13,10,74},{14,10,46},{15,10,17},{15,10,33
+},{17,10,40},{18,10,36},{19,10,20},{22,10,1},{152,10,2},{4,0,526},{7,0,1029},{
+135,0,1054},{19,11,59},{150,11,2},{4,0,636},{6,0,1875},{6,0,1920},{9,0,999},{12,
+0,807},{12,0,825},{15,0,179},{15,0,190},{18,0,182},{136,10,532},{6,0,1699},{7,0,
+660},{7,0,1124},{17,0,31},{19,0,22},{151,0,14},{135,10,681},{132,11,430},{140,10
+,677},{4,10,684},{136,10,384},{132,11,756},{133,11,213},{7,0,188},{7,10,110},{8,
+10,290},{8,10,591},{9,10,382},{9,10,649},{11,10,71},{11,10,155},{11,10,313},{12,
+10,5},{13,10,325},{142,10,287},{7,10,360},{7,10,425},{9,10,66},{9,10,278},{138,
+10,644},{142,11,164},{4,0,279},{7,0,301},{137,0,362},{134,11,586},{135,0,1743},{
+4,0,178},{133,0,399},{4,10,900},{133,10,861},{5,10,254},{7,10,985},{136,10,73},{
+133,11,108},{7,10,1959},{136,10,683},{133,11,219},{4,11,193},{5,11,916},{7,11,
+364},{10,11,398},{10,11,726},{11,11,317},{11,11,626},{12,11,142},{12,11,288},{12
+,11,678},{13,11,313},{15,11,113},{18,11,114},{21,11,30},{150,11,53},{6,11,241},{
+7,11,907},{8,11,832},{9,11,342},{10,11,729},{11,11,284},{11,11,445},{11,11,651},
+{11,11,863},{13,11,398},{146,11,99},{132,0,872},{134,0,831},{134,0,1692},{6,0,
+202},{6,0,1006},{9,0,832},{10,0,636},{11,0,208},{12,0,360},{17,0,118},{18,0,27},
+{20,0,67},{137,11,734},{132,10,725},{7,11,993},{138,11,666},{134,0,1954},{134,10
+,196},{7,0,872},{10,0,516},{139,0,167},{133,10,831},{4,11,562},{9,11,254},{139,
+11,879},{137,0,313},{4,0,224},{132,11,786},{11,0,24},{12,0,170},{136,10,723},{5,
+0,546},{7,0,35},{8,0,11},{8,0,12},{9,0,315},{9,0,533},{10,0,802},{11,0,166},{12,
+0,525},{142,0,243},{7,0,1937},{13,10,80},{13,10,437},{145,10,74},{5,0,241},{8,0,
+242},{9,0,451},{10,0,667},{11,0,598},{140,0,429},{150,0,46},{6,0,1273},{137,0,
+830},{5,10,848},{6,10,66},{136,10,764},{6,0,825},{134,0,993},{4,0,1006},{10,0,
+327},{13,0,271},{4,10,36},{7,10,1387},{139,10,755},{134,0,1023},{135,0,1580},{4,
+0,366},{137,0,516},{132,10,887},{6,0,1736},{135,0,1891},{6,11,216},{7,11,901},{7
+,11,1343},{136,11,493},{6,10,165},{138,10,388},{7,11,341},{139,11,219},{4,10,719
+},{135,10,155},{134,0,1935},{132,0,826},{6,0,331},{6,0,1605},{8,0,623},{11,0,139
+},{139,0,171},{135,11,1734},{10,11,115},{11,11,420},{12,11,154},{13,11,404},{14,
+11,346},{15,11,54},{143,11,112},{7,0,288},{4,10,353},{6,10,146},{6,10,1789},{7,
+10,990},{7,10,1348},{9,10,665},{9,10,898},{11,10,893},{142,10,212},{6,0,916},{
+134,0,1592},{7,0,1888},{4,10,45},{135,10,1257},{5,11,1011},{136,11,701},{139,11,
+596},{4,11,54},{5,11,666},{7,11,1039},{7,11,1130},{9,11,195},{138,11,302},{134,0
+,1471},{134,0,1570},{132,0,394},{140,10,65},{136,10,816},{135,0,1931},{7,0,574},
+{135,0,1719},{134,11,467},{132,0,658},{9,0,781},{10,0,144},{11,0,385},{13,0,161}
+,{13,0,228},{13,0,268},{20,0,107},{134,11,1669},{136,0,374},{135,0,735},{4,0,344
+},{6,0,498},{139,0,323},{7,0,586},{7,0,1063},{6,10,559},{134,10,1691},{137,0,155
+},{133,0,906},{7,11,122},{9,11,259},{10,11,84},{11,11,470},{12,11,541},{141,11,
+379},{134,0,1139},{10,0,108},{139,0,116},{134,10,456},{133,10,925},{5,11,82},{5,
+11,131},{7,11,1755},{8,11,31},{9,11,168},{9,11,764},{139,11,869},{134,11,605},{5
+,11,278},{137,11,68},{4,11,163},{5,11,201},{5,11,307},{5,11,310},{6,11,335},{7,
+11,284},{136,11,165},{135,11,1660},{6,11,33},{135,11,1244},{4,0,616},{136,11,483
+},{8,0,857},{8,0,902},{8,0,910},{10,0,879},{12,0,726},{4,11,199},{139,11,34},{
+136,0,692},{6,10,193},{7,10,240},{7,10,1682},{10,10,51},{10,10,640},{11,10,410},
+{13,10,82},{14,10,247},{14,10,331},{142,10,377},{6,0,823},{134,0,983},{139,10,
+411},{132,0,305},{136,10,633},{138,11,203},{134,0,681},{6,11,326},{7,11,677},{
+137,11,425},{5,0,214},{7,0,603},{8,0,611},{9,0,686},{10,0,88},{11,0,459},{11,0,
+496},{12,0,463},{12,0,590},{141,0,0},{136,0,1004},{142,0,23},{134,0,1703},{147,
+11,8},{145,11,56},{135,0,1443},{4,10,237},{135,10,514},{6,0,714},{145,0,19},{5,
+11,358},{7,11,473},{7,11,1184},{10,11,662},{13,11,212},{13,11,304},{13,11,333},{
+145,11,98},{4,0,737},{10,0,98},{11,0,294},{12,0,60},{12,0,437},{13,0,64},{13,0,
+380},{142,0,430},{6,10,392},{7,10,65},{135,10,2019},{6,0,1758},{8,0,520},{9,0,
+345},{9,0,403},{142,0,350},{5,0,47},{10,0,242},{138,0,579},{5,0,139},{7,0,1168},
+{138,0,539},{134,0,1459},{13,0,388},{141,11,388},{134,0,253},{7,10,1260},{135,10
+,1790},{10,0,252},{9,10,222},{139,10,900},{140,0,745},{133,11,946},{4,0,107},{7,
+0,613},{8,0,439},{8,0,504},{9,0,501},{10,0,383},{139,0,477},{135,11,1485},{132,0
+,871},{7,11,411},{7,11,590},{8,11,631},{9,11,323},{10,11,355},{11,11,491},{12,11
+,143},{12,11,402},{13,11,73},{14,11,408},{15,11,107},{146,11,71},{132,0,229},{
+132,0,903},{140,0,71},{133,0,549},{4,0,47},{6,0,373},{7,0,452},{7,0,543},{7,0,
+1828},{7,0,1856},{9,0,6},{11,0,257},{139,0,391},{7,11,1467},{8,11,328},{10,11,
+544},{11,11,955},{13,11,320},{145,11,83},{5,0,980},{134,0,1754},{136,0,865},{5,0
+,705},{137,0,606},{7,0,161},{8,10,201},{136,10,605},{143,11,35},{5,11,835},{6,11
+,483},{140,10,224},{7,0,536},{7,0,1331},{136,0,143},{134,0,1388},{5,0,724},{10,0
+,305},{11,0,151},{12,0,33},{12,0,121},{12,0,381},{17,0,3},{17,0,27},{17,0,78},{
+18,0,18},{19,0,54},{149,0,5},{4,10,523},{133,10,638},{5,0,19},{134,0,533},{5,0,
+395},{5,0,951},{134,0,1776},{135,0,1908},{132,0,846},{10,0,74},{11,0,663},{12,0,
+210},{13,0,166},{13,0,310},{14,0,373},{18,0,95},{19,0,43},{6,10,242},{7,10,227},
+{7,10,1581},{8,10,104},{9,10,113},{9,10,220},{9,10,427},{10,10,239},{11,10,579},
+{11,10,1023},{13,10,4},{13,10,204},{13,10,316},{148,10,86},{9,11,716},{11,11,108
+},{13,11,123},{14,11,252},{19,11,38},{21,11,3},{151,11,11},{8,0,372},{9,0,122},{
+138,0,175},{132,11,677},{7,11,1374},{136,11,540},{135,10,861},{132,0,695},{7,0,
+497},{9,0,387},{147,0,81},{136,0,937},{134,0,718},{7,0,1328},{136,10,494},{132,
+11,331},{6,0,1581},{133,11,747},{5,0,284},{6,0,49},{6,0,350},{7,0,1},{7,0,377},{
+7,0,1693},{8,0,18},{8,0,678},{9,0,161},{9,0,585},{9,0,671},{9,0,839},{11,0,912},
+{141,0,427},{7,10,1306},{8,10,505},{9,10,482},{10,10,126},{11,10,225},{12,10,347
+},{12,10,449},{13,10,19},{14,10,218},{142,10,435},{10,10,764},{12,10,120},{13,10
+,39},{145,10,127},{4,0,597},{133,10,268},{134,0,1094},{4,0,1008},{134,0,1973},{
+132,0,811},{139,0,908},{135,0,1471},{133,11,326},{4,10,384},{135,10,1022},{7,0,
+1935},{8,0,324},{12,0,42},{4,11,691},{7,11,1935},{8,11,324},{9,11,35},{10,11,680
+},{11,11,364},{12,11,42},{13,11,357},{146,11,16},{135,0,2014},{7,0,2007},{9,0,
+101},{9,0,450},{10,0,66},{10,0,842},{11,0,536},{12,0,587},{6,11,32},{7,11,385},{
+7,11,757},{7,11,1916},{8,11,37},{8,11,94},{8,11,711},{9,11,541},{10,11,162},{10,
+11,795},{11,11,989},{11,11,1010},{12,11,14},{142,11,308},{139,0,586},{135,10,
+1703},{7,0,1077},{11,0,28},{9,10,159},{140,10,603},{6,0,1221},{136,10,583},{6,11
+,152},{6,11,349},{6,11,1682},{7,11,1252},{8,11,112},{9,11,435},{9,11,668},{10,11
+,290},{10,11,319},{10,11,815},{11,11,180},{11,11,837},{12,11,240},{13,11,152},{
+13,11,219},{142,11,158},{139,0,62},{132,10,515},{8,10,632},{8,10,697},{137,10,
+854},{134,0,1766},{132,11,581},{6,11,126},{7,11,573},{8,11,397},{142,11,44},{150
+,0,28},{11,0,670},{22,0,25},{4,10,136},{133,10,551},{6,0,1665},{7,0,256},{7,0,
+1388},{138,0,499},{4,0,22},{5,0,10},{7,0,1576},{136,0,97},{134,10,1782},{5,0,481
+},{7,10,1287},{9,10,44},{10,10,552},{10,10,642},{11,10,839},{12,10,274},{12,10,
+275},{12,10,372},{13,10,91},{142,10,125},{133,11,926},{7,11,1232},{137,11,531},{
+6,0,134},{7,0,437},{7,0,1824},{9,0,37},{14,0,285},{142,0,371},{7,0,486},{8,0,155
+},{11,0,93},{140,0,164},{6,0,1391},{134,0,1442},{133,11,670},{133,0,591},{6,10,
+147},{7,10,886},{7,11,1957},{9,10,753},{138,10,268},{5,0,380},{5,0,650},{7,0,
+1173},{136,0,310},{4,0,364},{7,0,1156},{7,0,1187},{137,0,409},{135,11,1621},{134
+,0,482},{133,11,506},{4,0,781},{6,0,487},{7,0,926},{8,0,263},{139,0,500},{138,10
+,137},{135,11,242},{139,11,96},{133,10,414},{135,10,1762},{134,0,804},{5,11,834}
+,{7,11,1202},{8,11,14},{9,11,481},{137,11,880},{134,10,599},{4,0,94},{135,0,1265
+},{4,0,415},{132,0,417},{5,0,348},{6,0,522},{6,10,1749},{7,11,1526},{138,11,465}
+,{134,10,1627},{132,0,1012},{132,10,488},{4,11,357},{6,11,172},{7,11,143},{137,
+11,413},{4,10,83},{4,11,590},{146,11,76},{140,10,676},{7,11,287},{8,11,355},{9,
+11,293},{137,11,743},{134,10,278},{6,0,1803},{18,0,165},{24,0,21},{5,11,169},{7,
+11,333},{136,11,45},{12,10,97},{140,11,97},{4,0,408},{4,0,741},{135,0,500},{132,
+11,198},{7,10,388},{7,10,644},{139,10,781},{4,11,24},{5,11,140},{5,11,185},{7,11
+,1500},{11,11,565},{139,11,838},{6,0,1321},{9,0,257},{7,10,229},{8,10,59},{9,10,
+190},{10,10,378},{140,10,191},{4,11,334},{133,11,593},{135,11,1885},{134,0,1138}
+,{4,0,249},{6,0,73},{135,0,177},{133,0,576},{142,0,231},{137,0,288},{132,10,660}
+,{7,10,1035},{138,10,737},{135,0,1487},{6,0,989},{9,0,433},{7,10,690},{9,10,587}
+,{140,10,521},{7,0,1264},{7,0,1678},{11,0,945},{12,0,341},{12,0,471},{140,0,569}
+,{132,11,709},{133,11,897},{5,11,224},{13,11,174},{146,11,52},{135,11,1840},{134
+,10,1744},{12,0,87},{16,0,74},{4,10,733},{9,10,194},{10,10,92},{11,10,198},{12,
+10,84},{141,10,128},{140,0,779},{135,0,538},{4,11,608},{133,11,497},{133,0,413},
+{7,11,1375},{7,11,1466},{138,11,331},{136,0,495},{6,11,540},{136,11,136},{7,0,54
+},{8,0,312},{10,0,191},{10,0,614},{140,0,567},{6,0,468},{7,0,567},{7,0,1478},{8,
+0,530},{14,0,290},{133,11,999},{4,11,299},{7,10,306},{135,11,1004},{142,11,296},
+{134,0,1484},{133,10,979},{6,0,609},{9,0,815},{12,11,137},{14,11,9},{14,11,24},{
+142,11,64},{133,11,456},{6,0,484},{135,0,822},{133,10,178},{136,11,180},{132,11,
+755},{137,0,900},{135,0,1335},{6,0,1724},{135,0,2022},{135,11,1139},{5,0,640},{
+132,10,390},{6,0,1831},{138,11,633},{135,11,566},{4,11,890},{5,11,805},{5,11,819
+},{5,11,961},{6,11,396},{6,11,1631},{6,11,1678},{7,11,1967},{7,11,2041},{9,11,
+630},{11,11,8},{11,11,1019},{12,11,176},{13,11,225},{14,11,292},{149,11,24},{132
+,0,474},{134,0,1103},{135,0,1504},{134,0,1576},{6,0,961},{6,0,1034},{140,0,655},
+{11,11,514},{149,11,20},{5,0,305},{135,11,1815},{7,11,1505},{10,11,190},{10,11,
+634},{11,11,792},{12,11,358},{140,11,447},{5,11,0},{6,11,536},{7,11,604},{13,11,
+445},{145,11,126},{7,0,1236},{133,10,105},{4,0,480},{6,0,217},{6,0,302},{6,0,
+1642},{7,0,130},{7,0,837},{7,0,1321},{7,0,1547},{7,0,1657},{8,0,429},{9,0,228},{
+13,0,289},{13,0,343},{19,0,101},{6,11,232},{6,11,412},{7,11,1074},{8,11,9},{8,11
+,157},{8,11,786},{9,11,196},{9,11,352},{9,11,457},{10,11,337},{11,11,232},{11,11
+,877},{12,11,480},{140,11,546},{5,10,438},{7,11,958},{9,10,694},{12,10,627},{13,
+11,38},{141,10,210},{4,11,382},{136,11,579},{7,0,278},{10,0,739},{11,0,708},{141
+,0,348},{4,11,212},{135,11,1206},{135,11,1898},{6,0,708},{6,0,1344},{152,10,11},
+{137,11,768},{134,0,1840},{140,0,233},{8,10,25},{138,10,826},{6,0,2017},{133,11,
+655},{6,0,1488},{139,11,290},{132,10,308},{134,0,1590},{134,0,1800},{134,0,1259}
+,{16,0,28},{6,11,231},{7,11,95},{136,11,423},{133,11,300},{135,10,150},{136,10,
+649},{7,11,1874},{137,11,641},{6,11,237},{7,11,611},{8,11,100},{9,11,416},{11,11
+,335},{12,11,173},{146,11,101},{137,0,45},{134,10,521},{17,0,36},{14,11,26},{146
+,11,150},{7,0,1442},{14,0,22},{5,10,339},{15,10,41},{15,10,166},{147,10,66},{8,0
+,378},{6,11,581},{135,11,1119},{134,0,1507},{147,11,117},{139,0,39},{134,0,1054}
+,{6,0,363},{7,0,1955},{136,0,725},{134,0,2036},{133,11,199},{6,0,1871},{9,0,935}
+,{9,0,961},{9,0,1004},{9,0,1016},{12,0,805},{12,0,852},{12,0,853},{12,0,869},{12
+,0,882},{12,0,896},{12,0,906},{12,0,917},{12,0,940},{15,0,170},{15,0,176},{15,0,
+188},{15,0,201},{15,0,205},{15,0,212},{15,0,234},{15,0,244},{18,0,181},{18,0,193
+},{18,0,196},{18,0,201},{18,0,202},{18,0,210},{18,0,217},{18,0,235},{18,0,236},{
+18,0,237},{21,0,54},{21,0,55},{21,0,58},{21,0,59},{152,0,22},{134,10,1628},{137,
+0,805},{5,0,813},{135,0,2046},{142,11,42},{5,0,712},{6,0,1240},{11,0,17},{13,0,
+321},{144,0,67},{132,0,617},{135,10,829},{6,0,320},{7,0,781},{7,0,1921},{9,0,55}
+,{10,0,186},{10,0,273},{10,0,664},{10,0,801},{11,0,996},{11,0,997},{13,0,157},{
+142,0,170},{136,0,271},{5,10,486},{135,10,1349},{18,11,91},{147,11,70},{10,0,445
+},{7,10,1635},{8,10,17},{138,10,295},{136,11,404},{7,0,103},{7,0,863},{11,0,184}
+,{145,0,62},{138,10,558},{137,0,659},{6,11,312},{6,11,1715},{10,11,584},{11,11,
+546},{11,11,692},{12,11,259},{12,11,295},{13,11,46},{141,11,154},{134,0,676},{
+132,11,588},{4,11,231},{5,11,61},{6,11,104},{7,11,729},{7,11,964},{7,11,1658},{
+140,11,414},{6,11,263},{138,11,757},{11,0,337},{142,0,303},{135,11,1363},{132,11
+,320},{140,0,506},{134,10,447},{5,0,77},{7,0,1455},{10,0,843},{147,0,73},{7,10,
+577},{7,10,1432},{9,10,475},{9,10,505},{9,10,526},{9,10,609},{9,10,689},{9,10,
+726},{9,10,735},{9,10,738},{10,10,556},{10,10,674},{10,10,684},{11,10,89},{11,10
+,202},{11,10,272},{11,10,380},{11,10,415},{11,10,505},{11,10,537},{11,10,550},{
+11,10,562},{11,10,640},{11,10,667},{11,10,688},{11,10,847},{11,10,927},{11,10,
+930},{11,10,940},{12,10,144},{12,10,325},{12,10,329},{12,10,389},{12,10,403},{12
+,10,451},{12,10,515},{12,10,604},{12,10,616},{12,10,626},{13,10,66},{13,10,131},
+{13,10,167},{13,10,236},{13,10,368},{13,10,411},{13,10,434},{13,10,453},{13,10,
+461},{13,10,474},{14,10,59},{14,10,60},{14,10,139},{14,10,152},{14,10,276},{14,
+10,353},{14,10,402},{15,10,28},{15,10,81},{15,10,123},{15,10,152},{18,10,136},{
+148,10,88},{132,0,458},{135,0,1420},{6,0,109},{10,0,382},{4,11,405},{4,10,609},{
+7,10,756},{7,11,817},{9,10,544},{11,10,413},{14,11,58},{14,10,307},{16,10,25},{
+17,11,37},{146,11,124},{6,0,330},{7,0,1084},{11,0,142},{133,11,974},{4,10,930},{
+133,10,947},{5,10,939},{142,11,394},{16,0,91},{145,0,87},{5,11,235},{5,10,962},{
+7,11,1239},{11,11,131},{140,11,370},{11,0,492},{5,10,651},{8,10,170},{9,10,61},{
+9,10,63},{10,10,23},{10,10,37},{10,10,834},{11,10,4},{11,10,281},{11,10,503},{11
+,10,677},{12,10,96},{12,10,130},{12,10,244},{14,10,5},{14,10,40},{14,10,162},{14
+,10,202},{146,10,133},{4,10,406},{5,10,579},{12,10,492},{150,10,15},{9,11,137},{
+138,11,221},{134,0,1239},{11,0,211},{140,0,145},{7,11,390},{138,11,140},{135,11,
+1418},{135,11,1144},{134,0,1049},{7,0,321},{6,10,17},{7,10,1001},{7,10,1982},{9,
+10,886},{10,10,489},{10,10,800},{11,10,782},{12,10,320},{13,10,467},{14,10,145},
+{14,10,387},{143,10,119},{145,10,17},{5,11,407},{11,11,489},{19,11,37},{20,11,73
+},{150,11,38},{133,10,458},{135,0,1985},{7,10,1983},{8,10,0},{8,10,171},{9,10,
+120},{9,10,732},{10,10,473},{11,10,656},{11,10,998},{18,10,0},{18,10,2},{147,10,
+21},{5,11,325},{7,11,1483},{8,11,5},{8,11,227},{9,11,105},{10,11,585},{140,11,
+614},{136,0,122},{132,0,234},{135,11,1196},{6,0,976},{6,0,1098},{134,0,1441},{7,
+0,253},{136,0,549},{6,11,621},{13,11,504},{144,11,19},{132,10,519},{5,0,430},{5,
+0,932},{6,0,131},{7,0,417},{9,0,522},{11,0,314},{141,0,390},{14,0,149},{14,0,399
+},{143,0,57},{5,10,907},{6,10,31},{6,11,218},{7,10,491},{7,10,530},{8,10,592},{
+11,10,53},{11,10,779},{12,10,167},{12,10,411},{14,10,14},{14,10,136},{15,10,72},
+{16,10,17},{144,10,72},{140,11,330},{7,11,454},{7,11,782},{136,11,768},{132,0,
+507},{10,11,676},{140,11,462},{6,0,630},{9,0,811},{4,10,208},{5,10,106},{6,10,
+531},{8,10,408},{9,10,188},{138,10,572},{4,0,343},{5,0,511},{134,10,1693},{134,
+11,164},{132,0,448},{7,0,455},{138,0,591},{135,0,1381},{12,10,441},{150,11,50},{
+9,10,449},{10,10,192},{138,10,740},{6,0,575},{132,10,241},{134,0,1175},{134,0,
+653},{134,0,1761},{134,0,1198},{132,10,259},{6,11,343},{7,11,195},{9,11,226},{10
+,11,197},{10,11,575},{11,11,502},{139,11,899},{7,0,1127},{7,0,1572},{10,0,297},{
+10,0,422},{11,0,764},{11,0,810},{12,0,264},{13,0,102},{13,0,300},{13,0,484},{14,
+0,147},{14,0,229},{17,0,71},{18,0,118},{147,0,120},{135,11,666},{132,0,678},{4,
+10,173},{5,10,312},{5,10,512},{135,10,1285},{7,10,1603},{7,10,1691},{9,10,464},{
+11,10,195},{12,10,279},{12,10,448},{14,10,11},{147,10,102},{16,0,99},{146,0,164}
+,{7,11,1125},{9,11,143},{11,11,61},{14,11,405},{150,11,21},{137,11,260},{4,10,
+452},{5,10,583},{5,10,817},{6,10,433},{7,10,593},{7,10,720},{7,10,1378},{8,10,
+161},{9,10,284},{10,10,313},{139,10,886},{132,10,547},{136,10,722},{14,0,35},{
+142,0,191},{141,0,45},{138,0,121},{132,0,125},{134,0,1622},{133,11,959},{8,10,
+420},{139,10,193},{132,0,721},{135,10,409},{136,0,145},{7,0,792},{8,0,147},{10,0
+,821},{11,0,970},{11,0,1021},{136,11,173},{134,11,266},{132,0,715},{7,0,1999},{
+138,10,308},{133,0,531},{5,0,168},{5,0,930},{8,0,74},{9,0,623},{12,0,500},{140,0
+,579},{144,0,65},{138,11,246},{6,0,220},{7,0,1101},{13,0,105},{142,11,314},{5,10
+,1002},{136,10,745},{134,0,960},{20,0,0},{148,11,0},{4,0,1005},{4,10,239},{6,10,
+477},{7,10,1607},{11,10,68},{139,10,617},{6,0,19},{7,0,1413},{139,0,428},{149,10
+,13},{7,0,96},{8,0,401},{8,0,703},{9,0,896},{136,11,300},{134,0,1595},{145,0,116
+},{136,0,1021},{7,0,1961},{7,0,1965},{7,0,2030},{8,0,150},{8,0,702},{8,0,737},{8
+,0,750},{140,0,366},{11,11,75},{142,11,267},{132,10,367},{8,0,800},{9,0,148},{9,
+0,872},{9,0,890},{11,0,309},{11,0,1001},{13,0,267},{13,0,323},{5,11,427},{5,11,
+734},{7,11,478},{136,11,52},{7,11,239},{11,11,217},{142,11,165},{132,11,323},{
+140,11,419},{13,0,299},{142,0,75},{6,11,87},{6,11,1734},{7,11,20},{7,11,1056},{8
+,11,732},{9,11,406},{9,11,911},{138,11,694},{134,0,1383},{132,10,694},{133,11,
+613},{137,0,779},{4,0,598},{140,10,687},{6,0,970},{135,0,424},{133,0,547},{7,11,
+32},{7,11,984},{8,11,85},{8,11,709},{9,11,579},{9,11,847},{9,11,856},{10,11,799}
+,{11,11,258},{11,11,1007},{12,11,331},{12,11,615},{13,11,188},{13,11,435},{14,11
+,8},{15,11,165},{16,11,27},{148,11,40},{6,0,1222},{134,0,1385},{132,0,876},{138,
+11,151},{135,10,213},{4,11,167},{135,11,82},{133,0,133},{6,11,24},{7,11,74},{7,
+11,678},{137,11,258},{5,11,62},{6,11,534},{7,11,684},{7,11,1043},{7,11,1072},{8,
+11,280},{8,11,541},{8,11,686},{10,11,519},{11,11,252},{140,11,282},{136,0,187},{
+8,0,8},{10,0,0},{10,0,818},{139,0,988},{132,11,359},{11,0,429},{15,0,51},{135,10
+,1672},{136,0,685},{5,11,211},{7,11,88},{136,11,627},{134,0,472},{136,0,132},{6,
+11,145},{141,11,336},{4,10,751},{11,10,390},{140,10,32},{6,0,938},{6,0,1060},{4,
+11,263},{4,10,409},{133,10,78},{137,0,874},{8,0,774},{10,0,670},{12,0,51},{4,11,
+916},{6,10,473},{7,10,1602},{10,10,698},{12,10,212},{13,10,307},{145,10,105},{
+146,0,92},{143,10,156},{132,0,830},{137,0,701},{4,11,599},{6,11,1634},{7,11,5},{
+7,11,55},{7,11,67},{7,11,97},{7,11,691},{7,11,979},{7,11,1697},{8,11,207},{8,11,
+214},{8,11,231},{8,11,294},{8,11,336},{8,11,428},{8,11,451},{8,11,460},{8,11,471
+},{8,11,622},{8,11,626},{8,11,679},{8,11,759},{8,11,829},{9,11,11},{9,11,246},{9
+,11,484},{9,11,573},{9,11,706},{9,11,762},{9,11,798},{9,11,855},{9,11,870},{9,11
+,912},{10,11,303},{10,11,335},{10,11,424},{10,11,461},{10,11,543},{10,11,759},{
+10,11,814},{11,11,59},{11,11,199},{11,11,235},{11,11,475},{11,11,590},{11,11,929
+},{11,11,963},{12,11,114},{12,11,182},{12,11,226},{12,11,332},{12,11,439},{12,11
+,575},{12,11,598},{13,11,8},{13,11,125},{13,11,194},{13,11,287},{14,11,197},{14,
+11,383},{15,11,53},{17,11,63},{19,11,46},{19,11,98},{19,11,106},{148,11,85},{4,0
+,127},{5,0,350},{6,0,356},{8,0,426},{9,0,572},{10,0,247},{139,0,312},{134,0,1215
+},{6,0,59},{9,0,603},{13,0,397},{7,11,1853},{138,11,437},{134,0,1762},{147,11,
+126},{135,10,883},{13,0,293},{142,0,56},{133,10,617},{139,10,50},{5,11,187},{7,
+10,1518},{139,10,694},{135,0,441},{6,0,111},{7,0,4},{8,0,163},{8,0,776},{138,0,
+566},{132,0,806},{4,11,215},{9,11,38},{10,11,3},{11,11,23},{11,11,127},{139,11,
+796},{14,0,233},{4,10,546},{135,10,2042},{135,0,1994},{134,0,1739},{135,11,1530}
+,{136,0,393},{5,0,297},{7,0,1038},{14,0,359},{19,0,52},{148,0,47},{135,0,309},{4
+,10,313},{133,10,577},{8,10,184},{141,10,433},{135,10,935},{12,10,186},{12,10,
+292},{14,10,100},{146,10,70},{136,0,363},{14,0,175},{11,10,402},{12,10,109},{12,
+10,431},{13,10,179},{13,10,206},{14,10,217},{16,10,3},{148,10,53},{5,10,886},{6,
+10,46},{6,10,1790},{7,10,14},{7,10,732},{7,10,1654},{8,10,95},{8,10,327},{8,10,
+616},{9,10,892},{10,10,598},{10,10,769},{11,10,134},{11,10,747},{12,10,378},{142
+,10,97},{136,0,666},{135,0,1675},{6,0,655},{134,0,1600},{135,0,808},{133,10,1021
+},{4,11,28},{5,11,440},{7,11,248},{11,11,833},{140,11,344},{134,11,1654},{132,0,
+280},{140,0,54},{4,0,421},{133,0,548},{132,10,153},{6,11,339},{135,11,923},{133,
+11,853},{133,10,798},{132,10,587},{6,11,249},{7,11,1234},{139,11,573},{6,10,598}
+,{7,10,42},{8,10,695},{10,10,212},{11,10,158},{14,10,196},{145,10,85},{7,0,249},
+{5,10,957},{133,10,1008},{4,10,129},{135,10,465},{6,0,254},{7,0,842},{7,0,1659},
+{9,0,109},{10,0,103},{7,10,908},{7,10,1201},{9,10,755},{11,10,906},{12,10,527},{
+146,10,7},{5,0,262},{136,10,450},{144,0,1},{10,11,201},{142,11,319},{7,11,49},{7
+,11,392},{8,11,20},{8,11,172},{8,11,690},{9,11,383},{9,11,845},{10,11,48},{11,11
+,293},{11,11,832},{11,11,920},{141,11,221},{5,11,858},{133,11,992},{134,0,805},{
+139,10,1003},{6,0,1630},{134,11,307},{7,11,1512},{135,11,1794},{6,11,268},{137,
+11,62},{135,10,1868},{133,0,671},{4,0,989},{8,0,972},{136,0,998},{132,11,423},{
+132,0,889},{135,0,1382},{135,0,1910},{7,10,965},{7,10,1460},{135,10,1604},{4,0,
+627},{5,0,775},{138,11,106},{134,11,348},{7,0,202},{11,0,362},{11,0,948},{140,0,
+388},{138,11,771},{6,11,613},{136,11,223},{6,0,560},{7,0,451},{8,0,389},{12,0,
+490},{13,0,16},{13,0,215},{13,0,351},{18,0,132},{147,0,125},{135,0,841},{136,0,
+566},{136,0,938},{132,11,670},{5,0,912},{6,0,1695},{140,11,55},{9,11,40},{139,11
+,136},{7,0,1361},{7,10,982},{10,10,32},{143,10,56},{11,11,259},{140,11,270},{5,0
+,236},{6,0,572},{8,0,492},{11,0,618},{144,0,56},{8,11,572},{9,11,310},{9,11,682}
+,{137,11,698},{134,0,1854},{5,0,190},{136,0,318},{133,10,435},{135,0,1376},{4,11
+,296},{6,11,352},{7,11,401},{7,11,1410},{7,11,1594},{7,11,1674},{8,11,63},{8,11,
+660},{137,11,74},{7,0,349},{5,10,85},{6,10,419},{7,10,305},{7,10,361},{7,10,1337
+},{8,10,71},{140,10,519},{4,11,139},{4,11,388},{140,11,188},{6,0,1972},{6,0,2013
+},{8,0,951},{10,0,947},{10,0,974},{10,0,1018},{142,0,476},{140,10,688},{135,10,
+740},{5,10,691},{7,10,345},{9,10,94},{140,10,169},{9,0,344},{5,10,183},{6,10,582
+},{10,10,679},{140,10,435},{135,10,511},{132,0,850},{8,11,441},{10,11,314},{143,
+11,3},{7,10,1993},{136,10,684},{4,11,747},{6,11,290},{6,10,583},{7,11,649},{7,11
+,1479},{135,11,1583},{133,11,232},{133,10,704},{134,0,910},{4,10,179},{5,10,198}
+,{133,10,697},{7,10,347},{7,10,971},{8,10,181},{138,10,711},{136,11,525},{14,0,
+19},{14,0,28},{144,0,29},{7,0,85},{7,0,247},{8,0,585},{138,0,163},{4,0,487},{7,
+11,472},{7,11,1801},{10,11,748},{141,11,458},{4,10,243},{5,10,203},{7,10,19},{7,
+10,71},{7,10,113},{10,10,405},{11,10,357},{142,10,240},{7,10,1450},{139,10,99},{
+132,11,425},{138,0,145},{147,0,83},{6,10,492},{137,11,247},{4,0,1013},{134,0,
+2033},{5,10,134},{6,10,408},{6,10,495},{135,10,1593},{135,0,1922},{134,11,1768},
+{4,0,124},{10,0,457},{11,0,121},{11,0,169},{11,0,870},{11,0,874},{12,0,214},{14,
+0,187},{143,0,77},{5,0,557},{135,0,1457},{139,0,66},{5,11,943},{6,11,1779},{142,
+10,4},{4,10,248},{4,10,665},{7,10,137},{137,10,349},{7,0,1193},{5,11,245},{6,11,
+576},{7,11,582},{136,11,225},{144,0,82},{7,10,1270},{139,10,612},{5,0,454},{10,0
+,352},{138,11,352},{18,0,57},{5,10,371},{135,10,563},{135,0,1333},{6,0,107},{7,0
+,638},{7,0,1632},{9,0,396},{134,11,610},{5,0,370},{134,0,1756},{4,10,374},{7,10,
+547},{7,10,1700},{7,10,1833},{139,10,858},{133,0,204},{6,0,1305},{9,10,311},{141
+,10,42},{5,0,970},{134,0,1706},{6,10,1647},{7,10,1552},{7,10,2010},{9,10,494},{
+137,10,509},{13,11,455},{15,11,99},{15,11,129},{144,11,68},{135,0,3},{4,0,35},{5
+,0,121},{5,0,483},{5,0,685},{6,0,489},{6,0,782},{6,0,1032},{7,0,1204},{136,0,394
+},{4,0,921},{133,0,1007},{8,11,360},{138,11,63},{135,0,1696},{134,0,1519},{132,
+11,443},{135,11,944},{6,10,123},{7,10,214},{9,10,728},{10,10,157},{11,10,346},{
+11,10,662},{143,10,106},{137,0,981},{135,10,1435},{134,0,1072},{132,0,712},{134,
+0,1629},{134,0,728},{4,11,298},{137,11,483},{6,0,1177},{6,0,1271},{5,11,164},{7,
+11,121},{142,11,189},{7,0,1608},{4,10,707},{5,10,588},{6,10,393},{13,10,106},{18
+,10,49},{147,10,41},{23,0,16},{151,11,16},{6,10,211},{7,10,1690},{11,10,486},{
+140,10,369},{133,0,485},{19,11,15},{149,11,27},{4,11,172},{9,11,611},{10,11,436}
+,{12,11,673},{141,11,255},{5,11,844},{10,11,484},{11,11,754},{12,11,457},{14,11,
+171},{14,11,389},{146,11,153},{4,0,285},{5,0,27},{5,0,317},{6,0,301},{7,0,7},{8,
+0,153},{10,0,766},{11,0,468},{12,0,467},{141,0,143},{134,0,1462},{9,11,263},{10,
+11,147},{138,11,492},{133,11,537},{6,0,1945},{6,0,1986},{6,0,1991},{134,0,2038},
+{134,10,219},{137,11,842},{14,0,52},{17,0,50},{5,10,582},{6,10,1646},{7,10,99},{
+7,10,1962},{7,10,1986},{8,10,515},{8,10,773},{9,10,23},{9,10,491},{12,10,620},{
+142,10,93},{138,11,97},{20,0,21},{20,0,44},{133,10,851},{136,0,819},{139,0,917},
+{5,11,230},{5,11,392},{6,11,420},{8,10,762},{8,10,812},{9,11,568},{9,10,910},{
+140,11,612},{135,0,784},{15,0,135},{143,11,135},{10,0,454},{140,0,324},{4,11,0},
+{5,11,41},{7,11,1459},{7,11,1469},{7,11,1618},{7,11,1859},{9,11,549},{139,11,905
+},{4,10,98},{7,10,1365},{9,10,422},{9,10,670},{10,10,775},{11,10,210},{13,10,26}
+,{13,10,457},{141,10,476},{6,0,1719},{6,0,1735},{7,0,2016},{7,0,2020},{8,0,837},
+{137,0,852},{133,11,696},{135,0,852},{132,0,952},{134,10,1730},{132,11,771},{138
+,0,568},{137,0,448},{139,0,146},{8,0,67},{138,0,419},{133,11,921},{137,10,147},{
+134,0,1826},{10,0,657},{14,0,297},{142,0,361},{6,0,666},{6,0,767},{134,0,1542},{
+139,0,729},{6,11,180},{7,11,1137},{8,11,751},{139,11,805},{4,11,183},{7,11,271},
+{11,11,824},{11,11,952},{13,11,278},{13,11,339},{13,11,482},{14,11,424},{148,11,
+99},{4,0,669},{5,11,477},{5,11,596},{6,11,505},{7,11,1221},{11,11,907},{12,11,
+209},{141,11,214},{135,11,1215},{5,0,402},{6,10,30},{11,10,56},{139,10,305},{7,
+11,564},{142,11,168},{139,0,152},{7,0,912},{135,10,1614},{4,10,150},{5,10,303},{
+134,10,327},{7,0,320},{8,0,51},{9,0,868},{10,0,833},{12,0,481},{12,0,570},{148,0
+,106},{132,0,445},{7,11,274},{11,11,263},{11,11,479},{11,11,507},{140,11,277},{
+10,0,555},{11,0,308},{19,0,95},{6,11,1645},{8,10,192},{10,10,78},{141,10,359},{
+135,10,786},{6,11,92},{6,11,188},{7,11,1269},{7,11,1524},{7,11,1876},{10,11,228}
+,{139,11,1020},{4,11,459},{133,11,966},{11,0,386},{6,10,1638},{7,10,79},{7,10,
+496},{9,10,138},{10,10,336},{12,10,412},{12,10,440},{142,10,305},{133,0,239},{7,
+0,83},{7,0,1990},{8,0,130},{139,0,720},{138,11,709},{4,0,143},{5,0,550},{133,0,
+752},{5,0,123},{6,0,530},{7,0,348},{135,0,1419},{135,0,2024},{6,11,18},{7,11,179
+},{7,11,721},{7,11,932},{8,11,548},{8,11,757},{9,11,54},{9,11,65},{9,11,532},{9,
+11,844},{10,11,113},{10,11,117},{10,11,236},{10,11,315},{10,11,430},{10,11,798},
+{11,11,153},{11,11,351},{11,11,375},{12,11,78},{12,11,151},{12,11,392},{14,11,
+248},{143,11,23},{7,10,204},{7,10,415},{8,10,42},{10,10,85},{139,10,564},{134,0,
+958},{133,11,965},{132,0,210},{135,11,1429},{138,11,480},{134,11,182},{139,11,
+345},{10,11,65},{10,11,488},{138,11,497},{4,10,3},{5,10,247},{5,10,644},{7,10,
+744},{7,10,1207},{7,10,1225},{7,10,1909},{146,10,147},{132,0,430},{5,10,285},{9,
+10,67},{13,10,473},{143,10,82},{144,11,16},{7,11,1162},{9,11,588},{10,11,260},{
+151,10,8},{133,0,213},{138,0,7},{135,0,801},{134,11,1786},{135,11,308},{6,0,936}
+,{134,0,1289},{133,0,108},{132,0,885},{133,0,219},{139,0,587},{4,0,193},{5,0,916
+},{6,0,1041},{7,0,364},{10,0,398},{10,0,726},{11,0,317},{11,0,626},{12,0,142},{
+12,0,288},{12,0,678},{13,0,313},{15,0,113},{146,0,114},{135,0,1165},{6,0,241},{9
+,0,342},{10,0,729},{11,0,284},{11,0,445},{11,0,651},{11,0,863},{13,0,398},{146,0
+,99},{7,0,907},{136,0,832},{9,0,303},{4,10,29},{6,10,532},{7,10,1628},{7,10,1648
+},{9,10,350},{10,10,433},{11,10,97},{11,10,557},{11,10,745},{12,10,289},{12,10,
+335},{12,10,348},{12,10,606},{13,10,116},{13,10,233},{13,10,466},{14,10,181},{14
+,10,209},{14,10,232},{14,10,236},{14,10,300},{16,10,41},{148,10,97},{7,11,423},{
+7,10,1692},{136,11,588},{6,0,931},{134,0,1454},{5,10,501},{7,10,1704},{9,10,553}
+,{11,10,520},{12,10,557},{141,10,249},{136,11,287},{4,0,562},{9,0,254},{139,0,
+879},{132,0,786},{14,11,32},{18,11,85},{20,11,2},{152,11,16},{135,0,1294},{7,11,
+723},{135,11,1135},{6,0,216},{7,0,901},{7,0,1343},{8,0,493},{134,11,403},{7,11,
+719},{8,11,809},{136,11,834},{5,11,210},{6,11,213},{7,11,60},{10,11,364},{139,11
+,135},{7,0,341},{11,0,219},{5,11,607},{8,11,326},{136,11,490},{4,11,701},{5,11,
+472},{5,11,639},{7,11,1249},{9,11,758},{139,11,896},{135,11,380},{135,11,1947},{
+139,0,130},{135,0,1734},{10,0,115},{11,0,420},{12,0,154},{13,0,404},{14,0,346},{
+143,0,54},{134,10,129},{4,11,386},{7,11,41},{8,11,405},{9,11,497},{11,11,110},{
+11,11,360},{15,11,37},{144,11,84},{141,11,282},{5,11,46},{7,11,1452},{7,11,1480}
+,{8,11,634},{140,11,472},{4,11,524},{136,11,810},{10,11,238},{141,11,33},{133,0,
+604},{5,0,1011},{136,0,701},{8,0,856},{8,0,858},{8,0,879},{12,0,702},{142,0,447}
+,{4,0,54},{5,0,666},{7,0,1039},{7,0,1130},{9,0,195},{138,0,302},{4,10,25},{5,10,
+60},{6,10,504},{7,10,614},{7,10,1155},{140,10,0},{7,10,1248},{11,10,621},{139,10
+,702},{133,11,997},{137,10,321},{134,0,1669},{134,0,1791},{4,10,379},{135,10,
+1397},{138,11,372},{5,11,782},{5,11,829},{134,11,1738},{135,0,1228},{4,10,118},{
+6,10,274},{6,10,361},{7,10,75},{141,10,441},{132,0,623},{9,11,279},{10,11,407},{
+14,11,84},{150,11,18},{137,10,841},{135,0,798},{140,10,693},{5,10,314},{6,10,221
+},{7,10,419},{10,10,650},{11,10,396},{12,10,156},{13,10,369},{14,10,333},{145,10
+,47},{135,11,1372},{7,0,122},{9,0,259},{10,0,84},{11,0,470},{12,0,541},{141,0,
+379},{134,0,837},{8,0,1013},{4,11,78},{5,11,96},{5,11,182},{7,11,1724},{7,11,
+1825},{10,11,394},{10,11,471},{11,11,532},{14,11,340},{145,11,88},{134,0,577},{
+135,11,1964},{132,10,913},{134,0,460},{8,0,891},{10,0,901},{10,0,919},{10,0,932}
+,{12,0,715},{12,0,728},{12,0,777},{14,0,457},{144,0,103},{5,0,82},{5,0,131},{7,0
+,1755},{8,0,31},{9,0,168},{9,0,764},{139,0,869},{136,10,475},{6,0,605},{5,10,
+1016},{9,11,601},{9,11,619},{10,11,505},{10,11,732},{11,11,355},{140,11,139},{7,
+10,602},{8,10,179},{10,10,781},{140,10,126},{134,0,1246},{6,10,329},{138,10,111}
+,{6,11,215},{7,11,1028},{7,11,1473},{7,11,1721},{9,11,424},{138,11,779},{5,0,278
+},{137,0,68},{6,0,932},{6,0,1084},{144,0,86},{4,0,163},{5,0,201},{5,0,307},{5,0,
+310},{6,0,335},{7,0,284},{7,0,1660},{136,0,165},{136,0,781},{134,0,707},{6,0,33}
+,{135,0,1244},{5,10,821},{6,11,67},{6,10,1687},{7,11,258},{7,11,1630},{9,11,354}
+,{9,11,675},{10,11,830},{14,11,80},{145,11,80},{6,11,141},{7,11,225},{9,11,59},{
+9,11,607},{10,11,312},{11,11,687},{12,11,555},{13,11,373},{13,11,494},{148,11,58
+},{134,0,1113},{9,0,388},{5,10,71},{7,10,1407},{9,10,704},{10,10,261},{10,10,619
+},{11,10,547},{11,10,619},{143,10,157},{7,0,1953},{136,0,720},{138,0,203},{7,10,
+2008},{9,10,337},{138,10,517},{6,0,326},{7,0,677},{137,0,425},{139,11,81},{7,0,
+1316},{7,0,1412},{7,0,1839},{9,0,589},{11,0,241},{11,0,676},{11,0,811},{11,0,891
+},{12,0,140},{12,0,346},{12,0,479},{13,0,140},{13,0,381},{14,0,188},{18,0,30},{
+148,0,108},{5,0,416},{6,10,86},{6,10,603},{7,10,292},{7,10,561},{8,10,257},{8,10
+,382},{9,10,721},{9,10,778},{11,10,581},{140,10,466},{4,10,486},{133,10,491},{
+134,0,1300},{132,10,72},{7,0,847},{6,10,265},{7,11,430},{139,11,46},{5,11,602},{
+6,11,106},{7,11,1786},{7,11,1821},{7,11,2018},{9,11,418},{137,11,763},{5,0,358},
+{7,0,535},{7,0,1184},{10,0,662},{13,0,212},{13,0,304},{13,0,333},{145,0,98},{5,
+11,65},{6,11,416},{7,11,1720},{7,11,1924},{8,11,677},{10,11,109},{11,11,14},{11,
+11,70},{11,11,569},{11,11,735},{15,11,153},{148,11,80},{6,0,1823},{8,0,839},{8,0
+,852},{8,0,903},{10,0,940},{12,0,707},{140,0,775},{135,11,1229},{6,0,1522},{140,
+0,654},{136,11,595},{139,0,163},{141,0,314},{132,0,978},{4,0,601},{6,0,2035},{
+137,10,234},{5,10,815},{6,10,1688},{134,10,1755},{133,0,946},{136,0,434},{6,10,
+197},{136,10,205},{7,0,411},{7,0,590},{8,0,631},{9,0,323},{10,0,355},{11,0,491},
+{12,0,143},{12,0,402},{13,0,73},{14,0,408},{15,0,107},{146,0,71},{7,0,1467},{8,0
+,328},{10,0,544},{11,0,955},{12,0,13},{13,0,320},{145,0,83},{142,0,410},{11,0,
+511},{13,0,394},{14,0,298},{14,0,318},{146,0,103},{6,10,452},{7,10,312},{138,10,
+219},{138,10,589},{4,10,333},{9,10,176},{12,10,353},{141,10,187},{135,11,329},{
+132,11,469},{5,0,835},{134,0,483},{134,11,1743},{5,11,929},{6,11,340},{8,11,376}
+,{136,11,807},{134,10,1685},{132,0,677},{5,11,218},{7,11,1610},{138,11,83},{5,11
+,571},{135,11,1842},{132,11,455},{137,0,70},{135,0,1405},{7,10,135},{8,10,7},{8,
+10,62},{9,10,243},{10,10,658},{10,10,697},{11,10,456},{139,10,756},{9,10,395},{
+138,10,79},{137,0,108},{6,11,161},{7,11,372},{137,11,597},{132,11,349},{132,0,
+777},{132,0,331},{135,10,631},{133,0,747},{6,11,432},{6,11,608},{139,11,322},{
+138,10,835},{5,11,468},{7,11,1809},{10,11,325},{11,11,856},{12,11,345},{143,11,
+104},{133,11,223},{7,10,406},{7,10,459},{8,10,606},{139,10,726},{132,11,566},{
+142,0,68},{4,11,59},{135,11,1394},{6,11,436},{139,11,481},{4,11,48},{5,11,271},{
+135,11,953},{139,11,170},{5,11,610},{136,11,457},{133,11,755},{135,11,1217},{133
+,10,612},{132,11,197},{132,0,505},{4,10,372},{7,10,482},{8,10,158},{9,10,602},{9
+,10,615},{10,10,245},{10,10,678},{10,10,744},{11,10,248},{139,10,806},{133,0,326
+},{5,10,854},{135,10,1991},{4,0,691},{146,0,16},{6,0,628},{9,0,35},{10,0,680},{
+10,0,793},{11,0,364},{13,0,357},{143,0,164},{138,0,654},{6,0,32},{7,0,385},{7,0,
+757},{7,0,1916},{8,0,37},{8,0,94},{8,0,711},{9,0,541},{10,0,162},{10,0,795},{11,
+0,989},{11,0,1010},{12,0,14},{142,0,308},{133,11,217},{6,0,152},{6,0,349},{6,0,
+1682},{7,0,1252},{8,0,112},{9,0,435},{9,0,668},{10,0,290},{10,0,319},{10,0,815},
+{11,0,180},{11,0,837},{12,0,240},{13,0,152},{13,0,219},{142,0,158},{4,0,581},{
+134,0,726},{5,10,195},{135,10,1685},{6,0,126},{7,0,573},{8,0,397},{142,0,44},{
+138,0,89},{7,10,1997},{8,10,730},{139,10,1006},{134,0,1531},{134,0,1167},{5,0,
+926},{12,0,203},{133,10,751},{4,11,165},{7,11,1398},{135,11,1829},{7,0,1232},{
+137,0,531},{135,10,821},{134,0,943},{133,0,670},{4,0,880},{139,0,231},{134,0,
+1617},{135,0,1957},{5,11,9},{7,11,297},{7,11,966},{140,11,306},{6,0,975},{134,0,
+985},{5,10,950},{5,10,994},{134,10,351},{12,11,21},{151,11,7},{5,11,146},{6,11,
+411},{138,11,721},{7,0,242},{135,0,1942},{6,11,177},{135,11,467},{5,0,421},{7,10
+,47},{137,10,684},{5,0,834},{7,0,1202},{8,0,14},{9,0,481},{137,0,880},{138,0,465
+},{6,0,688},{9,0,834},{132,10,350},{132,0,855},{4,0,357},{6,0,172},{7,0,143},{
+137,0,413},{133,11,200},{132,0,590},{7,10,1812},{13,10,259},{13,10,356},{14,10,
+242},{147,10,114},{133,10,967},{11,0,114},{4,10,473},{7,10,623},{8,10,808},{9,10
+,871},{9,10,893},{11,10,431},{12,10,112},{12,10,217},{12,10,243},{12,10,562},{12
+,10,663},{12,10,683},{13,10,141},{13,10,197},{13,10,227},{13,10,406},{13,10,487}
+,{14,10,156},{14,10,203},{14,10,224},{14,10,256},{18,10,58},{150,10,0},{138,10,
+286},{4,10,222},{7,10,286},{136,10,629},{5,0,169},{7,0,333},{136,0,45},{134,11,
+481},{132,0,198},{4,0,24},{5,0,140},{5,0,185},{7,0,1500},{11,0,565},{11,0,838},{
+4,11,84},{7,11,1482},{10,11,76},{138,11,142},{133,0,585},{141,10,306},{133,11,
+1015},{4,11,315},{5,11,507},{135,11,1370},{136,10,146},{6,0,691},{134,0,1503},{4
+,0,334},{133,0,593},{4,10,465},{135,10,1663},{142,11,173},{135,0,913},{12,0,116}
+,{134,11,1722},{134,0,1360},{132,0,802},{8,11,222},{8,11,476},{9,11,238},{11,11,
+516},{11,11,575},{15,11,109},{146,11,100},{6,0,308},{9,0,673},{7,10,138},{7,10,
+517},{139,10,238},{132,0,709},{6,0,1876},{6,0,1895},{9,0,994},{9,0,1006},{12,0,
+829},{12,0,888},{12,0,891},{146,0,185},{148,10,94},{4,0,228},{133,0,897},{7,0,
+1840},{5,10,495},{7,10,834},{9,10,733},{139,10,378},{133,10,559},{6,10,21},{6,10
+,1737},{7,10,1444},{136,10,224},{4,0,608},{133,0,497},{6,11,40},{135,11,1781},{
+134,0,1573},{135,0,2039},{6,0,540},{136,0,136},{4,0,897},{5,0,786},{133,10,519},
+{6,0,1878},{6,0,1884},{9,0,938},{9,0,948},{9,0,955},{9,0,973},{9,0,1012},{12,0,
+895},{12,0,927},{143,0,254},{134,0,1469},{133,0,999},{4,0,299},{135,0,1004},{4,0
+,745},{133,0,578},{136,11,574},{133,0,456},{134,0,1457},{7,0,1679},{132,10,402},
+{7,0,693},{8,0,180},{12,0,163},{8,10,323},{136,10,479},{11,10,580},{142,10,201},
+{5,10,59},{135,10,672},{132,11,354},{146,10,34},{4,0,755},{135,11,1558},{7,0,
+1740},{146,0,48},{4,10,85},{135,10,549},{139,0,338},{133,10,94},{134,0,1091},{
+135,11,469},{12,0,695},{12,0,704},{20,0,113},{5,11,830},{14,11,338},{148,11,81},
+{135,0,1464},{6,10,11},{135,10,187},{135,0,975},{13,0,335},{132,10,522},{134,0,
+1979},{5,11,496},{135,11,203},{4,10,52},{135,10,661},{7,0,1566},{8,0,269},{9,0,
+212},{9,0,718},{14,0,15},{14,0,132},{142,0,227},{4,0,890},{5,0,805},{5,0,819},{5
+,0,961},{6,0,396},{6,0,1631},{6,0,1678},{7,0,1967},{7,0,2041},{9,0,630},{11,0,8}
+,{11,0,1019},{12,0,176},{13,0,225},{14,0,292},{21,0,24},{4,10,383},{133,10,520},
+{134,11,547},{135,11,1748},{5,11,88},{137,11,239},{146,11,128},{7,11,650},{135,
+11,1310},{4,10,281},{5,10,38},{7,10,194},{7,10,668},{7,10,1893},{137,10,397},{
+135,0,1815},{9,10,635},{139,10,559},{7,0,1505},{10,0,190},{10,0,634},{11,0,792},
+{12,0,358},{140,0,447},{5,0,0},{6,0,536},{7,0,604},{13,0,445},{145,0,126},{7,11,
+1076},{9,11,80},{11,11,78},{11,11,421},{11,11,534},{140,11,545},{8,0,966},{10,0,
+1023},{14,11,369},{146,11,72},{135,11,1641},{6,0,232},{6,0,412},{7,0,1074},{8,0,
+9},{8,0,157},{8,0,786},{9,0,196},{9,0,352},{9,0,457},{10,0,337},{11,0,232},{11,0
+,877},{12,0,480},{140,0,546},{135,0,958},{4,0,382},{136,0,579},{4,0,212},{135,0,
+1206},{4,11,497},{5,11,657},{135,11,1584},{132,0,681},{8,0,971},{138,0,965},{5,
+10,448},{136,10,535},{14,0,16},{146,0,44},{11,0,584},{11,0,616},{14,0,275},{11,
+11,584},{11,11,616},{142,11,275},{136,11,13},{7,10,610},{135,10,1501},{7,11,642}
+,{8,11,250},{11,11,123},{11,11,137},{13,11,48},{142,11,95},{133,0,655},{17,0,67}
+,{147,0,74},{134,0,751},{134,0,1967},{6,0,231},{136,0,423},{5,0,300},{138,0,1016
+},{4,10,319},{5,10,699},{138,10,673},{6,0,237},{7,0,611},{8,0,100},{9,0,416},{11
+,0,335},{12,0,173},{18,0,101},{6,10,336},{8,10,552},{9,10,285},{10,10,99},{139,
+10,568},{134,0,1370},{7,10,1406},{9,10,218},{141,10,222},{133,10,256},{135,0,
+1208},{14,11,213},{148,11,38},{6,0,1219},{135,11,1642},{13,0,417},{14,0,129},{
+143,0,15},{10,11,545},{140,11,301},{17,10,39},{148,10,36},{133,0,199},{4,11,904}
+,{133,11,794},{12,0,427},{146,0,38},{134,0,949},{8,0,665},{135,10,634},{132,10,
+618},{135,10,259},{132,10,339},{133,11,761},{141,10,169},{132,10,759},{5,0,688},
+{7,0,539},{135,0,712},{7,11,386},{138,11,713},{134,0,1186},{6,11,7},{6,11,35},{7
+,11,147},{7,11,1069},{7,11,1568},{7,11,1575},{7,11,1917},{8,11,43},{8,11,208},{9
+,11,128},{9,11,866},{10,11,20},{11,11,981},{147,11,33},{7,11,893},{8,10,482},{
+141,11,424},{6,0,312},{6,0,1715},{10,0,584},{11,0,546},{11,0,692},{12,0,259},{12
+,0,295},{13,0,46},{141,0,154},{5,10,336},{6,10,341},{6,10,478},{6,10,1763},{136,
+10,386},{137,0,151},{132,0,588},{152,0,4},{6,11,322},{9,11,552},{11,11,274},{13,
+11,209},{13,11,499},{14,11,85},{15,11,126},{145,11,70},{135,10,73},{4,0,231},{5,
+0,61},{6,0,104},{7,0,729},{7,0,964},{7,0,1658},{140,0,414},{6,0,263},{138,0,757}
+,{135,10,1971},{4,0,612},{133,0,561},{132,0,320},{135,10,1344},{8,11,83},{8,11,
+817},{9,11,28},{9,11,29},{9,11,885},{10,11,387},{11,11,633},{11,11,740},{13,11,
+235},{13,11,254},{15,11,143},{143,11,146},{5,10,396},{134,10,501},{140,11,49},{
+132,0,225},{4,10,929},{5,10,799},{8,10,46},{136,10,740},{4,0,405},{7,0,817},{14,
+0,58},{17,0,37},{146,0,124},{133,0,974},{4,11,412},{133,11,581},{4,10,892},{133,
+10,770},{4,0,996},{134,0,2026},{4,0,527},{5,0,235},{7,0,1239},{11,0,131},{140,0,
+370},{9,0,16},{13,0,386},{135,11,421},{7,0,956},{7,0,1157},{7,0,1506},{7,0,1606}
+,{7,0,1615},{7,0,1619},{7,0,1736},{7,0,1775},{8,0,590},{9,0,324},{9,0,736},{9,0,
+774},{9,0,776},{9,0,784},{10,0,567},{10,0,708},{11,0,518},{11,0,613},{11,0,695},
+{11,0,716},{11,0,739},{11,0,770},{11,0,771},{11,0,848},{11,0,857},{11,0,931},{11
+,0,947},{12,0,326},{12,0,387},{12,0,484},{12,0,528},{12,0,552},{12,0,613},{13,0,
+189},{13,0,256},{13,0,340},{13,0,432},{13,0,436},{13,0,440},{13,0,454},{14,0,174
+},{14,0,220},{14,0,284},{14,0,390},{145,0,121},{135,10,158},{9,0,137},{138,0,221
+},{4,11,110},{10,11,415},{10,11,597},{142,11,206},{141,11,496},{135,11,205},{151
+,10,25},{135,11,778},{7,11,1656},{7,10,2001},{9,11,369},{10,11,338},{10,11,490},
+{11,11,154},{11,11,545},{11,11,775},{13,11,77},{141,11,274},{4,11,444},{10,11,
+146},{140,11,9},{7,0,390},{138,0,140},{135,0,1144},{134,0,464},{7,10,1461},{140,
+10,91},{132,10,602},{4,11,283},{135,11,1194},{5,0,407},{11,0,204},{11,0,243},{11
+,0,489},{12,0,293},{19,0,37},{20,0,73},{150,0,38},{7,0,1218},{136,0,303},{5,0,
+325},{8,0,5},{8,0,227},{9,0,105},{10,0,585},{12,0,614},{4,10,13},{5,10,567},{7,
+10,1498},{9,10,124},{11,10,521},{140,10,405},{135,10,1006},{7,0,800},{10,0,12},{
+134,11,1720},{135,0,1783},{132,10,735},{138,10,812},{4,10,170},{135,10,323},{6,0
+,621},{13,0,504},{144,0,89},{5,10,304},{135,10,1403},{137,11,216},{6,0,920},{6,0
+,1104},{9,11,183},{139,11,286},{4,0,376},{133,10,742},{134,0,218},{8,0,641},{11,
+0,388},{140,0,580},{7,0,454},{7,0,782},{8,0,768},{140,0,686},{137,11,33},{133,10
+,111},{144,0,0},{10,0,676},{140,0,462},{6,0,164},{136,11,735},{133,10,444},{150,
+0,50},{7,11,1862},{12,11,491},{12,11,520},{13,11,383},{14,11,244},{146,11,12},{5
+,11,132},{9,11,486},{9,11,715},{10,11,458},{11,11,373},{11,11,668},{11,11,795},{
+11,11,897},{12,11,272},{12,11,424},{12,11,539},{12,11,558},{14,11,245},{14,11,
+263},{14,11,264},{14,11,393},{142,11,403},{8,10,123},{15,10,6},{144,10,7},{6,0,
+285},{8,0,654},{11,0,749},{12,0,190},{12,0,327},{13,0,120},{13,0,121},{13,0,327}
+,{15,0,47},{146,0,40},{5,11,8},{6,11,89},{6,11,400},{7,11,1569},{7,11,1623},{7,
+11,1850},{8,11,218},{8,11,422},{9,11,570},{138,11,626},{6,11,387},{7,11,882},{
+141,11,111},{6,0,343},{7,0,195},{9,0,226},{10,0,197},{10,0,575},{11,0,502},{11,0
+,899},{6,11,224},{7,11,877},{137,11,647},{5,10,937},{135,10,100},{135,11,790},{
+150,0,29},{147,0,8},{134,0,1812},{149,0,8},{135,11,394},{7,0,1125},{9,0,143},{11
+,0,61},{14,0,405},{150,0,21},{10,11,755},{147,11,29},{9,11,378},{141,11,162},{
+135,10,922},{5,10,619},{133,10,698},{134,0,1327},{6,0,1598},{137,0,575},{9,11,
+569},{12,11,12},{12,11,81},{12,11,319},{13,11,69},{14,11,259},{16,11,87},{17,11,
+1},{17,11,21},{17,11,24},{18,11,15},{18,11,56},{18,11,59},{18,11,127},{18,11,154
+},{19,11,19},{148,11,31},{6,0,895},{135,11,1231},{5,0,959},{7,11,124},{136,11,38
+},{5,11,261},{7,11,78},{7,11,199},{8,11,815},{9,11,126},{138,11,342},{5,10,917},
+{134,10,1659},{7,0,1759},{5,11,595},{135,11,1863},{136,0,173},{134,0,266},{142,0
+,261},{132,11,628},{5,10,251},{5,10,956},{8,10,268},{9,10,214},{146,10,142},{7,
+11,266},{136,11,804},{135,11,208},{6,11,79},{7,11,1021},{135,11,1519},{11,11,704
+},{141,11,396},{5,10,346},{5,10,711},{136,10,390},{136,11,741},{134,11,376},{134
+,0,1427},{6,0,1033},{6,0,1217},{136,0,300},{133,10,624},{6,11,100},{7,11,244},{7
+,11,632},{7,11,1609},{8,11,178},{8,11,638},{141,11,58},{6,0,584},{5,10,783},{7,
+10,1998},{135,10,2047},{5,0,427},{5,0,734},{7,0,478},{136,0,52},{7,0,239},{11,0,
+217},{142,0,165},{134,0,1129},{6,0,168},{6,0,1734},{7,0,20},{7,0,1056},{8,0,732}
+,{9,0,406},{9,0,911},{138,0,694},{132,10,594},{133,11,791},{7,11,686},{8,11,33},
+{8,11,238},{10,11,616},{11,11,467},{11,11,881},{13,11,217},{13,11,253},{142,11,
+268},{137,11,476},{134,0,418},{133,0,613},{132,0,632},{132,11,447},{7,0,32},{7,0
+,984},{8,0,85},{8,0,709},{9,0,579},{9,0,847},{9,0,856},{10,0,799},{11,0,258},{11
+,0,1007},{12,0,331},{12,0,615},{13,0,188},{13,0,435},{14,0,8},{15,0,165},{16,0,
+27},{20,0,40},{144,11,35},{4,11,128},{5,11,415},{6,11,462},{7,11,294},{7,11,578}
+,{10,11,710},{139,11,86},{5,0,694},{136,0,909},{7,0,1109},{11,0,7},{5,10,37},{6,
+10,39},{6,10,451},{7,10,218},{7,10,1166},{7,10,1687},{8,10,662},{144,10,2},{136,
+11,587},{6,11,427},{7,11,1018},{138,11,692},{4,11,195},{6,10,508},{135,11,802},{
+4,0,167},{135,0,82},{5,0,62},{6,0,24},{6,0,534},{7,0,74},{7,0,678},{7,0,684},{7,
+0,1043},{7,0,1072},{8,0,280},{8,0,541},{8,0,686},{9,0,258},{10,0,519},{11,0,252}
+,{140,0,282},{138,0,33},{4,0,359},{133,11,738},{7,0,980},{9,0,328},{13,0,186},{
+13,0,364},{7,10,635},{7,10,796},{8,10,331},{9,10,330},{9,10,865},{10,10,119},{10
+,10,235},{11,10,111},{11,10,129},{11,10,240},{12,10,31},{12,10,66},{12,10,222},{
+12,10,269},{12,10,599},{12,10,684},{12,10,689},{12,10,691},{142,10,345},{137,10,
+527},{6,0,596},{7,0,585},{135,10,702},{134,11,1683},{133,0,211},{6,0,145},{141,0
+,336},{134,0,1130},{7,0,873},{6,10,37},{7,10,1666},{8,10,195},{8,10,316},{9,10,
+178},{9,10,276},{9,10,339},{9,10,536},{10,10,102},{10,10,362},{10,10,785},{11,10
+,55},{11,10,149},{11,10,773},{13,10,416},{13,10,419},{14,10,38},{14,10,41},{142,
+10,210},{8,0,840},{136,0,841},{132,0,263},{5,11,3},{8,11,578},{9,11,118},{10,11,
+705},{12,11,383},{141,11,279},{132,0,916},{133,11,229},{133,10,645},{15,0,155},{
+16,0,79},{8,11,102},{10,11,578},{10,11,672},{12,11,496},{13,11,408},{14,11,121},
+{145,11,106},{4,0,599},{5,0,592},{6,0,1634},{7,0,5},{7,0,55},{7,0,67},{7,0,97},{
+7,0,691},{7,0,979},{7,0,1600},{7,0,1697},{8,0,207},{8,0,214},{8,0,231},{8,0,294}
+,{8,0,336},{8,0,428},{8,0,471},{8,0,622},{8,0,626},{8,0,679},{8,0,759},{8,0,829}
+,{9,0,11},{9,0,246},{9,0,484},{9,0,573},{9,0,706},{9,0,762},{9,0,798},{9,0,855},
+{9,0,870},{9,0,912},{10,0,303},{10,0,335},{10,0,424},{10,0,461},{10,0,543},{10,0
+,759},{10,0,814},{11,0,59},{11,0,199},{11,0,235},{11,0,590},{11,0,631},{11,0,929
+},{11,0,963},{11,0,987},{12,0,114},{12,0,182},{12,0,226},{12,0,332},{12,0,439},{
+12,0,575},{12,0,598},{12,0,675},{13,0,8},{13,0,125},{13,0,194},{13,0,287},{14,0,
+197},{14,0,383},{15,0,53},{17,0,63},{19,0,46},{19,0,98},{19,0,106},{148,0,85},{7
+,0,1356},{132,10,290},{6,10,70},{7,10,1292},{10,10,762},{139,10,288},{150,11,55}
+,{4,0,593},{8,11,115},{8,11,350},{9,11,489},{10,11,128},{11,11,306},{12,11,373},
+{14,11,30},{17,11,79},{147,11,80},{135,11,1235},{134,0,1392},{4,11,230},{133,11,
+702},{147,0,126},{7,10,131},{7,10,422},{8,10,210},{140,10,573},{134,0,1179},{139
+,11,435},{139,10,797},{134,11,1728},{4,0,162},{18,11,26},{19,11,42},{20,11,43},{
+21,11,0},{23,11,27},{152,11,14},{132,10,936},{6,0,765},{5,10,453},{134,10,441},{
+133,0,187},{135,0,1286},{6,0,635},{6,0,904},{6,0,1210},{134,0,1489},{4,0,215},{8
+,0,890},{9,0,38},{10,0,923},{11,0,23},{11,0,127},{139,0,796},{6,0,1165},{134,0,
+1306},{7,0,716},{13,0,97},{141,0,251},{132,10,653},{136,0,657},{146,10,80},{5,11
+,622},{7,11,1032},{11,11,26},{11,11,213},{11,11,707},{12,11,380},{13,11,226},{
+141,11,355},{6,0,299},{5,11,70},{6,11,334},{9,11,171},{11,11,637},{12,11,202},{
+14,11,222},{145,11,42},{142,0,134},{4,11,23},{5,11,313},{5,11,1014},{6,11,50},{6
+,11,51},{7,11,142},{7,11,384},{9,11,783},{139,11,741},{4,11,141},{7,11,559},{8,
+11,640},{9,11,460},{12,11,183},{141,11,488},{136,11,614},{7,10,1368},{8,10,232},
+{8,10,361},{10,10,682},{138,10,742},{137,10,534},{6,0,1082},{140,0,658},{137,10,
+27},{135,0,2002},{142,10,12},{4,0,28},{5,0,440},{7,0,248},{11,0,833},{140,0,344}
+,{7,10,736},{139,10,264},{134,10,1657},{134,0,1654},{138,0,531},{5,11,222},{9,11
+,140},{138,11,534},{6,0,634},{6,0,798},{134,0,840},{138,11,503},{135,10,127},{
+133,0,853},{5,11,154},{7,11,1491},{10,11,379},{138,11,485},{6,0,249},{7,0,1234},
+{139,0,573},{133,11,716},{7,11,1570},{140,11,542},{136,10,364},{138,0,527},{4,11
+,91},{5,11,388},{5,11,845},{6,11,206},{6,11,252},{6,11,365},{7,11,136},{7,11,531
+},{8,11,264},{136,11,621},{134,0,1419},{135,11,1441},{7,0,49},{7,0,392},{8,0,20}
+,{8,0,172},{8,0,690},{9,0,383},{9,0,845},{10,0,48},{11,0,293},{11,0,832},{11,0,
+920},{11,0,984},{141,0,221},{5,0,858},{133,0,992},{5,0,728},{137,10,792},{5,10,
+909},{9,10,849},{138,10,805},{7,0,525},{7,0,1579},{8,0,497},{136,0,573},{6,0,268
+},{137,0,62},{135,11,576},{134,0,1201},{5,11,771},{5,11,863},{5,11,898},{6,11,
+1632},{6,11,1644},{134,11,1780},{133,11,331},{7,0,193},{7,0,1105},{10,0,495},{7,
+10,397},{8,10,124},{8,10,619},{9,10,305},{11,10,40},{12,10,349},{13,10,134},{13,
+10,295},{14,10,155},{15,10,120},{146,10,105},{138,0,106},{6,0,859},{5,11,107},{7
+,11,201},{136,11,518},{6,11,446},{135,11,1817},{13,0,23},{4,10,262},{135,10,342}
+,{133,10,641},{137,11,851},{6,0,925},{137,0,813},{132,11,504},{6,0,613},{136,0,
+223},{4,10,99},{6,10,250},{6,10,346},{8,10,127},{138,10,81},{136,0,953},{132,10,
+915},{139,11,892},{5,10,75},{9,10,517},{10,10,470},{12,10,155},{141,10,224},{4,0
+,666},{7,0,1017},{7,11,996},{138,11,390},{5,11,883},{133,11,975},{14,10,83},{142
+,11,83},{4,0,670},{5,11,922},{134,11,1707},{135,0,216},{9,0,40},{11,0,136},{135,
+11,787},{5,10,954},{5,11,993},{7,11,515},{137,11,91},{139,0,259},{7,0,1114},{9,0
+,310},{9,0,682},{10,0,440},{13,0,40},{6,10,304},{8,10,418},{11,10,341},{139,10,
+675},{14,0,296},{9,10,410},{139,10,425},{10,11,377},{12,11,363},{13,11,68},{13,
+11,94},{14,11,108},{142,11,306},{7,0,1401},{135,0,1476},{4,0,296},{6,0,475},{7,0
+,401},{7,0,1410},{7,0,1594},{7,0,1674},{8,0,63},{8,0,660},{137,0,74},{4,0,139},{
+4,0,388},{140,0,188},{132,0,797},{132,11,766},{5,11,103},{7,11,921},{8,11,580},{
+8,11,593},{8,11,630},{138,11,28},{4,11,911},{5,11,867},{133,11,1013},{134,10,14}
+,{134,0,1572},{134,10,1708},{21,0,39},{5,10,113},{6,10,243},{7,10,1865},{11,10,
+161},{16,10,37},{145,10,99},{7,11,1563},{141,11,182},{5,11,135},{6,11,519},{7,11
+,1722},{10,11,271},{11,11,261},{145,11,54},{132,10,274},{134,0,1594},{4,11,300},
+{5,11,436},{135,11,484},{4,0,747},{6,0,290},{7,0,649},{7,0,1479},{135,0,1583},{
+133,11,535},{147,11,82},{133,0,232},{137,0,887},{135,10,166},{136,0,521},{4,0,14
+},{7,0,472},{7,0,1801},{10,0,748},{141,0,458},{134,0,741},{134,0,992},{16,0,111}
+,{137,10,304},{4,0,425},{5,11,387},{7,11,557},{12,11,547},{142,11,86},{135,11,
+1747},{5,10,654},{135,11,1489},{7,0,789},{4,11,6},{5,11,708},{136,11,75},{6,10,
+273},{10,10,188},{13,10,377},{146,10,77},{6,0,1593},{4,11,303},{7,11,619},{10,11
+,547},{10,11,687},{11,11,122},{140,11,601},{134,0,1768},{135,10,410},{138,11,772
+},{11,0,233},{139,10,524},{5,0,943},{134,0,1779},{134,10,1785},{136,11,529},{132
+,0,955},{5,0,245},{6,0,576},{7,0,582},{136,0,225},{132,10,780},{142,0,241},{134,
+0,1943},{4,11,106},{7,11,310},{7,11,1785},{10,11,690},{139,11,717},{134,0,1284},
+{5,11,890},{133,11,988},{6,11,626},{142,11,431},{10,11,706},{145,11,32},{137,11,
+332},{132,11,698},{135,0,709},{5,10,948},{138,11,17},{136,0,554},{134,0,1564},{
+139,10,941},{132,0,443},{134,0,909},{134,11,84},{142,0,280},{4,10,532},{5,10,706
+},{135,10,662},{132,0,729},{5,10,837},{6,10,1651},{139,10,985},{135,10,1861},{4,
+0,348},{152,11,3},{5,11,986},{6,11,130},{7,11,1582},{8,11,458},{10,11,101},{10,
+11,318},{138,11,823},{134,0,758},{4,0,298},{137,0,848},{4,10,330},{7,10,933},{7,
+10,2012},{136,10,292},{7,11,1644},{137,11,129},{6,0,1422},{9,0,829},{135,10,767}
+,{5,0,164},{7,0,121},{142,0,189},{7,0,812},{7,0,1261},{7,0,1360},{9,0,632},{140,
+0,352},{135,11,1788},{139,0,556},{135,11,997},{145,10,114},{4,0,172},{9,0,611},{
+10,0,436},{12,0,673},{13,0,255},{137,10,883},{11,0,530},{138,10,274},{133,0,844}
+,{134,0,984},{13,0,232},{18,0,35},{4,10,703},{135,10,207},{132,10,571},{9,0,263}
+,{10,0,147},{138,0,492},{7,11,1756},{137,11,98},{5,10,873},{5,10,960},{8,10,823}
+,{137,10,881},{133,0,537},{132,0,859},{7,11,1046},{139,11,160},{137,0,842},{139,
+10,283},{5,10,33},{6,10,470},{139,10,424},{6,11,45},{7,11,433},{8,11,129},{9,11,
+21},{10,11,392},{11,11,79},{12,11,499},{13,11,199},{141,11,451},{135,0,1291},{
+135,10,1882},{7,11,558},{136,11,353},{134,0,1482},{5,0,230},{5,0,392},{6,0,420},
+{9,0,568},{140,0,612},{6,0,262},{7,10,90},{7,10,664},{7,10,830},{7,10,1380},{7,
+10,2025},{8,11,81},{8,10,448},{8,10,828},{9,11,189},{9,11,201},{11,11,478},{11,
+11,712},{141,11,338},{142,0,31},{5,11,353},{151,11,26},{132,0,753},{4,0,0},{5,0,
+41},{7,0,1459},{7,0,1469},{7,0,1859},{9,0,549},{139,0,905},{9,10,417},{137,10,
+493},{135,11,1113},{133,0,696},{141,11,448},{134,10,295},{132,0,834},{4,0,771},{
+5,10,1019},{6,11,25},{7,11,855},{7,11,1258},{144,11,32},{134,0,1076},{133,0,921}
+,{133,0,674},{4,11,4},{7,11,1118},{7,11,1320},{7,11,1706},{8,11,277},{9,11,622},
+{10,11,9},{11,11,724},{12,11,350},{12,11,397},{13,11,28},{13,11,159},{15,11,89},
+{18,11,5},{19,11,9},{20,11,34},{150,11,47},{134,10,208},{6,0,444},{136,0,308},{6
+,0,180},{7,0,1137},{8,0,751},{139,0,805},{4,0,183},{7,0,271},{11,0,824},{11,0,
+952},{13,0,278},{13,0,339},{13,0,482},{14,0,424},{148,0,99},{7,11,317},{135,11,
+569},{4,0,19},{5,0,477},{5,0,596},{6,0,505},{7,0,1221},{11,0,907},{12,0,209},{
+141,0,214},{135,0,1215},{6,0,271},{7,0,398},{8,0,387},{10,0,344},{7,10,448},{7,
+10,1629},{7,10,1813},{8,10,442},{9,10,710},{10,10,282},{138,10,722},{11,10,844},
+{12,10,104},{140,10,625},{134,11,255},{133,10,787},{134,0,1645},{11,11,956},{151
+,11,3},{6,0,92},{6,0,188},{7,0,209},{7,0,1269},{7,0,1524},{7,0,1876},{8,0,661},{
+10,0,42},{10,0,228},{11,0,58},{11,0,1020},{12,0,58},{12,0,118},{141,0,32},{4,0,
+459},{133,0,966},{4,11,536},{7,11,1141},{10,11,723},{139,11,371},{140,0,330},{
+134,0,1557},{7,11,285},{135,11,876},{136,10,491},{135,11,560},{6,0,18},{7,0,179}
+,{7,0,932},{8,0,548},{8,0,757},{9,0,54},{9,0,65},{9,0,532},{9,0,844},{10,0,113},
+{10,0,117},{10,0,315},{10,0,560},{10,0,622},{10,0,798},{11,0,153},{11,0,351},{11
+,0,375},{12,0,78},{12,0,151},{12,0,392},{12,0,666},{14,0,248},{143,0,23},{6,0,
+1742},{132,11,690},{4,10,403},{5,10,441},{7,10,450},{10,10,840},{11,10,101},{12,
+10,193},{141,10,430},{133,0,965},{134,0,182},{10,0,65},{10,0,488},{138,0,497},{
+135,11,1346},{6,0,973},{6,0,1158},{10,11,200},{19,11,2},{151,11,22},{4,11,190},{
+133,11,554},{133,10,679},{7,0,328},{137,10,326},{133,11,1001},{9,0,588},{138,0,
+260},{133,11,446},{135,10,1128},{135,10,1796},{147,11,119},{134,0,1786},{6,0,
+1328},{6,0,1985},{8,0,962},{138,0,1017},{135,0,308},{11,0,508},{4,10,574},{7,10,
+350},{7,10,1024},{8,10,338},{9,10,677},{138,10,808},{138,11,752},{135,10,1081},{
+137,11,96},{7,10,1676},{135,10,2037},{136,0,588},{132,11,304},{133,0,614},{140,0
+,793},{136,0,287},{137,10,297},{141,10,37},{6,11,53},{6,11,199},{7,11,1408},{8,
+11,32},{8,11,93},{9,11,437},{10,11,397},{10,11,629},{11,11,593},{11,11,763},{13,
+11,326},{145,11,35},{134,11,105},{9,11,320},{10,11,506},{138,11,794},{5,11,114},
+{5,11,255},{141,11,285},{140,0,290},{7,11,2035},{8,11,19},{9,11,89},{138,11,831}
+,{134,0,1136},{7,0,719},{8,0,796},{8,0,809},{8,0,834},{6,10,306},{7,10,1140},{7,
+10,1340},{8,10,133},{138,10,449},{139,10,1011},{5,0,210},{6,0,213},{7,0,60},{10,
+0,364},{139,0,135},{5,0,607},{8,0,326},{136,0,490},{138,11,176},{132,0,701},{5,0
+,472},{7,0,380},{137,0,758},{135,0,1947},{6,0,1079},{138,0,278},{138,11,391},{5,
+10,329},{8,10,260},{139,11,156},{4,0,386},{7,0,41},{8,0,405},{8,0,728},{9,0,497}
+,{11,0,110},{11,0,360},{15,0,37},{144,0,84},{5,0,46},{7,0,1452},{7,0,1480},{8,0,
+634},{140,0,472},{136,0,961},{4,0,524},{136,0,810},{10,0,238},{141,0,33},{132,10
+,657},{152,10,7},{133,0,532},{5,0,997},{135,10,1665},{7,11,594},{7,11,851},{7,11
+,1858},{9,11,411},{9,11,574},{9,11,666},{9,11,737},{10,11,346},{10,11,712},{11,
+11,246},{11,11,432},{11,11,517},{11,11,647},{11,11,679},{11,11,727},{12,11,304},
+{12,11,305},{12,11,323},{12,11,483},{12,11,572},{12,11,593},{12,11,602},{13,11,
+95},{13,11,101},{13,11,171},{13,11,315},{13,11,378},{13,11,425},{13,11,475},{14,
+11,63},{14,11,380},{14,11,384},{15,11,133},{18,11,112},{148,11,72},{5,11,955},{
+136,11,814},{134,0,1301},{5,10,66},{7,10,1896},{136,10,288},{133,11,56},{134,10,
+1643},{6,0,1298},{148,11,100},{5,0,782},{5,0,829},{6,0,671},{6,0,1156},{6,0,1738
+},{137,11,621},{4,0,306},{5,0,570},{7,0,1347},{5,10,91},{5,10,648},{5,10,750},{5
+,10,781},{6,10,54},{6,10,112},{6,10,402},{6,10,1732},{7,10,315},{7,10,749},{7,10
+,1900},{9,10,78},{9,10,508},{10,10,611},{10,10,811},{11,10,510},{11,10,728},{13,
+10,36},{14,10,39},{16,10,83},{17,10,124},{148,10,30},{8,10,570},{9,11,477},{141,
+11,78},{4,11,639},{10,11,4},{10,10,322},{10,10,719},{11,10,407},{11,11,638},{12,
+11,177},{148,11,57},{7,0,1823},{139,0,693},{7,0,759},{5,11,758},{8,10,125},{8,10
+,369},{8,10,524},{10,10,486},{11,10,13},{11,10,381},{11,10,736},{11,10,766},{11,
+10,845},{13,10,114},{13,10,292},{142,10,47},{7,0,1932},{6,10,1684},{6,10,1731},{
+7,10,356},{8,10,54},{8,10,221},{9,10,225},{9,10,356},{10,10,77},{10,10,446},{10,
+10,731},{12,10,404},{141,10,491},{135,11,552},{135,11,1112},{4,0,78},{5,0,96},{5
+,0,182},{6,0,1257},{7,0,1724},{7,0,1825},{10,0,394},{10,0,471},{11,0,532},{14,0,
+340},{145,0,88},{139,11,328},{135,0,1964},{132,10,411},{4,10,80},{5,10,44},{137,
+11,133},{5,11,110},{6,11,169},{6,11,1702},{7,11,400},{8,11,538},{9,11,184},{9,11
+,524},{140,11,218},{4,0,521},{5,10,299},{7,10,1083},{140,11,554},{6,11,133},{9,
+11,353},{12,11,628},{146,11,79},{6,0,215},{7,0,584},{7,0,1028},{7,0,1473},{7,0,
+1721},{9,0,424},{138,0,779},{7,0,857},{7,0,1209},{7,10,1713},{9,10,537},{10,10,
+165},{12,10,219},{140,10,561},{4,10,219},{6,11,93},{7,11,1422},{7,10,1761},{7,11
+,1851},{8,11,673},{9,10,86},{9,11,529},{140,11,43},{137,11,371},{136,0,671},{5,0
+,328},{135,0,918},{132,0,529},{9,11,25},{10,11,467},{138,11,559},{4,11,335},{135
+,11,942},{134,0,716},{134,0,1509},{6,0,67},{7,0,258},{7,0,1630},{9,0,354},{9,0,
+675},{10,0,830},{14,0,80},{17,0,80},{140,10,428},{134,0,1112},{6,0,141},{7,0,225
+},{9,0,59},{9,0,607},{10,0,312},{11,0,687},{12,0,555},{13,0,373},{13,0,494},{148
+,0,58},{133,10,514},{8,11,39},{10,11,773},{11,11,84},{12,11,205},{142,11,1},{8,0
+,783},{5,11,601},{133,11,870},{136,11,594},{4,10,55},{5,10,301},{6,10,571},{14,
+10,49},{146,10,102},{132,11,181},{134,11,1652},{133,10,364},{4,11,97},{5,11,147}
+,{6,11,286},{7,11,1362},{141,11,176},{4,10,76},{7,10,1550},{9,10,306},{9,10,430}
+,{9,10,663},{10,10,683},{11,10,427},{11,10,753},{12,10,334},{12,10,442},{14,10,
+258},{14,10,366},{143,10,131},{137,10,52},{6,0,955},{134,0,1498},{6,11,375},{7,
+11,169},{7,11,254},{136,11,780},{7,0,430},{11,0,46},{14,0,343},{142,11,343},{135
+,0,1183},{5,0,602},{7,0,2018},{9,0,418},{9,0,803},{135,11,1447},{8,0,677},{135,
+11,1044},{139,11,285},{4,10,656},{135,10,779},{135,10,144},{5,11,629},{135,11,
+1549},{135,10,1373},{138,11,209},{7,10,554},{7,10,605},{141,10,10},{5,10,838},{5
+,10,841},{134,10,1649},{133,10,1012},{6,0,1357},{134,0,1380},{144,0,53},{6,0,590
+},{7,10,365},{7,10,1357},{7,10,1497},{8,10,154},{141,10,281},{133,10,340},{132,
+11,420},{135,0,329},{147,11,32},{4,0,469},{10,11,429},{139,10,495},{8,10,261},{9
+,10,144},{9,10,466},{10,10,370},{12,10,470},{13,10,144},{142,10,348},{142,0,460}
+,{4,11,325},{9,10,897},{138,11,125},{6,0,1743},{6,10,248},{9,10,546},{10,10,535}
+,{11,10,681},{141,10,135},{4,0,990},{5,0,929},{6,0,340},{8,0,376},{8,0,807},{8,0
+,963},{8,0,980},{138,0,1007},{134,0,1603},{140,0,250},{4,11,714},{133,11,469},{
+134,10,567},{136,10,445},{5,0,218},{7,0,1610},{8,0,646},{10,0,83},{11,11,138},{
+140,11,40},{7,0,1512},{135,0,1794},{135,11,1216},{11,0,0},{16,0,78},{132,11,718}
+,{133,0,571},{132,0,455},{134,0,1012},{5,11,124},{5,11,144},{6,11,548},{7,11,15}
+,{7,11,153},{137,11,629},{142,11,10},{6,11,75},{7,11,1531},{8,11,416},{9,11,240}
+,{9,11,275},{10,11,100},{11,11,658},{11,11,979},{12,11,86},{13,11,468},{14,11,66
+},{14,11,207},{15,11,20},{15,11,25},{144,11,58},{132,10,577},{5,11,141},{5,11,
+915},{6,11,1783},{7,11,211},{7,11,698},{7,11,1353},{9,11,83},{9,11,281},{10,11,
+376},{10,11,431},{11,11,543},{12,11,664},{13,11,280},{13,11,428},{14,11,61},{14,
+11,128},{17,11,52},{145,11,81},{6,0,161},{7,0,372},{137,0,597},{132,0,349},{10,
+11,702},{139,11,245},{134,0,524},{134,10,174},{6,0,432},{9,0,751},{139,0,322},{
+147,11,94},{4,11,338},{133,11,400},{5,0,468},{10,0,325},{11,0,856},{12,0,345},{
+143,0,104},{133,0,223},{132,0,566},{4,11,221},{5,11,659},{5,11,989},{7,11,697},{
+7,11,1211},{138,11,284},{135,11,1070},{4,0,59},{135,0,1394},{6,0,436},{11,0,481}
+,{5,10,878},{133,10,972},{4,0,48},{5,0,271},{135,0,953},{5,0,610},{136,0,457},{4
+,0,773},{5,0,618},{137,0,756},{133,0,755},{135,0,1217},{138,11,507},{132,10,351}
+,{132,0,197},{143,11,78},{4,11,188},{7,11,805},{11,11,276},{142,11,293},{5,11,
+884},{139,11,991},{132,10,286},{10,0,259},{10,0,428},{7,10,438},{7,10,627},{7,10
+,1516},{8,10,40},{9,10,56},{9,10,294},{11,10,969},{11,10,995},{146,10,148},{4,0,
+356},{5,0,217},{5,0,492},{5,0,656},{8,0,544},{136,11,544},{5,0,259},{6,0,1230},{
+7,0,414},{7,0,854},{142,0,107},{132,0,1007},{15,0,14},{144,0,5},{6,0,1580},{132,
+10,738},{132,11,596},{132,0,673},{133,10,866},{6,0,1843},{135,11,1847},{4,0,165}
+,{7,0,1398},{135,0,1829},{135,11,1634},{147,11,65},{6,0,885},{6,0,1009},{137,0,
+809},{133,10,116},{132,10,457},{136,11,770},{9,0,498},{12,0,181},{10,11,361},{
+142,11,316},{134,11,595},{5,0,9},{7,0,297},{7,0,966},{140,0,306},{4,11,89},{5,11
+,489},{6,11,315},{7,11,553},{7,11,1745},{138,11,243},{134,0,1487},{132,0,437},{5
+,0,146},{6,0,411},{138,0,721},{5,10,527},{6,10,189},{135,10,859},{11,10,104},{11
+,10,554},{15,10,60},{143,10,125},{6,11,1658},{9,11,3},{10,11,154},{11,11,641},{
+13,11,85},{13,11,201},{141,11,346},{6,0,177},{135,0,467},{134,0,1377},{134,10,
+116},{136,11,645},{4,11,166},{5,11,505},{6,11,1670},{137,11,110},{133,10,487},{4
+,10,86},{5,10,667},{5,10,753},{6,10,316},{6,10,455},{135,10,946},{133,0,200},{
+132,0,959},{6,0,1928},{134,0,1957},{139,11,203},{150,10,45},{4,10,79},{7,10,1773
+},{10,10,450},{11,10,589},{13,10,332},{13,10,493},{14,10,183},{14,10,334},{14,10
+,362},{14,10,368},{14,10,376},{14,10,379},{19,10,90},{19,10,103},{19,10,127},{
+148,10,90},{6,0,1435},{135,11,1275},{134,0,481},{7,11,445},{8,11,307},{8,11,704}
+,{10,11,41},{10,11,439},{11,11,237},{11,11,622},{140,11,201},{135,11,869},{4,0,
+84},{7,0,1482},{10,0,76},{138,0,142},{11,11,277},{144,11,14},{135,11,1977},{4,11
+,189},{5,11,713},{136,11,57},{133,0,1015},{138,11,371},{4,0,315},{5,0,507},{135,
+0,1370},{4,11,552},{142,10,381},{9,0,759},{16,0,31},{16,0,39},{16,0,75},{18,0,24
+},{20,0,42},{152,0,1},{134,0,712},{134,0,1722},{133,10,663},{133,10,846},{8,0,
+222},{8,0,476},{9,0,238},{11,0,516},{11,0,575},{15,0,109},{146,0,100},{7,0,1402}
+,{7,0,1414},{12,0,456},{5,10,378},{8,10,465},{9,10,286},{10,10,185},{10,10,562},
+{10,10,635},{11,10,31},{11,10,393},{13,10,312},{18,10,65},{18,10,96},{147,10,89}
+,{4,0,986},{6,0,1958},{6,0,2032},{8,0,934},{138,0,985},{7,10,1880},{9,10,680},{
+139,10,798},{134,10,1770},{145,11,49},{132,11,614},{132,10,648},{5,10,945},{6,10
+,1656},{6,10,1787},{7,10,167},{8,10,824},{9,10,391},{10,10,375},{139,10,185},{
+138,11,661},{7,0,1273},{135,11,1945},{7,0,706},{7,0,1058},{138,0,538},{7,10,1645
+},{8,10,352},{137,10,249},{132,10,152},{11,0,92},{11,0,196},{11,0,409},{11,0,450
+},{11,0,666},{11,0,777},{12,0,262},{13,0,385},{13,0,393},{15,0,115},{16,0,45},{
+145,0,82},{133,10,1006},{6,0,40},{135,0,1781},{9,11,614},{139,11,327},{5,10,420}
+,{135,10,1449},{135,0,431},{10,0,97},{135,10,832},{6,0,423},{7,0,665},{135,0,
+1210},{7,0,237},{8,0,664},{9,0,42},{9,0,266},{9,0,380},{9,0,645},{10,0,177},{138
+,0,276},{7,0,264},{133,10,351},{8,0,213},{5,10,40},{7,10,598},{7,10,1638},{9,10,
+166},{9,10,640},{9,10,685},{9,10,773},{11,10,215},{13,10,65},{14,10,172},{14,10,
+317},{145,10,6},{5,11,84},{134,11,163},{8,10,60},{9,10,343},{139,10,769},{137,0,
+455},{133,11,410},{8,0,906},{12,0,700},{12,0,706},{140,0,729},{21,11,33},{150,11
+,40},{7,10,1951},{8,10,765},{8,10,772},{140,10,671},{7,10,108},{8,10,219},{8,10,
+388},{9,10,639},{9,10,775},{11,10,275},{140,10,464},{5,11,322},{7,11,1941},{8,11
+,186},{9,11,262},{10,11,187},{14,11,208},{146,11,130},{139,0,624},{8,0,574},{5,
+11,227},{140,11,29},{7,11,1546},{11,11,299},{142,11,407},{5,10,15},{6,10,56},{7,
+10,1758},{8,10,500},{9,10,730},{11,10,331},{13,10,150},{142,10,282},{7,11,1395},
+{8,11,486},{9,11,236},{9,11,878},{10,11,218},{11,11,95},{19,11,17},{147,11,31},{
+135,11,2043},{4,0,354},{146,11,4},{140,11,80},{135,0,1558},{134,10,1886},{5,10,
+205},{6,10,438},{137,10,711},{133,11,522},{133,10,534},{7,0,235},{7,0,1475},{15,
+0,68},{146,0,120},{137,10,691},{4,0,942},{6,0,1813},{8,0,917},{10,0,884},{12,0,
+696},{12,0,717},{12,0,723},{12,0,738},{12,0,749},{12,0,780},{16,0,97},{146,0,169
+},{6,10,443},{8,11,562},{9,10,237},{9,10,571},{9,10,695},{10,10,139},{11,10,715}
+,{12,10,417},{141,10,421},{135,0,957},{133,0,830},{134,11,1771},{146,0,23},{5,0,
+496},{6,0,694},{7,0,203},{7,11,1190},{137,11,620},{137,11,132},{6,0,547},{134,0,
+1549},{8,11,258},{9,11,208},{137,11,359},{4,0,864},{5,0,88},{137,0,239},{135,11,
+493},{4,11,317},{135,11,1279},{132,11,477},{4,10,578},{5,11,63},{133,11,509},{7,
+0,650},{135,0,1310},{7,0,1076},{9,0,80},{11,0,78},{11,0,421},{11,0,534},{140,0,
+545},{132,11,288},{12,0,553},{14,0,118},{133,10,923},{7,0,274},{11,0,479},{139,0
+,507},{8,11,89},{8,11,620},{9,11,49},{10,11,774},{11,11,628},{12,11,322},{143,11
+,124},{4,0,497},{135,0,1584},{7,0,261},{7,0,1115},{7,0,1354},{7,0,1404},{7,0,
+1588},{7,0,1705},{7,0,1902},{9,0,465},{10,0,248},{10,0,349},{10,0,647},{11,0,527
+},{11,0,660},{11,0,669},{12,0,529},{13,0,305},{132,10,924},{133,10,665},{136,0,
+13},{6,0,791},{138,11,120},{7,0,642},{8,0,250},{11,0,123},{11,0,137},{13,0,48},{
+142,0,95},{4,10,265},{7,10,807},{135,10,950},{5,10,93},{140,10,267},{135,0,1429}
+,{4,0,949},{10,0,885},{10,0,891},{10,0,900},{10,0,939},{12,0,760},{142,0,449},{
+139,11,366},{132,0,818},{134,11,85},{135,10,994},{7,0,330},{5,10,233},{5,10,320}
+,{6,10,140},{136,10,295},{4,0,1004},{8,0,982},{136,0,993},{133,10,978},{4,10,905
+},{6,10,1701},{137,10,843},{10,0,545},{140,0,301},{6,0,947},{134,0,1062},{134,0,
+1188},{4,0,904},{5,0,794},{152,10,6},{134,0,1372},{135,11,608},{5,11,279},{6,11,
+235},{7,11,468},{8,11,446},{9,11,637},{10,11,717},{11,11,738},{140,11,514},{132,
+10,509},{5,11,17},{6,11,371},{137,11,528},{132,0,693},{4,11,115},{5,11,669},{6,
+11,407},{8,11,311},{11,11,10},{141,11,5},{11,0,377},{7,10,273},{137,11,381},{135
+,0,695},{7,0,386},{138,0,713},{135,10,1041},{134,0,1291},{6,0,7},{6,0,35},{7,0,
+147},{7,0,1069},{7,0,1568},{7,0,1575},{7,0,1917},{8,0,43},{8,0,208},{9,0,128},{9
+,0,866},{10,0,20},{11,0,981},{147,0,33},{7,0,893},{141,0,424},{139,10,234},{150,
+11,56},{5,11,779},{5,11,807},{6,11,1655},{134,11,1676},{5,10,802},{7,10,2021},{
+136,10,805},{4,11,196},{5,10,167},{5,11,558},{5,10,899},{5,11,949},{6,10,410},{
+137,10,777},{137,10,789},{134,10,1705},{8,0,904},{140,0,787},{6,0,322},{9,0,552}
+,{11,0,274},{13,0,209},{13,0,499},{14,0,85},{15,0,126},{145,0,70},{135,10,10},{5
+,10,11},{6,10,117},{6,10,485},{7,10,1133},{9,10,582},{9,10,594},{11,10,21},{11,
+10,818},{12,10,535},{141,10,86},{4,10,264},{7,10,1067},{8,10,204},{8,10,385},{
+139,10,953},{132,11,752},{138,10,56},{133,10,470},{6,0,1808},{8,0,83},{8,0,742},
+{8,0,817},{9,0,28},{9,0,29},{9,0,885},{10,0,387},{11,0,633},{11,0,740},{13,0,235
+},{13,0,254},{15,0,143},{143,0,146},{140,0,49},{134,0,1832},{4,11,227},{5,11,159
+},{5,11,409},{7,11,80},{10,11,294},{10,11,479},{12,11,418},{14,11,50},{14,11,249
+},{142,11,295},{7,11,1470},{8,11,66},{8,11,137},{8,11,761},{9,11,638},{11,11,80}
+,{11,11,212},{11,11,368},{11,11,418},{12,11,8},{13,11,15},{16,11,61},{17,11,59},
+{19,11,28},{148,11,84},{139,10,1015},{138,11,468},{135,0,421},{6,0,415},{7,0,
+1049},{137,0,442},{6,11,38},{7,11,1220},{8,11,185},{8,11,256},{9,11,22},{9,11,
+331},{10,11,738},{11,11,205},{11,11,540},{11,11,746},{13,11,399},{13,11,465},{14
+,11,88},{142,11,194},{139,0,289},{133,10,715},{4,0,110},{10,0,415},{10,0,597},{
+142,0,206},{4,11,159},{6,11,115},{7,11,252},{7,11,257},{7,11,1928},{8,11,69},{9,
+11,384},{10,11,91},{10,11,615},{12,11,375},{14,11,235},{18,11,117},{147,11,123},
+{5,11,911},{136,11,278},{7,0,205},{7,0,2000},{8,10,794},{9,10,400},{10,10,298},{
+142,10,228},{135,11,1774},{4,11,151},{7,11,1567},{8,11,351},{137,11,322},{136,10
+,724},{133,11,990},{7,0,1539},{11,0,512},{13,0,205},{19,0,30},{22,0,36},{23,0,19
+},{135,11,1539},{5,11,194},{7,11,1662},{9,11,90},{140,11,180},{6,10,190},{7,10,
+768},{135,10,1170},{134,0,1340},{4,0,283},{135,0,1194},{133,11,425},{133,11,971}
+,{12,0,549},{14,10,67},{147,10,60},{135,10,1023},{134,0,1720},{138,11,587},{5,11
+,72},{6,11,264},{7,11,21},{7,11,46},{7,11,2013},{8,11,215},{8,11,513},{10,11,266
+},{139,11,22},{5,0,319},{135,0,534},{6,10,137},{9,10,75},{9,10,253},{10,10,194},
+{138,10,444},{7,0,1180},{20,0,112},{6,11,239},{7,11,118},{10,11,95},{11,11,603},
+{13,11,443},{14,11,160},{143,11,4},{134,11,431},{5,11,874},{6,11,1677},{11,10,
+643},{12,10,115},{143,11,0},{134,0,967},{6,11,65},{7,11,939},{7,11,1172},{7,11,
+1671},{9,11,540},{10,11,696},{11,11,265},{11,11,732},{11,11,928},{11,11,937},{12
+,11,399},{13,11,438},{149,11,19},{137,11,200},{135,0,1940},{5,10,760},{7,10,542}
+,{8,10,135},{136,10,496},{140,11,44},{7,11,1655},{136,11,305},{7,10,319},{7,10,
+355},{7,10,763},{10,10,389},{145,10,43},{136,0,735},{138,10,786},{137,11,19},{
+132,11,696},{5,0,132},{9,0,486},{9,0,715},{10,0,458},{11,0,373},{11,0,668},{11,0
+,795},{11,0,897},{12,0,272},{12,0,424},{12,0,539},{12,0,558},{14,0,245},{14,0,
+263},{14,0,264},{14,0,393},{142,0,403},{10,0,38},{139,0,784},{132,0,838},{4,11,
+302},{135,11,1766},{133,0,379},{5,0,8},{6,0,89},{6,0,400},{7,0,1569},{7,0,1623},
+{7,0,1850},{8,0,218},{8,0,422},{9,0,570},{10,0,626},{4,11,726},{133,11,630},{4,0
+,1017},{138,0,660},{6,0,387},{7,0,882},{141,0,111},{6,0,224},{7,0,877},{137,0,
+647},{4,10,58},{5,10,286},{6,10,319},{7,10,402},{7,10,1254},{7,10,1903},{8,10,
+356},{140,10,408},{135,0,790},{9,0,510},{10,0,53},{4,10,389},{9,10,181},{10,10,
+29},{10,10,816},{11,10,311},{11,10,561},{12,10,67},{141,10,181},{142,0,458},{6,
+11,118},{7,11,215},{7,11,1521},{140,11,11},{134,0,954},{135,0,394},{134,0,1367},
+{5,11,225},{133,10,373},{132,0,882},{7,0,1409},{135,10,1972},{135,10,1793},{4,11
+,370},{5,11,756},{135,11,1326},{150,11,13},{7,11,354},{10,11,410},{139,11,815},{
+6,11,1662},{7,11,48},{8,11,771},{10,11,116},{13,11,104},{14,11,105},{14,11,184},
+{15,11,168},{19,11,92},{148,11,68},{7,0,124},{136,0,38},{5,0,261},{7,0,78},{7,0,
+199},{8,0,815},{9,0,126},{10,0,342},{140,0,647},{4,0,628},{140,0,724},{7,0,266},
+{8,0,804},{7,10,1651},{145,10,89},{135,0,208},{134,0,1178},{6,0,79},{135,0,1519}
+,{132,10,672},{133,10,737},{136,0,741},{132,11,120},{4,0,710},{6,0,376},{134,0,
+606},{134,0,1347},{134,0,1494},{6,0,850},{6,0,1553},{137,0,821},{5,10,145},{134,
+11,593},{7,0,1311},{140,0,135},{4,0,467},{5,0,405},{134,0,544},{5,11,820},{135,
+11,931},{6,0,100},{7,0,244},{7,0,632},{7,0,1609},{8,0,178},{8,0,638},{141,0,58},
+{4,10,387},{135,10,1288},{6,11,151},{6,11,1675},{7,11,383},{151,11,10},{132,0,
+481},{135,10,550},{134,0,1378},{6,11,1624},{11,11,11},{12,11,422},{13,11,262},{
+142,11,360},{133,0,791},{4,11,43},{5,11,344},{133,11,357},{7,0,1227},{140,0,978}
+,{7,0,686},{8,0,33},{8,0,238},{10,0,616},{11,0,467},{11,0,881},{13,0,217},{13,0,
+253},{142,0,268},{137,0,857},{8,0,467},{8,0,1006},{7,11,148},{8,11,284},{141,11,
+63},{4,10,576},{135,10,1263},{133,11,888},{5,10,919},{134,10,1673},{20,10,37},{
+148,11,37},{132,0,447},{132,11,711},{4,0,128},{5,0,415},{6,0,462},{7,0,294},{7,0
+,578},{10,0,710},{139,0,86},{4,10,82},{5,10,333},{5,10,904},{6,10,207},{7,10,325
+},{7,10,1726},{8,10,101},{10,10,778},{139,10,220},{136,0,587},{137,11,440},{133,
+10,903},{6,0,427},{7,0,1018},{138,0,692},{4,0,195},{135,0,802},{140,10,147},{134
+,0,1546},{134,0,684},{132,10,705},{136,0,345},{11,11,678},{140,11,307},{133,0,
+365},{134,0,1683},{4,11,65},{5,11,479},{5,11,1004},{7,11,1913},{8,11,317},{9,11,
+302},{10,11,612},{141,11,22},{138,0,472},{4,11,261},{135,11,510},{134,10,90},{
+142,0,433},{151,0,28},{4,11,291},{7,11,101},{9,11,515},{12,11,152},{12,11,443},{
+13,11,392},{142,11,357},{140,0,997},{5,0,3},{8,0,578},{9,0,118},{10,0,705},{141,
+0,279},{135,11,1266},{7,10,813},{12,10,497},{141,10,56},{133,0,229},{6,10,125},{
+135,10,1277},{8,0,102},{10,0,578},{10,0,672},{12,0,496},{13,0,408},{14,0,121},{
+17,0,106},{151,10,12},{6,0,866},{134,0,1080},{136,0,1022},{4,11,130},{135,11,843
+},{5,11,42},{5,11,879},{7,11,245},{7,11,324},{7,11,1532},{11,11,463},{11,11,472}
+,{13,11,363},{144,11,52},{150,0,55},{8,0,115},{8,0,350},{9,0,489},{10,0,128},{11
+,0,306},{12,0,373},{14,0,30},{17,0,79},{19,0,80},{4,11,134},{133,11,372},{134,0,
+657},{134,0,933},{135,11,1147},{4,0,230},{133,0,702},{134,0,1728},{4,0,484},{18,
+0,26},{19,0,42},{20,0,43},{21,0,0},{23,0,27},{152,0,14},{7,0,185},{135,0,703},{6
+,0,417},{10,0,618},{7,10,1106},{9,10,770},{11,10,112},{140,10,413},{134,0,803},{
+132,11,644},{134,0,1262},{7,11,540},{12,10,271},{145,10,109},{135,11,123},{132,0
+,633},{134,11,623},{4,11,908},{5,11,359},{5,11,508},{6,11,1723},{7,11,343},{7,11
+,1996},{135,11,2026},{135,0,479},{10,0,262},{7,10,304},{9,10,646},{9,10,862},{11
+,10,696},{12,10,208},{15,10,79},{147,10,108},{4,11,341},{135,11,480},{134,0,830}
+,{5,0,70},{5,0,622},{6,0,334},{7,0,1032},{9,0,171},{11,0,26},{11,0,213},{11,0,
+637},{11,0,707},{12,0,202},{12,0,380},{13,0,226},{13,0,355},{14,0,222},{145,0,42
+},{135,10,981},{143,0,217},{137,11,114},{4,0,23},{4,0,141},{5,0,313},{5,0,1014},
+{6,0,50},{6,0,51},{7,0,142},{7,0,384},{7,0,559},{8,0,640},{9,0,460},{9,0,783},{
+11,0,741},{12,0,183},{141,0,488},{141,0,360},{7,0,1586},{7,11,1995},{8,11,299},{
+11,11,890},{140,11,674},{132,10,434},{7,0,652},{134,10,550},{7,0,766},{5,10,553}
+,{138,10,824},{7,0,737},{8,0,298},{136,10,452},{4,11,238},{5,11,503},{6,11,179},
+{7,11,2003},{8,11,381},{8,11,473},{9,11,149},{10,11,183},{15,11,45},{143,11,86},
+{133,10,292},{5,0,222},{9,0,655},{138,0,534},{138,10,135},{4,11,121},{5,11,156},
+{5,11,349},{9,11,136},{10,11,605},{14,11,342},{147,11,107},{137,0,906},{6,0,1013
+},{134,0,1250},{6,0,1956},{6,0,2009},{8,0,991},{144,0,120},{135,11,1192},{138,0,
+503},{5,0,154},{7,0,1491},{10,0,379},{138,0,485},{6,0,1867},{6,0,1914},{6,0,1925
+},{9,0,917},{9,0,925},{9,0,932},{9,0,951},{9,0,1007},{9,0,1013},{12,0,806},{12,0
+,810},{12,0,814},{12,0,816},{12,0,824},{12,0,832},{12,0,837},{12,0,863},{12,0,
+868},{12,0,870},{12,0,889},{12,0,892},{12,0,900},{12,0,902},{12,0,908},{12,0,933
+},{12,0,942},{12,0,949},{12,0,954},{15,0,175},{15,0,203},{15,0,213},{15,0,218},{
+15,0,225},{15,0,231},{15,0,239},{15,0,248},{15,0,252},{18,0,190},{18,0,204},{18,
+0,215},{18,0,216},{18,0,222},{18,0,225},{18,0,230},{18,0,239},{18,0,241},{21,0,
+42},{21,0,43},{21,0,44},{21,0,45},{21,0,46},{21,0,53},{24,0,27},{152,0,31},{133,
+0,716},{135,0,844},{4,0,91},{5,0,388},{5,0,845},{6,0,206},{6,0,252},{6,0,365},{7
+,0,136},{7,0,531},{136,0,621},{7,10,393},{10,10,603},{139,10,206},{6,11,80},{6,
+11,1694},{7,11,173},{7,11,1974},{9,11,547},{10,11,730},{14,11,18},{150,11,39},{
+137,0,748},{4,11,923},{134,11,1711},{4,10,912},{137,10,232},{7,10,98},{7,10,1973
+},{136,10,716},{14,0,103},{133,10,733},{132,11,595},{12,0,158},{18,0,8},{19,0,62
+},{20,0,6},{22,0,4},{23,0,2},{23,0,9},{5,11,240},{6,11,459},{7,11,12},{7,11,114}
+,{7,11,502},{7,11,1751},{7,11,1753},{7,11,1805},{8,11,658},{9,11,1},{11,11,959},
+{13,11,446},{142,11,211},{135,0,576},{5,0,771},{5,0,863},{5,0,898},{6,0,648},{6,
+0,1632},{6,0,1644},{134,0,1780},{133,0,331},{7,11,633},{7,11,905},{7,11,909},{7,
+11,1538},{9,11,767},{140,11,636},{140,0,632},{5,0,107},{7,0,201},{136,0,518},{6,
+0,446},{7,0,1817},{134,11,490},{9,0,851},{141,0,510},{7,11,250},{8,11,506},{136,
+11,507},{4,0,504},{137,10,72},{132,11,158},{4,11,140},{7,11,362},{8,11,209},{9,
+11,10},{9,11,160},{9,11,503},{10,11,689},{11,11,350},{11,11,553},{11,11,725},{12
+,11,252},{12,11,583},{13,11,192},{13,11,352},{14,11,269},{14,11,356},{148,11,50}
+,{6,11,597},{135,11,1318},{135,10,1454},{5,0,883},{5,0,975},{8,0,392},{148,0,7},
+{6,11,228},{7,11,1341},{9,11,408},{138,11,343},{11,11,348},{11,10,600},{12,11,99
+},{13,10,245},{18,11,1},{18,11,11},{147,11,4},{134,11,296},{5,0,922},{134,0,1707
+},{132,11,557},{4,11,548},{7,10,164},{7,10,1571},{9,10,107},{140,10,225},{7,11,
+197},{8,11,142},{8,11,325},{9,11,150},{9,11,596},{10,11,350},{10,11,353},{11,11,
+74},{11,11,315},{14,11,423},{143,11,141},{5,0,993},{7,0,515},{137,0,91},{4,0,131
+},{8,0,200},{5,10,484},{5,10,510},{6,10,434},{7,10,1000},{7,10,1098},{136,10,2},
+{152,0,10},{4,11,62},{5,11,83},{6,11,399},{6,11,579},{7,11,692},{7,11,846},{7,11
+,1015},{7,11,1799},{8,11,403},{9,11,394},{10,11,133},{12,11,4},{12,11,297},{12,
+11,452},{16,11,81},{18,11,19},{18,11,25},{21,11,14},{22,11,12},{151,11,18},{140,
+11,459},{132,11,177},{7,0,1433},{9,0,365},{137,11,365},{132,10,460},{5,0,103},{6
+,0,2004},{7,0,921},{8,0,580},{8,0,593},{8,0,630},{10,0,28},{5,11,411},{135,11,
+653},{4,10,932},{133,10,891},{4,0,911},{5,0,867},{5,0,1013},{7,0,2034},{8,0,798}
+,{136,0,813},{7,11,439},{10,11,727},{11,11,260},{139,11,684},{136,10,625},{5,11,
+208},{7,11,753},{135,11,1528},{5,0,461},{7,0,1925},{12,0,39},{13,0,265},{13,0,
+439},{134,10,76},{6,0,853},{8,10,92},{137,10,221},{5,0,135},{6,0,519},{7,0,1722}
+,{10,0,271},{11,0,261},{145,0,54},{139,11,814},{14,0,338},{148,0,81},{4,0,300},{
+133,0,436},{5,0,419},{5,0,687},{7,0,864},{9,0,470},{135,11,864},{9,0,836},{133,
+11,242},{134,0,1937},{4,10,763},{133,11,953},{132,10,622},{132,0,393},{133,10,
+253},{8,0,357},{10,0,745},{14,0,426},{17,0,94},{19,0,57},{135,10,546},{5,11,615}
+,{146,11,37},{9,10,73},{10,10,110},{14,10,185},{145,10,119},{11,0,703},{7,10,624
+},{7,10,916},{10,10,256},{139,10,87},{133,11,290},{5,10,212},{12,10,35},{141,10,
+382},{132,11,380},{5,11,52},{7,11,277},{9,11,368},{139,11,791},{133,0,387},{10,
+11,138},{139,11,476},{4,0,6},{5,0,708},{136,0,75},{7,0,1351},{9,0,581},{10,0,639
+},{11,0,453},{140,0,584},{132,0,303},{138,0,772},{135,10,1175},{4,0,749},{5,10,
+816},{6,11,256},{7,11,307},{7,11,999},{7,11,1481},{7,11,1732},{7,11,1738},{8,11,
+265},{9,11,414},{11,11,316},{12,11,52},{13,11,420},{147,11,100},{135,11,1296},{6
+,0,1065},{5,10,869},{5,10,968},{6,10,1626},{8,10,734},{136,10,784},{4,10,542},{6
+,10,1716},{6,10,1727},{7,10,1082},{7,10,1545},{8,10,56},{8,10,118},{8,10,412},{8
+,10,564},{9,10,888},{9,10,908},{10,10,50},{10,10,423},{11,10,685},{11,10,697},{
+11,10,933},{12,10,299},{13,10,126},{13,10,136},{13,10,170},{141,10,190},{134,0,
+226},{4,0,106},{7,0,310},{11,0,717},{133,11,723},{5,0,890},{5,0,988},{4,10,232},
+{9,10,202},{10,10,474},{140,10,433},{6,0,626},{142,0,431},{10,0,706},{150,0,44},
+{13,0,51},{6,10,108},{7,10,1003},{7,10,1181},{8,10,111},{136,10,343},{132,0,698}
+,{5,11,109},{6,11,1784},{7,11,1895},{12,11,296},{140,11,302},{134,0,828},{134,10
+,1712},{138,0,17},{7,0,1929},{4,10,133},{5,11,216},{7,10,711},{7,10,1298},{7,10,
+1585},{7,11,1879},{9,11,141},{9,11,270},{9,11,679},{10,11,159},{10,11,553},{11,
+11,197},{11,11,438},{12,11,538},{12,11,559},{13,11,193},{13,11,423},{14,11,144},
+{14,11,166},{14,11,167},{15,11,67},{147,11,84},{141,11,127},{7,11,1872},{137,11,
+81},{6,10,99},{7,10,1808},{145,10,57},{134,11,391},{5,0,689},{6,0,84},{7,0,1250}
+,{6,10,574},{7,10,428},{10,10,669},{11,10,485},{11,10,840},{12,10,300},{142,10,
+250},{7,11,322},{136,11,249},{7,11,432},{135,11,1649},{135,10,1871},{137,10,252}
+,{6,11,155},{140,11,234},{7,0,871},{19,0,27},{147,11,27},{140,0,498},{5,0,986},{
+6,0,130},{138,0,823},{6,0,1793},{7,0,1582},{8,0,458},{10,0,101},{10,0,318},{10,0
+,945},{12,0,734},{16,0,104},{18,0,177},{6,10,323},{135,10,1564},{5,11,632},{138,
+11,526},{10,0,435},{7,10,461},{136,10,775},{6,11,144},{7,11,948},{7,11,1042},{7,
+11,1857},{8,11,235},{8,11,461},{9,11,453},{9,11,530},{10,11,354},{17,11,77},{19,
+11,99},{148,11,79},{138,0,966},{7,0,1644},{137,0,129},{135,0,997},{136,0,502},{5
+,11,196},{6,11,486},{7,11,212},{8,11,309},{136,11,346},{7,10,727},{146,10,73},{
+132,0,823},{132,11,686},{135,0,1927},{4,0,762},{7,0,1756},{137,0,98},{136,10,577
+},{24,0,8},{4,11,30},{5,11,43},{152,11,8},{7,0,1046},{139,0,160},{7,0,492},{4,10
+,413},{5,10,677},{7,11,492},{8,10,432},{140,10,280},{6,0,45},{7,0,433},{8,0,129}
+,{9,0,21},{10,0,392},{11,0,79},{12,0,499},{13,0,199},{141,0,451},{7,0,558},{136,
+0,353},{4,11,220},{7,11,1535},{9,11,93},{139,11,474},{7,10,646},{7,10,1730},{11,
+10,446},{141,10,178},{133,0,785},{134,0,1145},{8,0,81},{9,0,189},{9,0,201},{11,0
+,478},{11,0,712},{141,0,338},{5,0,353},{151,0,26},{11,0,762},{132,10,395},{134,0
+,2024},{4,0,611},{133,0,606},{9,10,174},{10,10,164},{11,10,440},{11,10,841},{143
+,10,98},{134,10,426},{10,10,608},{139,10,1002},{138,10,250},{6,0,25},{7,0,855},{
+7,0,1258},{144,0,32},{7,11,1725},{138,11,393},{5,11,263},{134,11,414},{6,0,2011}
+,{133,10,476},{4,0,4},{7,0,1118},{7,0,1320},{7,0,1706},{8,0,277},{9,0,622},{10,0
+,9},{11,0,724},{12,0,350},{12,0,397},{13,0,28},{13,0,159},{15,0,89},{18,0,5},{19
+,0,9},{20,0,34},{22,0,47},{6,11,178},{6,11,1750},{8,11,251},{9,11,690},{10,11,
+155},{10,11,196},{10,11,373},{11,11,698},{13,11,155},{148,11,93},{5,11,97},{137,
+11,393},{7,0,764},{11,0,461},{12,0,172},{5,10,76},{6,10,458},{6,10,497},{7,10,
+868},{9,10,658},{10,10,594},{11,10,566},{12,10,338},{141,10,200},{134,0,1449},{
+138,11,40},{134,11,1639},{134,0,1445},{6,0,1168},{4,10,526},{7,10,1029},{135,10,
+1054},{4,11,191},{7,11,934},{8,11,647},{145,11,97},{132,10,636},{6,0,233},{7,10,
+660},{7,10,1124},{17,10,31},{19,10,22},{151,10,14},{6,10,1699},{136,11,110},{12,
+11,246},{15,11,162},{19,11,64},{20,11,8},{20,11,95},{22,11,24},{152,11,17},{5,11
+,165},{9,11,346},{138,11,655},{5,11,319},{135,11,534},{134,0,255},{9,0,216},{8,
+11,128},{139,11,179},{9,0,183},{139,0,286},{11,0,956},{151,0,3},{4,0,536},{7,0,
+1141},{10,0,723},{139,0,371},{4,10,279},{7,10,301},{137,10,362},{7,0,285},{5,11,
+57},{6,11,101},{6,11,1663},{7,11,132},{7,11,1048},{7,11,1154},{7,11,1415},{7,11,
+1507},{12,11,493},{15,11,105},{151,11,15},{5,11,459},{7,11,1073},{7,10,1743},{8,
+11,241},{136,11,334},{4,10,178},{133,10,399},{135,0,560},{132,0,690},{135,0,1246
+},{18,0,157},{147,0,63},{10,0,599},{11,0,33},{12,0,571},{149,0,1},{6,11,324},{6,
+11,520},{7,11,338},{7,11,1616},{7,11,1729},{8,11,228},{9,11,69},{139,11,750},{7,
+0,1862},{12,0,491},{12,0,520},{13,0,383},{142,0,244},{135,11,734},{134,10,1692},
+{10,0,448},{11,0,630},{17,0,117},{6,10,202},{7,11,705},{12,10,360},{17,10,118},{
+18,10,27},{148,10,67},{4,11,73},{6,11,612},{7,11,927},{7,11,1822},{8,11,217},{9,
+11,472},{9,11,765},{9,11,766},{10,11,408},{11,11,51},{11,11,793},{12,11,266},{15
+,11,158},{20,11,89},{150,11,32},{4,0,190},{133,0,554},{133,0,1001},{5,11,389},{8
+,11,636},{137,11,229},{5,0,446},{7,10,872},{10,10,516},{139,10,167},{137,10,313}
+,{132,10,224},{134,0,1313},{5,10,546},{7,10,35},{8,10,11},{8,10,12},{9,10,315},{
+9,10,533},{10,10,802},{11,10,166},{12,10,525},{142,10,243},{6,0,636},{137,0,837}
+,{5,10,241},{8,10,242},{9,10,451},{10,10,667},{11,10,598},{140,10,429},{22,10,46
+},{150,11,46},{136,11,472},{11,0,278},{142,0,73},{141,11,185},{132,0,868},{134,0
+,972},{4,10,366},{137,10,516},{138,0,1010},{5,11,189},{6,10,1736},{7,11,442},{7,
+11,443},{8,11,281},{12,11,174},{13,11,83},{141,11,261},{139,11,384},{6,11,2},{7,
+11,191},{7,11,446},{7,11,758},{7,11,1262},{7,11,1737},{8,11,22},{8,11,270},{8,11
+,612},{9,11,4},{9,11,167},{9,11,312},{9,11,436},{10,11,156},{10,11,216},{10,11,
+311},{10,11,623},{11,11,72},{11,11,330},{11,11,455},{12,11,101},{12,11,321},{12,
+11,504},{12,11,530},{12,11,543},{13,11,17},{13,11,156},{13,11,334},{14,11,48},{
+15,11,70},{17,11,60},{148,11,64},{6,10,331},{136,10,623},{135,0,1231},{132,0,304
+},{6,11,60},{7,11,670},{7,11,1327},{8,11,411},{8,11,435},{9,11,653},{9,11,740},{
+10,11,385},{11,11,222},{11,11,324},{11,11,829},{140,11,611},{7,0,506},{6,11,166}
+,{7,11,374},{135,11,1174},{14,11,43},{146,11,21},{135,11,1694},{135,10,1888},{5,
+11,206},{134,11,398},{135,11,50},{150,0,26},{6,0,53},{6,0,199},{7,0,1408},{8,0,
+32},{8,0,93},{10,0,397},{10,0,629},{11,0,593},{11,0,763},{13,0,326},{145,0,35},{
+134,0,105},{132,10,394},{4,0,843},{138,0,794},{11,0,704},{141,0,396},{5,0,114},{
+5,0,255},{141,0,285},{6,0,619},{7,0,898},{7,0,1092},{8,0,485},{18,0,28},{19,0,
+116},{135,10,1931},{9,0,145},{7,10,574},{135,10,1719},{7,0,2035},{8,0,19},{9,0,
+89},{138,0,831},{132,10,658},{6,11,517},{7,11,1159},{10,11,621},{139,11,192},{7,
+0,1933},{7,11,1933},{9,10,781},{10,10,144},{11,10,385},{13,10,161},{13,10,228},{
+13,10,268},{148,10,107},{136,10,374},{10,11,223},{139,11,645},{135,0,1728},{7,11
+,64},{7,11,289},{136,11,245},{4,10,344},{6,10,498},{139,10,323},{136,0,746},{135
+,10,1063},{137,10,155},{4,0,987},{6,0,1964},{6,0,1974},{6,0,1990},{136,0,995},{
+133,11,609},{133,10,906},{134,0,1550},{134,0,874},{5,11,129},{6,11,61},{135,11,
+947},{4,0,1018},{6,0,1938},{6,0,2021},{134,0,2039},{132,0,814},{11,0,126},{139,0
+,287},{134,0,1264},{5,0,955},{136,0,814},{141,11,506},{132,11,314},{6,0,981},{
+139,11,1000},{5,0,56},{8,0,892},{8,0,915},{140,0,776},{148,0,100},{10,0,4},{10,0
+,13},{11,0,638},{148,0,57},{148,11,74},{5,0,738},{132,10,616},{133,11,637},{136,
+10,692},{133,0,758},{132,10,305},{137,11,590},{5,11,280},{135,11,1226},{134,11,
+494},{135,0,1112},{133,11,281},{13,0,44},{14,0,214},{5,10,214},{7,10,603},{8,10,
+611},{9,10,686},{10,10,88},{11,10,459},{11,10,496},{12,10,463},{140,10,590},{139
+,0,328},{135,11,1064},{137,0,133},{7,0,168},{13,0,196},{141,0,237},{134,10,1703}
+,{134,0,1152},{135,0,1245},{5,0,110},{6,0,169},{6,0,1702},{7,0,400},{8,0,538},{9
+,0,184},{9,0,524},{140,0,218},{6,0,1816},{10,0,871},{12,0,769},{140,0,785},{132,
+11,630},{7,11,33},{7,11,120},{8,11,489},{9,11,319},{10,11,820},{11,11,1004},{12,
+11,379},{13,11,117},{13,11,412},{14,11,25},{15,11,52},{15,11,161},{16,11,47},{
+149,11,2},{6,0,133},{8,0,413},{9,0,353},{139,0,993},{145,10,19},{4,11,937},{133,
+11,801},{134,0,978},{6,0,93},{6,0,1508},{7,0,1422},{7,0,1851},{8,0,673},{9,0,529
+},{140,0,43},{6,0,317},{10,0,512},{4,10,737},{11,10,294},{12,10,60},{12,10,437},
+{13,10,64},{13,10,380},{142,10,430},{9,0,371},{7,11,1591},{144,11,43},{6,10,1758
+},{8,10,520},{9,10,345},{9,10,403},{142,10,350},{5,0,526},{10,10,242},{138,10,
+579},{9,0,25},{10,0,467},{138,0,559},{5,10,139},{7,10,1168},{138,10,539},{4,0,
+335},{135,0,942},{140,0,754},{132,11,365},{11,0,182},{142,0,195},{142,11,29},{5,
+11,7},{139,11,774},{4,11,746},{135,11,1090},{8,0,39},{10,0,773},{11,0,84},{12,0,
+205},{142,0,1},{5,0,601},{5,0,870},{5,11,360},{136,11,237},{132,0,181},{136,0,
+370},{134,0,1652},{8,0,358},{4,10,107},{7,10,613},{8,10,439},{8,10,504},{9,10,
+501},{10,10,383},{139,10,477},{132,10,229},{137,11,785},{4,0,97},{5,0,147},{6,0,
+286},{7,0,1362},{141,0,176},{6,0,537},{7,0,788},{7,0,1816},{132,10,903},{140,10,
+71},{6,0,743},{134,0,1223},{6,0,375},{7,0,169},{7,0,254},{8,0,780},{135,11,1493}
+,{7,0,1714},{4,10,47},{6,10,373},{7,10,452},{7,10,543},{7,10,1856},{9,10,6},{11,
+10,257},{139,10,391},{6,0,896},{136,0,1003},{135,0,1447},{137,11,341},{5,10,980}
+,{134,10,1754},{145,11,22},{4,11,277},{5,11,608},{6,11,493},{7,11,457},{140,11,
+384},{7,10,536},{7,10,1331},{136,10,143},{140,0,744},{7,11,27},{135,11,316},{18,
+0,126},{5,10,19},{134,10,533},{4,0,788},{11,0,41},{5,11,552},{5,11,586},{5,11,
+676},{6,11,448},{8,11,244},{11,11,1},{11,11,41},{13,11,3},{16,11,54},{17,11,4},{
+146,11,13},{4,0,985},{6,0,1801},{4,11,401},{137,11,264},{5,10,395},{5,10,951},{
+134,10,1776},{5,0,629},{135,0,1549},{11,10,663},{12,10,210},{13,10,166},{13,10,
+310},{14,10,373},{147,10,43},{9,11,543},{10,11,524},{11,11,30},{12,11,524},{14,
+11,315},{16,11,18},{20,11,26},{148,11,65},{4,11,205},{5,11,623},{7,11,104},{136,
+11,519},{5,0,293},{134,0,601},{7,11,579},{9,11,41},{9,11,244},{9,11,669},{10,11,
+5},{11,11,861},{11,11,951},{139,11,980},{132,11,717},{132,10,695},{7,10,497},{9,
+10,387},{147,10,81},{132,0,420},{142,0,37},{6,0,1134},{6,0,1900},{12,0,830},{12,
+0,878},{12,0,894},{15,0,221},{143,0,245},{132,11,489},{7,0,1570},{140,0,542},{8,
+0,933},{136,0,957},{6,0,1371},{7,0,31},{8,0,373},{5,10,284},{6,10,49},{6,10,350}
+,{7,10,377},{7,10,1693},{8,10,678},{9,10,161},{9,10,585},{9,10,671},{9,10,839},{
+11,10,912},{141,10,427},{135,11,892},{4,0,325},{138,0,125},{139,11,47},{132,10,
+597},{138,0,323},{6,0,1547},{7,11,1605},{9,11,473},{11,11,962},{146,11,139},{139
+,10,908},{7,11,819},{9,11,26},{9,11,392},{10,11,152},{10,11,226},{11,11,19},{12,
+11,276},{12,11,426},{12,11,589},{13,11,460},{15,11,97},{19,11,48},{148,11,104},{
+135,11,51},{4,0,718},{135,0,1216},{6,0,1896},{6,0,1905},{6,0,1912},{9,0,947},{9,
+0,974},{12,0,809},{12,0,850},{12,0,858},{12,0,874},{12,0,887},{12,0,904},{12,0,
+929},{12,0,948},{12,0,952},{15,0,198},{15,0,206},{15,0,220},{15,0,227},{15,0,247
+},{18,0,188},{21,0,48},{21,0,50},{24,0,25},{24,0,29},{7,11,761},{7,11,1051},{137
+,11,545},{5,0,124},{5,0,144},{6,0,548},{7,0,15},{7,0,153},{137,0,629},{135,11,
+606},{135,10,2014},{7,10,2007},{9,11,46},{9,10,101},{9,10,450},{10,10,66},{10,10
+,842},{11,10,536},{140,10,587},{6,0,75},{7,0,1531},{8,0,416},{9,0,240},{9,0,275}
+,{10,0,100},{11,0,658},{11,0,979},{12,0,86},{14,0,207},{15,0,20},{143,0,25},{5,0
+,141},{5,0,915},{6,0,1783},{7,0,211},{7,0,698},{7,0,1353},{9,0,83},{9,0,281},{10
+,0,376},{10,0,431},{11,0,543},{12,0,664},{13,0,280},{13,0,428},{14,0,61},{14,0,
+128},{17,0,52},{145,0,81},{132,11,674},{135,0,533},{149,0,6},{132,11,770},{133,0
+,538},{5,11,79},{7,11,1027},{7,11,1477},{139,11,52},{139,10,62},{4,0,338},{133,0
+,400},{5,11,789},{134,11,195},{4,11,251},{4,11,688},{7,11,513},{7,11,1284},{9,11
+,87},{138,11,365},{134,10,1766},{6,0,0},{7,0,84},{11,0,895},{145,0,11},{139,0,
+892},{4,0,221},{5,0,659},{7,0,697},{7,0,1211},{138,0,284},{133,0,989},{133,11,
+889},{4,11,160},{5,11,330},{7,11,1434},{136,11,174},{6,10,1665},{7,10,256},{7,10
+,1388},{10,10,499},{139,10,670},{7,0,848},{4,10,22},{5,10,10},{136,10,97},{138,0
+,507},{133,10,481},{4,0,188},{135,0,805},{5,0,884},{6,0,732},{139,0,991},{135,11
+,968},{11,11,636},{15,11,145},{17,11,34},{19,11,50},{151,11,20},{7,0,959},{16,0,
+60},{6,10,134},{7,10,437},{9,10,37},{14,10,285},{142,10,371},{7,10,486},{8,10,
+155},{11,10,93},{140,10,164},{134,0,1653},{7,0,337},{133,10,591},{6,0,1989},{8,0
+,922},{8,0,978},{133,11,374},{132,0,638},{138,0,500},{133,11,731},{5,10,380},{5,
+10,650},{136,10,310},{138,11,381},{4,10,364},{7,10,1156},{7,10,1187},{137,10,409
+},{137,11,224},{140,0,166},{134,10,482},{4,11,626},{5,11,642},{6,11,425},{10,11,
+202},{139,11,141},{4,10,781},{6,10,487},{7,10,926},{8,10,263},{139,10,500},{135,
+0,418},{4,10,94},{135,10,1265},{136,0,760},{132,10,417},{136,11,835},{5,10,348},
+{134,10,522},{6,0,1277},{134,0,1538},{139,11,541},{135,11,1597},{5,11,384},{8,11
+,455},{140,11,48},{136,0,770},{5,11,264},{134,11,184},{4,0,89},{5,0,489},{6,0,
+315},{7,0,553},{7,0,1745},{138,0,243},{4,10,408},{4,10,741},{135,10,500},{134,0,
+1396},{133,0,560},{6,0,1658},{9,0,3},{10,0,154},{11,0,641},{13,0,85},{13,0,201},
+{141,0,346},{135,11,1595},{5,11,633},{6,11,28},{7,11,219},{135,11,1323},{9,11,
+769},{140,11,185},{135,11,785},{7,11,359},{8,11,243},{140,11,175},{138,0,586},{7
+,0,1271},{134,10,73},{132,11,105},{4,0,166},{5,0,505},{134,0,1670},{133,10,576},
+{4,11,324},{138,11,104},{142,10,231},{6,0,637},{7,10,1264},{7,10,1678},{11,10,
+945},{12,10,341},{12,10,471},{12,10,569},{23,11,21},{151,11,23},{8,11,559},{141,
+11,109},{134,0,1947},{7,0,445},{8,0,307},{8,0,704},{10,0,41},{10,0,439},{11,0,
+237},{11,0,622},{140,0,201},{135,11,963},{135,0,1977},{4,0,189},{5,0,713},{136,0
+,57},{138,0,371},{135,10,538},{132,0,552},{6,0,883},{133,10,413},{6,0,923},{132,
+11,758},{138,11,215},{136,10,495},{7,10,54},{8,10,312},{10,10,191},{10,10,614},{
+140,10,567},{7,11,351},{139,11,128},{7,0,875},{6,10,468},{7,10,1478},{8,10,530},
+{142,10,290},{135,0,1788},{17,0,49},{133,11,918},{12,11,398},{20,11,39},{21,11,
+11},{150,11,41},{10,0,661},{6,10,484},{135,10,822},{135,0,1945},{134,0,794},{137
+,10,900},{135,10,1335},{6,10,1724},{135,10,2022},{132,11,340},{134,0,1135},{4,0,
+784},{133,0,745},{5,0,84},{134,0,163},{133,0,410},{4,0,976},{5,11,985},{7,11,509
+},{7,11,529},{145,11,96},{132,10,474},{134,0,703},{135,11,1919},{5,0,322},{8,0,
+186},{9,0,262},{10,0,187},{142,0,208},{135,10,1504},{133,0,227},{9,0,560},{13,0,
+208},{133,10,305},{132,11,247},{7,0,1395},{8,0,486},{9,0,236},{9,0,878},{10,0,
+218},{11,0,95},{19,0,17},{147,0,31},{7,0,2043},{8,0,672},{141,0,448},{4,11,184},
+{5,11,390},{6,11,337},{7,11,23},{7,11,494},{7,11,618},{7,11,1456},{8,11,27},{8,
+11,599},{10,11,153},{139,11,710},{135,0,466},{135,10,1236},{6,0,167},{7,0,186},{
+7,0,656},{10,0,643},{4,10,480},{6,10,302},{6,10,1642},{7,10,837},{7,10,1547},{7,
+10,1657},{8,10,429},{9,10,228},{13,10,289},{13,10,343},{147,10,101},{134,0,1428}
+,{134,0,1440},{5,0,412},{7,10,278},{10,10,739},{11,10,708},{141,10,348},{134,0,
+1118},{136,0,562},{148,11,46},{9,0,316},{139,0,256},{134,0,1771},{135,0,1190},{
+137,0,132},{10,11,227},{11,11,497},{11,11,709},{140,11,415},{143,0,66},{6,11,360
+},{7,11,1664},{136,11,478},{144,10,28},{4,0,317},{135,0,1279},{5,0,63},{133,0,
+509},{136,11,699},{145,10,36},{134,0,1475},{11,11,343},{142,11,127},{132,11,739}
+,{132,0,288},{135,11,1757},{8,0,89},{8,0,620},{9,0,608},{11,0,628},{12,0,322},{
+143,0,124},{134,0,1225},{7,0,1189},{4,11,67},{5,11,422},{6,10,363},{7,11,1037},{
+7,11,1289},{7,11,1555},{7,10,1955},{8,10,725},{9,11,741},{145,11,108},{134,0,
+1468},{6,0,689},{134,0,1451},{138,0,120},{151,0,1},{137,10,805},{142,0,329},{5,
+10,813},{135,10,2046},{135,0,226},{138,11,96},{7,0,1855},{5,10,712},{11,10,17},{
+13,10,321},{144,10,67},{9,0,461},{6,10,320},{7,10,781},{7,10,1921},{9,10,55},{10
+,10,186},{10,10,273},{10,10,664},{10,10,801},{11,10,996},{11,10,997},{13,10,157}
+,{142,10,170},{8,11,203},{8,10,271},{11,11,823},{11,11,846},{12,11,482},{13,11,
+133},{13,11,277},{13,11,302},{13,11,464},{14,11,205},{142,11,221},{135,0,1346},{
+4,11,449},{133,11,718},{134,0,85},{14,0,299},{7,10,103},{7,10,863},{11,10,184},{
+145,10,62},{4,11,355},{6,11,311},{9,11,256},{138,11,404},{137,10,659},{138,11,
+758},{133,11,827},{5,11,64},{140,11,581},{134,0,1171},{4,11,442},{7,11,1047},{7,
+11,1352},{135,11,1643},{132,0,980},{5,11,977},{6,11,288},{7,11,528},{135,11,1065
+},{5,0,279},{6,0,235},{7,0,468},{8,0,446},{9,0,637},{10,0,717},{11,0,738},{140,0
+,514},{132,0,293},{11,10,337},{142,10,303},{136,11,285},{5,0,17},{6,0,371},{9,0,
+528},{12,0,364},{132,11,254},{5,10,77},{7,10,1455},{10,10,843},{147,10,73},{150,
+0,5},{132,10,458},{6,11,12},{7,11,1219},{145,11,73},{135,10,1420},{6,10,109},{
+138,10,382},{135,11,125},{6,10,330},{7,10,1084},{139,10,142},{6,11,369},{6,11,
+502},{7,11,1036},{8,11,348},{9,11,452},{10,11,26},{11,11,224},{11,11,387},{11,11
+,772},{12,11,95},{12,11,629},{13,11,195},{13,11,207},{13,11,241},{14,11,260},{14
+,11,270},{143,11,140},{132,11,269},{5,11,480},{7,11,532},{7,11,1197},{7,11,1358}
+,{8,11,291},{11,11,349},{142,11,396},{150,0,48},{10,0,601},{13,0,353},{141,0,376
+},{5,0,779},{5,0,807},{6,0,1655},{134,0,1676},{142,11,223},{4,0,196},{5,0,558},{
+133,0,949},{148,11,15},{135,11,1764},{134,0,1322},{132,0,752},{139,0,737},{135,
+11,657},{136,11,533},{135,0,412},{4,0,227},{5,0,159},{5,0,409},{7,0,80},{8,0,556
+},{10,0,479},{12,0,418},{14,0,50},{14,0,123},{14,0,192},{14,0,249},{14,0,295},{
+143,0,27},{7,0,1470},{8,0,66},{8,0,137},{8,0,761},{9,0,638},{11,0,80},{11,0,212}
+,{11,0,368},{11,0,418},{12,0,8},{13,0,15},{16,0,61},{17,0,59},{19,0,28},{148,0,
+84},{135,10,1985},{4,11,211},{4,11,332},{5,11,335},{6,11,238},{7,11,269},{7,11,
+811},{7,11,1797},{8,10,122},{8,11,836},{9,11,507},{141,11,242},{6,0,683},{134,0,
+1252},{4,0,873},{132,10,234},{134,0,835},{6,0,38},{7,0,1220},{8,0,185},{8,0,256}
+,{9,0,22},{9,0,331},{10,0,738},{11,0,205},{11,0,540},{11,0,746},{13,0,465},{14,0
+,88},{142,0,194},{138,0,986},{5,11,1009},{12,11,582},{146,11,131},{4,0,159},{6,0
+,115},{7,0,252},{7,0,257},{7,0,1928},{8,0,69},{9,0,384},{10,0,91},{10,0,615},{12
+,0,375},{14,0,235},{18,0,117},{147,0,123},{133,0,911},{136,0,278},{5,10,430},{5,
+10,932},{6,10,131},{7,10,417},{9,10,522},{11,10,314},{141,10,390},{14,10,149},{
+14,10,399},{143,10,57},{4,0,151},{7,0,1567},{136,0,749},{5,11,228},{6,11,203},{7
+,11,156},{8,11,347},{137,11,265},{132,10,507},{10,0,989},{140,0,956},{133,0,990}
+,{5,0,194},{6,0,927},{7,0,1662},{9,0,90},{140,0,564},{4,10,343},{133,10,511},{
+133,0,425},{7,10,455},{138,10,591},{4,0,774},{7,11,476},{7,11,1592},{138,11,87},
+{5,0,971},{135,10,1381},{5,11,318},{147,11,121},{5,11,291},{7,11,765},{9,11,389}
+,{140,11,548},{134,10,575},{4,0,827},{12,0,646},{12,0,705},{12,0,712},{140,0,714
+},{139,0,752},{137,0,662},{5,0,72},{6,0,264},{7,0,21},{7,0,46},{7,0,2013},{8,0,
+215},{8,0,513},{10,0,266},{139,0,22},{139,11,522},{6,0,239},{7,0,118},{10,0,95},
+{11,0,603},{13,0,443},{14,0,160},{143,0,4},{6,0,431},{134,0,669},{7,10,1127},{7,
+10,1572},{10,10,297},{10,10,422},{11,10,764},{11,10,810},{12,10,264},{13,10,102}
+,{13,10,300},{13,10,484},{14,10,147},{14,10,229},{17,10,71},{18,10,118},{147,10,
+120},{5,0,874},{6,0,1677},{15,0,0},{10,11,525},{139,11,82},{6,0,65},{7,0,939},{7
+,0,1172},{7,0,1671},{9,0,540},{10,0,696},{11,0,265},{11,0,732},{11,0,928},{11,0,
+937},{141,0,438},{134,0,1350},{136,11,547},{132,11,422},{5,11,355},{145,11,0},{
+137,11,905},{5,0,682},{135,0,1887},{132,0,809},{4,0,696},{133,11,865},{6,0,1074}
+,{6,0,1472},{14,10,35},{142,10,191},{5,11,914},{134,11,1625},{133,11,234},{135,
+11,1383},{137,11,780},{132,10,125},{4,0,726},{133,0,630},{8,0,802},{136,0,838},{
+132,10,721},{6,0,1337},{7,0,776},{19,0,56},{136,10,145},{132,0,970},{7,10,792},{
+8,10,147},{10,10,821},{139,10,1021},{139,10,970},{8,0,940},{137,0,797},{135,11,
+1312},{9,0,248},{10,0,400},{7,11,816},{7,11,1241},{7,10,1999},{9,11,283},{9,11,
+520},{10,11,213},{10,11,307},{10,11,463},{10,11,671},{10,11,746},{11,11,401},{11
+,11,794},{12,11,517},{18,11,107},{147,11,115},{6,0,1951},{134,0,2040},{135,11,
+339},{13,0,41},{15,0,93},{5,10,168},{5,10,930},{8,10,74},{9,10,623},{12,10,500},
+{140,10,579},{6,0,118},{7,0,215},{7,0,1521},{140,0,11},{6,10,220},{7,10,1101},{
+141,10,105},{6,11,421},{7,11,61},{7,11,1540},{10,11,11},{138,11,501},{7,0,615},{
+138,0,251},{140,11,631},{135,0,1044},{6,10,19},{7,10,1413},{139,10,428},{133,0,
+225},{7,10,96},{8,10,401},{8,10,703},{137,10,896},{145,10,116},{6,11,102},{7,11,
+72},{15,11,142},{147,11,67},{7,10,1961},{7,10,1965},{8,10,702},{136,10,750},{7,
+10,2030},{8,10,150},{8,10,737},{12,10,366},{151,11,30},{4,0,370},{5,0,756},{7,0,
+1326},{135,11,823},{8,10,800},{9,10,148},{9,10,872},{9,10,890},{11,10,309},{11,
+10,1001},{13,10,267},{141,10,323},{6,0,1662},{7,0,48},{8,0,771},{10,0,116},{13,0
+,104},{14,0,105},{14,0,184},{15,0,168},{19,0,92},{148,0,68},{10,0,209},{135,11,
+1870},{7,11,68},{8,11,48},{8,11,88},{8,11,582},{8,11,681},{9,11,373},{9,11,864},
+{11,11,157},{11,11,336},{11,11,843},{148,11,27},{134,0,930},{4,11,88},{5,11,137}
+,{5,11,174},{5,11,777},{6,11,1664},{6,11,1725},{7,11,77},{7,11,426},{7,11,1317},
+{7,11,1355},{8,11,126},{8,11,563},{9,11,523},{9,11,750},{10,11,310},{10,11,836},
+{11,11,42},{11,11,318},{11,11,731},{12,11,68},{12,11,92},{12,11,507},{12,11,692}
+,{13,11,81},{13,11,238},{13,11,374},{18,11,138},{19,11,78},{19,11,111},{20,11,55
+},{20,11,77},{148,11,92},{4,11,938},{135,11,1831},{5,10,547},{7,10,424},{8,11,
+617},{138,11,351},{6,0,1286},{6,11,1668},{7,11,1499},{8,11,117},{9,11,314},{138,
+11,174},{6,0,759},{6,0,894},{7,11,707},{139,11,563},{4,0,120},{135,0,1894},{9,0,
+385},{149,0,17},{138,0,429},{133,11,403},{5,0,820},{135,0,931},{10,0,199},{133,
+10,133},{6,0,151},{6,0,1675},{7,0,383},{151,0,10},{6,0,761},{136,10,187},{8,0,
+365},{10,10,0},{10,10,818},{139,10,988},{4,11,44},{5,11,311},{6,11,156},{7,11,
+639},{7,11,762},{7,11,1827},{9,11,8},{9,11,462},{148,11,83},{4,11,346},{7,11,115
+},{9,11,180},{9,11,456},{138,11,363},{136,10,685},{7,0,1086},{145,0,46},{6,0,
+1624},{11,0,11},{12,0,422},{13,0,444},{142,0,360},{6,0,1020},{6,0,1260},{134,0,
+1589},{4,0,43},{5,0,344},{5,0,357},{14,0,472},{150,0,58},{6,0,1864},{6,0,1866},{
+6,0,1868},{6,0,1869},{6,0,1874},{6,0,1877},{6,0,1903},{6,0,1911},{9,0,920},{9,0,
+921},{9,0,924},{9,0,946},{9,0,959},{9,0,963},{9,0,970},{9,0,997},{9,0,1008},{9,0
+,1017},{12,0,795},{12,0,797},{12,0,798},{12,0,800},{12,0,803},{12,0,811},{12,0,
+820},{12,0,821},{12,0,839},{12,0,841},{12,0,848},{12,0,911},{12,0,921},{12,0,922
+},{12,0,925},{12,0,937},{12,0,944},{12,0,945},{12,0,953},{15,0,184},{15,0,191},{
+15,0,199},{15,0,237},{15,0,240},{15,0,243},{15,0,246},{18,0,203},{21,0,40},{21,0
+,52},{21,0,57},{24,0,23},{24,0,28},{152,0,30},{134,0,725},{145,11,58},{133,0,888
+},{137,10,874},{4,0,711},{8,10,774},{10,10,670},{140,10,51},{144,11,40},{6,11,
+185},{7,11,1899},{139,11,673},{137,10,701},{137,0,440},{4,11,327},{5,11,478},{7,
+11,1332},{8,11,753},{140,11,227},{4,10,127},{5,10,350},{6,10,356},{8,10,426},{9,
+10,572},{10,10,247},{139,10,312},{5,11,1020},{133,11,1022},{4,11,103},{133,11,
+401},{6,0,1913},{6,0,1926},{6,0,1959},{9,0,914},{9,0,939},{9,0,952},{9,0,979},{9
+,0,990},{9,0,998},{9,0,1003},{9,0,1023},{12,0,827},{12,0,834},{12,0,845},{12,0,
+912},{12,0,935},{12,0,951},{15,0,172},{15,0,174},{18,0,198},{149,0,63},{5,0,958}
+,{5,0,987},{4,11,499},{135,11,1421},{7,0,885},{6,10,59},{6,10,1762},{9,10,603},{
+141,10,397},{10,11,62},{141,11,164},{4,0,847},{135,0,326},{11,0,276},{142,0,293}
+,{4,0,65},{5,0,479},{5,0,1004},{7,0,1913},{8,0,317},{9,0,302},{10,0,612},{13,0,
+22},{132,11,96},{4,0,261},{135,0,510},{135,0,1514},{6,10,111},{7,10,4},{8,10,163
+},{8,10,776},{138,10,566},{4,0,291},{9,0,515},{12,0,152},{12,0,443},{13,0,392},{
+142,0,357},{7,11,399},{135,11,1492},{4,0,589},{139,0,282},{6,11,563},{135,10,
+1994},{5,10,297},{135,10,1038},{4,0,130},{7,0,843},{135,0,1562},{5,0,42},{5,0,
+879},{7,0,245},{7,0,324},{7,0,1532},{11,0,463},{11,0,472},{13,0,363},{144,0,52},
+{4,0,134},{133,0,372},{133,0,680},{136,10,363},{6,0,1997},{8,0,935},{136,0,977},
+{4,0,810},{135,0,1634},{135,10,1675},{7,0,1390},{4,11,910},{133,11,832},{7,10,
+808},{8,11,266},{139,11,578},{132,0,644},{4,0,982},{138,0,867},{132,10,280},{135
+,0,540},{140,10,54},{135,0,123},{134,0,1978},{4,10,421},{133,10,548},{6,0,623},{
+136,0,789},{4,0,908},{5,0,359},{5,0,508},{6,0,1723},{7,0,343},{7,0,1996},{135,0,
+2026},{134,0,1220},{4,0,341},{135,0,480},{6,10,254},{9,10,109},{138,10,103},{134
+,0,888},{8,11,528},{137,11,348},{7,0,1995},{8,0,299},{11,0,890},{12,0,674},{4,11
+,20},{133,11,616},{135,11,1094},{134,10,1630},{4,0,238},{5,0,503},{6,0,179},{7,0
+,2003},{8,0,381},{8,0,473},{9,0,149},{10,0,788},{15,0,45},{15,0,86},{20,0,110},{
+150,0,57},{133,10,671},{4,11,26},{5,11,429},{6,11,245},{7,11,704},{7,11,1379},{
+135,11,1474},{4,0,121},{5,0,156},{5,0,349},{9,0,431},{10,0,605},{142,0,342},{7,
+11,943},{139,11,614},{132,10,889},{132,11,621},{7,10,1382},{7,11,1382},{135,10,
+1910},{132,10,627},{133,10,775},{133,11,542},{133,11,868},{136,11,433},{6,0,1373
+},{7,0,1011},{11,10,362},{11,10,948},{140,10,388},{6,0,80},{7,0,173},{9,0,547},{
+10,0,730},{14,0,18},{22,0,39},{135,11,1495},{6,0,1694},{135,0,1974},{140,0,196},
+{4,0,923},{6,0,507},{6,0,1711},{7,10,451},{8,10,389},{12,10,490},{13,10,16},{13,
+10,215},{13,10,351},{18,10,132},{147,10,125},{6,0,646},{134,0,1047},{135,10,841}
+,{136,10,566},{6,0,1611},{135,0,1214},{139,0,926},{132,11,525},{132,0,595},{5,0,
+240},{6,0,459},{7,0,12},{7,0,114},{7,0,949},{7,0,1753},{7,0,1805},{8,0,658},{9,0
+,1},{11,0,959},{141,0,446},{5,10,912},{134,10,1695},{132,0,446},{7,11,62},{12,11
+,45},{147,11,112},{5,10,236},{6,10,572},{8,10,492},{11,10,618},{144,10,56},{5,10
+,190},{136,10,318},{135,10,1376},{4,11,223},{6,11,359},{11,11,3},{13,11,108},{14
+,11,89},{144,11,22},{132,11,647},{134,0,490},{134,0,491},{134,0,1584},{135,11,
+685},{138,11,220},{7,0,250},{136,0,507},{132,0,158},{4,0,140},{7,0,362},{8,0,209
+},{9,0,10},{9,0,160},{9,0,503},{9,0,614},{10,0,689},{11,0,327},{11,0,553},{11,0,
+725},{11,0,767},{12,0,252},{12,0,583},{13,0,192},{14,0,269},{14,0,356},{148,0,50
+},{19,0,1},{19,0,26},{150,0,9},{132,11,109},{6,0,228},{7,0,1341},{9,0,408},{138,
+0,343},{4,0,373},{5,0,283},{6,0,480},{7,0,609},{10,0,860},{138,0,878},{6,0,779},
+{134,0,1209},{4,0,557},{7,11,263},{7,11,628},{136,11,349},{132,0,548},{7,0,197},
+{8,0,142},{8,0,325},{9,0,150},{9,0,596},{10,0,350},{10,0,353},{11,0,74},{11,0,
+315},{12,0,662},{12,0,681},{14,0,423},{143,0,141},{4,11,40},{10,11,67},{11,11,
+117},{11,11,768},{139,11,935},{7,11,992},{8,11,301},{9,11,722},{12,11,63},{13,11
+,29},{14,11,161},{143,11,18},{6,0,1490},{138,11,532},{5,0,580},{7,0,378},{7,0,
+674},{7,0,1424},{15,0,83},{16,0,11},{15,11,83},{144,11,11},{6,0,1057},{6,0,1335}
+,{10,0,316},{7,10,85},{7,10,247},{8,10,585},{138,10,163},{4,0,169},{5,0,83},{6,0
+,399},{6,0,579},{6,0,1513},{7,0,692},{7,0,846},{7,0,1015},{7,0,1799},{8,0,403},{
+9,0,394},{10,0,133},{12,0,4},{12,0,297},{12,0,452},{16,0,81},{18,0,25},{21,0,14}
+,{22,0,12},{151,0,18},{134,0,1106},{7,0,1546},{11,0,299},{142,0,407},{134,0,1192
+},{132,0,177},{5,0,411},{135,0,653},{7,0,439},{10,0,727},{11,0,260},{139,0,684},
+{138,10,145},{147,10,83},{5,0,208},{7,0,753},{135,0,1528},{137,11,617},{135,10,
+1922},{135,11,825},{11,0,422},{13,0,389},{4,10,124},{10,10,457},{11,10,121},{11,
+10,169},{11,10,870},{12,10,214},{14,10,187},{143,10,77},{11,0,615},{15,0,58},{11
+,11,615},{143,11,58},{9,0,618},{138,0,482},{6,0,1952},{6,0,1970},{142,0,505},{7,
+10,1193},{135,11,1838},{133,0,242},{135,10,1333},{6,10,107},{7,10,638},{7,10,
+1632},{137,10,396},{133,0,953},{5,10,370},{134,10,1756},{5,11,28},{6,11,204},{10
+,11,320},{10,11,583},{13,11,502},{14,11,72},{14,11,274},{14,11,312},{14,11,344},
+{15,11,159},{16,11,62},{16,11,69},{17,11,30},{18,11,42},{18,11,53},{18,11,84},{
+18,11,140},{19,11,68},{19,11,85},{20,11,5},{20,11,45},{20,11,101},{22,11,7},{150
+,11,20},{4,11,558},{6,11,390},{7,11,162},{7,11,689},{9,11,360},{138,11,653},{11,
+0,802},{141,0,67},{133,10,204},{133,0,290},{5,10,970},{134,10,1706},{132,0,380},
+{5,0,52},{7,0,277},{9,0,368},{139,0,791},{5,11,856},{6,11,1672},{6,11,1757},{6,
+11,1781},{7,11,1150},{7,11,1425},{7,11,1453},{140,11,513},{5,11,92},{7,10,3},{10
+,11,736},{140,11,102},{4,0,112},{5,0,653},{5,10,483},{5,10,685},{6,10,489},{7,10
+,1204},{136,10,394},{132,10,921},{6,0,1028},{133,10,1007},{5,11,590},{9,11,213},
+{145,11,91},{135,10,1696},{10,0,138},{139,0,476},{5,0,725},{5,0,727},{135,0,1811
+},{4,0,979},{6,0,1821},{6,0,1838},{8,0,876},{8,0,883},{8,0,889},{8,0,893},{8,0,
+895},{10,0,934},{12,0,720},{14,0,459},{148,0,123},{135,11,551},{4,0,38},{6,0,435
+},{7,0,307},{7,0,999},{7,0,1481},{7,0,1732},{7,0,1738},{8,0,371},{9,0,414},{11,0
+,316},{12,0,52},{13,0,420},{147,0,100},{135,0,1296},{132,10,712},{134,10,1629},{
+133,0,723},{134,0,651},{136,11,191},{9,11,791},{10,11,93},{11,11,301},{16,11,13}
+,{17,11,23},{18,11,135},{19,11,12},{20,11,1},{20,11,12},{148,11,14},{136,11,503}
+,{6,11,466},{135,11,671},{6,0,1200},{134,0,1330},{135,0,1255},{134,0,986},{5,0,
+109},{6,0,1784},{7,0,1895},{12,0,296},{140,0,302},{135,11,983},{133,10,485},{134
+,0,660},{134,0,800},{5,0,216},{5,0,294},{6,0,591},{7,0,1879},{9,0,141},{9,0,270}
+,{9,0,679},{10,0,159},{11,0,197},{11,0,438},{12,0,538},{12,0,559},{14,0,144},{14
+,0,167},{15,0,67},{4,10,285},{5,10,317},{6,10,301},{7,10,7},{8,10,153},{10,10,
+766},{11,10,468},{12,10,467},{141,10,143},{136,0,945},{134,0,1090},{137,0,81},{
+12,11,468},{19,11,96},{148,11,24},{134,0,391},{138,11,241},{7,0,322},{136,0,249}
+,{134,0,1412},{135,11,795},{5,0,632},{138,0,526},{136,10,819},{6,0,144},{7,0,948
+},{7,0,1042},{8,0,235},{8,0,461},{9,0,453},{9,0,796},{10,0,354},{17,0,77},{135,
+11,954},{139,10,917},{6,0,940},{134,0,1228},{4,0,362},{7,0,52},{135,0,303},{6,11
+,549},{8,11,34},{8,11,283},{9,11,165},{138,11,475},{7,11,370},{7,11,1007},{7,11,
+1177},{135,11,1565},{5,11,652},{5,11,701},{135,11,449},{5,0,196},{6,0,486},{7,0,
+212},{8,0,309},{136,0,346},{6,10,1719},{6,10,1735},{7,10,2016},{7,10,2020},{8,10
+,837},{137,10,852},{6,11,159},{6,11,364},{7,11,516},{7,11,1439},{137,11,518},{
+135,0,1912},{135,0,1290},{132,0,686},{141,11,151},{138,0,625},{136,0,706},{138,
+10,568},{139,0,412},{4,0,30},{133,0,43},{8,10,67},{138,10,419},{7,0,967},{141,0,
+11},{12,0,758},{14,0,441},{142,0,462},{10,10,657},{14,10,297},{142,10,361},{139,
+10,729},{4,0,220},{135,0,1535},{7,11,501},{9,11,111},{10,11,141},{11,11,332},{13
+,11,43},{13,11,429},{14,11,130},{14,11,415},{145,11,102},{4,0,950},{6,0,1859},{7
+,0,11},{8,0,873},{12,0,710},{12,0,718},{12,0,748},{12,0,765},{148,0,124},{5,11,
+149},{5,11,935},{136,11,233},{142,11,291},{134,0,1579},{7,0,890},{8,10,51},{9,10
+,868},{10,10,833},{12,10,481},{12,10,570},{148,10,106},{141,0,2},{132,10,445},{
+136,11,801},{135,0,1774},{7,0,1725},{138,0,393},{5,0,263},{134,0,414},{132,11,
+322},{133,10,239},{7,0,456},{7,10,1990},{8,10,130},{139,10,720},{137,0,818},{5,
+10,123},{6,10,530},{7,10,348},{135,10,1419},{135,10,2024},{6,0,178},{6,0,1750},{
+8,0,251},{9,0,690},{10,0,155},{10,0,196},{10,0,373},{11,0,698},{13,0,155},{148,0
+,93},{5,0,97},{137,0,393},{134,0,674},{11,0,223},{140,0,168},{132,10,210},{139,
+11,464},{6,0,1639},{146,0,159},{139,11,2},{7,0,934},{8,0,647},{17,0,97},{19,0,59
+},{150,0,2},{132,0,191},{5,0,165},{9,0,346},{10,0,655},{11,0,885},{4,10,430},{
+135,11,357},{133,0,877},{5,10,213},{133,11,406},{8,0,128},{139,0,179},{6,11,69},
+{135,11,117},{135,0,1297},{11,11,43},{13,11,72},{141,11,142},{135,11,1830},{142,
+0,164},{5,0,57},{6,0,101},{6,0,586},{6,0,1663},{7,0,132},{7,0,1154},{7,0,1415},{
+7,0,1507},{12,0,493},{15,0,105},{151,0,15},{5,0,459},{7,0,1073},{8,0,241},{136,0
+,334},{133,11,826},{133,10,108},{5,10,219},{10,11,132},{11,11,191},{11,11,358},{
+139,11,460},{6,0,324},{6,0,520},{7,0,338},{7,0,1729},{8,0,228},{139,0,750},{21,0
+,30},{22,0,53},{4,10,193},{5,10,916},{7,10,364},{10,10,398},{10,10,726},{11,10,
+317},{11,10,626},{12,10,142},{12,10,288},{12,10,678},{13,10,313},{15,10,113},{
+146,10,114},{6,11,110},{135,11,1681},{135,0,910},{6,10,241},{7,10,907},{8,10,832
+},{9,10,342},{10,10,729},{11,10,284},{11,10,445},{11,10,651},{11,10,863},{13,10,
+398},{146,10,99},{7,0,705},{9,0,734},{5,11,1000},{7,11,733},{137,11,583},{4,0,73
+},{6,0,612},{7,0,927},{7,0,1822},{8,0,217},{9,0,765},{9,0,766},{10,0,408},{11,0,
+51},{11,0,793},{12,0,266},{15,0,158},{20,0,89},{150,0,32},{7,0,1330},{4,11,297},
+{6,11,529},{7,11,152},{7,11,713},{7,11,1845},{8,11,710},{8,11,717},{140,11,639},
+{5,0,389},{136,0,636},{134,0,1409},{4,10,562},{9,10,254},{139,10,879},{134,0,893
+},{132,10,786},{4,11,520},{135,11,575},{136,0,21},{140,0,721},{136,0,959},{7,11,
+1428},{7,11,1640},{9,11,169},{9,11,182},{9,11,367},{9,11,478},{9,11,506},{9,11,
+551},{9,11,648},{9,11,651},{9,11,697},{9,11,705},{9,11,725},{9,11,787},{9,11,794
+},{10,11,198},{10,11,214},{10,11,267},{10,11,275},{10,11,456},{10,11,551},{10,11
+,561},{10,11,613},{10,11,627},{10,11,668},{10,11,675},{10,11,691},{10,11,695},{
+10,11,707},{10,11,715},{11,11,183},{11,11,201},{11,11,244},{11,11,262},{11,11,
+352},{11,11,439},{11,11,493},{11,11,572},{11,11,591},{11,11,608},{11,11,611},{11
+,11,646},{11,11,674},{11,11,711},{11,11,751},{11,11,761},{11,11,776},{11,11,785}
+,{11,11,850},{11,11,853},{11,11,862},{11,11,865},{11,11,868},{11,11,898},{11,11,
+902},{11,11,903},{11,11,910},{11,11,932},{11,11,942},{11,11,957},{11,11,967},{11
+,11,972},{12,11,148},{12,11,195},{12,11,220},{12,11,237},{12,11,318},{12,11,339}
+,{12,11,393},{12,11,445},{12,11,450},{12,11,474},{12,11,509},{12,11,533},{12,11,
+591},{12,11,594},{12,11,597},{12,11,621},{12,11,633},{12,11,642},{13,11,59},{13,
+11,60},{13,11,145},{13,11,239},{13,11,250},{13,11,273},{13,11,329},{13,11,344},{
+13,11,365},{13,11,372},{13,11,387},{13,11,403},{13,11,414},{13,11,456},{13,11,
+478},{13,11,483},{13,11,489},{14,11,55},{14,11,57},{14,11,81},{14,11,90},{14,11,
+148},{14,11,239},{14,11,266},{14,11,321},{14,11,326},{14,11,327},{14,11,330},{14
+,11,347},{14,11,355},{14,11,401},{14,11,411},{14,11,414},{14,11,416},{14,11,420}
+,{15,11,61},{15,11,74},{15,11,87},{15,11,88},{15,11,94},{15,11,96},{15,11,116},{
+15,11,149},{15,11,154},{16,11,50},{16,11,63},{16,11,73},{17,11,2},{17,11,66},{17
+,11,92},{17,11,103},{17,11,112},{18,11,50},{18,11,54},{18,11,82},{18,11,86},{18,
+11,90},{18,11,111},{18,11,115},{18,11,156},{19,11,40},{19,11,79},{20,11,78},{149
+,11,22},{137,11,170},{134,0,1433},{135,11,1307},{139,11,411},{5,0,189},{7,0,442}
+,{7,0,443},{8,0,281},{12,0,174},{141,0,261},{6,10,216},{7,10,901},{7,10,1343},{
+136,10,493},{5,11,397},{6,11,154},{7,10,341},{7,11,676},{8,11,443},{8,11,609},{9
+,11,24},{9,11,325},{10,11,35},{11,10,219},{11,11,535},{11,11,672},{11,11,1018},{
+12,11,637},{144,11,30},{6,0,2},{7,0,191},{7,0,446},{7,0,1262},{7,0,1737},{8,0,22
+},{8,0,270},{8,0,612},{9,0,4},{9,0,312},{9,0,436},{9,0,626},{10,0,216},{10,0,311
+},{10,0,521},{10,0,623},{11,0,72},{11,0,330},{11,0,455},{12,0,321},{12,0,504},{
+12,0,530},{12,0,543},{13,0,17},{13,0,156},{13,0,334},{14,0,131},{17,0,60},{148,0
+,64},{7,0,354},{10,0,410},{139,0,815},{139,10,130},{7,10,1734},{137,11,631},{12,
+0,425},{15,0,112},{10,10,115},{11,10,420},{13,10,404},{14,10,346},{143,10,54},{6
+,0,60},{6,0,166},{7,0,374},{7,0,670},{7,0,1327},{8,0,411},{8,0,435},{9,0,653},{9
+,0,740},{10,0,385},{11,0,222},{11,0,324},{11,0,829},{140,0,611},{7,0,1611},{13,0
+,14},{15,0,44},{19,0,13},{148,0,76},{133,11,981},{4,11,56},{7,11,1791},{8,11,607
+},{8,11,651},{11,11,465},{11,11,835},{12,11,337},{141,11,480},{6,0,1478},{5,10,
+1011},{136,10,701},{139,0,596},{5,0,206},{134,0,398},{4,10,54},{5,10,666},{7,10,
+1039},{7,10,1130},{9,10,195},{138,10,302},{7,0,50},{9,11,158},{138,11,411},{135,
+11,1120},{6,0,517},{7,0,1159},{10,0,621},{11,0,192},{134,10,1669},{4,0,592},{6,0
+,600},{135,0,1653},{10,0,223},{139,0,645},{136,11,139},{7,0,64},{136,0,245},{142
+,0,278},{6,11,622},{135,11,1030},{136,0,604},{134,0,1502},{138,0,265},{141,11,
+168},{7,0,1763},{140,0,310},{7,10,798},{139,11,719},{7,11,160},{10,11,624},{142,
+11,279},{132,11,363},{7,10,122},{9,10,259},{10,10,84},{11,10,470},{12,10,541},{
+141,10,379},{5,0,129},{6,0,61},{135,0,947},{134,0,1356},{135,11,1191},{13,0,505}
+,{141,0,506},{11,0,1000},{5,10,82},{5,10,131},{7,10,1755},{8,10,31},{9,10,168},{
+9,10,764},{139,10,869},{134,0,966},{134,10,605},{134,11,292},{5,11,177},{6,11,
+616},{7,11,827},{9,11,525},{138,11,656},{135,11,1486},{138,11,31},{5,10,278},{
+137,10,68},{4,10,163},{5,10,201},{5,10,307},{5,10,310},{6,10,335},{7,10,284},{
+136,10,165},{6,0,839},{135,10,1660},{136,10,781},{6,10,33},{135,10,1244},{133,0,
+637},{4,11,161},{133,11,631},{137,0,590},{7,10,1953},{136,10,720},{5,0,280},{7,0
+,1226},{138,10,203},{134,0,1386},{5,0,281},{6,0,1026},{6,10,326},{7,10,677},{137
+,10,425},{7,11,1557},{135,11,1684},{135,0,1064},{9,11,469},{9,11,709},{12,11,512
+},{14,11,65},{145,11,12},{134,0,917},{10,11,229},{11,11,73},{11,11,376},{139,11,
+433},{7,0,555},{9,0,192},{13,0,30},{13,0,49},{15,0,150},{16,0,76},{20,0,52},{7,
+10,1316},{7,10,1412},{7,10,1839},{9,10,589},{11,10,241},{11,10,676},{11,10,811},
+{11,10,891},{12,10,140},{12,10,346},{12,10,479},{13,10,381},{14,10,188},{146,10,
+30},{149,0,15},{6,0,1882},{6,0,1883},{6,0,1897},{9,0,945},{9,0,1014},{9,0,1020},
+{12,0,823},{12,0,842},{12,0,866},{12,0,934},{15,0,242},{146,0,208},{6,0,965},{
+134,0,1499},{7,0,33},{7,0,120},{8,0,489},{9,0,319},{10,0,820},{11,0,1004},{12,0,
+379},{12,0,679},{13,0,117},{13,0,412},{14,0,25},{15,0,52},{15,0,161},{16,0,47},{
+149,0,2},{6,11,558},{7,11,651},{8,11,421},{9,11,0},{138,11,34},{4,0,937},{5,0,
+801},{7,0,473},{5,10,358},{7,10,1184},{10,10,662},{13,10,212},{13,10,304},{13,10
+,333},{145,10,98},{132,0,877},{6,0,693},{134,0,824},{132,0,365},{7,11,1832},{138
+,11,374},{5,0,7},{139,0,774},{4,0,734},{5,0,662},{134,0,430},{4,0,746},{135,0,
+1090},{5,0,360},{8,0,237},{10,0,231},{147,0,124},{138,11,348},{6,11,6},{7,11,81}
+,{7,11,771},{7,11,1731},{9,11,405},{138,11,421},{6,0,740},{137,0,822},{133,10,
+946},{7,0,1485},{136,0,929},{7,10,411},{8,10,631},{9,10,323},{10,10,355},{11,10,
+491},{12,10,143},{12,10,402},{13,10,73},{14,10,408},{15,10,107},{146,10,71},{135
+,10,590},{5,11,881},{133,11,885},{150,11,25},{4,0,852},{5,11,142},{134,11,546},{
+7,10,1467},{8,10,328},{10,10,544},{11,10,955},{13,10,320},{145,10,83},{9,0,17},{
+10,0,291},{11,10,511},{13,10,394},{14,10,298},{14,10,318},{146,10,103},{5,11,466
+},{11,11,571},{12,11,198},{13,11,283},{14,11,186},{15,11,21},{143,11,103},{134,0
+,1001},{4,11,185},{5,11,257},{5,11,839},{5,11,936},{7,11,171},{9,11,399},{10,11,
+258},{10,11,395},{10,11,734},{11,11,1014},{12,11,23},{13,11,350},{14,11,150},{
+147,11,6},{143,0,35},{132,0,831},{5,10,835},{134,10,483},{4,0,277},{5,0,608},{6,
+0,493},{7,0,457},{12,0,384},{7,11,404},{7,11,1377},{7,11,1430},{7,11,2017},{8,11
+,149},{8,11,239},{8,11,512},{8,11,793},{8,11,818},{9,11,474},{9,11,595},{10,11,
+122},{10,11,565},{10,11,649},{10,11,783},{11,11,239},{11,11,295},{11,11,447},{11
+,11,528},{11,11,639},{11,11,800},{11,11,936},{12,11,25},{12,11,73},{12,11,77},{
+12,11,157},{12,11,316},{12,11,390},{12,11,391},{12,11,394},{12,11,395},{12,11,
+478},{12,11,503},{12,11,592},{12,11,680},{13,11,50},{13,11,53},{13,11,132},{13,
+11,198},{13,11,275},{13,11,322},{13,11,415},{14,11,71},{14,11,257},{14,11,395},{
+15,11,71},{15,11,136},{17,11,123},{18,11,93},{147,11,58},{134,0,1351},{7,0,27},{
+135,0,316},{136,11,712},{136,0,984},{133,0,552},{137,0,264},{132,0,401},{6,0,710
+},{6,0,1111},{134,0,1343},{134,0,1211},{9,0,543},{10,0,524},{11,0,108},{11,0,653
+},{12,0,524},{13,0,123},{14,0,252},{16,0,18},{19,0,38},{20,0,26},{20,0,65},{21,0
+,3},{151,0,11},{4,0,205},{5,0,623},{7,0,104},{8,0,519},{137,0,716},{132,10,677},
+{4,11,377},{152,11,13},{135,11,1673},{7,0,579},{9,0,41},{9,0,244},{9,0,669},{10,
+0,5},{11,0,861},{11,0,951},{139,0,980},{132,0,717},{136,0,1011},{132,0,805},{4,
+11,180},{135,11,1906},{132,10,777},{132,10,331},{132,0,489},{6,0,1024},{4,11,491
+},{133,10,747},{135,11,1182},{4,11,171},{138,11,234},{4,11,586},{7,11,1186},{138
+,11,631},{135,0,892},{135,11,336},{9,11,931},{10,11,334},{148,11,71},{137,0,473}
+,{6,0,864},{12,0,659},{139,11,926},{7,0,819},{9,0,26},{9,0,392},{10,0,152},{10,0
+,226},{11,0,19},{12,0,276},{12,0,426},{12,0,589},{13,0,460},{15,0,97},{19,0,48},
+{148,0,104},{135,0,51},{133,10,326},{4,10,691},{146,10,16},{9,0,130},{11,0,765},
+{10,10,680},{10,10,793},{141,10,357},{133,11,765},{8,0,229},{6,10,32},{7,10,385}
+,{7,10,757},{7,10,1916},{8,10,94},{8,10,711},{9,10,541},{10,10,162},{10,10,795},
+{11,10,989},{11,10,1010},{12,10,14},{142,10,308},{7,11,474},{137,11,578},{132,0,
+674},{132,0,770},{5,0,79},{7,0,1027},{7,0,1477},{139,0,52},{133,11,424},{134,0,
+1666},{6,0,409},{6,10,349},{6,10,1682},{7,10,1252},{8,10,112},{8,11,714},{9,10,
+435},{9,10,668},{10,10,290},{10,10,319},{10,10,815},{11,10,180},{11,10,837},{12,
+10,240},{13,10,152},{13,10,219},{142,10,158},{5,0,789},{134,0,195},{4,0,251},{4,
+0,688},{7,0,513},{135,0,1284},{132,10,581},{9,11,420},{10,11,269},{10,11,285},{
+10,11,576},{11,11,397},{13,11,175},{145,11,90},{6,10,126},{7,10,573},{8,10,397},
+{142,10,44},{132,11,429},{133,0,889},{4,0,160},{5,0,330},{7,0,1434},{136,0,174},
+{7,11,18},{7,11,699},{7,11,1966},{8,11,752},{9,11,273},{9,11,412},{9,11,703},{10
+,11,71},{10,11,427},{10,11,508},{146,11,97},{6,0,872},{134,0,899},{133,10,926},{
+134,0,1126},{134,0,918},{4,11,53},{5,11,186},{135,11,752},{7,0,268},{136,0,569},
+{134,0,1224},{6,0,1361},{7,10,1232},{137,10,531},{8,11,575},{10,11,289},{139,11,
+319},{133,10,670},{132,11,675},{133,0,374},{135,10,1957},{133,0,731},{11,0,190},
+{15,0,49},{11,11,190},{143,11,49},{4,0,626},{5,0,506},{5,0,642},{6,0,425},{10,0,
+202},{139,0,141},{137,0,444},{7,10,242},{135,10,1942},{6,11,209},{8,11,468},{9,
+11,210},{11,11,36},{12,11,28},{12,11,630},{13,11,21},{13,11,349},{14,11,7},{145,
+11,13},{4,11,342},{135,11,1179},{5,10,834},{7,10,1202},{8,10,14},{9,10,481},{137
+,10,880},{4,11,928},{133,11,910},{4,11,318},{4,11,496},{7,11,856},{139,11,654},{
+136,0,835},{7,0,1526},{138,10,465},{151,0,17},{135,0,477},{4,10,357},{6,10,172},
+{7,10,143},{137,10,413},{6,0,1374},{138,0,994},{18,0,76},{132,10,590},{7,0,287},
+{8,0,355},{9,0,293},{137,0,743},{134,0,1389},{7,11,915},{8,11,247},{147,11,0},{4
+,11,202},{5,11,382},{6,11,454},{7,11,936},{7,11,1803},{8,11,758},{9,11,375},{9,
+11,895},{10,11,743},{10,11,792},{11,11,978},{11,11,1012},{142,11,109},{5,0,384},
+{8,0,455},{140,0,48},{132,11,390},{5,10,169},{7,10,333},{136,10,45},{5,0,264},{
+134,0,184},{138,11,791},{133,11,717},{132,10,198},{6,11,445},{7,11,332},{137,11,
+909},{136,0,1001},{4,10,24},{5,10,140},{5,10,185},{7,10,1500},{11,10,565},{139,
+10,838},{134,11,578},{5,0,633},{6,0,28},{135,0,1323},{132,0,851},{136,11,267},{7
+,0,359},{8,0,243},{140,0,175},{4,10,334},{133,10,593},{141,11,87},{136,11,766},{
+10,0,287},{12,0,138},{10,11,287},{140,11,138},{4,0,105},{132,0,740},{140,10,116}
+,{134,0,857},{135,11,1841},{6,0,1402},{137,0,819},{132,11,584},{132,10,709},{133
+,10,897},{5,0,224},{13,0,174},{146,0,52},{135,10,1840},{4,10,608},{133,10,497},{
+139,11,60},{4,0,758},{135,0,1649},{4,11,226},{4,11,326},{135,11,1770},{5,11,426}
+,{8,11,30},{9,11,2},{11,11,549},{147,11,122},{135,10,2039},{6,10,540},{136,10,
+136},{4,0,573},{8,0,655},{4,10,897},{133,10,786},{7,0,351},{139,0,128},{133,10,
+999},{4,10,299},{135,10,1004},{133,0,918},{132,11,345},{4,11,385},{7,11,265},{
+135,11,587},{133,10,456},{136,10,180},{6,0,687},{134,0,1537},{4,11,347},{5,11,
+423},{5,11,996},{135,11,1329},{132,10,755},{7,11,1259},{9,11,125},{11,11,65},{
+140,11,285},{5,11,136},{6,11,136},{136,11,644},{134,0,1525},{4,0,1009},{135,0,
+1139},{139,10,338},{132,0,340},{135,10,1464},{8,0,847},{10,0,861},{10,0,876},{10
+,0,889},{10,0,922},{10,0,929},{10,0,933},{12,0,784},{140,0,791},{139,0,176},{9,
+11,134},{10,11,2},{10,11,27},{10,11,333},{11,11,722},{143,11,1},{4,11,433},{133,
+11,719},{5,0,985},{7,0,509},{7,0,529},{145,0,96},{132,0,615},{4,10,890},{5,10,
+805},{5,10,819},{5,10,961},{6,10,396},{6,10,1631},{6,10,1678},{7,10,1967},{7,10,
+2041},{9,10,630},{11,10,8},{11,10,1019},{12,10,176},{13,10,225},{14,10,292},{149
+,10,24},{135,0,1919},{134,0,1131},{144,11,21},{144,11,51},{135,10,1815},{4,0,247
+},{7,10,1505},{10,10,190},{10,10,634},{11,10,792},{12,10,358},{140,10,447},{5,10
+,0},{6,10,536},{7,10,604},{13,10,445},{145,10,126},{4,0,184},{5,0,390},{6,0,337}
+,{7,0,23},{7,0,494},{7,0,618},{7,0,1456},{8,0,27},{8,0,599},{10,0,153},{139,0,
+710},{6,10,232},{6,10,412},{7,10,1074},{8,10,9},{8,10,157},{8,10,786},{9,10,196}
+,{9,10,352},{9,10,457},{10,10,337},{11,10,232},{11,10,877},{12,10,480},{140,10,
+546},{13,0,38},{135,10,958},{4,10,382},{136,10,579},{4,10,212},{135,10,1206},{4,
+11,555},{8,11,536},{138,11,288},{11,11,139},{139,11,171},{9,11,370},{138,11,90},
+{132,0,1015},{134,0,1088},{5,10,655},{135,11,977},{134,0,1585},{17,10,67},{147,
+10,74},{10,0,227},{11,0,497},{11,0,709},{140,0,415},{6,0,360},{7,0,1664},{136,0,
+478},{7,0,95},{6,10,231},{136,10,423},{140,11,65},{4,11,257},{135,11,2031},{135,
+11,1768},{133,10,300},{139,11,211},{136,0,699},{6,10,237},{7,10,611},{8,10,100},
+{9,10,416},{11,10,335},{12,10,173},{146,10,101},{14,0,26},{146,0,150},{6,0,581},
+{135,0,1119},{135,10,1208},{132,0,739},{6,11,83},{6,11,1733},{135,11,1389},{137,
+0,869},{4,0,67},{5,0,422},{7,0,1037},{7,0,1289},{7,0,1555},{9,0,741},{145,0,108}
+,{133,10,199},{12,10,427},{146,10,38},{136,0,464},{142,0,42},{10,0,96},{8,11,501
+},{137,11,696},{134,11,592},{4,0,512},{4,0,966},{5,0,342},{6,0,1855},{8,0,869},{
+8,0,875},{8,0,901},{144,0,26},{8,0,203},{11,0,823},{11,0,846},{12,0,482},{13,0,
+277},{13,0,302},{13,0,464},{14,0,205},{142,0,221},{4,0,449},{133,0,718},{7,11,
+1718},{9,11,95},{9,11,274},{10,11,279},{10,11,317},{10,11,420},{11,11,303},{11,
+11,808},{12,11,134},{12,11,367},{13,11,149},{13,11,347},{14,11,349},{14,11,406},
+{18,11,22},{18,11,89},{18,11,122},{147,11,47},{133,11,26},{4,0,355},{6,0,311},{9
+,0,256},{138,0,404},{132,11,550},{10,0,758},{6,10,312},{6,10,1715},{10,10,584},{
+11,10,546},{11,10,692},{12,10,259},{12,10,295},{13,10,46},{141,10,154},{136,11,
+822},{5,0,827},{4,11,902},{5,11,809},{6,11,122},{135,11,896},{5,0,64},{140,0,581
+},{4,0,442},{6,0,739},{7,0,1047},{7,0,1352},{7,0,1643},{7,11,1911},{9,11,449},{
+10,11,192},{138,11,740},{135,11,262},{132,10,588},{133,11,620},{5,0,977},{6,0,
+288},{7,0,528},{4,11,34},{5,11,574},{7,11,279},{7,11,1624},{136,11,601},{6,0,
+1375},{4,10,231},{5,10,61},{6,10,104},{7,10,729},{7,10,964},{7,10,1658},{140,10,
+414},{6,10,263},{138,10,757},{132,10,320},{4,0,254},{7,0,1309},{5,11,332},{135,
+11,1309},{6,11,261},{8,11,182},{139,11,943},{132,10,225},{6,0,12},{135,0,1219},{
+4,0,275},{12,0,376},{6,11,1721},{141,11,490},{4,11,933},{133,11,880},{6,0,951},{
+6,0,1109},{6,0,1181},{7,0,154},{4,10,405},{7,10,817},{14,10,58},{17,10,37},{146,
+10,124},{6,0,1520},{133,10,974},{134,0,1753},{6,0,369},{6,0,502},{7,0,1036},{8,0
+,348},{9,0,452},{10,0,26},{11,0,224},{11,0,387},{11,0,772},{12,0,95},{12,0,629},
+{13,0,195},{13,0,207},{13,0,241},{14,0,260},{14,0,270},{143,0,140},{132,0,269},{
+5,0,480},{7,0,532},{7,0,1197},{7,0,1358},{8,0,291},{11,0,349},{142,0,396},{5,10,
+235},{7,10,1239},{11,10,131},{140,10,370},{7,10,956},{7,10,1157},{7,10,1506},{7,
+10,1606},{7,10,1615},{7,10,1619},{7,10,1736},{7,10,1775},{8,10,590},{9,10,324},{
+9,10,736},{9,10,774},{9,10,776},{9,10,784},{10,10,567},{10,10,708},{11,10,518},{
+11,10,613},{11,10,695},{11,10,716},{11,10,739},{11,10,770},{11,10,771},{11,10,
+848},{11,10,857},{11,10,931},{11,10,947},{12,10,326},{12,10,387},{12,10,484},{12
+,10,528},{12,10,552},{12,10,613},{13,10,189},{13,10,256},{13,10,340},{13,10,432}
+,{13,10,436},{13,10,440},{13,10,454},{14,10,174},{14,10,220},{14,10,284},{14,10,
+390},{145,10,121},{8,11,598},{9,11,664},{138,11,441},{9,10,137},{138,10,221},{
+133,11,812},{148,0,15},{134,0,1341},{6,0,1017},{4,11,137},{7,11,1178},{135,11,
+1520},{7,10,390},{138,10,140},{7,11,1260},{135,11,1790},{137,11,191},{135,10,
+1144},{6,0,1810},{7,0,657},{8,0,886},{10,0,857},{14,0,440},{144,0,96},{8,0,533},
+{6,11,1661},{7,11,1975},{7,11,2009},{135,11,2011},{6,0,1453},{134,10,464},{132,
+11,715},{5,10,407},{11,10,204},{11,10,243},{11,10,489},{12,10,293},{19,10,37},{
+20,10,73},{150,10,38},{133,11,703},{4,0,211},{7,0,1483},{5,10,325},{8,10,5},{8,
+10,227},{9,10,105},{10,10,585},{140,10,614},{4,0,332},{5,0,335},{6,0,238},{7,0,
+269},{7,0,811},{7,0,1797},{8,0,836},{9,0,507},{141,0,242},{5,11,89},{7,11,1915},
+{9,11,185},{9,11,235},{9,11,496},{10,11,64},{10,11,270},{10,11,403},{10,11,469},
+{10,11,529},{10,11,590},{11,11,140},{11,11,860},{13,11,1},{13,11,422},{14,11,341
+},{14,11,364},{17,11,93},{18,11,113},{19,11,97},{147,11,113},{133,11,695},{16,0,
+19},{5,11,6},{6,11,183},{6,10,621},{7,11,680},{7,11,978},{7,11,1013},{7,11,1055}
+,{12,11,230},{13,11,172},{13,10,504},{146,11,29},{136,0,156},{133,0,1009},{6,11,
+29},{139,11,63},{134,0,820},{134,10,218},{7,10,454},{7,10,782},{8,10,768},{140,
+10,686},{5,0,228},{6,0,203},{7,0,156},{8,0,347},{9,0,265},{18,0,39},{20,0,54},{
+21,0,31},{22,0,3},{23,0,0},{15,11,8},{18,11,39},{20,11,54},{21,11,31},{22,11,3},
+{151,11,0},{7,0,1131},{135,0,1468},{144,10,0},{134,0,1276},{10,10,676},{140,10,
+462},{132,11,311},{134,11,1740},{7,11,170},{8,11,90},{8,11,177},{8,11,415},{11,
+11,714},{142,11,281},{134,10,164},{6,0,1792},{138,0,849},{150,10,50},{5,0,291},{
+5,0,318},{7,0,765},{9,0,389},{12,0,548},{8,11,522},{142,11,328},{11,11,91},{13,
+11,129},{15,11,101},{145,11,125},{4,11,494},{6,11,74},{7,11,44},{7,11,407},{8,11
+,551},{12,11,17},{15,11,5},{148,11,11},{4,11,276},{133,11,296},{6,10,343},{7,10,
+195},{7,11,1777},{9,10,226},{10,10,197},{10,10,575},{11,10,502},{139,10,899},{10
+,0,525},{139,0,82},{14,0,453},{4,11,7},{5,11,90},{5,11,158},{6,11,542},{7,11,221
+},{7,11,1574},{9,11,490},{10,11,540},{11,11,443},{139,11,757},{135,0,666},{22,10
+,29},{150,11,29},{4,0,422},{147,10,8},{5,0,355},{145,0,0},{6,0,1873},{9,0,918},{
+7,11,588},{9,11,175},{138,11,530},{143,11,31},{11,0,165},{7,10,1125},{9,10,143},
+{14,10,405},{150,10,21},{9,0,260},{137,0,905},{5,11,872},{6,11,57},{6,11,479},{6
+,11,562},{7,11,471},{7,11,1060},{9,11,447},{9,11,454},{141,11,6},{138,11,704},{
+133,0,865},{5,0,914},{134,0,1625},{133,0,234},{7,0,1383},{5,11,31},{6,11,614},{
+145,11,61},{7,11,1200},{138,11,460},{6,11,424},{135,11,1866},{136,0,306},{5,10,
+959},{12,11,30},{13,11,148},{14,11,87},{14,11,182},{16,11,42},{18,11,92},{148,11
+,70},{6,0,1919},{6,0,1921},{9,0,923},{9,0,930},{9,0,941},{9,0,949},{9,0,987},{9,
+0,988},{9,0,992},{12,0,802},{12,0,815},{12,0,856},{12,0,885},{12,0,893},{12,0,
+898},{12,0,919},{12,0,920},{12,0,941},{12,0,947},{15,0,183},{15,0,185},{15,0,189
+},{15,0,197},{15,0,202},{15,0,233},{18,0,218},{18,0,219},{18,0,233},{143,11,156}
+,{135,10,1759},{136,10,173},{13,0,163},{13,0,180},{18,0,78},{20,0,35},{5,11,13},
+{134,11,142},{134,10,266},{6,11,97},{7,11,116},{8,11,322},{8,11,755},{9,11,548},
+{10,11,714},{11,11,884},{141,11,324},{135,0,1312},{9,0,814},{137,11,676},{133,0,
+707},{135,0,1493},{6,0,421},{7,0,61},{7,0,1540},{10,0,11},{138,0,501},{12,0,733}
+,{12,0,766},{7,11,866},{135,11,1163},{137,0,341},{142,0,98},{145,11,115},{135,11
+,1111},{136,10,300},{136,0,1014},{8,11,1},{9,11,112},{138,11,326},{132,11,730},{
+5,11,488},{6,11,527},{7,11,489},{7,11,1636},{8,11,121},{8,11,144},{8,11,359},{9,
+11,193},{9,11,241},{9,11,336},{9,11,882},{11,11,266},{11,11,372},{11,11,944},{12
+,11,401},{140,11,641},{6,0,971},{134,0,1121},{6,0,102},{7,0,72},{15,0,142},{147,
+0,67},{151,0,30},{135,0,823},{134,0,1045},{5,10,427},{5,10,734},{7,10,478},{136,
+10,52},{7,0,1930},{11,10,217},{142,10,165},{6,0,1512},{135,0,1870},{9,11,31},{10
+,11,244},{10,11,699},{12,11,149},{141,11,497},{133,11,377},{145,11,101},{10,11,
+158},{13,11,13},{13,11,137},{13,11,258},{14,11,111},{14,11,225},{14,11,253},{14,
+11,304},{14,11,339},{14,11,417},{146,11,33},{6,0,87},{6,10,1734},{7,10,20},{7,10
+,1056},{8,10,732},{9,10,406},{9,10,911},{138,10,694},{134,0,1243},{137,0,245},{7
+,0,68},{8,0,48},{8,0,88},{8,0,582},{8,0,681},{9,0,373},{9,0,864},{11,0,157},{11,
+0,336},{11,0,843},{148,0,27},{8,11,663},{144,11,8},{133,10,613},{4,0,88},{5,0,
+137},{5,0,174},{5,0,777},{6,0,1664},{6,0,1725},{7,0,77},{7,0,426},{7,0,1317},{7,
+0,1355},{8,0,126},{8,0,563},{9,0,523},{9,0,750},{10,0,310},{10,0,836},{11,0,42},
+{11,0,318},{11,0,731},{12,0,68},{12,0,92},{12,0,507},{12,0,692},{13,0,81},{13,0,
+238},{13,0,374},{14,0,436},{18,0,138},{19,0,78},{19,0,111},{20,0,55},{20,0,77},{
+148,0,92},{141,0,418},{4,0,938},{137,0,625},{138,0,351},{5,11,843},{7,10,32},{7,
+10,984},{8,10,85},{8,10,709},{9,10,579},{9,10,847},{9,10,856},{10,10,799},{11,10
+,258},{11,10,1007},{12,10,331},{12,10,615},{13,10,188},{13,10,435},{14,10,8},{15
+,10,165},{16,10,27},{148,10,40},{6,0,1668},{7,0,1499},{8,0,117},{9,0,314},{138,0
+,174},{135,0,707},{132,11,554},{133,11,536},{5,0,403},{5,11,207},{9,11,79},{11,
+11,625},{145,11,7},{132,11,424},{136,11,785},{4,10,167},{135,10,82},{9,0,7},{23,
+0,6},{9,11,7},{151,11,6},{6,0,282},{5,10,62},{6,10,534},{7,10,74},{7,10,678},{7,
+10,684},{7,10,1043},{7,10,1072},{8,10,280},{8,10,541},{8,10,686},{9,10,258},{10,
+10,519},{11,10,252},{140,10,282},{138,10,33},{132,10,359},{4,0,44},{5,0,311},{6,
+0,156},{7,0,639},{7,0,762},{7,0,1827},{9,0,8},{9,0,462},{148,0,83},{7,11,769},{9
+,11,18},{138,11,358},{4,0,346},{7,0,115},{9,0,180},{9,0,456},{10,0,363},{4,11,
+896},{134,11,1777},{133,10,211},{7,0,761},{7,0,1051},{137,0,545},{6,10,145},{141
+,10,336},{7,11,750},{9,11,223},{11,11,27},{11,11,466},{12,11,624},{14,11,265},{
+146,11,61},{6,0,752},{6,0,768},{6,0,1195},{6,0,1254},{6,0,1619},{137,0,835},{6,0
+,1936},{8,0,930},{136,0,960},{132,10,263},{132,11,249},{12,0,653},{132,10,916},{
+4,11,603},{133,11,661},{8,0,344},{4,11,11},{6,11,128},{7,11,231},{7,11,1533},{
+138,11,725},{134,0,1483},{134,0,875},{6,0,185},{7,0,1899},{9,0,875},{139,0,673},
+{15,10,155},{144,10,79},{7,0,93},{7,0,210},{7,0,1223},{8,0,451},{8,0,460},{11,0,
+353},{11,0,475},{4,10,599},{6,10,1634},{7,10,67},{7,10,691},{7,10,979},{7,10,
+1697},{8,10,207},{8,10,214},{8,10,231},{8,10,294},{8,10,336},{8,10,428},{8,10,
+471},{8,10,622},{8,10,626},{8,10,679},{8,10,759},{8,10,829},{9,10,11},{9,10,246}
+,{9,10,484},{9,10,573},{9,10,706},{9,10,762},{9,10,798},{9,10,855},{9,10,870},{9
+,10,912},{10,10,303},{10,10,335},{10,10,424},{10,10,461},{10,10,543},{10,10,759}
+,{10,10,814},{11,10,59},{11,10,235},{11,10,590},{11,10,929},{11,10,963},{11,10,
+987},{12,10,114},{12,10,182},{12,10,226},{12,10,332},{12,10,439},{12,10,575},{12
+,10,598},{12,10,675},{13,10,8},{13,10,125},{13,10,194},{13,10,287},{14,10,197},{
+14,10,383},{15,10,53},{17,10,63},{19,10,46},{19,10,98},{19,10,106},{148,10,85},{
+132,11,476},{4,0,327},{5,0,478},{7,0,1332},{136,0,753},{5,0,1020},{133,0,1022},{
+135,11,1807},{4,0,103},{133,0,401},{4,0,499},{135,0,1421},{10,0,207},{13,0,164},
+{147,10,126},{9,11,20},{10,11,324},{139,11,488},{132,0,96},{9,11,280},{138,11,
+134},{135,0,968},{133,10,187},{135,10,1286},{5,11,112},{6,11,103},{134,11,150},{
+8,0,914},{10,0,3},{4,10,215},{9,10,38},{11,10,23},{11,10,127},{139,10,796},{135,
+0,399},{6,0,563},{137,0,224},{6,0,704},{134,0,1214},{4,11,708},{8,11,15},{9,11,
+50},{9,11,386},{11,11,18},{11,11,529},{140,11,228},{4,11,563},{7,11,109},{7,11,
+592},{7,11,637},{7,11,770},{7,11,1701},{8,11,436},{8,11,463},{9,11,60},{9,11,335
+},{9,11,904},{10,11,73},{11,11,434},{12,11,585},{13,11,331},{18,11,110},{148,11,
+60},{134,0,1559},{132,11,502},{6,11,347},{138,11,161},{4,11,33},{5,11,102},{5,11
+,500},{6,11,284},{7,11,1079},{7,11,1423},{7,11,1702},{8,11,470},{9,11,554},{9,11
+,723},{139,11,333},{7,11,246},{135,11,840},{6,11,10},{8,11,571},{9,11,739},{143,
+11,91},{8,0,861},{10,0,905},{12,0,730},{12,0,789},{133,11,626},{134,0,946},{5,0,
+746},{12,0,333},{14,0,332},{12,11,333},{142,11,332},{5,11,18},{6,11,526},{13,11,
+24},{13,11,110},{19,11,5},{147,11,44},{4,0,910},{5,0,832},{135,10,2002},{10,11,
+768},{139,11,787},{4,11,309},{5,11,462},{7,11,970},{135,11,1097},{4,10,28},{5,10
+,440},{7,10,248},{11,10,833},{140,10,344},{134,10,1654},{6,0,632},{6,0,652},{6,0
+,1272},{6,0,1384},{134,0,1560},{134,11,1704},{6,0,1393},{133,10,853},{6,10,249},
+{7,10,1234},{139,10,573},{5,11,86},{7,11,743},{9,11,85},{10,11,281},{10,11,432},
+{11,11,490},{12,11,251},{13,11,118},{14,11,378},{146,11,143},{5,11,524},{133,11,
+744},{134,0,1514},{10,0,201},{142,0,319},{7,0,717},{10,0,510},{7,10,392},{8,10,
+20},{8,10,172},{8,10,690},{9,10,383},{9,10,845},{11,10,293},{11,10,832},{11,10,
+920},{11,10,984},{141,10,221},{134,0,1381},{5,10,858},{133,10,992},{8,0,528},{
+137,0,348},{10,11,107},{140,11,436},{4,0,20},{133,0,616},{134,0,1251},{132,11,
+927},{10,11,123},{12,11,670},{13,11,371},{14,11,142},{146,11,94},{134,0,1163},{7
+,11,1149},{137,11,156},{134,0,307},{133,11,778},{7,0,1091},{135,0,1765},{5,11,
+502},{6,10,268},{137,10,62},{8,11,196},{10,11,283},{139,11,406},{4,0,26},{5,0,
+429},{6,0,245},{7,0,704},{7,0,1379},{135,0,1474},{133,11,855},{132,0,881},{4,0,
+621},{135,11,1596},{7,11,1400},{9,11,446},{138,11,45},{6,0,736},{138,10,106},{
+133,0,542},{134,0,348},{133,0,868},{136,0,433},{135,0,1495},{138,0,771},{6,10,
+613},{136,10,223},{138,0,215},{141,0,124},{136,11,391},{135,11,172},{132,10,670}
+,{140,0,55},{9,10,40},{139,10,136},{7,0,62},{147,0,112},{132,0,856},{132,11,568}
+,{12,0,270},{139,10,259},{8,0,572},{137,0,698},{4,11,732},{9,10,310},{137,10,682
+},{142,10,296},{134,0,939},{136,11,733},{135,11,1435},{7,10,1401},{135,10,1476},
+{6,0,352},{4,10,296},{7,10,401},{7,10,1410},{7,10,1594},{7,10,1674},{8,10,63},{8
+,10,660},{137,10,74},{4,11,428},{133,11,668},{4,10,139},{4,10,388},{140,10,188},
+{7,11,2015},{140,11,665},{132,0,647},{146,0,10},{138,0,220},{142,0,464},{132,0,
+109},{134,0,1746},{6,0,515},{4,10,747},{6,11,1623},{6,11,1681},{7,10,649},{7,10,
+1479},{135,10,1583},{133,10,232},{135,0,566},{137,10,887},{4,0,40},{10,0,67},{11
+,0,117},{11,0,768},{139,0,935},{132,0,801},{7,0,992},{8,0,301},{9,0,722},{12,0,
+63},{13,0,29},{14,0,161},{143,0,18},{139,0,923},{6,11,1748},{8,11,715},{9,11,802
+},{10,11,46},{10,11,819},{13,11,308},{14,11,351},{14,11,363},{146,11,67},{137,11
+,745},{7,0,1145},{4,10,14},{7,10,1801},{10,10,748},{141,10,458},{4,11,63},{5,11,
+347},{134,11,474},{135,0,568},{4,10,425},{7,11,577},{7,11,1432},{9,11,475},{9,11
+,505},{9,11,526},{9,11,609},{9,11,689},{9,11,726},{9,11,735},{9,11,738},{10,11,
+556},{10,11,674},{10,11,684},{11,11,89},{11,11,202},{11,11,272},{11,11,380},{11,
+11,415},{11,11,505},{11,11,537},{11,11,550},{11,11,562},{11,11,640},{11,11,667},
+{11,11,688},{11,11,847},{11,11,927},{11,11,930},{11,11,940},{12,11,144},{12,11,
+325},{12,11,329},{12,11,389},{12,11,403},{12,11,451},{12,11,515},{12,11,604},{12
+,11,616},{12,11,626},{13,11,66},{13,11,131},{13,11,167},{13,11,236},{13,11,368},
+{13,11,411},{13,11,434},{13,11,453},{13,11,461},{13,11,474},{14,11,59},{14,11,60
+},{14,11,139},{14,11,152},{14,11,276},{14,11,353},{14,11,402},{15,11,28},{15,11,
+81},{15,11,123},{15,11,152},{18,11,136},{148,11,88},{137,0,247},{135,11,1622},{9
+,11,544},{11,11,413},{144,11,25},{4,0,645},{7,0,825},{6,10,1768},{135,11,89},{
+140,0,328},{5,10,943},{134,10,1779},{134,0,1363},{5,10,245},{6,10,576},{7,10,582
+},{136,10,225},{134,0,1280},{5,11,824},{133,11,941},{7,11,440},{8,11,230},{139,
+11,106},{5,0,28},{6,0,204},{10,0,320},{10,0,583},{13,0,502},{14,0,72},{14,0,274}
+,{14,0,312},{14,0,344},{15,0,159},{16,0,62},{16,0,69},{17,0,30},{18,0,42},{18,0,
+53},{18,0,84},{18,0,140},{19,0,68},{19,0,85},{20,0,5},{20,0,45},{20,0,101},{22,0
+,7},{150,0,20},{4,0,558},{6,0,390},{7,0,162},{7,0,689},{9,0,360},{138,0,653},{
+134,0,764},{6,0,862},{137,0,833},{5,0,856},{6,0,1672},{6,0,1757},{134,0,1781},{5
+,0,92},{10,0,736},{140,0,102},{6,0,1927},{6,0,1944},{8,0,924},{8,0,948},{10,0,
+967},{138,0,978},{134,0,1479},{5,0,590},{8,0,360},{9,0,213},{138,0,63},{134,0,
+1521},{6,0,709},{134,0,891},{132,10,443},{13,0,477},{14,0,120},{148,0,61},{4,11,
+914},{5,11,800},{133,11,852},{10,11,54},{141,11,115},{4,11,918},{133,11,876},{
+139,11,152},{4,11,92},{133,11,274},{135,11,1901},{9,11,800},{10,11,693},{11,11,
+482},{11,11,734},{139,11,789},{9,0,483},{132,10,298},{6,0,1213},{141,11,498},{
+135,11,1451},{133,11,743},{4,0,1022},{10,0,1000},{12,0,957},{12,0,980},{12,0,
+1013},{14,0,481},{144,0,116},{8,0,503},{17,0,29},{4,11,49},{7,11,280},{135,11,
+1633},{135,0,1712},{134,0,466},{136,11,47},{5,10,164},{7,10,121},{142,10,189},{7
+,10,812},{7,10,1261},{7,10,1360},{9,10,632},{140,10,352},{139,10,556},{132,0,731
+},{5,11,272},{5,11,908},{5,11,942},{7,11,1008},{7,11,1560},{8,11,197},{9,11,47},
+{11,11,538},{139,11,742},{4,10,172},{9,10,611},{10,10,436},{12,10,673},{141,10,
+255},{133,10,844},{10,0,484},{11,0,754},{12,0,457},{14,0,171},{14,0,389},{146,0,
+153},{9,10,263},{10,10,147},{138,10,492},{137,11,891},{138,0,241},{133,10,537},{
+6,0,2005},{136,0,964},{137,10,842},{151,11,8},{4,11,407},{132,11,560},{135,11,
+1884},{6,0,1100},{134,0,1242},{135,0,954},{5,10,230},{5,10,392},{6,10,420},{9,10
+,568},{140,10,612},{4,11,475},{11,11,35},{11,11,90},{13,11,7},{13,11,71},{13,11,
+177},{142,11,422},{136,11,332},{135,0,1958},{6,0,549},{8,0,34},{8,0,283},{9,0,
+165},{138,0,475},{10,0,952},{12,0,966},{140,0,994},{5,0,652},{5,0,701},{135,0,
+449},{4,0,655},{7,0,850},{17,0,75},{146,0,137},{4,0,146},{7,0,1618},{8,0,670},{5
+,10,41},{7,10,1459},{7,10,1469},{7,10,1859},{9,10,549},{139,10,905},{133,10,696}
+,{6,0,159},{6,0,364},{7,0,516},{137,0,518},{135,0,1439},{6,11,222},{7,11,636},{7
+,11,1620},{8,11,409},{9,11,693},{139,11,77},{13,0,151},{141,11,45},{6,0,1027},{4
+,11,336},{132,10,771},{139,11,392},{10,11,121},{11,11,175},{149,11,16},{8,0,950}
+,{138,0,983},{133,10,921},{135,0,993},{6,10,180},{7,10,1137},{8,10,751},{139,10,
+805},{7,0,501},{9,0,111},{10,0,141},{11,0,332},{13,0,43},{13,0,429},{14,0,130},{
+14,0,415},{145,0,102},{4,10,183},{5,11,882},{7,10,271},{11,10,824},{11,10,952},{
+13,10,278},{13,10,339},{13,10,482},{14,10,424},{148,10,99},{4,10,19},{5,10,477},
+{5,10,596},{6,10,505},{7,10,1221},{11,10,907},{12,10,209},{141,10,214},{135,10,
+1215},{133,0,452},{132,11,426},{5,0,149},{136,0,233},{133,0,935},{6,11,58},{7,11
+,654},{7,11,745},{7,11,1969},{8,11,240},{8,11,675},{9,11,479},{9,11,731},{10,11,
+330},{10,11,593},{10,11,817},{11,11,32},{11,11,133},{11,11,221},{145,11,68},{12,
+0,582},{18,0,131},{7,11,102},{137,11,538},{136,0,801},{134,10,1645},{132,0,70},{
+6,10,92},{6,10,188},{7,10,1269},{7,10,1524},{7,10,1876},{10,10,228},{139,10,1020
+},{4,10,459},{133,10,966},{138,0,369},{16,0,36},{140,10,330},{141,11,366},{7,0,
+721},{10,0,236},{12,0,204},{6,10,18},{7,10,932},{8,10,757},{9,10,54},{9,10,65},{
+9,10,844},{10,10,113},{10,10,315},{10,10,798},{11,10,153},{12,10,151},{12,10,392
+},{12,10,666},{142,10,248},{7,0,241},{10,0,430},{8,10,548},{9,10,532},{10,10,117
+},{11,10,351},{11,10,375},{143,10,23},{134,10,1742},{133,10,965},{133,11,566},{6
+,11,48},{135,11,63},{134,10,182},{10,10,65},{10,10,488},{138,10,497},{6,11,114},
+{7,11,1224},{7,11,1556},{136,11,3},{134,0,1817},{8,11,576},{137,11,267},{6,0,
+1078},{144,0,16},{9,10,588},{138,10,260},{138,0,1021},{5,0,406},{134,0,2022},{
+133,11,933},{6,0,69},{135,0,117},{7,0,1830},{136,11,427},{4,0,432},{135,0,824},{
+134,10,1786},{133,0,826},{139,11,67},{133,11,759},{135,10,308},{137,0,816},{133,
+0,1000},{4,0,297},{6,0,529},{7,0,152},{7,0,713},{7,0,1845},{8,0,710},{8,0,717},{
+12,0,639},{140,0,685},{7,0,423},{136,10,588},{136,10,287},{136,0,510},{134,0,
+1048},{6,0,618},{7,11,56},{7,11,1989},{8,11,337},{8,11,738},{9,11,600},{10,11,
+483},{12,11,37},{13,11,447},{142,11,92},{4,0,520},{135,0,575},{8,0,990},{138,0,
+977},{135,11,774},{9,11,347},{11,11,24},{140,11,170},{136,11,379},{140,10,290},{
+132,11,328},{4,0,321},{134,0,569},{4,11,101},{135,11,1171},{7,0,723},{7,0,1135},
+{5,11,833},{136,11,744},{7,10,719},{8,10,809},{136,10,834},{8,0,921},{136,10,796
+},{5,10,210},{6,10,213},{7,10,60},{10,10,364},{139,10,135},{5,0,397},{6,0,154},{
+7,0,676},{8,0,443},{8,0,609},{9,0,24},{9,0,325},{10,0,35},{11,0,535},{11,0,672},
+{11,0,1018},{12,0,637},{16,0,30},{5,10,607},{8,10,326},{136,10,490},{4,10,701},{
+5,10,472},{6,11,9},{6,11,397},{7,11,53},{7,11,1742},{9,10,758},{10,11,632},{11,
+11,828},{140,11,146},{135,10,380},{135,10,1947},{148,11,109},{10,10,278},{138,11
+,278},{134,0,856},{7,0,139},{4,10,386},{8,10,405},{8,10,728},{9,10,497},{11,10,
+110},{11,10,360},{15,10,37},{144,10,84},{141,0,282},{133,0,981},{5,0,288},{7,10,
+1452},{7,10,1480},{8,10,634},{140,10,472},{7,0,1890},{8,11,367},{10,11,760},{14,
+11,79},{20,11,17},{152,11,0},{4,10,524},{136,10,810},{4,0,56},{7,0,1791},{8,0,
+607},{8,0,651},{11,0,465},{11,0,835},{12,0,337},{141,0,480},{10,10,238},{141,10,
+33},{11,11,417},{12,11,223},{140,11,265},{9,0,158},{10,0,411},{140,0,261},{133,
+10,532},{133,10,997},{12,11,186},{12,11,292},{14,11,100},{146,11,70},{6,0,1403},
+{136,0,617},{134,0,1205},{139,0,563},{4,0,242},{134,0,333},{4,11,186},{5,11,157}
+,{8,11,168},{138,11,6},{132,0,369},{133,11,875},{5,10,782},{5,10,829},{134,10,
+1738},{134,0,622},{135,11,1272},{6,0,1407},{7,11,111},{136,11,581},{7,10,1823},{
+139,10,693},{7,0,160},{10,0,624},{142,0,279},{132,0,363},{10,11,589},{12,11,111}
+,{13,11,260},{14,11,82},{18,11,63},{147,11,45},{7,11,1364},{7,11,1907},{141,11,
+158},{4,11,404},{4,11,659},{135,11,675},{13,11,211},{14,11,133},{14,11,204},{15,
+11,64},{15,11,69},{15,11,114},{16,11,10},{19,11,23},{19,11,35},{19,11,39},{19,11
+,51},{19,11,71},{19,11,75},{152,11,15},{4,10,78},{5,10,96},{5,10,182},{7,10,1724
+},{7,10,1825},{10,10,394},{10,10,471},{11,10,532},{14,10,340},{145,10,88},{135,
+10,1964},{133,11,391},{11,11,887},{14,11,365},{142,11,375},{5,11,540},{6,11,1697
+},{7,11,222},{136,11,341},{134,11,78},{9,0,601},{9,0,619},{10,0,505},{10,0,732},
+{11,0,355},{140,0,139},{134,0,292},{139,0,174},{5,0,177},{6,0,616},{7,0,827},{9,
+0,525},{138,0,656},{10,0,31},{6,10,215},{7,10,1028},{7,10,1473},{7,10,1721},{9,
+10,424},{138,10,779},{135,10,584},{136,11,293},{134,0,685},{135,11,1868},{133,11
+,460},{7,0,647},{6,10,67},{7,10,1630},{9,10,354},{9,10,675},{10,10,830},{14,10,
+80},{145,10,80},{4,0,161},{133,0,631},{6,10,141},{7,10,225},{9,10,59},{9,10,607}
+,{10,10,312},{11,10,687},{12,10,555},{13,10,373},{13,10,494},{148,10,58},{7,11,
+965},{7,11,1460},{135,11,1604},{136,10,783},{134,11,388},{6,0,722},{6,0,1267},{4
+,11,511},{9,11,333},{9,11,379},{10,11,602},{11,11,441},{11,11,723},{11,11,976},{
+140,11,357},{134,0,1797},{135,0,1684},{9,0,469},{9,0,709},{12,0,512},{14,0,65},{
+17,0,12},{5,11,938},{136,11,707},{7,0,1230},{136,0,531},{10,0,229},{11,0,73},{11
+,0,376},{139,0,433},{12,0,268},{12,0,640},{142,0,119},{7,10,430},{139,10,46},{6,
+0,558},{7,0,651},{8,0,421},{9,0,0},{10,0,34},{139,0,1008},{6,0,106},{7,0,1786},{
+7,0,1821},{9,0,102},{9,0,763},{5,10,602},{7,10,2018},{137,10,418},{5,0,65},{6,0,
+416},{7,0,1720},{7,0,1924},{10,0,109},{11,0,14},{11,0,70},{11,0,569},{11,0,735},
+{15,0,153},{20,0,80},{136,10,677},{135,11,1625},{137,11,772},{136,0,595},{6,11,
+469},{7,11,1709},{138,11,515},{7,0,1832},{138,0,374},{9,0,106},{9,0,163},{9,0,
+296},{10,0,167},{10,0,172},{10,0,777},{139,0,16},{6,0,6},{7,0,81},{7,0,771},{7,0
+,1731},{9,0,405},{138,0,421},{4,11,500},{135,11,938},{5,11,68},{134,11,383},{5,0
+,881},{133,0,885},{6,0,854},{6,0,1132},{6,0,1495},{6,0,1526},{6,0,1533},{134,0,
+1577},{4,11,337},{6,11,353},{7,11,1934},{8,11,488},{137,11,429},{7,11,236},{7,11
+,1795},{8,11,259},{9,11,135},{9,11,177},{10,11,825},{11,11,115},{11,11,370},{11,
+11,405},{11,11,604},{12,11,10},{12,11,667},{12,11,669},{13,11,76},{14,11,310},{
+15,11,76},{15,11,147},{148,11,23},{5,0,142},{134,0,546},{4,11,15},{5,11,22},{6,
+11,244},{7,11,40},{7,11,200},{7,11,906},{7,11,1199},{9,11,616},{10,11,716},{11,
+11,635},{11,11,801},{140,11,458},{5,0,466},{11,0,571},{12,0,198},{13,0,283},{14,
+0,186},{15,0,21},{15,0,103},{135,10,329},{4,0,185},{5,0,257},{5,0,839},{5,0,936}
+,{9,0,399},{10,0,258},{10,0,395},{10,0,734},{11,0,1014},{12,0,23},{13,0,350},{14
+,0,150},{19,0,6},{135,11,1735},{12,11,36},{141,11,337},{5,11,598},{7,11,791},{8,
+11,108},{137,11,123},{132,10,469},{7,0,404},{7,0,1377},{7,0,1430},{7,0,2017},{8,
+0,149},{8,0,239},{8,0,512},{8,0,793},{8,0,818},{9,0,474},{9,0,595},{10,0,122},{
+10,0,565},{10,0,649},{10,0,783},{11,0,239},{11,0,295},{11,0,447},{11,0,528},{11,
+0,639},{11,0,800},{12,0,25},{12,0,77},{12,0,157},{12,0,256},{12,0,316},{12,0,390
+},{12,0,391},{12,0,395},{12,0,478},{12,0,503},{12,0,592},{12,0,680},{13,0,50},{
+13,0,53},{13,0,132},{13,0,198},{13,0,322},{13,0,415},{13,0,511},{14,0,71},{14,0,
+395},{15,0,71},{15,0,136},{17,0,123},{18,0,93},{147,0,58},{136,0,712},{134,10,
+1743},{5,10,929},{6,10,340},{8,10,376},{136,10,807},{6,0,1848},{8,0,860},{10,0,
+856},{10,0,859},{10,0,925},{10,0,941},{140,0,762},{6,0,629},{6,0,906},{9,0,810},
+{140,0,652},{5,10,218},{7,10,1610},{138,10,83},{7,10,1512},{135,10,1794},{4,0,
+377},{24,0,13},{4,11,155},{7,11,1689},{11,10,0},{144,10,78},{4,11,164},{5,11,151
+},{5,11,730},{5,11,741},{7,11,498},{7,11,870},{7,11,1542},{12,11,213},{14,11,36}
+,{14,11,391},{17,11,111},{18,11,6},{18,11,46},{18,11,151},{19,11,36},{20,11,32},
+{20,11,56},{20,11,69},{20,11,102},{21,11,4},{22,11,8},{22,11,10},{22,11,14},{150
+,11,31},{7,0,1842},{133,10,571},{4,10,455},{4,11,624},{135,11,1752},{134,0,1501}
+,{4,11,492},{5,11,451},{6,10,161},{7,10,372},{137,10,597},{132,10,349},{4,0,180}
+,{135,0,1906},{135,11,835},{141,11,70},{132,0,491},{137,10,751},{6,10,432},{139,
+10,322},{4,0,171},{138,0,234},{6,11,113},{135,11,436},{4,0,586},{7,0,1186},{138,
+0,631},{5,10,468},{10,10,325},{11,10,856},{12,10,345},{143,10,104},{5,10,223},{
+10,11,592},{10,11,753},{12,11,317},{12,11,355},{12,11,465},{12,11,469},{12,11,
+560},{12,11,578},{141,11,243},{132,10,566},{135,11,520},{4,10,59},{135,10,1394},
+{6,10,436},{139,10,481},{9,0,931},{10,0,334},{20,0,71},{4,10,48},{5,10,271},{7,
+10,953},{135,11,1878},{11,0,170},{5,10,610},{136,10,457},{133,10,755},{6,0,1587}
+,{135,10,1217},{4,10,197},{149,11,26},{133,11,585},{137,11,521},{133,0,765},{133
+,10,217},{139,11,586},{133,0,424},{9,11,752},{12,11,610},{13,11,431},{16,11,59},
+{146,11,109},{136,0,714},{7,0,685},{132,11,307},{9,0,420},{10,0,269},{10,0,285},
+{10,0,576},{11,0,397},{13,0,175},{145,0,90},{132,0,429},{133,11,964},{9,11,463},
+{138,11,595},{7,0,18},{7,0,699},{7,0,1966},{8,0,752},{9,0,273},{9,0,412},{9,0,
+703},{10,0,71},{10,0,427},{138,0,508},{4,10,165},{7,10,1398},{135,10,1829},{4,0,
+53},{5,0,186},{7,0,752},{7,0,828},{142,0,116},{8,0,575},{10,0,289},{139,0,319},{
+132,0,675},{134,0,1424},{4,11,75},{5,11,180},{6,11,500},{7,11,58},{7,11,710},{
+138,11,645},{133,11,649},{6,11,276},{7,11,282},{7,11,879},{7,11,924},{8,11,459},
+{9,11,599},{9,11,754},{11,11,574},{12,11,128},{12,11,494},{13,11,52},{13,11,301}
+,{15,11,30},{143,11,132},{6,0,647},{134,0,1095},{5,10,9},{7,10,297},{7,10,966},{
+140,10,306},{132,11,200},{134,0,1334},{5,10,146},{6,10,411},{138,10,721},{6,0,
+209},{6,0,1141},{6,0,1288},{8,0,468},{9,0,210},{11,0,36},{12,0,28},{12,0,630},{
+13,0,21},{13,0,349},{14,0,7},{145,0,13},{6,10,177},{135,10,467},{4,0,342},{135,0
+,1179},{10,11,454},{140,11,324},{4,0,928},{133,0,910},{7,0,1838},{6,11,225},{137
+,11,211},{16,0,101},{20,0,115},{20,0,118},{148,0,122},{4,0,496},{135,0,856},{4,0
+,318},{11,0,654},{7,11,718},{139,11,102},{8,11,58},{9,11,724},{11,11,809},{13,11
+,113},{145,11,72},{5,10,200},{6,11,345},{135,11,1247},{8,11,767},{8,11,803},{9,
+11,301},{137,11,903},{7,0,915},{8,0,247},{19,0,0},{7,11,1949},{136,11,674},{4,0,
+202},{5,0,382},{6,0,454},{7,0,936},{7,0,1803},{8,0,758},{9,0,375},{9,0,895},{10,
+0,743},{10,0,792},{11,0,978},{11,0,1012},{142,0,109},{7,0,1150},{7,0,1425},{7,0,
+1453},{140,0,513},{134,11,259},{138,0,791},{11,0,821},{12,0,110},{12,0,153},{18,
+0,41},{150,0,19},{134,10,481},{132,0,796},{6,0,445},{9,0,909},{136,11,254},{10,0
+,776},{13,0,345},{142,0,425},{4,10,84},{7,10,1482},{10,10,76},{138,10,142},{135,
+11,742},{6,0,578},{133,10,1015},{6,0,1387},{4,10,315},{5,10,507},{135,10,1370},{
+4,0,438},{133,0,555},{136,0,766},{133,11,248},{134,10,1722},{4,11,116},{5,11,95}
+,{5,11,445},{7,11,1688},{8,11,29},{9,11,272},{11,11,509},{139,11,915},{135,0,541
+},{133,11,543},{8,10,222},{8,10,476},{9,10,238},{11,10,516},{11,10,575},{15,10,
+109},{146,10,100},{6,0,880},{134,0,1191},{5,11,181},{136,11,41},{134,0,1506},{
+132,11,681},{7,11,25},{8,11,202},{138,11,536},{139,0,983},{137,0,768},{132,0,584
+},{9,11,423},{140,11,89},{8,11,113},{9,11,877},{10,11,554},{11,11,83},{12,11,136
+},{147,11,109},{7,10,706},{7,10,1058},{138,10,538},{133,11,976},{4,11,206},{135,
+11,746},{136,11,526},{140,0,737},{11,10,92},{11,10,196},{11,10,409},{11,10,450},
+{11,10,666},{11,10,777},{12,10,262},{13,10,385},{13,10,393},{15,10,115},{16,10,
+45},{145,10,82},{4,0,226},{4,0,326},{7,0,1770},{4,11,319},{5,11,699},{138,11,673
+},{6,10,40},{135,10,1781},{5,0,426},{8,0,30},{9,0,2},{11,0,549},{147,0,122},{6,0
+,1161},{134,0,1329},{138,10,97},{6,10,423},{7,10,665},{135,10,1210},{7,11,13},{8
+,11,226},{10,11,537},{11,11,570},{11,11,605},{11,11,799},{11,11,804},{12,11,85},
+{12,11,516},{12,11,623},{13,11,112},{13,11,361},{14,11,77},{14,11,78},{17,11,28}
+,{147,11,110},{132,11,769},{132,11,551},{132,11,728},{147,0,117},{9,11,57},{9,11
+,459},{10,11,425},{11,11,119},{12,11,184},{12,11,371},{13,11,358},{145,11,51},{5
+,11,188},{5,11,814},{8,11,10},{9,11,421},{9,11,729},{10,11,609},{139,11,689},{
+134,11,624},{135,11,298},{135,0,462},{4,0,345},{139,10,624},{136,10,574},{4,0,
+385},{7,0,265},{135,0,587},{6,0,808},{132,11,528},{133,0,398},{132,10,354},{4,0,
+347},{5,0,423},{5,0,996},{135,0,1329},{135,10,1558},{7,0,1259},{9,0,125},{139,0,
+65},{5,0,136},{6,0,136},{136,0,644},{5,11,104},{6,11,173},{135,11,1631},{135,0,
+469},{133,10,830},{4,0,278},{5,0,465},{135,0,1367},{7,11,810},{8,11,138},{8,11,
+342},{9,11,84},{10,11,193},{11,11,883},{140,11,359},{5,10,496},{135,10,203},{4,0
+,433},{133,0,719},{6,11,95},{134,10,547},{5,10,88},{137,10,239},{6,11,406},{10,
+11,409},{10,11,447},{11,11,44},{140,11,100},{134,0,1423},{7,10,650},{135,10,1310
+},{134,0,749},{135,11,1243},{135,0,1363},{6,0,381},{7,0,645},{7,0,694},{8,0,546}
+,{7,10,1076},{9,10,80},{11,10,78},{11,10,421},{11,10,534},{140,10,545},{134,11,
+1636},{135,11,1344},{12,0,277},{7,10,274},{11,10,479},{139,10,507},{6,0,705},{6,
+0,783},{6,0,1275},{6,0,1481},{4,11,282},{7,11,1034},{11,11,398},{11,11,634},{12,
+11,1},{12,11,79},{12,11,544},{14,11,237},{17,11,10},{146,11,20},{134,0,453},{4,0
+,555},{8,0,536},{10,0,288},{11,0,1005},{4,10,497},{135,10,1584},{5,11,118},{5,11
+,499},{6,11,476},{7,11,600},{7,11,888},{135,11,1096},{138,0,987},{7,0,1107},{7,
+10,261},{7,10,1115},{7,10,1354},{7,10,1588},{7,10,1705},{7,10,1902},{9,10,465},{
+10,10,248},{10,10,349},{10,10,647},{11,10,527},{11,10,660},{11,10,669},{12,10,
+529},{141,10,305},{7,11,296},{7,11,596},{8,11,560},{8,11,586},{9,11,612},{11,11,
+100},{11,11,304},{12,11,46},{13,11,89},{14,11,112},{145,11,122},{9,0,370},{138,0
+,90},{136,10,13},{132,0,860},{7,10,642},{8,10,250},{11,10,123},{11,10,137},{13,
+10,48},{142,10,95},{135,10,1429},{137,11,321},{132,0,257},{135,0,2031},{7,0,1768
+},{7,11,1599},{7,11,1723},{8,11,79},{8,11,106},{8,11,190},{8,11,302},{8,11,383},
+{9,11,119},{9,11,233},{9,11,298},{9,11,419},{9,11,471},{10,11,181},{10,11,406},{
+11,11,57},{11,11,85},{11,11,120},{11,11,177},{11,11,296},{11,11,382},{11,11,454}
+,{11,11,758},{11,11,999},{12,11,27},{12,11,98},{12,11,131},{12,11,245},{12,11,
+312},{12,11,446},{12,11,454},{13,11,25},{13,11,98},{13,11,426},{13,11,508},{14,
+11,6},{14,11,163},{14,11,272},{14,11,277},{14,11,370},{15,11,95},{15,11,138},{15
+,11,167},{17,11,18},{17,11,38},{20,11,96},{149,11,32},{5,11,722},{134,11,1759},{
+145,11,16},{6,0,1071},{134,0,1561},{10,10,545},{140,10,301},{6,0,83},{6,0,1733},
+{135,0,1389},{4,0,835},{135,0,1818},{133,11,258},{4,10,904},{133,10,794},{134,0,
+2006},{5,11,30},{7,11,495},{8,11,134},{9,11,788},{140,11,438},{135,11,2004},{137
+,0,696},{5,11,50},{6,11,439},{7,11,780},{135,11,1040},{7,11,772},{7,11,1104},{7,
+11,1647},{11,11,269},{11,11,539},{11,11,607},{11,11,627},{11,11,706},{11,11,975}
+,{12,11,248},{12,11,311},{12,11,434},{12,11,600},{12,11,622},{13,11,297},{13,11,
+367},{13,11,485},{14,11,69},{14,11,409},{143,11,108},{5,11,1},{6,11,81},{138,11,
+520},{7,0,1718},{9,0,95},{9,0,274},{10,0,279},{10,0,317},{10,0,420},{11,0,303},{
+11,0,808},{12,0,134},{12,0,367},{13,0,149},{13,0,347},{14,0,349},{14,0,406},{18,
+0,22},{18,0,89},{18,0,122},{147,0,47},{5,11,482},{8,11,98},{9,11,172},{10,11,222
+},{10,11,700},{10,11,822},{11,11,302},{11,11,778},{12,11,50},{12,11,127},{12,11,
+396},{13,11,62},{13,11,328},{14,11,122},{147,11,72},{7,10,386},{138,10,713},{6,
+10,7},{6,10,35},{7,10,147},{7,10,1069},{7,10,1568},{7,10,1575},{7,10,1917},{8,10
+,43},{8,10,208},{9,10,128},{9,10,866},{10,10,20},{11,10,981},{147,10,33},{133,0,
+26},{132,0,550},{5,11,2},{7,11,1494},{136,11,589},{6,11,512},{7,11,797},{8,11,
+253},{9,11,77},{10,11,1},{10,11,129},{10,11,225},{11,11,118},{11,11,226},{11,11,
+251},{11,11,430},{11,11,701},{11,11,974},{11,11,982},{12,11,64},{12,11,260},{12,
+11,488},{140,11,690},{7,10,893},{141,10,424},{134,0,901},{136,0,822},{4,0,902},{
+5,0,809},{134,0,122},{6,0,807},{134,0,1366},{7,0,262},{5,11,748},{134,11,553},{
+133,0,620},{4,0,34},{5,0,574},{7,0,279},{7,0,1624},{136,0,601},{9,0,170},{6,10,
+322},{9,10,552},{11,10,274},{13,10,209},{13,10,499},{14,10,85},{15,10,126},{145,
+10,70},{132,0,537},{4,11,12},{7,11,420},{7,11,522},{7,11,809},{8,11,797},{141,11
+,88},{133,0,332},{8,10,83},{8,10,742},{8,10,817},{9,10,28},{9,10,29},{9,10,885},
+{10,10,387},{11,10,633},{11,10,740},{13,10,235},{13,10,254},{15,10,143},{143,10,
+146},{6,0,1909},{9,0,964},{12,0,822},{12,0,854},{12,0,865},{12,0,910},{12,0,938}
+,{15,0,169},{15,0,208},{15,0,211},{18,0,205},{18,0,206},{18,0,220},{18,0,223},{
+152,0,24},{140,10,49},{5,11,528},{135,11,1580},{6,0,261},{8,0,182},{139,0,943},{
+134,0,1721},{4,0,933},{133,0,880},{136,11,321},{5,11,266},{9,11,290},{9,11,364},
+{10,11,293},{11,11,606},{142,11,45},{6,0,1609},{4,11,50},{6,11,510},{6,11,594},{
+9,11,121},{10,11,49},{10,11,412},{139,11,834},{7,0,895},{136,11,748},{132,11,466
+},{4,10,110},{10,10,415},{10,10,597},{142,10,206},{133,0,812},{135,11,281},{6,0,
+1890},{6,0,1902},{6,0,1916},{9,0,929},{9,0,942},{9,0,975},{9,0,984},{9,0,986},{9
+,0,1011},{9,0,1019},{12,0,804},{12,0,851},{12,0,867},{12,0,916},{12,0,923},{15,0
+,194},{15,0,204},{15,0,210},{15,0,222},{15,0,223},{15,0,229},{15,0,250},{18,0,
+179},{18,0,186},{18,0,192},{7,10,205},{135,10,2000},{132,11,667},{135,0,778},{4,
+0,137},{7,0,1178},{135,0,1520},{134,0,1314},{4,11,242},{134,11,333},{6,0,1661},{
+7,0,1975},{7,0,2009},{135,0,2011},{134,0,1591},{4,10,283},{135,10,1194},{11,0,
+820},{150,0,51},{4,11,39},{5,11,36},{7,11,1843},{8,11,407},{11,11,144},{140,11,
+523},{134,10,1720},{4,11,510},{7,11,29},{7,11,66},{7,11,1980},{10,11,487},{10,11
+,809},{146,11,9},{5,0,89},{7,0,1915},{9,0,185},{9,0,235},{10,0,64},{10,0,270},{
+10,0,403},{10,0,469},{10,0,529},{10,0,590},{11,0,140},{11,0,860},{13,0,1},{13,0,
+422},{14,0,341},{14,0,364},{17,0,93},{18,0,113},{19,0,97},{147,0,113},{133,0,695
+},{6,0,987},{134,0,1160},{5,0,6},{6,0,183},{7,0,680},{7,0,978},{7,0,1013},{7,0,
+1055},{12,0,230},{13,0,172},{146,0,29},{134,11,570},{132,11,787},{134,11,518},{6
+,0,29},{139,0,63},{132,11,516},{136,11,821},{132,0,311},{134,0,1740},{7,0,170},{
+8,0,90},{8,0,177},{8,0,415},{11,0,714},{14,0,281},{136,10,735},{134,0,1961},{135
+,11,1405},{4,11,10},{7,11,917},{139,11,786},{5,10,132},{9,10,486},{9,10,715},{10
+,10,458},{11,10,373},{11,10,668},{11,10,795},{11,10,897},{12,10,272},{12,10,424}
+,{12,10,539},{12,10,558},{14,10,245},{14,10,263},{14,10,264},{14,10,393},{142,10
+,403},{11,0,91},{13,0,129},{15,0,101},{145,0,125},{135,0,1132},{4,0,494},{6,0,74
+},{7,0,44},{7,0,407},{12,0,17},{15,0,5},{148,0,11},{133,10,379},{5,0,270},{5,11,
+684},{6,10,89},{6,10,400},{7,10,1569},{7,10,1623},{7,10,1850},{8,10,218},{8,10,
+422},{9,10,570},{138,10,626},{4,0,276},{133,0,296},{6,0,1523},{134,11,27},{6,10,
+387},{7,10,882},{141,10,111},{6,10,224},{7,10,877},{137,10,647},{135,10,790},{4,
+0,7},{5,0,90},{5,0,158},{6,0,542},{7,0,221},{7,0,1574},{9,0,490},{10,0,540},{11,
+0,443},{139,0,757},{7,0,588},{9,0,175},{138,0,530},{135,10,394},{142,11,23},{134
+,0,786},{135,0,580},{7,0,88},{136,0,627},{5,0,872},{6,0,57},{7,0,471},{9,0,447},
+{137,0,454},{6,11,342},{6,11,496},{8,11,275},{137,11,206},{4,11,909},{133,11,940
+},{6,0,735},{132,11,891},{8,0,845},{8,0,916},{135,10,1409},{5,0,31},{134,0,614},
+{11,0,458},{12,0,15},{140,0,432},{8,0,330},{140,0,477},{4,0,530},{5,0,521},{7,0,
+1200},{10,0,460},{132,11,687},{6,0,424},{135,0,1866},{9,0,569},{12,0,12},{12,0,
+81},{12,0,319},{13,0,69},{14,0,259},{16,0,87},{17,0,1},{17,0,21},{17,0,24},{18,0
+,15},{18,0,56},{18,0,59},{18,0,127},{18,0,154},{19,0,19},{148,0,31},{7,0,1302},{
+136,10,38},{134,11,253},{5,10,261},{7,10,78},{7,10,199},{8,10,815},{9,10,126},{
+138,10,342},{5,0,595},{135,0,1863},{6,11,41},{141,11,160},{5,0,13},{134,0,142},{
+6,0,97},{7,0,116},{8,0,322},{8,0,755},{9,0,548},{10,0,714},{11,0,884},{13,0,324}
+,{7,11,1304},{138,11,477},{132,10,628},{134,11,1718},{7,10,266},{136,10,804},{
+135,10,208},{7,0,1021},{6,10,79},{135,10,1519},{7,0,1472},{135,0,1554},{6,11,362
+},{146,11,51},{7,0,1071},{7,0,1541},{7,0,1767},{7,0,1806},{11,0,162},{11,0,242},
+{11,0,452},{12,0,605},{15,0,26},{144,0,44},{136,10,741},{133,11,115},{145,0,115}
+,{134,10,376},{6,0,1406},{134,0,1543},{5,11,193},{12,11,178},{13,11,130},{145,11
+,84},{135,0,1111},{8,0,1},{9,0,650},{10,0,326},{5,11,705},{137,11,606},{5,0,488}
+,{6,0,527},{7,0,489},{7,0,1636},{8,0,121},{8,0,144},{8,0,359},{9,0,193},{9,0,241
+},{9,0,336},{9,0,882},{11,0,266},{11,0,372},{11,0,944},{12,0,401},{140,0,641},{
+135,11,174},{6,0,267},{7,10,244},{7,10,632},{7,10,1609},{8,10,178},{8,10,638},{
+141,10,58},{134,0,1983},{134,0,1155},{134,0,1575},{134,0,1438},{9,0,31},{10,0,
+244},{10,0,699},{12,0,149},{141,0,497},{133,0,377},{4,11,122},{5,11,796},{5,11,
+952},{6,11,1660},{6,11,1671},{8,11,567},{9,11,687},{9,11,742},{10,11,686},{11,11
+,356},{11,11,682},{140,11,281},{145,0,101},{11,11,0},{144,11,78},{5,11,179},{5,
+10,791},{7,11,1095},{135,11,1213},{8,11,372},{9,11,122},{138,11,175},{7,10,686},
+{8,10,33},{8,10,238},{10,10,616},{11,10,467},{11,10,881},{13,10,217},{13,10,253}
+,{142,10,268},{9,0,476},{4,11,66},{7,11,722},{135,11,904},{7,11,352},{137,11,684
+},{135,0,2023},{135,0,1836},{132,10,447},{5,0,843},{144,0,35},{137,11,779},{141,
+11,35},{4,10,128},{5,10,415},{6,10,462},{7,10,294},{7,10,578},{10,10,710},{139,
+10,86},{132,0,554},{133,0,536},{136,10,587},{5,0,207},{9,0,79},{11,0,625},{145,0
+,7},{7,0,1371},{6,10,427},{138,10,692},{4,0,424},{4,10,195},{135,10,802},{8,0,
+785},{133,11,564},{135,0,336},{4,0,896},{6,0,1777},{134,11,556},{137,11,103},{
+134,10,1683},{7,11,544},{8,11,719},{138,11,61},{138,10,472},{4,11,5},{5,11,498},
+{136,11,637},{7,0,750},{9,0,223},{11,0,27},{11,0,466},{12,0,624},{14,0,265},{146
+,0,61},{12,0,238},{18,0,155},{12,11,238},{146,11,155},{151,10,28},{133,11,927},{
+12,0,383},{5,10,3},{8,10,578},{9,10,118},{10,10,705},{141,10,279},{4,11,893},{5,
+11,780},{133,11,893},{4,0,603},{133,0,661},{4,0,11},{6,0,128},{7,0,231},{7,0,
+1533},{10,0,725},{5,10,229},{5,11,238},{135,11,1350},{8,10,102},{10,10,578},{10,
+10,672},{12,10,496},{13,10,408},{14,10,121},{145,10,106},{132,0,476},{134,0,1552
+},{134,11,1729},{8,10,115},{8,10,350},{9,10,489},{10,10,128},{11,10,306},{12,10,
+373},{14,10,30},{17,10,79},{19,10,80},{150,10,55},{135,0,1807},{4,0,680},{4,11,
+60},{7,11,760},{7,11,1800},{8,11,314},{9,11,700},{139,11,487},{4,10,230},{5,10,
+702},{148,11,94},{132,11,228},{139,0,435},{9,0,20},{10,0,324},{10,0,807},{139,0,
+488},{6,10,1728},{136,11,419},{4,10,484},{18,10,26},{19,10,42},{20,10,43},{21,10
+,0},{23,10,27},{152,10,14},{135,0,1431},{133,11,828},{5,0,112},{6,0,103},{6,0,
+150},{7,0,1303},{9,0,292},{10,0,481},{20,0,13},{7,11,176},{7,11,178},{7,11,1110}
+,{10,11,481},{148,11,13},{138,0,356},{4,11,51},{5,11,39},{6,11,4},{7,11,591},{7,
+11,849},{7,11,951},{7,11,1129},{7,11,1613},{7,11,1760},{7,11,1988},{9,11,434},{
+10,11,754},{11,11,25},{11,11,37},{139,11,414},{6,0,1963},{134,0,2000},{132,10,
+633},{6,0,1244},{133,11,902},{135,11,928},{140,0,18},{138,0,204},{135,11,1173},{
+134,0,867},{4,0,708},{8,0,15},{9,0,50},{9,0,386},{11,0,18},{11,0,529},{140,0,228
+},{134,11,270},{4,0,563},{7,0,109},{7,0,592},{7,0,637},{7,0,770},{8,0,463},{9,0,
+60},{9,0,335},{9,0,904},{10,0,73},{11,0,434},{12,0,585},{13,0,331},{18,0,110},{
+148,0,60},{132,0,502},{14,11,359},{19,11,52},{148,11,47},{6,11,377},{7,11,1025},
+{9,11,613},{145,11,104},{6,0,347},{10,0,161},{5,10,70},{5,10,622},{6,10,334},{7,
+10,1032},{9,10,171},{11,10,26},{11,10,213},{11,10,637},{11,10,707},{12,10,202},{
+12,10,380},{13,10,226},{13,10,355},{14,10,222},{145,10,42},{132,11,416},{4,0,33}
+,{5,0,102},{6,0,284},{7,0,1079},{7,0,1423},{7,0,1702},{8,0,470},{9,0,554},{9,0,
+723},{11,0,333},{142,11,372},{5,11,152},{5,11,197},{7,11,340},{7,11,867},{10,11,
+548},{10,11,581},{11,11,6},{12,11,3},{12,11,19},{14,11,110},{142,11,289},{7,0,
+246},{135,0,840},{6,0,10},{8,0,571},{9,0,739},{143,0,91},{6,0,465},{7,0,1465},{4
+,10,23},{4,10,141},{5,10,313},{5,10,1014},{6,10,50},{7,10,142},{7,10,559},{8,10,
+640},{9,10,460},{9,10,783},{11,10,741},{12,10,183},{141,10,488},{133,0,626},{136
+,0,614},{138,0,237},{7,11,34},{7,11,190},{8,11,28},{8,11,141},{8,11,444},{8,11,
+811},{9,11,468},{11,11,334},{12,11,24},{12,11,386},{140,11,576},{133,11,757},{5,
+0,18},{6,0,526},{13,0,24},{13,0,110},{19,0,5},{147,0,44},{6,0,506},{134,11,506},
+{135,11,1553},{4,0,309},{5,0,462},{7,0,970},{7,0,1097},{22,0,30},{22,0,33},{7,11
+,1385},{11,11,582},{11,11,650},{11,11,901},{11,11,949},{12,11,232},{12,11,236},{
+13,11,413},{13,11,501},{146,11,116},{9,0,140},{5,10,222},{138,10,534},{6,0,1056}
+,{137,10,906},{134,0,1704},{138,10,503},{134,0,1036},{5,10,154},{7,10,1491},{10,
+10,379},{138,10,485},{4,11,383},{133,10,716},{134,0,1315},{5,0,86},{7,0,743},{9,
+0,85},{10,0,281},{10,0,432},{11,0,825},{12,0,251},{13,0,118},{142,0,378},{8,0,
+264},{4,10,91},{5,10,388},{5,10,845},{6,10,206},{6,10,252},{6,10,365},{7,10,136}
+,{7,10,531},{136,10,621},{5,0,524},{133,0,744},{5,11,277},{141,11,247},{132,11,
+435},{10,0,107},{140,0,436},{132,0,927},{10,0,123},{12,0,670},{146,0,94},{7,0,
+1149},{9,0,156},{138,0,957},{5,11,265},{6,11,212},{135,11,28},{133,0,778},{133,0
+,502},{8,0,196},{10,0,283},{139,0,406},{135,10,576},{136,11,535},{134,0,1312},{5
+,10,771},{5,10,863},{5,10,898},{6,10,1632},{6,10,1644},{134,10,1780},{5,0,855},{
+5,10,331},{135,11,1487},{132,11,702},{5,11,808},{135,11,2045},{7,0,1400},{9,0,
+446},{138,0,45},{140,10,632},{132,0,1003},{5,11,166},{8,11,739},{140,11,511},{5,
+10,107},{7,10,201},{136,10,518},{6,10,446},{135,10,1817},{134,0,1532},{134,0,
+1097},{4,11,119},{5,11,170},{5,11,447},{7,11,1708},{7,11,1889},{9,11,357},{9,11,
+719},{12,11,486},{140,11,596},{9,10,851},{141,10,510},{7,0,612},{8,0,545},{8,0,
+568},{8,0,642},{9,0,717},{10,0,541},{10,0,763},{11,0,449},{12,0,489},{13,0,153},
+{13,0,296},{14,0,138},{14,0,392},{15,0,50},{16,0,6},{16,0,12},{20,0,9},{132,10,
+504},{4,11,450},{135,11,1158},{11,0,54},{13,0,173},{13,0,294},{5,10,883},{5,10,
+975},{8,10,392},{148,10,7},{13,0,455},{15,0,99},{15,0,129},{144,0,68},{135,0,172
+},{132,11,754},{5,10,922},{134,10,1707},{134,0,1029},{17,11,39},{148,11,36},{4,0
+,568},{5,10,993},{7,10,515},{137,10,91},{132,0,732},{10,0,617},{138,11,617},{134
+,0,974},{7,0,989},{10,0,377},{12,0,363},{13,0,68},{13,0,94},{14,0,108},{142,0,
+306},{136,0,733},{132,0,428},{7,0,1789},{135,11,1062},{7,0,2015},{140,0,665},{
+135,10,1433},{5,0,287},{7,10,921},{8,10,580},{8,10,593},{8,10,630},{138,10,28},{
+138,0,806},{4,10,911},{5,10,867},{5,10,1013},{7,10,2034},{8,10,798},{136,10,813}
+,{134,0,1539},{8,11,523},{150,11,34},{135,11,740},{7,11,238},{7,11,2033},{8,11,
+120},{8,11,188},{8,11,659},{9,11,598},{10,11,466},{12,11,342},{12,11,588},{13,11
+,503},{14,11,246},{143,11,92},{7,0,1563},{141,0,182},{5,10,135},{6,10,519},{7,10
+,1722},{10,10,271},{11,10,261},{145,10,54},{14,10,338},{148,10,81},{7,0,484},{4,
+10,300},{133,10,436},{145,11,114},{6,0,1623},{134,0,1681},{133,11,640},{4,11,201
+},{7,11,1744},{8,11,602},{11,11,247},{11,11,826},{145,11,65},{8,11,164},{146,11,
+62},{6,0,1833},{6,0,1861},{136,0,878},{134,0,1569},{8,10,357},{10,10,745},{14,10
+,426},{17,10,94},{147,10,57},{12,0,93},{12,0,501},{13,0,362},{14,0,151},{15,0,40
+},{15,0,59},{16,0,46},{17,0,25},{18,0,14},{18,0,134},{19,0,25},{19,0,69},{20,0,
+16},{20,0,19},{20,0,66},{21,0,23},{21,0,25},{150,0,42},{6,0,1748},{8,0,715},{9,0
+,802},{10,0,46},{10,0,819},{13,0,308},{14,0,351},{14,0,363},{146,0,67},{132,0,
+994},{4,0,63},{133,0,347},{132,0,591},{133,0,749},{7,11,1577},{10,11,304},{10,11
+,549},{11,11,424},{12,11,365},{13,11,220},{13,11,240},{142,11,33},{133,0,366},{7
+,0,557},{12,0,547},{14,0,86},{133,10,387},{135,0,1747},{132,11,907},{5,11,100},{
+10,11,329},{12,11,416},{149,11,29},{4,10,6},{5,10,708},{136,10,75},{7,10,1351},{
+9,10,581},{10,10,639},{11,10,453},{140,10,584},{7,0,89},{132,10,303},{138,10,772
+},{132,11,176},{5,11,636},{5,11,998},{8,11,26},{137,11,358},{7,11,9},{7,11,1508}
+,{9,11,317},{10,11,210},{10,11,292},{10,11,533},{11,11,555},{12,11,526},{12,11,
+607},{13,11,263},{13,11,459},{142,11,271},{134,0,1463},{6,0,772},{6,0,1137},{139
+,11,595},{7,0,977},{139,11,66},{138,0,893},{20,0,48},{148,11,48},{5,0,824},{133,
+0,941},{134,11,295},{7,0,1543},{7,0,1785},{10,0,690},{4,10,106},{139,10,717},{7,
+0,440},{8,0,230},{139,0,106},{5,10,890},{133,10,988},{6,10,626},{142,10,431},{10
+,11,127},{141,11,27},{17,0,32},{10,10,706},{150,10,44},{132,0,216},{137,0,332},{
+4,10,698},{136,11,119},{139,11,267},{138,10,17},{11,11,526},{11,11,939},{141,11,
+290},{7,11,1167},{11,11,934},{13,11,391},{145,11,76},{139,11,39},{134,10,84},{4,
+0,914},{5,0,800},{133,0,852},{10,0,416},{141,0,115},{7,0,564},{142,0,168},{4,0,
+918},{133,0,876},{134,0,1764},{152,0,3},{4,0,92},{5,0,274},{7,11,126},{136,11,84
+},{140,10,498},{136,11,790},{8,0,501},{5,10,986},{6,10,130},{7,10,1582},{8,10,
+458},{10,10,101},{10,10,318},{138,10,823},{6,11,64},{12,11,377},{141,11,309},{5,
+0,743},{138,0,851},{4,0,49},{7,0,280},{135,0,1633},{134,0,879},{136,0,47},{7,10,
+1644},{137,10,129},{132,0,865},{134,0,1202},{9,11,34},{139,11,484},{135,10,997},
+{5,0,272},{5,0,908},{5,0,942},{8,0,197},{9,0,47},{11,0,538},{139,0,742},{6,11,
+1700},{7,11,26},{7,11,293},{7,11,382},{7,11,1026},{7,11,1087},{7,11,2027},{8,11,
+24},{8,11,114},{8,11,252},{8,11,727},{8,11,729},{9,11,30},{9,11,199},{9,11,231},
+{9,11,251},{9,11,334},{9,11,361},{9,11,488},{9,11,712},{10,11,55},{10,11,60},{10
+,11,232},{10,11,332},{10,11,384},{10,11,396},{10,11,504},{10,11,542},{10,11,652}
+,{11,11,20},{11,11,48},{11,11,207},{11,11,291},{11,11,298},{11,11,342},{11,11,
+365},{11,11,394},{11,11,620},{11,11,705},{11,11,1017},{12,11,123},{12,11,340},{
+12,11,406},{12,11,643},{13,11,61},{13,11,269},{13,11,311},{13,11,319},{13,11,486
+},{14,11,234},{15,11,62},{15,11,85},{16,11,71},{18,11,119},{148,11,105},{6,0,
+1455},{150,11,37},{135,10,1927},{135,0,1911},{137,0,891},{7,10,1756},{137,10,98}
+,{7,10,1046},{139,10,160},{132,0,761},{6,11,379},{7,11,270},{7,11,1116},{8,11,
+176},{8,11,183},{9,11,432},{9,11,661},{12,11,247},{12,11,617},{146,11,125},{6,10
+,45},{7,10,433},{8,10,129},{9,10,21},{10,10,392},{11,10,79},{12,10,499},{13,10,
+199},{141,10,451},{4,0,407},{5,11,792},{133,11,900},{132,0,560},{135,0,183},{13,
+0,490},{7,10,558},{136,10,353},{4,0,475},{6,0,731},{11,0,35},{13,0,71},{13,0,177
+},{14,0,422},{133,10,785},{8,10,81},{9,10,189},{9,10,201},{11,10,478},{11,10,712
+},{141,10,338},{4,0,418},{4,0,819},{133,10,353},{151,10,26},{4,11,901},{133,11,
+776},{132,0,575},{7,0,818},{16,0,92},{17,0,14},{17,0,45},{18,0,75},{148,0,18},{6
+,0,222},{7,0,636},{7,0,1620},{8,0,409},{9,0,693},{139,0,77},{6,10,25},{7,10,855}
+,{7,10,1258},{144,10,32},{6,0,1880},{6,0,1887},{6,0,1918},{6,0,1924},{9,0,967},{
+9,0,995},{9,0,1015},{12,0,826},{12,0,849},{12,0,857},{12,0,860},{12,0,886},{12,0
+,932},{18,0,228},{18,0,231},{146,0,240},{134,0,633},{134,0,1308},{4,11,37},{5,11
+,334},{135,11,1253},{10,0,86},{4,10,4},{7,10,1118},{7,10,1320},{7,10,1706},{8,10
+,277},{9,10,622},{11,10,724},{12,10,350},{12,10,397},{13,10,28},{13,10,159},{15,
+10,89},{18,10,5},{19,10,9},{20,10,34},{150,10,47},{132,11,508},{137,11,448},{12,
+11,107},{146,11,31},{132,0,817},{134,0,663},{133,0,882},{134,0,914},{132,11,540}
+,{132,11,533},{136,11,608},{8,0,885},{138,0,865},{132,0,426},{6,0,58},{7,0,745},
+{7,0,1969},{8,0,399},{8,0,675},{9,0,479},{9,0,731},{10,0,330},{10,0,593},{10,0,
+817},{11,0,32},{11,0,133},{11,0,221},{145,0,68},{134,10,255},{7,0,102},{137,0,
+538},{137,10,216},{7,11,253},{136,11,549},{135,11,912},{9,10,183},{139,10,286},{
+11,10,956},{151,10,3},{8,11,527},{18,11,60},{147,11,24},{4,10,536},{7,10,1141},{
+10,10,723},{139,10,371},{133,11,920},{7,0,876},{135,10,285},{135,10,560},{132,10
+,690},{142,11,126},{11,10,33},{12,10,571},{149,10,1},{133,0,566},{9,0,139},{10,0
+,399},{11,0,469},{12,0,634},{13,0,223},{132,11,483},{6,0,48},{135,0,63},{18,0,12
+},{7,10,1862},{12,10,491},{12,10,520},{13,10,383},{142,10,244},{135,11,1665},{
+132,11,448},{9,11,495},{146,11,104},{6,0,114},{7,0,1224},{7,0,1556},{136,0,3},{4
+,10,190},{133,10,554},{8,0,576},{9,0,267},{133,10,1001},{133,10,446},{133,0,933}
+,{139,11,1009},{8,11,653},{13,11,93},{147,11,14},{6,0,692},{6,0,821},{134,0,1077
+},{5,11,172},{135,11,801},{138,0,752},{4,0,375},{134,0,638},{134,0,1011},{140,11
+,540},{9,0,96},{133,11,260},{139,11,587},{135,10,1231},{12,0,30},{13,0,148},{14,
+0,87},{14,0,182},{16,0,42},{20,0,70},{132,10,304},{6,0,1398},{7,0,56},{7,0,1989}
+,{8,0,337},{8,0,738},{9,0,600},{12,0,37},{13,0,447},{142,0,92},{138,0,666},{5,0,
+394},{7,0,487},{136,0,246},{9,0,437},{6,10,53},{6,10,199},{7,10,1408},{8,10,32},
+{8,10,93},{10,10,397},{10,10,629},{11,10,593},{11,10,763},{13,10,326},{145,10,35
+},{134,10,105},{9,0,320},{10,0,506},{138,10,794},{7,11,57},{8,11,167},{8,11,375}
+,{9,11,82},{9,11,561},{10,11,620},{10,11,770},{11,10,704},{141,10,396},{6,0,1003
+},{5,10,114},{5,10,255},{141,10,285},{7,0,866},{135,0,1163},{133,11,531},{132,0,
+328},{7,10,2035},{8,10,19},{9,10,89},{138,10,831},{8,11,194},{136,11,756},{136,0
+,1000},{5,11,453},{134,11,441},{4,0,101},{5,0,833},{7,0,1171},{136,0,744},{133,0
+,726},{136,10,746},{138,0,176},{6,0,9},{6,0,397},{7,0,53},{7,0,1742},{10,0,632},
+{11,0,828},{140,0,146},{135,11,22},{145,11,64},{132,0,839},{11,0,417},{12,0,223}
+,{140,0,265},{4,11,102},{7,11,815},{7,11,1699},{139,11,964},{5,10,955},{136,10,
+814},{6,0,1931},{6,0,2007},{18,0,246},{146,0,247},{8,0,198},{11,0,29},{140,0,534
+},{135,0,1771},{6,0,846},{7,11,1010},{11,11,733},{11,11,759},{12,11,563},{13,11,
+34},{14,11,101},{18,11,45},{146,11,129},{4,0,186},{5,0,157},{8,0,168},{138,0,6},
+{132,11,899},{133,10,56},{148,10,100},{133,0,875},{5,0,773},{5,0,991},{6,0,1635}
+,{134,0,1788},{6,0,1274},{9,0,477},{141,0,78},{4,0,639},{7,0,111},{8,0,581},{12,
+0,177},{6,11,52},{9,11,104},{9,11,559},{10,10,4},{10,10,13},{11,10,638},{12,11,
+308},{19,11,87},{148,10,57},{132,11,604},{4,11,301},{133,10,738},{133,10,758},{
+134,0,1747},{7,11,1440},{11,11,854},{11,11,872},{11,11,921},{12,11,551},{13,11,
+472},{142,11,367},{7,0,1364},{7,0,1907},{141,0,158},{134,0,873},{4,0,404},{4,0,
+659},{7,0,552},{135,0,675},{135,10,1112},{139,10,328},{7,11,508},{137,10,133},{
+133,0,391},{5,10,110},{6,10,169},{6,10,1702},{7,10,400},{8,10,538},{9,10,184},{9
+,10,524},{140,10,218},{6,11,310},{7,11,1849},{8,11,72},{8,11,272},{8,11,431},{9,
+11,12},{9,11,351},{10,11,563},{10,11,630},{10,11,810},{11,11,367},{11,11,599},{
+11,11,686},{140,11,672},{5,0,540},{6,0,1697},{136,0,668},{132,0,883},{134,0,78},
+{12,0,628},{18,0,79},{6,10,133},{9,10,353},{139,10,993},{6,11,181},{7,11,537},{8
+,11,64},{9,11,127},{10,11,496},{12,11,510},{141,11,384},{6,10,93},{7,10,1422},{7
+,10,1851},{8,10,673},{9,10,529},{140,10,43},{137,10,371},{134,0,1460},{134,0,962
+},{4,11,244},{135,11,233},{9,10,25},{10,10,467},{138,10,559},{4,10,335},{135,10,
+942},{133,0,460},{135,11,334},{134,11,1650},{4,0,199},{139,0,34},{5,10,601},{8,
+10,39},{10,10,773},{11,10,84},{12,10,205},{142,10,1},{133,10,870},{134,0,388},{
+14,0,474},{148,0,120},{133,11,369},{139,0,271},{4,0,511},{9,0,333},{9,0,379},{10
+,0,602},{11,0,441},{11,0,723},{11,0,976},{12,0,357},{132,10,181},{134,0,608},{
+134,10,1652},{22,0,49},{137,11,338},{140,0,988},{134,0,617},{5,0,938},{136,0,707
+},{132,10,97},{5,10,147},{6,10,286},{7,10,1362},{141,10,176},{6,0,756},{134,0,
+1149},{133,11,896},{6,10,375},{7,10,169},{7,10,254},{136,10,780},{134,0,1583},{
+135,10,1447},{139,0,285},{7,11,1117},{8,11,393},{136,11,539},{135,0,344},{6,0,
+469},{7,0,1709},{138,0,515},{5,10,629},{135,10,1549},{5,11,4},{5,11,810},{6,11,
+13},{6,11,538},{6,11,1690},{6,11,1726},{7,11,499},{7,11,1819},{8,11,148},{8,11,
+696},{8,11,791},{12,11,125},{13,11,54},{143,11,9},{135,11,1268},{137,0,404},{132
+,0,500},{5,0,68},{134,0,383},{11,0,216},{139,0,340},{4,11,925},{5,11,803},{8,11,
+698},{138,11,828},{4,0,337},{6,0,353},{7,0,1934},{8,0,488},{137,0,429},{7,0,236}
+,{7,0,1795},{8,0,259},{9,0,135},{9,0,177},{9,0,860},{10,0,825},{11,0,115},{11,0,
+370},{11,0,405},{11,0,604},{12,0,10},{12,0,667},{12,0,669},{13,0,76},{14,0,310},
+{15,0,76},{15,0,147},{148,0,23},{4,0,15},{4,0,490},{5,0,22},{6,0,244},{7,0,40},{
+7,0,200},{7,0,906},{7,0,1199},{9,0,616},{10,0,716},{11,0,635},{11,0,801},{140,0,
+458},{12,0,756},{132,10,420},{134,0,1504},{6,0,757},{133,11,383},{6,0,1266},{135
+,0,1735},{5,0,598},{7,0,791},{8,0,108},{9,0,123},{7,10,1570},{140,10,542},{142,
+11,410},{9,11,660},{138,11,347}
+};
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_STATIC_DICT_LUT_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/utf8_util.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/utf8_util.c
new file mode 100755
index 0000000000..04a7805161
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/utf8_util.c
@@ -0,0 +1,85 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Heuristics for deciding about the UTF8-ness of strings. */
+
+#include "./utf8_util.h"
+
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static size_t BrotliParseAsUTF8(
+    int* symbol, const uint8_t* input, size_t size) {
+  const uint8_t lead = input[0];
+  int code;
+  /* One byte: 7-bit ASCII. NUL is rejected and falls through to the end. */
+  if (lead != 0 && lead < 0x80) {
+    *symbol = lead;
+    return 1;
+  }
+  /* Two bytes: 110xxxxx 10xxxxxx; overlong forms (<= 0x7F) are rejected. */
+  if ((lead & 0xE0) == 0xC0 && size > 1u &&
+      (input[1] & 0xC0) == 0x80) {
+    code = ((lead & 0x1F) << 6) | (input[1] & 0x3F);
+    if (code > 0x7F) {
+      *symbol = code;
+      return 2;
+    }
+  }
+  /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx; must decode above 0x7FF. */
+  if ((lead & 0xF0) == 0xE0 && size > 2u &&
+      (input[1] & 0xC0) == 0x80 &&
+      (input[2] & 0xC0) == 0x80) {
+    code = ((lead & 0x0F) << 12) |
+           ((input[1] & 0x3F) << 6) |
+           (input[2] & 0x3F);
+    if (code > 0x7FF) {
+      *symbol = code;
+      return 3;
+    }
+  }
+  /* Four bytes: 11110xxx + 3 continuation bytes; in (0xFFFF, 0x10FFFF]. */
+  if ((lead & 0xF8) == 0xF0 && size > 3u &&
+      (input[1] & 0xC0) == 0x80 &&
+      (input[2] & 0xC0) == 0x80 &&
+      (input[3] & 0xC0) == 0x80) {
+    code = ((lead & 0x07) << 18) |
+           ((input[1] & 0x3F) << 12) |
+           ((input[2] & 0x3F) << 6) |
+           (input[3] & 0x3F);
+    if (code > 0xFFFF && code <= 0x10FFFF) {
+      *symbol = code;
+      return 4;
+    }
+  }
+  /* No valid sequence starts here: tag the raw byte with a value above the
+     UTF8 code space and consume only that single byte. */
+  *symbol = 0x110000 | lead;
+  return 1;
+}
+
+/* Returns 1 if more than min_fraction of the data parses as UTF8. */
+BROTLI_BOOL BrotliIsMostlyUTF8(
+    const uint8_t* data, const size_t pos, const size_t mask,
+    const size_t length, const double min_fraction) {
+  size_t utf8_bytes = 0;
+  size_t offset;
+  for (offset = 0; offset < length;) {
+    int code;
+    const size_t step = BrotliParseAsUTF8(
+        &code, &data[(pos + offset) & mask], length - offset);
+    if (code < 0x110000) utf8_bytes += step;
+    offset += step;
+  }
+  return TO_BROTLI_BOOL(utf8_bytes > min_fraction * (double)length);
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/utf8_util.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/utf8_util.h
new file mode 100755
index 0000000000..8fda80c220
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/utf8_util.h
@@ -0,0 +1,32 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Heuristics for deciding about the UTF8-ness of strings. */
+
+#ifndef BROTLI_ENC_UTF8_UTIL_H_
+#define BROTLI_ENC_UTF8_UTIL_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static const double kMinUTF8Ratio = 0.75;
+
+/* Returns 1 if more than min_fraction of the bytes between pos and
+   pos + length in the (data, mask) ring-buffer are UTF8-encoded, otherwise
+   returns 0. */
+BROTLI_INTERNAL BROTLI_BOOL BrotliIsMostlyUTF8(
+    const uint8_t* data, const size_t pos, const size_t mask,
+    const size_t length, const double min_fraction);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_UTF8_UTIL_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/write_bits.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/write_bits.h
new file mode 100755
index 0000000000..36515a6893
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/enc/write_bits.h
@@ -0,0 +1,85 @@
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Write bits into a byte array. */
+
+#ifndef BROTLI_ENC_WRITE_BITS_H_
+#define BROTLI_ENC_WRITE_BITS_H_
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/*#define BIT_WRITER_DEBUG */
+
+/* This function writes bits into bytes in increasing addresses, and within
+   a byte least-significant-bit first.
+
+   The function can write up to 56 bits in one go with WriteBits
+   Example: let's assume that 3 bits (Rs below) have been written already:
+
+   BYTE-0     BYTE+1       BYTE+2
+
+   0000 0RRR    0000 0000    0000 0000
+
+   Now, we could write 5 or fewer bits in MSB by just shifting by 3
+   and OR'ing to BYTE-0.
+
+   For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
+   and locate the rest in BYTE+1, BYTE+2, etc. */
+static BROTLI_INLINE void BrotliWriteBits(size_t n_bits,
+                                          uint64_t bits,
+                                          size_t* BROTLI_RESTRICT pos,
+                                          uint8_t* BROTLI_RESTRICT array) {
+#if defined(BROTLI_LITTLE_ENDIAN)
+  /* This branch of the code can write up to 56 bits at a time,
+     7 bits are lost by being perhaps already in *p and at least
+     1 bit is needed to initialize the bit-stream ahead (i.e. if 7
+     bits are in *p and we write 57 bits, then the next write will
+     access a byte that was never initialized). */
+  uint8_t* p = &array[*pos >> 3];
+  uint64_t v = (uint64_t)(*p);  /* Zero-extend 8 to 64 bits. */
+  BROTLI_LOG(("WriteBits  %2d  0x%08x%08x  %10d\n", (int)n_bits,
+      (uint32_t)(bits >> 32), (uint32_t)(bits & 0xFFFFFFFF),
+      (int)*pos));
+  BROTLI_DCHECK((bits >> n_bits) == 0);
+  BROTLI_DCHECK(n_bits <= 56);
+  v |= bits << (*pos & 7);  /* Skip the bits already used in *p. */
+  BROTLI_UNALIGNED_STORE64LE(p, v);  /* Set some bits. */
+  *pos += n_bits;
+#else
+  /* implicit & 0xFF is assumed for uint8_t arithmetics */
+  uint8_t* array_pos = &array[*pos >> 3];
+  const size_t bits_reserved_in_first_byte = (*pos & 7);
+  size_t bits_left_to_write;
+  bits <<= bits_reserved_in_first_byte;
+  *array_pos++ |= (uint8_t)bits;  /* Merge into the partially used byte. */
+  for (bits_left_to_write = n_bits + bits_reserved_in_first_byte;
+       bits_left_to_write >= 9;
+       bits_left_to_write -= 8) {
+    bits >>= 8;
+    *array_pos++ = (uint8_t)bits;
+  }
+  *array_pos = 0;  /* Clear the byte after the last one written. */
+  *pos += n_bits;
+#endif
+}
+
+static BROTLI_INLINE void BrotliWriteBitsPrepareStorage(
+    size_t pos, uint8_t* array) {
+  BROTLI_LOG(("WriteBitsPrepareStorage            %10d\n", (int)pos));
+  BROTLI_DCHECK((pos & 7) == 0);  /* pos is expected on a byte boundary. */
+  array[pos >> 3] = 0;  /* Zero the byte that holds bit position pos. */
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_WRITE_BITS_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/fuzz/decode_fuzzer.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/fuzz/decode_fuzzer.c
new file mode 100755
index 0000000000..46144e07eb
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/fuzz/decode_fuzzer.c
@@ -0,0 +1,58 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <brotli/decode.h>
+
+// Entry point for LibFuzzer.
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  /* Low 3 bits of the last input byte pick the input chunk size (0: whole). */
+  size_t addend = (size > 0) ? (size_t)(data[size - 1] & 7) : 0;
+  const uint8_t* next_in = data;
+
+  const int kBufferSize = 1024;
+  uint8_t* buffer = (uint8_t*) malloc(kBufferSize);
+  BrotliDecoderState* state = BrotliDecoderCreateInstance(0, 0, 0);
+  if (!buffer || !state) {
+    // OOM is out-of-scope here; release whichever allocation succeeded.
+    if (state) BrotliDecoderDestroyInstance(state);
+    free(buffer);
+    return 0;
+  }
+  /* The biggest "magic number" in brotli is 16MiB - 16, so no need to check
+     the cases with much longer output. */
+  const size_t total_out_limit = (addend == 0) ? (1 << 26) : (1 << 24);
+  size_t total_out = 0;
+
+  if (addend == 0)
+    addend = size;
+  /* Test both fast (addend == size) and slow (addend <= 7) decoding paths. */
+  for (size_t i = 0; i < size;) {
+    size_t next_i = i + addend;
+    if (next_i > size)
+      next_i = size;
+    size_t avail_in = next_i - i;
+    i = next_i;
+    BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT;
+    while (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
+      size_t avail_out = kBufferSize;
+      uint8_t* next_out = buffer;
+      result = BrotliDecoderDecompressStream(
+          state, &avail_in, &next_in, &avail_out, &next_out, &total_out);
+      if (total_out > total_out_limit)
+        break;
+    }
+    if (total_out > total_out_limit)
+      break;
+    if (result != BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT)
+      break;
+  }
+
+  BrotliDecoderDestroyInstance(state);
+  free(buffer);
+  return 0;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/fuzz/run_decode_fuzzer.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/fuzz/run_decode_fuzzer.c
new file mode 100755
index 0000000000..c84f98a32b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/fuzz/run_decode_fuzzer.c
@@ -0,0 +1,44 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Simple runner for decode_fuzzer.c */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size);  /* fuzz target */
+
+/* Feeds at most 1 MiB of the file named by argv[1] to the fuzz target. */
+int main(int argc, char* *argv) {
+  if (argc != 2) {
+    fprintf(stderr, "Exactly one argument is expected.\n");
+    exit(EXIT_FAILURE);
+  }
+  FILE* f = fopen(argv[1], "rb");  /* Binary mode: the input is raw bytes. */
+  if (!f) {
+    fprintf(stderr, "Failed to open input file.");
+    exit(EXIT_FAILURE);
+  }
+  size_t max_len = 1 << 20;
+  unsigned char* tmp = (unsigned char*)malloc(max_len);
+  size_t len = tmp ? fread(tmp, 1, max_len, f) : 0;
+  int read_failed = tmp && ferror(f);
+  fclose(f);  /* Everything needed is in tmp; do not leak the stream. */
+  /* Make data after the end "inaccessible". */
+  unsigned char* data = (unsigned char*)malloc(len);
+  if (!tmp || read_failed || (!data && len > 0)) {
+    fputs(read_failed ? "Failed read input file." : "Out of memory.\n", stderr);
+    exit(EXIT_FAILURE);
+  }
+  if (len > 0) memcpy(data, tmp, len);  /* data may be NULL when len == 0. */
+  free(tmp);
+
+  LLVMFuzzerTestOneInput(data, len);
+  free(data);
+  exit(EXIT_SUCCESS);
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/fuzz/test_fuzzer.sh b/codec/L2/demos/pikEnc/host/third_party/brotli/c/fuzz/test_fuzzer.sh
new file mode 100755
index 0000000000..9985194a19
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/fuzz/test_fuzzer.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+set -e
+
+export CC=${CC:-cc}
+
+BROTLI="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )"
+SRC=$BROTLI/c
+
+cd $BROTLI
+
+rm -rf bin
+mkdir bin
+cd bin
+
+cmake $BROTLI -DCMAKE_C_COMPILER="$CC" \
+    -DBUILD_TESTING=OFF -DENABLE_SANITIZER=address
+make -j$(nproc) brotlidec-static
+
+${CC} -o run_decode_fuzzer -std=c99 -fsanitize=address -I$SRC/include \
+    $SRC/fuzz/decode_fuzzer.c $SRC/fuzz/run_decode_fuzzer.c \
+    ./libbrotlidec-static.a ./libbrotlicommon-static.a
+
+mkdir decode_corpora
+unzip $BROTLI/java/org/brotli/integration/fuzz_data.zip -d decode_corpora
+
+for f in `ls decode_corpora`
+do
+ echo "Testing $f"
+ ./run_decode_fuzzer decode_corpora/$f
+done
+
+cd $BROTLI
+rm -rf bin
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/include/brotli/decode.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/include/brotli/decode.h
new file mode 100755
index 0000000000..0f5c8f9d11
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/include/brotli/decode.h
@@ -0,0 +1,344 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/**
+ * @file
+ * API for Brotli decompression.
+ */
+
+#ifndef BROTLI_DEC_DECODE_H_
+#define BROTLI_DEC_DECODE_H_
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/**
+ * Opaque structure that holds decoder state.
+ *
+ * Allocated and initialized with ::BrotliDecoderCreateInstance.
+ * Cleaned up and deallocated with ::BrotliDecoderDestroyInstance.
+ */
+typedef struct BrotliDecoderStateStruct BrotliDecoderState;
+
+/**
+ * Result type for ::BrotliDecoderDecompress and
+ * ::BrotliDecoderDecompressStream functions.
+ */
+typedef enum {
+  /** Decoding error, e.g. corrupted input or memory allocation problem. */
+  BROTLI_DECODER_RESULT_ERROR = 0,
+  /** Decoding successfully completed. */
+  BROTLI_DECODER_RESULT_SUCCESS = 1,
+  /** Partially done; should be called again with more input. */
+  BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT = 2,
+  /** Partially done; should be called again with more output. */
+  BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT = 3
+} BrotliDecoderResult;
+
+/**
+ * Template that evaluates items of ::BrotliDecoderErrorCode.
+ *
+ * Example: @code {.cpp}
+ * // Log Brotli error code.
+ * switch (brotliDecoderErrorCode) {
+ * #define CASE_(PREFIX, NAME, CODE) \
+ *   case BROTLI_DECODER ## PREFIX ## NAME: \
+ *     LOG(INFO) << "error code:" << #NAME; \
+ *     break;
+ * #define NEWLINE_
+ * BROTLI_DECODER_ERROR_CODES_LIST(CASE_, NEWLINE_)
+ * #undef CASE_
+ * #undef NEWLINE_
+ *   default: LOG(FATAL) << "unknown brotli error code";
+ * }
+ * @endcode
+ */
+#define BROTLI_DECODER_ERROR_CODES_LIST(BROTLI_ERROR_CODE, SEPARATOR)      \
+  BROTLI_ERROR_CODE(_, NO_ERROR, 0) SEPARATOR                              \
+  /* Same as BrotliDecoderResult values */                                 \
+  BROTLI_ERROR_CODE(_, SUCCESS, 1) SEPARATOR                               \
+  BROTLI_ERROR_CODE(_, NEEDS_MORE_INPUT, 2) SEPARATOR                      \
+  BROTLI_ERROR_CODE(_, NEEDS_MORE_OUTPUT, 3) SEPARATOR                     \
+                                                                           \
+  /* Errors caused by invalid input */                                     \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, EXUBERANT_NIBBLE, -1) SEPARATOR        \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, RESERVED, -2) SEPARATOR                \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, EXUBERANT_META_NIBBLE, -3) SEPARATOR   \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, SIMPLE_HUFFMAN_ALPHABET, -4) SEPARATOR \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, SIMPLE_HUFFMAN_SAME, -5) SEPARATOR     \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, CL_SPACE, -6) SEPARATOR                \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, HUFFMAN_SPACE, -7) SEPARATOR           \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, CONTEXT_MAP_REPEAT, -8) SEPARATOR      \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, BLOCK_LENGTH_1, -9) SEPARATOR          \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, BLOCK_LENGTH_2, -10) SEPARATOR         \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, TRANSFORM, -11) SEPARATOR              \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, DICTIONARY, -12) SEPARATOR             \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, WINDOW_BITS, -13) SEPARATOR            \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, PADDING_1, -14) SEPARATOR              \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, PADDING_2, -15) SEPARATOR              \
+  BROTLI_ERROR_CODE(_ERROR_FORMAT_, DISTANCE, -16) SEPARATOR               \
+                                                                           \
+  /* -17..-18 codes are reserved */                                        \
+                                                                           \
+  BROTLI_ERROR_CODE(_ERROR_, DICTIONARY_NOT_SET, -19) SEPARATOR            \
+  BROTLI_ERROR_CODE(_ERROR_, INVALID_ARGUMENTS, -20) SEPARATOR             \
+                                                                           \
+  /* Memory allocation problems */                                         \
+  BROTLI_ERROR_CODE(_ERROR_ALLOC_, CONTEXT_MODES, -21) SEPARATOR           \
+  /* Literal, insert and distance trees together */                        \
+  BROTLI_ERROR_CODE(_ERROR_ALLOC_, TREE_GROUPS, -22) SEPARATOR             \
+  /* -23..-24 codes are reserved for distinct tree groups */               \
+  BROTLI_ERROR_CODE(_ERROR_ALLOC_, CONTEXT_MAP, -25) SEPARATOR             \
+  BROTLI_ERROR_CODE(_ERROR_ALLOC_, RING_BUFFER_1, -26) SEPARATOR           \
+  BROTLI_ERROR_CODE(_ERROR_ALLOC_, RING_BUFFER_2, -27) SEPARATOR           \
+  /* -28..-29 codes are reserved for dynamic ring-buffer allocation */     \
+  BROTLI_ERROR_CODE(_ERROR_ALLOC_, BLOCK_TYPE_TREES, -30) SEPARATOR        \
+                                                                           \
+  /* "Impossible" states */                                                \
+  BROTLI_ERROR_CODE(_ERROR_, UNREACHABLE, -31)
+
+/**
+ * Error code for detailed logging / production debugging.
+ *
+ * See ::BrotliDecoderGetErrorCode and ::BROTLI_LAST_ERROR_CODE.
+ */
+typedef enum {
+#define BROTLI_COMMA_ ,
+#define BROTLI_ERROR_CODE_ENUM_ITEM_(PREFIX, NAME, CODE) \
+    BROTLI_DECODER ## PREFIX ## NAME = CODE
+  BROTLI_DECODER_ERROR_CODES_LIST(BROTLI_ERROR_CODE_ENUM_ITEM_, BROTLI_COMMA_)
+} BrotliDecoderErrorCode;
+#undef BROTLI_ERROR_CODE_ENUM_ITEM_
+#undef BROTLI_COMMA_
+
+/**
+ * The value of the last error code, negative integer.
+ *
+ * All other error code values are in the range from ::BROTLI_LAST_ERROR_CODE
+ * to @c -1. There are also 4 other possible non-error codes @c 0 .. @c 3 in
+ * ::BrotliDecoderErrorCode enumeration.
+ */
+#define BROTLI_LAST_ERROR_CODE BROTLI_DECODER_ERROR_UNREACHABLE
+
+/** Options to be used with ::BrotliDecoderSetParameter. */
+typedef enum BrotliDecoderParameter {
+  /**
+   * Disable "canny" ring buffer allocation strategy.
+   *
+   * Ring buffer is allocated according to window size, despite the real size of
+   * the content.
+   */
+  BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION = 0,
+  /**
+   * Flag that determines if "Large Window Brotli" is used.
+   */
+  BROTLI_DECODER_PARAM_LARGE_WINDOW = 1
+} BrotliDecoderParameter;
+
+/**
+ * Sets the specified parameter to the given decoder instance.
+ *
+ * @param state decoder instance
+ * @param param parameter to set
+ * @param value new parameter value
+ * @returns ::BROTLI_FALSE if parameter is unrecognized, or value is invalid
+ * @returns ::BROTLI_TRUE if value is accepted
+ */
+BROTLI_DEC_API BROTLI_BOOL BrotliDecoderSetParameter(
+    BrotliDecoderState* state, BrotliDecoderParameter param, uint32_t value);
+
+/**
+ * Creates an instance of ::BrotliDecoderState and initializes it.
+ *
+ * The instance can be used once for decoding and should then be destroyed with
+ * ::BrotliDecoderDestroyInstance, it cannot be reused for a new decoding
+ * session.
+ *
+ * @p alloc_func and @p free_func @b MUST be both zero or both non-zero. In the
+ * case they are both zero, default memory allocators are used. @p opaque is
+ * passed to @p alloc_func and @p free_func when they are called. @p free_func
+ * has to return without doing anything when asked to free a NULL pointer.
+ *
+ * @param alloc_func custom memory allocation function
+ * @param free_func custom memory free function
+ * @param opaque custom memory manager handle
+ * @returns @c 0 if instance can not be allocated or initialized
+ * @returns pointer to initialized ::BrotliDecoderState otherwise
+ */
+BROTLI_DEC_API BrotliDecoderState* BrotliDecoderCreateInstance(
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
+
+/**
+ * Deinitializes and frees ::BrotliDecoderState instance.
+ *
+ * @param state decoder instance to be cleaned up and deallocated
+ */
+BROTLI_DEC_API void BrotliDecoderDestroyInstance(BrotliDecoderState* state);
+
+/**
+ * Performs one-shot memory-to-memory decompression.
+ *
+ * Decompresses the data in @p encoded_buffer into @p decoded_buffer, and sets
+ * @p *decoded_size to the decompressed length.
+ *
+ * @param encoded_size size of @p encoded_buffer
+ * @param encoded_buffer compressed data buffer with at least @p encoded_size
+ *        addressable bytes
+ * @param[in, out] decoded_size @b in: size of @p decoded_buffer; \n
+ *                 @b out: length of decompressed data written to
+ *                 @p decoded_buffer
+ * @param decoded_buffer decompressed data destination buffer
+ * @returns ::BROTLI_DECODER_RESULT_ERROR if input is corrupted, memory
+ *          allocation failed, or @p decoded_buffer is not large enough;
+ * @returns ::BROTLI_DECODER_RESULT_SUCCESS otherwise
+ */
+BROTLI_DEC_API BrotliDecoderResult BrotliDecoderDecompress(
+    size_t encoded_size,
+    const uint8_t encoded_buffer[BROTLI_ARRAY_PARAM(encoded_size)],
+    size_t* decoded_size,
+    uint8_t decoded_buffer[BROTLI_ARRAY_PARAM(*decoded_size)]);
+
+/**
+ * Decompresses the input stream to the output stream.
+ *
+ * The values @p *available_in and @p *available_out must specify the number of
+ * bytes addressable at @p *next_in and @p *next_out respectively.
+ * When @p *available_out is @c 0, @p next_out is allowed to be @c NULL.
+ *
+ * After each call, @p *available_in will be decremented by the amount of input
+ * bytes consumed, and the @p *next_in pointer will be incremented by that
+ * amount. Similarly, @p *available_out will be decremented by the amount of
+ * output bytes written, and the @p *next_out pointer will be incremented by
+ * that amount.
+ *
+ * @p total_out, if it is not a null-pointer, will be set to the number
+ * of bytes decompressed since the last @p state initialization.
+ *
+ * @note Input is never overconsumed, so @p next_in and @p available_in could be
+ * passed to the next consumer after decoding is complete.
+ *
+ * @param state decoder instance
+ * @param[in, out] available_in @b in: amount of available input; \n
+ *                 @b out: amount of unused input
+ * @param[in, out] next_in pointer to the next compressed byte
+ * @param[in, out] available_out @b in: length of output buffer; \n
+ *                 @b out: remaining size of output buffer
+ * @param[in, out] next_out output buffer cursor;
+ *                 can be @c NULL if @p available_out is @c 0
+ * @param[out] total_out number of bytes decompressed so far; can be @c NULL
+ * @returns ::BROTLI_DECODER_RESULT_ERROR if input is corrupted, memory
+ *          allocation failed, arguments were invalid, etc.;
+ *          use ::BrotliDecoderGetErrorCode to get detailed error code
+ * @returns ::BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT decoding is blocked until
+ *          more input data is provided
+ * @returns ::BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT decoding is blocked until
+ *          more output space is provided
+ * @returns ::BROTLI_DECODER_RESULT_SUCCESS decoding is finished, no more
+ *          input might be consumed and no more output will be produced
+ */
+BROTLI_DEC_API BrotliDecoderResult BrotliDecoderDecompressStream(
+  BrotliDecoderState* state, size_t* available_in, const uint8_t** next_in,
+  size_t* available_out, uint8_t** next_out, size_t* total_out);
+
+/**
+ * Checks if decoder has more output.
+ *
+ * @param state decoder instance
+ * @returns ::BROTLI_TRUE, if decoder has some unconsumed output
+ * @returns ::BROTLI_FALSE otherwise
+ */
+BROTLI_DEC_API BROTLI_BOOL BrotliDecoderHasMoreOutput(
+    const BrotliDecoderState* state);
+
+/**
+ * Acquires pointer to internal output buffer.
+ *
+ * This method is used to make language bindings easier and more efficient:
+ *  -# push data to ::BrotliDecoderDecompressStream,
+ *     until ::BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT is reported
+ *  -# use ::BrotliDecoderTakeOutput to peek bytes and copy to language-specific
+ *     entity
+ *
+ * Also this could be useful if there is an output stream that is able to
+ * consume all the provided data (e.g. when data is saved to file system).
+ *
+ * @attention After every call to ::BrotliDecoderTakeOutput @p *size bytes of
+ *            output are considered consumed for all consecutive calls to the
+ *            instance methods; returned pointer becomes invalidated as well.
+ *
+ * @note Decoder output is not guaranteed to be contiguous. This means that
+ *       after the size-unrestricted call to ::BrotliDecoderTakeOutput,
+ *       immediate next call to ::BrotliDecoderTakeOutput may return more data.
+ *
+ * @param state decoder instance
+ * @param[in, out] size @b in: number of bytes caller is ready to take, @c 0 if
+ *                 any amount could be handled; \n
+ *                 @b out: amount of data pointed by returned pointer and
+ *                 considered consumed; \n
+ *                 out value is never greater than in value, unless it is @c 0
+ * @returns pointer to output data
+ */
+BROTLI_DEC_API const uint8_t* BrotliDecoderTakeOutput(
+    BrotliDecoderState* state, size_t* size);
+
+/**
+ * Checks if instance has already consumed input.
+ *
+ * Instance that returns ::BROTLI_FALSE is considered "fresh" and could be
+ * reused.
+ *
+ * @param state decoder instance
+ * @returns ::BROTLI_TRUE if decoder has already used some input bytes
+ * @returns ::BROTLI_FALSE otherwise
+ */
+BROTLI_DEC_API BROTLI_BOOL BrotliDecoderIsUsed(const BrotliDecoderState* state);
+
+/**
+ * Checks if decoder instance reached the final state.
+ *
+ * @param state decoder instance
+ * @returns ::BROTLI_TRUE if decoder is in a state where it reached the end of
+ *          the input and produced all of the output
+ * @returns ::BROTLI_FALSE otherwise
+ */
+BROTLI_DEC_API BROTLI_BOOL BrotliDecoderIsFinished(
+    const BrotliDecoderState* state);
+
+/**
+ * Acquires a detailed error code.
+ *
+ * Should be used only after ::BrotliDecoderDecompressStream returns
+ * ::BROTLI_DECODER_RESULT_ERROR.
+ *
+ * See also ::BrotliDecoderErrorString
+ *
+ * @param state decoder instance
+ * @returns last saved error code
+ */
+BROTLI_DEC_API BrotliDecoderErrorCode BrotliDecoderGetErrorCode(
+    const BrotliDecoderState* state);
+
+/**
+ * Converts error code to a c-string.
+ */
+BROTLI_DEC_API const char* BrotliDecoderErrorString(BrotliDecoderErrorCode c);
+
+/**
+ * Gets a decoder library version.
+ *
+ * Look at BROTLI_VERSION for more information.
+ */
+BROTLI_DEC_API uint32_t BrotliDecoderVersion(void);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
+
+#endif  /* BROTLI_DEC_DECODE_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/include/brotli/encode.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/include/brotli/encode.h
new file mode 100755
index 0000000000..0ced7e55be
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/include/brotli/encode.h
@@ -0,0 +1,427 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/**
+ * @file
+ * API for Brotli compression.
+ */
+
+#ifndef BROTLI_ENC_ENCODE_H_
+#define BROTLI_ENC_ENCODE_H_
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Minimal value for ::BROTLI_PARAM_LGWIN parameter. */
+#define BROTLI_MIN_WINDOW_BITS 10
+/**
+ * Maximal value for ::BROTLI_PARAM_LGWIN parameter.
+ *
+ * @note equal to @c BROTLI_MAX_DISTANCE_BITS constant.
+ */
+#define BROTLI_MAX_WINDOW_BITS 24
+/**
+ * Maximal value for ::BROTLI_PARAM_LGWIN parameter
+ * in "Large Window Brotli" (32-bit).
+ */
+#define BROTLI_LARGE_MAX_WINDOW_BITS 30
+/** Minimal value for ::BROTLI_PARAM_LGBLOCK parameter. */
+#define BROTLI_MIN_INPUT_BLOCK_BITS 16
+/** Maximal value for ::BROTLI_PARAM_LGBLOCK parameter. */
+#define BROTLI_MAX_INPUT_BLOCK_BITS 24
+/** Minimal value for ::BROTLI_PARAM_QUALITY parameter. */
+#define BROTLI_MIN_QUALITY 0
+/** Maximal value for ::BROTLI_PARAM_QUALITY parameter. */
+#define BROTLI_MAX_QUALITY 11
+
+/** Options for ::BROTLI_PARAM_MODE parameter. */
+typedef enum BrotliEncoderMode {
+  /**
+   * Default compression mode.
+   *
+   * In this mode compressor does not know anything in advance about the
+   * properties of the input.
+   */
+  BROTLI_MODE_GENERIC = 0,
+  /** Compression mode for UTF-8 formatted text input. */
+  BROTLI_MODE_TEXT = 1,
+  /** Compression mode used in WOFF 2.0. */
+  BROTLI_MODE_FONT = 2
+} BrotliEncoderMode;
+
+/** Default value for ::BROTLI_PARAM_QUALITY parameter. */
+#define BROTLI_DEFAULT_QUALITY 11
+/** Default value for ::BROTLI_PARAM_LGWIN parameter. */
+#define BROTLI_DEFAULT_WINDOW 22
+/** Default value for ::BROTLI_PARAM_MODE parameter. */
+#define BROTLI_DEFAULT_MODE BROTLI_MODE_GENERIC
+
+/** Operations that can be performed by streaming encoder. */
+typedef enum BrotliEncoderOperation {
+  /**
+   * Process input.
+   *
+   * Encoder may postpone producing output, until it has processed enough input.
+   */
+  BROTLI_OPERATION_PROCESS = 0,
+  /**
+   * Produce output for all processed input.
+   *
+   * Actual flush is performed when input stream is depleted and there is enough
+   * space in output stream. This means that client should repeat
+   * ::BROTLI_OPERATION_FLUSH operation until @p available_in becomes @c 0, and
+   * ::BrotliEncoderHasMoreOutput returns ::BROTLI_FALSE. If output is acquired
+   * via ::BrotliEncoderTakeOutput, then operation should be repeated after
+   * output buffer is drained.
+   *
+   * @warning Until flush is complete, client @b SHOULD @b NOT swap,
+   *          reduce or extend input stream.
+   *
+   * When flush is complete, output data will be sufficient for decoder to
+   * reproduce all the given input.
+   */
+  BROTLI_OPERATION_FLUSH = 1,
+  /**
+   * Finalize the stream.
+   *
+   * Actual finalization is performed when input stream is depleted and there is
+   * enough space in output stream. This means that client should repeat
+   * ::BROTLI_OPERATION_FINISH operation until @p available_in becomes @c 0, and
+   * ::BrotliEncoderHasMoreOutput returns ::BROTLI_FALSE. If output is acquired
+   * via ::BrotliEncoderTakeOutput, then operation should be repeated after
+   * output buffer is drained.
+   *
+   * @warning Until finalization is complete, client @b SHOULD @b NOT swap,
+   *          reduce or extend input stream.
+   *
+   * Helper function ::BrotliEncoderIsFinished checks if stream is finalized and
+   * output fully dumped.
+   *
+   * Adding more input data to finalized stream is impossible.
+   */
+  BROTLI_OPERATION_FINISH = 2,
+  /**
+   * Emit metadata block to stream.
+   *
+   * Metadata is opaque to Brotli: neither encoder, nor decoder processes this
+   * data or relies on it. It may be used to pass some extra information from
+   * encoder client to decoder client without interfering with main data stream.
+   *
+   * @note Encoder may emit empty metadata blocks internally, to pad encoded
+   *       stream to byte boundary.
+   *
+   * @warning Until emitting metadata is complete client @b SHOULD @b NOT swap,
+   *          reduce or extend input stream.
+   *
+   * @warning The whole content of input buffer is considered to be the content
+   *          of metadata block. Do @b NOT @e append metadata to input stream,
+   *          before it is depleted with other operations.
+   *
+   * Stream is soft-flushed before metadata block is emitted. Metadata block
+   * @b MUST be no longer than 16MiB.
+   */
+  BROTLI_OPERATION_EMIT_METADATA = 3
+} BrotliEncoderOperation;
+
+/** Options to be used with ::BrotliEncoderSetParameter. */
+typedef enum BrotliEncoderParameter {
+  /**
+   * Tune encoder for specific input.
+   *
+   * ::BrotliEncoderMode enumerates all available values.
+   */
+  BROTLI_PARAM_MODE = 0,
+  /**
+   * The main compression speed-density lever.
+   *
+   * The higher the quality, the slower the compression. Range is
+   * from ::BROTLI_MIN_QUALITY to ::BROTLI_MAX_QUALITY.
+   */
+  BROTLI_PARAM_QUALITY = 1,
+  /**
+   * Recommended sliding LZ77 window size.
+   *
+   * Encoder may reduce this value, e.g. if input is much smaller than
+   * window size.
+   *
+   * Window size is `(1 << value) - 16`.
+   *
+   * Range is from ::BROTLI_MIN_WINDOW_BITS to ::BROTLI_MAX_WINDOW_BITS.
+   */
+  BROTLI_PARAM_LGWIN = 2,
+  /**
+   * Recommended input block size.
+   *
+   * Encoder may reduce this value, e.g. if input is much smaller than input
+   * block size.
+   *
+   * Range is from ::BROTLI_MIN_INPUT_BLOCK_BITS to
+   * ::BROTLI_MAX_INPUT_BLOCK_BITS.
+   *
+   * @note Bigger input block size allows better compression, but consumes more
+   *       memory. \n The rough formula of memory used for temporary input
+   *       storage is `3 << lgBlock`.
+   */
+  BROTLI_PARAM_LGBLOCK = 3,
+  /**
+   * Flag that affects usage of "literal context modeling" format feature.
+   *
+   * This flag is a "decoding-speed vs compression ratio" trade-off.
+   */
+  BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING = 4,
+  /**
+   * Estimated total input size for all ::BrotliEncoderCompressStream calls.
+   *
+   * The default value is 0, which means that the total input size is unknown.
+   */
+  BROTLI_PARAM_SIZE_HINT = 5,
+  /**
+   * Flag that determines if "Large Window Brotli" is used.
+   */
+  BROTLI_PARAM_LARGE_WINDOW = 6,
+  /**
+   * Recommended number of postfix bits (NPOSTFIX).
+   *
+   * Encoder may change this value.
+   *
+   * Range is from 0 to ::BROTLI_MAX_NPOSTFIX.
+   */
+  BROTLI_PARAM_NPOSTFIX = 7,
+  /**
+   * Recommended number of direct distance codes (NDIRECT).
+   *
+   * Encoder may change this value.
+   *
+   * Range is from 0 to (15 << NPOSTFIX) in steps of (1 << NPOSTFIX).
+   */
+  BROTLI_PARAM_NDIRECT = 8
+} BrotliEncoderParameter;
+
+/**
+ * Opaque structure that holds encoder state.
+ *
+ * Allocated and initialized with ::BrotliEncoderCreateInstance.
+ * Cleaned up and deallocated with ::BrotliEncoderDestroyInstance.
+ */
+typedef struct BrotliEncoderStateStruct BrotliEncoderState;
+
+/**
+ * Sets the specified parameter to the given encoder instance.
+ *
+ * @param state encoder instance
+ * @param param parameter to set
+ * @param value new parameter value
+ * @returns ::BROTLI_FALSE if parameter is unrecognized, or value is invalid
+ * @returns ::BROTLI_FALSE if value of parameter can not be changed at current
+ *          encoder state (e.g. when encoding is started, window size might be
+ *          already encoded and therefore it is impossible to change it)
+ * @returns ::BROTLI_TRUE if value is accepted
+ * @warning invalid values might be accepted in case they would not break
+ *          encoding process.
+ */
+BROTLI_ENC_API BROTLI_BOOL BrotliEncoderSetParameter(
+    BrotliEncoderState* state, BrotliEncoderParameter param, uint32_t value);
+
+/**
+ * Creates an instance of ::BrotliEncoderState and initializes it.
+ *
+ * @p alloc_func and @p free_func @b MUST be both zero or both non-zero. In the
+ * case they are both zero, default memory allocators are used. @p opaque is
+ * passed to @p alloc_func and @p free_func when they are called. @p free_func
+ * has to return without doing anything when asked to free a NULL pointer.
+ *
+ * @param alloc_func custom memory allocation function
+ * @param free_func custom memory free function
+ * @param opaque custom memory manager handle
+ * @returns @c 0 if instance can not be allocated or initialized
+ * @returns pointer to initialized ::BrotliEncoderState otherwise
+ */
+BROTLI_ENC_API BrotliEncoderState* BrotliEncoderCreateInstance(
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
+
+/**
+ * Deinitializes and frees ::BrotliEncoderState instance.
+ *
+ * @param state encoder instance to be cleaned up and deallocated
+ */
+BROTLI_ENC_API void BrotliEncoderDestroyInstance(BrotliEncoderState* state);
+
+/**
+ * Calculates the output size bound for the given @p input_size.
+ *
+ * @warning Result is only valid if quality is at least @c 2 and, in
+ *          case ::BrotliEncoderCompressStream was used, no flushes
+ *          (::BROTLI_OPERATION_FLUSH) were performed.
+ *
+ * @param input_size size of projected input
+ * @returns @c 0 if result does not fit @c size_t
+ */
+BROTLI_ENC_API size_t BrotliEncoderMaxCompressedSize(size_t input_size);
+
+/**
+ * Performs one-shot memory-to-memory compression.
+ *
+ * Compresses the data in @p input_buffer into @p encoded_buffer, and sets
+ * @p *encoded_size to the compressed length.
+ *
+ * @note If ::BrotliEncoderMaxCompressedSize(@p input_size) returns non-zero
+ *       value, then output is guaranteed to be no longer than that.
+ *
+ * @param quality quality parameter value, e.g. ::BROTLI_DEFAULT_QUALITY
+ * @param lgwin lgwin parameter value, e.g. ::BROTLI_DEFAULT_WINDOW
+ * @param mode mode parameter value, e.g. ::BROTLI_DEFAULT_MODE
+ * @param input_size size of @p input_buffer
+ * @param input_buffer input data buffer with at least @p input_size
+ *        addressable bytes
+ * @param[in, out] encoded_size @b in: size of @p encoded_buffer; \n
+ *                 @b out: length of compressed data written to
+ *                 @p encoded_buffer, or @c 0 if compression fails
+ * @param encoded_buffer compressed data destination buffer
+ * @returns ::BROTLI_FALSE in case of compression error
+ * @returns ::BROTLI_FALSE if output buffer is too small
+ * @returns ::BROTLI_TRUE otherwise
+ */
+BROTLI_ENC_API BROTLI_BOOL BrotliEncoderCompress(
+    int quality, int lgwin, BrotliEncoderMode mode, size_t input_size,
+    const uint8_t input_buffer[BROTLI_ARRAY_PARAM(input_size)],
+    size_t* encoded_size,
+    uint8_t encoded_buffer[BROTLI_ARRAY_PARAM(*encoded_size)]);
+
+/**
+ * Compresses input stream to output stream.
+ *
+ * The values @p *available_in and @p *available_out must specify the number of
+ * bytes addressable at @p *next_in and @p *next_out respectively.
+ * When @p *available_out is @c 0, @p next_out is allowed to be @c NULL.
+ *
+ * After each call, @p *available_in will be decremented by the amount of input
+ * bytes consumed, and the @p *next_in pointer will be incremented by that
+ * amount. Similarly, @p *available_out will be decremented by the amount of
+ * output bytes written, and the @p *next_out pointer will be incremented by
+ * that amount.
+ *
+ * @p total_out, if it is not a null-pointer, will be set to the number
+ * of bytes compressed since the last @p state initialization.
+ *
+ *
+ *
+ * Internally workflow consists of 3 tasks:
+ *  -# (optionally) copy input data to internal buffer
+ *  -# actually compress data and (optionally) store it to internal buffer
+ *  -# (optionally) copy compressed bytes from internal buffer to output stream
+ *
+ * Whenever all 3 tasks can't move forward anymore, or error occurs, this
+ * method returns the control flow to caller.
+ *
+ * @p op is used to perform flush, finish the stream, or inject metadata block.
+ * See ::BrotliEncoderOperation for more information.
+ *
+ * Flushing the stream means forcing encoding of all input passed to encoder and
+ * completing the current output block, so it could be fully decoded by stream
+ * decoder. To perform flush set @p op to ::BROTLI_OPERATION_FLUSH.
+ * Under some circumstances (e.g. lack of output stream capacity) this operation
+ * would require several calls to ::BrotliEncoderCompressStream. The method must
+ * be called again until both input stream is depleted and encoder has no more
+ * output (see ::BrotliEncoderHasMoreOutput) after the method is called.
+ *
+ * Finishing the stream means encoding of all input passed to encoder and
+ * adding specific "final" marks, so stream decoder could determine that stream
+ * is complete. To perform finish set @p op to ::BROTLI_OPERATION_FINISH.
+ * Under some circumstances (e.g. lack of output stream capacity) this operation
+ * would require several calls to ::BrotliEncoderCompressStream. The method must
+ * be called again until both input stream is depleted and encoder has no more
+ * output (see ::BrotliEncoderHasMoreOutput) after the method is called.
+ *
+ * @warning When flushing and finishing, @p op should not change until operation
+ *          is complete; input stream should not be swapped, reduced or
+ *          extended as well.
+ *
+ * @param state encoder instance
+ * @param op requested operation
+ * @param[in, out] available_in @b in: amount of available input; \n
+ *                 @b out: amount of unused input
+ * @param[in, out] next_in pointer to the next input byte
+ * @param[in, out] available_out @b in: length of output buffer; \n
+ *                 @b out: remaining size of output buffer
+ * @param[in, out] next_out compressed output buffer cursor;
+ *                 can be @c NULL if @p available_out is @c 0
+ * @param[out] total_out number of bytes produced so far; can be @c NULL
+ * @returns ::BROTLI_FALSE if there was an error
+ * @returns ::BROTLI_TRUE otherwise
+ */
+BROTLI_ENC_API BROTLI_BOOL BrotliEncoderCompressStream(
+    BrotliEncoderState* state, BrotliEncoderOperation op, size_t* available_in,
+    const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
+    size_t* total_out);
+
+/**
+ * Checks if encoder instance reached the final state.
+ *
+ * @param state encoder instance
+ * @returns ::BROTLI_TRUE if encoder is in a state where it reached the end of
+ *          the input and produced all of the output
+ * @returns ::BROTLI_FALSE otherwise
+ */
+BROTLI_ENC_API BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* state);
+
+/**
+ * Checks if encoder has more output.
+ *
+ * @param state encoder instance
+ * @returns ::BROTLI_TRUE, if encoder has some unconsumed output
+ * @returns ::BROTLI_FALSE otherwise
+ */
+BROTLI_ENC_API BROTLI_BOOL BrotliEncoderHasMoreOutput(
+    BrotliEncoderState* state);
+
+/**
+ * Acquires pointer to internal output buffer.
+ *
+ * This method is used to make language bindings easier and more efficient:
+ *  -# push data to ::BrotliEncoderCompressStream,
+ *     until ::BrotliEncoderHasMoreOutput returns ::BROTLI_TRUE
+ *  -# use ::BrotliEncoderTakeOutput to peek bytes and copy to language-specific
+ *     entity
+ *
+ * Also this could be useful if there is an output stream that is able to
+ * consume all the provided data (e.g. when data is saved to file system).
+ *
+ * @attention After every call to ::BrotliEncoderTakeOutput @p *size bytes of
+ *            output are considered consumed for all consecutive calls to the
+ *            instance methods; returned pointer becomes invalidated as well.
+ *
+ * @note Encoder output is not guaranteed to be contiguous. This means that
+ *       after the size-unrestricted call to ::BrotliEncoderTakeOutput,
+ *       immediate next call to ::BrotliEncoderTakeOutput may return more data.
+ *
+ * @param state encoder instance
+ * @param[in, out] size @b in: number of bytes caller is ready to take, @c 0 if
+ *                 any amount could be handled; \n
+ *                 @b out: amount of data pointed by returned pointer and
+ *                 considered consumed; \n
+ *                 out value is never greater than in value, unless it is @c 0
+ * @returns pointer to output data
+ */
+BROTLI_ENC_API const uint8_t* BrotliEncoderTakeOutput(
+    BrotliEncoderState* state, size_t* size);
+
+
+/**
+ * Gets an encoder library version.
+ *
+ * Look at BROTLI_VERSION for more information.
+ */
+BROTLI_ENC_API uint32_t BrotliEncoderVersion(void);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  /* extern "C" */
+#endif
+
+#endif  /* BROTLI_ENC_ENCODE_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/include/brotli/port.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/include/brotli/port.h
new file mode 100755
index 0000000000..20dc2314d8
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/include/brotli/port.h
@@ -0,0 +1,274 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Macros for compiler / platform specific API declarations. */
+
+#ifndef BROTLI_COMMON_PORT_H_
+#define BROTLI_COMMON_PORT_H_
+
+/* The following macros were borrowed from https://github.com/nemequ/hedley
+ * with permission of original author - Evan Nemerson <evan@nemerson.com> */
+
+/* >>> >>> >>> hedley macros */
+
+#define BROTLI_MAKE_VERSION(major, minor, revision) \
+  (((major) * 1000000) + ((minor) * 1000) + (revision))
+
+#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__)
+#define BROTLI_GNUC_VERSION \
+  BROTLI_MAKE_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)
+#elif defined(__GNUC__)
+#define BROTLI_GNUC_VERSION BROTLI_MAKE_VERSION(__GNUC__, __GNUC_MINOR__, 0)
+#endif
+
+#if defined(BROTLI_GNUC_VERSION)
+#define BROTLI_GNUC_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_GNUC_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_GNUC_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000)
+#define BROTLI_MSVC_VERSION                                \
+  BROTLI_MAKE_VERSION((_MSC_FULL_VER / 10000000),          \
+                      (_MSC_FULL_VER % 10000000) / 100000, \
+                      (_MSC_FULL_VER % 100000) / 100)
+#elif defined(_MSC_FULL_VER)
+#define BROTLI_MSVC_VERSION                              \
+  BROTLI_MAKE_VERSION((_MSC_FULL_VER / 1000000),         \
+                      (_MSC_FULL_VER % 1000000) / 10000, \
+                      (_MSC_FULL_VER % 10000) / 10)
+#elif defined(_MSC_VER)
+#define BROTLI_MSVC_VERSION \
+  BROTLI_MAKE_VERSION(_MSC_VER / 100, _MSC_VER % 100, 0)
+#endif
+
+#if !defined(_MSC_VER)  /* not MSVC: every version check is false */
+#define BROTLI_MSVC_VERSION_CHECK(major, minor, patch) (0)
+#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
+#define BROTLI_MSVC_VERSION_CHECK(major, minor, patch) \
+  (_MSC_FULL_VER >= (((major) * 10000000) + ((minor) * 100000) + (patch)))
+#elif defined(_MSC_VER) && (_MSC_VER >= 1200)
+#define BROTLI_MSVC_VERSION_CHECK(major, minor, patch) \
+  (_MSC_FULL_VER >= (((major) * 1000000) + ((minor) * 10000) + (patch)))
+#else  /* older compilers: compare against _MSC_VER, patch is ignored */
+#define BROTLI_MSVC_VERSION_CHECK(major, minor, patch) \
+  (_MSC_VER >= (((major) * 100) + (minor)))
+#endif  /* arguments are parenthesized so expressions can be passed safely */
+
+#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE)
+#define BROTLI_INTEL_VERSION                   \
+  BROTLI_MAKE_VERSION(__INTEL_COMPILER / 100,  \
+                      __INTEL_COMPILER % 100,  \
+                      __INTEL_COMPILER_UPDATE)
+#elif defined(__INTEL_COMPILER)
+#define BROTLI_INTEL_VERSION \
+  BROTLI_MAKE_VERSION(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0)
+#endif
+
+#if defined(BROTLI_INTEL_VERSION)
+#define BROTLI_INTEL_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_INTEL_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_INTEL_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__PGI) && \
+    defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__)
+#define BROTLI_PGI_VERSION \
+  BROTLI_MAKE_VERSION(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__)
+#endif
+
+#if defined(BROTLI_PGI_VERSION)
+#define BROTLI_PGI_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_PGI_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_PGI_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000)
+#define BROTLI_SUNPRO_VERSION                                       \
+  BROTLI_MAKE_VERSION(                                              \
+    (((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), \
+    (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf),   \
+    (__SUNPRO_C & 0xf) * 10)
+#elif defined(__SUNPRO_C)
+#define BROTLI_SUNPRO_VERSION                  \
+  BROTLI_MAKE_VERSION((__SUNPRO_C >> 8) & 0xf, \
+                      (__SUNPRO_C >> 4) & 0xf, \
+                      (__SUNPRO_C) & 0xf)
+#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000)
+#define BROTLI_SUNPRO_VERSION                                         \
+  BROTLI_MAKE_VERSION(                                                \
+    (((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), \
+    (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf),   \
+    (__SUNPRO_CC & 0xf) * 10)
+#elif defined(__SUNPRO_CC)
+#define BROTLI_SUNPRO_VERSION                   \
+  BROTLI_MAKE_VERSION((__SUNPRO_CC >> 8) & 0xf, \
+                      (__SUNPRO_CC >> 4) & 0xf, \
+                      (__SUNPRO_CC) & 0xf)
+#endif
+
+#if defined(BROTLI_SUNPRO_VERSION)
+#define BROTLI_SUNPRO_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_SUNPRO_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_SUNPRO_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION)
+#define BROTLI_ARM_VERSION                                       \
+  BROTLI_MAKE_VERSION((__ARMCOMPILER_VERSION / 1000000),         \
+                      (__ARMCOMPILER_VERSION % 1000000) / 10000, \
+                      (__ARMCOMPILER_VERSION % 10000) / 100)
+#elif defined(__CC_ARM) && defined(__ARMCC_VERSION)
+#define BROTLI_ARM_VERSION                                 \
+  BROTLI_MAKE_VERSION((__ARMCC_VERSION / 1000000),         \
+                      (__ARMCC_VERSION % 1000000) / 10000, \
+                      (__ARMCC_VERSION % 10000) / 100)
+#endif
+
+#if defined(BROTLI_ARM_VERSION)
+#define BROTLI_ARM_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_ARM_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_ARM_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__ibmxl__)
+#define BROTLI_IBM_VERSION                    \
+  BROTLI_MAKE_VERSION(__ibmxl_version__,      \
+                      __ibmxl_release__,      \
+                      __ibmxl_modification__)
+#elif defined(__xlC__) && defined(__xlC_ver__)
+#define BROTLI_IBM_VERSION \
+  BROTLI_MAKE_VERSION(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff)
+#elif defined(__xlC__)
+#define BROTLI_IBM_VERSION BROTLI_MAKE_VERSION(__xlC__ >> 8, __xlC__ & 0xff, 0)
+#endif
+
+#if defined(BROTLI_IBM_VERSION)
+#define BROTLI_IBM_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_IBM_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_IBM_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__TI_COMPILER_VERSION__)
+#define BROTLI_TI_VERSION                                         \
+  BROTLI_MAKE_VERSION((__TI_COMPILER_VERSION__ / 1000000),        \
+                      (__TI_COMPILER_VERSION__ % 1000000) / 1000, \
+                      (__TI_COMPILER_VERSION__ % 1000))
+#endif
+
+#if defined(BROTLI_TI_VERSION)
+#define BROTLI_TI_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_TI_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_TI_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__IAR_SYSTEMS_ICC__)
+#if __VER__ > 1000  /* decoded as major * 1000000 + minor * 1000 + patch */
+#define BROTLI_IAR_VERSION                     \
+  BROTLI_MAKE_VERSION((__VER__ / 1000000),     \
+                      (__VER__ / 1000) % 1000, \
+                      (__VER__ % 1000))
+#else  /* small __VER__: decoded as major * 100 + minor, patch is 0 */
+#define BROTLI_IAR_VERSION BROTLI_MAKE_VERSION(__VER__ / 100, __VER__ % 100, 0)
+#endif
+#endif
+
+#if defined(BROTLI_IAR_VERSION)
+#define BROTLI_IAR_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_IAR_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_IAR_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__TINYC__)
+#define BROTLI_TINYC_VERSION \
+  BROTLI_MAKE_VERSION(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100)
+#endif
+
+#if defined(BROTLI_TINYC_VERSION)
+#define BROTLI_TINYC_VERSION_CHECK(major, minor, patch) \
+  (BROTLI_TINYC_VERSION >= BROTLI_MAKE_VERSION(major, minor, patch))
+#else
+#define BROTLI_TINYC_VERSION_CHECK(major, minor, patch) (0)
+#endif
+
+#if defined(__has_attribute)
+#define BROTLI_GNUC_HAS_ATTRIBUTE(attribute, major, minor, patch) \
+  __has_attribute(attribute)
+#else
+#define BROTLI_GNUC_HAS_ATTRIBUTE(attribute, major, minor, patch) \
+  BROTLI_GNUC_VERSION_CHECK(major, minor, patch)
+#endif
+
+#if defined(__has_builtin)
+#define BROTLI_GNUC_HAS_BUILTIN(builtin, major, minor, patch) \
+  __has_builtin(builtin)
+#else
+#define BROTLI_GNUC_HAS_BUILTIN(builtin, major, minor, patch) \
+  BROTLI_GNUC_VERSION_CHECK(major, minor, patch)
+#endif
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#define BROTLI_PUBLIC
+#elif BROTLI_GNUC_VERSION_CHECK(3, 3, 0) ||                         \
+    BROTLI_TI_VERSION_CHECK(8, 0, 0) ||                             \
+    BROTLI_INTEL_VERSION_CHECK(16, 0, 0) ||                         \
+    BROTLI_ARM_VERSION_CHECK(4, 1, 0) ||                            \
+    BROTLI_IBM_VERSION_CHECK(13, 1, 0) ||                           \
+    BROTLI_SUNPRO_VERSION_CHECK(5, 11, 0) ||                        \
+    (BROTLI_TI_VERSION_CHECK(7, 3, 0) &&                            \
+     defined(__TI_GNU_ATTRIBUTE_SUPPORT__) && defined(__TI_EABI__))
+#define BROTLI_PUBLIC __attribute__ ((visibility ("default")))
+#else
+#define BROTLI_PUBLIC
+#endif
+
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
+    !defined(__STDC_NO_VLA__) && !defined(__cplusplus) &&         \
+    !defined(__PGI) && !defined(__PGIC__) && !defined(__TINYC__)
+#define BROTLI_ARRAY_PARAM(name) (name)
+#else  /* not C99+, no VLA, C++, PGI or TinyC: expands to nothing */
+#define BROTLI_ARRAY_PARAM(name)
+#endif  /* BROTLI_ARRAY_PARAM */
+
+/* <<< <<< <<< end of hedley macros. */
+
+#if defined(BROTLI_SHARED_COMPILATION)
+#if defined(_WIN32)
+#if defined(BROTLICOMMON_SHARED_COMPILATION)
+#define BROTLI_COMMON_API __declspec(dllexport)
+#else
+#define BROTLI_COMMON_API __declspec(dllimport)
+#endif  /* BROTLICOMMON_SHARED_COMPILATION */
+#if defined(BROTLIDEC_SHARED_COMPILATION)
+#define BROTLI_DEC_API __declspec(dllexport)
+#else
+#define BROTLI_DEC_API __declspec(dllimport)
+#endif  /* BROTLIDEC_SHARED_COMPILATION */
+#if defined(BROTLIENC_SHARED_COMPILATION)
+#define BROTLI_ENC_API __declspec(dllexport)
+#else
+#define BROTLI_ENC_API __declspec(dllimport)
+#endif  /* BROTLIENC_SHARED_COMPILATION */
+#else  /* _WIN32 */
+#define BROTLI_COMMON_API BROTLI_PUBLIC
+#define BROTLI_DEC_API BROTLI_PUBLIC
+#define BROTLI_ENC_API BROTLI_PUBLIC
+#endif  /* _WIN32 */
+#else  /* BROTLI_SHARED_COMPILATION */
+#define BROTLI_COMMON_API
+#define BROTLI_DEC_API
+#define BROTLI_ENC_API
+#endif
+
+#endif  /* BROTLI_COMMON_PORT_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/include/brotli/types.h b/codec/L2/demos/pikEnc/host/third_party/brotli/c/include/brotli/types.h
new file mode 100755
index 0000000000..eff1a3cd07
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/include/brotli/types.h
@@ -0,0 +1,83 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/**
+ * @file
+ * Common types used in decoder and encoder API.
+ */
+
+#ifndef BROTLI_COMMON_TYPES_H_
+#define BROTLI_COMMON_TYPES_H_
+
+#include <stddef.h>  /* for size_t */
+
+#if defined(_MSC_VER) && (_MSC_VER < 1600)
+typedef __int8 int8_t;
+typedef unsigned __int8 uint8_t;
+typedef __int16 int16_t;
+typedef unsigned __int16 uint16_t;
+typedef __int32 int32_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+typedef __int64 int64_t;
+#else
+#include <stdint.h>
+#endif  /* defined(_MSC_VER) && (_MSC_VER < 1600) */
+
+/**
+ * A portable @c bool replacement.
+ *
+ * ::BROTLI_BOOL is a "documentation" type: actually it is @c int, but in API it
+ * denotes a type, whose only values are ::BROTLI_TRUE and ::BROTLI_FALSE.
+ *
+ * ::BROTLI_BOOL values passed to Brotli should either be ::BROTLI_TRUE or
+ * ::BROTLI_FALSE, or be a result of ::TO_BROTLI_BOOL macros.
+ *
+ * ::BROTLI_BOOL values returned by Brotli should not be tested for equality
+ * with @c true, @c false, ::BROTLI_TRUE, ::BROTLI_FALSE, but rather should be
+ * evaluated, for example: @code{.cpp}
+ * if (SomeBrotliFunction(encoder, BROTLI_TRUE) &&
+ *     !OtherBrotliFunction(decoder, BROTLI_FALSE)) {
+ *   bool x = !!YetAnotherBrotliFunction(encoder, TO_BROTLI_BOOL(2 * 2 == 4));
+ *   DoSomething(x);
+ * }
+ * @endcode
+ */
+#define BROTLI_BOOL int
+/** Portable @c true replacement. */
+#define BROTLI_TRUE 1
+/** Portable @c false replacement. */
+#define BROTLI_FALSE 0
+/** @c bool to ::BROTLI_BOOL conversion macros. */
+#define TO_BROTLI_BOOL(X) (!!(X) ? BROTLI_TRUE : BROTLI_FALSE)
+/* ((uint64_t)high << 32) | low; both arguments are parenthesized. */
+#define BROTLI_MAKE_UINT64_T(high, low) ((((uint64_t)(high)) << 32) | (low))
+
+#define BROTLI_UINT32_MAX (~((uint32_t)0))
+#define BROTLI_SIZE_MAX (~((size_t)0))
+
+/**
+ * Allocating function pointer type.
+ *
+ * @param opaque custom memory manager handle provided by client
+ * @param size requested memory region size; can not be @c 0
+ * @returns @c 0 in the case of failure
+ * @returns a valid pointer to a memory region of at least @p size bytes
+ *          long otherwise
+ */
+typedef void* (*brotli_alloc_func)(void* opaque, size_t size);
+
+/**
+ * Deallocating function pointer type.
+ *
+ * This function @b SHOULD do nothing if @p address is @c 0.
+ *
+ * @param opaque custom memory manager handle provided by client
+ * @param address memory region pointer returned by ::brotli_alloc_func, or @c 0
+ */
+typedef void (*brotli_free_func)(void* opaque, void* address);
+
+#endif  /* BROTLI_COMMON_TYPES_H_ */
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/tools/brotli.c b/codec/L2/demos/pikEnc/host/third_party/brotli/c/tools/brotli.c
new file mode 100755
index 0000000000..ce05b641b2
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/tools/brotli.c
@@ -0,0 +1,1061 @@
+/* Copyright 2014 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Command line interface for Brotli library. */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <time.h>
+
+#include "../common/constants.h"
+#include "../common/version.h"
+#include <brotli/decode.h>
+#include <brotli/encode.h>
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#include <utime.h>
+#define MAKE_BINARY(FILENO) (FILENO)
+#else
+#include <io.h>
+#include <share.h>
+#include <sys/utime.h>
+
+#define MAKE_BINARY(FILENO) (_setmode((FILENO), _O_BINARY), (FILENO))
+
+#if !defined(__MINGW32__)
+#define STDIN_FILENO _fileno(stdin)
+#define STDOUT_FILENO _fileno(stdout)
+#define S_IRUSR S_IREAD
+#define S_IWUSR S_IWRITE
+#endif
+
+#define fdopen _fdopen
+#define isatty _isatty
+#define unlink _unlink
+#define utimbuf _utimbuf
+#define utime _utime
+
+#define fopen ms_fopen
+#define open ms_open
+
+#define chmod(F, P) (0)
+#define chown(F, O, G) (0)
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1400)
+#define fseek _fseeki64
+#define ftell _ftelli64
+#endif
+
+static FILE* ms_fopen(const char* filename, const char* mode) {
+  FILE* stream = NULL;  /* returned as-is unless fopen_s stores a stream */
+  (void)fopen_s(&stream, filename, mode);  /* status code is not inspected */
+  return stream;
+}
+
+static int ms_open(const char* filename, int oflag, int pmode) {
+  int fd = -1;  /* returned as-is unless _sopen_s stores a descriptor */
+  (void)_sopen_s(&fd, filename, O_BINARY | oflag, _SH_DENYNO, pmode);
+  return fd;  /* O_BINARY is always added to the caller's flags */
+}
+#endif  /* WIN32 */
+
+typedef enum {  /* Action chosen from the program name and the options. */
+  COMMAND_COMPRESS,        /* default when the name is not "unbrotli" */
+  COMMAND_DECOMPRESS,      /* "unbrotli" alias, -d or --decompress */
+  COMMAND_HELP,            /* -h or --help; parsing stops immediately */
+  COMMAND_INVALID,         /* returned by ParseParams on a bad command line */
+  COMMAND_TEST_INTEGRITY,  /* -t */
+  COMMAND_NOOP,
+  COMMAND_VERSION          /* -V; parsing stops immediately */
+} Command;
+
+#define DEFAULT_LGWIN 24
+#define DEFAULT_SUFFIX ".br"
+#define MAX_OPTIONS 20  /* capacity of Context.not_input_indices */
+
+typedef struct {  /* Parsed options plus the inner state of the tool. */
+  /* Parameters */
+  int quality;                  /* set by -0..-9, -q N, -Z or --best (11) */
+  int lgwin;                    /* set by -w N; 0 or >= BROTLI_MIN_WINDOW_BITS */
+  BROTLI_BOOL force_overwrite;  /* set by -f / --force */
+  BROTLI_BOOL junk_source;      /* -j makes it true, -k makes it false */
+  BROTLI_BOOL copy_stat;        /* cleared by -n */
+  BROTLI_BOOL verbose;          /* set by -v */
+  BROTLI_BOOL write_to_stdout;  /* set by -c */
+  BROTLI_BOOL test_integrity;
+  BROTLI_BOOL decompress;
+  BROTLI_BOOL large_window;
+  const char* output_path;      /* value given to -o */
+  const char* suffix;           /* value given to -S */
+  int not_input_indices[MAX_OPTIONS];  /* argv indices that are not inputs */
+  size_t longest_path_len;
+  size_t input_count;
+
+  /* Inner state */
+  int argc;     /* command line, as read by ParseParams */
+  char** argv;
+  char* modified_path;  /* Storage for path with appended / cut suffix */
+  int iterator;
+  int ignore;
+  BROTLI_BOOL iterator_error;
+  uint8_t* buffer;
+  uint8_t* input;
+  uint8_t* output;
+  const char* current_input_path;
+  const char* current_output_path;
+  int64_t input_file_length;  /* -1, if impossible to calculate */
+  FILE* fin;
+  FILE* fout;
+
+  /* I/O buffers */
+  size_t available_in;
+  const uint8_t* next_in;
+  size_t available_out;
+  uint8_t* next_out;
+} Context;
+
+/* Strict decimal parser: 1..5 digits, no leading zero, value in [low, high]. */
+static BROTLI_BOOL ParseInt(const char* s, int low, int high, int* result) {
+  int parsed = 0;
+  int len = 0;
+  while (len < 5 && s[len] != 0) {
+    const char digit = s[len];
+    if (!(digit >= '0' && digit <= '9')) return BROTLI_FALSE;
+    parsed = (parsed * 10) + (digit - '0');
+    ++len;
+  }
+  /* Reject empty input, zero-padded numbers and anything past 5 digits. */
+  if (len == 0 || (len > 1 && s[0] == '0') || s[len] != 0) return BROTLI_FALSE;
+  /* Only an in-range value is stored; *result is untouched on failure. */
+  if (parsed < low || parsed > high) return BROTLI_FALSE;
+  *result = parsed;
+  return BROTLI_TRUE;
+}
+
+/* Returns the part of path after its last '/', then after the last '\'. */
+static const char* FileName(const char* path) {
+  const char* after_slash = strrchr(path, '/');
+  const char* tail = (after_slash != NULL) ? (after_slash + 1) : path;
+  /* The backslash search only covers what is left after the last '/'. */
+  const char* after_backslash = strrchr(tail, '\\');
+  return (after_backslash != NULL) ? (after_backslash + 1) : tail;
+}
+
+/* Maps the executable name to a default command: "unbrotli" => decompress. */
+static Command ParseAlias(const char* name) {
+  static const char kAlias[] = "unbrotli";
+  const size_t alias_len = sizeof(kAlias) - 1;  /* length without the NUL */
+  const char* base = FileName(name);
+  /* TODO: cast name to lower case? */
+  /* Prefix match: the alias may be followed by an extension such as ".exe". */
+  if (strncmp(base, kAlias, alias_len) != 0) return COMMAND_COMPRESS;
+  if (base[alias_len] == 0 || base[alias_len] == '.') {
+    return COMMAND_DECOMPRESS;
+  }
+  return COMMAND_COMPRESS;
+}
+
+static Command ParseParams(Context* params) {
+  int argc = params->argc;
+  char** argv = params->argv;
+  int i;
+  int next_option_index = 0;
+  size_t input_count = 0;
+  size_t longest_path_len = 1;
+  BROTLI_BOOL command_set = BROTLI_FALSE;
+  BROTLI_BOOL quality_set = BROTLI_FALSE;
+  BROTLI_BOOL output_set = BROTLI_FALSE;
+  BROTLI_BOOL keep_set = BROTLI_FALSE;
+  BROTLI_BOOL lgwin_set = BROTLI_FALSE;
+  BROTLI_BOOL suffix_set = BROTLI_FALSE;
+  BROTLI_BOOL after_dash_dash = BROTLI_FALSE;
+  Command command = ParseAlias(argv[0]);
+
+  for (i = 1; i < argc; ++i) {
+    const char* arg = argv[i];
+    /* C99 5.1.2.2.1: "members argv[0] through argv[argc-1] inclusive shall
+       contain pointers to strings"; NULL and 0-length are not forbidden. */
+    size_t arg_len = arg ? strlen(arg) : 0;
+
+    if (arg_len == 0) {
+      params->not_input_indices[next_option_index++] = i;
+      continue;
+    }
+
+    /* Too many options. The expected longest option list is:
+       "-q 0 -w 10 -o f -D d -S b -d -f -k -n -v --", i.e. 16 items in total.
+       This check is an additional guard that is never triggered, but provides
+       a guard for future changes. */
+    if (next_option_index > (MAX_OPTIONS - 2)) {
+      fprintf(stderr, "too many options passed\n");
+      return COMMAND_INVALID;
+    }
+
+    /* Input file entry. */
+    if (after_dash_dash || arg[0] != '-' || arg_len == 1) {
+      input_count++;
+      if (longest_path_len < arg_len) longest_path_len = arg_len;
+      continue;
+    }
+
+    /* Not a file entry. */
+    params->not_input_indices[next_option_index++] = i;
+
+    /* '--' entry stop parsing arguments. */
+    if (arg_len == 2 && arg[1] == '-') {
+      after_dash_dash = BROTLI_TRUE;
+      continue;
+    }
+
+    /* Simple / coalesced options. */
+    if (arg[1] != '-') {
+      size_t j;
+      for (j = 1; j < arg_len; ++j) {
+        char c = arg[j];
+        if (c >= '0' && c <= '9') {
+          if (quality_set) {
+            fprintf(stderr, "quality already set\n");
+            return COMMAND_INVALID;
+          }
+          quality_set = BROTLI_TRUE;
+          params->quality = c - '0';
+          continue;
+        } else if (c == 'c') {
+          if (output_set) {
+            fprintf(stderr, "write to standard output already set\n");
+            return COMMAND_INVALID;
+          }
+          output_set = BROTLI_TRUE;
+          params->write_to_stdout = BROTLI_TRUE;
+          continue;
+        } else if (c == 'd') {
+          if (command_set) {
+            fprintf(stderr, "command already set when parsing -d\n");
+            return COMMAND_INVALID;
+          }
+          command_set = BROTLI_TRUE;
+          command = COMMAND_DECOMPRESS;
+          continue;
+        } else if (c == 'f') {
+          if (params->force_overwrite) {
+            fprintf(stderr, "force output overwrite already set\n");
+            return COMMAND_INVALID;
+          }
+          params->force_overwrite = BROTLI_TRUE;
+          continue;
+        } else if (c == 'h') {
+          /* Don't parse further. */
+          return COMMAND_HELP;
+        } else if (c == 'j' || c == 'k') {
+          if (keep_set) {
+            fprintf(stderr, "argument --rm / -j or --keep / -n already set\n");
+            return COMMAND_INVALID;
+          }
+          keep_set = BROTLI_TRUE;
+          params->junk_source = TO_BROTLI_BOOL(c == 'j');
+          continue;
+        } else if (c == 'n') {
+          if (!params->copy_stat) {
+            fprintf(stderr, "argument --no-copy-stat / -n already set\n");
+            return COMMAND_INVALID;
+          }
+          params->copy_stat = BROTLI_FALSE;
+          continue;
+        } else if (c == 't') {
+          if (command_set) {
+            fprintf(stderr, "command already set when parsing -t\n");
+            return COMMAND_INVALID;
+          }
+          command_set = BROTLI_TRUE;
+          command = COMMAND_TEST_INTEGRITY;
+          continue;
+        } else if (c == 'v') {
+          if (params->verbose) {
+            fprintf(stderr, "argument --verbose / -v already set\n");
+            return COMMAND_INVALID;
+          }
+          params->verbose = BROTLI_TRUE;
+          continue;
+        } else if (c == 'V') {
+          /* Don't parse further. */
+          return COMMAND_VERSION;
+        } else if (c == 'Z') {
+          if (quality_set) {
+            fprintf(stderr, "quality already set\n");
+            return COMMAND_INVALID;
+          }
+          quality_set = BROTLI_TRUE;
+          params->quality = 11;
+          continue;
+        }
+        /* o/q/w/D/S with parameter is expected */
+        if (c != 'o' && c != 'q' && c != 'w' && c != 'D' && c != 'S') {
+          fprintf(stderr, "invalid argument -%c\n", c);
+          return COMMAND_INVALID;
+        }
+        if (j + 1 != arg_len) {
+          fprintf(stderr, "expected parameter for argument -%c\n", c);
+          return COMMAND_INVALID;
+        }
+        i++;
+        if (i == argc || !argv[i] || argv[i][0] == 0) {
+          fprintf(stderr, "expected parameter for argument -%c\n", c);
+          return COMMAND_INVALID;
+        }
+        params->not_input_indices[next_option_index++] = i;
+        if (c == 'o') {
+          if (output_set) {
+            fprintf(stderr, "write to standard output already set (-o)\n");
+            return COMMAND_INVALID;
+          }
+          params->output_path = argv[i];
+        } else if (c == 'q') {
+          if (quality_set) {
+            fprintf(stderr, "quality already set\n");
+            return COMMAND_INVALID;
+          }
+          quality_set = ParseInt(argv[i], BROTLI_MIN_QUALITY,
+                                 BROTLI_MAX_QUALITY, &params->quality);
+          if (!quality_set) {
+            fprintf(stderr, "error parsing quality value [%s]\n", argv[i]);
+            return COMMAND_INVALID;
+          }
+        } else if (c == 'w') {
+          if (lgwin_set) {
+            fprintf(stderr, "lgwin parameter already set\n");
+            return COMMAND_INVALID;
+          }
+          lgwin_set = ParseInt(argv[i], 0,
+                               BROTLI_MAX_WINDOW_BITS, &params->lgwin);
+          if (!lgwin_set) {
+            fprintf(stderr, "error parsing lgwin value [%s]\n", argv[i]);
+            return COMMAND_INVALID;
+          }
+          if (params->lgwin != 0 && params->lgwin < BROTLI_MIN_WINDOW_BITS) {
+            fprintf(stderr,
+                    "lgwin parameter (%d) smaller than the minimum (%d)\n",
+                    params->lgwin, BROTLI_MIN_WINDOW_BITS);
+            return COMMAND_INVALID;
+          }
+        } else if (c == 'S') {
+          if (suffix_set) {
+            fprintf(stderr, "suffix already set\n");
+            return COMMAND_INVALID;
+          }
+          suffix_set = BROTLI_TRUE;
+          params->suffix = argv[i];
+        }
+      }
+    } else {  /* Double-dash. */
+      arg = &arg[2];
+      if (strcmp("best", arg) == 0) {
+        if (quality_set) {
+          fprintf(stderr, "quality already set\n");
+          return COMMAND_INVALID;
+        }
+        quality_set = BROTLI_TRUE;
+        params->quality = 11;
+      } else if (strcmp("decompress", arg) == 0) {
+        if (command_set) {
+          fprintf(stderr, "command already set when parsing --decompress\n");
+          return COMMAND_INVALID;
+        }
+        command_set = BROTLI_TRUE;
+        command = COMMAND_DECOMPRESS;
+      } else if (strcmp("force", arg) == 0) {
+        if (params->force_overwrite) {
+          fprintf(stderr, "force output overwrite already set\n");
+          return COMMAND_INVALID;
+        }
+        params->force_overwrite = BROTLI_TRUE;
+      } else if (strcmp("help", arg) == 0) {
+        /* Don't parse further. */
+        return COMMAND_HELP;
+      } else if (strcmp("keep", arg) == 0) {
+        if (keep_set) {
+          fprintf(stderr, "argument --rm / -j or --keep / -n already set\n");
+          return COMMAND_INVALID;
+        }
+        keep_set = BROTLI_TRUE;
+        params->junk_source = BROTLI_FALSE;
+      } else if (strcmp("no-copy-stat", arg) == 0) {
+        if (!params->copy_stat) {
+          fprintf(stderr, "argument --no-copy-stat / -n already set\n");
+          return COMMAND_INVALID;
+        }
+        params->copy_stat = BROTLI_FALSE;
+      } else if (strcmp("rm", arg) == 0) {
+        if (keep_set) {
+          fprintf(stderr, "argument --rm / -j or --keep / -n already set\n");
+          return COMMAND_INVALID;
+        }
+        keep_set = BROTLI_TRUE;
+        params->junk_source = BROTLI_TRUE;
+      } else if (strcmp("stdout", arg) == 0) {
+        if (output_set) {
+          fprintf(stderr, "write to standard output already set\n");
+          return COMMAND_INVALID;
+        }
+        output_set = BROTLI_TRUE;
+        params->write_to_stdout = BROTLI_TRUE;
+      } else if (strcmp("test", arg) == 0) {
+        if (command_set) {
+          fprintf(stderr, "command already set when parsing --test\n");
+          return COMMAND_INVALID;
+        }
+        command_set = BROTLI_TRUE;
+        command = COMMAND_TEST_INTEGRITY;
+      } else if (strcmp("verbose", arg) == 0) {
+        if (params->verbose) {
+          fprintf(stderr, "argument --verbose / -v already set\n");
+          return COMMAND_INVALID;
+        }
+        params->verbose = BROTLI_TRUE;
+      } else if (strcmp("version", arg) == 0) {
+        /* Don't parse further. */
+        return COMMAND_VERSION;
+      } else {
+        /* key=value */
+        const char* value = strrchr(arg, '=');
+        size_t key_len;
+        if (!value || value[1] == 0) {
+          fprintf(stderr, "must pass the parameter as --%s=value\n", arg);
+          return COMMAND_INVALID;
+        }
+        key_len = (size_t)(value - arg);
+        value++;
+        if (strncmp("lgwin", arg, key_len) == 0) {
+          if (lgwin_set) {
+            fprintf(stderr, "lgwin parameter already set\n");
+            return COMMAND_INVALID;
+          }
+          lgwin_set = ParseInt(value, 0,
+                               BROTLI_MAX_WINDOW_BITS, &params->lgwin);
+          if (!lgwin_set) {
+            fprintf(stderr, "error parsing lgwin value [%s]\n", value);
+            return COMMAND_INVALID;
+          }
+          if (params->lgwin != 0 && params->lgwin < BROTLI_MIN_WINDOW_BITS) {
+            fprintf(stderr,
+                    "lgwin parameter (%d) smaller than the minimum (%d)\n",
+                    params->lgwin, BROTLI_MIN_WINDOW_BITS);
+            return COMMAND_INVALID;
+          }
+        } else if (strncmp("large_window", arg, key_len) == 0) {
+          /* This option is intentionally not mentioned in help. */
+          if (lgwin_set) {
+            fprintf(stderr, "lgwin parameter already set\n");
+            return COMMAND_INVALID;
+          }
+          lgwin_set = ParseInt(value, 0,
+                               BROTLI_LARGE_MAX_WINDOW_BITS, &params->lgwin);
+          if (!lgwin_set) {
+            fprintf(stderr, "error parsing lgwin value [%s]\n", value);
+            return COMMAND_INVALID;
+          }
+          if (params->lgwin != 0 && params->lgwin < BROTLI_MIN_WINDOW_BITS) {
+            fprintf(stderr,
+                    "lgwin parameter (%d) smaller than the minimum (%d)\n",
+                    params->lgwin, BROTLI_MIN_WINDOW_BITS);
+            return COMMAND_INVALID;
+          }
+        } else if (strncmp("output", arg, key_len) == 0) {
+          if (output_set) {
+            fprintf(stderr,
+                    "write to standard output already set (--output)\n");
+            return COMMAND_INVALID;
+          }
+          params->output_path = value;
+        } else if (strncmp("quality", arg, key_len) == 0) {
+          if (quality_set) {
+            fprintf(stderr, "quality already set\n");
+            return COMMAND_INVALID;
+          }
+          quality_set = ParseInt(value, BROTLI_MIN_QUALITY,
+                                 BROTLI_MAX_QUALITY, &params->quality);
+          if (!quality_set) {
+            fprintf(stderr, "error parsing quality value [%s]\n", value);
+            return COMMAND_INVALID;
+          }
+        } else if (strncmp("suffix", arg, key_len) == 0) {
+          if (suffix_set) {
+            fprintf(stderr, "suffix already set\n");
+            return COMMAND_INVALID;
+          }
+          suffix_set = BROTLI_TRUE;
+          params->suffix = value;
+        } else {
+          fprintf(stderr, "invalid parameter: [%s]\n", arg);
+          return COMMAND_INVALID;
+        }
+      }
+    }
+  }
+
+  params->input_count = input_count;
+  params->longest_path_len = longest_path_len;
+  params->decompress = (command == COMMAND_DECOMPRESS);
+  params->test_integrity = (command == COMMAND_TEST_INTEGRITY);
+
+  if (input_count > 1 && output_set) return COMMAND_INVALID;
+  if (params->test_integrity) {
+    if (params->output_path) return COMMAND_INVALID;
+    if (params->write_to_stdout) return COMMAND_INVALID;
+  }
+  if (strchr(params->suffix, '/') || strchr(params->suffix, '\\')) {
+    return COMMAND_INVALID;
+  }
+
+  return command;
+}
+
+/* Prints "brotli MAJOR.MINOR.PATCH" to stdout. BROTLI_VERSION carries the
+   major number from bit 24 up and minor/patch in two 12-bit fields below. */
+static void PrintVersion(void) {
+  fprintf(stdout, "brotli %d.%d.%d\n", (int)(BROTLI_VERSION >> 24),
+          (int)((BROTLI_VERSION >> 12) & 0xFFF), (int)(BROTLI_VERSION & 0xFFF));
+}
+
+/* Prints the usage text for program |name|; the text goes to stderr when
+   |error| is set and to stdout otherwise. */
+static void PrintHelp(const char* name, BROTLI_BOOL error) {
+  FILE* out = stdout;
+  if (error) out = stderr;
+  /* The text is split so that every string literal stays below 509
+     characters, to conform to the C90 spec. */
+  fprintf(out, "Usage: %s [OPTION]... [FILE]...\n", name);
+  fprintf(out,
+"Options:\n"
+"  -#                          compression level (0-9)\n"
+"  -c, --stdout                write on standard output\n"
+"  -d, --decompress            decompress\n"
+"  -f, --force                 force output file overwrite\n"
+"  -h, --help                  display this help and exit\n");
+  fprintf(out,
+"  -j, --rm                    remove source file(s)\n"
+"  -k, --keep                  keep source file(s) (default)\n"
+"  -n, --no-copy-stat          do not copy source file(s) attributes\n"
+"  -o FILE, --output=FILE      output file (only if 1 input file)\n");
+  fprintf(out,
+"  -q NUM, --quality=NUM       compression level (%d-%d)\n"
+"  -t, --test                  test compressed file integrity\n"
+"  -v, --verbose               verbose mode\n",
+          BROTLI_MIN_QUALITY, BROTLI_MAX_QUALITY);
+  fprintf(out,
+"  -w NUM, --lgwin=NUM         set LZ77 window size (0, %d-%d)\n"
+"                              window size = 2**NUM - 16\n"
+"                              0 lets compressor choose the optimal value\n",
+          BROTLI_MIN_WINDOW_BITS, BROTLI_MAX_WINDOW_BITS);
+  fprintf(out,
+"  -S SUF, --suffix=SUF        output file suffix (default:'%s')\n",
+          DEFAULT_SUFFIX);
+  fprintf(out,
+"  -V, --version               display version and exit\n"
+"  -Z, --best                  use best compression level (11) (default)\n"
+"Simple options could be coalesced, i.e. '-9kf' is equivalent to '-9 -k -f'.\n"
+"With no FILE, or when FILE is -, read standard input.\n"
+"All arguments after '--' are treated as files.\n");
+}
+
+static const char* PrintablePath(const char* path) {
+  return (path == NULL) ? "con" : path;  /* "con" is shown for a NULL path */
+}
+
+/* Opens |input_path| for binary reading into |*f|; a NULL path selects
+   standard input. Returns BROTLI_FALSE (after reporting) on failure. */
+static BROTLI_BOOL OpenInputFile(const char* input_path, FILE** f) {
+  /* A failing fdopen() on stdin is reported as well, so that callers never
+     end up reading from a NULL stream. */
+  *f = input_path ? fopen(input_path, "rb")
+                  : fdopen(MAKE_BINARY(STDIN_FILENO), "rb");
+  if (!*f) {
+    fprintf(stderr, "failed to open input file [%s]: %s\n",
+            PrintablePath(input_path), strerror(errno));
+    return BROTLI_FALSE;
+  }
+  return BROTLI_TRUE;
+}
+
+/* Opens |output_path| (NULL: standard output) for binary writing into |*f|;
+   an existing file is replaced only if |force|. Failures go to stderr. */
+static BROTLI_BOOL OpenOutputFile(const char* output_path, FILE** f,
+                                  BROTLI_BOOL force) {
+  int fd = -1;
+  *f = NULL;
+  if (output_path) {
+    fd = open(output_path, O_CREAT | (force ? 0 : O_EXCL) | O_WRONLY | O_TRUNC,
+              S_IRUSR | S_IWUSR);
+    if (fd < 0) {
+      fprintf(stderr, "failed to open output file [%s]: %s\n",
+              PrintablePath(output_path), strerror(errno));
+      return BROTLI_FALSE;
+    }
+  }
+  *f = fdopen(output_path ? fd : MAKE_BINARY(STDOUT_FILENO), "wb");
+  if (!*f) {
+    fprintf(stderr, "failed to open output file [%s]: %s\n",
+            PrintablePath(output_path), strerror(errno));
+    return BROTLI_FALSE;
+  }
+  return BROTLI_TRUE;
+}
+
+/* Returns the size in bytes of the file at |path|, measured by seeking to
+   its end and reading the stream position; yields -1 when the file cannot
+   be opened, seeked or closed. */
+static int64_t FileSize(const char* path) {
+  int64_t size = -1;
+  FILE* file = fopen(path, "rb");
+  if (!file) return -1;
+  if (fseek(file, 0L, SEEK_END) == 0) {
+    size = ftell(file);
+    /* A failed close discards the measurement. */
+    if (fclose(file) != 0) size = -1;
+  } else {
+    fclose(file);
+  }
+  return size;
+}
+
+/* Copy file times and permissions: applies the access/modification times,
+   permission bits, group and owner of |input_path| to |output_path|. Does
+   nothing when either path is NULL or the source cannot be stat()'ed.
+   TODO: this is a "best effort" implementation; honest cross-platform
+   fully featured implementation is way too hacky; add more hacks by request. */
+static void CopyStat(const char* input_path, const char* output_path) {
+  struct stat src;
+  struct utimbuf stamps;
+  if (!input_path || !output_path) {
+    return;
+  }
+  if (stat(input_path, &src) != 0) {
+    return;
+  }
+  stamps.actime = src.st_atime;
+  stamps.modtime = src.st_mtime;
+  /* A failing utime() is not reported. */
+  utime(output_path, &stamps);
+  if (chmod(output_path, src.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO)) != 0) {
+    fprintf(stderr, "setting access bits failed for [%s]: %s\n",
+            PrintablePath(output_path), strerror(errno));
+  }
+  /* Group and owner are changed by separate calls; -1 keeps the other id. */
+  if (chown(output_path, (uid_t)-1, src.st_gid) != 0) {
+    fprintf(stderr, "setting group failed for [%s]: %s\n",
+            PrintablePath(output_path), strerror(errno));
+  }
+  if (chown(output_path, src.st_uid, (gid_t)-1) != 0) {
+    fprintf(stderr, "setting user failed for [%s]: %s\n",
+            PrintablePath(output_path), strerror(errno));
+  }
+}
+
+/* Advances |context| to the next input and fills in current_input_path,
+   current_output_path and input_file_length. Returns BROTLI_FALSE when no
+   input is left or, with iterator_error set, on a bad name to decompress. */
+static BROTLI_BOOL NextFile(Context* context) {
+  const char* arg;
+  size_t arg_len;
+
+  /* Iterator points to last used arg; increment to search for the next one. */
+  context->iterator++;
+  context->input_file_length = -1;  /* Unknown unless set from FileSize(). */
+
+  /* No input path; read from console. */
+  if (context->input_count == 0) {
+    if (context->iterator > 1) return BROTLI_FALSE;  /* Only one pass. */
+    context->current_input_path = NULL;
+    /* Either write to the specified path, or to console. */
+    context->current_output_path = context->output_path;
+    return BROTLI_TRUE;
+  }
+
+  /* Skip option arguments. */
+  while (context->iterator == context->not_input_indices[context->ignore]) {
+    context->iterator++;
+    context->ignore++;
+  }
+  /* All args are scanned already. */
+  if (context->iterator >= context->argc) return BROTLI_FALSE;
+  /* Iterator now points to the input file name. */
+  arg = context->argv[context->iterator];
+  arg_len = strlen(arg);
+  /* Read from console. */
+  if (arg_len == 1 && arg[0] == '-') {
+    context->current_input_path = NULL;
+    context->current_output_path = context->output_path;
+    return BROTLI_TRUE;
+  }
+
+  context->current_input_path = arg;
+  context->input_file_length = FileSize(arg);
+  context->current_output_path = context->output_path;
+  /* An explicit output path or stdout needs no derived file name. */
+  if (context->output_path) return BROTLI_TRUE;
+  if (context->write_to_stdout) return BROTLI_TRUE;
+  /* Derive the output name from a writable copy of the input path. */
+  strcpy(context->modified_path, arg);
+  context->current_output_path = context->modified_path;
+  /* If output is not specified, input path suffix should match. */
+  if (context->decompress) {
+    size_t suffix_len = strlen(context->suffix);
+    char* name = (char*)FileName(context->modified_path);
+    char* name_suffix;
+    size_t name_len = strlen(name);
+    if (name_len < suffix_len + 1) {  /* Nothing would remain of the name. */
+      fprintf(stderr, "empty output file name for [%s] input file\n",
+              PrintablePath(arg));
+      context->iterator_error = BROTLI_TRUE;
+      return BROTLI_FALSE;
+    }
+    name_suffix = name + name_len - suffix_len;
+    if (strcmp(context->suffix, name_suffix) != 0) {
+      fprintf(stderr, "input file [%s] suffix mismatch\n",
+              PrintablePath(arg));
+      context->iterator_error = BROTLI_TRUE;
+      return BROTLI_FALSE;
+    }
+    name_suffix[0] = 0;  /* Cut the suffix off the output name. */
+    return BROTLI_TRUE;
+  } else {
+    strcpy(context->modified_path + arg_len, context->suffix);  /* Append. */
+    return BROTLI_TRUE;
+  }
+}
+
+/* Opens the current input stream and, unless the run only tests integrity,
+   the current output stream. Returns BROTLI_FALSE if either open fails. */
+static BROTLI_BOOL OpenFiles(Context* context) {
+  BROTLI_BOOL ok = OpenInputFile(context->current_input_path, &context->fin);
+  if (!ok || context->test_integrity) return ok;
+  return OpenOutputFile(
+      context->current_output_path, &context->fout, context->force_overwrite);
+}
+
+/* Closes the current streams and resets them to NULL. If |success| is false
+   a named output file is unlinked; otherwise attributes may be copied and
+   the source removed. Returns BROTLI_FALSE if closing either stream fails. */
+static BROTLI_BOOL CloseFiles(Context* context, BROTLI_BOOL success) {
+  BROTLI_BOOL is_ok = BROTLI_TRUE;
+  if (!context->test_integrity && context->fout) {
+    if (!success && context->current_output_path) {
+      unlink(context->current_output_path);
+    }
+    if (fclose(context->fout) != 0) {
+      if (success) {
+        fprintf(stderr, "fclose failed [%s]: %s\n",
+                PrintablePath(context->current_output_path), strerror(errno));
+      }
+      is_ok = BROTLI_FALSE;
+    }
+    /* TOCTOU violation, but otherwise it is impossible to set file times. */
+    if (success && is_ok && context->copy_stat) {
+      CopyStat(context->current_input_path, context->current_output_path);
+    }
+  }
+  if (context->fin && fclose(context->fin) != 0) {
+    if (is_ok) {
+      fprintf(stderr, "fclose failed [%s]: %s\n",
+              PrintablePath(context->current_input_path), strerror(errno));
+    }
+    is_ok = BROTLI_FALSE;
+  }
+  /* Keep the source when a close failed: fclose() flushes buffered output,
+     so the output may be incomplete and the source the only good copy. */
+  if (success && is_ok && context->junk_source &&
+      context->current_input_path) {
+    unlink(context->current_input_path);
+  }
+  context->fin = NULL;
+  context->fout = NULL;
+  return is_ok;
+}
+
+static const size_t kFileBufferSize = 1 << 19;  /* 512 KiB per I/O buffer */
+
+static void InitializeBuffers(Context* context) {
+  context->next_in = NULL;                    /* no input buffered yet */
+  context->available_in = 0;
+  context->next_out = context->output;        /* output buffer is empty */
+  context->available_out = kFileBufferSize;
+}
+
+static BROTLI_BOOL HasMoreInput(Context* context) {
+  return !feof(context->fin) ? BROTLI_TRUE : BROTLI_FALSE;  /* EOF not hit */
+}
+
+/* Refills the input buffer from context->fin and points next_in at its
+   start; reports and returns BROTLI_FALSE if the stream is in error. */
+static BROTLI_BOOL ProvideInput(Context* context) {
+  size_t got = fread(context->input, 1, kFileBufferSize, context->fin);
+  context->available_in = got;
+  context->next_in = context->input;
+  if (!ferror(context->fin)) return BROTLI_TRUE;
+  fprintf(stderr, "failed to read input [%s]: %s\n",
+          PrintablePath(context->current_input_path), strerror(errno));
+  return BROTLI_FALSE;
+}
+
+/* Internal: should be used only in Provide-/Flush-Output.
+   Writes the bytes between context->output and next_out to context->fout;
+   nothing is written for an empty buffer or in test-integrity mode. */
+static BROTLI_BOOL WriteOutput(Context* context) {
+  size_t pending = (size_t)(context->next_out - context->output);
+  if (pending == 0 || context->test_integrity) return BROTLI_TRUE;
+
+  /* Success is judged by the stream error flag, not by fwrite's count. */
+  fwrite(context->output, 1, pending, context->fout);
+  if (!ferror(context->fout)) return BROTLI_TRUE;
+  fprintf(stderr, "failed to write output [%s]: %s\n",
+          PrintablePath(context->current_output_path), strerror(errno));
+  return BROTLI_FALSE;
+}
+
+static BROTLI_BOOL ProvideOutput(Context* context) {
+  if (WriteOutput(context) == BROTLI_FALSE) return BROTLI_FALSE;
+  context->next_out = context->output;       /* after writing, the whole */
+  context->available_out = kFileBufferSize;  /* buffer is free again */
+  return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL FlushOutput(Context* context) {
+  BROTLI_BOOL flushed = WriteOutput(context);
+  if (flushed) context->available_out = 0;  /* no output room afterwards */
+  return flushed;
+}
+
+/* Decodes the stream read from context->fin with decoder |s|, emitting data
+   through Provide-/FlushOutput. Returns BROTLI_FALSE on an I/O failure or
+   when the input is truncated, malformed or followed by extra bytes (these
+   three cases are reported as "corrupt input"). */
+static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) {
+  BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
+  InitializeBuffers(context);
+  for (;;) {
+    if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
+      if (!HasMoreInput(context)) break;  /* Truncated stream. */
+      if (!ProvideInput(context)) return BROTLI_FALSE;
+    } else if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
+      if (!ProvideOutput(context)) return BROTLI_FALSE;
+    } else if (result == BROTLI_DECODER_RESULT_SUCCESS) {
+      if (!FlushOutput(context)) return BROTLI_FALSE;
+      /* feof() becomes true only after a read has hit end-of-file, so an
+         input ending exactly on a buffer boundary would be mistaken for
+         trailing data; probe for one more byte instead. */
+      if (context->available_in != 0 || fgetc(context->fin) != EOF) break;
+      return BROTLI_TRUE;
+    } else {
+      break;  /* Decoder reported an error. */
+    }
+
+    result = BrotliDecoderDecompressStream(s, &context->available_in,
+        &context->next_in, &context->available_out, &context->next_out, 0);
+  }
+  fprintf(stderr, "corrupt input [%s]\n",
+          PrintablePath(context->current_input_path));
+  return BROTLI_FALSE;
+}
+
+/* Decodes every input yielded by NextFile(), each with a fresh decoder;
+   stops at the first failure and returns BROTLI_FALSE in that case. */
+static BROTLI_BOOL DecompressFiles(Context* context) {
+  while (NextFile(context)) {
+    BROTLI_BOOL success;
+    BrotliDecoderState* decoder = BrotliDecoderCreateInstance(NULL, NULL, NULL);
+    if (decoder == NULL) {
+      fprintf(stderr, "out of memory\n");
+      return BROTLI_FALSE;
+    }
+    /* Also accept "large-window" streams, which old builds cannot decode. */
+    BrotliDecoderSetParameter(decoder, BROTLI_DECODER_PARAM_LARGE_WINDOW, 1u);
+    success = OpenFiles(context);
+    /* Compressed data is read from a terminal only when forced with -f. */
+    if (success && !context->current_input_path &&
+        !context->force_overwrite && isatty(STDIN_FILENO)) {
+      fprintf(stderr, "Use -h help. Use -f to force input from a terminal.\n");
+      success = BROTLI_FALSE;
+    }
+    if (success) success = DecompressFile(context, decoder);
+    BrotliDecoderDestroyInstance(decoder);
+    if (!CloseFiles(context, success) || !success) return BROTLI_FALSE;
+  }
+  return BROTLI_TRUE;
+}
+
+/* Feeds context->fin through encoder |s| until the encoder reports that it
+   is finished, writing via Provide-/FlushOutput. Returns BROTLI_FALSE on an
+   I/O or encoder failure. */
+static BROTLI_BOOL CompressFile(Context* context, BrotliEncoderState* s) {
+  BROTLI_BOOL input_done = BROTLI_FALSE;
+  InitializeBuffers(context);
+  for (;;) {
+    /* Refill only once the encoder has consumed everything it was given. */
+    if (context->available_in == 0 && !input_done) {
+      if (!ProvideInput(context)) return BROTLI_FALSE;
+      input_done = !HasMoreInput(context);
+    }
+    if (!BrotliEncoderCompressStream(s,
+        input_done ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
+        &context->available_in, &context->next_in,
+        &context->available_out, &context->next_out, NULL)) {
+      /* Should detect OOM? */
+      fprintf(stderr, "failed to compress data [%s]\n",
+              PrintablePath(context->current_input_path));
+      return BROTLI_FALSE;
+    }
+    /* A full output buffer is written out before encoding continues. */
+    if (context->available_out == 0 && !ProvideOutput(context)) {
+      return BROTLI_FALSE;
+    }
+    if (BrotliEncoderIsFinished(s)) return FlushOutput(context);
+  }
+}
+
+/* Compresses every input yielded by NextFile(), each with a fresh encoder
+   configured from |context|; stops at the first failure and returns
+   BROTLI_FALSE in that case. */
+static BROTLI_BOOL CompressFiles(Context* context) {
+  while (NextFile(context)) {
+    BROTLI_BOOL success;
+    uint32_t lgwin = DEFAULT_LGWIN;
+    BrotliEncoderState* encoder = BrotliEncoderCreateInstance(NULL, NULL, NULL);
+    if (encoder == NULL) {
+      fprintf(stderr, "out of memory\n");
+      return BROTLI_FALSE;
+    }
+    BrotliEncoderSetParameter(encoder,
+        BROTLI_PARAM_QUALITY, (uint32_t)context->quality);
+    if (context->lgwin > 0) {
+      /* Window chosen by the user; the "large-window" extension is enabled
+         only when the value exceeds the regular maximum. */
+      lgwin = (uint32_t)context->lgwin;
+      if (context->lgwin > BROTLI_MAX_WINDOW_BITS) {
+        BrotliEncoderSetParameter(encoder, BROTLI_PARAM_LARGE_WINDOW, 1u);
+      }
+    } else if (context->input_file_length >= 0) {
+      /* 0 or unset, file size known: take the smallest window (capped at the
+         regular maximum) whose backward limit covers the whole file. */
+      lgwin = BROTLI_MIN_WINDOW_BITS;
+      while (BROTLI_MAX_BACKWARD_LIMIT(lgwin) <
+             (uint64_t)context->input_file_length) {
+        if (++lgwin == BROTLI_MAX_WINDOW_BITS) break;
+      }
+    }
+    BrotliEncoderSetParameter(encoder, BROTLI_PARAM_LGWIN, lgwin);
+    if (context->input_file_length > 0) {
+      /* The size hint is capped at 1 << 30. */
+      uint32_t size_hint = context->input_file_length >= (1 << 30) ?
+          (1u << 30) : (uint32_t)context->input_file_length;
+      BrotliEncoderSetParameter(encoder, BROTLI_PARAM_SIZE_HINT, size_hint);
+    }
+    success = OpenFiles(context);
+    /* Compressed data is written to a terminal only when forced with -f. */
+    if (success && !context->current_output_path &&
+        !context->force_overwrite && isatty(STDOUT_FILENO)) {
+      fprintf(stderr, "Use -h help. Use -f to force output to a terminal.\n");
+      success = BROTLI_FALSE;
+    }
+    if (success) success = CompressFile(context, encoder);
+    BrotliEncoderDestroyInstance(encoder);
+    if (!CloseFiles(context, success) || !success) return BROTLI_FALSE;
+  }
+  return BROTLI_TRUE;
+}
+
+/* Entry point: fills |context| with defaults, parses the command line,
+   allocates the path and I/O buffers and runs the selected command.
+   Exits with status 1 on any failure, returns 0 otherwise. */
+int main(int argc, char** argv) {
+  Command command;
+  Context context;
+  BROTLI_BOOL is_ok = BROTLI_TRUE;
+  int i;
+
+  context.quality = 11;
+  context.lgwin = -1;
+  context.force_overwrite = BROTLI_FALSE;
+  context.junk_source = BROTLI_FALSE;
+  context.copy_stat = BROTLI_TRUE;
+  context.test_integrity = BROTLI_FALSE;
+  context.verbose = BROTLI_FALSE;
+  context.write_to_stdout = BROTLI_FALSE;
+  context.decompress = BROTLI_FALSE;
+  context.large_window = BROTLI_FALSE;
+  context.output_path = NULL;
+  context.suffix = DEFAULT_SUFFIX;
+  for (i = 0; i < MAX_OPTIONS; ++i) context.not_input_indices[i] = 0;
+  context.longest_path_len = 1;
+  context.input_count = 0;
+
+  context.argc = argc;
+  context.argv = argv;
+  context.modified_path = NULL;
+  context.iterator = 0;
+  context.ignore = 0;
+  context.iterator_error = BROTLI_FALSE;
+  context.buffer = NULL;
+  context.current_input_path = NULL;
+  context.current_output_path = NULL;
+  context.fin = NULL;
+  context.fout = NULL;
+
+  command = ParseParams(&context);
+
+  /* Only the file-processing commands need the scratch buffers. */
+  if (command == COMMAND_COMPRESS || command == COMMAND_DECOMPRESS ||
+      command == COMMAND_TEST_INTEGRITY) {
+    size_t modified_path_len =
+        context.longest_path_len + strlen(context.suffix) + 1;
+    context.modified_path = (char*)malloc(modified_path_len);
+    context.buffer = (uint8_t*)malloc(kFileBufferSize * 2);
+    if (!context.modified_path || !context.buffer) {
+      fprintf(stderr, "out of memory\n");
+      is_ok = BROTLI_FALSE;
+    } else {
+      context.input = context.buffer;
+      context.output = context.buffer + kFileBufferSize;
+    }
+  }
+
+  if (!is_ok) command = COMMAND_NOOP;
+
+  switch (command) {
+    case COMMAND_NOOP:
+      break;
+    case COMMAND_VERSION:
+      PrintVersion();
+      break;
+    case COMMAND_COMPRESS:
+      is_ok = CompressFiles(&context);
+      break;
+    case COMMAND_DECOMPRESS:
+    case COMMAND_TEST_INTEGRITY:
+      is_ok = DecompressFiles(&context);
+      break;
+    case COMMAND_HELP:
+    case COMMAND_INVALID:
+    default:
+      is_ok = (command == COMMAND_HELP);
+      /* PrintHelp() takes an error flag: requested help goes to stdout,
+         help shown because of bad usage goes to stderr. */
+      PrintHelp(FileName(argv[0]), is_ok ? BROTLI_FALSE : BROTLI_TRUE);
+      break;
+  }
+
+  if (context.iterator_error) is_ok = BROTLI_FALSE;
+
+  free(context.modified_path);
+  free(context.buffer);
+
+  if (!is_ok) exit(1);
+  return 0;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/c/tools/brotli.md b/codec/L2/demos/pikEnc/host/third_party/brotli/c/tools/brotli.md
new file mode 100755
index 0000000000..c029869bce
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/c/tools/brotli.md
@@ -0,0 +1,107 @@
+brotli(1) -- brotli, unbrotli - compress or decompress files
+================================================================
+
+SYNOPSIS
+--------
+
+`brotli` [*OPTION|FILE*]...
+
+`unbrotli` is equivalent to `brotli --decompress`
+
+DESCRIPTION
+-----------
+`brotli` is a generic-purpose lossless compression algorithm that compresses
+data using a combination of a modern variant of the **LZ77** algorithm, Huffman
+coding and 2-nd order context modeling, with a compression ratio comparable to
+the best currently available general-purpose compression methods. It is similar
+in speed with deflate but offers more dense compression.
+
+`brotli` command line syntax is similar to `gzip (1)` and `zstd (1)`.
+Unlike `gzip (1)`, source files are preserved by default. It is possible to
+remove them after processing by using the `--rm` _option_.
+
+Arguments that look like "`--name`" or "`--name=value`" are _options_. Every
+_option_ has a short form "`-x`" or "`-x value`". Multiple short form _options_
+could be coalesced:
+
+* "`--decompress --stdout --suffix=.b`" works the same as
+* "`-d -c -S .b`" and
+* "`-dcS .b`"
+
+`brotli` has 3 operation modes:
+
+* default mode is compression;
+* `--decompress` option activates decompression mode;
+* `--test` option switches to integrity test mode; this option is equivalent to
+  "`--decompress --stdout`" except that the decompressed data is discarded
+  instead of being written to standard output.
+
+Every non-option argument is a _file_ entry. If no _files_ are given or _file_
+is "`-`", `brotli` reads from standard input. All arguments after "`--`" are
+_file_ entries.
+
+Unless `--stdout` or `--output` is specified, _files_ are written to a new file
+whose name is derived from the source _file_ name:
+
+* when compressing, a suffix is appended to the source filename to
+  get the target filename
+* when decompressing, a suffix is removed from the source filename to
+  get the target filename
+
+Default suffix is `.br`, but it could be specified with `--suffix` option.
+
+Conflicting or duplicate _options_ are not allowed.
+
+OPTIONS
+-------
+
+* `-#`:
+    compression level (0-9); bigger values cause denser, but slower compression
+* `-c`, `--stdout`:
+    write on standard output
+* `-d`, `--decompress`:
+    decompress mode
+* `-f`, `--force`:
+    force output file overwrite
+* `-h`, `--help`:
+    display this help and exit
+* `-j`, `--rm`:
+    remove source file(s); `gzip (1)`-like behaviour
+* `-k`, `--keep`:
+    keep source file(s); `zstd (1)`-like behaviour
+* `-n`, `--no-copy-stat`:
+    do not copy source file(s) attributes
+* `-o FILE`, `--output=FILE`:
+    output file; valid only if there is a single input entry
+* `-q NUM`, `--quality=NUM`:
+    compression level (0-11); bigger values cause denser, but slower compression
+* `-t`, `--test`:
+    test file integrity mode
+* `-v`, `--verbose`:
+    increase output verbosity
+* `-w NUM`, `--lgwin=NUM`:
+    set LZ77 window size (0, 10-24) (default: 22); window size is
+    `(2**NUM - 16)`; 0 lets compressor decide over the optimal value; bigger
+    window sizes improve density; decoder might require up to window size
+    memory to operate
+* `-S SUF`, `--suffix=SUF`:
+    output file suffix (default: `.br`)
+* `-V`, `--version`:
+    display version and exit
+* `-Z`, `--best`:
+    use best compression level (default); same as "`-q 11`"
+
+SEE ALSO
+--------
+
+`brotli` file format is defined in
+[RFC 7932](https://www.ietf.org/rfc/rfc7932.txt).
+
+`brotli` is open-sourced under the
+[MIT License](https://opensource.org/licenses/MIT).
+
+Mailing list: https://groups.google.com/forum/#!forum/brotli
+
+BUGS
+----
+Report bugs at: https://github.com/google/brotli/issues
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/docs/brotli-comparison-study-2015-09-22.pdf b/codec/L2/demos/pikEnc/host/third_party/brotli/docs/brotli-comparison-study-2015-09-22.pdf
new file mode 100755
index 0000000000..040f179e2b
Binary files /dev/null and b/codec/L2/demos/pikEnc/host/third_party/brotli/docs/brotli-comparison-study-2015-09-22.pdf differ
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/docs/brotli.1 b/codec/L2/demos/pikEnc/host/third_party/brotli/docs/brotli.1
new file mode 100755
index 0000000000..7242a32550
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/docs/brotli.1
@@ -0,0 +1,132 @@
+.TH "BROTLI" "1" "February 2018" "brotli 1.0.0" "User commands"
+.SH "NAME"
+\fBbrotli\fR \- brotli, unbrotli \- compress or decompress files
+.SH SYNOPSIS
+.P
+\fBbrotli\fP [\fIOPTION|FILE\fR]\.\.\.
+.P
+\fBunbrotli\fP is equivalent to \fBbrotli \-\-decompress\fP
+.SH DESCRIPTION
+.P
+\fBbrotli\fP is a generic\-purpose lossless compression algorithm that compresses
+data using a combination of a modern variant of the \fBLZ77\fR algorithm, Huffman
+coding and 2\-nd order context modeling, with a compression ratio comparable to
+the best currently available general\-purpose compression methods\. It is similar
+in speed to deflate but offers denser compression\.
+.P
+\fBbrotli\fP command line syntax is similar to \fBgzip (1)\fP and \fBzstd (1)\fP\|\.
+Unlike \fBgzip (1)\fP, source files are preserved by default\. It is possible to
+remove them after processing by using the \fB\-\-rm\fP \fIoption\fR\|\.
+.P
+Arguments that look like "\fB\-\-name\fP" or "\fB\-\-name=value\fP" are \fIoptions\fR\|\. Every
+\fIoption\fR has a short form "\fB\-x\fP" or "\fB\-x value\fP"\. Multiple short form \fIoptions\fR
+could be coalesced:
+.RS 0
+.IP \(bu 2
+"\fB\-\-decompress \-\-stdout \-\-suffix=\.b\fP" works the same as
+.IP \(bu 2
+"\fB\-d \-s \-S \.b\fP" and
+.IP \(bu 2
+"\fB\-dsS \.b\fP"
+
+.RE
+.P
+\fBbrotli\fP has 3 operation modes:
+.RS 0
+.IP \(bu 2
+default mode is compression;
+.IP \(bu 2
+\fB\-\-decompress\fP option activates decompression mode;
+.IP \(bu 2
+\fB\-\-test\fP option switches to integrity test mode; this option is equivalent to
+"\fB\-\-decompress \-\-stdout\fP" except that the decompressed data is discarded
+instead of being written to standard output\.
+
+.RE
+.P
+Every non\-option argument is a \fIfile\fR entry\. If no \fIfiles\fR are given or \fIfile\fR
+is "\fB\-\fP", \fBbrotli\fP reads from standard input\. All arguments after "\fB\-\-\fP" are
+\fIfile\fR entries\.
+.P
+Unless \fB\-\-stdout\fP or \fB\-\-output\fP is specified, \fIfiles\fR are written to a new file
+whose name is derived from the source \fIfile\fR name:
+.RS 0
+.IP \(bu 2
+when compressing, a suffix is appended to the source filename to
+get the target filename
+.IP \(bu 2
+when decompressing, a suffix is removed from the source filename to
+get the target filename
+
+.RE
+.P
+Default suffix is \fB\|\.br\fP, but it could be specified with \fB\-\-suffix\fP option\.
+.P
+Conflicting or duplicate \fIoptions\fR are not allowed\.
+.SH OPTIONS
+.RS 0
+.IP \(bu 2
+\fB\-#\fP:
+  compression level (0\-9); bigger values cause denser, but slower compression
+.IP \(bu 2
+\fB\-c\fP, \fB\-\-stdout\fP:
+  write on standard output
+.IP \(bu 2
+\fB\-d\fP, \fB\-\-decompress\fP:
+  decompress mode
+.IP \(bu 2
+\fB\-f\fP, \fB\-\-force\fP:
+  force output file overwrite
+.IP \(bu 2
+\fB\-h\fP, \fB\-\-help\fP:
+  display this help and exit
+.IP \(bu 2
+\fB\-j\fP, \fB\-\-rm\fP:
+  remove source file(s); \fBgzip (1)\fP\-like behaviour
+.IP \(bu 2
+\fB\-k\fP, \fB\-\-keep\fP:
+  keep source file(s); \fBzstd (1)\fP\-like behaviour
+.IP \(bu 2
+\fB\-n\fP, \fB\-\-no\-copy\-stat\fP:
+  do not copy source file(s) attributes
+.IP \(bu 2
+\fB\-o FILE\fP, \fB\-\-output=FILE\fP:
+  output file; valid only if there is a single input entry
+.IP \(bu 2
+\fB\-q NUM\fP, \fB\-\-quality=NUM\fP:
+  compression level (0\-11); bigger values cause denser, but slower compression
+.IP \(bu 2
+\fB\-t\fP, \fB\-\-test\fP:
+  test file integrity mode
+.IP \(bu 2
+\fB\-v\fP, \fB\-\-verbose\fP:
+  increase output verbosity
+.IP \(bu 2
+\fB\-w NUM\fP, \fB\-\-lgwin=NUM\fP:
+  set LZ77 window size (0, 10\-24) (default: 22); window size is
+  \fB(2**NUM \- 16)\fP; 0 lets the compressor choose the optimal value; a bigger
+  window size improves density; the decoder might require up to window size
+  memory to operate
+.IP \(bu 2
+\fB\-S SUF\fP, \fB\-\-suffix=SUF\fP:
+  output file suffix (default: \fB\|\.br\fP)
+.IP \(bu 2
+\fB\-V\fP, \fB\-\-version\fP:
+  display version and exit
+.IP \(bu 2
+\fB\-Z\fP, \fB\-\-best\fP:
+  use best compression level (default); same as "\fB\-q 11\fP"
+
+.RE
+.SH SEE ALSO
+.P
+\fBbrotli\fP file format is defined in
+RFC 7932 \fIhttps://www\.ietf\.org/rfc/rfc7932\.txt\fR\|\.
+.P
+\fBbrotli\fP is open\-sourced under the
+MIT License \fIhttps://opensource\.org/licenses/MIT\fR\|\.
+.P
+Mailing list: https://groups\.google\.com/forum/#!forum/brotli
+.SH BUGS
+.P
+Report bugs at: https://github\.com/google/brotli/issues
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/docs/decode.h.3 b/codec/L2/demos/pikEnc/host/third_party/brotli/docs/decode.h.3
new file mode 100755
index 0000000000..7b8581cc2c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/docs/decode.h.3
@@ -0,0 +1,415 @@
+.TH "decode.h" 3 "Thu Feb 22 2018" "Brotli" \" -*- nroff -*-
+.ad l
+.nh
+.SH NAME
+decode.h \- API for Brotli decompression\&.  
+
+.SH SYNOPSIS
+.br
+.PP
+.SS "Macros"
+
+.in +1c
+.ti -1c
+.RI "#define \fBBROTLI_DECODER_ERROR_CODES_LIST\fP(BROTLI_ERROR_CODE,  SEPARATOR)        "
+.br
+.RI "\fITemplate that evaluates items of \fBBrotliDecoderErrorCode\fP\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_LAST_ERROR_CODE\fP   BROTLI_DECODER_ERROR_UNREACHABLE"
+.br
+.RI "\fIThe value of the last error code, negative integer\&. \fP"
+.in -1c
+.SS "Typedefs"
+
+.in +1c
+.ti -1c
+.RI "typedef enum \fBBrotliDecoderParameter\fP \fBBrotliDecoderParameter\fP"
+.br
+.RI "\fIOptions to be used with \fBBrotliDecoderSetParameter\fP\&. \fP"
+.ti -1c
+.RI "typedef struct BrotliDecoderStateStruct \fBBrotliDecoderState\fP"
+.br
+.RI "\fIOpaque structure that holds decoder state\&. \fP"
+.in -1c
+.SS "Enumerations"
+.SS "Functions"
+
+.in +1c
+.ti -1c
+.RI "\fBBrotliDecoderState\fP * \fBBrotliDecoderCreateInstance\fP (\fBbrotli_alloc_func\fP alloc_func, \fBbrotli_free_func\fP free_func, void *opaque)"
+.br
+.RI "\fICreates an instance of \fBBrotliDecoderState\fP and initializes it\&. \fP"
+.ti -1c
+.RI "\fBBrotliDecoderResult\fP \fBBrotliDecoderDecompress\fP (size_t encoded_size, const uint8_t encoded_buffer[encoded_size], size_t *decoded_size, uint8_t decoded_buffer[*decoded_size])"
+.br
+.RI "\fIPerforms one-shot memory-to-memory decompression\&. \fP"
+.ti -1c
+.RI "\fBBrotliDecoderResult\fP \fBBrotliDecoderDecompressStream\fP (\fBBrotliDecoderState\fP *state, size_t *available_in, const uint8_t **next_in, size_t *available_out, uint8_t **next_out, size_t *total_out)"
+.br
+.RI "\fIDecompresses the input stream to the output stream\&. \fP"
+.ti -1c
+.RI "void \fBBrotliDecoderDestroyInstance\fP (\fBBrotliDecoderState\fP *state)"
+.br
+.RI "\fIDeinitializes and frees \fBBrotliDecoderState\fP instance\&. \fP"
+.ti -1c
+.RI "const char * \fBBrotliDecoderErrorString\fP (\fBBrotliDecoderErrorCode\fP c)"
+.br
+.RI "\fIConverts error code to a c-string\&. \fP"
+.ti -1c
+.RI "\fBBrotliDecoderErrorCode\fP \fBBrotliDecoderGetErrorCode\fP (const \fBBrotliDecoderState\fP *state)"
+.br
+.RI "\fIAcquires a detailed error code\&. \fP"
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliDecoderHasMoreOutput\fP (const \fBBrotliDecoderState\fP *state)"
+.br
+.RI "\fIChecks if decoder has more output\&. \fP"
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliDecoderIsFinished\fP (const \fBBrotliDecoderState\fP *state)"
+.br
+.RI "\fIChecks if decoder instance reached the final state\&. \fP"
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliDecoderIsUsed\fP (const \fBBrotliDecoderState\fP *state)"
+.br
+.RI "\fIChecks if instance has already consumed input\&. \fP"
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliDecoderSetParameter\fP (\fBBrotliDecoderState\fP *state, \fBBrotliDecoderParameter\fP param, uint32_t value)"
+.br
+.RI "\fISets the specified parameter to the given decoder instance\&. \fP"
+.ti -1c
+.RI "const uint8_t * \fBBrotliDecoderTakeOutput\fP (\fBBrotliDecoderState\fP *state, size_t *size)"
+.br
+.RI "\fIAcquires pointer to internal output buffer\&. \fP"
+.ti -1c
+.RI "uint32_t \fBBrotliDecoderVersion\fP (void)"
+.br
+.RI "\fIGets a decoder library version\&. \fP"
+.in -1c
+.SH "Detailed Description"
+.PP 
+API for Brotli decompression\&. 
+
+
+.SH "Macro Definition Documentation"
+.PP 
+.SS "#define BROTLI_DECODER_ERROR_CODES_LIST(BROTLI_ERROR_CODE, SEPARATOR)"
+
+.PP
+Template that evaluates items of \fBBrotliDecoderErrorCode\fP\&. Example:
+.PP
+.nf
+// Log Brotli error code\&.
+switch (brotliDecoderErrorCode) {
+#define CASE_(PREFIX, NAME, CODE) \
+  case BROTLI_DECODER ## PREFIX ## NAME: \
+    LOG(INFO) << "error code:" << #NAME; \
+    break;
+#define NEWLINE_
+BROTLI_DECODER_ERROR_CODES_LIST(CASE_, NEWLINE_)
+#undef CASE_
+#undef NEWLINE_
+  default: LOG(FATAL) << "unknown brotli error code";
+}
+
+.fi
+.PP
+ 
+.SS "#define BROTLI_LAST_ERROR_CODE   BROTLI_DECODER_ERROR_UNREACHABLE"
+
+.PP
+The value of the last error code, negative integer\&. All other error code values are in the range from \fBBROTLI_LAST_ERROR_CODE\fP to \fC-1\fP\&. There are also 4 other possible non-error codes \fC0\fP \&.\&. \fC3\fP in \fBBrotliDecoderErrorCode\fP enumeration\&. 
+.SH "Typedef Documentation"
+.PP 
+.SS "typedef enum \fBBrotliDecoderParameter\fP  \fBBrotliDecoderParameter\fP"
+
+.PP
+Options to be used with \fBBrotliDecoderSetParameter\fP\&. 
+.SS "typedef struct BrotliDecoderStateStruct \fBBrotliDecoderState\fP"
+
+.PP
+Opaque structure that holds decoder state\&. Allocated and initialized with \fBBrotliDecoderCreateInstance\fP\&. Cleaned up and deallocated with \fBBrotliDecoderDestroyInstance\fP\&. 
+.SH "Enumeration Type Documentation"
+.PP 
+.SS "enum \fBBrotliDecoderErrorCode\fP"
+
+.PP
+Error code for detailed logging / production debugging\&. See \fBBrotliDecoderGetErrorCode\fP and \fBBROTLI_LAST_ERROR_CODE\fP\&. 
+.SS "enum \fBBrotliDecoderParameter\fP"
+
+.PP
+Options to be used with \fBBrotliDecoderSetParameter\fP\&. 
+.PP
+\fBEnumerator\fP
+.in +1c
+.TP
+\fB\fIBROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION \fP\fP
+Disable 'canny' ring buffer allocation strategy\&. Ring buffer is allocated according to window size, despite the real size of the content\&. 
+.TP
+\fB\fIBROTLI_DECODER_PARAM_LARGE_WINDOW \fP\fP
+Flag that determines if 'Large Window Brotli' is used\&. 
+.SS "enum \fBBrotliDecoderResult\fP"
+
+.PP
+Result type for \fBBrotliDecoderDecompress\fP and \fBBrotliDecoderDecompressStream\fP functions\&. 
+.PP
+\fBEnumerator\fP
+.in +1c
+.TP
+\fB\fIBROTLI_DECODER_RESULT_ERROR \fP\fP
+Decoding error, e\&.g\&. corrupted input or memory allocation problem\&. 
+.TP
+\fB\fIBROTLI_DECODER_RESULT_SUCCESS \fP\fP
+Decoding successfully completed\&. 
+.TP
+\fB\fIBROTLI_DECODER_RESULT_NEEDS_MORE_INPUT \fP\fP
+Partially done; should be called again with more input\&. 
+.TP
+\fB\fIBROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT \fP\fP
+Partially done; should be called again with more output\&. 
+.SH "Function Documentation"
+.PP 
+.SS "\fBBrotliDecoderState\fP* BrotliDecoderCreateInstance (\fBbrotli_alloc_func\fP alloc_func, \fBbrotli_free_func\fP free_func, void * opaque)"
+
+.PP
+Creates an instance of \fBBrotliDecoderState\fP and initializes it\&. The instance can be used once for decoding and should then be destroyed with \fBBrotliDecoderDestroyInstance\fP; it cannot be reused for a new decoding session\&.
+.PP
+\fCalloc_func\fP and \fCfree_func\fP \fBMUST\fP be both zero or both non-zero\&. In the case they are both zero, default memory allocators are used\&. \fCopaque\fP is passed to \fCalloc_func\fP and \fCfree_func\fP when they are called\&. \fCfree_func\fP has to return without doing anything when asked to free a NULL pointer\&.
+.PP
+\fBParameters:\fP
+.RS 4
+\fIalloc_func\fP custom memory allocation function 
+.br
+\fIfree_func\fP custom memory free function 
+.br
+\fIopaque\fP custom memory manager handle 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fC0\fP if instance can not be allocated or initialized 
+.PP
+pointer to initialized \fBBrotliDecoderState\fP otherwise 
+.RE
+.PP
+
+.SS "\fBBrotliDecoderResult\fP BrotliDecoderDecompress (size_t encoded_size, const uint8_t encoded_buffer[encoded_size], size_t * decoded_size, uint8_t decoded_buffer[*decoded_size])"
+
+.PP
+Performs one-shot memory-to-memory decompression\&. Decompresses the data in \fCencoded_buffer\fP into \fCdecoded_buffer\fP, and sets \fC*decoded_size\fP to the decompressed length\&.
+.PP
+\fBParameters:\fP
+.RS 4
+\fIencoded_size\fP size of \fCencoded_buffer\fP 
+.br
+\fIencoded_buffer\fP compressed data buffer with at least \fCencoded_size\fP addressable bytes 
+.br
+\fIdecoded_size\fP \fBin:\fP size of \fCdecoded_buffer\fP; 
+.br
+ \fBout:\fP length of decompressed data written to \fCdecoded_buffer\fP 
+.br
+\fIdecoded_buffer\fP decompressed data destination buffer 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_DECODER_RESULT_ERROR\fP if input is corrupted, memory allocation failed, or \fCdecoded_buffer\fP is not large enough; 
+.PP
+\fBBROTLI_DECODER_RESULT_SUCCESS\fP otherwise 
+.RE
+.PP
+
+.SS "\fBBrotliDecoderResult\fP BrotliDecoderDecompressStream (\fBBrotliDecoderState\fP * state, size_t * available_in, const uint8_t ** next_in, size_t * available_out, uint8_t ** next_out, size_t * total_out)"
+
+.PP
+Decompresses the input stream to the output stream\&. The values \fC*available_in\fP and \fC*available_out\fP must specify the number of bytes addressable at \fC*next_in\fP and \fC*next_out\fP respectively\&. When \fC*available_out\fP is \fC0\fP, \fCnext_out\fP is allowed to be \fCNULL\fP\&.
+.PP
+After each call, \fC*available_in\fP will be decremented by the amount of input bytes consumed, and the \fC*next_in\fP pointer will be incremented by that amount\&. Similarly, \fC*available_out\fP will be decremented by the amount of output bytes written, and the \fC*next_out\fP pointer will be incremented by that amount\&.
+.PP
+\fCtotal_out\fP, if it is not a null-pointer, will be set to the number of bytes decompressed since the last \fCstate\fP initialization\&.
+.PP
+\fBNote:\fP
+.RS 4
+Input is never overconsumed, so \fCnext_in\fP and \fCavailable_in\fP could be passed to the next consumer after decoding is complete\&.
+.RE
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP decoder instance 
+.br
+\fIavailable_in\fP \fBin:\fP amount of available input; 
+.br
+ \fBout:\fP amount of unused input 
+.br
+\fInext_in\fP pointer to the next compressed byte 
+.br
+\fIavailable_out\fP \fBin:\fP length of output buffer; 
+.br
+ \fBout:\fP remaining size of output buffer 
+.br
+\fInext_out\fP output buffer cursor; can be \fCNULL\fP if \fCavailable_out\fP is \fC0\fP 
+.br
+\fItotal_out\fP number of bytes decompressed so far; can be \fCNULL\fP 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_DECODER_RESULT_ERROR\fP if input is corrupted, memory allocation failed, arguments were invalid, etc\&.; use \fBBrotliDecoderGetErrorCode\fP to get detailed error code 
+.PP
+\fBBROTLI_DECODER_RESULT_NEEDS_MORE_INPUT\fP decoding is blocked until more input data is provided 
+.PP
+\fBBROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT\fP decoding is blocked until more output space is provided 
+.PP
+\fBBROTLI_DECODER_RESULT_SUCCESS\fP decoding is finished, no more input might be consumed and no more output will be produced 
+.RE
+.PP
+
+.SS "void BrotliDecoderDestroyInstance (\fBBrotliDecoderState\fP * state)"
+
+.PP
+Deinitializes and frees \fBBrotliDecoderState\fP instance\&. 
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP decoder instance to be cleaned up and deallocated 
+.RE
+.PP
+
+.SS "\fBBrotliDecoderErrorCode\fP BrotliDecoderGetErrorCode (const \fBBrotliDecoderState\fP * state)"
+
+.PP
+Acquires a detailed error code\&. Should be used only after \fBBrotliDecoderDecompressStream\fP returns \fBBROTLI_DECODER_RESULT_ERROR\fP\&.
+.PP
+See also \fBBrotliDecoderErrorString\fP
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP decoder instance 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+last saved error code 
+.RE
+.PP
+
+.SS "\fBBROTLI_BOOL\fP BrotliDecoderHasMoreOutput (const \fBBrotliDecoderState\fP * state)"
+
+.PP
+Checks if decoder has more output\&. 
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP decoder instance 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_TRUE\fP, if decoder has some unconsumed output 
+.PP
+\fBBROTLI_FALSE\fP otherwise 
+.RE
+.PP
+
+.SS "\fBBROTLI_BOOL\fP BrotliDecoderIsFinished (const \fBBrotliDecoderState\fP * state)"
+
+.PP
+Checks if decoder instance reached the final state\&. 
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP decoder instance 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_TRUE\fP if decoder is in a state where it reached the end of the input and produced all of the output 
+.PP
+\fBBROTLI_FALSE\fP otherwise 
+.RE
+.PP
+
+.SS "\fBBROTLI_BOOL\fP BrotliDecoderIsUsed (const \fBBrotliDecoderState\fP * state)"
+
+.PP
+Checks if instance has already consumed input\&. Instance that returns \fBBROTLI_FALSE\fP is considered 'fresh' and could be reused\&.
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP decoder instance 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_TRUE\fP if decoder has already used some input bytes 
+.PP
+\fBBROTLI_FALSE\fP otherwise 
+.RE
+.PP
+
+.SS "\fBBROTLI_BOOL\fP BrotliDecoderSetParameter (\fBBrotliDecoderState\fP * state, \fBBrotliDecoderParameter\fP param, uint32_t value)"
+
+.PP
+Sets the specified parameter to the given decoder instance\&. 
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP decoder instance 
+.br
+\fIparam\fP parameter to set 
+.br
+\fIvalue\fP new parameter value 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_FALSE\fP if parameter is unrecognized, or value is invalid 
+.PP
+\fBBROTLI_TRUE\fP if value is accepted 
+.RE
+.PP
+
+.SS "const uint8_t* BrotliDecoderTakeOutput (\fBBrotliDecoderState\fP * state, size_t * size)"
+
+.PP
+Acquires pointer to internal output buffer\&. This method is used to make language bindings easier and more efficient:
+.IP "1." 4
+push data to \fBBrotliDecoderDecompressStream\fP, until \fBBROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT\fP is reported
+.IP "2." 4
+use \fBBrotliDecoderTakeOutput\fP to peek bytes and copy to language-specific entity
+.PP
+.PP
+Also this could be useful if there is an output stream that is able to consume all the provided data (e\&.g\&. when data is saved to file system)\&.
+.PP
+\fBAttention:\fP
+.RS 4
+After every call to \fBBrotliDecoderTakeOutput\fP \fC*size\fP bytes of output are considered consumed for all consecutive calls to the instance methods; returned pointer becomes invalidated as well\&.
+.RE
+.PP
+\fBNote:\fP
+.RS 4
+Decoder output is not guaranteed to be contiguous\&. This means that after the size-unrestricted call to \fBBrotliDecoderTakeOutput\fP, immediate next call to \fBBrotliDecoderTakeOutput\fP may return more data\&.
+.RE
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP decoder instance 
+.br
+\fIsize\fP \fBin:\fP number of bytes caller is ready to take, \fC0\fP if any amount could be handled; 
+.br
+ \fBout:\fP amount of data pointed by returned pointer and considered consumed; 
+.br
+ out value is never greater than in value, unless it is \fC0\fP 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+pointer to output data 
+.RE
+.PP
+
+.SS "uint32_t BrotliDecoderVersion (void)"
+
+.PP
+Gets a decoder library version\&. Look at BROTLI_VERSION for more information\&. 
+.SH "Author"
+.PP 
+Generated automatically by Doxygen for Brotli from the source code\&.
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/docs/encode.h.3 b/codec/L2/demos/pikEnc/host/third_party/brotli/docs/encode.h.3
new file mode 100755
index 0000000000..eff57bddb0
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/docs/encode.h.3
@@ -0,0 +1,572 @@
+.TH "encode.h" 3 "Thu Feb 22 2018" "Brotli" \" -*- nroff -*-
+.ad l
+.nh
+.SH NAME
+encode.h \- API for Brotli compression\&.  
+
+.SH SYNOPSIS
+.br
+.PP
+.SS "Macros"
+
+.in +1c
+.ti -1c
+.RI "#define \fBBROTLI_DEFAULT_MODE\fP   \fBBROTLI_MODE_GENERIC\fP"
+.br
+.RI "\fIDefault value for \fBBROTLI_PARAM_MODE\fP parameter\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_DEFAULT_QUALITY\fP   11"
+.br
+.RI "\fIDefault value for \fBBROTLI_PARAM_QUALITY\fP parameter\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_DEFAULT_WINDOW\fP   22"
+.br
+.RI "\fIDefault value for \fBBROTLI_PARAM_LGWIN\fP parameter\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_LARGE_MAX_WINDOW_BITS\fP   30"
+.br
+.RI "\fIMaximal value for \fBBROTLI_PARAM_LGWIN\fP parameter in 'Large Window Brotli' (32-bit)\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_MAX_INPUT_BLOCK_BITS\fP   24"
+.br
+.RI "\fIMaximal value for \fBBROTLI_PARAM_LGBLOCK\fP parameter\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_MAX_QUALITY\fP   11"
+.br
+.RI "\fIMaximal value for \fBBROTLI_PARAM_QUALITY\fP parameter\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_MAX_WINDOW_BITS\fP   24"
+.br
+.RI "\fIMaximal value for \fBBROTLI_PARAM_LGWIN\fP parameter\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_MIN_INPUT_BLOCK_BITS\fP   16"
+.br
+.RI "\fIMinimal value for \fBBROTLI_PARAM_LGBLOCK\fP parameter\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_MIN_QUALITY\fP   0"
+.br
+.RI "\fIMinimal value for \fBBROTLI_PARAM_QUALITY\fP parameter\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_MIN_WINDOW_BITS\fP   10"
+.br
+.RI "\fIMinimal value for \fBBROTLI_PARAM_LGWIN\fP parameter\&. \fP"
+.in -1c
+.SS "Typedefs"
+
+.in +1c
+.ti -1c
+.RI "typedef enum \fBBrotliEncoderMode\fP \fBBrotliEncoderMode\fP"
+.br
+.RI "\fIOptions for \fBBROTLI_PARAM_MODE\fP parameter\&. \fP"
+.ti -1c
+.RI "typedef enum \fBBrotliEncoderOperation\fP \fBBrotliEncoderOperation\fP"
+.br
+.RI "\fIOperations that can be performed by streaming encoder\&. \fP"
+.ti -1c
+.RI "typedef enum \fBBrotliEncoderParameter\fP \fBBrotliEncoderParameter\fP"
+.br
+.RI "\fIOptions to be used with \fBBrotliEncoderSetParameter\fP\&. \fP"
+.ti -1c
+.RI "typedef struct BrotliEncoderStateStruct \fBBrotliEncoderState\fP"
+.br
+.RI "\fIOpaque structure that holds encoder state\&. \fP"
+.in -1c
+.SS "Enumerations"
+.SS "Functions"
+
+.in +1c
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliEncoderCompress\fP (int quality, int lgwin, \fBBrotliEncoderMode\fP mode, size_t input_size, const uint8_t input_buffer[input_size], size_t *encoded_size, uint8_t encoded_buffer[*encoded_size])"
+.br
+.RI "\fIPerforms one-shot memory-to-memory compression\&. \fP"
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliEncoderCompressStream\fP (\fBBrotliEncoderState\fP *state, \fBBrotliEncoderOperation\fP op, size_t *available_in, const uint8_t **next_in, size_t *available_out, uint8_t **next_out, size_t *total_out)"
+.br
+.RI "\fICompresses input stream to output stream\&. \fP"
+.ti -1c
+.RI "\fBBrotliEncoderState\fP * \fBBrotliEncoderCreateInstance\fP (\fBbrotli_alloc_func\fP alloc_func, \fBbrotli_free_func\fP free_func, void *opaque)"
+.br
+.RI "\fICreates an instance of \fBBrotliEncoderState\fP and initializes it\&. \fP"
+.ti -1c
+.RI "void \fBBrotliEncoderDestroyInstance\fP (\fBBrotliEncoderState\fP *state)"
+.br
+.RI "\fIDeinitializes and frees \fBBrotliEncoderState\fP instance\&. \fP"
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliEncoderHasMoreOutput\fP (\fBBrotliEncoderState\fP *state)"
+.br
+.RI "\fIChecks if encoder has more output\&. \fP"
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliEncoderIsFinished\fP (\fBBrotliEncoderState\fP *state)"
+.br
+.RI "\fIChecks if encoder instance reached the final state\&. \fP"
+.ti -1c
+.RI "size_t \fBBrotliEncoderMaxCompressedSize\fP (size_t input_size)"
+.br
+.RI "\fICalculates the output size bound for the given \fCinput_size\fP\&. \fP"
+.ti -1c
+.RI "\fBBROTLI_BOOL\fP \fBBrotliEncoderSetParameter\fP (\fBBrotliEncoderState\fP *state, \fBBrotliEncoderParameter\fP param, uint32_t value)"
+.br
+.RI "\fISets the specified parameter to the given encoder instance\&. \fP"
+.ti -1c
+.RI "const uint8_t * \fBBrotliEncoderTakeOutput\fP (\fBBrotliEncoderState\fP *state, size_t *size)"
+.br
+.RI "\fIAcquires pointer to internal output buffer\&. \fP"
+.ti -1c
+.RI "uint32_t \fBBrotliEncoderVersion\fP (void)"
+.br
+.RI "\fIGets an encoder library version\&. \fP"
+.in -1c
+.SH "Detailed Description"
+.PP 
+API for Brotli compression\&. 
+
+
+.SH "Macro Definition Documentation"
+.PP 
+.SS "#define BROTLI_DEFAULT_MODE   \fBBROTLI_MODE_GENERIC\fP"
+
+.PP
+Default value for \fBBROTLI_PARAM_MODE\fP parameter\&. 
+.SS "#define BROTLI_DEFAULT_QUALITY   11"
+
+.PP
+Default value for \fBBROTLI_PARAM_QUALITY\fP parameter\&. 
+.SS "#define BROTLI_DEFAULT_WINDOW   22"
+
+.PP
+Default value for \fBBROTLI_PARAM_LGWIN\fP parameter\&. 
+.SS "#define BROTLI_MAX_INPUT_BLOCK_BITS   24"
+
+.PP
+Maximal value for \fBBROTLI_PARAM_LGBLOCK\fP parameter\&. 
+.SS "#define BROTLI_MAX_QUALITY   11"
+
+.PP
+Maximal value for \fBBROTLI_PARAM_QUALITY\fP parameter\&. 
+.SS "#define BROTLI_MAX_WINDOW_BITS   24"
+
+.PP
+Maximal value for \fBBROTLI_PARAM_LGWIN\fP parameter\&. 
+.PP
+\fBNote:\fP
+.RS 4
+equal to \fCBROTLI_MAX_DISTANCE_BITS\fP constant\&. 
+.RE
+.PP
+
+.SS "#define BROTLI_MIN_INPUT_BLOCK_BITS   16"
+
+.PP
+Minimal value for \fBBROTLI_PARAM_LGBLOCK\fP parameter\&. 
+.SS "#define BROTLI_MIN_QUALITY   0"
+
+.PP
+Minimal value for \fBBROTLI_PARAM_QUALITY\fP parameter\&. 
+.SS "#define BROTLI_MIN_WINDOW_BITS   10"
+
+.PP
+Minimal value for \fBBROTLI_PARAM_LGWIN\fP parameter\&. 
+.SH "Typedef Documentation"
+.PP 
+.SS "typedef enum \fBBrotliEncoderMode\fP  \fBBrotliEncoderMode\fP"
+
+.PP
+Options for \fBBROTLI_PARAM_MODE\fP parameter\&. 
+.SS "typedef enum \fBBrotliEncoderOperation\fP  \fBBrotliEncoderOperation\fP"
+
+.PP
+Operations that can be performed by streaming encoder\&. 
+.SS "typedef enum \fBBrotliEncoderParameter\fP  \fBBrotliEncoderParameter\fP"
+
+.PP
+Options to be used with \fBBrotliEncoderSetParameter\fP\&. 
+.SS "typedef struct BrotliEncoderStateStruct \fBBrotliEncoderState\fP"
+
+.PP
+Opaque structure that holds encoder state\&. Allocated and initialized with \fBBrotliEncoderCreateInstance\fP\&. Cleaned up and deallocated with \fBBrotliEncoderDestroyInstance\fP\&. 
+.SH "Enumeration Type Documentation"
+.PP 
+.SS "enum \fBBrotliEncoderMode\fP"
+
+.PP
+Options for \fBBROTLI_PARAM_MODE\fP parameter\&. 
+.PP
+\fBEnumerator\fP
+.in +1c
+.TP
+\fB\fIBROTLI_MODE_GENERIC \fP\fP
+Default compression mode\&. In this mode compressor does not know anything in advance about the properties of the input\&. 
+.TP
+\fB\fIBROTLI_MODE_TEXT \fP\fP
+Compression mode for UTF-8 formatted text input\&. 
+.TP
+\fB\fIBROTLI_MODE_FONT \fP\fP
+Compression mode used in WOFF 2\&.0\&. 
+.SS "enum \fBBrotliEncoderOperation\fP"
+
+.PP
+Operations that can be performed by streaming encoder\&. 
+.PP
+\fBEnumerator\fP
+.in +1c
+.TP
+\fB\fIBROTLI_OPERATION_PROCESS \fP\fP
+Process input\&. Encoder may postpone producing output, until it has processed enough input\&. 
+.TP
+\fB\fIBROTLI_OPERATION_FLUSH \fP\fP
+Produce output for all processed input\&. Actual flush is performed when input stream is depleted and there is enough space in output stream\&. This means that client should repeat \fBBROTLI_OPERATION_FLUSH\fP operation until \fCavailable_in\fP becomes \fC0\fP, and \fBBrotliEncoderHasMoreOutput\fP returns \fBBROTLI_FALSE\fP\&. If output is acquired via \fBBrotliEncoderTakeOutput\fP, then operation should be repeated after output buffer is drained\&.
+.PP
+\fBWarning:\fP
+.RS 4
+Until flush is complete, client \fBSHOULD\fP \fBNOT\fP swap, reduce or extend input stream\&.
+.RE
+.PP
+When flush is complete, output data will be sufficient for decoder to reproduce all the given input\&. 
+.TP
+\fB\fIBROTLI_OPERATION_FINISH \fP\fP
+Finalize the stream\&. Actual finalization is performed when input stream is depleted and there is enough space in output stream\&. This means that client should repeat \fBBROTLI_OPERATION_FINISH\fP operation until \fCavailable_in\fP becomes \fC0\fP, and \fBBrotliEncoderHasMoreOutput\fP returns \fBBROTLI_FALSE\fP\&. If output is acquired via \fBBrotliEncoderTakeOutput\fP, then operation should be repeated after output buffer is drained\&.
+.PP
+\fBWarning:\fP
+.RS 4
+Until finalization is complete, client \fBSHOULD\fP \fBNOT\fP swap, reduce or extend input stream\&.
+.RE
+.PP
+Helper function \fBBrotliEncoderIsFinished\fP checks if stream is finalized and output fully dumped\&.
+.PP
+Adding more input data to finalized stream is impossible\&. 
+.TP
+\fB\fIBROTLI_OPERATION_EMIT_METADATA \fP\fP
+Emit metadata block to stream\&. Metadata is opaque to Brotli: neither encoder, nor decoder processes this data or relies on it\&. It may be used to pass some extra information from encoder client to decoder client without interfering with main data stream\&.
+.PP
+\fBNote:\fP
+.RS 4
+Encoder may emit empty metadata blocks internally, to pad encoded stream to byte boundary\&.
+.RE
+.PP
+\fBWarning:\fP
+.RS 4
+Until emitting metadata is complete client \fBSHOULD\fP \fBNOT\fP swap, reduce or extend input stream\&.
+.PP
+The whole content of input buffer is considered to be the content of metadata block\&. Do \fBNOT\fP \fIappend\fP metadata to input stream, before it is depleted with other operations\&.
+.RE
+.PP
+Stream is soft-flushed before metadata block is emitted\&. Metadata block \fBMUST\fP be no longer than 16MiB\&. 
+.SS "enum \fBBrotliEncoderParameter\fP"
+
+.PP
+Options to be used with \fBBrotliEncoderSetParameter\fP\&. 
+.PP
+\fBEnumerator\fP
+.in +1c
+.TP
+\fB\fIBROTLI_PARAM_MODE \fP\fP
+Tune encoder for specific input\&. \fBBrotliEncoderMode\fP enumerates all available values\&. 
+.TP
+\fB\fIBROTLI_PARAM_QUALITY \fP\fP
+The main compression speed-density lever\&. The higher the quality, the slower the compression\&. Range is from \fBBROTLI_MIN_QUALITY\fP to \fBBROTLI_MAX_QUALITY\fP\&. 
+.TP
+\fB\fIBROTLI_PARAM_LGWIN \fP\fP
+Recommended sliding LZ77 window size\&. Encoder may reduce this value, e\&.g\&. if input is much smaller than window size\&.
+.PP
+Window size is \fC(1 << value) - 16\fP\&.
+.PP
+Range is from \fBBROTLI_MIN_WINDOW_BITS\fP to \fBBROTLI_MAX_WINDOW_BITS\fP\&. 
+.TP
+\fB\fIBROTLI_PARAM_LGBLOCK \fP\fP
+Recommended input block size\&. Encoder may reduce this value, e\&.g\&. if input is much smaller than input block size\&.
+.PP
+Range is from \fBBROTLI_MIN_INPUT_BLOCK_BITS\fP to \fBBROTLI_MAX_INPUT_BLOCK_BITS\fP\&.
+.PP
+\fBNote:\fP
+.RS 4
+Bigger input block size allows better compression, but consumes more memory\&. 
+.br
+ The rough formula of memory used for temporary input storage is \fC3 << lgBlock\fP\&. 
+.RE
+.PP
+
+.TP
+\fB\fIBROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING \fP\fP
+Flag that affects usage of 'literal context modeling' format feature\&. This flag is a 'decoding-speed vs compression ratio' trade-off\&. 
+.TP
+\fB\fIBROTLI_PARAM_SIZE_HINT \fP\fP
+Estimated total input size for all \fBBrotliEncoderCompressStream\fP calls\&. The default value is 0, which means that the total input size is unknown\&. 
+.TP
+\fB\fIBROTLI_PARAM_LARGE_WINDOW \fP\fP
+Flag that determines if 'Large Window Brotli' is used\&. 
+.TP
+\fB\fIBROTLI_PARAM_NPOSTFIX \fP\fP
+Recommended number of postfix bits (NPOSTFIX)\&. Encoder may change this value\&.
+.PP
+Range is from 0 to ::BROTLI_MAX_NPOSTFIX\&. 
+.TP
+\fB\fIBROTLI_PARAM_NDIRECT \fP\fP
+Recommended number of direct distance codes (NDIRECT)\&. Encoder may change this value\&.
+.PP
+Range is from 0 to (15 << NPOSTFIX) in steps of (1 << NPOSTFIX)\&. 
+.SH "Function Documentation"
+.PP 
+.SS "\fBBROTLI_BOOL\fP BrotliEncoderCompress (int quality, int lgwin, \fBBrotliEncoderMode\fP mode, size_t input_size, const uint8_t input_buffer[input_size], size_t * encoded_size, uint8_t encoded_buffer[*encoded_size])"
+
+.PP
+Performs one-shot memory-to-memory compression\&. Compresses the data in \fCinput_buffer\fP into \fCencoded_buffer\fP, and sets \fC*encoded_size\fP to the compressed length\&.
+.PP
+\fBNote:\fP
+.RS 4
+If \fBBrotliEncoderMaxCompressedSize\fP(\fCinput_size\fP) returns non-zero value, then output is guaranteed to be no longer than that\&.
+.RE
+.PP
+\fBParameters:\fP
+.RS 4
+\fIquality\fP quality parameter value, e\&.g\&. \fBBROTLI_DEFAULT_QUALITY\fP 
+.br
+\fIlgwin\fP lgwin parameter value, e\&.g\&. \fBBROTLI_DEFAULT_WINDOW\fP 
+.br
+\fImode\fP mode parameter value, e\&.g\&. \fBBROTLI_DEFAULT_MODE\fP 
+.br
+\fIinput_size\fP size of \fCinput_buffer\fP 
+.br
+\fIinput_buffer\fP input data buffer with at least \fCinput_size\fP addressable bytes 
+.br
+\fIencoded_size\fP \fBin:\fP size of \fCencoded_buffer\fP; 
+.br
+ \fBout:\fP length of compressed data written to \fCencoded_buffer\fP, or \fC0\fP if compression fails 
+.br
+\fIencoded_buffer\fP compressed data destination buffer 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_FALSE\fP in case of compression error 
+.PP
+\fBBROTLI_FALSE\fP if output buffer is too small 
+.PP
+\fBBROTLI_TRUE\fP otherwise 
+.RE
+.PP
+
+.SS "\fBBROTLI_BOOL\fP BrotliEncoderCompressStream (\fBBrotliEncoderState\fP * state, \fBBrotliEncoderOperation\fP op, size_t * available_in, const uint8_t ** next_in, size_t * available_out, uint8_t ** next_out, size_t * total_out)"
+
+.PP
+Compresses input stream to output stream\&. The values \fC*available_in\fP and \fC*available_out\fP must specify the number of bytes addressable at \fC*next_in\fP and \fC*next_out\fP respectively\&. When \fC*available_out\fP is \fC0\fP, \fCnext_out\fP is allowed to be \fCNULL\fP\&.
+.PP
+After each call, \fC*available_in\fP will be decremented by the amount of input bytes consumed, and the \fC*next_in\fP pointer will be incremented by that amount\&. Similarly, \fC*available_out\fP will be decremented by the amount of output bytes written, and the \fC*next_out\fP pointer will be incremented by that amount\&.
+.PP
+\fCtotal_out\fP, if it is not a null-pointer, will be set to the number of bytes compressed since the last \fCstate\fP initialization\&.
+.PP
+Internally, the workflow consists of 3 tasks:
+.IP "1." 4
+(optionally) copy input data to internal buffer
+.IP "2." 4
+actually compress data and (optionally) store it to internal buffer
+.IP "3." 4
+(optionally) copy compressed bytes from internal buffer to output stream
+.PP
+.PP
+Whenever all 3 tasks can't move forward anymore, or an error occurs, this method returns the control flow to the caller\&.
+.PP
+\fCop\fP is used to perform flush, finish the stream, or inject metadata block\&. See \fBBrotliEncoderOperation\fP for more information\&.
+.PP
+Flushing the stream means forcing encoding of all input passed to encoder and completing the current output block, so it could be fully decoded by stream decoder\&. To perform flush set \fCop\fP to \fBBROTLI_OPERATION_FLUSH\fP\&. Under some circumstances (e\&.g\&. lack of output stream capacity) this operation would require several calls to \fBBrotliEncoderCompressStream\fP\&. The method must be called again until both input stream is depleted and encoder has no more output (see \fBBrotliEncoderHasMoreOutput\fP) after the method is called\&.
+.PP
+Finishing the stream means encoding of all input passed to encoder and adding specific 'final' marks, so stream decoder could determine that stream is complete\&. To perform finish set \fCop\fP to \fBBROTLI_OPERATION_FINISH\fP\&. Under some circumstances (e\&.g\&. lack of output stream capacity) this operation would require several calls to \fBBrotliEncoderCompressStream\fP\&. The method must be called again until both input stream is depleted and encoder has no more output (see \fBBrotliEncoderHasMoreOutput\fP) after the method is called\&.
+.PP
+\fBWarning:\fP
+.RS 4
+When flushing and finishing, \fCop\fP should not change until operation is complete; input stream should not be swapped, reduced or extended as well\&.
+.RE
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP encoder instance 
+.br
+\fIop\fP requested operation 
+.br
+\fIavailable_in\fP \fBin:\fP amount of available input; 
+.br
+ \fBout:\fP amount of unused input 
+.br
+\fInext_in\fP pointer to the next input byte 
+.br
+\fIavailable_out\fP \fBin:\fP length of output buffer; 
+.br
+ \fBout:\fP remaining size of output buffer 
+.br
+\fInext_out\fP compressed output buffer cursor; can be \fCNULL\fP if \fCavailable_out\fP is \fC0\fP 
+.br
+\fItotal_out\fP number of bytes produced so far; can be \fCNULL\fP 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_FALSE\fP if there was an error 
+.PP
+\fBBROTLI_TRUE\fP otherwise 
+.RE
+.PP
+
+.SS "\fBBrotliEncoderState\fP* BrotliEncoderCreateInstance (\fBbrotli_alloc_func\fP alloc_func, \fBbrotli_free_func\fP free_func, void * opaque)"
+
+.PP
+Creates an instance of \fBBrotliEncoderState\fP and initializes it\&. \fCalloc_func\fP and \fCfree_func\fP \fBMUST\fP be both zero or both non-zero\&. In the case they are both zero, default memory allocators are used\&. \fCopaque\fP is passed to \fCalloc_func\fP and \fCfree_func\fP when they are called\&. \fCfree_func\fP has to return without doing anything when asked to free a NULL pointer\&.
+.PP
+\fBParameters:\fP
+.RS 4
+\fIalloc_func\fP custom memory allocation function 
+.br
+\fIfree_func\fP custom memory free function 
+.br
+\fIopaque\fP custom memory manager handle 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fC0\fP if instance can not be allocated or initialized 
+.PP
+pointer to initialized \fBBrotliEncoderState\fP otherwise 
+.RE
+.PP
+
+.SS "void BrotliEncoderDestroyInstance (\fBBrotliEncoderState\fP * state)"
+
+.PP
+Deinitializes and frees \fBBrotliEncoderState\fP instance\&. 
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP encoder instance to be cleaned up and deallocated 
+.RE
+.PP
+
+.SS "\fBBROTLI_BOOL\fP BrotliEncoderHasMoreOutput (\fBBrotliEncoderState\fP * state)"
+
+.PP
+Checks if encoder has more output\&. 
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP encoder instance 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_TRUE\fP, if encoder has some unconsumed output 
+.PP
+\fBBROTLI_FALSE\fP otherwise 
+.RE
+.PP
+
+.SS "\fBBROTLI_BOOL\fP BrotliEncoderIsFinished (\fBBrotliEncoderState\fP * state)"
+
+.PP
+Checks if encoder instance reached the final state\&. 
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP encoder instance 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_TRUE\fP if encoder is in a state where it reached the end of the input and produced all of the output 
+.PP
+\fBBROTLI_FALSE\fP otherwise 
+.RE
+.PP
+
+.SS "size_t BrotliEncoderMaxCompressedSize (size_t input_size)"
+
+.PP
+Calculates the output size bound for the given \fCinput_size\fP\&. 
+.PP
+\fBWarning:\fP
+.RS 4
+Result is only valid if quality is at least \fC2\fP and, in case \fBBrotliEncoderCompressStream\fP was used, no flushes (\fBBROTLI_OPERATION_FLUSH\fP) were performed\&.
+.RE
+.PP
+\fBParameters:\fP
+.RS 4
+\fIinput_size\fP size of projected input 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fC0\fP if result does not fit \fCsize_t\fP 
+.RE
+.PP
+
+.SS "\fBBROTLI_BOOL\fP BrotliEncoderSetParameter (\fBBrotliEncoderState\fP * state, \fBBrotliEncoderParameter\fP param, uint32_t value)"
+
+.PP
+Sets the specified parameter to the given encoder instance\&. 
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP encoder instance 
+.br
+\fIparam\fP parameter to set 
+.br
+\fIvalue\fP new parameter value 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fBBROTLI_FALSE\fP if parameter is unrecognized, or value is invalid 
+.PP
+\fBBROTLI_FALSE\fP if value of parameter can not be changed at current encoder state (e\&.g\&. when encoding is started, window size might be already encoded and therefore it is impossible to change it) 
+.PP
+\fBBROTLI_TRUE\fP if value is accepted 
+.RE
+.PP
+\fBWarning:\fP
+.RS 4
+invalid values might be accepted in case they would not break encoding process\&. 
+.RE
+.PP
+
+.SS "const uint8_t* BrotliEncoderTakeOutput (\fBBrotliEncoderState\fP * state, size_t * size)"
+
+.PP
+Acquires pointer to internal output buffer\&. This method is used to make language bindings easier and more efficient:
+.IP "1." 4
+push data to \fBBrotliEncoderCompressStream\fP, until \fBBrotliEncoderHasMoreOutput\fP returns BROTLI_TRUE
+.IP "2." 4
+use \fBBrotliEncoderTakeOutput\fP to peek bytes and copy to language-specific entity
+.PP
+.PP
+Also this could be useful if there is an output stream that is able to consume all the provided data (e\&.g\&. when data is saved to file system)\&.
+.PP
+\fBAttention:\fP
+.RS 4
+After every call to \fBBrotliEncoderTakeOutput\fP \fC*size\fP bytes of output are considered consumed for all consecutive calls to the instance methods; returned pointer becomes invalidated as well\&.
+.RE
+.PP
+\fBNote:\fP
+.RS 4
+Encoder output is not guaranteed to be contiguous\&. This means that after the size-unrestricted call to \fBBrotliEncoderTakeOutput\fP, immediate next call to \fBBrotliEncoderTakeOutput\fP may return more data\&.
+.RE
+.PP
+\fBParameters:\fP
+.RS 4
+\fIstate\fP encoder instance 
+.br
+\fIsize\fP \fBin:\fP number of bytes caller is ready to take, \fC0\fP if any amount could be handled; 
+.br
+ \fBout:\fP amount of data pointed by returned pointer and considered consumed; 
+.br
+ out value is never greater than in value, unless it is \fC0\fP 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+pointer to output data 
+.RE
+.PP
+
+.SS "uint32_t BrotliEncoderVersion (void)"
+
+.PP
+Gets an encoder library version\&. Look at BROTLI_VERSION for more information\&. 
+.SH "Author"
+.PP 
+Generated automatically by Doxygen for Brotli from the source code\&.
diff --git a/codec/L2/demos/pikEnc/host/third_party/brotli/docs/types.h.3 b/codec/L2/demos/pikEnc/host/third_party/brotli/docs/types.h.3
new file mode 100755
index 0000000000..bef9313032
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/brotli/docs/types.h.3
@@ -0,0 +1,117 @@
+.TH "types.h" 3 "Thu Feb 22 2018" "Brotli" \" -*- nroff -*-
+.ad l
+.nh
+.SH NAME
+types.h \- Common types used in decoder and encoder API\&.  
+
+.SH SYNOPSIS
+.br
+.PP
+.SS "Macros"
+
+.in +1c
+.ti -1c
+.RI "#define \fBBROTLI_BOOL\fP   int"
+.br
+.RI "\fIA portable \fCbool\fP replacement\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_FALSE\fP   0"
+.br
+.RI "\fIPortable \fCfalse\fP replacement\&. \fP"
+.ti -1c
+.RI "#define \fBBROTLI_TRUE\fP   1"
+.br
+.RI "\fIPortable \fCtrue\fP replacement\&. \fP"
+.ti -1c
+.RI "#define \fBTO_BROTLI_BOOL\fP(X)   (!!(X) ? \fBBROTLI_TRUE\fP : \fBBROTLI_FALSE\fP)"
+.br
+.RI "\fI\fCbool\fP to \fBBROTLI_BOOL\fP conversion macros\&. \fP"
+.in -1c
+.SS "Typedefs"
+
+.in +1c
+.ti -1c
+.RI "typedef void *(* \fBbrotli_alloc_func\fP) (void *opaque, size_t size)"
+.br
+.RI "\fIAllocating function pointer type\&. \fP"
+.ti -1c
+.RI "typedef void(* \fBbrotli_free_func\fP) (void *opaque, void *address)"
+.br
+.RI "\fIDeallocating function pointer type\&. \fP"
+.in -1c
+.SH "Detailed Description"
+.PP 
+Common types used in decoder and encoder API\&. 
+
+
+.SH "Macro Definition Documentation"
+.PP 
+.SS "#define BROTLI_BOOL   int"
+
+.PP
+A portable \fCbool\fP replacement\&. \fBBROTLI_BOOL\fP is a 'documentation' type: actually it is \fCint\fP, but in API it denotes a type, whose only values are \fBBROTLI_TRUE\fP and \fBBROTLI_FALSE\fP\&.
+.PP
+\fBBROTLI_BOOL\fP values passed to Brotli should either be \fBBROTLI_TRUE\fP or \fBBROTLI_FALSE\fP, or be a result of \fBTO_BROTLI_BOOL\fP macros\&.
+.PP
+\fBBROTLI_BOOL\fP values returned by Brotli should not be tested for equality with \fCtrue\fP, \fCfalse\fP, \fBBROTLI_TRUE\fP, \fBBROTLI_FALSE\fP, but rather should be evaluated, for example:
+.PP
+.nf
+if (SomeBrotliFunction(encoder, BROTLI_TRUE) &&
+    !OtherBrotliFunction(decoder, BROTLI_FALSE)) {
+  bool x = !!YetAnotherBrotliFunction(encoder, TO_BROTLI_BOOL(2 * 2 == 4));
+  DoSomething(x);
+}
+
+.fi
+.PP
+ 
+.SS "#define BROTLI_FALSE   0"
+
+.PP
+Portable \fCfalse\fP replacement\&. 
+.SS "#define BROTLI_TRUE   1"
+
+.PP
+Portable \fCtrue\fP replacement\&. 
+.SS "#define TO_BROTLI_BOOL(X)   (!!(X) ? \fBBROTLI_TRUE\fP : \fBBROTLI_FALSE\fP)"
+
+.PP
+\fCbool\fP to \fBBROTLI_BOOL\fP conversion macros\&. 
+.SH "Typedef Documentation"
+.PP 
+.SS "typedef void*(* brotli_alloc_func) (void *opaque, size_t size)"
+
+.PP
+Allocating function pointer type\&. 
+.PP
+\fBParameters:\fP
+.RS 4
+\fIopaque\fP custom memory manager handle provided by client 
+.br
+\fIsize\fP requested memory region size; can not be \fC0\fP 
+.RE
+.PP
+\fBReturns:\fP
+.RS 4
+\fC0\fP in the case of failure 
+.PP
+a valid pointer to a memory region of at least \fCsize\fP bytes long otherwise 
+.RE
+.PP
+
+.SS "typedef void(* brotli_free_func) (void *opaque, void *address)"
+
+.PP
+Deallocating function pointer type\&. This function \fBSHOULD\fP do nothing if \fCaddress\fP is \fC0\fP\&.
+.PP
+\fBParameters:\fP
+.RS 4
+\fIopaque\fP custom memory manager handle provided by client 
+.br
+\fIaddress\fP memory region pointer returned by \fBbrotli_alloc_func\fP, or \fC0\fP 
+.RE
+.PP
+
+.SH "Author"
+.PP 
+Generated automatically by Doxygen for Brotli from the source code\&.
diff --git a/codec/L2/demos/pikEnc/host/third_party/fse_error_wrapper.h b/codec/L2/demos/pikEnc/host/third_party/fse_error_wrapper.h
new file mode 100755
index 0000000000..a08933244f
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/fse_error_wrapper.h
@@ -0,0 +1,16 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+// fse_decompress.c has its own "#define FSE_isError", so a wrapper define of
+// that name must not be active there. This header undefines FSE_isError and
+// is meant to be included only in that file.
+
+#ifndef __THIRD_PARTY_FINISTESTATEENTROPY_FSE_ERROR_WRAPPER_H__
+#define __THIRD_PARTY_FINISTESTATEENTROPY_FSE_ERROR_WRAPPER_H__
+
+#undef FSE_isError
+
+#endif // __THIRD_PARTY_FINISTESTATEENTROPY_FSE_ERROR_WRAPPER_H__
diff --git a/codec/L2/demos/pikEnc/host/third_party/fse_wrapper.h b/codec/L2/demos/pikEnc/host/third_party/fse_wrapper.h
new file mode 100755
index 0000000000..432d8aa817
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/fse_wrapper.h
@@ -0,0 +1,77 @@
+// Copyright 2019 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+// Wrapper module to make the symbols of FiniteStateEntropy have a different
+// name than the ones exported by Zstd. Unfortunately, FSE is also bundled
+// inside Zstd but not as a library and with some differences, therefore we use
+// this wrapper to avoid the conflict.
+
+#ifndef __THIRD_PARTY_FINISTESTATEENTROPY_FSE_WRAPPER_H__
+#define __THIRD_PARTY_FINISTESTATEENTROPY_FSE_WRAPPER_H__
+// Active lines add an _FSE_ prefix to a symbol; "//#define" lines are disabled.
+#define FSE_buildCTable _FSE_FSE_buildCTable
+#define FSE_buildCTable_raw _FSE_FSE_buildCTable_raw
+#define FSE_buildCTable_rle _FSE_FSE_buildCTable_rle
+#define FSE_buildDTable _FSE_FSE_buildDTable
+#define FSE_buildDTable_raw _FSE_FSE_buildDTable_raw
+#define FSE_buildDTable_rle _FSE_FSE_buildDTable_rle
+#define FSE_compress _FSE_FSE_compress
+//#define FSE_compress2 _FSE_FSE_compress2
+#define FSE_compressBound _FSE_FSE_compressBound
+#define FSE_compress_usingCTable _FSE_FSE_compress_usingCTable
+#define FSE_count _FSE_FSE_count
+#define FSE_countFast _FSE_FSE_countFast
+#define FSE_createCTable _FSE_FSE_createCTable
+#define FSE_createDTable _FSE_FSE_createDTable
+#define FSE_decompress _FSE_FSE_decompress
+#define FSE_decompress_usingDTable _FSE_FSE_decompress_usingDTable
+#define FSE_freeCTable _FSE_FSE_freeCTable
+#define FSE_freeDTable _FSE_FSE_freeDTable
+//#define FSE_getErrorName _FSE_FSE_getErrorName
+//#define FSE_isError _FSE_FSE_isError
+#define FSE_NCountWriteBound _FSE_FSE_NCountWriteBound
+#define FSE_normalizeCount _FSE_FSE_normalizeCount
+#define FSE_optimalTableLog _FSE_FSE_optimalTableLog
+#define FSE_optimalTableLog_internal _FSE_FSE_optimalTableLog_internal
+#define FSE_readNCount _FSE_FSE_readNCount
+#define FSE_sizeof_CTable _FSE_FSE_sizeof_CTable
+#define FSE_writeNCount _FSE_FSE_writeNCount
+#define HUF_buildCTable _FSE_HUF_buildCTable
+#define HUF_compress _FSE_HUF_compress
+#define HUF_compress1X _FSE_HUF_compress1X
+#define HUF_compress1X_usingCTable _FSE_HUF_compress1X_usingCTable
+#define HUF_compress2 _FSE_HUF_compress2
+#define HUF_compress4X_usingCTable _FSE_HUF_compress4X_usingCTable
+#define HUF_compressBound _FSE_HUF_compressBound
+#define HUF_decompress _FSE_HUF_decompress
+#define HUF_decompress1X2 _FSE_HUF_decompress1X2
+#define HUF_decompress1X2_DCtx _FSE_HUF_decompress1X2_DCtx
+#define HUF_decompress1X2_usingDTable _FSE_HUF_decompress1X2_usingDTable
+#define HUF_decompress1X4 _FSE_HUF_decompress1X4
+#define HUF_decompress1X4_DCtx _FSE_HUF_decompress1X4_DCtx
+#define HUF_decompress1X4_usingDTable _FSE_HUF_decompress1X4_usingDTable
+#define HUF_decompress1X_DCtx _FSE_HUF_decompress1X_DCtx
+#define HUF_decompress1X_usingDTable _FSE_HUF_decompress1X_usingDTable
+#define HUF_decompress4X2 _FSE_HUF_decompress4X2
+#define HUF_decompress4X2_DCtx _FSE_HUF_decompress4X2_DCtx
+#define HUF_decompress4X2_usingDTable _FSE_HUF_decompress4X2_usingDTable
+#define HUF_decompress4X4 _FSE_HUF_decompress4X4
+#define HUF_decompress4X4_DCtx _FSE_HUF_decompress4X4_DCtx
+#define HUF_decompress4X4_usingDTable _FSE_HUF_decompress4X4_usingDTable
+#define HUF_decompress4X_DCtx _FSE_HUF_decompress4X_DCtx
+#define HUF_decompress4X_hufOnly _FSE_HUF_decompress4X_hufOnly
+#define HUF_decompress4X_usingDTable _FSE_HUF_decompress4X_usingDTable
+#define HUF_getErrorName _FSE_HUF_getErrorName
+#define HUF_isError _FSE_HUF_isError
+#define HUF_optimalTableLog _FSE_HUF_optimalTableLog
+#define HUF_readCTable _FSE_HUF_readCTable
+#define HUF_readDTableX2 _FSE_HUF_readDTableX2
+#define HUF_readDTableX4 _FSE_HUF_readDTableX4
+#define HUF_readStats _FSE_HUF_readStats
+#define HUF_selectDecoder _FSE_HUF_selectDecoder
+#define HUF_writeCTable _FSE_HUF_writeCTable
+
+#endif // __THIRD_PARTY_FINISTESTATEENTROPY_FSE_WRAPPER_H__
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/.travis.yml b/codec/L2/demos/pikEnc/host/third_party/lcms/.travis.yml
new file mode 100755
index 0000000000..637eb6af5e
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/.travis.yml
@@ -0,0 +1,195 @@
+language: c
+# Build matrix: each entry selects CC/CXX via MATRIX_EVAL, which before_install evals.
+matrix:
+  include:
+# OSX standard test build
+# this is really a GCC frontend with an LLVM back-end
+    - os: osx
+      env:
+        - MATRIX_EVAL="CC=gcc && CXX=g++"
+
+# OSX and pure GCCs
+# https://docs.travis-ci.com/user/languages/c/#GCC-on-OS-X
+    - os: osx
+      env:
+        - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
+
+    - os: osx
+      env:
+        - MATRIX_EVAL="brew install gcc5 && CC=gcc-5 && CXX=g++-5"
+
+    - os: osx
+      env:
+        - MATRIX_EVAL="brew install gcc6 && CC=gcc-6 && CXX=g++-6"
+
+    - os: osx
+      env:
+        - MATRIX_EVAL="brew install gcc && CC=gcc-7 && CXX=g++-7"
+
+# OSX and pure LLVMs - the version is controlled by the osx_image variable
+# images as per https://blog.travis-ci.com/2017-10-16-a-new-default-os-x-image-is-coming
+    - os: osx
+      osx_image: xcode6.4
+      env:
+        - MATRIX_EVAL="CC=clang && CXX=clang++"
+
+    - os: osx
+      osx_image: xcode7.3
+      env:
+        - MATRIX_EVAL="CC=clang && CXX=clang++"
+
+    - os: osx
+      osx_image: xcode8.3
+      env:
+        - MATRIX_EVAL="CC=clang && CXX=clang++"
+
+    - os: osx
+      osx_image: xcode9
+      env:
+        - MATRIX_EVAL="CC=clang && CXX=clang++"
+
+    - os: osx
+      osx_image: xcode9.1
+      env:
+        - MATRIX_EVAL="CC=clang && CXX=clang++"
+
+# Linux and GCCs
+    # gcc 4.8 at the time of writing
+    - os: linux
+      dist: trusty
+      env:
+        - MATRIX_EVAL="CC=gcc && CXX=g++"
+
+# for newer GCCs, add ubuntu-toolchain-r-test
+# https://docs.travis-ci.com/user/languages/c/
+    - os: linux
+      dist: trusty
+      addons:
+        apt:
+          sources:
+            - ubuntu-toolchain-r-test
+          packages:
+            - g++-4.9
+      env:
+        - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
+
+    - os: linux
+      dist: trusty
+      addons:
+        apt:
+          sources:
+            - ubuntu-toolchain-r-test
+          packages:
+            - g++-5
+      env:
+        - MATRIX_EVAL="CC=gcc-5 && CXX=g++-5"
+
+    - os: linux
+      dist: trusty
+      addons:
+        apt:
+          sources:
+            - ubuntu-toolchain-r-test
+          packages:
+            - g++-6
+      env:
+        - MATRIX_EVAL="CC=gcc-6 && CXX=g++-6"
+
+
+    - os: linux
+      dist: trusty
+      addons:
+        apt:
+          sources:
+            - ubuntu-toolchain-r-test
+          packages:
+            - g++-7
+      env:
+        - MATRIX_EVAL="CC=gcc-7 && CXX=g++-7"
+
+# Linux and LLVMs
+# https://docs.travis-ci.com/user/languages/c/#Clang
+    - os: linux
+      dist: trusty
+      env:
+        - MATRIX_EVAL="CC=clang && CXX=clang++"
+
+    - os: linux
+      dist: trusty
+      addons:
+        apt:
+          sources:
+            - ubuntu-toolchain-r-test
+            - llvm-toolchain-precise-3.6
+          packages:
+            - clang-3.6
+      env:
+        - MATRIX_EVAL="CC=clang-3.6 && CXX=clang++-3.6"
+
+    - os: linux
+      dist: trusty
+      addons:
+        apt:
+          sources:
+            - ubuntu-toolchain-r-test
+            - llvm-toolchain-precise-3.7
+          packages:
+            - clang-3.7
+      env:
+        - MATRIX_EVAL="CC=clang-3.7 && CXX=clang++-3.7"
+
+    - os: linux
+      dist: trusty
+      addons:
+        apt:
+          sources:
+            - ubuntu-toolchain-r-test
+            - llvm-toolchain-precise-3.8
+          packages:
+            - clang-3.8
+      env:
+        - MATRIX_EVAL="CC=clang-3.8 && CXX=clang++-3.8"
+
+    - os: linux
+      dist: trusty
+      addons:
+        apt:
+          sources:
+            - llvm-toolchain-trusty-3.9
+          packages:
+            - clang-3.9
+      env:
+        - MATRIX_EVAL="CC=clang-3.9 && CXX=clang++-3.9"
+
+    - os: linux
+      dist: trusty
+      addons:
+        apt:
+          sources:
+            - llvm-toolchain-trusty-4.0
+          packages:
+            - clang-4.0
+      env:
+        - MATRIX_EVAL="CC=clang-4.0 && CXX=clang++-4.0"
+
+    - os: linux
+      dist: trusty
+      addons:
+        apt:
+          sources:
+            - llvm-toolchain-trusty-5.0
+          packages:
+            - clang-5.0
+      env:
+        - MATRIX_EVAL="CC=clang-5.0 && CXX=clang++-5.0"
+
+
+before_install:
+# avoid issues with outdated Homebrew installation on Mac OSX by updating it
+# before anything else
+# TODO: remove "brew update" line after 2017-11-15, see https://blog.travis-ci.com/2017-10-16-a-new-default-os-x-image-is-coming and
+# https://github.com/travis-ci/travis-ci/issues/8552
+    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
+    - eval "${MATRIX_EVAL}"
+
+script: autoreconf -V && autoreconf --force && ./configure && cat config.log && make && make check
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/AUTHORS b/codec/L2/demos/pikEnc/host/third_party/lcms/AUTHORS
new file mode 100755
index 0000000000..147246a38c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/AUTHORS
@@ -0,0 +1,51 @@
+
+Main Author
+------------
+Marti Maria 
+
+
+Contributors 
+------------
+Bob Friesenhahn 
+Kai-Uwe Behrmann
+Stuart Nixon
+Jordi Vilar
+Richard Hughes
+Auke Nauta
+Chris Evans (Google)
+Lorenzo Ridolfi 
+Robin Watts (Artifex)
+Shawn Pedersen 
+Andrew Brygin 
+Samuli Suominen 
+Florian Höch
+Aurelien Jarno 
+Claudiu Cebuc
+Michael Vrhel (Artifex)
+Michal Cihar 
+Daniel Kaneider 
+Mateusz Jurczyk (Google)
+Paul Miller
+Sébastien Léon
+Christian Schmitz
+XhmikosR
+Stanislav Brabec (SuSe)
+Leonhard Gruenschloss (Google)
+Patrick Noffke
+Christopher James Halse Rogers
+John Hein
+Thomas Weber (Debian)
+Mark Allen
+Noel Carboni
+
+Special Thanks 
+--------------
+Artifex software
+AlienSkin software
+Jan Morovic
+Jos Vernon (WebSupergoo)
+Harald Schneider (Maxon)
+Christian Albrecht 
+Dimitrios Anastassakis 
+Lemke Software 
+Tim Zaman
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/COPYING b/codec/L2/demos/pikEnc/host/third_party/lcms/COPYING
new file mode 100755
index 0000000000..fda5c9eb57
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/COPYING
@@ -0,0 +1,8 @@
+Little CMS
+Copyright (c) 1998-2011 Marti Maria Saguer
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/ChangeLog b/codec/L2/demos/pikEnc/host/third_party/lcms/ChangeLog
new file mode 100755
index 0000000000..704dc78c5f
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/ChangeLog
@@ -0,0 +1,191 @@
+
+-----------------------
+2.9 Maintenance release
+-----------------------
+Several fixes related with security, and therefore not detailed here.
+C++ compiles now without warnings
+Added OSX and clang in travis
+Added a travis-ci test matrix for many compilers and OS. Thanks to Thomas Weber (debian) for this great improvement.
+
+-----------------------
+2.8 Featured release
+-----------------------
+
+Changed ChangeLog direction
+Fixed many typos in comments, thanks to Stefan Weil for doing that.
+Fixed localization bug, added a new test case crayons.icc thanks to Richard Hughes for providing the profile. 
+Fixed a bug in optimizer that made some formats (i.e, bits planar) unavailable
+Fixed misalignment problems on Alpha. The compiler does not align strings, and accessing begin of string as a uint16 makes code to fail.
+Added some extra checks to the tools and examples.
+Fixed a bug that prevented to read luminance tag
+BIG amount of functionality contributed/Sponsored  by Alien Skin Software: TransformStride, copyAlpha, performance plug-ins. Fixes some warnings as well.
+Added an extra _ to _stdcall to make it more portable
+Fixed a bug in transicc for named color profiles
+Fixed several compiler warnings
+Added support for Visual Studio 2015
+Fixed for XCODE project
+
+-----------------------
+2.7 Maintenance release
+-----------------------
+
+Added a version retrieval function 
+Added an option in transicc for working in bounded mode
+Fixed wrong handling of extra channels in some formatters.
+Added a project for VS2013
+Added license for iccjpeg.c
+New project for mac
+Added a global optimization that merges consecutive matrices in pipelines. Fixes loss of precision in some transforms
+Added a flag  to clip negative values in unbounded transforms (only gray, rgb, cmyk)
+Move unused var suppressor before the `return` statements.
+Remove dead code.
+Add missing comma in CGATS parser    
+utils/jpgicc/iccjpeg.c: Fix check if unsigned variable 'total_length'… …
+Some maintenance fixes
+Remove unused vcproj files
+Added a function to retrieve the iohandler of a given profile object
+Added a safety check on named color lists
+Fixed a macro clash on SNONE. 
+Fixed a possible segmentation fault in a non-happy path
+
+-----------------------
+2.6 Featured release
+-----------------------
+
+Added pthread dependency. From now lcms supports multithreading
+Fix for delete tag memory corruption
+Added directories for tiff, jpeg in configure script
+New locking plug-in, from Artifex
+Big revamp on Contexts, from Artifex
+Fixed memory leaks on error handling
+Changed endianness detection for PowerPC
+Added a way to retrieve matrix shaper always, no matter LUT is present
+Fixed a bug in PCS/Colorspace order when reading V2 Lab devicelinks
+Fixed some indexing out of bounds in floating point interpolation
+Fixed a double free in recovering from a previous error in default intent handler.
+
+-----------------------
+2.5 Maintenance release
+-----------------------
+
+Added some checks for non-happy path, mostly failing mallocs
+Transform2Devicelink now keeps white point when guessing deviceclass is enabled
+Rendering intent used when creating the transform is now propagated to profile header in cmsTransform2Devicelink. This is because 7.2.15 in spec 4.3
+Added a simple project for cppcheck
+Added support for VS2012
+Remove spurious tabs added by git merge of pull request
+Fixed a bug in parametric curves
+Added some fixes from XhmikosR
+Added TIFF Lab16 handling on tifficc
+More changes from Artifex
+Added identity curves support for write V2 LUT 
+Added a way to read the profile creator from header
+Added a reference for Mac MLU tag
+Fixed devicelink generation for 8 bits
+Several minor issues found by cppcheck
+Several improvements in cgats parser.
+Fixed some bugs on floating point curves.
+Fixed a bug on  big endian platforms not supporting uint64 or long long.
+Added error descriptions on cmsSmoothToneCurve 
+Added new cmsPlugInTHR() and fixed some race conditions (thanks to Artifex)
+update black point detection algorithm to reflect ICC changes
+Fixed some 64 bit warnings on size_t to uint32 conversions
+Fixed a multithread bug on optimization (StageDEF)
+RGB profiles using same tone curves for several channels are storing now only one copy of the curve (saves space)
+User defined parametric curves can now be saved in ICC profiles.
+
+--------------------
+2.4 Featured release
+--------------------
+
+Added a check for maximum input channels
+Fixed an uninitialized read on PatchLUT
+Fixed a bug in XYZ floating point PCS
+added half float variants (ABGR and so)
+Added formatter resolution after xform optimization plugin
+Fixed a bug in transicc when clot tables are present
+Added a conditional compilation flag for "half" support
+Fixed a bug on named color profiles.
+Fixed a typo on tificc and jpgicc names, thanks to Elle Stone for reporting.
+Added half float support
+Increased security checks, thanks to Mateusz Jurczyk, from Google.
+Fixed a bug on IT8 reading of negative numbers.
+Fixed a bug on ending zero when saving a IT8 to memory
+Internal stage structs are now accessible through plug-in API
+Added a new plug-in type
+Added getPipelineContextID
+Fixed a bug in pipeline duplication
+gamma 1.0 can now operate in unbounded mode
+Exposed internal overview table for tone curves
+Added a new plug in entry for full transform
+Added support for transforms on planar data with different stride
+Added black point detection algorithm from Adobe paper
+Fixed a bug in black preservation checking
+Added performance improvements from several contributors, mostly Artifex
+Fixed uint64 to work in systems without long long native type
+Fixed a bug in the named color devicelink generation
+
+-----------------------
+2.3 Maintenance release
+-----------------------
+
+Added compatibility with Argyll's CGATS parser
+Updated to ICC spec 4.3
+Adding a memory alignment macro for CGATS parser
+Fixed a bug on the range of data in transicc, when colorant tag is specified
+Fixed Absolute colorimetric intent issues
+Fixed encoding for floating point tags in Lab/XYZ 
+Fixed a 0 byte allocation issue in _cmsCreateSubAllocChunk
+
+-----------------------
+2.2 Maintenance release
+-----------------------
+
+Pascal unit now is supported by Free Pascal Compiler
+Fixed a bug on ReadRAWtag 
+Added dictionary metatag support
+Fixed a bug in black preservation and slightly non-monotonic curves
+Added named color functionality
+Fixed a bug that made crash black preservation on CMYK2CMYK devicelinks
+Added functions to retrieve formatters from transforms
+Profiles with weird curves are not prone to prelinearization optimization.
+changed memmove to memcpy in cache for xput improvement 
+Fixed GBD bug (out of bounds memory overwrite) 
+Fixed some potential issues as NULL dereferencing
+Updated linkicc to 2.1, cleanup
+Removed pthreads need
+Fixed several bugs in absolute colorimetric intent
+
+-----------------------
+2.1 Maintenance release
+-----------------------
+
+Added bound check in floating point interpolation
+Fixed a bug on curve reversing when source curves have few points
+Added Duotone support (Bilinear interpolation)
+Fixed delphi interface
+linkicc now stores the rendering intent in the profile header
+Fixed several integer overflow and other integrity checks, thanks to Chris Evans
+Fixed an issue on curve inversion
+Fixed memory leaks on when recovering from errors
+Fixed a bug in psid and profile sequence tags 
+Fixed a bug in device link creation on v4 profiles
+Fixed a bug in tificc in floating point formats
+Preliminary Delphi wrapper 
+Fixed some typos in error messages
+Added cmsTagLinkedTo
+Fixed VC2010, VC2008 projects
+Added a check on jpgicc for NULL transforms
+Added UTILS_UNUSED_PARAMETER for samples
+Added cmsChangeBufferFormat for backwards compatibility
+Fixed a bug on Lab + Alpha float formatters, added such predefined formatters as well
+Fixed a bug on transicc that made profiles with output colorants info to malfunction
+Fixed a bug that prevented linkicc to work
+Fixed a bug on V2 CHAD construction, affects absolute colorimetric intent
+
+-----------------------
+2.0 Major version bump
+-----------------------
+
+
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/Lib/BC/BC.txt b/codec/L2/demos/pikEnc/host/third_party/lcms/Lib/BC/BC.txt
new file mode 100755
index 0000000000..146228d6af
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/Lib/BC/BC.txt
@@ -0,0 +1 @@
+BC
\ No newline at end of file
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/Lib/MS/MS.TXT b/codec/L2/demos/pikEnc/host/third_party/lcms/Lib/MS/MS.TXT
new file mode 100755
index 0000000000..32dfbc9fa6
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/Lib/MS/MS.TXT
@@ -0,0 +1 @@
+MS
\ No newline at end of file
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/doc/LittleCMS2.9 API.pdf b/codec/L2/demos/pikEnc/host/third_party/lcms/doc/LittleCMS2.9 API.pdf
new file mode 100755
index 0000000000..16c16f6b39
Binary files /dev/null and b/codec/L2/demos/pikEnc/host/third_party/lcms/doc/LittleCMS2.9 API.pdf differ
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/doc/LittleCMS2.9 Plugin API.pdf b/codec/L2/demos/pikEnc/host/third_party/lcms/doc/LittleCMS2.9 Plugin API.pdf
new file mode 100755
index 0000000000..83f3043834
Binary files /dev/null and b/codec/L2/demos/pikEnc/host/third_party/lcms/doc/LittleCMS2.9 Plugin API.pdf differ
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/doc/LittleCMS2.9 tutorial.pdf b/codec/L2/demos/pikEnc/host/third_party/lcms/doc/LittleCMS2.9 tutorial.pdf
new file mode 100755
index 0000000000..2fd3335179
Binary files /dev/null and b/codec/L2/demos/pikEnc/host/third_party/lcms/doc/LittleCMS2.9 tutorial.pdf differ
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/doc/src.zip b/codec/L2/demos/pikEnc/host/third_party/lcms/doc/src.zip
new file mode 100755
index 0000000000..13f3aeb675
Binary files /dev/null and b/codec/L2/demos/pikEnc/host/third_party/lcms/doc/src.zip differ
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/include/Makefile.am b/codec/L2/demos/pikEnc/host/third_party/lcms/include/Makefile.am
new file mode 100755
index 0000000000..7dbe0e43bd
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/include/Makefile.am
@@ -0,0 +1,7 @@
+#
+# Makefile for include directory
+# Based on a work by Bob Friesenhahn
+
+include_HEADERS = lcms2.h lcms2_plugin.h
+
+EXTRA_DIST = lcms2.h lcms2_plugin.h
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/include/Makefile.in b/codec/L2/demos/pikEnc/host/third_party/lcms/include/Makefile.in
new file mode 100755
index 0000000000..22901ea0af
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/include/Makefile.in
@@ -0,0 +1,590 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for include directory
+# Based on a work by Bob Friesenhahn
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = include
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_append_compile_flags.m4 \
+	$(top_srcdir)/m4/ax_append_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+	$(top_srcdir)/m4/ax_require_defined.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(include_HEADERS) \
+	$(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+SOURCES =
+DIST_SOURCES =
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
+am__installdirs = "$(DESTDIR)$(includedir)"
+HEADERS = $(include_HEADERS)
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_THREAD = @LIB_THREAD@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_CXX = @PTHREAD_CXX@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+include_HEADERS = lcms2.h lcms2_plugin.h
+EXTRA_DIST = lcms2.h lcms2_plugin.h
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign include/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign include/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+install-includeHEADERS: $(include_HEADERS)
+	@$(NORMAL_INSTALL)
+	@list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \
+	fi; \
+	for p in $$list; do \
+	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+	  echo "$$d$$p"; \
+	done | $(am__base_list) | \
+	while read files; do \
+	  echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \
+	  $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \
+	done
+
+uninstall-includeHEADERS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
+	files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+	dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir)
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(HEADERS)
+installdirs:
+	for dir in "$(DESTDIR)$(includedir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-includeHEADERS
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-includeHEADERS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+	clean-libtool cscopelist-am ctags ctags-am distclean \
+	distclean-generic distclean-libtool distclean-tags distdir dvi \
+	dvi-am html html-am info info-am install install-am \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-includeHEADERS install-info install-info-am \
+	install-man install-pdf install-pdf-am install-ps \
+	install-ps-am install-strip installcheck installcheck-am \
+	installdirs maintainer-clean maintainer-clean-generic \
+	mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
+	ps ps-am tags tags-am uninstall uninstall-am \
+	uninstall-includeHEADERS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/include/lcms2.h b/codec/L2/demos/pikEnc/host/third_party/lcms/include/lcms2.h
new file mode 100755
index 0000000000..3e2b4dd70c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/include/lcms2.h
@@ -0,0 +1,1906 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+// Version 2.9rc3
+//
+
+#ifndef _lcms2_H
+
+// ********** Configuration toggles ****************************************
+
+// Uncomment this one if you are using big endian machines
+// #define CMS_USE_BIG_ENDIAN   1
+
+// Uncomment this one if your compiler/machine does NOT support the
+// "long long" type.
+// #define CMS_DONT_USE_INT64        1
+
+// Uncomment this if your compiler doesn't work with fast floor function
+// #define CMS_DONT_USE_FAST_FLOOR 1
+
+// Uncomment this line if you want lcms to use the black point tag in profile,
+// if commented, lcms will compute the black point on its own.
+// It is safer to leave it commented out
+// #define CMS_USE_PROFILE_BLACK_POINT_TAG    1
+
+// Uncomment this line if you are compiling as C++ and want a C++ API
+// #define CMS_USE_CPP_API
+
+// Uncomment this line if you need strict CGATS syntax. It makes CGATS files
+// require "KEYWORD" on undefined identifiers; keep it commented out unless needed
+// #define CMS_STRICT_CGATS  1
+
+// Uncomment to get rid of the tables for "half" float support
+// #define CMS_NO_HALF_SUPPORT 1
+
+// Uncomment to get rid of pthreads/windows dependency
+// #define CMS_NO_PTHREADS  1
+
+// Uncomment this for special windows mutex initialization (see lcms2_internal.h)
+// #define CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+
+// ********** End of configuration toggles ******************************
+
+// Needed for streams
+#include <stdio.h>
+
+// Needed for portability (C99 per 7.1.2)
+#include <limits.h>
+#include <time.h>
+#include <stddef.h>
+
+#ifndef CMS_USE_CPP_API
+#   ifdef __cplusplus
+extern "C" {
+#   endif
+#endif
+
+// Version/release
+#define LCMS_VERSION        2090
+
+// Compilers that are not fully C99 compliant may define CMS_BASIC_TYPES_ALREADY_DEFINED and supply these basic types themselves
+#ifndef CMS_BASIC_TYPES_ALREADY_DEFINED
+
+// Base types
+typedef unsigned char        cmsUInt8Number;   // That is guaranteed by the C99 spec
+typedef signed char          cmsInt8Number;    // That is guaranteed by the C99 spec
+
+#if CHAR_BIT != 8
+#  error "Unable to find 8 bit type, unsupported compiler"
+#endif
+
+// IEEE float storage numbers
+typedef float                cmsFloat32Number;
+typedef double               cmsFloat64Number;
+
+// 16-bit base types
+#if (USHRT_MAX == 65535U)
+ typedef unsigned short      cmsUInt16Number;
+#elif (UINT_MAX == 65535U)
+ typedef unsigned int        cmsUInt16Number;
+#else
+#  error "Unable to find 16 bits unsigned type, unsupported compiler"
+#endif
+
+#if (SHRT_MAX == 32767)
+  typedef  short             cmsInt16Number;
+#elif (INT_MAX == 32767)
+  typedef  int               cmsInt16Number;
+#else
+#  error "Unable to find 16 bits signed type, unsupported compiler"
+#endif
+
+// 32-bit base type
+#if (UINT_MAX == 4294967295U)
+ typedef unsigned int        cmsUInt32Number;
+#elif (ULONG_MAX == 4294967295U)
+ typedef unsigned long       cmsUInt32Number;
+#else
+#  error "Unable to find 32 bit unsigned type, unsupported compiler"
+#endif
+
+#if (INT_MAX == +2147483647)
+ typedef  int                cmsInt32Number;
+#elif (LONG_MAX == +2147483647)
+ typedef  long               cmsInt32Number;
+#else
+#  error "Unable to find 32 bit signed type, unsupported compiler"
+#endif
+
+// 64-bit base types
+#ifndef CMS_DONT_USE_INT64
+#  if (ULONG_MAX  == 18446744073709551615U)
+    typedef unsigned long   cmsUInt64Number;
+#  elif (ULLONG_MAX == 18446744073709551615U)
+      typedef unsigned long long   cmsUInt64Number;
+#  else
+#     define CMS_DONT_USE_INT64 1
+#  endif
+#  if (LONG_MAX == +9223372036854775807)
+      typedef  long          cmsInt64Number;
+#  elif (LLONG_MAX == +9223372036854775807)
+      typedef  long long     cmsInt64Number;
+#  else
+#     define CMS_DONT_USE_INT64 1
+#  endif
+#endif
+#endif
+
+// In the case 64 bit numbers are not supported by the compiler
+#ifdef CMS_DONT_USE_INT64
+    typedef cmsUInt32Number      cmsUInt64Number[2];
+    typedef cmsInt32Number       cmsInt64Number[2];
+#endif
+
+// Derivative types
+typedef cmsUInt32Number      cmsSignature;
+typedef cmsUInt16Number      cmsU8Fixed8Number;
+typedef cmsInt32Number       cmsS15Fixed16Number;
+typedef cmsUInt32Number      cmsU16Fixed16Number;
+
+// Boolean type, which will be using the native integer
+typedef int                  cmsBool;
+
+// Try to detect windows
+#if defined (_WIN32) || defined(_WIN64) || defined(WIN32) || defined(_WIN32_)
+#  define CMS_IS_WINDOWS_ 1
+#endif
+
+#ifdef _MSC_VER
+#  define CMS_IS_WINDOWS_ 1
+#endif
+
+#ifdef __BORLANDC__
+#  define CMS_IS_WINDOWS_ 1
+#endif
+
+// Try to detect big endian platforms. This list can be endless, so primarily rely on the configure script
+// on Unix-like systems, and allow it to be set on the compiler command line using
+// -DCMS_USE_BIG_ENDIAN or something similar
+#ifdef CMS_USE_BIG_ENDIAN // set at compiler command line takes overall precedence
+
+#  if CMS_USE_BIG_ENDIAN == 0
+#    undef CMS_USE_BIG_ENDIAN
+#  endif
+
+#else // CMS_USE_BIG_ENDIAN
+
+#  ifdef WORDS_BIGENDIAN // set by configure (or explicitly on compiler command line)
+#    define CMS_USE_BIG_ENDIAN 1
+#  else // WORDS_BIGENDIAN
+// Fall back to platform/compiler specific tests
+#    if defined(__sgi__) || defined(__sgi) || defined(sparc)
+#      define CMS_USE_BIG_ENDIAN      1
+#    endif
+
+#    if defined(__s390__) || defined(__s390x__)
+#      define CMS_USE_BIG_ENDIAN   1
+#    endif
+
+#    ifdef macintosh
+#      ifdef __BIG_ENDIAN__
+#        define CMS_USE_BIG_ENDIAN      1
+#      endif
+#      ifdef __LITTLE_ENDIAN__
+#        undef CMS_USE_BIG_ENDIAN
+#      endif
+#    endif
+#  endif  // WORDS_BIGENDIAN
+
+#  if defined(_HOST_BIG_ENDIAN) || defined(__BIG_ENDIAN__)
+#    define CMS_USE_BIG_ENDIAN      1
+#  endif
+
+#endif  // CMS_USE_BIG_ENDIAN
+
+
+// Calling convention -- this is highly platform and compiler dependent
+#ifdef CMS_IS_WINDOWS_
+#  if defined(CMS_DLL) || defined(CMS_DLL_BUILD)
+#     ifdef __BORLANDC__
+#        define CMSEXPORT       __stdcall _export
+#        define CMSAPI
+#     else
+#        define CMSEXPORT      __stdcall
+#        ifdef CMS_DLL_BUILD
+#            define CMSAPI    __declspec(dllexport)
+#        else
+#           define CMSAPI     __declspec(dllimport)
+#        endif
+#     endif
+#  else
+#     define CMSEXPORT
+#     define CMSAPI
+#  endif
+#else  // not Windows
+#  ifdef HAVE_FUNC_ATTRIBUTE_VISIBILITY
+#     define CMSEXPORT
+#     define CMSAPI    __attribute__((visibility("default")))
+#  else
+#     define CMSEXPORT
+#     define CMSAPI
+#  endif
+#endif  // CMS_IS_WINDOWS_
+
+#ifdef HasTHREADS
+# if HasTHREADS == 1
+#    undef CMS_NO_PTHREADS
+# else
+#    define CMS_NO_PTHREADS 1
+# endif
+#endif
+
+// Some common definitions
+#define cmsMAX_PATH     256
+
+#ifndef FALSE
+#       define FALSE 0
+#endif
+#ifndef TRUE
+#       define TRUE  1
+#endif
+
+// D50 XYZ normalized to Y=1.0
+#define cmsD50X  0.9642
+#define cmsD50Y  1.0
+#define cmsD50Z  0.8249
+
+// V4 perceptual black
+#define cmsPERCEPTUAL_BLACK_X  0.00336
+#define cmsPERCEPTUAL_BLACK_Y  0.0034731
+#define cmsPERCEPTUAL_BLACK_Z  0.00287
+
+// Definitions in ICC spec
+#define cmsMagicNumber  0x61637370     // 'acsp'
+#define lcmsSignature   0x6c636d73     // 'lcms'
+
+
+// Base ICC tag type signatures: each value packs the four ASCII characters shown at right, first character in the most significant byte
+typedef enum {
+    cmsSigChromaticityType                  = 0x6368726D,  // 'chrm'
+    cmsSigColorantOrderType                 = 0x636C726F,  // 'clro'
+    cmsSigColorantTableType                 = 0x636C7274,  // 'clrt'
+    cmsSigCrdInfoType                       = 0x63726469,  // 'crdi'
+    cmsSigCurveType                         = 0x63757276,  // 'curv'
+    cmsSigDataType                          = 0x64617461,  // 'data'
+    cmsSigDictType                          = 0x64696374,  // 'dict'
+    cmsSigDateTimeType                      = 0x6474696D,  // 'dtim'
+    cmsSigDeviceSettingsType                = 0x64657673,  // 'devs'
+    cmsSigLut16Type                         = 0x6d667432,  // 'mft2'
+    cmsSigLut8Type                          = 0x6d667431,  // 'mft1'
+    cmsSigLutAtoBType                       = 0x6d414220,  // 'mAB '
+    cmsSigLutBtoAType                       = 0x6d424120,  // 'mBA '
+    cmsSigMeasurementType                   = 0x6D656173,  // 'meas'
+    cmsSigMultiLocalizedUnicodeType         = 0x6D6C7563,  // 'mluc'
+    cmsSigMultiProcessElementType           = 0x6D706574,  // 'mpet'
+    cmsSigNamedColorType                    = 0x6E636f6C,  // 'ncol' -- DEPRECATED!
+    cmsSigNamedColor2Type                   = 0x6E636C32,  // 'ncl2'
+    cmsSigParametricCurveType               = 0x70617261,  // 'para'
+    cmsSigProfileSequenceDescType           = 0x70736571,  // 'pseq'
+    cmsSigProfileSequenceIdType             = 0x70736964,  // 'psid'
+    cmsSigResponseCurveSet16Type            = 0x72637332,  // 'rcs2'
+    cmsSigS15Fixed16ArrayType               = 0x73663332,  // 'sf32'
+    cmsSigScreeningType                     = 0x7363726E,  // 'scrn'
+    cmsSigSignatureType                     = 0x73696720,  // 'sig '
+    cmsSigTextType                          = 0x74657874,  // 'text'
+    cmsSigTextDescriptionType               = 0x64657363,  // 'desc'
+    cmsSigU16Fixed16ArrayType               = 0x75663332,  // 'uf32'
+    cmsSigUcrBgType                         = 0x62666420,  // 'bfd '
+    cmsSigUInt16ArrayType                   = 0x75693136,  // 'ui16'
+    cmsSigUInt32ArrayType                   = 0x75693332,  // 'ui32'
+    cmsSigUInt64ArrayType                   = 0x75693634,  // 'ui64'
+    cmsSigUInt8ArrayType                    = 0x75693038,  // 'ui08'
+    cmsSigVcgtType                          = 0x76636774,  // 'vcgt'
+    cmsSigViewingConditionsType             = 0x76696577,  // 'view'
+    cmsSigXYZType                           = 0x58595A20   // 'XYZ '
+
+
+} cmsTagTypeSignature;
+
+// Base ICC tag definitions. Each value is the four ASCII characters shown in the trailing comment, first character in the most significant byte.
+typedef enum {
+    cmsSigAToB0Tag                          = 0x41324230,  // 'A2B0'
+    cmsSigAToB1Tag                          = 0x41324231,  // 'A2B1'
+    cmsSigAToB2Tag                          = 0x41324232,  // 'A2B2'
+    cmsSigBlueColorantTag                   = 0x6258595A,  // 'bXYZ'
+    cmsSigBlueMatrixColumnTag               = 0x6258595A,  // 'bXYZ' (same value as cmsSigBlueColorantTag)
+    cmsSigBlueTRCTag                        = 0x62545243,  // 'bTRC'
+    cmsSigBToA0Tag                          = 0x42324130,  // 'B2A0'
+    cmsSigBToA1Tag                          = 0x42324131,  // 'B2A1'
+    cmsSigBToA2Tag                          = 0x42324132,  // 'B2A2'
+    cmsSigCalibrationDateTimeTag            = 0x63616C74,  // 'calt'
+    cmsSigCharTargetTag                     = 0x74617267,  // 'targ'
+    cmsSigChromaticAdaptationTag            = 0x63686164,  // 'chad'
+    cmsSigChromaticityTag                   = 0x6368726D,  // 'chrm'
+    cmsSigColorantOrderTag                  = 0x636C726F,  // 'clro'
+    cmsSigColorantTableTag                  = 0x636C7274,  // 'clrt'
+    cmsSigColorantTableOutTag               = 0x636C6F74,  // 'clot'
+    cmsSigColorimetricIntentImageStateTag   = 0x63696973,  // 'ciis'
+    cmsSigCopyrightTag                      = 0x63707274,  // 'cprt'
+    cmsSigCrdInfoTag                        = 0x63726469,  // 'crdi'
+    cmsSigDataTag                           = 0x64617461,  // 'data'
+    cmsSigDateTimeTag                       = 0x6474696D,  // 'dtim'
+    cmsSigDeviceMfgDescTag                  = 0x646D6E64,  // 'dmnd'
+    cmsSigDeviceModelDescTag                = 0x646D6464,  // 'dmdd'
+    cmsSigDeviceSettingsTag                 = 0x64657673,  // 'devs'
+    cmsSigDToB0Tag                          = 0x44324230,  // 'D2B0'
+    cmsSigDToB1Tag                          = 0x44324231,  // 'D2B1'
+    cmsSigDToB2Tag                          = 0x44324232,  // 'D2B2'
+    cmsSigDToB3Tag                          = 0x44324233,  // 'D2B3'
+    cmsSigBToD0Tag                          = 0x42324430,  // 'B2D0'
+    cmsSigBToD1Tag                          = 0x42324431,  // 'B2D1'
+    cmsSigBToD2Tag                          = 0x42324432,  // 'B2D2'
+    cmsSigBToD3Tag                          = 0x42324433,  // 'B2D3'
+    cmsSigGamutTag                          = 0x67616D74,  // 'gamt'
+    cmsSigGrayTRCTag                        = 0x6b545243,  // 'kTRC'
+    cmsSigGreenColorantTag                  = 0x6758595A,  // 'gXYZ'
+    cmsSigGreenMatrixColumnTag              = 0x6758595A,  // 'gXYZ' (same value as cmsSigGreenColorantTag)
+    cmsSigGreenTRCTag                       = 0x67545243,  // 'gTRC'
+    cmsSigLuminanceTag                      = 0x6C756d69,  // 'lumi'
+    cmsSigMeasurementTag                    = 0x6D656173,  // 'meas'
+    cmsSigMediaBlackPointTag                = 0x626B7074,  // 'bkpt'
+    cmsSigMediaWhitePointTag                = 0x77747074,  // 'wtpt'
+    cmsSigNamedColorTag                     = 0x6E636f6C,  // 'ncol' // Deprecated by the ICC
+    cmsSigNamedColor2Tag                    = 0x6E636C32,  // 'ncl2'
+    cmsSigOutputResponseTag                 = 0x72657370,  // 'resp'
+    cmsSigPerceptualRenderingIntentGamutTag = 0x72696730,  // 'rig0'
+    cmsSigPreview0Tag                       = 0x70726530,  // 'pre0'
+    cmsSigPreview1Tag                       = 0x70726531,  // 'pre1'
+    cmsSigPreview2Tag                       = 0x70726532,  // 'pre2'
+    cmsSigProfileDescriptionTag             = 0x64657363,  // 'desc'
+    cmsSigProfileDescriptionMLTag           = 0x6473636d,  // 'dscm'
+    cmsSigProfileSequenceDescTag            = 0x70736571,  // 'pseq'
+    cmsSigProfileSequenceIdTag              = 0x70736964,  // 'psid'
+    cmsSigPs2CRD0Tag                        = 0x70736430,  // 'psd0'
+    cmsSigPs2CRD1Tag                        = 0x70736431,  // 'psd1'
+    cmsSigPs2CRD2Tag                        = 0x70736432,  // 'psd2'
+    cmsSigPs2CRD3Tag                        = 0x70736433,  // 'psd3'
+    cmsSigPs2CSATag                         = 0x70733273,  // 'ps2s'
+    cmsSigPs2RenderingIntentTag             = 0x70733269,  // 'ps2i'
+    cmsSigRedColorantTag                    = 0x7258595A,  // 'rXYZ'
+    cmsSigRedMatrixColumnTag                = 0x7258595A,  // 'rXYZ' (same value as cmsSigRedColorantTag)
+    cmsSigRedTRCTag                         = 0x72545243,  // 'rTRC'
+    cmsSigSaturationRenderingIntentGamutTag = 0x72696732,  // 'rig2'
+    cmsSigScreeningDescTag                  = 0x73637264,  // 'scrd'
+    cmsSigScreeningTag                      = 0x7363726E,  // 'scrn'
+    cmsSigTechnologyTag                     = 0x74656368,  // 'tech'
+    cmsSigUcrBgTag                          = 0x62666420,  // 'bfd '
+    cmsSigViewingCondDescTag                = 0x76756564,  // 'vued'
+    cmsSigViewingConditionsTag              = 0x76696577,  // 'view'
+    cmsSigVcgtTag                           = 0x76636774,  // 'vcgt'
+    cmsSigMetaTag                           = 0x6D657461,  // 'meta'
+    cmsSigArgyllArtsTag                     = 0x61727473   // 'arts'
+
+} cmsTagSignature;
+
+
+// ICC Technology tag. Each value is the four ASCII characters shown in the trailing comment, first character in the most significant byte.
+typedef enum {
+    cmsSigDigitalCamera                     = 0x6463616D,  // 'dcam'
+    cmsSigFilmScanner                       = 0x6673636E,  // 'fscn'
+    cmsSigReflectiveScanner                 = 0x7273636E,  // 'rscn'
+    cmsSigInkJetPrinter                     = 0x696A6574,  // 'ijet'
+    cmsSigThermalWaxPrinter                 = 0x74776178,  // 'twax'
+    cmsSigElectrophotographicPrinter        = 0x6570686F,  // 'epho'
+    cmsSigElectrostaticPrinter              = 0x65737461,  // 'esta'
+    cmsSigDyeSublimationPrinter             = 0x64737562,  // 'dsub'
+    cmsSigPhotographicPaperPrinter          = 0x7270686F,  // 'rpho'
+    cmsSigFilmWriter                        = 0x6670726E,  // 'fprn'
+    cmsSigVideoMonitor                      = 0x7669646D,  // 'vidm'
+    cmsSigVideoCamera                       = 0x76696463,  // 'vidc'
+    cmsSigProjectionTelevision              = 0x706A7476,  // 'pjtv'
+    cmsSigCRTDisplay                        = 0x43525420,  // 'CRT '
+    cmsSigPMDisplay                         = 0x504D4420,  // 'PMD '
+    cmsSigAMDisplay                         = 0x414D4420,  // 'AMD '
+    cmsSigPhotoCD                           = 0x4B504344,  // 'KPCD'
+    cmsSigPhotoImageSetter                  = 0x696D6773,  // 'imgs'
+    cmsSigGravure                           = 0x67726176,  // 'grav'
+    cmsSigOffsetLithography                 = 0x6F666673,  // 'offs'
+    cmsSigSilkscreen                        = 0x73696C6B,  // 'silk'
+    cmsSigFlexography                       = 0x666C6578,  // 'flex'
+    cmsSigMotionPictureFilmScanner          = 0x6D706673,  // 'mpfs'
+    cmsSigMotionPictureFilmRecorder         = 0x6D706672,  // 'mpfr'
+    cmsSigDigitalMotionPictureCamera        = 0x646D7063,  // 'dmpc'
+    cmsSigDigitalCinemaProjector            = 0x6463706A   // 'dcpj'
+    // The value above was 0x64636A70, which spells 'dcjp'; the last two bytes were transposed relative to the documented 'dcpj' signature.
+} cmsTechnologySignature;
+
+
+// ICC Color spaces. Each value is the four ASCII characters shown in the trailing comment, first character in the most significant byte.
+typedef enum {
+    cmsSigXYZData                           = 0x58595A20,  // 'XYZ '
+    cmsSigLabData                           = 0x4C616220,  // 'Lab '
+    cmsSigLuvData                           = 0x4C757620,  // 'Luv '
+    cmsSigYCbCrData                         = 0x59436272,  // 'YCbr'
+    cmsSigYxyData                           = 0x59787920,  // 'Yxy '
+    cmsSigRgbData                           = 0x52474220,  // 'RGB '
+    cmsSigGrayData                          = 0x47524159,  // 'GRAY'
+    cmsSigHsvData                           = 0x48535620,  // 'HSV '
+    cmsSigHlsData                           = 0x484C5320,  // 'HLS '
+    cmsSigCmykData                          = 0x434D594B,  // 'CMYK'
+    cmsSigCmyData                           = 0x434D5920,  // 'CMY '
+    cmsSigMCH1Data                          = 0x4D434831,  // 'MCH1'
+    cmsSigMCH2Data                          = 0x4D434832,  // 'MCH2'
+    cmsSigMCH3Data                          = 0x4D434833,  // 'MCH3'
+    cmsSigMCH4Data                          = 0x4D434834,  // 'MCH4'
+    cmsSigMCH5Data                          = 0x4D434835,  // 'MCH5'
+    cmsSigMCH6Data                          = 0x4D434836,  // 'MCH6'
+    cmsSigMCH7Data                          = 0x4D434837,  // 'MCH7'
+    cmsSigMCH8Data                          = 0x4D434838,  // 'MCH8'
+    cmsSigMCH9Data                          = 0x4D434839,  // 'MCH9'
+    cmsSigMCHAData                          = 0x4D434841,  // 'MCHA'
+    cmsSigMCHBData                          = 0x4D434842,  // 'MCHB'
+    cmsSigMCHCData                          = 0x4D434843,  // 'MCHC'
+    cmsSigMCHDData                          = 0x4D434844,  // 'MCHD'
+    cmsSigMCHEData                          = 0x4D434845,  // 'MCHE'
+    cmsSigMCHFData                          = 0x4D434846,  // 'MCHF'
+    cmsSigNamedData                         = 0x6e6d636c,  // 'nmcl'
+    cmsSig1colorData                        = 0x31434C52,  // '1CLR'
+    cmsSig2colorData                        = 0x32434C52,  // '2CLR'
+    cmsSig3colorData                        = 0x33434C52,  // '3CLR'
+    cmsSig4colorData                        = 0x34434C52,  // '4CLR'
+    cmsSig5colorData                        = 0x35434C52,  // '5CLR'
+    cmsSig6colorData                        = 0x36434C52,  // '6CLR'
+    cmsSig7colorData                        = 0x37434C52,  // '7CLR'
+    cmsSig8colorData                        = 0x38434C52,  // '8CLR'
+    cmsSig9colorData                        = 0x39434C52,  // '9CLR'
+    cmsSig10colorData                       = 0x41434C52,  // 'ACLR' (channel counts 10..15 use the letters A..F)
+    cmsSig11colorData                       = 0x42434C52,  // 'BCLR'
+    cmsSig12colorData                       = 0x43434C52,  // 'CCLR'
+    cmsSig13colorData                       = 0x44434C52,  // 'DCLR'
+    cmsSig14colorData                       = 0x45434C52,  // 'ECLR'
+    cmsSig15colorData                       = 0x46434C52,  // 'FCLR'
+    cmsSigLuvKData                          = 0x4C75764B   // 'LuvK'
+
+} cmsColorSpaceSignature;
+
+// ICC Profile Class. Each value is the four ASCII characters shown in the trailing comment, first character in the most significant byte.
+typedef enum {
+    cmsSigInputClass                        = 0x73636E72,  // 'scnr'
+    cmsSigDisplayClass                      = 0x6D6E7472,  // 'mntr'
+    cmsSigOutputClass                       = 0x70727472,  // 'prtr'
+    cmsSigLinkClass                         = 0x6C696E6B,  // 'link'
+    cmsSigAbstractClass                     = 0x61627374,  // 'abst'
+    cmsSigColorSpaceClass                   = 0x73706163,  // 'spac'
+    cmsSigNamedColorClass                   = 0x6e6d636c   // 'nmcl'
+
+} cmsProfileClassSignature;
+
+// ICC Platforms. Each value is the four ASCII characters shown in the trailing comment, first character in the most significant byte.
+typedef enum {
+    cmsSigMacintosh                         = 0x4150504C,  // 'APPL'
+    cmsSigMicrosoft                         = 0x4D534654,  // 'MSFT'
+    cmsSigSolaris                           = 0x53554E57,  // 'SUNW'
+    cmsSigSGI                               = 0x53474920,  // 'SGI '
+    cmsSigTaligent                          = 0x54474E54,  // 'TGNT'
+    cmsSigUnices                            = 0x2A6E6978   // '*nix'   // From argyll -- Not official
+
+} cmsPlatformSignature;
+
+// Reference gamut
+#define  cmsSigPerceptualReferenceMediumGamut         0x70726d67  //'prmg'
+
+// For cmsSigColorimetricIntentImageStateTag
+#define  cmsSigSceneColorimetryEstimates              0x73636F65  //'scoe'
+#define  cmsSigSceneAppearanceEstimates               0x73617065  //'sape'
+#define  cmsSigFocalPlaneColorimetryEstimates         0x66706365  //'fpce'
+#define  cmsSigReflectionHardcopyOriginalColorimetry  0x72686F63  //'rhoc'
+#define  cmsSigReflectionPrintOutputColorimetry       0x72706F63  //'rpoc'
+
+// Multi process element types. Each value is the four ASCII characters shown in the trailing comment, first character in the most significant byte.
+typedef enum {
+    cmsSigCurveSetElemType              = 0x63767374,  //'cvst'
+    cmsSigMatrixElemType                = 0x6D617466,  //'matf'
+    cmsSigCLutElemType                  = 0x636C7574,  //'clut'
+
+    cmsSigBAcsElemType                  = 0x62414353,  // 'bACS'
+    cmsSigEAcsElemType                  = 0x65414353,  // 'eACS'
+
+    // Custom from here, not in the ICC Spec. NOTE(review): the 'l2x '/'x2l ' codes look swapped relative to the XYZ2Lab/Lab2XYZ names -- confirm before relying on the mnemonics
+    cmsSigXYZ2LabElemType               = 0x6C327820,  // 'l2x '
+    cmsSigLab2XYZElemType               = 0x78326C20,  // 'x2l '
+    cmsSigNamedColorElemType            = 0x6E636C20,  // 'ncl '
+    cmsSigLabV2toV4                     = 0x32203420,  // '2 4 '
+    cmsSigLabV4toV2                     = 0x34203220,  // '4 2 '
+  
+    // Identities
+    cmsSigIdentityElemType              = 0x69646E20,  // 'idn '
+
+    // Float to floatPCS
+    cmsSigLab2FloatPCS                  = 0x64326C20,  // 'd2l '
+    cmsSigFloatPCS2Lab                  = 0x6C326420,  // 'l2d '
+    cmsSigXYZ2FloatPCS                  = 0x64327820,  // 'd2x '
+    cmsSigFloatPCS2XYZ                  = 0x78326420,  // 'x2d '  
+    cmsSigClipNegativesElemType         = 0x636c7020   // 'clp '
+
+} cmsStageSignature;
+
+// Types of CurveElements. Each value is the four ASCII characters shown in the trailing comment, first character in the most significant byte.
+typedef enum {
+
+    cmsSigFormulaCurveSeg               = 0x70617266, // 'parf'
+    cmsSigSampledCurveSeg               = 0x73616D66, // 'samf'
+    cmsSigSegmentedCurve                = 0x63757266  // 'curf'
+
+} cmsCurveSegSignature;
+
+// Used in ResponseCurveType
+#define  cmsSigStatusA                    0x53746141 //'StaA'
+#define  cmsSigStatusE                    0x53746145 //'StaE'
+#define  cmsSigStatusI                    0x53746149 //'StaI'
+#define  cmsSigStatusT                    0x53746154 //'StaT'
+#define  cmsSigStatusM                    0x5374614D //'StaM'
+#define  cmsSigDN                         0x444E2020 //'DN  '
+#define  cmsSigDNP                        0x444E2050 //'DN P'
+#define  cmsSigDNN                        0x444E4E20 //'DNN '
+#define  cmsSigDNNP                       0x444E4E50 //'DNNP'
+
+// Device attributes, currently defined values correspond to the low 4 bytes
+// of the 8 byte attribute quantity
+#define cmsReflective     0
+#define cmsTransparency   1
+#define cmsGlossy         0
+#define cmsMatte          2
+
+// Common structures in ICC tags. cmsICCData: a length, a flag word and a byte payload.
+typedef struct {
+    cmsUInt32Number len;      // presumably the payload length in bytes -- TODO confirm against users of this struct
+    cmsUInt32Number flag;     // meaning of the flag bits is not shown here
+    cmsUInt8Number  data[1];  // declared with one element; presumably variable-length storage sized by len -- confirm
+
+} cmsICCData;
+
+// ICC date time: six 16-bit unsigned fields, from year down to seconds
+typedef struct {
+    cmsUInt16Number      year;
+    cmsUInt16Number      month;
+    cmsUInt16Number      day;
+    cmsUInt16Number      hours;
+    cmsUInt16Number      minutes;
+    cmsUInt16Number      seconds;
+
+} cmsDateTimeNumber;
+
+// ICC XYZ: a tristimulus triple with each component stored as cmsS15Fixed16Number
+typedef struct {
+    cmsS15Fixed16Number  X;
+    cmsS15Fixed16Number  Y;
+    cmsS15Fixed16Number  Z;
+
+} cmsEncodedXYZNumber;
+
+
+typedef union {
+    cmsUInt8Number       ID8[16];
+    cmsUInt16Number      ID16[8];
+    cmsUInt32Number      ID32[4];
+    // Union: the three arrays overlay the same storage, exposing the ID as 16, 8 or 4 elements.
+} cmsProfileID;
+
+
+// ----------------------------------------------------------------------------------------------
+// ICC profile internal base types. Strictly speaking, these shouldn't be declared in this header, but
+// somebody may want to use this info to access the profile header directly, so here they are.
+
+// Profile header -- it is 32-bit aligned, so no issues are expected on alignment
+typedef struct {
+    cmsUInt32Number              size;           // Profile size in bytes
+    cmsSignature                 cmmId;          // CMM for this profile
+    cmsUInt32Number              version;        // Format version number
+    cmsProfileClassSignature     deviceClass;    // Type of profile
+    cmsColorSpaceSignature       colorSpace;     // Color space of data
+    cmsColorSpaceSignature       pcs;            // PCS, XYZ or Lab only
+    cmsDateTimeNumber            date;           // Date profile was created
+    cmsSignature                 magic;          // Magic Number to identify an ICC profile
+    cmsPlatformSignature         platform;       // Primary Platform
+    cmsUInt32Number              flags;          // Various bit settings
+    cmsSignature                 manufacturer;   // Device manufacturer
+    cmsUInt32Number              model;          // Device model number
+    cmsUInt64Number              attributes;     // Device attributes
+    cmsUInt32Number              renderingIntent;// Rendering intent
+    cmsEncodedXYZNumber          illuminant;     // Profile illuminant
+    cmsSignature                 creator;        // Profile creator
+    cmsProfileID                 profileID;      // Profile ID
+    cmsInt8Number                reserved[28];   // Reserved for future use
+    // NOTE(review): the fields add up to 128 bytes only if enums and cmsSignature are 4 bytes wide and no padding is inserted -- confirm per compiler.
+} cmsICCHeader;
+
+// ICC base tag: a tag type signature followed by four reserved bytes
+typedef struct {
+    cmsTagTypeSignature  sig;
+    cmsInt8Number        reserved[4];
+
+} cmsTagBase;
+
+// A tag entry in directory: which tag it is, where its data starts, and how many bytes it occupies
+typedef struct {
+    cmsTagSignature      sig;            // The tag signature
+    cmsUInt32Number      offset;         // Start of tag
+    cmsUInt32Number      size;           // Size in bytes
+
+} cmsTagEntry;
+
+// ----------------------------------------------------------------------------------------------
+
+// Little CMS specific typedefs
+
+typedef void* cmsHANDLE ;              // Generic handle
+typedef void* cmsHPROFILE;             // Opaque typedefs to hide internals
+typedef void* cmsHTRANSFORM;           // Opaque as well; all three are plain void* aliases, so the compiler cannot tell them apart
+
+#define cmsMAXCHANNELS  16                // Maximum number of channels in ICC profiles
+
+// Format of pixel is defined by one cmsUInt32Number, using bit fields as follows
+//
+//                               2                1          0
+//                          3 2 10987 6 5 4 3 2 1 098 7654 321
+//                          A O TTTTT U Y F P X S EEE CCCC BBB
+//
+//            A: Floating point -- With this flag we can differentiate 16 bits as float and as int
+//            O: Optimized -- previous optimization already returns the final 8-bit value
+//            T: Pixeltype
+//            F: Flavor  0=MinIsBlack(Chocolate) 1=MinIsWhite(Vanilla)
+//            P: Planar? 0=Chunky, 1=Planar
+//            X: swap 16 bps endianness?
+//            S: Do swap? ie, BGR, KYMC
+//            E: Extra samples
+//            C: Channels (Samples per pixel)
+//            B: bytes per sample
+//            Y: Swap first - changes ABGR to BGRA and KCMY to CMYK
+
+#define FLOAT_SH(a)            ((a) << 22)   // A: floating point flag, bit 22
+#define OPTIMIZED_SH(s)        ((s) << 21)   // O: optimized flag, bit 21
+#define COLORSPACE_SH(s)       ((s) << 16)   // T: pixel type, starting at bit 16
+#define SWAPFIRST_SH(s)        ((s) << 14)   // Y: swap first, bit 14
+#define FLAVOR_SH(s)           ((s) << 13)   // F: flavor, bit 13
+#define PLANAR_SH(p)           ((p) << 12)   // P: planar, bit 12
+#define ENDIAN16_SH(e)         ((e) << 11)   // X: swap 16 bps endianness, bit 11
+#define DOSWAP_SH(e)           ((e) << 10)   // S: do swap, bit 10
+#define EXTRA_SH(e)            ((e) << 7)    // E: extra samples, starting at bit 7
+#define CHANNELS_SH(c)         ((c) << 3)    // C: channels, starting at bit 3
+#define BYTES_SH(b)            (b)           // B: bytes per sample, lowest bits (no shift, no masking)
+
+// These macros unpack format specifiers into integers: shift the field down to bit 0, then mask it to the field width
+#define T_FLOAT(a)            (((a)>>22)&1)    // 1 bit
+#define T_OPTIMIZED(o)        (((o)>>21)&1)    // 1 bit
+#define T_COLORSPACE(s)       (((s)>>16)&31)   // 5 bits
+#define T_SWAPFIRST(s)        (((s)>>14)&1)    // 1 bit
+#define T_FLAVOR(s)           (((s)>>13)&1)    // 1 bit
+#define T_PLANAR(p)           (((p)>>12)&1)    // 1 bit
+#define T_ENDIAN16(e)         (((e)>>11)&1)    // 1 bit
+#define T_DOSWAP(e)           (((e)>>10)&1)    // 1 bit
+#define T_EXTRA(e)            (((e)>>7)&7)     // 3 bits
+#define T_CHANNELS(c)         (((c)>>3)&15)    // 4 bits
+#define T_BYTES(b)            ((b)&7)          // 3 bits
+
+
+// Pixel types: values for the 5-bit colorspace field packed by COLORSPACE_SH and read back by T_COLORSPACE (largest value used here is 30)
+#define PT_ANY       0    // Don't check colorspace
+                          // 1 & 2 are reserved
+#define PT_GRAY      3
+#define PT_RGB       4
+#define PT_CMY       5
+#define PT_CMYK      6
+#define PT_YCbCr     7
+#define PT_YUV       8      // Lu'v'
+#define PT_XYZ       9
+#define PT_Lab       10
+#define PT_YUVK      11     // Lu'v'K
+#define PT_HSV       12
+#define PT_HLS       13
+#define PT_Yxy       14
+
+#define PT_MCH1      15
+#define PT_MCH2      16
+#define PT_MCH3      17
+#define PT_MCH4      18
+#define PT_MCH5      19
+#define PT_MCH6      20
+#define PT_MCH7      21
+#define PT_MCH8      22
+#define PT_MCH9      23
+#define PT_MCH10     24
+#define PT_MCH11     25
+#define PT_MCH12     26
+#define PT_MCH13     27
+#define PT_MCH14     28
+#define PT_MCH15     29
+
+#define PT_LabV2     30     // Identical to PT_Lab, but using the V2 old encoding
+
+// Some (not all!) representations
+
+#ifndef TYPE_RGB_8      // TYPE_RGB_8 is a very common identifier, so don't include ours
+                        // if user has it already defined.
+
+#define TYPE_GRAY_8            (COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(1))
+#define TYPE_GRAY_8_REV        (COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(1)|FLAVOR_SH(1))
+#define TYPE_GRAY_16           (COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(2))
+#define TYPE_GRAY_16_REV       (COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(2)|FLAVOR_SH(1))
+#define TYPE_GRAY_16_SE        (COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_GRAYA_8           (COLORSPACE_SH(PT_GRAY)|EXTRA_SH(1)|CHANNELS_SH(1)|BYTES_SH(1))
+#define TYPE_GRAYA_16          (COLORSPACE_SH(PT_GRAY)|EXTRA_SH(1)|CHANNELS_SH(1)|BYTES_SH(2))
+#define TYPE_GRAYA_16_SE       (COLORSPACE_SH(PT_GRAY)|EXTRA_SH(1)|CHANNELS_SH(1)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_GRAYA_8_PLANAR    (COLORSPACE_SH(PT_GRAY)|EXTRA_SH(1)|CHANNELS_SH(1)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_GRAYA_16_PLANAR   (COLORSPACE_SH(PT_GRAY)|EXTRA_SH(1)|CHANNELS_SH(1)|BYTES_SH(2)|PLANAR_SH(1))
+
+#define TYPE_RGB_8             (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_RGB_8_PLANAR      (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_BGR_8             (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_BGR_8_PLANAR      (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|PLANAR_SH(1))
+#define TYPE_RGB_16            (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_RGB_16_PLANAR     (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_RGB_16_SE         (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_BGR_16            (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_BGR_16_PLANAR     (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1)|PLANAR_SH(1))
+#define TYPE_BGR_16_SE         (COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+
+#define TYPE_RGBA_8            (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_RGBA_8_PLANAR     (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_RGBA_16           (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_RGBA_16_PLANAR    (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_RGBA_16_SE        (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+#define TYPE_ARGB_8            (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_ARGB_8_PLANAR     (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|SWAPFIRST_SH(1)|PLANAR_SH(1))
+#define TYPE_ARGB_16           (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|SWAPFIRST_SH(1))
+
+#define TYPE_ABGR_8            (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_ABGR_8_PLANAR     (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|PLANAR_SH(1))
+#define TYPE_ABGR_16           (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_ABGR_16_PLANAR    (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1)|PLANAR_SH(1))
+#define TYPE_ABGR_16_SE        (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+
+#define TYPE_BGRA_8            (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_BGRA_8_PLANAR     (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1)|PLANAR_SH(1))
+#define TYPE_BGRA_16           (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_BGRA_16_SE        (COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1))
+
+#define TYPE_CMY_8             (COLORSPACE_SH(PT_CMY)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_CMY_8_PLANAR      (COLORSPACE_SH(PT_CMY)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_CMY_16            (COLORSPACE_SH(PT_CMY)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_CMY_16_PLANAR     (COLORSPACE_SH(PT_CMY)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_CMY_16_SE         (COLORSPACE_SH(PT_CMY)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+#define TYPE_CMYK_8            (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(1))
+#define TYPE_CMYKA_8           (COLORSPACE_SH(PT_CMYK)|EXTRA_SH(1)|CHANNELS_SH(4)|BYTES_SH(1))
+#define TYPE_CMYK_8_REV        (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(1)|FLAVOR_SH(1))
+#define TYPE_YUVK_8            TYPE_CMYK_8_REV
+#define TYPE_CMYK_8_PLANAR     (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_CMYK_16           (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2))
+#define TYPE_CMYK_16_REV       (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|FLAVOR_SH(1))
+#define TYPE_YUVK_16           TYPE_CMYK_16_REV
+#define TYPE_CMYK_16_PLANAR    (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_CMYK_16_SE        (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+#define TYPE_KYMC_8            (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC_16           (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC_16_SE        (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+
+#define TYPE_KCMY_8            (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_KCMY_8_REV        (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(1)|FLAVOR_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_KCMY_16           (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|SWAPFIRST_SH(1))
+#define TYPE_KCMY_16_REV       (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|FLAVOR_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_KCMY_16_SE        (COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2)|ENDIAN16_SH(1)|SWAPFIRST_SH(1))
+
+#define TYPE_CMYK5_8           (COLORSPACE_SH(PT_MCH5)|CHANNELS_SH(5)|BYTES_SH(1))
+#define TYPE_CMYK5_16          (COLORSPACE_SH(PT_MCH5)|CHANNELS_SH(5)|BYTES_SH(2))
+#define TYPE_CMYK5_16_SE       (COLORSPACE_SH(PT_MCH5)|CHANNELS_SH(5)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC5_8           (COLORSPACE_SH(PT_MCH5)|CHANNELS_SH(5)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC5_16          (COLORSPACE_SH(PT_MCH5)|CHANNELS_SH(5)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC5_16_SE       (COLORSPACE_SH(PT_MCH5)|CHANNELS_SH(5)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+#define TYPE_CMYK6_8           (COLORSPACE_SH(PT_MCH6)|CHANNELS_SH(6)|BYTES_SH(1))
+#define TYPE_CMYK6_8_PLANAR    (COLORSPACE_SH(PT_MCH6)|CHANNELS_SH(6)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_CMYK6_16          (COLORSPACE_SH(PT_MCH6)|CHANNELS_SH(6)|BYTES_SH(2))
+#define TYPE_CMYK6_16_PLANAR   (COLORSPACE_SH(PT_MCH6)|CHANNELS_SH(6)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_CMYK6_16_SE       (COLORSPACE_SH(PT_MCH6)|CHANNELS_SH(6)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_CMYK7_8           (COLORSPACE_SH(PT_MCH7)|CHANNELS_SH(7)|BYTES_SH(1))
+#define TYPE_CMYK7_16          (COLORSPACE_SH(PT_MCH7)|CHANNELS_SH(7)|BYTES_SH(2))
+#define TYPE_CMYK7_16_SE       (COLORSPACE_SH(PT_MCH7)|CHANNELS_SH(7)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC7_8           (COLORSPACE_SH(PT_MCH7)|CHANNELS_SH(7)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC7_16          (COLORSPACE_SH(PT_MCH7)|CHANNELS_SH(7)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC7_16_SE       (COLORSPACE_SH(PT_MCH7)|CHANNELS_SH(7)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+#define TYPE_CMYK8_8           (COLORSPACE_SH(PT_MCH8)|CHANNELS_SH(8)|BYTES_SH(1))
+#define TYPE_CMYK8_16          (COLORSPACE_SH(PT_MCH8)|CHANNELS_SH(8)|BYTES_SH(2))
+#define TYPE_CMYK8_16_SE       (COLORSPACE_SH(PT_MCH8)|CHANNELS_SH(8)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC8_8           (COLORSPACE_SH(PT_MCH8)|CHANNELS_SH(8)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC8_16          (COLORSPACE_SH(PT_MCH8)|CHANNELS_SH(8)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC8_16_SE       (COLORSPACE_SH(PT_MCH8)|CHANNELS_SH(8)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+#define TYPE_CMYK9_8           (COLORSPACE_SH(PT_MCH9)|CHANNELS_SH(9)|BYTES_SH(1))
+#define TYPE_CMYK9_16          (COLORSPACE_SH(PT_MCH9)|CHANNELS_SH(9)|BYTES_SH(2))
+#define TYPE_CMYK9_16_SE       (COLORSPACE_SH(PT_MCH9)|CHANNELS_SH(9)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC9_8           (COLORSPACE_SH(PT_MCH9)|CHANNELS_SH(9)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC9_16          (COLORSPACE_SH(PT_MCH9)|CHANNELS_SH(9)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC9_16_SE       (COLORSPACE_SH(PT_MCH9)|CHANNELS_SH(9)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+#define TYPE_CMYK10_8          (COLORSPACE_SH(PT_MCH10)|CHANNELS_SH(10)|BYTES_SH(1))
+#define TYPE_CMYK10_16         (COLORSPACE_SH(PT_MCH10)|CHANNELS_SH(10)|BYTES_SH(2))
+#define TYPE_CMYK10_16_SE      (COLORSPACE_SH(PT_MCH10)|CHANNELS_SH(10)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC10_8          (COLORSPACE_SH(PT_MCH10)|CHANNELS_SH(10)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC10_16         (COLORSPACE_SH(PT_MCH10)|CHANNELS_SH(10)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC10_16_SE      (COLORSPACE_SH(PT_MCH10)|CHANNELS_SH(10)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+#define TYPE_CMYK11_8          (COLORSPACE_SH(PT_MCH11)|CHANNELS_SH(11)|BYTES_SH(1))
+#define TYPE_CMYK11_16         (COLORSPACE_SH(PT_MCH11)|CHANNELS_SH(11)|BYTES_SH(2))
+#define TYPE_CMYK11_16_SE      (COLORSPACE_SH(PT_MCH11)|CHANNELS_SH(11)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC11_8          (COLORSPACE_SH(PT_MCH11)|CHANNELS_SH(11)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC11_16         (COLORSPACE_SH(PT_MCH11)|CHANNELS_SH(11)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC11_16_SE      (COLORSPACE_SH(PT_MCH11)|CHANNELS_SH(11)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+#define TYPE_CMYK12_8          (COLORSPACE_SH(PT_MCH12)|CHANNELS_SH(12)|BYTES_SH(1))
+#define TYPE_CMYK12_16         (COLORSPACE_SH(PT_MCH12)|CHANNELS_SH(12)|BYTES_SH(2))
+#define TYPE_CMYK12_16_SE      (COLORSPACE_SH(PT_MCH12)|CHANNELS_SH(12)|BYTES_SH(2)|ENDIAN16_SH(1))
+#define TYPE_KYMC12_8          (COLORSPACE_SH(PT_MCH12)|CHANNELS_SH(12)|BYTES_SH(1)|DOSWAP_SH(1))
+#define TYPE_KYMC12_16         (COLORSPACE_SH(PT_MCH12)|CHANNELS_SH(12)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_KYMC12_16_SE      (COLORSPACE_SH(PT_MCH12)|CHANNELS_SH(12)|BYTES_SH(2)|DOSWAP_SH(1)|ENDIAN16_SH(1))
+
+// Colorimetric
+#define TYPE_XYZ_16            (COLORSPACE_SH(PT_XYZ)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_Lab_8             (COLORSPACE_SH(PT_Lab)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_LabV2_8           (COLORSPACE_SH(PT_LabV2)|CHANNELS_SH(3)|BYTES_SH(1))
+
+#define TYPE_ALab_8            (COLORSPACE_SH(PT_Lab)|CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_ALabV2_8          (COLORSPACE_SH(PT_LabV2)|CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_Lab_16            (COLORSPACE_SH(PT_Lab)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_LabV2_16          (COLORSPACE_SH(PT_LabV2)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_Yxy_16            (COLORSPACE_SH(PT_Yxy)|CHANNELS_SH(3)|BYTES_SH(2))
+
+// YCbCr
+#define TYPE_YCbCr_8           (COLORSPACE_SH(PT_YCbCr)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_YCbCr_8_PLANAR    (COLORSPACE_SH(PT_YCbCr)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_YCbCr_16          (COLORSPACE_SH(PT_YCbCr)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_YCbCr_16_PLANAR   (COLORSPACE_SH(PT_YCbCr)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_YCbCr_16_SE       (COLORSPACE_SH(PT_YCbCr)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+// YUV: 3 channels; _8 = BYTES_SH(1), _16 = BYTES_SH(2); _PLANAR adds PLANAR_SH(1), _SE adds ENDIAN16_SH(1)
+#define TYPE_YUV_8             (COLORSPACE_SH(PT_YUV)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_YUV_8_PLANAR      (COLORSPACE_SH(PT_YUV)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_YUV_16            (COLORSPACE_SH(PT_YUV)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_YUV_16_PLANAR     (COLORSPACE_SH(PT_YUV)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_YUV_16_SE         (COLORSPACE_SH(PT_YUV)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+// HLS: 3 channels; _8 = BYTES_SH(1), _16 = BYTES_SH(2); _PLANAR adds PLANAR_SH(1), _SE adds ENDIAN16_SH(1)
+#define TYPE_HLS_8             (COLORSPACE_SH(PT_HLS)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_HLS_8_PLANAR      (COLORSPACE_SH(PT_HLS)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_HLS_16            (COLORSPACE_SH(PT_HLS)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_HLS_16_PLANAR     (COLORSPACE_SH(PT_HLS)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_HLS_16_SE         (COLORSPACE_SH(PT_HLS)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+// HSV: 3 channels; _8 = BYTES_SH(1), _16 = BYTES_SH(2); _PLANAR adds PLANAR_SH(1), _SE adds ENDIAN16_SH(1)
+#define TYPE_HSV_8             (COLORSPACE_SH(PT_HSV)|CHANNELS_SH(3)|BYTES_SH(1))
+#define TYPE_HSV_8_PLANAR      (COLORSPACE_SH(PT_HSV)|CHANNELS_SH(3)|BYTES_SH(1)|PLANAR_SH(1))
+#define TYPE_HSV_16            (COLORSPACE_SH(PT_HSV)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_HSV_16_PLANAR     (COLORSPACE_SH(PT_HSV)|CHANNELS_SH(3)|BYTES_SH(2)|PLANAR_SH(1))
+#define TYPE_HSV_16_SE         (COLORSPACE_SH(PT_HSV)|CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1))
+
+// Named color index: one 2-byte channel. Only 16 bits allowed; no COLORSPACE_SH field is set (colorspace is not checked)
+#define TYPE_NAMED_COLOR_INDEX (CHANNELS_SH(1)|BYTES_SH(2))
+
+// Float formatters: FLOAT_SH(1) combined with BYTES_SH(4) samples.
+#define TYPE_XYZ_FLT          (FLOAT_SH(1)|COLORSPACE_SH(PT_XYZ)|CHANNELS_SH(3)|BYTES_SH(4))
+#define TYPE_Lab_FLT          (FLOAT_SH(1)|COLORSPACE_SH(PT_Lab)|CHANNELS_SH(3)|BYTES_SH(4))
+#define TYPE_LabA_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_Lab)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(4))
+#define TYPE_GRAY_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(4))
+#define TYPE_RGB_FLT          (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(4))
+
+#define TYPE_RGBA_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(4))  // RGB + 1 extra channel
+#define TYPE_ARGB_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(4)|SWAPFIRST_SH(1))
+#define TYPE_BGR_FLT          (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(4)|DOSWAP_SH(1))
+#define TYPE_BGRA_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(4)|DOSWAP_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_ABGR_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(4)|DOSWAP_SH(1))
+
+#define TYPE_CMYK_FLT         (FLOAT_SH(1)|COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(4))
+
+// Double (_DBL) floating point formatters.
+// NOTE THAT THE 'BYTES' FIELD IS SET TO ZERO ON DBL because 8 bytes overflows the bitfield
+#define TYPE_XYZ_DBL          (FLOAT_SH(1)|COLORSPACE_SH(PT_XYZ)|CHANNELS_SH(3)|BYTES_SH(0))
+#define TYPE_Lab_DBL          (FLOAT_SH(1)|COLORSPACE_SH(PT_Lab)|CHANNELS_SH(3)|BYTES_SH(0))
+#define TYPE_GRAY_DBL         (FLOAT_SH(1)|COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(0))
+#define TYPE_RGB_DBL          (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(0))
+#define TYPE_BGR_DBL          (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(0)|DOSWAP_SH(1))
+#define TYPE_CMYK_DBL         (FLOAT_SH(1)|COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(0))
+
+// IEEE 754-2008 "half": FLOAT_SH(1) combined with BYTES_SH(2) samples
+#define TYPE_GRAY_HALF_FLT    (FLOAT_SH(1)|COLORSPACE_SH(PT_GRAY)|CHANNELS_SH(1)|BYTES_SH(2))
+#define TYPE_RGB_HALF_FLT     (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_RGBA_HALF_FLT    (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2))
+#define TYPE_CMYK_HALF_FLT    (FLOAT_SH(1)|COLORSPACE_SH(PT_CMYK)|CHANNELS_SH(4)|BYTES_SH(2))
+
+// Alpha / swapped-order RGB variants (RGBA is defined above). ABGR carries EXTRA_SH(1) like TYPE_ABGR_FLT; without it, it would equal BGR.
+#define TYPE_ARGB_HALF_FLT    (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|SWAPFIRST_SH(1))
+#define TYPE_BGR_HALF_FLT     (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1))
+#define TYPE_BGRA_HALF_FLT    (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1)|SWAPFIRST_SH(1))
+#define TYPE_ABGR_HALF_FLT    (FLOAT_SH(1)|COLORSPACE_SH(PT_RGB)|EXTRA_SH(1)|CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1))
+
+#endif
+
+// Colorspaces: plain value structs whose components are stored as cmsFloat64Number
+typedef struct {                        // CIE XYZ value
+        cmsFloat64Number X;
+        cmsFloat64Number Y;
+        cmsFloat64Number Z;
+
+    } cmsCIEXYZ;
+
+typedef struct {                        // CIE xyY value; cmsXYZ2xyY / cmsxyY2XYZ convert to and from cmsCIEXYZ
+        cmsFloat64Number x;
+        cmsFloat64Number y;
+        cmsFloat64Number Y;             // Upper-case Y, distinct from the lower-case y above
+
+    } cmsCIExyY;
+
+typedef struct {                        // CIE Lab value; cmsXYZ2Lab / cmsLab2XYZ convert relative to a white point
+        cmsFloat64Number L;
+        cmsFloat64Number a;
+        cmsFloat64Number b;
+
+    } cmsCIELab;
+
+typedef struct {                        // LCh value; cmsLab2LCh / cmsLCh2Lab convert to and from cmsCIELab
+        cmsFloat64Number L;
+        cmsFloat64Number C;
+        cmsFloat64Number h;
+
+    } cmsCIELCh;
+
+typedef struct {                        // JCh triple: output of cmsCIECAM02Forward, input of cmsCIECAM02Reverse
+        cmsFloat64Number J;
+        cmsFloat64Number C;
+        cmsFloat64Number h;
+
+    } cmsJCh;
+
+typedef struct {                        // One cmsCIEXYZ value per Red / Green / Blue entry
+        cmsCIEXYZ  Red;
+        cmsCIEXYZ  Green;
+        cmsCIEXYZ  Blue;
+
+    } cmsCIEXYZTRIPLE;
+
+typedef struct {                        // One cmsCIExyY value per Red / Green / Blue entry
+        cmsCIExyY  Red;
+        cmsCIExyY  Green;
+        cmsCIExyY  Blue;
+
+    } cmsCIExyYTRIPLE;
+
+// Illuminant types for the IlluminantType fields of the structs below (consecutive codes 0..8)
+#define cmsILLUMINANT_TYPE_UNKNOWN 0x0000000
+#define cmsILLUMINANT_TYPE_D50     0x0000001
+#define cmsILLUMINANT_TYPE_D65     0x0000002
+#define cmsILLUMINANT_TYPE_D93     0x0000003
+#define cmsILLUMINANT_TYPE_F2      0x0000004
+#define cmsILLUMINANT_TYPE_D55     0x0000005
+#define cmsILLUMINANT_TYPE_A       0x0000006
+#define cmsILLUMINANT_TYPE_E       0x0000007
+#define cmsILLUMINANT_TYPE_F8      0x0000008
+
+typedef struct {
+        cmsUInt32Number  Observer;    // 0=unknown, 1=CIE 1931, 2=CIE 1964
+        cmsCIEXYZ        Backing;     // Value of backing
+        cmsUInt32Number  Geometry;    // 0=unknown, 1=45/0 or 0/45, 2=0/d or d/0
+        cmsFloat64Number Flare;       // 0..1.0
+        cmsUInt32Number  IlluminantType;  // One of the cmsILLUMINANT_TYPE_* values
+
+    } cmsICCMeasurementConditions;
+
+typedef struct {
+        cmsCIEXYZ       IlluminantXYZ;   // Not the same struct as the CAM02 one (cmsViewingConditions);
+        cmsCIEXYZ       SurroundXYZ;     // this one is for storing the ICC viewing
+        cmsUInt32Number IlluminantType;  // conditions tag. IlluminantType is a cmsILLUMINANT_TYPE_* value
+
+    } cmsICCViewingConditions;
+
+// Get LittleCMS version (for shared objects) -----------------------------------------------------------------------------
+
+CMSAPI int               CMSEXPORT cmsGetEncodedCMMversion(void);
+
+// Support of non-standard functions --------------------------------------------------------------------------------------
+
+CMSAPI int               CMSEXPORT cmsstrcasecmp(const char* s1, const char* s2);
+CMSAPI long int          CMSEXPORT cmsfilelength(FILE* f);
+
+
+// Context handling --------------------------------------------------------------------------------------------------------
+
+// Each context holds its own globals and its own plug-ins. There is a global context with the id = 0 for legacy compatibility,
+// though using the global context is not recommended. Proper context handling makes lcms more thread-safe.
+
+typedef struct _cmsContext_struct* cmsContext;
+
+CMSAPI cmsContext       CMSEXPORT cmsCreateContext(void* Plugin, void* UserData);
+CMSAPI void             CMSEXPORT cmsDeleteContext(cmsContext ContexID);
+CMSAPI cmsContext       CMSEXPORT cmsDupContext(cmsContext ContextID, void* NewUserData);
+CMSAPI void*            CMSEXPORT cmsGetContextUserData(cmsContext ContextID);
+
+// Plug-In registering  --------------------------------------------------------------------------------------------------
+
+CMSAPI cmsBool           CMSEXPORT cmsPlugin(void* Plugin);
+CMSAPI cmsBool           CMSEXPORT cmsPluginTHR(cmsContext ContextID, void* Plugin);
+CMSAPI void              CMSEXPORT cmsUnregisterPlugins(void);
+CMSAPI void              CMSEXPORT cmsUnregisterPluginsTHR(cmsContext ContextID);
+
+// Error logging ----------------------------------------------------------------------------------------------------------
+
+// There is no error handling at all. When a function fails, it returns a proper value.
+// For example, all create functions return NULL on failure. Others may return FALSE.
+// It may be interesting, for the developer, to know why the function is failing.
+// for that reason, lcms2 does offer a logging function. This function will get
+// an ENGLISH string with some clues on what is going wrong. You can show this
+// info to the end user if you wish, or just create some sort of log on disk.
+// The logging function should NOT terminate the program, as this obviously can leave
+// unfreed resources. It is the programmer's responsibility to check each function
+// return code to make sure it didn't fail.
+
+#define cmsERROR_UNDEFINED                    0   // Error codes; see the ErrorCode argument of cmsLogErrorHandlerFunction
+#define cmsERROR_FILE                         1
+#define cmsERROR_RANGE                        2
+#define cmsERROR_INTERNAL                     3
+#define cmsERROR_NULL                         4
+#define cmsERROR_READ                         5
+#define cmsERROR_SEEK                         6
+#define cmsERROR_WRITE                        7
+#define cmsERROR_UNKNOWN_EXTENSION            8
+#define cmsERROR_COLORSPACE_CHECK             9
+#define cmsERROR_ALREADY_DEFINED              10
+#define cmsERROR_BAD_SIGNATURE                11
+#define cmsERROR_CORRUPTION_DETECTED          12
+#define cmsERROR_NOT_SUITABLE                 13
+
+// The error logger is called with the ContextID when a message is raised. This gives the
+// chance to know which thread is responsible for the warning and any environment associated
+// with it. Non-multithreading applications may safely ignore this parameter.
+// Note that under certain special circumstances, ContextID may be NULL.
+typedef void  (* cmsLogErrorHandlerFunction)(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *Text);
+
+// Allows user to set any specific logger
+CMSAPI void              CMSEXPORT cmsSetLogErrorHandler(cmsLogErrorHandlerFunction Fn);
+CMSAPI void              CMSEXPORT cmsSetLogErrorHandlerTHR(cmsContext ContextID, cmsLogErrorHandlerFunction Fn);
+
+// Conversions --------------------------------------------------------------------------------------------------------------
+
+// Returns pointers to constant structs
+CMSAPI const cmsCIEXYZ*  CMSEXPORT cmsD50_XYZ(void);
+CMSAPI const cmsCIExyY*  CMSEXPORT cmsD50_xyY(void);
+
+// Colorimetric space conversions
+CMSAPI void              CMSEXPORT cmsXYZ2xyY(cmsCIExyY* Dest, const cmsCIEXYZ* Source);
+CMSAPI void              CMSEXPORT cmsxyY2XYZ(cmsCIEXYZ* Dest, const cmsCIExyY* Source);
+CMSAPI void              CMSEXPORT cmsXYZ2Lab(const cmsCIEXYZ* WhitePoint, cmsCIELab* Lab, const cmsCIEXYZ* xyz);
+CMSAPI void              CMSEXPORT cmsLab2XYZ(const cmsCIEXYZ* WhitePoint, cmsCIEXYZ* xyz, const cmsCIELab* Lab);
+CMSAPI void              CMSEXPORT cmsLab2LCh(cmsCIELCh*LCh, const cmsCIELab* Lab);
+CMSAPI void              CMSEXPORT cmsLCh2Lab(cmsCIELab* Lab, const cmsCIELCh* LCh);
+
+// Encoding /Decoding on PCS
+CMSAPI void              CMSEXPORT cmsLabEncoded2Float(cmsCIELab* Lab, const cmsUInt16Number wLab[3]);
+CMSAPI void              CMSEXPORT cmsLabEncoded2FloatV2(cmsCIELab* Lab, const cmsUInt16Number wLab[3]);
+CMSAPI void              CMSEXPORT cmsFloat2LabEncoded(cmsUInt16Number wLab[3], const cmsCIELab* Lab);
+CMSAPI void              CMSEXPORT cmsFloat2LabEncodedV2(cmsUInt16Number wLab[3], const cmsCIELab* Lab);
+CMSAPI void              CMSEXPORT cmsXYZEncoded2Float(cmsCIEXYZ* fxyz, const cmsUInt16Number XYZ[3]);
+CMSAPI void              CMSEXPORT cmsFloat2XYZEncoded(cmsUInt16Number XYZ[3], const cmsCIEXYZ* fXYZ);
+
+// DeltaE metrics
+CMSAPI cmsFloat64Number  CMSEXPORT cmsDeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2);
+CMSAPI cmsFloat64Number  CMSEXPORT cmsCIE94DeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2);
+CMSAPI cmsFloat64Number  CMSEXPORT cmsBFDdeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2);
+CMSAPI cmsFloat64Number  CMSEXPORT cmsCMCdeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2, cmsFloat64Number l, cmsFloat64Number c);
+CMSAPI cmsFloat64Number  CMSEXPORT cmsCIE2000DeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2, cmsFloat64Number Kl, cmsFloat64Number Kc, cmsFloat64Number Kh);
+
+// Temperature <-> Chromaticity (Black body)
+CMSAPI cmsBool           CMSEXPORT cmsWhitePointFromTemp(cmsCIExyY* WhitePoint, cmsFloat64Number  TempK);
+CMSAPI cmsBool           CMSEXPORT cmsTempFromWhitePoint(cmsFloat64Number* TempK, const cmsCIExyY* WhitePoint);
+
+// Chromatic adaptation
+CMSAPI cmsBool           CMSEXPORT cmsAdaptToIlluminant(cmsCIEXYZ* Result, const cmsCIEXYZ* SourceWhitePt,
+                                                                           const cmsCIEXYZ* Illuminant,
+                                                                           const cmsCIEXYZ* Value);
+
+// CIECAM02 ---------------------------------------------------------------------------------------------------
+
+// Viewing conditions. Please note those are CAM model viewing conditions, and not the ICC tag viewing
+// conditions, which I'm naming cmsICCViewingConditions to make the differences evident. Unfortunately, the tag
+// cannot deal with surround, La, Yb and D values, so it is basically useless for storing CAM02 viewing conditions.
+
+
+#define AVG_SURROUND       1     // *_SURROUND: presumably the values for cmsViewingConditions.surround -- confirm
+#define DIM_SURROUND       2
+#define DARK_SURROUND      3
+#define CUTSHEET_SURROUND  4
+
+#define D_CALCULATE        (-1)  // presumably a sentinel for cmsViewingConditions.D_value -- confirm
+
+typedef struct {                     // CAM02 model viewing conditions; passed to cmsCIECAM02Init
+    cmsCIEXYZ        whitePoint;
+    cmsFloat64Number Yb;
+    cmsFloat64Number La;
+    cmsUInt32Number  surround;       // presumably one of the *_SURROUND constants -- confirm
+    cmsFloat64Number D_value;        // presumably accepts D_CALCULATE -- confirm
+
+    } cmsViewingConditions;
+
+CMSAPI cmsHANDLE         CMSEXPORT cmsCIECAM02Init(cmsContext ContextID, const cmsViewingConditions* pVC);
+CMSAPI void              CMSEXPORT cmsCIECAM02Done(cmsHANDLE hModel);
+CMSAPI void              CMSEXPORT cmsCIECAM02Forward(cmsHANDLE hModel, const cmsCIEXYZ* pIn, cmsJCh* pOut);
+CMSAPI void              CMSEXPORT cmsCIECAM02Reverse(cmsHANDLE hModel, const cmsJCh* pIn,    cmsCIEXYZ* pOut);
+
+
+// Tone curves -----------------------------------------------------------------------------------------
+
+// This describes a curve segment. For a table of supported types, see the manual. Users can increase the number of
+// available types by using a proper plug-in. Parametric segments allow 10 parameters at most.
+
+typedef struct {                         // An array of these is passed to cmsBuildSegmentedToneCurve
+    cmsFloat32Number   x0, x1;           // Domain; for x0 < x <= x1
+    cmsInt32Number     Type;             // Parametric type, Type == 0 means sampled segment. Negative values are reserved
+    cmsFloat64Number   Params[10];       // Parameters if Type != 0
+    cmsUInt32Number    nGridPoints;      // Number of grid points if Type == 0
+    cmsFloat32Number*  SampledPoints;    // Points to an array of floats if Type == 0
+
+} cmsCurveSegment;
+
+// The internal representation is none of your business.
+typedef struct _cms_curve_struct cmsToneCurve;
+
+CMSAPI cmsToneCurve*     CMSEXPORT cmsBuildSegmentedToneCurve(cmsContext ContextID, cmsUInt32Number nSegments, const cmsCurveSegment Segments[]);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsBuildParametricToneCurve(cmsContext ContextID, cmsInt32Number Type, const cmsFloat64Number Params[]);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsBuildGamma(cmsContext ContextID, cmsFloat64Number Gamma);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsBuildTabulatedToneCurve16(cmsContext ContextID, cmsUInt32Number nEntries, const cmsUInt16Number values[]);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsBuildTabulatedToneCurveFloat(cmsContext ContextID, cmsUInt32Number nEntries, const cmsFloat32Number values[]);
+CMSAPI void              CMSEXPORT cmsFreeToneCurve(cmsToneCurve* Curve);
+CMSAPI void              CMSEXPORT cmsFreeToneCurveTriple(cmsToneCurve* Curve[3]);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsDupToneCurve(const cmsToneCurve* Src);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsReverseToneCurve(const cmsToneCurve* InGamma);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsReverseToneCurveEx(cmsUInt32Number nResultSamples, const cmsToneCurve* InGamma);
+CMSAPI cmsToneCurve*     CMSEXPORT cmsJoinToneCurve(cmsContext ContextID, const cmsToneCurve* X,  const cmsToneCurve* Y, cmsUInt32Number nPoints);
+CMSAPI cmsBool           CMSEXPORT cmsSmoothToneCurve(cmsToneCurve* Tab, cmsFloat64Number lambda);
+CMSAPI cmsFloat32Number  CMSEXPORT cmsEvalToneCurveFloat(const cmsToneCurve* Curve, cmsFloat32Number v);
+CMSAPI cmsUInt16Number   CMSEXPORT cmsEvalToneCurve16(const cmsToneCurve* Curve, cmsUInt16Number v);
+CMSAPI cmsBool           CMSEXPORT cmsIsToneCurveMultisegment(const cmsToneCurve* InGamma);
+CMSAPI cmsBool           CMSEXPORT cmsIsToneCurveLinear(const cmsToneCurve* Curve);
+CMSAPI cmsBool           CMSEXPORT cmsIsToneCurveMonotonic(const cmsToneCurve* t);
+CMSAPI cmsBool           CMSEXPORT cmsIsToneCurveDescending(const cmsToneCurve* t);
+CMSAPI cmsInt32Number    CMSEXPORT cmsGetToneCurveParametricType(const cmsToneCurve* t);
+CMSAPI cmsFloat64Number  CMSEXPORT cmsEstimateGamma(const cmsToneCurve* t, cmsFloat64Number Precision);
+
+// Tone curve tabular estimation
+CMSAPI cmsUInt32Number         CMSEXPORT cmsGetToneCurveEstimatedTableEntries(const cmsToneCurve* t);
+CMSAPI const cmsUInt16Number*  CMSEXPORT cmsGetToneCurveEstimatedTable(const cmsToneCurve* t);
+
+
+// Implements pipelines of multi-processing elements -------------------------------------------------------------
+
+// Nothing to see here, move along
+typedef struct _cmsPipeline_struct cmsPipeline;
+typedef struct _cmsStage_struct cmsStage;
+
+// Those are hi-level pipelines
+CMSAPI cmsPipeline*      CMSEXPORT cmsPipelineAlloc(cmsContext ContextID, cmsUInt32Number InputChannels, cmsUInt32Number OutputChannels);
+CMSAPI void              CMSEXPORT cmsPipelineFree(cmsPipeline* lut);
+CMSAPI cmsPipeline*      CMSEXPORT cmsPipelineDup(const cmsPipeline* Orig);
+
+CMSAPI cmsContext        CMSEXPORT cmsGetPipelineContextID(const cmsPipeline* lut);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsPipelineInputChannels(const cmsPipeline* lut);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsPipelineOutputChannels(const cmsPipeline* lut);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsPipelineStageCount(const cmsPipeline* lut);
+CMSAPI cmsStage*         CMSEXPORT cmsPipelineGetPtrToFirstStage(const cmsPipeline* lut);
+CMSAPI cmsStage*         CMSEXPORT cmsPipelineGetPtrToLastStage(const cmsPipeline* lut);
+
+CMSAPI void              CMSEXPORT cmsPipelineEval16(const cmsUInt16Number In[], cmsUInt16Number Out[], const cmsPipeline* lut);
+CMSAPI void              CMSEXPORT cmsPipelineEvalFloat(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsPipeline* lut);
+CMSAPI cmsBool           CMSEXPORT cmsPipelineEvalReverseFloat(cmsFloat32Number Target[], cmsFloat32Number Result[], cmsFloat32Number Hint[], const cmsPipeline* lut);
+CMSAPI cmsBool           CMSEXPORT cmsPipelineCat(cmsPipeline* l1, const cmsPipeline* l2);
+CMSAPI cmsBool           CMSEXPORT cmsPipelineSetSaveAs8bitsFlag(cmsPipeline* lut, cmsBool On);
+
+// Where to place/locate the stages in the pipeline chain (loc argument of cmsPipelineInsertStage / cmsPipelineUnlinkStage)
+typedef enum { cmsAT_BEGIN, cmsAT_END } cmsStageLoc;
+
+CMSAPI cmsBool           CMSEXPORT cmsPipelineInsertStage(cmsPipeline* lut, cmsStageLoc loc, cmsStage* mpe);
+CMSAPI void              CMSEXPORT cmsPipelineUnlinkStage(cmsPipeline* lut, cmsStageLoc loc, cmsStage** mpe);
+
+// This function is quite useful to analyze the structure of a Pipeline and retrieve the Stage elements
+// that make up the Pipeline. It should be called with the Pipeline, the number of expected elements and
+// then a list of expected types followed by a list of double pointers to Stage elements. If
+// the function finds a match with the current pipeline, it fills the pointers and returns TRUE;
+// if not, it returns FALSE without touching anything.
+CMSAPI cmsBool           CMSEXPORT cmsPipelineCheckAndRetreiveStages(const cmsPipeline* Lut, cmsUInt32Number n, ...);
+
+// Matrix has double precision and CLUT has only float precision. That is because an ICC profile can encode
+// matrices with far more precision than CLUTs
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocIdentity(cmsContext ContextID, cmsUInt32Number nChannels);
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocToneCurves(cmsContext ContextID, cmsUInt32Number nChannels, cmsToneCurve* const Curves[]);
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocMatrix(cmsContext ContextID, cmsUInt32Number Rows, cmsUInt32Number Cols, const cmsFloat64Number* Matrix, const cmsFloat64Number* Offset);
+
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocCLut16bit(cmsContext ContextID, cmsUInt32Number nGridPoints, cmsUInt32Number inputChan, cmsUInt32Number outputChan, const cmsUInt16Number* Table);
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocCLutFloat(cmsContext ContextID, cmsUInt32Number nGridPoints, cmsUInt32Number inputChan, cmsUInt32Number outputChan, const cmsFloat32Number* Table);
+
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocCLut16bitGranular(cmsContext ContextID, const cmsUInt32Number clutPoints[], cmsUInt32Number inputChan, cmsUInt32Number outputChan, const cmsUInt16Number* Table);
+CMSAPI cmsStage*         CMSEXPORT cmsStageAllocCLutFloatGranular(cmsContext ContextID, const cmsUInt32Number clutPoints[], cmsUInt32Number inputChan, cmsUInt32Number outputChan, const cmsFloat32Number* Table);
+
+CMSAPI cmsStage*         CMSEXPORT cmsStageDup(cmsStage* mpe);
+CMSAPI void              CMSEXPORT cmsStageFree(cmsStage* mpe);
+CMSAPI cmsStage*         CMSEXPORT cmsStageNext(const cmsStage* mpe);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsStageInputChannels(const cmsStage* mpe);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsStageOutputChannels(const cmsStage* mpe);
+CMSAPI cmsStageSignature CMSEXPORT cmsStageType(const cmsStage* mpe);
+CMSAPI void*             CMSEXPORT cmsStageData(const cmsStage* mpe);
+
+// Sampling callbacks: In[] is read-only, Out[] is writable, Cargo is an opaque user pointer ('register' dropped: ill-formed in C++17)
+typedef cmsInt32Number (* cmsSAMPLER16)   (const cmsUInt16Number In[],
+                                            cmsUInt16Number Out[],
+                                            void * Cargo);
+
+typedef cmsInt32Number (* cmsSAMPLERFLOAT)(const cmsFloat32Number In[],
+                                            cmsFloat32Number Out[],
+                                            void * Cargo);
+
+// Use this flag (in the dwFlags argument of cmsStageSampleCLut*) to prevent changes being written to destination
+#define SAMPLER_INSPECT     0x01000000
+
+// For CLUT only
+CMSAPI cmsBool           CMSEXPORT cmsStageSampleCLut16bit(cmsStage* mpe,    cmsSAMPLER16 Sampler, void* Cargo, cmsUInt32Number dwFlags);
+CMSAPI cmsBool           CMSEXPORT cmsStageSampleCLutFloat(cmsStage* mpe, cmsSAMPLERFLOAT Sampler, void* Cargo, cmsUInt32Number dwFlags);
+
+// Slicers
+CMSAPI cmsBool           CMSEXPORT cmsSliceSpace16(cmsUInt32Number nInputs, const cmsUInt32Number clutPoints[],
+                                                   cmsSAMPLER16 Sampler, void * Cargo);
+
+CMSAPI cmsBool           CMSEXPORT cmsSliceSpaceFloat(cmsUInt32Number nInputs, const cmsUInt32Number clutPoints[],
+                                                   cmsSAMPLERFLOAT Sampler, void * Cargo);
+
+// Multilocalized Unicode management ---------------------------------------------------------------------------------------
+
+typedef struct _cms_MLU_struct cmsMLU;
+
+#define  cmsNoLanguage "\0\0"   // Two NUL characters; matches the char[3] LanguageCode parameters of the cmsMLU* functions
+#define  cmsNoCountry  "\0\0"   // Two NUL characters; matches the char[3] CountryCode parameters of the cmsMLU* functions
+
+CMSAPI cmsMLU*           CMSEXPORT cmsMLUalloc(cmsContext ContextID, cmsUInt32Number nItems);
+CMSAPI void              CMSEXPORT cmsMLUfree(cmsMLU* mlu);
+CMSAPI cmsMLU*           CMSEXPORT cmsMLUdup(const cmsMLU* mlu);
+
+CMSAPI cmsBool           CMSEXPORT cmsMLUsetASCII(cmsMLU* mlu,
+                                                  const char LanguageCode[3], const char CountryCode[3],
+                                                  const char* ASCIIString);
+CMSAPI cmsBool           CMSEXPORT cmsMLUsetWide(cmsMLU* mlu,
+                                                  const char LanguageCode[3], const char CountryCode[3],
+                                                  const wchar_t* WideString);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsMLUgetASCII(const cmsMLU* mlu,
+                                                  const char LanguageCode[3], const char CountryCode[3],
+                                                  char* Buffer,    cmsUInt32Number BufferSize);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsMLUgetWide(const cmsMLU* mlu,
+                                                 const char LanguageCode[3], const char CountryCode[3],
+                                                 wchar_t* Buffer, cmsUInt32Number BufferSize);
+
+CMSAPI cmsBool           CMSEXPORT cmsMLUgetTranslation(const cmsMLU* mlu,
+                                                         const char LanguageCode[3], const char CountryCode[3],
+                                                         char ObtainedLanguage[3], char ObtainedCountry[3]);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsMLUtranslationsCount(const cmsMLU* mlu);
+
+CMSAPI cmsBool           CMSEXPORT cmsMLUtranslationsCodes(const cmsMLU* mlu,
+                                                             cmsUInt32Number idx,
+                                                             char LanguageCode[3],
+                                                             char CountryCode[3]);
+ 
+// Under color removal & black generation ----------------------------------------------------------------------------------
+
+typedef struct {
+        cmsToneCurve* Ucr;    // Tone curve; presumably the under color removal curve (per the section title)
+        cmsToneCurve* Bg;     // Tone curve; presumably the black generation curve (per the section title)
+        cmsMLU*       Desc;   // Multilocalized text; presumably a description -- confirm
+
+} cmsUcrBg;
+
+// Screening ----------------------------------------------------------------------------------------------------------------
+
+#define cmsPRINTER_DEFAULT_SCREENS     0x0001   // presumably bits for cmsScreening.Flag -- confirm
+#define cmsFREQUENCE_UNITS_LINES_CM    0x0000   // bit 0x0002 clear
+#define cmsFREQUENCE_UNITS_LINES_INCH  0x0002   // bit 0x0002 set
+
+#define cmsSPOT_UNKNOWN         0               // cmsSPOT_*: presumably values for cmsScreeningChannel.SpotShape -- confirm
+#define cmsSPOT_PRINTER_DEFAULT 1
+#define cmsSPOT_ROUND           2
+#define cmsSPOT_DIAMOND         3
+#define cmsSPOT_ELLIPSE         4
+#define cmsSPOT_LINE            5
+#define cmsSPOT_SQUARE          6
+#define cmsSPOT_CROSS           7
+
+typedef struct {                     // Per-channel screening entry; element type of cmsScreening.Channels
+    cmsFloat64Number  Frequency;     // Units not shown here; see cmsFREQUENCE_UNITS_* (presumably selected via cmsScreening.Flag)
+    cmsFloat64Number  ScreenAngle;
+    cmsUInt32Number   SpotShape;     // presumably one of the cmsSPOT_* values -- confirm
+
+} cmsScreeningChannel;
+
+typedef struct {
+    cmsUInt32Number Flag;
+    cmsUInt32Number nChannels;                       // presumably the number of Channels entries in use -- confirm
+    cmsScreeningChannel Channels[cmsMAXCHANNELS];    // Fixed-size array, cmsMAXCHANNELS entries
+
+} cmsScreening;
+
+
+// Named color -----------------------------------------------------------------------------------------------------------------
+
+typedef struct _cms_NAMEDCOLORLIST_struct cmsNAMEDCOLORLIST;
+
+CMSAPI cmsNAMEDCOLORLIST* CMSEXPORT cmsAllocNamedColorList(cmsContext ContextID,
+                                                           cmsUInt32Number n,
+                                                           cmsUInt32Number ColorantCount,
+                                                           const char* Prefix, const char* Suffix);
+
+CMSAPI void               CMSEXPORT cmsFreeNamedColorList(cmsNAMEDCOLORLIST* v);
+CMSAPI cmsNAMEDCOLORLIST* CMSEXPORT cmsDupNamedColorList(const cmsNAMEDCOLORLIST* v);
+CMSAPI cmsBool            CMSEXPORT cmsAppendNamedColor(cmsNAMEDCOLORLIST* v, const char* Name,
+                                                            cmsUInt16Number PCS[3],
+                                                            cmsUInt16Number Colorant[cmsMAXCHANNELS]);
+
+CMSAPI cmsUInt32Number    CMSEXPORT cmsNamedColorCount(const cmsNAMEDCOLORLIST* v);
+CMSAPI cmsInt32Number     CMSEXPORT cmsNamedColorIndex(const cmsNAMEDCOLORLIST* v, const char* Name);
+
+CMSAPI cmsBool            CMSEXPORT cmsNamedColorInfo(const cmsNAMEDCOLORLIST* NamedColorList, cmsUInt32Number nColor,
+                                                      char* Name,
+                                                      char* Prefix,
+                                                      char* Suffix,
+                                                      cmsUInt16Number* PCS,
+                                                      cmsUInt16Number* Colorant);
+
+// Retrieve named color list from transform
+CMSAPI cmsNAMEDCOLORLIST* CMSEXPORT cmsGetNamedColorList(cmsHTRANSFORM xform);
+
+// Profile sequence -----------------------------------------------------------------------------------------------------
+
+// Profile sequence descriptor. Some fields come from the profile sequence descriptor tag, others
+// come from the Profile Sequence Identifier Tag. Element type of cmsSEQ.seq.
+typedef struct {
+
+    cmsSignature           deviceMfg;
+    cmsSignature           deviceModel;
+    cmsUInt64Number        attributes;
+    cmsTechnologySignature technology;
+    cmsProfileID           ProfileID;
+    cmsMLU*                Manufacturer;   // Multilocalized text (cmsMLU)
+    cmsMLU*                Model;          // Multilocalized text (cmsMLU)
+    cmsMLU*                Description;    // Multilocalized text (cmsMLU)
+
+} cmsPSEQDESC;
+
+typedef struct {                     // Returned by cmsAllocProfileSequenceDescription / cmsDupProfileSequenceDescription
+
+    cmsUInt32Number n;               // presumably the number of entries in seq -- confirm
+    cmsContext      ContextID;
+    cmsPSEQDESC*    seq;             // Pointer to cmsPSEQDESC entries
+
+} cmsSEQ;
+
+CMSAPI cmsSEQ*           CMSEXPORT cmsAllocProfileSequenceDescription(cmsContext ContextID, cmsUInt32Number n);
+CMSAPI cmsSEQ*           CMSEXPORT cmsDupProfileSequenceDescription(const cmsSEQ* pseq);
+CMSAPI void              CMSEXPORT cmsFreeProfileSequenceDescription(cmsSEQ* pseq);
+
+// Dictionaries --------------------------------------------------------------------------------------------------------
+
+typedef struct _cmsDICTentry_struct {    // Dictionary entry; returned by cmsDictGetEntryList / cmsDictNextEntry
+
+    struct _cmsDICTentry_struct* Next;   // Link to another entry of the same type (singly linked)
+
+    cmsMLU *DisplayName;                 // Multilocalized text (cmsMLU)
+    cmsMLU *DisplayValue;                // Multilocalized text (cmsMLU)
+    wchar_t* Name;                       // Wide-character string
+    wchar_t* Value;                      // Wide-character string
+
+} cmsDICTentry;
+
+CMSAPI cmsHANDLE           CMSEXPORT cmsDictAlloc(cmsContext ContextID);
+CMSAPI void                CMSEXPORT cmsDictFree(cmsHANDLE hDict);
+CMSAPI cmsHANDLE           CMSEXPORT cmsDictDup(cmsHANDLE hDict);
+
+CMSAPI cmsBool             CMSEXPORT cmsDictAddEntry(cmsHANDLE hDict, const wchar_t* Name, const wchar_t* Value, const cmsMLU *DisplayName, const cmsMLU *DisplayValue);
+CMSAPI const cmsDICTentry* CMSEXPORT cmsDictGetEntryList(cmsHANDLE hDict);
+CMSAPI const cmsDICTentry* CMSEXPORT cmsDictNextEntry(const cmsDICTentry* e);
+
+// Access to Profile data ----------------------------------------------------------------------------------------------
+CMSAPI cmsHPROFILE       CMSEXPORT cmsCreateProfilePlaceholder(cmsContext ContextID);
+
+CMSAPI cmsContext        CMSEXPORT cmsGetProfileContextID(cmsHPROFILE hProfile);
+CMSAPI cmsInt32Number    CMSEXPORT cmsGetTagCount(cmsHPROFILE hProfile);
+CMSAPI cmsTagSignature   CMSEXPORT cmsGetTagSignature(cmsHPROFILE hProfile, cmsUInt32Number n);
+CMSAPI cmsBool           CMSEXPORT cmsIsTag(cmsHPROFILE hProfile, cmsTagSignature sig);
+
+// Read and write pre-formatted data
+CMSAPI void*             CMSEXPORT cmsReadTag(cmsHPROFILE hProfile, cmsTagSignature sig);
+CMSAPI cmsBool           CMSEXPORT cmsWriteTag(cmsHPROFILE hProfile, cmsTagSignature sig, const void* data);
+CMSAPI cmsBool           CMSEXPORT cmsLinkTag(cmsHPROFILE hProfile, cmsTagSignature sig, cmsTagSignature dest);
+CMSAPI cmsTagSignature   CMSEXPORT cmsTagLinkedTo(cmsHPROFILE hProfile, cmsTagSignature sig);
+
+// Read and write raw data
+CMSAPI cmsUInt32Number   CMSEXPORT cmsReadRawTag(cmsHPROFILE hProfile, cmsTagSignature sig, void* Buffer, cmsUInt32Number BufferSize);
+CMSAPI cmsBool           CMSEXPORT cmsWriteRawTag(cmsHPROFILE hProfile, cmsTagSignature sig, const void* data, cmsUInt32Number Size);
+
+// Access header data. The constants below use bit 0x1 and bit 0x2; presumably for cmsGetHeaderFlags / cmsSetHeaderFlags -- confirm
+#define cmsEmbeddedProfileFalse    0x00000000
+#define cmsEmbeddedProfileTrue     0x00000001
+#define cmsUseAnywhere             0x00000000
+#define cmsUseWithEmbeddedDataOnly 0x00000002
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetHeaderFlags(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsGetHeaderAttributes(cmsHPROFILE hProfile, cmsUInt64Number* Flags);
+CMSAPI void              CMSEXPORT cmsGetHeaderProfileID(cmsHPROFILE hProfile, cmsUInt8Number* ProfileID);
+CMSAPI cmsBool           CMSEXPORT cmsGetHeaderCreationDateTime(cmsHPROFILE hProfile, struct tm *Dest);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetHeaderRenderingIntent(cmsHPROFILE hProfile);
+
+CMSAPI void              CMSEXPORT cmsSetHeaderFlags(cmsHPROFILE hProfile, cmsUInt32Number Flags);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetHeaderManufacturer(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsSetHeaderManufacturer(cmsHPROFILE hProfile, cmsUInt32Number manufacturer);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetHeaderCreator(cmsHPROFILE hProfile);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetHeaderModel(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsSetHeaderModel(cmsHPROFILE hProfile, cmsUInt32Number model);
+CMSAPI void              CMSEXPORT cmsSetHeaderAttributes(cmsHPROFILE hProfile, cmsUInt64Number Flags);
+CMSAPI void              CMSEXPORT cmsSetHeaderProfileID(cmsHPROFILE hProfile, cmsUInt8Number* ProfileID);
+CMSAPI void              CMSEXPORT cmsSetHeaderRenderingIntent(cmsHPROFILE hProfile, cmsUInt32Number RenderingIntent);
+
+CMSAPI cmsColorSpaceSignature
+                         CMSEXPORT cmsGetPCS(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsSetPCS(cmsHPROFILE hProfile, cmsColorSpaceSignature pcs);
+CMSAPI cmsColorSpaceSignature
+                         CMSEXPORT cmsGetColorSpace(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsSetColorSpace(cmsHPROFILE hProfile, cmsColorSpaceSignature sig);
+CMSAPI cmsProfileClassSignature
+                         CMSEXPORT cmsGetDeviceClass(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsSetDeviceClass(cmsHPROFILE hProfile, cmsProfileClassSignature sig);
+CMSAPI void              CMSEXPORT cmsSetProfileVersion(cmsHPROFILE hProfile, cmsFloat64Number Version);
+CMSAPI cmsFloat64Number  CMSEXPORT cmsGetProfileVersion(cmsHPROFILE hProfile);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetEncodedICCversion(cmsHPROFILE hProfile);
+CMSAPI void              CMSEXPORT cmsSetEncodedICCversion(cmsHPROFILE hProfile, cmsUInt32Number Version);
+
+// How profiles may be used
+#define LCMS_USED_AS_INPUT      0
+#define LCMS_USED_AS_OUTPUT     1
+#define LCMS_USED_AS_PROOF      2
+
+CMSAPI cmsBool           CMSEXPORT cmsIsIntentSupported(cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number UsedDirection);
+CMSAPI cmsBool           CMSEXPORT cmsIsMatrixShaper(cmsHPROFILE hProfile);
+CMSAPI cmsBool           CMSEXPORT cmsIsCLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number UsedDirection);
+
+// Translate from/to our notation to ICC
+CMSAPI cmsColorSpaceSignature   CMSEXPORT _cmsICCcolorSpace(int OurNotation);
+CMSAPI int                      CMSEXPORT _cmsLCMScolorSpace(cmsColorSpaceSignature ProfileSpace);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsChannelsOf(cmsColorSpaceSignature ColorSpace);
+
+// Build a suitable formatter for the colorspace of this profile. nBytes=1 means 8 bits, nBytes=2 means 16 bits. 
+CMSAPI cmsUInt32Number   CMSEXPORT cmsFormatterForColorspaceOfProfile(cmsHPROFILE hProfile, cmsUInt32Number nBytes, cmsBool lIsFloat);
+CMSAPI cmsUInt32Number   CMSEXPORT cmsFormatterForPCSOfProfile(cmsHPROFILE hProfile, cmsUInt32Number nBytes, cmsBool lIsFloat);
+
+
+// Localized info
+typedef enum {                                  // Type of the Info argument of cmsGetProfileInfo() and cmsGetProfileInfoASCII()
+             cmsInfoDescription  = 0,
+             cmsInfoManufacturer = 1,
+             cmsInfoModel        = 2,
+             cmsInfoCopyright    = 3
+} cmsInfoType;
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetProfileInfo(cmsHPROFILE hProfile, cmsInfoType Info,
+                                                            const char LanguageCode[3], const char CountryCode[3],
+                                                            wchar_t* Buffer, cmsUInt32Number BufferSize);
+
+CMSAPI cmsUInt32Number   CMSEXPORT cmsGetProfileInfoASCII(cmsHPROFILE hProfile, cmsInfoType Info,
+                                                            const char LanguageCode[3], const char CountryCode[3],
+                                                            char* Buffer, cmsUInt32Number BufferSize);
+
+// IO handlers ----------------------------------------------------------------------------------------------------------
+
+typedef struct _cms_io_handler cmsIOHANDLER;
+
+CMSAPI cmsIOHANDLER*     CMSEXPORT cmsOpenIOhandlerFromFile(cmsContext ContextID, const char* FileName, const char* AccessMode);
+CMSAPI cmsIOHANDLER*     CMSEXPORT cmsOpenIOhandlerFromStream(cmsContext ContextID, FILE* Stream);
+CMSAPI cmsIOHANDLER*     CMSEXPORT cmsOpenIOhandlerFromMem(cmsContext ContextID, void *Buffer, cmsUInt32Number size, const char* AccessMode);
+CMSAPI cmsIOHANDLER*     CMSEXPORT cmsOpenIOhandlerFromNULL(cmsContext ContextID);
+CMSAPI cmsIOHANDLER*     CMSEXPORT cmsGetProfileIOhandler(cmsHPROFILE hProfile);
+CMSAPI cmsBool           CMSEXPORT cmsCloseIOhandler(cmsIOHANDLER* io);
+
+// Profile high level functions ------------------------------------------------------------------------------------------
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromFile(const char *ICCProfile, const char *sAccess);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromFileTHR(cmsContext ContextID, const char *ICCProfile, const char *sAccess);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromStream(FILE* ICCProfile, const char* sAccess);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromStreamTHR(cmsContext ContextID, FILE* ICCProfile, const char* sAccess);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromMem(const void * MemPtr, cmsUInt32Number dwSize);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromMemTHR(cmsContext ContextID, const void * MemPtr, cmsUInt32Number dwSize);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromIOhandlerTHR(cmsContext ContextID, cmsIOHANDLER* io);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsOpenProfileFromIOhandler2THR(cmsContext ContextID, cmsIOHANDLER* io, cmsBool write);
+CMSAPI cmsBool          CMSEXPORT cmsCloseProfile(cmsHPROFILE hProfile);
+
+CMSAPI cmsBool          CMSEXPORT cmsSaveProfileToFile(cmsHPROFILE hProfile, const char* FileName);
+CMSAPI cmsBool          CMSEXPORT cmsSaveProfileToStream(cmsHPROFILE hProfile, FILE* Stream);
+CMSAPI cmsBool          CMSEXPORT cmsSaveProfileToMem(cmsHPROFILE hProfile, void *MemPtr, cmsUInt32Number* BytesNeeded);
+CMSAPI cmsUInt32Number  CMSEXPORT cmsSaveProfileToIOhandler(cmsHPROFILE hProfile, cmsIOHANDLER* io);
+
+// Predefined virtual profiles ------------------------------------------------------------------------------------------
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateRGBProfileTHR(cmsContext ContextID,
+                                                   const cmsCIExyY* WhitePoint,
+                                                   const cmsCIExyYTRIPLE* Primaries,
+                                                   cmsToneCurve* const TransferFunction[3]);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateRGBProfile(const cmsCIExyY* WhitePoint,
+                                                   const cmsCIExyYTRIPLE* Primaries,
+                                                   cmsToneCurve* const TransferFunction[3]);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateGrayProfileTHR(cmsContext ContextID,
+                                                    const cmsCIExyY* WhitePoint,
+                                                    const cmsToneCurve* TransferFunction);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateGrayProfile(const cmsCIExyY* WhitePoint,
+                                                    const cmsToneCurve* TransferFunction);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateLinearizationDeviceLinkTHR(cmsContext ContextID,
+                                                                cmsColorSpaceSignature ColorSpace,
+                                                                cmsToneCurve* const TransferFunctions[]);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateLinearizationDeviceLink(cmsColorSpaceSignature ColorSpace,
+                                                                cmsToneCurve* const TransferFunctions[]);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateInkLimitingDeviceLinkTHR(cmsContext ContextID,
+                                                              cmsColorSpaceSignature ColorSpace, cmsFloat64Number Limit);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateInkLimitingDeviceLink(cmsColorSpaceSignature ColorSpace, cmsFloat64Number Limit);
+
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateLab2ProfileTHR(cmsContext ContextID, const cmsCIExyY* WhitePoint);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateLab2Profile(const cmsCIExyY* WhitePoint);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateLab4ProfileTHR(cmsContext ContextID, const cmsCIExyY* WhitePoint);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateLab4Profile(const cmsCIExyY* WhitePoint);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateXYZProfileTHR(cmsContext ContextID);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateXYZProfile(void);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreate_sRGBProfileTHR(cmsContext ContextID);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreate_sRGBProfile(void);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateBCHSWabstractProfileTHR(cmsContext ContextID,
+                                                             cmsUInt32Number nLUTPoints,
+                                                             cmsFloat64Number Bright,
+                                                             cmsFloat64Number Contrast,
+                                                             cmsFloat64Number Hue,
+                                                             cmsFloat64Number Saturation,
+                                                             cmsUInt32Number TempSrc,
+                                                             cmsUInt32Number TempDest);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateBCHSWabstractProfile(cmsUInt32Number nLUTPoints,
+                                                             cmsFloat64Number Bright,
+                                                             cmsFloat64Number Contrast,
+                                                             cmsFloat64Number Hue,
+                                                             cmsFloat64Number Saturation,
+                                                             cmsUInt32Number TempSrc,
+                                                             cmsUInt32Number TempDest);
+
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateNULLProfileTHR(cmsContext ContextID);
+CMSAPI cmsHPROFILE      CMSEXPORT cmsCreateNULLProfile(void);
+
+// Converts a transform to a devicelink profile
+CMSAPI cmsHPROFILE      CMSEXPORT cmsTransform2DeviceLink(cmsHTRANSFORM hTransform, cmsFloat64Number Version, cmsUInt32Number dwFlags);
+
+// Intents ----------------------------------------------------------------------------------------------
+
+// ICC Intents
+#define INTENT_PERCEPTUAL                              0
+#define INTENT_RELATIVE_COLORIMETRIC                   1
+#define INTENT_SATURATION                              2
+#define INTENT_ABSOLUTE_COLORIMETRIC                   3
+
+// Non-ICC intents
+#define INTENT_PRESERVE_K_ONLY_PERCEPTUAL             10
+#define INTENT_PRESERVE_K_ONLY_RELATIVE_COLORIMETRIC  11
+#define INTENT_PRESERVE_K_ONLY_SATURATION             12
+#define INTENT_PRESERVE_K_PLANE_PERCEPTUAL            13
+#define INTENT_PRESERVE_K_PLANE_RELATIVE_COLORIMETRIC 14
+#define INTENT_PRESERVE_K_PLANE_SATURATION            15
+
+// Call with NULL as parameters to get the intent count
+CMSAPI cmsUInt32Number  CMSEXPORT cmsGetSupportedIntents(cmsUInt32Number nMax, cmsUInt32Number* Codes, char** Descriptions);
+CMSAPI cmsUInt32Number  CMSEXPORT cmsGetSupportedIntentsTHR(cmsContext ContextID, cmsUInt32Number nMax, cmsUInt32Number* Codes, char** Descriptions);
+
+// Flags
+
+#define cmsFLAGS_NOCACHE                  0x0040    // Inhibit 1-pixel cache
+#define cmsFLAGS_NOOPTIMIZE               0x0100    // Inhibit optimizations
+#define cmsFLAGS_NULLTRANSFORM            0x0200    // Don't transform anyway
+
+// Proofing flags
+#define cmsFLAGS_GAMUTCHECK               0x1000    // Out of Gamut alarm
+#define cmsFLAGS_SOFTPROOFING             0x4000    // Do softproofing
+
+// Misc
+#define cmsFLAGS_BLACKPOINTCOMPENSATION   0x2000
+#define cmsFLAGS_NOWHITEONWHITEFIXUP      0x0004    // Don't fix scum dot
+#define cmsFLAGS_HIGHRESPRECALC           0x0400    // Use more memory to give better accuracy
+#define cmsFLAGS_LOWRESPRECALC            0x0800    // Use less memory to minimize resources
+
+// For devicelink creation
+#define cmsFLAGS_8BITS_DEVICELINK         0x0008   // Create 8 bits devicelinks
+#define cmsFLAGS_GUESSDEVICECLASS         0x0020   // Guess device class (for transform2devicelink)
+#define cmsFLAGS_KEEP_SEQUENCE            0x0080   // Keep profile sequence for devicelink creation
+
+// Specific to particular optimizations
+#define cmsFLAGS_FORCE_CLUT               0x0002    // Force CLUT optimization
+#define cmsFLAGS_CLUT_POST_LINEARIZATION  0x0001    // create postlinearization tables if possible
+#define cmsFLAGS_CLUT_PRE_LINEARIZATION   0x0010    // create prelinearization tables if possible
+
+// Specific to unbounded mode
+#define cmsFLAGS_NONEGATIVES              0x8000    // Prevent negative numbers in floating point transforms
+
+// Copy alpha channels when transforming           
+#define cmsFLAGS_COPY_ALPHA               0x04000000 // Alpha channels are copied on cmsDoTransform()
+
+// Fine-tune control over number of gridpoints
+#define cmsFLAGS_GRIDPOINTS(n)           (((n) & 0xFF) << 16)
+
+// CRD special
+#define cmsFLAGS_NODEFAULTRESOURCEDEF     0x01000000
+
+// Transforms ---------------------------------------------------------------------------------------------------
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateTransformTHR(cmsContext ContextID,
+                                                  cmsHPROFILE Input,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsHPROFILE Output,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number dwFlags);
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateTransform(cmsHPROFILE Input,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsHPROFILE Output,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number dwFlags);
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateProofingTransformTHR(cmsContext ContextID,
+                                                  cmsHPROFILE Input,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsHPROFILE Output,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsHPROFILE Proofing,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number ProofingIntent,
+                                                  cmsUInt32Number dwFlags);
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateProofingTransform(cmsHPROFILE Input,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsHPROFILE Output,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsHPROFILE Proofing,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number ProofingIntent,
+                                                  cmsUInt32Number dwFlags);
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateMultiprofileTransformTHR(cmsContext ContextID,
+                                                  cmsHPROFILE hProfiles[],
+                                                  cmsUInt32Number nProfiles,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number dwFlags);
+
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateMultiprofileTransform(cmsHPROFILE hProfiles[],
+                                                  cmsUInt32Number nProfiles,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number dwFlags);
+
+
+CMSAPI cmsHTRANSFORM    CMSEXPORT cmsCreateExtendedTransform(cmsContext ContextID,
+                                                   cmsUInt32Number nProfiles, cmsHPROFILE hProfiles[],
+                                                   cmsBool  BPC[],
+                                                   cmsUInt32Number Intents[],
+                                                   cmsFloat64Number AdaptationStates[],
+                                                   cmsHPROFILE hGamutProfile,
+                                                   cmsUInt32Number nGamutPCSposition,
+                                                   cmsUInt32Number InputFormat,
+                                                   cmsUInt32Number OutputFormat,
+                                                   cmsUInt32Number dwFlags);
+
+CMSAPI void             CMSEXPORT cmsDeleteTransform(cmsHTRANSFORM hTransform);
+
+CMSAPI void             CMSEXPORT cmsDoTransform(cmsHTRANSFORM Transform,
+                                                 const void * InputBuffer,
+                                                 void * OutputBuffer,
+                                                 cmsUInt32Number Size);
+
+CMSAPI void             CMSEXPORT cmsDoTransformStride(cmsHTRANSFORM Transform,   // Deprecated
+                                                 const void * InputBuffer,
+                                                 void * OutputBuffer,
+                                                 cmsUInt32Number Size,
+                                                 cmsUInt32Number Stride);
+
+CMSAPI void             CMSEXPORT cmsDoTransformLineStride(cmsHTRANSFORM  Transform,
+                                                 const void* InputBuffer,
+                                                 void* OutputBuffer,
+                                                 cmsUInt32Number PixelsPerLine,
+                                                 cmsUInt32Number LineCount,
+                                                 cmsUInt32Number BytesPerLineIn,
+                                                 cmsUInt32Number BytesPerLineOut,
+                                                 cmsUInt32Number BytesPerPlaneIn,
+                                                 cmsUInt32Number BytesPerPlaneOut);
+
+
+CMSAPI void             CMSEXPORT cmsSetAlarmCodes(const cmsUInt16Number NewAlarm[cmsMAXCHANNELS]);
+CMSAPI void             CMSEXPORT cmsGetAlarmCodes(cmsUInt16Number NewAlarm[cmsMAXCHANNELS]);
+
+
+CMSAPI void             CMSEXPORT cmsSetAlarmCodesTHR(cmsContext ContextID, 
+                                                          const cmsUInt16Number AlarmCodes[cmsMAXCHANNELS]);
+CMSAPI void             CMSEXPORT cmsGetAlarmCodesTHR(cmsContext ContextID, 
+                                                          cmsUInt16Number AlarmCodes[cmsMAXCHANNELS]);
+
+
+
+// Adaptation state for absolute colorimetric intent
+CMSAPI cmsFloat64Number CMSEXPORT cmsSetAdaptationState(cmsFloat64Number d);
+CMSAPI cmsFloat64Number CMSEXPORT cmsSetAdaptationStateTHR(cmsContext ContextID, cmsFloat64Number d);
+
+
+
+// Grab the ContextID from an open transform. Returns NULL if a NULL transform is passed
+CMSAPI cmsContext       CMSEXPORT cmsGetTransformContextID(cmsHTRANSFORM hTransform);
+
+// Grab the input/output formats
+CMSAPI cmsUInt32Number CMSEXPORT cmsGetTransformInputFormat(cmsHTRANSFORM hTransform);
+CMSAPI cmsUInt32Number CMSEXPORT cmsGetTransformOutputFormat(cmsHTRANSFORM hTransform);
+
+// For backwards compatibility
+CMSAPI cmsBool          CMSEXPORT cmsChangeBuffersFormat(cmsHTRANSFORM hTransform,
+                                                         cmsUInt32Number InputFormat,
+                                                         cmsUInt32Number OutputFormat);
+
+
+
+// PostScript ColorRenderingDictionary and ColorSpaceArray ----------------------------------------------------
+
+typedef enum { cmsPS_RESOURCE_CSA, cmsPS_RESOURCE_CRD } cmsPSResourceType; // Type argument of cmsGetPostScriptColorResource(); presumably CSA = ColorSpaceArray, CRD = ColorRenderingDictionary -- confirm
+
+// lcms2 unified method to access postscript color resources
+CMSAPI cmsUInt32Number  CMSEXPORT cmsGetPostScriptColorResource(cmsContext ContextID,
+                                                                cmsPSResourceType Type,
+                                                                cmsHPROFILE hProfile,
+                                                                cmsUInt32Number Intent,
+                                                                cmsUInt32Number dwFlags,
+                                                                cmsIOHANDLER* io);
+
+CMSAPI cmsUInt32Number  CMSEXPORT cmsGetPostScriptCSA(cmsContext ContextID, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags, void* Buffer, cmsUInt32Number dwBufferLen);
+CMSAPI cmsUInt32Number  CMSEXPORT cmsGetPostScriptCRD(cmsContext ContextID, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags, void* Buffer, cmsUInt32Number dwBufferLen);
+
+
+// IT8.7 / CGATS.17-200x handling -----------------------------------------------------------------------------
+
+CMSAPI cmsHANDLE        CMSEXPORT cmsIT8Alloc(cmsContext ContextID);
+CMSAPI void             CMSEXPORT cmsIT8Free(cmsHANDLE hIT8);
+
+// Tables
+CMSAPI cmsUInt32Number  CMSEXPORT cmsIT8TableCount(cmsHANDLE hIT8);
+CMSAPI cmsInt32Number   CMSEXPORT cmsIT8SetTable(cmsHANDLE hIT8, cmsUInt32Number nTable);
+
+// Persistence
+CMSAPI cmsHANDLE        CMSEXPORT cmsIT8LoadFromFile(cmsContext ContextID, const char* cFileName);
+CMSAPI cmsHANDLE        CMSEXPORT cmsIT8LoadFromMem(cmsContext ContextID, const void *Ptr, cmsUInt32Number len);
+// CMSAPI cmsHANDLE        CMSEXPORT cmsIT8LoadFromIOhandler(cmsContext ContextID, cmsIOHANDLER* io);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SaveToFile(cmsHANDLE hIT8, const char* cFileName);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SaveToMem(cmsHANDLE hIT8, void *MemPtr, cmsUInt32Number* BytesNeeded);
+
+// Properties
+CMSAPI const char*      CMSEXPORT cmsIT8GetSheetType(cmsHANDLE hIT8);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetSheetType(cmsHANDLE hIT8, const char* Type);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetComment(cmsHANDLE hIT8, const char* cComment);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetPropertyStr(cmsHANDLE hIT8, const char* cProp, const char *Str);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetPropertyDbl(cmsHANDLE hIT8, const char* cProp, cmsFloat64Number Val);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetPropertyHex(cmsHANDLE hIT8, const char* cProp, cmsUInt32Number Val);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetPropertyMulti(cmsHANDLE hIT8, const char* Key, const char* SubKey, const char *Buffer);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetPropertyUncooked(cmsHANDLE hIT8, const char* Key, const char* Buffer);
+
+
+CMSAPI const char*      CMSEXPORT cmsIT8GetProperty(cmsHANDLE hIT8, const char* cProp);
+CMSAPI cmsFloat64Number CMSEXPORT cmsIT8GetPropertyDbl(cmsHANDLE hIT8, const char* cProp);
+CMSAPI const char*      CMSEXPORT cmsIT8GetPropertyMulti(cmsHANDLE hIT8, const char* Key, const char *SubKey);
+CMSAPI cmsUInt32Number  CMSEXPORT cmsIT8EnumProperties(cmsHANDLE hIT8, char ***PropertyNames);
+CMSAPI cmsUInt32Number  CMSEXPORT cmsIT8EnumPropertyMulti(cmsHANDLE hIT8, const char* cProp, const char ***SubpropertyNames);
+
+// Datasets
+CMSAPI const char*      CMSEXPORT cmsIT8GetDataRowCol(cmsHANDLE hIT8, int row, int col);
+CMSAPI cmsFloat64Number CMSEXPORT cmsIT8GetDataRowColDbl(cmsHANDLE hIT8, int row, int col);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetDataRowCol(cmsHANDLE hIT8, int row, int col,
+                                                const char* Val);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetDataRowColDbl(cmsHANDLE hIT8, int row, int col,
+                                                cmsFloat64Number Val);
+
+CMSAPI const char*      CMSEXPORT cmsIT8GetData(cmsHANDLE hIT8, const char* cPatch, const char* cSample);
+
+
+CMSAPI cmsFloat64Number CMSEXPORT cmsIT8GetDataDbl(cmsHANDLE hIT8, const char* cPatch, const char* cSample);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetData(cmsHANDLE hIT8, const char* cPatch,
+                                                const char* cSample,
+                                                const char *Val);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetDataDbl(cmsHANDLE hIT8, const char* cPatch,
+                                                const char* cSample,
+                                                cmsFloat64Number Val);
+
+CMSAPI int              CMSEXPORT cmsIT8FindDataFormat(cmsHANDLE hIT8, const char* cSample);
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetDataFormat(cmsHANDLE hIT8, int n, const char *Sample);
+CMSAPI int              CMSEXPORT cmsIT8EnumDataFormat(cmsHANDLE hIT8, char ***SampleNames);
+
+CMSAPI const char*      CMSEXPORT cmsIT8GetPatchName(cmsHANDLE hIT8, int nPatch, char* buffer);
+CMSAPI int              CMSEXPORT cmsIT8GetPatchByName(cmsHANDLE hIT8, const char *cPatch);
+
+// The LABEL extension
+CMSAPI int              CMSEXPORT cmsIT8SetTableByLabel(cmsHANDLE hIT8, const char* cSet, const char* cField, const char* ExpectedType);
+
+CMSAPI cmsBool          CMSEXPORT cmsIT8SetIndexColumn(cmsHANDLE hIT8, const char* cSample);
+
+// Formatter for double
+CMSAPI void             CMSEXPORT cmsIT8DefineDblFormat(cmsHANDLE hIT8, const char* Formatter);
+
+// Gamut boundary description routines ------------------------------------------------------------------------------
+
+CMSAPI cmsHANDLE        CMSEXPORT cmsGBDAlloc(cmsContext ContextID);
+CMSAPI void             CMSEXPORT cmsGBDFree(cmsHANDLE hGBD);
+CMSAPI cmsBool          CMSEXPORT cmsGDBAddPoint(cmsHANDLE hGBD, const cmsCIELab* Lab);
+CMSAPI cmsBool          CMSEXPORT cmsGDBCompute(cmsHANDLE  hGDB, cmsUInt32Number dwFlags);
+CMSAPI cmsBool          CMSEXPORT cmsGDBCheckPoint(cmsHANDLE hGBD, const cmsCIELab* Lab);
+
+// Feature detection  ----------------------------------------------------------------------------------------------
+
+// Estimate the black point
+CMSAPI cmsBool          CMSEXPORT cmsDetectBlackPoint(cmsCIEXYZ* BlackPoint, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags);
+CMSAPI cmsBool          CMSEXPORT cmsDetectDestinationBlackPoint(cmsCIEXYZ* BlackPoint, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags);
+
+// Estimate total area coverage
+CMSAPI cmsFloat64Number CMSEXPORT cmsDetectTAC(cmsHPROFILE hProfile);
+
+
+// Poor man's gamut mapping
+CMSAPI cmsBool          CMSEXPORT cmsDesaturateLab(cmsCIELab* Lab,
+                                                   double amax, double amin,
+                                                   double bmax, double bmin);
+
+#ifndef CMS_USE_CPP_API
+#   ifdef __cplusplus
+    }
+#   endif
+#endif
+
+#define _lcms2_H
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/include/lcms2_plugin.h b/codec/L2/demos/pikEnc/host/third_party/lcms/include/lcms2_plugin.h
new file mode 100755
index 0000000000..17bec42724
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/include/lcms2_plugin.h
@@ -0,0 +1,665 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+// This is the plug-in header file. Normal LittleCMS clients should not use it.
+// It is provided for plug-in writers that may want to access the support
+// functions to do low level operations. All plug-in related structures
+// are defined here. Including this file also pulls in the standard API (lcms2.h).
+
+#ifndef _lcms_plugin_H
+
+// Deal with Microsoft's attempt at deprecating C standard runtime functions
+#ifdef _MSC_VER
+#    if (_MSC_VER >= 1400)
+#      ifndef _CRT_SECURE_NO_DEPRECATE
+#        define _CRT_SECURE_NO_DEPRECATE
+#      endif
+#      ifndef _CRT_SECURE_NO_WARNINGS
+#        define _CRT_SECURE_NO_WARNINGS
+#      endif
+#    endif
+#endif
+
+#ifndef _lcms2_H
+#include "lcms2.h"
+#endif
+
+// We need some standard C functions.
+#include <stdlib.h>
+#include <math.h>
+#include <stdarg.h>
+#include <memory.h>
+#include <string.h>
+
+
+#ifndef CMS_USE_CPP_API
+#   ifdef __cplusplus
+extern "C" {
+#   endif
+#endif
+
+// Vector & Matrix operations -----------------------------------------------------------------------
+
+// Axis of the matrix/array. No specific meaning at all.
+#define VX      0
+#define VY      1
+#define VZ      2
+
+// Vectors
+typedef struct {
+    cmsFloat64Number n[3];
+
+    } cmsVEC3;
+
+// 3x3 Matrix
+typedef struct {
+    cmsVEC3 v[3];
+
+    } cmsMAT3;
+
+CMSAPI void               CMSEXPORT _cmsVEC3init(cmsVEC3* r, cmsFloat64Number x, cmsFloat64Number y, cmsFloat64Number z);
+CMSAPI void               CMSEXPORT _cmsVEC3minus(cmsVEC3* r, const cmsVEC3* a, const cmsVEC3* b);
+CMSAPI void               CMSEXPORT _cmsVEC3cross(cmsVEC3* r, const cmsVEC3* u, const cmsVEC3* v);
+CMSAPI cmsFloat64Number   CMSEXPORT _cmsVEC3dot(const cmsVEC3* u, const cmsVEC3* v);
+CMSAPI cmsFloat64Number   CMSEXPORT _cmsVEC3length(const cmsVEC3* a);
+CMSAPI cmsFloat64Number   CMSEXPORT _cmsVEC3distance(const cmsVEC3* a, const cmsVEC3* b);
+
+CMSAPI void               CMSEXPORT _cmsMAT3identity(cmsMAT3* a);
+CMSAPI cmsBool            CMSEXPORT _cmsMAT3isIdentity(const cmsMAT3* a);
+CMSAPI void               CMSEXPORT _cmsMAT3per(cmsMAT3* r, const cmsMAT3* a, const cmsMAT3* b);
+CMSAPI cmsBool            CMSEXPORT _cmsMAT3inverse(const cmsMAT3* a, cmsMAT3* b);
+CMSAPI cmsBool            CMSEXPORT _cmsMAT3solve(cmsVEC3* x, cmsMAT3* a, cmsVEC3* b);
+CMSAPI void               CMSEXPORT _cmsMAT3eval(cmsVEC3* r, const cmsMAT3* a, const cmsVEC3* v);
+
+
+// Error logging  -------------------------------------------------------------------------------------
+
+CMSAPI void               CMSEXPORT  cmsSignalError(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *ErrorText, ...);
+
+// Memory management ----------------------------------------------------------------------------------
+
+CMSAPI void*              CMSEXPORT _cmsMalloc(cmsContext ContextID, cmsUInt32Number size);
+CMSAPI void*              CMSEXPORT _cmsMallocZero(cmsContext ContextID, cmsUInt32Number size);
+CMSAPI void*              CMSEXPORT _cmsCalloc(cmsContext ContextID, cmsUInt32Number num, cmsUInt32Number size);
+CMSAPI void*              CMSEXPORT _cmsRealloc(cmsContext ContextID, void* Ptr, cmsUInt32Number NewSize);
+CMSAPI void               CMSEXPORT _cmsFree(cmsContext ContextID, void* Ptr);
+CMSAPI void*              CMSEXPORT _cmsDupMem(cmsContext ContextID, const void* Org, cmsUInt32Number size);
+
+// I/O handler ----------------------------------------------------------------------------------
+
+struct _cms_io_handler {
+
+    void* stream;   // Associated stream, which is implemented differently depending on media.
+
+    cmsContext        ContextID;
+    cmsUInt32Number   UsedSpace;
+    cmsUInt32Number   ReportedSize;
+    char              PhysicalFile[cmsMAX_PATH];
+    // Access callbacks; each one receives the handler itself as its first argument.
+    cmsUInt32Number   (* Read)(struct _cms_io_handler* iohandler, void *Buffer,
+                                                                  cmsUInt32Number size,
+                                                                  cmsUInt32Number count);
+    cmsBool           (* Seek)(struct _cms_io_handler* iohandler, cmsUInt32Number offset);
+    cmsBool           (* Close)(struct _cms_io_handler* iohandler);
+    cmsUInt32Number   (* Tell)(struct _cms_io_handler* iohandler);
+    cmsBool           (* Write)(struct _cms_io_handler* iohandler, cmsUInt32Number size,   // NB: size precedes Buffer here, unlike Read
+                                                                   const void* Buffer);
+};
+
+// Endianness adjust functions
+CMSAPI cmsUInt16Number   CMSEXPORT  _cmsAdjustEndianess16(cmsUInt16Number Word);
+CMSAPI cmsUInt32Number   CMSEXPORT  _cmsAdjustEndianess32(cmsUInt32Number Value);
+CMSAPI void              CMSEXPORT  _cmsAdjustEndianess64(cmsUInt64Number* Result, cmsUInt64Number* QWord);
+
+// Helper IO functions
+CMSAPI cmsBool           CMSEXPORT  _cmsReadUInt8Number(cmsIOHANDLER* io,  cmsUInt8Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsReadUInt16Number(cmsIOHANDLER* io, cmsUInt16Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsReadUInt32Number(cmsIOHANDLER* io, cmsUInt32Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsReadFloat32Number(cmsIOHANDLER* io, cmsFloat32Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsReadUInt64Number(cmsIOHANDLER* io, cmsUInt64Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsRead15Fixed16Number(cmsIOHANDLER* io, cmsFloat64Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsReadXYZNumber(cmsIOHANDLER* io, cmsCIEXYZ* XYZ);
+CMSAPI cmsBool           CMSEXPORT  _cmsReadUInt16Array(cmsIOHANDLER* io, cmsUInt32Number n, cmsUInt16Number* Array);
+
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteUInt8Number(cmsIOHANDLER* io, cmsUInt8Number n);
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteUInt16Number(cmsIOHANDLER* io, cmsUInt16Number n);
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteUInt32Number(cmsIOHANDLER* io, cmsUInt32Number n);
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteFloat32Number(cmsIOHANDLER* io, cmsFloat32Number n);
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteUInt64Number(cmsIOHANDLER* io, cmsUInt64Number* n);
+CMSAPI cmsBool           CMSEXPORT  _cmsWrite15Fixed16Number(cmsIOHANDLER* io, cmsFloat64Number n);
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteXYZNumber(cmsIOHANDLER* io, const cmsCIEXYZ* XYZ);
+CMSAPI cmsBool           CMSEXPORT  _cmsWriteUInt16Array(cmsIOHANDLER* io, cmsUInt32Number n, const cmsUInt16Number* Array);
+
+// ICC base tag
+typedef struct {
+    cmsTagTypeSignature  sig;
+    cmsInt8Number        reserved[4];
+
+} _cmsTagBase;
+
+// Type base helper functions
+CMSAPI cmsTagTypeSignature  CMSEXPORT _cmsReadTypeBase(cmsIOHANDLER* io);
+CMSAPI cmsBool              CMSEXPORT _cmsWriteTypeBase(cmsIOHANDLER* io, cmsTagTypeSignature sig);
+
+// Alignment functions
+CMSAPI cmsBool             CMSEXPORT _cmsReadAlignment(cmsIOHANDLER* io);
+CMSAPI cmsBool             CMSEXPORT _cmsWriteAlignment(cmsIOHANDLER* io);
+
+// To deal with text streams. 2K at most
+CMSAPI cmsBool             CMSEXPORT _cmsIOPrintf(cmsIOHANDLER* io, const char* frm, ...);
+
+// Fixed point helper functions
+CMSAPI cmsFloat64Number    CMSEXPORT _cms8Fixed8toDouble(cmsUInt16Number fixed8);
+CMSAPI cmsUInt16Number     CMSEXPORT _cmsDoubleTo8Fixed8(cmsFloat64Number val);
+
+CMSAPI cmsFloat64Number    CMSEXPORT _cms15Fixed16toDouble(cmsS15Fixed16Number fix32);
+CMSAPI cmsS15Fixed16Number CMSEXPORT _cmsDoubleTo15Fixed16(cmsFloat64Number v);
+
+// Date/time helper functions
+CMSAPI void                CMSEXPORT _cmsEncodeDateTimeNumber(cmsDateTimeNumber *Dest, const struct tm *Source);
+CMSAPI void                CMSEXPORT _cmsDecodeDateTimeNumber(const cmsDateTimeNumber *Source, struct tm *Dest);
+
+//----------------------------------------------------------------------------------------------------------
+
+// Shared callbacks for user data
+typedef void     (* _cmsFreeUserDataFn)(cmsContext ContextID, void* Data);
+typedef void*    (* _cmsDupUserDataFn)(cmsContext ContextID, const void* Data);
+
+//----------------------------------------------------------------------------------------------------------
+
+// Plug-in foundation
+#define cmsPluginMagicNumber                 0x61637070     // 'acpp'
+
+#define cmsPluginMemHandlerSig               0x6D656D48     // 'memH'
+#define cmsPluginInterpolationSig            0x696E7048     // 'inpH'
+#define cmsPluginParametricCurveSig          0x70617248     // 'parH'
+#define cmsPluginFormattersSig               0x66726D48     // 'frmH'
+#define cmsPluginTagTypeSig                  0x74797048     // 'typH'
+#define cmsPluginTagSig                      0x74616748     // 'tagH'
+#define cmsPluginRenderingIntentSig          0x696E7448     // 'intH'
+#define cmsPluginMultiProcessElementSig      0x6D706548     // 'mpeH'
+#define cmsPluginOptimizationSig             0x6F707448     // 'optH'
+#define cmsPluginTransformSig                0x7A666D48     // 'xfmH' intended; the bytes actually spell 'zfmH'
+#define cmsPluginMutexSig                    0x6D747A48     // 'mtxH' intended; the bytes actually spell 'mtzH'
+
+typedef struct _cmsPluginBaseStruct {
+
+        cmsUInt32Number                Magic;               // 'acpp' signature
+        cmsUInt32Number                ExpectedVersion;     // Expected version of LittleCMS
+        cmsUInt32Number                Type;                // Type of plug-in
+        struct _cmsPluginBaseStruct*   Next;                // For multiple plugin definition. NULL for end of list.
+
+} cmsPluginBase;
+
+// Maximum number of types in a plugin array
+#define MAX_TYPES_IN_LCMS_PLUGIN    20
+
+//----------------------------------------------------------------------------------------------------------
+
+// Memory handler. Each new plug-in type replaces current behaviour
+
+typedef void* (* _cmsMallocFnPtrType)(cmsContext ContextID, cmsUInt32Number size); 
+typedef void  (* _cmsFreeFnPtrType)(cmsContext ContextID, void *Ptr);
+typedef void* (* _cmsReallocFnPtrType)(cmsContext ContextID, void* Ptr, cmsUInt32Number NewSize);
+// The three types below are used by the members marked "Optional" in cmsPluginMemHandler.
+typedef void* (* _cmsMalloZerocFnPtrType)(cmsContext ContextID, cmsUInt32Number size);   // sic: "MalloZeroc" is the declared spelling
+typedef void* (* _cmsCallocFnPtrType)(cmsContext ContextID, cmsUInt32Number num, cmsUInt32Number size);
+typedef void* (* _cmsDupFnPtrType)(cmsContext ContextID, const void* Org, cmsUInt32Number size);
+
+typedef struct {
+
+        cmsPluginBase base;
+
+        // Required
+        _cmsMallocFnPtrType  MallocPtr;
+        _cmsFreeFnPtrType    FreePtr;
+        _cmsReallocFnPtrType ReallocPtr;
+
+        // Optional
+       _cmsMalloZerocFnPtrType MallocZeroPtr;
+       _cmsCallocFnPtrType     CallocPtr;
+       _cmsDupFnPtrType        DupPtr;
+
+} cmsPluginMemHandler;
+
+
+// ------------------------------------------------------------------------------------------------------------------
+
+// Interpolation. 16 bits and floating point versions.
+struct _cms_interp_struc;
+
+// Interpolation callbacks
+
+// 16 bits forward interpolation. This function performs precision-limited linear interpolation
+// and is supposed to be quite fast. Implementation may be tetrahedral or trilinear, and plug-ins may
+// choose to implement any other interpolation algorithm. ('register' is deliberately not used: ill-formed in C++17.)
+typedef void (* _cmsInterpFn16)(const cmsUInt16Number Input[],
+                                cmsUInt16Number Output[],
+                                const struct _cms_interp_struc* p);
+
+// Floating point forward interpolation. Full precision interpolation using floats. This is not a
+// time critical function. Implementation may be tetrahedral or trilinear, and plug-ins may
+// choose to implement any other interpolation algorithm.
+typedef void (* _cmsInterpFnFloat)(cmsFloat32Number const Input[],
+                                   cmsFloat32Number Output[],
+                                   const struct _cms_interp_struc* p);
+
+
+
+// This type holds a pointer to an interpolator that can be either 16 bits or float
+typedef union {
+    _cmsInterpFn16       Lerp16;            // Forward interpolation in 16 bits
+    _cmsInterpFnFloat    LerpFloat;         // Forward interpolation in floating point
+} cmsInterpFunction;
+
+// Flags for interpolator selection
+#define CMS_LERP_FLAGS_16BITS             0x0000        // The default
+#define CMS_LERP_FLAGS_FLOAT              0x0001        // Requires different implementation
+#define CMS_LERP_FLAGS_TRILINEAR          0x0100        // Hint only
+
+
+#define MAX_INPUT_DIMENSIONS 8
+
+typedef struct _cms_interp_struc {  // Used on all interpolations. Supplied by lcms2 when calling the interpolation function
+
+    cmsContext ContextID;     // The calling thread
+
+    cmsUInt32Number dwFlags;  // Keep original flags
+    cmsUInt32Number nInputs;  // != 1 only in 3D interpolation
+    cmsUInt32Number nOutputs; // != 1 only in 3D interpolation
+
+    cmsUInt32Number nSamples[MAX_INPUT_DIMENSIONS];  // Valid on all kinds of tables
+    cmsUInt32Number Domain[MAX_INPUT_DIMENSIONS];    // Domain = nSamples - 1
+
+    cmsUInt32Number opta[MAX_INPUT_DIMENSIONS];     // Optimization for 3D CLUT. This is the number of nodes premultiplied for each
+                                                    // dimension. For example, with 7 nodes: 7, 7^2, 7^3, 7^4, etc. On non-regular
+                                                    // samplings it may vary according to the number of nodes for each dimension.
+
+    const void *Table;                // Points to the actual interpolation table
+    cmsInterpFunction Interpolation;  // Points to the function to do the interpolation
+
+ } cmsInterpParams;
+
+// Interpolators factory
+typedef cmsInterpFunction (* cmsInterpFnFactory)(cmsUInt32Number nInputChannels, cmsUInt32Number nOutputChannels, cmsUInt32Number dwFlags);
+
+// The plug-in
+typedef struct {
+    cmsPluginBase base;
+
+    // Points to a user-supplied function which implements the factory
+    cmsInterpFnFactory InterpolatorsFactory;
+
+} cmsPluginInterpolation;
+
+//----------------------------------------------------------------------------------------------------------
+
+// Parametric curves. A negative type means same function but analytically inverted. Max. number of params is 10
+
+// Evaluator callback for user-supplied parametric curves. May implement more than one type
+typedef  cmsFloat64Number (* cmsParametricCurveEvaluator)(cmsInt32Number Type, const cmsFloat64Number Params[10], cmsFloat64Number R);
+
+// Plug-in may implement an arbitrary number of parametric curves
+typedef struct {
+    cmsPluginBase base;
+
+    cmsUInt32Number nFunctions;                                     // Number of supported functions
+    cmsUInt32Number FunctionTypes[MAX_TYPES_IN_LCMS_PLUGIN];        // The identification types
+    cmsUInt32Number ParameterCount[MAX_TYPES_IN_LCMS_PLUGIN];       // Number of parameters for each function
+
+    cmsParametricCurveEvaluator    Evaluator;                       // The evaluator
+
+} cmsPluginParametricCurves;
+//----------------------------------------------------------------------------------------------------------
+
+// Formatters. This plug-in adds new handlers, replacing them if they already exist. Formatters dealing with
+// cmsFloat32Number (bps = 4) or double (bps = 0) types are requested via FormatterFloat callback. Others come across
+// Formatter16 callback
+
+struct _cmstransform_struct;
+
+typedef cmsUInt8Number* (* cmsFormatter16)(struct _cmstransform_struct* CMMcargo,   // 16-bit formatter callback; no 'register' (ill-formed in C++17)
+                                           cmsUInt16Number Values[],
+                                           cmsUInt8Number* Buffer,
+                                           cmsUInt32Number Stride);
+
+typedef cmsUInt8Number* (* cmsFormatterFloat)(struct _cmstransform_struct* CMMcargo,
+                                              cmsFloat32Number Values[],
+                                              cmsUInt8Number*  Buffer,
+                                              cmsUInt32Number  Stride);
+
+// This type holds a pointer to a formatter that can be either 16 bits or cmsFloat32Number
+typedef union {
+    cmsFormatter16    Fmt16;
+    cmsFormatterFloat FmtFloat;
+
+} cmsFormatter;
+
+#define CMS_PACK_FLAGS_16BITS       0x0000
+#define CMS_PACK_FLAGS_FLOAT        0x0001
+
+typedef enum { cmsFormatterInput=0, cmsFormatterOutput=1 } cmsFormatterDirection;
+
+typedef cmsFormatter (* cmsFormatterFactory)(cmsUInt32Number Type,           // Specific type, i.e. TYPE_RGB_8
+                                             cmsFormatterDirection Dir,
+                                             cmsUInt32Number dwFlags);      // precision
+
+// Plug-in may implement an arbitrary number of formatters
+typedef struct {
+    cmsPluginBase          base;
+    cmsFormatterFactory    FormattersFactory;
+
+} cmsPluginFormatters;
+
+//----------------------------------------------------------------------------------------------------------
+
+// Tag type handler. Each type is free to return anything it wants, and it is up to the caller to
+// know in advance what is the type contained in the tag.
+typedef struct _cms_typehandler_struct {
+
+        cmsTagTypeSignature Signature;     // The signature of the type
+
+        // Allocates and reads items
+        void *   (* ReadPtr)(struct _cms_typehandler_struct* self,
+                             cmsIOHANDLER*      io,
+                             cmsUInt32Number*   nItems,
+                             cmsUInt32Number    SizeOfTag);
+
+        // Writes n Items
+        cmsBool  (* WritePtr)(struct _cms_typehandler_struct* self,
+                              cmsIOHANDLER*     io,
+                              void*             Ptr,
+                              cmsUInt32Number   nItems);
+
+        // Duplicate an item or array of items
+        void*   (* DupPtr)(struct _cms_typehandler_struct* self,
+                           const void *Ptr,
+                           cmsUInt32Number n);
+
+        // Free all resources
+        void    (* FreePtr)(struct _cms_typehandler_struct* self,
+                            void *Ptr);
+
+        // Additional parameters used by the calling thread
+        cmsContext       ContextID;
+        cmsUInt32Number  ICCVersion;
+
+} cmsTagTypeHandler;
+
+// Each plug-in implements a single type
+typedef struct {
+        cmsPluginBase      base;
+        cmsTagTypeHandler  Handler;
+
+} cmsPluginTagType;
+
+//----------------------------------------------------------------------------------------------------------
+
+// This is the tag plugin, which identifies tags. For writing, a pointer to function is provided.
+// This function should return the desired type for this tag, given the version of profile
+// and the data being serialized.
+typedef struct {
+
+    cmsUInt32Number     ElemCount;          // If this tag needs an array, how many elements it should keep
+
+    // For reading.
+    cmsUInt32Number     nSupportedTypes;    // Number of types this tag can come in (MAX_TYPES_IN_LCMS_PLUGIN maximum)
+    cmsTagTypeSignature SupportedTypes[MAX_TYPES_IN_LCMS_PLUGIN];
+
+    // For writing
+    cmsTagTypeSignature (* DecideType)(cmsFloat64Number ICCVersion, const void *Data);
+
+} cmsTagDescriptor;
+
+// Plug-in implements a single tag
+typedef struct {
+    cmsPluginBase    base;
+
+    cmsTagSignature  Signature;
+    cmsTagDescriptor Descriptor;
+
+} cmsPluginTag;
+
+//----------------------------------------------------------------------------------------------------------
+
+// Custom intents. This function should join all profiles specified in the array in
+// a single LUT. Any custom intent in the chain redirects to custom function. If more than
+// one custom intent is found, the one located first is invoked. Usually users should use only one
+// custom intent, so mixing custom intents in same multiprofile transform is not supported.
+
+typedef cmsPipeline* (* cmsIntentFn)( cmsContext       ContextID,
+                                      cmsUInt32Number  nProfiles,
+                                      cmsUInt32Number  Intents[],
+                                      cmsHPROFILE      hProfiles[],
+                                      cmsBool          BPC[],
+                                      cmsFloat64Number AdaptationStates[],
+                                      cmsUInt32Number  dwFlags);
+
+
+// Each plug-in defines a single intent number.
+typedef struct {
+    cmsPluginBase     base;
+    cmsUInt32Number   Intent;
+    cmsIntentFn       Link;
+    char              Description[256];
+
+} cmsPluginRenderingIntent;
+
+
+// The default ICC intents (perceptual, saturation, rel.col and abs.col)
+CMSAPI cmsPipeline*  CMSEXPORT _cmsDefaultICCintents(cmsContext       ContextID,
+                                                     cmsUInt32Number  nProfiles,
+                                                     cmsUInt32Number  Intents[],
+                                                     cmsHPROFILE      hProfiles[],
+                                                     cmsBool          BPC[],
+                                                     cmsFloat64Number AdaptationStates[],
+                                                     cmsUInt32Number  dwFlags);
+
+
+//----------------------------------------------------------------------------------------------------------
+
+// Pipelines, Multi Process Elements.
+
+typedef void (* _cmsStageEvalFn)     (const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage* mpe);
+typedef void*(* _cmsStageDupElemFn)  (cmsStage* mpe);
+typedef void (* _cmsStageFreeElemFn) (cmsStage* mpe);
+
+
+// This function allocates a generic MPE
+CMSAPI cmsStage* CMSEXPORT _cmsStageAllocPlaceholder(cmsContext ContextID,
+                                cmsStageSignature     Type,
+                                cmsUInt32Number       InputChannels,
+                                cmsUInt32Number       OutputChannels,
+                                _cmsStageEvalFn       EvalPtr,            // Points to fn that evaluates the element (always in floating point)
+                                _cmsStageDupElemFn    DupElemPtr,         // Points to a fn that duplicates the stage
+                                _cmsStageFreeElemFn   FreePtr,            // Points to a fn that sets the element free
+                                void*                 Data);              // A generic pointer to whatever memory needed by the element
+typedef struct {
+      cmsPluginBase     base;
+      cmsTagTypeHandler Handler;
+
+}  cmsPluginMultiProcessElement;
+
+
+// Data kept in "Element" member of cmsStage
+
+// Curves
+typedef struct {
+    cmsUInt32Number nCurves;
+    cmsToneCurve**  TheCurves;
+
+} _cmsStageToneCurvesData;
+
+// Matrix
+typedef struct {
+    cmsFloat64Number*  Double;          // floating point for the matrix
+    cmsFloat64Number*  Offset;          // The offset
+
+} _cmsStageMatrixData;
+
+// CLUT
+typedef struct {
+
+    union {                       // Holds only one of the two representations at a time
+        cmsUInt16Number*  T;      // Points to the 16 bits table
+        cmsFloat32Number* TFloat; // Points to the cmsFloat32Number table
+
+    } Tab;
+
+    cmsInterpParams* Params;
+    cmsUInt32Number  nEntries;
+    cmsBool          HasFloatValues;   // presumably tells whether Tab.TFloat (rather than Tab.T) is in use - TODO confirm
+
+} _cmsStageCLutData;
+
+
+//----------------------------------------------------------------------------------------------------------
+// Optimization. Using this plug-in, additional optimization strategies may be implemented.
+// The function should return TRUE if any optimization is done on the LUT; this terminates
+// the optimization search. It should return FALSE if it is unable to optimize and wants to
+// give a chance to the rest of the optimizers.
+
+typedef void     (* _cmsOPTeval16Fn)(const cmsUInt16Number In[],     // 16-bit evaluator callback; no 'register' (ill-formed in C++17)
+                                     cmsUInt16Number Out[],
+                                     const void* Data);
+
+
+typedef cmsBool  (* _cmsOPToptimizeFn)(cmsPipeline** Lut,
+                                       cmsUInt32Number  Intent,
+                                       cmsUInt32Number* InputFormat,
+                                       cmsUInt32Number* OutputFormat,
+                                       cmsUInt32Number* dwFlags);
+
+// This function may be used to set the optional evaluator and a block of private data. If private data is being used, an optional
+// duplicator and free functions should also be specified in order to duplicate the LUT construct. Use NULL to inhibit such functionality.
+
+CMSAPI void CMSEXPORT _cmsPipelineSetOptimizationParameters(cmsPipeline* Lut,
+                                               _cmsOPTeval16Fn Eval16,
+                                               void* PrivateData,
+                                               _cmsFreeUserDataFn FreePrivateDataFn,
+                                               _cmsDupUserDataFn DupPrivateDataFn);
+
+typedef struct {
+      cmsPluginBase     base;
+
+      // Optimize entry point
+      _cmsOPToptimizeFn  OptimizePtr;
+
+}  cmsPluginOptimization;
+
+//----------------------------------------------------------------------------------------------------------
+// Full xform
+
+typedef struct {
+       cmsUInt32Number BytesPerLineIn;
+       cmsUInt32Number BytesPerLineOut;
+       cmsUInt32Number BytesPerPlaneIn;
+       cmsUInt32Number BytesPerPlaneOut;
+
+} cmsStride;
+
+typedef void     (* _cmsTransformFn)(struct _cmstransform_struct *CMMcargo,   // Legacy function, handles just ONE scanline.
+                                     const void* InputBuffer,
+                                     void* OutputBuffer,
+                                     cmsUInt32Number Size,
+                                     cmsUInt32Number Stride);                 // Stride in bytes to the next plane in planar formats
+
+
+typedef void     (*_cmsTransform2Fn)(struct _cmstransform_struct *CMMcargo,
+                                     const void* InputBuffer,
+                                     void* OutputBuffer,
+                                     cmsUInt32Number PixelsPerLine,     
+                                     cmsUInt32Number LineCount,          
+                                     const cmsStride* Stride);  
+
+typedef cmsBool  (* _cmsTransformFactory)(_cmsTransformFn* xform,
+                                         void** UserData,
+                                         _cmsFreeUserDataFn* FreePrivateDataFn,
+                                         cmsPipeline** Lut,
+                                         cmsUInt32Number* InputFormat,
+                                         cmsUInt32Number* OutputFormat,
+                                         cmsUInt32Number* dwFlags);
+
+typedef cmsBool  (* _cmsTransform2Factory)(_cmsTransform2Fn* xform,
+                                         void** UserData,
+                                         _cmsFreeUserDataFn* FreePrivateDataFn,
+                                         cmsPipeline** Lut,
+                                         cmsUInt32Number* InputFormat,
+                                         cmsUInt32Number* OutputFormat,
+                                         cmsUInt32Number* dwFlags);
+
+
+// Retrieve user data as specified by the factory
+CMSAPI void   CMSEXPORT _cmsSetTransformUserData(struct _cmstransform_struct *CMMcargo, void* ptr, _cmsFreeUserDataFn FreePrivateDataFn);
+CMSAPI void * CMSEXPORT _cmsGetTransformUserData(struct _cmstransform_struct *CMMcargo);
+
+
+// Retrieve formatters
+CMSAPI void   CMSEXPORT _cmsGetTransformFormatters16   (struct _cmstransform_struct *CMMcargo, cmsFormatter16* FromInput, cmsFormatter16* ToOutput);
+CMSAPI void   CMSEXPORT _cmsGetTransformFormattersFloat(struct _cmstransform_struct *CMMcargo, cmsFormatterFloat* FromInput, cmsFormatterFloat* ToOutput);
+
+typedef struct {
+      cmsPluginBase     base;
+
+      // Transform entry point: the two factory flavours share storage, so only one is held at a time
+      union {
+             _cmsTransformFactory        legacy_xform;   // factory that yields a _cmsTransformFn
+             _cmsTransform2Factory       xform;          // factory that yields a _cmsTransform2Fn
+      } factories;
+
+}  cmsPluginTransform;
+
+//----------------------------------------------------------------------------------------------------------
+// Mutex 
+
+typedef void*    (* _cmsCreateMutexFnPtrType)(cmsContext ContextID);
+typedef void     (* _cmsDestroyMutexFnPtrType)(cmsContext ContextID, void* mtx);
+typedef cmsBool  (* _cmsLockMutexFnPtrType)(cmsContext ContextID, void* mtx);
+typedef void     (* _cmsUnlockMutexFnPtrType)(cmsContext ContextID, void* mtx);
+
+typedef struct {
+      cmsPluginBase     base;
+
+     _cmsCreateMutexFnPtrType  CreateMutexPtr;
+     _cmsDestroyMutexFnPtrType DestroyMutexPtr;
+     _cmsLockMutexFnPtrType    LockMutexPtr;
+     _cmsUnlockMutexFnPtrType  UnlockMutexPtr;
+
+}  cmsPluginMutex;
+
+CMSAPI void*   CMSEXPORT _cmsCreateMutex(cmsContext ContextID);
+CMSAPI void    CMSEXPORT _cmsDestroyMutex(cmsContext ContextID, void* mtx);
+CMSAPI cmsBool CMSEXPORT _cmsLockMutex(cmsContext ContextID, void* mtx);
+CMSAPI void    CMSEXPORT _cmsUnlockMutex(cmsContext ContextID, void* mtx);
+
+
+#ifndef CMS_USE_CPP_API
+#   ifdef __cplusplus
+    }
+#   endif
+#endif
+
+#define _lcms_plugin_H
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/Makefile.am b/codec/L2/demos/pikEnc/host/third_party/lcms/src/Makefile.am
new file mode 100755
index 0000000000..1d7ded8c39
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/Makefile.am
@@ -0,0 +1,31 @@
+#
+# Makefile for building lcms 2 library
+#
+
+# "foreign" strictness: don't require all the GNU mandated files (NEWS, README, AUTHORS, ChangeLog)
+AUTOMAKE_OPTIONS = 1.7 foreign
+
+# CFLAGS = -pedantic -Wall -std=c99 -O3
+
+includedir = ${prefix}/include
+
+# Shared libraries built in this directory
+lib_LTLIBRARIES = liblcms2.la
+
+LIBRARY_CURRENT    = @LIBRARY_CURRENT@
+LIBRARY_REVISION   = @LIBRARY_REVISION@
+LIBRARY_AGE        = @LIBRARY_AGE@
+
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include
+
+liblcms2_la_LDFLAGS = -no-undefined \
+  -version-info $(LIBRARY_CURRENT):$(LIBRARY_REVISION):$(LIBRARY_AGE)
+
+liblcms2_la_LIBADD = $(LCMS_LIB_DEPLIBS)
+
+liblcms2_la_SOURCES = \
+  cmscnvrt.c cmserr.c cmsgamma.c cmsgmt.c cmsintrp.c cmsio0.c cmsio1.c cmslut.c \
+  cmsplugin.c cmssm.c cmsmtrx.c cmspack.c cmspcs.c cmswtpnt.c cmsxform.c \
+  cmssamp.c cmsnamed.c cmscam02.c cmsvirt.c cmstypes.c cmscgats.c cmsps2.c cmsopt.c \
+  cmshalf.c cmsalpha.c lcms2_internal.h
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/Makefile.in b/codec/L2/demos/pikEnc/host/third_party/lcms/src/Makefile.in
new file mode 100755
index 0000000000..5d699a0b86
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/Makefile.in
@@ -0,0 +1,723 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for building lcms 2 library
+#
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_append_compile_flags.m4 \
+	$(top_srcdir)/m4/ax_append_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+	$(top_srcdir)/m4/ax_require_defined.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
+am__installdirs = "$(DESTDIR)$(libdir)"
+LTLIBRARIES = $(lib_LTLIBRARIES)
+am__DEPENDENCIES_1 =
+liblcms2_la_DEPENDENCIES = $(am__DEPENDENCIES_1)
+am_liblcms2_la_OBJECTS = cmscnvrt.lo cmserr.lo cmsgamma.lo cmsgmt.lo \
+	cmsintrp.lo cmsio0.lo cmsio1.lo cmslut.lo cmsplugin.lo \
+	cmssm.lo cmsmtrx.lo cmspack.lo cmspcs.lo cmswtpnt.lo \
+	cmsxform.lo cmssamp.lo cmsnamed.lo cmscam02.lo cmsvirt.lo \
+	cmstypes.lo cmscgats.lo cmsps2.lo cmsopt.lo cmshalf.lo \
+	cmsalpha.lo
+liblcms2_la_OBJECTS = $(am_liblcms2_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 = 
+liblcms2_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(liblcms2_la_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(liblcms2_la_SOURCES)
+DIST_SOURCES = $(liblcms2_la_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_THREAD = @LIB_THREAD@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_CXX = @PTHREAD_CXX@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+
+# CFLAGS = -pedantic -Wall -std=c99 -O3
+includedir = ${prefix}/include
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign
+
+# Shared libraries built in this directory
+lib_LTLIBRARIES = liblcms2.la
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include
+liblcms2_la_LDFLAGS = -no-undefined \
+  -version-info $(LIBRARY_CURRENT):$(LIBRARY_REVISION):$(LIBRARY_AGE)
+
+liblcms2_la_LIBADD = $(LCMS_LIB_DEPLIBS)
+liblcms2_la_SOURCES = \
+  cmscnvrt.c cmserr.c cmsgamma.c cmsgmt.c cmsintrp.c cmsio0.c cmsio1.c cmslut.c \
+  cmsplugin.c cmssm.c cmsmtrx.c cmspack.c cmspcs.c cmswtpnt.c cmsxform.c \
+  cmssamp.c cmsnamed.c cmscam02.c cmsvirt.c cmstypes.c cmscgats.c cmsps2.c cmsopt.c \
+  cmshalf.c cmsalpha.c lcms2_internal.h
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign src/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+# Install every lib_LTLIBRARIES entry that exists as a file into $(DESTDIR)$(libdir) with libtool --mode=install; the directory is created first.
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+	@$(NORMAL_INSTALL)
+	@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
+	list2=; for p in $$list; do \
+	  if test -f $$p; then \
+	    list2="$$list2 $$p"; \
+	  else :; fi; \
+	done; \
+	test -z "$$list2" || { \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
+	}
+# Remove each installed library (by base name) from $(DESTDIR)$(libdir) with libtool --mode=uninstall; an empty libdir empties the list.
+uninstall-libLTLIBRARIES:
+	@$(NORMAL_UNINSTALL)
+	@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
+	for p in $$list; do \
+	  $(am__strip_dir) \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \
+	done
+# Delete the built .la files, then the so_locations file in each directory that holds one of them (errors from the first rm are ignored).
+clean-libLTLIBRARIES:
+	-test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+	@list='$(lib_LTLIBRARIES)'; \
+	locs=`for p in $$list; do echo $$p; done | \
+	      sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+	      sort -u`; \
+	test -z "$$locs" || { \
+	  echo rm -f $${locs}; \
+	  rm -f $${locs}; \
+	}
+# Link the libtool library from its objects plus LIBADD and LIBS; -rpath $(libdir) names the directory it will be installed into.
+liblcms2.la: $(liblcms2_la_OBJECTS) $(liblcms2_la_DEPENDENCIES) $(EXTRA_liblcms2_la_DEPENDENCIES) 
+	$(AM_V_CCLD)$(liblcms2_la_LINK) -rpath $(libdir) $(liblcms2_la_OBJECTS) $(liblcms2_la_LIBADD) $(LIBS)
+# Remove object files in the current directory; the leading '-' makes rm failures non-fatal.
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+# Remove *.tab.c files in the current directory; the leading '-' makes rm failures non-fatal.
+distclean-compile:
+	-rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsalpha.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmscam02.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmscgats.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmscnvrt.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmserr.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsgamma.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsgmt.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmshalf.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsintrp.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsio0.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsio1.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmslut.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsmtrx.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsnamed.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsopt.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmspack.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmspcs.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsplugin.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsps2.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmssamp.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmssm.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmstypes.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsvirt.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmswtpnt.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmsxform.Plo@am__quote@
+# Suffix rule .c -> .o using $(COMPILE); the line prefixes are configure-time conditionals choosing between fast dependency tracking (-MD/-MF into $(DEPDIR)), depcomp, or a plain compile.
+.c.o:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+# Suffix rule .c -> .obj: same three conditional variants as the .o rule, but the source path is passed through $(CYGPATH_W).
+.c.obj:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+# Suffix rule .c -> .lo (libtool object) using $(LTCOMPILE); dependency output is stored as .Plo and depcomp is told libtool=yes.
+.c.lo:
+@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\
+@am__fastdepCC_TRUE@	$(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@	$(am__mv) $$depbase.Tpo $$depbase.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+# Remove libtool object files (*.lo) in the current directory; rm failures are ignored.
+mostlyclean-libtool:
+	-rm -f *.lo
+# Remove the .libs and _libs directories recursively; rm failures are ignored.
+clean-libtool:
+	-rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+# Remove the tag/index files listed below from the current directory; rm failures are ignored.
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+	for dir in "$(DESTDIR)$(libdir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+# Remove CONFIG_CLEAN_FILES, and CONFIG_CLEAN_VPATH_FILES only when srcdir is not "."; each step is skipped when its list is empty.
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+# Prints a two-line warning only; this recipe itself deletes nothing.
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
+	mostlyclean-am
+# After distclean-am, also drop the dependency directory ./$(DEPDIR) and the generated Makefile; rm failures are ignored.
+distclean: distclean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-libLTLIBRARIES
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+# After maintainer-clean-am, also drop the dependency directory ./$(DEPDIR) and the generated Makefile; rm failures are ignored.
+maintainer-clean: maintainer-clean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-libLTLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+	clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \
+	ctags-am distclean distclean-compile distclean-generic \
+	distclean-libtool distclean-tags distdir dvi dvi-am html \
+	html-am info info-am install install-am install-data \
+	install-data-am install-dvi install-dvi-am install-exec \
+	install-exec-am install-html install-html-am install-info \
+	install-info-am install-libLTLIBRARIES install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	installcheck installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags tags-am uninstall uninstall-am uninstall-libLTLIBRARIES
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsalpha.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsalpha.cpp
new file mode 100755
index 0000000000..6dae644e31
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsalpha.cpp
@@ -0,0 +1,559 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Alpha copy ------------------------------------------------------------------------------------------------------------------
+
+// Floor to byte, taking care of saturation
+cmsINLINE cmsUInt8Number _cmsQuickSaturateByte(cmsFloat64Number d)
+{
+       // Bias by one half before flooring, so the value handed back is the nearest integer.
+       const cmsFloat64Number biased = d + 0.5;
+       if (biased <= 0) return 0;          // clamp at the bottom of the byte range
+       if (biased >= 255.0) return 255;    // clamp at the top of the byte range
+       return (cmsUInt8Number) _cmsQuickFloorWord(biased);
+}
+
+
+// Return the size in bytes of a given formatter
+// Format is a pixel-format word; only its T_BYTES field is examined here.
+static cmsUInt32Number trueBytesSize(cmsUInt32Number Format)
+{
+    const cmsUInt32Number encodedWidth = T_BYTES(Format);
+
+    // T_BYTES yields zero for double formats, so that sentinel is mapped to
+    // the real size below; every other width is stored literally.
+    if (encodedWidth != 0)
+        return encodedWidth;
+
+    return sizeof(double);
+}
+
+
+// Several format converters
+
+typedef void(*cmsFormatterAlphaFn)(void* dst, const void* src);
+
+
+// From 8
+
+// Same-width case for 8-bit samples: copy exactly one byte from src to dst.
+static void copy8(void* dst, const void* src)
+{
+       memmove(dst, src, 1);
+}
+
+// Widen one 8-bit sample to a 16-bit sample through the FROM_8_TO_16 macro.
+static void from8to16(void* dst, const void* src)
+{
+       const cmsUInt8Number sample8 = *(const cmsUInt8Number*)src;
+       *(cmsUInt16Number*)dst = FROM_8_TO_16(sample8);
+}
+
+// Convert one 8-bit sample to cmsFloat32Number by dividing it by 255.
+static void from8toFLT(void* dst, const void* src)
+{
+       *(cmsFloat32Number*)dst = *(const cmsUInt8Number*)src / 255.0f;
+}
+
+// Convert one 8-bit sample to cmsFloat64Number by dividing it by 255 (double arithmetic).
+static void from8toDBL(void* dst, const void* src)
+{
+       *(cmsFloat64Number*)dst = *(const cmsUInt8Number*)src / 255.0;
+}
+
+// 8-bit sample -> half float stored in a 16-bit word; with CMS_NO_HALF_SUPPORT only the parameters are marked unused.
+static void from8toHLF(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+       const cmsFloat32Number unit = *(const cmsUInt8Number*)src / 255.0f;
+       *(cmsUInt16Number*)dst = _cmsFloat2Half(unit);
+#else
+    cmsUNUSED_PARAMETER(src);
+    cmsUNUSED_PARAMETER(dst);
+#endif
+}
+
+// From 16
+
+// Narrow one 16-bit sample to an 8-bit sample through the FROM_16_TO_8 macro.
+static void from16to8(void* dst, const void* src)
+{
+       const cmsUInt16Number sample16 = *(const cmsUInt16Number*)src;
+       *(cmsUInt8Number*)dst = FROM_16_TO_8(sample16);
+}
+
+// Same-width case for 2-byte samples: copy exactly two bytes from src to dst.
+static void copy16(void* dst, const void* src)
+{
+       memmove(dst, src, 2);
+}
+
+void from16toFLT(void* dst, const void* src) // NOTE(review): not static, unlike the sibling converters -- confirm external linkage is intended
+{
+       *(cmsFloat32Number*)dst = *(const cmsUInt16Number*)src / 65535.0f; // 16-bit sample divided by 65535 into a cmsFloat32Number
+}
+
+void from16toDBL(void* dst, const void* src) // 16-bit sample -> cmsFloat64Number; left non-static so the existing linkage is unchanged
+{
+       *(cmsFloat64Number*)dst = *(const cmsUInt16Number*)src / 65535.0; // double divisor: dividing by 65535.0f rounded the quotient to float precision first
+}
+
+// 16-bit sample -> half float stored in a 16-bit word; with CMS_NO_HALF_SUPPORT only the parameters are marked unused.
+static void from16toHLF(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+       const cmsFloat32Number unit = *(const cmsUInt16Number*)src / 65535.0f;
+       *(cmsUInt16Number*)dst = _cmsFloat2Half(unit);
+#else
+    cmsUNUSED_PARAMETER(src);
+    cmsUNUSED_PARAMETER(dst);
+#endif
+}
+
+// From Float
+
+// Scale one cmsFloat32Number sample by 255 and saturate the result into a byte.
+static void fromFLTto8(void* dst, const void* src)
+{
+       const cmsFloat32Number sample = *(const cmsFloat32Number*)src;
+       *(cmsUInt8Number*)dst = _cmsQuickSaturateByte(sample * 255.0f);
+}
+
+// Scale one cmsFloat32Number sample by 65535 and pass it to _cmsQuickSaturateWord for the 16-bit result.
+static void fromFLTto16(void* dst, const void* src)
+{
+       const cmsFloat32Number sample = *(const cmsFloat32Number*)src;
+       *(cmsUInt16Number*)dst = _cmsQuickSaturateWord(sample * 65535.0f);
+}
+
+// Same-width case for float samples: copy sizeof(cmsFloat32Number) bytes from src to dst.
+static void copy32(void* dst, const void* src)
+{
+       memmove(dst, src, sizeof(cmsFloat32Number));
+}
+
+// Widen one cmsFloat32Number sample to cmsFloat64Number; the value is not rescaled.
+static void fromFLTtoDBL(void* dst, const void* src)
+{
+       cmsFloat64Number* const out = (cmsFloat64Number*)dst;
+       *out = (cmsFloat64Number) *(const cmsFloat32Number*)src;
+}
+
+// cmsFloat32Number sample -> half float stored in a 16-bit word; with CMS_NO_HALF_SUPPORT only the parameters are marked unused.
+static void fromFLTtoHLF(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+       const cmsFloat32Number sample = *(const cmsFloat32Number*)src;
+       *(cmsUInt16Number*)dst = _cmsFloat2Half(sample);
+#else
+    cmsUNUSED_PARAMETER(src);
+    cmsUNUSED_PARAMETER(dst);
+#endif
+}
+
+
+// From HALF
+
+// Half-float sample (held in a 16-bit word) -> byte: expand with _cmsHalf2Float,
+// scale by 255 and saturate. With CMS_NO_HALF_SUPPORT only the parameters are marked unused.
+static void fromHLFto8(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+       const cmsFloat32Number sample = _cmsHalf2Float(*(const cmsUInt16Number*)src);
+       *(cmsUInt8Number*)dst = _cmsQuickSaturateByte(sample * 255.0f);
+#else
+    cmsUNUSED_PARAMETER(src);
+    cmsUNUSED_PARAMETER(dst);
+#endif
+}
+
+// Half-float sample -> 16-bit sample: expand with _cmsHalf2Float, scale by 65535, then _cmsQuickSaturateWord.
+static void fromHLFto16(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+       const cmsFloat32Number sample = _cmsHalf2Float(*(const cmsUInt16Number*)src);
+       *(cmsUInt16Number*)dst = _cmsQuickSaturateWord(sample * 65535.0f);
+#else
+    cmsUNUSED_PARAMETER(src);
+    cmsUNUSED_PARAMETER(dst);
+#endif
+}
+
+// Half-float sample -> cmsFloat32Number via _cmsHalf2Float; with CMS_NO_HALF_SUPPORT only the parameters are marked unused.
+static void fromHLFtoFLT(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+       *(cmsFloat32Number*)dst = _cmsHalf2Float(*(const cmsUInt16Number*)src);
+#else
+    cmsUNUSED_PARAMETER(src);
+    cmsUNUSED_PARAMETER(dst);
+#endif
+}
+
+// Half-float sample -> cmsFloat64Number via _cmsHalf2Float; with CMS_NO_HALF_SUPPORT only the parameters are marked unused.
+static void fromHLFtoDBL(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+       *(cmsFloat64Number*)dst = (cmsFloat64Number) _cmsHalf2Float(*(const cmsUInt16Number*)src);
+#else
+    cmsUNUSED_PARAMETER(src);
+    cmsUNUSED_PARAMETER(dst);
+#endif
+}
+
+// From double
+// Scale one cmsFloat64Number sample by 255 and saturate the result into a byte.
+static void fromDBLto8(void* dst, const void* src)
+{
+       const cmsFloat64Number sample = *(const cmsFloat64Number*)src;
+       *(cmsUInt8Number*)dst = _cmsQuickSaturateByte(sample * 255.0);
+}
+
+// Scale one cmsFloat64Number sample by 65535 and pass it to _cmsQuickSaturateWord for the 16-bit result.
+static void fromDBLto16(void* dst, const void* src)
+{
+       const cmsFloat64Number sample = *(const cmsFloat64Number*)src;
+       *(cmsUInt16Number*)dst = _cmsQuickSaturateWord(sample * 65535.0f);
+}
+
+// Narrow one cmsFloat64Number sample to cmsFloat32Number; the value is not rescaled.
+static void fromDBLtoFLT(void* dst, const void* src)
+{
+       cmsFloat32Number* const out = (cmsFloat32Number*)dst;
+       *out = (cmsFloat32Number) *(const cmsFloat64Number*)src;
+}
+
+// cmsFloat64Number sample -> half float: narrowed to cmsFloat32Number first, then encoded with _cmsFloat2Half.
+static void fromDBLtoHLF(void* dst, const void* src)
+{
+#ifndef CMS_NO_HALF_SUPPORT
+       const cmsFloat32Number narrowed = (cmsFloat32Number) *(const cmsFloat64Number*)src;
+       *(cmsUInt16Number*)dst = _cmsFloat2Half(narrowed);
+#else
+    cmsUNUSED_PARAMETER(src);
+    cmsUNUSED_PARAMETER(dst);
+#endif
+}
+
+// Same-width case for double samples: copy sizeof(cmsFloat64Number) bytes from src to dst.
+static void copy64(void* dst, const void* src)
+{
+       memmove(dst, src, sizeof(cmsFloat64Number));
+}
+
+
+// Returns the position (x or y) of the formatter in the table of functions
+// Index assignment: 0 = 8 bit, 1 = 16 bit, 2 = HLF, 3 = FLT, 4 = DBL; -1 for anything else.
+static int FormatterPos(cmsUInt32Number frm)
+{
+    const cmsUInt32Number width = T_BYTES(frm);
+
+    if (T_FLOAT(frm)) {
+        if (width == 0) return 4; // DBL
+#ifndef CMS_NO_HALF_SUPPORT
+        if (width == 2) return 2; // HLF
+#endif
+        if (width == 4) return 3; // FLT
+    }
+    else {
+        if (width == 2) return 1; // 16
+        if (width == 1) return 0; // 8
+    }
+
+    // Every other width/float combination, including 2-byte floats when half support is compiled out.
+    return -1; // not recognized
+}
+
+// Obtains an alpha-to-alpha function formatter
+static cmsFormatterAlphaFn _cmsGetFormatterAlpha(cmsContext id, cmsUInt32Number in, cmsUInt32Number out)
+{
+    // Converters indexed as [source kind][destination kind]. Kinds follow the
+    // positions returned by FormatterPos(): 8, 16, HLF, FLT, DBL.
+    static cmsFormatterAlphaFn FormattersAlpha[5][5] = {
+
+       /* from 8 */  { copy8,      from8to16,   from8toHLF,   from8toFLT,   from8toDBL   },
+       /* from 16*/  { from16to8,  copy16,      from16toHLF,  from16toFLT,  from16toDBL  },
+       /* from HLF*/ { fromHLFto8, fromHLFto16, copy16,       fromHLFtoFLT, fromHLFtoDBL },
+       /* from FLT*/ { fromFLTto8, fromFLTto16, fromFLTtoHLF, copy32,       fromFLTtoDBL },
+       /* from DBL*/ { fromDBLto8, fromDBLto16, fromDBLtoHLF, fromDBLtoFLT, copy64 }};
+
+    const int srcKind = FormatterPos(in);
+    const int dstKind = FormatterPos(out);
+
+    // Both formats must map to a row/column of the table
+    if (srcKind >= 0 && srcKind <= 4 && dstKind >= 0 && dstKind <= 4)
+        return FormattersAlpha[srcKind][dstKind];
+
+    cmsSignalError(id, cmsERROR_UNKNOWN_EXTENSION, "Unrecognized alpha channel width");
+    return NULL;
+}
+
+
+
+// This function computes the distance from each component to the next one in bytes.
+//   Format                     - pixel format (chunky layout)
+//   ComponentStartingOrder     - out: byte offset of each extra channel inside the first pixel
+//   ComponentPointerIncrements - out: bytes from one sample of an extra channel to the next
+// Only the first T_EXTRA(Format) entries of both output arrays are written.
+static void ComputeIncrementsForChunky(cmsUInt32Number Format,
+                                       cmsUInt32Number ComponentStartingOrder[],
+                                       cmsUInt32Number ComponentPointerIncrements[])
+{
+    cmsUInt32Number       slot[cmsMAXCHANNELS];   // slot[k]: position given to channel index k
+    const cmsUInt32Number nExtra    = T_EXTRA(Format);
+    const cmsUInt32Number nColor    = T_CHANNELS(Format);
+    const cmsUInt32Number nTotal    = nColor + nExtra;
+    const cmsUInt32Number sampleLen = trueBytesSize(Format);
+    const cmsUInt32Number pixelLen  = sampleLen * nTotal;
+    cmsUInt32Number       k;
+
+    // Sanity check. Note that the output arrays are left unwritten on failure.
+    if (nTotal == 0 || nTotal >= cmsMAXCHANNELS)
+        return;
+
+    memset(slot, 0, sizeof(slot));
+
+    // Separation is independent of starting point: in chunky layout every
+    // channel advances by one whole pixel.
+    for (k = 0; k < nExtra; k++)
+        ComponentPointerIncrements[k] = pixelLen;
+
+    // Handle do swap: natural order, or fully reversed
+    for (k = 0; k < nTotal; k++) {
+        slot[k] = T_DOSWAP(Format) ? (nTotal - k - 1) : k;
+    }
+
+    // Handle swap first (ROL of positions), example CMYK -> KCMY | 0123 -> 3012
+    if (T_SWAPFIRST(Format) && nTotal > 1) {
+
+        const cmsUInt32Number head = slot[0];
+
+        memmove(&slot[0], &slot[1], (nTotal - 1) * sizeof(slot[0]));
+        slot[nTotal - 1] = head;
+    }
+
+    // Handle size: turn positions into byte offsets
+    if (sampleLen > 1) {
+
+        for (k = 0; k < nTotal; k++)
+            slot[k] *= sampleLen;
+    }
+
+    // The extra channels are the entries that follow the color channels
+    for (k = 0; k < nExtra; k++) {
+        ComponentStartingOrder[k] = slot[nColor + k];
+    }
+}
+
+
+
+// On planar configurations every channel lives in its own plane: consecutive samples
+// of a channel are one sample apart and plane k starts k * BytesPerPlane bytes in.
+//   Format                     - pixel format (planar layout)
+//   BytesPerPlane              - size of one plane in bytes
+//   ComponentStartingOrder     - out: byte offset of the plane of each extra channel
+//   ComponentPointerIncrements - out: bytes from one sample of an extra channel to the next
+// Only the first T_EXTRA(Format) entries of both output arrays are written.
+static void ComputeIncrementsForPlanar(cmsUInt32Number Format,
+                                       cmsUInt32Number BytesPerPlane,
+                                       cmsUInt32Number ComponentStartingOrder[],
+                                       cmsUInt32Number ComponentPointerIncrements[])
+{
+    cmsUInt32Number       slot[cmsMAXCHANNELS];   // slot[k]: plane given to channel index k
+    const cmsUInt32Number nExtra    = T_EXTRA(Format);
+    const cmsUInt32Number nColor    = T_CHANNELS(Format);
+    const cmsUInt32Number nTotal    = nColor + nExtra;
+    const cmsUInt32Number sampleLen = trueBytesSize(Format);
+    cmsUInt32Number       k;
+
+    // Sanity check. Note that the output arrays are left unwritten on failure.
+    if (nTotal == 0 || nTotal >= cmsMAXCHANNELS)
+        return;
+
+    memset(slot, 0, sizeof(slot));
+
+    // Separation is independent of starting point and only depends on channel size
+    for (k = 0; k < nExtra; k++)
+        ComponentPointerIncrements[k] = sampleLen;
+
+    // Handle do swap: natural order, or fully reversed
+    for (k = 0; k < nTotal; k++) {
+        slot[k] = T_DOSWAP(Format) ? (nTotal - k - 1) : k;
+    }
+
+    // Handle swap first (ROL of positions), example CMYK -> KCMY | 0123 -> 3012
+    if (T_SWAPFIRST(Format) && nTotal > 0) {
+
+        const cmsUInt32Number head = slot[0];
+
+        memmove(&slot[0], &slot[1], (nTotal - 1) * sizeof(slot[0]));
+        slot[nTotal - 1] = head;
+    }
+
+    // Handle size: turn plane indices into byte offsets
+    for (k = 0; k < nTotal; k++) {
+        slot[k] *= BytesPerPlane;
+    }
+
+    // The extra channels are the entries that follow the color channels
+    for (k = 0; k < nExtra; k++) {
+        ComponentStartingOrder[k] = slot[nColor + k];
+    }
+}
+
+
+
+// Dispatcher for chunky and planar RGB
+static void ComputeComponentIncrements(cmsUInt32Number Format,
+                                       cmsUInt32Number BytesPerPlane,
+                                       cmsUInt32Number ComponentStartingOrder[],
+                                       cmsUInt32Number ComponentPointerIncrements[])
+{
+    if (!T_PLANAR(Format)) {
+
+        // Chunky layout: BytesPerPlane is not needed
+        ComputeIncrementsForChunky(Format, ComponentStartingOrder, ComponentPointerIncrements);
+        return;
+    }
+
+    // Planar layout
+    ComputeIncrementsForPlanar(Format, BytesPerPlane, ComponentStartingOrder, ComponentPointerIncrements);
+}
+
+
+
+// Handles extra channels copying alpha if requested by the flags.
+//   p                        - transform; supplies the flags, the context and both pixel formats
+//   in, out                  - start of the source / destination buffers
+//   PixelsPerLine, LineCount - size of the area to process
+//   Stride                   - bytes per line and per plane of both buffers
+// Returns without copying anything when there is nothing to do or the formats cannot be handled.
+void _cmsHandleExtraChannels(_cmsTRANSFORM* p, const void* in,
+                                               void* out,
+                                               cmsUInt32Number PixelsPerLine,
+                                               cmsUInt32Number LineCount,
+                                               const cmsStride* Stride)
+{
+    cmsUInt32Number i, j, k;
+    cmsUInt32Number nExtra;
+    cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS];
+    cmsUInt32Number SourceIncrements[cmsMAXCHANNELS];
+    cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS];
+    cmsUInt32Number DestIncrements[cmsMAXCHANNELS];
+    cmsFormatterAlphaFn copyValueFn;
+
+    // Make sure we need some copy
+    if (!(p->dwOriginalFlags & cmsFLAGS_COPY_ALPHA))
+        return;
+
+    // Exit early if in-place color-management is occurring - no need to copy extra channels to themselves.
+    if (p->InputFormat == p->OutputFormat && in == out)
+        return;
+
+    // Make sure we have same number of alpha channels. If not, just return as this should be checked at transform creation time.
+    nExtra = T_EXTRA(p->InputFormat);
+    if (nExtra != T_EXTRA(p->OutputFormat))
+        return;
+
+    // Anything to do?
+    if (nExtra == 0)
+        return;
+
+    // ComputeComponentIncrements() writes nothing when a format has cmsMAXCHANNELS or more channels
+    // in total. Reject such formats here instead of looping with uninitialized offsets and increments.
+    if (T_CHANNELS(p->InputFormat) + nExtra >= cmsMAXCHANNELS ||
+        T_CHANNELS(p->OutputFormat) + nExtra >= cmsMAXCHANNELS)
+        return;
+
+    // Check for conversions 8, 16, half, float, dbl. NULL means the sample width was not
+    // recognized (the error has already been signalled), so there is no converter to call.
+    copyValueFn = _cmsGetFormatterAlpha(p->ContextID, p->InputFormat, p->OutputFormat);
+    if (copyValueFn == NULL)
+        return;
+
+    // Compute the increments
+    ComputeComponentIncrements(p->InputFormat, Stride->BytesPerPlaneIn, SourceStartingOrder, SourceIncrements);
+    ComputeComponentIncrements(p->OutputFormat, Stride->BytesPerPlaneOut, DestStartingOrder, DestIncrements);
+
+    if (nExtra == 1) { // Optimized routine for copying a single extra channel quickly
+        cmsUInt8Number* SourcePtr;
+        cmsUInt8Number* DestPtr;
+        cmsUInt32Number SourceStrideIncrement = 0;
+        cmsUInt32Number DestStrideIncrement = 0;
+
+        // The loop itself
+        for (i = 0; i < LineCount; i++) {
+            // Prepare pointers for the loop
+            SourcePtr = (cmsUInt8Number*)in + SourceStartingOrder[0] + SourceStrideIncrement;
+            DestPtr = (cmsUInt8Number*)out + DestStartingOrder[0] + DestStrideIncrement;
+
+            for (j = 0; j < PixelsPerLine; j++) {
+                copyValueFn(DestPtr, SourcePtr);
+                SourcePtr += SourceIncrements[0];
+                DestPtr += DestIncrements[0];
+            }
+
+            SourceStrideIncrement += Stride->BytesPerLineIn;
+            DestStrideIncrement += Stride->BytesPerLineOut;
+        }
+    }
+    else { // General case with more than one extra channel
+
+        cmsUInt8Number* SourcePtr[cmsMAXCHANNELS];
+        cmsUInt8Number* DestPtr[cmsMAXCHANNELS];
+        cmsUInt32Number SourceStrideIncrements[cmsMAXCHANNELS];
+        cmsUInt32Number DestStrideIncrements[cmsMAXCHANNELS];
+
+        memset(SourceStrideIncrements, 0, sizeof(SourceStrideIncrements));
+        memset(DestStrideIncrements, 0, sizeof(DestStrideIncrements));
+
+        // The loop itself
+        for (i = 0; i < LineCount; i++) {
+            // Prepare pointers for the loop
+            for (j = 0; j < nExtra; j++) {
+                SourcePtr[j] = (cmsUInt8Number*)in + SourceStartingOrder[j] + SourceStrideIncrements[j];
+                DestPtr[j] = (cmsUInt8Number*)out + DestStartingOrder[j] + DestStrideIncrements[j];
+            }
+
+            for (j = 0; j < PixelsPerLine; j++) {
+                for (k = 0; k < nExtra; k++) {
+                    copyValueFn(DestPtr[k], SourcePtr[k]);
+                    SourcePtr[k] += SourceIncrements[k];
+                    DestPtr[k] += DestIncrements[k];
+                }
+            }
+
+            for (j = 0; j < nExtra; j++) {
+                SourceStrideIncrements[j] += Stride->BytesPerLineIn;
+                DestStrideIncrements[j] += Stride->BytesPerLineOut;
+            }
+        }
+    }
+}
+
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmscam02.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmscam02.cpp
new file mode 100755
index 0000000000..9cc49fbf20
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmscam02.cpp
@@ -0,0 +1,486 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// CIECAM 02 appearance model. Many thanks to Jordi Vilar for the debugging.
+
+// ---------- Implementation --------------------------------------------
+
+typedef struct  {
+    // Working state of one color as it moves through the model stages
+    cmsFloat64Number XYZ[3];      // Input of XYZtoCAT02(), output of CAT02toXYZ()
+    cmsFloat64Number RGB[3];      // Written by XYZtoCAT02()
+    cmsFloat64Number RGBc[3];     // Written by ChromaticAdaptation()
+    cmsFloat64Number RGBp[3];     // Written by CAT02toHPE()
+    cmsFloat64Number RGBpa[3];    // Written by NonlinearCompression()
+    cmsFloat64Number a, b, h, e, H, A, J, Q, s, t, C, M;   // Correlates and intermediates; J, C, h are what the public API exchanges
+    cmsFloat64Number abC[2];      // abC, abs, abM: not referenced by the functions in this section
+    cmsFloat64Number abs[2];
+    cmsFloat64Number abM[2];
+
+} CAM02COLOR;
+
+typedef struct  {
+    // State of one model instance, filled in by cmsCIECAM02Init()
+    CAM02COLOR adoptedWhite;                   // White point plus its intermediate values computed at init
+    cmsFloat64Number LA, Yb;                   // Copied from the viewing conditions (La, Yb)
+    cmsFloat64Number F, c, Nc;                 // Constants selected by the surround
+    cmsUInt32Number surround;                  // Surround code copied from the viewing conditions
+    cmsFloat64Number n, Nbb, Ncb, z, FL, D;    // Derived at init; D is computed only when D_CALCULATE was requested
+
+    cmsContext ContextID;                      // Context used to allocate and free the model
+
+} cmsCIECAM02;
+
+
+// n = Yb / Y of the adopted white.
+static cmsFloat64Number compute_n(cmsCIECAM02* pMod)
+{
+    return pMod->Yb / pMod->adoptedWhite.XYZ[1];
+}
+
+// z = 1.48 + n^0.5; reads n from the model.
+static cmsFloat64Number compute_z(cmsCIECAM02* pMod)
+{
+    return 1.48 + pow(pMod->n, 0.5);
+}
+
+// Nbb = 0.725 * (1 / n)^0.2; reads n from the model.
+static cmsFloat64Number computeNbb(cmsCIECAM02* pMod)
+{
+    return 0.725 * pow((1.0 / pMod->n), 0.2);
+}
+
+// FL = 0.2 k^4 (5 LA) + 0.1 (1 - k^4)^2 (5 LA)^(1/3), with k = 1 / (5 LA + 1).
+static cmsFloat64Number computeFL(cmsCIECAM02* pMod)
+{
+    const cmsFloat64Number la5 = 5.0 * pMod->LA;
+    const cmsFloat64Number k   = 1.0 / (la5 + 1.0);
+    const cmsFloat64Number k4  = pow(k, 4.0);
+
+    // Terms are combined in the same order as in the formula above
+    return 0.2 * k4 * la5 + 0.1 *
+        (pow((1.0 - k4), 2.0)) *
+        (pow(la5, (1.0 / 3.0)));
+}
+
+// D = F - (1/3.6) * exp((-LA - 42) / 92).
+// cmsCIECAM02Init() calls this only when the caller asked for D_CALCULATE.
+static cmsFloat64Number computeD(cmsCIECAM02* pMod)
+{
+    const cmsFloat64Number exponent = (-pMod->LA - 42) / 92.0;
+    const cmsFloat64Number decay    = exp(exponent);
+
+    return pMod->F - (1.0/3.6) * decay;
+}
+
+
+// XYZ -> CAT02 RGB through a fixed 3x3 matrix; fills clr.RGB and leaves the other fields alone.
+static CAM02COLOR XYZtoCAT02(CAM02COLOR clr)
+{
+    const cmsFloat64Number X = clr.XYZ[0], Y = clr.XYZ[1], Z = clr.XYZ[2];
+    clr.RGB[0] = (X *  0.7328) + (Y *  0.4296) + (Z * -0.1624);
+    clr.RGB[1] = (X * -0.7036) + (Y *  1.6975) + (Z *  0.0061);
+    clr.RGB[2] = (X *  0.0030) + (Y *  0.0136) + (Z *  0.9834);
+    return clr;
+}
+
+// Fills clr.RGBc: each clr.RGB channel is scaled by Yw * (D / RGBw[i]) + (1 - D), white taken from pMod.
+static CAM02COLOR ChromaticAdaptation(CAM02COLOR clr, cmsCIECAM02* pMod)
+{
+    const cmsFloat64Number Yw = pMod->adoptedWhite.XYZ[1];
+    const cmsFloat64Number D  = pMod->D;
+    cmsUInt32Number ch;
+
+    for (ch = 0; ch < 3; ch++) {
+        const cmsFloat64Number gain = (Yw * (D / pMod->adoptedWhite.RGB[ch])) + (1.0 - D);
+        clr.RGBc[ch] = gain * clr.RGB[ch];
+    }
+    return clr;
+}
+
+
+// CAT02 -> HPE: fills clr.RGBp from clr.RGBc. The first two matrix rows are spelled out as products
+// with the CAT02 -> XYZ coefficients used by CAT02toXYZ(); the third row is given directly.
+static CAM02COLOR CAT02toHPE(CAM02COLOR clr)
+{
+    const cmsFloat64Number M[9] = {
+        (( 0.38971 *  1.096124) + (0.68898 * 0.454369) + (-0.07868 * -0.009628)),
+        (( 0.38971 * -0.278869) + (0.68898 * 0.473533) + (-0.07868 * -0.005698)),
+        (( 0.38971 *  0.182745) + (0.68898 * 0.072098) + (-0.07868 *  1.015326)),
+        ((-0.22981 *  1.096124) + (1.18340 * 0.454369) + ( 0.04641 * -0.009628)),
+        ((-0.22981 * -0.278869) + (1.18340 * 0.473533) + ( 0.04641 * -0.005698)),
+        ((-0.22981 *  0.182745) + (1.18340 * 0.072098) + ( 0.04641 *  1.015326)),
+        (-0.009628),
+        (-0.005698),
+        ( 1.015326)
+    };
+    cmsUInt32Number row;
+
+    for (row = 0; row < 3; row++)
+        clr.RGBp[row] = (clr.RGBc[0] * M[3 * row]) +  (clr.RGBc[1] * M[3 * row + 1]) + (clr.RGBc[2] * M[3 * row + 2]);
+    return clr;
+}
+// Fills clr.RGBpa from clr.RGBp (one channel at a time) and then derives clr.A from it.
+static
+CAM02COLOR NonlinearCompression(CAM02COLOR clr, cmsCIECAM02* pMod)
+{
+    cmsUInt32Number i;
+    cmsFloat64Number temp;
+    // Per channel: RGBpa = 400 * temp / (temp + 27.13) + 0.1, where temp is a 0.42 power of FL * RGBp / 100
+    for (i = 0; i < 3; i++) {
+        if (clr.RGBp[i] < 0) {
+            // Negative input: the power is taken of the negated value and the quotient is negated
+            temp = pow((-1.0 * pMod->FL * clr.RGBp[i] / 100.0), 0.42);
+            clr.RGBpa[i] = (-1.0 * 400.0 * temp) / (temp + 27.13) + 0.1;
+        }
+        else {
+            temp = pow((pMod->FL * clr.RGBp[i] / 100.0), 0.42);
+            clr.RGBpa[i] = (400.0 * temp) / (temp + 27.13) + 0.1;
+        }
+    }
+    // A = (2 * RGBpa[0] + RGBpa[1] + RGBpa[2] / 20 - 0.305) * Nbb
+    clr.A = (((2.0 * clr.RGBpa[0]) + clr.RGBpa[1] +
+        (clr.RGBpa[2] / 20.0)) - 0.305) * pMod->Nbb;
+
+    return clr;
+}
+// Derives clr.h, clr.H, clr.J, clr.Q, clr.C, clr.M and clr.s from clr.RGBpa and clr.A.
+static
+CAM02COLOR ComputeCorrelates(CAM02COLOR clr, cmsCIECAM02* pMod)
+{
+    cmsFloat64Number a, b, temp, e, t, r2d, d2r;
+    // a and b: fixed linear combinations of the three compressed channels
+    a = clr.RGBpa[0] - (12.0 * clr.RGBpa[1] / 11.0) + (clr.RGBpa[2] / 11.0);
+    b = (clr.RGBpa[0] + clr.RGBpa[1] - (2.0 * clr.RGBpa[2])) / 9.0;
+    // Hue angle h in degrees, in [0, 360): arctangent of b / a with the quadrant chosen from the signs
+    r2d = (180.0 / 3.141592654);
+    if (a == 0) {
+        if (b == 0)     clr.h = 0;
+        else if (b > 0) clr.h = 90;
+        else            clr.h = 270;
+    }
+    else if (a > 0) {
+        temp = b / a;
+        if (b > 0)       clr.h = (r2d * atan(temp));
+        else if (b == 0) clr.h = 0;
+        else             clr.h = (r2d * atan(temp)) + 360;
+    }
+    else {
+        temp = b / a;
+        clr.h = (r2d * atan(temp)) + 180;
+    }
+    // e depends on the hue angle (converted back to radians) and on Nc and Ncb
+    d2r = (3.141592654 / 180.0);
+    e = ((12500.0 / 13.0) * pMod->Nc * pMod->Ncb) *
+        (cos((clr.h * d2r + 2.0)) + 3.8);
+    // H: piecewise interpolation of h between the breakpoints 20.14, 90, 164.25 and 237.53
+    if (clr.h < 20.14) {
+        temp = ((clr.h + 122.47)/1.2) + ((20.14 - clr.h)/0.8);
+        clr.H = 300 + (100*((clr.h + 122.47)/1.2)) / temp;
+    }
+    else if (clr.h < 90.0) {
+        temp = ((clr.h - 20.14)/0.8) + ((90.00 - clr.h)/0.7);
+        clr.H = (100*((clr.h - 20.14)/0.8)) / temp;
+    }
+    else if (clr.h < 164.25) {
+        temp = ((clr.h - 90.00)/0.7) + ((164.25 - clr.h)/1.0);
+        clr.H = 100 + ((100*((clr.h - 90.00)/0.7)) / temp);
+    }
+    else if (clr.h < 237.53) {
+        temp = ((clr.h - 164.25)/1.0) + ((237.53 - clr.h)/1.2);
+        clr.H = 200 + ((100*((clr.h - 164.25)/1.0)) / temp);
+    }
+    else {
+        temp = ((clr.h - 237.53)/1.2) + ((360 - clr.h + 20.14)/0.8);
+        clr.H = 300 + ((100*((clr.h - 237.53)/1.2)) / temp);
+    }
+    // J from the ratio of A to the adopted white's A
+    clr.J = 100.0 * pow((clr.A / pMod->adoptedWhite.A),
+        (pMod->c * pMod->z));
+    // Q from J, c, the adopted white's A and FL
+    clr.Q = (4.0 / pMod->c) * pow((clr.J / 100.0), 0.5) *
+        (pMod->adoptedWhite.A + 4.0) * pow(pMod->FL, 0.25);
+    // t: local intermediate, used below for C only
+    t = (e * pow(((a * a) + (b * b)), 0.5)) /
+        (clr.RGBpa[0] + clr.RGBpa[1] +
+        ((21.0 / 20.0) * clr.RGBpa[2]));
+    // C from t, J and n; M and s are then derived from C
+    clr.C = pow(t, 0.9) * pow((clr.J / 100.0), 0.5) *
+        pow((1.64 - pow(0.29, pMod->n)), 0.73);
+
+    clr.M = clr.C * pow(pMod->FL, 0.25);
+    clr.s = 100.0 * pow((clr.M / clr.Q), 0.5);
+
+    return clr;
+}
+
+// Rebuilds clr.A, clr.a, clr.b and clr.RGBpa from clr.J, clr.C and clr.h.
+static
+CAM02COLOR InverseCorrelates(CAM02COLOR clr, cmsCIECAM02* pMod)
+{
+
+    cmsFloat64Number t, e, p1, p2, p3, p4, p5, hr, d2r;
+    d2r = 3.141592654 / 180.0;
+    // t is recovered from C and J; e uses the same expression as ComputeCorrelates()
+    t = pow( (clr.C / (pow((clr.J / 100.0), 0.5) *
+        (pow((1.64 - pow(0.29, pMod->n)), 0.73)))),
+        (1.0 / 0.9) );
+    e = ((12500.0 / 13.0) * pMod->Nc * pMod->Ncb) *
+        (cos((clr.h * d2r + 2.0)) + 3.8);
+
+    clr.A = pMod->adoptedWhite.A * pow(
+           (clr.J / 100.0),
+           (1.0 / (pMod->c * pMod->z)));
+    // NOTE(review): t is 0 when clr.C is 0, so p1 = e / t divides by zero - confirm this is acceptable for callers
+    p1 = e / t;
+    p2 = (clr.A / pMod->Nbb) + 0.305;
+    p3 = 21.0 / 20.0;
+
+    hr = clr.h * d2r;
+    // Divide by whichever of sin(hr) / cos(hr) has the larger magnitude
+    if (fabs(sin(hr)) >= fabs(cos(hr))) {
+        p4 = p1 / sin(hr);
+        clr.b = (p2 * (2.0 + p3) * (460.0 / 1403.0)) /
+            (p4 + (2.0 + p3) * (220.0 / 1403.0) *
+            (cos(hr) / sin(hr)) - (27.0 / 1403.0) +
+            p3 * (6300.0 / 1403.0));
+        clr.a = clr.b * (cos(hr) / sin(hr));
+    }
+    else {
+        p5 = p1 / cos(hr);
+        clr.a = (p2 * (2.0 + p3) * (460.0 / 1403.0)) /
+            (p5 + (2.0 + p3) * (220.0 / 1403.0) -
+            ((27.0 / 1403.0) - p3 * (6300.0 / 1403.0)) *
+            (sin(hr) / cos(hr)));
+        clr.b = clr.a * (sin(hr) / cos(hr));
+    }
+    // Compressed channels as fixed linear combinations of p2, a and b
+    clr.RGBpa[0] = ((460.0 / 1403.0) * p2) +
+              ((451.0 / 1403.0) * clr.a) +
+              ((288.0 / 1403.0) * clr.b);
+    clr.RGBpa[1] = ((460.0 / 1403.0) * p2) -
+              ((891.0 / 1403.0) * clr.a) -
+              ((261.0 / 1403.0) * clr.b);
+    clr.RGBpa[2] = ((460.0 / 1403.0) * p2) -
+              ((220.0 / 1403.0) * clr.a) -
+              ((6300.0 / 1403.0) * clr.b);
+
+    return clr;
+}
+// Fills clr.RGBp from clr.RGBpa, channel by channel.
+static
+CAM02COLOR InverseNonlinearity(CAM02COLOR clr, cmsCIECAM02* pMod)
+{
+    cmsUInt32Number i;
+    cmsFloat64Number c1;
+    // c1 carries the sign of (RGBpa - 0.1); the magnitude is computed from its absolute value
+    for (i = 0; i < 3; i++) {
+        if ((clr.RGBpa[i] - 0.1) < 0) c1 = -1;
+        else                               c1 = 1;
+        clr.RGBp[i] = c1 * (100.0 / pMod->FL) *
+            pow(((27.13 * fabs(clr.RGBpa[i] - 0.1)) /
+            (400.0 - fabs(clr.RGBpa[i] - 0.1))),
+            (1.0 / 0.42));
+    }
+
+    return clr;
+}
+
+// HPE -> CAT02: fills clr.RGBc from clr.RGBp with a 3x3 matrix whose entries are spelled out as sums of products.
+static CAM02COLOR HPEtoCAT02(CAM02COLOR clr)
+{
+    const cmsFloat64Number M[9] = {
+        (( 0.7328 *  1.910197) + (0.4296 * 0.370950)),
+        (( 0.7328 * -1.112124) + (0.4296 * 0.629054)),
+        (( 0.7328 *  0.201908) + (0.4296 * 0.000008) - 0.1624),
+        ((-0.7036 *  1.910197) + (1.6975 * 0.370950)),
+        ((-0.7036 * -1.112124) + (1.6975 * 0.629054)),
+        ((-0.7036 *  0.201908) + (1.6975 * 0.000008) + 0.0061),
+        (( 0.0030 *  1.910197) + (0.0136 * 0.370950)),
+        (( 0.0030 * -1.112124) + (0.0136 * 0.629054)),
+        (( 0.0030 *  0.201908) + (0.0136 * 0.000008) + 0.9834)
+    };
+    cmsUInt32Number row;
+
+    for (row = 0; row < 3; row++)
+        clr.RGBc[row] = (clr.RGBp[0] * M[3 * row]) + (clr.RGBp[1] * M[3 * row + 1]) + (clr.RGBp[2] * M[3 * row + 2]);
+    return clr;
+}
+
+
+// Fills clr.RGB: each clr.RGBc channel is divided by Yw * D / RGBw[i] + 1 - D, white taken from pMod.
+static CAM02COLOR InverseChromaticAdaptation(CAM02COLOR clr,  cmsCIECAM02* pMod)
+{
+    const cmsFloat64Number Yw = pMod->adoptedWhite.XYZ[1];
+    const cmsFloat64Number D  = pMod->D;
+    cmsUInt32Number ch;
+    for (ch = 0; ch < 3; ch++)
+        clr.RGB[ch] = clr.RGBc[ch] / ((Yw * D / pMod->adoptedWhite.RGB[ch]) + 1.0 - D);
+    return clr;
+}
+
+
+// CAT02 RGB -> XYZ through a fixed 3x3 matrix; fills clr.XYZ and leaves the other fields alone.
+static CAM02COLOR CAT02toXYZ(CAM02COLOR clr)
+{
+    const cmsFloat64Number R = clr.RGB[0], G = clr.RGB[1], B = clr.RGB[2];
+    clr.XYZ[0] = (R *  1.096124) + (G * -0.278869) + (B *  0.182745);
+    clr.XYZ[1] = (R *  0.454369) + (G *  0.473533) + (B *  0.072098);
+    clr.XYZ[2] = (R * -0.009628) + (G * -0.005698) + (B *  1.015326);
+    return clr;
+}
+
+
+// Creates a CIECAM02 model for the given viewing conditions.
+//   ContextID - context used for the allocation and stored in the model
+//   pVC       - viewing conditions (white point, La, Yb, surround, D_value); must not be NULL
+// Returns an opaque handle to the model, or NULL if the allocation fails.
+// The handle is released with cmsCIECAM02Done().
+cmsHANDLE  CMSEXPORT cmsCIECAM02Init(cmsContext ContextID, const cmsViewingConditions* pVC)
+{
+    cmsCIECAM02* model;
+
+    _cmsAssert(pVC != NULL);
+
+    model = (cmsCIECAM02*) _cmsMallocZero(ContextID, sizeof(cmsCIECAM02));
+    if (model == NULL)
+        return NULL;
+
+    model->ContextID = ContextID;
+
+    // Copy the caller's viewing conditions
+    model->adoptedWhite.XYZ[0] = pVC->whitePoint.X;
+    model->adoptedWhite.XYZ[1] = pVC->whitePoint.Y;
+    model->adoptedWhite.XYZ[2] = pVC->whitePoint.Z;
+
+    model->LA       = pVC->La;
+    model->Yb       = pVC->Yb;
+    model->D        = pVC->D_value;
+    model->surround = pVC->surround;
+
+    // Surround-dependent constants F, c and Nc
+    switch (model->surround) {
+
+    case CUTSHEET_SURROUND:
+        model->F = 0.8;  model->c = 0.41;   model->Nc = 0.8;
+        break;
+
+    case DARK_SURROUND:
+        model->F = 0.8;  model->c = 0.525;  model->Nc = 0.8;
+        break;
+
+    case DIM_SURROUND:
+        model->F = 0.9;  model->c = 0.59;   model->Nc = 0.95;
+        break;
+
+    default:
+        // Average surround
+        model->F = 1.0;  model->c = 0.69;   model->Nc = 1.0;
+        break;
+    }
+
+    // Derived quantities. Order matters: z and Nbb are computed from n.
+    model->n   = compute_n(model);
+    model->z   = compute_z(model);
+    model->Nbb = computeNbb(model);
+    model->FL  = computeFL(model);
+    model->Ncb = model->Nbb;
+
+    // D_CALCULATE requests that D be computed here instead of taken from pVC
+    if (model->D == D_CALCULATE)
+        model->D = computeD(model);
+
+    // Run the adopted white through the forward stages up to the nonlinear
+    // compression, so that its RGB and A values are available to the model.
+    model->adoptedWhite = XYZtoCAT02(model->adoptedWhite);
+    model->adoptedWhite = ChromaticAdaptation(model->adoptedWhite, model);
+    model->adoptedWhite = CAT02toHPE(model->adoptedWhite);
+    model->adoptedWhite = NonlinearCompression(model->adoptedWhite, model);
+
+    return (cmsHANDLE) model;
+}
+
+void CMSEXPORT cmsCIECAM02Done(cmsHANDLE hModel)   // a NULL handle is ignored
+{
+    cmsCIECAM02* model = (cmsCIECAM02*) hModel;
+    if (model == NULL) return;
+    _cmsFree(model->ContextID, model);             // freed with the context stored in the model
+}
+
+
+// Forward model: converts XYZ (pIn) to JCh (pOut) using the model behind hModel.
+void CMSEXPORT cmsCIECAM02Forward(cmsHANDLE hModel, const cmsCIEXYZ* pIn, cmsJCh* pOut)
+{
+    cmsCIECAM02* lpMod = (cmsCIECAM02*) hModel;
+    CAM02COLOR work;
+
+    _cmsAssert(lpMod != NULL);
+    _cmsAssert(pIn != NULL);
+    _cmsAssert(pOut != NULL);
+
+    // Start from an all-zero working color that holds only the input XYZ
+    memset(&work, 0, sizeof(work));
+    work.XYZ[0] = pIn->X;
+    work.XYZ[1] = pIn->Y;
+    work.XYZ[2] = pIn->Z;
+
+    // Stages run innermost first: CAT02, adaptation, HPE, compression, correlates
+    work = ComputeCorrelates(
+               NonlinearCompression(
+                   CAT02toHPE(ChromaticAdaptation(XYZtoCAT02(work), lpMod)), lpMod), lpMod);
+
+    pOut->J = work.J;
+    pOut->C = work.C;
+    pOut->h = work.h;
+}
+
+// Reverse model: converts JCh (pIn) to XYZ (pOut) using the model behind hModel.
+void CMSEXPORT cmsCIECAM02Reverse(cmsHANDLE hModel, const cmsJCh* pIn, cmsCIEXYZ* pOut)
+{
+    cmsCIECAM02* lpMod = (cmsCIECAM02*) hModel;
+    CAM02COLOR work;
+
+    _cmsAssert(lpMod != NULL);
+    _cmsAssert(pIn != NULL);
+    _cmsAssert(pOut != NULL);
+
+    // Start from an all-zero working color that holds only the input J, C and h
+    memset(&work, 0, sizeof(work));
+    work.J = pIn->J;
+    work.C = pIn->C;
+    work.h = pIn->h;
+
+    // Stages run innermost first: correlates, nonlinearity, HPE, adaptation, CAT02
+    work = CAT02toXYZ(
+               InverseChromaticAdaptation(
+                   HPEtoCAT02(InverseNonlinearity(InverseCorrelates(work, lpMod), lpMod)), lpMod));
+
+    pOut->X = work.XYZ[0];
+    pOut->Y = work.XYZ[1];
+    pOut->Z = work.XYZ[2];
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmscgats.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmscgats.cpp
new file mode 100755
index 0000000000..7df5bbc4da
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmscgats.cpp
@@ -0,0 +1,2784 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// IT8.7 / CGATS.17-200x handling -----------------------------------------------------------------------------
+
+
+#define MAXID        128     // Max length of identifier (also sizes the double-format buffer in cmsIT8)
+#define MAXSTR      1024     // Max length of string
+#define MAXTABLES    255     // Max Number of tables in a single stream
+#define MAXINCLUDE    20     // Max number of nested includes
+
+#define DEFAULT_DBL_FORMAT  "%.10g" // Double formatting
+// DIR_CHAR: directory separator character, chosen per platform
+#ifdef CMS_IS_WINDOWS_
+#    include <io.h>
+#    define DIR_CHAR    '\\'
+#else
+#    define DIR_CHAR    '/'
+#endif
+
+
+// Symbols (token kinds) used by the IT8 parser; stored in the `sy` members of cmsIT8 and KEYWORD
+typedef enum {
+
+        SUNDEFINED,
+        SINUM,      // Integer
+        SDNUM,      // Real
+        SIDENT,     // Identifier
+        SSTRING,    // string
+        SCOMMENT,   // comment
+        SEOLN,      // End of line
+        SEOF,       // End of stream
+        SSYNERROR,  // Syntax error found on stream
+
+        // Keywords (their spellings are listed in TabKeys)
+
+        SBEGIN_DATA,
+        SBEGIN_DATA_FORMAT,
+        SEND_DATA,
+        SEND_DATA_FORMAT,
+        SKEYWORD,
+        SDATA_FORMAT_ID,
+        SINCLUDE
+
+    } SYMBOL;
+
+
+// How to write the value
+typedef enum {
+        // NOTE(review): the exact output of each mode is decided by the writer code, which is not visible here
+        WRITE_UNCOOKED,
+        WRITE_STRINGIFY,
+        WRITE_HEXADECIMAL,
+        WRITE_BINARY,
+        WRITE_PAIR
+
+    } WRITEMODE;
+
+// Linked list of variable names
+typedef struct _KeyVal {
+
+        struct _KeyVal*  Next;          // Next item of the list
+        char*            Keyword;       // Name of variable
+        struct _KeyVal*  NextSubkey;    // If key is a dictionary, points to the next item
+        char*            Subkey;        // If key is a dictionary, points to the subkey name
+        char*            Value;         // Points to value
+        WRITEMODE        WriteAs;       // How to write the value
+
+   } KEYVALUE;
+
+
+// Linked list of memory chunks (Memory sink)
+typedef struct _OwnedMem {
+
+        struct _OwnedMem* Next;         // Next chunk of the list
+        void *            Ptr;          // Points to the chunk
+
+   } OWNEDMEM;
+
+// Suballocator
+typedef struct _SubAllocator {
+         // NOTE(review): presumably Used counts the bytes of Block already handed out - confirm against the allocator code
+         cmsUInt8Number* Block;
+         cmsUInt32Number BlockSize;
+         cmsUInt32Number Used;
+
+    } SUBALLOCATOR;
+
+// Table. Each individual table can hold properties and rows & cols
+typedef struct _Table {
+
+        char SheetType[MAXSTR];               // The first row of the IT8 (the type)
+
+        int            nSamples, nPatches;    // Cols, Rows
+        int            SampleID;              // Pos of ID
+
+        KEYVALUE*      HeaderList;            // The properties
+
+        char**         DataFormat;            // The binary stream descriptor
+        char**         Data;                  // The binary stream
+
+    } TABLE;
+
+// File stream being parsed
+typedef struct _FileContext {
+        char           FileName[cmsMAX_PATH];    // File name if being read from a file
+        FILE*          Stream;                   // File stream or NULL if held in memory
+    } FILECTX;
+
+// This struct holds all information about an open IT8 handler.
+typedef struct {
+
+
+        cmsUInt32Number  TablesCount;                     // How many tables in this stream
+        cmsUInt32Number  nTable;                          // The actual table
+
+        TABLE Tab[MAXTABLES];                             // Table storage, at most MAXTABLES entries
+
+        // Memory management
+        OWNEDMEM*      MemorySink;            // The storage backend
+        SUBALLOCATOR   Allocator;             // String suballocator -- just to keep it fast
+
+        // Parser state machine
+        SYMBOL             sy;                // Current symbol
+        int                ch;                // Current character
+
+        cmsInt32Number     inum;              // integer value
+        cmsFloat64Number   dnum;              // real value
+
+        char           id[MAXID];             // identifier
+        char           str[MAXSTR];           // string
+
+        // Allowed keywords & datasets. They have visibility on whole stream
+        KEYVALUE*      ValidKeywords;
+        KEYVALUE*      ValidSampleID;
+
+        char*          Source;                // Points to loc. being parsed
+        cmsInt32Number lineno;                // line counter for error reporting
+
+        FILECTX*       FileStack[MAXINCLUDE]; // Stack of files being parsed
+        cmsInt32Number IncludeSP;             // Include Stack Pointer
+
+        char*          MemoryBlock;           // The stream if held in memory
+
+        char           DoubleFormatter[MAXID];// Printf-like 'cmsFloat64Number' formatter
+
+        cmsContext    ContextID;              // The threading context
+
+   } cmsIT8;
+
+
+// The stream for save operations
+typedef struct {
+
+        FILE* stream;   // For save-to-file behaviour
+        // NOTE(review): Base/Used/Max presumably describe the same memory buffer as Ptr - confirm against the writer code
+        cmsUInt8Number* Base;
+        cmsUInt8Number* Ptr;        // For save-to-mem behaviour
+        cmsUInt32Number Used;
+        cmsUInt32Number Max;
+
+    } SAVESTREAM;
+
+
+// ------------------------------------------------------ cmsIT8 parsing routines
+
+
+// A keyword: one entry of the keyword->symbol translation table
+typedef struct {
+
+        const char *id;    // Keyword text
+        SYMBOL sy;         // Symbol the keyword translates to
+
+   } KEYWORD;
+
+// The keyword->symbol translation table. Sorting is required (NOTE(review): presumably because the lookup is a binary search - confirm).
+static const KEYWORD TabKeys[] = {
+
+        {"$INCLUDE",               SINCLUDE},   // This is an extension!
+        {".INCLUDE",               SINCLUDE},   // This is an extension!
+
+        {"BEGIN_DATA",             SBEGIN_DATA },
+        {"BEGIN_DATA_FORMAT",      SBEGIN_DATA_FORMAT },
+        {"DATA_FORMAT_IDENTIFIER", SDATA_FORMAT_ID},
+        {"END_DATA",               SEND_DATA},
+        {"END_DATA_FORMAT",        SEND_DATA_FORMAT},
+        {"KEYWORD",                SKEYWORD}
+        };
+// Number of entries in TabKeys
+#define NUMKEYS (sizeof(TabKeys)/sizeof(KEYWORD))
+
+// Predefined properties
+
+// A property: the name of a header keyword known in advance, together with
+// the mode used when its value is written out.
+typedef struct {
+        const char *id;    // The identifier
+        WRITEMODE as;      // How is supposed to be written
+    } PROPERTY;
+
+// Header keywords that every newly allocated IT8 object knows about.
+// cmsIT8Alloc() registers each entry through AddAvailableProperty().
+static PROPERTY PredefinedProperties[] = {
+
+        {"NUMBER_OF_FIELDS", WRITE_UNCOOKED},    // Required - NUMBER OF FIELDS
+        {"NUMBER_OF_SETS",   WRITE_UNCOOKED},    // Required - NUMBER OF SETS
+        {"ORIGINATOR",       WRITE_STRINGIFY},   // Required - Identifies the specific system, organization or individual that created the data file.
+        {"FILE_DESCRIPTOR",  WRITE_STRINGIFY},   // Required - Describes the purpose or contents of the data file.
+        {"CREATED",          WRITE_STRINGIFY},   // Required - Indicates date of creation of the data file.
+        {"DESCRIPTOR",       WRITE_STRINGIFY},   // Required - Describes the purpose or contents of the data file.
+        {"DIFFUSE_GEOMETRY", WRITE_STRINGIFY},   // The diffuse geometry used. Allowed values are "sphere" or "opal".
+        {"MANUFACTURER",     WRITE_STRINGIFY},
+        {"MANUFACTURE",      WRITE_STRINGIFY},   // Some broken Fuji targets do store this value
+        {"PROD_DATE",        WRITE_STRINGIFY},   // Identifies year and month of production of the target in the form yyyy:mm.
+        {"SERIAL",           WRITE_STRINGIFY},   // Uniquely identifies individual physical target.
+
+        {"MATERIAL",         WRITE_STRINGIFY},    // Identifies the material on which the target was produced using a code
+                                                  // uniquely identifying the material. This is intended to be used for IT8.7
+                                                  // physical targets only (i.e. IT8.7/1 and IT8.7/2).
+
+        {"INSTRUMENTATION",  WRITE_STRINGIFY},    // Used to report the specific instrumentation used (manufacturer and
+                                                  // model number) to generate the data reported. This data will often
+                                                  // provide more information about the particular data collected than an
+                                                  // extensive list of specific details. This is particularly important for
+                                                  // spectral data or data derived from spectrophotometry.
+
+        {"MEASUREMENT_SOURCE", WRITE_STRINGIFY},  // Illumination used for spectral measurements. This data helps provide
+                                                  // a guide to the potential for issues of paper fluorescence, etc.
+
+        {"PRINT_CONDITIONS", WRITE_STRINGIFY},     // Used to define the characteristics of the printed sheet being reported.
+                                                   // Where standard conditions have been defined (e.g., SWOP at nominal)
+                                                   // named conditions may suffice. Otherwise, detailed information is
+                                                   // needed.
+
+        {"SAMPLE_BACKING",   WRITE_STRINGIFY},     // Identifies the backing material used behind the sample during
+                                                   // measurement. Allowed values are "black", "white", or "na".
+
+        {"CHISQ_DOF",        WRITE_STRINGIFY},     // Degrees of freedom associated with the Chi squared statistic
+                                                   // below properties are new in recent specs:
+
+        {"MEASUREMENT_GEOMETRY", WRITE_STRINGIFY}, // The type of measurement, either reflection or transmission, should be indicated
+                                                   // along with details of the geometry and the aperture size and shape. For example,
+                                                   // for transmission measurements it is important to identify 0/diffuse, diffuse/0,
+                                                   // opal or integrating sphere, etc. For reflection it is important to identify 0/45,
+                                                   // 45/0, sphere (specular included or excluded), etc.
+
+       {"FILTER",            WRITE_STRINGIFY},     // Identifies the use of physical filter(s) during measurement. Typically used to
+                                                   // denote the use of filters such as none, D65, Red, Green or Blue.
+
+       {"POLARIZATION",      WRITE_STRINGIFY},     // Identifies the use of a physical polarization filter during measurement. Allowed
+                                                   // values are "yes", "white", "none" or "na".
+
+       {"WEIGHTING_FUNCTION", WRITE_PAIR},         // Indicates such functions as: the CIE standard observer functions used in the
+                                                   // calculation of various data parameters (2 degree and 10 degree), CIE standard
+                                                   // illuminant functions used in the calculation of various data parameters (e.g., D50,
+                                                   // D65, etc.), density status response, etc. If used there shall be at least one
+                                                   // name-value pair following the WEIGHTING_FUNCTION tag/keyword. The first attribute
+                                                   // in the set shall be "name" and shall identify the particular parameter used.
+                                                   // The second shall be "value" and shall provide the value associated with that name.
+                                                   // For ASCII data, a string containing the Name and Value attribute pairs shall follow
+                                                   // the weighting function keyword. A semi-colon separates attribute pairs from each
+                                                   // other and within the attribute the name and value are separated by a comma.
+
+       {"COMPUTATIONAL_PARAMETER", WRITE_PAIR},    // Parameter that is used in computing a value from measured data. Name is the name
+                                                   // of the calculation, parameter is the name of the parameter used in the calculation
+                                                   // and value is the value of the parameter.
+
+       {"TARGET_TYPE",        WRITE_STRINGIFY},    // The type of target being measured, e.g. IT8.7/1, IT8.7/3, user defined, etc.
+
+       {"COLORANT",           WRITE_STRINGIFY},    // Identifies the colorant(s) used in creating the target.
+
+       {"TABLE_DESCRIPTOR",   WRITE_STRINGIFY},    // Describes the purpose or contents of a data table.
+
+       {"TABLE_NAME",         WRITE_STRINGIFY}     // Provides a short name for a data table.
+};
+
+// Number of entries in PredefinedProperties
+#define NUMPREDEFINEDPROPS (sizeof(PredefinedProperties)/sizeof(PROPERTY))
+
+
+// Predefined sample types on dataset.
+// cmsIT8Alloc() registers each name through AddAvailableSampleID().
+static const char* PredefinedSampleID[] = {
+        "SAMPLE_ID",      // Identifies sample that data represents
+        "STRING",         // Identifies label, or other non-machine readable value.
+                          // Value must begin and end with a " symbol
+
+        "CMYK_C",         // Cyan component of CMYK data expressed as a percentage
+        "CMYK_M",         // Magenta component of CMYK data expressed as a percentage
+        "CMYK_Y",         // Yellow component of CMYK data expressed as a percentage
+        "CMYK_K",         // Black component of CMYK data expressed as a percentage
+        "D_RED",          // Red filter density
+        "D_GREEN",        // Green filter density
+        "D_BLUE",         // Blue filter density
+        "D_VIS",          // Visual filter density
+        "D_MAJOR_FILTER", // Major filter density
+        "RGB_R",          // Red component of RGB data
+        "RGB_G",          // Green component of RGB data
+        "RGB_B",          // Blue component of RGB data
+        "SPECTRAL_NM",    // Wavelength of measurement expressed in nanometers
+        "SPECTRAL_PCT",   // Percentage reflectance/transmittance
+        "SPECTRAL_DEC",   // Reflectance/transmittance
+        "XYZ_X",          // X component of tristimulus data
+        "XYZ_Y",          // Y component of tristimulus data
+        "XYZ_Z",          // Z component of tristimulus data
+        "XYY_X",          // x component of chromaticity data
+        "XYY_Y",          // y component of chromaticity data
+        "XYY_CAPY",       // Y component of tristimulus data
+        "LAB_L",          // L* component of Lab data
+        "LAB_A",          // a* component of Lab data
+        "LAB_B",          // b* component of Lab data
+        "LAB_C",          // C*ab component of Lab data
+        "LAB_H",          // hab component of Lab data
+        "LAB_DE",         // CIE dE
+        "LAB_DE_94",      // CIE dE using CIE 94
+        "LAB_DE_CMC",     // dE using CMC
+        "LAB_DE_2000",    // CIE dE using CIE DE 2000
+        "MEAN_DE",        // Mean Delta E (LAB_DE) of samples compared to batch average
+                          // (Used for data files for ANSI IT8.7/1 and IT8.7/2 targets)
+        "STDEV_X",        // Standard deviation of X (tristimulus data)
+        "STDEV_Y",        // Standard deviation of Y (tristimulus data)
+        "STDEV_Z",        // Standard deviation of Z (tristimulus data)
+        "STDEV_L",        // Standard deviation of L*
+        "STDEV_A",        // Standard deviation of a*
+        "STDEV_B",        // Standard deviation of b*
+        "STDEV_DE",       // Standard deviation of CIE dE
+        "CHI_SQD_PAR"};   // The average of the standard deviations of L*, a* and b*. It is
+                          // used to derive an estimate of the chi-squared parameter which is
+                          // recommended as the predictor of the variability of dE
+
+// Number of entries in PredefinedSampleID
+#define NUMPREDEFINEDSAMPLEID (sizeof(PredefinedSampleID)/sizeof(char *))
+
+// Forward declaration of some internal functions.
+// AllocChunk() is defined further down, with the memory management routines,
+// but InSymbol() needs it earlier to obtain the FILECTX of an included file.
+static void* AllocChunk(cmsIT8* it8, cmsUInt32Number size);
+
+// Tells whether c is one of the blanks that separate tokens inside a line
+// (a space or a horizontal tab). Line terminators are not separators.
+static
+cmsBool isseparator(int c)
+{
+    switch (c) {
+
+    case ' ':
+    case '\t':
+        return TRUE;
+
+    default:
+        return FALSE;
+    }
+}
+
+// Tells whether c may take part in an identifier: a visible ASCII character
+// (codes 33..126) other than the comment mark and the two quote characters.
+static
+cmsBool ismiddle(int c)
+{
+    // Blanks never belong to an identifier
+    if (isseparator(c)) return FALSE;
+
+    // '#' opens a comment, quotes open a string
+    if (c == '#' || c == '\"' || c == '\'') return FALSE;
+
+    // Control codes, space and everything beyond 7-bit printable are rejected
+    if (c <= 32 || c >= 127) return FALSE;
+
+    return TRUE;
+}
+
+// Tells whether c can continue an identifier that has already started:
+// any alphanumeric character, or anything ismiddle() accepts.
+static
+cmsBool isidchar(int c)
+{
+    if (isalnum(c))
+        return TRUE;
+
+    return ismiddle(c);
+}
+
+// Tells whether c can open an identifier: the same set ismiddle() accepts,
+// minus the decimal digits (a leading digit starts a number instead).
+static
+cmsBool isfirstidchar(int c)
+{
+    if (isdigit(c))
+        return FALSE;
+
+    return ismiddle(c);
+}
+
+// Heuristic test for absolute paths. A path is taken as absolute when it
+// begins with the directory separator or, on Windows builds, with a drive
+// specification such as "C:". NULL and empty strings are never absolute.
+static
+cmsBool isabsolutepath(const char *path)
+{
+    char first, second;
+
+    if (path == NULL || path[0] == 0)
+        return FALSE;
+
+    // path is known to be non-empty, so reading the second byte is safe:
+    // at worst it is the terminator
+    first  = path[0];
+    second = path[1];
+
+    if (first == DIR_CHAR)
+        return TRUE;
+
+#ifdef  CMS_IS_WINDOWS_
+    if (isalpha((int) first) && second == ':')
+        return TRUE;
+#else
+    (void) second;      // Only meaningful for drive letters
+#endif
+
+    return FALSE;
+}
+
+
+// Makes a file path based on a given reference path
+// NOTE: this function doesn't check if the path exists or even if it's legal
+//
+//   relPath   The path to resolve. If it already looks absolute it is copied
+//             as is (truncated to fit).
+//   basePath  Path of the referring file; everything up to and including its
+//             last directory separator is used as the prefix.
+//   buffer    Receives the result, always zero terminated on success.
+//   MaxLen    Number of bytes available in buffer. Nothing is ever written
+//             at or beyond buffer[MaxLen].
+//
+// Returns FALSE if an argument is missing, if basePath holds no directory
+// separator, or if the combined path does not fit in MaxLen bytes including
+// its terminator.
+static
+cmsBool BuildAbsolutePath(const char *relPath, const char *basePath, char *buffer, cmsUInt32Number MaxLen)
+{
+    char *tail;
+    cmsUInt32Number len;
+    size_t relLen;
+
+    // Nothing sensible can be built without inputs or without room
+    if (relPath == NULL || basePath == NULL || buffer == NULL || MaxLen == 0)
+        return FALSE;
+
+    // Already absolute?
+    if (isabsolutepath(relPath)) {
+
+        strncpy(buffer, relPath, MaxLen);
+        buffer[MaxLen-1] = 0;
+        return TRUE;
+    }
+
+    // No, search for last
+    strncpy(buffer, basePath, MaxLen);
+    buffer[MaxLen-1] = 0;
+
+    tail = strrchr(buffer, DIR_CHAR);
+    if (tail == NULL) return FALSE;    // Is not absolute and has no separators??
+
+    // Offset of the first byte after the separator, where relPath is appended
+    len = (cmsUInt32Number) (tail - buffer) + 1;
+    if (len >= MaxLen) return FALSE;
+
+    // The relative part and its terminator must fit in what is left. The
+    // former unbounded-by-one strncpy could write buffer[MaxLen] and leave
+    // the result without terminator.
+    relLen = strlen(relPath);
+    if (relLen >= (size_t) (MaxLen - len)) return FALSE;
+
+    memcpy(buffer + len, relPath, relLen + 1);
+
+    return TRUE;
+}
+
+
+// Guards text that is about to be used as a printf-like format: if it holds
+// any '%' a fixed warning text is returned in its place, otherwise the
+// original pointer is handed back untouched.
+static
+const char* NoMeta(const char* str)
+{
+    const char* scan;
+
+    for (scan = str; *scan != '\0'; scan++) {
+
+        if (*scan == '%')
+            return "**** CORRUPTED FORMAT STRING ***";
+    }
+
+    return str;
+}
+
+// Syntax error
+static
+cmsBool SynError(cmsIT8* it8, const char *Txt, ...)
+{
+    char Buffer[256], ErrMsg[1024];
+    va_list args;
+
+    va_start(args, Txt);
+    vsnprintf(Buffer, 255, Txt, args);
+    Buffer[255] = 0;
+    va_end(args);
+
+    snprintf(ErrMsg, 1023, "%s: Line %d, %s", it8->FileStack[it8 ->IncludeSP]->FileName, it8->lineno, Buffer);
+    ErrMsg[1023] = 0;
+    it8->sy = SSYNERROR;
+    cmsSignalError(it8 ->ContextID, cmsERROR_CORRUPTION_DETECTED, "%s", ErrMsg);
+    return FALSE;
+}
+
+// Verifies that the current symbol is the expected one. On mismatch the
+// given text (sanitized through NoMeta) is reported as a syntax error and
+// FALSE is returned.
+static
+cmsBool Check(cmsIT8* it8, SYMBOL sy, const char* Err)
+{
+        if (it8->sy == sy)
+                return TRUE;
+
+        return SynError(it8, NoMeta(Err));
+}
+
+// Fetches the next character into it8->ch. Input comes from the file on top
+// of the include stack when it has an open stream, otherwise from the
+// in-memory text pointed to by it8->Source.
+static
+void NextCh(cmsIT8* it8)
+{
+    FILE* in = it8->FileStack[it8->IncludeSP]->Stream;
+
+    if (in == NULL) {
+
+        // Memory source: the pointer stays on the terminating zero
+        it8->ch = *it8->Source;
+        if (it8->ch != 0)
+            it8->Source++;
+        return;
+    }
+
+    it8->ch = fgetc(in);
+
+    if (!feof(in))
+        return;
+
+    if (it8->IncludeSP > 0) {
+
+        // End of an included file: close it and go back to the including one
+        fclose(in);
+        it8->IncludeSP--;
+        it8->ch = ' ';                  // Whitespace to be ignored
+    }
+    else {
+        it8->ch = 0;                    // EOF
+    }
+}
+
+
+// Looks the identifier up in TabKeys by case-insensitive binary search.
+// Returns the symbol bound to the keyword, or SUNDEFINED when the
+// identifier is not a keyword.
+static
+SYMBOL BinSrchKey(const char *id)
+{
+    int lo = 0;
+    int hi = (int) NUMKEYS - 1;
+
+    while (lo <= hi) {
+
+        const int mid = (lo + hi) / 2;
+        const int cmp = cmsstrcasecmp(id, TabKeys[mid].id);
+
+        if (cmp == 0)
+            return TabKeys[mid].sy;
+
+        if (cmp < 0)
+            hi = mid - 1;       // Continue on lower half
+        else
+            lo = mid + 1;       // Continue on upper half
+    }
+
+    return SUNDEFINED;
+}
+
+
+// Returns ten raised to the n-th power; n may be negative.
+static
+cmsFloat64Number xpow10(int n)
+{
+    const cmsFloat64Number exponent = (cmsFloat64Number) n;
+
+    return pow(10.0, exponent);
+}
+
+
+// Reads a real number into it8->dnum. The integer digits already consumed
+// by the caller are supplied in inum; reading goes on from the current
+// character through further integer digits, an optional fraction and an
+// optional exponent.
+static
+void ReadReal(cmsIT8* it8, cmsInt32Number inum)
+{
+    cmsInt32Number expo;
+    cmsInt32Number exposign;
+
+    it8->dnum = (cmsFloat64Number)inum;
+
+    // Remaining digits of the integer part
+    for (; isdigit(it8->ch); NextCh(it8)) {
+
+        it8->dnum = (cmsFloat64Number)it8->dnum * 10.0 + (cmsFloat64Number)(it8->ch - '0');
+    }
+
+    // Fractional part
+    if (it8->ch == '.') {
+
+        cmsFloat64Number fraction = 0.0;
+        int ndigits = 0;
+
+        NextCh(it8);               // Eats dec. point
+
+        for (; isdigit(it8->ch); NextCh(it8)) {
+
+            fraction = fraction * 10.0 + (cmsFloat64Number)(it8->ch - '0');
+            ndigits++;
+        }
+
+        it8->dnum = it8->dnum + (fraction / xpow10(ndigits));
+    }
+
+    // Exponent, example 34.00E+20
+    if (toupper(it8->ch) != 'E')
+        return;
+
+    NextCh(it8);
+    exposign = 1;
+
+    if (it8->ch == '-') {
+
+        exposign = -1;
+        NextCh(it8);
+    }
+    else if (it8->ch == '+') {
+
+        NextCh(it8);
+    }
+
+    expo = 0;
+    for (; isdigit(it8->ch); NextCh(it8)) {
+
+        cmsInt32Number digit = (it8->ch - '0');
+
+        // Digits that would overflow the 32 bit exponent are consumed but ignored
+        if ((cmsFloat64Number)expo * 10.0 + (cmsFloat64Number)digit < (cmsFloat64Number)+2147483647.0)
+            expo = expo * 10 + digit;
+    }
+
+    expo = exposign * expo;
+    it8->dnum = it8->dnum * xpow10(expo);
+}
+
+// Converts text to a floating point number.
+// atof() is deliberately avoided because it honours the current locale,
+// while CCMX files always use . as decimal separator.
+// Accepted form: [+|-]digits[.digits][E[+|-]digits]. Parsing stops at the
+// first character that does not fit; a NULL Buffer yields 0.0.
+static
+cmsFloat64Number ParseFloatNumber(const char *Buffer)
+{
+    const char* cur = Buffer;
+    cmsFloat64Number value = 0.0;
+    int sign = 1;
+
+    // keep safe
+    if (cur == NULL) return 0.0;
+
+    // Optional sign
+    if (*cur == '-') {
+
+        sign = -1;
+        cur++;
+    }
+    else if (*cur == '+') {
+
+        cur++;
+    }
+
+    // Integer part
+    for (; *cur && isdigit((int)*cur); cur++) {
+
+        value = value * 10.0 + (*cur - '0');
+    }
+
+    // Fractional part
+    if (*cur == '.') {
+
+        cmsFloat64Number fraction = 0.0;
+        int ndigits = 0;
+
+        cur++;
+
+        for (; *cur && isdigit((int)*cur); cur++) {
+
+            fraction = fraction * 10.0 + (*cur - '0');
+            ndigits++;
+        }
+
+        value = value + (fraction / xpow10(ndigits));
+    }
+
+    // Exponent, example 34.00E+20
+    if (*cur && toupper(*cur) == 'E') {
+
+        int expo = 0;
+        int exposign = 1;
+
+        cur++;
+
+        if (*cur == '-') {
+
+            exposign = -1;
+            cur++;
+        }
+        else if (*cur == '+') {
+
+            cur++;
+        }
+
+        for (; *cur && isdigit((int)*cur); cur++) {
+
+            cmsInt32Number digit = (*cur - '0');
+
+            // Digits that would overflow the exponent are skipped
+            if ((cmsFloat64Number)expo * 10.0 + digit < (cmsFloat64Number)+2147483647.0)
+                expo = expo * 10 + digit;
+        }
+
+        expo = exposign * expo;
+        value = value * xpow10(expo);
+    }
+
+    return sign * value;
+}
+
+
+// Reads next symbol.
+//
+// This is the lexical analyzer. Starting at the current character it8->ch it
+// recognizes one token and leaves its kind in it8->sy and its payload in
+// it8->id (identifiers), it8->str (quoted strings), it8->inum (integers) or
+// it8->dnum (reals). Comments are skipped transparently. On malformed input
+// SynError() is called, which sets it8->sy to SSYNERROR.
+//
+// The include extension is resolved here as well: when the token is
+// SINCLUDE, the file name that follows is opened, pushed on the include
+// stack and the first symbol of that file is returned instead.
+static
+void InSymbol(cmsIT8* it8)
+{
+    register char *idptr;
+    register int k;
+    SYMBOL key;
+    int sng;
+
+    do {
+
+        // Blanks between tokens carry no meaning
+        while (isseparator(it8->ch))
+            NextCh(it8);
+
+        if (isfirstidchar(it8->ch)) {          // Identifier
+
+            k = 0;
+            idptr = it8->id;
+
+            do {
+
+                // Characters beyond MAXID-1 are consumed but not stored
+                if (++k < MAXID) *idptr++ = (char) it8->ch;
+
+                NextCh(it8);
+
+            } while (isidchar(it8->ch));
+
+            *idptr = '\0';
+
+
+            // Reserved words get their own symbol, anything else is an identifier
+            key = BinSrchKey(it8->id);
+            if (key == SUNDEFINED) it8->sy = SIDENT;
+            else it8->sy = key;
+
+        }
+        else                         // Is a number?
+            if (isdigit(it8->ch) || it8->ch == '.' || it8->ch == '-' || it8->ch == '+')
+            {
+                int sign = 1;
+
+                if (it8->ch == '-') {
+                    sign = -1;
+                    NextCh(it8);
+                }
+
+                it8->inum = 0;
+                it8->sy   = SINUM;
+
+                if (it8->ch == '0') {          // 0xnnnn (Hexa) or 0bnnnn (Binary)
+
+                    NextCh(it8);
+                    if (toupper(it8->ch) == 'X') {
+
+                        int j;
+
+                        NextCh(it8);
+                        while (isxdigit(it8->ch))
+                        {
+                            it8->ch = toupper(it8->ch);
+                            if (it8->ch >= 'A' && it8->ch <= 'F')  j = it8->ch -'A'+10;
+                            else j = it8->ch - '0';
+
+                            // The check is done in floating point to detect 32 bit overflow
+                            if ((cmsFloat64Number) it8->inum * 16.0 + (cmsFloat64Number) j > (cmsFloat64Number)+2147483647.0)
+                            {
+                                SynError(it8, "Invalid hexadecimal number");
+                                return;
+                            }
+
+                            it8->inum = it8->inum * 16 + j;
+                            NextCh(it8);
+                        }
+                        return;
+                    }
+
+                    if (toupper(it8->ch) == 'B') {  // Binary
+
+                        int j;
+
+                        NextCh(it8);
+                        while (it8->ch == '0' || it8->ch == '1')
+                        {
+                            j = it8->ch - '0';
+
+                            if ((cmsFloat64Number) it8->inum * 2.0 + j > (cmsFloat64Number)+2147483647.0)
+                            {
+                                SynError(it8, "Invalid binary number");
+                                return;
+                            }
+
+                            it8->inum = it8->inum * 2 + j;
+                            NextCh(it8);
+                        }
+                        return;
+                    }
+                }
+
+
+                while (isdigit(it8->ch)) {
+
+                    cmsInt32Number digit = (it8->ch - '0');
+
+                    // An integer too big for 32 bits is continued as a real
+                    if ((cmsFloat64Number) it8->inum * 10.0 + (cmsFloat64Number) digit > (cmsFloat64Number) +2147483647.0) {
+                        ReadReal(it8, it8->inum);
+                        it8->sy = SDNUM;
+                        it8->dnum *= sign;
+                        return;
+                    }
+
+                    it8->inum = it8->inum * 10 + digit;
+                    NextCh(it8);
+                }
+
+                // A decimal point turns the number into a real
+                if (it8->ch == '.') {
+
+                    ReadReal(it8, it8->inum);
+                    it8->sy = SDNUM;
+                    it8->dnum *= sign;
+                    return;
+                }
+
+                it8 -> inum *= sign;
+
+                // Special case. Numbers followed by letters are taken as identifiers
+
+                if (isidchar(it8 ->ch)) {
+
+                    // Rebuild the textual form of what was read so far ...
+                    if (it8 ->sy == SINUM) {
+
+                        snprintf(it8->id, 127, "%d", it8->inum);
+                    }
+                    else {
+
+                        snprintf(it8->id, 127, it8 ->DoubleFormatter, it8->dnum);
+                    }
+
+                    // ... and append the remaining identifier characters
+                    k = (int) strlen(it8 ->id);
+                    idptr = it8 ->id + k;
+                    do {
+
+                        if (++k < MAXID) *idptr++ = (char) it8->ch;
+
+                        NextCh(it8);
+
+                    } while (isidchar(it8->ch));
+
+                    *idptr = '\0';
+                    it8->sy = SIDENT;
+                }
+                return;
+
+            }
+            else
+                switch ((int) it8->ch) {
+
+        // EOF marker -- ignore it
+        case '\x1a':
+            NextCh(it8);
+            break;
+
+        // Eof stream markers
+        case 0:
+        case -1:
+            it8->sy = SEOF;
+            break;
+
+
+        // Next line. CR, LF and CR+LF each count as one line end
+        case '\r':
+            NextCh(it8);
+            if (it8 ->ch == '\n')
+                NextCh(it8);
+            it8->sy = SEOLN;
+            it8->lineno++;
+            break;
+
+        case '\n':
+            NextCh(it8);
+            it8->sy = SEOLN;
+            it8->lineno++;
+            break;
+
+        // Comment: runs up to, not including, the end of line
+        case '#':
+            NextCh(it8);
+            while (it8->ch && it8->ch != '\n' && it8->ch != '\r')
+                NextCh(it8);
+
+            it8->sy = SCOMMENT;
+            break;
+
+        // String. Delimited by a pair of the same quote character
+        case '\'':
+        case '\"':
+            idptr = it8->str;
+            sng = it8->ch;
+            k = 0;
+            NextCh(it8);
+
+            while (k < (MAXSTR-1) && it8->ch != sng) {
+
+                // A line end inside the string terminates it: force loop exit
+                if (it8->ch == '\n'|| it8->ch == '\r') k = MAXSTR+1;
+                else {
+                    *idptr++ = (char) it8->ch;
+                    NextCh(it8);
+                    k++;
+                }
+            }
+
+            it8->sy = SSTRING;
+            *idptr = '\0';
+            NextCh(it8);
+            break;
+
+
+        default:
+            SynError(it8, "Unrecognized character: 0x%x", it8 ->ch);
+            return;
+            }
+
+    } while (it8->sy == SCOMMENT);
+
+    // Handle the include special token
+
+    if (it8 -> sy == SINCLUDE) {
+
+                FILECTX* FileNest;
+
+                if(it8 -> IncludeSP >= (MAXINCLUDE-1)) {
+
+                    SynError(it8, "Too many recursion levels");
+                    return;
+                }
+
+                InSymbol(it8);
+                if (!Check(it8, SSTRING, "Filename expected")) return;
+
+                // Slots of the include stack are allocated on first use and kept
+                FileNest = it8 -> FileStack[it8 -> IncludeSP + 1];
+                if(FileNest == NULL) {
+
+                    FileNest = it8 ->FileStack[it8 -> IncludeSP + 1] = (FILECTX*)AllocChunk(it8, sizeof(FILECTX));
+
+                    // Without a context there is nowhere to store the file
+                    // name, so the include cannot proceed
+                    if(FileNest == NULL) {
+
+                        SynError(it8, "Out of memory");
+                        return;
+                    }
+                }
+
+                // Relative names are resolved against the including file
+                if (BuildAbsolutePath(it8->str,
+                                      it8->FileStack[it8->IncludeSP]->FileName,
+                                      FileNest->FileName, cmsMAX_PATH-1) == FALSE) {
+                    SynError(it8, "File path too long");
+                    return;
+                }
+
+                FileNest->Stream = fopen(FileNest->FileName, "rt");
+                if (FileNest->Stream == NULL) {
+
+                        SynError(it8, "File %s not found", FileNest->FileName);
+                        return;
+                }
+                it8->IncludeSP++;
+
+                // Start over on the new file with a harmless current character
+                it8 ->ch = ' ';
+                InSymbol(it8);
+    }
+
+}
+
+// Requires the current symbol to be an end of line, then moves past it and
+// any further consecutive end of line symbols. Returns FALSE, after
+// reporting a syntax error, when the current symbol is something else.
+static
+cmsBool CheckEOLN(cmsIT8* it8)
+{
+        if (!Check(it8, SEOLN, "Expected separator"))
+                return FALSE;
+
+        // The current symbol is known to be SEOLN at this point
+        do {
+                InSymbol(it8);
+        } while (it8->sy == SEOLN);
+
+        return TRUE;
+}
+
+// Skip a symbol: advances to the next one only if the current symbol is the
+// given one. End of file is never skipped.
+static
+void Skip(cmsIT8* it8, SYMBOL sy)
+{
+        if (it8->sy != sy)
+                return;
+
+        if (it8->sy == SEOF)
+                return;
+
+        InSymbol(it8);
+}
+
+
+// Skip multiple EOLN: consumes symbols for as long as the current one is an
+// end of line. Does nothing when the current symbol is anything else.
+static
+void SkipEOLN(cmsIT8* it8)
+{
+    for (;;) {
+
+        if (it8->sy != SEOLN)
+            break;
+
+        InSymbol(it8);
+    }
+}
+
+
+// Returns a string holding current value.
+//
+// The current symbol is rendered as text into Buffer: identifiers and
+// strings are copied, integers are printed with "%d", reals with the
+// formatter configured in the object, and an end of line gives an empty
+// string.
+//
+//   Buffer      Destination, at least max bytes long.
+//   max         Size of Buffer in bytes, must be at least 1. The result is
+//               always zero terminated inside these max bytes.
+//   ErrorTitle  Text of the syntax error raised when the current symbol has
+//               no textual value.
+//
+// Returns TRUE on success, FALSE (after SynError) otherwise.
+static
+cmsBool GetVal(cmsIT8* it8, char* Buffer, cmsUInt32Number max, const char* ErrorTitle)
+{
+    switch (it8->sy) {
+
+    case SEOLN:   // Empty value
+                  Buffer[0]=0;
+                  break;
+    case SIDENT:  strncpy(Buffer, it8->id, max);
+                  break;
+    case SINUM:   snprintf(Buffer, max, "%d", it8 -> inum); break;
+    case SDNUM:   snprintf(Buffer, max, it8->DoubleFormatter, it8 -> dnum); break;
+    case SSTRING: strncpy(Buffer, it8->str, max);
+                  break;
+
+
+    default:
+         return SynError(it8, "%s", ErrorTitle);
+    }
+
+    // Single termination point for every branch. strncpy, and snprintf on
+    // some runtimes, leave a truncated result unterminated. The terminator
+    // goes on the last byte inside the limit; writing Buffer[max] would be
+    // one byte past it.
+    Buffer[max-1] = 0;
+    return TRUE;
+}
+
+// ---------------------------------------------------------- Table
+
+// Returns the table currently selected by it8->nTable. If that index does
+// not refer to an existing table a syntax error is reported and the first
+// table is returned, so the result is never NULL.
+static
+TABLE* GetTable(cmsIT8* it8)
+{
+   if (it8->nTable < it8->TablesCount)
+           return it8->Tab + it8->nTable;
+
+   SynError(it8, "Table %d out of sequence", it8->nTable);
+   return it8->Tab;
+}
+
+// ---------------------------------------------------------- Memory management
+
+
+// Releases an IT8 object: every block recorded in the list of owned memory,
+// the in-memory copy of the stream if there is one, and finally the object
+// itself. Passing NULL is allowed and does nothing.
+void CMSEXPORT cmsIT8Free(cmsHANDLE hIT8)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    OWNEDMEM* node;
+
+    if (it8 == NULL)
+        return;
+
+    // Walk the list of owned blocks. The link is saved before the node
+    // holding it is freed.
+    node = it8->MemorySink;
+    while (node != NULL) {
+
+        OWNEDMEM* following = node->Next;
+
+        if (node->Ptr) _cmsFree(it8->ContextID, node->Ptr);
+        _cmsFree(it8->ContextID, node);
+
+        node = following;
+    }
+
+    if (it8->MemoryBlock)
+        _cmsFree(it8->ContextID, it8->MemoryBlock);
+
+    _cmsFree(it8->ContextID, it8);
+}
+
+
+// Allocates a zero-filled block of the given size and records it at the
+// head of the list of owned memory, so cmsIT8Free() can release it later.
+// Returns NULL, leaving the list untouched, if either the block or its
+// list node cannot be allocated.
+static
+void* AllocBigBlock(cmsIT8* it8, cmsUInt32Number size)
+{
+    OWNEDMEM* node;
+    void* block;
+
+    block = _cmsMallocZero(it8->ContextID, size);
+    if (block == NULL)
+        return NULL;
+
+    node = (OWNEDMEM*) _cmsMallocZero(it8->ContextID, sizeof(OWNEDMEM));
+    if (node == NULL) {
+
+        // The block cannot be tracked, so give it back
+        _cmsFree(it8->ContextID, block);
+        return NULL;
+    }
+
+    node->Ptr  = block;
+    node->Next = it8->MemorySink;
+    it8->MemorySink = node;
+
+    return block;
+}
+
+
+// Suballocator.
+//
+// Carves small pieces out of big blocks obtained through AllocBigBlock().
+// The requested size is rounded up with _cmsALIGNMEM. When the current block
+// has not enough room a new one is requested: 20K the first time, twice the
+// previous size afterwards, and never less than the request itself. Pieces
+// are never released individually; everything goes away in cmsIT8Free().
+//
+// Returns a pointer to the piece, or NULL when no block is available.
+static
+void* AllocChunk(cmsIT8* it8, cmsUInt32Number size)
+{
+    cmsUInt32Number Free = it8 ->Allocator.BlockSize - it8 ->Allocator.Used;
+    cmsUInt8Number* ptr;
+
+    size = _cmsALIGNMEM(size);
+
+    if (size > Free) {
+
+        if (it8 -> Allocator.BlockSize == 0)
+
+                it8 -> Allocator.BlockSize = 20*1024;
+        else
+                it8 ->Allocator.BlockSize *= 2;
+
+        if (it8 ->Allocator.BlockSize < size)
+                it8 ->Allocator.BlockSize = size;
+
+        it8 ->Allocator.Used = 0;
+        it8 ->Allocator.Block = (cmsUInt8Number*)  AllocBigBlock(it8, it8 ->Allocator.BlockSize);
+    }
+
+    // The big block allocation may have failed, now or on a previous call.
+    // Bail out before touching Used: otherwise a later, smaller request
+    // would be answered with NULL + Used, a bogus non-NULL pointer.
+    if (it8 ->Allocator.Block == NULL)
+        return NULL;
+
+    ptr = it8 ->Allocator.Block + it8 ->Allocator.Used;
+    it8 ->Allocator.Used += size;
+
+    return (void*) ptr;
+
+}
+
+
+// Duplicates a string into memory taken from the suballocator.
+// Returns the copy, or NULL if AllocChunk() returned NULL.
+static
+char *AllocString(cmsIT8* it8, const char* str)
+{
+    const cmsUInt32Number Needed = (cmsUInt32Number) strlen(str) + 1;
+    char* copy = (char *) AllocChunk(it8, Needed);
+
+    if (copy == NULL)
+        return copy;
+
+    // Only the characters are copied; the byte reserved for the terminator
+    // is left as delivered by the allocator
+    memcpy(copy, str, Needed - 1);
+
+    return copy;
+}
+
+// Searches through linked list.
+//
+// Looks for Key (case-insensitive) along the Next chain starting at p and,
+// if Subkey is given, for Subkey along the NextSubkey chain of the entry
+// found. A Key beginning with '#' never matches, so comments are ignored.
+//
+// When LastPtr is not NULL it receives the last entry examined: the match
+// on success, otherwise the entry where the search stopped (the tail of the
+// list, or the last entry carrying a subkey for the matched key).
+static
+cmsBool IsAvailableOnList(KEYVALUE* p, const char* Key, const char* Subkey, KEYVALUE** LastPtr)
+{
+    KEYVALUE* node = p;
+
+    if (LastPtr != NULL) *LastPtr = node;
+
+    // First stage: locate the keyword
+    while (node != NULL) {
+
+        if (LastPtr != NULL) *LastPtr = node;
+
+        // Comments are ignored
+        if (*Key != '#' && cmsstrcasecmp(Key, node->Keyword) == 0)
+            break;
+
+        node = node->Next;
+    }
+
+    if (node == NULL)
+        return FALSE;
+
+    if (Subkey == 0)
+        return TRUE;
+
+    // Second stage: locate the subkey among entries sharing the keyword
+    while (node != NULL) {
+
+        if (node->Subkey != NULL) {
+
+            if (LastPtr != NULL) *LastPtr = node;
+
+            if (cmsstrcasecmp(Subkey, node->Subkey) == 0)
+                return TRUE;
+        }
+
+        node = node->NextSubkey;
+    }
+
+    return FALSE;
+}
+
+
+
+// Add a property into a linked list.
+//
+// If an entry with the same Key (and Subkey, when given) already exists it
+// is reused, which allows editing properties; otherwise a new entry is
+// allocated and appended. In both cases the write mode and the value are
+// (re)assigned; a NULL xValue clears the value.
+//
+// Returns the entry, or NULL if the container could not be allocated.
+static
+KEYVALUE* AddToList(cmsIT8* it8, KEYVALUE** Head, const char *Key, const char *Subkey, const char* xValue, WRITEMODE WriteAs)
+{
+    KEYVALUE* entry;
+    KEYVALUE* anchor;
+
+    if (!IsAvailableOnList(*Head, Key, Subkey, &entry)) {
+
+        // Not on list. The search left in 'entry' the place where it stopped
+        anchor = entry;
+
+        // Allocate the container
+        entry = (KEYVALUE*) AllocChunk(it8, sizeof(KEYVALUE));
+        if (entry == NULL) {
+
+            SynError(it8, "AddToList: out of memory");
+            return NULL;
+        }
+
+        // Store name
+        entry->Keyword = AllocString(it8, Key);
+
+        if (Subkey != NULL)
+            entry->Subkey = AllocString(it8, Subkey);
+        else
+            entry->Subkey = NULL;
+
+        entry->Next       = NULL;
+        entry->NextSubkey = NULL;
+
+        // Keep the container in our list
+        if (*Head == NULL) {
+
+            *Head = entry;
+        }
+        else if (anchor != NULL) {
+
+            if (Subkey != NULL) {
+
+                anchor->NextSubkey = entry;
+
+                // With a subkey, the anchor is the last property sharing the
+                // key, which is not necessarily the end of the list: move on
+                // to the actual end
+                while (anchor->Next != NULL)
+                    anchor = anchor->Next;
+            }
+
+            anchor->Next = entry;
+        }
+    }
+
+    entry->WriteAs = WriteAs;
+    entry->Value   = (xValue != NULL) ? AllocString(it8, xValue) : NULL;
+
+    return entry;
+}
+
+// Registers Key in the list of valid header keywords of the object, to be
+// written with the given mode. No subkey and no value are stored.
+static
+KEYVALUE* AddAvailableProperty(cmsIT8* it8, const char* Key, WRITEMODE as)
+{
+    KEYVALUE** list = &it8->ValidKeywords;
+
+    return AddToList(it8, list, Key, NULL, NULL, as);
+}
+
+
+// Registers Key in the list of valid sample identifiers of the object.
+// Entries carry no subkey, no value and the WRITE_UNCOOKED mode.
+static
+KEYVALUE* AddAvailableSampleID(cmsIT8* it8, const char* Key)
+{
+    KEYVALUE** list = &it8->ValidSampleID;
+
+    return AddToList(it8, list, Key, NULL, NULL, WRITE_UNCOOKED);
+}
+
+
+// Takes the next unused slot of it8->Tab, clears its header list, data format
+// and data pointers, and bumps TablesCount.
+// NOTE(review): no capacity check on it8->Tab is performed here -- confirm the
+// callers cannot exceed the array size.
+static
+void AllocTable(cmsIT8* it8)
+{
+    TABLE* fresh = &it8->Tab[it8->TablesCount];
+
+    fresh->HeaderList = NULL;
+    fresh->DataFormat = NULL;
+    fresh->Data       = NULL;
+
+    it8->TablesCount += 1;
+}
+
+
+// Selects table nTable as the current one. Asking for the index right after the
+// last existing table creates that table; any index beyond that is rejected.
+// Returns nTable on success, -1 on error.
+cmsInt32Number CMSEXPORT cmsIT8SetTable(cmsHANDLE  IT8, cmsUInt32Number nTable)
+{
+    cmsIT8* it8 = (cmsIT8*) IT8;
+
+    // Tables can only be appended one at a time
+    if (nTable > it8->TablesCount) {
+        SynError(it8, "Table %d is out of sequence", nTable);
+        return -1;
+    }
+
+    if (nTable == it8->TablesCount)
+        AllocTable(it8);
+
+    it8->nTable = nTable;
+    return (cmsInt32Number) nTable;
+}
+
+
+
+// Creates an empty IT8 container bound to ContextID: one empty table, sheet type
+// "CGATS.17", the default number format, and the predefined property and
+// sample-id dictionaries. Returns the handle, or NULL if memory could not be
+// obtained for the container or for its first file context.
+cmsHANDLE  CMSEXPORT cmsIT8Alloc(cmsContext ContextID)
+{
+    cmsIT8* it8;
+    cmsUInt32Number i;
+
+    it8 = (cmsIT8*) _cmsMallocZero(ContextID, sizeof(cmsIT8));
+    if (it8 == NULL) return NULL;
+
+    AllocTable(it8);
+
+    it8->MemoryBlock = NULL;
+    it8->MemorySink  = NULL;
+
+    it8 ->nTable = 0;
+
+    it8->ContextID = ContextID;
+    it8->Allocator.Used = 0;
+    it8->Allocator.Block = NULL;
+    it8->Allocator.BlockSize = 0;
+
+    it8->ValidKeywords = NULL;
+    it8->ValidSampleID = NULL;
+
+    it8 -> sy = SUNDEFINED;
+    it8 -> ch = ' ';
+    it8 -> Source = NULL;
+    it8 -> inum = 0;
+    it8 -> dnum = 0.0;
+
+    // The loaders dereference FileStack[0] unconditionally, so a failed
+    // allocation must abort construction instead of leaving a NULL entry behind.
+    it8->FileStack[0] = (FILECTX*)AllocChunk(it8, sizeof(FILECTX));
+    if (it8->FileStack[0] == NULL) {
+        cmsIT8Free((cmsHANDLE) it8);
+        return NULL;
+    }
+
+    it8->IncludeSP   = 0;
+    it8 -> lineno = 1;
+
+    strcpy(it8->DoubleFormatter, DEFAULT_DBL_FORMAT);
+    cmsIT8SetSheetType((cmsHANDLE) it8, "CGATS.17");
+
+    // Initialize predefined properties & data
+
+    for (i=0; i < NUMPREDEFINEDPROPS; i++)
+            AddAvailableProperty(it8, PredefinedProperties[i].id, PredefinedProperties[i].as);
+
+    for (i=0; i < NUMPREDEFINEDSAMPLEID; i++)
+            AddAvailableSampleID(it8, PredefinedSampleID[i]);
+
+
+   return (cmsHANDLE) it8;
+}
+
+
+// Returns the sheet type string of the table GetTable() selects for this handle.
+const char* CMSEXPORT cmsIT8GetSheetType(cmsHANDLE hIT8)
+{
+        TABLE* current = GetTable((cmsIT8*) hIT8);
+
+        return current->SheetType;
+}
+
+// Copies Type into the sheet type of the table GetTable() selects, keeping at
+// most MAXSTR-1 characters and always NUL-terminating. Always returns TRUE.
+cmsBool CMSEXPORT cmsIT8SetSheetType(cmsHANDLE hIT8, const char* Type)
+{
+        cmsIT8* it8 = (cmsIT8*) hIT8;
+        TABLE* current = GetTable(it8);
+
+        strncpy(current->SheetType, Type, MAXSTR-1);
+        current->SheetType[MAXSTR-1] = 0;    // strncpy does not terminate on truncation
+
+        return TRUE;
+}
+
+// Stores Val as a comment entry (keyword "# ") in the current table's header.
+// Returns FALSE for a NULL or empty string, or if the entry could not be stored.
+cmsBool CMSEXPORT cmsIT8SetComment(cmsHANDLE hIT8, const char* Val)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* entry;
+
+    if (Val == NULL || *Val == 0) return FALSE;
+
+    entry = AddToList(it8, &GetTable(it8)->HeaderList, "# ", NULL, Val, WRITE_UNCOOKED);
+    return entry != NULL;
+}
+
+// Sets property Key of the current table to the string Val (stored with
+// WRITE_STRINGIFY). Returns FALSE for a NULL or empty Val, or if the entry
+// could not be stored.
+cmsBool CMSEXPORT cmsIT8SetPropertyStr(cmsHANDLE hIT8, const char* Key, const char *Val)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* entry;
+
+    if (Val == NULL || *Val == 0) return FALSE;
+
+    entry = AddToList(it8, &GetTable(it8)->HeaderList, Key, NULL, Val, WRITE_STRINGIFY);
+    return entry != NULL;
+}
+
+// Sets property cProp of the current table to Val, rendered with the container's
+// DoubleFormatter and stored as WRITE_UNCOOKED text.
+// Returns TRUE if the entry was stored.
+cmsBool CMSEXPORT cmsIT8SetPropertyDbl(cmsHANDLE hIT8, const char* cProp, cmsFloat64Number Val)
+{
+    char Text[1024];
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* entry;
+
+    snprintf(Text, 1023, it8->DoubleFormatter, Val);
+
+    entry = AddToList(it8, &GetTable(it8)->HeaderList, cProp, NULL, Text, WRITE_UNCOOKED);
+    return entry != NULL;
+}
+
+// Sets property cProp of the current table to Val. The value is kept as decimal
+// text and tagged WRITE_HEXADECIMAL.
+// Returns TRUE if the entry was stored.
+cmsBool CMSEXPORT cmsIT8SetPropertyHex(cmsHANDLE hIT8, const char* cProp, cmsUInt32Number Val)
+{
+    char Text[1024];
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* entry;
+
+    snprintf(Text, 1023, "%u", Val);
+
+    entry = AddToList(it8, &GetTable(it8)->HeaderList, cProp, NULL, Text, WRITE_HEXADECIMAL);
+    return entry != NULL;
+}
+
+// Sets property Key of the current table to Buffer, stored verbatim
+// (WRITE_UNCOOKED). Returns TRUE if the entry was stored.
+cmsBool CMSEXPORT cmsIT8SetPropertyUncooked(cmsHANDLE hIT8, const char* Key, const char* Buffer)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* entry = AddToList(it8, &GetTable(it8)->HeaderList, Key, NULL, Buffer, WRITE_UNCOOKED);
+
+    return entry != NULL;
+}
+
+// Sets the SubKey component of property Key in the current table to Buffer
+// (stored as WRITE_PAIR). Returns TRUE if the entry was stored.
+cmsBool CMSEXPORT cmsIT8SetPropertyMulti(cmsHANDLE hIT8, const char* Key, const char* SubKey, const char *Buffer)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* entry = AddToList(it8, &GetTable(it8)->HeaderList, Key, SubKey, Buffer, WRITE_PAIR);
+
+    return entry != NULL;
+}
+
+// Looks up property Key (no subkey) in the current table's header.
+// Returns its stored value, or NULL if the lookup fails.
+const char* CMSEXPORT cmsIT8GetProperty(cmsHANDLE hIT8, const char* Key)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* found;
+
+    if (!IsAvailableOnList(GetTable(it8)->HeaderList, Key, NULL, &found))
+        return NULL;
+
+    return found->Value;
+}
+
+
+// Returns property cProp of the current table parsed as a floating point
+// number, or 0.0 when the property has no value to parse.
+cmsFloat64Number CMSEXPORT cmsIT8GetPropertyDbl(cmsHANDLE hIT8, const char* cProp)
+{
+    const char *text = cmsIT8GetProperty(hIT8, cProp);
+
+    return (text == NULL) ? 0.0 : ParseFloatNumber(text);
+}
+
+// Looks up the SubKey component of property Key in the current table's header.
+// Returns its stored value, or NULL if the lookup fails.
+const char* CMSEXPORT cmsIT8GetPropertyMulti(cmsHANDLE hIT8, const char* Key, const char *SubKey)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* found;
+
+    if (!IsAvailableOnList(GetTable(it8)->HeaderList, Key, SubKey, &found))
+        return NULL;
+
+    return found->Value;
+}
+
+// ----------------------------------------------------------------- Datasets
+
+
+// Allocates the current table's DataFormat array (nSamples + 1 string slots)
+// if it does not exist yet. nSamples is taken from NUMBER_OF_FIELDS; when that
+// is missing or not positive an error is raised and 10 fields are assumed.
+static
+void AllocateDataFormat(cmsIT8* it8)
+{
+    TABLE* t = GetTable(it8);
+    cmsUInt32Number nSlots;
+
+    if (t->DataFormat != NULL) return;    // Already allocated
+
+    t->nSamples = (int) cmsIT8GetPropertyDbl(it8, "NUMBER_OF_FIELDS");
+    if (t->nSamples <= 0) {
+
+        SynError(it8, "AllocateDataFormat: Unknown NUMBER_OF_FIELDS");
+        t->nSamples = 10;
+    }
+
+    nSlots = (cmsUInt32Number) t->nSamples + 1;
+
+    t->DataFormat = (char**) AllocChunk (it8, nSlots * sizeof(char *));
+    if (t->DataFormat == NULL)
+        SynError(it8, "AllocateDataFormat: Unable to allocate dataFormat array");
+}
+
+// Returns the name stored for field n of the current table, or NULL when the
+// table has no DataFormat array. n is not range-checked here.
+static
+const char *GetDataFormat(cmsIT8* it8, int n)
+{
+    TABLE* t = GetTable(it8);
+
+    return (t->DataFormat != NULL) ? t->DataFormat[n] : NULL;
+}
+
+// Stores a copy of label as the name of field n of the current table,
+// allocating the DataFormat array on first use.
+// Returns FALSE (after raising an error) when n is negative or larger than
+// NUMBER_OF_FIELDS, TRUE otherwise.
+static
+cmsBool SetDataFormat(cmsIT8* it8, int n, const char *label)
+{
+    TABLE* t = GetTable(it8);
+
+    if (!t->DataFormat)
+        AllocateDataFormat(it8);
+
+    // n comes straight from the public cmsIT8SetDataFormat(); a negative index
+    // would write in front of the array.
+    if (n < 0) {
+        SynError(it8, "Negative field index %d.", n);
+        return FALSE;
+    }
+
+    // The array holds nSamples + 1 slots, so n == nSamples is still in bounds
+    if (n > t -> nSamples) {
+        SynError(it8, "More than NUMBER_OF_FIELDS fields.");
+        return FALSE;
+    }
+
+    if (t->DataFormat) {
+        t->DataFormat[n] = AllocString(it8, label);
+    }
+
+    return TRUE;
+}
+
+
+// Public entry point: names field n of the current table 'Sample'.
+// Returns whatever SetDataFormat() reports.
+cmsBool CMSEXPORT cmsIT8SetDataFormat(cmsHANDLE  h, int n, const char *Sample)
+{
+    return SetDataFormat((cmsIT8*) h, n, Sample);
+}
+
+// Allocates the current table's Data array if it does not exist yet. The
+// dimensions come from the NUMBER_OF_FIELDS and NUMBER_OF_SETS properties and
+// the array gets (nSamples + 1) * (nPatches + 1) string slots.
+// On any failure an error is raised and t->Data stays NULL; callers test that.
+static
+void AllocateDataSet(cmsIT8* it8)
+{
+    TABLE* t = GetTable(it8);
+    const char* Fields;
+    const char* Sets;
+
+    if (t -> Data) return;    // Already allocated
+
+    // A missing property yields NULL, which must not reach atoi()
+    Fields = cmsIT8GetProperty(it8, "NUMBER_OF_FIELDS");
+    Sets   = cmsIT8GetProperty(it8, "NUMBER_OF_SETS");
+
+    t-> nSamples   = (Fields != NULL) ? atoi(Fields) : 0;
+    t-> nPatches   = (Sets   != NULL) ? atoi(Sets)   : 0;
+
+    // Reject negative counts, and counts large enough to wrap the 32-bit
+    // element-count product computed below.
+    if (t->nSamples < 0 || t->nSamples > 0x7ffe ||
+        t->nPatches < 0 || t->nPatches > 0x7ffe) {
+
+        SynError(it8, "AllocateDataSet: too much data");
+        return;
+    }
+
+    t-> Data = (char**)AllocChunk (it8, ((cmsUInt32Number) t->nSamples + 1) * ((cmsUInt32Number) t->nPatches + 1) *sizeof (char*));
+    if (t->Data == NULL) {
+
+        SynError(it8, "AllocateDataSet: Unable to allocate data array");
+    }
+
+}
+
+// Returns the string stored at row nSet, column nField of the current table,
+// or NULL when the coordinates are outside [0, nPatches) x [0, nSamples) or the
+// table has no data array.
+static
+char* GetData(cmsIT8* it8, int nSet, int nField)
+{
+    TABLE* t = GetTable(it8);
+    int nSamples    = t -> nSamples;
+    int nPatches    = t -> nPatches;
+
+    // Row and column arrive unchecked from the public accessors, so negative
+    // values have to be rejected as well as too-large ones.
+    if (nSet < 0 || nField < 0 || nSet >= nPatches || nField >= nSamples)
+        return NULL;
+
+    if (!t->Data) return NULL;
+    return t->Data [nSet * nSamples + nField];
+}
+
+// Stores a copy of Val at row nSet, column nField of the current table,
+// creating the data array on first use.
+// Returns FALSE when the array cannot be created; for out-of-range coordinates
+// it returns whatever SynError() returns. Returns TRUE once the value is stored.
+static
+cmsBool SetData(cmsIT8* it8, int nSet, int nField, const char *Val)
+{
+    TABLE* t = GetTable(it8);
+
+    // Lazily create the data array
+    if (t->Data == NULL) {
+        AllocateDataSet(it8);
+        if (t->Data == NULL) return FALSE;
+    }
+
+    if (nSet < 0 || nSet > t->nPatches)
+        return SynError(it8, "Patch %d out of range, there are %d patches", nSet, t -> nPatches);
+
+    if (nField < 0 || nField > t->nSamples)
+        return SynError(it8, "Sample %d out of range, there are %d samples", nField, t ->nSamples);
+
+    t->Data[nSet * t->nSamples + nField] = AllocString(it8, Val);
+    return TRUE;
+}
+
+
+// --------------------------------------------------------------- File I/O
+
+
+// Emits str (a single blank when str is NULL) to the save stream. The byte
+// count is always added to f->Used. The text then goes to f->stream when one
+// is open, otherwise to the memory block at f->Ptr when f->Base is set;
+// with neither, the call only counts bytes.
+static
+void WriteStr(SAVESTREAM* f, const char *str)
+{
+    cmsUInt32Number nBytes;
+
+    if (str == NULL)
+        str = " ";
+
+    nBytes = (cmsUInt32Number) strlen(str);
+    f->Used += nBytes;
+
+    // File destination
+    if (f->stream) {
+
+        if (fwrite(str, 1, nBytes, f->stream) != nBytes)
+            cmsSignalError(0, cmsERROR_WRITE, "Write to file error in CGATS parser");
+
+        return;
+    }
+
+    // No memory block: just counting the bytes
+    if (!f->Base) return;
+
+    // Memory destination: refuse to go past the caller's limit
+    if (f->Used > f->Max) {
+
+        cmsSignalError(0, cmsERROR_WRITE, "Write to memory overflows in CGATS parser");
+        return;
+    }
+
+    memmove(f->Ptr, str, nBytes);
+    f->Ptr += nBytes;
+}
+
+
+// printf-style front end for WriteStr(). The text is formatted into a 4 KiB
+// stack buffer, so longer output is truncated.
+static
+void Writef(SAVESTREAM* f, const char* frm, ...)
+{
+    char Text[4096];
+    va_list ap;
+
+    va_start(ap, frm);
+    vsnprintf(Text, 4095, frm, ap);
+    va_end(ap);
+
+    Text[4095] = 0;
+    WriteStr(f, Text);
+}
+
+// Renders Value as binary digits, most significant first, without leading
+// zeros ("0" for zero). Out must have room for 33 bytes (32 digits + NUL).
+static
+void FormatBinaryDigits(cmsUInt32Number Value, char* Out)
+{
+    char Reversed[32];
+    int n = 0;
+
+    // Collect digits least significant first...
+    do {
+        Reversed[n++] = (char) ('0' + (Value & 1));
+        Value >>= 1;
+    } while (Value != 0);
+
+    // ...and emit them in reading order
+    while (n > 0)
+        *Out++ = Reversed[--n];
+
+    *Out = 0;
+}
+
+// Writes full header of the current table: the sheet type line followed by one
+// line per header entry. Comment entries (keyword starting with '#') are written
+// as '#'-prefixed blocks; every other entry is written as keyword plus value,
+// formatted according to its write mode. Keywords missing from the
+// ValidKeywords dictionary are added to it (and declared with KEYWORD when
+// CMS_STRICT_CGATS is defined). Stops early on an unknown write mode.
+static
+void WriteHeader(cmsIT8* it8, SAVESTREAM* fp)
+{
+    KEYVALUE* p;
+    TABLE* t = GetTable(it8);
+
+    // Writes the type
+    WriteStr(fp, t->SheetType);
+    WriteStr(fp, "\n");
+
+    for (p = t->HeaderList; (p != NULL); p = p->Next)
+    {
+        if (*p ->Keyword == '#') {
+
+            char* Pt;
+
+            WriteStr(fp, "#\n# ");
+
+            // A comment entry may carry no text at all; do not walk a NULL string
+            if (p ->Value != NULL) {
+
+                for (Pt = p ->Value; *Pt; Pt++) {
+
+                    Writef(fp, "%c", *Pt);
+
+                    // Keep multi-line comments commented out on every line
+                    if (*Pt == '\n') {
+                        WriteStr(fp, "# ");
+                    }
+                }
+            }
+
+            WriteStr(fp, "\n#\n");
+            continue;
+        }
+
+
+        if (!IsAvailableOnList(it8-> ValidKeywords, p->Keyword, NULL, NULL)) {
+
+#ifdef CMS_STRICT_CGATS
+            WriteStr(fp, "KEYWORD\t\"");
+            WriteStr(fp, p->Keyword);
+            WriteStr(fp, "\"\n");
+#endif
+
+            AddAvailableProperty(it8, p->Keyword, WRITE_UNCOOKED);
+        }
+
+        WriteStr(fp, p->Keyword);
+        if (p->Value) {
+
+            switch (p ->WriteAs) {
+
+            case WRITE_UNCOOKED:
+                    Writef(fp, "\t%s", p ->Value);
+                    break;
+
+            case WRITE_STRINGIFY:
+                    Writef(fp, "\t\"%s\"", p->Value );
+                    break;
+
+            case WRITE_HEXADECIMAL:
+                    Writef(fp, "\t0x%X", atoi(p ->Value));
+                    break;
+
+            case WRITE_BINARY: {
+                    // "%B" is not a standard printf conversion, so the digits are
+                    // produced by hand. The prefix is "0b" so the value is not
+                    // mistaken for hexadecimal.
+                    // NOTE(review): confirm the reader accepts the 0b prefix.
+                    char Bits[33];
+
+                    FormatBinaryDigits((cmsUInt32Number) atoi(p ->Value), Bits);
+                    Writef(fp, "\t0b%s", Bits);
+                    }
+                    break;
+
+            case WRITE_PAIR:
+                    Writef(fp, "\t\"%s,%s\"", p->Subkey, p->Value);
+                    break;
+
+            default: SynError(it8, "Unknown write mode %d", p ->WriteAs);
+                     return;
+            }
+        }
+
+        WriteStr (fp, "\n");
+    }
+
+}
+
+
+// Writes the data format section (BEGIN_DATA_FORMAT ... END_DATA_FORMAT) of the
+// current table: the field names separated by tabs. Nothing is written when
+// the table has no DataFormat array.
+static
+void WriteDataFormat(SAVESTREAM* fp, cmsIT8* it8)
+{
+    int i, nSamples;
+    const char* Fields;
+    TABLE* t = GetTable(it8);
+
+    if (!t -> DataFormat) return;
+
+       WriteStr(fp, "BEGIN_DATA_FORMAT\n");
+       WriteStr(fp, " ");
+
+       // The property may be missing (NULL), which must not reach atoi()
+       Fields   = cmsIT8GetProperty(it8, "NUMBER_OF_FIELDS");
+       nSamples = (Fields != NULL) ? atoi(Fields) : 0;
+
+       // Never read more names than the table records as its field count, even
+       // if the property was changed to a larger value afterwards.
+       if (nSamples > t->nSamples)
+           nSamples = t->nSamples;
+
+       for (i = 0; i < nSamples; i++) {
+
+              WriteStr(fp, t->DataFormat[i]);
+              WriteStr(fp, ((i == (nSamples-1)) ? "\n" : "\t"));
+          }
+
+       WriteStr (fp, "END_DATA_FORMAT\n");
+}
+
+
+// Writes the data section (BEGIN_DATA ... END_DATA) of the current table, one
+// line per patch with tab-separated values. Missing values are written as ""
+// and values containing a blank are enclosed in quotes. Nothing is written when
+// the table has no data array.
+static
+void WriteData(SAVESTREAM* fp, cmsIT8* it8)
+{
+       int  i, j, nSets;
+       const char* Sets;
+       TABLE* t = GetTable(it8);
+
+       if (!t->Data) return;
+
+       WriteStr (fp, "BEGIN_DATA\n");
+
+       // The property may be missing (NULL), which must not reach atoi()
+       Sets  = cmsIT8GetProperty(it8, "NUMBER_OF_SETS");
+       nSets = (Sets != NULL) ? atoi(Sets) : 0;
+
+       // Follow the property when it shrinks the table, but never let the patch
+       // count grow past the value recorded for the existing data array.
+       if (nSets < t->nPatches)
+           t->nPatches = nSets;
+
+       for (i = 0; i < t-> nPatches; i++) {
+
+              WriteStr(fp, " ");
+
+              for (j = 0; j < t->nSamples; j++) {
+
+                     char *ptr = t->Data[i*t->nSamples+j];
+
+                     if (ptr == NULL) WriteStr(fp, "\"\"");
+                     else {
+                         // If value contains whitespace, enclose within quote
+
+                         if (strchr(ptr, ' ') != NULL) {
+
+                             WriteStr(fp, "\"");
+                             WriteStr(fp, ptr);
+                             WriteStr(fp, "\"");
+                         }
+                         else
+                            WriteStr(fp, ptr);
+                     }
+
+                     WriteStr(fp, ((j == (t->nSamples-1)) ? "\n" : "\t"));
+              }
+       }
+       WriteStr (fp, "END_DATA\n");
+}
+
+
+
+// Writes every table of the container (header, data format, data) to the text
+// file cFileName. Returns FALSE if the file cannot be opened or closing it
+// fails. The last table is left selected as the current one.
+cmsBool CMSEXPORT cmsIT8SaveToFile(cmsHANDLE hIT8, const char* cFileName)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    SAVESTREAM out;
+    cmsUInt32Number nTable;
+
+    memset(&out, 0, sizeof(out));
+
+    out.stream = fopen(cFileName, "wt");
+    if (out.stream == NULL) return FALSE;
+
+    for (nTable = 0; nTable < it8->TablesCount; nTable++) {
+
+        cmsIT8SetTable(hIT8, nTable);
+        WriteHeader(it8, &out);
+        WriteDataFormat(&out, it8);
+        WriteData(&out, it8);
+    }
+
+    return (fclose(out.stream) != 0) ? FALSE : TRUE;
+}
+
+
+// Saves to memory. With MemPtr == NULL nothing is stored and *BytesNeeded
+// receives the size required, including the terminating NUL. With a non-NULL
+// MemPtr, *BytesNeeded gives the buffer size on entry and the size required on
+// return. Returns FALSE when the buffer is too small for the text plus its
+// terminator, TRUE otherwise. The last table is left selected as the current one.
+cmsBool CMSEXPORT cmsIT8SaveToMem(cmsHANDLE hIT8, void *MemPtr, cmsUInt32Number* BytesNeeded)
+{
+    SAVESTREAM sd;
+    cmsUInt32Number i;
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    cmsBool Fits = TRUE;
+
+    memset(&sd, 0, sizeof(sd));
+
+    sd.stream = NULL;
+    sd.Base   = (cmsUInt8Number*)  MemPtr;
+    sd.Ptr    = sd.Base;
+
+    sd.Used = 0;
+
+    if (sd.Base)
+        sd.Max  = *BytesNeeded;     // Write to memory?
+    else
+        sd.Max  = 0;                // Just counting the needed bytes
+
+    for (i=0; i < it8 ->TablesCount; i++) {
+
+        cmsIT8SetTable(hIT8, i);
+        WriteHeader(it8, &sd);
+        WriteDataFormat(&sd, it8);
+        WriteData(&sd, it8);
+    }
+
+    sd.Used++;  // The \0 at the very end
+
+    if (sd.Base) {
+
+        // Only terminate when there is a byte left inside the buffer; the text
+        // may have filled it exactly, leaving no room for the NUL.
+        if ((cmsUInt32Number) (sd.Ptr - sd.Base) < sd.Max)
+            *sd.Ptr = 0;
+
+        // Report truncation instead of claiming success
+        if (sd.Used > sd.Max)
+            Fits = FALSE;
+    }
+
+    *BytesNeeded = sd.Used;
+
+    return Fits;
+}
+
+
+// -------------------------------------------------------------- Higher level parsing
+
+// Parses a BEGIN_DATA_FORMAT ... END_DATA_FORMAT section: each identifier found
+// is stored as the name of the next field of the current table.
+// Returns the result of SynError() when a non-identifier symbol is found, FALSE
+// when a field name cannot be stored, and TRUE otherwise. A mismatch with
+// NUMBER_OF_FIELDS is reported through SynError() but still returns TRUE.
+static
+cmsBool DataFormatSection(cmsIT8* it8)
+{
+    int iField = 0;
+    TABLE* t = GetTable(it8);
+
+    InSymbol(it8);   // Eats "BEGIN_DATA_FORMAT"
+    CheckEOLN(it8);
+
+    // One identifier per iteration until the section (or the input) ends
+    while (it8->sy != SEND_DATA_FORMAT &&
+        it8->sy != SEOLN &&
+        it8->sy != SEOF &&
+        it8->sy != SSYNERROR)  {
+
+            if (it8->sy != SIDENT) {
+
+                return SynError(it8, "Sample type expected");
+            }
+
+            if (!SetDataFormat(it8, iField, it8->id)) return FALSE;
+            iField++;
+
+            InSymbol(it8);
+            SkipEOLN(it8);
+       }
+
+       SkipEOLN(it8);
+       Skip(it8, SEND_DATA_FORMAT);
+       SkipEOLN(it8);
+
+       // Only reported; the section is still accepted
+       if (iField != t ->nSamples) {
+           SynError(it8, "Count mismatch. NUMBER_OF_FIELDS was %d, found %d\n", t ->nSamples, iField);
+
+
+       }
+
+       return TRUE;
+}
+
+
+
+// Parses a BEGIN_DATA ... END_DATA section into the current table. Values are
+// stored row by row: after nSamples values the column index wraps to 0 and the
+// row index advances.
+// Returns FALSE when a value cannot be read or stored, the result of SynError()
+// when the number of rows read differs from NUMBER_OF_SETS, and TRUE otherwise.
+static
+cmsBool DataSection (cmsIT8* it8)
+{
+    int  iField = 0;
+    int  iSet   = 0;
+    char Buffer[256];    // Holds one value; GetVal() is given a 255 limit below
+    TABLE* t = GetTable(it8);
+
+    InSymbol(it8);   // Eats "BEGIN_DATA"
+    CheckEOLN(it8);
+
+    // If this fails t->Data stays NULL and SetData() below returns FALSE
+    if (!t->Data)
+        AllocateDataSet(it8);
+
+    while (it8->sy != SEND_DATA && it8->sy != SEOF)
+    {
+        // Row complete: start the next one
+        if (iField >= t -> nSamples) {
+            iField = 0;
+            iSet++;
+
+        }
+
+        if (it8->sy != SEND_DATA && it8->sy != SEOF) {
+
+            if (!GetVal(it8, Buffer, 255, "Sample data expected"))
+                return FALSE;
+
+            if (!SetData(it8, iSet, iField, Buffer))
+                return FALSE;
+
+            iField++;
+
+            InSymbol(it8);
+            SkipEOLN(it8);
+        }
+    }
+
+    SkipEOLN(it8);
+    Skip(it8, SEND_DATA);
+    SkipEOLN(it8);
+
+    // Check for data completion.
+
+    // iSet is the index of the last row touched, hence the +1
+    if ((iSet+1) != t -> nPatches)
+        return SynError(it8, "Count mismatch. NUMBER_OF_SETS was %d, found %d\n", t ->nPatches, iSet+1);
+
+    return TRUE;
+}
+
+
+
+
+// Parses header statements until the current symbol is SEOF, SSYNERROR,
+// SBEGIN_DATA_FORMAT or SBEGIN_DATA. Three statement kinds are handled:
+//   SKEYWORD         - adds a name to the ValidKeywords dictionary
+//   SDATA_FORMAT_ID  - adds a name to the ValidSampleID dictionary
+//   SIDENT           - a property assignment, stored in the current table's
+//                      header; WRITE_PAIR properties are split into
+//                      "subkey, value" pairs separated by ';'
+// Returns FALSE (or the result of SynError()) on error, TRUE otherwise.
+static
+cmsBool HeaderSection(cmsIT8* it8)
+{
+    char VarName[MAXID];
+    char Buffer[MAXSTR];
+    KEYVALUE* Key;
+
+        while (it8->sy != SEOF &&
+               it8->sy != SSYNERROR &&
+               it8->sy != SBEGIN_DATA_FORMAT &&
+               it8->sy != SBEGIN_DATA) {
+
+
+        switch (it8 -> sy) {
+
+        case SKEYWORD:
+                InSymbol(it8);
+                if (!GetVal(it8, Buffer, MAXSTR-1, "Keyword expected")) return FALSE;
+                if (!AddAvailableProperty(it8, Buffer, WRITE_UNCOOKED)) return FALSE;
+                InSymbol(it8);
+                break;
+
+
+        case SDATA_FORMAT_ID:
+                InSymbol(it8);
+                if (!GetVal(it8, Buffer, MAXSTR-1, "Keyword expected")) return FALSE;
+                if (!AddAvailableSampleID(it8, Buffer)) return FALSE;
+                InSymbol(it8);
+                break;
+
+
+        case SIDENT:
+            // Keep a private, always-terminated copy of the identifier
+            strncpy(VarName, it8->id, MAXID - 1);
+            VarName[MAXID - 1] = 0;
+
+            if (!IsAvailableOnList(it8->ValidKeywords, VarName, NULL, &Key)) {
+
+#ifdef CMS_STRICT_CGATS
+                return SynError(it8, "Undefined keyword '%s'", VarName);
+#else
+                // Unknown keywords are accepted and registered on the fly
+                Key = AddAvailableProperty(it8, VarName, WRITE_UNCOOKED);
+                if (Key == NULL) return FALSE;
+#endif
+            }
+
+            InSymbol(it8);
+            if (!GetVal(it8, Buffer, MAXSTR - 1, "Property data expected")) return FALSE;
+
+            if (Key->WriteAs != WRITE_PAIR) {
+                // NOTE(review): the AddToList() result is ignored here, so an
+                // allocation failure is not reported to the caller.
+                AddToList(it8, &GetTable(it8)->HeaderList, VarName, NULL, Buffer,
+                    (it8->sy == SSTRING) ? WRITE_STRINGIFY : WRITE_UNCOOKED);
+            }
+            else {
+                const char *Subkey;
+                char *Nextkey;
+                if (it8->sy != SSTRING)
+                    return SynError(it8, "Invalid value '%s' for property '%s'.", Buffer, VarName);
+
+                // chop the string as a list of "subkey, value" pairs, using ';' as a separator
+                for (Subkey = Buffer; Subkey != NULL; Subkey = Nextkey)
+                {
+                    char *Value, *temp;
+
+                    //  identify token pair boundary
+                    Nextkey = (char*)strchr(Subkey, ';');
+                    if (Nextkey)
+                        *Nextkey++ = '\0';
+
+                    // for each pair, split the subkey and the value
+                    // (the LAST comma is used, so the subkey itself may contain commas)
+                    Value = (char*)strrchr(Subkey, ',');
+                    if (Value == NULL)
+                        return SynError(it8, "Invalid value for property '%s'.", VarName);
+
+                    // gobble the spaces before the comma, and the comma itself
+                    temp = Value++;
+                    do *temp-- = '\0'; while (temp >= Subkey && *temp == ' ');
+
+                    // gobble any space at the right
+                    // (the backwards scan ends at the latest on the NUL written
+                    // over the comma just above)
+                    temp = Value + strlen(Value) - 1;
+                    while (*temp == ' ') *temp-- = '\0';
+
+                    // trim the strings from the left
+                    Subkey += strspn(Subkey, " ");
+                    Value += strspn(Value, " ");
+
+                    if (Subkey[0] == 0 || Value[0] == 0)
+                        return SynError(it8, "Invalid value for property '%s'.", VarName);
+                    // NOTE(review): result ignored here as well
+                    AddToList(it8, &GetTable(it8)->HeaderList, VarName, Subkey, Value, WRITE_PAIR);
+                }
+            }
+
+            InSymbol(it8);
+            break;
+
+
+        case SEOLN: break;
+
+        default:
+                return SynError(it8, "expected keyword or identifier");
+        }
+
+    SkipEOLN(it8);
+    }
+
+    return TRUE;
+
+}
+
+
+// Reads the sheet type from the very first line of the input: leading
+// separators are skipped, then characters are copied into SheetTypePtr up to
+// the first CR, LF, TAB or NUL. At most MAXSTR-1 characters are stored, followed
+// by a terminating NUL; any excess is consumed and discarded.
+// SheetTypePtr must therefore point to at least MAXSTR bytes.
+static
+void ReadType(cmsIT8* it8, char* SheetTypePtr)
+{
+    cmsInt32Number cnt = 0;
+
+    // First line is a very special case.
+
+    while (isseparator(it8->ch))
+            NextCh(it8);
+
+    while (it8->ch != '\r' && it8 ->ch != '\n' && it8->ch != '\t' && it8 -> ch != 0) {
+
+        // Leave one byte for the terminator written after the loop; storing
+        // MAXSTR characters would put the NUL one past a MAXSTR-sized buffer.
+        if (cnt++ < MAXSTR-1)
+            *SheetTypePtr++= (char) it8 ->ch;
+        NextCh(it8);
+    }
+
+    *SheetTypePtr = 0;
+}
+
+
+// Top-level parser loop. Unless nosheet is nonzero, the sheet type of the first
+// table is read first. Input is then consumed until SEOF or SSYNERROR,
+// dispatching to the data-format, data and header section parsers. After each
+// data section that is not followed by end of input a new table is started.
+// Returns FALSE as soon as a section parser fails, or if the final symbol is
+// SSYNERROR; TRUE otherwise.
+static
+cmsBool ParseIT8(cmsIT8* it8, cmsBool nosheet)
+{
+    char* SheetTypePtr = it8 ->Tab[0].SheetType;
+
+    if (nosheet == 0) {
+        ReadType(it8, SheetTypePtr);
+    }
+
+    InSymbol(it8);
+
+    SkipEOLN(it8);
+
+    while (it8-> sy != SEOF &&
+           it8-> sy != SSYNERROR) {
+
+            switch (it8 -> sy) {
+
+            case SBEGIN_DATA_FORMAT:
+                    if (!DataFormatSection(it8)) return FALSE;
+                    break;
+
+            case SBEGIN_DATA:
+
+                    if (!DataSection(it8)) return FALSE;
+
+                    // More input after the data: it belongs to a new table
+                    if (it8 -> sy != SEOF) {
+
+                            // NOTE(review): AllocTable() does no capacity check
+                            // on it8->Tab -- confirm this cannot overrun.
+                            AllocTable(it8);
+                            it8 ->nTable = it8 ->TablesCount - 1;
+
+                            // Read sheet type if present. We only support identifier and string.
+                            // <ident> <eoln> is a type string
+                            // anything else, is not a type string
+                            if (nosheet == 0) {
+
+                                if (it8 ->sy == SIDENT) {
+
+                                    // May be a type sheet or may be a prop value statement. We cannot use insymbol in
+                                    // this special case...
+                                     while (isseparator(it8->ch))
+                                         NextCh(it8);
+
+                                     // If a newline is found, then this is a type string
+                                    if (it8 ->ch == '\n' || it8->ch == '\r') {
+
+                                         cmsIT8SetSheetType(it8, it8 ->id);
+                                         InSymbol(it8);
+                                    }
+                                    else
+                                    {
+                                        // It is not. Just continue
+                                        cmsIT8SetSheetType(it8, "");
+                                    }
+                                }
+                                else
+                                    // Validate quoted strings
+                                    if (it8 ->sy == SSTRING) {
+                                        cmsIT8SetSheetType(it8, it8 ->str);
+                                        InSymbol(it8);
+                                    }
+                           }
+
+                    }
+                    break;
+
+            case SEOLN:
+                    SkipEOLN(it8);
+                    break;
+
+            default:
+                    // Anything else is handed to the header parser
+                    if (!HeaderSection(it8)) return FALSE;
+           }
+
+    }
+
+    return (it8 -> sy != SSYNERROR);
+}
+
+
+
+// Init useful pointers
+//
+// Walks every table and every field of its data format:
+//   - a field named SAMPLE_ID (case-insensitive) is recorded as the table's
+//     SampleID column, and every value in that column is limited to 255
+//     characters;
+//   - for a field named LABEL, or whose name starts with '$', each value is
+//     looked up as a property name in the header of every table; on a match the
+//     value is replaced by "<label> <table index> <property value>".
+// it8->nTable is changed while iterating and restored at the end.
+// NOTE(review): the early return on a missing DATA_FORMAT skips that restore.
+
+static
+void CookPointers(cmsIT8* it8)
+{
+    int idField, i;
+    char* Fld;
+    cmsUInt32Number j;
+    cmsUInt32Number nOldTable = it8 ->nTable;
+
+    for (j=0; j < it8 ->TablesCount; j++) {
+
+    TABLE* t = it8 ->Tab + j;
+
+    t -> SampleID = 0;
+    // Make table j current so GetData()/SetData() below operate on it
+    it8 ->nTable = j;
+
+    for (idField = 0; idField < t -> nSamples; idField++)
+    {
+        if (t ->DataFormat == NULL){
+            SynError(it8, "Undefined DATA_FORMAT");
+            return;
+        }
+
+        Fld = t->DataFormat[idField];
+        if (!Fld) continue;
+
+
+        if (cmsstrcasecmp(Fld, "SAMPLE_ID") == 0) {
+
+            t -> SampleID = idField;
+
+            for (i=0; i < t -> nPatches; i++) {
+
+                char *Data = GetData(it8, i, idField);
+                if (Data) {
+                    char Buffer[256];
+
+                    // Truncated copy of the identifier (at most 255 characters)
+                    strncpy(Buffer, Data, 255);
+                    Buffer[255] = 0;
+
+                    // The copy is never longer than the original, so it is
+                    // written back in place
+                    if (strlen(Buffer) <= strlen(Data))
+                        strcpy(Data, Buffer);
+                    else
+                        SetData(it8, i, idField, Buffer);
+
+                }
+            }
+
+        }
+
+        // "LABEL" is an extension. It keeps references to forward tables
+
+        if ((cmsstrcasecmp(Fld, "LABEL") == 0) || Fld[0] == '$' ) {
+
+                    // Search for table references...
+                    for (i=0; i < t -> nPatches; i++) {
+
+                            char *Label = GetData(it8, i, idField);
+
+                            if (Label) {
+
+                                cmsUInt32Number k;
+
+                                // This is the label, search for a table containing
+                                // this property
+
+                                for (k=0; k < it8 ->TablesCount; k++) {
+
+                                    TABLE* Table = it8 ->Tab + k;
+                                    KEYVALUE* p;
+
+                                    if (IsAvailableOnList(Table->HeaderList, Label, NULL, &p)) {
+
+                                        // Available, keep type and table
+                                        char Buffer[256];
+
+                                        // NOTE(review): p->Value can be NULL for a
+                                        // property stored without a value; it is
+                                        // passed to %s below -- confirm.
+                                        char *Type  = p ->Value;
+                                        int  nTable = (int) k;
+
+                                        snprintf(Buffer, 255, "%s %d %s", Label, nTable, Type );
+
+                                        SetData(it8, i, idField, Buffer);
+                                    }
+                                }
+
+
+                            }
+
+                    }
+
+
+        }
+
+    }
+    }
+
+    it8 ->nTable = nOldTable;
+}
+
+// Tries to infer whether a buffer looks like the start of a CGATS/IT8 file by
+// inspecting its first line (at most the first 132 bytes; the very first byte is
+// not examined). The line must consist of characters in the range 32..127 plus
+// blanks/tabs, be terminated by CR or LF, have balanced double quotes and hold
+// at most two words. Returns the number of words found (1 or 2), or 0 if the
+// buffer does not qualify.
+static
+int IsMyBlock(const cmsUInt8Number* Buffer, cmsUInt32Number n)
+{
+    int nWords = 1, pendingGap = 0, insideQuotes = 0;
+    cmsUInt32Number pos, limit;
+
+    if (n < 10) return 0;   // Too small
+
+    limit = (n > 132) ? 132 : n;
+
+    for (pos = 1; pos < limit; pos++) {
+
+        cmsUInt8Number c = Buffer[pos];
+
+        // End of the first line: time for the verdict
+        if (c == '\n' || c == '\r') {
+
+            if (insideQuotes == 1 || nWords > 2) return 0;
+            return nWords;
+        }
+
+        // Blanks outside quotes open a gap that the next printable char closes
+        if (c == '\t' || c == ' ') {
+
+            if (!insideQuotes && !pendingGap)
+                pendingGap = 1;
+            continue;
+        }
+
+        if (c == '\"') {
+            insideQuotes = !insideQuotes;
+            continue;
+        }
+
+        // Anything else has to be plain printable text
+        if (c < 32) return 0;
+        if (c > 127) return 0;
+
+        nWords += pendingGap;
+        pendingGap = 0;
+    }
+
+    // No line terminator within the inspected range
+    return 0;
+}
+
+
+// Reads up to the first 132 bytes of FileName and runs IsMyBlock() on them.
+// Returns IsMyBlock()'s result, or FALSE if the file cannot be opened (an error
+// is signalled) or closed.
+static
+cmsBool IsMyFile(const char* FileName)
+{
+   cmsUInt8Number Head[133];
+   cmsUInt32Number nRead;
+   FILE *in = fopen(FileName, "rt");
+
+   if (in == NULL) {
+       cmsSignalError(0, cmsERROR_FILE, "File '%s' not found", FileName);
+       return FALSE;
+   }
+
+   nRead = (cmsUInt32Number) fread(Head, 1, 132, in);
+
+   if (fclose(in) != 0)
+       return FALSE;
+
+   // The buffer has one spare byte for this terminator
+   Head[nRead] = '\0';
+
+   return IsMyBlock(Head, nRead);
+}
+
+// ---------------------------------------------------------- Exported routines
+
+
+// Parses a CGATS/IT8 text held in memory. The len bytes at Ptr are copied into
+// a private NUL-terminated block that is released again once parsing is done.
+// Returns a new handle with table 0 selected, or NULL if the block does not
+// look like CGATS, memory cannot be obtained, or parsing fails.
+cmsHANDLE  CMSEXPORT cmsIT8LoadFromMem(cmsContext ContextID, const void *Ptr, cmsUInt32Number len)
+{
+    cmsHANDLE hIT8;
+    cmsIT8*  it8;
+    int type;
+
+    _cmsAssert(Ptr != NULL);
+    _cmsAssert(len != 0);
+
+    type = IsMyBlock((const cmsUInt8Number*)Ptr, len);
+    if (type == 0) return NULL;
+
+    hIT8 = cmsIT8Alloc(ContextID);
+    if (!hIT8) return NULL;
+
+    it8 = (cmsIT8*) hIT8;
+    it8 ->MemoryBlock = (char*) _cmsMalloc(ContextID, len + 1);
+
+    // Without the private copy there is nothing to parse
+    if (it8 ->MemoryBlock == NULL) {
+
+        cmsIT8Free(hIT8);
+        return NULL;
+    }
+
+    strncpy(it8 ->MemoryBlock, (const char*) Ptr, len);
+    it8 ->MemoryBlock[len] = 0;
+
+    strncpy(it8->FileStack[0]->FileName, "", cmsMAX_PATH-1);
+    it8-> Source = it8 -> MemoryBlock;
+
+    // IsMyBlock() returned the number of words on the first line; type-1 is
+    // passed as ParseIT8()'s nosheet flag.
+    if (!ParseIT8(it8, type-1)) {
+
+        cmsIT8Free(hIT8);
+        return NULL;        // This function returns a handle, not a boolean
+    }
+
+    CookPointers(it8);
+    it8 ->nTable = 0;
+
+    _cmsFree(ContextID, it8->MemoryBlock);
+    it8 -> MemoryBlock = NULL;
+
+    return hIT8;
+
+
+}
+
+
+// Parses the CGATS/IT8 text file cFileName.
+// Returns a new handle with table 0 selected, or NULL if the file does not look
+// like CGATS, cannot be opened or closed, the container cannot be allocated, or
+// parsing fails.
+cmsHANDLE  CMSEXPORT cmsIT8LoadFromFile(cmsContext ContextID, const char* cFileName)
+{
+    cmsHANDLE hIT8;
+    cmsIT8*   it8;
+    int       type;
+
+    _cmsAssert(cFileName != NULL);
+
+    // Quick sniff of the first line before allocating anything
+    type = IsMyFile(cFileName);
+    if (type == 0) return NULL;
+
+    hIT8 = cmsIT8Alloc(ContextID);
+    it8  = (cmsIT8*) hIT8;
+    if (hIT8 == NULL) return NULL;
+
+    it8->FileStack[0]->Stream = fopen(cFileName, "rt");
+    if (it8->FileStack[0]->Stream == NULL) {
+
+        cmsIT8Free(hIT8);
+        return NULL;
+    }
+
+    // Remember the file name, always NUL-terminated
+    strncpy(it8->FileStack[0]->FileName, cFileName, cmsMAX_PATH-1);
+    it8->FileStack[0]->FileName[cmsMAX_PATH-1] = 0;
+
+    // type-1 is passed as ParseIT8()'s nosheet flag
+    if (!ParseIT8(it8, type-1)) {
+
+        fclose(it8->FileStack[0]->Stream);
+        cmsIT8Free(hIT8);
+        return NULL;
+    }
+
+    CookPointers(it8);
+    it8->nTable = 0;
+
+    if (fclose(it8->FileStack[0]->Stream) != 0) {
+
+        cmsIT8Free(hIT8);
+        return NULL;
+    }
+
+    return hIT8;
+}
+
+// Returns the current table's nSamples. When SampleNames is not NULL it also
+// receives the table's DataFormat array (not a copy).
+int CMSEXPORT cmsIT8EnumDataFormat(cmsHANDLE hIT8, char ***SampleNames)
+{
+    TABLE* current;
+
+    _cmsAssert(hIT8 != NULL);
+
+    current = GetTable((cmsIT8*) hIT8);
+
+    if (SampleNames != NULL)
+        *SampleNames = current->DataFormat;
+
+    return current->nSamples;
+}
+
+
+// Builds an array with the keyword of every entry in the current table's
+// header, in list order. The array comes from the container's chunk allocator
+// and the strings are the header's own keyword pointers, not copies.
+// Returns the number of entries and stores the array in *PropertyNames; if the
+// array cannot be allocated, *PropertyNames is set to NULL and 0 is returned.
+cmsUInt32Number CMSEXPORT cmsIT8EnumProperties(cmsHANDLE hIT8, char ***PropertyNames)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE* p;
+    cmsUInt32Number n;
+    char **Props;
+    TABLE* t;
+
+    _cmsAssert(hIT8 != NULL);
+
+    t = GetTable(it8);
+
+    // Pass#1 - count properties
+
+    n = 0;
+    for (p = t -> HeaderList;  p != NULL; p = p->Next) {
+        n++;
+    }
+
+
+    Props = (char **) AllocChunk(it8, sizeof(char *) * n);
+    if (Props == NULL) {
+
+        // Out of memory: report an empty enumeration instead of writing
+        // through a NULL pointer
+        *PropertyNames = NULL;
+        return 0;
+    }
+
+    // Pass#2 - Fill pointers
+    n = 0;
+    for (p = t -> HeaderList;  p != NULL; p = p->Next) {
+        Props[n++] = p -> Keyword;
+    }
+
+    *PropertyNames = Props;
+    return n;
+}
+
+// Builds an array with the subkey names of property cProp in the current table,
+// following the NextSubkey chain from the entry found for cProp. The array
+// comes from the container's chunk allocator and the strings are the header's
+// own subkey pointers, not copies.
+// Returns the number of subkeys and stores the array in *SubpropertyNames; if
+// the property is not found or the array cannot be allocated,
+// *SubpropertyNames is set to NULL and 0 is returned.
+cmsUInt32Number CMSEXPORT cmsIT8EnumPropertyMulti(cmsHANDLE hIT8, const char* cProp, const char ***SubpropertyNames)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    KEYVALUE *p, *tmp;
+    cmsUInt32Number n;
+    const char **Props;
+    TABLE* t;
+
+    _cmsAssert(hIT8 != NULL);
+
+
+    t = GetTable(it8);
+
+    if(!IsAvailableOnList(t->HeaderList, cProp, NULL, &p)) {
+        *SubpropertyNames = 0;
+        return 0;
+    }
+
+    // Pass#1 - count properties
+
+    n = 0;
+    for (tmp = p;  tmp != NULL; tmp = tmp->NextSubkey) {
+        if(tmp->Subkey != NULL)
+            n++;
+    }
+
+
+    Props = (const char **) AllocChunk(it8, sizeof(char *) * n);
+    if (Props == NULL) {
+
+        // Out of memory: report an empty enumeration instead of writing
+        // through a NULL pointer
+        *SubpropertyNames = 0;
+        return 0;
+    }
+
+    // Pass#2 - Fill pointers
+    n = 0;
+    for (tmp = p;  tmp != NULL; tmp = tmp->NextSubkey) {
+        // Take the subkey of the entry being visited, not of the chain head,
+        // otherwise every slot would receive the same name
+        if(tmp->Subkey != NULL)
+            Props[n++] = tmp ->Subkey;
+    }
+
+    *SubpropertyNames = Props;
+    return n;
+}
+
+// Returns the row of the current table whose SampleID column equals cPatch
+// (case-insensitive), or -1 when no row matches.
+static
+int LocatePatch(cmsIT8* it8, const char* cPatch)
+{
+    TABLE* t = GetTable(it8);
+    int row;
+
+    for (row = 0; row < t->nPatches; row++) {
+
+        const char *id = GetData(it8, row, t->SampleID);
+
+        // Rows without an identifier can never match
+        if (id == NULL) continue;
+
+        if (cmsstrcasecmp(id, cPatch) == 0)
+            return row;
+    }
+
+    // SynError(it8, "Couldn't find patch '%s'\n", cPatch);
+    return -1;
+}
+
+
+// Returns the first row of the current table whose SampleID column holds no
+// value, or -1 when every row has one.
+static
+int LocateEmptyPatch(cmsIT8* it8)
+{
+    TABLE* t = GetTable(it8);
+    int row = 0;
+
+    while (row < t->nPatches) {
+
+        if (GetData(it8, row, t->SampleID) == NULL)
+            return row;
+
+        row++;
+    }
+
+    return -1;
+}
+
+// Returns the index of the field of the current table named cSample
+// (case-insensitive), or -1 when there is no such field.
+static
+int LocateSample(cmsIT8* it8, const char* cSample)
+{
+    TABLE* t = GetTable(it8);
+    int col;
+
+    for (col = 0; col < t->nSamples; col++) {
+
+        const char *name = GetDataFormat(it8, col);
+
+        // Unnamed slots can never match
+        if (name == NULL) continue;
+
+        if (cmsstrcasecmp(name, cSample) == 0)
+            return col;
+    }
+
+    return -1;
+}
+
+
+// Public entry point: index of the field named cSample in the current table,
+// or -1 when there is no such field.
+int CMSEXPORT cmsIT8FindDataFormat(cmsHANDLE hIT8, const char* cSample)
+{
+    _cmsAssert(hIT8 != NULL);
+
+    return LocateSample((cmsIT8*) hIT8, cSample);
+}
+
+
+
+// Public entry point: the string stored at (row, col) of the current table,
+// or NULL when GetData() has nothing to return for those coordinates.
+const char* CMSEXPORT cmsIT8GetDataRowCol(cmsHANDLE hIT8, int row, int col)
+{
+    _cmsAssert(hIT8 != NULL);
+
+    return GetData((cmsIT8*) hIT8, row, col);
+}
+
+
+// Numeric view of the cell at (row, col): its text is run through ParseFloatNumber.
+// A cell for which no text is available reads as 0.0.
+cmsFloat64Number CMSEXPORT cmsIT8GetDataRowColDbl(cmsHANDLE hIT8, int row, int col)
+{
+    const char* Text = cmsIT8GetDataRowCol(hIT8, row, col);
+
+    if (Text != NULL)
+        return ParseFloatNumber(Text);
+    return 0.0;
+}
+
+
+// Stores the string Val into cell (row, col) of the current table; the result is SetData's.
+cmsBool CMSEXPORT cmsIT8SetDataRowCol(cmsHANDLE hIT8, int row, int col, const char* Val)
+{
+    // Reject a missing handle up front
+    _cmsAssert(hIT8 != NULL);
+
+    return SetData((cmsIT8*) hIT8, row, col, Val);
+}
+
+
+// Formats Val with the handle's DoubleFormatter and stores the text in cell (row, col).
+cmsBool CMSEXPORT cmsIT8SetDataRowColDbl(cmsHANDLE hIT8, int row, int col, cmsFloat64Number Val)
+{
+    char Text[256];
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+
+    _cmsAssert(hIT8 != NULL);
+    // snprintf may write at most 255 bytes, terminator included
+    snprintf(Text, 255, it8->DoubleFormatter, Val);
+    return SetData(it8, row, col, Text);
+}
+
+
+
+// Looks a cell up by names: cSample selects the column and cPatch the row (both matched
+// ignoring case). Returns the cell text, or NULL when either name cannot be resolved.
+const char* CMSEXPORT cmsIT8GetData(cmsHANDLE hIT8, const char* cPatch, const char* cSample)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    int col, row;
+
+    _cmsAssert(hIT8 != NULL);
+
+    // Resolve the column first; the row is only searched once the column is known
+    col = LocateSample(it8, cSample);
+    if (col < 0) return NULL;
+
+    row = LocatePatch(it8, cPatch);
+    if (row < 0) return NULL;
+
+    // GetData takes (row, column)
+    return GetData(it8, row, col);
+}
+
+
+// Value of cell (cPatch, cSample) parsed as a double; 0.0 when the cell cannot be found.
+cmsFloat64Number CMSEXPORT cmsIT8GetDataDbl(cmsHANDLE  it8, const char* cPatch, const char* cSample)
+{
+    const char* Buffer = cmsIT8GetData(it8, cPatch, cSample);
+    // cmsIT8GetData returns NULL for an unknown patch/field: never hand that to the parser
+    if (Buffer == NULL) return 0.0;
+    return ParseFloatNumber(Buffer);
+}
+
+// Stores Val in the cell addressed by patch name (row) and field name (column).
+// Returns FALSE when the field or the patch cannot be located; otherwise SetData's result.
+cmsBool CMSEXPORT cmsIT8SetData(cmsHANDLE hIT8, const char* cPatch, const char* cSample, const char *Val)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    int iField, iSet;
+    TABLE* t;
+
+    _cmsAssert(hIT8 != NULL);
+
+    t = GetTable(it8);
+
+    iField = LocateSample(it8, cSample);
+
+    if (iField < 0)
+        return FALSE;
+
+    if (t-> nPatches == 0) {
+        // Patch count is still zero: run the allocation helpers before any cell is written
+        AllocateDataFormat(it8);
+        AllocateDataSet(it8);
+        CookPointers(it8);
+    }
+
+    if (cmsstrcasecmp(cSample, "SAMPLE_ID") == 0) {
+        // Writing the identifier field: take the first row whose identifier is still unset
+        iSet   = LocateEmptyPatch(it8);
+        if (iSet < 0) {
+            return SynError(it8, "Couldn't add more patches '%s'\n", cPatch);
+        }
+        // Write into the table's SampleID column rather than the index found by name
+        iField = t -> SampleID;
+    }
+    else {
+        iSet = LocatePatch(it8, cPatch);
+        if (iSet < 0) {
+            return FALSE;
+        }
+    }
+
+    return SetData(it8, iSet, iField, Val);
+}
+
+
+// Numeric variant of cmsIT8SetData: Val is rendered through the handle's DoubleFormatter
+// and the resulting text is stored by patch/field name.
+cmsBool CMSEXPORT cmsIT8SetDataDbl(cmsHANDLE hIT8, const char* cPatch, const char* cSample, cmsFloat64Number Val)
+{
+    char Text[256];
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+
+    _cmsAssert(hIT8 != NULL);
+    // snprintf may write at most 255 bytes, terminator included
+    snprintf(Text, 255, it8->DoubleFormatter, Val);
+    return cmsIT8SetData(hIT8, cPatch, cSample, Text);
+}
+
+// Name (identifier cell) of row nPatch, or NULL if that cell has no text. With a NULL
+// buffer the stored string itself is returned; otherwise at most MAXSTR-1 characters
+// are copied into buffer, which therefore must hold at least MAXSTR bytes.
+const char* CMSEXPORT cmsIT8GetPatchName(cmsHANDLE hIT8, int nPatch, char* buffer)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    char* Name;
+
+    _cmsAssert(hIT8 != NULL);
+
+    Name = GetData(it8, nPatch, GetTable(it8)->SampleID);
+    if (Name == NULL) return NULL;
+
+    // No destination given: hand back the stored string
+    if (buffer == NULL) return Name;
+
+    strncpy(buffer, Name, MAXSTR-1);
+    buffer[MAXSTR-1] = 0;   // strncpy does not terminate when the source is too long
+    return buffer;
+}
+// Row index of the patch named cPatch in the current table, or -1 if LocatePatch finds none.
+int CMSEXPORT cmsIT8GetPatchByName(cmsHANDLE hIT8, const char *cPatch)
+{
+    _cmsAssert(hIT8 != NULL);
+
+    return LocatePatch((cmsIT8*)hIT8, cPatch);
+}
+// Reports the TablesCount field of the IT8 handle.
+cmsUInt32Number CMSEXPORT cmsIT8TableCount(cmsHANDLE hIT8)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+
+    _cmsAssert(hIT8 != NULL);
+
+    return it8 ->TablesCount;
+}
+
+// Handles the "LABEL" extension: the cell (cSet, cField) is expected to hold three tokens,
+// "<label> <table number> <type>". On success the table with that number is selected via
+// cmsIT8SetTable and its result returned; -1 on lookup failure, parse failure or type mismatch.
+
+int CMSEXPORT cmsIT8SetTableByLabel(cmsHANDLE hIT8, const char* cSet, const char* cField, const char* ExpectedType)
+{
+    const char* Entry;
+    char Label[256];
+    char Type[256];
+    cmsUInt32Number nTable;
+
+    _cmsAssert(hIT8 != NULL);
+
+    // A missing or empty field name falls back to the default column
+    if (cField == NULL || *cField == 0)
+        cField = "LABEL";
+
+    Entry = cmsIT8GetData(hIT8, cSet, cField);
+    if (Entry == NULL) return -1;
+
+    // All three tokens are required; %255s keeps each string inside its 256-byte array
+    if (sscanf(Entry, "%255s %u %255s", Label, &nTable, Type) != 3)
+        return -1;
+
+    // The type is only checked when a non-empty ExpectedType was supplied
+    if (ExpectedType != NULL && *ExpectedType != 0) {
+
+        if (cmsstrcasecmp(Type, ExpectedType) != 0)
+            return -1;
+    }
+
+    return cmsIT8SetTable(hIT8, nTable);
+}
+
+
+// Makes column cSample the identifier column of the current table; FALSE if it is unknown.
+cmsBool CMSEXPORT cmsIT8SetIndexColumn(cmsHANDLE hIT8, const char* cSample)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+    int col;
+
+    _cmsAssert(hIT8 != NULL);
+    col = LocateSample(it8, cSample);
+    if (col != -1) {
+        it8->Tab[it8->nTable].SampleID = col;
+        return TRUE;
+    }
+    return FALSE;
+}
+
+
+// Sets the printf-style format used to render doubles; NULL restores DEFAULT_DBL_FORMAT.
+void CMSEXPORT cmsIT8DefineDblFormat(cmsHANDLE hIT8, const char* Formatter)
+{
+    cmsIT8* it8 = (cmsIT8*) hIT8;
+
+    _cmsAssert(hIT8 != NULL);
+    if (Formatter != NULL)
+        strncpy(it8->DoubleFormatter, Formatter, sizeof(it8->DoubleFormatter));
+    else
+        strcpy(it8->DoubleFormatter, DEFAULT_DBL_FORMAT);
+    // Guarantee termination even when strncpy filled the whole array
+    it8->DoubleFormatter[sizeof(it8->DoubleFormatter)-1] = 0;
+}
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmscnvrt.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmscnvrt.cpp
new file mode 100755
index 0000000000..082f1f8119
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmscnvrt.cpp
@@ -0,0 +1,1162 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// Links several profiles to obtain a single LUT modelling the whole color transform. Intents, black point
+// compensation and adaptation parameters may vary across profiles. BPC and adaptation refer to the PCS
+// after the profile, i.e. BPC[0] refers to the connection between profile(0) and profile(1).
+cmsPipeline* _cmsLinkProfiles(cmsContext     ContextID,
+                              cmsUInt32Number nProfiles,
+                              cmsUInt32Number Intents[],
+                              cmsHPROFILE     hProfiles[],
+                              cmsBool         BPC[],
+                              cmsFloat64Number AdaptationStates[],
+                              cmsUInt32Number dwFlags);
+
+//---------------------------------------------------------------------------------
+
+// Forward declaration of the default routine for ICC-style intents. A user may override it through a plugin.
+// Supported intents are perceptual, relative colorimetric, saturation and ICC-absolute colorimetric.
+static
+cmsPipeline* DefaultICCintents(cmsContext     ContextID,
+                               cmsUInt32Number nProfiles,
+                               cmsUInt32Number Intents[],
+                               cmsHPROFILE     hProfiles[],
+                               cmsBool         BPC[],
+                               cmsFloat64Number AdaptationStates[],
+                               cmsUInt32Number dwFlags);
+
+//---------------------------------------------------------------------------------
+
+// This is the entry for black-preserving K-only intents, which are non-ICC. The last profile has to be an
+// output profile to do the trick (no devicelinks allowed at that position).
+static
+cmsPipeline*  BlackPreservingKOnlyIntents(cmsContext     ContextID,
+                                          cmsUInt32Number nProfiles,
+                                          cmsUInt32Number Intents[],
+                                          cmsHPROFILE     hProfiles[],
+                                          cmsBool         BPC[],
+                                          cmsFloat64Number AdaptationStates[],
+                                          cmsUInt32Number dwFlags);
+
+//---------------------------------------------------------------------------------
+
+// This is the entry for the black-plane preserving intents, which are non-ICC. Again, the last profile has
+// to be an output profile to do the trick (no devicelinks allowed at that position).
+static
+cmsPipeline*  BlackPreservingKPlaneIntents(cmsContext     ContextID,
+                                           cmsUInt32Number nProfiles,
+                                           cmsUInt32Number Intents[],
+                                           cmsHPROFILE     hProfiles[],
+                                           cmsBool         BPC[],
+                                           cmsFloat64Number AdaptationStates[],
+                                           cmsUInt32Number dwFlags);
+
+//---------------------------------------------------------------------------------
+
+
+// One node of a singly linked list pairing an intent code with the function implementing it.
+typedef struct _cms_intents_list {
+
+    cmsUInt32Number Intent;                // Intent code this node answers to
+    char            Description[256];      // Human-readable name of the intent
+    cmsIntentFn     Link;                  // Function that builds the pipeline for this intent
+    struct _cms_intents_list*  Next;       // Following node, NULL at the end of the list
+
+} cmsIntentsList;
+
+
+// Built-in intents. The array doubles as a linked list: each entry points at the next one, the last at NULL.
+static cmsIntentsList DefaultIntents[] = {
+    // { intent code, description, linking function, next node }
+    { INTENT_PERCEPTUAL,                            "Perceptual",                                   DefaultICCintents,            &DefaultIntents[1] },
+    { INTENT_RELATIVE_COLORIMETRIC,                 "Relative colorimetric",                        DefaultICCintents,            &DefaultIntents[2] },
+    { INTENT_SATURATION,                            "Saturation",                                   DefaultICCintents,            &DefaultIntents[3] },
+    { INTENT_ABSOLUTE_COLORIMETRIC,                 "Absolute colorimetric",                        DefaultICCintents,            &DefaultIntents[4] },
+    { INTENT_PRESERVE_K_ONLY_PERCEPTUAL,            "Perceptual preserving black ink",              BlackPreservingKOnlyIntents,  &DefaultIntents[5] },
+    { INTENT_PRESERVE_K_ONLY_RELATIVE_COLORIMETRIC, "Relative colorimetric preserving black ink",   BlackPreservingKOnlyIntents,  &DefaultIntents[6] },
+    { INTENT_PRESERVE_K_ONLY_SATURATION,            "Saturation preserving black ink",              BlackPreservingKOnlyIntents,  &DefaultIntents[7] },
+    { INTENT_PRESERVE_K_PLANE_PERCEPTUAL,           "Perceptual preserving black plane",            BlackPreservingKPlaneIntents, &DefaultIntents[8] },
+    { INTENT_PRESERVE_K_PLANE_RELATIVE_COLORIMETRIC,"Relative colorimetric preserving black plane", BlackPreservingKPlaneIntents, &DefaultIntents[9] },
+    { INTENT_PRESERVE_K_PLANE_SATURATION,           "Saturation preserving black plane",            BlackPreservingKPlaneIntents, NULL }
+};
+
+
+// Global intents plug-in chunk, initialized with NULL (no plug-in intents registered)
+_cmsIntentsPluginChunkType _cmsIntentsPluginChunk = { NULL };
+
+// Deep-copies the plug-in intents list of src into the memory pool of ctx, keeping node order,
+// and installs the copy as the IntentPlugin chunk of ctx. If a node cannot be duplicated the
+// function returns early and ctx->chunks[IntentPlugin] is left untouched.
+static
+void DupPluginIntentsList(struct _cmsContext_struct* ctx,
+                                               const struct _cmsContext_struct* src)
+{
+    _cmsIntentsPluginChunkType* srcChunk = (_cmsIntentsPluginChunkType*) src->chunks[IntentPlugin];
+    _cmsIntentsPluginChunkType  copyChunk = { NULL };
+    cmsIntentsList*  tail = NULL;
+    cmsIntentsList*  node = srcChunk->Intents;
+
+    // Walk the source list front to back
+    while (node != NULL) {
+
+        cmsIntentsList* clone = (cmsIntentsList*) _cmsSubAllocDup(ctx->MemPool, node, sizeof(cmsIntentsList));
+        if (clone == NULL)
+            return;
+
+        // The duplicate still carries the Next of its source; detach it, then append at the tail
+        clone->Next = NULL;
+        if (tail == NULL)
+            copyChunk.Intents = clone;
+        else
+            tail->Next = clone;
+
+        tail = clone;
+        node = node->Next;
+    }
+
+    // Publish the copied list head as a fresh chunk allocated from the pool of ctx
+    ctx->chunks[IntentPlugin] = _cmsSubAllocDup(ctx->MemPool, &copyChunk, sizeof(_cmsIntentsPluginChunkType));
+}
+
+// Sets up the IntentPlugin chunk of ctx: cloned from src when given, otherwise an empty one.
+void  _cmsAllocIntentsPluginChunk(struct _cmsContext_struct* ctx,
+                                         const struct _cmsContext_struct* src)
+{
+    static _cmsIntentsPluginChunkType EmptyChunk = { NULL };
+
+    if (src == NULL) {
+        // No source context: start from a chunk initialized with NULL
+        ctx->chunks[IntentPlugin] = _cmsSubAllocDup(ctx->MemPool, &EmptyChunk, sizeof(_cmsIntentsPluginChunkType));
+        return;
+    }
+    DupPluginIntentsList(ctx, src);
+}
+
+
+// Finds the node handling Intent: the plug-in intents of the context are tried first, then
+// the built-in table. Returns NULL if neither list knows the intent.
+static
+cmsIntentsList* SearchIntent(cmsContext ContextID, cmsUInt32Number Intent)
+{
+    _cmsIntentsPluginChunkType* chunk = (_cmsIntentsPluginChunkType*) _cmsContextGetClientChunk(ContextID, IntentPlugin);
+    cmsIntentsList* node = chunk->Intents;
+
+    // Plug-in entries are consulted before the built-in ones
+    while (node != NULL && node->Intent != Intent) node = node->Next;
+    if (node != NULL) return node;
+    node = DefaultIntents;
+    while (node != NULL && node->Intent != Intent) node = node->Next;
+    return node;
+}
+
+// Black point compensation. Implemented as a per-channel linear scaling in XYZ. Black points
+// should come relative to the white point. Fills a matrix/offset pair: m receives the
+// scale factors on its diagonal and off receives the offsets.
+static
+void ComputeBlackPointCompensation(const cmsCIEXYZ* BlackPointIn,
+                                   const cmsCIEXYZ* BlackPointOut,
+                                   cmsMAT3* m, cmsVEC3* off)
+{
+  cmsFloat64Number ax, ay, az, bx, by, bz, tx, ty, tz;
+
+   // Now we need to compute a matrix m plus an offset off such that
+   // [m]*bpin + off = bpout
+   // [m]*D50  + off = D50
+   //
+   // This is a linear scaling in the form ax+b, where
+   // a = (bpout - D50) / (bpin - D50)
+   // b = - D50* (bpout - bpin) / (bpin - D50)
+   // NOTE(review): tx, ty, tz are used as divisors without a zero check - confirm bpin != D50 per channel
+   tx = BlackPointIn->X - cmsD50_XYZ()->X;
+   ty = BlackPointIn->Y - cmsD50_XYZ()->Y;
+   tz = BlackPointIn->Z - cmsD50_XYZ()->Z;
+
+   ax = (BlackPointOut->X - cmsD50_XYZ()->X) / tx;
+   ay = (BlackPointOut->Y - cmsD50_XYZ()->Y) / ty;
+   az = (BlackPointOut->Z - cmsD50_XYZ()->Z) / tz;
+
+   bx = - cmsD50_XYZ()-> X * (BlackPointOut->X - BlackPointIn->X) / tx;
+   by = - cmsD50_XYZ()-> Y * (BlackPointOut->Y - BlackPointIn->Y) / ty;
+   bz = - cmsD50_XYZ()-> Z * (BlackPointOut->Z - BlackPointIn->Z) / tz;
+   // Scale factors go on the diagonal of m, offsets into off
+   _cmsVEC3init(&m ->v[0], ax, 0,  0);
+   _cmsVEC3init(&m ->v[1], 0, ay,  0);
+   _cmsVEC3init(&m ->v[2], 0,  0,  az);
+   _cmsVEC3init(off, bx, by, bz);
+
+}
+
+
+// Approximates the temperature of the blackbody illuminant implied by a CHAD matrix.
+// Returns -1.0 when the matrix cannot be inverted or no temperature fits the white point.
+static
+cmsFloat64Number CHAD2Temp(const cmsMAT3* Chad)
+{
+    // Convert D50 across inverse CHAD to get the absolute white point
+    cmsVEC3 d, s;
+    cmsCIEXYZ Dest;
+    cmsCIExyY DestChromaticity;
+    cmsFloat64Number TempK;
+    cmsMAT3 m1, m2;
+
+    // Failure must be negative: the caller tests "< 0.0", which FALSE (0) would slip past
+    m1 = *Chad;
+    if (!_cmsMAT3inverse(&m1, &m2)) return -1.0;
+
+    s.n[VX] = cmsD50_XYZ() -> X;
+    s.n[VY] = cmsD50_XYZ() -> Y;
+    s.n[VZ] = cmsD50_XYZ() -> Z;
+
+    _cmsMAT3eval(&d, &m2, &s);
+    Dest.X = d.n[VX];
+    Dest.Y = d.n[VY];
+    Dest.Z = d.n[VZ];
+
+    cmsXYZ2xyY(&DestChromaticity, &Dest);
+
+    if (!cmsTempFromWhitePoint(&TempK, &DestChromaticity))
+        return -1.0;
+    return TempK;
+}
+
+// Builds in Chad the adaptation matrix going from the white point of temperature Temp to D50.
+static
+void Temp2CHAD(cmsMAT3* Chad, cmsFloat64Number Temp)
+{
+    cmsCIExyY xyY;
+    cmsCIEXYZ XYZ;
+
+    cmsWhitePointFromTemp(&xyY, Temp);     // temperature -> chromaticity
+    cmsxyY2XYZ(&XYZ, &xyY);                // chromaticity -> XYZ
+    _cmsAdaptationMatrix(Chad, NULL, &XYZ, cmsD50_XYZ());
+}
+
+// Join scalings to obtain relative input to absolute and then to relative output.
+// Result is stored in the 3x3 matrix m. Returns FALSE if a matrix inversion or a temperature estimate fails.
+static
+cmsBool  ComputeAbsoluteIntent(cmsFloat64Number AdaptationState,
+                               const cmsCIEXYZ* WhitePointIn,
+                               const cmsMAT3* ChromaticAdaptationMatrixIn,
+                               const cmsCIEXYZ* WhitePointOut,
+                               const cmsMAT3* ChromaticAdaptationMatrixOut,
+                               cmsMAT3* m)
+{
+    cmsMAT3 Scale, m1, m2, m3, m4;
+
+    // TODO: Follow Marc Mahy's recommendation to check if CHAD is same by using M1*M2 == M2*M1. If so, do nothing.
+    // TODO: Add support for ArgyllArts tag
+
+    // Adaptation state
+    if (AdaptationState == 1.0) {
+
+        // Observer is fully adapted. Keep chromatic adaptation.
+        // That is the standard V4 behaviour
+        _cmsVEC3init(&m->v[0], WhitePointIn->X / WhitePointOut->X, 0, 0);
+        _cmsVEC3init(&m->v[1], 0, WhitePointIn->Y / WhitePointOut->Y, 0);
+        _cmsVEC3init(&m->v[2], 0, 0, WhitePointIn->Z / WhitePointOut->Z);
+
+    }
+    else  {
+
+        // Incomplete adaptation. This is an advanced feature.
+        _cmsVEC3init(&Scale.v[0], WhitePointIn->X / WhitePointOut->X, 0, 0);
+        _cmsVEC3init(&Scale.v[1], 0,  WhitePointIn->Y / WhitePointOut->Y, 0);
+        _cmsVEC3init(&Scale.v[2], 0, 0,  WhitePointIn->Z / WhitePointOut->Z);
+
+        // Scale now holds the per-channel input/output white point ratios on its diagonal
+        if (AdaptationState == 0.0) {
+        
+            m1 = *ChromaticAdaptationMatrixOut;
+            _cmsMAT3per(&m2, &m1, &Scale);
+            // m2 holds CHAD from output white to D50 times abs. col. scaling
+
+            // Observer is not adapted, undo the chromatic adaptation
+            _cmsMAT3per(m, &m2, ChromaticAdaptationMatrixOut);
+            // NOTE(review): the product just written to m is replaced by the _cmsMAT3per call below - confirm intent
+            m3 = *ChromaticAdaptationMatrixIn;
+            if (!_cmsMAT3inverse(&m3, &m4)) return FALSE;
+            _cmsMAT3per(m, &m2, &m4);
+
+        } else {
+
+            cmsMAT3 MixedCHAD;
+            cmsFloat64Number TempSrc, TempDest, Temp;
+
+            m1 = *ChromaticAdaptationMatrixIn;
+            if (!_cmsMAT3inverse(&m1, &m2)) return FALSE;
+            _cmsMAT3per(&m3, &m2, &Scale);
+            // m3 holds CHAD from input white to D50 times abs. col. scaling
+
+            TempSrc  = CHAD2Temp(ChromaticAdaptationMatrixIn);
+            TempDest = CHAD2Temp(ChromaticAdaptationMatrixOut);
+            // A negative temperature is treated as a failed estimate
+            if (TempSrc < 0.0 || TempDest < 0.0) return FALSE; // Something went wrong
+
+            if (_cmsMAT3isIdentity(&Scale) && fabs(TempSrc - TempDest) < 0.01) {
+
+                _cmsMAT3identity(m);
+                return TRUE;
+            }
+            // Blend both temperatures: AdaptationState weights the source, its complement the destination
+            Temp = (1.0 - AdaptationState) * TempDest + AdaptationState * TempSrc;
+
+            // Get a CHAD from whatever output temperature to D50. This replaces output CHAD
+            Temp2CHAD(&MixedCHAD, Temp);
+
+            _cmsMAT3per(m, &m3, &MixedCHAD);
+        }
+
+    }
+    return TRUE;
+
+}
+
+// Tells whether the matrix/offset pair is close enough to identity/zero to be skipped:
+// the summed absolute deviation of all 12 coefficients must stay below 0.002.
+static
+cmsBool IsEmptyLayer(cmsMAT3* m, cmsVEC3* off)
+{
+    const cmsFloat64Number* mat = (const cmsFloat64Number*) m;
+    const cmsFloat64Number* vec = (const cmsFloat64Number*) off;
+    cmsFloat64Number total = 0;
+    cmsMAT3 Ident;
+    const cmsFloat64Number* ref = (const cmsFloat64Number*) &Ident;
+    int k;
+
+    // Without a matrix: no offset either means "empty"; an offset alone is an internal error
+    if (m == NULL) return (off == NULL) ? TRUE : FALSE;
+
+    _cmsMAT3identity(&Ident);
+
+    for (k = 0; k < 9; k++) total += fabs(mat[k] - ref[k]);
+    for (k = 0; k < 3; k++) total += fabs(vec[k]);
+
+    return (total < 0.002);
+}
+
+// Compute the conversion layer (matrix m plus offset off) between hProfiles[i-1] and hProfiles[i].
+// hProfiles[i-1] is read, so i must be at least 1. Returns FALSE only when ComputeAbsoluteIntent fails.
+static
+cmsBool ComputeConversion(cmsUInt32Number i, 
+                          cmsHPROFILE hProfiles[],
+                          cmsUInt32Number Intent,
+                          cmsBool BPC,
+                          cmsFloat64Number AdaptationState,
+                          cmsMAT3* m, cmsVEC3* off)
+{
+
+    int k;
+
+    // m and off are set to identity and this is detected later on
+    _cmsMAT3identity(m);
+    _cmsVEC3init(off, 0, 0, 0);
+
+    // If intent is abs. colorimetric,
+    if (Intent == INTENT_ABSOLUTE_COLORIMETRIC) {
+
+        cmsCIEXYZ WhitePointIn, WhitePointOut;
+        cmsMAT3 ChromaticAdaptationMatrixIn, ChromaticAdaptationMatrixOut;
+
+        _cmsReadMediaWhitePoint(&WhitePointIn,  hProfiles[i-1]);
+        _cmsReadCHAD(&ChromaticAdaptationMatrixIn, hProfiles[i-1]);
+
+        _cmsReadMediaWhitePoint(&WhitePointOut,  hProfiles[i]);
+        _cmsReadCHAD(&ChromaticAdaptationMatrixOut, hProfiles[i]);
+        // NOTE(review): the outcome of the four reads above is not checked here - confirm they cannot fail
+        if (!ComputeAbsoluteIntent(AdaptationState,
+                                  &WhitePointIn,  &ChromaticAdaptationMatrixIn,
+                                  &WhitePointOut, &ChromaticAdaptationMatrixOut, m)) return FALSE;
+
+    }
+    else {
+        // Rest of intents may apply BPC.
+
+        if (BPC) {
+
+            cmsCIEXYZ BlackPointIn, BlackPointOut;
+
+            cmsDetectBlackPoint(&BlackPointIn,  hProfiles[i-1], Intent, 0);
+            cmsDetectDestinationBlackPoint(&BlackPointOut, hProfiles[i], Intent, 0);
+
+            // If black points are equal, then do nothing
+            if (BlackPointIn.X != BlackPointOut.X ||
+                BlackPointIn.Y != BlackPointOut.Y ||
+                BlackPointIn.Z != BlackPointOut.Z)
+                    ComputeBlackPointCompensation(&BlackPointIn, &BlackPointOut, m, off);
+        }
+    }
+
+    // Offset should be adjusted because of the encoding. We encode XYZ normalized to 0..1.0,
+    // to do that, we divide by MAX_ENCODEABLE_XYZ. The conversion stage goes XYZ -> XYZ so
+    // we have first to convert from encoded to XYZ and then convert back to encoded.
+    // y = Mx + Off
+    // x = x'c
+    // y = M x'c + Off
+    // y = y'c; y' = y / c
+    // y' = (Mx'c + Off) /c = Mx' + (Off / c)
+
+    for (k=0; k < 3; k++) {
+        off ->n[k] /= MAX_ENCODEABLE_XYZ;
+    }
+
+    return TRUE;
+}
+
+// Add a conversion stage if needed. If a matrix/offset m is given, it applies to XYZ space, so Lab
+// endpoints are bridged with Lab<->XYZ stages. Returns FALSE on a PCS mismatch or a failed stage insertion.
+static
+cmsBool AddConversion(cmsPipeline* Result, cmsColorSpaceSignature InPCS, cmsColorSpaceSignature OutPCS, cmsMAT3* m, cmsVEC3* off)
+{
+    cmsFloat64Number* m_as_dbl = (cmsFloat64Number*) m;
+    cmsFloat64Number* off_as_dbl = (cmsFloat64Number*) off;
+
+    // Handle PCS mismatches. A specialized stage is added to the LUT in such case
+    switch (InPCS) {
+
+    case cmsSigXYZData: // Input profile operates in XYZ
+
+        switch (OutPCS) {
+
+        case cmsSigXYZData:  // XYZ -> XYZ
+            if (!IsEmptyLayer(m, off) &&
+                !cmsPipelineInsertStage(Result, cmsAT_END, cmsStageAllocMatrix(Result ->ContextID, 3, 3, m_as_dbl, off_as_dbl)))
+                return FALSE;
+            break;
+
+        case cmsSigLabData:  // XYZ -> Lab
+            if (!IsEmptyLayer(m, off) &&
+                !cmsPipelineInsertStage(Result, cmsAT_END, cmsStageAllocMatrix(Result ->ContextID, 3, 3, m_as_dbl, off_as_dbl)))
+                return FALSE;
+            if (!cmsPipelineInsertStage(Result, cmsAT_END, _cmsStageAllocXYZ2Lab(Result ->ContextID)))
+                return FALSE;
+            break;
+
+        default:
+            return FALSE;   // Colorspace mismatch
+        }
+        break;
+
+    case cmsSigLabData: // Input profile operates in Lab
+
+        switch (OutPCS) {
+
+        case cmsSigXYZData:  // Lab -> XYZ
+
+            if (!cmsPipelineInsertStage(Result, cmsAT_END, _cmsStageAllocLab2XYZ(Result ->ContextID)))
+                return FALSE;
+            if (!IsEmptyLayer(m, off) &&
+                !cmsPipelineInsertStage(Result, cmsAT_END, cmsStageAllocMatrix(Result ->ContextID, 3, 3, m_as_dbl, off_as_dbl)))
+                return FALSE;
+            break;
+
+        case cmsSigLabData:  // Lab -> Lab
+            // A non-empty layer needs a round trip through XYZ, where the matrix applies
+            if (!IsEmptyLayer(m, off)) {
+                if (!cmsPipelineInsertStage(Result, cmsAT_END, _cmsStageAllocLab2XYZ(Result ->ContextID)) ||
+                    !cmsPipelineInsertStage(Result, cmsAT_END, cmsStageAllocMatrix(Result ->ContextID, 3, 3, m_as_dbl, off_as_dbl)) ||
+                    !cmsPipelineInsertStage(Result, cmsAT_END, _cmsStageAllocXYZ2Lab(Result ->ContextID)))
+                    return FALSE;
+            }
+            break;
+
+        default:
+            return FALSE;  // Mismatch
+        }
+        break;
+
+        // On colorspaces other than PCS, check for same space
+    default:
+        if (InPCS != OutPCS) return FALSE;
+        break;
+    }
+
+    return TRUE;
+}
+
+
+// Tells whether two color spaces can be connected: identical spaces always can, and so can
+// the pairs 4-color/CMYK and XYZ/Lab, in either order.
+static
+cmsBool ColorSpaceIsCompatible(cmsColorSpaceSignature a, cmsColorSpaceSignature b)
+{
+    // Same space on both sides
+    if (a == b) return TRUE;
+
+    // Otherwise only these cross pairs are accepted
+    switch (a) {
+    case cmsSig4colorData: return (b == cmsSigCmykData)   ? TRUE : FALSE;   // MCH4 substitution of CMYK
+    case cmsSigCmykData:   return (b == cmsSig4colorData) ? TRUE : FALSE;
+    case cmsSigXYZData:    return (b == cmsSigLabData)    ? TRUE : FALSE;   // XYZ/Lab are interchangeable
+    case cmsSigLabData:    return (b == cmsSigXYZData)    ? TRUE : FALSE;
+    default:               return FALSE;
+    }
+}
+
+// Default handler for ICC-style intents: concatenates the LUTs read from hProfiles[0..nProfiles-1] into a
+// single pipeline, adding PCS conversion stages where needed. Returns NULL on any failure.
+static
+cmsPipeline* DefaultICCintents(cmsContext       ContextID,
+                               cmsUInt32Number  nProfiles,
+                               cmsUInt32Number  TheIntents[],
+                               cmsHPROFILE      hProfiles[],
+                               cmsBool          BPC[],
+                               cmsFloat64Number AdaptationStates[],
+                               cmsUInt32Number  dwFlags)
+{
+    cmsPipeline* Lut = NULL;
+    cmsPipeline* Result;
+    cmsHPROFILE hProfile;
+    cmsMAT3 m;
+    cmsVEC3 off;
+    cmsColorSpaceSignature ColorSpaceIn, ColorSpaceOut = cmsSigLabData, CurrentColorSpace;
+    cmsProfileClassSignature ClassSig;
+    cmsUInt32Number  i, Intent;
+
+    // For safety
+    if (nProfiles == 0) return NULL;
+
+    // Allocate an empty LUT for holding the result. 0 as channel count means 'undefined'
+    Result = cmsPipelineAlloc(ContextID, 0, 0);
+    if (Result == NULL) return NULL;
+
+    CurrentColorSpace = cmsGetColorSpace(hProfiles[0]);
+
+    for (i=0; i < nProfiles; i++) {
+
+        cmsBool  lIsDeviceLink, lIsInput;
+
+        hProfile      = hProfiles[i];
+        ClassSig      = cmsGetDeviceClass(hProfile);
+        lIsDeviceLink = (ClassSig == cmsSigLinkClass || ClassSig == cmsSigAbstractClass );
+
+        // First profile is used as input unless devicelink or abstract
+        if ((i == 0) && !lIsDeviceLink) {
+            lIsInput = TRUE;
+        }
+        else {
+          // Else use profile in the input direction if current space is not PCS
+        lIsInput      = (CurrentColorSpace != cmsSigXYZData) &&
+                        (CurrentColorSpace != cmsSigLabData);
+        }
+
+        Intent        = TheIntents[i];
+
+        if (lIsInput || lIsDeviceLink) {
+
+            ColorSpaceIn    = cmsGetColorSpace(hProfile);
+            ColorSpaceOut   = cmsGetPCS(hProfile);
+        }
+        else {
+
+            ColorSpaceIn    = cmsGetPCS(hProfile);
+            ColorSpaceOut   = cmsGetColorSpace(hProfile);
+        }
+
+        if (!ColorSpaceIsCompatible(ColorSpaceIn, CurrentColorSpace)) {
+
+            cmsSignalError(ContextID, cmsERROR_COLORSPACE_CHECK, "ColorSpace mismatch");
+            goto Error;
+        }
+
+        // If devicelink is found, then no custom intent is allowed and we can
+        // read the LUT to be applied. Settings don't apply here.
+        if (lIsDeviceLink || ((ClassSig == cmsSigNamedColorClass) && (nProfiles == 1))) {
+
+            // Get the involved LUT from the profile
+            Lut = _cmsReadDevicelinkLUT(hProfile, Intent);
+            if (Lut == NULL) goto Error;
+
+            // What about abstract profiles?
+             if (ClassSig == cmsSigAbstractClass && i > 0) {
+                if (!ComputeConversion(i, hProfiles, Intent, BPC[i], AdaptationStates[i], &m, &off)) goto Error;
+             }
+             else {
+                _cmsMAT3identity(&m);
+                _cmsVEC3init(&off, 0, 0, 0);
+             }
+
+            // Bridge from the space reached so far to the input space of this LUT
+            if (!AddConversion(Result, CurrentColorSpace, ColorSpaceIn, &m, &off)) goto Error;
+
+        }
+        else {
+
+            if (lIsInput) {
+                // Input direction means non-pcs connection, so proceed like devicelinks
+                Lut = _cmsReadInputLUT(hProfile, Intent);
+                if (Lut == NULL) goto Error;
+            }
+            else {
+
+                // Output direction means PCS connection. Intent may apply here
+                Lut = _cmsReadOutputLUT(hProfile, Intent);
+                if (Lut == NULL) goto Error;
+
+
+                if (!ComputeConversion(i, hProfiles, Intent, BPC[i], AdaptationStates[i], &m, &off)) goto Error;
+                if (!AddConversion(Result, CurrentColorSpace, ColorSpaceIn, &m, &off)) goto Error;
+
+            }
+        }
+
+        // Concatenate to the output LUT
+        if (!cmsPipelineCat(Result, Lut))
+            goto Error;
+        // Lut is released here; resetting it keeps the Error path from freeing it again
+        cmsPipelineFree(Lut);
+        Lut = NULL;
+
+        // Update current space
+        CurrentColorSpace = ColorSpaceOut;
+    }
+
+    // Check for non-negatives clip
+    if (dwFlags & cmsFLAGS_NONEGATIVES) {
+
+           if (ColorSpaceOut == cmsSigGrayData ||
+                  ColorSpaceOut == cmsSigRgbData ||
+                  ColorSpaceOut == cmsSigCmykData) {
+
+                  cmsStage* clip = _cmsStageClipNegatives(Result->ContextID, cmsChannelsOf(ColorSpaceOut));
+                  if (clip == NULL) goto Error;
+
+                  if (!cmsPipelineInsertStage(Result, cmsAT_END, clip))
+                         goto Error;
+           }
+
+    }
+
+    return Result;
+
+Error:
+    // Common failure exit: free the LUT in flight (if any) and the partial result
+    if (Lut != NULL) cmsPipelineFree(Lut);
+    if (Result != NULL) cmsPipelineFree(Result);
+    return NULL;
+    // NOTE(review): not reachable (follows the return above), and dwFlags is in fact used by this function
+    cmsUNUSED_PARAMETER(dwFlags);
+}
+
+
+// Wrapper for DLL calling convention: exported entry that forwards all arguments to DefaultICCintents
+cmsPipeline*  CMSEXPORT _cmsDefaultICCintents(cmsContext     ContextID,
+                                              cmsUInt32Number nProfiles,
+                                              cmsUInt32Number TheIntents[],
+                                              cmsHPROFILE     hProfiles[],
+                                              cmsBool         BPC[],
+                                              cmsFloat64Number AdaptationStates[],
+                                              cmsUInt32Number dwFlags)
+{
+    return DefaultICCintents(ContextID, nProfiles, TheIntents, hProfiles, BPC, AdaptationStates, dwFlags);
+}
+
+// Black preserving intents ---------------------------------------------------------------------------------------------
+
+// Maps each black-preserving (K-only / K-plane) intent onto the ICC intent it is based on.
+// Any other intent code is returned unchanged.
+static
+cmsUInt32Number TranslateNonICCIntents(cmsUInt32Number Intent)
+{
+    if (Intent == INTENT_PRESERVE_K_ONLY_PERCEPTUAL ||
+        Intent == INTENT_PRESERVE_K_PLANE_PERCEPTUAL)
+        return INTENT_PERCEPTUAL;
+
+    if (Intent == INTENT_PRESERVE_K_ONLY_RELATIVE_COLORIMETRIC ||
+        Intent == INTENT_PRESERVE_K_PLANE_RELATIVE_COLORIMETRIC)
+        return INTENT_RELATIVE_COLORIMETRIC;
+
+    if (Intent == INTENT_PRESERVE_K_ONLY_SATURATION ||
+        Intent == INTENT_PRESERVE_K_PLANE_SATURATION)
+        return INTENT_SATURATION;
+
+    // Not a black-preserving code: pass it through
+    return Intent;
+}
+
+// Sampler for Black-only preserving CMYK->CMYK transforms
+
+// Cargo handed to BlackPreservingGrayOnlySampler. Both members are built and
+// released by BlackPreservingKOnlyIntents.
+typedef struct {
+    cmsPipeline*    cmyk2cmyk;      // The original transform
+    cmsToneCurve*   KTone;          // Black-to-black tone curve
+
+} GrayOnlyParams;
+
+
+// CLUT sampler: a node with C = M = Y = 0 stays pure black (K goes through the
+// black-to-black tone curve); every other node is evaluated with the regular
+// CMYK->CMYK pipeline. Cargo must point to a GrayOnlyParams. Always returns TRUE.
+static
+int BlackPreservingGrayOnlySampler(register const cmsUInt16Number In[], register cmsUInt16Number Out[], register void* Cargo)
+{
+    GrayOnlyParams* params = (GrayOnlyParams*) Cargo;
+    cmsBool usesCMY = (In[0] != 0) || (In[1] != 0) || (In[2] != 0);
+
+    if (usesCMY) {
+
+        // Some colored ink is present, so the normal transform applies
+        params ->cmyk2cmyk ->Eval16Fn(In, Out, params ->cmyk2cmyk ->Data);
+    }
+    else {
+
+        // Black ink only: TAC does not apply, keep C, M and Y at zero
+        Out[2] = 0;
+        Out[1] = 0;
+        Out[0] = 0;
+        Out[3] = cmsEvalToneCurve16(params ->KTone, In[3]);
+    }
+
+    return TRUE;
+}
+
+// This is the entry for black-preserving K-only intents, which are non-ICC
+//
+// Translates the black-preserving intents to their ICC equivalents. When the first
+// or the last profile is not CMYK, the work is delegated to DefaultICCintents with
+// the translated intents. Otherwise a 4->4 pipeline holding a single 16-bit CLUT is
+// built and sampled with BlackPreservingGrayOnlySampler.
+//
+// Returns the resulting pipeline, or NULL if nProfiles is outside 1..255 or any
+// intermediate object cannot be created.
+static
+cmsPipeline*  BlackPreservingKOnlyIntents(cmsContext     ContextID,
+                                          cmsUInt32Number nProfiles,
+                                          cmsUInt32Number TheIntents[],
+                                          cmsHPROFILE     hProfiles[],
+                                          cmsBool         BPC[],
+                                          cmsFloat64Number AdaptationStates[],
+                                          cmsUInt32Number dwFlags)
+{
+    GrayOnlyParams  bp;
+    cmsPipeline*    Result;
+    cmsUInt32Number ICCIntents[256];
+    cmsStage*         CLUT;
+    cmsUInt32Number i, nGridPoints;
+
+
+    // Sanity check
+    // (also keeps the loop below inside the 256-entry ICCIntents array)
+    if (nProfiles < 1 || nProfiles > 255) return NULL;
+
+    // Translate black-preserving intents to ICC ones
+    for (i=0; i < nProfiles; i++)
+        ICCIntents[i] = TranslateNonICCIntents(TheIntents[i]);
+
+    // Check for non-cmyk profiles
+    if (cmsGetColorSpace(hProfiles[0]) != cmsSigCmykData ||
+        cmsGetColorSpace(hProfiles[nProfiles-1]) != cmsSigCmykData)
+           return DefaultICCintents(ContextID, nProfiles, ICCIntents, hProfiles, BPC, AdaptationStates, dwFlags);
+
+    // Zero the cargo so the Error path can tell which members were created
+    memset(&bp, 0, sizeof(bp));
+
+    // Allocate an empty LUT for holding the result
+    Result = cmsPipelineAlloc(ContextID, 4, 4);
+    if (Result == NULL) return NULL;
+
+    // Create a LUT holding normal ICC transform
+    bp.cmyk2cmyk = DefaultICCintents(ContextID,
+        nProfiles,
+        ICCIntents,
+        hProfiles,
+        BPC,
+        AdaptationStates,
+        dwFlags);
+
+    if (bp.cmyk2cmyk == NULL) goto Error;
+
+    // Now, compute the tone curve
+    bp.KTone = _cmsBuildKToneCurve(ContextID,
+        4096,
+        nProfiles,
+        ICCIntents,
+        hProfiles,
+        BPC,
+        AdaptationStates,
+        dwFlags);
+
+    if (bp.KTone == NULL) goto Error;
+
+
+    // How many gridpoints are we going to use?
+    nGridPoints = _cmsReasonableGridpointsByColorspace(cmsSigCmykData, dwFlags);
+
+    // Create the CLUT. 16 bits
+    CLUT = cmsStageAllocCLut16bit(ContextID, nGridPoints, 4, 4, NULL);
+    if (CLUT == NULL) goto Error;
+
+    // This is the one and only MPE in this LUT
+    if (!cmsPipelineInsertStage(Result, cmsAT_BEGIN, CLUT))
+        goto Error;
+
+    // Sample it. We cannot afford pre/post linearization this time.
+    if (!cmsStageSampleCLut16bit(CLUT, BlackPreservingGrayOnlySampler, (void*) &bp, 0))
+        goto Error;
+
+    // Get rid of xform and tone curve
+    cmsPipelineFree(bp.cmyk2cmyk);
+    cmsFreeToneCurve(bp.KTone);
+
+    return Result;
+
+Error:
+
+    // Release whatever was created before the failure, including the result LUT
+    if (bp.cmyk2cmyk != NULL) cmsPipelineFree(bp.cmyk2cmyk);
+    if (bp.KTone != NULL)  cmsFreeToneCurve(bp.KTone);
+    if (Result != NULL) cmsPipelineFree(Result);
+    return NULL;
+
+}
+
+// K Plane-preserving CMYK to CMYK ------------------------------------------------------------------------------------
+
+// Cargo handed to BlackPreservingSampler; filled in by BlackPreservingKPlaneIntents.
+typedef struct {
+
+    cmsPipeline*     cmyk2cmyk;     // The original transform
+    cmsHTRANSFORM    hProofOutput;  // Output CMYK to Lab (last profile)
+    cmsHTRANSFORM    cmyk2Lab;      // The input chain
+    cmsToneCurve*    KTone;         // Black-to-black tone curve
+    cmsPipeline*     LabK2cmyk;     // The output profile
+    cmsFloat64Number MaxError;      // Largest deltaE seen by the sampler so far
+
+    cmsHTRANSFORM    hRoundTrip;    // Not referenced by the code visible in this section
+    cmsFloat64Number MaxTAC;        // Total area coverage limit, in 0..1 domain
+
+
+} PreserveKPlaneParams;
+
+
+// The CLUT will be stored at 16 bits, but calculations are performed at cmsFloat32Number precision
+//
+// CLUT sampler for the K-plane preserving intents. Cargo must point to a
+// PreserveKPlaneParams. For each CMYK node it tries to keep the K value given by the
+// black-to-black tone curve while matching the Lab of the plain colorimetric result,
+// and falls back to the plain colorimetric result when that is not possible.
+// Always returns TRUE.
+static
+int BlackPreservingSampler(register const cmsUInt16Number In[], register cmsUInt16Number Out[], register void* Cargo)
+{
+    int i;
+    cmsFloat32Number Inf[4], Outf[4];
+    cmsFloat32Number LabK[4];
+    cmsFloat64Number SumCMY, SumCMYK, Error, Ratio;
+    cmsCIELab ColorimetricLab, BlackPreservingLab;
+    PreserveKPlaneParams* bp = (PreserveKPlaneParams*) Cargo;
+
+    // Convert from 16 bits to floating point
+    for (i=0; i < 4; i++)
+        Inf[i] = (cmsFloat32Number) (In[i] / 65535.0);
+
+    // Get the K across Tone curve
+    LabK[3] = cmsEvalToneCurveFloat(bp ->KTone, Inf[3]);
+
+    // If going across black only, keep black only
+    if (In[0] == 0 && In[1] == 0 && In[2] == 0) {
+
+        Out[0] = Out[1] = Out[2] = 0;
+        Out[3] = _cmsQuickSaturateWord(LabK[3] * 65535.0);
+        return TRUE;
+    }
+
+    // Try the original transform,
+    cmsPipelineEvalFloat( Inf, Outf, bp ->cmyk2cmyk);
+
+    // Store a copy of the floating point result into 16-bit
+    for (i=0; i < 4; i++)
+            Out[i] = _cmsQuickSaturateWord(Outf[i] * 65535.0);
+
+    // Maybe K is already ok (mostly on K=0)
+    // (tolerance is 3 steps of the 16-bit encoding)
+    if ( fabs(Outf[3] - LabK[3]) < (3.0 / 65535.0) ) {
+        return TRUE;
+    }
+
+    // K differs, measure and keep the Lab measurement for further usage
+    // this is done in relative colorimetric intent
+    cmsDoTransform(bp->hProofOutput, Out, &ColorimetricLab, 1);
+
+    // Is not black only and the transform doesn't keep black.
+    // Obtain the Lab of output CMYK. After that we have Lab + K
+    // (the transform has a 3-channel output, so LabK[3] keeps the tone-curve K)
+    cmsDoTransform(bp ->cmyk2Lab, Outf, LabK, 1);
+
+    // Obtain the corresponding CMY using reverse interpolation
+    // (K is fixed in LabK[3])
+    if (!cmsPipelineEvalReverseFloat(LabK, Outf, Outf, bp ->LabK2cmyk)) {
+
+        // Cannot find a suitable value, so use colorimetric xform
+        // which is already stored in Out[]
+        return TRUE;
+    }
+
+    // Make sure to pass through K (which now is fixed)
+    Outf[3] = LabK[3];
+
+    // Apply TAC if needed
+    SumCMY   = Outf[0]  + Outf[1] + Outf[2];
+    SumCMYK  = SumCMY + Outf[3];
+
+    if (SumCMYK > bp ->MaxTAC) {
+
+        // Scale only C, M and Y down by the excess over the limit; K is left as is
+        Ratio = 1 - ((SumCMYK - bp->MaxTAC) / SumCMY);
+        if (Ratio < 0)
+            Ratio = 0;
+    }
+    else
+       Ratio = 1.0;
+
+    Out[0] = _cmsQuickSaturateWord(Outf[0] * Ratio * 65535.0);     // C
+    Out[1] = _cmsQuickSaturateWord(Outf[1] * Ratio * 65535.0);     // M
+    Out[2] = _cmsQuickSaturateWord(Outf[2] * Ratio * 65535.0);     // Y
+    Out[3] = _cmsQuickSaturateWord(Outf[3] * 65535.0);
+
+    // Estimate the error (this goes 16 bits to Lab DBL)
+    cmsDoTransform(bp->hProofOutput, Out, &BlackPreservingLab, 1);
+    Error = cmsDeltaE(&ColorimetricLab, &BlackPreservingLab);
+    if (Error > bp -> MaxError)
+        bp->MaxError = Error;
+
+    return TRUE;
+}
+
+// This is the entry for black-plane preserving, which are non-ICC
+//
+// Translates the black-preserving intents to their ICC equivalents. When the first
+// profile is not CMYK, or the last profile is neither CMYK nor of output class, the
+// work is delegated to DefaultICCintents with the translated intents. Otherwise a
+// 4->4 pipeline holding a single 16-bit CLUT is built and sampled with
+// BlackPreservingSampler.
+//
+// Returns NULL if nProfiles is outside 1..255 or the result pipeline cannot be
+// allocated.
+// NOTE(review): any later failure jumps to Cleanup and still returns the allocated
+// (possibly empty) Result pipeline rather than NULL. That behaviour is kept as it
+// was; confirm with callers whether NULL would be more appropriate.
+static
+cmsPipeline* BlackPreservingKPlaneIntents(cmsContext     ContextID,
+                                          cmsUInt32Number nProfiles,
+                                          cmsUInt32Number TheIntents[],
+                                          cmsHPROFILE     hProfiles[],
+                                          cmsBool         BPC[],
+                                          cmsFloat64Number AdaptationStates[],
+                                          cmsUInt32Number dwFlags)
+{
+    PreserveKPlaneParams bp;
+    cmsPipeline*    Result = NULL;
+    cmsUInt32Number ICCIntents[256];
+    cmsStage*         CLUT;
+    cmsUInt32Number i, nGridPoints;
+    cmsHPROFILE hLab = NULL;    // Non-NULL only while the temporary Lab profile is open
+
+    // Sanity check
+    // (also keeps the loop below inside the 256-entry ICCIntents array)
+    if (nProfiles < 1 || nProfiles > 255) return NULL;
+
+    // Translate black-preserving intents to ICC ones
+    for (i=0; i < nProfiles; i++)
+        ICCIntents[i] = TranslateNonICCIntents(TheIntents[i]);
+
+    // Check for non-cmyk profiles
+    if (cmsGetColorSpace(hProfiles[0]) != cmsSigCmykData ||
+        !(cmsGetColorSpace(hProfiles[nProfiles-1]) == cmsSigCmykData ||
+        cmsGetDeviceClass(hProfiles[nProfiles-1]) == cmsSigOutputClass))
+           return  DefaultICCintents(ContextID, nProfiles, ICCIntents, hProfiles, BPC, AdaptationStates, dwFlags);
+
+    // Allocate an empty LUT for holding the result
+    Result = cmsPipelineAlloc(ContextID, 4, 4);
+    if (Result == NULL) return NULL;
+
+
+    // Zero the cargo so Cleanup can tell which members were created
+    memset(&bp, 0, sizeof(bp));
+
+    // We need the input LUT of the last profile, assuming this one is responsible of
+    // black generation. This LUT will be searched in inverse order.
+    bp.LabK2cmyk = _cmsReadInputLUT(hProfiles[nProfiles-1], INTENT_RELATIVE_COLORIMETRIC);
+    if (bp.LabK2cmyk == NULL) goto Cleanup;
+
+    // Get total area coverage (in 0..1 domain)
+    bp.MaxTAC = cmsDetectTAC(hProfiles[nProfiles-1]) / 100.0;
+    if (bp.MaxTAC <= 0) goto Cleanup;
+
+
+    // Create a LUT holding normal ICC transform
+    bp.cmyk2cmyk = DefaultICCintents(ContextID,
+                                         nProfiles,
+                                         ICCIntents,
+                                         hProfiles,
+                                         BPC,
+                                         AdaptationStates,
+                                         dwFlags);
+    if (bp.cmyk2cmyk == NULL) goto Cleanup;
+
+    // Now the tone curve
+    bp.KTone = _cmsBuildKToneCurve(ContextID, 4096, nProfiles,
+                                   ICCIntents,
+                                   hProfiles,
+                                   BPC,
+                                   AdaptationStates,
+                                   dwFlags);
+    if (bp.KTone == NULL) goto Cleanup;
+
+    // To measure the output, Last profile to Lab
+    hLab = cmsCreateLab4ProfileTHR(ContextID, NULL);
+    if (hLab == NULL) goto Cleanup;
+
+    bp.hProofOutput = cmsCreateTransformTHR(ContextID, hProfiles[nProfiles-1],
+                                         CHANNELS_SH(4)|BYTES_SH(2), hLab, TYPE_Lab_DBL,
+                                         INTENT_RELATIVE_COLORIMETRIC,
+                                         cmsFLAGS_NOCACHE|cmsFLAGS_NOOPTIMIZE);
+    if ( bp.hProofOutput == NULL) goto Cleanup;
+
+    // Same as the previous one, but lab in the 0..1 range
+    bp.cmyk2Lab = cmsCreateTransformTHR(ContextID, hProfiles[nProfiles-1],
+                                         FLOAT_SH(1)|CHANNELS_SH(4)|BYTES_SH(4), hLab,
+                                         FLOAT_SH(1)|CHANNELS_SH(3)|BYTES_SH(4),
+                                         INTENT_RELATIVE_COLORIMETRIC,
+                                         cmsFLAGS_NOCACHE|cmsFLAGS_NOOPTIMIZE);
+    if (bp.cmyk2Lab == NULL) goto Cleanup;
+
+    // Both transforms exist, the Lab profile is no longer needed. Mark it as
+    // closed so Cleanup does not close it a second time.
+    cmsCloseProfile(hLab);
+    hLab = NULL;
+
+    // Error estimation (for debug only)
+    bp.MaxError = 0;
+
+    // How many gridpoints are we going to use?
+    nGridPoints = _cmsReasonableGridpointsByColorspace(cmsSigCmykData, dwFlags);
+
+
+    CLUT = cmsStageAllocCLut16bit(ContextID, nGridPoints, 4, 4, NULL);
+    if (CLUT == NULL) goto Cleanup;
+
+    if (!cmsPipelineInsertStage(Result, cmsAT_BEGIN, CLUT))
+        goto Cleanup;
+
+    cmsStageSampleCLut16bit(CLUT, BlackPreservingSampler, (void*) &bp, 0);
+
+Cleanup:
+
+    // Shared by the success and failure paths: release every temporary object
+    if (bp.cmyk2cmyk) cmsPipelineFree(bp.cmyk2cmyk);
+    if (bp.cmyk2Lab) cmsDeleteTransform(bp.cmyk2Lab);
+    if (bp.hProofOutput) cmsDeleteTransform(bp.hProofOutput);
+
+    if (bp.KTone) cmsFreeToneCurve(bp.KTone);
+    if (bp.LabK2cmyk) cmsPipelineFree(bp.LabK2cmyk);
+
+    // Still open only when a transform could not be created; do not leak it
+    if (hLab != NULL) cmsCloseProfile(hLab);
+
+    return Result;
+}
+
+// Link routines ------------------------------------------------------------------------------------------------------
+
+// Link a chain of profiles into one LUT. This routine validates the profile count,
+// adjusts the per-profile black point compensation flags (BPC[] is modified in
+// place) and then hands the whole job to the handler registered for the FIRST
+// intent of the chain; that handler may be user-defined and is responsible for the
+// remaining intents. Between 1 and 255 profiles are accepted.
+// Returns NULL on a bad profile count or when no handler exists for TheIntents[0].
+cmsPipeline* _cmsLinkProfiles(cmsContext     ContextID,
+                              cmsUInt32Number nProfiles,
+                              cmsUInt32Number TheIntents[],
+                              cmsHPROFILE     hProfiles[],
+                              cmsBool         BPC[],
+                              cmsFloat64Number AdaptationStates[],
+                              cmsUInt32Number dwFlags)
+{
+    cmsUInt32Number idx;
+    cmsIntentsList* Handler;
+
+    // Reject an unreasonable number of profiles
+    if (nProfiles == 0 || nProfiles > 255) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Couldn't link '%d' profiles", nProfiles);
+        return NULL;
+    }
+
+    // Decide whether black point compensation is needed or allowed. Following
+    // Adobe's document: BPC does not apply to devicelink profiles, nor to abs
+    // colorimetric, and applies always on V4 perceptual and saturation.
+    for (idx = 0; idx < nProfiles; idx++) {
+
+        switch (TheIntents[idx]) {
+
+        case INTENT_ABSOLUTE_COLORIMETRIC:
+            BPC[idx] = FALSE;
+            break;
+
+        case INTENT_PERCEPTUAL:
+        case INTENT_SATURATION:
+            // V4 profiles always get BPC in perceptual and saturation
+            if (cmsGetEncodedICCversion(hProfiles[idx]) >= 0x4000000)
+                BPC[idx] = TRUE;
+            break;
+
+        default:
+            break;
+        }
+    }
+
+    // The first intent in the chain selects the handler. This rules out mixing
+    // several custom intents in one multi-intent chain, but such a mix would be
+    // problematic anyway if a custom intent tries to do things like preserve
+    // primaries. Not perfect, but works well on most cases.
+    Handler = SearchIntent(ContextID, TheIntents[0]);
+    if (Handler != NULL)
+        return Handler ->Link(ContextID, nProfiles, TheIntents, hProfiles, BPC, AdaptationStates, dwFlags);
+
+    cmsSignalError(ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported intent '%d'", TheIntents[0]);
+    return NULL;
+}
+
+// -------------------------------------------------------------------------------------------------
+
+// Get information about available intents. nMax is the maximum space for the supplied "Codes"
+// and "Descriptions" the function returns the total number of intents, which may be greater
+// than nMax, although the matrices are not populated beyond this level.
+//
+// The intents registered as plug-ins in the given context are listed first, followed
+// by the built-in ones. Codes and Descriptions may each be NULL, in which case that
+// array is simply not filled. The description pointers stored in Descriptions refer
+// to the intent list entries themselves; they are not copies.
+cmsUInt32Number CMSEXPORT cmsGetSupportedIntentsTHR(cmsContext ContextID, cmsUInt32Number nMax, cmsUInt32Number* Codes, char** Descriptions)
+{
+    _cmsIntentsPluginChunkType* ctx = ( _cmsIntentsPluginChunkType*) _cmsContextGetClientChunk(ContextID, IntentPlugin);
+    cmsIntentsList* pt;
+    cmsUInt32Number nIntents;
+
+
+    // Plug-in intents of this context
+    for (nIntents=0, pt = ctx->Intents; pt != NULL; pt = pt -> Next)
+    {
+        if (nIntents < nMax) {
+            if (Codes != NULL)
+                Codes[nIntents] = pt ->Intent;
+
+            if (Descriptions != NULL)
+                Descriptions[nIntents] = pt ->Description;
+        }
+
+        nIntents++;
+    }
+
+    // Built-in intents. The counter deliberately keeps running here: restarting it
+    // at zero would overwrite the plug-in entries stored above and leave them out
+    // of the returned total.
+    for (pt = DefaultIntents; pt != NULL; pt = pt -> Next)
+    {
+        if (nIntents < nMax) {
+            if (Codes != NULL)
+                Codes[nIntents] = pt ->Intent;
+
+            if (Descriptions != NULL)
+                Descriptions[nIntents] = pt ->Description;
+        }
+
+        nIntents++;
+    }
+    return nIntents;
+}
+
+// Same as cmsGetSupportedIntentsTHR, called with a NULL context.
+cmsUInt32Number CMSEXPORT cmsGetSupportedIntents(cmsUInt32Number nMax, cmsUInt32Number* Codes, char** Descriptions)
+{
+    return cmsGetSupportedIntentsTHR(NULL, nMax, Codes, Descriptions);
+}
+
+// The plug-in registration. User can add new intents or override default routines
+//
+// Data == NULL resets the context's custom intent list to empty and returns TRUE.
+// Otherwise Data is treated as a cmsPluginRenderingIntent: a new list node is
+// allocated with _cmsPluginMalloc, filled from the plug-in and pushed at the head
+// of the context's intent list. Returns FALSE only if that allocation fails.
+cmsBool  _cmsRegisterRenderingIntentPlugin(cmsContext id, cmsPluginBase* Data)
+{
+    _cmsIntentsPluginChunkType* ctx = ( _cmsIntentsPluginChunkType*) _cmsContextGetClientChunk(id, IntentPlugin);
+    cmsPluginRenderingIntent* Plugin = (cmsPluginRenderingIntent*) Data;
+    cmsIntentsList* fl;
+
+    // Do we have to reset the custom intents?
+    if (Data == NULL) {
+
+        // Only the list head is cleared here; the nodes themselves are not freed
+        ctx->Intents = NULL;
+        return TRUE;
+    }
+
+    fl = (cmsIntentsList*) _cmsPluginMalloc(id, sizeof(cmsIntentsList));
+    if (fl == NULL) return FALSE;
+
+
+    fl ->Intent  = Plugin ->Intent;
+    // Bounded copy; the next line guarantees the terminator when the source is too long
+    strncpy(fl ->Description, Plugin ->Description, sizeof(fl ->Description)-1);
+    fl ->Description[sizeof(fl ->Description)-1] = 0;
+
+    fl ->Link    = Plugin ->Link;
+
+    // Push at the head of the list
+    fl ->Next = ctx ->Intents;
+    ctx ->Intents = fl;
+
+    return TRUE;
+}
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmserr.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmserr.cpp
new file mode 100755
index 0000000000..30408f842f
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmserr.cpp
@@ -0,0 +1,663 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+
+#include "lcms2_internal.h"
+
+
+// This function is here to help applications to prevent mixing lcms versions on header and shared objects.
+// It returns the LCMS_VERSION value this translation unit was compiled with.
+int CMSEXPORT cmsGetEncodedCMMversion(void)
+{
+       return LCMS_VERSION;
+}
+
+// I am so tired about incompatibilities on those functions that here are some replacements
+// that hopefully would be fully portable.
+
+// Case-insensitive comparison of two NUL-terminated strings.
+// Returns 0 when they are equal ignoring case; otherwise the difference between
+// the upper-cased values of the first pair of characters that differ.
+int CMSEXPORT cmsstrcasecmp(const char* s1, const char* s2)
+{
+    const unsigned char* left  = (const unsigned char*) s1;
+    const unsigned char* right = (const unsigned char*) s2;
+
+    for (;; left++, right++) {
+
+        int cl = toupper(*left);
+        int cr = toupper(*right);
+
+        // First mismatch decides the result
+        if (cl != cr)
+            return cl - cr;
+
+        // Both strings ended at the same place
+        if (*left == '\0')
+            return 0;
+    }
+}
+
+// Length of an open file, obtained by seeking to its end. The result is a long int
+// because that is what C99 specifies for ftell (7.19.9.2). The original file
+// position is restored before returning.
+// Returns -1L if the current position cannot be read or the seek to the end fails.
+long int CMSEXPORT cmsfilelength(FILE* f)
+{
+    long int length;
+    const long int saved = ftell(f);    // remember where we are
+
+    if (saved == -1L || fseek(f, 0, SEEK_END) != 0)
+        return -1L;
+
+    length = ftell(f);
+
+    // Go back to where the caller left the file
+    fseek(f, saved, SEEK_SET);
+
+    return length;
+}
+
+
+// Memory handling ------------------------------------------------------------------
+//
+// This is the interface to low-level memory management routines. By default a simple
+// wrapping to malloc/free/realloc is provided, although there is a limit on the max
+// amount of memory that can be reclaimed. This is mostly as a safety feature to prevent
+// bogus or evil code to allocate huge blocks that otherwise lcms would never need.
+
+// Upper bound, in bytes (512 MiB), enforced by the default allocation functions
+#define MAX_MEMORY_FOR_ALLOC  ((cmsUInt32Number)(1024U*1024U*512U))
+
+// User may override this behaviour by using a memory plug-in, which basically replaces
+// the default memory management functions. In this case, no check is performed and it
+// is up to the plug-in writer to keep in the safe side. There are only three functions
+// required to be implemented: malloc, realloc and free, although the user may want to
+// replace the optional mallocZero, calloc and dup as well.
+
+cmsBool   _cmsRegisterMemHandlerPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// *********************************************************************************
+
+// Default allocator: a plain malloc() guarded by a very coarse size limit whose
+// only purpose is to stop exploits. Requests above MAX_MEMORY_FOR_ALLOC get NULL.
+static
+void* _cmsMallocDefaultFn(cmsContext ContextID, cmsUInt32Number size)
+{
+    return (size <= MAX_MEMORY_FOR_ALLOC) ? (void*) malloc(size) : NULL;
+
+    cmsUNUSED_PARAMETER(ContextID);
+}
+
+// Default allocate-and-clear: obtains the block through _cmsMalloc and fills it
+// with zeros. Returns NULL if the allocation fails.
+static
+void* _cmsMallocZeroDefaultFn(cmsContext ContextID, cmsUInt32Number size)
+{
+    void* block = _cmsMalloc(ContextID, size);
+
+    if (block != NULL)
+        memset(block, 0, size);
+
+    return block;
+}
+
+
+// The default free function. The only check performed is against NULL pointers
+static
+void _cmsFreeDefaultFn(cmsContext ContextID, void *Ptr)
+{
+    // free(NULL) is defined a no-op by C99, therefore it is safe to
+    // avoid the check, but it is here just in case...
+
+    if (Ptr) free(Ptr);
+
+    cmsUNUSED_PARAMETER(ContextID);
+}
+
+// Default reallocator: realloc() guarded by the same anti-exploit size limit as
+// the default malloc. With a NULL Ptr, realloc acts like malloc and allocates a
+// fresh block of size bytes. Requests above MAX_MEMORY_FOR_ALLOC (512Mb) get NULL.
+static
+void* _cmsReallocDefaultFn(cmsContext ContextID, void* Ptr, cmsUInt32Number size)
+{
+    return (size <= MAX_MEMORY_FOR_ALLOC) ? realloc(Ptr, size) : NULL;
+
+    cmsUNUSED_PARAMETER(ContextID);
+}
+
+
+// The default calloc function. Allocates an array of num elements, each one of size bytes
+// all memory is initialized to zero.
+// Returns NULL when the total is zero, when num * size would overflow, or when the
+// total exceeds MAX_MEMORY_FOR_ALLOC.
+static
+void* _cmsCallocDefaultFn(cmsContext ContextID, cmsUInt32Number num, cmsUInt32Number size)
+{
+    cmsUInt32Number Total = num * size;
+
+    // Preserve calloc behaviour
+    // (this early exit also guarantees size != 0 for the division below)
+    if (Total == 0) return NULL;
+
+    // Safe check for overflow.
+    if (num >= UINT_MAX / size) return NULL;
+
+    // Check for overflow
+    if (Total < num || Total < size) {
+        return NULL;
+    }
+
+    if (Total > MAX_MEMORY_FOR_ALLOC) return NULL;  // Never alloc over 512Mb
+
+    return _cmsMallocZero(ContextID, Total);
+}
+
+// Default block duplication: allocates size bytes through _cmsMalloc and copies
+// Org into them. Returns NULL if size exceeds MAX_MEMORY_FOR_ALLOC (512Mb) or the
+// allocation fails. With a NULL Org the new block is returned without being filled.
+static
+void* _cmsDupDefaultFn(cmsContext ContextID, const void* Org, cmsUInt32Number size)
+{
+    void* copy;
+
+    if (size > MAX_MEMORY_FOR_ALLOC)
+        return NULL;
+
+    copy = _cmsMalloc(ContextID, size);
+
+    // Nothing to copy from, or nowhere to copy to
+    if (copy == NULL || Org == NULL)
+        return copy;
+
+    memmove(copy, Org, size);
+    return copy;
+}
+
+
+// Pointers to memory manager functions in Context0
+// Initialised with the default malloc, mallocZero, free, realloc, calloc and dup
+// routines defined above, in that order.
+_cmsMemPluginChunkType _cmsMemPluginChunk = { _cmsMallocDefaultFn, _cmsMallocZeroDefaultFn, _cmsFreeDefaultFn, 
+                                              _cmsReallocDefaultFn, _cmsCallocDefaultFn,    _cmsDupDefaultFn
+                                            };
+
+
+// Reset and duplicate memory manager
+// With a source context, the memory-plugin chunk of src is duplicated into the
+// memory pool of ctx. Without one, the chunk is pointed at the DefaultMemoryManager
+// member stored inside ctx itself. ctx must not be NULL (asserted).
+void _cmsAllocMemPluginChunk(struct _cmsContext_struct* ctx, const struct _cmsContext_struct* src)
+{
+    _cmsAssert(ctx != NULL);
+
+    if (src != NULL) {    
+
+        // Duplicate
+        ctx ->chunks[MemPlugin] = _cmsSubAllocDup(ctx ->MemPool, src ->chunks[MemPlugin], sizeof(_cmsMemPluginChunkType));  
+    }
+    else {
+
+        // To reset it, we use the default allocators, which cannot be overridden
+        ctx ->chunks[MemPlugin] = &ctx ->DefaultMemoryManager;
+    } 
+}
+
+// Fill a memory-manager function table. With a NULL plug-in the table becomes a
+// copy of the context 0 defaults. Otherwise malloc, free and realloc are taken
+// from the plug-in, and each of the optional mallocZero, calloc and dup entries is
+// taken from the plug-in when supplied or reverts to the built-in default.
+void _cmsInstallAllocFunctions(cmsPluginMemHandler* Plugin, _cmsMemPluginChunkType* ptr)
+{
+    if (Plugin == NULL) {
+
+        memcpy(ptr, &_cmsMemPluginChunk, sizeof(_cmsMemPluginChunk));
+        return;
+    }
+
+    // Mandatory entries
+    ptr ->MallocPtr  = Plugin -> MallocPtr;
+    ptr ->FreePtr    = Plugin -> FreePtr;
+    ptr ->ReallocPtr = Plugin -> ReallocPtr;
+
+    // Optional entries: plug-in value if supplied, built-in default otherwise
+    if (Plugin ->MallocZeroPtr != NULL)
+        ptr ->MallocZeroPtr = Plugin -> MallocZeroPtr;
+    else
+        ptr ->MallocZeroPtr = _cmsMallocZeroDefaultFn;
+
+    if (Plugin ->CallocPtr != NULL)
+        ptr ->CallocPtr = Plugin -> CallocPtr;
+    else
+        ptr ->CallocPtr = _cmsCallocDefaultFn;
+
+    if (Plugin ->DupPtr != NULL)
+        ptr ->DupPtr = Plugin -> DupPtr;
+    else
+        ptr ->DupPtr = _cmsDupDefaultFn;
+}
+
+
+// Plug-in replacement entry
+// Data == NULL restores the default allocators for the given context (nothing is
+// changed when ContextID is also NULL) and returns TRUE. Otherwise Data is treated
+// as a cmsPluginMemHandler, which must supply malloc, free and realloc; its
+// functions are installed in the context's memory-plugin chunk.
+// Returns FALSE if a required callback is missing or the chunk cannot be obtained.
+cmsBool  _cmsRegisterMemHandlerPlugin(cmsContext ContextID, cmsPluginBase *Data)
+{
+    cmsPluginMemHandler* Plugin = (cmsPluginMemHandler*) Data;     
+    _cmsMemPluginChunkType* ptr;
+
+    // NULL forces to reset to defaults. In this special case, the defaults are stored in the context structure. 
+    // Remaining plug-ins do NOT have any copy in the context structure, but this is somehow special as the
+    // context internal data should be malloc'ed by using those functions. 
+    if (Data == NULL) {
+
+       struct _cmsContext_struct* ctx = ( struct _cmsContext_struct*) ContextID;
+
+       // Return to the default allocators
+        if (ContextID != NULL) {
+            ctx->chunks[MemPlugin] = (void*) &ctx->DefaultMemoryManager;
+        }
+        return TRUE;
+    }
+
+    // Check for required callbacks
+    if (Plugin -> MallocPtr == NULL ||
+        Plugin -> FreePtr == NULL ||
+        Plugin -> ReallocPtr == NULL) return FALSE;
+
+    // Set replacement functions
+    ptr = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    if (ptr == NULL) 
+        return FALSE;
+
+    _cmsInstallAllocFunctions(Plugin, ptr);
+    return TRUE;
+}
+
+// Generic allocate
+// Dispatches to the MallocPtr function of the context's memory-plugin chunk.
+void* CMSEXPORT _cmsMalloc(cmsContext ContextID, cmsUInt32Number size)
+{
+    _cmsMemPluginChunkType* ptr = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    return ptr ->MallocPtr(ContextID, size);
+}
+
+// Generic allocate & zero
+// Dispatches to the MallocZeroPtr function of the context's memory-plugin chunk.
+void* CMSEXPORT _cmsMallocZero(cmsContext ContextID, cmsUInt32Number size)
+{
+    _cmsMemPluginChunkType* ptr = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    return ptr->MallocZeroPtr(ContextID, size);
+}
+
+// Generic calloc
+// Dispatches to the CallocPtr function of the context's memory-plugin chunk.
+void* CMSEXPORT _cmsCalloc(cmsContext ContextID, cmsUInt32Number num, cmsUInt32Number size)
+{
+    _cmsMemPluginChunkType* ptr = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    return ptr->CallocPtr(ContextID, num, size);
+}
+
+// Generic reallocate
+// Dispatches to the ReallocPtr function of the context's memory-plugin chunk.
+void* CMSEXPORT _cmsRealloc(cmsContext ContextID, void* Ptr, cmsUInt32Number size)
+{
+    _cmsMemPluginChunkType* ptr = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    return ptr->ReallocPtr(ContextID, Ptr, size);
+}
+
+// Generic free memory
+// Dispatches to the FreePtr function of the context's memory-plugin chunk.
+// A NULL Ptr is ignored without calling the handler.
+void CMSEXPORT _cmsFree(cmsContext ContextID, void* Ptr)
+{
+    if (Ptr != NULL) {
+        _cmsMemPluginChunkType* ptr = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+        ptr ->FreePtr(ContextID, Ptr);
+    }
+}
+
+// Generic block duplication
+// Dispatches to the DupPtr function of the context's memory-plugin chunk.
+void* CMSEXPORT _cmsDupMem(cmsContext ContextID, const void* Org, cmsUInt32Number size)
+{
+    _cmsMemPluginChunkType* ptr = (_cmsMemPluginChunkType*) _cmsContextGetClientChunk(ContextID, MemPlugin);
+    return ptr ->DupPtr(ContextID, Org, size);
+}
+
+// ********************************************************************************************
+
+// Sub allocation takes care of many pointers of small size. The memory allocated in
+// this way has to be freed at once. Next function allocates a single chunk for linked list
+// I prefer this method over realloc due to the big impact on xput realloc may have if
+// memory is being swapped to disk. This approach is safer (although that may not be true on all platforms)
+//
+// Initial is the block size in bytes; 0 selects the 20K default.
+// Returns the new chunk (Used = 0, next = NULL), or NULL if either allocation fails.
+static
+_cmsSubAllocator_chunk* _cmsCreateSubAllocChunk(cmsContext ContextID, cmsUInt32Number Initial)
+{
+    _cmsSubAllocator_chunk* chunk;
+
+    // 20K by default
+    if (Initial == 0)
+        Initial = 20*1024;
+
+    // Create the container
+    chunk = (_cmsSubAllocator_chunk*) _cmsMallocZero(ContextID, sizeof(_cmsSubAllocator_chunk));
+    if (chunk == NULL) return NULL;
+
+    // Initialize values
+    chunk ->Block     = (cmsUInt8Number*) _cmsMalloc(ContextID, Initial);
+    if (chunk ->Block == NULL) {
+
+        // Something went wrong
+        _cmsFree(ContextID, chunk);
+        return NULL;
+    }
+
+    chunk ->BlockSize = Initial;
+    chunk ->Used      = 0;
+    chunk ->next      = NULL;
+
+    return chunk;
+}
+
+// The suballocator is nothing but a pointer to the first element in the list. We also keep
+// the context ID in this structure.
+//
+// Creates the suballocator together with its first chunk of Initial bytes.
+// Returns NULL if either allocation fails.
+_cmsSubAllocator* _cmsCreateSubAlloc(cmsContext ContextID, cmsUInt32Number Initial)
+{
+    _cmsSubAllocator* sub;
+
+    // Create the container
+    sub = (_cmsSubAllocator*) _cmsMallocZero(ContextID, sizeof(_cmsSubAllocator));
+    if (sub == NULL) return NULL;
+
+    sub ->ContextID = ContextID;
+
+    sub ->h = _cmsCreateSubAllocChunk(ContextID, Initial);
+    if (sub ->h == NULL) {
+        // Do not leave a header without any chunk behind
+        _cmsFree(ContextID, sub);
+        return NULL;
+    }
+
+    return sub;
+}
+
+
+// Release a suballocator: every chunk of the list (its block first, then the chunk
+// record) and finally the header itself.
+void _cmsSubAllocDestroy(_cmsSubAllocator* sub)
+{
+    _cmsSubAllocator_chunk* current = sub ->h;
+
+    while (current != NULL) {
+
+        // Grab the successor before the current record is released
+        _cmsSubAllocator_chunk* following = current ->next;
+
+        if (current ->Block != NULL)
+            _cmsFree(sub ->ContextID, current ->Block);
+
+        _cmsFree(sub ->ContextID, current);
+        current = following;
+    }
+
+    // Free the header
+    _cmsFree(sub ->ContextID, sub);
+}
+
+
+// Get a pointer to small memory block.
+// The requested size is rounded with _cmsALIGNMEM and carved from the head chunk.
+// When the head chunk has no room, a new chunk becomes the head of the list.
+// Returns NULL if that new chunk cannot be created.
+void*  _cmsSubAlloc(_cmsSubAllocator* sub, cmsUInt32Number size)
+{
+    cmsUInt32Number Free = sub -> h ->BlockSize - sub -> h -> Used;
+    cmsUInt8Number* ptr;
+
+    size = _cmsALIGNMEM(size);
+
+    // Check for memory. If there is no room, allocate a new chunk of double memory size.
+    if (size > Free) {
+
+        _cmsSubAllocator_chunk* chunk;
+        cmsUInt32Number newSize;
+
+        // Double the current block size, or use the request itself if that is larger
+        newSize = sub -> h ->BlockSize * 2;
+        if (newSize < size) newSize = size;
+
+        chunk = _cmsCreateSubAllocChunk(sub -> ContextID, newSize);
+        if (chunk == NULL) return NULL;
+
+        // Link list
+        // (the space left in the previous head is no longer handed out)
+        chunk ->next = sub ->h;
+        sub ->h    = chunk;
+
+    }
+
+    ptr =  sub -> h ->Block + sub -> h ->Used;
+    sub -> h -> Used += size;
+
+    return (void*) ptr;
+}
+
+// Copy size bytes from ptr into a fresh block taken from the pool s.
+// Returns NULL when ptr is NULL (the dup of a null pointer is also NULL) or when
+// the pool cannot provide the block.
+void* _cmsSubAllocDup(_cmsSubAllocator* s, const void *ptr, cmsUInt32Number size)
+{
+    void* copy;
+
+    if (ptr == NULL)
+        return NULL;
+
+    copy = _cmsSubAlloc(s, size);
+    if (copy == NULL)
+        return NULL;
+
+    memcpy(copy, ptr, size);
+    return copy;
+}
+
+
+
+// Error logging ******************************************************************
+
+// There is no error handling at all. When a function fails, it returns a proper value.
+// For example, all create functions return NULL on failure. Others return FALSE.
+// It may be interesting, for the developer, to know why the function is failing.
+// For that reason, lcms2 offers a logging function. This function receives
+// an ENGLISH string with some clues on what is going wrong. You can show this
+// info to the end user, or just create some sort of log.
+// The logging function should NOT terminate the program, as this obviously can leak
+// resources. It is the programmer's responsibility to check each function return code
+// to make sure it didn't fail.
+
+// Error messages are limited to MAX_ERROR_MESSAGE_LEN
+
+#define MAX_ERROR_MESSAGE_LEN   1024
+
+// ---------------------------------------------------------------------------------------------------------
+
+// This is our default log error
+static void DefaultLogErrorHandlerFunction(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *Text);
+
+// Context0 storage, which is global
+_cmsLogErrorChunkType _cmsLogErrorChunk = { DefaultLogErrorHandlerFunction };
+
+// Allocates and inits error logger container for a given context. If src is NULL, only initializes the value
+// to the default. Otherwise, it duplicates the value. The interface is standard across all context clients
+void _cmsAllocLogErrorChunk(struct _cmsContext_struct* ctx, 
+                            const struct _cmsContext_struct* src)
+{    
+    static _cmsLogErrorChunkType LogErrorChunk = { DefaultLogErrorHandlerFunction };
+    void* from;
+     
+     if (src != NULL) {
+        from = src ->chunks[Logger];       
+    }
+    else {
+       from = &LogErrorChunk;
+    }
+    
+    ctx ->chunks[Logger] = _cmsSubAllocDup(ctx ->MemPool, from, sizeof(_cmsLogErrorChunkType));   
+}
+
+// The default error logger does nothing.
+static
+void DefaultLogErrorHandlerFunction(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *Text)
+{
+    // fprintf(stderr, "[lcms]: %s\n", Text);
+    // fflush(stderr);
+
+     cmsUNUSED_PARAMETER(ContextID);
+     cmsUNUSED_PARAMETER(ErrorCode);
+     cmsUNUSED_PARAMETER(Text);
+}
+
+// Change log error, context based
+void CMSEXPORT cmsSetLogErrorHandlerTHR(cmsContext ContextID, cmsLogErrorHandlerFunction Fn)
+{
+    _cmsLogErrorChunkType* lhg = (_cmsLogErrorChunkType*) _cmsContextGetClientChunk(ContextID, Logger);
+
+    if (lhg != NULL) {
+
+        if (Fn == NULL)
+            lhg -> LogErrorHandler = DefaultLogErrorHandlerFunction;
+        else
+            lhg -> LogErrorHandler = Fn;
+    }
+}
+
+// Legacy entry point without a context argument: forwards to the context-aware
+// variant, passing NULL as the context.
+void CMSEXPORT cmsSetLogErrorHandler(cmsLogErrorHandlerFunction Fn)
+{
+    cmsSetLogErrorHandlerTHR(NULL, Fn);
+}
+
+// Log an error
+// ErrorText is a text holding an english description of error.
+void CMSEXPORT cmsSignalError(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *ErrorText, ...)
+{
+    va_list args;
+    char Buffer[MAX_ERROR_MESSAGE_LEN];
+    _cmsLogErrorChunkType* lhg;
+
+
+    va_start(args, ErrorText);
+    vsnprintf(Buffer, MAX_ERROR_MESSAGE_LEN-1, ErrorText, args);
+    va_end(args);
+
+    // Check for the context, if specified go there. If not, go for the global
+    lhg = (_cmsLogErrorChunkType*) _cmsContextGetClientChunk(ContextID, Logger);
+    if (lhg ->LogErrorHandler) {
+        lhg ->LogErrorHandler(ContextID, ErrorCode, Buffer);
+    }   
+}
+
+// Writes the four bytes of a tag signature into 'String', followed by a terminating
+// zero. The signature is first passed through _cmsAdjustEndianess32 so that the
+// bytes land in big endian order.
+void _cmsTagSignature2String(char String[5], cmsTagSignature sig)
+{
+    cmsUInt32Number swapped = _cmsAdjustEndianess32((cmsUInt32Number) sig);
+
+    // Terminator goes in position 4; the signature bytes fill positions 0..3
+    String[4] = 0;
+    memmove(String, &swapped, 4);
+}
+
+//--------------------------------------------------------------------------------------------------
+
+
+// Default mutex factory: allocates a _cmsMutex through the allocator of context 'id'
+// and initializes it. Returns the new mutex as an opaque pointer, or NULL when the
+// allocation fails (the primitive is never initialized through a NULL pointer).
+static
+void* defMtxCreate(cmsContext id)
+{
+    _cmsMutex* ptr_mutex = (_cmsMutex*) _cmsMalloc(id, sizeof(_cmsMutex));
+
+    // Out of memory: report failure instead of initializing through NULL
+    if (ptr_mutex == NULL)
+        return NULL;
+
+    _cmsInitMutexPrimitive(ptr_mutex);
+    return (void*) ptr_mutex;
+}
+
+// Default mutex disposal: tears down the primitive, then gives the storage back
+// to the allocator of context 'id'.
+static
+void defMtxDestroy(cmsContext id, void* mtx)
+{
+    _cmsMutex* target = (_cmsMutex*) mtx;
+
+    _cmsDestroyMutexPrimitive(target);
+    _cmsFree(id, mtx);
+}
+
+// Default lock: reports success when the lock primitive returns 0.
+// The context is not used.
+static
+cmsBool defMtxLock(cmsContext id, void* mtx)
+{
+    _cmsMutex* target = (_cmsMutex*) mtx;
+
+    cmsUNUSED_PARAMETER(id);
+    return _cmsLockPrimitive(target) == 0;
+}
+
+// Default unlock: releases the primitive. The context is not used.
+static
+void defMtxUnlock(cmsContext id, void* mtx)
+{
+    _cmsMutex* target = (_cmsMutex*) mtx;
+
+    cmsUNUSED_PARAMETER(id);
+    _cmsUnlockPrimitive(target);
+}
+
+
+
+// Pointers to the mutex functions in Context0, initialized to the default implementations
+_cmsMutexPluginChunkType _cmsMutexPluginChunk = { defMtxCreate, defMtxDestroy, defMtxLock, defMtxUnlock };
+
+// Sets up the mutex chunk of context 'ctx' as a pool-allocated copy: of the mutex
+// chunk of 'src' when 'src' is given, or of the default callbacks when 'src' is NULL.
+void _cmsAllocMutexPluginChunk(struct _cmsContext_struct* ctx,
+                                        const struct _cmsContext_struct* src)
+{
+    static _cmsMutexPluginChunkType MutexChunk = {defMtxCreate, defMtxDestroy, defMtxLock, defMtxUnlock };
+    void* origin = (src != NULL) ? src->chunks[MutexPlugin] : (void*) &MutexChunk;
+
+    ctx->chunks[MutexPlugin] = _cmsSubAllocDup(ctx->MemPool, origin, sizeof(_cmsMutexPluginChunkType));
+}
+
+// Installs the mutex callbacks supplied by a plug-in into the given context.
+// A NULL 'Data' clears all four callbacks (no locking at all) and returns TRUE.
+// Otherwise all four callbacks must be provided: if any is missing, nothing is
+// changed and FALSE is returned.
+cmsBool  _cmsRegisterMutexPlugin(cmsContext ContextID, cmsPluginBase* Data)
+{
+    _cmsMutexPluginChunkType* chunk = ( _cmsMutexPluginChunkType*) _cmsContextGetClientChunk(ContextID, MutexPlugin);
+    cmsPluginMutex* Plugin = (cmsPluginMutex*) Data;
+    cmsBool complete;
+
+    if (Data == NULL) {
+
+        // No lock routines
+        chunk->CreateMutexPtr  = NULL;
+        chunk->DestroyMutexPtr = NULL;
+        chunk->LockMutexPtr    = NULL;
+        chunk->UnlockMutexPtr  = NULL;
+        return TRUE;
+    }
+
+    // A partial set of callbacks is rejected
+    complete = Plugin->CreateMutexPtr  != NULL &&
+               Plugin->DestroyMutexPtr != NULL &&
+               Plugin->LockMutexPtr    != NULL &&
+               Plugin->UnlockMutexPtr  != NULL;
+    if (!complete)
+        return FALSE;
+
+    chunk->CreateMutexPtr  = Plugin->CreateMutexPtr;
+    chunk->DestroyMutexPtr = Plugin->DestroyMutexPtr;
+    chunk->LockMutexPtr    = Plugin->LockMutexPtr;
+    chunk->UnlockMutexPtr  = Plugin->UnlockMutexPtr;
+
+    // All is ok
+    return TRUE;
+}
+
+// Generic mutex functions: each one dispatches to the callback stored in the mutex
+// chunk of the context.
+//
+// Creates a mutex. Returns NULL when no creation callback is set.
+void* CMSEXPORT _cmsCreateMutex(cmsContext ContextID)
+{
+    _cmsMutexPluginChunkType* chunk = (_cmsMutexPluginChunkType*) _cmsContextGetClientChunk(ContextID, MutexPlugin);
+
+    if (chunk->CreateMutexPtr != NULL)
+        return chunk->CreateMutexPtr(ContextID);
+
+    return NULL;
+}
+
+// Destroys 'mtx' through the context's destroy callback. Does nothing when no
+// callback is set.
+void CMSEXPORT _cmsDestroyMutex(cmsContext ContextID, void* mtx)
+{
+    _cmsMutexPluginChunkType* chunk = (_cmsMutexPluginChunkType*) _cmsContextGetClientChunk(ContextID, MutexPlugin);
+
+    if (chunk->DestroyMutexPtr == NULL)
+        return;
+
+    chunk->DestroyMutexPtr(ContextID, mtx);
+}
+
+// Locks 'mtx' through the context's lock callback and returns its result.
+// When no callback is set the lock is reported as taken (TRUE).
+cmsBool CMSEXPORT _cmsLockMutex(cmsContext ContextID, void* mtx)
+{
+    _cmsMutexPluginChunkType* chunk = (_cmsMutexPluginChunkType*) _cmsContextGetClientChunk(ContextID, MutexPlugin);
+
+    if (chunk->LockMutexPtr != NULL)
+        return chunk->LockMutexPtr(ContextID, mtx);
+
+    return TRUE;
+}
+
+// Unlocks 'mtx' through the context's unlock callback. Does nothing when no
+// callback is set.
+void CMSEXPORT _cmsUnlockMutex(cmsContext ContextID, void* mtx)
+{
+    _cmsMutexPluginChunkType* chunk = (_cmsMutexPluginChunkType*) _cmsContextGetClientChunk(ContextID, MutexPlugin);
+
+    if (chunk->UnlockMutexPtr == NULL)
+        return;
+
+    chunk->UnlockMutexPtr(ContextID, mtx);
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsgamma.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsgamma.cpp
new file mode 100755
index 0000000000..dccade2fbe
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsgamma.cpp
@@ -0,0 +1,1433 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2013 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+#include "lcms2_internal.h"
+
+// Tone curves are powerful constructs that can contain curves specified in diverse ways.
+// The curve is stored in segments, where each segment can be sampled or specified by parameters.
+// A 16-bit simplification of the *whole* curve is kept for optimization purposes. For float operation,
+// each segment is evaluated separately. Plug-ins may be used to define new parametric schemes,
+// each plug-in may define up to MAX_TYPES_IN_LCMS_PLUGIN functions types. For defining a function,
+// the plug-in should provide the type id, how many parameters each type has, and a pointer to
+// a procedure that evaluates the function. In the case of reverse evaluation, the evaluator will
+// be called with the type id as a negative value, and a sampled version of the reversed curve
+// will be built.
+
+// ----------------------------------------------------------------- Implementation
+// Maximum number of nodes
+#define MAX_NODES_IN_CURVE   4097
+#define MINUS_INF            (-1E22F)
+#define PLUS_INF             (+1E22F)
+
+// The list of supported parametric curves.
+// Each node describes the curve types served by one evaluator; nodes are chained
+// through Next. FunctionTypes and ParameterCount are parallel arrays: the entry at a
+// given position in ParameterCount belongs to the type at the same position in FunctionTypes.
+typedef struct _cmsParametricCurvesCollection_st {
+
+    cmsUInt32Number nFunctions;                                     // Number of supported functions in this chunk
+    cmsInt32Number  FunctionTypes[MAX_TYPES_IN_LCMS_PLUGIN];        // The identification types
+    cmsUInt32Number ParameterCount[MAX_TYPES_IN_LCMS_PLUGIN];       // Number of parameters for each function
+
+    cmsParametricCurveEvaluator Evaluator;                          // The evaluator
+
+    struct _cmsParametricCurvesCollection_st* Next; // Next in list
+
+} _cmsParametricCurvesCollection;
+
+// This is the default (built-in) evaluator. It is defined further below; the
+// declaration is needed here because DefaultCurves refers to it.
+static cmsFloat64Number DefaultEvalParametricFn(cmsInt32Number Type, const cmsFloat64Number Params[], cmsFloat64Number R);
+
+// The built-in list. It is searched after the plug-in supplied collections, and
+// is a single node (no Next).
+static _cmsParametricCurvesCollection DefaultCurves = {
+    9,                                  // # of curve types
+    { 1, 2, 3, 4, 5, 6, 7, 8, 108 },    // Parametric curve ID
+    { 1, 3, 4, 5, 7, 4, 5, 5, 1 },      // Parameters by type
+    DefaultEvalParametricFn,            // Evaluator
+    NULL                                // Next in chain
+};
+
+// Rebuilds, inside the memory pool of 'ctx', the list of parametric curve collections
+// registered in 'src', keeping the nodes in their original order. If a node cannot be
+// copied, the function returns at once without storing anything in 'ctx'.
+static
+void DupPluginCurvesList(struct _cmsContext_struct* ctx,
+                                               const struct _cmsContext_struct* src)
+{
+   _cmsCurvesPluginChunkType* srcHead = (_cmsCurvesPluginChunkType*) src->chunks[CurvesPlugin];
+   _cmsCurvesPluginChunkType copyHead = { NULL };
+   _cmsParametricCurvesCollection* tail = NULL;
+   _cmsParametricCurvesCollection* node;
+
+   _cmsAssert(srcHead != NULL);
+
+   node = srcHead->ParametricCurves;
+   while (node != NULL) {
+
+       _cmsParametricCurvesCollection* clone = ( _cmsParametricCurvesCollection *) _cmsSubAllocDup(ctx ->MemPool, node, sizeof(_cmsParametricCurvesCollection));
+
+       if (clone == NULL)
+           return;
+
+       // Append at the end, so the copy preserves the ordering of the source list
+       clone->Next = NULL;
+       if (tail == NULL)
+           copyHead.ParametricCurves = clone;
+       else
+           tail->Next = clone;
+
+       tail = clone;
+       node = node->Next;
+   }
+
+   ctx->chunks[CurvesPlugin] = _cmsSubAllocDup(ctx->MemPool, &copyHead, sizeof(_cmsCurvesPluginChunkType));
+}
+
+// Creates the parametric-curves chunk of 'ctx': a copy of the whole list held by
+// 'src' when a source context is given, otherwise a pool-allocated empty list head.
+void _cmsAllocCurvesPluginChunk(struct _cmsContext_struct* ctx,
+                                const struct _cmsContext_struct* src)
+{
+    static _cmsCurvesPluginChunkType CurvesPluginChunk = { NULL };
+
+    _cmsAssert(ctx != NULL);
+
+    if (src == NULL) {
+        ctx->chunks[CurvesPlugin] = _cmsSubAllocDup(ctx->MemPool, &CurvesPluginChunk, sizeof(_cmsCurvesPluginChunkType));
+        return;
+    }
+
+    // The copy has to follow the chain node by node
+    DupPluginCurvesList(ctx, src);
+}
+
+
+// The linked list head
+_cmsCurvesPluginChunkType _cmsCurvesPluginChunk = { NULL };
+
+// Registers the parametric curves described by a plug-in in the given context.
+// A NULL 'Data' empties the context's list and returns TRUE. Otherwise a new node is
+// allocated, filled from the plug-in (at most MAX_TYPES_IN_LCMS_PLUGIN function types
+// are taken) and pushed in front of the list. Returns FALSE if the node cannot be allocated.
+cmsBool _cmsRegisterParametricCurvesPlugin(cmsContext ContextID, cmsPluginBase* Data)
+{
+    _cmsCurvesPluginChunkType* chunk = ( _cmsCurvesPluginChunkType*) _cmsContextGetClientChunk(ContextID, CurvesPlugin);
+    cmsPluginParametricCurves* Plugin = (cmsPluginParametricCurves*) Data;
+    _cmsParametricCurvesCollection* node;
+    cmsUInt32Number count;
+
+    if (Data == NULL) {
+
+        chunk->ParametricCurves = NULL;
+        return TRUE;
+    }
+
+    node = (_cmsParametricCurvesCollection*) _cmsPluginMalloc(ContextID, sizeof(_cmsParametricCurvesCollection));
+    if (node == NULL)
+        return FALSE;
+
+    // Clamp the number of functions so the fixed-size arrays are never overrun
+    count = Plugin->nFunctions;
+    if (count > MAX_TYPES_IN_LCMS_PLUGIN)
+        count = MAX_TYPES_IN_LCMS_PLUGIN;
+
+    node->nFunctions = count;
+    node->Evaluator  = Plugin->Evaluator;
+
+    memmove(node->FunctionTypes,  Plugin->FunctionTypes,  count * sizeof(cmsUInt32Number));
+    memmove(node->ParameterCount, Plugin->ParameterCount, count * sizeof(cmsUInt32Number));
+
+    // The new node becomes the head of the list
+    node->Next = chunk->ParametricCurves;
+    chunk->ParametricCurves = node;
+
+    return TRUE;
+}
+
+
+// Looks for the absolute value of 'Type' among the function types of collection 'c'.
+// Returns the position of the first match, or -1 when there is none.
+static
+int IsInSet(int Type, _cmsParametricCurvesCollection* c)
+{
+    const int wanted = abs(Type);
+    const int count  = (int) c->nFunctions;
+    int pos = 0;
+
+    while (pos < count) {
+
+        if (c->FunctionTypes[pos] == wanted)
+            return pos;
+
+        pos++;
+    }
+
+    return -1;
+}
+
+
+// Finds the collection that handles curve type 'Type'. The collections registered in
+// the context are searched first, then the built-in defaults. On success the collection
+// is returned and, when 'index' is not NULL, the position of the type inside that
+// collection is stored in '*index'. Returns NULL if no collection knows the type.
+static
+_cmsParametricCurvesCollection *GetParametricCurveByType(cmsContext ContextID, int Type, int* index)
+{
+    _cmsCurvesPluginChunkType* chunk = ( _cmsCurvesPluginChunkType*) _cmsContextGetClientChunk(ContextID, CurvesPlugin);
+    _cmsParametricCurvesCollection* lists[2];
+    _cmsParametricCurvesCollection* c;
+    int pass;
+
+    // Search order: plug-in supplied curves, then the defaults
+    lists[0] = chunk->ParametricCurves;
+    lists[1] = &DefaultCurves;
+
+    for (pass = 0; pass < 2; pass++) {
+
+        for (c = lists[pass]; c != NULL; c = c->Next) {
+
+            int where = IsInSet(Type, c);
+
+            if (where == -1)
+                continue;
+
+            if (index != NULL)
+                *index = where;
+
+            return c;
+        }
+    }
+
+    return NULL;
+}
+
+// Low level allocate, which takes care of memory details.
+//   nEntries  - size of the 16-bit table. May be zero, and in this case no optimization
+//               table is stored. More than 65530 entries is rejected.
+//   nSegments - number of segments. May be zero in the inverse case, where only the
+//               table is given. nEntries and nSegments both being zero is an error.
+//   Segments  - segment descriptions to copy (may be NULL). Sampled points of
+//               table-based segments (Type 0) are duplicated, not referenced.
+//   Values    - initial contents of the 16-bit table (may be NULL).
+// Returns the new curve, or NULL on error. On error everything allocated so far is
+// released, including the per-segment interpolation data and sampled point copies.
+static
+cmsToneCurve* AllocateToneCurveStruct(cmsContext ContextID, cmsUInt32Number nEntries,
+                                      cmsUInt32Number nSegments, const cmsCurveSegment* Segments,
+                                      const cmsUInt16Number* Values)
+{
+    cmsToneCurve* p;
+    cmsUInt32Number i;
+
+    // We allow huge tables, which are then restricted for smoothing operations
+    if (nEntries > 65530) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Couldn't create tone curve of more than 65530 entries");
+        return NULL;
+    }
+
+    if (nEntries == 0 && nSegments == 0) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Couldn't create tone curve with zero segments and no table");
+        return NULL;
+    }
+
+    // Allocate all required pointers, etc.
+    p = (cmsToneCurve*) _cmsMallocZero(ContextID, sizeof(cmsToneCurve));
+    if (!p) return NULL;
+
+    // In this case, there are no segments
+    if (nSegments == 0) {
+        p ->Segments = NULL;
+        p ->Evals = NULL;
+    }
+    else {
+        p ->Segments = (cmsCurveSegment*) _cmsCalloc(ContextID, nSegments, sizeof(cmsCurveSegment));
+        if (p ->Segments == NULL) goto Error;
+
+        p ->Evals    = (cmsParametricCurveEvaluator*) _cmsCalloc(ContextID, nSegments, sizeof(cmsParametricCurveEvaluator));
+        if (p ->Evals == NULL) goto Error;
+    }
+
+    p -> nSegments = nSegments;
+
+    // This 16-bit table contains a limited precision representation of the whole curve and is kept for
+    // increasing throughput on certain operations.
+    if (nEntries == 0) {
+        p ->Table16 = NULL;
+    }
+    else {
+       p ->Table16 = (cmsUInt16Number*)  _cmsCalloc(ContextID, nEntries, sizeof(cmsUInt16Number));
+       if (p ->Table16 == NULL) goto Error;
+    }
+
+    p -> nEntries  = nEntries;
+
+    // Initialize members if requested
+    if (Values != NULL && (nEntries > 0)) {
+
+        for (i=0; i < nEntries; i++)
+            p ->Table16[i] = Values[i];
+    }
+
+    // Initialize the segments stuff. The evaluator for each segment is located and a pointer to it
+    // is placed in advance to maximize performance.
+    if (Segments != NULL && (nSegments > 0)) {
+
+        _cmsParametricCurvesCollection *c;
+
+        p ->SegInterp = (cmsInterpParams**) _cmsCalloc(ContextID, nSegments, sizeof(cmsInterpParams*));
+        if (p ->SegInterp == NULL) goto Error;
+
+        for (i=0; i < nSegments; i++) {
+
+            // Type 0 is a special marker for table-based curves
+            if (Segments[i].Type == 0)
+                p ->SegInterp[i] = _cmsComputeInterpParams(ContextID, Segments[i].nGridPoints, 1, 1, NULL, CMS_LERP_FLAGS_FLOAT);
+
+            memmove(&p ->Segments[i], &Segments[i], sizeof(cmsCurveSegment));
+
+            // The copied segment must never keep the caller's pointer: it either owns
+            // a private duplicate of the samples or has none at all.
+            if (Segments[i].Type == 0 && Segments[i].SampledPoints != NULL)
+                p ->Segments[i].SampledPoints = (cmsFloat32Number*) _cmsDupMem(ContextID, Segments[i].SampledPoints, sizeof(cmsFloat32Number) * Segments[i].nGridPoints);
+            else
+                p ->Segments[i].SampledPoints = NULL;
+
+
+            c = GetParametricCurveByType(ContextID, Segments[i].Type, NULL);
+            if (c != NULL)
+                    p ->Evals[i] = c ->Evaluator;
+        }
+    }
+
+    p ->InterpParams = _cmsComputeInterpParams(ContextID, p ->nEntries, 1, 1, p->Table16, CMS_LERP_FLAGS_16BITS);
+    if (p->InterpParams != NULL)
+        return p;
+
+Error:
+    // Release whatever was built so far. This relies on _cmsMallocZero/_cmsCalloc
+    // handing out zero-filled memory, so that entries never set are NULL and skipped.
+    if (p ->SegInterp) {
+
+        for (i=0; i < nSegments; i++) {
+            if (p ->SegInterp[i]) _cmsFreeInterpParams(p ->SegInterp[i]);
+        }
+        _cmsFree(ContextID, p ->SegInterp);
+    }
+
+    if (p ->Segments) {
+
+        // Any non-NULL SampledPoints here is a private duplicate made above
+        for (i=0; i < nSegments; i++) {
+            if (p ->Segments[i].SampledPoints) _cmsFree(ContextID, p ->Segments[i].SampledPoints);
+        }
+        _cmsFree(ContextID, p ->Segments);
+    }
+
+    if (p -> Evals) _cmsFree(ContextID, p -> Evals);
+    if (p ->Table16) _cmsFree(ContextID, p ->Table16);
+    _cmsFree(ContextID, p);
+    return NULL;
+}
+
+
+// Parametric Fn using floating point. This is the evaluator of the built-in curves.
+//   Type   - curve type. A negative type selects the inverse of that curve.
+//   Params - parameters of the curve; how many are read depends on the type.
+//   R      - the input value.
+// Returns the curve value at R, or 0 for an unsupported type. Inverse branches that
+// would divide by a gamma or scale parameter that is (nearly) zero return 0 instead
+// (PLUS_INF for inverse type 1 with non-negative R).
+static
+cmsFloat64Number DefaultEvalParametricFn(cmsInt32Number Type, const cmsFloat64Number Params[], cmsFloat64Number R)
+{
+    cmsFloat64Number e, Val, disc;
+
+    switch (Type) {
+
+   // X = Y ^ Gamma
+    case 1:
+        if (R < 0) {
+
+            // Negative input is only passed through when gamma is 1.0 (identity)
+            if (fabs(Params[0] - 1.0) < MATRIX_DET_TOLERANCE)
+                Val = R;
+            else
+                Val = 0;
+        }
+        else
+            Val = pow(R, Params[0]);
+        break;
+
+    // Type 1 Reversed: X = Y ^1/gamma
+    case -1:
+        if (R < 0) {
+
+            if (fabs(Params[0] - 1.0) < MATRIX_DET_TOLERANCE)
+                Val = R;
+            else
+                Val = 0;
+        }
+        else
+        {
+            if (fabs(Params[0]) < MATRIX_DET_TOLERANCE)
+                Val = PLUS_INF;
+            else
+                Val = pow(R, 1 / Params[0]);
+        }
+        break;
+
+    // CIE 122-1966
+    // Y = (aX + b)^Gamma  | X >= -b/a
+    // Y = 0               | else
+    case 2:
+    {
+
+        if (fabs(Params[1]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            disc = -Params[2] / Params[1];
+
+            if (R >= disc) {
+
+                e = Params[1] * R + Params[2];
+
+                if (e > 0)
+                    Val = pow(e, Params[0]);
+                else
+                    Val = 0;
+            }
+            else
+                Val = 0;
+        }
+    }
+    break;
+
+     // Type 2 Reversed
+     // X = (Y ^1/g  - b) / a
+     case -2:
+     {
+         if (fabs(Params[0]) < MATRIX_DET_TOLERANCE ||
+             fabs(Params[1]) < MATRIX_DET_TOLERANCE)
+         {
+             Val = 0;
+         }
+         else
+         {
+             if (R < 0)
+                 Val = 0;
+             else
+                 Val = (pow(R, 1.0 / Params[0]) - Params[2]) / Params[1];
+
+             if (Val < 0)
+                 Val = 0;
+         }
+     }
+     break;
+
+
+    // IEC 61966-3
+    // Y = (aX + b)^Gamma + c | X >= -b/a
+    // Y = c                  | else
+    case 3:
+    {
+        if (fabs(Params[1]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            // The break point is never taken below zero
+            disc = -Params[2] / Params[1];
+            if (disc < 0)
+                disc = 0;
+
+            if (R >= disc) {
+
+                e = Params[1] * R + Params[2];
+
+                if (e > 0)
+                    Val = pow(e, Params[0]) + Params[3];
+                else
+                    Val = 0;
+            }
+            else
+                Val = Params[3];
+        }
+    }
+    break;
+
+
+    // Type 3 reversed
+    // X=((Y-c)^1/g - b)/a      | (Y>=c)
+    // X=-b/a                   | (Y<c)
+    case -3:
+    {
+        // Both the gamma (used as 1/g) and the scale (used as divisor) must be non-zero
+        if (fabs(Params[0]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[1]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            if (R >= Params[3]) {
+
+                e = R - Params[3];
+
+                if (e > 0)
+                    Val = (pow(e, 1 / Params[0]) - Params[2]) / Params[1];
+                else
+                    Val = 0;
+            }
+            else {
+                Val = -Params[2] / Params[1];
+            }
+        }
+    }
+    break;
+
+
+    // IEC 61966-2.1 (sRGB)
+    // Y = (aX + b)^Gamma | X >= d
+    // Y = cX             | X < d
+    case 4:
+        if (R >= Params[4]) {
+
+            e = Params[1]*R + Params[2];
+
+            if (e > 0)
+                Val = pow(e, Params[0]);
+            else
+                Val = 0;
+        }
+        else
+            Val = R * Params[3];
+        break;
+
+    // Type 4 reversed
+    // X=((Y^1/g-b)/a)    | Y >= (ad+b)^g
+    // X=Y/c              | Y< (ad+b)^g
+    case -4:
+    {
+        if (fabs(Params[0]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[1]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[3]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            // Output value at the break point d of the forward curve
+            e = Params[1] * Params[4] + Params[2];
+            if (e < 0)
+                disc = 0;
+            else
+                disc = pow(e, Params[0]);
+
+            if (R >= disc) {
+
+                Val = (pow(R, 1.0 / Params[0]) - Params[2]) / Params[1];
+            }
+            else {
+                Val = R / Params[3];
+            }
+        }
+    }
+    break;
+
+
+    // Y = (aX + b)^Gamma + e | X >= d
+    // Y = cX + f             | X < d
+    case 5:
+        if (R >= Params[4]) {
+
+            e = Params[1]*R + Params[2];
+
+            if (e > 0)
+                Val = pow(e, Params[0]) + Params[5];
+            else
+                Val = Params[5];
+        }
+        else
+            Val = R*Params[3] + Params[6];
+        break;
+
+
+    // Reversed type 5
+    // X=((Y-e)1/g-b)/a   | Y >=(ad+b)^g+e), cd+f
+    // X=(Y-f)/c          | else
+    case -5:
+    {
+        // Gamma is used as 1/g, a and c are used as divisors: none may be zero
+        if (fabs(Params[0]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[1]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[3]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            disc = Params[3] * Params[4] + Params[6];
+            if (R >= disc) {
+
+                e = R - Params[5];
+                if (e < 0)
+                    Val = 0;
+                else
+                    Val = (pow(e, 1.0 / Params[0]) - Params[2]) / Params[1];
+            }
+            else {
+                Val = (R - Params[6]) / Params[3];
+            }
+        }
+    }
+    break;
+
+
+    // Types 6,7,8 comes from segmented curves as described in ICCSpecRevision_02_11_06_Float.pdf
+    // Type 6 is basically identical to type 5 without d
+
+    // Y = (a * X + b) ^ Gamma + c
+    case 6:
+        e = Params[1]*R + Params[2];
+
+        if (e < 0)
+            Val = Params[3];
+        else
+            Val = pow(e, Params[0]) + Params[3];
+        break;
+
+    // ((Y - c) ^1/Gamma - b) / a
+    case -6:
+    {
+        // Gamma is used as 1/g and a as divisor: neither may be zero
+        if (fabs(Params[0]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[1]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            e = R - Params[3];
+            if (e < 0)
+                Val = 0;
+            else
+                Val = (pow(e, 1.0 / Params[0]) - Params[2]) / Params[1];
+        }
+    }
+    break;
+
+
+    // Y = a * log (b * X^Gamma + c) + d
+    case 7:
+
+       e = Params[2] * pow(R, Params[0]) + Params[3];
+       if (e <= 0)
+           Val = Params[4];
+       else
+           Val = Params[1]*log10(e) + Params[4];
+       break;
+
+    // (Y - d) / a = log(b * X ^Gamma + c)
+    // pow(10, (Y-d) / a) = b * X ^Gamma + c
+    // pow((pow(10, (Y-d) / a) - c) / b, 1/g) = X
+    case -7:
+    {
+        if (fabs(Params[0]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[1]) < MATRIX_DET_TOLERANCE ||
+            fabs(Params[2]) < MATRIX_DET_TOLERANCE)
+        {
+            Val = 0;
+        }
+        else
+        {
+            Val = pow((pow(10.0, (R - Params[4]) / Params[1]) - Params[3]) / Params[2], 1.0 / Params[0]);
+        }
+    }
+    break;
+
+
+   //Y = a * b^(c*X+d) + e
+   case 8:
+       Val = (Params[0] * pow(Params[1], Params[2] * R + Params[3]) + Params[4]);
+       break;
+
+
+   // Y = (log((y-e) / a) / log(b) - d ) / c
+   // a=0, b=1, c=2, d=3, e=4,
+   case -8:
+
+       disc = R - Params[4];
+       if (disc < 0) Val = 0;
+       else
+       {
+           if (fabs(Params[0]) < MATRIX_DET_TOLERANCE ||
+               fabs(Params[2]) < MATRIX_DET_TOLERANCE)
+           {
+               Val = 0;
+           }
+           else
+           {
+               Val = (log(disc / Params[0]) / log(Params[1]) - Params[3]) / Params[2];
+           }
+       }
+       break;
+
+   // S-Shaped: (1 - (1-x)^1/g)^1/g
+   case 108:
+       if (fabs(Params[0]) < MATRIX_DET_TOLERANCE)
+           Val = 0;
+       else
+           Val = pow(1.0 - pow(1 - R, 1/Params[0]), 1/Params[0]);
+      break;
+
+    // y = (1 - (1-x)^1/g)^1/g
+    // y^g = (1 - (1-x)^1/g)
+    // 1 - y^g = (1-x)^1/g
+    // (1 - y^g)^g = 1 - x
+    // 1 - (1 - y^g)^g
+    case -108:
+        Val = 1 - pow(1 - pow(R, Params[0]), Params[0]);
+        break;
+
+    default:
+        // Unsupported parametric curve. Should never reach here
+        return 0;
+    }
+
+    return Val;
+}
+
+// Evaluate a segmented function for a single value. Segments are scanned from the last
+// to the first, and the first one whose domain (x0, x1] contains R is used.
+// If the segment type is 0, an interpolation on its sampled table is performed; otherwise
+// the evaluator stored for the segment is called.
+// Returns the value of the curve at R. Infinite results are replaced by PLUS_INF or
+// MINUS_INF according to their sign. Returns MINUS_INF if no valid segment is found.
+static
+cmsFloat64Number EvalSegmentedFn(const cmsToneCurve *g, cmsFloat64Number R)
+{
+    int i;
+    cmsFloat32Number Out32;
+    cmsFloat64Number Out;
+
+    for (i = (int) g->nSegments - 1; i >= 0; --i) {
+
+        // Check for domain
+        if ((R > g->Segments[i].x0) && (R <= g->Segments[i].x1)) {
+
+            // Type == 0 means segment is sampled
+            if (g->Segments[i].Type == 0) {
+
+                // Position of R inside the segment, rescaled by the segment width
+                cmsFloat32Number R1 = (cmsFloat32Number)(R - g->Segments[i].x0) / (g->Segments[i].x1 - g->Segments[i].x0);
+
+                // Setup the table (TODO: clean that)
+                g->SegInterp[i]->Table = g->Segments[i].SampledPoints;
+
+                g->SegInterp[i]->Interpolation.LerpFloat(&R1, &Out32, g->SegInterp[i]);
+                Out = (cmsFloat64Number) Out32;
+
+            }
+            else {
+                Out = g->Evals[i](g->Segments[i].Type, g->Segments[i].Params, R);
+            }
+
+            // isinf() is true for both infinities, so the sign has to be tested explicitly
+            if (isinf(Out))
+                return (Out < 0) ? MINUS_INF : PLUS_INF;
+
+            return Out;
+        }
+    }
+
+    return MINUS_INF;
+}
+
+// Accessors for the estimated (low resolution) 16-bit table of a tone curve.
+//
+// Returns the number of entries of the table. 't' must not be NULL.
+cmsUInt32Number CMSEXPORT cmsGetToneCurveEstimatedTableEntries(const cmsToneCurve* t)
+{
+    cmsUInt32Number nTableEntries;
+
+    _cmsAssert(t != NULL);
+
+    nTableEntries = t->nEntries;
+    return nTableEntries;
+}
+
+// Returns a read-only pointer to the estimated 16-bit table of the curve.
+// 't' must not be NULL.
+const cmsUInt16Number* CMSEXPORT cmsGetToneCurveEstimatedTable(const cmsToneCurve* t)
+{
+    const cmsUInt16Number* table;
+
+    _cmsAssert(t != NULL);
+
+    table = t->Table16;
+    return table;
+}
+
+
+// Builds a tone curve from a 16-bit table only: 'nEntries' values taken from 'Values'.
+// No segments are created, so the floating point description is left empty.
+cmsToneCurve* CMSEXPORT cmsBuildTabulatedToneCurve16(cmsContext ContextID, cmsUInt32Number nEntries, const cmsUInt16Number Values[])
+{
+    const cmsUInt32Number noSegments = 0;
+
+    return AllocateToneCurveStruct(ContextID, nEntries, noSegments, NULL, Values);
+}
+
+// Chooses the size of the 16-bit table for a pure gamma curve: 2 entries are used
+// when the gamma is within 0.001 of 1.0, and 4096 entries otherwise.
+static
+cmsUInt32Number EntriesByGamma(cmsFloat64Number Gamma)
+{
+    const cmsFloat64Number distance = fabs(Gamma - 1.0);
+
+    return (distance < 0.001) ? 2 : 4096;
+}
+
+
+// Builds a tone curve out of 'nSegments' segments and fills its 16-bit table by
+// evaluating the segmented function at evenly spaced points from 0 to 1.
+// Returns NULL if the curve cannot be allocated. 'Segments' must not be NULL.
+cmsToneCurve* CMSEXPORT cmsBuildSegmentedToneCurve(cmsContext ContextID,
+                                                   cmsUInt32Number nSegments, const cmsCurveSegment Segments[])
+{
+    cmsUInt32Number nGridPoints = 4096;
+    cmsUInt32Number idx;
+    cmsToneCurve* curve;
+
+    _cmsAssert(Segments != NULL);
+
+    // Optimization for identity curves: a single type 1 segment picks its table size by gamma
+    if (nSegments == 1 && Segments[0].Type == 1)
+        nGridPoints = EntriesByGamma(Segments[0].Params[0]);
+
+    curve = AllocateToneCurveStruct(ContextID, nGridPoints, nSegments, Segments, NULL);
+    if (curve == NULL)
+        return NULL;
+
+    // With the floating point version in place, approximate it by a 16-bit table.
+    // This table would normally not be used except on 8/16 bits transforms.
+    for (idx = 0; idx < nGridPoints; idx++) {
+
+        const cmsFloat64Number x = (cmsFloat64Number) idx / (nGridPoints-1);
+        const cmsFloat64Number y = EvalSegmentedFn(curve, x);
+
+        // Round and saturate
+        curve->Table16[idx] = _cmsQuickSaturateWord(y * 65535.0);
+    }
+
+    return curve;
+}
+
+// Use a segmented curve to store the floating point table.
+//   nEntries - number of samples in 'values'; must be at least 1
+//   values   - the samples, describing the curve from 0 to 1
+// The curve is built from three segments: a constant equal to the first sample below 0,
+// the sampled table from 0 to 1, and a constant equal to the last sample above 1.
+// Returns the new curve, or NULL if there are no samples or the curve cannot be built.
+cmsToneCurve* CMSEXPORT cmsBuildTabulatedToneCurveFloat(cmsContext ContextID, cmsUInt32Number nEntries, const cmsFloat32Number values[])
+{
+    cmsCurveSegment Seg[3];
+
+    // Without samples there is no first/last value to read: values[nEntries-1]
+    // would index far outside the table when nEntries is zero.
+    if (nEntries == 0 || values == NULL)
+        return NULL;
+
+    // Start from a known state, so fields not set below are zero instead of garbage
+    memset(Seg, 0, sizeof(Seg));
+
+    // A segmented tone curve should have function segments in the first and last positions
+    // Initialize segmented curve part up to 0 to constant value = samples[0]
+    // (type 6 with a = b = 0 evaluates to its c parameter)
+    Seg[0].x0 = MINUS_INF;
+    Seg[0].x1 = 0;
+    Seg[0].Type = 6;
+
+    Seg[0].Params[0] = 1;
+    Seg[0].Params[1] = 0;
+    Seg[0].Params[2] = 0;
+    Seg[0].Params[3] = values[0];
+    Seg[0].Params[4] = 0;
+
+    // From zero to 1
+    Seg[1].x0 = 0;
+    Seg[1].x1 = 1.0;
+    Seg[1].Type = 0;
+
+    Seg[1].nGridPoints = nEntries;
+    Seg[1].SampledPoints = (cmsFloat32Number*) values;
+
+    // Final segment is constant = lastsample
+    Seg[2].x0 = 1.0;
+    Seg[2].x1 = PLUS_INF;
+    Seg[2].Type = 6;
+
+    Seg[2].Params[0] = 1;
+    Seg[2].Params[1] = 0;
+    Seg[2].Params[2] = 0;
+    Seg[2].Params[3] = values[nEntries-1];
+    Seg[2].Params[4] = 0;
+
+
+    return cmsBuildSegmentedToneCurve(ContextID, 3, Seg);
+}
+
+// Parametric curves
+//
+// Parameters go as: Curve, a, b, c, d, e, f
+// Type is the ICC type +1
+// If type is negative, then the curve is analytically inverted
+cmsToneCurve* CMSEXPORT cmsBuildParametricToneCurve(cmsContext ContextID, cmsInt32Number Type, const cmsFloat64Number Params[])
+{
+    // Wraps one parametric function of the given Type, spanning MINUS_INF..PLUS_INF, into a curve.
+    cmsCurveSegment Whole;
+    int Index = 0;
+    cmsUInt32Number nBytes;
+    _cmsParametricCurvesCollection* Collection = GetParametricCurveByType(ContextID, Type, &Index);
+
+    _cmsAssert(Params != NULL);
+
+    if (Collection == NULL) {
+        cmsSignalError(ContextID, cmsERROR_UNKNOWN_EXTENSION, "Invalid parametric curve type %d", Type);
+        return NULL;
+    }
+
+    // Start from an all-zero segment so that parameters which are not copied below stay at 0
+    memset(&Whole, 0, sizeof(Whole));
+    Whole.Type = Type;
+    Whole.x0   = MINUS_INF;
+    Whole.x1   = PLUS_INF;
+    // Copy only as many parameters as the collection declares for this type
+    nBytes = Collection->ParameterCount[Index] * sizeof(cmsFloat64Number);
+    memmove(Whole.Params, Params, nBytes);
+    return cmsBuildSegmentedToneCurve(ContextID, 1, &Whole);
+}
+
+
+
+// Build a gamma table based on gamma constant: Gamma is passed as the only parameter of parametric type 1
+cmsToneCurve* CMSEXPORT cmsBuildGamma(cmsContext ContextID, cmsFloat64Number Gamma)
+{
+    return cmsBuildParametricToneCurve(ContextID, 1, &Gamma);
+}
+
+
+// Free all memory taken by the gamma curve
+void CMSEXPORT cmsFreeToneCurve(cmsToneCurve* Curve)
+{
+    // Releases the curve and every buffer it holds; a NULL curve is silently ignored.
+    cmsContext Owner;
+    cmsUInt32Number Seg;
+
+    if (Curve == NULL)
+        return;
+
+    // Remember the context first: it is read from InterpParams, which is released right after
+    Owner = Curve ->InterpParams->ContextID;
+    _cmsFreeInterpParams(Curve ->InterpParams);
+
+    if (Curve ->Table16 != NULL)
+        _cmsFree(Owner, Curve ->Table16);
+
+    if (Curve ->Segments != NULL) {
+
+        // Each segment may carry its own sample buffer and interpolation parameters
+        for (Seg = 0; Seg < Curve ->nSegments; Seg++) {
+            if (Curve ->Segments[Seg].SampledPoints != NULL)
+                _cmsFree(Owner, Curve ->Segments[Seg].SampledPoints);
+
+            if (Curve ->SegInterp[Seg] != NULL)
+                _cmsFreeInterpParams(Curve ->SegInterp[Seg]);
+        }
+
+        _cmsFree(Owner, Curve ->Segments);
+        _cmsFree(Owner, Curve ->SegInterp);
+    }
+
+    if (Curve ->Evals != NULL)
+        _cmsFree(Owner, Curve ->Evals);
+
+    _cmsFree(Owner, Curve);   // Curve is known to be non-NULL at this point
+}
+
+// Utility function, free 3 gamma tables
+void CMSEXPORT cmsFreeToneCurveTriple(cmsToneCurve* Curve[3])
+{
+    int Slot;
+
+    _cmsAssert(Curve != NULL);
+
+    // Release whichever of the three curves exist, then clear every slot of the caller's array
+    for (Slot = 0; Slot < 3; Slot++)
+        if (Curve[Slot] != NULL) cmsFreeToneCurve(Curve[Slot]);
+    Curve[0] = Curve[1] = Curve[2] = NULL;
+}
+
+
+// Duplicate a tone curve. Returns NULL when In is NULL, otherwise whatever AllocateToneCurveStruct() returns
+cmsToneCurve* CMSEXPORT cmsDupToneCurve(const cmsToneCurve* In)
+{
+    if (In == NULL) return NULL;
+    // The copy is requested with In's own context, entry count, segments and 16-bit table
+    return  AllocateToneCurveStruct(In ->InterpParams ->ContextID, In ->nEntries, In ->nSegments, In ->Segments, In ->Table16);
+}
+
+// Joins two curves for X and Y. Curves should be monotonic.
+// We want to get
+//
+//      y = Y^-1(X(t))
+//
+cmsToneCurve* CMSEXPORT cmsJoinToneCurve(cmsContext ContextID,
+                                      const cmsToneCurve* X,
+                                      const cmsToneCurve* Y, cmsUInt32Number nResultingPoints)
+{
+    // Samples reversed-Y(X(t)) at nResultingPoints evenly spaced t and tabulates the result.
+    // Returns NULL when reversing Y, allocating the sample buffer or building the table fails.
+    cmsToneCurve* Joined = NULL;
+    cmsToneCurve* InverseY;
+    cmsFloat32Number* Samples = NULL;
+    cmsUInt32Number n;
+
+    _cmsAssert(X != NULL);
+    _cmsAssert(Y != NULL);
+
+    InverseY = cmsReverseToneCurveEx(nResultingPoints, Y);
+    if (InverseY != NULL) {
+
+        Samples = (cmsFloat32Number*) _cmsCalloc(ContextID, nResultingPoints, sizeof(cmsFloat32Number));
+        if (Samples != NULL) {
+
+            for (n = 0; n < nResultingPoints; n++) {
+
+                cmsFloat32Number t = (cmsFloat32Number) n / (nResultingPoints-1);
+                cmsFloat32Number x = cmsEvalToneCurveFloat(X,  t);
+
+                Samples[n] = cmsEvalToneCurveFloat(InverseY, x);
+            }
+
+            // Build the output curve from the samples; the sample buffer is released right after
+            Joined = cmsBuildTabulatedToneCurveFloat(ContextID, nResultingPoints, Samples);
+            _cmsFree(ContextID, Samples);
+        }
+
+        cmsFreeToneCurve(InverseY);
+    }
+
+    return Joined;
+}
+
+
+
+// Get the surrounding nodes. This is tricky on non-monotonic tables
+static
+int GetInterval(cmsFloat64Number In, const cmsUInt16Number LutTable[], const struct _cms_interp_struc* p)
+{
+    // Returns the index i of the first visited interval [i, i+1] whose two table values enclose
+    // In (either way round), or -1 when there is none. p->Domain[0] is used as the index of
+    // the last node, so the intervals run from 0 to Domain[0]-1.
+
+    int Node, Stop, Step;
+    int Lo, Hi;
+
+    // A 1 point table is not allowed
+    if (p -> Domain[0] < 1) return -1;
+
+    // Let's see if ascending or descending.
+    if (LutTable[0] < LutTable[p ->Domain[0]]) {
+
+        // Table is overall ascending: visit the intervals from the last one down to the first
+        Node = (int) p->Domain[0] - 1;
+        Stop = -1;
+        Step = -1;
+    }
+    else {
+
+        // Table is overall descending: visit the intervals from the first one up to the last
+        Node = 0;
+        Stop = (int) p -> Domain[0];
+        Step = 1;
+    }
+
+    for (; (Step > 0) ? (Node < Stop) : (Node > Stop); Node += Step) {
+
+        // Put the two node values in order so one range test covers rising and falling pairs
+        if (LutTable[Node] <= LutTable[Node+1]) {
+            Lo = LutTable[Node];
+            Hi = LutTable[Node+1];
+        }
+        else {
+            Lo = LutTable[Node+1];
+            Hi = LutTable[Node];
+        }
+
+        if (In >= Lo && In <= Hi) return Node;
+    }
+
+    return -1;
+}
+
+// Reverse a gamma table
+cmsToneCurve* CMSEXPORT cmsReverseToneCurveEx(cmsUInt32Number nResultSamples, const cmsToneCurve* InCurve)
+{
+    cmsToneCurve *out;
+    cmsFloat64Number a = 0, b = 0, y, x1, y1, x2, y2;
+    int i, j;
+    int Ascending;
+    // Builds the inverse of InCurve; the tabulated path below produces nResultSamples entries.
+    _cmsAssert(InCurve != NULL);
+
+    // Try to reverse it analytically whenever possible: a single segment with a positive Type that
+    // GetParametricCurveByType() finds is rebuilt with the negated Type and the same Params.
+    if (InCurve ->nSegments == 1 && InCurve ->Segments[0].Type > 0 && 
+        /* InCurve -> Segments[0].Type <= 5 */ 
+        GetParametricCurveByType(InCurve ->InterpParams->ContextID, InCurve ->Segments[0].Type, NULL) != NULL) {
+
+        return cmsBuildParametricToneCurve(InCurve ->InterpParams->ContextID,
+                                       -(InCurve -> Segments[0].Type),
+                                       InCurve -> Segments[0].Params);
+    }
+
+    // Nope, reverse the table. The output starts from cmsBuildTabulatedToneCurve16() given a NULL table.
+    out = cmsBuildTabulatedToneCurve16(InCurve ->InterpParams->ContextID, nResultSamples, NULL);
+    if (out == NULL)
+        return NULL;
+
+    // We want to know if this is an ascending or descending table
+    Ascending = !cmsIsToneCurveDescending(InCurve);
+
+    // Iterate across Y axis
+    for (i=0; i < (int) nResultSamples; i++) {
+
+        y = (cmsFloat64Number) i * 65535.0 / (nResultSamples - 1);
+
+        // Find interval in which y is within.
+        j = GetInterval(y, InCurve->Table16, InCurve->InterpParams);
+        if (j >= 0) {
+
+            // x1, x2: table values at nodes j and j+1; y1, y2: those node positions scaled to 0..65535
+            // Get limits of interval
+            x1 = InCurve ->Table16[j];
+            x2 = InCurve ->Table16[j+1];
+
+            y1 = (cmsFloat64Number) (j * 65535.0) / (InCurve ->nEntries - 1);
+            y2 = (cmsFloat64Number) ((j+1) * 65535.0 ) / (InCurve ->nEntries - 1);
+
+            // If collapsed (both table values equal), no slope exists: use one end of the interval
+            if (x1 == x2) {
+
+                out ->Table16[i] = _cmsQuickSaturateWord(Ascending ? y2 : y1);
+                continue;
+
+            } else {
+
+                // Interpolate: line through (x1, y1) and (x2, y2)
+                a = (y2 - y1) / (x2 - x1);
+                b = y2 - a * x2;
+            }
+        }
+        // When no interval was found (j < 0), a and b keep their previous values (initially 0)
+        out ->Table16[i] = _cmsQuickSaturateWord(a* y + b);
+    }
+
+
+    return out;
+}
+
+// Reverse a tone curve with a fixed request of 4096 result samples (see cmsReverseToneCurveEx)
+cmsToneCurve* CMSEXPORT cmsReverseToneCurve(const cmsToneCurve* InGamma)
+{
+    _cmsAssert(InGamma != NULL);
+    // 4096 is passed as nResultSamples
+    return cmsReverseToneCurveEx(4096, InGamma);
+}
+
+// From: Eilers, P.H.C. (1994) Smoothing and interpolation with finite
+// differences. in: Graphic Gems IV, Heckbert, P.S. (ed.), Academic press.
+//
+// Smoothing and interpolation with second differences.
+//
+//   Input:  weights (w), data (y): vector from 1 to m.
+//   Input:  smoothing parameter (lambda), length (m).
+//   Output: smoothed vector (z): vector from 1 to m.
+
+static
+cmsBool smooth2(cmsContext ContextID, cmsFloat32Number w[], cmsFloat32Number y[], 
+                cmsFloat32Number z[], cmsFloat32Number lambda, int m)
+{
+    int i, i1, i2;
+    cmsFloat32Number *c, *d, *e;
+    cmsBool st;
+
+    // Rows i-1 and i-2 are read and index m is written: m < 3 would index below 0, m >= scratch size past the end
+    if (m < 3 || m >= MAX_NODES_IN_CURVE) return FALSE;
+    c = (cmsFloat32Number*) _cmsCalloc(ContextID, MAX_NODES_IN_CURVE, sizeof(cmsFloat32Number));
+    d = (cmsFloat32Number*) _cmsCalloc(ContextID, MAX_NODES_IN_CURVE, sizeof(cmsFloat32Number));
+    e = (cmsFloat32Number*) _cmsCalloc(ContextID, MAX_NODES_IN_CURVE, sizeof(cmsFloat32Number));
+
+    if (c != NULL && d != NULL && e != NULL) {
+
+    // Rows 1 and 2 are set up by hand; all vectors are indexed from 1 to m (slot 0 is not written)
+    d[1] = w[1] + lambda;
+    c[1] = -2 * lambda / d[1];
+    e[1] = lambda /d[1];
+    z[1] = w[1] * y[1];
+    d[2] = w[2] + 5 * lambda - d[1] * c[1] *  c[1];
+    c[2] = (-4 * lambda - d[1] * c[1] * e[1]) / d[2];
+    e[2] = lambda / d[2];
+    z[2] = w[2] * y[2] - c[1] * z[1];
+
+    for (i = 3; i < m - 1; i++) {
+        i1 = i - 1; i2 = i - 2;
+        d[i]= w[i] + 6 * lambda - c[i1] * c[i1] * d[i1] - e[i2] * e[i2] * d[i2];
+        c[i] = (-4 * lambda -d[i1] * c[i1] * e[i1])/ d[i];
+        e[i] = lambda / d[i];
+        z[i] = w[i] * y[i] - c[i1] * z[i1] - e[i2] * z[i2];
+    }
+
+    i1 = m - 2; i2 = m - 3;
+    // Row m-1 uses 5*lambda (rows 3..m-2 above use 6*lambda)
+    d[m - 1] = w[m - 1] + 5 * lambda -c[i1] * c[i1] * d[i1] - e[i2] * e[i2] * d[i2];
+    c[m - 1] = (-2 * lambda - d[i1] * c[i1] * e[i1]) / d[m - 1];
+    z[m - 1] = w[m - 1] * y[m - 1] - c[i1] * z[i1] - e[i2] * z[i2];
+    i1 = m - 1; i2 = m - 2;
+    // Row m uses a single lambda; z[m] and z[m-1] get their final values here
+    d[m] = w[m] + lambda - c[i1] * c[i1] * d[i1] - e[i2] * e[i2] * d[i2];
+    z[m] = (w[m] * y[m] - c[i1] * z[i1] - e[i2] * z[i2]) / d[m];
+    z[m - 1] = z[m - 1] / d[m - 1] - c[m - 1] * z[m];
+    // Walk back from row m-2 to row 1; each z[i] uses the two already final values after it
+    for (i = m - 2; 1<= i; i--)
+        z[i] = z[i] / d[i] - c[i] * z[i + 1] - e[i] * z[i + 2];
+
+      st = TRUE;
+    }
+    else st = FALSE;
+
+    if (c != NULL) _cmsFree(ContextID, c);
+    if (d != NULL) _cmsFree(ContextID, d);
+    if (e != NULL) _cmsFree(ContextID, e);
+
+    return st;
+}
+
+// Smooths a curve sampled at regular intervals.
+cmsBool  CMSEXPORT cmsSmoothToneCurve(cmsToneCurve* Tab, cmsFloat64Number lambda)
+{
+    cmsBool SuccessStatus = TRUE;
+    cmsFloat32Number *w, *y, *z;
+    cmsUInt32Number i, nItems, Zeros, Poles;
+    // Smooths Tab->Table16 in place. TRUE is also returned, with the table untouched, for linear curves.
+    if (Tab != NULL && Tab->InterpParams != NULL)
+    {
+        cmsContext ContextID = Tab->InterpParams->ContextID;
+
+        if (!cmsIsToneCurveLinear(Tab)) // Only non-linear curves need smoothing
+        {
+            nItems = Tab->nEntries;
+            if (nItems < MAX_NODES_IN_CURVE)
+            {
+                // Allocate one more item than needed
+                w = (cmsFloat32Number *)_cmsCalloc(ContextID, nItems + 1, sizeof(cmsFloat32Number));
+                y = (cmsFloat32Number *)_cmsCalloc(ContextID, nItems + 1, sizeof(cmsFloat32Number));
+                z = (cmsFloat32Number *)_cmsCalloc(ContextID, nItems + 1, sizeof(cmsFloat32Number));
+
+                if (w != NULL && y != NULL && z != NULL) // Ensure no memory allocation failure
+                {
+                    memset(w, 0, (nItems + 1) * sizeof(cmsFloat32Number));
+                    memset(y, 0, (nItems + 1) * sizeof(cmsFloat32Number));
+                    memset(z, 0, (nItems + 1) * sizeof(cmsFloat32Number));
+                    // Data goes to slots 1..nItems (slot 0 is left at zero); every weight is 1.0
+                    for (i = 0; i < nItems; i++)
+                    {
+                        y[i + 1] = (cmsFloat32Number)Tab->Table16[i];
+                        w[i + 1] = 1.0;
+                    }
+
+                    if (smooth2(ContextID, w, y, z, (cmsFloat32Number)lambda, (int)nItems))
+                    {
+                        // Do some reality - checking...
+                        // Scan from the last node down to node 2, comparing each node with its predecessor
+                        Zeros = Poles = 0;
+                        for (i = nItems; i > 1; --i)
+                        {
+                            if (z[i] == 0.) Zeros++;
+                            if (z[i] >= 65535.) Poles++;
+                            if (z[i] < z[i - 1])
+                            {
+                                cmsSignalError(ContextID, cmsERROR_RANGE, "cmsSmoothToneCurve: Non-Monotonic.");
+                                SuccessStatus = FALSE;
+                                break;
+                            }
+                        }
+                        // More than a third of the nodes at exactly 0 is rejected
+                        if (SuccessStatus && Zeros > (nItems / 3))
+                        {
+                            cmsSignalError(ContextID, cmsERROR_RANGE, "cmsSmoothToneCurve: Degenerated, mostly zeros.");
+                            SuccessStatus = FALSE;
+                        }
+                        // More than a third of the nodes at or above 65535 is rejected
+                        if (SuccessStatus && Poles > (nItems / 3))
+                        {
+                            cmsSignalError(ContextID, cmsERROR_RANGE, "cmsSmoothToneCurve: Degenerated, mostly poles.");
+                            SuccessStatus = FALSE;
+                        }
+
+                        if (SuccessStatus) // Seems ok, only now is the table overwritten
+                        {
+                            for (i = 0; i < nItems; i++)
+                            {
+                                // Clamp to cmsUInt16Number
+                                Tab->Table16[i] = _cmsQuickSaturateWord(z[i + 1]);
+                            }
+                        }
+                    }
+                    else // Could not smooth
+                    {
+                        cmsSignalError(ContextID, cmsERROR_RANGE, "cmsSmoothToneCurve: Function smooth2 failed.");
+                        SuccessStatus = FALSE;
+                    }
+                }
+                else // One or more buffers could not be allocated
+                {
+                    cmsSignalError(ContextID, cmsERROR_RANGE, "cmsSmoothToneCurve: Could not allocate memory.");
+                    SuccessStatus = FALSE;
+                }
+                // The work buffers are released on every path, including partial allocation failure
+                if (z != NULL)
+                    _cmsFree(ContextID, z);
+
+                if (y != NULL)
+                    _cmsFree(ContextID, y);
+
+                if (w != NULL)
+                    _cmsFree(ContextID, w);
+            }
+            else // too many items in the table
+            {
+                cmsSignalError(ContextID, cmsERROR_RANGE, "cmsSmoothToneCurve: Too many points.");
+                SuccessStatus = FALSE;
+            }
+        }
+    }
+    else // Tab parameter or Tab->InterpParams is NULL
+    {
+        // Can't signal an error here since the ContextID is not known at this point
+        SuccessStatus = FALSE;
+    }
+
+    return SuccessStatus;
+}
+
+// Is a table linear? Parametric data is not used since we cannot guarantee some weird parameters resulting
+// in a linear table. This way assures it is linear in 12 bits, which should be enough in most cases.
+cmsBool CMSEXPORT cmsIsToneCurveLinear(const cmsToneCurve* Curve)
+{
+    int Node, Total;
+
+    _cmsAssert(Curve != NULL);
+    Total = (int) Curve ->nEntries;
+
+    for (Node = 0; Node < Total; Node++) {
+
+        int Actual = (int) Curve->Table16[Node];
+        int Ideal  = (int) _cmsQuantizeVal(Node, Curve ->nEntries);
+
+        if (abs(Actual - Ideal) > 0x0f) return FALSE;
+    }
+    return TRUE;
+}
+
+// Same, but for monotonicity
+cmsBool  CMSEXPORT cmsIsToneCurveMonotonic(const cmsToneCurve* t)
+{
+    // Walks the 16-bit table in the direction in which its values are expected to fall:
+    // front to back for descending curves, back to front otherwise. A rise of more than 2
+    // between consecutive visited nodes makes the curve non-monotonic.
+    cmsUInt32Number Count;
+    int Pos, Stop, Dir;
+    int Prev, Cur;
+
+    _cmsAssert(t != NULL);
+
+    // Degenerated curves are monotonic? Ok, let's pass them
+    Count = t ->nEntries;
+    if (Count < 2) return TRUE;
+
+    // Curve direction decides where the walk starts and which way it moves
+    if (cmsIsToneCurveDescending(t)) {
+
+        Pos  = 0;
+        Stop = (int) Count;
+        Dir  = 1;
+    }
+    else {
+
+        Pos  = (int) Count - 1;
+        Stop = -1;
+        Dir  = -1;
+    }
+
+    Prev = t ->Table16[Pos];
+
+    for (Pos += Dir; (Dir > 0) ? (Pos < Stop) : (Pos > Stop); Pos += Dir) {
+
+        Cur = t ->Table16[Pos];
+
+        if (Cur - Prev > 2) // We allow some ripple
+            return FALSE;
+
+        Prev = Cur;
+    }
+
+    return TRUE;
+}
+
+// Tells whether the table runs downwards overall: TRUE when the first entry is greater than the last
+cmsBool  CMSEXPORT cmsIsToneCurveDescending(const cmsToneCurve* t)
+{
+    _cmsAssert(t != NULL);
+    // Only the two end nodes of Table16 are compared; nothing in between is inspected
+    return t ->Table16[0] > t ->Table16[t ->nEntries-1];
+}
+
+
+// Another info fn: is our gamma table multisegment? TRUE when the curve holds more than one segment
+cmsBool  CMSEXPORT cmsIsToneCurveMultisegment(const cmsToneCurve* t)
+{
+    _cmsAssert(t != NULL);
+    // Curves with zero or exactly one segment both answer FALSE
+    return t -> nSegments > 1;
+}
+
+cmsInt32Number  CMSEXPORT cmsGetToneCurveParametricType(const cmsToneCurve* t)
+{
+    _cmsAssert(t != NULL);
+    // Returns the Type of the only segment, or 0 whenever the curve does not have exactly one segment
+    if (t -> nSegments != 1) return 0;
+    return t ->Segments[0].Type;
+}
+
+// We need accuracy this time
+cmsFloat32Number CMSEXPORT cmsEvalToneCurveFloat(const cmsToneCurve* Curve, cmsFloat32Number v)
+{
+    cmsUInt16Number Word16;
+
+    _cmsAssert(Curve != NULL);
+
+    // Curves that carry segments are evaluated directly in floating point
+    if (Curve ->nSegments != 0)
+        return (cmsFloat32Number) EvalSegmentedFn(Curve, v);
+
+    // Otherwise only the 16 bits table exists, so this is a limited-precision tone curve:
+    // quantize the input, run the 16-bit evaluator and scale the answer back
+    Word16 = (cmsUInt16Number) _cmsQuickSaturateWord(v * 65535.0);
+    Word16 = cmsEvalToneCurve16(Curve, Word16);
+
+    return (cmsFloat32Number) (Word16 / 65535.0);
+}
+
+// We need throughput over here: the call goes straight to the Lerp16 routine stored in InterpParams
+cmsUInt16Number CMSEXPORT cmsEvalToneCurve16(const cmsToneCurve* Curve, cmsUInt16Number v)
+{
+    cmsUInt16Number out;
+
+    _cmsAssert(Curve != NULL);
+    // Lerp16 is handed &v as its input and &out for the result, which is then returned
+    Curve ->InterpParams ->Interpolation.Lerp16(&v, &out, Curve ->InterpParams);
+    return out;
+}
+
+
+// Least squares fitting.
+// A mathematical procedure for finding the best-fitting curve to a given set of points by
+// minimizing the sum of the squares of the offsets ("the residuals") of the points from the curve.
+// The sum of the squares of the offsets is used instead of the offset absolute values because
+// this allows the residuals to be treated as a continuous differentiable quantity.
+//
+// y = f(x) = x ^ g
+//
+// R  = (yi - (xi^g))
+// R2 = (yi - (xi^g))2
+// SUM R2 = SUM (yi - (xi^g))2
+//
+// dR2/dg = -2 SUM x^g log(x)(y - x^g)
+// solving for dR2/dg = 0
+//
+// g = 1/n * SUM(log(y) / log(x))
+
+cmsFloat64Number CMSEXPORT cmsEstimateGamma(const cmsToneCurve* t, cmsFloat64Number Precision)
+{
+    // Returns the mean of log(y)/log(x) over the sampled curve, or -1.0 when fewer than two
+    // samples qualify or the standard deviation of those estimates exceeds Precision.
+    cmsFloat64Number gamma, sum, sum2;
+    cmsFloat64Number n, x, y, Std;
+    cmsUInt32Number i;
+
+    _cmsAssert(t != NULL);
+
+    sum = sum2 = n = 0;
+
+    // Excluding endpoints
+    for (i=1; i < (MAX_NODES_IN_CURVE-1); i++) {
+        x = (cmsFloat64Number) i / (MAX_NODES_IN_CURVE-1);
+        y = (cmsFloat64Number) cmsEvalToneCurveFloat(t, (cmsFloat32Number) x);
+
+        // Avoid 7% on lower part to prevent artifacts due to linear ramps.
+        // y must lie strictly inside (0, 1) for the sample to be used.
+        if (y > 0. && y < 1. && x > 0.07) {
+            gamma = log(y) / log(x);
+            sum  += gamma;
+            sum2 += gamma * gamma;
+            n++;
+        }
+    }
+
+    // We need enough valid samples: with n <= 1 the divisions by n*(n-1) and by n would be by zero
+    if (n <= 1) return -1.0;
+
+    // Take a look on SD to see if gamma isn't exponential at all
+    Std = sqrt((n * sum2 - sum * sum) / (n*(n-1)));
+    if (Std > Precision) return -1.0;
+
+    return (sum / n);   // The mean
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsgmt.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsgmt.cpp
new file mode 100755
index 0000000000..5d7a320e32
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsgmt.cpp
@@ -0,0 +1,590 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// Auxiliary: append a Lab identity after the given sequence of profiles
+// and return the transform. Lab profile is closed, rest of profiles are kept open.
+cmsHTRANSFORM _cmsChain2Lab(cmsContext            ContextID,
+                            cmsUInt32Number        nProfiles,
+                            cmsUInt32Number        InputFormat,
+                            cmsUInt32Number        OutputFormat,
+                            const cmsUInt32Number  Intents[],
+                            const cmsHPROFILE      hProfiles[],
+                            const cmsBool          BPC[],
+                            const cmsFloat64Number AdaptationStates[],
+                            cmsUInt32Number        dwFlags)
+{
+    cmsHPROFILE      Chain[256];
+    cmsUInt32Number  ChainIntents[256];
+    cmsBool          ChainBPC[256];
+    cmsFloat64Number ChainAdaptation[256];
+    cmsHPROFILE      LabProfile;
+    cmsHTRANSFORM    Result;
+    cmsUInt32Number  n;
+
+    // The chain lives in fixed local arrays, so no dynamic memory is needed. One slot is taken
+    // by the appended Lab profile: 254 + 1 = 255 and this is the limit
+    if (nProfiles > 254) return NULL;
+
+    // The output space
+    LabProfile = cmsCreateLab4ProfileTHR(ContextID, NULL);
+    if (LabProfile == NULL) return NULL;
+
+    // Copy the caller's chain into the local arrays
+    for (n = 0; n < nProfiles; n++) {
+        Chain[n]           = hProfiles[n];
+        ChainIntents[n]    = Intents[n];
+        ChainBPC[n]        = BPC[n];
+        ChainAdaptation[n] = AdaptationStates[n];
+    }
+
+    // Place Lab identity at chain's end: no BPC, adaptation 1.0, relative colorimetric intent
+    Chain[nProfiles]           = LabProfile;
+    ChainIntents[nProfiles]    = INTENT_RELATIVE_COLORIMETRIC;
+    ChainBPC[nProfiles]        = 0;
+    ChainAdaptation[nProfiles] = 1.0;
+
+    // Create the transform over the extended chain
+    Result = cmsCreateExtendedTransform(ContextID,
+                                        nProfiles + 1,
+                                        Chain,
+                                        ChainBPC,
+                                        ChainIntents,
+                                        ChainAdaptation,
+                                        NULL, 0,
+                                        InputFormat, OutputFormat, dwFlags);
+
+    // The Lab profile is closed here whether or not the transform was created
+    cmsCloseProfile(LabProfile);
+
+    return Result;
+}
+
+
+// Compute K -> L* relationship. Flags may include black point compensation. In this case,
+// the relationship is assumed from the profile with BPC to a black point zero.
+static
+cmsToneCurve* ComputeKToLstar(cmsContext            ContextID,
+                               cmsUInt32Number       nPoints,
+                               cmsUInt32Number       nProfiles,
+                               const cmsUInt32Number Intents[],
+                               const cmsHPROFILE     hProfiles[],
+                               const cmsBool         BPC[],
+                               const cmsFloat64Number AdaptationStates[],
+                               cmsUInt32Number dwFlags)
+{
+    // Tabulates 1 - L/100 for nPoints K values from 0 to 100; NULL is returned on any failure.
+    cmsToneCurve* Curve = NULL;
+    cmsHTRANSFORM ToLab;
+    cmsFloat32Number* Darkness;
+    cmsFloat32Number Ink[4];
+    cmsCIELab Measured;
+    cmsUInt32Number k;
+
+    ToLab = _cmsChain2Lab(ContextID, nProfiles, TYPE_CMYK_FLT, TYPE_Lab_DBL, Intents, hProfiles, BPC, AdaptationStates, dwFlags);
+    if (ToLab == NULL) return NULL;
+
+    Darkness = (cmsFloat32Number*) _cmsCalloc(ContextID, nPoints, sizeof(cmsFloat32Number));
+    if (Darkness != NULL) {
+
+        for (k = 0; k < nPoints; k++) {
+
+            // The first three channels stay at zero for every sample; only the fourth (K) varies
+            Ink[0] = Ink[1] = Ink[2] = 0;
+            Ink[3] = (cmsFloat32Number) ((k * 100.0) / (nPoints-1));
+
+            cmsDoTransform(ToLab, Ink, &Measured, 1);
+            Darkness[k] = (cmsFloat32Number) (1.0 - Measured.L / 100.0); // Negate K for easier operation
+        }
+
+        Curve = cmsBuildTabulatedToneCurveFloat(ContextID, nPoints, Darkness);
+    }
+
+    // The transform is always released; the sample buffer only if it was obtained
+    cmsDeleteTransform(ToLab);
+    if (Darkness != NULL) _cmsFree(ContextID, Darkness);
+
+    return Curve;
+}
+
+
+// Compute Black tone curve on a CMYK -> CMYK transform. This is done by
+// using the proof direction on both profiles to find K->L* relationship
+// then joining both curves. dwFlags may include black point compensation.
+cmsToneCurve* _cmsBuildKToneCurve(cmsContext        ContextID,
+                                   cmsUInt32Number   nPoints,
+                                   cmsUInt32Number   nProfiles,
+                                   const cmsUInt32Number Intents[],
+                                   const cmsHPROFILE hProfiles[],
+                                   const cmsBool     BPC[],
+                                   const cmsFloat64Number AdaptationStates[],
+                                   cmsUInt32Number   dwFlags)
+{
+    const cmsUInt32Number Last = nProfiles - 1;   // Index of the final profile of the chain
+    cmsToneCurve* InputK;
+    cmsToneCurve* OutputK;
+    cmsToneCurve* Joined;
+
+    // Make sure CMYK -> CMYK
+    if (cmsGetColorSpace(hProfiles[0]) != cmsSigCmykData) return NULL;
+    if (cmsGetColorSpace(hProfiles[Last]) != cmsSigCmykData) return NULL;
+
+    // Make sure last is an output profile
+    if (cmsGetDeviceClass(hProfiles[Last]) != cmsSigOutputClass) return NULL;
+
+    // K -> L* of everything but the last profile. BPC works also as each K to L* is
+    // computed as a BPC to zero black point in case of L*
+    InputK = ComputeKToLstar(ContextID, nPoints, Last, Intents, hProfiles, BPC, AdaptationStates, dwFlags);
+    if (InputK == NULL) return NULL;
+
+    // K -> L* of the last profile alone, with every per-profile array advanced to its entry
+    OutputK = ComputeKToLstar(ContextID, nPoints, 1,
+                              &Intents[Last],
+                              &hProfiles[Last],
+                              &BPC[Last],
+                              &AdaptationStates[Last],
+                              dwFlags);
+    if (OutputK == NULL) {
+        cmsFreeToneCurve(InputK);
+        return NULL;
+    }
+
+    // Build the relationship. This effectively limits the maximum accuracy to 16 bits, but
+    // since this is used on black-preserving LUTs, we are not losing  accuracy in any case
+    Joined = cmsJoinToneCurve(ContextID, InputK, OutputK, nPoints);
+
+    // Get rid of components, whether or not the join worked
+    cmsFreeToneCurve(InputK);
+    cmsFreeToneCurve(OutputK);
+
+    // A curve that was built but is not monotonic is discarded
+    if (Joined != NULL && !cmsIsToneCurveMonotonic(Joined)) {
+        cmsFreeToneCurve(Joined);
+        Joined = NULL;
+    }
+    return Joined;
+}
+
+
+// Gamut LUT Creation -----------------------------------------------------------------------------------------
+
+// Used by gamut & softproofing
+
+typedef struct {
+    // Cargo that GamutSampler() receives; "Thereshold" is the field's existing spelling
+    cmsHTRANSFORM hInput;               // From whatever input color space. 16 bits to DBL
+    cmsHTRANSFORM hForward, hReverse;   // Transforms going from Lab to colorant and back
+    cmsFloat64Number Thereshold;        // The threshold above which a color is considered out of gamut
+
+    } GAMUTCHAIN;
+
+// This sampler does compute gamut boundaries by comparing original
+// values with a transform going back and forth. Differences above the
+// threshold stored in GAMUTCHAIN are considered out of gamut.
+// ERR_THERESHOLD is the threshold used when the gamut profile is not a matrix-shaper.
+#define ERR_THERESHOLD      5
+
+
+static
+int GamutSampler(const cmsUInt16Number In[], cmsUInt16Number Out[], void* Cargo)
+{
+    GAMUTCHAIN*  t = (GAMUTCHAIN* ) Cargo;
+    cmsCIELab LabIn1, LabOut1;
+    cmsCIELab LabIn2, LabOut2;
+    cmsUInt16Number Proof[cmsMAXCHANNELS], Proof2[cmsMAXCHANNELS];
+    cmsFloat64Number dE1, dE2, ErrorRatio;
+    // Writes only Out[0]: 0 for in-gamut, otherwise a rounded excess over the threshold. Always returns TRUE.
+    // Assume in-gamut by default.
+    ErrorRatio = 1.0;
+
+    // Convert input to Lab
+    cmsDoTransform(t -> hInput, In, &LabIn1, 1);
+
+    // converts from PCS to colorant. This always
+    // does return in-gamut values,
+    cmsDoTransform(t -> hForward, &LabIn1, Proof, 1);
+
+    // Now, do the inverse, from colorant to PCS.
+    cmsDoTransform(t -> hReverse, Proof, &LabOut1, 1);
+
+    memmove(&LabIn2, &LabOut1, sizeof(cmsCIELab));
+
+    // Try again, but this time taking the first round trip result (LabOut1) as input
+    cmsDoTransform(t -> hForward, &LabOut1, Proof2, 1);
+    cmsDoTransform(t -> hReverse, Proof2, &LabOut2, 1);
+
+    // Take difference of direct value
+    dE1 = cmsDeltaE(&LabIn1, &LabOut1);
+
+    // Take difference of converted value
+    dE2 = cmsDeltaE(&LabIn2, &LabOut2);
+
+    // A dE exactly equal to the threshold matches none of the strict tests and ends in the ratio branch
+    // if dE1 is small and dE2 is small, value is likely to be in gamut
+    if (dE1 < t->Thereshold && dE2 < t->Thereshold)
+        Out[0] = 0;
+    else {
+
+        // if dE1 is small and dE2 is big, undefined. Assume in gamut
+        if (dE1 < t->Thereshold && dE2 > t->Thereshold)
+            Out[0] = 0;
+        else
+            // dE1 is big and dE2 is small, clearly out of gamut
+            if (dE1 > t->Thereshold && dE2 < t->Thereshold)
+                Out[0] = (cmsUInt16Number) _cmsQuickFloor((dE1 - t->Thereshold) + .5);
+            else  {
+
+                // dE1 is big and dE2 is also big, could be due to perceptual mapping
+                // so take error ratio
+                if (dE2 == 0.0)
+                    ErrorRatio = dE1;
+                else
+                    ErrorRatio = dE1 / dE2;
+
+                if (ErrorRatio > t->Thereshold)
+                    Out[0] = (cmsUInt16Number)  _cmsQuickFloor((ErrorRatio - t->Thereshold) + .5);
+                else
+                    Out[0] = 0;
+            }
+    }
+
+
+    return TRUE;
+}
+
+// Computes a gamut LUT by going back and forth across pcs -> relative colorimetric intent -> pcs;
+// the dE obtained is then annotated on the LUT. Values truly out of gamut are clipped to dE = 0xFFFE
+// and values changed are supposed to be handled by any gamut remapping, so they are out of gamut as well.
+//
+// **WARNING: This algorithm assumes that gamut remapping algorithms do NOT move in-gamut colors.
+// Of course, many perceptual and saturation intents do not work that way, but relative colorimetric ones should.
+
+cmsPipeline* _cmsCreateGamutCheckPipeline(cmsContext ContextID,
+                                          cmsHPROFILE hProfiles[],
+                                          cmsBool  BPC[],
+                                          cmsUInt32Number Intents[],
+                                          cmsFloat64Number AdaptationStates[],
+                                          cmsUInt32Number nGamutPCSposition,
+                                          cmsHPROFILE hGamut)
+{
+    cmsHPROFILE hLab;
+    cmsPipeline* Gamut;
+    cmsStage* CLUT;
+    cmsUInt32Number dwFormat;
+    GAMUTCHAIN Chain;
+    cmsUInt32Number nChannels, nGridpoints;
+    cmsColorSpaceSignature ColorSpace;
+    cmsUInt32Number i;
+    cmsHPROFILE ProfileList[256];
+    cmsBool     BPCList[256];
+    cmsFloat64Number AdaptationList[256];
+    cmsUInt32Number IntentList[256];
+
+    memset(&Chain, 0, sizeof(GAMUTCHAIN));
+
+
+    if (nGamutPCSposition <= 0 || nGamutPCSposition > 255) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Wrong position of PCS. 1..255 expected, %d found.", nGamutPCSposition);
+        return NULL;
+    }
+
+    hLab = cmsCreateLab4ProfileTHR(ContextID, NULL);
+    if (hLab == NULL) return NULL;
+
+
+    // The figure of merit. On matrix-shaper profiles, should be almost zero as
+    // the conversion is pretty exact. On LUT based profiles, different resolutions
+    // of input and output CLUT may result in differences.
+
+    if (cmsIsMatrixShaper(hGamut)) {
+
+        Chain.Thereshold = 1.0;
+    }
+    else {
+        Chain.Thereshold = ERR_THERESHOLD;
+    }
+
+
+    // Create a copy of parameters
+    for (i=0; i < nGamutPCSposition; i++) {
+        ProfileList[i]    = hProfiles[i];
+        BPCList[i]        = BPC[i];
+        AdaptationList[i] = AdaptationStates[i];
+        IntentList[i]     = Intents[i];
+    }
+
+    // Fill Lab identity
+    ProfileList[nGamutPCSposition] = hLab;
+    BPCList[nGamutPCSposition] = 0;
+    AdaptationList[nGamutPCSposition] = 1.0;
+    IntentList[nGamutPCSposition] = INTENT_RELATIVE_COLORIMETRIC;
+
+
+    ColorSpace  = cmsGetColorSpace(hGamut);
+
+    nChannels   = cmsChannelsOf(ColorSpace);
+    nGridpoints = _cmsReasonableGridpointsByColorspace(ColorSpace, cmsFLAGS_HIGHRESPRECALC);
+    dwFormat    = (CHANNELS_SH(nChannels)|BYTES_SH(2));
+
+    // 16 bits to Lab double
+    Chain.hInput = cmsCreateExtendedTransform(ContextID,
+        nGamutPCSposition + 1,
+        ProfileList,
+        BPCList,
+        IntentList,
+        AdaptationList,
+        NULL, 0,
+        dwFormat, TYPE_Lab_DBL,
+        cmsFLAGS_NOCACHE);
+
+
+    // Does create the forward step. Lab double to device
+    dwFormat    = (CHANNELS_SH(nChannels)|BYTES_SH(2));
+    Chain.hForward = cmsCreateTransformTHR(ContextID,
+        hLab, TYPE_Lab_DBL,
+        hGamut, dwFormat,
+        INTENT_RELATIVE_COLORIMETRIC,
+        cmsFLAGS_NOCACHE);
+
+    // Does create the backwards step
+    Chain.hReverse = cmsCreateTransformTHR(ContextID, hGamut, dwFormat,
+        hLab, TYPE_Lab_DBL,
+        INTENT_RELATIVE_COLORIMETRIC,
+        cmsFLAGS_NOCACHE);
+
+
+    // All ok?
+    if (Chain.hInput && Chain.hForward && Chain.hReverse) {
+
+        // Go on, try to compute gamut LUT from PCS. This consist on a single channel containing
+        // dE when doing a transform back and forth on the colorimetric intent.
+
+        Gamut = cmsPipelineAlloc(ContextID, 3, 1);
+        if (Gamut != NULL) {
+
+            CLUT = cmsStageAllocCLut16bit(ContextID, nGridpoints, nChannels, 1, NULL);
+            if (!cmsPipelineInsertStage(Gamut, cmsAT_BEGIN, CLUT)) {
+                cmsPipelineFree(Gamut);
+                Gamut = NULL;
+            } 
+            else {
+                cmsStageSampleCLut16bit(CLUT, GamutSampler, (void*) &Chain, 0);
+            }
+        }
+    }
+    else
+        Gamut = NULL;   // Didn't work...
+
+    // Free all needed stuff.
+    if (Chain.hInput)   cmsDeleteTransform(Chain.hInput);
+    if (Chain.hForward) cmsDeleteTransform(Chain.hForward);
+    if (Chain.hReverse) cmsDeleteTransform(Chain.hReverse);
+    if (hLab) cmsCloseProfile(hLab);
+
+    // And return computed hull
+    return Gamut;
+}
+
+// Total Area Coverage estimation ----------------------------------------------------------------
+
+typedef struct {                                // Cargo shared between cmsDetectTAC and the EstimateTAC sampler
+    cmsUInt32Number  nOutputChans;              // Channel count taken from the output formatter; bounds the loops in EstimateTAC
+    cmsHTRANSFORM    hRoundTrip;                // Transform evaluated on every node visited by the sampler
+    cmsFloat32Number MaxTAC;                    // Largest sum of output channels seen so far
+    cmsFloat32Number MaxInput[cmsMAXCHANNELS];  // In[] values recorded at the node that produced MaxTAC
+
+} cmsTACestimator;
+
+
+// Sampler callback that keeps track of the maximum amount of ink dropped across the visited
+// nodes. It never writes to Out; its only purpose is to find the global maximum.
+//
+//   In     input values of the node being visited
+//   Out    unused
+//   Cargo  pointer to the cmsTACestimator holding the transform and the running maximum
+//
+// Always returns TRUE.
+//
+// The `register` storage class has been dropped from the parameters: it is not part of the
+// function type, and it is no longer accepted by C++17 compilers.
+static
+int EstimateTAC(const cmsUInt16Number In[], cmsUInt16Number Out[], void * Cargo)
+{
+    cmsTACestimator* bp = (cmsTACestimator*) Cargo;
+    cmsFloat32Number RoundTrip[cmsMAXCHANNELS];
+    cmsUInt32Number i;
+    cmsFloat32Number Sum;
+
+    // Previously placed after the return statement, where it could never be reached
+    cmsUNUSED_PARAMETER(Out);
+
+    // Evaluate the xform
+    cmsDoTransform(bp->hRoundTrip, In, RoundTrip, 1);
+
+    // Add all amounts of ink
+    for (Sum=0, i=0; i < bp ->nOutputChans; i++)
+            Sum += RoundTrip[i];
+
+    // If above maximum, keep track of input values
+    if (Sum > bp ->MaxTAC) {
+
+            bp ->MaxTAC = Sum;
+
+            // NOTE(review): cmsDetectTAC drives the slicer with 3 inputs, while this loop
+            // copies nOutputChans entries of In[] -- confirm In[] holds that many valid values.
+            for (i=0; i < bp ->nOutputChans; i++) {
+                bp ->MaxInput[i] = In[i];
+            }
+    }
+
+    return TRUE;
+}
+
+
+// Estimates the Total Area Coverage of a profile by slicing Lab space and keeping the largest
+// sum of output channels found by EstimateTAC.
+//
+// Returns 0 when the profile is not of output class, when it has too many channels, or when
+// the Lab profile, the transform or the slicing itself fails.
+cmsFloat64Number CMSEXPORT cmsDetectTAC(cmsHPROFILE hProfile)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsUInt32Number Nodes[MAX_INPUT_DIMENSIONS];
+    cmsTACestimator Estimator;
+    cmsUInt32Number OutputFormat;
+    cmsHPROFILE hLabProfile;
+    cmsBool Sliced;
+
+    // For L* we only need black and white. For C* we need many points
+    Nodes[0] = 6;
+    Nodes[1] = 74;
+    Nodes[2] = 74;
+
+    // TAC only works on output profiles
+    if (cmsGetDeviceClass(hProfile) != cmsSigOutputClass)
+        return 0;
+
+    // Create a fake formatter for result
+    OutputFormat = cmsFormatterForColorspaceOfProfile(hProfile, 4, TRUE);
+
+    // Initial TAC is 0
+    Estimator.MaxTAC = 0;
+    Estimator.nOutputChans = T_CHANNELS(OutputFormat);
+
+    // For safety, refuse anything that would not fit the per-channel buffers
+    if (Estimator.nOutputChans >= cmsMAXCHANNELS)
+        return 0;
+
+    hLabProfile = cmsCreateLab4ProfileTHR(ContextID, NULL);
+    if (hLabProfile == NULL)
+        return 0;
+
+    // Setup a roundtrip on perceptual intent in output profile for TAC estimation.
+    // The Lab profile is only needed while the transform is being built.
+    Estimator.hRoundTrip = cmsCreateTransformTHR(ContextID,
+                                                 hLabProfile, TYPE_Lab_16,
+                                                 hProfile, OutputFormat,
+                                                 INTENT_PERCEPTUAL,
+                                                 cmsFLAGS_NOOPTIMIZE|cmsFLAGS_NOCACHE);
+    cmsCloseProfile(hLabProfile);
+
+    if (Estimator.hRoundTrip == NULL)
+        return 0;
+
+    Sliced = cmsSliceSpace16(3, Nodes, EstimateTAC, &Estimator);
+
+    cmsDeleteTransform(Estimator.hRoundTrip);
+
+    // Results in %; a failed slicing yields 0
+    return Sliced ? Estimator.MaxTAC : 0;
+}
+
+
+// Carefully clamps a CIELab value, in place, to the prism given by the a and b limits.
+//
+//   Lab         value to clamp; modified in place
+//   amax, amin  allowed range for a
+//   bmax, bmin  allowed range for b
+//
+// Returns FALSE when L is negative (the value is then zeroed) or when the hue angle falls
+// outside every zone; TRUE otherwise.
+
+cmsBool CMSEXPORT cmsDesaturateLab(cmsCIELab* Lab,
+                                   double amax, double amin,
+                                   double bmax, double bmin)
+{
+    cmsCIELCh Polar;
+    double Hue, Ratio;
+
+    // Whole Luma surface to zero
+    if (Lab->L < 0) {
+
+        Lab->b = 0.0;
+        Lab->a = 0.0;
+        Lab->L = 0.0;
+        return FALSE;
+    }
+
+    // Clamp white, DISCARD HIGHLIGHTS. This is done
+    // in such way because icc spec doesn't allow the
+    // use of L>100 as a highlight means.
+    if (Lab->L > 100)
+        Lab->L = 100;
+
+    // Check out gamut prism, on a, b faces. Nothing else to do when inside.
+    if (!(Lab->a < amin || Lab->a > amax ||
+          Lab->b < bmin || Lab->b > bmax))
+        return TRUE;
+
+    // Falls outside a, b limits. Transports to LCh space,
+    // and then do the clipping
+
+    // Is hue exactly 90? atan will not work, so clamp here
+    if (Lab->a == 0.0) {
+
+        Lab->b = (Lab->b < 0) ? bmin : bmax;
+        return TRUE;
+    }
+
+    cmsLab2LCh(&Polar, Lab);
+
+    Ratio = Lab->b / Lab->a;
+    Hue   = Polar.h;
+
+    // There are 4 zones
+    if ((Hue >= 0. && Hue < 45.) ||
+        (Hue >= 315 && Hue <= 360.)) {
+
+        // clip by amax
+        Lab->a = amax;
+        Lab->b = amax * Ratio;
+    }
+    else if (Hue >= 45. && Hue < 135.) {
+
+        // clip by bmax
+        Lab->b = bmax;
+        Lab->a = bmax / Ratio;
+    }
+    else if (Hue >= 135. && Hue < 225.) {
+
+        // clip by amin
+        Lab->a = amin;
+        Lab->b = amin * Ratio;
+    }
+    else if (Hue >= 225. && Hue < 315.) {
+
+        // clip by bmin
+        Lab->b = bmin;
+        Lab->a = bmin / Ratio;
+    }
+    else {
+
+        cmsSignalError(0, cmsERROR_RANGE, "Invalid angle");
+        return FALSE;
+    }
+
+    return TRUE;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmshalf.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmshalf.cpp
new file mode 100755
index 0000000000..c2540f8043
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmshalf.cpp
@@ -0,0 +1,535 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+//
+#include "lcms2_internal.h"
+
+#ifndef CMS_NO_HALF_SUPPORT 
+
+// This code is inspired by the paper "Fast Half Float Conversions"
+// by Jeroen van der Zijp
+
+static cmsUInt32Number Mantissa[2048] = {
+
+0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34a00000,
+0x34c00000, 0x34e00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000,
+0x35400000, 0x35500000, 0x35600000, 0x35700000, 0x35800000, 0x35880000,
+0x35900000, 0x35980000, 0x35a00000, 0x35a80000, 0x35b00000, 0x35b80000,
+0x35c00000, 0x35c80000, 0x35d00000, 0x35d80000, 0x35e00000, 0x35e80000,
+0x35f00000, 0x35f80000, 0x36000000, 0x36040000, 0x36080000, 0x360c0000,
+0x36100000, 0x36140000, 0x36180000, 0x361c0000, 0x36200000, 0x36240000,
+0x36280000, 0x362c0000, 0x36300000, 0x36340000, 0x36380000, 0x363c0000,
+0x36400000, 0x36440000, 0x36480000, 0x364c0000, 0x36500000, 0x36540000,
+0x36580000, 0x365c0000, 0x36600000, 0x36640000, 0x36680000, 0x366c0000,
+0x36700000, 0x36740000, 0x36780000, 0x367c0000, 0x36800000, 0x36820000,
+0x36840000, 0x36860000, 0x36880000, 0x368a0000, 0x368c0000, 0x368e0000,
+0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369a0000,
+0x369c0000, 0x369e0000, 0x36a00000, 0x36a20000, 0x36a40000, 0x36a60000,
+0x36a80000, 0x36aa0000, 0x36ac0000, 0x36ae0000, 0x36b00000, 0x36b20000,
+0x36b40000, 0x36b60000, 0x36b80000, 0x36ba0000, 0x36bc0000, 0x36be0000,
+0x36c00000, 0x36c20000, 0x36c40000, 0x36c60000, 0x36c80000, 0x36ca0000,
+0x36cc0000, 0x36ce0000, 0x36d00000, 0x36d20000, 0x36d40000, 0x36d60000,
+0x36d80000, 0x36da0000, 0x36dc0000, 0x36de0000, 0x36e00000, 0x36e20000,
+0x36e40000, 0x36e60000, 0x36e80000, 0x36ea0000, 0x36ec0000, 0x36ee0000,
+0x36f00000, 0x36f20000, 0x36f40000, 0x36f60000, 0x36f80000, 0x36fa0000,
+0x36fc0000, 0x36fe0000, 0x37000000, 0x37010000, 0x37020000, 0x37030000,
+0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000,
+0x370a0000, 0x370b0000, 0x370c0000, 0x370d0000, 0x370e0000, 0x370f0000,
+0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000,
+0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371a0000, 0x371b0000,
+0x371c0000, 0x371d0000, 0x371e0000, 0x371f0000, 0x37200000, 0x37210000,
+0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000,
+0x37280000, 0x37290000, 0x372a0000, 0x372b0000, 0x372c0000, 0x372d0000,
+0x372e0000, 0x372f0000, 0x37300000, 0x37310000, 0x37320000, 0x37330000,
+0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000,
+0x373a0000, 0x373b0000, 0x373c0000, 0x373d0000, 0x373e0000, 0x373f0000,
+0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000,
+0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374a0000, 0x374b0000,
+0x374c0000, 0x374d0000, 0x374e0000, 0x374f0000, 0x37500000, 0x37510000,
+0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000,
+0x37580000, 0x37590000, 0x375a0000, 0x375b0000, 0x375c0000, 0x375d0000,
+0x375e0000, 0x375f0000, 0x37600000, 0x37610000, 0x37620000, 0x37630000,
+0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000,
+0x376a0000, 0x376b0000, 0x376c0000, 0x376d0000, 0x376e0000, 0x376f0000,
+0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000,
+0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377a0000, 0x377b0000,
+0x377c0000, 0x377d0000, 0x377e0000, 0x377f0000, 0x37800000, 0x37808000,
+0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000,
+0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000,
+0x37870000, 0x37878000, 0x37880000, 0x37888000, 0x37890000, 0x37898000,
+0x378a0000, 0x378a8000, 0x378b0000, 0x378b8000, 0x378c0000, 0x378c8000,
+0x378d0000, 0x378d8000, 0x378e0000, 0x378e8000, 0x378f0000, 0x378f8000,
+0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000,
+0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000,
+0x37960000, 0x37968000, 0x37970000, 0x37978000, 0x37980000, 0x37988000,
+0x37990000, 0x37998000, 0x379a0000, 0x379a8000, 0x379b0000, 0x379b8000,
+0x379c0000, 0x379c8000, 0x379d0000, 0x379d8000, 0x379e0000, 0x379e8000,
+0x379f0000, 0x379f8000, 0x37a00000, 0x37a08000, 0x37a10000, 0x37a18000,
+0x37a20000, 0x37a28000, 0x37a30000, 0x37a38000, 0x37a40000, 0x37a48000,
+0x37a50000, 0x37a58000, 0x37a60000, 0x37a68000, 0x37a70000, 0x37a78000,
+0x37a80000, 0x37a88000, 0x37a90000, 0x37a98000, 0x37aa0000, 0x37aa8000,
+0x37ab0000, 0x37ab8000, 0x37ac0000, 0x37ac8000, 0x37ad0000, 0x37ad8000,
+0x37ae0000, 0x37ae8000, 0x37af0000, 0x37af8000, 0x37b00000, 0x37b08000,
+0x37b10000, 0x37b18000, 0x37b20000, 0x37b28000, 0x37b30000, 0x37b38000,
+0x37b40000, 0x37b48000, 0x37b50000, 0x37b58000, 0x37b60000, 0x37b68000,
+0x37b70000, 0x37b78000, 0x37b80000, 0x37b88000, 0x37b90000, 0x37b98000,
+0x37ba0000, 0x37ba8000, 0x37bb0000, 0x37bb8000, 0x37bc0000, 0x37bc8000,
+0x37bd0000, 0x37bd8000, 0x37be0000, 0x37be8000, 0x37bf0000, 0x37bf8000,
+0x37c00000, 0x37c08000, 0x37c10000, 0x37c18000, 0x37c20000, 0x37c28000,
+0x37c30000, 0x37c38000, 0x37c40000, 0x37c48000, 0x37c50000, 0x37c58000,
+0x37c60000, 0x37c68000, 0x37c70000, 0x37c78000, 0x37c80000, 0x37c88000,
+0x37c90000, 0x37c98000, 0x37ca0000, 0x37ca8000, 0x37cb0000, 0x37cb8000,
+0x37cc0000, 0x37cc8000, 0x37cd0000, 0x37cd8000, 0x37ce0000, 0x37ce8000,
+0x37cf0000, 0x37cf8000, 0x37d00000, 0x37d08000, 0x37d10000, 0x37d18000,
+0x37d20000, 0x37d28000, 0x37d30000, 0x37d38000, 0x37d40000, 0x37d48000,
+0x37d50000, 0x37d58000, 0x37d60000, 0x37d68000, 0x37d70000, 0x37d78000,
+0x37d80000, 0x37d88000, 0x37d90000, 0x37d98000, 0x37da0000, 0x37da8000,
+0x37db0000, 0x37db8000, 0x37dc0000, 0x37dc8000, 0x37dd0000, 0x37dd8000,
+0x37de0000, 0x37de8000, 0x37df0000, 0x37df8000, 0x37e00000, 0x37e08000,
+0x37e10000, 0x37e18000, 0x37e20000, 0x37e28000, 0x37e30000, 0x37e38000,
+0x37e40000, 0x37e48000, 0x37e50000, 0x37e58000, 0x37e60000, 0x37e68000,
+0x37e70000, 0x37e78000, 0x37e80000, 0x37e88000, 0x37e90000, 0x37e98000,
+0x37ea0000, 0x37ea8000, 0x37eb0000, 0x37eb8000, 0x37ec0000, 0x37ec8000,
+0x37ed0000, 0x37ed8000, 0x37ee0000, 0x37ee8000, 0x37ef0000, 0x37ef8000,
+0x37f00000, 0x37f08000, 0x37f10000, 0x37f18000, 0x37f20000, 0x37f28000,
+0x37f30000, 0x37f38000, 0x37f40000, 0x37f48000, 0x37f50000, 0x37f58000,
+0x37f60000, 0x37f68000, 0x37f70000, 0x37f78000, 0x37f80000, 0x37f88000,
+0x37f90000, 0x37f98000, 0x37fa0000, 0x37fa8000, 0x37fb0000, 0x37fb8000,
+0x37fc0000, 0x37fc8000, 0x37fd0000, 0x37fd8000, 0x37fe0000, 0x37fe8000,
+0x37ff0000, 0x37ff8000, 0x38000000, 0x38004000, 0x38008000, 0x3800c000,
+0x38010000, 0x38014000, 0x38018000, 0x3801c000, 0x38020000, 0x38024000,
+0x38028000, 0x3802c000, 0x38030000, 0x38034000, 0x38038000, 0x3803c000,
+0x38040000, 0x38044000, 0x38048000, 0x3804c000, 0x38050000, 0x38054000,
+0x38058000, 0x3805c000, 0x38060000, 0x38064000, 0x38068000, 0x3806c000,
+0x38070000, 0x38074000, 0x38078000, 0x3807c000, 0x38080000, 0x38084000,
+0x38088000, 0x3808c000, 0x38090000, 0x38094000, 0x38098000, 0x3809c000,
+0x380a0000, 0x380a4000, 0x380a8000, 0x380ac000, 0x380b0000, 0x380b4000,
+0x380b8000, 0x380bc000, 0x380c0000, 0x380c4000, 0x380c8000, 0x380cc000,
+0x380d0000, 0x380d4000, 0x380d8000, 0x380dc000, 0x380e0000, 0x380e4000,
+0x380e8000, 0x380ec000, 0x380f0000, 0x380f4000, 0x380f8000, 0x380fc000,
+0x38100000, 0x38104000, 0x38108000, 0x3810c000, 0x38110000, 0x38114000,
+0x38118000, 0x3811c000, 0x38120000, 0x38124000, 0x38128000, 0x3812c000,
+0x38130000, 0x38134000, 0x38138000, 0x3813c000, 0x38140000, 0x38144000,
+0x38148000, 0x3814c000, 0x38150000, 0x38154000, 0x38158000, 0x3815c000,
+0x38160000, 0x38164000, 0x38168000, 0x3816c000, 0x38170000, 0x38174000,
+0x38178000, 0x3817c000, 0x38180000, 0x38184000, 0x38188000, 0x3818c000,
+0x38190000, 0x38194000, 0x38198000, 0x3819c000, 0x381a0000, 0x381a4000,
+0x381a8000, 0x381ac000, 0x381b0000, 0x381b4000, 0x381b8000, 0x381bc000,
+0x381c0000, 0x381c4000, 0x381c8000, 0x381cc000, 0x381d0000, 0x381d4000,
+0x381d8000, 0x381dc000, 0x381e0000, 0x381e4000, 0x381e8000, 0x381ec000,
+0x381f0000, 0x381f4000, 0x381f8000, 0x381fc000, 0x38200000, 0x38204000,
+0x38208000, 0x3820c000, 0x38210000, 0x38214000, 0x38218000, 0x3821c000,
+0x38220000, 0x38224000, 0x38228000, 0x3822c000, 0x38230000, 0x38234000,
+0x38238000, 0x3823c000, 0x38240000, 0x38244000, 0x38248000, 0x3824c000,
+0x38250000, 0x38254000, 0x38258000, 0x3825c000, 0x38260000, 0x38264000,
+0x38268000, 0x3826c000, 0x38270000, 0x38274000, 0x38278000, 0x3827c000,
+0x38280000, 0x38284000, 0x38288000, 0x3828c000, 0x38290000, 0x38294000,
+0x38298000, 0x3829c000, 0x382a0000, 0x382a4000, 0x382a8000, 0x382ac000,
+0x382b0000, 0x382b4000, 0x382b8000, 0x382bc000, 0x382c0000, 0x382c4000,
+0x382c8000, 0x382cc000, 0x382d0000, 0x382d4000, 0x382d8000, 0x382dc000,
+0x382e0000, 0x382e4000, 0x382e8000, 0x382ec000, 0x382f0000, 0x382f4000,
+0x382f8000, 0x382fc000, 0x38300000, 0x38304000, 0x38308000, 0x3830c000,
+0x38310000, 0x38314000, 0x38318000, 0x3831c000, 0x38320000, 0x38324000,
+0x38328000, 0x3832c000, 0x38330000, 0x38334000, 0x38338000, 0x3833c000,
+0x38340000, 0x38344000, 0x38348000, 0x3834c000, 0x38350000, 0x38354000,
+0x38358000, 0x3835c000, 0x38360000, 0x38364000, 0x38368000, 0x3836c000,
+0x38370000, 0x38374000, 0x38378000, 0x3837c000, 0x38380000, 0x38384000,
+0x38388000, 0x3838c000, 0x38390000, 0x38394000, 0x38398000, 0x3839c000,
+0x383a0000, 0x383a4000, 0x383a8000, 0x383ac000, 0x383b0000, 0x383b4000,
+0x383b8000, 0x383bc000, 0x383c0000, 0x383c4000, 0x383c8000, 0x383cc000,
+0x383d0000, 0x383d4000, 0x383d8000, 0x383dc000, 0x383e0000, 0x383e4000,
+0x383e8000, 0x383ec000, 0x383f0000, 0x383f4000, 0x383f8000, 0x383fc000,
+0x38400000, 0x38404000, 0x38408000, 0x3840c000, 0x38410000, 0x38414000,
+0x38418000, 0x3841c000, 0x38420000, 0x38424000, 0x38428000, 0x3842c000,
+0x38430000, 0x38434000, 0x38438000, 0x3843c000, 0x38440000, 0x38444000,
+0x38448000, 0x3844c000, 0x38450000, 0x38454000, 0x38458000, 0x3845c000,
+0x38460000, 0x38464000, 0x38468000, 0x3846c000, 0x38470000, 0x38474000,
+0x38478000, 0x3847c000, 0x38480000, 0x38484000, 0x38488000, 0x3848c000,
+0x38490000, 0x38494000, 0x38498000, 0x3849c000, 0x384a0000, 0x384a4000,
+0x384a8000, 0x384ac000, 0x384b0000, 0x384b4000, 0x384b8000, 0x384bc000,
+0x384c0000, 0x384c4000, 0x384c8000, 0x384cc000, 0x384d0000, 0x384d4000,
+0x384d8000, 0x384dc000, 0x384e0000, 0x384e4000, 0x384e8000, 0x384ec000,
+0x384f0000, 0x384f4000, 0x384f8000, 0x384fc000, 0x38500000, 0x38504000,
+0x38508000, 0x3850c000, 0x38510000, 0x38514000, 0x38518000, 0x3851c000,
+0x38520000, 0x38524000, 0x38528000, 0x3852c000, 0x38530000, 0x38534000,
+0x38538000, 0x3853c000, 0x38540000, 0x38544000, 0x38548000, 0x3854c000,
+0x38550000, 0x38554000, 0x38558000, 0x3855c000, 0x38560000, 0x38564000,
+0x38568000, 0x3856c000, 0x38570000, 0x38574000, 0x38578000, 0x3857c000,
+0x38580000, 0x38584000, 0x38588000, 0x3858c000, 0x38590000, 0x38594000,
+0x38598000, 0x3859c000, 0x385a0000, 0x385a4000, 0x385a8000, 0x385ac000,
+0x385b0000, 0x385b4000, 0x385b8000, 0x385bc000, 0x385c0000, 0x385c4000,
+0x385c8000, 0x385cc000, 0x385d0000, 0x385d4000, 0x385d8000, 0x385dc000,
+0x385e0000, 0x385e4000, 0x385e8000, 0x385ec000, 0x385f0000, 0x385f4000,
+0x385f8000, 0x385fc000, 0x38600000, 0x38604000, 0x38608000, 0x3860c000,
+0x38610000, 0x38614000, 0x38618000, 0x3861c000, 0x38620000, 0x38624000,
+0x38628000, 0x3862c000, 0x38630000, 0x38634000, 0x38638000, 0x3863c000,
+0x38640000, 0x38644000, 0x38648000, 0x3864c000, 0x38650000, 0x38654000,
+0x38658000, 0x3865c000, 0x38660000, 0x38664000, 0x38668000, 0x3866c000,
+0x38670000, 0x38674000, 0x38678000, 0x3867c000, 0x38680000, 0x38684000,
+0x38688000, 0x3868c000, 0x38690000, 0x38694000, 0x38698000, 0x3869c000,
+0x386a0000, 0x386a4000, 0x386a8000, 0x386ac000, 0x386b0000, 0x386b4000,
+0x386b8000, 0x386bc000, 0x386c0000, 0x386c4000, 0x386c8000, 0x386cc000,
+0x386d0000, 0x386d4000, 0x386d8000, 0x386dc000, 0x386e0000, 0x386e4000,
+0x386e8000, 0x386ec000, 0x386f0000, 0x386f4000, 0x386f8000, 0x386fc000,
+0x38700000, 0x38704000, 0x38708000, 0x3870c000, 0x38710000, 0x38714000,
+0x38718000, 0x3871c000, 0x38720000, 0x38724000, 0x38728000, 0x3872c000,
+0x38730000, 0x38734000, 0x38738000, 0x3873c000, 0x38740000, 0x38744000,
+0x38748000, 0x3874c000, 0x38750000, 0x38754000, 0x38758000, 0x3875c000,
+0x38760000, 0x38764000, 0x38768000, 0x3876c000, 0x38770000, 0x38774000,
+0x38778000, 0x3877c000, 0x38780000, 0x38784000, 0x38788000, 0x3878c000,
+0x38790000, 0x38794000, 0x38798000, 0x3879c000, 0x387a0000, 0x387a4000,
+0x387a8000, 0x387ac000, 0x387b0000, 0x387b4000, 0x387b8000, 0x387bc000,
+0x387c0000, 0x387c4000, 0x387c8000, 0x387cc000, 0x387d0000, 0x387d4000,
+0x387d8000, 0x387dc000, 0x387e0000, 0x387e4000, 0x387e8000, 0x387ec000,
+0x387f0000, 0x387f4000, 0x387f8000, 0x387fc000, 0x38000000, 0x38002000,
+0x38004000, 0x38006000, 0x38008000, 0x3800a000, 0x3800c000, 0x3800e000,
+0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801a000,
+0x3801c000, 0x3801e000, 0x38020000, 0x38022000, 0x38024000, 0x38026000,
+0x38028000, 0x3802a000, 0x3802c000, 0x3802e000, 0x38030000, 0x38032000,
+0x38034000, 0x38036000, 0x38038000, 0x3803a000, 0x3803c000, 0x3803e000,
+0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804a000,
+0x3804c000, 0x3804e000, 0x38050000, 0x38052000, 0x38054000, 0x38056000,
+0x38058000, 0x3805a000, 0x3805c000, 0x3805e000, 0x38060000, 0x38062000,
+0x38064000, 0x38066000, 0x38068000, 0x3806a000, 0x3806c000, 0x3806e000,
+0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807a000,
+0x3807c000, 0x3807e000, 0x38080000, 0x38082000, 0x38084000, 0x38086000,
+0x38088000, 0x3808a000, 0x3808c000, 0x3808e000, 0x38090000, 0x38092000,
+0x38094000, 0x38096000, 0x38098000, 0x3809a000, 0x3809c000, 0x3809e000,
+0x380a0000, 0x380a2000, 0x380a4000, 0x380a6000, 0x380a8000, 0x380aa000,
+0x380ac000, 0x380ae000, 0x380b0000, 0x380b2000, 0x380b4000, 0x380b6000,
+0x380b8000, 0x380ba000, 0x380bc000, 0x380be000, 0x380c0000, 0x380c2000,
+0x380c4000, 0x380c6000, 0x380c8000, 0x380ca000, 0x380cc000, 0x380ce000,
+0x380d0000, 0x380d2000, 0x380d4000, 0x380d6000, 0x380d8000, 0x380da000,
+0x380dc000, 0x380de000, 0x380e0000, 0x380e2000, 0x380e4000, 0x380e6000,
+0x380e8000, 0x380ea000, 0x380ec000, 0x380ee000, 0x380f0000, 0x380f2000,
+0x380f4000, 0x380f6000, 0x380f8000, 0x380fa000, 0x380fc000, 0x380fe000,
+0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810a000,
+0x3810c000, 0x3810e000, 0x38110000, 0x38112000, 0x38114000, 0x38116000,
+0x38118000, 0x3811a000, 0x3811c000, 0x3811e000, 0x38120000, 0x38122000,
+0x38124000, 0x38126000, 0x38128000, 0x3812a000, 0x3812c000, 0x3812e000,
+0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813a000,
+0x3813c000, 0x3813e000, 0x38140000, 0x38142000, 0x38144000, 0x38146000,
+0x38148000, 0x3814a000, 0x3814c000, 0x3814e000, 0x38150000, 0x38152000,
+0x38154000, 0x38156000, 0x38158000, 0x3815a000, 0x3815c000, 0x3815e000,
+0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816a000,
+0x3816c000, 0x3816e000, 0x38170000, 0x38172000, 0x38174000, 0x38176000,
+0x38178000, 0x3817a000, 0x3817c000, 0x3817e000, 0x38180000, 0x38182000,
+0x38184000, 0x38186000, 0x38188000, 0x3818a000, 0x3818c000, 0x3818e000,
+0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819a000,
+0x3819c000, 0x3819e000, 0x381a0000, 0x381a2000, 0x381a4000, 0x381a6000,
+0x381a8000, 0x381aa000, 0x381ac000, 0x381ae000, 0x381b0000, 0x381b2000,
+0x381b4000, 0x381b6000, 0x381b8000, 0x381ba000, 0x381bc000, 0x381be000,
+0x381c0000, 0x381c2000, 0x381c4000, 0x381c6000, 0x381c8000, 0x381ca000,
+0x381cc000, 0x381ce000, 0x381d0000, 0x381d2000, 0x381d4000, 0x381d6000,
+0x381d8000, 0x381da000, 0x381dc000, 0x381de000, 0x381e0000, 0x381e2000,
+0x381e4000, 0x381e6000, 0x381e8000, 0x381ea000, 0x381ec000, 0x381ee000,
+0x381f0000, 0x381f2000, 0x381f4000, 0x381f6000, 0x381f8000, 0x381fa000,
+0x381fc000, 0x381fe000, 0x38200000, 0x38202000, 0x38204000, 0x38206000,
+0x38208000, 0x3820a000, 0x3820c000, 0x3820e000, 0x38210000, 0x38212000,
+0x38214000, 0x38216000, 0x38218000, 0x3821a000, 0x3821c000, 0x3821e000,
+0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822a000,
+0x3822c000, 0x3822e000, 0x38230000, 0x38232000, 0x38234000, 0x38236000,
+0x38238000, 0x3823a000, 0x3823c000, 0x3823e000, 0x38240000, 0x38242000,
+0x38244000, 0x38246000, 0x38248000, 0x3824a000, 0x3824c000, 0x3824e000,
+0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825a000,
+0x3825c000, 0x3825e000, 0x38260000, 0x38262000, 0x38264000, 0x38266000,
+0x38268000, 0x3826a000, 0x3826c000, 0x3826e000, 0x38270000, 0x38272000,
+0x38274000, 0x38276000, 0x38278000, 0x3827a000, 0x3827c000, 0x3827e000,
+0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828a000,
+0x3828c000, 0x3828e000, 0x38290000, 0x38292000, 0x38294000, 0x38296000,
+0x38298000, 0x3829a000, 0x3829c000, 0x3829e000, 0x382a0000, 0x382a2000,
+0x382a4000, 0x382a6000, 0x382a8000, 0x382aa000, 0x382ac000, 0x382ae000,
+0x382b0000, 0x382b2000, 0x382b4000, 0x382b6000, 0x382b8000, 0x382ba000,
+0x382bc000, 0x382be000, 0x382c0000, 0x382c2000, 0x382c4000, 0x382c6000,
+0x382c8000, 0x382ca000, 0x382cc000, 0x382ce000, 0x382d0000, 0x382d2000,
+0x382d4000, 0x382d6000, 0x382d8000, 0x382da000, 0x382dc000, 0x382de000,
+0x382e0000, 0x382e2000, 0x382e4000, 0x382e6000, 0x382e8000, 0x382ea000,
+0x382ec000, 0x382ee000, 0x382f0000, 0x382f2000, 0x382f4000, 0x382f6000,
+0x382f8000, 0x382fa000, 0x382fc000, 0x382fe000, 0x38300000, 0x38302000,
+0x38304000, 0x38306000, 0x38308000, 0x3830a000, 0x3830c000, 0x3830e000,
+0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831a000,
+0x3831c000, 0x3831e000, 0x38320000, 0x38322000, 0x38324000, 0x38326000,
+0x38328000, 0x3832a000, 0x3832c000, 0x3832e000, 0x38330000, 0x38332000,
+0x38334000, 0x38336000, 0x38338000, 0x3833a000, 0x3833c000, 0x3833e000,
+0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834a000,
+0x3834c000, 0x3834e000, 0x38350000, 0x38352000, 0x38354000, 0x38356000,
+0x38358000, 0x3835a000, 0x3835c000, 0x3835e000, 0x38360000, 0x38362000,
+0x38364000, 0x38366000, 0x38368000, 0x3836a000, 0x3836c000, 0x3836e000,
+0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837a000,
+0x3837c000, 0x3837e000, 0x38380000, 0x38382000, 0x38384000, 0x38386000,
+0x38388000, 0x3838a000, 0x3838c000, 0x3838e000, 0x38390000, 0x38392000,
+0x38394000, 0x38396000, 0x38398000, 0x3839a000, 0x3839c000, 0x3839e000,
+0x383a0000, 0x383a2000, 0x383a4000, 0x383a6000, 0x383a8000, 0x383aa000,
+0x383ac000, 0x383ae000, 0x383b0000, 0x383b2000, 0x383b4000, 0x383b6000,
+0x383b8000, 0x383ba000, 0x383bc000, 0x383be000, 0x383c0000, 0x383c2000,
+0x383c4000, 0x383c6000, 0x383c8000, 0x383ca000, 0x383cc000, 0x383ce000,
+0x383d0000, 0x383d2000, 0x383d4000, 0x383d6000, 0x383d8000, 0x383da000,
+0x383dc000, 0x383de000, 0x383e0000, 0x383e2000, 0x383e4000, 0x383e6000,
+0x383e8000, 0x383ea000, 0x383ec000, 0x383ee000, 0x383f0000, 0x383f2000,
+0x383f4000, 0x383f6000, 0x383f8000, 0x383fa000, 0x383fc000, 0x383fe000,
+0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840a000,
+0x3840c000, 0x3840e000, 0x38410000, 0x38412000, 0x38414000, 0x38416000,
+0x38418000, 0x3841a000, 0x3841c000, 0x3841e000, 0x38420000, 0x38422000,
+0x38424000, 0x38426000, 0x38428000, 0x3842a000, 0x3842c000, 0x3842e000,
+0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843a000,
+0x3843c000, 0x3843e000, 0x38440000, 0x38442000, 0x38444000, 0x38446000,
+0x38448000, 0x3844a000, 0x3844c000, 0x3844e000, 0x38450000, 0x38452000,
+0x38454000, 0x38456000, 0x38458000, 0x3845a000, 0x3845c000, 0x3845e000,
+0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846a000,
+0x3846c000, 0x3846e000, 0x38470000, 0x38472000, 0x38474000, 0x38476000,
+0x38478000, 0x3847a000, 0x3847c000, 0x3847e000, 0x38480000, 0x38482000,
+0x38484000, 0x38486000, 0x38488000, 0x3848a000, 0x3848c000, 0x3848e000,
+0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849a000,
+0x3849c000, 0x3849e000, 0x384a0000, 0x384a2000, 0x384a4000, 0x384a6000,
+0x384a8000, 0x384aa000, 0x384ac000, 0x384ae000, 0x384b0000, 0x384b2000,
+0x384b4000, 0x384b6000, 0x384b8000, 0x384ba000, 0x384bc000, 0x384be000,
+0x384c0000, 0x384c2000, 0x384c4000, 0x384c6000, 0x384c8000, 0x384ca000,
+0x384cc000, 0x384ce000, 0x384d0000, 0x384d2000, 0x384d4000, 0x384d6000,
+0x384d8000, 0x384da000, 0x384dc000, 0x384de000, 0x384e0000, 0x384e2000,
+0x384e4000, 0x384e6000, 0x384e8000, 0x384ea000, 0x384ec000, 0x384ee000,
+0x384f0000, 0x384f2000, 0x384f4000, 0x384f6000, 0x384f8000, 0x384fa000,
+0x384fc000, 0x384fe000, 0x38500000, 0x38502000, 0x38504000, 0x38506000,
+0x38508000, 0x3850a000, 0x3850c000, 0x3850e000, 0x38510000, 0x38512000,
+0x38514000, 0x38516000, 0x38518000, 0x3851a000, 0x3851c000, 0x3851e000,
+0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852a000,
+0x3852c000, 0x3852e000, 0x38530000, 0x38532000, 0x38534000, 0x38536000,
+0x38538000, 0x3853a000, 0x3853c000, 0x3853e000, 0x38540000, 0x38542000,
+0x38544000, 0x38546000, 0x38548000, 0x3854a000, 0x3854c000, 0x3854e000,
+0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855a000,
+0x3855c000, 0x3855e000, 0x38560000, 0x38562000, 0x38564000, 0x38566000,
+0x38568000, 0x3856a000, 0x3856c000, 0x3856e000, 0x38570000, 0x38572000,
+0x38574000, 0x38576000, 0x38578000, 0x3857a000, 0x3857c000, 0x3857e000,
+0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858a000,
+0x3858c000, 0x3858e000, 0x38590000, 0x38592000, 0x38594000, 0x38596000,
+0x38598000, 0x3859a000, 0x3859c000, 0x3859e000, 0x385a0000, 0x385a2000,
+0x385a4000, 0x385a6000, 0x385a8000, 0x385aa000, 0x385ac000, 0x385ae000,
+0x385b0000, 0x385b2000, 0x385b4000, 0x385b6000, 0x385b8000, 0x385ba000,
+0x385bc000, 0x385be000, 0x385c0000, 0x385c2000, 0x385c4000, 0x385c6000,
+0x385c8000, 0x385ca000, 0x385cc000, 0x385ce000, 0x385d0000, 0x385d2000,
+0x385d4000, 0x385d6000, 0x385d8000, 0x385da000, 0x385dc000, 0x385de000,
+0x385e0000, 0x385e2000, 0x385e4000, 0x385e6000, 0x385e8000, 0x385ea000,
+0x385ec000, 0x385ee000, 0x385f0000, 0x385f2000, 0x385f4000, 0x385f6000,
+0x385f8000, 0x385fa000, 0x385fc000, 0x385fe000, 0x38600000, 0x38602000,
+0x38604000, 0x38606000, 0x38608000, 0x3860a000, 0x3860c000, 0x3860e000,
+0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861a000,
+0x3861c000, 0x3861e000, 0x38620000, 0x38622000, 0x38624000, 0x38626000,
+0x38628000, 0x3862a000, 0x3862c000, 0x3862e000, 0x38630000, 0x38632000,
+0x38634000, 0x38636000, 0x38638000, 0x3863a000, 0x3863c000, 0x3863e000,
+0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864a000,
+0x3864c000, 0x3864e000, 0x38650000, 0x38652000, 0x38654000, 0x38656000,
+0x38658000, 0x3865a000, 0x3865c000, 0x3865e000, 0x38660000, 0x38662000,
+0x38664000, 0x38666000, 0x38668000, 0x3866a000, 0x3866c000, 0x3866e000,
+0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867a000,
+0x3867c000, 0x3867e000, 0x38680000, 0x38682000, 0x38684000, 0x38686000,
+0x38688000, 0x3868a000, 0x3868c000, 0x3868e000, 0x38690000, 0x38692000,
+0x38694000, 0x38696000, 0x38698000, 0x3869a000, 0x3869c000, 0x3869e000,
+0x386a0000, 0x386a2000, 0x386a4000, 0x386a6000, 0x386a8000, 0x386aa000,
+0x386ac000, 0x386ae000, 0x386b0000, 0x386b2000, 0x386b4000, 0x386b6000,
+0x386b8000, 0x386ba000, 0x386bc000, 0x386be000, 0x386c0000, 0x386c2000,
+0x386c4000, 0x386c6000, 0x386c8000, 0x386ca000, 0x386cc000, 0x386ce000,
+0x386d0000, 0x386d2000, 0x386d4000, 0x386d6000, 0x386d8000, 0x386da000,
+0x386dc000, 0x386de000, 0x386e0000, 0x386e2000, 0x386e4000, 0x386e6000,
+0x386e8000, 0x386ea000, 0x386ec000, 0x386ee000, 0x386f0000, 0x386f2000,
+0x386f4000, 0x386f6000, 0x386f8000, 0x386fa000, 0x386fc000, 0x386fe000,
+0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870a000,
+0x3870c000, 0x3870e000, 0x38710000, 0x38712000, 0x38714000, 0x38716000,
+0x38718000, 0x3871a000, 0x3871c000, 0x3871e000, 0x38720000, 0x38722000,
+0x38724000, 0x38726000, 0x38728000, 0x3872a000, 0x3872c000, 0x3872e000,
+0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873a000,
+0x3873c000, 0x3873e000, 0x38740000, 0x38742000, 0x38744000, 0x38746000,
+0x38748000, 0x3874a000, 0x3874c000, 0x3874e000, 0x38750000, 0x38752000,
+0x38754000, 0x38756000, 0x38758000, 0x3875a000, 0x3875c000, 0x3875e000,
+0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876a000,
+0x3876c000, 0x3876e000, 0x38770000, 0x38772000, 0x38774000, 0x38776000,
+0x38778000, 0x3877a000, 0x3877c000, 0x3877e000, 0x38780000, 0x38782000,
+0x38784000, 0x38786000, 0x38788000, 0x3878a000, 0x3878c000, 0x3878e000,
+0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879a000,
+0x3879c000, 0x3879e000, 0x387a0000, 0x387a2000, 0x387a4000, 0x387a6000,
+0x387a8000, 0x387aa000, 0x387ac000, 0x387ae000, 0x387b0000, 0x387b2000,
+0x387b4000, 0x387b6000, 0x387b8000, 0x387ba000, 0x387bc000, 0x387be000,
+0x387c0000, 0x387c2000, 0x387c4000, 0x387c6000, 0x387c8000, 0x387ca000,
+0x387cc000, 0x387ce000, 0x387d0000, 0x387d2000, 0x387d4000, 0x387d6000,
+0x387d8000, 0x387da000, 0x387dc000, 0x387de000, 0x387e0000, 0x387e2000,
+0x387e4000, 0x387e6000, 0x387e8000, 0x387ea000, 0x387ec000, 0x387ee000,
+0x387f0000, 0x387f2000, 0x387f4000, 0x387f6000, 0x387f8000, 0x387fa000,
+0x387fc000, 0x387fe000
+};
+
+static cmsUInt16Number Offset[64] = {
+0x0000, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0000, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
+0x0400, 0x0400, 0x0400, 0x0400
+};
+
+static cmsUInt32Number Exponent[64] = {
+0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000,
+0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000,
+0x06000000, 0x06800000, 0x07000000, 0x07800000, 0x08000000, 0x08800000,
+0x09000000, 0x09800000, 0x0a000000, 0x0a800000, 0x0b000000, 0x0b800000,
+0x0c000000, 0x0c800000, 0x0d000000, 0x0d800000, 0x0e000000, 0x0e800000,
+0x0f000000, 0x47800000, 0x80000000, 0x80800000, 0x81000000, 0x81800000,
+0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000,
+0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000,
+0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8a000000, 0x8a800000,
+0x8b000000, 0x8b800000, 0x8c000000, 0x8c800000, 0x8d000000, 0x8d800000,
+0x8e000000, 0x8e800000, 0x8f000000, 0xc7800000
+};
+
+static cmsUInt16Number Base[512] = {
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040,
+0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00,
+0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400,
+0x4800, 0x4c00, 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00,
+0x7000, 0x7400, 0x7800, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001,
+0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400,
+0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00,
+0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400,
+0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+0xfc00, 0xfc00
+};
+
+static cmsUInt8Number  Shift[512] = {
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,
+0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d,
+0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13,
+0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x0d
+};
+
+cmsFloat32Number _cmsHalf2Float(cmsUInt16Number h)
+{
+    union {
+        cmsFloat32Number flt;
+        cmsUInt32Number  num;
+    } out;
+    // Table-driven conversion: the 32-bit word is assembled in out.num and returned reinterpreted as out.flt
+    int n = h >> 10;                   // Top 6 bits of h select the Offset[] / Exponent[] entry (64 entries each)
+    // Low 10 bits of h, displaced by Offset[n], index Mantissa[]; Exponent[n] is then added to that word
+    out.num = Mantissa[  (h & 0x3ff) + Offset[ n ] ] + Exponent[ n ];
+    return out.flt;
+}
+
+cmsUInt16Number _cmsFloat2Half(cmsFloat32Number flt)
+{
+    union {
+        cmsFloat32Number flt;
+        cmsUInt32Number  num;
+    } in;
+    // Table-driven conversion: only integer operations on the bit pattern of flt are used
+    cmsUInt32Number n, j;
+
+    in.flt = flt;                      // Reinterpret the float as a 32-bit word
+    n = in.num;
+    j = (n >> 23) & 0x1ff;             // Top 9 bits of the word index Base[] and Shift[] (512 entries each)
+    // Low 23 bits are shifted right by Shift[j] (bits shifted out are dropped) and added to Base[j]
+    return (cmsUInt16Number) ((cmsUInt32Number) Base[ j ] + (( n & 0x007fffff) >> Shift[ j ]));
+}
+
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsintrp.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsintrp.cpp
new file mode 100755
index 0000000000..e44ab3e4e4
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsintrp.cpp
@@ -0,0 +1,1514 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// This module incorporates several interpolation routines, for 1 to 8 channels on input and
+// up to 65535 channels on output. The user may change those by using the interpolation plug-in
+
+// Some people may want to compile as C++ with all warnings on, in this case make compiler silent
+#ifdef _MSC_VER
+#    if (_MSC_VER >= 1400)
+#       pragma warning( disable : 4365 )
+#    endif
+#endif
+
+// Built-in interpolators factory (forward declaration); _cmsSetInterpolationRoutine falls back to it
+static cmsInterpFunction DefaultInterpolatorsFactory(cmsUInt32Number nInputChannels, cmsUInt32Number nOutputChannels, cmsUInt32Number dwFlags);
+
+// Global interpolation plug-in chunk, initialized with a NULL factory (no plug-in replacement)
+_cmsInterpPluginChunkType _cmsInterpPluginChunk = { NULL };
+
+// Allocator/duplicator of the interpolation plug-in chunk of a context.
+// The new chunk is a copy of the one in src when src is given, otherwise a copy
+// of a static template whose factory is NULL. The copy is taken from ctx's pool.
+void _cmsAllocInterpPluginChunk(struct _cmsContext_struct* ctx, const struct _cmsContext_struct* src)
+{
+    // Template used when no source context is supplied
+    static _cmsInterpPluginChunkType EmptyChunk = { NULL };
+    void* origin = &EmptyChunk;
+
+    _cmsAssert(ctx != NULL);
+
+    // A source context overrides the empty template
+    if (src != NULL)
+        origin = src ->chunks[InterpPlugin];
+
+    _cmsAssert(origin != NULL);
+
+    ctx ->chunks[InterpPlugin] = _cmsSubAllocDup(ctx ->MemPool, origin, sizeof(_cmsInterpPluginChunkType));
+}
+
+
+// Main plug-in entry: stores the interpolators factory of the plug-in in the context
+// chunk, or clears it when Data is NULL. Always returns TRUE.
+cmsBool  _cmsRegisterInterpPlugin(cmsContext ContextID, cmsPluginBase* Data)
+{
+    _cmsInterpPluginChunkType* chunk =
+        (_cmsInterpPluginChunkType*) _cmsContextGetClientChunk(ContextID, InterpPlugin);
+
+    if (Data != NULL) {
+        // Set replacement functions
+        chunk ->Interpolators = ((cmsPluginInterpolation*) Data) ->InterpolatorsFactory;
+    }
+    else {
+        chunk ->Interpolators = NULL;    // No custom factory
+    }
+    return TRUE;
+}
+
+
+// Set the interpolation method for p: the factory registered in the context (if any)
+// is asked first and the built-in factory is the fallback. Returns FALSE when neither
+// of them provides a routine for this combination of channels and flags.
+cmsBool _cmsSetInterpolationRoutine(cmsContext ContextID, cmsInterpParams* p)
+{
+    _cmsInterpPluginChunkType* chunk =
+        (_cmsInterpPluginChunkType*) _cmsContextGetClientChunk(ContextID, InterpPlugin);
+
+    // Start with "no routine" so that a missing plug-in factory is detected below
+    p ->Interpolation.Lerp16 = NULL;
+
+    if (chunk ->Interpolators != NULL) {
+        p ->Interpolation = chunk ->Interpolators(p ->nInputs, p ->nOutputs, p ->dwFlags);
+    }
+
+    // Nothing obtained from a plug-in: go for the LittleCMS default
+    if (p ->Interpolation.Lerp16 == NULL) {
+        p ->Interpolation = DefaultInterpolatorsFactory(p ->nInputs, p ->nOutputs, p ->dwFlags);
+    }
+
+    // Valid interpolator? (only one member of the union needs to be checked)
+    return (p ->Interpolation.Lerp16 != NULL) ? TRUE : FALSE;
+}
+
+
+// Precalculates as many parameters as possible to speed up the interpolation; nSamples[i] is the node count of input i. Returns NULL on error.
+cmsInterpParams* _cmsComputeInterpParamsEx(cmsContext ContextID,
+                                           const cmsUInt32Number nSamples[],
+                                           cmsUInt32Number InputChan, cmsUInt32Number OutputChan,
+                                           const void *Table,
+                                           cmsUInt32Number dwFlags)
+{
+    cmsInterpParams* p;
+    cmsUInt32Number i;
+
+    // Check for maximum inputs
+    if (InputChan > MAX_INPUT_DIMENSIONS) {
+             cmsSignalError(ContextID, cmsERROR_RANGE, "Too many input channels (%d channels, max=%d)", InputChan, MAX_INPUT_DIMENSIONS);
+            return NULL;
+    }
+
+    // Creates an empty object (released by _cmsFreeInterpParams, or below on failure)
+    p = (cmsInterpParams*) _cmsMallocZero(ContextID, sizeof(cmsInterpParams));
+    if (p == NULL) return NULL;
+
+    // Keep original parameters
+    p -> dwFlags  = dwFlags;
+    p -> nInputs  = InputChan;
+    p -> nOutputs = OutputChan;
+    p ->Table     = Table;                    // Only the pointer is kept, the table is not copied
+    p ->ContextID  = ContextID;
+
+    // Fill samples per input direction and domain (which is number of nodes minus one)
+    for (i=0; i < InputChan; i++) {
+
+        p -> nSamples[i] = nSamples[i];
+        p -> Domain[i]   = nSamples[i] - 1;   // NOTE(review): wraps around if nSamples[i] is 0 -- presumably never passed; confirm in callers
+    }
+
+    // Compute factors to apply to each component to index the grid array
+    p -> opta[0] = p -> nOutputs;             // One node of the last input spans nOutputs table entries
+    for (i=1; i < InputChan; i++)
+        p ->opta[i] = p ->opta[i-1] * nSamples[InputChan-i];
+
+
+    if (!_cmsSetInterpolationRoutine(ContextID, p)) {
+         cmsSignalError(ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported interpolation (%d->%d channels)", InputChan, OutputChan);
+        _cmsFree(ContextID, p);
+        return NULL;
+    }
+
+    // All seems ok
+    return p;
+}
+
+
+// Convenience wrapper over _cmsComputeInterpParamsEx for grids having the same
+// number of nodes (nSamples) in every input direction.
+cmsInterpParams* _cmsComputeInterpParams(cmsContext ContextID, cmsUInt32Number nSamples, 
+                                         cmsUInt32Number InputChan, cmsUInt32Number OutputChan, const void* Table, cmsUInt32Number dwFlags)
+{
+    cmsUInt32Number PerAxis[MAX_INPUT_DIMENSIONS];
+    int k = 0;
+
+    // Replicate the node count across all possible input dimensions
+    while (k < MAX_INPUT_DIMENSIONS)
+        PerAxis[k++] = nSamples;
+
+    return _cmsComputeInterpParamsEx(ContextID, PerAxis, InputChan, OutputChan, Table, dwFlags);
+}
+
+
+// Frees the parameters block p through the context stored in it; p ->Table is not released here. NULL is ignored
+void _cmsFreeInterpParams(cmsInterpParams* p)
+{
+    if (p != NULL) _cmsFree(p ->ContextID, p);
+}
+
+
+// Inline fixed point interpolation: l + (h - l) * a / 65536 with rounding; callers in this file pass a 16-bit fraction as a
+cmsINLINE cmsUInt16Number LinearInterp(cmsS15Fixed16Number a, cmsS15Fixed16Number l, cmsS15Fixed16Number h)
+{
+    cmsUInt32Number dif = (cmsUInt32Number) (h - l) * a + 0x8000;   // Unsigned math: a negative (h - l) wraps modulo 2^32; 0x8000 is the rounding term
+    dif = (dif >> 16) + l;                                          // Drop the 16 fraction bits, then add the low node
+    return (cmsUInt16Number) (dif);                                 // Only the low 16 bits are returned
+}
+
+
+//  Linear interpolation (Fixed-point optimized). One input, one output: only Output[0] is written
+static
+void LinLerp1D(register const cmsUInt16Number Value[],
+               register cmsUInt16Number Output[],
+               register const cmsInterpParams* p)
+{
+    cmsUInt16Number y1, y0;
+    int cell0, rest;
+    int val3;
+    const cmsUInt16Number* LutTable = (cmsUInt16Number*) p ->Table;
+
+    // If last value, or a table with a single node (Domain[0] == 0): there is no
+    // upper neighbour, so return the node itself instead of reading LutTable[cell0+1]
+    if (Value[0] == 0xffff || p -> Domain[0] == 0) {
+        Output[0] = LutTable[p -> Domain[0]];
+        return;
+    }
+
+    val3 = p -> Domain[0] * Value[0];
+    val3 = _cmsToFixedDomain(val3);    // To fixed 15.16
+
+    cell0 = FIXED_TO_INT(val3);             // Cell is 16 MSB bits
+    rest  = FIXED_REST_TO_INT(val3);        // Rest is 16 LSB bits
+
+    y0 = LutTable[cell0];
+    y1 = LutTable[cell0+1];
+
+
+    Output[0] = LinearInterp(rest, y0, y1);
+}
+
+// To prevent out of bounds indexing: NaN and anything below 1.0e-9 (negatives included) give 0, anything above 1 gives 1
+cmsINLINE cmsFloat32Number fclamp(cmsFloat32Number v) 
+{
+    return ((v < 1.0e-9f) || isnan(v)) ? 0.0f : (v > 1.0f ? 1.0f : v);
+}
+
+// Floating-point version of 1D interpolation. One input, one output: only Output[0] is written
+static
+void LinLerp1Dfloat(const cmsFloat32Number Value[],
+                    cmsFloat32Number Output[],
+                    const cmsInterpParams* p)
+{
+       cmsFloat32Number y1, y0;
+       cmsFloat32Number val2, rest;
+       int cell0, cell1;
+       const cmsFloat32Number* LutTable = (cmsFloat32Number*) p ->Table;
+
+       val2 = fclamp(Value[0]);          // Input limited to 0..1 (NaN gives 0)
+
+       // if last value...
+       if (val2 == 1.0) {
+           Output[0] = LutTable[p -> Domain[0]];
+           return;
+       }
+
+       val2 *= p -> Domain[0];           // Position in node units
+
+       cell0 = (int) floor(val2);        // Lower node
+       cell1 = (int) ceil(val2);         // Upper node (same as lower when exactly on a node)
+
+       // Rest is the fractional part of the position
+       rest = val2 - cell0;
+
+       y0 = LutTable[cell0] ;
+       y1 = LutTable[cell1] ;
+
+       Output[0] = y0 + (y1 - y0) * rest;
+}
+
+
+
+// Eval gray LUT having only one input channel (16 bits); writes p16->nOutputs values to Output
+static
+void Eval1Input(register const cmsUInt16Number Input[],
+                register cmsUInt16Number Output[],
+                register const cmsInterpParams* p16)
+{
+       cmsS15Fixed16Number fk;
+       cmsS15Fixed16Number k0, k1, rk, K0, K1;
+       int v;
+       cmsUInt32Number OutChan;
+       const cmsUInt16Number* LutTable = (cmsUInt16Number*) p16 -> Table;
+
+       v = Input[0] * p16 -> Domain[0];
+       fk = _cmsToFixedDomain(v);                       // To fixed 15.16
+
+       k0 = FIXED_TO_INT(fk);                           // Lower node
+       rk = (cmsUInt16Number) FIXED_REST_TO_INT(fk);    // Fraction towards the upper node
+       // No upper node exists for the last value or in a one-node table (Domain[0] == 0): reuse k0
+       k1 = k0 + ((Input[0] != 0xFFFFU && p16 -> Domain[0] != 0) ? 1 : 0);
+
+       K0 = p16 -> opta[0] * k0;                        // Node index -> table offset
+       K1 = p16 -> opta[0] * k1;
+
+       for (OutChan=0; OutChan < p16->nOutputs; OutChan++) {
+
+           Output[OutChan] = LinearInterp(rk, LutTable[K0+OutChan], LutTable[K1+OutChan]);
+       }
+}
+
+
+
+// Eval gray LUT having only one input channel (floating point); writes p->nOutputs values
+static
+void Eval1InputFloat(const cmsFloat32Number Value[],
+                     cmsFloat32Number Output[],
+                     const cmsInterpParams* p)
+{
+    cmsFloat32Number y1, y0;
+    cmsFloat32Number val2, rest;
+    int cell0, cell1;
+    cmsUInt32Number OutChan;
+    const cmsFloat32Number* LutTable = (cmsFloat32Number*) p ->Table;
+
+    val2 = fclamp(Value[0]);
+
+    // If last value, copy the last node. A node spans opta[0] table entries, so the
+    // node index must be scaled and every output channel written, not only Output[0]
+    if (val2 == 1.0) {
+        cell0 = p -> Domain[0] * p -> opta[0];
+        for (OutChan=0; OutChan < p->nOutputs; OutChan++)
+            Output[OutChan] = LutTable[cell0 + OutChan];
+        return;
+    }
+
+    val2 *= p -> Domain[0];            // Position in node units
+
+    cell0 = (int) floor(val2);         // Lower node
+    cell1 = (int) ceil(val2);          // Upper node (same as lower when exactly on a node)
+    rest = val2 - cell0;               // Fractional part
+
+    cell0 *= p -> opta[0];             // Node index -> table offset
+    cell1 *= p -> opta[0];
+
+    for (OutChan=0; OutChan < p->nOutputs; OutChan++) {
+        y0 = LutTable[cell0 + OutChan] ;
+        y1 = LutTable[cell1 + OutChan] ;
+
+        Output[OutChan] = y0 + (y1 - y0) * rest;
+    }
+}
+
+// Bilinear interpolation - floating point (cmsFloat32Number) version. Two inputs, p->nOutputs outputs
+static
+void BilinearInterpFloat(const cmsFloat32Number Input[],
+                         cmsFloat32Number Output[],
+                         const cmsInterpParams* p)
+
+{
+#   define LERP(a,l,h)    (cmsFloat32Number) ((l)+(((h)-(l))*(a)))
+#   define DENS(i,j)      (LutTable[(i)+(j)+OutChan])
+
+    const cmsFloat32Number* LutTable = (cmsFloat32Number*) p ->Table;
+    cmsFloat32Number      px, py;
+    int        x0, y0,
+               X0, Y0, X1, Y1;
+    int        TotalOut, OutChan;
+    cmsFloat32Number      fx, fy,
+        d00, d01, d10, d11,
+        dx0, dx1,
+        dxy;
+
+    TotalOut   = p -> nOutputs;
+    px = fclamp(Input[0]) * p->Domain[0];    // Clamped inputs scaled to node units
+    py = fclamp(Input[1]) * p->Domain[1];
+
+    x0 = (int) _cmsQuickFloor(px); fx = px - (cmsFloat32Number) x0;    // Lower node and fraction per input
+    y0 = (int) _cmsQuickFloor(py); fy = py - (cmsFloat32Number) y0;
+
+    X0 = p -> opta[1] * x0;                                  // Table offset of the lower node of input 0
+    X1 = X0 + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[1]);    // Upper node; not advanced when the input is at 1.0
+
+    Y0 = p -> opta[0] * y0;
+    Y1 = Y0 + (fclamp(Input[1]) >= 1.0 ? 0 : p->opta[0]);
+
+    for (OutChan = 0; OutChan < TotalOut; OutChan++) {
+
+        d00 = DENS(X0, Y0);          // The 4 corners of the cell
+        d01 = DENS(X0, Y1);
+        d10 = DENS(X1, Y0);
+        d11 = DENS(X1, Y1);
+
+        dx0 = LERP(fx, d00, d10);    // Interpolate along input 0 ...
+        dx1 = LERP(fx, d01, d11);
+
+        dxy = LERP(fy, dx0, dx1);    // ... then along input 1
+
+        Output[OutChan] = dxy;
+    }
+
+
+#   undef LERP
+#   undef DENS
+}
+
+// Bilinear interpolation (16 bits) - optimized fixed point version. Two inputs, p->nOutputs outputs
+static
+void BilinearInterp16(register const cmsUInt16Number Input[],
+                      register cmsUInt16Number Output[],
+                      register const cmsInterpParams* p)
+
+{
+#define DENS(i,j) (LutTable[(i)+(j)+OutChan])
+#define LERP(a,l,h)     (cmsUInt16Number) (l + ROUND_FIXED_TO_INT(((h-l)*a)))
+
+           const cmsUInt16Number* LutTable = (cmsUInt16Number*) p ->Table;
+           int        OutChan, TotalOut;
+           cmsS15Fixed16Number    fx, fy;
+  register int        rx, ry;
+           int        x0, y0;
+  register int        X0, X1, Y0, Y1;
+           int        d00, d01, d10, d11,
+                      dx0, dx1,
+                      dxy;
+
+    TotalOut   = p -> nOutputs;
+
+    fx = _cmsToFixedDomain((int) Input[0] * p -> Domain[0]);
+    x0  = FIXED_TO_INT(fx);         // Lower node of input 0
+    rx  = FIXED_REST_TO_INT(fx);    // Rest in 0..1.0 domain
+
+
+    fy = _cmsToFixedDomain((int) Input[1] * p -> Domain[1]);
+    y0  = FIXED_TO_INT(fy);
+    ry  = FIXED_REST_TO_INT(fy);
+
+
+    X0 = p -> opta[1] * x0;                              // Table offset of the lower node of input 0
+    X1 = X0 + (Input[0] == 0xFFFFU ? 0 : p->opta[1]);    // Upper node; not advanced for the last value
+
+    Y0 = p -> opta[0] * y0;
+    Y1 = Y0 + (Input[1] == 0xFFFFU ? 0 : p->opta[0]);
+
+    for (OutChan = 0; OutChan < TotalOut; OutChan++) {
+
+        d00 = DENS(X0, Y0);          // The 4 corners of the cell
+        d01 = DENS(X0, Y1);
+        d10 = DENS(X1, Y0);
+        d11 = DENS(X1, Y1);
+
+        dx0 = LERP(rx, d00, d10);    // Interpolate along input 0 ...
+        dx1 = LERP(rx, d01, d11);
+
+        dxy = LERP(ry, dx0, dx1);    // ... then along input 1
+
+        Output[OutChan] = (cmsUInt16Number) dxy;
+    }
+
+
+#   undef LERP
+#   undef DENS
+}
+
+
+// Trilinear interpolation - floating point (cmsFloat32Number) version. Three inputs, p->nOutputs outputs
+static
+void TrilinearInterpFloat(const cmsFloat32Number Input[],
+                          cmsFloat32Number Output[],
+                          const cmsInterpParams* p)
+
+{
+#   define LERP(a,l,h)      (cmsFloat32Number) ((l)+(((h)-(l))*(a)))
+#   define DENS(i,j,k)      (LutTable[(i)+(j)+(k)+OutChan])
+
+    const cmsFloat32Number* LutTable = (cmsFloat32Number*) p ->Table;
+    cmsFloat32Number      px, py, pz;
+    int        x0, y0, z0,
+               X0, Y0, Z0, X1, Y1, Z1;
+    int        TotalOut, OutChan;
+    cmsFloat32Number      fx, fy, fz,
+        d000, d001, d010, d011,
+        d100, d101, d110, d111,
+        dx00, dx01, dx10, dx11,
+        dxy0, dxy1, dxyz;
+
+    TotalOut   = p -> nOutputs;
+
+    // We need some clipping here
+    px = fclamp(Input[0]) * p->Domain[0];
+    py = fclamp(Input[1]) * p->Domain[1];
+    pz = fclamp(Input[2]) * p->Domain[2];
+
+    x0 = (int) floor(px); fx = px - (cmsFloat32Number) x0;  // We need full floor functionality here
+    y0 = (int) floor(py); fy = py - (cmsFloat32Number) y0;
+    z0 = (int) floor(pz); fz = pz - (cmsFloat32Number) z0;
+
+    X0 = p -> opta[2] * x0;                                  // Table offset of the lower node of input 0
+    X1 = X0 + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[2]);    // Upper node; not advanced when the input is at 1.0
+
+    Y0 = p -> opta[1] * y0;
+    Y1 = Y0 + (fclamp(Input[1]) >= 1.0 ? 0 : p->opta[1]);
+
+    Z0 = p -> opta[0] * z0;
+    Z1 = Z0 + (fclamp(Input[2]) >= 1.0 ? 0 : p->opta[0]);
+
+    for (OutChan = 0; OutChan < TotalOut; OutChan++) {
+
+        d000 = DENS(X0, Y0, Z0);     // The 8 corners of the cell
+        d001 = DENS(X0, Y0, Z1);
+        d010 = DENS(X0, Y1, Z0);
+        d011 = DENS(X0, Y1, Z1);
+
+        d100 = DENS(X1, Y0, Z0);
+        d101 = DENS(X1, Y0, Z1);
+        d110 = DENS(X1, Y1, Z0);
+        d111 = DENS(X1, Y1, Z1);
+
+
+        dx00 = LERP(fx, d000, d100);    // Interpolate along input 0 ...
+        dx01 = LERP(fx, d001, d101);
+        dx10 = LERP(fx, d010, d110);
+        dx11 = LERP(fx, d011, d111);
+
+        dxy0 = LERP(fy, dx00, dx10);    // ... then along input 1 ...
+        dxy1 = LERP(fy, dx01, dx11);
+
+        dxyz = LERP(fz, dxy0, dxy1);    // ... and finally along input 2
+
+        Output[OutChan] = dxyz;
+    }
+
+
+#   undef LERP
+#   undef DENS
+}
+
+// Trilinear interpolation (16 bits) - optimized fixed point version. Three inputs, p->nOutputs outputs
+static
+void TrilinearInterp16(register const cmsUInt16Number Input[],
+                       register cmsUInt16Number Output[],
+                       register const cmsInterpParams* p)
+
+{
+#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
+#define LERP(a,l,h)     (cmsUInt16Number) (l + ROUND_FIXED_TO_INT(((h-l)*a)))
+
+           const cmsUInt16Number* LutTable = (cmsUInt16Number*) p ->Table;
+           int        OutChan, TotalOut;
+           cmsS15Fixed16Number    fx, fy, fz;
+  register int        rx, ry, rz;
+           int        x0, y0, z0;
+  register int        X0, X1, Y0, Y1, Z0, Z1;
+           int        d000, d001, d010, d011,
+                      d100, d101, d110, d111,
+                      dx00, dx01, dx10, dx11,
+                      dxy0, dxy1, dxyz;
+
+    TotalOut   = p -> nOutputs;
+
+    fx = _cmsToFixedDomain((int) Input[0] * p -> Domain[0]);
+    x0  = FIXED_TO_INT(fx);         // Lower node of input 0
+    rx  = FIXED_REST_TO_INT(fx);    // Rest in 0..1.0 domain
+
+
+    fy = _cmsToFixedDomain((int) Input[1] * p -> Domain[1]);
+    y0  = FIXED_TO_INT(fy);
+    ry  = FIXED_REST_TO_INT(fy);
+
+    fz = _cmsToFixedDomain((int) Input[2] * p -> Domain[2]);
+    z0 = FIXED_TO_INT(fz);
+    rz = FIXED_REST_TO_INT(fz);
+
+
+    X0 = p -> opta[2] * x0;                              // Table offset of the lower node of input 0
+    X1 = X0 + (Input[0] == 0xFFFFU ? 0 : p->opta[2]);    // Upper node; not advanced for the last value
+
+    Y0 = p -> opta[1] * y0;
+    Y1 = Y0 + (Input[1] == 0xFFFFU ? 0 : p->opta[1]);
+
+    Z0 = p -> opta[0] * z0;
+    Z1 = Z0 + (Input[2] == 0xFFFFU ? 0 : p->opta[0]);
+
+    for (OutChan = 0; OutChan < TotalOut; OutChan++) {
+
+        d000 = DENS(X0, Y0, Z0);     // The 8 corners of the cell
+        d001 = DENS(X0, Y0, Z1);
+        d010 = DENS(X0, Y1, Z0);
+        d011 = DENS(X0, Y1, Z1);
+
+        d100 = DENS(X1, Y0, Z0);
+        d101 = DENS(X1, Y0, Z1);
+        d110 = DENS(X1, Y1, Z0);
+        d111 = DENS(X1, Y1, Z1);
+
+
+        dx00 = LERP(rx, d000, d100);    // Interpolate along input 0 ...
+        dx01 = LERP(rx, d001, d101);
+        dx10 = LERP(rx, d010, d110);
+        dx11 = LERP(rx, d011, d111);
+
+        dxy0 = LERP(ry, dx00, dx10);    // ... then along input 1 ...
+        dxy1 = LERP(ry, dx01, dx11);
+
+        dxyz = LERP(rz, dxy0, dxy1);    // ... and finally along input 2
+
+        Output[OutChan] = (cmsUInt16Number) dxyz;
+    }
+
+
+#   undef LERP
+#   undef DENS
+}
+
+
+// Tetrahedral interpolation, using Sakamoto algorithm (floating point version).
+// DENS reads the table entry found at the summed offsets (i, j, k) for the
+// output channel that is currently being evaluated.
+#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
+
+// Interpolates a 3-input floating point table.
+//   Input  - three input values; each one goes through fclamp() before use
+//   Output - receives p->nOutputs interpolated values
+//   p      - interpolation parameters (Table, Domain[], opta[], nOutputs)
+static
+void TetrahedralInterpFloat(const cmsFloat32Number Input[],
+                            cmsFloat32Number Output[],
+                            const cmsInterpParams* p)
+{
+    const cmsFloat32Number* LutTable = (cmsFloat32Number*) p -> Table;
+    const int TotalOut = p -> nOutputs;
+    cmsFloat32Number px, py, pz;
+    cmsFloat32Number rx, ry, rz;
+    cmsFloat32Number c0, c1, c2, c3;
+    int x0, y0, z0;
+    int X0, Y0, Z0, X1, Y1, Z1;
+    int OutChan;
+
+    // Clamp every input and scale it by the domain of its axis
+    px = fclamp(Input[0]) * p->Domain[0];
+    py = fclamp(Input[1]) * p->Domain[1];
+    pz = fclamp(Input[2]) * p->Domain[2];
+
+    // Integer node index on each axis (full floor functionality is needed here)
+    x0 = (int) floor(px);
+    y0 = (int) floor(py);
+    z0 = (int) floor(pz);
+
+    // Remainder on each axis; these weight the differences computed below
+    rx = (px - (cmsFloat32Number) x0);
+    ry = (py - (cmsFloat32Number) y0);
+    rz = (pz - (cmsFloat32Number) z0);
+
+    // Offsets of the base node and of the following node on each axis.
+    // When the clamped input is >= 1.0 there is no step: both offsets coincide.
+    X0 = p -> opta[2] * x0;
+    Y0 = p -> opta[1] * y0;
+    Z0 = p -> opta[0] * z0;
+
+    X1 = X0 + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[2]);
+    Y1 = Y0 + (fclamp(Input[1]) >= 1.0 ? 0 : p->opta[1]);
+    Z1 = Z0 + (fclamp(Input[2]) >= 1.0 ? 0 : p->opta[0]);
+
+    for (OutChan = 0; OutChan < TotalOut; OutChan++) {
+
+        c0 = DENS(X0, Y0, Z0);
+
+        // The six tetrahedra, tested in this exact order (ties fall in the first match)
+        if (rx >= ry && ry >= rz) {
+            c1 = DENS(X1, Y0, Z0) - c0;
+            c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0);
+            c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+        }
+        else if (rx >= rz && rz >= ry) {
+            c1 = DENS(X1, Y0, Z0) - c0;
+            c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+            c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0);
+        }
+        else if (rz >= rx && rx >= ry) {
+            c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1);
+            c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+            c3 = DENS(X0, Y0, Z1) - c0;
+        }
+        else if (ry >= rx && rx >= rz) {
+            c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0);
+            c2 = DENS(X0, Y1, Z0) - c0;
+            c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+        }
+        else if (ry >= rz && rz >= rx) {
+            c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+            c2 = DENS(X0, Y1, Z0) - c0;
+            c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0);
+        }
+        else if (rz >= ry && ry >= rx) {
+            c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+            c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1);
+            c3 = DENS(X0, Y0, Z1) - c0;
+        }
+        else {
+            // No ordering matched: output the base node unchanged
+            c1 = c2 = c3 = 0;
+        }
+
+        Output[OutChan] = c0 + c1 * rx + c2 * ry + c3 * rz;
+    }
+}
+
+#undef DENS
+
+// Tetrahedral interpolation of a 3-input table holding 16-bit values.
+// Reads Input[0..2] and writes p->nOutputs values to Output. The six branches
+// below correspond to the six possible orderings of the remainders rx, ry, rz.
+static
+void TetrahedralInterp16(register const cmsUInt16Number Input[],
+                         register cmsUInt16Number Output[],
+                         register const cmsInterpParams* p)
+{
+    const cmsUInt16Number* LutTable = (cmsUInt16Number*) p -> Table;
+    cmsS15Fixed16Number fx, fy, fz;
+    cmsS15Fixed16Number rx, ry, rz;
+    int x0, y0, z0;
+    cmsS15Fixed16Number c0, c1, c2, c3, Rest;
+    cmsS15Fixed16Number X0, X1, Y0, Y1, Z0, Z1;
+    cmsUInt32Number TotalOut = p -> nOutputs;
+    // Scale each input by the domain of its axis and pass it through _cmsToFixedDomain()
+    fx = _cmsToFixedDomain((int) Input[0] * p -> Domain[0]);
+    fy = _cmsToFixedDomain((int) Input[1] * p -> Domain[1]);
+    fz = _cmsToFixedDomain((int) Input[2] * p -> Domain[2]);
+    // Integer part of each coordinate; used below to locate the base node
+    x0 = FIXED_TO_INT(fx);
+    y0 = FIXED_TO_INT(fy);
+    z0 = FIXED_TO_INT(fz);
+    // Remainder of each coordinate; these weight the node differences in the loops
+    rx = FIXED_REST_TO_INT(fx);
+    ry = FIXED_REST_TO_INT(fy);
+    rz = FIXED_REST_TO_INT(fz);
+    // Per axis: offset of the base node, and step to the next node (0 when the input is 0xFFFF)
+    X0 = p -> opta[2] * x0;
+    X1 = (Input[0] == 0xFFFFU ? 0 : p->opta[2]);
+
+    Y0 = p -> opta[1] * y0;
+    Y1 = (Input[1] == 0xFFFFU ? 0 : p->opta[1]);
+
+    Z0 = p -> opta[0] * z0;
+    Z1 = (Input[2] == 0xFFFFU ? 0 : p->opta[0]);
+    // Rebase the table at the base node, so X1/Y1/Z1 index relative to it
+    LutTable = &LutTable[X0+Y0+Z0];
+
+    // Output should be computed as x = ROUND_FIXED_TO_INT(_cmsToFixedDomain(Rest))
+    // which expands as: x = (Rest + ((Rest+0x7fff)/0xFFFF) + 0x8000)>>16
+    // This can be replaced by: t = Rest+0x8001, x = (t + (t>>16))>>16
+    // at the cost of being off by one at 7fff and 17ffe.
+
+    if (rx >= ry) {
+        if (ry >= rz) {  // rx >= ry >= rz
+            Y1 += X1;  // Y1 now holds the x + y step
+            Z1 += Y1;  // Z1 now holds the x + y + z step
+            for (; TotalOut; TotalOut--) {
+                c1 = LutTable[X1];
+                c2 = LutTable[Y1];
+                c3 = LutTable[Z1];
+                c0 = *LutTable++;  // base node; also advances to the next output channel
+                c3 -= c2;
+                c2 -= c1;
+                c1 -= c0;
+                Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                *Output++ = (cmsUInt16Number) c0 + ((Rest + (Rest>>16))>>16);
+            }
+        } else if (rz >= rx) {  // rz >= rx >= ry
+            X1 += Z1;
+            Y1 += X1;
+            for (; TotalOut; TotalOut--) {
+                c1 = LutTable[X1];
+                c2 = LutTable[Y1];
+                c3 = LutTable[Z1];
+                c0 = *LutTable++;
+                c2 -= c1;
+                c1 -= c3;
+                c3 -= c0;
+                Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                *Output++ = (cmsUInt16Number) c0 + ((Rest + (Rest>>16))>>16);
+            }
+        } else {  // rx > rz > ry
+            Z1 += X1;
+            Y1 += Z1;
+            for (; TotalOut; TotalOut--) {
+                c1 = LutTable[X1];
+                c2 = LutTable[Y1];
+                c3 = LutTable[Z1];
+                c0 = *LutTable++;
+                c2 -= c3;
+                c3 -= c1;
+                c1 -= c0;
+                Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                *Output++ = (cmsUInt16Number) c0 + ((Rest + (Rest>>16))>>16);
+            }
+        }
+    } else {
+        if (rx >= rz) {  // ry > rx >= rz
+            X1 += Y1;
+            Z1 += X1;
+            for (; TotalOut; TotalOut--) {
+                c1 = LutTable[X1];
+                c2 = LutTable[Y1];
+                c3 = LutTable[Z1];
+                c0 = *LutTable++;
+                c3 -= c1;
+                c1 -= c2;
+                c2 -= c0;
+                Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                *Output++ = (cmsUInt16Number) c0 + ((Rest + (Rest>>16))>>16);
+            }
+        } else if (ry >= rz) {  // ry >= rz > rx
+            Z1 += Y1;
+            X1 += Z1;
+            for (; TotalOut; TotalOut--) {
+                c1 = LutTable[X1];
+                c2 = LutTable[Y1];
+                c3 = LutTable[Z1];
+                c0 = *LutTable++;
+                c1 -= c3;
+                c3 -= c2;
+                c2 -= c0;
+                Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                *Output++ = (cmsUInt16Number) c0 + ((Rest + (Rest>>16))>>16);
+            }
+        } else {  // rz > ry > rx
+            Y1 += Z1;
+            X1 += Y1;
+            for (; TotalOut; TotalOut--) {
+                c1 = LutTable[X1];
+                c2 = LutTable[Y1];
+                c3 = LutTable[Z1];
+                c0 = *LutTable++;
+                c1 -= c2;
+                c2 -= c3;
+                c3 -= c0;
+                Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                *Output++ = (cmsUInt16Number) c0 + ((Rest + (Rest>>16))>>16);
+            }
+        }
+    }
+}
+
+
+// Reads the table entry found at the summed offsets (i, j, k) for the output
+// channel that is currently being evaluated.
+#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
+
+// One 16-bit tetrahedral pass over the 3D sub-table that starts at LutTable.
+//   rx, ry, rz  - remainders on each axis, used as interpolation weights
+//   X0, Y0, Z0  - offsets of the base node on each axis
+//   X1, Y1, Z1  - offsets of the following node on each axis
+//   nOutputs    - number of output channels to evaluate
+//   Tmp         - receives nOutputs interpolated values
+// Shared by both halves of Eval4Inputs, which previously duplicated this code.
+static
+void Eval4InputsTetra16(const cmsUInt16Number* LutTable,
+                        cmsS15Fixed16Number rx, cmsS15Fixed16Number ry, cmsS15Fixed16Number rz,
+                        cmsS15Fixed16Number X0, cmsS15Fixed16Number X1,
+                        cmsS15Fixed16Number Y0, cmsS15Fixed16Number Y1,
+                        cmsS15Fixed16Number Z0, cmsS15Fixed16Number Z1,
+                        cmsUInt32Number nOutputs,
+                        cmsUInt16Number Tmp[])
+{
+    cmsS15Fixed16Number c0, c1, c2, c3, Rest;
+    cmsUInt32Number     OutChan;
+
+    for (OutChan = 0; OutChan < nOutputs; OutChan++) {
+
+        c0 = DENS(X0, Y0, Z0);
+
+        // The six tetrahedra, tested in this exact order (ties fall in the first match)
+        if (rx >= ry && ry >= rz) {
+
+            c1 = DENS(X1, Y0, Z0) - c0;
+            c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0);
+            c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+        }
+        else if (rx >= rz && rz >= ry) {
+
+            c1 = DENS(X1, Y0, Z0) - c0;
+            c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+            c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0);
+        }
+        else if (rz >= rx && rx >= ry) {
+
+            c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1);
+            c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+            c3 = DENS(X0, Y0, Z1) - c0;
+        }
+        else if (ry >= rx && rx >= rz) {
+
+            c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0);
+            c2 = DENS(X0, Y1, Z0) - c0;
+            c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+        }
+        else if (ry >= rz && rz >= rx) {
+
+            c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+            c2 = DENS(X0, Y1, Z0) - c0;
+            c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0);
+        }
+        else if (rz >= ry && ry >= rx) {
+
+            c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+            c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1);
+            c3 = DENS(X0, Y0, Z1) - c0;
+        }
+        else {
+            c1 = c2 = c3 = 0;
+        }
+
+        Rest = c1 * rx + c2 * ry + c3 * rz;
+
+        Tmp[OutChan] = (cmsUInt16Number)(c0 + ROUND_FIXED_TO_INT(_cmsToFixedDomain(Rest)));
+    }
+}
+
+// Interpolates a 4-input table holding 16-bit values.
+// Input[0] selects two neighbouring 3D sub-tables (offsets K0 and K1). Input[1..3]
+// are interpolated tetrahedrally inside each of them, and the two partial results
+// are blended with LinearInterp() using the remainder of Input[0].
+//   Input  - four 16-bit input values
+//   Output - receives p16->nOutputs interpolated values
+//   p16    - interpolation parameters (Table, Domain[], opta[], nOutputs)
+static
+void Eval4Inputs(register const cmsUInt16Number Input[],
+                     register cmsUInt16Number Output[],
+                     register const cmsInterpParams* p16)
+{
+    const cmsUInt16Number* LutTable;
+    cmsS15Fixed16Number fk;
+    cmsS15Fixed16Number k0, rk;
+    int K0, K1;
+    cmsS15Fixed16Number    fx, fy, fz;
+    cmsS15Fixed16Number    rx, ry, rz;
+    int                    x0, y0, z0;
+    cmsS15Fixed16Number    X0, X1, Y0, Y1, Z0, Z1;
+    cmsUInt32Number i;
+    cmsUInt16Number        Tmp1[MAX_STAGE_CHANNELS], Tmp2[MAX_STAGE_CHANNELS];
+
+
+    // Scale each input by the domain of its axis
+    fk  = _cmsToFixedDomain((int) Input[0] * p16 -> Domain[0]);
+    fx  = _cmsToFixedDomain((int) Input[1] * p16 -> Domain[1]);
+    fy  = _cmsToFixedDomain((int) Input[2] * p16 -> Domain[2]);
+    fz  = _cmsToFixedDomain((int) Input[3] * p16 -> Domain[3]);
+
+    // Integer part: node index on each axis
+    k0  = FIXED_TO_INT(fk);
+    x0  = FIXED_TO_INT(fx);
+    y0  = FIXED_TO_INT(fy);
+    z0  = FIXED_TO_INT(fz);
+
+    // Remainder: interpolation weight on each axis
+    rk  = FIXED_REST_TO_INT(fk);
+    rx  = FIXED_REST_TO_INT(fx);
+    ry  = FIXED_REST_TO_INT(fy);
+    rz  = FIXED_REST_TO_INT(fz);
+
+    // Offsets of the base node and of the following node on each axis.
+    // An input of 0xFFFF takes no step: both offsets coincide.
+    K0 = p16 -> opta[3] * k0;
+    K1 = K0 + (Input[0] == 0xFFFFU ? 0 : p16->opta[3]);
+
+    X0 = p16 -> opta[2] * x0;
+    X1 = X0 + (Input[1] == 0xFFFFU ? 0 : p16->opta[2]);
+
+    Y0 = p16 -> opta[1] * y0;
+    Y1 = Y0 + (Input[2] == 0xFFFFU ? 0 : p16->opta[1]);
+
+    Z0 = p16 -> opta[0] * z0;
+    Z1 = Z0 + (Input[3] == 0xFFFFU ? 0 : p16->opta[0]);
+
+    LutTable = (cmsUInt16Number*) p16 -> Table;
+
+    // Same 3D interpolation on the two sub-tables adjacent along the first input
+    Eval4InputsTetra16(LutTable + K0, rx, ry, rz, X0, X1, Y0, Y1, Z0, Z1, p16 -> nOutputs, Tmp1);
+    Eval4InputsTetra16(LutTable + K1, rx, ry, rz, X0, X1, Y0, Y1, Z0, Z1, p16 -> nOutputs, Tmp2);
+
+    // Blend both partial results along the first input
+    for (i=0; i < p16 -> nOutputs; i++) {
+        Output[i] = LinearInterp(rk, Tmp1[i], Tmp2[i]);
+    }
+}
+#undef DENS
+
+
+// For more than 3 inputs (e.g., CMYK),
+// evaluate two interpolations of one dimension less and then linearly interpolate between them.
+
+
+// Interpolates a 4-input floating point table: two tetrahedral evaluations on the
+// sub-tables adjacent along the first input, blended linearly by its remainder.
+//   Input  - four input values; Input[0] goes through fclamp() before use
+//   Output - receives p->nOutputs interpolated values
+//   p      - interpolation parameters (Table, Domain[], opta[], nOutputs)
+static
+void Eval4InputsFloat(const cmsFloat32Number Input[],
+                      cmsFloat32Number Output[],
+                      const cmsInterpParams* p)
+{
+    const cmsFloat32Number* const Base = (cmsFloat32Number*) p -> Table;
+    cmsFloat32Number LowSlice[MAX_STAGE_CHANNELS], HighSlice[MAX_STAGE_CHANNELS];
+    cmsFloat32Number Pos, Frac;
+    cmsInterpParams Sub;
+    cmsUInt32Number n;
+    int Cell, LowOfs, HighOfs;
+
+    // Position along the first input: node index and remainder
+    Pos  = fclamp(Input[0]) * p->Domain[0];
+    Cell = _cmsQuickFloor(Pos);
+    Frac = Pos - (cmsFloat32Number) Cell;
+
+    // Offsets of the two neighbouring sub-tables; no step when the clamped input is >= 1.0
+    LowOfs  = p -> opta[3] * Cell;
+    HighOfs = LowOfs + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[3]);
+
+    // Parameters for the remaining inputs: a copy of p with Domain[] shifted down by one
+    Sub = *p;
+    memmove(&Sub.Domain[0], &p ->Domain[1], 3*sizeof(cmsUInt32Number));
+
+    Sub.Table = Base + LowOfs;
+    TetrahedralInterpFloat(Input + 1, LowSlice, &Sub);
+
+    Sub.Table = Base + HighOfs;
+    TetrahedralInterpFloat(Input + 1, HighSlice, &Sub);
+
+    for (n = 0; n < p -> nOutputs; n++)
+        Output[n] = LowSlice[n] + (HighSlice[n] - LowSlice[n]) * Frac;
+}
+
+
+// Interpolates a 5-input table holding 16-bit values: two 4-input evaluations on the
+// sub-tables adjacent along the first input, blended with LinearInterp().
+//   Input  - five 16-bit input values
+//   Output - receives p16->nOutputs interpolated values
+//   p16    - interpolation parameters (Table, Domain[], opta[], nOutputs)
+static
+void Eval5Inputs(register const cmsUInt16Number Input[],
+                 register cmsUInt16Number Output[],
+                 register const cmsInterpParams* p16)
+{
+    const cmsUInt16Number* const Base = (cmsUInt16Number*) p16 -> Table;
+    cmsUInt16Number LowSlice[MAX_STAGE_CHANNELS], HighSlice[MAX_STAGE_CHANNELS];
+    cmsS15Fixed16Number FixedK, Cell, Frac;
+    cmsInterpParams Sub;
+    cmsUInt32Number n;
+    int LowOfs, HighOfs;
+
+    // Position along the first input: node index and remainder
+    FixedK = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
+    Cell   = FIXED_TO_INT(FixedK);
+    Frac   = FIXED_REST_TO_INT(FixedK);
+
+    // Offsets of the two neighbouring sub-tables; an input of 0xFFFF takes no step
+    LowOfs  = p16 -> opta[4] * Cell;
+    HighOfs = p16 -> opta[4] * (Cell + (Input[0] == 0xFFFFU ? 0 : 1));
+
+    // Parameters for the remaining inputs: a copy of p16 with Domain[] shifted down by one
+    Sub = *p16;
+    memmove(&Sub.Domain[0], &p16 ->Domain[1], 4*sizeof(cmsUInt32Number));
+
+    Sub.Table = Base + LowOfs;
+    Eval4Inputs(Input + 1, LowSlice, &Sub);
+
+    Sub.Table = Base + HighOfs;
+    Eval4Inputs(Input + 1, HighSlice, &Sub);
+
+    for (n = 0; n < p16 -> nOutputs; n++)
+        Output[n] = LinearInterp(Frac, LowSlice[n], HighSlice[n]);
+}
+
+
+// Interpolates a 5-input floating point table: two 4-input evaluations on the
+// sub-tables adjacent along the first input, blended linearly by its remainder.
+//   Input  - five input values; Input[0] goes through fclamp() before use
+//   Output - receives p->nOutputs interpolated values
+//   p      - interpolation parameters (Table, Domain[], opta[], nOutputs)
+static
+void Eval5InputsFloat(const cmsFloat32Number Input[],
+                      cmsFloat32Number Output[],
+                      const cmsInterpParams* p)
+{
+    const cmsFloat32Number* const Base = (cmsFloat32Number*) p -> Table;
+    cmsFloat32Number LowSlice[MAX_STAGE_CHANNELS], HighSlice[MAX_STAGE_CHANNELS];
+    cmsFloat32Number Pos, Frac;
+    cmsInterpParams Sub;
+    cmsUInt32Number n;
+    int Cell, LowOfs, HighOfs;
+
+    // Position along the first input: node index and remainder
+    Pos  = fclamp(Input[0]) * p->Domain[0];
+    Cell = _cmsQuickFloor(Pos);
+    Frac = Pos - (cmsFloat32Number) Cell;
+
+    // Offsets of the two neighbouring sub-tables; no step when the clamped input is >= 1.0
+    LowOfs  = p -> opta[4] * Cell;
+    HighOfs = LowOfs + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[4]);
+
+    // Parameters for the remaining inputs: a copy of p with Domain[] shifted down by one
+    Sub = *p;
+    memmove(&Sub.Domain[0], &p ->Domain[1], 4*sizeof(cmsUInt32Number));
+
+    Sub.Table = Base + LowOfs;
+    Eval4InputsFloat(Input + 1, LowSlice, &Sub);
+
+    Sub.Table = Base + HighOfs;
+    Eval4InputsFloat(Input + 1, HighSlice, &Sub);
+
+    for (n = 0; n < p -> nOutputs; n++)
+        Output[n] = LowSlice[n] + (HighSlice[n] - LowSlice[n]) * Frac;
+}
+
+
+
+// Interpolates a 6-input table holding 16-bit values: two 5-input evaluations on the
+// sub-tables adjacent along the first input, blended with LinearInterp().
+//   Input  - six 16-bit input values
+//   Output - receives p16->nOutputs interpolated values
+//   p16    - interpolation parameters (Table, Domain[], opta[], nOutputs)
+static
+void Eval6Inputs(register const cmsUInt16Number Input[],
+                 register cmsUInt16Number Output[],
+                 register const cmsInterpParams* p16)
+{
+    const cmsUInt16Number* const Base = (cmsUInt16Number*) p16 -> Table;
+    cmsUInt16Number LowSlice[MAX_STAGE_CHANNELS], HighSlice[MAX_STAGE_CHANNELS];
+    cmsS15Fixed16Number FixedK, Cell, Frac;
+    cmsInterpParams Sub;
+    cmsUInt32Number n;
+    int LowOfs, HighOfs;
+
+    // Position along the first input: node index and remainder
+    FixedK = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
+    Cell   = FIXED_TO_INT(FixedK);
+    Frac   = FIXED_REST_TO_INT(FixedK);
+
+    // Offsets of the two neighbouring sub-tables; an input of 0xFFFF takes no step
+    LowOfs  = p16 -> opta[5] * Cell;
+    HighOfs = p16 -> opta[5] * (Cell + (Input[0] == 0xFFFFU ? 0 : 1));
+
+    // Parameters for the remaining inputs: a copy of p16 with Domain[] shifted down by one
+    Sub = *p16;
+    memmove(&Sub.Domain[0], &p16 ->Domain[1], 5*sizeof(cmsUInt32Number));
+
+    Sub.Table = Base + LowOfs;
+    Eval5Inputs(Input + 1, LowSlice, &Sub);
+
+    Sub.Table = Base + HighOfs;
+    Eval5Inputs(Input + 1, HighSlice, &Sub);
+
+    for (n = 0; n < p16 -> nOutputs; n++)
+        Output[n] = LinearInterp(Frac, LowSlice[n], HighSlice[n]);
+}
+
+
+// Interpolates a 6-input floating point table: two 5-input evaluations on the
+// sub-tables adjacent along the first input, blended linearly by its remainder.
+//   Input  - six input values; Input[0] goes through fclamp() before use
+//   Output - receives p->nOutputs interpolated values
+//   p      - interpolation parameters (Table, Domain[], opta[], nOutputs)
+static
+void Eval6InputsFloat(const cmsFloat32Number Input[],
+                      cmsFloat32Number Output[],
+                      const cmsInterpParams* p)
+{
+    const cmsFloat32Number* const Base = (cmsFloat32Number*) p -> Table;
+    cmsFloat32Number LowSlice[MAX_STAGE_CHANNELS], HighSlice[MAX_STAGE_CHANNELS];
+    cmsFloat32Number Pos, Frac;
+    cmsInterpParams Sub;
+    cmsUInt32Number n;
+    int Cell, LowOfs, HighOfs;
+
+    // Position along the first input: node index and remainder
+    Pos  = fclamp(Input[0]) * p->Domain[0];
+    Cell = _cmsQuickFloor(Pos);
+    Frac = Pos - (cmsFloat32Number) Cell;
+
+    // Offsets of the two neighbouring sub-tables; no step when the clamped input is >= 1.0
+    LowOfs  = p -> opta[5] * Cell;
+    HighOfs = LowOfs + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[5]);
+
+    // Parameters for the remaining inputs: a copy of p with Domain[] shifted down by one
+    Sub = *p;
+    memmove(&Sub.Domain[0], &p ->Domain[1], 5*sizeof(cmsUInt32Number));
+
+    Sub.Table = Base + LowOfs;
+    Eval5InputsFloat(Input + 1, LowSlice, &Sub);
+
+    Sub.Table = Base + HighOfs;
+    Eval5InputsFloat(Input + 1, HighSlice, &Sub);
+
+    for (n = 0; n < p -> nOutputs; n++)
+        Output[n] = LowSlice[n] + (HighSlice[n] - LowSlice[n]) * Frac;
+}
+
+
+// Interpolates a 7-input table holding 16-bit values: two 6-input evaluations on the
+// sub-tables adjacent along the first input, blended with LinearInterp().
+//   Input  - seven 16-bit input values
+//   Output - receives p16->nOutputs interpolated values
+//   p16    - interpolation parameters (Table, Domain[], opta[], nOutputs)
+static
+void Eval7Inputs(register const cmsUInt16Number Input[],
+                 register cmsUInt16Number Output[],
+                 register const cmsInterpParams* p16)
+{
+    const cmsUInt16Number* const Base = (cmsUInt16Number*) p16 -> Table;
+    cmsUInt16Number LowSlice[MAX_STAGE_CHANNELS], HighSlice[MAX_STAGE_CHANNELS];
+    cmsS15Fixed16Number FixedK, Cell, Frac;
+    cmsInterpParams Sub;
+    cmsUInt32Number n;
+    int LowOfs, HighOfs;
+
+    // Position along the first input: node index and remainder
+    FixedK = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
+    Cell   = FIXED_TO_INT(FixedK);
+    Frac   = FIXED_REST_TO_INT(FixedK);
+
+    // Offsets of the two neighbouring sub-tables; an input of 0xFFFF takes no step
+    LowOfs  = p16 -> opta[6] * Cell;
+    HighOfs = p16 -> opta[6] * (Cell + (Input[0] == 0xFFFFU ? 0 : 1));
+
+    // Parameters for the remaining inputs: a copy of p16 with Domain[] shifted down by one
+    Sub = *p16;
+    memmove(&Sub.Domain[0], &p16 ->Domain[1], 6*sizeof(cmsUInt32Number));
+
+    Sub.Table = Base + LowOfs;
+    Eval6Inputs(Input + 1, LowSlice, &Sub);
+
+    Sub.Table = Base + HighOfs;
+    Eval6Inputs(Input + 1, HighSlice, &Sub);
+
+    for (n = 0; n < p16 -> nOutputs; n++)
+        Output[n] = LinearInterp(Frac, LowSlice[n], HighSlice[n]);
+}
+
+
+// Interpolates a 7-input floating point table: two 6-input evaluations on the
+// sub-tables adjacent along the first input, blended linearly by its remainder.
+//   Input  - seven input values; Input[0] goes through fclamp() before use
+//   Output - receives p->nOutputs interpolated values
+//   p      - interpolation parameters (Table, Domain[], opta[], nOutputs)
+static
+void Eval7InputsFloat(const cmsFloat32Number Input[],
+                      cmsFloat32Number Output[],
+                      const cmsInterpParams* p)
+{
+    const cmsFloat32Number* const Base = (cmsFloat32Number*) p -> Table;
+    cmsFloat32Number LowSlice[MAX_STAGE_CHANNELS], HighSlice[MAX_STAGE_CHANNELS];
+    cmsFloat32Number Pos, Frac;
+    cmsInterpParams Sub;
+    cmsUInt32Number n;
+    int Cell, LowOfs, HighOfs;
+
+    // Position along the first input: node index and remainder
+    Pos  = fclamp(Input[0]) * p->Domain[0];
+    Cell = _cmsQuickFloor(Pos);
+    Frac = Pos - (cmsFloat32Number) Cell;
+
+    // Offsets of the two neighbouring sub-tables; no step when the clamped input is >= 1.0
+    LowOfs  = p -> opta[6] * Cell;
+    HighOfs = LowOfs + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[6]);
+
+    // Parameters for the remaining inputs: a copy of p with Domain[] shifted down by one
+    Sub = *p;
+    memmove(&Sub.Domain[0], &p ->Domain[1], 6*sizeof(cmsUInt32Number));
+
+    Sub.Table = Base + LowOfs;
+    Eval6InputsFloat(Input + 1, LowSlice, &Sub);
+
+    Sub.Table = Base + HighOfs;
+    Eval6InputsFloat(Input + 1, HighSlice, &Sub);
+
+    for (n = 0; n < p -> nOutputs; n++)
+        Output[n] = LowSlice[n] + (HighSlice[n] - LowSlice[n]) * Frac;
+}
+
+// Interpolates an 8-input table holding 16-bit values: two 7-input evaluations on the
+// sub-tables adjacent along the first input, blended with LinearInterp().
+//   Input  - eight 16-bit input values
+//   Output - receives p16->nOutputs interpolated values
+//   p16    - interpolation parameters (Table, Domain[], opta[], nOutputs)
+static
+void Eval8Inputs(register const cmsUInt16Number Input[],
+                 register cmsUInt16Number Output[],
+                 register const cmsInterpParams* p16)
+{
+    const cmsUInt16Number* const Base = (cmsUInt16Number*) p16 -> Table;
+    cmsUInt16Number LowSlice[MAX_STAGE_CHANNELS], HighSlice[MAX_STAGE_CHANNELS];
+    cmsS15Fixed16Number FixedK, Cell, Frac;
+    cmsInterpParams Sub;
+    cmsUInt32Number n;
+    int LowOfs, HighOfs;
+
+    // Position along the first input: node index and remainder
+    FixedK = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
+    Cell   = FIXED_TO_INT(FixedK);
+    Frac   = FIXED_REST_TO_INT(FixedK);
+
+    // Offsets of the two neighbouring sub-tables; an input of 0xFFFF takes no step
+    LowOfs  = p16 -> opta[7] * Cell;
+    HighOfs = p16 -> opta[7] * (Cell + (Input[0] == 0xFFFFU ? 0 : 1));
+
+    // Parameters for the remaining inputs: a copy of p16 with Domain[] shifted down by one
+    Sub = *p16;
+    memmove(&Sub.Domain[0], &p16 ->Domain[1], 7*sizeof(cmsUInt32Number));
+
+    Sub.Table = Base + LowOfs;
+    Eval7Inputs(Input + 1, LowSlice, &Sub);
+
+    Sub.Table = Base + HighOfs;
+    Eval7Inputs(Input + 1, HighSlice, &Sub);
+
+    for (n = 0; n < p16 -> nOutputs; n++)
+        Output[n] = LinearInterp(Frac, LowSlice[n], HighSlice[n]);
+}
+
+
+
+// Interpolates an 8-input floating point table: two 7-input evaluations on the
+// sub-tables adjacent along the first input, blended linearly by its remainder.
+//   Input  - eight input values; Input[0] goes through fclamp() before use
+//   Output - receives p->nOutputs interpolated values
+//   p      - interpolation parameters (Table, Domain[], opta[], nOutputs)
+static
+void Eval8InputsFloat(const cmsFloat32Number Input[],
+                      cmsFloat32Number Output[],
+                      const cmsInterpParams* p)
+{
+    const cmsFloat32Number* const Base = (cmsFloat32Number*) p -> Table;
+    cmsFloat32Number LowSlice[MAX_STAGE_CHANNELS], HighSlice[MAX_STAGE_CHANNELS];
+    cmsFloat32Number Pos, Frac;
+    cmsInterpParams Sub;
+    cmsUInt32Number n;
+    int Cell, LowOfs, HighOfs;
+
+    // Position along the first input: node index and remainder
+    Pos  = fclamp(Input[0]) * p->Domain[0];
+    Cell = _cmsQuickFloor(Pos);
+    Frac = Pos - (cmsFloat32Number) Cell;
+
+    // Offsets of the two neighbouring sub-tables; no step when the clamped input is >= 1.0
+    LowOfs  = p -> opta[7] * Cell;
+    HighOfs = LowOfs + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[7]);
+
+    // Parameters for the remaining inputs: a copy of p with Domain[] shifted down by one
+    Sub = *p;
+    memmove(&Sub.Domain[0], &p ->Domain[1], 7*sizeof(cmsUInt32Number));
+
+    Sub.Table = Base + LowOfs;
+    Eval7InputsFloat(Input + 1, LowSlice, &Sub);
+
+    Sub.Table = Base + HighOfs;
+    Eval7InputsFloat(Input + 1, HighSlice, &Sub);
+
+    for (n = 0; n < p -> nOutputs; n++)
+        Output[n] = LowSlice[n] + (HighSlice[n] - LowSlice[n]) * Frac;
+}
+
+// The default factory.
+//
+// Picks the interpolation routine that matches the number of input channels and
+// the requested flags.
+//   nInputChannels  - number of inputs of the table (1..8 are handled)
+//   nOutputChannels - number of outputs of the table
+//   dwFlags         - CMS_LERP_FLAGS_FLOAT selects the floating point routines,
+//                     CMS_LERP_FLAGS_TRILINEAR selects trilinear instead of
+//                     tetrahedral interpolation for 3 inputs
+// Returns a cmsInterpFunction with either Lerp16 or LerpFloat set. The result is
+// left zeroed when the channel counts are not supported.
+static
+cmsInterpFunction DefaultInterpolatorsFactory(cmsUInt32Number nInputChannels, cmsUInt32Number nOutputChannels, cmsUInt32Number dwFlags)
+{
+
+    cmsInterpFunction Interpolation;
+    cmsBool  IsFloat     = (dwFlags & CMS_LERP_FLAGS_FLOAT);
+    cmsBool  IsTrilinear = (dwFlags & CMS_LERP_FLAGS_TRILINEAR);
+
+    memset(&Interpolation, 0, sizeof(Interpolation));
+
+    // Safety check: the routines for 4 or more inputs keep their partial results
+    // in local arrays of MAX_STAGE_CHANNELS entries
+    if (nInputChannels >= 4 && nOutputChannels >= MAX_STAGE_CHANNELS)
+        return Interpolation;
+
+    switch (nInputChannels) {
+
+           case 1: // Gray LUT / linear
+
+               if (nOutputChannels == 1) {
+
+                   if (IsFloat)
+                       Interpolation.LerpFloat = LinLerp1Dfloat;
+                   else
+                       Interpolation.Lerp16 = LinLerp1D;
+
+               }
+               else {
+
+                   if (IsFloat)
+                       Interpolation.LerpFloat = Eval1InputFloat;
+                   else
+                       Interpolation.Lerp16 = Eval1Input;
+               }
+               break;
+
+           case 2: // Duotone
+               if (IsFloat)
+                      Interpolation.LerpFloat =  BilinearInterpFloat;
+               else
+                      Interpolation.Lerp16    =  BilinearInterp16;
+               break;
+
+           case 3:  // RGB et al
+
+               if (IsTrilinear) {
+
+                   if (IsFloat)
+                       Interpolation.LerpFloat = TrilinearInterpFloat;
+                   else
+                       Interpolation.Lerp16 = TrilinearInterp16;
+               }
+               else {
+
+                   if (IsFloat)
+                       Interpolation.LerpFloat = TetrahedralInterpFloat;
+                   else {
+
+                       Interpolation.Lerp16 = TetrahedralInterp16;
+                   }
+               }
+               break;
+
+           case 4:  // CMYK lut
+
+               if (IsFloat)
+                   Interpolation.LerpFloat =  Eval4InputsFloat;
+               else
+                   Interpolation.Lerp16    =  Eval4Inputs;
+               break;
+
+           case 5: // 5 Inks
+               if (IsFloat)
+                   Interpolation.LerpFloat =  Eval5InputsFloat;
+               else
+                   Interpolation.Lerp16    =  Eval5Inputs;
+               break;
+
+           case 6: // 6 Inks
+               if (IsFloat)
+                   Interpolation.LerpFloat =  Eval6InputsFloat;
+               else
+                   Interpolation.Lerp16    =  Eval6Inputs;
+               break;
+
+           case 7: // 7 inks
+               if (IsFloat)
+                   Interpolation.LerpFloat =  Eval7InputsFloat;
+               else
+                   Interpolation.Lerp16    =  Eval7Inputs;
+               break;
+
+           case 8: // 8 inks
+               if (IsFloat)
+                   Interpolation.LerpFloat =  Eval8InputsFloat;
+               else
+                   Interpolation.Lerp16    =  Eval8Inputs;
+               break;
+
+           default: // Unsupported number of inputs: report "no interpolator"
+               Interpolation.Lerp16 = NULL;
+               break;
+    }
+
+    return Interpolation;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsio0.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsio0.cpp
new file mode 100755
index 0000000000..208a4b0353
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsio0.cpp
@@ -0,0 +1,1948 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Generic I/O, tag dictionary management, profile struct
+
+// IOhandlers are abstractions used by littleCMS to read from whatever file, stream,
+// memory block or any storage. Each IOhandler provides implementations for read,
+// write, seek and tell functions. LittleCMS code deals with IO across those objects.
+// In this way, it is easier to add support for new storage media.
+
+// NULL stream, for taking care of used space -------------------------------------
+
+// NULL IOhandler basically does nothing but keep track on how many bytes have been
+// written. This is handy when creating profiles, where the file size is needed in the
+// header. Then, whole profile is serialized across NULL IOhandler and a second pass
+// writes the bytes to the pertinent IOhandler.
+
+typedef struct {
+    cmsUInt32Number Pointer;         // Current byte offset; moved by the NULL handler's read, seek and write
+} FILENULL;
+
+// NULL handler read: nothing is copied into Buffer; only the position advances
+// by size * count bytes. Always reports that all `count` elements were read.
+static cmsUInt32Number NULLRead(cmsIOHANDLER* iohandler, void *Buffer, cmsUInt32Number size, cmsUInt32Number count)
+{
+    FILENULL* state = (FILENULL*) iohandler ->stream;
+
+    cmsUNUSED_PARAMETER(Buffer);
+
+    state ->Pointer += size * count;
+    return count;
+}
+
+// NULL handler seek: any offset is accepted and simply recorded.
+static cmsBool  NULLSeek(cmsIOHANDLER* iohandler, cmsUInt32Number offset)
+{
+    FILENULL* state = (FILENULL*) iohandler ->stream;
+
+    state ->Pointer = offset;
+    return TRUE;
+}
+
+// NULL handler tell: reports the recorded position.
+static cmsUInt32Number NULLTell(cmsIOHANDLER* iohandler)
+{
+    const FILENULL* state = (const FILENULL*) iohandler ->stream;
+    return state ->Pointer;
+}
+
+// NULL handler write: discards the data but advances the position by `size`
+// and raises iohandler->UsedSpace to the furthest position reached so far.
+static cmsBool  NULLWrite(cmsIOHANDLER* iohandler, cmsUInt32Number size, const void *Ptr)
+{
+    FILENULL* state = (FILENULL*) iohandler ->stream;
+
+    cmsUNUSED_PARAMETER(Ptr);
+
+    state ->Pointer += size;
+    if (iohandler->UsedSpace < state ->Pointer)
+        iohandler->UsedSpace = state ->Pointer;
+    return TRUE;
+}
+
+// NULL handler close: releases the position record and then the handler itself.
+static cmsBool  NULLClose(cmsIOHANDLER* iohandler)
+{
+    cmsContext ctx = iohandler ->ContextID;
+
+    _cmsFree(ctx, iohandler ->stream);
+    _cmsFree(ctx, iohandler);
+    return TRUE;
+}
+
+// The NULL IOhandler creator. Returns NULL if either allocation fails.
+cmsIOHANDLER*  CMSEXPORT cmsOpenIOhandlerFromNULL(cmsContext ContextID)
+{
+    struct _cms_io_handler* handler;
+    FILENULL* state;
+
+    handler = (struct _cms_io_handler*) _cmsMallocZero(ContextID, sizeof(struct _cms_io_handler));
+    if (handler == NULL) return NULL;
+
+    state = (FILENULL*) _cmsMallocZero(ContextID, sizeof(FILENULL));
+    if (state == NULL) {
+        // Second allocation failed: give the handler back before bailing out
+        _cmsFree(ContextID, handler);
+        return NULL;
+    }
+
+    state ->Pointer = 0;
+
+    // Bookkeeping fields
+    handler ->ContextID = ContextID;
+    handler ->stream  = (void*) state;
+    handler ->UsedSpace = 0;
+    handler ->ReportedSize = 0;
+    handler ->PhysicalFile[0] = 0;
+
+    // Method table
+    handler ->Read    = NULLRead;
+    handler ->Seek    = NULLSeek;
+    handler ->Close   = NULLClose;
+    handler ->Tell    = NULLTell;
+    handler ->Write   = NULLWrite;
+    return handler;
+}
+
+
+// Memory-based stream --------------------------------------------------------------
+
+// These functions implement an iohandler which takes a block of memory as storage medium.
+
+typedef struct {
+    cmsUInt8Number* Block;    // Start of the memory block (a private copy in 'r' mode, the caller's buffer in 'w' mode)
+    cmsUInt32Number Size;     // Size of the block, in bytes
+    cmsUInt32Number Pointer;  // Current offset into Block
+    int FreeBlockOnClose;     // Nonzero when MemoryClose must free Block
+
+} FILEMEM;
+
+// Reads `count` elements of `size` bytes from the memory block into Buffer.
+// Returns `count` on success, or 0 (after signalling cmsERROR_READ) when the
+// request does not fit in what is left of the block. Bounds are checked without
+// wrap-around: neither size*count nor Pointer+len may overflow 32 bits.
+static
+cmsUInt32Number MemoryRead(struct _cms_io_handler* iohandler, void *Buffer, cmsUInt32Number size, cmsUInt32Number count)
+{
+    FILEMEM* ResData = (FILEMEM*) iohandler ->stream;
+    cmsUInt32Number len = size * count;
+    cmsUInt32Number avail = (ResData ->Pointer < ResData ->Size) ? (ResData ->Size - ResData ->Pointer) : 0;
+
+    // A product that does not divide back to `size` has wrapped around
+    if ((count != 0 && len / count != size) || len > avail) {
+        cmsSignalError(iohandler ->ContextID, cmsERROR_READ, "Read from memory error. Got %d bytes, block should be of %d bytes", avail, count * size);
+        return 0;
+    }
+
+    memmove(Buffer, ResData ->Block + ResData ->Pointer, len);
+    ResData -> Pointer += len;
+    return count;
+}
+
+// Absolute seek (offset counts from the start of the block, i.e. SEEK_SET).
+static cmsBool  MemorySeek(struct _cms_io_handler* iohandler, cmsUInt32Number offset)
+{
+    FILEMEM* mem = (FILEMEM*) iohandler ->stream;
+
+    // Seeking to exactly Size (one past the last byte) is allowed
+    if (offset <= mem ->Size) {
+        mem ->Pointer = offset;
+        return TRUE;
+    }
+
+    cmsSignalError(iohandler ->ContextID, cmsERROR_SEEK,  "Too few data; probably corrupted profile");
+    return FALSE;
+}
+
+// Tell for memory: reports the current offset into the block.
+// Yields 0 when the handler has no stream attached.
+static
+cmsUInt32Number MemoryTell(struct _cms_io_handler* iohandler)
+{
+    const FILEMEM* mem = (const FILEMEM*) iohandler ->stream;
+
+    return (mem != NULL) ? mem ->Pointer : 0;
+}
+
+
+// Writes `size` bytes from Ptr at the current position and records the furthest position reached
+// in iohandler->UsedSpace. A request that does not fit is clipped to the room left (still TRUE).
+static
+cmsBool MemoryWrite(struct _cms_io_handler* iohandler, cmsUInt32Number size, const void *Ptr)
+{
+    FILEMEM* ResData = (FILEMEM*) iohandler ->stream;
+    cmsUInt32Number room;
+
+    if (ResData == NULL) return FALSE; // Housekeeping
+
+    // Check for available space. Clip. Comparing against the remaining room
+    // (instead of Pointer + size) cannot wrap around for a huge `size`.
+    room = (ResData->Pointer < ResData->Size) ? (ResData->Size - ResData->Pointer) : 0;
+    if (size > room) size = room;
+
+    if (size == 0) return TRUE;     // Write zero bytes is ok, but does nothing
+
+    memmove(ResData ->Block + ResData ->Pointer, Ptr, size);
+    ResData ->Pointer += size;
+    if (ResData ->Pointer > iohandler->UsedSpace)
+        iohandler->UsedSpace = ResData ->Pointer;
+    return TRUE;
+}
+
+
+// Releases a memory handler. The block itself is freed only when the handler
+// owns it (FreeBlockOnClose set); the FILEMEM record and the handler always are.
+static cmsBool  MemoryClose(struct _cms_io_handler* iohandler)
+{
+    FILEMEM* mem = (FILEMEM*) iohandler ->stream;
+    cmsContext ctx = iohandler ->ContextID;
+
+    if (mem ->FreeBlockOnClose && mem ->Block != NULL)
+        _cmsFree(ctx, mem ->Block);
+
+    _cmsFree(ctx, mem);
+    _cmsFree(ctx, iohandler);
+
+    return TRUE;
+}
+
+// Create a iohandler for memory block. AccessMode=='r' assumes the iohandler is going to read, and makes
+// a copy of the memory block for letting user to free the memory after invoking open profile. In write
+// mode ("w"), Buffer points to the begin of memory block to be written. Returns NULL on any failure.
+cmsIOHANDLER* CMSEXPORT cmsOpenIOhandlerFromMem(cmsContext ContextID, void *Buffer, cmsUInt32Number size, const char* AccessMode)
+{
+    cmsIOHANDLER* iohandler = NULL;
+    FILEMEM* fm = NULL;
+
+    _cmsAssert(AccessMode != NULL);
+
+    iohandler = (cmsIOHANDLER*) _cmsMallocZero(ContextID, sizeof(cmsIOHANDLER));
+    if (iohandler == NULL) return NULL;
+
+    switch (*AccessMode) {
+
+    case 'r':
+        fm = (FILEMEM*) _cmsMallocZero(ContextID, sizeof(FILEMEM));
+        if (fm == NULL) goto Error;
+
+        if (Buffer == NULL) {
+            cmsSignalError(ContextID, cmsERROR_READ, "Couldn't read profile from NULL pointer");
+            goto Error;
+        }
+
+        fm ->Block = (cmsUInt8Number*) _cmsMalloc(ContextID, size);
+        if (fm ->Block == NULL) {
+
+            // The cast makes the argument match the %ld conversion
+            cmsSignalError(ContextID, cmsERROR_READ, "Couldn't allocate %ld bytes for profile", (long) size);
+            goto Error;
+        }
+
+        // Private copy: the caller may release Buffer as soon as we return
+        memmove(fm->Block, Buffer, size);
+        fm ->FreeBlockOnClose = TRUE;
+        fm ->Size    = size;
+        fm ->Pointer = 0;
+        iohandler -> ReportedSize = size;
+        break;
+
+    case 'w':
+        fm = (FILEMEM*) _cmsMallocZero(ContextID, sizeof(FILEMEM));
+        if (fm == NULL) goto Error;
+
+        fm ->Block = (cmsUInt8Number*) Buffer;
+        fm ->FreeBlockOnClose = FALSE;
+        fm ->Size    = size;
+        fm ->Pointer = 0;
+        iohandler -> ReportedSize = 0;
+        break;
+
+    default:
+        // Leave through Error so the handler allocated above is released
+        cmsSignalError(ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown access mode '%c'", *AccessMode);
+        goto Error;
+    }
+
+    iohandler ->ContextID = ContextID;
+    iohandler ->stream  = (void*) fm;
+    iohandler ->UsedSpace = 0;
+    iohandler ->PhysicalFile[0] = 0;
+
+    iohandler ->Read    = MemoryRead;
+    iohandler ->Seek    = MemorySeek;
+    iohandler ->Close   = MemoryClose;
+    iohandler ->Tell    = MemoryTell;
+    iohandler ->Write   = MemoryWrite;
+
+    return iohandler;
+
+Error:
+    if (fm) _cmsFree(ContextID, fm);
+    if (iohandler) _cmsFree(ContextID, iohandler);
+    return NULL;
+}
+
+// File-based stream -------------------------------------------------------
+
+// Reads `count` elements of `size` bytes each from the underlying FILE.
+// Returns `count` when everything was read; a short read signals cmsERROR_FILE and returns 0.
+static cmsUInt32Number FileRead(cmsIOHANDLER* iohandler, void *Buffer, cmsUInt32Number size, cmsUInt32Number count)
+{
+    FILE* fp = (FILE*) iohandler->stream;
+    cmsUInt32Number got = (cmsUInt32Number) fread(Buffer, size, count, fp);
+
+    if (got == count)
+        return got;
+
+    cmsSignalError(iohandler ->ContextID, cmsERROR_FILE, "Read error. Got %d bytes, block should be of %d bytes", got * size, count * size);
+    return 0;
+}
+
+// Moves the file position to `offset` bytes from the start of the file.
+// Returns FALSE (after signalling cmsERROR_FILE) if fseek reports failure.
+static cmsBool  FileSeek(cmsIOHANDLER* iohandler, cmsUInt32Number offset)
+{
+    FILE* fp = (FILE*) iohandler ->stream;
+
+    if (fseek(fp, (long) offset, SEEK_SET) == 0)
+        return TRUE;
+
+    cmsSignalError(iohandler ->ContextID, cmsERROR_FILE, "Seek error; probably corrupted file");
+    return FALSE;
+}
+
+// Returns file pointer position or 0 on error, which is also a valid position.
+static cmsUInt32Number FileTell(cmsIOHANDLER* iohandler)
+{
+    FILE* fp = (FILE*) iohandler ->stream;
+    long pos = ftell(fp);
+
+    if (pos != -1L)
+        return (cmsUInt32Number) pos;
+
+    cmsSignalError(iohandler->ContextID, cmsERROR_FILE, "Tell error; probably corrupted file");
+    return 0;
+}
+
+// Writes `size` bytes from Buffer as one block. UsedSpace grows by `size` before
+// the write is attempted. Returns TRUE if fwrite stored the block, else FALSE.
+static cmsBool  FileWrite(cmsIOHANDLER* iohandler, cmsUInt32Number size, const void* Buffer)
+{
+    FILE* fp = (FILE*) iohandler->stream;
+    if (size == 0) return TRUE;  // Zero-length writes succeed without touching the file
+    iohandler->UsedSpace += size;
+    return (fwrite(Buffer, size, 1, fp) == 1);
+}
+
+// Closes the file and releases the handler. The handler is freed even when
+// fclose fails, since the stream is unusable afterwards; FALSE reports the failure.
+static cmsBool  FileClose(cmsIOHANDLER* iohandler)
+{
+    cmsBool ok = (fclose((FILE*) iohandler ->stream) == 0);
+    _cmsFree(iohandler ->ContextID, iohandler);
+    return ok;
+}
+
+// Creates an IOhandler on top of a disk file. AccessMode starts with 'r' (open an
+// existing file for reading) or 'w' (create the file for writing). Returns NULL on failure.
+cmsIOHANDLER* CMSEXPORT cmsOpenIOhandlerFromFile(cmsContext ContextID, const char* FileName, const char* AccessMode)
+{
+    cmsIOHANDLER* handler;
+    FILE* fp;
+    cmsInt32Number length;
+
+    _cmsAssert(FileName != NULL);
+    _cmsAssert(AccessMode != NULL);
+
+    handler = (cmsIOHANDLER*) _cmsMallocZero(ContextID, sizeof(cmsIOHANDLER));
+    if (handler == NULL) return NULL;
+
+    if (*AccessMode == 'r') {
+        fp = fopen(FileName, "rb");
+        if (fp == NULL) {
+            _cmsFree(ContextID, handler);
+            cmsSignalError(ContextID, cmsERROR_FILE, "File '%s' not found", FileName);
+            return NULL;
+        }
+
+        // The file length becomes the size the handler reports
+        length = cmsfilelength(fp);
+        if (length < 0) {
+            fclose(fp);
+            _cmsFree(ContextID, handler);
+            cmsSignalError(ContextID, cmsERROR_FILE, "Cannot get size of file '%s'", FileName);
+            return NULL;
+        }
+
+        handler ->ReportedSize = (cmsUInt32Number) length;
+    }
+    else if (*AccessMode == 'w') {
+        fp = fopen(FileName, "wb");
+        if (fp == NULL) {
+            _cmsFree(ContextID, handler);
+            cmsSignalError(ContextID, cmsERROR_FILE, "Couldn't create '%s'", FileName);
+            return NULL;
+        }
+
+        handler ->ReportedSize = 0;
+    }
+    else {
+
+        _cmsFree(ContextID, handler);
+        cmsSignalError(ContextID, cmsERROR_FILE, "Unknown access mode '%c'", *AccessMode);
+        return NULL;
+    }
+
+    handler ->ContextID = ContextID;
+    handler ->stream = (void*) fp;
+    handler ->UsedSpace = 0;
+
+    // Keep track of the original file (truncated if needed, always terminated)
+    strncpy(handler ->PhysicalFile, FileName, sizeof(handler ->PhysicalFile)-1);
+    handler ->PhysicalFile[sizeof(handler ->PhysicalFile)-1] = 0;
+
+    handler ->Read    = FileRead;
+    handler ->Seek    = FileSeek;
+    handler ->Close   = FileClose;
+    handler ->Tell    = FileTell;
+    handler ->Write   = FileWrite;
+
+    return handler;
+}
+
+// Creates an IOhandler over an already open FILE stream. The stream length
+// becomes the reported size. Returns NULL if the length cannot be obtained or
+// the handler cannot be allocated. Closing the handler fcloses Stream (FileClose).
+cmsIOHANDLER* CMSEXPORT cmsOpenIOhandlerFromStream(cmsContext ContextID, FILE* Stream)
+{
+    cmsIOHANDLER* handler;
+    cmsInt32Number length = cmsfilelength(Stream);
+
+    if (length < 0) {
+        cmsSignalError(ContextID, cmsERROR_FILE, "Cannot get size of stream");
+        return NULL;
+    }
+
+    handler = (cmsIOHANDLER*) _cmsMallocZero(ContextID, sizeof(cmsIOHANDLER));
+    if (handler == NULL) return NULL;
+
+    handler ->ContextID = ContextID;
+    handler ->stream = (void*) Stream;
+    handler ->UsedSpace = 0;
+    handler ->ReportedSize = (cmsUInt32Number) length;
+    handler ->PhysicalFile[0] = 0;
+
+    handler ->Read    = FileRead;
+    handler ->Seek    = FileSeek;
+    handler ->Close   = FileClose;
+    handler ->Tell    = FileTell;
+    handler ->Write   = FileWrite;
+
+    return handler;
+}
+
+
+
+// Closes an IO handler by dispatching to the Close routine it was created with
+cmsBool CMSEXPORT cmsCloseIOhandler(cmsIOHANDLER* io)
+{
+    return (*io ->Close)(io);
+}
+
+// -------------------------------------------------------------------------------------------------------
+
+// Returns the IOhandler attached to a profile, or NULL for a NULL handle
+cmsIOHANDLER* CMSEXPORT cmsGetProfileIOhandler(cmsHPROFILE hProfile)
+{
+    const _cmsICCPROFILE* profile = (const _cmsICCPROFILE*) hProfile;
+
+    return (profile != NULL) ? profile ->IOhandler : NULL;
+}
+
+// Creates an empty profile structure holding all required parameters: no tags,
+// version 2.1 and the current UTC time as creation date. Returns NULL if the
+// structure cannot be allocated.
+cmsHPROFILE CMSEXPORT cmsCreateProfilePlaceholder(cmsContext ContextID)
+{
+    time_t now = time(NULL);
+    const struct tm* utc = gmtime(&now);
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) _cmsMallocZero(ContextID, sizeof(_cmsICCPROFILE));
+    if (Icc == NULL) return NULL;
+
+    Icc ->ContextID = ContextID;
+
+    Icc -> TagCount   = 0;              // Set it to empty
+    Icc ->Version =  0x02100000;        // Set default version
+
+    // Set creation date/time. gmtime() may return NULL on failure; in that case
+    // Created keeps whatever _cmsMallocZero left in it rather than copying from NULL.
+    if (utc != NULL) memmove(&Icc ->Created, utc, sizeof(Icc ->Created));
+
+    // Create a mutex if the user provided proper plugin. NULL otherwise
+    Icc ->UsrMutex = _cmsCreateMutex(ContextID);
+
+    return (cmsHPROFILE) Icc;
+}
+
+// Returns the context a profile was created in, or NULL for a NULL handle
+cmsContext CMSEXPORT cmsGetProfileContextID(cmsHPROFILE hProfile)
+{
+    const _cmsICCPROFILE* profile = (const _cmsICCPROFILE*) hProfile;
+
+    return (profile != NULL) ? profile ->ContextID : NULL;
+}
+
+
+// Return the number of tags held in the profile's tag directory,
+// or -1 when the profile handle is NULL
+cmsInt32Number CMSEXPORT cmsGetTagCount(cmsHPROFILE hProfile)
+{
+    const _cmsICCPROFILE* profile = (const _cmsICCPROFILE*) hProfile;
+
+    return (profile == NULL) ? -1 : (cmsInt32Number) profile ->TagCount;
+}
+
+// Return the tag signature of a given tag number (0-based). Yields 0 when the
+// index does not name an existing directory entry.
+cmsTagSignature CMSEXPORT cmsGetTagSignature(cmsHPROFILE hProfile, cmsUInt32Number n)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+
+    if (n >= Icc->TagCount) return (cmsTagSignature) 0;  // Valid entries are 0 .. TagCount-1
+    if (n >= MAX_TABLE_TAG) return (cmsTagSignature) 0;  // As double check
+    return Icc ->TagNames[n];
+}
+
+
+// Linear scan of the tag directory. Returns the index of the first entry whose
+// signature equals `sig`, or -1 if there is none.
+static int SearchOneTag(_cmsICCPROFILE* Profile, cmsTagSignature sig)
+{
+    const int total = (int) Profile ->TagCount;
+    int pos = 0;
+
+    while (pos < total) {
+        if (Profile ->TagNames[pos] == sig) return pos;
+        pos++;
+    }
+    return -1;
+}
+
+// Search for a specific tag in the tag directory. Returns its position, or -1
+// if the tag is not found. With lFollowLinks set, a tag that is linked to
+// another one is resolved (repeatedly) and the final target's position is returned.
+int _cmsSearchTag(_cmsICCPROFILE* Icc, cmsTagSignature sig, cmsBool lFollowLinks)
+{
+    int pos;
+    cmsTagSignature target;
+
+    for (;;) {
+
+        // Search for given tag in ICC profile directory
+        pos = SearchOneTag(Icc, sig);
+
+        // Not found
+        if (pos < 0) return -1;
+
+        // Found, and the caller does not want links resolved
+        if (!lFollowLinks) return pos;
+
+        // Is this a linked tag? A zero link means this entry is final
+        target = Icc ->TagLinked[pos];
+        if (target == (cmsTagSignature) 0) break;
+
+        // Yes, follow link: look the target signature up on the next pass
+        sig = target;
+    }
+
+    return pos;
+}
+
+// Deletes a tag entry: releases the in-memory contents stored for tag slot `i`.
+// Raw tags are plain blocks; cooked tags are released through their type handler.
+static
+void _cmsDeleteTagByPos(_cmsICCPROFILE* Icc, int i)
+{
+    _cmsAssert(Icc != NULL);
+    _cmsAssert(i >= 0);
+
+    if (Icc -> TagPtrs[i] != NULL) {
+
+        // Free previous version
+        if (Icc ->TagSaveAsRaw[i]) {
+            _cmsFree(Icc ->ContextID, Icc ->TagPtrs[i]);
+            // Clear the slot so the freed block cannot be released a second time
+            Icc ->TagPtrs[i] = NULL;
+        }
+        else {
+            cmsTagTypeHandler* TypeHandler = Icc ->TagTypeHandlers[i];
+
+            if (TypeHandler != NULL) {
+
+                cmsTagTypeHandler LocalTypeHandler = *TypeHandler;
+                LocalTypeHandler.ContextID = Icc ->ContextID;              // As an additional parameter
+                LocalTypeHandler.ICCVersion = Icc ->Version;
+                LocalTypeHandler.FreePtr(&LocalTypeHandler, Icc -> TagPtrs[i]);
+                Icc ->TagPtrs[i] = NULL;
+            }
+        }
+    }
+}
+
+
+// Reserves a directory slot for tag `sig` and stores its index in *NewPos.
+// An existing entry is reused after its current contents are released; otherwise
+// a new entry is appended. Returns FALSE when the directory is already full.
+static
+cmsBool _cmsNewTag(_cmsICCPROFILE* Icc, cmsTagSignature sig, int* NewPos)
+{
+    // Look for the tag itself, without resolving links
+    int existing = _cmsSearchTag(Icc, sig, FALSE);
+
+    if (existing >= 0) {
+
+        // Already there: drop what it holds and hand the same slot back
+        _cmsDeleteTagByPos(Icc, existing);
+        *NewPos = existing;
+        return TRUE;
+    }
+
+    // Not present, so a new slot is needed
+    if (Icc -> TagCount >= MAX_TABLE_TAG) {
+
+        cmsSignalError(Icc ->ContextID, cmsERROR_RANGE, "Too many tags (%d)", MAX_TABLE_TAG);
+        return FALSE;
+    }
+
+    *NewPos = (int) Icc ->TagCount;
+    Icc -> TagCount++;
+
+    return TRUE;
+}
+
+
+// Check existence: TRUE when `sig` has its own entry in the tag directory (links are not followed)
+cmsBool CMSEXPORT cmsIsTag(cmsHPROFILE hProfile, cmsTagSignature sig)
+{
+    int pos = _cmsSearchTag((_cmsICCPROFILE*) (void*) hProfile, sig, FALSE);
+    return pos >= 0;
+}
+
+
+
+// Forces the profile version field to conform to the specification.
+// Works on the bytes exactly as stored in the profile (big endian), so it
+// has to be called before converting to platform endianness.
+// Byte 0 is the BCD major version, so it is capped at 9.
+// Byte 1 holds 2 BCD digits, one per nibble; each is capped at 9.
+// Reserved bytes 2 & 3 are forced to 0.
+static
+cmsUInt32Number _validatedVersion(cmsUInt32Number DWord)
+{
+    cmsUInt8Number* bytes = (cmsUInt8Number*) &DWord;
+    cmsUInt8Number hi, lo;
+
+    if (bytes[0] > 0x09) bytes[0] = (cmsUInt8Number) 0x09;
+
+    // Clamp the two digits of byte 1 one nibble at a time
+    hi = (cmsUInt8Number) (bytes[1] & 0xf0);
+    lo = (cmsUInt8Number) (bytes[1] & 0x0f);
+    if (hi > 0x90U) hi = 0x90U;
+    if (lo > 0x09U) lo = 0x09U;
+    bytes[1] = (cmsUInt8Number)(hi | lo);
+    bytes[2] = (cmsUInt8Number)0;
+    bytes[3] = (cmsUInt8Number)0;
+    return DWord;
+}
+
+// Read profile header and validate it, then load the tag directory. Returns FALSE on read error, bad signature or too many tags.
+cmsBool _cmsReadHeader(_cmsICCPROFILE* Icc)
+{
+    cmsTagEntry Tag;
+    cmsICCHeader Header;
+    cmsUInt32Number i, j;
+    cmsUInt32Number HeaderSize;
+    cmsIOHANDLER* io = Icc ->IOhandler;
+    cmsUInt32Number TagCount;
+
+
+    // Read the header
+    if (io -> Read(io, &Header, sizeof(cmsICCHeader), 1) != 1) {
+        return FALSE;
+    }
+
+    // Validate file as an ICC profile
+    if (_cmsAdjustEndianess32(Header.magic) != cmsMagicNumber) {
+        cmsSignalError(Icc ->ContextID, cmsERROR_BAD_SIGNATURE, "not an ICC profile, invalid signature");
+        return FALSE;
+    }
+
+    // Adjust endianness of the used parameters
+    Icc -> DeviceClass     = (cmsProfileClassSignature) _cmsAdjustEndianess32(Header.deviceClass);
+    Icc -> ColorSpace      = (cmsColorSpaceSignature)   _cmsAdjustEndianess32(Header.colorSpace);
+    Icc -> PCS             = (cmsColorSpaceSignature)   _cmsAdjustEndianess32(Header.pcs);
+
+    Icc -> RenderingIntent = _cmsAdjustEndianess32(Header.renderingIntent);
+    Icc -> flags           = _cmsAdjustEndianess32(Header.flags);
+    Icc -> manufacturer    = _cmsAdjustEndianess32(Header.manufacturer);
+    Icc -> model           = _cmsAdjustEndianess32(Header.model);
+    Icc -> creator         = _cmsAdjustEndianess32(Header.creator);
+
+    _cmsAdjustEndianess64(&Icc -> attributes, &Header.attributes);
+    Icc -> Version         = _cmsAdjustEndianess32(_validatedVersion(Header.version));  // version is clamped while still in file byte order
+
+    // Get size as reported in header
+    HeaderSize = _cmsAdjustEndianess32(Header.size);
+
+    // Clamp HeaderSize so it never exceeds the size reported by the IOhandler
+    if (HeaderSize >= Icc ->IOhandler ->ReportedSize)
+            HeaderSize = Icc ->IOhandler ->ReportedSize;
+
+
+    // Get creation date/time
+    _cmsDecodeDateTimeNumber(&Header.date, &Icc ->Created);
+
+    // The profile ID is copied as 16 raw bytes
+    memmove(Icc ->ProfileID.ID32, Header.profileID.ID32, 16);
+
+
+    // Read the number of entries in the tag directory
+    if (!_cmsReadUInt32Number(io, &TagCount)) return FALSE;
+    if (TagCount > MAX_TABLE_TAG) {
+
+        cmsSignalError(Icc ->ContextID, cmsERROR_RANGE, "Too many tags (%d)", TagCount);
+        return FALSE;
+    }
+
+
+    // Read tag directory entries; Icc->TagCount counts only the entries that are kept
+    Icc -> TagCount = 0;
+    for (i=0; i < TagCount; i++) {
+
+        if (!_cmsReadUInt32Number(io, (cmsUInt32Number *) &Tag.sig)) return FALSE;
+        if (!_cmsReadUInt32Number(io, &Tag.offset)) return FALSE;
+        if (!_cmsReadUInt32Number(io, &Tag.size)) return FALSE;
+
+        // Perform some sanity check. Offset + size should fall inside file. Entries that fail are skipped.
+        if (Tag.offset + Tag.size > HeaderSize ||
+            Tag.offset + Tag.size < Tag.offset)   // second test catches 32-bit wrap-around of the sum
+                  continue;
+
+        Icc -> TagNames[Icc ->TagCount]   = Tag.sig;
+        Icc -> TagOffsets[Icc ->TagCount] = Tag.offset;
+        Icc -> TagSizes[Icc ->TagCount]   = Tag.size;
+
+       // Search for links: an earlier kept tag with the same offset and size is recorded as this tag's link
+        for (j=0; j < Icc ->TagCount; j++) {
+
+            if ((Icc ->TagOffsets[j] == Tag.offset) &&
+                (Icc ->TagSizes[j]   == Tag.size)) {
+
+                Icc ->TagLinked[Icc ->TagCount] = Icc ->TagNames[j];
+            }
+
+        }
+
+        Icc ->TagCount++;
+    }
+
+    return TRUE;
+}
+
+// Saves profile header followed by the tag directory. UsedSpace is stored as the profile size. Returns FALSE if any write fails.
+cmsBool _cmsWriteHeader(_cmsICCPROFILE* Icc, cmsUInt32Number UsedSpace)
+{
+    cmsICCHeader Header;
+    cmsUInt32Number i;
+    cmsTagEntry Tag;
+    cmsUInt32Number Count;
+
+    Header.size        = _cmsAdjustEndianess32(UsedSpace);
+    Header.cmmId       = _cmsAdjustEndianess32(lcmsSignature);
+    Header.version     = _cmsAdjustEndianess32(Icc ->Version);
+
+    Header.deviceClass = (cmsProfileClassSignature) _cmsAdjustEndianess32(Icc -> DeviceClass);
+    Header.colorSpace  = (cmsColorSpaceSignature) _cmsAdjustEndianess32(Icc -> ColorSpace);
+    Header.pcs         = (cmsColorSpaceSignature) _cmsAdjustEndianess32(Icc -> PCS);
+
+    //   NOTE: in v4 Timestamp must be in UTC rather than in local time
+    _cmsEncodeDateTimeNumber(&Header.date, &Icc ->Created);
+
+    Header.magic       = _cmsAdjustEndianess32(cmsMagicNumber);
+
+    // Platform signature is chosen at compile time
+#ifdef CMS_IS_WINDOWS_
+    Header.platform    = (cmsPlatformSignature) _cmsAdjustEndianess32(cmsSigMicrosoft);
+#else
+    Header.platform    = (cmsPlatformSignature) _cmsAdjustEndianess32(cmsSigMacintosh);
+#endif
+
+    Header.flags        = _cmsAdjustEndianess32(Icc -> flags);
+    Header.manufacturer = _cmsAdjustEndianess32(Icc -> manufacturer);
+    Header.model        = _cmsAdjustEndianess32(Icc -> model);
+
+    _cmsAdjustEndianess64(&Header.attributes, &Icc -> attributes);
+
+    // Rendering intent in the header (for embedded profiles)
+    Header.renderingIntent = _cmsAdjustEndianess32(Icc -> RenderingIntent);
+
+    // Illuminant is always D50
+    Header.illuminant.X = (cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(cmsD50_XYZ()->X));
+    Header.illuminant.Y = (cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(cmsD50_XYZ()->Y));
+    Header.illuminant.Z = (cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(cmsD50_XYZ()->Z));
+
+    // Created by LittleCMS (that's me!)
+    Header.creator      = _cmsAdjustEndianess32(lcmsSignature);
+
+    memset(&Header.reserved, 0, sizeof(Header.reserved));
+
+    // Set profile ID. Endianness is always big endian
+    memmove(&Header.profileID, &Icc ->ProfileID, 16);
+
+    // Dump the header
+    if (!Icc -> IOhandler->Write(Icc->IOhandler, sizeof(cmsICCHeader), &Header)) return FALSE;
+
+    // Saves Tag directory
+
+    // Get true count: entries with a zero signature are not written
+    Count = 0;
+    for (i=0;  i < Icc -> TagCount; i++) {
+        if (Icc ->TagNames[i] != (cmsTagSignature) 0)
+            Count++;
+    }
+
+    // Store number of tags
+    if (!_cmsWriteUInt32Number(Icc ->IOhandler, Count)) return FALSE;
+
+    // One directory record (signature, offset, size) per real tag, endian-adjusted
+    for (i=0; i < Icc -> TagCount; i++) {
+
+        if (Icc ->TagNames[i] == (cmsTagSignature) 0) continue;   // It is just a placeholder
+
+        Tag.sig    = (cmsTagSignature) _cmsAdjustEndianess32((cmsUInt32Number) Icc -> TagNames[i]);
+        Tag.offset = _cmsAdjustEndianess32((cmsUInt32Number) Icc -> TagOffsets[i]);
+        Tag.size   = _cmsAdjustEndianess32((cmsUInt32Number) Icc -> TagSizes[i]);
+
+        if (!Icc ->IOhandler -> Write(Icc-> IOhandler, sizeof(cmsTagEntry), &Tag)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+// ----------------------------------------------------------------------- Set/Get several struct members
+
+
+// Returns the rendering intent stored in the profile
+cmsUInt32Number CMSEXPORT cmsGetHeaderRenderingIntent(cmsHPROFILE hProfile)
+{
+    return ((_cmsICCPROFILE*) hProfile) ->RenderingIntent;
+}
+
+// Stores the rendering intent in the profile
+void CMSEXPORT cmsSetHeaderRenderingIntent(cmsHPROFILE hProfile, cmsUInt32Number RenderingIntent)
+{
+    ((_cmsICCPROFILE*) hProfile) ->RenderingIntent = RenderingIntent;
+}
+
+// Returns the header flags stored in the profile
+cmsUInt32Number CMSEXPORT cmsGetHeaderFlags(cmsHPROFILE hProfile)
+{
+    return (cmsUInt32Number) ((_cmsICCPROFILE*) hProfile) ->flags;
+}
+
+// Stores the header flags in the profile
+void CMSEXPORT cmsSetHeaderFlags(cmsHPROFILE hProfile, cmsUInt32Number Flags)
+{
+    ((_cmsICCPROFILE*) hProfile) ->flags = (cmsUInt32Number) Flags;
+}
+
+// Returns the manufacturer field stored in the profile
+cmsUInt32Number CMSEXPORT cmsGetHeaderManufacturer(cmsHPROFILE hProfile)
+{
+    return ((_cmsICCPROFILE*) hProfile) ->manufacturer;
+}
+
+// Stores the manufacturer field in the profile
+void CMSEXPORT cmsSetHeaderManufacturer(cmsHPROFILE hProfile, cmsUInt32Number manufacturer)
+{
+    ((_cmsICCPROFILE*) hProfile) ->manufacturer = manufacturer;
+}
+
+// Returns the creator field stored in the profile
+cmsUInt32Number CMSEXPORT cmsGetHeaderCreator(cmsHPROFILE hProfile)
+{
+    return ((_cmsICCPROFILE*) hProfile) ->creator;
+}
+
+// Returns the model field stored in the profile
+cmsUInt32Number CMSEXPORT cmsGetHeaderModel(cmsHPROFILE hProfile)
+{
+    return ((_cmsICCPROFILE*) hProfile) ->model;
+}
+
+// Stores the model field in the profile
+void CMSEXPORT cmsSetHeaderModel(cmsHPROFILE hProfile, cmsUInt32Number model)
+{
+    ((_cmsICCPROFILE*) hProfile) ->model = model;
+}
+
+// Copies the profile's 64-bit attributes field into *Flags
+void CMSEXPORT cmsGetHeaderAttributes(cmsHPROFILE hProfile, cmsUInt64Number* Flags)
+{
+    memmove(Flags, &((_cmsICCPROFILE*) hProfile) ->attributes, sizeof(cmsUInt64Number));
+}
+
+// Copies Flags into the profile's 64-bit attributes field
+void CMSEXPORT cmsSetHeaderAttributes(cmsHPROFILE hProfile, cmsUInt64Number Flags)
+{
+    memmove(&((_cmsICCPROFILE*) hProfile) ->attributes, &Flags, sizeof(cmsUInt64Number));
+}
+
+// Copies the 16-byte profile ID out of the profile into ProfileID
+void CMSEXPORT cmsGetHeaderProfileID(cmsHPROFILE hProfile, cmsUInt8Number* ProfileID)
+{
+    memmove(ProfileID, ((_cmsICCPROFILE*) hProfile) ->ProfileID.ID8, 16);
+}
+
+// Copies 16 bytes from ProfileID into the profile's ID field
+void CMSEXPORT cmsSetHeaderProfileID(cmsHPROFILE hProfile, cmsUInt8Number* ProfileID)
+{
+    memmove(&((_cmsICCPROFILE*) hProfile) ->ProfileID, ProfileID, 16);
+}
+
+// Copies the profile's creation date/time into *Dest. Always returns TRUE.
+cmsBool  CMSEXPORT cmsGetHeaderCreationDateTime(cmsHPROFILE hProfile, struct tm *Dest)
+{
+    memmove(Dest, &((_cmsICCPROFILE*) hProfile) ->Created, sizeof(struct tm));
+    return TRUE;
+}
+
+// Returns the PCS signature stored in the profile
+cmsColorSpaceSignature CMSEXPORT cmsGetPCS(cmsHPROFILE hProfile)
+{
+    return ((_cmsICCPROFILE*) hProfile) ->PCS;
+}
+
+// Stores the PCS signature in the profile
+void CMSEXPORT cmsSetPCS(cmsHPROFILE hProfile, cmsColorSpaceSignature pcs)
+{
+    ((_cmsICCPROFILE*) hProfile) ->PCS = pcs;
+}
+
+// Returns the color space signature stored in the profile
+cmsColorSpaceSignature CMSEXPORT cmsGetColorSpace(cmsHPROFILE hProfile)
+{
+    return ((_cmsICCPROFILE*) hProfile) ->ColorSpace;
+}
+
+// Stores the color space signature in the profile
+void CMSEXPORT cmsSetColorSpace(cmsHPROFILE hProfile, cmsColorSpaceSignature sig)
+{
+    ((_cmsICCPROFILE*) hProfile) ->ColorSpace = sig;
+}
+
+// Returns the device class signature stored in the profile
+cmsProfileClassSignature CMSEXPORT cmsGetDeviceClass(cmsHPROFILE hProfile)
+{
+    return ((_cmsICCPROFILE*) hProfile) ->DeviceClass;
+}
+
+// Stores the device class signature in the profile
+void CMSEXPORT cmsSetDeviceClass(cmsHPROFILE hProfile, cmsProfileClassSignature sig)
+{
+    ((_cmsICCPROFILE*) hProfile) ->DeviceClass = sig;
+}
+
+// Returns the version exactly as encoded in the profile (e.g. 0x04200000)
+cmsUInt32Number CMSEXPORT cmsGetEncodedICCversion(cmsHPROFILE hProfile)
+{
+    return ((_cmsICCPROFILE*) hProfile) ->Version;
+}
+
+// Stores an already encoded version value in the profile, unchanged
+void CMSEXPORT cmsSetEncodedICCversion(cmsHPROFILE hProfile, cmsUInt32Number Version)
+{
+    ((_cmsICCPROFILE*) hProfile) ->Version = Version;
+}
+
+// Re-reads the digits of `in`, taken in base BaseIn, as a number written in base
+// BaseOut (e.g. decimal 42 becomes hexadecimal 0x42). At most 100 digits are handled.
+static
+cmsUInt32Number BaseToBase(cmsUInt32Number in, int BaseIn, int BaseOut)
+{
+    char digits[100];
+    int count = 0;
+    cmsUInt32Number result = 0;
+
+    // Peel digits off, least significant first
+    while (in > 0 && count < 100) {
+        digits[count++] = (char) (in % BaseIn);
+        in /= BaseIn;
+    }
+
+    // Rebuild from the most significant digit down
+    while (count > 0)
+        result = result * BaseOut + digits[--count];
+    return result;
+}
+
+// Stores a version given as a number such as 4.2 in its encoded form
+void  CMSEXPORT cmsSetProfileVersion(cmsHPROFILE hProfile, cmsFloat64Number Version)
+{
+    _cmsICCPROFILE*  profile = (_cmsICCPROFILE*) hProfile;
+    // 4.2 -> 420 (rounded) -> 0x420 -> 0x4200000
+    cmsUInt32Number hundredths = (cmsUInt32Number) floor(Version * 100.0 + 0.5);
+    profile ->Version = BaseToBase(hundredths, 10, 16) << 16;
+}
+
+// Returns the profile version as a number such as 4.2, decoded from the top 16 bits of the encoded value
+cmsFloat64Number CMSEXPORT cmsGetProfileVersion(cmsHPROFILE hProfile)
+{
+    const _cmsICCPROFILE* profile = (const _cmsICCPROFILE*) hProfile;
+    cmsUInt32Number packed = profile ->Version >> 16;
+    return BaseToBase(packed, 16, 10) / 100.0;
+}
+// --------------------------------------------------------------------------------------------------------------
+
+
+// Creates a profile object that reads from the supplied IO handler. The header
+// is read immediately; if that fails the placeholder is closed through
+// cmsCloseProfile and NULL is returned.
+cmsHPROFILE CMSEXPORT cmsOpenProfileFromIOhandlerTHR(cmsContext ContextID, cmsIOHANDLER* io)
+{
+    cmsHPROFILE hProfile = cmsCreateProfilePlaceholder(ContextID);
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+
+    if (hProfile == NULL) return NULL;
+
+    Icc ->IOhandler = io;
+    if (_cmsReadHeader(Icc)) return hProfile;
+
+    // Header could not be read: release everything (cmsCloseProfile also
+    // closes the attached IO handler when it is not NULL)
+    cmsCloseProfile(hProfile);
+    return NULL;
+}
+
+// Creates a profile object on top of the supplied IO handler. When `write` is
+// non-zero the profile is only flagged as opened for writing and no header is
+// read; otherwise the header is read right away. Returns NULL on failure.
+cmsHPROFILE CMSEXPORT cmsOpenProfileFromIOhandler2THR(cmsContext ContextID, cmsIOHANDLER* io, cmsBool write)
+{
+    cmsHPROFILE hProfile = cmsCreateProfilePlaceholder(ContextID);
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+
+    if (hProfile == NULL) return NULL;
+
+    Icc ->IOhandler = io;
+
+    if (!write) {
+
+        // Read mode: the header has to be readable
+        if (_cmsReadHeader(Icc)) return hProfile;
+
+        cmsCloseProfile(hProfile);
+        return NULL;
+    }
+
+    Icc -> IsWrite = TRUE;
+    return hProfile;
+}
+
+
+// Opens a profile stored in a disk file. sAccess is forwarded to the file IO
+// handler; when it begins with 'w' or 'W' the profile is flagged for writing
+// and nothing is read, otherwise the header is read. Returns NULL on failure.
+cmsHPROFILE CMSEXPORT cmsOpenProfileFromFileTHR(cmsContext ContextID, const char *lpFileName, const char *sAccess)
+{
+    cmsHPROFILE hProfile = cmsCreateProfilePlaceholder(ContextID);
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    cmsBool Ok = FALSE;
+
+    if (hProfile == NULL) return NULL;
+
+    Icc ->IOhandler = cmsOpenIOhandlerFromFile(ContextID, lpFileName, sAccess);
+
+    if (Icc ->IOhandler != NULL) {
+
+        if (*sAccess == 'W' || *sAccess == 'w') {
+            Icc -> IsWrite = TRUE;       // Write mode: there is no header to read
+            Ok = TRUE;
+        }
+        else
+            Ok = _cmsReadHeader(Icc);
+    }
+
+    if (Ok) return hProfile;
+    cmsCloseProfile(hProfile);
+    return NULL;
+}
+
+
+cmsHPROFILE CMSEXPORT cmsOpenProfileFromFile(const char *ICCProfile, const char *sAccess)
+{   // Same as cmsOpenProfileFromFileTHR, with NULL passed as the context
+    return cmsOpenProfileFromFileTHR(NULL, ICCProfile, sAccess);
+}
+
+
+// Opens a profile on top of an already opened stdio stream. A leading 'w' in
+// sAccess flags the profile for writing (nothing is read); any other value
+// makes the header be read from the stream. Returns NULL on failure.
+// NOTE(review): unlike cmsOpenProfileFromFileTHR, an uppercase 'W' is not treated as write mode here.
+cmsHPROFILE  CMSEXPORT cmsOpenProfileFromStreamTHR(cmsContext ContextID, FILE* ICCProfile, const char *sAccess)
+{
+    cmsHPROFILE hProfile = cmsCreateProfilePlaceholder(ContextID);
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    cmsBool Ok = FALSE;
+
+    if (hProfile == NULL) return NULL;
+
+    Icc ->IOhandler = cmsOpenIOhandlerFromStream(ContextID, ICCProfile);
+    if (Icc ->IOhandler != NULL) {
+        if (*sAccess == 'w') {
+            Icc -> IsWrite = TRUE;
+            Ok = TRUE;
+        }
+        else
+            Ok = _cmsReadHeader(Icc);
+    }
+
+    if (Ok) return hProfile;
+    cmsCloseProfile(hProfile);
+    return NULL;
+}
+
+cmsHPROFILE  CMSEXPORT cmsOpenProfileFromStream(FILE* ICCProfile, const char *sAccess)
+{   // Same as cmsOpenProfileFromStreamTHR, with NULL passed as the context
+    return cmsOpenProfileFromStreamTHR(NULL, ICCProfile, sAccess);
+}
+
+
+// Opens a profile held in a memory block of dwSize bytes. The block is wrapped
+// in a read-mode memory IO handler and the header is read immediately, so a
+// handle is returned only when the header could be read.
+// Returns NULL if any step fails.
+cmsHPROFILE CMSEXPORT cmsOpenProfileFromMemTHR(cmsContext ContextID, const void* MemPtr, cmsUInt32Number dwSize)
+{
+    cmsHPROFILE hProfile = cmsCreateProfilePlaceholder(ContextID);
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+
+    if (hProfile == NULL) return NULL;
+
+    // The const qualifier is dropped only because the memory IO handler
+    // constructor serves both read and write modes; "r" is requested here.
+    Icc ->IOhandler = cmsOpenIOhandlerFromMem(ContextID, (void*) MemPtr, dwSize, "r");
+
+    // Short-circuit: the header is read only when the handler exists
+    if (Icc ->IOhandler != NULL && _cmsReadHeader(Icc)) {
+        return hProfile;
+    }
+
+    // Any failure: release the placeholder and whatever is attached to it
+    cmsCloseProfile(hProfile);
+    return NULL;
+}
+
+cmsHPROFILE CMSEXPORT cmsOpenProfileFromMem(const void* MemPtr, cmsUInt32Number dwSize)
+{   // Same as cmsOpenProfileFromMemTHR, with NULL passed as the context
+    return cmsOpenProfileFromMemTHR(NULL, MemPtr, dwSize);
+}
+
+
+
+// Dump tag contents to Icc ->IOhandler, recording the offset and size of every tag in Icc.
+// If the profile is being modified, untouched tags (those with no data in memory) are copied
+// from FileOrig. Returns FALSE on any I/O or allocation failure, TRUE otherwise.
+static
+cmsBool SaveTags(_cmsICCPROFILE* Icc, _cmsICCPROFILE* FileOrig)
+{
+    cmsUInt8Number* Data;
+    cmsUInt32Number i;
+    cmsUInt32Number Begin;
+    cmsIOHANDLER* io = Icc ->IOhandler;
+    cmsTagDescriptor* TagDescriptor;
+    cmsTagTypeSignature TypeBase;
+    cmsTagTypeSignature Type;
+    cmsTagTypeHandler* TypeHandler;
+    cmsFloat64Number   Version = cmsGetProfileVersion((cmsHPROFILE) Icc);
+    cmsTagTypeHandler LocalTypeHandler;
+
+    for (i=0; i < Icc -> TagCount; i++) {
+        // Deleted tags are marked with a zero signature
+        if (Icc ->TagNames[i] == (cmsTagSignature) 0) continue;
+
+        // Linked tags are not written
+        if (Icc ->TagLinked[i] != (cmsTagSignature) 0) continue;
+
+        Icc -> TagOffsets[i] = Begin = io ->UsedSpace;
+
+        Data = (cmsUInt8Number*)  Icc -> TagPtrs[i];
+
+        if (!Data) {
+
+            // Reach here if we are copying a tag from a disk-based ICC profile which has not been modified by user.
+            // In this case a blind copy of the block data is performed
+            if (FileOrig != NULL && Icc -> TagOffsets[i]) {
+
+                cmsUInt32Number TagSize   = FileOrig -> TagSizes[i];
+                cmsUInt32Number TagOffset = FileOrig -> TagOffsets[i];
+                void* Mem;
+
+                if (!FileOrig ->IOhandler->Seek(FileOrig ->IOhandler, TagOffset)) return FALSE;
+
+                Mem = _cmsMalloc(Icc ->ContextID, TagSize);
+                if (Mem == NULL) return FALSE;
+                // The temporary buffer has to be released on the failure paths as well
+                if (FileOrig ->IOhandler->Read(FileOrig->IOhandler, Mem, TagSize, 1) != 1 ||
+                    !io ->Write(io, TagSize, Mem)) {
+                    _cmsFree(Icc ->ContextID, Mem);
+                    return FALSE;
+                }
+                _cmsFree(Icc ->ContextID, Mem);
+
+                Icc -> TagSizes[i] = (io ->UsedSpace - Begin);
+
+                // Align to 32 bit boundary.
+                if (! _cmsWriteAlignment(io))
+                    return FALSE;
+            }
+
+            continue;
+        }
+
+        // Should this tag be saved as RAW? If so, tagsizes should be specified in advance (no further cooking is done)
+        if (Icc ->TagSaveAsRaw[i]) {
+
+            if (io -> Write(io, Icc ->TagSizes[i], Data) != 1) return FALSE;
+        }
+        else {
+
+            // Search for support on this tag
+            TagDescriptor = _cmsGetTagDescriptor(Icc-> ContextID, Icc -> TagNames[i]);
+            if (TagDescriptor == NULL) continue;                        // Unsupported, ignore it
+            // The descriptor may pick the type from version and data; otherwise its first supported type is used
+            if (TagDescriptor ->DecideType != NULL) {
+
+                Type = TagDescriptor ->DecideType(Version, Data);
+            }
+            else {
+
+                Type = TagDescriptor ->SupportedTypes[0];
+            }
+
+            TypeHandler =  _cmsGetTagTypeHandler(Icc->ContextID, Type);
+
+            if (TypeHandler == NULL) {
+                cmsSignalError(Icc ->ContextID, cmsERROR_INTERNAL, "(Internal) no handler for tag %x", Icc -> TagNames[i]);
+                continue;
+            }
+
+            TypeBase = TypeHandler ->Signature;
+            if (!_cmsWriteTypeBase(io, TypeBase))
+                return FALSE;
+            // Work on a copy of the handler so context and version can be attached to it
+            LocalTypeHandler = *TypeHandler;
+            LocalTypeHandler.ContextID  = Icc ->ContextID;
+            LocalTypeHandler.ICCVersion = Icc ->Version;
+            if (!LocalTypeHandler.WritePtr(&LocalTypeHandler, io, Data, TagDescriptor ->ElemCount)) {
+                char String[5];
+
+                _cmsTagSignature2String(String, (cmsTagSignature) TypeBase);
+                cmsSignalError(Icc ->ContextID, cmsERROR_WRITE, "Couldn't write type '%s'", String);
+                return FALSE;
+            }
+        }
+
+        Icc -> TagSizes[i] = (io ->UsedSpace - Begin);
+
+        // Align to 32 bit boundary.
+        if (! _cmsWriteAlignment(io))
+            return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Copies, for every linked tag, the offset and size of the tag it points to.
+// Links whose destination cannot be found are left untouched. Always returns TRUE.
+static
+cmsBool SetLinks( _cmsICCPROFILE* Icc)
+{
+    cmsUInt32Number nTag;
+
+    for (nTag = 0; nTag < Icc -> TagCount; nTag++) {
+        cmsTagSignature Target = Icc ->TagLinked[nTag];
+        int nTarget;
+
+        // A zero signature means this entry is not a link
+        if (Target == (cmsTagSignature) 0) continue;
+
+        nTarget = _cmsSearchTag(Icc, Target, FALSE);
+        if (nTarget < 0) continue;              // Destination not found
+
+        Icc ->TagOffsets[nTag] = Icc ->TagOffsets[nTarget];
+        Icc ->TagSizes[nTag]   = Icc ->TagSizes[nTarget];
+    }
+
+    return TRUE;
+}
+
+// Low-level save to IOHANDLER. It returns the number of bytes used to
+// store the profile, or zero on error. io may be NULL and in this case
+// no data is written--only sizes are calculated.
+// The profile object is used as scratch space while saving: a copy is taken
+// after locking and put back on every exit path that follows.
+cmsUInt32Number CMSEXPORT cmsSaveProfileToIOhandler(cmsHPROFILE hProfile, cmsIOHANDLER* io)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    _cmsICCPROFILE Keep;
+    cmsIOHANDLER* PrevIO = NULL;
+    cmsUInt32Number UsedSpace;
+    cmsContext ContextID;
+
+    _cmsAssert(hProfile != NULL);
+
+    if (!_cmsLockMutex(Icc->ContextID, Icc->UsrMutex)) return 0;
+    memmove(&Keep, Icc, sizeof(_cmsICCPROFILE));
+
+    ContextID = cmsGetProfileContextID(hProfile);
+    PrevIO = Icc ->IOhandler = cmsOpenIOhandlerFromNULL(ContextID);
+    if (PrevIO == NULL) {
+        // Put the original IO handler back; otherwise the profile would be left with a NULL one
+        memmove(Icc, &Keep, sizeof(_cmsICCPROFILE));
+        _cmsUnlockMutex(Icc->ContextID, Icc->UsrMutex);
+        return 0;
+    }
+
+    // Pass #1 does compute offsets
+    if (!_cmsWriteHeader(Icc, 0)) goto Error;
+    if (!SaveTags(Icc, &Keep)) goto Error;
+
+    UsedSpace = PrevIO ->UsedSpace;
+
+    // Pass #2 does save to iohandler
+    if (io != NULL) {
+
+        Icc ->IOhandler = io;
+        if (!SetLinks(Icc)) goto Error;
+        if (!_cmsWriteHeader(Icc, UsedSpace)) goto Error;
+        if (!SaveTags(Icc, &Keep)) goto Error;
+    }
+
+    memmove(Icc, &Keep, sizeof(_cmsICCPROFILE));
+    if (!cmsCloseIOhandler(PrevIO))
+        UsedSpace = 0; // As a error marker
+
+    _cmsUnlockMutex(Icc->ContextID, Icc->UsrMutex);
+
+    return UsedSpace;
+
+Error:
+    cmsCloseIOhandler(PrevIO);
+    memmove(Icc, &Keep, sizeof(_cmsICCPROFILE));
+    _cmsUnlockMutex(Icc->ContextID, Icc->UsrMutex);
+    return 0;
+}
+
+
+// Low-level save to disk. The file is opened for writing, the profile is saved
+// through it and, if saving or closing fails, the output file is removed.
+cmsBool  CMSEXPORT cmsSaveProfileToFile(cmsHPROFILE hProfile, const char* FileName)
+{
+    cmsBool Saved, Closed;
+    cmsIOHANDLER* FileIO = cmsOpenIOhandlerFromFile(cmsGetProfileContextID(hProfile), FileName, "w");
+
+    if (FileIO == NULL) return FALSE;
+
+    Saved  = (cmsSaveProfileToIOhandler(hProfile, FileIO) != 0);
+    Closed = cmsCloseIOhandler(FileIO);     // The handler is closed whatever the save result was
+    Saved &= Closed;
+
+    // remove() is C99 per 7.19.4.1. Its return value has to be ignored in this case
+    if (!Saved) remove(FileName);
+    return Saved;
+}
+
+// Saves the profile to an already opened stdio stream, which is wrapped in a
+// stream IO handler for the duration of the call. Returns FALSE on failure.
+cmsBool CMSEXPORT cmsSaveProfileToStream(cmsHPROFILE hProfile, FILE* Stream)
+{
+    cmsBool Saved;
+    cmsIOHANDLER* StreamIO = cmsOpenIOhandlerFromStream(cmsGetProfileContextID(hProfile), Stream);
+
+    if (StreamIO == NULL) return FALSE;
+
+    Saved  = (cmsSaveProfileToIOhandler(hProfile, StreamIO) != 0);
+    Saved &= cmsCloseIOhandler(StreamIO);
+
+    return Saved;
+}
+
+
+// Saves the profile to a caller-supplied memory block; *BytesNeeded is passed
+// as the size of that block. With MemPtr == NULL nothing is written and
+// *BytesNeeded receives the number of bytes needed to hold the profile.
+cmsBool CMSEXPORT cmsSaveProfileToMem(cmsHPROFILE hProfile, void *MemPtr, cmsUInt32Number* BytesNeeded)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsIOHANDLER* MemIO;
+    cmsBool Saved;
+
+    _cmsAssert(BytesNeeded != NULL);
+
+    if (MemPtr != NULL) {
+        // That is a real write operation into the caller's buffer
+        MemIO = cmsOpenIOhandlerFromMem(ContextID, MemPtr, *BytesNeeded, "w");
+        if (MemIO == NULL) return FALSE;
+
+        Saved  = (cmsSaveProfileToIOhandler(hProfile, MemIO) != 0);
+        Saved &= cmsCloseIOhandler(MemIO);
+        return Saved;
+    }
+
+    // Size query only: with a NULL handler the saver just computes sizes
+    *BytesNeeded = cmsSaveProfileToIOhandler(hProfile, NULL);
+    return (*BytesNeeded != 0) ? TRUE : FALSE;
+}
+
+
+
+// Closes a profile freeing any involved resources (tag data, IO handler, mutex, profile object). Returns FALSE if hProfile is NULL or a save/close step failed
+cmsBool  CMSEXPORT cmsCloseProfile(cmsHPROFILE hProfile)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    cmsBool  rc = TRUE;
+    cmsUInt32Number i;
+
+    if (!Icc) return FALSE;
+
+    // Was open in write mode? Then the profile is first saved to the file named by Icc ->IOhandler->PhysicalFile
+    if (Icc ->IsWrite) {
+
+        Icc ->IsWrite = FALSE;      // Assure no further writing
+        rc &= cmsSaveProfileToFile(hProfile, Icc ->IOhandler->PhysicalFile);
+    }
+
+    for (i=0; i < Icc -> TagCount; i++) {
+
+        if (Icc -> TagPtrs[i]) {
+            // Tags with a type handler are released through its FreePtr; the rest are freed as plain memory
+            cmsTagTypeHandler* TypeHandler = Icc ->TagTypeHandlers[i];
+
+            if (TypeHandler != NULL) {
+                cmsTagTypeHandler LocalTypeHandler = *TypeHandler;
+
+                LocalTypeHandler.ContextID = Icc ->ContextID;              // Passed as additional parameters
+                LocalTypeHandler.ICCVersion = Icc ->Version;
+                LocalTypeHandler.FreePtr(&LocalTypeHandler, Icc -> TagPtrs[i]);
+            }
+            else
+                _cmsFree(Icc ->ContextID, Icc ->TagPtrs[i]);
+        }
+    }
+
+    if (Icc ->IOhandler != NULL) {
+        rc &= cmsCloseIOhandler(Icc->IOhandler);
+    }
+
+    _cmsDestroyMutex(Icc->ContextID, Icc->UsrMutex);
+
+    _cmsFree(Icc ->ContextID, Icc);   // Free placeholder memory
+
+    return rc;
+}
+
+
+// -------------------------------------------------------------------------------------------------------------------
+
+
+// Returns TRUE if Type is one of the types listed as supported by the tag
+// descriptor. At most MAX_TYPES_IN_LCMS_PLUGIN entries are examined.
+static
+cmsBool IsTypeSupported(cmsTagDescriptor* TagDescriptor, cmsTagTypeSignature Type)
+{
+    cmsUInt32Number nTypes = TagDescriptor->nSupportedTypes;
+    cmsUInt32Number k = 0;
+
+    // Clamp the number of entries to MAX_TYPES_IN_LCMS_PLUGIN
+    if (nTypes >= MAX_TYPES_IN_LCMS_PLUGIN) nTypes = MAX_TYPES_IN_LCMS_PLUGIN;
+
+    while (k < nTypes) {
+        if (TagDescriptor ->SupportedTypes[k++] == Type) return TRUE;
+    }
+    return FALSE;
+}
+
+
+// That's the main read function: returns the data of tag sig as kept in Icc -> TagPtrs, reading it through its type handler on first use. Returns NULL on any failure
+void* CMSEXPORT cmsReadTag(cmsHPROFILE hProfile, cmsTagSignature sig)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    cmsIOHANDLER* io = Icc ->IOhandler;
+    cmsTagTypeHandler* TypeHandler;
+    cmsTagTypeHandler LocalTypeHandler;
+    cmsTagDescriptor*  TagDescriptor;
+    cmsTagTypeSignature BaseType;
+    cmsUInt32Number Offset, TagSize;
+    cmsUInt32Number ElemCount;
+    int n;
+
+    if (!_cmsLockMutex(Icc->ContextID, Icc ->UsrMutex)) return NULL;
+
+    n = _cmsSearchTag(Icc, sig, TRUE);
+    if (n < 0) goto Error;               // Not found, return NULL
+
+
+    // If the element is already in memory, return the pointer
+    if (Icc -> TagPtrs[n]) {
+
+        if (Icc->TagTypeHandlers[n] == NULL) goto Error;
+
+        // Sanity check: the data in memory must be of a type this tag supports
+        BaseType = Icc->TagTypeHandlers[n]->Signature;
+        if (BaseType == 0) goto Error;
+
+        TagDescriptor = _cmsGetTagDescriptor(Icc->ContextID, sig);
+        if (TagDescriptor == NULL) goto Error;
+
+        if (!IsTypeSupported(TagDescriptor, BaseType)) goto Error;
+
+        if (Icc ->TagSaveAsRaw[n]) goto Error;  // We don't support read raw tags as cooked
+
+        _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+        return Icc -> TagPtrs[n];
+    }
+
+    // We need to read it. Get the offset and size to the file
+    Offset    = Icc -> TagOffsets[n];
+    TagSize   = Icc -> TagSizes[n];
+
+    if (TagSize < 8) goto Error;         // Too small to hold the 8 bytes of the type base
+
+    // Seek to its location
+    if (!io -> Seek(io, Offset))
+        goto Error;
+
+    // Search for support on this tag
+    TagDescriptor = _cmsGetTagDescriptor(Icc-> ContextID, sig);
+    if (TagDescriptor == NULL) {
+
+        char String[5];
+
+        _cmsTagSignature2String(String, sig);
+
+        // An unknown element was found.
+        cmsSignalError(Icc ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown tag type '%s' found.", String);
+        goto Error;     // Unsupported.
+    }
+
+    // if supported, get type and check if in list
+    BaseType = _cmsReadTypeBase(io);
+    if (BaseType == 0) goto Error;
+
+    if (!IsTypeSupported(TagDescriptor, BaseType)) goto Error;
+
+    TagSize  -= 8;       // Already read by the type base logic
+
+    // Get type handler
+    TypeHandler = _cmsGetTagTypeHandler(Icc ->ContextID, BaseType);
+    if (TypeHandler == NULL) goto Error;
+    LocalTypeHandler = *TypeHandler;
+
+
+    // Read the tag. The handler is recorded in the profile before the read is attempted
+    Icc -> TagTypeHandlers[n] = TypeHandler;
+
+    LocalTypeHandler.ContextID = Icc ->ContextID;
+    LocalTypeHandler.ICCVersion = Icc ->Version;
+    Icc -> TagPtrs[n] = LocalTypeHandler.ReadPtr(&LocalTypeHandler, io, &ElemCount, TagSize);
+
+    // The tag type is supported, but something wrong happened and we cannot read the tag.
+    // let know the user about this (although it is just a warning)
+    if (Icc -> TagPtrs[n] == NULL) {
+
+        char String[5];
+
+        _cmsTagSignature2String(String, sig);
+        cmsSignalError(Icc ->ContextID, cmsERROR_CORRUPTION_DETECTED, "Corrupted tag '%s'", String);
+        goto Error;
+    }
+
+    // This is a weird error that may be a symptom of something more serious, the number of
+    // stored item is actually less than the number of required elements. It is only reported: the data is still returned.
+    if (ElemCount < TagDescriptor ->ElemCount) {
+
+        char String[5];
+
+        _cmsTagSignature2String(String, sig);
+        cmsSignalError(Icc ->ContextID, cmsERROR_CORRUPTION_DETECTED, "'%s' Inconsistent number of items: expected %d, got %d",
+            String, TagDescriptor ->ElemCount, ElemCount);
+    }
+
+
+    // Return the data
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return Icc -> TagPtrs[n];
+
+
+    // Return error and unlock the data
+Error:
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return NULL;
+}
+
+
+// Get true type of data. Returns 0 if the tag is not found or no type handler is attached to it yet
+cmsTagTypeSignature _cmsGetTagTrueType(cmsHPROFILE hProfile, cmsTagSignature sig)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    cmsTagTypeHandler* TypeHandler;
+    int n;
+
+    // Search for given tag in ICC profile directory
+    n = _cmsSearchTag(Icc, sig, TRUE);
+    if (n < 0) return (cmsTagTypeSignature) 0;                // Not found, return NULL
+    // Get the handler. The true type is there, but the handler may be missing (cmsReadTag checks for that as well)
+    TypeHandler =  Icc -> TagTypeHandlers[n];
+    if (TypeHandler == NULL) return (cmsTagTypeSignature) 0;
+    return TypeHandler ->Signature;
+}
+
+
+// Write a single tag. This just keeps track of the tag into a list of "to be written". If the tag is already
+// in that list, the previous version is deleted. A NULL data pointer deletes the tag. Returns FALSE on failure.
+cmsBool CMSEXPORT cmsWriteTag(cmsHPROFILE hProfile, cmsTagSignature sig, const void* data)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    cmsTagTypeHandler* TypeHandler = NULL;
+    cmsTagTypeHandler LocalTypeHandler;
+    cmsTagDescriptor* TagDescriptor = NULL;
+    cmsTagTypeSignature Type;
+    int i;
+    cmsFloat64Number Version;
+    char TypeString[5], SigString[5];
+
+    if (!_cmsLockMutex(Icc->ContextID, Icc ->UsrMutex)) return FALSE;
+
+    // To delete tags: a NULL data pointer requests removal of the tag
+    if (data == NULL) {
+
+         // Delete the tag
+         i = _cmsSearchTag(Icc, sig, FALSE);
+         if (i >= 0) {
+
+             // Use zero as a mark of deleted
+             _cmsDeleteTagByPos(Icc, i);
+             Icc ->TagNames[i] = (cmsTagSignature) 0;
+             _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+             return TRUE;
+         }
+         // Didn't find the tag
+        goto Error;
+    }
+
+    if (!_cmsNewTag(Icc, sig, &i)) goto Error;
+
+    // This is not raw
+    Icc ->TagSaveAsRaw[i] = FALSE;
+
+    // This is not a link
+    Icc ->TagLinked[i] = (cmsTagSignature) 0;
+
+    // Get information about the TAG.
+    TagDescriptor = _cmsGetTagDescriptor(Icc-> ContextID, sig);
+    if (TagDescriptor == NULL){
+         cmsSignalError(Icc ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported tag '%x'", sig);
+        goto Error;
+    }
+
+
+    // Now we need to know which type to use. It depends on the version.
+    Version = cmsGetProfileVersion(hProfile);
+
+    if (TagDescriptor ->DecideType != NULL) {
+
+        // Let the tag descriptor to decide the type base on depending on
+        // the data. This is useful for example on parametric curves, where
+        // curves specified by a table cannot be saved as parametric and needs
+        // to be casted to single v2-curves, even on v4 profiles.
+
+        Type = TagDescriptor ->DecideType(Version, data);
+    }
+    else {
+
+        Type = TagDescriptor ->SupportedTypes[0];
+    }
+
+    // Does the tag support this type?
+    if (!IsTypeSupported(TagDescriptor, Type)) {
+
+        _cmsTagSignature2String(TypeString, (cmsTagSignature) Type);
+        _cmsTagSignature2String(SigString,  sig);
+
+        cmsSignalError(Icc ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported type '%s' for tag '%s'", TypeString, SigString);
+        goto Error;
+    }
+
+    // Do we have a handler for this type?
+    TypeHandler =  _cmsGetTagTypeHandler(Icc->ContextID, Type);
+    if (TypeHandler == NULL) {
+
+        _cmsTagSignature2String(TypeString, (cmsTagSignature) Type);
+        _cmsTagSignature2String(SigString,  sig);
+
+        cmsSignalError(Icc ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported type '%s' for tag '%s'", TypeString, SigString);
+        goto Error;           // Should never happen
+    }
+
+
+    // Fill fields on icc structure
+    Icc ->TagTypeHandlers[i]  = TypeHandler;
+    Icc ->TagNames[i]         = sig;
+    Icc ->TagSizes[i]         = 0;
+    Icc ->TagOffsets[i]       = 0;
+    // The profile stores what the type handler's DupPtr returns for the caller's data
+    LocalTypeHandler = *TypeHandler;
+    LocalTypeHandler.ContextID  = Icc ->ContextID;
+    LocalTypeHandler.ICCVersion = Icc ->Version;
+    Icc ->TagPtrs[i]            = LocalTypeHandler.DupPtr(&LocalTypeHandler, data, TagDescriptor ->ElemCount);
+
+    if (Icc ->TagPtrs[i] == NULL)  {
+
+        _cmsTagSignature2String(TypeString, (cmsTagSignature) Type);
+        _cmsTagSignature2String(SigString,  sig);
+        cmsSignalError(Icc ->ContextID, cmsERROR_CORRUPTION_DETECTED, "Malformed struct in type '%s' for tag '%s'", TypeString, SigString);
+
+        goto Error;
+    }
+
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return TRUE;
+
+Error:
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return FALSE;
+
+}
+
+// Read and write raw data. The only way those functions would work and keep consistency with normal read and write
+// is to do an additional step of serialization. That means, readRaw would issue a normal read and then convert the obtained
+// data to raw bytes by using the "write" serialization logic. And vice-versa. I know this may end in situations where
+// raw data written does not exactly correspond with the raw data proposed to cmsWriteRaw data, but this approach allows
+// to write a tag as raw data and then read it as handled.
+//
+// cmsReadRawTag stores up to BufferSize bytes of tag sig in data and returns the number of bytes stored.
+// If data is NULL nothing is stored and the size of the tag is returned instead. Returns 0 on error.
+cmsUInt32Number CMSEXPORT cmsReadRawTag(cmsHPROFILE hProfile, cmsTagSignature sig, void* data, cmsUInt32Number BufferSize)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    void *Object;
+    int i;
+    cmsIOHANDLER* MemIO;
+    cmsTagTypeHandler* TypeHandler = NULL;
+    cmsTagTypeHandler LocalTypeHandler;
+    cmsTagDescriptor* TagDescriptor = NULL;
+    cmsUInt32Number rc;
+    cmsUInt32Number Offset, TagSize;
+
+    if (!_cmsLockMutex(Icc->ContextID, Icc ->UsrMutex)) return 0;
+
+    // Search for given tag in ICC profile directory
+    i = _cmsSearchTag(Icc, sig, TRUE);
+    if (i < 0) goto Error;                 // Not found
+
+    // It is already read?
+    if (Icc -> TagPtrs[i] == NULL) {
+
+        // Not yet, get original position
+        Offset   = Icc ->TagOffsets[i];
+        TagSize  = Icc ->TagSizes[i];
+
+        // read the data directly, don't keep copy
+        if (data != NULL) {
+
+            if (BufferSize < TagSize)
+                TagSize = BufferSize;
+
+            if (!Icc ->IOhandler ->Seek(Icc ->IOhandler, Offset)) goto Error;
+            if (!Icc ->IOhandler ->Read(Icc ->IOhandler, data, 1, TagSize)) goto Error;
+
+            _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+            return TagSize;
+        }
+
+        _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+        return Icc ->TagSizes[i];
+    }
+
+    // The data has been already read, or written. But wait!, maybe the user chose to save as
+    // raw data. In this case, return the raw data directly
+    if (Icc ->TagSaveAsRaw[i]) {
+
+        if (data != NULL)  {
+
+            TagSize  = Icc ->TagSizes[i];
+            if (BufferSize < TagSize)
+                TagSize = BufferSize;
+
+            memmove(data, Icc ->TagPtrs[i], TagSize);
+
+            _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+            return TagSize;
+        }
+
+        _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+        return Icc ->TagSizes[i];
+    }
+
+    // Already read, or previously set by cmsWriteTag(). We need to serialize that
+    // data to raw in order to maintain consistency.
+
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    Object = cmsReadTag(hProfile, sig);
+    if (!_cmsLockMutex(Icc->ContextID, Icc ->UsrMutex)) return 0;
+
+    if (Object == NULL) goto Error;
+
+    // Now we need to serialize to a memory block: just use a memory iohandler
+    if (data == NULL) {
+        MemIO = cmsOpenIOhandlerFromNULL(cmsGetProfileContextID(hProfile));
+    } else{
+        MemIO = cmsOpenIOhandlerFromMem(cmsGetProfileContextID(hProfile), data, BufferSize, "w");
+    }
+    if (MemIO == NULL) goto Error;
+
+    // Obtain type handling for the tag. From here on MemIO is open, so every
+    // failure path has to close it before leaving
+    TypeHandler = Icc ->TagTypeHandlers[i];
+    TagDescriptor = _cmsGetTagDescriptor(Icc-> ContextID, sig);
+    if (TagDescriptor == NULL || TypeHandler == NULL) {
+        cmsCloseIOhandler(MemIO);
+        goto Error;
+    }
+
+    // Serialize
+    LocalTypeHandler = *TypeHandler;
+    LocalTypeHandler.ContextID  = Icc ->ContextID;
+    LocalTypeHandler.ICCVersion = Icc ->Version;
+
+    if (!_cmsWriteTypeBase(MemIO, TypeHandler ->Signature)) {
+        cmsCloseIOhandler(MemIO);
+        goto Error;
+    }
+
+    if (!LocalTypeHandler.WritePtr(&LocalTypeHandler, MemIO, Object, TagDescriptor ->ElemCount)) {
+        cmsCloseIOhandler(MemIO);
+        goto Error;
+    }
+
+    // Get Size and close
+    rc = MemIO ->Tell(MemIO);
+    cmsCloseIOhandler(MemIO);      // Ignore return code this time
+
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return rc;
+
+Error:
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return 0;
+}
+
+// Similar to the anterior. This function allows to write directly to the ICC profile any data, without
+// checking anything. As a rule, mixing Raw with cooked doesn't work, so writing a tag as raw and then reading
+// it as cooked without serializing does result into an error. If that is what you want, you will need to dump
+// the profile to memory or disk and then reopen it.
+// Returns FALSE if the mutex cannot be locked, no tag slot can be obtained or the block cannot be duplicated.
+cmsBool CMSEXPORT cmsWriteRawTag(cmsHPROFILE hProfile, cmsTagSignature sig, const void* data, cmsUInt32Number Size)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    cmsBool Ok;
+    int i;
+
+    if (!_cmsLockMutex(Icc->ContextID, Icc ->UsrMutex)) return FALSE;
+
+    if (!_cmsNewTag(Icc, sig, &i)) {
+        _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+         return FALSE;
+    }
+
+    // Mark the tag as being written as RAW
+    Icc ->TagSaveAsRaw[i] = TRUE;
+    Icc ->TagNames[i]     = sig;
+    Icc ->TagLinked[i]    = (cmsTagSignature) 0;
+
+    // Keep a copy of the block. On failure the slot is marked as deleted; this is
+    // done before unlocking so the profile is never modified outside the mutex.
+    Icc ->TagPtrs[i]  = _cmsDupMem(Icc ->ContextID, data, Size);
+    Icc ->TagSizes[i] = Size;
+    Ok = (Icc ->TagPtrs[i] != NULL);
+    if (!Ok) Icc ->TagNames[i] = (cmsTagSignature) 0;
+
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return Ok;
+}
+
+// Makes tag `sig` a link to tag `dest`, so several tag entries can be collapsed
+// to the same block in the profile. Returns FALSE if the mutex cannot be
+// locked or no tag slot can be obtained.
+cmsBool CMSEXPORT cmsLinkTag(cmsHPROFILE hProfile, cmsTagSignature sig, cmsTagSignature dest)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+    cmsBool Done = FALSE;
+    int Slot;
+
+    if (!_cmsLockMutex(Icc->ContextID, Icc ->UsrMutex)) return FALSE;
+
+    if (_cmsNewTag(Icc, sig, &Slot)) {
+        // A link keeps no data of its own: only the name and the destination are stored
+        Icc ->TagNames[Slot]     = sig;
+        Icc ->TagLinked[Slot]    = dest;
+        Icc ->TagSaveAsRaw[Slot] = FALSE;
+        Icc ->TagPtrs[Slot]      = NULL;
+        Icc ->TagOffsets[Slot]   = 0;
+        Icc ->TagSizes[Slot]     = 0;
+        Done = TRUE;
+    }
+
+    _cmsUnlockMutex(Icc->ContextID, Icc ->UsrMutex);
+    return Done;
+}
+
+
+// Returns the tag linked to sig, in the case two tags are sharing the same
+// resource. The result is 0 when sig is not found in the profile directory;
+// otherwise it is whatever is stored in the TagLinked entry of the tag.
+cmsTagSignature  CMSEXPORT cmsTagLinkedTo(cmsHPROFILE hProfile, cmsTagSignature sig)
+{
+    _cmsICCPROFILE* Icc = (_cmsICCPROFILE*) hProfile;
+
+    // Search for given tag in ICC profile directory
+    int Pos = _cmsSearchTag(Icc, sig, FALSE);
+
+    return (Pos >= 0) ? Icc -> TagLinked[Pos] : (cmsTagSignature) 0;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsio1.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsio1.cpp
new file mode 100755
index 0000000000..2888f610e2
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsio1.cpp
@@ -0,0 +1,1029 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Read tags using low-level functions, provides necessary glue code to adapt versions, etc.
+
+// LUT tags. Each table is indexed by rendering intent (see the [Intent] lookups in the readers below)
+static const cmsTagSignature Device2PCS16[]   =  {cmsSigAToB0Tag,     // Perceptual
+                                                  cmsSigAToB1Tag,     // Relative colorimetric
+                                                  cmsSigAToB2Tag,     // Saturation
+                                                  cmsSigAToB1Tag };   // Absolute colorimetric (same tag as relative colorimetric)
+
+static const cmsTagSignature Device2PCSFloat[] = {cmsSigDToB0Tag,     // Perceptual
+                                                  cmsSigDToB1Tag,     // Relative colorimetric
+                                                  cmsSigDToB2Tag,     // Saturation
+                                                  cmsSigDToB3Tag };   // Absolute colorimetric (has a tag of its own)
+
+static const cmsTagSignature PCS2Device16[]    = {cmsSigBToA0Tag,     // Perceptual
+                                                  cmsSigBToA1Tag,     // Relative colorimetric
+                                                  cmsSigBToA2Tag,     // Saturation
+                                                  cmsSigBToA1Tag };   // Absolute colorimetric (same tag as relative colorimetric)
+
+static const cmsTagSignature PCS2DeviceFloat[] = {cmsSigBToD0Tag,     // Perceptual
+                                                  cmsSigBToD1Tag,     // Relative colorimetric
+                                                  cmsSigBToD2Tag,     // Saturation
+                                                  cmsSigBToD3Tag };   // Absolute colorimetric (has a tag of its own)
+
+
+// Factors to convert from 1.15 fixed point to 0..1.0 range and vice-versa (applied to the matrix coefficients of the shapers below)
+#define InpAdj   (1.0/MAX_ENCODEABLE_XYZ)     // (65536.0/(65535.0*2.0))
+#define OutpAdj  (MAX_ENCODEABLE_XYZ)         // ((2.0*65535.0)/65536.0)
+
+// Several resources for gray conversions. Each one is handed to cmsStageAllocMatrix by the gray pipeline builders.
+static const cmsFloat64Number GrayInputMatrix[] = { (InpAdj*cmsD50X),  (InpAdj*cmsD50Y),  (InpAdj*cmsD50Z) };   // gray input, PCS other than Lab
+static const cmsFloat64Number OneToThreeInputMatrix[] = { 1, 1, 1 };   // gray input, Lab PCS
+static const cmsFloat64Number PickYMatrix[] = { 0, (OutpAdj*cmsD50Y), 0 };   // gray output, PCS other than Lab
+static const cmsFloat64Number PickLstarMatrix[] = { 1, 0, 0 };   // gray output, Lab PCS
+
+// Get a media white point fixing some issues found in certain old profiles
+cmsBool  _cmsReadMediaWhitePoint(cmsCIEXYZ* Dest, cmsHPROFILE hProfile)
+{
+    // Copies the media white point to use for hProfile into *Dest.
+    // The tag itself is only read: its value is copied out, never modified.
+    // Some old profiles carry questionable values, hence the D50 substitutions.
+    const cmsCIEXYZ* MediaWhite;
+    cmsBool UseD50;
+
+    _cmsAssert(Dest != NULL);
+
+    MediaWhite = (const cmsCIEXYZ*) cmsReadTag(hProfile, cmsSigMediaWhitePointTag);
+
+    // D50 replaces the stored value in two cases:
+    //   - the profile has no media white point tag at all
+    //   - the profile is a display-class one older than V4 (those should give D50)
+    UseD50 = (MediaWhite == NULL) ||
+             (cmsGetEncodedICCversion(hProfile) < 0x4000000 &&
+              cmsGetDeviceClass(hProfile) == cmsSigDisplayClass);
+
+    if (UseD50)
+        *Dest = *cmsD50_XYZ();
+    else
+        *Dest = *MediaWhite;
+
+    // Every path yields a usable white point, so the result is always TRUE
+    return TRUE;
+}
+
+
+// Chromatic adaptation matrix. Fix some issues as well
+cmsBool  _cmsReadCHAD(cmsMAT3* Dest, cmsHPROFILE hProfile)
+{
+    // Fills *Dest with the chromatic adaptation matrix to use for hProfile.
+    // The only path that can report failure is the call to _cmsAdaptationMatrix.
+    const cmsMAT3* Stored;
+    cmsCIEXYZ* White;
+
+    _cmsAssert(Dest != NULL);
+
+    // A chromatic adaptation tag in the profile always wins
+    Stored = (const cmsMAT3*) cmsReadTag(hProfile, cmsSigChromaticAdaptationTag);
+    if (Stored != NULL) {
+        *Dest = *Stored;
+        return TRUE;
+    }
+
+    // No CHAD available, identity is the default answer
+    _cmsMAT3identity(Dest);
+
+    // Only display profiles older than V4 get further treatment
+    if (cmsGetEncodedICCversion(hProfile) >= 0x4000000)
+        return TRUE;
+
+    if (cmsGetDeviceClass(hProfile) != cmsSigDisplayClass)
+        return TRUE;
+
+    // For those, adapt from the stored media white point to D50 when there is one
+    White = (cmsCIEXYZ*) cmsReadTag(hProfile, cmsSigMediaWhitePointTag);
+    if (White == NULL) {
+        _cmsMAT3identity(Dest);
+        return TRUE;
+    }
+
+    return _cmsAdaptationMatrix(Dest, NULL, White, cmsD50_XYZ());
+}
+
+
+// Auxiliary, read colorants as a MAT3 structure. Used by any function that needs a matrix-shaper
+static
+cmsBool ReadICCMatrixRGB2XYZ(cmsMAT3* r, cmsHPROFILE hProfile)
+{
+    const cmsCIEXYZ* Red;
+    const cmsCIEXYZ* Green;
+    const cmsCIEXYZ* Blue;
+
+    _cmsAssert(r != NULL);
+
+    Red   = (const cmsCIEXYZ*) cmsReadTag(hProfile, cmsSigRedColorantTag);
+    Green = (const cmsCIEXYZ*) cmsReadTag(hProfile, cmsSigGreenColorantTag);
+    Blue  = (const cmsCIEXYZ*) cmsReadTag(hProfile, cmsSigBlueColorantTag);
+    if (Red == NULL || Green == NULL || Blue == NULL) return FALSE;
+
+    // Each colorant fills one column: row k gathers the k-th coordinate of red, green and blue
+    _cmsVEC3init(&r->v[0], Red->X, Green->X, Blue->X);
+    _cmsVEC3init(&r->v[1], Red->Y, Green->Y, Blue->Y);
+    _cmsVEC3init(&r->v[2], Red->Z, Green->Z, Blue->Z);
+    return TRUE;
+}
+
+
+// Gray input pipeline (allocated as 1 channel in, 3 out) built from the GrayTRC tag. Returns a new pipeline, or NULL on failure
+static
+cmsPipeline* BuildGrayInputMatrixPipeline(cmsHPROFILE hProfile)
+{
+    cmsToneCurve *GrayTRC;
+    cmsPipeline* Lut;
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+
+    GrayTRC = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigGrayTRCTag);
+    if (GrayTRC == NULL) return NULL;
+
+    Lut = cmsPipelineAlloc(ContextID, 1, 3);
+    if (Lut == NULL)
+        goto Error;
+
+    if (cmsGetPCS(hProfile) == cmsSigLabData) {
+
+        // In this case we implement the profile as an identity matrix plus 3 tone curves
+        cmsUInt16Number Zero[2] = { 0x8080, 0x8080 };
+        cmsToneCurve* EmptyTab;
+        cmsToneCurve* LabCurves[3];
+        // Two-entry table holding 0x8080 twice; it is shared by the second and third curves
+        EmptyTab = cmsBuildTabulatedToneCurve16(ContextID, 2, Zero);
+
+        if (EmptyTab == NULL)
+            goto Error;
+
+        LabCurves[0] = GrayTRC;
+        LabCurves[1] = EmptyTab;
+        LabCurves[2] = EmptyTab;
+
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocMatrix(ContextID, 3,  1, OneToThreeInputMatrix, NULL)) ||
+            !cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocToneCurves(ContextID, 3, LabCurves))) {
+                cmsFreeToneCurve(EmptyTab);
+                goto Error;
+        }
+        // EmptyTab is a local allocation and is released on success too (presumably the stage keeps its own copy)
+        cmsFreeToneCurve(EmptyTab);
+
+    }
+    else  {
+
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocToneCurves(ContextID, 1, &GrayTRC)) ||
+            !cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocMatrix(ContextID, 3,  1, GrayInputMatrix, NULL)))
+            goto Error;
+    }
+
+    return Lut;
+
+Error:
+    // GrayTRC came from cmsReadTag and still belongs to the profile, so it must not be freed here
+    cmsPipelineFree(Lut);
+    return NULL;
+}
+
+// RGB Matrix shaper
+static
+cmsPipeline* BuildRGBInputMatrixShaper(cmsHPROFILE hProfile)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsToneCurve* TRC[3];
+    cmsPipeline* Shaper;
+    cmsMAT3 Colorants;
+    int row, col;
+
+    // No colorant matrix, no matrix-shaper
+    if (!ReadICCMatrixRGB2XYZ(&Colorants, hProfile))
+        return NULL;
+
+    // The matrix output comes in 0..0xffff range while the XYZ PCS is encoded in 1.15
+    // format: a factor of (0x10000/0xffff) gives 1.16 and a further >> 1 gives 1.15.
+    // InpAdj is that total factor, (65536.0)/(65535.0*2)
+    for (row = 0; row < 3; row++) {
+        for (col = 0; col < 3; col++) {
+            Colorants.v[row].n[col] *= InpAdj;
+        }
+    }
+
+    // The three per-channel curves are taken straight from the profile's tags
+    TRC[0] = (cmsToneCurve*) cmsReadTag(hProfile, cmsSigRedTRCTag);
+    TRC[1] = (cmsToneCurve*) cmsReadTag(hProfile, cmsSigGreenTRCTag);
+    TRC[2] = (cmsToneCurve*) cmsReadTag(hProfile, cmsSigBlueTRCTag);
+    if (TRC[0] == NULL || TRC[1] == NULL || TRC[2] == NULL)
+        return NULL;
+
+    Shaper = cmsPipelineAlloc(ContextID, 3, 3);
+    if (Shaper == NULL)
+        return NULL;
+
+    // Curves first, then the scaled colorant matrix
+    if (!cmsPipelineInsertStage(Shaper, cmsAT_END, cmsStageAllocToneCurves(ContextID, 3, TRC)))
+        goto Fail;
+    if (!cmsPipelineInsertStage(Shaper, cmsAT_END, cmsStageAllocMatrix(ContextID, 3, 3, (cmsFloat64Number*) &Colorants, NULL)))
+        goto Fail;
+
+    // A profile may pair a Lab PCS with a matrix-shaper used for the fallback cases. The
+    // spec does not allow it, but it is tolerated here by appending an XYZ to Lab stage
+    if (cmsGetPCS(hProfile) == cmsSigLabData &&
+        !cmsPipelineInsertStage(Shaper, cmsAT_END, _cmsStageAllocXYZ2Lab(ContextID)))
+        goto Fail;
+
+    return Shaper;
+
+Fail:
+    cmsPipelineFree(Shaper);
+    return NULL;
+}
+
+
+
+// Read the DToBx tag, adjusting the encoding of Lab or XYZ if needed
+static
+cmsPipeline* _cmsReadFloatInputTag(cmsHPROFILE hProfile, cmsTagSignature tagFloat)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsPipeline* Copy = cmsPipelineDup((cmsPipeline*) cmsReadTag(hProfile, tagFloat));
+    cmsColorSpaceSignature DeviceSpace = cmsGetColorSpace(hProfile);
+    cmsColorSpaceSignature ConnSpace = cmsGetPCS(hProfile);
+
+    // Nothing to adjust when the duplicate is missing
+    if (Copy == NULL) return NULL;
+
+    // Input and output of the transform are in lcms 0..1 encoding. A Lab or XYZ device space
+    // is normalized into the appropriate range (Lab = 100,0,0, XYZ=1.0,1.0,1.0) at the beginning
+    switch (DeviceSpace) {
+    case cmsSigLabData:
+        if (!cmsPipelineInsertStage(Copy, cmsAT_BEGIN, _cmsStageNormalizeToLabFloat(ContextID))) goto Fail;
+        break;
+    case cmsSigXYZData:
+        if (!cmsPipelineInsertStage(Copy, cmsAT_BEGIN, _cmsStageNormalizeToXyzFloat(ContextID))) goto Fail;
+        break;
+    default: break;
+    }
+
+    // Same treatment for a Lab or XYZ PCS, this time at the end of the pipeline
+    switch (ConnSpace) {
+    case cmsSigLabData:
+        if (!cmsPipelineInsertStage(Copy, cmsAT_END, _cmsStageNormalizeFromLabFloat(ContextID))) goto Fail;
+        break;
+    case cmsSigXYZData:
+        if (!cmsPipelineInsertStage(Copy, cmsAT_END, _cmsStageNormalizeFromXyzFloat(ContextID))) goto Fail;
+        break;
+    default: break;
+    }
+
+    return Copy;
+
+Fail:
+    cmsPipelineFree(Copy);
+    return NULL;
+}
+
+
+// Read and create a BRAND NEW MPE LUT from a given profile. All stuff dependent of version, etc
+// is adjusted here in order to create a LUT that takes care of all those details.
+// We add intent = 0xffffffff as a way to read matrix shaper always, no matter of other LUT
+cmsPipeline* _cmsReadInputLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent)
+{
+    cmsTagTypeSignature OriginalType;
+    cmsTagSignature tag16;
+    cmsTagSignature tagFloat;
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    // Returns a new pipeline or NULL. Whatever cmsReadTag hands back stays owned by the profile.
+    // On named color, take the appropriate tag
+    if (cmsGetDeviceClass(hProfile) == cmsSigNamedColorClass) {
+
+        cmsPipeline* Lut;
+        cmsNAMEDCOLORLIST* nc = (cmsNAMEDCOLORLIST*) cmsReadTag(hProfile, cmsSigNamedColor2Tag);
+
+        if (nc == NULL) return NULL;
+
+        Lut = cmsPipelineAlloc(ContextID, 0, 0);
+        if (Lut == NULL) {
+            // nc came from cmsReadTag and belongs to the profile: freeing it would leave the profile with a dangling tag
+            return NULL;
+        }
+
+        if (!cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageAllocNamedColor(nc, TRUE)) ||
+            !cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageAllocLabV2ToV4(ContextID))) {
+            cmsPipelineFree(Lut);
+            return NULL;
+        }
+        return Lut;
+    }
+
+    // This is an attempt to reuse this function to retrieve the matrix-shaper as pipeline no
+    // matter other LUT are present and have precedence. Intent = 0xffffffff can be used for that.
+    if (Intent <= INTENT_ABSOLUTE_COLORIMETRIC) {
+
+        tag16 = Device2PCS16[Intent];
+        tagFloat = Device2PCSFloat[Intent];
+
+        if (cmsIsTag(hProfile, tagFloat)) {  // Float tag takes precedence
+
+            // Floating point LUT are always V4, but the encoding range is no
+            // longer 0..1.0, so we need to add an stage depending on the color space
+            return _cmsReadFloatInputTag(hProfile, tagFloat);
+        }
+
+        // Revert to perceptual if no tag is found
+        if (!cmsIsTag(hProfile, tag16)) {
+            tag16 = Device2PCS16[0];
+        }
+
+        if (cmsIsTag(hProfile, tag16)) { // Is there any LUT-Based table?
+
+            // Check profile version and LUT type. Do the necessary adjustments if needed
+            // First read the tag
+            cmsPipeline* Lut = (cmsPipeline*) cmsReadTag(hProfile, tag16);
+            if (Lut == NULL) return NULL;
+
+            // After reading it, we have now info about the original type
+            OriginalType =  _cmsGetTagTrueType(hProfile, tag16);
+
+            // The profile owns the Lut, so we need to copy it (and give up if the copy fails)
+            Lut = cmsPipelineDup(Lut);
+            if (Lut == NULL) return NULL;
+
+            // We need to adjust data only for Lab16 on output
+            if (OriginalType != cmsSigLut16Type || cmsGetPCS(hProfile) != cmsSigLabData)
+                return Lut;
+
+            // If the input is Lab, add also a conversion at the begin
+            if (cmsGetColorSpace(hProfile) == cmsSigLabData &&
+                !cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageAllocLabV4ToV2(ContextID)))
+                goto Error;
+
+            // Add a matrix for conversion V2 to V4 Lab PCS
+            if (!cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageAllocLabV2ToV4(ContextID)))
+                goto Error;
+
+            return Lut;
+Error:
+            cmsPipelineFree(Lut);
+            return NULL;
+        }
+    }
+
+    // Lut was not found, try to create a matrix-shaper
+
+    // Check if this is a grayscale profile.
+    if (cmsGetColorSpace(hProfile) == cmsSigGrayData) {
+
+        // if so, build appropriate conversion tables.
+        // The tables are the PCS illuminant, scaled across GrayTRC
+        return BuildGrayInputMatrixPipeline(hProfile);
+    }
+
+    // Not gray, create a normal matrix-shaper
+    return BuildRGBInputMatrixShaper(hProfile);
+}
+
+// ---------------------------------------------------------------------------------------------------------------
+
+// Gray output pipeline.
+// XYZ -> Gray or Lab -> Gray. Since we only know the GrayTRC, we need to do some assumptions. Gray component will be
+// given by Y on XYZ PCS and by L* on Lab PCS, Both across inverse TRC curve.
+// The complete pipeline on XYZ is Matrix[3:1] -> Tone curve and in Lab Matrix[3:1] -> Tone Curve as well.
+
+static
+cmsPipeline* BuildGrayOutputPipeline(cmsHPROFILE hProfile)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    const cmsFloat64Number* PickRow;
+    cmsToneCurve* Forward;
+    cmsToneCurve* Inverse;
+    cmsPipeline* Result;
+    cmsBool Complete;
+
+    // The forward curve is read from the profile; only its inverse is allocated here
+    Forward = (cmsToneCurve*) cmsReadTag(hProfile, cmsSigGrayTRCTag);
+    if (Forward == NULL) return NULL;
+
+    Inverse = cmsReverseToneCurve(Forward);
+    if (Inverse == NULL) return NULL;
+
+    Result = cmsPipelineAlloc(ContextID, 3, 1);
+    if (Result == NULL) {
+        cmsFreeToneCurve(Inverse);
+        return NULL;
+    }
+
+    // The matrix picks L* on a Lab PCS and the scaled Y otherwise
+    PickRow = (cmsGetPCS(hProfile) == cmsSigLabData) ? PickLstarMatrix : PickYMatrix;
+
+    // Matrix first, then the inverse curve; the second insertion is skipped if the first fails
+    Complete = cmsPipelineInsertStage(Result, cmsAT_END, cmsStageAllocMatrix(ContextID, 1,  3, PickRow, NULL)) &&
+               cmsPipelineInsertStage(Result, cmsAT_END, cmsStageAllocToneCurves(ContextID, 1, &Inverse));
+
+    // The local inverse is released on every path from here on
+    cmsFreeToneCurve(Inverse);
+
+    if (!Complete) {
+        cmsPipelineFree(Result);
+        return NULL;
+    }
+
+    return Result;
+}
+
+// RGB output matrix-shaper: inverse colorant matrix followed by inverse TRC curves. Returns a new pipeline, or NULL on failure
+static
+cmsPipeline* BuildRGBOutputMatrixShaper(cmsHPROFILE hProfile)
+{
+    cmsPipeline* Lut;
+    cmsToneCurve *Shapes[3], *InvShapes[3];
+    cmsMAT3 Mat, Inv;
+    int i, j;
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+
+    if (!ReadICCMatrixRGB2XYZ(&Mat, hProfile))
+        return NULL;
+
+    if (!_cmsMAT3inverse(&Mat, &Inv))
+        return NULL;
+
+    // XYZ PCS in encoded in 1.15 format, and the matrix input should come in 0..0xffff range, so
+    // we need to adjust the input by a << 1 to obtain a 1.16 fixed and then by a factor of
+    // (0xffff/0x10000) to put data in 0..0xffff range. Total factor is (2.0*65535.0)/65536.0;
+
+    for (i=0; i < 3; i++)
+        for (j=0; j < 3; j++)
+            Inv.v[i].n[j] *= OutpAdj;
+
+    Shapes[0] = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigRedTRCTag);
+    Shapes[1] = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigGreenTRCTag);
+    Shapes[2] = (cmsToneCurve *) cmsReadTag(hProfile, cmsSigBlueTRCTag);
+
+    if (!Shapes[0] || !Shapes[1] || !Shapes[2])
+        return NULL;
+    // The inverted curves are local allocations: every exit below has to release them
+    InvShapes[0] = cmsReverseToneCurve(Shapes[0]);
+    InvShapes[1] = cmsReverseToneCurve(Shapes[1]);
+    InvShapes[2] = cmsReverseToneCurve(Shapes[2]);
+    if (!InvShapes[0] || !InvShapes[1] || !InvShapes[2]) {
+        for (i=0; i < 3; i++) if (InvShapes[i] != NULL) cmsFreeToneCurve(InvShapes[i]);   // free the ones that were built
+        return NULL;
+    }
+
+    Lut = cmsPipelineAlloc(ContextID, 3, 3);
+    if (Lut != NULL) {
+
+        // Note that it is certainly possible a single profile would have a LUT based
+        // tag for output working in lab and a matrix-shaper for the fallback cases.
+        // This is not allowed by the spec, but this code is tolerant to those cases
+        if (cmsGetPCS(hProfile) == cmsSigLabData) {
+
+            if (!cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageAllocLab2XYZ(ContextID)))
+                goto Error;
+        }
+
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocMatrix(ContextID, 3, 3, (cmsFloat64Number*) &Inv, NULL)) ||
+            !cmsPipelineInsertStage(Lut, cmsAT_END, cmsStageAllocToneCurves(ContextID, 3, InvShapes)))
+            goto Error;
+    }
+
+    cmsFreeToneCurveTriple(InvShapes);
+    return Lut;
+Error:
+    cmsFreeToneCurveTriple(InvShapes);
+    cmsPipelineFree(Lut);
+    return NULL;
+}
+
+
+// Change CLUT interpolation to trilinear
+static
+void ChangeInterpolationToTrilinear(cmsPipeline* Lut)
+{
+    cmsStage* Cursor = cmsPipelineGetPtrToFirstStage(Lut);
+
+    // Walk every stage; only CLUT stages are touched
+    while (Cursor != NULL) {
+
+        if (cmsStageType(Cursor) == cmsSigCLutElemType) {
+            // Raise the trilinear flag, then have _cmsSetInterpolationRoutine pick the routine again
+            _cmsStageCLutData* Grid = (_cmsStageCLutData*) Cursor->Data;
+            Grid->Params->dwFlags |= CMS_LERP_FLAGS_TRILINEAR;
+            _cmsSetInterpolationRoutine(Lut->ContextID, Grid->Params);
+        }
+
+        Cursor = cmsStageNext(Cursor);
+    }
+}
+
+
+// Read the BToDx tag, adjusting the encoding of Lab or XYZ if needed
+static
+cmsPipeline* _cmsReadFloatOutputTag(cmsHPROFILE hProfile, cmsTagSignature tagFloat)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsPipeline* Copy = cmsPipelineDup((cmsPipeline*) cmsReadTag(hProfile, tagFloat));
+    cmsColorSpaceSignature ConnSpace = cmsGetPCS(hProfile);
+    cmsColorSpaceSignature DeviceSpace = cmsGetColorSpace(hProfile);
+
+    // Nothing to adjust when the duplicate is missing
+    if (Copy == NULL) return NULL;
+
+    // If PCS is Lab or XYZ, the floating point tag accepts data in the space encoding.
+    // The formatter has already accommodated to 0..1.0, so that change is undone at
+    // the beginning of the pipeline
+    switch (ConnSpace) {
+    case cmsSigLabData:
+        if (!cmsPipelineInsertStage(Copy, cmsAT_BEGIN, _cmsStageNormalizeToLabFloat(ContextID))) goto Fail;
+        break;
+    case cmsSigXYZData:
+        if (!cmsPipelineInsertStage(Copy, cmsAT_BEGIN, _cmsStageNormalizeToXyzFloat(ContextID))) goto Fail;
+        break;
+    default: break;
+    }
+
+    // The output can be Lab or XYZ as well, in which case normalisation is needed
+    // at the end of the pipeline
+    switch (DeviceSpace) {
+    case cmsSigLabData:
+        if (!cmsPipelineInsertStage(Copy, cmsAT_END, _cmsStageNormalizeFromLabFloat(ContextID))) goto Fail;
+        break;
+    case cmsSigXYZData:
+        if (!cmsPipelineInsertStage(Copy, cmsAT_END, _cmsStageNormalizeFromXyzFloat(ContextID))) goto Fail;
+        break;
+    default: break;
+    }
+
+    return Copy;
+
+Fail:
+    cmsPipelineFree(Copy);
+    return NULL;
+}
+
+// Create an output MPE LUT from a given profile. Version mismatches are handled here
+cmsPipeline* _cmsReadOutputLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent)
+{
+    // Lookup order: float tag for the intent, 16-bit tag for the intent, perceptual
+    // 16-bit tag, and finally a matrix-shaper (gray or RGB). The result is a new
+    // pipeline or NULL.
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsTagTypeSignature StoredAs;
+    cmsTagSignature Sig16;
+    cmsTagSignature SigFloat;
+    cmsPipeline* Lut;
+    cmsBool LabPCS;
+
+    // Intents above absolute colorimetric skip the LUT tags altogether
+    if (Intent <= INTENT_ABSOLUTE_COLORIMETRIC) {
+
+        Sig16    = PCS2Device16[Intent];
+        SigFloat = PCS2DeviceFloat[Intent];
+
+        // Float tag takes precedence. Floating point LUT are always V4
+        if (cmsIsTag(hProfile, SigFloat))
+            return _cmsReadFloatOutputTag(hProfile, SigFloat);
+
+        // Revert to perceptual if no tag is found
+        if (!cmsIsTag(hProfile, Sig16))
+            Sig16 = PCS2Device16[0];
+
+        // Is there any LUT-Based table?
+        if (cmsIsTag(hProfile, Sig16)) {
+
+            // Check profile version and LUT type. Do the necessary adjustments if needed.
+            // The tag has to be read before its original type can be queried
+            Lut = (cmsPipeline*) cmsReadTag(hProfile, Sig16);
+            if (Lut == NULL)
+                return NULL;
+
+            StoredAs = _cmsGetTagTrueType(hProfile, Sig16);
+
+            // The profile owns what was just read, so continue on a private copy
+            Lut = cmsPipelineDup(Lut);
+            if (Lut == NULL)
+                return NULL;
+
+            LabPCS = (cmsGetPCS(hProfile) == cmsSigLabData);
+
+            // Now it is time for a controversial stuff: for 3D LUTS using Lab as
+            // indexer space, trilinear interpolation should be used
+            if (LabPCS)
+                ChangeInterpolationToTrilinear(Lut);
+
+            // We need to adjust data only for Lab and Lut16 type
+            if (StoredAs != cmsSigLut16Type || !LabPCS)
+                return Lut;
+
+            // V4 to V2 Lab conversion goes at the beginning; when the output is Lab
+            // as well, a V2 to V4 conversion is also added at the end
+            if (!cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageAllocLabV4ToV2(ContextID)) ||
+                (cmsGetColorSpace(hProfile) == cmsSigLabData &&
+                 !cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageAllocLabV2ToV4(ContextID)))) {
+
+                cmsPipelineFree(Lut);
+                return NULL;
+            }
+
+            return Lut;
+        }
+    }
+
+    // Lut not found, try to create a matrix-shaper
+
+    // A grayscale profile gets its conversion tables built from GrayTRC:
+    // the tables are the PCS illuminant, scaled across that curve
+    if (cmsGetColorSpace(hProfile) == cmsSigGrayData)
+        return BuildGrayOutputPipeline(hProfile);
+
+    // Not gray, create a normal matrix-shaper
+    return BuildRGBOutputMatrixShaper(hProfile);
+}
+
+// ---------------------------------------------------------------------------------------------------------------
+
+// Read the DToBx tag of a devicelink, adjusting the encoding of Lab or XYZ if needed
+static
+cmsPipeline* _cmsReadFloatDevicelinkTag(cmsHPROFILE hProfile, cmsTagSignature tagFloat)
+{
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsPipeline* Copy = cmsPipelineDup((cmsPipeline*)cmsReadTag(hProfile, tagFloat));
+    cmsColorSpaceSignature ConnSpace = cmsGetPCS(hProfile);
+    cmsColorSpaceSignature DeviceSpace = cmsGetColorSpace(hProfile);
+
+    // Nothing to adjust when the duplicate is missing
+    if (Copy == NULL) return NULL;
+
+    // A Lab or XYZ color space gets a normalization stage at the beginning
+    switch (DeviceSpace) {
+    case cmsSigLabData:
+        if (!cmsPipelineInsertStage(Copy, cmsAT_BEGIN, _cmsStageNormalizeToLabFloat(ContextID))) goto Fail;
+        break;
+    case cmsSigXYZData:
+        if (!cmsPipelineInsertStage(Copy, cmsAT_BEGIN, _cmsStageNormalizeToXyzFloat(ContextID))) goto Fail;
+        break;
+    default: break;
+    }
+
+    // A Lab or XYZ PCS gets the matching stage at the end
+    switch (ConnSpace) {
+    case cmsSigLabData:
+        if (!cmsPipelineInsertStage(Copy, cmsAT_END, _cmsStageNormalizeFromLabFloat(ContextID))) goto Fail;
+        break;
+    case cmsSigXYZData:
+        if (!cmsPipelineInsertStage(Copy, cmsAT_END, _cmsStageNormalizeFromXyzFloat(ContextID))) goto Fail;
+        break;
+    default: break;
+    }
+
+    return Copy;
+
+Fail:
+    cmsPipelineFree(Copy);
+    return NULL;
+}
+
+// This one includes abstract profiles as well. Matrix-shaper cannot be obtained on that device class. The
+// tag name here may default to AToB0
+cmsPipeline* _cmsReadDevicelinkLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent)
+{
+    cmsPipeline* Lut;
+    cmsTagTypeSignature OriginalType;
+    cmsTagSignature tag16;
+    cmsTagSignature tagFloat;
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+
+    // There is no matrix-shaper fallback here, so an intent outside the standard four yields NULL
+    if (Intent > INTENT_ABSOLUTE_COLORIMETRIC)
+        return NULL;
+
+    tag16 = Device2PCS16[Intent];
+    tagFloat = Device2PCSFloat[Intent];
+
+    // On named color, take the appropriate tag
+    if (cmsGetDeviceClass(hProfile) == cmsSigNamedColorClass) {
+
+        cmsNAMEDCOLORLIST* nc = (cmsNAMEDCOLORLIST*)cmsReadTag(hProfile, cmsSigNamedColor2Tag);
+
+        if (nc == NULL) return NULL;
+
+        Lut = cmsPipelineAlloc(ContextID, 0, 0);
+        if (Lut == NULL)
+            goto Error;
+
+        if (!cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageAllocNamedColor(nc, FALSE)))
+            goto Error;
+
+        if (cmsGetColorSpace(hProfile) == cmsSigLabData)
+            if (!cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageAllocLabV2ToV4(ContextID)))
+                goto Error;
+
+        return Lut;
+    Error:
+        cmsPipelineFree(Lut);
+        // nc came from cmsReadTag and belongs to the profile: freeing it would leave the profile with a dangling tag
+        return NULL;
+    }
+
+
+    if (cmsIsTag(hProfile, tagFloat)) {  // Float tag takes precedence
+
+        // Floating point LUT are always V4
+        return _cmsReadFloatDevicelinkTag(hProfile, tagFloat);
+    }
+
+    tagFloat = Device2PCSFloat[0];
+    if (cmsIsTag(hProfile, tagFloat)) {
+
+        return cmsPipelineDup((cmsPipeline*)cmsReadTag(hProfile, tagFloat));
+    }
+
+    if (!cmsIsTag(hProfile, tag16)) {  // Is there any LUT-Based table?
+
+        tag16 = Device2PCS16[0];
+        if (!cmsIsTag(hProfile, tag16)) return NULL;
+    }
+
+    // Check profile version and LUT type. Do the necessary adjustments if needed
+
+    // Read the tag
+    Lut = (cmsPipeline*)cmsReadTag(hProfile, tag16);
+    if (Lut == NULL) return NULL;
+
+    // The profile owns the Lut, so we need to copy it
+    Lut = cmsPipelineDup(Lut);
+    if (Lut == NULL) return NULL;
+
+    // Now it is time for a controversial stuff. I found that for 3D LUTS using
+    // Lab used as indexer space,  trilinear interpolation should be used
+    if (cmsGetPCS(hProfile) == cmsSigLabData)
+        ChangeInterpolationToTrilinear(Lut);
+
+    // After reading it, we have info about the original type
+    OriginalType = _cmsGetTagTrueType(hProfile, tag16);
+
+    // We need to adjust data for Lab16 on output
+    if (OriginalType != cmsSigLut16Type) return Lut;
+
+    // Here it is possible to get Lab on both sides
+
+    if (cmsGetColorSpace(hProfile) == cmsSigLabData) {
+        if (!cmsPipelineInsertStage(Lut, cmsAT_BEGIN, _cmsStageAllocLabV4ToV2(ContextID)))
+            goto Error2;
+    }
+
+    if (cmsGetPCS(hProfile) == cmsSigLabData) {
+        if (!cmsPipelineInsertStage(Lut, cmsAT_END, _cmsStageAllocLabV2ToV4(ContextID)))
+            goto Error2;
+    }
+
+    return Lut;
+
+Error2:
+    cmsPipelineFree(Lut);
+    return NULL;
+}
+
+// ---------------------------------------------------------------------------------------------------------------
+
+// Returns TRUE if the profile is implemented as matrix-shaper
+cmsBool  CMSEXPORT cmsIsMatrixShaper(cmsHPROFILE hProfile)
+{
+    // Tags an RGB profile must carry, in the order they are probed
+    static const cmsTagSignature RgbTags[] = { cmsSigRedColorantTag, cmsSigGreenColorantTag, cmsSigBlueColorantTag,
+                                               cmsSigRedTRCTag,      cmsSigGreenTRCTag,      cmsSigBlueTRCTag };
+    const cmsColorSpaceSignature Space = cmsGetColorSpace(hProfile);
+    cmsUInt32Number k;
+
+    // Gray profiles only need their tone curve
+    if (Space == cmsSigGrayData)
+        return cmsIsTag(hProfile, cmsSigGrayTRCTag);
+
+    // Any space other than gray or RGB cannot be a matrix-shaper
+    if (Space != cmsSigRgbData)
+        return FALSE;
+
+    // RGB: stop at the first missing tag
+    for (k = 0; k < sizeof(RgbTags) / sizeof(RgbTags[0]); k++) {
+        if (!cmsIsTag(hProfile, RgbTags[k])) return FALSE;
+    }
+    return TRUE;
+}
+
+// Returns TRUE if the intent is implemented as CLUT
+cmsBool  CMSEXPORT cmsIsCLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number UsedDirection)
+{
+    const cmsTagSignature* TagTable;
+
+    // For devicelinks, the supported intent is that one stated in the header
+    if (cmsGetDeviceClass(hProfile) == cmsSigLinkClass) {
+            return (cmsGetHeaderRenderingIntent(hProfile) == Intent);
+    }
+
+    switch (UsedDirection) {
+       // Input and output each use their own 16-bit tag table
+       case LCMS_USED_AS_INPUT: TagTable = Device2PCS16; break;
+       case LCMS_USED_AS_OUTPUT:TagTable = PCS2Device16; break;
+
+       // For proofing, we need rel. colorimetric in output. Let's do some recursion
+       case LCMS_USED_AS_PROOF:
+           return cmsIsIntentSupported(hProfile, Intent, LCMS_USED_AS_INPUT) &&
+                  cmsIsIntentSupported(hProfile, INTENT_RELATIVE_COLORIMETRIC, LCMS_USED_AS_OUTPUT);
+
+       default:
+           cmsSignalError(cmsGetProfileContextID(hProfile), cmsERROR_RANGE, "Unexpected direction (%d)", UsedDirection);
+           return FALSE;
+    }
+
+    if (Intent > INTENT_ABSOLUTE_COLORIMETRIC) return FALSE;   // the tag tables only hold the four standard intents
+    return cmsIsTag(hProfile, TagTable[Intent]);
+}
+
+
+// Return info about supported intents
+cmsBool  CMSEXPORT cmsIsIntentSupported(cmsHPROFILE hProfile,
+                                        cmsUInt32Number Intent, cmsUInt32Number UsedDirection)
+{
+    // A CLUT for the intent settles the question. Failing that, any matrix-shaper counts
+    // as support: V2 matrix shapers cannot deal with non-zero black points and so do not
+    // fully support relative colorimetric, yet many profiles claim it (and for V4 the
+    // objection does not hold), so the answer is "yes" even if accuracy is less than
+    // optimal in the rel.col / V2 case.
+    const cmsBool HasCLUT = cmsIsCLUT(hProfile, Intent, UsedDirection);
+
+    return HasCLUT ? TRUE
+                   : cmsIsMatrixShaper(hProfile);
+}
+
+
+// ---------------------------------------------------------------------------------------------------------------
+
+// Read both, profile sequence description and profile sequence id if present. Then combine both to
+// create a unique structure holding both. Shame on ICC to store things in such complicated way.
+cmsSEQ* _cmsReadProfileSequence(cmsHPROFILE hProfile)
+{
+    cmsSEQ* ProfileSeq;
+    cmsSEQ* ProfileId;
+    cmsSEQ* NewSeq;
+    cmsUInt32Number i;
+
+    // Take profile sequence description first
+    ProfileSeq = (cmsSEQ*) cmsReadTag(hProfile, cmsSigProfileSequenceDescTag);
+
+    // Take profile sequence ID
+    ProfileId  = (cmsSEQ*) cmsReadTag(hProfile, cmsSigProfileSequenceIdTag);
+
+    if (ProfileSeq == NULL && ProfileId == NULL) return NULL;
+
+    if (ProfileSeq == NULL) return cmsDupProfileSequenceDescription(ProfileId);
+    if (ProfileId  == NULL) return cmsDupProfileSequenceDescription(ProfileSeq);
+
+    // We have to mix both together. For that they must agree
+    if (ProfileSeq ->n != ProfileId ->n) return cmsDupProfileSequenceDescription(ProfileSeq);
+
+    NewSeq = cmsDupProfileSequenceDescription(ProfileSeq);
+    // Ok, proceed to the mixing: ProfileID and Description are taken from the ID tag
+    if (NewSeq != NULL) {
+        for (i=0; i < ProfileSeq ->n; i++) {
+            memmove(&NewSeq ->seq[i].ProfileID, &ProfileId ->seq[i].ProfileID, sizeof(cmsProfileID));
+            // The duplicate already owns a Description copied from ProfileSeq: release it so it does not leak
+            if (NewSeq ->seq[i].Description != NULL) cmsMLUfree(NewSeq ->seq[i].Description);
+            NewSeq ->seq[i].Description = cmsMLUdup(ProfileId ->seq[i].Description);
+        }
+    }
+    return NewSeq;
+}
+
+// Store a profile sequence in the profile. The sequence description tag is always
+// written; the sequence id tag is added only when the encoded ICC version is
+// 0x4000000 or above. Returns FALSE as soon as a tag write fails, TRUE otherwise.
+cmsBool _cmsWriteProfileSequence(cmsHPROFILE hProfile, const cmsSEQ* seq)
+{
+    if (!cmsWriteTag(hProfile, cmsSigProfileSequenceDescTag, seq))
+        return FALSE;
+
+    // Below v4 only the description tag is stored
+    if (cmsGetEncodedICCversion(hProfile) < 0x4000000)
+        return TRUE;
+
+    return cmsWriteTag(hProfile, cmsSigProfileSequenceIdTag, seq) ? TRUE : FALSE;
+}
+
+
+// Helper: read the MLU stored under tag 'sig' and hand back a duplicate of it,
+// or NULL when the tag cannot be read.
+static
+cmsMLU* GetMLUFromProfile(cmsHPROFILE h, cmsTagSignature sig)
+{
+    cmsMLU* Found = (cmsMLU*) cmsReadTag(h, sig);
+
+    return (Found != NULL) ? cmsMLUdup(Found) : NULL;
+}
+
+// Build a profile sequence description with one entry per profile in hProfiles[].
+// Each entry gets the header attributes, profile ID, manufacturer and model taken
+// from the profile header, the technology signature (0 when the tag is missing)
+// and duplicates of the manufacturer, model and description MLUs.
+// Returns NULL if the sequence itself cannot be allocated.
+cmsSEQ* _cmsCompileProfileSequence(cmsContext ContextID, cmsUInt32Number nProfiles, cmsHPROFILE hProfiles[])
+{
+    cmsSEQ* Compiled = cmsAllocProfileSequenceDescription(ContextID, nProfiles);
+    cmsUInt32Number k;
+
+    if (Compiled == NULL) return NULL;
+
+    for (k = 0; k < nProfiles; k++) {
+
+        cmsHPROFILE Profile = hProfiles[k];
+        cmsPSEQDESC* Entry  = &Compiled ->seq[k];
+        cmsTechnologySignature* Tech;
+
+        // Header-level information
+        cmsGetHeaderAttributes(Profile, &Entry ->attributes);
+        cmsGetHeaderProfileID(Profile, Entry ->ProfileID.ID8);
+        Entry ->deviceMfg   = cmsGetHeaderManufacturer(Profile);
+        Entry ->deviceModel = cmsGetHeaderModel(Profile);
+
+        // Technology is optional; a missing tag is recorded as signature 0
+        Tech = (cmsTechnologySignature*) cmsReadTag(Profile, cmsSigTechnologyTag);
+        Entry ->technology = (Tech != NULL) ? *Tech : (cmsTechnologySignature) 0;
+
+        // Textual information, each one a private copy (or NULL)
+        Entry ->Manufacturer = GetMLUFromProfile(Profile, cmsSigDeviceMfgDescTag);
+        Entry ->Model        = GetMLUFromProfile(Profile, cmsSigDeviceModelDescTag);
+        Entry ->Description  = GetMLUFromProfile(Profile, cmsSigProfileDescriptionTag);
+    }
+
+    return Compiled;
+}
+
+// -------------------------------------------------------------------------------------------------------------------
+
+
+// Map an info selector onto the tag that holds it and read that tag as an MLU.
+// Returns NULL for an unknown selector; otherwise whatever cmsReadTag() returns.
+static
+const cmsMLU* GetInfo(cmsHPROFILE hProfile, cmsInfoType Info)
+{
+    switch (Info) {
+
+    case cmsInfoDescription:
+        return (cmsMLU*) cmsReadTag(hProfile, cmsSigProfileDescriptionTag);
+
+    case cmsInfoManufacturer:
+        return (cmsMLU*) cmsReadTag(hProfile, cmsSigDeviceMfgDescTag);
+
+    case cmsInfoModel:
+        return (cmsMLU*) cmsReadTag(hProfile, cmsSigDeviceModelDescTag);
+
+    case cmsInfoCopyright:
+        return (cmsMLU*) cmsReadTag(hProfile, cmsSigCopyrightTag);
+
+    default:
+        break;
+    }
+
+    // Selector not recognized
+    return NULL;
+}
+
+
+
+// Retrieve one of the textual profile infos as wide characters for the requested
+// language/country. Returns 0 when the info is not available, otherwise the value
+// returned by cmsMLUgetWide().
+cmsUInt32Number CMSEXPORT cmsGetProfileInfo(cmsHPROFILE hProfile, cmsInfoType Info,
+                                            const char LanguageCode[3], const char CountryCode[3],
+                                            wchar_t* Buffer, cmsUInt32Number BufferSize)
+{
+    const cmsMLU* Text = GetInfo(hProfile, Info);
+
+    if (Text != NULL)
+        return cmsMLUgetWide(Text, LanguageCode, CountryCode, Buffer, BufferSize);
+
+    return 0;
+}
+
+
+// Retrieve one of the textual profile infos as a char string for the requested
+// language/country. Returns 0 when the info is not available, otherwise the value
+// returned by cmsMLUgetASCII().
+cmsUInt32Number  CMSEXPORT cmsGetProfileInfoASCII(cmsHPROFILE hProfile, cmsInfoType Info,
+                                                          const char LanguageCode[3], const char CountryCode[3],
+                                                          char* Buffer, cmsUInt32Number BufferSize)
+{
+    const cmsMLU* Text = GetInfo(hProfile, Info);
+
+    if (Text != NULL)
+        return cmsMLUgetASCII(Text, LanguageCode, CountryCode, Buffer, BufferSize);
+
+    return 0;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmslut.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmslut.cpp
new file mode 100755
index 0000000000..5518f6a86b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmslut.cpp
@@ -0,0 +1,1843 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// Allocate a zero-initialized stage and fill it with the given type, channel counts,
+// callbacks and private data. 'Implements' starts out equal to 'Type'.
+// Returns NULL when the allocation fails.
+cmsStage* CMSEXPORT _cmsStageAllocPlaceholder(cmsContext ContextID,
+                                cmsStageSignature Type,
+                                cmsUInt32Number InputChannels,
+                                cmsUInt32Number OutputChannels,
+                                _cmsStageEvalFn     EvalPtr,
+                                _cmsStageDupElemFn  DupElemPtr,
+                                _cmsStageFreeElemFn FreePtr,
+                                void*             Data)
+{
+    cmsStage* Stage = (cmsStage*) _cmsMallocZero(ContextID, sizeof(cmsStage));
+
+    if (Stage != NULL) {
+
+        Stage ->ContextID = ContextID;
+
+        // Identity of the stage. By default, no clue on what is implementing
+        Stage ->Type       = Type;
+        Stage ->Implements = Type;
+
+        // Geometry
+        Stage ->InputChannels  = InputChannels;
+        Stage ->OutputChannels = OutputChannels;
+
+        // Behavior and private payload
+        Stage ->EvalPtr    = EvalPtr;
+        Stage ->DupElemPtr = DupElemPtr;
+        Stage ->FreePtr    = FreePtr;
+        Stage ->Data       = Data;
+    }
+
+    return Stage;
+}
+
+
+// Evaluation callback of the identity stage: copies InputChannels floats from In to Out.
+static
+void EvaluateIdentity(const cmsFloat32Number In[],
+                            cmsFloat32Number Out[],
+                      const cmsStage *mpe)
+{
+    size_t nBytes = mpe ->InputChannels * sizeof(cmsFloat32Number);
+
+    // memmove, so In and Out are allowed to overlap
+    memmove(Out, In, nBytes);
+}
+
+
+// Create an identity stage with nChannels inputs and nChannels outputs. The stage
+// carries no private data, hence no duplication or free callbacks are installed.
+cmsStage* CMSEXPORT cmsStageAllocIdentity(cmsContext ContextID, cmsUInt32Number nChannels)
+{
+    cmsStage* Identity;
+
+    Identity = _cmsStageAllocPlaceholder(ContextID, cmsSigIdentityElemType,
+                                         nChannels, nChannels,
+                                         EvaluateIdentity,
+                                         NULL, NULL, NULL);
+    return Identity;
+}
+
+// Conversion helper, floating point to 16 bits: each of the n values is scaled
+// by 65535 and passed through _cmsQuickSaturateWord().
+static
+void FromFloatTo16(const cmsFloat32Number In[], cmsUInt16Number Out[], cmsUInt32Number n)
+{
+    cmsUInt32Number k = 0;
+
+    while (k < n) {
+        Out[k] = _cmsQuickSaturateWord(In[k] * 65535.0);
+        k++;
+    }
+}
+
+// Conversion helper, 16 bits to floating point: each of the n values is divided by 65535.
+static
+void From16ToFloat(const cmsUInt16Number In[], cmsFloat32Number Out[], cmsUInt32Number n)
+{
+    cmsUInt32Number k = 0;
+
+    while (k < n) {
+        Out[k] = (cmsFloat32Number) In[k] / 65535.0F;
+        k++;
+    }
+}
+
+
+// Analyze the structure of a LUT and optionally retrieve the stages that make it up.
+// Call it with the LUT, the number n of expected stages, then n expected stage types
+// followed by n pointers (read as void**) that receive the matching stages.
+// If the pipeline has exactly n stages and their types match in order, the non-NULL
+// pointers are filled and TRUE is returned. Otherwise FALSE is returned and nothing
+// is stored. Passing NULL for a pointer skips the storage of that stage.
+cmsBool  CMSEXPORT cmsPipelineCheckAndRetreiveStages(const cmsPipeline* Lut, cmsUInt32Number n, ...)
+{
+    va_list args;
+    cmsUInt32Number k;
+    cmsStage* Current;
+
+    // Make sure same number of elements
+    if (cmsPipelineStageCount(Lut) != n) return FALSE;
+
+    va_start(args, n);
+
+    // First pass: compare every stage against the asked type
+    for (k = 0, Current = Lut ->Elements; k < n; k++, Current = Current ->Next) {
+
+        // cmsStageSignature is promoted to int by compiler
+        cmsStageSignature Wanted = (cmsStageSignature)va_arg(args, int);
+
+        if (Current ->Type != Wanted) {
+
+            // Mismatch. We are done.
+            va_end(args);
+            return FALSE;
+        }
+    }
+
+    // Second pass: the combination matches, hand out the stages that were asked for
+    for (k = 0, Current = Lut ->Elements; k < n; k++, Current = Current ->Next) {
+
+        void** Slot = va_arg(args, void**);
+
+        if (Slot != NULL)
+            *Slot = Current;
+    }
+
+    va_end(args);
+    return TRUE;
+}
+
+// Below there are implementations for several types of elements. Each type may be implemented by a
+// evaluation function, a duplication function, a function to free resources and a constructor.
+
+// *************************************************************************************************
+// Type cmsSigCurveSetElemType (curves)
+// *************************************************************************************************
+
+// Give access to the array of tone curves held in the private data of a curve-set stage.
+// The stage data is used without any NULL check.
+cmsToneCurve** _cmsStageGetPtrToCurveSet(const cmsStage* mpe)
+{
+    return ((_cmsStageToneCurvesData*) mpe ->Data) ->TheCurves;
+}
+
+// Evaluation callback of the curve-set stage: channel i of the output is the
+// i-th tone curve applied to channel i of the input. Nothing is written when
+// the stage has no data or no curve array.
+static
+void EvaluateCurves(const cmsFloat32Number In[],
+                    cmsFloat32Number Out[],
+                    const cmsStage *mpe)
+{
+    _cmsStageToneCurvesData* Set;
+    cmsUInt32Number ch;
+
+    _cmsAssert(mpe != NULL);
+
+    Set = (_cmsStageToneCurvesData*) mpe ->Data;
+
+    // Nothing to evaluate
+    if (Set == NULL || Set ->TheCurves == NULL) return;
+
+    ch = 0;
+    while (ch < Set ->nCurves) {
+        Out[ch] = cmsEvalToneCurveFloat(Set ->TheCurves[ch], In[ch]);
+        ch++;
+    }
+}
+
+// Free callback of the curve-set stage: releases every non-NULL curve, then the
+// curve array, then the private data block. Does nothing when the stage has no data.
+static
+void CurveSetElemTypeFree(cmsStage* mpe)
+{
+    _cmsStageToneCurvesData* Set;
+    cmsToneCurve** Curves;
+    cmsUInt32Number ch;
+
+    _cmsAssert(mpe != NULL);
+
+    Set = (_cmsStageToneCurvesData*) mpe ->Data;
+    if (Set == NULL) return;
+
+    Curves = Set ->TheCurves;
+
+    for (ch = 0; Curves != NULL && ch < Set ->nCurves; ch++) {
+
+        if (Curves[ch] != NULL)
+            cmsFreeToneCurve(Curves[ch]);
+    }
+
+    // The array is handed to _cmsFree even when it is NULL, as before
+    _cmsFree(mpe ->ContextID, Set ->TheCurves);
+    _cmsFree(mpe ->ContextID, Set);
+}
+
+
+// Duplication callback of the curve-set stage: builds a new private data block holding
+// a duplicate of every curve of the source stage. On any failure everything allocated
+// so far is released and NULL is returned.
+static
+void* CurveSetDup(cmsStage* mpe)
+{
+    _cmsStageToneCurvesData* Source = (_cmsStageToneCurvesData*) mpe ->Data;
+    _cmsStageToneCurvesData* Copy;
+    cmsUInt32Number ch;
+
+    Copy = (_cmsStageToneCurvesData*) _cmsMallocZero(mpe ->ContextID, sizeof(_cmsStageToneCurvesData));
+    if (Copy == NULL) return NULL;
+
+    Copy ->nCurves   = Source ->nCurves;
+    Copy ->TheCurves = (cmsToneCurve**) _cmsCalloc(mpe ->ContextID, Copy ->nCurves, sizeof(cmsToneCurve*));
+
+    if (Copy ->TheCurves != NULL) {
+
+        // Duplicate each curve, stopping at the first one that fails
+        for (ch = 0; ch < Copy ->nCurves; ch++) {
+
+            Copy ->TheCurves[ch] = cmsDupToneCurve(Source ->TheCurves[ch]);
+            if (Copy ->TheCurves[ch] == NULL) break;
+        }
+
+        // Reached the end: every curve was duplicated
+        if (ch == Copy ->nCurves)
+            return (void*) Copy;
+
+        // Roll back the curves that did get duplicated
+        for (ch = 0; ch < Copy ->nCurves; ch++) {
+
+            if (Copy ->TheCurves[ch])
+                cmsFreeToneCurve(Copy ->TheCurves[ch]);
+        }
+    }
+
+    _cmsFree(mpe ->ContextID, Copy ->TheCurves);
+    _cmsFree(mpe ->ContextID, Copy);
+    return NULL;
+}
+
+
+// Create a curve-set stage with nChannels inputs and outputs. Every channel gets a
+// duplicate of the matching entry of Curves[]; passing Curves == NULL forces identity
+// curves (gamma 1.0) instead. Returns NULL, releasing whatever was built, if any
+// allocation or curve creation fails.
+cmsStage* CMSEXPORT cmsStageAllocToneCurves(cmsContext ContextID, cmsUInt32Number nChannels, cmsToneCurve* const Curves[])
+{
+    _cmsStageToneCurvesData* Set;
+    cmsStage* Stage;
+    cmsUInt32Number ch;
+
+    Stage = _cmsStageAllocPlaceholder(ContextID, cmsSigCurveSetElemType, nChannels, nChannels,
+                                      EvaluateCurves, CurveSetDup, CurveSetElemTypeFree, NULL );
+    if (Stage == NULL) return NULL;
+
+    Set = (_cmsStageToneCurvesData*) _cmsMallocZero(ContextID, sizeof(_cmsStageToneCurvesData));
+    if (Set == NULL) goto Fail;
+
+    // From here on the stage owns the data, so freeing the stage frees everything
+    Stage ->Data = (void*) Set;
+
+    Set ->nCurves   = nChannels;
+    Set ->TheCurves = (cmsToneCurve**) _cmsCalloc(ContextID, nChannels, sizeof(cmsToneCurve*));
+    if (Set ->TheCurves == NULL) goto Fail;
+
+    for (ch = 0; ch < nChannels; ch++) {
+
+        Set ->TheCurves[ch] = (Curves != NULL) ? cmsDupToneCurve(Curves[ch])
+                                               : cmsBuildGamma(ContextID, 1.0);
+
+        if (Set ->TheCurves[ch] == NULL) goto Fail;
+    }
+
+    return Stage;
+
+Fail:
+    cmsStageFree(Stage);
+    return NULL;
+}
+
+
+// Create a curve-set stage made of identity curves and flag it as implementing
+// an identity element. Returns NULL if the stage cannot be created.
+cmsStage* _cmsStageAllocIdentityCurves(cmsContext ContextID, cmsUInt32Number nChannels)
+{
+    cmsStage* Stage = cmsStageAllocToneCurves(ContextID, nChannels, NULL);
+
+    if (Stage != NULL)
+        Stage ->Implements = cmsSigIdentityElemType;
+
+    return Stage;
+}
+
+
+// *************************************************************************************************
+// Type cmsSigMatrixElemType (Matrices)
+// *************************************************************************************************
+
+
+// Evaluation callback of the matrix stage: Out = Matrix * In (+ Offset when present).
+// The matrix is stored row by row, InputChannels entries per row. Each output is
+// accumulated in a cmsFloat64Number to limit precision loss and narrowed at the end.
+// Input and output are in 0..1.0 notation.
+static
+void EvaluateMatrix(const cmsFloat32Number In[],
+                    cmsFloat32Number Out[],
+                    const cmsStage *mpe)
+{
+    _cmsStageMatrixData* Mat = (_cmsStageMatrixData*) mpe ->Data;
+    cmsUInt32Number Row, Col;
+
+    for (Row = 0; Row < mpe ->OutputChannels; Row++) {
+
+        const cmsFloat64Number* Coeffs = &Mat ->Double[Row * mpe ->InputChannels];
+        cmsFloat64Number Acc = 0;
+
+        // Dot product of this matrix row with the input vector
+        for (Col = 0; Col < mpe ->InputChannels; Col++)
+            Acc += In[Col] * Coeffs[Col];
+
+        // The offset vector is optional
+        if (Mat ->Offset != NULL)
+            Acc += Mat ->Offset[Row];
+
+        Out[Row] = (cmsFloat32Number) Acc;
+    }
+}
+
+
+// Duplication callback of the matrix stage: builds a new private data block holding
+// copies of the coefficient table (InputChannels * OutputChannels doubles) and, when
+// the source has one, of the offset vector (OutputChannels doubles).
+// Returns NULL, after releasing any partial allocation, if a copy cannot be made.
+// This mirrors the error convention used by the other duplication callbacks in this file.
+static
+void* MatrixElemDup(cmsStage* mpe)
+{
+    _cmsStageMatrixData* Data = (_cmsStageMatrixData*) mpe ->Data;
+    _cmsStageMatrixData* NewElem;
+    cmsUInt32Number sz;
+
+    NewElem = (_cmsStageMatrixData*) _cmsMallocZero(mpe ->ContextID, sizeof(_cmsStageMatrixData));
+    if (NewElem == NULL) return NULL;
+
+    sz = mpe ->InputChannels * mpe ->OutputChannels;
+
+    NewElem ->Double = (cmsFloat64Number*) _cmsDupMem(mpe ->ContextID, Data ->Double, sz * sizeof(cmsFloat64Number)) ;
+
+    // Without coefficients the copy would be unusable: the evaluator reads them unconditionally
+    if (NewElem ->Double == NULL) goto Error;
+
+    if (Data ->Offset) {
+
+        NewElem ->Offset = (cmsFloat64Number*) _cmsDupMem(mpe ->ContextID,
+                                                Data ->Offset, mpe -> OutputChannels * sizeof(cmsFloat64Number)) ;
+
+        // Do not silently drop the offset of the source matrix
+        if (NewElem ->Offset == NULL) goto Error;
+    }
+
+    return (void*) NewElem;
+
+Error:
+    if (NewElem ->Double)
+        _cmsFree(mpe ->ContextID, NewElem ->Double);
+
+    _cmsFree(mpe ->ContextID, NewElem);
+    return NULL;
+}
+
+
+// Free callback of the matrix stage: releases the coefficient table, the offset
+// vector (each only when present) and the private data block itself.
+// Does nothing when the stage has no data.
+static
+void MatrixElemTypeFree(cmsStage* mpe)
+{
+    _cmsStageMatrixData* Mat = (_cmsStageMatrixData*) mpe ->Data;
+
+    if (Mat != NULL) {
+
+        if (Mat ->Double != NULL)
+            _cmsFree(mpe ->ContextID, Mat ->Double);
+
+        if (Mat ->Offset != NULL)
+            _cmsFree(mpe ->ContextID, Mat ->Offset);
+
+        _cmsFree(mpe ->ContextID, mpe ->Data);
+    }
+}
+
+
+
+// Create a matrix stage with Cols inputs and Rows outputs.
+//
+//   Matrix  Rows*Cols coefficients, copied into the stage (read unconditionally)
+//   Offset  optional vector of Rows values added after the product; NULL for none
+//
+// Returns NULL when Rows*Cols is zero or fails the overflow checks, or when any
+// allocation fails. On failure nothing is leaked: the private data is attached to
+// the stage as soon as it exists, so cmsStageFree() releases everything built so far.
+cmsStage*  CMSEXPORT cmsStageAllocMatrix(cmsContext ContextID, cmsUInt32Number Rows, cmsUInt32Number Cols,
+                                     const cmsFloat64Number* Matrix, const cmsFloat64Number* Offset)
+{
+    cmsUInt32Number i, n;
+    _cmsStageMatrixData* NewElem;
+    cmsStage* NewMPE;
+
+    n = Rows * Cols;
+
+    // Check for overflow. n == 0 also guarantees Rows and Cols are non-zero below
+    if (n == 0) return NULL;
+    if (n >= UINT_MAX / Cols) return NULL;
+    if (n >= UINT_MAX / Rows) return NULL;
+    if (n < Rows || n < Cols) return NULL;
+
+    NewMPE = _cmsStageAllocPlaceholder(ContextID, cmsSigMatrixElemType, Cols, Rows,
+                                     EvaluateMatrix, MatrixElemDup, MatrixElemTypeFree, NULL );
+    if (NewMPE == NULL) return NULL;
+
+
+    NewElem = (_cmsStageMatrixData*) _cmsMallocZero(ContextID, sizeof(_cmsStageMatrixData));
+    if (NewElem == NULL) goto Error;
+
+    // Attach the payload right away. The free callback bails out on a NULL Data, so
+    // attaching it only at the end would leak NewElem and its tables on every error path.
+    NewMPE ->Data  = (void*) NewElem;
+
+    NewElem ->Double = (cmsFloat64Number*) _cmsCalloc(ContextID, n, sizeof(cmsFloat64Number));
+    if (NewElem->Double == NULL) goto Error;
+
+    for (i=0; i < n; i++) {
+        NewElem ->Double[i] = Matrix[i];
+    }
+
+    if (Offset != NULL) {
+
+        NewElem ->Offset = (cmsFloat64Number*) _cmsCalloc(ContextID, Rows, sizeof(cmsFloat64Number));
+        if (NewElem->Offset == NULL) goto Error;
+
+        for (i=0; i < Rows; i++) {
+                NewElem ->Offset[i] = Offset[i];
+        }
+    }
+
+    return NewMPE;
+
+Error:
+    // Releases the stage and, through its free callback, any attached data
+    cmsStageFree(NewMPE);
+    return NULL;
+}
+
+
+// *************************************************************************************************
+// Type cmsSigCLutElemType
+// *************************************************************************************************
+
+
+// Evaluation callback of a floating point CLUT stage: delegates to the float
+// interpolation routine stored in the interpolation parameters.
+static
+void EvaluateCLUTfloat(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+    _cmsStageCLutData* Lut = (_cmsStageCLutData*) mpe ->Data;
+    cmsInterpParams* Params = Lut ->Params;
+
+    Params ->Interpolation.LerpFloat(In, Out, Params);
+}
+
+
+// Evaluation callback of a 16-bit CLUT stage used from the floating point path:
+// the input is converted to 16 bits, interpolated with the 16-bit routine and the
+// result converted back to floating point. Channel counts are asserted to fit the
+// temporary buffers.
+static
+void EvaluateCLUTfloatIn16(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+    cmsUInt16Number Packed[MAX_STAGE_CHANNELS], Result[MAX_STAGE_CHANNELS];
+    _cmsStageCLutData* Lut = (_cmsStageCLutData*) mpe ->Data;
+
+    _cmsAssert(mpe ->InputChannels  <= MAX_STAGE_CHANNELS);
+    _cmsAssert(mpe ->OutputChannels <= MAX_STAGE_CHANNELS);
+
+    // float -> 16 bits
+    FromFloatTo16(In, Packed, mpe ->InputChannels);
+
+    // Interpolate at 16-bit precision
+    Lut ->Params ->Interpolation.Lerp16(Packed, Result, Lut ->Params);
+
+    // 16 bits -> float
+    From16ToFloat(Result, Out, mpe ->OutputChannels);
+}
+
+
+// Given an hypercube of b dimensions, with Dims[] number of nodes by dimension, calculate
+// the total amount of nodes. Returns 0 on error: a dimension with zero nodes, or a
+// product that does not fit (or is too close to not fitting) in a cmsUInt32Number.
+// With b == 0 the result is 1.
+static
+cmsUInt32Number CubeSize(const cmsUInt32Number Dims[], cmsUInt32Number b)
+{
+    cmsUInt32Number rv, dim;
+
+    _cmsAssert(Dims != NULL);
+
+    for (rv = 1; b > 0; b--) {
+
+        dim = Dims[b-1];
+        if (dim == 0) return 0;  // Error
+
+        // Exact overflow test, done BEFORE multiplying. Testing only the wrapped result
+        // misses some cases (e.g. 3 * 0x55555556 wraps around to 2, which looks valid).
+        if (rv > UINT_MAX / dim) return 0;
+
+        rv *= dim;
+
+        // Conservative limit inherited from the previous implementation, kept so that no
+        // size which used to be rejected is accepted now
+        if (rv > UINT_MAX / dim) return 0;
+    }
+
+    return rv;
+}
+
+// Duplication callback of the CLUT stage: builds a new private data block with a copy
+// of the table (float or 16-bit, as flagged by HasFloatValues) and freshly computed
+// interpolation parameters pointing at that copy. Returns NULL, releasing any partial
+// allocation, if the table copy or the parameter computation fails.
+static
+void* CLUTElemDup(cmsStage* mpe)
+{
+    _cmsStageCLutData* Source = (_cmsStageCLutData*) mpe ->Data;
+    _cmsStageCLutData* Copy;
+    cmsBool TableOk = TRUE;
+
+    Copy = (_cmsStageCLutData*) _cmsMallocZero(mpe ->ContextID, sizeof(_cmsStageCLutData));
+    if (Copy == NULL) return NULL;
+
+    Copy ->nEntries       = Source ->nEntries;
+    Copy ->HasFloatValues = Source ->HasFloatValues;
+
+    // Copy the table, if the source has one, using the element size of its flavor
+    if (Source ->Tab.T) {
+
+        if (Source ->HasFloatValues) {
+            Copy ->Tab.TFloat = (cmsFloat32Number*) _cmsDupMem(mpe ->ContextID, Source ->Tab.TFloat, Source ->nEntries * sizeof (cmsFloat32Number));
+            TableOk = (Copy ->Tab.TFloat != NULL);
+        }
+        else {
+            Copy ->Tab.T = (cmsUInt16Number*) _cmsDupMem(mpe ->ContextID, Source ->Tab.T, Source ->nEntries * sizeof (cmsUInt16Number));
+            TableOk = (Copy ->Tab.T != NULL);
+        }
+    }
+
+    if (TableOk) {
+
+        Copy ->Params = _cmsComputeInterpParamsEx(mpe ->ContextID,
+                                                  Source ->Params ->nSamples,
+                                                  Source ->Params ->nInputs,
+                                                  Source ->Params ->nOutputs,
+                                                  Copy ->Tab.T,
+                                                  Source ->Params ->dwFlags);
+        if (Copy ->Params != NULL)
+            return (void*) Copy;
+    }
+
+    // Failure: undo. Tab.T works for both table types
+    if (Copy ->Tab.T)
+        _cmsFree(mpe ->ContextID, Copy ->Tab.T);
+
+    _cmsFree(mpe ->ContextID, Copy);
+    return NULL;
+}
+
+
+// Free callback of the CLUT stage: releases the table (if any), the interpolation
+// parameters and the private data block. Does nothing when the stage has no data.
+static
+void CLutElemTypeFree(cmsStage* mpe)
+{
+    _cmsStageCLutData* Lut = (_cmsStageCLutData*) mpe ->Data;
+
+    if (Lut != NULL) {
+
+        // Tab.T works for both table types
+        if (Lut ->Tab.T != NULL)
+            _cmsFree(mpe ->ContextID, Lut ->Tab.T);
+
+        _cmsFreeInterpParams(Lut ->Params);
+        _cmsFree(mpe ->ContextID, mpe ->Data);
+    }
+}
+
+
+// Allocates a 16-bit multidimensional CLUT. This is evaluated at 16-bit precision. Table may have different
+// granularity on each dimension.
+//
+//   clutPoints  number of nodes on each of the inputChan dimensions (must not be NULL)
+//   inputChan   number of input channels, at most MAX_INPUT_DIMENSIONS
+//   outputChan  number of output channels
+//   Table       optional initial contents, outputChan * nodes entries; NULL leaves the table zeroed
+//
+// Returns NULL when there are too many input channels (an error is signalled), when the
+// table size is zero or does not fit in a cmsUInt32Number, or when any allocation fails.
+cmsStage* CMSEXPORT cmsStageAllocCLut16bitGranular(cmsContext ContextID,
+                                         const cmsUInt32Number clutPoints[],
+                                         cmsUInt32Number inputChan,
+                                         cmsUInt32Number outputChan,
+                                         const cmsUInt16Number* Table)
+{
+    cmsUInt32Number i, n, nNodes;
+    _cmsStageCLutData* NewElem;
+    cmsStage* NewMPE;
+
+    _cmsAssert(clutPoints != NULL);
+
+    if (inputChan > MAX_INPUT_DIMENSIONS) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Too many input channels (%d channels, max=%d)", inputChan, MAX_INPUT_DIMENSIONS);
+        return NULL;
+    }
+
+    NewMPE = _cmsStageAllocPlaceholder(ContextID, cmsSigCLutElemType, inputChan, outputChan,
+                                     EvaluateCLUTfloatIn16, CLUTElemDup, CLutElemTypeFree, NULL );
+
+    if (NewMPE == NULL) return NULL;
+
+    NewElem = (_cmsStageCLutData*) _cmsMallocZero(ContextID, sizeof(_cmsStageCLutData));
+    if (NewElem == NULL) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    // From here on the stage owns the data, so freeing the stage frees everything
+    NewMPE ->Data  = (void*) NewElem;
+
+    // Total entries = outputs * nodes. Reject empty tables and products that would wrap
+    // around, which would otherwise yield a table smaller than the interpolation expects.
+    nNodes = CubeSize(clutPoints, inputChan);
+    if (nNodes == 0 || outputChan == 0 || nNodes > UINT_MAX / outputChan) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    NewElem -> nEntries = n = outputChan * nNodes;
+    NewElem -> HasFloatValues = FALSE;
+
+    NewElem ->Tab.T  = (cmsUInt16Number*) _cmsCalloc(ContextID, n, sizeof(cmsUInt16Number));
+    if (NewElem ->Tab.T == NULL) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    if (Table != NULL) {
+        for (i=0; i < n; i++) {
+            NewElem ->Tab.T[i] = Table[i];
+        }
+    }
+
+    NewElem ->Params = _cmsComputeInterpParamsEx(ContextID, clutPoints, inputChan, outputChan, NewElem ->Tab.T, CMS_LERP_FLAGS_16BITS);
+    if (NewElem ->Params == NULL) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    return NewMPE;
+}
+
+// Convenience wrapper around cmsStageAllocCLut16bitGranular() for a 16-bit CLUT that
+// has the same number of grid points, nGridPoints, on every dimension.
+cmsStage* CMSEXPORT cmsStageAllocCLut16bit(cmsContext ContextID,
+                                    cmsUInt32Number nGridPoints,
+                                    cmsUInt32Number inputChan,
+                                    cmsUInt32Number outputChan,
+                                    const cmsUInt16Number* Table)
+{
+    cmsUInt32Number Grid[MAX_INPUT_DIMENSIONS];
+    int d = 0;
+
+    // Same granularity on all possible dimensions
+    while (d < MAX_INPUT_DIMENSIONS) {
+        Grid[d] = nGridPoints;
+        d++;
+    }
+
+    return cmsStageAllocCLut16bitGranular(ContextID, Grid, inputChan, outputChan, Table);
+}
+
+
+// Convenience wrapper around cmsStageAllocCLutFloatGranular() for a floating point CLUT
+// that has the same number of grid points, nGridPoints, on every dimension.
+cmsStage* CMSEXPORT cmsStageAllocCLutFloat(cmsContext ContextID,
+                                       cmsUInt32Number nGridPoints,
+                                       cmsUInt32Number inputChan,
+                                       cmsUInt32Number outputChan,
+                                       const cmsFloat32Number* Table)
+{
+    cmsUInt32Number Grid[MAX_INPUT_DIMENSIONS];
+    int d = 0;
+
+    // Same granularity on all possible dimensions
+    while (d < MAX_INPUT_DIMENSIONS) {
+        Grid[d] = nGridPoints;
+        d++;
+    }
+
+    return cmsStageAllocCLutFloatGranular(ContextID, Grid, inputChan, outputChan, Table);
+}
+
+
+
+// Allocates a floating point multidimensional CLUT, evaluated with the float interpolation
+// routines. Table may have different granularity on each dimension.
+//
+//   clutPoints  number of nodes on each of the inputChan dimensions (must not be NULL)
+//   inputChan   number of input channels, at most MAX_INPUT_DIMENSIONS
+//   outputChan  number of output channels
+//   Table       optional initial contents, outputChan * nodes entries; NULL leaves the table zeroed
+//
+// Returns NULL when there are too many input channels (an error is signalled), when the
+// table size is zero or does not fit in a cmsUInt32Number, or when any allocation fails.
+cmsStage* CMSEXPORT cmsStageAllocCLutFloatGranular(cmsContext ContextID, const cmsUInt32Number clutPoints[], cmsUInt32Number inputChan, cmsUInt32Number outputChan, const cmsFloat32Number* Table)
+{
+    cmsUInt32Number i, n, nNodes;
+    _cmsStageCLutData* NewElem;
+    cmsStage* NewMPE;
+
+    _cmsAssert(clutPoints != NULL);
+
+    if (inputChan > MAX_INPUT_DIMENSIONS) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Too many input channels (%d channels, max=%d)", inputChan, MAX_INPUT_DIMENSIONS);
+        return NULL;
+    }
+
+    NewMPE = _cmsStageAllocPlaceholder(ContextID, cmsSigCLutElemType, inputChan, outputChan,
+                                             EvaluateCLUTfloat, CLUTElemDup, CLutElemTypeFree, NULL);
+    if (NewMPE == NULL) return NULL;
+
+
+    NewElem = (_cmsStageCLutData*) _cmsMallocZero(ContextID, sizeof(_cmsStageCLutData));
+    if (NewElem == NULL) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    // From here on the stage owns the data, so freeing the stage frees everything
+    NewMPE ->Data  = (void*) NewElem;
+
+    // Total entries = outputs * nodes. Computing that product can overflow, so reject
+    // empty tables and products that would wrap around before multiplying.
+    nNodes = CubeSize(clutPoints, inputChan);
+    if (nNodes == 0 || outputChan == 0 || nNodes > UINT_MAX / outputChan) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    NewElem -> nEntries = n = outputChan * nNodes;
+    NewElem -> HasFloatValues = TRUE;
+
+    NewElem ->Tab.TFloat  = (cmsFloat32Number*) _cmsCalloc(ContextID, n, sizeof(cmsFloat32Number));
+    if (NewElem ->Tab.TFloat == NULL) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    if (Table != NULL) {
+        for (i=0; i < n; i++) {
+            NewElem ->Tab.TFloat[i] = Table[i];
+        }
+    }
+
+    NewElem ->Params = _cmsComputeInterpParamsEx(ContextID, clutPoints,  inputChan, outputChan, NewElem ->Tab.TFloat, CMS_LERP_FLAGS_FLOAT);
+    if (NewElem ->Params == NULL) {
+        cmsStageFree(NewMPE);
+        return NULL;
+    }
+
+    return NewMPE;
+}
+
+
+// Sampler callback that copies input to output. Cargo points to an int holding
+// the number of channels to copy. Always returns 1.
+static
+int IdentitySampler(register const cmsUInt16Number In[], register cmsUInt16Number Out[], register void * Cargo)
+{
+    const int nChannels = *(int*) Cargo;
+    int ch = 0;
+
+    while (ch < nChannels) {
+        Out[ch] = In[ch];
+        ch++;
+    }
+
+    return 1;
+}
+
+// Creates an MPE that just copies input to output: a 16-bit CLUT with 2 nodes per
+// dimension, filled through IdentitySampler and flagged as implementing an identity
+// element. Returns NULL if the CLUT cannot be allocated or sampled.
+cmsStage* _cmsStageAllocIdentityCLut(cmsContext ContextID, cmsUInt32Number nChan)
+{
+    cmsUInt32Number Grid[MAX_INPUT_DIMENSIONS];
+    cmsStage* Stage;
+    int d = 0;
+
+    // Two nodes on every possible dimension
+    while (d < MAX_INPUT_DIMENSIONS) {
+        Grid[d] = 2;
+        d++;
+    }
+
+    Stage = cmsStageAllocCLut16bitGranular(ContextID, Grid, nChan, nChan, NULL);
+    if (Stage == NULL) return NULL;
+
+    // Fill the table; the channel count travels to the sampler as cargo
+    if (cmsStageSampleCLut16bit(Stage, IdentitySampler, &nChan, 0)) {
+
+        Stage ->Implements = cmsSigIdentityElemType;
+        return Stage;
+    }
+
+    cmsStageFree(Stage);
+    return NULL;
+}
+
+
+
+// Quantize a value 0 <= i < MaxSamples to 0..0xffff: i is scaled by 65535 / (MaxSamples - 1)
+// and passed through _cmsQuickSaturateWord().
+// A grid of a single sample (or none) has no span to scale over; 0 is returned in that
+// case instead of dividing by zero.
+cmsUInt16Number _cmsQuantizeVal(cmsFloat64Number i, cmsUInt32Number MaxSamples)
+{
+    cmsFloat64Number x;
+
+    // MaxSamples == 1 would make the divisor 0 (0/0 -> NaN for the only valid i),
+    // and MaxSamples == 0 would wrap the unsigned subtraction around
+    if (MaxSamples <= 1) return 0;
+
+    x = ((cmsFloat64Number) i * 65535.) / (cmsFloat64Number) (MaxSamples - 1);
+    return _cmsQuickSaturateWord(x);
+}
+
+
+// Sweep the whole input space of a 16-bit CLUT stage, calling Sampler on every node.
+// For each node, In[] holds the quantized node coordinates and Out[] the current table
+// contents (when there is a table). Unless SAMPLER_INSPECT is set in dwFlags, Out[] is
+// written back to the table after the call.
+// Returns FALSE when the stage or its data is missing, when the channel counts are out
+// of range, when the node count is zero, or as soon as Sampler returns 0. TRUE otherwise.
+cmsBool CMSEXPORT cmsStageSampleCLut16bit(cmsStage* mpe, cmsSAMPLER16 Sampler, void * Cargo, cmsUInt32Number dwFlags)
+{
+    cmsUInt16Number In[MAX_INPUT_DIMENSIONS+1], Out[MAX_STAGE_CHANNELS];
+    _cmsStageCLutData* Lut;
+    cmsUInt32Number* Grid;
+    cmsUInt32Number nIn, nOut, nNodes;
+    int Node, Chan, Base, Rest;
+
+    if (mpe == NULL) return FALSE;
+
+    Lut = (_cmsStageCLutData*) mpe->Data;
+    if (Lut == NULL) return FALSE;
+
+    Grid = Lut->Params ->nSamples;
+    nIn  = Lut->Params ->nInputs;
+    nOut = Lut->Params ->nOutputs;
+
+    // Channel counts must fit the local buffers
+    if (nIn == 0 || nOut == 0) return FALSE;
+    if (nIn > MAX_INPUT_DIMENSIONS || nOut >= MAX_STAGE_CHANNELS) return FALSE;
+
+    memset(In, 0, sizeof(In));
+    memset(Out, 0, sizeof(Out));
+
+    nNodes = CubeSize(Grid, nIn);
+    if (nNodes == 0) return FALSE;
+
+    // Base is the table offset of the current node: it advances nOut entries per node
+    for (Node = 0, Base = 0; Node < (int) nNodes; Node++, Base += nOut) {
+
+        // Decompose the node number into per-dimension coordinates, last dimension first
+        Rest = Node;
+        Chan = (int) nIn;
+        while (Chan-- > 0) {
+
+            cmsUInt32Number  Colorant = Rest % Grid[Chan];
+
+            Rest /= Grid[Chan];
+
+            In[Chan] = _cmsQuantizeVal(Colorant, Grid[Chan]);
+        }
+
+        // Present the current contents to the sampler
+        if (Lut ->Tab.T != NULL) {
+            for (Chan = 0; Chan < (int) nOut; Chan++)
+                Out[Chan] = Lut->Tab.T[Base + Chan];
+        }
+
+        if (!Sampler(In, Out, Cargo))
+            return FALSE;
+
+        // Store what the sampler produced, unless we are only inspecting
+        if (!(dwFlags & SAMPLER_INSPECT) && Lut ->Tab.T != NULL) {
+            for (Chan = 0; Chan < (int) nOut; Chan++)
+                Lut->Tab.T[Base + Chan] = Out[Chan];
+        }
+    }
+
+    return TRUE;
+}
+
+// Floating point counterpart of cmsStageSampleCLut16bit(): sweep the whole input space
+// of a float CLUT stage, calling Sampler on every node.
+// For each node, In[] holds the node coordinates in 0..1.0 and Out[] the current table
+// contents (when there is a table; zeros otherwise on the first node). Unless
+// SAMPLER_INSPECT is set in dwFlags, Out[] is written back to the table after the call.
+// Returns FALSE when the stage or its data is missing, when the channel counts are out
+// of range, when the node count is zero, or as soon as Sampler returns 0. TRUE otherwise.
+cmsBool CMSEXPORT cmsStageSampleCLutFloat(cmsStage* mpe, cmsSAMPLERFLOAT Sampler, void * Cargo, cmsUInt32Number dwFlags)
+{
+    int i, t, index, rest;
+    cmsUInt32Number nTotalPoints;
+    cmsUInt32Number nInputs, nOutputs;
+    cmsUInt32Number* nSamples;
+    cmsFloat32Number In[MAX_INPUT_DIMENSIONS+1], Out[MAX_STAGE_CHANNELS];
+    _cmsStageCLutData* clut;
+
+    // Same defensive checks as the 16-bit version
+    if (mpe == NULL) return FALSE;
+
+    clut = (_cmsStageCLutData*) mpe->Data;
+
+    if (clut == NULL) return FALSE;
+
+    nSamples = clut->Params ->nSamples;
+    nInputs  = clut->Params ->nInputs;
+    nOutputs = clut->Params ->nOutputs;
+
+    if (nInputs <= 0) return FALSE;
+    if (nOutputs <= 0) return FALSE;
+    if (nInputs  > MAX_INPUT_DIMENSIONS) return FALSE;
+    if (nOutputs >= MAX_STAGE_CHANNELS) return FALSE;
+
+    // Never hand uninitialized values to the sampler (Out is not loaded when there is no table)
+    memset(In, 0, sizeof(In));
+    memset(Out, 0, sizeof(Out));
+
+    nTotalPoints = CubeSize(nSamples, nInputs);
+    if (nTotalPoints == 0) return FALSE;
+
+    // index is the table offset of the current node
+    index = 0;
+    for (i = 0; i < (int)nTotalPoints; i++) {
+
+        // Decompose the node number into per-dimension coordinates, last dimension first
+        rest = i;
+        for (t = (int) nInputs-1; t >=0; --t) {
+
+            cmsUInt32Number  Colorant = rest % nSamples[t];
+
+            rest /= nSamples[t];
+
+            In[t] =  (cmsFloat32Number) (_cmsQuantizeVal(Colorant, nSamples[t]) / 65535.0);
+        }
+
+        // Present the current contents to the sampler
+        if (clut ->Tab.TFloat != NULL) {
+            for (t=0; t < (int) nOutputs; t++)
+                Out[t] = clut->Tab.TFloat[index + t];
+        }
+
+        if (!Sampler(In, Out, Cargo))
+            return FALSE;
+
+        // Store what the sampler produced, unless we are only inspecting
+        if (!(dwFlags & SAMPLER_INSPECT)) {
+
+            if (clut ->Tab.TFloat != NULL) {
+                for (t=0; t < (int) nOutputs; t++)
+                    clut->Tab.TFloat[index + t] = Out[t];
+            }
+        }
+
+        index += nOutputs;
+    }
+
+    return TRUE;
+}
+
+
+
+// Sweep a whole input space of nInputs dimensions, with clutPoints[] nodes on each one,
+// calling Sampler on every node with the quantized 16-bit coordinates in In[] and a NULL
+// output. Returns FALSE when nInputs is cmsMAXCHANNELS or more, when the node count is
+// zero, or as soon as Sampler returns 0. TRUE otherwise.
+cmsBool CMSEXPORT cmsSliceSpace16(cmsUInt32Number nInputs, const cmsUInt32Number clutPoints[],
+                                         cmsSAMPLER16 Sampler, void * Cargo)
+{
+    cmsUInt16Number In[cmsMAXCHANNELS];
+    cmsUInt32Number nNodes;
+    int Node, Chan, Rest;
+
+    if (nInputs >= cmsMAXCHANNELS) return FALSE;
+
+    nNodes = CubeSize(clutPoints, nInputs);
+    if (nNodes == 0) return FALSE;
+
+    for (Node = 0; Node < (int) nNodes; Node++) {
+
+        // Decompose the node number into per-dimension coordinates, last dimension first
+        Rest = Node;
+        Chan = (int) nInputs;
+        while (Chan-- > 0) {
+
+            cmsUInt32Number  Colorant = Rest % clutPoints[Chan];
+
+            Rest /= clutPoints[Chan];
+            In[Chan] = _cmsQuantizeVal(Colorant, clutPoints[Chan]);
+        }
+
+        if (!Sampler(In, NULL, Cargo))
+            return FALSE;
+    }
+
+    return TRUE;
+}
+
+// Float flavour of the grid sweep: each node coordinate is the quantized value
+// divided by 65535.0, and the callback always receives NULL as Out. Returns FALSE when
+// nInputs >= cmsMAXCHANNELS, CubeSize() yields 0 or the callback returns FALSE; else TRUE.
+cmsInt32Number CMSEXPORT cmsSliceSpaceFloat(cmsUInt32Number nInputs, const cmsUInt32Number clutPoints[],
+                                            cmsSAMPLERFLOAT Sampler, void * Cargo)
+{
+    cmsFloat32Number Coords[cmsMAXCHANNELS];
+    cmsUInt32Number nNodes;
+    int point, dim, remainder;
+
+    if (nInputs >= cmsMAXCHANNELS) return FALSE;
+
+    nNodes = CubeSize(clutPoints, nInputs);
+    if (nNodes == 0) return FALSE;
+
+    for (point = 0; point < (int) nNodes; point++) {
+
+        // Peel off one axis position per step, starting with the last axis
+        remainder = point;
+        for (dim = (int) nInputs - 1; dim >= 0; --dim) {
+
+            cmsUInt32Number Pos = remainder % clutPoints[dim];
+
+            remainder /= clutPoints[dim];
+            Coords[dim] = (cmsFloat32Number) (_cmsQuantizeVal(Pos, clutPoints[dim]) / 65535.0);
+        }
+        if (!Sampler(Coords, NULL, Cargo)) return FALSE;
+    }
+    return TRUE;
+}
+
+// ********************************************************************************
+// Type cmsSigLab2XYZElemType
+// ********************************************************************************
+
+
+static
+void EvaluateLab2XYZ(const cmsFloat32Number In[],
+                     cmsFloat32Number Out[],
+                     const cmsStage *mpe)
+{
+    // Stage evaluator: 0..1.0 encoded V4 Lab in, 0..1.0 encoded XYZ out.
+    const cmsFloat64Number Scale = MAX_ENCODEABLE_XYZ;
+    cmsCIEXYZ xyz;
+    cmsCIELab lab;
+
+    // V4 rules: L* = v * 100, a* and b* = v * 255 - 128
+    lab.L = In[0] * 100.0;
+    lab.a = In[1] * 255.0 - 128.0;
+    lab.b = In[2] * 255.0 - 128.0;
+
+    cmsLab2XYZ(NULL, &xyz, &lab);
+
+    // Bring XYZ from 0..1.99997 down to 0..1.0; note that 1.99997 comes from 0xffff
+    // encoded as 1.15 fixed point, so 1 + (32767.0 / 32768.0)
+    Out[0] = (cmsFloat32Number) ((cmsFloat64Number) xyz.X / Scale);
+    Out[1] = (cmsFloat32Number) ((cmsFloat64Number) xyz.Y / Scale);
+    Out[2] = (cmsFloat32Number) ((cmsFloat64Number) xyz.Z / Scale);
+    return;
+
+    cmsUNUSED_PARAMETER(mpe);   // placed after the return; by its name it only marks mpe as unused
+}
+
+
+// Allocates the 3->3 Lab-to-XYZ stage evaluated by EvaluateLab2XYZ. No dup or free routines and no data are passed.
+cmsStage* _cmsStageAllocLab2XYZ(cmsContext ContextID)
+{
+    return _cmsStageAllocPlaceholder(ContextID, cmsSigLab2XYZElemType, 3, 3, EvaluateLab2XYZ, NULL, NULL, NULL);
+}
+
+// ********************************************************************************
+
+// v2 L=100 is supposed to be placed on 0xFF00. There is no reasonable
+// number of gridpoints that would make exact match. However, a prelinearization
+// of 258 entries, would map 0xFF00 exactly on entry 257, and this is good to avoid scum dot.
+// Almost all what we need but unfortunately, the rest of entries should be scaled by
+// (255*257/256) and this is not exact.
+
+cmsStage* _cmsStageAllocLabV2ToV4curves(cmsContext ContextID)
+{
+    // Builds a 3-channel tone-curve stage tagged cmsSigLabV2toV4; NULL on failure.
+    cmsToneCurve* Curves[3];
+    cmsStage* Stage;
+    int ch, n;
+
+    for (ch = 0; ch < 3; ch++)
+        Curves[ch] = cmsBuildTabulatedToneCurve16(ContextID, 258, NULL);
+
+    // Any failed allocation aborts the whole construction
+    if (Curves[0] == NULL || Curves[1] == NULL || Curves[2] == NULL) {
+        cmsFreeToneCurveTriple(Curves);
+        return NULL;
+    }
+
+    for (ch = 0; ch < 3; ch++) {
+
+        // We need to map * (0xffff / 0xff00), that's same as (257 / 256), so 258-entry
+        // tables do the trick: (i / 257) * (255 * 257) * (257 / 256), rounded to nearest
+        for (n = 0; n < 257; n++)
+            Curves[ch]->Table16[n] = (cmsUInt16Number) ((n * 0xffff + 0x80) >> 8);
+
+        Curves[ch]->Table16[257] = 0xffff;    // last entry pinned to full scale
+    }
+
+    // The triple is released right after the call; presumably the stage holds its own copies
+    Stage = cmsStageAllocToneCurves(ContextID, 3, Curves);
+    cmsFreeToneCurveTriple(Curves);
+
+    if (Stage == NULL) return NULL;
+    Stage ->Implements = cmsSigLabV2toV4;
+    return Stage;
+}
+
+// ********************************************************************************
+
+// Matrix-based conversion, which is more accurate, but slower and cannot properly be saved in devicelink profiles
+cmsStage* _cmsStageAllocLabV2ToV4(cmsContext ContextID)
+{
+    static const cmsFloat64Number V2ToV4[] = { 65535.0/65280.0, 0, 0,
+                                     0, 65535.0/65280.0, 0,
+                                     0, 0, 65535.0/65280.0
+                                     };
+
+    cmsStage *mpe = cmsStageAllocMatrix(ContextID, 3, 3, V2ToV4, NULL);
+
+    if (mpe == NULL) return mpe;
+    mpe ->Implements = cmsSigLabV2toV4;
+    return mpe;
+}
+
+
+// Reverse direction
+cmsStage* _cmsStageAllocLabV4ToV2(cmsContext ContextID)
+{
+    static const cmsFloat64Number V4ToV2[] = { 65280.0/65535.0, 0, 0,
+                                     0, 65280.0/65535.0, 0,
+                                     0, 0, 65280.0/65535.0
+                                     };
+
+     cmsStage *mpe = cmsStageAllocMatrix(ContextID, 3, 3, V4ToV2, NULL);
+
+    if (mpe == NULL) return mpe;
+    mpe ->Implements = cmsSigLabV4toV2;
+    return mpe;
+}
+
+
+// From float Lab to normalized values. The MPE gives numbers in normal Lab range
+// and we need 0..1.0 range for the formatters
+// L* : 0...100 => 0...1.0  (L* / 100)
+// ab* : -128..+127 to 0..1  ((ab* + 128) / 255)
+
+cmsStage* _cmsStageNormalizeFromLabFloat(cmsContext ContextID)
+{
+    // Diagonal scale factors: 1/100 for L*, 1/255 for a* and b*
+    static const cmsFloat64Number Scale[] = {
+        1.0/100.0, 0, 0,
+        0, 1.0/255.0, 0,
+        0, 0, 1.0/255.0
+    };
+
+    // Offset terms: the 128/255 part of (ab* + 128) / 255
+    static const cmsFloat64Number Shift[] = {
+        0,
+        128.0/255.0,
+        128.0/255.0
+    };
+    cmsStage* Stage = cmsStageAllocMatrix(ContextID, 3, 3, Scale, Shift);
+
+    if (Stage != NULL) Stage ->Implements = cmsSigLab2FloatPCS;
+    return Stage;
+}
+
+// From XYZ to floating point PCS
+cmsStage* _cmsStageNormalizeFromXyzFloat(cmsContext ContextID)
+{
+    // Uniform factor 32768/65535 on the diagonal, no offset vector
+    static const cmsFloat64Number Scale[] = {
+        (32768.0/65535.0), 0, 0,
+        0, (32768.0/65535.0), 0,
+        0, 0, (32768.0/65535.0)
+    };
+
+    cmsStage* Stage = cmsStageAllocMatrix(ContextID, 3, 3, Scale, NULL);
+
+    if (Stage != NULL)
+        Stage ->Implements = cmsSigXYZ2FloatPCS;
+
+    return Stage;
+}
+
+cmsStage* _cmsStageNormalizeToLabFloat(cmsContext ContextID)
+{
+    // Matrix stage with factors 100 / 255 / 255 and offsets 0 / -128 / -128, tagged cmsSigFloatPCS2Lab
+    static const cmsFloat64Number Scale[] = {
+        100.0, 0, 0,
+        0, 255.0, 0,
+        0, 0, 255.0
+    };
+
+    static const cmsFloat64Number Shift[] = {
+        0,
+        -128.0,
+        -128.0
+    };
+    cmsStage* Stage = cmsStageAllocMatrix(ContextID, 3, 3, Scale, Shift);
+
+    if (Stage != NULL) Stage ->Implements = cmsSigFloatPCS2Lab;
+    return Stage;
+}
+
+cmsStage* _cmsStageNormalizeToXyzFloat(cmsContext ContextID)
+{
+    // Uniform factor 65535/32768 on the diagonal, no offset vector; tagged cmsSigFloatPCS2XYZ
+    static const cmsFloat64Number Scale[] = {
+        (65535.0/32768.0), 0, 0,
+        0, (65535.0/32768.0), 0,
+        0, 0, (65535.0/32768.0)
+    };
+
+    cmsStage* Stage = cmsStageAllocMatrix(ContextID, 3, 3, Scale, NULL);
+
+    if (Stage != NULL)
+        Stage ->Implements = cmsSigFloatPCS2XYZ;
+
+    return Stage;
+}
+
+// Clips values smaller than zero
+static
+void Clipper(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+       cmsUInt32Number ch = 0;
+       // One pass over the stage's input channels: negatives become 0, the rest is copied
+       while (ch < mpe->InputChannels) {
+              Out[ch] = (In[ch] < 0) ? 0 : In[ch];
+              ch++;
+       }
+}
+// Allocates an nChannels -> nChannels stage evaluated by Clipper; no dup/free routines or data are passed.
+cmsStage*  _cmsStageClipNegatives(cmsContext ContextID, cmsUInt32Number nChannels)
+{
+       return _cmsStageAllocPlaceholder(ContextID, cmsSigClipNegativesElemType,
+              nChannels, nChannels, Clipper, NULL, NULL, NULL);
+}
+
+// ********************************************************************************
+// Type cmsSigXYZ2LabElemType
+// ********************************************************************************
+
+static
+void EvaluateXYZ2Lab(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+    // Stage evaluator: 0..1.0 encoded XYZ in, 0..1.0 encoded V4 Lab out.
+    // Three channels are read and three are written.
+    const cmsFloat64Number Scale = MAX_ENCODEABLE_XYZ;
+    cmsCIEXYZ xyz;
+    cmsCIELab lab;
+
+    // From 0..1.0 to XYZ
+    xyz.X = In[0] * Scale;
+    xyz.Y = In[1] * Scale;
+    xyz.Z = In[2] * Scale;
+
+    cmsXYZ2Lab(NULL, &lab, &xyz);
+
+    // From V4 Lab to 0..1.0: L* / 100, (a* + 128) / 255, (b* + 128) / 255
+    Out[0] = (cmsFloat32Number) (lab.L / 100.0);
+    Out[1] = (cmsFloat32Number) ((lab.a + 128.0) / 255.0);
+    Out[2] = (cmsFloat32Number) ((lab.b + 128.0) / 255.0);
+    return;
+
+    cmsUNUSED_PARAMETER(mpe);   // placed after the return; by its name it only marks mpe as unused
+}
+// Allocates the 3->3 XYZ-to-Lab stage evaluated by EvaluateXYZ2Lab; no dup/free routines or data are passed.
+cmsStage* _cmsStageAllocXYZ2Lab(cmsContext ContextID)
+{
+    return _cmsStageAllocPlaceholder(ContextID, cmsSigXYZ2LabElemType, 3, 3, EvaluateXYZ2Lab, NULL, NULL, NULL);
+
+}
+
+// ********************************************************************************
+// For v4, S-Shaped curves are placed in a/b axis to increase resolution near gray.
+// Returns NULL when a curve or the stage cannot be built; the local curves are always released.
+cmsStage* _cmsStageAllocLabPrelin(cmsContext ContextID)
+{
+    cmsStage* mpe = NULL;
+    cmsToneCurve* LabTable[3];
+    cmsFloat64Number Params[1] =  {2.4} ;
+
+    LabTable[0] = cmsBuildGamma(ContextID, 1.0);
+    LabTable[1] = cmsBuildParametricToneCurve(ContextID, 108, Params);
+    LabTable[2] = cmsBuildParametricToneCurve(ContextID, 108, Params);
+    if (LabTable[0] != NULL && LabTable[1] != NULL && LabTable[2] != NULL) mpe = cmsStageAllocToneCurves(ContextID, 3, LabTable);
+    cmsFreeToneCurveTriple(LabTable);   // same release as in _cmsStageAllocLabV2ToV4curves, also with NULL entries
+    return mpe;
+}
+
+// Free a single MPE. A NULL stage is ignored.
+void CMSEXPORT cmsStageFree(cmsStage* mpe)
+{
+    if (mpe == NULL) return;
+
+    if (mpe ->FreePtr) mpe ->FreePtr(mpe);   // the stage's own free routine, when set, runs first
+    _cmsFree(mpe ->ContextID, mpe);
+}
+
+// Returns the stage's InputChannels field. mpe is dereferenced without a NULL check.
+cmsUInt32Number  CMSEXPORT cmsStageInputChannels(const cmsStage* mpe)
+{
+    return mpe ->InputChannels;
+}
+// Returns the stage's OutputChannels field. mpe is dereferenced without a NULL check.
+cmsUInt32Number  CMSEXPORT cmsStageOutputChannels(const cmsStage* mpe)
+{
+    return mpe ->OutputChannels;
+}
+// Returns the stage's Type signature. mpe is dereferenced without a NULL check.
+cmsStageSignature CMSEXPORT cmsStageType(const cmsStage* mpe)
+{
+    return mpe -> Type;
+}
+// Returns the stage's Data pointer (may be NULL). mpe is dereferenced without a NULL check.
+void* CMSEXPORT cmsStageData(const cmsStage* mpe)
+{
+    return mpe -> Data;
+}
+// Returns the stage linked after mpe (its Next field). mpe is dereferenced without a NULL check.
+cmsStage*  CMSEXPORT cmsStageNext(const cmsStage* mpe)
+{
+    return mpe -> Next;
+}
+
+
+// Duplicates an MPE. Returns NULL for a NULL input or when any allocation fails.
+cmsStage* CMSEXPORT cmsStageDup(cmsStage* mpe)
+{
+    cmsStage* Copy;
+
+    if (mpe == NULL) return NULL;
+
+    // Clone the stage shell (context, type, channel counts, callbacks) with no
+    // private data attached yet
+    Copy = _cmsStageAllocPlaceholder(mpe ->ContextID,
+                                     mpe ->Type,
+                                     mpe ->InputChannels,
+                                     mpe ->OutputChannels,
+                                     mpe ->EvalPtr,
+                                     mpe ->DupElemPtr,
+                                     mpe ->FreePtr,
+                                     NULL);
+    if (Copy == NULL) return NULL;
+
+    Copy ->Implements = mpe ->Implements;
+
+    // Without a duplicator there is nothing more to copy
+    if (mpe ->DupElemPtr == NULL) {
+        Copy ->Data = NULL;
+        return Copy;
+    }
+
+    // Private data is cloned through the stage's own duplicator
+    Copy ->Data = mpe ->DupElemPtr(mpe);
+    if (Copy ->Data != NULL) return Copy;
+
+    // Duplication of the data failed: drop the half-built copy
+    cmsStageFree(Copy);
+    return NULL;
+}
+
+
+// ***********************************************************************************************************
+
+// Sets up the pipeline channel counts from its stages and checks the chain.
+// An empty pipeline is accepted as is. Returns FALSE when two consecutive
+// stages disagree on the number of channels passed between them.
+static
+cmsBool BlessLUT(cmsPipeline* lut)
+{
+    cmsStage* Head;
+    cmsStage* Tail;
+    cmsStage* Cur;
+
+    // We can set the input/output channels only if we have elements.
+    if (lut ->Elements == NULL)
+        return TRUE;
+
+    Head = cmsPipelineGetPtrToFirstStage(lut);
+    Tail = cmsPipelineGetPtrToLastStage(lut);
+
+    if (Head == NULL || Tail == NULL)
+        return FALSE;
+
+    // Pipeline input is the first stage's input, output is the last stage's output
+    lut->InputChannels  = Head->InputChannels;
+    lut->OutputChannels = Tail->OutputChannels;
+
+    // Check chain consistency: each stage must take exactly what its
+    // predecessor delivers
+    for (Cur = Head; Cur->Next != NULL; Cur = Cur->Next) {
+
+        const cmsStage* Following = Cur->Next;
+
+        if (Following->InputChannels != Cur->OutputChannels)
+            return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Default to evaluate the LUT on 16 bit-basis. Precision is retained.
+static
+void _LUTeval16(register const cmsUInt16Number In[], register cmsUInt16Number Out[],  register const void* D)
+{
+    cmsPipeline* lut = (cmsPipeline*) D;
+    cmsFloat32Number Buffers[2][MAX_STAGE_CHANNELS];
+    cmsStage* Stage;
+    int Cur = 0;
+
+    // Stages work on floats: convert the 16-bit input into the first buffer
+    From16ToFloat(In, &Buffers[Cur][0], lut ->InputChannels);
+
+    // Ping-pong between the two buffers: every stage reads one and writes the other
+    Stage = lut ->Elements;
+    while (Stage != NULL) {
+
+        Stage ->EvalPtr(&Buffers[Cur][0], &Buffers[Cur ^ 1][0], Stage);
+        Cur ^= 1;
+        Stage = Stage ->Next;
+    }
+
+    FromFloatTo16(&Buffers[Cur][0], Out, lut ->OutputChannels);
+}
+
+
+
+// Does evaluate the LUT on cmsFloat32Number-basis.
+static
+void _LUTevalFloat(register const cmsFloat32Number In[], register cmsFloat32Number Out[], const void* D)
+{
+    cmsPipeline* lut = (cmsPipeline*) D;
+    cmsFloat32Number Buffers[2][MAX_STAGE_CHANNELS];
+    cmsStage* Stage;
+    int Cur = 0;
+
+    memmove(&Buffers[Cur][0], In, lut ->InputChannels  * sizeof(cmsFloat32Number));
+
+    // Ping-pong between the two buffers: every stage reads one and writes the other
+    Stage = lut ->Elements;
+    while (Stage != NULL) {
+        Stage ->EvalPtr(&Buffers[Cur][0], &Buffers[Cur ^ 1][0], Stage);
+        Cur ^= 1;
+        Stage = Stage ->Next;
+    }
+
+    // The buffer written last holds the result
+    memmove(Out, &Buffers[Cur][0], lut ->OutputChannels * sizeof(cmsFloat32Number));
+}
+
+
+// LUT Creation & Destruction
+cmsPipeline* CMSEXPORT cmsPipelineAlloc(cmsContext ContextID, cmsUInt32Number InputChannels, cmsUInt32Number OutputChannels)
+{
+    cmsPipeline* Pipe;
+
+    // A value of zero in channels is allowed as placeholder; only the upper bound is checked
+    if (InputChannels >= cmsMAXCHANNELS) return NULL;
+    if (OutputChannels >= cmsMAXCHANNELS) return NULL;
+
+    Pipe = (cmsPipeline*) _cmsMallocZero(ContextID, sizeof(cmsPipeline));
+    if (Pipe == NULL) return NULL;
+
+    Pipe ->ContextID      = ContextID;
+    Pipe ->InputChannels  = InputChannels;
+    Pipe ->OutputChannels = OutputChannels;
+
+    // Default evaluators. The pipeline itself serves as evaluator data and
+    // no private-data dup/free hooks are installed
+    Pipe ->Eval16Fn    = _LUTeval16;
+    Pipe ->EvalFloatFn = _LUTevalFloat;
+    Pipe ->Data        = Pipe;
+    Pipe ->DupDataFn   = NULL;
+    Pipe ->FreeDataFn  = NULL;
+
+    // Consistency check; on failure the fresh pipeline is released again
+    if (BlessLUT(Pipe)) return Pipe;
+
+    _cmsFree(ContextID, Pipe);
+    return NULL;
+}
+// Returns the ContextID stored in the pipeline. lut is checked with _cmsAssert only.
+cmsContext CMSEXPORT cmsGetPipelineContextID(const cmsPipeline* lut)
+{
+    _cmsAssert(lut != NULL);
+    return lut ->ContextID;
+}
+// Returns the pipeline's InputChannels field. lut is checked with _cmsAssert only.
+cmsUInt32Number CMSEXPORT cmsPipelineInputChannels(const cmsPipeline* lut)
+{
+    _cmsAssert(lut != NULL);
+    return lut ->InputChannels;
+}
+// Returns the pipeline's OutputChannels field. lut is checked with _cmsAssert only.
+cmsUInt32Number CMSEXPORT cmsPipelineOutputChannels(const cmsPipeline* lut)
+{
+    _cmsAssert(lut != NULL);
+    return lut ->OutputChannels;
+}
+
+// Free a profile elements LUT. A NULL pipeline is ignored.
+void CMSEXPORT cmsPipelineFree(cmsPipeline* lut)
+{
+    cmsStage* Stage;
+
+    if (lut == NULL) return;
+
+    // Release every stage; the successor is fetched before the node goes away
+    Stage = lut ->Elements;
+    while (Stage != NULL) {
+        cmsStage* Following = Stage ->Next;
+        cmsStageFree(Stage);
+        Stage = Following;
+    }
+
+    // Private data is released only when a free routine was installed
+    if (lut ->FreeDataFn) lut ->FreeDataFn(lut ->ContextID, lut ->Data);
+    _cmsFree(lut ->ContextID, lut);
+}
+
+// Evaluates the pipeline on 16-bit values by calling its Eval16Fn.
+// The evaluator receives lut->Data (not lut itself) as its third argument.
+void CMSEXPORT cmsPipelineEval16(const cmsUInt16Number In[], cmsUInt16Number Out[],  const cmsPipeline* lut)
+{
+    _cmsAssert(lut != NULL);
+    lut ->Eval16Fn(In, Out, lut->Data);
+}
+
+// Evaluates the pipeline on cmsFloat32Number values by calling its EvalFloatFn.
+// Unlike the 16-bit path, the evaluator receives the pipeline itself, not lut->Data.
+void CMSEXPORT cmsPipelineEvalFloat(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsPipeline* lut)
+{
+    _cmsAssert(lut != NULL);
+    lut ->EvalFloatFn(In, Out, lut);
+}
+
+
+// Duplicates a LUT: every stage is cloned, then evaluators, private data and the
+// SaveAs8Bits flag are carried over. Returns NULL on a NULL input or on any failure.
+cmsPipeline* CMSEXPORT cmsPipelineDup(const cmsPipeline* lut)
+{
+    cmsPipeline* NewLUT;
+    cmsStage *NewMPE, *Anterior = NULL, *mpe;
+
+    if (lut == NULL) return NULL;
+
+    NewLUT = cmsPipelineAlloc(lut ->ContextID, lut ->InputChannels, lut ->OutputChannels);
+    if (NewLUT == NULL) return NULL;
+
+    for (mpe = lut ->Elements;
+         mpe != NULL;
+         mpe = mpe ->Next) {
+
+             NewMPE = cmsStageDup(mpe);
+
+             if (NewMPE == NULL) {
+                 cmsPipelineFree(NewLUT);
+                 return NULL;
+             }
+
+             // Start the new chain or append to its tail
+             if (Anterior == NULL)
+                 NewLUT ->Elements = NewMPE;
+             else
+                 Anterior ->Next = NewMPE;
+
+             Anterior = NewMPE;
+    }
+
+    // Validate the cloned chain before any private data is attached. At this point
+    // NewLUT has no FreeDataFn, so cmsPipelineFree releases exactly the cloned stages
+    // and the pipeline itself; a bare _cmsFree would leak the stages.
+    if (!BlessLUT(NewLUT))
+    {
+        cmsPipelineFree(NewLUT);
+        return NULL;
+    }
+
+    NewLUT ->Eval16Fn    = lut ->Eval16Fn;
+    NewLUT ->EvalFloatFn = lut ->EvalFloatFn;
+    NewLUT ->DupDataFn   = lut ->DupDataFn;
+    NewLUT ->FreeDataFn  = lut ->FreeDataFn;
+
+    // Private data is cloned only when the source pipeline provides a duplicator;
+    // otherwise Data keeps the value set by cmsPipelineAlloc
+    if (NewLUT ->DupDataFn != NULL)
+        NewLUT ->Data = NewLUT ->DupDataFn(lut ->ContextID, lut->Data);
+
+    NewLUT ->SaveAs8Bits    = lut ->SaveAs8Bits;
+
+    return NewLUT;
+}
+
+// Links mpe into lut at the requested end and re-derives the pipeline channel counts.
+// Returns FALSE for NULL arguments, an unknown loc, or when the resulting chain fails
+// the consistency check (in that last case the stage stays linked).
+int CMSEXPORT cmsPipelineInsertStage(cmsPipeline* lut, cmsStageLoc loc, cmsStage* mpe)
+{
+    cmsStage* Anterior = NULL, *pt;
+
+    if (lut == NULL || mpe == NULL)
+        return FALSE;
+
+    switch (loc) {
+        case cmsAT_BEGIN:
+            mpe ->Next = lut ->Elements;
+            lut ->Elements = mpe;
+            break;
+
+        case cmsAT_END:
+            // Walk to the current tail, if there is one
+            for (pt = lut ->Elements; pt != NULL; pt = pt -> Next)
+                Anterior = pt;
+
+            if (Anterior == NULL)
+                lut ->Elements = mpe;
+            else
+                Anterior ->Next = mpe;
+
+            // The appended stage is always the new tail, even on an empty pipeline
+            mpe ->Next = NULL;
+            break;
+        default:
+            return FALSE;
+    }
+
+    return BlessLUT(lut);
+}
+
+// Unlink an element and return the pointer to it. With mpe == NULL the element is freed instead.
+void CMSEXPORT cmsPipelineUnlinkStage(cmsPipeline* lut, cmsStageLoc loc, cmsStage** mpe)
+{
+    cmsStage *Anterior, *pt, *Last;
+    cmsStage *Unlinked = NULL;
+
+
+    // If empty LUT, there is nothing to remove
+    if (lut ->Elements == NULL) {
+        if (mpe) *mpe = NULL;
+        return;
+    }
+
+    // Depending on the requested end...
+    switch (loc) {
+
+        case cmsAT_BEGIN:
+            {
+                cmsStage* elem = lut ->Elements;
+
+                lut ->Elements = elem -> Next;
+                elem ->Next = NULL;
+                Unlinked = elem;
+
+            }
+            break;
+
+        case cmsAT_END:
+            Anterior = Last = NULL;
+            for (pt = lut ->Elements;
+                pt != NULL;
+                pt = pt -> Next) {
+                    Anterior = Last;
+                    Last = pt;
+            }
+
+            Unlinked = Last;  // Next already points to NULL
+
+            // Truncate the chain
+            if (Anterior)
+                Anterior ->Next = NULL;
+            else
+                lut ->Elements = NULL;
+            break;
+        default:;   // unknown location: nothing is unlinked, Unlinked stays NULL
+    }
+
+    if (mpe)
+        *mpe = Unlinked;
+    else if (Unlinked != NULL)   // nothing to free when no stage was unlinked
+        cmsStageFree(Unlinked);
+
+    // May fail, but we ignore it
+    BlessLUT(lut);
+}
+
+
+// Appends a duplicate of every stage of l2 to the end of l1 (stages are copied, not moved).
+cmsBool  CMSEXPORT cmsPipelineCat(cmsPipeline* l1, const cmsPipeline* l2)
+{
+    cmsStage* Src;
+
+    // When neither LUT has elements there is nothing to link, so l1 just
+    // inherits the number of channels of l2
+    if (l1 ->Elements == NULL && l2 ->Elements == NULL) {
+        l1 ->InputChannels  = l2 ->InputChannels;
+        l1 ->OutputChannels = l2 ->OutputChannels;
+    }
+
+    // Cat second: each element is duplicated, and a failed duplication (NULL) is
+    // rejected by cmsPipelineInsertStage
+    Src = l2 ->Elements;
+    while (Src != NULL) {
+        if (!cmsPipelineInsertStage(l1, cmsAT_END, cmsStageDup(Src)))
+            return FALSE;
+
+        Src = Src ->Next;
+    }
+
+    return BlessLUT(l1);
+}
+
+
+cmsBool CMSEXPORT cmsPipelineSetSaveAs8bitsFlag(cmsPipeline* lut, cmsBool On)
+{
+    // Store the new flag value and hand back the one it replaces
+    const cmsBool Previous = lut ->SaveAs8Bits;
+    lut ->SaveAs8Bits = On;
+    return Previous;
+}
+
+// Returns the head of the stage chain (lut->Elements), which is NULL for an empty pipeline.
+cmsStage* CMSEXPORT cmsPipelineGetPtrToFirstStage(const cmsPipeline* lut)
+{
+    return lut ->Elements;
+}
+
+cmsStage* CMSEXPORT cmsPipelineGetPtrToLastStage(const cmsPipeline* lut)
+{
+    // Returns the tail of the stage chain, or NULL for an empty pipeline
+    cmsStage* Tail = NULL;
+    cmsStage* Cur;
+
+    for (Cur = lut ->Elements; Cur != NULL; Cur = Cur ->Next) Tail = Cur;
+    return Tail;
+}
+
+cmsUInt32Number CMSEXPORT cmsPipelineStageCount(const cmsPipeline* lut)
+{
+    // Counts the stages by walking the chain from its head; 0 for an empty pipeline
+    cmsUInt32Number Count = 0;
+    const cmsStage* Cur;
+
+    for (Cur = lut ->Elements; Cur != NULL; Cur = Cur ->Next) Count++;
+
+    return Count;
+}
+
+// Installs an optional 16-bit evaluator and a block of private data on the pipeline. If private data is being used, an optional
+// duplicator and free functions should also be specified in order to duplicate the LUT construct. Use NULL to inhibit such functionality.
+void CMSEXPORT _cmsPipelineSetOptimizationParameters(cmsPipeline* Lut,
+                                        _cmsOPTeval16Fn Eval16,
+                                        void* PrivateData,
+                                        _cmsFreeUserDataFn FreePrivateDataFn,
+                                        _cmsDupUserDataFn  DupPrivateDataFn)
+{
+    // Only the 16-bit evaluator is replaced here; EvalFloatFn is left untouched
+    Lut ->Eval16Fn   = Eval16;
+    Lut ->Data       = PrivateData;
+    Lut ->FreeDataFn = FreePrivateDataFn;
+    Lut ->DupDataFn  = DupPrivateDataFn;
+}
+
+
+// ----------------------------------------------------------- Reverse interpolation
+// Here's how it goes. The derivative Df(x) of the function f is the linear
+// transformation that best approximates f near the point x. It can be represented
+// by a matrix A whose entries are the partial derivatives of the components of f
+// with respect to all the coordinates. This is known as the Jacobian
+//
+// The best linear approximation to f is given by the matrix equation:
+//
+// y-y0 = A (x-x0)
+//
+// So, if x0 is a good "guess" for the zero of f, then solving for the zero of this
+// linear approximation will give a "better guess" for the zero of f. Thus let y=0,
+// and since y0=f(x0) one can solve the above equation for x. This leads to the
+// Newton's method formula:
+//
+// xn+1 = xn - A-1 f(xn)
+//
+// where xn+1 denotes the (n+1)-st guess, obtained from the n-th guess xn in the
+// fashion described above. Iterating this will give better and better approximations
+// if you have a "good enough" initial guess.
+
+
+#define JACOBIAN_EPSILON            0.001f
+#define INVERSION_MAX_ITERATIONS    30
+
+// Increment with reflexion on boundary
+static
+void IncDelta(cmsFloat32Number *Val)
+{
+    // Still below (1.0 - JACOBIAN_EPSILON): step upwards
+    if (*Val < (1.0 - JACOBIAN_EPSILON)) {
+        *Val += JACOBIAN_EPSILON;
+        return;
+    }
+    // Too close to the upper boundary: step downwards instead
+    *Val -= JACOBIAN_EPSILON;
+}
+
+
+
+// Euclidean distance between two vectors of n elements each one
+static
+cmsFloat32Number EuclideanDistance(cmsFloat32Number a[], cmsFloat32Number b[], int n)
+{
+    cmsFloat32Number acc = 0;
+    int k = 0;
+    // Accumulate the squared component differences in index order
+    while (k < n) {
+        const cmsFloat32Number d = b[k] - a[k];
+        acc += d * d;
+        k++;
+    }
+    return sqrtf(acc);
+}
+
+
+// Evaluate a LUT in reverse direction with Newton's method. Only 3->3 and 4->3 LUT are handled.
+//
+// x1 <- x - [J(x)]^-1 * f(x)
+//
+// Target: values to reach; 3 are matched, Target[3] is copied as the fixed 4th input on 4->3 LUT
+// Result: receives the best input found so far (InputChannels values)
+// Hint:   Location where begin the search (3 values), or NULL for the default start
+// lut:    The LUT on where to do the search
+// Returns FALSE on unsupported channel counts or when the Jacobian cannot be solved, TRUE otherwise
+cmsBool CMSEXPORT cmsPipelineEvalReverseFloat(cmsFloat32Number Target[],
+                                              cmsFloat32Number Result[],
+                                              cmsFloat32Number Hint[],
+                                              const cmsPipeline* lut)
+{
+    cmsUInt32Number  i, j;
+    cmsFloat64Number  error, LastError = 1E20;
+    cmsFloat32Number  fx[4], x[4], xd[4], fxd[4];
+    cmsVEC3 tmp, tmp2;
+    cmsMAT3 Jacobian;
+
+    // Only 3->3 and 4->3 are supported
+    if (lut ->InputChannels != 3 && lut ->InputChannels != 4) return FALSE;
+    if (lut ->OutputChannels != 3) return FALSE;
+
+    // Take the hint as starting point if specified
+    if (Hint == NULL) {
+
+        // Begin at any point, we choose 0.3 on each of the three searched axes
+        x[0] = x[1] = x[2] = 0.3f;
+    }
+    else {
+
+        // Only copy 3 channels from hint...
+        for (j=0; j < 3; j++)
+            x[j] = Hint[j];
+    }
+
+    // If Lut is 4-dimensions, then grab target[3], which is fixed
+    if (lut ->InputChannels == 4) {
+        x[3] = Target[3];
+    }
+    else x[3] = 0; // To keep lint happy
+
+    // NOTE(review): Result is only written inside the loop; if the very first error is
+    // already >= 1E20 the loop breaks before that and TRUE is returned with Result untouched
+    // Iterate
+    for (i = 0; i < INVERSION_MAX_ITERATIONS; i++) {
+
+        // Get beginning fx
+        cmsPipelineEvalFloat(x, fx, lut);
+
+        // Compute error
+        error = EuclideanDistance(fx, Target, 3);
+
+        // If not convergent, return last safe value
+        if (error >= LastError)
+            break;
+
+        // Keep latest values
+        LastError     = error;
+        for (j=0; j < lut ->InputChannels; j++)
+                Result[j] = x[j];
+
+        // Found an exact match?
+        if (error <= 0)
+            break;
+
+        // Obtain slope (the Jacobian)
+        for (j = 0; j < 3; j++) {
+
+            xd[0] = x[0];
+            xd[1] = x[1];
+            xd[2] = x[2];
+            xd[3] = x[3];  // Keep fixed channel
+
+            IncDelta(&xd[j]);
+            // NOTE(review): IncDelta may step downwards near 1.0, yet the quotient below always divides by +JACOBIAN_EPSILON - confirm the sign
+            cmsPipelineEvalFloat(xd, fxd, lut);
+
+            Jacobian.v[0].n[j] = ((fxd[0] - fx[0]) / JACOBIAN_EPSILON);
+            Jacobian.v[1].n[j] = ((fxd[1] - fx[1]) / JACOBIAN_EPSILON);
+            Jacobian.v[2].n[j] = ((fxd[2] - fx[2]) / JACOBIAN_EPSILON);
+        }
+
+        // Solve system
+        tmp2.n[0] = fx[0] - Target[0];
+        tmp2.n[1] = fx[1] - Target[1];
+        tmp2.n[2] = fx[2] - Target[2];
+
+        if (!_cmsMAT3solve(&tmp, &Jacobian, &tmp2))
+            return FALSE;
+
+        // Move our guess
+        x[0] -= (cmsFloat32Number) tmp.n[0];
+        x[1] -= (cmsFloat32Number) tmp.n[1];
+        x[2] -= (cmsFloat32Number) tmp.n[2];
+
+        // Some clipping....
+        for (j=0; j < 3; j++) {
+            if (x[j] < 0) x[j] = 0;
+            else
+                if (x[j] > 1.0) x[j] = 1.0;
+        }
+    }
+    // TRUE is also returned when the loop stopped on a non-decreasing error or ran out of iterations
+    return TRUE;
+}
+
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsmtrx.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsmtrx.cpp
new file mode 100755
index 0000000000..a83d39ddb6
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsmtrx.cpp
@@ -0,0 +1,176 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+#define DSWAP(x, y)     {cmsFloat64Number tmp = (x); (x)=(y); (y)=tmp;}
+
+
+// Initializes vector r with the components x, y, z (stored at indices VX, VY, VZ)
+void CMSEXPORT _cmsVEC3init(cmsVEC3* r, cmsFloat64Number x, cmsFloat64Number y, cmsFloat64Number z)
+{
+    r -> n[VX] = x;
+    r -> n[VY] = y;
+    r -> n[VZ] = z;
+}
+
+// Vector subtraction r = a - b, component by component
+void CMSEXPORT _cmsVEC3minus(cmsVEC3* r, const cmsVEC3* a, const cmsVEC3* b)
+{
+  r -> n[VX] = a -> n[VX] - b -> n[VX];
+  r -> n[VY] = a -> n[VY] - b -> n[VY];
+  r -> n[VZ] = a -> n[VZ] - b -> n[VZ];
+}
+// Vector cross product r = u x v. r may alias u or v.
+void CMSEXPORT _cmsVEC3cross(cmsVEC3* r, const cmsVEC3* u, const cmsVEC3* v)
+{
+    // All three components are evaluated as call arguments before anything is stored in r
+    _cmsVEC3init(r, u->n[VY] * v->n[VZ] - v->n[VY] * u->n[VZ],
+                    u->n[VZ] * v->n[VX] - v->n[VZ] * u->n[VX],
+                    u->n[VX] * v->n[VY] - v->n[VX] * u->n[VY]);
+}
+
+// Vector dot product: sum of the products of the VX, VY and VZ components of u and v
+cmsFloat64Number CMSEXPORT _cmsVEC3dot(const cmsVEC3* u, const cmsVEC3* v)
+{
+    return u->n[VX] * v->n[VX] + u->n[VY] * v->n[VY] + u->n[VZ] * v->n[VZ];
+}
+
+// Euclidean length: square root of the sum of the squared components of a
+cmsFloat64Number CMSEXPORT _cmsVEC3length(const cmsVEC3* a)
+{
+    return sqrt(a ->n[VX] * a ->n[VX] +
+                a ->n[VY] * a ->n[VY] +
+                a ->n[VZ] * a ->n[VZ]);
+}
+
+// Euclidean distance between the points a and b
+cmsFloat64Number CMSEXPORT _cmsVEC3distance(const cmsVEC3* a, const cmsVEC3* b)
+{
+    const cmsFloat64Number dx = a ->n[VX] - b ->n[VX];
+    const cmsFloat64Number dy = a ->n[VY] - b ->n[VY];
+    const cmsFloat64Number dz = a ->n[VZ] - b ->n[VZ];
+
+    return sqrt(dx*dx + dy*dy + dz*dz);
+}
+
+
+
+// 3x3 Identity: sets the rows of a to (1,0,0), (0,1,0) and (0,0,1)
+void CMSEXPORT _cmsMAT3identity(cmsMAT3* a)
+{
+    _cmsVEC3init(&a-> v[0], 1.0, 0.0, 0.0);
+    _cmsVEC3init(&a-> v[1], 0.0, 1.0, 0.0);
+    _cmsVEC3init(&a-> v[2], 0.0, 0.0, 1.0);
+}
+// TRUE when a and b differ by less than 1/65535
+static
+cmsBool CloseEnough(cmsFloat64Number a, cmsFloat64Number b)
+{
+    return fabs(b - a) < (1.0 / 65535.0);
+}
+
+
+cmsBool CMSEXPORT _cmsMAT3isIdentity(const cmsMAT3* a)
+{
+    // TRUE when every entry is within the CloseEnough() tolerance of the identity matrix
+    int row, col;
+
+    for (row = 0; row < 3; row++) {
+        for (col = 0; col < 3; col++) {
+            const cmsFloat64Number Expected = (row == col) ? 1.0 : 0.0;
+            if (!CloseEnough(a ->v[row].n[col], Expected)) return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+
+// Multiply two matrices: r = a * b. r may alias a or b.
+void CMSEXPORT _cmsMAT3per(cmsMAT3* r, const cmsMAT3* a, const cmsMAT3* b)
+{
+    cmsMAT3 m;   // scratch result: rows of b must stay intact until all products are formed
+#define ROWCOL(i, j) \
+    a->v[i].n[0]*b->v[0].n[j] + a->v[i].n[1]*b->v[1].n[j] + a->v[i].n[2]*b->v[2].n[j]
+    _cmsVEC3init(&m.v[0], ROWCOL(0,0), ROWCOL(0,1), ROWCOL(0,2));
+    _cmsVEC3init(&m.v[1], ROWCOL(1,0), ROWCOL(1,1), ROWCOL(1,2));
+    _cmsVEC3init(&m.v[2], ROWCOL(2,0), ROWCOL(2,1), ROWCOL(2,2));
+    *r = m;
+#undef ROWCOL //(i, j)
+}
+
+
+
+// Inverse of a matrix b = a^(-1). Returns FALSE, leaving b untouched, when |det| < MATRIX_DET_TOLERANCE
+cmsBool  CMSEXPORT _cmsMAT3inverse(const cmsMAT3* a, cmsMAT3* b)
+{
+   cmsFloat64Number det, c0, c1, c2;
+   // c0, c1, c2: cofactors of the first row of a
+   c0 =  a -> v[1].n[1]*a -> v[2].n[2] - a -> v[1].n[2]*a -> v[2].n[1];
+   c1 = -a -> v[1].n[0]*a -> v[2].n[2] + a -> v[1].n[2]*a -> v[2].n[0];
+   c2 =  a -> v[1].n[0]*a -> v[2].n[1] - a -> v[1].n[1]*a -> v[2].n[0];
+   // Determinant by expansion along the first row
+   det = a -> v[0].n[0]*c0 + a -> v[0].n[1]*c1 + a -> v[0].n[2]*c2;
+
+   if (fabs(det) < MATRIX_DET_TOLERANCE) return FALSE;  // singular matrix; can't invert
+   // NOTE(review): entries of b are stored while a is still being read, so b must not alias a
+   b -> v[0].n[0] = c0/det;
+   b -> v[0].n[1] = (a -> v[0].n[2]*a -> v[2].n[1] - a -> v[0].n[1]*a -> v[2].n[2])/det;
+   b -> v[0].n[2] = (a -> v[0].n[1]*a -> v[1].n[2] - a -> v[0].n[2]*a -> v[1].n[1])/det;
+   b -> v[1].n[0] = c1/det;
+   b -> v[1].n[1] = (a -> v[0].n[0]*a -> v[2].n[2] - a -> v[0].n[2]*a -> v[2].n[0])/det;
+   b -> v[1].n[2] = (a -> v[0].n[2]*a -> v[1].n[0] - a -> v[0].n[0]*a -> v[1].n[2])/det;
+   b -> v[2].n[0] = c2/det;
+   b -> v[2].n[1] = (a -> v[0].n[1]*a -> v[2].n[0] - a -> v[0].n[0]*a -> v[2].n[1])/det;
+   b -> v[2].n[2] = (a -> v[0].n[0]*a -> v[1].n[1] - a -> v[0].n[1]*a -> v[1].n[0])/det;
+
+   return TRUE;
+}
+
+
+// Solve a system in the form Ax = b. Returns FALSE when A cannot be inverted.
+cmsBool  CMSEXPORT _cmsMAT3solve(cmsVEC3* x, cmsMAT3* a, cmsVEC3* b)
+{
+    cmsMAT3 Work, Inv;
+    // Invert a private copy of A, then x = A^-1 * b
+    memmove(&Work, a, sizeof(cmsMAT3));
+    if (_cmsMAT3inverse(&Work, &Inv)) {
+        _cmsMAT3eval(x, &Inv, b);
+        return TRUE;
+    }
+    return FALSE;  // Singular matrix
+}
+// Evaluate a vector across a matrix: r = a * v. r may alias v, because all three
+// components are evaluated as call arguments before anything is stored in r.
+void CMSEXPORT _cmsMAT3eval(cmsVEC3* r, const cmsMAT3* a, const cmsVEC3* v)
+{
+    _cmsVEC3init(r, a->v[0].n[VX]*v->n[VX] + a->v[0].n[VY]*v->n[VY] + a->v[0].n[VZ]*v->n[VZ],
+                    a->v[1].n[VX]*v->n[VX] + a->v[1].n[VY]*v->n[VY] + a->v[1].n[VZ]*v->n[VZ],
+                    a->v[2].n[VX]*v->n[VX] + a->v[2].n[VY]*v->n[VY] + a->v[2].n[VZ]*v->n[VZ]);
+}
+
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsnamed.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsnamed.cpp
new file mode 100755
index 0000000000..42bd36530b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsnamed.cpp
@@ -0,0 +1,970 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Multilocalized unicode objects. That is an attempt to encapsulate i18n.
+
+
+// Creates an empty multi localized unicode (MLU) object able to hold nItems entries.
+// Returns NULL if either the container or its entry table cannot be allocated.
+cmsMLU* CMSEXPORT cmsMLUalloc(cmsContext ContextID, cmsUInt32Number nItems)
+{
+    cmsMLU* NewMlu;
+    cmsUInt32Number Capacity;
+
+    // A request for zero items falls back to a table of two
+    Capacity = (nItems == 0) ? 2 : nItems;
+
+    // Zero-filled container first
+    NewMlu = (cmsMLU*) _cmsMallocZero(ContextID, sizeof(cmsMLU));
+    if (NewMlu == NULL) return NULL;
+
+    // Then the entry table; the container is given back if that fails
+    NewMlu ->Entries = (_cmsMLUentry*) _cmsCalloc(ContextID, Capacity, sizeof(_cmsMLUentry));
+    if (NewMlu ->Entries != NULL) {
+
+        NewMlu ->ContextID        = ContextID;
+        NewMlu ->AllocatedEntries = Capacity;
+        NewMlu ->UsedEntries      = 0;
+        return NewMlu;
+    }
+
+    _cmsFree(ContextID, NewMlu);
+    return NULL;
+}
+
+
+// Enlarges the string pool of a MLU: 256 bytes on first use, twice the current size afterwards.
+// Returns FALSE, leaving the mlu fields unmodified, on NULL input, size wrap-around or failed reallocation.
+static
+cmsBool GrowMLUpool(cmsMLU* mlu)
+{
+    cmsUInt32Number Wanted;
+    void *Resized;
+
+    // Nothing to grow without a MLU
+    if (mlu == NULL) return FALSE;
+
+    // First growth starts at 256 bytes, later ones double the pool
+    Wanted = (mlu ->PoolSize == 0) ? 256 : mlu ->PoolSize * 2;
+
+    // A smaller result means the doubling wrapped around
+    if (Wanted < mlu ->PoolSize) return FALSE;
+
+    Resized = _cmsRealloc(mlu ->ContextID, mlu ->MemPool, Wanted);
+    if (Resized != NULL) {
+
+        mlu ->PoolSize = Wanted;
+        mlu ->MemPool  = Resized;
+        return TRUE;
+    }
+
+    // Reallocation failed, the mlu fields keep their old values
+    return FALSE;
+}
+
+
+// Doubles the capacity of the entry table of a MLU. Returns FALSE, leaving the mlu fields
+// unmodified, on NULL input, when the count wraps around or when the reallocation fails.
+static
+cmsBool GrowMLUtable(cmsMLU* mlu)
+{
+    cmsUInt32Number Doubled;
+    _cmsMLUentry *Resized;
+
+    if (mlu == NULL) return FALSE;
+
+    Doubled = mlu ->AllocatedEntries * 2;
+
+    // Halving must give the old count back, otherwise the multiplication wrapped
+    if (Doubled / 2 != mlu ->AllocatedEntries) return FALSE;
+
+    // The byte size of the table is the new count times the entry size
+    Resized = (_cmsMLUentry*)_cmsRealloc(mlu ->ContextID, mlu ->Entries, Doubled*sizeof(_cmsMLUentry));
+    if (Resized == NULL) return FALSE;
+
+    mlu ->AllocatedEntries = Doubled;
+    mlu ->Entries          = Resized;
+
+    return TRUE;
+}
+
+
+// Looks up the entry stored for a Language/Country pair.
+// Returns its index in the entry table, or -1 when there is none or mlu is NULL.
+static
+int SearchMLUEntry(cmsMLU* mlu, cmsUInt16Number LanguageCode, cmsUInt16Number CountryCode)
+{
+    cmsUInt32Number Pos = 0;
+
+    if (mlu == NULL) return -1;
+
+    // Linear scan over the entries in use; both codes have to agree
+    while (Pos < mlu ->UsedEntries) {
+        const _cmsMLUentry* Entry = &mlu ->Entries[Pos];
+
+        if (Entry ->Country == CountryCode && Entry ->Language == LanguageCode)
+            return (int) Pos;
+        Pos++;
+    }
+    return -1;
+}
+
+// Appends a block of wide characters to the MLU under the given Language/Country pair.
+// Each pair may be stored only once. Returns FALSE on NULL mlu, when the pair is already present,
+// when the entry table or the pool cannot be grown, or when the MLU has no pool memory.
+static
+cmsBool AddMLUBlock(cmsMLU* mlu, cmsUInt32Number size, const wchar_t *Block,
+                     cmsUInt16Number LanguageCode, cmsUInt16Number CountryCode)
+{
+    cmsUInt32Number WritePos;
+    cmsUInt8Number* Pool;
+    _cmsMLUentry* Slot;
+
+    if (mlu == NULL) return FALSE;
+
+    // Make room in the entry table when every slot is taken
+    if (mlu ->UsedEntries >= mlu ->AllocatedEntries && !GrowMLUtable(mlu))
+        return FALSE;
+
+    // The Language/Country pair must not exist yet
+    if (SearchMLUEntry(mlu, LanguageCode, CountryCode) >= 0) return FALSE;
+
+    // Keep growing the pool until the block fits in the free part
+    while ((mlu ->PoolSize - mlu ->PoolUsed) < size) {
+        if (!GrowMLUpool(mlu)) return FALSE;
+    }
+
+    Pool = (cmsUInt8Number*) mlu ->MemPool;
+    if (Pool == NULL) return FALSE;
+
+    // Copy the text at the end of the used area
+    WritePos = mlu ->PoolUsed;
+    memmove(Pool + WritePos, Block, size);
+    mlu ->PoolUsed += size;
+
+    // ...and describe it in the next free entry
+    Slot = &mlu ->Entries[mlu ->UsedEntries];
+    Slot ->StrW     = WritePos;
+    Slot ->Len      = size;
+    Slot ->Country  = CountryCode;
+    Slot ->Language = LanguageCode;
+    mlu ->UsedEntries++;
+
+    return TRUE;
+}
+
+// Packs the first two chars of a language/country code into a cmsUInt16Number. The bytes
+// are read one at a time because some compilers don't properly align beginning of strings.
+// The composed value is finally passed through _cmsAdjustEndianess16().
+static
+cmsUInt16Number strTo16(const char str[3])
+{
+    const cmsUInt8Number* Bytes = (const cmsUInt8Number*)str;
+    cmsUInt16Number Low  = (cmsUInt16Number) Bytes[0];
+    cmsUInt16Number High = (cmsUInt16Number) Bytes[1];
+    return _cmsAdjustEndianess16((cmsUInt16Number) ((High << 8) | Low));
+}
+
+// Unpacks a 16-bit language/country code into a zero-terminated two-char string
+static
+void strFrom16(char str[3], cmsUInt16Number n)
+{
+    // The adjusted value is overlaid with two bytes, assuming this would be aligned
+    union {
+        cmsUInt16Number Word;
+        cmsUInt8Number  Bytes[2];
+    } Overlay;
+
+    Overlay.Word = _cmsAdjustEndianess16(n);
+    // Bytes are emitted in memory order, followed by the terminator
+    str[0] = (char) Overlay.Bytes[0];
+    str[1] = (char) Overlay.Bytes[1];
+    str[2] = (char) 0;
+}
+
+// Add an ASCII entry. Do not add any \0 termination (ICC1v43_2010-12.pdf page 61)
+// Returns FALSE on NULL mlu or string, on allocation failure, or when AddMLUBlock() rejects the text.
+cmsBool CMSEXPORT cmsMLUsetASCII(cmsMLU* mlu, const char LanguageCode[3], const char CountryCode[3], const char* ASCIIString)
+{
+    cmsUInt32Number i, len;
+    wchar_t* WStr;
+    cmsBool  rc;
+    cmsUInt16Number Lang, Cntry;
+
+    // Validate before anything else: strlen() on a NULL string is undefined behavior
+    if (mlu == NULL || ASCIIString == NULL) return FALSE;
+    len   = (cmsUInt32Number) strlen(ASCIIString);
+    Lang  = strTo16(LanguageCode);
+    Cntry = strTo16(CountryCode);
+    // Widen every char into a temporary buffer, released once the pool holds the text
+    WStr = (wchar_t*) _cmsCalloc(mlu ->ContextID, len,  sizeof(wchar_t));
+    if (WStr == NULL) return FALSE;
+    for (i=0; i < len; i++)
+        WStr[i] = (wchar_t) ASCIIString[i];
+    rc = AddMLUBlock(mlu, len  * sizeof(wchar_t), WStr, Lang, Cntry);
+    _cmsFree(mlu ->ContextID, WStr);
+    return rc;
+}
+
+// Counts the wchar_t units before the terminating zero; we don't need any wcs support library
+static
+cmsUInt32Number mywcslen(const wchar_t *s)
+{
+    size_t Count = 0;
+
+    // Index forward until the terminator shows up
+    while (s[Count] != 0)
+        Count++;
+
+    return (cmsUInt32Number) Count;
+}
+
+// Add a wide entry. Do not add any \0 terminator (ICC1v43_2010-12.pdf page 61)
+// Returns FALSE on NULL mlu or string, or when AddMLUBlock() rejects the text.
+cmsBool  CMSEXPORT cmsMLUsetWide(cmsMLU* mlu, const char Language[3], const char Country[3], const wchar_t* WideString)
+{
+    cmsUInt16Number Cntry = strTo16(Country);
+    cmsUInt16Number Lang  = strTo16(Language);
+
+    if (mlu == NULL || WideString == NULL) return FALSE;
+
+    // The block size is the text length in bytes, terminator excluded
+    return AddMLUBlock(mlu, (cmsUInt32Number) (mywcslen(WideString)) * sizeof(wchar_t),
+                       WideString, Lang, Cntry);
+}
+
+// Deep copy of a MLU: the used entries plus the used part of the string pool.
+// Duplicating a NULL obtains a NULL. NULL is also returned on allocation failure or when
+// the source is inconsistent (no entry table, or pool bytes in use without pool memory).
+cmsMLU* CMSEXPORT cmsMLUdup(const cmsMLU* mlu)
+{
+    cmsMLU* NewMlu = NULL;
+
+    if (mlu == NULL) return NULL;
+
+    NewMlu = cmsMLUalloc(mlu ->ContextID, mlu ->UsedEntries);
+    if (NewMlu == NULL) return NULL;
+
+    // Should never happen
+    if (NewMlu ->AllocatedEntries < mlu ->UsedEntries) goto Error;
+
+    // Sanitize...
+    if (NewMlu ->Entries == NULL || mlu ->Entries == NULL)  goto Error;
+
+    memmove(NewMlu ->Entries, mlu ->Entries, mlu ->UsedEntries * sizeof(_cmsMLUentry));
+    NewMlu ->UsedEntries = mlu ->UsedEntries;
+
+    // The MLU may be empty. That is a valid state: nothing has to be copied and the
+    // duplicate gets no pool either, so it must not reach the NULL pool check below.
+    if (mlu ->PoolUsed == 0) {
+        NewMlu ->MemPool  = NULL;
+        NewMlu ->PoolSize = 0;
+        NewMlu ->PoolUsed = 0;
+        return NewMlu;
+    }
+
+    // Bytes are marked as used, so the source has to own pool memory
+    if (mlu ->MemPool == NULL) goto Error;
+
+    NewMlu ->MemPool = _cmsMalloc(mlu ->ContextID, mlu ->PoolUsed);
+    if (NewMlu ->MemPool == NULL) goto Error;
+
+    memmove(NewMlu ->MemPool, mlu->MemPool, mlu ->PoolUsed);
+    NewMlu ->PoolSize = mlu ->PoolUsed;
+    NewMlu ->PoolUsed = mlu ->PoolUsed;
+    return NewMlu;
+
+Error:
+    cmsMLUfree(NewMlu);
+    return NULL;
+}
+
+// Releases the entry table, the string pool and finally the MLU itself. NULL is accepted.
+void CMSEXPORT cmsMLUfree(cmsMLU* mlu)
+{
+    cmsContext Owner;
+
+    if (mlu == NULL) return;
+    Owner = mlu ->ContextID;
+    if (mlu ->Entries != NULL) _cmsFree(Owner, mlu ->Entries);
+    if (mlu ->MemPool != NULL) _cmsFree(Owner, mlu ->MemPool);
+    _cmsFree(Owner, mlu);
+}
+
+
+// The algorithm first searches for an exact match of country and language, if not found it uses
+// the first entry of that Language. If none is found, first entry is used instead.
+// Returns a pointer into the string pool, reports the byte length through len and the codes
+// actually used through UsedLanguageCode/UsedCountryCode; all three outputs are optional.
+// Returns NULL (writing no output) when mlu is NULL or holds no entries, or when the
+// selected entry does not lie inside the pool.
+static
+const wchar_t* _cmsMLUgetWide(const cmsMLU* mlu,
+                              cmsUInt32Number *len,
+                              cmsUInt16Number LanguageCode, cmsUInt16Number CountryCode,
+                              cmsUInt16Number* UsedLanguageCode, cmsUInt16Number* UsedCountryCode)
+{
+    cmsUInt32Number i;
+    int Best = -1;
+    const _cmsMLUentry* v;
+
+    if (mlu == NULL) return NULL;
+    // Without stored entries there is no "first entry" to fall back to
+    if (mlu ->Entries == NULL || mlu ->UsedEntries == 0) return NULL;
+
+    for (i=0; i < mlu ->UsedEntries; i++) {
+
+        v = mlu ->Entries + i;
+        if (v ->Language != LanguageCode) continue;
+
+        // Found exact match, stop searching
+        if (v ->Country == CountryCode) {
+            Best = (int) i;
+            break;
+        }
+
+        // Otherwise remember the first entry written in the requested language
+        if (Best == -1) Best = (int) i;
+    }
+
+    // No string found. Return First one
+    if (Best == -1) Best = 0;
+    v = mlu ->Entries + Best;
+
+    // The text must lie completely inside the pool (checked without overflowing the sum)
+    if (mlu ->MemPool == NULL) return NULL;
+    if (v ->StrW > mlu ->PoolSize || v ->Len > mlu ->PoolSize - v ->StrW) return NULL;
+
+    if (UsedLanguageCode != NULL) *UsedLanguageCode = v ->Language;
+    if (UsedCountryCode  != NULL) *UsedCountryCode  = v ->Country;
+    if (len != NULL) *len = v ->Len;
+
+    return (const wchar_t*) ((const cmsUInt8Number*) mlu ->MemPool + v ->StrW);
+}
+
+
+// Obtain an ASCII representation of the wide string. Setting buffer to NULL returns the len.
+// The result counts the terminating zero. 0 is returned for a NULL mlu, when no string is
+// available or when BufferSize is 0. Text that does not fit into Buffer is clipped.
+cmsUInt32Number CMSEXPORT cmsMLUgetASCII(const cmsMLU* mlu,
+                                       const char LanguageCode[3], const char CountryCode[3],
+                                       char* Buffer, cmsUInt32Number BufferSize)
+{
+    cmsUInt16Number Lang  = strTo16(LanguageCode);
+    cmsUInt16Number Cntry = strTo16(CountryCode);
+    cmsUInt32Number ByteLen = 0;
+    cmsUInt32Number nChars, k;
+    const wchar_t *Text;
+
+    // Sanitize
+    if (mlu == NULL) return 0;
+
+    // Locate the best matching translation
+    Text = _cmsMLUgetWide(mlu, &ByteLen, Lang, Cntry, NULL, NULL);
+    if (Text == NULL) return 0;
+
+    // The stored length is in bytes; one output char is needed per wide char
+    nChars = ByteLen / sizeof(wchar_t);
+
+    // Length query only: text plus the zero at the end
+    if (Buffer == NULL) return nChars + 1;
+
+    // No buffer size means no data
+    if (BufferSize == 0) return 0;
+
+    // Clip so that the terminator still fits
+    if (nChars + 1 > BufferSize)
+        nChars = BufferSize - 1;
+
+    // Narrow every wide char; a zero stays a zero under the cast
+    k = 0;
+    while (k < nChars) {
+        Buffer[k] = (char) Text[k];
+        k++;
+    }
+
+    // We put a termination "\0"
+    Buffer[nChars] = 0;
+    return nChars + 1;
+}
+
+// Obtain the wide string of the MLU that best matches the given language and country.
+// BufferSize and the result are in bytes, terminating zero included; a NULL Buffer returns the size
+// needed. 0 is returned for a NULL mlu, a missing string, or a Buffer too small for the terminator.
+cmsUInt32Number CMSEXPORT cmsMLUgetWide(const cmsMLU* mlu,
+                                      const char LanguageCode[3], const char CountryCode[3],
+                                      wchar_t* Buffer, cmsUInt32Number BufferSize)
+{
+    const wchar_t *Wide;
+    cmsUInt32Number  StrLen = 0;
+    cmsUInt16Number Lang  = strTo16(LanguageCode);
+    cmsUInt16Number Cntry = strTo16(CountryCode);
+
+    // Sanitize
+    if (mlu == NULL) return 0;
+
+    Wide = _cmsMLUgetWide(mlu, &StrLen, Lang, Cntry, NULL, NULL);
+    if (Wide == NULL) return 0;
+
+    // Maybe we want only to know the len?
+    if (Buffer == NULL) return StrLen + sizeof(wchar_t);
+
+    // A buffer smaller than one wchar_t cannot even take the terminator; letting it through
+    // would make the unsigned subtraction below wrap around and overrun Buffer.
+    if (BufferSize < sizeof(wchar_t)) return 0;
+    // Some clipping may be required
+    if (BufferSize < StrLen + sizeof(wchar_t))
+        StrLen = (cmsUInt32Number) (BufferSize - sizeof(wchar_t));
+
+    memmove(Buffer, Wide, StrLen);
+    Buffer[StrLen / sizeof(wchar_t)] = 0;
+    return StrLen + sizeof(wchar_t);
+}
+
+
+// Reports which language and country the MLU would actually use for a requested pair.
+// Returns FALSE, leaving both outputs untouched, for a NULL mlu or when no string is available.
+CMSAPI cmsBool CMSEXPORT cmsMLUgetTranslation(const cmsMLU* mlu,
+                                              const char LanguageCode[3], const char CountryCode[3],
+                                              char ObtainedLanguage[3], char ObtainedCountry[3])
+{
+    cmsUInt16Number WantedLang  = strTo16(LanguageCode);
+    cmsUInt16Number WantedCntry = strTo16(CountryCode);
+    cmsUInt16Number GotLang, GotCntry;
+
+    // Sanitize
+    if (mlu == NULL) return FALSE;
+
+    // Only the codes are of interest here, so no length is requested
+    if (_cmsMLUgetWide(mlu, NULL, WantedLang, WantedCntry, &GotLang, &GotCntry) == NULL)
+        return FALSE;
+
+    // Turn the 16-bit codes back into zero-terminated strings
+    strFrom16(ObtainedLanguage, GotLang);
+    strFrom16(ObtainedCountry,  GotCntry);
+
+    return TRUE;
+}
+
+
+
+// Number of translations stored in the MLU object.
+// A NULL mlu holds none.
+cmsUInt32Number CMSEXPORT cmsMLUtranslationsCount(const cmsMLU* mlu)
+{
+    return (mlu != NULL) ? mlu->UsedEntries : 0;
+}
+
+// Get the language and country codes for a specific MLU index.
+// Returns FALSE, writing nothing, for a NULL mlu or an index beyond the entries in use.
+cmsBool CMSEXPORT cmsMLUtranslationsCodes(const cmsMLU* mlu,
+                                          cmsUInt32Number idx,
+                                          char LanguageCode[3],
+                                          char CountryCode[3])
+{
+    const _cmsMLUentry *Picked;
+
+    if (mlu == NULL || idx >= mlu->UsedEntries) return FALSE;
+
+    Picked = mlu->Entries + idx;
+
+    // Both codes come back as zero-terminated two-char strings
+    strFrom16(LanguageCode, Picked->Language);
+    strFrom16(CountryCode,  Picked->Country);
+
+    return TRUE;
+}
+
+
+// Named color lists --------------------------------------------------------------------------------------------
+
+// Grows the color array: 64 entries on first use, twice the current capacity afterwards.
+// Returns FALSE on NULL input, on reallocation failure, or past the 100K cap (list is then emptied).
+static
+cmsBool  GrowNamedColorList(cmsNAMEDCOLORLIST* v)
+{
+    cmsUInt32Number size;
+    _cmsNAMEDCOLOR * NewPtr;
+
+    if (v == NULL) return FALSE;
+
+    size = (v ->Allocated == 0) ? 64 : v ->Allocated * 2;   // 64 is the initial guess
+
+    // Keep a maximum color lists can grow, 100K entries seems reasonable
+    if (size > 1024 * 100) {
+        _cmsFree(v->ContextID, (void*) v->List);
+        v->List = NULL;
+        // Counters must not keep describing the released array, or lookups would index a NULL List
+        v->Allocated = 0;
+        v->nColors   = 0;
+        return FALSE;
+    }
+
+    NewPtr = (_cmsNAMEDCOLOR*) _cmsRealloc(v ->ContextID, v ->List, size * sizeof(_cmsNAMEDCOLOR));
+    if (NewPtr == NULL) return FALSE;
+
+    v ->List      = NewPtr;
+    v ->Allocated = size;
+    return TRUE;
+}
+
+// Allocate a list for n elements. Prefix and Suffix are copied (truncated to fit, NULL means
+// empty). Returns NULL if the list cannot be allocated or GrowNamedColorList() refuses to reach n.
+cmsNAMEDCOLORLIST* CMSEXPORT cmsAllocNamedColorList(cmsContext ContextID, cmsUInt32Number n, cmsUInt32Number ColorantCount, const char* Prefix, const char* Suffix)
+{
+    cmsNAMEDCOLORLIST* v = (cmsNAMEDCOLORLIST*) _cmsMallocZero(ContextID, sizeof(cmsNAMEDCOLORLIST));
+    if (v == NULL) return NULL;
+    v ->List      = NULL;
+    v ->nColors   = 0;
+    v ->ContextID  = ContextID;
+
+    while (v -> Allocated < n) {
+        if (!GrowNamedColorList(v)) {
+            // An earlier round may have left an array behind; release it together with the list
+            if (v ->List != NULL) _cmsFree(ContextID, (void*) v ->List);
+            _cmsFree(ContextID, (void*) v);
+            return NULL;
+        }
+    }
+
+    // The struct was zero-filled, so a skipped copy leaves an empty string behind
+    if (Prefix != NULL) strncpy(v ->Prefix, Prefix, sizeof(v ->Prefix)-1);
+    if (Suffix != NULL) strncpy(v ->Suffix, Suffix, sizeof(v ->Suffix)-1);
+    v ->Prefix[sizeof(v ->Prefix)-1] = v ->Suffix[sizeof(v ->Suffix)-1] = 0;
+    v -> ColorantCount = ColorantCount;
+    return v;
+}
+
+// Releases the color array (if any) and then the list itself; a NULL list is ignored
+void CMSEXPORT cmsFreeNamedColorList(cmsNAMEDCOLORLIST* v)
+{
+    if (v != NULL) {
+        if (v ->List != NULL) _cmsFree(v ->ContextID, v ->List);
+        _cmsFree(v ->ContextID, v);
+    }
+}
+// Deep copy of a named color list. Returns NULL for a NULL list or when the copy cannot be built.
+cmsNAMEDCOLORLIST* CMSEXPORT cmsDupNamedColorList(const cmsNAMEDCOLORLIST* v)
+{
+    cmsNAMEDCOLORLIST* NewNC;
+    if (v == NULL) return NULL;
+    NewNC= cmsAllocNamedColorList(v ->ContextID, v -> nColors, v ->ColorantCount, v ->Prefix, v ->Suffix);
+    if (NewNC == NULL) return NULL;
+    // For really large tables we need this
+    while (NewNC ->Allocated < v ->Allocated){
+        if (!GrowNamedColorList(NewNC)) {
+            cmsFreeNamedColorList(NewNC);   // Do not leak the partially built copy
+            return NULL;
+        }
+    }
+
+    memmove(NewNC ->Prefix, v ->Prefix, sizeof(v ->Prefix));
+    memmove(NewNC ->Suffix, v ->Suffix, sizeof(v ->Suffix));
+    NewNC ->ColorantCount = v ->ColorantCount;
+    if (v ->nColors > 0) memmove(NewNC->List, v ->List, v->nColors * sizeof(_cmsNAMEDCOLOR));   // Both arrays may be NULL when empty
+    NewNC ->nColors = v ->nColors;
+    return NewNC;
+}
+
+// Append a color to a list. List pointer may change if reallocated.
+// NULL Name, PCS or Colorant store an empty name / zeros. Returns FALSE for a NULL list or failed growth.
+cmsBool  CMSEXPORT cmsAppendNamedColor(cmsNAMEDCOLORLIST* NamedColorList,
+                                       const char* Name,
+                                       cmsUInt16Number PCS[3], cmsUInt16Number Colorant[cmsMAXCHANNELS])
+{
+    _cmsNAMEDCOLOR* Slot;
+    cmsUInt32Number k;
+
+    if (NamedColorList == NULL) return FALSE;
+
+    // One more color has to fit
+    if (NamedColorList ->nColors + 1 > NamedColorList ->Allocated &&
+        !GrowNamedColorList(NamedColorList)) return FALSE;
+
+    // First unused element; taken after growing because the array may have moved
+    Slot = &NamedColorList ->List[NamedColorList ->nColors];
+
+    for (k = 0; k < NamedColorList ->ColorantCount; k++)
+        Slot ->DeviceColorant[k] = (Colorant != NULL) ? Colorant[k] : (cmsUInt16Number) 0;
+    for (k = 0; k < 3; k++)
+        Slot ->PCS[k] = (PCS != NULL) ? PCS[k] : (cmsUInt16Number) 0;
+
+    if (Name == NULL)
+        Slot ->Name[0] = 0;
+    else {
+        strncpy(Slot ->Name, Name, cmsMAX_PATH-1);
+        Slot ->Name[cmsMAX_PATH-1] = 0;
+    }
+    NamedColorList ->nColors++;
+    return TRUE;
+}
+
+// Number of colors stored in the list.
+// A NULL list counts as empty.
+cmsUInt32Number CMSEXPORT cmsNamedColorCount(const cmsNAMEDCOLORLIST* NamedColorList)
+{
+     return (NamedColorList != NULL) ? NamedColorList ->nColors : 0;
+}
+
+// Info about a given color. Every output pointer is optional; the ones given receive the name, the
+// list prefix/suffix, 3 PCS values and ColorantCount device values. FALSE for a NULL list or bad index.
+cmsBool  CMSEXPORT cmsNamedColorInfo(const cmsNAMEDCOLORLIST* NamedColorList, cmsUInt32Number nColor,
+                                     char* Name,
+                                     char* Prefix,
+                                     char* Suffix,
+                                     cmsUInt16Number* PCS,
+                                     cmsUInt16Number* Colorant)
+{
+    const _cmsNAMEDCOLOR* Color;
+
+    if (NamedColorList == NULL || nColor >= NamedColorList ->nColors) return FALSE;
+
+    Color = &NamedColorList ->List[nColor];
+
+    // strcpy instead of strncpy because many apps are using small buffers
+    if (Name   != NULL) strcpy(Name,   Color ->Name);
+    if (Prefix != NULL) strcpy(Prefix, NamedColorList ->Prefix);
+    if (Suffix != NULL) strcpy(Suffix, NamedColorList ->Suffix);
+    if (PCS != NULL)
+        memmove(PCS, Color ->PCS, 3*sizeof(cmsUInt16Number));
+    if (Colorant != NULL)
+        memmove(Colorant, Color ->DeviceColorant, sizeof(cmsUInt16Number) * NamedColorList ->ColorantCount);
+
+    return TRUE;
+}
+
+// Search for a given color name (no prefix or suffix); names are compared with cmsstrcasecmp().
+// Returns the index of the first match, or -1 when not found or the list is NULL.
+cmsInt32Number CMSEXPORT cmsNamedColorIndex(const cmsNAMEDCOLORLIST* NamedColorList, const char* Name)
+{
+    cmsUInt32Number Pos = 0, Total;
+
+    if (NamedColorList == NULL) return -1;
+    Total = NamedColorList ->nColors;
+
+    // Advance until a name matches or the list is exhausted
+    while (Pos < Total && cmsstrcasecmp(Name, NamedColorList->List[Pos].Name) != 0)
+        Pos++;
+
+    return (Pos < Total) ? (cmsInt32Number) Pos : -1;
+}
+
+// MPE support -----------------------------------------------------------------------------------------------------------------
+
+// Stage callback: releases the named color list stored in the Data field of the stage
+static
+void FreeNamedColorList(cmsStage* mpe)
+{
+    cmsFreeNamedColorList((cmsNAMEDCOLORLIST*) mpe ->Data);
+}
+
+// Stage callback: hands back a duplicate of the named color list stored in the Data field
+static
+void* DupNamedColorList(cmsStage* mpe)
+{
+    return cmsDupNamedColorList((cmsNAMEDCOLORLIST*) mpe ->Data);
+}
+
+// Stage evaluator: In[0] times 65535 selects the color; Out[0..2] get its PCS values divided by 65535
+static
+void EvalNamedColorPCS(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+    cmsNAMEDCOLORLIST* Colors = (cmsNAMEDCOLORLIST*) mpe ->Data;
+    cmsUInt16Number index = (cmsUInt16Number) _cmsQuickSaturateWord(In[0] * 65535.0);
+    int k;
+
+    if (index >= Colors ->nColors) {
+        cmsSignalError(Colors ->ContextID, cmsERROR_RANGE, "Color %d out of range", index);
+        Out[0] = Out[1] = Out[2] = 0.0f;   // An index past the list yields zeros
+        return;
+    }
+
+    // Named color always uses Lab
+    for (k = 0; k < 3; k++)
+        Out[k] = (cmsFloat32Number) (Colors ->List[index].PCS[k] / 65535.0);
+}
+
+// Stage evaluator: In[0] times 65535 selects the color; Out gets its device colorants divided by 65535
+static
+void EvalNamedColor(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
+{
+    cmsNAMEDCOLORLIST* Colors = (cmsNAMEDCOLORLIST*) mpe ->Data;
+    cmsUInt16Number index = (cmsUInt16Number) _cmsQuickSaturateWord(In[0] * 65535.0);
+    cmsUInt32Number nChannels = Colors ->ColorantCount;
+    cmsUInt32Number k;
+
+    if (index < Colors ->nColors) {
+        for (k = 0; k < nChannels; k++)
+            Out[k] = (cmsFloat32Number) (Colors ->List[index].DeviceColorant[k] / 65535.0);
+        return;
+    }
+    cmsSignalError(Colors ->ContextID, cmsERROR_RANGE, "Color %d out of range", index);
+    for (k = 0; k < nChannels; k++)
+        Out[k] = 0.0f;   // An index past the list yields zeros on every channel
+}
+
+
+// Named color lookup element: a 1-input stage holding its own duplicate of NamedColorList.
+// It outputs the 3 PCS values when UsePCS is set, the device colorants otherwise.
+cmsStage* _cmsStageAllocNamedColor(cmsNAMEDCOLORLIST* NamedColorList, cmsBool UsePCS)
+{
+    // Both variants share the type signature and the dup/free callbacks
+    if (UsePCS)
+        return _cmsStageAllocPlaceholder(NamedColorList ->ContextID, cmsSigNamedColorElemType, 1, 3,
+                     EvalNamedColorPCS, DupNamedColorList, FreeNamedColorList, cmsDupNamedColorList(NamedColorList));
+
+    return _cmsStageAllocPlaceholder(NamedColorList ->ContextID, cmsSigNamedColorElemType, 1, NamedColorList ->ColorantCount,
+                     EvalNamedColor, DupNamedColorList, FreeNamedColorList, cmsDupNamedColorList(NamedColorList));
+}
+
+
+// Retrieve the named color list from a transform. Should be first element in the LUT.
+// Returns NULL for a NULL transform, a missing LUT, no first stage, or a first stage of another type.
+cmsNAMEDCOLORLIST* CMSEXPORT cmsGetNamedColorList(cmsHTRANSFORM xform)
+{
+    _cmsTRANSFORM* v = (_cmsTRANSFORM*) xform;
+    cmsStage* mpe  = (v != NULL && v ->Lut != NULL) ? v ->Lut->Elements : NULL;   // Never dereference a missing link
+
+    if (mpe == NULL || mpe ->Type != cmsSigNamedColorElemType) return NULL;
+    return (cmsNAMEDCOLORLIST*) mpe ->Data;
+}
+
+// Profile sequence description routines -------------------------------------------------------------------------------------
+
+// Allocates a profile sequence description with n entries whose MLU members are NULL.
+// Returns NULL when n is 0 or above 255, or when memory cannot be obtained.
+cmsSEQ* CMSEXPORT cmsAllocProfileSequenceDescription(cmsContext ContextID, cmsUInt32Number n)
+{
+    cmsSEQ* NewSeq;
+    cmsPSEQDESC* Item;
+    cmsUInt32Number k;
+
+    // In an absolutely arbitrary way, a maximum of 255 profiles linked in a devicelink is allowed.
+    // More makes no sense anyway and may be used for exploits, so let's close the door!
+    if (n == 0 || n > 255) return NULL;
+
+    NewSeq = (cmsSEQ*) _cmsMallocZero(ContextID, sizeof(cmsSEQ));
+    if (NewSeq == NULL) return NULL;
+
+    NewSeq -> seq = (cmsPSEQDESC*) _cmsCalloc(ContextID, n, sizeof(cmsPSEQDESC));
+    if (NewSeq -> seq == NULL) {
+        _cmsFree(ContextID, NewSeq);
+        return NULL;
+    }
+
+    NewSeq -> ContextID = ContextID;
+    NewSeq -> n         = n;
+    // The MLU members start out as explicit NULL pointers
+    for (k = 0, Item = NewSeq -> seq; k < n; k++, Item++) {
+        Item -> Manufacturer = NULL;
+        Item -> Model        = NULL;
+        Item -> Description  = NULL;
+    }
+    return NewSeq;
+}
+
+// Frees a sequence description and the MLU objects of its entries; a NULL pseq or entry array is tolerated
+void CMSEXPORT cmsFreeProfileSequenceDescription(cmsSEQ* pseq)
+{
+    cmsUInt32Number i;
+    if (pseq == NULL) return;
+    for (i=0; pseq ->seq != NULL && i < pseq ->n; i++) {   // Entries are only visited when the array exists
+        if (pseq ->seq[i].Manufacturer != NULL) cmsMLUfree(pseq ->seq[i].Manufacturer);
+        if (pseq ->seq[i].Model != NULL) cmsMLUfree(pseq ->seq[i].Model);
+        if (pseq ->seq[i].Description != NULL) cmsMLUfree(pseq ->seq[i].Description);
+    }
+    if (pseq ->seq != NULL) _cmsFree(pseq ->ContextID, pseq ->seq);
+    _cmsFree(pseq -> ContextID, pseq);
+}
+
+// Deep copy of a profile sequence description, MLU members included.
+// Returns NULL for a NULL pseq or when the container or its entry array cannot be allocated.
+cmsSEQ* CMSEXPORT cmsDupProfileSequenceDescription(const cmsSEQ* pseq)
+{
+    cmsSEQ *NewSeq;
+    cmsUInt32Number i;
+
+    if (pseq == NULL)
+        return NULL;
+
+    NewSeq = (cmsSEQ*) _cmsMalloc(pseq -> ContextID, sizeof(cmsSEQ));
+    if (NewSeq == NULL) return NULL;
+
+    NewSeq -> seq      = (cmsPSEQDESC*) _cmsCalloc(pseq ->ContextID, pseq ->n, sizeof(cmsPSEQDESC));
+    if (NewSeq ->seq == NULL) {
+        // NewSeq comes from a plain malloc: its ContextID and n are not set yet, so the
+        // generic free routine must not be used on it. Release the container directly.
+        _cmsFree(pseq ->ContextID, NewSeq);
+        return NULL;
+    }
+
+    NewSeq -> ContextID = pseq ->ContextID;
+    NewSeq -> n        = pseq ->n;
+
+    for (i=0; i < pseq->n; i++) {
+
+        memmove(&NewSeq ->seq[i].attributes, &pseq ->seq[i].attributes, sizeof(cmsUInt64Number));
+
+        NewSeq ->seq[i].deviceMfg   = pseq ->seq[i].deviceMfg;
+        NewSeq ->seq[i].deviceModel = pseq ->seq[i].deviceModel;
+        memmove(&NewSeq ->seq[i].ProfileID, &pseq ->seq[i].ProfileID, sizeof(cmsProfileID));
+        NewSeq ->seq[i].technology  = pseq ->seq[i].technology;
+
+        NewSeq ->seq[i].Manufacturer = cmsMLUdup(pseq ->seq[i].Manufacturer);
+        NewSeq ->seq[i].Model        = cmsMLUdup(pseq ->seq[i].Model);
+        NewSeq ->seq[i].Description  = cmsMLUdup(pseq ->seq[i].Description);
+    }
+
+    return NewSeq;
+}
+
+// Dictionaries --------------------------------------------------------------------------------------------------------
+
+// Dictionaries are just very simple linked lists
+
+
+typedef struct _cmsDICT_struct {
+    cmsDICTentry* head;       // First node of the singly linked list of entries; new entries are pushed here
+    cmsContext ContextID;     // Context handed to the memory routines working on this dictionary
+} _cmsDICT;
+
+
+// Allocate an empty dictionary bound to ContextID; returns NULL when the allocation fails
+cmsHANDLE CMSEXPORT cmsDictAlloc(cmsContext ContextID)
+{
+    _cmsDICT* NewDict = (_cmsDICT*) _cmsMallocZero(ContextID, sizeof(_cmsDICT));
+
+    // The zero fill already leaves the list empty, only the context has to be recorded
+    if (NewDict != NULL)
+        NewDict ->ContextID = ContextID;
+    return (cmsHANDLE) NewDict;
+}
+
+// Dispose resources: every node with its strings and MLU objects, then the dictionary itself
+void CMSEXPORT cmsDictFree(cmsHANDLE hDict)
+{
+    _cmsDICT* dict = (_cmsDICT*) hDict;
+    cmsDICTentry *Node, *Following;
+
+    _cmsAssert(dict != NULL);
+
+    // Walk the list freeing all nodes
+    for (Node = dict ->head; Node != NULL; Node = Following) {
+
+        // The link has to be read before the node goes away
+        Following = Node ->Next;
+
+        if (Node ->DisplayName  != NULL) cmsMLUfree(Node ->DisplayName);
+        if (Node ->DisplayValue != NULL) cmsMLUfree(Node ->DisplayValue);
+        if (Node ->Name  != NULL) _cmsFree(dict ->ContextID, Node ->Name);
+        if (Node ->Value != NULL) _cmsFree(dict ->ContextID, Node ->Value);
+
+        // Finally the node itself
+        _cmsFree(dict ->ContextID, Node);
+    }
+
+    // The container goes last
+    _cmsFree(dict ->ContextID, dict);
+}
+
+
+// Duplicate a wide char string, terminator included; a NULL string duplicates to NULL
+static
+wchar_t* DupWcs(cmsContext ContextID, const wchar_t* ptr)
+{
+    return (ptr == NULL) ? (wchar_t*) NULL
+                         : (wchar_t*) _cmsDupMem(ContextID, ptr, (mywcslen(ptr) + 1) * sizeof(wchar_t));
+}
+
+// Add a new entry at the head of the linked list; Name, Value and both MLU objects are copied.
+// Returns FALSE, leaving the dictionary unchanged, if the node or a given string cannot be copied.
+cmsBool CMSEXPORT cmsDictAddEntry(cmsHANDLE hDict, const wchar_t* Name, const wchar_t* Value, const cmsMLU *DisplayName, const cmsMLU *DisplayValue)
+{
+    _cmsDICT* dict = (_cmsDICT*) hDict;
+    cmsDICTentry *entry;
+
+    _cmsAssert(dict != NULL);
+    _cmsAssert(Name != NULL);
+
+    entry = (cmsDICTentry*) _cmsMallocZero(dict ->ContextID, sizeof(cmsDICTentry));
+    if (entry == NULL) return FALSE;
+
+    // Strings first: a given string that cannot be copied would leave the node incomplete
+    entry ->Name  = DupWcs(dict ->ContextID, Name);
+    entry ->Value = DupWcs(dict ->ContextID, Value);
+    if (entry ->Name == NULL || (Value != NULL && entry ->Value == NULL)) {
+        if (entry ->Name  != NULL) _cmsFree(dict ->ContextID, entry ->Name);
+        if (entry ->Value != NULL) _cmsFree(dict ->ContextID, entry ->Value);
+        _cmsFree(dict ->ContextID, entry);
+        return FALSE;
+    }
+
+    entry ->DisplayName  = cmsMLUdup(DisplayName);
+    entry ->DisplayValue = cmsMLUdup(DisplayValue);
+    entry ->Next = dict ->head;
+    dict ->head = entry;
+    return TRUE;
+}
+
+
+// Duplicates an existing dictionary by adding a copy of every entry to a fresh one.
+// Returns NULL if the new dictionary or one of its entries cannot be created.
+cmsHANDLE CMSEXPORT cmsDictDup(cmsHANDLE hDict)
+{
+    _cmsDICT* old_dict = (_cmsDICT*) hDict;
+    const cmsDICTentry *Node;
+    cmsHANDLE hCopy;
+
+    _cmsAssert(old_dict != NULL);
+
+    // The copy is allocated in the context of the original
+    hCopy = cmsDictAlloc(old_dict ->ContextID);
+    if (hCopy == NULL) return NULL;
+
+    // Walk the source list copying all nodes
+    for (Node = old_dict ->head; Node != NULL; Node = Node ->Next) {
+
+        if (cmsDictAddEntry(hCopy, Node ->Name, Node ->Value, Node ->DisplayName, Node ->DisplayValue))
+            continue;
+
+        // A failed copy discards everything built so far
+        cmsDictFree(hCopy);
+        return NULL;
+    }
+
+    return hCopy;
+}
+
+// Get a pointer to the linked list: the head entry, or NULL for a NULL dictionary
+const cmsDICTentry* CMSEXPORT cmsDictGetEntryList(cmsHANDLE hDict)
+{
+    const _cmsDICT* dict = (const _cmsDICT*) hDict;
+
+    // Nothing to hand out without a dictionary
+    return (dict != NULL) ? dict ->head : NULL;
+}
+
+// Helper for external languages: returns the entry linked after e,
+// or NULL when e itself is NULL
+const cmsDICTentry* CMSEXPORT cmsDictNextEntry(const cmsDICTentry* e)
+{
+     return (e != NULL) ? e ->Next : NULL;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsopt.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsopt.cpp
new file mode 100755
index 0000000000..f838b6eb77
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsopt.cpp
@@ -0,0 +1,1961 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+//----------------------------------------------------------------------------------
+
+// Optimization data for 8-bit input, Shaper-CLUT (3 inputs only)
+typedef struct {
+
+    cmsContext ContextID;
+
+    const cmsInterpParams* p;   // Tetrahedral interpolation parameters. This pointer is not owned.
+
+    cmsUInt16Number rx[256], ry[256], rz[256];  // Per 8-bit value: rest inside the grid cell (FIXED_REST_TO_INT)
+    cmsUInt32Number X0[256], Y0[256], Z0[256];  // Per 8-bit value: table offset of the node (opta[] * integer node)
+
+    // The six tables are indexed by the 8-bit channel value and are filled by PrelinOpt8alloc
+} Prelin8Data;
+
+
+// Generic optimization for 16 bits Shaper-CLUT-Shaper (any inputs)
+typedef struct {
+
+    cmsContext ContextID;
+
+    // Number of channels
+    cmsUInt32Number nInputs;
+    cmsUInt32Number nOutputs;
+
+    _cmsInterpFn16 EvalCurveIn16[MAX_INPUT_DIMENSIONS];       // The maximum number of input channels is known in advance
+    cmsInterpParams*  ParamsCurveIn16[MAX_INPUT_DIMENSIONS];  // Per-input interpolation params (NULL when no input curves are given)
+
+    _cmsInterpFn16 EvalCLUT;            // The evaluator for the CLUT grid
+    const cmsInterpParams* CLUTparams;  // (not-owned pointer)
+
+    // The two arrays below hold nOutputs items each. The arrays themselves are allocated in PrelinOpt16alloc
+    // and released in PrelinOpt16free; the interpolation params they reference are not released there.
+    _cmsInterpFn16* EvalCurveOut16;       // Points to an array of curve evaluators in 16 bits
+    cmsInterpParams**  ParamsCurveOut16;  // Points to an array of references to interpolation params
+
+} Prelin16Data;
+
+
+// Optimization for matrix-shaper in 8 bits. Numbers are operated in n.14 signed, tables are stored in 1.14 fixed
+
+typedef cmsInt32Number cmsS1Fixed14Number;   // Note that this may hold more than 16 bits!
+// Converts a floating point value to 1.14 fixed point: scales by 16384 and rounds through floor(v + 0.5)
+#define DOUBLE_TO_1FIXED14(x) ((cmsS1Fixed14Number) floor((x) * 16384.0 + 0.5))
+
+typedef struct {
+
+    cmsContext ContextID;
+
+    cmsS1Fixed14Number Shaper1R[256];  // from 0..255 to 1.14  (0.0...1.0)
+    cmsS1Fixed14Number Shaper1G[256];  // same, for the G table
+    cmsS1Fixed14Number Shaper1B[256];  // same, for the B table
+
+    cmsS1Fixed14Number Mat[3][3];     // n.14 to n.14 (needs a saturation after that)
+    cmsS1Fixed14Number Off[3];        // presumably the matrix offsets, also in n.14 -- TODO confirm
+
+    cmsUInt16Number Shaper2R[16385];    // 1.14 to 0..255; the 16385 entries cover indexes 0..16384
+    cmsUInt16Number Shaper2G[16385];
+    cmsUInt16Number Shaper2B[16385];
+
+} MatShaper8Data;
+
+// Data for the curves-only optimization; the same structure is shared between 8 and 16 bits
+typedef struct {
+
+    cmsContext ContextID;
+
+    cmsUInt32Number nCurves;      // Number of curves
+    cmsUInt32Number nElements;    // Elements in curves (presumably per curve -- TODO confirm)
+    cmsUInt16Number** Curves;     // Points to a dynamically allocated array of 16-bit tables
+
+} Curves16Data;
+
+
+// Simple optimizations ----------------------------------------------------------------------------------------------------------
+
+
+// Unlinks the stage that *head refers to and frees it; *head then refers to the following stage
+static
+void _RemoveElement(cmsStage** head)
+{
+    cmsStage* victim = *head;
+
+    *head = victim ->Next;      // bypass the stage before it is released
+    cmsStageFree(victim);
+}
+
+// Removes every stage implementing UnaryOp. "link" is a double pointer: it addresses the slot holding the stage pointer.
+static
+cmsBool _Remove1Op(cmsPipeline* Lut, cmsStageSignature UnaryOp)
+{
+    cmsBool removed = FALSE;
+    cmsStage** link;
+
+    for (link = &Lut ->Elements; *link != NULL; ) {
+        if ((*link) ->Implements != UnaryOp) {
+            link = &((*link) ->Next);   // keep this stage, move on to the next slot
+            continue;
+        }
+
+        // After the removal *link refers to the following stage, so the same slot is examined again
+        _RemoveElement(link);
+        removed = TRUE;
+    }
+    return removed;
+}
+
+// Removes every pair of adjacent stages in which a stage implementing Op1 is immediately
+// followed by a stage implementing Op2. Returns TRUE if at least one pair was removed.
+static
+cmsBool _Remove2Op(cmsPipeline* Lut, cmsStageSignature Op1, cmsStageSignature Op2)
+{
+    cmsBool removed = FALSE;
+    cmsStage** first = &Lut ->Elements;
+
+    // The scan stops as soon as fewer than two stages remain ahead
+    while (*first != NULL && (*first) ->Next != NULL) {
+
+        cmsStage** second = &((*first) ->Next);
+
+        if ((*first) ->Implements != Op1 || (*second) ->Implements != Op2) {
+            first = second;     // no match here, slide one stage forward
+            continue;
+        }
+
+        // The slot of the second stage lives inside the first stage, so the
+        // second one has to be removed before the first one is freed
+        _RemoveElement(second);
+        _RemoveElement(first);
+        removed = TRUE;
+    }
+
+    return removed;
+}
+
+
+static cmsBool CloseEnoughFloat(cmsFloat64Number a, cmsFloat64Number b)
+{
+       const cmsFloat64Number Distance = fabs(b - a);
+       return Distance < 0.00001f;   // "close enough" means an absolute difference below 0.00001f
+}
+
+// TRUE when every element of *a is close enough to the matching element of the identity matrix
+static cmsBool  isFloatMatrixIdentity(const cmsMAT3* a)
+{
+       cmsMAT3 Ref;
+       int k;
+
+       _cmsMAT3identity(&Ref);
+
+       // Flat scan over the 3x3 elements: row = k / 3, column = k % 3
+       for (k = 0; k < 9; k++) {
+              if (!CloseEnoughFloat(a->v[k / 3].n[k % 3], Ref.v[k / 3].n[k % 3])) return FALSE;
+       }
+       return TRUE;
+}
+// If two adjacent 3x3 matrices without offsets are found, replace them by their product (or by
+// nothing when the product is the identity). Returns TRUE if the pipeline has been changed.
+static
+cmsBool _MultiplyMatrix(cmsPipeline* Lut)
+{
+       cmsStage** pt1;
+       cmsStage** pt2;
+       cmsStage*  chain;
+       cmsBool AnyOpt = FALSE;
+
+       pt1 = &Lut->Elements;
+
+       while (*pt1 != NULL) {
+              pt2 = &((*pt1)->Next);
+              if (*pt2 == NULL) return AnyOpt;
+
+              if ((*pt1)->Implements == cmsSigMatrixElemType && (*pt2)->Implements == cmsSigMatrixElemType) {
+                     // Get both matrices
+                     _cmsStageMatrixData* m1 = (_cmsStageMatrixData*) cmsStageData(*pt1);
+                     _cmsStageMatrixData* m2 = (_cmsStageMatrixData*) cmsStageData(*pt2);
+                     cmsStage* Multmat = NULL;
+                     cmsMAT3 res;
+
+                     // Input offset and output offset should be zero to use this optimization.
+                     // The scan stops here, but merges already done in this call are still reported.
+                     if (m1->Offset != NULL || m2 ->Offset != NULL ||
+                            cmsStageInputChannels(*pt1) != 3 || cmsStageOutputChannels(*pt1) != 3 ||
+                            cmsStageInputChannels(*pt2) != 3 || cmsStageOutputChannels(*pt2) != 3)
+                            return AnyOpt;
+
+                     // Multiply both matrices to get the result
+                     _cmsMAT3per(&res, (cmsMAT3*)m2->Double, (cmsMAT3*)m1->Double);
+
+                     // A plain identity needs no replacement stage. Otherwise the product is allocated
+                     // BEFORE anything is unlinked, so a failed allocation leaves the pipeline intact.
+                     if (!isFloatMatrixIdentity(&res)) {
+                            Multmat = cmsStageAllocMatrix(Lut->ContextID, 3, 3, (const cmsFloat64Number*) &res, NULL);
+                            if (Multmat == NULL) return AnyOpt;  // Should never happen
+                     }
+
+                     // Get the next in chain after the matrices
+                     chain = (*pt2)->Next;
+
+                     // Remove both matrices
+                     _RemoveElement(pt2);
+                     _RemoveElement(pt1);
+
+                     // Recover the chain, placing the product where the pair used to be
+                     if (Multmat != NULL) {
+                            Multmat->Next = chain;
+                            *pt1 = Multmat;
+                     }
+                     AnyOpt = TRUE;
+              }
+              else
+                     pt1 = &((*pt1)->Next);
+       }
+
+       return AnyOpt;
+}
+
+
+// Preoptimize just gets rid of no-ops coming paired, e.g. a conversion from v2 to v4 followed
+// by a v4 to v2 and vice-versa. The elements are then discarded. Adjacent matrices are merged too.
+static
+cmsBool PreOptimize(cmsPipeline* Lut)
+{
+    // Conversions that cancel out when the first one is immediately followed by the second one
+    static const cmsStageSignature Inverses[6][2] = {
+        { cmsSigXYZ2LabElemType, cmsSigLab2XYZElemType },   // XYZ2Lab followed by Lab2XYZ
+        { cmsSigLab2XYZElemType, cmsSigXYZ2LabElemType },   // Lab2XYZ followed by XYZ2Lab
+        { cmsSigLabV4toV2,       cmsSigLabV2toV4       },   // V4 to V2 followed by V2 to V4
+        { cmsSigLabV2toV4,       cmsSigLabV4toV2       },   // V2 to V4 followed by V4 to V2
+        { cmsSigLab2FloatPCS,    cmsSigFloatPCS2Lab    },   // float PCS Lab conversions
+        { cmsSigXYZ2FloatPCS,    cmsSigFloatPCS2XYZ    }    // float PCS XYZ conversions
+    };
+    cmsBool Changed = FALSE;
+    cmsBool PassChanged;
+    cmsUInt32Number k;
+
+    // Full passes are repeated until one of them leaves the pipeline untouched
+    do {
+
+        // Remove all identities
+        PassChanged = _Remove1Op(Lut, cmsSigIdentityElemType);
+
+        // Remove the self-cancelling pairs, in table order
+        for (k = 0; k < 6; k++) {
+            if (_Remove2Op(Lut, Inverses[k][0], Inverses[k][1]))
+                PassChanged = TRUE;
+        }
+
+        // Simplify matrix.
+        if (_MultiplyMatrix(Lut))
+            PassChanged = TRUE;
+
+        if (PassChanged)
+            Changed = TRUE;
+
+    } while (PassChanged);
+
+    return Changed;
+}
+
+static
+void Eval16nop1D(register const cmsUInt16Number Input[],
+                 register cmsUInt16Number Output[],
+                 register const struct _cms_interp_struc* p)
+{
+    // Pass-through evaluator: the single 16-bit sample is copied as is, p is not consulted
+    cmsUNUSED_PARAMETER(p);
+    *Output = *Input;
+}
+
+static
+void PrelinEval16(register const cmsUInt16Number Input[],
+                  register cmsUInt16Number Output[],
+                  register const void* D)
+{
+    // Three steps: per-channel input curves, then the CLUT, then per-channel output curves
+    const Prelin16Data* opt = (const Prelin16Data*) D;
+    cmsUInt16Number  AfterInput[MAX_INPUT_DIMENSIONS];
+    cmsUInt16Number  AfterCLUT[cmsMAXCHANNELS];
+    cmsUInt32Number ch;
+
+    // Input curves, one sample per channel
+    for (ch = 0; ch < opt ->nInputs; ch++)
+        opt ->EvalCurveIn16[ch](Input + ch, AfterInput + ch, opt ->ParamsCurveIn16[ch]);
+
+    // CLUT lookup on the whole set of channels
+    opt ->EvalCLUT(AfterInput, AfterCLUT, opt ->CLUTparams);
+
+    // Output curves, one sample per channel
+    for (ch = 0; ch < opt ->nOutputs; ch++)
+        opt ->EvalCurveOut16[ch](AfterCLUT + ch, Output + ch, opt ->ParamsCurveOut16[ch]);
+}
+
+
+static void PrelinOpt16free(cmsContext ContextID, void* ptr)
+{
+    // Free function for Prelin16Data: the two output-curve arrays go first, then the block itself
+    Prelin16Data* Data = (Prelin16Data*) ptr;
+    void* OutEvals  = (void*) Data ->EvalCurveOut16;
+    void* OutParams = (void*) Data ->ParamsCurveOut16;
+    _cmsFree(ContextID, OutEvals);
+    _cmsFree(ContextID, OutParams);
+    _cmsFree(ContextID, Data);
+}
+
+// Duplicates a Prelin16Data block together with its two output-curve arrays. Returns NULL on failure.
+static
+void* Prelin16dup(cmsContext ContextID, const void* ptr)
+{
+    Prelin16Data* p16 = (Prelin16Data*) ptr;
+    Prelin16Data* Duped = (Prelin16Data*) _cmsDupMem(ContextID, p16, sizeof(Prelin16Data));
+    if (Duped == NULL) return NULL;
+    // The struct copy still refers to the arrays of the source block: give the copy its own arrays
+    Duped->EvalCurveOut16 = (_cmsInterpFn16*) _cmsDupMem(ContextID, p16->EvalCurveOut16, p16->nOutputs * sizeof(_cmsInterpFn16));
+    Duped->ParamsCurveOut16 = (cmsInterpParams**)_cmsDupMem(ContextID, p16->ParamsCurveOut16, p16->nOutputs * sizeof(cmsInterpParams*));
+    if (p16->nOutputs == 0 || (Duped->EvalCurveOut16 != NULL && Duped->ParamsCurveOut16 != NULL)) return Duped;
+    PrelinOpt16free(ContextID, Duped);  // incomplete copy: release what was obtained instead of returning NULL arrays
+    return NULL;
+}
+
+static
+Prelin16Data* PrelinOpt16alloc(cmsContext ContextID,
+                               const cmsInterpParams* ColorMap,
+                               cmsUInt32Number nInputs, cmsToneCurve** In,
+                               cmsUInt32Number nOutputs, cmsToneCurve** Out )
+{
+    // Builds the block evaluated by PrelinEval16. In and/or Out may be NULL: that side then uses the
+    // pass-through evaluator Eval16nop1D on every channel. Returns NULL if an allocation fails.
+    cmsUInt32Number i;
+    Prelin16Data* p16 = (Prelin16Data*)_cmsMallocZero(ContextID, sizeof(Prelin16Data));
+    if (p16 == NULL) return NULL;
+
+    p16 ->nInputs = nInputs;
+    p16 ->nOutputs = nOutputs;
+    for (i=0; i < nInputs; i++) {
+        if (In == NULL) {
+            p16 -> ParamsCurveIn16[i] = NULL;
+            p16 -> EvalCurveIn16[i] = Eval16nop1D;
+        }
+        else {
+            p16 -> ParamsCurveIn16[i] = In[i] ->InterpParams;
+            p16 -> EvalCurveIn16[i] = p16 ->ParamsCurveIn16[i]->Interpolation.Lerp16;
+        }
+    }
+
+    p16 ->CLUTparams = ColorMap;
+    p16 ->EvalCLUT   = ColorMap ->Interpolation.Lerp16;
+
+    p16 -> EvalCurveOut16 = (_cmsInterpFn16*) _cmsCalloc(ContextID, nOutputs, sizeof(_cmsInterpFn16));
+    p16 -> ParamsCurveOut16 = (cmsInterpParams**) _cmsCalloc(ContextID, nOutputs, sizeof(cmsInterpParams* ));
+    // Without both arrays the loop below would write through NULL. PrelinOpt16free releases
+    // whatever was obtained. Zero outputs need no arrays, so that case is let through.
+    if (nOutputs > 0 && (p16 ->EvalCurveOut16 == NULL || p16 ->ParamsCurveOut16 == NULL)) {
+        PrelinOpt16free(ContextID, p16);
+        return NULL;
+    }
+
+    for (i=0; i < nOutputs; i++) {
+        if (Out == NULL) {
+            p16 ->ParamsCurveOut16[i] = NULL;
+            p16 -> EvalCurveOut16[i] = Eval16nop1D;
+        }
+        else {
+            p16 ->ParamsCurveOut16[i] = Out[i] ->InterpParams;
+            p16 -> EvalCurveOut16[i] = p16 ->ParamsCurveOut16[i]->Interpolation.Lerp16;
+        }
+    }
+    return p16;
+}
+
+
+
+// Resampling ---------------------------------------------------------------------------------
+
+#define PRELINEARIZATION_POINTS 4096
+
+// Sampler backed by a pipeline: Cargo is the cmsPipeline evaluated for each node. The node is
+// evaluated in floating point and the result is converted back to 16 bits. Always reports success.
+static
+cmsInt32Number XFormSampler16(register const cmsUInt16Number In[], register cmsUInt16Number Out[], register void* Cargo)
+{
+    cmsFloat32Number Normalized[cmsMAXCHANNELS], Evaluated[cmsMAXCHANNELS];
+    cmsPipeline* Pipe = (cmsPipeline*) Cargo;
+    cmsUInt32Number ch;
+
+    _cmsAssert(Pipe ->InputChannels < cmsMAXCHANNELS);
+    _cmsAssert(Pipe ->OutputChannels < cmsMAXCHANNELS);
+
+    // 0..65535 -> 0..1
+    for (ch = 0; ch < Pipe ->InputChannels; ch++) {
+        Normalized[ch] = (cmsFloat32Number) (In[ch] / 65535.0);
+    }
+
+    // Evaluate in floating point
+    cmsPipelineEvalFloat(Normalized, Evaluated, Pipe);
+
+    // 0..1 -> 0..65535, saturating
+    for (ch = 0; ch < Pipe ->OutputChannels; ch++) {
+        Out[ch] = _cmsQuickSaturateWord(Evaluated[ch] * 65535.0);
+    }
+    return TRUE;
+}
+
+// Tells whether every tone curve held by the given stage is linear
+static
+cmsBool AllCurvesAreLinear(cmsStage* mpe)
+{
+    cmsToneCurve** Set = _cmsStageGetPtrToCurveSet(mpe);
+    cmsUInt32Number Remaining;
+
+    // Without a curve set the answer is "not linear"
+    if (Set == NULL)
+        return FALSE;
+
+    // One curve per output channel; stop at the first non-linear one
+    for (Remaining = cmsStageOutputChannels(mpe); Remaining > 0; Remaining--, Set++) {
+        if (!cmsIsToneCurveLinear(*Set))
+            return FALSE;
+    }
+    return TRUE;
+}
+
+// This function replaces a specific node placed in "At" by the "Value" numbers. Its purpose
+// is to fix scum dot on broken profiles/transforms. Works on 1, 3 and 4 channels.
+// Returns FALSE, leaving the table untouched, when CLUT is not a CLUT stage, when the number
+// of input channels is not supported or when "At" does not fall exactly on a grid node.
+static
+cmsBool  PatchLUT(cmsStage* CLUT, cmsUInt16Number At[], cmsUInt16Number Value[],
+                  cmsUInt32Number nChannelsOut, cmsUInt32Number nChannelsIn)
+{
+    _cmsStageCLutData* Grid;
+    cmsInterpParams* p16;
+    cmsFloat64Number px, py, pz, pw;
+    int        x0, y0, z0, w0;
+    int        i, index;
+
+    if (CLUT -> Type != cmsSigCLutElemType) {
+        cmsSignalError(CLUT->ContextID, cmsERROR_INTERNAL, "(internal) Attempt to PatchLUT on non-lut stage");
+        return FALSE;
+    }
+
+    // The stage data is interpreted as CLUT data only after the type has been validated
+    Grid = (_cmsStageCLutData*) CLUT ->Data;
+    p16  = Grid ->Params;
+
+    if (nChannelsIn == 4) {
+        px = ((cmsFloat64Number) At[0] * (p16->Domain[0])) / 65535.0;
+        py = ((cmsFloat64Number) At[1] * (p16->Domain[1])) / 65535.0;
+        pz = ((cmsFloat64Number) At[2] * (p16->Domain[2])) / 65535.0;
+        pw = ((cmsFloat64Number) At[3] * (p16->Domain[3])) / 65535.0;
+
+        x0 = (int) floor(px);
+        y0 = (int) floor(py);
+        z0 = (int) floor(pz);
+        w0 = (int) floor(pw);
+
+        if (((px - x0) != 0) ||
+            ((py - y0) != 0) ||
+            ((pz - z0) != 0) ||
+            ((pw - w0) != 0)) return FALSE; // Not on exact node
+
+        index = (int) p16 -> opta[3] * x0 +
+                (int) p16 -> opta[2] * y0 +
+                (int) p16 -> opta[1] * z0 +
+                (int) p16 -> opta[0] * w0;
+    }
+    else if (nChannelsIn == 3) {
+        px = ((cmsFloat64Number) At[0] * (p16->Domain[0])) / 65535.0;
+        py = ((cmsFloat64Number) At[1] * (p16->Domain[1])) / 65535.0;
+        pz = ((cmsFloat64Number) At[2] * (p16->Domain[2])) / 65535.0;
+
+        x0 = (int) floor(px);
+        y0 = (int) floor(py);
+        z0 = (int) floor(pz);
+
+        if (((px - x0) != 0) ||
+            ((py - y0) != 0) ||
+            ((pz - z0) != 0)) return FALSE;  // Not on exact node
+
+        index = (int) p16 -> opta[2] * x0 +
+                (int) p16 -> opta[1] * y0 +
+                (int) p16 -> opta[0] * z0;
+    }
+    else if (nChannelsIn == 1) {
+        px = ((cmsFloat64Number) At[0] * (p16->Domain[0])) / 65535.0;
+        x0 = (int) floor(px);
+        if (((px - x0) != 0)) return FALSE; // Not on exact node
+
+        index = (int) p16 -> opta[0] * x0;
+    }
+    else {
+        cmsSignalError(CLUT->ContextID, cmsERROR_INTERNAL, "(internal) %d Channels are not supported on PatchLUT", nChannelsIn);
+        return FALSE;
+    }
+
+    // Overwrite the output values stored at the node
+    for (i = 0; i < (int) nChannelsOut; i++)
+        Grid->Tab.T[index + i] = Value[i];
+
+    return TRUE;
+}
+
+// Auxiliary: channels are compared in order; TRUE means "no fixup" (all equal, or a channel differs by more than 0xf000)
+static
+cmsBool WhitesAreEqual(cmsUInt32Number n, cmsUInt16Number White1[], cmsUInt16Number White2[] )
+{
+    cmsUInt32Number ch;
+
+    for (ch = 0; ch < n; ch++) {
+        int gap = abs(White1[ch] - White2[ch]);
+        if (gap > 0xf000) return TRUE;   // Values are so extremely different that the fixup should be avoided
+        if (gap != 0) return FALSE;      // an ordinary mismatch on this channel
+    }
+    return TRUE;
+}
+
+
+// Locates the CLUT node that holds the white point and patches it to pure white, in order to
+// avoid scum dot. Returns FALSE when the end points of a color space are not available, when
+// the channel counts do not match the pipeline or when the stage layout is not supported; TRUE
+// otherwise, including the cases where nothing had to be (or could be) patched.
+static
+cmsBool FixWhiteMisalignment(cmsPipeline* Lut, cmsColorSpaceSignature EntryColorSpace, cmsColorSpaceSignature ExitColorSpace)
+{
+    cmsStage *PreLin = NULL, *CLUT = NULL, *PostLin = NULL;
+    cmsUInt16Number  NodeIn[cmsMAXCHANNELS], NodeOut[cmsMAXCHANNELS], Measured[cmsMAXCHANNELS];
+    cmsUInt16Number *WhitePointIn, *WhitePointOut;
+
+    cmsToneCurve** InCurves;
+    cmsToneCurve** OutCurves;
+    cmsUInt32Number nIns, nOuts, ch;
+
+    // White points and channel counts of both color spaces
+    if (!_cmsEndPointsBySpace(EntryColorSpace,
+                              &WhitePointIn, NULL, &nIns)) return FALSE;
+    if (!_cmsEndPointsBySpace(ExitColorSpace,
+                              &WhitePointOut, NULL, &nOuts)) return FALSE;
+
+    // The pipeline has to agree with those channel counts
+    if (Lut ->InputChannels != nIns) return FALSE;
+    if (Lut ->OutputChannels != nOuts) return FALSE;
+
+    // It needs to be fixed?
+    cmsPipelineEval16(WhitePointIn, Measured, Lut);
+
+    if (WhitesAreEqual(nOuts, WhitePointOut, Measured)) return TRUE; // no fixup wanted
+
+    // The LUT may come as Prelin-CLUT-Postlin, Prelin-CLUT, CLUT-Postlin or a lone CLUT.
+    // The layouts are probed in that order and probing stops at the first match.
+    if (!cmsPipelineCheckAndRetreiveStages(Lut, 3, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType, &PreLin, &CLUT, &PostLin) &&
+        !cmsPipelineCheckAndRetreiveStages(Lut, 2, cmsSigCurveSetElemType, cmsSigCLutElemType, &PreLin, &CLUT) &&
+        !cmsPipelineCheckAndRetreiveStages(Lut, 2, cmsSigCLutElemType, cmsSigCurveSetElemType, &CLUT, &PostLin) &&
+        !cmsPipelineCheckAndRetreiveStages(Lut, 1, cmsSigCLutElemType, &CLUT))
+        return FALSE;
+
+    // Input side: the white point goes through the pre-linearization curves, if there are any
+    InCurves = (PreLin != NULL) ? _cmsStageGetPtrToCurveSet(PreLin) : NULL;
+
+    for (ch = 0; ch < nIns; ch++) {
+
+        if (PreLin != NULL)
+            NodeIn[ch] = cmsEvalToneCurve16(InCurves[ch], WhitePointIn[ch]);
+        else
+            NodeIn[ch] = WhitePointIn[ch];
+    }
+
+    // Output side: if there is any post-linearization, we need to find how white is represented
+    // before the curve, which takes a reverse interpolation. When the reversed curve cannot be
+    // obtained, the white point is used unchanged for that channel.
+    OutCurves = (PostLin != NULL) ? _cmsStageGetPtrToCurveSet(PostLin) : NULL;
+
+    for (ch = 0; ch < nOuts; ch++) {
+
+        cmsToneCurve* Inverse = (PostLin != NULL) ? cmsReverseToneCurve(OutCurves[ch]) : NULL;
+
+        if (Inverse == NULL) {
+            NodeOut[ch] = WhitePointOut[ch];
+            continue;
+        }
+
+        NodeOut[ch] = cmsEvalToneCurve16(Inverse, WhitePointOut[ch]);
+        cmsFreeToneCurve(Inverse);
+    }
+
+    // Ok, proceed with patching. May fail and we don't care if it fails
+    // (PatchLUT declines, for instance, when the white is not on an exact grid node)
+    PatchLUT(CLUT, NodeIn, NodeOut, nOuts, nIns);
+
+    return TRUE;
+}
+
+// -----------------------------------------------------------------------------------------------------------------------------------------------
+// This function creates simple LUT from complex ones. The generated LUT has an optional set of
+// prelinearization curves, a CLUT of nGridPoints and optional postlinearization tables.
+// These curves have to exist in the original LUT in order to be used in the simplified output.
+// Caller may also use the flags to allow this feature.
+// LUTS with all curves will be simplified to a single curve. Parametric curves are lost.
+// This function should be used on 16-bits LUTS only, as floating point loses precision when simplified
+// -----------------------------------------------------------------------------------------------------------------------------------------------
+
+static
+cmsBool OptimizeByResampling(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
+{
+    // Returns TRUE and stores the resampled pipeline in *Lut (the former pipeline is freed).
+    // Returns FALSE when the optimization does not apply or fails; *Lut then keeps its stages.
+    cmsPipeline* Src = NULL;
+    cmsPipeline* Dest = NULL;
+    cmsStage* mpe;
+    cmsStage* CLUT;
+    cmsStage *KeepPreLin = NULL, *KeepPostLin = NULL;
+    cmsUInt32Number nGridPoints;
+    cmsColorSpaceSignature ColorSpace, OutputColorSpace;
+    cmsStage *NewPreLin = NULL;
+    cmsStage *NewPostLin = NULL;
+    _cmsStageCLutData* DataCLUT;
+    cmsToneCurve** DataSetIn;
+    cmsToneCurve** DataSetOut;
+    Prelin16Data* p16;
+
+    // This is a lossy optimization! does not apply in floating-point cases
+    if (_cmsFormatterIsFloat(*InputFormat) || _cmsFormatterIsFloat(*OutputFormat)) return FALSE;
+
+    ColorSpace       = _cmsICCcolorSpace((int) T_COLORSPACE(*InputFormat));
+    OutputColorSpace = _cmsICCcolorSpace((int) T_COLORSPACE(*OutputFormat));
+
+    // Color space must be specified
+    if (ColorSpace == (cmsColorSpaceSignature)0 ||
+        OutputColorSpace == (cmsColorSpaceSignature)0) return FALSE;
+
+    nGridPoints      = _cmsReasonableGridpointsByColorspace(ColorSpace, *dwFlags);
+
+    // For empty LUTs, 2 points are enough
+    if (cmsPipelineStageCount(*Lut) == 0)
+        nGridPoints = 2;
+
+    Src = *Lut;
+
+    // Named color pipelines cannot be optimized either
+    for (mpe = cmsPipelineGetPtrToFirstStage(Src);
+        mpe != NULL;
+        mpe = cmsStageNext(mpe)) {
+            if (cmsStageType(mpe) == cmsSigNamedColorElemType) return FALSE;
+    }
+
+    // Allocate an empty LUT
+    Dest =  cmsPipelineAlloc(Src ->ContextID, Src ->InputChannels, Src ->OutputChannels);
+    if (!Dest) return FALSE;
+
+    // Prelinearization tables are kept unless indicated by flags
+    if (*dwFlags & cmsFLAGS_CLUT_PRE_LINEARIZATION) {
+
+        // Get a pointer to the prelinearization element
+        cmsStage* PreLin = cmsPipelineGetPtrToFirstStage(Src);
+
+        // Check if suitable
+        if (PreLin && PreLin ->Type == cmsSigCurveSetElemType) {
+
+            // Maybe this is a linear ramp, so we can avoid the whole stuff
+            if (!AllCurvesAreLinear(PreLin)) {
+
+                // All seems ok, proceed.
+                NewPreLin = cmsStageDup(PreLin);
+                if(!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, NewPreLin))
+                    goto Error;
+
+                // Remove prelinearization. Since we have duplicated the curve
+                // in destination LUT, the sampling should be applied after this stage.
+                cmsPipelineUnlinkStage(Src, cmsAT_BEGIN, &KeepPreLin);
+            }
+        }
+    }
+
+    // Allocate the CLUT
+    CLUT = cmsStageAllocCLut16bit(Src ->ContextID, nGridPoints, Src ->InputChannels, Src->OutputChannels, NULL);
+    if (CLUT == NULL) goto Error;
+
+    // Add the CLUT to the destination LUT
+    if (!cmsPipelineInsertStage(Dest, cmsAT_END, CLUT)) {
+        goto Error;
+    }
+
+    // Postlinearization tables are kept unless indicated by flags
+    if (*dwFlags & cmsFLAGS_CLUT_POST_LINEARIZATION) {
+
+        // Get a pointer to the postlinearization if present
+        cmsStage* PostLin = cmsPipelineGetPtrToLastStage(Src);
+
+        // Check if suitable
+        if (PostLin && cmsStageType(PostLin) == cmsSigCurveSetElemType) {
+
+            // Maybe this is a linear ramp, so we can avoid the whole stuff
+            if (!AllCurvesAreLinear(PostLin)) {
+
+                // All seems ok, proceed.
+                NewPostLin = cmsStageDup(PostLin);
+                if (!cmsPipelineInsertStage(Dest, cmsAT_END, NewPostLin))
+                    goto Error;
+
+                // In destination LUT, the sampling should be applied after this stage.
+                cmsPipelineUnlinkStage(Src, cmsAT_END, &KeepPostLin);
+            }
+        }
+    }
+
+    // Now its time to do the sampling. We have to ignore pre/post linearization
+    // The source LUT without pre/post curves is passed as parameter.
+    if (!cmsStageSampleCLut16bit(CLUT, XFormSampler16, (void*) Src, 0)) {
+Error:
+        // Ops, something went wrong, Restore stages
+        if (KeepPreLin != NULL) {
+            if (!cmsPipelineInsertStage(Src, cmsAT_BEGIN, KeepPreLin)) {
+                _cmsAssert(0); // This never happens
+            }
+        }
+        if (KeepPostLin != NULL) {
+            if (!cmsPipelineInsertStage(Src, cmsAT_END,   KeepPostLin)) {
+                _cmsAssert(0); // This never happens
+            }
+        }
+        cmsPipelineFree(Dest);
+        return FALSE;
+    }
+
+    // Sampling done. The 16-bit evaluation data is built BEFORE the source pipeline is released,
+    // so that an allocation failure can still take the Error path above and give the source back.
+    DataCLUT = (_cmsStageCLutData*) CLUT ->Data;
+
+    if (NewPreLin == NULL) DataSetIn = NULL;
+    else DataSetIn = ((_cmsStageToneCurvesData*) NewPreLin ->Data) ->TheCurves;
+
+    if (NewPostLin == NULL) DataSetOut = NULL;
+    else  DataSetOut = ((_cmsStageToneCurvesData*) NewPostLin ->Data) ->TheCurves;
+
+    if (DataSetIn == NULL && DataSetOut == NULL) {
+
+        _cmsPipelineSetOptimizationParameters(Dest, (_cmsOPTeval16Fn) DataCLUT->Params->Interpolation.Lerp16, DataCLUT->Params, NULL, NULL);
+    }
+    else {
+
+        p16 = PrelinOpt16alloc(Dest ->ContextID,
+            DataCLUT ->Params,
+            Dest ->InputChannels,
+            DataSetIn,
+            Dest ->OutputChannels,
+            DataSetOut);
+        if (p16 == NULL) goto Error;    // PrelinEval16 and PrelinOpt16free cannot work on a NULL block
+
+        _cmsPipelineSetOptimizationParameters(Dest, PrelinEval16, (void*) p16, PrelinOpt16free, Prelin16dup);
+    }
+
+    // Done. Neither the source nor the stages unlinked from it are needed anymore
+    if (KeepPreLin != NULL) cmsStageFree(KeepPreLin);
+    if (KeepPostLin != NULL) cmsStageFree(KeepPostLin);
+    cmsPipelineFree(Src);
+
+    // Don't fix white on absolute colorimetric
+    if (Intent == INTENT_ABSOLUTE_COLORIMETRIC)
+        *dwFlags |= cmsFLAGS_NOWHITEONWHITEFIXUP;
+
+    if (!(*dwFlags & cmsFLAGS_NOWHITEONWHITEFIXUP)) {
+
+        FixWhiteMisalignment(Dest, ColorSpace, OutputColorSpace);
+    }
+
+    *Lut = Dest;
+    return TRUE;
+}
+
+
+// -----------------------------------------------------------------------------------------------------------------------------------------------
+// Fixes the gamma balancing of transform. This is described in my paper "Prelinearization Stages on
+// Color-Management Application-Specific Integrated Circuits (ASICs)" presented at NIP24. It only works
+// for RGB transforms. See the paper for more details
+// -----------------------------------------------------------------------------------------------------------------------------------------------
+
+
+// Normalize endpoints by slope limiting max and min. This assures endpoints as well.
+// Descending curves are handled as well. Curves too short to have a 2% cutoff zone are left untouched.
+static
+void SlopeLimiting(cmsToneCurve* g)
+{
+    int BeginVal, EndVal;
+    int AtBegin = (int) floor((cmsFloat64Number) g ->nEntries * 0.02 + 0.5);   // Cutoff at 2%
+    int AtEnd   = (int) g ->nEntries - AtBegin - 1;                                  // And 98%
+    cmsFloat64Number Val, Slope, beta;
+    int i;
+
+    // Below 25 entries the cutoff rounds to 0: both slopes below would then be divisions by zero
+    if (AtBegin < 1) return;
+
+    if (cmsIsToneCurveDescending(g)) {
+        BeginVal = 0xffff; EndVal = 0;
+    } else {
+        BeginVal = 0; EndVal = 0xffff;
+    }
+
+    // Compute slope and offset for begin of curve
+    Val   = g ->Table16[AtBegin];
+    Slope = (Val - BeginVal) / AtBegin;
+    beta  = Val - Slope * AtBegin;
+    for (i=0; i < AtBegin; i++)
+        g ->Table16[i] = _cmsQuickSaturateWord(i * Slope + beta);
+
+    // Compute slope and offset for the end
+    Val   = g ->Table16[AtEnd];
+    Slope = (EndVal - Val) / AtBegin;   // AtBegin holds the X interval, which is same in both cases
+    beta  = Val - Slope * AtEnd;
+    for (i = AtEnd; i < (int) g ->nEntries; i++)
+        g ->Table16[i] = _cmsQuickSaturateWord(i * Slope + beta);
+}
+
+
+// Precomputes tables for 8-bit on input devicelink: for each of the 256 byte values of every
+// channel, the table offset of its grid node (X0/Y0/Z0) and its interpolation rest (rx/ry/rz).
+// G holds the three input curves and may be NULL. Returns NULL if the block cannot be allocated.
+static
+Prelin8Data* PrelinOpt8alloc(cmsContext ContextID, const cmsInterpParams* p, cmsToneCurve* G[3])
+{
+    Prelin8Data* Tables;
+    cmsS15Fixed16Number FixedX, FixedY, FixedZ;
+    cmsUInt16Number Wide[3];
+    int Byte;
+
+    Tables = (Prelin8Data*)_cmsMallocZero(ContextID, sizeof(Prelin8Data));
+    if (Tables == NULL)
+        return NULL;
+
+    Tables ->ContextID = ContextID;
+    Tables ->p = p;
+
+    // Since this only works for 8 bit input, values always come as x * 257,
+    // so one table slot per byte value is enough
+    for (Byte = 0; Byte < 256; Byte++) {
+
+        // Start from the plain 16-bit representation of the byte ...
+        Wide[0] = Wide[1] = Wide[2] = FROM_8_TO_16(Byte);
+
+        // ... and pass it through the curves when they are given
+        if (G != NULL) {
+            Wide[0] = cmsEvalToneCurve16(G[0], Wide[0]);
+            Wide[1] = cmsEvalToneCurve16(G[1], Wide[1]);
+            Wide[2] = cmsEvalToneCurve16(G[2], Wide[2]);
+        }
+
+        // Move to 0..1.0 in fixed domain
+        FixedX = _cmsToFixedDomain((int) (Wide[0] * p -> Domain[0]));
+        FixedY = _cmsToFixedDomain((int) (Wide[1] * p -> Domain[1]));
+        FixedZ = _cmsToFixedDomain((int) (Wide[2] * p -> Domain[2]));
+
+        // Integer part: the node number, already multiplied by the
+        // matching opta[] factor so that it is a ready-to-use table offset
+        Tables ->X0[Byte] = (p->opta[2] * FIXED_TO_INT(FixedX));
+        Tables ->Y0[Byte] = (p->opta[1] * FIXED_TO_INT(FixedY));
+        Tables ->Z0[Byte] = (p->opta[0] * FIXED_TO_INT(FixedZ));
+
+        // Rest part: what remains of the fixed point value
+        // once the node number has been taken out
+        Tables ->rx[Byte] = (cmsUInt16Number) FIXED_REST_TO_INT(FixedX);
+        Tables ->ry[Byte] = (cmsUInt16Number) FIXED_REST_TO_INT(FixedY);
+        Tables ->rz[Byte] = (cmsUInt16Number) FIXED_REST_TO_INT(FixedZ);
+    }
+
+    return Tables;
+}
+
+static
+void Prelin8free(cmsContext ContextID, void* ptr)
+{
+    _cmsFree(ContextID, ptr);   // the block is released with this single call; nothing inside it is freed separately
+}
+
+static
+void* Prelin8dup(cmsContext ContextID, const void* ptr)
+{
+    return _cmsDupMem(ContextID, ptr, sizeof(Prelin8Data));   // duplicates sizeof(Prelin8Data) bytes; the not-owned pointer p is copied as is
+}
+
+
+
+// An optimized interpolation for 8-bit input.
+#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
+static
+void PrelinEval8(register const cmsUInt16Number Input[],
+                  register cmsUInt16Number Output[],
+                  register const void* D)
+{
+
+    cmsUInt8Number         r, g, b;
+    cmsS15Fixed16Number    rx, ry, rz;
+    cmsS15Fixed16Number    c0, c1, c2, c3, Rest;
+    int                    OutChan;
+    register cmsS15Fixed16Number X0, X1, Y0, Y1, Z0, Z1;
+    Prelin8Data* p8 = (Prelin8Data*) D;
+    register const cmsInterpParams* p = p8 ->p;
+    int                    TotalOut = (int) p -> nOutputs;
+    const cmsUInt16Number* LutTable = (const cmsUInt16Number*) p->Table;
+
+    r = (cmsUInt8Number) (Input[0] >> 8);
+    g = (cmsUInt8Number) (Input[1] >> 8);
+    b = (cmsUInt8Number) (Input[2] >> 8);
+
+    X0 = X1 = (cmsS15Fixed16Number) p8->X0[r];
+    Y0 = Y1 = (cmsS15Fixed16Number) p8->Y0[g];
+    Z0 = Z1 = (cmsS15Fixed16Number) p8->Z0[b];
+
+    rx = p8 ->rx[r];
+    ry = p8 ->ry[g];
+    rz = p8 ->rz[b];
+
+    X1 = X0 + (cmsS15Fixed16Number)((rx == 0) ? 0 :  p ->opta[2]);
+    Y1 = Y0 + (cmsS15Fixed16Number)((ry == 0) ? 0 :  p ->opta[1]);
+    Z1 = Z0 + (cmsS15Fixed16Number)((rz == 0) ? 0 :  p ->opta[0]);
+
+
+    // These are the 6 Tetrahedral
+    for (OutChan=0; OutChan < TotalOut; OutChan++) {
+
+        c0 = DENS(X0, Y0, Z0);
+
+        if (rx >= ry && ry >= rz)
+        {
+            c1 = DENS(X1, Y0, Z0) - c0;
+            c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0);
+            c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+        }
+        else
+            if (rx >= rz && rz >= ry)
+            {
+                c1 = DENS(X1, Y0, Z0) - c0;
+                c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0);
+            }
+            else
+                if (rz >= rx && rx >= ry)
+                {
+                    c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1);
+                    c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                    c3 = DENS(X0, Y0, Z1) - c0;
+                }
+                else
+                    if (ry >= rx && rx >= rz)
+                    {
+                        c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0);
+                        c2 = DENS(X0, Y1, Z0) - c0;
+                        c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+                    }
+                    else
+                        if (ry >= rz && rz >= rx)
+                        {
+                            c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                            c2 = DENS(X0, Y1, Z0) - c0;
+                            c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0);
+                        }
+                        else
+                            if (rz >= ry && ry >= rx)
+                            {
+                                c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                                c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1);
+                                c3 = DENS(X0, Y0, Z1) - c0;
+                            }
+                            else  {
+                                c1 = c2 = c3 = 0;
+                            }
+
+                            Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001;
+                            Output[OutChan] = (cmsUInt16Number) (c0 + ((Rest + (Rest >> 16)) >> 16));
+
+    }
+}
+
+#undef DENS
+
+
+// Tells whether a tabulated curve has too many entries pinned at 0x0000 or at 0xffff
+// (more than 1/20 of the table); such curves are not considered optimizeable.
+static cmsBool IsDegenerated(const cmsToneCurve* g)
+{
+    const cmsUInt32Number nEntries  = g ->nEntries;
+    const cmsUInt32Number Threshold = nEntries / 20;
+    cmsUInt32Number nZeros = 0, nPoles = 0, k;
+
+    for (k = 0; k < nEntries; k++) {
+
+        const cmsUInt16Number w = g ->Table16[k];
+        if (w == 0x0000) nZeros++;
+        else if (w == 0xffff) nPoles++;
+    }
+
+    // One single zero and one single pole: treated as the linear-table case
+    if (nZeros == 1 && nPoles == 1) return FALSE;
+    return (nZeros > Threshold || nPoles > Threshold) ? TRUE : FALSE;
+}
+
+// --------------------------------------------------------------------------------------------------------------
+// We need throughput (speed) over here
+
+// Optimization by prelinearization: replaces *Lut by per-channel curves (obtained by feeding a
+// gray ramp through the pipeline) followed by a CLUT resampled from the original pipeline with
+// the reversed curves in front. Applies only to non-float, chunky RGB to RGB; on non 8-bit input
+// the caller has to request it with cmsFLAGS_CLUT_PRE_LINEARIZATION. On success the original
+// pipeline is freed and *Lut replaced; on failure FALSE is returned and *Lut is left untouched.
+static cmsBool OptimizeByComputingLinearization(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
+{
+    cmsPipeline* OriginalLut;
+    cmsUInt32Number nGridPoints;
+    cmsToneCurve *Trans[cmsMAXCHANNELS], *TransReverse[cmsMAXCHANNELS];
+    cmsUInt32Number t, i;
+    cmsFloat32Number v, In[cmsMAXCHANNELS], Out[cmsMAXCHANNELS];
+    cmsBool lIsSuitable, lIsLinear;
+    cmsPipeline* OptimizedLUT = NULL, *LutPlusCurves = NULL;
+    cmsStage* OptimizedCLUTmpe;
+    cmsColorSpaceSignature ColorSpace, OutputColorSpace;
+    cmsStage* OptimizedPrelinMpe;
+    cmsStage* mpe;
+    cmsToneCurve** OptimizedPrelinCurves;
+    _cmsStageCLutData* OptimizedPrelinCLUT;
+
+    // This is a lossy optimization! does not apply in floating-point cases
+    if (_cmsFormatterIsFloat(*InputFormat) || _cmsFormatterIsFloat(*OutputFormat)) return FALSE;
+
+    // Only on chunky RGB
+    if (T_COLORSPACE(*InputFormat)  != PT_RGB) return FALSE;
+    if (T_PLANAR(*InputFormat)) return FALSE;
+
+    if (T_COLORSPACE(*OutputFormat) != PT_RGB) return FALSE;
+    if (T_PLANAR(*OutputFormat)) return FALSE;
+
+    // On 16 bits, user has to specify the feature
+    if (!_cmsFormatterIs8bit(*InputFormat)) {
+        if (!(*dwFlags & cmsFLAGS_CLUT_PRE_LINEARIZATION)) return FALSE;
+    }
+
+    OriginalLut = *Lut;
+
+   // Named color pipelines cannot be optimized either
+   for (mpe = cmsPipelineGetPtrToFirstStage(OriginalLut);
+         mpe != NULL;
+         mpe = cmsStageNext(mpe)) {
+            if (cmsStageType(mpe) == cmsSigNamedColorElemType) return FALSE;
+    }
+
+    ColorSpace       = _cmsICCcolorSpace((int) T_COLORSPACE(*InputFormat));
+    OutputColorSpace = _cmsICCcolorSpace((int) T_COLORSPACE(*OutputFormat));
+
+    // Color space must be specified
+    if (ColorSpace == (cmsColorSpaceSignature)0 ||
+        OutputColorSpace == (cmsColorSpaceSignature)0) return FALSE;
+
+    nGridPoints      = _cmsReasonableGridpointsByColorspace(ColorSpace, *dwFlags);
+
+    // Empty gamma containers, so the Error path can tell which curves exist
+    memset(Trans, 0, sizeof(Trans));
+    memset(TransReverse, 0, sizeof(TransReverse));
+
+    // If the last stage of the original lut are curves, and those curves are
+    // degenerated, it is likely the transform is squeezing and clipping
+    // the output from previous CLUT. We cannot optimize this case
+    {
+        cmsStage* last = cmsPipelineGetPtrToLastStage(OriginalLut);
+
+        if (last != NULL && cmsStageType(last) == cmsSigCurveSetElemType) {
+
+            _cmsStageToneCurvesData* Data = (_cmsStageToneCurvesData*)cmsStageData(last);
+            for (i = 0; i < Data->nCurves; i++) {
+                if (IsDegenerated(Data->TheCurves[i]))
+                    goto Error;
+            }
+        }
+    }
+
+    for (t = 0; t < OriginalLut ->InputChannels; t++) {
+        Trans[t] = cmsBuildTabulatedToneCurve16(OriginalLut ->ContextID, PRELINEARIZATION_POINTS, NULL);
+        if (Trans[t] == NULL) goto Error;
+    }
+
+    // Populate the curves
+    for (i=0; i < PRELINEARIZATION_POINTS; i++) {
+
+        v = (cmsFloat32Number) ((cmsFloat64Number) i / (PRELINEARIZATION_POINTS - 1));
+
+        // Feed input with a gray ramp
+        for (t=0; t < OriginalLut ->InputChannels; t++)
+            In[t] = v;
+
+        // Evaluate the gray value
+        cmsPipelineEvalFloat(In, Out, OriginalLut);
+
+        // Store result in curve
+        for (t=0; t < OriginalLut ->InputChannels; t++)
+            Trans[t] ->Table16[i] = _cmsQuickSaturateWord(Out[t] * 65535.0);
+    }
+
+    // Slope-limit the obtained curves
+    for (t = 0; t < OriginalLut ->InputChannels; t++)
+        SlopeLimiting(Trans[t]);
+
+    // Check for validity
+    lIsSuitable = TRUE;
+    lIsLinear   = TRUE;
+    for (t=0; (lIsSuitable && (t < OriginalLut ->InputChannels)); t++) {
+
+        // Record whether the curve is linear (the flag is not used to reject anything)
+        if (!cmsIsToneCurveLinear(Trans[t]))
+            lIsLinear = FALSE;
+
+        // Exclude if non-monotonic
+        if (!cmsIsToneCurveMonotonic(Trans[t]))
+            lIsSuitable = FALSE;
+
+        if (IsDegenerated(Trans[t]))
+            lIsSuitable = FALSE;
+    }
+
+    // If it is not suitable, just quit
+    if (!lIsSuitable) goto Error;
+
+    // Invert curves if possible
+    for (t = 0; t < OriginalLut ->InputChannels; t++) {
+        TransReverse[t] = cmsReverseToneCurveEx(PRELINEARIZATION_POINTS, Trans[t]);
+        if (TransReverse[t] == NULL) goto Error;
+    }
+
+    // Now insert the reversed curves at the beginning of the transform
+    LutPlusCurves = cmsPipelineDup(OriginalLut);
+    if (LutPlusCurves == NULL) goto Error;
+
+    if (!cmsPipelineInsertStage(LutPlusCurves, cmsAT_BEGIN, cmsStageAllocToneCurves(OriginalLut ->ContextID, OriginalLut ->InputChannels, TransReverse)))
+        goto Error;
+
+    // Create the result LUT
+    OptimizedLUT = cmsPipelineAlloc(OriginalLut ->ContextID, OriginalLut ->InputChannels, OriginalLut ->OutputChannels);
+    if (OptimizedLUT == NULL) goto Error;
+
+    OptimizedPrelinMpe = cmsStageAllocToneCurves(OriginalLut ->ContextID, OriginalLut ->InputChannels, Trans);
+
+    // Create and insert the curves at the beginning
+    if (!cmsPipelineInsertStage(OptimizedLUT, cmsAT_BEGIN, OptimizedPrelinMpe))
+        goto Error;
+
+    // Allocate the CLUT for result
+    OptimizedCLUTmpe = cmsStageAllocCLut16bit(OriginalLut ->ContextID, nGridPoints, OriginalLut ->InputChannels, OriginalLut ->OutputChannels, NULL);
+
+    // Add the CLUT to the destination LUT
+    if (!cmsPipelineInsertStage(OptimizedLUT, cmsAT_END, OptimizedCLUTmpe))
+        goto Error;
+
+    // Resample the LUT
+    if (!cmsStageSampleCLut16bit(OptimizedCLUTmpe, XFormSampler16, (void*) LutPlusCurves, 0)) goto Error;
+
+    // Free resources
+    for (t = 0; t < OriginalLut ->InputChannels; t++) {
+        if (Trans[t]) cmsFreeToneCurve(Trans[t]);
+        if (TransReverse[t]) cmsFreeToneCurve(TransReverse[t]);
+        Trans[t] = TransReverse[t] = NULL;    // so that the Error path below cannot free them twice
+    }
+
+    cmsPipelineFree(LutPlusCurves);
+    LutPlusCurves = NULL;
+
+    OptimizedPrelinCurves = _cmsStageGetPtrToCurveSet(OptimizedPrelinMpe);
+    OptimizedPrelinCLUT   = (_cmsStageCLutData*) OptimizedCLUTmpe ->Data;
+
+    // Set the evaluator if 8-bit. From here on a failure must still release OptimizedLUT, hence goto Error
+    if (_cmsFormatterIs8bit(*InputFormat)) {
+
+        Prelin8Data* p8 = PrelinOpt8alloc(OptimizedLUT ->ContextID,
+                                                OptimizedPrelinCLUT ->Params,
+                                                OptimizedPrelinCurves);
+        if (p8 == NULL) goto Error;
+
+        _cmsPipelineSetOptimizationParameters(OptimizedLUT, PrelinEval8, (void*) p8, Prelin8free, Prelin8dup);
+
+    }
+    else
+    {
+        Prelin16Data* p16 = PrelinOpt16alloc(OptimizedLUT ->ContextID,
+            OptimizedPrelinCLUT ->Params,
+            3, OptimizedPrelinCurves, 3, NULL);
+        if (p16 == NULL) goto Error;
+
+        _cmsPipelineSetOptimizationParameters(OptimizedLUT, PrelinEval16, (void*) p16, PrelinOpt16free, Prelin16dup);
+
+    }
+
+    // Don't fix white on absolute colorimetric
+    if (Intent == INTENT_ABSOLUTE_COLORIMETRIC)
+        *dwFlags |= cmsFLAGS_NOWHITEONWHITEFIXUP;
+
+    if (!(*dwFlags & cmsFLAGS_NOWHITEONWHITEFIXUP)) {
+        if (!FixWhiteMisalignment(OptimizedLUT, ColorSpace, OutputColorSpace)) {
+            goto Error;
+        }
+    }
+
+    // And return the obtained LUT
+    cmsPipelineFree(OriginalLut);
+    *Lut = OptimizedLUT;
+    return TRUE;
+
+Error:
+
+    for (t = 0; t < OriginalLut ->InputChannels; t++) {
+
+        if (Trans[t]) cmsFreeToneCurve(Trans[t]);
+        if (TransReverse[t]) cmsFreeToneCurve(TransReverse[t]);
+    }
+
+    if (LutPlusCurves != NULL) cmsPipelineFree(LutPlusCurves);
+    if (OptimizedLUT != NULL) cmsPipelineFree(OptimizedLUT);
+
+    return FALSE;
+
+    cmsUNUSED_PARAMETER(Intent);
+    cmsUNUSED_PARAMETER(lIsLinear);
+}
+
+
+// Curves optimizer ------------------------------------------------------------------------------------------------------------------
+
+// Destructor for Curves16Data: releases each per-curve table, then the
+// table of pointers, and finally the descriptor itself.
+static void CurvesFree(cmsContext ContextID, void* ptr)
+{
+    Curves16Data* c16 = (Curves16Data*) ptr;
+    cmsUInt32Number n = c16 ->nCurves;
+    cmsUInt32Number k;
+
+    for (k = 0; k < n; k++)
+        _cmsFree(ContextID, c16 ->Curves[k]);
+
+    _cmsFree(ContextID, c16 ->Curves);
+    _cmsFree(ContextID, c16);
+}
+
+// Deep copy of a Curves16Data: descriptor, pointer table and every per-curve table. Returns NULL, leaving nothing allocated, if a copy fails.
+static void* CurvesDup(cmsContext ContextID, const void* ptr)
+{
+    Curves16Data* Data = (Curves16Data*)_cmsDupMem(ContextID, ptr, sizeof(Curves16Data));
+    cmsUInt32Number i;
+
+    if (Data == NULL) return NULL;
+    Data->Curves = (cmsUInt16Number**) _cmsDupMem(ContextID, Data->Curves, Data->nCurves * sizeof(cmsUInt16Number*));
+    if (Data->Curves == NULL && Data->nCurves > 0) { _cmsFree(ContextID, Data); return NULL; }
+    for (i=0; i < Data -> nCurves; i++) {   // slot i still aliases the source table until it is overwritten below
+        cmsUInt16Number* Copy = (cmsUInt16Number*) _cmsDupMem(ContextID, Data->Curves[i], Data->nElements * sizeof(cmsUInt16Number));
+        if (Copy == NULL) { Data->nCurves = i; CurvesFree(ContextID, Data); return NULL; }   // CurvesFree then releases only our i copies
+        Data->Curves[i] = Copy;
+    }
+    return (void*) Data;
+}
+
+// Precomputes the tables used by the curve-joining fast evaluators: one table of nElements
+// 16-bit words for each of the nCurves curves in G. With nElements equal to 256 the index
+// is taken as an 8-bit sample and widened with FROM_8_TO_16 before evaluating the curve;
+// otherwise the index itself is the 16-bit input. Returns NULL, leaving nothing allocated,
+// if any allocation fails.
+static Curves16Data* CurvesAlloc(cmsContext ContextID, cmsUInt32Number nCurves, cmsUInt32Number nElements, cmsToneCurve** G)
+{
+    Curves16Data* Tables;
+    cmsUInt16Number* Row;
+    cmsUInt32Number Curve, Entry;
+
+    Tables = (Curves16Data*)_cmsMallocZero(ContextID, sizeof(Curves16Data));
+    if (Tables == NULL) return NULL;
+
+    Tables ->nCurves   = nCurves;
+    Tables ->nElements = nElements;
+    Tables ->Curves    = (cmsUInt16Number**) _cmsCalloc(ContextID, nCurves, sizeof(cmsUInt16Number*));
+
+    if (Tables ->Curves == NULL) {
+        _cmsFree(ContextID, Tables);
+        return NULL;
+    }
+
+    for (Curve = 0; Curve < nCurves; Curve++) {
+
+        Row = (cmsUInt16Number*) _cmsCalloc(ContextID, nElements, sizeof(cmsUInt16Number));
+        Tables ->Curves[Curve] = Row;
+
+        if (Row == NULL) {
+
+            // Roll back the tables of the previous curves, then the containers
+            for (Entry = 0; Entry < Curve; Entry++)
+                _cmsFree(ContextID, Tables ->Curves[Entry]);
+
+            _cmsFree(ContextID, Tables ->Curves);
+            _cmsFree(ContextID, Tables);
+            return NULL;
+        }
+
+        for (Entry = 0; Entry < nElements; Entry++) {
+
+            // A 256-entry table is indexed by 8-bit samples, any other size by the 16-bit value itself
+            const cmsUInt16Number x = (nElements == 256U) ? FROM_8_TO_16(Entry) : (cmsUInt16Number) Entry;
+
+            Row[Entry] = cmsEvalToneCurve16(G[Curve], x);
+        }
+    }
+
+    return Tables;
+}
+
+// Curve-only evaluator for 8-bit sources: the high byte of each input word
+// selects the entry of that channel's precomputed table.
+static void FastEvaluateCurves8(register const cmsUInt16Number In[],
+                                register cmsUInt16Number Out[],
+                                register const void* D)
+{
+    Curves16Data* Tables = (Curves16Data*) D;
+    cmsUInt32Number ch;
+
+    for (ch = 0; ch < Tables ->nCurves; ch++) {
+
+        const int Index = (In[ch] >> 8);
+        Out[ch] = Tables ->Curves[ch][Index];
+    }
+}
+
+
+// Curve-only evaluator for 16-bit sources: each input word is used directly
+// as the index into that channel's precomputed table.
+static void FastEvaluateCurves16(register const cmsUInt16Number In[],
+                                 register cmsUInt16Number Out[],
+                                 register const void* D)
+{
+    Curves16Data* Tables = (Curves16Data*) D;
+    cmsUInt32Number ch;
+
+    for (ch = 0; ch < Tables ->nCurves; ch++)
+        Out[ch] = Tables ->Curves[ch][In[ch]];
+}
+
+
+// Evaluator for pipelines that reduce to nothing: copies as many words as the
+// pipeline passed in D has input channels.
+static void FastIdentity16(register const cmsUInt16Number In[],
+                           register cmsUInt16Number Out[],
+                           register const void* D)
+{
+    cmsPipeline* Pipeline = (cmsPipeline*) D;
+    cmsUInt32Number ch;
+
+    for (ch = 0; ch < Pipeline ->InputChannels; ch++)
+        Out[ch] = In[ch];
+}
+
+
+// If the target LUT holds only curves, the optimization procedure is to join all those
+// curves together. That only works on curves and does not work on matrices. The joined curves
+// are tabulated and served by a table-lookup evaluator, or by the identity evaluator if they
+// turn out linear. On success *Lut is replaced and the original freed; on failure it is untouched.
+static cmsBool OptimizeByJoiningCurves(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
+{
+    cmsToneCurve** GammaTables = NULL;
+    cmsFloat32Number InFloat[cmsMAXCHANNELS], OutFloat[cmsMAXCHANNELS];
+    cmsUInt32Number i, j;
+    cmsPipeline* Src = *Lut;
+    cmsPipeline* Dest = NULL;
+    cmsStage* mpe;
+    cmsStage* ObtainedCurves = NULL;
+
+    // This is a lossy optimization! does not apply in floating-point cases
+    if (_cmsFormatterIsFloat(*InputFormat) || _cmsFormatterIsFloat(*OutputFormat)) return FALSE;
+
+    //  Only curves in this LUT?
+    for (mpe = cmsPipelineGetPtrToFirstStage(Src);
+         mpe != NULL;
+         mpe = cmsStageNext(mpe)) {
+            if (cmsStageType(mpe) != cmsSigCurveSetElemType) return FALSE;
+    }
+
+    // Allocate an empty LUT
+    Dest =  cmsPipelineAlloc(Src ->ContextID, Src ->InputChannels, Src ->OutputChannels);
+    if (Dest == NULL) return FALSE;
+
+    // Create target curves
+    GammaTables = (cmsToneCurve**) _cmsCalloc(Src ->ContextID, Src ->InputChannels, sizeof(cmsToneCurve*));
+    if (GammaTables == NULL) goto Error;
+
+    for (i=0; i < Src ->InputChannels; i++) {
+        GammaTables[i] = cmsBuildTabulatedToneCurve16(Src ->ContextID, PRELINEARIZATION_POINTS, NULL);
+        if (GammaTables[i] == NULL) goto Error;
+    }
+
+    // Compute 16 bit result by using floating point
+    for (i=0; i < PRELINEARIZATION_POINTS; i++) {
+
+        for (j=0; j < Src ->InputChannels; j++)
+            InFloat[j] = (cmsFloat32Number) ((cmsFloat64Number) i / (PRELINEARIZATION_POINTS - 1));
+
+        cmsPipelineEvalFloat(InFloat, OutFloat, Src);
+
+        for (j=0; j < Src ->InputChannels; j++)
+            GammaTables[j] -> Table16[i] = _cmsQuickSaturateWord(OutFloat[j] * 65535.0);
+    }
+
+    ObtainedCurves = cmsStageAllocToneCurves(Src ->ContextID, Src ->InputChannels, GammaTables);
+    if (ObtainedCurves == NULL) goto Error;
+
+    for (i=0; i < Src ->InputChannels; i++) {
+        cmsFreeToneCurve(GammaTables[i]);
+        GammaTables[i] = NULL;
+    }
+
+    if (GammaTables != NULL) {
+        _cmsFree(Src->ContextID, GammaTables);
+        GammaTables = NULL;
+    }
+
+    // Maybe the curves are linear at the end
+    if (!AllCurvesAreLinear(ObtainedCurves)) {
+
+        _cmsStageToneCurvesData* Data = (_cmsStageToneCurvesData*) cmsStageData(ObtainedCurves);
+        Curves16Data* c16;
+
+        if (!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, ObtainedCurves))
+            goto Error;
+
+        // Dest owns the stage from now on and releases it in cmsPipelineFree; drop our
+        // reference so that the Error path does not free the same stage a second time
+        ObtainedCurves = NULL;
+
+        // If the curves are to be applied in 8 bits, we can save memory
+        if (_cmsFormatterIs8bit(*InputFormat)) {
+            c16 = CurvesAlloc(Dest ->ContextID, Data ->nCurves, 256, Data ->TheCurves);
+            if (c16 == NULL) goto Error;
+            *dwFlags |= cmsFLAGS_NOCACHE;
+            _cmsPipelineSetOptimizationParameters(Dest, FastEvaluateCurves8, c16, CurvesFree, CurvesDup);
+        }
+        else {
+            c16 = CurvesAlloc(Dest ->ContextID, Data ->nCurves, 65536, Data ->TheCurves);
+            if (c16 == NULL) goto Error;
+            *dwFlags |= cmsFLAGS_NOCACHE;
+            _cmsPipelineSetOptimizationParameters(Dest, FastEvaluateCurves16, c16, CurvesFree, CurvesDup);
+        }
+    }
+    else {
+
+        // LUT optimizes to nothing. Set the identity LUT
+        cmsStageFree(ObtainedCurves);
+        ObtainedCurves = NULL;
+
+        if (!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, cmsStageAllocIdentity(Dest ->ContextID, Src ->InputChannels)))
+            goto Error;
+
+        *dwFlags |= cmsFLAGS_NOCACHE;
+        _cmsPipelineSetOptimizationParameters(Dest, FastIdentity16, (void*) Dest, NULL, NULL);
+    }
+
+    // We are done.
+    cmsPipelineFree(Src);
+    *Lut = Dest;
+    return TRUE;
+
+Error:
+
+    if (ObtainedCurves != NULL) cmsStageFree(ObtainedCurves);
+    if (GammaTables != NULL) {
+        for (i=0; i < Src ->InputChannels; i++) {
+            if (GammaTables[i] != NULL) cmsFreeToneCurve(GammaTables[i]);
+        }
+
+        _cmsFree(Src ->ContextID, GammaTables);
+    }
+
+    if (Dest != NULL) cmsPipelineFree(Dest);
+    return FALSE;
+
+    cmsUNUSED_PARAMETER(Intent);
+    cmsUNUSED_PARAMETER(InputFormat);
+    cmsUNUSED_PARAMETER(OutputFormat);
+    cmsUNUSED_PARAMETER(dwFlags);
+}
+
+// -------------------------------------------------------------------------------------------------------------------------------------
+// LUT is Shaper - Matrix - Matrix - Shaper, which is very frequent when combining two matrix-shaper profiles
+
+
+// Release callback for the matrix-shaper data; a NULL block is ignored.
+static void FreeMatShaper(cmsContext ContextID, void* Data)
+{
+    if (Data) _cmsFree(ContextID, Data);
+}
+
+// Duplication callback for the matrix-shaper data: flat copy of one MatShaper8Data.
+static void* DupMatShaper(cmsContext ContextID, const void* Data)
+{
+    return _cmsDupMem(ContextID, Data, sizeof(MatShaper8Data));
+}
+
+
+// A fast matrix-shaper evaluator for 8-bit input. This is a bit tricky since 1.14 signed fixed point
+// is used to gain some performance. It relies on three 256-entry first-shaper tables (1.14 values) and
+// three 16385-entry second-shaper tables (16-bit words), all precomputed; the performance boost is huge!
+static
+void MatShaperEval16(register const cmsUInt16Number In[],
+                     register cmsUInt16Number Out[],
+                     register const void* D)
+{
+    MatShaper8Data* p = (MatShaper8Data*) D;
+    cmsS1Fixed14Number l1, l2, l3, r, g, b;
+    cmsUInt32Number ri, gi, bi;
+
+    // In this case (and only in this case!) we can use this simplification since
+    // In[] is assured to come from a 8 bit number. (a << 8 | a)
+    ri = In[0] & 0xFFU;
+    gi = In[1] & 0xFFU;
+    bi = In[2] & 0xFFU;
+
+    // Across first shaper, which also converts to 1.14 fixed point
+    r = p->Shaper1R[ri];
+    g = p->Shaper1G[gi];
+    b = p->Shaper1B[bi];
+    // NOTE(review): Off[] is filled with DOUBLE_TO_1FIXED14 in SetMatShaper (1.14 scale) yet is added below to products of two 1.14 values -- confirm the intended offset scaling
+    // Evaluate the matrix in 1.14 fixed point; 0x2000 is one half at that scale, so the >> 14 rounds to nearest
+    l1 =  (p->Mat[0][0] * r + p->Mat[0][1] * g + p->Mat[0][2] * b + p->Off[0] + 0x2000) >> 14;
+    l2 =  (p->Mat[1][0] * r + p->Mat[1][1] * g + p->Mat[1][2] * b + p->Off[1] + 0x2000) >> 14;
+    l3 =  (p->Mat[2][0] * r + p->Mat[2][1] * g + p->Mat[2][2] * b + p->Off[2] + 0x2000) >> 14;
+
+    // Now we have to clip to 0..1.0 range (0..16384 in 1.14), which is also the index range of the second shaper tables
+    ri = (l1 < 0) ? 0 : ((l1 > 16384) ? 16384U : (cmsUInt32Number) l1);
+    gi = (l2 < 0) ? 0 : ((l2 > 16384) ? 16384U : (cmsUInt32Number) l2);
+    bi = (l3 < 0) ? 0 : ((l3 > 16384) ? 16384U : (cmsUInt32Number) l3);
+
+    // And across second shaper,
+    Out[0] = p->Shaper2R[ri];
+    Out[1] = p->Shaper2G[gi];
+    Out[2] = p->Shaper2B[bi];
+
+}
+
+// Fills one 256-entry first-shaper table: entry i is the curve evaluated at i/255,
+// converted to 1.14 fixed point. Curve results that are not below 131072.0 are
+// stored as 0x7fffffff instead of being converted.
+static void FillFirstShaper(cmsS1Fixed14Number* Table, cmsToneCurve* Curve)
+{
+    int Level;
+
+    for (Level = 0; Level < 256; Level++) {
+
+        const cmsFloat32Number In  = (cmsFloat32Number) (Level / 255.0);
+        const cmsFloat32Number Out = cmsEvalToneCurveFloat(Curve, In);
+
+        if (Out < 131072.0)
+            Table[Level] = DOUBLE_TO_1FIXED14(Out);
+        else
+            Table[Level] = 0x7fffffff;
+    }
+}
+
+// Fills one 16385-entry second-shaper table: entry i is the curve evaluated at i/16384
+// (0x4000 being the last entry), clipped to 0..1.0 and scaled to a 16-bit word. When the
+// output is 8 bits, the word is first reduced to a byte and that byte is then widened
+// again with FROM_8_TO_16.
+static void FillSecondShaper(cmsUInt16Number* Table, cmsToneCurve* Curve, cmsBool Is8BitsOutput)
+{
+    int Step;
+
+    for (Step = 0; Step < 16385; Step++) {
+
+        const cmsFloat32Number In = (cmsFloat32Number) (Step / 16384.0);
+        cmsFloat32Number Out = cmsEvalToneCurveFloat(Curve, In);
+        cmsUInt16Number Word;
+
+        // Keep the curve result inside 0..1.0
+        if (Out < 0)
+            Out = 0;
+        if (Out > 1.0)
+            Out = 1.0;
+
+        Word = _cmsQuickSaturateWord(Out * 65535.0);
+
+        if (Is8BitsOutput) {
+            // Quantize to the byte the 8-bit output will carry and store that byte times 257:
+            // both halves of the word are then equal, so a >> 8 or a & 0xff give the same byte.
+            const cmsUInt8Number Byte = FROM_16_TO_8(Word);
+            Word = FROM_8_TO_16(Byte);
+        }
+
+        Table[Step] = Word;
+    }
+}
+
+// Builds the precomputed MatShaper8Data (first shapers, matrix and offset in 1.14 fixed point,
+// second shapers) and installs it on Dest with MatShaperEval16 as evaluator. A NULL Off is taken
+// as a zero offset. When the output format is 8 bits, *OutputFormat additionally gets the
+// OPTIMIZED_SH(1) mark. Returns FALSE, changing nothing, if the data block cannot be allocated.
+static cmsBool SetMatShaper(cmsPipeline* Dest, cmsToneCurve* Curve1[3], cmsMAT3* Mat, cmsVEC3* Off, cmsToneCurve* Curve2[3], cmsUInt32Number* OutputFormat)
+{
+    const cmsBool Out8 = _cmsFormatterIs8bit(*OutputFormat);
+    MatShaper8Data* Tables;
+    int Row, Col;
+
+    // One big chunk of memory holds all the precomputed tables
+    Tables = (MatShaper8Data*) _cmsMalloc(Dest ->ContextID, sizeof(MatShaper8Data));
+    if (Tables == NULL) return FALSE;
+
+    Tables ->ContextID = Dest ->ContextID;
+
+    // Shapers applied before the matrix
+    FillFirstShaper(Tables ->Shaper1R, Curve1[0]);
+    FillFirstShaper(Tables ->Shaper1G, Curve1[1]);
+    FillFirstShaper(Tables ->Shaper1B, Curve1[2]);
+
+    // Shapers applied after the matrix
+    FillSecondShaper(Tables ->Shaper2R, Curve2[0], Out8);
+    FillSecondShaper(Tables ->Shaper2G, Curve2[1], Out8);
+    FillSecondShaper(Tables ->Shaper2B, Curve2[2], Out8);
+
+    // Matrix goes to 1.14 fixed point. Note that those values may take more than 16 bits
+    for (Row = 0; Row < 3; Row++) {
+
+        for (Col = 0; Col < 3; Col++)
+            Tables ->Mat[Row][Col] = DOUBLE_TO_1FIXED14(Mat->v[Row].n[Col]);
+    }
+
+    // Offset goes to 1.14 as well; no offset vector means all zeros
+    for (Row = 0; Row < 3; Row++) {
+
+        Tables ->Off[Row] = (Off == NULL) ? 0 : DOUBLE_TO_1FIXED14(Off->n[Row]);
+    }
+
+    // Mark as optimized for faster formatter
+    if (Out8)
+        *OutputFormat |= OPTIMIZED_SH(1);
+
+    // Fill function pointers
+    _cmsPipelineSetOptimizationParameters(Dest, MatShaperEval16, (void*) Tables, FreeMatShaper, DupMatShaper);
+
+    return TRUE;
+}
+
+//  Optimization for shaper-matrix-shaper and shaper-matrix-matrix-shaper pipelines on 8-bit, 3-channel
+//  input. Both matrices are folded into one; if the result is the identity with no offset the curves are
+//  joined instead, otherwise the precomputed matrix-shaper evaluator is installed. On success *Lut is
+//  replaced and the original freed. The original notes up to 25 Mpixels per second on RGB. That's fast!
+static cmsBool OptimizeMatrixShaper(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
+{
+       cmsStage* Curve1, *Curve2;
+       cmsStage* Matrix1, *Matrix2;
+       cmsMAT3 res;
+       cmsBool IdentityMat;
+       cmsPipeline* Dest, *Src;
+       cmsFloat64Number* Offset;
+
+       // Only works on RGB to RGB
+       if (T_CHANNELS(*InputFormat) != 3 || T_CHANNELS(*OutputFormat) != 3) return FALSE;
+
+       // Only works on 8 bit input
+       if (!_cmsFormatterIs8bit(*InputFormat)) return FALSE;
+
+       // Seems suitable, proceed
+       Src = *Lut;
+
+       // Check for:
+       // 
+       //    shaper-matrix-matrix-shaper 
+       //    shaper-matrix-shaper
+       // 
+       // Both of those constructs are possible (first because abs. colorimetric). 
+       // additionally, In the first case, the input matrix offset should be zero.
+
+       IdentityMat = FALSE;
+       if (cmsPipelineCheckAndRetreiveStages(Src, 4,
+              cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType,
+              &Curve1, &Matrix1, &Matrix2, &Curve2)) {
+              // Get both matrices
+              _cmsStageMatrixData* Data1 = (_cmsStageMatrixData*)cmsStageData(Matrix1);
+              _cmsStageMatrixData* Data2 = (_cmsStageMatrixData*)cmsStageData(Matrix2);
+
+              // Both are handled as cmsMAT3 below, so anything that is not 3x3 cannot be folded
+              if (cmsStageInputChannels(Matrix1) != 3 || cmsStageOutputChannels(Matrix1) != 3 ||
+                  cmsStageInputChannels(Matrix2) != 3 || cmsStageOutputChannels(Matrix2) != 3) return FALSE;
+              // Input offset should be zero
+              if (Data1->Offset != NULL) return FALSE;
+
+              // Multiply both matrices to get the result
+              _cmsMAT3per(&res, (cmsMAT3*)Data2->Double, (cmsMAT3*)Data1->Double);
+
+              // Only 2nd matrix has offset, or it is zero 
+              Offset = Data2->Offset;
+
+              // Now the result is in res + Data2 -> Offset. Maybe is a plain identity?
+              if (_cmsMAT3isIdentity(&res) && Offset == NULL) {
+                     // We can get rid of full matrix
+                     IdentityMat = TRUE;
+              }
+       }
+       else {
+
+              if (cmsPipelineCheckAndRetreiveStages(Src, 3,
+                     cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType,
+                     &Curve1, &Matrix1, &Curve2)) {
+                     _cmsStageMatrixData* Data = (_cmsStageMatrixData*)cmsStageData(Matrix1);
+
+                     // sizeof(res) bytes are copied below, so the stage must really hold a 3x3 matrix
+                     if (cmsStageInputChannels(Matrix1) != 3 || cmsStageOutputChannels(Matrix1) != 3) return FALSE;
+                     // Copy the matrix to our result
+                     memcpy(&res, Data->Double, sizeof(res));
+
+                     // Preserve the Offset (may be NULL as a zero offset)
+                     Offset = Data->Offset;
+
+                     if (_cmsMAT3isIdentity(&res) && Offset == NULL) {
+                            // We can get rid of full matrix
+                            IdentityMat = TRUE;
+                     }
+              }
+              else
+                     return FALSE; // Not optimizeable this time
+       }
+
+      // Allocate an empty LUT
+    Dest =  cmsPipelineAlloc(Src ->ContextID, Src ->InputChannels, Src ->OutputChannels);
+    if (!Dest) return FALSE;
+
+    // Assemble the new LUT
+    if (!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, cmsStageDup(Curve1)))
+        goto Error;
+
+    if (!IdentityMat) {
+
+           if (!cmsPipelineInsertStage(Dest, cmsAT_END, cmsStageAllocMatrix(Dest->ContextID, 3, 3, (const cmsFloat64Number*)&res, Offset)))
+                  goto Error;
+    }
+
+    if (!cmsPipelineInsertStage(Dest, cmsAT_END, cmsStageDup(Curve2)))
+        goto Error;
+
+    // If identity on matrix, we can further optimize the curves, so call the join curves routine
+    if (IdentityMat) {
+        OptimizeByJoiningCurves(&Dest, Intent, InputFormat, OutputFormat, dwFlags);
+    }
+    else {
+        _cmsStageToneCurvesData* mpeC1 = (_cmsStageToneCurvesData*) cmsStageData(Curve1);
+        _cmsStageToneCurvesData* mpeC2 = (_cmsStageToneCurvesData*) cmsStageData(Curve2);
+
+        // In this particular optimization, cache does not help as it takes more time to deal with
+        // the cache than with the pixel handling
+        *dwFlags |= cmsFLAGS_NOCACHE;
+
+        // Setup the optimization routines
+        SetMatShaper(Dest, mpeC1 ->TheCurves, &res, (cmsVEC3*) Offset, mpeC2->TheCurves, OutputFormat);
+    }
+
+    cmsPipelineFree(Src);
+    *Lut = Dest;
+    return TRUE;
+Error:
+    // Leave Src unchanged
+    cmsPipelineFree(Dest);
+    return FALSE;
+}
+
+
+// -------------------------------------------------------------------------------------------------------------------------------------
+// Optimization plug-ins
+
+// Node of a singly-linked list of optimization strategies
+typedef struct _cmsOptimizationCollection_st {
+
+    _cmsOPToptimizeFn  OptimizePtr;    // Strategy callback; a TRUE return ends the search in _cmsOptimizePipeline
+
+    struct _cmsOptimizationCollection_st *Next;    // Following node, NULL on the last one
+
+} _cmsOptimizationCollection;
+
+
+// The built-in list. We currently implement 4 types of optimizations: joining of curves, matrix-shaper, linearization and resampling
+static _cmsOptimizationCollection DefaultOptimization[] = {
+    // Each entry points to the following array element, so the array is walked as a linked list in this order
+    { OptimizeByJoiningCurves,            &DefaultOptimization[1] },
+    { OptimizeMatrixShaper,               &DefaultOptimization[2] },
+    { OptimizeByComputingLinearization,   &DefaultOptimization[3] },
+    { OptimizeByResampling,               NULL }
+};
+
+// Global chunk holding the head of the plug-in optimization list; it starts empty (NULL head)
+_cmsOptimizationPluginChunkType _cmsOptimizationPluginChunk = { NULL };
+
+
+// Copies the plug-in optimization list of src into ctx: every node is duplicated from
+// ctx's memory pool, keeping the original order, and the resulting head is stored as
+// ctx's OptimizationPlugin chunk. If a node cannot be duplicated the function returns
+// early, without storing any chunk.
+static void DupPluginOptimizationList(struct _cmsContext_struct* ctx,
+                                      const struct _cmsContext_struct* src)
+{
+    _cmsOptimizationPluginChunkType  Copy = { NULL };
+    _cmsOptimizationCollection**     Tail = &Copy.OptimizationCollection;
+    _cmsOptimizationCollection*      Node;
+    _cmsOptimizationPluginChunkType* Source = (_cmsOptimizationPluginChunkType*) src->chunks[OptimizationPlugin];
+
+    _cmsAssert(ctx != NULL);
+    _cmsAssert(Source != NULL);
+
+    // Walk the source list cloning every node
+    for (Node = Source->OptimizationCollection;
+         Node != NULL;
+         Node = Node ->Next) {
+
+        _cmsOptimizationCollection* Clone =
+            (_cmsOptimizationCollection*) _cmsSubAllocDup(ctx ->MemPool, Node, sizeof(_cmsOptimizationCollection));
+
+        if (Clone == NULL)
+            return;
+
+        // Append through the pointer to the last link: the order is preserved
+        // and the first clone automatically becomes the head
+        Clone ->Next = NULL;
+        *Tail = Clone;
+        Tail  = &Clone ->Next;
+    }
+
+    // Publish the copied head as the chunk of the new context
+    ctx ->chunks[OptimizationPlugin] = _cmsSubAllocDup(ctx->MemPool, &Copy, sizeof(_cmsOptimizationPluginChunkType));
+}
+
+// Sets up ctx's OptimizationPlugin chunk: a copy of src's list when a source context
+// is given, otherwise a copy of an empty (NULL head) chunk.
+void  _cmsAllocOptimizationPluginChunk(struct _cmsContext_struct* ctx,
+                                         const struct _cmsContext_struct* src)
+{
+    static _cmsOptimizationPluginChunkType EmptyChunk = { NULL };
+
+    if (src == NULL)
+        ctx ->chunks[OptimizationPlugin] = _cmsSubAllocDup(ctx ->MemPool, &EmptyChunk, sizeof(_cmsOptimizationPluginChunkType));
+    else
+        DupPluginOptimizationList(ctx, src);
+
+}
+
+
+// Registers a new way to optimize in the given context. A NULL Data resets the context's
+// list to empty. Otherwise the plug-in must carry an optimizer callback; a new node is
+// pushed at the front of the list, so the most recently registered strategy is the first
+// one tried. Returns FALSE on a missing callback or if the node cannot be allocated.
+cmsBool  _cmsRegisterOptimizationPlugin(cmsContext ContextID, cmsPluginBase* Data)
+{
+    _cmsOptimizationPluginChunkType* Chunk = ( _cmsOptimizationPluginChunkType*) _cmsContextGetClientChunk(ContextID, OptimizationPlugin);
+    cmsPluginOptimization* Plugin = (cmsPluginOptimization*) Data;
+    _cmsOptimizationCollection* Node;
+
+    // No plug-in data: forget every registered optimization
+    if (Data == NULL) {
+        Chunk ->OptimizationCollection = NULL;
+        return TRUE;
+    }
+
+    // Optimizer callback is required
+    if (Plugin ->OptimizePtr == NULL)
+        return FALSE;
+
+    Node = (_cmsOptimizationCollection*) _cmsPluginMalloc(ContextID, sizeof(_cmsOptimizationCollection));
+    if (Node == NULL)
+        return FALSE;
+
+    // Fill the node and make it the new head
+    Node ->OptimizePtr = Plugin ->OptimizePtr;
+    Node ->Next        = Chunk ->OptimizationCollection;
+    Chunk ->OptimizationCollection = Node;
+
+    return TRUE;
+}
+
+// The entry point for LUT optimization. With cmsFLAGS_FORCE_CLUT the pipeline is pre-optimized
+// and handed straight to OptimizeByResampling. Otherwise PreOptimize runs first (unless the
+// pipeline is already empty), and a pipeline left without stages gets FastIdentity16 as its
+// evaluator. With cmsFLAGS_NOOPTIMIZE nothing else is tried and FALSE is returned. Else the
+// plug-in strategies of the context and then the built-in ones are tried in order; the first
+// one returning TRUE ends the search. If none succeeds the result of PreOptimize is returned.
+cmsBool _cmsOptimizePipeline(cmsContext ContextID,
+                             cmsPipeline**    PtrLut,
+                             cmsUInt32Number  Intent,
+                             cmsUInt32Number* InputFormat,
+                             cmsUInt32Number* OutputFormat,
+                             cmsUInt32Number* dwFlags)
+{
+    _cmsOptimizationPluginChunkType* Chunk = ( _cmsOptimizationPluginChunkType*) _cmsContextGetClientChunk(ContextID, OptimizationPlugin);
+    _cmsOptimizationCollection* Strategy;
+    cmsBool Simplified = FALSE;
+    int Source;
+
+    // A CLUT is being asked, so force this specific optimization
+    if (*dwFlags & cmsFLAGS_FORCE_CLUT) {
+
+        PreOptimize(*PtrLut);
+        return OptimizeByResampling(PtrLut, Intent, InputFormat, OutputFormat, dwFlags);
+    }
+
+    // Try to get rid of identities and trivial conversions, unless there is nothing to begin with
+    if ((*PtrLut) ->Elements != NULL) {
+        Simplified = PreOptimize(*PtrLut);
+    }
+
+    // Nothing to evaluate, either from the start or after the removal: plain identity
+    if ((*PtrLut) ->Elements == NULL) {
+
+        _cmsPipelineSetOptimizationParameters(*PtrLut, FastIdentity16, (void*) *PtrLut, NULL, NULL);
+        return TRUE;
+    }
+
+    // Do not optimize, keep all precision
+    if (*dwFlags & cmsFLAGS_NOOPTIMIZE) {
+        return FALSE;
+    }
+
+    // Source 0 is the plug-in list of this context, source 1 the built-in list
+    for (Source = 0; Source < 2; Source++) {
+
+        if (Source == 0)
+            Strategy = Chunk ->OptimizationCollection;
+        else
+            Strategy = DefaultOptimization;
+
+        while (Strategy != NULL) {
+
+            // If one schema succeeded, we are done
+            if (Strategy ->OptimizePtr(PtrLut, Intent, InputFormat, OutputFormat, dwFlags)) {
+                return TRUE;    // Optimized!
+            }
+
+            Strategy = Strategy ->Next;
+        }
+    }
+
+    // Only simple optimizations succeeded
+    return Simplified;
+}
+
+
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmspack.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmspack.cpp
new file mode 100755
index 0000000000..84b0097b98
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmspack.cpp
@@ -0,0 +1,3353 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// This module handles all formats supported by lcms. There are two flavors, 16 bits and
+// floating point. Floating point is supported only in a subset, those formats holding
+// cmsFloat32Number (4 bytes per component) and double (marked as 0 bytes per component
+// as special case)
+
+// ---------------------------------------------------------------------------
+
+
+// This macro swaps the two bytes of a 16-bit word (converts between little and big endian)
+#define CHANGE_ENDIAN(w)    (cmsUInt16Number) ((cmsUInt16Number) ((w)<<8)|((w)>>8))
+
+// These macros handle reversing (negative): the sample is subtracted from its full-scale value
+#define REVERSE_FLAVOR_8(x)     ((cmsUInt8Number) (0xff-(x)))
+#define REVERSE_FLAVOR_16(x)    ((cmsUInt16Number)(0xffff-(x)))
+
+// Lab V2 -> V4 rescale: * 0xffff / 0xff00 = (255 * 257) / (255 * 256) = 257 / 256
+cmsINLINE cmsUInt16Number FomLabV2ToLabV4(cmsUInt16Number x)
+{
+    const int widened = ((x << 8) | x) >> 8;  // * 257 / 256
+
+    return (cmsUInt16Number) ((widened > 0xffff) ? 0xffff : widened);  // clamp to 16 bits
+}
+
+// * 0xff00 / 0xffff = * 256 / 257
+cmsINLINE cmsUInt16Number FomLabV4ToLabV2(cmsUInt16Number x)
+{
+    return (cmsUInt16Number) (((x << 8) + 0x80) / 257);  // + 0x80 (about half of 257) rounds the quotient to nearest
+}
+
+
+typedef struct {   // One table entry pairing a format descriptor with a 16-bit formatter
+    cmsUInt32Number Type;   // format descriptor bits for this entry
+    cmsUInt32Number Mask;   // presumably the format bits ignored when matching Type -- confirm in the lookup code
+    cmsFormatter16  Frm;    // formatter routine associated with this entry
+
+} cmsFormatters16;
+
+typedef struct {   // One table entry pairing a format descriptor with a floating point formatter
+    cmsUInt32Number    Type;   // format descriptor bits for this entry
+    cmsUInt32Number    Mask;   // presumably the format bits ignored when matching Type -- confirm in the lookup code
+    cmsFormatterFloat  Frm;    // formatter routine associated with this entry
+
+} cmsFormattersFloat;
+
+// Each ANY* value is a *_SH field macro applied to 31, 15, 7 or 1; presumably "match any" masks for formatter tables -- confirm
+#define ANYSPACE        COLORSPACE_SH(31)
+#define ANYCHANNELS     CHANNELS_SH(15)
+#define ANYEXTRA        EXTRA_SH(7)
+#define ANYPLANAR       PLANAR_SH(1)
+#define ANYENDIAN       ENDIAN16_SH(1)
+#define ANYSWAP         DOSWAP_SH(1)
+#define ANYSWAPFIRST    SWAPFIRST_SH(1)
+#define ANYFLAVOR       FLAVOR_SH(1)
+
+
+// Suppress warning about info never being used
+
+#ifdef _MSC_VER
+#pragma warning(disable : 4100)
+#endif
+
+// Unpacking routines (16 bits) ----------------------------------------------------------------------------------------
+
+
+// Generic unpacker for chunky 8-bit pixels (one byte per sample, samples adjacent). Does almost everything but is slow
+static
+cmsUInt8Number* UnrollChunkyBytes(register _cmsTRANSFORM* info,
+                                  register cmsUInt16Number wIn[],
+                                  register cmsUInt8Number* accum,
+                                  register cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan      = T_CHANNELS(info -> InputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info ->InputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->InputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> InputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> InputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;    // non-zero when DoSwap and SwapFirst differ
+    cmsUInt16Number v;
+    cmsUInt32Number i;
+
+    if (ExtraFirst) {
+        accum += Extra;    // extra bytes come before the color samples: skip them
+    }
+
+    for (i=0; i < nChan; i++) {
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;    // DoSwap fills wIn[] from the last slot down
+
+        v = FROM_8_TO_16(*accum);
+        v = Reverse ? REVERSE_FLAVOR_16(v) : v;
+        wIn[index] = v;
+        accum++;
+    }
+
+    if (!ExtraFirst) {
+        accum += Extra;    // extra bytes come after the color samples: skip them
+    }
+
+    if (Extra == 0 && SwapFirst) {    // no extras: rotate wIn[] left by one, first sample goes last
+        cmsUInt16Number tmp = wIn[0];
+        // NOTE(review): nChan == 0 would make (nChan-1) wrap around here -- confirm callers never pass it
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsUInt16Number));
+        wIn[nChan-1] = tmp;
+    }
+
+    return accum;    // accum has been advanced by nChan + Extra bytes
+    // The two lines below follow the return and are never executed
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+}
+
+// Planar 8-bit unpacker. Extra channels are just ignored because they come in the next planes
+static
+cmsUInt8Number* UnrollPlanarBytes(register _cmsTRANSFORM* info,
+                                  register cmsUInt16Number wIn[],
+                                  register cmsUInt8Number* accum,
+                                  register cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan     = T_CHANNELS(info -> InputFormat);
+    cmsUInt32Number DoSwap    = T_DOSWAP(info ->InputFormat);
+    cmsUInt32Number SwapFirst = T_SWAPFIRST(info ->InputFormat);
+    cmsUInt32Number Reverse   = T_FLAVOR(info ->InputFormat);
+    cmsUInt32Number i;
+    cmsUInt8Number* Init = accum;    // remembered so the return value is relative to the entry position
+
+    if (DoSwap ^ SwapFirst) {
+        accum += T_EXTRA(info -> InputFormat) * Stride;    // extra planes come first: jump over them
+    }
+
+    for (i=0; i < nChan; i++) {
+        // One sample per channel; consecutive channels are Stride bytes apart
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+        cmsUInt16Number v = FROM_8_TO_16(*accum);
+
+        wIn[index] = Reverse ? REVERSE_FLAVOR_16(v) : v;
+        accum += Stride;
+    }
+
+    return (Init + 1);    // one byte past the entry position
+}
+
+// Special cases, provided for performance
+// Unroll4Bytes: expands four adjacent bytes into wIn[0..3], keeping source order.
+static cmsUInt8Number* Unroll4Bytes(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[0] = FROM_8_TO_16(accum[0]); // C
+    wIn[1] = FROM_8_TO_16(accum[1]); // M
+    wIn[2] = FROM_8_TO_16(accum[2]); // Y
+    wIn[3] = FROM_8_TO_16(accum[3]); // K
+
+    return accum + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll4BytesReverse: like Unroll4Bytes, but each byte is reversed (0xff - value) before expansion.
+static cmsUInt8Number* Unroll4BytesReverse(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[0] = FROM_8_TO_16(REVERSE_FLAVOR_8(accum[0])); // C
+    wIn[1] = FROM_8_TO_16(REVERSE_FLAVOR_8(accum[1])); // M
+    wIn[2] = FROM_8_TO_16(REVERSE_FLAVOR_8(accum[2])); // Y
+    wIn[3] = FROM_8_TO_16(REVERSE_FLAVOR_8(accum[3])); // K
+
+    return accum + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll4BytesSwapFirst: first source byte lands in wIn[3], the next three in wIn[0..2].
+static cmsUInt8Number* Unroll4BytesSwapFirst(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[3] = FROM_8_TO_16(accum[0]); // K
+    wIn[0] = FROM_8_TO_16(accum[1]); // C
+    wIn[1] = FROM_8_TO_16(accum[2]); // M
+    wIn[2] = FROM_8_TO_16(accum[3]); // Y
+
+    return accum + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// KYMC
+// Unroll4BytesSwap: four adjacent bytes stored into wIn[3], wIn[2], wIn[1], wIn[0] (reversed order).
+static cmsUInt8Number* Unroll4BytesSwap(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[3] = FROM_8_TO_16(accum[0]);  // K
+    wIn[2] = FROM_8_TO_16(accum[1]);  // Y
+    wIn[1] = FROM_8_TO_16(accum[2]);  // M
+    wIn[0] = FROM_8_TO_16(accum[3]);  // C
+
+    return accum + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll4BytesSwapSwapFirst: source bytes 0..3 are stored into wIn[2], wIn[1], wIn[0], wIn[3].
+static cmsUInt8Number* Unroll4BytesSwapSwapFirst(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[2] = FROM_8_TO_16(accum[0]);  // 1st source byte
+    wIn[1] = FROM_8_TO_16(accum[1]);  // 2nd source byte
+    wIn[0] = FROM_8_TO_16(accum[2]);  // 3rd source byte
+    wIn[3] = FROM_8_TO_16(accum[3]);  // 4th source byte
+
+    return accum + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll3Bytes: expands three adjacent bytes into wIn[0..2], keeping source order.
+static cmsUInt8Number* Unroll3Bytes(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[0] = FROM_8_TO_16(accum[0]);     // R
+    wIn[1] = FROM_8_TO_16(accum[1]);     // G
+    wIn[2] = FROM_8_TO_16(accum[2]);     // B
+
+    return accum + 3;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll3BytesSkip1Swap: skips one leading byte, then stores the next three into wIn[2], wIn[1], wIn[0].
+static cmsUInt8Number* Unroll3BytesSkip1Swap(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    // accum[0] is the skipped A byte
+    wIn[2] = FROM_8_TO_16(accum[1]); // B
+    wIn[1] = FROM_8_TO_16(accum[2]); // G
+    wIn[0] = FROM_8_TO_16(accum[3]); // R
+
+    return accum + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll3BytesSkip1SwapSwapFirst: stores three bytes into wIn[2], wIn[1], wIn[0], then skips one trailing byte.
+static cmsUInt8Number* Unroll3BytesSkip1SwapSwapFirst(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[2] = FROM_8_TO_16(accum[0]); // B
+    wIn[1] = FROM_8_TO_16(accum[1]); // G
+    wIn[0] = FROM_8_TO_16(accum[2]); // R
+    // accum[3] is the skipped A byte
+
+    return accum + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll3BytesSkip1SwapFirst: skips one leading byte, then stores the next three into wIn[0..2] in order.
+static cmsUInt8Number* Unroll3BytesSkip1SwapFirst(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    // accum[0] is the skipped A byte
+    wIn[0] = FROM_8_TO_16(accum[1]); // R
+    wIn[1] = FROM_8_TO_16(accum[2]); // G
+    wIn[2] = FROM_8_TO_16(accum[3]); // B
+
+    return accum + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+
+// BGR
+// Unroll3BytesSwap: three adjacent bytes stored into wIn[2], wIn[1], wIn[0] (reversed order).
+static cmsUInt8Number* Unroll3BytesSwap(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[2] = FROM_8_TO_16(accum[0]);     // B
+    wIn[1] = FROM_8_TO_16(accum[1]);     // G
+    wIn[0] = FROM_8_TO_16(accum[2]);     // R
+
+    return accum + 3;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// UnrollLabV2_8: expands three bytes to 16 bits and passes each through FomLabV2ToLabV4.
+static cmsUInt8Number* UnrollLabV2_8(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[0] = FomLabV2ToLabV4(FROM_8_TO_16(accum[0]));     // L
+    wIn[1] = FomLabV2ToLabV4(FROM_8_TO_16(accum[1]));     // a
+    wIn[2] = FomLabV2ToLabV4(FROM_8_TO_16(accum[2]));     // b
+
+    return accum + 3;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// UnrollALabV2_8: same as UnrollLabV2_8 but with one leading byte skipped first.
+static cmsUInt8Number* UnrollALabV2_8(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    // accum[0] is the skipped A byte
+    wIn[0] = FomLabV2ToLabV4(FROM_8_TO_16(accum[1]));     // L
+    wIn[1] = FomLabV2ToLabV4(FROM_8_TO_16(accum[2]));     // a
+    wIn[2] = FomLabV2ToLabV4(FROM_8_TO_16(accum[3]));     // b
+
+    return accum + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// UnrollLabV2_16: reads three 16-bit words (2 bytes apart) and passes each through FomLabV2ToLabV4.
+static cmsUInt8Number* UnrollLabV2_16(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[0] = FomLabV2ToLabV4(*(cmsUInt16Number*) (accum + 0));     // L
+    wIn[1] = FomLabV2ToLabV4(*(cmsUInt16Number*) (accum + 2));     // a
+    wIn[2] = FomLabV2ToLabV4(*(cmsUInt16Number*) (accum + 4));     // b
+
+    return accum + 6;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// for duplex
+// Unroll2Bytes: expands two adjacent bytes into wIn[0] and wIn[1].
+static cmsUInt8Number* Unroll2Bytes(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[0] = FROM_8_TO_16(accum[0]);     // ch1
+    wIn[1] = FROM_8_TO_16(accum[1]);     // ch2
+
+    return accum + 2;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+
+
+
+// Monochrome duplicates L into RGB for null-transforms
+// Unroll1Byte: one byte is expanded to 16 bits and copied into wIn[0], wIn[1] and wIn[2].
+static cmsUInt8Number* Unroll1Byte(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    const cmsUInt16Number gray = FROM_8_TO_16(accum[0]);     // L
+    wIn[0] = wIn[1] = wIn[2] = gray;
+    return accum + 1;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+
+// Unroll1ByteSkip1: replicates one expanded byte into wIn[0..2], then skips one more byte.
+static cmsUInt8Number* Unroll1ByteSkip1(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    const cmsUInt16Number gray = FROM_8_TO_16(accum[0]);     // L
+    wIn[0] = wIn[1] = wIn[2] = gray;
+
+    return accum + 2;   // sample byte + 1 skipped byte
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll1ByteSkip2: replicates one expanded byte into wIn[0..2], then skips two more bytes.
+static cmsUInt8Number* Unroll1ByteSkip2(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    const cmsUInt16Number gray = FROM_8_TO_16(accum[0]);     // L
+    wIn[0] = wIn[1] = wIn[2] = gray;
+
+    return accum + 3;   // sample byte + 2 skipped bytes
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll1ByteReversed: one byte expanded to 16 bits, reversed (0xffff - v), replicated into wIn[0..2].
+static cmsUInt8Number* Unroll1ByteReversed(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    const cmsUInt16Number gray = REVERSE_FLAVOR_16(FROM_8_TO_16(accum[0]));     // L
+    wIn[0] = wIn[1] = wIn[2] = gray;
+    return accum + 1;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+
+static    // Generic unpacker for chunky 16-bit pixels: handles swap, endian, reverse and extra channels
+cmsUInt8Number* UnrollAnyWords(register _cmsTRANSFORM* info,
+                               register cmsUInt16Number wIn[],
+                               register cmsUInt8Number* accum,
+                               register cmsUInt32Number Stride)
+{
+   cmsUInt32Number nChan       = T_CHANNELS(info -> InputFormat);
+   cmsUInt32Number SwapEndian  = T_ENDIAN16(info -> InputFormat);
+   cmsUInt32Number DoSwap      = T_DOSWAP(info ->InputFormat);
+   cmsUInt32Number Reverse     = T_FLAVOR(info ->InputFormat);
+   cmsUInt32Number SwapFirst   = T_SWAPFIRST(info -> InputFormat);
+   cmsUInt32Number Extra       = T_EXTRA(info -> InputFormat);
+   cmsUInt32Number ExtraFirst  = DoSwap ^ SwapFirst;    // non-zero when DoSwap and SwapFirst differ
+   cmsUInt32Number i;
+
+    if (ExtraFirst) {
+        accum += Extra * sizeof(cmsUInt16Number);    // extra words come before the color samples: skip them
+    }
+
+    for (i=0; i < nChan; i++) {
+        // DoSwap fills wIn[] from the last slot down
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+        cmsUInt16Number v = *(cmsUInt16Number*) accum;
+
+        if (SwapEndian)
+            v = CHANGE_ENDIAN(v);
+
+        wIn[index] = Reverse ? REVERSE_FLAVOR_16(v) : v;
+
+        accum += sizeof(cmsUInt16Number);
+    }
+
+    if (!ExtraFirst) {
+        accum += Extra * sizeof(cmsUInt16Number);    // extra words come after the color samples: skip them
+    }
+
+    if (Extra == 0 && SwapFirst) {
+        // No extras: rotate wIn[] left by one, first sample goes last
+        cmsUInt16Number tmp = wIn[0];
+        // NOTE(review): nChan == 0 would make (nChan-1) wrap around here -- confirm callers never pass it
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsUInt16Number));
+        wIn[nChan-1] = tmp;
+    }
+
+    return accum;    // accum has been advanced by (nChan + Extra) words
+    // The line below follows the return and is never executed
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+static    // Planar 16-bit unpacker: one word per channel, channels Stride words apart
+cmsUInt8Number* UnrollPlanarWords(register _cmsTRANSFORM* info,
+                                  register cmsUInt16Number wIn[],
+                                  register cmsUInt8Number* accum,
+                                  register cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan = T_CHANNELS(info -> InputFormat);
+    cmsUInt32Number DoSwap= T_DOSWAP(info ->InputFormat);
+    cmsUInt32Number Reverse= T_FLAVOR(info ->InputFormat);
+    cmsUInt32Number SwapEndian = T_ENDIAN16(info -> InputFormat);
+    cmsUInt32Number i;
+    cmsUInt8Number* Init = accum;    // remembered so the return value is relative to the entry position
+    // NOTE(review): UnrollPlanarBytes tests (DoSwap ^ SwapFirst) here; this one tests DoSwap only -- confirm intended
+    if (DoSwap) {
+        accum += T_EXTRA(info -> InputFormat) * Stride * sizeof(cmsUInt16Number);    // jump over the extra planes
+    }
+
+    for (i=0; i < nChan; i++) {
+        // DoSwap fills wIn[] from the last slot down
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+        cmsUInt16Number v = *(cmsUInt16Number*) accum;
+
+        if (SwapEndian)
+            v = CHANGE_ENDIAN(v);
+
+        wIn[index] = Reverse ? REVERSE_FLAVOR_16(v) : v;
+
+        accum +=  Stride * sizeof(cmsUInt16Number);
+    }
+
+    return (Init + sizeof(cmsUInt16Number));    // one word past the entry position
+}
+
+
+// Unroll4Words: copies four 16-bit words (no byte swapping) into wIn[0..3] in source order.
+static cmsUInt8Number* Unroll4Words(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[0] = *(cmsUInt16Number*) (accum + 0); // C
+    wIn[1] = *(cmsUInt16Number*) (accum + 2); // M
+    wIn[2] = *(cmsUInt16Number*) (accum + 4); // Y
+    wIn[3] = *(cmsUInt16Number*) (accum + 6); // K
+
+    return accum + 8;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll4WordsReverse: like Unroll4Words, but every word is reversed (0xffff - value).
+static cmsUInt8Number* Unroll4WordsReverse(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[0] = REVERSE_FLAVOR_16(*(cmsUInt16Number*) (accum + 0)); // C
+    wIn[1] = REVERSE_FLAVOR_16(*(cmsUInt16Number*) (accum + 2)); // M
+    wIn[2] = REVERSE_FLAVOR_16(*(cmsUInt16Number*) (accum + 4)); // Y
+    wIn[3] = REVERSE_FLAVOR_16(*(cmsUInt16Number*) (accum + 6)); // K
+
+    return accum + 8;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll4WordsSwapFirst: first source word lands in wIn[3], the next three in wIn[0..2].
+static cmsUInt8Number* Unroll4WordsSwapFirst(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[3] = *(cmsUInt16Number*) (accum + 0); // K
+    wIn[0] = *(cmsUInt16Number*) (accum + 2); // C
+    wIn[1] = *(cmsUInt16Number*) (accum + 4); // M
+    wIn[2] = *(cmsUInt16Number*) (accum + 6); // Y
+
+    return accum + 8;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// KYMC
+// Unroll4WordsSwap: four 16-bit words stored into wIn[3], wIn[2], wIn[1], wIn[0] (reversed order).
+static cmsUInt8Number* Unroll4WordsSwap(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[3] = *(cmsUInt16Number*) (accum + 0); // K
+    wIn[2] = *(cmsUInt16Number*) (accum + 2); // Y
+    wIn[1] = *(cmsUInt16Number*) (accum + 4); // M
+    wIn[0] = *(cmsUInt16Number*) (accum + 6); // C
+
+    return accum + 8;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll4WordsSwapSwapFirst: source words 0..3 are stored into wIn[2], wIn[1], wIn[0], wIn[3].
+static cmsUInt8Number* Unroll4WordsSwapSwapFirst(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[2] = *(cmsUInt16Number*) (accum + 0); // 1st source word
+    wIn[1] = *(cmsUInt16Number*) (accum + 2); // 2nd source word
+    wIn[0] = *(cmsUInt16Number*) (accum + 4); // 3rd source word
+    wIn[3] = *(cmsUInt16Number*) (accum + 6); // 4th source word
+
+    return accum + 8;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll3Words: copies three 16-bit words (no byte swapping) into wIn[0..2] in source order.
+static cmsUInt8Number* Unroll3Words(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[0] = *(cmsUInt16Number*) (accum + 0);  // C R
+    wIn[1] = *(cmsUInt16Number*) (accum + 2);  // M G
+    wIn[2] = *(cmsUInt16Number*) (accum + 4);  // Y B
+
+    return accum + 6;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll3WordsSwap: three 16-bit words stored into wIn[2], wIn[1], wIn[0] (reversed order).
+static cmsUInt8Number* Unroll3WordsSwap(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[2] = *(cmsUInt16Number*) (accum + 0);  // 1st source word
+    wIn[1] = *(cmsUInt16Number*) (accum + 2);  // 2nd source word
+    wIn[0] = *(cmsUInt16Number*) (accum + 4);  // 3rd source word
+
+    return accum + 6;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll3WordsSkip1Swap: skips one leading word, then stores the next three into wIn[2], wIn[1], wIn[0].
+static cmsUInt8Number* Unroll3WordsSkip1Swap(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    // bytes 0..1 hold the skipped A word
+    wIn[2] = *(cmsUInt16Number*) (accum + 2); // 2nd source word
+    wIn[1] = *(cmsUInt16Number*) (accum + 4); // 3rd source word
+    wIn[0] = *(cmsUInt16Number*) (accum + 6); // 4th source word
+
+    return accum + 8;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll3WordsSkip1SwapFirst: skips one leading word, then stores the next three into wIn[0..2] in order.
+static cmsUInt8Number* Unroll3WordsSkip1SwapFirst(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    // bytes 0..1 hold the skipped A word
+    wIn[0] = *(cmsUInt16Number*) (accum + 2); // R
+    wIn[1] = *(cmsUInt16Number*) (accum + 4); // G
+    wIn[2] = *(cmsUInt16Number*) (accum + 6); // B
+
+    return accum + 8;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll1Word: one 16-bit word is copied into wIn[0], wIn[1] and wIn[2].
+static cmsUInt8Number* Unroll1Word(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    const cmsUInt16Number gray = *(cmsUInt16Number*) accum;   // L
+    wIn[0] = wIn[1] = wIn[2] = gray;
+    return accum + 2;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll1WordReversed: one 16-bit word, reversed (0xffff - value), copied into wIn[0..2].
+static cmsUInt8Number* Unroll1WordReversed(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    const cmsUInt16Number gray = REVERSE_FLAVOR_16(*(cmsUInt16Number*) accum);
+    wIn[0] = wIn[1] = wIn[2] = gray;
+    return accum + 2;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll1WordSkip3: one 16-bit word copied into wIn[0..2]; the pointer then moves 8 bytes ahead.
+static cmsUInt8Number* Unroll1WordSkip3(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    const cmsUInt16Number gray = *(cmsUInt16Number*) accum;
+    wIn[0] = wIn[1] = wIn[2] = gray;
+
+    // 8 bytes = the word just read plus three more 2-byte words
+    return accum + 8;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+// Unroll2Words: copies two 16-bit words (no byte swapping) into wIn[0] and wIn[1].
+static cmsUInt8Number* Unroll2Words(register _cmsTRANSFORM* info,
+        register cmsUInt16Number wIn[],
+        register cmsUInt8Number* accum,
+        register cmsUInt32Number Stride)
+{
+    wIn[0] = *(cmsUInt16Number*) (accum + 0);    // ch1
+    wIn[1] = *(cmsUInt16Number*) (accum + 2);    // ch2
+
+    return accum + 4;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+}
+
+
+// This is a conversion of Lab double to 16 bits
+static
+cmsUInt8Number* UnrollLabDoubleTo16(register _cmsTRANSFORM* info,
+                                    register cmsUInt16Number wIn[],
+                                    register cmsUInt8Number* accum,
+                                    register cmsUInt32Number  Stride)
+{
+    if (T_PLANAR(info -> InputFormat)) {
+        // Planar: L, a and b are read Stride doubles apart
+        cmsFloat64Number* Pt = (cmsFloat64Number*) accum;
+
+        cmsCIELab Lab;
+
+        Lab.L = Pt[0];
+        Lab.a = Pt[Stride];
+        Lab.b = Pt[Stride*2];
+
+        cmsFloat2LabEncoded(wIn, &Lab);
+        return accum + sizeof(cmsFloat64Number);    // advance by one double only
+    }
+    else {
+        // Chunky: the input bytes are used directly as a cmsCIELab
+        cmsFloat2LabEncoded(wIn, (cmsCIELab*) accum);
+        accum += sizeof(cmsCIELab) + T_EXTRA(info ->InputFormat) * sizeof(cmsFloat64Number);    // Lab plus extra doubles
+        return accum;
+    }
+}
+
+
+// This is a conversion of Lab float to 16 bits
+static
+cmsUInt8Number* UnrollLabFloatTo16(register _cmsTRANSFORM* info,
+                                    register cmsUInt16Number wIn[],
+                                    register cmsUInt8Number* accum,
+                                    register cmsUInt32Number  Stride)
+{
+    cmsCIELab Lab;
+    // Lab is filled from the float samples before being encoded into wIn
+    if (T_PLANAR(info -> InputFormat)) {
+
+        cmsFloat32Number* Pt = (cmsFloat32Number*) accum;
+        // Planar: L, a and b are read Stride floats apart
+     
+        Lab.L = Pt[0];
+        Lab.a = Pt[Stride];
+        Lab.b = Pt[Stride*2];
+
+        cmsFloat2LabEncoded(wIn, &Lab);
+        return accum + sizeof(cmsFloat32Number);    // advance by one float only
+    }
+    else {
+        // Chunky: L, a and b are three consecutive floats
+        Lab.L = ((cmsFloat32Number*) accum)[0];
+        Lab.a = ((cmsFloat32Number*) accum)[1];
+        Lab.b = ((cmsFloat32Number*) accum)[2];
+
+        cmsFloat2LabEncoded(wIn, &Lab);
+        accum += (3 + T_EXTRA(info ->InputFormat)) * sizeof(cmsFloat32Number);    // Lab plus extra floats
+        return accum;
+    }
+}
+
+// This is a conversion of XYZ double to 16 bits
+static
+cmsUInt8Number* UnrollXYZDoubleTo16(register _cmsTRANSFORM* info,
+                                    register cmsUInt16Number wIn[],
+                                    register cmsUInt8Number* accum,
+                                    register cmsUInt32Number Stride)
+{
+    if (T_PLANAR(info -> InputFormat)) {
+        // Planar: X, Y and Z are read Stride doubles apart
+        cmsFloat64Number* Pt = (cmsFloat64Number*) accum;
+        cmsCIEXYZ XYZ;
+
+        XYZ.X = Pt[0];
+        XYZ.Y = Pt[Stride];
+        XYZ.Z = Pt[Stride*2];
+        cmsFloat2XYZEncoded(wIn, &XYZ);
+
+        return accum + sizeof(cmsFloat64Number);    // advance by one double only
+
+    }
+
+    else {    // Chunky: the input bytes are used directly as a cmsCIEXYZ
+        cmsFloat2XYZEncoded(wIn, (cmsCIEXYZ*) accum);
+        accum += sizeof(cmsCIEXYZ) + T_EXTRA(info ->InputFormat) * sizeof(cmsFloat64Number);    // XYZ plus extra doubles
+
+        return accum;
+    }
+}
+
+// This is a conversion of XYZ float to 16 bits
+static
+cmsUInt8Number* UnrollXYZFloatTo16(register _cmsTRANSFORM* info,
+                                   register cmsUInt16Number wIn[],
+                                   register cmsUInt8Number* accum,
+                                   register cmsUInt32Number Stride)
+{
+    if (T_PLANAR(info -> InputFormat)) {
+        // Planar: X, Y and Z are read Stride floats apart
+        cmsFloat32Number* Pt = (cmsFloat32Number*) accum;
+        cmsCIEXYZ XYZ;
+
+        XYZ.X = Pt[0];
+        XYZ.Y = Pt[Stride];
+        XYZ.Z = Pt[Stride*2];
+        cmsFloat2XYZEncoded(wIn, &XYZ);
+
+        return accum + sizeof(cmsFloat32Number);    // advance by one float only
+
+    }
+
+    else {    // Chunky: X, Y and Z are three consecutive floats
+        cmsFloat32Number* Pt = (cmsFloat32Number*) accum;
+        cmsCIEXYZ XYZ;
+
+        XYZ.X = Pt[0];
+        XYZ.Y = Pt[1];
+        XYZ.Z = Pt[2];
+        cmsFloat2XYZEncoded(wIn, &XYZ);
+
+        accum += 3 * sizeof(cmsFloat32Number) + T_EXTRA(info ->InputFormat) * sizeof(cmsFloat32Number);    // XYZ plus extra floats
+
+        return accum;
+    }
+}
+
+// Check if space is marked as ink
+cmsINLINE cmsBool IsInkSpace(cmsUInt32Number Type)
+{
+    // Colorspace field taken from the format descriptor
+    const cmsUInt32Number space = T_COLORSPACE(Type);
+
+    // CMY, CMYK and the multichannel spaces MCH5..MCH15 are the ones treated as ink
+    if (space == PT_CMY   ||
+        space == PT_CMYK  ||
+        space == PT_MCH5  ||
+        space == PT_MCH6  ||
+        space == PT_MCH7  ||
+        space == PT_MCH8  ||
+        space == PT_MCH9  ||
+        space == PT_MCH10 ||
+        space == PT_MCH11 ||
+        space == PT_MCH12 ||
+        space == PT_MCH13 ||
+        space == PT_MCH14 ||
+        space == PT_MCH15) return TRUE;
+    return FALSE;
+}
+
+// Inks come in percentage, remaining cases are between 0..1.0; doubles are converted to 16 bits
+static
+cmsUInt8Number* UnrollDoubleTo16(register _cmsTRANSFORM* info,
+                                register cmsUInt16Number wIn[],
+                                register cmsUInt8Number* accum,
+                                register cmsUInt32Number Stride)
+{
+
+    cmsUInt32Number nChan      = T_CHANNELS(info -> InputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info ->InputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->InputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> InputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> InputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;    // non-zero when DoSwap and SwapFirst differ
+    cmsUInt32Number Planar     = T_PLANAR(info -> InputFormat);
+    cmsFloat64Number v;
+    cmsUInt16Number  vi;
+    cmsUInt32Number i, start = 0;
+    cmsFloat64Number maximum = IsInkSpace(info ->InputFormat) ? 655.35 : 65535.0;    // 100 * 655.35 = 1.0 * 65535
+
+
+    if (ExtraFirst)
+            start = Extra;    // extras come first: color samples begin after them
+
+    for (i=0; i < nChan; i++) {
+        // DoSwap fills wIn[] from the last slot down
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+        // NOTE(review): the double sample is narrowed to cmsFloat32Number before being stored in v -- confirm intended
+        if (Planar)
+            v = (cmsFloat32Number) ((cmsFloat64Number*) accum)[(i + start) * Stride];
+        else
+            v = (cmsFloat32Number) ((cmsFloat64Number*) accum)[i + start];
+
+        vi = _cmsQuickSaturateWord(v * maximum);
+
+        if (Reverse)
+            vi = REVERSE_FLAVOR_16(vi);
+
+        wIn[index] = vi;
+    }
+
+    // No extras and SwapFirst: rotate wIn[] left by one, first sample goes last
+    if (Extra == 0 && SwapFirst) {
+        cmsUInt16Number tmp = wIn[0];
+        // NOTE(review): nChan == 0 would make (nChan-1) wrap around here -- confirm callers never pass it
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsUInt16Number));
+        wIn[nChan-1] = tmp;
+    }
+
+    if (T_PLANAR(info -> InputFormat))
+        return accum + sizeof(cmsFloat64Number);    // planar: advance by one double
+    else
+        return accum + (nChan + Extra) * sizeof(cmsFloat64Number);    // chunky: advance past all samples and extras
+}
+
+
+
+static    // Float counterpart of UnrollDoubleTo16: 32-bit float samples are converted to 16 bits
+cmsUInt8Number* UnrollFloatTo16(register _cmsTRANSFORM* info,
+                                register cmsUInt16Number wIn[],
+                                register cmsUInt8Number* accum,
+                                register cmsUInt32Number Stride)
+{
+
+    cmsUInt32Number nChan  = T_CHANNELS(info -> InputFormat);
+    cmsUInt32Number DoSwap   = T_DOSWAP(info ->InputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->InputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> InputFormat);
+    cmsUInt32Number Extra   = T_EXTRA(info -> InputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;    // non-zero when DoSwap and SwapFirst differ
+    cmsUInt32Number Planar     = T_PLANAR(info -> InputFormat);
+    cmsFloat32Number v;
+    cmsUInt16Number  vi;
+    cmsUInt32Number i, start = 0;
+    cmsFloat64Number maximum = IsInkSpace(info ->InputFormat) ? 655.35 : 65535.0;    // 100 * 655.35 = 1.0 * 65535
+
+
+    if (ExtraFirst)
+            start = Extra;    // extras come first: color samples begin after them
+
+    for (i=0; i < nChan; i++) {
+        // DoSwap fills wIn[] from the last slot down
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+        // Planar samples are Stride floats apart, chunky samples are adjacent
+        if (Planar)
+            v = (cmsFloat32Number) ((cmsFloat32Number*) accum)[(i + start) * Stride];
+        else
+            v = (cmsFloat32Number) ((cmsFloat32Number*) accum)[i + start];
+
+        vi = _cmsQuickSaturateWord(v * maximum);
+
+        if (Reverse)
+            vi = REVERSE_FLAVOR_16(vi);
+
+        wIn[index] = vi;
+    }
+
+    // No extras and SwapFirst: rotate wIn[] left by one, first sample goes last
+    if (Extra == 0 && SwapFirst) {
+        cmsUInt16Number tmp = wIn[0];
+        // NOTE(review): nChan == 0 would make (nChan-1) wrap around here -- confirm callers never pass it
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsUInt16Number));
+        wIn[nChan-1] = tmp;
+    }
+
+    if (T_PLANAR(info -> InputFormat))
+        return accum + sizeof(cmsFloat32Number);    // planar: advance by one float
+    else
+        return accum + (nChan + Extra) * sizeof(cmsFloat32Number);    // chunky: advance past all samples and extras
+}
+
+
+
+
+// Single-channel double input: the sample is multiplied by 65535.0, handed to
+// _cmsQuickSaturateWord and the result copied into the first three entries of wIn.
+static
+cmsUInt8Number* UnrollDouble1Chan(register _cmsTRANSFORM* info,
+                                  register cmsUInt16Number wIn[],
+                                  register cmsUInt8Number* accum,
+                                  register cmsUInt32Number Stride)
+{
+    cmsUInt16Number Gray = _cmsQuickSaturateWord(((cmsFloat64Number*) accum)[0] * 65535.0);
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    wIn[0] = wIn[1] = wIn[2] = Gray;
+    return accum + sizeof(cmsFloat64Number);
+}
+
+//-------------------------------------------------------------------------------------------------------------------
+
+// Reads one pixel of 32-bit float samples into the float working buffer wIn.
+// Ink-space samples are divided by 100 first; a reversed flavor stores 1 - v.
+static
+cmsUInt8Number* UnrollFloatsToFloat(_cmsTRANSFORM* info,
+                                    cmsFloat32Number wIn[],
+                                    cmsUInt8Number* accum,
+                                    cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan      = T_CHANNELS(info -> InputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info -> InputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info -> InputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> InputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> InputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsUInt32Number Planar     = T_PLANAR(info -> InputFormat);
+    cmsFloat32Number maximum   = IsInkSpace(info -> InputFormat) ? 100.0F : 1.0F;
+    cmsFloat32Number* Samples  = (cmsFloat32Number*) accum;
+    cmsUInt32Number start      = ExtraFirst ? Extra : 0;    // leading extra samples to skip
+    cmsUInt32Number i;
+
+    for (i=0; i < nChan; i++) {
+
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+        cmsFloat32Number v;
+
+        // Planar data keeps consecutive channels Stride samples apart
+        if (Planar)
+            v = Samples[(i + start) * Stride];
+        else
+            v = Samples[i + start];
+
+        v /= maximum;
+
+        wIn[index] = Reverse ? 1 - v : v;
+    }
+
+    // Rotate the first channel to the end. The nChan > 1 test keeps (nChan-1) from
+    // wrapping around when the format declares no channels; one channel needs no rotation.
+    if (Extra == 0 && SwapFirst && nChan > 1) {
+        cmsFloat32Number tmp = wIn[0];
+
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsFloat32Number));
+        wIn[nChan-1] = tmp;
+    }
+
+    if (Planar)
+        return accum + sizeof(cmsFloat32Number);
+
+    return accum + (nChan + Extra) * sizeof(cmsFloat32Number);
+}
+
+// Reads one pixel of 64-bit double samples into the float working buffer wIn.
+// Ink-space samples are divided by 100 first; a reversed flavor stores 1 - v.
+// The arithmetic is done in double and narrowed to float only when stored.
+static
+cmsUInt8Number* UnrollDoublesToFloat(_cmsTRANSFORM* info,
+                                    cmsFloat32Number wIn[],
+                                    cmsUInt8Number* accum,
+                                    cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan      = T_CHANNELS(info -> InputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info -> InputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info -> InputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> InputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> InputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsUInt32Number Planar     = T_PLANAR(info -> InputFormat);
+    cmsFloat64Number maximum   = IsInkSpace(info -> InputFormat) ? 100.0 : 1.0;
+    cmsFloat64Number* Samples  = (cmsFloat64Number*) accum;
+    cmsUInt32Number start      = ExtraFirst ? Extra : 0;    // leading extra samples to skip
+    cmsUInt32Number i;
+
+    for (i=0; i < nChan; i++) {
+
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+        cmsFloat64Number v;
+
+        // Planar data keeps consecutive channels Stride samples apart
+        if (Planar)
+            v = Samples[(i + start) * Stride];
+        else
+            v = Samples[i + start];
+
+        v /= maximum;
+
+        wIn[index] = (cmsFloat32Number) (Reverse ? 1.0 - v : v);
+    }
+
+    // Rotate the first channel to the end. The nChan > 1 test keeps (nChan-1) from
+    // wrapping around when the format declares no channels; one channel needs no rotation.
+    if (Extra == 0 && SwapFirst && nChan > 1) {
+        cmsFloat32Number tmp = wIn[0];
+
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsFloat32Number));
+        wIn[nChan-1] = tmp;
+    }
+
+    if (Planar)
+        return accum + sizeof(cmsFloat64Number);
+
+    return accum + (nChan + Extra) * sizeof(cmsFloat64Number);
+}
+
+
+
+// Lab stored as doubles -> normalized floats: L is divided by 100, while a and b
+// are shifted by 128 and divided by 255.
+static
+cmsUInt8Number* UnrollLabDoubleToFloat(_cmsTRANSFORM* info,
+                                       cmsFloat32Number wIn[],
+                                       cmsUInt8Number* accum,
+                                       cmsUInt32Number Stride)
+{
+    const cmsFloat64Number* Lab = (const cmsFloat64Number*) accum;
+    cmsUInt32Number IsPlanar = T_PLANAR(info -> InputFormat);
+    cmsUInt32Number Step;
+
+    // Planar components sit Stride samples apart, chunky ones are adjacent
+    if (IsPlanar)
+        Step = Stride;
+    else
+        Step = 1;
+
+    wIn[0] = (cmsFloat32Number) (Lab[0] / 100.0);                 // from 0..100 to 0..1
+    wIn[1] = (cmsFloat32Number) ((Lab[Step] + 128) / 255.0);      // from -128..+127 to 0..1
+    wIn[2] = (cmsFloat32Number) ((Lab[Step*2] + 128) / 255.0);
+
+    // Planar input moves on by one sample, chunky input by a whole pixel plus extras
+    if (IsPlanar)
+        return accum + sizeof(cmsFloat64Number);
+    return accum + sizeof(cmsFloat64Number) * (3 + T_EXTRA(info -> InputFormat));
+}
+
+// Lab stored as 32-bit floats -> normalized floats: L is divided by 100, while
+// a and b are shifted by 128 and divided by 255.
+static
+cmsUInt8Number* UnrollLabFloatToFloat(_cmsTRANSFORM* info,
+                                      cmsFloat32Number wIn[],
+                                      cmsUInt8Number* accum,
+                                      cmsUInt32Number Stride)
+{
+    const cmsFloat32Number* Lab = (const cmsFloat32Number*) accum;
+    cmsUInt32Number IsPlanar = T_PLANAR(info -> InputFormat);
+    cmsUInt32Number Step;
+
+    // Planar components sit Stride samples apart, chunky ones are adjacent
+    if (IsPlanar)
+        Step = Stride;
+    else
+        Step = 1;
+
+    wIn[0] = (cmsFloat32Number) (Lab[0] / 100.0);                 // from 0..100 to 0..1
+    wIn[1] = (cmsFloat32Number) ((Lab[Step] + 128) / 255.0);      // from -128..+127 to 0..1
+    wIn[2] = (cmsFloat32Number) ((Lab[Step*2] + 128) / 255.0);
+
+    // Planar input moves on by one sample, chunky input by a whole pixel plus extras
+    if (IsPlanar)
+        return accum + sizeof(cmsFloat32Number);
+    return accum + sizeof(cmsFloat32Number) * (3 + T_EXTRA(info -> InputFormat));
+}
+
+
+
+// XYZ stored as doubles -> normalized floats. XYZ uses a 1.15 fixed point
+// encoding, so every component is divided by MAX_ENCODEABLE_XYZ (encoded 0xFFFF).
+static
+cmsUInt8Number* UnrollXYZDoubleToFloat(_cmsTRANSFORM* info,
+                                       cmsFloat32Number wIn[],
+                                       cmsUInt8Number* accum,
+                                       cmsUInt32Number Stride)
+{
+    const cmsFloat64Number* XYZ = (const cmsFloat64Number*) accum;
+    cmsUInt32Number IsPlanar = T_PLANAR(info -> InputFormat);
+    cmsUInt32Number Step;
+
+    // Planar components sit Stride samples apart, chunky ones are adjacent
+    if (IsPlanar)
+        Step = Stride;
+    else
+        Step = 1;
+
+    wIn[0] = (cmsFloat32Number) (XYZ[0] / MAX_ENCODEABLE_XYZ);
+    wIn[1] = (cmsFloat32Number) (XYZ[Step] / MAX_ENCODEABLE_XYZ);
+    wIn[2] = (cmsFloat32Number) (XYZ[Step*2] / MAX_ENCODEABLE_XYZ);
+
+    // Planar input moves on by one sample, chunky input by a whole pixel plus extras
+    if (IsPlanar)
+        return accum + sizeof(cmsFloat64Number);
+    return accum + sizeof(cmsFloat64Number) * (3 + T_EXTRA(info -> InputFormat));
+}
+
+// XYZ stored as 32-bit floats: every component is divided by MAX_ENCODEABLE_XYZ.
+static
+cmsUInt8Number* UnrollXYZFloatToFloat(_cmsTRANSFORM* info,
+                                      cmsFloat32Number wIn[],
+                                      cmsUInt8Number* accum,
+                                      cmsUInt32Number Stride)
+{
+    const cmsFloat32Number* XYZ = (const cmsFloat32Number*) accum;
+    cmsUInt32Number IsPlanar = T_PLANAR(info -> InputFormat);
+    cmsUInt32Number Step;
+
+    // Planar components sit Stride samples apart, chunky ones are adjacent
+    if (IsPlanar)
+        Step = Stride;
+    else
+        Step = 1;
+
+    wIn[0] = (cmsFloat32Number) (XYZ[0] / MAX_ENCODEABLE_XYZ);
+    wIn[1] = (cmsFloat32Number) (XYZ[Step] / MAX_ENCODEABLE_XYZ);
+    wIn[2] = (cmsFloat32Number) (XYZ[Step*2] / MAX_ENCODEABLE_XYZ);
+
+    // Planar input moves on by one sample, chunky input by a whole pixel plus extras
+    if (IsPlanar)
+        return accum + sizeof(cmsFloat32Number);
+    return accum + sizeof(cmsFloat32Number) * (3 + T_EXTRA(info -> InputFormat));
+}
+
+
+
+// Packing routines -----------------------------------------------------------------------------------------------------------
+
+
+// Generic chunky packer for 8-bit output. Each 16-bit value in wOut is narrowed
+// with FROM_16_TO_8, optionally reversed, and written in the channel order the
+// output format selects. Extra bytes are skipped, never written.
+static
+cmsUInt8Number* PackAnyBytes(register _cmsTRANSFORM* info,
+                             register cmsUInt16Number wOut[],
+                             register cmsUInt8Number* output,
+                             register cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan      = T_CHANNELS(info -> OutputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info -> OutputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info -> OutputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> OutputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> OutputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsUInt8Number* swap1      = output;      // start of this pixel
+    cmsUInt8Number v = 0;
+    cmsUInt32Number i;
+
+    cmsUNUSED_PARAMETER(Stride);
+
+    if (ExtraFirst) {
+        output += Extra;
+    }
+
+    for (i=0; i < nChan; i++) {
+
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+
+        v = FROM_16_TO_8(wOut[index]);
+
+        if (Reverse)
+            v = REVERSE_FLAVOR_8(v);
+
+        *output++ = v;
+    }
+
+    if (!ExtraFirst) {
+        output += Extra;
+    }
+
+    // Rotate the last written channel to the front. The nChan > 1 test keeps
+    // (nChan-1) from wrapping around when the format declares no channels;
+    // with a single channel the rotation would change nothing.
+    if (Extra == 0 && SwapFirst && nChan > 1) {
+        memmove(swap1 + 1, swap1, nChan-1);
+        *swap1 = v;
+    }
+
+    return output;
+}
+
+
+
+// Generic chunky packer for 16-bit output. Each value in wOut is optionally passed
+// through CHANGE_ENDIAN and REVERSE_FLAVOR_16, then written in the channel order
+// the output format selects. Extra words are skipped, never written.
+static
+cmsUInt8Number* PackAnyWords(register _cmsTRANSFORM* info,
+                             register cmsUInt16Number wOut[],
+                             register cmsUInt8Number* output,
+                             register cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan      = T_CHANNELS(info -> OutputFormat);
+    cmsUInt32Number SwapEndian = T_ENDIAN16(info -> OutputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info -> OutputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info -> OutputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> OutputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> OutputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsUInt16Number* swap1     = (cmsUInt16Number*) output;   // start of this pixel
+    cmsUInt16Number v = 0;
+    cmsUInt32Number i;
+
+    cmsUNUSED_PARAMETER(Stride);
+
+    if (ExtraFirst) {
+        output += Extra * sizeof(cmsUInt16Number);
+    }
+
+    for (i=0; i < nChan; i++) {
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+
+        v = wOut[index];
+
+        if (SwapEndian)
+            v = CHANGE_ENDIAN(v);
+
+        if (Reverse)
+            v = REVERSE_FLAVOR_16(v);
+
+        *(cmsUInt16Number*) output = v;
+        output += sizeof(cmsUInt16Number);
+    }
+
+    if (!ExtraFirst) {
+        output += Extra * sizeof(cmsUInt16Number);
+    }
+
+    // Rotate the last written channel to the front. The nChan > 1 test keeps
+    // (nChan-1) from wrapping around when the format declares no channels;
+    // with a single channel the rotation would change nothing.
+    if (Extra == 0 && SwapFirst && nChan > 1) {
+        memmove(swap1 + 1, swap1, (nChan-1)* sizeof(cmsUInt16Number));
+        *swap1 = v;
+    }
+
+    return output;
+}
+
+
+// Planar packer for 8-bit output: every channel goes to its own plane, with
+// consecutive planes Stride bytes apart. Returns the position of the next
+// pixel inside the first plane.
+static
+cmsUInt8Number* PackPlanarBytes(register _cmsTRANSFORM* info,
+                                register cmsUInt16Number wOut[],
+                                register cmsUInt8Number* output,
+                                register cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan     = T_CHANNELS(info -> OutputFormat);
+    cmsUInt32Number DoSwap    = T_DOSWAP(info -> OutputFormat);
+    cmsUInt32Number SwapFirst = T_SWAPFIRST(info -> OutputFormat);
+    cmsUInt32Number Reverse   = T_FLAVOR(info -> OutputFormat);
+    cmsUInt8Number* Init      = output;
+    cmsUInt32Number i;
+
+    // Extra planes are skipped up front when exactly one of DoSwap / SwapFirst is set
+    if (DoSwap ^ SwapFirst) {
+        output += T_EXTRA(info -> OutputFormat) * Stride;
+    }
+
+    for (i=0; i < nChan; i++) {
+
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+        cmsUInt8Number v = FROM_16_TO_8(wOut[index]);
+
+        *output = (cmsUInt8Number) (Reverse ? REVERSE_FLAVOR_8(v) : v);
+        output += Stride;
+    }
+
+    return (Init + 1);
+}
+
+
+// Planar packer for 16-bit output: every channel goes to its own plane, with
+// consecutive planes Stride words apart. Returns the next pixel position inside
+// the first plane.
+// NOTE(review): the extra-plane skip tests DoSwap only, whereas PackPlanarBytes
+// tests DoSwap ^ SwapFirst -- confirm the difference is intended.
+static
+cmsUInt8Number* PackPlanarWords(register _cmsTRANSFORM* info,
+                                register cmsUInt16Number wOut[],
+                                register cmsUInt8Number* output,
+                                register cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan      = T_CHANNELS(info -> OutputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info -> OutputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info -> OutputFormat);
+    cmsUInt32Number SwapEndian = T_ENDIAN16(info -> OutputFormat);
+    cmsUInt8Number* Init       = output;
+    cmsUInt32Number i;
+
+    if (DoSwap)
+        output += T_EXTRA(info -> OutputFormat) * Stride * sizeof(cmsUInt16Number);
+
+    for (i=0; i < nChan; i++) {
+        cmsUInt16Number v = wOut[DoSwap ? (nChan - i - 1) : i];
+
+        if (SwapEndian)
+            v = CHANGE_ENDIAN(v);
+
+        if (Reverse)
+            v =  REVERSE_FLAVOR_16(v);
+
+        *(cmsUInt16Number*) output = v;
+        output += (Stride * sizeof(cmsUInt16Number));
+    }
+
+    return (Init + sizeof(cmsUInt16Number));
+}
+
+// CMYKcm: six channels narrowed to 8 bits with FROM_16_TO_8, in natural order
+// (kept unrolled for speed).
+static
+cmsUInt8Number* Pack6Bytes(register _cmsTRANSFORM* info,
+                           register cmsUInt16Number wOut[],
+                           register cmsUInt8Number* output,
+                           register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(wOut[0]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[2]);
+    output[3] = FROM_16_TO_8(wOut[3]);
+    output[4] = FROM_16_TO_8(wOut[4]);
+    output[5] = FROM_16_TO_8(wOut[5]);
+
+    return output + 6;
+}
+
+// KCMYcm: six channels narrowed to 8 bits with FROM_16_TO_8 and written in
+// reverse order (wOut[5] first, wOut[0] last).
+static
+cmsUInt8Number* Pack6BytesSwap(register _cmsTRANSFORM* info,
+                               register cmsUInt16Number wOut[],
+                               register cmsUInt8Number* output,
+                               register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(wOut[5]);
+    output[1] = FROM_16_TO_8(wOut[4]);
+    output[2] = FROM_16_TO_8(wOut[3]);
+    output[3] = FROM_16_TO_8(wOut[2]);
+    output[4] = FROM_16_TO_8(wOut[1]);
+    output[5] = FROM_16_TO_8(wOut[0]);
+
+    return output + 6;
+}
+
+// CMYKcm: six 16-bit channels stored unchanged, in natural order.
+// output is written through a cmsUInt16Number pointer, so it is presumably
+// expected to be suitably aligned -- TODO confirm with callers.
+static
+cmsUInt8Number* Pack6Words(register _cmsTRANSFORM* info,
+                           register cmsUInt16Number wOut[],
+                           register cmsUInt8Number* output,
+                           register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    // Straight copy, one word per channel
+    Words[0] = wOut[0];
+    Words[1] = wOut[1];
+    Words[2] = wOut[2];
+    Words[3] = wOut[3];
+    Words[4] = wOut[4];
+    Words[5] = wOut[5];
+
+    // Next pixel starts right after the six words
+    return (cmsUInt8Number*) (Words + 6);
+}
+
+// KCMYcm: six 16-bit channels stored unchanged, in reverse order.
+// output is written through a cmsUInt16Number pointer, so it is presumably
+// expected to be suitably aligned -- TODO confirm with callers.
+static
+cmsUInt8Number* Pack6WordsSwap(register _cmsTRANSFORM* info,
+                               register cmsUInt16Number wOut[],
+                               register cmsUInt8Number* output,
+                               register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    // Last channel goes out first
+    Words[0] = wOut[5];
+    Words[1] = wOut[4];
+    Words[2] = wOut[3];
+    Words[3] = wOut[2];
+    Words[4] = wOut[1];
+    Words[5] = wOut[0];
+
+    // Next pixel starts right after the six words
+    return (cmsUInt8Number*) (Words + 6);
+}
+
+
+// Four channels narrowed to 8 bits with FROM_16_TO_8, in natural order.
+static
+cmsUInt8Number* Pack4Bytes(register _cmsTRANSFORM* info,
+                           register cmsUInt16Number wOut[],
+                           register cmsUInt8Number* output,
+                           register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(wOut[0]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[2]);
+    output[3] = FROM_16_TO_8(wOut[3]);
+    return output + 4;
+}
+
+// Four channels narrowed with FROM_16_TO_8, then passed through REVERSE_FLAVOR_8.
+static
+cmsUInt8Number* Pack4BytesReverse(register _cmsTRANSFORM* info,
+                                  register cmsUInt16Number wOut[],
+                                  register cmsUInt8Number* output,
+                                  register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = REVERSE_FLAVOR_8(FROM_16_TO_8(wOut[0]));
+    output[1] = REVERSE_FLAVOR_8(FROM_16_TO_8(wOut[1]));
+    output[2] = REVERSE_FLAVOR_8(FROM_16_TO_8(wOut[2]));
+    output[3] = REVERSE_FLAVOR_8(FROM_16_TO_8(wOut[3]));
+    return output + 4;
+}
+
+
+// Four 8-bit channels with the last one moved to the front: 3, 0, 1, 2.
+static
+cmsUInt8Number* Pack4BytesSwapFirst(register _cmsTRANSFORM* info,
+                                    register cmsUInt16Number wOut[],
+                                    register cmsUInt8Number* output,
+                                    register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(wOut[3]);
+    output[1] = FROM_16_TO_8(wOut[0]);
+    output[2] = FROM_16_TO_8(wOut[1]);
+    output[3] = FROM_16_TO_8(wOut[2]);
+    return output + 4;
+}
+
+// ABGR: four channels narrowed to 8 bits with FROM_16_TO_8 and written in
+// reverse order (wOut[3] first, wOut[0] last).
+static
+cmsUInt8Number* Pack4BytesSwap(register _cmsTRANSFORM* info,
+                               register cmsUInt16Number wOut[],
+                               register cmsUInt8Number* output,
+                               register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(wOut[3]);
+    output[1] = FROM_16_TO_8(wOut[2]);
+    output[2] = FROM_16_TO_8(wOut[1]);
+    output[3] = FROM_16_TO_8(wOut[0]);
+    return output + 4;
+}
+
+// Four 8-bit channels written in the order 2, 1, 0, 3.
+static
+cmsUInt8Number* Pack4BytesSwapSwapFirst(register _cmsTRANSFORM* info,
+                                        register cmsUInt16Number wOut[],
+                                        register cmsUInt8Number* output,
+                                        register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(wOut[2]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[0]);
+    output[3] = FROM_16_TO_8(wOut[3]);
+    return output + 4;
+}
+
+// Four 16-bit channels stored unchanged, in natural order. Returns the
+// position right after the four words.
+static
+cmsUInt8Number* Pack4Words(register _cmsTRANSFORM* info,
+                           register cmsUInt16Number wOut[],
+                           register cmsUInt8Number* output,
+                           register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    Words[0] = wOut[0];
+    Words[1] = wOut[1];
+    Words[2] = wOut[2];
+    Words[3] = wOut[3];
+
+    return (cmsUInt8Number*) (Words + 4);
+}
+
+// Four 16-bit channels, each passed through REVERSE_FLAVOR_16 before being
+// stored in natural order.
+static
+cmsUInt8Number* Pack4WordsReverse(register _cmsTRANSFORM* info,
+                                  register cmsUInt16Number wOut[],
+                                  register cmsUInt8Number* output,
+                                  register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    Words[0] = REVERSE_FLAVOR_16(wOut[0]);
+    Words[1] = REVERSE_FLAVOR_16(wOut[1]);
+    Words[2] = REVERSE_FLAVOR_16(wOut[2]);
+    Words[3] = REVERSE_FLAVOR_16(wOut[3]);
+
+    return (cmsUInt8Number*) (Words + 4);
+}
+
+// ABGR: four 16-bit channels stored unchanged but in reverse order
+// (wOut[3] first, wOut[0] last). Returns the position right after the
+// four words.
+static
+cmsUInt8Number* Pack4WordsSwap(register _cmsTRANSFORM* info,
+                               register cmsUInt16Number wOut[],
+                               register cmsUInt8Number* output,
+                               register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    Words[0] = wOut[3];
+    Words[1] = wOut[2];
+    Words[2] = wOut[1];
+    Words[3] = wOut[0];
+
+    return (cmsUInt8Number*) (Words + 4);
+}
+
+// CMYK: four 16-bit channels, each passed through CHANGE_ENDIAN before being
+// stored in natural order. Returns the position right after the four
+// words.
+static
+cmsUInt8Number* Pack4WordsBigEndian(register _cmsTRANSFORM* info,
+                                    register cmsUInt16Number wOut[],
+                                    register cmsUInt8Number* output,
+                                    register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    Words[0] = CHANGE_ENDIAN(wOut[0]);
+    Words[1] = CHANGE_ENDIAN(wOut[1]);
+    Words[2] = CHANGE_ENDIAN(wOut[2]);
+    Words[3] = CHANGE_ENDIAN(wOut[3]);
+
+    return (cmsUInt8Number*) (Words + 4);
+}
+
+
+// Three channels run through FomLabV4ToLabV2, then narrowed with FROM_16_TO_8.
+static
+cmsUInt8Number* PackLabV2_8(register _cmsTRANSFORM* info,
+                            register cmsUInt16Number wOut[],
+                            register cmsUInt8Number* output,
+                            register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(FomLabV4ToLabV2(wOut[0]));
+    output[1] = FROM_16_TO_8(FomLabV4ToLabV2(wOut[1]));
+    output[2] = FROM_16_TO_8(FomLabV4ToLabV2(wOut[2]));
+    return output + 3;
+}
+
+// Like PackLabV2_8 but with one leading byte skipped: output[0] is left
+// untouched and the three converted channels follow it.
+static
+cmsUInt8Number* PackALabV2_8(register _cmsTRANSFORM* info,
+                             register cmsUInt16Number wOut[],
+                             register cmsUInt8Number* output,
+                             register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[1] = FROM_16_TO_8(FomLabV4ToLabV2(wOut[0]));
+    output[2] = FROM_16_TO_8(FomLabV4ToLabV2(wOut[1]));
+    output[3] = FROM_16_TO_8(FomLabV4ToLabV2(wOut[2]));
+    return output + 4;
+}
+
+// Three channels run through FomLabV4ToLabV2 and stored as 16-bit words.
+static
+cmsUInt8Number* PackLabV2_16(register _cmsTRANSFORM* info,
+                             register cmsUInt16Number wOut[],
+                             register cmsUInt8Number* output,
+                             register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    Words[0] = FomLabV4ToLabV2(wOut[0]);
+    Words[1] = FomLabV4ToLabV2(wOut[1]);
+    Words[2] = FomLabV4ToLabV2(wOut[2]);
+
+    return (cmsUInt8Number*) (Words + 3);
+}
+
+// Three channels narrowed to 8 bits with FROM_16_TO_8, in natural order.
+static
+cmsUInt8Number* Pack3Bytes(register _cmsTRANSFORM* info,
+                           register cmsUInt16Number wOut[],
+                           register cmsUInt8Number* output,
+                           register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(wOut[0]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[2]);
+    return output + 3;
+}
+
+// Three channels in natural order; only the low byte of each word is stored.
+static
+cmsUInt8Number* Pack3BytesOptimized(register _cmsTRANSFORM* info,
+                                    register cmsUInt16Number wOut[],
+                                    register cmsUInt8Number* output,
+                                    register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = (wOut[0] & 0xFFU);
+    output[1] = (wOut[1] & 0xFFU);
+    output[2] = (wOut[2] & 0xFFU);
+    return output + 3;
+}
+
+// Three channels narrowed to 8 bits with FROM_16_TO_8, in reverse order.
+static
+cmsUInt8Number* Pack3BytesSwap(register _cmsTRANSFORM* info,
+                               register cmsUInt16Number wOut[],
+                               register cmsUInt8Number* output,
+                               register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(wOut[2]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[0]);
+    return output + 3;
+}
+
+// Three channels in reverse order; only the low byte of each word is stored.
+static
+cmsUInt8Number* Pack3BytesSwapOptimized(register _cmsTRANSFORM* info,
+                                        register cmsUInt16Number wOut[],
+                                        register cmsUInt8Number* output,
+                                        register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = (wOut[2] & 0xFFU);
+    output[1] = (wOut[1] & 0xFFU);
+    output[2] = (wOut[0] & 0xFFU);
+    return output + 3;
+}
+
+
+// Three 16-bit channels stored unchanged, in natural order.
+static
+cmsUInt8Number* Pack3Words(register _cmsTRANSFORM* info,
+                           register cmsUInt16Number wOut[],
+                           register cmsUInt8Number* output,
+                           register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    Words[0] = wOut[0];
+    Words[1] = wOut[1];
+    Words[2] = wOut[2];
+
+    return (cmsUInt8Number*) (Words + 3);
+}
+
+// Three 16-bit channels stored unchanged, in reverse order.
+static
+cmsUInt8Number* Pack3WordsSwap(register _cmsTRANSFORM* info,
+                               register cmsUInt16Number wOut[],
+                               register cmsUInt8Number* output,
+                               register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    Words[0] = wOut[2];
+    Words[1] = wOut[1];
+    Words[2] = wOut[0];
+
+    return (cmsUInt8Number*) (Words + 3);
+}
+
+// Three 16-bit channels, each passed through CHANGE_ENDIAN, in natural order.
+static
+cmsUInt8Number* Pack3WordsBigEndian(register _cmsTRANSFORM* info,
+                                    register cmsUInt16Number wOut[],
+                                    register cmsUInt8Number* output,
+                                    register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    Words[0] = CHANGE_ENDIAN(wOut[0]);
+    Words[1] = CHANGE_ENDIAN(wOut[1]);
+    Words[2] = CHANGE_ENDIAN(wOut[2]);
+
+    return (cmsUInt8Number*) (Words + 3);
+}
+
+// Three channels narrowed with FROM_16_TO_8 in natural order, followed by one
+// byte that is skipped without being written.
+static
+cmsUInt8Number* Pack3BytesAndSkip1(register _cmsTRANSFORM* info,
+                                   register cmsUInt16Number wOut[],
+                                   register cmsUInt8Number* output,
+                                   register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(wOut[0]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[2]);
+    return output + 4;
+}
+
+// Low byte of three channels in natural order, followed by one byte that is
+// skipped without being written.
+static
+cmsUInt8Number* Pack3BytesAndSkip1Optimized(register _cmsTRANSFORM* info,
+                                            register cmsUInt16Number wOut[],
+                                            register cmsUInt8Number* output,
+                                            register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = (wOut[0] & 0xFFU);
+    output[1] = (wOut[1] & 0xFFU);
+    output[2] = (wOut[2] & 0xFFU);
+    return output + 4;
+}
+
+
+// One leading byte is skipped without being written, then three channels
+// narrowed with FROM_16_TO_8 follow in natural order.
+static
+cmsUInt8Number* Pack3BytesAndSkip1SwapFirst(register _cmsTRANSFORM* info,
+                                            register cmsUInt16Number wOut[],
+                                            register cmsUInt8Number* output,
+                                            register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[1] = FROM_16_TO_8(wOut[0]);
+    output[2] = FROM_16_TO_8(wOut[1]);
+    output[3] = FROM_16_TO_8(wOut[2]);
+    return output + 4;
+}
+
+// One leading byte is skipped without being written, then the low byte of
+// three channels follows in natural order.
+static
+cmsUInt8Number* Pack3BytesAndSkip1SwapFirstOptimized(register _cmsTRANSFORM* info,
+                                                     register cmsUInt16Number wOut[],
+                                                     register cmsUInt8Number* output,
+                                                     register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[1] = (wOut[0] & 0xFFU);
+    output[2] = (wOut[1] & 0xFFU);
+    output[3] = (wOut[2] & 0xFFU);
+    return output + 4;
+}
+
+// One leading byte is skipped without being written, then three channels
+// narrowed with FROM_16_TO_8 follow in reverse order.
+static
+cmsUInt8Number* Pack3BytesAndSkip1Swap(register _cmsTRANSFORM* info,
+                                       register cmsUInt16Number wOut[],
+                                       register cmsUInt8Number* output,
+                                       register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[1] = FROM_16_TO_8(wOut[2]);
+    output[2] = FROM_16_TO_8(wOut[1]);
+    output[3] = FROM_16_TO_8(wOut[0]);
+    return output + 4;
+}
+
+// One leading byte is skipped without being written, then the low byte of
+// three channels follows in reverse order.
+static
+cmsUInt8Number* Pack3BytesAndSkip1SwapOptimized(register _cmsTRANSFORM* info,
+                                                register cmsUInt16Number wOut[],
+                                                register cmsUInt8Number* output,
+                                                register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[1] = (wOut[2] & 0xFFU);
+    output[2] = (wOut[1] & 0xFFU);
+    output[3] = (wOut[0] & 0xFFU);
+    return output + 4;
+}
+
+
+// Three channels narrowed with FROM_16_TO_8 in reverse order, followed by one
+// byte that is skipped without being written.
+static
+cmsUInt8Number* Pack3BytesAndSkip1SwapSwapFirst(register _cmsTRANSFORM* info,
+                                                register cmsUInt16Number wOut[],
+                                                register cmsUInt8Number* output,
+                                                register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(wOut[2]);
+    output[1] = FROM_16_TO_8(wOut[1]);
+    output[2] = FROM_16_TO_8(wOut[0]);
+    return output + 4;
+}
+
+// Low byte of three channels in reverse order, followed by one byte that is
+// skipped without being written.
+static
+cmsUInt8Number* Pack3BytesAndSkip1SwapSwapFirstOptimized(register _cmsTRANSFORM* info,
+                                                         register cmsUInt16Number wOut[],
+                                                         register cmsUInt8Number* output,
+                                                         register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = (wOut[2] & 0xFFU);
+    output[1] = (wOut[1] & 0xFFU);
+    output[2] = (wOut[0] & 0xFFU);
+    return output + 4;
+}
+
+// Stores wOut[0..2] as three consecutive 16-bit words and leaves room for a
+// fourth word that is skipped without being written. Returns the output
+// pointer advanced by eight bytes.
+static
+cmsUInt8Number* Pack3WordsAndSkip1(register _cmsTRANSFORM* info,
+                                   register cmsUInt16Number wOut[],
+                                   register cmsUInt8Number* output,
+                                   register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    words[0] = wOut[0];
+    words[1] = wOut[1];
+    words[2] = wOut[2];
+    // words[3] is the skipped slot
+
+    return output + 8;
+}
+
+// Skips one leading 16-bit word, then stores the three channels in reversed
+// order (wOut[2], wOut[1], wOut[0]). Returns the output pointer advanced by
+// eight bytes.
+static
+cmsUInt8Number* Pack3WordsAndSkip1Swap(register _cmsTRANSFORM* info,
+                                       register cmsUInt16Number wOut[],
+                                       register cmsUInt8Number* output,
+                                       register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    // words[0] is the skipped slot
+    words[1] = wOut[2];
+    words[2] = wOut[1];
+    words[3] = wOut[0];
+
+    return output + 8;
+}
+
+
+// Skips one leading 16-bit word, then stores the three channels in natural
+// order (wOut[0], wOut[1], wOut[2]). Returns the output pointer advanced by
+// eight bytes.
+static
+cmsUInt8Number* Pack3WordsAndSkip1SwapFirst(register _cmsTRANSFORM* info,
+                                            register cmsUInt16Number wOut[],
+                                            register cmsUInt8Number* output,
+                                            register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    // words[0] is the skipped slot
+    words[1] = wOut[0];
+    words[2] = wOut[1];
+    words[3] = wOut[2];
+
+    return output + 8;
+}
+
+
+// Stores the three channels in reversed order (wOut[2], wOut[1], wOut[0]) as
+// 16-bit words, followed by one trailing word that is skipped without being
+// written. Returns the output pointer advanced by eight bytes.
+static
+cmsUInt8Number* Pack3WordsAndSkip1SwapSwapFirst(register _cmsTRANSFORM* info,
+                                                register cmsUInt16Number wOut[],
+                                                register cmsUInt8Number* output,
+                                                register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    words[0] = wOut[2];
+    words[1] = wOut[1];
+    words[2] = wOut[0];
+    // words[3] is the skipped slot
+
+    return output + 8;
+}
+
+
+
+// Stores wOut[0], converted with FROM_16_TO_8, as a single byte.
+// Returns the output pointer advanced by one byte.
+static
+cmsUInt8Number* Pack1Byte(register _cmsTRANSFORM* info,
+                          register cmsUInt16Number wOut[],
+                          register cmsUInt8Number* output,
+                          register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(wOut[0]);
+
+    return output + 1;
+}
+
+
+// Stores wOut[0] as a single byte after applying REVERSE_FLAVOR_16 to the
+// 16-bit value and converting the result with FROM_16_TO_8.
+// Returns the output pointer advanced by one byte.
+static
+cmsUInt8Number* Pack1ByteReversed(register _cmsTRANSFORM* info,
+                                  register cmsUInt16Number wOut[],
+                                  register cmsUInt8Number* output,
+                                  register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(REVERSE_FLAVOR_16(wOut[0]));
+
+    return output + 1;
+}
+
+
+// Stores wOut[0], converted with FROM_16_TO_8, as one byte and then skips one
+// further byte without writing it. Returns the output pointer advanced by two
+// bytes.
+static
+cmsUInt8Number* Pack1ByteSkip1(register _cmsTRANSFORM* info,
+                               register cmsUInt16Number wOut[],
+                               register cmsUInt8Number* output,
+                               register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    output[0] = FROM_16_TO_8(wOut[0]);
+    // output[1] is the skipped slot
+
+    return output + 2;
+}
+
+
+// Skips one leading byte without writing it, then stores wOut[0] converted
+// with FROM_16_TO_8. Returns the output pointer advanced by two bytes.
+static
+cmsUInt8Number* Pack1ByteSkip1SwapFirst(register _cmsTRANSFORM* info,
+                                        register cmsUInt16Number wOut[],
+                                        register cmsUInt8Number* output,
+                                        register cmsUInt32Number Stride)
+{
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    // output[0] is the skipped slot
+    output[1] = FROM_16_TO_8(wOut[0]);
+
+    return output + 2;
+}
+
+// Stores wOut[0] unchanged as a single 16-bit word.
+// Returns the output pointer advanced by two bytes.
+static
+cmsUInt8Number* Pack1Word(register _cmsTRANSFORM* info,
+                          register cmsUInt16Number wOut[],
+                          register cmsUInt8Number* output,
+                          register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* word = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    word[0] = wOut[0];
+
+    return output + 2;
+}
+
+
+// Stores REVERSE_FLAVOR_16(wOut[0]) as a single 16-bit word.
+// Returns the output pointer advanced by two bytes.
+static
+cmsUInt8Number* Pack1WordReversed(register _cmsTRANSFORM* info,
+                                  register cmsUInt16Number wOut[],
+                                  register cmsUInt8Number* output,
+                                  register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* word = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    word[0] = REVERSE_FLAVOR_16(wOut[0]);
+
+    return output + 2;
+}
+
+// Stores CHANGE_ENDIAN(wOut[0]) as a single 16-bit word.
+// Returns the output pointer advanced by two bytes.
+static
+cmsUInt8Number* Pack1WordBigEndian(register _cmsTRANSFORM* info,
+                                   register cmsUInt16Number wOut[],
+                                   register cmsUInt8Number* output,
+                                   register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* word = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    word[0] = CHANGE_ENDIAN(wOut[0]);
+
+    return output + 2;
+}
+
+
+// Stores wOut[0] as a 16-bit word and then skips one further word without
+// writing it. Returns the output pointer advanced by four bytes.
+static
+cmsUInt8Number* Pack1WordSkip1(register _cmsTRANSFORM* info,
+                               register cmsUInt16Number wOut[],
+                               register cmsUInt8Number* output,
+                               register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    words[0] = wOut[0];
+    // words[1] is the skipped slot
+
+    return output + 4;
+}
+
+// Skips one leading 16-bit word without writing it, then stores wOut[0] as the
+// following word. Returns the output pointer advanced by four bytes.
+static
+cmsUInt8Number* Pack1WordSkip1SwapFirst(register _cmsTRANSFORM* info,
+                                        register cmsUInt16Number wOut[],
+                                        register cmsUInt8Number* output,
+                                        register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* words = (cmsUInt16Number*) output;
+
+    cmsUNUSED_PARAMETER(info);
+    cmsUNUSED_PARAMETER(Stride);
+
+    // words[0] is the skipped slot
+    words[1] = wOut[0];
+
+    return output + 4;
+}
+
+
+// Unencoded floating point values -- no attempt is made to optimize for speed.
+//
+// Decodes the 16-bit encoded Lab triple in wOut[] with cmsLabEncoded2Float()
+// and stores it as cmsFloat64Number values.
+//   Planar: L, a, b go to element offsets 0, Stride and Stride*2; the pointer
+//           advances by one cmsFloat64Number.
+//   Chunky: the triple is decoded straight into the buffer; the pointer
+//           advances past the cmsCIELab plus T_EXTRA() extra samples.
+static
+cmsUInt8Number* PackLabDoubleFrom16(register _cmsTRANSFORM* info,
+                                    register cmsUInt16Number wOut[],
+                                    register cmsUInt8Number* output,
+                                    register cmsUInt32Number Stride)
+{
+    cmsCIELab decoded;
+    cmsFloat64Number* plane;
+
+    if (!T_PLANAR(info->OutputFormat)) {
+
+        cmsLabEncoded2Float((cmsCIELab*) output, wOut);
+        return output + (sizeof(cmsCIELab) + T_EXTRA(info->OutputFormat) * sizeof(cmsFloat64Number));
+    }
+
+    cmsLabEncoded2Float(&decoded, wOut);
+
+    plane = (cmsFloat64Number*) output;
+    plane[0]          = decoded.L;
+    plane[Stride]     = decoded.a;
+    plane[Stride * 2] = decoded.b;
+
+    return output + sizeof(cmsFloat64Number);
+}
+
+
+// Decodes the 16-bit encoded Lab triple in wOut[] with cmsLabEncoded2Float()
+// and stores L, a, b narrowed to cmsFloat32Number.
+//   Planar: components are Stride elements apart; the pointer advances by one
+//           cmsFloat32Number.
+//   Chunky: components are adjacent; the pointer advances by 3 + T_EXTRA()
+//           cmsFloat32Number samples.
+static
+cmsUInt8Number* PackLabFloatFrom16(register _cmsTRANSFORM* info,
+                                    register cmsUInt16Number wOut[],
+                                    register cmsUInt8Number* output,
+                                    register cmsUInt32Number Stride)
+{
+    cmsFloat32Number* dst = (cmsFloat32Number*) output;
+    cmsCIELab decoded;
+
+    cmsLabEncoded2Float(&decoded, wOut);
+
+    if (!T_PLANAR(info->OutputFormat)) {
+
+        dst[0] = (cmsFloat32Number) decoded.L;
+        dst[1] = (cmsFloat32Number) decoded.a;
+        dst[2] = (cmsFloat32Number) decoded.b;
+
+        return output + (3 + T_EXTRA(info->OutputFormat)) * sizeof(cmsFloat32Number);
+    }
+
+    dst[0]          = (cmsFloat32Number) decoded.L;
+    dst[Stride]     = (cmsFloat32Number) decoded.a;
+    dst[Stride * 2] = (cmsFloat32Number) decoded.b;
+
+    return output + sizeof(cmsFloat32Number);
+}
+
+// Decodes the 16-bit encoded XYZ triple in wOut[] with cmsXYZEncoded2Float()
+// and stores it as cmsFloat64Number values.
+//   Planar: X, Y, Z go to element offsets 0, Stride and Stride*2; the pointer
+//           advances by one cmsFloat64Number.
+//   Chunky: the triple is decoded straight into the buffer; the pointer
+//           advances past the cmsCIEXYZ plus T_EXTRA() extra samples.
+static
+cmsUInt8Number* PackXYZDoubleFrom16(register _cmsTRANSFORM* Info,
+                                    register cmsUInt16Number wOut[],
+                                    register cmsUInt8Number* output,
+                                    register cmsUInt32Number Stride)
+{
+    cmsCIEXYZ decoded;
+    cmsFloat64Number* plane;
+
+    if (!T_PLANAR(Info->OutputFormat)) {
+
+        cmsXYZEncoded2Float((cmsCIEXYZ*) output, wOut);
+        return output + (sizeof(cmsCIEXYZ) + T_EXTRA(Info->OutputFormat) * sizeof(cmsFloat64Number));
+    }
+
+    cmsXYZEncoded2Float(&decoded, wOut);
+
+    plane = (cmsFloat64Number*) output;
+    plane[0]          = decoded.X;
+    plane[Stride]     = decoded.Y;
+    plane[Stride * 2] = decoded.Z;
+
+    return output + sizeof(cmsFloat64Number);
+}
+
+// Decodes the 16-bit encoded XYZ triple in wOut[] with cmsXYZEncoded2Float()
+// and stores X, Y, Z narrowed to cmsFloat32Number.
+//   Planar: components are Stride elements apart; the pointer advances by one
+//           cmsFloat32Number.
+//   Chunky: components are adjacent; the pointer advances by 3 + T_EXTRA()
+//           cmsFloat32Number samples.
+static
+cmsUInt8Number* PackXYZFloatFrom16(register _cmsTRANSFORM* Info,
+                                   register cmsUInt16Number wOut[],
+                                   register cmsUInt8Number* output,
+                                   register cmsUInt32Number Stride)
+{
+    cmsFloat32Number* dst = (cmsFloat32Number*) output;
+    cmsCIEXYZ decoded;
+
+    // Both layouts need the decoded triple, so decode once up front
+    cmsXYZEncoded2Float(&decoded, wOut);
+
+    if (!T_PLANAR(Info->OutputFormat)) {
+
+        dst[0] = (cmsFloat32Number) decoded.X;
+        dst[1] = (cmsFloat32Number) decoded.Y;
+        dst[2] = (cmsFloat32Number) decoded.Z;
+
+        return output + (3 * sizeof(cmsFloat32Number) + T_EXTRA(Info->OutputFormat) * sizeof(cmsFloat32Number));
+    }
+
+    dst[0]          = (cmsFloat32Number) decoded.X;
+    dst[Stride]     = (cmsFloat32Number) decoded.Y;
+    dst[Stride * 2] = (cmsFloat32Number) decoded.Z;
+
+    return output + sizeof(cmsFloat32Number);
+}
+
+// Packs 16-bit encoded channels into cmsFloat64Number output samples, driven
+// by the channel count, DOSWAP, SWAPFIRST, FLAVOR, EXTRA and PLANAR fields of
+// info->OutputFormat. Each value is divided by 65535.0, or by 655.35 when
+// IsInkSpace() reports an ink space.
+//
+//   info   - transform; only OutputFormat is read
+//   wOut   - source values, one per channel
+//   output - destination buffer
+//   Stride - distance between planes, in samples, used when the format is planar
+//
+// Returns output advanced by one sample (planar) or by nChan + Extra samples
+// (chunky).
+static
+cmsUInt8Number* PackDoubleFrom16(register _cmsTRANSFORM* info,
+                                register cmsUInt16Number wOut[],
+                                register cmsUInt8Number* output,
+                                register cmsUInt32Number Stride)
+{
+    cmsUInt32Number nChan      = T_CHANNELS(info -> OutputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info ->OutputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->OutputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> OutputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> OutputFormat);
+    cmsUInt32Number Planar     = T_PLANAR(info -> OutputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsFloat64Number maximum = IsInkSpace(info ->OutputFormat) ? 655.35 : 65535.0;
+    cmsFloat64Number v = 0;
+    cmsFloat64Number* swap1 = (cmsFloat64Number*) output;
+    cmsUInt32Number i, start = 0;
+
+    // Extra samples placed first push the color channels back by Extra slots
+    if (ExtraFirst)
+        start = Extra;
+
+    for (i=0; i < nChan; i++) {
+
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+        cmsUInt16Number w = wOut[index];
+
+        // Reversed flavor: complement the 16-bit encoding before scaling.
+        // Subtracting the already-scaled value from "maximum" would mix the
+        // 16-bit scale (65535 / 655.35) with the scaled result.
+        if (Reverse)
+            w = REVERSE_FLAVOR_16(w);
+
+        v = (cmsFloat64Number) w / maximum;
+
+        if (Planar)
+            ((cmsFloat64Number*) output)[(i + start)  * Stride]= v;
+        else
+            ((cmsFloat64Number*) output)[i + start] = v;
+    }
+
+
+    // SWAPFIRST without extra samples: shift the samples up one slot and put
+    // the last value written (still held in v) into the first slot
+    if (Extra == 0 && SwapFirst) {
+
+         memmove(swap1 + 1, swap1, (nChan-1)* sizeof(cmsFloat64Number));
+        *swap1 = v;
+    }
+
+    if (Planar)
+        return output + sizeof(cmsFloat64Number);
+    else
+        return output + (nChan + Extra) * sizeof(cmsFloat64Number);
+
+}
+
+
+// Packs 16-bit encoded channels into cmsFloat32Number output samples, driven
+// by the channel count, DOSWAP, SWAPFIRST, FLAVOR, EXTRA and PLANAR fields of
+// info->OutputFormat. Each value is divided by 65535.0, or by 655.35 when
+// IsInkSpace() reports an ink space.
+//
+//   info   - transform; only OutputFormat is read
+//   wOut   - source values, one per channel
+//   output - destination buffer
+//   Stride - distance between planes, in samples, used when the format is planar
+//
+// Returns output advanced by one sample (planar) or by nChan + Extra samples
+// (chunky).
+static
+cmsUInt8Number* PackFloatFrom16(register _cmsTRANSFORM* info,
+                                register cmsUInt16Number wOut[],
+                                register cmsUInt8Number* output,
+                                register cmsUInt32Number Stride)
+{
+       cmsUInt32Number nChan      = T_CHANNELS(info->OutputFormat);
+       cmsUInt32Number DoSwap     = T_DOSWAP(info->OutputFormat);
+       cmsUInt32Number Reverse    = T_FLAVOR(info->OutputFormat);
+       cmsUInt32Number Extra      = T_EXTRA(info->OutputFormat);
+       cmsUInt32Number SwapFirst  = T_SWAPFIRST(info->OutputFormat);
+       cmsUInt32Number Planar     = T_PLANAR(info->OutputFormat);
+       cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+       cmsFloat64Number maximum = IsInkSpace(info->OutputFormat) ? 655.35 : 65535.0;
+       cmsFloat64Number v = 0;
+       cmsFloat32Number* swap1 = (cmsFloat32Number*)output;
+       cmsUInt32Number i, start = 0;
+
+       // Extra samples placed first push the color channels back by Extra slots
+       if (ExtraFirst)
+              start = Extra;
+
+       for (i = 0; i < nChan; i++) {
+
+              cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+              cmsUInt16Number w = wOut[index];
+
+              // Reversed flavor: complement the 16-bit encoding before scaling.
+              // Subtracting the already-scaled value from "maximum" would mix the
+              // 16-bit scale (65535 / 655.35) with the scaled result.
+              if (Reverse)
+                     w = REVERSE_FLAVOR_16(w);
+
+              v = (cmsFloat64Number)w / maximum;
+
+              if (Planar)
+                     ((cmsFloat32Number*)output)[(i + start) * Stride] = (cmsFloat32Number)v;
+              else
+                     ((cmsFloat32Number*)output)[i + start] = (cmsFloat32Number)v;
+       }
+
+
+       // SWAPFIRST without extra samples: shift the samples up one slot and put
+       // the last value written (still held in v) into the first slot
+       if (Extra == 0 && SwapFirst) {
+
+              memmove(swap1 + 1, swap1, (nChan - 1)* sizeof(cmsFloat32Number));
+              *swap1 = (cmsFloat32Number)v;
+       }
+
+       if (Planar)
+              return output + sizeof(cmsFloat32Number);
+       else
+              return output + (nChan + Extra) * sizeof(cmsFloat32Number);
+}
+
+
+
+// --------------------------------------------------------------------------------------------------------
+
+// Packs floating point pipeline values into cmsFloat32Number output samples,
+// driven by the channel count, DOSWAP, SWAPFIRST, FLAVOR, EXTRA and PLANAR
+// fields of info->OutputFormat. Values are multiplied by 100.0 when
+// IsInkSpace() reports an ink space and by 1.0 otherwise; a reversed flavor
+// stores (scale - value).
+//
+// Returns output advanced by one sample (planar) or by channels + extras
+// samples (chunky).
+static
+cmsUInt8Number* PackFloatsFromFloat(_cmsTRANSFORM* info,
+                                    cmsFloat32Number wOut[],
+                                    cmsUInt8Number* output,
+                                    cmsUInt32Number Stride)
+{
+    cmsUInt32Number channels  = T_CHANNELS(info->OutputFormat);
+    cmsUInt32Number swapAll   = T_DOSWAP(info->OutputFormat);
+    cmsUInt32Number reversed  = T_FLAVOR(info->OutputFormat);
+    cmsUInt32Number extras    = T_EXTRA(info->OutputFormat);
+    cmsUInt32Number swapFirst = T_SWAPFIRST(info->OutputFormat);
+    cmsUInt32Number planar    = T_PLANAR(info->OutputFormat);
+    // Element step between consecutive channels: one plane apart or adjacent
+    cmsUInt32Number step      = planar ? Stride : 1;
+    // Extra samples placed first push the color channels back by "extras" slots
+    cmsUInt32Number first     = (swapAll ^ swapFirst) ? extras : 0;
+    cmsFloat64Number scale    = IsInkSpace(info->OutputFormat) ? 100.0 : 1.0;
+    cmsFloat32Number* dst     = (cmsFloat32Number*) output;
+    cmsFloat64Number sample   = 0;
+    cmsUInt32Number k;
+
+    for (k = 0; k < channels; k++) {
+
+        cmsUInt32Number src = swapAll ? (channels - k - 1) : k;
+
+        sample = wOut[src] * scale;
+
+        if (reversed)
+            sample = scale - sample;
+
+        dst[(k + first) * step] = (cmsFloat32Number) sample;
+    }
+
+    // SWAPFIRST without extras: shift the samples up one slot and put the last
+    // value written into the first slot
+    if (extras == 0 && swapFirst) {
+
+        memmove(dst + 1, dst, (channels - 1) * sizeof(cmsFloat32Number));
+        dst[0] = (cmsFloat32Number) sample;
+    }
+
+    if (planar)
+        return output + sizeof(cmsFloat32Number);
+
+    return output + (channels + extras) * sizeof(cmsFloat32Number);
+}
+
+// Packs floating point pipeline values into cmsFloat64Number output samples,
+// driven by the channel count, DOSWAP, SWAPFIRST, FLAVOR, EXTRA and PLANAR
+// fields of info->OutputFormat. Values are multiplied by 100.0 when
+// IsInkSpace() reports an ink space and by 1.0 otherwise; a reversed flavor
+// stores (scale - value).
+//
+// Returns output advanced by one sample (planar) or by channels + extras
+// samples (chunky).
+static
+cmsUInt8Number* PackDoublesFromFloat(_cmsTRANSFORM* info,
+                                    cmsFloat32Number wOut[],
+                                    cmsUInt8Number* output,
+                                    cmsUInt32Number Stride)
+{
+    cmsUInt32Number channels  = T_CHANNELS(info->OutputFormat);
+    cmsUInt32Number swapAll   = T_DOSWAP(info->OutputFormat);
+    cmsUInt32Number reversed  = T_FLAVOR(info->OutputFormat);
+    cmsUInt32Number extras    = T_EXTRA(info->OutputFormat);
+    cmsUInt32Number swapFirst = T_SWAPFIRST(info->OutputFormat);
+    cmsUInt32Number planar    = T_PLANAR(info->OutputFormat);
+    // Element step between consecutive channels: one plane apart or adjacent
+    cmsUInt32Number step      = planar ? Stride : 1;
+    // Extra samples placed first push the color channels back by "extras" slots
+    cmsUInt32Number first     = (swapAll ^ swapFirst) ? extras : 0;
+    cmsFloat64Number scale    = IsInkSpace(info->OutputFormat) ? 100.0 : 1.0;
+    cmsFloat64Number* dst     = (cmsFloat64Number*) output;
+    cmsFloat64Number sample   = 0;
+    cmsUInt32Number k;
+
+    for (k = 0; k < channels; k++) {
+
+        cmsUInt32Number src = swapAll ? (channels - k - 1) : k;
+
+        sample = wOut[src] * scale;
+
+        if (reversed)
+            sample = scale - sample;
+
+        dst[(k + first) * step] = sample;
+    }
+
+    // SWAPFIRST without extras: shift the samples up one slot and put the last
+    // value written into the first slot
+    if (extras == 0 && swapFirst) {
+
+        memmove(dst + 1, dst, (channels - 1) * sizeof(cmsFloat64Number));
+        dst[0] = sample;
+    }
+
+    if (planar)
+        return output + sizeof(cmsFloat64Number);
+
+    return output + (channels + extras) * sizeof(cmsFloat64Number);
+}
+
+
+
+
+
+// Stores a Lab triple held in wOut[] as cmsFloat32Number values: wOut[0] is
+// scaled by 100.0, wOut[1] and wOut[2] are scaled by 255.0 and offset by -128.0.
+//   Planar: components are Stride elements apart; the pointer advances by one
+//           cmsFloat32Number.
+//   Chunky: components are adjacent; the pointer advances by 3 + T_EXTRA()
+//           cmsFloat32Number samples.
+static
+cmsUInt8Number* PackLabFloatFromFloat(_cmsTRANSFORM* Info,
+                                      cmsFloat32Number wOut[],
+                                      cmsUInt8Number* output,
+                                      cmsUInt32Number Stride)
+{
+    cmsFloat32Number* dst  = (cmsFloat32Number*) output;
+    cmsUInt32Number planar = T_PLANAR(Info->OutputFormat);
+    // Element step between components: one plane apart or adjacent
+    cmsUInt32Number step   = planar ? Stride : 1;
+
+    dst[0]        = (cmsFloat32Number) (wOut[0] * 100.0);
+    dst[step]     = (cmsFloat32Number) (wOut[1] * 255.0 - 128.0);
+    dst[step * 2] = (cmsFloat32Number) (wOut[2] * 255.0 - 128.0);
+
+    if (planar)
+        return output + sizeof(cmsFloat32Number);
+
+    return output + (sizeof(cmsFloat32Number)*3 + T_EXTRA(Info->OutputFormat) * sizeof(cmsFloat32Number));
+}
+
+
+// Stores a Lab triple held in wOut[] as cmsFloat64Number values: wOut[0] is
+// scaled by 100.0, wOut[1] and wOut[2] are scaled by 255.0 and offset by -128.0.
+//   Planar: components are Stride elements apart; the pointer advances by one
+//           cmsFloat64Number.
+//   Chunky: components are adjacent; the pointer advances by 3 + T_EXTRA()
+//           cmsFloat64Number samples.
+static
+cmsUInt8Number* PackLabDoubleFromFloat(_cmsTRANSFORM* Info,
+                                       cmsFloat32Number wOut[],
+                                       cmsUInt8Number* output,
+                                       cmsUInt32Number Stride)
+{
+    cmsFloat64Number* dst  = (cmsFloat64Number*) output;
+    cmsUInt32Number planar = T_PLANAR(Info->OutputFormat);
+    // Element step between components: one plane apart or adjacent
+    cmsUInt32Number step   = planar ? Stride : 1;
+
+    dst[0]        = (cmsFloat64Number) (wOut[0] * 100.0);
+    dst[step]     = (cmsFloat64Number) (wOut[1] * 255.0 - 128.0);
+    dst[step * 2] = (cmsFloat64Number) (wOut[2] * 255.0 - 128.0);
+
+    if (planar)
+        return output + sizeof(cmsFloat64Number);
+
+    return output + (sizeof(cmsFloat64Number)*3 + T_EXTRA(Info->OutputFormat) * sizeof(cmsFloat64Number));
+}
+
+
+// Rescales from the 0..1 range to 0..MAX_ENCODEABLE_XYZ and stores the three
+// components of wOut[] as cmsFloat32Number values.
+//   Planar: components are Stride elements apart; the pointer advances by one
+//           cmsFloat32Number.
+//   Chunky: components are adjacent; the pointer advances by 3 + T_EXTRA()
+//           cmsFloat32Number samples.
+static
+cmsUInt8Number* PackXYZFloatFromFloat(_cmsTRANSFORM* Info,
+                                      cmsFloat32Number wOut[],
+                                      cmsUInt8Number* output,
+                                      cmsUInt32Number Stride)
+{
+    cmsFloat32Number* dst  = (cmsFloat32Number*) output;
+    cmsUInt32Number planar = T_PLANAR(Info->OutputFormat);
+    // Element step between components: one plane apart or adjacent
+    cmsUInt32Number step   = planar ? Stride : 1;
+
+    dst[0]        = (cmsFloat32Number) (wOut[0] * MAX_ENCODEABLE_XYZ);
+    dst[step]     = (cmsFloat32Number) (wOut[1] * MAX_ENCODEABLE_XYZ);
+    dst[step * 2] = (cmsFloat32Number) (wOut[2] * MAX_ENCODEABLE_XYZ);
+
+    if (planar)
+        return output + sizeof(cmsFloat32Number);
+
+    return output + (sizeof(cmsFloat32Number)*3 + T_EXTRA(Info->OutputFormat) * sizeof(cmsFloat32Number));
+}
+
+// Rescales from the 0..1 range to 0..MAX_ENCODEABLE_XYZ and stores the three
+// components of wOut[] as cmsFloat64Number values.
+//   Planar: components are Stride elements apart; the pointer advances by one
+//           cmsFloat64Number.
+//   Chunky: components are adjacent; the pointer advances by 3 + T_EXTRA()
+//           cmsFloat64Number samples.
+static
+cmsUInt8Number* PackXYZDoubleFromFloat(_cmsTRANSFORM* Info,
+                                       cmsFloat32Number wOut[],
+                                       cmsUInt8Number* output,
+                                       cmsUInt32Number Stride)
+{
+    cmsFloat64Number* dst  = (cmsFloat64Number*) output;
+    cmsUInt32Number planar = T_PLANAR(Info->OutputFormat);
+    // Element step between components: one plane apart or adjacent
+    cmsUInt32Number step   = planar ? Stride : 1;
+
+    dst[0]        = (cmsFloat64Number) (wOut[0] * MAX_ENCODEABLE_XYZ);
+    dst[step]     = (cmsFloat64Number) (wOut[1] * MAX_ENCODEABLE_XYZ);
+    dst[step * 2] = (cmsFloat64Number) (wOut[2] * MAX_ENCODEABLE_XYZ);
+
+    if (planar)
+        return output + sizeof(cmsFloat64Number);
+
+    return output + (sizeof(cmsFloat64Number)*3 + T_EXTRA(Info->OutputFormat) * sizeof(cmsFloat64Number));
+}
+
+
+// ----------------------------------------------------------------------------------------------------------------
+
+#ifndef CMS_NO_HALF_SUPPORT 
+
+// Decodes a stream of half floats into 16-bit wIn[] as described by the input
+// format (channel count, DOSWAP, SWAPFIRST, FLAVOR, EXTRA and PLANAR fields of
+// info->InputFormat). Each sample is converted with _cmsHalf2Float(), scaled
+// by 65535.0 (655.35 when IsInkSpace() reports an ink space) and saturated by
+// _cmsQuickSaturateWord().
+//
+//   info   - transform; only InputFormat is read
+//   wIn    - destination values, one per channel
+//   accum  - source buffer
+//   Stride - distance between planes, in samples, used when the format is planar
+//
+// Returns accum advanced by one sample (planar) or by nChan + Extra samples
+// (chunky).
+
+static
+cmsUInt8Number* UnrollHalfTo16(register _cmsTRANSFORM* info,
+                                register cmsUInt16Number wIn[],
+                                register cmsUInt8Number* accum,
+                                register cmsUInt32Number Stride)
+{
+
+    cmsUInt32Number nChan      = T_CHANNELS(info -> InputFormat);
+    cmsUInt32Number DoSwap     = T_DOSWAP(info ->InputFormat);
+    cmsUInt32Number Reverse    = T_FLAVOR(info ->InputFormat);
+    cmsUInt32Number SwapFirst  = T_SWAPFIRST(info -> InputFormat);
+    cmsUInt32Number Extra      = T_EXTRA(info -> InputFormat);
+    cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+    cmsUInt32Number Planar     = T_PLANAR(info -> InputFormat);
+    cmsFloat32Number v;
+    cmsUInt32Number i, start = 0;
+    cmsFloat32Number maximum = IsInkSpace(info ->InputFormat) ? 655.35F : 65535.0F;
+
+
+    // Extra samples placed first push the color channels back by Extra slots
+    if (ExtraFirst)
+            start = Extra;
+
+    for (i=0; i < nChan; i++) {
+
+        cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+        cmsUInt16Number w;
+
+        if (Planar)
+            v = _cmsHalf2Float ( ((cmsUInt16Number*) accum)[(i + start) * Stride] );
+        else
+            v = _cmsHalf2Float ( ((cmsUInt16Number*) accum)[i + start] ) ;
+
+        w = _cmsQuickSaturateWord(v * maximum);
+
+        // Reversed flavor: complement the 16-bit result. Subtracting the
+        // unscaled sample from "maximum" and scaling afterwards would mix the
+        // two ranges and saturate.
+        if (Reverse)
+            w = REVERSE_FLAVOR_16(w);
+
+        wIn[index] = w;
+    }
+
+
+    // SWAPFIRST without extra samples: rotate the channels down one slot,
+    // moving the first value to the end
+    if (Extra == 0 && SwapFirst) {
+        cmsUInt16Number tmp = wIn[0];
+
+        memmove(&wIn[0], &wIn[1], (nChan-1) * sizeof(cmsUInt16Number));
+        wIn[nChan-1] = tmp;
+    }
+
+    if (Planar)
+        return accum + sizeof(cmsUInt16Number);
+    else
+        return accum + (nChan + Extra) * sizeof(cmsUInt16Number);
+}
+
+// Decodes a stream of half floats into floating point wIn[] as described by
+// the input format (channel count, DOSWAP, SWAPFIRST, FLAVOR, EXTRA and PLANAR
+// fields of info->InputFormat). Each sample is converted with _cmsHalf2Float()
+// and divided by 100.0 when IsInkSpace() reports an ink space, by 1.0
+// otherwise; a reversed flavor stores (1 - value).
+//
+// Returns accum advanced by one sample (planar) or by channels + extras
+// samples (chunky).
+
+static
+cmsUInt8Number* UnrollHalfToFloat(_cmsTRANSFORM* info,
+                                    cmsFloat32Number wIn[],
+                                    cmsUInt8Number* accum,
+                                    cmsUInt32Number Stride)
+{
+    cmsUInt32Number channels  = T_CHANNELS(info->InputFormat);
+    cmsUInt32Number swapAll   = T_DOSWAP(info->InputFormat);
+    cmsUInt32Number reversed  = T_FLAVOR(info->InputFormat);
+    cmsUInt32Number swapFirst = T_SWAPFIRST(info->InputFormat);
+    cmsUInt32Number extras    = T_EXTRA(info->InputFormat);
+    cmsUInt32Number planar    = T_PLANAR(info->InputFormat);
+    // Element step between consecutive channels: one plane apart or adjacent
+    cmsUInt32Number step      = planar ? Stride : 1;
+    // Extra samples placed first push the color channels back by "extras" slots
+    cmsUInt32Number first     = (swapAll ^ swapFirst) ? extras : 0;
+    cmsUInt16Number* src      = (cmsUInt16Number*) accum;
+    cmsFloat32Number scale    = IsInkSpace(info->InputFormat) ? 100.0F : 1.0F;
+    cmsFloat32Number sample;
+    cmsUInt32Number k;
+
+    for (k = 0; k < channels; k++) {
+
+        cmsUInt32Number dst = swapAll ? (channels - k - 1) : k;
+
+        sample = _cmsHalf2Float(src[(k + first) * step]);
+        sample /= scale;
+
+        if (reversed)
+            wIn[dst] = 1 - sample;
+        else
+            wIn[dst] = sample;
+    }
+
+    // SWAPFIRST without extras: rotate the channels down one slot, moving the
+    // first value to the end
+    if (extras == 0 && swapFirst) {
+
+        cmsFloat32Number head = wIn[0];
+
+        memmove(&wIn[0], &wIn[1], (channels - 1) * sizeof(cmsFloat32Number));
+        wIn[channels - 1] = head;
+    }
+
+    if (planar)
+        return accum + sizeof(cmsUInt16Number);
+
+    return accum + (channels + extras) * sizeof(cmsUInt16Number);
+}
+
+
+// Packs 16-bit encoded channels into half-float output samples (stored as
+// cmsUInt16Number via _cmsFloat2Half()), driven by the channel count, DOSWAP,
+// SWAPFIRST, FLAVOR, EXTRA and PLANAR fields of info->OutputFormat. Each value
+// is divided by 65535.0, or by 655.35 when IsInkSpace() reports an ink space.
+//
+//   info   - transform; only OutputFormat is read
+//   wOut   - source values, one per channel
+//   output - destination buffer
+//   Stride - distance between planes, in samples, used when the format is planar
+//
+// Returns output advanced by one sample (planar) or by nChan + Extra samples
+// (chunky).
+static
+cmsUInt8Number* PackHalfFrom16(register _cmsTRANSFORM* info,
+                                register cmsUInt16Number wOut[],
+                                register cmsUInt8Number* output,
+                                register cmsUInt32Number Stride)
+{
+       cmsUInt32Number nChan      = T_CHANNELS(info->OutputFormat);
+       cmsUInt32Number DoSwap     = T_DOSWAP(info->OutputFormat);
+       cmsUInt32Number Reverse    = T_FLAVOR(info->OutputFormat);
+       cmsUInt32Number Extra      = T_EXTRA(info->OutputFormat);
+       cmsUInt32Number SwapFirst  = T_SWAPFIRST(info->OutputFormat);
+       cmsUInt32Number Planar     = T_PLANAR(info->OutputFormat);
+       cmsUInt32Number ExtraFirst = DoSwap ^ SwapFirst;
+       cmsFloat32Number maximum = IsInkSpace(info->OutputFormat) ? 655.35F : 65535.0F;
+       cmsFloat32Number v = 0;
+       cmsUInt16Number* swap1 = (cmsUInt16Number*)output;
+       cmsUInt32Number i, start = 0;
+
+       // Extra samples placed first push the color channels back by Extra slots
+       if (ExtraFirst)
+              start = Extra;
+
+       for (i = 0; i < nChan; i++) {
+
+              cmsUInt32Number index = DoSwap ? (nChan - i - 1) : i;
+              cmsUInt16Number w = wOut[index];
+
+              // Reversed flavor: complement the 16-bit encoding before scaling.
+              // Subtracting the already-scaled value from "maximum" would mix the
+              // 16-bit scale (65535 / 655.35) with the scaled result.
+              if (Reverse)
+                     w = REVERSE_FLAVOR_16(w);
+
+              v = (cmsFloat32Number)w / maximum;
+
+              if (Planar)
+                     ((cmsUInt16Number*)output)[(i + start) * Stride] = _cmsFloat2Half(v);
+              else
+                     ((cmsUInt16Number*)output)[i + start] = _cmsFloat2Half(v);
+       }
+
+
+       // SWAPFIRST without extra samples: shift the samples up one slot and put
+       // the last value written (still held in v) into the first slot
+       if (Extra == 0 && SwapFirst) {
+
+              memmove(swap1 + 1, swap1, (nChan - 1)* sizeof(cmsUInt16Number));
+              *swap1 = _cmsFloat2Half(v);
+       }
+
+       if (Planar)
+              return output + sizeof(cmsUInt16Number);
+       else
+              return output + (nChan + Extra) * sizeof(cmsUInt16Number);
+}
+
+
+
+// Packs floating point pipeline values into half-float output samples (stored
+// as cmsUInt16Number via _cmsFloat2Half()), driven by the channel count,
+// DOSWAP, SWAPFIRST, FLAVOR, EXTRA and PLANAR fields of info->OutputFormat.
+// Values are multiplied by 100.0 when IsInkSpace() reports an ink space and by
+// 1.0 otherwise; a reversed flavor stores (scale - value).
+//
+// Returns output advanced by one sample (planar) or by channels + extras
+// samples (chunky).
+static
+cmsUInt8Number* PackHalfFromFloat(_cmsTRANSFORM* info,
+                                    cmsFloat32Number wOut[],
+                                    cmsUInt8Number* output,
+                                    cmsUInt32Number Stride)
+{
+    cmsUInt32Number channels  = T_CHANNELS(info->OutputFormat);
+    cmsUInt32Number swapAll   = T_DOSWAP(info->OutputFormat);
+    cmsUInt32Number reversed  = T_FLAVOR(info->OutputFormat);
+    cmsUInt32Number extras    = T_EXTRA(info->OutputFormat);
+    cmsUInt32Number swapFirst = T_SWAPFIRST(info->OutputFormat);
+    cmsUInt32Number planar    = T_PLANAR(info->OutputFormat);
+    // Element step between consecutive channels: one plane apart or adjacent
+    cmsUInt32Number step      = planar ? Stride : 1;
+    // Extra samples placed first push the color channels back by "extras" slots
+    cmsUInt32Number first     = (swapAll ^ swapFirst) ? extras : 0;
+    cmsFloat32Number scale    = IsInkSpace(info->OutputFormat) ? 100.0F : 1.0F;
+    cmsUInt16Number* dst      = (cmsUInt16Number*) output;
+    cmsFloat32Number sample   = 0;
+    cmsUInt32Number k;
+
+    for (k = 0; k < channels; k++) {
+
+        cmsUInt32Number src = swapAll ? (channels - k - 1) : k;
+
+        sample = wOut[src] * scale;
+
+        if (reversed)
+            sample = scale - sample;
+
+        dst[(k + first) * step] = _cmsFloat2Half(sample);
+    }
+
+    // SWAPFIRST without extras: shift the samples up one slot and put the last
+    // value written into the first slot
+    if (extras == 0 && swapFirst) {
+
+        memmove(dst + 1, dst, (channels - 1) * sizeof(cmsUInt16Number));
+        dst[0] = (cmsUInt16Number) _cmsFloat2Half(sample);
+    }
+
+    if (planar)
+        return output + sizeof(cmsUInt16Number);
+
+    return output + (channels + extras) * sizeof(cmsUInt16Number);
+}
+
+#endif
+
+// ----------------------------------------------------------------------------------------------------------------
+
+
+// Table of 16-bit input formatters. Each entry is { Type, Mask, Function }:
+// a format descriptor, a mask of format bits, and the unroll routine for it.
+// NOTE(review): entries run from specific layouts to generic catch-alls
+// (e.g. Unroll3Bytes before UnrollChunkyBytes); presumably the lookup takes
+// the first match with the Mask bits ignored -- confirm against the lookup
+// routine before reordering or inserting entries.
+static const cmsFormatters16 InputFormatters16[] = {
+
+    //    Type                                          Mask                  Function
+    //  ----------------------------   ------------------------------------  ----------------------------
+    // Floating point sources (Lab/XYZ special cases, then generic double/float/half)
+    { TYPE_Lab_DBL,                                 ANYPLANAR|ANYEXTRA,   UnrollLabDoubleTo16},
+    { TYPE_XYZ_DBL,                                 ANYPLANAR|ANYEXTRA,   UnrollXYZDoubleTo16},
+    { TYPE_Lab_FLT,                                 ANYPLANAR|ANYEXTRA,   UnrollLabFloatTo16},
+    { TYPE_XYZ_FLT,                                 ANYPLANAR|ANYEXTRA,   UnrollXYZFloatTo16},
+    { TYPE_GRAY_DBL,                                                 0,   UnrollDouble1Chan},
+    { FLOAT_SH(1)|BYTES_SH(0), ANYCHANNELS|ANYPLANAR|ANYSWAPFIRST|ANYFLAVOR|
+                                             ANYSWAP|ANYEXTRA|ANYSPACE,   UnrollDoubleTo16},
+    { FLOAT_SH(1)|BYTES_SH(4), ANYCHANNELS|ANYPLANAR|ANYSWAPFIRST|ANYFLAVOR|
+                                             ANYSWAP|ANYEXTRA|ANYSPACE,   UnrollFloatTo16},
+    // Half-float entry is compiled in only when CMS_NO_HALF_SUPPORT is not defined
+#ifndef CMS_NO_HALF_SUPPORT 
+    { FLOAT_SH(1)|BYTES_SH(2), ANYCHANNELS|ANYPLANAR|ANYSWAPFIRST|ANYFLAVOR|
+                                            ANYEXTRA|ANYSWAP|ANYSPACE,   UnrollHalfTo16},
+#endif
+
+    // 8-bit sources: one and two channels
+    { CHANNELS_SH(1)|BYTES_SH(1),                              ANYSPACE,  Unroll1Byte},
+    { CHANNELS_SH(1)|BYTES_SH(1)|EXTRA_SH(1),                  ANYSPACE,  Unroll1ByteSkip1},
+    { CHANNELS_SH(1)|BYTES_SH(1)|EXTRA_SH(2),                  ANYSPACE,  Unroll1ByteSkip2},
+    { CHANNELS_SH(1)|BYTES_SH(1)|FLAVOR_SH(1),                 ANYSPACE,  Unroll1ByteReversed},
+    { COLORSPACE_SH(PT_MCH2)|CHANNELS_SH(2)|BYTES_SH(1),              0,  Unroll2Bytes},
+
+    // Lab V2 encodings
+    { TYPE_LabV2_8,                                                   0,  UnrollLabV2_8 },
+    { TYPE_ALabV2_8,                                                  0,  UnrollALabV2_8 },
+    { TYPE_LabV2_16,                                                  0,  UnrollLabV2_16 },
+
+    // 8-bit sources: three channels
+    { CHANNELS_SH(3)|BYTES_SH(1),                              ANYSPACE,  Unroll3Bytes},
+    { CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1),                 ANYSPACE,  Unroll3BytesSwap},
+    { CHANNELS_SH(3)|EXTRA_SH(1)|BYTES_SH(1)|DOSWAP_SH(1),     ANYSPACE,  Unroll3BytesSkip1Swap},
+    { CHANNELS_SH(3)|EXTRA_SH(1)|BYTES_SH(1)|SWAPFIRST_SH(1),  ANYSPACE,  Unroll3BytesSkip1SwapFirst},
+
+    { CHANNELS_SH(3)|EXTRA_SH(1)|BYTES_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1),  
+                                                               ANYSPACE,  Unroll3BytesSkip1SwapSwapFirst},
+
+    // 8-bit sources: four channels
+    { CHANNELS_SH(4)|BYTES_SH(1),                              ANYSPACE,  Unroll4Bytes},
+    { CHANNELS_SH(4)|BYTES_SH(1)|FLAVOR_SH(1),                 ANYSPACE,  Unroll4BytesReverse},
+    { CHANNELS_SH(4)|BYTES_SH(1)|SWAPFIRST_SH(1),              ANYSPACE,  Unroll4BytesSwapFirst},
+    { CHANNELS_SH(4)|BYTES_SH(1)|DOSWAP_SH(1),                 ANYSPACE,  Unroll4BytesSwap},
+    { CHANNELS_SH(4)|BYTES_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1), ANYSPACE,  Unroll4BytesSwapSwapFirst},
+
+    // 8-bit generic planar catch-all
+    { BYTES_SH(1)|PLANAR_SH(1), ANYFLAVOR|ANYSWAPFIRST|
+                                   ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE, UnrollPlanarBytes},
+
+    // 8-bit generic chunky catch-all
+    { BYTES_SH(1),    ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|
+                                           ANYEXTRA|ANYCHANNELS|ANYSPACE, UnrollChunkyBytes},
+
+    // 16-bit sources: one channel
+    { CHANNELS_SH(1)|BYTES_SH(2),                              ANYSPACE,  Unroll1Word},
+    { CHANNELS_SH(1)|BYTES_SH(2)|FLAVOR_SH(1),                 ANYSPACE,  Unroll1WordReversed},
+    { CHANNELS_SH(1)|BYTES_SH(2)|EXTRA_SH(3),                  ANYSPACE,  Unroll1WordSkip3},
+
+    // 16-bit sources: two to four channels, natural order
+    { CHANNELS_SH(2)|BYTES_SH(2),                              ANYSPACE,  Unroll2Words},
+    { CHANNELS_SH(3)|BYTES_SH(2),                              ANYSPACE,  Unroll3Words},
+    { CHANNELS_SH(4)|BYTES_SH(2),                              ANYSPACE,  Unroll4Words},
+
+    // 16-bit sources: swapped / reversed variants
+    { CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1),                 ANYSPACE,  Unroll3WordsSwap},
+    { CHANNELS_SH(3)|BYTES_SH(2)|EXTRA_SH(1)|SWAPFIRST_SH(1),  ANYSPACE,  Unroll3WordsSkip1SwapFirst},
+    { CHANNELS_SH(3)|BYTES_SH(2)|EXTRA_SH(1)|DOSWAP_SH(1),     ANYSPACE,  Unroll3WordsSkip1Swap},
+    { CHANNELS_SH(4)|BYTES_SH(2)|FLAVOR_SH(1),                 ANYSPACE,  Unroll4WordsReverse},
+    { CHANNELS_SH(4)|BYTES_SH(2)|SWAPFIRST_SH(1),              ANYSPACE,  Unroll4WordsSwapFirst},
+    { CHANNELS_SH(4)|BYTES_SH(2)|DOSWAP_SH(1),                 ANYSPACE,  Unroll4WordsSwap},
+    { CHANNELS_SH(4)|BYTES_SH(2)|DOSWAP_SH(1)|SWAPFIRST_SH(1), ANYSPACE,  Unroll4WordsSwapSwapFirst},
+
+
+    // 16-bit generic planar and chunky catch-alls.
+    // NOTE(review): the planar mask omits ANYSWAPFIRST while the chunky one includes it -- confirm this is intended.
+    { BYTES_SH(2)|PLANAR_SH(1),  ANYFLAVOR|ANYSWAP|ANYENDIAN|ANYEXTRA|ANYCHANNELS|ANYSPACE,  UnrollPlanarWords},
+    { BYTES_SH(2),  ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYENDIAN|ANYEXTRA|ANYCHANNELS|ANYSPACE,  UnrollAnyWords},
+};
+
+
+// Stock float unroll (input) table; _cmsGetStockInputFormatter scans it top-down, first match wins
+static const cmsFormattersFloat InputFormattersFloat[] = {
+
+    //    Type                                          Mask                  Function
+    //  ----------------------------   ------------------------------------  ----------------------------
+    {     TYPE_Lab_DBL,                                ANYPLANAR|ANYEXTRA,   UnrollLabDoubleToFloat},
+    {     TYPE_Lab_FLT,                                ANYPLANAR|ANYEXTRA,   UnrollLabFloatToFloat},
+
+    {     TYPE_XYZ_DBL,                                ANYPLANAR|ANYEXTRA,   UnrollXYZDoubleToFloat},
+    {     TYPE_XYZ_FLT,                                ANYPLANAR|ANYEXTRA,   UnrollXYZFloatToFloat},
+    // Generic rows keyed on FLOAT_SH(1) plus the BYTES_SH value: 4 -> floats, 0 -> doubles, 2 -> half
+    {     FLOAT_SH(1)|BYTES_SH(4), ANYPLANAR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|
+                                                      ANYCHANNELS|ANYSPACE,  UnrollFloatsToFloat},
+
+    {     FLOAT_SH(1)|BYTES_SH(0), ANYPLANAR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|
+                                                        ANYCHANNELS|ANYSPACE,  UnrollDoublesToFloat},
+#ifndef CMS_NO_HALF_SUPPORT 
+    {     FLOAT_SH(1)|BYTES_SH(2), ANYPLANAR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|
+                                                        ANYCHANNELS|ANYSPACE,  UnrollHalfToFloat},
+#endif
+};
+
+
+// Stock input (unroll) formatter lookup.
+// dwFlags picks the table (16 bits or float); the first row whose Type equals dwInput with
+// the row's Mask bits cleared supplies the function. Fmt16 is NULL when nothing matches.
+static cmsFormatter _cmsGetStockInputFormatter(cmsUInt32Number dwInput, cmsUInt32Number dwFlags)
+{
+    cmsFormatter Result;
+
+    if (dwFlags == CMS_PACK_FLAGS_16BITS) {
+
+        const cmsFormatters16* Row = InputFormatters16;
+        const cmsFormatters16* const End = Row + sizeof(InputFormatters16) / sizeof(cmsFormatters16);
+
+        for (; Row != End; Row++) {
+
+            // Bit fields set to one in the mask are not compared
+            if ((dwInput & ~Row->Mask) != Row->Type) continue;
+
+            Result.Fmt16 = Row->Frm;
+            return Result;
+        }
+    }
+    else if (dwFlags == CMS_PACK_FLAGS_FLOAT) {
+
+        const cmsFormattersFloat* Row = InputFormattersFloat;
+        const cmsFormattersFloat* const End = Row + sizeof(InputFormattersFloat) / sizeof(cmsFormattersFloat);
+
+        for (; Row != End; Row++) {
+
+            // Same masked comparison, against the float table
+            if ((dwInput & ~Row->Mask) != Row->Type) continue;
+
+            Result.FmtFloat = Row->Frm;
+            return Result;
+        }
+    }
+
+    // Unknown flags or no matching row
+    Result.Fmt16 = NULL;
+    return Result;
+}
+// Stock 16-bit pack (output) table; _cmsGetStockOutputFormatter scans it top-down, first match wins
+static const cmsFormatters16 OutputFormatters16[] = {
+    //    Type                                          Mask                  Function
+    //  ----------------------------   ------------------------------------  ----------------------------
+
+    { TYPE_Lab_DBL,                                      ANYPLANAR|ANYEXTRA,  PackLabDoubleFrom16},
+    { TYPE_XYZ_DBL,                                      ANYPLANAR|ANYEXTRA,  PackXYZDoubleFrom16},
+
+    { TYPE_Lab_FLT,                                      ANYPLANAR|ANYEXTRA,  PackLabFloatFrom16},
+    { TYPE_XYZ_FLT,                                      ANYPLANAR|ANYEXTRA,  PackXYZFloatFrom16},
+    
+    { FLOAT_SH(1)|BYTES_SH(0),      ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|
+                                    ANYCHANNELS|ANYPLANAR|ANYEXTRA|ANYSPACE,  PackDoubleFrom16},
+    { FLOAT_SH(1)|BYTES_SH(4),      ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|
+                                    ANYCHANNELS|ANYPLANAR|ANYEXTRA|ANYSPACE,  PackFloatFrom16},
+#ifndef CMS_NO_HALF_SUPPORT 
+    { FLOAT_SH(1)|BYTES_SH(2),      ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|
+                                    ANYCHANNELS|ANYPLANAR|ANYEXTRA|ANYSPACE,  PackHalfFrom16},
+#endif
+
+    { CHANNELS_SH(1)|BYTES_SH(1),                                  ANYSPACE,  Pack1Byte},
+    { CHANNELS_SH(1)|BYTES_SH(1)|EXTRA_SH(1),                      ANYSPACE,  Pack1ByteSkip1},
+    { CHANNELS_SH(1)|BYTES_SH(1)|EXTRA_SH(1)|SWAPFIRST_SH(1),      ANYSPACE,  Pack1ByteSkip1SwapFirst},
+
+    { CHANNELS_SH(1)|BYTES_SH(1)|FLAVOR_SH(1),                     ANYSPACE,  Pack1ByteReversed},
+
+    { TYPE_LabV2_8,                                                       0,  PackLabV2_8 },
+    { TYPE_ALabV2_8,                                                      0,  PackALabV2_8 },
+    { TYPE_LabV2_16,                                                      0,  PackLabV2_16 },
+
+    { CHANNELS_SH(3)|BYTES_SH(1)|OPTIMIZED_SH(1),                  ANYSPACE,  Pack3BytesOptimized},
+    { CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|OPTIMIZED_SH(1),      ANYSPACE,  Pack3BytesAndSkip1Optimized},
+    { CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|SWAPFIRST_SH(1)|OPTIMIZED_SH(1),
+                                                                   ANYSPACE,  Pack3BytesAndSkip1SwapFirstOptimized},
+    { CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1)|OPTIMIZED_SH(1),
+                                                                   ANYSPACE,  Pack3BytesAndSkip1SwapSwapFirstOptimized},
+    { CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|EXTRA_SH(1)|OPTIMIZED_SH(1),
+                                                                   ANYSPACE,  Pack3BytesAndSkip1SwapOptimized},
+    { CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|OPTIMIZED_SH(1),     ANYSPACE,  Pack3BytesSwapOptimized},
+    // NOTE(review): the rows above need OPTIMIZED_SH(1) in the format, yet _cmsGetStockOutputFormatter clears
+    // that bit before matching - they look unreachable unless a mask covers the bit; confirm against ANYSPACE
+
+    { CHANNELS_SH(3)|BYTES_SH(1),                                  ANYSPACE,  Pack3Bytes},
+    { CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1),                      ANYSPACE,  Pack3BytesAndSkip1},
+    { CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|SWAPFIRST_SH(1),      ANYSPACE,  Pack3BytesAndSkip1SwapFirst},
+    { CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1),
+                                                                   ANYSPACE,  Pack3BytesAndSkip1SwapSwapFirst},
+    { CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1)|EXTRA_SH(1),         ANYSPACE,  Pack3BytesAndSkip1Swap},
+    { CHANNELS_SH(3)|BYTES_SH(1)|DOSWAP_SH(1),                     ANYSPACE,  Pack3BytesSwap},
+    { CHANNELS_SH(6)|BYTES_SH(1),                                  ANYSPACE,  Pack6Bytes},
+    { CHANNELS_SH(6)|BYTES_SH(1)|DOSWAP_SH(1),                     ANYSPACE,  Pack6BytesSwap},
+    { CHANNELS_SH(4)|BYTES_SH(1),                                  ANYSPACE,  Pack4Bytes},
+    { CHANNELS_SH(4)|BYTES_SH(1)|FLAVOR_SH(1),                     ANYSPACE,  Pack4BytesReverse},
+    { CHANNELS_SH(4)|BYTES_SH(1)|SWAPFIRST_SH(1),                  ANYSPACE,  Pack4BytesSwapFirst},
+    { CHANNELS_SH(4)|BYTES_SH(1)|DOSWAP_SH(1),                     ANYSPACE,  Pack4BytesSwap},
+    { CHANNELS_SH(4)|BYTES_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1),     ANYSPACE,  Pack4BytesSwapSwapFirst},
+    // Any other 1-byte layout: the chunky row first, then the planar one
+    { BYTES_SH(1),                 ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE, PackAnyBytes},
+    { BYTES_SH(1)|PLANAR_SH(1),    ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE, PackPlanarBytes},
+
+    { CHANNELS_SH(1)|BYTES_SH(2),                                  ANYSPACE,  Pack1Word},
+    { CHANNELS_SH(1)|BYTES_SH(2)|EXTRA_SH(1),                      ANYSPACE,  Pack1WordSkip1},
+    { CHANNELS_SH(1)|BYTES_SH(2)|EXTRA_SH(1)|SWAPFIRST_SH(1),      ANYSPACE,  Pack1WordSkip1SwapFirst},
+    { CHANNELS_SH(1)|BYTES_SH(2)|FLAVOR_SH(1),                     ANYSPACE,  Pack1WordReversed},
+    { CHANNELS_SH(1)|BYTES_SH(2)|ENDIAN16_SH(1),                   ANYSPACE,  Pack1WordBigEndian},
+    { CHANNELS_SH(3)|BYTES_SH(2),                                  ANYSPACE,  Pack3Words},
+    { CHANNELS_SH(3)|BYTES_SH(2)|DOSWAP_SH(1),                     ANYSPACE,  Pack3WordsSwap},
+    { CHANNELS_SH(3)|BYTES_SH(2)|ENDIAN16_SH(1),                   ANYSPACE,  Pack3WordsBigEndian},
+    { CHANNELS_SH(3)|BYTES_SH(2)|EXTRA_SH(1),                      ANYSPACE,  Pack3WordsAndSkip1},
+    { CHANNELS_SH(3)|BYTES_SH(2)|EXTRA_SH(1)|DOSWAP_SH(1),         ANYSPACE,  Pack3WordsAndSkip1Swap},
+    { CHANNELS_SH(3)|BYTES_SH(2)|EXTRA_SH(1)|SWAPFIRST_SH(1),      ANYSPACE,  Pack3WordsAndSkip1SwapFirst},
+
+    { CHANNELS_SH(3)|BYTES_SH(2)|EXTRA_SH(1)|DOSWAP_SH(1)|SWAPFIRST_SH(1),
+                                                                   ANYSPACE,  Pack3WordsAndSkip1SwapSwapFirst},
+
+    { CHANNELS_SH(4)|BYTES_SH(2),                                  ANYSPACE,  Pack4Words},
+    { CHANNELS_SH(4)|BYTES_SH(2)|FLAVOR_SH(1),                     ANYSPACE,  Pack4WordsReverse},
+    { CHANNELS_SH(4)|BYTES_SH(2)|DOSWAP_SH(1),                     ANYSPACE,  Pack4WordsSwap},
+    { CHANNELS_SH(4)|BYTES_SH(2)|ENDIAN16_SH(1),                   ANYSPACE,  Pack4WordsBigEndian},
+
+    { CHANNELS_SH(6)|BYTES_SH(2),                                  ANYSPACE,  Pack6Words},
+    { CHANNELS_SH(6)|BYTES_SH(2)|DOSWAP_SH(1),                     ANYSPACE,  Pack6WordsSwap},
+    // Any other 2-byte layout: the planar row first, then the chunky one
+    { BYTES_SH(2)|PLANAR_SH(1),     ANYFLAVOR|ANYENDIAN|ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE, PackPlanarWords},
+    { BYTES_SH(2),                  ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYENDIAN|ANYEXTRA|ANYCHANNELS|ANYSPACE, PackAnyWords}
+
+};
+
+// Stock float pack (output) table; _cmsGetStockOutputFormatter scans it top-down, first match wins
+static const cmsFormattersFloat OutputFormattersFloat[] = {
+    //    Type                                          Mask                                 Function
+    //  ----------------------------   ---------------------------------------------------  ----------------------------
+    {     TYPE_Lab_FLT,                                                ANYPLANAR|ANYEXTRA,   PackLabFloatFromFloat},
+    {     TYPE_XYZ_FLT,                                                ANYPLANAR|ANYEXTRA,   PackXYZFloatFromFloat},
+
+    {     TYPE_Lab_DBL,                                                ANYPLANAR|ANYEXTRA,   PackLabDoubleFromFloat},
+    {     TYPE_XYZ_DBL,                                                ANYPLANAR|ANYEXTRA,   PackXYZDoubleFromFloat},
+    // Generic rows keyed on FLOAT_SH(1) plus the BYTES_SH value: 4 -> floats, 0 -> doubles, 2 -> half
+    {     FLOAT_SH(1)|BYTES_SH(4), ANYPLANAR|
+                             ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE,   PackFloatsFromFloat },
+    {     FLOAT_SH(1)|BYTES_SH(0), ANYPLANAR|
+                             ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE,   PackDoublesFromFloat },
+#ifndef CMS_NO_HALF_SUPPORT 
+    {     FLOAT_SH(1)|BYTES_SH(2),   // NOTE(review): this mask has no ANYPLANAR, unlike the two rows above
+                             ANYFLAVOR|ANYSWAPFIRST|ANYSWAP|ANYEXTRA|ANYCHANNELS|ANYSPACE,   PackHalfFromFloat },
+#endif
+
+};
+
+
+// Stock output (pack) formatter lookup.
+// dwFlags picks the table (16 bits or float); the first row whose Type equals the format
+// with the row's Mask bits cleared supplies the function.
+// Fmt16 is NULL when nothing matches.
+static cmsFormatter _cmsGetStockOutputFormatter(cmsUInt32Number dwInput, cmsUInt32Number dwFlags)
+{
+    cmsFormatter Result;
+
+    // Optimization is only a hint, so that bit is dropped before any comparison
+    dwInput &= ~OPTIMIZED_SH(1);
+
+    if (dwFlags == CMS_PACK_FLAGS_16BITS) {
+
+        // 16 bits table
+        const cmsFormatters16* Row = OutputFormatters16;
+        const cmsFormatters16* const End = Row + sizeof(OutputFormatters16) / sizeof(cmsFormatters16);
+
+        for (; Row != End; Row++) {
+
+            // Bit fields set to one in the mask are not compared
+            if ((dwInput & ~Row->Mask) != Row->Type) continue;
+
+            Result.Fmt16 = Row->Frm;
+            return Result;
+        }
+    }
+    else if (dwFlags == CMS_PACK_FLAGS_FLOAT) {
+
+        // Floating point table
+        const cmsFormattersFloat* Row = OutputFormattersFloat;
+        const cmsFormattersFloat* const End = Row + sizeof(OutputFormattersFloat) / sizeof(cmsFormattersFloat);
+
+        for (; Row != End; Row++) {
+
+            // Same masked comparison, against the float table
+            if ((dwInput & ~Row->Mask) != Row->Type) continue;
+
+            Result.FmtFloat = Row->Frm;
+            return Result;
+        }
+    }
+
+    // Unknown flags or no matching row
+    Result.Fmt16 = NULL;
+    return Result;
+}
+
+
+typedef struct _cms_formatters_factory_list {
+    // One node per registered formatter factory; nodes are chained through Next
+    cmsFormatterFactory Factory;                  // callback asked for a formatter by _cmsGetFormatter
+    struct _cms_formatters_factory_list *Next;    // following node, NULL at the tail
+
+} cmsFormattersFactoryList;
+// File-scope formatters chunk, with its first member initialised to NULL
+_cmsFormattersPluginChunkType _cmsFormattersPluginChunk = { NULL };
+
+
+// Duplicates the formatter factory list of src into ctx, node by node and in the same order
+static
+void DupFormatterFactoryList(struct _cmsContext_struct* ctx, 
+                                               const struct _cmsContext_struct* src)
+{
+   _cmsFormattersPluginChunkType newHead = { NULL };   // chunk being built for ctx
+   cmsFormattersFactoryList*  entry;
+   cmsFormattersFactoryList*  Anterior = NULL;          // last node copied so far
+   _cmsFormattersPluginChunkType* head = (_cmsFormattersPluginChunkType*) src->chunks[FormattersPlugin];
+
+     _cmsAssert(head != NULL);
+
+   // Walk the list copying all nodes
+   for (entry = head->FactoryList;
+       entry != NULL;
+       entry = entry ->Next) {
+           // Copy of the whole node obtained through ctx's MemPool (presumably allocated from that pool)
+           cmsFormattersFactoryList *newEntry = ( cmsFormattersFactoryList *) _cmsSubAllocDup(ctx ->MemPool, entry, sizeof(cmsFormattersFactoryList));
+
+           if (newEntry == NULL) // copy failed: leave without assigning ctx's FormattersPlugin slot
+               return;
+
+           // We want to keep the linked list order, so this is a little bit tricky
+           newEntry -> Next = NULL;
+           if (Anterior)
+               Anterior -> Next = newEntry;
+
+           Anterior = newEntry;
+
+           if (newHead.FactoryList == NULL)
+               newHead.FactoryList = newEntry;
+   }
+   // Store a copy of the new head as the FormattersPlugin chunk of ctx
+   ctx ->chunks[FormattersPlugin] = _cmsSubAllocDup(ctx->MemPool, &newHead, sizeof(_cmsFormattersPluginChunkType));
+}
+
+// Formatters plug-in chunk allocator/dup: copies the factory list of src, or starts empty
+void _cmsAllocFormattersPluginChunk(struct _cmsContext_struct* ctx,
+                                    const struct _cmsContext_struct* src)
+{
+    // Template for a context with no formatter factories registered
+    static _cmsFormattersPluginChunkType EmptyChunk = { NULL };
+
+    _cmsAssert(ctx != NULL);
+
+    if (src == NULL) {
+        ctx->chunks[FormattersPlugin] = _cmsSubAllocDup(ctx->MemPool, &EmptyChunk, sizeof(_cmsFormattersPluginChunkType));
+        return;
+    }
+
+    DupFormatterFactoryList(ctx, src);    // Duplicate the LIST
+}
+
+
+
+// Formatters management: pushes the plug-in's factory on the front of this context's list,
+// or unlinks every registered factory (back to the built-in formatters) when Data is NULL.
+cmsBool _cmsRegisterFormattersPlugin(cmsContext ContextID, cmsPluginBase* Data)
+{
+    _cmsFormattersPluginChunkType* Chunk = (_cmsFormattersPluginChunkType*) _cmsContextGetClientChunk(ContextID, FormattersPlugin);
+    cmsFormattersFactoryList* Node;
+
+    if (Data != NULL) {
+
+        Node = (cmsFormattersFactoryList*) _cmsPluginMalloc(ContextID, sizeof(cmsFormattersFactoryList));
+        if (Node == NULL) return FALSE;
+
+        // The new node becomes the head of the list
+        Node->Factory = ((cmsPluginFormatters*) Data)->FormattersFactory;
+        Node->Next    = Chunk->FactoryList;
+
+        Chunk->FactoryList = Node;
+        return TRUE;
+    }
+
+    // Reset to built-in defaults
+    Chunk->FactoryList = NULL;
+    return TRUE;
+}
+
+cmsFormatter _cmsGetFormatter(cmsContext ContextID,
+                              cmsUInt32Number Type,         // Specific type, i.e. TYPE_RGB_8
+                              cmsFormatterDirection Dir,
+                              cmsUInt32Number dwFlags)
+{
+    _cmsFormattersPluginChunkType* Chunk = (_cmsFormattersPluginChunkType*) _cmsContextGetClientChunk(ContextID, FormattersPlugin);
+    cmsFormattersFactoryList* Node = Chunk->FactoryList;
+
+    // Registered factories get the first chance, in list order
+    while (Node != NULL) {
+
+        cmsFormatter Candidate = Node->Factory(Type, Dir, dwFlags);
+        if (Candidate.Fmt16 != NULL) return Candidate;
+        Node = Node->Next;
+    }
+
+    // Revert to default: the stock tables for the requested direction
+    return (Dir == cmsFormatterInput) ? _cmsGetStockInputFormatter(Type, dwFlags)
+                                      : _cmsGetStockOutputFormatter(Type, dwFlags);
+}
+
+
+// Return whether the given formatter refers to float values, i.e. T_FLOAT(Type) is non-zero
+cmsBool _cmsFormatterIsFloat(cmsUInt32Number Type)
+{
+    return (T_FLOAT(Type) != 0) ? TRUE : FALSE;
+}
+
+// Return whether the given formatter refers to 8 bits, i.e. T_BYTES(Type) is exactly 1
+cmsBool _cmsFormatterIs8bit(cmsUInt32Number Type)
+{
+    // The macro result is compared as an unsigned 32-bit number
+    const cmsUInt32Number nBytes = T_BYTES(Type);
+    return (nBytes == 1);
+}
+
+// Build a suitable formatter for the colorspace of this profile: float flag, colorspace,
+// nBytes and the channel count of that colorspace packed into one formatter word
+cmsUInt32Number CMSEXPORT cmsFormatterForColorspaceOfProfile(cmsHPROFILE hProfile, cmsUInt32Number nBytes, cmsBool lIsFloat)
+{
+    const cmsColorSpaceSignature Space    = cmsGetColorSpace(hProfile);
+    const cmsUInt32Number        SpaceId  = (cmsUInt32Number) _cmsLCMScolorSpace(Space);
+    const cmsUInt32Number        Channels = cmsChannelsOf(Space);
+    const cmsUInt32Number        FloatBit = lIsFloat ? 1U : 0;
+
+    // Create a fake formatter for result
+    return FLOAT_SH(FloatBit) | COLORSPACE_SH(SpaceId) | BYTES_SH(nBytes) | CHANNELS_SH(Channels);
+}
+
+// Build a suitable formatter for the PCS of this profile: float flag, colorspace,
+// nBytes and the channel count of the PCS packed into one formatter word
+cmsUInt32Number CMSEXPORT cmsFormatterForPCSOfProfile(cmsHPROFILE hProfile, cmsUInt32Number nBytes, cmsBool lIsFloat)
+{
+    const cmsColorSpaceSignature Space = cmsGetPCS(hProfile);
+
+    const cmsUInt32Number SpaceId  = (cmsUInt32Number) _cmsLCMScolorSpace(Space);
+    const cmsUInt32Number Channels = cmsChannelsOf(Space);
+    const cmsUInt32Number FloatBit = lIsFloat ? 1U : 0;
+
+    // Create a fake formatter for result
+    return FLOAT_SH(FloatBit) | COLORSPACE_SH(SpaceId) | BYTES_SH(nBytes) | CHANNELS_SH(Channels);
+}
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmspcs.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmspcs.cpp
new file mode 100755
index 0000000000..ea70484d5b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmspcs.cpp
@@ -0,0 +1,940 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+//      inter PCS conversions XYZ <-> CIE L* a* b*
+/*
+
+
+       CIE 15:2004 CIELab is defined as:
+
+       L* = 116*f(Y/Yn) - 16                     0 <= L* <= 100
+       a* = 500*[f(X/Xn) - f(Y/Yn)]
+       b* = 200*[f(Y/Yn) - f(Z/Zn)]
+
+       and
+
+              f(t) = t^(1/3)                     1 >= t >  (24/116)^3
+                     (841/108)*t + (16/116)      0 <= t <= (24/116)^3
+
+
+       Reverse transform is:
+
+       X = Xn*[a* / 500 + (L* + 16) / 116] ^ 3   if [a* / 500 + (L* + 16) / 116] >  (24/116)
+         = Xn*(a* / 500 + L* / 116) / 7.787      if [a* / 500 + (L* + 16) / 116] <= (24/116)
+
+
+
+       PCS in Lab2 is encoded as:
+
+              8 bit Lab PCS:
+
+                     L*      0..100 into a 0..ff byte.
+                     a*      t + 128 range is -128.0  +127.0
+                     b*
+
+             16 bit Lab PCS:
+
+                     L*     0..100  into a 0..ff00 word.
+                     a*     t + 128  range is  -128.0  +127.9961
+                     b*
+
+
+
+Interchange Space   Component     Actual Range        Encoded Range
+CIE XYZ             X             0 -> 1.99997        0x0000 -> 0xffff
+CIE XYZ             Y             0 -> 1.99997        0x0000 -> 0xffff
+CIE XYZ             Z             0 -> 1.99997        0x0000 -> 0xffff
+
+Version 2,3
+-----------
+
+CIELAB (16 bit)     L*            0 -> 100.0          0x0000 -> 0xff00
+CIELAB (16 bit)     a*            -128.0 -> +127.996  0x0000 -> 0x8000 -> 0xffff
+CIELAB (16 bit)     b*            -128.0 -> +127.996  0x0000 -> 0x8000 -> 0xffff
+
+
+Version 4
+---------
+
+CIELAB (16 bit)     L*            0 -> 100.0          0x0000 -> 0xffff
+CIELAB (16 bit)     a*            -128.0 -> +127      0x0000 -> 0x8080 -> 0xffff
+CIELAB (16 bit)     b*            -128.0 -> +127      0x0000 -> 0x8080 -> 0xffff
+
+*/
+
+// Conversions
+void CMSEXPORT cmsXYZ2xyY(cmsCIExyY* Dest, const cmsCIEXYZ* Source)
+{
+    // XYZ -> xyY. Reciprocal of X+Y+Z; there is no guard, so the sum is assumed non-zero
+    const cmsFloat64Number InvTotal = 1./(Source->X + Source->Y + Source->Z);
+
+    // Chromaticities are the X and Y shares of the total
+    Dest->x = (Source->X) * InvTotal;
+    Dest->y = (Source->Y) * InvTotal;
+    Dest->Y = Source->Y;     // Y is carried over unchanged
+}
+// xyY -> XYZ. Divides by Source->y without a guard, so y is assumed non-zero
+void CMSEXPORT cmsxyY2XYZ(cmsCIEXYZ* Dest, const cmsCIExyY* Source)
+{
+    Dest->X = (Source->x / Source->y) * Source->Y;
+    Dest->Y = Source->Y;
+    Dest->Z = ((1 - Source->x - Source->y) / Source->y) * Source->Y;
+}
+
+/*
+       The break point (24/116)^3 = (6/29)^3 is a very small amount of tristimulus 
+       primary (0.008856).  Generally, this only happens for 
+       nearly ideal blacks and for some orange / amber colors in transmission mode.  
+       For example, the Z value of the orange turn indicator lamp lens on an 
+       automobile will often be below this value.  But the Z does not 
+       contribute to the perceived color directly.
+*/
+
+// Forward Lab companding: linear segment up to (24/116)^3, cube root above
+static cmsFloat64Number f(cmsFloat64Number t)
+{
+    const cmsFloat64Number Breakpoint = (24.0/116.0) * (24.0/116.0) * (24.0/116.0);
+
+    // Guard clause for the linear part, same "<=" comparison
+    if (t <= Breakpoint) return (841.0/108.0) * t + (16.0/116.0);
+
+    return pow(t, 1.0/3.0);
+}
+
+// Inverse of the Lab companding: linear up to 24/116, cube above
+static cmsFloat64Number f_1(cmsFloat64Number t)
+{
+    const cmsFloat64Number Breakpoint = (24.0/116.0);
+
+    // Cube branch first; !(t <= Breakpoint) selects it for exactly the same inputs
+    if (!(t <= Breakpoint))
+        return t * t * t;
+
+    return (108.0/841.0) * (t - (16.0/116.0));
+}
+
+
+// Standard XYZ to Lab. It can handle negative XYZ numbers in some cases
+void CMSEXPORT cmsXYZ2Lab(const cmsCIEXYZ* WhitePoint, cmsCIELab* Lab, const cmsCIEXYZ* xyz)
+{
+    cmsFloat64Number CompX, CompY, CompZ;
+
+    if (WhitePoint == NULL) WhitePoint = cmsD50_XYZ();   // NULL selects D50
+
+    // Compand each tristimulus value relative to the white point
+    CompX = f(xyz->X / WhitePoint->X);
+    CompY = f(xyz->Y / WhitePoint->Y);
+    CompZ = f(xyz->Z / WhitePoint->Z);
+
+    Lab->L = 116.0*CompY - 16.0;
+    Lab->a = 500.0*(CompX - CompY);
+    Lab->b = 200.0*(CompY - CompZ);
+}
+
+
+// Standard Lab to XYZ. It can return negative XYZ in some cases
+void CMSEXPORT cmsLab2XYZ(const cmsCIEXYZ* WhitePoint, cmsCIEXYZ* xyz,  const cmsCIELab* Lab)
+{
+    cmsFloat64Number fx, fy, fz;
+
+    if (WhitePoint == NULL) WhitePoint = cmsD50_XYZ();   // NULL selects D50
+
+    // Undo the L*, a*, b* scaling to get the companded values
+    fy = (Lab->L + 16.0) / 116.0;
+    fx = fy + 0.002 * Lab->a;
+    fz = fy - 0.005 * Lab->b;
+
+    // Expand each one and scale by the white point
+    xyz->X = f_1(fx) * WhitePoint->X;
+    xyz->Y = f_1(fy) * WhitePoint->Y;
+    xyz->Z = f_1(fz) * WhitePoint->Z;
+}
+
+// V2 16-bit L* word to float: 0xff00 (65280) decodes to 100.0
+static cmsFloat64Number L2float2(cmsUInt16Number v)
+{
+    return (cmsFloat64Number) v / 652.800;
+}
+
+// the a/b part: V2 16-bit word to float,
+// 0x8000 decodes to 0
+static cmsFloat64Number ab2float2(cmsUInt16Number v)
+{
+    return ((cmsFloat64Number) v / 256.0) - 128.0;
+}
+
+// Float L* to a V2 16-bit word: scaled by 652.8, then handed to _cmsQuickSaturateWord
+static cmsUInt16Number L2Fix2(cmsFloat64Number L)
+{
+    return _cmsQuickSaturateWord(L *  652.8);
+}
+
+// Float a* or b* to a V2 16-bit word: offset by 128, scaled by 256
+static cmsUInt16Number ab2Fix2(cmsFloat64Number ab)
+{
+    return _cmsQuickSaturateWord((ab + 128.0) * 256.0);
+}
+
+
+// V4 16-bit L* word to float: 0xffff (65535) decodes to 100.0
+static cmsFloat64Number L2float4(cmsUInt16Number v)
+{
+    return (cmsFloat64Number) v / 655.35;
+}
+
+// the a/b part: V4 16-bit word to float,
+// 0x8080 decodes to 0
+static cmsFloat64Number ab2float4(cmsUInt16Number v)
+{
+    return ((cmsFloat64Number) v / 257.0) - 128.0;
+}
+
+// Decodes three 16-bit words (L*, a*, b*) in the V2 encoding into a floating point Lab
+void CMSEXPORT cmsLabEncoded2FloatV2(cmsCIELab* Lab, const cmsUInt16Number wLab[3])
+{
+    Lab->L = L2float2(wLab[0]);
+    Lab->a = ab2float2(wLab[1]);
+    Lab->b = ab2float2(wLab[2]);
+}
+
+// Decodes three 16-bit words (L*, a*, b*) in the V4 encoding into a floating point Lab
+void CMSEXPORT cmsLabEncoded2Float(cmsCIELab* Lab, const cmsUInt16Number wLab[3])
+{
+    Lab->L = L2float4(wLab[0]);
+    Lab->a = ab2float4(wLab[1]);
+    Lab->b = ab2float4(wLab[2]);
+}
+
+// Clamps L* to the span a V2 16-bit word can hold: 0 .. 0xFFFF*100/0xFF00
+static cmsFloat64Number Clamp_L_doubleV2(cmsFloat64Number L)
+{
+    const cmsFloat64Number Ceiling = (cmsFloat64Number) (0xFFFF * 100.0) / 0xFF00;
+
+    if (L < 0) return 0;
+    if (L > Ceiling) return Ceiling;
+
+    return L;   // in range (a NaN fails both tests and is returned as is)
+}
+
+
+// Clamps a* or b* to MIN_ENCODEABLE_ab2 .. MAX_ENCODEABLE_ab2, lower bound applied first
+static cmsFloat64Number Clamp_ab_doubleV2(cmsFloat64Number ab)
+{
+    ab = (ab < MIN_ENCODEABLE_ab2) ? MIN_ENCODEABLE_ab2 : ab;
+    ab = (ab > MAX_ENCODEABLE_ab2) ? MAX_ENCODEABLE_ab2 : ab;
+
+    return ab;
+}
+
+void CMSEXPORT cmsFloat2LabEncodedV2(cmsUInt16Number wLab[3], const cmsCIELab* fLab)
+{
+    // Float Lab -> V2 16-bit words: every component is clamped first...
+    const cmsFloat64Number L = Clamp_L_doubleV2(fLab->L);
+    const cmsFloat64Number a = Clamp_ab_doubleV2(fLab->a);
+    const cmsFloat64Number b = Clamp_ab_doubleV2(fLab->b);
+
+    // ...and only then converted to fixed point
+    wLab[0] = L2Fix2(L);
+    wLab[1] = ab2Fix2(a);
+    wLab[2] = ab2Fix2(b);
+}
+
+
+// Clamps L* to 0 .. 100
+static cmsFloat64Number Clamp_L_doubleV4(cmsFloat64Number L)
+{
+    if (L < 0) return 0;
+    if (L > 100.0) return 100.0;
+
+    return L;
+}
+
+// Clamps a* or b* to MIN_ENCODEABLE_ab4 .. MAX_ENCODEABLE_ab4, lower bound applied first
+static cmsFloat64Number Clamp_ab_doubleV4(cmsFloat64Number ab)
+{
+    ab = (ab < MIN_ENCODEABLE_ab4) ? MIN_ENCODEABLE_ab4 : ab;
+    ab = (ab > MAX_ENCODEABLE_ab4) ? MAX_ENCODEABLE_ab4 : ab;
+
+    return ab;
+}
+
+// Float L* to a V4 16-bit word: scaled by 655.35, then handed to _cmsQuickSaturateWord
+static cmsUInt16Number L2Fix4(cmsFloat64Number L)
+{
+    return _cmsQuickSaturateWord(L *  655.35);
+}
+
+// Float a* or b* to a V4 16-bit word: offset by 128, scaled by 257
+static cmsUInt16Number ab2Fix4(cmsFloat64Number ab)
+{
+    return _cmsQuickSaturateWord((ab + 128.0) * 257.0);
+}
+
+void CMSEXPORT cmsFloat2LabEncoded(cmsUInt16Number wLab[3], const cmsCIELab* fLab)
+{
+    // Float Lab -> V4 16-bit words: every component is clamped first...
+    const cmsFloat64Number L = Clamp_L_doubleV4(fLab->L);
+    const cmsFloat64Number a = Clamp_ab_doubleV4(fLab->a);
+    const cmsFloat64Number b = Clamp_ab_doubleV4(fLab->b);
+
+    // ...and only then converted to fixed point
+    wLab[0] = L2Fix4(L);
+    wLab[1] = ab2Fix4(a);
+    wLab[2] = ab2Fix4(b);
+}
+
+// Auxiliary: convert degrees to radians
+static cmsFloat64Number RADIANS(cmsFloat64Number deg)
+{
+    // Multiplication by pi comes first, the division by 180 second
+    return (deg * M_PI) / 180.;
+}
+
+
+// Auxiliary: atan2 but operating in degrees and returning 0 if a==b==0.
+// The result is then brought into 0..360 by whole turns.
+static cmsFloat64Number atan2deg(cmsFloat64Number a, cmsFloat64Number b)
+{
+    cmsFloat64Number Angle;
+
+    // atan2 is not called when both arguments are zero
+    Angle = (a == 0 && b == 0) ? 0 : atan2(a, b);
+
+    Angle *= (180. / M_PI);   // radians to degrees
+
+    while (Angle > 360.) {
+        Angle -= 360.;
+    }
+
+    while (Angle < 0) {
+        Angle += 360.;
+    }
+
+    return Angle;
+}
+
+
+// Auxiliary: Square
+static cmsFloat64Number Sqr(cmsFloat64Number v)
+{
+    // Plain self-multiplication
+    return v * v;
+}
+// To cylindrical coordinates (Lab -> LCh, h in degrees). No check is performed, then negative values are allowed
+void CMSEXPORT cmsLab2LCh(cmsCIELCh* LCh, const cmsCIELab* Lab)
+{
+    LCh -> L = Lab -> L;
+    LCh -> C = pow(Sqr(Lab ->a) + Sqr(Lab ->b), 0.5);
+    LCh -> h = atan2deg(Lab ->b, Lab ->a);
+}
+
+
+// From cylindrical coordinates (LCh -> Lab). No check is performed, then negative values are allowed
+void CMSEXPORT cmsLCh2Lab(cmsCIELab* Lab, const cmsCIELCh* LCh)
+{
+    const cmsFloat64Number HueRad = (LCh->h * M_PI) / 180.0;   // degrees to radians
+
+    Lab->L = LCh->L;
+    Lab->a = LCh->C * cos(HueRad);
+    Lab->b = LCh->C * sin(HueRad);
+}
+
+// In XYZ All 3 components are encoded using 1.15 fixed point,
+// hence the scale factor of 32768 (2^15)
+static cmsUInt16Number XYZ2Fix(cmsFloat64Number d)
+{
+    return _cmsQuickSaturateWord(d * 32768.0);
+}
+
+void CMSEXPORT cmsFloat2XYZEncoded(cmsUInt16Number XYZ[3], const cmsCIEXYZ* fXYZ)
+{
+    // Private copies: the structure passed by the caller is only read
+    cmsFloat64Number X = fXYZ->X;
+    cmsFloat64Number Y = fXYZ->Y;
+    cmsFloat64Number Z = fXYZ->Z;
+
+    // Clamp to encodeable values.
+    // A zero or negative Y turns the three components into zero
+    if (Y <= 0) {
+
+        X = 0;
+        Y = 0;
+        Z = 0;
+    }
+
+    // X: ceiling at MAX_ENCODEABLE_XYZ first, then floor at zero
+    if (X > MAX_ENCODEABLE_XYZ)
+        X = MAX_ENCODEABLE_XYZ;
+    if (X < 0)
+        X = 0;
+
+    // Y: same two limits
+    if (Y > MAX_ENCODEABLE_XYZ)
+        Y = MAX_ENCODEABLE_XYZ;
+    if (Y < 0)
+        Y = 0;
+
+    // Z: same two limits
+    if (Z > MAX_ENCODEABLE_XYZ)
+        Z = MAX_ENCODEABLE_XYZ;
+    if (Z < 0)
+        Z = 0;
+
+    // Convert to 1.15 fixed point words
+    XYZ[0] = XYZ2Fix(X);
+    XYZ[1] = XYZ2Fix(Y);
+    XYZ[2] = XYZ2Fix(Z);
+}
+
+
+//  To convert from Fixed 1.15 point to cmsFloat64Number
+static cmsFloat64Number XYZ2float(cmsUInt16Number v)
+{
+    cmsS15Fixed16Number Wide;
+
+    // From 1.15 to 15.16: shifting left by one bit grows the fraction
+    // from 15 to 16 bits
+    Wide = v << 1;
+
+    // From fixed 15.16 to cmsFloat64Number
+    return _cms15Fixed16toDouble(Wide);
+}
+
+// Decodes three 1.15 fixed point words (X, Y, Z) into a floating point XYZ
+void CMSEXPORT cmsXYZEncoded2Float(cmsCIEXYZ* fXYZ, const cmsUInt16Number XYZ[3])
+{
+    fXYZ->X = XYZ2float(XYZ[0]);
+    fXYZ->Y = XYZ2float(XYZ[1]);
+    fXYZ->Z = XYZ2float(XYZ[2]);
+}
+
+
+// Returns dE on two Lab values: the Euclidean distance between them
+cmsFloat64Number CMSEXPORT cmsDeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2)
+{
+    // Absolute difference on each axis
+    const cmsFloat64Number DiffL = fabs(Lab1->L - Lab2->L);
+    const cmsFloat64Number Diffa = fabs(Lab1->a - Lab2->a);
+    const cmsFloat64Number Diffb = fabs(Lab1->b - Lab2->b);
+
+    // The square root is taken as pow(..., 0.5)
+    return pow(Sqr(DiffL) + Sqr(Diffa) + Sqr(Diffb), 0.5);
+}
+
+
+// Return the CIE94 Delta E
+cmsFloat64Number CMSEXPORT cmsCIE94DeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2)
+{
+    cmsCIELCh Polar1, Polar2;
+    cmsFloat64Number DiffL, DiffC, DiffH, Total, HueSq;
+    cmsFloat64Number MeanC, WeightC, WeightH;
+
+    DiffL = fabs(Lab1->L - Lab2->L);
+
+    // Chroma comes from the cylindrical form of each colour
+    cmsLab2LCh(&Polar1, Lab1);
+    cmsLab2LCh(&Polar2, Lab2);
+
+    DiffC = fabs(Polar1.C - Polar2.C);
+    Total = cmsDeltaE(Lab1, Lab2);
+
+    // Hue difference is what remains of dE once lightness and chroma are removed;
+    // a negative remainder is treated as zero
+    HueSq = Sqr(Total) - Sqr(DiffL) - Sqr(DiffC);
+    DiffH = (HueSq < 0) ? 0 : pow(HueSq, 0.5);
+
+    // Both weights grow with the geometric mean of the two chromas
+    MeanC   = sqrt(Polar1.C * Polar2.C);
+    WeightC = 1.0 + (0.048 * MeanC);
+    WeightH = 1.0 + (0.014 * MeanC);
+
+    return sqrt(Sqr(DiffL)  + Sqr(DiffC) / Sqr(WeightC) + Sqr(DiffH) / Sqr(WeightH));
+}
+
+
+// Auxiliary: lightness term used by cmsBFDdeltaE
+static cmsFloat64Number ComputeLBFD(const cmsCIELab* Lab)
+{
+    cmsFloat64Number Yt;
+
+    // Two formulas for Yt, switching at L = 7.996969
+    if (!(Lab->L > 7.996969))
+        Yt = 100 * (Lab->L / 903.3);
+    else
+        Yt = (Sqr((Lab->L+16)/116)*((Lab->L+16)/116))*100;
+
+    return (54.6 * (M_LOG10E * (log(Yt + 1.5))) - 9.6);
+}
+
+
+
+// bfd - gets BFD(1:1) difference between Lab1, Lab2
+cmsFloat64Number CMSEXPORT cmsBFDdeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2)
+{
+    cmsFloat64Number lbfd1,lbfd2,AveC,Aveh,dE,deltaL,
+        deltaC,deltah,dc,t,g,dh,rh,rc,rt,bfd;
+    cmsCIELCh LCh1, LCh2;
+
+    // Lightness difference is taken between the ComputeLBFD() values, not between the L* values
+    lbfd1 = ComputeLBFD(Lab1);
+    lbfd2 = ComputeLBFD(Lab2);
+    deltaL = lbfd2 - lbfd1;
+    // Chroma difference, mean chroma and mean hue come from the cylindrical forms
+    cmsLab2LCh(&LCh1, Lab1);
+    cmsLab2LCh(&LCh2, Lab2);
+
+    deltaC = LCh2.C - LCh1.C;
+    AveC = (LCh1.C+LCh2.C)/2;
+    Aveh = (LCh1.h+LCh2.h)/2;   // plain arithmetic mean of the two hue angles, in degrees
+
+    dE = cmsDeltaE(Lab1, Lab2);
+    // Hue difference: what is left of dE after removing the L* and chroma parts, or 0 if nothing is left
+    if (Sqr(dE)>(Sqr(Lab2->L-Lab1->L)+Sqr(deltaC)))
+        deltah = sqrt(Sqr(dE)-Sqr(Lab2->L-Lab1->L)-Sqr(deltaC));
+    else
+        deltah =0;
+
+    // Weighting terms; dividing by (180/M_PI) converts the degree arguments to radians for cos()
+    dc   = 0.035 * AveC / (1 + 0.00365 * AveC)+0.521;
+    g    = sqrt(Sqr(Sqr(AveC))/(Sqr(Sqr(AveC))+14000));
+    t    = 0.627+(0.055*cos((Aveh-254)/(180/M_PI))-
+           0.040*cos((2*Aveh-136)/(180/M_PI))+
+           0.070*cos((3*Aveh-31)/(180/M_PI))+
+           0.049*cos((4*Aveh+114)/(180/M_PI))-
+           0.015*cos((5*Aveh-103)/(180/M_PI)));
+
+    dh    = dc*(g*t+1-g);
+    rh    = -0.260*cos((Aveh-308)/(180/M_PI))-
+           0.379*cos((2*Aveh-160)/(180/M_PI))-
+           0.636*cos((3*Aveh+254)/(180/M_PI))+
+           0.226*cos((4*Aveh+140)/(180/M_PI))-
+           0.194*cos((5*Aveh+280)/(180/M_PI));
+
+    rc = sqrt((AveC*AveC*AveC*AveC*AveC*AveC)/((AveC*AveC*AveC*AveC*AveC*AveC)+70000000));
+    rt = rh*rc;
+    // rt can be negative, so the cross term below may lower the sum under the square root
+    bfd = sqrt(Sqr(deltaL)+Sqr(deltaC/dc)+Sqr(deltah/dh)+(rt*(deltaC/dc)*(deltah/dh)));
+
+    return bfd;
+}
+
+
+//  cmc - CMC(l:c) difference between Lab1, Lab2
+cmsFloat64Number CMSEXPORT cmsCMCdeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2, cmsFloat64Number l, cmsFloat64Number c)
+{
+  cmsFloat64Number dE,dL,dC,dh,sl,sc,sh,t,f,cmc;
+  cmsCIELCh LCh1, LCh2;
+  // Early out: when both L values are exactly 0 the difference is reported as 0
+  if (Lab1 ->L == 0 && Lab2 ->L == 0) return 0;
+
+  cmsLab2LCh(&LCh1, Lab1);
+  cmsLab2LCh(&LCh2, Lab2);
+
+  // Signed lightness and chroma differences (second colour minus first)
+  dL = Lab2->L-Lab1->L;
+  dC = LCh2.C-LCh1.C;
+
+  dE = cmsDeltaE(Lab1, Lab2);
+  // Hue difference is the remainder of dE after removing dL and dC, or 0 when nothing remains
+  if (Sqr(dE)>(Sqr(dL)+Sqr(dC)))
+            dh = sqrt(Sqr(dE)-Sqr(dL)-Sqr(dC));
+  else
+            dh =0;
+  // t depends on the hue angle of the first colour only; the angle is turned into radians for cos()
+  if ((LCh1.h > 164) && (LCh1.h < 345))
+      t = 0.56 + fabs(0.2 * cos(((LCh1.h + 168)/(180/M_PI))));
+  else
+      t = 0.36 + fabs(0.4 * cos(((LCh1.h + 35 )/(180/M_PI))));
+   // sc and sl are derived from the first colour only; sl is replaced by 0.511 when its L is below 16
+   sc  = 0.0638   * LCh1.C / (1 + 0.0131  * LCh1.C) + 0.638;
+   sl  = 0.040975 * Lab1->L /(1 + 0.01765 * Lab1->L);
+
+   if (Lab1->L<16)
+         sl = 0.511;
+
+   f   = sqrt((LCh1.C * LCh1.C * LCh1.C * LCh1.C)/((LCh1.C * LCh1.C * LCh1.C * LCh1.C)+1900));
+   sh  = sc*(t*f+1-f);
+   cmc = sqrt(Sqr(dL/(l*sl))+Sqr(dC/(c*sc))+Sqr(dh/sh));
+
+   return cmc;
+}
+
+// dE2000 The weightings KL, KC and KH can be modified to reflect the relative
+// importance of lightness, chroma and hue in different industrial applications
+cmsFloat64Number CMSEXPORT cmsCIE2000DeltaE(const cmsCIELab* Lab1, const cmsCIELab* Lab2,
+                                  cmsFloat64Number Kl, cmsFloat64Number Kc, cmsFloat64Number Kh)
+{
+    cmsFloat64Number L1  = Lab1->L;
+    cmsFloat64Number a1  = Lab1->a;
+    cmsFloat64Number b1  = Lab1->b;
+    cmsFloat64Number C   = sqrt( Sqr(a1) + Sqr(b1) );
+    // Same quantities for the second colour (names ending in s)
+    cmsFloat64Number Ls = Lab2 ->L;
+    cmsFloat64Number as = Lab2 ->a;
+    cmsFloat64Number bs = Lab2 ->b;
+    cmsFloat64Number Cs = sqrt( Sqr(as) + Sqr(bs) );
+
+    cmsFloat64Number G = 0.5 * ( 1 - sqrt(pow((C + Cs) / 2 , 7.0) / (pow((C + Cs) / 2, 7.0) + pow(25.0, 7.0) ) ));
+    // Primed values: a is stretched by (1 + G), b is kept; chroma and hue angle follow from them
+    cmsFloat64Number a_p = (1 + G ) * a1;
+    cmsFloat64Number b_p = b1;
+    cmsFloat64Number C_p = sqrt( Sqr(a_p) + Sqr(b_p));
+    cmsFloat64Number h_p = atan2deg(b_p, a_p);
+
+
+    cmsFloat64Number a_ps = (1 + G) * as;
+    cmsFloat64Number b_ps = bs;
+    cmsFloat64Number C_ps = sqrt(Sqr(a_ps) + Sqr(b_ps));
+    cmsFloat64Number h_ps = atan2deg(b_ps, a_ps);
+
+    cmsFloat64Number meanC_p =(C_p + C_ps) / 2;
+    // Hue sum and difference; the +/-360 corrections below handle angles on either side of the wrap-around
+    cmsFloat64Number hps_plus_hp  = h_ps + h_p;
+    cmsFloat64Number hps_minus_hp = h_ps - h_p;
+
+    cmsFloat64Number meanh_p = fabs(hps_minus_hp) <= 180.000001 ? (hps_plus_hp)/2 :
+                            (hps_plus_hp) < 360 ? (hps_plus_hp + 360)/2 :
+                                                 (hps_plus_hp - 360)/2;
+
+    cmsFloat64Number delta_h = (hps_minus_hp) <= -180.000001 ?  (hps_minus_hp + 360) :
+                            (hps_minus_hp) > 180 ? (hps_minus_hp - 360) :
+                                                    (hps_minus_hp);
+    cmsFloat64Number delta_L = (Ls - L1);
+    cmsFloat64Number delta_C = (C_ps - C_p );
+
+    // Hue difference turned from an angle into a length, using the geometric mean of both chromas
+    cmsFloat64Number delta_H =2 * sqrt(C_ps*C_p) * sin(RADIANS(delta_h) / 2);
+
+    cmsFloat64Number T = 1 - 0.17 * cos(RADIANS(meanh_p-30))
+                 + 0.24 * cos(RADIANS(2*meanh_p))
+                 + 0.32 * cos(RADIANS(3*meanh_p + 6))
+                 - 0.2  * cos(RADIANS(4*meanh_p - 63));
+    // Weighting functions for lightness (Sl), chroma (Sc) and hue (Sh)
+    cmsFloat64Number Sl = 1 + (0.015 * Sqr((Ls + L1) /2- 50) )/ sqrt(20 + Sqr( (Ls+L1)/2 - 50) );
+
+    cmsFloat64Number Sc = 1 + 0.045 * (C_p + C_ps)/2;
+    cmsFloat64Number Sh = 1 + 0.015 * ((C_ps + C_p)/2) * T;
+    // Rotation term Rt, which scales the chroma-hue cross term in the final sum
+    cmsFloat64Number delta_ro = 30 * exp( -Sqr(((meanh_p - 275 ) / 25)));
+
+    cmsFloat64Number Rc = 2 * sqrt(( pow(meanC_p, 7.0) )/( pow(meanC_p, 7.0) + pow(25.0, 7.0)));
+
+    cmsFloat64Number Rt = -sin(2 * RADIANS(delta_ro)) * Rc;
+    // Each difference is divided by its weighting function times the caller's K factor
+    cmsFloat64Number deltaE00 = sqrt( Sqr(delta_L /(Sl * Kl)) +
+                            Sqr(delta_C/(Sc * Kc))  +
+                            Sqr(delta_H/(Sh * Kh))  +
+                            Rt*(delta_C/(Sc * Kc)) * (delta_H / (Sh * Kh)));
+
+    return deltaE00;
+}
+
+// Chooses how many gridpoints a LUT table should have. The same number of gridpoints
+// is assumed in all dimensions. Flags may override the choice.
+//
+//   Colorspace - only its channel count (cmsChannelsOf) influences the result
+//   dwFlags    - bits 16..23, when non-zero, hold an explicit gridpoint count;
+//                otherwise cmsFLAGS_HIGHRESPRECALC / cmsFLAGS_LOWRESPRECALC pick
+//                the resolution, the high resolution flag being tested first
+//
+// Returns the number of gridpoints per dimension.
+cmsUInt32Number _cmsReasonableGridpointsByColorspace(cmsColorSpaceSignature Colorspace, cmsUInt32Number dwFlags)
+{
+    cmsUInt32Number Requested;
+    cmsUInt32Number nChan;
+
+    // An explicit request wins over everything else
+    Requested = (dwFlags >> 16) & 0xFF;
+    if (Requested != 0)
+        return Requested;
+
+    nChan = cmsChannelsOf(Colorspace);
+
+    // Maximum resolution
+    if (dwFlags & cmsFLAGS_HIGHRESPRECALC) {
+
+        if (nChan > 4)  return 7;       // Hifi
+        if (nChan == 4) return 23;      // CMYK
+
+        return 49;                      // RGB and others
+    }
+
+    // Lower resolution
+    if (dwFlags & cmsFLAGS_LOWRESPRECALC) {
+
+        if (nChan > 4)  return 6;       // More than 4 channels
+        if (nChan == 1) return 33;      // Monochrome
+
+        return 17;                      // Remaining cases
+    }
+
+    // Default values
+    if (nChan > 4)
+        return 7;                       // Hifi
+
+    if (nChan == 4)
+        return 17;                      // CMYK
+
+    return 33;                          // RGB
+}
+
+
+// Gives the encoded white and black endpoints and the channel count of the most
+// common color spaces. White, Black and nOutputs may each be NULL when not wanted.
+// Returns FALSE, leaving all outputs untouched, for any space not handled here.
+cmsBool  _cmsEndPointsBySpace(cmsColorSpaceSignature Space,
+                             cmsUInt16Number **White,
+                             cmsUInt16Number **Black,
+                             cmsUInt32Number *nOutputs)
+{
+       static cmsUInt16Number RGBblack[4]  = { 0, 0, 0 };
+       static cmsUInt16Number RGBwhite[4]  = { 0xffff, 0xffff, 0xffff };
+       static cmsUInt16Number CMYKblack[4] = { 0xffff, 0xffff, 0xffff, 0xffff };   // 400% of ink
+       static cmsUInt16Number CMYKwhite[4] = { 0, 0, 0, 0 };
+       static cmsUInt16Number LABblack[4]  = { 0, 0x8080, 0x8080 };               // V4 Lab encoding
+       static cmsUInt16Number LABwhite[4]  = { 0xFFFF, 0x8080, 0x8080 };
+       static cmsUInt16Number CMYblack[4]  = { 0xffff, 0xffff, 0xffff };
+       static cmsUInt16Number CMYwhite[4]  = { 0, 0, 0 };
+       static cmsUInt16Number Grayblack[4] = { 0 };
+       static cmsUInt16Number GrayWhite[4] = { 0xffff };
+
+       cmsUInt16Number* WhitePoint;
+       cmsUInt16Number* BlackPoint;
+       cmsUInt32Number  Channels;
+
+       switch (Space) {
+
+       case cmsSigGrayData:
+           WhitePoint = GrayWhite; BlackPoint = Grayblack; Channels = 1; break;
+
+       case cmsSigRgbData:
+           WhitePoint = RGBwhite;  BlackPoint = RGBblack;  Channels = 3; break;
+
+       case cmsSigLabData:
+           WhitePoint = LABwhite;  BlackPoint = LABblack;  Channels = 3; break;
+
+       case cmsSigCmykData:
+           WhitePoint = CMYKwhite; BlackPoint = CMYKblack; Channels = 4; break;
+
+       case cmsSigCmyData:
+           WhitePoint = CMYwhite;  BlackPoint = CMYblack;  Channels = 3; break;
+
+       default:
+           return FALSE;
+       }
+
+       if (White)    *White = WhitePoint;
+       if (Black)    *Black = BlackPoint;
+       if (nOutputs) *nOutputs = Channels;
+
+       return TRUE;
+}
+
+
+
+// Several utilities -------------------------------------------------------
+
+// Translate from our colorspace notation (PT_*) to the ICC color space signature.
+// The plain values 1 and 2 are accepted next to PT_GRAY and PT_RGB; unknown values give signature 0.
+cmsColorSpaceSignature CMSEXPORT _cmsICCcolorSpace(int OurNotation)
+{
+       cmsColorSpaceSignature Sig;
+       switch (OurNotation) {
+       case 1:
+       case PT_GRAY:  Sig = cmsSigGrayData;  break;
+       case 2:
+       case PT_RGB:   Sig = cmsSigRgbData;   break;
+
+       case PT_CMY:   Sig = cmsSigCmyData;   break;
+       case PT_CMYK:  Sig = cmsSigCmykData;  break;
+       case PT_YCbCr: Sig = cmsSigYCbCrData; break;
+       case PT_YUV:   Sig = cmsSigLuvData;   break;
+       case PT_XYZ:   Sig = cmsSigXYZData;   break;
+       case PT_LabV2:
+       case PT_Lab:   Sig = cmsSigLabData;   break;
+
+       case PT_YUVK:  Sig = cmsSigLuvKData;  break;
+       case PT_HSV:   Sig = cmsSigHsvData;   break;
+       case PT_HLS:   Sig = cmsSigHlsData;   break;
+       case PT_Yxy:   Sig = cmsSigYxyData;   break;
+
+       case PT_MCH1:  Sig = cmsSigMCH1Data;  break;
+       case PT_MCH2:  Sig = cmsSigMCH2Data;  break;
+       case PT_MCH3:  Sig = cmsSigMCH3Data;  break;
+       case PT_MCH4:  Sig = cmsSigMCH4Data;  break;
+       case PT_MCH5:  Sig = cmsSigMCH5Data;  break;
+       case PT_MCH6:  Sig = cmsSigMCH6Data;  break;
+       case PT_MCH7:  Sig = cmsSigMCH7Data;  break;
+       case PT_MCH8:  Sig = cmsSigMCH8Data;  break;
+
+       case PT_MCH9:  Sig = cmsSigMCH9Data;  break;
+       case PT_MCH10: Sig = cmsSigMCHAData;  break;
+       case PT_MCH11: Sig = cmsSigMCHBData;  break;
+       case PT_MCH12: Sig = cmsSigMCHCData;  break;
+       case PT_MCH13: Sig = cmsSigMCHDData;  break;
+       case PT_MCH14: Sig = cmsSigMCHEData;  break;
+       case PT_MCH15: Sig = cmsSigMCHFData;  break;
+
+       default:       Sig = (cmsColorSpaceSignature) 0; break;
+       }
+
+       return Sig;
+}
+
+
+// Translate from an ICC color space signature to our colorspace notation (PT_*).
+// Every "N color" signature gives the same result as the matching MCH signature.
+// Signatures that are not listed give 0.
+int CMSEXPORT _cmsLCMScolorSpace(cmsColorSpaceSignature ProfileSpace)
+{
+    int Notation;
+
+    switch (ProfileSpace) {
+
+    case cmsSigGrayData:  Notation = PT_GRAY;  break;
+    case cmsSigRgbData:   Notation = PT_RGB;   break;
+    case cmsSigCmyData:   Notation = PT_CMY;   break;
+    case cmsSigCmykData:  Notation = PT_CMYK;  break;
+    case cmsSigYCbCrData: Notation = PT_YCbCr; break;
+    case cmsSigLuvData:   Notation = PT_YUV;   break;
+    case cmsSigXYZData:   Notation = PT_XYZ;   break;
+    case cmsSigLabData:   Notation = PT_Lab;   break;
+    case cmsSigLuvKData:  Notation = PT_YUVK;  break;
+    case cmsSigHsvData:   Notation = PT_HSV;   break;
+    case cmsSigHlsData:   Notation = PT_HLS;   break;
+    case cmsSigYxyData:   Notation = PT_Yxy;   break;
+
+    case cmsSigMCH1Data:
+    case cmsSig1colorData:  Notation = PT_MCH1;  break;
+    case cmsSigMCH2Data:
+    case cmsSig2colorData:  Notation = PT_MCH2;  break;
+
+    case cmsSigMCH3Data:
+    case cmsSig3colorData:  Notation = PT_MCH3;  break;
+    case cmsSigMCH4Data:
+    case cmsSig4colorData:  Notation = PT_MCH4;  break;
+
+    case cmsSigMCH5Data:
+    case cmsSig5colorData:  Notation = PT_MCH5;  break;
+    case cmsSigMCH6Data:
+    case cmsSig6colorData:  Notation = PT_MCH6;  break;
+
+    case cmsSigMCH7Data:
+    case cmsSig7colorData:  Notation = PT_MCH7;  break;
+    case cmsSigMCH8Data:
+    case cmsSig8colorData:  Notation = PT_MCH8;  break;
+
+    case cmsSigMCH9Data:
+    case cmsSig9colorData:  Notation = PT_MCH9;  break;
+    case cmsSigMCHAData:
+    case cmsSig10colorData: Notation = PT_MCH10; break;
+
+    case cmsSigMCHBData:
+    case cmsSig11colorData: Notation = PT_MCH11; break;
+    case cmsSigMCHCData:
+    case cmsSig12colorData: Notation = PT_MCH12; break;
+
+    case cmsSigMCHDData:
+    case cmsSig13colorData: Notation = PT_MCH13; break;
+    case cmsSigMCHEData:
+    case cmsSig14colorData: Notation = PT_MCH14; break;
+
+    case cmsSigMCHFData:
+    case cmsSig15colorData: Notation = PT_MCH15; break;
+
+    default:                Notation = 0;        break;
+    }
+
+    return Notation;
+}
+
+
+// Number of channels of a color space, from its ICC signature. "N color" and MCH
+// signatures of the same N are treated alike. Signatures that are not listed are
+// reported as having 3 channels.
+cmsUInt32Number CMSEXPORT cmsChannelsOf(cmsColorSpaceSignature ColorSpace)
+{
+    cmsUInt32Number Count;
+
+    switch (ColorSpace) {
+
+    case cmsSigMCH1Data:
+    case cmsSig1colorData:
+    case cmsSigGrayData:    Count = 1;  break;
+
+    case cmsSigMCH2Data:
+    case cmsSig2colorData:  Count = 2;  break;
+
+    case cmsSigXYZData:
+    case cmsSigLabData:
+    case cmsSigLuvData:
+    case cmsSigYCbCrData:
+    case cmsSigYxyData:
+    case cmsSigRgbData:
+    case cmsSigHsvData:
+    case cmsSigHlsData:
+    case cmsSigCmyData:
+    case cmsSigMCH3Data:
+    case cmsSig3colorData:  Count = 3;  break;
+
+    case cmsSigLuvKData:
+    case cmsSigCmykData:
+    case cmsSigMCH4Data:
+    case cmsSig4colorData:  Count = 4;  break;
+
+    case cmsSigMCH5Data:
+    case cmsSig5colorData:  Count = 5;  break;
+    case cmsSigMCH6Data:
+    case cmsSig6colorData:  Count = 6;  break;
+    case cmsSigMCH7Data:
+    case cmsSig7colorData:  Count = 7;  break;
+
+    case cmsSigMCH8Data:
+    case cmsSig8colorData:  Count = 8;  break;
+    case cmsSigMCH9Data:
+    case cmsSig9colorData:  Count = 9;  break;
+    case cmsSigMCHAData:
+    case cmsSig10colorData: Count = 10; break;
+
+    case cmsSigMCHBData:
+    case cmsSig11colorData: Count = 11; break;
+    case cmsSigMCHCData:
+    case cmsSig12colorData: Count = 12; break;
+    case cmsSigMCHDData:
+    case cmsSig13colorData: Count = 13; break;
+
+    case cmsSigMCHEData:
+    case cmsSig14colorData: Count = 14; break;
+    case cmsSigMCHFData:
+    case cmsSig15colorData: Count = 15; break;
+
+    default:                Count = 3;  break;
+    }
+
+    return Count;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsplugin.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsplugin.cpp
new file mode 100755
index 0000000000..d54b4d19e4
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsplugin.cpp
@@ -0,0 +1,992 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// ----------------------------------------------------------------------------------
+// Encoding & Decoding support functions
+// ----------------------------------------------------------------------------------
+
+//      Little-Endian to Big-Endian
+
+// Adjust a word value after being read from / before being written to an ICC profile.
+// The two bytes are exchanged unless CMS_USE_BIG_ENDIAN is defined.
+cmsUInt16Number CMSEXPORT  _cmsAdjustEndianess16(cmsUInt16Number Word)
+{
+#ifdef CMS_USE_BIG_ENDIAN
+
+    // Nothing to exchange in this configuration
+    return Word;
+
+#else
+
+    // Low byte moves up, high byte moves down
+    return (cmsUInt16Number) (((Word & 0xFF) << 8) | ((Word >> 8) & 0xFF));
+#endif
+}
+
+
+// Transports to properly encoded values - note that ICC profiles use big endian notation.
+//
+// Byte order before:  1 2 3 4
+// Byte order after:   4 3 2 1
+//
+// Nothing is exchanged when CMS_USE_BIG_ENDIAN is defined.
+cmsUInt32Number CMSEXPORT  _cmsAdjustEndianess32(cmsUInt32Number DWord)
+{
+#ifndef CMS_USE_BIG_ENDIAN
+
+    cmsUInt32Number b0 = DWord & 0xFF;            // lowest byte
+    cmsUInt32Number b1 = (DWord >> 8)  & 0xFF;
+    cmsUInt32Number b2 = (DWord >> 16) & 0xFF;
+    cmsUInt32Number b3 = (DWord >> 24) & 0xFF;    // highest byte
+
+    // Mirror the four bytes
+    DWord = (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
+
+#endif
+
+    return DWord;
+}
+
+// Byte order before:  1 2 3 4 5 6 7 8
+// Byte order after:   8 7 6 5 4 3 2 1
+//
+// Stores the endian-adjusted copy of *QWord in *Result; *QWord itself is only read.
+// With CMS_USE_BIG_ENDIAN defined the value is copied through unchanged.
+// Result is asserted to be non-NULL.
+void CMSEXPORT  _cmsAdjustEndianess64(cmsUInt64Number* Result, cmsUInt64Number* QWord)
+{
+
+#ifndef CMS_USE_BIG_ENDIAN
+
+    cmsUInt8Number* Src = (cmsUInt8Number*) QWord;
+    cmsUInt8Number* Dst = (cmsUInt8Number*) Result;
+    int i;
+
+    _cmsAssert(Result != NULL);
+
+    // Source byte i lands at destination byte 7 - i
+    for (i = 0; i < 8; i++)
+        Dst[7 - i] = Src[i];
+
+#else
+    _cmsAssert(Result != NULL);
+
+    // NOTE(review): with CMS_DONT_USE_INT64 the value is indexed as two elements - presumably an array type; confirm
+#  ifdef CMS_DONT_USE_INT64
+    (*Result)[0] = QWord[0];
+    (*Result)[1] = QWord[1];
+#  else
+    *Result = *QWord;
+#  endif
+#endif
+}
+
+// Auxiliary -- read 8, 16 and 32-bit numbers. A NULL destination consumes the value without storing it.
+cmsBool CMSEXPORT  _cmsReadUInt8Number(cmsIOHANDLER* io, cmsUInt8Number* n)
+{
+    cmsUInt8Number Byte;
+
+    _cmsAssert(io != NULL);
+
+    if (io -> Read(io, &Byte, sizeof(cmsUInt8Number), 1) == 1) {
+        if (n != NULL) *n = Byte;
+        return TRUE;
+    }
+    return FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsReadUInt16Number(cmsIOHANDLER* io, cmsUInt16Number* n)
+{
+    // Reads one 16-bit number and adjusts its byte order; n may be NULL to skip the value
+    cmsUInt16Number raw;
+
+    _cmsAssert(io != NULL);
+
+    if (io -> Read(io, &raw, sizeof(cmsUInt16Number), 1) != 1) return FALSE;
+
+    if (n) *n = _cmsAdjustEndianess16(raw);
+    return TRUE;
+}
+
+cmsBool CMSEXPORT  _cmsReadUInt16Array(cmsIOHANDLER* io, cmsUInt32Number n, cmsUInt16Number* Array)
+{
+    // Reads n consecutive 16-bit numbers. With a NULL Array the values are still
+    // consumed from io but not stored. Stops with FALSE at the first failed read.
+    cmsUInt32Number k;
+    cmsUInt16Number* Slot;
+
+    _cmsAssert(io != NULL);
+
+    for (k = 0; k < n; k++) {
+
+        // Destination of element k, or NULL when the caller only wants to skip
+        Slot = (Array != NULL) ? Array + k : NULL;
+
+        if (!_cmsReadUInt16Number(io, Slot)) return FALSE;
+    }
+    return TRUE;
+}
+
+cmsBool CMSEXPORT  _cmsReadUInt32Number(cmsIOHANDLER* io, cmsUInt32Number* n)
+{
+    // Reads one 32-bit number and adjusts its byte order; a NULL n skips the value
+    cmsUInt32Number raw;
+    _cmsAssert(io != NULL);
+
+    if (io -> Read(io, &raw, sizeof(cmsUInt32Number), 1) == 1) {
+        if (n != NULL) *n = _cmsAdjustEndianess32(raw);
+        return TRUE;
+    }
+    return FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsReadFloat32Number(cmsIOHANDLER* io, cmsFloat32Number* n)
+{
+    // Reads a 32-bit float and adjusts its byte order. n may be NULL to skip the value.
+    // Returns FALSE on a failed read, on magnitudes above 1E+20 and, where fpclassify()
+    // is available, on anything that is neither zero nor a normal number.
+    cmsUInt32Number tmp;
+
+    _cmsAssert(io != NULL);
+
+    if (io->Read(io, &tmp, sizeof(cmsUInt32Number), 1) != 1)
+        return FALSE;
+
+    if (n != NULL) {
+        tmp = _cmsAdjustEndianess32(tmp);
+        memcpy(n, &tmp, sizeof(cmsFloat32Number));   // bit copy; casting the pointer would break aliasing rules
+
+        // Safeguard which covers against absurd values
+        if (*n > 1E+20 || *n < -1E+20) return FALSE;
+
+        #if defined(_MSC_VER) && _MSC_VER < 1800
+           return TRUE;
+        #elif defined (__BORLANDC__)
+           return TRUE;
+        #else
+           // fpclassify() required by C99 (only provided by MSVC >= 1800, VS2013 onwards)
+           return ((fpclassify(*n) == FP_ZERO) || (fpclassify(*n) == FP_NORMAL));
+        #endif
+    }
+    return TRUE;
+}
+
+
+cmsBool CMSEXPORT   _cmsReadUInt64Number(cmsIOHANDLER* io, cmsUInt64Number* n)
+{
+    // Reads a 64-bit number; the endian-adjusted value is stored through n unless n is NULL
+    cmsUInt64Number raw;
+
+    _cmsAssert(io != NULL);
+
+    // Bail out before touching *n when the read fails
+    if (io -> Read(io, &raw, sizeof(cmsUInt64Number), 1) != 1) return FALSE;
+
+    if (n == NULL) return TRUE;
+
+    _cmsAdjustEndianess64(n, &raw);
+
+    return TRUE;
+}
+
+
+cmsBool CMSEXPORT  _cmsRead15Fixed16Number(cmsIOHANDLER* io, cmsFloat64Number* n)
+{
+    // Reads a signed 15.16 fixed point number and hands it back as a double; a NULL n skips the value
+    cmsUInt32Number raw;
+    cmsS15Fixed16Number fixed;
+
+    _cmsAssert(io != NULL);
+
+    if (io -> Read(io, &raw, sizeof(cmsUInt32Number), 1) != 1) return FALSE;
+    if (n == NULL) return TRUE;
+
+    fixed = (cmsS15Fixed16Number) _cmsAdjustEndianess32(raw);
+    *n = _cms15Fixed16toDouble(fixed);
+    return TRUE;
+}
+
+
+cmsBool CMSEXPORT  _cmsReadXYZNumber(cmsIOHANDLER* io, cmsCIEXYZ* XYZ)
+{
+    // Reads an encoded XYZ triple in one go and decodes it; XYZ may be NULL to skip it
+    cmsEncodedXYZNumber raw;
+
+    _cmsAssert(io != NULL);
+
+    if (io ->Read(io, &raw, sizeof(cmsEncodedXYZNumber), 1) != 1) return FALSE;
+    if (XYZ == NULL) return TRUE;
+
+    // Each component: adjust the byte order first, then convert 15.16 fixed point to double
+    XYZ->X = _cms15Fixed16toDouble((cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) raw.X));
+    XYZ->Y = _cms15Fixed16toDouble((cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) raw.Y));
+    XYZ->Z = _cms15Fixed16toDouble((cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) raw.Z));
+    return TRUE;
+}
+
+cmsBool CMSEXPORT  _cmsWriteUInt8Number(cmsIOHANDLER* io, cmsUInt8Number n)
+{
+    // Writes a single byte; TRUE only when the handler's Write returns 1
+    _cmsAssert(io != NULL);
+
+    if (io -> Write(io, sizeof(cmsUInt8Number), &n) == 1) return TRUE;
+
+    return FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsWriteUInt16Number(cmsIOHANDLER* io, cmsUInt16Number n)
+{
+    // Writes n after adjusting its byte order for the profile
+    cmsUInt16Number encoded;
+
+    _cmsAssert(io != NULL);
+
+    encoded = _cmsAdjustEndianess16(n);
+
+    if (io -> Write(io, sizeof(cmsUInt16Number), &encoded) == 1) return TRUE;
+    return FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsWriteUInt16Array(cmsIOHANDLER* io, cmsUInt32Number n, const cmsUInt16Number* Array)
+{
+    // Writes the first n elements of Array, stopping with FALSE at the first failed write
+    cmsUInt32Number Left = n;
+    const cmsUInt16Number* Cursor = Array;
+
+    _cmsAssert(io != NULL);
+    _cmsAssert(Array != NULL);
+
+    while (Left-- > 0)
+        if (!_cmsWriteUInt16Number(io, *Cursor++)) return FALSE;
+    return TRUE;
+}
+
+cmsBool CMSEXPORT  _cmsWriteUInt32Number(cmsIOHANDLER* io, cmsUInt32Number n)
+{
+    // Writes n after adjusting its byte order for the profile
+    cmsUInt32Number encoded;
+
+    _cmsAssert(io != NULL);
+
+    encoded = _cmsAdjustEndianess32(n);
+
+    // Anything other than 1 from the handler counts as failure
+    return (io -> Write(io, sizeof(cmsUInt32Number), &encoded) == 1) ? TRUE : FALSE;
+}
+
+
+cmsBool CMSEXPORT  _cmsWriteFloat32Number(cmsIOHANDLER* io, cmsFloat32Number n)
+{
+    // Writes the bit pattern of n as a 32-bit number, byte order adjusted for the profile
+    cmsUInt32Number tmp;
+
+    _cmsAssert(io != NULL);
+
+    // Copy the bits instead of casting pointers, which would violate aliasing rules
+    memcpy(&tmp, &n, sizeof(tmp));
+    tmp = _cmsAdjustEndianess32(tmp);
+    if (io -> Write(io, sizeof(cmsUInt32Number), &tmp) != 1) return FALSE;
+    return TRUE;
+}
+
+cmsBool CMSEXPORT  _cmsWriteUInt64Number(cmsIOHANDLER* io, cmsUInt64Number* n)
+{
+    // Writes an endian-adjusted copy of *n; the caller's value is only read
+    cmsUInt64Number encoded;
+
+    _cmsAssert(io != NULL);
+
+    _cmsAdjustEndianess64(&encoded, n);
+
+    if (io -> Write(io, sizeof(cmsUInt64Number), &encoded) == 1) return TRUE;
+    return FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsWrite15Fixed16Number(cmsIOHANDLER* io, cmsFloat64Number n)
+{
+    // Converts n to signed 15.16 fixed point, adjusts the byte order and writes it
+    cmsS15Fixed16Number fixed;
+    cmsUInt32Number encoded;
+
+    _cmsAssert(io != NULL);
+
+    fixed   = _cmsDoubleTo15Fixed16(n);
+    encoded = _cmsAdjustEndianess32((cmsUInt32Number) fixed);
+    return (io -> Write(io, sizeof(cmsUInt32Number), &encoded) == 1) ? TRUE : FALSE;
+}
+
+cmsBool CMSEXPORT  _cmsWriteXYZNumber(cmsIOHANDLER* io, const cmsCIEXYZ* XYZ)
+{
+    // Encodes the three components as byte-order-adjusted 15.16 fixed point and writes
+    // them as one block. The handler's own result is passed back to the caller.
+    cmsEncodedXYZNumber encoded;
+
+    _cmsAssert(io != NULL);
+    _cmsAssert(XYZ != NULL);
+    encoded.X = (cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(XYZ->X));
+    encoded.Y = (cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(XYZ->Y));
+    encoded.Z = (cmsS15Fixed16Number) _cmsAdjustEndianess32((cmsUInt32Number) _cmsDoubleTo15Fixed16(XYZ->Z));
+    return io -> Write(io,  sizeof(cmsEncodedXYZNumber), &encoded);
+}
+
+// from Fixed point 8.8 to double: the high byte is the integer part, the low byte counts 1/256ths
+cmsFloat64Number CMSEXPORT _cms8Fixed8toDouble(cmsUInt16Number fixed8)
+{
+       cmsFloat64Number IntPart  = (cmsFloat64Number) (cmsUInt8Number) (((cmsUInt16Number) fixed8 >> 8) & 0xff);
+       cmsFloat64Number FracPart = (cmsFloat64Number) (cmsUInt8Number) (fixed8 & 0xff);
+
+       // 256 steps per unit
+       FracPart = FracPart / 256.0;
+       return IntPart + FracPart;
+}
+
+cmsUInt16Number CMSEXPORT _cmsDoubleTo8Fixed8(cmsFloat64Number val)
+{
+    // Go through 15.16 fixed point, drop its 8 lowest fractional bits and keep the next 16 bits
+    return  (cmsUInt16Number) ((_cmsDoubleTo15Fixed16(val) >> 8) & 0xFFFF);
+}
+
+// from Fixed point 15.16 to double
+//
+// A 15.16 number is the 32-bit integer fix32 counted in units of 1/65536, so a single
+// division gives the value. The result is exact: every 32-bit integer fits in a double
+// and dividing by a power of two does not round.
+//
+//   fix32 - signed 15.16 fixed point value
+//
+// Returns the value as a double, from -32768.0 up to just below +32768.0.
+cmsFloat64Number CMSEXPORT _cms15Fixed16toDouble(cmsS15Fixed16Number fix32)
+{
+    // Deliberately no sign/abs() split here: abs() of the most negative 32-bit value is
+    // undefined behavior. With a plain division that input maps to -32768.0 and all other
+    // inputs give the same result as a sign/magnitude computation would.
+
+    return (cmsFloat64Number) fix32 / 65536.0;
+}
+
+// from double to Fixed point 15.16: scales by 65536 and rounds via floor(x + 0.5)
+cmsS15Fixed16Number CMSEXPORT _cmsDoubleTo15Fixed16(cmsFloat64Number v)
+{
+    return ((cmsS15Fixed16Number) floor((v)*65536.0 + 0.5));
+}
+
+// Date/Time functions
+
+void CMSEXPORT _cmsDecodeDateTimeNumber(const cmsDateTimeNumber *Source, struct tm *Dest)
+{
+    // Expands a date/time record into a struct tm, adjusting the byte order of each field.
+    // Month and year get the struct tm offsets (-1 and -1900); week day and year day are set to -1.
+    _cmsAssert(Dest != NULL);
+    _cmsAssert(Source != NULL);
+
+    Dest->tm_year  = _cmsAdjustEndianess16(Source->year) - 1900;
+    Dest->tm_mon   = _cmsAdjustEndianess16(Source->month) - 1;
+    Dest->tm_mday  = _cmsAdjustEndianess16(Source->day);
+    Dest->tm_hour  = _cmsAdjustEndianess16(Source->hours);
+    Dest->tm_min   = _cmsAdjustEndianess16(Source->minutes);
+    Dest->tm_sec   = _cmsAdjustEndianess16(Source->seconds);
+    Dest->tm_wday  = Dest->tm_yday = -1;
+    Dest->tm_isdst = 0;
+}
+
+void CMSEXPORT _cmsEncodeDateTimeNumber(cmsDateTimeNumber *Dest, const struct tm *Source)
+{
+    // Packs a struct tm into a date/time record: month + 1, year + 1900, byte order adjusted per field
+    _cmsAssert(Dest != NULL);
+    _cmsAssert(Source != NULL);
+    Dest->year    = _cmsAdjustEndianess16((cmsUInt16Number) (Source->tm_year + 1900));
+    Dest->month   = _cmsAdjustEndianess16((cmsUInt16Number) (Source->tm_mon + 1));
+    Dest->day     = _cmsAdjustEndianess16((cmsUInt16Number) Source->tm_mday);
+    Dest->hours   = _cmsAdjustEndianess16((cmsUInt16Number) Source->tm_hour);
+    Dest->minutes = _cmsAdjustEndianess16((cmsUInt16Number) Source->tm_min);
+    Dest->seconds = _cmsAdjustEndianess16((cmsUInt16Number) Source->tm_sec);
+}
+
+// Read base and return type base. A failed read is reported as signature 0.
+cmsTagTypeSignature CMSEXPORT _cmsReadTypeBase(cmsIOHANDLER* io)
+{
+    _cmsTagBase Header;
+    cmsBool GotIt;
+
+    _cmsAssert(io != NULL);
+
+    GotIt = (io -> Read(io, &Header, sizeof(_cmsTagBase), 1) == 1);
+
+    return (cmsTagTypeSignature) (GotIt ? _cmsAdjustEndianess32(Header.sig) : 0);
+}
+
+// Setup base marker: the byte-order-adjusted type signature plus zeroed reserved bytes
+cmsBool  CMSEXPORT _cmsWriteTypeBase(cmsIOHANDLER* io, cmsTagTypeSignature sig)
+{
+    _cmsTagBase  Marker;
+
+    _cmsAssert(io != NULL);
+
+    memset(&Marker.reserved, 0, sizeof(Marker.reserved));
+    Marker.sig = (cmsTagTypeSignature) _cmsAdjustEndianess32(sig);
+    return io -> Write(io, sizeof(_cmsTagBase), &Marker);
+}
+
+cmsBool CMSEXPORT _cmsReadAlignment(cmsIOHANDLER* io)
+{
+    // Reads and discards the bytes between the current position and the one given by _cmsALIGNLONG
+    cmsUInt8Number  Pad[4];
+    cmsUInt32Number Here, Next, Gap;
+
+    _cmsAssert(io != NULL);
+
+    Here = io -> Tell(io);
+    Next = _cmsALIGNLONG(Here);
+    Gap  = Next - Here;
+
+    if (Gap == 0) return TRUE;       // already aligned
+    if (Gap > 4)  return FALSE;      // more than Pad can hold
+    return (io ->Read(io, Pad, Gap, 1) == 1);
+}
+
+cmsBool CMSEXPORT _cmsWriteAlignment(cmsIOHANDLER* io)
+{
+    // Pads the stream with zero bytes up to the position given by _cmsALIGNLONG
+    cmsUInt8Number  Zeros[4];
+    cmsUInt32Number Here, Next, Gap;
+
+    _cmsAssert(io != NULL);
+
+    Here = io -> Tell(io);
+    Next = _cmsALIGNLONG(Here);
+    Gap  = Next - Here;
+
+    if (Gap == 0) return TRUE;       // already aligned
+    if (Gap > 4)  return FALSE;      // more than Zeros can hold
+    memset(Zeros, 0, Gap);
+    return io -> Write(io, Gap, Zeros);
+}
+
+
+// To deal with text streams. 2K at most.
+// Formats frm and its arguments printf-style and writes the resulting text to io.
+// Returns FALSE when formatting fails or the text does not fit the internal buffer,
+// otherwise whatever the write returned.
+cmsBool CMSEXPORT _cmsIOPrintf(cmsIOHANDLER* io, const char* frm, ...)
+{
+    va_list args;
+    int len;
+    cmsUInt8Number Buffer[2048];
+
+    _cmsAssert(io != NULL);
+    _cmsAssert(frm != NULL);
+
+    va_start(args, frm);
+    len = vsnprintf((char*) Buffer, 2047, frm, args);
+    va_end(args);
+
+    // vsnprintf() gives a negative value on error and, on truncation, the length the
+    // full text would have needed (>= the size passed in). Using that length for the
+    // write would read past the end of Buffer, so both cases are fatal for us.
+    if (len < 0 || len >= 2047)
+        return FALSE;
+
+    return io ->Write(io, (cmsUInt32Number) len, Buffer);
+}
+
+
+// Plugin memory management -------------------------------------------------------------------------------------------------
+
+// Specialized malloc for plug-ins, that is freed upon exit.
+void* _cmsPluginMalloc(cmsContext ContextID, cmsUInt32Number size)
+{
+    struct _cmsContext_struct* Owner = _cmsGetContext(ContextID);
+
+    // Pool already there: plain sub-allocation
+    if (Owner ->MemPool != NULL)
+        return _cmsSubAlloc(Owner ->MemPool, size);
+
+    // A pool is created on demand only for a NULL ContextID; otherwise a missing pool is an error
+    if (ContextID != NULL) {
+        cmsSignalError(ContextID, cmsERROR_CORRUPTION_DETECTED, "NULL memory pool on context");
+        return NULL;
+    }
+
+    Owner ->MemPool = _cmsCreateSubAlloc(0, 2*1024);
+    if (Owner ->MemPool == NULL) return NULL;
+
+    return _cmsSubAlloc(Owner ->MemPool, size);
+}
+
+
+// Main plug-in dispatcher: registers Plug_in through cmsPluginTHR with a NULL context
+cmsBool CMSEXPORT cmsPlugin(void* Plug_in)
+{
+    return cmsPluginTHR(NULL, Plug_in);
+}
+
+cmsBool CMSEXPORT cmsPluginTHR(cmsContext id, void* Plug_in)
+{
+    // Walks the chain of plug-ins starting at Plug_in (linked through Next) and hands
+    // each one to the registration routine for its type. Stops with FALSE at the first
+    // plug-in that has a bad magic number, expects a newer Little CMS, has an unknown
+    // type or fails to register; nothing registered before that point is undone here.
+    cmsPluginBase* Cur = (cmsPluginBase*) Plug_in;
+    cmsBool Registered;
+
+    while (Cur != NULL) {
+
+        if (Cur -> Magic != cmsPluginMagicNumber) {
+            cmsSignalError(id, cmsERROR_UNKNOWN_EXTENSION, "Unrecognized plugin");
+            return FALSE;
+        }
+
+        if (Cur ->ExpectedVersion > LCMS_VERSION) {
+            cmsSignalError(id, cmsERROR_UNKNOWN_EXTENSION, "plugin needs Little CMS %d, current version is %d",
+                Cur ->ExpectedVersion, LCMS_VERSION);
+            return FALSE;
+        }
+
+        switch (Cur -> Type) {
+
+        case cmsPluginMemHandlerSig:
+            Registered = _cmsRegisterMemHandlerPlugin(id, Cur);
+            break;
+        case cmsPluginInterpolationSig:
+            Registered = _cmsRegisterInterpPlugin(id, Cur);
+            break;
+
+        case cmsPluginTagTypeSig:
+            Registered = _cmsRegisterTagTypePlugin(id, Cur);
+            break;
+        case cmsPluginTagSig:
+            Registered = _cmsRegisterTagPlugin(id, Cur);
+            break;
+
+        case cmsPluginFormattersSig:
+            Registered = _cmsRegisterFormattersPlugin(id, Cur);
+            break;
+        case cmsPluginRenderingIntentSig:
+            Registered = _cmsRegisterRenderingIntentPlugin(id, Cur);
+            break;
+        case cmsPluginParametricCurveSig:
+            Registered = _cmsRegisterParametricCurvesPlugin(id, Cur);
+            break;
+        case cmsPluginMultiProcessElementSig:
+            Registered = _cmsRegisterMultiProcessElementPlugin(id, Cur);
+            break;
+
+        case cmsPluginOptimizationSig:
+            Registered = _cmsRegisterOptimizationPlugin(id, Cur);
+            break;
+        case cmsPluginTransformSig:
+            Registered = _cmsRegisterTransformPlugin(id, Cur);
+            break;
+
+        case cmsPluginMutexSig:
+            Registered = _cmsRegisterMutexPlugin(id, Cur);
+            break;
+
+        default:
+            cmsSignalError(id, cmsERROR_UNKNOWN_EXTENSION, "Unrecognized plugin type '%X'", Cur -> Type);
+            return FALSE;
+        }
+
+        if (!Registered) return FALSE;
+
+        Cur = Cur -> Next;
+    }
+
+    return TRUE;
+}
+
+
+// Revert all plug-ins to default, by calling cmsUnregisterPluginsTHR with a NULL context
+void CMSEXPORT cmsUnregisterPlugins(void)
+{
+    cmsUnregisterPluginsTHR(NULL);
+}
+
+
+// The Global storage for system context. This is the one and only global variable pointers
+// structure; all global vars are referenced here. _cmsGetContext falls back to it for NULL or unknown contexts.
+static struct _cmsContext_struct globalContext = {
+    // NOTE(review): initializers are positional - their order must follow the struct declaration
+    NULL,                              // Not in the linked list
+    NULL,                              // No suballocator
+    {
+        NULL,                          //  UserPtr,
+        &_cmsLogErrorChunk,            //  Logger,
+        &_cmsAlarmCodesChunk,          //  AlarmCodes,
+        &_cmsAdaptationStateChunk,     //  AdaptationState,
+        &_cmsMemPluginChunk,           //  MemPlugin,
+        &_cmsInterpPluginChunk,        //  InterpPlugin,
+        &_cmsCurvesPluginChunk,        //  CurvesPlugin,
+        &_cmsFormattersPluginChunk,    //  FormattersPlugin,
+        &_cmsTagTypePluginChunk,       //  TagTypePlugin,
+        &_cmsTagPluginChunk,           //  TagPlugin,
+        &_cmsIntentsPluginChunk,       //  IntentPlugin,
+        &_cmsMPETypePluginChunk,       //  MPEPlugin,
+        &_cmsOptimizationPluginChunk,  //  OptimizationPlugin,
+        &_cmsTransformPluginChunk,     //  TransformPlugin,
+        &_cmsMutexPluginChunk          //  MutexPlugin
+    },
+
+    { NULL, NULL, NULL, NULL, NULL, NULL } // The default memory allocator is not used for context 0
+};
+
+
+// The context pool (linked list head) and the mutex held while contexts are linked or unlinked
+static _cmsMutex _cmsContextPoolHeadMutex = CMS_MUTEX_INITIALIZER;
+static struct _cmsContext_struct* _cmsContextPoolHead = NULL;
+
+// Internal: resolve a context handle to its structure, with guessing. Never returns NULL.
+struct _cmsContext_struct* _cmsGetContext(cmsContext ContextID)
+{
+    struct _cmsContext_struct* wanted = (struct _cmsContext_struct*) ContextID;
+    struct _cmsContext_struct* node;
+
+    // A NULL handle (context 0) always selects the global settings
+    if (wanted == NULL)
+        return &globalContext;
+
+    // Walk the pool; the handle is only compared here, never dereferenced
+    node = _cmsContextPoolHead;
+    while (node != NULL) {
+
+        if (node == wanted)
+            return node;        // New-style context
+
+        node = node ->Next;
+    }
+
+    // Handle is not in the pool: fall back to the global settings
+    return &globalContext;
+}
+
+
+// Internal: get the memory area associated with each context client.
+// Returns the block assigned to the specific zone, or the global context's block when the context has none.
+void* _cmsContextGetClientChunk(cmsContext ContextID, _cmsMemoryClient mc)
+{
+    struct _cmsContext_struct* ctx;
+    void *ptr;
+
+    if ((int) mc < 0 || mc >= MemoryClientMax) {
+        
+           cmsSignalError(ContextID, cmsERROR_INTERNAL, "Bad context client -- possible corruption");
+
+           // This is catastrophic. Should never reach here
+           _cmsAssert(0);
+
+           // Reverts to the global context's UserPtr slot (initialised to NULL in globalContext)
+           return globalContext.chunks[UserPtr];
+    }
+    
+    ctx = _cmsGetContext(ContextID);
+    ptr = ctx ->chunks[mc];
+
+    if (ptr != NULL)
+        return ptr;
+
+    // A null ptr means no special settings for that context, and this 
+    // reverts to Context0 globals
+    return globalContext.chunks[mc];    
+}
+
+
+// This function returns the given context to its default pristine state,
+// as if no plug-ins had been declared. There is no way to unregister a single
+// plug-in: a single call to cmsPluginTHR() may register many different
+// plug-ins simultaneously, so there is no way to identify which plug-in
+// to unregister. Every registration routine is simply called with a NULL plug-in.
+void CMSEXPORT cmsUnregisterPluginsTHR(cmsContext ContextID)
+{
+    _cmsRegisterMemHandlerPlugin(ContextID, NULL);
+    _cmsRegisterInterpPlugin(ContextID, NULL);
+    _cmsRegisterTagTypePlugin(ContextID, NULL);
+    _cmsRegisterTagPlugin(ContextID, NULL);
+    _cmsRegisterFormattersPlugin(ContextID, NULL);
+    _cmsRegisterRenderingIntentPlugin(ContextID, NULL);
+    _cmsRegisterParametricCurvesPlugin(ContextID, NULL);
+    _cmsRegisterMultiProcessElementPlugin(ContextID, NULL);
+    _cmsRegisterOptimizationPlugin(ContextID, NULL);
+    _cmsRegisterTransformPlugin(ContextID, NULL);    
+    _cmsRegisterMutexPlugin(ContextID, NULL);
+}
+
+
+// Scans the plug-in bundle and returns its memory manager plug-in, or NULL if it has none
+static
+cmsPluginMemHandler* _cmsFindMemoryPlugin(void* PluginBundle)
+{
+    cmsPluginBase* entry = (cmsPluginBase*) PluginBundle;
+
+    // The bundle is a chain linked through Next; the first entry passing all tests wins
+    for (; entry != NULL; entry = entry -> Next) {
+        // Magic number does not match
+        if (entry -> Magic != cmsPluginMagicNumber) continue;
+
+        // Expects a version above LCMS_VERSION
+        if (entry -> ExpectedVersion > LCMS_VERSION) continue;
+
+        // Some other kind of plug-in
+        if (entry -> Type != cmsPluginMemHandlerSig) continue;
+
+        return (cmsPluginMemHandler*) entry;
+    }
+
+    return NULL;    // Nope, revert to defaults
+}
+
+
+// Creates a new context with optional associated plug-ins. Caller may also specify an optional pointer to user-defined
+// data that will be forwarded to plug-ins and logger. Returns NULL if anything fails.
+cmsContext CMSEXPORT cmsCreateContext(void* Plugin, void* UserData)
+{
+    struct _cmsContext_struct* ctx;
+    struct _cmsContext_struct  fakeContext;   // Stack stand-in: only its allocator and UserPtr/MemPlugin slots are filled in
+        
+    // See the comments regarding locking in lcms2_internal.h
+    // for an explanation of why we need the following code.
+#ifdef CMS_IS_WINDOWS_
+#ifndef CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+    {
+        static HANDLE _cmsWindowsInitMutex = NULL;
+        static volatile HANDLE* mutex = &_cmsWindowsInitMutex;
+
+        if (*mutex == NULL)
+        {
+            HANDLE p = CreateMutex(NULL, FALSE, NULL);
+            if (p && InterlockedCompareExchangePointer((void **)mutex, (void*)p, NULL) != NULL)
+                CloseHandle(p);   // Another handle was already stored: discard ours
+        }
+        if (*mutex == NULL || WaitForSingleObject(*mutex, INFINITE) == WAIT_FAILED)
+            return NULL;
+        if (((void **)&_cmsContextPoolHeadMutex)[0] == NULL)
+            InitializeCriticalSection(&_cmsContextPoolHeadMutex);
+        if (*mutex == NULL || !ReleaseMutex(*mutex))
+            return NULL;
+    }
+#endif
+#endif
+
+    _cmsInstallAllocFunctions(_cmsFindMemoryPlugin(Plugin), &fakeContext.DefaultMemoryManager);
+    
+    fakeContext.chunks[UserPtr]     = UserData;
+    fakeContext.chunks[MemPlugin]   = &fakeContext.DefaultMemoryManager;
+
+    // Create the context structure, allocating through the stand-in since ctx does not exist yet.
+    ctx = (struct _cmsContext_struct*) _cmsMalloc(&fakeContext, sizeof(struct _cmsContext_struct));
+    if (ctx == NULL)   
+        return NULL;     // Something very wrong happened!
+
+    // Init the structure and the memory manager
+    memset(ctx, 0, sizeof(struct _cmsContext_struct));
+
+    // Keep memory manager
+    memcpy(&ctx->DefaultMemoryManager, &fakeContext.DefaultMemoryManager, sizeof(_cmsMemPluginChunk)); 
+   
+    // Maintain the linked list (with proper locking): the new context becomes the head
+    _cmsEnterCriticalSectionPrimitive(&_cmsContextPoolHeadMutex);
+       ctx ->Next = _cmsContextPoolHead;
+       _cmsContextPoolHead = ctx;
+    _cmsLeaveCriticalSectionPrimitive(&_cmsContextPoolHeadMutex);
+
+    ctx ->chunks[UserPtr]     = UserData;
+    ctx ->chunks[MemPlugin]   = &ctx->DefaultMemoryManager;
+   
+    // Now we can allocate the pool by using default memory manager
+    ctx ->MemPool = _cmsCreateSubAlloc(ctx, 22 * sizeof(void*));  // default size about 22 pointers
+    if (ctx ->MemPool == NULL) {
+
+         cmsDeleteContext(ctx);
+        return NULL;
+    }
+
+    _cmsAllocLogErrorChunk(ctx, NULL);
+    _cmsAllocAlarmCodesChunk(ctx, NULL);
+    _cmsAllocAdaptationStateChunk(ctx, NULL);
+    _cmsAllocMemPluginChunk(ctx, NULL);
+    _cmsAllocInterpPluginChunk(ctx, NULL);
+    _cmsAllocCurvesPluginChunk(ctx, NULL);
+    _cmsAllocFormattersPluginChunk(ctx, NULL);
+    _cmsAllocTagTypePluginChunk(ctx, NULL);
+    _cmsAllocMPETypePluginChunk(ctx, NULL);
+    _cmsAllocTagPluginChunk(ctx, NULL);
+    _cmsAllocIntentsPluginChunk(ctx, NULL);
+    _cmsAllocOptimizationPluginChunk(ctx, NULL);
+    _cmsAllocTransformPluginChunk(ctx, NULL);
+    _cmsAllocMutexPluginChunk(ctx, NULL);
+
+    // Setup the plug-ins; on failure the half-built context is destroyed again
+    if (!cmsPluginTHR(ctx, Plugin)) {
+    
+        cmsDeleteContext(ctx);
+        return NULL;
+    }
+
+    return (cmsContext) ctx;  
+}
+
+// Duplicates a context with all associated plug-ins.
+// Caller may specify an optional pointer to user-defined
+// data that will be forwarded to plug-ins and logger. Returns NULL on failure.
+cmsContext CMSEXPORT cmsDupContext(cmsContext ContextID, void* NewUserData)
+{
+    int i;
+    struct _cmsContext_struct* ctx;
+    const struct _cmsContext_struct* src = _cmsGetContext(ContextID);
+
+    // A NULL NewUserData keeps the user data of the source context
+    void* userData = (NewUserData != NULL) ? NewUserData : src -> chunks[UserPtr];
+
+    ctx = (struct _cmsContext_struct*) _cmsMalloc(ContextID, sizeof(struct _cmsContext_struct));
+    if (ctx == NULL)
+        return NULL;     // Something very wrong happened
+
+    // Zero the block, as cmsCreateContext() does, so the error paths below never see garbage chunks
+    memset(ctx, 0, sizeof(struct _cmsContext_struct));
+    // Setup default memory allocators
+    memcpy(&ctx->DefaultMemoryManager, &src->DefaultMemoryManager, sizeof(ctx->DefaultMemoryManager));
+
+    // Maintain the linked list: the copy becomes the new head
+    _cmsEnterCriticalSectionPrimitive(&_cmsContextPoolHeadMutex);
+       ctx ->Next = _cmsContextPoolHead;
+       _cmsContextPoolHead = ctx;
+    _cmsLeaveCriticalSectionPrimitive(&_cmsContextPoolHeadMutex);
+
+    ctx ->chunks[UserPtr]    = userData;
+    ctx ->chunks[MemPlugin]  = &ctx->DefaultMemoryManager;
+
+    ctx ->MemPool = _cmsCreateSubAlloc(ctx, 22 * sizeof(void*));
+    if (ctx ->MemPool == NULL) {
+        cmsDeleteContext(ctx);
+        return NULL;
+    }
+
+    // Allocate all required chunks, each one copied from the source context.
+    _cmsAllocLogErrorChunk(ctx, src);
+    _cmsAllocAlarmCodesChunk(ctx, src);
+    _cmsAllocAdaptationStateChunk(ctx, src);
+    _cmsAllocMemPluginChunk(ctx, src);
+    _cmsAllocInterpPluginChunk(ctx, src);
+    _cmsAllocCurvesPluginChunk(ctx, src);
+    _cmsAllocFormattersPluginChunk(ctx, src);
+    _cmsAllocTagTypePluginChunk(ctx, src);
+    _cmsAllocMPETypePluginChunk(ctx, src);
+    _cmsAllocTagPluginChunk(ctx, src);
+    _cmsAllocIntentsPluginChunk(ctx, src);
+    _cmsAllocOptimizationPluginChunk(ctx, src);
+    _cmsAllocTransformPluginChunk(ctx, src);
+    _cmsAllocMutexPluginChunk(ctx, src);
+
+    // Make sure no one failed: it is the NEW context's chunks that must all be set, not the source's
+    for (i=Logger; i < MemoryClientMax; i++) {
+        if (ctx ->chunks[i] == NULL) {
+            cmsDeleteContext((cmsContext) ctx);
+            return NULL;
+        }
+    }
+
+    return (cmsContext) ctx;
+}
+
+
+/*
+static
+struct _cmsContext_struct* FindPrev(struct _cmsContext_struct* id)
+{
+    struct _cmsContext_struct* prev;
+
+    // Search for previous
+    for (prev = _cmsContextPoolHead; 
+             prev != NULL;
+             prev = prev ->Next)
+    {
+        if (prev ->Next == id)
+            return prev;
+    }
+
+    return NULL;  // List is empty or only one element!
+}
+*/
+
+// Frees any resources associated with the given context,
+// and destroys the context placeholder. A NULL ContextID is ignored.
+// The ContextID can no longer be used in any THR operation.
+void CMSEXPORT cmsDeleteContext(cmsContext ContextID)
+{
+    if (ContextID != NULL) {
+
+        struct _cmsContext_struct* ctx = (struct _cmsContext_struct*) ContextID;   // NOTE(review): cast directly, not looked up via _cmsGetContext()
+        struct _cmsContext_struct  fakeContext;  
+        struct _cmsContext_struct* prev;
+
+        // Copy the allocator into a stack stand-in so that ctx itself can still be freed at the end
+        memcpy(&fakeContext.DefaultMemoryManager, &ctx->DefaultMemoryManager, sizeof(ctx->DefaultMemoryManager));
+
+        fakeContext.chunks[UserPtr]     = ctx ->chunks[UserPtr];
+        fakeContext.chunks[MemPlugin]   = &fakeContext.DefaultMemoryManager;
+
+        // Get rid of plugins
+        cmsUnregisterPluginsTHR(ContextID); 
+
+        // Since all memory is allocated in the private pool, all what we need to do is destroy the pool
+        if (ctx -> MemPool != NULL)
+              _cmsSubAllocDestroy(ctx ->MemPool);
+        ctx -> MemPool = NULL;
+
+        // Maintain list: unlink ctx while holding the pool mutex
+        _cmsEnterCriticalSectionPrimitive(&_cmsContextPoolHeadMutex);
+        if (_cmsContextPoolHead == ctx) { 
+
+            _cmsContextPoolHead = ctx->Next;
+        }
+        else {
+
+            // Search for previous; nothing is unlinked if ctx is not found in the list
+            for (prev = _cmsContextPoolHead; 
+                 prev != NULL;
+                 prev = prev ->Next)
+            {
+                if (prev -> Next == ctx) {
+                    prev -> Next = ctx ->Next;
+                    break;
+                }
+            }
+        }
+        _cmsLeaveCriticalSectionPrimitive(&_cmsContextPoolHeadMutex);
+
+        // free the memory block itself, through the stand-in's copy of the allocator
+        _cmsFree(&fakeContext, ctx);
+    }
+}
+
+// Returns the user data associated with the given ContextID (its UserPtr client chunk), or NULL if no user data was attached on context creation
+void* CMSEXPORT cmsGetContextUserData(cmsContext ContextID)
+{
+    return _cmsContextGetClientChunk(ContextID, UserPtr);
+}
+
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsps2.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsps2.cpp
new file mode 100755
index 0000000000..5802a14292
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsps2.cpp
@@ -0,0 +1,1597 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// PostScript ColorRenderingDictionary and ColorSpaceArray
+
+
+#define MAXPSCOLS   60      // Columns on tables
+
+/*
+    Implementation
+    --------------
+
+  PostScript does use XYZ as its internal PCS. But since PostScript
+  interpolation tables are limited to 8 bits, I use Lab as a way to
+  improve the accuracy, favoring perceptual results. So, for the creation
+  of each CRD, CSA the profiles are converted to Lab via a device
+  link between  profile -> Lab or Lab -> profile. The PS code necessary to
+  convert Lab <-> XYZ is also included.
+
+
+
+  Color Space Arrays (CSA)
+  ==================================================================================
+
+  In order to obtain precision, the code chooses between three ways to implement
+  the device -> XYZ transform. The three cases are monochrome profiles (often
+  implemented as a set of curves), matrix-shaper profiles and Pipeline-based profiles.
+
+  Monochrome
+  -----------
+
+  This is implemented as a /CIEBasedA CSA. The prelinearization curve is
+  placed into the /DecodeA section, and the matrix equals D50. Since there are
+  no interpolation tables here, I do the conversion directly to XYZ.
+
+  NOTE: CLUT-based monochrome profiles are NOT supported. So, cmsFLAGS_MATRIXINPUT
+  flag is forced on such profiles.
+
+    [ /CIEBasedA
+      <<
+            /DecodeA { transfer function } bind
+            /MatrixA [D50]
+            /RangeLMN [ 0.0 cmsD50X 0.0 cmsD50Y 0.0 cmsD50Z ]
+            /WhitePoint [D50]
+            /BlackPoint [BP]
+            /RenderingIntent (intent)
+      >>
+    ]
+
+   On simpler profiles, the PCS is already XYZ, so no conversion is required.
+
+
+   Matrix-shaper based
+   -------------------
+
+   This is implemented with either /CIEBasedABC or /CIEBasedDEF, depending
+   on the profile implementation. Since there are no interpolation tables here, I do
+   the conversion directly to XYZ.
+
+
+
+    [ /CIEBasedABC
+            <<
+                /DecodeABC [ {transfer1} {transfer2} {transfer3} ]
+                /MatrixABC [Matrix]
+                /RangeLMN [ 0.0 cmsD50X 0.0 cmsD50Y 0.0 cmsD50Z ]
+                /DecodeLMN [ { / 2} dup dup ]
+                /WhitePoint [D50]
+                /BlackPoint [BP]
+                /RenderingIntent (intent)
+            >>
+    ]
+
+
+    CLUT based
+    ----------
+
+     Lab is used in such cases.
+
+    [ /CIEBasedDEF
+            <<
+            /DecodeDEF [ <prelinearization> ]
+            /Table [ p p p [<...>]]
+            /RangeABC [ 0 1 0 1 0 1]
+            /DecodeABC[ <postlinearization> ]
+            /RangeLMN [ -0.236 1.254 0 1 -0.635 1.640 ]
+               % -128/500 1+127/500 0 1  -127/200 1+128/200
+            /MatrixABC [ 1 1 1 1 0 0 0 0 -1]
+            /WhitePoint [D50]
+            /BlackPoint [BP]
+            /RenderingIntent (intent)
+    ]
+
+
+  Color Rendering Dictionaries (CRD)
+  ==================================
+  These are always implemented as CLUT, and always are using Lab. Since CRD are expected to
+  be used as resources, the code adds the definition as well.
+
+  <<
+    /ColorRenderingType 1
+    /WhitePoint [ D50 ]
+    /BlackPoint [BP]
+    /MatrixPQR [ Bradford ]
+    /RangePQR [-0.125 1.375 -0.125 1.375 -0.125 1.375 ]
+    /TransformPQR [
+    {4 index 3 get div 2 index 3 get mul exch pop exch pop exch pop exch pop } bind
+    {4 index 4 get div 2 index 4 get mul exch pop exch pop exch pop exch pop } bind
+    {4 index 5 get div 2 index 5 get mul exch pop exch pop exch pop exch pop } bind
+    ]
+    /MatrixABC <...>
+    /EncodeABC <...>
+    /RangeABC  <.. used for  XYZ -> Lab>
+    /EncodeLMN
+    /RenderTable [ p p p [<...>]]
+
+    /RenderingIntent (Perceptual)
+  >>
+  /Current exch /ColorRendering defineresource pop
+
+
+  The following stages are used to convert from XYZ to Lab
+  --------------------------------------------------------
+
+  Input is given at LMN stage on X, Y, Z
+
+  Encode LMN gives us f(X/Xn), f(Y/Yn), f(Z/Zn)
+
+  /EncodeLMN [
+
+    { 0.964200  div dup 0.008856 le {7.787 mul 16 116 div add}{1 3 div exp} ifelse } bind
+    { 1.000000  div dup 0.008856 le {7.787 mul 16 116 div add}{1 3 div exp} ifelse } bind
+    { 0.824900  div dup 0.008856 le {7.787 mul 16 116 div add}{1 3 div exp} ifelse } bind
+
+    ]
+
+
+  MatrixABC is used to compute f(Y/Yn), f(X/Xn) - f(Y/Yn), f(Y/Yn) - f(Z/Zn)
+
+  | 0  1  0|
+  | 1 -1  0|
+  | 0  1 -1|
+
+  /MatrixABC [ 0 1 0 1 -1 1 0 0 -1 ]
+
+ EncodeABC finally gives Lab values.
+
+  /EncodeABC [
+    { 116 mul  16 sub 100 div  } bind
+    { 500 mul 128 add 255 div  } bind
+    { 200 mul 128 add 255 div  } bind
+    ]
+
+  The following stages are used to convert Lab to XYZ
+  ----------------------------------------------------
+
+    /RangeABC [ 0 1 0 1 0 1]
+    /DecodeABC [ { 100 mul 16 add 116 div } bind
+                 { 255 mul 128 sub 500 div } bind
+                 { 255 mul 128 sub 200 div } bind
+               ]
+
+    /MatrixABC [ 1 1 1 1 0 0 0 0 -1]
+    /DecodeLMN [
+                {dup 6 29 div ge {dup dup mul mul} {4 29 div sub 108 841 div mul} ifelse 0.964200 mul} bind
+                {dup 6 29 div ge {dup dup mul mul} {4 29 div sub 108 841 div mul} ifelse } bind
+                {dup 6 29 div ge {dup dup mul mul} {4 29 div sub 108 841 div mul} ifelse 0.824900 mul} bind
+                ]
+
+
+*/
+
+/*
+
+ PostScript algorithms discussion.
+ =========================================================================================================
+
+  1D interpolation algorithm
+
+
+  1D interpolation (float)
+  ------------------------
+
+    val2 = Domain * Value;
+
+    cell0 = (int) floor(val2);
+    cell1 = (int) ceil(val2);
+
+    rest = val2 - cell0;
+
+    y0 = LutTable[cell0] ;
+    y1 = LutTable[cell1] ;
+
+    y = y0 + (y1 - y0) * rest;
+
+
+
+  PostScript code                   Stack
+  ================================================
+
+  {                                 % v
+    <check 0..1.0>
+    [array]                         % v tab
+    dup                             % v tab tab
+    length 1 sub                    % v tab dom
+
+    3 -1 roll                       % tab dom v
+
+    mul                             % tab val2
+    dup                             % tab val2 val2
+    dup                             % tab val2 val2 val2
+    floor cvi                       % tab val2 val2 cell0
+    exch                            % tab val2 cell0 val2
+    ceiling cvi                     % tab val2 cell0 cell1
+
+    3 index                         % tab val2 cell0 cell1 tab
+    exch                            % tab val2 cell0 tab cell1
+    get                             % tab val2 cell0 y1
+
+    4 -1 roll                       % val2 cell0 y1 tab
+    3 -1 roll                       % val2 y1 tab cell0
+    get                             % val2 y1 y0
+
+    dup                             % val2 y1 y0 y0
+    3 1 roll                        % val2 y0 y1 y0
+
+    sub                             % val2 y0 (y1-y0)
+    3 -1 roll                       % y0 (y1-y0) val2
+    dup                             % y0 (y1-y0) val2 val2
+    floor cvi                       % y0 (y1-y0) val2 floor(val2)
+    sub                             % y0 (y1-y0) rest
+    mul                             % y0 t1
+    add                             % y
+    65535 div                       % result
+
+  } bind
+
+
+*/
+
+
+// This struct holds the state of the CLUT block currently being written (cargo for OutputValueSampler)
+typedef struct {
+    _cmsStageCLutData* Pipeline;    // CLUT data of the stage being dumped
+    cmsIOHANDLER* m;                // Output handler everything is printed to
+
+    int FirstComponent;             // Last In[0] seen by the sampler, -1 before the first node
+    int SecondComponent;            // Last In[1] seen by the sampler, -1 at the start of a block
+
+    const char* PreMaj;             // Printed when In[0] changes, before the new block
+    const char* PostMaj;            // Printed when In[0] changes, after the previous block
+    const char* PreMin;             // Printed when In[1] changes, before the new row
+    const char* PostMin;            // Printed when In[1] changes, after the previous row
+
+    int  FixWhite;    // Force mapping of pure white
+
+    cmsColorSpaceSignature  ColorSpace;  // ColorSpace of profile
+
+
+} cmsPsSamplerCargo;
+
+static int _cmsPSActualColumn = 0;   // Hex characters written so far on the current output line (see WriteByte)
+
+
+// Convert to byte: divides the 16-bit value by 257 and rounds to nearest (so 65535 -> 255)
+static
+cmsUInt8Number Word2Byte(cmsUInt16Number w)
+{
+    return (cmsUInt8Number) floor((cmsFloat64Number) w / 257.0 + 0.5);
+}
+
+
+// Convert to byte (using ICC2 notation)
+/*
+static
+cmsUInt8Number L2Byte(cmsUInt16Number w)
+{
+    int ww = w + 0x0080;
+
+    if (ww > 0xFFFF) return 0xFF;
+
+    return (cmsUInt8Number) ((cmsUInt16Number) (ww >> 8) & 0xFF);
+}
+*/
+
+// Emit one byte as two lowercase hex digits, breaking the line once it has grown too long
+
+static
+void WriteByte(cmsIOHANDLER* m, cmsUInt8Number b)
+{
+    _cmsIOPrintf(m, "%02x", b);
+
+    // Two characters were just written; wrap only after MAXPSCOLS has been exceeded
+    if ((_cmsPSActualColumn += 2) <= MAXPSCOLS)
+        return;
+
+    _cmsIOPrintf(m, "\n");
+    _cmsPSActualColumn = 0;
+}
+
+// ----------------------------------------------------------------- PostScript generation
+
+
+// Copies txt (truncated to 2047 chars) into a static buffer, turning CR and LF into blanks
+static
+char* RemoveCR(const char* txt)
+{
+    static char Buffer[2048];   // Shared by every call: the result only lasts until the next call
+    size_t i;
+
+    strncpy(Buffer, txt, sizeof(Buffer) - 1);
+    Buffer[sizeof(Buffer) - 1] = '\0';
+
+    for (i = 0; Buffer[i] != '\0'; i++)
+        if (Buffer[i] == '\r' || Buffer[i] == '\n') Buffer[i] = ' ';
+
+    return Buffer;
+}
+
+static
+void EmitHeader(cmsIOHANDLER* m, const char* Title, cmsHPROFILE hProfile)   // Writes the PostScript comment header of a resource
+{
+    time_t timer;
+    cmsMLU *Description, *Copyright;
+    char DescASCII[256], CopyrightASCII[256];
+
+    time(&timer);   // Timestamp for the "Created:" line
+
+    Description = (cmsMLU*) cmsReadTag(hProfile, cmsSigProfileDescriptionTag);
+    Copyright   = (cmsMLU*) cmsReadTag(hProfile, cmsSigCopyrightTag);
+
+    // Start from empty strings and pre-terminate the last byte, in case a tag is missing
+    DescASCII[0] = DescASCII[255] = 0;
+    CopyrightASCII[0] = CopyrightASCII[255] = 0;
+
+    if (Description != NULL) cmsMLUgetASCII(Description,  cmsNoLanguage, cmsNoCountry, DescASCII,       255);
+    if (Copyright != NULL)   cmsMLUgetASCII(Copyright,    cmsNoLanguage, cmsNoCountry, CopyrightASCII,  255);
+
+    _cmsIOPrintf(m, "%%!PS-Adobe-3.0\n");
+    _cmsIOPrintf(m, "%%\n");
+    _cmsIOPrintf(m, "%% %s\n", Title);
+    _cmsIOPrintf(m, "%% Source: %s\n", RemoveCR(DescASCII));        // RemoveCR() keeps each text on one comment line
+    _cmsIOPrintf(m, "%%         %s\n", RemoveCR(CopyrightASCII));
+    _cmsIOPrintf(m, "%% Created: %s", ctime(&timer)); // ctime appends a \n!!!
+    _cmsIOPrintf(m, "%%\n");
+    _cmsIOPrintf(m, "%%%%BeginResource\n");
+
+}
+
+
+// Emits the /BlackPoint and /WhitePoint entries. White point is always D50; the black point is
+// the one passed in by the caller (the device black point adapted to D50).
+
+static
+void EmitWhiteBlackD50(cmsIOHANDLER* m, cmsCIEXYZ* BlackPoint)
+{
+
+    _cmsIOPrintf(m, "/BlackPoint [%f %f %f]\n", BlackPoint -> X,
+                                          BlackPoint -> Y,
+                                          BlackPoint -> Z);
+
+    _cmsIOPrintf(m, "/WhitePoint [%f %f %f]\n", cmsD50_XYZ()->X,
+                                          cmsD50_XYZ()->Y,
+                                          cmsD50_XYZ()->Z);
+}
+
+
+static
+void EmitRangeCheck(cmsIOHANDLER* m)   // Emits PostScript that clamps the top-of-stack value to 0.0 .. 1.0
+{
+    _cmsIOPrintf(m, "dup 0.0 lt { pop 0.0 } if "
+                    "dup 1.0 gt { pop 1.0 } if ");
+
+}
+
+// Writes the /RenderingIntent entry naming the given intent
+
+static
+void EmitIntent(cmsIOHANDLER* m, cmsUInt32Number RenderingIntent)
+{
+    const char* name = "Undefined";     // Used for anything that is not one of the intents tested below
+
+    if (RenderingIntent == INTENT_PERCEPTUAL)
+        name = "Perceptual";
+    else if (RenderingIntent == INTENT_RELATIVE_COLORIMETRIC)
+        name = "RelativeColorimetric";
+    else if (RenderingIntent == INTENT_ABSOLUTE_COLORIMETRIC)
+        name = "AbsoluteColorimetric";
+    else if (RenderingIntent == INTENT_SATURATION)
+        name = "Saturation";
+
+    // The name goes out as a PostScript string, in parentheses
+    _cmsIOPrintf(m, "/RenderingIntent (%s)\n", name);
+}
+
+//
+//  Convert L* to Y
+//
+//      Y = Yn*[ (L* + 16) / 116] ^ 3   if (L*) >= 6 / 29
+//        = Yn*( L* / 116) / 7.787      if (L*) < 6 / 29
+//
+
+/*
+static
+void EmitL2Y(cmsIOHANDLER* m)
+{
+    _cmsIOPrintf(m,
+            "{ "
+                "100 mul 16 add 116 div "               // (L * 100 + 16) / 116
+                 "dup 6 29 div ge "                     // >= 6 / 29 ?
+                 "{ dup dup mul mul } "                 // yes, ^3 and done
+                 "{ 4 29 div sub 108 841 div mul } "    // no, slope limiting
+            "ifelse } bind ");
+}
+*/
+
+
+// Lab -> XYZ: emits the RangeABC, DecodeABC, MatrixABC, RangeLMN and DecodeLMN entries; see the discussion above
+
+static
+void EmitLab2XYZ(cmsIOHANDLER* m)
+{
+    _cmsIOPrintf(m, "/RangeABC [ 0 1 0 1 0 1]\n");
+    _cmsIOPrintf(m, "/DecodeABC [\n");
+    _cmsIOPrintf(m, "{100 mul  16 add 116 div } bind\n");
+    _cmsIOPrintf(m, "{255 mul 128 sub 500 div } bind\n");
+    _cmsIOPrintf(m, "{255 mul 128 sub 200 div } bind\n");
+    _cmsIOPrintf(m, "]\n");
+    _cmsIOPrintf(m, "/MatrixABC [ 1 1 1 1 0 0 0 0 -1]\n");
+    _cmsIOPrintf(m, "/RangeLMN [ -0.236 1.254 0 1 -0.635 1.640 ]\n");
+    _cmsIOPrintf(m, "/DecodeLMN [\n");
+    _cmsIOPrintf(m, "{dup 6 29 div ge {dup dup mul mul} {4 29 div sub 108 841 div mul} ifelse 0.964200 mul} bind\n");
+    _cmsIOPrintf(m, "{dup 6 29 div ge {dup dup mul mul} {4 29 div sub 108 841 div mul} ifelse } bind\n");
+    _cmsIOPrintf(m, "{dup 6 29 div ge {dup dup mul mul} {4 29 div sub 108 841 div mul} ifelse 0.824900 mul} bind\n");
+    _cmsIOPrintf(m, "]\n");
+}
+
+
+
+// Outputs one tone curve as a PostScript procedure. Nothing is written for a NULL, empty or linear curve;
+// an estimated gamma is written as "{ g exp } bind"; otherwise the 16-bit table plus interpolation code.
+static
+void Emit1Gamma(cmsIOHANDLER* m, cmsToneCurve* Table)
+{
+    cmsUInt32Number i;
+    cmsFloat64Number gamma;
+
+    if (Table == NULL) return; // Error
+
+    if (Table ->nEntries <= 0) return;  // Empty table
+
+    // Suppress whole if identity
+    if (cmsIsToneCurveLinear(Table)) return;
+
+    // Check if is really an exponential. If so, emit "exp"
+    gamma = cmsEstimateGamma(Table, 0.001);
+     if (gamma > 0) {
+            _cmsIOPrintf(m, "{ %g exp } bind ", gamma);
+            return;
+     }
+
+    _cmsIOPrintf(m, "{ ");
+
+    // Bounds check
+    EmitRangeCheck(m);
+
+    // Emit interpolation code
+
+    // PostScript code                      Stack
+    // ===============                      ========================
+                                            // v
+    _cmsIOPrintf(m, " [");
+
+    for (i=0; i < Table->nEntries; i++) {
+        _cmsIOPrintf(m, "%d ", Table->Table16[i]);
+    }
+
+    _cmsIOPrintf(m, "] ");                        // v tab
+
+    _cmsIOPrintf(m, "dup ");                      // v tab tab
+    _cmsIOPrintf(m, "length 1 sub ");             // v tab dom
+    _cmsIOPrintf(m, "3 -1 roll ");                // tab dom v
+    _cmsIOPrintf(m, "mul ");                      // tab val2
+    _cmsIOPrintf(m, "dup ");                      // tab val2 val2
+    _cmsIOPrintf(m, "dup ");                      // tab val2 val2 val2
+    _cmsIOPrintf(m, "floor cvi ");                // tab val2 val2 cell0
+    _cmsIOPrintf(m, "exch ");                     // tab val2 cell0 val2
+    _cmsIOPrintf(m, "ceiling cvi ");              // tab val2 cell0 cell1
+    _cmsIOPrintf(m, "3 index ");                  // tab val2 cell0 cell1 tab
+    _cmsIOPrintf(m, "exch ");                     // tab val2 cell0 tab cell1
+    _cmsIOPrintf(m, "get ");                      // tab val2 cell0 y1
+    _cmsIOPrintf(m, "4 -1 roll ");                // val2 cell0 y1 tab
+    _cmsIOPrintf(m, "3 -1 roll ");                // val2 y1 tab cell0
+    _cmsIOPrintf(m, "get ");                      // val2 y1 y0
+    _cmsIOPrintf(m, "dup ");                      // val2 y1 y0 y0
+    _cmsIOPrintf(m, "3 1 roll ");                 // val2 y0 y1 y0
+    _cmsIOPrintf(m, "sub ");                      // val2 y0 (y1-y0)
+    _cmsIOPrintf(m, "3 -1 roll ");                // y0 (y1-y0) val2
+    _cmsIOPrintf(m, "dup ");                      // y0 (y1-y0) val2 val2
+    _cmsIOPrintf(m, "floor cvi ");                // y0 (y1-y0) val2 floor(val2)
+    _cmsIOPrintf(m, "sub ");                      // y0 (y1-y0) rest
+    _cmsIOPrintf(m, "mul ");                      // y0 t1
+    _cmsIOPrintf(m, "add ");                      // y
+    _cmsIOPrintf(m, "65535 div ");                // result
+
+    _cmsIOPrintf(m, " } bind ");
+}
+
+
+// Compare gamma tables: nonzero when the first nEntries 16-bit entries of g1 and g2 are identical.
+// Both tables must hold at least nEntries entries, since that many are read from each.
+static
+cmsBool GammaTableEquals(cmsUInt16Number* g1, cmsUInt16Number* g2, cmsUInt32Number nEntries)
+{
+    return memcmp(g1, g2, nEntries* sizeof(cmsUInt16Number)) == 0;
+}
+
+
+// Does write a set of n gamma curves. A curve identical to the previous one is
+// written as "dup" instead of being emitted again. Stops at the first NULL curve.
+static
+void EmitNGamma(cmsIOHANDLER* m, cmsUInt32Number n, cmsToneCurve* g[])
+{
+    cmsUInt32Number i;
+
+    for( i=0; i < n; i++ )
+    {
+        if (g[i] == NULL) return; // Error
+
+        // Tables of different length can never be equal; comparing them anyway would read past the shorter one
+        if (i > 0 && g[i-1]->nEntries == g[i]->nEntries &&
+            GammaTableEquals(g[i-1]->Table16, g[i]->Table16, g[i]->nEntries)) {
+            _cmsIOPrintf(m, "dup ");
+        }
+        else {
+            Emit1Gamma(m, g[i]);
+        }
+    }
+}
+
+
+
+
+
+// Following code dumps a LUT onto memory stream
+
+
+// This is the sampler. Intended to work in SAMPLER_INSPECT mode,
+// that is, the callback will be called for each knot with
+//
+//          In[]  The grid location coordinates, normalized to 0..ffff
+//          Out[] The Pipeline values, normalized to 0..ffff
+//
+//  NOTE(review): returns 1 after writing a node and 0 on failure, so 0 looks like the "stop" value -- confirm
+//
+//  Each row contains Pipeline values for all but first component. So, I
+//  detect row changing by keeping a copy of last value of first
+//  component. -1 is used to mark beginning of whole block.
+
+static
+int OutputValueSampler(register const cmsUInt16Number In[], register cmsUInt16Number Out[], register void* Cargo)
+{
+    cmsPsSamplerCargo* sc = (cmsPsSamplerCargo*) Cargo;
+    cmsUInt32Number i;
+
+
+    if (sc -> FixWhite) {
+
+        if (In[0] == 0xFFFF) {  // Only in L* = 100, ab = [-8..8]
+
+            if ((In[1] >= 0x7800 && In[1] <= 0x8800) &&
+                (In[2] >= 0x7800 && In[2] <= 0x8800)) {
+
+                cmsUInt16Number* Black;
+                cmsUInt16Number* White;
+                cmsUInt32Number nOutputs;
+
+                if (!_cmsEndPointsBySpace(sc ->ColorSpace, &White, &Black, &nOutputs))
+                        return 0;
+
+                // Overwrite the node with the white end point of the color space
+                for (i=0; i < nOutputs; i++)
+                        Out[i] = White[i];
+            }
+
+
+        }
+    }
+
+
+    // Handle the parentheses on rows. The Pre/Post strings are passed as the format argument itself.
+
+    if (In[0] != sc ->FirstComponent) {
+
+            if (sc ->FirstComponent != -1) {
+
+                    _cmsIOPrintf(sc ->m, sc ->PostMin);
+                    sc ->SecondComponent = -1;
+                    _cmsIOPrintf(sc ->m, sc ->PostMaj);
+            }
+
+            // Begin block
+            _cmsPSActualColumn = 0;
+
+            _cmsIOPrintf(sc ->m, sc ->PreMaj);
+            sc ->FirstComponent = In[0];
+    }
+
+
+      if (In[1] != sc ->SecondComponent) {
+
+            if (sc ->SecondComponent != -1) {
+
+                    _cmsIOPrintf(sc ->m, sc ->PostMin);
+            }
+
+            _cmsIOPrintf(sc ->m, sc ->PreMin);
+            sc ->SecondComponent = In[1];
+    }
+
+      // Dump table: every output channel of this node goes out as one hex byte.
+
+      for (i=0; i < sc -> Pipeline ->Params->nOutputs; i++) {
+
+          cmsUInt16Number wWordOut = Out[i];
+          cmsUInt8Number wByteOut;           // Value as byte
+
+
+          // We always deal with Lab4
+
+          wByteOut = Word2Byte(wWordOut);
+          WriteByte(sc -> m, wByteOut);
+      }
+
+      return 1;
+}
+
+// Writes the CLUT stage 'mpe' on the stream. The output is "[", the number of
+// grid points of each input dimension, " [", every node as emitted by
+// OutputValueSampler (one byte per output channel, grouped with the
+// PreMaj/PostMaj and PreMin/PostMin delimiters), then PostMin, PostMaj and
+// a closing "] ". FixWhite and ColorSpace are forwarded to the sampler.
+
+static
+void WriteCLUT(cmsIOHANDLER* m, cmsStage* mpe, const char* PreMaj,
+                                             const char* PostMaj,
+                                             const char* PreMin,
+                                             const char* PostMin,
+                                             int FixWhite,
+                                             cmsColorSpaceSignature ColorSpace)
+{
+    cmsPsSamplerCargo Cargo;
+    cmsUInt32Number nDim;
+
+    // Where to write and what to sample
+    Cargo.m          = m;
+    Cargo.Pipeline   = (_cmsStageCLutData *) mpe ->Data;
+    Cargo.ColorSpace = ColorSpace;
+    Cargo.FixWhite   = FixWhite;
+
+    // Group delimiters
+    Cargo.PreMaj  = PreMaj;
+    Cargo.PostMaj = PostMaj;
+    Cargo.PreMin  = PreMin;
+    Cargo.PostMin = PostMin;
+
+    // -1 marks that no group has been opened yet
+    Cargo.FirstComponent  = -1;
+    Cargo.SecondComponent = -1;
+
+    _cmsIOPrintf(m, "[");
+
+    // Grid points on each input dimension
+    for (nDim = 0; nDim < Cargo.Pipeline ->Params ->nInputs; nDim++)
+        _cmsIOPrintf(m, " %d ", Cargo.Pipeline ->Params ->nSamples[nDim]);
+
+    _cmsIOPrintf(m, " [\n");
+
+    // The sampler does write every node
+    cmsStageSampleCLut16bit(mpe, OutputValueSampler, (void*) &Cargo, SAMPLER_INSPECT);
+
+    // Close the groups left open by the last node
+    _cmsIOPrintf(m, PostMin);
+    _cmsIOPrintf(m, PostMaj);
+    _cmsIOPrintf(m, "] ");
+}
+
+
+// Dumps a CIEBasedA Color Space Array: /DecodeA is taken from 'Curve',
+// /MatrixA and /RangeLMN are fixed literals, followed by the white/black
+// point entries and a perceptual rendering intent. Always returns 1.
+
+static
+int EmitCIEBasedA(cmsIOHANDLER* m, cmsToneCurve* Curve, cmsCIEXYZ* BlackPoint)
+{
+    // Opening of the array and of the dictionary, up to the decode procedure
+    _cmsIOPrintf(m, "[ /CIEBasedA\n"
+                    "  <<\n"
+                    "/DecodeA ");
+
+    Emit1Gamma(m, Curve);
+
+    // Fixed matrix and range
+    _cmsIOPrintf(m, " \n"
+                    "/MatrixA [ 0.9642 1.0000 0.8249 ]\n"
+                    "/RangeLMN [ 0.0 0.9642 0.0 1.0000 0.0 0.8249 ]\n");
+
+    EmitWhiteBlackD50(m, BlackPoint);
+    EmitIntent(m, INTENT_PERCEPTUAL);
+
+    // Close dictionary and array
+    _cmsIOPrintf(m, ">>\n"
+                    "]\n");
+
+    return 1;
+}
+
+
+// Dumps a CIEBasedABC Color Space Array: /DecodeABC comes from the three
+// curves in 'CurveSet', /MatrixABC from the 9 values of 'Matrix' (written
+// column after column), plus a fixed /RangeLMN, the white/black point
+// entries and a perceptual rendering intent. Always returns 1.
+
+static
+int EmitCIEBasedABC(cmsIOHANDLER* m, cmsFloat64Number* Matrix, cmsToneCurve** CurveSet, cmsCIEXYZ* BlackPoint)
+{
+    int Column;
+
+    _cmsIOPrintf(m, "[ /CIEBasedABC\n"
+                    "<<\n"
+                    "/DecodeABC [ ");
+
+    EmitNGamma(m, 3, CurveSet);
+
+    _cmsIOPrintf(m, "]\n"
+                    "/MatrixABC [ ");
+
+    // Matrix holds 3 rows of 3 values; walk it by columns
+    for (Column = 0; Column < 3; Column++) {
+
+        const cmsFloat64Number* Top = Matrix + Column;
+
+        _cmsIOPrintf(m, "%.6f %.6f %.6f ", Top[0], Top[3], Top[6]);
+    }
+
+    _cmsIOPrintf(m, "]\n"
+                    "/RangeLMN [ 0.0 0.9642 0.0 1.0000 0.0 0.8249 ]\n");
+
+    EmitWhiteBlackD50(m, BlackPoint);
+    EmitIntent(m, INTENT_PERCEPTUAL);
+
+    _cmsIOPrintf(m, ">>\n"
+                    "]\n");
+
+    return 1;
+}
+
+
+// Dumps a CIEBasedDEF (3 input channels) or CIEBasedDEFG (4 input channels)
+// Color Space Array from the stages of 'Pipeline'. A leading curve-set stage
+// becomes /DecodeDEF, a following CLUT stage becomes /Table. Returns 0
+// without writing anything when the first stage has neither 3 nor 4 inputs,
+// 1 otherwise.
+// NOTE(review): the stage pointer is not checked for NULL before being
+// queried -- presumably the caller guarantees a curves+CLUT pipeline; confirm.
+static
+int EmitCIEBasedDEF(cmsIOHANDLER* m, cmsPipeline* Pipeline, cmsUInt32Number Intent, cmsCIEXYZ* BlackPoint)
+{
+    const char* MajOpen;
+    const char* MajClose;
+    const char* MinOpen;
+    const char* MinClose;
+    cmsStage* Stage = Pipeline ->Elements;
+    cmsUInt32Number nInputs = cmsStageInputChannels(Stage);
+
+    if (nInputs == 3) {
+
+        // One hex string per value of the first coordinate
+        MajOpen  = "<";
+        MajClose = ">\n";
+        MinOpen  = MinClose = "";
+        _cmsIOPrintf(m, "[ /CIEBasedDEF\n");
+    }
+    else
+    if (nInputs == 4) {
+
+        // An array per first coordinate, holding one hex string per second coordinate
+        MajOpen  = "[";
+        MajClose = "]\n";
+        MinOpen  = "<";
+        MinClose = ">\n";
+        _cmsIOPrintf(m, "[ /CIEBasedDEFG\n");
+    }
+    else
+        return 0;
+
+    _cmsIOPrintf(m, "<<\n");
+
+    // Optional prelinearization curves
+    if (cmsStageType(Stage) == cmsSigCurveSetElemType) {
+
+        _cmsIOPrintf(m, "/DecodeDEF [ ");
+        EmitNGamma(m, cmsStageOutputChannels(Stage), _cmsStageGetPtrToCurveSet(Stage));
+        _cmsIOPrintf(m, "]\n");
+
+        Stage = Stage ->Next;
+    }
+
+    // The table itself
+    if (cmsStageType(Stage) == cmsSigCLutElemType) {
+
+        _cmsIOPrintf(m, "/Table ");
+        WriteCLUT(m, Stage, MajOpen, MajClose, MinOpen, MinClose, FALSE, (cmsColorSpaceSignature) 0);
+        _cmsIOPrintf(m, "]\n");
+    }
+
+    EmitLab2XYZ(m);
+    EmitWhiteBlackD50(m, BlackPoint);
+    EmitIntent(m, Intent);
+
+    _cmsIOPrintf(m, "   >>\n");
+    _cmsIOPrintf(m, "]\n");
+
+    return 1;
+}
+
+// Generates a curve from a gray profile: a 256-entry tabulated tone curve
+// whose entry i is the Y obtained by transforming 8-bit gray level i to XYZ
+// with the given intent, scaled by 65535 and saturated to a word.
+//
+// Returns the curve, which the caller has to release with cmsFreeToneCurve(),
+// or NULL if the curve could not be allocated. If only the transform could
+// not be created, the curve is returned with its table left as allocated.
+
+static
+cmsToneCurve* ExtractGray2Y(cmsContext ContextID, cmsHPROFILE hProfile, cmsUInt32Number Intent)
+{
+    cmsToneCurve* Out = cmsBuildTabulatedToneCurve16(ContextID, 256, NULL);
+
+    // Create the temporary XYZ profile on the caller's context, same as the
+    // curve and the transform, instead of on the global one.
+    cmsHPROFILE hXYZ  = cmsCreateXYZProfileTHR(ContextID);
+    cmsHTRANSFORM xform = cmsCreateTransformTHR(ContextID, hProfile, TYPE_GRAY_8, hXYZ, TYPE_XYZ_DBL, Intent, cmsFLAGS_NOOPTIMIZE);
+    int i;
+
+    if (Out != NULL && xform != NULL) {
+        for (i=0; i < 256; i++) {
+
+            cmsUInt8Number Gray = (cmsUInt8Number) i;
+            cmsCIEXYZ XYZ;
+
+            cmsDoTransform(xform, &Gray, &XYZ, 1);
+
+            Out ->Table16[i] =_cmsQuickSaturateWord(XYZ.Y * 65535.0);
+        }
+    }
+
+    // Temporaries are released on every path
+    if (xform) cmsDeleteTransform(xform);
+    if (hXYZ) cmsCloseProfile(hXYZ);
+    return Out;
+}
+
+
+
+// Because PostScript has only 8 bits in /Table, we should use
+// a more perceptually uniform space... I do choose Lab.
+//
+// Writes a LUT-based CSA for 'hProfile'. A Profile -> Lab (V4) transform is
+// built; 1-channel profiles are dumped as CIEBasedA using the gray-to-Y
+// curve, 3 and 4 channel profiles as CIEBasedDEF/DEFG using a copy of the
+// transform pipeline optimized with cmsFLAGS_FORCE_CLUT.
+//
+// Returns 1 on success, 0 on error. The transform is released on every
+// path once it has been created.
+
+static
+int WriteInputLUT(cmsIOHANDLER* m, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags)
+{
+    cmsHPROFILE hLab;
+    cmsHTRANSFORM xform;
+    cmsUInt32Number nChannels;
+    cmsUInt32Number InputFormat;
+    int rc = 0;
+    cmsHPROFILE Profiles[2];
+    cmsCIEXYZ BlackPointAdaptedToD50;
+
+    // Does create a device-link based transform.
+    // The DeviceLink is next dumped as working CSA.
+
+    InputFormat = cmsFormatterForColorspaceOfProfile(hProfile, 2, FALSE);
+    nChannels   = T_CHANNELS(InputFormat);
+
+
+    cmsDetectBlackPoint(&BlackPointAdaptedToD50, hProfile, Intent, 0);
+
+    // Adjust output to Lab4
+    hLab = cmsCreateLab4ProfileTHR(m ->ContextID, NULL);
+    if (hLab == NULL) return 0;     // Same check as in WriteOutputLUT
+
+    Profiles[0] = hProfile;
+    Profiles[1] = hLab;
+
+    xform = cmsCreateMultiprofileTransform(Profiles, 2,  InputFormat, TYPE_Lab_DBL, Intent, 0);
+    cmsCloseProfile(hLab);
+
+    if (xform == NULL) {
+
+        cmsSignalError(m ->ContextID, cmsERROR_COLORSPACE_CHECK, "Cannot create transform Profile -> Lab");
+        return 0;
+    }
+
+    // Only 1, 3 and 4 channels are allowed. No case does return directly,
+    // so the transform is always deleted below.
+
+    switch (nChannels) {
+
+    case 1: {
+            cmsToneCurve* Gray2Y = ExtractGray2Y(m ->ContextID, hProfile, Intent);
+            EmitCIEBasedA(m, Gray2Y, &BlackPointAdaptedToD50);
+            cmsFreeToneCurve(Gray2Y);
+            rc = 1;
+            }
+            break;
+
+    case 3:
+    case 4: {
+            cmsUInt32Number OutFrm = TYPE_Lab_16;
+            cmsPipeline* DeviceLink;
+            _cmsTRANSFORM* v = (_cmsTRANSFORM*) xform;
+
+            // Work on a copy, the optimizer may replace the pipeline
+            DeviceLink = cmsPipelineDup(v ->Lut);
+            if (DeviceLink != NULL) {
+
+                dwFlags |= cmsFLAGS_FORCE_CLUT;
+                _cmsOptimizePipeline(m->ContextID, &DeviceLink, Intent, &InputFormat, &OutFrm, &dwFlags);
+
+                if (EmitCIEBasedDEF(m, DeviceLink, Intent, &BlackPointAdaptedToD50))
+                    rc = 1;
+
+                cmsPipelineFree(DeviceLink);
+            }
+            }
+            break;
+
+    default:
+
+        cmsSignalError(m ->ContextID, cmsERROR_COLORSPACE_CHECK, "Only 3, 4 channels supported for CSA. This profile has %d channels.", nChannels);
+        break;
+    }
+
+
+    cmsDeleteTransform(xform);
+
+    return rc;
+}
+
+// Gives access to the array of doubles held by a matrix stage. 'mpe' is
+// taken to be a matrix stage: its Data is read as _cmsStageMatrixData
+// without checking the stage type.
+static
+cmsFloat64Number* GetPtrToMatrix(const cmsStage* mpe)
+{
+    return ((_cmsStageMatrixData*) mpe ->Data) ->Double;
+}
+
+
+// Does create CSA based on matrix-shaper. Allowed types are gray and RGB based.
+// Gray profiles are dumped as CIEBasedA with the first shaper curve; RGB
+// profiles as CIEBasedABC with a copy of the matrix scaled by
+// MAX_ENCODEABLE_XYZ. Any other color space signals an error and returns 0;
+// otherwise the result of the emitter is returned.
+static
+int WriteInputMatrixShaper(cmsIOHANDLER* m, cmsHPROFILE hProfile, cmsStage* Matrix, cmsStage* Shaper)
+{
+    cmsCIEXYZ BlackPointAdaptedToD50;
+    cmsColorSpaceSignature ColorSpace = cmsGetColorSpace(hProfile);
+
+    // Black point is always taken on relative colorimetric
+    cmsDetectBlackPoint(&BlackPointAdaptedToD50, hProfile, INTENT_RELATIVE_COLORIMETRIC, 0);
+
+    switch (ColorSpace) {
+
+    case cmsSigGrayData: {
+
+        cmsToneCurve** ShaperCurve = _cmsStageGetPtrToCurveSet(Shaper);
+
+        return EmitCIEBasedA(m, ShaperCurve[0], &BlackPointAdaptedToD50);
+    }
+
+    case cmsSigRgbData: {
+
+        cmsMAT3 Scaled;
+        int Row, Col;
+
+        // Scale a private copy, the stage is left untouched
+        memmove(&Scaled, GetPtrToMatrix(Matrix), sizeof(Scaled));
+
+        for (Row = 0; Row < 3; Row++)
+            for (Col = 0; Col < 3; Col++)
+                Scaled.v[Row].n[Col] *= MAX_ENCODEABLE_XYZ;
+
+        return EmitCIEBasedABC(m, (cmsFloat64Number *)&Scaled,
+                               _cmsStageGetPtrToCurveSet(Shaper),
+                               &BlackPointAdaptedToD50);
+    }
+
+    default:
+
+        cmsSignalError(m->ContextID, cmsERROR_COLORSPACE_CHECK, "Profile is not suitable for CSA. Unsupported colorspace.");
+        return 0;
+    }
+}
+
+
+
+// Creates a PostScript color list from a named profile data.
+// This is a HP extension, and it works in Lab instead of XYZ
+//
+// For every color of the list a line "(name) [ L a b ]" is written, the Lab
+// value being obtained by transforming the color index to a V4 Lab profile.
+//
+// Returns 1 on success, 0 if the Lab profile, the transform or the named
+// color list cannot be obtained. The Lab profile and the transform are
+// released on every path.
+
+static
+int WriteNamedColorCSA(cmsIOHANDLER* m, cmsHPROFILE hNamedColor, cmsUInt32Number Intent)
+{
+    cmsHTRANSFORM xform;
+    cmsHPROFILE   hLab;
+    cmsUInt32Number i, nColors;
+    char ColorName[cmsMAX_PATH];
+    cmsNAMEDCOLORLIST* NamedColorList;
+    int rc = 0;
+
+    hLab  = cmsCreateLab4ProfileTHR(m ->ContextID, NULL);
+    if (hLab == NULL) return 0;
+
+    xform = cmsCreateTransform(hNamedColor, TYPE_NAMED_COLOR_INDEX, hLab, TYPE_Lab_DBL, Intent, 0);
+    if (xform != NULL) {
+
+        NamedColorList = cmsGetNamedColorList(xform);
+        if (NamedColorList != NULL) {
+
+            _cmsIOPrintf(m, "<<\n");
+            _cmsIOPrintf(m, "(colorlistcomment) (%s)\n", "Named color CSA");
+            _cmsIOPrintf(m, "(Prefix) [ (Pantone ) (PANTONE ) ]\n");
+            _cmsIOPrintf(m, "(Suffix) [ ( CV) ( CVC) ( C) ]\n");
+
+            nColors   = cmsNamedColorCount(NamedColorList);
+
+            for (i=0; i < nColors; i++) {
+
+                cmsUInt16Number In[1];
+                cmsCIELab Lab;
+
+                In[0] = (cmsUInt16Number) i;
+
+                // Colors whose info cannot be retrieved are skipped
+                if (!cmsNamedColorInfo(NamedColorList, i, ColorName, NULL, NULL, NULL, NULL))
+                        continue;
+
+                cmsDoTransform(xform, In, &Lab, 1);
+
+                // NOTE(review): ColorName is written verbatim inside a PostScript
+                // string; parentheses or backslashes in a name are not escaped.
+                _cmsIOPrintf(m, "  (%s) [ %.3f %.3f %.3f ]\n", ColorName, Lab.L, Lab.a, Lab.b);
+            }
+
+            _cmsIOPrintf(m, ">>\n");
+            rc = 1;
+        }
+
+        cmsDeleteTransform(xform);
+    }
+
+    // The profile is closed after the transform, as before
+    cmsCloseProfile(hLab);
+    return rc;
+}
+
+
+// Does create a Color Space Array on XYZ colorspace for PostScript usage.
+// Named color profiles go through WriteNamedColorCSA. Any other profile must
+// have a XYZ or Lab PCS; its input LUT is read and dumped either as
+// matrix-shaper (when it is exactly curves + matrix) or as LUT.
+// Returns the used space of 'mem' on success, 0 on error.
+static
+cmsUInt32Number GenerateCSA(cmsContext ContextID,
+                            cmsHPROFILE hProfile,
+                            cmsUInt32Number Intent,
+                            cmsUInt32Number dwFlags,
+                            cmsIOHANDLER* mem)
+{
+    cmsUInt32Number dwBytesUsed = 0;
+    cmsPipeline* lut = NULL;
+    cmsStage* Matrix, *Shaper;
+    cmsBool Done = FALSE;
+
+    if (cmsGetDeviceClass(hProfile) == cmsSigNamedColorClass) {
+
+        // Named color profile
+        if (WriteNamedColorCSA(mem, hProfile, Intent)) Done = TRUE;
+    }
+    else {
+
+        // Any profile class are allowed (including devicelink), but
+        // output (PCS) colorspace must be XYZ or Lab
+        cmsColorSpaceSignature Pcs = cmsGetPCS(hProfile);
+
+        if (Pcs != cmsSigXYZData && Pcs != cmsSigLabData) {
+
+            cmsSignalError(ContextID, cmsERROR_COLORSPACE_CHECK, "Invalid output color space");
+        }
+        else {
+
+            // Read the lut with all necessary conversion stages
+            lut = _cmsReadInputLUT(hProfile, Intent);
+
+            if (lut != NULL) {
+
+                if (cmsPipelineCheckAndRetreiveStages(lut, 2, cmsSigCurveSetElemType, cmsSigMatrixElemType, &Shaper, &Matrix)) {
+
+                    // Tone curves + matrix can be implemented without any LUT
+                    if (WriteInputMatrixShaper(mem, hProfile, Matrix, Shaper)) Done = TRUE;
+                }
+                else {
+
+                    // We need a LUT for the rest
+                    if (WriteInputLUT(mem, hProfile, Intent, dwFlags)) Done = TRUE;
+                }
+            }
+        }
+    }
+
+    // Keep memory usage only if everything went ok
+    if (Done)
+        dwBytesUsed = mem ->UsedSpace;
+
+    // Get rid of LUT, no matter the outcome
+    if (lut != NULL) cmsPipelineFree(lut);
+
+    return dwBytesUsed;
+}
+
+// ------------------------------------------------------ Color Rendering Dictionary (CRD)
+
+
+
+/*
+
+  Black point compensation plus chromatic adaptation:
+
+  Step 1 - Chromatic adaptation
+  =============================
+
+          WPout
+    X = ------- PQR
+          Wpin
+
+  Step 2 - Black point compensation
+  =================================
+
+          (WPout - BPout)*X - WPout*(BPin - BPout)
+    out = ---------------------------------------
+                        WPout - BPin
+
+
+  Algorithm discussion
+  ====================
+
+  TransformPQR(WPin, BPin, WPout, BPout, PQR)
+
+  Wpin,etc= { Xws Yws Zws Pws Qws Rws }
+
+
+  Algorithm             Stack 0...n
+  ===========================================================
+                        PQR BPout WPout BPin WPin
+  4 index 3 get         WPin PQR BPout WPout BPin WPin
+  div                   (PQR/WPin) BPout WPout BPin WPin
+  2 index 3 get         WPout (PQR/WPin) BPout WPout BPin WPin
+  mult                  WPout*(PQR/WPin) BPout WPout BPin WPin
+
+  2 index 3 get         WPout WPout*(PQR/WPin) BPout WPout BPin WPin
+  2 index 3 get         BPout WPout WPout*(PQR/WPin) BPout WPout BPin WPin
+  sub                   (WPout-BPout) WPout*(PQR/WPin) BPout WPout BPin WPin
+  mult                  (WPout-BPout)* WPout*(PQR/WPin) BPout WPout BPin WPin
+
+  2 index 3 get         WPout (BPout-WPout)* WPout*(PQR/WPin) BPout WPout BPin WPin
+  4 index 3 get         BPin WPout (BPout-WPout)* WPout*(PQR/WPin) BPout WPout BPin WPin
+  3 index 3 get         BPout BPin WPout (BPout-WPout)* WPout*(PQR/WPin) BPout WPout BPin WPin
+
+  sub                   (BPin-BPout) WPout (BPout-WPout)* WPout*(PQR/WPin) BPout WPout BPin WPin
+  mult                  (BPin-BPout)*WPout (BPout-WPout)* WPout*(PQR/WPin) BPout WPout BPin WPin
+  sub                   (BPout-WPout)* WPout*(PQR/WPin)-(BPin-BPout)*WPout BPout WPout BPin WPin
+
+  3 index 3 get         BPin (BPout-WPout)* WPout*(PQR/WPin)-(BPin-BPout)*WPout BPout WPout BPin WPin
+  3 index 3 get         WPout BPin (BPout-WPout)* WPout*(PQR/WPin)-(BPin-BPout)*WPout BPout WPout BPin WPin
+  exch
+  sub                   (WPout-BPin) (BPout-WPout)* WPout*(PQR/WPin)-(BPin-BPout)*WPout BPout WPout BPin WPin
+  div
+
+  exch pop
+  exch pop
+  exch pop
+  exch pop
+
+*/
+
+
+// Writes the /MatrixPQR, /RangePQR and /TransformPQR entries of a CRD.
+//
+//   lIsAbsolute  Identity matrix; the transform multiplies by the literals
+//                0.9642, 1.0000, 0.8249 and divides by the media white point
+//                read from the profile.
+//   otherwise    Bradford cone space matrix; the transform is the
+//                VonKries-like one, with black point compensation added
+//                when DoBPC is set.
+//
+// NOTE(review): the result of _cmsReadMediaWhitePoint is not checked.
+static
+void EmitPQRStage(cmsIOHANDLER* m, cmsHPROFILE hProfile, int DoBPC, int lIsAbsolute)
+{
+    if (lIsAbsolute) {
+
+        // For absolute colorimetric intent, encode back to relative
+        // and generate a relative Pipeline. Relative encoding is
+        // obtained across XYZpcs*(D50/WhitePoint)
+
+        cmsCIEXYZ White;
+
+        _cmsReadMediaWhitePoint(&White, hProfile);
+
+        _cmsIOPrintf(m,"/MatrixPQR [1 0 0 0 1 0 0 0 1 ]\n");
+        _cmsIOPrintf(m,"/RangePQR [ -0.5 2 -0.5 2 -0.5 2 ]\n");
+
+        _cmsIOPrintf(m, "%% Absolute colorimetric -- encode to relative to maximize LUT usage\n"
+                  "/TransformPQR [\n"
+                  "{0.9642 mul %g div exch pop exch pop exch pop exch pop} bind\n"
+                  "{1.0000 mul %g div exch pop exch pop exch pop exch pop} bind\n"
+                  "{0.8249 mul %g div exch pop exch pop exch pop exch pop} bind\n]\n",
+                  White.X, White.Y, White.Z);
+        return;
+    }
+
+    // Common part of all remaining intents
+    _cmsIOPrintf(m,"%% Bradford Cone Space\n"
+             "/MatrixPQR [0.8951 -0.7502 0.0389 0.2664 1.7135 -0.0685 -0.1614 0.0367 1.0296 ] \n");
+
+    _cmsIOPrintf(m, "/RangePQR [ -0.5 2 -0.5 2 -0.5 2 ]\n");
+
+    if (DoBPC) {
+
+        // One procedure per component, picking element 3, 4 and 5
+        // of the white/black point arrays respectively
+
+        _cmsIOPrintf(m, "%% VonKries-like transform in Bradford Cone Space plus BPC\n"
+                  "/TransformPQR [\n");
+
+        _cmsIOPrintf(m, "{4 index 3 get div 2 index 3 get mul "
+                "2 index 3 get 2 index 3 get sub mul "
+                "2 index 3 get 4 index 3 get 3 index 3 get sub mul sub "
+                "3 index 3 get 3 index 3 get exch sub div "
+                "exch pop exch pop exch pop exch pop } bind\n");
+
+        _cmsIOPrintf(m, "{4 index 4 get div 2 index 4 get mul "
+                "2 index 4 get 2 index 4 get sub mul "
+                "2 index 4 get 4 index 4 get 3 index 4 get sub mul sub "
+                "3 index 4 get 3 index 4 get exch sub div "
+                "exch pop exch pop exch pop exch pop } bind\n");
+
+        _cmsIOPrintf(m, "{4 index 5 get div 2 index 5 get mul "
+                "2 index 5 get 2 index 5 get sub mul "
+                "2 index 5 get 4 index 5 get 3 index 5 get sub mul sub "
+                "3 index 5 get 3 index 5 get exch sub div "
+                "exch pop exch pop exch pop exch pop } bind\n]\n");
+    }
+    else {
+
+        // No BPC
+
+        _cmsIOPrintf(m, "%% VonKries-like transform in Bradford Cone Space\n"
+                  "/TransformPQR [\n"
+                  "{exch pop exch 3 get mul exch pop exch 3 get div} bind\n"
+                  "{exch pop exch 4 get mul exch pop exch 4 get div} bind\n"
+                  "{exch pop exch 5 get mul exch pop exch 5 get div} bind\n]\n");
+    }
+}
+
+
+// Writes the fixed /RangeLMN, /EncodeLMN, /MatrixABC and /EncodeABC entries
+// of a CRD. The text does not depend on any profile; it is kept in a table
+// and written line by line, in order.
+static
+void EmitXYZ2Lab(cmsIOHANDLER* m)
+{
+    // None of these lines holds a '%', so they are safe as format strings
+    static const char* const Lines[] = {
+
+        "/RangeLMN [ -0.635 2.0 0 2 -0.635 2.0 ]\n",
+        "/EncodeLMN [\n",
+        "{ 0.964200  div dup 0.008856 le {7.787 mul 16 116 div add}{1 3 div exp} ifelse } bind\n",
+        "{ 1.000000  div dup 0.008856 le {7.787 mul 16 116 div add}{1 3 div exp} ifelse } bind\n",
+        "{ 0.824900  div dup 0.008856 le {7.787 mul 16 116 div add}{1 3 div exp} ifelse } bind\n",
+        "]\n",
+        "/MatrixABC [ 0 1 0 1 -1 1 0 0 -1 ]\n",
+        "/EncodeABC [\n",
+        "{ 116 mul  16 sub 100 div  } bind\n",
+        "{ 500 mul 128 add 256 div  } bind\n",
+        "{ 200 mul 128 add 256 div  } bind\n",
+        "]\n"
+    };
+
+    cmsUInt32Number n;
+
+    for (n = 0; n < sizeof(Lines) / sizeof(Lines[0]); n++)
+        _cmsIOPrintf(m, Lines[n]);
+}
+
+// Due to impedance mismatch between XYZ and almost all RGB and CMYK spaces
+// I choose to dump LUTS in Lab instead of XYZ. There is still a lot of wasted
+// space on 3D CLUT, but since space seems not to be a problem here, 33 points
+// would give a reasonable accuracy. Note also that CRD tables must operate in
+// 8 bits.
+
+// Writes a LUT-based Color Rendering Dictionary for 'hProfile'.
+//
+// A Lab (V4) -> Profile transform is built, a copy of its pipeline is
+// optimized with cmsFLAGS_FORCE_CLUT and its first stage is dumped as
+// /RenderTable. Absolute colorimetric is encoded as relative colorimetric
+// in the table; the absolute part goes in the PQR stage. Unless
+// cmsFLAGS_NODEFAULTRESOURCEDEF is given, a defineresource line is added.
+//
+// Returns 1 on success, 0 on error. The transform and the pipeline copy
+// are released on every path once they have been created.
+static
+int WriteOutputLUT(cmsIOHANDLER* m, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags)
+{
+    cmsHPROFILE hLab;
+    cmsHTRANSFORM xform;
+    cmsUInt32Number i, nChannels;
+    cmsUInt32Number OutputFormat;
+    _cmsTRANSFORM* v;
+    cmsPipeline* DeviceLink;
+    cmsHPROFILE Profiles[3];
+    cmsCIEXYZ BlackPointAdaptedToD50;
+    cmsBool lDoBPC = (cmsBool) (dwFlags & cmsFLAGS_BLACKPOINTCOMPENSATION);
+    cmsBool lFixWhite = (cmsBool) !(dwFlags & cmsFLAGS_NOWHITEONWHITEFIXUP);
+    cmsUInt32Number InFrm = TYPE_Lab_16;
+    cmsUInt32Number RelativeEncodingIntent;
+    cmsColorSpaceSignature ColorSpace;
+
+
+    hLab = cmsCreateLab4ProfileTHR(m ->ContextID, NULL);
+    if (hLab == NULL) return 0;
+
+    OutputFormat = cmsFormatterForColorspaceOfProfile(hProfile, 2, FALSE);
+    nChannels    = T_CHANNELS(OutputFormat);
+
+    ColorSpace = cmsGetColorSpace(hProfile);
+
+    // For absolute colorimetric, the LUT is encoded as relative in order to preserve precision.
+
+    RelativeEncodingIntent = Intent;
+    if (RelativeEncodingIntent == INTENT_ABSOLUTE_COLORIMETRIC)
+        RelativeEncodingIntent = INTENT_RELATIVE_COLORIMETRIC;
+
+
+    // Use V4 Lab always
+    Profiles[0] = hLab;
+    Profiles[1] = hProfile;
+
+    xform = cmsCreateMultiprofileTransformTHR(m ->ContextID,
+                                              Profiles, 2, TYPE_Lab_DBL,
+                                              OutputFormat, RelativeEncodingIntent, 0);
+    cmsCloseProfile(hLab);
+
+    if (xform == NULL) {
+
+        cmsSignalError(m ->ContextID, cmsERROR_COLORSPACE_CHECK, "Cannot create transform Lab -> Profile in CRD creation");
+        return 0;
+    }
+
+    // Get a copy of the internal devicelink
+    v = (_cmsTRANSFORM*) xform;
+    DeviceLink = cmsPipelineDup(v ->Lut);
+    if (DeviceLink == NULL) {
+
+        // The transform is still alive at this point, do not leak it
+        cmsDeleteTransform(xform);
+        return 0;
+    }
+
+
+    // We need a CLUT
+    dwFlags |= cmsFLAGS_FORCE_CLUT;
+    _cmsOptimizePipeline(m->ContextID, &DeviceLink, RelativeEncodingIntent, &InFrm, &OutputFormat, &dwFlags);
+
+    _cmsIOPrintf(m, "<<\n");
+    _cmsIOPrintf(m, "/ColorRenderingType 1\n");
+
+
+    cmsDetectBlackPoint(&BlackPointAdaptedToD50, hProfile, Intent, 0);
+
+    // Emit headers, etc.
+    EmitWhiteBlackD50(m, &BlackPointAdaptedToD50);
+    EmitPQRStage(m, hProfile, lDoBPC, Intent == INTENT_ABSOLUTE_COLORIMETRIC);
+    EmitXYZ2Lab(m);
+
+
+    // FIXUP: map Lab (100, 0, 0) to perfect white, because the particular encoding for Lab
+    // does map a=b=0 not falling into any specific node. Since range a,b goes -128..127,
+    // zero is slightly moved towards right, so assure next node (in L=100 slice) is mapped to
+    // zero. This would sacrifice a bit of highlights, but failure to do so would cause
+    // scum dot. Ouch.
+
+    if (Intent == INTENT_ABSOLUTE_COLORIMETRIC)
+            lFixWhite = FALSE;
+
+    _cmsIOPrintf(m, "/RenderTable ");
+
+
+    WriteCLUT(m, cmsPipelineGetPtrToFirstStage(DeviceLink), "<", ">\n", "", "", lFixWhite, ColorSpace);
+
+    // Number of output channels, then one empty procedure per channel
+    _cmsIOPrintf(m, " %d {} bind ", nChannels);
+
+    for (i=1; i < nChannels; i++)
+            _cmsIOPrintf(m, "dup ");
+
+    _cmsIOPrintf(m, "]\n");
+
+
+    EmitIntent(m, Intent);
+
+    _cmsIOPrintf(m, ">>\n");
+
+    if (!(dwFlags & cmsFLAGS_NODEFAULTRESOURCEDEF)) {
+
+        _cmsIOPrintf(m, "/Current exch /ColorRendering defineresource pop\n");
+    }
+
+    cmsPipelineFree(DeviceLink);
+    cmsDeleteTransform(xform);
+
+    return 1;
+}
+
+
+// Builds an ASCII string containing the colorant list in 0..1.0 range: every
+// value of Out[] is divided by 65535 and written with 3 decimals, values
+// being separated by a single space. At most cmsMAXCHANNELS values are
+// written. 'Colorant' has to be big enough for the whole list; its size
+// is not checked here.
+static
+void BuildColorantList(char *Colorant, cmsUInt32Number nColorant, cmsUInt16Number Out[])
+{
+    char Number[32];
+    cmsUInt32Number k;
+    cmsUInt32Number nUsed = (nColorant > cmsMAXCHANNELS) ? cmsMAXCHANNELS : nColorant;
+
+    // Start with an empty string
+    Colorant[0] = 0;
+
+    for (k = 0; k < nUsed; k++) {
+
+        // Separator goes between values only
+        if (k > 0)
+            strcat(Colorant, " ");
+
+        snprintf(Number, 31, "%.3f", Out[k] / 65535.0);
+        Number[31] = 0;
+        strcat(Colorant, Number);
+    }
+}
+
+
+// Creates a PostScript color list from a named profile data.
+// This is a HP extension.
+//
+// For every color of the list a line "(name) [ colorants ]" is written, the
+// colorants being the result of transforming the color index to the device
+// space of the profile, in 0..1.0 range. Unless cmsFLAGS_NODEFAULTRESOURCEDEF
+// is given, a defineresource line is added.
+//
+// Returns 1 on success, 0 if the transform or the named color list cannot
+// be obtained. The transform is released on every path.
+
+static
+int WriteNamedColorCRD(cmsIOHANDLER* m, cmsHPROFILE hNamedColor, cmsUInt32Number Intent, cmsUInt32Number dwFlags)
+{
+    cmsHTRANSFORM xform;
+    cmsUInt32Number i, nColors, nColorant;
+    cmsUInt32Number OutputFormat;
+    char ColorName[cmsMAX_PATH];
+    char Colorant[128];
+    cmsNAMEDCOLORLIST* NamedColorList;
+
+
+    OutputFormat = cmsFormatterForColorspaceOfProfile(hNamedColor, 2, FALSE);
+    nColorant    = T_CHANNELS(OutputFormat);
+
+
+    xform = cmsCreateTransform(hNamedColor, TYPE_NAMED_COLOR_INDEX, NULL, OutputFormat, Intent, dwFlags);
+    if (xform == NULL) return 0;
+
+
+    NamedColorList = cmsGetNamedColorList(xform);
+    if (NamedColorList == NULL) {
+
+        // The transform is still alive at this point, do not leak it
+        cmsDeleteTransform(xform);
+        return 0;
+    }
+
+    _cmsIOPrintf(m, "<<\n");
+    _cmsIOPrintf(m, "(colorlistcomment) (%s) \n", "Named profile");
+    _cmsIOPrintf(m, "(Prefix) [ (Pantone ) (PANTONE ) ]\n");
+    _cmsIOPrintf(m, "(Suffix) [ ( CV) ( CVC) ( C) ]\n");
+
+    nColors   = cmsNamedColorCount(NamedColorList);
+
+    for (i=0; i < nColors; i++) {
+
+        cmsUInt16Number In[1];
+        cmsUInt16Number Out[cmsMAXCHANNELS];
+
+        In[0] = (cmsUInt16Number) i;
+
+        // Colors whose info cannot be retrieved are skipped
+        if (!cmsNamedColorInfo(NamedColorList, i, ColorName, NULL, NULL, NULL, NULL))
+                continue;
+
+        cmsDoTransform(xform, In, Out, 1);
+        BuildColorantList(Colorant, nColorant, Out);
+
+        // NOTE(review): ColorName is written verbatim inside a PostScript
+        // string; parentheses or backslashes in a name are not escaped.
+        _cmsIOPrintf(m, "  (%s) [ %s ]\n", ColorName, Colorant);
+    }
+
+    _cmsIOPrintf(m, "   >>");
+
+    if (!(dwFlags & cmsFLAGS_NODEFAULTRESOURCEDEF)) {
+
+        _cmsIOPrintf(m, " /Current exch /HPSpotTable defineresource pop\n");
+    }
+
+    cmsDeleteTransform(xform);
+    return 1;
+}
+
+
+
+// This one does create a Color Rendering Dictionary.
+// CRD are always LUT-Based, no matter if profile is
+// implemented as matrix-shaper. Named color profiles are
+// dumped as a color list instead.
+//
+// Unless cmsFLAGS_NODEFAULTRESOURCEDEF is given, the dictionary is wrapped
+// by a header and an %%EndResource trailer. Returns the used space of 'mem'
+// on success, 0 on error. ContextID is not used.
+
+static
+cmsUInt32Number  GenerateCRD(cmsContext ContextID,
+                             cmsHPROFILE hProfile,
+                             cmsUInt32Number Intent, cmsUInt32Number dwFlags,
+                             cmsIOHANDLER* mem)
+{
+    cmsBool lWrapAsResource = (dwFlags & cmsFLAGS_NODEFAULTRESOURCEDEF) ? FALSE : TRUE;
+    int rc;
+
+    cmsUNUSED_PARAMETER(ContextID);
+
+    if (lWrapAsResource)
+        EmitHeader(mem, "Color Rendering Dictionary (CRD)", hProfile);
+
+    // Named color profiles have its own writer, the rest are always LUT
+    if (cmsGetDeviceClass(hProfile) == cmsSigNamedColorClass)
+        rc = WriteNamedColorCRD(mem, hProfile, Intent, dwFlags);
+    else
+        rc = WriteOutputLUT(mem, hProfile, Intent, dwFlags);
+
+    if (!rc) return 0;
+
+    if (lWrapAsResource) {
+
+        _cmsIOPrintf(mem, "%%%%EndResource\n");
+        _cmsIOPrintf(mem, "\n%% CRD End\n");
+    }
+
+    // Done, return used byte count
+    return mem ->UsedSpace;
+}
+
+
+
+
+// Writes the requested PostScript resource of 'hProfile' on 'io'.
+// cmsPS_RESOURCE_CSA gives a Color Space Array; cmsPS_RESOURCE_CRD and any
+// other value give a Color Rendering Dictionary. Returns whatever the
+// generator returns: the used space on success, 0 on error.
+cmsUInt32Number CMSEXPORT cmsGetPostScriptColorResource(cmsContext ContextID,
+                                                               cmsPSResourceType Type,
+                                                               cmsHPROFILE hProfile,
+                                                               cmsUInt32Number Intent,
+                                                               cmsUInt32Number dwFlags,
+                                                               cmsIOHANDLER* io)
+{
+    if (Type == cmsPS_RESOURCE_CSA)
+        return GenerateCSA(ContextID, hProfile, Intent, dwFlags, io);
+
+    // CRD is also the fallback for unknown types
+    return GenerateCRD(ContextID, hProfile, Intent, dwFlags, io);
+}
+
+
+
+// Does create a Color Rendering Dictionary for PostScript usage. The CRD is
+// written on 'Buffer' (dwBufferLen bytes); with a NULL buffer a NULL
+// iohandler is used instead, so only the count is obtained. Returns the
+// value given by cmsGetPostScriptColorResource, or 0 if the iohandler
+// cannot be opened.
+cmsUInt32Number CMSEXPORT cmsGetPostScriptCRD(cmsContext ContextID,
+                              cmsHPROFILE hProfile,
+                              cmsUInt32Number Intent, cmsUInt32Number dwFlags,
+                              void* Buffer, cmsUInt32Number dwBufferLen)
+{
+    cmsUInt32Number nBytes;
+
+    // Set up the serialization engine
+    cmsIOHANDLER* io = (Buffer == NULL) ? cmsOpenIOhandlerFromNULL(ContextID)
+                                        : cmsOpenIOhandlerFromMem(ContextID, Buffer, dwBufferLen, "w");
+
+    if (io == NULL) return 0;
+
+    nBytes = cmsGetPostScriptColorResource(ContextID, cmsPS_RESOURCE_CRD, hProfile, Intent, dwFlags, io);
+
+    // Get rid of memory stream
+    cmsCloseIOhandler(io);
+
+    return nBytes;
+}
+
+
+
+// Does create a Color Space Array on XYZ colorspace for PostScript usage.
+// The CSA is written on 'Buffer' (dwBufferLen bytes); with a NULL buffer a
+// NULL iohandler is used instead, so only the count is obtained. Returns
+// the value given by cmsGetPostScriptColorResource, or 0 if the iohandler
+// cannot be opened.
+cmsUInt32Number CMSEXPORT cmsGetPostScriptCSA(cmsContext ContextID,
+                                              cmsHPROFILE hProfile,
+                                              cmsUInt32Number Intent,
+                                              cmsUInt32Number dwFlags,
+                                              void* Buffer,
+                                              cmsUInt32Number dwBufferLen)
+{
+    cmsUInt32Number nBytes;
+
+    // Set up the serialization engine
+    cmsIOHANDLER* io = (Buffer == NULL) ? cmsOpenIOhandlerFromNULL(ContextID)
+                                        : cmsOpenIOhandlerFromMem(ContextID, Buffer, dwBufferLen, "w");
+
+    if (io == NULL) return 0;
+
+    nBytes = cmsGetPostScriptColorResource(ContextID, cmsPS_RESOURCE_CSA, hProfile, Intent, dwFlags, io);
+
+    // Get rid of memory stream
+    cmsCloseIOhandler(io);
+
+    return nBytes;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmssamp.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmssamp.cpp
new file mode 100755
index 0000000000..1fc5f5d467
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmssamp.cpp
@@ -0,0 +1,547 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Min/max helper macros. Each argument appears twice in the expansion, so avoid operands with side effects.
+#define cmsmin(a, b) (((a) < (b)) ? (a) : (b))
+#define cmsmax(a, b) (((a) > (b)) ? (a) : (b))
+
+// This file contains routines for resampling and LUT optimization, black point detection
+// and black preservation.
+
+// Black point detection -------------------------------------------------------------------------
+
+
+// PCS -> PCS round trip transform, always uses relative intent on the device -> pcs
+static
+cmsHTRANSFORM CreateRoundtripXForm(cmsHPROFILE hProfile, cmsUInt32Number nIntent)
+{
+    // Profile chain: Lab -> hProfile (nIntent) -> hProfile (relative colorimetric) -> Lab
+    cmsBool noBPC[4] = { FALSE, FALSE, FALSE, FALSE };
+    cmsFloat64Number adaptation[4] = { 1.0, 1.0, 1.0, 1.0 };
+    cmsUInt32Number intentChain[4];
+    cmsHPROFILE profileChain[4];
+    cmsHTRANSFORM hRoundTrip;
+    cmsContext ctx = cmsGetProfileContextID(hProfile);
+    cmsHPROFILE hLab4 = cmsCreateLab4ProfileTHR(ctx, NULL);
+    profileChain[0] = hLab4;    intentChain[0] = INTENT_RELATIVE_COLORIMETRIC;
+    profileChain[1] = hProfile; intentChain[1] = nIntent;
+    profileChain[2] = hProfile; intentChain[2] = INTENT_RELATIVE_COLORIMETRIC;
+    profileChain[3] = hLab4;    intentChain[3] = INTENT_RELATIVE_COLORIMETRIC;
+    hRoundTrip = cmsCreateExtendedTransform(ctx, 4, profileChain, noBPC, intentChain, adaptation,
+        NULL, 0, TYPE_Lab_DBL, TYPE_Lab_DBL, cmsFLAGS_NOCACHE|cmsFLAGS_NOOPTIMIZE);
+    cmsCloseProfile(hLab4);   // the temporary Lab profile is closed before returning
+    return hRoundTrip;
+}
+
+// Use darker colorants to obtain black point. This works in the relative colorimetric intent and
+// assumes more ink results in darker colors. No ink limit is assumed.
+static
+cmsBool  BlackPointAsDarkerColorant(cmsHPROFILE    hInput,
+                                    cmsUInt32Number Intent,
+                                    cmsCIEXYZ* BlackPoint,
+                                    cmsUInt32Number dwFlags)
+{
+    cmsUInt16Number *Black;
+    cmsHTRANSFORM xform;
+    cmsColorSpaceSignature Space;
+    cmsUInt32Number nChannels;
+    cmsUInt32Number dwFormat;
+    cmsHPROFILE hLab;
+    cmsCIELab  Lab;
+    cmsCIEXYZ  BlackXYZ;
+    cmsContext ContextID = cmsGetProfileContextID(hInput);
+    // On success stores a neutral black point (a=b=0, L* clipped to 50) and returns TRUE; every failure path zeroes *BlackPoint and returns FALSE.
+    // If the profile does not support input direction, assume Black point 0
+    if (!cmsIsIntentSupported(hInput, Intent, LCMS_USED_AS_INPUT)) {
+        // NOTE(review): the failure paths dereference BlackPoint without a NULL check, although the success path below guards against NULL.
+        BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+        return FALSE;
+    }
+
+    // Create a formatter for the profile's colorspace; the arguments (2, FALSE) presumably select 2 bytes per channel, non-float, matching the 16-bit Black values below
+    dwFormat = cmsFormatterForColorspaceOfProfile(hInput, 2, FALSE);
+
+   // Try to get black by using black colorant
+    Space = cmsGetColorSpace(hInput);
+
+    // This function returns darker colorant in 16 bits for several spaces
+    if (!_cmsEndPointsBySpace(Space, NULL, &Black, &nChannels)) {
+
+        BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+        return FALSE;
+    }
+    // The endpoint table and the formatter must agree on the channel count
+    if (nChannels != T_CHANNELS(dwFormat)) {
+       BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+       return FALSE;
+    }
+
+    // Lab will be used as the output space, but lab2 will avoid recursion
+    hLab = cmsCreateLab2ProfileTHR(ContextID, NULL);
+    if (hLab == NULL) {
+       BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+       return FALSE;
+    }
+
+    // Create the transform
+    xform = cmsCreateTransformTHR(ContextID, hInput, dwFormat,
+                                hLab, TYPE_Lab_DBL, Intent, cmsFLAGS_NOOPTIMIZE|cmsFLAGS_NOCACHE);
+    cmsCloseProfile(hLab);
+
+    if (xform == NULL) {
+
+        // Something went wrong. Get rid of open resources and return zero as black
+        BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+        return FALSE;
+    }
+
+    // Convert black to Lab
+    cmsDoTransform(xform, Black, &Lab, 1);
+
+    // Force it to be neutral, clip to max. L* of 50
+    Lab.a = Lab.b = 0;
+    if (Lab.L > 50) Lab.L = 50;
+
+    // Free the resources
+    cmsDeleteTransform(xform);
+
+    // Convert from Lab (which is now clipped) to XYZ.
+    cmsLab2XYZ(NULL, &BlackXYZ, &Lab);
+
+    if (BlackPoint != NULL)
+        *BlackPoint = BlackXYZ;
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(dwFlags);   // after the return: presumably only silences the unused-parameter warning
+}
+
+// Get a black point of output CMYK profile, discounting any ink-limiting embedded
+// in the profile. For doing that, we use perceptual intent in input direction:
+// Lab (0, 0, 0) -> [Perceptual] Profile -> CMYK -> [Rel. colorimetric] Profile -> Lab
+static
+cmsBool BlackPointUsingPerceptualBlack(cmsCIEXYZ* BlackPoint, cmsHPROFILE hProfile)
+{
+    // Round-trips Lab (0, 0, 0) through the profile (perceptual in, relative
+    // colorimetric out) and stores the neutralized result as XYZ.
+    cmsCIELab labBlack, labResult;
+    cmsCIEXYZ xyzResult;
+    cmsHTRANSFORM hXform;
+
+    // Without perceptual support in the input direction the black point is
+    // reported as zero, and the call still returns TRUE.
+    if (!cmsIsIntentSupported(hProfile, INTENT_PERCEPTUAL, LCMS_USED_AS_INPUT)) {
+        BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+        return TRUE;
+    }
+
+    hXform = CreateRoundtripXForm(hProfile, INTENT_PERCEPTUAL);
+    if (hXform == NULL) {
+        BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+        return FALSE;
+    }
+
+    labBlack.L = 0;
+    labBlack.a = 0;
+    labBlack.b = 0;
+    cmsDoTransform(hXform, &labBlack, &labResult, 1);
+    cmsDeleteTransform(hXform);
+
+    // Keep only lightness, limited to L* <= 50, before converting to XYZ
+    labResult.a = 0;
+    labResult.b = 0;
+    if (labResult.L > 50) labResult.L = 50;
+    cmsLab2XYZ(NULL, &xyzResult, &labResult);
+
+    if (BlackPoint != NULL) *BlackPoint = xyzResult;
+    return TRUE;
+}
+
+// This function shouldn't exist at all -- there is such quantity of broken
+// profiles on black point tag, that we must somehow fix chromaticity to
+// avoid huge tint when doing Black point compensation. This function does
+// just that. There is a special flag for using black point tag, but turned
+// off by default because it is bogus on most profiles. The detection algorithm
+// turns the black point neutral and uses only its L* component.
+cmsBool CMSEXPORT cmsDetectBlackPoint(cmsCIEXYZ* BlackPoint, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags)
+{
+    cmsProfileClassSignature devClass;
+    // Writes the detected black point to *BlackPoint. Unsupported device classes or intents zero it and return FALSE; otherwise one of the strategies below decides.
+    // Make sure the device class is adequate
+    devClass = cmsGetDeviceClass(hProfile);
+    if (devClass == cmsSigLinkClass ||
+        devClass == cmsSigAbstractClass ||
+        devClass == cmsSigNamedColorClass) {
+            BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+            return FALSE;
+    }
+
+    // Make sure intent is adequate
+    if (Intent != INTENT_PERCEPTUAL &&
+        Intent != INTENT_RELATIVE_COLORIMETRIC &&
+        Intent != INTENT_SATURATION) {
+            BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+            return FALSE;
+    }
+
+    // v4 + perceptual & saturation intents does have its own black point, and it is
+    // well specified enough to use it. Black point tag is deprecated in V4.
+    if ((cmsGetEncodedICCversion(hProfile) >= 0x4000000) &&
+        (Intent == INTENT_PERCEPTUAL || Intent == INTENT_SATURATION)) {
+
+            // Matrix shaper share MRC & perceptual intents
+            if (cmsIsMatrixShaper(hProfile))
+                return BlackPointAsDarkerColorant(hProfile, INTENT_RELATIVE_COLORIMETRIC, BlackPoint, 0);
+
+            // Get Perceptual black out of v4 profiles. That is fixed for perceptual & saturation intents
+            BlackPoint -> X = cmsPERCEPTUAL_BLACK_X;
+            BlackPoint -> Y = cmsPERCEPTUAL_BLACK_Y;
+            BlackPoint -> Z = cmsPERCEPTUAL_BLACK_Z;
+
+            return TRUE;
+    }
+
+
+#ifdef CMS_USE_PROFILE_BLACK_POINT_TAG
+
+    // v2, v4 relative colorimetric: optionally trust the media black point tag
+    if (cmsIsTag(hProfile, cmsSigMediaBlackPointTag) &&
+        Intent == INTENT_RELATIVE_COLORIMETRIC) {
+
+            cmsCIEXYZ *BlackPtr, BlackXYZ, UntrustedBlackPoint, TrustedBlackPoint, MediaWhite;
+            cmsCIELab Lab;
+
+            // If black point is specified, then use it,
+            // (explicit cast: this file is built as C++, where void* does not convert implicitly to cmsCIEXYZ*)
+            BlackPtr = (cmsCIEXYZ*) cmsReadTag(hProfile, cmsSigMediaBlackPointTag);
+            if (BlackPtr != NULL) {
+
+                BlackXYZ = *BlackPtr;
+                _cmsReadMediaWhitePoint(&MediaWhite, hProfile);
+
+                // Black point is absolute XYZ, so adapt to D50 to get PCS value
+                cmsAdaptToIlluminant(&UntrustedBlackPoint, &MediaWhite, cmsD50_XYZ(), &BlackXYZ);
+
+                // Force a=b=0 to get rid of any chroma
+                cmsXYZ2Lab(NULL, &Lab, &UntrustedBlackPoint);
+                Lab.a = Lab.b = 0;
+                if (Lab.L > 50) Lab.L = 50; // Clip to L* <= 50
+                cmsLab2XYZ(NULL, &TrustedBlackPoint, &Lab);
+
+                if (BlackPoint != NULL)
+                    *BlackPoint = TrustedBlackPoint;
+
+                return TRUE;
+            }
+    }
+#endif
+
+    // That is about v2 profiles.
+
+    // If output profile, discount ink-limiting and that's all
+    if (Intent == INTENT_RELATIVE_COLORIMETRIC &&
+        (cmsGetDeviceClass(hProfile) == cmsSigOutputClass) &&
+        (cmsGetColorSpace(hProfile)  == cmsSigCmykData))
+        return BlackPointUsingPerceptualBlack(BlackPoint, hProfile);
+
+    // Nope, compute BP using current intent.
+    return BlackPointAsDarkerColorant(hProfile, Intent, BlackPoint, dwFlags);
+}
+
+
+
+// ---------------------------------------------------------------------------------------------------------
+
+// Least Squares Fit of a Quadratic Curve to Data
+// http://www.personal.psu.edu/jhm/f90/lectures/lsq2.html
+
+static
+cmsFloat64Number RootOfLeastSquaresFitQuadraticCurve(int n, cmsFloat64Number x[], cmsFloat64Number y[])
+{
+    double sum_x = 0, sum_x2 = 0, sum_x3 = 0, sum_x4 = 0;
+    double sum_y = 0, sum_yx = 0, sum_yx2 = 0;
+    double d, a, b, c;
+    int i;
+    cmsMAT3 m;
+    cmsVEC3 v, res;
+    // Fits y = a*x^2 + b*x + c to the n samples by least squares and returns a root clamped to [0, 50]; returns 0 when no usable root exists.
+    if (n < 4) return 0;
+    // Accumulate the power sums that make up the normal equations
+    for (i=0; i < n; i++) {
+
+        double xn = x[i];
+        double yn = y[i];
+
+        sum_x  += xn;
+        sum_x2 += xn*xn;
+        sum_x3 += xn*xn*xn;
+        sum_x4 += xn*xn*xn*xn;
+
+        sum_y += yn;
+        sum_yx += yn*xn;
+        sum_yx2 += yn*xn*xn;
+    }
+    // Rows of the 3x3 system, unknowns ordered (c, b, a)
+    _cmsVEC3init(&m.v[0], n,      sum_x,  sum_x2);
+    _cmsVEC3init(&m.v[1], sum_x,  sum_x2, sum_x3);
+    _cmsVEC3init(&m.v[2], sum_x2, sum_x3, sum_x4);
+
+    _cmsVEC3init(&v, sum_y, sum_yx, sum_yx2);
+    // A failed solve means there is no fit, hence no root
+    if (!_cmsMAT3solve(&res, &m, &v)) return 0;
+
+    // Unpack the coefficients of a*x^2 + b*x + c
+    a = res.n[2];
+    b = res.n[1];
+    c = res.n[0];
+
+    if (fabs(a) < 1.0E-10) {
+        if (fabs(b) < 1.0E-10) return 0;   // neither quadratic nor linear term: no root, and -c/b would divide by ~0
+        return cmsmax(0, cmsmin(50, -c/b ));   // linear case: root -c/b clamped to [0, 50] (was an inverted clamp that always gave 0)
+    }
+    else {
+
+         d = b*b - 4.0 * a * c;
+         if (d <= 0) {
+             return 0;
+         }
+         else {
+
+             double rt = (-b + sqrt(d)) / (2.0 * a);
+
+             return cmsmax(0, cmsmin(50, rt));
+         }
+   }
+
+}
+
+
+
+// Calculates the black point of a destination profile.
+// This algorithm comes from the Adobe paper disclosing its black point compensation method.
+cmsBool CMSEXPORT cmsDetectDestinationBlackPoint(cmsCIEXYZ* BlackPoint, cmsHPROFILE hProfile, cmsUInt32Number Intent, cmsUInt32Number dwFlags)
+{
+    cmsColorSpaceSignature ColorSpace;
+    cmsHTRANSFORM hRoundTrip = NULL;
+    cmsCIELab InitialLab, destLab, Lab;
+    cmsFloat64Number inRamp[256], outRamp[256];
+    cmsFloat64Number MinL, MaxL;
+    cmsBool NearlyStraightMidrange = TRUE;  
+    cmsFloat64Number yRamp[256];
+    cmsFloat64Number x[256], y[256];
+    cmsFloat64Number lo, hi;
+    int n, l;
+    cmsProfileClassSignature devClass;
+    // Writes the destination black point to *BlackPoint and returns TRUE on success; the early checks mirror cmsDetectBlackPoint().
+    // Make sure the device class is adequate
+    devClass = cmsGetDeviceClass(hProfile);
+    if (devClass == cmsSigLinkClass ||
+        devClass == cmsSigAbstractClass ||
+        devClass == cmsSigNamedColorClass) {
+            BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+            return FALSE;
+    }
+
+    // Make sure intent is adequate
+    if (Intent != INTENT_PERCEPTUAL &&
+        Intent != INTENT_RELATIVE_COLORIMETRIC &&
+        Intent != INTENT_SATURATION) {
+            BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+            return FALSE;
+    }
+
+
+    // v4 + perceptual & saturation intents does have its own black point, and it is
+    // well specified enough to use it. Black point tag is deprecated in V4.
+    if ((cmsGetEncodedICCversion(hProfile) >= 0x4000000) &&
+        (Intent == INTENT_PERCEPTUAL || Intent == INTENT_SATURATION)) {
+
+            // Matrix shaper share MRC & perceptual intents
+            if (cmsIsMatrixShaper(hProfile))
+                return BlackPointAsDarkerColorant(hProfile, INTENT_RELATIVE_COLORIMETRIC, BlackPoint, 0);
+
+            // Get Perceptual black out of v4 profiles. That is fixed for perceptual & saturation intents
+            BlackPoint -> X = cmsPERCEPTUAL_BLACK_X;
+            BlackPoint -> Y = cmsPERCEPTUAL_BLACK_Y;
+            BlackPoint -> Z = cmsPERCEPTUAL_BLACK_Z;
+            return TRUE;
+    }
+
+
+    // Check if the profile is lut based and gray, rgb or cmyk (7.2 in Adobe's document)
+    ColorSpace = cmsGetColorSpace(hProfile);
+    if (!cmsIsCLUT(hProfile, Intent, LCMS_USED_AS_OUTPUT ) ||
+        (ColorSpace != cmsSigGrayData &&
+         ColorSpace != cmsSigRgbData  &&
+         ColorSpace != cmsSigCmykData)) {
+
+        // In this case, handle as input case
+        return cmsDetectBlackPoint(BlackPoint, hProfile, Intent, dwFlags);
+    }
+
+    // It is one of the valid cases!, use Adobe algorithm
+
+    // Step 1: choose InitialLab
+    // Set a first guess, that should work on good profiles.
+    if (Intent == INTENT_RELATIVE_COLORIMETRIC) {
+
+        cmsCIEXYZ IniXYZ;
+
+        // calculate initial Lab as source black point
+        if (!cmsDetectBlackPoint(&IniXYZ, hProfile, Intent, dwFlags)) {
+            return FALSE;   // NOTE(review): unlike the other failure paths, *BlackPoint is left untouched here
+        }
+
+        // convert the XYZ to lab
+        cmsXYZ2Lab(NULL, &InitialLab, &IniXYZ);
+
+    } else {
+
+        // set the initial Lab to zero, that should be the black point for perceptual and saturation
+        InitialLab.L = 0;
+        InitialLab.a = 0;
+        InitialLab.b = 0;
+    }
+
+
+    // Step 2
+    // ======
+
+    // Create a roundtrip. Define a Transform BT for all x in L*a*b*
+    hRoundTrip = CreateRoundtripXForm(hProfile, Intent);
+    if (hRoundTrip == NULL)  return FALSE;   // NOTE(review): *BlackPoint is left untouched on this path as well
+
+    // Compute ramps
+    // 256 samples of L* from 0 to 100, with a/b taken from InitialLab and limited to [-50, 50]
+    for (l=0; l < 256; l++) {
+
+        Lab.L = (cmsFloat64Number) (l * 100.0) / 255.0;
+        Lab.a = cmsmin(50, cmsmax(-50, InitialLab.a));
+        Lab.b = cmsmin(50, cmsmax(-50, InitialLab.b));
+
+        cmsDoTransform(hRoundTrip, &Lab, &destLab, 1);
+
+        inRamp[l]  = Lab.L;
+        outRamp[l] = destLab.L;
+    }
+
+    // Make monotonic: walking down from l=254 to l=1, cap each entry by its upper neighbour (outRamp[0] is not modified)
+    for (l = 254; l > 0; --l) {
+        outRamp[l] = cmsmin(outRamp[l], outRamp[l+1]);
+    }
+
+    // Check: the ramp must actually rise, which also keeps MaxL - MinL below from being zero
+    if (! (outRamp[0] < outRamp[255])) {
+
+        cmsDeleteTransform(hRoundTrip);
+        BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+        return FALSE;
+    }
+
+
+    // Test for mid range straight (only on relative colorimetric)
+    NearlyStraightMidrange = TRUE;
+    MinL = outRamp[0]; MaxL = outRamp[255];
+    if (Intent == INTENT_RELATIVE_COLORIMETRIC) {
+        // Above the lowest 20% of the output range, input and output L* must differ by less than 4
+        for (l=0; l < 256; l++) {
+
+            if (! ((inRamp[l] <= MinL + 0.2 * (MaxL - MinL) ) ||   
+                (fabs(inRamp[l] - outRamp[l]) < 4.0 )))
+                NearlyStraightMidrange = FALSE;
+        }
+
+        // If the mid range is straight (as determined above) then the 
+        // DestinationBlackPoint shall be the same as initialLab. 
+        // Otherwise, the DestinationBlackPoint shall be determined 
+        // using curve fitting.
+        if (NearlyStraightMidrange) {
+
+            cmsLab2XYZ(NULL, BlackPoint, &InitialLab);
+            cmsDeleteTransform(hRoundTrip);
+            return TRUE;
+        }
+    }
+
+ 
+    // curve fitting: The round-trip curve normally looks like a nearly constant section at the black point,
+    // with a corner and a nearly straight line to the white point.  
+    for (l=0; l < 256; l++) {
+        // Normalize the output ramp to the 0..1 range
+        yRamp[l] = (outRamp[l] - MinL) / (MaxL - MinL);
+    }
+
+    // find the black point using the least squares error quadratic curve fitting
+    if (Intent == INTENT_RELATIVE_COLORIMETRIC) {
+        lo = 0.1;
+        hi = 0.5;
+    }
+    else {
+
+        // Perceptual and saturation
+        lo = 0.03;
+        hi = 0.25;
+    }
+
+    // Capture shadow points for the fitting.
+    n = 0;
+    for (l=0; l < 256; l++) {
+    
+        cmsFloat64Number ff = yRamp[l];
+
+        if (ff >= lo && ff < hi) {
+            x[n] = inRamp[l];
+            y[n] = yRamp[l];
+            n++;
+        }    
+    }
+
+    
+    // No suitable points (note: the fitting helper itself returns 0 when n < 4, so n == 3 yields L* = 0)
+    if (n < 3 ) {
+        cmsDeleteTransform(hRoundTrip);
+        BlackPoint -> X = BlackPoint ->Y = BlackPoint -> Z = 0.0;
+        return FALSE;
+    }
+
+  
+    // fit a quadratic to the shadow points and take the root returned by the helper as L*
+    Lab.L = RootOfLeastSquaresFitQuadraticCurve(n, x, y);
+
+    if (Lab.L < 0.0) { // clip to zero L* if the vertex is negative
+        Lab.L = 0;
+    }
+
+    Lab.a = InitialLab.a;
+    Lab.b = InitialLab.b;
+
+    cmsLab2XYZ(NULL, BlackPoint, &Lab);
+
+    cmsDeleteTransform(hRoundTrip);
+    return TRUE;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmssm.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmssm.cpp
new file mode 100755
index 0000000000..a0fdbc86c5
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmssm.cpp
@@ -0,0 +1,736 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// ------------------------------------------------------------------------
+
+// Gamut boundary description by using Jan Morovic's Segment maxima method
+// Many thanks to Jan for allowing me to use his algorithm.
+
+// r = C*
+// alpha = Hab
+// theta = L*
+
+#define SECTORS 16      // number of divisions in alpha and theta
+
+// Spherical coordinates
+typedef struct {
+    // Polar form of a vector, as filled in by ToSpherical()
+    cmsFloat64Number r;       // vector length
+    cmsFloat64Number alpha;   // angle in degrees computed from the a/b components
+    cmsFloat64Number theta;   // angle in degrees between the vector and the L axis
+
+} cmsSpherical;
+
+typedef  enum {
+        GP_EMPTY,       // no point stored for the sector (first enumerator, value 0)
+        GP_SPECIFIED,   // set by cmsGDBAddPoint() from a supplied sample
+        GP_MODELED      // set by InterpolateMissingSector()
+
+    } GDBPointType;
+
+
+typedef struct {
+    // Content of one sector of the gamut table
+    GDBPointType Type;
+    cmsSpherical p;         // Keep also alpha & theta of maximum
+
+} cmsGDBPoint;
+
+
+typedef struct {
+    // Gamut boundary descriptor; the table is indexed as Gamut[theta sector][alpha sector]
+    cmsContext ContextID;
+    cmsGDBPoint Gamut[SECTORS][SECTORS];
+
+} cmsGDB;
+
+
+// A line using the parametric form
+// P = a + t*u
+typedef struct {
+    // Built by LineOf2Points(), evaluated by GetPointOfLine()
+    cmsVEC3 a;   // point on the line (t = 0)
+    cmsVEC3 u;   // direction; a + u is reached at t = 1
+
+} cmsLine;
+
+
+// A plane using the parametric form
+// Q = b + r*v + s*w
+typedef struct {
+    // NOTE(review): no code in the visible part of this file references this type
+    cmsVEC3 b;   // point on the plane
+    cmsVEC3 v;   // first spanning direction
+    cmsVEC3 w;   // second spanning direction
+
+} cmsPlane;
+
+
+
+// --------------------------------------------------------------------------------------------
+
+// atan2() variant that always returns a non-negative angle in degrees
+
+static
+cmsFloat64Number _cmsAtan2(cmsFloat64Number y, cmsFloat64Number x)
+{
+    cmsFloat64Number degrees;
+
+    // The undefined case atan2(0, 0) is reported as angle zero
+    if (x == 0.0 && y == 0.0)
+        return 0;
+
+    // Radians to degrees, then shift negative angles up by whole turns
+    degrees = (atan2(y, x) * 180.0) / M_PI;
+    for (; degrees < 0; degrees += 360) {
+    }
+
+    return degrees;
+}
+
+// Convert to spherical coordinates
+static
+void ToSpherical(cmsSpherical* sp, const cmsVEC3* v)
+{
+    // Vector components are (L, a, b) in the VX, VY, VZ slots
+    const cmsFloat64Number lightness = v ->n[VX];
+    const cmsFloat64Number ca = v ->n[VY];
+    const cmsFloat64Number cb = v ->n[VZ];
+    const cmsFloat64Number chromaSq = ca*ca + cb*cb;
+    sp ->r = sqrt( lightness*lightness + ca*ca + cb*cb );
+    if (sp ->r != 0) {
+        // alpha: angle in the a/b plane; theta: angle measured from the L axis
+        sp ->alpha = _cmsAtan2(ca, cb);
+        sp ->theta = _cmsAtan2(sqrt(chromaSq), lightness);
+    }
+    else {
+        // Null vector: both angles are reported as zero
+        sp ->theta = 0;
+        sp ->alpha = 0;
+    }
+}
+
+
+// Convert to cartesian from spherical
+static
+void ToCartesian(cmsVEC3* v, const cmsSpherical* sp)
+{
+    // Angles are held in degrees; convert them to radians once for sin()/cos()
+    const cmsFloat64Number alphaRad = (M_PI * sp ->alpha) / 180.0;
+    const cmsFloat64Number thetaRad = (M_PI * sp ->theta) / 180.0;
+    const cmsFloat64Number sinAlpha = sin(alphaRad);
+    const cmsFloat64Number cosAlpha = cos(alphaRad);
+    const cmsFloat64Number sinTheta = sin(thetaRad);
+    const cmsFloat64Number cosTheta = cos(thetaRad);
+    const cmsFloat64Number radius   = sp ->r;
+
+    // Mirror of ToSpherical(): theta is measured from the L axis (VX),
+    // alpha is the angle in the a/b plane (VY/VZ)
+    const cmsFloat64Number L = radius * cosTheta;
+    const cmsFloat64Number a = radius * sinTheta * sinAlpha;
+    const cmsFloat64Number b = radius * sinTheta * cosAlpha;
+
+    v ->n[VX] = L;
+    v ->n[VY] = a;
+    v ->n[VZ] = b;
+}
+
+
+// Quantize sector of a spherical coordinate. Saturate 360, 180 to last sector
+// The limits are the centers of each sector, so
+static
+void QuantizeToSector(const cmsSpherical* sp, int* alpha, int* theta)
+{
+    // Scale each angle to a sector index (alpha over 360 degrees, theta
+    // over 180 degrees); indices past the end fall into the last sector
+    int alphaIdx = (int) floor((sp->alpha * (SECTORS)) / 360.0);
+    int thetaIdx = (int) floor((sp->theta * (SECTORS)) / 180.0);
+
+    *alpha = (alphaIdx < SECTORS) ? alphaIdx : SECTORS-1;
+    *theta = (thetaIdx < SECTORS) ? thetaIdx : SECTORS-1;
+}
+
+
+// Line determined by 2 points
+static
+void LineOf2Points(cmsLine* line, cmsVEC3* a, cmsVEC3* b)
+{
+    // Parametric line through a and b: origin is a, direction is (b - a), so t = 1 reaches b
+    _cmsVEC3init(&line ->a, a ->n[VX], a ->n[VY], a ->n[VZ]);
+    _cmsVEC3init(&line ->u, b ->n[VX] - a ->n[VX],
+                            b ->n[VY] - a ->n[VY],
+                            b ->n[VZ] - a ->n[VZ]);
+}
+
+
+// Evaluate parametric line
+static
+void GetPointOfLine(cmsVEC3* p, const cmsLine* line, cmsFloat64Number t)
+{   // p = a + t*u, evaluated component by component
+    p ->n[VX] = line ->a.n[VX] + t * line->u.n[VX];
+    p ->n[VY] = line ->a.n[VY] + t * line->u.n[VY];
+    p ->n[VZ] = line ->a.n[VZ] + t * line->u.n[VZ];
+}
+
+
+
+/*
+    Closest point in sector line1 to sector line2 (both are defined as 0 <=t <= 1)
+    http://softsurfer.com/Archive/algorithm_0106/algorithm_0106.htm
+
+    Copyright 2001, softSurfer (www.softsurfer.com)
+    This code may be freely used and modified for any purpose
+    providing that this copyright notice is included with it.
+    SoftSurfer makes no warranty for this code, and cannot be held
+    liable for any real or imagined damage resulting from its use.
+    Users of this code must verify correctness for their application.
+
+*/
+
+static
+cmsBool ClosestLineToLine(cmsVEC3* r, const cmsLine* line1, const cmsLine* line2)
+{
+    cmsFloat64Number a, b, c, d, e, D;
+    cmsFloat64Number sc, sN, sD;
+    //cmsFloat64Number tc; // left for future use
+    cmsFloat64Number tN, tD;
+    cmsVEC3 w0;
+    // Stores in r the point of line1 (evaluated at parameter sc) selected by the segment-to-segment computation below; always returns TRUE.
+    _cmsVEC3minus(&w0, &line1 ->a, &line2 ->a);
+
+    a  = _cmsVEC3dot(&line1 ->u, &line1 ->u);
+    b  = _cmsVEC3dot(&line1 ->u, &line2 ->u);
+    c  = _cmsVEC3dot(&line2 ->u, &line2 ->u);
+    d  = _cmsVEC3dot(&line1 ->u, &w0);
+    e  = _cmsVEC3dot(&line2 ->u, &w0);
+
+    D  = a*c - b * b;      // Denominator
+    sD = tD = D;           // default sD = D >= 0
+
+    if (D <  MATRIX_DET_TOLERANCE) {   // the lines are almost parallel
+
+        sN = 0.0;        // force using point P0 on segment S1
+        sD = 1.0;        // to prevent possible division by 0.0 later
+        tN = e;
+        tD = c;
+    }
+    else {                // get the closest points on the infinite lines
+
+        sN = (b*e - c*d);
+        tN = (a*e - b*d);
+
+        if (sN < 0.0) {       // sc < 0 => the s=0 edge is visible
+
+            sN = 0.0;
+            tN = e;
+            tD = c;
+        }
+        else if (sN > sD) {   // sc > 1 => the s=1 edge is visible
+            sN = sD;
+            tN = e + b;
+            tD = c;
+        }
+    }
+    // tN/tD only steer the clamping of sN/sD below; tc itself is never computed
+    if (tN < 0.0) {           // tc < 0 => the t=0 edge is visible
+
+        tN = 0.0;
+        // recompute sc for this edge
+        if (-d < 0.0)
+            sN = 0.0;
+        else if (-d > a)
+            sN = sD;
+        else {
+            sN = -d;
+            sD = a;
+        }
+    }
+    else if (tN > tD) {      // tc > 1 => the t=1 edge is visible
+
+        tN = tD;
+
+        // recompute sc for this edge
+        if ((-d + b) < 0.0)
+            sN = 0;
+        else if ((-d + b) > a)
+            sN = sD;
+        else {
+            sN = (-d + b);
+            sD = a;
+        }
+    }
+    // finally do the division to get sc and tc
+    sc = (fabs(sN) < MATRIX_DET_TOLERANCE ? 0.0 : sN / sD);
+    //tc = (fabs(tN) < MATRIX_DET_TOLERANCE ? 0.0 : tN / tD); // left for future use.
+
+    GetPointOfLine(r, line1, sc);
+    return TRUE;
+}
+
+
+
+// ------------------------------------------------------------------ Wrapper
+
+
+// Allocate & free structure
+cmsHANDLE  CMSEXPORT cmsGBDAlloc(cmsContext ContextID)
+{
+    // Allocated via _cmsMallocZero (presumably zero-filled, which would leave every sector GP_EMPTY)
+    cmsGDB* descriptor = (cmsGDB*) _cmsMallocZero(ContextID, sizeof(cmsGDB));
+
+    if (descriptor != NULL)
+        descriptor -> ContextID = ContextID;   // kept for cmsGBDFree() and error reporting
+    return (cmsHANDLE) descriptor;
+}
+
+
+void CMSEXPORT cmsGBDFree(cmsHANDLE hGBD)
+{
+    cmsGDB* descriptor = (cmsGDB*) hGBD;
+    if (descriptor == NULL) return;   // a NULL handle is ignored
+    _cmsFree(descriptor->ContextID, (void*) descriptor);
+}
+
+
+// Auxiliary to retrieve a pointer to the segment containing the Lab value
+static
+cmsGDBPoint* GetPoint(cmsGDB* gbd, const cmsCIELab* Lab, cmsSpherical* sp)
+{
+    // Returns the table entry (indexed [theta][alpha]) for the given Lab value and
+    // leaves its spherical coordinates in *sp; NULL after signalling a range error.
+    int alphaSector, thetaSector;
+    cmsVEC3 centered;
+
+    // All three pointers are required
+    _cmsAssert(gbd != NULL);
+    _cmsAssert(Lab != NULL);
+    _cmsAssert(sp != NULL);
+
+    // Shift L* down by 50 (half of its domain), then express the
+    // resulting vector in spherical coordinates
+    _cmsVEC3init(&centered, Lab ->L - 50.0, Lab ->a, Lab ->b);
+    ToSpherical(sp, &centered);
+
+    // Negative radius or angles are rejected
+    if (sp ->theta < 0 || sp ->alpha < 0 || sp ->r < 0) {
+         cmsSignalError(gbd ->ContextID, cmsERROR_RANGE, "spherical value out of range");
+         return NULL;
+    }
+
+    // Map the angles to a sector and make sure it lies inside the table
+    QuantizeToSector(sp, &alphaSector, &thetaSector);
+    if (!(alphaSector >= 0 && alphaSector < SECTORS && thetaSector >= 0 && thetaSector < SECTORS)) {
+         cmsSignalError(gbd ->ContextID, cmsERROR_RANGE, " quadrant out of range");
+         return NULL;
+    }
+
+    return &gbd ->Gamut[thetaSector][alphaSector];
+}
+
+// Add a point to gamut descriptor. Point to add is in Lab color space.
+// GBD is centered on a=b=0 and L*=50
+cmsBool CMSEXPORT cmsGDBAddPoint(cmsHANDLE hGBD, const cmsCIELab* Lab)
+{
+    // Records a Lab sample in the gamut boundary descriptor. The sector the
+    // sample falls into keeps whichever sample has the largest radius.
+    // Returns FALSE when GetPoint() cannot map the sample to a sector.
+    cmsGDB* descriptor = (cmsGDB*) hGBD;
+    cmsSpherical candidate;
+    cmsGDBPoint* sector;
+    cmsBool mustStore;
+
+    // Locate the sector; GetPoint() also fills in the spherical coordinates
+    sector = GetPoint(descriptor, Lab, &candidate);
+    if (sector == NULL)
+        return FALSE;
+
+    // A sector without data always takes the sample ...
+    mustStore = (sector ->Type == GP_EMPTY);
+
+    // ... an occupied one only when the new sample has a greater radius
+    if (!mustStore)
+        mustStore = (candidate.r > sector ->p.r);
+
+    if (mustStore) {
+
+        sector -> Type = GP_SPECIFIED;
+        sector -> p    = candidate;
+    }
+
+    return TRUE;
+}
+
+// Check if a given point falls inside gamut
+cmsBool CMSEXPORT cmsGDBCheckPoint(cmsHANDLE hGBD, const cmsCIELab* Lab)
+{
+    // TRUE when the sample's sector holds a point whose radius is at least the
+    // sample's radius; FALSE for unmapped samples and for sectors without data.
+    cmsGDB* descriptor = (cmsGDB*) hGBD;
+    cmsSpherical probe;
+    cmsGDBPoint* sector;
+
+    sector = GetPoint(descriptor, Lab, &probe);
+
+    // Unmapped sample or sector without data: reported as not in gamut
+    if (sector == NULL || sector ->Type == GP_EMPTY)
+        return FALSE;
+
+    // In gamut when the sample is no farther out than the stored point
+    return (probe.r <= sector -> p.r);
+}
+
+// -----------------------------------------------------------------------------------------------------------------------
+
+// Find near sectors. The list of sectors found is returned on Close[].
+// The function returns the number of sectors as well.
+
+// 24   9  10  11  12
+// 23   8   1   2  13
+// 22   7   *   3  14
+// 21   6   5   4  15
+// 20  19  18  17  16
+//
+// Those are the relative movements
+// {-2,-2}, {-1, -2}, {0, -2}, {+1, -2}, {+2,  -2},
+// {-2,-1}, {-1, -1}, {0, -1}, {+1, -1}, {+2,  -1},
+// {-2, 0}, {-1,  0}, {0,  0}, {+1,  0}, {+2,   0},
+// {-2,+1}, {-1, +1}, {0, +1}, {+1,  +1}, {+2,  +1},
+// {-2,+2}, {-1, +2}, {0, +2}, {+1,  +2}, {+2,  +2}};
+
+
+static
+const struct _spiral {
+    // Offsets applied in FindNearSectors(): AdvX is added to alpha, AdvY to theta
+    int AdvX, AdvY;
+
+    } Spiral[] = { {0,  -1}, {+1, -1}, {+1,  0}, {+1, +1}, {0,  +1}, {-1, +1},
+                   {-1,  0}, {-1, -1}, {-1, -2}, {0,  -2}, {+1, -2}, {+2, -2},
+                   {+2, -1}, {+2,  0}, {+2, +1}, {+2, +2}, {+1, +2}, {0,  +2},
+                   {-1, +2}, {-2, +2}, {-2, +1}, {-2, 0},  {-2, -1}, {-2, -2} };
+// Number of entries in Spiral[] (24: the 5x5 neighbourhood without its centre)
+#define NSTEPS (sizeof(Spiral) / sizeof(struct _spiral))
+
+static
+int FindNearSectors(cmsGDB* gbd, int alpha, int theta, cmsGDBPoint* Close[])
+{
+    // Collects into Close[] the non-empty sectors around (alpha, theta), in
+    // Spiral[] order, and returns how many were stored.
+    // Close[] receives at most NSTEPS entries.
+    cmsUInt32Number step;
+    int found = 0;
+
+    // Both indices wrap around the table
+    for (step = 0; step < NSTEPS; step++) {
+
+        cmsGDBPoint* candidate;
+        int col = (alpha + Spiral[step].AdvX) % SECTORS;
+        int row = (theta + Spiral[step].AdvY) % SECTORS;
+
+        // % may leave a negative value for a negative operand: shift it back into range
+        if (col < 0) col += SECTORS;
+        if (row < 0) row += SECTORS;
+
+        // Empty neighbours are skipped
+        candidate = &gbd ->Gamut[row][col];
+        if (candidate -> Type == GP_EMPTY)
+            continue;
+
+        Close[found] = candidate;
+        found++;
+    }
+
+    return found;
+}
+
+
+// Interpolate a missing sector. Method identifies whether this is top, bottom or mid
+static
+cmsBool InterpolateMissingSector(cmsGDB* gbd, int alpha, int theta)
+{
+    cmsSpherical sp;
+    cmsVEC3 Lab;
+    cmsVEC3 Centre;
+    cmsLine ray;
+    int nCloseSectors;
+    cmsGDBPoint* Close[NSTEPS + 1];
+    cmsSpherical closel, templ;
+    cmsLine edge;
+    int k, m;
+    // Fills an empty sector with a modeled point derived from pairs of non-empty neighbours; always returns TRUE.
+    // Is that point already specified?
+    if (gbd ->Gamut[theta][alpha].Type != GP_EMPTY) return TRUE;
+
+    // Fill close points
+    nCloseSectors = FindNearSectors(gbd, alpha, theta, Close);
+
+
+    // Find a central point on the sector
+    sp.alpha = (cmsFloat64Number) ((alpha + 0.5) * 360.0) / (SECTORS);
+    sp.theta = (cmsFloat64Number) ((theta + 0.5) * 180.0) / (SECTORS);
+    sp.r     = 50.0;
+
+    // Convert to Cartesian
+    ToCartesian(&Lab, &sp);
+
+    // Create a ray line from centre to this point
+    _cmsVEC3init(&Centre, 50.0, 0, 0);
+    LineOf2Points(&ray, &Lab, &Centre);
+
+    // For all close sectors
+    closel.r = 0.0;
+    closel.alpha = 0;
+    closel.theta = 0;
+    // Every unordered pair (k, m) of close sectors is tried
+    for (k=0; k < nCloseSectors; k++) {
+
+        for(m = k+1; m < nCloseSectors; m++) {
+
+            cmsVEC3 temp, a1, a2;
+
+            // A line from sector to sector
+            ToCartesian(&a1, &Close[k]->p);
+            ToCartesian(&a2, &Close[m]->p);
+
+            LineOf2Points(&edge, &a1, &a2);
+
+            // Find a line
+            ClosestLineToLine(&temp, &ray, &edge);
+
+            // Convert to spherical
+            ToSpherical(&templ, &temp);
+
+            // Keep the candidate with the largest radius whose angles fall inside this sector
+            if ( templ.r > closel.r &&
+                 templ.theta >= (theta*180.0/SECTORS) &&
+                 templ.theta <= ((theta+1)*180.0/SECTORS) &&
+                 templ.alpha >= (alpha*360.0/SECTORS) &&
+                 templ.alpha <= ((alpha+1)*360.0/SECTORS)) {
+
+                closel = templ;
+            }
+        }
+    }
+    // If no candidate qualified, closel still holds r = alpha = theta = 0
+    gbd ->Gamut[theta][alpha].p = closel;
+    gbd ->Gamut[theta][alpha].Type = GP_MODELED;
+
+    return TRUE;
+
+}
+
+
+// Interpolates every sector that is still empty. The slices at theta=0 and
+// theta=SECTORS-1 are completed first; the remaining slices follow.
+cmsBool CMSEXPORT cmsGDBCompute(cmsHANDLE hGBD, cmsUInt32Number dwFlags)
+{
+    cmsGDB* gbd = (cmsGDB*) hGBD;
+    int slice, sector;
+
+    _cmsAssert(hGBD != NULL);
+
+    // Black slice (theta = 0)
+    sector = 0;
+    while (sector < SECTORS) {
+
+        if (!InterpolateMissingSector(gbd, sector, 0)) return FALSE;
+        sector++;
+    }
+
+    // White slice (theta = SECTORS-1)
+    sector = 0;
+    while (sector < SECTORS) {
+
+        if (!InterpolateMissingSector(gbd, sector, SECTORS-1)) return FALSE;
+        sector++;
+    }
+
+    // Everything from theta = 1 upwards; sectors filled above are left untouched
+    for (slice = 1; slice < SECTORS; slice++)
+        for (sector = 0; sector < SECTORS; sector++)
+            if (!InterpolateMissingSector(gbd, sector, slice)) return FALSE;
+
+    // All sectors have been visited
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(dwFlags);
+}
+
+
+
+
+// --------------------------------------------------------------------------------------------------------
+
+// Great for debug, but not suitable for real use
+
+#if 0
+cmsBool cmsGBDdumpVRML(cmsHANDLE hGBD, const char* fname)
+{
+    FILE* fp;
+    int   i, j;
+    cmsGDB* gbd = (cmsGDB*) hGBD;
+    cmsGDBPoint* pt;
+
+    fp = fopen (fname, "wt");
+    if (fp == NULL)
+        return FALSE;
+
+    fprintf (fp, "#VRML V2.0 utf8\n");
+
+    // set the viewing orientation and distance
+    fprintf (fp, "DEF CamTest Group {\n");
+    fprintf (fp, "\tchildren [\n");
+    fprintf (fp, "\t\tDEF Cameras Group {\n");
+    fprintf (fp, "\t\t\tchildren [\n");
+    fprintf (fp, "\t\t\t\tDEF DefaultView Viewpoint {\n");
+    fprintf (fp, "\t\t\t\t\tposition 0 0 340\n");
+    fprintf (fp, "\t\t\t\t\torientation 0 0 1 0\n");
+    fprintf (fp, "\t\t\t\t\tdescription \"default view\"\n");
+    fprintf (fp, "\t\t\t\t}\n");
+    fprintf (fp, "\t\t\t]\n");
+    fprintf (fp, "\t\t},\n");
+    fprintf (fp, "\t]\n");
+    fprintf (fp, "}\n");
+
+    // Output the background stuff
+    fprintf (fp, "Background {\n");
+    fprintf (fp, "\tskyColor [\n");
+    fprintf (fp, "\t\t.5 .5 .5\n");
+    fprintf (fp, "\t]\n");
+    fprintf (fp, "}\n");
+
+    // Output the shape stuff
+    fprintf (fp, "Transform {\n");
+    fprintf (fp, "\tscale .3 .3 .3\n");
+    fprintf (fp, "\tchildren [\n");
+
+    // Draw the axes as a shape:
+    fprintf (fp, "\t\tShape {\n");
+    fprintf (fp, "\t\t\tappearance Appearance {\n");
+    fprintf (fp, "\t\t\t\tmaterial Material {\n");
+    fprintf (fp, "\t\t\t\t\tdiffuseColor 0 0.8 0\n");
+    fprintf (fp, "\t\t\t\t\temissiveColor 1.0 1.0 1.0\n");
+    fprintf (fp, "\t\t\t\t\tshininess 0.8\n");
+    fprintf (fp, "\t\t\t\t}\n");
+    fprintf (fp, "\t\t\t}\n");
+    fprintf (fp, "\t\t\tgeometry IndexedLineSet {\n");
+    fprintf (fp, "\t\t\t\tcoord Coordinate {\n");
+    fprintf (fp, "\t\t\t\t\tpoint [\n");
+    fprintf (fp, "\t\t\t\t\t0.0 0.0 0.0,\n");
+    fprintf (fp, "\t\t\t\t\t%f 0.0 0.0,\n",  255.0);
+    fprintf (fp, "\t\t\t\t\t0.0 %f 0.0,\n",  255.0);
+    fprintf (fp, "\t\t\t\t\t0.0 0.0 %f]\n",  255.0);
+    fprintf (fp, "\t\t\t\t}\n");
+    fprintf (fp, "\t\t\t\tcoordIndex [\n");
+    fprintf (fp, "\t\t\t\t\t0, 1, -1\n");
+    fprintf (fp, "\t\t\t\t\t0, 2, -1\n");
+    fprintf (fp, "\t\t\t\t\t0, 3, -1]\n");
+    fprintf (fp, "\t\t\t}\n");
+    fprintf (fp, "\t\t}\n");
+
+
+    fprintf (fp, "\t\tShape {\n");
+    fprintf (fp, "\t\t\tappearance Appearance {\n");
+    fprintf (fp, "\t\t\t\tmaterial Material {\n");
+    fprintf (fp, "\t\t\t\t\tdiffuseColor 0 0.8 0\n");
+    fprintf (fp, "\t\t\t\t\temissiveColor 1 1 1\n");
+    fprintf (fp, "\t\t\t\t\tshininess 0.8\n");
+    fprintf (fp, "\t\t\t\t}\n");
+    fprintf (fp, "\t\t\t}\n");
+    fprintf (fp, "\t\t\tgeometry PointSet {\n");
+
+    // fill in the points here
+    fprintf (fp, "\t\t\t\tcoord Coordinate {\n");
+    fprintf (fp, "\t\t\t\t\tpoint [\n");
+
+    // We need to transverse all gamut hull.
+    for (i=0; i < SECTORS; i++)
+        for (j=0; j < SECTORS; j++) {
+
+            cmsVEC3 v;
+
+            pt = &gbd ->Gamut[i][j];
+            ToCartesian(&v, &pt ->p);
+
+            fprintf (fp, "\t\t\t\t\t%g %g %g", v.n[0]+50, v.n[1], v.n[2]);
+
+            if ((j == SECTORS - 1) && (i == SECTORS - 1))
+                fprintf (fp, "]\n");
+            else
+                fprintf (fp, ",\n");
+
+        }
+
+        fprintf (fp, "\t\t\t\t}\n");
+
+
+
+    // fill in the face colors
+    fprintf (fp, "\t\t\t\tcolor Color {\n");
+    fprintf (fp, "\t\t\t\t\tcolor [\n");
+
+    for (i=0; i < SECTORS; i++)
+        for (j=0; j < SECTORS; j++) {
+
+           cmsVEC3 v;
+
+            pt = &gbd ->Gamut[i][j];
+
+
+            ToCartesian(&v, &pt ->p);
+
+
+        if (pt ->Type == GP_EMPTY)
+            fprintf (fp, "\t\t\t\t\t%g %g %g", 0.0, 0.0, 0.0);
+        else
+            if (pt ->Type == GP_MODELED)
+                fprintf (fp, "\t\t\t\t\t%g %g %g", 1.0, .5, .5);
+            else {
+                fprintf (fp, "\t\t\t\t\t%g %g %g", 1.0, 1.0, 1.0);
+
+            }
+
+        if ((j == SECTORS - 1) && (i == SECTORS - 1))
+                fprintf (fp, "]\n");
+            else
+                fprintf (fp, ",\n");
+    }
+    fprintf (fp, "\t\t\t}\n");
+
+
+    fprintf (fp, "\t\t\t}\n");
+    fprintf (fp, "\t\t}\n");
+    fprintf (fp, "\t]\n");
+    fprintf (fp, "}\n");
+
+    fclose (fp);
+
+    return TRUE;
+}
+#endif
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmstypes.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmstypes.cpp
new file mode 100755
index 0000000000..6cb8d4be36
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmstypes.cpp
@@ -0,0 +1,5633 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Tag Serialization  -----------------------------------------------------------------------------
+// This file implements every single tag and tag type as described in the ICC spec. Some types
+// have been deprecated, like ncl and Data. There is no implementation for those types as there
+// are no profiles holding them. The programmer can also extend this list by defining custom types
+// through the appropriate plug-in. There are three kinds of plug-ins for that. The first kind
+// allows defining new tags using any existing type. The next kind allows defining new types,
+// and the third one is very specific: it allows extending the number of elements in the
+// multiprocessing elements special type.
+//--------------------------------------------------------------------------------------------------
+
+// Some broken types
+#define cmsCorbisBrokenXYZtype    ((cmsTagTypeSignature) 0x17A505B8)
+#define cmsMonacoBrokenCurveType  ((cmsTagTypeSignature) 0x9478ee00)
+
+// Node of the singly linked list that keeps track of the defined types
+typedef struct _cmsTagTypeLinkedList_st {
+
+    cmsTagTypeHandler Handler;               // Handler stored by value in the node
+    struct _cmsTagTypeLinkedList_st* Next;   // Following node; NULL ends the list (see GetHandler)
+
+} _cmsTagTypeLinkedList;
+
+// Callback names follow a fixed pattern: Type_<x>_Read, Type_<x>_Write, Type_<x>_Free and Type_<x>_Dup.
+#define READ_FN(x)  Type_##x##_Read
+#define WRITE_FN(x) Type_##x##_Write
+#define FREE_FN(x)  Type_##x##_Free
+#define DUP_FN(x)   Type_##x##_Dup
+
+// Helper macro to define a handler: a brace initializer with signature t and the read, write, dup and free callbacks of x (in that order).
+#define TYPE_HANDLER(t, x)  { (t), READ_FN(x), WRITE_FN(x), DUP_FN(x), FREE_FN(x), NULL, 0 }
+
+// Helper macro to define a MPE handler: read and write follow the naming convention, dup and free are GenericMPEdup and GenericMPEfree.
+#define TYPE_MPE_HANDLER(t, x)  { (t), READ_FN(x), WRITE_FN(x), GenericMPEdup, GenericMPEfree, NULL, 0 }
+
+// Stand-ins for infinity: large finite float constants (1E22), not IEEE infinities
+#define MINUS_INF   (-1E22F)
+#define PLUS_INF    (+1E22F)
+
+
+// Register a new type handler, or drop all registered ones when Data is NULL. Shared between normal types and MPE; pos selects the context chunk holding the list head
+static
+cmsBool RegisterTypesPlugin(cmsContext id, cmsPluginBase* Data, _cmsMemoryClient pos)
+{
+    _cmsTagTypePluginChunkType* chunk = ( _cmsTagTypePluginChunkType*) _cmsContextGetClientChunk(id, pos);
+    cmsPluginTagType* plugin = (cmsPluginTagType*) Data;
+    _cmsTagTypeLinkedList* node;
+
+    if (Data != NULL) {
+
+        // Registering happens in plug-in memory pool.
+        node = (_cmsTagTypeLinkedList*) _cmsPluginMalloc(id, sizeof(_cmsTagTypeLinkedList));
+        if (node == NULL) return FALSE;
+
+        // The new node becomes the head of the list
+        node ->Handler = plugin ->Handler;
+        node ->Next    = chunk ->TagTypes;
+        chunk ->TagTypes = node;
+
+        return TRUE;
+    }
+
+    // A NULL plug-in unregisters everything. The nodes are not freed here, as the
+    // pool is destroyed as a whole.
+    chunk ->TagTypes = NULL;
+    return TRUE;
+}
+
+// Return handler for a given type or NULL if not found. Shared between normal types and MPE. It first tries the additions
+// made by plug-ins and then the built-in defaults.
+static
+cmsTagTypeHandler* GetHandler(cmsTagTypeSignature sig, _cmsTagTypeLinkedList* PluginLinkedList, _cmsTagTypeLinkedList* DefaultLinkedList)
+{
+    _cmsTagTypeLinkedList* heads[2];
+    _cmsTagTypeLinkedList* node;
+    int which;
+
+    // Search order: plug-in list first, defaults second
+    heads[0] = PluginLinkedList;
+    heads[1] = DefaultLinkedList;
+
+    for (which = 0; which < 2; which++) {
+        node = heads[which];
+        while (node != NULL) {
+            if (node ->Handler.Signature == sig) return &node ->Handler;
+            node = node ->Next;
+        }
+    }
+
+    return NULL;
+}
+
+
+// Auxiliary to write n wchar_t as 16-bit values; each element is cast to cmsUInt16Number
+static
+cmsBool _cmsWriteWCharArray(cmsIOHANDLER* io, cmsUInt32Number n, const wchar_t* Array)
+{
+    const wchar_t* cursor = Array;
+    cmsUInt32Number left;
+
+    _cmsAssert(io != NULL);
+    _cmsAssert(!(Array == NULL && n > 0));
+
+    for (left = n; left > 0; left--, cursor++) {
+        if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) *cursor)) return FALSE;
+    }
+    return TRUE;
+}
+
+// Auxiliary to read an array of n wchar_t, each stored as a 16-bit value.
+// When Array is NULL the reader is still called n times, with a NULL destination.
+static
+cmsBool _cmsReadWCharArray(cmsIOHANDLER* io, cmsUInt32Number n, wchar_t* Array)
+{
+    cmsUInt16Number unit;
+    cmsUInt32Number k = 0;
+
+    _cmsAssert(io != NULL);
+
+    while (k < n) {
+
+        // Without an Array there is nowhere to keep the value
+        if (!_cmsReadUInt16Number(io, (Array != NULL) ? &unit : NULL)) return FALSE;
+
+        if (Array != NULL)
+            Array[k] = (wchar_t) unit;
+
+        k++;
+    }
+
+    return TRUE;
+}
+
+// Callback used by the position table helpers: called once per table entry, with n being the entry index
+typedef cmsBool (* PositionTableEntryFn)(struct _cms_typehandler_struct* self,
+                                             cmsIOHANDLER* io,
+                                             void* Cargo,
+                                             cmsUInt32Number n,
+                                             cmsUInt32Number SizeOfTag);
+
+// Helper function to deal with position tables as described in ICC spec 4.3
+// A table of Count elements is read: first come Count records holding offsets and sizes, then
+// a block containing the data itself. This allows the same data to be reused by more than one entry
+static
+cmsBool ReadPositionTable(struct _cms_typehandler_struct* self,
+                              cmsIOHANDLER* io,
+                              cmsUInt32Number Count,
+                              cmsUInt32Number BaseOffset,
+                              void *Cargo,
+                              PositionTableEntryFn ElementFn)
+{
+    cmsUInt32Number *Offsets = NULL, *Sizes = NULL;
+    cmsUInt32Number StartPos, k;
+    cmsBool Success = FALSE;
+
+    StartPos = io->Tell(io);
+
+    // Each record takes two cmsUInt32Number items; reject a Count that cannot fit in the space left.
+    if (((io->ReportedSize - StartPos) / (2 * sizeof(cmsUInt32Number))) < Count)
+        return FALSE;
+
+    // Storage for the directory
+    Offsets = (cmsUInt32Number *) _cmsCalloc(io ->ContextID, Count, sizeof(cmsUInt32Number));
+    if (Offsets == NULL) goto Done;
+
+    Sizes = (cmsUInt32Number *) _cmsCalloc(io ->ContextID, Count, sizeof(cmsUInt32Number));
+    if (Sizes == NULL) goto Done;
+
+    // Read the directory; BaseOffset is added to every stored offset
+    for (k = 0; k < Count; k++) {
+
+        if (!_cmsReadUInt32Number(io, &Offsets[k]) ||
+            !_cmsReadUInt32Number(io, &Sizes[k])) goto Done;
+
+        Offsets[k] += BaseOffset;
+    }
+
+    // Seek to each element and pass it to the reader callback
+    for (k = 0; k < Count; k++) {
+
+        if (!io -> Seek(io, Offsets[k])) goto Done;
+
+        if (!ElementFn(self, io, Cargo, k, Sizes[k])) goto Done;
+    }
+
+    // Every element was read
+    Success = TRUE;
+
+Done:
+    // Common exit: the directory is released on success and on failure alike
+    if (Offsets != NULL) _cmsFree(io ->ContextID, Offsets);
+    if (Sizes != NULL) _cmsFree(io ->ContextID, Sizes);
+
+    return Success;
+}
+
+// Write counterpart of ReadPositionTable: a placeholder directory goes first and is patched once all elements are written
+static
+cmsBool WritePositionTable(struct _cms_typehandler_struct* self,
+                               cmsIOHANDLER* io,
+                               cmsUInt32Number SizeOfTag,
+                               cmsUInt32Number Count,
+                               cmsUInt32Number BaseOffset,
+                               void *Cargo,
+                               PositionTableEntryFn ElementFn)
+{
+    cmsUInt32Number *Offsets = NULL, *Sizes = NULL;
+    cmsUInt32Number DirStart, ElemStart, EndPos, k;
+    cmsBool Success = FALSE;
+
+    // Per-element bookkeeping
+    Offsets = (cmsUInt32Number *) _cmsCalloc(io ->ContextID, Count, sizeof(cmsUInt32Number));
+    if (Offsets == NULL) goto Done;
+
+    Sizes = (cmsUInt32Number *) _cmsCalloc(io ->ContextID, Count, sizeof(cmsUInt32Number));
+    if (Sizes == NULL) goto Done;
+
+    // Remember where the directory begins
+    DirStart = io ->Tell(io);
+
+    // Reserve the directory with zeroed (offset, size) pairs; real values come later
+    for (k = 0; k < Count; k++) {
+
+        if (!_cmsWriteUInt32Number(io, 0) ||
+            !_cmsWriteUInt32Number(io, 0)) goto Done;
+    }
+
+    // Emit the elements, recording where each one starts and how long it is
+    for (k = 0; k < Count; k++) {
+
+        ElemStart = io ->Tell(io);
+        Offsets[k] = ElemStart - BaseOffset;
+
+        // The callback does the actual writing
+        if (!ElementFn(self, io, Cargo, k, SizeOfTag)) goto Done;
+
+        Sizes[k] = io ->Tell(io) - ElemStart;
+    }
+
+    // Go back and fill in the directory
+    EndPos = io ->Tell(io);
+    if (!io ->Seek(io, DirStart)) goto Done;
+
+    for (k = 0; k < Count; k++) {
+
+        if (!_cmsWriteUInt32Number(io, Offsets[k]) ||
+            !_cmsWriteUInt32Number(io, Sizes[k])) goto Done;
+    }
+
+    // Return to the position reached after the last element
+    if (!io ->Seek(io, EndPos)) goto Done;
+
+    Success = TRUE;
+
+Done:
+    if (Offsets != NULL) _cmsFree(io ->ContextID, Offsets);
+    if (Sizes != NULL) _cmsFree(io ->ContextID, Sizes);
+
+    return Success;
+}
+
+
+// ********************************************************************************
+// Type XYZ. Only one value is allowed
+// ********************************************************************************
+
+//The XYZType contains an array of three encoded values for the XYZ tristimulus
+//values. Tristimulus values must be non-negative. The signed encoding allows for
+//implementation optimizations by minimizing the number of fixed formats.
+
+
+static
+void *Type_XYZ_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    // Reads one XYZ value into a new cmsCIEXYZ. *nItems is 1 on success, 0 when NULL is returned.
+    cmsCIEXYZ* Value;
+
+    *nItems = 0;
+    Value = (cmsCIEXYZ*) _cmsMallocZero(self ->ContextID, sizeof(cmsCIEXYZ));
+    if (Value == NULL) return NULL;
+
+    if (_cmsReadXYZNumber(io, Value)) {
+        *nItems = 1;
+        return (void*) Value;
+    }
+    _cmsFree(self ->ContextID, Value);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+static
+cmsBool  Type_XYZ_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    return _cmsWriteXYZNumber(io, (cmsCIEXYZ*) Ptr);   // Ptr is written as one cmsCIEXYZ value
+    // Placed after the return, so never executed; presumably only marks the parameters as unused
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+static
+void* Type_XYZ_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    return _cmsDupMem(self ->ContextID, Ptr, sizeof(cmsCIEXYZ));   // size is always sizeof(cmsCIEXYZ)
+    // n does not take part in the copy; the line below is unreachable and only references it
+    cmsUNUSED_PARAMETER(n);
+}
+
+static
+void Type_XYZ_Free(struct _cms_typehandler_struct* self, void *Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);   // released through the handler's ContextID
+}
+
+
+static
+cmsTagTypeSignature DecideXYZtype(cmsFloat64Number ICCVersion, const void *Data)
+{
+    return cmsSigXYZType;   // same answer for every ICCVersion and Data
+    // Unreachable; presumably only marks both parameters as unused
+    cmsUNUSED_PARAMETER(ICCVersion);
+    cmsUNUSED_PARAMETER(Data);
+}
+
+
+// ********************************************************************************
+// Type chromaticity. Only one value is allowed
+// ********************************************************************************
+// The chromaticity tag type provides basic chromaticity data and type of
+// phosphors or colorants of a monitor to applications and utilities.
+
+static
+void *Type_Chromaticity_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    // Reads the x,y chromaticities of three colorants into a new cmsCIExyYTRIPLE.
+    // Any channel count other than 3 is rejected. NULL is returned on failure.
+    cmsCIExyYTRIPLE* Triple;
+    cmsUInt16Number Channels, Table;
+
+    *nItems = 0;
+    Triple = (cmsCIExyYTRIPLE*) _cmsMallocZero(self ->ContextID, sizeof(cmsCIExyYTRIPLE));
+    if (Triple == NULL) return NULL;
+
+    if (!_cmsReadUInt16Number(io, &Channels)) goto Error;
+
+    // Recover from a bug introduced in early versions of lcms1: with a zero count
+    // in a 32-byte tag, one 16-bit word is skipped and the count is read again
+    if (Channels == 0 && SizeOfTag == 32) {
+        if (!_cmsReadUInt16Number(io, NULL) ||
+            !_cmsReadUInt16Number(io, &Channels)) goto Error;
+    }
+
+    if (Channels != 3) goto Error;
+
+    if (!_cmsReadUInt16Number(io, &Table)) goto Error;
+
+    // Per colorant: x and y come from the stream, Y is set to 1.0
+    if (!_cmsRead15Fixed16Number(io, &Triple ->Red.x) ||
+        !_cmsRead15Fixed16Number(io, &Triple ->Red.y)) goto Error;
+    Triple ->Red.Y = 1.0;
+
+    if (!_cmsRead15Fixed16Number(io, &Triple ->Green.x) ||
+        !_cmsRead15Fixed16Number(io, &Triple ->Green.y)) goto Error;
+    Triple ->Green.Y = 1.0;
+
+    if (!_cmsRead15Fixed16Number(io, &Triple ->Blue.x) ||
+        !_cmsRead15Fixed16Number(io, &Triple ->Blue.y)) goto Error;
+    Triple ->Blue.Y = 1.0;
+
+    *nItems = 1;
+    return (void*) Triple;
+
+Error:
+    _cmsFree(self ->ContextID, (void*) Triple);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+static
+cmsBool  SaveOneChromaticity(cmsFloat64Number x, cmsFloat64Number y, cmsIOHANDLER* io)
+{
+    // Writes x and then y, each converted with _cmsDoubleTo15Fixed16 and stored as 32 bits.
+    // y is not attempted when writing x fails.
+    return _cmsWriteUInt32Number(io, (cmsUInt32Number) _cmsDoubleTo15Fixed16(x)) &&
+           _cmsWriteUInt32Number(io, (cmsUInt32Number) _cmsDoubleTo15Fixed16(y));
+}
+
+static
+cmsBool  Type_Chromaticity_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsCIExyYTRIPLE* Triple = (cmsCIExyYTRIPLE*) Ptr;
+
+    // Two 16-bit words first: nChannels (always 3) and Table (always 0)
+    if (!_cmsWriteUInt16Number(io, 3) ||
+        !_cmsWriteUInt16Number(io, 0)) return FALSE;
+
+    // Then one (x, y) pair per colorant, in red, green, blue order; Y is not written
+    return SaveOneChromaticity(Triple -> Red.x,   Triple -> Red.y, io) &&
+           SaveOneChromaticity(Triple -> Green.x, Triple -> Green.y, io) &&
+           SaveOneChromaticity(Triple -> Blue.x,  Triple -> Blue.y, io);
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+static
+void* Type_Chromaticity_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    return _cmsDupMem(self ->ContextID, Ptr, sizeof(cmsCIExyYTRIPLE));   // size is always one cmsCIExyYTRIPLE
+    // n does not take part in the copy; the line below is unreachable and only references it
+    cmsUNUSED_PARAMETER(n);
+}
+
+static
+void Type_Chromaticity_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);   // released through the handler's ContextID
+}
+
+
+// ********************************************************************************
+// Type cmsSigColorantOrderType
+// ********************************************************************************
+
+// This is an optional tag which specifies the laydown order in which colorants will
+// be printed on an n-colorant device. The laydown order may be the same as the
+// channel generation order listed in the colorantTableTag or the channel order of a
+// colour space such as CMYK, in which case this tag is not needed. When this is not
+// the case (for example, ink-towers sometimes use the order KCMY), this tag may be
+// used to specify the laydown order of the colorants.
+
+
+static
+void *Type_ColorantOrderType_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    // Returns a table of cmsMAXCHANNELS bytes: the Count entries read from io come first,
+    // the rest keeps the 0xFF end marker. NULL is returned on any failure.
+    cmsUInt32Number Entries;
+    cmsUInt8Number* Order;
+
+    *nItems = 0;
+    if (!_cmsReadUInt32Number(io, &Entries) || Entries > cmsMAXCHANNELS) return NULL;
+
+    Order = (cmsUInt8Number*) _cmsCalloc(self ->ContextID, cmsMAXCHANNELS, sizeof(cmsUInt8Number));
+    if (Order == NULL) return NULL;
+
+    // We use FF as end marker
+    memset(Order, 0xFF, cmsMAXCHANNELS * sizeof(cmsUInt8Number));
+
+    if (io ->Read(io, Order, sizeof(cmsUInt8Number), Entries) == Entries) {
+        *nItems = 1;
+        return (void*) Order;
+    }
+
+    _cmsFree(self ->ContextID, (void*) Order);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+static
+cmsBool Type_ColorantOrderType_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsUInt8Number* Order = (cmsUInt8Number*) Ptr;
+    cmsUInt32Number Used = 0;
+    cmsUInt32Number k;
+
+    // The length is the number of slots not holding the 0xFF marker
+    for (k = 0; k < cmsMAXCHANNELS; k++)
+        if (Order[k] != 0xFF) Used++;
+
+    if (!_cmsWriteUInt32Number(io, Used)) return FALSE;
+
+    // Then the first Used bytes of the table
+    if (!io -> Write(io, (cmsUInt32Number) (Used * sizeof(cmsUInt8Number)), Order)) return FALSE;
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+static
+void* Type_ColorantOrderType_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    return _cmsDupMem(self ->ContextID, Ptr, cmsMAXCHANNELS * sizeof(cmsUInt8Number));   // the whole cmsMAXCHANNELS-byte table
+    // n does not take part in the copy; the line below is unreachable and only references it
+    cmsUNUSED_PARAMETER(n);
+}
+
+
+static
+void Type_ColorantOrderType_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);   // released through the handler's ContextID
+}
+
+// ********************************************************************************
+// Type cmsSigS15Fixed16ArrayType
+// ********************************************************************************
+// This type represents an array of generic 4-byte/32-bit fixed point quantity.
+// The number of values is determined from the size of the tag.
+
+static
+void *Type_S15Fixed16_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    // Reads SizeOfTag / sizeof(cmsUInt32Number) s15Fixed16 numbers into a new array of
+    // cmsFloat64Number. *nItems gets the element count on success, 0 on failure.
+    cmsUInt32Number Total = SizeOfTag / sizeof(cmsUInt32Number);
+    cmsFloat64Number* Values;
+    cmsUInt32Number k;
+
+    *nItems = 0;
+    Values = (cmsFloat64Number*) _cmsCalloc(self ->ContextID, Total, sizeof(cmsFloat64Number));
+    if (Values == NULL) return NULL;
+
+    for (k = 0; k < Total; k++) {
+        if (_cmsRead15Fixed16Number(io, &Values[k])) continue;
+
+        _cmsFree(self ->ContextID, Values);
+        return NULL;
+    }
+
+    *nItems = Total;
+    return (void*) Values;
+}
+
+static
+cmsBool Type_S15Fixed16_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsFloat64Number* Cursor = (cmsFloat64Number*) Ptr;
+    cmsUInt32Number Left;
+
+    // Every value goes out through _cmsWrite15Fixed16Number; the first failed write aborts
+    for (Left = nItems; Left > 0; Left--, Cursor++) {
+        if (!_cmsWrite15Fixed16Number(io, *Cursor)) return FALSE;
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(self);
+}
+
+static
+void* Type_S15Fixed16_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    return _cmsDupMem(self ->ContextID, Ptr, n * sizeof(cmsFloat64Number));   // n elements of cmsFloat64Number
+}
+
+
+static
+void Type_S15Fixed16_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);   // released through the handler's ContextID
+}
+
+// ********************************************************************************
+// Type cmsSigU16Fixed16ArrayType
+// ********************************************************************************
+// This type represents an array of generic 4-byte/32-bit quantity.
+// The number of values is determined from the size of the tag.
+
+
+static
+void *Type_U16Fixed16_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    // Reads SizeOfTag / sizeof(cmsUInt32Number) 32-bit values and returns them, divided by
+    // 65536, as cmsFloat64Number. *nItems gets the count on success, 0 on failure.
+    cmsUInt32Number Total = SizeOfTag / sizeof(cmsUInt32Number);
+    cmsFloat64Number* Values;
+    cmsUInt32Number Raw, k;
+
+    *nItems = 0;
+    Values = (cmsFloat64Number*) _cmsCalloc(self ->ContextID, Total, sizeof(cmsFloat64Number));
+    if (Values == NULL) return NULL;
+
+    for (k = 0; k < Total; k++) {
+        if (_cmsReadUInt32Number(io, &Raw)) {
+            // Convert to cmsFloat64Number
+            Values[k] = (cmsFloat64Number) (Raw / 65536.0);
+            continue;
+        }
+        _cmsFree(self ->ContextID, (void*) Values);
+        return NULL;
+    }
+
+    *nItems = Total;
+    return (void*) Values;
+}
+
+static
+cmsBool Type_U16Fixed16_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsFloat64Number* Cursor = (cmsFloat64Number*) Ptr;
+    cmsUInt32Number Left, Encoded;
+
+    for (Left = nItems; Left > 0; Left--, Cursor++) {
+        // Scale by 65536, add 0.5 and take the floor to get the value to store
+        Encoded = (cmsUInt32Number) floor((*Cursor)*65536.0 + 0.5);
+
+        if (!_cmsWriteUInt32Number(io, Encoded)) return FALSE;
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+static
+void* Type_U16Fixed16_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    return _cmsDupMem(self ->ContextID, Ptr, n * sizeof(cmsFloat64Number));   // n elements of cmsFloat64Number
+}
+
+static
+void Type_U16Fixed16_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);   // released through the handler's ContextID
+}
+
+// ********************************************************************************
+// Type cmsSigSignatureType
+// ********************************************************************************
+//
+// The signatureType contains a four-byte sequence, Sequences of less than four
+// characters are padded at the end with spaces, 20h.
+// Typically this type is used for registered tags that can be displayed on many
+// development systems as a sequence of four characters.
+// Reads one 32-bit signature into a newly allocated cmsSignature. Returns NULL (nothing left allocated) on failure.
+static
+void *Type_Signature_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsSignature* SigPtr = (cmsSignature*) _cmsMalloc(self ->ContextID, sizeof(cmsSignature));
+    if (SigPtr == NULL) return NULL;
+    if (!_cmsReadUInt32Number(io, SigPtr)) {
+        _cmsFree(self ->ContextID, SigPtr);    // failed read: release the buffer instead of leaking it
+        return NULL;
+    }
+    *nItems = 1;
+    return SigPtr;
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+static
+cmsBool  Type_Signature_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsSignature* SigPtr = (cmsSignature*) Ptr;
+    // Exactly one signature is written, as a 32-bit number
+    return _cmsWriteUInt32Number(io, *SigPtr);
+    // Unreachable after the return; presumably only marks the parameters as unused
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+static
+void* Type_Signature_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    return _cmsDupMem(self ->ContextID, Ptr, n * sizeof(cmsSignature));   // n elements of cmsSignature
+}
+
+static
+void Type_Signature_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    _cmsFree(self ->ContextID, Ptr);   // released through the handler's ContextID
+}
+
+
+// ********************************************************************************
+// Type cmsSigTextType
+// ********************************************************************************
+//
+// The textType is a simple text structure that contains a 7-bit ASCII text string.
+// The length of the string is obtained by subtracting 8 from the element size portion
+// of the tag itself. This string must be terminated with a 00h byte.
+
+static
+void *Type_Text_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsMLU* Container;
+    char* Buffer = NULL;
+
+    // Create the MLU container that is handed back (allocated with a count of 1)
+    Container = cmsMLUalloc(self ->ContextID, 1);
+    if (Container == NULL) return NULL;
+
+    *nItems = 0;
+
+    // SizeOfTag + 1 bytes are needed to hold the "\0" at the end; that sum must not wrap
+    if (SizeOfTag == UINT_MAX) goto Fail;
+
+    Buffer = (char*) _cmsMalloc(self ->ContextID, SizeOfTag + 1);
+    if (Buffer == NULL) goto Fail;
+
+    if (io -> Read(io, Buffer, sizeof(char), SizeOfTag) != SizeOfTag) goto Fail;
+
+    // Terminate the text whatever the tag contained
+    Buffer[SizeOfTag] = 0;
+    *nItems = 1;
+
+    // Store it under cmsNoLanguage / cmsNoCountry
+    if (!cmsMLUsetASCII(Container, cmsNoLanguage, cmsNoCountry, Buffer)) goto Fail;
+
+    // The scratch buffer is released before the container is returned
+    _cmsFree(self ->ContextID, Buffer);
+    return (void*) Container;
+
+Fail:
+    // Container was allocated above; Buffer only if execution got that far
+    cmsMLUfree(Container);
+    if (Buffer != NULL) _cmsFree(self ->ContextID, Buffer);
+
+    return NULL;
+}
+
+// Writes the ASCII text stored under cmsNoLanguage/cmsNoCountry, terminator included.
+static
+cmsBool Type_Text_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsMLU* Source = (cmsMLU*) Ptr;
+    cmsUInt32Number Needed;
+    cmsBool Written;
+    char* Buffer;
+
+    // A first call without a buffer gives the size of the string, extra "\0" included
+    Needed = cmsMLUgetASCII(Source, cmsNoLanguage, cmsNoCountry, NULL, 0);
+    if (Needed == 0) return FALSE;       // Cannot be zero!
+
+    Buffer = (char*) _cmsMalloc(self ->ContextID, Needed);
+    if (Buffer == NULL) return FALSE;
+
+    // The second call fills the buffer
+    cmsMLUgetASCII(Source, cmsNoLanguage, cmsNoCountry, Buffer, Needed);
+
+    // The buffer is released whether or not the write succeeds
+    Written = io ->Write(io, Needed, Buffer);
+    _cmsFree(self ->ContextID, Buffer);
+
+    return Written;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+// Duplicates the MLU behind a text tag; neither the handler nor the count is needed.
+static
+void* Type_Text_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsMLU* Source = (cmsMLU*) Ptr;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    return (void*) cmsMLUdup(Source);
+}
+
+
+// Disposes of the MLU behind a text tag.
+static
+void Type_Text_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsUNUSED_PARAMETER(self);
+
+    cmsMLUfree((cmsMLU*) Ptr);
+}
+
+// Chooses the on-disk type for a text tag: multiLocalizedUnicode for
+// ICC version 4.0 and above, plain text otherwise. Data is not inspected.
+static
+cmsTagTypeSignature DecideTextType(cmsFloat64Number ICCVersion, const void *Data)
+{
+    cmsUNUSED_PARAMETER(Data);
+
+    return (ICCVersion >= 4.0) ? cmsSigMultiLocalizedUnicodeType : cmsSigTextType;
+}
+
+
+// ********************************************************************************
+// Type cmsSigDataType
+// ********************************************************************************
+
+// General purpose data type
+// Reads a dataType tag: a 32-bit flag followed by (SizeOfTag - 4) payload bytes.
+// Returns a newly allocated cmsICCData, or NULL on failure; *nItems is 1 on success.
+static
+void *Type_Data_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsContext ContextID = self ->ContextID;
+    cmsICCData* Blob;
+    cmsUInt32Number PayloadLen;
+
+    *nItems = 0;
+
+    // The flag word alone needs four bytes
+    if (SizeOfTag < sizeof(cmsUInt32Number)) return NULL;
+
+    PayloadLen = SizeOfTag - sizeof(cmsUInt32Number);
+    if (PayloadLen > INT_MAX) return NULL;
+
+    // NOTE(review): the "- 1" suggests cmsICCData already accounts for one
+    // payload byte -- confirm against the structure declaration
+    Blob = (cmsICCData*) _cmsMalloc(ContextID, sizeof(cmsICCData) + PayloadLen - 1);
+    if (Blob == NULL) return NULL;
+
+    Blob ->len = PayloadLen;
+
+    // Flag first, then the raw bytes; both must succeed
+    if (_cmsReadUInt32Number(io, &Blob ->flag) &&
+        io -> Read(io, Blob ->data, sizeof(cmsUInt8Number), PayloadLen) == PayloadLen) {
+
+        *nItems = 1;
+        return (void*) Blob;
+    }
+
+    _cmsFree(ContextID, Blob);
+    return NULL;
+}
+
+
+// Writes a dataType tag: the 32-bit flag followed by len bytes of payload.
+static
+cmsBool Type_Data_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+   cmsICCData* Blob = (cmsICCData*) Ptr;
+
+   cmsUNUSED_PARAMETER(nItems);
+   cmsUNUSED_PARAMETER(self);
+
+   if (_cmsWriteUInt32Number(io, Blob ->flag))
+       return io ->Write(io, Blob ->len, Blob ->data);
+
+   return FALSE;
+}
+
+
+// Copies a cmsICCData block; the size is derived from its own len field
+// using the same arithmetic as the reader.
+static
+void* Type_Data_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    const cmsICCData* Blob = (const cmsICCData*) Ptr;
+
+    cmsUNUSED_PARAMETER(n);
+
+    return _cmsDupMem(self ->ContextID, Ptr, sizeof(cmsICCData) + Blob ->len - 1);
+}
+
+// Releases a cmsICCData block through the handler's context.
+static
+void Type_Data_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsContext ContextID = self ->ContextID;
+
+    _cmsFree(ContextID, Ptr);
+}
+
+// ********************************************************************************
+// Type cmsSigTextDescriptionType
+// ********************************************************************************
+
+// Reads a textDescriptionType tag. Only the ASCII part is kept: it is stored
+// as the cmsNoLanguage/cmsNoCountry entry of a newly allocated cmsMLU. The
+// Unicode and ScriptCode parts are consumed from the stream and discarded.
+// Returns the MLU (with *nItems = 1), or NULL on failure.
+static
+void *Type_Text_Description_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    char* Text = NULL;
+    cmsMLU* mlu = NULL;
+    cmsUInt32Number  AsciiCount;
+    cmsUInt32Number  i, UnicodeCode, UnicodeCount;
+    cmsUInt16Number  ScriptCodeCode, Dummy;
+    cmsUInt8Number   ScriptCodeCount;
+
+    *nItems = 0;
+
+    // At least one dword (the ASCII count) must be present
+    if (SizeOfTag < sizeof(cmsUInt32Number)) return NULL;
+
+    // Read the length of the ASCII part
+    if (!_cmsReadUInt32Number(io, &AsciiCount)) return NULL;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    // The ASCII part must fit in what is left of the tag
+    if (SizeOfTag < AsciiCount) return NULL;
+
+    // All seems Ok, allocate the container
+    mlu = cmsMLUalloc(self ->ContextID, 1);
+    if (mlu == NULL) return NULL;
+
+    // Room for the ASCII text plus a terminator
+    Text = (char*) _cmsMalloc(self ->ContextID, AsciiCount + 1);
+    if (Text == NULL) goto Error;
+
+    // Read it
+    if (io ->Read(io, Text, sizeof(char), AsciiCount) != AsciiCount) goto Error;
+    SizeOfTag -= AsciiCount;
+
+    // Make sure there is a terminator
+    Text[AsciiCount] = 0;
+
+    // Set the MLU entry. From here on, malformed data is mostly tolerated:
+    // failures below jump to Done and still return the ASCII text
+    if (!cmsMLUsetASCII(mlu, cmsNoLanguage, cmsNoCountry, Text)) goto Error;
+    _cmsFree(self ->ContextID, (void*) Text);
+    Text = NULL;
+
+    // Skip the Unicode language code and count
+    if (SizeOfTag < 2* sizeof(cmsUInt32Number)) goto Done;
+    if (!_cmsReadUInt32Number(io, &UnicodeCode)) goto Done;
+    if (!_cmsReadUInt32Number(io, &UnicodeCount)) goto Done;
+    SizeOfTag -= 2* sizeof(cmsUInt32Number);
+
+    if (SizeOfTag < UnicodeCount*sizeof(cmsUInt16Number)) goto Done;
+
+    // Consume the Unicode text one 16-bit unit at a time; the values are dropped
+    for (i=0; i < UnicodeCount; i++) {
+        if (!io ->Read(io, &Dummy, sizeof(cmsUInt16Number), 1)) goto Done;
+    }
+    SizeOfTag -= UnicodeCount*sizeof(cmsUInt16Number);
+
+    // Skip the ScriptCode part if present. Some buggy profiles have less
+    // data than strictly required. We need to skip it as this type may come
+    // embedded in other types.
+
+    if (SizeOfTag >= sizeof(cmsUInt16Number) + sizeof(cmsUInt8Number) + 67) {
+
+        if (!_cmsReadUInt16Number(io, &ScriptCodeCode)) goto Done;
+        if (!_cmsReadUInt8Number(io,  &ScriptCodeCount)) goto Done;
+
+        // Skip the 67-byte ScriptCode description. Note that, unlike the
+        // reads above, a failure here discards the whole result
+        for (i=0; i < 67; i++) {
+            if (!io ->Read(io, &Dummy, sizeof(cmsUInt8Number), 1)) goto Error;
+        }
+    }
+
+Done:
+
+    *nItems = 1;
+    return mlu;
+
+Error:
+    if (Text) _cmsFree(self ->ContextID, (void*) Text);
+    if (mlu) cmsMLUfree(mlu);
+    return NULL;
+}
+
+
+// This tag can come IN UNALIGNED SIZE. In order to prevent issues, we force zeros on description to align it
+// Writes an MLU as a textDescriptionType tag: the ASCII text, the same text as
+// Unicode, an empty ScriptCode section and, if needed, trailing zero bytes up
+// to the size given by _cmsALIGNLONG.
+// Returns TRUE on success, FALSE on allocation or write failure.
+static
+cmsBool  Type_Text_Description_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsMLU* mlu = (cmsMLU*) Ptr;
+    char *Text = NULL;
+    wchar_t *Wide = NULL;
+    cmsUInt32Number len, len_text, len_tag_requirement, len_aligned;
+    cmsBool  rc = FALSE;
+    char Filler[68];
+
+    // Used below for writing zeroes
+    memset(Filler, 0, sizeof(Filler));
+
+    // Get the len of string
+    len = cmsMLUgetASCII(mlu, cmsNoLanguage, cmsNoCountry, NULL, 0);
+
+    // Specification ICC.1:2001-04 (v2.4.0): It has been found that textDescriptionType can contain misaligned data
+    // (see clause 4.1 for the definition of "aligned"). Because the Unicode language
+    // code and Unicode count immediately follow the ASCII description, their
+    // alignment is not correct if the ASCII count is not a multiple of four. The
+    // ScriptCode code is misaligned when the ASCII count is odd. Profile reading and
+    // writing software must be written carefully in order to handle these alignment
+    // problems.
+    //
+    // The last sentence above suggests handling alignment issues in the
+    // parser. The provided example (Table 69 on Page 60) makes this clear.
+    // Padding only the ASCII count is not sufficient for an aligned tag
+    // size, with the same text size in ASCII and Unicode.
+
+    // Null strings
+    if (len == 0) {
+
+        // Both duplicates can fail; strlen() below must never see NULL
+        Text = (char*)    _cmsDupMem(self ->ContextID, "", sizeof(char));
+        if (Text == NULL) goto Error;
+
+        Wide = (wchar_t*) _cmsDupMem(self ->ContextID, L"", sizeof(wchar_t));
+        if (Wide == NULL) goto Error;
+    }
+    else {
+        // Create independent buffers
+        Text = (char*) _cmsCalloc(self ->ContextID, len, sizeof(char));
+        if (Text == NULL) goto Error;
+
+        Wide = (wchar_t*) _cmsCalloc(self ->ContextID, len, sizeof(wchar_t));
+        if (Wide == NULL) goto Error;
+
+        // Get both representations.
+        cmsMLUgetASCII(mlu, cmsNoLanguage, cmsNoCountry,  Text, len * sizeof(char));
+        cmsMLUgetWide(mlu,  cmsNoLanguage, cmsNoCountry,  Wide, len * sizeof(wchar_t));
+    }
+
+    // Tell the real text len including the null terminator
+    len_text = (cmsUInt32Number) strlen(Text) + 1;
+    // Compute the total tag size requirement (the terms follow the field
+    // layout listed below; NOTE(review): the leading 8 is presumably the
+    // tag base header -- confirm)
+    len_tag_requirement = (8+4+len_text+4+4+2*len_text+2+1+67);
+    len_aligned = _cmsALIGNLONG(len_tag_requirement);
+
+  // * cmsUInt32Number       count;          * Description length
+  // * cmsInt8Number         desc[count]     * NULL terminated ascii string
+  // * cmsUInt32Number       ucLangCode;     * UniCode language code
+  // * cmsUInt32Number       ucCount;        * UniCode description length
+  // * cmsInt16Number        ucDesc[ucCount];* The UniCode description
+  // * cmsUInt16Number       scCode;         * ScriptCode code
+  // * cmsUInt8Number        scCount;        * ScriptCode count
+  // * cmsInt8Number         scDesc[67];     * ScriptCode Description
+
+    if (!_cmsWriteUInt32Number(io, len_text)) goto Error;
+    if (!io ->Write(io, len_text, Text)) goto Error;
+
+    if (!_cmsWriteUInt32Number(io, 0)) goto Error;  // ucLanguageCode
+
+    if (!_cmsWriteUInt32Number(io, len_text)) goto Error;
+    // Note that in some compilers sizeof(cmsUInt16Number) != sizeof(wchar_t)
+    if (!_cmsWriteWCharArray(io, len_text, Wide)) goto Error;
+
+    // ScriptCode Code & count (unused)
+    if (!_cmsWriteUInt16Number(io, 0)) goto Error;
+    if (!_cmsWriteUInt8Number(io, 0)) goto Error;
+
+    if (!io ->Write(io, 67, Filler)) goto Error;
+
+    // Possibly add padding at the end of the tag. The comparison is written
+    // without a subtraction so an unsigned wrap can never request a huge
+    // write out of the small Filler buffer
+    if (len_aligned > len_tag_requirement)
+      if (!io ->Write(io, len_aligned - len_tag_requirement, Filler)) goto Error;
+
+    rc = TRUE;
+
+Error:
+    if (Text) _cmsFree(self ->ContextID, Text);
+    if (Wide) _cmsFree(self ->ContextID, Wide);
+
+    return rc;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+// Duplicates the MLU behind a text description tag.
+static
+void* Type_Text_Description_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsMLU* Source = (cmsMLU*) Ptr;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    return (void*) cmsMLUdup(Source);
+}
+
+// Disposes of the MLU behind a text description tag.
+static
+void Type_Text_Description_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsUNUSED_PARAMETER(self);
+
+    cmsMLUfree((cmsMLU*) Ptr);
+}
+
+
+// Chooses the on-disk type for a description tag: multiLocalizedUnicode for
+// ICC version 4.0 and above, textDescription otherwise. Data is not inspected.
+static
+cmsTagTypeSignature DecideTextDescType(cmsFloat64Number ICCVersion, const void *Data)
+{
+    cmsUNUSED_PARAMETER(Data);
+
+    return (ICCVersion >= 4.0) ? cmsSigMultiLocalizedUnicodeType : cmsSigTextDescriptionType;
+}
+
+
+// ********************************************************************************
+// Type cmsSigCurveType
+// ********************************************************************************
+
+// Reads a curveType tag. The leading count selects the representation:
+//   0      -> a parametric curve of type 1 built with parameter 1.0
+//   1      -> a single gamma exponent stored as 8.8 fixed point
+//   other  -> a table of Count 16-bit entries (at most 0x7FFF)
+// Returns the new tone curve, or NULL on failure. *nItems is 1 only when a
+// curve is actually returned.
+static
+void *Type_Curve_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt32Number Count;
+    cmsToneCurve* NewGamma;
+    
+    *nItems = 0;
+    if (!_cmsReadUInt32Number(io, &Count)) return NULL;
+
+    switch (Count) {
+
+           case 0:   // Linear.
+               {
+                   cmsFloat64Number SingleGamma = 1.0;
+
+                   NewGamma = cmsBuildParametricToneCurve(self ->ContextID, 1, &SingleGamma);
+                   if (!NewGamma) return NULL;
+                   *nItems = 1;
+                   return NewGamma;
+               }
+              
+           case 1:  // Specified as the exponent of gamma function
+               {
+                   cmsUInt16Number SingleGammaFixed;
+                   cmsFloat64Number SingleGamma;
+
+                   if (!_cmsReadUInt16Number(io, &SingleGammaFixed)) return NULL;
+                   SingleGamma = _cms8Fixed8toDouble(SingleGammaFixed);
+
+                   // Report an item only if the curve could be built,
+                   // exactly as the other two cases do
+                   NewGamma = cmsBuildParametricToneCurve(self ->ContextID, 1, &SingleGamma);
+                   if (!NewGamma) return NULL;
+                   *nItems = 1;
+                   return NewGamma;
+               }
+
+           default:  // Curve
+
+               if (Count > 0x7FFF)
+                   return NULL; // This is to prevent bad guys for doing bad things
+
+               // Allocate the table uninitialized, then fill it from the stream
+               NewGamma = cmsBuildTabulatedToneCurve16(self ->ContextID, Count, NULL);
+               if (!NewGamma) return NULL;
+
+               if (!_cmsReadUInt16Array(io, Count, NewGamma -> Table16)) {
+                   cmsFreeToneCurve(NewGamma);
+                   return NULL;
+               }
+
+               *nItems = 1;
+               return NewGamma;
+    }
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+// Writes a tone curve as curveType. A one-segment curve of type 1 is stored
+// as a single 8.8 fixed-point gamma; anything else is stored as its 16-bit table.
+static
+cmsBool  Type_Curve_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsToneCurve* Curve = (cmsToneCurve*) Ptr;
+    cmsBool IsSingleGamma;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    IsSingleGamma = (Curve ->nSegments == 1 && Curve ->Segments[0].Type == 1);
+
+    if (!IsSingleGamma) {
+
+        // Tabulated form: the entry count, then the entries themselves
+        if (!_cmsWriteUInt32Number(io, Curve ->nEntries)) return FALSE;
+        return _cmsWriteUInt16Array(io, Curve ->nEntries, Curve ->Table16);
+    }
+    else {
+
+        // Single gamma, preserve number
+        cmsUInt16Number GammaFixed = _cmsDoubleTo8Fixed8(Curve ->Segments[0].Params[0]);
+
+        return _cmsWriteUInt32Number(io, 1) &&
+               _cmsWriteUInt16Number(io, GammaFixed);
+    }
+}
+
+
+// Duplicates the tone curve behind a curve tag.
+static
+void* Type_Curve_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsToneCurve* Source = (cmsToneCurve*) Ptr;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    return (void*) cmsDupToneCurve(Source);
+}
+
+// Disposes of the tone curve behind a curve tag.
+static
+void Type_Curve_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsUNUSED_PARAMETER(self);
+
+    cmsFreeToneCurve((cmsToneCurve*) Ptr);
+}
+
+
+// ********************************************************************************
+// Type cmsSigParametricCurveType
+// ********************************************************************************
+
+
+// Decide which curve type to use on writing
+// Picks the tag type used to store a tone curve. The parametric form is used
+// only when the ICC version is not below 4.0 and the curve has exactly one
+// segment whose type lies in 0..5; every other case falls back to curveType.
+static
+cmsTagTypeSignature DecideCurveType(cmsFloat64Number ICCVersion, const void *Data)
+{
+    cmsToneCurve* Curve = (cmsToneCurve*) Data;
+
+    // The version test comes first, so the curve is not examined for older versions
+    if (!(ICCVersion < 4.0) &&
+        Curve ->nSegments == 1 &&               // Only 1-segment curves can be saved as parametric
+        !(Curve ->Segments[0].Type < 0) &&      // Only non-inverted curves
+        !(Curve ->Segments[0].Type > 5))        // Only ICC parametric curves
+            return cmsSigParametricCurveType;
+
+    return cmsSigCurveType;
+}
+
+// Reads a parametricCurveType tag: a 16-bit function type (0..4), a reserved
+// 16-bit word, then as many s15Fixed16 parameters as that type requires.
+// Returns the new tone curve, or NULL on failure. *nItems is 0 on every
+// failure path and 1 only when a curve is returned.
+static
+void *Type_ParametricCurve_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    static const int ParamsByType[] = { 1, 3, 4, 5, 7 };
+    cmsFloat64Number Params[10];
+    cmsUInt16Number Type;
+    int i, n;
+    cmsToneCurve* NewGamma;
+
+    // Start from a defined value, as the other readers in this file do
+    *nItems = 0;
+
+    if (!_cmsReadUInt16Number(io, &Type)) return NULL;
+    if (!_cmsReadUInt16Number(io, NULL)) return NULL;   // Reserved
+
+    // Type indexes ParamsByType, so it has to be validated before use
+    if (Type > 4) {
+
+        cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown parametric curve type '%d'", Type);
+        return NULL;
+    }
+
+    memset(Params, 0, sizeof(Params));
+    n = ParamsByType[Type];
+
+    for (i=0; i < n; i++) {
+
+        if (!_cmsRead15Fixed16Number(io, &Params[i])) return NULL;
+    }
+
+    // The builder is called with Type+1
+    NewGamma = cmsBuildParametricToneCurve(self ->ContextID, Type+1, Params);
+    if (NewGamma == NULL) return NULL;
+
+    *nItems = 1;
+    return NewGamma;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+// Writes a one-segment parametric curve (segment type 1..5) as
+// parametricCurveType: the type minus one, a reserved zero word, then the
+// parameters as s15Fixed16. Any other curve is rejected with an error signal.
+static
+cmsBool  Type_ParametricCurve_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsToneCurve* Curve = (cmsToneCurve*) Ptr;
+    int i, nParams, typen;
+    static const int ParamsByType[] = { 0, 1, 3, 4, 5, 7 };
+
+    // Check the segment count BEFORE looking at Segments[0]: a curve with no
+    // segments has no first segment to inspect
+    if (Curve ->nSegments != 1 || Curve ->Segments[0].Type < 1) {
+
+        cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Multisegment or Inverted parametric curves cannot be written");
+        return FALSE;
+    }
+
+    typen = Curve -> Segments[0].Type;
+
+    // typen indexes ParamsByType, which has entries only up to 5
+    if (typen > 5) {
+        cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported parametric curve");
+        return FALSE;
+    }
+
+    nParams = ParamsByType[typen];
+
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) (Curve ->Segments[0].Type - 1))) return FALSE;
+    if (!_cmsWriteUInt16Number(io, 0)) return FALSE;        // Reserved
+
+    for (i=0; i < nParams; i++) {
+
+        if (!_cmsWrite15Fixed16Number(io, Curve -> Segments[0].Params[i])) return FALSE;
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+// Duplicates the tone curve behind a parametric curve tag.
+static
+void* Type_ParametricCurve_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsToneCurve* Source = (cmsToneCurve*) Ptr;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    return (void*) cmsDupToneCurve(Source);
+}
+
+// Disposes of the tone curve behind a parametric curve tag.
+static
+void Type_ParametricCurve_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsUNUSED_PARAMETER(self);
+
+    cmsFreeToneCurve((cmsToneCurve*) Ptr);
+}
+
+
+// ********************************************************************************
+// Type cmsSigDateTimeType
+// ********************************************************************************
+
+// A 12-byte value representation of the time and date, where the byte usage is assigned
+// as specified in table 1. The actual values are encoded as 16-bit unsigned integers
+// (uInt16Number - see 5.1.6).
+//
+// All the dateTimeNumber values in a profile shall be in Coordinated Universal Time
+// (UTC, also known as GMT or ZULU Time). Profile writers are required to convert local
+// time to UTC when setting these values. Programmes that display these values may show
+// the dateTimeNumber as UTC, show the equivalent local time (at current locale), or
+// display both UTC and local versions of the dateTimeNumber.
+
+// Reads a dateTimeNumber from the stream and decodes it into a newly
+// allocated struct tm. Returns the struct tm (with *nItems = 1), or NULL if
+// the allocation or the read fails.
+static
+void *Type_DateTime_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsDateTimeNumber timestamp;
+    struct tm * NewDateTime;
+
+    *nItems = 0;
+    NewDateTime = (struct tm*) _cmsMalloc(self ->ContextID, sizeof(struct tm));
+    if (NewDateTime == NULL) return NULL;
+
+    if (io->Read(io, &timestamp, sizeof(cmsDateTimeNumber), 1) != 1) {
+
+        // The result buffer is still owned here; release it so a short
+        // read does not leak it
+        _cmsFree(self ->ContextID, NewDateTime);
+        return NULL;
+    }
+
+     _cmsDecodeDateTimeNumber(&timestamp, NewDateTime);
+
+     *nItems = 1;
+     return NewDateTime;
+
+     cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+// Encodes a struct tm as a dateTimeNumber and writes it to the stream.
+static
+cmsBool  Type_DateTime_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsDateTimeNumber Encoded;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    _cmsEncodeDateTimeNumber(&Encoded, (struct tm*) Ptr);
+
+    return io ->Write(io, sizeof(cmsDateTimeNumber), &Encoded) ? TRUE : FALSE;
+}
+
+// Copies the struct tm behind a date/time tag.
+static
+void* Type_DateTime_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsUNUSED_PARAMETER(n);
+
+    return _cmsDupMem(self ->ContextID, Ptr, sizeof(struct tm));
+}
+
+// Releases the struct tm behind a date/time tag through the handler's context.
+static
+void Type_DateTime_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsContext ContextID = self ->ContextID;
+
+    _cmsFree(ContextID, Ptr);
+}
+
+
+
+// ********************************************************************************
+// Type icMeasurementType
+// ********************************************************************************
+
+/*
+The measurementType information refers only to the internal profile data and is
+meant to provide profile makers an alternative to the default measurement
+specifications.
+*/
+
+// Reads a measurementType tag (observer, backing XYZ, geometry, flare and
+// illuminant type, in that order) and returns a heap copy of the decoded
+// cmsICCMeasurementConditions. Returns NULL on any read or allocation
+// failure; *nItems is 1 only when a structure is returned.
+static
+void *Type_Measurement_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsICCMeasurementConditions mc;
+    void* Result;
+
+    // Start from a defined value, as the other readers in this file do
+    *nItems = 0;
+
+    memset(&mc, 0, sizeof(mc));
+
+    if (!_cmsReadUInt32Number(io, &mc.Observer)) return NULL;
+    if (!_cmsReadXYZNumber(io,    &mc.Backing)) return NULL;
+    if (!_cmsReadUInt32Number(io, &mc.Geometry)) return NULL;
+    if (!_cmsRead15Fixed16Number(io, &mc.Flare)) return NULL;
+    if (!_cmsReadUInt32Number(io, &mc.IlluminantType)) return NULL;
+
+    // The duplicate can fail; report an item only once it exists
+    Result = _cmsDupMem(self ->ContextID, &mc, sizeof(cmsICCMeasurementConditions));
+    if (Result == NULL) return NULL;
+
+    *nItems = 1;
+    return Result;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+// Writes the measurement conditions field by field, in the same order the
+// reader expects them. Stops at the first write that fails.
+static
+cmsBool  Type_Measurement_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsICCMeasurementConditions* mc =(cmsICCMeasurementConditions*) Ptr;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    // Short-circuit evaluation keeps the original write order and early exit
+    return _cmsWriteUInt32Number(io, mc->Observer)       &&
+           _cmsWriteXYZNumber(io,    &mc->Backing)       &&
+           _cmsWriteUInt32Number(io, mc->Geometry)       &&
+           _cmsWrite15Fixed16Number(io, mc->Flare)       &&
+           _cmsWriteUInt32Number(io, mc->IlluminantType);
+}
+
+// Copies the cmsICCMeasurementConditions behind a measurement tag.
+static
+void* Type_Measurement_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsUNUSED_PARAMETER(n);
+
+    return _cmsDupMem(self ->ContextID, Ptr, sizeof(cmsICCMeasurementConditions));
+}
+
+// Releases the measurement conditions block through the handler's context.
+static
+void Type_Measurement_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsContext ContextID = self ->ContextID;
+
+    _cmsFree(ContextID, Ptr);
+}
+
+
+// ********************************************************************************
+// Type cmsSigMultiLocalizedUnicodeType
+// ********************************************************************************
+//
+//   Do NOT trust SizeOfTag as there is an issue on the definition of profileSequenceDescTag. See the TechNote from
+//   Max Derhak and Rohit Patil about this: basically the size of the string table should be guessed and cannot be
+//   taken from the size of tag if this tag is embedded as part of bigger structures (profileSequenceDescTag, for instance)
+//
+
+// Reads a multiLocalizedUnicodeType tag: a record count, a record length
+// (only 12 is accepted), Count directory records (language, country, length,
+// offset) and then the string pool those records point into.
+// Returns a newly allocated cmsMLU (with *nItems = 1), or NULL on failure.
+static
+void *Type_MLU_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsMLU* mlu;
+    cmsUInt32Number Count, RecLen, NumOfWchar;
+    cmsUInt32Number SizeOfHeader;
+    cmsUInt32Number  Len, Offset;
+    cmsUInt32Number  i;
+    wchar_t*         Block;
+    cmsUInt32Number  BeginOfThisString, EndOfThisString, LargestPosition;
+
+    *nItems = 0;
+    if (!_cmsReadUInt32Number(io, &Count)) return NULL;
+    if (!_cmsReadUInt32Number(io, &RecLen)) return NULL;
+
+    if (RecLen != 12) {
+
+        cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "multiLocalizedUnicodeType of len != 12 is not supported.");
+        return NULL;
+    }
+
+    mlu = cmsMLUalloc(self ->ContextID, Count);
+    if (mlu == NULL) return NULL;
+
+    mlu ->UsedEntries = Count;
+
+    SizeOfHeader = 12 * Count + sizeof(_cmsTagBase);
+    LargestPosition = 0;
+
+    for (i=0; i < Count; i++) {
+
+        if (!_cmsReadUInt16Number(io, &mlu ->Entries[i].Language)) goto Error;
+        if (!_cmsReadUInt16Number(io, &mlu ->Entries[i].Country))  goto Error;
+
+        // Now deal with Len and offset.
+        if (!_cmsReadUInt32Number(io, &Len)) goto Error;
+        if (!_cmsReadUInt32Number(io, &Offset)) goto Error;
+
+        // The string must start after the directory, and Offset + Len must
+        // neither wrap around nor run past the end of the tag
+        if (Offset < (SizeOfHeader + 8)) goto Error;        
+        if (((Offset + Len) < Len) || ((Offset + Len) > SizeOfTag + 8)) goto Error;
+
+        // True begin of the string, relative to the start of the pool
+        BeginOfThisString = Offset - SizeOfHeader - 8;
+
+        // Adjust to wchar_t elements
+        mlu ->Entries[i].Len = (Len * sizeof(wchar_t)) / sizeof(cmsUInt16Number);
+        mlu ->Entries[i].StrW = (BeginOfThisString * sizeof(wchar_t)) / sizeof(cmsUInt16Number);
+
+        // To guess maximum size, add offset + len
+        EndOfThisString = BeginOfThisString + Len;
+        if (EndOfThisString > LargestPosition)
+            LargestPosition = EndOfThisString;
+    }
+
+    // Now read the remainder of the tag and fill all strings. From here on
+    // SizeOfTag holds the pool size as stored in memory (wchar_t based)
+    SizeOfTag   = (LargestPosition * sizeof(wchar_t)) / sizeof(cmsUInt16Number);
+    if (SizeOfTag == 0)
+    {
+        Block = NULL;
+        NumOfWchar = 0;
+
+    }
+    else
+    {
+        Block = (wchar_t*) _cmsMalloc(self ->ContextID, SizeOfTag);
+        if (Block == NULL) goto Error;
+        NumOfWchar = SizeOfTag / sizeof(wchar_t);
+        if (!_cmsReadWCharArray(io, NumOfWchar, Block)) {
+
+            // Block is not attached to the MLU yet, so the Error path
+            // would not release it; do it here
+            _cmsFree(self ->ContextID, Block);
+            goto Error;
+        }
+    }
+
+    mlu ->MemPool  = Block;
+    mlu ->PoolSize = SizeOfTag;
+    mlu ->PoolUsed = SizeOfTag;
+
+    *nItems = 1;
+    return (void*) mlu;
+
+Error:
+    if (mlu) cmsMLUfree(mlu);
+    return NULL;
+}
+
+// Writes an MLU as multiLocalizedUnicodeType: the record count, the record
+// size (12), one directory record per entry and finally the string pool.
+// A NULL MLU is written as an empty directory.
+static
+cmsBool  Type_MLU_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsMLU* mlu =(cmsMLU*) Ptr;
+    cmsUInt32Number DirectorySize;
+    cmsUInt32Number RecLen, RecOffset;
+    cmsUInt32Number k;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    if (mlu == NULL) {
+
+          // Empty placeholder: zero records of size 12
+          return _cmsWriteUInt32Number(io, 0) &&
+                 _cmsWriteUInt32Number(io, 12);
+    }
+
+    if (!_cmsWriteUInt32Number(io, mlu ->UsedEntries)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, 12)) return FALSE;
+
+    DirectorySize = 12 * mlu ->UsedEntries + sizeof(_cmsTagBase);
+
+    for (k = 0; k < mlu ->UsedEntries; k++) {
+
+        RecLen    =  mlu ->Entries[k].Len;
+        RecOffset =  mlu ->Entries[k].StrW;
+
+        // Convert from wchar_t based sizes to 16-bit based ones, and make the
+        // offset relative to the tag by adding the directory size plus 8
+        RecLen    = (RecLen * sizeof(cmsUInt16Number)) / sizeof(wchar_t);
+        RecOffset = (RecOffset * sizeof(cmsUInt16Number)) / sizeof(wchar_t) + DirectorySize + 8;
+
+        if (!_cmsWriteUInt16Number(io, mlu ->Entries[k].Language) ||
+            !_cmsWriteUInt16Number(io, mlu ->Entries[k].Country)  ||
+            !_cmsWriteUInt32Number(io, RecLen)                    ||
+            !_cmsWriteUInt32Number(io, RecOffset))
+                return FALSE;
+    }
+
+    // The pool goes last
+    if (!_cmsWriteWCharArray(io, mlu ->PoolUsed / sizeof(wchar_t), (wchar_t*)  mlu ->MemPool))
+        return FALSE;
+
+    return TRUE;
+}
+
+
+// Duplicates the MLU behind a multiLocalizedUnicode tag.
+static
+void* Type_MLU_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsMLU* Source = (cmsMLU*) Ptr;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    return (void*) cmsMLUdup(Source);
+}
+
+// Disposes of the MLU behind a multiLocalizedUnicode tag.
+static
+void Type_MLU_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsMLU* mlu = (cmsMLU*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsMLUfree(mlu);
+}
+
+
+// ********************************************************************************
+// Type cmsSigLut8Type
+// ********************************************************************************
+
+// Decide which LUT type to use on writing
+// Picks the tag type for an A2B pipeline: below ICC version 4.0 either Lut8
+// or Lut16 depending on the pipeline's SaveAs8Bits flag, lutAtoB otherwise.
+static
+cmsTagTypeSignature DecideLUTtypeA2B(cmsFloat64Number ICCVersion, const void *Data)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Data;
+
+    if (ICCVersion < 4.0)
+        return Lut ->SaveAs8Bits ? cmsSigLut8Type : cmsSigLut16Type;
+
+    return cmsSigLutAtoBType;
+}
+
+// Picks the tag type for a B2A pipeline: below ICC version 4.0 either Lut8
+// or Lut16 depending on the pipeline's SaveAs8Bits flag, lutBtoA otherwise.
+static
+cmsTagTypeSignature DecideLUTtypeB2A(cmsFloat64Number ICCVersion, const void *Data)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Data;
+
+    if (ICCVersion < 4.0)
+        return Lut ->SaveAs8Bits ? cmsSigLut8Type : cmsSigLut16Type;
+
+    return cmsSigLutBtoAType;
+}
+
+/*
+This structure represents a colour transform using tables of 8-bit precision.
+This type contains four processing elements: a 3 by 3 matrix (which shall be
+the identity matrix unless the input colour space is XYZ), a set of one dimensional
+input tables, a multidimensional lookup table, and a set of one dimensional output
+tables. Data is processed using these elements via the following sequence:
+(matrix) -> (1d input tables)  -> (multidimensional lookup table - CLUT) -> (1d output tables)
+
+Byte Position   Field Length (bytes)  Content Encoded as...
+8                  1          Number of Input Channels (i)    uInt8Number
+9                  1          Number of Output Channels (o)   uInt8Number
+10                 1          Number of CLUT grid points (identical for each side) (g) uInt8Number
+11                 1          Reserved for padding (fill with 00h)
+
+12..15             4          Encoded e00 parameter   s15Fixed16Number
+*/
+
+
+// Read 8 bit tables as gamma functions
+// Reads nChannels tables of 256 bytes each, widens every byte to 16 bits and
+// appends the resulting tone curves to the pipeline as one stage.
+// Returns FALSE on a bad channel count, allocation failure, short read or
+// when the stage cannot be inserted.
+static
+cmsBool  Read8bitTables(cmsContext ContextID, cmsIOHANDLER* io, cmsPipeline* lut, cmsUInt32Number nChannels)
+{
+    cmsToneCurve* Curves[cmsMAXCHANNELS];
+    cmsUInt8Number* Raw;
+    cmsUInt32Number ch, k;
+    cmsBool Ok = FALSE;
+
+    if (nChannels == 0 || nChannels > cmsMAXCHANNELS) return FALSE;
+
+    // Unused slots stay NULL so the cleanup loop can tell what was built
+    memset(Curves, 0, sizeof(Curves));
+
+    Raw = (cmsUInt8Number*) _cmsMalloc(ContextID, 256);
+    if (Raw == NULL) return FALSE;
+
+    // All curves are created before anything is read from the stream
+    for (ch = 0; ch < nChannels; ch++) {
+        Curves[ch] = cmsBuildTabulatedToneCurve16(ContextID, 256, NULL);
+        if (Curves[ch] == NULL) goto Cleanup;
+    }
+
+    for (ch = 0; ch < nChannels; ch++) {
+
+        if (io ->Read(io, Raw, 256, 1) != 1) goto Cleanup;
+
+        for (k = 0; k < 256; k++)
+            Curves[ch]->Table16[k] = (cmsUInt16Number) FROM_8_TO_16(Raw[k]);
+    }
+
+    // The scratch buffer is no longer needed
+    _cmsFree(ContextID, Raw);
+    Raw = NULL;
+
+    if (cmsPipelineInsertStage(lut, cmsAT_END, cmsStageAllocToneCurves(ContextID, nChannels, Curves)))
+        Ok = TRUE;
+
+    // The local curves are released on success and on failure alike
+Cleanup:
+    for (ch = 0; ch < nChannels; ch++) {
+        if (Curves[ch]) cmsFreeToneCurve(Curves[ch]);
+    }
+
+    if (Raw) _cmsFree(ContextID, Raw);
+    return Ok;
+}
+
+
+// Writes n curves as 256-byte tables. A two-entry curve running from 0 to
+// 65535 is written as the ramp 0..255; any other curve must have exactly 256
+// entries, each of which is reduced to 8 bits. With no curve data nothing is
+// written and the call succeeds.
+static
+cmsBool Write8bitTables(cmsContext ContextID, cmsIOHANDLER* io, cmsUInt32Number n, _cmsStageToneCurvesData* Tables)
+{
+    cmsUInt32Number ch;
+    int k;
+
+    if (Tables == NULL) return TRUE;
+
+    for (ch = 0; ch < n; ch++) {
+
+        cmsToneCurve* Curve = Tables ->TheCurves[ch];
+
+        // Usual case of identity curves
+        if (Curve ->nEntries == 2 &&
+            Curve ->Table16[0] == 0 &&
+            Curve ->Table16[1] == 65535) {
+
+            for (k = 0; k < 256; k++) {
+                if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) k)) return FALSE;
+            }
+            continue;
+        }
+
+        if (Curve ->nEntries != 256) {
+            cmsSignalError(ContextID, cmsERROR_RANGE, "LUT8 needs 256 entries on prelinearization");
+            return FALSE;
+        }
+
+        for (k = 0; k < 256; k++) {
+
+            cmsUInt8Number Byte = (cmsUInt8Number) FROM_16_TO_8(Curve ->Table16[k]);
+
+            if (!_cmsWriteUInt8Number(io, Byte)) return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+
+// Check overflow
+// Computes n * a^b with overflow detection. Returns 0 when a or n is 0, and
+// (cmsUInt32Number) -1 when overflow is detected. The check inside the loop
+// is conservative: after each multiplication it verifies that one more
+// multiplication by a would still fit.
+static
+cmsUInt32Number uipow(cmsUInt32Number n, cmsUInt32Number a, cmsUInt32Number b)
+{
+    const cmsUInt32Number Overflow = (cmsUInt32Number) -1;
+    cmsUInt32Number Power = 1;
+    cmsUInt32Number Total;
+    cmsUInt32Number Step;
+
+    if (a == 0 || n == 0) return 0;
+
+    for (Step = 0; Step < b; Step++) {
+
+        Power *= a;
+
+        if (Power > UINT_MAX / a) return Overflow;
+    }
+
+    Total = Power * n;
+
+    // Dividing back reveals whether the last product wrapped around
+    return (Total / n == Power) ? Total : Overflow;
+}
+
+
+// This creates an MPE LUT with matrix, pre tables, CLUT and post tables.
+// An 8-bit LUT may be scaled easily to the v4 PCS, but we also need to properly adjust
+// the PCS on BToAxx tags, and on AtoB if abstract. We need to fix the input direction.
+
+static
+void *Type_LUT8_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt8Number InputChannels, OutputChannels, CLUTpoints;
+    cmsUInt8Number* Temp = NULL;
+    cmsPipeline* NewLUT = NULL;
+    cmsUInt32Number nTabSize, i;
+    cmsFloat64Number Matrix[3*3];
+
+    *nItems = 0;
+
+    if (!_cmsReadUInt8Number(io, &InputChannels)) goto Error;
+    if (!_cmsReadUInt8Number(io, &OutputChannels)) goto Error;
+    if (!_cmsReadUInt8Number(io, &CLUTpoints)) goto Error;
+
+     if (CLUTpoints == 1) goto Error; // Impossible value, 0 for no CLUT and then 2 at least
+
+    // Padding
+    if (!_cmsReadUInt8Number(io, NULL)) goto Error;
+
+    // Do some checking
+    if (InputChannels == 0 || InputChannels > cmsMAXCHANNELS)  goto Error;
+    if (OutputChannels == 0 || OutputChannels > cmsMAXCHANNELS) goto Error;
+
+   // Allocates an empty Pipeline
+    NewLUT = cmsPipelineAlloc(self ->ContextID, InputChannels, OutputChannels);
+    if (NewLUT == NULL) goto Error;
+
+    // Read the Matrix
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[0])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[1])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[2])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[3])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[4])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[5])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[6])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[7])) goto Error;
+    if (!_cmsRead15Fixed16Number(io,  &Matrix[8])) goto Error;
+
+
+    // Only operates if not identity...
+    if ((InputChannels == 3) && !_cmsMAT3isIdentity((cmsMAT3*) Matrix)) {
+
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_BEGIN, cmsStageAllocMatrix(self ->ContextID, 3, 3, Matrix, NULL)))
+            goto Error;
+    }
+
+    // Get input tables
+    if (!Read8bitTables(self ->ContextID, io,  NewLUT, InputChannels)) goto Error;
+
+    // Get 3D CLUT. Check the overflow....
+    nTabSize = uipow(OutputChannels, CLUTpoints, InputChannels);
+    if (nTabSize == (cmsUInt32Number) -1) goto Error;
+    if (nTabSize > 0) {
+
+        cmsUInt16Number *PtrW, *T;
+       
+        PtrW = T  = (cmsUInt16Number*) _cmsCalloc(self ->ContextID, nTabSize, sizeof(cmsUInt16Number));
+        if (T  == NULL) goto Error;
+
+        Temp = (cmsUInt8Number*) _cmsMalloc(self ->ContextID, nTabSize);
+        if (Temp == NULL) {
+            _cmsFree(self ->ContextID, T);
+            goto Error;
+        }
+
+        if (io ->Read(io, Temp, nTabSize, 1) != 1) {
+            _cmsFree(self ->ContextID, T);
+            _cmsFree(self ->ContextID, Temp);
+            goto Error;
+        }
+
+        for (i = 0; i < nTabSize; i++) {
+
+            *PtrW++ = FROM_8_TO_16(Temp[i]);
+        }
+        _cmsFree(self ->ContextID, Temp);
+        Temp = NULL;
+
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_END, cmsStageAllocCLut16bit(self ->ContextID, CLUTpoints, InputChannels, OutputChannels, T))) {
+            _cmsFree(self ->ContextID, T);
+            goto Error;
+        }
+        _cmsFree(self ->ContextID, T);
+    }
+
+
+    // Get output tables
+    if (!Read8bitTables(self ->ContextID, io,  NewLUT, OutputChannels)) goto Error;
+
+    *nItems = 1;
+    return NewLUT;
+
+Error:
+    if (NewLUT != NULL) cmsPipelineFree(NewLUT);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+// Saves a pipeline as a LUT8 tag.
+// We only allow a specific MPE structure: Matrix plus prelin, plus clut, plus post-lin.
+// Each of those stages may be missing, but the ones present must come in that
+// order and nothing else may follow them.
+//
+//   self    type handler, used for its ContextID
+//   io      destination I/O handler
+//   Ptr     the cmsPipeline to be saved
+//   nItems  not used
+//
+// Returns TRUE on success. Returns FALSE if the pipeline is empty, if it does
+// not have the structure above, if the CLUT size overflows, or if a write fails.
+static
+cmsBool  Type_LUT8_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsUInt32Number j, nTabSize;
+    cmsUInt8Number  val;
+    cmsPipeline* NewLUT = (cmsPipeline*) Ptr;
+    cmsStage* mpe;
+    _cmsStageToneCurvesData* PreMPE = NULL, *PostMPE = NULL;
+    _cmsStageMatrixData* MatMPE = NULL;
+    _cmsStageCLutData* clut = NULL;
+    cmsUInt32Number clutPoints;
+
+    // Disassemble the LUT into components.
+    mpe = NewLUT -> Elements;
+
+    // A pipeline without stages has no first element to look at. Fail cleanly
+    // instead of dereferencing a NULL stage pointer below.
+    if (mpe == NULL) {
+        cmsSignalError(self ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "empty LUT8 is not supported");
+        return FALSE;
+    }
+
+    if (mpe ->Type == cmsSigMatrixElemType) {
+
+        MatMPE = (_cmsStageMatrixData*) mpe ->Data;
+        mpe = mpe -> Next;
+    }
+
+    if (mpe != NULL && mpe ->Type == cmsSigCurveSetElemType) {
+        PreMPE = (_cmsStageToneCurvesData*) mpe ->Data;
+        mpe = mpe -> Next;
+    }
+
+    if (mpe != NULL && mpe ->Type == cmsSigCLutElemType) {
+        clut  = (_cmsStageCLutData*) mpe -> Data;
+        mpe = mpe ->Next;
+    }
+
+    if (mpe != NULL && mpe ->Type == cmsSigCurveSetElemType) {
+        PostMPE = (_cmsStageToneCurvesData*) mpe ->Data;
+        mpe = mpe -> Next;
+    }
+
+    // That should be all
+    if (mpe != NULL) {
+        cmsSignalError(mpe->ContextID, cmsERROR_UNKNOWN_EXTENSION, "LUT is not suitable to be saved as LUT8");
+        return FALSE;
+    }
+
+    // Without a CLUT stage the grid point count is stored as 0
+    if (clut == NULL)
+        clutPoints = 0;
+    else
+        clutPoints    = clut->Params->nSamples[0];
+
+    // Header: channel counts, grid points and one padding byte
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) NewLUT ->InputChannels)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) NewLUT ->OutputChannels)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) clutPoints)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, 0)) return FALSE; // Padding
+
+    // The matrix is always stored; the identity is used when there is no matrix stage
+    if (MatMPE != NULL) {
+
+        for (j=0; j < 9; j++) {
+            if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[j])) return FALSE;
+        }
+    }
+    else {
+
+        // Entries 0, 4 and 8 are the diagonal of a row-major 3x3 matrix
+        for (j=0; j < 9; j++) {
+            if (!_cmsWrite15Fixed16Number(io, (j % 4 == 0) ? 1 : 0)) return FALSE;
+        }
+    }
+
+    // The prelinearization table
+    if (!Write8bitTables(self ->ContextID, io, NewLUT ->InputChannels, PreMPE)) return FALSE;
+
+    nTabSize = uipow(NewLUT->OutputChannels, clutPoints, NewLUT ->InputChannels);
+    if (nTabSize == (cmsUInt32Number) -1) return FALSE;
+
+    // The 3D CLUT, every node reduced to 8 bits
+    if (nTabSize > 0 && clut != NULL) {
+
+        for (j=0; j < nTabSize; j++) {
+
+            val = (cmsUInt8Number) FROM_16_TO_8(clut ->Tab.T[j]);
+            if (!_cmsWriteUInt8Number(io, val)) return FALSE;
+        }
+    }
+
+    // The postlinearization table
+    if (!Write8bitTables(self ->ContextID, io, NewLUT ->OutputChannels, PostMPE)) return FALSE;
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+// Duplicates the pipeline held by a LUT8 tag. Neither the item count nor the
+// type handler is used.
+static
+void* Type_LUT8_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    void* Copy = (void*) cmsPipelineDup((cmsPipeline*) Ptr);
+
+    return Copy;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Releases the pipeline held by a LUT8 tag. The type handler is not used.
+static
+void Type_LUT8_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Ptr;
+
+    cmsPipelineFree(Lut);
+    return;
+
+    cmsUNUSED_PARAMETER(self);
+}
+
+// ********************************************************************************
+// Type cmsSigLut16Type
+// ********************************************************************************
+
+// Reads nChannels tables of nEntries 16 bit values each, as tabulated tone
+// curves, and appends them to the pipeline as one tone-curve stage.
+// Returns TRUE on success (also when nEntries is 0, in which case nothing is
+// read or added) and FALSE otherwise. The curves built here are released
+// before returning, whatever the outcome.
+static
+cmsBool  Read16bitTables(cmsContext ContextID, cmsIOHANDLER* io, cmsPipeline* lut, 
+                                    cmsUInt32Number nChannels, cmsUInt32Number nEntries)
+{
+    cmsToneCurve* Tables[cmsMAXCHANNELS];
+    cmsUInt32Number ch;
+    cmsBool rc = FALSE;
+
+    // Maybe an empty table? (this is a lcms extension)
+    if (nEntries == 0) return TRUE;
+
+    // A single entry, or more channels than supported: check for malicious profiles
+    if (nEntries < 2 || nChannels > cmsMAXCHANNELS) return FALSE;
+
+    // All slots start as NULL, so the cleanup knows which curves do exist
+    memset(Tables, 0, sizeof(Tables));
+
+    for (ch = 0; ch < nChannels; ch++) {
+
+        Tables[ch] = cmsBuildTabulatedToneCurve16(ContextID, nEntries, NULL);
+        if (Tables[ch] == NULL) goto Done;
+
+        if (!_cmsReadUInt16Array(io, nEntries, Tables[ch]->Table16)) goto Done;
+    }
+
+    // Add the tables as they are. They may well be an identity, but detecting
+    // that is up to the optimizer, not to the reading code.
+    if (cmsPipelineInsertStage(lut, cmsAT_END, cmsStageAllocToneCurves(ContextID, nChannels, Tables)))
+        rc = TRUE;
+
+Done:
+    for (ch = 0; ch < nChannels; ch++) {
+        if (Tables[ch] != NULL) cmsFreeToneCurve(Tables[ch]);
+    }
+
+    return rc;
+}
+
+// Writes all curves of a tone-curve stage as raw 16 bit entries, one curve
+// after the other. The entry count of the first curve is applied to every
+// curve. Returns FALSE as soon as a write fails. ContextID is not used.
+static
+cmsBool Write16bitTables(cmsContext ContextID, cmsIOHANDLER* io, _cmsStageToneCurvesData* Tables)
+{
+    cmsUInt32Number Curve, Entry;
+    cmsUInt32Number nEntries;
+
+    _cmsAssert(Tables != NULL);
+
+    nEntries = Tables->TheCurves[0]->nEntries;
+
+    for (Curve = 0; Curve < Tables ->nCurves; Curve++) {
+
+        for (Entry = 0; Entry < nEntries; Entry++) {
+
+            if (!_cmsWriteUInt16Number(io, Tables->TheCurves[Curve]->Table16[Entry]))
+                return FALSE;
+        }
+    }
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(ContextID);
+}
+
+// Reads a LUT16 tag into a pipeline made of an optional 3x3 matrix, the input
+// tables, the CLUT and the output tables. On success the pipeline is returned
+// and *nItems is set to 1; on failure anything built so far is released, NULL
+// is returned and *nItems stays at 0.
+static
+void *Type_LUT16_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt8Number InputChannels, OutputChannels, CLUTpoints;
+    cmsUInt16Number InputEntries, OutputEntries;
+    cmsFloat64Number Matrix[3*3];
+    cmsPipeline* NewLUT = NULL;
+    cmsUInt32Number nTabSize, k;
+
+    *nItems = 0;
+
+    // Header: channel counts, CLUT grid points (one byte, so 255 maximum) and a
+    // padding byte. Nothing is allocated yet, so a failure may return right away.
+    if (!_cmsReadUInt8Number(io, &InputChannels) ||
+        !_cmsReadUInt8Number(io, &OutputChannels) ||
+        !_cmsReadUInt8Number(io, &CLUTpoints) ||
+        !_cmsReadUInt8Number(io, NULL)) return NULL;
+
+    // Channel counts have to be in 1..cmsMAXCHANNELS
+    if (InputChannels == 0 || InputChannels > cmsMAXCHANNELS)  goto Error;
+    if (OutputChannels == 0 || OutputChannels > cmsMAXCHANNELS) goto Error;
+
+    // Start with an empty pipeline
+    NewLUT = cmsPipelineAlloc(self ->ContextID, InputChannels, OutputChannels);
+    if (NewLUT == NULL) goto Error;
+
+    // The nine matrix coefficients are always present in the tag
+    for (k = 0; k < 3*3; k++) {
+        if (!_cmsRead15Fixed16Number(io, &Matrix[k])) goto Error;
+    }
+
+    // The matrix only operates on 3 channels, and is skipped when it is the identity
+    if (InputChannels == 3) {
+
+        if (!_cmsMAT3isIdentity((cmsMAT3*) Matrix)) {
+
+            if (!cmsPipelineInsertStage(NewLUT, cmsAT_END, cmsStageAllocMatrix(self ->ContextID, 3, 3, Matrix, NULL)))
+                goto Error;
+        }
+    }
+
+    // Number of entries of each input and each output table
+    if (!_cmsReadUInt16Number(io, &InputEntries) ||
+        !_cmsReadUInt16Number(io, &OutputEntries)) goto Error;
+
+    if (InputEntries > 0x7FFF || OutputEntries > 0x7FFF) goto Error;
+
+    // Impossible value, 0 for no CLUT and then 2 at least
+    if (CLUTpoints == 1) goto Error;
+
+    // Input tables
+    if (!Read16bitTables(self ->ContextID, io,  NewLUT, InputChannels, InputEntries)) goto Error;
+
+    // CLUT: the number of nodes is computed with overflow checking
+    nTabSize = uipow(OutputChannels, CLUTpoints, InputChannels);
+    if (nTabSize == (cmsUInt32Number) -1) goto Error;
+
+    if (nTabSize > 0) {
+
+        cmsUInt16Number* Table;
+
+        Table = (cmsUInt16Number*) _cmsCalloc(self ->ContextID, nTabSize, sizeof(cmsUInt16Number));
+        if (Table == NULL) goto Error;
+
+        // Read the nodes, then hand them to a new CLUT stage. The local table is
+        // released in either case.
+        if (!_cmsReadUInt16Array(io, nTabSize, Table) ||
+            !cmsPipelineInsertStage(NewLUT, cmsAT_END, cmsStageAllocCLut16bit(self ->ContextID, CLUTpoints, InputChannels, OutputChannels, Table))) {
+
+            _cmsFree(self ->ContextID, Table);
+            goto Error;
+        }
+        _cmsFree(self ->ContextID, Table);
+    }
+
+    // Output tables
+    if (!Read16bitTables(self ->ContextID, io,  NewLUT, OutputChannels, OutputEntries)) goto Error;
+
+    *nItems = 1;
+    return NewLUT;
+
+Error:
+    if (NewLUT != NULL) cmsPipelineFree(NewLUT);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+// Saves a pipeline as a LUT16 tag.
+// Only some specific MPE structures are allowed: Matrix plus prelin, plus clut,
+// plus post-lin. Stages may be missing, but the ones present have to come in
+// that order. Defaults are written for the missing parts: an identity matrix
+// and two-entry tables going from 0 to 0xffff.
+// Returns FALSE if the pipeline holds anything else, if the CLUT size
+// overflows, or if a write fails. nItems is not used.
+
+static
+cmsBool  Type_LUT16_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsPipeline* NewLUT = (cmsPipeline*) Ptr;
+    cmsStage* Stage;
+    _cmsStageMatrixData* MatMPE = NULL;
+    _cmsStageToneCurvesData* PreMPE = NULL;
+    _cmsStageCLutData* clut = NULL;
+    _cmsStageToneCurvesData* PostMPE = NULL;
+    cmsUInt32Number InputChannels, OutputChannels, clutPoints;
+    cmsUInt32Number nTabSize, k;
+
+    // Walk the stages, picking each expected element if it is the next one
+    Stage = NewLUT -> Elements;
+
+    if (Stage != NULL && Stage ->Type == cmsSigMatrixElemType) {
+        MatMPE = (_cmsStageMatrixData*) Stage ->Data;
+        Stage  = Stage ->Next;
+    }
+
+    if (Stage != NULL && Stage ->Type == cmsSigCurveSetElemType) {
+        PreMPE = (_cmsStageToneCurvesData*) Stage ->Data;
+        Stage  = Stage ->Next;
+    }
+
+    if (Stage != NULL && Stage ->Type == cmsSigCLutElemType) {
+        clut  = (_cmsStageCLutData*) Stage ->Data;
+        Stage = Stage ->Next;
+    }
+
+    if (Stage != NULL && Stage ->Type == cmsSigCurveSetElemType) {
+        PostMPE = (_cmsStageToneCurvesData*) Stage ->Data;
+        Stage   = Stage ->Next;
+    }
+
+    // Anything left over cannot be represented
+    if (Stage != NULL) {
+        cmsSignalError(Stage->ContextID, cmsERROR_UNKNOWN_EXTENSION, "LUT is not suitable to be saved as LUT16");
+        return FALSE;
+    }
+
+    InputChannels  = cmsPipelineInputChannels(NewLUT);
+    OutputChannels = cmsPipelineOutputChannels(NewLUT);
+
+    // Without a CLUT stage the grid point count is stored as 0
+    clutPoints = 0;
+    if (clut != NULL)
+        clutPoints = clut->Params->nSamples[0];
+
+    // Header: channel counts, grid points and one padding byte
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) InputChannels) ||
+        !_cmsWriteUInt8Number(io, (cmsUInt8Number) OutputChannels) ||
+        !_cmsWriteUInt8Number(io, (cmsUInt8Number) clutPoints) ||
+        !_cmsWriteUInt8Number(io, 0)) return FALSE;
+
+    // The matrix is always stored; the identity is used when there is no matrix stage
+    if (MatMPE != NULL) {
+
+        for (k = 0; k < 9; k++) {
+            if (!_cmsWrite15Fixed16Number(io, MatMPE -> Double[k])) return FALSE;
+        }
+    }
+    else {
+
+        // Entries 0, 4 and 8 are the diagonal of a row-major 3x3 matrix
+        for (k = 0; k < 9; k++) {
+            if (!_cmsWrite15Fixed16Number(io, (k % 4 == 0) ? 1 : 0)) return FALSE;
+        }
+    }
+
+    // Entry count of the input tables: the default table has 2 entries
+    if (PreMPE == NULL) {
+        if (!_cmsWriteUInt16Number(io, 2)) return FALSE;
+    }
+    else {
+        if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) PreMPE ->TheCurves[0]->nEntries)) return FALSE;
+    }
+
+    // Entry count of the output tables, same rule
+    if (PostMPE == NULL) {
+        if (!_cmsWriteUInt16Number(io, 2)) return FALSE;
+    }
+    else {
+        if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) PostMPE ->TheCurves[0]->nEntries)) return FALSE;
+    }
+
+    // The prelinearization table, or a 0..0xffff pair per input channel
+    if (PreMPE == NULL) {
+
+        for (k = 0; k < InputChannels; k++) {
+
+            if (!_cmsWriteUInt16Number(io, 0)) return FALSE;
+            if (!_cmsWriteUInt16Number(io, 0xffff)) return FALSE;
+        }
+    }
+    else {
+        if (!Write16bitTables(self ->ContextID, io, PreMPE)) return FALSE;
+    }
+
+    // The 3D CLUT, written only when there is a CLUT stage and it has nodes
+    nTabSize = uipow(OutputChannels, clutPoints, InputChannels);
+    if (nTabSize == (cmsUInt32Number) -1) return FALSE;
+
+    if (nTabSize > 0 && clut != NULL) {
+        if (!_cmsWriteUInt16Array(io, nTabSize, clut->Tab.T)) return FALSE;
+    }
+
+    // The postlinearization table, or a 0..0xffff pair per output channel
+    if (PostMPE == NULL) {
+
+        for (k = 0; k < OutputChannels; k++) {
+
+            if (!_cmsWriteUInt16Number(io, 0)) return FALSE;
+            if (!_cmsWriteUInt16Number(io, 0xffff)) return FALSE;
+        }
+    }
+    else {
+        if (!Write16bitTables(self ->ContextID, io, PostMPE)) return FALSE;
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+// Duplicates the pipeline held by a LUT16 tag. Neither the item count nor the
+// type handler is used.
+static
+void* Type_LUT16_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    void* Copy = (void*) cmsPipelineDup((cmsPipeline*) Ptr);
+
+    return Copy;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Releases the pipeline held by a LUT16 tag. The type handler is not used.
+static
+void Type_LUT16_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Ptr;
+
+    cmsPipelineFree(Lut);
+    return;
+
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+// ********************************************************************************
+// Type cmsSigLutAToBType
+// ********************************************************************************
+
+
+// V4 stuff. Reads the matrix used by LutAtoB and LutBtoA: nine coefficients
+// followed by three offsets, all stored at the given absolute Offset.
+// Returns a new 3x3 matrix stage, or NULL if seeking or reading fails.
+
+static
+cmsStage* ReadMatrix(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number Offset)
+{
+    cmsFloat64Number dMat[3*3];
+    cmsFloat64Number dOff[3];
+    cmsUInt32Number k;
+
+    // Go to address
+    if (!io -> Seek(io, Offset)) return NULL;
+
+    // Matrix coefficients first...
+    for (k = 0; k < 3*3; k++) {
+        if (!_cmsRead15Fixed16Number(io, &dMat[k])) return NULL;
+    }
+
+    // ...then the offset terms
+    for (k = 0; k < 3; k++) {
+        if (!_cmsRead15Fixed16Number(io, &dOff[k])) return NULL;
+    }
+
+    return cmsStageAllocMatrix(self ->ContextID, 3, 3, dMat, dOff);
+}
+
+
+
+
+//  V4 stuff. Reads the CLUT part of LutAtoB and LutBtoA, stored at the given
+//  absolute Offset: cmsMAXCHANNELS grid point bytes, one precision byte, three
+//  bytes that are skipped, and then the nodes in 1 or 2 bytes each.
+//  Returns a new CLUT stage, or NULL on any error.
+
+static
+cmsStage* ReadCLUT(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, 
+                   cmsUInt32Number Offset, cmsUInt32Number InputChannels, cmsUInt32Number OutputChannels)
+{
+    cmsUInt8Number  RawGrid[cmsMAXCHANNELS];    // Grid points of each dimension, as stored
+    cmsUInt32Number GridPoints[cmsMAXCHANNELS]; // Same values, widened for the allocator
+    cmsUInt32Number k;
+    cmsUInt8Number  Precision;
+    cmsStage* CLUT;
+    _cmsStageCLutData* Data;
+
+    if (!io -> Seek(io, Offset)) return NULL;
+    if (io -> Read(io, RawGrid, cmsMAXCHANNELS, 1) != 1) return NULL;
+
+    for (k = 0; k < cmsMAXCHANNELS; k++) {
+
+        // Impossible value, 0 for no CLUT and then 2 at least
+        if (RawGrid[k] == 1) return NULL;
+
+        GridPoints[k] = RawGrid[k];
+    }
+
+    if (!_cmsReadUInt8Number(io, &Precision)) return NULL;
+
+    // Three bytes follow the precision and are not used
+    for (k = 0; k < 3; k++) {
+        if (!_cmsReadUInt8Number(io, NULL)) return NULL;
+    }
+
+    CLUT = cmsStageAllocCLut16bitGranular(self ->ContextID, GridPoints, InputChannels, OutputChannels, NULL);
+    if (CLUT == NULL) return NULL;
+
+    Data = (_cmsStageCLutData*) CLUT ->Data;
+
+    // Precision can be 1 or 2 bytes
+    switch (Precision) {
+
+        case 1:
+            {
+                cmsUInt8Number Node;
+
+                // One byte per node, expanded to 16 bits
+                for (k = 0; k < Data ->nEntries; k++) {
+
+                    if (io ->Read(io, &Node, sizeof(cmsUInt8Number), 1) != 1) {
+                        cmsStageFree(CLUT);
+                        return NULL;
+                    }
+                    Data ->Tab.T[k] = FROM_8_TO_16(Node);
+                }
+            }
+            break;
+
+        case 2:
+            if (!_cmsReadUInt16Array(io, Data->nEntries, Data ->Tab.T)) {
+                cmsStageFree(CLUT);
+                return NULL;
+            }
+            break;
+
+        default:
+            cmsStageFree(CLUT);
+            cmsSignalError(self ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown precision of '%d'", Precision);
+            return NULL;
+    }
+
+    return CLUT;
+}
+
+// Reads one curve embedded in another tag. The type base found at the current
+// position selects the reader: curve type or parametric curve type. Any other
+// type is reported as an error and NULL is returned.
+static
+cmsToneCurve* ReadEmbeddedCurve(struct _cms_typehandler_struct* self, cmsIOHANDLER* io)
+{
+    cmsUInt32Number nItems;
+    char String[5];
+    cmsTagTypeSignature BaseType = _cmsReadTypeBase(io);
+
+    if (BaseType == cmsSigCurveType)
+        return (cmsToneCurve*) Type_Curve_Read(self, io, &nItems, 0);
+
+    if (BaseType == cmsSigParametricCurveType)
+        return (cmsToneCurve*) Type_ParametricCurve_Read(self, io, &nItems, 0);
+
+    // Neither of the two supported types
+    _cmsTagSignature2String(String, (cmsTagSignature) BaseType);
+    cmsSignalError(self ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown curve type '%s'", String);
+
+    return NULL;
+}
+
+
+// Reads a set of nCurves embedded curves starting at the given absolute Offset
+// and wraps them in a tone-curve stage. Each curve is followed by alignment
+// padding, which is consumed as well.
+//
+// Returns the new stage, or NULL if nCurves exceeds cmsMAXCHANNELS, if seeking
+// or reading fails, or if the stage cannot be allocated. The curves read here
+// are released before returning, whatever the outcome.
+static
+cmsStage* ReadSetOfCurves(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number Offset, cmsUInt32Number nCurves)
+{
+    cmsToneCurve* Curves[cmsMAXCHANNELS];
+    cmsUInt32Number i;
+    cmsStage* Lin = NULL;
+
+    // This function returns a pointer, so failures are reported as NULL, not FALSE
+    if (nCurves > cmsMAXCHANNELS) return NULL;
+
+    if (!io -> Seek(io, Offset)) return NULL;
+
+    // Start with every slot empty, so the cleanup knows which curves do exist
+    for (i=0; i < nCurves; i++)
+        Curves[i] = NULL;
+
+    for (i=0; i < nCurves; i++) {
+
+        Curves[i] = ReadEmbeddedCurve(self, io);
+        if (Curves[i] == NULL) goto Error;
+        if (!_cmsReadAlignment(io)) goto Error;
+
+    }
+
+    Lin = cmsStageAllocToneCurves(self ->ContextID, nCurves, Curves);
+
+    // Reached on success too: Lin is still NULL only if something failed
+Error:
+    for (i=0; i < nCurves; i++) {
+
+        // Slots after a failed read are still NULL; skip them, as Read16bitTables does
+        if (Curves[i] != NULL) cmsFreeToneCurve(Curves[i]);
+    }
+
+    return Lin;
+}
+
+
+// LutAtoB type
+
+// This structure represents a colour transform. The type contains up to five processing
+// elements which are stored in the AtoBTag tag in the following order: a set of one
+// dimensional curves, a 3 by 3 matrix with offset terms, a set of one dimensional curves,
+// a multidimensional lookup table, and a set of one dimensional output curves.
+// Data are processed using these elements via the following sequence:
+//
+//("A" curves) -> (multidimensional lookup table - CLUT) -> ("M" curves) -> (matrix) -> ("B" curves).
+//
+/*
+It is possible to use any or all of these processing elements. At least one processing element
+must be included. Only the following combinations are allowed:
+
+B
+M - Matrix - B
+A - CLUT - B
+A - CLUT - M - Matrix - B
+
+*/
+
+// Reads a LutAtoB tag into a pipeline. The tag header holds the channel counts
+// and five offsets, relative to the start of the tag base, one per processing
+// element. An offset of 0 means that element is not present. Elements are
+// appended in processing order: A curves, CLUT, M curves, matrix, B curves.
+// Returns the pipeline and sets *nItems to 1, or returns NULL on any error.
+static
+void* Type_LUTA2B_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt32Number      BaseOffset;
+    cmsUInt8Number       inputChan, outputChan;   // Number of input and output channels
+    cmsUInt32Number      offsetB;                 // Offset to first "B" curve
+    cmsUInt32Number      offsetMat;               // Offset to matrix
+    cmsUInt32Number      offsetM;                 // Offset to first "M" curve
+    cmsUInt32Number      offsetC;                 // Offset to CLUT
+    cmsUInt32Number      offsetA;                 // Offset to first "A" curve
+    cmsPipeline* NewLUT = NULL;
+    cmsStage* Stage;
+
+    // The type base has been consumed already; offsets are counted from its start
+    BaseOffset = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    if (!_cmsReadUInt8Number(io, &inputChan) ||
+        !_cmsReadUInt8Number(io, &outputChan)) return NULL;
+
+    // Two bytes that are not used
+    if (!_cmsReadUInt16Number(io, NULL)) return NULL;
+
+    // The directory of offsets
+    if (!_cmsReadUInt32Number(io, &offsetB) ||
+        !_cmsReadUInt32Number(io, &offsetMat) ||
+        !_cmsReadUInt32Number(io, &offsetM) ||
+        !_cmsReadUInt32Number(io, &offsetC) ||
+        !_cmsReadUInt32Number(io, &offsetA)) return NULL;
+
+    if (inputChan == 0 || inputChan >= cmsMAXCHANNELS) return NULL;
+    if (outputChan == 0 || outputChan >= cmsMAXCHANNELS) return NULL;
+
+    // Allocates an empty LUT
+    NewLUT = cmsPipelineAlloc(self ->ContextID, inputChan, outputChan);
+    if (NewLUT == NULL) return NULL;
+
+    // "A" curves, one per input channel
+    if (offsetA != 0) {
+        Stage = ReadSetOfCurves(self, io, BaseOffset + offsetA, inputChan);
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_END, Stage)) goto Error;
+    }
+
+    // Multidimensional lookup table
+    if (offsetC != 0) {
+        Stage = ReadCLUT(self, io, BaseOffset + offsetC, inputChan, outputChan);
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_END, Stage)) goto Error;
+    }
+
+    // "M" curves, one per output channel
+    if (offsetM != 0) {
+        Stage = ReadSetOfCurves(self, io, BaseOffset + offsetM, outputChan);
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_END, Stage)) goto Error;
+    }
+
+    // Matrix with offset terms
+    if (offsetMat != 0) {
+        Stage = ReadMatrix(self, io, BaseOffset + offsetMat);
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_END, Stage)) goto Error;
+    }
+
+    // "B" curves, one per output channel
+    if (offsetB != 0) {
+        Stage = ReadSetOfCurves(self, io, BaseOffset + offsetB, outputChan);
+        if (!cmsPipelineInsertStage(NewLUT, cmsAT_END, Stage)) goto Error;
+    }
+
+    *nItems = 1;
+    return NewLUT;
+
+Error:
+    cmsPipelineFree(NewLUT);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+// Writes a matrix stage: its nine coefficients followed by three offset terms.
+// Zeros are written as offsets when the stage has none.
+// Returns FALSE as soon as a write fails. The type handler is not used.
+static
+cmsBool  WriteMatrix(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsStage* mpe)
+{
+    _cmsStageMatrixData* m = (_cmsStageMatrixData*) mpe -> Data;
+    cmsUInt32Number k;
+
+    // The coefficients
+    for (k = 0; k < 9; k++) {
+        if (!_cmsWrite15Fixed16Number(io, m -> Double[k])) return FALSE;
+    }
+
+    // The offset terms, which are optional in the stage but not in the output
+    if (m ->Offset == NULL) {
+
+        for (k = 0; k < 3; k++) {
+            if (!_cmsWrite15Fixed16Number(io, 0)) return FALSE;
+        }
+    }
+    else {
+
+        for (k = 0; k < 3; k++) {
+            if (!_cmsWrite15Fixed16Number(io, m -> Offset[k])) return FALSE;
+        }
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(self);
+}
+
+
+// Writes every curve of a tone-curve stage, each one preceded by its type base
+// and followed by alignment padding. Type is the requested encoding, but a
+// curve is written as curve type regardless of it when it has no segments,
+// when it has two segments and the second one is of type 0, or when its first
+// segment has a negative type.
+// Returns FALSE if a write fails or the resulting type is not supported.
+static
+cmsBool WriteSetOfCurves(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsTagTypeSignature Type, cmsStage* mpe)
+{
+    cmsUInt32Number nCurves = cmsStageOutputChannels(mpe);
+    cmsToneCurve** Curves   = _cmsStageGetPtrToCurveSet(mpe);
+    cmsTagTypeSignature CurrentType;
+    cmsUInt32Number k;
+
+    for (k = 0; k < nCurves; k++) {
+
+        cmsToneCurve* Curve = Curves[k];
+
+        // If this is a table-based curve, use curve type even on V4
+        if ((Curve ->nSegments == 0) ||
+            ((Curve ->nSegments == 2) && (Curve ->Segments[1].Type == 0)) ||
+            (Curve ->Segments[0].Type < 0))
+            CurrentType = cmsSigCurveType;
+        else
+            CurrentType = Type;
+
+        if (!_cmsWriteTypeBase(io, CurrentType)) return FALSE;
+
+        if (CurrentType == cmsSigCurveType) {
+
+            if (!Type_Curve_Write(self, io, Curve, 1)) return FALSE;
+        }
+        else
+        if (CurrentType == cmsSigParametricCurveType) {
+
+            if (!Type_ParametricCurve_Write(self, io, Curve, 1)) return FALSE;
+        }
+        else {
+
+            // The requested type is one this function cannot write
+            char String[5];
+
+            _cmsTagSignature2String(String, (cmsTagSignature) Type);
+            cmsSignalError(self ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown curve type '%s'", String);
+            return FALSE;
+        }
+
+        if (!_cmsWriteAlignment(io)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Writes a CLUT stage: cmsMAXCHANNELS grid point bytes (zero for the unused
+// dimensions), the precision byte, three zero bytes, the nodes with 1 or 2
+// bytes each as selected by Precision, and finally the alignment padding.
+// Returns FALSE if the CLUT holds floating point values, if Precision is
+// neither 1 nor 2, or if a write fails.
+static
+cmsBool WriteCLUT(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt8Number  Precision, cmsStage* mpe)
+{
+    _cmsStageCLutData* CLUT = ( _cmsStageCLutData*) mpe -> Data;
+    cmsUInt8Number  gridPoints[cmsMAXCHANNELS]; // Number of grid points in each dimension.
+    cmsUInt32Number k;
+
+    if (CLUT ->HasFloatValues) {
+         cmsSignalError(self ->ContextID, cmsERROR_NOT_SUITABLE, "Cannot save floating point data, CLUT are 8 or 16 bit only");
+         return FALSE;
+    }
+
+    // Only the dimensions in use get a grid point count; the rest stay at zero
+    memset(gridPoints, 0, sizeof(gridPoints));
+    for (k = 0; k < (cmsUInt32Number) CLUT ->Params ->nInputs; k++)
+        gridPoints[k] = (cmsUInt8Number) CLUT ->Params ->nSamples[k];
+
+    if (!io -> Write(io, cmsMAXCHANNELS*sizeof(cmsUInt8Number), gridPoints)) return FALSE;
+
+    // Precision, then three zero bytes
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) Precision)) return FALSE;
+    for (k = 0; k < 3; k++) {
+        if (!_cmsWriteUInt8Number(io, 0)) return FALSE;
+    }
+
+    // Precision can be 1 or 2 bytes
+    switch (Precision) {
+
+        case 1:
+            // Every node is reduced to 8 bits
+            for (k = 0; k < CLUT->nEntries; k++) {
+
+                if (!_cmsWriteUInt8Number(io, FROM_16_TO_8(CLUT->Tab.T[k]))) return FALSE;
+            }
+            break;
+
+        case 2:
+            if (!_cmsWriteUInt16Array(io, CLUT->nEntries, CLUT ->Tab.T)) return FALSE;
+            break;
+
+        default:
+             cmsSignalError(self ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown precision of '%d'", Precision);
+            return FALSE;
+    }
+
+    if (!_cmsWriteAlignment(io)) return FALSE;
+
+    return TRUE;
+}
+
+
+
+
+// Writes a pipeline as the body of a LutAToB tag.
+//
+// The pipeline is matched, in this order, against the stage layouts
+//      B
+//      M - Matrix - B
+//      A - CLUT - B
+//      A - CLUT - M - Matrix - B
+// and the first layout that fits decides which of A, CLUT, M, Matrix, B are written.
+// If none fits, cmsERROR_NOT_SUITABLE is signalled and FALSE is returned. When
+// Lut ->Elements is NULL the matching is skipped, no element is written and all
+// directory offsets stay at zero.
+//
+// self   - type handler, used for error reporting and passed to the element writers
+// io     - output handler, must support Tell and Seek
+// Ptr    - the cmsPipeline to save
+// nItems - unused
+//
+// Returns TRUE on success, FALSE as soon as any write or seek fails.
+static
+cmsBool Type_LUTA2B_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Ptr;
+    cmsUInt32Number inputChan, outputChan;
+    cmsStage *A = NULL, *B = NULL, *M = NULL;
+    cmsStage * Matrix = NULL;
+    cmsStage * CLUT = NULL;
+    cmsUInt32Number offsetB = 0, offsetMat = 0, offsetM = 0, offsetC = 0, offsetA = 0;
+    cmsUInt32Number BaseOffset, DirectoryPos, CurrentPos;
+
+    // Get the base for all offsets: current position minus the size of the tag base header
+    BaseOffset = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // Try each supported layout in turn; only the innermost failure is an error
+    if (Lut ->Elements != NULL)
+        if (!cmsPipelineCheckAndRetreiveStages(Lut, 1, cmsSigCurveSetElemType, &B))
+            if (!cmsPipelineCheckAndRetreiveStages(Lut, 3, cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType, &M, &Matrix, &B))
+                if (!cmsPipelineCheckAndRetreiveStages(Lut, 3, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType, &A, &CLUT, &B))
+                    if (!cmsPipelineCheckAndRetreiveStages(Lut, 5, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType,
+                        cmsSigMatrixElemType, cmsSigCurveSetElemType, &A, &CLUT, &M, &Matrix, &B)) {
+
+                            cmsSignalError(self->ContextID, cmsERROR_NOT_SUITABLE, "LUT is not suitable to be saved as LutAToB");
+                            return FALSE;
+                    }
+
+    // Get input, output channels
+    inputChan  = cmsPipelineInputChannels(Lut);
+    outputChan = cmsPipelineOutputChannels(Lut);
+
+    // Write channel count (one byte each) followed by two bytes of zero padding
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) inputChan)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) outputChan)) return FALSE;
+    if (!_cmsWriteUInt16Number(io, 0)) return FALSE;
+
+    // Keep directory to be filled later
+    DirectoryPos = io ->Tell(io);
+
+    // Write the directory: five 32-bit placeholders, patched once the real offsets are known
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+
+    // Elements are emitted in the order A, CLUT, M, Matrix, B; each offset is
+    // recorded relative to BaseOffset just before the element is written
+    if (A != NULL) {
+
+        offsetA = io ->Tell(io) - BaseOffset;
+        if (!WriteSetOfCurves(self, io, cmsSigParametricCurveType, A)) return FALSE;
+    }
+
+    if (CLUT != NULL) {
+        offsetC = io ->Tell(io) - BaseOffset;
+        // Precision argument is 1 when the pipeline asks for 8-bit storage, 2 otherwise
+        if (!WriteCLUT(self, io, (Lut ->SaveAs8Bits ? 1U : 2U), CLUT)) return FALSE;
+
+    }
+    if (M != NULL) {
+
+        offsetM = io ->Tell(io) - BaseOffset;
+        if (!WriteSetOfCurves(self, io, cmsSigParametricCurveType, M)) return FALSE;
+    }
+
+    if (Matrix != NULL) {
+        offsetMat = io ->Tell(io) - BaseOffset;
+        if (!WriteMatrix(self, io, Matrix)) return FALSE;
+    }
+
+    if (B != NULL) {
+
+        offsetB = io ->Tell(io) - BaseOffset;
+        if (!WriteSetOfCurves(self, io, cmsSigParametricCurveType, B)) return FALSE;
+    }
+
+    // Remember where the data ends so the stream can be restored after patching
+    CurrentPos = io ->Tell(io);
+
+    if (!io ->Seek(io, DirectoryPos)) return FALSE;
+
+    // Directory order is fixed: B, Matrix, M, CLUT, A. Absent elements keep offset 0
+    if (!_cmsWriteUInt32Number(io, offsetB)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetMat)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetM)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetC)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetA)) return FALSE;
+
+    if (!io ->Seek(io, CurrentPos)) return FALSE;
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+// Duplicates a LutAToB item by delegating to cmsPipelineDup.
+// Neither the handler nor the item count is consulted.
+static
+void* Type_LUTA2B_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsPipeline* Source = (cmsPipeline*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+    cmsUNUSED_PARAMETER(n);
+
+    return (void*) cmsPipelineDup(Source);
+}
+
+// Releases a LutAToB item by handing the pipeline to cmsPipelineFree.
+static
+void Type_LUTA2B_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsPipeline* Doomed = (cmsPipeline*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsPipelineFree(Doomed);
+}
+
+
+// LutBToA type
+
+// Reads a LutBToA tag body into a newly allocated pipeline.
+//
+// Layout consumed: input channels (1 byte), output channels (1 byte), 2 bytes of
+// padding, then five 32-bit offsets for B curves, matrix, M curves, CLUT and A curves.
+// A zero offset means the element is absent. Elements present are appended to the
+// pipeline in that same order.
+//
+// Returns the pipeline and sets *nItems to 1 on success; returns NULL on any failure.
+// SizeOfTag is not used.
+static
+void* Type_LUTB2A_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt8Number   nIn, nOut;                             // Channel counts stored in the tag
+    cmsUInt32Number  Origin;                                // Position the offsets are added to
+    cmsUInt32Number  posB, posMatrix, posM, posCLUT, posA;  // Directory entries
+    cmsPipeline*     Pipe;
+    cmsStage*        Stage;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+
+    Origin = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    if (!_cmsReadUInt8Number(io, &nIn) ||
+        !_cmsReadUInt8Number(io, &nOut)) return NULL;
+
+    // Reject empty or oversized channel counts
+    if (nIn == 0 || nIn >= cmsMAXCHANNELS) return NULL;
+    if (nOut == 0 || nOut >= cmsMAXCHANNELS) return NULL;
+
+    // Skip the padding
+    if (!_cmsReadUInt16Number(io, NULL)) return NULL;
+
+    // Directory, always in this order
+    if (!_cmsReadUInt32Number(io, &posB)      ||
+        !_cmsReadUInt32Number(io, &posMatrix) ||
+        !_cmsReadUInt32Number(io, &posM)      ||
+        !_cmsReadUInt32Number(io, &posCLUT)   ||
+        !_cmsReadUInt32Number(io, &posA)) return NULL;
+
+    Pipe = cmsPipelineAlloc(self ->ContextID, nIn, nOut);
+    if (Pipe == NULL) return NULL;
+
+    if (posB != 0) {
+        Stage = ReadSetOfCurves(self, io, Origin + posB, nIn);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    if (posMatrix != 0) {
+        Stage = ReadMatrix(self, io, Origin + posMatrix);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    if (posM != 0) {
+        Stage = ReadSetOfCurves(self, io, Origin + posM, nIn);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    if (posCLUT != 0) {
+        Stage = ReadCLUT(self, io, Origin + posCLUT, nIn, nOut);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    if (posA != 0) {
+        Stage = ReadSetOfCurves(self, io, Origin + posA, nOut);
+        if (!cmsPipelineInsertStage(Pipe, cmsAT_END, Stage)) goto Error;
+    }
+
+    *nItems = 1;
+    return Pipe;
+
+Error:
+    cmsPipelineFree(Pipe);
+    return NULL;
+}
+
+
+/*
+B
+B - Matrix - M
+B - CLUT - A
+B - Matrix - M - CLUT - A
+*/
+
+// Writes a pipeline as the body of a LutBToA tag.
+//
+// The pipeline is matched, in this order, against the stage layouts
+//      B
+//      B - Matrix - M
+//      B - CLUT - A
+//      B - Matrix - M - CLUT - A
+// and the first layout that fits decides which elements are written. If none fits,
+// cmsERROR_NOT_SUITABLE is signalled and FALSE is returned.
+//
+// self   - type handler, used for error reporting and passed to the element writers
+// io     - output handler, must support Tell and Seek
+// Ptr    - the cmsPipeline to save
+// nItems - unused
+//
+// Returns TRUE on success, FALSE as soon as any write or seek fails.
+static
+cmsBool  Type_LUTB2A_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsPipeline* Lut = (cmsPipeline*) Ptr;
+    cmsUInt32Number inputChan, outputChan;
+    cmsStage *A = NULL, *B = NULL, *M = NULL;
+    cmsStage *Matrix = NULL;
+    cmsStage *CLUT = NULL;
+    cmsUInt32Number offsetB = 0, offsetMat = 0, offsetM = 0, offsetC = 0, offsetA = 0;
+    cmsUInt32Number BaseOffset, DirectoryPos, CurrentPos;
+
+
+    // Base for all offsets: current position minus the size of the tag base header
+    BaseOffset = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // Try each supported layout in turn; only the innermost failure is an error
+    if (!cmsPipelineCheckAndRetreiveStages(Lut, 1, cmsSigCurveSetElemType, &B))
+        if (!cmsPipelineCheckAndRetreiveStages(Lut, 3, cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType, &B, &Matrix, &M))
+            if (!cmsPipelineCheckAndRetreiveStages(Lut, 3, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType, &B, &CLUT, &A))
+                if (!cmsPipelineCheckAndRetreiveStages(Lut, 5, cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType,
+                    cmsSigCLutElemType, cmsSigCurveSetElemType, &B, &Matrix, &M, &CLUT, &A)) {
+                        cmsSignalError(self->ContextID, cmsERROR_NOT_SUITABLE, "LUT is not suitable to be saved as LutBToA");
+                        return FALSE;
+                }
+
+    inputChan  = cmsPipelineInputChannels(Lut);
+    outputChan = cmsPipelineOutputChannels(Lut);
+
+    // Channel counts (one byte each) followed by two bytes of zero padding
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) inputChan)) return FALSE;
+    if (!_cmsWriteUInt8Number(io, (cmsUInt8Number) outputChan)) return FALSE;
+    if (!_cmsWriteUInt16Number(io, 0)) return FALSE;
+
+    // Remember where the directory lives so it can be patched afterwards
+    DirectoryPos = io ->Tell(io);
+
+    // Five 32-bit placeholders for the directory
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+
+    // Elements are emitted in the order A, CLUT, M, Matrix, B; each offset is
+    // recorded relative to BaseOffset just before the element is written
+    if (A != NULL) {
+
+        offsetA = io ->Tell(io) - BaseOffset;
+        if (!WriteSetOfCurves(self, io, cmsSigParametricCurveType, A)) return FALSE;
+    }
+
+    if (CLUT != NULL) {
+        offsetC = io ->Tell(io) - BaseOffset;
+        // Precision argument is 1 when the pipeline asks for 8-bit storage, 2 otherwise
+        if (!WriteCLUT(self, io, (Lut ->SaveAs8Bits ? 1U : 2U), CLUT)) return FALSE;
+
+    }
+    if (M != NULL) {
+
+        offsetM = io ->Tell(io) - BaseOffset;
+        if (!WriteSetOfCurves(self, io, cmsSigParametricCurveType, M)) return FALSE;
+    }
+
+    if (Matrix != NULL) {
+        offsetMat = io ->Tell(io) - BaseOffset;
+        if (!WriteMatrix(self, io, Matrix)) return FALSE;
+    }
+
+    if (B != NULL) {
+
+        offsetB = io ->Tell(io) - BaseOffset;
+        if (!WriteSetOfCurves(self, io, cmsSigParametricCurveType, B)) return FALSE;
+    }
+
+    // Remember where the data ends so the stream can be restored after patching
+    CurrentPos = io ->Tell(io);
+
+    if (!io ->Seek(io, DirectoryPos)) return FALSE;
+
+    // Directory order is fixed: B, Matrix, M, CLUT, A. Absent elements keep offset 0
+    if (!_cmsWriteUInt32Number(io, offsetB)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetMat)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetM)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetC)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, offsetA)) return FALSE;
+
+    if (!io ->Seek(io, CurrentPos)) return FALSE;
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+
+// Duplicates a LutBToA item by delegating to cmsPipelineDup.
+// Neither the handler nor the item count is consulted.
+static
+void* Type_LUTB2A_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsPipeline* Source = (cmsPipeline*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+    cmsUNUSED_PARAMETER(n);
+
+    return (void*) cmsPipelineDup(Source);
+}
+
+// Releases a LutBToA item by handing the pipeline to cmsPipelineFree.
+static
+void Type_LUTB2A_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsPipeline* Doomed = (cmsPipeline*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsPipelineFree(Doomed);
+}
+
+
+
+// ********************************************************************************
+// Type cmsSigColorantTableType
+// ********************************************************************************
+/*
+The purpose of this tag is to identify the colorants used in the profile by a
+unique name and set of XYZ or L*a*b* values to give the colorant an unambiguous
+value. The first colorant listed is the colorant of the first device channel of
+a lut tag. The second colorant listed is the colorant of the second device channel
+of a lut tag, and so on.
+*/
+
+// Reads a colorant table into a named color list.
+//
+// Layout consumed: a 32-bit colorant count, then for each colorant a 32-byte name
+// followed by three 16-bit PCS values. Counts above cmsMAXCHANNELS are rejected
+// with cmsERROR_RANGE.
+//
+// Returns the list and sets *nItems to 1 on success. On any failure (short read,
+// bad count, allocation failure) returns NULL with *nItems set to 0.
+// SizeOfTag is not used.
+static
+void *Type_ColorantTable_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt32Number i, Count;
+    cmsNAMEDCOLORLIST* List;
+    char Name[34];
+    cmsUInt16Number PCS[3];
+
+    // Report "nothing read" on every early exit, as the other readers do
+    *nItems = 0;
+
+    if (!_cmsReadUInt32Number(io, &Count)) return NULL;
+
+    if (Count > cmsMAXCHANNELS) {
+        cmsSignalError(self->ContextID, cmsERROR_RANGE, "Too many colorants '%d'", Count);
+        return NULL;
+    }
+
+    List = cmsAllocNamedColorList(self ->ContextID, Count, 0, "", "");
+
+    // Without this check a failed allocation with Count == 0 would fall through
+    // the loop and be reported as a successful read of a NULL list
+    if (List == NULL) return NULL;
+
+    for (i=0; i < Count; i++) {
+
+        if (io ->Read(io, Name, 32, 1) != 1) goto Error;
+        Name[32] = 0;   // Names in the file are not guaranteed to be terminated
+
+        if (!_cmsReadUInt16Array(io, 3, PCS)) goto Error;
+
+        if (!cmsAppendNamedColor(List, Name, PCS, NULL)) goto Error;
+
+    }
+
+    *nItems = 1;
+    return List;
+
+Error:
+    cmsFreeNamedColorList(List);
+    return NULL;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+
+// Stores a colorant table. The in-memory representation is a named color list:
+// the count goes first, then a 32-byte name and three 16-bit PCS values per entry.
+static
+cmsBool  Type_ColorantTable_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsNAMEDCOLORLIST* Colorants = (cmsNAMEDCOLORLIST*) Ptr;
+    cmsUInt32Number Total = cmsNamedColorCount(Colorants);
+    cmsUInt32Number k = 0;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    if (!_cmsWriteUInt32Number(io, Total)) return FALSE;
+
+    while (k < Total) {
+
+        char Name[cmsMAX_PATH];
+        cmsUInt16Number Encoded[3];
+
+        // Start from a zeroed buffer so the fixed-size field is padded with zeros
+        memset(Name, 0, sizeof(Name));
+
+        if (!cmsNamedColorInfo(Colorants, k, Name, NULL, NULL, Encoded, NULL)) return FALSE;
+        Name[32] = 0;
+
+        if (!io ->Write(io, 32, Name) ||
+            !_cmsWriteUInt16Array(io, 3, Encoded)) return FALSE;
+
+        k++;
+    }
+
+    return TRUE;
+}
+
+
+// Duplicates a colorant table by delegating to cmsDupNamedColorList.
+static
+void* Type_ColorantTable_Dup(struct _cms_typehandler_struct* self, const void* Ptr, cmsUInt32Number n)
+{
+    cmsUNUSED_PARAMETER(self);
+    cmsUNUSED_PARAMETER(n);
+
+    return (void*) cmsDupNamedColorList((cmsNAMEDCOLORLIST*) Ptr);
+}
+
+
+// Releases a colorant table by handing the list to cmsFreeNamedColorList.
+static
+void Type_ColorantTable_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsNAMEDCOLORLIST* Doomed = (cmsNAMEDCOLORLIST*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsFreeNamedColorList(Doomed);
+}
+
+
+// ********************************************************************************
+// Type cmsSigNamedColor2Type
+// ********************************************************************************
+//
+//The namedColor2Type is a count value and array of structures that provide color
+//coordinates for 7-bit ASCII color names. For each named color, a PCS and optional
+//device representation of the color are given. Both representations are 16-bit values.
+//The device representation corresponds to the header's "color space of data" field.
+//This representation should be consistent with the "number of device components"
+//field in the namedColor2Type. If this field is 0, device coordinates are not provided.
+//The PCS representation corresponds to the header's PCS field. The PCS representation
+//is always provided. Color names are fixed-length, 32-byte fields including null
+//termination. In order to maintain maximum portability, it is strongly recommended
+//that special characters of the 7-bit ASCII set not be used.
+
+// Reads a namedColor2 tag body into a named color list.
+//
+// Layout consumed: vendor flag, color count and device coordinate count (32 bits
+// each), a 32-byte prefix and a 32-byte suffix, then per color a 32-byte root name,
+// three 16-bit PCS values and the device coordinates as 16-bit values.
+//
+// Returns the list and sets *nItems to 1 on success; returns NULL with *nItems
+// set to 0 otherwise. SizeOfTag is not used.
+static
+void *Type_NamedColor_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt32Number      Flags;         // Vendor flag, read but not used further
+    cmsUInt32Number      nColors;       // How many named colors follow
+    cmsUInt32Number      nCoords;       // Device coordinates per color
+    cmsUInt32Number      k;
+    char                 Pre[32];       // Common prefix of the color names
+    char                 Post[32];      // Common suffix of the color names
+    cmsNAMEDCOLORLIST*   List;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+
+    *nItems = 0;
+
+    if (!_cmsReadUInt32Number(io, &Flags)   ||
+        !_cmsReadUInt32Number(io, &nColors) ||
+        !_cmsReadUInt32Number(io, &nCoords)) return NULL;
+
+    if (io -> Read(io, Pre, 32, 1) != 1) return NULL;
+    if (io -> Read(io, Post, 32, 1) != 1) return NULL;
+
+    // Force termination, the file contents cannot be trusted
+    Pre[31]  = 0;
+    Post[31] = 0;
+
+    List = cmsAllocNamedColorList(self ->ContextID, nColors, nCoords, Pre, Post);
+    if (List == NULL) {
+        cmsSignalError(self->ContextID, cmsERROR_RANGE, "Too many named colors '%d'", nColors);
+        return NULL;
+    }
+
+    if (nCoords > cmsMAXCHANNELS) {
+        cmsSignalError(self->ContextID, cmsERROR_RANGE, "Too many device coordinates '%d'", nCoords);
+        cmsFreeNamedColorList(List);
+        return NULL;
+    }
+
+    for (k = 0; k < nColors; k++) {
+
+        cmsUInt16Number Encoded[3];
+        cmsUInt16Number Device[cmsMAXCHANNELS];
+        char Name[33];
+
+        memset(Device, 0, sizeof(Device));
+
+        if (io -> Read(io, Name, 32, 1) != 1) break;
+        Name[32] = 0;  // To prevent exploits
+
+        if (!_cmsReadUInt16Array(io, 3, Encoded)) break;
+        if (!_cmsReadUInt16Array(io, nCoords, Device)) break;
+
+        if (!cmsAppendNamedColor(List, Name, Encoded, Device)) break;
+    }
+
+    // Leaving the loop early means one of the entries could not be read or stored
+    if (k < nColors) {
+        cmsFreeNamedColorList(List);
+        return NULL;
+    }
+
+    *nItems = 1;
+    return (void*) List;
+}
+
+
+// Saves a named color list into a named color profile.
+//
+// Layout produced: a zero vendor flag, the color count and the colorant count
+// (32 bits each), 32 bytes of prefix and 32 bytes of suffix, then per color a
+// 32-byte root name, three 16-bit PCS values and ColorantCount 16-bit device values.
+//
+// Ptr is the cmsNAMEDCOLORLIST to save. self and nItems are unused.
+// Returns TRUE on success, FALSE as soon as a lookup or write fails.
+static
+cmsBool Type_NamedColor_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsNAMEDCOLORLIST* NamedColorList = (cmsNAMEDCOLORLIST*) Ptr;
+    char                prefix[33];     // Prefix for each color name
+    char                suffix[33];     // Suffix for each color name
+    cmsUInt32Number     i, nColors;
+
+    nColors = cmsNamedColorCount(NamedColorList);
+
+    if (!_cmsWriteUInt32Number(io, 0)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, nColors)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, NamedColorList ->ColorantCount)) return FALSE;
+
+    // strncpy zero-fills the remainder of the first 32 bytes when the source is shorter
+    strncpy(prefix, (const char*) NamedColorList->Prefix, 32);
+    strncpy(suffix, (const char*) NamedColorList->Suffix, 32);
+
+    suffix[32] = prefix[32] = 0;
+
+    if (!io ->Write(io, 32, prefix)) return FALSE;
+    if (!io ->Write(io, 32, suffix)) return FALSE;
+
+    for (i=0; i < nColors; i++) {
+
+       cmsUInt16Number PCS[3];
+       cmsUInt16Number Colorant[cmsMAXCHANNELS];
+       char Root[cmsMAX_PATH];
+
+        // A fixed 32 bytes of Root are written regardless of the name length, so
+        // the buffers must start zeroed; otherwise whatever was on the stack after
+        // the terminator would end up in the profile
+        memset(Root, 0, sizeof(Root));
+        memset(PCS, 0, sizeof(PCS));
+        memset(Colorant, 0, sizeof(Colorant));
+
+        if (!cmsNamedColorInfo(NamedColorList, i, Root, NULL, NULL, PCS, Colorant)) return FALSE;
+        Root[32] = 0;
+        if (!io ->Write(io, 32 , Root)) return FALSE;
+        if (!_cmsWriteUInt16Array(io, 3, PCS)) return FALSE;
+        if (!_cmsWriteUInt16Array(io, NamedColorList ->ColorantCount, Colorant)) return FALSE;
+    }
+
+    return TRUE;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+}
+
+// Duplicates a named color list by delegating to cmsDupNamedColorList.
+static
+void* Type_NamedColor_Dup(struct _cms_typehandler_struct* self, const void* Ptr, cmsUInt32Number n)
+{
+    cmsUNUSED_PARAMETER(self);
+    cmsUNUSED_PARAMETER(n);
+
+    return (void*) cmsDupNamedColorList((cmsNAMEDCOLORLIST*) Ptr);
+}
+
+
+// Releases a named color list by handing it to cmsFreeNamedColorList.
+static
+void Type_NamedColor_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsNAMEDCOLORLIST* Doomed = (cmsNAMEDCOLORLIST*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsFreeNamedColorList(Doomed);
+}
+
+
+// ********************************************************************************
+// Type cmsSigProfileSequenceDescType
+// ********************************************************************************
+
+// This type is an array of structures, each of which contains information from the
+// header fields and tags from the original profiles which were combined to create
+// the final profile. The order of the structures is the order in which the profiles
+// were combined and includes a structure for the final profile. This provides a
+// description of the profile sequence from source to destination,
+// typically used with the DeviceLink profile.
+
+// Reads a text element embedded inside another tag. The element starts with its
+// own type base; text, text description and multilocalized unicode are accepted.
+// Any MLU already stored in *mlu is released before the new one is read.
+//
+// Returns TRUE when an MLU was obtained, FALSE on an unsupported type or a failed read.
+static
+cmsBool ReadEmbeddedText(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsMLU** mlu, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt32Number Ignored;     // Item count reported by the readers, not needed here
+    cmsMLU* Fresh;
+    cmsTagTypeSignature Kind = _cmsReadTypeBase(io);
+
+    // Anything else is rejected and leaves *mlu untouched
+    if (Kind != cmsSigTextType &&
+        Kind != cmsSigTextDescriptionType &&
+        Kind != cmsSigMultiLocalizedUnicodeType) return FALSE;
+
+    // Drop the previous contents, if any, before reading the replacement
+    if (*mlu) cmsMLUfree(*mlu);
+
+    if (Kind == cmsSigTextType) {
+
+        Fresh = (cmsMLU*) Type_Text_Read(self, io, &Ignored, SizeOfTag);
+    }
+    else
+        if (Kind == cmsSigTextDescriptionType) {
+
+            Fresh = (cmsMLU*) Type_Text_Description_Read(self, io, &Ignored, SizeOfTag);
+        }
+        else {
+            // TBD: Size is needed for MLU, and we have no idea on which is the available size
+            Fresh = (cmsMLU*) Type_MLU_Read(self, io, &Ignored, SizeOfTag);
+        }
+
+    *mlu = Fresh;
+    return (Fresh != NULL);
+}
+
+
+// Reads a profile sequence description tag into a newly allocated cmsSEQ.
+//
+// Layout consumed: a 32-bit count, then per entry the device manufacturer (32 bits),
+// device model (32 bits), attributes (64 bits), technology (32 bits) and two embedded
+// text elements (manufacturer and model descriptions).
+//
+// SizeOfTag is decremented for every fixed-size field and checked after each read.
+// It is NOT decremented for the embedded texts, so both ReadEmbeddedText calls of an
+// entry receive the same remaining size.
+//
+// Returns the sequence and sets *nItems to 1 on success; returns NULL with *nItems
+// set to 0 otherwise.
+static
+void *Type_ProfileSequenceDesc_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsSEQ* OutSeq;
+    cmsUInt32Number i, Count;
+
+    *nItems = 0;
+
+    if (!_cmsReadUInt32Number(io, &Count)) return NULL;
+
+    // Account for the count field just consumed
+    if (SizeOfTag < sizeof(cmsUInt32Number)) return NULL;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+
+    OutSeq = cmsAllocProfileSequenceDescription(self ->ContextID, Count);
+    if (OutSeq == NULL) return NULL;
+
+    OutSeq ->n = Count;
+
+    // Get structures as well
+
+    for (i=0; i < Count; i++) {
+
+        cmsPSEQDESC* sec = &OutSeq -> seq[i];
+
+        // Each fixed-size field: read it, make sure the tag was large enough, then account for it
+        if (!_cmsReadUInt32Number(io, &sec ->deviceMfg)) goto Error;
+        if (SizeOfTag < sizeof(cmsUInt32Number)) goto Error;
+        SizeOfTag -= sizeof(cmsUInt32Number);
+
+        if (!_cmsReadUInt32Number(io, &sec ->deviceModel)) goto Error;
+        if (SizeOfTag < sizeof(cmsUInt32Number)) goto Error;
+        SizeOfTag -= sizeof(cmsUInt32Number);
+
+        if (!_cmsReadUInt64Number(io, &sec ->attributes)) goto Error;
+        if (SizeOfTag < sizeof(cmsUInt64Number)) goto Error;
+        SizeOfTag -= sizeof(cmsUInt64Number);
+
+        if (!_cmsReadUInt32Number(io, (cmsUInt32Number *)&sec ->technology)) goto Error;
+        if (SizeOfTag < sizeof(cmsUInt32Number)) goto Error;
+        SizeOfTag -= sizeof(cmsUInt32Number);
+
+        // Embedded descriptions; the remaining size is passed along but not updated afterwards
+        if (!ReadEmbeddedText(self, io, &sec ->Manufacturer, SizeOfTag)) goto Error;
+        if (!ReadEmbeddedText(self, io, &sec ->Model, SizeOfTag)) goto Error;
+    }
+
+    *nItems = 1;
+    return OutSeq;
+
+Error:
+    // Releases the sequence allocated above
+    cmsFreeProfileSequenceDescription(OutSeq);
+    return NULL;
+}
+
+
+// Aux--Embeds a description. Handlers whose ICCVersion is below 0x4000000 emit a
+// text description element; all others emit a multilocalized unicode element.
+// In both cases the type base goes first and the element body follows.
+static
+cmsBool  SaveDescription(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsMLU* Text)
+{
+    const cmsBool Legacy = (self ->ICCVersion < 0x4000000);
+    const cmsTagTypeSignature Kind = Legacy ? cmsSigTextDescriptionType
+                                            : cmsSigMultiLocalizedUnicodeType;
+
+    if (!_cmsWriteTypeBase(io, Kind)) return FALSE;
+
+    if (Legacy)
+        return Type_Text_Description_Write(self, io, Text, 1);
+
+    return Type_MLU_Write(self, io, Text, 1);
+}
+
+
+// Writes a profile sequence description: the entry count, then for every entry
+// the manufacturer, model, attributes and technology fields followed by the two
+// embedded descriptions. Returns FALSE as soon as any write fails.
+static
+cmsBool  Type_ProfileSequenceDesc_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsSEQ* Chain = (cmsSEQ*) Ptr;
+    cmsUInt32Number k;
+
+    cmsUNUSED_PARAMETER(nItems);
+
+    if (!_cmsWriteUInt32Number(io, Chain ->n)) return FALSE;
+
+    for (k = 0; k < Chain ->n; k++) {
+
+        cmsPSEQDESC* Entry = &Chain ->seq[k];
+
+        // Fixed-size part of the entry
+        if (!_cmsWriteUInt32Number(io, Entry ->deviceMfg)   ||
+            !_cmsWriteUInt32Number(io, Entry ->deviceModel) ||
+            !_cmsWriteUInt64Number(io, &Entry ->attributes) ||
+            !_cmsWriteUInt32Number(io, Entry ->technology)) return FALSE;
+
+        // Variable-size part: manufacturer first, model second
+        if (!SaveDescription(self, io, Entry ->Manufacturer) ||
+            !SaveDescription(self, io, Entry ->Model)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Duplicates a profile sequence by delegating to cmsDupProfileSequenceDescription.
+static
+void* Type_ProfileSequenceDesc_Dup(struct _cms_typehandler_struct* self, const void* Ptr, cmsUInt32Number n)
+{
+    cmsSEQ* Source = (cmsSEQ*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+    cmsUNUSED_PARAMETER(n);
+
+    return (void*) cmsDupProfileSequenceDescription(Source);
+}
+
+// Releases a profile sequence by handing it to cmsFreeProfileSequenceDescription.
+static
+void Type_ProfileSequenceDesc_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsSEQ* Doomed = (cmsSEQ*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsFreeProfileSequenceDescription(Doomed);
+}
+
+
+// ********************************************************************************
+// Type cmsSigProfileSequenceIdType
+// ********************************************************************************
+/*
+In certain workflows using ICC Device Link Profiles, it is necessary to identify the
+original profiles that were combined to create the Device Link Profile.
+This type is an array of structures, each of which contains information for
+identification of a profile used in a sequence
+*/
+
+
+// Position-table callback: fills entry n of the cmsSEQ passed as Cargo with a
+// 16-byte profile ID followed by an embedded description.
+static
+cmsBool ReadSeqID(struct _cms_typehandler_struct* self,
+                                             cmsIOHANDLER* io,
+                                             void* Cargo,
+                                             cmsUInt32Number n,
+                                             cmsUInt32Number SizeOfTag)
+{
+    cmsPSEQDESC* Entry = &((cmsSEQ*) Cargo) ->seq[n];
+
+    if (io -> Read(io, Entry ->ProfileID.ID8, 16, 1) != 1)
+        return FALSE;
+
+    if (!ReadEmbeddedText(self, io, &Entry ->Description, SizeOfTag))
+        return FALSE;
+
+    return TRUE;
+}
+
+
+
+// Reads a profile sequence identifier tag: a 32-bit count followed by a position
+// table whose elements are decoded by ReadSeqID.
+//
+// Returns the sequence and sets *nItems to 1 on success; returns NULL with *nItems
+// set to 0 otherwise.
+static
+void *Type_ProfileSequenceId_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt32Number Origin;
+    cmsUInt32Number nProfiles;
+    cmsSEQ* Result;
+
+    *nItems = 0;
+
+    // Element offsets are applied on top of this position
+    Origin = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    if (!_cmsReadUInt32Number(io, &nProfiles)) return NULL;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    Result = cmsAllocProfileSequenceDescription(self ->ContextID, nProfiles);
+    if (Result == NULL) return NULL;
+
+    if (ReadPositionTable(self, io, nProfiles, Origin, Result, ReadSeqID)) {
+
+        *nItems = 1;
+        return Result;
+    }
+
+    // The table could not be read, discard the partial result
+    cmsFreeProfileSequenceDescription(Result);
+    return NULL;
+}
+
+
+// Position-table callback: emits entry n of the cmsSEQ passed as Cargo as a
+// 16-byte profile ID followed by its description. SizeOfTag is unused.
+static
+cmsBool WriteSeqID(struct _cms_typehandler_struct* self,
+                                             cmsIOHANDLER* io,
+                                             void* Cargo,
+                                             cmsUInt32Number n,
+                                             cmsUInt32Number SizeOfTag)
+{
+    cmsPSEQDESC* Entry = &((cmsSEQ*) Cargo) ->seq[n];
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+
+    if (!io ->Write(io, 16, Entry ->ProfileID.ID8))
+        return FALSE;
+
+    // The description is stored right after the ID
+    if (!SaveDescription(self, io, Entry ->Description))
+        return FALSE;
+
+    return TRUE;
+}
+
+// Writes a profile sequence identifier tag: the entry count, then a position
+// table whose elements are produced by WriteSeqID. nItems is unused.
+static
+cmsBool  Type_ProfileSequenceId_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsSEQ* Chain = (cmsSEQ*) Ptr;
+    cmsUInt32Number Origin;
+
+    cmsUNUSED_PARAMETER(nItems);
+
+    // Must be captured before anything is written
+    Origin = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // Table count first
+    if (!_cmsWriteUInt32Number(io, Chain ->n))
+        return FALSE;
+
+    // Then the position table together with its contents
+    if (!WritePositionTable(self, io, 0, Chain ->n, Origin, Chain, WriteSeqID))
+        return FALSE;
+
+    return TRUE;
+}
+
+// Duplicates a profile sequence by delegating to cmsDupProfileSequenceDescription.
+static
+void* Type_ProfileSequenceId_Dup(struct _cms_typehandler_struct* self, const void* Ptr, cmsUInt32Number n)
+{
+    cmsSEQ* Source = (cmsSEQ*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+    cmsUNUSED_PARAMETER(n);
+
+    return (void*) cmsDupProfileSequenceDescription(Source);
+}
+
+// Releases a profile sequence by handing it to cmsFreeProfileSequenceDescription.
+static
+void Type_ProfileSequenceId_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsSEQ* Doomed = (cmsSEQ*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsFreeProfileSequenceDescription(Doomed);
+}
+
+
+// ********************************************************************************
+// Type cmsSigUcrBgType
+// ********************************************************************************
+/*
+This type contains curves representing the under color removal and black
+generation and a text string which is a general description of the method used
+for the ucr/bg.
+*/
+
+// Reads a ucrBg tag body into a newly allocated cmsUcrBg.
+//
+// Layout consumed: a 32-bit count and that many 16-bit values for the under color
+// removal curve, the same for the black generation curve, and finally a text whose
+// length is whatever remains of SizeOfTag.
+//
+// Every field is checked against the remaining tag size BEFORE it is consumed, so
+// the remaining size can never wrap around. On any failure everything allocated so
+// far is released.
+//
+// Returns the structure and sets *nItems to 1 on success; returns NULL with *nItems
+// set to 0 otherwise.
+static
+void *Type_UcrBg_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUcrBg* n = (cmsUcrBg*) _cmsMallocZero(self ->ContextID, sizeof(cmsUcrBg));
+    cmsUInt32Number CountUcr, CountBg;
+    char* ASCIIString = NULL;
+
+    *nItems = 0;
+    if (n == NULL) return NULL;
+
+    // First curve is Under color removal
+    if (SizeOfTag < sizeof(cmsUInt32Number)) goto Error;
+    if (!_cmsReadUInt32Number(io, &CountUcr)) goto Error;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    // The table must fit in what is left of the tag (division avoids overflow)
+    if (CountUcr > SizeOfTag / sizeof(cmsUInt16Number)) goto Error;
+
+    n ->Ucr = cmsBuildTabulatedToneCurve16(self ->ContextID, CountUcr, NULL);
+    if (n ->Ucr == NULL) goto Error;
+
+    if (!_cmsReadUInt16Array(io, CountUcr, n ->Ucr->Table16)) goto Error;
+    SizeOfTag -= (cmsUInt32Number) (CountUcr * sizeof(cmsUInt16Number));
+
+    // Second curve is Black generation
+    if (SizeOfTag < sizeof(cmsUInt32Number)) goto Error;
+    if (!_cmsReadUInt32Number(io, &CountBg)) goto Error;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    if (CountBg > SizeOfTag / sizeof(cmsUInt16Number)) goto Error;
+
+    n ->Bg = cmsBuildTabulatedToneCurve16(self ->ContextID, CountBg, NULL);
+    if (n ->Bg == NULL) goto Error;
+
+    if (!_cmsReadUInt16Array(io, CountBg, n ->Bg->Table16)) goto Error;
+    SizeOfTag -= (cmsUInt32Number) (CountBg * sizeof(cmsUInt16Number));
+
+    // SizeOfTag + 1 bytes are allocated below, which must not wrap to zero
+    if (SizeOfTag == UINT_MAX) goto Error;
+
+    // Now comes the text. The length is specified by the tag size
+    n ->Desc = cmsMLUalloc(self ->ContextID, 1);
+    if (n ->Desc == NULL) goto Error;
+
+    ASCIIString = (char*) _cmsMalloc(self ->ContextID, SizeOfTag + 1);
+    if (ASCIIString == NULL) goto Error;
+
+    if (io ->Read(io, ASCIIString, sizeof(char), SizeOfTag) != SizeOfTag) goto Error;
+    ASCIIString[SizeOfTag] = 0;
+    cmsMLUsetASCII(n ->Desc, cmsNoLanguage, cmsNoCountry, ASCIIString);
+    _cmsFree(self ->ContextID, ASCIIString);
+
+    *nItems = 1;
+    return (void*) n;
+
+Error:
+    // n was zero-allocated, so members not reached yet are still NULL
+    if (ASCIIString != NULL) _cmsFree(self ->ContextID, ASCIIString);
+    if (n ->Ucr) cmsFreeToneCurve(n ->Ucr);
+    if (n ->Bg)  cmsFreeToneCurve(n ->Bg);
+    if (n ->Desc) cmsMLUfree(n ->Desc);
+    _cmsFree(self ->ContextID, n);
+    return NULL;
+}
+
+// Writes a cmsUcrBg as a ucrBg tag body: count and 16-bit table of the under color
+// removal curve, count and 16-bit table of the black generation curve, then the
+// description as ASCII text with no length prefix.
+//
+// The temporary text buffer is released on every path, including failures.
+// Returns TRUE on success, FALSE on any write, conversion or allocation failure.
+// nItems is unused.
+static
+cmsBool  Type_UcrBg_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsUcrBg* Value = (cmsUcrBg*) Ptr;
+    cmsUInt32Number TextSize;
+    char* Text;
+    cmsBool Ok = FALSE;
+
+    // First curve is Under color removal
+    if (!_cmsWriteUInt32Number(io, Value ->Ucr ->nEntries)) return FALSE;
+    if (!_cmsWriteUInt16Array(io, Value ->Ucr ->nEntries, Value ->Ucr ->Table16)) return FALSE;
+
+    // Then black generation
+    if (!_cmsWriteUInt32Number(io, Value ->Bg ->nEntries)) return FALSE;
+    if (!_cmsWriteUInt16Array(io, Value ->Bg ->nEntries, Value ->Bg ->Table16)) return FALSE;
+
+    // Now comes the text. The length is specified by the tag size
+    TextSize = cmsMLUgetASCII(Value ->Desc, cmsNoLanguage, cmsNoCountry, NULL, 0);
+    Text     = (char*) _cmsMalloc(self ->ContextID, TextSize);
+
+    // A NULL buffer is only acceptable when there is nothing to store in it
+    if (Text == NULL && TextSize != 0) return FALSE;
+
+    if (cmsMLUgetASCII(Value ->Desc, cmsNoLanguage, cmsNoCountry, Text, TextSize) != TextSize) goto Done;
+
+    if (!io ->Write(io, TextSize, Text)) goto Done;
+
+    Ok = TRUE;
+
+Done:
+    if (Text != NULL) _cmsFree(self ->ContextID, Text);
+    return Ok;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+// Duplicates a cmsUcrBg: a new zeroed structure receives copies of both curves
+// and of the description. Returns NULL only when the structure itself cannot be
+// allocated; results of the individual copies are not checked. n is unused.
+static
+void* Type_UcrBg_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsUcrBg* From = (cmsUcrBg*) Ptr;
+    cmsUcrBg* Copy;
+
+    cmsUNUSED_PARAMETER(n);
+
+    Copy = (cmsUcrBg*) _cmsMallocZero(self ->ContextID, sizeof(cmsUcrBg));
+
+    if (Copy != NULL) {
+
+        Copy ->Bg   = cmsDupToneCurve(From ->Bg);
+        Copy ->Ucr  = cmsDupToneCurve(From ->Ucr);
+        Copy ->Desc = cmsMLUdup(From ->Desc);
+    }
+
+    return (void*) Copy;
+}
+
+// Releases a cmsUcrBg: each member that is set is freed with its own
+// destructor, then the structure itself is returned to the context.
+static
+void Type_UcrBg_Free(struct _cms_typehandler_struct* self, void *Ptr)
+{
+    cmsUcrBg* Item = (cmsUcrBg*) Ptr;
+
+    if (Item ->Ucr != NULL)
+        cmsFreeToneCurve(Item ->Ucr);
+
+    if (Item ->Bg != NULL)
+        cmsFreeToneCurve(Item ->Bg);
+
+    if (Item ->Desc != NULL)
+        cmsMLUfree(Item ->Desc);
+
+    _cmsFree(self ->ContextID, Item);
+}
+
+// ********************************************************************************
+// Type cmsSigCrdInfoType
+// ********************************************************************************
+
+/*
+This type contains the PostScript product name to which this profile corresponds
+and the names of the companion CRDs. Recall that a single profile can generate
+multiple CRDs. It is implemented as a MLU being the language code "PS" and then
+country varies for each element:
+
+                nm: PostScript product name
+                #0: Rendering intent 0 CRD name
+                #1: Rendering intent 1 CRD name
+                #2: Rendering intent 2 CRD name
+                #3: Rendering intent 3 CRD name
+*/
+
+
+
+// Auxiliary: reads a string stored as a 32-bit count followed by that many bytes
+// and stores it in the MLU under language "PS" and the given Section as country.
+// *SizeOfTag is the remaining tag size; on success it is reduced by the bytes consumed.
+static
+cmsBool  ReadCountAndSting(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsMLU* mlu, cmsUInt32Number* SizeOfTag, const char* Section)
+{
+    cmsUInt32Number Len;
+    char* Buf;
+    cmsBool Got;
+
+    // There must be room for the count itself
+    if (*SizeOfTag < sizeof(cmsUInt32Number)) return FALSE;
+
+    if (!_cmsReadUInt32Number(io, &Len)) return FALSE;
+
+    // Count plus its own field must neither overflow nor exceed what is left
+    if (Len > UINT_MAX - sizeof(cmsUInt32Number) ||
+        *SizeOfTag < Len + sizeof(cmsUInt32Number)) return FALSE;
+
+    Buf = (char*) _cmsMalloc(self ->ContextID, Len+1);
+    if (Buf == NULL) return FALSE;
+
+    Got = (io ->Read(io, Buf, sizeof(cmsUInt8Number), Len) == Len);
+
+    if (Got) {
+
+        Buf[Len] = 0;
+        cmsMLUsetASCII(mlu, "PS", Section, Buf);
+
+        *SizeOfTag -= (Len + sizeof(cmsUInt32Number));
+    }
+
+    // The buffer is only a staging area, it goes away in both outcomes
+    _cmsFree(self ->ContextID, Buf);
+    return Got;
+}
+
+// Auxiliary: writes the "PS"/Section entry of 'mlu' as a 32-bit count followed by
+// that many bytes of text (the count is whatever cmsMLUgetASCII reports as needed size).
+// Returns TRUE on success. Returns FALSE if the temporary buffer cannot be allocated,
+// if the entry cannot be retrieved or if any write fails; the temporary buffer is
+// released on every path.
+static
+cmsBool  WriteCountAndSting(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsMLU* mlu, const char* Section)
+{
+    cmsUInt32Number TextSize;
+    char* Text;
+
+    // First call only asks for the size needed to hold the text
+    TextSize = cmsMLUgetASCII(mlu, "PS", Section, NULL, 0);
+
+    Text = (char*) _cmsMalloc(self ->ContextID, TextSize);
+    if (Text == NULL) return FALSE;
+
+    if (!_cmsWriteUInt32Number(io, TextSize)) goto Error;
+
+    if (cmsMLUgetASCII(mlu, "PS", Section, Text, TextSize) == 0) goto Error;
+
+    if (!io ->Write(io, TextSize, Text)) goto Error;
+
+    _cmsFree(self ->ContextID, Text);
+    return TRUE;
+
+Error:
+    // Do not leak the temporary buffer when something went wrong
+    _cmsFree(self ->ContextID, Text);
+    return FALSE;
+}
+
+// Reads a crdInfo tag into a freshly allocated MLU: the product name ("nm") and one
+// CRD name per rendering intent ("#0".."#3"), each stored as count + string.
+// Returns the MLU with *nItems set to 1, or NULL with *nItems set to 0 on failure.
+static
+void *Type_CrdInfo_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsMLU* mlu;
+
+    *nItems = 0;
+
+    // Without a destination MLU there is nothing to fill; fail before consuming input
+    mlu = cmsMLUalloc(self ->ContextID, 5);
+    if (mlu == NULL) return NULL;
+
+    // SizeOfTag is decremented by each successful read
+    if (!ReadCountAndSting(self, io, mlu, &SizeOfTag, "nm")) goto Error;
+    if (!ReadCountAndSting(self, io, mlu, &SizeOfTag, "#0")) goto Error;
+    if (!ReadCountAndSting(self, io, mlu, &SizeOfTag, "#1")) goto Error;
+    if (!ReadCountAndSting(self, io, mlu, &SizeOfTag, "#2")) goto Error;
+    if (!ReadCountAndSting(self, io, mlu, &SizeOfTag, "#3")) goto Error;
+
+    *nItems = 1;
+    return (void*) mlu;
+
+Error:
+    cmsMLUfree(mlu);
+    return NULL;
+
+}
+
+// Writes a crdInfo tag: the five "PS" entries of the MLU, in fixed order, each as
+// count + string. Stops and returns FALSE at the first entry that fails to write.
+static
+cmsBool  Type_CrdInfo_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    // Product name first, then one CRD name per rendering intent
+    static const char* const Sections[] = { "nm", "#0", "#1", "#2", "#3" };
+
+    cmsMLU* mlu = (cmsMLU*) Ptr;
+    cmsUInt32Number k;
+
+    cmsUNUSED_PARAMETER(nItems);
+
+    for (k = 0; k < sizeof(Sections) / sizeof(Sections[0]); k++) {
+
+        if (!WriteCountAndSting(self, io, mlu, Sections[k])) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Duplicates the MLU that holds the crdInfo data. 'n' and 'self' are not used.
+static
+void* Type_CrdInfo_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsMLU* Copy;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    Copy = cmsMLUdup((cmsMLU*) Ptr);
+    return (void*) Copy;
+}
+
+// Releases the MLU that holds the crdInfo data. 'self' is not used.
+static
+void Type_CrdInfo_Free(struct _cms_typehandler_struct* self, void *Ptr)
+{
+    cmsMLU* mlu = (cmsMLU*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsMLUfree(mlu);
+}
+
+// ********************************************************************************
+// Type cmsSigScreeningType
+// ********************************************************************************
+//
+//The screeningType describes various screening parameters including screen
+//frequency, screening angle, and spot shape.
+
+// Reads a screening tag: flag, channel count and then, per channel, frequency,
+// screen angle and spot shape. The channel count is clamped to cmsMAXCHANNELS - 1.
+// Returns a newly allocated cmsScreening with *nItems set to 1, or NULL on failure.
+static
+void *Type_Screening_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsScreening* Screening;
+    cmsUInt32Number c;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+
+    Screening = (cmsScreening*) _cmsMallocZero(self ->ContextID, sizeof(cmsScreening));
+    if (Screening == NULL) return NULL;
+
+    *nItems = 0;
+
+    // Header: flag followed by number of channels
+    if (!_cmsReadUInt32Number(io, &Screening ->Flag) ||
+        !_cmsReadUInt32Number(io, &Screening ->nChannels)) goto Fail;
+
+    // Never read more channels than the limit applied here
+    if (Screening ->nChannels > cmsMAXCHANNELS - 1) {
+        Screening ->nChannels = cmsMAXCHANNELS - 1;
+    }
+
+    for (c = 0; c < Screening ->nChannels; c++) {
+
+        // The three fields are read strictly in this order
+        if (!_cmsRead15Fixed16Number(io, &Screening ->Channels[c].Frequency) ||
+            !_cmsRead15Fixed16Number(io, &Screening ->Channels[c].ScreenAngle) ||
+            !_cmsReadUInt32Number(io, &Screening ->Channels[c].SpotShape)) goto Fail;
+    }
+
+    *nItems = 1;
+    return (void*) Screening;
+
+Fail:
+    _cmsFree(self ->ContextID, Screening);
+    return NULL;
+}
+
+
+// Writes a screening tag: flag, channel count and the per-channel frequency,
+// screen angle and spot shape. Returns FALSE as soon as any write fails.
+static
+cmsBool Type_Screening_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsScreening* Screening = (cmsScreening* ) Ptr;
+    cmsUInt32Number c;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    if (!_cmsWriteUInt32Number(io, Screening ->Flag) ||
+        !_cmsWriteUInt32Number(io, Screening ->nChannels)) return FALSE;
+
+    for (c = 0; c < Screening ->nChannels; c++) {
+
+        if (!_cmsWrite15Fixed16Number(io, Screening ->Channels[c].Frequency) ||
+            !_cmsWrite15Fixed16Number(io, Screening ->Channels[c].ScreenAngle) ||
+            !_cmsWriteUInt32Number(io, Screening ->Channels[c].SpotShape)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+// Duplicates a cmsScreening as a flat memory copy of sizeof(cmsScreening) bytes.
+static
+void* Type_Screening_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    void* Copy;
+
+    cmsUNUSED_PARAMETER(n);
+
+    Copy = _cmsDupMem(self ->ContextID, Ptr, sizeof(cmsScreening));
+    return Copy;
+}
+
+
+// Releases a cmsScreening; it is a single memory block, so one free suffices.
+static
+void Type_Screening_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsScreening* Screening = (cmsScreening*) Ptr;
+
+    _cmsFree(self ->ContextID, Screening);
+}
+
+// ********************************************************************************
+// Type cmsSigViewingConditionsType
+// ********************************************************************************
+//
+//This type represents a set of viewing condition parameters including:
+//CIE "absolute" illuminant white point tristimulus values and CIE "absolute"
+//surround tristimulus values.
+
+// Reads a viewingConditions tag: illuminant XYZ, surround XYZ and illuminant type.
+// Returns a newly allocated cmsICCViewingConditions with *nItems set to 1, or NULL
+// on failure.
+static
+void *Type_ViewingConditions_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsICCViewingConditions* Conditions;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+
+    Conditions = (cmsICCViewingConditions*) _cmsMallocZero(self ->ContextID, sizeof(cmsICCViewingConditions));
+    if (Conditions == NULL) return NULL;
+
+    *nItems = 0;
+
+    // All three fields must be read, in this order
+    if (!_cmsReadXYZNumber(io, &Conditions ->IlluminantXYZ) ||
+        !_cmsReadXYZNumber(io, &Conditions ->SurroundXYZ) ||
+        !_cmsReadUInt32Number(io, &Conditions ->IlluminantType)) {
+
+        _cmsFree(self ->ContextID, Conditions);
+        return NULL;
+    }
+
+    *nItems = 1;
+    return (void*) Conditions;
+}
+
+
+// Writes a viewingConditions tag: illuminant XYZ, surround XYZ and illuminant type.
+// Returns FALSE as soon as any write fails.
+static
+cmsBool Type_ViewingConditions_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsICCViewingConditions* Conditions = (cmsICCViewingConditions* ) Ptr;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    if (!_cmsWriteXYZNumber(io, &Conditions ->IlluminantXYZ) ||
+        !_cmsWriteXYZNumber(io, &Conditions ->SurroundXYZ) ||
+        !_cmsWriteUInt32Number(io, Conditions ->IlluminantType)) return FALSE;
+
+    return TRUE;
+}
+
+
+// Duplicates a cmsICCViewingConditions as a flat memory copy.
+static
+void* Type_ViewingConditions_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    void* Copy;
+
+    cmsUNUSED_PARAMETER(n);
+
+    Copy = _cmsDupMem(self->ContextID, Ptr, sizeof(cmsICCViewingConditions));
+    return Copy;
+}
+
+
+// Releases a cmsICCViewingConditions; it is a single memory block.
+static
+void Type_ViewingConditions_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsICCViewingConditions* Conditions = (cmsICCViewingConditions*) Ptr;
+
+    _cmsFree(self ->ContextID, Conditions);
+}
+
+
+// ********************************************************************************
+// Type cmsSigMultiProcessElementType
+// ********************************************************************************
+
+
+// Generic duplicator for MPE elements: the element is a cmsStage, so it is cloned
+// with cmsStageDup. 'n' and 'self' are not used.
+static
+void* GenericMPEdup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsStage* Clone;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    Clone = cmsStageDup((cmsStage*) Ptr);
+    return (void*) Clone;
+}
+
+// Generic destructor for MPE elements: the element is a cmsStage, so it is released
+// with cmsStageFree. 'self' is not used.
+static
+void GenericMPEfree(struct _cms_typehandler_struct* self, void *Ptr)
+{
+    cmsStage* Stage = (cmsStage*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsStageFree(Stage);
+}
+
+// Each curve is stored in one or more curve segments, with break-points specified between curve segments.
+// The first curve segment always starts at -Infinity, and the last curve segment always ends at +Infinity. The
+// first and last curve segments shall be specified in terms of a formula, whereas the other segments shall be
+// specified either in terms of a formula, or by a sampled curve.
+
+
+// Read an embedded segmented curve.
+// Layout consumed: element signature (must be cmsSigSegmentedCurve), a 32-bit field
+// that is not kept, the 16-bit segment count, a 16-bit field that is not kept, then
+// nSegments-1 break-points and finally nSegments segment descriptions (formula or sampled).
+// Returns the curve built by cmsBuildSegmentedToneCurve(), or NULL on read error,
+// unexpected signature, zero segments, unknown segment type or allocation failure.
+static
+cmsToneCurve* ReadSegmentedCurve(struct _cms_typehandler_struct* self, cmsIOHANDLER* io)
+{
+    cmsCurveSegSignature ElementSig;
+    cmsUInt32Number i, j;
+    cmsUInt16Number nSegments;
+    cmsCurveSegment*  Segments;
+    cmsToneCurve* Curve;
+    cmsFloat32Number PrevBreak = MINUS_INF;    // - infinite
+
+    // Take signature and channels for each element.
+     if (!_cmsReadUInt32Number(io, (cmsUInt32Number*) &ElementSig)) return NULL;
+
+     // That should be a segmented curve
+     if (ElementSig != cmsSigSegmentedCurve) return NULL;
+
+     // A NULL destination is passed for the fields whose value is not needed
+     if (!_cmsReadUInt32Number(io, NULL)) return NULL;
+     if (!_cmsReadUInt16Number(io, &nSegments)) return NULL;
+     if (!_cmsReadUInt16Number(io, NULL)) return NULL;
+
+     if (nSegments < 1) return NULL;
+     // NOTE(review): the Error path below tests SampledPoints of every segment, so this
+     // relies on _cmsCalloc returning zero-filled memory -- confirm
+     Segments = (cmsCurveSegment*) _cmsCalloc(self ->ContextID, nSegments, sizeof(cmsCurveSegment));
+     if (Segments == NULL) return NULL;
+
+     // Read breakpoints: each one closes segment i and opens segment i+1
+     for (i=0; i < (cmsUInt32Number) nSegments - 1; i++) {
+
+         Segments[i].x0 = PrevBreak;
+         if (!_cmsReadFloat32Number(io, &Segments[i].x1)) goto Error;
+         PrevBreak = Segments[i].x1;
+     }
+
+     // The last segment starts at the last break-point and extends to PLUS_INF
+     Segments[nSegments-1].x0 = PrevBreak;
+     Segments[nSegments-1].x1 = PLUS_INF;     // A big cmsFloat32Number number
+
+     // Read segments
+     for (i=0; i < nSegments; i++) {
+
+          // Segment signature followed by a 32-bit field that is not kept
+          if (!_cmsReadUInt32Number(io, (cmsUInt32Number*) &ElementSig)) goto Error;
+          if (!_cmsReadUInt32Number(io, NULL)) goto Error;
+
+           switch (ElementSig) {
+
+            // Formula segment: 16-bit type, 16-bit field not kept, then the parameters
+            case cmsSigFormulaCurveSeg: {
+
+                cmsUInt16Number Type;
+                // Number of float parameters stored for formula types 0, 1 and 2
+                cmsUInt32Number ParamsByType[] = {4, 5, 5 };
+
+                if (!_cmsReadUInt16Number(io, &Type)) goto Error;
+                if (!_cmsReadUInt16Number(io, NULL)) goto Error;
+
+                // The stored type is offset by 6 (WriteSegmentedCurve subtracts the same 6);
+                // types above 2 are rejected before ParamsByType is indexed
+                Segments[i].Type = Type + 6;
+                if (Type > 2) goto Error;
+
+                for (j=0; j < ParamsByType[Type]; j++) {
+
+                    cmsFloat32Number f;
+                    if (!_cmsReadFloat32Number(io, &f)) goto Error;
+                    Segments[i].Params[j] = f;
+                }
+                }
+                break;
+
+
+            // Sampled segment: 32-bit count followed by that many float samples
+            case cmsSigSampledCurveSeg: {
+                cmsUInt32Number Count;
+
+                if (!_cmsReadUInt32Number(io, &Count)) goto Error;
+
+                Segments[i].nGridPoints = Count;
+                Segments[i].SampledPoints = (cmsFloat32Number*) _cmsCalloc(self ->ContextID, Count, sizeof(cmsFloat32Number));
+                if (Segments[i].SampledPoints == NULL) goto Error;
+
+                for (j=0; j < Count; j++) {
+                    if (!_cmsReadFloat32Number(io, &Segments[i].SampledPoints[j])) goto Error;
+                }
+                }
+                break;
+
+            // Anything else is reported and aborts the whole curve
+            default:
+                {
+                char String[5];
+
+                _cmsTagSignature2String(String, (cmsTagSignature) ElementSig);
+                cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown curve element type '%s' found.", String);
+                }
+                goto Error;
+
+         }
+     }
+
+     // Build the curve; its result (NULL included) is returned unchanged
+     Curve = cmsBuildSegmentedToneCurve(self ->ContextID, nSegments, Segments);
+
+     // The temporary segment array and its sample buffers are released here in any case
+     for (i=0; i < nSegments; i++) {
+         if (Segments[i].SampledPoints) _cmsFree(self ->ContextID, Segments[i].SampledPoints);
+     }
+     _cmsFree(self ->ContextID, Segments);
+     return Curve;
+
+Error:
+     // Release whatever sample buffers were allocated so far, then the segment array
+     if (Segments) {
+         for (i=0; i < nSegments; i++) {
+             if (Segments[i].SampledPoints) _cmsFree(self ->ContextID, Segments[i].SampledPoints);
+         }
+         _cmsFree(self ->ContextID, Segments);
+     }
+     return NULL;
+}
+
+
+// Position-table callback: reads the n-th segmented curve and stores it in slot n of
+// the curve array passed as Cargo. Returns TRUE when a curve was obtained.
+static
+cmsBool ReadMPECurve(struct _cms_typehandler_struct* self,
+                     cmsIOHANDLER* io,
+                     void* Cargo,
+                     cmsUInt32Number n,
+                     cmsUInt32Number SizeOfTag)
+{
+      cmsToneCurve** Slots = ( cmsToneCurve**) Cargo;
+      cmsToneCurve* Curve;
+
+      cmsUNUSED_PARAMETER(SizeOfTag);
+
+      Curve = ReadSegmentedCurve(self, io);
+      Slots[n] = Curve;
+
+      return (Curve != NULL);
+}
+
+// Reads a curve-set MPE element: input/output channel counts (which must match) and a
+// position table with one segmented curve per channel. The curves are read into a
+// temporary array, handed to cmsStageAllocToneCurves and then released.
+// Returns the stage with *nItems set to 1, or NULL with *nItems set to 0.
+static
+void *Type_MPEcurve_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsStage* Stage = NULL;
+    cmsUInt16Number nIn, nOut;
+    cmsUInt32Number k, Origin;
+    cmsToneCurve** Curves;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+
+    *nItems = 0;
+
+    // Element offsets are relative to the start of the tag base
+    Origin = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    if (!_cmsReadUInt16Number(io, &nIn)) return NULL;
+    if (!_cmsReadUInt16Number(io, &nOut)) return NULL;
+
+    // A set of curves cannot change the number of channels
+    if (nIn != nOut) return NULL;
+
+    Curves = (cmsToneCurve**) _cmsCalloc(self ->ContextID, nIn, sizeof(cmsToneCurve*));
+    if (Curves == NULL) return NULL;
+
+    // Only build the stage when every curve could be read
+    if (ReadPositionTable(self, io, nIn, Origin, Curves, ReadMPECurve)) {
+
+        Stage = cmsStageAllocToneCurves(self ->ContextID, nIn, Curves);
+    }
+
+    // The temporary curves are dropped whatever the outcome
+    for (k = 0; k < nIn; k++) {
+        if (Curves[k] != NULL) cmsFreeToneCurve(Curves[k]);
+    }
+    _cmsFree(self ->ContextID, Curves);
+
+    if (Stage != NULL) {
+        *nItems = 1U;
+    }
+
+    return Stage;
+}
+
+
+// Write a single segmented curve: signature, segment count, the nSegments-1 break-points
+// and then every segment, either sampled (Type == 0) or formula-based (Type 6..8, stored
+// as 0..2). Only structural checks are done: the curve must have between 1 and 65535
+// segments and formula types must be in range. Returns FALSE on any failure.
+static
+cmsBool WriteSegmentedCurve(cmsIOHANDLER* io, cmsToneCurve* g)
+{
+    cmsUInt32Number i, j;
+    cmsCurveSegment* Segments = g ->Segments;
+    cmsUInt32Number nSegments = g ->nSegments;
+
+    // A curve without segments cannot be written in this form. Checking here also keeps
+    // 'nSegments - 1' below from wrapping around, and keeps the count within the 16-bit
+    // field it is stored in.
+    if (Segments == NULL || nSegments == 0 || nSegments > 0xFFFF) goto Error;
+
+    if (!_cmsWriteUInt32Number(io, cmsSigSegmentedCurve)) goto Error;
+    if (!_cmsWriteUInt32Number(io, 0)) goto Error;
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) nSegments)) goto Error;
+    if (!_cmsWriteUInt16Number(io, 0)) goto Error;
+
+    // Write the break-points: the upper limit of every segment but the last
+    for (i=0; i < nSegments - 1; i++) {
+        if (!_cmsWriteFloat32Number(io, Segments[i].x1)) goto Error;
+    }
+
+    // Write the segments
+    for (i=0; i < nSegments; i++) {
+
+        cmsCurveSegment* ActualSeg = Segments + i;
+
+        if (ActualSeg -> Type == 0) {
+
+            // This is a sampled curve: count followed by the samples
+            if (!_cmsWriteUInt32Number(io, (cmsUInt32Number) cmsSigSampledCurveSeg)) goto Error;
+            if (!_cmsWriteUInt32Number(io, 0)) goto Error;
+            if (!_cmsWriteUInt32Number(io, ActualSeg -> nGridPoints)) goto Error;
+
+            for (j=0; j < ActualSeg -> nGridPoints; j++) {
+                if (!_cmsWriteFloat32Number(io, ActualSeg -> SampledPoints[j])) goto Error;
+            }
+
+        }
+        else {
+            int Type;
+            // Number of parameters stored for formula types 0, 1 and 2
+            cmsUInt32Number ParamsByType[] = { 4, 5, 5 };
+
+            // This is a formula-based
+            if (!_cmsWriteUInt32Number(io, (cmsUInt32Number) cmsSigFormulaCurveSeg)) goto Error;
+            if (!_cmsWriteUInt32Number(io, 0)) goto Error;
+
+            // Only segment types 6, 7 and 8 are allowed; they are stored as 0, 1 and 2
+            Type = ActualSeg ->Type - 6;
+            if (Type > 2 || Type < 0) goto Error;
+
+            if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) Type)) goto Error;
+            if (!_cmsWriteUInt16Number(io, 0)) goto Error;
+
+            for (j=0; j < ParamsByType[Type]; j++) {
+                if (!_cmsWriteFloat32Number(io, (cmsFloat32Number) ActualSeg ->Params[j])) goto Error;
+            }
+        }
+
+        // It seems there is no need to align. Code is here, and for safety commented out
+        // if (!_cmsWriteAlignment(io)) goto Error;
+    }
+
+    return TRUE;
+
+Error:
+    return FALSE;
+}
+
+
+// Position-table callback: writes the n-th curve of the curve set passed as Cargo.
+static
+cmsBool WriteMPECurve(struct _cms_typehandler_struct* self,
+                      cmsIOHANDLER* io,
+                      void* Cargo,
+                      cmsUInt32Number n,
+                      cmsUInt32Number SizeOfTag)
+{
+    _cmsStageToneCurvesData* CurveSet = (_cmsStageToneCurvesData*) Cargo;
+    cmsToneCurve* Curve;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+    cmsUNUSED_PARAMETER(self);
+
+    Curve = CurveSet ->TheCurves[n];
+    return WriteSegmentedCurve(io, Curve);
+}
+
+// Writes a curve-set MPE element: the channel count twice (input and output are the
+// same for curves) followed by a position table with one segmented curve per channel.
+static
+cmsBool  Type_MPEcurve_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsStage* Stage = (cmsStage*) Ptr;
+    _cmsStageToneCurvesData* CurveSet = (_cmsStageToneCurvesData*) Stage ->Data;
+    cmsUInt32Number Origin;
+
+    cmsUNUSED_PARAMETER(nItems);
+
+    // Element offsets are relative to the start of the tag base
+    Origin = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // Header: input channels, then output channels (same value)
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) Stage ->InputChannels)) return FALSE;
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) Stage ->InputChannels)) return FALSE;
+
+    return WritePositionTable(self, io, 0,
+                              Stage ->InputChannels, Origin, CurveSet, WriteMPECurve) ? TRUE : FALSE;
+}
+
+
+
+// The matrix is organized as an array of PxQ+Q elements, where P is the number of input channels to the
+// matrix, and Q is the number of output channels. The matrix elements are each float32Numbers. The array
+// is organized as follows:
+// array = [e11, e12, ..., e1P, e21, e22, ..., e2P, ..., eQ1, eQ2, ..., eQP, e1, e2, ..., eQ]
+
+// Reads a matrix MPE element: input and output channel counts, then
+// InputChans * OutputChans float32 coefficients followed by OutputChans float32 offsets.
+// The values are widened to double and handed to cmsStageAllocMatrix.
+// Returns the stage with *nItems set to 1, or NULL with *nItems set to 0 on any failure
+// (read error, channel count not below cmsMAXCHANNELS, allocation failure).
+static
+void *Type_MPEmatrix_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsStage* mpe = NULL;
+    cmsUInt16Number   InputChans, OutputChans;
+    cmsUInt32Number   nElems, i;
+    cmsFloat64Number* Matrix = NULL;
+    cmsFloat64Number* Offsets = NULL;
+
+    // Nothing read so far; only a successfully built stage changes this
+    *nItems = 0;
+
+    if (!_cmsReadUInt16Number(io, &InputChans)) return NULL;
+    if (!_cmsReadUInt16Number(io, &OutputChans)) return NULL;
+
+
+    // Input and output chans may be ANY (up to 0xffff),
+    // but we choose to accept only counts below cmsMAXCHANNELS for now
+    if (InputChans >= cmsMAXCHANNELS) return NULL;
+    if (OutputChans >= cmsMAXCHANNELS) return NULL;
+
+    nElems = (cmsUInt32Number) InputChans * OutputChans;
+
+    Matrix = (cmsFloat64Number*) _cmsCalloc(self ->ContextID, nElems, sizeof(cmsFloat64Number));
+    if (Matrix == NULL) goto Done;
+
+    Offsets = (cmsFloat64Number*) _cmsCalloc(self ->ContextID, OutputChans, sizeof(cmsFloat64Number));
+    if (Offsets == NULL) goto Done;
+
+    // Coefficients come first
+    for (i=0; i < nElems; i++) {
+
+        cmsFloat32Number v;
+
+        if (!_cmsReadFloat32Number(io, &v)) goto Done;
+        Matrix[i] = v;
+    }
+
+    // Then one offset per output channel
+    for (i=0; i < OutputChans; i++) {
+
+        cmsFloat32Number v;
+
+        if (!_cmsReadFloat32Number(io, &v)) goto Done;
+        Offsets[i] = v;
+    }
+
+    // Note the argument order: rows (outputs) first, then columns (inputs)
+    mpe = cmsStageAllocMatrix(self ->ContextID, OutputChans, InputChans, Matrix, Offsets);
+
+Done:
+    // The temporary arrays are released on success and on failure alike
+    if (Matrix != NULL) _cmsFree(self ->ContextID, Matrix);
+    if (Offsets != NULL) _cmsFree(self ->ContextID, Offsets);
+
+    *nItems = (mpe != NULL) ? 1U : 0;
+
+    return mpe;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+// Writes a matrix MPE element: channel counts, all coefficients as float32 and then
+// one float32 offset per output channel (zeros when the stage has no offsets).
+static
+cmsBool  Type_MPEmatrix_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsStage* Stage = (cmsStage*) Ptr;
+    _cmsStageMatrixData* Data = (_cmsStageMatrixData*) Stage ->Data;
+    cmsUInt32Number k, nCoefs;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) Stage ->InputChannels)) return FALSE;
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) Stage ->OutputChannels)) return FALSE;
+
+    // Coefficients
+    nCoefs = Stage ->InputChannels * Stage ->OutputChannels;
+
+    for (k = 0; k < nCoefs; k++) {
+        if (!_cmsWriteFloat32Number(io, (cmsFloat32Number) Data ->Double[k])) return FALSE;
+    }
+
+    // Offsets, replaced by zero when absent
+    for (k = 0; k < Stage ->OutputChannels; k++) {
+
+        cmsFloat32Number Off = (Data ->Offset == NULL) ? 0 : (cmsFloat32Number) Data ->Offset[k];
+
+        if (!_cmsWriteFloat32Number(io, Off)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+
+// Reads a CLUT MPE element: channel counts, 16 bytes of grid dimensions and then the
+// table itself as float32 values. Returns the stage with *nItems set to 1, or NULL.
+static
+void *Type_MPEclut_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsStage* Stage = NULL;
+    cmsUInt16Number nIn, nOut;
+    cmsUInt8Number RawDims[16];
+    cmsUInt32Number k, nDims, Grid[MAX_INPUT_DIMENSIONS];
+    _cmsStageCLutData* Data;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+
+    if (!_cmsReadUInt16Number(io, &nIn)) return NULL;
+    if (!_cmsReadUInt16Number(io, &nOut)) return NULL;
+
+    // Both sides need at least one channel
+    if (nIn == 0 || nOut == 0) goto Fail;
+
+    if (io ->Read(io, RawDims, sizeof(cmsUInt8Number), 16) != 16) goto Fail;
+
+    // Use MAX_INPUT_DIMENSIONS entries at most, widened to cmsUInt32Number
+    nDims = nIn;
+    if (nDims > MAX_INPUT_DIMENSIONS) {
+        nDims = (cmsUInt32Number) MAX_INPUT_DIMENSIONS;
+    }
+
+    for (k = 0; k < nDims; k++) {
+
+        // Impossible value, 0 for no CLUT and then 2 at least
+        if (RawDims[k] == 1) goto Fail;
+
+        Grid[k] = (cmsUInt32Number) RawDims[k];
+    }
+
+    // Allocate the true CLUT, with no initial table
+    Stage = cmsStageAllocCLutFloatGranular(self ->ContextID, Grid, nIn, nOut, NULL);
+    if (Stage == NULL) goto Fail;
+
+    // Fill the table straight from the stream
+    Data = (_cmsStageCLutData*) Stage ->Data;
+
+    for (k = 0; k < Data ->nEntries; k++) {
+
+        if (!_cmsReadFloat32Number(io, &Data ->Tab.TFloat[k])) goto Fail;
+    }
+
+    *nItems = 1;
+    return Stage;
+
+Fail:
+    *nItems = 0;
+    if (Stage != NULL) cmsStageFree(Stage);
+    return NULL;
+}
+
+// Writes a floating point CLUT as MPE element: channel counts, 16 bytes of grid
+// dimensions (unused ones are zero) and then every table entry as float32.
+// Fails for stages with more than MAX_INPUT_DIMENSIONS inputs or without float values.
+static
+cmsBool  Type_MPEclut_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsStage* Stage = (cmsStage*) Ptr;
+    _cmsStageCLutData* Data = (_cmsStageCLutData*) Stage ->Data;
+    cmsUInt8Number Dims[16];  // 16 because the spec says 16 and not max number of channels
+    cmsUInt32Number k;
+
+    cmsUNUSED_PARAMETER(nItems);
+    cmsUNUSED_PARAMETER(self);
+
+    // Check for maximum number of channels supported by lcms
+    if (Stage -> InputChannels > MAX_INPUT_DIMENSIONS) return FALSE;
+
+    // Only floats are supported in MPE
+    if (Data ->HasFloatValues == FALSE) return FALSE;
+
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) Stage ->InputChannels) ||
+        !_cmsWriteUInt16Number(io, (cmsUInt16Number) Stage ->OutputChannels)) return FALSE;
+
+    // Start from all zeros, then set one dimension per input channel
+    memset(Dims, 0, sizeof(Dims));
+
+    for (k = 0; k < Stage ->InputChannels; k++) {
+        Dims[k] = (cmsUInt8Number) Data ->Params ->nSamples[k];
+    }
+
+    if (!io ->Write(io, 16, Dims)) return FALSE;
+
+    for (k = 0; k < Data ->nEntries; k++) {
+
+        if (!_cmsWriteFloat32Number(io, Data ->Tab.TFloat[k])) return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+
+// This is the list of built-in MPE types.
+// Every entry holds a handler plus a pointer to the next entry of this same array, so the
+// array doubles as a singly linked list; the last entry's link is NULL.
+// ReadMPEElem and Type_MPE_Write look handlers up here through GetHandler().
+static _cmsTagTypeLinkedList SupportedMPEtypes[] = {
+
+// These two entries have all their function pointers set to NULL; ReadMPEElem skips
+// elements whose handler has no ReadPtr.
+{{ (cmsTagTypeSignature) cmsSigBAcsElemType, NULL, NULL, NULL, NULL, NULL, 0 }, &SupportedMPEtypes[1] },   // Ignore those elements for now
+{{ (cmsTagTypeSignature) cmsSigEAcsElemType, NULL, NULL, NULL, NULL, NULL, 0 }, &SupportedMPEtypes[2] },   // (That's what the spec says)
+
+// NOTE(review): TYPE_MPE_HANDLER is defined outside this view; presumably it expands to the
+// Type_<name>_Read/Write pair plus GenericMPEdup/GenericMPEfree -- confirm
+{TYPE_MPE_HANDLER((cmsTagTypeSignature) cmsSigCurveSetElemType,     MPEcurve),      &SupportedMPEtypes[3] },
+{TYPE_MPE_HANDLER((cmsTagTypeSignature) cmsSigMatrixElemType,       MPEmatrix),     &SupportedMPEtypes[4] },
+{TYPE_MPE_HANDLER((cmsTagTypeSignature) cmsSigCLutElemType,         MPEclut),        NULL },
+};
+
+// MPE plug-in chunk, with its first member initialized to NULL.
+// NOTE(review): presumably the default state with no plug-in MPE types registered -- confirm
+_cmsTagTypePluginChunkType _cmsMPETypePluginChunk = { NULL };
+
+// Position-table callback: reads one MPE element and appends the resulting stage to the
+// pipeline passed as Cargo. Elements whose handler has no read method are accepted and
+// skipped; unknown element types are reported and make the call fail.
+static
+cmsBool ReadMPEElem(struct _cms_typehandler_struct* self,
+                    cmsIOHANDLER* io,
+                    void* Cargo,
+                    cmsUInt32Number n,
+                    cmsUInt32Number SizeOfTag)
+{
+    cmsPipeline *Pipeline = (cmsPipeline *) Cargo;
+    _cmsTagTypePluginChunkType* PluginChunk  = ( _cmsTagTypePluginChunkType*) _cmsContextGetClientChunk(self->ContextID, MPEPlugin);
+    cmsStageSignature Sig;
+    cmsTagTypeHandler* Handler;
+    cmsUInt32Number nRead;
+    cmsStage* Stage;
+
+    cmsUNUSED_PARAMETER(n);
+
+    // Element signature, followed by the reserved placeholder
+    if (!_cmsReadUInt32Number(io, (cmsUInt32Number*) &Sig)) return FALSE;
+    if (!_cmsReadUInt32Number(io, NULL)) return FALSE;
+
+    // Plug-in handlers and built-in ones are both searched
+    Handler = GetHandler((cmsTagTypeSignature) Sig, PluginChunk ->TagTypes, SupportedMPEtypes);
+
+    if (Handler == NULL)  {
+
+        char String[5];
+
+        _cmsTagSignature2String(String, (cmsTagSignature) Sig);
+
+        // An unknown element was found.
+        cmsSignalError(self ->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown MPE type '%s' found.", String);
+        return FALSE;
+    }
+
+    // No read method: just ignore the element (valid for cmsSigBAcsElemType and cmsSigEAcsElemType)
+    if (Handler ->ReadPtr == NULL) return TRUE;
+
+    // This is a real element: read it (no size is given) and put it at the end of the pipeline
+    Stage = (cmsStage*) Handler ->ReadPtr(self, io, &nRead, SizeOfTag);
+
+    return cmsPipelineInsertStage(Pipeline, cmsAT_END, Stage) ? TRUE : FALSE;
+}
+
+
+// Main dispatcher for reading a multiProcessElement tag: reads the channel counts and the
+// element count, then every element through a position table into a new pipeline.
+// The pipeline must end up with the channel counts declared in the tag.
+// Returns the pipeline with *nItems set to 1, or NULL on failure.
+static
+void *Type_MPE_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+    cmsUInt16Number nIn, nOut;
+    cmsUInt32Number nElements;
+    cmsUInt32Number Origin;
+    cmsPipeline *Pipeline;
+
+    cmsUNUSED_PARAMETER(SizeOfTag);
+
+    // Element offsets are relative to the start of the tag base
+    Origin = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // Channel counts, each between 1 and cmsMAXCHANNELS - 1
+    if (!_cmsReadUInt16Number(io, &nIn)) return NULL;
+    if (!_cmsReadUInt16Number(io, &nOut)) return NULL;
+
+    if (nIn == 0 || nIn >= cmsMAXCHANNELS) return NULL;
+    if (nOut == 0 || nOut >= cmsMAXCHANNELS) return NULL;
+
+    // Start from an empty pipeline
+    Pipeline = cmsPipelineAlloc(self ->ContextID, nIn, nOut);
+    if (Pipeline == NULL) return NULL;
+
+    // Read the element count and the elements, then verify the resulting channel counts
+    if (!_cmsReadUInt32Number(io, &nElements) ||
+        !ReadPositionTable(self, io, nElements, Origin, Pipeline, ReadMPEElem) ||
+        nIn != Pipeline->InputChannels ||
+        nOut != Pipeline->OutputChannels) {
+
+        cmsPipelineFree(Pipeline);
+        *nItems = 0;
+        return NULL;
+    }
+
+    *nItems = 1;
+    return Pipeline;
+}
+
+
+
+// Writes a multiProcessElement tag. This one is a little bit more complex, so position
+// tables are not used: the header (channel counts and element count) is followed by a
+// directory of (offset, size) pairs that is first written as zeros, then every stage is
+// written while recording its offset and size, and finally the directory is patched.
+// Returns FALSE on allocation failure, on write/seek failure, or when a stage has no
+// handler able to write it.
+static
+cmsBool Type_MPE_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsUInt32Number i, BaseOffset, DirectoryPos, CurrentPos;
+    cmsUInt32Number inputChan, outputChan;
+    cmsUInt32Number ElemCount;
+    cmsUInt32Number *ElementOffsets = NULL, *ElementSizes = NULL, Before;
+    cmsStageSignature ElementSig;
+    cmsPipeline* Lut = (cmsPipeline*) Ptr;
+    cmsStage* Elem = Lut ->Elements;
+    cmsTagTypeHandler* TypeHandler;
+    _cmsTagTypePluginChunkType* MPETypePluginChunk  = ( _cmsTagTypePluginChunkType*) _cmsContextGetClientChunk(self->ContextID, MPEPlugin);
+
+    // Element offsets are relative to the start of the tag base
+    BaseOffset = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    inputChan  = cmsPipelineInputChannels(Lut);
+    outputChan = cmsPipelineOutputChannels(Lut);
+    ElemCount  = cmsPipelineStageCount(Lut);
+
+    ElementOffsets = (cmsUInt32Number *) _cmsCalloc(self ->ContextID, ElemCount, sizeof(cmsUInt32Number));
+    if (ElementOffsets == NULL) goto Error;
+
+    ElementSizes = (cmsUInt32Number *) _cmsCalloc(self ->ContextID, ElemCount, sizeof(cmsUInt32Number));
+    if (ElementSizes == NULL) goto Error;
+
+    // Write the head. The element count is a 32-bit field, so it is written in full
+    // instead of being truncated to 16 bits first.
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) inputChan)) goto Error;
+    if (!_cmsWriteUInt16Number(io, (cmsUInt16Number) outputChan)) goto Error;
+    if (!_cmsWriteUInt32Number(io, ElemCount)) goto Error;
+
+    DirectoryPos = io ->Tell(io);
+
+    // Write a fake directory to be filled later on
+    for (i=0; i < ElemCount; i++) {
+        if (!_cmsWriteUInt32Number(io, 0)) goto Error;  // Offset
+        if (!_cmsWriteUInt32Number(io, 0)) goto Error;  // size
+    }
+
+    // Write each single tag. Keep track of the size as well.
+    for (i=0; i < ElemCount; i++) {
+
+        ElementOffsets[i] = io ->Tell(io) - BaseOffset;
+
+        ElementSig = Elem ->Type;
+
+        TypeHandler = GetHandler((cmsTagTypeSignature) ElementSig, MPETypePluginChunk->TagTypes, SupportedMPEtypes);
+        if (TypeHandler == NULL)  {
+
+                char String[5];
+
+                _cmsTagSignature2String(String, (cmsTagSignature) ElementSig);
+
+                 // An unknown element was found.
+                 cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Found unknown MPE type '%s'", String);
+                 goto Error;
+        }
+
+        // Some registered types carry no write method at all (the first two entries of
+        // SupportedMPEtypes, for instance); those cannot be serialized.
+        if (TypeHandler ->WritePtr == NULL) {
+
+                char String[5];
+
+                _cmsTagSignature2String(String, (cmsTagSignature) ElementSig);
+
+                cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "MPE type '%s' has no write method", String);
+                goto Error;
+        }
+
+        // Element header: signature and reserved field. The recorded size starts after it.
+        if (!_cmsWriteUInt32Number(io, ElementSig)) goto Error;
+        if (!_cmsWriteUInt32Number(io, 0)) goto Error;
+        Before = io ->Tell(io);
+        if (!TypeHandler ->WritePtr(self, io, Elem, 1)) goto Error;
+        if (!_cmsWriteAlignment(io)) goto Error;
+
+        ElementSizes[i] = io ->Tell(io) - Before;
+
+        Elem = Elem ->Next;
+    }
+
+    // Write the directory: go back, patch it, and return to the end of the data
+    CurrentPos = io ->Tell(io);
+
+    if (!io ->Seek(io, DirectoryPos)) goto Error;
+
+    for (i=0; i < ElemCount; i++) {
+        if (!_cmsWriteUInt32Number(io, ElementOffsets[i])) goto Error;
+        if (!_cmsWriteUInt32Number(io, ElementSizes[i])) goto Error;
+    }
+
+    if (!io ->Seek(io, CurrentPos)) goto Error;
+
+    if (ElementOffsets != NULL) _cmsFree(self ->ContextID, ElementOffsets);
+    if (ElementSizes != NULL) _cmsFree(self ->ContextID, ElementSizes);
+    return TRUE;
+
+Error:
+    if (ElementOffsets != NULL) _cmsFree(self ->ContextID, ElementOffsets);
+    if (ElementSizes != NULL) _cmsFree(self ->ContextID, ElementSizes);
+    return FALSE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+// Duplicates the pipeline held by a multiProcessElement tag. 'n' and 'self' are not used.
+static
+void* Type_MPE_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsPipeline* Copy;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    Copy = cmsPipelineDup((cmsPipeline*) Ptr);
+    return (void*) Copy;
+}
+
+// Releases the pipeline held by a multiProcessElement tag. 'self' is not used.
+static
+void Type_MPE_Free(struct _cms_typehandler_struct* self, void *Ptr)
+{
+    cmsPipeline* Pipeline = (cmsPipeline*) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+
+    cmsPipelineFree(Pipeline);
+}
+
+
+// ********************************************************************************
+// Type cmsSigVcgtType
+// ********************************************************************************
+
+
+// Values of the tag type field read at the start of a vcgt tag (see Type_vcgt_Read):
+// 0 means the gamma is stored as a table, 1 means it is stored as a formula.
+#define cmsVideoCardGammaTableType    0
+#define cmsVideoCardGammaFormulaType  1
+
+// Used internally. Type_vcgt_Read declares an array of three of these for the
+// formula flavor.
+// NOTE(review): presumably one entry per colorant, holding the gamma exponent and the
+// minimum/maximum output values -- confirm against the formula-reading code
+typedef struct {
+    double Gamma;
+    double Min;
+    double Max;
+} _cmsVCGTGAMMA;
+
+
+// Reads a vcgt (video card gamma) tag and returns it as a newly allocated
+// array of three tone curves, one per channel.
+//
+// The tag begins with a 32-bit type field selecting one of two flavors:
+//   - table:   channel count (must be 3), entries per channel and bytes per
+//              entry (1 or 2), followed by the samples of each channel.
+//   - formula: three (Gamma, Min, Max) triplets in 15.16 fixed point, each
+//              converted to a type 5 parametric curve.
+//
+// On success *nItems is set to 1 and the array is returned; the caller owns
+// it. On any failure every curve built so far and the array itself are freed,
+// *nItems stays 0 and NULL is returned.
+static
+void *Type_vcgt_Read(struct _cms_typehandler_struct* self,
+                     cmsIOHANDLER* io,
+                     cmsUInt32Number* nItems,
+                     cmsUInt32Number SizeOfTag)
+{
+    cmsUInt32Number TagType, n, i;
+    cmsToneCurve** Curves;
+
+    *nItems = 0;
+
+    // Read tag type
+    if (!_cmsReadUInt32Number(io, &TagType)) return NULL;
+
+    // Allocate space for the array. It is zero-filled, so the error path can
+    // release a partially built triple.
+    Curves = ( cmsToneCurve**) _cmsCalloc(self ->ContextID, 3, sizeof(cmsToneCurve*));
+    if (Curves == NULL) return NULL;
+
+    // There are two possible flavors
+    switch (TagType) {
+
+    // Gamma is stored as a table
+    case cmsVideoCardGammaTableType:
+    {
+       cmsUInt16Number nChannels, nElems, nBytes;
+
+       // Check channel count, which should be 3 (we don't support monochrome this time)
+       if (!_cmsReadUInt16Number(io, &nChannels)) goto Error;
+
+       if (nChannels != 3) {
+           cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported number of channels for VCGT '%d'", nChannels);
+           goto Error;
+       }
+
+       // Get Table element count and bytes per element
+       if (!_cmsReadUInt16Number(io, &nElems)) goto Error;
+       if (!_cmsReadUInt16Number(io, &nBytes)) goto Error;
+
+       // Adobe's quirk fixup. Fixing broken profiles: a tag that declares 256
+       // one-byte entries but has a total size of 1576 bytes is treated as
+       // holding two-byte entries.
+       if (nElems == 256 && nBytes == 1 && SizeOfTag == 1576)
+           nBytes = 2;
+
+
+       // Populate tone curves
+       for (n=0; n < 3; n++) {
+
+           // Build an nElems-entry 16-bit curve; its table is filled in below
+           Curves[n] = cmsBuildTabulatedToneCurve16(self ->ContextID, nElems, NULL);
+           if (Curves[n] == NULL) goto Error;
+
+           // Decode depending on byte depth
+           switch (nBytes) {
+
+           // One byte, 0..255
+           case 1:
+               for (i=0; i < nElems; i++) {
+
+                   cmsUInt8Number v;
+
+                      if (!_cmsReadUInt8Number(io, &v)) goto Error;
+                      // Widen the 8-bit sample to the curve's 16-bit table
+                      Curves[n] ->Table16[i] = FROM_8_TO_16(v);
+               }
+               break;
+
+           // One word 0..65535
+           case 2:
+              if (!_cmsReadUInt16Array(io, nElems, Curves[n]->Table16)) goto Error;
+              break;
+
+          // Unsupported
+           default:
+              cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported bit depth for VCGT '%d'", nBytes * 8);
+              goto Error;
+           }
+       } // For all 3 channels
+    }
+    break;
+
+   // In this case, gamma is stored as a formula
+   case cmsVideoCardGammaFormulaType:
+   {
+       _cmsVCGTGAMMA Colorant[3];
+
+        // Populate tone curves
+       for (n=0; n < 3; n++) {
+
+           double Params[10];
+
+           if (!_cmsRead15Fixed16Number(io, &Colorant[n].Gamma)) goto Error;
+           if (!_cmsRead15Fixed16Number(io, &Colorant[n].Min)) goto Error;
+           if (!_cmsRead15Fixed16Number(io, &Colorant[n].Max)) goto Error;
+
+            // Parametric curve type 5 is:
+            // Y = (aX + b)^Gamma + e | X >= d
+            // Y = cX + f             | X < d
+
+            // vcgt formula is:
+            // Y = (Max - Min) * (X ^ Gamma) + Min
+
+            // So, the translation is
+            // a = (Max - Min) ^ ( 1 / Gamma)
+            // e = Min
+            // b=c=d=f=0
+
+           // Params[] is filled in the order Gamma, a, b, c, d, e, f
+           Params[0] = Colorant[n].Gamma;
+           Params[1] = pow((Colorant[n].Max - Colorant[n].Min), (1.0 / Colorant[n].Gamma));
+           Params[2] = 0;
+           Params[3] = 0;
+           Params[4] = 0;
+           Params[5] = Colorant[n].Min;
+           Params[6] = 0;
+
+           Curves[n] = cmsBuildParametricToneCurve(self ->ContextID, 5, Params);
+           if (Curves[n] == NULL) goto Error;
+       }
+   }
+   break;
+
+   // Unsupported
+   default:
+      cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported tag type for VCGT '%d'", TagType);
+      goto Error;
+   }
+
+   *nItems = 1;
+   return (void*) Curves;
+
+// Regret,  free all resources
+Error:
+
+    cmsFreeToneCurveTriple(Curves);
+    _cmsFree(self ->ContextID, Curves);
+    return NULL;
+
+     cmsUNUSED_PARAMETER(SizeOfTag);
+}
+
+
+// Serializes a vcgt curve triple. Only two layouts are ever produced:
+//   - formula flavor, when all three curves are parametric type 5; the
+//     (Gamma, Min, Max) of each channel is recovered from the first segment.
+//   - table flavor otherwise: 3 channels x 256 entries x 2 bytes, obtained by
+//     sampling each curve at j/255.
+// Returns FALSE as soon as any write fails.
+static
+cmsBool Type_vcgt_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsToneCurve** Triple = (cmsToneCurve**) Ptr;
+    cmsUInt32Number Channel, Step;
+    cmsBool AllFormula;
+
+    cmsUNUSED_PARAMETER(self);
+    cmsUNUSED_PARAMETER(nItems);
+
+    AllFormula = cmsGetToneCurveParametricType(Triple[0]) == 5 &&
+                 cmsGetToneCurveParametricType(Triple[1]) == 5 &&
+                 cmsGetToneCurveParametricType(Triple[2]) == 5;
+
+    if (!AllFormula) {
+
+        // Table header: flavor, channel count, entries per channel, bytes per entry
+        if (!_cmsWriteUInt32Number(io, cmsVideoCardGammaTableType)) return FALSE;
+        if (!_cmsWriteUInt16Number(io, 3)) return FALSE;
+        if (!_cmsWriteUInt16Number(io, 256)) return FALSE;
+        if (!_cmsWriteUInt16Number(io, 2)) return FALSE;
+
+        for (Channel = 0; Channel < 3; Channel++) {
+            for (Step = 0; Step < 256; Step++) {
+
+                // Sample the curve and scale the result to a saturated 16-bit word
+                cmsFloat32Number Level = cmsEvalToneCurveFloat(Triple[Channel], (cmsFloat32Number) (Step / 255.0));
+                cmsUInt16Number  Word  = _cmsQuickSaturateWord(Level * 65535.0);
+
+                if (!_cmsWriteUInt16Number(io, Word)) return FALSE;
+            }
+        }
+
+        return TRUE;
+    }
+
+    // Formula flavor
+    if (!_cmsWriteUInt32Number(io, cmsVideoCardGammaFormulaType)) return FALSE;
+
+    for (Channel = 0; Channel < 3; Channel++) {
+
+        // Undo the reader's translation: a = (Max - Min)^(1/Gamma), e = Min
+        double Gamma = Triple[Channel] ->Segments[0].Params[0];
+        double Min   = Triple[Channel] ->Segments[0].Params[5];
+        double Max   = pow(Triple[Channel] ->Segments[0].Params[1], Gamma) + Min;
+
+        if (!_cmsWrite15Fixed16Number(io, Gamma)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, Min)) return FALSE;
+        if (!_cmsWrite15Fixed16Number(io, Max)) return FALSE;
+    }
+
+    return TRUE;
+}
+
+// Duplicates a vcgt curve triple.
+//
+// Allocates a fresh array of three curve pointers and duplicates each source
+// curve into it. Returns the new array, or NULL if the array cannot be
+// allocated or a curve fails to duplicate. In the latter case everything
+// created so far is released, so the caller never receives a half-built
+// triple that looks like a success.
+static
+void* Type_vcgt_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsToneCurve** OldCurves =  (cmsToneCurve**) Ptr;
+    cmsToneCurve** NewCurves;
+    cmsUInt32Number i;
+
+    // Zero-filled, so a partially populated triple can be released safely
+    NewCurves = ( cmsToneCurve**) _cmsCalloc(self ->ContextID, 3, sizeof(cmsToneCurve*));
+    if (NewCurves == NULL) return NULL;
+
+    for (i=0; i < 3; i++) {
+
+        NewCurves[i] = cmsDupToneCurve(OldCurves[i]);
+
+        // A NULL copy of a non-NULL source is a failure. A NULL source slot
+        // is passed through exactly as before.
+        if (NewCurves[i] == NULL && OldCurves[i] != NULL) {
+
+            cmsFreeToneCurveTriple(NewCurves);
+            _cmsFree(self ->ContextID, NewCurves);
+            return NULL;
+        }
+    }
+
+    return (void*) NewCurves;
+
+    cmsUNUSED_PARAMETER(n);
+}
+
+
+// Releases a vcgt tag: first the three tone curves, then the pointer array
+// that held them.
+static
+void Type_vcgt_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsToneCurve** Triple = (cmsToneCurve**) Ptr;
+
+    cmsFreeToneCurveTriple(Triple);
+    _cmsFree(self ->ContextID, Triple);
+}
+
+
+// ********************************************************************************
+// Type cmsSigDictType
+// ********************************************************************************
+
+// Single column of the table can point to wchar or MLUC elements. Holds arrays of data.
+// Offsets[i] and Sizes[i] describe record i; both arrays are allocated with
+// the same element count and released together.
+typedef struct {
+    cmsContext ContextID;       // Context used to allocate and free the two arrays
+    cmsUInt32Number *Offsets;   // Per-record offset; 0 marks an undefined element
+    cmsUInt32Number *Sizes;     // Per-record size, measured as a difference of stream positions
+} _cmsDICelem;
+
+// The whole offset directory of a dictionary tag: one column per field.
+// Name and Value are always present; DisplayName is only allocated for record
+// lengths above 16 and DisplayValue for record lengths above 24.
+typedef struct {
+    _cmsDICelem Name, Value, DisplayName, DisplayValue;
+
+} _cmsDICarray;
+
+// Allocate an empty array element: two zero-filled arrays of Count entries
+// (offsets and sizes) owned by ContextID.
+//
+// Returns TRUE on success. On failure returns FALSE with both e->Offsets and
+// e->Sizes set to NULL and nothing left allocated. This matters because the
+// caller's cleanup (FreeArray) decides what to release by testing Offsets for
+// NULL; leaving a freed pointer there would make it free the block twice.
+static
+cmsBool AllocElem(cmsContext ContextID, _cmsDICelem* e,  cmsUInt32Number Count)
+{
+    e->Offsets = (cmsUInt32Number *) _cmsCalloc(ContextID, Count, sizeof(cmsUInt32Number));
+    if (e->Offsets == NULL) return FALSE;
+
+    e->Sizes = (cmsUInt32Number *) _cmsCalloc(ContextID, Count, sizeof(cmsUInt32Number));
+    if (e->Sizes == NULL) {
+
+        // Roll back the first allocation and forget the pointer
+        _cmsFree(ContextID, e -> Offsets);
+        e->Offsets = NULL;
+        return FALSE;
+    }
+
+    e ->ContextID = ContextID;
+    return TRUE;
+}
+
+// Releases both arrays of a column (when present) and leaves the column
+// empty, so calling this again on the same element is harmless.
+static
+void FreeElem(_cmsDICelem* e)
+{
+    if (e ->Offsets != NULL) {
+        _cmsFree(e ->ContextID, e ->Offsets);
+    }
+
+    if (e ->Sizes != NULL) {
+        _cmsFree(e ->ContextID, e ->Sizes);
+    }
+
+    e ->Sizes   = NULL;
+    e ->Offsets = NULL;
+}
+
+// Releases every column of the directory that was actually allocated.
+// A column counts as allocated when its Offsets pointer is non-NULL.
+static
+void FreeArray( _cmsDICarray* a)
+{
+    _cmsDICelem* Name         = &a ->Name;
+    _cmsDICelem* Value        = &a ->Value;
+    _cmsDICelem* DisplayName  = &a ->DisplayName;
+    _cmsDICelem* DisplayValue = &a ->DisplayValue;
+
+    if (Name ->Offsets != NULL)         FreeElem(Name);
+    if (Value ->Offsets != NULL)        FreeElem(Value);
+    if (DisplayName ->Offsets != NULL)  FreeElem(DisplayName);
+    if (DisplayValue ->Offsets != NULL) FreeElem(DisplayValue);
+}
+
+
+// Allocates the directory columns required by a given record length.
+// Name and Value are always created; DisplayName is added for Length > 16 and
+// DisplayValue for Length > 24. The structure is cleared first, and on any
+// allocation failure whatever was created is released before returning FALSE.
+static
+cmsBool AllocArray(cmsContext ContextID, _cmsDICarray* a, cmsUInt32Number Count, cmsUInt32Number Length)
+{
+    // Start from an all-NULL directory
+    memset(a, 0, sizeof(_cmsDICarray));
+
+    // Columns are allocated strictly in order; the chain stops at the first failure
+    if (AllocElem(ContextID, &a ->Name, Count) &&
+        AllocElem(ContextID, &a ->Value, Count) &&
+        (Length <= 16 || AllocElem(ContextID, &a ->DisplayName, Count)) &&
+        (Length <= 24 || AllocElem(ContextID, &a ->DisplayValue, Count))) {
+
+        return TRUE;
+    }
+
+    FreeArray(a);
+    return FALSE;
+}
+
+// Reads one (offset, size) pair from the stream into slot i of a column.
+// A non-zero offset is rebased by adding BaseOffset; an offset of zero has a
+// special meaning and is stored untouched.
+static
+cmsBool ReadOneElem(cmsIOHANDLER* io,  _cmsDICelem* e, cmsUInt32Number i, cmsUInt32Number BaseOffset)
+{
+    cmsUInt32Number* Offset = &e ->Offsets[i];
+    cmsUInt32Number* Size   = &e ->Sizes[i];
+
+    if (!_cmsReadUInt32Number(io, Offset) ||
+        !_cmsReadUInt32Number(io, Size)) {
+
+        return FALSE;
+    }
+
+    // Zero shall be preserved
+    if (*Offset != 0) {
+        *Offset += BaseOffset;
+    }
+
+    return TRUE;
+}
+
+
+// Reads the offset directory: for each of the Count records, the Name and
+// Value pairs, then the DisplayName pair when Length > 16 and the
+// DisplayValue pair when Length > 24. Stops at the first failed read.
+static
+cmsBool ReadOffsetArray(cmsIOHANDLER* io,  _cmsDICarray* a, cmsUInt32Number Count, cmsUInt32Number Length, cmsUInt32Number BaseOffset)
+{
+    const cmsBool HasDisplayName  = (Length > 16);
+    const cmsBool HasDisplayValue = (Length > 24);
+    cmsUInt32Number Record = 0;
+
+    while (Record < Count) {
+
+        if (!ReadOneElem(io, &a ->Name, Record, BaseOffset)) return FALSE;
+        if (!ReadOneElem(io, &a ->Value, Record, BaseOffset)) return FALSE;
+
+        if (HasDisplayName && !ReadOneElem(io, &a ->DisplayName, Record, BaseOffset)) return FALSE;
+        if (HasDisplayValue && !ReadOneElem(io, &a ->DisplayValue, Record, BaseOffset)) return FALSE;
+
+        Record++;
+    }
+
+    return TRUE;
+}
+
+
+// Writes slot i of a column as an (offset, size) pair, in that order.
+static
+cmsBool WriteOneElem(cmsIOHANDLER* io,  _cmsDICelem* e, cmsUInt32Number i)
+{
+    if (_cmsWriteUInt32Number(io, e ->Offsets[i]) &&
+        _cmsWriteUInt32Number(io, e ->Sizes[i])) {
+
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+// Writes the offset directory, mirroring the layout the reader expects: per
+// record the Name and Value pairs, plus DisplayName when Length > 16 and
+// DisplayValue when Length > 24.
+static
+cmsBool WriteOffsetArray(cmsIOHANDLER* io,  _cmsDICarray* a, cmsUInt32Number Count, cmsUInt32Number Length)
+{
+    const cmsBool HasDisplayName  = (Length > 16);
+    const cmsBool HasDisplayValue = (Length > 24);
+    cmsUInt32Number Record = 0;
+
+    while (Record < Count) {
+
+        if (!WriteOneElem(io, &a ->Name, Record)) return FALSE;
+        if (!WriteOneElem(io, &a ->Value, Record)) return FALSE;
+
+        if (HasDisplayName && !WriteOneElem(io, &a ->DisplayName, Record)) return FALSE;
+        if (HasDisplayValue && !WriteOneElem(io, &a ->DisplayValue, Record)) return FALSE;
+
+        Record++;
+    }
+
+    return TRUE;
+}
+
+// Reads the wide string described by slot i of a directory column.
+//
+// On success returns TRUE and stores in *wcstr either a newly allocated,
+// zero-terminated string (owned by the caller, to be released with _cmsFree
+// on e->ContextID) or NULL when the slot's offset is 0, which denotes an
+// undefined string.
+//
+// On failure returns FALSE and *wcstr is always NULL, so the caller is never
+// left holding a pointer to memory this function has already released.
+static
+cmsBool ReadOneWChar(cmsIOHANDLER* io,  _cmsDICelem* e, cmsUInt32Number i, wchar_t ** wcstr)
+{
+
+    cmsUInt32Number nChars;
+
+      // Nothing is owned by the caller until a string has been read in full
+      *wcstr = NULL;
+
+      // Special case for undefined strings (see ICC Votable
+      // Proposal Submission, Dictionary Type and Metadata TAG Definition)
+      if (e -> Offsets[i] == 0) {
+
+          return TRUE;
+      }
+
+      if (!io -> Seek(io, e -> Offsets[i])) return FALSE;
+
+      // The stored size is in bytes; characters are stored as 16-bit units
+      nChars = e ->Sizes[i] / sizeof(cmsUInt16Number);
+
+      // The size comes straight from the file. Refuse counts for which
+      // (nChars + 1) * sizeof(wchar_t) does not fit in 32 bits; otherwise the
+      // byte count could wrap to a tiny value and the read below would run
+      // past the end of the buffer.
+      if (nChars >= 0xFFFFFFFFU / sizeof(wchar_t)) return FALSE;
+
+      *wcstr = (wchar_t*) _cmsMallocZero(e ->ContextID, (nChars + 1) * sizeof(wchar_t));
+      if (*wcstr == NULL) return FALSE;
+
+      if (!_cmsReadWCharArray(io, nChars, *wcstr)) {
+          _cmsFree(e ->ContextID, *wcstr);
+          *wcstr = NULL;
+          return FALSE;
+      }
+
+      // End of string marker
+      (*wcstr)[nChars] = 0;
+      return TRUE;
+}
+
+// Length of a zero-terminated wide string, in characters, not counting the
+// terminator.
+static
+cmsUInt32Number mywcslen(const wchar_t *s)
+{
+    const wchar_t *End;
+
+    // Advance to the terminator; the distance travelled is the length
+    for (End = s; *End != 0; ++End) {
+    }
+
+    return (cmsUInt32Number)(End - s);
+}
+
+// Writes one wide string at the current stream position and records where it
+// went in slot i of the column: the offset relative to BaseOffset and the
+// number of bytes the write advanced the stream. A NULL string writes nothing
+// and is recorded as offset 0 / size 0.
+static
+cmsBool WriteOneWChar(cmsIOHANDLER* io,  _cmsDICelem* e, cmsUInt32Number i, const wchar_t * wcstr, cmsUInt32Number BaseOffset)
+{
+    cmsUInt32Number Start = io ->Tell(io);
+
+    if (wcstr == NULL) {
+
+        // Undefined string: mark the slot as empty
+        e ->Sizes[i]   = 0;
+        e ->Offsets[i] = 0;
+        return TRUE;
+    }
+
+    e ->Offsets[i] = Start - BaseOffset;
+
+    if (!_cmsWriteWCharArray(io, mywcslen(wcstr), wcstr)) return FALSE;
+
+    // Size is whatever the write advanced the stream by
+    e ->Sizes[i] = io ->Tell(io) - Start;
+    return TRUE;
+}
+
+// Reads the multi-localized string described by slot i of a column.
+// A slot whose offset or size is 0 denotes "no MLU": *mlu becomes NULL and
+// the call succeeds. Otherwise the stream is positioned at the offset and the
+// MLU reader is invoked with the slot's size; the call succeeds only if that
+// reader returns an object.
+static
+cmsBool ReadOneMLUC(struct _cms_typehandler_struct* self, cmsIOHANDLER* io,  _cmsDICelem* e, cmsUInt32Number i, cmsMLU** mlu)
+{
+    cmsUInt32Number Ignored = 0;
+
+    if (e ->Offsets[i] != 0 && e ->Sizes[i] != 0) {
+
+        if (!io ->Seek(io, e ->Offsets[i])) return FALSE;
+
+        *mlu = (cmsMLU*) Type_MLU_Read(self, io, &Ignored, e ->Sizes[i]);
+        return (*mlu != NULL);
+    }
+
+    // A way to get null MLUCs
+    *mlu = NULL;
+    return TRUE;
+}
+
+// Writes one multi-localized string at the current stream position and
+// records it in slot i of the column: offset relative to BaseOffset, size as
+// the number of bytes the write advanced the stream. A NULL MLU writes
+// nothing and is recorded as offset 0 / size 0 (undefined string, see ICC
+// Votable Proposal Submission, Dictionary Type and Metadata TAG Definition).
+static
+cmsBool WriteOneMLUC(struct _cms_typehandler_struct* self, cmsIOHANDLER* io,  _cmsDICelem* e, cmsUInt32Number i, const cmsMLU* mlu, cmsUInt32Number BaseOffset)
+{
+    cmsUInt32Number Start;
+
+    if (mlu != NULL) {
+
+        Start = io ->Tell(io);
+        e ->Offsets[i] = Start - BaseOffset;
+
+        if (!Type_MLU_Write(self, io, (void*) mlu, 1)) return FALSE;
+
+        e ->Sizes[i] = io ->Tell(io) - Start;
+        return TRUE;
+    }
+
+    e ->Sizes[i]   = 0;
+    e ->Offsets[i] = 0;
+    return TRUE;
+}
+
+
+// Reads a dictionary tag into a newly allocated dictionary handle.
+//
+// Layout consumed: a 32-bit record count, a 32-bit record length (16, 24 or
+// 32), the offset directory (see ReadOffsetArray), and then the elements
+// themselves, reached by seeking to each recorded offset. Name and Value are
+// wide strings and must both be present; DisplayName (length > 16) and
+// DisplayValue (length > 24) are optional multi-localized strings.
+//
+// Returns the dictionary with *nItems set to 1, or NULL with *nItems left at
+// 0 on any failure. Every temporary read for the record in progress is
+// released on all paths, including a failure halfway through a record.
+static
+void *Type_Dictionary_Read(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, cmsUInt32Number* nItems, cmsUInt32Number SizeOfTag)
+{
+   cmsHANDLE hDict;
+   cmsUInt32Number i, Count, Length;
+   cmsUInt32Number BaseOffset;
+   _cmsDICarray a;
+   wchar_t *NameWCS = NULL, *ValueWCS = NULL;
+   cmsMLU *DisplayNameMLU = NULL, *DisplayValueMLU=NULL;
+   cmsBool rc;
+
+    *nItems = 0;
+
+    // Get actual position as a basis for element offsets
+    BaseOffset = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // Get name-value record count
+    if (!_cmsReadUInt32Number(io, &Count)) return NULL;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    // Get rec length
+    if (!_cmsReadUInt32Number(io, &Length)) return NULL;
+    SizeOfTag -= sizeof(cmsUInt32Number);
+
+    // Check for valid lengths
+    if (Length != 16 && Length != 24 && Length != 32) {
+         cmsSignalError(self->ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unknown record length in dictionary '%d'", Length);
+         return NULL;
+    }
+
+    // Creates an empty dictionary
+    hDict = cmsDictAlloc(self -> ContextID);
+    if (hDict == NULL) return NULL;
+
+    // Depending on record size, create column arrays
+    if (!AllocArray(self -> ContextID, &a, Count, Length)) goto Error;
+
+    // Read column arrays
+    if (!ReadOffsetArray(io, &a, Count, Length, BaseOffset)) goto Error;
+
+    // Seek to each element and read it
+    for (i=0; i < Count; i++) {
+
+        // Each record starts with no temporaries, so the error path only
+        // ever releases objects that belong to the record in progress
+        NameWCS = ValueWCS = NULL;
+        DisplayNameMLU = DisplayValueMLU = NULL;
+
+        // A failed string read may leave its output pointing at memory that
+        // was already released; drop it before jumping to the cleanup code
+        if (!ReadOneWChar(io, &a.Name, i, &NameWCS)) {
+            NameWCS = NULL;
+            goto Error;
+        }
+
+        if (!ReadOneWChar(io, &a.Value, i, &ValueWCS)) {
+            ValueWCS = NULL;
+            goto Error;
+        }
+
+        if (Length > 16) {
+            if (!ReadOneMLUC(self, io, &a.DisplayName, i, &DisplayNameMLU)) goto Error;
+        }
+
+        if (Length > 24) {
+            if (!ReadOneMLUC(self, io, &a.DisplayValue, i, &DisplayValueMLU)) goto Error;
+        }
+
+        if (NameWCS == NULL || ValueWCS == NULL) {
+
+            cmsSignalError(self->ContextID, cmsERROR_CORRUPTION_DETECTED, "Bad dictionary Name/Value");
+            rc = FALSE;
+        }
+        else {
+
+            rc = cmsDictAddEntry(hDict, NameWCS, ValueWCS, DisplayNameMLU, DisplayValueMLU);
+        }
+
+        // The temporaries are released here whether or not the entry was
+        // added, and forgotten so the error path cannot free them again
+        if (NameWCS != NULL) _cmsFree(self ->ContextID, NameWCS);
+        if (ValueWCS != NULL) _cmsFree(self ->ContextID, ValueWCS);
+        if (DisplayNameMLU != NULL) cmsMLUfree(DisplayNameMLU);
+        if (DisplayValueMLU != NULL) cmsMLUfree(DisplayValueMLU);
+
+        NameWCS = ValueWCS = NULL;
+        DisplayNameMLU = DisplayValueMLU = NULL;
+
+        if (!rc) goto Error;
+    }
+
+   FreeArray(&a);
+   *nItems = 1;
+   return (void*) hDict;
+
+Error:
+   // Release whatever the interrupted record had already read
+   if (NameWCS != NULL) _cmsFree(self ->ContextID, NameWCS);
+   if (ValueWCS != NULL) _cmsFree(self ->ContextID, ValueWCS);
+   if (DisplayNameMLU != NULL) cmsMLUfree(DisplayNameMLU);
+   if (DisplayValueMLU != NULL) cmsMLUfree(DisplayValueMLU);
+
+   FreeArray(&a);
+   cmsDictFree(hDict);
+   return NULL;
+}
+
+
+// Writes a dictionary tag.
+//
+// Layout produced: record count, record length, the offset directory, then
+// every element. The directory is first written as a zero-filled placeholder
+// to reserve its space, and rewritten at the end once each element's offset
+// and size are known; the stream is then left positioned after the last
+// element.
+//
+// Record length is 16 (Name, Value), 24 (+ DisplayName) or 32
+// (+ DisplayName, DisplayValue). Columns are positional, so a dictionary that
+// has display values needs the 32-byte layout even if no entry has a display
+// name; the unused DisplayName slots simply stay at offset 0 / size 0.
+//
+// Returns FALSE if Ptr is NULL or any allocation, write or seek fails.
+static
+cmsBool Type_Dictionary_Write(struct _cms_typehandler_struct* self, cmsIOHANDLER* io, void* Ptr, cmsUInt32Number nItems)
+{
+    cmsHANDLE hDict = (cmsHANDLE) Ptr;
+    const cmsDICTentry* p;
+    cmsBool AnyName, AnyValue;
+    cmsUInt32Number i, Count, Length;
+    cmsUInt32Number DirectoryPos, CurrentPos, BaseOffset;
+   _cmsDICarray a;
+
+    if (hDict == NULL) return FALSE;
+
+    BaseOffset = io ->Tell(io) - sizeof(_cmsTagBase);
+
+    // Let's inspect the dictionary
+    Count = 0; AnyName = FALSE; AnyValue = FALSE;
+    for (p = cmsDictGetEntryList(hDict); p != NULL; p = cmsDictNextEntry(p)) {
+
+        if (p ->DisplayName != NULL) AnyName = TRUE;
+        if (p ->DisplayValue != NULL) AnyValue = TRUE;
+        Count++;
+    }
+
+    // Pick the smallest record layout holding every column in use. Display
+    // values live in the fourth column, so they force the 32-byte layout
+    // regardless of display names. Adding 8 for each flag would give 24 when
+    // only display values exist, leaving the DisplayValue column unallocated
+    // while it is still written to below.
+    if (AnyValue)
+        Length = 32;
+    else if (AnyName)
+        Length = 24;
+    else
+        Length = 16;
+
+    if (!_cmsWriteUInt32Number(io, Count)) return FALSE;
+    if (!_cmsWriteUInt32Number(io, Length)) return FALSE;
+
+    // Keep starting position of offsets table
+    DirectoryPos = io ->Tell(io);
+
+    // Allocate offsets array
+    if (!AllocArray(self ->ContextID, &a, Count, Length)) goto Error;
+
+    // Write a fake directory to be filled later on
+    if (!WriteOffsetArray(io, &a, Count, Length)) goto Error;
+
+    // Write each element. Keep track of the size as well.
+    p = cmsDictGetEntryList(hDict);
+    for (i=0; i < Count; i++) {
+
+        if (!WriteOneWChar(io, &a.Name, i,  p ->Name, BaseOffset)) goto Error;
+        if (!WriteOneWChar(io, &a.Value, i, p ->Value, BaseOffset)) goto Error;
+
+        if (p ->DisplayName != NULL) {
+            if (!WriteOneMLUC(self, io, &a.DisplayName, i, p ->DisplayName, BaseOffset)) goto Error;
+        }
+
+        if (p ->DisplayValue != NULL) {
+            if (!WriteOneMLUC(self, io, &a.DisplayValue, i, p ->DisplayValue, BaseOffset)) goto Error;
+        }
+
+       p = cmsDictNextEntry(p);
+    }
+
+    // Write the directory
+    CurrentPos = io ->Tell(io);
+    if (!io ->Seek(io, DirectoryPos)) goto Error;
+
+    if (!WriteOffsetArray(io, &a, Count, Length)) goto Error;
+
+    // Go back to the end of the data so later writes append after it
+    if (!io ->Seek(io, CurrentPos)) goto Error;
+
+    FreeArray(&a);
+    return TRUE;
+
+Error:
+    FreeArray(&a);
+    return FALSE;
+
+    cmsUNUSED_PARAMETER(nItems);
+}
+
+
+// Duplicates a dictionary tag by delegating to cmsDictDup(). The handler and
+// the item count play no part in the copy.
+static
+void* Type_Dictionary_Dup(struct _cms_typehandler_struct* self, const void *Ptr, cmsUInt32Number n)
+{
+    cmsHANDLE Source = (cmsHANDLE) Ptr;
+
+    cmsUNUSED_PARAMETER(n);
+    cmsUNUSED_PARAMETER(self);
+
+    return (void*) cmsDictDup(Source);
+}
+
+
+// Releases a dictionary tag by handing the handle to cmsDictFree().
+static
+void Type_Dictionary_Free(struct _cms_typehandler_struct* self, void* Ptr)
+{
+    cmsHANDLE hDict = (cmsHANDLE) Ptr;
+
+    cmsUNUSED_PARAMETER(self);
+    cmsDictFree(hDict);
+}
+
+
+// ********************************************************************************
+// Type support main routines
+// ********************************************************************************
+
+
+// This is the list of built-in types.
+//
+// The array doubles as a singly linked list: the last field of each entry is
+// the address of the next array element, and the final entry ends the chain
+// with NULL. When adding an entry, append it and re-point the previous tail.
+//
+// Each row pairs a type signature with a handler family name through the
+// TYPE_HANDLER macro. NOTE(review): the macro is defined outside this chunk;
+// presumably it expands the name into the Type_<name>_Read/Write/Dup/Free
+// functions -- confirm against its definition.
+//
+// Two rows map vendor-specific "broken" signatures (cmsCorbisBrokenXYZtype,
+// cmsMonacoBrokenCurveType) onto the regular XYZ and Curve handlers.
+static const _cmsTagTypeLinkedList SupportedTagTypes[] = {
+
+{TYPE_HANDLER(cmsSigChromaticityType,          Chromaticity),       (_cmsTagTypeLinkedList*) &SupportedTagTypes[1] },
+{TYPE_HANDLER(cmsSigColorantOrderType,         ColorantOrderType),  (_cmsTagTypeLinkedList*) &SupportedTagTypes[2] },
+{TYPE_HANDLER(cmsSigS15Fixed16ArrayType,       S15Fixed16),         (_cmsTagTypeLinkedList*) &SupportedTagTypes[3] },
+{TYPE_HANDLER(cmsSigU16Fixed16ArrayType,       U16Fixed16),         (_cmsTagTypeLinkedList*) &SupportedTagTypes[4] },
+{TYPE_HANDLER(cmsSigTextType,                  Text),               (_cmsTagTypeLinkedList*) &SupportedTagTypes[5] },
+{TYPE_HANDLER(cmsSigTextDescriptionType,       Text_Description),   (_cmsTagTypeLinkedList*) &SupportedTagTypes[6] },
+{TYPE_HANDLER(cmsSigCurveType,                 Curve),              (_cmsTagTypeLinkedList*) &SupportedTagTypes[7] },
+{TYPE_HANDLER(cmsSigParametricCurveType,       ParametricCurve),    (_cmsTagTypeLinkedList*) &SupportedTagTypes[8] },
+{TYPE_HANDLER(cmsSigDateTimeType,              DateTime),           (_cmsTagTypeLinkedList*) &SupportedTagTypes[9] },
+{TYPE_HANDLER(cmsSigLut8Type,                  LUT8),               (_cmsTagTypeLinkedList*) &SupportedTagTypes[10] },
+{TYPE_HANDLER(cmsSigLut16Type,                 LUT16),              (_cmsTagTypeLinkedList*) &SupportedTagTypes[11] },
+{TYPE_HANDLER(cmsSigColorantTableType,         ColorantTable),      (_cmsTagTypeLinkedList*) &SupportedTagTypes[12] },
+{TYPE_HANDLER(cmsSigNamedColor2Type,           NamedColor),         (_cmsTagTypeLinkedList*) &SupportedTagTypes[13] },
+{TYPE_HANDLER(cmsSigMultiLocalizedUnicodeType, MLU),                (_cmsTagTypeLinkedList*) &SupportedTagTypes[14] },
+{TYPE_HANDLER(cmsSigProfileSequenceDescType,   ProfileSequenceDesc),(_cmsTagTypeLinkedList*) &SupportedTagTypes[15] },
+{TYPE_HANDLER(cmsSigSignatureType,             Signature),          (_cmsTagTypeLinkedList*) &SupportedTagTypes[16] },
+{TYPE_HANDLER(cmsSigMeasurementType,           Measurement),        (_cmsTagTypeLinkedList*) &SupportedTagTypes[17] },
+{TYPE_HANDLER(cmsSigDataType,                  Data),               (_cmsTagTypeLinkedList*) &SupportedTagTypes[18] },
+{TYPE_HANDLER(cmsSigLutAtoBType,               LUTA2B),             (_cmsTagTypeLinkedList*) &SupportedTagTypes[19] },
+{TYPE_HANDLER(cmsSigLutBtoAType,               LUTB2A),             (_cmsTagTypeLinkedList*) &SupportedTagTypes[20] },
+{TYPE_HANDLER(cmsSigUcrBgType,                 UcrBg),              (_cmsTagTypeLinkedList*) &SupportedTagTypes[21] },
+{TYPE_HANDLER(cmsSigCrdInfoType,               CrdInfo),            (_cmsTagTypeLinkedList*) &SupportedTagTypes[22] },
+{TYPE_HANDLER(cmsSigMultiProcessElementType,   MPE),                (_cmsTagTypeLinkedList*) &SupportedTagTypes[23] },
+{TYPE_HANDLER(cmsSigScreeningType,             Screening),          (_cmsTagTypeLinkedList*) &SupportedTagTypes[24] },
+{TYPE_HANDLER(cmsSigViewingConditionsType,     ViewingConditions),  (_cmsTagTypeLinkedList*) &SupportedTagTypes[25] },
+{TYPE_HANDLER(cmsSigXYZType,                   XYZ),                (_cmsTagTypeLinkedList*) &SupportedTagTypes[26] },
+{TYPE_HANDLER(cmsCorbisBrokenXYZtype,          XYZ),                (_cmsTagTypeLinkedList*) &SupportedTagTypes[27] },
+{TYPE_HANDLER(cmsMonacoBrokenCurveType,        Curve),              (_cmsTagTypeLinkedList*) &SupportedTagTypes[28] },
+{TYPE_HANDLER(cmsSigProfileSequenceIdType,     ProfileSequenceId),  (_cmsTagTypeLinkedList*) &SupportedTagTypes[29] },
+{TYPE_HANDLER(cmsSigDictType,                  Dictionary),         (_cmsTagTypeLinkedList*) &SupportedTagTypes[30] },
+{TYPE_HANDLER(cmsSigVcgtType,                  vcgt),                NULL }
+};
+
+
+// Global tag-type plug-in chunk, starting with an empty (NULL) list.
+// NOTE(review): presumably this is the chunk used when no user context is
+// supplied -- confirm against the context code.
+_cmsTagTypePluginChunkType _cmsTagTypePluginChunk = { NULL };
+
+
+
+// Copies the plug-in list stored in chunk `loc` of `src` into the memory pool
+// of `ctx`, preserving node order, and installs a new chunk header pointing
+// at the copy. If a node cannot be allocated the function returns at once,
+// leaving ctx->chunks[loc] untouched.
+static
+void DupTagTypeList(struct _cmsContext_struct* ctx, 
+                    const struct _cmsContext_struct* src, 
+                    int loc)
+{
+   _cmsTagTypePluginChunkType  Copy = { NULL };
+   _cmsTagTypePluginChunkType* Source = (_cmsTagTypePluginChunkType*) src->chunks[loc];
+   _cmsTagTypeLinkedList*      Node = Source->TagTypes;
+   _cmsTagTypeLinkedList*      Tail = NULL;
+
+   while (Node != NULL) {
+
+       _cmsTagTypeLinkedList* Clone = ( _cmsTagTypeLinkedList *) _cmsSubAllocDup(ctx ->MemPool, Node, sizeof(_cmsTagTypeLinkedList));
+
+       if (Clone == NULL)
+           return;
+
+       // Append at the tail so the copy keeps the original ordering
+       Clone -> Next = NULL;
+
+       if (Tail == NULL)
+           Copy.TagTypes = Clone;
+       else
+           Tail -> Next = Clone;
+
+       Tail = Clone;
+       Node = Node ->Next;
+   }
+
+   ctx ->chunks[loc] = _cmsSubAllocDup(ctx->MemPool, &Copy, sizeof(_cmsTagTypePluginChunkType));
+}
+
+
+// Sets up the tag-type plug-in chunk of a context: a copy of the source
+// context's list when one is given, otherwise a fresh empty chunk allocated
+// from the context's memory pool.
+void _cmsAllocTagTypePluginChunk(struct _cmsContext_struct* ctx, 
+                                 const struct _cmsContext_struct* src)
+{
+    static _cmsTagTypePluginChunkType EmptyChunk = { NULL };
+
+    if (src == NULL) {
+
+        // Nothing to inherit: start with an empty list
+        ctx ->chunks[TagTypePlugin] = _cmsSubAllocDup(ctx ->MemPool, &EmptyChunk, sizeof(_cmsTagTypePluginChunkType));
+        return;
+    }
+
+    DupTagTypeList(ctx, src, TagTypePlugin);
+}
+
+// Sets up the multi-process-element plug-in chunk of a context: a copy of the
+// source context's list when one is given, otherwise a fresh empty chunk
+// allocated from the context's memory pool.
+void _cmsAllocMPETypePluginChunk(struct _cmsContext_struct* ctx, 
+                               const struct _cmsContext_struct* src)
+{
+    static _cmsTagTypePluginChunkType EmptyChunk = { NULL };
+
+    if (src == NULL) {
+
+        // Nothing to inherit: start with an empty list
+        ctx ->chunks[MPEPlugin] = _cmsSubAllocDup(ctx ->MemPool, &EmptyChunk, sizeof(_cmsTagTypePluginChunkType));
+        return;
+    }
+
+    DupTagTypeList(ctx, src, MPEPlugin);
+}
+
+
+// Both kinds of plug-ins share the same structure, so both registration entry
+// points forward to RegisterTypesPlugin and differ only in the chunk they name.
+//
+// Registers a tag-type plug-in on context `id` (chunk TagTypePlugin) and
+// returns whatever RegisterTypesPlugin reports.
+cmsBool  _cmsRegisterTagTypePlugin(cmsContext id, cmsPluginBase* Data)
+{
+    return RegisterTypesPlugin(id, Data, TagTypePlugin);
+}
+
+// Registers a multi-process-element plug-in on context `id`. Forwards to
+// RegisterTypesPlugin with the MPEPlugin chunk and returns its result.
+cmsBool  _cmsRegisterMultiProcessElementPlugin(cmsContext id, cmsPluginBase* Data)
+{
+    return RegisterTypesPlugin(id, Data,MPEPlugin);
+}
+
+
+// Looks up the handler for a tag type signature. GetHandler is given the
+// plug-in list registered on the context together with the built-in
+// SupportedTagTypes table.
+cmsTagTypeHandler* _cmsGetTagTypeHandler(cmsContext ContextID, cmsTagTypeSignature sig)
+{
+    _cmsTagTypePluginChunkType* Chunk;
+    _cmsTagTypeLinkedList*      BuiltIn = (_cmsTagTypeLinkedList*) SupportedTagTypes;
+
+    Chunk = ( _cmsTagTypePluginChunkType*) _cmsContextGetClientChunk(ContextID, TagTypePlugin);
+
+    return GetHandler(sig, Chunk ->TagTypes, BuiltIn);
+}
+
+// ********************************************************************************
+// Tag support main routines
+// ********************************************************************************
+
+// Node of the list of known tags: a tag signature, the descriptor that says
+// how the tag is stored, and a link to the next node (NULL at the end).
+typedef struct _cmsTagLinkedList_st {
+
+            cmsTagSignature Signature;              // Tag this node describes
+            cmsTagDescriptor Descriptor;            // Element count, accepted types, optional type chooser
+            struct _cmsTagLinkedList_st* Next;      // Next node, or NULL for the last one
+
+} _cmsTagLinkedList;
+
+// This is the list of built-in tags. The data of this list can be modified by plug-ins
+//
+// As with the type table, the array is also a linked list: each entry's last
+// field is the address of the next array element and the final entry ends the
+// chain with NULL. Append new tags at the end and re-point the previous tail.
+//
+// Each entry is { signature, { descriptor }, next }. Judging from the rows,
+// the descriptor holds, in order: an element count (1 everywhere except the
+// two S15Fixed16 array tags, which use 9), the number of accepted type
+// signatures, the list of those signatures, and an optional function that
+// picks which type to use (NULL when there is nothing to choose).
+// NOTE(review): field meanings are inferred from the data -- confirm against
+// the cmsTagDescriptor declaration.
+static _cmsTagLinkedList SupportedTags[] = {
+
+    { cmsSigAToB0Tag,               { 1, 3,  { cmsSigLut16Type,  cmsSigLutAtoBType, cmsSigLut8Type}, DecideLUTtypeA2B}, &SupportedTags[1]},
+    { cmsSigAToB1Tag,               { 1, 3,  { cmsSigLut16Type,  cmsSigLutAtoBType, cmsSigLut8Type}, DecideLUTtypeA2B}, &SupportedTags[2]},
+    { cmsSigAToB2Tag,               { 1, 3,  { cmsSigLut16Type,  cmsSigLutAtoBType, cmsSigLut8Type}, DecideLUTtypeA2B}, &SupportedTags[3]},
+    { cmsSigBToA0Tag,               { 1, 3,  { cmsSigLut16Type,  cmsSigLutBtoAType, cmsSigLut8Type}, DecideLUTtypeB2A}, &SupportedTags[4]},
+    { cmsSigBToA1Tag,               { 1, 3,  { cmsSigLut16Type,  cmsSigLutBtoAType, cmsSigLut8Type}, DecideLUTtypeB2A}, &SupportedTags[5]},
+    { cmsSigBToA2Tag,               { 1, 3,  { cmsSigLut16Type,  cmsSigLutBtoAType, cmsSigLut8Type}, DecideLUTtypeB2A}, &SupportedTags[6]},
+
+    // Allow corbis  and its broken XYZ type
+    { cmsSigRedColorantTag,         { 1, 2, { cmsSigXYZType, cmsCorbisBrokenXYZtype }, DecideXYZtype}, &SupportedTags[7]},
+    { cmsSigGreenColorantTag,       { 1, 2, { cmsSigXYZType, cmsCorbisBrokenXYZtype }, DecideXYZtype}, &SupportedTags[8]},
+    { cmsSigBlueColorantTag,        { 1, 2, { cmsSigXYZType, cmsCorbisBrokenXYZtype }, DecideXYZtype}, &SupportedTags[9]},
+
+    { cmsSigRedTRCTag,              { 1, 3, { cmsSigCurveType, cmsSigParametricCurveType, cmsMonacoBrokenCurveType }, DecideCurveType}, &SupportedTags[10]},
+    { cmsSigGreenTRCTag,            { 1, 3, { cmsSigCurveType, cmsSigParametricCurveType, cmsMonacoBrokenCurveType }, DecideCurveType}, &SupportedTags[11]},
+    { cmsSigBlueTRCTag,             { 1, 3, { cmsSigCurveType, cmsSigParametricCurveType, cmsMonacoBrokenCurveType }, DecideCurveType}, &SupportedTags[12]},
+
+    { cmsSigCalibrationDateTimeTag, { 1, 1, { cmsSigDateTimeType }, NULL}, &SupportedTags[13]},
+    { cmsSigCharTargetTag,          { 1, 1, { cmsSigTextType },     NULL}, &SupportedTags[14]},
+
+    { cmsSigChromaticAdaptationTag, { 9, 1, { cmsSigS15Fixed16ArrayType }, NULL}, &SupportedTags[15]},
+    { cmsSigChromaticityTag,        { 1, 1, { cmsSigChromaticityType    }, NULL}, &SupportedTags[16]},
+    { cmsSigColorantOrderTag,       { 1, 1, { cmsSigColorantOrderType   }, NULL}, &SupportedTags[17]},
+    { cmsSigColorantTableTag,       { 1, 1, { cmsSigColorantTableType   }, NULL}, &SupportedTags[18]},
+    { cmsSigColorantTableOutTag,    { 1, 1, { cmsSigColorantTableType   }, NULL}, &SupportedTags[19]},
+
+    { cmsSigCopyrightTag,           { 1, 3, { cmsSigTextType,  cmsSigMultiLocalizedUnicodeType, cmsSigTextDescriptionType}, DecideTextType}, &SupportedTags[20]},
+    { cmsSigDateTimeTag,            { 1, 1, { cmsSigDateTimeType }, NULL}, &SupportedTags[21]},
+
+    { cmsSigDeviceMfgDescTag,       { 1, 3, { cmsSigTextDescriptionType, cmsSigMultiLocalizedUnicodeType, cmsSigTextType}, DecideTextDescType}, &SupportedTags[22]},
+    { cmsSigDeviceModelDescTag,     { 1, 3, { cmsSigTextDescriptionType, cmsSigMultiLocalizedUnicodeType, cmsSigTextType}, DecideTextDescType}, &SupportedTags[23]},
+
+    { cmsSigGamutTag,               { 1, 3, { cmsSigLut16Type, cmsSigLutBtoAType, cmsSigLut8Type }, DecideLUTtypeB2A}, &SupportedTags[24]},
+
+    { cmsSigGrayTRCTag,             { 1, 2, { cmsSigCurveType, cmsSigParametricCurveType }, DecideCurveType}, &SupportedTags[25]},
+    { cmsSigLuminanceTag,           { 1, 1, { cmsSigXYZType }, NULL}, &SupportedTags[26]},
+
+    { cmsSigMediaBlackPointTag,     { 1, 2, { cmsSigXYZType, cmsCorbisBrokenXYZtype }, NULL}, &SupportedTags[27]},
+    { cmsSigMediaWhitePointTag,     { 1, 2, { cmsSigXYZType, cmsCorbisBrokenXYZtype }, NULL}, &SupportedTags[28]},
+
+    { cmsSigNamedColor2Tag,         { 1, 1, { cmsSigNamedColor2Type }, NULL}, &SupportedTags[29]},
+
+    { cmsSigPreview0Tag,            { 1, 3,  { cmsSigLut16Type, cmsSigLutBtoAType, cmsSigLut8Type }, DecideLUTtypeB2A}, &SupportedTags[30]},
+    { cmsSigPreview1Tag,            { 1, 3,  { cmsSigLut16Type, cmsSigLutBtoAType, cmsSigLut8Type }, DecideLUTtypeB2A}, &SupportedTags[31]},
+    { cmsSigPreview2Tag,            { 1, 3,  { cmsSigLut16Type, cmsSigLutBtoAType, cmsSigLut8Type }, DecideLUTtypeB2A}, &SupportedTags[32]},
+
+    { cmsSigProfileDescriptionTag,  { 1, 3, { cmsSigTextDescriptionType, cmsSigMultiLocalizedUnicodeType, cmsSigTextType}, DecideTextDescType}, &SupportedTags[33]},
+    { cmsSigProfileSequenceDescTag, { 1, 1, { cmsSigProfileSequenceDescType }, NULL},  &SupportedTags[34]},
+    { cmsSigTechnologyTag,          { 1, 1, { cmsSigSignatureType }, NULL},  &SupportedTags[35]},
+
+    { cmsSigColorimetricIntentImageStateTag,   { 1, 1, { cmsSigSignatureType }, NULL}, &SupportedTags[36]},
+    { cmsSigPerceptualRenderingIntentGamutTag, { 1, 1, { cmsSigSignatureType }, NULL}, &SupportedTags[37]},
+    { cmsSigSaturationRenderingIntentGamutTag, { 1, 1, { cmsSigSignatureType }, NULL}, &SupportedTags[38]},
+
+    { cmsSigMeasurementTag,         { 1, 1, { cmsSigMeasurementType }, NULL}, &SupportedTags[39]},
+
+    { cmsSigPs2CRD0Tag,             { 1, 1, { cmsSigDataType }, NULL}, &SupportedTags[40]},
+    { cmsSigPs2CRD1Tag,             { 1, 1, { cmsSigDataType }, NULL}, &SupportedTags[41]},
+    { cmsSigPs2CRD2Tag,             { 1, 1, { cmsSigDataType }, NULL}, &SupportedTags[42]},
+    { cmsSigPs2CRD3Tag,             { 1, 1, { cmsSigDataType }, NULL}, &SupportedTags[43]},
+    { cmsSigPs2CSATag,              { 1, 1, { cmsSigDataType }, NULL}, &SupportedTags[44]},
+    { cmsSigPs2RenderingIntentTag,  { 1, 1, { cmsSigDataType }, NULL}, &SupportedTags[45]},
+
+    { cmsSigViewingCondDescTag,     { 1, 3, { cmsSigTextDescriptionType, cmsSigMultiLocalizedUnicodeType, cmsSigTextType}, DecideTextDescType}, &SupportedTags[46]},
+
+    { cmsSigUcrBgTag,               { 1, 1, { cmsSigUcrBgType}, NULL},    &SupportedTags[47]},
+    { cmsSigCrdInfoTag,             { 1, 1, { cmsSigCrdInfoType}, NULL},  &SupportedTags[48]},
+
+    { cmsSigDToB0Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[49]},
+    { cmsSigDToB1Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[50]},
+    { cmsSigDToB2Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[51]},
+    { cmsSigDToB3Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[52]},
+    { cmsSigBToD0Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[53]},
+    { cmsSigBToD1Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[54]},
+    { cmsSigBToD2Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[55]},
+    { cmsSigBToD3Tag,               { 1, 1, { cmsSigMultiProcessElementType}, NULL}, &SupportedTags[56]},
+
+    { cmsSigScreeningDescTag,       { 1, 1, { cmsSigTextDescriptionType },    NULL}, &SupportedTags[57]},
+    { cmsSigViewingConditionsTag,   { 1, 1, { cmsSigViewingConditionsType },  NULL}, &SupportedTags[58]},
+
+    { cmsSigScreeningTag,           { 1, 1, { cmsSigScreeningType},          NULL }, &SupportedTags[59]},
+    { cmsSigVcgtTag,                { 1, 1, { cmsSigVcgtType},               NULL }, &SupportedTags[60]},
+    { cmsSigMetaTag,                { 1, 1, { cmsSigDictType},               NULL }, &SupportedTags[61]},
+    { cmsSigProfileSequenceIdTag,   { 1, 1, { cmsSigProfileSequenceIdType},  NULL }, &SupportedTags[62]},
+
+    { cmsSigProfileDescriptionMLTag,{ 1, 1, { cmsSigMultiLocalizedUnicodeType}, NULL}, &SupportedTags[63]},
+    { cmsSigArgyllArtsTag,          { 9, 1, { cmsSigS15Fixed16ArrayType},    NULL}, NULL}
+
+};
+
+/*
+    Not supported                 Why
+    =======================       =========================================
+    cmsSigOutputResponseTag   ==> WARNING, POSSIBLE PATENT ON THIS SUBJECT!
+    cmsSigNamedColorTag       ==> Deprecated
+    cmsSigDataTag             ==> Ancient, unused
+    cmsSigDeviceSettingsTag   ==> Deprecated, useless
+*/
+
+
+_cmsTagPluginChunkType _cmsTagPluginChunk = { NULL };   // File-scope tag plug-in chunk whose first member starts out as NULL
+
+
+// Duplicates the zone of memory used by the plug-in in the new context (src's tag list is copied node by node into ctx)
+static
+void DupTagList(struct _cmsContext_struct* ctx, 
+                    const struct _cmsContext_struct* src)
+{
+   _cmsTagPluginChunkType newHead = { NULL };      // Head of the copy, assembled locally first
+   _cmsTagLinkedList*  entry;
+   _cmsTagLinkedList*  Anterior = NULL;            // Last node copied so far, i.e. tail of the new list
+   _cmsTagPluginChunkType* head = (_cmsTagPluginChunkType*) src->chunks[TagPlugin];
+
+   // Walk the list copying all nodes
+   for (entry = head->Tag;
+       entry != NULL;
+       entry = entry ->Next) {
+
+           _cmsTagLinkedList *newEntry = ( _cmsTagLinkedList *) _cmsSubAllocDup(ctx ->MemPool, entry, sizeof(_cmsTagLinkedList));
+
+           if (newEntry == NULL) 
+               return;     // NOTE(review): leaves ctx->chunks[TagPlugin] unassigned by this function
+
+           // Append at the tail (not the head) so that the copy keeps the order of the source list
+           newEntry -> Next = NULL;
+           if (Anterior)
+               Anterior -> Next = newEntry;
+
+           Anterior = newEntry;
+           // The first node copied becomes the head of the new list
+           if (newHead.Tag == NULL)
+               newHead.Tag = newEntry;
+   }
+   // Store a copy of the new head, allocated from ctx's pool, as the chunk of the destination context
+   ctx ->chunks[TagPlugin] = _cmsSubAllocDup(ctx->MemPool, &newHead, sizeof(_cmsTagPluginChunkType));
+}
+
+void _cmsAllocTagPluginChunk(struct _cmsContext_struct* ctx,
+                                 const struct _cmsContext_struct* src)
+{
+    // Empty template used when there is no source context to copy from
+    static _cmsTagPluginChunkType EmptyChunk = { NULL };
+
+    if (src == NULL) {
+        ctx ->chunks[TagPlugin] = _cmsSubAllocDup(ctx ->MemPool, &EmptyChunk, sizeof(_cmsTagPluginChunkType));
+        return;
+    }
+
+    DupTagList(ctx, src);   // Otherwise clone the tag list held by the source context
+}
+
+cmsBool  _cmsRegisterTagPlugin(cmsContext id, cmsPluginBase* Data)
+{
+    // Data == NULL empties the list; otherwise the plug-in is pushed on the front of it
+    _cmsTagPluginChunkType* chunk = ( _cmsTagPluginChunkType*) _cmsContextGetClientChunk(id, TagPlugin);
+    cmsPluginTag* tagPlugin = (cmsPluginTag*) Data;
+    _cmsTagLinkedList* node;
+
+    if (Data == NULL) {
+        chunk->Tag = NULL;
+        return TRUE;
+    }
+
+    // Node storage is requested through _cmsPluginMalloc for this context
+    node = (_cmsTagLinkedList*) _cmsPluginMalloc(id, sizeof(_cmsTagLinkedList));
+    if (node == NULL)
+        return FALSE;
+
+    node ->Signature  = tagPlugin ->Signature;
+    node ->Descriptor = tagPlugin ->Descriptor;
+    node ->Next       = chunk ->Tag;       // Link in front of the current head
+    chunk ->Tag       = node;
+    return TRUE;
+}
+
+// Looks up the descriptor of a tag signature; returns NULL when the signature is not found
+cmsTagDescriptor* _cmsGetTagDescriptor(cmsContext ContextID, cmsTagSignature sig)
+{
+    _cmsTagPluginChunkType* chunk = ( _cmsTagPluginChunkType*) _cmsContextGetClientChunk(ContextID, TagPlugin);
+    _cmsTagLinkedList* node;
+    // Tags registered by plug-ins in this context are searched first...
+    node = chunk->Tag;
+    while (node != NULL) {
+        if (node ->Signature == sig)
+            return &node ->Descriptor;
+        node = node ->Next;
+    }
+
+    // ...then the SupportedTags table, which is chained through its Next pointers
+    node = SupportedTags;
+    while (node != NULL) {
+        if (node ->Signature == sig)
+            return &node ->Descriptor;
+        node = node ->Next;
+    }
+    return NULL;
+}
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsvirt.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsvirt.cpp
new file mode 100755
index 0000000000..19e0cafb10
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsvirt.cpp
@@ -0,0 +1,1216 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Virtual (built-in) profiles
+// -----------------------------------------------------------------------------------
+
+static
+cmsBool SetTextTags(cmsHPROFILE hProfile, const wchar_t* Description)
+{
+    // Writes the profile description and copyright tags of hProfile.
+    // Both texts are stored under the "en" / "US" language and country codes.
+    cmsContext ContextID    = cmsGetProfileContextID(hProfile);
+    cmsMLU* DescriptionMLU  = cmsMLUalloc(ContextID, 1);
+    cmsMLU* CopyrightMLU    = cmsMLUalloc(ContextID, 1);
+    cmsBool ok;
+
+    // Short-circuit evaluation stops at the first failing step, in the
+    // order: allocations, text setup, tag writes
+    ok = DescriptionMLU != NULL &&
+         CopyrightMLU != NULL &&
+         cmsMLUsetWide(DescriptionMLU,  "en", "US", Description) &&
+         cmsMLUsetWide(CopyrightMLU,    "en", "US", L"No copyright, use freely") &&
+         cmsWriteTag(hProfile, cmsSigProfileDescriptionTag,  DescriptionMLU) &&
+         cmsWriteTag(hProfile, cmsSigCopyrightTag,           CopyrightMLU);
+
+    // The local MLU objects are released on success and on failure alike
+    if (DescriptionMLU != NULL)
+        cmsMLUfree(DescriptionMLU);
+
+    if (CopyrightMLU != NULL)
+        cmsMLUfree(CopyrightMLU);
+
+    return ok;
+}
+
+
+static
+cmsBool  SetSeqDescTag(cmsHPROFILE hProfile, const char* Model)
+{   // Writes a one-entry profile sequence description to hProfile; returns TRUE only if the write succeeds
+    cmsBool  rc = FALSE;
+    cmsContext ContextID = cmsGetProfileContextID(hProfile);
+    cmsSEQ* Seq = cmsAllocProfileSequenceDescription(ContextID, 1);
+
+    if (Seq == NULL) return FALSE;
+    // The single entry gets zeroed device, attribute and technology fields
+    Seq->seq[0].deviceMfg = (cmsSignature) 0;
+    Seq->seq[0].deviceModel = (cmsSignature) 0;
+
+#ifdef CMS_DONT_USE_INT64
+    Seq->seq[0].attributes[0] = 0;
+    Seq->seq[0].attributes[1] = 0;
+#else
+    Seq->seq[0].attributes = 0;
+#endif
+
+    Seq->seq[0].technology = (cmsTechnologySignature) 0;
+    // NOTE(review): the results of the two cmsMLUsetASCII calls below are not checked
+    cmsMLUsetASCII( Seq->seq[0].Manufacturer, cmsNoLanguage, cmsNoCountry, "Little CMS");
+    cmsMLUsetASCII( Seq->seq[0].Model,        cmsNoLanguage, cmsNoCountry, Model);
+
+    if (!_cmsWriteProfileSequence(hProfile, Seq)) goto Error;
+
+    rc = TRUE;
+
+Error:      // Also reached on success: Seq is released on both paths
+    if (Seq)
+        cmsFreeProfileSequenceDescription(Seq);
+
+    return rc;
+}
+
+
+
+// Creates an RGB display profile from a white point, primaries and transfer functions.
+// Any of the three may be NULL; the tags that depend on a NULL argument are not written.
+cmsHPROFILE CMSEXPORT cmsCreateRGBProfileTHR(cmsContext ContextID,
+                                          const cmsCIExyY* WhitePoint,
+                                          const cmsCIExyYTRIPLE* Primaries,
+                                          cmsToneCurve* const TransferFunction[3])
+{
+    cmsHPROFILE hICC;
+    cmsMAT3 MColorants;
+    cmsCIEXYZTRIPLE Colorants;
+    cmsCIExyY MaxWhite;
+    cmsMAT3 CHAD;
+    cmsCIEXYZ WhitePointXYZ;
+
+    hICC = cmsCreateProfilePlaceholder(ContextID);
+    if (!hICC)                          // can't allocate
+        return NULL;
+
+    cmsSetProfileVersion(hICC, 4.3);
+
+    cmsSetDeviceClass(hICC,      cmsSigDisplayClass);
+    cmsSetColorSpace(hICC,       cmsSigRgbData);
+    cmsSetPCS(hICC,              cmsSigXYZData);
+
+    cmsSetHeaderRenderingIntent(hICC,  INTENT_PERCEPTUAL);
+
+
+    // Implement profile using following tags:
+    //
+    //  1 cmsSigProfileDescriptionTag
+    //  2 cmsSigMediaWhitePointTag
+    //  3 cmsSigRedColorantTag
+    //  4 cmsSigGreenColorantTag
+    //  5 cmsSigBlueColorantTag
+    //  6 cmsSigRedTRCTag
+    //  7 cmsSigGreenTRCTag
+    //  8 cmsSigBlueTRCTag
+    //  9 Chromatic adaptation Tag
+    // This conforms to a standard RGB display profile as the ICC specifies; in addition (as per addendum II)
+    // 10 cmsSigChromaticityTag
+
+
+    if (!SetTextTags(hICC, L"RGB built-in")) goto Error;
+
+    if (WhitePoint) {
+        // The media white point tag is always D50; the given white point only feeds the adaptation matrix
+        if (!cmsWriteTag(hICC, cmsSigMediaWhitePointTag, cmsD50_XYZ())) goto Error;
+
+        cmsxyY2XYZ(&WhitePointXYZ, WhitePoint);
+        _cmsAdaptationMatrix(&CHAD, NULL, &WhitePointXYZ, cmsD50_XYZ());
+
+        // This is a V4 tag, but many CMMs read and understand it no matter which version
+        if (!cmsWriteTag(hICC, cmsSigChromaticAdaptationTag, (void*) &CHAD)) goto Error;
+    }
+
+    if (WhitePoint && Primaries) {
+        // The colorant matrix is built for the given white point chromaticity with Y forced to 1.0
+        MaxWhite.x =  WhitePoint -> x;
+        MaxWhite.y =  WhitePoint -> y;
+        MaxWhite.Y =  1.0;
+
+        if (!_cmsBuildRGB2XYZtransferMatrix(&MColorants, &MaxWhite, Primaries)) goto Error;
+        // Each colorant is read from one n[] index across the three vectors of the matrix
+        Colorants.Red.X   = MColorants.v[0].n[0];
+        Colorants.Red.Y   = MColorants.v[1].n[0];
+        Colorants.Red.Z   = MColorants.v[2].n[0];
+
+        Colorants.Green.X = MColorants.v[0].n[1];
+        Colorants.Green.Y = MColorants.v[1].n[1];
+        Colorants.Green.Z = MColorants.v[2].n[1];
+
+        Colorants.Blue.X  = MColorants.v[0].n[2];
+        Colorants.Blue.Y  = MColorants.v[1].n[2];
+        Colorants.Blue.Z  = MColorants.v[2].n[2];
+
+        if (!cmsWriteTag(hICC, cmsSigRedColorantTag,   (void*) &Colorants.Red)) goto Error;
+        if (!cmsWriteTag(hICC, cmsSigBlueColorantTag,  (void*) &Colorants.Blue)) goto Error;
+        if (!cmsWriteTag(hICC, cmsSigGreenColorantTag, (void*) &Colorants.Green)) goto Error;
+    }
+
+
+    if (TransferFunction) {
+
+        // Tries to minimize space: a curve that is the same object as the red one is linked, not written again. Thanks to Richard Hughes for this nice idea
+        if (!cmsWriteTag(hICC, cmsSigRedTRCTag,   (void*) TransferFunction[0])) goto Error;
+
+        if (TransferFunction[1] == TransferFunction[0]) {
+
+            if (!cmsLinkTag (hICC, cmsSigGreenTRCTag, cmsSigRedTRCTag)) goto Error;
+
+        } else {
+
+            if (!cmsWriteTag(hICC, cmsSigGreenTRCTag, (void*) TransferFunction[1])) goto Error;
+        }
+
+        if (TransferFunction[2] == TransferFunction[0]) {
+
+            if (!cmsLinkTag (hICC, cmsSigBlueTRCTag, cmsSigRedTRCTag)) goto Error;
+
+        } else {
+
+            if (!cmsWriteTag(hICC, cmsSigBlueTRCTag, (void*) TransferFunction[2])) goto Error;
+        }
+    }
+
+    if (Primaries) {
+        if (!cmsWriteTag(hICC, cmsSigChromaticityTag, (void*) Primaries)) goto Error;
+    }
+
+
+    return hICC;
+
+Error:      // Any failed step closes the partially built profile and reports NULL
+    if (hICC)
+        cmsCloseProfile(hICC);
+    return NULL;
+}
+
+cmsHPROFILE CMSEXPORT cmsCreateRGBProfile(const cmsCIExyY* WhitePoint,
+                                          const cmsCIExyYTRIPLE* Primaries,
+                                          cmsToneCurve* const TransferFunction[3])
+{   // Variant without a context argument: forwards to cmsCreateRGBProfileTHR with a NULL context
+    return cmsCreateRGBProfileTHR(NULL, WhitePoint, Primaries, TransferFunction);
+}
+
+
+
+// Creates a gray display profile from a white point and a transfer function; either may be NULL, and its tag is then not written.
+cmsHPROFILE CMSEXPORT cmsCreateGrayProfileTHR(cmsContext ContextID,
+                                           const cmsCIExyY* WhitePoint,
+                                           const cmsToneCurve* TransferFunction)
+{
+    cmsHPROFILE hICC;
+    cmsCIEXYZ tmp;      // White point converted from xyY to XYZ
+
+    hICC = cmsCreateProfilePlaceholder(ContextID);
+    if (!hICC)                          // can't allocate
+        return NULL;
+
+    cmsSetProfileVersion(hICC, 4.3);
+
+    cmsSetDeviceClass(hICC,      cmsSigDisplayClass);
+    cmsSetColorSpace(hICC,       cmsSigGrayData);
+    cmsSetPCS(hICC,              cmsSigXYZData);
+    cmsSetHeaderRenderingIntent(hICC,  INTENT_PERCEPTUAL);
+
+
+    // Implement profile using following tags:
+    //
+    //  1 cmsSigProfileDescriptionTag
+    //  2 cmsSigMediaWhitePointTag
+    //  3 cmsSigGrayTRCTag
+
+    // This conforms to a standard Gray display profile
+
+    // Fill-in the tags
+
+    if (!SetTextTags(hICC, L"gray built-in")) goto Error;
+
+
+    if (WhitePoint) {
+        // Here the media white point tag holds the given white point, converted to XYZ
+        cmsxyY2XYZ(&tmp, WhitePoint);
+        if (!cmsWriteTag(hICC, cmsSigMediaWhitePointTag, (void*) &tmp)) goto Error;
+    }
+
+    if (TransferFunction) {
+
+        if (!cmsWriteTag(hICC, cmsSigGrayTRCTag, (void*) TransferFunction)) goto Error;
+    }
+
+    return hICC;
+
+Error:      // Any failed step closes the partially built profile and reports NULL
+    if (hICC)
+        cmsCloseProfile(hICC);
+    return NULL;
+}
+
+
+
+cmsHPROFILE CMSEXPORT cmsCreateGrayProfile(const cmsCIExyY* WhitePoint,
+                                                    const cmsToneCurve* TransferFunction)
+{   // Variant without a context argument: forwards to cmsCreateGrayProfileTHR with a NULL context
+    return cmsCreateGrayProfileTHR(NULL, WhitePoint, TransferFunction);
+}
+
+// This is a devicelink operating in the target colorspace with as many transfer functions as components
+
+cmsHPROFILE CMSEXPORT cmsCreateLinearizationDeviceLinkTHR(cmsContext ContextID,
+                                                          cmsColorSpaceSignature ColorSpace,
+                                                          cmsToneCurve* const TransferFunctions[])
+{
+    cmsHPROFILE hICC;
+    cmsPipeline* Pipeline;
+    cmsUInt32Number nChannels;
+
+    hICC = cmsCreateProfilePlaceholder(ContextID);
+    if (!hICC)
+        return NULL;
+
+    cmsSetProfileVersion(hICC, 4.3);
+    // Devicelink: the same space is used as input color space and as PCS
+    cmsSetDeviceClass(hICC,      cmsSigLinkClass);
+    cmsSetColorSpace(hICC,       ColorSpace);
+    cmsSetPCS(hICC,              ColorSpace);
+
+    cmsSetHeaderRenderingIntent(hICC,  INTENT_PERCEPTUAL);
+
+    // Set up channels
+    nChannels = cmsChannelsOf(ColorSpace);
+
+    // Creates a Pipeline with prelinearization step only
+    Pipeline = cmsPipelineAlloc(ContextID, nChannels, nChannels);
+    if (Pipeline == NULL) goto Error;
+
+
+    // Copy tables to Pipeline (TransferFunctions is read for nChannels entries)
+    if (!cmsPipelineInsertStage(Pipeline, cmsAT_BEGIN, cmsStageAllocToneCurves(ContextID, nChannels, TransferFunctions)))
+        goto Error;
+
+    // Create tags
+    if (!SetTextTags(hICC, L"Linearization built-in")) goto Error;
+    if (!cmsWriteTag(hICC, cmsSigAToB0Tag, (void*) Pipeline)) goto Error;
+    if (!SetSeqDescTag(hICC, "Linearization built-in")) goto Error;
+
+    // Pipeline is already on virtual profile
+    cmsPipelineFree(Pipeline);
+
+    // Ok, done
+    return hICC;
+
+Error:      // Pipeline may still be NULL here when its allocation failed
+    cmsPipelineFree(Pipeline);
+    if (hICC)
+        cmsCloseProfile(hICC);
+
+
+    return NULL;
+}
+
+cmsHPROFILE CMSEXPORT cmsCreateLinearizationDeviceLink(cmsColorSpaceSignature ColorSpace,
+                                                                 cmsToneCurve* const TransferFunctions[])
+{   // Variant without a context argument: forwards to cmsCreateLinearizationDeviceLinkTHR with a NULL context
+    return cmsCreateLinearizationDeviceLinkTHR(NULL, ColorSpace, TransferFunctions);
+}
+
+// Ink-limiting algorithm
+//
+//  Sum = C + M + Y + K
+//  If Sum > InkLimit
+//        Ratio= 1 - (Sum - InkLimit) / (C + M + Y)
+//        if Ratio <0
+//              Ratio=0
+//        endif
+//     Else
+//         Ratio=1
+//     endif
+//
+//     C = Ratio * C
+//     M = Ratio * M
+//     Y = Ratio * Y
+//     K: Does not change
+
+static
+int InkLimitingSampler(register const cmsUInt16Number In[], register cmsUInt16Number Out[], register void* Cargo)
+{
+    // Cargo points to the ink limit; 655.35 rescales it before comparing with the channel sums
+    const cmsFloat64Number MaxInk  = (*(cmsFloat64Number *) Cargo) * 655.35;
+    const cmsFloat64Number InkCMY  = In[0]  + In[1] + In[2];
+    const cmsFloat64Number InkCMYK = InkCMY + In[3];
+    cmsFloat64Number Scale = 1;
+    int i;
+
+    // Over the limit: C, M and Y shrink by a common factor, which never goes below zero
+    if (InkCMYK > MaxInk) {
+
+        Scale = 1 - ((InkCMYK - MaxInk) / InkCMY);
+        if (Scale < 0)
+            Scale = 0;
+    }
+
+    // C, M and Y are scaled and passed through _cmsQuickSaturateWord
+    for (i = 0; i < 3; i++)
+        Out[i] = _cmsQuickSaturateWord(In[i] * Scale);
+
+    // K (untouched)
+    Out[3] = In[3];
+
+    return TRUE;
+}
+
+// This is a devicelink operating in CMYK for ink-limiting (Limit is clamped to 0..400). Returns NULL on failure.
+
+cmsHPROFILE CMSEXPORT cmsCreateInkLimitingDeviceLinkTHR(cmsContext ContextID,
+                                                     cmsColorSpaceSignature ColorSpace,
+                                                     cmsFloat64Number Limit)
+{
+    cmsHPROFILE hICC;
+    cmsPipeline* LUT;
+    cmsStage* CLUT = NULL;      // Non-NULL only while the stage is not yet owned by LUT
+    cmsUInt32Number nChannels;
+
+    if (ColorSpace != cmsSigCmykData) {
+        cmsSignalError(ContextID, cmsERROR_COLORSPACE_CHECK, "InkLimiting: Only CMYK currently supported");
+        return NULL;
+    }
+
+    if (Limit < 0.0 || Limit > 400) {
+        // Out of range values are reported and then clamped; creation still goes on
+        cmsSignalError(ContextID, cmsERROR_RANGE, "InkLimiting: Limit should be between 0..400");
+        if (Limit < 0) Limit = 0;
+        if (Limit > 400) Limit = 400;
+
+    }
+
+    hICC = cmsCreateProfilePlaceholder(ContextID);
+    if (!hICC)                          // can't allocate
+        return NULL;
+
+    cmsSetProfileVersion(hICC, 4.3);
+
+    cmsSetDeviceClass(hICC,      cmsSigLinkClass);
+    cmsSetColorSpace(hICC,       ColorSpace);
+    cmsSetPCS(hICC,              ColorSpace);
+
+    cmsSetHeaderRenderingIntent(hICC,  INTENT_PERCEPTUAL);
+
+
+    // Creates a Pipeline with 3D grid only
+    LUT = cmsPipelineAlloc(ContextID, 4, 4);
+    if (LUT == NULL) goto Error;
+
+
+    nChannels = cmsChannelsOf(ColorSpace);
+
+    CLUT = cmsStageAllocCLut16bit(ContextID, 17, nChannels, nChannels, NULL);
+    if (CLUT == NULL) goto Error;
+    // Until it is inserted the stage is ours: the Error path releases it
+    if (!cmsStageSampleCLut16bit(CLUT, InkLimitingSampler, (void*) &Limit, 0)) goto Error;
+
+    if (!cmsPipelineInsertStage(LUT, cmsAT_BEGIN, _cmsStageAllocIdentityCurves(ContextID, nChannels))) goto Error;
+    if (!cmsPipelineInsertStage(LUT, cmsAT_END, CLUT)) goto Error;
+    CLUT = NULL;                // From here on the stage belongs to LUT
+    if (!cmsPipelineInsertStage(LUT, cmsAT_END, _cmsStageAllocIdentityCurves(ContextID, nChannels))) goto Error;
+
+    // Create tags
+    if (!SetTextTags(hICC, L"ink-limiting built-in")) goto Error;
+
+    if (!cmsWriteTag(hICC, cmsSigAToB0Tag, (void*) LUT))  goto Error;
+    if (!SetSeqDescTag(hICC, "ink-limiting built-in")) goto Error;
+
+    // cmsPipeline is already on virtual profile
+    cmsPipelineFree(LUT);
+    return hICC;
+
+Error:
+    if (CLUT != NULL)
+        cmsStageFree(CLUT);
+
+    if (LUT != NULL)
+        cmsPipelineFree(LUT);
+
+    if (hICC != NULL)
+        cmsCloseProfile(hICC);
+    return NULL;
+}
+
+cmsHPROFILE CMSEXPORT cmsCreateInkLimitingDeviceLink(cmsColorSpaceSignature ColorSpace, cmsFloat64Number Limit)
+{   // Variant without a context argument: forwards to cmsCreateInkLimitingDeviceLinkTHR with a NULL context
+    return cmsCreateInkLimitingDeviceLinkTHR(NULL, ColorSpace, Limit);
+}
+
+
+// Creates a fake Lab identity (v2.1 header, identity CLUT). D50 is used when WhitePoint is NULL. Returns NULL on failure.
+cmsHPROFILE CMSEXPORT cmsCreateLab2ProfileTHR(cmsContext ContextID, const cmsCIExyY* WhitePoint)
+{
+    cmsHPROFILE hProfile;
+    cmsPipeline* LUT = NULL;
+    // Start from a built-in RGB profile without primaries or curves, then relabel it as a Lab abstract profile
+    hProfile = cmsCreateRGBProfileTHR(ContextID, WhitePoint == NULL ? cmsD50_xyY() : WhitePoint, NULL, NULL);
+    if (hProfile == NULL) return NULL;
+
+    cmsSetProfileVersion(hProfile, 2.1);
+
+    cmsSetDeviceClass(hProfile, cmsSigAbstractClass);
+    cmsSetColorSpace(hProfile,  cmsSigLabData);
+    cmsSetPCS(hProfile,         cmsSigLabData);
+    // The profile already exists at this point, so a failure must go through Error to close it
+    if (!SetTextTags(hProfile, L"Lab identity built-in")) goto Error;
+
+    // An identity LUT is all we need
+    LUT = cmsPipelineAlloc(ContextID, 3, 3);
+    if (LUT == NULL) goto Error;
+
+    if (!cmsPipelineInsertStage(LUT, cmsAT_BEGIN, _cmsStageAllocIdentityCLut(ContextID, 3)))
+        goto Error;
+
+    if (!cmsWriteTag(hProfile, cmsSigAToB0Tag, LUT)) goto Error;
+    cmsPipelineFree(LUT);
+
+    return hProfile;
+
+Error:
+
+    if (LUT != NULL)
+        cmsPipelineFree(LUT);
+
+    if (hProfile != NULL)
+        cmsCloseProfile(hProfile);
+
+    return NULL;
+}
+
+
+cmsHPROFILE CMSEXPORT cmsCreateLab2Profile(const cmsCIExyY* WhitePoint)
+{   // Variant without a context argument: forwards to cmsCreateLab2ProfileTHR with a NULL context
+    return cmsCreateLab2ProfileTHR(NULL, WhitePoint);
+}
+
+
+// Creates a fake Lab V4 identity (v4.3 header, identity curves). D50 is used when WhitePoint is NULL. Returns NULL on failure.
+cmsHPROFILE CMSEXPORT cmsCreateLab4ProfileTHR(cmsContext ContextID, const cmsCIExyY* WhitePoint)
+{
+    cmsHPROFILE hProfile;
+    cmsPipeline* LUT = NULL;
+    // Start from a built-in RGB profile without primaries or curves, then relabel it as a Lab abstract profile
+    hProfile = cmsCreateRGBProfileTHR(ContextID, WhitePoint == NULL ? cmsD50_xyY() : WhitePoint, NULL, NULL);
+    if (hProfile == NULL) return NULL;
+
+    cmsSetProfileVersion(hProfile, 4.3);
+
+    cmsSetDeviceClass(hProfile, cmsSigAbstractClass);
+    cmsSetColorSpace(hProfile,  cmsSigLabData);
+    cmsSetPCS(hProfile,         cmsSigLabData);
+
+    if (!SetTextTags(hProfile, L"Lab identity built-in")) goto Error;
+
+    // A LUT holding only identity curves is all we need
+    LUT = cmsPipelineAlloc(ContextID, 3, 3);
+    if (LUT == NULL) goto Error;
+
+    if (!cmsPipelineInsertStage(LUT, cmsAT_BEGIN, _cmsStageAllocIdentityCurves(ContextID, 3)))
+        goto Error;
+
+    if (!cmsWriteTag(hProfile, cmsSigAToB0Tag, LUT)) goto Error;
+    cmsPipelineFree(LUT);
+
+    return hProfile;
+
+Error:
+    // Release whatever was created before the failing step
+    if (LUT != NULL)
+        cmsPipelineFree(LUT);
+
+    if (hProfile != NULL)
+        cmsCloseProfile(hProfile);
+
+    return NULL;
+}
+
+cmsHPROFILE CMSEXPORT cmsCreateLab4Profile(const cmsCIExyY* WhitePoint)
+{   // Variant without a context argument: forwards to cmsCreateLab4ProfileTHR with a NULL context
+    return cmsCreateLab4ProfileTHR(NULL, WhitePoint);
+}
+
+
+// Creates a fake XYZ identity (v4.3 header, identity curves, D50 white point). Returns NULL on failure.
+cmsHPROFILE CMSEXPORT cmsCreateXYZProfileTHR(cmsContext ContextID)
+{
+    cmsHPROFILE hProfile;
+    cmsPipeline* LUT = NULL;
+    // Start from a built-in RGB profile without primaries or curves, then relabel it as an XYZ abstract profile
+    hProfile = cmsCreateRGBProfileTHR(ContextID, cmsD50_xyY(), NULL, NULL);
+    if (hProfile == NULL) return NULL;
+
+    cmsSetProfileVersion(hProfile, 4.3);
+
+    cmsSetDeviceClass(hProfile, cmsSigAbstractClass);
+    cmsSetColorSpace(hProfile,  cmsSigXYZData);
+    cmsSetPCS(hProfile,         cmsSigXYZData);
+
+    if (!SetTextTags(hProfile, L"XYZ identity built-in")) goto Error;
+
+    // An identity LUT is all we need
+    LUT = cmsPipelineAlloc(ContextID, 3, 3);
+    if (LUT == NULL) goto Error;
+
+    if (!cmsPipelineInsertStage(LUT, cmsAT_BEGIN, _cmsStageAllocIdentityCurves(ContextID, 3)))
+        goto Error;
+
+    if (!cmsWriteTag(hProfile, cmsSigAToB0Tag, LUT)) goto Error;
+    cmsPipelineFree(LUT);
+
+    return hProfile;
+
+Error:
+    // Release whatever was created before the failing step
+    if (LUT != NULL)
+        cmsPipelineFree(LUT);
+
+    if (hProfile != NULL)
+        cmsCloseProfile(hProfile);
+
+    return NULL;
+}
+
+
+cmsHPROFILE CMSEXPORT cmsCreateXYZProfile(void)
+{   // Variant without a context argument: forwards to cmsCreateXYZProfileTHR with a NULL context
+    return cmsCreateXYZProfileTHR(NULL);
+}
+
+
+//sRGB Curves are defined by:
+//
+//If  R'sRGB,G'sRGB, B'sRGB < 0.04045
+//
+//    R =  R'sRGB / 12.92
+//    G =  G'sRGB / 12.92
+//    B =  B'sRGB / 12.92
+//
+//
+//else if  R'sRGB,G'sRGB, B'sRGB >= 0.04045
+//
+//    R = ((R'sRGB + 0.055) / 1.055)^2.4
+//    G = ((G'sRGB + 0.055) / 1.055)^2.4
+//    B = ((B'sRGB + 0.055) / 1.055)^2.4
+
+static
+cmsToneCurve* Build_sRGBGamma(cmsContext ContextID)
+{
+    // Coefficients of the sRGB formula, in the order handed to parametric curve type 4
+    cmsFloat64Number Parameters[5] = {
+        2.4,                // exponent of the power segment
+        1. / 1.055,         // scale applied before the power
+        0.055 / 1.055,      // offset applied before the power
+        1. / 12.92,         // slope of the linear segment
+        0.04045             // threshold between both segments
+    };
+    return cmsBuildParametricToneCurve(ContextID, 4, Parameters);
+}
+
+// Create the ICC virtual profile for sRGB space (returns NULL on failure)
+cmsHPROFILE CMSEXPORT cmsCreate_sRGBProfileTHR(cmsContext ContextID)
+{
+       cmsCIExyY       D65 = { 0.3127, 0.3290, 1.0 };
+       cmsCIExyYTRIPLE Rec709Primaries = {
+                                   {0.6400, 0.3300, 1.0},
+                                   {0.3000, 0.6000, 1.0},
+                                   {0.1500, 0.0600, 1.0}
+                                   };
+       cmsToneCurve* Gamma22[3];
+       cmsHPROFILE  hsRGB;
+       // One curve object is shared by the three channels; despite the name it is the curve from Build_sRGBGamma
+      // cmsWhitePointFromTemp(&D65, 6504);
+       Gamma22[0] = Gamma22[1] = Gamma22[2] = Build_sRGBGamma(ContextID);
+       if (Gamma22[0] == NULL) return NULL;
+       // The local curve is released whether or not the profile could be created
+       hsRGB = cmsCreateRGBProfileTHR(ContextID, &D65, &Rec709Primaries, Gamma22);
+       cmsFreeToneCurve(Gamma22[0]);
+       if (hsRGB == NULL) return NULL;
+       // Write the text tags again, this time with the sRGB description
+       if (!SetTextTags(hsRGB, L"sRGB built-in")) {
+           cmsCloseProfile(hsRGB);
+           return NULL;
+       }
+
+       return hsRGB;
+}
+
+cmsHPROFILE CMSEXPORT cmsCreate_sRGBProfile(void)
+{   // Variant without a context argument: forwards to cmsCreate_sRGBProfileTHR with a NULL context
+    return cmsCreate_sRGBProfileTHR(NULL);
+}
+
+
+
+typedef struct {        // Adjustment parameters handed to bchswSampler through its Cargo pointer
+                cmsFloat64Number Brightness;    // Added to L after the contrast factor is applied
+                cmsFloat64Number Contrast;      // Factor applied to L
+                cmsFloat64Number Hue;           // Added to h
+                cmsFloat64Number Saturation;    // Added to C
+                cmsBool          lAdjustWP;     // TRUE when the white point has to be moved
+                cmsCIEXYZ WPsrc, WPdest;        // Source and destination white points, only used if lAdjustWP
+
+} BCHSWADJUSTS, *LPBCHSWADJUSTS;
+
+
+static
+int bchswSampler(register const cmsUInt16Number In[], register cmsUInt16Number Out[], register void* Cargo)
+{
+    cmsCIELab LabIn, LabOut;
+    cmsCIELCh LChIn, LChOut;
+    cmsCIEXYZ XYZ;
+    LPBCHSWADJUSTS bchsw = (LPBCHSWADJUSTS) Cargo;
+
+    // CLUT sampler: decode the encoded Lab input, adjust it in LCh, and encode the result again
+    cmsLabEncoded2Float(&LabIn, In);
+
+
+    cmsLab2LCh(&LChIn, &LabIn);
+
+    // Do some adjusts on LCh
+    // (contrast scales L, brightness offsets it; saturation and hue are plain offsets on C and h)
+    LChOut.L = LChIn.L * bchsw ->Contrast + bchsw ->Brightness;
+    LChOut.C = LChIn.C + bchsw -> Saturation;
+    LChOut.h = LChIn.h + bchsw -> Hue;
+
+
+    cmsLCh2Lab(&LabOut, &LChOut);
+
+    // Move white point in Lab: go to XYZ relative to WPsrc, then come back to Lab relative to WPdest
+    if (bchsw->lAdjustWP) {
+           cmsLab2XYZ(&bchsw->WPsrc, &XYZ, &LabOut);
+           cmsXYZ2Lab(&bchsw->WPdest, &LabOut, &XYZ);
+    }
+
+    // Back to encoded
+
+    cmsFloat2LabEncoded(Out, &LabOut);
+
+    return TRUE;
+}
+
+
+// Creates an abstract profile operating in Lab space for Brightness,
+// contrast, Saturation and white point displacement. Returns NULL on any failure.
+
+cmsHPROFILE CMSEXPORT cmsCreateBCHSWabstractProfileTHR(cmsContext ContextID,
+                                                       cmsUInt32Number nLUTPoints,
+                                                       cmsFloat64Number Bright,
+                                                       cmsFloat64Number Contrast,
+                                                       cmsFloat64Number Hue,
+                                                       cmsFloat64Number Saturation,
+                                                       cmsUInt32Number TempSrc,
+                                                       cmsUInt32Number TempDest)
+{
+    cmsHPROFILE hICC;
+    cmsPipeline* Pipeline;
+    BCHSWADJUSTS bchsw;
+    cmsCIExyY WhitePnt;
+    cmsStage* CLUT = NULL;      // Non-NULL only while the stage is not yet owned by Pipeline
+    cmsUInt32Number Dimensions[MAX_INPUT_DIMENSIONS];
+    cmsUInt32Number i;
+
+    bchsw.Brightness = Bright;
+    bchsw.Contrast   = Contrast;
+    bchsw.Hue        = Hue;
+    bchsw.Saturation = Saturation;
+    if (TempSrc == TempDest) {
+           // Same temperature on both sides: the sampler skips the white point move
+           bchsw.lAdjustWP = FALSE;
+    }
+    else {
+           bchsw.lAdjustWP = TRUE;
+           cmsWhitePointFromTemp(&WhitePnt, TempSrc);
+           cmsxyY2XYZ(&bchsw.WPsrc, &WhitePnt);
+           cmsWhitePointFromTemp(&WhitePnt, TempDest);
+           cmsxyY2XYZ(&bchsw.WPdest, &WhitePnt);
+
+    }
+
+    hICC = cmsCreateProfilePlaceholder(ContextID);
+    if (!hICC)                          // can't allocate
+        return NULL;
+
+    cmsSetDeviceClass(hICC,      cmsSigAbstractClass);
+    cmsSetColorSpace(hICC,       cmsSigLabData);
+    cmsSetPCS(hICC,              cmsSigLabData);
+
+    cmsSetHeaderRenderingIntent(hICC,  INTENT_PERCEPTUAL);
+
+    // Creates a Pipeline with 3D grid only
+    Pipeline = cmsPipelineAlloc(ContextID, 3, 3);
+    if (Pipeline == NULL) {
+        cmsCloseProfile(hICC);
+        return NULL;
+    }
+
+    for (i=0; i < MAX_INPUT_DIMENSIONS; i++) Dimensions[i] = nLUTPoints;
+    CLUT = cmsStageAllocCLut16bitGranular(ContextID, Dimensions, 3, 3, NULL);
+    if (CLUT == NULL) goto Error;
+
+    // The stage is still ours if sampling fails; the Error path releases it
+    if (!cmsStageSampleCLut16bit(CLUT, bchswSampler, (void*) &bchsw, 0)) {
+
+        // Shouldn't reach here
+        goto Error;
+    }
+
+    if (!cmsPipelineInsertStage(Pipeline, cmsAT_END, CLUT)) {
+        goto Error;
+    }
+    CLUT = NULL;                // From here on the stage belongs to Pipeline
+    // Create tags; every failure goes through Error so that nothing is leaked
+    if (!SetTextTags(hICC, L"BCHS built-in")) goto Error;
+
+    if (!cmsWriteTag(hICC, cmsSigMediaWhitePointTag, (void*) cmsD50_XYZ())) goto Error;
+
+    if (!cmsWriteTag(hICC, cmsSigAToB0Tag, (void*) Pipeline)) goto Error;
+
+    // Pipeline is already on virtual profile
+    cmsPipelineFree(Pipeline);
+    return hICC;
+
+Error:
+    if (CLUT != NULL)
+        cmsStageFree(CLUT);
+    cmsPipelineFree(Pipeline);
+    cmsCloseProfile(hICC);
+    return NULL;
+}
+
+
+CMSAPI cmsHPROFILE   CMSEXPORT cmsCreateBCHSWabstractProfile(cmsUInt32Number nLUTPoints,
+                                                             cmsFloat64Number Bright,
+                                                             cmsFloat64Number Contrast,
+                                                             cmsFloat64Number Hue,
+                                                             cmsFloat64Number Saturation,
+                                                             cmsUInt32Number TempSrc,
+                                                             cmsUInt32Number TempDest)
+{   // Variant without a context argument: forwards to cmsCreateBCHSWabstractProfileTHR with a NULL context
+    return cmsCreateBCHSWabstractProfileTHR(NULL, nLUTPoints, Bright, Contrast, Hue, Saturation, TempSrc, TempDest);
+}
+
+
+// Creates a fake NULL profile. This profile always returns 1 channel set to 0.
+// It is useful only for gamut checking tricks. Returns NULL on failure.
+cmsHPROFILE CMSEXPORT cmsCreateNULLProfileTHR(cmsContext ContextID)
+{
+    cmsHPROFILE hProfile;
+    cmsPipeline* LUT = NULL;
+    cmsStage* PostLin;
+    cmsStage* OutLin;
+    cmsToneCurve* EmptyTab[3];
+    cmsUInt16Number Zero[2] = { 0, 0 };
+    const cmsFloat64Number PickLstarMatrix[] = { 1, 0, 0 };
+
+    hProfile = cmsCreateProfilePlaceholder(ContextID);
+    if (!hProfile)                          // can't allocate
+        return NULL;
+
+    cmsSetProfileVersion(hProfile, 4.3);
+
+    if (!SetTextTags(hProfile, L"NULL profile built-in")) goto Error;
+
+
+    cmsSetDeviceClass(hProfile, cmsSigOutputClass);
+    cmsSetColorSpace(hProfile,  cmsSigGrayData);
+    cmsSetPCS(hProfile,         cmsSigLabData);
+
+    // Create a valid ICC 4 structure
+    LUT = cmsPipelineAlloc(ContextID, 3, 1);
+    if (LUT == NULL) goto Error;
+    // One all-zero two-point curve feeds both curve stages and is freed right after they are allocated
+    EmptyTab[0] = EmptyTab[1] = EmptyTab[2] = cmsBuildTabulatedToneCurve16(ContextID, 2, Zero);
+    PostLin = cmsStageAllocToneCurves(ContextID, 3, EmptyTab);
+    OutLin  = cmsStageAllocToneCurves(ContextID, 1, EmptyTab);
+    cmsFreeToneCurve(EmptyTab[0]);
+    // NOTE(review): if an insertion before OutLin's fails, OutLin is not in LUT yet - possible leak, confirm
+    if (!cmsPipelineInsertStage(LUT, cmsAT_END, PostLin))
+        goto Error;
+
+    if (!cmsPipelineInsertStage(LUT, cmsAT_END, cmsStageAllocMatrix(ContextID, 1, 3, PickLstarMatrix, NULL)))
+        goto Error;
+
+    if (!cmsPipelineInsertStage(LUT, cmsAT_END, OutLin))
+        goto Error;
+
+    if (!cmsWriteTag(hProfile, cmsSigBToA0Tag, (void*) LUT)) goto Error;
+    if (!cmsWriteTag(hProfile, cmsSigMediaWhitePointTag, cmsD50_XYZ())) goto Error;
+
+    cmsPipelineFree(LUT);
+    return hProfile;
+
+Error:
+    // Release whatever was created before the failing step
+    if (LUT != NULL)
+        cmsPipelineFree(LUT);
+
+    if (hProfile != NULL)
+        cmsCloseProfile(hProfile);
+
+    return NULL;
+}
+
+cmsHPROFILE CMSEXPORT cmsCreateNULLProfile(void)
+{   // Variant without a context argument: forwards to cmsCreateNULLProfileTHR with a NULL context
+    return cmsCreateNULLProfileTHR(NULL);
+}
+
+// Returns non-zero when ColorSpace is one of the two PCS encodings tested here (XYZ or Lab)
+static
+int IsPCS(cmsColorSpaceSignature ColorSpace)
+{
+    return (ColorSpace == cmsSigXYZData ||
+            ColorSpace == cmsSigLabData);
+}
+
+// Writes device class, color space and PCS into hProfile; with cmsFLAGS_GUESSDEVICECLASS the class is guessed from which side is a PCS
+static
+void FixColorSpaces(cmsHPROFILE hProfile,
+                              cmsColorSpaceSignature ColorSpace,
+                              cmsColorSpaceSignature PCS,
+                              cmsUInt32Number dwFlags)
+{
+    if (dwFlags & cmsFLAGS_GUESSDEVICECLASS) {
+
+            if (IsPCS(ColorSpace) && IsPCS(PCS)) {
+
+                    cmsSetDeviceClass(hProfile,      cmsSigAbstractClass);
+                    cmsSetColorSpace(hProfile,       ColorSpace);
+                    cmsSetPCS(hProfile,              PCS);
+                    return;
+            }
+
+            if (IsPCS(ColorSpace) && !IsPCS(PCS)) {
+                    // Only the input side is a PCS: written as an output profile, so the two signatures are swapped
+                    cmsSetDeviceClass(hProfile, cmsSigOutputClass);
+                    cmsSetPCS(hProfile,         ColorSpace);
+                    cmsSetColorSpace(hProfile,  PCS);
+                    return;
+            }
+
+            if (IsPCS(PCS) && !IsPCS(ColorSpace)) {
+
+                   cmsSetDeviceClass(hProfile,  cmsSigInputClass);
+                   cmsSetColorSpace(hProfile,   ColorSpace);
+                   cmsSetPCS(hProfile,          PCS);
+                   return;
+            }
+    }
+    // Fallback: flag not set, or neither side is a PCS -> plain device link
+    cmsSetDeviceClass(hProfile,      cmsSigLinkClass);
+    cmsSetColorSpace(hProfile,       ColorSpace);
+    cmsSetPCS(hProfile,              PCS);
+}
+
+
+
+// This function creates a named color profile dumping all the contents of transform to a single profile
+// In this way, LittleCMS may be used to "group" several named color databases into a single profile.
+// It has, however, several minor limitations. PCS is always Lab, which is not very critical since this
+// is the normal PCS for named color profiles. Returns NULL on failure. Note it changes xform's buffer formats.
+static
+cmsHPROFILE CreateNamedColorDevicelink(cmsHTRANSFORM xform)
+{
+    _cmsTRANSFORM* v = (_cmsTRANSFORM*) xform;
+    cmsHPROFILE hICC = NULL;
+    cmsUInt32Number i, nColors;
+    cmsNAMEDCOLORLIST *nc2 = NULL, *Original = NULL;
+
+    // Create an empty placeholder
+    hICC = cmsCreateProfilePlaceholder(v->ContextID);
+    if (hICC == NULL) return NULL;
+
+    // Critical information
+    cmsSetDeviceClass(hICC, cmsSigNamedColorClass);
+    cmsSetColorSpace(hICC, v ->ExitColorSpace);
+    cmsSetPCS(hICC, cmsSigLabData);
+
+    // Tag profile with information
+    if (!SetTextTags(hICC, L"Named color devicelink")) goto Error;
+
+    Original = cmsGetNamedColorList(xform);
+    if (Original == NULL) goto Error;
+
+    nColors = cmsNamedColorCount(Original);
+    nc2     = cmsDupNamedColorList(Original);
+    if (nc2 == NULL) goto Error;
+
+    // Colorant count now depends on the output space
+    nc2 ->ColorantCount = cmsPipelineOutputChannels(v ->Lut);
+
+    // Make sure we have proper formatters
+    cmsChangeBuffersFormat(xform, TYPE_NAMED_COLOR_INDEX,
+        FLOAT_SH(0) | COLORSPACE_SH(_cmsLCMScolorSpace(v ->ExitColorSpace))
+        | BYTES_SH(2) | CHANNELS_SH(cmsChannelsOf(v ->ExitColorSpace)));
+
+    // Apply the transform to colorants.
+    for (i=0; i < nColors; i++) {
+        cmsDoTransform(xform, &i, nc2 ->List[i].DeviceColorant, 1);
+    }
+
+    if (!cmsWriteTag(hICC, cmsSigNamedColor2Tag, (void*) nc2)) goto Error;
+    cmsFreeNamedColorList(nc2);    // our duplicate is still ours after the write, so it is released here
+    return hICC;
+
+Error:
+    if (nc2 != NULL) cmsFreeNamedColorList(nc2);    // previously leaked when cmsWriteTag failed
+    if (hICC != NULL) cmsCloseProfile(hICC);
+    return NULL;
+}
+
+
+// This structure holds information about which combinations of MPE (pipeline stages) can be stored on a profile, based on the version
+
+typedef struct {
+    cmsBool              IsV4;             // Is a V4 tag?
+    cmsTagSignature      RequiredTag;      // Set to 0 for both types
+    cmsTagTypeSignature  LutType;          // The LUT type
+    int                  nTypes;           // Number of types (up to 5)
+    cmsStageSignature    MpeTypes[5];      // 5 is the maximum number
+
+} cmsAllowedLUT;
+
+#define cmsSig0 ((cmsTagSignature) 0) 
+
+static const cmsAllowedLUT AllowedLUTTypes[] = {
+
+    { FALSE, cmsSig0,        cmsSigLut16Type, 4, { cmsSigMatrixElemType, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType } },
+    { FALSE, cmsSig0,        cmsSigLut16Type, 3, { cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType } },
+    { FALSE, cmsSig0,        cmsSigLut16Type, 2, { cmsSigCurveSetElemType, cmsSigCLutElemType } },
+    { TRUE,  cmsSig0,        cmsSigLutAtoBType, 1, { cmsSigCurveSetElemType } },
+    { TRUE , cmsSigAToB0Tag, cmsSigLutAtoBType,  3,  { cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType } },
+    { TRUE , cmsSigAToB0Tag, cmsSigLutAtoBType,  3,  { cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType   } },
+    { TRUE , cmsSigAToB0Tag, cmsSigLutAtoBType,  5,  { cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType }},
+    { TRUE , cmsSigBToA0Tag, cmsSigLutBtoAType,  1,  { cmsSigCurveSetElemType }},
+    { TRUE , cmsSigBToA0Tag, cmsSigLutBtoAType,  3,  { cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType }},
+    { TRUE , cmsSigBToA0Tag, cmsSigLutBtoAType,  3,  { cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType }},
+    { TRUE , cmsSigBToA0Tag, cmsSigLutBtoAType,  5,  { cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType }}
+};
+
+#define SIZE_OF_ALLOWED_LUT (sizeof(AllowedLUTTypes)/sizeof(cmsAllowedLUT))
+
+// Check a single entry: TRUE only if the pipeline's stages match Tab->MpeTypes exactly, in order and in number
+static
+cmsBool CheckOne(const cmsAllowedLUT* Tab, const cmsPipeline* Lut)
+{
+    cmsStage* mpe;
+    int n;
+
+    for (n=0, mpe = Lut ->Elements; mpe != NULL; mpe = mpe ->Next, n++) {
+        // Must be >=, not >: with n == nTypes the lookup below would read past the used part of MpeTypes[] (out of bounds when nTypes is 5)
+        if (n >= Tab ->nTypes) return FALSE;
+        if (cmsStageType(mpe) != Tab ->MpeTypes[n]) return FALSE;
+    }
+
+    return (n == Tab ->nTypes);
+}
+
+// Returns the first AllowedLUTTypes entry whose version (IsV4), tag (RequiredTag 0 accepts any) and stage layout match Lut; NULL if none
+static
+const cmsAllowedLUT* FindCombination(const cmsPipeline* Lut, cmsBool IsV4, cmsTagSignature DestinationTag)
+{
+    cmsUInt32Number n;
+
+    for (n=0; n < SIZE_OF_ALLOWED_LUT; n++) {
+
+        const cmsAllowedLUT* Tab = AllowedLUTTypes + n;
+        // XOR of the two flags: skip entries whose IsV4 differs from the requested one
+        if (IsV4 ^ Tab -> IsV4) continue;
+        if ((Tab ->RequiredTag != 0) && (Tab ->RequiredTag != DestinationTag)) continue;
+
+        if (CheckOne(Tab, Lut)) return Tab;
+    }
+
+    return NULL;
+}
+
+
+// Converts a transform into a device link profile. Returns NULL on failure.
+cmsHPROFILE CMSEXPORT cmsTransform2DeviceLink(cmsHTRANSFORM hTransform, cmsFloat64Number Version, cmsUInt32Number dwFlags)
+{
+    cmsHPROFILE hProfile = NULL;
+    cmsUInt32Number FrmIn, FrmOut, ChansIn, ChansOut;
+    int ColorSpaceBitsIn, ColorSpaceBitsOut;
+    _cmsTRANSFORM* xform = (_cmsTRANSFORM*) hTransform;
+    cmsPipeline* LUT = NULL;
+    cmsStage* mpe;
+    cmsContext ContextID = cmsGetTransformContextID(hTransform);
+    const cmsAllowedLUT* AllowedLUT;
+    cmsTagSignature DestinationTag;
+    cmsProfileClassSignature deviceClass; 
+
+    _cmsAssert(hTransform != NULL);
+
+    // Get the first mpe to check for named color
+    mpe = cmsPipelineGetPtrToFirstStage(xform ->Lut);
+
+    // Check if is a named color transform
+    if (mpe != NULL) {
+
+        if (cmsStageType(mpe) == cmsSigNamedColorElemType) {
+            return CreateNamedColorDevicelink(hTransform);
+        }
+    }
+
+    // First thing to do is to get a copy of the transformation
+    LUT = cmsPipelineDup(xform ->Lut);
+    if (LUT == NULL) return NULL;
+
+    // Time to fix the Lab2/Lab4 issue.
+    if ((xform ->EntryColorSpace == cmsSigLabData) && (Version < 4.0)) {
+
+        if (!cmsPipelineInsertStage(LUT, cmsAT_BEGIN, _cmsStageAllocLabV2ToV4curves(ContextID)))
+            goto Error;
+    }
+
+    // On the output side too
+    if ((xform ->ExitColorSpace) == cmsSigLabData && (Version < 4.0)) {
+
+        if (!cmsPipelineInsertStage(LUT, cmsAT_END, _cmsStageAllocLabV4ToV2(ContextID)))
+            goto Error;
+    }
+
+
+    hProfile = cmsCreateProfilePlaceholder(ContextID);
+    if (!hProfile) goto Error;                    // can't allocate
+
+    cmsSetProfileVersion(hProfile, Version);
+
+    FixColorSpaces(hProfile, xform -> EntryColorSpace, xform -> ExitColorSpace, dwFlags);
+
+    // Optimize the LUT and precalculate a devicelink
+
+    ChansIn  = cmsChannelsOf(xform -> EntryColorSpace);
+    ChansOut = cmsChannelsOf(xform -> ExitColorSpace);
+
+    ColorSpaceBitsIn  = _cmsLCMScolorSpace(xform -> EntryColorSpace);
+    ColorSpaceBitsOut = _cmsLCMScolorSpace(xform -> ExitColorSpace);
+
+    FrmIn  = COLORSPACE_SH(ColorSpaceBitsIn) | CHANNELS_SH(ChansIn)|BYTES_SH(2);
+    FrmOut = COLORSPACE_SH(ColorSpaceBitsOut) | CHANNELS_SH(ChansOut)|BYTES_SH(2);
+
+    deviceClass = cmsGetDeviceClass(hProfile);
+
+     if (deviceClass == cmsSigOutputClass)
+         DestinationTag = cmsSigBToA0Tag;
+     else
+         DestinationTag = cmsSigAToB0Tag;
+
+    // Check if the profile/version can store the result
+    if (dwFlags & cmsFLAGS_FORCE_CLUT)
+        AllowedLUT = NULL;
+    else
+        AllowedLUT = FindCombination(LUT, Version >= 4.0, DestinationTag);
+
+    if (AllowedLUT == NULL) {
+
+        // Try to optimize
+        _cmsOptimizePipeline(ContextID, &LUT, xform ->RenderingIntent, &FrmIn, &FrmOut, &dwFlags);
+        AllowedLUT = FindCombination(LUT, Version >= 4.0, DestinationTag);
+
+    }
+
+    // If no way, then force CLUT that for sure can be written
+    if (AllowedLUT == NULL) {
+
+        cmsStage* FirstStage;
+        cmsStage* LastStage;
+
+        dwFlags |= cmsFLAGS_FORCE_CLUT;
+        _cmsOptimizePipeline(ContextID, &LUT, xform ->RenderingIntent, &FrmIn, &FrmOut, &dwFlags);
+
+        // Put identity curves if needed
+        FirstStage = cmsPipelineGetPtrToFirstStage(LUT);
+        if (FirstStage != NULL && FirstStage ->Type != cmsSigCurveSetElemType)
+             if (!cmsPipelineInsertStage(LUT, cmsAT_BEGIN, _cmsStageAllocIdentityCurves(ContextID, ChansIn)))
+                 goto Error;
+
+        LastStage = cmsPipelineGetPtrToLastStage(LUT);
+        if (LastStage != NULL && LastStage ->Type != cmsSigCurveSetElemType)
+             if (!cmsPipelineInsertStage(LUT, cmsAT_END,   _cmsStageAllocIdentityCurves(ContextID, ChansOut)))
+                 goto Error;
+
+        AllowedLUT = FindCombination(LUT, Version >= 4.0, DestinationTag);
+    }
+
+    // Something is wrong...
+    if (AllowedLUT == NULL) {
+        goto Error;
+    }
+
+
+    if (dwFlags & cmsFLAGS_8BITS_DEVICELINK)
+                     cmsPipelineSetSaveAs8bitsFlag(LUT, TRUE);
+
+    // Tag profile with information
+    if (!SetTextTags(hProfile, L"devicelink")) goto Error;
+
+    // Store result
+    if (!cmsWriteTag(hProfile, DestinationTag, LUT)) goto Error;
+
+
+    if (xform -> InputColorant != NULL) {
+           if (!cmsWriteTag(hProfile, cmsSigColorantTableTag, xform->InputColorant)) goto Error;
+    }
+
+    if (xform -> OutputColorant != NULL) {
+           if (!cmsWriteTag(hProfile, cmsSigColorantTableOutTag, xform->OutputColorant)) goto Error;
+    }
+
+    if ((deviceClass == cmsSigLinkClass) && (xform ->Sequence != NULL)) {
+        if (!_cmsWriteProfileSequence(hProfile, xform ->Sequence)) goto Error;
+    }
+
+    // Set the white point
+    if (deviceClass == cmsSigInputClass) {
+        if (!cmsWriteTag(hProfile, cmsSigMediaWhitePointTag, &xform ->EntryWhitePoint)) goto Error;
+    }
+    else {
+         if (!cmsWriteTag(hProfile, cmsSigMediaWhitePointTag, &xform ->ExitWhitePoint)) goto Error;
+    }
+
+  
+    // Per 7.2.15 in spec 4.3
+    cmsSetHeaderRenderingIntent(hProfile, xform ->RenderingIntent);
+
+    cmsPipelineFree(LUT);
+    return hProfile;
+
+Error:
+    if (LUT != NULL) cmsPipelineFree(LUT);
+    cmsCloseProfile(hProfile);
+    return NULL;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmswtpnt.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmswtpnt.cpp
new file mode 100755
index 0000000000..9f90d6a6f8
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmswtpnt.cpp
@@ -0,0 +1,350 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+
+// D50 - Widely used. Returns a pointer to a function-static D50 white point in XYZ; the caller must not free it.
+const cmsCIEXYZ* CMSEXPORT cmsD50_XYZ(void)
+{
+    static cmsCIEXYZ D50XYZ = {cmsD50X, cmsD50Y, cmsD50Z};
+
+    return &D50XYZ;
+}
+
+const cmsCIExyY* CMSEXPORT cmsD50_xyY(void)
+{
+    static cmsCIExyY D50xyY;
+    // Recomputed from cmsD50_XYZ() on every call; the returned pointer refers to this function-static
+    cmsXYZ2xyY(&D50xyY, cmsD50_XYZ());
+
+    return &D50xyY;
+}
+
+// Obtains WhitePoint (xyY with Y = 1.0) from a temperature TempK in 4000K..25000K; otherwise signals cmsERROR_RANGE and returns FALSE.
+cmsBool  CMSEXPORT cmsWhitePointFromTemp(cmsCIExyY* WhitePoint, cmsFloat64Number TempK)
+{
+    cmsFloat64Number x, y;
+    cmsFloat64Number T, T2, T3;
+    // cmsFloat64Number M1, M2;
+
+    _cmsAssert(WhitePoint != NULL);
+
+    T = TempK;
+    T2 = T*T;            // Square
+    T3 = T2*T;           // Cube
+
+    // For correlated color temperature (T) between 4000K and 7000K:
+
+    if (T >= 4000. && T <= 7000.)
+    {
+        x = -4.6070*(1E9/T3) + 2.9678*(1E6/T2) + 0.09911*(1E3/T) + 0.244063;
+    }
+    else
+        // or for correlated color temperature (T) between 7000K and 25000K:
+
+        if (T > 7000.0 && T <= 25000.0)
+        {
+            x = -2.0064*(1E9/T3) + 1.9018*(1E6/T2) + 0.24748*(1E3/T) + 0.237040;
+        }
+        else {
+            cmsSignalError(0, cmsERROR_RANGE, "cmsWhitePointFromTemp: invalid temp");
+            return FALSE;
+        }
+        // NOTE: the extra indentation below is cosmetic; these statements run after the if/else chain, not inside it
+        // Obtain y(x)
+        y = -3.000*(x*x) + 2.870*x - 0.275;
+
+        // wave factors (not used, but here for futures extensions)
+
+        // M1 = (-1.3515 - 1.7703*x + 5.9114 *y)/(0.0241 + 0.2562*x - 0.7341*y);
+        // M2 = (0.0300 - 31.4424*x + 30.0717*y)/(0.0241 + 0.2562*x - 0.7341*y);
+
+        WhitePoint -> x = x;
+        WhitePoint -> y = y;
+        WhitePoint -> Y = 1.0;
+
+        return TRUE;
+}
+
+
+// One row of the isotemperature-line table (isotempdata) used by Robertson's method
+typedef struct {
+
+    cmsFloat64Number mirek;  // temp (in microreciprocal kelvin)
+    cmsFloat64Number ut;     // u coord of intersection w/ blackbody locus
+    cmsFloat64Number vt;     // v coord of intersection w/ blackbody locus
+    cmsFloat64Number tt;     // slope of isotemperature line
+
+    } ISOTEMPERATURE;
+
+static const ISOTEMPERATURE isotempdata[] = {
+//  {Mirek, Ut,       Vt,      Tt      }
+    {0,     0.18006,  0.26352,  -0.24341},
+    {10,    0.18066,  0.26589,  -0.25479},
+    {20,    0.18133,  0.26846,  -0.26876},
+    {30,    0.18208,  0.27119,  -0.28539},
+    {40,    0.18293,  0.27407,  -0.30470},
+    {50,    0.18388,  0.27709,  -0.32675},
+    {60,    0.18494,  0.28021,  -0.35156},
+    {70,    0.18611,  0.28342,  -0.37915},
+    {80,    0.18740,  0.28668,  -0.40955},
+    {90,    0.18880,  0.28997,  -0.44278},
+    {100,   0.19032,  0.29326,  -0.47888},
+    {125,   0.19462,  0.30141,  -0.58204},
+    {150,   0.19962,  0.30921,  -0.70471},
+    {175,   0.20525,  0.31647,  -0.84901},
+    {200,   0.21142,  0.32312,  -1.0182 },
+    {225,   0.21807,  0.32909,  -1.2168 },
+    {250,   0.22511,  0.33439,  -1.4512 },
+    {275,   0.23247,  0.33904,  -1.7298 },
+    {300,   0.24010,  0.34308,  -2.0637 },
+    {325,   0.24702,  0.34655,  -2.4681 },
+    {350,   0.25591,  0.34951,  -2.9641 },
+    {375,   0.26400,  0.35200,  -3.5814 },
+    {400,   0.27218,  0.35407,  -4.3633 },
+    {425,   0.28039,  0.35577,  -5.3762 },
+    {450,   0.28863,  0.35714,  -6.7262 },
+    {475,   0.29685,  0.35823,  -8.5955 },
+    {500,   0.30505,  0.35907,  -11.324 },
+    {525,   0.31320,  0.35968,  -15.628 },
+    {550,   0.32129,  0.36011,  -23.325 },
+    {575,   0.32931,  0.36038,  -40.770 },
+    {600,   0.33724,  0.36051,  -116.45  }
+};
+
+#define NISO (sizeof(isotempdata)/sizeof(ISOTEMPERATURE))   // number of rows in isotempdata; parenthesized so it expands safely inside larger expressions
+
+
+// Robertson's method: finds two consecutive isotemperature lines that WhitePoint lies between and interpolates their mirek values. FALSE if none.
+cmsBool  CMSEXPORT cmsTempFromWhitePoint(cmsFloat64Number* TempK, const cmsCIExyY* WhitePoint)
+{
+    cmsUInt32Number j;
+    cmsFloat64Number us,vs;
+    cmsFloat64Number uj,vj,tj,di,dj,mi,mj;
+    cmsFloat64Number xs, ys;
+
+    _cmsAssert(WhitePoint != NULL);
+    _cmsAssert(TempK != NULL);
+
+    di = mi = 0;
+    xs = WhitePoint -> x;
+    ys = WhitePoint -> y;
+
+    // convert (x,y) to CIE 1960 (u,v)
+
+    us = (2*xs) / (-xs + 6*ys + 1.5);
+    vs = (3*ys) / (-xs + 6*ys + 1.5);
+
+
+    for (j=0; j < NISO; j++) {
+
+        uj = isotempdata[j].ut;
+        vj = isotempdata[j].vt;
+        tj = isotempdata[j].tt;
+        mj = isotempdata[j].mirek;
+
+        dj = ((vs - vj) - tj * (us - uj)) / sqrt(1.0 + tj * tj);
+        // di/dj < 0 means the signed distances to lines j-1 and j have opposite signs, i.e. the point lies between them
+        if ((j != 0) && (di/dj < 0.0)) {
+
+            // Found a match
+            *TempK = 1000000.0 / (mi + (di / (di - dj)) * (mj - mi));
+            return TRUE;
+        }
+
+        di = dj;
+        mi = mj;
+    }
+
+    // Not found
+    return FALSE;
+}
+
+
+// Compute chromatic adaptation matrix using Chad as cone matrix: cone responses are scaled by the dest/source white ratio.
+// Returns FALSE if Chad cannot be inverted or a source-white cone response is zero (the ratio would divide by zero).
+static
+cmsBool ComputeChromaticAdaptation(cmsMAT3* Conversion,
+                                const cmsCIEXYZ* SourceWhitePoint,
+                                const cmsCIEXYZ* DestWhitePoint,
+                                const cmsMAT3* Chad)
+
+{
+
+    cmsMAT3 Chad_Inv;
+    cmsVEC3 ConeSourceXYZ, ConeSourceRGB;
+    cmsVEC3 ConeDestXYZ, ConeDestRGB;
+    cmsMAT3 Cone, Tmp;
+
+
+    Tmp = *Chad;
+    if (!_cmsMAT3inverse(&Tmp, &Chad_Inv)) return FALSE;
+
+    _cmsVEC3init(&ConeSourceXYZ, SourceWhitePoint -> X,
+                             SourceWhitePoint -> Y,
+                             SourceWhitePoint -> Z);
+
+    _cmsVEC3init(&ConeDestXYZ,   DestWhitePoint -> X,
+                             DestWhitePoint -> Y,
+                             DestWhitePoint -> Z);
+
+    _cmsMAT3eval(&ConeSourceRGB, Chad, &ConeSourceXYZ);
+    _cmsMAT3eval(&ConeDestRGB,   Chad, &ConeDestXYZ);
+    if (ConeSourceRGB.n[0] == 0.0 || ConeSourceRGB.n[1] == 0.0 || ConeSourceRGB.n[2] == 0.0) return FALSE;  // guards the divisions below
+    // Build matrix
+    _cmsVEC3init(&Cone.v[0], ConeDestRGB.n[0]/ConeSourceRGB.n[0],    0.0,  0.0);
+    _cmsVEC3init(&Cone.v[1], 0.0,   ConeDestRGB.n[1]/ConeSourceRGB.n[1],   0.0);
+    _cmsVEC3init(&Cone.v[2], 0.0,   0.0,   ConeDestRGB.n[2]/ConeSourceRGB.n[2]);
+
+
+    // Normalize
+    _cmsMAT3per(&Tmp, &Cone, Chad);
+    _cmsMAT3per(Conversion, &Chad_Inv, &Tmp);
+
+    return TRUE;
+}
+
+// Returns the final chromatic adaptation from illuminant FromIll to Illuminant ToIll
+// The cone matrix can be specified in ConeMatrix. If NULL, Bradford is assumed
+cmsBool  _cmsAdaptationMatrix(cmsMAT3* r, const cmsMAT3* ConeMatrix, const cmsCIEXYZ* FromIll, const cmsCIEXYZ* ToIll)
+{
+    cmsMAT3 LamRigg   = {{ // Bradford matrix
+        {{  0.8951,  0.2664, -0.1614 }},
+        {{ -0.7502,  1.7135,  0.0367 }},
+        {{  0.0389, -0.0685,  1.0296 }}
+    }};
+
+    if (ConeMatrix == NULL)
+        ConeMatrix = &LamRigg;
+
+    return ComputeChromaticAdaptation(r, FromIll, ToIll, ConeMatrix);
+}
+
+// Same as the previous function, but assuming D50 destination. White point is given in xyY; r is updated in place.
+static
+cmsBool _cmsAdaptMatrixToD50(cmsMAT3* r, const cmsCIExyY* SourceWhitePt)
+{
+    cmsCIEXYZ Dn;
+    cmsMAT3 Bradford;
+    cmsMAT3 Tmp;
+
+    cmsxyY2XYZ(&Dn, SourceWhitePt);
+
+    if (!_cmsAdaptationMatrix(&Bradford, NULL, &Dn, cmsD50_XYZ())) return FALSE;
+
+    Tmp = *r;
+    _cmsMAT3per(r, &Bradford, &Tmp);
+
+    return TRUE;
+}
+
+// Build a White point, primary chromas transfer matrix from RGB to CIE XYZ
+// This is just an approximation, I am not handling all the non-linear
+// aspects of the RGB to XYZ process, and assuming that the gamma correction
+// has transitive property in the transformation chain.
+//
+// the algorithm:
+//
+//            - First I build the absolute conversion matrix using
+//              primaries in XYZ. This matrix is next inverted
+//            - Then I eval the source white point across this matrix
+//              obtaining the coefficients of the transformation
+//            - Then, I apply these coefficients to the original matrix
+//
+cmsBool _cmsBuildRGB2XYZtransferMatrix(cmsMAT3* r, const cmsCIExyY* WhitePt, const cmsCIExyYTRIPLE* Primrs)
+{
+    cmsVEC3 WhitePoint, Coef;
+    cmsMAT3 Result, Primaries;
+    cmsFloat64Number xn, yn;
+    cmsFloat64Number xr, yr;
+    cmsFloat64Number xg, yg;
+    cmsFloat64Number xb, yb;
+
+    xn = WhitePt -> x;
+    yn = WhitePt -> y;
+    xr = Primrs -> Red.x;
+    yr = Primrs -> Red.y;
+    xg = Primrs -> Green.x;
+    yg = Primrs -> Green.y;
+    xb = Primrs -> Blue.x;
+    yb = Primrs -> Blue.y;
+
+    // Build Primaries matrix
+    _cmsVEC3init(&Primaries.v[0], xr,        xg,         xb);
+    _cmsVEC3init(&Primaries.v[1], yr,        yg,         yb);
+    _cmsVEC3init(&Primaries.v[2], (1-xr-yr), (1-xg-yg),  (1-xb-yb));
+
+
+    // Result = Primaries ^ (-1) inverse matrix
+    if (!_cmsMAT3inverse(&Primaries, &Result))
+        return FALSE;
+
+
+    _cmsVEC3init(&WhitePoint, xn/yn, 1.0, (1.0-xn-yn)/yn);
+
+    // Across inverse primaries ...
+    _cmsMAT3eval(&Coef, &Result, &WhitePoint);
+
+    // Give us the Coefs, then I build transformation matrix
+    _cmsVEC3init(&r -> v[0], Coef.n[VX]*xr,          Coef.n[VY]*xg,          Coef.n[VZ]*xb);
+    _cmsVEC3init(&r -> v[1], Coef.n[VX]*yr,          Coef.n[VY]*yg,          Coef.n[VZ]*yb);
+    _cmsVEC3init(&r -> v[2], Coef.n[VX]*(1.0-xr-yr), Coef.n[VY]*(1.0-xg-yg), Coef.n[VZ]*(1.0-xb-yb));
+
+
+    return _cmsAdaptMatrixToD50(r, WhitePt);
+
+}
+
+
+// Adapts a color to a given illuminant. Original color is expected to have
+// a SourceWhitePt white point.
+cmsBool CMSEXPORT cmsAdaptToIlluminant(cmsCIEXYZ* Result,
+                                       const cmsCIEXYZ* SourceWhitePt,
+                                       const cmsCIEXYZ* Illuminant,
+                                       const cmsCIEXYZ* Value)
+{
+    cmsMAT3 Bradford;
+    cmsVEC3 In, Out;
+
+    _cmsAssert(Result != NULL);
+    _cmsAssert(SourceWhitePt != NULL);
+    _cmsAssert(Illuminant != NULL);
+    _cmsAssert(Value != NULL);
+
+    if (!_cmsAdaptationMatrix(&Bradford, NULL, SourceWhitePt, Illuminant)) return FALSE;
+
+    _cmsVEC3init(&In, Value -> X, Value -> Y, Value -> Z);
+    _cmsMAT3eval(&Out, &Bradford, &In);
+
+    Result -> X = Out.n[0];
+    Result -> Y = Out.n[1];
+    Result -> Z = Out.n[2];
+
+    return TRUE;
+}
+
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsxform.cpp b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsxform.cpp
new file mode 100755
index 0000000000..6b2950e4eb
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/cmsxform.cpp
@@ -0,0 +1,1339 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2_internal.h"
+
+// Transformations stuff
+// -----------------------------------------------------------------------
+
+#define DEFAULT_OBSERVER_ADAPTATION_STATE 1.0
+
+// The Context0 observer adaptation state.
+_cmsAdaptationStateChunkType _cmsAdaptationStateChunk = { DEFAULT_OBSERVER_ADAPTATION_STATE };
+
+// Init and duplicate observer adaptation state
+void _cmsAllocAdaptationStateChunk(struct _cmsContext_struct* ctx, 
+                                   const struct _cmsContext_struct* src)
+{
+    static _cmsAdaptationStateChunkType AdaptationStateChunk = { DEFAULT_OBSERVER_ADAPTATION_STATE };
+    void* from;
+     
+    if (src != NULL) {
+        from = src ->chunks[AdaptationStateContext];       
+    }
+    else {
+       from = &AdaptationStateChunk;
+    }
+    
+    ctx ->chunks[AdaptationStateContext] = _cmsSubAllocDup(ctx ->MemPool, from, sizeof(_cmsAdaptationStateChunkType));     
+}
+
+
+// Sets adaptation state for absolute colorimetric intent in the given context.  Adaptation state applies on all 
+// but cmsCreateExtendedTransformTHR().  Little CMS can handle incomplete adaptation states.
+cmsFloat64Number CMSEXPORT cmsSetAdaptationStateTHR(cmsContext ContextID, cmsFloat64Number d)
+{
+    cmsFloat64Number prev;
+    _cmsAdaptationStateChunkType* ptr = (_cmsAdaptationStateChunkType*) _cmsContextGetClientChunk(ContextID, AdaptationStateContext);
+
+    // Get previous value for return
+    prev = ptr ->AdaptationState;
+
+    // Set the value if d is positive or zero
+    if (d >= 0.0) {
+
+        ptr ->AdaptationState = d;
+    }
+
+    // Always return previous value
+    return prev;
+}
+
+
+// The adaptation state may be defaulted by this function. If you don't like it, use the extended transform routine
+cmsFloat64Number CMSEXPORT cmsSetAdaptationState(cmsFloat64Number d)
+{    
+    return cmsSetAdaptationStateTHR(NULL, d);
+}
+
+// -----------------------------------------------------------------------
+
+// Alarm codes for 16-bit transformations: because of the fixed range of the containers, there are
+// no values left to mark out of gamut.
+
+#define DEFAULT_ALARM_CODES_VALUE {0x7F00, 0x7F00, 0x7F00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+
+_cmsAlarmCodesChunkType _cmsAlarmCodesChunk = { DEFAULT_ALARM_CODES_VALUE };
+
+// Sets the codes used to mark out-of-gamut on Proofing transforms for a given context. Values are meant to be
+// encoded in 16 bits.
+void CMSEXPORT cmsSetAlarmCodesTHR(cmsContext ContextID, const cmsUInt16Number AlarmCodesP[cmsMAXCHANNELS])
+{
+    _cmsAlarmCodesChunkType* ContextAlarmCodes = (_cmsAlarmCodesChunkType*) _cmsContextGetClientChunk(ContextID, AlarmCodesContext);
+       
+    _cmsAssert(ContextAlarmCodes != NULL); // Can't happen
+    
+    memcpy(ContextAlarmCodes->AlarmCodes, AlarmCodesP, sizeof(ContextAlarmCodes->AlarmCodes));    
+}
+
+// Gets the current codes used to mark out-of-gamut on Proofing transforms for the given context.
+// Values are meant to be encoded in 16 bits.
+void CMSEXPORT cmsGetAlarmCodesTHR(cmsContext ContextID, cmsUInt16Number AlarmCodesP[cmsMAXCHANNELS])
+{
+    _cmsAlarmCodesChunkType* ContextAlarmCodes = (_cmsAlarmCodesChunkType*) _cmsContextGetClientChunk(ContextID, AlarmCodesContext);
+
+    _cmsAssert(ContextAlarmCodes != NULL); // Can't happen
+
+    memcpy(AlarmCodesP, ContextAlarmCodes->AlarmCodes, sizeof(ContextAlarmCodes->AlarmCodes));
+}
+
+void CMSEXPORT cmsSetAlarmCodes(const cmsUInt16Number NewAlarm[cmsMAXCHANNELS])
+{
+    _cmsAssert(NewAlarm != NULL);
+
+    cmsSetAlarmCodesTHR(NULL, NewAlarm);
+}
+
+void CMSEXPORT cmsGetAlarmCodes(cmsUInt16Number OldAlarm[cmsMAXCHANNELS])
+{ 
+    _cmsAssert(OldAlarm != NULL);
+    cmsGetAlarmCodesTHR(NULL, OldAlarm);
+}
+
+
+// Init and duplicate alarm codes
+void _cmsAllocAlarmCodesChunk(struct _cmsContext_struct* ctx, 
+                              const struct _cmsContext_struct* src)
+{
+    static _cmsAlarmCodesChunkType AlarmCodesChunk = { DEFAULT_ALARM_CODES_VALUE };
+    void* from;
+     
+    if (src != NULL) {
+        from = src ->chunks[AlarmCodesContext];       
+    }
+    else {
+       from = &AlarmCodesChunk;
+    }
+    
+    ctx ->chunks[AlarmCodesContext] = _cmsSubAllocDup(ctx ->MemPool, from, sizeof(_cmsAlarmCodesChunkType));     
+}
+
+// -----------------------------------------------------------------------
+
+// Get rid of transform resources: frees every owned sub-object that is set, then the transform itself
+void CMSEXPORT cmsDeleteTransform(cmsHTRANSFORM hTransform)
+{
+    _cmsTRANSFORM* p = (_cmsTRANSFORM*) hTransform;
+
+    _cmsAssert(p != NULL);
+
+    if (p -> GamutCheck)
+        cmsPipelineFree(p -> GamutCheck);
+
+    if (p -> Lut)
+        cmsPipelineFree(p -> Lut);
+
+    if (p ->InputColorant)
+        cmsFreeNamedColorList(p ->InputColorant);
+
+    if (p -> OutputColorant)
+        cmsFreeNamedColorList(p ->OutputColorant);
+
+    if (p ->Sequence)
+        cmsFreeProfileSequenceDescription(p ->Sequence);
+    // NOTE(review): FreeUserData is called without its own NULL check -- presumably always set together with UserData; confirm
+    if (p ->UserData)
+        p ->FreeUserData(p ->ContextID, p ->UserData);
+
+    _cmsFree(p ->ContextID, (void *) p);
+}
+
+// Apply transform to Size pixels laid out as one line; Size is also passed as the plane stride for both buffers.
+void CMSEXPORT cmsDoTransform(cmsHTRANSFORM  Transform,
+                              const void* InputBuffer,
+                              void* OutputBuffer,
+                              cmsUInt32Number Size)
+
+{
+    _cmsTRANSFORM* p = (_cmsTRANSFORM*) Transform;
+    cmsStride stride;
+
+    stride.BytesPerLineIn = 0;  // Not used
+    stride.BytesPerLineOut = 0;
+    stride.BytesPerPlaneIn = Size;
+    stride.BytesPerPlaneOut = Size;
+    // The worker is called with Size pixels per line and a line count of 1
+    p -> xform(p, InputBuffer, OutputBuffer, Size, 1, &stride);
+}
+
+
+// This is a legacy stride for planar
+void CMSEXPORT cmsDoTransformStride(cmsHTRANSFORM  Transform,
+                              const void* InputBuffer,
+                              void* OutputBuffer,
+                              cmsUInt32Number Size, cmsUInt32Number Stride)
+
+{
+    _cmsTRANSFORM* p = (_cmsTRANSFORM*) Transform;
+    cmsStride stride;
+
+    stride.BytesPerLineIn = 0;  
+    stride.BytesPerLineOut = 0;
+    stride.BytesPerPlaneIn = Stride;
+    stride.BytesPerPlaneOut = Stride;
+
+    p -> xform(p, InputBuffer, OutputBuffer, Size, 1, &stride);
+}
+
+// The "fast" entry point for plug-ins: converts LineCount lines of
+// PixelsPerLine pixels each, with every line and plane stride given explicitly
+// by the caller for both the input and the output buffer.
+void CMSEXPORT cmsDoTransformLineStride(cmsHTRANSFORM  Transform,
+                              const void* InputBuffer,
+                              void* OutputBuffer,
+                              cmsUInt32Number PixelsPerLine,
+                              cmsUInt32Number LineCount,
+                              cmsUInt32Number BytesPerLineIn,
+                              cmsUInt32Number BytesPerLineOut,
+                              cmsUInt32Number BytesPerPlaneIn,
+                              cmsUInt32Number BytesPerPlaneOut)
+{
+    _cmsTRANSFORM* xf = (_cmsTRANSFORM*) Transform;
+    cmsStride layout;
+
+    // Input side
+    layout.BytesPerLineIn   = BytesPerLineIn;
+    layout.BytesPerPlaneIn  = BytesPerPlaneIn;
+
+    // Output side
+    layout.BytesPerLineOut  = BytesPerLineOut;
+    layout.BytesPerPlaneOut = BytesPerPlaneOut;
+
+    xf->xform(xf, InputBuffer, OutputBuffer, PixelsPerLine, LineCount, &layout);
+}
+
+
+
+// Transform routines ----------------------------------------------------------------------------------------------------------
+
+// Floating point transform worker. Since there are no performance issues, one
+// routine does the whole job, gamut check included. Because of the extended
+// range of floats, an out-of-gamut pixel is flagged by writing -1.0 to every
+// output channel.
+//
+//   p             - transform holding the float formatters, the pipeline (Lut)
+//                   and the optional gamut check pipeline (GamutCheck)
+//   in, out       - input and output raster buffers
+//   PixelsPerLine - number of pixels converted on each line
+//   LineCount     - number of lines
+//   Stride        - line strides (used here to step between lines) and plane
+//                   strides (handed to the formatters)
+static
+void FloatXFORM(_cmsTRANSFORM* p,
+                const void* in,
+                void* out, 
+                cmsUInt32Number PixelsPerLine,
+                cmsUInt32Number LineCount,
+                const cmsStride* Stride)
+{
+    cmsUInt8Number* accum;
+    cmsUInt8Number* output;
+    cmsFloat32Number fIn[cmsMAXCHANNELS], fOut[cmsMAXCHANNELS];
+    cmsFloat32Number OutOfGamut;
+    cmsUInt32Number i, j, c, strideIn, strideOut;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    strideIn = 0;
+    strideOut = 0;
+    memset(fIn, 0, sizeof(fIn));
+    memset(fOut, 0, sizeof(fOut));      // Size taken from the array actually being cleared
+
+    for (i = 0; i < LineCount; i++) {
+
+        // Start of the current line in each buffer
+        accum = (cmsUInt8Number*)in + strideIn;
+        output = (cmsUInt8Number*)out + strideOut;
+
+        for (j = 0; j < PixelsPerLine; j++) {
+
+            // The formatter returns the position of the next pixel
+            accum = p->FromInputFloat(p, fIn, accum, Stride->BytesPerPlaneIn);
+
+            // Any gamut check to do?
+            if (p->GamutCheck != NULL) {
+
+                // Evaluate gamut marker.
+                cmsPipelineEvalFloat(fIn, &OutOfGamut, p->GamutCheck);
+
+                // Is current color out of gamut?
+                if (OutOfGamut > 0.0) {
+
+                    // Certainly, out of gamut
+                    for (c = 0; c < cmsMAXCHANNELS; c++)
+                        fOut[c] = -1.0;
+
+                }
+                else {
+                    // No, proceed normally
+                    cmsPipelineEvalFloat(fIn, fOut, p->Lut);
+                }
+            }
+            else {
+
+                // No gamut check at all
+                cmsPipelineEvalFloat(fIn, fOut, p->Lut);
+            }
+
+
+            output = p->ToOutputFloat(p, fOut, output, Stride->BytesPerPlaneOut);
+        }
+
+        strideIn += Stride->BytesPerLineIn;
+        strideOut += Stride->BytesPerLineOut;
+    }
+
+}
+
+
+// Null floating point worker: each pixel is read with the input float
+// formatter and written straight back with the output float formatter.
+// No pipeline is evaluated.
+static
+void NullFloatXFORM(_cmsTRANSFORM* p,
+                    const void* in,
+                    void* out, 
+                    cmsUInt32Number PixelsPerLine,
+                    cmsUInt32Number LineCount,
+                    const cmsStride* Stride)
+{
+    cmsFloat32Number Pixel[cmsMAXCHANNELS];
+    cmsUInt32Number line, px;
+    cmsUInt32Number offIn = 0, offOut = 0;
+    cmsUInt8Number* src;
+    cmsUInt8Number* dst;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    memset(Pixel, 0, sizeof(Pixel));
+
+    for (line = 0;
+         line < LineCount;
+         line++, offIn += Stride->BytesPerLineIn, offOut += Stride->BytesPerLineOut) {
+
+        // Start of the current line in each buffer
+        src = (cmsUInt8Number*) in + offIn;
+        dst = (cmsUInt8Number*) out + offOut;
+
+        for (px = 0; px < PixelsPerLine; px++) {
+
+            // Formatters return the position of the next pixel
+            src = p->FromInputFloat(p, Pixel, src, Stride->BytesPerPlaneIn);
+            dst = p->ToOutputFloat(p, Pixel, dst, Stride->BytesPerPlaneOut);
+        }
+    }
+}
+
+// 16 bit precision -----------------------------------------------------------------------------------------------------------
+
+// Null transformation, 16 bits: only the formatters are applied. No cache.
+// Each pixel is unpacked with FromInput and packed again with ToOutput.
+static
+void NullXFORM(_cmsTRANSFORM* p,
+               const void* in,
+               void* out,
+               cmsUInt32Number PixelsPerLine,
+               cmsUInt32Number LineCount,
+               const cmsStride* Stride)
+{
+    cmsUInt16Number Pixel[cmsMAXCHANNELS];
+    cmsUInt32Number line, px;
+    cmsUInt32Number offIn = 0, offOut = 0;
+    cmsUInt8Number* src;
+    cmsUInt8Number* dst;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    memset(Pixel, 0, sizeof(Pixel));
+
+    for (line = 0;
+         line < LineCount;
+         line++, offIn += Stride->BytesPerLineIn, offOut += Stride->BytesPerLineOut) {
+
+        // Start of the current line in each buffer
+        src = (cmsUInt8Number*) in + offIn;
+        dst = (cmsUInt8Number*) out + offOut;
+
+        for (px = 0; px < PixelsPerLine; px++) {
+
+            // Formatters return the position of the next pixel
+            src = p->FromInput(p, Pixel, src, Stride->BytesPerPlaneIn);
+            dst = p->ToOutput(p, Pixel, dst, Stride->BytesPerPlaneOut);
+        }
+    }
+}
+
+
+// 16 bits worker with no gamut check and no cache: every pixel is unpacked,
+// evaluated through the pipeline's Eval16Fn and packed to the output.
+static
+void PrecalculatedXFORM(_cmsTRANSFORM* p,
+                        const void* in,
+                        void* out, 
+                        cmsUInt32Number PixelsPerLine,
+                        cmsUInt32Number LineCount,
+                        const cmsStride* Stride)
+{
+    cmsUInt16Number wIn[cmsMAXCHANNELS], wOut[cmsMAXCHANNELS];
+    cmsUInt32Number line, px;
+    cmsUInt32Number offIn = 0, offOut = 0;
+    cmsUInt8Number* src;
+    cmsUInt8Number* dst;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    memset(wIn, 0, sizeof(wIn));
+    memset(wOut, 0, sizeof(wOut));
+
+    for (line = 0;
+         line < LineCount;
+         line++, offIn += Stride->BytesPerLineIn, offOut += Stride->BytesPerLineOut) {
+
+        // Start of the current line in each buffer
+        src = (cmsUInt8Number*) in + offIn;
+        dst = (cmsUInt8Number*) out + offOut;
+
+        for (px = 0; px < PixelsPerLine; px++) {
+
+            src = p->FromInput(p, wIn, src, Stride->BytesPerPlaneIn);
+            p->Lut->Eval16Fn(wIn, wOut, p->Lut->Data);
+            dst = p->ToOutput(p, wOut, dst, Stride->BytesPerPlaneOut);
+        }
+    }
+}
+
+
+// Auxiliary: transforms one 16 bits pixel honoring the precalculated gamut check.
+// The gamut pipeline is evaluated first; a result of 1 or more means out of
+// gamut, and the output channels then receive the alarm codes stored in the
+// transform's context. Otherwise the pixel goes through the regular pipeline.
+// Retrieving the context chunk may be a little bit slow, but this function is
+// not critical.
+static
+void TransformOnePixelWithGamutCheck(_cmsTRANSFORM* p,
+                                     const cmsUInt16Number wIn[],
+                                     cmsUInt16Number wOut[])
+{
+    cmsUInt16Number GamutFlag;
+    cmsUInt16Number ch;
+    _cmsAlarmCodesChunkType* Alarm;
+
+    p->GamutCheck->Eval16Fn(wIn, &GamutFlag, p->GamutCheck->Data);
+
+    // In gamut: plain pipeline evaluation and we are done
+    if (GamutFlag < 1) {
+
+        p->Lut->Eval16Fn(wIn, wOut, p->Lut->Data);
+        return;
+    }
+
+    // Out of gamut: one alarm code per output channel of the pipeline
+    Alarm = (_cmsAlarmCodesChunkType*) _cmsContextGetClientChunk(p->ContextID, AlarmCodesContext);
+
+    for (ch = 0; ch < p->Lut->OutputChannels; ch++)
+        wOut[ch] = Alarm->AlarmCodes[ch];
+}
+
+// 16 bits worker with gamut check and no cache: every pixel is unpacked,
+// passed through TransformOnePixelWithGamutCheck and packed to the output.
+static
+void PrecalculatedXFORMGamutCheck(_cmsTRANSFORM* p,
+                                  const void* in,
+                                  void* out, 
+                                  cmsUInt32Number PixelsPerLine,
+                                  cmsUInt32Number LineCount,
+                                  const cmsStride* Stride)
+{
+    cmsUInt16Number wIn[cmsMAXCHANNELS], wOut[cmsMAXCHANNELS];
+    cmsUInt32Number line, px;
+    cmsUInt32Number offIn = 0, offOut = 0;
+    cmsUInt8Number* src;
+    cmsUInt8Number* dst;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    memset(wIn, 0, sizeof(wIn));
+    memset(wOut, 0, sizeof(wOut));
+
+    for (line = 0;
+         line < LineCount;
+         line++, offIn += Stride->BytesPerLineIn, offOut += Stride->BytesPerLineOut) {
+
+        // Start of the current line in each buffer
+        src = (cmsUInt8Number*) in + offIn;
+        dst = (cmsUInt8Number*) out + offOut;
+
+        for (px = 0; px < PixelsPerLine; px++) {
+
+            src = p->FromInput(p, wIn, src, Stride->BytesPerPlaneIn);
+            TransformOnePixelWithGamutCheck(p, wIn, wOut);
+            dst = p->ToOutput(p, wOut, dst, Stride->BytesPerPlaneOut);
+        }
+    }
+}
+
+
+// 16 bits worker with a one-pixel cache and no gamut check. The pipeline is
+// evaluated only when the unpacked input differs from the last one seen;
+// otherwise the remembered output is reused. The cache is a local copy of
+// p->Cache, so the transform itself is never written here.
+static
+void CachedXFORM(_cmsTRANSFORM* p,
+                 const void* in,
+                 void* out,
+                 cmsUInt32Number PixelsPerLine,
+                 cmsUInt32Number LineCount,
+                 const cmsStride* Stride)
+{
+    cmsUInt16Number wIn[cmsMAXCHANNELS], wOut[cmsMAXCHANNELS];
+    _cmsCACHE Last;
+    cmsUInt32Number line, px;
+    cmsUInt32Number offIn = 0, offOut = 0;
+    cmsUInt8Number* src;
+    cmsUInt8Number* dst;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    // Buffers are zeroed so the whole-array memcmp below is meaningful
+    memset(wIn, 0, sizeof(wIn));
+    memset(wOut, 0, sizeof(wOut));
+
+    // Private working copy of the transform's cache
+    memcpy(&Last, &p->Cache, sizeof(Last));
+
+    for (line = 0;
+         line < LineCount;
+         line++, offIn += Stride->BytesPerLineIn, offOut += Stride->BytesPerLineOut) {
+
+        // Start of the current line in each buffer
+        src = (cmsUInt8Number*) in + offIn;
+        dst = (cmsUInt8Number*) out + offOut;
+
+        for (px = 0; px < PixelsPerLine; px++) {
+
+            src = p->FromInput(p, wIn, src, Stride->BytesPerPlaneIn);
+
+            if (memcmp(wIn, Last.CacheIn, sizeof(Last.CacheIn)) != 0) {
+
+                // Miss: evaluate and remember this input/output pair
+                p->Lut->Eval16Fn(wIn, wOut, p->Lut->Data);
+
+                memcpy(Last.CacheIn, wIn, sizeof(Last.CacheIn));
+                memcpy(Last.CacheOut, wOut, sizeof(Last.CacheOut));
+            }
+            else {
+
+                // Hit: same input as the previous pixel
+                memcpy(wOut, Last.CacheOut, sizeof(Last.CacheOut));
+            }
+
+            dst = p->ToOutput(p, wOut, dst, Stride->BytesPerPlaneOut);
+        }
+    }
+}
+
+// 16 bits worker with both a one-pixel cache and gamut check. On a cache miss
+// the pixel goes through TransformOnePixelWithGamutCheck and the result is
+// remembered; on a hit the remembered output is reused. The cache is a local
+// copy of p->Cache, so the transform itself is never written here.
+static
+void CachedXFORMGamutCheck(_cmsTRANSFORM* p,
+                           const void* in,
+                           void* out, 
+                           cmsUInt32Number PixelsPerLine,
+                           cmsUInt32Number LineCount,
+                           const cmsStride* Stride)
+{
+    cmsUInt16Number wIn[cmsMAXCHANNELS], wOut[cmsMAXCHANNELS];
+    _cmsCACHE Last;
+    cmsUInt32Number line, px;
+    cmsUInt32Number offIn = 0, offOut = 0;
+    cmsUInt8Number* src;
+    cmsUInt8Number* dst;
+
+    _cmsHandleExtraChannels(p, in, out, PixelsPerLine, LineCount, Stride);
+
+    // Buffers are zeroed so the whole-array memcmp below is meaningful
+    memset(wIn, 0, sizeof(wIn));
+    memset(wOut, 0, sizeof(wOut));
+
+    // Private working copy of the transform's cache
+    memcpy(&Last, &p->Cache, sizeof(Last));
+
+    for (line = 0;
+         line < LineCount;
+         line++, offIn += Stride->BytesPerLineIn, offOut += Stride->BytesPerLineOut) {
+
+        // Start of the current line in each buffer
+        src = (cmsUInt8Number*) in + offIn;
+        dst = (cmsUInt8Number*) out + offOut;
+
+        for (px = 0; px < PixelsPerLine; px++) {
+
+            src = p->FromInput(p, wIn, src, Stride->BytesPerPlaneIn);
+
+            if (memcmp(wIn, Last.CacheIn, sizeof(Last.CacheIn)) != 0) {
+
+                // Miss: transform with gamut check and remember the pair
+                TransformOnePixelWithGamutCheck(p, wIn, wOut);
+
+                memcpy(Last.CacheIn, wIn, sizeof(Last.CacheIn));
+                memcpy(Last.CacheOut, wOut, sizeof(Last.CacheOut));
+            }
+            else {
+
+                // Hit: same input as the previous pixel
+                memcpy(wOut, Last.CacheOut, sizeof(Last.CacheOut));
+            }
+
+            dst = p->ToOutput(p, wOut, dst, Stride->BytesPerPlaneOut);
+        }
+    }
+}
+
+// Transform plug-ins ----------------------------------------------------------------------------------------------------
+
+// List of user-defined transform factories.
+// Each node is one registered transform plug-in; nodes are chained through Next
+// into a singly linked list whose end is marked by a NULL Next.
+typedef struct _cmsTransformCollection_st {
+
+    _cmsTransform2Factory  Factory;    // Callback asked whether it wants to take over a transform
+    cmsBool                OldXform;   // Factory returns xform function in the old style
+
+    struct _cmsTransformCollection_st *Next;    // Next registered factory, or NULL
+
+} _cmsTransformCollection;
+
+// The linked list head. This file-scope chunk is initialized with NULL, i.e.
+// it presumably starts with an empty list of transform factories.
+_cmsTransformPluginChunkType _cmsTransformPluginChunk = { NULL };
+
+
+// Duplicates the transform plug-in list of context src into context ctx.
+// Every node is copied with _cmsSubAllocDup from ctx's memory pool and the
+// copies are linked in the same order as the originals. If a node cannot be
+// duplicated the function returns at once, without storing a chunk in ctx.
+static
+void DupPluginTransformList(struct _cmsContext_struct* ctx, 
+                                               const struct _cmsContext_struct* src)
+{
+    _cmsTransformPluginChunkType* srcChunk = (_cmsTransformPluginChunkType*) src->chunks[TransformPlugin];
+    _cmsTransformPluginChunkType  copyHead = { NULL };
+    _cmsTransformCollection*      node;
+    _cmsTransformCollection*      tail = NULL;
+
+    for (node = srcChunk->TransformCollection; node != NULL; node = node->Next) {
+
+        _cmsTransformCollection* copy =
+            (_cmsTransformCollection*) _cmsSubAllocDup(ctx->MemPool, node, sizeof(_cmsTransformCollection));
+
+        if (copy == NULL)
+            return;
+
+        // Append at the end so the original order is kept
+        copy->Next = NULL;
+
+        if (tail == NULL)
+            copyHead.TransformCollection = copy;    // First node becomes the head
+        else
+            tail->Next = copy;
+
+        tail = copy;
+    }
+
+    ctx->chunks[TransformPlugin] = _cmsSubAllocDup(ctx->MemPool, &copyHead, sizeof(_cmsTransformPluginChunkType));
+}
+
+// Sets up the transform plug-in chunk of context ctx. When a source context is
+// given its whole factory list is duplicated; otherwise ctx receives a copy of
+// an empty (NULL-initialized) chunk.
+void _cmsAllocTransformPluginChunk(struct _cmsContext_struct* ctx, 
+                                        const struct _cmsContext_struct* src)
+{
+    static _cmsTransformPluginChunkType TransformPluginChunkType = { NULL };
+
+    if (src == NULL) {
+
+        // Nothing to inherit: start from the empty template
+        ctx->chunks[TransformPlugin] = _cmsSubAllocDup(ctx->MemPool, &TransformPluginChunkType, sizeof(_cmsTransformPluginChunkType));
+        return;
+    }
+
+    // Copy all linked list
+    DupPluginTransformList(ctx, src);
+}
+
+// Adaptor for old versions of transform plug-ins. The old style function
+// stored in CMMcargo->OldXform is invoked once per line, receiving the start
+// of the line in both buffers, the pixel count and the input plane stride.
+static
+void _cmsTransform2toTransformAdaptor(struct _cmstransform_struct *CMMcargo,
+                                      const void* InputBuffer,
+                                      void* OutputBuffer,
+                                      cmsUInt32Number PixelsPerLine,
+                                      cmsUInt32Number LineCount,
+                                      const cmsStride* Stride)
+{
+    cmsUInt32Number line;
+    cmsUInt32Number offIn = 0, offOut = 0;
+
+    _cmsHandleExtraChannels(CMMcargo, InputBuffer, OutputBuffer, PixelsPerLine, LineCount, Stride);
+
+    for (line = 0; line < LineCount; line++) {
+
+        // Start of the current line in each buffer
+        void* lineIn  = (cmsUInt8Number*) InputBuffer + offIn;
+        void* lineOut = (cmsUInt8Number*) OutputBuffer + offOut;
+
+        CMMcargo->OldXform(CMMcargo, lineIn, lineOut, PixelsPerLine, Stride->BytesPerPlaneIn);
+
+        offIn  += Stride->BytesPerLineIn;
+        offOut += Stride->BytesPerLineOut;
+    }
+}
+
+
+
+// Registers a new transform factory in the given context.
+// A NULL Data resets the context's factory list. Otherwise a new node is
+// allocated with _cmsPluginMalloc and inserted at the head of the list.
+// Plug-ins declaring an expected version below 2080 are marked as old style.
+// Returns FALSE when the plug-in carries no factory or on allocation failure.
+cmsBool  _cmsRegisterTransformPlugin(cmsContext ContextID, cmsPluginBase* Data)
+{
+    _cmsTransformPluginChunkType* chunk = ( _cmsTransformPluginChunkType*) _cmsContextGetClientChunk(ContextID,TransformPlugin);
+    cmsPluginTransform* Plugin = (cmsPluginTransform*) Data;
+    _cmsTransformCollection* node;
+
+    if (Data == NULL) {
+
+        // Drop the chain. Memory is safely freed at exit
+        chunk->TransformCollection = NULL;
+        return TRUE;
+    }
+
+    // Factory callback is required
+    if (Plugin->factories.xform == NULL)
+        return FALSE;
+
+    node = (_cmsTransformCollection*) _cmsPluginMalloc(ContextID, sizeof(_cmsTransformCollection));
+    if (node == NULL)
+        return FALSE;
+
+    // Full xform plug-ins previous to 2.8 need an adapter
+    node->OldXform = (Plugin->base.ExpectedVersion < 2080) ? TRUE : FALSE;
+
+    // Copy the parameters
+    node->Factory = Plugin->factories.xform;
+
+    // Insert at the head of the list
+    node->Next = chunk->TransformCollection;
+    chunk->TransformCollection = node;
+
+    return TRUE;
+}
+
+
+// Stores a plug-in private data pointer in the transform, together with the
+// function to be used for releasing it. CMMcargo must not be NULL.
+void CMSEXPORT _cmsSetTransformUserData(struct _cmstransform_struct *CMMcargo, void* ptr, _cmsFreeUserDataFn FreePrivateDataFn)
+{
+    _cmsAssert(CMMcargo != NULL);
+
+    CMMcargo->FreeUserData = FreePrivateDataFn;
+    CMMcargo->UserData     = ptr;
+}
+
+// Returns the private data pointer a plug-in stored in the transform.
+// CMMcargo must not be NULL.
+void * CMSEXPORT _cmsGetTransformUserData(struct _cmstransform_struct *CMMcargo)
+{
+    void* UserData;
+
+    _cmsAssert(CMMcargo != NULL);
+
+    UserData = CMMcargo->UserData;
+    return UserData;
+}
+
+// Returns the current 16 bits formatters of the transform. Either output
+// pointer may be NULL, in which case that formatter is simply not reported.
+// CMMcargo must not be NULL.
+void CMSEXPORT _cmsGetTransformFormatters16(struct _cmstransform_struct *CMMcargo, cmsFormatter16* FromInput, cmsFormatter16* ToOutput)
+{
+     _cmsAssert(CMMcargo != NULL);
+
+     if (FromInput != NULL) {
+         *FromInput = CMMcargo->FromInput;
+     }
+
+     if (ToOutput != NULL) {
+         *ToOutput = CMMcargo->ToOutput;
+     }
+}
+
+// Returns the current floating point formatters of the transform. Either
+// output pointer may be NULL, in which case that formatter is simply not
+// reported. CMMcargo must not be NULL.
+void CMSEXPORT _cmsGetTransformFormattersFloat(struct _cmstransform_struct *CMMcargo, cmsFormatterFloat* FromInput, cmsFormatterFloat* ToOutput)
+{
+     _cmsAssert(CMMcargo != NULL);
+
+     if (FromInput != NULL) {
+         *FromInput = CMMcargo->FromInputFloat;
+     }
+
+     if (ToOutput != NULL) {
+         *ToOutput = CMMcargo->ToOutputFloat;
+     }
+}
+
+
+// Allocates a transform struct and sets it to defaults.
+//
+// When a pipeline is given, the transform plug-ins registered in the context
+// are asked first; the first factory in the list that accepts takes over the
+// transform. If none does, the pipeline is handed to _cmsOptimizePipeline and
+// one of the built-in workers is selected from the formats and the flags.
+//
+//   ContextID     - context used for allocation, plug-in lookup and error reporting
+//   lut           - pipeline to store in the transform; NULL for a null transform.
+//                   It is freed here if the transform struct cannot be allocated.
+//   Intent        - rendering intent handed to the pipeline optimizer
+//   InputFormat,
+//   OutputFormat,
+//   dwFlags       - in/out: plug-ins and the optimizer receive these pointers, and
+//                   dwFlags may gain cmsFLAGS_CAN_CHANGE_FORMATTER here
+//
+// Returns the new transform, or NULL on allocation failure or when no formatter
+// exists for the requested raster formats (an error is signalled in that case).
+static
+_cmsTRANSFORM* AllocEmptyTransform(cmsContext ContextID, cmsPipeline* lut,
+                                               cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
+{
+    _cmsTransformPluginChunkType* ctx = ( _cmsTransformPluginChunkType*) _cmsContextGetClientChunk(ContextID, TransformPlugin);
+    _cmsTransformCollection* Plugin;
+
+    // Allocate needed memory
+    _cmsTRANSFORM* p = (_cmsTRANSFORM*)_cmsMallocZero(ContextID, sizeof(_cmsTRANSFORM));
+    if (!p) {
+        cmsPipelineFree(lut);
+        return NULL;
+    }
+
+    // Record the owning context right away. The error paths below release the
+    // transform through cmsDeleteTransform, which frees it using p->ContextID;
+    // leaving the field zeroed would free the block through the wrong context.
+    p->ContextID = ContextID;
+
+    // Store the proposed pipeline
+    p->Lut = lut;
+
+    // Let's see if any plug-in want to do the transform by itself
+    if (p->Lut != NULL) {
+
+        for (Plugin = ctx->TransformCollection;
+             Plugin != NULL;
+             Plugin = Plugin->Next) {
+
+            if (Plugin->Factory(&p->xform, &p->UserData, &p->FreeUserData, &p->Lut, InputFormat, OutputFormat, dwFlags)) {
+
+                // Last plugin in the declaration order takes control. We just keep
+                // the original parameters as a logging.
+                // Note that cmsFLAGS_CAN_CHANGE_FORMATTER is not set, so by default
+                // an optimized transform is not reusable. The plug-in can, however, change
+                // the flags and make it suitable.
+
+                p->InputFormat = *InputFormat;
+                p->OutputFormat = *OutputFormat;
+                p->dwOriginalFlags = *dwFlags;
+
+                // Fill the formatters just in case the optimized routine is interested.
+                // No error is thrown if the formatter doesn't exist. It is up to the optimization
+                // factory to decide what to do in those cases.
+                p->FromInput = _cmsGetFormatter(ContextID, *InputFormat, cmsFormatterInput, CMS_PACK_FLAGS_16BITS).Fmt16;
+                p->ToOutput = _cmsGetFormatter(ContextID, *OutputFormat, cmsFormatterOutput, CMS_PACK_FLAGS_16BITS).Fmt16;
+                p->FromInputFloat = _cmsGetFormatter(ContextID, *InputFormat, cmsFormatterInput, CMS_PACK_FLAGS_FLOAT).FmtFloat;
+                p->ToOutputFloat = _cmsGetFormatter(ContextID, *OutputFormat, cmsFormatterOutput, CMS_PACK_FLAGS_FLOAT).FmtFloat;
+
+                // Old style plug-in: keep its function aside and route calls
+                // through the adaptor (the cast warning can be ignored)
+                if (Plugin->OldXform) {
+                    p->OldXform = (_cmsTransformFn) p->xform;
+                    p->xform = _cmsTransform2toTransformAdaptor;
+                }
+
+                return p;
+            }
+        }
+
+        // Not suitable for the transform plug-in, let's check  the pipeline plug-in
+        _cmsOptimizePipeline(ContextID, &p->Lut, Intent, InputFormat, OutputFormat, dwFlags);
+    }
+
+    // Check whatever this is a true floating point transform
+    if (_cmsFormatterIsFloat(*InputFormat) && _cmsFormatterIsFloat(*OutputFormat)) {
+
+        // Get formatter function always return a valid union, but the contents of this union may be NULL.
+        p ->FromInputFloat = _cmsGetFormatter(ContextID, *InputFormat,  cmsFormatterInput, CMS_PACK_FLAGS_FLOAT).FmtFloat;
+        p ->ToOutputFloat  = _cmsGetFormatter(ContextID, *OutputFormat, cmsFormatterOutput, CMS_PACK_FLAGS_FLOAT).FmtFloat;
+        *dwFlags |= cmsFLAGS_CAN_CHANGE_FORMATTER;
+
+        if (p ->FromInputFloat == NULL || p ->ToOutputFloat == NULL) {
+
+            cmsSignalError(ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported raster format");
+            cmsDeleteTransform(p);
+            return NULL;
+        }
+
+        if (*dwFlags & cmsFLAGS_NULLTRANSFORM) {
+
+            p ->xform = NullFloatXFORM;
+        }
+        else {
+            // Float transforms don't use cache, always are non-NULL
+            p ->xform = FloatXFORM;
+        }
+
+    }
+    else {
+
+        if (*InputFormat == 0 && *OutputFormat == 0) {
+
+            // No formats given: formatters are left empty
+            p ->FromInput = p ->ToOutput = NULL;
+            *dwFlags |= cmsFLAGS_CAN_CHANGE_FORMATTER;
+        }
+        else {
+
+            cmsUInt32Number BytesPerPixelInput;
+
+            p ->FromInput = _cmsGetFormatter(ContextID, *InputFormat,  cmsFormatterInput, CMS_PACK_FLAGS_16BITS).Fmt16;
+            p ->ToOutput  = _cmsGetFormatter(ContextID, *OutputFormat, cmsFormatterOutput, CMS_PACK_FLAGS_16BITS).Fmt16;
+
+            if (p ->FromInput == NULL || p ->ToOutput == NULL) {
+
+                cmsSignalError(ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported raster format");
+                cmsDeleteTransform(p);
+                return NULL;
+            }
+
+            // Use the requested format: p->InputFormat is not assigned until the
+            // end of this function and is still zero at this point, which would
+            // make the test below succeed for every input format.
+            BytesPerPixelInput = T_BYTES(*InputFormat);
+            if (BytesPerPixelInput == 0 || BytesPerPixelInput >= 2)
+                *dwFlags |= cmsFLAGS_CAN_CHANGE_FORMATTER;
+
+        }
+
+        if (*dwFlags & cmsFLAGS_NULLTRANSFORM) {
+
+            p ->xform = NullXFORM;
+        }
+        else {
+            if (*dwFlags & cmsFLAGS_NOCACHE) {
+
+                if (*dwFlags & cmsFLAGS_GAMUTCHECK)
+                    p ->xform = PrecalculatedXFORMGamutCheck;  // Gamut check, no cache
+                else
+                    p ->xform = PrecalculatedXFORM;  // No cache, no gamut check
+            }
+            else {
+
+                if (*dwFlags & cmsFLAGS_GAMUTCHECK)
+                    p ->xform = CachedXFORMGamutCheck;    // Gamut check, cache
+                else
+                    p ->xform = CachedXFORM;  // No gamut check, cache
+
+            }
+        }
+    }
+
+    // Keep the final formats and flags (ContextID was stored right after allocation)
+    p ->InputFormat     = *InputFormat;
+    p ->OutputFormat    = *OutputFormat;
+    p ->dwOriginalFlags = *dwFlags;
+    p ->UserData        = NULL;
+    return p;
+}
+
+// Walks the profile chain and reports the color space entering the first
+// profile (*Input) and the one leaving the last profile (*Output).
+// For each profile the direction is decided from the space produced so far:
+// named color profiles take cmsSig1colorData as input; devicelinks, or any
+// profile reached while the current space is neither XYZ nor Lab, go from
+// color space to PCS; the rest go from PCS to color space.
+// Returns FALSE if there are no profiles or any handle is NULL.
+static
+cmsBool GetXFormColorSpaces(cmsUInt32Number nProfiles, cmsHPROFILE hProfiles[], cmsColorSpaceSignature* Input, cmsColorSpaceSignature* Output)
+{
+    cmsColorSpaceSignature SpaceIn, SpaceOut;
+    cmsColorSpaceSignature Current;
+    cmsUInt32Number n;
+
+    if (nProfiles == 0 || hProfiles[0] == NULL)
+        return FALSE;
+
+    Current = cmsGetColorSpace(hProfiles[0]);
+    *Input  = Current;
+
+    for (n = 0; n < nProfiles; n++) {
+
+        cmsHPROFILE hProfile = hProfiles[n];
+        cmsProfileClassSignature cls;
+
+        // Are we coming from a device space (anything but a PCS)?
+        int lIsInput = (Current != cmsSigXYZData) &&
+                       (Current != cmsSigLabData);
+
+        if (hProfile == NULL)
+            return FALSE;
+
+        cls = cmsGetDeviceClass(hProfile);
+
+        if (cls == cmsSigNamedColorClass) {
+
+            SpaceIn = cmsSig1colorData;
+
+            if (nProfiles > 1)
+                SpaceOut = cmsGetPCS(hProfile);
+            else
+                SpaceOut = cmsGetColorSpace(hProfile);
+        }
+        else if (lIsInput || (cls == cmsSigLinkClass)) {
+
+            // Color space -> PCS
+            SpaceIn  = cmsGetColorSpace(hProfile);
+            SpaceOut = cmsGetPCS(hProfile);
+        }
+        else {
+
+            // PCS -> color space
+            SpaceIn  = cmsGetPCS(hProfile);
+            SpaceOut = cmsGetColorSpace(hProfile);
+        }
+
+        if (n == 0)
+            *Input = SpaceIn;
+
+        Current = SpaceOut;
+    }
+
+    *Output = Current;
+
+    return TRUE;
+}
+
+// Checks that the color space encoded in a buffer format is acceptable for
+// the given profile color space signature. PT_ANY in the format matches
+// everything, and PT_Lab / PT_LabV2 are accepted for each other.
+static
+cmsBool  IsProperColorSpace(cmsColorSpaceSignature Check, cmsUInt32Number dwFormat)
+{
+    int FormatSpace  = (int) T_COLORSPACE(dwFormat);
+    int ProfileSpace = _cmsLCMScolorSpace(Check);
+
+    if (FormatSpace == PT_ANY || FormatSpace == ProfileSpace)
+        return TRUE;
+
+    // Both Lab flavours are interchangeable for this check
+    if ((FormatSpace == PT_LabV2 && ProfileSpace == PT_Lab) ||
+        (FormatSpace == PT_Lab   && ProfileSpace == PT_LabV2))
+        return TRUE;
+
+    return FALSE;
+}
+
+// ----------------------------------------------------------------------------------------------------------------
+
+// Jun-21-2000: Some profiles (those that come with W2K) have the media
+// white (media black?) multiplied by 100. As a sanity check, the three
+// components are divided by 10 for as long as all of them are above 2.
+static
+void NormalizeXYZ(cmsCIEXYZ* Dest)
+{
+    for (;;) {
+
+        // Stop as soon as any component is no longer above 2
+        if (!(Dest->X > 2. && Dest->Y > 2. && Dest->Z > 2.))
+            break;
+
+        Dest->X /= 10.;
+        Dest->Y /= 10.;
+        Dest->Z /= 10.;
+    }
+}
+
+// Fills wtPt with the given white point, normalized by NormalizeXYZ.
+// When no source is available (src is NULL) D50 is used instead.
+static
+void SetWhitePoint(cmsCIEXYZ* wtPt, const cmsCIEXYZ* src)
+{
+    if (src != NULL) {
+
+        wtPt->X = src->X;
+        wtPt->Y = src->Y;
+        wtPt->Z = src->Z;
+
+        NormalizeXYZ(wtPt);
+        return;
+    }
+
+    // Default: D50, taken as is
+    wtPt->X = cmsD50X;
+    wtPt->Y = cmsD50Y;
+    wtPt->Z = cmsD50Z;
+}
+
+// New to lcms 2.0 -- have all parameters available.
+//
+// Creates a transform from a chain of profiles.
+//
+//   ContextID         - context used for allocation and error reporting
+//   nProfiles         - number of entries in hProfiles, BPC, Intents and AdaptationStates
+//   hProfiles         - profile chain, in order
+//   BPC, Intents,
+//   AdaptationStates  - per-profile settings handed to _cmsLinkProfiles
+//   hGamutProfile     - profile used for the gamut check; if NULL the gamut check flag is dropped
+//   nGamutPCSposition - handed to _cmsCreateGamutCheckPipeline
+//   InputFormat,
+//   OutputFormat      - buffer formats; their color spaces must match the chain
+//   dwFlags           - cmsFLAGS_* options
+//
+// Returns the transform handle, or NULL on error (an error is signalled for
+// the failures detected in this function).
+cmsHTRANSFORM CMSEXPORT cmsCreateExtendedTransform(cmsContext ContextID,
+                                                   cmsUInt32Number nProfiles, cmsHPROFILE hProfiles[],
+                                                   cmsBool  BPC[],
+                                                   cmsUInt32Number Intents[],
+                                                   cmsFloat64Number AdaptationStates[],
+                                                   cmsHPROFILE hGamutProfile,
+                                                   cmsUInt32Number nGamutPCSposition,
+                                                   cmsUInt32Number InputFormat,
+                                                   cmsUInt32Number OutputFormat,
+                                                   cmsUInt32Number dwFlags)
+{
+    _cmsTRANSFORM* xform;    
+    cmsColorSpaceSignature EntryColorSpace;
+    cmsColorSpaceSignature ExitColorSpace;
+    cmsPipeline* Lut;
+    cmsUInt32Number LastIntent;
+
+    // If it is a fake transform
+    if (dwFlags & cmsFLAGS_NULLTRANSFORM)
+    {
+        return AllocEmptyTransform(ContextID, NULL, INTENT_PERCEPTUAL, &InputFormat, &OutputFormat, &dwFlags);
+    }
+
+    // If gamut check is requested, make sure we have a gamut profile
+    if (dwFlags & cmsFLAGS_GAMUTCHECK) {
+        if (hGamutProfile == NULL) dwFlags &= ~cmsFLAGS_GAMUTCHECK;
+    }
+
+    // On floating point transforms, inhibit cache
+    if (_cmsFormatterIsFloat(InputFormat) || _cmsFormatterIsFloat(OutputFormat))
+        dwFlags |= cmsFLAGS_NOCACHE;
+
+    // Mark entry/exit spaces. This also rejects an empty profile list.
+    if (!GetXFormColorSpaces(nProfiles, hProfiles, &EntryColorSpace, &ExitColorSpace)) {
+        cmsSignalError(ContextID, cmsERROR_NULL, "NULL input profiles on transform");
+        return NULL;
+    }
+
+    // nProfiles is known to be non-zero from here on, so the last intent can be
+    // read safely. Reading Intents[nProfiles-1] before the check above would
+    // index out of bounds when nProfiles is 0.
+    LastIntent = Intents[nProfiles-1];
+
+    // Check if proper colorspaces
+    if (!IsProperColorSpace(EntryColorSpace, InputFormat)) {
+        cmsSignalError(ContextID, cmsERROR_COLORSPACE_CHECK, "Wrong input color space on transform");
+        return NULL;
+    }
+
+    if (!IsProperColorSpace(ExitColorSpace, OutputFormat)) {
+        cmsSignalError(ContextID, cmsERROR_COLORSPACE_CHECK, "Wrong output color space on transform");
+        return NULL;
+    }
+
+    // Create a pipeline with all transformations
+    Lut = _cmsLinkProfiles(ContextID, nProfiles, Intents, hProfiles, BPC, AdaptationStates, dwFlags);
+    if (Lut == NULL) {
+        cmsSignalError(ContextID, cmsERROR_NOT_SUITABLE, "Couldn't link the profiles");
+        return NULL;
+    }
+
+    // Check channel count
+    if ((cmsChannelsOf(EntryColorSpace) != cmsPipelineInputChannels(Lut)) ||
+        (cmsChannelsOf(ExitColorSpace)  != cmsPipelineOutputChannels(Lut))) {
+        cmsPipelineFree(Lut);
+        cmsSignalError(ContextID, cmsERROR_NOT_SUITABLE, "Channel count doesn't match. Profile is corrupted");
+        return NULL;
+    }
+
+
+    // All seems ok
+    xform = AllocEmptyTransform(ContextID, Lut, LastIntent, &InputFormat, &OutputFormat, &dwFlags);
+    if (xform == NULL) {
+        return NULL;
+    }
+
+    // Keep values
+    xform ->EntryColorSpace = EntryColorSpace;
+    xform ->ExitColorSpace  = ExitColorSpace;
+    xform ->RenderingIntent = LastIntent;
+
+    // Take white points (SetWhitePoint falls back to D50 when the tag cannot be read)
+    SetWhitePoint(&xform->EntryWhitePoint, (cmsCIEXYZ*) cmsReadTag(hProfiles[0], cmsSigMediaWhitePointTag));
+    SetWhitePoint(&xform->ExitWhitePoint,  (cmsCIEXYZ*) cmsReadTag(hProfiles[nProfiles-1], cmsSigMediaWhitePointTag));
+   
+
+    // Create a gamut check LUT if requested.
+    // NOTE(review): the 16 bits gamut check workers dereference GamutCheck
+    // unconditionally; confirm _cmsCreateGamutCheckPipeline cannot return NULL here.
+    if (hGamutProfile != NULL && (dwFlags & cmsFLAGS_GAMUTCHECK))
+        xform ->GamutCheck  = _cmsCreateGamutCheckPipeline(ContextID, hProfiles,
+                                                        BPC, Intents,
+                                                        AdaptationStates,
+                                                        nGamutPCSposition,
+                                                        hGamutProfile);
+
+
+    // Try to read input and output colorant table
+    if (cmsIsTag(hProfiles[0], cmsSigColorantTableTag)) {
+
+        // Input table can only come in this way.
+        xform ->InputColorant = cmsDupNamedColorList((cmsNAMEDCOLORLIST*) cmsReadTag(hProfiles[0], cmsSigColorantTableTag));
+    }
+
+    // Output is a little bit more complex.
+    if (cmsGetDeviceClass(hProfiles[nProfiles-1]) == cmsSigLinkClass) {
+
+        // This tag may exist only on devicelink profiles.
+        if (cmsIsTag(hProfiles[nProfiles-1], cmsSigColorantTableOutTag)) {
+
+            // It may be NULL if error
+            xform ->OutputColorant = cmsDupNamedColorList((cmsNAMEDCOLORLIST*) cmsReadTag(hProfiles[nProfiles-1], cmsSigColorantTableOutTag));
+        }
+
+    } else {
+
+        if (cmsIsTag(hProfiles[nProfiles-1], cmsSigColorantTableTag)) {
+
+            xform -> OutputColorant = cmsDupNamedColorList((cmsNAMEDCOLORLIST*) cmsReadTag(hProfiles[nProfiles-1], cmsSigColorantTableTag));
+        }
+    }
+
+    // Store the sequence of profiles
+    if (dwFlags & cmsFLAGS_KEEP_SEQUENCE) {
+        xform ->Sequence = _cmsCompileProfileSequence(ContextID, nProfiles, hProfiles);
+    }
+    else
+        xform ->Sequence = NULL;
+
+    // If this is a cached transform, init first value, which is zero (16 bits only)
+    if (!(dwFlags & cmsFLAGS_NOCACHE)) {
+
+        memset(&xform ->Cache.CacheIn, 0, sizeof(xform ->Cache.CacheIn));
+
+        if (xform ->GamutCheck != NULL) {
+            TransformOnePixelWithGamutCheck(xform, xform ->Cache.CacheIn, xform->Cache.CacheOut);
+        }
+        else {
+
+            xform ->Lut ->Eval16Fn(xform ->Cache.CacheIn, xform->Cache.CacheOut, xform -> Lut->Data);
+        }
+
+    }
+
+    return (cmsHTRANSFORM) xform;
+}
+
+// Multiprofile transforms: Gamut check is not available here, as it is unclear from which profile the gamut comes.
+//
+// Creates a transform over a chain of nProfiles profiles in which every profile gets the
+// same rendering intent, the same black point compensation setting and the same
+// adaptation state. The work itself is delegated to cmsCreateExtendedTransform(), which
+// is called here without a gamut profile.
+//
+//  ContextID                  context used for error reporting and handed to the callee
+//  hProfiles, nProfiles       the profile chain; nProfiles must be in 1..255
+//  InputFormat, OutputFormat  forwarded unchanged to cmsCreateExtendedTransform()
+//  Intent                     intent replicated for every profile of the chain
+//  dwFlags                    forwarded unchanged; cmsFLAGS_BLACKPOINTCOMPENSATION enables BPC on all profiles
+//
+// Returns whatever cmsCreateExtendedTransform() returns, or NULL (after signalling
+// cmsERROR_RANGE) when nProfiles is outside 1..255.
+cmsHTRANSFORM CMSEXPORT cmsCreateMultiprofileTransformTHR(cmsContext ContextID,
+                                                       cmsHPROFILE hProfiles[],
+                                                       cmsUInt32Number nProfiles,
+                                                       cmsUInt32Number InputFormat,
+                                                       cmsUInt32Number OutputFormat,
+                                                       cmsUInt32Number Intent,
+                                                       cmsUInt32Number dwFlags)
+{
+    cmsUInt32Number i;
+    cmsBool BPC[256];
+    cmsUInt32Number Intents[256];
+    cmsFloat64Number AdaptationStates[256];
+    cmsBool DoBPC;
+    cmsFloat64Number AdaptationState;
+
+    // nProfiles is unsigned, so zero is the only value below the valid range.
+    // The count is printed with %u to match its unsigned type.
+    if (nProfiles == 0 || nProfiles > 255) {
+        cmsSignalError(ContextID, cmsERROR_RANGE, "Wrong number of profiles. 1..255 expected, %u found.", nProfiles);
+        return NULL;
+    }
+
+    // Both settings are identical for every profile, so they are evaluated once.
+    // NOTE(review): assumes a negative argument makes cmsSetAdaptationStateTHR() a pure
+    // query of the current state -- confirm against the Little CMS API documentation.
+    DoBPC = (dwFlags & cmsFLAGS_BLACKPOINTCOMPENSATION) ? TRUE : FALSE;
+    AdaptationState = cmsSetAdaptationStateTHR(ContextID, -1);
+
+    for (i=0; i < nProfiles; i++) {
+        BPC[i] = DoBPC;
+        Intents[i] = Intent;
+        AdaptationStates[i] = AdaptationState;
+    }
+
+    // No gamut profile (NULL) and gamut PCS position 0: gamut check is not offered here.
+    return cmsCreateExtendedTransform(ContextID, nProfiles, hProfiles, BPC, Intents, AdaptationStates, NULL, 0, InputFormat, OutputFormat, dwFlags);
+}
+
+
+
+// Context-less variant of cmsCreateMultiprofileTransformTHR(): the context is taken
+// from the first profile of the chain and every other argument is forwarded unchanged.
+//
+// Returns the handle produced by cmsCreateMultiprofileTransformTHR(), or NULL (after
+// signalling cmsERROR_RANGE on the NULL context) when nProfiles is outside 1..255.
+cmsHTRANSFORM CMSEXPORT cmsCreateMultiprofileTransform(cmsHPROFILE hProfiles[],
+                                                  cmsUInt32Number nProfiles,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number dwFlags)
+{
+
+    // The range must be validated here as well: hProfiles[0] is read below, which is
+    // only meaningful when there is at least one profile. nProfiles is unsigned, so the
+    // lower bound is a test against zero and the count is printed with %u.
+    if (nProfiles == 0 || nProfiles > 255) {
+         cmsSignalError(NULL, cmsERROR_RANGE, "Wrong number of profiles. 1..255 expected, %u found.", nProfiles);
+         return NULL;
+    }
+
+    return cmsCreateMultiprofileTransformTHR(cmsGetProfileContextID(hProfiles[0]),
+                                                  hProfiles,
+                                                  nProfiles,
+                                                  InputFormat,
+                                                  OutputFormat,
+                                                  Intent,
+                                                  dwFlags);
+}
+
+// Two-profile convenience wrapper around cmsCreateMultiprofileTransformTHR().
+// The chain is { Input, Output }; when Output is NULL only Input is passed on,
+// i.e. the chain length becomes 1 instead of 2.
+cmsHTRANSFORM CMSEXPORT cmsCreateTransformTHR(cmsContext ContextID,
+                                              cmsHPROFILE Input,
+                                              cmsUInt32Number InputFormat,
+                                              cmsHPROFILE Output,
+                                              cmsUInt32Number OutputFormat,
+                                              cmsUInt32Number Intent,
+                                              cmsUInt32Number dwFlags)
+{
+    cmsHPROFILE Chain[2];
+    cmsUInt32Number nChain;
+
+    Chain[0] = Input;
+    Chain[1] = Output;
+
+    // A missing output profile shortens the chain to the input profile alone
+    if (Output == NULL)
+        nChain = 1U;
+    else
+        nChain = 2U;
+
+    return cmsCreateMultiprofileTransformTHR(ContextID, Chain, nChain, InputFormat, OutputFormat, Intent, dwFlags);
+}
+
+// Context-less variant of cmsCreateTransformTHR(): the context is the one reported
+// by cmsGetProfileContextID() for the input profile.
+CMSAPI cmsHTRANSFORM CMSEXPORT cmsCreateTransform(cmsHPROFILE Input,
+                                                  cmsUInt32Number InputFormat,
+                                                  cmsHPROFILE Output,
+                                                  cmsUInt32Number OutputFormat,
+                                                  cmsUInt32Number Intent,
+                                                  cmsUInt32Number dwFlags)
+{
+    cmsContext InputContext = cmsGetProfileContextID(Input);
+
+    return cmsCreateTransformTHR(InputContext,
+                                 Input, InputFormat,
+                                 Output, OutputFormat,
+                                 Intent, dwFlags);
+}
+
+
+// Creates a proofing transform through the chain
+//     InputProfile -> ProofingProfile -> ProofingProfile -> OutputProfile
+// using cmsCreateExtendedTransform() with ProofingProfile as gamut profile and 1 as
+// gamut PCS position. When neither cmsFLAGS_SOFTPROOFING nor cmsFLAGS_GAMUTCHECK is
+// set in dwFlags, a plain InputProfile -> OutputProfile transform is created instead.
+cmsHTRANSFORM CMSEXPORT cmsCreateProofingTransformTHR(cmsContext ContextID,
+                                                   cmsHPROFILE InputProfile,
+                                                   cmsUInt32Number InputFormat,
+                                                   cmsHPROFILE OutputProfile,
+                                                   cmsUInt32Number OutputFormat,
+                                                   cmsHPROFILE ProofingProfile,
+                                                   cmsUInt32Number nIntent,
+                                                   cmsUInt32Number ProofingIntent,
+                                                   cmsUInt32Number dwFlags)
+{
+    cmsHPROFILE Chain[4];
+    cmsUInt32Number ChainIntents[4];
+    cmsBool  ChainBPC[4];
+    cmsFloat64Number ChainAdaptation[4];
+    cmsFloat64Number CurrentAdaptation;
+    cmsBool  WantsBPC;
+    cmsUInt32Number k;
+
+    if (dwFlags & cmsFLAGS_BLACKPOINTCOMPENSATION)
+        WantsBPC = TRUE;
+    else
+        WantsBPC = FALSE;
+
+    // Link 0: input profile, user intent, user BPC
+    Chain[0]        = InputProfile;
+    ChainIntents[0] = nIntent;
+    ChainBPC[0]     = WantsBPC;
+
+    // Link 1: into the proofing profile, user intent, user BPC
+    Chain[1]        = ProofingProfile;
+    ChainIntents[1] = nIntent;
+    ChainBPC[1]     = WantsBPC;
+
+    // Link 2: back out of the proofing profile, relative colorimetric, no BPC
+    Chain[2]        = ProofingProfile;
+    ChainIntents[2] = INTENT_RELATIVE_COLORIMETRIC;
+    ChainBPC[2]     = FALSE;
+
+    // Link 3: output profile, proofing intent, no BPC
+    Chain[3]        = OutputProfile;
+    ChainIntents[3] = ProofingIntent;
+    ChainBPC[3]     = FALSE;
+
+    // Same adaptation state on every link
+    CurrentAdaptation = cmsSetAdaptationStateTHR(ContextID, -1);
+    for (k = 0; k < 4; k++)
+        ChainAdaptation[k] = CurrentAdaptation;
+
+    // Without soft proofing or gamut check there is nothing to proof: plain transform
+    if ((dwFlags & (cmsFLAGS_SOFTPROOFING|cmsFLAGS_GAMUTCHECK)) == 0)
+        return cmsCreateTransformTHR(ContextID, InputProfile, InputFormat, OutputProfile, OutputFormat, nIntent, dwFlags);
+
+    return cmsCreateExtendedTransform(ContextID, 4, Chain, ChainBPC, ChainIntents, ChainAdaptation,
+                                        ProofingProfile, 1, InputFormat, OutputFormat, dwFlags);
+}
+
+
+// Context-less variant of cmsCreateProofingTransformTHR(): the context is the one
+// reported by cmsGetProfileContextID() for the input profile; all remaining
+// arguments are forwarded unchanged.
+cmsHTRANSFORM CMSEXPORT cmsCreateProofingTransform(cmsHPROFILE InputProfile,
+                                                   cmsUInt32Number InputFormat,
+                                                   cmsHPROFILE OutputProfile,
+                                                   cmsUInt32Number OutputFormat,
+                                                   cmsHPROFILE ProofingProfile,
+                                                   cmsUInt32Number nIntent,
+                                                   cmsUInt32Number ProofingIntent,
+                                                   cmsUInt32Number dwFlags)
+{
+    cmsContext InputContext = cmsGetProfileContextID(InputProfile);
+
+    return cmsCreateProofingTransformTHR(InputContext,
+                                         InputProfile, InputFormat,
+                                         OutputProfile, OutputFormat,
+                                         ProofingProfile,
+                                         nIntent, ProofingIntent,
+                                         dwFlags);
+}
+
+
+// Returns the ContextID stored in an open transform, or NULL when the handle itself is NULL
+cmsContext CMSEXPORT cmsGetTransformContextID(cmsHTRANSFORM hTransform)
+{
+    _cmsTRANSFORM* p = (_cmsTRANSFORM*) hTransform;
+
+    return (p != NULL) ? p ->ContextID : NULL;
+}
+
+// Returns the input buffer format stored in the transform, or 0 for a NULL handle
+cmsUInt32Number CMSEXPORT cmsGetTransformInputFormat(cmsHTRANSFORM hTransform)
+{
+    _cmsTRANSFORM* p = (_cmsTRANSFORM*) hTransform;
+
+    return (p != NULL) ? p ->InputFormat : 0;
+}
+
+// Returns the output buffer format stored in the transform, or 0 for a NULL handle
+cmsUInt32Number CMSEXPORT cmsGetTransformOutputFormat(cmsHTRANSFORM hTransform)
+{
+    _cmsTRANSFORM* p = (_cmsTRANSFORM*) hTransform;
+
+    return (p != NULL) ? p ->OutputFormat : 0;
+}
+
+// For backwards compatibility
+//
+// Replaces the input and output buffer formats of an existing transform, together with
+// the 16-bit formatters that go with them.
+//
+//  hTransform    transform to modify; a NULL handle is rejected
+//  InputFormat   new input buffer format
+//  OutputFormat  new output buffer format
+//
+// Returns TRUE on success. Returns FALSE and leaves the transform untouched when the
+// handle is NULL, when the transform was not created with cmsFLAGS_CAN_CHANGE_FORMATTER
+// in its original flags, or when no 16-bit formatter exists for one of the formats.
+cmsBool CMSEXPORT cmsChangeBuffersFormat(cmsHTRANSFORM hTransform,
+                                         cmsUInt32Number InputFormat,
+                                         cmsUInt32Number OutputFormat)
+{
+    _cmsTRANSFORM* xform = (_cmsTRANSFORM*) hTransform;
+    cmsFormatter16 FromInput, ToOutput;
+
+    // Tolerate NULL handles like the other transform accessors do, instead of
+    // dereferencing a NULL pointer below
+    if (xform == NULL) return FALSE;
+
+    // We only can afford to change formatters if previous transform is at least 16 bits
+    if (!(xform ->dwOriginalFlags & cmsFLAGS_CAN_CHANGE_FORMATTER)) {
+
+        cmsSignalError(xform ->ContextID, cmsERROR_NOT_SUITABLE, "cmsChangeBuffersFormat works only on transforms created originally with at least 16 bits of precision");
+        return FALSE;
+    }
+
+    // Look up both formatters before touching the transform, so a failure leaves it unchanged
+    FromInput = _cmsGetFormatter(xform->ContextID, InputFormat,  cmsFormatterInput, CMS_PACK_FLAGS_16BITS).Fmt16;
+    ToOutput  = _cmsGetFormatter(xform->ContextID, OutputFormat, cmsFormatterOutput, CMS_PACK_FLAGS_16BITS).Fmt16;
+
+    if (FromInput == NULL || ToOutput == NULL) {
+
+        cmsSignalError(xform -> ContextID, cmsERROR_UNKNOWN_EXTENSION, "Unsupported raster format");
+        return FALSE;
+    }
+
+    xform ->InputFormat  = InputFormat;
+    xform ->OutputFormat = OutputFormat;
+    xform ->FromInput    = FromInput;
+    xform ->ToOutput     = ToOutput;
+    return TRUE;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/lcms2.def b/codec/L2/demos/pikEnc/host/third_party/lcms/src/lcms2.def
new file mode 100755
index 0000000000..8d9e0b548b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/lcms2.def
@@ -0,0 +1,342 @@
+LIBRARY     LCMS2.DLL
+
+EXPORTS
+
+_cms15Fixed16toDouble                    =   _cms15Fixed16toDouble
+_cms8Fixed8toDouble                      =   _cms8Fixed8toDouble
+cmsAdaptToIlluminant                     =    cmsAdaptToIlluminant
+_cmsAdjustEndianess16                    =   _cmsAdjustEndianess16
+_cmsAdjustEndianess32                    =   _cmsAdjustEndianess32
+_cmsAdjustEndianess64                    =   _cmsAdjustEndianess64
+cmsAllocNamedColorList                   =   cmsAllocNamedColorList
+cmsAllocProfileSequenceDescription       =   cmsAllocProfileSequenceDescription
+cmsAppendNamedColor                      =   cmsAppendNamedColor
+cmsBFDdeltaE                             =   cmsBFDdeltaE
+cmsBuildGamma                            =   cmsBuildGamma
+cmsBuildParametricToneCurve              =   cmsBuildParametricToneCurve
+cmsBuildSegmentedToneCurve               =   cmsBuildSegmentedToneCurve
+cmsBuildTabulatedToneCurve16             =   cmsBuildTabulatedToneCurve16
+cmsBuildTabulatedToneCurveFloat          =   cmsBuildTabulatedToneCurveFloat
+_cmsCalloc                               =   _cmsCalloc
+cmsChannelsOf                            =    cmsChannelsOf
+cmsCIE2000DeltaE                         =    cmsCIE2000DeltaE
+cmsCIE94DeltaE                           =    cmsCIE94DeltaE
+cmsCIECAM02Done                          =    cmsCIECAM02Done
+cmsCIECAM02Forward                       =    cmsCIECAM02Forward
+cmsCIECAM02Init                          =    cmsCIECAM02Init
+cmsCIECAM02Reverse                       =    cmsCIECAM02Reverse
+cmsCloseIOhandler                        =    cmsCloseIOhandler
+cmsCloseProfile                          =    cmsCloseProfile
+cmsCMCdeltaE                             =    cmsCMCdeltaE
+cmsCreate_sRGBProfile                    =    cmsCreate_sRGBProfile
+cmsCreate_sRGBProfileTHR                 =    cmsCreate_sRGBProfileTHR
+cmsCreateBCHSWabstractProfile            =    cmsCreateBCHSWabstractProfile
+cmsCreateBCHSWabstractProfileTHR         =    cmsCreateBCHSWabstractProfileTHR
+cmsCreateExtendedTransform               =    cmsCreateExtendedTransform
+cmsCreateGrayProfile                     =    cmsCreateGrayProfile
+cmsCreateGrayProfileTHR                  =    cmsCreateGrayProfileTHR
+cmsCreateInkLimitingDeviceLink           =    cmsCreateInkLimitingDeviceLink
+cmsCreateInkLimitingDeviceLinkTHR        =    cmsCreateInkLimitingDeviceLinkTHR
+cmsCreateLab2Profile                     =    cmsCreateLab2Profile
+cmsCreateLab2ProfileTHR                  =    cmsCreateLab2ProfileTHR
+cmsCreateLab4Profile                     =    cmsCreateLab4Profile
+cmsCreateLab4ProfileTHR                  =    cmsCreateLab4ProfileTHR
+cmsCreateLinearizationDeviceLink         =    cmsCreateLinearizationDeviceLink
+cmsCreateLinearizationDeviceLinkTHR      =    cmsCreateLinearizationDeviceLinkTHR
+cmsCreateMultiprofileTransform           =    cmsCreateMultiprofileTransform
+cmsCreateMultiprofileTransformTHR        =    cmsCreateMultiprofileTransformTHR
+cmsCreateNULLProfile                     =    cmsCreateNULLProfile
+cmsCreateNULLProfileTHR                  =    cmsCreateNULLProfileTHR
+cmsCreateProfilePlaceholder              =    cmsCreateProfilePlaceholder
+cmsCreateProofingTransform               =    cmsCreateProofingTransform
+cmsCreateProofingTransformTHR            =    cmsCreateProofingTransformTHR
+cmsCreateRGBProfile                      =    cmsCreateRGBProfile
+cmsCreateRGBProfileTHR                   =    cmsCreateRGBProfileTHR
+cmsCreateTransform                       =    cmsCreateTransform
+cmsCreateTransformTHR                    =    cmsCreateTransformTHR
+cmsCreateXYZProfile                      =    cmsCreateXYZProfile
+cmsCreateXYZProfileTHR                   =    cmsCreateXYZProfileTHR
+cmsD50_xyY                               =    cmsD50_xyY
+cmsD50_XYZ                               =    cmsD50_XYZ
+_cmsDecodeDateTimeNumber                 =    _cmsDecodeDateTimeNumber
+_cmsDefaultICCintents                    =    _cmsDefaultICCintents
+cmsDeleteTransform                       =    cmsDeleteTransform
+cmsDeltaE                                =    cmsDeltaE
+cmsDetectBlackPoint                      =    cmsDetectBlackPoint
+cmsDetectDestinationBlackPoint           =    cmsDetectDestinationBlackPoint
+cmsDetectTAC                             =    cmsDetectTAC
+cmsDesaturateLab                         =    cmsDesaturateLab
+cmsDoTransform                           =    cmsDoTransform
+cmsDoTransformStride                     =    cmsDoTransformStride
+_cmsDoubleTo15Fixed16                    =    _cmsDoubleTo15Fixed16
+_cmsDoubleTo8Fixed8                      =    _cmsDoubleTo8Fixed8
+_cmsDupMem                               =    _cmsDupMem
+cmsDupNamedColorList                     =    cmsDupNamedColorList
+cmsDupProfileSequenceDescription         =    cmsDupProfileSequenceDescription
+cmsDupToneCurve                          =    cmsDupToneCurve
+_cmsEncodeDateTimeNumber                 =    _cmsEncodeDateTimeNumber
+cmsEstimateGamma                         =    cmsEstimateGamma
+cmsGetToneCurveEstimatedTableEntries     =    cmsGetToneCurveEstimatedTableEntries
+cmsGetToneCurveEstimatedTable            =    cmsGetToneCurveEstimatedTable
+cmsEvalToneCurve16                       =    cmsEvalToneCurve16
+cmsEvalToneCurveFloat                    =    cmsEvalToneCurveFloat
+cmsfilelength                            =    cmsfilelength
+cmsFloat2LabEncoded                      =    cmsFloat2LabEncoded
+cmsFloat2LabEncodedV2                    =    cmsFloat2LabEncodedV2
+cmsFloat2XYZEncoded                      =    cmsFloat2XYZEncoded
+cmsFormatterForColorspaceOfProfile       =    cmsFormatterForColorspaceOfProfile
+cmsFormatterForPCSOfProfile              =    cmsFormatterForPCSOfProfile
+_cmsFree                                 =    _cmsFree
+cmsFreeNamedColorList                    =    cmsFreeNamedColorList
+cmsFreeProfileSequenceDescription        =    cmsFreeProfileSequenceDescription
+cmsFreeToneCurve                         =    cmsFreeToneCurve
+cmsFreeToneCurveTriple                   =    cmsFreeToneCurveTriple
+cmsGBDAlloc                              =    cmsGBDAlloc
+cmsGBDFree                               =    cmsGBDFree
+cmsGDBAddPoint                           =    cmsGDBAddPoint
+cmsGDBCheckPoint                         =    cmsGDBCheckPoint
+cmsGDBCompute                            =    cmsGDBCompute
+cmsGetAlarmCodes                         =    cmsGetAlarmCodes
+cmsGetColorSpace                         =    cmsGetColorSpace
+cmsGetDeviceClass                        =    cmsGetDeviceClass
+cmsGetEncodedICCversion                  =    cmsGetEncodedICCversion
+cmsGetHeaderAttributes                   =    cmsGetHeaderAttributes
+cmsGetHeaderCreationDateTime             =    cmsGetHeaderCreationDateTime
+cmsGetHeaderFlags                        =    cmsGetHeaderFlags
+cmsGetHeaderManufacturer                 =    cmsGetHeaderManufacturer
+cmsGetHeaderModel                        =    cmsGetHeaderModel
+cmsGetHeaderProfileID                    =    cmsGetHeaderProfileID
+cmsGetHeaderRenderingIntent              =    cmsGetHeaderRenderingIntent
+cmsGetNamedColorList                     =    cmsGetNamedColorList
+cmsGetPCS                                =    cmsGetPCS
+cmsGetPostScriptColorResource            =    cmsGetPostScriptColorResource
+cmsGetPostScriptCRD                      =    cmsGetPostScriptCRD
+cmsGetPostScriptCSA                      =    cmsGetPostScriptCSA
+cmsGetProfileInfo                        =    cmsGetProfileInfo
+cmsGetProfileInfoASCII                   =    cmsGetProfileInfoASCII
+cmsGetProfileContextID                   =    cmsGetProfileContextID
+cmsGetProfileVersion                     =    cmsGetProfileVersion
+cmsGetSupportedIntents                   =    cmsGetSupportedIntents
+cmsGetTagCount                           =    cmsGetTagCount
+cmsGetTagSignature                       =    cmsGetTagSignature
+cmsGetTransformContextID                 =    cmsGetTransformContextID
+_cmsICCcolorSpace                        =    _cmsICCcolorSpace
+_cmsIOPrintf                             =    _cmsIOPrintf
+cmsIsCLUT                                =    cmsIsCLUT
+cmsIsIntentSupported                     =    cmsIsIntentSupported
+cmsIsMatrixShaper                        =    cmsIsMatrixShaper
+cmsIsTag                                 =    cmsIsTag
+cmsIsToneCurveDescending                 =    cmsIsToneCurveDescending
+cmsIsToneCurveLinear                     =    cmsIsToneCurveLinear
+cmsIsToneCurveMonotonic                  =    cmsIsToneCurveMonotonic
+cmsIsToneCurveMultisegment               =    cmsIsToneCurveMultisegment
+cmsGetToneCurveParametricType            =    cmsGetToneCurveParametricType
+cmsIT8Alloc                              =    cmsIT8Alloc
+cmsIT8DefineDblFormat                    =    cmsIT8DefineDblFormat
+cmsIT8EnumDataFormat                     =    cmsIT8EnumDataFormat
+cmsIT8EnumProperties                     =    cmsIT8EnumProperties
+cmsIT8EnumPropertyMulti                  =    cmsIT8EnumPropertyMulti
+cmsIT8Free                               =    cmsIT8Free
+cmsIT8GetData                            =    cmsIT8GetData
+cmsIT8GetDataDbl                         =    cmsIT8GetDataDbl
+cmsIT8FindDataFormat                     =    cmsIT8FindDataFormat
+cmsIT8GetDataRowCol                      =    cmsIT8GetDataRowCol
+cmsIT8GetDataRowColDbl                   =    cmsIT8GetDataRowColDbl
+cmsIT8GetPatchName                       =    cmsIT8GetPatchName
+cmsIT8GetPatchByName                     =    cmsIT8GetPatchByName
+cmsIT8GetProperty                        =    cmsIT8GetProperty
+cmsIT8GetPropertyDbl                     =    cmsIT8GetPropertyDbl
+cmsIT8GetPropertyMulti                   =    cmsIT8GetPropertyMulti
+cmsIT8GetSheetType                       =    cmsIT8GetSheetType
+cmsIT8LoadFromFile                       =    cmsIT8LoadFromFile
+cmsIT8LoadFromMem                        =    cmsIT8LoadFromMem
+cmsIT8SaveToFile                         =    cmsIT8SaveToFile
+cmsIT8SaveToMem                          =    cmsIT8SaveToMem
+cmsIT8SetComment                         =    cmsIT8SetComment
+cmsIT8SetData                            =    cmsIT8SetData
+cmsIT8SetDataDbl                         =    cmsIT8SetDataDbl
+cmsIT8SetDataFormat                      =    cmsIT8SetDataFormat
+cmsIT8SetDataRowCol                      =    cmsIT8SetDataRowCol
+cmsIT8SetDataRowColDbl                   =    cmsIT8SetDataRowColDbl
+cmsIT8SetPropertyDbl                     =    cmsIT8SetPropertyDbl
+cmsIT8SetPropertyHex                     =    cmsIT8SetPropertyHex
+cmsIT8SetPropertyStr                     =    cmsIT8SetPropertyStr
+cmsIT8SetPropertyMulti                   =    cmsIT8SetPropertyMulti
+cmsIT8SetPropertyUncooked                =    cmsIT8SetPropertyUncooked
+cmsIT8SetSheetType                       =    cmsIT8SetSheetType
+cmsIT8SetTable                           =    cmsIT8SetTable
+cmsIT8SetTableByLabel                    =    cmsIT8SetTableByLabel
+cmsIT8SetIndexColumn                     =    cmsIT8SetIndexColumn
+cmsIT8TableCount                         =    cmsIT8TableCount
+cmsJoinToneCurve                         =    cmsJoinToneCurve
+cmsLab2LCh                               =    cmsLab2LCh
+cmsLab2XYZ                               =    cmsLab2XYZ
+cmsLabEncoded2Float                      =    cmsLabEncoded2Float
+cmsLabEncoded2FloatV2                    =    cmsLabEncoded2FloatV2
+cmsLCh2Lab                               =    cmsLCh2Lab
+_cmsLCMScolorSpace                       =    _cmsLCMScolorSpace
+cmsLinkTag                               =    cmsLinkTag
+cmsTagLinkedTo                           =    cmsTagLinkedTo
+cmsPipelineAlloc                         =    cmsPipelineAlloc
+cmsPipelineCat                           =    cmsPipelineCat
+cmsPipelineCheckAndRetreiveStages        =    cmsPipelineCheckAndRetreiveStages
+cmsPipelineDup                           =    cmsPipelineDup
+cmsPipelineStageCount                    =    cmsPipelineStageCount
+cmsPipelineEval16                        =    cmsPipelineEval16
+cmsPipelineEvalFloat                     =    cmsPipelineEvalFloat
+cmsPipelineEvalReverseFloat              =    cmsPipelineEvalReverseFloat
+cmsPipelineFree                          =    cmsPipelineFree
+cmsPipelineGetPtrToFirstStage            =    cmsPipelineGetPtrToFirstStage
+cmsPipelineGetPtrToLastStage             =    cmsPipelineGetPtrToLastStage
+cmsPipelineInputChannels                 =    cmsPipelineInputChannels
+cmsPipelineInsertStage                   =    cmsPipelineInsertStage
+cmsPipelineOutputChannels                =    cmsPipelineOutputChannels
+cmsPipelineSetSaveAs8bitsFlag            =    cmsPipelineSetSaveAs8bitsFlag
+_cmsPipelineSetOptimizationParameters    =    _cmsPipelineSetOptimizationParameters
+cmsPipelineUnlinkStage                   =    cmsPipelineUnlinkStage
+_cmsMalloc                               =    _cmsMalloc
+_cmsMallocZero                           =    _cmsMallocZero
+_cmsMAT3eval                             =    _cmsMAT3eval
+_cmsMAT3identity                         =    _cmsMAT3identity
+_cmsMAT3inverse                          =    _cmsMAT3inverse
+_cmsMAT3isIdentity                       =    _cmsMAT3isIdentity
+_cmsMAT3per                              =    _cmsMAT3per
+_cmsMAT3solve                            =    _cmsMAT3solve
+cmsMLUalloc                              =    cmsMLUalloc
+cmsMLUdup                                =    cmsMLUdup
+cmsMLUfree                               =    cmsMLUfree
+cmsMLUgetASCII                           =    cmsMLUgetASCII
+cmsMLUgetTranslation                     =    cmsMLUgetTranslation
+cmsMLUgetWide                            =    cmsMLUgetWide
+cmsMLUsetASCII                           =    cmsMLUsetASCII
+cmsMLUsetWide                            =    cmsMLUsetWide
+cmsStageAllocCLut16bit                   =    cmsStageAllocCLut16bit
+cmsStageAllocCLut16bitGranular           =    cmsStageAllocCLut16bitGranular
+cmsStageAllocCLutFloat                   =    cmsStageAllocCLutFloat
+cmsStageAllocCLutFloatGranular           =    cmsStageAllocCLutFloatGranular
+cmsStageAllocToneCurves                  =    cmsStageAllocToneCurves
+cmsStageAllocIdentity                    =    cmsStageAllocIdentity
+cmsStageAllocMatrix                      =    cmsStageAllocMatrix
+_cmsStageAllocPlaceholder                =    _cmsStageAllocPlaceholder
+cmsStageDup                              =    cmsStageDup
+cmsStageFree                             =    cmsStageFree
+cmsStageNext                             =    cmsStageNext
+cmsStageInputChannels                    =    cmsStageInputChannels
+cmsStageOutputChannels                   =    cmsStageOutputChannels
+cmsStageSampleCLut16bit                  =    cmsStageSampleCLut16bit
+cmsStageSampleCLutFloat                  =    cmsStageSampleCLutFloat
+cmsStageType                             =    cmsStageType
+cmsStageData                             =    cmsStageData
+cmsNamedColorCount                       =    cmsNamedColorCount
+cmsNamedColorIndex                       =    cmsNamedColorIndex
+cmsNamedColorInfo                        =    cmsNamedColorInfo
+cmsOpenIOhandlerFromFile                 =    cmsOpenIOhandlerFromFile
+cmsOpenIOhandlerFromMem                  =    cmsOpenIOhandlerFromMem
+cmsOpenIOhandlerFromNULL                 =    cmsOpenIOhandlerFromNULL
+cmsOpenIOhandlerFromStream               =    cmsOpenIOhandlerFromStream
+cmsOpenProfileFromFile                   =    cmsOpenProfileFromFile
+cmsOpenProfileFromFileTHR                =    cmsOpenProfileFromFileTHR
+cmsOpenProfileFromIOhandlerTHR           =    cmsOpenProfileFromIOhandlerTHR
+cmsOpenProfileFromMem                    =    cmsOpenProfileFromMem
+cmsOpenProfileFromMemTHR                 =    cmsOpenProfileFromMemTHR
+cmsOpenProfileFromStream                 =    cmsOpenProfileFromStream
+cmsOpenProfileFromStreamTHR              =    cmsOpenProfileFromStreamTHR
+cmsPlugin                                =    cmsPlugin
+_cmsRead15Fixed16Number                  =    _cmsRead15Fixed16Number
+_cmsReadAlignment                        =    _cmsReadAlignment
+_cmsReadFloat32Number                    =    _cmsReadFloat32Number
+cmsReadRawTag                            =    cmsReadRawTag
+cmsReadTag                               =    cmsReadTag
+_cmsReadTypeBase                         =    _cmsReadTypeBase
+_cmsReadUInt16Array                      =    _cmsReadUInt16Array
+_cmsReadUInt16Number                     =    _cmsReadUInt16Number
+_cmsReadUInt32Number                     =    _cmsReadUInt32Number
+_cmsReadUInt64Number                     =    _cmsReadUInt64Number
+_cmsReadUInt8Number                      =    _cmsReadUInt8Number
+_cmsReadXYZNumber                        =    _cmsReadXYZNumber
+_cmsRealloc                              =    _cmsRealloc
+cmsReverseToneCurve                      =    cmsReverseToneCurve
+cmsReverseToneCurveEx                    =    cmsReverseToneCurveEx
+cmsSaveProfileToFile                     =    cmsSaveProfileToFile
+cmsSaveProfileToIOhandler                =    cmsSaveProfileToIOhandler
+cmsSaveProfileToMem                      =    cmsSaveProfileToMem
+cmsSaveProfileToStream                   =    cmsSaveProfileToStream
+cmsSetAdaptationState                    =    cmsSetAdaptationState
+cmsSetAlarmCodes                         =    cmsSetAlarmCodes
+cmsSetColorSpace                         =    cmsSetColorSpace
+cmsSetDeviceClass                        =    cmsSetDeviceClass
+cmsSetEncodedICCversion                  =    cmsSetEncodedICCversion
+cmsSetHeaderAttributes                   =    cmsSetHeaderAttributes
+cmsSetHeaderFlags                        =    cmsSetHeaderFlags
+cmsSetHeaderManufacturer                 =    cmsSetHeaderManufacturer
+cmsSetHeaderModel                        =    cmsSetHeaderModel
+cmsSetHeaderProfileID                    =    cmsSetHeaderProfileID
+cmsSetHeaderRenderingIntent              =    cmsSetHeaderRenderingIntent
+cmsSetLogErrorHandler                    =    cmsSetLogErrorHandler
+cmsSetPCS                                =    cmsSetPCS
+cmsSetProfileVersion                     =    cmsSetProfileVersion
+cmsSignalError                           =    cmsSignalError
+cmsSmoothToneCurve                       =    cmsSmoothToneCurve
+cmsstrcasecmp                            =    cmsstrcasecmp
+cmsTempFromWhitePoint                    =    cmsTempFromWhitePoint
+cmsTransform2DeviceLink                  =    cmsTransform2DeviceLink
+cmsUnregisterPlugins                     =    cmsUnregisterPlugins
+_cmsVEC3cross                            =    _cmsVEC3cross
+_cmsVEC3distance                         =    _cmsVEC3distance
+_cmsVEC3dot                              =    _cmsVEC3dot
+_cmsVEC3init                             =    _cmsVEC3init
+_cmsVEC3length                           =    _cmsVEC3length
+_cmsVEC3minus                            =    _cmsVEC3minus
+cmsWhitePointFromTemp                    =    cmsWhitePointFromTemp
+_cmsWrite15Fixed16Number                 =    _cmsWrite15Fixed16Number
+_cmsWriteAlignment                       =    _cmsWriteAlignment
+_cmsWriteFloat32Number                   =    _cmsWriteFloat32Number
+cmsWriteRawTag                           =    cmsWriteRawTag
+cmsWriteTag                              =    cmsWriteTag
+_cmsWriteTypeBase                        =    _cmsWriteTypeBase
+_cmsWriteUInt16Array                     =    _cmsWriteUInt16Array
+_cmsWriteUInt16Number                    =    _cmsWriteUInt16Number
+_cmsWriteUInt32Number                    =    _cmsWriteUInt32Number
+_cmsWriteUInt64Number                    =    _cmsWriteUInt64Number
+_cmsWriteUInt8Number                     =    _cmsWriteUInt8Number
+_cmsWriteXYZNumber                       =    _cmsWriteXYZNumber
+cmsxyY2XYZ                               =   cmsxyY2XYZ
+cmsXYZ2Lab                               =   cmsXYZ2Lab
+cmsXYZ2xyY                               =   cmsXYZ2xyY
+cmsXYZEncoded2Float                      =   cmsXYZEncoded2Float
+cmsSliceSpace16                          =   cmsSliceSpace16
+cmsSliceSpaceFloat                       =   cmsSliceSpaceFloat
+cmsChangeBuffersFormat                   =   cmsChangeBuffersFormat
+cmsDictAlloc                             =   cmsDictAlloc
+cmsDictFree                              =   cmsDictFree
+cmsDictDup                               =   cmsDictDup
+cmsDictAddEntry                          =   cmsDictAddEntry
+cmsDictGetEntryList                      =   cmsDictGetEntryList
+cmsDictNextEntry                         =   cmsDictNextEntry
+_cmsGetTransformUserData                 =   _cmsGetTransformUserData
+_cmsSetTransformUserData                 =   _cmsSetTransformUserData
+_cmsGetTransformFormatters16             =   _cmsGetTransformFormatters16
+_cmsGetTransformFormattersFloat          =   _cmsGetTransformFormattersFloat
+cmsGetHeaderCreator                      =   cmsGetHeaderCreator
+cmsPluginTHR                             =   cmsPluginTHR
+cmsGetPipelineContextID                  =   cmsGetPipelineContextID
+cmsGetTransformInputFormat               =   cmsGetTransformInputFormat
+cmsGetTransformOutputFormat              =   cmsGetTransformOutputFormat
+cmsCreateContext                         =   cmsCreateContext            
+cmsDupContext                            =   cmsDupContext               
+cmsDeleteContext                         =   cmsDeleteContext              
+cmsGetContextUserData                    =   cmsGetContextUserData       
+cmsUnregisterPluginsTHR                  =   cmsUnregisterPluginsTHR 
+cmsSetAlarmCodesTHR                      =   cmsSetAlarmCodesTHR     
+cmsGetAlarmCodesTHR                      =   cmsGetAlarmCodesTHR
+cmsSetAdaptationStateTHR                 =   cmsSetAdaptationStateTHR
+cmsSetLogErrorHandlerTHR                 =   cmsSetLogErrorHandlerTHR
+cmsGetSupportedIntentsTHR                =   cmsGetSupportedIntentsTHR
+cmsMLUtranslationsCount                  =   cmsMLUtranslationsCount
+cmsMLUtranslationsCodes                  =   cmsMLUtranslationsCodes
+_cmsCreateMutex                          =   _cmsCreateMutex 
+_cmsDestroyMutex                         =   _cmsDestroyMutex
+_cmsLockMutex                            =   _cmsLockMutex   
+_cmsUnlockMutex                          =   _cmsUnlockMutex 
+cmsGetProfileIOhandler                   =   cmsGetProfileIOhandler
+cmsGetEncodedCMMversion                  =   cmsGetEncodedCMMversion
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/src/lcms2_internal.h b/codec/L2/demos/pikEnc/host/third_party/lcms/src/lcms2_internal.h
new file mode 100755
index 0000000000..9d678570fe
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/src/lcms2_internal.h
@@ -0,0 +1,1109 @@
+
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#ifndef _lcms_internal_H
+
+// Include plug-in foundation
+#ifndef _lcms_plugin_H
+#   include "lcms2_plugin.h"
+#endif
+
+// ctype is part of C99 as per 7.1.2
+#include <ctype.h>
+
+// assert macro is part of C99 as per 7.2
+#include <assert.h>
+
+// Some needed constants
+#ifndef M_PI
+#       define M_PI        3.14159265358979323846
+#endif
+
+#ifndef M_LOG10E
+#       define M_LOG10E    0.434294481903251827651
+#endif
+
+// BorlandC 5.5, VC2003 are broken on sinf/sqrtf. An undefined _MSC_VER evaluates to 0 inside #if, so test defined() first.
+#if defined(__BORLANDC__) || (defined(_MSC_VER) && (_MSC_VER < 1400)) // 1400 == VC++ 8.0
+#define sinf(x) ((float)sin((float)(x)))
+#define sqrtf(x) ((float)sqrt((float)(x)))
+#endif
+
+
+// Alignment of ICC file format uses 4 bytes (cmsUInt32Number): round x up to the next multiple of that size
+#define _cmsALIGNLONG(x) (((x)+(sizeof(cmsUInt32Number)-1)) & ~(sizeof(cmsUInt32Number)-1))
+
+// Alignment to memory pointer
+
+// (Ultra)SPARC with gcc requires ptr alignment of 8 bytes
+// even though sizeof(void *) is only four: for greatest flexibility
+// allow the build to specify ptr alignment.
+#ifndef CMS_PTR_ALIGNMENT
+# define CMS_PTR_ALIGNMENT sizeof(void *)
+#endif
+// Round x up to the next multiple of CMS_PTR_ALIGNMENT (the add-and-mask form requires a power of two)
+#define _cmsALIGNMEM(x)  (((x)+(CMS_PTR_ALIGNMENT - 1)) & ~(CMS_PTR_ALIGNMENT - 1))
+
+// Maximum encodeable values in floating point
+#define MAX_ENCODEABLE_XYZ  (1.0 + 32767.0/32768.0)
+#define MIN_ENCODEABLE_ab2  (-128.0)
+#define MAX_ENCODEABLE_ab2  ((65535.0/256.0) - 128.0)
+#define MIN_ENCODEABLE_ab4  (-128.0)
+#define MAX_ENCODEABLE_ab4  (127.0)
+
+// Maximum of channels for internal pipeline evaluation
+#define MAX_STAGE_CHANNELS  128
+
+// Unused parameter warning suppression
+#define cmsUNUSED_PARAMETER(x) ((void)x)
+
+// The specification for "inline" is section 6.7.4 of the C99 standard (ISO/IEC 9899:1999).
+// Unfortunately, Visual C++ does not conform to it.
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+#   define cmsINLINE __inline
+#else
+#   define cmsINLINE static inline
+#endif
+
+// Other replacement functions
+#ifdef _MSC_VER
+# ifndef snprintf
+#       define snprintf  _snprintf
+# endif
+# ifndef vsnprintf
+#       define vsnprintf  _vsnprintf
+# endif
+
+/// Properly define some macros to accommodate
+/// older MSVC versions.
+# if _MSC_VER <= 1700
+        #include <float.h>
+        #define isnan _isnan
+        #define isinf(x) (!_finite((x)))
+# endif
+
+#endif
+
+// Fast 16 <-> 8 bit conversion: 8->16 copies the byte into both halves (rgb is evaluated twice); 16->8 is a rounded multiply-and-shift
+#define FROM_8_TO_16(rgb) (cmsUInt16Number) ((((cmsUInt16Number) (rgb)) << 8)|(rgb))
+#define FROM_16_TO_8(rgb) (cmsUInt8Number) ((((cmsUInt32Number)(rgb) * 65281U + 8388608U) >> 24) & 0xFFU)
+
+// Code analysis is broken on asserts
+#ifdef _MSC_VER
+#    if (_MSC_VER >= 1500)
+#            define _cmsAssert(a)  { assert((a)); __analysis_assume((a)); }
+#     else
+#            define _cmsAssert(a)   assert((a))
+#     endif
+#else
+#      define _cmsAssert(a)   assert((a))
+#endif
+
+//---------------------------------------------------------------------------------
+
+// Determinants smaller than this are assumed to be zero (used on matrix inversion)
+#define MATRIX_DET_TOLERANCE    0.0001
+
+//---------------------------------------------------------------------------------
+
+// Fixed point: integer part above bit 16, fraction in the low 16 bits
+#define FIXED_TO_INT(x)         ((x)>>16)
+#define FIXED_REST_TO_INT(x)    ((x)&0xFFFFU)
+#define ROUND_FIXED_TO_INT(x)   (((x)+0x8000)>>16)
+// Rescale between the 0..0xFFFF domain and the 0..0x10000 fixed point domain (e.g. 0 <-> 0, 0xFFFF <-> 0x10000)
+cmsINLINE cmsS15Fixed16Number _cmsToFixedDomain(int a)                   { return a + ((a + 0x7fff) / 0xffff); }
+cmsINLINE int                 _cmsFromFixedDomain(cmsS15Fixed16Number a) { return a - ((a + 0x7fff) >> 16); }
+
+// -----------------------------------------------------------------------------------------------------------
+
+// Fast floor conversion logic. Thanks to Sree Kotay and Stuart Nixon.
+// Note that this only works in the range -32767..+32767 because
+// mantissa is interpreted as 15.16 fixed point.
+// The union is to avoid pointer aliasing overoptimization.
+cmsINLINE int _cmsQuickFloor(cmsFloat64Number val)
+{
+#ifdef CMS_DONT_USE_FAST_FLOOR
+    return (int) floor(val);  // plain floor() when the fast path is disabled
+#else
+    const cmsFloat64Number _lcms_double2fixmagic = 68719476736.0 * 1.5;  // 2^36 * 1.5, (52-16=36) uses limited precision to floor
+    union {
+        cmsFloat64Number val;
+        int halves[2];
+    } temp;
+    // Adding the magic constant leaves val, as 15.16 fixed point, in the low-order word of the double
+    temp.val = val + _lcms_double2fixmagic;
+    // Pick the low-order word for this byte order and drop the 16 fractional bits
+#ifdef CMS_USE_BIG_ENDIAN
+    return temp.halves[1] >> 16;
+#else
+    return temp.halves[0] >> 16;
+#endif
+#endif
+}
+
+// Fast floor restricted to 0..65535.0
+cmsINLINE cmsUInt16Number _cmsQuickFloorWord(cmsFloat64Number d)
+{
+    return (cmsUInt16Number) _cmsQuickFloor(d - 32767.0) + 32767U;  // subtract 32767 before _cmsQuickFloor (limited to about +/-32767), then add it back
+}
+
+// Add 0.5 and floor to a word, clamping the result to 0..0xffff
+cmsINLINE cmsUInt16Number _cmsQuickSaturateWord(cmsFloat64Number d)
+{
+    const cmsFloat64Number biased = d + 0.5;   // +0.5 so that the floor below rounds to nearest
+
+    if (biased <= 0) return 0;                 // saturate at the low end
+    if (biased >= 65535.0) return 0xffff;      // saturate at the high end
+    return _cmsQuickFloorWord(biased);
+}
+
+
+// Pthread support --------------------------------------------------------------------
+#ifndef CMS_NO_PTHREADS
+
+// This is the threading support. Unfortunately, it has to be platform-dependent because 
+// windows does not support pthreads. 
+
+#ifdef CMS_IS_WINDOWS_
+
+#define WIN32_LEAN_AND_MEAN 1
+#include <windows.h>
+
+
+// The locking scheme in LCMS requires a single 'top level' mutex
+// to work. This is actually implemented on Windows as a
+// CriticalSection, because they are lighter weight. With
+// pthreads, this is statically inited. Unfortunately, windows
+// can't officially statically init critical sections.
+//
+// We can work around this in 2 ways.
+//
+// 1) We can use a proper mutex purely to protect the init
+// of the CriticalSection. This in turns requires us to protect
+// the Mutex creation, which we can do using the snappily
+// named InterlockedCompareExchangePointer API (present on
+// windows XP and above).
+//
+// 2) In cases where we want to work on pre-Windows XP, we
+// can use an even more horrible hack described below.
+//
+// So why wouldn't we always use 2)? Because not calling
+// the init function for a critical section means it fails
+// testing with ApplicationVerifier (and presumably similar
+// tools).
+//
+// We therefore default to 1, and people who want to be able
+// to run on pre-Windows XP boxes can build with:
+//     CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+// defined. This is automatically set for builds using
+// versions of MSVC that don't have this API available.
+//
+// From: http://locklessinc.com/articles/pthreads_on_windows/
+// The pthreads API has an initialization macro that has no correspondence to anything in 
+// the windows API. By investigating the internal definition of the critical section type, 
+// one may work out how to initialize one without calling InitializeCriticalSection(). 
+// The trick here is that InitializeCriticalSection() is not allowed to fail. It tries 
+// to allocate a critical section debug object, but if no memory is available, it sets 
+// the pointer to a specific value. (One would expect that value to be NULL, but it is 
+// actually (void *)-1 for some reason.) Thus we can use this special value for that 
+// pointer, and the critical section code will work.
+
+// The other important part of the critical section type to initialize is the number 
+// of waiters. This controls whether or not the mutex is locked. Fortunately, this 
+// part of the critical section is unlikely to change. Apparently, many programs 
+// already test critical sections to see if they are locked using this value, so 
+// Microsoft felt that it was necessary to keep it set at -1 for an unlocked critical
+// section, even when they changed the underlying algorithm to be more scalable. 
+// The final parts of the critical section object are unimportant, and can be set 
+// to zero for their defaults. This yields an initialization macro:
+
+typedef CRITICAL_SECTION _cmsMutex;
+
+#ifdef _MSC_VER
+#    if (_MSC_VER >= 1800)
+#          pragma warning(disable : 26135)
+#    endif
+#endif
+
+#ifndef CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+// If we are building with a version of MSVC smaller
+// than 1400 (i.e. before VS2005) then we don't have
+// the InterlockedCompareExchangePointer API, so use
+// the old version.
+#    ifdef _MSC_VER
+#       if _MSC_VER < 1400
+#          define CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+#       endif
+#    endif
+#endif
+
+#ifdef CMS_RELY_ON_WINDOWS_STATIC_MUTEX_INIT
+#      define CMS_MUTEX_INITIALIZER {(PRTL_CRITICAL_SECTION_DEBUG) -1,-1,0,0,0,0}
+#else
+#      define CMS_MUTEX_INITIALIZER {(PRTL_CRITICAL_SECTION_DEBUG)NULL,-1,0,0,0,0}
+#endif
+// Windows flavour: acquire the mutex with EnterCriticalSection(). Always returns 0.
+cmsINLINE int _cmsLockPrimitive(_cmsMutex *m)
+{
+	EnterCriticalSection(m);
+	return 0;
+}
+// Windows flavour: release the mutex with LeaveCriticalSection(). Always returns 0.
+cmsINLINE int _cmsUnlockPrimitive(_cmsMutex *m)
+{
+	LeaveCriticalSection(m);
+	return 0;
+}
+// Windows flavour: set up the mutex with InitializeCriticalSection(). Always returns 0.
+cmsINLINE int _cmsInitMutexPrimitive(_cmsMutex *m)
+{
+	InitializeCriticalSection(m);
+	return 0;
+}
+// Windows flavour: tear down the mutex with DeleteCriticalSection(). Always returns 0.
+cmsINLINE int _cmsDestroyMutexPrimitive(_cmsMutex *m)
+{
+	DeleteCriticalSection(m);
+	return 0;
+}
+// Windows flavour: same call as _cmsLockPrimitive (EnterCriticalSection). Always returns 0.
+cmsINLINE int _cmsEnterCriticalSectionPrimitive(_cmsMutex *m)
+{
+	EnterCriticalSection(m);
+	return 0;
+}
+// Windows flavour: same call as _cmsUnlockPrimitive (LeaveCriticalSection). Always returns 0.
+cmsINLINE int _cmsLeaveCriticalSectionPrimitive(_cmsMutex *m)
+{
+	LeaveCriticalSection(m);
+	return 0;
+}
+
+#else
+
+// Rest of the wide world
+#include <pthread.h>
+
+#define CMS_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+typedef pthread_mutex_t _cmsMutex;
+
+// pthread flavour: returns the result of pthread_mutex_lock().
+cmsINLINE int _cmsLockPrimitive(_cmsMutex *m)
+{
+	return pthread_mutex_lock(m);
+}
+// pthread flavour: returns the result of pthread_mutex_unlock().
+cmsINLINE int _cmsUnlockPrimitive(_cmsMutex *m)
+{
+	return pthread_mutex_unlock(m);
+}
+// pthread flavour: returns the result of pthread_mutex_init() with default (NULL) attributes.
+cmsINLINE int _cmsInitMutexPrimitive(_cmsMutex *m)
+{
+	return pthread_mutex_init(m, NULL);
+}
+// pthread flavour: returns the result of pthread_mutex_destroy().
+cmsINLINE int _cmsDestroyMutexPrimitive(_cmsMutex *m)
+{
+	return pthread_mutex_destroy(m);
+}
+// pthread flavour: same call as _cmsLockPrimitive (pthread_mutex_lock).
+cmsINLINE int _cmsEnterCriticalSectionPrimitive(_cmsMutex *m)
+{
+	return pthread_mutex_lock(m);
+}
+// pthread flavour: same call as _cmsUnlockPrimitive (pthread_mutex_unlock).
+cmsINLINE int _cmsLeaveCriticalSectionPrimitive(_cmsMutex *m)
+{
+	return pthread_mutex_unlock(m);
+}
+
+#endif
+#else
+
+#define CMS_MUTEX_INITIALIZER 0
+typedef int _cmsMutex;
+
+// CMS_NO_PTHREADS flavour: no-op lock, always returns 0.
+cmsINLINE int _cmsLockPrimitive(_cmsMutex *m)
+{
+    cmsUNUSED_PARAMETER(m);
+	return 0;
+}
+// CMS_NO_PTHREADS flavour: no-op unlock, always returns 0.
+cmsINLINE int _cmsUnlockPrimitive(_cmsMutex *m)
+{
+    cmsUNUSED_PARAMETER(m);
+	return 0;
+}
+// CMS_NO_PTHREADS flavour: no-op init, always returns 0.
+cmsINLINE int _cmsInitMutexPrimitive(_cmsMutex *m)
+{
+    cmsUNUSED_PARAMETER(m);
+	return 0;
+}
+// CMS_NO_PTHREADS flavour: no-op destroy, always returns 0.
+cmsINLINE int _cmsDestroyMutexPrimitive(_cmsMutex *m)
+{
+    cmsUNUSED_PARAMETER(m);
+	return 0;
+}
+// CMS_NO_PTHREADS flavour: no-op critical section enter, always returns 0.
+cmsINLINE int _cmsEnterCriticalSectionPrimitive(_cmsMutex *m)
+{
+    cmsUNUSED_PARAMETER(m);
+	return 0;
+}
+// CMS_NO_PTHREADS flavour: no-op critical section leave, always returns 0.
+cmsINLINE int _cmsLeaveCriticalSectionPrimitive(_cmsMutex *m)
+{
+    cmsUNUSED_PARAMETER(m);
+	return 0;
+}
+#endif
+
+// Plug-In registration ---------------------------------------------------------------
+
+// Specialized function for plug-in memory management. No pairing free() since whole pool is freed at once.
+void* _cmsPluginMalloc(cmsContext ContextID, cmsUInt32Number size);
+
+// Memory management
+cmsBool   _cmsRegisterMemHandlerPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Interpolation
+cmsBool  _cmsRegisterInterpPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Parametric curves
+cmsBool  _cmsRegisterParametricCurvesPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Formatters management
+cmsBool  _cmsRegisterFormattersPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Tag type management
+cmsBool  _cmsRegisterTagTypePlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Tag management
+cmsBool  _cmsRegisterTagPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Intent management
+cmsBool  _cmsRegisterRenderingIntentPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Multi Process elements
+cmsBool  _cmsRegisterMultiProcessElementPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Optimization
+cmsBool  _cmsRegisterOptimizationPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Transform
+cmsBool  _cmsRegisterTransformPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// Mutex
+cmsBool _cmsRegisterMutexPlugin(cmsContext ContextID, cmsPluginBase* Plugin);
+
+// ---------------------------------------------------------------------------------------------------------
+
+// Suballocators. 
+typedef struct _cmsSubAllocator_chunk_st {
+
+    cmsUInt8Number* Block;
+    cmsUInt32Number BlockSize;
+    cmsUInt32Number Used;
+
+    struct _cmsSubAllocator_chunk_st* next;
+
+} _cmsSubAllocator_chunk;
+
+
+typedef struct {
+
+    cmsContext ContextID;
+    _cmsSubAllocator_chunk* h;
+
+} _cmsSubAllocator;
+
+
+_cmsSubAllocator* _cmsCreateSubAlloc(cmsContext ContextID, cmsUInt32Number Initial);
+void              _cmsSubAllocDestroy(_cmsSubAllocator* s);
+void*             _cmsSubAlloc(_cmsSubAllocator* s, cmsUInt32Number size);
+void*             _cmsSubAllocDup(_cmsSubAllocator* s, const void *ptr, cmsUInt32Number size);
+
+// ----------------------------------------------------------------------------------
+
+// The context clients. This is the selector type taken by _cmsContextGetClientChunk().
+typedef enum {
+
+    UserPtr,            // User-defined pointer
+    Logger,
+    AlarmCodesContext,
+    AdaptationStateContext, 
+    MemPlugin,
+    InterpPlugin,
+    CurvesPlugin,
+    FormattersPlugin,
+    TagTypePlugin,
+    TagPlugin,
+    IntentPlugin,
+    MPEPlugin,
+    OptimizationPlugin,
+    TransformPlugin,
+    MutexPlugin,
+
+    // Last in list: the number of clients, used to size _cmsContext_struct.chunks[]
+    MemoryClientMax
+
+} _cmsMemoryClient;
+
+
+// Container for memory management plug-in.
+typedef struct {
+
+    _cmsMallocFnPtrType     MallocPtr;    
+    _cmsMalloZerocFnPtrType MallocZeroPtr;
+    _cmsFreeFnPtrType       FreePtr;
+    _cmsReallocFnPtrType    ReallocPtr;
+    _cmsCallocFnPtrType     CallocPtr;
+    _cmsDupFnPtrType        DupPtr;
+
+} _cmsMemPluginChunkType;
+
+// Copy memory management function pointers from plug-in to chunk, taking care of missing routines
+void  _cmsInstallAllocFunctions(cmsPluginMemHandler* Plugin, _cmsMemPluginChunkType* ptr);
+
+// Internal structure for context
+struct _cmsContext_struct {
+    
+    struct _cmsContext_struct* Next;  // Points to next context in the new style
+    _cmsSubAllocator* MemPool;        // The memory pool that stores context data
+    
+    void* chunks[MemoryClientMax];    // Array of pointers to client chunks (sized by MemoryClientMax). Memory itself is held in the suballocator.
+                                      // If NULL, then it reverts to global Context0
+
+    _cmsMemPluginChunkType DefaultMemoryManager;  // The allocators used for creating the context itself. Cannot be overridden
+};
+
+// Returns a pointer to a valid context structure, including the global one if id is zero. 
+// Verifies the magic number.
+struct _cmsContext_struct* _cmsGetContext(cmsContext ContextID);
+
+// Returns the block assigned to the specific zone. 
+void*     _cmsContextGetClientChunk(cmsContext id, _cmsMemoryClient mc);
+
+
+// Chunks of context memory by plug-in client -------------------------------------------------------
+
+// These structures encapsulate all variables needed by the several context clients (mostly plug-ins)
+
+// Container for error logger -- not a plug-in
+typedef struct {
+
+    cmsLogErrorHandlerFunction LogErrorHandler;  // Set to NULL for Context0 fallback
+
+} _cmsLogErrorChunkType;
+
+// The global Context0 storage for error logger
+extern  _cmsLogErrorChunkType  _cmsLogErrorChunk;
+
+// Allocate and init error logger container. 
+void _cmsAllocLogErrorChunk(struct _cmsContext_struct* ctx, 
+                            const struct _cmsContext_struct* src);
+
+// Container for alarm codes -- not a plug-in
+typedef struct {
+   
+    cmsUInt16Number AlarmCodes[cmsMAXCHANNELS];
+
+} _cmsAlarmCodesChunkType;
+
+// The global Context0 storage for alarm codes
+extern  _cmsAlarmCodesChunkType _cmsAlarmCodesChunk;
+
+// Allocate and init alarm codes container. 
+void _cmsAllocAlarmCodesChunk(struct _cmsContext_struct* ctx, 
+                            const struct _cmsContext_struct* src);
+
+// Container for adaptation state -- not a plug-in
+typedef struct {
+    
+    cmsFloat64Number  AdaptationState;
+
+} _cmsAdaptationStateChunkType;
+
+// The global Context0 storage for adaptation state
+extern  _cmsAdaptationStateChunkType    _cmsAdaptationStateChunk;
+
+// Allocate and init adaptation state container.
+void _cmsAllocAdaptationStateChunk(struct _cmsContext_struct* ctx, 
+                                   const struct _cmsContext_struct* src);
+
+
+// The global Context0 storage for memory management
+extern  _cmsMemPluginChunkType _cmsMemPluginChunk;
+
+// Allocate and init memory management container.
+void _cmsAllocMemPluginChunk(struct _cmsContext_struct* ctx, 
+                             const struct _cmsContext_struct* src);
+
+// Container for interpolation plug-in
+typedef struct {
+
+    cmsInterpFnFactory Interpolators;
+
+} _cmsInterpPluginChunkType;
+
+// The global Context0 storage for interpolation plug-in
+extern  _cmsInterpPluginChunkType _cmsInterpPluginChunk;
+
+// Allocate and init interpolation container.
+void _cmsAllocInterpPluginChunk(struct _cmsContext_struct* ctx, 
+                                const struct _cmsContext_struct* src);
+
+// Container for parametric curves plug-in
+typedef struct {
+
+    struct _cmsParametricCurvesCollection_st* ParametricCurves;
+
+} _cmsCurvesPluginChunkType;
+
+// The global Context0 storage for tone curves plug-in
+extern  _cmsCurvesPluginChunkType _cmsCurvesPluginChunk;
+
+// Allocate and init parametric curves container.
+void _cmsAllocCurvesPluginChunk(struct _cmsContext_struct* ctx, 
+                                                      const struct _cmsContext_struct* src);
+
+// Container for formatters plug-in
+typedef struct {
+
+    struct _cms_formatters_factory_list* FactoryList;
+
+} _cmsFormattersPluginChunkType;
+
+// The global Context0 storage for formatters plug-in
+extern  _cmsFormattersPluginChunkType _cmsFormattersPluginChunk;
+
+// Allocate and init formatters container.
+void _cmsAllocFormattersPluginChunk(struct _cmsContext_struct* ctx, 
+                                                       const struct _cmsContext_struct* src);
+
+// This chunk type is shared by TagType plug-in and MPE Plug-in
+typedef struct {
+
+    struct _cmsTagTypeLinkedList_st* TagTypes;
+
+} _cmsTagTypePluginChunkType;
+
+
+// The global Context0 storage for tag types plug-in
+extern  _cmsTagTypePluginChunkType      _cmsTagTypePluginChunk;
+
+
+// The global Context0 storage for multi process elements plug-in
+extern  _cmsTagTypePluginChunkType      _cmsMPETypePluginChunk;
+
+// Allocate and init Tag types container.
+void _cmsAllocTagTypePluginChunk(struct _cmsContext_struct* ctx, 
+                                                        const struct _cmsContext_struct* src);
+// Allocate and init MPE container.
+void _cmsAllocMPETypePluginChunk(struct _cmsContext_struct* ctx, 
+                                                        const struct _cmsContext_struct* src);
+// Container for tag plug-in
+typedef struct {
+   
+    struct _cmsTagLinkedList_st* Tag;
+
+} _cmsTagPluginChunkType;
+
+
+// The global Context0 storage for tag plug-in
+extern  _cmsTagPluginChunkType _cmsTagPluginChunk;
+
+// Allocate and init Tag container.
+void _cmsAllocTagPluginChunk(struct _cmsContext_struct* ctx, 
+                                                      const struct _cmsContext_struct* src); 
+
+// Container for intents plug-in
+typedef struct {
+
+    struct _cms_intents_list* Intents;
+
+} _cmsIntentsPluginChunkType;
+
+
+// The global Context0 storage for intents plug-in
+extern  _cmsIntentsPluginChunkType _cmsIntentsPluginChunk;
+
+// Allocate and init intents container.
+void _cmsAllocIntentsPluginChunk(struct _cmsContext_struct* ctx, 
+                                                        const struct _cmsContext_struct* src); 
+
+// Container for optimization plug-in
+typedef struct {
+
+    struct _cmsOptimizationCollection_st* OptimizationCollection;
+
+} _cmsOptimizationPluginChunkType;
+
+
+// The global Context0 storage for optimizers plug-in
+extern  _cmsOptimizationPluginChunkType _cmsOptimizationPluginChunk;
+
+// Allocate and init optimizers container.
+void _cmsAllocOptimizationPluginChunk(struct _cmsContext_struct* ctx, 
+                                         const struct _cmsContext_struct* src);
+
+// Container for transform plug-in
+typedef struct {
+
+    struct _cmsTransformCollection_st* TransformCollection;
+
+} _cmsTransformPluginChunkType;
+
+// The global Context0 storage for full-transform replacement plug-in
+extern  _cmsTransformPluginChunkType _cmsTransformPluginChunk;
+
+// Allocate and init transform container.
+void _cmsAllocTransformPluginChunk(struct _cmsContext_struct* ctx, 
+                                        const struct _cmsContext_struct* src);
+
+// Container for mutex plug-in
+typedef struct {
+
+    _cmsCreateMutexFnPtrType  CreateMutexPtr;
+    _cmsDestroyMutexFnPtrType DestroyMutexPtr;
+    _cmsLockMutexFnPtrType    LockMutexPtr;
+    _cmsUnlockMutexFnPtrType  UnlockMutexPtr;
+
+} _cmsMutexPluginChunkType;
+
+// The global Context0 storage for mutex plug-in
+extern  _cmsMutexPluginChunkType _cmsMutexPluginChunk;
+
+// Allocate and init mutex container.
+void _cmsAllocMutexPluginChunk(struct _cmsContext_struct* ctx, 
+                                        const struct _cmsContext_struct* src);
+
+// ----------------------------------------------------------------------------------
+// MLU internal representation
+typedef struct {
+
+    cmsUInt16Number Language;
+    cmsUInt16Number Country;
+
+    cmsUInt32Number StrW;       // Offset to current unicode string
+    cmsUInt32Number Len;        // Length in bytes
+
+} _cmsMLUentry;
+
+struct _cms_MLU_struct {
+
+    cmsContext ContextID;
+
+    // The directory
+    cmsUInt32Number  AllocatedEntries;
+    cmsUInt32Number  UsedEntries;
+    _cmsMLUentry* Entries;     // Array of directory entries; each locates its string in MemPool by offset (StrW) and length (Len)
+
+    // The Pool
+    cmsUInt32Number PoolSize;  // The maximum allocated size
+    cmsUInt32Number PoolUsed;  // The used size
+    void*  MemPool;            // Pointer to begin of memory pool
+};
+
+// Named color list internal representation
+typedef struct {
+
+    char Name[cmsMAX_PATH];
+    cmsUInt16Number PCS[3];
+    cmsUInt16Number DeviceColorant[cmsMAXCHANNELS];
+
+} _cmsNAMEDCOLOR;
+
+struct _cms_NAMEDCOLORLIST_struct {
+
+    cmsUInt32Number nColors;
+    cmsUInt32Number Allocated;
+    cmsUInt32Number ColorantCount;
+
+    char Prefix[33];      // Prefix and suffix are defined to be 32 characters at most
+    char Suffix[33];
+
+    _cmsNAMEDCOLOR* List;
+
+    cmsContext ContextID;
+};
+
+
+// ----------------------------------------------------------------------------------
+
+// This is the internal struct holding profile details.
+
+// Maximum supported tags in a profile
+#define MAX_TABLE_TAG       100
+
+typedef struct _cms_iccprofile_struct {
+
+    // I/O handler
+    cmsIOHANDLER*            IOhandler;
+
+    // The thread ID
+    cmsContext               ContextID;
+
+    // Creation time
+    struct tm                Created;
+
+    // Only most important items found in ICC profiles
+    cmsUInt32Number          Version;
+    cmsProfileClassSignature DeviceClass;
+    cmsColorSpaceSignature   ColorSpace;
+    cmsColorSpaceSignature   PCS;
+    cmsUInt32Number          RenderingIntent;
+
+    cmsUInt32Number          flags;
+    cmsUInt32Number          manufacturer, model;
+    cmsUInt64Number          attributes;
+    cmsUInt32Number          creator;
+
+    cmsProfileID             ProfileID;
+
+    // Dictionary
+    cmsUInt32Number          TagCount;
+    cmsTagSignature          TagNames[MAX_TABLE_TAG];
+    cmsTagSignature          TagLinked[MAX_TABLE_TAG];           // The tag to which is linked (0=none)
+    cmsUInt32Number          TagSizes[MAX_TABLE_TAG];            // Size on disk
+    cmsUInt32Number          TagOffsets[MAX_TABLE_TAG];
+    cmsBool                  TagSaveAsRaw[MAX_TABLE_TAG];        // True to write uncooked
+    void *                   TagPtrs[MAX_TABLE_TAG];
+    cmsTagTypeHandler*       TagTypeHandlers[MAX_TABLE_TAG];     // Same structure may be serialized on different types
+                                                                 // depending on profile version, so we keep track of the
+                                                                 // type handler for each tag in the list.
+    // Special
+    cmsBool                  IsWrite;
+
+    // Keep a mutex for cmsReadTag -- Note that this only works if the user includes a mutex plugin
+    void *                   UsrMutex;
+
+} _cmsICCPROFILE;
+
+// IO helpers for profiles
+cmsBool              _cmsReadHeader(_cmsICCPROFILE* Icc);
+cmsBool              _cmsWriteHeader(_cmsICCPROFILE* Icc, cmsUInt32Number UsedSpace);
+int                  _cmsSearchTag(_cmsICCPROFILE* Icc, cmsTagSignature sig, cmsBool lFollowLinks);
+
+// Tag types
+cmsTagTypeHandler*   _cmsGetTagTypeHandler(cmsContext ContextID, cmsTagTypeSignature sig);
+cmsTagTypeSignature  _cmsGetTagTrueType(cmsHPROFILE hProfile, cmsTagSignature sig);
+cmsTagDescriptor*    _cmsGetTagDescriptor(cmsContext ContextID, cmsTagSignature sig);
+
+// Error logging ---------------------------------------------------------------------------------------------------------
+
+void                 _cmsTagSignature2String(char String[5], cmsTagSignature sig);
+
+// Interpolation ---------------------------------------------------------------------------------------------------------
+
+cmsInterpParams*     _cmsComputeInterpParams(cmsContext ContextID, cmsUInt32Number nSamples, cmsUInt32Number InputChan, cmsUInt32Number OutputChan, const void* Table, cmsUInt32Number dwFlags);
+cmsInterpParams*     _cmsComputeInterpParamsEx(cmsContext ContextID, const cmsUInt32Number nSamples[], cmsUInt32Number InputChan, cmsUInt32Number OutputChan, const void* Table, cmsUInt32Number dwFlags);
+void                 _cmsFreeInterpParams(cmsInterpParams* p);
+cmsBool              _cmsSetInterpolationRoutine(cmsContext ContextID, cmsInterpParams* p);
+
+// Curves ----------------------------------------------------------------------------------------------------------------
+
+// This struct holds information about a segment, plus a pointer to the function that implements the evaluation.
+// In the case of table-based, Eval pointer is set to NULL
+
+// The gamma function main structure
+struct _cms_curve_struct {
+
+    cmsInterpParams*  InterpParams;  // Private optimizations for interpolation
+
+    cmsUInt32Number   nSegments;     // Number of segments in the curve. Zero for a 16-bit based tables
+    cmsCurveSegment*  Segments;      // The segments
+    cmsInterpParams** SegInterp;     // Array of private optimizations for interpolation in table-based segments
+
+    cmsParametricCurveEvaluator* Evals;  // Evaluators (one per segment)
+
+    // 16 bit Table-based representation follows
+    cmsUInt32Number    nEntries;      // Number of table elements
+    cmsUInt16Number*   Table16;       // The table itself.
+};
+
+
+//  Pipelines & Stages ---------------------------------------------------------------------------------------------
+
+// A single stage
+struct _cmsStage_struct {
+
+    cmsContext          ContextID;
+
+    cmsStageSignature   Type;           // Identifies the stage
+    cmsStageSignature   Implements;     // Identifies the *function* of the stage (for optimizations)
+
+    cmsUInt32Number     InputChannels;  // Input channels -- for optimization purposes
+    cmsUInt32Number     OutputChannels; // Output channels -- for optimization purposes
+
+    _cmsStageEvalFn     EvalPtr;        // Points to fn that evaluates the stage (always in floating point)
+    _cmsStageDupElemFn  DupElemPtr;     // Points to a fn that duplicates the *data* of the stage
+    _cmsStageFreeElemFn FreePtr;        // Points to a fn that sets the *data* of the stage free
+
+    // A generic pointer to whatever memory needed by the stage
+    void*               Data;
+
+    // Maintains linked list (used internally)
+    struct _cmsStage_struct* Next;
+};
+
+
+// Special Stages (cannot be saved)
+cmsStage*        _cmsStageAllocLab2XYZ(cmsContext ContextID);
+cmsStage*        _cmsStageAllocXYZ2Lab(cmsContext ContextID);
+cmsStage*        _cmsStageAllocLabPrelin(cmsContext ContextID);
+cmsStage*        _cmsStageAllocLabV2ToV4(cmsContext ContextID);
+cmsStage*        _cmsStageAllocLabV2ToV4curves(cmsContext ContextID);
+cmsStage*        _cmsStageAllocLabV4ToV2(cmsContext ContextID);
+cmsStage*        _cmsStageAllocNamedColor(cmsNAMEDCOLORLIST* NamedColorList, cmsBool UsePCS);
+cmsStage*        _cmsStageAllocIdentityCurves(cmsContext ContextID, cmsUInt32Number nChannels);
+cmsStage*        _cmsStageAllocIdentityCLut(cmsContext ContextID, cmsUInt32Number nChan);
+cmsStage*        _cmsStageNormalizeFromLabFloat(cmsContext ContextID);
+cmsStage*        _cmsStageNormalizeFromXyzFloat(cmsContext ContextID);
+cmsStage*        _cmsStageNormalizeToLabFloat(cmsContext ContextID);
+cmsStage*        _cmsStageNormalizeToXyzFloat(cmsContext ContextID);
+cmsStage*        _cmsStageClipNegatives(cmsContext ContextID, cmsUInt32Number nChannels);
+
+
+// For curve set only
+cmsToneCurve**     _cmsStageGetPtrToCurveSet(const cmsStage* mpe);
+
+
+// Pipeline Evaluator (in floating point)
+typedef void (* _cmsPipelineEvalFloatFn)(const cmsFloat32Number In[],
+                                         cmsFloat32Number Out[],
+                                         const void* Data);
+
+struct _cmsPipeline_struct {
+
+    cmsStage* Elements;                                // Points to elements chain
+    cmsUInt32Number InputChannels, OutputChannels;
+
+    // Data & evaluators
+    void *Data;
+
+   _cmsOPTeval16Fn         Eval16Fn;
+   _cmsPipelineEvalFloatFn EvalFloatFn;
+   _cmsFreeUserDataFn      FreeDataFn;
+   _cmsDupUserDataFn       DupDataFn;
+
+    cmsContext ContextID;            // Environment
+
+    cmsBool  SaveAs8Bits;            // Implementation-specific: save as 8 bits if possible
+};
+
+// LUT reading & creation -------------------------------------------------------------------------------------------
+
+// Read tags using low-level function, provide necessary glue code to adapt versions, etc. All those return a brand new copy
+// of the LUTS, since ownership of original is up to the profile. The user should free allocated resources.
+
+cmsPipeline*      _cmsReadInputLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent);
+cmsPipeline*      _cmsReadOutputLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent);
+cmsPipeline*      _cmsReadDevicelinkLUT(cmsHPROFILE hProfile, cmsUInt32Number Intent);
+
+// Special values
+cmsBool           _cmsReadMediaWhitePoint(cmsCIEXYZ* Dest, cmsHPROFILE hProfile);
+cmsBool           _cmsReadCHAD(cmsMAT3* Dest, cmsHPROFILE hProfile);
+
+// Profile linker --------------------------------------------------------------------------------------------------
+
+cmsPipeline* _cmsLinkProfiles(cmsContext         ContextID,
+                              cmsUInt32Number    nProfiles,
+                              cmsUInt32Number    TheIntents[],
+                              cmsHPROFILE        hProfiles[],
+                              cmsBool            BPC[],
+                              cmsFloat64Number   AdaptationStates[],
+                              cmsUInt32Number    dwFlags);
+
+// Sequence --------------------------------------------------------------------------------------------------------
+
+cmsSEQ* _cmsReadProfileSequence(cmsHPROFILE hProfile);
+cmsBool _cmsWriteProfileSequence(cmsHPROFILE hProfile, const cmsSEQ* seq);
+cmsSEQ* _cmsCompileProfileSequence(cmsContext ContextID, cmsUInt32Number nProfiles, cmsHPROFILE hProfiles[]);
+
+
+// LUT optimization ------------------------------------------------------------------------------------------------
+
+cmsUInt16Number  _cmsQuantizeVal(cmsFloat64Number i, cmsUInt32Number MaxSamples);
+cmsUInt32Number  _cmsReasonableGridpointsByColorspace(cmsColorSpaceSignature Colorspace, cmsUInt32Number dwFlags);
+
+cmsBool          _cmsEndPointsBySpace(cmsColorSpaceSignature Space,
+                                      cmsUInt16Number **White,
+                                      cmsUInt16Number **Black,
+                                      cmsUInt32Number *nOutputs);
+
+cmsBool          _cmsOptimizePipeline(cmsContext ContextID,
+                                      cmsPipeline**    Lut,
+                                      cmsUInt32Number  Intent,
+                                      cmsUInt32Number* InputFormat,
+                                      cmsUInt32Number* OutputFormat,
+                                      cmsUInt32Number* dwFlags );
+
+
+// Hi level LUT building ----------------------------------------------------------------------------------------------
+
+cmsPipeline*     _cmsCreateGamutCheckPipeline(cmsContext ContextID,
+                                              cmsHPROFILE hProfiles[],
+                                              cmsBool  BPC[],
+                                              cmsUInt32Number Intents[],
+                                              cmsFloat64Number AdaptationStates[],
+                                              cmsUInt32Number nGamutPCSposition,
+                                              cmsHPROFILE hGamut);
+
+
+// Formatters ------------------------------------------------------------------------------------------------------------
+
+#define cmsFLAGS_CAN_CHANGE_FORMATTER     0x02000000   // Allow change buffer format
+
+cmsBool         _cmsFormatterIsFloat(cmsUInt32Number Type);
+cmsBool         _cmsFormatterIs8bit(cmsUInt32Number Type);
+
+cmsFormatter    _cmsGetFormatter(cmsContext ContextID,
+                                 cmsUInt32Number Type,          // Specific type, i.e. TYPE_RGB_8
+                                 cmsFormatterDirection Dir,
+                                 cmsUInt32Number dwFlags);
+
+
+#ifndef CMS_NO_HALF_SUPPORT 
+
+// Half float
+cmsFloat32Number _cmsHalf2Float(cmsUInt16Number h);
+cmsUInt16Number  _cmsFloat2Half(cmsFloat32Number flt);
+
+#endif
+
+// Transform logic ------------------------------------------------------------------------------------------------------
+
+struct _cmstransform_struct;
+
+typedef struct {
+
+    // 1-pixel cache (16 bits only)
+    cmsUInt16Number CacheIn[cmsMAXCHANNELS];    // One slot per channel, up to cmsMAXCHANNELS; presumably the last input pixel (TODO confirm)
+    cmsUInt16Number CacheOut[cmsMAXCHANNELS];   // Presumably the result computed for CacheIn (TODO confirm)
+
+} _cmsCACHE;
+
+
+
+// Transformation
+typedef struct _cmstransform_struct {
+// Internal record behind a transform: formats, worker function, formatters, pipelines and bookkeeping.
+    cmsUInt32Number InputFormat, OutputFormat; // Keep formats for further reference
+
+    // Points to transform code
+    _cmsTransform2Fn xform;
+
+    // Formatters, cannot be embedded into LUT because cache
+    cmsFormatter16 FromInput;           // 16-bit formatter pair
+    cmsFormatter16 ToOutput;
+
+    cmsFormatterFloat FromInputFloat;   // Floating point formatter pair
+    cmsFormatterFloat ToOutputFloat;
+
+    // 1-pixel cache seed for zero as input (16 bits, read only)
+    _cmsCACHE Cache;
+
+    // A Pipeline holding the full (optimized) transform
+    cmsPipeline* Lut;
+
+    // A Pipeline holding the gamut check. It goes from the input space to bilevel
+    cmsPipeline* GamutCheck;
+
+    // Colorant tables
+    cmsNAMEDCOLORLIST* InputColorant;       // Input Colorant table
+    cmsNAMEDCOLORLIST* OutputColorant;      // Colorant table (for n chans > CMYK)
+
+    // Informational only
+    cmsColorSpaceSignature EntryColorSpace;
+    cmsColorSpaceSignature ExitColorSpace;
+
+    // White points (informative only)
+    cmsCIEXYZ EntryWhitePoint;
+    cmsCIEXYZ ExitWhitePoint;
+
+    // Profiles used to create the transform
+    cmsSEQ* Sequence;
+
+    cmsUInt32Number  dwOriginalFlags;   // Flags kept with the transform; the extra-channel copy helper declared below is documented as consulting them
+    cmsFloat64Number AdaptationState;   // NOTE(review): meaning and valid range are not visible in this header
+
+    // The intent of this transform. That is usually the last intent in the profile chain, but may differ
+    cmsUInt32Number RenderingIntent;
+
+    // An id that uniquely identifies the running context. May be null.
+    cmsContext ContextID;
+
+    // A user-defined pointer that can be used to store data for transform plug-ins
+    void* UserData;
+    _cmsFreeUserDataFn FreeUserData;    // Presumably the release callback for UserData (TODO confirm)
+
+    // A way to provide backwards compatibility with full xform plugins
+    _cmsTransformFn OldXform;
+
+} _cmsTRANSFORM;
+
+// Copies extra channels from input to output if the original flags in the transform structure
+// instruct it to do so. This function is called from all standard transform functions.
+void _cmsHandleExtraChannels(_cmsTRANSFORM* p, const void* in,
+                             void* out, 
+                             cmsUInt32Number PixelsPerLine,
+                             cmsUInt32Number LineCount,
+                             const cmsStride* Stride);
+
+// -----------------------------------------------------------------------------------------------------------------------
+
+cmsHTRANSFORM _cmsChain2Lab(cmsContext             ContextID,
+                            cmsUInt32Number        nProfiles,
+                            cmsUInt32Number        InputFormat,
+                            cmsUInt32Number        OutputFormat,
+                            const cmsUInt32Number  Intents[],
+                            const cmsHPROFILE      hProfiles[],
+                            const cmsBool          BPC[],
+                            const cmsFloat64Number AdaptationStates[],
+                            cmsUInt32Number        dwFlags);
+
+
+cmsToneCurve* _cmsBuildKToneCurve(cmsContext       ContextID,
+                            cmsUInt32Number        nPoints,
+                            cmsUInt32Number        nProfiles,
+                            const cmsUInt32Number  Intents[],
+                            const cmsHPROFILE      hProfiles[],
+                            const cmsBool          BPC[],
+                            const cmsFloat64Number AdaptationStates[],
+                            cmsUInt32Number        dwFlags);
+
+cmsBool   _cmsAdaptationMatrix(cmsMAT3* r, const cmsMAT3* ConeMatrix, const cmsCIEXYZ* FromIll, const cmsCIEXYZ* ToIll);
+
+cmsBool   _cmsBuildRGB2XYZtransferMatrix(cmsMAT3* r, const cmsCIExyY* WhitePoint, const cmsCIExyYTRIPLE* Primaries);
+
+
+#define _lcms_internal_H
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/common/utils.h b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/common/utils.h
new file mode 100755
index 0000000000..6cd459df95
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/common/utils.h
@@ -0,0 +1,103 @@
+
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+
+#ifndef _lcms_utils_h
+
+// Deal with Microsoft's attempt at deprecating C standard runtime functions
+#ifdef _MSC_VER
+#    if (_MSC_VER >= 1400)
+#      ifndef _CRT_SECURE_NO_DEPRECATE
+#        define _CRT_SECURE_NO_DEPRECATE
+#      endif
+#      ifndef _CRT_SECURE_NO_WARNINGS
+#        define _CRT_SECURE_NO_WARNINGS
+#      endif
+#    endif
+#endif
+
+#include "lcms2.h"
+
+#include <string.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <math.h>
+#include <wchar.h>
+
+// Avoid warnings
+
+#define UTILS_UNUSED_PARAMETER(x) ((void)x)
+
+// Init the utility functions
+
+void InitUtils(const char* PName);
+
+// Fatal Error (print the message and exit(1))---------------------------------------------
+
+extern int Verbose;
+
+void FatalError(const char *frm, ...);
+
+// xgetopt() interface -------------------------------------------------------------
+
+extern int   xoptind;
+extern char *xoptarg;
+extern int   xopterr;
+extern char  SW;
+
+int xgetopt(int argc, char *argv[], char *optionS);
+
+// The stock profile utility -------------------------------------------------------
+
+cmsHPROFILE OpenStockProfile(cmsContext ContextID, const char* File);
+
+// The print info utility ----------------------------------------------------------
+
+void PrintProfileInformation(cmsHPROFILE h);
+
+// ---------------------------------------------------------------------------------
+
+void PrintRenderingIntents(void);
+void PrintBuiltins(void);
+
+// ---------------------------------------------------------------------------------
+
+cmsBool SaveMemoryBlock(const cmsUInt8Number* Buffer, cmsUInt32Number dwLen, const char* Filename);
+
+// ---------------------------------------------------------------------------------
+
+// Return a pixel type depending on the number of channels
+int PixelTypeFromChanCount(int ColorChannels);
+
+// ------------------------------------------------------------------------------
+
+// Return number of channels of pixel type
+int ChanCountFromPixelType(int ColorChannels);
+
+#define _lcms_utils_h
+#endif
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/common/vprf.c b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/common/vprf.c
new file mode 100755
index 0000000000..5ddcfe4274
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/common/vprf.c
@@ -0,0 +1,336 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "utils.h"
+
+
+int Verbose = 0;
+
+static char ProgramName[256] = "";
+
+void FatalError(const char *frm, ...)
+{   // Report "[<program> fatal error]: <formatted message>" on stderr, then leave with exit(1).
+    va_list ap;
+
+    fprintf(stderr, "[%s fatal error]: ", ProgramName);
+    va_start(ap, frm);
+    vfprintf(stderr, frm, ap);
+    va_end(ap);
+
+    fputc('\n', stderr);   // terminate the message line
+    exit(1);               // does not return to the caller
+}
+
+// Show errors to the end user (unless quiet option)
+static
+void MyErrorLogHandler(cmsContext ContextID, cmsUInt32Number ErrorCode, const char *Text)
+{
+    if (Verbose >= 0)
+        fprintf(stderr, "[%s]: %s\n", ProgramName, Text);
+
+    UTILS_UNUSED_PARAMETER(ErrorCode);
+    UTILS_UNUSED_PARAMETER(ContextID);
+}
+
+
+void InitUtils(const char* PName)
+{   // Keep a (possibly truncated) copy of the program name and install the error logger.
+    const size_t Last = sizeof(ProgramName) - 1;
+    cmsSetLogErrorHandler(MyErrorLogHandler);
+    strncpy(ProgramName, PName, Last);
+    ProgramName[Last] = '\0';
+}
+
+
+// Open a profile by name. A NULL name and the "*" names below (compared ignoring case) yield
+// built-in profiles created in ContextID; anything else is opened as a file in read mode.
+cmsHPROFILE OpenStockProfile(cmsContext ContextID, const char* File)
+{
+    if (!File)
+        return cmsCreate_sRGBProfileTHR(ContextID);
+
+    if (cmsstrcasecmp(File, "*Lab2") == 0)
+        return cmsCreateLab2ProfileTHR(ContextID, NULL);
+
+    if (cmsstrcasecmp(File, "*Lab4") == 0)
+        return cmsCreateLab4ProfileTHR(ContextID, NULL);
+
+    if (cmsstrcasecmp(File, "*Lab") == 0)       // plain "*Lab" builds the same profile as "*Lab4"
+        return cmsCreateLab4ProfileTHR(ContextID, NULL);
+
+    if (cmsstrcasecmp(File, "*LabD65") == 0) {
+
+        cmsCIExyY D65xyY;
+
+        cmsWhitePointFromTemp( &D65xyY, 6504);  // white point for a 6504 K temperature
+        return cmsCreateLab4ProfileTHR(ContextID, &D65xyY);
+    }
+
+    if (cmsstrcasecmp(File, "*XYZ") == 0)
+        return cmsCreateXYZProfileTHR(ContextID);
+
+    if (cmsstrcasecmp(File, "*Gray22") == 0) {
+
+        cmsToneCurve* Curve = cmsBuildGamma(ContextID, 2.2);
+        cmsHPROFILE hProfile = cmsCreateGrayProfileTHR(ContextID, cmsD50_xyY(), Curve);
+        cmsFreeToneCurve(Curve);
+        return hProfile;
+    }
+
+    if (cmsstrcasecmp(File, "*Gray30") == 0) {
+
+        cmsToneCurve* Curve = cmsBuildGamma(ContextID, 3.0);
+        cmsHPROFILE hProfile = cmsCreateGrayProfileTHR(ContextID, cmsD50_xyY(), Curve);
+        cmsFreeToneCurve(Curve);
+        return hProfile;
+    }
+
+    if (cmsstrcasecmp(File, "*srgb") == 0)
+        return cmsCreate_sRGBProfileTHR(ContextID);
+
+    if (cmsstrcasecmp(File, "*null") == 0)
+        return cmsCreateNULLProfileTHR(ContextID);
+
+    if (cmsstrcasecmp(File, "*Lin2222") == 0) {
+
+        // Use the caller's context here too, as every other branch does.
+        cmsToneCurve*  Gamma = cmsBuildGamma(ContextID, 2.2);
+        cmsToneCurve*  Gamma4[4];
+        cmsHPROFILE hProfile;
+
+        Gamma4[0] = Gamma4[1] = Gamma4[2] = Gamma4[3] = Gamma;
+        hProfile = cmsCreateLinearizationDeviceLinkTHR(ContextID, cmsSigCmykData, Gamma4);
+        cmsFreeToneCurve(Gamma);                // one curve shared by all four slots: free it once
+        return hProfile;
+    }
+
+    return cmsOpenProfileFromFileTHR(ContextID, File, "r");
+}
+
+// Write the help listing of built-in profile names to stderr.
+void PrintBuiltins(void)
+{
+    fputs("\nBuilt-in profiles:\n\n"
+          "\t*Lab2  -- D50-based v2 CIEL*a*b\n"
+          "\t*Lab4  -- D50-based v4 CIEL*a*b\n"
+          "\t*Lab   -- D50-based v4 CIEL*a*b\n"
+          "\t*XYZ   -- CIE XYZ (PCS)\n"
+          "\t*sRGB  -- sRGB color space\n"
+          "\t*Gray22 - Monochrome of Gamma 2.2\n"
+          "\t*Gray30 - Monochrome of Gamma 3.0\n"
+          "\t*null   - Monochrome black for all input\n"
+          "\t*Lin2222- CMYK linearization of gamma 2.2 on each channel\n", stderr);
+}
+
+
+// Print one textual field of a profile (language "en", country "US") on stdout plus a newline; nothing when empty.
+static
+void PrintInfo(cmsHPROFILE h, cmsInfoType Info)
+{
+    char* text;
+    cmsUInt32Number len;   // the library call returns an unsigned count, so hold it unsigned
+
+    len = cmsGetProfileInfoASCII(h, Info, "en", "US", NULL, 0);   // no buffer: obtain the size to allocate
+    if (len == 0) return;
+
+    text = (char*) calloc(len, sizeof(char));   // zero-filled, so an unwritten buffer reads as ""
+    if (text == NULL) return;
+
+    cmsGetProfileInfoASCII(h, Info, "en", "US", text, len);
+
+    if (text[0] != '\0')
+        printf("%s\n", text);
+
+    free(text);
+}
+
+
+
+// Print Title, then every colour name stored under tag Sig (one per line); silent when the tag is absent.
+static
+void PrintColorantTable(cmsHPROFILE hInput, cmsTagSignature Sig, const char* Title)
+{
+    cmsNAMEDCOLORLIST* Colorants;
+    int Count, Index;
+
+    if (!cmsIsTag(hInput, Sig))
+        return;                        // nothing at all is printed without the tag
+
+    printf("%s:\n", Title);
+
+    Colorants = (cmsNAMEDCOLORLIST*) cmsReadTag(hInput, Sig);
+    if (Colorants == NULL) {
+        printf("(Unavailable)\n");
+        return;
+    }
+
+    Count = cmsNamedColorCount(Colorants);
+    Index = 0;
+    while (Index < Count) {
+        char Name[cmsMAX_PATH];
+
+        cmsNamedColorInfo(Colorants, Index, Name, NULL, NULL, NULL, NULL);
+        printf("\t%s\n", Name);
+        Index++;
+    }
+
+    printf("\n");
+}
+
+
+void PrintProfileInformation(cmsHPROFILE hInput)
+{   // Summarise a profile on stdout; a NULL handle only produces a complaint on stderr.
+    static const cmsInfoType Fields[] = { cmsInfoDescription, cmsInfoManufacturer,
+                                          cmsInfoModel,       cmsInfoCopyright };
+    size_t k;
+
+    if (hInput == NULL) {
+        fprintf(stderr, "*Wrong or corrupted profile*\n");
+        return;
+    }
+
+    for (k = 0; k < sizeof(Fields) / sizeof(Fields[0]); k++)
+        PrintInfo(hInput, Fields[k]);
+
+    if (Verbose > 2) {   // colorant tables are shown only when Verbose exceeds 2
+        PrintColorantTable(hInput, cmsSigColorantTableTag,    "Input colorant table");
+        PrintColorantTable(hInput, cmsSigColorantTableOutTag, "Input colorant out table");
+    }
+    printf("\n");
+}
+
+// -----------------------------------------------------------------------------
+
+
+void PrintRenderingIntents(void)
+{   // Usage help: list the rendering intents the library reports (at most MaxIntents) on stderr.
+    enum { MaxIntents = 200 };
+    cmsUInt32Number IntentCode[MaxIntents];
+    char* IntentName[MaxIntents];
+    cmsUInt32Number Found, k = 0;
+
+    fprintf(stderr, "%ct<n> rendering intent:\n\n", SW);
+    Found = cmsGetSupportedIntents(MaxIntents, IntentCode, IntentName);
+    while (k < Found) {
+        fprintf(stderr, "\t%u - %s\n", IntentCode[k], IntentName[k]);
+        k++;
+    }
+    fputc('\n', stderr);
+}
+
+
+
+// ------------------------------------------------------------------------------
+
+cmsBool SaveMemoryBlock(const cmsUInt8Number* Buffer, cmsUInt32Number dwLen, const char* Filename)
+{   // Write dwLen bytes of Buffer to Filename (binary mode). Every failure goes through FatalError,
+    // which exits, so the FALSE returns are only a fallback; TRUE means written and closed.
+    FILE* out = fopen(Filename, "wb");
+    if (out == NULL) {
+        FatalError("Cannot create '%s'", Filename);
+        return FALSE;
+    }
+    if (fwrite(Buffer, 1, dwLen, out) != dwLen) {
+        fclose(out);   // do not leave the stream open on the failure path
+        // dwLen is an unsigned 32-bit count: widen it explicitly so the argument matches %lu
+        FatalError("Cannot write %lu bytes to %s", (unsigned long) dwLen, Filename);
+        return FALSE;
+    }
+    if (fclose(out) != 0) {
+        FatalError("Error flushing file '%s'", Filename);
+        return FALSE;
+    }
+    return TRUE;
+}
+
+// ------------------------------------------------------------------------------
+
+// Return the PT_* pixel type used for a given number of channels (1 to 15); any other count is fatal.
+int PixelTypeFromChanCount(int ColorChannels)
+{
+    static const int TypeByCount[] = {   // entry k serves k+1 channels
+        PT_GRAY,
+        PT_MCH2,
+        PT_MCH3,
+        PT_CMYK,                         // four channels are taken as CMYK
+        PT_MCH5,
+        PT_MCH6,
+        PT_MCH7,
+        PT_MCH8,
+        PT_MCH9,
+        PT_MCH10,
+        PT_MCH11,
+        PT_MCH12,
+        PT_MCH13,
+        PT_MCH14,
+        PT_MCH15
+    };
+    const int Known = (int) (sizeof(TypeByCount) / sizeof(TypeByCount[0]));
+
+    if (ColorChannels >= 1 && ColorChannels <= Known)
+        return TypeByCount[ColorChannels - 1];
+    FatalError("What a weird separation of %d channels?!?!", ColorChannels);
+    return -1;
+}
+
+
+// ------------------------------------------------------------------------------
+
+// Return the number of channels of a PT_* pixel type code (ColorChannels carries the type, not a count).
+int ChanCountFromPixelType(int ColorChannels)
+{
+    switch (ColorChannels) {
+
+      case PT_GRAY: return 1;
+
+      case PT_RGB:
+      case PT_CMY:
+      case PT_Lab:
+      case PT_YUV:
+      case PT_YCbCr: return 3;
+
+      case PT_CMYK: return 4 ;
+      case PT_MCH2: return 2 ;
+      case PT_MCH3: return 3 ;
+      case PT_MCH4: return 4 ;
+      case PT_MCH5: return 5 ;
+      case PT_MCH6: return 6 ;
+      case PT_MCH7: return 7 ;
+      case PT_MCH8: return 8 ;
+      case PT_MCH9: return 9 ;
+      case PT_MCH10: return 10;
+      case PT_MCH11: return 11;
+      case PT_MCH12: return 12;
+      case PT_MCH13: return 13;
+      case PT_MCH14: return 14;
+      case PT_MCH15: return 15;
+
+      default:
+          // Types not listed above are rejected; FatalError exits, so -1 is only a fallback.
+          FatalError("Unsupported color space of %d channels", ColorChannels);
+          return -1;
+    }
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/common/xgetopt.c b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/common/xgetopt.c
new file mode 100755
index 0000000000..7f3dc548a7
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/common/xgetopt.c
@@ -0,0 +1,75 @@
+/*
+    getopt.c
+
+*/
+
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+
+int     xoptind = 1;    /* index of which argument is next  */
+char   *xoptarg;        /* pointer to argument of current option */
+int     xopterr = 0;    /* allow error message  */
+
+static  char   *letP = NULL;    /* remember next option char's location */
+char    SW = '-';				/* DOS switch character, either '-' or '/' */
+
+/*
+  Parse the command line options, System V style.
+
+  Standard option syntax is:
+
+    option ::= SW [optLetter]* [argLetter space* argument]
+
+*/
+
+int xgetopt(int argc, char *argv[], char *optionS)
+{
+    unsigned char ch;                                   /* option letter being examined */
+    char *optP;                                         /* position of ch inside optionS */
+/* Returns the next option letter, EOF when no further option is found, or '?' with errno = EINVAL on error. */
+    if (SW == 0) {                                      /* an unset switch character falls back to '/' */
+        SW = '/';
+    }
+
+    if (argc > xoptind) {
+        if (letP == NULL) {                             /* not inside a word: start on argv[xoptind] */
+            if ((letP = argv[xoptind]) == NULL ||
+                *(letP++) != SW)  goto gopEOF;          /* word does not start with SW: stop, xoptind untouched */
+            if (*letP == SW) {                          /* doubled switch character: consume the word and stop */
+                xoptind++;  goto gopEOF;
+            }
+        }
+        if (0 == (ch = *(letP++))) {                    /* no letter left in this word: consume it and stop */
+            xoptind++;  goto gopEOF;
+        }
+        if (':' == ch  ||  (optP = strchr(optionS, ch)) == NULL)
+            goto gopError;                              /* ':' itself or a letter absent from optionS */
+        if (':' == *(++optP)) {                         /* letter is followed by ':' in optionS: it takes an argument */
+            xoptind++;
+            if (0 == *letP) {                           /* nothing glued to the letter: argument is the next word */
+                if (argc <= xoptind)  goto  gopError;   /* ...which must exist */
+                letP = argv[xoptind++];
+            }
+            xoptarg = letP;                             /* rest of the word, or the whole next word */
+            letP = NULL;                                /* next call starts on a fresh word */
+        } else {
+            if (0 == *letP) {                           /* last letter of the word: move on to the next word */
+                xoptind++;
+                letP = NULL;
+            }
+            xoptarg = NULL;                             /* plain flag, no argument */
+        }
+        return ch;
+    }
+gopEOF:
+    xoptarg = letP = NULL;                              /* clear the scan state */
+    return EOF;
+
+gopError:
+    xoptarg = NULL;
+    errno  = EINVAL;
+    if (xopterr)                                        /* diagnostics are printed only when xopterr is non-zero */
+        perror ("get command line option");
+    return ('?');
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/delphi/delphidemo.dpr b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/delphi/delphidemo.dpr
new file mode 100755
index 0000000000..9180c04fea
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/delphi/delphidemo.dpr
@@ -0,0 +1,13 @@
+program delphidemo;
+// Project file of the Delphi demo: pulls in unit demo1 and runs its form.
+uses
+  Forms,
+  demo1 in 'demo1.pas' {Form1};
+
+{$R *.RES}
+
+begin
+  Application.Initialize;
+  Application.CreateForm(TForm1, Form1);  // creates the TForm1 instance held in the Form1 variable
+  Application.Run;                        // NOTE(review): presumably blocks in the message loop until the form closes
+end.
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/delphi/delphidemo.dproj b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/delphi/delphidemo.dproj
new file mode 100755
index 0000000000..25b97d8282
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/delphi/delphidemo.dproj
@@ -0,0 +1,114 @@
+﻿	<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+		<PropertyGroup>
+			<ProjectGuid>{E3F889E8-CB8A-49AE-8173-4DDA022466BE}</ProjectGuid>
+			<MainSource>delphidemo.dpr</MainSource>
+			<Config Condition="'$(Config)'==''">Debug</Config>
+			<DCC_DCCCompiler>DCC32</DCC_DCCCompiler>
+			<ProjectVersion>12.0</ProjectVersion>
+		</PropertyGroup>
+		<PropertyGroup Condition="'$(Config)'=='Base' or '$(Base)'!=''">
+			<Base>true</Base>
+		</PropertyGroup>
+		<PropertyGroup Condition="'$(Config)'=='Release' or '$(Cfg_1)'!=''">
+			<Cfg_1>true</Cfg_1>
+			<CfgParent>Base</CfgParent>
+			<Base>true</Base>
+		</PropertyGroup>
+		<PropertyGroup Condition="'$(Config)'=='Debug' or '$(Cfg_2)'!=''">
+			<Cfg_2>true</Cfg_2>
+			<CfgParent>Base</CfgParent>
+			<Base>true</Base>
+		</PropertyGroup>
+		<PropertyGroup Condition="'$(Base)'!=''">
+			<DCC_UsePackage>vcl;rtl;vclx;vclimg;vclactnband;dbrtl;vcldb;vcldbx;bdertl;vcltouch;xmlrtl;dsnap;dsnapcon;TeeUI;TeeDB;Tee;vclib;ibxpress;adortl;IndyCore;IndySystem;IndyProtocols;inet;intrawebdb_100_140;Intraweb_100_140;VclSmp;vclie;websnap;webdsnap;inetdb;inetdbbde;inetdbxpress;soaprtl;vclribbon;dbexpress;DbxCommonDriver;DataSnapIndy10ServerTransport;DataSnapProviderClient;DbxClientDriver;DataSnapServer;DBXInterBaseDriver;DBXMySQLDriver;dbxcds;DBXFirebirdDriver;DBXSybaseASEDriver;DBXSybaseASADriver;DBXOracleDriver;DBXMSSQLDriver;DBXInformixDriver;DBXDb2Driver;Rave77VCL</DCC_UsePackage>
+			<DCC_ImageBase>00400000</DCC_ImageBase>
+			<DCC_SymbolReferenceInfo>1</DCC_SymbolReferenceInfo>
+			<DCC_DependencyCheckOutputName>delphidemo.exe</DCC_DependencyCheckOutputName>
+			<DCC_UnitAlias>WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE;WinTypes=Windows;WinProcs=Windows;$(DCC_UnitAlias)</DCC_UnitAlias>
+			<DCC_Platform>x86</DCC_Platform>
+			<DCC_N>true</DCC_N>
+			<DCC_S>false</DCC_S>
+			<DCC_K>false</DCC_K>
+			<DCC_E>false</DCC_E>
+			<DCC_F>false</DCC_F>
+		</PropertyGroup>
+		<PropertyGroup Condition="'$(Cfg_1)'!=''">
+			<DCC_LocalDebugSymbols>false</DCC_LocalDebugSymbols>
+			<DCC_Define>RELEASE;$(DCC_Define)</DCC_Define>
+			<DCC_SymbolReferenceInfo>0</DCC_SymbolReferenceInfo>
+			<DCC_DebugInformation>false</DCC_DebugInformation>
+		</PropertyGroup>
+		<PropertyGroup Condition="'$(Cfg_2)'!=''">
+			<DCC_Define>DEBUG;$(DCC_Define)</DCC_Define>
+		</PropertyGroup>
+		<ItemGroup>
+			<DelphiCompile Include="delphidemo.dpr">
+				<MainSource>MainSource</MainSource>
+			</DelphiCompile>
+			<DCCReference Include="demo1.pas">
+				<Form>Form1</Form>
+			</DCCReference>
+			<BuildConfiguration Include="Base">
+				<Key>Base</Key>
+			</BuildConfiguration>
+			<BuildConfiguration Include="Debug">
+				<Key>Cfg_2</Key>
+				<CfgParent>Base</CfgParent>
+			</BuildConfiguration>
+			<BuildConfiguration Include="Release">
+				<Key>Cfg_1</Key>
+				<CfgParent>Base</CfgParent>
+			</BuildConfiguration>
+		</ItemGroup>
+		<Import Project="$(BDS)\Bin\CodeGear.Delphi.Targets" Condition="Exists('$(BDS)\Bin\CodeGear.Delphi.Targets')"/>
+		<ProjectExtensions>
+			<Borland.Personality>Delphi.Personality.12</Borland.Personality>
+			<Borland.ProjectType>VCLApplication</Borland.ProjectType>
+			<BorlandProject>
+				<Delphi.Personality>
+					<Source>
+						<Source Name="MainSource">delphidemo.dpr</Source>
+					</Source>
+					<Parameters>
+						<Parameters Name="UseLauncher">False</Parameters>
+						<Parameters Name="DebugCWD">d:\lcms-1.13\delphi</Parameters>
+						<Parameters Name="LoadAllSymbols">True</Parameters>
+						<Parameters Name="LoadUnspecifiedSymbols">False</Parameters>
+					</Parameters>
+					<VersionInfo>
+						<VersionInfo Name="IncludeVerInfo">False</VersionInfo>
+						<VersionInfo Name="AutoIncBuild">False</VersionInfo>
+						<VersionInfo Name="MajorVer">1</VersionInfo>
+						<VersionInfo Name="MinorVer">0</VersionInfo>
+						<VersionInfo Name="Release">0</VersionInfo>
+						<VersionInfo Name="Build">0</VersionInfo>
+						<VersionInfo Name="Debug">False</VersionInfo>
+						<VersionInfo Name="PreRelease">False</VersionInfo>
+						<VersionInfo Name="Special">False</VersionInfo>
+						<VersionInfo Name="Private">False</VersionInfo>
+						<VersionInfo Name="DLL">False</VersionInfo>
+						<VersionInfo Name="Locale">3082</VersionInfo>
+						<VersionInfo Name="CodePage">1252</VersionInfo>
+					</VersionInfo>
+					<VersionInfoKeys>
+						<VersionInfoKeys Name="CompanyName"/>
+						<VersionInfoKeys Name="FileDescription"/>
+						<VersionInfoKeys Name="FileVersion">1.0.0.0</VersionInfoKeys>
+						<VersionInfoKeys Name="InternalName"/>
+						<VersionInfoKeys Name="LegalCopyright"/>
+						<VersionInfoKeys Name="LegalTrademarks"/>
+						<VersionInfoKeys Name="OriginalFilename"/>
+						<VersionInfoKeys Name="ProductName"/>
+						<VersionInfoKeys Name="ProductVersion">1.0.0.0</VersionInfoKeys>
+						<VersionInfoKeys Name="Comments"/>
+					</VersionInfoKeys>
+					<Excluded_Packages>
+						<Excluded_Packages Name="$(BDS)\bin\dcloffice2k140.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
+						<Excluded_Packages Name="$(BDS)\bin\dclofficexp140.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
+					</Excluded_Packages>
+				</Delphi.Personality>
+				<ModelSupport>False</ModelSupport>
+			</BorlandProject>
+			<ProjectFileVersion>12</ProjectFileVersion>
+		</ProjectExtensions>
+	</Project>
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/delphi/delphidemo.res b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/delphi/delphidemo.res
new file mode 100755
index 0000000000..ca4824f8cb
Binary files /dev/null and b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/delphi/delphidemo.res differ
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/delphi/demo1.dfm b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/delphi/demo1.dfm
new file mode 100755
index 0000000000..c7722534bb
Binary files /dev/null and b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/delphi/demo1.dfm differ
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/delphi/demo1.pas b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/delphi/demo1.pas
new file mode 100755
index 0000000000..8b69c98e04
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/delphi/demo1.pas
@@ -0,0 +1,322 @@
+unit demo1;
+
+interface
+
+uses
+  Windows, SysUtils, Classes, Graphics, Controls, Forms, Dialogs,
+  ExtCtrls, StdCtrls, ExtDlgs, lcms2dll, ComCtrls;
+
+type
+  TForm1 = class(TForm)
+    // Components of the demo form (presumably streamed from the form resource, see the $R directive).
+    Image1: TImage;                          // source picture, input of the transform
+    Image2: TImage;                          // receives the transformed picture
+    Panel1: TPanel;
+    Splitter1: TSplitter;
+    Button2: TButton;
+    ComboBoxInput: TComboBox;                // input profile (list filled by FillCombo)
+    ComboBoxOutput: TComboBox;               // output profile (list filled by FillCombo)
+    Label1: TLabel;
+    Label2: TLabel;
+    WBCompensation: TCheckBox;               // adds cmsFLAGS_BLACKPOINTCOMPENSATION
+    NoTransform: TCheckBox;                  // adds cmsFLAGS_NULLTRANSFORM
+    RadioGroup1: TRadioGroup;                // precalculation mode, see ComputeFlags
+    OpenPictureDialog1: TOpenPictureDialog;  // picture chooser used by Button2Click
+    Button1: TButton;
+    ProgressBar1: TProgressBar;              // scanline progress in Button1Click
+    ComboBoxIntent: TComboBox;               // rendering intent names
+    Label3: TLabel;
+    Button3: TButton;
+    Button4: TButton;
+    OpenDialog1: TOpenDialog;                // file chooser used by Button3Click/Button4Click
+    Label4: TLabel;                          // caption showing the adaptation state
+    ScrollBar1: TScrollBar;                  // adaptation state in percent (0..100)
+    // Event handlers (NOTE(review): their wiring to the controls is presumably in the form resource).
+    procedure Button2Click(Sender: TObject);          // load a picture into both images
+    procedure Button1Click(Sender: TObject);          // run the colour transform
+    procedure Button3Click(Sender: TObject);          // browse for the input profile
+    procedure Button4Click(Sender: TObject);          // browse for the output profile
+    procedure ComboBoxIntentChange(Sender: TObject);  // enable/disable ScrollBar1
+    procedure ScrollBar1Change(Sender: TObject);      // refresh the Label4 caption
+  private
+    { Private declarations }
+    function ComputeFlags: DWORD;  // lcms flags derived from the option controls
+
+  public
+    constructor Create(Owner: TComponent); Override;  // also fills the combo boxes
+    { Public declarations }
+  end;
+
+var
+  Form1: TForm1;
+
+implementation
+
+{$R *.DFM}
+
+CONST // Bit flags naming the profile device classes that InSignatures accepts
+  IS_INPUT = $1;        // cmsSigInputClass
+  IS_DISPLAY = $2;      // cmsSigDisplayClass
+  IS_COLORSPACE = $4;   // cmsSigColorSpaceClass
+  IS_OUTPUT = $8;       // cmsSigOutputClass
+  IS_ABSTRACT = $10;    // cmsSigAbstractClass
+// Intent codes filled in by TForm1.Create; Button1Click indexes them with ComboBoxIntent.ItemIndex
+VAR
+   IntentCodes: array [0 .. 20] of cmsUInt32Number;
+
+// Returns TRUE when the profile class Signature is one of the classes enabled
+// in the IS_* bit mask dwFlags, and FALSE otherwise. Only the display, output,
+// input, colour-space and abstract classes have an IS_* bit, so any other
+// class is always rejected.
+FUNCTION InSignatures(Signature: cmsProfileClassSignature;  dwFlags: DWORD): Boolean;
+
+  // TRUE when the given IS_* bit is present in dwFlags.
+  function Wants(Bit: DWORD): Boolean;
+  begin
+    Wants := (dwFlags AND Bit) <> 0;
+  end;
+
+BEGIN
+  InSignatures :=
+    (Wants(IS_DISPLAY) AND (Signature = cmsSigDisplayClass)) OR
+    (Wants(IS_OUTPUT) AND (Signature = cmsSigOutputClass)) OR
+    (Wants(IS_INPUT) AND (Signature = cmsSigInputClass)) OR
+    (Wants(IS_COLORSPACE) AND (Signature = cmsSigColorSpaceClass)) OR
+    (Wants(IS_ABSTRACT) AND (Signature = cmsSigAbstractClass));
+END;
+
+// Fills Combo with the descriptions of the RGB profiles in <system directory>\SPOOL\DRIVERS\COLOR
+// whose device class is enabled in the IS_* mask Signatures. Their file names go into a
+// TStringList referenced by Combo.Tag (SelectedFile reads it back), so it is not freed here.
+PROCEDURE FillCombo(var Combo: TComboBox; Signatures: DWORD);
+var
+  Files, Descriptions: TStringList;
+  Found: Integer;
+  SearchRec: TSearchRec;
+  Path, Profile: String;
+  Dir: ARRAY [0 .. 1024] OF Char;
+  hProfile: cmsHPROFILE;
+  Descrip: array [0 .. 256] of Char;
+begin
+  Files := TStringList.Create;
+  Descriptions := TStringList.Create;
+  GetSystemDirectory(Dir, 1023);
+  Path := String(Dir) + '\SPOOL\DRIVERS\COLOR\';
+  Found := FindFirst(Path + '*.ic?', faAnyFile, SearchRec);
+  while Found = 0 do
+  begin
+    Profile := Path + SearchRec.Name;
+    hProfile := cmsOpenProfileFromFile(PAnsiChar(AnsiString(Profile)), 'r');
+    if (hProfile <> NIL) THEN
+    begin
+      if (cmsGetColorSpace(hProfile) = cmsSigRgbData) AND InSignatures(cmsGetDeviceClass(hProfile), Signatures) then
+      begin
+        Descrip[0] := #0; // the entry stays empty if lcms returns no description
+        cmsGetProfileInfo(hProfile, cmsInfoDescription, 'EN', 'us', Descrip, 256);
+        Descriptions.Add(Descrip);
+        Files.Add(Profile);
+      end;
+      cmsCloseProfile(hProfile);
+    end;
+    Found := FindNext(SearchRec);
+  end;
+  FindClose(SearchRec);
+  Combo.Items := Descriptions; // the Items setter copies the strings ...
+  Combo.Tag := Integer(Files);
+  Descriptions.Free;           // ... so the temporary list is released here
+end;
+
+// Error handler registered with lcms: shows the text in a warning box. Note the "cdecl" convention
+PROCEDURE ErrorLogger(ContextID: cmsContext; ErrorCode: cmsUInt32Number; Text: PAnsiChar); Cdecl;
+var Msg: WideString;
+begin
+  Msg := WideString(Text);
+  MessageBox(0, PWideChar(Msg), 'Something is going wrong...', MB_TASKMODAL OR MB_ICONWARNING OR MB_OK);
+end;
+
+// Creates the form, installs the lcms error logger, sets the range of the
+// adaptation scrollbar, fills both profile combos and lists the rendering
+// intents reported by lcms (codes go to IntentCodes, names to ComboBoxIntent).
+constructor TForm1.Create(Owner: TComponent);
+var
+  IntentNames: array [0 .. 20] of PAnsiChar;
+  IntentList: TStrings;
+  Idx, Count: Integer;
+begin
+  inherited Create(Owner);
+  cmsSetLogErrorHandler(ErrorLogger);
+
+  ScrollBar1.Min := 0;
+  ScrollBar1.Max := 100;
+
+  // Input combo: input, colour-space and display classes; output combo: every IS_* bit.
+  FillCombo(ComboBoxInput, IS_DISPLAY OR IS_COLORSPACE OR IS_INPUT);
+  FillCombo(ComboBoxOutput, $FFFF);
+
+  Count := cmsGetSupportedIntents(20, @IntentCodes, @IntentNames);
+  IntentList := ComboBoxIntent.Items;
+  IntentList.BeginUpdate;
+  IntentList.Clear;
+  for Idx := 0 to Count - 1 do
+    IntentList.Add(String(IntentNames[Idx]));
+
+  ComboBoxIntent.ItemIndex := 0;
+  IntentList.EndUpdate;
+end;
+
+
+
+// Mirrors the scrollbar position in Label4 as the adaptation-state percentage.
+procedure TForm1.ScrollBar1Change(Sender: TObject);
+var
+  Percent: String;
+begin
+  Percent := IntToStr(ScrollBar1.Position);
+  Label4.Caption := 'Adaptation state ' + Percent + '% (Abs. col only)';
+end;
+
+// Lets the user choose a picture and loads it into both Image1 (source) and
+// Image2 (result), forcing each bitmap to 24 bits per pixel.
+procedure TForm1.Button2Click(Sender: TObject);
+begin
+  if not OpenPictureDialog1.Execute then
+    Exit;
+
+  Image1.Picture.LoadFromFile(OpenPictureDialog1.FileName);
+  Image1.Picture.Bitmap.PixelFormat := pf24bit;
+  Image2.Picture.LoadFromFile(OpenPictureDialog1.FileName);
+  Image2.Picture.Bitmap.PixelFormat := pf24bit;
+end;
+
+// Returns the profile file chosen in Combo: the file name recorded for the
+// selected item, or the combo's current text when no item is selected.
+function SelectedFile(var Combo: TComboBox): string;
+var
+  Selected: Integer;
+begin
+  Selected := Combo.ItemIndex;
+  if Selected < 0 then
+    Result := Combo.Text
+  else
+    // Combo.Tag carries the TStringList of file names stored by FillCombo.
+    Result := TStringList(Combo.Tag).Strings[Selected];
+end;
+
+procedure TForm1.ComboBoxIntentChange(Sender: TObject);
+begin
+   ScrollBar1.Enabled := (ComboBoxIntent.itemIndex = 3); // scrollbar usable for list entry 3 only; presumably absolute colorimetric - TODO confirm
+end;
+
+// Builds the lcms transform flags from the option controls: black point
+// compensation (WBCompensation), null transform (NoTransform) and the
+// precalculation mode picked in RadioGroup1. The result is the bitwise OR
+// of the flags that apply, or 0 when none does.
+function TForm1.ComputeFlags: DWORD;
+var
+  Mode: Integer;
+begin
+  Result := 0;
+
+  if WBCompensation.Checked then
+    Result := Result OR cmsFLAGS_BLACKPOINTCOMPENSATION;
+
+  if NoTransform.Checked then
+    Result := Result OR cmsFLAGS_NULLTRANSFORM;
+
+  // Only entries 0, 1 and 3 of the radio group add a flag; any other
+  // selection (or none) leaves the flags untouched.
+  Mode := RadioGroup1.ItemIndex;
+  if Mode = 0 then
+    Result := Result OR cmsFLAGS_NOOPTIMIZE
+  else if Mode = 1 then
+    Result := Result OR cmsFLAGS_HIGHRESPRECALC
+  else if Mode = 3 then
+    Result := Result OR cmsFLAGS_LOWRESPRECALC;
+end;
+
+// Runs the colour transform chosen in the UI. The profiles selected in the
+// two combos are opened, a BGR 8-bit transform is created with the chosen
+// intent and flags, and every scanline of Image1 is converted into the
+// matching scanline of Image2 while ProgressBar1 follows the row counter.
+// Nothing is transformed when a profile name is empty, a profile cannot be
+// opened, the transform cannot be created or no intent is selected.
+procedure TForm1.Button1Click(Sender: TObject);
+var
+  Source, Dest: String;
+  hSrc, hDest: cmsHPROFILE;
+  xform: cmsHTRANSFORM;
+  i, PicW, PicH: Integer;
+  Intent: Integer;
+  dwFlags: DWORD;
+begin
+  Source := SelectedFile(ComboBoxInput);
+  Dest := SelectedFile(ComboBoxOutput);
+  dwFlags := ComputeFlags;
+
+  // ItemIndex is -1 when no intent is selected; indexing IntentCodes with it
+  // would read outside the array, so bail out instead.
+  if (ComboBoxIntent.ItemIndex < Low(IntentCodes)) or
+    (ComboBoxIntent.ItemIndex > High(IntentCodes)) then
+    Exit;
+  Intent := IntentCodes[ComboBoxIntent.ItemIndex];
+
+  cmsSetAdaptationState(  ScrollBar1.Position / 100.0 );
+
+  if (Source <> '') AND (Dest <> '') then
+  begin
+    hSrc := cmsOpenProfileFromFile(PAnsiChar(AnsiString(Source)), 'r');
+    hDest := cmsOpenProfileFromFile(PAnsiChar(AnsiString(Dest)), 'r');
+
+    if (hSrc <> Nil) and (hDest <> Nil) then
+      xform := cmsCreateTransform(hSrc, TYPE_BGR_8, hDest, TYPE_BGR_8, Intent,
+        dwFlags)
+    else
+      xform := nil;
+
+    // Close whichever profiles were opened before running the transform.
+    if hSrc <> nil then
+      cmsCloseProfile(hSrc);
+    if hDest <> Nil then
+      cmsCloseProfile(hDest);
+
+    if (xform <> nil) then
+    try
+      PicW := Image2.Picture.width;
+      PicH := Image2.Picture.height;
+      ProgressBar1.Min := 0;
+      ProgressBar1.Max := PicH;
+      ProgressBar1.Step := 1;
+
+      for i := 0 TO (PicH - 1) do
+      begin
+        // Refresh the progress bar every 100 rows only.
+        if ((i MOD 100) = 0) then
+          ProgressBar1.Position := i;
+
+        cmsDoTransform(xform, Image1.Picture.Bitmap.Scanline[i],
+          Image2.Picture.Bitmap.Scanline[i], PicW);
+      end;
+      ProgressBar1.Position := PicH;
+    finally
+      // Release the transform even if a scanline access raises.
+      cmsDeleteTransform(xform);
+    end;
+
+    Image2.Repaint;
+    ProgressBar1.Position := 0;
+  end
+end;
+
+procedure TForm1.Button3Click(Sender: TObject);
+begin
+  if OpenDialog1.Execute then // a file was chosen: its path becomes the input combo's text
+    ComboBoxInput.Text := OpenDialog1.FileName;
+end;
+
+procedure TForm1.Button4Click(Sender: TObject);
+begin
+  if OpenDialog1.Execute then // a file was chosen: its path becomes the output combo's text
+    ComboBoxOutput.Text := OpenDialog1.FileName;
+end;
+
+end.
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/delphi/lcms2dll.pas b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/delphi/lcms2dll.pas
new file mode 100755
index 0000000000..6a957f5fa6
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/delphi/lcms2dll.pas
@@ -0,0 +1,2149 @@
+//
+//  Little cms DELPHI wrapper
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2014 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+// Version 2.6
+//
+
+UNIT lcms2dll;
+
+{$IFDEF FPC}
+  {$MODE Delphi}
+{$ENDIF}
+
+INTERFACE
+
+{$IFNDEF MSWINDOWS}
+   USES LCLType, types;
+   Type PWChar = PWideChar;
+{$ELSE}
+   USES Windows;
+{$ENDIF}
+
+ CONST
+  // Name of the lcms2 shared library. NOTE(review): every non-DARWIN target, other Unix systems included, gets 'lcms2.dll' - confirm.
+  LCMS2_SO = {$IFDEF DARWIN} 'liblcms2.2.dylib'; {$ELSE} 'lcms2.dll'; {$ENDIF}
+
+ TYPE
+
+  Uint8   = Byte;
+  Int8    = Shortint;
+  UInt16  = Word;
+  Int16   = Smallint;
+  UInt32  = LongWord;
+  Int32   = Longint;
+
+ TYPE
+     cmsUInt8Number   = Uint8;
+     cmsInt8Number    = Int8;
+     cmsUInt16Number  = UInt16;
+     cmsInt16Number   = Int16;
+
+     cmsUInt32Number  = UInt32;
+     cmsInt32Number   = Int32;
+     cmsInt64Number   = Int64;
+     cmsUInt64Number  = UInt64;
+
+     cmsFloat32Number = Single;
+     cmsFloat64Number = Double;
+
+     LPcmsUInt8Number    = ^cmsUInt8Number;
+     LPcmsInt8Number     = ^cmsInt8Number;
+     LPcmsUInt16Number   = ^cmsUInt16Number;
+     LPcmsInt16Number    = ^cmsInt16Number;
+
+     LPcmsUInt32Number   = ^cmsUInt32Number;
+     LPcmsInt32Number    = ^cmsInt32Number;
+     LPcmsInt64Number    = ^cmsInt64Number;
+     LPcmsUInt64Number   = ^cmsUInt64Number;
+
+     LPcmsFloat32Number  = ^cmsFloat32Number;
+     LPcmsFloat64Number  = ^cmsFloat64Number;
+
+
+     // Derivative types
+     cmsSignature        = cmsUInt32Number;
+     cmsU8Fixed8Number   = cmsUInt16Number;
+     cmsS15Fixed16Number = cmsInt32Number;
+     cmsU16Fixed16Number = cmsUInt32Number;
+
+     // Boolean type. NOTE(review): declared as Delphi's one-byte Boolean, not a native integer - confirm it matches the size of the C cmsBool
+     cmsBool = Boolean;
+
+ CONST
+
+    // Some common definitions
+    cmsMAX_PATH     = 256;
+
+    // D50 XYZ normalized to Y=1.0
+    cmsD50X             = 0.9642;
+    cmsD50Y             = 1.0;
+    cmsD50Z             = 0.8249;
+
+    // V4 perceptual black
+    cmsPERCEPTUAL_BLACK_X  = 0.00336;
+    cmsPERCEPTUAL_BLACK_Y  = 0.0034731;
+    cmsPERCEPTUAL_BLACK_Z  = 0.00287;
+
+    // Definitions in ICC spec
+    cmsMagicNumber      = $61637370;     // 'acsp'
+    lcmsSignature       = $6c636d73;     // 'lcms'
+
+
+TYPE
+
+// Base ICC type definitions
+cmsTagTypeSignature = (
+  cmsSigChromaticityType                  = $6368726D,  // 'chrm'
+  cmsSigColorantOrderType                 = $636C726F,  // 'clro'
+  cmsSigColorantTableType                 = $636C7274,  // 'clrt'
+  cmsSigCrdInfoType                       = $63726469,  // 'crdi'
+  cmsSigCurveType                         = $63757276,  // 'curv'
+  cmsSigDataType                          = $64617461,  // 'data'
+  cmsSigDictType                          = $64696374,  // 'dict'
+  cmsSigDateTimeType                      = $6474696D,  // 'dtim'
+  cmsSigDeviceSettingsType                = $64657673,  // 'devs'
+  cmsSigLut16Type                         = $6d667432,  // 'mft2'
+  cmsSigLut8Type                          = $6d667431,  // 'mft1'
+  cmsSigLutAtoBType                       = $6d414220,  // 'mAB '
+  cmsSigLutBtoAType                       = $6d424120,  // 'mBA '
+  cmsSigMeasurementType                   = $6D656173,  // 'meas'
+  cmsSigMultiLocalizedUnicodeType         = $6D6C7563,  // 'mluc'
+  cmsSigMultiProcessElementType           = $6D706574,  // 'mpet'
+  cmsSigNamedColorType                    = $6E636f6C,  // 'ncol' -- DEPRECATED!
+  cmsSigNamedColor2Type                   = $6E636C32,  // 'ncl2'
+  cmsSigParametricCurveType               = $70617261,  // 'para'
+  cmsSigProfileSequenceDescType           = $70736571,  // 'pseq'
+  cmsSigProfileSequenceIdType             = $70736964,  // 'psid'
+  cmsSigResponseCurveSet16Type            = $72637332,  // 'rcs2'
+  cmsSigS15Fixed16ArrayType               = $73663332,  // 'sf32'
+  cmsSigScreeningType                     = $7363726E,  // 'scrn'
+  cmsSigSignatureType                     = $73696720,  // 'sig '
+  cmsSigTextType                          = $74657874,  // 'text'
+  cmsSigTextDescriptionType               = $64657363,  // 'desc'
+  cmsSigU16Fixed16ArrayType               = $75663332,  // 'uf32'
+  cmsSigUcrBgType                         = $62666420,  // 'bfd '
+  cmsSigUInt16ArrayType                   = $75693136,  // 'ui16'
+  cmsSigUInt32ArrayType                   = $75693332,  // 'ui32'
+  cmsSigUInt64ArrayType                   = $75693634,  // 'ui64'
+  cmsSigUInt8ArrayType                    = $75693038,  // 'ui08'
+  cmsSigViewingConditionsType             = $76696577,  // 'view'
+  cmsSigXYZType                           = $58595A20,  // 'XYZ '
+  cmsSigVcgtType                          = $76636774   // 'vcgt'
+  );
+
+// Base ICC tag definitions (the xxxColorantTag and xxxMatrixColumnTag names are aliases sharing one value)
+cmsTagSignature = (
+    cmsSigAToB0Tag                          = $41324230,  // 'A2B0'
+    cmsSigAToB1Tag                          = $41324231,  // 'A2B1'
+    cmsSigAToB2Tag                          = $41324232,  // 'A2B2'
+    cmsSigBlueColorantTag                   = $6258595A,  // 'bXYZ'
+    cmsSigBlueMatrixColumnTag               = $6258595A,  // 'bXYZ'
+    cmsSigBlueTRCTag                        = $62545243,  // 'bTRC'
+    cmsSigBToA0Tag                          = $42324130,  // 'B2A0'
+    cmsSigBToA1Tag                          = $42324131,  // 'B2A1'
+    cmsSigBToA2Tag                          = $42324132,  // 'B2A2'
+    cmsSigCalibrationDateTimeTag            = $63616C74,  // 'calt'
+    cmsSigCharTargetTag                     = $74617267,  // 'targ'
+    cmsSigChromaticAdaptationTag            = $63686164,  // 'chad'
+    cmsSigChromaticityTag                   = $6368726D,  // 'chrm'
+    cmsSigColorantOrderTag                  = $636C726F,  // 'clro'
+    cmsSigColorantTableTag                  = $636C7274,  // 'clrt'
+    cmsSigColorantTableOutTag               = $636C6F74,  // 'clot'
+    cmsSigColorimetricIntentImageStateTag   = $63696973,  // 'ciis'
+    cmsSigCopyrightTag                      = $63707274,  // 'cprt'
+    cmsSigCrdInfoTag                        = $63726469,  // 'crdi'
+    cmsSigDataTag                           = $64617461,  // 'data'
+    cmsSigDateTimeTag                       = $6474696D,  // 'dtim'
+    cmsSigDeviceMfgDescTag                  = $646D6E64,  // 'dmnd'
+    cmsSigDeviceModelDescTag                = $646D6464,  // 'dmdd'
+    cmsSigDeviceSettingsTag                 = $64657673,  // 'devs'
+    cmsSigDToB0Tag                          = $44324230,  // 'D2B0'
+    cmsSigDToB1Tag                          = $44324231,  // 'D2B1'
+    cmsSigDToB2Tag                          = $44324232,  // 'D2B2'
+    cmsSigDToB3Tag                          = $44324233,  // 'D2B3'
+    cmsSigBToD0Tag                          = $42324430,  // 'B2D0'
+    cmsSigBToD1Tag                          = $42324431,  // 'B2D1'
+    cmsSigBToD2Tag                          = $42324432,  // 'B2D2'
+    cmsSigBToD3Tag                          = $42324433,  // 'B2D3'
+    cmsSigGamutTag                          = $67616D74,  // 'gamt'
+    cmsSigGrayTRCTag                        = $6b545243,  // 'kTRC'
+    cmsSigGreenColorantTag                  = $6758595A,  // 'gXYZ'
+    cmsSigGreenMatrixColumnTag              = $6758595A,  // 'gXYZ'
+    cmsSigGreenTRCTag                       = $67545243,  // 'gTRC'
+    cmsSigLuminanceTag                      = $6C756d69,  // 'lumi'
+    cmsSigMeasurementTag                    = $6D656173,  // 'meas'
+    cmsSigMediaBlackPointTag                = $626B7074,  // 'bkpt'
+    cmsSigMediaWhitePointTag                = $77747074,  // 'wtpt'
+    cmsSigNamedColorTag                     = $6E636f6C,  // 'ncol' // Deprecated by the ICC
+    cmsSigNamedColor2Tag                    = $6E636C32,  // 'ncl2'
+    cmsSigOutputResponseTag                 = $72657370,  // 'resp'
+    cmsSigPerceptualRenderingIntentGamutTag = $72696730,  // 'rig0'
+    cmsSigPreview0Tag                       = $70726530,  // 'pre0'
+    cmsSigPreview1Tag                       = $70726531,  // 'pre1'
+    cmsSigPreview2Tag                       = $70726532,  // 'pre2'
+    cmsSigProfileDescriptionTag             = $64657363,  // 'desc'
+    cmsSigProfileSequenceDescTag            = $70736571,  // 'pseq'
+    cmsSigProfileSequenceIdTag              = $70736964,  // 'psid'
+    cmsSigPs2CRD0Tag                        = $70736430,  // 'psd0'
+    cmsSigPs2CRD1Tag                        = $70736431,  // 'psd1'
+    cmsSigPs2CRD2Tag                        = $70736432,  // 'psd2'
+    cmsSigPs2CRD3Tag                        = $70736433,  // 'psd3'
+    cmsSigPs2CSATag                         = $70733273,  // 'ps2s'
+    cmsSigPs2RenderingIntentTag             = $70733269,  // 'ps2i'
+    cmsSigRedColorantTag                    = $7258595A,  // 'rXYZ'
+    cmsSigRedMatrixColumnTag                = $7258595A,  // 'rXYZ'
+    cmsSigRedTRCTag                         = $72545243,  // 'rTRC'
+    cmsSigSaturationRenderingIntentGamutTag = $72696732,  // 'rig2'
+    cmsSigScreeningDescTag                  = $73637264,  // 'scrd'
+    cmsSigScreeningTag                      = $7363726E,  // 'scrn'
+    cmsSigTechnologyTag                     = $74656368,  // 'tech'
+    cmsSigUcrBgTag                          = $62666420,  // 'bfd '
+    cmsSigViewingCondDescTag                = $76756564,  // 'vued'
+    cmsSigViewingConditionsTag              = $76696577,  // 'view'
+    cmsSigVcgtTag                           = $76636774,  // 'vcgt'
+    cmsSigMetaTag                           = $6D657461   // 'meta'
+);
+
+// ICC Technology tag: each value packs the four ASCII characters shown in its comment into 32 bits
+cmsTechnologySignature = (
+    cmsSigDigitalCamera                     = $6463616D,  // 'dcam'
+    cmsSigFilmScanner                       = $6673636E,  // 'fscn'
+    cmsSigReflectiveScanner                 = $7273636E,  // 'rscn'
+    cmsSigInkJetPrinter                     = $696A6574,  // 'ijet'
+    cmsSigThermalWaxPrinter                 = $74776178,  // 'twax'
+    cmsSigElectrophotographicPrinter        = $6570686F,  // 'epho'
+    cmsSigElectrostaticPrinter              = $65737461,  // 'esta'
+    cmsSigDyeSublimationPrinter             = $64737562,  // 'dsub'
+    cmsSigPhotographicPaperPrinter          = $7270686F,  // 'rpho'
+    cmsSigFilmWriter                        = $6670726E,  // 'fprn'
+    cmsSigVideoMonitor                      = $7669646D,  // 'vidm'
+    cmsSigVideoCamera                       = $76696463,  // 'vidc'
+    cmsSigProjectionTelevision              = $706A7476,  // 'pjtv'
+    cmsSigCRTDisplay                        = $43525420,  // 'CRT '
+    cmsSigPMDisplay                         = $504D4420,  // 'PMD '
+    cmsSigAMDisplay                         = $414D4420,  // 'AMD '
+    cmsSigPhotoCD                           = $4B504344,  // 'KPCD'
+    cmsSigPhotoImageSetter                  = $696D6773,  // 'imgs'
+    cmsSigGravure                           = $67726176,  // 'grav'
+    cmsSigOffsetLithography                 = $6F666673,  // 'offs'
+    cmsSigSilkscreen                        = $73696C6B,  // 'silk'
+    cmsSigFlexography                       = $666C6578,  // 'flex'
+    cmsSigMotionPictureFilmScanner          = $6D706673,  // 'mpfs'
+    cmsSigMotionPictureFilmRecorder         = $6D706672,  // 'mpfr'
+    cmsSigDigitalMotionPictureCamera        = $646D7063,  // 'dmpc'
+    cmsSigDigitalCinemaProjector            = $64636A70   // 'dcpj' -- NOTE(review): these bytes spell 'dcjp'; confirm against the ICC specification
+);
+
+
+// ICC Color spaces: each value packs the four ASCII characters shown in its comment (e.g. 'MCHA' = $4D434841)
+cmsColorSpaceSignature = (
+    cmsSigXYZData                           = $58595A20,  // 'XYZ '
+    cmsSigLabData                           = $4C616220,  // 'Lab '
+    cmsSigLuvData                           = $4C757620,  // 'Luv '
+    cmsSigYCbCrData                         = $59436272,  // 'YCbr'
+    cmsSigYxyData                           = $59787920,  // 'Yxy '
+    cmsSigRgbData                           = $52474220,  // 'RGB '
+    cmsSigGrayData                          = $47524159,  // 'GRAY'
+    cmsSigHsvData                           = $48535620,  // 'HSV '
+    cmsSigHlsData                           = $484C5320,  // 'HLS '
+    cmsSigCmykData                          = $434D594B,  // 'CMYK'
+    cmsSigCmyData                           = $434D5920,  // 'CMY '
+    cmsSigMCH1Data                          = $4D434831,  // 'MCH1'
+    cmsSigMCH2Data                          = $4D434832,  // 'MCH2'
+    cmsSigMCH3Data                          = $4D434833,  // 'MCH3'
+    cmsSigMCH4Data                          = $4D434834,  // 'MCH4'
+    cmsSigMCH5Data                          = $4D434835,  // 'MCH5'
+    cmsSigMCH6Data                          = $4D434836,  // 'MCH6'
+    cmsSigMCH7Data                          = $4D434837,  // 'MCH7'
+    cmsSigMCH8Data                          = $4D434838,  // 'MCH8'
+    cmsSigMCH9Data                          = $4D434839,  // 'MCH9'
+    cmsSigMCHAData                          = $4D434841,  // 'MCHA'
+    cmsSigMCHBData                          = $4D434842,  // 'MCHB'
+    cmsSigMCHCData                          = $4D434843,  // 'MCHC'
+    cmsSigMCHDData                          = $4D434844,  // 'MCHD'
+    cmsSigMCHEData                          = $4D434845,  // 'MCHE'
+    cmsSigMCHFData                          = $4D434846,  // 'MCHF'
+    cmsSigNamedData                         = $6e6d636c,  // 'nmcl'
+    cmsSig1colorData                        = $31434C52,  // '1CLR'
+    cmsSig2colorData                        = $32434C52,  // '2CLR'
+    cmsSig3colorData                        = $33434C52,  // '3CLR'
+    cmsSig4colorData                        = $34434C52,  // '4CLR'
+    cmsSig5colorData                        = $35434C52,  // '5CLR'
+    cmsSig6colorData                        = $36434C52,  // '6CLR'
+    cmsSig7colorData                        = $37434C52,  // '7CLR'
+    cmsSig8colorData                        = $38434C52,  // '8CLR'
+    cmsSig9colorData                        = $39434C52,  // '9CLR'
+    cmsSig10colorData                       = $41434C52,  // 'ACLR'
+    cmsSig11colorData                       = $42434C52,  // 'BCLR'
+    cmsSig12colorData                       = $43434C52,  // 'CCLR'
+    cmsSig13colorData                       = $44434C52,  // 'DCLR'
+    cmsSig14colorData                       = $45434C52,  // 'ECLR'
+    cmsSig15colorData                       = $46434C52,  // 'FCLR'
+    cmsSigLuvKData                          = $4C75764B   // 'LuvK'
+);
+
+// ICC Profile Class
+cmsProfileClassSignature = (
+    cmsSigInputClass                        = $73636E72,  // 'scnr'
+    cmsSigDisplayClass                      = $6D6E7472,  // 'mntr'
+    cmsSigOutputClass                       = $70727472,  // 'prtr'
+    cmsSigLinkClass                         = $6C696E6B,  // 'link'
+    cmsSigAbstractClass                     = $61627374,  // 'abst'
+    cmsSigColorSpaceClass                   = $73706163,  // 'spac'
+    cmsSigNamedColorClass                   = $6e6d636c   // 'nmcl'
+);
+
+
+// ICC Platforms
+cmsPlatformSignature = (
+    cmsSigMacintosh                         = $4150504C,  // 'APPL'
+    cmsSigMicrosoft                         = $4D534654,  // 'MSFT'
+    cmsSigSolaris                           = $53554E57,  // 'SUNW'
+    cmsSigSGI                               = $53474920,  // 'SGI '
+    cmsSigTaligent                          = $54474E54,  // 'TGNT'
+    cmsSigUnices                            = $2A6E6978   // '*nix'   // From argyll -- Not official
+);
+
+CONST
+
+    // Reference gamut
+    cmsSigPerceptualReferenceMediumGamut         = $70726d67;  //'prmg'
+
+    // For cmsSigColorimetricIntentImageStateTag
+    cmsSigSceneColorimetryEstimates              = $73636F65;  //'scoe'
+    cmsSigSceneAppearanceEstimates               = $73617065;  //'sape'
+    cmsSigFocalPlaneColorimetryEstimates         = $66706365;  //'fpce'
+    cmsSigReflectionHardcopyOriginalColorimetry  = $72686F63;  //'rhoc'
+    cmsSigReflectionPrintOutputColorimetry       = $72706F63;  //'rpoc'
+
+TYPE
+
+// Multi process elements types
+cmsStageSignature = (
+    cmsSigCurveSetElemType              = $63767374,  //'cvst'
+    cmsSigMatrixElemType                = $6D617466,  //'matf'
+    cmsSigCLutElemType                  = $636C7574,  //'clut'
+
+    cmsSigBAcsElemType                  = $62414353,  // 'bACS'
+    cmsSigEAcsElemType                  = $65414353,  // 'eACS'
+
+    // Custom from here, not in the ICC Spec
+    cmsSigXYZ2LabElemType               = $6C327820,  // 'l2x '
+    cmsSigLab2XYZElemType               = $78326C20,  // 'x2l '
+    cmsSigNamedColorElemType            = $6E636C20,  // 'ncl '
+    cmsSigLabV2toV4                     = $32203420,  // '2 4 '
+    cmsSigLabV4toV2                     = $34203220,  // '4 2 '
+
+    // Identities
+    cmsSigIdentityElemType              = $69646E20   // 'idn '
+);
+
+// Types of CurveElements
+cmsCurveSegSignature = (
+
+    cmsSigFormulaCurveSeg               = $70617266, // 'parf'
+    cmsSigSampledCurveSeg               = $73616D66, // 'samf'
+    cmsSigSegmentedCurve                = $63757266  // 'curf'
+);
+
+CONST
+
+    // Used in ResponseCurveType
+    cmsSigStatusA                    = $53746141; //'StaA'
+    cmsSigStatusE                    = $53746145; //'StaE'
+    cmsSigStatusI                    = $53746149; //'StaI'
+    cmsSigStatusT                    = $53746154; //'StaT'
+    cmsSigStatusM                    = $5374614D; //'StaM'
+    cmsSigDN                         = $444E2020; //'DN  '
+    cmsSigDNP                        = $444E2050; //'DN P'
+    cmsSigDNN                        = $444E4E20; //'DNN '
+    cmsSigDNNP                       = $444E4E50; //'DNNP'
+
+    // Device attributes, currently defined values correspond to the low 4 bytes
+    // of the 8 byte attribute quantity
+    cmsReflective     = 0;
+    cmsTransparency   = 1;
+    cmsGlossy         = 0;
+    cmsMatte          = 2;
+
+TYPE
+
+// Common structures in ICC tags
+cmsICCData = PACKED RECORD
+     len  :    cmsUInt32Number;
+     flag :    cmsUInt32Number;
+     data : Array [0..1] of cmsUInt8Number;  // NOTE(review): presumably a placeholder for a payload of len bytes - confirm
+    END;
+
+// ICC date time
+cmsDateTimeNumber = PACKED RECORD
+    year:     cmsUInt16Number;
+    month:    cmsUInt16Number;
+    day:      cmsUInt16Number;
+    hours:    cmsUInt16Number;
+    minutes:  cmsUInt16Number;
+    seconds:  cmsUInt16Number;
+END;
+
+// ICC XYZ
+
+cmsEncodedXYZNumber = PACKED RECORD
+      X: cmsS15Fixed16Number;
+      Y: cmsS15Fixed16Number;
+      Z: cmsS15Fixed16Number;
+END;
+
+
+cmsProfileID = PACKED RECORD
+    CASE Integer OF
+    1: (ID8: Array[0..15] OF cmsUInt8Number);
+    2: (ID16: Array[0..7] OF cmsUInt16Number);
+    3: (ID32: Array[0..3] OF cmsUInt32Number);
+END;
+
+
+
+// ----------------------------------------------------------------------------------------------
+// ICC profile internal base types. Strictly speaking, they should not be declared in this unit,
+// but somebody may want to use them to access the profile header directly, so here they are.
+
+// Profile header -- it is 32-bit aligned, so no issues are expected on alignment
+cmsICCHeader = PACKED RECORD
+         size:           cmsUInt32Number;          // Profile size in bytes
+         cmmId:          cmsSignature;             // CMM for this profile
+         version:        cmsUInt32Number;          // Format version number
+         deviceClass:    cmsProfileClassSignature; // Type of profile
+         colorSpace:     cmsColorSpaceSignature;   // Color space of data
+         pcs:            cmsColorSpaceSignature;   // PCS, XYZ or Lab only
+         date:           cmsDateTimeNumber;        // Date profile was created
+         magic:          cmsSignature;             // Magic Number to identify an ICC profile
+         platform:       cmsPlatformSignature;     // Primary Platform
+         flags:          cmsUInt32Number;          // Various bit settings
+         manufacturer:   cmsSignature;             // Device manufacturer
+         model:          cmsUInt32Number;          // Device model number
+         attributes:     cmsUInt64Number;          // Device attributes
+         renderingIntent:cmsUInt32Number;          // Rendering intent
+         illuminant:     cmsEncodedXYZNumber;      // Profile illuminant
+         creator:        cmsSignature;             // Profile creator
+         profileID:      cmsProfileID;             // Profile ID 
+         reserved: array [0..27] of cmsInt8Number; // Reserved for future use
+END;
+
+// ICC base tag
+cmsTagBase = PACKED RECORD
+     sig:         cmsTagTypeSignature;
+     reserved:    array[0..3] of cmsInt8Number;
+END;
+
+// A tag entry in directory
+cmsTagEntry = PACKED RECORD
+    sig:    cmsTagSignature;   // The tag signature
+    offset: cmsUInt32Number;   // Start of tag
+    size:   cmsUInt32Number;   // Size in bytes
+END;
+
+
+cmsContext    = Pointer;              // Context identifier for multithreaded environments
+cmsHANDLE     = Pointer;              // Generic handle
+cmsHPROFILE   = Pointer;              // Opaque typedefs to hide internals
+cmsHTRANSFORM = Pointer;
+
+
+CONST
+
+     cmsMAXCHANNELS  = 16;                // Maximum number of channels in ICC profiles
+
+// Format of pixel is defined by one cmsUInt32Number, using bit fields as follows
+//
+//            A O TTTTT U Y F P X S EEE CCCC BBB
+//
+//            A: Floating point -- With this flag we can differentiate 16 bits as float and as int
+//            O: Optimized -- previous optimization already returns the final 8-bit value
+//            T: Pixeltype
+//            F: Flavor  0=MinIsBlack(Chocolate) 1=MinIsWhite(Vanilla)
+//            P: Planar? 0=Chunky, 1=Planar
+//            X: swap 16 bps endianness?
+//            S: Do swap? ie, BGR, KYMC
+//            E: Extra samples
+//            C: Channels (Samples per pixel)
+//            B: bytes per sample
+//            Y: Swap first - changes ABGR to BGRA and KCMY to CMYK
+
+    function FLOAT_SH(a: cmsUInt32Number): cmsUInt32Number;        // A - floating point (letter = presumed legend field; bodies not visible here)
+    function OPTIMIZED_SH(s: cmsUInt32Number): cmsUInt32Number;    // O - optimized
+    function COLORSPACE_SH(s: cmsUInt32Number): cmsUInt32Number;   // T - pixel type
+    function SWAPFIRST_SH(s: cmsUInt32Number): cmsUInt32Number;    // Y - swap first
+    function FLAVOR_SH(s: cmsUInt32Number): cmsUInt32Number;       // F - flavor
+    function PLANAR_SH(p: cmsUInt32Number): cmsUInt32Number;       // P - planar
+    function ENDIAN16_SH(e: cmsUInt32Number): cmsUInt32Number;     // X - 16 bps endianness swap
+    function DOSWAP_SH(e: cmsUInt32Number): cmsUInt32Number;       // S - do swap
+    function EXTRA_SH(e: cmsUInt32Number): cmsUInt32Number;        // E - extra samples
+    function CHANNELS_SH(c: cmsUInt32Number): cmsUInt32Number;     // C - channels (samples per pixel)
+    function BYTES_SH(b: cmsUInt32Number): cmsUInt32Number;        // B - bytes per sample
+
+
+    function T_FLOAT(a: cmsUInt32Number): cmsUInt32Number;        // presumably the counterpart of FLOAT_SH; the same holds for each T_xxx / xxx_SH pair
+    function T_OPTIMIZED(o: cmsUInt32Number): cmsUInt32Number;
+    function T_COLORSPACE(s: cmsUInt32Number): cmsUInt32Number;
+    function T_SWAPFIRST(s: cmsUInt32Number): cmsUInt32Number;
+    function T_FLAVOR(s: cmsUInt32Number): cmsUInt32Number;
+    function T_PLANAR(p: cmsUInt32Number): cmsUInt32Number;
+    function T_ENDIAN16(e: cmsUInt32Number): cmsUInt32Number;
+    function T_DOSWAP(e: cmsUInt32Number): cmsUInt32Number;
+    function T_EXTRA(e: cmsUInt32Number): cmsUInt32Number;
+    function T_CHANNELS(c: cmsUInt32Number): cmsUInt32Number;
+    function T_BYTES(b: cmsUInt32Number): cmsUInt32Number;
+
+const
+
+
+    // Pixel type codes
+
+    PT_ANY   = 0;     // colorspace is not checked
+                      // (1 and 2 are reserved)
+    PT_GRAY  = 3;
+    PT_RGB   = 4;
+    PT_CMY   = 5;
+    PT_CMYK  = 6;
+    PT_YCbCr = 7;
+    PT_YUV   = 8;     // Lu'v'
+    PT_XYZ   = 9;
+    PT_Lab   = 10;
+    PT_YUVK  = 11;    // Lu'v'K
+    PT_HSV   = 12;
+    PT_HLS   = 13;
+    PT_Yxy   = 14;
+
+    PT_MCH1  = 15;
+    PT_MCH2  = 16;
+    PT_MCH3  = 17;
+    PT_MCH4  = 18;
+    PT_MCH5  = 19;
+    PT_MCH6  = 20;
+    PT_MCH7  = 21;
+    PT_MCH8  = 22;
+    PT_MCH9  = 23;
+    PT_MCH10 = 24;
+    PT_MCH11 = 25;
+    PT_MCH12 = 26;
+    PT_MCH13 = 27;
+    PT_MCH14 = 28;
+    PT_MCH15 = 29;
+
+    PT_LabV2 = 30;    // same as PT_Lab, except that the old V2 encoding is used
+
+
+    // Format descriptors: ready-made pixel format words following the bit-field legend above (gray and RGB families)
+    TYPE_GRAY_8          = $030009;   // pixel type 3 (PT_GRAY), 1 channel, 1 byte per sample
+    TYPE_GRAY_8_REV      = $032009;   // _REV: GRAY_8 with bit $2000 (F, flavor) set
+    TYPE_GRAY_16         = $03000a;
+    TYPE_GRAY_16_REV     = $03200a;
+    TYPE_GRAY_16_SE      = $03080a;   // _SE: GRAY_16 with bit $0800 (X, 16 bps endianness swap) set
+    TYPE_GRAYA_8         = $030089;   // trailing A: bit $0080 (E = 1 extra sample) set
+    TYPE_GRAYA_16        = $03008a;
+    TYPE_GRAYA_16_SE     = $03088a;
+    TYPE_GRAYA_8_PLANAR  = $031089;   // _PLANAR: bit $1000 (P) set
+    TYPE_GRAYA_16_PLANAR = $03108a;
+    TYPE_RGB_8           = $040019;   // pixel type 4 (PT_RGB), 3 channels, 1 byte per sample
+    TYPE_RGB_8_PLANAR    = $041019;
+    TYPE_BGR_8           = $040419;   // BGR: RGB_8 with bit $0400 (S, do swap) set
+    TYPE_BGR_8_PLANAR    = $041419;
+    TYPE_RGB_16          = $04001a;
+    TYPE_RGB_16_PLANAR   = $04101a;
+    TYPE_RGB_16_SE       = $04081a;
+    TYPE_BGR_16          = $04041a;
+    TYPE_BGR_16_PLANAR   = $04141a;
+    TYPE_BGR_16_SE       = $040c1a;
+    TYPE_RGBA_8          = $040099;
+    TYPE_RGBA_8_PLANAR   = $041099;
+    TYPE_ARGB_8_PLANAR   = $045099;
+    TYPE_ABGR_8_PLANAR   = $041499;
+    TYPE_BGRA_8_PLANAR   = $045499;
+    TYPE_RGBA_16         = $04009a;
+    TYPE_RGBA_16_PLANAR  = $04109a;
+    TYPE_RGBA_16_SE      = $04089a;
+    TYPE_ARGB_8          = $044099;   // ARGB: RGBA_8 with bit $4000 (Y, swap first) set
+    TYPE_ARGB_16         = $04409a;
+    TYPE_ABGR_8          = $040499;   // ABGR: RGBA_8 with bit $0400 (S, do swap) set
+    TYPE_ABGR_16         = $04049a;
+    TYPE_ABGR_16_PLANAR  = $04149a;
+    TYPE_ABGR_16_SE      = $040c9a;
+    TYPE_BGRA_8          = $044499;   // BGRA: RGBA_8 with both $0400 (S, do swap) and $4000 (Y, swap first) set
+    TYPE_BGRA_16         = $04449a;
+    TYPE_BGRA_16_SE      = $044c9a;   // BGRA_16 or $0800 (X); the former $04489a lacked the S bit and thus encoded ARGB_16 plus endian swap
+    TYPE_CMY_8           = $050019;   // pixel type 5 (PT_CMY), 3 channels, 1 byte per sample
+    TYPE_CMY_8_PLANAR    = $051019;
+    TYPE_CMY_16          = $05001a;
+    TYPE_CMY_16_PLANAR   = $05101a;
+    TYPE_CMY_16_SE       = $05081a;
+    TYPE_CMYK_8          = $060021;   // pixel type 6 (PT_CMYK), 4 channels, 1 byte per sample
+    TYPE_CMYKA_8         = $0600a1;
+    TYPE_CMYK_8_REV      = $062021;
+    TYPE_YUVK_8          = $062021;   // same word as TYPE_CMYK_8_REV (the pixel type field holds 6 = PT_CMYK, not PT_YUVK)
+    TYPE_CMYK_8_PLANAR   = $061021;
+    TYPE_CMYK_16         = $060022;
+    TYPE_CMYK_16_REV     = $062022;
+    TYPE_YUVK_16         = $062022;   // same word as TYPE_CMYK_16_REV
+    TYPE_CMYK_16_PLANAR  = $061022;
+    TYPE_CMYK_16_SE      = $060822;
+    TYPE_KYMC_8          = $060421;   // KYMC: CMYK with bit $0400 (S, do swap) set
+    TYPE_KYMC_16         = $060422;
+    TYPE_KYMC_16_SE      = $060c22;
+    TYPE_KCMY_8          = $064021;   // KCMY: CMYK with bit $4000 (Y, swap first) set
+    TYPE_KCMY_8_REV      = $066021;
+    TYPE_KCMY_16         = $064022;
+    TYPE_KCMY_16_REV     = $066022;
+    TYPE_KCMY_16_SE      = $064822;
+    TYPE_CMYK5_8         = $130029;   // CMYKn / KYMCn: pixel type is PT_MCHn ($13 = 19 = PT_MCH5), n channels
+    TYPE_CMYK5_16        = $13002a;
+    TYPE_CMYK5_16_SE     = $13082a;
+    TYPE_KYMC5_8         = $130429;
+    TYPE_KYMC5_16        = $13042a;
+    TYPE_KYMC5_16_SE     = $130c2a;
+    TYPE_CMYK6_8         = $140031;
+    TYPE_CMYK6_8_PLANAR  = $141031;
+    TYPE_CMYK6_16        = $140032;
+    TYPE_CMYK6_16_PLANAR = $141032;
+    TYPE_CMYK6_16_SE     = $140832;
+    TYPE_CMYK7_8         = $150039;
+    TYPE_CMYK7_16        = $15003a;
+    TYPE_CMYK7_16_SE     = $15083a;
+    TYPE_KYMC7_8         = $150439;
+    TYPE_KYMC7_16        = $15043a;
+    TYPE_KYMC7_16_SE     = $150c3a;
+    TYPE_CMYK8_8         = $160041;
+    TYPE_CMYK8_16        = $160042;
+    TYPE_CMYK8_16_SE     = $160842;
+    TYPE_KYMC8_8         = $160441;
+    TYPE_KYMC8_16        = $160442;
+    TYPE_KYMC8_16_SE     = $160c42;
+    TYPE_CMYK9_8         = $170049;
+    TYPE_CMYK9_16        = $17004a;
+    TYPE_CMYK9_16_SE     = $17084a;
+    TYPE_KYMC9_8         = $170449;
+    TYPE_KYMC9_16        = $17044a;
+    TYPE_KYMC9_16_SE     = $170c4a;
+    TYPE_CMYK10_8        = $180051;
+    TYPE_CMYK10_16       = $180052;
+    TYPE_CMYK10_16_SE    = $180852;
+    TYPE_KYMC10_8        = $180451;
+    TYPE_KYMC10_16       = $180452;
+    TYPE_KYMC10_16_SE    = $180c52;
+    TYPE_CMYK11_8        = $190059;
+    TYPE_CMYK11_16       = $19005a;
+    TYPE_CMYK11_16_SE    = $19085a;
+    TYPE_KYMC11_8        = $190459;
+    TYPE_KYMC11_16       = $19045a;
+    TYPE_KYMC11_16_SE    = $190c5a;
+    TYPE_CMYK12_8        = $1a0061;
+    TYPE_CMYK12_16       = $1a0062;
+    TYPE_CMYK12_16_SE    = $1a0862;
+    TYPE_KYMC12_8        = $1a0461;
+    TYPE_KYMC12_16       = $1a0462;
+    TYPE_KYMC12_16_SE    = $1a0c62;
+    TYPE_XYZ_16          = $09001a;
+    TYPE_Lab_8           = $0a0019;
+    TYPE_ALab_8          = $0a0499;   // NOTE(review): sets $0400 (S, do swap) whereas TYPE_ARGB_8 uses $4000 (Y, swap first) - confirm against the lcms2.h of the targeted DLL
+    TYPE_Lab_16          = $0a001a;
+    TYPE_Yxy_16          = $0e001a;
+    TYPE_YCbCr_8         = $070019;
+    TYPE_YCbCr_8_PLANAR  = $071019;
+    TYPE_YCbCr_16        = $07001a;
+    TYPE_YCbCr_16_PLANAR = $07101a;
+    TYPE_YCbCr_16_SE     = $07081a;
+    TYPE_YUV_8           = $080019;
+    TYPE_YUV_8_PLANAR    = $081019;
+    TYPE_YUV_16          = $08001a;
+    TYPE_YUV_16_PLANAR   = $08101a;
+    TYPE_YUV_16_SE       = $08081a;
+    TYPE_HLS_8           = $0d0019;
+    TYPE_HLS_8_PLANAR    = $0d1019;
+    TYPE_HLS_16          = $0d001a;
+    TYPE_HLS_16_PLANAR   = $0d101a;
+    TYPE_HLS_16_SE       = $0d081a;
+    TYPE_HSV_8           = $0c0019;
+    TYPE_HSV_8_PLANAR    = $0c1019;
+    TYPE_HSV_16          = $0c001a;
+    TYPE_HSV_16_PLANAR   = $0c101a;
+    TYPE_HSV_16_SE       = $0c081a;
+
+    TYPE_NAMED_COLOR_INDEX = $000A;   // 1 channel, 2 bytes per sample, pixel type 0 (PT_ANY)
+
+    TYPE_XYZ_FLT         = $49001c;   // _FLT: bit $400000 (A, floating point) set, 4 bytes per sample
+    TYPE_Lab_FLT         = $4a001c;
+    TYPE_GRAY_FLT        = $43000c;
+    TYPE_RGB_FLT         = $44001c;
+    TYPE_CMYK_FLT        = $460024;
+    TYPE_XYZA_FLT        = $49009c;
+    TYPE_LabA_FLT        = $4a009c;
+    TYPE_RGBA_FLT        = $44009c;
+
+    TYPE_XYZ_DBL         = $490018;   // _DBL: A bit set and the bytes-per-sample field left at 0
+    TYPE_Lab_DBL         = $4a0018;
+    TYPE_GRAY_DBL        = $430008;
+    TYPE_RGB_DBL         = $440018;
+    TYPE_CMYK_DBL        = $460020;
+    TYPE_LabV2_8         = $1e0019;   // pixel type $1e = 30 = PT_LabV2
+    TYPE_ALabV2_8        = $1e0499;   // NOTE(review): same S-bit question as TYPE_ALab_8 - confirm against lcms2.h
+    TYPE_LabV2_16        = $1e001a;
+
+    TYPE_GRAY_HALF_FLT   = $43000a;   // _HALF_FLT: A bit set, 2 bytes per sample
+    TYPE_RGB_HALF_FLT    = $44001a;
+    TYPE_RGBA_HALF_FLT   = $44009a;
+    TYPE_CMYK_HALF_FLT   = $460022;
+
+    TYPE_ARGB_HALF_FLT   = $44409a;   // RGBA_HALF_FLT with bit $4000 (Y, swap first) set
+    TYPE_BGR_HALF_FLT    = $44041a;   // RGB_HALF_FLT with bit $0400 (S, do swap) set
+    TYPE_BGRA_HALF_FLT   = $44449a;   // RGBA_HALF_FLT with both S and Y set
+    TYPE_ABGR_HALF_FLT   = $44049a;   // RGBA_HALF_FLT with S set; the former $44041a had no extra-sample bit and equalled TYPE_BGR_HALF_FLT
+
+type
+
+
+  // Colorimetric spaces. Each record is followed by its pointer type (LP prefix).
+
+      cmsCIEXYZ = packed record
+          X, Y, Z: cmsFloat64Number;
+      end;
+      LPcmsCIEXYZ = ^cmsCIEXYZ;
+
+      cmsCIExyY = packed record
+          x, y, YY: cmsFloat64Number;
+      end;
+      LPcmsCIExyY = ^cmsCIExyY;
+
+      cmsCIELab = packed record
+          L, a, b: cmsFloat64Number;
+      end;
+      LPcmsCIELab = ^cmsCIELab;
+
+      cmsCIELCh = packed record
+          L, C, h: cmsFloat64Number;
+      end;
+      LPcmsCIELCh = ^cmsCIELCh;
+
+      cmsJCh = packed record
+          J, C, h: cmsFloat64Number;
+      end;
+      LPcmsJCh = ^cmsJCh;
+
+
+      cmsCIEXYZTRIPLE = packed record
+          Red, Green, Blue: cmsCIEXYZ;
+      end;
+      LPcmsCIEXYZTRIPLE = ^cmsCIEXYZTRIPLE;
+
+
+      cmsCIExyYTRIPLE = packed record
+          Red, Green, Blue: cmsCIExyY;
+      end;
+      LPcmsCIExyYTRIPLE = ^cmsCIExyYTRIPLE;
+
+
+const
+
+    // Illuminant type codes, stored in the IlluminantType field of the records that follow
+    cmsILLUMINANT_TYPE_UNKNOWN = $00000000;
+    cmsILLUMINANT_TYPE_D50     = $00000001;
+    cmsILLUMINANT_TYPE_D65     = $00000002;
+    cmsILLUMINANT_TYPE_D93     = $00000003;
+    cmsILLUMINANT_TYPE_F2      = $00000004;
+    cmsILLUMINANT_TYPE_D55     = $00000005;
+    cmsILLUMINANT_TYPE_A       = $00000006;
+    cmsILLUMINANT_TYPE_E       = $00000007;
+    cmsILLUMINANT_TYPE_F8      = $00000008;
+
+TYPE
+    // Mirrors the C struct of the same name. PadN are not lcms2 members: they stand for the padding a C compiler that aligns doubles to 8 bytes inserts (assumed target ABI - confirm).
+    cmsICCMeasurementConditions = PACKED RECORD
+        Observer: cmsUInt32Number;       // 0 = unknown, 1=CIE 1931, 2=CIE 1964
+        Pad1:     cmsUInt32Number;       // keeps Backing at offset 8, as in C
+        Backing:  cmsCIEXYZ;             // Value of backing
+        Geometry: cmsUInt32Number;       // 0=unknown, 1=45/0, 0/45 2=0d, d/0
+        Pad2:     cmsUInt32Number;       // keeps Flare at offset 40, as in C
+        Flare:    cmsFloat64Number;      // 0..1.0
+        IlluminantType, Pad3: cmsUInt32Number;   // illuminant code (cmsILLUMINANT_TYPE_*), then tail padding up to SizeOf = 56
+    END;
+
+   cmsICCViewingConditions = packed record
+        IlluminantXYZ  : cmsCIEXYZ;         // Distinct from the CAM02 viewing conditions:
+        SurroundXYZ    : cmsCIEXYZ;         // this record is meant for storing the
+        IlluminantType : cmsUInt32Number;   // viewing condition tag
+    end;
+
+
+// Context management -----------------------------------------------------------------------------------------------------
+
+function  cmsCreateContext(Plugin: Pointer; UserData: Pointer): cmsContext; stdcall;
+procedure cmsDeleteContext(ContextID: cmsContext); stdcall;
+function  cmsDupContext(ContextID: cmsContext; NewUserData: Pointer): cmsContext; stdcall;
+function  cmsGetContextUserData(ContextID: cmsContext): Pointer; stdcall;
+
+// Plug-in registration ---------------------------------------------------------------------------------------------------
+
+function  cmsPlugin(Plugin: Pointer): cmsBool; stdcall;
+procedure cmsUnregisterPlugins; stdcall;
+
+// Error logging ----------------------------------------------------------------------------------------------------------
+
+// There is no error handling at all. When a function fails, it returns a value that signals the failure.
+// For example, all create functions return NULL on failure. Others may return FALSE.
+// It may be interesting, for the developer, to know why the function is failing.
+// For that reason, lcms2 offers a logging function. This function will get
+// an ENGLISH string with some clues on what is going wrong. You can show this
+// info to the end user if you wish, or just create some sort of log on disk.
+// The logging function should NOT terminate the program, as this obviously can leave
+// unfreed resources. It is the programmer's responsibility to check each function
+// return code to make sure it didn't fail.
+
+const
+
+    cmsERROR_UNDEFINED           = 0;
+    cmsERROR_FILE                = 1;
+    cmsERROR_RANGE               = 2;
+    cmsERROR_INTERNAL            = 3;
+    cmsERROR_NULL                = 4;
+    cmsERROR_READ                = 5;
+    cmsERROR_SEEK                = 6;
+    cmsERROR_WRITE               = 7;
+    cmsERROR_UNKNOWN_EXTENSION   = 8;
+    cmsERROR_COLORSPACE_CHECK    = 9;
+    cmsERROR_ALREADY_DEFINED     = 10;
+    cmsERROR_BAD_SIGNATURE       = 11;
+    cmsERROR_CORRUPTION_DETECTED = 12;
+    cmsERROR_NOT_SUITABLE        = 13;
+
+// Error logger is called with the ContextID when a message is raised. This gives the
+// chance to know which thread is responsible for the warning and any environment associated
+// with it. Non-multithreading applications may safely ignore this parameter.
+// Note that under certain special circumstances, ContextID may be NULL.
+
+type
+
+    cmsLogErrorHandlerFunction = procedure(ContextID: cmsContext; ErrorCode: cmsUInt32Number; Text: PAnsiChar); cdecl;
+
+    // Lets the caller install a logger of their own
+    procedure cmsSetLogErrorHandler(Fn: cmsLogErrorHandlerFunction); stdcall;
+
+
+// Conversions --------------------------------------------------------------------------------------------------------------
+
+
+// Accessors that return pointers to constant structs
+function cmsD50_XYZ: LPcmsCIEXYZ; stdcall;
+function cmsD50_xyY: LPcmsCIExyY; stdcall;
+
+// Conversions between colorimetric spaces
+procedure cmsXYZ2xyY(Dest: LPcmsCIExyY; Source: LPcmsCIEXYZ); stdcall;
+procedure cmsxyY2XYZ(Dest: LPcmsCIEXYZ; Source: LPcmsCIExyY); stdcall;
+procedure cmsLab2XYZ(WhitePoint: LPcmsCIEXYZ; xyz: LPcmsCIEXYZ; Lab: LPcmsCIELab); stdcall;
+procedure cmsXYZ2Lab(WhitePoint: LPcmsCIEXYZ; Lab: LPcmsCIELab; xyz: LPcmsCIEXYZ); stdcall;
+procedure cmsLab2LCh(LCh: LPcmsCIELCh; Lab: LPcmsCIELab); stdcall;
+procedure cmsLCh2Lab(Lab: LPcmsCIELab; LCh: LPcmsCIELCh); stdcall;
+
+// PCS encoding and decoding
+procedure cmsLabEncoded2Float(Lab: LPcmsCIELab; wLab: Pointer); stdcall;
+procedure cmsLabEncoded2FloatV2(Lab: LPcmsCIELab; wLab: Pointer); stdcall;
+procedure cmsFloat2LabEncoded(wLab: Pointer; Lab: LPcmsCIELab); stdcall;
+procedure cmsFloat2LabEncodedV2(wLab: Pointer; Lab: LPcmsCIELab); stdcall;
+procedure cmsXYZEncoded2Float(fxyz: LPcmsCIEXYZ; XYZ: Pointer); stdcall;
+procedure cmsFloat2XYZEncoded(XYZ: Pointer; fXYZ: LPcmsCIEXYZ); stdcall;
+
+
+// Color difference (DeltaE) metrics
+function cmsDeltaE(Lab1, Lab2: LPcmsCIELab): Double; stdcall;
+function cmsCIE94DeltaE(Lab1, Lab2: LPcmsCIELab): Double; stdcall;
+function cmsBFDdeltaE(Lab1, Lab2: LPcmsCIELab): Double; stdcall;
+function cmsCMCdeltaE(Lab1, Lab2: LPcmsCIELab): Double; stdcall;
+function cmsCIE2000DeltaE(Lab1, Lab2: LPcmsCIELab; Kl, Kc, Kh: Double): Double; stdcall;
+
+
+// Black body: temperature <-> chromaticity
+function cmsWhitePointFromTemp(var WhitePoint: cmsCIExyY; TempK: cmsFloat64Number): cmsBool; stdcall;
+function cmsTempFromWhitePoint(var TeampK: cmsFloat64Number; var WhitePoint: cmsCIExyY): cmsBool; stdcall;
+// NOTE(review): "TeampK" looks like a typo for "TempK"; kept because an implementation-section heading (not visible here) may repeat it
+
+// Chromatic adaptation
+function cmsAdaptToIlluminant(Result: LPcmsCIEXYZ; SourceWhitePt: LPcmsCIEXYZ;
+                              Illuminant: LPcmsCIEXYZ; Value: LPcmsCIEXYZ): cmsBool; stdcall;
+
+
+// CIECAM02 ---------------------------------------------------------------------------------------------------
+
+// Viewing conditions. Please note those are CAM model viewing conditions, and not the ICC tag viewing
+// conditions, which I'm naming cmsICCViewingConditions to make differences evident. Unfortunately, the tag
+// cannot deal with surround La, Yb and D value so is basically useless to store CAM02 viewing conditions.
+
+ const
+
+    AVG_SURROUND      = 1;
+    DIM_SURROUND      = 2;
+    DARK_SURROUND     = 3;
+    CUTSHEET_SURROUND = 4;
+
+    D_CALCULATE       = -1;
+
+  TYPE
+    // CAM02 viewing conditions, handed to cmsCIECAM02Init through LPcmsViewingConditions. The record must match the C struct:
+    // a C compiler that aligns doubles to 8 bytes (assumed target ABI) leaves 4 unused bytes after surround, hence Pad1.
+    cmsViewingConditions = PACKED RECORD
+                WhitePoint: cmsCIEXYZ;
+                Yb        : cmsFloat64Number;
+                La        : cmsFloat64Number;
+                surround  : Integer;            // presumably one of the *_SURROUND constants
+                Pad1      : cmsUInt32Number;    // not an lcms2 member; keeps D_value at offset 48 instead of 44
+                D_value   : cmsFloat64Number    // presumably accepts D_CALCULATE - confirm
+              END;
+
+    LPcmsViewingConditions = ^cmsViewingConditions;
+
+function  cmsCIECAM02Init(pVC: LPcmsViewingConditions): Pointer; stdcall;
+procedure cmsCIECAM02Done(hModel: Pointer); stdcall;
+procedure cmsCIECAM02Forward(hModel: Pointer; pIn: LPcmsCIEXYZ; pOut: LPcmsJCh); stdcall;
+procedure cmsCIECAM02Reverse(hModel: Pointer; pIn: LPcmsJCh; pOut: LPcmsCIEXYZ); stdcall;
+
+// Tone curves -----------------------------------------------------------------------------------------
+
+// This describes a curve segment. For a table of supported types, see the manual. User can increase the number of
+// available types by using a proper plug-in. Parametric segments allow 10 parameters at most.
+// Not PACKED on purpose: in C, Params is 8-byte aligned (offset 16, not 12) and SampledPoints is pointer-aligned; relies on natural record alignment (Delphi default {$A8}).
+TYPE
+cmsCurveSegment = RECORD
+       x0, x1: cmsFloat32Number;                       // Domain; for x0 < x <= x1
+         PType: cmsInt32Number;                        // Parametric type, PType == 0 means sampled segment. Negative values are reserved
+       Params: array [0..9] of cmsFloat64Number;       // Parameters if PType != 0
+    nGridPoints: cmsUInt32Number;                      // Number of grid points if PType == 0
+    SampledPoints: LPcmsFloat32Number;                 // Points to an array of floats if PType == 0
+END;
+
+LPcmsToneCurve = pointer;   // opaque tone curve handle
+cmsCurveSegmentArray = array [0..0] of cmsCurveSegment;   // [0..0] bound: presumably a placeholder for a caller-sized C array
+LPcmsCurveSegmentArray = ^cmsCurveSegmentArray;
+
+cmsFloat64NumberArray = array [0..0] of cmsFloat64Number;
+LPcmsFloat64NumberArray = ^cmsFloat64NumberArray;
+
+cmsUInt16NumberArray = array [0..0] of cmsUInt16Number;
+LPcmsUInt16NumberArray = ^cmsUInt16NumberArray;
+
+cmsFloat32NumberArray = array [0..0] of cmsFloat32Number;
+LPcmsFloat32NumberArray = ^cmsFloat32NumberArray;
+
+LPcmsToneCurveArray = array [0..0] of LPcmsToneCurve;   // despite the LP prefix this is the array itself, not a pointer
+LPLPcmsToneCurveArray = ^LPcmsToneCurveArray;
+
+cmsUInt32NumberArray = array [0..0] of cmsUInt32Number;
+LPcmsUInt32NumberArray = ^cmsUInt32NumberArray;
+
+function  cmsBuildSegmentedToneCurve(ContextID: cmsContext; nSegments: cmsInt32Number; Segments: LPcmsCurveSegmentArray): LPcmsToneCurve; stdcall;
+function  cmsBuildParametricToneCurve(ContextID: cmsContext; CType: cmsInt32Number; Params: LPcmsFloat64NumberArray): LPcmsToneCurve; stdcall;
+function  cmsBuildGamma(ContextID: cmsContext; Gamma: cmsFloat64Number): LPcmsToneCurve; stdcall;
+function  cmsBuildTabulatedToneCurve16(ContextID: cmsContext; nEntries: cmsInt32Number; values: LPcmsUInt16NumberArray): LPcmsToneCurve; stdcall;
+function  cmsBuildTabulatedToneCurveFloat(ContextID: cmsContext; nEntries: cmsUInt32Number; values: LPcmsFloat32NumberArray): LPcmsToneCurve; stdcall;
+procedure cmsFreeToneCurve(Curve: LPcmsToneCurve); stdcall;
+procedure cmsFreeToneCurveTriple(Curve: LPLPcmsToneCurveArray); stdcall;
+function  cmsDupToneCurve(Src: LPcmsToneCurve): LPcmsToneCurve; stdcall;
+function  cmsReverseToneCurve(InGamma: LPcmsToneCurve): LPcmsToneCurve; stdcall;
+function  cmsReverseToneCurveEx(nResultSamples: cmsInt32Number; InGamma: LPcmsToneCurve): LPcmsToneCurve; stdcall;
+function  cmsJoinToneCurve(ContextID: cmsContext; X, Y: LPcmsToneCurve; nPoints: cmsUInt32Number): LPcmsToneCurve; stdcall;
+function  cmsSmoothToneCurve(Tab: LPcmsToneCurve; lambda: cmsFloat64Number): cmsBool; stdcall;
+function  cmsEvalToneCurveFloat(Curve: LPcmsToneCurve; v: cmsFloat32Number): cmsFloat32Number; stdcall;
+function  cmsEvalToneCurve16(Curve: LPcmsToneCurve; v: cmsUInt16Number): cmsUInt16Number; stdcall;
+function  cmsIsToneCurveMultisegment(InGamma: LPcmsToneCurve): cmsBool; stdcall;
+function  cmsIsToneCurveLinear(Curve: LPcmsToneCurve): cmsBool; stdcall;
+function  cmsIsToneCurveMonotonic(t: LPcmsToneCurve): cmsBool; stdcall;
+function  cmsIsToneCurveDescending(t: LPcmsToneCurve): cmsBool; stdcall;
+function  cmsGetToneCurveParametricType(t: LPcmsToneCurve): cmsInt32Number; stdcall;
+function  cmsEstimateGamma(t: LPcmsToneCurve; Precision: cmsFloat64Number): cmsFloat64Number; stdcall;
+function  cmsGetToneCurveEstimatedTableEntries(t: LPcmsToneCurve): cmsUInt32Number; stdcall;
+function  cmsGetToneCurveEstimatedTable(t: LPcmsToneCurve): LPcmsUInt16Number; stdcall;
+
+
+// Implements pipelines of multi-processing elements -------------------------------------------------------------
+
+type
+    LPcmsPipeline = pointer;       // opaque pipeline handle
+    LPcmsStage    = pointer;       // opaque stage handle
+    LPLPcmsStage  = ^LPcmsStage;
+
+// High-level pipeline functions
+function  cmsPipelineAlloc(ContextID: cmsContext; InputChannels, OutputChannels: cmsUInt32Number): LPcmsPipeline; stdcall;
+procedure cmsPipelineFree(lut: LPcmsPipeline); stdcall;
+function  cmsPipelineDup(Orig: LPcmsPipeline): LPcmsPipeline; stdcall;
+function  cmsGetPipelineContextID(lut: LPcmsPipeline): cmsContext; stdcall;
+function  cmsPipelineInputChannels(lut: LPcmsPipeline): cmsUInt32Number; stdcall;
+function  cmsPipelineOutputChannels(lut: LPcmsPipeline): cmsUInt32Number; stdcall;
+
+function  cmsPipelineStageCount(lut: LPcmsPipeline): cmsUInt32Number; stdcall;
+function  cmsPipelineGetPtrToFirstStage(lut: LPcmsPipeline): LPcmsStage; stdcall;
+function  cmsPipelineGetPtrToLastStage(lut: LPcmsPipeline): LPcmsStage; stdcall;
+
+procedure cmsPipelineEval16(Inv, Outv: LPcmsUInt16NumberArray; lut: LPcmsPipeline); stdcall;
+procedure cmsPipelineEvalFloat(Inv, Outv: LPcmsFloat32NumberArray; lut: LPcmsPipeline); stdcall;
+
+function  cmsPipelineEvalReverseFloat(Target, Result, Hint: LPcmsFloat32NumberArray; lut: LPcmsPipeline): cmsBool; stdcall;
+function  cmsPipelineCat(l1, l2: LPcmsPipeline): cmsBool; stdcall;
+function  cmsPipelineSetSaveAs8bitsFlag(lut: LPcmsPipeline; On: cmsBool): cmsBool; stdcall;
+
+// Position of a stage within the pipeline chain
+type
+    cmsStageLoc = (cmsAT_BEGIN = 0, cmsAT_END = 1);   // NOTE(review): passed by value to the library; presumably needs an int-sized enum ({$Z4}) - confirm the unit's directives
+
+procedure cmsPipelineInsertStage(lut: LPcmsPipeline; loc: cmsStageLoc; mpe: LPcmsStage); stdcall;
+procedure cmsPipelineUnlinkStage(lut: LPcmsPipeline; loc: cmsStageLoc; mpe: LPLPcmsStage); stdcall;
+
+// This function is quite useful to analyze the structure of a Pipeline and retrieve the Stage elements
+// that make up the Pipeline. It should be called with the Pipeline, the number of expected elements and
+// then a list of expected types followed by a list of double pointers to Stage elements. If
+// the function finds a match with the current pipeline, it fills the pointers and returns TRUE;
+// if not, it returns FALSE without touching anything.
+// FUNCTION cmsPipelineCheckAndRetreiveStages(const cmsPipeline* Lut, n: cmsUInt32Number, ...): cmsBool; StdCall;
+
+// Matrices use double precision whereas CLUTs use only float precision: an ICC profile can encode
+// matrices with far more precision than CLUTs
+function  cmsStageAllocIdentity(ContextID: cmsContext; nChannels: cmsUInt32Number): LPcmsStage; stdcall;
+function  cmsStageAllocToneCurves(ContextID: cmsContext; nChannels: cmsUInt32Number; Curves: LPLPcmsToneCurveArray): LPcmsStage; stdcall;
+function  cmsStageAllocMatrix(ContextID: cmsContext; Rows, Cols: cmsUInt32Number; Matrix, Offset: LPcmsFloat64NumberArray): LPcmsStage; stdcall;
+
+function  cmsStageAllocCLut16bit(ContextID: cmsContext; nGridPoints: cmsUInt32Number; inputChan, outputChan: cmsUInt32Number; Table: LPcmsUInt16NumberArray): LPcmsStage; stdcall;
+function  cmsStageAllocCLutFloat(ContextID: cmsContext; nGridPoints: cmsUInt32Number; inputChan, outputChan: cmsUInt32Number; Table: LPcmsFloat32NumberArray): LPcmsStage; stdcall;
+
+function  cmsStageAllocCLut16bitGranular(ContextID: cmsContext; nGridPoints: LPcmsUInt32NumberArray; inputChan, outputChan: cmsUInt32Number; Table: LPcmsUInt16NumberArray): LPcmsStage; stdcall;
+function  cmsStageAllocCLutFloatGranular(ContextID: cmsContext; nGridPoints: LPcmsUInt32NumberArray; inputChan, outputChan: cmsUInt32Number; Table: LPcmsFloat32NumberArray): LPcmsStage; stdcall;
+
+
+function  cmsStageDup(mpe: LPcmsStage): LPcmsStage; stdcall;
+procedure cmsStageFree(mpe: LPcmsStage); stdcall;
+function  cmsStageNext(mpe: LPcmsStage): LPcmsStage; stdcall;
+
+function  cmsStageInputChannels(mpe: LPcmsStage): cmsUInt32Number; stdcall;
+function  cmsStageOutputChannels(mpe: LPcmsStage): cmsUInt32Number; stdcall;
+function  cmsStageType(mpe: LPcmsStage): cmsStageSignature; stdcall;
+function  cmsStageData(mpe: LPcmsStage): Pointer; stdcall;
+
+// Sampling callbacks (cdecl, unlike the stdcall API functions)
+
+type
+    cmsSAMPLER16    = function(Inp, Outp: LPcmsUInt16NumberArray; Cargo: Pointer): cmsInt32Number; cdecl;
+    cmsSAMPLERFLOAT = function(Inp, Outp: LPcmsFloat32NumberArray; Cargo: Pointer): cmsInt32Number; cdecl;
+
+// Flag that prevents changes from being written to the destination
+
+const
+
+    SAMPLER_INSPECT = $01000000;
+
+
+// CLUT stages only
+function cmsStageSampleCLut16bit(mpe: LPcmsStage; Sampler: cmsSAMPLER16; Cargo: Pointer; dwFlags: cmsUInt32Number): cmsBool; stdcall;
+function cmsStageSampleCLutFloat(mpe: LPcmsStage; Sampler: cmsSAMPLERFLOAT; Cargo: Pointer; dwFlags: cmsUInt32Number): cmsBool; stdcall;
+
+
+// Slicers
+function cmsSliceSpace16(nInputs: cmsUInt32Number; clutPoints: LPcmsUInt32NumberArray;
+                         Sampler: cmsSAMPLER16; Cargo: Pointer): cmsBool; stdcall;
+
+function cmsSliceSpaceFloat(nInputs: cmsUInt32Number; clutPoints: LPcmsUInt32NumberArray;
+                            Sampler: cmsSAMPLERFLOAT; Cargo: Pointer): cmsBool; stdcall;
+
+// Multilocalized Unicode management ---------------------------------------------------------------------------------------
+
+type
+   LPcmsMLU = pointer;   // opaque multilocalized-unicode handle
+
+const
+
+    cmsNoLanguage = #0#0#0;
+    cmsNoCountry  = #0#0#0;
+
+
+function  cmsMLUalloc(ContextID: cmsContext; nItems: cmsUInt32Number): LPcmsMLU; stdcall;
+procedure cmsMLUfree(mlu: LPcmsMLU); stdcall;
+function  cmsMLUdup(mlu: LPcmsMLU): LPcmsMLU; stdcall;
+
+function  cmsMLUsetASCII(mlu: LPcmsMLU; LanguageCode, CountryCode, ASCIIString: PAnsiChar): cmsBool; stdcall;
+function  cmsMLUsetWide(mlu: LPcmsMLU; LanguageCode, CountryCode: PAnsiChar; WideString: PWChar): cmsBool; stdcall;
+
+function  cmsMLUgetASCII(mlu: LPcmsMLU; LanguageCode, CountryCode: PAnsiChar; Buffer: PAnsiChar; BufferSize: cmsUInt32Number): cmsUInt32Number; stdcall;
+
+function  cmsMLUgetWide(mlu: LPcmsMLU; LanguageCode, CountryCode: PAnsiChar; Buffer: PWChar; BufferSize: cmsUInt32Number): cmsUInt32Number; stdcall;
+
+function  cmsMLUgetTranslation(mlu: LPcmsMLU; LanguageCode, CountryCode, ObtainedLanguage, ObtainedCountry: PAnsiChar): cmsBool; stdcall;
+
+// Undercolorremoval & black generation -------------------------------------------------------------------------------------
+
+type
+
+cmsUcrBg = packed record
+    Ucr, Bg : LPcmsToneCurve;   // two tone curve handles
+    Desc    : LPcmsMLU;         // multilocalized text handle
+end;
+
+
+// Screening ----------------------------------------------------------------------------------------------------------------
+
+const
+
+    cmsPRINTER_DEFAULT_SCREENS    = $0001;
+    cmsFREQUENCE_UNITS_LINES_CM   = $0000;
+    cmsFREQUENCE_UNITS_LINES_INCH = $0002;
+
+    cmsSPOT_UNKNOWN         = 0;
+    cmsSPOT_PRINTER_DEFAULT = 1;
+    cmsSPOT_ROUND           = 2;
+    cmsSPOT_DIAMOND         = 3;
+    cmsSPOT_ELLIPSE         = 4;
+    cmsSPOT_LINE            = 5;
+    cmsSPOT_SQUARE          = 6;
+    cmsSPOT_CROSS           = 7;
+
+
+Type
+
+// One screening channel. Pad1 is not an lcms2 member: it reproduces the tail padding of the C struct (24 bytes when doubles are
+// 8-byte aligned - assumed target ABI). At 20 bytes, every element of cmsScreening.Channels after the first would be misplaced.
+cmsScreeningChannel = PACKED RECORD
+      Frequency,
+      ScreenAngle: cmsFloat64Number;
+      SpotShape, Pad1: cmsUInt32Number;      // SpotShape: presumably one of the cmsSPOT_* constants
+END;
+
+// Screening description: two 32-bit values followed by cmsMAXCHANNELS channel records
+cmsScreening = PACKED RECORD
+    Flag,
+    nChannels : cmsUInt32Number;
+    Channels: Array [0..cmsMAXCHANNELS-1] OF cmsScreeningChannel;
+END;
+
+
+// Named color -----------------------------------------------------------------------------------------------------------------
+
+
+LPcmsNAMEDCOLORLIST = pointer;   // opaque named color list handle
+
+function  cmsAllocNamedColorList(ContextID: cmsContext; n, ColorantCount: cmsUInt32Number;
+                                 Prefix, Suffix: PAnsiChar): LPcmsNAMEDCOLORLIST; stdcall;
+
+procedure cmsFreeNamedColorList(v: LPcmsNAMEDCOLORLIST); stdcall;
+function  cmsDupNamedColorList(v: LPcmsNAMEDCOLORLIST): LPcmsNAMEDCOLORLIST; stdcall;
+function  cmsAppendNamedColor(v: LPcmsNAMEDCOLORLIST; Name: PAnsiChar;
+                              PCS, Colorant: LPcmsUInt16NumberArray): cmsBool; stdcall;
+
+function  cmsNamedColorCount(v: LPcmsNAMEDCOLORLIST): cmsUInt32Number; stdcall;
+function  cmsNamedColorIndex(v: LPcmsNAMEDCOLORLIST; Name: PAnsiChar): cmsInt32Number; stdcall;
+
+function  cmsNamedColorInfo(v: LPcmsNAMEDCOLORLIST; nColor: cmsUInt32Number;
+                            Name, Prefix, Suffix: PAnsiChar;
+                            PCS, Colorant: LPcmsUInt16NumberArray): cmsBool; stdcall;
+
+// Gets the named color list out of a transform
+function  cmsGetNamedColorList(xform: cmsHTRANSFORM): LPcmsNAMEDCOLORLIST; stdcall;
+
+// Profile sequence -----------------------------------------------------------------------------------------------------
+
+type
+
+// Descriptor of one profile in a sequence. Part of the fields come from the profile sequence
+// descriptor tag, the remaining ones from the Profile Sequence Identifier tag
+
+cmsPSEQDESC = packed record
+    deviceMfg, deviceModel: cmsSignature;
+
+    attributes  : cmsUInt64Number;
+    technology  : cmsTechnologySignature;
+    ProfileID   : cmsProfileID;
+    Manufacturer,
+    Model,
+    Description : LPcmsMLU;
+end;
+
+LPcmsSEQDESC = ^cmsPSEQDESC;
+
+cmsSEQ = packed record
+
+    n         : cmsUInt32Number;   // presumably the number of descriptors seq points to - confirm
+    ContextID : cmsContext;
+    seq       : LPcmsSEQDESC;
+end;
+
+LPcmsSEQ = ^cmsSEQ;
+
+function  cmsAllocProfileSequenceDescription(ContextID: cmsContext; n: cmsUInt32Number): LPcmsSEQ; stdcall;
+function  cmsDupProfileSequenceDescription(pseq: LPcmsSEQ): LPcmsSEQ; stdcall;
+procedure cmsFreeProfileSequenceDescription(pseq: LPcmsSEQ); stdcall;
+
+// Dictionaries --------------------------------------------------------------------------------------------------------
+
+type
+
+    LPcmsDICTentry = ^cmsDICTentry;
+
+    cmsDICTentry = packed record
+
+        Next: LPcmsDICTentry;                  // pointer to another entry (linked list)
+
+        DisplayName, DisplayValue: LPcmsMLU;
+        Name, Value: PWChar;
+    end;
+
+function  cmsDictAlloc(ContextID: cmsContext): cmsHANDLE; stdcall;
+procedure cmsDictFree(hDict: cmsHANDLE); stdcall;
+function  cmsDictDup(hDict: cmsHANDLE): cmsHANDLE; stdcall;
+
+function  cmsDictAddEntry(hDict: cmsHANDLE; Name, Value: PWChar; DisplayName, DisplayValue: LPcmsMLU): cmsBool; stdcall;
+function  cmsDictGetEntryList(hDict: cmsHANDLE): LPcmsDICTentry; stdcall;
+function  cmsDictNextEntry(e: LPcmsDICTentry): LPcmsDICTentry; stdcall;
+
+// Profile data access -------------------------------------------------------------------------------------------------
+function cmsCreateProfilePlaceholder(ContextID: cmsContext): cmsHPROFILE; stdcall;
+
+function cmsGetProfileContextID(hProfile: cmsHPROFILE): cmsContext; stdcall;
+function cmsGetTagCount(hProfile: cmsHPROFILE): cmsInt32Number; stdcall;
+function cmsGetTagSignature(hProfile: cmsHPROFILE; n: cmsUInt32Number): cmsTagSignature; stdcall;
+function cmsIsTag(hProfile: cmsHPROFILE; sig: cmsTagSignature): cmsBool; stdcall;
+
+// Pre-formatted data: reading and writing
+function cmsReadTag(hProfile: cmsHPROFILE; sig: cmsTagSignature): Pointer; stdcall;
+function cmsWriteTag(hProfile: cmsHPROFILE; sig: cmsTagSignature; data: Pointer): cmsBool; stdcall;
+function cmsLinkTag(hProfile: cmsHPROFILE; sig: cmsTagSignature; dest: cmsTagSignature): cmsBool; stdcall;
+function cmsTagLinkedTo(hProfile: cmsHPROFILE; sig: cmsTagSignature): cmsTagSignature; stdcall;
+
+// Raw data: reading and writing
+function cmsReadRawTag(hProfile: cmsHPROFILE; sig: cmsTagSignature; Buffer: Pointer; BufferSize: cmsUInt32Number): cmsInt32Number; stdcall;
+function cmsWriteRawTag(hProfile: cmsHPROFILE; sig: cmsTagSignature; data: Pointer; Size: cmsUInt32Number): cmsBool; stdcall;
+
+// Profile header access
+const
+
+    cmsEmbeddedProfileFalse    = $00000000;
+    cmsEmbeddedProfileTrue     = $00000001;
+    cmsUseAnywhere             = $00000000;
+    cmsUseWithEmbeddedDataOnly = $00000002;
+
+function  cmsGetHeaderFlags(hProfile: cmsHPROFILE): cmsUInt32Number; stdcall;
+procedure cmsGetHeaderAttributes(hProfile: cmsHPROFILE; Flags: LPcmsUInt64Number); stdcall;
+procedure cmsGetHeaderProfileID(hProfile: cmsHPROFILE; ProfileID: LPcmsUInt8Number); stdcall;
+
+// TODO: not yet bound, the C prototype takes a "struct tm *" argument
+// FUNCTION  cmsGetHeaderCreationDateTime(hProfile: cmsHPROFILE; struct tm *Dest): cmsBool; StdCall;
+
+function  cmsGetHeaderRenderingIntent(hProfile: cmsHPROFILE): cmsUInt32Number; stdcall;
+procedure cmsSetHeaderFlags(hProfile: cmsHPROFILE; Flags: cmsUInt32Number); stdcall;
+function  cmsGetHeaderManufacturer(hProfile: cmsHPROFILE): cmsUInt32Number; stdcall;
+procedure cmsSetHeaderManufacturer(hProfile: cmsHPROFILE; manufacturer: cmsUInt32Number); stdcall;
+function  cmsGetHeaderModel(hProfile: cmsHPROFILE): cmsUInt32Number; stdcall;
+procedure cmsSetHeaderModel(hProfile: cmsHPROFILE; model: cmsUInt32Number); stdcall;
+procedure cmsSetHeaderAttributes(hProfile: cmsHPROFILE; Flags: cmsUInt64Number); stdcall;
+procedure cmsSetHeaderProfileID(hProfile: cmsHPROFILE; ProfileID: LPcmsUInt8Number); stdcall;
+procedure cmsSetHeaderRenderingIntent(hProfile: cmsHPROFILE; RenderingIntent: cmsUInt32Number); stdcall;
+
+function  cmsGetPCS(hProfile: cmsHPROFILE): cmsColorSpaceSignature; stdcall;
+procedure cmsSetPCS(hProfile: cmsHPROFILE; pcs: cmsColorSpaceSignature); stdcall;
+function  cmsGetColorSpace(hProfile: cmsHPROFILE): cmsColorSpaceSignature; stdcall;
+procedure cmsSetColorSpace(hProfile: cmsHPROFILE; sig: cmsColorSpaceSignature); stdcall;
+function  cmsGetDeviceClass(hProfile: cmsHPROFILE): cmsProfileClassSignature; stdcall;
+procedure cmsSetDeviceClass(hProfile: cmsHPROFILE; sig: cmsProfileClassSignature); stdcall;
+procedure cmsSetProfileVersion(hProfile: cmsHPROFILE; Version: cmsFloat64Number); stdcall;
+function  cmsGetProfileVersion(hProfile: cmsHPROFILE): cmsFloat64Number; stdcall;
+
+function  cmsGetEncodedICCversion(hProfile: cmsHPROFILE): cmsUInt32Number; stdcall;
+procedure cmsSetEncodedICCversion(hProfile: cmsHPROFILE; Version: cmsUInt32Number); stdcall;
+
+
+Const
+
+    // How profiles may be used
+    LCMS_USED_AS_INPUT     = 0;
+    LCMS_USED_AS_OUTPUT    = 1;
+    LCMS_USED_AS_PROOF     = 2;
+// NOTE(review): the UsedDirection parameters below presumably take one of the LCMS_USED_AS_* values - confirm against lcms2.h
+FUNCTION   cmsIsIntentSupported(hProfile: cmsHPROFILE; Intent: cmsUInt32Number; UsedDirection: cmsUInt32Number): cmsBool; StdCall;
+FUNCTION   cmsIsMatrixShaper(hProfile: cmsHPROFILE): cmsBool; StdCall;
+FUNCTION   cmsIsCLUT(hProfile: cmsHPROFILE; Intent: cmsUInt32Number; UsedDirection: cmsUInt32Number): cmsBool; StdCall;
+
+// Translate between our notation (an Integer) and ICC colour space signatures
+FUNCTION _cmsICCcolorSpace(OurNotation: Integer): cmsColorSpaceSignature; StdCall;
+FUNCTION _cmsLCMScolorSpace(ProfileSpace: cmsColorSpaceSignature): Integer; StdCall;
+
+FUNCTION cmsChannelsOf( ColorSpace: cmsColorSpaceSignature): cmsUInt32Number; StdCall;
+
+// Build a suitable formatter for the colorspace of this profile (the formatter is returned as a cmsUInt32Number)
+FUNCTION cmsFormatterForColorspaceOfProfile(hProfile: cmsHPROFILE; nBytes: cmsUInt32Number; lIsFloat: cmsBool): cmsUInt32Number; StdCall;
+FUNCTION cmsFormatterForPCSOfProfile(hProfile: cmsHPROFILE; nBytes: cmsUInt32Number; lIsFloat: cmsBool): cmsUInt32Number; StdCall;
+
+Type
+
+// Localized info: the item selector passed as the Info argument of the two routines below
+cmsInfoType = (
+             cmsInfoDescription  = 0,
+             cmsInfoManufacturer = 1,
+             cmsInfoModel        = 2,
+             cmsInfoCopyright    = 3
+);
+// Wide-character variant (Buffer: PWChar)
+FUNCTION cmsGetProfileInfo(hProfile: cmsHPROFILE; Info: cmsInfoType; LanguageCode, CountryCode: PAnsiChar;
+                                                            Buffer: PWChar; BufferSize: cmsUInt32Number): cmsUInt32Number; StdCall;
+// ANSI variant (Buffer: PAnsiChar)
+FUNCTION cmsGetProfileInfoASCII(hProfile: cmsHPROFILE; Info: cmsInfoType; LanguageCode, CountryCode: PAnsiChar;
+                                                            Buffer: PAnsiChar; BufferSize: cmsUInt32Number): cmsUInt32Number; StdCall;
+
+// IO handlers ----------------------------------------------------------------------------------------------------------
+
+Type
+// IO handler handle: declared here as an untyped Pointer
+LPcmsIOHANDLER = Pointer;
+
+FUNCTION cmsOpenIOhandlerFromFile(ContextID: cmsContext; FileName, AccessMode: PAnsiChar): LPcmsIOHANDLER; StdCall;
+// Not translated: the Stream parameter is still written in C syntax (FILE* Stream)
+// FUNCTION cmsOpenIOhandlerFromStream(ContextID: cmsContext; FILE* Stream): LPcmsIOHANDLER; StdCall;
+FUNCTION cmsOpenIOhandlerFromMem(ContextID: cmsContext; Buffer: Pointer; size: cmsUInt32Number; AccessMode: PAnsiChar): LPcmsIOHANDLER; StdCall;
+FUNCTION cmsOpenIOhandlerFromNULL(ContextID: cmsContext): LPcmsIOHANDLER; StdCall;
+FUNCTION cmsCloseIOhandler(io: LPcmsIOHANDLER): cmsBool; StdCall;
+
+// Profile high level functions -----------------------------------------------------------------------------------------
+// (the *THR variants take an explicit ContextID as their first parameter; the FILE*-based C routines are left commented out)
+FUNCTION   cmsOpenProfileFromFile(ICCProfile : PAnsiChar; sAccess: PAnsiChar): cmsHPROFILE; StdCall;
+FUNCTION   cmsOpenProfileFromFileTHR(ContextID: cmsContext; ICCProfile, sAccess: PAnsiChar): cmsHPROFILE; StdCall;
+// FUNCTION      CMSEXPORT cmsOpenProfileFromStream(FILE* ICCProfile, const char* sAccess): cmsHPROFILE; StdCall;
+// FUNCTION      CMSEXPORT cmsOpenProfileFromStreamTHR(ContextID: cmsContext; FILE* ICCProfile, const char* sAccess): cmsHPROFILE; StdCall;
+FUNCTION   cmsOpenProfileFromMem(MemPtr: Pointer; dwSize: cmsUInt32Number): cmsHPROFILE; StdCall;
+FUNCTION   cmsOpenProfileFromMemTHR(ContextID: cmsContext; MemPtr: Pointer; dwSize: cmsUInt32Number): cmsHPROFILE; StdCall;
+FUNCTION   cmsOpenProfileFromIOhandlerTHR(ContextID: cmsContext; io: LPcmsIOHANDLER): cmsHPROFILE; StdCall;
+FUNCTION   cmsCloseProfile(hProfile: cmsHPROFILE): cmsBool; StdCall;
+
+FUNCTION   cmsSaveProfileToFile(hProfile: cmsHPROFILE; FileName: PAnsiChar): cmsBool; StdCall;
+// FUNCTION         CMSEXPORT cmsSaveProfileToStream(hProfile: cmsHPROFILE, FILE* Stream): cmsBool; StdCall;
+FUNCTION   cmsSaveProfileToMem(hProfile: cmsHPROFILE; MemPtr: Pointer; BytesNeeded: LPcmsUInt32Number): cmsBool; StdCall;
+FUNCTION   cmsSaveProfileToIOhandler(hProfile: cmsHPROFILE; io: LPcmsIOHANDLER):cmsUInt32Number; StdCall;
+
+// Predefined virtual profiles ------------------------------------------------------------------------------------------
+// Each routine is declared twice: a *THR form taking ContextID first, and a form without that parameter
+FUNCTION  cmsCreateRGBProfileTHR(ContextID: cmsContext;
+                                                   WhitePoint: LPcmsCIExyY;
+                                                   Primaries: LPcmsCIExyYTRIPLE;
+                                                   TransferFunction: LPLPcmsToneCurveArray): cmsHPROFILE; StdCall;
+
+FUNCTION  cmsCreateRGBProfile(WhitePoint: LPcmsCIExyY;
+                                                   Primaries: LPcmsCIExyYTRIPLE;
+                                                   TransferFunction: LPLPcmsToneCurveArray): cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreateGrayProfileTHR(ContextID: cmsContext;
+                                                    WhitePoint: LPcmsCIExyY;
+                                                    TransferFunction: LPcmsToneCurve): cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreateGrayProfile(WhitePoint: LPcmsCIExyY;
+                                                     TransferFunction: LPcmsToneCurve): cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreateLinearizationDeviceLinkTHR(ContextID: cmsContext;
+                                                                 ColorSpace: cmsColorSpaceSignature;
+                                                                 TransferFunctions: LPLPcmsToneCurveArray): cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreateLinearizationDeviceLink(ColorSpace: cmsColorSpaceSignature;
+                                                                 TransferFunctions: LPLPcmsToneCurveArray): cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreateInkLimitingDeviceLinkTHR(ContextID: cmsContext;
+                                                              ColorSpace: cmsColorSpaceSignature; Limit: cmsFloat64Number): cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreateInkLimitingDeviceLink(ColorSpace: cmsColorSpaceSignature; Limit: cmsFloat64Number): cmsHPROFILE; StdCall;
+
+// Lab, XYZ, sRGB, BCHSW-abstract and NULL profiles; again each has a *THR form taking ContextID first
+FUNCTION cmsCreateLab2ProfileTHR(ContextID: cmsContext; WhitePoint: LPcmsCIExyY): cmsHPROFILE; StdCall;
+FUNCTION cmsCreateLab2Profile(WhitePoint: LPcmsCIExyY): cmsHPROFILE; StdCall;
+FUNCTION cmsCreateLab4ProfileTHR(ContextID: cmsContext; WhitePoint: LPcmsCIExyY): cmsHPROFILE; StdCall;
+FUNCTION cmsCreateLab4Profile(WhitePoint: LPcmsCIExyY): cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreateXYZProfileTHR(ContextID: cmsContext): cmsHPROFILE; StdCall;
+FUNCTION cmsCreateXYZProfile: cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreate_sRGBProfileTHR(ContextID: cmsContext): cmsHPROFILE; StdCall;
+FUNCTION cmsCreate_sRGBProfile: cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreateBCHSWabstractProfileTHR(ContextID: cmsContext;
+                                                             nLUTPoints: Integer;
+                                                             Bright,
+                                                             Contrast,
+                                                             Hue,
+                                                             Saturation: cmsFloat64Number;
+                                                             TempSrc,
+                                                             TempDest: Integer): cmsHPROFILE; StdCall;
+
+FUNCTION cmsCreateBCHSWabstractProfile(   nLUTPoints: Integer;
+                                                             Bright,
+                                                             Contrast,
+                                                             Hue,
+                                                             Saturation: cmsFloat64Number;
+                                                             TempSrc,
+                                                             TempDest: Integer): cmsHPROFILE; StdCall;
+
+FUNCTION  cmsCreateNULLProfileTHR(ContextID: cmsContext): cmsHPROFILE; StdCall;
+FUNCTION  cmsCreateNULLProfile: cmsHPROFILE; StdCall;
+
+// Converts a transform to a devicelink profile
+FUNCTION  cmsTransform2DeviceLink(hTransform: cmsHTRANSFORM; Version: cmsFloat64Number; dwFlags: cmsUInt32Number): cmsHPROFILE; StdCall;
+
+// Intents ----------------------------------------------------------------------------------------------
+
+Const
+
+// ICC Intents
+INTENT_PERCEPTUAL                              = 0;
+INTENT_RELATIVE_COLORIMETRIC                   = 1;
+INTENT_SATURATION                              = 2;
+INTENT_ABSOLUTE_COLORIMETRIC                   = 3;
+
+// Non-ICC intents
+INTENT_PRESERVE_K_ONLY_PERCEPTUAL             = 10;
+INTENT_PRESERVE_K_ONLY_RELATIVE_COLORIMETRIC  = 11;
+INTENT_PRESERVE_K_ONLY_SATURATION             = 12;
+INTENT_PRESERVE_K_PLANE_PERCEPTUAL            = 13;
+INTENT_PRESERVE_K_PLANE_RELATIVE_COLORIMETRIC = 14;
+INTENT_PRESERVE_K_PLANE_SATURATION            = 15;
+
+Type
+LPPAnsiChar = ^PAnsiChar;   // pointer to a PAnsiChar, i.e. the C "char**" shape
+
+// Call with NULL as parameters to get the intent count
+FUNCTION cmsGetSupportedIntents(nMax: cmsUInt32Number; Codes: LPcmsUInt32Number; Descriptions: LPPAnsiChar): cmsUInt32Number; StdCall;
+
+Const
+
+// Flags
+// NOTE(review): presumably OR-ed together and passed as the dwFlags arguments declared in this unit - confirm against lcms2.h
+cmsFLAGS_NOCACHE                  = $0040;    // Inhibit 1-pixel cache
+cmsFLAGS_NOOPTIMIZE               = $0100;    // Inhibit optimizations
+cmsFLAGS_NULLTRANSFORM            = $0200;    // Don't transform anyway
+
+// Proofing flags
+cmsFLAGS_GAMUTCHECK               = $1000;    // Out of Gamut alarm
+cmsFLAGS_SOFTPROOFING             = $4000;    // Do softproofing
+
+// Misc
+cmsFLAGS_BLACKPOINTCOMPENSATION   = $2000;
+cmsFLAGS_NOWHITEONWHITEFIXUP      = $0004;    // Don't fix scum dot
+cmsFLAGS_HIGHRESPRECALC           = $0400;    // Use more memory to give better accuracy
+cmsFLAGS_LOWRESPRECALC            = $0800;    // Use less memory to minimize resources
+
+// For devicelink creation
+cmsFLAGS_8BITS_DEVICELINK         = $0008;   // Create 8 bits devicelinks
+cmsFLAGS_GUESSDEVICECLASS         = $0020;   // Guess device class (for transform2devicelink)
+cmsFLAGS_KEEP_SEQUENCE            = $0080;   // Keep profile sequence for devicelink creation
+
+// Specific to a particular optimizations
+cmsFLAGS_FORCE_CLUT               = $0002;    // Force CLUT optimization
+cmsFLAGS_CLUT_POST_LINEARIZATION  = $0001;    // create postlinearization tables if possible
+cmsFLAGS_CLUT_PRE_LINEARIZATION   = $0010;    // create prelinearization tables if possible
+
+// CRD special
+cmsFLAGS_NODEFAULTRESOURCEDEF     = $01000000;
+
+// Fine-tune control over number of gridpoints (declared without StdCall: presumably implemented in this unit, not imported)
+FUNCTION cmsFLAGS_GRIDPOINTS(n: Integer): Integer;
+
+
+// Transforms ---------------------------------------------------------------------------------------------------
+
+type
+  LPcmsHPROFILEArray = ^cmsHPROFILEArray;
+  cmsHPROFILEArray = array[0..0] of cmsHPROFILE;   // declared with a single element; NOTE(review): presumably indexed past 0 through the pointer type - confirm range checking is off at call sites
+
+  LPcmsBoolArray = ^cmsBoolArray;
+  cmsBoolArray = array[0..0] of cmsBool;           // same single-element declaration as cmsHPROFILEArray
+// Transform constructors, all returning cmsHTRANSFORM; the *THR forms (and cmsCreateExtendedTransform) take ContextID first
+FUNCTION   cmsCreateTransformTHR(ContextID: cmsContext;
+                                                  Input: cmsHPROFILE;
+                                                  InputFormat: cmsUInt32Number;
+                                                  Output: cmsHPROFILE;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Intent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall;
+
+FUNCTION   cmsCreateTransform(Input: cmsHPROFILE;
+                                                  InputFormat: cmsUInt32Number;
+                                                  Output: cmsHPROFILE;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Intent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall;
+
+FUNCTION   cmsCreateProofingTransformTHR(ContextID: cmsContext;
+                                                  Input: cmsHPROFILE;
+                                                  InputFormat: cmsUInt32Number;
+                                                  Output: cmsHPROFILE;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Proofing: cmsHPROFILE;
+                                                  Intent: cmsUInt32Number;
+                                                  ProofingIntent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall;
+
+FUNCTION   cmsCreateProofingTransform(Input: cmsHPROFILE;
+                                                  InputFormat: cmsUInt32Number;
+                                                  Output: cmsHPROFILE;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Proofing: cmsHPROFILE;
+                                                  Intent: cmsUInt32Number;
+                                                  ProofingIntent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall;
+
+FUNCTION   cmsCreateMultiprofileTransformTHR(ContextID: cmsContext;
+                                                  hProfiles: LPcmsHPROFILEArray;
+                                                  nProfiles: cmsUInt32Number;
+                                                  InputFormat: cmsUInt32Number;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Intent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall;
+
+
+FUNCTION   cmsCreateMultiprofileTransform( hProfiles: LPcmsHPROFILEArray;
+                                                  nProfiles: cmsUInt32Number;
+                                                  InputFormat: cmsUInt32Number;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Intent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall;
+
+// Note the parameter order: nProfiles precedes hProfiles here, the reverse of the multiprofile routines above
+FUNCTION   cmsCreateExtendedTransform(ContextID: cmsContext;
+                                                   nProfiles: cmsUInt32Number;
+                                                   hProfiles: LPcmsHPROFILEArray;
+                                                   BPC: LPcmsBoolArray;
+                                                   Intents: LPcmsUInt32NumberArray;
+                                                   AdaptationStates: LPcmsFloat64NumberArray;
+                                                   hGamutProfile: cmsHPROFILE;
+                                                   nGamutPCSposition: cmsUInt32Number;
+                                                   InputFormat,
+                                                   OutputFormat: cmsUInt32Number;
+                                                   dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall;
+
+PROCEDURE  cmsDeleteTransform(hTransform: cmsHTRANSFORM); StdCall;
+// Buffers are untyped Pointers; NOTE(review): their layout presumably follows the formats given at transform creation - confirm
+PROCEDURE  cmsDoTransform(Transform: cmsHTRANSFORM; InputBuffer, OutputBuffer: Pointer; size: cmsUInt32Number);  StdCall;
+PROCEDURE  cmsDoTransformStride(Transform: cmsHTRANSFORM; InputBuffer, OutputBuffer: Pointer; size: cmsUInt32Number; stride: cmsUInt32Number);  StdCall;
+
+// Both alarm-code routines take the same LPcmsUInt16NumberArray argument
+PROCEDURE  cmsSetAlarmCodes( NewAlarm: LPcmsUInt16NumberArray);  StdCall;
+PROCEDURE  cmsGetAlarmCodes(NewAlarm: LPcmsUInt16NumberArray); StdCall;
+
+// Adaptation state for absolute colorimetric intent
+FUNCTION  cmsSetAdaptationState(d: cmsFloat64Number):cmsFloat64Number; StdCall;
+
+// Grab the ContextID from an open transform. Returns NULL if a NULL transform is passed
+FUNCTION  cmsGetTransformContextID(hTransform: cmsHTRANSFORM):cmsContext; StdCall;
+
+// For backwards compatibility
+FUNCTION  cmsChangeBuffersFormat(hTransform: cmsHTRANSFORM; InputFormat, OutputFormat: cmsUInt32Number): cmsBool; StdCall;
+
+
+
+// PostScript ColorRenderingDictionary and ColorSpaceArray ----------------------------------------------------
+
+Type
+// Resource selector passed as the RType argument of cmsGetPostScriptColorResource
+cmsPSResourceType = (cmsPS_RESOURCE_CSA, cmsPS_RESOURCE_CRD ) ;
+
+// lcms2 unified method to access postscript color resources
+FUNCTION cmsGetPostScriptColorResource(ContextID: cmsContext;   RType: cmsPSResourceType;
+                                                                hProfile: cmsHPROFILE;
+                                                                Intent: cmsUInt32Number;
+                                                                dwFlags: cmsUInt32Number;
+                                                                io: LPcmsIOHANDLER): cmsUInt32Number; StdCall;
+// Per-resource variants: these take Buffer/dwBufferLen where the unified routine takes an IO handler
+FUNCTION cmsGetPostScriptCSA(ContextID: cmsContext; hProfile: cmsHPROFILE; Intent: cmsUInt32Number; dwFlags: cmsUInt32Number; Buffer: Pointer; dwBufferLen: cmsUInt32Number ): cmsUInt32Number; StdCall;
+FUNCTION cmsGetPostScriptCRD(ContextID: cmsContext; hProfile: cmsHPROFILE; Intent: cmsUInt32Number; dwFlags: cmsUInt32Number; Buffer: Pointer; dwBufferLen: cmsUInt32Number): cmsUInt32Number; StdCall;
+
+
+// IT8.7 / CGATS.17-200x handling -----------------------------------------------------------------------------
+
+// NOTE(review): lcms2.h declares cmsIT8Alloc, cmsIT8LoadFromFile and cmsIT8LoadFromMem with a leading cmsContext ContextID parameter - confirm these bindings against the linked library
+
+// CGATS.13 parser
+
+FUNCTION  cmsIT8Alloc: cmsHANDLE; StdCall;
+PROCEDURE cmsIT8Free(hIT8: cmsHANDLE); StdCall;
+
+// Tables
+
+FUNCTION  cmsIT8TableCount(hIT8: cmsHANDLE): Integer; StdCall;
+FUNCTION  cmsIT8SetTable(hIT8: cmsHANDLE; nTable: Integer): Integer; StdCall;
+
+// Persistence
+FUNCTION  cmsIT8LoadFromFile(cFileName: PAnsiChar): cmsHANDLE; StdCall;
+FUNCTION  cmsIT8LoadFromMem(Ptr: Pointer; size :DWord): cmsHANDLE; StdCall;
+
+FUNCTION cmsIT8SaveToFile(hIT8: cmsHANDLE; cFileName: PAnsiChar): cmsBool; StdCall;
+FUNCTION cmsIT8SaveToMem(hIT8: cmsHANDLE; MemPtr: Pointer; BytesNeeded: LPcmsUInt32Number): cmsBool; StdCall;
+// Properties
+// (property names and string values are PAnsiChar; every routine takes the IT8 handle hIT8 first)
+FUNCTION cmsIT8GetSheetType(hIT8: cmsHANDLE): PAnsiChar; StdCall;
+FUNCTION cmsIT8SetSheetType(hIT8: cmsHANDLE; TheType: PAnsiChar): cmsBool; StdCall;
+
+FUNCTION cmsIT8SetComment(hIT8: cmsHANDLE; cComment: PAnsiChar): cmsBool; StdCall;
+
+FUNCTION cmsIT8SetPropertyStr(hIT8: cmsHANDLE; cProp, Str: PAnsiChar): cmsBool; StdCall;
+FUNCTION cmsIT8SetPropertyDbl(hIT8: cmsHANDLE; cProp: PAnsiChar; Val: Double): cmsBool; StdCall;
+FUNCTION cmsIT8SetPropertyHex(hIT8: cmsHANDLE; cProp: PAnsiChar; Val: Integer): cmsBool; StdCall;
+FUNCTION cmsIT8SetPropertyUncooked(hIT8: cmsHANDLE; Key, Buffer: PAnsiChar): cmsBool; StdCall;
+
+// PropertyNames below is a var (by-reference) LPPAnsiChar, i.e. the C "char***" shape
+FUNCTION cmsIT8GetProperty(hIT8: cmsHANDLE; cProp: PAnsiChar): PAnsiChar; StdCall;
+FUNCTION cmsIT8GetPropertyDbl(hIT8: cmsHANDLE; cProp: PAnsiChar): Double; StdCall;
+FUNCTION cmsIT8EnumProperties(hIT8: cmsHANDLE; var PropertyNames: LPPAnsiChar): Integer; StdCall;
+
+// Datasets
+// (cells are addressed either by row/col Integers or by cPatch/cSample names; values are PAnsiChar or Double)
+FUNCTION cmsIT8GetDataRowCol(hIT8: cmsHANDLE; row, col: Integer): PAnsiChar; StdCall;
+FUNCTION cmsIT8GetDataRowColDbl(hIT8: cmsHANDLE; row, col: Integer): Double; StdCall;
+
+FUNCTION cmsIT8SetDataRowCol(hIT8: cmsHANDLE; row, col: Integer; Val: PAnsiChar): cmsBool; StdCall;
+FUNCTION cmsIT8SetDataRowColDbl(hIT8: cmsHANDLE; row, col: Integer; Val: Double): cmsBool; StdCall;
+
+FUNCTION cmsIT8GetData(hIT8: cmsHANDLE; cPatch, cSample: PAnsiChar): PAnsiChar; StdCall;
+
+FUNCTION cmsIT8GetDataDbl(hIT8: cmsHANDLE;cPatch, cSample: PAnsiChar): Double; StdCall;
+
+FUNCTION cmsIT8SetData(hIT8: cmsHANDLE; cPatch, cSample, Val: PAnsiChar): cmsBool; StdCall;
+
+FUNCTION cmsIT8SetDataDbl(hIT8: cmsHANDLE; cPatch, cSample: PAnsiChar; Val: Double): cmsBool; StdCall;
+
+FUNCTION cmsIT8SetDataFormat(hIT8: cmsHANDLE; n: Integer; Sample: PAnsiChar): cmsBool; StdCall;
+FUNCTION cmsIT8EnumDataFormat(hIT8: cmsHANDLE; var SampleNames: LPPAnsiChar): Integer; StdCall;
+FUNCTION cmsIT8GetPatchName(hIT8: cmsHANDLE; nPatch: Integer; Buffer: PAnsiChar): PAnsiChar; StdCall;
+
+// The LABEL extension
+FUNCTION cmsIT8SetTableByLabel(hIT8: cmsHANDLE; cSet, cField, ExpectedType: PAnsiChar): Integer; StdCall;
+
+FUNCTION cmsIT8FindDataFormat(hIT8: cmsHANDLE; cSample: PAnsiChar): Integer; StdCall;
+
+// Formatter for double (Formatter is passed as a PAnsiChar string)
+PROCEDURE  cmsIT8DefineDblFormat(hIT8: cmsHANDLE; Formatter: PAnsiChar);  StdCall;
+
+// Gamut boundary description routines ------------------------------------------------------------------------------
+// NOTE(review): the mixed cmsGBD*/cmsGDB* spellings appear to mirror the exported names in lcms2.h - verify before "fixing" them
+FUNCTION  cmsGBDAlloc(ContextID: cmsContext):cmsHANDLE; StdCall;
+PROCEDURE cmsGBDFree(hGBD: cmsHANDLE); StdCall;
+FUNCTION  cmsGDBAddPoint(hGBD: cmsHANDLE; Lab: LPcmsCIELab): cmsBool; StdCall;
+FUNCTION  cmsGDBCompute(hGDB: cmsHANDLE; dwFlags: cmsUInt32Number): cmsBool; StdCall;
+FUNCTION  cmsGDBCheckPoint(hGBD: cmsHANDLE; Lab: LPcmsCIELab): cmsBool; StdCall;
+
+// Feature detection  ----------------------------------------------------------------------------------------------
+
+// Estimate the black point (BlackPoint is an LPcmsCIEXYZ argument; the function result is a cmsBool)
+FUNCTION cmsDetectBlackPoint( BlackPoint: LPcmsCIEXYZ; hProfile: cmsHPROFILE; Intent: cmsUInt32Number; dwFlags: cmsUInt32Number): cmsBool; StdCall;
+FUNCTION cmsDetectDestinationBlackPoint( BlackPoint: LPcmsCIEXYZ; hProfile: cmsHPROFILE; Intent: cmsUInt32Number; dwFlags: cmsUInt32Number): cmsBool; StdCall;
+
+
+// Estimate total area coverage
+FUNCTION cmsDetectTAC(hProfile: cmsHPROFILE): cmsFloat64Number; StdCall;
+
+
+// Poor man's gamut mapping
+FUNCTION  cmsDesaturateLab(Lab: LPcmsCIELab; amax, amin, bmax, bmin: cmsFloat64Number): cmsBool; StdCall;
+
+
+IMPLEMENTATION
+
+
+    // Field encoders: each shifts its argument left to the bit position of the named field (BYTES_SH applies no shift)
+    FUNCTION FLOAT_SH(a: cmsUInt32Number): cmsUInt32Number;        begin FLOAT_SH      := a shl 22; end;
+    FUNCTION OPTIMIZED_SH(s: cmsUInt32Number): cmsUInt32Number;    begin OPTIMIZED_SH  := s shl 21; end;
+    FUNCTION COLORSPACE_SH(s: cmsUInt32Number):cmsUInt32Number;    begin COLORSPACE_SH := s shl 16; end;
+    FUNCTION SWAPFIRST_SH(s: cmsUInt32Number):cmsUInt32Number;     begin SWAPFIRST_SH  := s shl 14; end;
+    FUNCTION FLAVOR_SH(s: cmsUInt32Number):cmsUInt32Number;        begin FLAVOR_SH     := s shl 13; end;
+    FUNCTION PLANAR_SH(p: cmsUInt32Number):cmsUInt32Number;        begin PLANAR_SH     := p shl 12; end;
+    FUNCTION ENDIAN16_SH(e: cmsUInt32Number):cmsUInt32Number;      begin ENDIAN16_SH   := e shl 11; end;
+    FUNCTION DOSWAP_SH(e: cmsUInt32Number):cmsUInt32Number;        begin DOSWAP_SH     := e shl 10; end;
+    FUNCTION EXTRA_SH(e: cmsUInt32Number):cmsUInt32Number;         begin EXTRA_SH      := e shl 7;  end;
+    FUNCTION CHANNELS_SH(c: cmsUInt32Number):cmsUInt32Number;      begin CHANNELS_SH   := c shl 3;  end;
+    FUNCTION BYTES_SH(b: cmsUInt32Number):cmsUInt32Number;         begin BYTES_SH      := b;        end;
+
+    // Field decoders: each shifts right to the field's bit position and masks it (1-bit flags; 5, 3, 4 and 3 bits for colorspace, extra, channels, bytes)
+    FUNCTION T_FLOAT(a: cmsUInt32Number): cmsUInt32Number;          begin T_FLOAT      := (a shr 22) and $01; end;
+    FUNCTION T_OPTIMIZED(o: cmsUInt32Number): cmsUInt32Number;      begin T_OPTIMIZED  := (o shr 21) and $01; end;
+    FUNCTION T_COLORSPACE(s: cmsUInt32Number): cmsUInt32Number;     begin T_COLORSPACE := (s shr 16) and $1F; end;
+    FUNCTION T_SWAPFIRST(s: cmsUInt32Number): cmsUInt32Number;      begin T_SWAPFIRST  := (s shr 14) and $01; end;
+    FUNCTION T_FLAVOR(s: cmsUInt32Number): cmsUInt32Number;         begin T_FLAVOR     := (s shr 13) and $01; end;
+    FUNCTION T_PLANAR(p: cmsUInt32Number): cmsUInt32Number;         begin T_PLANAR     := (p shr 12) and $01; end;
+    FUNCTION T_ENDIAN16(e: cmsUInt32Number): cmsUInt32Number;       begin T_ENDIAN16   := (e shr 11) and $01; end;
+    FUNCTION T_DOSWAP(e: cmsUInt32Number): cmsUInt32Number;         begin T_DOSWAP     := (e shr 10) and $01; end;
+    FUNCTION T_EXTRA(e: cmsUInt32Number): cmsUInt32Number;          begin T_EXTRA      := (e shr 7) and $07;  end;
+    FUNCTION T_CHANNELS(c: cmsUInt32Number): cmsUInt32Number;       begin T_CHANNELS   := (c shr 3) and $0F;  end;
+    FUNCTION T_BYTES(b: cmsUInt32Number): cmsUInt32Number;          begin T_BYTES      := b and $07;          end;
+
+
+
+// Imported routines: each declaration below is bound with "external LCMS2_SO", i.e. to the library that constant names
+
+FUNCTION  cmsCreateContext(Plugin : Pointer; UserData : Pointer) : cmsContext; StdCall; external LCMS2_SO;
+PROCEDURE cmsDeleteContext(ContextID: cmsContext); StdCall; external LCMS2_SO;
+FUNCTION  cmsDupContext(ContextID: cmsContext; NewUserData: Pointer): cmsContext; StdCall; external LCMS2_SO;
+FUNCTION  cmsGetContextUserData(ContextID: cmsContext): Pointer;  StdCall; external LCMS2_SO;
+
+FUNCTION  cmsPlugin(Plugin: Pointer): cmsBool; StdCall; external LCMS2_SO;
+PROCEDURE cmsUnregisterPlugins; StdCall; external LCMS2_SO;
+PROCEDURE cmsSetLogErrorHandler(Fn: cmsLogErrorHandlerFunction); StdCall; external LCMS2_SO;
+FUNCTION cmsD50_XYZ: LPcmsCIEXYZ; StdCall; external LCMS2_SO;   // colorimetric bindings start here; all imported from LCMS2_SO
+FUNCTION cmsD50_xyY: LPcmsCIExyY; StdCall; external LCMS2_SO;
+PROCEDURE cmsXYZ2xyY(Dest: LPcmsCIExyY; Source: LPcmsCIEXYZ); StdCall; external LCMS2_SO;
+PROCEDURE cmsxyY2XYZ(Dest: LPcmsCIEXYZ; Source: LPcmsCIExyY); StdCall; external LCMS2_SO;
+PROCEDURE cmsLab2XYZ(WhitePoint: LPcmsCIEXYZ; xyz: LPcmsCIEXYZ; Lab: LPcmsCIELab); StdCall; external LCMS2_SO;
+PROCEDURE cmsXYZ2Lab(WhitePoint: LPcmsCIEXYZ; Lab: LPcmsCIELab; xyz: LPcmsCIEXYZ); StdCall; external LCMS2_SO;
+PROCEDURE cmsLab2LCh(LCh: LPcmsCIELCh; Lab: LPcmsCIELab); StdCall; external LCMS2_SO;
+PROCEDURE cmsLCh2Lab(Lab: LPcmsCIELab; LCh: LPcmsCIELCh); StdCall; external LCMS2_SO;
+PROCEDURE cmsLabEncoded2Float(Lab: LPcmsCIELab; wLab: Pointer); StdCall; external LCMS2_SO;
+PROCEDURE cmsLabEncoded2FloatV2(Lab: LPcmsCIELab; wLab: Pointer); StdCall; external LCMS2_SO;
+PROCEDURE cmsFloat2LabEncoded(wLab: Pointer; Lab: LPcmsCIELab); StdCall; external LCMS2_SO;
+PROCEDURE cmsFloat2LabEncodedV2(wLab: Pointer; Lab: LPcmsCIELab); StdCall; external LCMS2_SO;
+PROCEDURE cmsXYZEncoded2Float(fxyz : LPcmsCIEXYZ; XYZ: Pointer); StdCall; external LCMS2_SO;
+PROCEDURE cmsFloat2XYZEncoded(XYZ: Pointer; fXYZ: LPcmsCIEXYZ); StdCall; external LCMS2_SO;
+FUNCTION cmsDeltaE(Lab1, Lab2: LPcmsCIELab): Double; StdCall; external LCMS2_SO;
+FUNCTION cmsCIE94DeltaE(Lab1, Lab2: LPcmsCIELab): Double; StdCall; external LCMS2_SO;
+FUNCTION cmsBFDdeltaE(Lab1, Lab2: LPcmsCIELab): Double; StdCall; external LCMS2_SO;
+FUNCTION cmsCMCdeltaE(Lab1, Lab2: LPcmsCIELab): Double; StdCall; external LCMS2_SO;
+FUNCTION cmsCIE2000DeltaE(Lab1, Lab2: LPcmsCIELab; Kl, Kc, Kh: Double): Double; StdCall; external LCMS2_SO;
+FUNCTION cmsWhitePointFromTemp(var WhitePoint: cmsCIExyY; TempK: cmsFloat64Number) : cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsTempFromWhitePoint(var TeampK: cmsFloat64Number; var WhitePoint: cmsCIExyY) : cmsBool; StdCall; external LCMS2_SO;   // NOTE(review): 'TeampK' looks like a typo for 'TempK' - check the interface declaration before renaming
+FUNCTION cmsAdaptToIlluminant(Result: LPcmsCIEXYZ; SourceWhitePt: LPcmsCIEXYZ;
+                              Illuminant: LPcmsCIEXYZ; Value: LPcmsCIEXYZ): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsCIECAM02Init(pVC : LPcmsViewingConditions ) : Pointer; StdCall; external LCMS2_SO;   // the model handle is an untyped Pointer
+PROCEDURE cmsCIECAM02Done(hModel : Pointer); StdCall; external LCMS2_SO;
+PROCEDURE cmsCIECAM02Forward(hModel: Pointer; pIn: LPcmsCIEXYZ; pOut: LPcmsJCh ); StdCall; external LCMS2_SO;
+PROCEDURE cmsCIECAM02Reverse(hModel: Pointer; pIn: LPcmsJCh;   pOut: LPcmsCIEXYZ ); StdCall; external LCMS2_SO;
+FUNCTION  cmsBuildSegmentedToneCurve(ContextID: cmsContext; nSegments: cmsInt32Number; Segments: LPcmsCurveSegmentArray): LPcmsToneCurve; StdCall; external LCMS2_SO;
+FUNCTION  cmsBuildParametricToneCurve(ContextID: cmsContext;  CType: cmsInt32Number; Params: LPcmsFloat64NumberArray): LPcmsToneCurve; StdCall; external LCMS2_SO;
+FUNCTION  cmsBuildGamma(ContextID: cmsContext; Gamma: cmsFloat64Number): LPcmsToneCurve; StdCall; external LCMS2_SO;
+FUNCTION  cmsBuildTabulatedToneCurve16(ContextID: cmsContext; nEntries: cmsInt32Number; values: LPcmsUInt16NumberArray): LPcmsToneCurve; StdCall; external LCMS2_SO;
+FUNCTION  cmsBuildTabulatedToneCurveFloat(ContextID: cmsContext; nEntries: cmsUInt32Number; values: LPcmsFloat32NumberArray): LPcmsToneCurve; StdCall; external LCMS2_SO;
+PROCEDURE cmsFreeToneCurve(Curve: LPcmsToneCurve); StdCall; external LCMS2_SO;
+PROCEDURE cmsFreeToneCurveTriple(Curve: LPLPcmsToneCurveArray); StdCall; external LCMS2_SO;
+FUNCTION  cmsDupToneCurve(Src: LPcmsToneCurve): LPcmsToneCurve; StdCall; external LCMS2_SO;
+FUNCTION  cmsReverseToneCurve(InGamma: LPcmsToneCurve): LPcmsToneCurve; StdCall; external LCMS2_SO;
+FUNCTION  cmsReverseToneCurveEx(nResultSamples: cmsInt32Number; InGamma: LPcmsToneCurve): LPcmsToneCurve; StdCall; external LCMS2_SO;
+FUNCTION  cmsJoinToneCurve(ContextID: cmsContext; X, Y: LPcmsToneCurve; nPoints: cmsUInt32Number ): LPcmsToneCurve; StdCall; external LCMS2_SO;
+FUNCTION  cmsSmoothToneCurve(Tab: LPcmsToneCurve; lambda: cmsFloat64Number): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsEvalToneCurveFloat(Curve: LPcmsToneCurve; v: cmsFloat32Number):cmsFloat32Number; StdCall; external LCMS2_SO;
+FUNCTION  cmsEvalToneCurve16(Curve: LPcmsToneCurve; v:cmsUInt16Number):cmsUInt16Number; StdCall; external LCMS2_SO;
+FUNCTION  cmsIsToneCurveMultisegment(InGamma: LPcmsToneCurve):cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsIsToneCurveLinear(Curve: LPcmsToneCurve):cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsIsToneCurveMonotonic(t: LPcmsToneCurve):cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsIsToneCurveDescending(t: LPcmsToneCurve):cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsGetToneCurveParametricType(t: LPcmsToneCurve):cmsInt32Number; StdCall; external LCMS2_SO;
+FUNCTION  cmsEstimateGamma(t: LPcmsToneCurve; Precision:cmsFloat64Number):cmsFloat64Number; StdCall; external LCMS2_SO;
+FUNCTION  cmsGetToneCurveEstimatedTableEntries(t: LPcmsToneCurve): cmsUInt32Number; StdCall; external LCMS2_SO;
+FUNCTION  cmsGetToneCurveEstimatedTable(t: LPcmsToneCurve): LPcmsUInt16Number; StdCall; external LCMS2_SO;
+FUNCTION  cmsPipelineAlloc(ContextID: cmsContext; InputChannels, OutputChannels: cmsUInt32Number): LPcmsPipeline; StdCall; external LCMS2_SO;
+PROCEDURE cmsPipelineFree(lut: LPcmsPipeline); StdCall; external LCMS2_SO;
+FUNCTION  cmsPipelineDup(Orig: LPcmsPipeline): LPcmsPipeline; StdCall; external LCMS2_SO;
+FUNCTION  cmsGetPipelineContextID(lut: LPcmsPipeline) : cmsContext; StdCall; external LCMS2_SO;
+FUNCTION  cmsPipelineInputChannels(lut: LPcmsPipeline): cmsUInt32Number; StdCall; external LCMS2_SO;
+FUNCTION  cmsPipelineOutputChannels(lut: LPcmsPipeline): cmsUInt32Number; StdCall; external LCMS2_SO;
+FUNCTION cmsPipelineStageCount(lut: LPcmsPipeline): cmsUInt32Number; StdCall; external LCMS2_SO;
+FUNCTION cmsPipelineGetPtrToFirstStage(lut: LPcmsPipeline): LPcmsStage; StdCall; external LCMS2_SO;
+FUNCTION cmsPipelineGetPtrToLastStage(lut: LPcmsPipeline): LPcmsStage; StdCall; external LCMS2_SO;
+// Pipeline evaluation on cmsUInt16Number or cmsFloat32Number arrays (Inv/Outv come first, the pipeline last)
+PROCEDURE cmsPipelineEval16(Inv, Outv: LPcmsUInt16NumberArray; lut: LPcmsPipeline); StdCall; external LCMS2_SO;
+PROCEDURE cmsPipelineEvalFloat(Inv, Outv: LPcmsFloat32NumberArray; lut: LPcmsPipeline); StdCall; external LCMS2_SO;
+// Pipeline manipulation and stage allocation; the cmsStageAlloc* routines all take ContextID first and return LPcmsStage
+FUNCTION cmsPipelineEvalReverseFloat(Target, Result, Hint: LPcmsFloat32NumberArray; lut: LPcmsPipeline): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsPipelineCat(l1, l2: LPcmsPipeline): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsPipelineSetSaveAs8bitsFlag(lut: LPcmsPipeline; On: cmsBool): cmsBool; StdCall; external LCMS2_SO;
+PROCEDURE cmsPipelineInsertStage(lut: LPcmsPipeline; loc: cmsStageLoc; mpe: LPcmsStage); StdCall; external LCMS2_SO;
+PROCEDURE cmsPipelineUnlinkStage(lut: LPcmsPipeline; loc: cmsStageLoc; mpe: LPLPcmsStage); StdCall; external LCMS2_SO;
+FUNCTION  cmsStageAllocIdentity(ContextID: cmsContext; nChannels: cmsUInt32Number): LPcmsStage; StdCall; external LCMS2_SO;
+FUNCTION  cmsStageAllocToneCurves(ContextID: cmsContext; nChannels: cmsUInt32Number; Curves: LPLPcmsToneCurveArray): LPcmsStage; StdCall; external LCMS2_SO;
+FUNCTION  cmsStageAllocMatrix(ContextID: cmsContext; Rows, Cols: cmsUInt32Number; Matrix, Offset: LPcmsFloat64NumberArray): LPcmsStage; StdCall; external LCMS2_SO;
+FUNCTION  cmsStageAllocCLut16bit(ContextID: cmsContext; nGridPoints: cmsUInt32Number; inputChan, outputChan: cmsUInt32Number; Table: LPcmsUInt16NumberArray): LPcmsStage; StdCall; external LCMS2_SO;
+FUNCTION  cmsStageAllocCLutFloat(ContextID: cmsContext; nGridPoints: cmsUInt32Number; inputChan, outputChan: cmsUInt32Number; Table: LPcmsFloat32NumberArray): LPcmsStage; StdCall; external LCMS2_SO;
+FUNCTION  cmsStageAllocCLut16bitGranular(ContextID: cmsContext; nGridPoints: LPcmsUInt32NumberArray; inputChan, outputChan: cmsUInt32Number; Table: LPcmsUInt16NumberArray): LPcmsStage; StdCall; external LCMS2_SO;
+FUNCTION  cmsStageAllocCLutFloatGranular(ContextID: cmsContext; nGridPoints: LPcmsUInt32NumberArray; inputChan, outputChan: cmsUInt32Number; Table: LPcmsFloat32NumberArray): LPcmsStage; StdCall; external LCMS2_SO;
+FUNCTION  cmsStageDup(mpe: LPcmsStage): LPcmsStage; StdCall; external LCMS2_SO;
+PROCEDURE cmsStageFree(mpe: LPcmsStage); StdCall; external LCMS2_SO;
+FUNCTION  cmsStageNext(mpe: LPcmsStage): LPcmsStage; StdCall; external LCMS2_SO;
+FUNCTION cmsStageInputChannels(mpe: LPcmsStage): cmsUInt32Number; StdCall; external LCMS2_SO;
+FUNCTION cmsStageOutputChannels(mpe: LPcmsStage): cmsUInt32Number; StdCall; external LCMS2_SO;
+FUNCTION cmsStageType(mpe: LPcmsStage): cmsStageSignature; StdCall; external LCMS2_SO;
+FUNCTION cmsStageData(mpe: LPcmsStage): Pointer; StdCall; external LCMS2_SO;
+FUNCTION cmsStageSampleCLut16bit(mpe: LPcmsStage;  Sampler: cmsSAMPLER16;    Cargo: Pointer; dwFlags: cmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsStageSampleCLutFloat(mpe: LPcmsStage;  Sampler: cmsSAMPLERFLOAT; Cargo: Pointer; dwFlags: cmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsSliceSpace16(nInputs: cmsUInt32Number; clutPoints: LPcmsUInt32NumberArray;
+                                                   Sampler: cmsSAMPLER16; Cargo: Pointer): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsSliceSpaceFloat(nInputs: cmsUInt32Number; clutPoints: LPcmsUInt32NumberArray;
+                                                   Sampler: cmsSAMPLERFLOAT; Cargo: Pointer): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsMLUalloc(ContextID: cmsContext; nItems: cmsUInt32Number): LPcmsMLU; StdCall; external LCMS2_SO;
+PROCEDURE cmsMLUfree(mlu: LPcmsMLU); StdCall; external LCMS2_SO;
+FUNCTION  cmsMLUdup(mlu: LPcmsMLU): LPcmsMLU; StdCall; external LCMS2_SO;
+// MLU setters: language and country codes are PAnsiChar; the text itself is PAnsiChar (ASCII) or PWChar (Wide)
+FUNCTION  cmsMLUsetASCII(mlu: LPcmsMLU; LanguageCode, CountryCode, ASCIIString: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsMLUsetWide(mlu: LPcmsMLU; LanguageCode, CountryCode: PAnsiChar; WideString: PWChar): cmsBool; StdCall; external LCMS2_SO;
+// MLU getters take a caller-supplied Buffer plus BufferSize and return a cmsUInt32Number
+FUNCTION cmsMLUgetASCII(mlu: LPcmsMLU; LanguageCode, CountryCode: PAnsiChar; Buffer: PAnsiChar; BufferSize: cmsUInt32Number): cmsUInt32Number; StdCall; external LCMS2_SO;
+
+FUNCTION cmsMLUgetWide(mlu: LPcmsMLU; LanguageCode, CountryCode: PAnsiChar; Buffer: PWChar; BufferSize: cmsUInt32Number): cmsUInt32Number; StdCall; external LCMS2_SO;
+
+FUNCTION cmsMLUgetTranslation(mlu: LPcmsMLU; LanguageCode, CountryCode, ObtainedLanguage, ObtainedCountry: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+// Named color list imports; lists are handled through LPcmsNAMEDCOLORLIST
+FUNCTION cmsAllocNamedColorList(ContextID: cmsContext; n, ColorantCount :cmsUInt32Number;
+                                                           Prefix, Suffix: PAnsiChar): LPcmsNAMEDCOLORLIST; StdCall; external LCMS2_SO;
+
+PROCEDURE cmsFreeNamedColorList(v: LPcmsNAMEDCOLORLIST); StdCall; external LCMS2_SO;
+FUNCTION  cmsDupNamedColorList(v: LPcmsNAMEDCOLORLIST): LPcmsNAMEDCOLORLIST; StdCall; external LCMS2_SO;
+FUNCTION  cmsAppendNamedColor(v: LPcmsNAMEDCOLORLIST; Name: PAnsiChar;
+                                                             PCS, Colorant : LPcmsUInt16NumberArray): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsNamedColorCount(v: LPcmsNAMEDCOLORLIST): cmsUInt32Number; StdCall; external LCMS2_SO;
+FUNCTION cmsNamedColorIndex(v: LPcmsNAMEDCOLORLIST; Name: PAnsiChar): cmsInt32Number; StdCall; external LCMS2_SO;
+
+FUNCTION cmsNamedColorInfo(v: LPcmsNAMEDCOLORLIST; nColor : cmsUInt32Number;
+                                                      Name,Prefix, Suffix : PAnsiChar;
+                                                       PCS, Colorant : LPcmsUInt16NumberArray): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsGetNamedColorList(xform: cmsHTRANSFORM ): LPcmsNAMEDCOLORLIST; StdCall; external LCMS2_SO;
+// Profile sequence description imports (LPcmsSEQ)
+FUNCTION   cmsAllocProfileSequenceDescription(ContextID: cmsContext; n: cmsUInt32Number):LPcmsSEQ; StdCall; external LCMS2_SO;
+FUNCTION   cmsDupProfileSequenceDescription(pseq: LPcmsSEQ):LPcmsSEQ; StdCall; external LCMS2_SO;
+PROCEDURE  cmsFreeProfileSequenceDescription(pseq: LPcmsSEQ); StdCall; external LCMS2_SO;
+// Dictionary imports: the dictionary is a cmsHANDLE, entry names/values are PWChar, display strings are LPcmsMLU
+FUNCTION  cmsDictAlloc(ContextID: cmsContext): cmsHANDLE; StdCall; external LCMS2_SO;
+PROCEDURE cmsDictFree(hDict: cmsHANDLE);  StdCall; external LCMS2_SO;
+FUNCTION  cmsDictDup(hDict: cmsHANDLE): cmsHANDLE;  StdCall; external LCMS2_SO;
+
+FUNCTION cmsDictAddEntry(hDict: cmsHANDLE; Name, Value: PWChar; DisplayName, DisplayValue : LPcmsMLU): cmsBool;  StdCall; external LCMS2_SO;
+FUNCTION cmsDictGetEntryList(hDict: cmsHANDLE): LPcmsDICTentry; StdCall; external LCMS2_SO;
+FUNCTION cmsDictNextEntry(e : LPcmsDICTentry): LPcmsDICTentry;  StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateProfilePlaceholder(ContextID: cmsContext): cmsHPROFILE; StdCall; external LCMS2_SO;
+// Profile-level queries on a cmsHPROFILE handle: owning context, tag count, tag signature for a given n, tag presence
+FUNCTION cmsGetProfileContextID(hProfile: cmsHPROFILE):cmsContext; StdCall; external LCMS2_SO;
+FUNCTION cmsGetTagCount(hProfile: cmsHPROFILE): cmsInt32Number; StdCall; external LCMS2_SO;
+FUNCTION cmsGetTagSignature(hProfile: cmsHPROFILE; n: cmsUInt32Number): cmsTagSignature; StdCall; external LCMS2_SO;
+FUNCTION cmsIsTag(hProfile: cmsHPROFILE; sig: cmsTagSignature ): cmsBool; StdCall; external LCMS2_SO;
+// Tag access: cmsReadTag hands back an untyped Pointer and cmsWriteTag accepts one
+FUNCTION cmsReadTag(hProfile: cmsHPROFILE; sig: cmsTagSignature ): Pointer; StdCall; external LCMS2_SO;
+FUNCTION cmsWriteTag(hProfile: cmsHPROFILE; sig: cmsTagSignature; data: Pointer): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsLinkTag(hProfile: cmsHPROFILE; sig: cmsTagSignature; dest: cmsTagSignature): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsTagLinkedTo(hProfile: cmsHPROFILE; sig: cmsTagSignature):cmsTagSignature; StdCall; external LCMS2_SO;
+// Raw tag access takes a caller-supplied pointer together with an explicit size argument
+FUNCTION cmsReadRawTag(hProfile: cmsHPROFILE; sig: cmsTagSignature; Buffer: Pointer; BufferSize: cmsUInt32Number): cmsInt32Number; StdCall; external LCMS2_SO;
+FUNCTION cmsWriteRawTag(hProfile: cmsHPROFILE; sig: cmsTagSignature; data: Pointer; Size: cmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+// Header accessors. The attribute and profile-ID getters are PROCEDUREs that take a pointer argument instead of returning a value
+FUNCTION  cmsGetHeaderFlags(hProfile: cmsHPROFILE): cmsUInt32Number; StdCall; external LCMS2_SO;
+PROCEDURE cmsGetHeaderAttributes(hProfile: cmsHPROFILE; Flags: LPcmsUInt64Number); StdCall; external LCMS2_SO;
+PROCEDURE cmsGetHeaderProfileID(hProfile: cmsHPROFILE; ProfileID: LPcmsUInt8Number); StdCall; external LCMS2_SO;
+
+FUNCTION  cmsGetHeaderRenderingIntent(hProfile: cmsHPROFILE): cmsUInt32Number; StdCall; external LCMS2_SO;
+PROCEDURE cmsSetHeaderFlags(hProfile: cmsHPROFILE; Flags: cmsUInt32Number); StdCall; external LCMS2_SO;
+FUNCTION  cmsGetHeaderManufacturer(hProfile: cmsHPROFILE): cmsUInt32Number; StdCall; external LCMS2_SO;
+PROCEDURE cmsSetHeaderManufacturer(hProfile: cmsHPROFILE; manufacturer: cmsUInt32Number ); StdCall; external LCMS2_SO;
+FUNCTION  cmsGetHeaderModel(hProfile: cmsHPROFILE): cmsUInt32Number; StdCall; external LCMS2_SO;
+PROCEDURE cmsSetHeaderModel(hProfile: cmsHPROFILE; model: cmsUInt32Number ); StdCall; external LCMS2_SO;
+PROCEDURE cmsSetHeaderAttributes(hProfile: cmsHPROFILE; Flags: cmsUInt64Number); StdCall; external LCMS2_SO;
+PROCEDURE cmsSetHeaderProfileID(hProfile: cmsHPROFILE; ProfileID: LPcmsUInt8Number); StdCall; external LCMS2_SO;
+PROCEDURE cmsSetHeaderRenderingIntent(hProfile: cmsHPROFILE; RenderingIntent: cmsUInt32Number ); StdCall; external LCMS2_SO;
+// Get/Set pairs for PCS, color space, device class and profile version (version is exchanged as a cmsFloat64Number)
+FUNCTION  cmsGetPCS(hProfile: cmsHPROFILE):cmsColorSpaceSignature; StdCall; external LCMS2_SO;
+PROCEDURE cmsSetPCS(hProfile: cmsHPROFILE; pcs: cmsColorSpaceSignature); StdCall; external LCMS2_SO;
+FUNCTION  cmsGetColorSpace(hProfile: cmsHPROFILE): cmsColorSpaceSignature; StdCall; external LCMS2_SO;
+PROCEDURE cmsSetColorSpace(hProfile: cmsHPROFILE; sig: cmsColorSpaceSignature); StdCall; external LCMS2_SO;
+FUNCTION  cmsGetDeviceClass(hProfile: cmsHPROFILE): cmsProfileClassSignature; StdCall; external LCMS2_SO;
+PROCEDURE cmsSetDeviceClass(hProfile: cmsHPROFILE; sig: cmsProfileClassSignature); StdCall; external LCMS2_SO;
+PROCEDURE cmsSetProfileVersion(hProfile: cmsHPROFILE; Version: cmsFloat64Number); StdCall; external LCMS2_SO;
+FUNCTION  cmsGetProfileVersion(hProfile: cmsHPROFILE): cmsFloat64Number; StdCall; external LCMS2_SO;
+// The "encoded" version pair exchanges the version as a cmsUInt32Number instead
+FUNCTION  cmsGetEncodedICCversion(hProfile: cmsHPROFILE): cmsUInt32Number; StdCall; external LCMS2_SO;
+PROCEDURE cmsSetEncodedICCversion(hProfile: cmsHPROFILE; Version: cmsUInt32Number); StdCall; external LCMS2_SO;
+
+
+FUNCTION   cmsIsIntentSupported(hProfile: cmsHPROFILE; Intent: cmsUInt32Number; UsedDirection: cmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION   cmsIsMatrixShaper(hProfile: cmsHPROFILE): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION   cmsIsCLUT(hProfile: cmsHPROFILE; Intent: cmsUInt32Number; UsedDirection: cmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION _cmsICCcolorSpace(OurNotation: Integer): cmsColorSpaceSignature; StdCall; external LCMS2_SO;
+FUNCTION _cmsLCMScolorSpace(ProfileSpace: cmsColorSpaceSignature): Integer; StdCall; external LCMS2_SO;
+
+FUNCTION cmsChannelsOf( ColorSpace: cmsColorSpaceSignature): cmsUInt32Number; StdCall; external LCMS2_SO;
+
+FUNCTION cmsFormatterForColorspaceOfProfile(hProfile: cmsHPROFILE; nBytes: cmsUInt32Number; lIsFloat: cmsBool): cmsUInt32Number; StdCall; external LCMS2_SO;
+FUNCTION cmsFormatterForPCSOfProfile(hProfile: cmsHPROFILE; nBytes: cmsUInt32Number; lIsFloat: cmsBool): cmsUInt32Number; StdCall; external LCMS2_SO;
+
+// Profile info strings: cmsGetProfileInfo takes a PWChar buffer, the ASCII variant a PAnsiChar buffer
+FUNCTION cmsGetProfileInfo(hProfile: cmsHPROFILE; Info: cmsInfoType; LanguageCode, CountryCode: PAnsiChar;
+                                                            Buffer: PWChar; BufferSize: cmsUInt32Number): cmsUInt32Number; StdCall; external LCMS2_SO;
+
+FUNCTION cmsGetProfileInfoASCII(hProfile: cmsHPROFILE; Info: cmsInfoType; LanguageCode, CountryCode: PAnsiChar;
+                                                            Buffer: PAnsiChar; BufferSize: cmsUInt32Number): cmsUInt32Number; StdCall; external LCMS2_SO;
+
+
+FUNCTION cmsOpenIOhandlerFromFile(ContextID: cmsContext; FileName, AccessMode: PAnsiChar): LPcmsIOHANDLER; StdCall; external LCMS2_SO;
+// FUNCTION cmsOpenIOhandlerFromStream(ContextID: cmsContext; FILE* Stream): LPcmsIOHANDLER; StdCall; external LCMS2_SO;
+FUNCTION cmsOpenIOhandlerFromMem(ContextID: cmsContext; Buffer: Pointer; size: cmsUInt32Number; AccessMode: PAnsiChar): LPcmsIOHANDLER; StdCall; external LCMS2_SO;
+FUNCTION cmsOpenIOhandlerFromNULL(ContextID: cmsContext): LPcmsIOHANDLER; StdCall; external LCMS2_SO;
+FUNCTION cmsCloseIOhandler(io: LPcmsIOHANDLER): cmsBool; StdCall; external LCMS2_SO;
+// Profile open/close. Imports whose name ends in THR take an extra leading ContextID. The FILE*-based stream variants stay commented out (their parameter lists are still written in C syntax)
+FUNCTION   cmsOpenProfileFromFile(ICCProfile : PAnsiChar; sAccess: PAnsiChar): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION   cmsOpenProfileFromFileTHR(ContextID: cmsContext; ICCProfile, sAccess: PAnsiChar): cmsHPROFILE; StdCall; external LCMS2_SO;
+// FUNCTION      CMSEXPORT cmsOpenProfileFromStream(FILE* ICCProfile, const char* sAccess): cmsHPROFILE; StdCall; external LCMS2_SO;
+// FUNCTION      CMSEXPORT cmsOpenProfileFromStreamTHR(ContextID: cmsContext; FILE* ICCProfile, const char* sAccess): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION   cmsOpenProfileFromMem(MemPtr: Pointer; dwSize: cmsUInt32Number): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION   cmsOpenProfileFromMemTHR(ContextID: cmsContext; MemPtr: Pointer; dwSize: cmsUInt32Number): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION   cmsOpenProfileFromIOhandlerTHR(ContextID: cmsContext; io: LPcmsIOHANDLER): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION   cmsCloseProfile(hProfile: cmsHPROFILE): cmsBool; StdCall; external LCMS2_SO;
+// Profile save: to a file name, to a memory pointer (BytesNeeded is passed by pointer) or to an IO handler
+FUNCTION   cmsSaveProfileToFile(hProfile: cmsHPROFILE; FileName: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+// FUNCTION         CMSEXPORT cmsSaveProfileToStream(hProfile: cmsHPROFILE, FILE* Stream): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION   cmsSaveProfileToMem(hProfile: cmsHPROFILE; MemPtr: Pointer; BytesNeeded: LPcmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION   cmsSaveProfileToIOhandler(hProfile: cmsHPROFILE; io: LPcmsIOHANDLER):cmsUInt32Number; StdCall; external LCMS2_SO;
+// Profile construction. Each ...THR import below is followed by a variant with the same parameters minus the leading ContextID
+FUNCTION  cmsCreateRGBProfileTHR(ContextID: cmsContext;
+                                                   WhitePoint: LPcmsCIExyY;
+                                                   Primaries: LPcmsCIExyYTRIPLE;
+                                                   TransferFunction: LPLPcmsToneCurveArray): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION  cmsCreateRGBProfile(WhitePoint: LPcmsCIExyY;
+                                                   Primaries: LPcmsCIExyYTRIPLE;
+                                                   TransferFunction: LPLPcmsToneCurveArray): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateGrayProfileTHR(ContextID: cmsContext;
+                                                    WhitePoint: LPcmsCIExyY;
+                                                    TransferFunction: LPcmsToneCurve): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateGrayProfile(WhitePoint: LPcmsCIExyY;
+                                                     TransferFunction: LPcmsToneCurve): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateLinearizationDeviceLinkTHR(ContextID: cmsContext;
+                                                                 ColorSpace: cmsColorSpaceSignature;
+                                                                 TransferFunctions: LPLPcmsToneCurveArray): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateLinearizationDeviceLink(ColorSpace: cmsColorSpaceSignature;
+                                                                 TransferFunctions: LPLPcmsToneCurveArray): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateInkLimitingDeviceLinkTHR(ContextID: cmsContext;
+                                                              ColorSpace: cmsColorSpaceSignature; Limit: cmsFloat64Number): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateInkLimitingDeviceLink(ColorSpace: cmsColorSpaceSignature; Limit: cmsFloat64Number): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+
+FUNCTION cmsCreateLab2ProfileTHR(ContextID: cmsContext; WhitePoint: LPcmsCIExyY): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION cmsCreateLab2Profile(WhitePoint: LPcmsCIExyY): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION cmsCreateLab4ProfileTHR(ContextID: cmsContext; WhitePoint: LPcmsCIExyY): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION cmsCreateLab4Profile(WhitePoint: LPcmsCIExyY): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateXYZProfileTHR(ContextID: cmsContext): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION cmsCreateXYZProfile: cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreate_sRGBProfileTHR(ContextID: cmsContext): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION cmsCreate_sRGBProfile: cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateBCHSWabstractProfileTHR(ContextID: cmsContext;
+                                                             nLUTPoints: Integer;
+                                                             Bright,
+                                                             Contrast,
+                                                             Hue,
+                                                             Saturation: cmsFloat64Number;
+                                                             TempSrc,
+                                                             TempDest: Integer): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION cmsCreateBCHSWabstractProfile(   nLUTPoints: Integer;
+                                                             Bright,
+                                                             Contrast,
+                                                             Hue,
+                                                             Saturation: cmsFloat64Number;
+                                                             TempSrc,
+                                                             TempDest: Integer): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+FUNCTION  cmsCreateNULLProfileTHR(ContextID: cmsContext): cmsHPROFILE; StdCall; external LCMS2_SO;
+FUNCTION  cmsCreateNULLProfile: cmsHPROFILE; StdCall; external LCMS2_SO;
+
+// Converts a transform to a devicelink profile
+FUNCTION  cmsTransform2DeviceLink(hTransform: cmsHTRANSFORM; Version: cmsFloat64Number; dwFlags: cmsUInt32Number): cmsHPROFILE; StdCall; external LCMS2_SO;
+
+// Call with NULL as parameters to get the intent count
+FUNCTION cmsGetSupportedIntents(nMax: cmsUInt32Number; Codes: LPcmsUInt32Number; Descriptions: LPPAnsiChar): cmsUInt32Number; StdCall; external LCMS2_SO;
+
+FUNCTION cmsFLAGS_GRIDPOINTS(n: Integer): Integer; begin cmsFLAGS_GRIDPOINTS := (n and $FF) shl 16; end; // keeps the low 8 bits of n and shifts them left by 16
+
+
+FUNCTION   cmsCreateTransformTHR(ContextID: cmsContext;
+                                                  Input: cmsHPROFILE;
+                                                  InputFormat: cmsUInt32Number;
+                                                  Output: cmsHPROFILE;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Intent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall; external LCMS2_SO;
+// Same parameter list as cmsCreateTransformTHR without the leading ContextID
+FUNCTION   cmsCreateTransform(Input: cmsHPROFILE;
+                                                  InputFormat: cmsUInt32Number;
+                                                  Output: cmsHPROFILE;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Intent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall; external LCMS2_SO;
+// Proofing variants add a Proofing profile handle and a separate ProofingIntent
+FUNCTION   cmsCreateProofingTransformTHR(ContextID: cmsContext;
+                                                  Input: cmsHPROFILE;
+                                                  InputFormat: cmsUInt32Number;
+                                                  Output: cmsHPROFILE;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Proofing: cmsHPROFILE;
+                                                  Intent: cmsUInt32Number;
+                                                  ProofingIntent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall; external LCMS2_SO;
+
+FUNCTION   cmsCreateProofingTransform(Input: cmsHPROFILE;
+                                                  InputFormat: cmsUInt32Number;
+                                                  Output: cmsHPROFILE;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Proofing: cmsHPROFILE;
+                                                  Intent: cmsUInt32Number;
+                                                  ProofingIntent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall; external LCMS2_SO;
+// Multiprofile variants take an array pointer of profile handles (hProfiles) followed by its count (nProfiles)
+FUNCTION   cmsCreateMultiprofileTransformTHR(ContextID: cmsContext;
+                                                  hProfiles: LPcmsHPROFILEArray;
+                                                  nProfiles: cmsUInt32Number;
+                                                  InputFormat: cmsUInt32Number;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Intent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall; external LCMS2_SO;
+
+
+FUNCTION   cmsCreateMultiprofileTransform( hProfiles: LPcmsHPROFILEArray;
+                                                  nProfiles: cmsUInt32Number;
+                                                  InputFormat: cmsUInt32Number;
+                                                  OutputFormat: cmsUInt32Number;
+                                                  Intent: cmsUInt32Number;
+                                                  dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall; external LCMS2_SO;
+
+// NOTE: unlike the multiprofile imports above, this one takes nProfiles BEFORE hProfiles; BPC, Intents and AdaptationStates are array pointers
+FUNCTION   cmsCreateExtendedTransform(ContextID: cmsContext;
+                                                   nProfiles: cmsUInt32Number;
+                                                   hProfiles: LPcmsHPROFILEArray;
+                                                   BPC: LPcmsBoolArray;
+                                                   Intents: LPcmsUInt32NumberArray;
+                                                   AdaptationStates: LPcmsFloat64NumberArray;
+                                                   hGamutProfile: cmsHPROFILE;
+                                                   nGamutPCSposition: cmsUInt32Number;
+                                                   InputFormat,
+                                                   OutputFormat: cmsUInt32Number;
+                                                   dwFlags: cmsUInt32Number): cmsHTRANSFORM; StdCall; external LCMS2_SO;
+
+PROCEDURE  cmsDeleteTransform(hTransform: cmsHTRANSFORM); StdCall; external LCMS2_SO;
+// Transform execution and alarm codes; the input/output buffers are untyped Pointers
+PROCEDURE  cmsDoTransform(Transform: cmsHTRANSFORM; InputBuffer, OutputBuffer: Pointer; size: cmsUInt32Number);  StdCall; external LCMS2_SO;
+PROCEDURE  cmsDoTransformStride(Transform: cmsHTRANSFORM; InputBuffer, OutputBuffer: Pointer; size: cmsUInt32Number; stride: cmsUInt32Number);  StdCall; external LCMS2_SO;
+PROCEDURE  cmsSetAlarmCodes( NewAlarm: LPcmsUInt16NumberArray);  StdCall; external LCMS2_SO;
+PROCEDURE  cmsGetAlarmCodes(NewAlarm: LPcmsUInt16NumberArray); StdCall; external LCMS2_SO;
+
+// Adaptation state for absolute colorimetric intent
+FUNCTION  cmsSetAdaptationState(d: cmsFloat64Number):cmsFloat64Number; StdCall; external LCMS2_SO;
+
+// Grab the ContextID from an open transform. Returns NULL if a NULL transform is passed
+FUNCTION  cmsGetTransformContextID(hTransform: cmsHTRANSFORM):cmsContext; StdCall; external LCMS2_SO;
+
+// For backwards compatibility
+FUNCTION  cmsChangeBuffersFormat(hTransform: cmsHTRANSFORM; InputFormat, OutputFormat: cmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+
+
+// lcms2 unified method to access postscript color resources
+FUNCTION cmsGetPostScriptColorResource(ContextID: cmsContext;   RType: cmsPSResourceType;
+                                                                hProfile: cmsHPROFILE;
+                                                                Intent: cmsUInt32Number;
+                                                                dwFlags: cmsUInt32Number;
+                                                                io: LPcmsIOHANDLER): cmsUInt32Number; StdCall; external LCMS2_SO;
+// The CSA/CRD imports take a caller-supplied Buffer pointer and dwBufferLen where the import above takes an IO handler
+FUNCTION cmsGetPostScriptCSA(ContextID: cmsContext; hProfile: cmsHPROFILE; Intent: cmsUInt32Number; dwFlags: cmsUInt32Number; Buffer: Pointer; dwBufferLen: cmsUInt32Number ): cmsUInt32Number; StdCall; external LCMS2_SO;
+FUNCTION cmsGetPostScriptCRD(ContextID: cmsContext; hProfile: cmsHPROFILE; Intent: cmsUInt32Number; dwFlags: cmsUInt32Number; Buffer: Pointer; dwBufferLen: cmsUInt32Number): cmsUInt32Number; StdCall; external LCMS2_SO;
+
+
+// CGATS.13 parser
+// NOTE(review): cmsIT8Alloc, cmsIT8LoadFromFile and cmsIT8LoadFromMem are declared here without a cmsContext argument, whereas the lcms2.h prototypes appear to take a leading ContextID - verify against the linked library, since a StdCall parameter mismatch could corrupt the stack
+FUNCTION  cmsIT8Alloc: cmsHANDLE; StdCall; external LCMS2_SO;
+PROCEDURE cmsIT8Free(hIT8: cmsHANDLE); StdCall; external LCMS2_SO;
+
+// Tables
+
+FUNCTION  cmsIT8TableCount(hIT8: cmsHANDLE): Integer; StdCall; external LCMS2_SO;
+FUNCTION  cmsIT8SetTable(hIT8: cmsHANDLE; nTable: Integer): Integer; StdCall; external LCMS2_SO;
+
+// Persistence
+FUNCTION  cmsIT8LoadFromFile(cFileName: PAnsiChar): cmsHANDLE; StdCall; external LCMS2_SO;
+FUNCTION  cmsIT8LoadFromMem(Ptr: Pointer; size :DWord): cmsHANDLE; StdCall; external LCMS2_SO;
+// NOTE(review): cmsIT8LoadFromMem types its size as DWord where the other imports use cmsUInt32Number - presumably the same width; confirm
+FUNCTION cmsIT8SaveToFile(hIT8: cmsHANDLE; cFileName: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8SaveToMem(hIT8: cmsHANDLE; MemPtr: Pointer; BytesNeeded: LPcmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+// Properties
+
+FUNCTION cmsIT8GetSheetType(hIT8: cmsHANDLE): PAnsiChar; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8SetSheetType(hIT8: cmsHANDLE; TheType: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsIT8SetComment(hIT8: cmsHANDLE; cComment: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+// Property setters: one import per value type (string, Double, Integer "hex", uncooked buffer), each returning a cmsBool
+FUNCTION cmsIT8SetPropertyStr(hIT8: cmsHANDLE; cProp, Str: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8SetPropertyDbl(hIT8: cmsHANDLE; cProp: PAnsiChar; Val: Double): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8SetPropertyHex(hIT8: cmsHANDLE; cProp: PAnsiChar; Val: Integer): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8SetPropertyUncooked(hIT8: cmsHANDLE; Key, Buffer: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+
+// Property getters; cmsIT8EnumProperties receives PropertyNames as a var (by-reference) LPPAnsiChar and returns an Integer
+FUNCTION cmsIT8GetProperty(hIT8: cmsHANDLE; cProp: PAnsiChar): PAnsiChar; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8GetPropertyDbl(hIT8: cmsHANDLE; cProp: PAnsiChar): Double; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8EnumProperties(hIT8: cmsHANDLE; var PropertyNames: LPPAnsiChar): Integer; StdCall; external LCMS2_SO;
+
+// Datasets
+// Cells are addressed either by (row, col) Integers or by (cPatch, cSample) strings; each accessor has a PAnsiChar and a Double form
+FUNCTION cmsIT8GetDataRowCol(hIT8: cmsHANDLE; row, col: Integer): PAnsiChar; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8GetDataRowColDbl(hIT8: cmsHANDLE; row, col: Integer): Double; StdCall; external LCMS2_SO;
+
+FUNCTION cmsIT8SetDataRowCol(hIT8: cmsHANDLE; row, col: Integer; Val: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8SetDataRowColDbl(hIT8: cmsHANDLE; row, col: Integer; Val: Double): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsIT8GetData(hIT8: cmsHANDLE; cPatch, cSample: PAnsiChar): PAnsiChar; StdCall; external LCMS2_SO;
+
+FUNCTION cmsIT8GetDataDbl(hIT8: cmsHANDLE;cPatch, cSample: PAnsiChar): Double; StdCall; external LCMS2_SO;
+
+FUNCTION cmsIT8SetData(hIT8: cmsHANDLE; cPatch, cSample, Val: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsIT8SetDataDbl(hIT8: cmsHANDLE; cPatch, cSample: PAnsiChar; Val: Double): cmsBool; StdCall; external LCMS2_SO;
+
+FUNCTION cmsIT8SetDataFormat(hIT8: cmsHANDLE; n: Integer; Sample: PAnsiChar): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8EnumDataFormat(hIT8: cmsHANDLE; var SampleNames: LPPAnsiChar): Integer; StdCall; external LCMS2_SO;
+FUNCTION cmsIT8GetPatchName(hIT8: cmsHANDLE; nPatch: Integer; Buffer: PAnsiChar): PAnsiChar; StdCall; external LCMS2_SO;
+
+// The LABEL extension
+
+FUNCTION cmsIT8SetTableByLabel(hIT8: cmsHANDLE; cSet, cField, ExpectedType: PAnsiChar): Integer; StdCall; external LCMS2_SO;
+
+FUNCTION cmsIT8FindDataFormat(hIT8: cmsHANDLE; cSample: PAnsiChar): Integer; StdCall; external LCMS2_SO;
+
+// Formatter for double
+PROCEDURE  cmsIT8DefineDblFormat(hIT8: cmsHANDLE; Formatter: PAnsiChar);  StdCall; external LCMS2_SO;
+
+FUNCTION  cmsGBDAlloc(ContextID: cmsContext):cmsHANDLE; StdCall; external LCMS2_SO;
+PROCEDURE cmsGBDFree(hGBD: cmsHANDLE); StdCall; external LCMS2_SO;
+FUNCTION  cmsGDBAddPoint(hGBD: cmsHANDLE; Lab: LPcmsCIELab): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsGDBCompute(hGDB: cmsHANDLE; dwFlags: cmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION  cmsGDBCheckPoint(hGBD: cmsHANDLE; Lab: LPcmsCIELab): cmsBool; StdCall; external LCMS2_SO;
+// NOTE(review): Alloc/Free above are spelled cmsGBD* while AddPoint/Compute/CheckPoint are cmsGDB*; presumably this mirrors the library's exported names - confirm before "fixing" the spelling, because an import name must match the export
+FUNCTION cmsDetectBlackPoint( BlackPoint: LPcmsCIEXYZ; hProfile: cmsHPROFILE; Intent: cmsUInt32Number; dwFlags: cmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+FUNCTION cmsDetectDestinationBlackPoint( BlackPoint: LPcmsCIEXYZ; hProfile: cmsHPROFILE; Intent: cmsUInt32Number; dwFlags: cmsUInt32Number): cmsBool; StdCall; external LCMS2_SO;
+// Both black-point imports take the BlackPoint pointer (LPcmsCIEXYZ) as their first argument, ahead of the profile handle
+FUNCTION cmsDetectTAC(hProfile: cmsHPROFILE): cmsFloat64Number; StdCall; external LCMS2_SO;
+
+FUNCTION  cmsDesaturateLab(Lab: LPcmsCIELab; amax, amin, bmax, bmin: cmsFloat64Number): cmsBool; StdCall; external LCMS2_SO;
+
+END.
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/LICENSE_iccjpeg b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/LICENSE_iccjpeg
new file mode 100755
index 0000000000..dffd80b51e
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/LICENSE_iccjpeg
@@ -0,0 +1,67 @@
+
+THIS LICENSE APPLIES ONLY TO iccjpeg.c file
+-----
+In plain English:
+
+1. We don't promise that this software works.  (But if you find any bugs,
+   please let us know!)
+2. You can use this software for whatever you want.  You don't have to pay us.
+3. You may not pretend that you wrote this software.  If you use it in a
+   program, you must acknowledge somewhere in your documentation that
+   you've used the IJG code.
+
+In legalese:
+
+The authors make NO WARRANTY or representation, either express or implied,
+with respect to this software, its quality, accuracy, merchantability, or
+fitness for a particular purpose.  This software is provided "AS IS", and you,
+its user, assume the entire risk as to its quality and accuracy.
+
+This software is copyright (C) 1991-2013, Thomas G. Lane, Guido Vollbeding.
+All Rights Reserved except as specified below.
+
+Permission is hereby granted to use, copy, modify, and distribute this
+software (or portions thereof) for any purpose, without fee, subject to these
+conditions:
+(1) If any part of the source code for this software is distributed, then this
+README file must be included, with this copyright and no-warranty notice
+unaltered; and any additions, deletions, or changes to the original files
+must be clearly indicated in accompanying documentation.
+(2) If only executable code is distributed, then the accompanying
+documentation must state that "this software is based in part on the work of
+the Independent JPEG Group".
+(3) Permission for use of this software is granted only if the user accepts
+full responsibility for any undesirable consequences; the authors accept
+NO LIABILITY for damages of any kind.
+
+These conditions apply to any software derived from or based on the IJG code,
+not just to the unmodified library.  If you use our work, you ought to
+acknowledge us.
+
+Permission is NOT granted for the use of any IJG author's name or company name
+in advertising or publicity relating to this software or products derived from
+it.  This software may be referred to only as "the Independent JPEG Group's
+software".
+
+We specifically permit and encourage the use of this software as the basis of
+commercial products, provided that all warranty or liability claims are
+assumed by the product vendor.
+
+
+The Unix configuration script "configure" was produced with GNU Autoconf.
+It is copyright by the Free Software Foundation but is freely distributable.
+The same holds for its supporting scripts (config.guess, config.sub,
+ltmain.sh).  Another support script, install-sh, is copyright by X Consortium
+but is also freely distributable.
+
+The IJG distribution formerly included code to read and write GIF files.
+To avoid entanglement with the Unisys LZW patent, GIF reading support has
+been removed altogether, and the GIF writer has been simplified to produce
+"uncompressed GIFs".  This technique does not use the LZW algorithm; the
+resulting GIF files are larger than usual, but are readable by all standard
+GIF decoders.
+
+We are required to state that
+    "The Graphics Interchange Format(c) is the Copyright property of
+    CompuServe Incorporated.  GIF(sm) is a Service Mark property of
+    CompuServe Incorporated."
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/Makefile.am b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/Makefile.am
new file mode 100755
index 0000000000..d0a0897f3d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/Makefile.am
@@ -0,0 +1,22 @@
+#
+# Makefile for building jpgicc
+# Written by Bob Friesenhahn, June 2003
+# Bugs introduced by Marti Maria on October 2004
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
+              -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+if HasJPEG
+bin_PROGRAMS = jpgicc
+else
+bin_PROGRAMS = 
+endif
+
+jpgicc_LDADD = $(top_builddir)/src/liblcms2.la @JPEGICC_DEPLIBS@
+jpgicc_LDFLAGS = @LDFLAGS@
+jpgicc_SOURCES = jpgicc.c iccjpeg.c ../common/xgetopt.c ../common/vprf.c ../common/utils.h
+man_MANS = jpgicc.1
+
+EXTRA_DIST = iccjpeg.h $(man_MANS)
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/Makefile.in b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/Makefile.in
new file mode 100755
index 0000000000..69032832de
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/Makefile.in
@@ -0,0 +1,739 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for building jpgicc
+# Written by Bob Friesenhahn, June 2003
+# Bugs introduced by Marti Maria on October 2004
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+@HasJPEG_TRUE@bin_PROGRAMS = jpgicc$(EXEEXT)
+subdir = utils/jpgicc
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_append_compile_flags.m4 \
+	$(top_srcdir)/m4/ax_append_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+	$(top_srcdir)/m4/ax_require_defined.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"
+PROGRAMS = $(bin_PROGRAMS)
+am__dirstamp = $(am__leading_dot)dirstamp
+am_jpgicc_OBJECTS = jpgicc.$(OBJEXT) iccjpeg.$(OBJEXT) \
+	../common/xgetopt.$(OBJEXT) ../common/vprf.$(OBJEXT)
+jpgicc_OBJECTS = $(am_jpgicc_OBJECTS)
+jpgicc_DEPENDENCIES = $(top_builddir)/src/liblcms2.la
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 = 
+jpgicc_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(jpgicc_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(jpgicc_SOURCES)
+DIST_SOURCES = $(jpgicc_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
+man1dir = $(mandir)/man1
+NROFF = nroff
+MANS = $(man_MANS)
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_THREAD = @LIB_THREAD@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_CXX = @PTHREAD_CXX@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
+              -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+
+jpgicc_LDADD = $(top_builddir)/src/liblcms2.la @JPEGICC_DEPLIBS@
+jpgicc_LDFLAGS = @LDFLAGS@
+jpgicc_SOURCES = jpgicc.c iccjpeg.c ../common/xgetopt.c ../common/vprf.c ../common/utils.h
+man_MANS = jpgicc.1
+EXTRA_DIST = iccjpeg.h $(man_MANS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign utils/jpgicc/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign utils/jpgicc/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binPROGRAMS: $(bin_PROGRAMS)
+	@$(NORMAL_INSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+	fi; \
+	for p in $$list; do echo "$$p $$p"; done | \
+	sed 's/$(EXEEXT)$$//' | \
+	while read p p1; do if test -f $$p \
+	 || test -f $$p1 \
+	  ; then echo "$$p"; echo "$$p"; else :; fi; \
+	done | \
+	sed -e 'p;s,.*/,,;n;h' \
+	    -e 's|.*|.|' \
+	    -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+	sed 'N;N;N;s,\n, ,g' | \
+	$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+	  { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+	    if ($$2 == $$4) files[d] = files[d] " " $$1; \
+	    else { print "f", $$3 "/" $$4, $$1; } } \
+	  END { for (d in files) print "f", d, files[d] }' | \
+	while read type dir files; do \
+	    if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+	    test -z "$$files" || { \
+	    echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+	    $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+	    } \
+	; done
+
+uninstall-binPROGRAMS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	files=`for p in $$list; do echo "$$p"; done | \
+	  sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+	      -e 's/$$/$(EXEEXT)/' \
+	`; \
+	test -n "$$list" || exit 0; \
+	echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+	cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+	@list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+	echo " rm -f" $$list; \
+	rm -f $$list || exit $$?; \
+	test -n "$(EXEEXT)" || exit 0; \
+	list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+	echo " rm -f" $$list; \
+	rm -f $$list
+../common/$(am__dirstamp):
+	@$(MKDIR_P) ../common
+	@: > ../common/$(am__dirstamp)
+../common/xgetopt.$(OBJEXT): ../common/$(am__dirstamp)
+../common/vprf.$(OBJEXT): ../common/$(am__dirstamp)
+
+jpgicc$(EXEEXT): $(jpgicc_OBJECTS) $(jpgicc_DEPENDENCIES) $(EXTRA_jpgicc_DEPENDENCIES) 
+	@rm -f jpgicc$(EXEEXT)
+	$(AM_V_CCLD)$(jpgicc_LINK) $(jpgicc_OBJECTS) $(jpgicc_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+	-rm -f ../common/*.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+.c.o:
+	$(AM_V_CC)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+	$(AM_V_CC)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+	$(AM_V_CC)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+install-man1: $(man_MANS)
+	@$(NORMAL_INSTALL)
+	@list1=''; \
+	list2='$(man_MANS)'; \
+	test -n "$(man1dir)" \
+	  && test -n "`echo $$list1$$list2`" \
+	  || exit 0; \
+	echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \
+	$(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \
+	{ for i in $$list1; do echo "$$i"; done;  \
+	if test -n "$$list2"; then \
+	  for i in $$list2; do echo "$$i"; done \
+	    | sed -n '/\.1[a-z]*$$/p'; \
+	fi; \
+	} | while read p; do \
+	  if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
+	  echo "$$d$$p"; echo "$$p"; \
+	done | \
+	sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
+	      -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \
+	sed 'N;N;s,\n, ,g' | { \
+	list=; while read file base inst; do \
+	  if test "$$base" = "$$inst"; then list="$$list $$file"; else \
+	    echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \
+	    $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \
+	  fi; \
+	done; \
+	for i in $$list; do echo "$$i"; done | $(am__base_list) | \
+	while read files; do \
+	  test -z "$$files" || { \
+	    echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \
+	    $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \
+	done; }
+
+uninstall-man1:
+	@$(NORMAL_UNINSTALL)
+	@list=''; test -n "$(man1dir)" || exit 0; \
+	files=`{ for i in $$list; do echo "$$i"; done; \
+	l2='$(man_MANS)'; for i in $$l2; do echo "$$i"; done | \
+	  sed -n '/\.1[a-z]*$$/p'; \
+	} | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
+	      -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \
+	dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir)
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS) $(MANS)
+installdirs:
+	for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+	-rm -f ../common/$(am__dirstamp)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-man
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man: install-man1
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS uninstall-man
+
+uninstall-man: uninstall-man1
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean \
+	clean-binPROGRAMS clean-generic clean-libtool cscopelist-am \
+	ctags ctags-am distclean distclean-compile distclean-generic \
+	distclean-libtool distclean-tags distdir dvi dvi-am html \
+	html-am info info-am install install-am install-binPROGRAMS \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-man1 \
+	install-pdf install-pdf-am install-ps install-ps-am \
+	install-strip installcheck installcheck-am installdirs \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+	pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \
+	uninstall-binPROGRAMS uninstall-man uninstall-man1
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/iccjpeg.c b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/iccjpeg.c
new file mode 100755
index 0000000000..d08b4bd081
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/iccjpeg.c
@@ -0,0 +1,248 @@
+/*
+ * iccjpeg.c
+ *
+ * This file provides code to read and write International Color Consortium
+ * (ICC) device profiles embedded in JFIF JPEG image files.  The ICC has
+ * defined a standard format for including such data in JPEG "APP2" markers.
+ * The code given here does not know anything about the internal structure
+ * of the ICC profile data; it just knows how to put the profile data into
+ * a JPEG file being written, or get it back out when reading.
+ *
+ * This code depends on new features added to the IJG JPEG library as of
+ * IJG release 6b; it will not compile or work with older IJG versions.
+ *
+ * NOTE: this code would need surgery to work on 16-bit-int machines
+ * with ICC profiles exceeding 64K bytes in size.  If you need to do that,
+ * change all the "unsigned int" variables to "INT32".  You'll also need
+ * to find a malloc() replacement that can allocate more than 64K.
+ */
+
+#include "iccjpeg.h"
+#include <stdlib.h>			/* define malloc() */
+
+
+/*
+ * Since an ICC profile can be larger than the maximum size of a JPEG marker
+ * (64K), we need provisions to split it into multiple markers.  The format
+ * defined by the ICC specifies one or more APP2 markers containing the
+ * following data:
+ *	Identifying string	ASCII "ICC_PROFILE\0"  (12 bytes)
+ *	Marker sequence number	1 for first APP2, 2 for next, etc (1 byte)
+ *	Number of markers	Total number of APP2's used (1 byte)
+ *      Profile data		(remainder of APP2 data)
+ * Decoders should use the marker sequence numbers to reassemble the profile,
+ * rather than assuming that the APP2 markers appear in the correct sequence.
+ */
+
+#define ICC_MARKER  (JPEG_APP0 + 2)	/* JPEG marker code for ICC */
+#define ICC_OVERHEAD_LEN  14		/* size of non-profile data in APP2 */
+#define MAX_BYTES_IN_MARKER  65533	/* maximum data len of a JPEG marker */
+#define MAX_DATA_BYTES_IN_MARKER  (MAX_BYTES_IN_MARKER - ICC_OVERHEAD_LEN)
+
+
+/*
+ * Embed an ICC profile into the JPEG stream being written, splitting it
+ * across as many APP2 markers as are needed.
+ *
+ * Call this AFTER jpeg_start_compress() and BEFORE the first call to
+ * jpeg_write_scanlines(), so that the APP2 marker(s) land after the
+ * SOI and JFIF or Adobe markers but ahead of everything else.
+ *
+ * Nothing is written when icc_data_len is zero.
+ */
+
+void
+write_icc_profile (j_compress_ptr cinfo,
+		   const JOCTET *icc_data_ptr,
+		   unsigned int icc_data_len)
+{
+  /* "ICC_PROFILE" followed by its NUL terminator, spelled as ASCII codes
+   * so the output is right even if the local character set is not ASCII.
+   */
+  static const int icc_tag[12] = {
+    0x49, 0x43, 0x43, 0x5F, 0x50, 0x52,
+    0x4F, 0x46, 0x49, 0x4C, 0x45, 0x0
+  };
+  const JOCTET *src = icc_data_ptr;	/* next profile byte to emit */
+  unsigned int remaining = icc_data_len;	/* profile bytes still to emit */
+  unsigned int num_markers;		/* total number of APP2 markers */
+  unsigned int chunk;			/* profile bytes in this marker */
+  unsigned int k;
+  int cur_marker;			/* per spec, counting starts at 1 */
+
+  /* Number of markers needed, rounded up */
+  num_markers = remaining / MAX_DATA_BYTES_IN_MARKER;
+  if (remaining % MAX_DATA_BYTES_IN_MARKER != 0)
+    num_markers++;
+
+  for (cur_marker = 1; remaining > 0; cur_marker++) {
+    /* Take as much of the profile as fits in one marker */
+    chunk = (remaining > MAX_DATA_BYTES_IN_MARKER)
+	    ? MAX_DATA_BYTES_IN_MARKER : remaining;
+    remaining -= chunk;
+
+    /* Marker header: APP2 code and total marker length */
+    jpeg_write_m_header(cinfo, ICC_MARKER,
+			(unsigned int) (chunk + ICC_OVERHEAD_LEN));
+
+    /* Identifying string */
+    for (k = 0; k < 12; k++)
+      jpeg_write_m_byte(cinfo, icc_tag[k]);
+
+    /* Sequencing info: this marker's number, then the marker count */
+    jpeg_write_m_byte(cinfo, cur_marker);
+    jpeg_write_m_byte(cinfo, (int) num_markers);
+
+    /* Profile payload for this marker */
+    for (k = 0; k < chunk; k++)
+      jpeg_write_m_byte(cinfo, src[k]);
+    src += chunk;
+  }
+}
+
+
+/*
+ * Arrange for APP2 markers to be retained while the JPEG header is read,
+ * so that read_icc_profile() can find them afterwards.
+ */
+
+void
+setup_read_icc_profile (j_decompress_ptr cinfo)
+{
+  /* Length limit handed to jpeg_save_markers() for APP2 data */
+  const unsigned int keep_length = 0xFFFF;
+
+  jpeg_save_markers(cinfo, ICC_MARKER, keep_length);
+}
+
+
+/*
+ * Decide whether a saved marker carries ICC profile data: it must be an
+ * APP2 marker, be long enough to hold the 14-byte ICC overhead, and start
+ * with the identifying string "ICC_PROFILE\0".
+ */
+
+static boolean
+marker_is_icc (jpeg_saved_marker_ptr marker)
+{
+  /* "ICC_PROFILE" and its NUL terminator as ASCII codes */
+  static const int icc_tag[12] = {
+    0x49, 0x43, 0x43, 0x5F, 0x50, 0x52,
+    0x4F, 0x46, 0x49, 0x4C, 0x45, 0x0
+  };
+  int k;
+
+  if (marker->marker != ICC_MARKER)
+    return FALSE;
+  if (marker->data_length < ICC_OVERHEAD_LEN)
+    return FALSE;
+
+  /* verify the identifying string */
+  for (k = 0; k < 12; k++) {
+    if (GETJOCTET(marker->data[k]) != icc_tag[k])
+      return FALSE;
+  }
+  return TRUE;
+}
+
+
+/*
+ * Look for an ICC profile among the markers saved while reading the JPEG
+ * header and, if one is present, reassemble it into a single buffer.
+ *
+ * Returns TRUE when a profile was found; *icc_data_ptr then points to the
+ * reassembled data and *icc_data_len holds its length.  Returns FALSE
+ * otherwise, with *icc_data_ptr set to NULL and *icc_data_len set to 0.
+ *
+ * IMPORTANT: the returned buffer comes from malloc() and must be released
+ * by the caller with free().  (The IJG memory allocator could have been used
+ * instead, so that the data would vanish at jpeg_finish_decompress() time,
+ * but many applications will want the data to outlive decompression.)
+ *
+ * NOTE: invalid ICC APP2 markers (inconsistent counts, bad, duplicate or
+ * missing sequence numbers, only empty markers) and allocation failure
+ * all produce a silent FALSE return.
+ */
+
+boolean
+read_icc_profile (j_decompress_ptr cinfo,
+		  JOCTET **icc_data_ptr,
+		  unsigned int *icc_data_len)
+{
+#define MAX_SEQ_NO  255		/* sufficient since marker numbers are bytes */
+  jpeg_saved_marker_ptr cur;
+  char seen[MAX_SEQ_NO+1];		/* 1 if that sequence number was found */
+  unsigned int part_len[MAX_SEQ_NO+1];	/* profile bytes held by each marker */
+  unsigned int part_off[MAX_SEQ_NO+1];	/* where each part goes in the result */
+  unsigned int total = 0;		/* size of the assembled profile */
+  int expected = 0;			/* marker count declared in the file */
+  int seq;
+  JOCTET *assembled;
+
+  /* avoid confusion if FALSE return */
+  *icc_data_ptr = NULL;
+  *icc_data_len = 0;
+
+  for (seq = 1; seq <= MAX_SEQ_NO; seq++)
+    seen[seq] = 0;
+
+  /* Pass 1: find the ICC markers and check that their numbering is
+   * consistent.
+   */
+  for (cur = cinfo->marker_list; cur != NULL; cur = cur->next) {
+    int declared;
+
+    if (!marker_is_icc(cur))
+      continue;
+
+    declared = GETJOCTET(cur->data[13]);
+    if (expected == 0)
+      expected = declared;
+    else if (expected != declared)
+      return FALSE;		/* inconsistent num_markers fields */
+
+    seq = GETJOCTET(cur->data[12]);
+    if (seq <= 0 || seq > expected)
+      return FALSE;		/* bogus sequence number */
+    if (seen[seq])
+      return FALSE;		/* duplicate sequence numbers */
+
+    seen[seq] = 1;
+    part_len[seq] = cur->data_length - ICC_OVERHEAD_LEN;
+  }
+
+  if (expected == 0)
+    return FALSE;
+
+  /* Pass 2: every sequence number must be present; lay the parts out
+   * back to back and add up the space needed.
+   */
+  for (seq = 1; seq <= expected; seq++) {
+    if (seen[seq] == 0)
+      return FALSE;		/* missing sequence number */
+    part_off[seq] = total;
+    total += part_len[seq];
+  }
+
+  if (total == 0)
+    return FALSE;		/* found only empty markers? */
+
+  /* Allocate space for assembled data */
+  assembled = (JOCTET *) malloc(total * sizeof(JOCTET));
+  if (assembled == NULL)
+    return FALSE;		/* oops, out of memory */
+
+  /* Pass 3: copy each marker's payload to its slot */
+  for (cur = cinfo->marker_list; cur != NULL; cur = cur->next) {
+    JOCTET FAR *from;
+    JOCTET *to;
+    unsigned int k, count;
+
+    if (!marker_is_icc(cur))
+      continue;
+
+    seq = GETJOCTET(cur->data[12]);
+    to = assembled + part_off[seq];
+    from = cur->data + ICC_OVERHEAD_LEN;
+    count = part_len[seq];
+    for (k = 0; k < count; k++)
+      to[k] = from[k];
+  }
+
+  *icc_data_ptr = assembled;
+  *icc_data_len = total;
+
+  return TRUE;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/iccjpeg.h b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/iccjpeg.h
new file mode 100755
index 0000000000..5e1888d9ef
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/iccjpeg.h
@@ -0,0 +1,73 @@
+/*
+ * iccjpeg.h
+ *
+ * This file provides code to read and write International Color Consortium
+ * (ICC) device profiles embedded in JFIF JPEG image files.  The ICC has
+ * defined a standard format for including such data in JPEG "APP2" markers.
+ * The code given here does not know anything about the internal structure
+ * of the ICC profile data; it just knows how to put the profile data into
+ * a JPEG file being written, or get it back out when reading.
+ *
+ * This code depends on new features added to the IJG JPEG library as of
+ * IJG release 6b; it will not compile or work with older IJG versions.
+ *
+ * NOTE: this code would need surgery to work on 16-bit-int machines
+ * with ICC profiles exceeding 64K bytes in size.  See iccjpeg.c
+ * for details.
+ */
+
+#include <stdio.h>		/* needed to define "FILE", "NULL" */
+#include "jpeglib.h"
+
+
+/*
+ * This routine writes the given ICC profile data into a JPEG file.
+ * It *must* be called AFTER calling jpeg_start_compress() and BEFORE
+ * the first call to jpeg_write_scanlines().
+ * (This ordering ensures that the APP2 marker(s) will appear after the
+ * SOI and JFIF or Adobe markers, but before all else.)
+ */
+
+extern void write_icc_profile JPP((j_compress_ptr cinfo,
+				   const JOCTET *icc_data_ptr,
+				   unsigned int icc_data_len));
+
+
+/*
+ * Reading a JPEG file that may contain an ICC profile requires two steps:
+ *
+ * 1. After jpeg_create_decompress() but before jpeg_read_header(),
+ *    call setup_read_icc_profile().  This routine tells the IJG library
+ *    to save in memory any APP2 markers it may find in the file.
+ *
+ * 2. After jpeg_read_header(), call read_icc_profile() to find out
+ *    whether there was a profile and obtain it if so.
+ */
+
+
+/*
+ * Prepare for reading an ICC profile
+ */
+
+extern void setup_read_icc_profile JPP((j_decompress_ptr cinfo));
+
+
+/*
+ * See if there was an ICC profile in the JPEG file being read;
+ * if so, reassemble and return the profile data.
+ *
+ * TRUE is returned if an ICC profile was found, FALSE if not.
+ * If TRUE is returned, *icc_data_ptr is set to point to the
+ * returned data, and *icc_data_len is set to its length.
+ *
+ * IMPORTANT: the data at **icc_data_ptr has been allocated with malloc()
+ * and must be freed by the caller with free() when the caller no longer
+ * needs it.  (Alternatively, we could write this routine to use the
+ * IJG library's memory allocator, so that the data would be freed implicitly
+ * at jpeg_finish_decompress() time.  But it seems likely that many apps
+ * will prefer to have the data stick around after decompression finishes.)
+ */
+
+extern boolean read_icc_profile JPP((j_decompress_ptr cinfo,
+				     JOCTET **icc_data_ptr,
+				     unsigned int *icc_data_len));
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/jpgicc.1 b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/jpgicc.1
new file mode 100755
index 0000000000..44795a38eb
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/jpgicc.1
@@ -0,0 +1,122 @@
+.\"Shiju P. Nair September 30, 2004
+.\"Thomas Weber <tweber@debian.org> April 23, 2014
+.TH JPGICC 1 "September 30, 2004"
+.SH NAME
+jpgicc - little cms ICC profile applier for JPEG.
+.SH SYNOPSIS
+.B jpgicc
+.RI [ options ] " input.jpg output.jpg"
+.SH DESCRIPTION
+lcms is a standalone CMM engine, which deals with the color management.
+It implements a fast transformation between ICC profiles.
+.B jpgicc
+is a little cms ICC profile applier for JPEG.
+.SH OPTIONS
+.TP
+.B \-b
+Black point compensation.
+.TP
+.BI \-c\  NUM
+Precalculates transform (0=Off, 1=Normal, 2=Hi-res, 3=LoRes) [defaults to 1].
+.TP
+.BI \-d\  NUM
+Observer adaptation state (abs.col. only), (0..1.0, float value) [defaults to 0.0].
+.TP
+.B \-e
+Embed destination profile.
+.TP
+.B \-g
+Marks out-of-gamut colors on softproof.
+.TP
+.BI \-h\  NUM
+Show summary of options and examples (0=help, 1=Examples, 2=Built-in profiles, 3=Contact information)
+.TP
+.BI \-i\  profile
+Input profile (defaults to sRGB).
+.TP
+.BI \-l\  link
+Transform using the given device-link profile.
+.TP
+.BI \-m\  NUM
+SoftProof intent (0,1,2,3) [defaults to 0].
+.TP
+.B \-n
+Ignore embedded profile.
+.TP
+.BI \-o\  profile
+Output profile (defaults to sRGB).
+.TP
+.BI \-p\  profile
+Soft proof profile.
+.TP
+.BI \-q\  NUM
+Output JPEG quality, (0..100) [defaults to 75].
+.TP
+.BI \-s\  newprofile
+Save embedded profile as \fInewprofile\fR.
+.TP
+.BI \-t\  NUM
+Rendering intent
+.nf
+.RS
+0=Perceptual [default]
+1=Relative colorimetric
+2=Saturation
+3=Absolute colorimetric
+10=Perceptual preserving black ink
+11=Relative colorimetric preserving black ink
+12=Saturation preserving black ink
+13=Perceptual preserving black plane
+14=Relative colorimetric preserving black plane
+15=Saturation preserving black plane
+.RE
+.fi
+.TP
+.B \-v
+Verbose.
+.TP
+.BI \-!\  NUM,NUM,NUM
+Out-of-gamut marker channel values (r,g,b) [defaults: 128,128,128].
+.SH BUILT-IN PROFILES
+.nf
+	*Lab2  -- D50-based v2 CIEL*a*b
+	*Lab4  -- D50-based v4 CIEL*a*b
+	*Lab   -- D50-based v4 CIEL*a*b
+	*XYZ   -- CIE XYZ (PCS)
+	*sRGB  -- sRGB color space
+	*Gray22 - Monochrome of Gamma 2.2
+	*Gray30 - Monochrome of Gamma 3.0
+	*null   - Monochrome black for all input
+	*Lin2222- CMYK linearization of gamma 2.2 on each channel
+.fi
+.SH EXAMPLES
+.nf
+To color correct from scanner to sRGB:
+	jpgicc -iscanner.icm in.jpg out.jpg
+
+To convert from monitor1 to monitor2:
+	jpgicc -imon1.icm -omon2.icm in.jpg out.jpg
+
+To make a CMYK separation:
+	jpgicc -oprinter.icm inrgb.jpg outcmyk.jpg
+
+To recover sRGB from a CMYK separation:
+	jpgicc -iprinter.icm incmyk.jpg outrgb.jpg
+
+To convert from CIELab ITU/Fax JPEG to sRGB
+	jpgicc -iitufax.icm in.jpg out.jpg
+
+To convert from CIELab ITU/Fax JPEG to sRGB
+	jpgicc in.jpg out.jpg
+.fi
+.SH NOTES
+For suggestions, comments, bug reports etc. send mail to
+info@littlecms.com.
+.SH SEE ALSO
+.BR linkicc (1),
+.BR psicc (1),
+.BR tificc (1),
+.BR transicc (1)
+.SH AUTHOR
+This manual page was written by Shiju P. Nair <shiju.p@gmail.com>,
+for the Debian project.
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/jpgicc.c b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/jpgicc.c
new file mode 100755
index 0000000000..e6311f2637
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/jpgicc/jpgicc.c
@@ -0,0 +1,1261 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+// This program does apply profiles to (some) JPEG files
+
+
+#include "utils.h"
+
+#include "jpeglib.h"
+#include "iccjpeg.h"
+
+// Flags
+// NOTE(review): these file-scope switches look like command-line option state
+// (compare the options listed in jpgicc.1); the option parser is not visible
+// in this part of the file -- confirm the mapping there.
+static cmsBool BlackPointCompensation = FALSE;
+static cmsBool IgnoreEmbedded         = FALSE;
+static cmsBool GamutCheck             = FALSE;
+static cmsBool lIsITUFax              = FALSE;   // reset in OpenInput(), set from IsITUFax() in GetInputPixelType()
+static cmsBool lIsPhotoshopApp13      = FALSE;   // set from HandlePhotoshopAPP13() in GetInputPixelType()
+static cmsBool lIsEXIF;                          // no initializer: static storage, so it starts as 0; set from HandleEXIF()
+static cmsBool lIsDeviceLink          = FALSE;
+static cmsBool EmbedProfile           = FALSE;
+
+// NOTE(review): presumably the file name for saving the embedded profile -- confirm
+static const char* SaveEmbedded = NULL;
+
+static int Intent                  = INTENT_PERCEPTUAL;
+static int ProofingIntent          = INTENT_PERCEPTUAL;
+static int PrecalcMode             = 1;
+
+static int jpegQuality             = 75;
+
+static cmsFloat64Number ObserverAdaptationState = 0;
+
+
+// NOTE(review): presumably the input, output and proofing profile names -- confirm
+static char *cInpProf  = NULL;
+static char *cOutProf  = NULL;
+static char *cProofing = NULL;
+
+// Streams opened by OpenInput() / OpenOutput() and closed by Done()
+static FILE * InFile;
+static FILE * OutFile;
+
+// libjpeg state objects, created in OpenInput() / OpenOutput() and destroyed in Done()
+static struct jpeg_decompress_struct Decompressor;
+static struct jpeg_compress_struct   Compressor;
+
+
+// Error manager shared by Decompressor and Compressor: both OpenInput() and
+// OpenOutput() pass &ErrorHandler.pub to jpeg_std_error() and then point its
+// error_exit and output_message hooks at my_error_exit().
+static struct my_error_mgr {
+
+    struct  jpeg_error_mgr pub;   // "public" fields
+    void*   Cargo;                // "private" fields
+
+} ErrorHandler;
+
+
+// Four 16-bit channel values, 128,128,128,0 by default. Not static, so visible
+// to other translation units.
+// NOTE(review): presumably the out-of-gamut marker colour -- confirm
+cmsUInt16Number Alarm[4] = {128,128,128,0};
+
+
+static
+void my_error_exit (j_common_ptr cinfo)
+{
+  char buffer[JMSG_LENGTH_MAX];
+
+  (*cinfo->err->format_message) (cinfo, buffer);
+  FatalError(buffer);
+}
+
+/*
+Definition of the APPn Markers Defined for continuous-tone G3FAX
+
+The application code APP1 initiates identification of the image as
+a G3FAX application and defines the spatial resolution and subsampling.
+This marker directly follows the SOI marker. The data format will be as follows:
+
+X'FFE1' (APP1), length, FAX identifier, version, spatial resolution.
+
+The above terms are defined as follows:
+
+Length: (Two octets) Total APP1 field octet count including the octet count itself, but excluding the APP1
+marker.
+
+FAX identifier: (Six octets) X'47', X'33', X'46', X'41', X'58', X'00'. This X'00'-terminated string "G3FAX"
+uniquely identifies this APP1 marker.
+
+Version: (Two octets) X'07CA'. This string specifies the year of approval of the standard, for identification
+in the case of future revision (for example, 1994).
+
+Spatial Resolution: (Two octets) Lightness pixel density in pels/25.4 mm. The basic value is 200. Allowed values are
+100, 200, 300, 400, 600 and 1200 pels/25.4 mm, with square (or equivalent) pels.
+
+NOTE - The functional equivalence of inch-based and mm-based resolutions is maintained. For example, the 200 x 200
+*/
+
+// Scan a saved-marker list for an APP1 marker whose payload begins with the
+// NUL-terminated identifier "G3FAX". Returns TRUE on the first such marker,
+// FALSE if the list holds none.
+static
+cmsBool IsITUFax(jpeg_saved_marker_ptr ptr)
+{
+    jpeg_saved_marker_ptr cur;
+
+    for (cur = ptr; cur != NULL; cur = cur -> next) {
+
+        // Only APP1 markers carrying more than 5 bytes can hold the identifier
+        if (cur -> marker != (JPEG_APP0 + 1)) continue;
+        if (cur -> data_length <= 5) continue;
+
+        if (strcmp((const char*) cur -> data, "G3FAX") == 0)
+            return TRUE;
+    }
+
+    return FALSE;
+}
+
+// Write an ITU T.42/Fax APP1 marker with default values. This is the only mode we support right now.
+//
+// The 10-byte payload follows the layout described in the G3FAX comment above:
+//   bytes 0..5  identifier X'47' X'33' X'46' X'41' X'58' X'00'  ("G3FAX" + NUL)
+//   bytes 6..7  version    X'07CA'
+//   bytes 8..9  spatial resolution X'00C8' (200, the basic value)
+//
+// The bytes are listed individually on purpose: in a string literal the
+// sequence "\0x07" is a NUL followed by the characters 'x', '0', '7', which
+// produced a wrong version field and dropped the resolution field.
+static
+void SetITUFax(j_compress_ptr cinfo)
+{
+    static const unsigned char Marker[10] = {
+        0x47, 0x33, 0x46, 0x41, 0x58, 0x00,     // "G3FAX\0"
+        0x07, 0xCA,                             // version
+        0x00, 0xC8                              // spatial resolution
+    };
+
+    jpeg_write_marker(cinfo, (JPEG_APP0 + 1), Marker, (unsigned int) sizeof(Marker));
+}
+
+
+// Build a profile for decoding ITU T.42/Fax JPEG streams.
+// The profile has an additional ability in the input direction of
+// gamut compress values between -85 < a < 85 and -75 < b < 125. This conforms
+// the default range for ITU/T.42 -- See RFC 2301, section 6.2.3 for details
+
+//  L*  =   [0, 100]
+//  a*  =   [-85, 85]
+//  b*  =   [-75, 125]
+
+
+// These functions convert the encoding of ITUFAX to floating point
+// and vice-versa. No gamut mapping is performed yet.
+
+// Decode a 16-bit ITU/Fax triplet into floating-point Lab.
+//   In[0] maps linearly onto L, with 65535 giving 100
+//   In[1] maps onto a with zero at code 32768 and a span of 170
+//   In[2] maps onto b with zero at code 24576 and a span of 200
+static
+void ITU2Lab(const cmsUInt16Number In[3], cmsCIELab* Lab)
+{
+    const cmsUInt16Number codeL = In[0];
+    const cmsUInt16Number codeA = In[1];
+    const cmsUInt16Number codeB = In[2];
+
+    Lab -> L = (double) codeL / 655.35;
+    Lab -> a = (double) 170.* (codeA - 32768.) / 65535.;
+    Lab -> b = (double) 200.* (codeB - 24576.) / 65535.;
+}
+
+// Encode floating-point Lab as a 16-bit ITU/Fax triplet; the inverse of
+// ITU2Lab(). Each channel is scaled, offset and rounded down with floor().
+// No clamping is done here, so callers are expected to pass values that
+// fit the ITU ranges.
+static
+void Lab2ITU(const cmsCIELab* Lab, cmsUInt16Number Out[3])
+{
+    const double scaledL = (double) (Lab -> L / 100.)* 65535.;
+    const double scaledA = (double) (Lab -> a / 170.)* 65535. + 32768.;
+    const double scaledB = (double) (Lab -> b / 200.)* 65535. + 24576.;
+
+    Out[0] = (cmsUInt16Number) floor(scaledL);
+    Out[1] = (cmsUInt16Number) floor(scaledA);
+    Out[2] = (cmsUInt16Number) floor(scaledB);
+}
+
+// These are the samplers. They are passed as callbacks to cmsStageSampleCLut16bit(),
+// which then sweeps the whole Lab gamut, calling these functions
+// once for each node. In[] will contain the Lab PCS value to convert to ITUFAX
+// in PCS2ITU, or the ITUFAX value to convert to Lab in ITU2PCS.
+// You can change the number of sample points if desired; the algorithm will
+// remain the same. 33 points gives good accuracy, but you can reduce to 22 or fewer
+// if space is critical.
+
+#define GRID_POINTS 33
+
+// CLUT sampler for the output direction: takes an encoded Lab PCS node,
+// desaturates it into the ITU/Fax gamut and writes the ITU encoding.
+// Cargo is not used. Always returns TRUE.
+static
+int PCS2ITU(register const cmsUInt16Number In[], register cmsUInt16Number Out[], register void*  Cargo)
+{
+    cmsCIELab Color;
+
+    UTILS_UNUSED_PARAMETER(Cargo);
+
+    cmsLabEncoded2Float(&Color, In);
+
+    // This call does the necessary gamut remapping
+    cmsDesaturateLab(&Color, 85, -85, 125, -75);
+
+    Lab2ITU(&Color, Out);
+
+    return TRUE;
+}
+
+
+// CLUT sampler for the input direction: decodes an ITU/Fax node to
+// floating-point Lab and writes it back in the encoded Lab PCS form.
+// Cargo is not used. Always returns TRUE.
+static
+int ITU2PCS( register const cmsUInt16Number In[], register cmsUInt16Number Out[], register void*  Cargo)
+{
+    cmsCIELab Color;
+
+    UTILS_UNUSED_PARAMETER(Cargo);
+
+    ITU2Lab(In, &Color);
+    cmsFloat2LabEncoded(Out, &Color);
+
+    return TRUE;
+}
+
+// Create the virtual input profile, which decodes ITU/Fax values to the
+// profile connection space.
+//
+// The profile holds a single AToB0 pipeline made of one 3->3 CLUT with
+// GRID_POINTS nodes per axis, sampled through ITU2PCS(). Colour space and PCS
+// are both Lab, and the device class is "color space".
+//
+// Returns the new profile handle, or NULL if any allocation fails. The
+// temporary pipeline is released on every path, including the one where the
+// CLUT stage cannot be allocated.
+static
+cmsHPROFILE CreateITU2PCS_ICC(void)
+{
+    cmsHPROFILE hProfile;
+    cmsPipeline* AToB0;
+    cmsStage* ColorMap;
+
+    AToB0 = cmsPipelineAlloc(0, 3, 3);
+    if (AToB0 == NULL) return NULL;
+
+    ColorMap = cmsStageAllocCLut16bit(0, GRID_POINTS, 3, 3, NULL);
+    if (ColorMap == NULL) {
+        cmsPipelineFree(AToB0);     // do not leak the pipeline allocated above
+        return NULL;
+    }
+
+    cmsPipelineInsertStage(AToB0, cmsAT_BEGIN, ColorMap);
+    cmsStageSampleCLut16bit(ColorMap, ITU2PCS, NULL, 0);
+
+    hProfile = cmsCreateProfilePlaceholder(0);
+    if (hProfile == NULL) {
+        cmsPipelineFree(AToB0);
+        return NULL;
+    }
+
+    cmsWriteTag(hProfile, cmsSigAToB0Tag, AToB0);
+    cmsSetColorSpace(hProfile, cmsSigLabData);
+    cmsSetPCS(hProfile, cmsSigLabData);
+    cmsSetDeviceClass(hProfile, cmsSigColorSpaceClass);
+
+    // The local pipeline is freed once the tag has been written
+    cmsPipelineFree(AToB0);
+
+    return hProfile;
+}
+
+
+// Create the virtual output profile, which encodes the profile connection
+// space as ITU/Fax values with the necessary gamut mapping.
+//
+// The profile holds a single BToA0 pipeline made of one 3->3 CLUT with
+// GRID_POINTS nodes per axis, sampled through PCS2ITU(). Colour space and PCS
+// are both Lab, and the device class is "color space".
+//
+// Returns the new profile handle, or NULL if any allocation fails. The
+// temporary pipeline is released on every path, including the one where the
+// CLUT stage cannot be allocated.
+static
+cmsHPROFILE CreatePCS2ITU_ICC(void)
+{
+    cmsHPROFILE hProfile;
+    cmsPipeline* BToA0;
+    cmsStage* ColorMap;
+
+    BToA0 = cmsPipelineAlloc(0, 3, 3);
+    if (BToA0 == NULL) return NULL;
+
+    ColorMap = cmsStageAllocCLut16bit(0, GRID_POINTS, 3, 3, NULL);
+    if (ColorMap == NULL) {
+        cmsPipelineFree(BToA0);     // do not leak the pipeline allocated above
+        return NULL;
+    }
+
+    cmsPipelineInsertStage(BToA0, cmsAT_BEGIN, ColorMap);
+    cmsStageSampleCLut16bit(ColorMap, PCS2ITU, NULL, 0);
+
+    hProfile = cmsCreateProfilePlaceholder(0);
+    if (hProfile == NULL) {
+        cmsPipelineFree(BToA0);
+        return NULL;
+    }
+
+    cmsWriteTag(hProfile, cmsSigBToA0Tag, BToA0);
+    cmsSetColorSpace(hProfile, cmsSigLabData);
+    cmsSetPCS(hProfile, cmsSigLabData);
+    cmsSetDeviceClass(hProfile, cmsSigColorSpaceClass);
+
+    // The local pipeline is freed once the tag has been written
+    cmsPipelineFree(BToA0);
+
+    return hProfile;
+}
+
+
+
+#define PS_FIXED_TO_FLOAT(h, l) ((float) (h) + ((float) (l)/(1<<16)))
+
+// Walk the "8BIM" resource blocks of a Photoshop APP13 payload looking for
+// resource 0x03ED; when found, copy its horizontal and vertical resolution
+// into Decompressor.X_density / Y_density and set density_unit to 1.
+//
+//   data     APP13 payload
+//   datalen  number of valid bytes in data
+//
+// Returns TRUE if the resolution resource was found and applied, FALSE
+// otherwise (including when the payload is truncated or malformed).
+//
+// Each block is read as: "8BIM" (4 bytes), resource ID (2 bytes, big-endian),
+// a length-prefixed name padded to an even size, a 4-byte big-endian data
+// size, then the data padded to an even size. Every field is checked against
+// datalen before it is read, because the payload comes straight from the file.
+// NOTE(review): scanning starts at offset 14, presumably to skip a
+// "Photoshop 3.0\0" header -- the caller only verifies "Photoshop".
+static
+cmsBool ProcessPhotoshopAPP13(JOCTET FAR *data, int datalen)
+{
+    int i;
+
+    for (i = 14; i < datalen; )
+    {
+        unsigned long len;
+        unsigned int type;
+        unsigned int nameLen;
+
+        // Signature (4) + resource ID (2) + name length byte (1) must be present
+        if (datalen - i < 7) break;
+
+        if (!(GETJOCTET(data[i]  ) == 0x38 &&
+              GETJOCTET(data[i+1]) == 0x42 &&
+              GETJOCTET(data[i+2]) == 0x49 &&
+              GETJOCTET(data[i+3]) == 0x4D)) break; // Not recognized
+
+        i += 4; // identifying string
+
+        type = ((unsigned int) GETJOCTET(data[i]) << 8) + GETJOCTET(data[i+1]);
+
+        i += 2; // resource type
+
+        // Resource name: a length byte plus that many characters, padded so
+        // the total is even
+        nameLen = GETJOCTET(data[i]);
+        i += (int) (nameLen + ((nameLen & 1) ? 1 : 2));
+
+        // The 4-byte size field must be inside the payload
+        if (i > datalen - 4) break;
+
+        // Assembled as unsigned so a size with the top bit set cannot turn
+        // negative and move the cursor backwards
+        len = ((unsigned long) GETJOCTET(data[i])   << 24) |
+              ((unsigned long) GETJOCTET(data[i+1]) << 16) |
+              ((unsigned long) GETJOCTET(data[i+2]) << 8)  |
+               (unsigned long) GETJOCTET(data[i+3]);
+
+        i += 4; // Size
+
+        if (type == 0x03ED && len >= 16) {
+
+            // Bytes i .. i+11 are read below
+            if (datalen - i < 12) break;
+
+            Decompressor.X_density = (UINT16) PS_FIXED_TO_FLOAT((GETJOCTET(data[i])<<8) + GETJOCTET(data[i+1]),
+                                                 (GETJOCTET(data[i+2])<<8) + GETJOCTET(data[i+3]));
+            Decompressor.Y_density = (UINT16) PS_FIXED_TO_FLOAT((GETJOCTET(data[i+8])<<8) + GETJOCTET(data[i+9]),
+                                                 (GETJOCTET(data[i+10])<<8) + GETJOCTET(data[i+11]));
+
+            // Set the density unit to 1 since the
+            // Vertical and Horizontal resolutions
+            // are specified in Pixels per inch
+
+            Decompressor.density_unit = 0x01;
+            return TRUE;
+
+        }
+
+        // A resource that claims to extend past the payload ends the scan
+        if (len > (unsigned long) (datalen - i)) break;
+
+        i += (int) (len + (len & 1));   // Alignment
+    }
+    return FALSE;
+}
+
+
+// Look through a saved-marker list for an APP13 marker whose payload starts
+// with "Photoshop". The first such marker is handed to ProcessPhotoshopAPP13()
+// and TRUE is returned, whatever that call reports. FALSE means the list has
+// no such marker.
+static
+cmsBool HandlePhotoshopAPP13(jpeg_saved_marker_ptr ptr)
+{
+    // "Photoshop" as ASCII codes
+    static const int Signature[9] = {
+        0x50, 0x68, 0x6F, 0x74, 0x6F, 0x73, 0x68, 0x6F, 0x70
+    };
+
+    for (; ptr != NULL; ptr = ptr -> next) {
+
+        JOCTET FAR* data;
+        int k, matches;
+
+        if (ptr -> marker != (JPEG_APP0 + 13)) continue;
+        if (ptr -> data_length <= 9) continue;
+
+        data = ptr -> data;
+
+        matches = 1;
+        for (k = 0; k < 9; k++) {
+            if (GETJOCTET(data[k]) != Signature[k]) {
+                matches = 0;
+                break;
+            }
+        }
+        if (!matches) continue;
+
+        ProcessPhotoshopAPP13(data, ptr -> data_length);
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+
+typedef unsigned short uint16_t;
+typedef unsigned char uint8_t;
+typedef unsigned int uint32_t;
+
+#define INTEL_BYTE_ORDER 0x4949
+#define XRESOLUTION 0x011a
+#define YRESOLUTION 0x011b
+#define RESOLUTION_UNIT 0x128
+
+// Read the 16-bit word stored at arr[pos], arr[pos+1].
+// With swapBytes == 0 the first byte is the most significant one; with any
+// other value the first byte is the least significant one.
+static
+uint16_t read16(uint8_t* arr, int pos,  int swapBytes)
+{
+    const uint8_t first  = arr[pos];
+    const uint8_t second = arr[pos+1];
+
+    if (swapBytes)
+        return (second << 8) | first;
+
+    return (first << 8) | second;
+}
+
+
+// Read the 32-bit word stored at arr[pos] .. arr[pos+3].
+// With swapBytes == 0 the first byte is the most significant one; with any
+// other value the first byte is the least significant one.
+//
+// Each byte is widened to uint32_t before shifting. Shifting the promoted
+// (signed) int left by 24 overflows for byte values of 0x80 and above.
+static
+uint32_t read32(uint8_t* arr, int pos,  int swapBytes)
+{
+    const uint32_t b0 = arr[pos];
+    const uint32_t b1 = arr[pos+1];
+    const uint32_t b2 = arr[pos+2];
+    const uint32_t b3 = arr[pos+3];
+
+    if(!swapBytes) {
+
+        return (b0 << 24) |
+               (b1 << 16) |
+               (b2 << 8) |
+                b3;
+    }
+
+    return b0 |
+           (b1 << 8) |
+           (b2 << 16) |
+           (b3 << 24);
+}
+
+
+
+// Decode the value of the directory entry that starts at arr[pos].
+//
+// The entry is read as: format at pos+2 (16 bits), component count at pos+4
+// (32 bits) and, at pos+8, either the value itself (format 3) or the offset
+// of the value inside arr (any other format).
+//
+//   format 5 (rational): two 32-bit words, numerator then denominator, are
+//                        read at that offset and their quotient is stored
+//                        through dest as a double.
+//   format 3 (uint 16):  the 16-bit value is stored through dest as an int.
+//
+// Returns 1 on success. Returns 0 when the component count is not 1, the
+// format is neither 3 nor 5, or a rational has a zero denominator (the
+// quotient would be infinite or NaN and unusable as a resolution).
+//
+// NOTE(review): dest is untyped, so the caller must pass an int* for format 3
+// and a double* for format 5; this function cannot check that. It also has
+// no knowledge of the buffer length, so offsets must be validated by the caller.
+static
+int read_tag(uint8_t* arr, int pos,  int swapBytes, void* dest)
+{
+    // Format should be 5 over here (rational)
+    uint32_t format = read16(arr, pos + 2, swapBytes);
+    // Components should be 1
+    uint32_t components = read32(arr, pos + 4, swapBytes);
+    // Points to the value
+    uint32_t offset;
+
+    // sanity
+    if (components != 1) return 0;
+
+    if (format == 3)
+        offset = pos + 8;
+    else
+        offset =  read32(arr, pos + 8, swapBytes);
+
+    switch (format) {
+
+    case 5: // Rational
+          {
+          double num = read32(arr, offset, swapBytes);
+          double den = read32(arr, offset + 4, swapBytes);
+
+          // A zero denominator cannot describe a resolution
+          if (den == 0) return 0;
+
+          *(double *) dest = num / den;
+          }
+          break;
+
+    case 3: // uint 16
+        *(int*) dest = read16(arr, offset, swapBytes);
+        break;
+
+    default:  return 0;
+    }
+
+    return 1;
+}
+
+
+
+// Check one directory entry before it is handed to read_tag().
+// Returns nonzero only if the entry has the expected format and, for
+// rationals (format 5), the 8 bytes of the value lie inside the TIFF data.
+static
+int ExifEntryIsSafe(uint8_t* tiff, uint32_t tiffLen, uint32_t entryOffset, int swapBytes, uint32_t wantedFormat)
+{
+    uint32_t format = read16(tiff, entryOffset + 2, swapBytes);
+
+    // read_tag() stores an int for format 3 and a double for format 5, so
+    // the format has to match the type of the destination variable
+    if (format != wantedFormat) return 0;
+
+    if (format == 5) {
+
+        uint32_t valueOfs = read32(tiff, entryOffset + 8, swapBytes);
+
+        if (tiffLen < 8 || valueOfs > tiffLen - 8) return 0;
+    }
+
+    return 1;
+}
+
+// Handler for EXIF data.
+//
+// Scans the saved APP1 markers for "Exif\0\0" payloads, reads the
+// XResolution, YResolution and ResolutionUnit entries of the first directory
+// and, when both resolutions are present, stores them in cinfo->X_density /
+// Y_density as dots per inch with density_unit set to 1.
+//
+// Every offset taken from the file is checked against the marker length
+// before it is used; a marker whose header or directory does not fit is
+// skipped. The byte order is determined separately for each marker.
+//
+// ResolutionUnit handling: 2 = inches (used as is), 3 = centimetres
+// (multiplied by 2.54). Any other unit, an entry with an unexpected format,
+// or a resolution that does not fit in 16 bits makes the function return
+// FALSE immediately.
+//
+// NOTE(review): as in the original code, this function returns FALSE on
+// every path, even after the densities were set. The consumer of the result
+// (lIsEXIF) is not visible here, so the return value is left unchanged --
+// confirm whether TRUE was intended.
+static
+    cmsBool HandleEXIF(struct jpeg_decompress_struct* cinfo)
+{
+    jpeg_saved_marker_ptr ptr;
+    uint32_t ifd_ofs;
+    int swapBytes;
+    uint32_t i, numEntries;
+    double XRes = -1, YRes = -1;
+    int Unit = 2; // Inches
+
+
+    for (ptr = cinfo ->marker_list; ptr; ptr = ptr ->next) {
+
+        JOCTET FAR* data;
+        uint32_t tiffLen;
+
+        if (ptr ->marker != JPEG_APP0+1 || ptr ->data_length <= 6) continue;
+        if (memcmp(ptr ->data, "Exif\0\0", 6) != 0) continue;
+
+        data    = ptr ->data + 6;                       // Skip EXIF marker
+        tiffLen = (uint32_t) ptr ->data_length - 6;     // Bytes available from here on
+
+        // 8 byte TIFF header: byte order (2), TIFF version (2), offset to
+        // the Image File Directory (4)
+        if (tiffLen < 8) continue;
+
+        swapBytes = (read16(data, 0, 0) == INTEL_BYTE_ORDER) ? 1 : 0;
+
+        // offset to Image File Directory (includes the previous 8 bytes)
+        ifd_ofs = read32(data, 4, swapBytes);
+
+        // The 2-byte entry count must be inside the marker
+        if (ifd_ofs > tiffLen - 2) continue;
+
+        // Search the directory for resolution tags
+        numEntries = read16(data, ifd_ofs, swapBytes);
+
+        for (i=0; i < numEntries; i++) {
+
+            uint32_t entryOffset = ifd_ofs + 2 + (12 * i);
+            uint32_t tag;
+
+            // Each entry takes 12 bytes; stop at the first one that does not fit
+            if (tiffLen < 12 || entryOffset > tiffLen - 12) break;
+
+            tag = read16(data, entryOffset, swapBytes);
+
+            switch (tag) {
+
+            case RESOLUTION_UNIT:
+                if (!ExifEntryIsSafe(data, tiffLen, entryOffset, swapBytes, 3)) return FALSE;
+                if (!read_tag(data, entryOffset, swapBytes, &Unit)) return FALSE;
+                break;
+
+            case XRESOLUTION:
+                if (!ExifEntryIsSafe(data, tiffLen, entryOffset, swapBytes, 5)) return FALSE;
+                if (!read_tag(data, entryOffset, swapBytes, &XRes)) return FALSE;
+                break;
+
+            case YRESOLUTION:
+                if (!ExifEntryIsSafe(data, tiffLen, entryOffset, swapBytes, 5)) return FALSE;
+                if (!read_tag(data, entryOffset, swapBytes, &YRes)) return FALSE;
+                break;
+
+            default:;
+            }
+
+        }
+
+        // Proceed if all found
+
+        if (XRes != -1 && YRes != -1)
+        {
+            double scale, xDensity, yDensity;
+
+            // 1 = None
+            // 2 = inches
+            // 3 = cm
+
+            switch (Unit) {
+
+            case 2:
+                scale = 1.0;
+                break;
+
+            case 3:
+                scale = 2.54;   // dots per cm -> dots per inch
+                break;
+
+            default: return FALSE;
+            }
+
+            xDensity = floor(XRes * scale + 0.5);
+            yDensity = floor(YRes * scale + 0.5);
+
+            // Written so that NaN fails the test as well
+            if (!(xDensity >= 0 && xDensity <= 65535.)) return FALSE;
+            if (!(yDensity >= 0 && yDensity <= 65535.)) return FALSE;
+
+            cinfo ->X_density = (UINT16) xDensity;
+            cinfo ->Y_density = (UINT16) yDensity;
+
+            cinfo ->density_unit = 1;  /* 1 for dots/inch, or 2 for dots/cm.*/
+
+        }
+    }
+    return FALSE;
+}
+
+
+// Open FileName for reading and prepare the global Decompressor for it:
+// installs the error handler, creates the decompression object, attaches the
+// file as its source, asks libjpeg to keep all APP0..APP15 markers and reads
+// the JPEG header. Also resets lIsITUFax.
+//
+// Returns TRUE. Failure to open the file is reported through FatalError().
+// NOTE(review): InFile is used right after the FatalError() call without a
+// further check, so FatalError() presumably does not return -- confirm.
+static
+cmsBool OpenInput(const char* FileName)
+{
+    int m;
+
+    lIsITUFax = FALSE;
+    InFile  = fopen(FileName, "rb");
+    if (InFile == NULL) {
+        FatalError("Cannot open '%s'", FileName);
+    }
+
+    // Now we can initialize the JPEG decompression object.
+    // jpeg_std_error() is called first; the two hooks are overridden afterwards,
+    // so both errors and messages go through my_error_exit().
+    Decompressor.err                 = jpeg_std_error(&ErrorHandler.pub);
+    ErrorHandler.pub.error_exit      = my_error_exit;
+    ErrorHandler.pub.output_message  = my_error_exit;
+
+    jpeg_create_decompress(&Decompressor);
+    jpeg_stdio_src(&Decompressor, InFile);
+
+    // Keep every application marker (APP0 .. APP15) so the ICC, EXIF,
+    // Photoshop and G3FAX handlers can inspect them later
+    for (m = 0; m < 16; m++)
+        jpeg_save_markers(&Decompressor, JPEG_APP0 + m, 0xFFFF);
+
+    // setup_read_icc_profile(&Decompressor);
+
+    fseek(InFile, 0, SEEK_SET);
+    jpeg_read_header(&Decompressor, TRUE);
+
+    return TRUE;
+}
+
+
+// Create FileName as the destination JPEG and attach it to the global
+// Compressor. Always returns TRUE; a creation failure is reported through
+// FatalError.
+static
+cmsBool OpenOutput(const char* FileName)
+{
+    OutFile = fopen(FileName, "wb");
+    if (!OutFile)
+        FatalError("Cannot create '%s'", FileName);
+
+    // Route libjpeg errors and messages to my_error_exit
+    Compressor.err = jpeg_std_error(&ErrorHandler.pub);
+    ErrorHandler.pub.error_exit     = my_error_exit;
+    ErrorHandler.pub.output_message = my_error_exit;
+
+    // Provisional component count, assigned before the compressor is created
+    Compressor.num_components   = 4;
+    Compressor.input_components = Compressor.num_components;
+
+    jpeg_create_compress(&Compressor);
+    jpeg_stdio_dest(&Compressor, OutFile);
+
+    return TRUE;
+}
+
+// Destroy both libjpeg objects and close the input and output files.
+// Returns TRUE only when both files were closed without error, FALSE
+// otherwise.
+static
+cmsBool Done(void)
+{
+    int rcIn, rcOut;
+
+    jpeg_destroy_decompress(&Decompressor);
+    jpeg_destroy_compress(&Compressor);
+
+    // fclose() yields 0 on success and EOF on failure, so summing the two
+    // results (as was done before) produced FALSE on success and a non-zero
+    // "true" value on failure. Close both files unconditionally, in a fixed
+    // order, and translate the outcome into a proper boolean.
+    rcIn  = fclose(InFile);
+    rcOut = fclose(OutFile);
+
+    return (rcIn == 0 && rcOut == 0) ? TRUE : FALSE;
+}
+
+
+// Inspect the opened input JPEG and build the lcms pixel-format descriptor
+// for its decoded samples. As side effects this records the ITU/Fax,
+// Photoshop APP13 and EXIF findings in the lIs* globals and selects
+// Decompressor.out_color_space. Returns 0 after reporting an unsupported
+// JPEG color space.
+static
+cmsUInt32Number GetInputPixelType(void)
+{
+    int space;
+    int ColorChannels;
+    int extra  = 0;            // No alpha channel
+    int bps    = 1;            // One byte (8 bits) per sample
+    int Flavor = 0;            // 0 = vanilla, 1 = chocolate (inverted)
+
+    lIsITUFax         = IsITUFax(Decompressor.marker_list);
+    lIsPhotoshopApp13 = HandlePhotoshopAPP13(Decompressor.marker_list);
+    lIsEXIF           = HandleEXIF(&Decompressor);
+
+    ColorChannels = Decompressor.num_components;
+
+    if (lIsITUFax) {
+
+        // ITU/Fax data is treated as Lab; YCbCr is declared as output space,
+        // presumably so that libjpeg hands the samples over untouched.
+        space = PT_Lab;
+        Decompressor.out_color_space = JCS_YCbCr;
+    }
+    else {
+
+        switch (Decompressor.jpeg_color_space) {
+
+        case JCS_GRAYSCALE:
+            space = PT_GRAY;
+            Decompressor.out_color_space = JCS_GRAYSCALE;
+            break;
+
+        // Plain RGB, or YCbCr which the JPEG library converts to RGB for us
+        case JCS_RGB:
+        case JCS_YCbCr:
+            space = PT_RGB;
+            Decompressor.out_color_space = JCS_RGB;
+            break;
+
+        // CMYK, or YCCK decoded as CMYK. Adobe keeps CMYK inverted, so the
+        // flavor changes to chocolate when an Adobe marker was seen.
+        case JCS_CMYK:
+        case JCS_YCCK:
+            space = PT_CMYK;
+            Decompressor.out_color_space = JCS_CMYK;
+            if (Decompressor.saw_Adobe_marker)
+                Flavor = 1;
+            break;
+
+        default:
+            FatalError("Unsupported color space (0x%x)", Decompressor.jpeg_color_space);
+            return 0;
+        }
+    }
+
+    return (EXTRA_SH(extra)|CHANNELS_SH(ColorChannels)|BYTES_SH(bps)|COLORSPACE_SH(space)|FLAVOR_SH(Flavor));
+}
+
+
+// Derive the output pixel-format descriptor: the planar setting is taken from
+// the input descriptor dwInput, the channel count and flavor from the output
+// color space (PT_* value), and samples are always one byte wide.
+static
+cmsUInt32Number ComputeOutputFormatDescriptor(cmsUInt32Number dwInput, int OutColorSpace)
+{
+    int planar   = T_PLANAR(dwInput);
+    int nChan    = 0;
+    int chocolate = 0;
+
+    if (OutColorSpace == PT_GRAY) {
+        nChan = 1;
+    }
+    else if (OutColorSpace == PT_RGB || OutColorSpace == PT_CMY ||
+             OutColorSpace == PT_Lab || OutColorSpace == PT_YUV ||
+             OutColorSpace == PT_YCbCr) {
+        nChan = 3;
+    }
+    else if (OutColorSpace == PT_CMYK) {
+
+        // Adobe keeps CMYK inverted, so switch the flavor to chocolate
+        if (Compressor.write_Adobe_marker)
+            chocolate = 1;
+        nChan = 4;
+    }
+    else {
+        FatalError("Unsupported output color space");
+    }
+
+    return (COLORSPACE_SH(OutColorSpace)|PLANAR_SH(planar)|CHANNELS_SH(nChan)|BYTES_SH(1)|FLAVOR_SH(chocolate));
+}
+
+
+// Equivalence between ICC color spaces and lcms color spaces: returns the
+// lcms notation for the color space signature reported by hProfile.
+static
+int GetProfileColorSpace(cmsHPROFILE hProfile)
+{
+    return _cmsLCMScolorSpace(cmsGetColorSpace(hProfile));
+}
+
+// Same conversion as for regular profiles, but based on the signature that
+// cmsGetPCS() reports for hProfile; used for device-link profiles.
+static
+int GetDevicelinkColorSpace(cmsHPROFILE hProfile)
+{
+    return _cmsLCMScolorSpace(cmsGetPCS(hProfile));
+}
+
+
+// From TRANSUPP
+//
+// Copy every marker saved in srcinfo->marker_list to dstinfo. A JFIF APP0 or
+// an Adobe APP14 marker is skipped when the encoder is configured to write
+// its own, so the output never carries a duplicate.
+static
+void jcopy_markers_execute(j_decompress_ptr srcinfo, j_compress_ptr dstinfo)
+{
+  jpeg_saved_marker_ptr cur;
+
+  for (cur = srcinfo->marker_list; cur != NULL; cur = cur->next) {
+
+    /* APP0 whose payload starts with "JFIF\0" while the encoder emits JFIF */
+    int dupJFIF =
+        dstinfo->write_JFIF_header &&
+        cur->marker == JPEG_APP0 &&
+        cur->data_length >= 5 &&
+        GETJOCTET(cur->data[0]) == 0x4A &&
+        GETJOCTET(cur->data[1]) == 0x46 &&
+        GETJOCTET(cur->data[2]) == 0x49 &&
+        GETJOCTET(cur->data[3]) == 0x46 &&
+        GETJOCTET(cur->data[4]) == 0;
+
+    /* APP14 whose payload starts with "Adobe" while the encoder emits Adobe */
+    int dupAdobe =
+        dstinfo->write_Adobe_marker &&
+        cur->marker == JPEG_APP0+14 &&
+        cur->data_length >= 5 &&
+        GETJOCTET(cur->data[0]) == 0x41 &&
+        GETJOCTET(cur->data[1]) == 0x64 &&
+        GETJOCTET(cur->data[2]) == 0x6F &&
+        GETJOCTET(cur->data[3]) == 0x62 &&
+        GETJOCTET(cur->data[4]) == 0x65;
+
+    if (dupJFIF || dupAdobe)
+        continue;
+
+    jpeg_write_marker(dstinfo, cur->marker, cur->data, cur->data_length);
+  }
+}
+
+// Configure the global Compressor for the given output color space (a PT_*
+// value): input/JPEG color spaces, component count, quality and chroma
+// sampling factors. An unsupported color space is reported through
+// FatalError and leaves the compressor untouched.
+static
+void WriteOutputFields(int OutputColorSpace)
+{
+    J_COLOR_SPACE in_space, jpeg_space;
+    int nComp;
+    int c;
+
+    switch (OutputColorSpace) {
+
+    case PT_GRAY:
+        in_space   = JCS_GRAYSCALE;
+        jpeg_space = JCS_GRAYSCALE;
+        nComp      = 1;
+        break;
+
+    case PT_RGB:                // fed as RGB, stored as YCbCr
+        in_space   = JCS_RGB;
+        jpeg_space = JCS_YCbCr;
+        nComp      = 3;
+        break;
+
+    case PT_YCbCr:              // Y/Cb/Cr (also known as YUV)
+    case PT_Lab:                // Lab is passed off as YCbCr so it is not touched
+        in_space   = JCS_YCbCr;
+        jpeg_space = JCS_YCbCr;
+        nComp      = 3;
+        break;
+
+    case PT_CMYK:               // fed as CMYK, stored as YCCK
+        in_space   = JCS_CMYK;
+        jpeg_space = JCS_YCCK;
+        nComp      = 4;
+        break;
+
+    default:
+        FatalError("Unsupported output color space");
+        return;
+    }
+
+    // At top quality, skip the color conversion so no extra loss is introduced
+    if (jpegQuality >= 100)
+        jpeg_space = in_space;
+
+    Compressor.in_color_space   = in_space;
+    Compressor.jpeg_color_space = jpeg_space;
+    Compressor.num_components   = nComp;
+    Compressor.input_components = nComp;
+
+    jpeg_set_defaults(&Compressor);
+    jpeg_set_colorspace(&Compressor, jpeg_space);
+
+    // Force a JFIF header on CMYK output so the resolution is passed through
+    if (OutputColorSpace == PT_CMYK)
+        Compressor.write_JFIF_header = 1;
+
+    jpeg_set_quality(&Compressor, jpegQuality, 1);
+
+    // Below quality 70 the sampling factors set by the library are kept
+    if (jpegQuality < 70)
+        return;
+
+    // High quality requested: disable subsampling on every component
+    for (c = 0; c < Compressor.num_components; c++) {
+        Compressor.comp_info[c].h_samp_factor = 1;
+        Compressor.comp_info[c].v_samp_factor = 1;
+    }
+}
+
+
+// Read the ICC profile stored in ProfileFile and embed it into the output
+// JPEG through write_icc_profile(). This is best effort: when the file cannot
+// be opened, measured or buffered, nothing is embedded and the function
+// returns silently.
+static
+void DoEmbedProfile(const char* ProfileFile)
+{
+    FILE* f;
+    long fileLen;
+    size_t size, EmbedLen;
+    cmsUInt8Number* EmbedBuffer;
+
+    f = fopen(ProfileFile, "rb");
+    if (f == NULL) return;
+
+    // Go through a signed type first: a negative length (presumably what
+    // cmsfilelength() reports on failure - TODO confirm) must not be turned
+    // into a huge size_t that is then used for the allocation and the read.
+    fileLen = (long) cmsfilelength(f);
+    if (fileLen < 0) {
+        fclose(f);
+        return;
+    }
+    size = (size_t) fileLen;
+
+    // One extra byte for the terminating zero written below
+    EmbedBuffer = (cmsUInt8Number*) malloc(size + 1);
+    if (EmbedBuffer == NULL) {
+        fclose(f);
+        return;
+    }
+
+    // EmbedLen is the number of bytes actually read, which never exceeds size
+    EmbedLen = fread(EmbedBuffer, 1, size, f);
+    fclose(f);
+    EmbedBuffer[EmbedLen] = 0;
+
+    write_icc_profile (&Compressor, EmbedBuffer, (unsigned int) EmbedLen);
+    free(EmbedBuffer);
+}
+
+
+
+// Decode the input JPEG scanline by scanline, run every line through the
+// color transform hXForm and encode the result into the output JPEG.
+//
+//   hXForm            transform applied to each scanline
+//   OutputColorSpace  PT_* value of the output; PT_Lab triggers SetITUFax()
+//
+// Returns TRUE on completion, FALSE if the scanline buffers could not be
+// allocated (after reporting the condition through FatalError).
+static
+int DoTransform(cmsHTRANSFORM hXForm, int OutputColorSpace)
+{
+    JSAMPROW ScanLineIn;
+    JSAMPROW ScanLineOut;
+
+    // Preserve resolution values from the original
+    // (Thanks to Robert Bergs for finding out this bug)
+    Compressor.density_unit = Decompressor.density_unit;
+    Compressor.X_density    = Decompressor.X_density;
+    Compressor.Y_density    = Decompressor.Y_density;
+
+    jpeg_start_decompress(&Decompressor);
+    jpeg_start_compress(&Compressor, TRUE);
+
+    if (OutputColorSpace == PT_Lab)
+        SetITUFax(&Compressor);
+
+    // Embed the profile if needed
+    if (EmbedProfile && cOutProf)
+        DoEmbedProfile(cOutProf);
+
+    // One scanline per buffer, one byte per component
+    ScanLineIn  = (JSAMPROW) malloc(Decompressor.output_width * Decompressor.num_components);
+    ScanLineOut = (JSAMPROW) malloc(Compressor.image_width * Compressor.num_components);
+
+    // Never hand a NULL row to libjpeg or lcms. free(NULL) is a no-op, so
+    // both pointers can be released unconditionally.
+    if (ScanLineIn == NULL || ScanLineOut == NULL) {
+
+        free(ScanLineIn);
+        free(ScanLineOut);
+        FatalError("Out of memory allocating scanline buffers");
+        return FALSE;
+    }
+
+    while (Decompressor.output_scanline < Decompressor.output_height) {
+
+        jpeg_read_scanlines(&Decompressor, &ScanLineIn, 1);
+
+        cmsDoTransform(hXForm, ScanLineIn, ScanLineOut, Decompressor.output_width);
+
+        jpeg_write_scanlines(&Compressor, &ScanLineOut, 1);
+    }
+
+    free(ScanLineIn);
+    free(ScanLineOut);
+
+    jpeg_finish_decompress(&Decompressor);
+    jpeg_finish_compress(&Compressor);
+
+    return TRUE;
+}
+
+
+
+// Transform one image
+//
+// Builds the color transform for the already opened input/output JPEGs (the
+// global Decompressor/Compressor) from the command-line options and runs it.
+//
+//   cDefInpProf  input profile to use when no embedded profile is taken; in
+//                device-link mode this is the device-link profile file. May
+//                be NULL.
+//   cOutputProf  output profile, or "*lab" (case-insensitive) for the
+//                built-in ITU profile. May be NULL. Not used in device-link
+//                mode.
+//
+// Always returns 1; problems are reported through FatalError.
+
+static
+int TransformImage(char *cDefInpProf, char *cOutputProf)
+{
+       cmsHPROFILE hIn, hOut, hProof;
+       cmsHTRANSFORM xform;
+       cmsUInt32Number wInput, wOutput;
+       int OutputColorSpace;
+       cmsUInt32Number dwFlags = 0;
+       cmsUInt32Number EmbedLen;
+       cmsUInt8Number* EmbedBuffer;
+
+
+       cmsSetAdaptationState(ObserverAdaptationState);
+
+       // Translate the option globals into transform flags
+       if (BlackPointCompensation) {
+
+            dwFlags |= cmsFLAGS_BLACKPOINTCOMPENSATION;
+       }
+
+
+       // Precalculation mode: 0 = off, 2 = hi-res, 3 = lo-res; any other
+       // value (including 1) adds no flag
+       switch (PrecalcMode) {
+
+       case 0: dwFlags |= cmsFLAGS_NOOPTIMIZE; break;
+       case 2: dwFlags |= cmsFLAGS_HIGHRESPRECALC; break;
+       case 3: dwFlags |= cmsFLAGS_LOWRESPRECALC; break;
+       default:;
+       }
+
+
+       if (GamutCheck) {
+            dwFlags |= cmsFLAGS_GAMUTCHECK;
+            cmsSetAlarmCodes(Alarm);
+       }
+
+       // Take input color space
+       wInput = GetInputPixelType();
+
+        // Device-link mode: a single profile, no output or proofing profile
+        if (lIsDeviceLink) {
+
+            hIn = cmsOpenProfileFromFile(cDefInpProf, "r");
+            hOut = NULL;
+            hProof = NULL;
+       }
+        else {
+
+        // Prefer the profile embedded in the JPEG unless told to ignore it
+        if (!IgnoreEmbedded && read_icc_profile(&Decompressor, &EmbedBuffer, &EmbedLen))
+        {
+              hIn = cmsOpenProfileFromMem(EmbedBuffer, EmbedLen);
+
+               // NOTE(review): hIn is passed to PrintProfileInformation before
+               // it is checked for NULL (the check happens further below)
+               if (Verbose) {
+
+                  fprintf(stdout, " (Embedded profile found)\n");
+                  PrintProfileInformation(hIn);
+                  fflush(stdout);
+              }
+
+               // Optionally dump the embedded profile to the file named by SaveEmbedded
+               if (hIn != NULL && SaveEmbedded != NULL)
+                          SaveMemoryBlock(EmbedBuffer, EmbedLen, SaveEmbedded);
+
+              free(EmbedBuffer);
+        }
+        else
+        {
+            // Default for ITU/Fax
+            if (cDefInpProf == NULL && T_COLORSPACE(wInput) == PT_Lab)
+                cDefInpProf = "*Lab";
+
+            // "*lab" selects the built-in ITU profile; anything else goes
+            // through OpenStockProfile
+            if (cDefInpProf != NULL && cmsstrcasecmp(cDefInpProf, "*lab") == 0)
+                hIn = CreateITU2PCS_ICC();
+            else
+                hIn = OpenStockProfile(0, cDefInpProf);
+       }
+
+        // Output profile, with the same "*lab" special case
+        if (cOutputProf != NULL && cmsstrcasecmp(cOutputProf, "*lab") == 0)
+            hOut = CreatePCS2ITU_ICC();
+        else
+        hOut = OpenStockProfile(0, cOutputProf);
+
+       // Optional soft-proofing profile
+       hProof = NULL;
+       if (cProofing != NULL) {
+
+           hProof = OpenStockProfile(0, cProofing);
+           if (hProof == NULL) {
+            FatalError("Proofing profile couldn't be read.");
+           }
+           dwFlags |= cmsFLAGS_SOFTPROOFING;
+          }
+       }
+
+        if (!hIn)
+            FatalError("Input profile couldn't be read.");
+        if (!lIsDeviceLink && !hOut)
+            FatalError("Output profile couldn't be read.");
+
+       // Make sure the input profile and the input JPEG use the same color space
+       if (cmsGetColorSpace(hIn) != _cmsICCcolorSpace(T_COLORSPACE(wInput)))
+              FatalError("Input profile is not operating in proper color space");
+
+
+       // Output colorspace is given by the output profile, or by the
+       // device-link profile itself in device-link mode
+
+        if (lIsDeviceLink) {
+            OutputColorSpace = GetDevicelinkColorSpace(hIn);
+        }
+        else {
+            OutputColorSpace = GetProfileColorSpace(hOut);
+        }
+
+       // Set up the compressor from the decompressor, then override the
+       // color-space dependent fields
+       jpeg_copy_critical_parameters(&Decompressor, &Compressor);
+
+       WriteOutputFields(OutputColorSpace);
+
+       wOutput      = ComputeOutputFormatDescriptor(wInput, OutputColorSpace);
+
+
+       xform = cmsCreateProofingTransform(hIn, wInput,
+                                          hOut, wOutput,
+                                          hProof, Intent,
+                                          ProofingIntent, dwFlags);
+       if (xform == NULL)
+                 FatalError("Cannot transform by using the profiles");
+
+       // The return value of DoTransform is not checked here
+       DoTransform(xform, OutputColorSpace);
+
+
+       jcopy_markers_execute(&Decompressor, &Compressor);
+
+       // Release the transform and the profiles; hOut is NULL in device-link mode
+       cmsDeleteTransform(xform);
+       cmsCloseProfile(hIn);
+       cmsCloseProfile(hOut);
+       if (hProof) cmsCloseProfile(hProof);
+
+       return 1;
+}
+
+
+// Simply print help
+//
+// Writes the program banner plus the help page selected by `level` to stderr
+// and then terminates the process with exit(0); this function never returns.
+//
+//   level  0 (or any unlisted value) = usage and flags, 1 = examples,
+//          2 = output of PrintBuiltins(), 3 = about/contact text
+
+static
+void Help(int level)
+{
+     fprintf(stderr, "little cms ICC profile applier for JPEG - v3.2 [LittleCMS %2.2f]\n\n", LCMS_VERSION / 1000.0);
+
+     switch(level) {
+
+     // Unknown levels share the usage page with level 0
+     default:
+     case 0:
+
+     fprintf(stderr, "usage: jpgicc [flags] input.jpg output.jpg\n");
+
+     // SW is the switch character printed in front of every flag
+     fprintf(stderr, "\nflags:\n\n");
+     fprintf(stderr, "%cv - Verbose\n", SW);
+     fprintf(stderr, "%ci<profile> - Input profile (defaults to sRGB)\n", SW);
+     fprintf(stderr, "%co<profile> - Output profile (defaults to sRGB)\n", SW);
+
+     PrintRenderingIntents();
+
+
+     fprintf(stderr, "%cb - Black point compensation\n", SW);
+     fprintf(stderr, "%cd<0..1> - Observer adaptation state (abs.col. only)\n", SW);
+     fprintf(stderr, "%cn - Ignore embedded profile\n", SW);
+     fprintf(stderr, "%ce - Embed destination profile\n", SW);
+     fprintf(stderr, "%cs<new profile> - Save embedded profile as <new profile>\n", SW);
+
+     fprintf(stderr, "\n");
+
+     fprintf(stderr, "%cc<0,1,2,3> - Precalculates transform (0=Off, 1=Normal, 2=Hi-res, 3=LoRes) [defaults to 1]\n", SW);
+     fprintf(stderr, "\n");
+
+     fprintf(stderr, "%cp<profile> - Soft proof profile\n", SW);
+     fprintf(stderr, "%cm<0,1,2,3> - SoftProof intent\n", SW);
+     fprintf(stderr, "%cg - Marks out-of-gamut colors on softproof\n", SW);
+     fprintf(stderr, "%c!<r>,<g>,<b> - Out-of-gamut marker channel values\n", SW);
+
+     fprintf(stderr, "\n");
+     fprintf(stderr, "%cq<0..100> - Output JPEG quality\n", SW);
+
+     fprintf(stderr, "\n");
+     fprintf(stderr, "%ch<0,1,2,3> - More help\n", SW);
+     break;
+
+     case 1:
+
+     // Five %c conversions below, matched by the five SW arguments
+     fprintf(stderr, "Examples:\n\n"
+                     "To color correct from scanner to sRGB:\n"
+                     "\tjpgicc %ciscanner.icm in.jpg out.jpg\n"
+                     "To convert from monitor1 to monitor2:\n"
+                     "\tjpgicc %cimon1.icm %comon2.icm in.jpg out.jpg\n"
+                     "To make a CMYK separation:\n"
+                     "\tjpgicc %coprinter.icm inrgb.jpg outcmyk.jpg\n"
+                     "To recover sRGB from a CMYK separation:\n"
+                     "\tjpgicc %ciprinter.icm incmyk.jpg outrgb.jpg\n"
+                     "To convert from CIELab ITU/Fax JPEG to sRGB\n"
+                     "\tjpgicc in.jpg out.jpg\n\n",
+                     SW, SW, SW, SW, SW);
+     break;
+
+     case 2:
+         PrintBuiltins();
+         break;
+
+     case 3:
+
+     fprintf(stderr, "This program is intended to be a demo of the little cms\n"
+                     "engine. Both lcms and this program are freeware. You can\n"
+                     "obtain both in source code at http://www.littlecms.com\n"
+                     "For suggestions, comments, bug reports etc. send mail to\n"
+                     "marti@littlecms.com\n\n");
+     break;
+     }
+
+     // Showing help always ends the program, with a success status
+     exit(0);
+}
+
+
+// The toggles stuff
+//
+// Parse the command-line switches with xgetopt() and store the results in the
+// file-level option variables. Every switch is accepted in lower and upper
+// case (except '!'). Invalid values and unknown switches are reported through
+// FatalError; the help switch calls Help(), which exits.
+//
+//   argc, argv  the arguments of main(), passed through unchanged
+
+static
+void HandleSwitches(int argc, char *argv[])
+{
+    int s;
+
+    while ((s=xgetopt(argc,argv,"bBnNvVGgh:H:i:I:o:O:P:p:t:T:c:C:Q:q:M:m:L:l:eEs:S:!:D:d:")) != EOF) {
+
+        switch (s)
+        {
+
+        // Black point compensation
+        case 'b':
+        case 'B':
+            BlackPointCompensation = TRUE;
+            break;
+
+        // Observer adaptation state, restricted to 0..1
+        case 'd':
+        case 'D': ObserverAdaptationState = atof(xoptarg);
+            if (ObserverAdaptationState < 0 ||
+                ObserverAdaptationState > 1.0)
+                FatalError("Adaptation state should be 0..1");
+            break;
+
+        case 'v':
+        case 'V':
+            Verbose = TRUE;
+            break;
+
+        // Input profile; cannot be combined with a device-link
+        case 'i':
+        case 'I':
+            if (lIsDeviceLink)
+                FatalError("Device-link already specified");
+
+            cInpProf = xoptarg;
+            break;
+
+        // Output profile; cannot be combined with a device-link
+        case 'o':
+        case 'O':
+            if (lIsDeviceLink)
+                FatalError("Device-link already specified");
+
+            cOutProf = xoptarg;
+            break;
+
+        // Device-link profile: stored as the input profile and flagged
+        case 'l':
+        case 'L':
+            if (cInpProf != NULL || cOutProf != NULL)
+                FatalError("input/output profiles already specified");
+
+            cInpProf = xoptarg;
+            lIsDeviceLink = TRUE;
+            break;
+
+        // Soft-proofing profile
+        case 'p':
+        case 'P':
+            cProofing = xoptarg;
+            break;
+
+        // Rendering intent
+        case 't':
+        case 'T':
+            Intent = atoi(xoptarg);
+            break;
+
+        // Ignore the profile embedded in the input
+        case 'N':
+        case 'n':
+            IgnoreEmbedded = TRUE;
+            break;
+
+        // Embed the destination profile in the output
+        case 'e':
+        case 'E':
+            EmbedProfile = TRUE;
+            break;
+
+        // Gamut check
+        case 'g':
+        case 'G':
+            GamutCheck = TRUE;
+            break;
+
+        // Precalculation mode. Modes 0..3 are valid: the help text documents
+        // 3 (LoRes) and TransformImage maps it to cmsFLAGS_LOWRESPRECALC, so
+        // the upper bound must be 3 (it used to reject 3).
+        case 'c':
+        case 'C':
+            PrecalcMode = atoi(xoptarg);
+            if (PrecalcMode < 0 || PrecalcMode > 3)
+                FatalError("Unknown precalc mode '%d'", PrecalcMode);
+            break;
+
+        // Help page selection; Help() does not return
+        case 'H':
+        case 'h':  {
+
+            int a =  atoi(xoptarg);
+            Help(a);
+                   }
+            break;
+
+        // Output JPEG quality, clamped to 0..100
+        case 'q':
+        case 'Q':
+            jpegQuality = atoi(xoptarg);
+            if (jpegQuality > 100) jpegQuality = 100;
+            if (jpegQuality < 0)   jpegQuality = 0;
+            break;
+
+        // Soft-proofing intent
+        case 'm':
+        case 'M':
+            ProofingIntent = atoi(xoptarg);
+            break;
+
+        // File name under which the embedded profile is saved
+        case 's':
+        case 'S': SaveEmbedded = xoptarg;
+            break;
+
+        // Out-of-gamut marker "r,g,b": each value is replicated into the
+        // high byte ((v << 8) | v). A malformed argument is ignored and
+        // leaves Alarm as it was.
+        case '!':
+            if (sscanf(xoptarg, "%hu,%hu,%hu", &Alarm[0], &Alarm[1], &Alarm[2]) == 3) {
+                int i;
+                for (i=0; i < 3; i++) {
+                    Alarm[i] = (Alarm[i] << 8) | Alarm[i];
+                }
+            }
+            break;
+
+        default:
+
+            FatalError("Unknown option - run without args to see valid ones");
+        }
+
+    }
+}
+
+
+// Program entry point: parse the switches, open the two JPEG files named by
+// the remaining arguments, convert the image and clean up.
+int main(int argc, char* argv[])
+{
+    InitUtils("jpgicc");
+
+    HandleSwitches(argc, argv);
+
+    // Exactly two positional arguments (input and output file) must remain
+    // after the switches; otherwise show the usage page, which exits.
+    if (argc - xoptind != 2)
+        Help(0);
+
+    OpenInput(argv[xoptind]);
+    OpenOutput(argv[xoptind + 1]);
+
+    TransformImage(cInpProf, cOutProf);
+
+    // Terminate the verbose output with a newline
+    if (Verbose) {
+        fputs("\n", stdout);
+        fflush(stdout);
+    }
+
+    Done();
+
+    return 0;
+}
+
+
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/linkicc/Makefile.am b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/linkicc/Makefile.am
new file mode 100755
index 0000000000..3b9186970c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/linkicc/Makefile.am
@@ -0,0 +1,19 @@
+#
+# Makefile for building lcms sample programs
+# Originally Written by Bob Friesenhahn, June 2003
+# Additions and bugs by Marti Maria 
+#
+# This automake input describes a single program, linkicc, built from
+# linkicc.c plus the helper sources shared under ../common.
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+
+# Header search path: the include and utils/common directories, looked up in
+# both the build tree and the source tree
+AM_CPPFLAGS =  -I$(top_builddir)/include -I$(top_srcdir)/include \
+              -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+
+# Program installed into $(bindir)
+bin_PROGRAMS = linkicc 
+
+# linkicc links against the in-tree liblcms2 libtool library
+linkicc_LDADD = $(top_builddir)/src/liblcms2.la 
+linkicc_LDFLAGS = @LDFLAGS@
+linkicc_SOURCES = linkicc.c ../common/xgetopt.c ../common/vprf.c ../common/utils.h
+# NOTE(review): the man page is assigned to linkicc_MANS, while EXTRA_DIST
+# below expands $(man_MANS), which is not assigned anywhere in this file. As
+# written, linkicc.1 does not appear to be picked up for distribution -
+# confirm whether man_MANS was intended.
+linkicc_MANS = linkicc.1
+
+EXTRA_DIST = $(man_MANS)
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/linkicc/Makefile.in b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/linkicc/Makefile.in
new file mode 100755
index 0000000000..2631ab8791
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/linkicc/Makefile.in
@@ -0,0 +1,663 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for building lcms sample programs
+# Originally Written by Bob Friesenhahn, June 2003
+# Additions and bugs by Marti Maria 
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+bin_PROGRAMS = linkicc$(EXEEXT)
+subdir = utils/linkicc
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_append_compile_flags.m4 \
+	$(top_srcdir)/m4/ax_append_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+	$(top_srcdir)/m4/ax_require_defined.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)"
+PROGRAMS = $(bin_PROGRAMS)
+am__dirstamp = $(am__leading_dot)dirstamp
+am_linkicc_OBJECTS = linkicc.$(OBJEXT) ../common/xgetopt.$(OBJEXT) \
+	../common/vprf.$(OBJEXT)
+linkicc_OBJECTS = $(am_linkicc_OBJECTS)
+linkicc_DEPENDENCIES = $(top_builddir)/src/liblcms2.la
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 = 
+linkicc_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(linkicc_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(linkicc_SOURCES)
+DIST_SOURCES = $(linkicc_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_THREAD = @LIB_THREAD@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_CXX = @PTHREAD_CXX@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
+              -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+
+linkicc_LDADD = $(top_builddir)/src/liblcms2.la 
+linkicc_LDFLAGS = @LDFLAGS@
+linkicc_SOURCES = linkicc.c ../common/xgetopt.c ../common/vprf.c ../common/utils.h
+linkicc_MANS = linkicc.1
+EXTRA_DIST = $(man_MANS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign utils/linkicc/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign utils/linkicc/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binPROGRAMS: $(bin_PROGRAMS)
+	@$(NORMAL_INSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+	fi; \
+	for p in $$list; do echo "$$p $$p"; done | \
+	sed 's/$(EXEEXT)$$//' | \
+	while read p p1; do if test -f $$p \
+	 || test -f $$p1 \
+	  ; then echo "$$p"; echo "$$p"; else :; fi; \
+	done | \
+	sed -e 'p;s,.*/,,;n;h' \
+	    -e 's|.*|.|' \
+	    -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+	sed 'N;N;N;s,\n, ,g' | \
+	$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+	  { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+	    if ($$2 == $$4) files[d] = files[d] " " $$1; \
+	    else { print "f", $$3 "/" $$4, $$1; } } \
+	  END { for (d in files) print "f", d, files[d] }' | \
+	while read type dir files; do \
+	    if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+	    test -z "$$files" || { \
+	    echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+	    $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+	    } \
+	; done
+
+uninstall-binPROGRAMS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	files=`for p in $$list; do echo "$$p"; done | \
+	  sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+	      -e 's/$$/$(EXEEXT)/' \
+	`; \
+	test -n "$$list" || exit 0; \
+	echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+	cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+	@list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+	echo " rm -f" $$list; \
+	rm -f $$list || exit $$?; \
+	test -n "$(EXEEXT)" || exit 0; \
+	list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+	echo " rm -f" $$list; \
+	rm -f $$list
+../common/$(am__dirstamp):
+	@$(MKDIR_P) ../common
+	@: > ../common/$(am__dirstamp)
+../common/xgetopt.$(OBJEXT): ../common/$(am__dirstamp)
+../common/vprf.$(OBJEXT): ../common/$(am__dirstamp)
+
+linkicc$(EXEEXT): $(linkicc_OBJECTS) $(linkicc_DEPENDENCIES) $(EXTRA_linkicc_DEPENDENCIES) 
+	@rm -f linkicc$(EXEEXT)
+	$(AM_V_CCLD)$(linkicc_LINK) $(linkicc_OBJECTS) $(linkicc_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+	-rm -f ../common/*.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+.c.o:
+	$(AM_V_CC)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+	$(AM_V_CC)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+	$(AM_V_CC)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS)
+installdirs:
+	for dir in "$(DESTDIR)$(bindir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+	-rm -f ../common/$(am__dirstamp)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean \
+	clean-binPROGRAMS clean-generic clean-libtool cscopelist-am \
+	ctags ctags-am distclean distclean-compile distclean-generic \
+	distclean-libtool distclean-tags distdir dvi dvi-am html \
+	html-am info info-am install install-am install-binPROGRAMS \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	installcheck installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags tags-am uninstall uninstall-am uninstall-binPROGRAMS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/linkicc/linkicc.1 b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/linkicc/linkicc.1
new file mode 100755
index 0000000000..56f73bb9a1
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/linkicc/linkicc.1
@@ -0,0 +1,123 @@
+.\"Shiju P. Nair September 30, 2004
+.\"Thomas Weber <tweber@debian.org> April 23, 2014
+.TH LINKICC 1 "September 30, 2004"
+.SH NAME
+linkicc - little cms device link generator.
+.SH SYNOPSIS
+.B linkicc
+.RI [ options ] " profiles"
+.SH DESCRIPTION
+lcms is a standalone CMM engine, which deals with the color management.
+It implements a fast transformation between ICC profiles.
+.B linkicc
+is a little cms device link generator.
+.P
+Links two or more profiles into a single devicelink profile.
+Colorspaces must be paired except Lab/XYZ, that can be interchanged.
+.SH OPTIONS
+.TP
+.BI \-a\  NUM
+Observer adaptation state (abs.col. only), (0..1.0, float value) [defaults to 1.0].
+.TP
+.B \-b
+Black point compensation.
+.TP
+.BI \-c\  precision
+Precision (0=LowRes, 1=Normal, 2=Hi-res) [defaults to 1].
+.TP
+.BI \-d\  description
+Description text (quotes can be used).
+.TP
+.BI \-h\  NUM
+Show summary of options and examples (0=help, 1=Built-in profiles, 2=Examples, 3=Contact information).
+.TP
+.BI \-k\  inklimit
+Ink-limiting in % (CMYK only), (0..400.0, float value) [default 400.0].
+.TP
+.B \-l
+Use linearization curves (may affect accuracy).
+.TP
+.BI \-n\  gridpoints
+Alternate way to set precision, number of CLUT points.
+.TP
+.BI \-o\  profile
+Output devicelink profile [defaults to 'devicelink.icc'].
+.TP
+.BI \-r\  profileversion
+Profile version. (CAUTION: may change the profile implementation), (2.0..4.3, float value) [defaults to 4.3].
+.TP
+.BI \-t\  NUM
+Rendering intent
+.nf
+.RS
+0=Perceptual [default]
+1=Relative colorimetric
+2=Saturation
+3=Absolute colorimetric
+10=Perceptual preserving black ink
+11=Relative colorimetric preserving black ink
+12=Saturation preserving black ink
+13=Perceptual preserving black plane
+14=Relative colorimetric preserving black plane
+15=Saturation preserving black plane
+.RE
+.fi
+.TP
+.BI \-v\  verbosity
+Verbosity level, (0=None, 1=Normal, 2=High, 3=Very High) [defaults to 0].
+.TP
+.B \-x
+Creatively, guess deviceclass of resulting profile.
+.TP
+.BI \-y\  copyright
+Copyright notice (quotes can be used) ["No copyright, use freely"].
+.TP
+.B \-8
+Creates 8-bit devicelink.
+.SH BUILT-IN PROFILES
+.nf
+	*Lab2  -- D50-based v2 CIEL*a*b
+	*Lab4  -- D50-based v4 CIEL*a*b
+	*Lab   -- D50-based v4 CIEL*a*b
+	*XYZ   -- CIE XYZ (PCS)
+	*sRGB  -- sRGB color space
+	*Gray22 - Monochrome of Gamma 2.2
+	*Gray30 - Monochrome of Gamma 3.0
+	*null   - Monochrome black for all input
+	*Lin2222- CMYK linearization of gamma 2.2 on each channel
+.fi
+.SH EXAMPLES
+.nf
+To create 'devicelink.icc' from a.icc to b.icc:
+	linkicc a.icc b.icc
+
+To create 'out.icc' from sRGB to cmyk.icc:
+	linkicc -o out.icc *sRGB cmyk.icc
+
+To create a sRGB input profile working in Lab:
+	linkicc -x -o sRGBLab.icc *sRGB *Lab
+
+To create a XYZ -> sRGB output profile:
+	linkicc -x -o sRGBLab.icc *XYZ *sRGB
+
+To create an abstract profile doing softproof for cmyk.icc:
+	linkicc -t1 -x -o softproof.icc *Lab cmyk.icc cmyk.icc *Lab
+
+To create a 'grayer' sRGB input profile:
+	linkicc -x -o grayer.icc *sRGB gray.icc gray.icc *Lab
+
+To embed ink limiting into a cmyk output profile:
+	linkicc -x -o cmyklimited.icc -k 250 cmyk.icc *Lab
+
+.fi
+.SH NOTES
+For suggestions, comments, bug reports etc. send mail to
+info@littlecms.com.
+.SH SEE ALSO
+.BR jpgicc (1),
+.BR psicc (1),
+.BR tificc (1),
+.BR transicc (1)
+.SH AUTHOR
+This manual page was written by Shiju P. Nair <shiju.p@gmail.com>,
+for the Debian project.
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/linkicc/linkicc.c b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/linkicc/linkicc.c
new file mode 100755
index 0000000000..0fd9d0c5c4
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/linkicc/linkicc.c
@@ -0,0 +1,384 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+
+#include "utils.h"
+
+// ---------------------------------------------------------------------------------
+
+static char* Description = "Devicelink profile";          // -d
+static char* Copyright   = "No copyright, use freely";    // -y
+static int   Intent = INTENT_PERCEPTUAL;                  // -t
+static char* cOutProf    = "devicelink.icc";              // -o
+static int   PrecalcMode  = 1;                            // -c, checked to be 0..2
+static int   NumOfGridPoints = 0;                         // -n, only used when PrecalcMode is 1
+
+static cmsFloat64Number ObserverAdaptationState = 1.0;  // According ICC 4.2 this is the default (-a)
+
+static cmsBool BlackPointCompensation = FALSE;            // -b
+
+static cmsFloat64Number InkLimit   = 400;                 // -k; 400 means no ink-limiting link is added
+static cmsBool lUse8bits           = FALSE;               // -8
+static cmsBool TagResult           = FALSE;               // -x
+static cmsBool KeepLinearization   = FALSE;               // -l
+static cmsFloat64Number Version    = 4.3;                 // -r
+
+
+// The manual: prints to stderr the help page selected by level (0 or any
+// other value = usage, 1 = built-in profiles, 2 = examples, 3 = contact).
+// Never returns: every path ends in exit(0).
+static
+int Help(int level)
+{
+     switch (level) {
+
+     default:
+     case 0:
+         fprintf(stderr, "\nlinkicc: Links profiles into a single devicelink.\n");
+         fprintf(stderr, "\n");
+         fprintf(stderr, "usage: linkicc [flags] <profiles>\n\n");
+         fprintf(stderr, "flags:\n\n");
+         fprintf(stderr, "%co<profile> - Output devicelink profile. [defaults to 'devicelink.icc']\n", SW);
+
+         PrintRenderingIntents();
+
+         fprintf(stderr, "%cc<0,1,2> - Precision (0=LowRes, 1=Normal, 2=Hi-res) [defaults to 1]\n", SW);
+         fprintf(stderr, "%cn<gridpoints> - Alternate way to set precision, number of CLUT points\n", SW);
+         fprintf(stderr, "%cd<description> - description text (quotes can be used)\n", SW);
+         fprintf(stderr, "%cy<copyright> - copyright notice (quotes can be used)\n", SW);
+         fprintf(stderr, "%cv<0..3> - Verbosity level (0=None, 1=Normal, 2=High, 3=Very High)\n", SW);
+         fprintf(stderr, "\n%ck<0..400> - Ink-limiting in %% (CMYK only)\n", SW);
+         fprintf(stderr, "%c8 - Creates 8-bit devicelink\n", SW);
+         fprintf(stderr, "%cx - Creatively, guess deviceclass of resulting profile.\n", SW);
+         fprintf(stderr, "%cb - Black point compensation\n", SW);
+         fprintf(stderr, "%ca<0..1> - Observer adaptation state (abs.col. only)\n\n", SW);
+         fprintf(stderr, "%cl - Use linearization curves (may affect accuracy)\n", SW);
+         fprintf(stderr, "%cr<v.r> - Profile version. (CAUTION: may change the profile implementation)\n", SW);
+         fprintf(stderr, "\n");
+         fprintf(stderr, "Colorspaces must be paired except Lab/XYZ, that can be interchanged.\n\n");
+
+         fprintf(stderr, "%ch<0,1,2,3> - More help\n", SW);
+         break;
+
+     case 1:
+         PrintBuiltins();
+         break;
+
+     case 2:
+         // The first example names the default output file, cOutProf.
+         fprintf(stderr, "\nExamples:\n\n"
+             "To create 'devicelink.icc' from a.icc to b.icc:\n"
+             "\tlinkicc a.icc b.icc\n\n"
+             "To create 'out.icc' from sRGB to cmyk.icc:\n"
+             "\tlinkicc -o out.icc *sRGB cmyk.icc\n\n"
+             "To create a sRGB input profile working in Lab:\n"
+             "\tlinkicc -x -o sRGBLab.icc *sRGB *Lab\n\n"
+             "To create a XYZ -> sRGB output profile:\n"
+             "\tlinkicc -x -o sRGBLab.icc *XYZ *sRGB\n\n"
+             "To create a abstract profile doing softproof for cmyk.icc:\n"
+             "\tlinkicc -t1 -x -o softproof.icc *Lab cmyk.icc cmyk.icc *Lab\n\n"
+             "To create a 'grayer' sRGB input profile:\n"
+             "\tlinkicc -x -o grayer.icc *sRGB gray.icc gray.icc *Lab\n\n"
+             "To embed ink limiting into a cmyk output profile:\n"
+             "\tlinkicc -x -o cmyklimited.icc -k 250 cmyk.icc *Lab\n\n");
+         break;
+
+     case 3:
+
+         fprintf(stderr, "This program is intended to be a demo of the little cms\n"
+             "engine. Both lcms and this program are freeware. You can\n"
+             "obtain both in source code at http://www.littlecms.com\n"
+             "For suggestions, comments, bug reports etc. send mail to\n"
+             "info@littlecms.com\n\n");
+    }
+
+   exit(0);
+}
+
+// The toggles stuff: parses the command line switches into the file-level settings.
+static
+void HandleSwitches(int argc, char *argv[])
+{
+    int s;
+
+    while ((s = xgetopt(argc,argv,"a:A:BbC:c:D:d:h:H:k:K:lLn:N:O:o:r:R:T:t:V:v:xX8y:Y:")) != EOF) {
+
+    switch (s) {
+        // Every letter is accepted in lower and upper case.
+
+        case 'a':
+        case 'A':             
+            ObserverAdaptationState = atof(xoptarg);
+            if (ObserverAdaptationState < 0 || 
+                ObserverAdaptationState > 1.0)
+                       FatalError("Adaptation state should be 0..1");
+            break;      
+
+        case 'b':
+        case 'B':
+            BlackPointCompensation = TRUE;
+           break;
+
+        case 'c':
+        case 'C':
+            PrecalcMode = atoi(xoptarg);
+            if (PrecalcMode < 0 || PrecalcMode > 2) {
+                FatalError("Unknown precalc mode '%d'", PrecalcMode);
+            }
+           break;
+
+       case 'd':
+       case 'D':
+           // Doing that is correct and safe: Description points to memory allocated in the command line.
+           // same for Copyright and output devicelink.
+           Description = xoptarg;
+           break;
+
+        case 'h':
+        case 'H':
+            Help(atoi(xoptarg));
+            return;   // Help() ends in exit(0), so this is not reached
+
+        case 'k':
+        case 'K':
+            InkLimit = atof(xoptarg);
+            if (InkLimit < 0.0 || InkLimit > 400.0) {
+                FatalError("Ink limit must be 0%%..400%%");
+            }
+           break;
+
+
+        case 'l':
+        case 'L': KeepLinearization = TRUE;
+           break;
+       // -n is rejected when an earlier -c already selected a mode other than 1.
+       case 'n':
+       case 'N':
+           if (PrecalcMode != 1) {
+               FatalError("Precalc mode already specified");
+           }
+           NumOfGridPoints = atoi(xoptarg);
+           break;
+
+        case 'o':
+        case 'O':
+            cOutProf = xoptarg;
+           break;
+
+       // A version outside 2.0..4.3 only produces a warning; the value is still used.
+       case 'r':
+       case 'R':
+          Version = atof(xoptarg);
+          if (Version < 2.0 || Version > 4.3) {
+              fprintf(stderr, "WARNING: lcms was not aware of this version, tag types may be wrong!\n");
+          }
+          break;
+
+        case 't':
+        case 'T':
+            Intent = atoi(xoptarg);  // Will be validated later on
+            break;
+
+        case 'V':
+        case 'v':
+            Verbose = atoi(xoptarg);
+            if (Verbose < 0 || Verbose > 3) {
+                FatalError("Unknown verbosity level '%d'", Verbose);
+            }
+            break;
+
+        case '8':
+            lUse8bits = TRUE;
+            break;
+
+
+
+        case 'y':
+        case 'Y':
+            Copyright = xoptarg;
+            break;
+
+
+
+       case 'x':
+       case 'X': TagResult = TRUE;
+           break;
+
+
+           
+       default:
+
+           FatalError("Unknown option - run without args to see valid ones.\n");          
+        }       
+    }
+}
+
+// Set the copyright and description.
+// Stores the file-level Description and Copyright strings in the profile as
+// "en"/"US" text tags. Returns TRUE only when both tags were written, FALSE
+// on any allocation, conversion or write failure.
+static
+cmsBool SetTextTags(cmsHPROFILE hProfile)
+{
+    cmsContext ctx     = cmsGetProfileContextID(hProfile);
+    cmsMLU*    descMLU = cmsMLUalloc(ctx, 1);
+    cmsMLU*    copyMLU = cmsMLUalloc(ctx, 1);
+    cmsBool    ok      = FALSE;
+
+    // Short-circuit evaluation keeps the steps in sequence: check both
+    // allocations, fill both containers, then write both tags.
+    if (descMLU != NULL && copyMLU != NULL &&
+        cmsMLUsetASCII(descMLU, "en", "US", Description) &&
+        cmsMLUsetASCII(copyMLU, "en", "US", Copyright) &&
+        cmsWriteTag(hProfile, cmsSigProfileDescriptionTag, descMLU) &&
+        cmsWriteTag(hProfile, cmsSigCopyrightTag, copyMLU)) {
+
+        ok = TRUE;
+    }
+
+    // The containers are released on every path, success included.
+    if (descMLU != NULL) cmsMLUfree(descMLU);
+    if (copyMLU != NULL) cmsMLUfree(copyMLU);
+
+    return ok;
+}
+
+// Program entry: links the profiles named on the command line into a single
+// devicelink profile. Returns 0 only when the devicelink file was saved.
+int main(int argc, char *argv[])
+{
+    int i, nargs, rc;
+    cmsHPROFILE Profiles[257];      // up to 255 inputs plus the optional ink-limiting link
+    cmsHPROFILE hProfile;
+    cmsUInt32Number dwFlags;
+    cmsHTRANSFORM hTransform = NULL;
+
+    // Here we are
+    fprintf(stderr, "little cms ICC device link generator - v2.2 [LittleCMS %2.2f]\n", LCMS_VERSION / 1000.0);
+    fflush(stderr);
+
+    // Initialize. rc stays at failure until the devicelink has been saved.
+    InitUtils("linkicc");
+    rc = 1;
+
+    // Get the options
+    HandleSwitches(argc, argv);
+
+    // How many profiles to link?
+    nargs = (argc - xoptind);
+    if (nargs < 1)
+        return Help(0);
+
+    if (nargs > 255) {
+        FatalError("Holy profile! what are you trying to do with so many profiles!?");
+        return 1;   // Profiles[] is not initialized yet, so Cleanup must not scan it
+    }
+
+    // Open all profiles
+    memset(Profiles, 0, sizeof(Profiles));
+    for (i=0; i < nargs; i++) {
+
+        Profiles[i] = OpenStockProfile(0, argv[i + xoptind]);
+        if (Profiles[i] == NULL) goto Cleanup;
+
+        if (Verbose >= 1) {
+            PrintProfileInformation(Profiles[i]);
+        }
+    }
+
+    // Ink limiting: appended as one more link, built for the last profile's color space
+    if (InkLimit != 400.0) {
+        cmsColorSpaceSignature EndingColorSpace = cmsGetColorSpace(Profiles[nargs-1]);
+        Profiles[nargs++] = cmsCreateInkLimitingDeviceLink(EndingColorSpace, InkLimit);
+    }
+
+    // Set the flags
+    dwFlags = cmsFLAGS_KEEP_SEQUENCE;
+    switch (PrecalcMode) {
+
+        case 0: dwFlags |= cmsFLAGS_LOWRESPRECALC; break;
+        case 2: dwFlags |= cmsFLAGS_HIGHRESPRECALC; break;
+        case 1:
+            if (NumOfGridPoints > 0)
+                dwFlags |= cmsFLAGS_GRIDPOINTS(NumOfGridPoints);
+            break;
+
+        default:
+            {
+                FatalError("Unknown precalculation mode '%d'", PrecalcMode);
+                goto Cleanup;
+            }
+    }
+
+    if (BlackPointCompensation)
+        dwFlags |= cmsFLAGS_BLACKPOINTCOMPENSATION;
+
+    if (TagResult)
+        dwFlags |= cmsFLAGS_GUESSDEVICECLASS;
+
+    if (KeepLinearization)
+        dwFlags |= cmsFLAGS_CLUT_PRE_LINEARIZATION|cmsFLAGS_CLUT_POST_LINEARIZATION;
+
+    if (lUse8bits) dwFlags |= cmsFLAGS_8BITS_DEVICELINK;
+
+     cmsSetAdaptationState(ObserverAdaptationState);
+
+    // Create the color transform. Specify 0 for the format is safe as the transform
+    // is intended to be used only for the devicelink.
+    hTransform = cmsCreateMultiprofileTransform(Profiles, nargs, 0, 0, Intent, dwFlags|cmsFLAGS_NOOPTIMIZE);
+    if (hTransform == NULL) {
+        FatalError("Transform creation failed");
+        goto Cleanup;
+    }
+
+    hProfile =  cmsTransform2DeviceLink(hTransform, Version, dwFlags);
+    if (hProfile == NULL) {
+        FatalError("Devicelink creation failed");
+        goto Cleanup;
+    }
+
+    SetTextTags(hProfile);
+    cmsSetHeaderRenderingIntent(hProfile, Intent);
+
+    if (cmsSaveProfileToFile(hProfile, cOutProf)) {
+        rc = 0;     // the only successful outcome
+        if (Verbose > 0)
+            fprintf(stderr, "Ok");
+    }
+    else
+        FatalError("Error saving file!");
+
+    cmsCloseProfile(hProfile);
+
+
+Cleanup:
+
+    if (hTransform != NULL) cmsDeleteTransform(hTransform);
+    for (i=0; i < nargs; i++) {
+
+        if (Profiles[i] != NULL) cmsCloseProfile(Profiles[i]);
+    }
+
+    return rc;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/matlab/icctrans.c b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/matlab/icctrans.c
new file mode 100755
index 0000000000..2125f94526
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/matlab/icctrans.c
@@ -0,0 +1,724 @@
+//
+//  Little cms
+//  Copyright (C) 1998-2010 Marti Maria, Ignacio Ruiz de Conejo
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+#include "mex.h"
+
+#include "lcms2.h"
+#include "string.h"
+#include "stdarg.h"
+
+// xgetopt() interface -----------------------------------------------------
+
+static int   xoptind;    
+static char *xoptarg; 
+static int   xopterr;  
+static char  *letP;
+static char   SW = '-';
+
+// ------------------------------------------------------------------------
+
+
+static int  Verbose ;			// Print some statistics
+static char *cInProf;			// Input profile
+static char *cOutProf;			// Output profile
+static char *cProofing;			// Softproofing profile
+
+
+static int  Intent;				// Rendering Intent
+static int  ProofingIntent;		// RI for proof
+
+static int  PrecalcMode;		// 0 = Not, 1=Normal, 2=Accurate, 3=Fast
+
+static cmsBool BlackPointCompensation;
+static cmsBool lIsDeviceLink;
+static cmsBool lMultiProfileChain;		// Multiple profile chain
+
+static cmsHPROFILE hInput, hOutput, hProof;
+static cmsHTRANSFORM hColorTransform;
+static cmsHPROFILE hProfiles[255];
+static int nProfiles;
+
+static cmsColorSpaceSignature InputColorSpace, OutputColorSpace;
+static int OutputChannels, InputChannels, nBytesDepth;
+
+
+// Error. Print error message and abort
+
+static
+cmsBool FatalError(const char *frm, ...)
+{
+	va_list args;
+	char Buffer[1024];
+
+	va_start(args, frm);
+	vsprintf(Buffer, frm, args);
+	mexErrMsgTxt(Buffer);   
+	va_end(args);
+
+	return FALSE;               
+}
+
+// This is the handler passed to lcms: it forwards the library's error text
+// to mexErrMsgTxt. ContextID and ErrorCode are not used.
+static
+void MatLabErrorHandler(cmsContext ContextID, cmsUInt32Number ErrorCode, 
+						const char *Text)
+{      
+	mexErrMsgTxt(Text);    
+}
+//
+//  Parse the command line options, System V style.
+//
+// xoptinit() resets the parser state so that xgetopt() starts a fresh scan.
+static
+void xoptinit()
+{   
+	xoptind = 1;	// scanning starts at argv[1]
+	xopterr = 0;	// xgetopt() only calls FatalError when this is non-zero
+	letP = NULL;	// no argument word is partially consumed
+}
+// Returns the next option letter from argv, '?' on an unknown option or a
+// missing argument, or EOF when the options are over. Arguments go to xoptarg.
+static
+int xgetopt(int argc, char *argv[], char *optionS)
+{
+	unsigned char ch;
+	char *optP;
+
+	if (SW == 0) {
+		SW = '/';
+	}
+	// letP == NULL means a new argv word has to be started.
+	if (argc > xoptind) {
+		if (letP == NULL) {
+			if ((letP = argv[xoptind]) == NULL ||
+				*(letP++) != SW)  goto gopEOF;	// word does not start with the switch char
+			if (*letP == SW) {			// doubled switch char: skip the word and stop
+				xoptind++;  goto gopEOF;
+			}
+		}
+		if (0 == (ch = *(letP++))) {			// no option letter left in this word
+			xoptind++;  goto gopEOF;
+		}
+		if (':' == ch  ||  (optP = strchr(optionS, ch)) == NULL)
+			goto gopError;				// letter is not listed in optionS
+		if (':' == *(++optP)) {				// option takes an argument
+			xoptind++;
+			if (0 == *letP) {			// argument is the next argv word
+				if (argc <= xoptind)  goto  gopError;
+				letP = argv[xoptind++];
+			}
+			xoptarg = letP;				// otherwise it is the rest of this word
+			letP = NULL;
+		} else {					// plain flag
+			if (0 == *letP) {			// last letter of this word
+				xoptind++;
+				letP = NULL;
+			}
+			xoptarg = NULL;
+		}
+		return ch;
+	}
+gopEOF:
+	xoptarg = letP = NULL;
+	return EOF;
+
+gopError:
+	xoptarg = NULL;    
+	if (xopterr)
+		FatalError ("get command line option");
+	return ('?');
+}
+
+
+// Return the sample size in bytes for the MATLAB class of Array.
+// Double yields 0; any class not listed below goes to FatalError.
+static
+size_t SizeOfArrayType(const mxArray *Array)
+{
+
+	switch (mxGetClassID(Array))  {
+
+	 case mxINT8_CLASS:   return 1;
+	 case mxUINT8_CLASS:  return 1;
+	 case mxINT16_CLASS:  return 2;
+	 case mxUINT16_CLASS: return 2;  
+	 case mxSINGLE_CLASS: return 4;
+	 case mxDOUBLE_CLASS: return 0; // Special case -- lcms handles double as size=0
+
+
+	 default:
+		 FatalError("Unsupported data type");
+		 return 0;
+	}
+}
+
+
+// Get number of pixels of input array. Supported arrays are
+// organized as NxMxD, being N and M the size of image and D the
+// number of components.
+// Dimensions are read as mwSize, the type mxGetDimensions() hands back.
+static
+size_t GetNumberOfPixels(const mxArray* In)
+{
+	mwSize nDimensions = mxGetNumberOfDimensions(In);
+	const mwSize *Dimensions = mxGetDimensions(In);
+
+	switch (nDimensions) {
+
+		case 1: return 1;                                                // It is just a spot color
+		case 2: return (size_t) Dimensions[0];                           // A scanline
+		case 3: return (size_t) Dimensions[0] * (size_t) Dimensions[1];  // A image
+
+		default:
+			FatalError("Unsupported array of %d dimensions", (int) nDimensions);
+			return 0;
+	}
+}
+
+
+// Allocates the output array. Copies the input array modifying the pixel
+// definition to match "OutputChannels" (the last dimension).
+// Dimensions are handled as mwSize, the type used by the MATLAB matrix API.
+static
+mxArray* AllocateOutputArray(const mxArray* In, int OutputChannels)
+{
+
+	mxArray*      Out           = mxDuplicateArray(In);   // Make a "deep copy" of Input array
+	mwSize        nDimensions   = mxGetNumberOfDimensions(In);
+	const mwSize* Dimensions    = mxGetDimensions(In);
+	int           InputChannels = (int) Dimensions[nDimensions-1];
+
+
+	// Modify pixel size only if needed
+
+	if (InputChannels != OutputChannels) {
+
+		mwSize  i;
+		size_t  NewSize;
+		mwSize* ModifiedDimensions = (mwSize*) mxMalloc(nDimensions * sizeof(mwSize));
+
+		memmove(ModifiedDimensions, Dimensions, nDimensions * sizeof(mwSize));
+		ModifiedDimensions[nDimensions - 1] = (mwSize) OutputChannels;
+
+		// Bytes per sample of the input class
+		switch (mxGetClassID(In))  {
+
+		case mxINT8_CLASS:   NewSize = sizeof(char); break;
+		case mxUINT8_CLASS:  NewSize = sizeof(unsigned char); break;
+		case mxINT16_CLASS:  NewSize = sizeof(short); break;
+		case mxUINT16_CLASS: NewSize = sizeof(unsigned short); break;
+		case mxSINGLE_CLASS: NewSize = sizeof(float); break;
+
+		default:
+		case mxDOUBLE_CLASS: NewSize = sizeof(double); break;
+		}
+
+		// Total bytes = bytes per sample * number of elements
+		for (i=0; i < nDimensions; i++)
+			NewSize *= ModifiedDimensions[i];
+
+
+		mxSetDimensions(Out, ModifiedDimensions, nDimensions);
+		mxFree(ModifiedDimensions);
+
+		// mxGetData/mxSetData are not tied to the double class
+		mxSetData(Out, mxRealloc(mxGetData(Out), NewSize));
+	}
+
+
+	return Out;
+}
+
+
+
+// Builds the planar lcms format descriptor for "ColorSpace". "Bytes" is the
+// sizeof one sample:
+//  Bytes  Meaning
+//  ------ --------
+//   0      Floating point (double)
+//   1      8-bit samples
+//   2      16-bit samples
+//   4      Floating point (4-byte samples)
+static
+cmsUInt32Number MakeFormatDescriptor(cmsColorSpaceSignature ColorSpace, int Bytes)
+{
+	cmsUInt32Number dwFormat = PLANAR_SH(1) | BYTES_SH(Bytes);
+	if (Bytes == 0 || Bytes == 4) dwFormat |= FLOAT_SH(1);
+	return dwFormat | CHANNELS_SH(cmsChannelsOf(ColorSpace)) | COLORSPACE_SH(_cmsLCMScolorSpace(ColorSpace));
+}
+
+
+// Opens a profile or the proper built-in. "File" may be NULL (sRGB is used),
+// the name of a built-in starting with '*' (matched ignoring case), or the
+// path of an ICC profile on disk.
+
+static
+cmsHPROFILE OpenProfile(const char* File)
+{
+    cmsContext ContextID = 0;
+    double     GrayGamma = 0.0;      // Stays 0 unless a gray built-in is requested
+
+    // No name at all means sRGB
+    if (File == NULL)
+        return cmsCreate_sRGBProfileTHR(ContextID);
+
+    // Lab flavours; a plain "*Lab" is the same as "*Lab4"
+    if (cmsstrcasecmp(File, "*Lab2") == 0)
+        return cmsCreateLab2ProfileTHR(ContextID, NULL);
+
+    if (cmsstrcasecmp(File, "*Lab4") == 0 || cmsstrcasecmp(File, "*Lab") == 0)
+        return cmsCreateLab4ProfileTHR(ContextID, NULL);
+
+    if (cmsstrcasecmp(File, "*LabD65") == 0) {
+        cmsCIExyY WhitePoint;
+
+        cmsWhitePointFromTemp(&WhitePoint, 6504);
+        return cmsCreateLab4ProfileTHR(ContextID, &WhitePoint);
+    }
+
+    if (cmsstrcasecmp(File, "*XYZ") == 0)
+        return cmsCreateXYZProfileTHR(ContextID);
+
+    // Gray built-ins only differ in the gamma of their tone curve
+    if (cmsstrcasecmp(File, "*Gray22") == 0)
+        GrayGamma = 2.2;
+    else if (cmsstrcasecmp(File, "*Gray30") == 0)
+        GrayGamma = 3.0;
+
+    if (GrayGamma > 0.0) {
+        cmsToneCurve* Tone  = cmsBuildGamma(ContextID, GrayGamma);
+        cmsHPROFILE   hGray = cmsCreateGrayProfileTHR(ContextID, cmsD50_xyY(), Tone);
+
+        cmsFreeToneCurve(Tone);
+        return hGray;
+    }
+
+    if (cmsstrcasecmp(File, "*srgb") == 0)
+        return cmsCreate_sRGBProfileTHR(ContextID);
+
+    if (cmsstrcasecmp(File, "*null") == 0)
+        return cmsCreateNULLProfileTHR(ContextID);
+
+    // CMYK linearization: the same 2.2 gamma curve on all four channels
+    if (cmsstrcasecmp(File, "*Lin2222") == 0) {
+        cmsToneCurve* Channels[4];
+        cmsHPROFILE   hLink;
+        int           i;
+
+        Channels[0] = cmsBuildGamma(0, 2.2);
+        for (i = 1; i < 4; i++)
+            Channels[i] = Channels[0];
+
+        hLink = cmsCreateLinearizationDeviceLink(cmsSigCmykData, Channels);
+        cmsFreeToneCurve(Channels[0]);
+        return hLink;
+    }
+
+    // Anything else is taken as a file name
+    return cmsOpenProfileFromFileTHR(ContextID, File, "r");
+}
+
+
+// Translate the global PrecalcMode and BlackPointCompensation settings into
+// the flag word handed to lcms when creating transforms.
+
+static
+cmsUInt32Number GetFlags()
+{
+	cmsUInt32Number dwMode = 0;     // Mode 1 contributes no flag
+
+	if (PrecalcMode == 0)
+		dwMode = cmsFLAGS_NOOPTIMIZE;
+	else if (PrecalcMode == 2)
+		dwMode = cmsFLAGS_HIGHRESPRECALC;
+	else if (PrecalcMode == 3)
+		dwMode = cmsFLAGS_LOWRESPRECALC;
+	else if (PrecalcMode != 1)
+		FatalError("Unknown precalculation mode '%d'", PrecalcMode);
+
+	// Black point compensation is requested independently of the mode
+	return BlackPointCompensation ? (dwMode | cmsFLAGS_BLACKPOINTCOMPENSATION) : dwMode;
+}
+
+// Create transforms: opens the profiles selected by the parsed switches and
+// leaves the resulting transform in the global hColorTransform.
+static
+void OpenTransforms(int argc, char *argv[])
+{
+
+	cmsUInt32Number dwIn, dwOut, dwFlags;
+
+	// Three cases: multiprofile chain, single devicelink, or input/output pair
+	if (lMultiProfileChain) {
+
+		int i;
+		cmsHTRANSFORM hTmp;
+
+		// Every argument left after the switches is one profile of the chain
+		nProfiles = argc - xoptind;
+		for (i=0; i < nProfiles; i++) {
+
+			hProfiles[i] = OpenProfile(argv[i+xoptind]);
+		}
+
+
+		// Create a temporary devicelink 
+
+		hTmp = cmsCreateMultiprofileTransform(hProfiles, nProfiles, 
+			0, 0, Intent, GetFlags());
+		// Keep the chain as a devicelink profile in hInput; hTmp is then disposable
+		hInput = cmsTransform2DeviceLink(hTmp, 4.2, 0);
+		hOutput = NULL;
+		cmsDeleteTransform(hTmp);
+		// From here on the chain is handled like any other devicelink
+		InputColorSpace  = cmsGetColorSpace(hInput);
+		OutputColorSpace = cmsGetPCS(hInput);        
+		lIsDeviceLink = TRUE;
+
+	}
+	else
+		if (lIsDeviceLink) {
+			// A single devicelink profile: no separate output profile is opened
+			hInput  = cmsOpenProfileFromFile(cInProf, "r");
+			hOutput = NULL;
+			InputColorSpace  = cmsGetColorSpace(hInput);
+			OutputColorSpace = cmsGetPCS(hInput);
+
+
+		}
+		else {
+			// Regular input/output pair; OpenProfile also resolves the built-in names
+			hInput  = OpenProfile(cInProf);
+			hOutput = OpenProfile(cOutProf);    
+
+			InputColorSpace   = cmsGetColorSpace(hInput);
+			OutputColorSpace  = cmsGetColorSpace(hOutput);
+			// Devicelink profiles are only accepted through the devicelink switch
+			if (cmsGetDeviceClass(hInput) == cmsSigLinkClass ||
+				cmsGetDeviceClass(hOutput) == cmsSigLinkClass)   
+				FatalError("Use %cl flag for devicelink profiles!\n", SW);
+
+		}
+
+
+		/*
+
+		if (Verbose) {
+
+		mexPrintf("From: %s\n", cmsTakeProductName(hInput));
+		if (hOutput) mexPrintf("To  : %s\n\n", cmsTakeProductName(hOutput));
+
+		}
+		*/
+
+		// Channel counts follow from the color space on each side
+		OutputChannels = cmsChannelsOf(OutputColorSpace);
+		InputChannels  = cmsChannelsOf(InputColorSpace);
+
+		// Input and output samples share one size, nBytesDepth
+		dwIn  = MakeFormatDescriptor(InputColorSpace, nBytesDepth);
+		dwOut = MakeFormatDescriptor(OutputColorSpace, nBytesDepth);
+
+
+		dwFlags = GetFlags();
+		// Soft proofing is only switched on when a proofing profile was given
+		if (cProofing != NULL) {
+
+			hProof = OpenProfile(cProofing);
+			dwFlags |= cmsFLAGS_SOFTPROOFING;
+		}
+
+		// NOTE(review): without a proofing profile hProof keeps whatever value it
+		// already had -- presumably NULL; confirm where it is defined
+
+		hColorTransform = cmsCreateProofingTransform(hInput, dwIn, 
+			hOutput, dwOut, 
+			hProof, Intent, 
+			ProofingIntent, 
+			dwFlags);
+
+}
+
+
+
+// Runs hColorTransform over every pixel of In, writing the result to Out.
+// mxGetData is used because mxGetPr is only meant for double arrays.
+static
+void ApplyTransforms(const mxArray *In, mxArray *Out)
+{
+	const void *Input  = mxGetData(In);
+	void *Output       = mxGetData(Out);
+	size_t nPixels     = GetNumberOfPixels(In);
+	cmsDoTransform(hColorTransform, Input, Output, (cmsUInt32Number) nPixels);
+}
+
+
+// Releases the color transform and every profile opened by OpenTransforms,
+// then clears the handles and the chain count so a second call is harmless.
+static
+void CloseTransforms(void)
+{
+	int i;
+	if (hColorTransform) cmsDeleteTransform(hColorTransform);
+	if (hInput) cmsCloseProfile(hInput);
+	if (hOutput) cmsCloseProfile(hOutput);
+	if (hProof) cmsCloseProfile(hProof);
+	for (i=0; i < nProfiles; i++)
+		if (hProfiles[i]) { cmsCloseProfile(hProfiles[i]); hProfiles[i] = NULL; }
+	nProfiles = 0;
+	hColorTransform = NULL; hInput = NULL; hOutput = NULL; hProof = NULL;
+}
+
+
+// Parses the option string (already split into argv[]) into the global
+// settings used by OpenTransforms. Every option letter is accepted in both
+// lower and upper case. Arguments left after the options are only legal for
+// multiprofile chains.
+static
+void HandleSwitches(int argc, char *argv[])
+{
+	int  s;
+
+	xoptinit();
+	// Letters followed by ':' take an argument; both cases of each letter are listed
+	while ((s = xgetopt(argc, argv,"C:c:VvbBI:i:O:o:T:t:L:l:R:r:P:p:Mm")) != EOF) {
+
+		switch (s){
+
+		case 'b':
+		case 'B': 
+			BlackPointCompensation = TRUE;
+			break;
+
+		case 'c':
+		case 'C':
+			PrecalcMode = atoi(xoptarg);
+			if (PrecalcMode < 0 || PrecalcMode > 3)
+				FatalError("Unknown precalc mode '%d'", PrecalcMode);
+			break;
+
+		case 'v':
+		case 'V':
+			Verbose = TRUE;
+			break;
+
+		case 'i':
+		case 'I':
+			if (lIsDeviceLink)
+				FatalError("Device-link already specified");
+			cInProf = xoptarg;
+			break;
+
+		case 'o':
+		case 'O':
+			if (lIsDeviceLink)
+				FatalError("Device-link already specified"); 
+			cOutProf = xoptarg;
+			break;
+
+		case 't':
+		case 'T':
+			Intent = atoi(xoptarg);
+			// No upper clamp: intents above 3 are handed on as given
+			if (Intent < 0) Intent = 0;
+			break;
+
+		case 'l':
+		case 'L': 
+			cInProf = xoptarg;
+			lIsDeviceLink = TRUE;
+			break;
+
+		case 'p':
+		case 'P':
+			cProofing = xoptarg;
+			break;
+
+		case 'r':
+		case 'R':
+			ProofingIntent = atoi(xoptarg);
+			// No upper clamp here either
+			if (ProofingIntent < 0) ProofingIntent = 0;
+			break;
+
+
+		case 'm':
+		case 'M':
+			lMultiProfileChain = TRUE;
+			break;
+
+		default:
+			FatalError("Unknown option.");
+		}
+	}
+
+	// For multiprofile, need to specify -m
+
+	if (xoptind < argc) {
+
+		if (!lMultiProfileChain)
+			FatalError("Use %cm for multiprofile transforms", SW);
+	}
+
+}
+
+
+
+// -------------------------------------------------- Print some fancy help
+static
+void PrintHelp(void)
+{
+	mexPrintf("(MX) little cms ColorSpace conversion tool - v2.0\n\n");
+
+	mexPrintf("usage: icctrans (mVar, flags)\n\n");
+
+	mexPrintf("mVar : Matlab array.\n");
+	mexPrintf("flags: a string containing one or more of following options.\n\n");
+	mexPrintf("\t%cv - Verbose\n", SW);
+	mexPrintf("\t%ci<profile> - Input profile (defaults to sRGB)\n", SW);
+	mexPrintf("\t%co<profile> - Output profile (defaults to sRGB)\n", SW);
+	mexPrintf("\t%cl<profile> - Transform by device-link profile\n", SW);
+	mexPrintf("\t%cm<profiles> - Apply multiprofile chain\n", SW);
+
+	mexPrintf("\t%ct<n> - Rendering intent\n", SW);
+
+	mexPrintf("\t%cb - Black point compensation\n", SW);
+	mexPrintf("\t%cc<0,1,2,3> - Optimize transform (0=Off, 1=Normal, 2=Hi-res, 3=Lo-Res) [defaults to 1]\n", SW);
+
+	mexPrintf("\t%cp<profile> - Soft proof profile\n", SW);
+	mexPrintf("\t%cr<0,1,2,3> - Soft proof intent\n", SW);
+
+	mexPrintf("\nYou can use following built-ins as profiles:\n\n");
+	// Keep this list in step with the names recognized by OpenProfile()
+	mexPrintf("\t*Lab2  -- D50-based v2 CIEL*a*b\n"
+	"\t*Lab4  -- D50-based v4 CIEL*a*b\n"
+	"\t*Lab   -- D50-based v4 CIEL*a*b\n"
+	"\t*LabD65 - D65-based v4 CIEL*a*b\n"
+	"\t*XYZ   -- CIE XYZ (PCS)\n"
+	"\t*sRGB  -- IEC 61966-2-1 sRGB color space\n"
+	"\t*Gray22 - Monochrome of Gamma 2.2\n"
+	"\t*Gray30 - Monochrome of Gamma 3.0\n"
+	"\t*null   - Monochrome black for all input\n"
+	"\t*Lin2222- CMYK linearization of gamma 2.2 on each channel\n\n");
+
+	mexPrintf("For suggestions, comments, bug reports etc. send mail to info@littlecms.com\n\n");
+}
+
+
+
+// Main entry point: called by MATLAB as  out = icctrans(mVar, flags)
+//   prhs[0] - numeric array with the pixels to convert
+//   prhs[1] - string holding the command-line style options (see PrintHelp)
+//   plhs[0] - receives the converted array
+
+void mexFunction(
+				 int nlhs,              // Number of left hand side (output) arguments
+				 mxArray *plhs[],       // Array of left hand side arguments
+				 int nrhs,              // Number of right hand side (input) arguments
+				 const mxArray *prhs[]  // Array of right hand side arguments
+)
+{
+
+	char CommandLine[4096+1];
+	char *pt, *argv[128];
+	int argc = 1;             // Slot 0 is reserved, options start at argv[1]
+	const int MaxArgs = (int) (sizeof(argv) / sizeof(argv[0]));
+
+	if (nrhs != 2) {
+
+		PrintHelp();
+		return;
+	}
+
+	if(nlhs > 1) {
+		FatalError("Too many output arguments.");
+	}
+
+	// Setup error handler
+
+	cmsSetLogErrorHandler(MatLabErrorHandler);
+
+	// Defaults
+
+	Verbose     = 0;
+	cInProf     = NULL;
+	cOutProf    = NULL;
+	cProofing   = NULL;
+
+	lMultiProfileChain = FALSE;
+	nProfiles   = 0;
+
+	Intent                  = INTENT_PERCEPTUAL;
+	ProofingIntent          = INTENT_ABSOLUTE_COLORIMETRIC;
+	PrecalcMode = 1;
+	BlackPointCompensation  = FALSE;
+	lIsDeviceLink           = FALSE;
+
+	// Check types. First parameter is array of values, second parameter is command line
+
+	if (!mxIsNumeric(prhs[0]))
+		FatalError("Type mismatch on argument 1 -- Must be numeric");
+
+	if (!mxIsChar(prhs[1]))
+		FatalError("Type mismatch on argument 2 -- Must be string");
+
+	// Unpack string to command line buffer
+
+	if (mxGetString(prhs[1], CommandLine, 4096))
+		FatalError("Cannot unpack command string");
+
+	// Separate to argv[] convention. argv[0] plays the role of the program
+	// name; tokens beyond the capacity of argv[] are rejected, not stored.
+
+	argv[0] = NULL;
+	for (pt = strtok(CommandLine, " ");
+		pt;
+		pt = strtok(NULL, " ")) {
+
+			if (argc >= MaxArgs) {
+				FatalError("Too many arguments in command string");
+				break;      // In case FatalError() ever returns
+			}
+			argv[argc++] = pt;
+	}
+
+	// Parse arguments
+	HandleSwitches(argc, argv);
+
+	// The sample size of the input array selects the lcms pixel format
+	nBytesDepth = SizeOfArrayType(prhs[0]);
+
+	OpenTransforms(argc, argv);
+
+	// The output is a copy of the input resized to the output channel count
+	plhs[0] = AllocateOutputArray(prhs[0], OutputChannels);
+
+	ApplyTransforms(prhs[0], plhs[0]);
+
+	CloseTransforms();
+
+	// Done!
+}
+
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/matlab/lcms_rsp b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/matlab/lcms_rsp
new file mode 100755
index 0000000000..b806323c6a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/matlab/lcms_rsp
@@ -0,0 +1,26 @@
+-O
+-I..\..\include
+icctrans.c
+..\..\src\cmscam02.c
+..\..\src\cmscgats.c
+..\..\src\cmscnvrt.c
+..\..\src\cmserr.c
+..\..\src\cmsgamma.c
+..\..\src\cmsgmt.c
+..\..\src\cmsintrp.c
+..\..\src\cmsio0.c
+..\..\src\cmsio1.c
+..\..\src\cmslut.c
+..\..\src\cmsmtrx.c
+..\..\src\cmsnamed.c
+..\..\src\cmsopt.c
+..\..\src\cmspack.c
+..\..\src\cmspcs.c
+..\..\src\cmsplugin.c
+..\..\src\cmsps2.c
+..\..\src\cmssamp.c
+..\..\src\cmssm.c
+..\..\src\cmstypes.c
+..\..\src\cmsvirt.c
+..\..\src\cmswtpnt.c
+..\..\src\cmsxform.c
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/psicc/Makefile.am b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/psicc/Makefile.am
new file mode 100755
index 0000000000..99d9d5b3a4
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/psicc/Makefile.am
@@ -0,0 +1,19 @@
+#
+# Makefile for building psicc
+# Originally Written by Bob Friesenhahn, June 2003
+# Additions and bugs by Marti Maria 
+# Makefile.in is generated from this file: re-run automake after editing it.
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+
+AM_CPPFLAGS =  -I$(top_builddir)/include -I$(top_srcdir)/include \
+               -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+
+bin_PROGRAMS = psicc 
+
+psicc_LDADD = $(top_builddir)/src/liblcms2.la 
+psicc_LDFLAGS = @LDFLAGS@
+psicc_SOURCES = psicc.c ../common/xgetopt.c ../common/vprf.c ../common/utils.h
+# Manual page: automake installs man_MANS but does not distribute it by default
+man_MANS = psicc.1
+EXTRA_DIST = $(man_MANS)
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/psicc/Makefile.in b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/psicc/Makefile.in
new file mode 100755
index 0000000000..6fed475857
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/psicc/Makefile.in
@@ -0,0 +1,663 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for building psicc
+# Originally Written by Bob Friesenhahn, June 2003
+# Additions and bugs by Marti Maria 
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+bin_PROGRAMS = psicc$(EXEEXT)
+subdir = utils/psicc
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_append_compile_flags.m4 \
+	$(top_srcdir)/m4/ax_append_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+	$(top_srcdir)/m4/ax_require_defined.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)"
+PROGRAMS = $(bin_PROGRAMS)
+am__dirstamp = $(am__leading_dot)dirstamp
+am_psicc_OBJECTS = psicc.$(OBJEXT) ../common/xgetopt.$(OBJEXT) \
+	../common/vprf.$(OBJEXT)
+psicc_OBJECTS = $(am_psicc_OBJECTS)
+psicc_DEPENDENCIES = $(top_builddir)/src/liblcms2.la
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 = 
+psicc_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(psicc_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(psicc_SOURCES)
+DIST_SOURCES = $(psicc_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_THREAD = @LIB_THREAD@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_CXX = @PTHREAD_CXX@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
+               -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+
+psicc_LDADD = $(top_builddir)/src/liblcms2.la 
+psicc_LDFLAGS = @LDFLAGS@
+psicc_SOURCES = psicc.c ../common/xgetopt.c ../common/vprf.c ../common/utils.h
+psicc_MANS = psicc.1
+EXTRA_DIST = $(man_MANS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign utils/psicc/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign utils/psicc/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binPROGRAMS: $(bin_PROGRAMS)
+	@$(NORMAL_INSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+	fi; \
+	for p in $$list; do echo "$$p $$p"; done | \
+	sed 's/$(EXEEXT)$$//' | \
+	while read p p1; do if test -f $$p \
+	 || test -f $$p1 \
+	  ; then echo "$$p"; echo "$$p"; else :; fi; \
+	done | \
+	sed -e 'p;s,.*/,,;n;h' \
+	    -e 's|.*|.|' \
+	    -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+	sed 'N;N;N;s,\n, ,g' | \
+	$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+	  { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+	    if ($$2 == $$4) files[d] = files[d] " " $$1; \
+	    else { print "f", $$3 "/" $$4, $$1; } } \
+	  END { for (d in files) print "f", d, files[d] }' | \
+	while read type dir files; do \
+	    if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+	    test -z "$$files" || { \
+	    echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+	    $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+	    } \
+	; done
+
+uninstall-binPROGRAMS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	files=`for p in $$list; do echo "$$p"; done | \
+	  sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+	      -e 's/$$/$(EXEEXT)/' \
+	`; \
+	test -n "$$list" || exit 0; \
+	echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+	cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+	@list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+	echo " rm -f" $$list; \
+	rm -f $$list || exit $$?; \
+	test -n "$(EXEEXT)" || exit 0; \
+	list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+	echo " rm -f" $$list; \
+	rm -f $$list
+../common/$(am__dirstamp):
+	@$(MKDIR_P) ../common
+	@: > ../common/$(am__dirstamp)
+../common/xgetopt.$(OBJEXT): ../common/$(am__dirstamp)
+../common/vprf.$(OBJEXT): ../common/$(am__dirstamp)
+
+psicc$(EXEEXT): $(psicc_OBJECTS) $(psicc_DEPENDENCIES) $(EXTRA_psicc_DEPENDENCIES) 
+	@rm -f psicc$(EXEEXT)
+	$(AM_V_CCLD)$(psicc_LINK) $(psicc_OBJECTS) $(psicc_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+	-rm -f ../common/*.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+.c.o:
+	$(AM_V_CC)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+	$(AM_V_CC)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+	$(AM_V_CC)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS)
+installdirs:
+	for dir in "$(DESTDIR)$(bindir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+	-rm -f ../common/$(am__dirstamp)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean \
+	clean-binPROGRAMS clean-generic clean-libtool cscopelist-am \
+	ctags ctags-am distclean distclean-compile distclean-generic \
+	distclean-libtool distclean-tags distdir dvi dvi-am html \
+	html-am info info-am install install-am install-binPROGRAMS \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	installcheck installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags tags-am uninstall uninstall-am uninstall-binPROGRAMS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/psicc/psicc.1 b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/psicc/psicc.1
new file mode 100755
index 0000000000..19868b5136
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/psicc/psicc.1
@@ -0,0 +1,47 @@
+.\"Shiju P. Nair September 30, 2004
+.\"Thomas Weber <tweber@debian.org> April 23, 2014
+.TH PSICC 1 "September 30, 2004"
+.SH NAME
+psicc - little cms PostScript converter.
+.SH SYNOPSIS
+.B psicc
+.RI [ options ]
+.SH DESCRIPTION
+lcms is a standalone CMM engine, which deals with the color management.
+It implements a fast transformation between ICC profiles.
+.B psicc
+is a little cms PostScript converter.
+.SH OPTIONS
+.TP
+.B \-b
+Black point compensation (CRD only).
+.TP
+.BI \-c\  precision
+Precision (0=LowRes, 1=Normal, 2=Hi-res) (CRD only) [defaults to 1].
+.TP
+.BI \-i\  profile
+Input profile: Generates Color Space Array (CSA).
+.TP
+.BI \-n\  gridpoints
+Alternate way to set precision, number of CLUT points (CRD only).
+.TP
+.BI \-o\  profile
+Output profile: Generates Color Rendering Dictionary (CRD).
+.TP
+.BI \-t\  intent
+Intent (0=Perceptual, 1=Colorimetric, 2=Saturation, 3=Absolute) [defaults to 0].
+.TP
+.B \-u
+Do NOT generate resource name on CRD.
+.SH NOTES
+For suggestions, comments, bug reports etc. send mail to
+info@littlecms.com.
+.SH SEE ALSO
+.BR jpgicc (1),
+.BR linkicc (1),
+.BR tificc (1),
+.BR transicc (1)
+.SH AUTHOR
+This manual page was written by Shiju P. Nair <shiju.p@gmail.com>,
+for the Debian project.
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/psicc/psicc.c b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/psicc/psicc.c
new file mode 100755
index 0000000000..0e3c790d0d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/psicc/psicc.c
@@ -0,0 +1,232 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+
+#include "utils.h"
+
+// ------------------------------------------------------------------------
+
+static char *cInProf = NULL;
+static char *cOutProf = NULL;
+static int Intent = INTENT_PERCEPTUAL;
+static FILE* OutFile;
+static int BlackPointCompensation = FALSE;
+static int Undecorated = FALSE;
+static int PrecalcMode = 1;
+static int NumOfGridPoints = 0;
+
+
+// The toggles stuff
+
+// Parse the command-line switches into the file-level option variables.
+// Every option letter is accepted in either case; i, o, t, c and n take an
+// argument, which xgetopt hands back through xoptarg.
+static
+void HandleSwitches(int argc, char *argv[])
+{
+    int opt;
+
+    for (;;) {
+
+        opt = xgetopt(argc, argv, "uUbBI:i:O:o:T:t:c:C:n:N:");
+        if (opt == EOF) break;
+
+        switch (opt) {
+
+        case 'I':
+        case 'i':
+            cInProf = xoptarg;
+            break;
+
+        case 'O':
+        case 'o':
+            cOutProf = xoptarg;
+            break;
+
+        case 'B':
+        case 'b':
+            BlackPointCompensation = TRUE;
+            break;
+
+        case 'T':
+        case 't':
+            // Out-of-range intents are clamped into 0..3 rather than rejected
+            Intent = atoi(xoptarg);
+            if (Intent < 0)
+                Intent = 0;
+            else if (Intent > 3)
+                Intent = 3;
+            break;
+
+        case 'u':
+        case 'U':
+            Undecorated = TRUE;
+            break;
+
+        case 'C':
+        case 'c':
+            PrecalcMode = atoi(xoptarg);
+            if (!(PrecalcMode >= 0 && PrecalcMode <= 2))
+                FatalError("ERROR: Unknown precalc mode '%d'", PrecalcMode);
+            break;
+
+        case 'N':
+        case 'n':
+            // Grid points are only accepted while PrecalcMode is 1 (its initial value)
+            if (PrecalcMode != 1)
+                FatalError("Precalc mode already specified");
+            NumOfGridPoints = atoi(xoptarg);
+            break;
+
+        default:
+            FatalError("Unknown option - run without args to see valid ones.\n");
+        }
+    }
+}
+
+// Print the usage text to stderr and terminate the process with status 0.
+// SW is the switch character placed in front of each option letter.
+static
+void Help(void)
+{
+    // Banner: LCMS_VERSION is divided by 1000 to print it as a decimal version
+    fprintf(stderr, "little CMS ICC PostScript generator - v2.1 [LittleCMS %2.2f]\n", LCMS_VERSION / 1000.0);
+
+    fputs("usage: psicc [flags] [<Output file>]\n\n", stderr);
+    fputs("flags:\n\n", stderr);
+
+    // One line per switch
+    fprintf(stderr, "%ci<profile> - Input profile: Generates Color Space Array (CSA)\n", SW);
+    fprintf(stderr, "%co<profile> - Output profile: Generates Color Rendering Dictionary(CRD)\n", SW);
+    fprintf(stderr, "%ct<0,1,2,3> - Intent (0=Perceptual, 1=Colorimetric, 2=Saturation, 3=Absolute)\n", SW);
+    fprintf(stderr, "%cb - Black point compensation (CRD only)\n", SW);
+    fprintf(stderr, "%cu - Do NOT generate resource name on CRD\n", SW);
+    fprintf(stderr, "%cc<0,1,2> - Precision (0=LowRes, 1=Normal (default), 2=Hi-res) (CRD only)\n", SW);
+    fprintf(stderr, "%cn<gridpoints> - Alternate way to set precission, number of CLUT points (CRD only)\n", SW);
+
+    // Trailer
+    fputs("\n", stderr);
+    fputs("If no output file is specified, output goes to stdout.\n\n", stderr);
+    fputs("This program is intended to be a demo of the little cms\n"
+          "engine. Both lcms and this program are freeware. You can\n"
+          "obtain both in source code at http://www.littlecms.com\n"
+          "For suggestions, comments, bug reports etc. send mail to\n"
+          "info@littlecms.com\n\n", stderr);
+
+    exit(0);
+}
+
+
+// Write the PostScript Color Space Array (CSA) for the input profile
+// (cInProf) to OutFile.
+//
+// cmsGetPostScriptCSA is called twice: first with a NULL buffer to obtain
+// the required size, then again to fill a buffer of that size. Nothing is
+// written if the profile cannot be opened, the reported size is 0, or the
+// buffer cannot be allocated. The profile handle is closed on every path
+// after a successful open (the original leaked it when the size was 0).
+static
+void GenerateCSA(void)
+{
+    cmsHPROFILE hProfile = OpenStockProfile(0, cInProf);
+    size_t n;
+    char* Buffer;
+
+    if (hProfile == NULL) return;
+
+    // Size query: NULL buffer, zero length
+    n = cmsGetPostScriptCSA(0, hProfile, Intent, 0, NULL, 0);
+
+    if (n != 0) {
+
+        // One extra byte so the result can be NUL-terminated for "%s"
+        Buffer = (char*) malloc(n + 1);
+        if (Buffer != NULL) {
+
+            cmsGetPostScriptCSA(0, hProfile, Intent, 0, Buffer, (cmsUInt32Number) n);
+            Buffer[n] = 0;
+
+            fprintf(OutFile, "%s", Buffer);
+
+            free(Buffer);
+        }
+    }
+
+    cmsCloseProfile(hProfile);
+}
+
+
+// Write the PostScript Color Rendering Dictionary (CRD) for the output
+// profile (cOutProf) to OutFile.
+//
+// The flags passed to cmsGetPostScriptCRD are assembled from the
+// file-level options set by the command-line switches. As with the CSA,
+// the function is called once to obtain the size and once to fill the
+// buffer. Nothing is written if the profile cannot be opened, the reported
+// size is 0, or the buffer cannot be allocated. The profile handle is
+// closed on every path after a successful open (the original leaked it on
+// the size-0 and allocation-failure returns).
+static
+void GenerateCRD(void)
+{
+    cmsHPROFILE hProfile = OpenStockProfile(0, cOutProf);
+    size_t n;
+    char* Buffer;
+    cmsUInt32Number dwFlags = 0;
+
+    if (hProfile == NULL) return;
+
+    if (BlackPointCompensation) dwFlags |= cmsFLAGS_BLACKPOINTCOMPENSATION;
+    if (Undecorated)            dwFlags |= cmsFLAGS_NODEFAULTRESOURCEDEF;
+
+    switch (PrecalcMode) {
+
+        case 0: dwFlags |= cmsFLAGS_LOWRESPRECALC; break;
+        case 2: dwFlags |= cmsFLAGS_HIGHRESPRECALC; break;
+        case 1:
+            // In mode 1 an explicit grid-point count is passed only if one was given
+            if (NumOfGridPoints > 0)
+                dwFlags |= cmsFLAGS_GRIDPOINTS(NumOfGridPoints);
+            break;
+
+        default: FatalError("ERROR: Unknown precalculation mode '%d'", PrecalcMode);
+    }
+
+    // Size query: NULL buffer, zero length
+    n = cmsGetPostScriptCRD(0, hProfile, Intent, dwFlags, NULL, 0);
+
+    if (n != 0) {
+
+        // One extra byte so the result can be NUL-terminated for "%s"
+        Buffer = (char*) malloc(n + 1);
+        if (Buffer != NULL) {
+
+            cmsGetPostScriptCRD(0, hProfile, Intent, dwFlags, Buffer, (cmsUInt32Number) n);
+            Buffer[n] = 0;
+
+            fprintf(OutFile, "%s", Buffer);
+
+            free(Buffer);
+        }
+    }
+
+    cmsCloseProfile(hProfile);
+}
+
+
+// Entry point: psicc [flags] [<Output file>]
+//
+// Parses the switches, then emits a CSA for the input profile and/or a CRD
+// for the output profile. Output goes to the file named by the single
+// optional positional argument, or to stdout when none is given. Help()
+// (which exits) is shown when more than one positional argument is given
+// or when neither profile was specified.
+int main(int argc, char *argv[])
+{
+    int nargs;
+
+    // Initialize
+    InitUtils("psicc");
+
+    HandleSwitches(argc, argv);
+
+    nargs = (argc - xoptind);
+    if (nargs != 0 && nargs != 1)
+        Help();
+
+    if (cInProf == NULL && cOutProf == NULL)
+        Help();
+
+    if (nargs == 0) {
+        OutFile = stdout;
+    }
+    else {
+        OutFile = fopen(argv[xoptind], "wt");
+
+        // Without this check a bad path would hand a NULL FILE* to fprintf
+        if (OutFile == NULL) {
+            FatalError("Unable to create output file '%s'", argv[xoptind]);
+            return 1;   // in case FatalError returns
+        }
+    }
+
+    if (cInProf != NULL)
+        GenerateCSA();
+
+    if (cOutProf != NULL)
+        GenerateCRD();
+
+    // Only close what was opened here; stdout is left alone
+    if (nargs == 1) {
+        fclose(OutFile);
+    }
+
+    return 0;
+}
+
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/Makefile.am b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/Makefile.am
new file mode 100755
index 0000000000..b3b620173f
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/Makefile.am
@@ -0,0 +1,19 @@
+#
+# Makefile for building lcms sample programs
+# Originally Written by Bob Friesenhahn, June 2003
+# Additions and bugs by Marti Maria 
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+
+# Headers are searched in both the build tree and the source tree so that
+# out-of-tree (VPATH) builds work.
+AM_CPPFLAGS =  -I$(top_builddir)/include -I$(top_srcdir)/include \
+               -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+
+bin_PROGRAMS = wtpt 
+
+wtpt_LDADD = $(top_builddir)/src/liblcms2.la 
+wtpt_LDFLAGS = @LDFLAGS@
+wtpt_SOURCES = wtpt.c ../common/xgetopt.c ../common/vprf.c ../common/utils.h
+
+# Automake has no per-program "<prog>_MANS" variable; man pages are listed in
+# man_MANS. With the former "wtpt_MANS" spelling the page was ignored and
+# $(man_MANS) below expanded to nothing, so wtpt.1 was neither installed
+# nor distributed.
+man_MANS = wtpt.1
+
+# man_MANS files are not distributed automatically, hence their presence
+# here next to the sample sources that are shipped but not built.
+EXTRA_DIST = $(man_MANS) roundtrip.c mktiff8.c mkgrayer.c mkcmy.c itufax.c
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/Makefile.in b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/Makefile.in
new file mode 100755
index 0000000000..da52ca57b7
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/Makefile.in
@@ -0,0 +1,611 @@
+# Makefile.in generated by automake 1.10 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006  Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for building lcms sample programs
+# Originally Written by Bob Friesenhahn, June 2003
+# Additions and bugs by Marti Maria Oct 2004
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+bin_PROGRAMS = icctrans$(EXEEXT) wtpt$(EXEEXT) icc2ps$(EXEEXT) \
+	icclink$(EXEEXT)
+subdir = samples
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_CLEAN_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"
+binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
+PROGRAMS = $(bin_PROGRAMS)
+am_icc2ps_OBJECTS = icc2ps.$(OBJEXT) xgetopt.$(OBJEXT)
+icc2ps_OBJECTS = $(am_icc2ps_OBJECTS)
+icc2ps_DEPENDENCIES = $(top_builddir)/src/liblcms.la
+icc2ps_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(icc2ps_LDFLAGS) \
+	$(LDFLAGS) -o $@
+am_icclink_OBJECTS = icclink.$(OBJEXT) xgetopt.$(OBJEXT) \
+	vprf.$(OBJEXT)
+icclink_OBJECTS = $(am_icclink_OBJECTS)
+icclink_DEPENDENCIES = $(top_builddir)/src/liblcms.la
+icclink_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(icclink_LDFLAGS) \
+	$(LDFLAGS) -o $@
+am_icctrans_OBJECTS = icctrans.$(OBJEXT) xgetopt.$(OBJEXT) \
+	vprf.$(OBJEXT)
+icctrans_OBJECTS = $(am_icctrans_OBJECTS)
+icctrans_DEPENDENCIES = $(top_builddir)/src/liblcms.la
+icctrans_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(icctrans_LDFLAGS) \
+	$(LDFLAGS) -o $@
+am_wtpt_OBJECTS = wtpt.$(OBJEXT) xgetopt.$(OBJEXT)
+wtpt_OBJECTS = $(am_wtpt_OBJECTS)
+wtpt_DEPENDENCIES = $(top_builddir)/src/liblcms.la
+wtpt_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(wtpt_LDFLAGS) \
+	$(LDFLAGS) -o $@
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+	$(LDFLAGS) -o $@
+SOURCES = $(icc2ps_SOURCES) $(icclink_SOURCES) $(icctrans_SOURCES) \
+	$(wtpt_SOURCES)
+DIST_SOURCES = $(icc2ps_SOURCES) $(icclink_SOURCES) \
+	$(icctrans_SOURCES) $(wtpt_SOURCES)
+man1dir = $(mandir)/man1
+NROFF = nroff
+MANS = $(man_MANS)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+ECHO = @ECHO@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+F77 = @F77@
+FFLAGS = @FFLAGS@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+INT16_T = @INT16_T@
+INT32_T = @INT32_T@
+INT64_T = @INT64_T@
+INT8_T = @INT8_T@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LCMS_PYEXECDIR = @LCMS_PYEXECDIR@
+LCMS_PYINCLUDE = @LCMS_PYINCLUDE@
+LCMS_PYLIB = @LCMS_PYLIB@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PYTHON = @PYTHON@
+PYTHON_EXEC_PREFIX = @PYTHON_EXEC_PREFIX@
+PYTHON_PLATFORM = @PYTHON_PLATFORM@
+PYTHON_PREFIX = @PYTHON_PREFIX@
+PYTHON_VERSION = @PYTHON_VERSION@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+UINT16_T = @UINT16_T@
+UINT32_T = @UINT32_T@
+UINT64_T = @UINT64_T@
+UINT8_T = @UINT8_T@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_F77 = @ac_ct_F77@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+pkgpyexecdir = @pkgpyexecdir@
+pkgpythondir = @pkgpythondir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+pyexecdir = @pyexecdir@
+pythondir = @pythondir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include
+icctrans_LDADD = $(top_builddir)/src/liblcms.la 
+icctrans_LDFLAGS = @LDFLAGS@
+icctrans_SOURCES = icctrans.c xgetopt.c vprf.c
+icctrans_MANS = icctrans.1
+wtpt_LDADD = $(top_builddir)/src/liblcms.la 
+wtpt_LDFLAGS = @LDFLAGS@
+wtpt_SOURCES = wtpt.c xgetopt.c 
+icc2ps_LDADD = $(top_builddir)/src/liblcms.la 
+icc2ps_LDFLAGS = @LDFLAGS@
+icc2ps_SOURCES = icc2ps.c xgetopt.c 
+icclink_LDADD = $(top_builddir)/src/liblcms.la 
+icclink_LDFLAGS = @LDFLAGS@
+icclink_SOURCES = icclink.c xgetopt.c vprf.c
+man_MANS = wtpt.1 icc2ps.1 icclink.1
+EXTRA_DIST = $(man_MANS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+		&& exit 0; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign  samples/Makefile'; \
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign  samples/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+# Install every program in bin_PROGRAMS into $(DESTDIR)$(bindir).
+# The directory is created first (skipped when bindir is empty). For each
+# program that exists on disk, with or without the $(EXEEXT) suffix, the
+# installed name is the basename with $(transform) applied and $(EXEEXT)
+# re-appended; the copy goes through "libtool --mode=install". The loop
+# stops with exit status 1 on the first install that fails.
+install-binPROGRAMS: $(bin_PROGRAMS)
+	@$(NORMAL_INSTALL)
+	test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
+	@list='$(bin_PROGRAMS)'; for p in $$list; do \
+	  p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
+	  if test -f $$p \
+	     || test -f $$p1 \
+	  ; then \
+	    f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \
+	   echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(binPROGRAMS_INSTALL) '$$p' '$(DESTDIR)$(bindir)/$$f'"; \
+	   $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(binPROGRAMS_INSTALL) "$$p" "$(DESTDIR)$(bindir)/$$f" || exit 1; \
+	  else :; fi; \
+	done
+
+# Remove the installed copy of every program in bin_PROGRAMS from
+# $(DESTDIR)$(bindir). The installed file name is recomputed here: basename,
+# $(EXEEXT) stripped, $(transform) applied, $(EXEEXT) re-appended.
+uninstall-binPROGRAMS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_PROGRAMS)'; for p in $$list; do \
+	  f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \
+	  echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \
+	  rm -f "$(DESTDIR)$(bindir)/$$f"; \
+	done
+
+# Delete the built programs from the build directory. Each entry is removed
+# both under its listed name and under that name with $(EXEEXT) stripped.
+clean-binPROGRAMS:
+	@list='$(bin_PROGRAMS)'; for p in $$list; do \
+	  f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
+	  echo " rm -f $$p $$f"; \
+	  rm -f $$p $$f ; \
+	done
+icc2ps$(EXEEXT): $(icc2ps_OBJECTS) $(icc2ps_DEPENDENCIES) 
+	@rm -f icc2ps$(EXEEXT)
+	$(icc2ps_LINK) $(icc2ps_OBJECTS) $(icc2ps_LDADD) $(LIBS)
+icclink$(EXEEXT): $(icclink_OBJECTS) $(icclink_DEPENDENCIES) 
+	@rm -f icclink$(EXEEXT)
+	$(icclink_LINK) $(icclink_OBJECTS) $(icclink_LDADD) $(LIBS)
+icctrans$(EXEEXT): $(icctrans_OBJECTS) $(icctrans_DEPENDENCIES) 
+	@rm -f icctrans$(EXEEXT)
+	$(icctrans_LINK) $(icctrans_OBJECTS) $(icctrans_LDADD) $(LIBS)
+wtpt$(EXEEXT): $(wtpt_OBJECTS) $(wtpt_DEPENDENCIES) 
+	@rm -f wtpt$(EXEEXT)
+	$(wtpt_LINK) $(wtpt_OBJECTS) $(wtpt_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/icc2ps.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/icclink.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/icctrans.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vprf.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/wtpt.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xgetopt.Po@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(COMPILE) -c $<
+
+.c.obj:
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@	$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+install-man1: $(man1_MANS) $(man_MANS)
+	@$(NORMAL_INSTALL)
+	test -z "$(man1dir)" || $(MKDIR_P) "$(DESTDIR)$(man1dir)"
+	@list='$(man1_MANS) $(dist_man1_MANS) $(nodist_man1_MANS)'; \
+	l2='$(man_MANS) $(dist_man_MANS) $(nodist_man_MANS)'; \
+	for i in $$l2; do \
+	  case "$$i" in \
+	    *.1*) list="$$list $$i" ;; \
+	  esac; \
+	done; \
+	for i in $$list; do \
+	  if test -f $(srcdir)/$$i; then file=$(srcdir)/$$i; \
+	  else file=$$i; fi; \
+	  ext=`echo $$i | sed -e 's/^.*\\.//'`; \
+	  case "$$ext" in \
+	    1*) ;; \
+	    *) ext='1' ;; \
+	  esac; \
+	  inst=`echo $$i | sed -e 's/\\.[0-9a-z]*$$//'`; \
+	  inst=`echo $$inst | sed -e 's/^.*\///'`; \
+	  inst=`echo $$inst | sed '$(transform)'`.$$ext; \
+	  echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \
+	  $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst"; \
+	done
+uninstall-man1:
+	@$(NORMAL_UNINSTALL)
+	@list='$(man1_MANS) $(dist_man1_MANS) $(nodist_man1_MANS)'; \
+	l2='$(man_MANS) $(dist_man_MANS) $(nodist_man_MANS)'; \
+	for i in $$l2; do \
+	  case "$$i" in \
+	    *.1*) list="$$list $$i" ;; \
+	  esac; \
+	done; \
+	for i in $$list; do \
+	  ext=`echo $$i | sed -e 's/^.*\\.//'`; \
+	  case "$$ext" in \
+	    1*) ;; \
+	    *) ext='1' ;; \
+	  esac; \
+	  inst=`echo $$i | sed -e 's/\\.[0-9a-z]*$$//'`; \
+	  inst=`echo $$inst | sed -e 's/^.*\///'`; \
+	  inst=`echo $$inst | sed '$(transform)'`.$$ext; \
+	  echo " rm -f '$(DESTDIR)$(man1dir)/$$inst'"; \
+	  rm -f "$(DESTDIR)$(man1dir)/$$inst"; \
+	done
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	here=`pwd`; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	    $$tags $$unique; \
+	fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	here=`pwd`; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	test -z "$(CTAGS_ARGS)$$tags$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$tags $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && cd $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+	    fi; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+	  else \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS) $(MANS)
+# Create the installation directories used by this Makefile (the program
+# directory and the section-1 man page directory) under $(DESTDIR).
+installdirs:
+	for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+# Re-run "install" in a sub-make with the program-install commands replaced
+# by $(INSTALL_STRIP_PROGRAM) and INSTALL_STRIP_FLAG=-s. When $(STRIP) is
+# non-empty it is additionally passed down as STRIPPROG via
+# INSTALL_PROGRAM_ENV.
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+info: info-am
+
+info-am:
+
+install-data-am: install-man
+
+install-dvi: install-dvi-am
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-info: install-info-am
+
+install-man: install-man1
+
+install-pdf: install-pdf-am
+
+install-ps: install-ps-am
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS uninstall-man
+
+uninstall-man: uninstall-man1
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \
+	clean-generic clean-libtool ctags distclean distclean-compile \
+	distclean-generic distclean-libtool distclean-tags distdir dvi \
+	dvi-am html html-am info info-am install install-am \
+	install-binPROGRAMS install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am install-man \
+	install-man1 install-pdf install-pdf-am install-ps \
+	install-ps-am install-strip installcheck installcheck-am \
+	installdirs maintainer-clean maintainer-clean-generic \
+	mostlyclean mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
+	uninstall-am uninstall-binPROGRAMS uninstall-man \
+	uninstall-man1
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/itufax.c b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/itufax.c
new file mode 100755
index 0000000000..79c7c4400a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/itufax.c
@@ -0,0 +1,138 @@
+//
+//  Little cms
+//  Copyright (C) 1998-2003 Marti Maria
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+#include "lcms.h"
+
+// This is a sample on how to build a profile for decoding ITU T.42/Fax JPEG
+// streams. In the input direction the profile additionally gamut-compresses
+// values into -85 <= a <= 85 and -75 <= b <= 125, which is the default range
+// for ITU T.42 -- see RFC 2301, section 6.2.3 for details
+
+
+//  L*	=	[0, 100]
+//	a*	=	[-85, 85]
+//	b*	=	[-75, 125]
+
+
+// These functions convert between the ITUFAX encoding and floating point
+
+static void ITU2Lab(WORD In[3], LPcmsCIELab Lab)
+{
+    // Decode one ITU-encoded triplet: L is a plain rescale, a/b are offset first.
+    Lab->L = In[0] / 655.35;
+    Lab->a = 170. * (In[1] - 32768.) / 65535.;
+    Lab->b = 200. * (In[2] - 24576.) / 65535.;
+}
+
+
+static void Lab2ITU(LPcmsCIELab Lab, WORD Out[3])
+{
+    // Scale each channel to 16 bits, add the ITU offset and round to nearest
+    Out[0] = (WORD) floor(Lab->L / 100. * 65535. + 0.5);
+    Out[1] = (WORD) floor(Lab->a / 170. * 65535. + 32768. + 0.5);
+    Out[2] = (WORD) floor(Lab->b / 200. * 65535. + 24576. + 0.5);
+}
+
+
+// These are the samplers -- they are passed as callbacks to cmsSample3DGrid(),
+// which then sweeps the whole Lab gamut, calling these functions once for
+// each node. In[] will contain the Lab PCS value to convert to ITUFAX
+// on InputDirection, or the ITUFAX value to convert to Lab in OutputDirection.
+// You can change the number of sample points if desired, the algorithm will
+// remain the same. 33 points gives good accuracy, but you can reduce to 22 or
+// less if space is critical
+
+#define GRID_POINTS 33
+
+static int InputDirection(register WORD In[], register WORD Out[], register LPVOID Cargo)
+{
+    // Sampler for the input direction: PCS Lab -> clamped Lab -> ITUFAX.
+    // Cargo is not used.
+    cmsCIELab pcs;
+
+    cmsLabEncoded2Float(&pcs, In);
+    cmsClampLab(&pcs, 85, -85, 125, -75);    // This function does the necessary gamut remapping
+    Lab2ITU(&pcs, Out);
+    return TRUE;
+}
+
+
+static int OutputDirection(register WORD In[], register WORD Out[], register LPVOID Cargo)
+{
+    // Sampler for the output direction: decode the ITUFAX node in In[] and
+    // re-encode it as a PCS Lab value in Out[]. Cargo is not used.
+    cmsCIELab decoded;
+
+    ITU2Lab(In, &decoded);
+    cmsFloat2LabEncoded(Out, &decoded);
+
+    return TRUE;
+}
+
+
+// The main entry point. Just create a profile and populate it with required tags.
+// Note that cmsOpenProfileFromFile("itufax.icm", "w") will NOT delete the file
+// if it already exists. This is for obvious safety reasons.
+
+	
+int main(int argc, char *argv[])
+{
+	const char* outName = "itufax.icm";
+	cmsHPROFILE hProfile;
+	LPLUT lutAToB0;
+	LPLUT lutBToA0;
+
+	fprintf(stderr, "Creating itufax.icm...");
+	// Remove any stale output before opening the profile for writing
+	unlink(outName);
+	hProfile = cmsOpenProfileFromFile(outName, "w");
+
+	// AToB0 table, filled by the InputDirection sampler
+	lutAToB0 = cmsAllocLUT();
+	cmsAlloc3DGrid(lutAToB0, GRID_POINTS, 3, 3);
+	cmsSample3DGrid(lutAToB0, InputDirection, NULL, 0);
+
+	// BToA0 table, filled by the OutputDirection sampler
+	lutBToA0 = cmsAllocLUT();
+	cmsAlloc3DGrid(lutBToA0, GRID_POINTS, 3, 3);
+	cmsSample3DGrid(lutBToA0, OutputDirection, NULL, 0);
+
+	cmsAddTag(hProfile, icSigAToB0Tag, lutAToB0);
+	cmsAddTag(hProfile, icSigBToA0Tag, lutBToA0);
+
+	// Header fields: Lab as color space and as PCS, ColorSpace device class
+	cmsSetColorSpace(hProfile, icSigLabData);
+	cmsSetPCS(hProfile, icSigLabData);
+	cmsSetDeviceClass(hProfile, icSigColorSpaceClass);
+
+	cmsAddTag(hProfile, icSigProfileDescriptionTag, "ITU T.42/Fax JPEG CIEL*a*b*");
+	cmsAddTag(hProfile, icSigCopyrightTag,          "No Copyright, use freely.");
+	cmsAddTag(hProfile, icSigDeviceMfgDescTag,      "Little cms");
+	cmsAddTag(hProfile, icSigDeviceModelDescTag,    "ITU T.42/Fax JPEG CIEL*a*b*");
+
+	cmsCloseProfile(hProfile);
+	cmsFreeLUT(lutAToB0);
+	cmsFreeLUT(lutBToA0);
+	fprintf(stderr, "Done.\n");
+	return 0;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/mkcmy.c b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/mkcmy.c
new file mode 100755
index 0000000000..1b0755f17b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/mkcmy.c
@@ -0,0 +1,170 @@
+//
+//  Little cms
+//  Copyright (C) 1998-2003 Marti Maria
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THIS SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
+// WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// IN NO EVENT SHALL MARTI MARIA BE LIABLE FOR ANY SPECIAL, INCIDENTAL,
+// INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+// OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+// WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF
+// LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+// OF THIS SOFTWARE.
+//
+// Version 1.12
+
+
+#include "lcms.h"
+
+
+typedef struct {
+				cmsHPROFILE   hLab;       // Lab profile, created in InitCargo()
+				cmsHPROFILE   hRGB;       // sRGB profile, created in InitCargo()
+				cmsHTRANSFORM Lab2RGB;    // Lab -> RGB transform, used by Forward()
+				cmsHTRANSFORM RGB2Lab;    // RGB -> Lab transform, used by Reverse()
+
+				} CARGO, FAR* LPCARGO;
+
+
+	 
+ 
+
+// Our CMY space is the negative of sRGB (the RGB profile is created in InitCargo)
+
+static int Forward(register WORD In[], register WORD Out[], register LPVOID Cargo)
+{
+	// Sampler: converts the encoded Lab node in In[] to RGB through the
+	// Lab2RGB transform held in Cargo, then writes the 16-bit complement of
+	// each RGB channel to Out[].
+	LPCARGO ctx = (LPCARGO) Cargo;
+	cmsCIELab Lab;
+	WORD RGB[3];
+	int ch;
+
+	// Print the decoded Lab value of every node to stdout
+	cmsLabEncoded2Float(&Lab, In);
+	printf("%g %g %g\n", Lab.L, Lab.a, Lab.b);
+
+	cmsDoTransform(ctx->Lab2RGB, In, &RGB, 1);
+
+	// Our CMY is negative of RGB
+	for (ch = 0; ch < 3; ch++)
+		Out[ch] = 0xFFFF - RGB[ch];
+
+	return TRUE;
+}
+
+
+static int Reverse(register WORD In[], register WORD Out[], register LPVOID Cargo)
+{
+	// Sampler: turns the CMY node in In[] back into RGB and converts it to
+	// Lab through the RGB2Lab transform held in Cargo, writing to Out[].
+	LPCARGO ctx = (LPCARGO) Cargo;
+	WORD RGB[3];
+	int ch;
+
+	// Undo the negative: RGB is the 16-bit complement of CMY
+	for (ch = 0; ch < 3; ch++)
+		RGB[ch] = 0xFFFF - In[ch];
+
+	cmsDoTransform(ctx->RGB2Lab, &RGB, Out, 1);
+
+	return TRUE;
+}
+
+
+
+static void InitCargo(LPCARGO Cargo)
+{
+	// Creates the Lab and sRGB profiles plus the two 16-bit transforms between
+	// them and stores all four handles in Cargo. Release with FreeCargo().
+	cmsHPROFILE lab = cmsCreateLabProfile(NULL);
+	cmsHPROFILE rgb = cmsCreate_sRGBProfile();
+
+	Cargo->hLab = lab;
+	Cargo->hRGB = rgb;
+
+	Cargo->Lab2RGB = cmsCreateTransform(lab, TYPE_Lab_16, rgb, TYPE_RGB_16,
+	                                    INTENT_RELATIVE_COLORIMETRIC,
+	                                    cmsFLAGS_NOTPRECALC);
+
+	Cargo->RGB2Lab = cmsCreateTransform(rgb, TYPE_RGB_16, lab, TYPE_Lab_16,
+	                                    INTENT_RELATIVE_COLORIMETRIC,
+	                                    cmsFLAGS_NOTPRECALC);
+}
+
+
+
+
+static void FreeCargo(LPCARGO Cargo)
+{
+	// Transforms are deleted before the profiles they were created from
+	cmsDeleteTransform(Cargo->Lab2RGB);
+	cmsDeleteTransform(Cargo->RGB2Lab);
+	cmsCloseProfile(Cargo->hLab);
+	cmsCloseProfile(Cargo->hRGB);
+}
+
+	
+	
+	
+int main(void)
+{
+	CARGO Cargo;
+	cmsHPROFILE hProfile;
+	LPLUT lutAToB0;
+	LPLUT lutBToA0;
+
+	fprintf(stderr, "Creating lcmscmy.icm...");
+
+	InitCargo(&Cargo);
+
+	// Start from a Lab profile; its header is retagged further down
+	hProfile = cmsCreateLabProfile(NULL);
+
+	// AToB0 table, filled by the Reverse() sampler
+	lutAToB0 = cmsAllocLUT();
+	cmsAlloc3DGrid(lutAToB0, 25, 3, 3);
+	cmsSample3DGrid(lutAToB0, Reverse, &Cargo, 0);
+
+	// BToA0 table, filled by the Forward() sampler
+	lutBToA0 = cmsAllocLUT();
+	cmsAlloc3DGrid(lutBToA0, 25, 3, 3);
+	cmsSample3DGrid(lutBToA0, Forward, &Cargo, 0);
+
+	cmsAddTag(hProfile, icSigAToB0Tag, lutAToB0);
+	cmsAddTag(hProfile, icSigBToA0Tag, lutBToA0);
+
+	// Header fields: CMY color space, output device class
+	cmsSetColorSpace(hProfile, icSigCmyData);
+	cmsSetDeviceClass(hProfile, icSigOutputClass);
+
+	cmsAddTag(hProfile, icSigProfileDescriptionTag, "CMY ");
+	cmsAddTag(hProfile, icSigCopyrightTag,          "Copyright (c) HP, 2007. All rights reserved.");
+	cmsAddTag(hProfile, icSigDeviceMfgDescTag,      "Little cms");
+	cmsAddTag(hProfile, icSigDeviceModelDescTag,    "CMY space");
+
+	// Write the profile to disk
+	_cmsSaveProfile(hProfile, "lcmscmy.icm");
+
+	// Release the LUTs, the profile and finally the cargo
+	cmsFreeLUT(lutAToB0);
+	cmsFreeLUT(lutBToA0);
+	cmsCloseProfile(hProfile);
+	FreeCargo(&Cargo);
+
+	fprintf(stderr, "Done.\n");
+	return 0;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/mkgrayer.c b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/mkgrayer.c
new file mode 100755
index 0000000000..46e9286189
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/mkgrayer.c
@@ -0,0 +1,93 @@
+//
+//  Little cms
+//  Copyright (C) 1998-2003 Marti Maria
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+#include "lcms.h"
+
+
+
+static int Forward(register WORD In[], register WORD Out[], register LPVOID Cargo)
+{
+	// Sampler: near-neutral Lab nodes (|a| < 3 and |b| < 3) become a gray whose
+	// level follows L; every other node becomes (0xFFFF, 0, 0). Cargo is unused.
+	cmsCIELab Lab;
+	double level;
+	WORD gray;
+
+	cmsLabEncoded2Float(&Lab, In);
+
+	if (!(fabs(Lab.a) < 3 && fabs(Lab.b) < 3)) {
+		Out[0] = 0xFFFF;
+		Out[1] = 0;
+		Out[2] = 0;
+		return TRUE;
+	}
+
+	// L / 100 is limited to 1 at the top only, then rounded to 16 bits
+	level = Lab.L / 100.0;
+	if (level > 1) level = 1;
+	gray = (WORD) floor(level * 65535.0 + 0.5);
+	Out[0] = Out[1] = Out[2] = gray;
+	return TRUE;
+}
+
+
+
+
+	
+int main(int argc, char *argv[])
+{
+	// Writes interpol2.icc: an RGB output profile whose only LUT is a BToA0
+	// table sampled with Forward().
+	const char* outName = "interpol2.icc";
+	cmsHPROFILE hProfile;
+	LPLUT lutBToA0;
+
+	fprintf(stderr, "Creating interpol2.icc...");
+
+	// Remove any previous output before opening the profile for writing
+	unlink(outName);
+	hProfile = cmsOpenProfileFromFile(outName, "w8");
+
+	// BToA0 table, filled by the Forward() sampler
+	lutBToA0 = cmsAllocLUT();
+	cmsAlloc3DGrid(lutBToA0, 17, 3, 3);
+	cmsSample3DGrid(lutBToA0, Forward, NULL, 0);
+	cmsAddTag(hProfile, icSigBToA0Tag, lutBToA0);
+
+	// Header fields: RGB color space, Lab PCS, output device class
+	cmsSetColorSpace(hProfile, icSigRgbData);
+	cmsSetPCS(hProfile, icSigLabData);
+	cmsSetDeviceClass(hProfile, icSigOutputClass);
+
+	cmsAddTag(hProfile, icSigProfileDescriptionTag, "Interpolation test");
+	cmsAddTag(hProfile, icSigCopyrightTag,          "Copyright (c) HP 2007. All rights reserved.");
+	cmsAddTag(hProfile, icSigDeviceMfgDescTag,      "Little cms");
+	cmsAddTag(hProfile, icSigDeviceModelDescTag,    "Interpolation test profile");
+
+	cmsCloseProfile(hProfile);
+	cmsFreeLUT(lutBToA0);
+
+	fprintf(stderr, "Done.\n");
+
+	return 0;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/mktiff8.c b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/mktiff8.c
new file mode 100755
index 0000000000..ab0b66b8a0
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/mktiff8.c
@@ -0,0 +1,150 @@
+//
+//  Little cms
+//  Copyright (C) 1998-2010 Marti Maria
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+// Creates a devicelink that decodes TIFF8 Lab files 
+
+#include "lcms2.h"
+#include <stdlib.h>
+#include <math.h>
+
+// Re-maps one a/b value by shifting it 127 units: upwards when it is <= 128,
+// downwards otherwise.
+static double DecodeAbTIFF(double ab)
+{
+	const double shift = 127.;
+	if (ab > 128.)
+		return ab - shift;
+
+	return ab + shift;
+}
+
+static cmsToneCurve* CreateStep(void)
+{
+	// Builds a 4096-entry 16-bit tone curve: entry i holds DecodeAbTIFF() of
+	// i rescaled to 0..255, multiplied by 257 and rounded. Returns NULL when
+	// the temporary table cannot be allocated.
+	enum { kEntries = 4096 };
+	cmsUInt16Number* samples;
+	cmsToneCurve* curve;
+	int idx;
+
+	samples = calloc(kEntries, sizeof(cmsUInt16Number));
+	if (samples == NULL)
+		return NULL;
+
+	for (idx = 0; idx < kEntries; idx++) {
+		double decoded = DecodeAbTIFF((double) idx * 255. / 4095.);
+
+		samples[idx] = (cmsUInt16Number) floor(decoded * 257. + 0.5);
+	}
+
+	curve = cmsBuildTabulatedToneCurve16(0, kEntries, samples);
+	free(samples);
+
+	return curve;
+}
+
+
+static cmsToneCurve* CreateLinear(void)
+{
+	// Two-point table: 0 at one end, 0xffff at the other
+	cmsUInt16Number endpoints[2] = { 0, 0xffff };
+
+	return cmsBuildTabulatedToneCurve16(0, 2, endpoints);
+}
+
+
+
+// Set the copyright and description.
+// Returns TRUE only when both text tags were written to hProfile, and FALSE
+// as soon as any allocation, string assignment or tag write fails.
+static cmsBool SetTextTags(cmsHPROFILE hProfile)
+{
+    cmsMLU* description;
+    cmsMLU* copyright;
+    cmsBool ok;
+
+    description = cmsMLUalloc(0, 1);
+    copyright   = cmsMLUalloc(0, 1);
+
+    // Short-circuit chain: evaluation stops at the first step that fails,
+    // in the same order as a sequence of "if (!step) goto cleanup" checks.
+    ok = description != NULL && copyright != NULL
+      && cmsMLUsetASCII(description, "en", "US", "Little cms Tiff8 CIELab")
+      && cmsMLUsetASCII(copyright,   "en", "US", "Copyright (c) Marti Maria, 2010. All rights reserved.")
+      && cmsWriteTag(hProfile, cmsSigProfileDescriptionTag, description)
+      && cmsWriteTag(hProfile, cmsSigCopyrightTag,          copyright);
+
+    // The MLU objects are released on success as well as on failure
+    if (description != NULL)
+        cmsMLUfree(description);
+    if (copyright != NULL)
+        cmsMLUfree(copyright);
+
+    return ok;
+}
+
+
+// Creates lcmstiff8.icm, a Lab-to-Lab devicelink (v4.2) whose AToB0 pipeline
+// holds three pre-linearization curves: linear for L, the TIFF8 step for a/b.
+// Returns 0 on success and 1 when any object cannot be created or written.
+int main(int argc, char *argv[])
+{
+	cmsHPROFILE hProfile;
+	cmsPipeline* AToB0 = NULL;
+	cmsStage* Curves = NULL;
+	cmsToneCurve* PreLinear[3];
+	cmsToneCurve *Lin, *Step;
+	int rc = 1;
+
+	fprintf(stderr, "Creating lcmstiff8.icm...");
+	remove("lcmstiff8.icm");
+	hProfile = cmsOpenProfileFromFile("lcmstiff8.icm", "w");
+	if (hProfile == NULL) { fprintf(stderr, "cannot create profile\n"); return 1; }
+
+	// Create linearization
+	PreLinear[0] = Lin  = CreateLinear();
+	PreLinear[1] = PreLinear[2] = Step = CreateStep();
+	AToB0 = cmsPipelineAlloc(0, 3, 3);
+
+	// Only touch the curves once every allocation is known to have succeeded
+	if (Lin != NULL && Step != NULL && AToB0 != NULL)
+		Curves = cmsStageAllocToneCurves(0, 3, PreLinear);
+
+	if (Curves != NULL) {
+		cmsPipelineInsertStage(AToB0, cmsAT_BEGIN, Curves);
+		cmsSetColorSpace(hProfile, cmsSigLabData);
+		cmsSetPCS(hProfile, cmsSigLabData);
+		cmsSetDeviceClass(hProfile, cmsSigLinkClass);
+		cmsSetProfileVersion(hProfile, 4.2);
+		if (cmsWriteTag(hProfile, cmsSigAToB0Tag, AToB0) && SetTextTags(hProfile))
+			rc = 0;
+	}
+
+	// NOTE(review): assumes cmsCloseProfile reports a failed write by returning FALSE -- confirm
+	if (!cmsCloseProfile(hProfile)) rc = 1;
+	if (Lin != NULL) cmsFreeToneCurve(Lin);
+	if (Step != NULL) cmsFreeToneCurve(Step);
+	if (AToB0 != NULL) cmsPipelineFree(AToB0);
+	fprintf(stderr, rc == 0 ? "Done.\n" : "FAILED.\n");
+	return rc;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/roundtrip.c b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/roundtrip.c
new file mode 100755
index 0000000000..94c8bdc140
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/roundtrip.c
@@ -0,0 +1,99 @@
+//
+//  Little cms
+//  Copyright (C) 1998-2011 Marti Maria
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#include "lcms2.h"
+#include <math.h>
+
+
+
+// Euclidean distance between two 8-bit RGB triplets.
+static double VecDist(cmsUInt8Number bin[3], cmsUInt8Number bout[3])
+{
+       double sumSq = 0;
+       int ch;
+       for (ch = 0; ch < 3; ch++) {
+              double delta = fabs((double) bout[ch] - bin[ch]);
+              sumSq += delta * delta;
+       }
+       return sqrt(sumSq);
+}
+
+
+// Measures the RGB -> RGB round-trip error of the profile given in argv[1]
+// over all 256^3 8-bit colors and prints average, peak and standard deviation.
+// Returns 0 on success, 1 on bad usage or when the profile cannot be used.
+int main(int  argc, char* argv[])
+{
+    int r, g, b;
+    cmsUInt8Number RGB[3], RGB_OUT[3];
+    cmsHTRANSFORM xform;
+    cmsHPROFILE hProfile;
+    double err, variance, SumX=0, SumX2=0, Peak = 0, n = 0;
+
+    if (argc != 2) {
+        printf("roundtrip <RGB icc profile>\n");
+        return 1;
+    }
+
+    hProfile = cmsOpenProfileFromFile(argv[1], "r");
+    if (hProfile == NULL)
+    {
+        printf("invalid profile\n");
+        return 1;
+    }
+
+    xform = cmsCreateTransform(hProfile,TYPE_RGB_8, hProfile, TYPE_RGB_8, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_NOOPTIMIZE);
+    if (xform == NULL)
+    {
+        printf("Not a valid RGB profile\n");
+        cmsCloseProfile(hProfile);   // do not leak the handle on this path
+        return 1;
+    }
+
+    for (r=0; r< 256; r++) {
+        printf("%d  \r", r);
+        fflush(stdout);              // no newline is printed, so push the progress out
+        for (g=0; g < 256; g++) {
+            for (b=0; b < 256; b++) {
+                RGB[0] = r;
+                RGB[1] = g;
+                RGB[2] = b;
+                cmsDoTransform(xform, RGB, RGB_OUT, 1);
+                err = VecDist(RGB, RGB_OUT);
+                SumX  += err;
+                SumX2 += err * err;
+                n += 1.0;
+                if (err > Peak)
+                    Peak = err;
+            }
+        }
+    }
+
+    // Rounding can push a (near) zero variance slightly below 0; clamp so sqrt stays defined
+    variance = (n*SumX2 - SumX * SumX) / (n*(n-1));
+    if (variance < 0) variance = 0;
+    printf("Average %g\n", SumX / n);
+    printf("Max %g\n", Peak);
+    printf("Std  %g\n", sqrt(variance));
+    cmsDeleteTransform(xform);
+    cmsCloseProfile(hProfile);
+    return 0;
+}
\ No newline at end of file
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/vericc.c b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/vericc.c
new file mode 100755
index 0000000000..9ac94a76c1
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/vericc.c
@@ -0,0 +1,65 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2010 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "lcms2.h"
+#include <string.h>
+#include <math.h>
+
+// Prints the command-line synopsis to stderr; always returns 0.
+static int PrintUsage(void)
+{
+	fputs("Sets profile version\n\nUsage: vericc --r<version> iccprofile.icc\n", stderr);
+	return 0;
+}
+
+// Sets the version of the ICC profile argv[2] to the number after "--r" in argv[1]; returns 0 on success or usage, 1 on error.
+int main(int argc, char *argv[])
+{
+	cmsHPROFILE hProfile;
+	const char* ptr;
+	cmsFloat64Number Version;
+
+	if (argc != 3)  return PrintUsage();
+	ptr = argv[1];
+	if (strncmp(ptr, "--r", 3) != 0) return PrintUsage();
+	ptr += 3;
+	// sscanf rather than atof: no <stdlib.h> include is visible in this file, and atof cannot report bad input
+	if (sscanf(ptr, "%lf", &Version) != 1) { fprintf(stderr, "Wrong version number\n"); return 1; }
+
+	hProfile = cmsOpenProfileFromFile(argv[2], "r");
+	if (hProfile == NULL) { fprintf(stderr, "'%s': cannot open\n", argv[2]); return 1; }
+	cmsSetProfileVersion(hProfile, Version);
+	// Only replace the original once the new copy has been written successfully
+	if (!cmsSaveProfileToFile(hProfile, "$$tmp.icc")) {
+		fprintf(stderr, "'%s': cannot write temporary copy\n", argv[2]);
+		cmsCloseProfile(hProfile);
+		return 1;
+	}
+	cmsCloseProfile(hProfile);
+	remove(argv[2]);
+	if (rename("$$tmp.icc", argv[2]) != 0) { fprintf(stderr, "'%s': cannot replace\n", argv[2]); return 1; }
+	return 0;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/wtpt.1 b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/wtpt.1
new file mode 100755
index 0000000000..fbd37ac2b1
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/wtpt.1
@@ -0,0 +1,28 @@
+.\"Shiju P. Nair September 30, 2004
+.TH WTPT 1 "September 30, 2004"
+.SH NAME 
+wtpt - Show media white of profiles, identifying black body locus.
+.SH SYNOPSIS
+.B wtpt
+.RI [ profile ]
+.SH DESCRIPTION
+lcms is a standalone CMM engine, which deals with the color management.
+It implements a fast transformation between ICC profiles.
+.B wtpt 
+shows media white of profiles, identifying black body locus.
+.P
+If no parameters are given, then this program will
+ask for XYZ value of media white. If parameter given, it must be
+the profile to inspect.
+.SH NOTES
+For suggestions, comments, bug reports etc. send mail to
+info@littlecms.com
+.SH SEE ALSO
+.BR jpegicc (1),
+.BR tifficc (1),
+.BR icc2ps (1),
+.BR icclink (1),
+.BR icctrans (1)
+.SH AUTHOR
+This manual page was written by Shiju P. Nair <shiju.p@gmail.com>,
+for the Debian project.
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/wtpt.c b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/wtpt.c
new file mode 100755
index 0000000000..45602f796a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/samples/wtpt.c
@@ -0,0 +1,144 @@
+//
+//  Little cms
+//  Copyright (C) 1998-2015 Marti Maria
+//
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2014 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+
+#include "utils.h"
+
+
+// The toggles stuff
+
+static cmsBool lShowXYZ = TRUE;
+static cmsBool lShowLab = FALSE;
+static cmsBool lShowLCh = FALSE;
+
+static void HandleSwitches(int argc, char *argv[])
+{
+       // Consumes the l / c / x toggles through xgetopt(); any other option
+       // is reported through FatalError().
+       int opt;
+
+       for (;;) {
+              opt = xgetopt(argc, argv, "lcx");
+              if (opt == EOF)
+                     break;
+
+              if (opt == 'l') {
+                     // l: also print the Lab value
+                     lShowLab = TRUE;
+              }
+              else if (opt == 'c') {
+                     // c: also print the LCh value
+                     lShowLCh = TRUE;
+              }
+              else if (opt == 'x') {
+                     // x: suppress the XYZ value
+                     lShowXYZ = FALSE;
+              }
+              else {
+                     FatalError("Unknown option - run without args to see valid ones.\n");
+              }
+       }
+}
+
+static void Help(void)
+{
+       // Prints the banner, usage and flag list to stderr, then exits with status 0.
+       fprintf(stderr, "little CMS ICC white point utility - v3 [LittleCMS %2.2f]\n", LCMS_VERSION / 1000.0);
+
+       fprintf(stderr, "usage: wtpt [flags] [<ICC profile>]\n\n"
+                       "flags:\n\n");
+
+       // SW is printed in front of each flag letter
+       fprintf(stderr, "%cl - CIE Lab\n"
+                       "%cc - CIE LCh\n"
+                       "%cx - Don't show XYZ\n", SW, SW, SW);
+
+       fprintf(stderr, "\nIf no parameters are given, then this program will\n"
+                       "ask for XYZ value of media white. If parameter given, it must be\n"
+                       "the profile to inspect.\n\n");
+
+       fprintf(stderr, "This program is intended to be a demo of the little cms\n"
+                       "engine. Both lcms and this program are freeware. You can\n"
+                       "obtain both in source code at http://www.littlecms.com\n"
+                       "For suggestions, comments, bug reports etc. send mail to\n"
+                       "info@littlecms.com\n\n");
+       exit(0);
+}
+
+
+
+static void ShowWhitePoint(cmsCIEXYZ* WtPt)
+{
+       // Prints the white point as XYZ / Lab / LCh according to the lShow* toggles
+       // and always prints "Sens" = 100 * C / sqrt(C^2 + L^2) from the LCh value.
+       cmsCIELab Lab;
+       cmsCIELCh LCh;
+       cmsCIExyY xyY;
+       double sens;
+
+       cmsXYZ2Lab(NULL, &Lab, WtPt);
+       cmsLab2LCh(&LCh, &Lab);
+       cmsXYZ2xyY(&xyY, WtPt);   // xyY is computed but not printed
+
+       if (lShowXYZ)
+              printf("XYZ=(%3.1f, %3.1f, %3.1f)\n", WtPt->X, WtPt->Y, WtPt->Z);
+       if (lShowLab)
+              printf("Lab=(%3.3f, %3.3f, %3.3f)\n", Lab.L, Lab.a, Lab.b);
+       if (lShowLCh)
+              printf("LCh=(%3.3f, %3.3f, %3.3f)\n", LCh.L, LCh.C, LCh.h);
+       sens = (LCh.C * 100.0 )/ sqrt(LCh.C*LCh.C + LCh.L * LCh.L);
+       printf("Sens = %f\n", sens);
+}
+
+
+// Parses the toggles, then prints the media white point of the single profile named on the command line; returns 0 on success, 1 on failure.
+int main(int argc, char *argv[])
+{
+       int nargs;
+       int rc = 0;
+
+       InitUtils("wtpt");
+       HandleSwitches(argc, argv);
+
+       nargs = (argc - xoptind);
+       if (nargs != 1)
+              Help();
+       else {
+              cmsCIEXYZ* WtPt;
+              cmsHPROFILE hProfile = cmsOpenProfileFromFile(argv[xoptind], "r");
+              if (hProfile == NULL) return 1;
+
+              // A profile without a readable media white point tag must not reach ShowWhitePoint()
+              WtPt = cmsReadTag(hProfile, cmsSigMediaWhitePointTag);
+              if (WtPt == NULL) { fprintf(stderr, "'%s': no media white point tag\n", argv[xoptind]); rc = 1; }
+              else ShowWhitePoint(WtPt);
+              cmsCloseProfile(hProfile);
+       }
+       return rc;
+}
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/tificc/Makefile.am b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/tificc/Makefile.am
new file mode 100755
index 0000000000..50f5dc441e
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/tificc/Makefile.am
@@ -0,0 +1,25 @@
+#
+# Makefile for building tificc
+# Originally written by Bob Friesenhahn, June 2003
+# bugs introduced by Marti Maria 
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
+              -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+                
+# tificc is built only when the HasTIFF conditional is true
+if HasTIFF
+bin_PROGRAMS = tificc
+else
+bin_PROGRAMS =
+endif
+# Link against the in-tree liblcms2 plus the substituted TIFF dependency libraries
+tificc_LDADD = $(top_builddir)/src/liblcms2.la @TIFFICC_DEPLIBS@
+tificc_LDFLAGS = @LDFLAGS@
+tificc_SOURCES = tificc.c ../common/xgetopt.c ../common/vprf.c ../common/utils.h
+man_MANS = tificc.1
+
+# The man page is listed explicitly so that it is included in the distribution
+EXTRA_DIST = $(man_MANS)
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/tificc/Makefile.in b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/tificc/Makefile.in
new file mode 100755
index 0000000000..de4d9c2964
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/tificc/Makefile.in
@@ -0,0 +1,739 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for building tificc
+# Originally written by Bob Friesenhahn, June 2003
+# bugs introduced by Marti Maria 
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+@HasTIFF_TRUE@bin_PROGRAMS = tificc$(EXEEXT)
+subdir = utils/tificc
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_append_compile_flags.m4 \
+	$(top_srcdir)/m4/ax_append_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+	$(top_srcdir)/m4/ax_require_defined.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"
+PROGRAMS = $(bin_PROGRAMS)
+am__dirstamp = $(am__leading_dot)dirstamp
+am_tificc_OBJECTS = tificc.$(OBJEXT) ../common/xgetopt.$(OBJEXT) \
+	../common/vprf.$(OBJEXT)
+tificc_OBJECTS = $(am_tificc_OBJECTS)
+tificc_DEPENDENCIES = $(top_builddir)/src/liblcms2.la
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 = 
+tificc_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(tificc_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(tificc_SOURCES)
+DIST_SOURCES = $(tificc_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
+man1dir = $(mandir)/man1
+NROFF = nroff
+MANS = $(man_MANS)
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_THREAD = @LIB_THREAD@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_CXX = @PTHREAD_CXX@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
+              -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+
+tificc_LDADD = $(top_builddir)/src/liblcms2.la @TIFFICC_DEPLIBS@
+tificc_LDFLAGS = @LDFLAGS@
+tificc_SOURCES = tificc.c ../common/xgetopt.c ../common/vprf.c ../common/utils.h
+man_MANS = tificc.1
+EXTRA_DIST = $(man_MANS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign utils/tificc/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign utils/tificc/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binPROGRAMS: $(bin_PROGRAMS)
+	@$(NORMAL_INSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+	fi; \
+	for p in $$list; do echo "$$p $$p"; done | \
+	sed 's/$(EXEEXT)$$//' | \
+	while read p p1; do if test -f $$p \
+	 || test -f $$p1 \
+	  ; then echo "$$p"; echo "$$p"; else :; fi; \
+	done | \
+	sed -e 'p;s,.*/,,;n;h' \
+	    -e 's|.*|.|' \
+	    -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+	sed 'N;N;N;s,\n, ,g' | \
+	$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+	  { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+	    if ($$2 == $$4) files[d] = files[d] " " $$1; \
+	    else { print "f", $$3 "/" $$4, $$1; } } \
+	  END { for (d in files) print "f", d, files[d] }' | \
+	while read type dir files; do \
+	    if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+	    test -z "$$files" || { \
+	    echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+	    $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+	    } \
+	; done
+
+uninstall-binPROGRAMS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	files=`for p in $$list; do echo "$$p"; done | \
+	  sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+	      -e 's/$$/$(EXEEXT)/' \
+	`; \
+	test -n "$$list" || exit 0; \
+	echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+	cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+	@list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+	echo " rm -f" $$list; \
+	rm -f $$list || exit $$?; \
+	test -n "$(EXEEXT)" || exit 0; \
+	list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+	echo " rm -f" $$list; \
+	rm -f $$list
+../common/$(am__dirstamp):
+	@$(MKDIR_P) ../common
+	@: > ../common/$(am__dirstamp)
+../common/xgetopt.$(OBJEXT): ../common/$(am__dirstamp)
+../common/vprf.$(OBJEXT): ../common/$(am__dirstamp)
+
+tificc$(EXEEXT): $(tificc_OBJECTS) $(tificc_DEPENDENCIES) $(EXTRA_tificc_DEPENDENCIES) 
+	@rm -f tificc$(EXEEXT)
+	$(AM_V_CCLD)$(tificc_LINK) $(tificc_OBJECTS) $(tificc_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+	-rm -f ../common/*.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+.c.o:
+	$(AM_V_CC)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+	$(AM_V_CC)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+	$(AM_V_CC)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+install-man1: $(man_MANS)
+	@$(NORMAL_INSTALL)
+	@list1=''; \
+	list2='$(man_MANS)'; \
+	test -n "$(man1dir)" \
+	  && test -n "`echo $$list1$$list2`" \
+	  || exit 0; \
+	echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \
+	$(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \
+	{ for i in $$list1; do echo "$$i"; done;  \
+	if test -n "$$list2"; then \
+	  for i in $$list2; do echo "$$i"; done \
+	    | sed -n '/\.1[a-z]*$$/p'; \
+	fi; \
+	} | while read p; do \
+	  if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
+	  echo "$$d$$p"; echo "$$p"; \
+	done | \
+	sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
+	      -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \
+	sed 'N;N;s,\n, ,g' | { \
+	list=; while read file base inst; do \
+	  if test "$$base" = "$$inst"; then list="$$list $$file"; else \
+	    echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \
+	    $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \
+	  fi; \
+	done; \
+	for i in $$list; do echo "$$i"; done | $(am__base_list) | \
+	while read files; do \
+	  test -z "$$files" || { \
+	    echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \
+	    $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \
+	done; }
+
+uninstall-man1:
+	@$(NORMAL_UNINSTALL)
+	@list=''; test -n "$(man1dir)" || exit 0; \
+	files=`{ for i in $$list; do echo "$$i"; done; \
+	l2='$(man_MANS)'; for i in $$l2; do echo "$$i"; done | \
+	  sed -n '/\.1[a-z]*$$/p'; \
+	} | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
+	      -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \
+	dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir)
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS) $(MANS)
+installdirs:
+	for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+	-rm -f ../common/$(am__dirstamp)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-man
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man: install-man1
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS uninstall-man
+
+uninstall-man: uninstall-man1
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean \
+	clean-binPROGRAMS clean-generic clean-libtool cscopelist-am \
+	ctags ctags-am distclean distclean-compile distclean-generic \
+	distclean-libtool distclean-tags distdir dvi dvi-am html \
+	html-am info info-am install install-am install-binPROGRAMS \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-man1 \
+	install-pdf install-pdf-am install-ps install-ps-am \
+	install-strip installcheck installcheck-am installdirs \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+	pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \
+	uninstall-binPROGRAMS uninstall-man uninstall-man1
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/tificc/tifdiff.c b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/tificc/tifdiff.c
new file mode 100755
index 0000000000..b00343062e
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/tificc/tifdiff.c
@@ -0,0 +1,708 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "utils.h"
+#include "tiffio.h"
+
+
+// ------------------------------------------------------------------------
+
+// TIFF handles (presumably the two inputs and the diff image - confirm) and output names
+static TIFF *Tiff1;
+static TIFF *Tiff2;
+static TIFF *TiffDiff;
+static const char *TiffDiffFilename, *CGATSout;    // arguments of the o and g switches
+
+// Running accumulator for one series of samples (see AddOnePixel)
+typedef struct {
+    double n;       // number of samples added
+    double x;       // sum of the samples
+    double x2;      // sum of the squared samples
+    double Min;     // smallest sample seen
+    double Peak;    // largest sample seen
+} STAT, *LPSTAT;
+
+static STAT ColorantStat[4], EuclideanStat, ColorimetricStat;
+static uint16 Channels;         // filled from TIFFTAG_SAMPLESPERPIXEL in CmpImages
+static cmsHPROFILE hLab;        // output profile of the transforms made in OpenEmbedded
+
+
+// Formats "module: message." and, only when Verbose is set, prints it to
+// stderr as a warning. Over-long text is truncated instead of overrunning e[].
+static
+void ConsoleWarningHandler(const char* module, const char* fmt, va_list ap)
+{
+        char e[512] = { '\0' };
+        if (module != NULL)
+              snprintf(e, sizeof(e) - 1, "%s: ", module);
+        // One byte is held back for the "." appended after the message
+        vsnprintf(e+strlen(e), sizeof(e) - 1 - strlen(e), fmt, ap);
+        strcat(e, ".");
+        if (!Verbose) return;
+        fprintf(stderr, "\nWarning");
+        fprintf(stderr, " %s\n", e);
+        fflush(stderr);
+}
+
+static
+void ConsoleErrorHandler(const char* module, const char* fmt, va_list ap)
+{
+       char e[512] = { '\0' };
+
+       if (module != NULL)
+              strcat(strcpy(e, module), ": ");
+
+       vsprintf(e+strlen(e), fmt, ap);
+       strcat(e, ".");
+       fprintf(stderr, "\nError");
+       fprintf(stderr, " %s\n", e);
+       fflush(stderr);
+}
+
+
+
+// Writes the command-line usage summary to stderr, flushes it and ends the
+// process with exit status 0, so control never returns to the caller.
+static
+void Help()
+{
+    FILE* out = stderr;
+
+    fputs("Little cms TIFF compare utility. v1.0\n\n", out);
+    fputs("usage: tiffdiff [flags] input.tif output.tif\n", out);
+    fputs("\nflags:\n\n", out);
+
+    // SW (defined outside this file) is printed in front of every flag letter
+    fprintf(out, "%co<tiff>   - Output TIFF file\n", SW);
+    fprintf(out, "%cg<CGATS>  - Output results in CGATS file\n", SW);
+    fputs("\n", out);
+
+    fprintf(out, "%cv - Verbose (show warnings)\n", SW);
+    fprintf(out, "%ch - This help\n", SW);
+
+    fflush(out);
+    exit(0);
+}
+
+
+
+// The toggles stuff
+
+static
+void HandleSwitches(int argc, char *argv[])
+{
+    // Walks the command line with xgetopt() until it reports EOF and records
+    // each recognized switch in the matching global. Every switch is accepted
+    // in lower or upper case:
+    //   v  turn on Verbose
+    //   o  remember the argument as the difference-TIFF file name
+    //   g  remember the argument as the CGATS report file name
+    //   h  print the usage text via Help()
+    // Anything else is reported through FatalError().
+
+    int opt;
+
+    for (;;) {
+
+        opt = xgetopt(argc, argv, "o:O:hHvVg:G:");
+        if (opt == EOF)
+            break;
+
+        if (opt == 'v' || opt == 'V') {
+            Verbose = TRUE;
+        }
+        else if (opt == 'o' || opt == 'O') {
+            TiffDiffFilename = xoptarg;
+        }
+        else if (opt == 'h' || opt == 'H') {
+            Help();
+        }
+        else if (opt == 'g' || opt == 'G') {
+            CGATSout = xoptarg;
+        }
+        else {
+            FatalError("Unknown option - run without args to see valid ones");
+        }
+    }
+}
+
+
+// Resets an accumulator: count, sums and peak become 0, while Min starts at
+// 1E10 so that any smaller sample added later replaces it.
+static
+void ClearStatistics(LPSTAT st)
+{
+    st->n = 0;  st->x = 0;  st->x2 = 0;  st->Peak = 0;
+    st->Min = 1E10;
+}
+
+
+// Folds one sample dE into the count, sum, sum of squares, minimum and peak
+static
+void AddOnePixel(LPSTAT st, double dE)
+{
+    st->n += 1.0;  st->x += dE;  st->x2 += (dE * dE);
+    st->Peak = (dE > st->Peak) ? dE : st->Peak;
+    st->Min  = (dE < st->Min)  ? dE : st->Min;
+}
+
+// Sample standard deviation; 0 (not NaN) if n < 2 or rounding makes the variance negative
+static double Std(LPSTAT st)
+{
+    return (st->n > 1 && st->n * st->x2 > st->x * st->x) ? sqrt((st->n * st->x2 - st->x * st->x) / (st->n * (st->n - 1))) : 0;
+}
+
+// Arithmetic mean of the accumulated values; 0 (not NaN) while no sample has been added
+static
+double Mean(LPSTAT st)
+{
+    return (st->n > 0) ? st->x / st->n : 0;
+}
+
+// Build up the pixeltype descriptor
+
+static
+cmsUInt32Number GetInputPixelType(TIFF *Bank)
+{
+     // Derives the lcms pixel-format word (colour space, channel count, extra
+     // samples, bytes per sample, planar and flavor bits) for the open TIFF
+     // Bank. Unsupported layouts are reported through FatalError(), which is
+     // assumed not to return (TODO confirm against the shared utils code).
+     uint16 Photometric, bps, spp, extra, PlanarConfig, *info;
+     uint16 reverse = 0;
+     int ColorChannels, IsPlanar = 0, pt = 0;
+
+     // TIFFGetField() returns 0 and leaves Photometric unset when the tag is
+     // missing, so its result must be checked before the value is used
+     if (!TIFFGetField(Bank, TIFFTAG_PHOTOMETRIC, &Photometric))
+       FatalError("Missing photometric interpretation tag");
+
+     TIFFGetFieldDefaulted(Bank,  TIFFTAG_BITSPERSAMPLE, &bps);
+     if (bps == 1)
+       FatalError("Sorry, bilevel TIFFs has nothig to do with ICC profiles");
+
+     if (bps != 8 && bps != 16)
+              FatalError("Sorry, 8 or 16 bits per sample only");
+
+     TIFFGetFieldDefaulted(Bank, TIFFTAG_SAMPLESPERPIXEL, &spp);
+     TIFFGetFieldDefaulted(Bank, TIFFTAG_PLANARCONFIG, &PlanarConfig);
+
+     switch (PlanarConfig)
+     {
+     case PLANARCONFIG_CONTIG: IsPlanar = 0; break;
+     case PLANARCONFIG_SEPARATE: FatalError("Planar TIFF are not supported"); break;
+     default:
+
+     FatalError("Unsupported planar configuration (=%d) ", (int) PlanarConfig);
+     }
+
+     // If Samples per pixel == 1, PlanarConfiguration is irrelevant and need
+     // not to be included.
+     if (spp == 1) IsPlanar = 0;
+
+     // Any alpha? Extra samples are carried along but are not colour channels
+     TIFFGetFieldDefaulted(Bank, TIFFTAG_EXTRASAMPLES, &extra, &info);
+     ColorChannels = spp - extra;
+
+     switch (Photometric) {
+
+     case PHOTOMETRIC_MINISWHITE:
+
+            reverse = 1;
+            // deliberate fall through: same colour space, reversed flavor
+
+     case PHOTOMETRIC_MINISBLACK:
+
+            pt = PT_GRAY;
+            break;
+
+     case PHOTOMETRIC_RGB:
+
+            pt = PT_RGB;
+            break;
+
+     case PHOTOMETRIC_PALETTE:
+
+            FatalError("Sorry, palette images not supported (at least on this version)");
+            break;
+
+     case PHOTOMETRIC_SEPARATED:
+           pt = PixelTypeFromChanCount(ColorChannels);
+           break;
+
+     case PHOTOMETRIC_YCBCR:
+           {
+                  uint16 subx, suby;
+
+                  pt = PT_YCbCr;
+                  TIFFGetFieldDefaulted(Bank, TIFFTAG_YCBCRSUBSAMPLING, &subx, &suby);
+                  if (subx != 1 || suby != 1)
+                         FatalError("Sorry, subsampled images not supported");
+
+           }
+           break;
+
+     case 9:                      // numeric photometric code 9 is handled like CIELAB
+     case PHOTOMETRIC_CIELAB:
+           pt = PT_Lab;
+           break;
+
+     case PHOTOMETRIC_LOGLUV:      /* CIE Log2(L) (u',v') */
+
+           TIFFSetField(Bank, TIFFTAG_SGILOGDATAFMT, SGILOGDATAFMT_16BIT);
+           pt = PT_YUV;             // *ICCSpace = icSigLuvData;
+           bps = 16;               // 16 bits forced by LibTiff
+           break;
+
+     default:
+           FatalError("Unsupported TIFF color space (Photometric %d)", Photometric);
+     }
+
+     // Convert bits per sample to bytes per sample
+     bps >>= 3;
+
+     return (COLORSPACE_SH(pt)|PLANAR_SH(IsPlanar)|EXTRA_SH(extra)|CHANNELS_SH(ColorChannels)|BYTES_SH(bps)|FLAVOR_SH(reverse));
+}
+
+
+
+static
+cmsUInt32Number OpenEmbedded(TIFF* tiff, cmsHPROFILE* PtrProfile, cmsHTRANSFORM* PtrXform)
+{
+    // Looks for an ICC profile embedded in tiff. When one is present, opens
+    // it into *PtrProfile, creates a transform from the image's pixel format
+    // to Lab doubles (via the file-level hLab profile) into *PtrXform and
+    // returns that pixel format. Without an embedded profile both outputs
+    // are NULL and the result is 0.
+    cmsUInt32Number ProfileSize = 0, PixelFormat;
+    cmsUInt8Number* ProfileData = NULL;
+
+    *PtrXform   = NULL;
+    *PtrProfile = NULL;
+
+    if (!TIFFGetField(tiff, TIFFTAG_ICCPROFILE, &ProfileSize, &ProfileData))
+        return 0;
+
+    *PtrProfile = cmsOpenProfileFromMem(ProfileData, ProfileSize);
+
+    if (Verbose) {
+        fprintf(stdout, "Embedded profile found:\n");
+        PrintProfileInformation(*PtrProfile);
+    }
+
+    PixelFormat = GetInputPixelType(tiff);
+    *PtrXform   = cmsCreateTransform(*PtrProfile, PixelFormat, hLab, TYPE_Lab_DBL,
+                                     INTENT_RELATIVE_COLORIMETRIC, 0);
+    return PixelFormat;
+}
+
+
+// Number of bytes one pixel occupies in a buffer described by dwFormat (color + extra samples).
+static
+size_t PixelSize(cmsUInt32Number dwFormat)
+{
+    return (T_CHANNELS(dwFormat) + T_EXTRA(dwFormat)) * T_BYTES(dwFormat);
+}
+
+// Compares tiff1 with tiff2 pixel by pixel, feeding the global statistics accumulators and,
+// when 'diff' is given, writing an 8-bit gray difference map. Returns 1 on success, 0 on error.
+static
+int CmpImages(TIFF* tiff1, TIFF* tiff2, TIFF* diff)
+{
+    cmsUInt8Number* buf1, *buf2, *buf3=NULL;
+    int row, cols, imagewidth = 0, imagelength = 0;
+    uint16   Photometric;
+    double dE = 0;
+    double dR, dG, dB, dC, dM, dY, dK;
+    int rc = 0;
+    cmsHPROFILE hProfile1 = 0, hProfile2 = 0;
+    cmsHTRANSFORM xform1 = 0, xform2 = 0;
+    cmsUInt32Number dwFormat1, dwFormat2;
+
+      TIFFGetField(tiff1, TIFFTAG_PHOTOMETRIC, &Photometric);
+      TIFFGetField(tiff1, TIFFTAG_IMAGEWIDTH,  &imagewidth);
+      TIFFGetField(tiff1, TIFFTAG_IMAGELENGTH, &imagelength);
+      TIFFGetField(tiff1, TIFFTAG_SAMPLESPERPIXEL, &Channels);
+
+      dwFormat1 = OpenEmbedded(tiff1, &hProfile1, &xform1);
+      dwFormat2 = OpenEmbedded(tiff2, &hProfile2, &xform2);
+
+      // Scanline buffers: a failed allocation is reported as an error, never dereferenced
+      buf1 = (cmsUInt8Number*)_TIFFmalloc(TIFFScanlineSize(tiff1));
+      buf2 = (cmsUInt8Number*)_TIFFmalloc(TIFFScanlineSize(tiff2));
+      if (buf1 == NULL || buf2 == NULL) goto Error;
+
+      if (diff) {
+
+           TIFFSetField(diff, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_MINISBLACK);
+           TIFFSetField(diff, TIFFTAG_COMPRESSION, COMPRESSION_NONE);
+           TIFFSetField(diff, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG);
+
+           TIFFSetField(diff, TIFFTAG_IMAGEWIDTH,  imagewidth);
+           TIFFSetField(diff, TIFFTAG_IMAGELENGTH, imagelength);
+
+           TIFFSetField(diff, TIFFTAG_SAMPLESPERPIXEL, 1);
+           TIFFSetField(diff, TIFFTAG_BITSPERSAMPLE, 8);
+           buf3 = (cmsUInt8Number*)_TIFFmalloc(TIFFScanlineSize(diff));
+           if (buf3 == NULL) goto Error;
+      }
+
+      // NOTE(review): the pixel indexing below assumes 1, 3 or 4 samples per pixel and
+      // no extra (alpha) samples; other layouts would be read misaligned.
+      for (row = 0; row < imagelength; row++) {
+
+        if (TIFFReadScanline(tiff1, buf1, row, 0) < 0) goto Error;
+        if (TIFFReadScanline(tiff2, buf2, row, 0) < 0) goto Error;
+
+
+        for (cols = 0; cols < imagewidth; cols++) {
+
+
+            switch (Photometric) {
+
+            case PHOTOMETRIC_MINISWHITE:
+            case PHOTOMETRIC_MINISBLACK:
+
+                    dE = fabs(buf2[cols] - buf1[cols]);
+
+                    AddOnePixel(&ColorantStat[0], dE);
+                    AddOnePixel(&EuclideanStat, dE);
+                    break;
+
+            case PHOTOMETRIC_RGB:
+
+                    {
+                        int index = 3 * cols;
+
+                        dR = fabs(buf2[index+0] - buf1[index+0]);
+                        dG = fabs(buf2[index+1] - buf1[index+1]);
+                        dB = fabs(buf2[index+2] - buf1[index+2]);
+
+                        dE = sqrt(dR * dR + dG * dG + dB * dB) / sqrt(3.);
+                    }
+
+                    AddOnePixel(&ColorantStat[0], dR);
+                    AddOnePixel(&ColorantStat[1], dG);
+                    AddOnePixel(&ColorantStat[2], dB);
+                    AddOnePixel(&EuclideanStat,   dE);
+                    break;
+
+            case PHOTOMETRIC_SEPARATED:
+
+                    {
+                        int index = 4 * cols;
+
+                        dC = fabs(buf2[index+0] - buf1[index+0]);
+                        dM = fabs(buf2[index+1] - buf1[index+1]);
+                        dY = fabs(buf2[index+2] - buf1[index+2]);
+                        dK = fabs(buf2[index+3] - buf1[index+3]);
+
+                        dE = sqrt(dC * dC + dM * dM + dY * dY + dK * dK) / 2.;
+                    }
+                    AddOnePixel(&ColorantStat[0], dC);
+                    AddOnePixel(&ColorantStat[1], dM);
+                    AddOnePixel(&ColorantStat[2], dY);
+                    AddOnePixel(&ColorantStat[3], dK);
+                    AddOnePixel(&EuclideanStat,   dE);
+                    break;
+
+            default:
+                    FatalError("Unsupported channels: %d", Channels);
+            }
+
+            // With two embedded profiles dE becomes the Lab difference (also used for the map)
+            if (xform1 && xform2) {
+
+
+                cmsCIELab Lab1, Lab2;
+                size_t index1 = cols * PixelSize(dwFormat1);
+                size_t index2 = cols * PixelSize(dwFormat2);
+
+                cmsDoTransform(xform1, &buf1[index1], &Lab1,  1);
+                cmsDoTransform(xform2, &buf2[index2], &Lab2,  1);
+
+                dE = cmsDeltaE(&Lab1, &Lab2);
+                AddOnePixel(&ColorimetricStat, dE);
+            }
+
+            // A Lab dE can exceed 255: clamp so the double -> 8 bit conversion stays defined
+            if (diff) {
+                buf3[cols] = (cmsUInt8Number) ((dE >= 255.0) ? 255 : floor(dE + 0.5));
+            }
+
+        }
+
+        if (diff) {
+
+                if (TIFFWriteScanline(diff, buf3, row, 0) < 0) goto Error;
+        }
+
+
+      }
+
+     rc = 1;
+
+Error:
+
+     if (hProfile1) cmsCloseProfile(hProfile1);
+     if (hProfile2) cmsCloseProfile(hProfile2);
+     if (xform1) cmsDeleteTransform(xform1);
+     if (xform2) cmsDeleteTransform(xform2);
+      if (buf1 != NULL) _TIFFfree(buf1); if (buf2 != NULL) _TIFFfree(buf2);
+      if (diff) {
+           TIFFWriteDirectory(diff);
+          if (buf3 != NULL) _TIFFfree(buf3);
+      }
+      return rc;
+}
+
+
+// Fatal error "<Error> is not proper" unless 'tag' is present and equals Val in both files.
+static
+void AssureShortTagIs(TIFF* tif1, TIFF* tiff2, int tag, int Val, const char* Error)
+{
+        TIFF*  Files[2];
+        uint16 Value;
+        int    i;
+
+        Files[0] = tif1;
+        Files[1] = tiff2;
+        for (i = 0; i < 2; i++) {
+                if (TIFFGetField(Files[i], tag, &Value) && Value == Val) continue;
+                FatalError("%s is not proper", Error);
+                return;
+        }
+}
+
+
+// Returns nonzero when both files define the 16-bit 'tag' and the two values are equal.
+static
+int CmpShortTag(TIFF* tif1, TIFF* tif2, int tag)
+{
+        uint16 First, Second;
+        int Found = TIFFGetField(tif1, tag, &First) &&
+                    TIFFGetField(tif2, tag, &Second);
+
+        return Found ? (First == Second) : 0;
+}
+
+// Returns nonzero when both files define the 32-bit 'tag' and the two values are equal.
+static
+int CmpLongTag(TIFF* tif1, TIFF* tif2, int tag)
+{
+        uint32 First, Second;
+
+        if (TIFFGetField(tif1, tag, &First) && TIFFGetField(tif2, tag, &Second))
+                return First == Second;
+        return 0;
+}
+
+
+// Fatal error "<Error> is different" unless both files agree on the 16-bit 'tag'.
+static
+void EqualShortTag(TIFF* tif1, TIFF* tif2, int tag, const char* Error)
+{
+    if (CmpShortTag(tif1, tif2, tag)) return;
+    FatalError("%s is different", Error);
+}
+
+
+// Fatal error "<Error> is different" unless both files agree on the 32-bit 'tag'.
+static
+void EqualLongTag(TIFF* tif1, TIFF* tif2, int tag, const char* Error)
+{
+    int Same = CmpLongTag(tif1, tif2, tag);
+    if (!Same) FatalError("%s is different", Error);
+}
+
+
+// Emits one CGATS row named 'Name' holding the statistics accumulated in 'st'.
+static
+void AddOneCGATSRow(cmsHANDLE hIT8, char *Name, LPSTAT st)
+{
+    // Mean difference expressed as the percentage of the 0..255 range left untouched
+    double PercentEqual = 100.0 * ((255.0 - Mean(st)) / 255.0);
+
+    cmsIT8SetData   (hIT8, Name, "SAMPLE_ID",    Name);
+    cmsIT8SetDataDbl(hIT8, Name, "PER100_EQUAL", PercentEqual);
+    cmsIT8SetDataDbl(hIT8, Name, "MEAN_DE",      Mean(st));
+    cmsIT8SetDataDbl(hIT8, Name, "STDEV_DE",     Std(st));
+    cmsIT8SetDataDbl(hIT8, Name, "MIN_DE",       st->Min);
+    cmsIT8SetDataDbl(hIT8, Name, "MAX_DE",       st->Peak);
+}
+
+
+// Writes the accumulated statistics as a CGATS ("TIFFDIFF") sheet to the file named by
+// CGATSout: one row per colorant plane plus the EUCLIDEAN and COLORIMETRIC rows.
+static
+void CreateCGATS(const char* TiffName1, const char* TiffName2)
+{
+    cmsHANDLE hIT8 = cmsIT8Alloc(0);
+    time_t ltime;
+    char Buffer[256];
+
+    cmsIT8SetSheetType(hIT8, "TIFFDIFF");
+    // File names come from the command line: bound the write so long paths are truncated
+    snprintf(Buffer, sizeof(Buffer), "Differences between %s and %s", TiffName1, TiffName2);
+    cmsIT8SetComment(hIT8, Buffer);
+
+    cmsIT8SetPropertyStr(hIT8, "ORIGINATOR", "TIFFDIFF");
+    time( &ltime );
+    strcpy(Buffer, ctime(&ltime));
+    Buffer[strlen(Buffer)-1] = 0;     // Remove the nasty "\n"
+
+    cmsIT8SetPropertyStr(hIT8, "CREATED", Buffer);
+
+    cmsIT8SetComment(hIT8, " ");
+
+    cmsIT8SetPropertyDbl(hIT8, "NUMBER_OF_FIELDS", 6);
+
+
+    cmsIT8SetDataFormat(hIT8, 0, "SAMPLE_ID");
+    cmsIT8SetDataFormat(hIT8, 1, "PER100_EQUAL");
+    cmsIT8SetDataFormat(hIT8, 2, "MEAN_DE");
+    cmsIT8SetDataFormat(hIT8, 3, "STDEV_DE");
+    cmsIT8SetDataFormat(hIT8, 4, "MIN_DE");
+    cmsIT8SetDataFormat(hIT8, 5, "MAX_DE");
+
+    // NUMBER_OF_SETS = colorant planes + the EUCLIDEAN and COLORIMETRIC rows added at the end
+    switch (Channels) {
+
+    case 1:
+            cmsIT8SetPropertyDbl(hIT8, "NUMBER_OF_SETS", 3);
+            AddOneCGATSRow(hIT8, "GRAY_PLANE", &ColorantStat[0]);
+            break;
+
+    case 3:
+            cmsIT8SetPropertyDbl(hIT8, "NUMBER_OF_SETS", 5);
+            AddOneCGATSRow(hIT8, "R_PLANE", &ColorantStat[0]);
+            AddOneCGATSRow(hIT8, "G_PLANE", &ColorantStat[1]);
+            AddOneCGATSRow(hIT8, "B_PLANE", &ColorantStat[2]);
+            break;
+
+
+    case 4:
+            cmsIT8SetPropertyDbl(hIT8, "NUMBER_OF_SETS", 6);
+            AddOneCGATSRow(hIT8, "C_PLANE", &ColorantStat[0]);
+            AddOneCGATSRow(hIT8, "M_PLANE", &ColorantStat[1]);
+            AddOneCGATSRow(hIT8, "Y_PLANE", &ColorantStat[2]);
+            AddOneCGATSRow(hIT8, "K_PLANE", &ColorantStat[3]);
+            break;
+
+    default: FatalError("Internal error: Bad ColorSpace");
+
+    }
+
+    AddOneCGATSRow(hIT8, "EUCLIDEAN",    &EuclideanStat);
+    AddOneCGATSRow(hIT8, "COLORIMETRIC", &ColorimetricStat);
+
+    cmsIT8SaveToFile(hIT8, CGATSout);
+    cmsIT8Free(hIT8);
+}
+
+// Entry point: parses the switches, opens the two TIFFs to compare (plus the optional difference
+// image), checks they are comparable and reports the statistics on stdout or as a CGATS file.
+int main(int argc, char* argv[])
+{
+      int i;
+
+      Tiff1 = Tiff2 = TiffDiff = NULL;
+
+      InitUtils("tiffdiff");
+
+      HandleSwitches(argc, argv);
+
+      if ((argc - xoptind) != 2) {
+              Help();
+              }
+
+      TIFFSetErrorHandler(ConsoleErrorHandler);
+      TIFFSetWarningHandler(ConsoleWarningHandler);
+
+      Tiff1 = TIFFOpen(argv[xoptind], "r");
+      if (Tiff1 == NULL) FatalError("Unable to open '%s'", argv[xoptind]);
+
+      Tiff2 = TIFFOpen(argv[xoptind+1], "r");
+      if (Tiff2 == NULL) FatalError("Unable to open '%s'", argv[xoptind+1]);
+
+      if (TiffDiffFilename) {
+
+          TiffDiff = TIFFOpen(TiffDiffFilename, "w");
+          if (TiffDiff == NULL) FatalError("Unable to create '%s'", TiffDiffFilename);
+
+      }
+
+      AssureShortTagIs(Tiff1, Tiff2, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG, "Planar Config");
+      AssureShortTagIs(Tiff1, Tiff2, TIFFTAG_BITSPERSAMPLE, 8, "8 bit per sample");
+
+      EqualLongTag(Tiff1, Tiff2, TIFFTAG_IMAGEWIDTH,  "Image width");
+      EqualLongTag(Tiff1, Tiff2, TIFFTAG_IMAGELENGTH, "Image length");
+
+      EqualShortTag(Tiff1, Tiff2, TIFFTAG_SAMPLESPERPIXEL, "Samples per pixel");
+
+      hLab = cmsCreateLab4Profile(NULL);
+
+      ClearStatistics(&EuclideanStat);
+      ClearStatistics(&ColorimetricStat);   // accumulated and reported like the others, so reset it too
+      for (i=0; i < 4; i++)
+            ClearStatistics(&ColorantStat[i]);
+
+      if (!CmpImages(Tiff1, Tiff2, TiffDiff))
+                FatalError("Error comparing images");
+
+      if (CGATSout) {
+            CreateCGATS(argv[xoptind], argv[xoptind+1]);
+      }
+      else {
+
+        double  Per100 = 100.0 * ((255.0 - Mean(&EuclideanStat)) / 255.0);
+
+        printf("Digital counts  %g%% equal. mean %g, min %g, max %g, Std %g\n", Per100, Mean(&EuclideanStat),
+                                                                                EuclideanStat.Min,
+                                                                                EuclideanStat.Peak,
+                                                                                Std(&EuclideanStat));
+
+        if (ColorimetricStat.n > 0) {
+
+            Per100 = 100.0 * ((255.0 - Mean(&ColorimetricStat)) / 255.0);
+
+            printf("dE Colorimetric %g%% equal. mean %g, min %g, max %g, Std %g\n", Per100, Mean(&ColorimetricStat),
+                                                                                    ColorimetricStat.Min,
+                                                                                    ColorimetricStat.Peak,
+                                                                                    Std(&ColorimetricStat));
+        }
+
+      }
+
+      if (hLab)     cmsCloseProfile(hLab);
+      if (Tiff1)    TIFFClose(Tiff1);
+      if (Tiff2)    TIFFClose(Tiff2);
+      if (TiffDiff) TIFFClose(TiffDiff);
+
+      return 0;
+}
+
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/tificc/tificc.1 b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/tificc/tificc.1
new file mode 100755
index 0000000000..9af0d8688c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/tificc/tificc.1
@@ -0,0 +1,117 @@
+.\"Shiju P. Nair September 30, 2004
+.\"Thomas Weber <tweber@debian.org> April 23, 2014
+.TH TIFICC 1 "October 23, 2004"
+.SH NAME
+tificc - little cms ICC profile applier for TIFF.
+.SH SYNOPSIS
+.B tificc
+.RI [ options ] " input.tif output.tif"
+.SH DESCRIPTION
+lcms is a standalone CMM engine which deals with color management.
+It implements a fast transformation between ICC profiles.
+.B tificc
+is a little cms ICC profile applier for TIFF.
+.SH OPTIONS
+.TP
+.B \-a
+Handle channels > 4 as alpha.
+.TP
+.B \-b
+Black point compensation.
+.TP
+.BI \-c\  NUM
+Precalculates transform (0=Off, 1=Normal, 2=Hi-res, 3=LoRes) [defaults to 1].
+.TP
+.BI \-d\  NUM
+Observer adaptation state (abs.col. only), (0..1.0, float value) [defaults to 1.0].
+.TP
+.B \-e
+Embed destination profile.
+.TP
+.B \-g
+Marks out-of-gamut colors on softproof.
+.TP
+.BI \-h\  NUM
+Show summary of options and examples (0=help, 1=Examples, 2=Built-in profiles, 3=Contact information).
+.TP
+.BI \-i\ profile
+Input profile (defaults to sRGB).
+.TP
+.BI \-k\  inklimit
+Ink-limiting in % (CMYK only), (0..400.0, float value) [default 400.0].
+.TP
+.BI \-l\ profile
+Transform by device-link profile.
+.TP
+.BI \-m\  NUM \" TODO: check if values outside 0..3 are possible
+SoftProof intent [defaults to 0].
+.TP
+.B \-n
+Ignore embedded profile on input.
+.TP
+.BI \-o\  profile
+Output profile (defaults to sRGB).
+.TP
+.BI \-p\  profile
+Soft proof profile.
+.TP
+.BI \-s\  newprofile
+Save embedded profile as \fInewprofile\fR.
+.TP
+.BI \-t\ NUM
+Rendering intent
+.nf
+.RS
+0=Perceptual [default]
+1=Relative colorimetric
+2=Saturation
+3=Absolute colorimetric
+10=Perceptual preserving black ink
+11=Relative colorimetric preserving black ink
+12=Saturation preserving black ink
+13=Perceptual preserving black plane
+14=Relative colorimetric preserving black plane
+15=Saturation preserving black plane
+.RE
+.fi
+.TP
+.B \-v
+Verbose.
+.TP
+.BI \-w\  NUM
+Output depth (8, 16 or 32). Use 32 for floating-point.
+.SH BUILT-IN PROFILES
+.nf
+	*Lab2  -- D50-based v2 CIEL*a*b
+	*Lab4  -- D50-based v4 CIEL*a*b
+	*Lab   -- D50-based v4 CIEL*a*b
+	*XYZ   -- CIE XYZ (PCS)
+	*sRGB  -- sRGB color space
+	*Gray22 - Monochrome of Gamma 2.2
+	*Gray30 - Monochrome of Gamma 3.0
+	*null   - Monochrome black for all input
+	*Lin2222- CMYK linearization of gamma 2.2 on each channel
+.fi
+.SH EXAMPLES
+.nf
+To color correct from scanner to sRGB:
+	tificc -iscanner.icm in.tif out.tif
+To convert from monitor1 to monitor2:
+	tificc -imon1.icm -omon2.icm in.tif out.tif
+To make a CMYK separation:
+	tificc -oprinter.icm inrgb.tif outcmyk.tif
+To recover sRGB from a CMYK separation:
+	tificc -iprinter.icm incmyk.tif outrgb.tif
+To convert from CIELab TIFF to sRGB:
+	tificc -i*Lab in.tif out.tif
+.fi
+.SH NOTES
+For suggestions, comments, bug reports etc. send mail to info@littlecms.com.
+.SH SEE ALSO
+.BR jpgicc (1),
+.BR linkicc (1),
+.BR psicc (1),
+.BR transicc (1)
+.SH AUTHOR
+This manual page was originally written by Shiju P. Nair <shiju.p@gmail.com>,
+for the Debian project. Modified by Marti Maria to reflect further changes.
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/tificc/tificc.c b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/tificc/tificc.c
new file mode 100755
index 0000000000..7707a1077d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/tificc/tificc.c
@@ -0,0 +1,1180 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+
+// This program does apply profiles to (some) TIFF files
+
+#include "lcms2_plugin.h"
+#include "tiffio.h"
+#include "utils.h"
+
+
+// Flags
+// (file-scope settings; presumably filled in by the command-line parser, which is not visible here)
+static cmsBool BlackWhiteCompensation = FALSE;
+static cmsBool IgnoreEmbedded         = FALSE;
+static cmsBool EmbedProfile           = FALSE;
+static int     Width                  = 8;      // 32 makes WriteOutputTags tag the samples as IEEE float
+static cmsBool GamutCheck             = FALSE;
+static cmsBool lIsDeviceLink          = FALSE;
+static cmsBool StoreAsAlpha           = FALSE;  // TRUE: GetInputPixelType counts extra samples as color channels
+
+static int Intent                  = INTENT_PERCEPTUAL;
+static int ProofingIntent          = INTENT_PERCEPTUAL;
+static int PrecalcMode             = 1;
+static cmsFloat64Number InkLimit   = 400;
+
+static cmsFloat64Number ObserverAdaptationState  = 1.0;  // According ICC 4.3 this is the default
+
+static const char *cInpProf  = NULL;
+static const char *cOutProf  = NULL;
+static const char *cProofing = NULL;
+
+static const char* SaveEmbedded = NULL;
+
+// Console warning handler: formats "module: message." and prints it to stderr when Verbose is set
+static
+void ConsoleWarningHandler(const char* module, const char* fmt, va_list ap)
+{
+    char e[512] = { '\0' };
+    // Bounded formatting: an over-long module name or message is truncated, never overflowed
+    if (module != NULL)
+        snprintf(e, sizeof(e) - 2, "%s: ", module);
+    vsnprintf(e + strlen(e), sizeof(e) - 1 - strlen(e), fmt, ap);   // keeps one byte for the dot
+    strcat(e, ".");
+    if (Verbose) {
+
+        fprintf(stderr, "\nWarning");
+        fprintf(stderr, " %s\n", e);
+        fflush(stderr);
+    }
+}
+
+// Console error handler: formats "module: message." into a bounded buffer and prints it to stderr
+static
+void ConsoleErrorHandler(const char* module, const char* fmt, va_list ap)
+{
+    char e[512] = { '\0' };
+
+    if (module != NULL) {
+        if (strlen(module) < 500)
+               strcat(strcpy(e, module), ": ");
+    }
+    // Bounded write: long messages are truncated and one byte is kept for the closing dot
+    vsnprintf(e+strlen(e), sizeof(e) - 1 - strlen(e), fmt, ap);
+    strcat(e, ".");
+    fprintf(stderr, "\nError");
+    fprintf(stderr, " %s\n", e);
+    fflush(stderr);
+}
+
+// Reports a printf-style warning, tagged "[tificc]", through the console warning handler
+static
+void Warning(const char *frm, ...)
+{
+    va_list ArgList;
+
+    va_start(ArgList, frm);
+    ConsoleWarningHandler("[tificc]", frm, ArgList);
+    va_end(ArgList);
+}
+
+
+
+// Out of memory is a fatal error; 'size' is the number of bytes that could not be allocated
+static
+void OutOfMem(cmsUInt32Number size)
+{
+    FatalError("Out of memory on allocating %u bytes.", (unsigned int) size);
+}
+
+
+// -----------------------------------------------------------------------------------------------
+
+// In TIFF, Lab is encoded in a different way, so let's use the plug-in 
+// capabilities of lcms2 to change the meaning of TYPE_Lab_8.  
+
+// * 0xffff / 0xff00 = (255 * 257) / (255 * 256) = 257 / 256
+// The result is clipped to 0xffff.
+static int FromLabV2ToLabV4(int x) 
+{
+    const int Limit  = 0xffff;
+    int       Scaled = ((x << 8) | x) >> 8;  // * 257 / 256
+
+    return (Scaled > Limit) ? Limit : Scaled;
+}
+
+// * 0xff00 / 0xffff = * 256 / 257
+static int FromLabV4ToLabV2(int x) 
+{
+    int Biased = (x << 8) + 0x80;   // * 256, plus an offset applied before the division
+    return Biased / 257;
+}
+
+// Input formatter for 8bit Lab TIFF (photometric 8): a and b are re-biased by 128 before scaling
+static
+unsigned char* UnrollTIFFLab8(struct _cmstransform_struct* CMMcargo,
+                              register cmsUInt16Number wIn[], 
+                              register cmsUInt8Number* accum, 
+                              register cmsUInt32Number Stride)
+{
+    int a = (accum[1] > 127) ? (accum[1] - 128) : (accum[1] + 128);
+    int b = (accum[2] > 127) ? (accum[2] - 128) : (accum[2] + 128);
+    wIn[0] = (cmsUInt16Number) FromLabV2ToLabV4((accum[0]) << 8);
+    wIn[1] = (cmsUInt16Number) FromLabV2ToLabV4(a << 8);
+    wIn[2] = (cmsUInt16Number) FromLabV2ToLabV4(b << 8);
+    return accum + 3;
+    UTILS_UNUSED_PARAMETER(Stride);
+    UTILS_UNUSED_PARAMETER(CMMcargo);
+}
+
+// Input formatter for 16bit Lab TIFF (photometric 8): a and b are re-biased by 0x8000
+static
+unsigned char* UnrollTIFFLab16(struct _cmstransform_struct* CMMcargo,
+                              register cmsUInt16Number wIn[],
+                              register cmsUInt8Number* accum,
+                              register cmsUInt32Number Stride )
+{
+    cmsUInt16Number* Src = (cmsUInt16Number*) accum;
+    int a = (Src[1] > 0x7f00) ? (Src[1] - 0x8000) : (Src[1] + 0x8000);
+    int b = (Src[2] > 0x7f00) ? (Src[2] - 0x8000) : (Src[2] + 0x8000);
+
+    wIn[0] = (cmsUInt16Number) FromLabV2ToLabV4(Src[0]);
+    wIn[1] = (cmsUInt16Number) FromLabV2ToLabV4(a);
+    wIn[2] = (cmsUInt16Number) FromLabV2ToLabV4(b);
+    return accum + 3 * sizeof(cmsUInt16Number);
+    UTILS_UNUSED_PARAMETER(Stride);
+    UTILS_UNUSED_PARAMETER(CMMcargo);
+}
+
+
+// Output formatter: packs the three 16-bit Lab values of wOut into 8-bit TIFF Lab samples
+static
+unsigned char* PackTIFFLab8(struct _cmstransform_struct* CMMcargo, 
+                            register cmsUInt16Number wOut[], 
+                            register cmsUInt8Number* output, 
+                            register cmsUInt32Number Stride)
+{
+    int L = FromLabV4ToLabV2(wOut[0] + 0x0080) >> 8;
+    int a = (FromLabV4ToLabV2(wOut[1]) + 0x0080) >> 8;
+    int b = (FromLabV4ToLabV2(wOut[2]) + 0x0080) >> 8;
+
+    // NOTE(review): L adds the 0x0080 offset before the conversion, a/b after it; kept as is
+    output[0] = (cmsUInt8Number) L;
+    output[1] = (cmsUInt8Number) ((a < 128) ? (a + 128) : (a - 128));
+    output[2] = (cmsUInt8Number) ((b < 128) ? (b + 128) : (b - 128));
+
+    return output + 3;
+
+    UTILS_UNUSED_PARAMETER(Stride);
+    UTILS_UNUSED_PARAMETER(CMMcargo);
+}
+
+// Output formatter: packs the three 16-bit Lab values of wOut into 16-bit TIFF Lab samples
+// and returns the position right after the pixel written.
+static
+unsigned char* PackTIFFLab16(struct _cmstransform_struct* CMMcargo, 
+                            register cmsUInt16Number wOut[], 
+                            register cmsUInt8Number* output, 
+                            register cmsUInt32Number Stride)
+{
+    cmsUInt16Number* Dst = (cmsUInt16Number*) output;
+    int a = FromLabV4ToLabV2(wOut[1]);
+    int b = FromLabV4ToLabV2(wOut[2]);
+
+    // a and b are stored with a 0x8000 offset
+    Dst[0] = (cmsUInt16Number) FromLabV4ToLabV2(wOut[0]);
+    Dst[1] = (cmsUInt16Number) ((a < 0x7f00) ? (a + 0x8000) : (a - 0x8000));
+    Dst[2] = (cmsUInt16Number) ((b < 0x7f00) ? (b + 0x8000) : (b - 0x8000));
+
+    return (cmsUInt8Number*) (Dst + 3);
+
+    UTILS_UNUSED_PARAMETER(Stride);
+    UTILS_UNUSED_PARAMETER(CMMcargo);
+}
+
+
+// Formatter factory: supplies the TIFF Lab (un)packers for non-float formats that carry
+// bit 23 of the format word; any other request gets an empty formatter back.
+static
+cmsFormatter TiffFormatterFactory(cmsUInt32Number Type,
+                                  cmsFormatterDirection Dir,
+                                  cmsUInt32Number dwFlags)
+{
+    cmsFormatter Result = { NULL };
+    int Is8Bit = (T_BYTES(Type) == 1);
+
+    // Nothing to offer unless the format is marked TIFF-special and is not floating point
+    if (((Type >> 23) & 1) == 0 || (dwFlags & CMS_PACK_FLAGS_FLOAT))
+        return Result;
+
+    if (Dir == cmsFormatterInput)
+        Result.Fmt16 = Is8Bit ? UnrollTIFFLab8 : UnrollTIFFLab16;
+    else
+        Result.Fmt16 = Is8Bit ? PackTIFFLab8 : PackTIFFLab16;
+
+    return Result;
+}
+
+// Formatters plug-in descriptor exposing TiffFormatterFactory (presumably installed via cmsPlugin elsewhere)
+static cmsPluginFormatters TiffLabPlugin = { {cmsPluginMagicNumber, 2000, cmsPluginFormattersSig, NULL}, TiffFormatterFactory };
+
+
+// Build up the pixeltype descriptor: reads the layout tags of 'Bank' and returns the lcms2 format word
+static
+cmsUInt32Number GetInputPixelType(TIFF *Bank)
+{
+    uint16 Photometric, bps, spp, extra, PlanarConfig, *info;
+    uint16 Compression, reverse = 0;
+    int ColorChannels, IsPlanar = 0, pt = 0, IsFlt;
+    int labTiffSpecial = FALSE;
+
+    if (!TIFFGetField(Bank, TIFFTAG_PHOTOMETRIC, &Photometric))   // else it is switched on uninitialized
+        FatalError("Sorry, the photometric interpretation tag is missing");
+    TIFFGetFieldDefaulted(Bank,  TIFFTAG_BITSPERSAMPLE, &bps);
+    if (bps == 1)
+        FatalError("Sorry, bilevel TIFFs has nothing to do with ICC profiles");
+
+    if (bps != 8 && bps != 16 && bps != 32)
+        FatalError("Sorry, 8, 16 or 32 bits per sample only");
+
+    TIFFGetFieldDefaulted(Bank, TIFFTAG_SAMPLESPERPIXEL, &spp);
+    TIFFGetFieldDefaulted(Bank, TIFFTAG_PLANARCONFIG, &PlanarConfig);
+
+    switch (PlanarConfig) {
+
+     case PLANARCONFIG_CONTIG: IsPlanar = 0; break;
+     case PLANARCONFIG_SEPARATE: IsPlanar = 1; break;
+     default:
+
+         FatalError("Unsupported planar configuration (=%d) ", (int) PlanarConfig);
+    }
+
+    // If Samples per pixel == 1, PlanarConfiguration is irrelevant and need
+    // not to be included.
+
+    if (spp == 1) IsPlanar = 0;
+
+    // Any alpha?
+
+    TIFFGetFieldDefaulted(Bank, TIFFTAG_EXTRASAMPLES, &extra, &info);
+
+    // Read alpha channels as colorant
+    if (StoreAsAlpha) {
+        ColorChannels = spp;
+        extra = 0;
+    }
+    else
+        ColorChannels = spp - extra;
+
+    if (ColorChannels <= 0) FatalError("Unsupported number of color channels (%d)", ColorChannels);
+
+    switch (Photometric) {
+
+    case PHOTOMETRIC_MINISWHITE:
+
+        reverse = 1;
+
+        // ... fall through ...
+
+    case PHOTOMETRIC_MINISBLACK:
+        pt = PT_GRAY;
+        break;
+
+    case PHOTOMETRIC_RGB:
+        pt = PT_RGB;
+        break;
+
+
+     case PHOTOMETRIC_PALETTE:
+         FatalError("Sorry, palette images not supported");
+         break;
+
+     case PHOTOMETRIC_SEPARATED:
+
+         pt = PixelTypeFromChanCount(ColorChannels);
+         break;
+
+     case PHOTOMETRIC_YCBCR:
+         TIFFGetField(Bank, TIFFTAG_COMPRESSION, &Compression);
+         {
+             uint16 subx, suby;
+
+             pt = PT_YCbCr;
+             TIFFGetFieldDefaulted(Bank, TIFFTAG_YCBCRSUBSAMPLING, &subx, &suby);
+             if (subx != 1 || suby != 1)
+                 FatalError("Sorry, subsampled images not supported");
+
+         }
+         break;
+
+     case PHOTOMETRIC_ICCLAB:
+         pt = PT_LabV2;
+         break;
+
+     case PHOTOMETRIC_CIELAB:
+         pt = PT_Lab;
+         labTiffSpecial = TRUE;
+         break;
+
+
+     case PHOTOMETRIC_LOGLUV:      // CIE Log2(L) (u',v') 
+
+         TIFFSetField(Bank, TIFFTAG_SGILOGDATAFMT, SGILOGDATAFMT_16BIT);
+         pt = PT_YUV;             // *ICCSpace = icSigLuvData;
+         bps = 16;                // 16 bits forced by LibTiff
+         break;
+
+     default:
+         FatalError("Unsupported TIFF color space (Photometric %d)", Photometric);
+    }
+
+    // Convert bits per sample to bytes per sample
+
+    bps >>= 3;
+    IsFlt = (bps == 0) || (bps == 4);     // 4-byte samples are flagged as floating point
+    // Bit 23 marks TIFF-encoded CIELab so that TiffFormatterFactory picks the format up
+    return (FLOAT_SH(IsFlt)|COLORSPACE_SH(pt)|PLANAR_SH(IsPlanar)|EXTRA_SH(extra)|CHANNELS_SH(ColorChannels)|BYTES_SH(bps)|FLAVOR_SH(reverse) | (labTiffSpecial << 23) );
+}
+
+
+
+// Builds the output format word: planarity is inherited from dwInput, the rest comes from
+// the output color space and 'bps' (bytes per sample; 0 or 4 is flagged as float).
+static
+cmsUInt32Number ComputeOutputFormatDescriptor(cmsUInt32Number dwInput, int OutColorSpace, int bps)
+{
+    cmsUInt32Number Format = FLOAT_SH((bps == 0) || (bps == 4)) | COLORSPACE_SH(OutColorSpace) | BYTES_SH(bps);
+    Format |= PLANAR_SH(T_PLANAR(dwInput));
+    Format |= CHANNELS_SH(ChanCountFromPixelType(OutColorSpace));
+    return Format;
+}
+
+
+
+// Tile based transforms: converts the image tile by tile, reading and writing one tile per
+// plane around each cmsDoTransform call. Returns 1 on success, 0 if a tile read/write fails.
+static
+int TileBasedXform(cmsHTRANSFORM hXForm, TIFF* in, TIFF* out, int nPlanes)
+{
+    tsize_t BufSizeIn  = TIFFTileSize(in);
+    tsize_t BufSizeOut = TIFFTileSize(out);
+    unsigned char *BufferIn, *BufferOut;
+    ttile_t i, TileCount = TIFFNumberOfTiles(in) / nPlanes;
+    uint32 tw, tl;
+    int PixelCount, j;
+    int rc = 0;
+
+    TIFFGetFieldDefaulted(in, TIFFTAG_TILEWIDTH,  &tw);
+    TIFFGetFieldDefaulted(in, TIFFTAG_TILELENGTH, &tl);
+
+    PixelCount = (int) tw * tl;
+
+    // Each buffer holds one tile of every plane, laid out plane after plane
+    BufferIn = (unsigned char *) _TIFFmalloc(BufSizeIn * nPlanes);
+    if (!BufferIn) OutOfMem(BufSizeIn * nPlanes);
+
+    BufferOut = (unsigned char *) _TIFFmalloc(BufSizeOut * nPlanes);
+    if (!BufferOut) OutOfMem(BufSizeOut * nPlanes);
+
+    for (i = 0; i < TileCount; i++) {
+
+        // Gather tile i of each plane
+        for (j = 0; j < nPlanes; j++) {
+            unsigned char* Plane = BufferIn + (j*BufSizeIn);
+
+            if (TIFFReadEncodedTile(in, i + (j* TileCount), Plane, BufSizeIn) < 0)
+                goto Done;
+        }
+
+        cmsDoTransform(hXForm, BufferIn, BufferOut, PixelCount);
+
+        // Write the converted tile back, one plane at a time
+        for (j = 0; j < nPlanes; j++) {
+            unsigned char* Plane = BufferOut + (j*BufSizeOut);
+
+            if (TIFFWriteEncodedTile(out, i + (j*TileCount), Plane, BufSizeOut) < 0)
+                goto Done;
+        }
+    }
+
+    rc = 1;
+
+Done:
+    _TIFFfree(BufferIn);
+    _TIFFfree(BufferOut);
+    return rc;
+}
+
+
+// Strip based transforms: converts the image strip by strip (one strip per plane at a
+// time). Returns 1 on success, 0 as soon as a strip cannot be read or written.
+static
+int StripBasedXform(cmsHTRANSFORM hXForm, TIFF* in, TIFF* out, int nPlanes)
+{
+    tsize_t BufSizeIn  = TIFFStripSize(in);
+    tsize_t BufSizeOut = TIFFStripSize(out);
+    unsigned char *BufferIn, *BufferOut;
+    ttile_t i, StripCount = TIFFNumberOfStrips(in) / nPlanes;
+    uint32 sw, sl, iml;
+    int j, PixelCount;
+    int rc = 0;
+
+    TIFFGetFieldDefaulted(in, TIFFTAG_IMAGEWIDTH,  &sw);
+    TIFFGetFieldDefaulted(in, TIFFTAG_ROWSPERSTRIP, &sl);
+    TIFFGetFieldDefaulted(in, TIFFTAG_IMAGELENGTH, &iml);
+
+    // It is possible to get infinite rows per strip
+    if (sl == 0 || sl > iml)
+        sl = iml;   // One strip for whole image
+
+    BufferIn = (unsigned char *) _TIFFmalloc(BufSizeIn * nPlanes);
+    if (!BufferIn) OutOfMem(BufSizeIn * nPlanes);
+
+    BufferOut = (unsigned char *) _TIFFmalloc(BufSizeOut * nPlanes);
+    if (!BufferOut) OutOfMem(BufSizeOut * nPlanes);
+
+    for (i = 0; i < StripCount; i++) {
+
+        uint32 Rows;
+
+        for (j = 0; j < nPlanes; j++) {
+            unsigned char* Plane = BufferIn + (j * BufSizeIn);
+
+            if (TIFFReadEncodedStrip(in, i + (j * StripCount), Plane, BufSizeIn) < 0)
+                goto Done;
+        }
+
+        // The last strip may hold fewer rows than the nominal strip height
+        Rows = (iml < sl) ? iml : sl;
+        PixelCount = (int) sw * Rows;
+        iml -= sl;
+
+        cmsDoTransform(hXForm, BufferIn, BufferOut, PixelCount);
+
+        for (j = 0; j < nPlanes; j++) {
+            unsigned char* Plane = BufferOut + j * BufSizeOut;
+
+            if (TIFFWriteEncodedStrip(out, i + (j * StripCount), Plane, BufSizeOut) < 0)
+                goto Done;
+        }
+    }
+
+    rc = 1;
+
+Done:
+    _TIFFfree(BufferIn);
+    _TIFFfree(BufferOut);
+    return rc;
+}
+
+
+// Creates minimum required tags: photometric interpretation, samples per
+// pixel, ink description and bits per sample of the output directory.
+//
+//   out            - TIFF being written
+//   Colorspace     - PT_* identifier of the output color space
+//   BytesPerSample - size of one output sample, in bytes
+// An unsupported color space is reported through FatalError().
+static
+void WriteOutputTags(TIFF *out, int Colorspace, int BytesPerSample)
+{
+    const int bits     = 8 * BytesPerSample;
+    const int channels = ChanCountFromPixelType(Colorspace);
+
+    // Values to emit; a negative entry means "leave this tag unset"
+    int photometric = -1;
+    int samples     = 0;
+    int extras      = -1;
+    int inkset      = -1;
+    int inks        = -1;
+
+    // Every extra sample is flagged as unassociated alpha
+    uint16 alphaKinds[11];
+    int k;
+
+    for (k = 0; k < 11; k++)
+        alphaKinds[k] = EXTRASAMPLE_UNASSALPHA;
+
+    // First decide what has to be written...
+    switch (Colorspace) {
+
+    case PT_GRAY:
+        photometric = PHOTOMETRIC_MINISBLACK;
+        samples     = 1;
+        break;
+
+    case PT_RGB:
+        photometric = PHOTOMETRIC_RGB;
+        samples     = 3;
+        break;
+
+    case PT_CMY:
+        photometric = PHOTOMETRIC_SEPARATED;
+        samples     = 3;
+        inkset      = 2;
+        break;
+
+    case PT_CMYK:
+        photometric = PHOTOMETRIC_SEPARATED;
+        samples     = 4;
+        inkset      = INKSET_CMYK;
+        break;
+
+    case PT_Lab:
+        // 16-bit Lab is tagged with photometric 9, any other depth with CIELAB
+        photometric = (bits == 16) ? 9 : PHOTOMETRIC_CIELAB;
+        samples     = 3;
+        break;
+
+    // Multi-ink separations
+    case PT_MCH2:  case PT_MCH3:  case PT_MCH4:  case PT_MCH5:
+    case PT_MCH6:  case PT_MCH7:  case PT_MCH8:  case PT_MCH9:
+    case PT_MCH10: case PT_MCH11: case PT_MCH12: case PT_MCH13:
+    case PT_MCH14: case PT_MCH15:
+
+        photometric = PHOTOMETRIC_SEPARATED;
+        samples     = channels;
+
+        if (StoreAsAlpha && channels >= 4) {
+            // CMYK plus extra alpha
+            extras = channels - 4;
+            inkset = 1;
+            inks   = 4;
+        }
+        else {
+            inkset = 2;
+            inks   = channels;
+        }
+        break;
+
+    default:
+        FatalError("Unsupported output colorspace");
+    }
+
+    // ...then write it, always in the same tag order. The tags below are
+    // skipped for an unsupported color space (photometric stays negative).
+    if (photometric >= 0) {
+
+        TIFFSetField(out, TIFFTAG_PHOTOMETRIC, photometric);
+        TIFFSetField(out, TIFFTAG_SAMPLESPERPIXEL, samples);
+
+        if (extras >= 0) TIFFSetField(out, TIFFTAG_EXTRASAMPLES, extras, alphaKinds);
+        if (inkset >= 0) TIFFSetField(out, TIFFTAG_INKSET, inkset);
+        if (inks   >= 0) TIFFSetField(out, TIFFTAG_NUMBEROFINKS, inks);
+
+        TIFFSetField(out, TIFFTAG_BITSPERSAMPLE, bits);    // Needed by TIFF Spec
+    }
+
+    if (Width == 32)
+        TIFFSetField(out, TIFFTAG_SAMPLEFORMAT, SAMPLEFORMAT_IEEEFP);
+}
+
+
+// Copies a bunch of tags from the input directory to the output one.
+// A tag that is absent from the input is left unset on the output.
+static
+void CopyOtherTags(TIFF* in, TIFF* out)
+{
+// do/while(0) makes every use a single statement, safe inside if/else
+#define CopyField(tag, v) \
+    do { if (TIFFGetField(in, tag, &v)) TIFFSetField(out, tag, v); } while (0)
+
+    uint16 shortv;
+    uint32 ow = 0, ol = 0;
+    cmsFloat32Number floatv;
+    char *stringv;
+    uint32 longv;
+
+    CopyField(TIFFTAG_SUBFILETYPE, longv);
+
+    // Set the size only if both values were read: never write undefined data
+    if (TIFFGetField(in, TIFFTAG_IMAGEWIDTH, &ow) &&
+        TIFFGetField(in, TIFFTAG_IMAGELENGTH, &ol)) {
+        TIFFSetField(out, TIFFTAG_IMAGEWIDTH, ow);
+        TIFFSetField(out, TIFFTAG_IMAGELENGTH, ol);
+    }
+
+    CopyField(TIFFTAG_PLANARCONFIG, shortv);
+    CopyField(TIFFTAG_COMPRESSION, shortv);
+
+    // The predictor is not carried over when the output depth is 32
+    if (Width != 32)
+        CopyField(TIFFTAG_PREDICTOR, shortv);
+
+    CopyField(TIFFTAG_THRESHHOLDING, shortv);
+    CopyField(TIFFTAG_FILLORDER, shortv);
+    CopyField(TIFFTAG_ORIENTATION, shortv);
+    CopyField(TIFFTAG_MINSAMPLEVALUE, shortv);
+    CopyField(TIFFTAG_MAXSAMPLEVALUE, shortv);
+    CopyField(TIFFTAG_XRESOLUTION, floatv);
+    CopyField(TIFFTAG_YRESOLUTION, floatv);
+    CopyField(TIFFTAG_RESOLUTIONUNIT, shortv);
+    CopyField(TIFFTAG_ROWSPERSTRIP, longv);
+    CopyField(TIFFTAG_XPOSITION, floatv);
+    CopyField(TIFFTAG_YPOSITION, floatv);
+    CopyField(TIFFTAG_IMAGEDEPTH, longv);
+    CopyField(TIFFTAG_TILEDEPTH, longv);
+    CopyField(TIFFTAG_TILEWIDTH,  longv);
+    CopyField(TIFFTAG_TILELENGTH, longv);
+
+    CopyField(TIFFTAG_ARTIST, stringv);
+    CopyField(TIFFTAG_IMAGEDESCRIPTION, stringv);
+    CopyField(TIFFTAG_MAKE, stringv);
+    CopyField(TIFFTAG_MODEL, stringv);
+    CopyField(TIFFTAG_DATETIME, stringv);
+    CopyField(TIFFTAG_HOSTCOMPUTER, stringv);
+    CopyField(TIFFTAG_PAGENAME, stringv);
+    CopyField(TIFFTAG_DOCUMENTNAME, stringv);
+#undef CopyField
+}
+
+// Embeds the ICC profile stored in ProfileFile into Out (ICCPROFILE tag).
+// Returns without embedding anything if the file cannot be opened or sized.
+static
+void DoEmbedProfile(TIFF* Out, const char* ProfileFile)
+{
+    FILE* f;
+    cmsInt32Number size;
+    cmsUInt32Number EmbedLen;
+    cmsUInt8Number* EmbedBuffer;
+
+    f = fopen(ProfileFile, "rb");
+    if (f == NULL) return;
+
+    size = cmsfilelength(f);
+    if (size < 0) { fclose(f); return; }    // do not leak the stream
+
+    // Widen before adding the terminator so that size + 1 cannot overflow
+    EmbedBuffer = (cmsUInt8Number*) malloc((size_t) size + 1);
+    if (EmbedBuffer == NULL) {
+        fclose(f);
+        OutOfMem(size+1);
+        return;
+    }
+
+    EmbedLen = (cmsUInt32Number) fread(EmbedBuffer, 1, (size_t) size, f);
+    fclose(f);
+    // size is not a long: cast it so that it matches the %ld conversion
+    if (EmbedLen != (cmsUInt32Number) size)
+        FatalError("Cannot read %ld bytes from %s", (long) size, ProfileFile);
+
+    EmbedBuffer[EmbedLen] = 0;
+    TIFFSetField(Out, TIFFTAG_ICCPROFILE, EmbedLen, EmbedBuffer);
+    free(EmbedBuffer);
+}
+
+
+
+// Returns a profile describing the colors of the input image, or NULL when
+// none is available or embedded profiles are being ignored. An embedded
+// ICC profile is preferred; failing that, an RGB profile is built from the
+// colorimetric tags (primaries, white point and transfer function).
+static
+cmsHPROFILE GetTIFFProfile(TIFF* in)
+{
+    cmsCIExyYTRIPLE Primaries;
+    cmsFloat32Number* chr;
+    cmsCIExyY WhitePoint;
+    cmsFloat32Number* wp;
+    cmsUInt32Number i, j, nSrc;
+    cmsToneCurve* Curve[3];
+    cmsUInt16Number* gm[3] = { NULL, NULL, NULL };
+    cmsUInt16Number Table[256];
+    uint16 bps = 0;
+    cmsHPROFILE hProfile;
+    cmsUInt32Number EmbedLen;
+    cmsUInt8Number* EmbedBuffer;
+
+    if (IgnoreEmbedded) return NULL;
+
+    if (TIFFGetField(in, TIFFTAG_ICCPROFILE, &EmbedLen, &EmbedBuffer)) {
+        hProfile = cmsOpenProfileFromMem(EmbedBuffer, EmbedLen);
+        if (hProfile != NULL) {
+
+            // Print description found in the profile
+            if (Verbose) {
+                fprintf(stdout, "\n[Embedded profile]\n");
+                PrintProfileInformation(hProfile);
+                fflush(stdout);
+            }
+
+            if (SaveEmbedded != NULL)
+                SaveMemoryBlock(EmbedBuffer, EmbedLen, SaveEmbedded);
+
+            return hProfile;
+        }
+    }
+
+    // Try to see if "colorimetric" tiff: primaries and white point are needed
+    if (!TIFFGetField(in, TIFFTAG_PRIMARYCHROMATICITIES, &chr)) return NULL;
+    if (!TIFFGetField(in, TIFFTAG_WHITEPOINT, &wp)) return NULL;
+
+    Primaries.Red.x   = chr[0];  Primaries.Red.y   = chr[1];
+    Primaries.Green.x = chr[2];  Primaries.Green.y = chr[3];
+    Primaries.Blue.x  = chr[4];  Primaries.Blue.y  = chr[5];
+    Primaries.Red.Y = Primaries.Green.Y = Primaries.Blue.Y = 1.0;
+
+    WhitePoint.x = wp[0];  WhitePoint.y = wp[1];  WhitePoint.Y = 1.0;
+
+    // Transferfunction is a bit harder: libtiff hands back 1 << BitsPerSample
+    // entries per table, and a single table for single-sample images.
+    TIFFGetFieldDefaulted(in, TIFFTAG_BITSPERSAMPLE, &bps);
+    if (bps < 1 || bps > 16) return NULL;
+
+    if (!TIFFGetFieldDefaulted(in, TIFFTAG_TRANSFERFUNCTION, &gm[0], &gm[1], &gm[2]))
+        return NULL;
+    if (gm[0] == NULL || gm[1] == NULL || gm[2] == NULL) return NULL;
+
+    // Resample each table to 256 evenly spaced entries. Reading the first
+    // 256 entries over-reads shallow images and truncates 16-bit ones.
+    nSrc = (cmsUInt32Number) 1 << bps;
+    for (i = 0; i < 3; i++) {
+        for (j = 0; j < 256; j++)
+            Table[j] = gm[i][(j * (nSrc - 1)) / 255];
+        Curve[i] = cmsBuildTabulatedToneCurve16(NULL, 256, Table);
+    }
+
+    hProfile = NULL;
+    if (Curve[0] != NULL && Curve[1] != NULL && Curve[2] != NULL)
+        hProfile = cmsCreateRGBProfileTHR(NULL, &WhitePoint, &Primaries, Curve);
+
+    for (i = 0; i < 3; i++)
+        if (Curve[i] != NULL) cmsFreeToneCurve(Curve[i]);
+
+    if (Verbose) fprintf(stdout, "\n[Colorimetric TIFF]\n");
+
+    return hProfile;
+}
+
+
+// Transforms the current directory of 'in' and writes it to 'out'.
+// cDefInpProf names the device-link profile when one was requested;
+// otherwise it is the input profile used when the TIFF supplies none.
+// Returns 1 on success, 0 if the transform cannot be created or applied.
+static
+int TransformImage(TIFF* in, TIFF* out, const char *cDefInpProf)
+{
+    cmsHPROFILE hIn, hOut, hProof, hInkLimit = NULL;
+    cmsHTRANSFORM xform;
+    cmsUInt32Number wInput, wOutput;
+    cmsColorSpaceSignature OutputSig;
+    int OutputColorSpace;
+    int bps = Width / 8;
+    cmsUInt32Number dwFlags = 0;
+    int nPlanes;
+    int rc;
+
+    // Observer adaptation state (only meaningful on absolute colorimetric intent)
+    cmsSetAdaptationState(ObserverAdaptationState);
+
+    if (EmbedProfile && cOutProf)
+        DoEmbedProfile(out, cOutProf);
+
+    if (BlackWhiteCompensation)
+        dwFlags |= cmsFLAGS_BLACKPOINTCOMPENSATION;
+
+    switch (PrecalcMode) {
+
+       case 0: dwFlags |= cmsFLAGS_NOOPTIMIZE; break;
+       case 2: dwFlags |= cmsFLAGS_HIGHRESPRECALC; break;
+       case 3: dwFlags |= cmsFLAGS_LOWRESPRECALC; break;
+       case 1: break;
+
+       default: FatalError("Unknown precalculation mode '%d'", PrecalcMode);
+    }
+
+    if (GamutCheck)
+        dwFlags |= cmsFLAGS_GAMUTCHECK;
+
+    hProof = NULL;
+    hOut = NULL;
+
+    if (lIsDeviceLink) {
+        hIn = cmsOpenProfileFromFile(cDefInpProf, "r");
+    }
+    else {
+        hIn =  GetTIFFProfile(in);
+
+        if (hIn == NULL)
+            hIn = OpenStockProfile(NULL, cDefInpProf);
+
+        hOut = OpenStockProfile(NULL, cOutProf);
+
+        if (cProofing != NULL) {
+            hProof = OpenStockProfile(NULL, cProofing);
+            dwFlags |= cmsFLAGS_SOFTPROOFING;
+        }
+    }
+
+    // A profile that failed to open must not reach the calls below, which
+    // dereference it. hOut is legitimately NULL in device-link mode.
+    if (hIn == NULL || (!lIsDeviceLink && hOut == NULL)) {
+        if (hIn)    cmsCloseProfile(hIn);
+        if (hOut)   cmsCloseProfile(hOut);
+        if (hProof) cmsCloseProfile(hProof);
+
+        FatalError("Unable to open input/output profile");
+        return 0;
+    }
+
+    // Take input color space
+    wInput = GetInputPixelType(in);
+
+    // Assure both, input profile and input TIFF are on same colorspace
+    if (_cmsLCMScolorSpace(cmsGetColorSpace(hIn)) != (int) T_COLORSPACE(wInput))
+        FatalError("Input profile is not operating in proper color space");
+
+    // With a device link the output space is taken from its PCS field
+    OutputSig = lIsDeviceLink ? cmsGetPCS(hIn) : cmsGetColorSpace(hOut);
+    OutputColorSpace = _cmsLCMScolorSpace(OutputSig);
+
+    wOutput  = ComputeOutputFormatDescriptor(wInput, OutputColorSpace, bps);
+
+    WriteOutputTags(out, OutputColorSpace, bps);
+    CopyOtherTags(in, out);
+
+    // Ink limit
+    if (InkLimit != 400.0 &&
+        (OutputColorSpace == PT_CMYK || OutputColorSpace == PT_CMY)) {
+
+            cmsHPROFILE hProfiles[10];
+            int nProfiles = 0;
+
+            // OutputSig rather than hOut: hOut is NULL in device-link mode
+            hInkLimit = cmsCreateInkLimitingDeviceLink(OutputSig, InkLimit);
+
+            hProfiles[nProfiles++] = hIn;
+            if (hProof) {
+                hProfiles[nProfiles++] = hProof;
+                hProfiles[nProfiles++] = hProof;
+            }
+
+            if (hOut)
+                hProfiles[nProfiles++] = hOut;
+            hProfiles[nProfiles++] = hInkLimit;
+
+            xform = cmsCreateMultiprofileTransform(hProfiles, nProfiles,
+                                                   wInput, wOutput, Intent, dwFlags);
+    }
+    else {
+        xform = cmsCreateProofingTransform(hIn, wInput,
+                                           hOut, wOutput,
+                                           hProof, Intent,
+                                           ProofingIntent,
+                                           dwFlags);
+    }
+
+    cmsCloseProfile(hIn);
+    if (hOut)
+        cmsCloseProfile(hOut);
+    if (hInkLimit)
+        cmsCloseProfile(hInkLimit);
+    if (hProof)
+        cmsCloseProfile(hProof);
+
+    if (xform == NULL) return 0;
+
+    // Planar stuff
+    if (T_PLANAR(wInput))
+        nPlanes = T_CHANNELS(wInput) + T_EXTRA(wInput);
+    else
+        nPlanes = 1;
+
+    // Handle tile by tile or strip by strip, keeping the outcome
+    if (TIFFIsTiled(in))
+        rc = TileBasedXform(xform, in, out, nPlanes);
+    else
+        rc = StripBasedXform(xform, in, out, nPlanes);
+
+    cmsDeleteTransform(xform);
+
+    TIFFWriteDirectory(out);
+
+    return rc ? 1 : 0;
+}
+
+
+// Print help: writes the program banner and the help page selected by
+// 'level' to stderr, then terminates the program with exit status 0.
+//
+//   level 1 - usage examples
+//   level 2 - whatever PrintBuiltins() lists
+//   level 3 - about / contact information
+//   other   - summary of the command line flags
+static
+void Help(int level)
+{
+    fprintf(stderr, "little cms ICC profile applier for TIFF - v6.2 [LittleCMS %2.2f]\n\n", LCMS_VERSION / 1000.0);
+    fflush(stderr);
+
+    if (level == 1) {
+
+        fprintf(stderr, "Examples:\n\n"
+            "To color correct from scanner to sRGB:\n"
+            "\ttificc %ciscanner.icm in.tif out.tif\n"
+            "To convert from monitor1 to monitor2:\n"
+            "\ttificc %cimon1.icm %comon2.icm in.tif out.tif\n"
+            "To make a CMYK separation:\n"
+            "\ttificc %coprinter.icm inrgb.tif outcmyk.tif\n"
+            "To recover sRGB from a CMYK separation:\n"
+            "\ttificc %ciprinter.icm incmyk.tif outrgb.tif\n"
+            "To convert from CIELab TIFF to sRGB\n"
+            "\ttificc %ci*Lab in.tif out.tif\n\n",
+            SW, SW, SW, SW, SW, SW);
+    }
+    else if (level == 2) {
+
+        PrintBuiltins();
+    }
+    else if (level == 3) {
+
+        fprintf(stderr, "This program is intended to be a demo of the little cms\n"
+            "engine. Both lcms and this program are freeware. You can\n"
+            "obtain both in source code at http://www.littlecms.com\n"
+            "For suggestions, comments, bug reports etc. send mail to\n"
+            "info@littlecms.com\n\n");
+    }
+    else {
+
+        fprintf(stderr, "usage: tificc [flags] input.tif output.tif\n");
+        fprintf(stderr, "\nflags:\n\n");
+
+        fprintf(stderr, "%cv - Verbose\n"
+                        "%ci<profile> - Input profile (defaults to sRGB)\n"
+                        "%co<profile> - Output profile (defaults to sRGB)\n"
+                        "%cl<profile> - Transform by device-link profile\n",
+                        SW, SW, SW, SW);
+
+        PrintRenderingIntents();
+
+        fprintf(stderr, "%cb - Black point compensation\n"
+                        "%cd<0..1> - Observer adaptation state (abs.col. only)\n",
+                        SW, SW);
+
+        fprintf(stderr, "%cc<0,1,2,3> - Precalculates transform (0=Off, 1=Normal, 2=Hi-res, 3=LoRes)\n"
+                        "\n", SW);
+
+        fprintf(stderr, "%cw<8,16,32> - Output depth. Use 32 for floating-point\n\n"
+                        "%ca - Handle channels > 4 as alpha\n",
+                        SW, SW);
+
+        fprintf(stderr, "%cn - Ignore embedded profile on input\n"
+                        "%ce - Embed destination profile\n"
+                        "%cs<new profile> - Save embedded profile as <new profile>\n"
+                        "\n", SW, SW, SW);
+
+        fprintf(stderr, "%cp<profile> - Soft proof profile\n"
+                        "%cm<n> - Soft proof intent\n"
+                        "%cg - Marks out-of-gamut colors on softproof\n"
+                        "\n", SW, SW, SW);
+
+        fprintf(stderr, "%ck<0..400> - Ink-limiting in %% (CMYK only)\n"
+                        "\n"
+                        "%ch<0,1,2,3> - More help\n", SW, SW);
+    }
+
+    fflush(stderr);
+    exit(0);
+}
+
+
+// The toggles stuff: walks the command line with xgetopt() and stores every
+// recognized switch in the corresponding global setting. Each switch is
+// accepted in lower and upper case. Invalid values and conflicting profile
+// switches are reported through FatalError(); an adaptation state outside
+// 0..1 only raises a Warning().
+static
+void HandleSwitches(int argc, char *argv[])
+{
+    int opt;
+
+    for (;;) {
+
+        opt = xgetopt(argc,argv,"aAeEbBw:W:nNvVGgh:H:i:I:o:O:P:p:t:T:c:C:l:L:M:m:K:k:S:s:D:d:");
+        if (opt == EOF) break;
+
+        switch (opt) {
+
+        // ---- Plain on/off toggles ----
+
+        case 'a': case 'A':
+            StoreAsAlpha = TRUE;
+            break;
+
+        case 'b': case 'B':
+            BlackWhiteCompensation = TRUE;
+            break;
+
+        case 'e': case 'E':
+            EmbedProfile = TRUE;
+            break;
+
+        case 'g': case 'G':
+            GamutCheck = TRUE;
+            break;
+
+        case 'n': case 'N':
+            IgnoreEmbedded = TRUE;
+            break;
+
+        case 'v': case 'V':
+            Verbose = TRUE;
+            break;
+
+        // ---- Profiles: -i/-o and -l exclude each other ----
+
+        case 'i': case 'I':
+            if (lIsDeviceLink)
+                FatalError("Device-link already specified");
+
+            cInpProf = xoptarg;
+            break;
+
+        case 'o': case 'O':
+            if (lIsDeviceLink)
+                FatalError("Device-link already specified");
+
+            cOutProf = xoptarg;
+            break;
+
+        case 'l': case 'L':
+            if (cInpProf != NULL || cOutProf != NULL)
+                FatalError("input/output profiles already specified");
+
+            // The device link travels in the input profile slot
+            lIsDeviceLink = TRUE;
+            cInpProf = xoptarg;
+            break;
+
+        case 'p': case 'P':
+            cProofing = xoptarg;
+            break;
+
+        case 's': case 'S':
+            SaveEmbedded = xoptarg;
+            break;
+
+        // ---- Numeric settings ----
+
+        case 'c': case 'C':
+            PrecalcMode = atoi(xoptarg);
+            if (!(PrecalcMode >= 0 && PrecalcMode <= 3))
+                FatalError("Unknown precalc mode '%d'", PrecalcMode);
+            break;
+
+        case 'd': case 'D':
+            ObserverAdaptationState = atof(xoptarg);
+            if (ObserverAdaptationState < 0 ||
+                ObserverAdaptationState > 1.0)
+                Warning("Adaptation state should be 0..1");
+            break;
+
+        case 'k': case 'K':
+            InkLimit = atof(xoptarg);
+            if (InkLimit < 0.0 || InkLimit > 400.0)
+                FatalError("Ink limit must be 0%%..400%%");
+            break;
+
+        case 'm': case 'M':
+            ProofingIntent = atoi(xoptarg);
+            break;
+
+        case 't': case 'T':
+            Intent = atoi(xoptarg);
+            break;
+
+        case 'w': case 'W':
+            Width = atoi(xoptarg);
+            switch (Width) {
+            case 8: case 16: case 32:
+                break;
+            default:
+                FatalError("Only 8, 16 and 32 bps are supported");
+            }
+            break;
+
+        // ---- Help ----
+
+        case 'h': case 'H':
+            Help(atoi(xoptarg));
+            break;
+
+        default:
+
+            FatalError("Unknown option - run without args to see valid ones");
+        }
+
+    }
+}
+
+
+// The main sink: parses the switches, opens both files and transforms every
+// directory of the input TIFF into the output TIFF.
+// Returns 0 on success, 1 if any directory could not be transformed.
+int main(int argc, char* argv[])
+{
+    TIFF *in, *out;
+    int rc = 0;
+
+    cmsPlugin(&TiffLabPlugin);
+
+    InitUtils("tificc");
+
+    HandleSwitches(argc, argv);
+
+    // Exactly two file names must remain: input and output
+    if ((argc - xoptind) != 2) {
+        Help(0);
+    }
+
+    TIFFSetErrorHandler(ConsoleErrorHandler);
+    TIFFSetWarningHandler(ConsoleWarningHandler);
+
+    in = TIFFOpen(argv[xoptind], "r");
+    if (in == NULL) FatalError("Unable to open '%s'", argv[xoptind]);
+
+    out = TIFFOpen(argv[xoptind+1], "w");
+
+    if (out == NULL) {
+        TIFFClose(in);
+        FatalError("Unable to write '%s'", argv[xoptind+1]);
+    }
+
+    // A failed directory does not stop the remaining ones from being
+    // processed, but it is remembered for the exit status
+    do {
+        if (!TransformImage(in, out, cInpProf))
+            rc = 1;
+
+    } while (TIFFReadDirectory(in));
+
+    if (Verbose) { fprintf(stdout, "\n"); fflush(stdout); }
+
+    TIFFClose(in);
+    TIFFClose(out);
+
+    return rc;
+}
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/transicc/Makefile.am b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/transicc/Makefile.am
new file mode 100755
index 0000000000..05e8b6936f
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/transicc/Makefile.am
@@ -0,0 +1,19 @@
+#
+# Makefile for building lcms sample programs
+# Originally Written by Bob Friesenhahn, June 2003
+# Additions and bugs by Marti Maria 
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+
+AM_CPPFLAGS =  -I$(top_builddir)/include -I$(top_srcdir)/include \
+               -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+# transicc is the only program built (and installed) from this directory
+bin_PROGRAMS = transicc 
+# It links against the liblcms2 libtool library from the top-level src/ tree
+transicc_LDADD = $(top_builddir)/src/liblcms2.la 
+transicc_LDFLAGS = @LDFLAGS@
+transicc_SOURCES = transicc.c ../common/xgetopt.c ../common/vprf.c  ../common/utils.h
+transicc_MANS = transicc.1
+# NOTE(review): man_MANS is not set in this file, so this looks empty; was transicc_MANS meant to be man_MANS? confirm
+EXTRA_DIST = $(man_MANS)
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/transicc/Makefile.in b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/transicc/Makefile.in
new file mode 100755
index 0000000000..ce93e538ae
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/transicc/Makefile.in
@@ -0,0 +1,663 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Makefile for building lcms sample programs
+# Originally Written by Bob Friesenhahn, June 2003
+# Additions and bugs by Marti Maria 
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+bin_PROGRAMS = transicc$(EXEEXT)
+subdir = utils/transicc
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+	$(top_srcdir)/m4/ax_append_compile_flags.m4 \
+	$(top_srcdir)/m4/ax_append_flag.m4 \
+	$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+	$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+	$(top_srcdir)/m4/ax_require_defined.m4 \
+	$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+	$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+	$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)"
+PROGRAMS = $(bin_PROGRAMS)
+am__dirstamp = $(am__leading_dot)dirstamp
+am_transicc_OBJECTS = transicc.$(OBJEXT) ../common/xgetopt.$(OBJEXT) \
+	../common/vprf.$(OBJEXT)
+transicc_OBJECTS = $(am_transicc_OBJECTS)
+transicc_DEPENDENCIES = $(top_builddir)/src/liblcms2.la
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 = 
+transicc_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(transicc_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(transicc_SOURCES)
+DIST_SOURCES = $(transicc_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JPEGICC_DEPLIBS = @JPEGICC_DEPLIBS@
+LCMS_LIB_DEPLIBS = @LCMS_LIB_DEPLIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBRARY_AGE = @LIBRARY_AGE@
+LIBRARY_CURRENT = @LIBRARY_CURRENT@
+LIBRARY_REVISION = @LIBRARY_REVISION@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBTOOL_DEPS = @LIBTOOL_DEPS@
+LIB_JPEG = @LIB_JPEG@
+LIB_MATH = @LIB_MATH@
+LIB_THREAD = @LIB_THREAD@
+LIB_TIFF = @LIB_TIFF@
+LIB_ZLIB = @LIB_ZLIB@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_CXX = @PTHREAD_CXX@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TIFFICC_DEPLIBS = @TIFFICC_DEPLIBS@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+inline = @inline@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Don't require all the GNU mandated files
+AUTOMAKE_OPTIONS = 1.7 foreign no-dependencies
+AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
+               -I$(top_srcdir)/utils/common -I$(top_builddir)/utils/common
+
+transicc_LDADD = $(top_builddir)/src/liblcms2.la 
+transicc_LDFLAGS = @LDFLAGS@
+transicc_SOURCES = transicc.c ../common/xgetopt.c ../common/vprf.c  ../common/utils.h
+transicc_MANS = transicc.1
+EXTRA_DIST = $(man_MANS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign utils/transicc/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign utils/transicc/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+# Install each program of $(bin_PROGRAMS) under $(DESTDIR)$(bindir) via
+# libtool --mode=install.  Nothing is installed when $(bindir) is empty.
+# The sed/awk pipeline strips $(EXEEXT), applies $(transform) to obtain the
+# installed name, and batches programs whose name is unchanged into a single
+# install command per destination directory; renamed programs are installed
+# one at a time under their transformed name.
+install-binPROGRAMS: $(bin_PROGRAMS)
+	@$(NORMAL_INSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+	fi; \
+	for p in $$list; do echo "$$p $$p"; done | \
+	sed 's/$(EXEEXT)$$//' | \
+	while read p p1; do if test -f $$p \
+	 || test -f $$p1 \
+	  ; then echo "$$p"; echo "$$p"; else :; fi; \
+	done | \
+	sed -e 'p;s,.*/,,;n;h' \
+	    -e 's|.*|.|' \
+	    -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+	sed 'N;N;N;s,\n, ,g' | \
+	$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+	  { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+	    if ($$2 == $$4) files[d] = files[d] " " $$1; \
+	    else { print "f", $$3 "/" $$4, $$1; } } \
+	  END { for (d in files) print "f", d, files[d] }' | \
+	while read type dir files; do \
+	    if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+	    test -z "$$files" || { \
+	    echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+	    $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+	    } \
+	; done
+
+# Remove the installed programs from $(DESTDIR)$(bindir).  The installed
+# names are recomputed from $(bin_PROGRAMS) with the same basename,
+# $(EXEEXT) and $(transform) handling used at install time.  Exits early
+# (status 0) when there is nothing to uninstall.
+uninstall-binPROGRAMS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	files=`for p in $$list; do echo "$$p"; done | \
+	  sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+	      -e 's/$$/$(EXEEXT)/' \
+	`; \
+	test -n "$$list" || exit 0; \
+	echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+	cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+# Delete the built programs.  When $(EXEEXT) is non-empty a second pass also
+# removes the same names with the extension stripped.
+clean-binPROGRAMS:
+	@list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+	echo " rm -f" $$list; \
+	rm -f $$list || exit $$?; \
+	test -n "$(EXEEXT)" || exit 0; \
+	list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+	echo " rm -f" $$list; \
+	rm -f $$list
+# Stamp file marking that ../common exists; the objects built there depend
+# on it so the directory is created before they are compiled.
+../common/$(am__dirstamp):
+	@$(MKDIR_P) ../common
+	@: > ../common/$(am__dirstamp)
+../common/xgetopt.$(OBJEXT): ../common/$(am__dirstamp)
+../common/vprf.$(OBJEXT): ../common/$(am__dirstamp)
+
+# Link the transicc executable; any previous binary is removed first.
+transicc$(EXEEXT): $(transicc_OBJECTS) $(transicc_DEPENDENCIES) $(EXTRA_transicc_DEPENDENCIES) 
+	@rm -f transicc$(EXEEXT)
+	$(AM_V_CCLD)$(transicc_LINK) $(transicc_OBJECTS) $(transicc_LDADD) $(LIBS)
+
+# Remove object files from this directory and from ../common.
+# The leading '-' lets make continue if rm reports an error.
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+	-rm -f ../common/*.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+# Suffix rules: compile a C source into a .o, .obj or libtool .lo object.
+.c.o:
+	$(AM_V_CC)$(COMPILE) -c -o $@ $<
+
+# Same as .c.o, but the source path is passed through $(CYGPATH_W).
+.c.obj:
+	$(AM_V_CC)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+	$(AM_V_CC)$(LTCOMPILE) -c -o $@ $<
+
+# Remove libtool objects and libtool's working directories.
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+# Source-indexing targets.  Each one builds the list of unique tagged files
+# via $(am__define_uniq_tagged_files) and feeds it to the indexing tool.
+
+# mkid database.
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+# etags: skipped entirely when there are no arguments and no files.
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+# ctags: runs only when there are arguments or files to index.
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+# GNU global: run gtags from $(top_srcdir), writing into the build tree.
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+# Append this directory's tagged files to $(top_builddir)/cscope.files,
+# using the build-dir path when the file exists there and the source-dir
+# path otherwise.
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+# Copy every file of $(DISTFILES) into $(distdir).
+# $(srcdir)/ and $(top_srcdir)/ prefixes are stripped from the names first,
+# needed sub-directories are created, directories are copied recursively
+# (made user-writable where necessary), and plain files are copied only if
+# not already present in $(distdir).  Any failed copy aborts with status 1.
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+# Top-level entry points; each public target forwards to its *-am twin.
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS)
+# Create the installation directory for programs.
+installdirs:
+	for dir in "$(DESTDIR)$(bindir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+# Build everything, then install in a sub-make.
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+# Run "install" with the stripping install program; when $(STRIP) is set it
+# is additionally exported to the installer as STRIPPROG.
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+# Cleaning levels.  clean-am and distclean-am chain the narrower targets;
+# the *-generic targets with no recipe do nothing in this directory.
+mostlyclean-generic:
+
+clean-generic:
+
+# Remove configure-generated files (VPATH copies only when building outside
+# the source directory) and the ../common directory stamp.
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+	-rm -f ../common/$(am__dirstamp)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
+
+# distclean additionally deletes the generated Makefile itself.
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+# Remaining standard targets.  Those with neither prerequisites nor a recipe
+# are no-ops in this directory: it only builds and installs a program.
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+# maintainer-clean also deletes the generated Makefile.
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS
+
+# Targets whose recipes invoke $(MAKE) recursively.
+.MAKE: install-am install-strip
+
+# Targets that name actions rather than files.
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean \
+	clean-binPROGRAMS clean-generic clean-libtool cscopelist-am \
+	ctags ctags-am distclean distclean-compile distclean-generic \
+	distclean-libtool distclean-tags distdir dvi dvi-am html \
+	html-am info info-am install install-am install-binPROGRAMS \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	installcheck installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags tags-am uninstall uninstall-am uninstall-binPROGRAMS
+
+# Never delete the Makefile, even if regenerating it is interrupted.
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/transicc/transicc.1 b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/transicc/transicc.1
new file mode 100755
index 0000000000..0c50a9039e
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/transicc/transicc.1
@@ -0,0 +1,90 @@
+.\"Shiju P. Nair September 30, 2004
+.\"Thomas Weber <tweber@debian.org> April 23, 2014
+.TH TRANSICC 1 "MAY 30, 2011"
+.SH NAME
+transicc \- little cms ColorSpace conversion calculator.
+.SH SYNOPSIS
+.B transicc
+.RI [ options ]\ [ CGATSINPUT ]\ [ CGATSOUTPUT ]
+.SH DESCRIPTION
+lcms is a standalone CMM engine, which deals with the color management.
+It implements a fast transformation between ICC profiles.
+.B transicc
+is a lcms ColorSpace conversion calculator.
+.SH OPTIONS
+.TP
+.B \-b
+Black point compensation.
+.TP
+.BI \-c\  NUM
+Precalculates transform (0=Off, 1=Normal, 2=Hi-res, 3=LoRes) [defaults to 0].
+.TP
+.BI \-d\  NUM
+Observer adaptation state (abs.col. only), (0..1.0, float value) [defaults to 0.0].
+.TP
+.B \-e
+Encoded representation of numbers is not float (Option \fB\-w\fR=use 16 bits, Option \fB\-x\fR=hexadecimal).
+.TP
+.B \-g
+Marks out-of-gamut colors on softproof.
+.TP
+.BI \-i\  profile
+Input profile (defaults to sRGB).
+.TP
+.B \-l
+Transform by device-link profile.
+.TP
+.BI \-m\  NUM
+SoftProof intent (0,1,2,3) [defaults to 0].
+.TP
+.B \-n
+Terse output, intended for pipe usage.
+.TP
+.BI \-o\  profile
+Output profile (defaults to sRGB).
+.TP
+.BI \-p\  profile
+Soft proof profile.
+.TP
+.B \-q
+Quantize CGATS to 8 bits.
+.TP
+.B \-s
+Bounded mode.
+.TP
+.BI \-t\  NUM
+Rendering intent
+.nf
+.RS
+0=Perceptual [default]
+1=Relative colorimetric
+2=Saturation
+3=Absolute colorimetric
+10=Perceptual preserving black ink
+11=Relative colorimetric preserving black ink
+12=Saturation preserving black ink
+13=Perceptual preserving black plane
+14=Relative colorimetric preserving black plane
+15=Saturation preserving black plane
+.RE
+.fi
+.TP
+.BI \-v\  verbosity
+Verbosity level, (0=None, 1=Normal, 2=High, 3=Very High) [defaults to 1].
+.TP
+.B \-w
+Use 16 bits.
+.TP
+.B \-x
+Hexadecimal.
+.PP
+You can use '*Lab' and '*xyz' as built-in profiles.
+.SH NOTES
+For suggestions, comments, bug reports etc. send mail to
+info@littlecms.com.
+.SH SEE ALSO
+.BR jpgicc (1),
+.BR linkicc (1),
+.BR psicc (1),
+.BR tificc (1)
+.SH AUTHOR
+This manual page was written by Shiju P. Nair <shiju.p@gmail.com>,
+for the Debian project.
diff --git a/codec/L2/demos/pikEnc/host/third_party/lcms/utils/transicc/transicc.c b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/transicc/transicc.c
new file mode 100755
index 0000000000..7a76b4d14c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lcms/utils/transicc/transicc.c
@@ -0,0 +1,1316 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System
+//  Copyright (c) 1998-2017 Marti Maria Saguer
+//
+// Permission is hereby granted, free of charge, to any person obtaining 
+// a copy of this software and associated documentation files (the "Software"), 
+// to deal in the Software without restriction, including without limitation 
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+// and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in 
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//---------------------------------------------------------------------------------
+//
+
+#include "utils.h"
+
+#ifndef _MSC_VER 
+#    include <unistd.h> 
+#endif 
+
+#ifdef CMS_IS_WINDOWS_
+#    include <io.h>
+#endif
+
+// Capacity, in bytes, intended for buffers that receive one token of user input
+#define MAX_INPUT_BUFFER 4096
+
+// Global options
+// Each flag below is set from the command line in HandleSwitches();
+// the initializer is the value used when the switch is absent.
+
+// -x: print encoded results in hexadecimal
+static cmsBool           InHexa                 = FALSE;
+// -g: mark out-of-gamut colors (needs a proofing profile)
+static cmsBool           GamutCheck             = FALSE;
+// -w: print encoded results as 16-bit values
+static cmsBool           Width16                = FALSE;
+// -b: black point compensation
+static cmsBool           BlackPointCompensation = FALSE;
+// -l: the input profile is a device link
+static cmsBool           lIsDeviceLink          = FALSE;
+// -q: round printed float results to integers
+static cmsBool           lQuantize              = FALSE;
+// cleared by -s: when FALSE, printed float results are clipped to [0, range]
+static cmsBool           lUnbounded             = TRUE;
+// cleared by -e: when FALSE, output uses the encoded (integer) representation
+static cmsBool           lIsFloat               = TRUE;
+
+// -t: rendering intent of the main transform
+static cmsUInt32Number   Intent           = INTENT_PERCEPTUAL;
+// -m: rendering intent used for soft proofing (0..3)
+static cmsUInt32Number   ProofingIntent   = INTENT_PERCEPTUAL;
+
+// -c: precalculation mode (0=Off, 1=Normal, 2=Hi-res, 3=LoRes)
+static int PrecalcMode  = 0;
+
+// --------------------------------------------------------------
+
+// Profile names taken from the command line (-i / -l, -o, -p); NULL when not given
+static char *cInProf   = NULL;
+static char *cOutProf  = NULL;
+static char *cProofing = NULL;
+
+// Argument of the -! switch
+static char *IncludePart = NULL;
+
+static cmsHANDLE hIT8in = NULL;        // CGATS input 
+static cmsHANDLE hIT8out = NULL;       // CGATS output
+
+static char CGATSPatch[1024];   // Actual Patch Name
+static char CGATSoutFilename[cmsMAX_PATH];
+
+static int nMaxPatches;
+
+// Main transform plus the two optional transforms used to dump PCS values
+// (XYZ and Lab); all three are created in OpenTransforms()
+static cmsHTRANSFORM hTrans, hTransXYZ, hTransLab;
+// TRUE when the input profile is a named-color profile (input is then an index)
+static cmsBool InputNamedColor = FALSE;
+
+static cmsColorSpaceSignature InputColorSpace, OutputColorSpace;
+
+// Channel names for the input and output sides
+static cmsNAMEDCOLORLIST* InputColorant = NULL;
+static cmsNAMEDCOLORLIST* OutputColorant = NULL;
+
+// Scale factors: typed input values are divided by InputRange,
+// printed float results are multiplied by OutputRange
+static cmsFloat64Number InputRange, OutputRange;
+
+
+// isatty replacement
+#ifdef _MSC_VER
+#define xisatty(x) _isatty( _fileno( (x) ) )
+#else
+#define xisatty(x) isatty( fileno( (x) ) )
+#endif
+
+//---------------------------------------------------------------------------------------------------
+
+// Print the usage summary to stderr.
+// Every switch is shown prefixed with the switch character SW; the list of
+// rendering intents is produced by PrintRenderingIntents().
+static
+void Help(void)
+{
+    fprintf(stderr, "usage: transicc [flags] [CGATS input] [CGATS output]\n\n");
+
+    fprintf(stderr, "flags:\n\n");
+    fprintf(stderr, "%cv<0..3> - Verbosity level\n", SW);
+
+    fprintf(stderr, "%ce[op] - Encoded representation of numbers\n", SW);
+    fprintf(stderr, "\t%cw - use 16 bits\n", SW);
+    fprintf(stderr, "\t%cx - Hexadecimal\n\n", SW);
+
+    // Spelling of "highlights" corrected in the user-visible text below
+    fprintf(stderr, "%cs - bounded mode (clip negatives and highlights)\n", SW);
+    fprintf(stderr, "%cq - Quantize (round decimals)\n\n", SW);
+
+    fprintf(stderr, "%ci<profile> - Input profile (defaults to sRGB)\n", SW);
+    fprintf(stderr, "%co<profile> - Output profile (defaults to sRGB)\n", SW);
+    fprintf(stderr, "%cl<profile> - Transform by device-link profile\n", SW);
+
+    fprintf(stderr, "\nYou can use '*Lab', '*xyz' and others as built-in profiles\n\n");
+
+    PrintRenderingIntents();
+
+    fprintf(stderr, "\n");
+
+    fprintf(stderr, "%cd<0..1> - Observer adaptation state (abs.col. only)\n\n", SW);
+
+    fprintf(stderr, "%cb - Black point compensation\n", SW);
+
+    fprintf(stderr, "%cc<0,1,2,3> Precalculates transform (0=Off, 1=Normal, 2=Hi-res, 3=LoRes)\n\n", SW);
+    fprintf(stderr, "%cn - Terse output, intended for pipe usage\n", SW);
+
+    fprintf(stderr, "%cp<profile> - Soft proof profile\n", SW);
+    fprintf(stderr, "%cm<0,1,2,3> - Soft proof intent\n", SW);
+    fprintf(stderr, "%cg - Marks out-of-gamut colors on softproof\n\n", SW);
+
+    fprintf(stderr, "This program is intended to be a demo of the little cms\n"
+        "engine. Both lcms and this program are freeware. You can\n"
+        "obtain both in source code at http://www.littlecms.com\n"
+        "For suggestions, comments, bug reports etc. send mail to\n"
+        "info@littlecms.com\n\n");
+}
+
+
+
+// Parse the command-line switches and store them in the global options.
+//
+// argc/argv are the program arguments as received by main(). Every switch is
+// accepted in lower and upper case. Out-of-range values for the precalc mode,
+// adaptation state, proofing intent and verbosity, as well as unknown
+// switches, are reported through FatalError().
+//
+// After parsing, the float representation is forced whenever an output CGATS
+// file is present on the command line, overriding an earlier 'e' switch.
+static
+void HandleSwitches(int argc, char *argv[])
+{
+    int s;
+
+    while ((s = xgetopt(argc, argv,
+        "bBC:c:d:D:eEgGI:i:L:l:m:M:nNO:o:p:P:QqSsT:t:V:v:WwxX!:")) != EOF) {
+
+        switch (s) {
+
+        case '!':
+            IncludePart = xoptarg;
+            break;
+
+            // Black point compensation
+        case 'b':
+        case 'B':
+            BlackPointCompensation = TRUE;
+            break;
+
+            // Precalculation mode
+        case 'c':
+        case 'C':
+            PrecalcMode = atoi(xoptarg);
+            if (PrecalcMode < 0 || PrecalcMode > 3)
+                FatalError("Unknown precalc mode '%d'", PrecalcMode);
+            break;
+
+            // Observer adaptation state
+        case 'd':
+        case 'D': {
+            cmsFloat64Number ObserverAdaptationState = atof(xoptarg);
+            if (ObserverAdaptationState < 0 ||
+                ObserverAdaptationState > 1.0)
+                FatalError("Adaptation states should be between 0 and 1");
+
+            cmsSetAdaptationState(ObserverAdaptationState);
+            }
+            break;
+
+            // Encoded (non-float) representation
+        case 'e':
+        case 'E':
+            lIsFloat = FALSE;
+            break;
+
+            // Gamut check
+        case 'g':
+        case 'G':
+            GamutCheck = TRUE;
+            break;
+
+            // Input profile; not allowed once a device link was given
+        case 'i':
+        case 'I':
+            if (lIsDeviceLink)
+                FatalError("icctrans: Device-link already specified");
+
+            cInProf = xoptarg;
+            break;
+
+            // Device-link profile, stored as the input profile
+        case 'l':
+        case 'L':
+            cInProf = xoptarg;
+            lIsDeviceLink = TRUE;
+            break;
+
+            // No extra intents for proofing
+        case 'm':
+        case 'M':
+            ProofingIntent = atoi(xoptarg);
+            if (ProofingIntent > 3)
+                FatalError("Unknown Proofing Intent '%d'", ProofingIntent);
+            break;
+
+            // For compatibility
+        case 'n':
+        case 'N':
+            Verbose = 0;
+            break;
+
+            // Output profile
+        case 'o':
+        case 'O':
+            if (lIsDeviceLink)
+                FatalError("icctrans: Device-link already specified");
+            cOutProf = xoptarg;
+            break;
+
+            // Proofing profile
+        case 'p':
+        case 'P':
+            cProofing = xoptarg;
+            break;
+
+            // Quantize (get rid of decimals)
+        case 'q':
+        case 'Q':
+            lQuantize = TRUE;
+            break;
+
+            // Inhibit unbounded mode
+        case 's':
+        case 'S':
+            lUnbounded = FALSE;
+            break;
+
+            // The intent
+        case 't':
+        case 'T':
+            Intent = atoi(xoptarg);
+            break;
+
+            // Verbosity level
+        case 'V':
+        case 'v':
+            Verbose = atoi(xoptarg);
+            if (Verbose < 0 || Verbose > 3) {
+                FatalError("Unknown verbosity level '%d'", Verbose);
+            }
+            break;
+
+            // Wide (16 bits)
+        case 'W':
+        case 'w':
+            Width16 = TRUE;
+            break;
+
+            // Hexadecimal
+        case 'x':
+        case 'X':
+            InHexa = TRUE;
+            break;
+
+        default:
+            FatalError("Unknown option - run without args to see valid ones.\n");
+        }
+    }
+
+
+    // If output CGATS involved, switch to float.
+    // The usage is "[flags] [CGATS input] [CGATS output]", so the output file
+    // is the second positional argument; the previous "> 2" test could only
+    // match a command line with more positionals than the usage allows.
+    // NOTE(review): assumes xoptind is the index of the first positional
+    // argument once xgetopt() returns EOF - confirm against xgetopt.
+    if ((argc - xoptind) >= 2) {
+        lIsFloat = TRUE;
+    }
+}
+
+
+
+// Store the scale factor for one side of the transform:
+// InputRange when IsInput is true, OutputRange otherwise.
+static
+void SetRange(cmsFloat64Number range, cmsBool IsInput)
+{
+    cmsFloat64Number* target = IsInput ? &InputRange : &OutputRange;
+
+    *target = range;
+}
+
+// Build a named color list holding the usual channel names of a color space.
+// As a side effect the scale factor for that side (input when IsInput is
+// true, output otherwise) is recorded through SetRange(). Color spaces
+// without a dedicated name table get generic "Channel #n" names and a range
+// of 1. Returns NULL when the list cannot be allocated; in that case the
+// range is left untouched.
+static
+cmsNAMEDCOLORLIST* ComponentNames(cmsColorSpaceSignature space, cmsBool IsInput)
+{
+    // NULL-terminated channel name tables, one per known color space
+    static const char* const NamesXYZ[]   = { "X", "Y", "Z", NULL };
+    static const char* const NamesLab[]   = { "L*", "a*", "b*", NULL };
+    static const char* const NamesLuv[]   = { "L", "u", "v", NULL };
+    static const char* const NamesYCbCr[] = { "Y", "Cb", "Cr", NULL };
+    static const char* const NamesYxy[]   = { "Y", "x", "y", NULL };
+    static const char* const NamesRgb[]   = { "R", "G", "B", NULL };
+    static const char* const NamesGray[]  = { "G", NULL };
+    static const char* const NamesHsv[]   = { "H", "s", "v", NULL };
+    static const char* const NamesHls[]   = { "H", "l", "s", NULL };
+    static const char* const NamesCmyk[]  = { "C", "M", "Y", "K", NULL };
+    static const char* const NamesCmy[]   = { "C", "M", "Y", NULL };
+
+    const char* const* names = NULL;
+    cmsFloat64Number range = 1;
+    cmsNAMEDCOLORLIST* list;
+    char Generic[cmsMAX_PATH];
+    int k, nChannels;
+
+    list = cmsAllocNamedColorList(0, 12, cmsMAXCHANNELS, "", "");
+    if (list == NULL) return NULL;
+
+    // Pick the name table and range; anything unlisted keeps range 1, no table
+    switch (space) {
+
+    case cmsSigXYZData:   names = NamesXYZ;   range = 100; break;
+    case cmsSigLabData:   names = NamesLab;   range = 1;   break;
+    case cmsSigLuvData:   names = NamesLuv;   range = 1;   break;
+    case cmsSigYCbCrData: names = NamesYCbCr; range = 255; break;
+    case cmsSigYxyData:   names = NamesYxy;   range = 1;   break;
+    case cmsSigRgbData:   names = NamesRgb;   range = 255; break;
+    case cmsSigGrayData:  names = NamesGray;  range = 255; break;
+    case cmsSigHsvData:   names = NamesHsv;   range = 255; break;
+    case cmsSigHlsData:   names = NamesHls;   range = 255; break;
+    case cmsSigCmykData:  names = NamesCmyk;  range = 1;   break;
+    case cmsSigCmyData:   names = NamesCmy;   range = 1;   break;
+
+    default:
+        break;
+    }
+
+    SetRange(range, IsInput);
+
+    if (names != NULL) {
+
+        for (; *names != NULL; names++)
+            cmsAppendNamedColor(list, *names, NULL, NULL);
+    }
+    else {
+
+        // Unknown space: one generic name per channel
+        nChannels = cmsChannelsOf(space);
+
+        for (k = 0; k < nChannels; k++) {
+
+            sprintf(Generic, "Channel #%d", k + 1);
+            cmsAppendNamedColor(list, Generic, NULL, NULL);
+        }
+    }
+
+    return list;
+}
+
+
+// Creates all needed color transforms
+static
+cmsBool OpenTransforms(void)
+{
+    cmsHPROFILE hInput, hOutput, hProof;
+    cmsUInt32Number dwIn, dwOut, dwFlags;
+    cmsNAMEDCOLORLIST* List;
+    int i;
+
+    // We don't need cache
+    dwFlags = cmsFLAGS_NOCACHE;
+
+    if (lIsDeviceLink) {
+
+        hInput  = OpenStockProfile(0, cInProf);
+        if (hInput == NULL) return FALSE; 
+        hOutput = NULL;
+        hProof  = NULL;
+
+        if (cmsGetDeviceClass(hInput) == cmsSigNamedColorClass) {
+            OutputColorSpace  = cmsGetColorSpace(hInput);
+            InputColorSpace = cmsGetPCS(hInput);
+        }
+        else {
+            InputColorSpace  = cmsGetColorSpace(hInput);
+            OutputColorSpace = cmsGetPCS(hInput);
+        }
+
+        // Read colorant tables if present
+        if (cmsIsTag(hInput, cmsSigColorantTableTag)) {
+            List = cmsReadTag(hInput, cmsSigColorantTableTag);
+            InputColorant = cmsDupNamedColorList(List);
+            InputRange = 1;
+        }
+        else InputColorant = ComponentNames(InputColorSpace, TRUE);
+
+        if (cmsIsTag(hInput, cmsSigColorantTableOutTag)){
+
+            List = cmsReadTag(hInput, cmsSigColorantTableOutTag);
+            OutputColorant = cmsDupNamedColorList(List);
+            OutputRange = 1;
+        }
+        else OutputColorant = ComponentNames(OutputColorSpace, FALSE);
+
+    }
+    else {
+
+        hInput  = OpenStockProfile(0, cInProf);
+        if (hInput == NULL) return FALSE;
+
+        hOutput = OpenStockProfile(0, cOutProf);    
+        if (hOutput == NULL) return FALSE;
+        hProof  = NULL;
+
+
+        if (cmsGetDeviceClass(hInput) == cmsSigLinkClass ||
+            cmsGetDeviceClass(hOutput) == cmsSigLinkClass)   
+            FatalError("Use %cl flag for devicelink profiles!\n", SW);
+
+
+        InputColorSpace   = cmsGetColorSpace(hInput);
+        OutputColorSpace  = cmsGetColorSpace(hOutput);
+
+        // Read colorant tables if present
+        if (cmsIsTag(hInput, cmsSigColorantTableTag)) {
+            List = cmsReadTag(hInput, cmsSigColorantTableTag);
+            InputColorant = cmsDupNamedColorList(List);
+            if (cmsNamedColorCount(InputColorant) <= 3) 
+                SetRange(255, TRUE);
+            else
+                SetRange(1, TRUE);  // Inks are already divided by 100 in the formatter
+
+        }
+        else InputColorant = ComponentNames(InputColorSpace, TRUE);
+
+        if (cmsIsTag(hOutput, cmsSigColorantTableTag)){
+
+            List = cmsReadTag(hOutput, cmsSigColorantTableTag);
+            OutputColorant = cmsDupNamedColorList(List);
+            if (cmsNamedColorCount(OutputColorant) <= 3) 
+                SetRange(255, FALSE);
+            else
+                SetRange(1, FALSE);  // Inks are already divided by 100 in the formatter
+        }
+        else OutputColorant = ComponentNames(OutputColorSpace, FALSE);
+
+
+        if (cProofing != NULL) {
+
+            hProof = OpenStockProfile(0, cProofing);
+            if (hProof == NULL) return FALSE;
+            dwFlags |= cmsFLAGS_SOFTPROOFING;
+        }
+    }
+
+    // Print information on profiles
+    if (Verbose > 2) {
+
+        printf("Profile:\n");
+        PrintProfileInformation(hInput);
+
+        if (hOutput) {
+
+            printf("Output profile:\n");
+            PrintProfileInformation(hOutput);
+        }  
+
+        if (hProof != NULL) {
+            printf("Proofing profile:\n");
+            PrintProfileInformation(hProof);
+        }
+    }
+
+
+    // Input is always in floating point
+    dwIn  = cmsFormatterForColorspaceOfProfile(hInput, 0, TRUE);
+
+    if (lIsDeviceLink) {
+
+        dwOut = cmsFormatterForPCSOfProfile(hInput, lIsFloat ? 0 : 2, lIsFloat);
+    }
+    else {
+
+        // 16 bits or floating point (only on output)   
+        dwOut = cmsFormatterForColorspaceOfProfile(hOutput, lIsFloat ? 0 : 2, lIsFloat);
+    }
+
+    // For named color, there is a specialized formatter
+    if (cmsGetDeviceClass(hInput) == cmsSigNamedColorClass) {
+        
+        dwIn = TYPE_NAMED_COLOR_INDEX;
+        InputNamedColor = TRUE;
+    }
+
+    // Precision mode
+    switch (PrecalcMode) {
+
+       case 0: dwFlags |= cmsFLAGS_NOOPTIMIZE; break;
+       case 2: dwFlags |= cmsFLAGS_HIGHRESPRECALC; break;
+       case 3: dwFlags |= cmsFLAGS_LOWRESPRECALC; break;
+       case 1: break;
+
+       default: 
+           FatalError("Unknown precalculation mode '%d'", PrecalcMode);
+    }
+
+
+    if (BlackPointCompensation) 
+        dwFlags |= cmsFLAGS_BLACKPOINTCOMPENSATION;
+
+
+    if (GamutCheck) {
+
+        cmsUInt16Number Alarm[cmsMAXCHANNELS];
+
+        if (hProof == NULL)
+            FatalError("I need proofing profile -p for gamut checking!");
+
+        for (i=0; i < cmsMAXCHANNELS; i++)
+            Alarm[i] = 0xFFFF;
+
+        cmsSetAlarmCodes(Alarm);
+        dwFlags |= cmsFLAGS_GAMUTCHECK;            
+    }
+
+
+    // The main transform
+    hTrans = cmsCreateProofingTransform(hInput,  dwIn, hOutput, dwOut, hProof, Intent, ProofingIntent, dwFlags);
+
+    if (hProof) cmsCloseProfile(hProof);
+
+    if (hTrans == NULL) return FALSE;
+
+
+    // PCS Dump if requested
+    hTransXYZ = NULL; hTransLab = NULL;
+
+    if (hOutput && Verbose > 1) {
+
+        cmsHPROFILE hXYZ = cmsCreateXYZProfile();
+        cmsHPROFILE hLab = cmsCreateLab4Profile(NULL);
+
+        hTransXYZ = cmsCreateTransform(hInput, dwIn, hXYZ,  lIsFloat ? TYPE_XYZ_DBL : TYPE_XYZ_16, Intent, cmsFLAGS_NOCACHE);        
+        if (hTransXYZ == NULL) return FALSE;
+
+        hTransLab = cmsCreateTransform(hInput, dwIn, hLab,  lIsFloat? TYPE_Lab_DBL : TYPE_Lab_16, Intent, cmsFLAGS_NOCACHE);    
+        if (hTransLab == NULL) return FALSE;
+
+        cmsCloseProfile(hXYZ);
+        cmsCloseProfile(hLab);
+    } 
+
+    if (hInput) cmsCloseProfile(hInput);
+    if (hOutput) cmsCloseProfile(hOutput); 
+
+    return TRUE;
+}
+
+
+// Release the colorant lists and every transform that is currently allocated.
+// Handles that are NULL are skipped.
+static
+void CloseTransforms(void)
+{
+    if (InputColorant != NULL) {
+        cmsFreeNamedColorList(InputColorant);
+    }
+
+    if (OutputColorant != NULL) {
+        cmsFreeNamedColorList(OutputColorant);
+    }
+
+    if (hTrans != NULL) {
+        cmsDeleteTransform(hTrans);
+    }
+
+    if (hTransLab != NULL) {
+        cmsDeleteTransform(hTransLab);
+    }
+
+    if (hTransXYZ != NULL) {
+        cmsDeleteTransform(hTransXYZ);
+    }
+}
+
+// ---------------------------------------------------------------------------------------------------
+
+// Get input from user.
+//
+// Reads one whitespace-delimited token from stdin into Buffer, which must
+// hold at least 4096 bytes (the scanf width is 4095 plus the terminator).
+// frm and the following arguments form a printf-style prompt; it is written
+// to stderr only when stdin is a terminal.
+//
+// On end of input, on a read error, or when the token starts with 'q' or
+// 'Q', the transforms are released and the process exits with status 0.
+static
+void GetLine(char* Buffer, const char* frm, ...)
+{
+    int res;
+
+    do {
+        if (xisatty(stdin)) {
+
+            // A va_list may be walked only once, so it is restarted for every
+            // prompt instead of being shared across loop iterations.
+            va_list args;
+
+            va_start(args, frm);
+            vfprintf(stderr, frm, args);
+            va_end(args);
+        }
+
+        res = scanf("%4095s", Buffer);
+
+        // Buffer holds a token only when scanf stored one (res > 0); the cast
+        // keeps toupper() defined for characters with the high bit set.
+        if (res < 0 ||
+            (res > 0 && toupper((unsigned char) Buffer[0]) == 'Q')) { // Quit?
+
+            CloseTransforms();
+
+            if (xisatty(stdin))
+                fprintf(stderr, "Done.\n");
+
+            exit(0);
+        }
+    } while (res == 0);
+}
+
+
+// Print one result given as doubles, one entry per output channel.
+// Each value is scaled by OutputRange, optionally rounded (lQuantize) and,
+// in bounded mode, clipped to [0, OutputRange]. Channel names are printed
+// only when Verbose is above zero.
+static
+void PrintFloatResults(cmsFloat64Number Value[])
+{
+    char Label[cmsMAX_PATH];
+    cmsUInt32Number ch;
+    const cmsUInt32Number nChannels = cmsChannelsOf(OutputColorSpace);
+
+    for (ch = 0; ch < nChannels; ch++) {
+
+        cmsFloat64Number scaled;
+
+        if (OutputColorant == NULL) {
+            // No colorant names: fall back to a generic label and unit range
+            OutputRange = 1;
+            sprintf(Label, "Channel #%u", ch + 1);
+        }
+        else {
+            cmsNamedColorInfo(OutputColorant, ch, Label, NULL, NULL, NULL, NULL);
+        }
+
+        scaled = (cmsFloat64Number) Value[ch] * OutputRange;
+
+        if (lQuantize) {
+            scaled = floor(scaled + 0.5);
+        }
+
+        if (!lUnbounded) {
+            if (scaled < 0) scaled = 0;
+            if (scaled > OutputRange) scaled = OutputRange;
+        }
+
+        if (Verbose > 0)
+            printf("%s=%.4f ", Label, scaled);
+        else
+            printf("%.4f ", scaled);
+    }
+
+    printf("\n");
+}
+
+
+// Get a named-color index.
+//
+// Prompts for an index into the named color list of the main transform,
+// prints the prefix, name and suffix of the chosen color, and returns the
+// index. Returns 0 when the transform has no named color list. An index
+// outside 0..count-1 is reported through FatalError().
+static
+cmsUInt16Number GetIndex(void)
+{
+    char Buffer[4096], Name[cmsMAX_PATH], Prefix[40], Suffix[40];
+    int index, max;
+    const cmsNAMEDCOLORLIST* NamedColorList;
+
+    NamedColorList = cmsGetNamedColorList(hTrans);
+    if (NamedColorList == NULL) return 0;
+
+    max = cmsNamedColorCount(NamedColorList)-1;
+
+    GetLine(Buffer, "Color index (0..%d)? ", max);
+    index = atoi(Buffer);
+
+    // Negative values must be rejected too: they would otherwise wrap to a
+    // large unsigned index in the lookup and in the returned value.
+    if (index < 0 || index > max)
+        FatalError("Named color %d out of range!", index);
+
+    // Start from empty strings so the printf below never reads
+    // uninitialized memory if the lookup does not fill a field.
+    Name[0] = Prefix[0] = Suffix[0] = '\0';
+
+    cmsNamedColorInfo(NamedColorList, index, Name, Prefix, Suffix, NULL, NULL);
+
+    printf("\n%s %s %s\n", Prefix, Name, Suffix);
+
+    return (cmsUInt16Number) index;
+}
+
+// Read one set of input values from stdin (terminal or redirected text).
+// For named-color input the color index is copied, as a cmsUInt16Number,
+// into the start of Float. Otherwise one value per input channel is read and
+// divided by InputRange. Prompts are shown only when stdin is a terminal.
+static
+void TakeFloatValues(cmsFloat64Number Float[])
+{
+    char Token[4096];
+    char Label[cmsMAX_PATH];
+    cmsUInt32Number ch, nChannels;
+
+    if (xisatty(stdin))
+        fprintf(stderr, "\nEnter values, 'q' to quit\n");
+
+    if (!InputNamedColor) {
+
+        nChannels = cmsChannelsOf(InputColorSpace);
+
+        for (ch = 0; ch < nChannels; ch++) {
+
+            if (InputColorant == NULL) {
+                // No colorant names: generic label and unit range
+                InputRange = 1;
+                sprintf(Label, "Channel #%u", ch+1);
+            }
+            else {
+                cmsNamedColorInfo(InputColorant, ch, Label, NULL, NULL, NULL, NULL);
+            }
+
+            GetLine(Token, "%s? ", Label);
+
+            Float[ch] = (cmsFloat64Number) atof(Token) / InputRange;
+        }
+
+        if (xisatty(stdin))
+            fprintf(stderr, "\n");
+    }
+    else {
+
+        // This is named color index, which is always cmsUInt16Number
+        cmsUInt16Number index = GetIndex();
+
+        memcpy(Float, &index, sizeof(index));
+    }
+}
+
+// Print the PCS (Lab and XYZ) values of an input color in floating point.
+// Does nothing unless Verbose is above 1 and both PCS transforms exist.
+// XYZ components are printed multiplied by 100.
+static
+void PrintPCSFloat(cmsFloat64Number Input[])
+{
+    cmsCIEXYZ xyz = { 0, 0, 0 };
+    cmsCIELab lab = { 0, 0, 0 };
+
+    if (Verbose <= 1 || hTransXYZ == NULL || hTransLab == NULL)
+        return;
+
+    cmsDoTransform(hTransXYZ, Input, &xyz, 1);
+    cmsDoTransform(hTransLab, Input, &lab, 1);
+
+    printf("[PCS] Lab=(%.4f,%.4f,%.4f) XYZ=(%.4f,%.4f,%.4f)\n", lab.L, lab.a, lab.b,
+        xyz.X * 100.0, xyz.Y * 100.0, xyz.Z * 100.0);
+}
+
+
+
+
+// -----------------------------------------------------------------------------------------------
+
+// Print every output channel of an encoded color on a single line.
+// Values are shown as they are when Width16 is set, otherwise they are
+// divided by 257 and rounded; InHexa selects hexadecimal over decimal.
+static
+void PrintEncodedResults(cmsUInt16Number Encoded[])
+{
+    char Label[cmsMAX_PATH];
+    cmsUInt32Number ch, raw;
+    cmsUInt32Number nChannels = cmsChannelsOf(OutputColorSpace);
+
+    for (ch = 0; ch < nChannels; ch++) {
+
+        const char* fmt;
+        int shown;
+
+        // Channel label: colorant name when a table exists, else generic.
+        if (OutputColorant == NULL)
+            sprintf(Label, "Channel #%u", ch + 1);
+        else
+            cmsNamedColorInfo(OutputColorant, ch, Label, NULL, NULL, NULL, NULL);
+
+        if (Verbose > 0)
+            printf("%s=", Label);
+
+        raw = Encoded[ch];
+
+        // Round to nearest, optionally dividing the value by 257 first.
+        if (Width16)
+            shown = (int) floor(raw + .5);
+        else
+            shown = (int) floor(raw / 257. + .5);
+
+        if (InHexa)
+            fmt = Width16 ? "0x%04X " : "0x%02X ";
+        else
+            fmt = "%d ";
+
+        printf(fmt, shown);
+    }
+
+    printf("\n");
+}
+
+// In verbose mode, print the input color's Lab and XYZ as hexadecimal words.
+
+static
+void PrintPCSEncoded(cmsFloat64Number Input[])
+{
+    cmsUInt16Number xyz16[3], lab16[3];
+
+    // Skip unless verbosity exceeds 1 and both PCS transforms exist.
+    if (Verbose <= 1 || !hTransXYZ || !hTransLab)
+        return;
+
+    cmsDoTransform(hTransXYZ, Input, xyz16, 1);
+    cmsDoTransform(hTransLab, Input, lab16, 1);
+
+    printf("[PCS] Lab=(0x%04X,0x%04X,0x%04X) XYZ=(0x%04X,0x%04X,0x%04X)\n",
+        lab16[0], lab16[1], lab16[2], xyz16[0], xyz16[1], xyz16[2]);
+}
+
+
+// --------------------------------------------------------------------------------------
+
+
+
+// Fetch field 'Name' of the current CGATS patch and return it divided by Max.
+
+static
+cmsFloat64Number GetIT8Val(const char* Name, cmsFloat64Number Max)
+{
+    const char* Text;
+    Text = cmsIT8GetData(hIT8in, CGATSPatch, Name);
+    if (!Text)
+        FatalError("Field '%s' not found", Name);
+
+    // The field is read as text; parse it before dividing.
+    return atof(Text) / Max;
+}
+
+
+// Read the input values of patch number nPatch from the input CGATS file.
+// Float[] receives one value per input channel (or a named color index).
+static
+void TakeCGATSValues(int nPatch, cmsFloat64Number Float[])
+{
+    // At first take the name if SAMPLE_ID is present
+    if (cmsIT8GetPatchName(hIT8in, nPatch, CGATSPatch) == NULL) {
+        FatalError("Sorry, I need 'SAMPLE_ID' on input CGATS to operate.");
+    }
+
+    // Special handling for named color profiles.
+    // Lookup the name in the names database (the transform)
+    if (InputNamedColor) {
+        const cmsNAMEDCOLORLIST* NamedColorList;
+        cmsUInt16Number index16;
+        int index;
+
+        NamedColorList = cmsGetNamedColorList(hTrans);
+        if (NamedColorList == NULL)
+            FatalError("Malformed named color profile");
+
+        index = cmsNamedColorIndex(NamedColorList, CGATSPatch);
+        if (index < 0)
+            FatalError("Named color '%s' not found in the profile", CGATSPatch);
+
+        // A named color index is a cmsUInt16Number: copy its raw bytes, the
+        // same way TakeFloatValues() does, instead of storing a double.
+        index16 = (cmsUInt16Number) index;
+        memcpy(Float, &index16, sizeof(index16));
+        return;
+    }
+
+    // Color is not a spot color, proceed.
+
+    switch (InputColorSpace) {
+
+        // Encoding should follow CGATS specification.
+
+    case cmsSigXYZData:
+        Float[0] = cmsIT8GetDataDbl(hIT8in, CGATSPatch, "XYZ_X") / 100.0;
+        Float[1] = cmsIT8GetDataDbl(hIT8in, CGATSPatch, "XYZ_Y") / 100.0;
+        Float[2] = cmsIT8GetDataDbl(hIT8in, CGATSPatch, "XYZ_Z") / 100.0;
+        break;
+
+    case cmsSigLabData:
+        Float[0] = cmsIT8GetDataDbl(hIT8in, CGATSPatch, "LAB_L");
+        Float[1] = cmsIT8GetDataDbl(hIT8in, CGATSPatch, "LAB_A");
+        Float[2] = cmsIT8GetDataDbl(hIT8in, CGATSPatch, "LAB_B");
+        break;
+
+
+    case cmsSigRgbData:
+        Float[0] = GetIT8Val("RGB_R", 255.0);
+        Float[1] = GetIT8Val("RGB_G", 255.0);
+        Float[2] = GetIT8Val("RGB_B", 255.0);
+        break;
+
+    case cmsSigGrayData:
+        Float[0] = GetIT8Val("GRAY", 255.0);
+        break;
+
+    case cmsSigCmykData:
+        Float[0] = GetIT8Val("CMYK_C", 1.0);
+        Float[1] = GetIT8Val("CMYK_M", 1.0);
+        Float[2] = GetIT8Val("CMYK_Y", 1.0);
+        Float[3] = GetIT8Val("CMYK_K", 1.0);
+        break;
+
+    case cmsSigCmyData:
+        Float[0] = GetIT8Val("CMY_C", 1.0);
+        Float[1] = GetIT8Val("CMY_M", 1.0);
+        Float[2] = GetIT8Val("CMY_Y", 1.0);
+        break;
+
+    case cmsSig1colorData:
+    case cmsSig2colorData:
+    case cmsSig3colorData:
+    case cmsSig4colorData:
+    case cmsSig5colorData:
+    case cmsSig6colorData:
+    case cmsSig7colorData:
+    case cmsSig8colorData:
+    case cmsSig9colorData:
+    case cmsSig10colorData:
+    case cmsSig11colorData:
+    case cmsSig12colorData:
+    case cmsSig13colorData:
+    case cmsSig14colorData:
+    case cmsSig15colorData:
+        {
+            cmsUInt32Number i, n;
+
+            n = cmsChannelsOf(InputColorSpace);
+            for (i=0; i < n; i++) {
+
+                char Buffer[255];
+
+                sprintf(Buffer, "%uCLR_%u", n, i+1);
+                Float[i] = GetIT8Val(Buffer, 100.0);
+            }
+
+        }
+        break;
+
+    default:
+        {
+            cmsUInt32Number i, n;
+
+            n = cmsChannelsOf(InputColorSpace);
+            for (i=0; i < n; i++) {
+
+                char Buffer[255];
+
+                sprintf(Buffer, "CHAN_%u", i+1);
+                Float[i] = GetIT8Val(Buffer, 1.0);
+            }
+
+        }
+    }
+
+}
+
+// Store Val in column Col of the current output patch, rounded if lQuantize.
+static
+void SetCGATSfld(const char* Col, cmsFloat64Number Val)
+{
+    cmsFloat64Number stored = lQuantize ? floor(Val + 0.5) : Val;
+
+    if (cmsIT8SetDataDbl(hIT8out, CGATSPatch, Col, stored)) return;
+
+    FatalError("couldn't set '%s' on output cgats '%s'", Col, CGATSoutFilename);
+}
+
+
+
+// Write one converted color to the current patch of the output CGATS.
+// Float[] holds one value per output channel; scaling depends on the space.
+static
+void PutCGATSValues(cmsFloat64Number Float[])
+{
+    cmsIT8SetData(hIT8out, CGATSPatch, "SAMPLE_ID", CGATSPatch);
+    switch (OutputColorSpace) {
+    // Encoding should follow CGATS specification.
+
+    case cmsSigXYZData:
+
+        SetCGATSfld("XYZ_X", Float[0] * 100.0);
+        SetCGATSfld("XYZ_Y", Float[1] * 100.0);
+        SetCGATSfld("XYZ_Z", Float[2] * 100.0);
+        break;
+
+    case cmsSigLabData:
+
+        SetCGATSfld("LAB_L", Float[0]);
+        SetCGATSfld("LAB_A", Float[1]);
+        SetCGATSfld("LAB_B", Float[2]);
+        break;
+
+
+    case cmsSigRgbData:
+        SetCGATSfld("RGB_R", Float[0] * 255.0);
+        SetCGATSfld("RGB_G", Float[1] * 255.0);
+        SetCGATSfld("RGB_B", Float[2] * 255.0);
+        break;
+
+    case cmsSigGrayData:
+        SetCGATSfld("GRAY", Float[0] * 255.0);
+        break;
+
+    case cmsSigCmykData:
+        SetCGATSfld("CMYK_C", Float[0]);
+        SetCGATSfld("CMYK_M", Float[1]);
+        SetCGATSfld("CMYK_Y", Float[2]);
+        SetCGATSfld("CMYK_K", Float[3]);
+        break;
+
+    case cmsSigCmyData:
+        SetCGATSfld("CMY_C", Float[0]);
+        SetCGATSfld("CMY_M", Float[1]);
+        SetCGATSfld("CMY_Y", Float[2]);
+        break;
+
+    case cmsSig1colorData:
+    case cmsSig2colorData:
+    case cmsSig3colorData:
+    case cmsSig4colorData:
+    case cmsSig5colorData:
+    case cmsSig6colorData:
+    case cmsSig7colorData:
+    case cmsSig8colorData:
+    case cmsSig9colorData:
+    case cmsSig10colorData:
+    case cmsSig11colorData:
+    case cmsSig12colorData:
+    case cmsSig13colorData:
+    case cmsSig14colorData:
+    case cmsSig15colorData:
+        {
+            // Field names must use the OUTPUT channel count so that they
+            // match the columns declared by SetOutputDataFormat().
+            cmsUInt32Number i, n;
+            n = cmsChannelsOf(OutputColorSpace);
+            for (i=0; i < n; i++) {
+
+                char Buffer[255];
+
+                sprintf(Buffer, "%uCLR_%u", n, i+1);
+
+                SetCGATSfld(Buffer, Float[i] * 100.0);
+            }
+        }
+        break;
+
+    default:
+        {
+            // One generic CHAN_<i> field per OUTPUT channel, again matching
+            // the columns declared by SetOutputDataFormat().
+            cmsUInt32Number i, n;
+            n = cmsChannelsOf(OutputColorSpace);
+            for (i=0; i < n; i++) {
+
+                char Buffer[255];
+
+                sprintf(Buffer, "CHAN_%u", i+1);
+
+                SetCGATSfld(Buffer, Float[i]);
+            }
+        }
+    }
+}
+
+
+
+// Set up hIT8out: number format, header properties and the column layout for OutputColorSpace.
+static
+void SetOutputDataFormat(void) 
+{
+    cmsIT8DefineDblFormat(hIT8out, "%.4g");
+    cmsIT8SetPropertyStr(hIT8out, "ORIGINATOR", "icctrans");
+
+    if (IncludePart != NULL) 
+        cmsIT8SetPropertyStr(hIT8out, ".INCLUDE", IncludePart);
+
+    cmsIT8SetComment(hIT8out, "Data follows");
+    cmsIT8SetPropertyDbl(hIT8out, "NUMBER_OF_SETS", nMaxPatches);
+    // Column 0 is always SAMPLE_ID; the remaining columns depend on the color space.
+
+    switch (OutputColorSpace) {
+
+
+        // Encoding should follow CGATS specification.
+
+    case cmsSigXYZData:
+        cmsIT8SetPropertyDbl(hIT8out, "NUMBER_OF_FIELDS", 4);
+        cmsIT8SetDataFormat(hIT8out, 0, "SAMPLE_ID");
+        cmsIT8SetDataFormat(hIT8out, 1, "XYZ_X");
+        cmsIT8SetDataFormat(hIT8out, 2, "XYZ_Y");
+        cmsIT8SetDataFormat(hIT8out, 3, "XYZ_Z");
+        break;
+
+    case cmsSigLabData:
+        cmsIT8SetPropertyDbl(hIT8out, "NUMBER_OF_FIELDS", 4);
+        cmsIT8SetDataFormat(hIT8out, 0, "SAMPLE_ID");
+        cmsIT8SetDataFormat(hIT8out, 1, "LAB_L");
+        cmsIT8SetDataFormat(hIT8out, 2, "LAB_A");
+        cmsIT8SetDataFormat(hIT8out, 3, "LAB_B");
+        break;
+
+
+    case cmsSigRgbData:
+        cmsIT8SetPropertyDbl(hIT8out, "NUMBER_OF_FIELDS", 4);
+        cmsIT8SetDataFormat(hIT8out, 0, "SAMPLE_ID");
+        cmsIT8SetDataFormat(hIT8out, 1, "RGB_R");
+        cmsIT8SetDataFormat(hIT8out, 2, "RGB_G");
+        cmsIT8SetDataFormat(hIT8out, 3, "RGB_B");
+        break;
+
+    case cmsSigGrayData:                
+        cmsIT8SetPropertyDbl(hIT8out, "NUMBER_OF_FIELDS", 2);
+        cmsIT8SetDataFormat(hIT8out, 0, "SAMPLE_ID");
+        cmsIT8SetDataFormat(hIT8out, 1, "GRAY");
+        break;
+
+    case cmsSigCmykData:
+        cmsIT8SetPropertyDbl(hIT8out, "NUMBER_OF_FIELDS", 5);
+        cmsIT8SetDataFormat(hIT8out, 0, "SAMPLE_ID");
+        cmsIT8SetDataFormat(hIT8out, 1, "CMYK_C");
+        cmsIT8SetDataFormat(hIT8out, 2, "CMYK_M");
+        cmsIT8SetDataFormat(hIT8out, 3, "CMYK_Y");
+        cmsIT8SetDataFormat(hIT8out, 4, "CMYK_K");
+        break;
+
+    case cmsSigCmyData:
+        cmsIT8SetPropertyDbl(hIT8out, "NUMBER_OF_FIELDS", 4);
+        cmsIT8SetDataFormat(hIT8out, 0, "SAMPLE_ID");
+        cmsIT8SetDataFormat(hIT8out, 1, "CMY_C");
+        cmsIT8SetDataFormat(hIT8out, 2, "CMY_M");
+        cmsIT8SetDataFormat(hIT8out, 3, "CMY_Y");                   
+        break;
+
+    case cmsSig1colorData:
+    case cmsSig2colorData:
+    case cmsSig3colorData:
+    case cmsSig4colorData:
+    case cmsSig5colorData:
+    case cmsSig6colorData:
+    case cmsSig7colorData:
+    case cmsSig8colorData:
+    case cmsSig9colorData:
+    case cmsSig10colorData:
+    case cmsSig11colorData:
+    case cmsSig12colorData:
+    case cmsSig13colorData:
+    case cmsSig14colorData:
+    case cmsSig15colorData:
+        {
+            int i, n;
+            char Buffer[255];
+            // Columns are named <n>CLR_<i>, with i running from 1 to the channel count n.
+            n = cmsChannelsOf(OutputColorSpace);
+            cmsIT8SetPropertyDbl(hIT8out, "NUMBER_OF_FIELDS", n+1);
+            cmsIT8SetDataFormat(hIT8out, 0, "SAMPLE_ID");
+
+            for (i=1; i <= n; i++) {
+                sprintf(Buffer, "%dCLR_%d", n, i);
+                cmsIT8SetDataFormat(hIT8out, i, Buffer);
+            }
+        }
+        break;
+
+    default: {
+        // Any other color space: generic CHAN_<i> columns, one per channel.
+        int i, n;
+        char Buffer[255];
+
+        n = cmsChannelsOf(OutputColorSpace);
+        cmsIT8SetPropertyDbl(hIT8out, "NUMBER_OF_FIELDS", n+1);
+        cmsIT8SetDataFormat(hIT8out, 0, "SAMPLE_ID");
+
+        for (i=1; i <= n; i++) {
+            sprintf(Buffer, "CHAN_%d", i);
+            cmsIT8SetDataFormat(hIT8out, i, Buffer);
+        }
+    }
+    }
+}
+
+// Open the CGATS input (first extra argument) and, when a second extra
+// argument is given, prepare the CGATS output. More than two is an error.
+static
+void OpenCGATSFiles(int argc, char *argv[])
+{
+    int nParams = argc - xoptind;
+
+    if (nParams >= 1)  {
+
+        hIT8in = cmsIT8LoadFromFile(0, argv[xoptind]);
+        if (hIT8in == NULL)
+            FatalError("'%s' is not recognized as a CGATS file", argv[xoptind]);
+
+        nMaxPatches = (int) cmsIT8GetPropertyDbl(hIT8in, "NUMBER_OF_SETS");
+    }
+
+    if (nParams == 2) {
+        // SetOutputDataFormat() uses hIT8out right away, so fail early.
+        hIT8out = cmsIT8Alloc(NULL);
+        if (hIT8out == NULL) FatalError("Unable to allocate output CGATS");
+        SetOutputDataFormat();
+        strncpy(CGATSoutFilename, argv[xoptind+1], cmsMAX_PATH-1);
+        CGATSoutFilename[cmsMAX_PATH-1] = 0; // assumes a cmsMAX_PATH-sized buffer - TODO confirm
+    }
+    if (nParams > 2) FatalError("Too many CGATS files");
+}
+
+
+
+// Entry point: parse switches, build the transforms, then convert every
+// input color (CGATS patches or interactive values) and report the result.
+// Returns 0 on success, 1 when setup fails or the output CGATS can't be saved.
+int main(int argc, char *argv[])
+{
+    cmsUInt16Number Output[cmsMAXCHANNELS];
+    cmsFloat64Number OutputFloat[cmsMAXCHANNELS];
+    cmsFloat64Number InputFloat[cmsMAXCHANNELS];
+
+    int nPatch = 0;
+    int rc = 0;
+
+    fprintf(stderr, "LittleCMS ColorSpace conversion calculator - 4.3 [LittleCMS %2.2f]\n", LCMS_VERSION / 1000.0);
+
+    InitUtils("transicc");
+
+    Verbose = 1;
+
+    if (argc == 1) {
+        Help();
+        return 0;
+    }
+
+    HandleSwitches(argc, argv);
+
+    // Open profiles, create transforms
+    if (!OpenTransforms()) return 1;
+
+    // Open CGATS input if specified
+    OpenCGATSFiles(argc, argv);
+
+    // Main loop: read all values and convert them
+    for(;;) {
+
+        if (hIT8in != NULL) {
+            if (nPatch >= nMaxPatches) break;
+            TakeCGATSValues(nPatch++, InputFloat);
+        } else {
+            if (feof(stdin)) break;
+            TakeFloatValues(InputFloat);
+        }
+
+        if (lIsFloat)
+            cmsDoTransform(hTrans, InputFloat, OutputFloat, 1);
+        else
+            cmsDoTransform(hTrans, InputFloat, Output, 1);
+
+
+        if (hIT8out != NULL) {
+            // NOTE(review): OutputFloat is only filled when lIsFloat is set;
+            // presumably CGATS output implies float mode - confirm.
+            PutCGATSValues(OutputFloat);
+        }
+        else {
+            if (lIsFloat) {
+                PrintFloatResults(OutputFloat); PrintPCSFloat(InputFloat);
+            }
+            else {
+                PrintEncodedResults(Output);   PrintPCSEncoded(InputFloat);
+            }
+        }
+    }
+
+
+    // Cleanup
+    CloseTransforms();
+
+    if (hIT8in)
+        cmsIT8Free(hIT8in);
+
+    if (hIT8out) {
+        // A failed save must not be reported as success.
+        if (!cmsIT8SaveToFile(hIT8out, CGATSoutFilename)) {
+            fprintf(stderr, "couldn't save output cgats '%s'\n", CGATSoutFilename);
+            rc = 1;
+        }
+        cmsIT8Free(hIT8out);
+    }
+
+    return rc;
+}
+
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/LICENSE b/codec/L2/demos/pikEnc/host/third_party/lodepng/LICENSE
new file mode 100755
index 0000000000..a5fb0603d9
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/LICENSE
@@ -0,0 +1,21 @@
+Copyright (c) 2005-2018 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+    
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/README.md b/codec/L2/demos/pikEnc/host/third_party/lodepng/README.md
new file mode 100755
index 0000000000..301bd10e76
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/README.md
@@ -0,0 +1,12 @@
+LodePNG
+-------
+
+PNG encoder and decoder in C and C++.
+
+Home page: http://lodev.org/lodepng/
+
+Only two files are needed to allow your program to read and write PNG files: lodepng.cpp and lodepng.h.
+
+For C, you can rename lodepng.cpp to lodepng.c and it'll work. C++ only adds extra API.
+
+The other files in the project are just examples, unit tests, etc...
\ No newline at end of file
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_4bit_palette.cpp b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_4bit_palette.cpp
new file mode 100755
index 0000000000..1490ddfc9a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_4bit_palette.cpp
@@ -0,0 +1,101 @@
+/*
+LodePNG Examples
+
+Copyright (c) 2005-2012 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+//g++ lodepng.cpp example_4bit_palette.cpp -ansi -pedantic -Wall -Wextra -O3
+
+
+
+/*
+LodePNG 4-bit palette example.
+This example encodes a 511x511 PNG with a 4-bit palette.
+Both image and palette contain sine waves, resulting in a sort of plasma.
+The 511 (rather than power of two 512) size is of course chosen on purpose to
+confirm that scanlines not filling up an entire byte size are working.
+
+NOTE: a PNG image with a translucent palette is perfectly valid. However there
+exist some programs that cannot correctly read those, including, surprisingly,
+Gimp 2.8 image editor (until you set mode to RGB).
+*/
+
+#include <cmath>
+#include <iostream>
+
+#include "lodepng.h"
+
+//Encodes a 511x511 4-bit palette PNG and saves it to the file named by argv[1].
+int main(int argc, char *argv[]) {
+  //a target filename is mandatory
+  if(argc < 2) {
+    std::cout << "please provide a filename to save to" << std::endl;
+    return 0;
+  }
+
+  //encoder state: holds the settings and the color info of both sides
+  lodepng::State state;
+
+  //fill the 16-entry palette with sine-based colors
+  for(int entry = 0; entry != 16; ++entry) {
+    unsigned char red = 127 * (1 + std::sin(5 * entry * 6.28318531 / 16));
+    unsigned char green = 127 * (1 + std::sin(2 * entry * 6.28318531 / 16));
+    unsigned char blue = 127 * (1 + std::sin(3 * entry * 6.28318531 / 16));
+    //alpha channel of the palette (tRNS chunk)
+    unsigned char alpha = 63 * (1 + std::sin(8 * entry * 6.28318531 / 16)) + 128;
+    //the raw image and the PNG both use this palette, so it goes into the
+    //input color mode as well as the output color mode
+    lodepng_palette_add(&state.info_png.color, red, green, blue, alpha);
+    lodepng_palette_add(&state.info_raw, red, green, blue, alpha);
+  }
+
+  //raw and encoded image are both 4-bit palette images (colorType 3)
+  state.info_raw.colortype = LCT_PALETTE;
+  state.info_raw.bitdepth = 4;
+  state.info_png.color.colortype = LCT_PALETTE;
+  state.info_png.color.bitdepth = 4;
+  state.encoder.auto_convert = 0; //keep exactly the PNG color mode chosen above
+
+  //generate some image: two pixels per byte, first pixel in the high nibble
+  const unsigned w = 511;
+  const unsigned h = 511;
+  std::vector<unsigned char> pixels((w * h * 4 + 7) / 8, 0);
+  for(unsigned pos = 0; pos < w * h; pos++) {
+    const unsigned x = pos % w;
+    const unsigned y = pos / w;
+    const unsigned shift = (pos % 2 == 1) ? 0 : 4;
+
+    int color = (int)(4 * ((1 + std::sin(2.0 * 6.28318531 * x / (double)w))
+                         + (1 + std::sin(2.0 * 6.28318531 * y / (double)h))) );
+
+    pixels[pos / 2] |= (unsigned char)(color << shift);
+  }
+
+  //encode and save
+  std::vector<unsigned char> png;
+  unsigned error = lodepng::encode(png, pixels.empty() ? 0 : &pixels[0], w, h, state);
+  if(error) {
+    std::cout << "encoder error " << error << ": "<< lodepng_error_text(error) << std::endl;
+    return 0;
+  }
+  lodepng::save_file(png, argv[1]);
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_bmp2png.cpp b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_bmp2png.cpp
new file mode 100755
index 0000000000..9254bdebe6
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_bmp2png.cpp
@@ -0,0 +1,125 @@
+/*
+LodePNG Examples
+
+Copyright (c) 2005-2010 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+/*
+Load a BMP image and convert it to a PNG image. This example also shows how
+to use other data with the same memory structure as BMP, such as the image
+format native to win32, GDI (HBITMAP, BITMAPINFO, ...) often encountered if
+you're programming for Windows in Visual Studio.
+
+This example only supports uncompressed 24-bit RGB or 32-bit RGBA bitmaps.
+For other types of BMP's, use a full fledged BMP decoder, or convert the
+bitmap to 24-bit or 32-bit format.
+
+NOTE: it overwrites the output file without warning if it exists!
+*/
+
+//g++ lodepng.cpp example_bmp2png.cpp -ansi -pedantic -Wall -Wextra -O3
+
+#include "lodepng.h"
+
+#include <iostream>
+
+//Decodes an uncompressed 24-bit or 32-bit BMP file held in bmp into image (w x h pixels).
+//returns 0 if all went ok, non-0 if error: (unsigned)-1 = too short for a header,
+//1 = no 'BM' marker, 2 = unsupported bit depth, 3 = too small for its pixel data
+//output image is always given in RGBA (with alpha channel), even if it's a BMP without alpha channel
+unsigned decodeBMP(std::vector<unsigned char>& image, unsigned& w, unsigned& h, const std::vector<unsigned char>& bmp) {
+  static const unsigned MINHEADER = 54; //minimum BMP header size
+
+  if(bmp.size() < MINHEADER) return -1;
+  if(bmp[0] != 'B' || bmp[1] != 'M') return 1; //It's not a BMP file if it doesn't start with marker 'BM'
+  unsigned pixeloffset = bmp[10] + 256 * bmp[11]; //where the pixel data starts
+  //read width and height from BMP header (only the two low bytes of each field)
+  w = bmp[18] + bmp[19] * 256;
+  h = bmp[22] + bmp[23] * 256;
+  //read number of channels from BMP header
+  if(bmp[28] != 24 && bmp[28] != 32) return 2; //only 24-bit and 32-bit BMPs are supported.
+  unsigned numChannels = bmp[28] / 8;
+
+  //The amount of scanline bytes is width of image times channels, with extra bytes added if needed
+  //to make it a multiple of 4 bytes.
+  unsigned scanlineBytes = w * numChannels;
+  if(scanlineBytes % 4 != 0) scanlineBytes = (scanlineBytes / 4) * 4 + 4;
+
+  //sizes and positions use size_t: with w and h up to 65535 the products can
+  //overflow 32-bit unsigned, which would defeat the bounds check below
+  size_t dataSize = (size_t)scanlineBytes * h;
+  if(bmp.size() < dataSize + pixeloffset) return 3; //BMP file too small to contain all pixels
+
+  image.resize((size_t)w * h * 4);
+
+  /*
+  There are 3 differences between BMP and the raw image buffer for LodePNG:
+  -it's upside down
+  -it's in BGR instead of RGB format (or BGRA instead of RGBA)
+  -each scanline has padding bytes to make it a multiple of 4 if needed
+  The 2D for loop below does all these 3 conversions at once.
+  */
+  for(unsigned y = 0; y < h; y++)
+  for(unsigned x = 0; x < w; x++) {
+    //pixel start byte position in the BMP
+    size_t bmpos = pixeloffset + (size_t)(h - y - 1) * scanlineBytes + (size_t)numChannels * x;
+    //pixel start byte position in the new raw image
+    size_t newpos = 4 * ((size_t)y * w + x);
+
+    //both bit depths store blue first and red third; a 32-bit pixel carries
+    //its alpha in the 4th byte, a 24-bit pixel is made fully opaque
+    image[newpos + 0] = bmp[bmpos + 2]; //R
+    image[newpos + 1] = bmp[bmpos + 1]; //G
+    image[newpos + 2] = bmp[bmpos + 0]; //B
+    image[newpos + 3] = numChannels == 4 ? bmp[bmpos + 3] : 255; //A
+  }
+  return 0;
+}
+
+//Converts the BMP file argv[1] into the PNG file argv[2]; returns 1 when decoding or encoding fails.
+int main(int argc, char *argv[]) {
+  if(argc < 3) {
+    std::cout << "Please provide input BMP and output PNG file names" << std::endl;
+    return 0;
+  }
+
+  std::vector<unsigned char> bmp;
+  lodepng::load_file(bmp, argv[1]);
+  std::vector<unsigned char> image;
+  unsigned w, h;
+  unsigned error = decodeBMP(image, w, h, bmp);
+
+  if(error) {
+    std::cout << "BMP decoding error " << error << std::endl;
+    return 1;
+  }
+
+  std::vector<unsigned char> png;
+  error = lodepng::encode(png, image, w, h);
+
+  if(error) {
+    std::cout << "PNG encoding error " << error << ": " << lodepng_error_text(error) << std::endl;
+    return 1;
+  }
+
+  lodepng::save_file(png, argv[2]);
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_decode.c b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_decode.c
new file mode 100755
index 0000000000..fc8c0f2c7b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_decode.c
@@ -0,0 +1,109 @@
+/*
+LodePNG Examples
+
+Copyright (c) 2005-2012 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+#include "lodepng.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+3 ways to decode a PNG from a file to RGBA pixel data (and 2 in-memory ways).
+*/
+
+/*
+Example 1
+Decode from disk to raw pixels with a single function call
+*/
+void decodeOneStep(const char* filename) {
+  unsigned error;
+  /*defensive: free() below must never be handed an indeterminate pointer*/
+  unsigned char* image = 0;
+  unsigned width, height;
+
+  error = lodepng_decode32_file(&image, &width, &height, filename);
+  if(error) printf("error %u: %s\n", error, lodepng_error_text(error));
+
+  /*use image here*/
+  free(image);
+}
+
+/*
+Example 2
+Load PNG file from disk to memory first, then decode to raw pixels in memory.
+*/
+void decodeTwoSteps(const char* filename) {
+  unsigned error;
+  /*stays null when loading fails and decoding is skipped, so free() is safe*/
+  unsigned char* image = 0;
+  unsigned width, height;
+  unsigned char* png = 0;
+  size_t pngsize;
+
+  error = lodepng_load_file(&png, &pngsize, filename);
+  if(!error) error = lodepng_decode32(&image, &width, &height, png, pngsize);
+  if(error) printf("error %u: %s\n", error, lodepng_error_text(error));
+
+  free(png);
+
+  /*use image here*/
+  free(image);
+}
+
+/*
+Example 3
+Load PNG file from disk using a State, normally needed for more advanced usage.
+*/
+void decodeWithState(const char* filename) {
+  unsigned error;
+  /*stays null when loading fails and decoding is skipped, so free() is safe*/
+  unsigned char* image = 0;
+  unsigned width, height;
+  unsigned char* png = 0;
+  size_t pngsize;
+  LodePNGState state;
+
+  lodepng_state_init(&state);
+  /*optionally customize the state*/
+
+  error = lodepng_load_file(&png, &pngsize, filename);
+  if(!error) error = lodepng_decode(&image, &width, &height, &state, png, pngsize);
+  if(error) printf("error %u: %s\n", error, lodepng_error_text(error));
+
+  free(png);
+  /*use image here*/
+  /*state contains extra information about the PNG such as text chunks, ...*/
+
+  lodepng_state_cleanup(&state);
+  free(image);
+}
+
+/*Decodes the file named by argv[1], or "test.png" when no argument is given.*/
+int main(int argc, char *argv[]) {
+  const char* filename = "test.png";
+  if(argc > 1) filename = argv[1];
+  decodeOneStep(filename);
+  return 0;
+}
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_decode.cpp b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_decode.cpp
new file mode 100755
index 0000000000..3a778408f3
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_decode.cpp
@@ -0,0 +1,91 @@
+/*
+LodePNG Examples
+
+Copyright (c) 2005-2012 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+#include "lodepng.h"
+#include <iostream>
+
+/*
+3 ways to decode a PNG from a file to RGBA pixel data (and 2 in-memory ways).
+*/
+
+//g++ lodepng.cpp example_decode.cpp -ansi -pedantic -Wall -Wextra -O3
+
+
+//Example 1
+//One call goes straight from a PNG file on disk to raw pixels in memory.
+void decodeOneStep(const char* filename) {
+  unsigned width, height;
+  std::vector<unsigned char> pixels; //receives the decoded raw pixels
+
+  //decode the file; a non-zero result is reported as an error below
+  const unsigned status = lodepng::decode(pixels, width, height, filename);
+
+  //report a failure, if any
+  if(status != 0) std::cout << "decoder error " << status << ": " << lodepng_error_text(status) << std::endl;
+
+  //"pixels" now holds 4 bytes per pixel, ordered RGBARGBA..., ready to be used as texture, drawn, ...
+}
+
+//Example 2
+//Two stages: read the PNG file into memory, then decode that buffer to raw pixels.
+void decodeTwoSteps(const char* filename) {
+  std::vector<unsigned char> fileBytes; //the encoded PNG as read from disk
+  std::vector<unsigned char> pixels; //the raw pixels
+  unsigned width, height;
+
+  //decoding is attempted only when loading succeeded
+  unsigned status = lodepng::load_file(fileBytes, filename);
+  if(status == 0) status = lodepng::decode(pixels, width, height, fileBytes);
+
+  //report whichever stage failed
+  if(status != 0) std::cout << "decoder error " << status << ": " << lodepng_error_text(status) << std::endl;
+
+  //"pixels" now holds 4 bytes per pixel, ordered RGBARGBA..., ready to be used as texture, drawn, ...
+}
+
+//Example 3
+//Same two stages as example 2, but decoding goes through a lodepng::State object.
+void decodeWithState(const char* filename) {
+  lodepng::State state; //optionally customize this one
+  std::vector<unsigned char> fileBytes; //the encoded PNG as read from disk
+  std::vector<unsigned char> pixels; //the raw pixels
+  unsigned width, height;
+
+  //load the image file with given filename, then decode it only if that worked
+  unsigned status = lodepng::load_file(fileBytes, filename);
+  if(status == 0) status = lodepng::decode(pixels, width, height, state, fileBytes);
+
+  //report whichever stage failed
+  if(status != 0) std::cout << "decoder error " << status << ": "<< lodepng_error_text(status) << std::endl;
+
+  //"pixels" now holds 4 bytes per pixel, ordered RGBARGBA...; state holds extra PNG information such as text chunks
+}
+
+//Decodes the PNG named on the command line, or "test.png" by default.
+int main(int argc, char *argv[]) {
+  const char* filename = (argc <= 1) ? "test.png" : argv[1];
+  decodeOneStep(filename);
+}
+
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_encode.c b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_encode.c
new file mode 100755
index 0000000000..ce43abae3a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_encode.c
@@ -0,0 +1,111 @@
+/*
+LodePNG Examples
+
+Copyright (c) 2005-2012 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+#include "lodepng.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+3 ways to encode a PNG from RGBA pixel data to a file (and 2 in-memory ways).
+NOTE: these samples overwrite the given file (or test.png) without warning!
+*/
+
+/*
+Example 1
+Write raw pixels straight to a PNG file on disk with one library call.
+image must hold width * height RGBA pixels, i.e. width * height * 4 bytes.
+*/
+void encodeOneStep(const char* filename, const unsigned char* image, unsigned width, unsigned height) {
+  /*encode and save in one go; a nonzero result is treated as an error code*/
+  const unsigned status = lodepng_encode32_file(filename, image, width, height);
+  if(status != 0) {
+    printf("error %u: %s\n", status, lodepng_error_text(status));
+  }
+}
+
+/*
+Example 2
+Encode from raw pixels to an in-memory PNG file first, then write it to disk.
+The image argument has width * height RGBA pixels or width * height * 4 bytes.
+Errors from either step (encoding or saving) are printed to stdout.
+*/
+void encodeTwoSteps(const char* filename, const unsigned char* image, unsigned width, unsigned height) {
+  unsigned char* png = 0; /*starts as null so the free() below is safe even if the encoder never sets it*/
+  size_t pngsize = 0;
+
+  unsigned error = lodepng_encode32(&png, &pngsize, image, width, height);
+  /*keep the result of the save too, so a failed write is reported instead of silently ignored*/
+  if(!error) error = lodepng_save_file(png, pngsize, filename);
+
+  if(error) printf("error %u: %s\n", error, lodepng_error_text(error));
+  free(png); /*the encoded buffer is released here with free()*/
+}
+
+/*
+Example 3
+Save a PNG file to disk using a State, normally needed for more advanced usage.
+The image argument has width * height RGBA pixels or width * height * 4 bytes.
+Errors from either step (encoding or saving) are printed to stdout.
+*/
+void encodeWithState(const char* filename, const unsigned char* image, unsigned width, unsigned height) {
+  unsigned error;
+  unsigned char* png = 0; /*starts as null so the free() below is safe even if the encoder never sets it*/
+  size_t pngsize = 0;
+  LodePNGState state;
+
+  lodepng_state_init(&state);
+  /*optionally customize the state*/
+
+  error = lodepng_encode(&png, &pngsize, image, width, height, &state);
+  /*keep the result of the save too, so a failed write is reported instead of silently ignored*/
+  if(!error) error = lodepng_save_file(png, pngsize, filename);
+  if(error) printf("error %u: %s\n", error, lodepng_error_text(error));
+
+  lodepng_state_cleanup(&state); /*pairs with lodepng_state_init above*/
+  free(png);
+}
+
+int main(int argc, char *argv[]) {
+  /*output goes to the first command line argument, or to test.png when none is given*/
+  const char* filename = argc > 1 ? argv[1] : "test.png";
+  unsigned width = 512, height = 512;
+  unsigned x, y;
+  unsigned char* image = malloc(width * height * 4); /*4 bytes (RGBA) per pixel*/
+  if(!image) { printf("error: out of memory\n"); return 1; } /*the loop below must not write through null*/
+
+  /*generate some image*/
+  for(y = 0; y < height; y++)
+  for(x = 0; x < width; x++) {
+    unsigned char* px = &image[4 * width * y + 4 * x]; /*first byte of pixel (x, y)*/
+    px[0] = 255 * !(x & y);
+    px[1] = x ^ y;
+    px[2] = x | y;
+    px[3] = 255;
+  }
+  encodeOneStep(filename, image, width, height); /*run an example*/
+  free(image);
+  return 0;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_encode.cpp b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_encode.cpp
new file mode 100755
index 0000000000..228ac0306b
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_encode.cpp
@@ -0,0 +1,92 @@
+/*
+LodePNG Examples
+
+Copyright (c) 2005-2012 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+#include "lodepng.h"
+#include <iostream>
+
+/*
+3 ways to encode a PNG from RGBA pixel data to a file (and 2 in-memory ways).
+NOTE: these samples overwrite the given file (or test.png) without warning!
+*/
+
+//g++ lodepng.cpp examples/example_encode.cpp -I./ -ansi -pedantic -Wall -Wextra -O3
+
+//Example 1
+//Write raw pixels straight to a PNG file on disk with one library call.
+//image must hold width * height RGBA pixels, i.e. width * height * 4 bytes.
+void encodeOneStep(const char* filename, std::vector<unsigned char>& image, unsigned width, unsigned height) {
+  //encode and save in one go; a nonzero result is treated as an error code
+  const unsigned status = lodepng::encode(filename, image, width, height);
+  if(status != 0) {
+    std::cout << "encoder error " << status << ": "<< lodepng_error_text(status) << std::endl;
+  }
+}
+
+//Example 2
+//Encode from raw pixels to an in-memory PNG file first, then write it to disk.
+//The image argument has width * height RGBA pixels or width * height * 4 bytes.
+//Errors from either step (encoding or saving) are printed to stdout.
+void encodeTwoSteps(const char* filename, std::vector<unsigned char>& image, unsigned width, unsigned height) {
+  std::vector<unsigned char> png; //receives the encoded PNG
+
+  unsigned error = lodepng::encode(png, image, width, height);
+  //keep the result of the save too, so a failed write is reported instead of silently ignored
+  if(!error) error = lodepng::save_file(png, filename);
+  if(error) std::cout << "encoder error " << error << ": "<< lodepng_error_text(error) << std::endl;
+}
+
+//Example 3
+//Save a PNG file to disk using a State, normally needed for more advanced usage.
+//The image argument has width * height RGBA pixels or width * height * 4 bytes.
+//Errors from either step (encoding or saving) are printed to stdout.
+void encodeWithState(const char* filename, std::vector<unsigned char>& image, unsigned width, unsigned height) {
+  std::vector<unsigned char> png; //receives the encoded PNG
+  lodepng::State state; //optionally customize this one
+
+  unsigned error = lodepng::encode(png, image, width, height, state);
+  //keep the result of the save too, so a failed write is reported instead of silently ignored
+  if(!error) error = lodepng::save_file(png, filename);
+  if(error) std::cout << "encoder error " << error << ": "<< lodepng_error_text(error) << std::endl;
+}
+
+//Generates a test pattern and saves it to the filename given as argument (test.png when none is given).
+//NOTE: an existing file with that name is overwritten without warning!
+int main(int argc, char *argv[]) {
+  const char* filename = "test.png";
+  if(argc > 1) filename = argv[1];
+  //generate some image: 4 bytes (RGBA) per pixel, rows stored one after another
+  const unsigned width = 512, height = 512;
+  std::vector<unsigned char> image(width * height * 4);
+  for(unsigned row = 0; row < height; row++) {
+    for(unsigned col = 0; col < width; col++) {
+      const unsigned base = 4 * (width * row + col);
+      image[base + 0] = 255 * !(col & row);
+      image[base + 1] = col ^ row;
+      image[base + 2] = col | row;
+      image[base + 3] = 255;
+    }
+  }
+  encodeOneStep(filename, image, width, height);
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_encode_type.cpp b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_encode_type.cpp
new file mode 100755
index 0000000000..c133b2417e
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_encode_type.cpp
@@ -0,0 +1,76 @@
+/*
+LodePNG Examples
+
+Copyright (c) 2005-2015 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+//g++ -I ./ lodepng.cpp examples/example_encode_type.cpp -ansi -pedantic -Wall -Wextra -O3
+
+
+
+/*
+This example shows how to enforce a certain color type of the PNG image when
+encoding a PNG (because by default, LodePNG automatically chooses an optimal
+color type, no matter what your raw data's color type is)
+*/
+
+#include <cmath>
+#include <iostream>
+
+#include "lodepng.h"
+
+int main(int argc, char *argv[]) {
+  //check if user gave a filename
+  if(argc < 2) {
+    std::cout << "please provide a filename to save to" << std::endl;
+    return 0;
+  }
+
+  //generate some image: every pixel gets R = G = B = 0 and A = 255
+  const unsigned w = 256;
+  const unsigned h = 256;
+  std::vector<unsigned char> image(w * h * 4);
+  for(unsigned y = 0; y < h; y++)
+  for(unsigned x = 0; x < w; x++) {
+    int index = y * w * 4 + x * 4;
+    image[index + 0] = 0;
+    image[index + 1] = 0;
+    image[index + 2] = 0;
+    image[index + 3] = 255;
+  }
+
+  // we're going to encode with a state rather than a convenient function, because enforcing a color type requires setting options
+  lodepng::State state;
+  // input color type
+  state.info_raw.colortype = LCT_RGBA;
+  state.info_raw.bitdepth = 8;
+  // output color type
+  state.info_png.color.colortype = LCT_RGBA;
+  state.info_png.color.bitdepth = 8;
+  state.encoder.auto_convert = 0; // without this, it would ignore the output color type specified above and choose an optimal one instead
+
+  //encode, then save; the save result is kept so that a failed write is reported as well
+  std::vector<unsigned char> buffer;
+  unsigned error = lodepng::encode(buffer, &image[0], w, h, state);
+  if(!error) error = lodepng::save_file(buffer, argv[1]);
+  if(error) std::cout << "encoder error " << error << ": "<< lodepng_error_text(error) << std::endl;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_gzip.cpp b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_gzip.cpp
new file mode 100755
index 0000000000..60545aa38d
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_gzip.cpp
@@ -0,0 +1,91 @@
+/*
+LodePNG Examples
+
+Copyright (c) 2005-2012 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+#include "lodepng.h"
+#include <iostream>
+#include <stdlib.h>
+
+/*
+Encodes given file as a gzip file.
+
+See also the gzip specification, RFC 1952: http://www.gzip.org/zlib/rfc-gzip.html
+*/
+
+//g++ lodepng.cpp example_gzip.cpp -ansi -pedantic -Wall -Wextra -O3
+
+//Compresses the file given as argument into "<input>.gz". Warning, this overwrites the output file without warning!
+int main(int argc, char *argv[]) {
+  if(argc < 2) {
+    std::cout << "Please provide input filename (output is input with .gz)" << std::endl;
+    return 0;
+  }
+  std::string infilename = argv[1];
+  std::string outfilename = infilename + ".gz";
+
+  std::vector<unsigned char> in;
+  unsigned error = lodepng::load_file(in, infilename);
+  if(error) { //an unreadable input must not be turned into a valid-looking .gz of nothing
+    std::cout << "error " << error << ": " << lodepng_error_text(error) << std::endl;
+    return 1;
+  }
+  const unsigned char* indata = in.empty() ? 0 : &in[0]; //&in[0] must not be evaluated on an empty vector
+
+  size_t outsize = 10; //the 10 header bytes written below
+  unsigned char* out = (unsigned char*)malloc(outsize);
+  if(!out) { std::cout << "error: out of memory" << std::endl; return 1; }
+  out[0] = 31;  //ID1
+  out[1] = 139; //ID2
+  out[2] = 8; //CM
+  out[3] = 0; //FLG
+  out[4] = out[5] = out[6] = out[7] = 0; //MTIME
+  out[8] = 2; //2 = slow, 4 = fast compression
+  out[9] = 255; //OS unknown
+
+  //lodepng_deflate is handed the buffer that already holds the header; the footer offset below relies on it appending
+  error = lodepng_deflate(&out, &outsize, indata, in.size(), &lodepng_default_compress_settings);
+  if(error) {
+    std::cout << "error " << error << ": " << lodepng_error_text(error) << std::endl;
+    free(out);
+    return 1;
+  }
+  unsigned crc = lodepng_crc32(indata, in.size());
+  //grow the buffer by the 8 footer bytes; the old pointer is kept until realloc is known to have succeeded
+  size_t footer = outsize;
+  outsize += 8;
+  unsigned char* grown = (unsigned char*)realloc(out, outsize);
+  if(!grown) { std::cout << "error: out of memory" << std::endl; free(out); return 1; }
+  out = grown;
+
+  //CRC (footer bytes 0-3) and ISIZE (footer bytes 4-7), each stored least significant byte first
+  for(unsigned i = 0; i < 4; i++) {
+    out[footer + i] = (crc >> (8 * i)) % 256;
+    out[footer + 4 + i] = (in.size() >> (8 * i)) % 256;
+  }
+
+  error = lodepng_save_file(out, outsize, outfilename.c_str());
+  if(error) std::cout << "error " << error << ": " << lodepng_error_text(error) << std::endl;
+  free(out);
+  return error ? 1 : 0;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_opengl.cpp b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_opengl.cpp
new file mode 100755
index 0000000000..a4352ebdad
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_opengl.cpp
@@ -0,0 +1,153 @@
+/*
+LodePNG Examples
+
+Copyright (c) 2005-2012 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+//Compile command for Linux:
+//g++ lodepng.cpp example_opengl.cpp -lSDL -lGL -O3
+
+/*
+LodePNG OpenGL example. Decodes a PNG and shows it in OpenGL. PNG filename
+should be given as a command line parameter.
+
+It's written for the most basic old OpenGL version, and a correction for non
+power of two textures had to be added.
+
+Only very few lines on the sample are about loading the PNG. Most of the
+sample lines show a way to render a texture in 2D in OpenGL.
+
+No fancy 3D graphics are shown, it only shows the image statically. The sample
+shows LodePNG can be used to load PNG images as textures in OpenGL.
+*/
+
+#include "lodepng.h"
+
+#include <iostream>
+#include <SDL/SDL.h>
+#include <GL/gl.h>
+
+// Decodes the PNG given as command line parameter and shows it in an SDL window through an OpenGL texture.
+int main(int argc, char *argv[]) {
+  if(argc < 2) {
+    std::cout << "Please provide a filename." << std::endl;
+    return 1;
+  }
+  const char* filename = argv[1];
+
+  // Load file and decode image.
+  std::vector<unsigned char> image;
+  unsigned width, height;
+  unsigned error = lodepng::decode(image, width, height, filename);
+
+  // If there's an error, display it.
+  if(error != 0) {
+    std::cout << "error " << error << ": " << lodepng_error_text(error) << std::endl;
+    return 1;
+  }
+
+  // Here the PNG is loaded in "image". All the rest of the code is SDL and OpenGL stuff.
+  int screenw = width; // the window takes the image size, limited to at most 1024 x 768
+  if(screenw > 1024) screenw = 1024;
+  int screenh = height;
+  if(screenh > 768) screenh = 768; // clamp the height itself (this used to overwrite screenw instead)
+
+  if(SDL_Init(SDL_INIT_VIDEO) < 0) {
+    std::cout << "Error: Unable to init SDL: " << SDL_GetError() << std::endl;
+    return 1;
+  }
+
+  SDL_Surface* scr = SDL_SetVideoMode(screenw, screenh, 32, SDL_OPENGL);
+
+  if(scr == 0) {
+    std::cout << "Error: Unable to set video. SDL error message: " << SDL_GetError() << std::endl;
+    return 1;
+  }
+
+  // The official code for "Setting Your Raster Position to a Pixel Location" (i.e. set up a camera for 2D screen)
+  glViewport(0, 0, screenw, screenh);
+  glMatrixMode(GL_PROJECTION);
+  glLoadIdentity();
+  glOrtho(0, screenw, screenh, 0, -1, 1);
+  glMatrixMode(GL_MODELVIEW);
+  glLoadIdentity();
+
+  // Make some OpenGL properties better for 2D and enable alpha channel.
+  glDisable(GL_CULL_FACE);
+  glDisable(GL_DEPTH_TEST);
+  glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+  glEnable(GL_BLEND);
+  glDisable(GL_ALPHA_TEST);
+
+  if(glGetError() != GL_NO_ERROR) {
+    std::cout << "Error initing GL" << std::endl;
+    return 1;
+  }
+
+  // Texture size must be power of two for the primitive OpenGL version this is written for. Find next power of two.
+  size_t u2 = 1; while(u2 < width) u2 *= 2;
+  size_t v2 = 1; while(v2 < height) v2 *= 2;
+  // Ratio for power of two version compared to actual version, to render the non power of two image with proper size.
+  double u3 = (double)width / u2;
+  double v3 = (double)height / v2;
+
+  // Make power of two version of the image: the original pixels go in the top left corner, the rest stays zero.
+  std::vector<unsigned char> image2(u2 * v2 * 4);
+  for(size_t y = 0; y < height; y++)
+  for(size_t x = 0; x < width; x++)
+  for(size_t c = 0; c < 4; c++) {
+    image2[4 * u2 * y + 4 * x + c] = image[4 * width * y + 4 * x + c];
+  }
+
+  // Enable the texture for OpenGL.
+  glEnable(GL_TEXTURE_2D);
+  glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); //GL_NEAREST = no smoothing
+  glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+  glTexImage2D(GL_TEXTURE_2D, 0, 4, u2, v2, 0, GL_RGBA, GL_UNSIGNED_BYTE, &image2[0]);
+
+  bool done = false;
+  SDL_Event event = {0};
+  glColor4ub(255, 255, 255, 255);
+
+  while(!done) {
+    // Quit the loop when receiving quit event.
+    while(SDL_PollEvent(&event)) {
+      if(event.type == SDL_QUIT) done = true;
+    }
+
+    // Draw the texture on a quad, using u3 and v3 to correct non power of two texture size.
+    glBegin(GL_QUADS);
+      glTexCoord2d( 0,  0); glVertex2f(    0,      0);
+      glTexCoord2d(u3,  0); glVertex2f(width,      0);
+      glTexCoord2d(u3, v3); glVertex2f(width, height);
+      glTexCoord2d( 0, v3); glVertex2f(    0, height);
+    glEnd();
+
+    // Redraw and clear screen.
+    SDL_GL_SwapBuffers();
+    glClearColor(0, 0, 0, 0);
+    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+
+    //Limit frames per second, to not heat up the CPU and GPU too much.
+    SDL_Delay(16);
+  }
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_optimize_png.cpp b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_optimize_png.cpp
new file mode 100755
index 0000000000..4fa2ac1c69
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_optimize_png.cpp
@@ -0,0 +1,129 @@
+/*
+LodePNG Examples
+
+Copyright (c) 2005-2012 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+/*
+This example saves the PNG with the best compression LodePNG can do, and with
+unnecessary chunks removed. It tries out several combinations of settings and
+keeps the smallest one.
+
+NOTE: This is not as good as a true PNG optimizer like optipng or pngcrush.
+*/
+
+//g++ lodepng.cpp example_optimize_png.cpp -ansi -pedantic -Wall -Wextra -O3
+
+#include "lodepng.h"
+
+#include <iostream>
+
+//Re-encodes the PNG argv[1] with every combination of the settings below and saves the smallest result as argv[2].
+int main(int argc, char *argv[]) {
+  std::vector<unsigned char> image;
+  unsigned w, h;
+  std::vector<unsigned char> buffer;
+  unsigned error;
+
+  //check if user gave a filename
+  if(argc < 3) {
+    std::cout << "please provide in and out filename" << std::endl;
+    return 0;
+  }
+
+  //a failed read is reported with its own error code instead of being handed to the decoder as an empty buffer
+  error = lodepng::load_file(buffer, argv[1]);
+  if(!error) error = lodepng::decode(image, w, h, buffer);
+  if(error) {
+    std::cout << "decoding error " << error << ": " << lodepng_error_text(error) << std::endl;
+    return 0;
+  }
+
+  size_t origsize = buffer.size();
+  std::cout << "Original size: " << origsize << " (" << (origsize / 1024) << "K)" << std::endl;
+  buffer.clear();
+
+  //Now encode as hard as possible with several filter types and window sizes
+  lodepng::State state;
+  state.encoder.filter_palette_zero = 0; //We try several filter types, including zero, allow trying them all on palette images too.
+  state.encoder.add_id = false; //Don't add LodePNG version chunk to save more bytes
+  state.encoder.text_compression = 1; //Not needed because we don't add text chunks, but this demonstrates another optimization setting
+  state.encoder.zlibsettings.nicematch = 258; //Set this to the max possible, otherwise it can hurt compression
+  state.encoder.zlibsettings.lazymatching = 1; //Definitely use lazy matching for better compression
+  state.encoder.zlibsettings.windowsize = 32768; //Use maximum possible window size for best compression
+
+  size_t bestsize = 0; //size of the smallest encoding found so far; only meaningful once inited is true
+  bool inited = false;
+
+  int beststrategy = 0;
+  LodePNGFilterStrategy strategies[4] = { LFS_ZERO, LFS_MINSUM, LFS_ENTROPY, LFS_BRUTE_FORCE };
+  std::string strategynames[4] = { "LFS_ZERO", "LFS_MINSUM", "LFS_ENTROPY", "LFS_BRUTE_FORCE" };
+
+  // min match 3 allows all deflate lengths. min match 6 is similar to "Z_FILTERED" of zlib.
+  int minmatches[2] = { 3, 6 };
+  int bestminmatch = 0;
+
+  int autoconverts[2] = { 0, 1 };
+  std::string autoconvertnames[2] = { "0", "1" };
+  int bestautoconvert = 0;
+
+  int bestblocktype = 0;
+
+  // Try out all combinations of everything
+  for(int i = 0; i < 4; i++)   //filter strategy
+  for(int j = 0; j < 2; j++)   //min match
+  for(int k = 0; k < 2; k++)   //block type (for small images only)
+  for(int l = 0; l < 2; l++) { //color convert strategy
+    if(bestsize > 3000 && (k > 0 || l > 0)) continue; /* these only make sense on small images */
+    std::vector<unsigned char> temp;
+    state.encoder.filter_strategy = strategies[i];
+    state.encoder.zlibsettings.minmatch = minmatches[j];
+    state.encoder.zlibsettings.btype = k == 0 ? 2 : 1;
+    state.encoder.auto_convert = autoconverts[l];
+    error = lodepng::encode(temp, image, w, h, state);
+
+    if(error) {
+      std::cout << "encoding error " << error << ": " << lodepng_error_text(error) << std::endl;
+      return 0;
+    }
+
+    if(!inited || temp.size() < bestsize) { //first result, or smaller than the best so far: keep it in buffer
+      bestsize = temp.size();
+      beststrategy = i;
+      bestminmatch = state.encoder.zlibsettings.minmatch;
+      bestautoconvert = l;
+      bestblocktype = state.encoder.zlibsettings.btype;
+      temp.swap(buffer);
+      inited = true;
+    }
+  }
+
+  std::cout << "Chosen filter strategy: " << strategynames[beststrategy] << std::endl;
+  std::cout << "Chosen min match: " << bestminmatch << std::endl;
+  std::cout << "Chosen block type: " << bestblocktype << std::endl;
+  std::cout << "Chosen auto convert: " << autoconvertnames[bestautoconvert] << std::endl;
+
+  //keep the result of the save, so a failed write is reported instead of a misleading "New size" line
+  error = lodepng::save_file(buffer, argv[2]);
+  if(error) std::cout << "saving error " << error << ": " << lodepng_error_text(error) << std::endl;
+  else std::cout << "New size: " << buffer.size() << " (" << (buffer.size() / 1024) << "K)" << std::endl;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_png2bmp.cpp b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_png2bmp.cpp
new file mode 100755
index 0000000000..ae12298fce
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_png2bmp.cpp
@@ -0,0 +1,125 @@
+/*
+LodePNG Examples
+
+Copyright (c) 2005-2012 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+#include "lodepng.h"
+#include <iostream>
+
+/*
+This example converts a PNG file to a BMP file.
+NOTE: it overwrites the output file without warning if it exists!
+Give the PNG and the BMP file names as command line arguments.
+*/
+
+/*
+g++ lodepng.cpp example_png2bmp.cpp -Wall -Wextra -pedantic -ansi -lSDL -O3
+*/
+
+
+
+//Appends the lowest "bytes" bytes of value to bmp, least significant byte first (little-endian).
+static void appendLE(std::vector<unsigned char>& bmp, unsigned long value, int bytes) {
+  for(int i = 0; i < bytes; i++) bmp.push_back((unsigned char)((value >> (8 * i)) & 255));
+}
+
+//Appends a complete uncompressed BMP file (54 header bytes, then the pixel rows) to bmp.
+//Input image must be RGB buffer (3 bytes per pixel), but you can easily make it
+//support RGBA input and output by changing the inputChannels and/or outputChannels
+//in the function to 4.
+void encodeBMP(std::vector<unsigned char>& bmp, const unsigned char* image, int w, int h) {
+  //3 bytes per pixel used for both input and output.
+  const int inputChannels = 3;
+  const int outputChannels = 3;
+  const size_t start = bmp.size(); //where this BMP begins inside bmp (0 when bmp comes in empty)
+
+  //bytes 0-13
+  bmp.push_back('B'); bmp.push_back('M'); //0: bfType
+  appendLE(bmp, 0, 4); //2: bfSize; size not yet known for now, filled in later.
+  appendLE(bmp, 0, 2); //6: bfReserved1
+  appendLE(bmp, 0, 2); //8: bfReserved2
+  appendLE(bmp, 54, 4); //10: bfOffBits (54 header bytes)
+
+  //bytes 14-53
+  appendLE(bmp, 40, 4); //14: biSize
+  appendLE(bmp, (unsigned long)w, 4); //18: biWidth, all 4 bytes so that widths above 65535 are not truncated
+  appendLE(bmp, (unsigned long)h, 4); //22: biHeight, likewise
+  appendLE(bmp, 1, 2); //26: biPlanes
+  appendLE(bmp, outputChannels * 8, 2); //28: biBitCount
+  appendLE(bmp, 0, 4); //30: biCompression
+  appendLE(bmp, 0, 4); //34: biSizeImage
+  appendLE(bmp, 0, 4); //38: biXPelsPerMeter
+  appendLE(bmp, 0, 4); //42: biYPelsPerMeter
+  appendLE(bmp, 0, 4); //46: biClrUsed
+  appendLE(bmp, 0, 4); //50: biClrImportant
+
+  /*
+  Convert the input RGBRGBRGB pixel buffer to the BMP pixel buffer format. There are 3 differences with the input buffer:
+  -BMP stores the rows inversed, from bottom to top
+  -BMP stores the color channels in BGR instead of RGB order
+  -BMP requires each row to have a multiple of 4 bytes, so sometimes padding bytes are added between rows
+  */
+  const size_t pixelrowbytes = w > 0 ? (size_t)outputChannels * w : 0;
+  const size_t padding = (4 - pixelrowbytes % 4) % 4; //zero bytes needed to make the row a multiple of 4
+
+  for(int y = h - 1; y >= 0; y--) { //the rows are stored inversed in bmp
+    for(int x = 0; x < w; x++) {
+      //size_t arithmetic keeps the offset from overflowing int on large images
+      const unsigned char* pixel = image + (size_t)inputChannels * ((size_t)w * y + x);
+      for(int c = 0; c < outputChannels; c++) {
+        //Convert RGB(A) into BGR(A): channels 0 and 2 trade places, any other channel stays put
+        bmp.push_back(pixel[c == 0 ? 2 : (c == 2 ? 0 : c)]);
+      }
+    }
+    for(size_t p = 0; p < padding; p++) bmp.push_back(0);
+  }
+
+  // Fill in the size
+  const size_t filesize = bmp.size() - start;
+  for(int i = 0; i < 4; i++) bmp[start + 2 + i] = (unsigned char)((filesize >> (8 * i)) & 255);
+}
+
+//Converts the PNG named by argv[1] into a BMP written to argv[2].
+int main(int argc, char *argv[]) {
+  if(argc < 3) {
+    std::cout << "Please provide input PNG and output BMP file names" << std::endl;
+    return 0;
+  }
+  const char* infile = argv[1];
+  const char* outfile = argv[2];
+
+  std::vector<unsigned char> image; //the raw pixels
+  unsigned width, height;
+  //decode straight to 8-bit RGB, the 3 bytes per pixel layout encodeBMP takes as input
+  unsigned error = lodepng::decode(image, width, height, infile, LCT_RGB, 8);
+  if(error) {
+    std::cout << "error " << error << ": " << lodepng_error_text(error) << std::endl;
+    return 0;
+  }
+
+  std::vector<unsigned char> bmp;
+  encodeBMP(bmp, &image[0], width, height);
+  //keep the result of the save, so a failed write is reported instead of silently ignored
+  error = lodepng::save_file(bmp, outfile);
+  if(error) std::cout << "error " << error << ": " << lodepng_error_text(error) << std::endl;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_png_info.cpp b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_png_info.cpp
new file mode 100755
index 0000000000..43427ccc49
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_png_info.cpp
@@ -0,0 +1,317 @@
+/*
+LodePNG Examples
+
+Copyright (c) 2005-2012 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+//g++ lodepng.cpp example_png_info.cpp -ansi -pedantic -Wall -Wextra -lSDL -O3
+
+/*
+This sample shows a lot of information in the console about a PNG file,
+including color type, text chunks, the names of all chunks in the image,
+etc...
+*/
+
+
+#include "lodepng.h"
+#include <iostream>
+
+/*
+Prints the header fields, color mode, text chunks, time and physical size stored in info to std::cout.
+*/
+void displayPNGInfo(const LodePNGInfo& info) {
+  const LodePNGColorMode& color = info.color;
+
+  std::cout << "Compression method: " << info.compression_method << std::endl;
+  std::cout << "Filter method: " << info.filter_method << std::endl;
+  std::cout << "Interlace method: " << info.interlace_method << std::endl;
+  std::cout << "Color type: " << color.colortype << std::endl;
+  std::cout << "Bit depth: " << color.bitdepth << std::endl;
+  std::cout << "Bits per pixel: " << lodepng_get_bpp(&color) << std::endl;
+  std::cout << "Channels per pixel: " << lodepng_get_channels(&color) << std::endl;
+  std::cout << "Is greyscale type: " << lodepng_is_greyscale_type(&color) << std::endl;
+  std::cout << "Can have alpha: " << lodepng_can_have_alpha(&color) << std::endl;
+  std::cout << "Palette size: " << color.palettesize << std::endl;
+  std::cout << "Has color key: " << color.key_defined << std::endl;
+  if(color.key_defined) { //the key components are only printed when a color key is defined
+    std::cout << "Color key r: " << color.key_r << std::endl;
+    std::cout << "Color key g: " << color.key_g << std::endl;
+    std::cout << "Color key b: " << color.key_b << std::endl;
+  }
+  std::cout << "Texts: " << info.text_num << std::endl;
+  for(size_t i = 0; i < info.text_num; i++) { //one "key: string" entry per text
+    std::cout << "Text: " << info.text_keys[i] << ": " << info.text_strings[i] << std::endl << std::endl;
+  }
+  std::cout << "International texts: " << info.itext_num << std::endl;
+  for(size_t i = 0; i < info.itext_num; i++) { //prints key, language tag, translated key and string
+    std::cout << "Text: "
+              << info.itext_keys[i] << ", "
+              << info.itext_langtags[i] << ", "
+              << info.itext_transkeys[i] << ": "
+              << info.itext_strings[i] << std::endl << std::endl;
+  }
+  std::cout << "Time defined: " << info.time_defined << std::endl;
+  if(info.time_defined) { //the time fields are only printed when a time is defined
+    const LodePNGTime& time = info.time;
+    std::cout << "year: " << time.year << std::endl;
+    std::cout << "month: " << time.month << std::endl;
+    std::cout << "day: " << time.day << std::endl;
+    std::cout << "hour: " << time.hour << std::endl;
+    std::cout << "minute: " << time.minute << std::endl;
+    std::cout << "second: " << time.second << std::endl;
+  }
+  std::cout << "Physics defined: " << info.phys_defined << std::endl;
+  if(info.phys_defined) { //the physical size fields are only printed when they are defined
+    std::cout << "physics X: " << info.phys_x << std::endl;
+    std::cout << "physics Y: " << info.phys_y << std::endl;
+    std::cout << "physics unit: " << info.phys_unit << std::endl;
+  }
+}
+
+
+/*
+Prints the type and length of every chunk found in the raw PNG file bytes; consecutive chunks of the same type share one line.
+*/
+void displayChunkNames(const std::vector<unsigned char>& buffer) {
+  // Listing chunks is based on the original file, not the decoded png info.
+  const unsigned char *chunk, *begin, *end, *next;
+  end = &buffer.back() + 1; //one past the last byte; NOTE(review): requires a non-empty buffer
+  begin = chunk = &buffer.front() + 8; //skips the first 8 bytes, presumably the PNG signature; assumes buffer.size() >= 8
+
+  std::cout << std::endl << "Chunks:" << std::endl;
+  std::cout << " type: length(s)";
+  std::string last_type;
+  while(chunk + 8 < end && chunk >= begin) { //stop when fewer than 8 bytes remain or the pointer moved before begin
+    char type[5];
+    lodepng_chunk_type(type, chunk);
+    if(std::string(type).size() != 4) { //a chunk type is expected to be exactly 4 characters
+      std::cout << "this is probably not a PNG" << std::endl;
+      return;
+    }
+
+    if(last_type != type) { //start a new output line whenever the chunk type changes
+      std::cout << std::endl;
+      std::cout << " " << type << ": ";
+    }
+    last_type = type;
+
+    std::cout << lodepng_chunk_length(chunk) << ", ";
+
+    next = lodepng_chunk_next_const(chunk);
+    if (next <= chunk) break; // integer overflow
+    chunk = next;
+  }
+  std::cout << std::endl;
+}
+
+
+/*
+Prints a framed ASCII art preview of the w x h image to std::cout; image is indexed as 4 bytes (r, g, b, a) per pixel.
+*/
+void displayAsciiArt(const std::vector<unsigned char>& image, unsigned w, unsigned h) {
+  if(w > 0 && h > 0) {
+    std::cout << std::endl << "ASCII Art Preview: " << std::endl;
+    unsigned w2 = 48; //the preview is at most 48 characters wide
+    if(w < w2) w2 = w;
+    unsigned h2 = h * w2 / w; //preview height that keeps the aspect ratio of the image
+    h2 = (h2 * 2) / 3; //compensate for non-square characters in terminal
+    if(h2 > (w2 * 2)) h2 = w2 * 2; //avoid too large output
+
+    std::cout << '+';
+    for(unsigned x = 0; x < w2; x++) std::cout << '-';
+    std::cout << '+' << std::endl;
+    for(unsigned y = 0; y < h2; y++) {
+      std::cout << "|";
+      for(unsigned x = 0; x < w2; x++) {
+        unsigned x2 = x * w / w2; //source pixel column sampled for this character
+        unsigned y2 = y * h / h2; //source pixel row sampled for this character
+        int r = image[y2 * w * 4 + x2 * 4 + 0];
+        int g = image[y2 * w * 4 + x2 * 4 + 1];
+        int b = image[y2 * w * 4 + x2 * 4 + 2];
+        int a = image[y2 * w * 4 + x2 * 4 + 3];
+        int lightness = ((r + g + b) / 3) * a / 255; //average of r, g, b scaled by alpha
+        int min = (r < g && r < b) ? r : (g < b ? g : b);
+        int max = (r > g && r > b) ? r : (g > b ? g : b);
+        int saturation = max - min;
+        int letter = 'i'; //i for grey, or r,y,g,c,b,m for colors
+        if(saturation > 32) {
+          int h = lightness >= (min + max) / 2; //note: this local h shadows the h parameter inside this branch
+          if(h) letter = (min == r ? 'c' : (min == g ? 'm' : 'y'));
+          else letter = (max == r ? 'r' : (max == g ? 'g' : 'b'));
+        }
+        int symbol = ' ';
+        if(lightness > 224) symbol = '@';
+        else if(lightness > 128) symbol = letter - 32; //lowercase ASCII letter - 32 is its uppercase form
+        else if(lightness > 32) symbol = letter;
+        else if(lightness > 16) symbol = '.';
+        std::cout << (char)symbol;
+      }
+      std::cout << "|";
+      std::cout << std::endl;
+    }
+    std::cout << '+';
+    for(unsigned x = 0; x < w2; x++) std::cout << '-';
+    std::cout << '+' << std::endl;
+  }
+}
+
+
+/*
+Prints the filter type byte of each scanline: collects all IDAT chunk data from buffer, decompresses it and steps through it line by line.
+*/
+void displayFilterTypes(const std::vector<unsigned char>& buffer, bool ignore_checksums) {
+  //Get color type and interlace type
+  lodepng::State state;
+  if(ignore_checksums) {
+    state.decoder.ignore_crc = 1;
+    state.decoder.zlibsettings.ignore_adler32 = 1;
+  }
+  unsigned w, h;
+  unsigned error;
+  error = lodepng_inspect(&w, &h, &state, &buffer[0], buffer.size());
+
+  if(error) {
+    std::cout << "inspect error " << error << ": " << lodepng_error_text(error) << std::endl;
+    return;
+  }
+
+  if(state.info_png.interlace_method == 1) {
+    std::cout << "showing filtertypes for interlaced PNG not supported by this example" << std::endl;
+    return;
+  }
+
+  //Read literal data from all IDAT chunks
+  const unsigned char *chunk, *begin, *end, *next;
+  end = &buffer.back() + 1; //one past the last byte of the file
+  begin = chunk = &buffer.front() + 8; //skips the first 8 bytes, presumably the PNG signature
+
+  std::vector<unsigned char> zdata; //concatenated payload of all IDAT chunks
+
+  while(chunk + 8 < end && chunk >= begin) {
+    char type[5];
+    lodepng_chunk_type(type, chunk);
+    if(std::string(type).size() != 4) {
+      std::cout << "this is probably not a PNG" << std::endl;
+      return;
+    }
+
+    if(std::string(type) == "IDAT") {
+      const unsigned char* cdata = lodepng_chunk_data_const(chunk);
+      unsigned clength = lodepng_chunk_length(chunk);
+      if(chunk + clength + 12 > end || clength > buffer.size() || chunk + clength + 12 < begin) { //reject chunks that do not fit inside buffer
+        std::cout << "invalid chunk length" << std::endl;
+        return;
+      }
+
+      for(unsigned i = 0; i < clength; i++) {
+        zdata.push_back(cdata[i]);
+      }
+    }
+
+    next = lodepng_chunk_next_const(chunk);
+    if (next <= chunk) break; // integer overflow
+    chunk = next;
+  }
+
+  //Decompress all IDAT data
+  std::vector<unsigned char> data;
+  error = lodepng::decompress(data, &zdata[0], zdata.size()); //NOTE(review): zdata is empty if no IDAT chunk was found - confirm callers rule that out
+
+  if(error) {
+    std::cout << "decompress error " << error << ": " << lodepng_error_text(error) << std::endl;
+    return;
+  }
+
+  //A line is 1 filter byte + all pixels
+  size_t linebytes = 1 + lodepng_get_raw_size(w, 1, &state.info_png.color);
+
+  if(linebytes == 0) { //guards the loop below against a zero step
+    std::cout << "error: linebytes is 0" << std::endl;
+    return;
+  }
+
+  std::cout << "Filter types: ";
+  for(size_t i = 0; i < data.size(); i += linebytes) { //data[i] is the first byte of each line, printed as its filter type
+    std::cout << (int)(data[i]) << " ";
+  }
+  std::cout << std::endl;
+
+}
+
+
+/*
+Main: takes a PNG filename and an optional --ignore_checksums flag, decodes the file and prints size, PNG info, chunks, filter types and a preview.
+*/
+int main(int argc, char *argv[]) /*list the chunks*/ {
+  bool ignore_checksums = false;
+  std::string filename = "";
+  for (int i = 1; i < argc; i++) {
+    if(std::string(argv[i]) == "--ignore_checksums") ignore_checksums = true;
+    else filename = argv[i]; //any other argument is taken as the filename; the last one wins
+  }
+  if(filename == "") {
+    std::cout << "Please provide a filename to preview" << std::endl;
+    return 0;
+  }
+
+  std::vector<unsigned char> buffer; //the raw file bytes
+  std::vector<unsigned char> image; //the decoded pixels
+  unsigned w, h;
+
+  lodepng::load_file(buffer, filename); //load the image file with given filename
+
+  lodepng::State state;
+  if(ignore_checksums) {
+    state.decoder.ignore_crc = 1;
+    state.decoder.zlibsettings.ignore_adler32 = 1;
+  }
+
+  unsigned error = lodepng::decode(image, w, h, state, buffer);
+
+  if(error) {
+    std::cout << "decoder error " << error << ": " << lodepng_error_text(error) << std::endl;
+    return 0;
+  }
+
+  std::cout << "Filesize: " << buffer.size() << " (" << buffer.size() / 1024 << "K)" << std::endl;
+  std::cout << "Width: " << w << std::endl;
+  std::cout << "Height: " << h << std::endl;
+  std::cout << "Num pixels: " << w * h << std::endl;
+
+  if(w > 0 && h > 0) { //the first pixel is read as 4 bytes: r, g, b, a
+    std::cout << "Top left pixel color:"
+              << " r: " << (int)image[0]
+              << " g: " << (int)image[1]
+              << " b: " << (int)image[2]
+              << " a: " << (int)image[3]
+              << std::endl;
+  }
+
+
+  displayPNGInfo(state.info_png);
+  std::cout << std::endl;
+  displayChunkNames(buffer);
+  std::cout << std::endl;
+  displayFilterTypes(buffer, ignore_checksums);
+  std::cout << std::endl;
+  displayAsciiArt(image, w, h);
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_reencode.cpp b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_reencode.cpp
new file mode 100755
index 0000000000..d316bc91d0
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_reencode.cpp
@@ -0,0 +1,72 @@
+/*
+LodePNG Examples
+
+Copyright (c) 2005-2010 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+/*
+LodePNG decode-encode: decodes the image, then encodes it again, with all the
+same information, chunks, color types, etc... as the original image had.
+This sample shows how LodePNG can be used for a conforming PNG editor.
+*/
+
+//g++ lodepng.cpp example_reencode.cpp -ansi -pedantic -Wall -Wextra -lSDL -O3 -o reencode
+
+#include "lodepng.h"
+
+#include <iostream>
+
+int main(int argc, char *argv[]) { //decodes the PNG named by argv[1] and encodes it again into argv[2]
+  std::vector<unsigned char> image;
+  unsigned w, h;
+  std::vector<unsigned char> buffer; //holds the input file bytes first, the encoded output later
+  lodepng::State state; //the same state is used for decoding and for encoding
+  unsigned error;
+
+  //check if user gave a filename
+  if(argc < 3) {
+    std::cout << "please provide in and out filename" << std::endl;
+    return 0;
+  }
+
+  state.decoder.color_convert = 0; //presumably keeps the PNG's own color type instead of converting - see lodepng.h
+  state.decoder.remember_unknown_chunks = 1; //make it reproduce even unknown chunks in the saved image
+
+  lodepng::load_file(buffer, argv[1]);
+  error = lodepng::decode(image, w, h, state, buffer);
+  if(error) {
+    std::cout << "decoder error " << error << ": " << lodepng_error_text(error) << std::endl;
+    return 0;
+  }
+
+  buffer.clear(); //reuse buffer for the encoded output
+
+  state.encoder.text_compression = 1;
+
+  error = lodepng::encode(buffer, image, w, h, state);
+  if(error) {
+    std::cout << "encoder error " << error << ": " << lodepng_error_text(error) << std::endl;
+    return 0;
+  }
+
+  lodepng::save_file(buffer, argv[2]);
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_sdl.c b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_sdl.c
new file mode 100755
index 0000000000..874bc4c139
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_sdl.c
@@ -0,0 +1,133 @@
+/*
+LodePNG Examples
+
+Copyright (c) 2005-2012 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+/*
+Compile command for Linux:
+gcc lodepng.c example_sdl.c -ansi -pedantic -Wall -Wextra -lSDL -O3 -o showpng
+
+*/
+
+/*
+LodePNG SDL example
+This example displays a PNG with a checkerboard pattern to show transparency.
+It requires the SDL library to compile and run.
+If multiple filenames are given to the command line, it shows all of them.
+Press any key to see next image, or esc to quit.
+*/
+
+#include "lodepng.h"
+
+#include <SDL/SDL.h>
+
+/*shows image with SDL. Returns 1 if user wants to fully quit, 0 if user wants to see next image.*/
+int show(const char* filename) {
+  unsigned error;
+  unsigned char* image;
+  unsigned w, h, x, y;
+  SDL_Surface* scr;
+  SDL_Event event;
+  int done;
+  size_t jump = 1;
+
+  printf("showing %s\n", filename);
+
+  /*load the PNG in one function call*/
+  error = lodepng_decode32_file(&image, &w, &h, filename);
+
+  /*stop if there is an error*/
+  if(error) {
+    printf("decoder error %u: %s\n", error, lodepng_error_text(error));
+    return 0;
+  }
+
+  /*avoid too large window size by downscaling large image*/
+
+  if(w / 1024 >= jump) jump = w / 1024 + 1;
+  if(h / 1024 >= jump) jump = h / 1024 + 1;
+
+  /*init SDL*/
+  if(SDL_Init(SDL_INIT_VIDEO) < 0) {
+    printf("Error, SDL video init failed\n");
+    return 0;
+  }
+  scr = SDL_SetVideoMode(w / jump, h / jump, 32, SDL_HWSURFACE);
+  if(!scr) {
+    printf("Error, no SDL screen\n");
+    return 0;
+  }
+  SDL_WM_SetCaption(filename, NULL); /*set window caption*/
+
+  /*plot the pixels of the PNG file*/
+  for(y = 0; y + jump - 1 < h; y += jump)
+  for(x = 0; x + jump - 1 < w; x += jump) {
+    int checkerColor;
+    Uint32* bufp;
+    Uint32 r, g, b, a;
+
+    /*get RGBA components*/
+    r = image[4 * y * w + 4 * x + 0]; /*red*/
+    g = image[4 * y * w + 4 * x + 1]; /*green*/
+    b = image[4 * y * w + 4 * x + 2]; /*blue*/
+    a = image[4 * y * w + 4 * x + 3]; /*alpha*/
+
+    /*make translucency visible by placing checkerboard pattern behind image*/
+    checkerColor = 191 + 64 * (((x / 16) % 2) == ((y / 16) % 2));
+    r = (a * r + (255 - a) * checkerColor) / 255;
+    g = (a * g + (255 - a) * checkerColor) / 255;
+    b = (a * b + (255 - a) * checkerColor) / 255;
+
+    /*give the color value to the pixel of the screenbuffer*/
+    bufp = (Uint32 *)scr->pixels + (y * scr->pitch / 4) / jump + (x / jump);
+    *bufp = 65536 * r + 256 * g + b;
+  }
+
+  /*pause until you press escape and meanwhile redraw screen*/
+  done = 0;
+  while(done == 0) {
+    while(SDL_PollEvent(&event)) {
+      if(event.type == SDL_QUIT) done = 2;
+      else if(SDL_GetKeyState(NULL)[SDLK_ESCAPE]) done = 2;
+      else if(event.type == SDL_KEYDOWN) done = 1; /*press any other key for next image*/
+    }
+    SDL_UpdateRect(scr, 0, 0, 0, 0); /*redraw screen*/
+    SDL_Delay(5); /*pause 5 ms so it consumes less processing power*/
+  }
+
+  /*cleanup*/
+  free(image);
+  SDL_Quit();
+  return done == 2 ? 1 : 0;
+}
+
+int main(int argc, char* argv[]) { /*shows every PNG named on the command line, in order*/
+  int i;
+
+  if(argc <= 1) printf("Please enter PNG file name(s) to display\n");;
+
+  for(i = 1; i < argc; i++) {
+    if(show(argv[i])) return 0; /*a nonzero result from show stops the loop early*/
+  }
+  return 0;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_sdl.cpp b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_sdl.cpp
new file mode 100755
index 0000000000..dc7dee2816
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/examples/example_sdl.cpp
@@ -0,0 +1,122 @@
+/*
+LodePNG Examples
+
+Copyright (c) 2005-2012 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+//Compile command for Linux:
+//g++ lodepng.cpp example_sdl.cpp -lSDL -O3 -o showpng
+
+/*
+LodePNG SDL example
+This example displays a PNG with a checkerboard pattern to show transparency.
+It requires the SDL library to compile and run.
+If multiple filenames are given to the command line, it shows all of them.
+Press any key to see next image, or esc to quit.
+*/
+
+#include "lodepng.h"
+
+#include <iostream>
+#include <SDL/SDL.h>
+
+int show(const std::string& caption, const unsigned char* rgba, unsigned w, unsigned h) { //shows w x h pixels (4 bytes each) in an SDL window titled caption; returns 1 to quit, else 0
+  //avoid too large window size by downscaling large image
+  unsigned jump = 1; //downscale step: only every jump-th pixel in x and y is plotted
+  if(w / 1024 >= jump) jump = w / 1024 + 1;
+  if(h / 1024 >= jump) jump = h / 1024 + 1;
+
+  //init SDL
+  if(SDL_Init(SDL_INIT_VIDEO) < 0) {
+    std::cout << "error, SDL video init failed" << std::endl;
+    return 0;
+  }
+  SDL_Surface* scr = SDL_SetVideoMode(w / jump, h / jump, 32, SDL_HWSURFACE);
+  if(!scr) {
+    std::cout << "error, no SDL screen" << std::endl;
+    return 0;
+  }
+  SDL_WM_SetCaption(caption.c_str(), NULL); //set window caption
+
+  //plot the pixels of the PNG file
+  for(unsigned y = 0; y + jump - 1 < h; y += jump)
+  for(unsigned x = 0; x + jump - 1 < w; x += jump) {
+    //get RGBA components
+    Uint32 r = rgba[4 * y * w + 4 * x + 0]; //red
+    Uint32 g = rgba[4 * y * w + 4 * x + 1]; //green
+    Uint32 b = rgba[4 * y * w + 4 * x + 2]; //blue
+    Uint32 a = rgba[4 * y * w + 4 * x + 3]; //alpha
+
+    //make translucency visible by placing checkerboard pattern behind image
+    int checkerColor = 191 + 64 * (((x / 16) % 2) == ((y / 16) % 2)); //255 or 191, alternating in 16x16 squares
+    r = (a * r + (255 - a) * checkerColor) / 255;
+    g = (a * g + (255 - a) * checkerColor) / 255;
+    b = (a * b + (255 - a) * checkerColor) / 255;
+
+    //give the color value to the pixel of the screenbuffer
+    Uint32* bufp;
+    bufp = (Uint32 *)scr->pixels + (y * scr->pitch / 4) / jump + (x / jump);
+    *bufp = 65536 * r + 256 * g + b; //packs the components as 0x00RRGGBB
+  }
+
+  //pause until you press escape and meanwhile redraw screen
+  SDL_Event event;
+  int done = 0; //0 = keep showing, 1 = next image, 2 = quit
+  while(done == 0) {
+    while(SDL_PollEvent(&event)) {
+      if(event.type == SDL_QUIT) done = 2;
+      else if(SDL_GetKeyState(NULL)[SDLK_ESCAPE]) done = 2;
+      else if(event.type == SDL_KEYDOWN) done = 1; //press any other key for next image
+    }
+    SDL_UpdateRect(scr, 0, 0, 0, 0); //redraw screen
+    SDL_Delay(5); //pause 5 ms so it consumes less processing power
+  }
+
+  SDL_Quit();
+  return done == 2 ? 1 : 0;
+}
+
+/*loads and decodes the PNG file, then shows it with SDL. Returns 1 if user wants to fully quit, 0 if user wants to see next image or decoding failed.*/
+int showfile(const char* filename) {
+  std::cout << "showing " << filename << std::endl;
+
+  std::vector<unsigned char> buffer, image; //buffer: raw file bytes, image: decoded pixels
+  lodepng::load_file(buffer, filename); //load the image file with given filename
+  unsigned w, h;
+  unsigned error = lodepng::decode(image, w, h, buffer); //decode the png
+
+  //stop if there is an error
+  if(error) {
+    std::cout << "decoder error " << error << ": " << lodepng_error_text(error) << std::endl;
+    return 0;
+  }
+
+  return show(filename, &image[0], w, h); //the filename doubles as the window caption
+}
+
+int main(int argc, char* argv[]) { //shows every PNG named on the command line, in order
+  if(argc <= 1) std::cout << "Please enter PNG file name(s) to display" << std::endl;
+
+  for(int i = 1; i < argc; i++) {
+    if(showfile(argv[i])) return 0; //a nonzero result from showfile stops the loop early
+  }
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/lodepng.cpp b/codec/L2/demos/pikEnc/host/third_party/lodepng/lodepng.cpp
new file mode 100755
index 0000000000..9128a9b636
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/lodepng.cpp
@@ -0,0 +1,5983 @@
+/*
+LodePNG version 20181230
+
+Copyright (c) 2005-2018 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+/*
+The manual and changelog are in the header file "lodepng.h"
+Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for C.
+*/
+
+#include "lodepng.h"
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1310) /*Visual Studio: A few warning types are not desired here.*/
+#pragma warning( disable : 4244 ) /*implicit conversions: not warned by gcc -Wall -Wextra and requires too much casts*/
+#pragma warning( disable : 4996 ) /*VS does not like fopen, but fopen_s is not standard C so unusable here*/
+#endif /*_MSC_VER */
+
+const char* LODEPNG_VERSION_STRING = "20181230";
+
+/*
+This source file is built up in the following large parts. The code sections
+with the "LODEPNG_COMPILE_" #defines divide this up further in an intermixed way.
+-Tools for C and common code for PNG and Zlib
+-C Code for Zlib (huffman, deflate, ...)
+-C Code for PNG (file format chunks, adam7, PNG filters, color conversions, ...)
+-The C++ wrapper around all of the above
+*/
+
+/*The malloc, realloc and free functions defined here with "lodepng_" in front
+of the name, so that you can easily change them to others related to your
+platform if needed. Everything else in the code calls these. Pass
+-DLODEPNG_NO_COMPILE_ALLOCATORS to the compiler, or comment out
+#define LODEPNG_COMPILE_ALLOCATORS in the header, to disable the ones here and
+define them in your own project's source files without needing to change
+lodepng source code. Don't forget to remove "static" if you copypaste them
+from here.*/
+
+#ifdef LODEPNG_COMPILE_ALLOCATORS
+static void* lodepng_malloc(size_t size) { /*allocation hook of this file: forwards to malloc*/
+#ifdef LODEPNG_MAX_ALLOC
+  if(size > LODEPNG_MAX_ALLOC) return 0; /*requests above the optional LODEPNG_MAX_ALLOC cap are refused*/
+#endif
+  return malloc(size);
+}
+
+static void* lodepng_realloc(void* ptr, size_t new_size) { /*reallocation hook of this file: forwards to realloc*/
+#ifdef LODEPNG_MAX_ALLOC
+  if(new_size > LODEPNG_MAX_ALLOC) return 0; /*requests above the optional LODEPNG_MAX_ALLOC cap are refused; ptr is left untouched*/
+#endif
+  return realloc(ptr, new_size);
+}
+
+static void lodepng_free(void* ptr) { /*deallocation hook of this file: forwards to free*/
+  free(ptr);
+}
+#else /*LODEPNG_COMPILE_ALLOCATORS*/
+void* lodepng_malloc(size_t size);
+void* lodepng_realloc(void* ptr, size_t new_size);
+void lodepng_free(void* ptr);
+#endif /*LODEPNG_COMPILE_ALLOCATORS*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // Tools for C, and common code for PNG and Zlib.                       // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#define LODEPNG_MAX(a, b) (((a) > (b)) ? (a) : (b)) /*caution: the selected argument is evaluated twice*/
+#define LODEPNG_MIN(a, b) (((a) < (b)) ? (a) : (b)) /*caution: the selected argument is evaluated twice*/
+
+/*
+Often in case of an error a value is assigned to a variable and then it breaks
+out of a loop (to go to the cleanup phase of a function). This macro does that.
+It makes the error handling code shorter and more readable.
+
+Example: if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83);
+*/
+#define CERROR_BREAK(errorvar, code){\
+  errorvar = code;\
+  break;\
+}
+
+/*version of CERROR_BREAK that assumes the common case where the error variable is named "error"*/
+#define ERROR_BREAK(code) CERROR_BREAK(error, code)
+
+/*Set error var to the error code, and return it.*/
+#define CERROR_RETURN_ERROR(errorvar, code){\
+  errorvar = code;\
+  return code;\
+}
+
+/*Try the code, if it returns error, also return the error.*/
+#define CERROR_TRY_RETURN(call){\
+  unsigned error = call;\
+  if(error) return error;\
+}
+
+/*Set error var to the error code, and return from the void function.*/
+#define CERROR_RETURN(errorvar, code){\
+  errorvar = code;\
+  return;\
+}
+
+/*
+About uivector, ucvector and string:
+-All of them wrap dynamic arrays or text strings in a similar way.
+-LodePNG was originally written in C++. The vectors replace the std::vectors that were used in the C++ version.
+-The string tools are made to avoid problems with compilers that declare things like strncat as deprecated.
+-They're not used in the interface, only internally in this file as static functions.
+-As with many other structs in this file, the init and cleanup functions serve as ctor and dtor.
+*/
+
+#ifdef LODEPNG_COMPILE_ZLIB
+/*dynamic vector of unsigned ints*/
+typedef struct uivector {
+  unsigned* data; /*element storage, NULL while nothing is allocated*/
+  size_t size; /*size in number of unsigned ints*/
+  size_t allocsize; /*allocated size in bytes*/
+} uivector;
+
+static void uivector_cleanup(void* p) { /*p points to a uivector: frees its storage and resets it to the empty state*/
+  ((uivector*)p)->size = ((uivector*)p)->allocsize = 0;
+  lodepng_free(((uivector*)p)->data);
+  ((uivector*)p)->data = NULL;
+}
+
+/*makes sure at least allocsize bytes are allocated; returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned uivector_reserve(uivector* p, size_t allocsize) {
+  if(allocsize > p->allocsize) {
+    size_t newsize = (allocsize > p->allocsize * 2) ? allocsize : (allocsize * 3 / 2); /*the exact request if it exceeds twice the current allocation, else 1.5x the request*/
+    void* data = lodepng_realloc(p->data, newsize);
+    if(data) {
+      p->allocsize = newsize;
+      p->data = (unsigned*)data;
+    }
+    else return 0; /*error: not enough memory*/
+  }
+  return 1;
+}
+
+/*sets the element count to size, growing the allocation if needed; returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned uivector_resize(uivector* p, size_t size) {
+  if(!uivector_reserve(p, size * sizeof(unsigned))) return 0; /*reserve works in bytes, size counts elements*/
+  p->size = size;
+  return 1; /*success*/
+}
+
+/*resize and give all new elements the value; returns 1 if success, 0 if the resize failed*/
+static unsigned uivector_resizev(uivector* p, size_t size, unsigned value) {
+  size_t oldsize = p->size, i;
+  if(!uivector_resize(p, size)) return 0;
+  for(i = oldsize; i < size; ++i) p->data[i] = value; /*only elements beyond the old size are written*/
+  return 1;
+}
+
+static void uivector_init(uivector* p) { /*puts p in the empty state: no storage, size 0*/
+  p->data = NULL;
+  p->size = p->allocsize = 0;
+}
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*appends c as the new last element; returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned uivector_push_back(uivector* p, unsigned c) {
+  if(!uivector_resize(p, p->size + 1)) return 0;
+  p->data[p->size - 1] = c;
+  return 1;
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+#endif /*LODEPNG_COMPILE_ZLIB*/
+
+/* /////////////////////////////////////////////////////////////////////////// */
+
+/*dynamic vector of unsigned chars*/
+typedef struct ucvector {
+  unsigned char* data; /*byte storage, NULL while nothing is allocated*/
+  size_t size; /*used size*/
+  size_t allocsize; /*allocated size*/
+} ucvector;
+
+/*makes sure at least allocsize bytes are allocated; returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned ucvector_reserve(ucvector* p, size_t allocsize) {
+  if(allocsize > p->allocsize) {
+    size_t newsize = (allocsize > p->allocsize * 2) ? allocsize : (allocsize * 3 / 2); /*the exact request if it exceeds twice the current allocation, else 1.5x the request*/
+    void* data = lodepng_realloc(p->data, newsize);
+    if(data) {
+      p->allocsize = newsize;
+      p->data = (unsigned char*)data;
+    }
+    else return 0; /*error: not enough memory*/
+  }
+  return 1;
+}
+
+/*sets the used size to size, growing the allocation if needed; returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned ucvector_resize(ucvector* p, size_t size) {
+  if(!ucvector_reserve(p, size * sizeof(unsigned char))) return 0;
+  p->size = size;
+  return 1; /*success*/
+}
+
+#ifdef LODEPNG_COMPILE_PNG
+
+static void ucvector_cleanup(void* p) { /*p points to a ucvector: frees its storage and resets it to the empty state*/
+  ((ucvector*)p)->size = ((ucvector*)p)->allocsize = 0;
+  lodepng_free(((ucvector*)p)->data);
+  ((ucvector*)p)->data = NULL;
+}
+
+static void ucvector_init(ucvector* p) { /*puts p in the empty state: no storage, size 0*/
+  p->data = NULL;
+  p->size = p->allocsize = 0;
+}
+#endif /*LODEPNG_COMPILE_PNG*/
+
+#ifdef LODEPNG_COMPILE_ZLIB
+/*you can both convert from vector to buffer&size and vice versa. If you use
+init_buffer to take over a buffer and size, it is not needed to use cleanup*/
+static void ucvector_init_buffer(ucvector* p, unsigned char* buffer, size_t size) {
+  p->data = buffer; /*the vector points at the given buffer, nothing is copied*/
+  p->allocsize = p->size = size;
+}
+#endif /*LODEPNG_COMPILE_ZLIB*/
+
+#if (defined(LODEPNG_COMPILE_PNG) && defined(LODEPNG_COMPILE_ANCILLARY_CHUNKS)) || defined(LODEPNG_COMPILE_ENCODER)
+/*appends c as the new last byte; returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned ucvector_push_back(ucvector* p, unsigned char c) {
+  if(!ucvector_resize(p, p->size + 1)) return 0;
+  p->data[p->size - 1] = c;
+  return 1;
+}
+#endif /*(defined(LODEPNG_COMPILE_PNG) && defined(LODEPNG_COMPILE_ANCILLARY_CHUNKS)) || defined(LODEPNG_COMPILE_ENCODER)*/
+
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_PNG
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+
+/*free string pointer and set it to NULL*/
+static void string_cleanup(char** out) {
+  lodepng_free(*out);
+  *out = NULL; /*the caller's pointer no longer refers to the freed string*/
+}
+
+/* dynamically allocates a new string with a copy of the null terminated input text; returns 0 if the allocation failed */
+static char* alloc_string(const char* in) {
+  size_t insize = strlen(in);
+  char* out = (char*)lodepng_malloc(insize + 1); /*+ 1 for the terminating 0*/
+  if(out) {
+    size_t i;
+    for(i = 0; i != insize; ++i) {
+      out[i] = in[i];
+    }
+    out[i] = 0; /*i equals insize here: terminate the copy*/
+  }
+  return out;
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+unsigned lodepng_read32bitInt(const unsigned char* buffer) { /*reads buffer[0..3] as one value, most significant byte first*/
+  return ((unsigned)buffer[0] << 24) | ((unsigned)buffer[1] << 16) | ((unsigned)buffer[2] << 8) | (unsigned)buffer[3]; /*cast before shifting: a byte promotes to signed int, and << 24 of a byte >= 0x80 overflows it*/
+}
+
+#if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)
+/*Stores value in buffer[0..3], most significant byte first. buffer must have at least 4 allocated bytes available*/
+static void lodepng_set32bitInt(unsigned char* buffer, unsigned value) {
+  unsigned k;
+  for(k = 0; k != 4; ++k) {
+    buffer[k] = (unsigned char)((value >> (24 - 8 * k)) & 0xff); /*k = 0 takes bits 31..24, k = 3 takes bits 7..0*/
+  }
+}
+#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+static void lodepng_add32bitInt(ucvector* buffer, unsigned value) { /*appends value to buffer as 4 bytes, most significant byte first*/
+  if(!ucvector_resize(buffer, buffer->size + 4)) return; /*resize failed: size did not grow, so writing at size - 4 would clobber existing data or go out of bounds. todo: report the error*/
+  lodepng_set32bitInt(&buffer->data[buffer->size - 4], value);
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / File IO                                                                / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_DISK
+
+/* returns negative value on error. This should be pure C compatible, so no fstat. */
+static long lodepng_filesize(const char* filename) {
+  FILE* file;
+  long size;
+  file = fopen(filename, "rb");
+  if(!file) return -1;
+
+  if(fseek(file, 0, SEEK_END) != 0) {
+    fclose(file);
+    return -1;
+  }
+
+  size = ftell(file);
+  /* It may give LONG_MAX as directory size, this is invalid for us. */
+  if(size == LONG_MAX) size = -1;
+
+  fclose(file);
+  return size;
+}
+
+/* Reads exactly `size` bytes of the named file into `out`, a buffer that already has the correct allocated size.
+Returns error code: 0 on success, 78 if the file cannot be opened or fewer than `size` bytes could be read.*/
+static unsigned lodepng_buffer_file(unsigned char* out, size_t size, const char* filename) {
+  size_t got;
+  FILE* stream = fopen(filename, "rb");
+
+  if(stream == NULL) return 78;
+  got = fread(out, 1, size, stream);
+  fclose(stream);
+
+  /*anything other than a complete read is reported as a file error*/
+  return (got == size) ? 0 : 78;
+}
+
+unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename) {
+  /*Loads the whole file into a newly allocated buffer: *out receives the buffer, *outsize its length. Returns 0, 78 (file error) or 83 (alloc fail).*/
+  size_t bytes;
+  long measured = lodepng_filesize(filename);
+  if(measured < 0) return 78;
+  bytes = (size_t)measured;
+  *outsize = bytes;
+  *out = (unsigned char*)lodepng_malloc(bytes);
+  return (*out == NULL && measured > 0) ? 83 /*the above malloc failed*/ : lodepng_buffer_file(*out, bytes, filename);
+}
+
+/*Writes the given buffer to the file, overwriting the file, it doesn't append to it. Returns 0 on success, 79 if the file cannot be opened or not fully written.*/
+unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename) {
+  size_t written;
+  FILE* file = fopen(filename, "wb" );
+  if(!file) return 79;
+  written = fwrite(buffer, 1, buffersize, file);
+  /*a short write, or a close that fails while flushing (e.g. disk full), means the file on disk is incomplete*/
+  return (fclose(file) != 0 || written != buffersize) ? 79 : 0;
+}
+
+#endif /*LODEPNG_COMPILE_DISK*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // End of common code and tools. Begin of Zlib related code.            // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_ZLIB
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Appends one bit to bitstream; *bitpointer counts the bits written so far. TODO: this ignores potential out of memory errors (the result of ucvector_push_back is discarded, and the write below happens regardless)*/
+#define addBitToStream(/*size_t**/ bitpointer, /*ucvector**/ bitstream, /*unsigned char*/ bit){\
+  /*add a new byte at the end whenever the bit position is a multiple of 8*/\
+  if(((*bitpointer) & 7) == 0) ucvector_push_back(bitstream, (unsigned char)0);\
+  /*earlier bit of huffman code is in a lesser significant bit of an earlier byte*/\
+  (bitstream->data[bitstream->size - 1]) |= (bit << ((*bitpointer) & 0x7));\
+  ++(*bitpointer);\
+}
+
+static void addBitsToStream(size_t* bitpointer, ucvector* bitstream, unsigned value, size_t nbits) {
+  /*emits the nbits low bits of value, least significant bit first; value and nbits are used up as local counters*/
+  for(; nbits != 0; --nbits, value >>= 1) addBitToStream(bitpointer, bitstream, (unsigned char)(value & 1));
+}
+
+static void addBitsToStreamReversed(size_t* bitpointer, ucvector* bitstream, unsigned value, size_t nbits) {
+  size_t shift = nbits; /*emits the nbits low bits of value, most significant of them first*/
+  while(shift-- != 0) addBitToStream(bitpointer, bitstream, (unsigned char)((value >> shift) & 1));
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+#define READBIT(bitpointer, bitstream) ((bitstream[bitpointer >> 3] >> (bitpointer & 0x7)) & (unsigned char)1) /*bit number `bitpointer` of the byte array: byte bitpointer / 8, counted from its least significant bit*/
+
+static unsigned char readBitFromStream(size_t* bitpointer, const unsigned char* bitstream) {
+  /*returns the bit at position *bitpointer and advances the position by one*/
+  size_t at = (*bitpointer)++;
+  return (unsigned char)(READBIT(at, bitstream));
+}
+
+static unsigned readBitsFromStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits) {
+  unsigned value = 0, shift; /*reads nbits bits into an integer, the first bit read being the least significant; advances *bitpointer by nbits*/
+  for(shift = 0; shift != nbits; ++shift, ++(*bitpointer)) {
+    /*every bit lands in its own position, so OR-ing it in is the same as adding it*/
+    value |= ((unsigned)READBIT(*bitpointer, bitstream)) << shift;
+  }
+  return value;
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Deflate - Huffman                                                      / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#define FIRST_LENGTH_CODE_INDEX 257 /*first literal/length symbol that is a length code*/
+#define LAST_LENGTH_CODE_INDEX 285 /*last literal/length symbol that is a length code*/
+/*256 literals, the end code, some length codes, and 2 unused codes*/
+#define NUM_DEFLATE_CODE_SYMBOLS 288
+/*the distance codes have their own symbols, 30 used, 2 unused*/
+#define NUM_DISTANCE_SYMBOLS 32
+/*the code length codes. 0-15: code lengths, 16: copy previous 3-6 times, 17: 3-10 zeros, 18: 11-138 zeros*/
+#define NUM_CODE_LENGTH_CODES 19
+
+/*the base lengths represented by codes 257-285, indexed by (code - FIRST_LENGTH_CODE_INDEX)*/
+static const unsigned LENGTHBASE[29]
+  = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
+     67, 83, 99, 115, 131, 163, 195, 227, 258};
+
+/*the number of extra bits used by codes 257-285 (their value is added to the base length), same indexing*/
+static const unsigned LENGTHEXTRA[29]
+  = {0, 0, 0, 0, 0, 0, 0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
+      4,  4,  4,   4,   5,   5,   5,   5,   0};
+
+/*the base backwards distances (the bits of distance codes appear after length codes and use their own huffman tree)*/
+static const unsigned DISTANCEBASE[30]
+  = {1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513,
+     769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577};
+
+/*the number of extra bits of backwards distances (their value is added to the base), indexed by distance code*/
+static const unsigned DISTANCEEXTRA[30]
+  = {0, 0, 0, 0, 1, 1, 2,  2,  3,  3,  4,  4,  5,  5,   6,   6,   7,   7,   8,
+       8,    9,    9,   10,   10,   11,   11,   12,    12,    13,    13};
+
+/*the order in which "code length alphabet code lengths" are stored, out of this
+the huffman tree of the dynamic huffman tree lengths is generated*/
+static const unsigned CLCL_ORDER[NUM_CODE_LENGTH_CODES]
+  = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*
+Huffman tree struct, containing multiple representations of the tree
+*/
+typedef struct HuffmanTree {
+  unsigned* tree2d; /*lookup table used by the decoder, numcodes * 2 entries, built by HuffmanTree_make2DTree*/
+  unsigned* tree1d; /*the code assigned to each symbol, numcodes entries*/
+  unsigned* lengths; /*the lengths of the codes of the 1d-tree*/
+  unsigned maxbitlen; /*maximum number of bits a single code can get*/
+  unsigned numcodes; /*number of symbols in the alphabet = number of codes*/
+} HuffmanTree;
+
+/*function used for debug purposes to draw the tree in ascii art with C++*/
+/*
+static void HuffmanTree_draw(HuffmanTree* tree) {
+  std::cout << "tree. length: " << tree->numcodes << " maxbitlen: " << tree->maxbitlen << std::endl;
+  for(size_t i = 0; i != tree->tree1d.size; ++i) {
+    if(tree->lengths.data[i])
+      std::cout << i << " " << tree->tree1d.data[i] << " " << tree->lengths.data[i] << std::endl;
+  }
+  std::cout << std::endl;
+}*/
+
+static void HuffmanTree_init(HuffmanTree* tree) {
+  tree->lengths = NULL; /*only the three array pointers are cleared; maxbitlen and numcodes are not touched*/
+  tree->tree1d = NULL;
+  tree->tree2d = NULL;
+}
+
+static void HuffmanTree_cleanup(HuffmanTree* tree) {
+  lodepng_free(tree->lengths); /*frees the three arrays of the tree; the pointers themselves are not reset here*/
+  lodepng_free(tree->tree1d);
+  lodepng_free(tree->tree2d);
+}
+
+/*builds tree2d, the tree representation used by the decoder, from tree1d and lengths. return value is error (83: alloc fail, 55: oversubscribed)*/
+static unsigned HuffmanTree_make2DTree(HuffmanTree* tree) {
+  unsigned nodefilled = 0; /*up to which node it is filled*/
+  unsigned treepos = 0; /*position in the tree (1 of the numcodes columns)*/
+  unsigned n, i;
+
+  tree->tree2d = (unsigned*)lodepng_malloc(tree->numcodes * 2 * sizeof(unsigned));
+  if(!tree->tree2d) return 83; /*alloc fail*/
+
+  /*
+  convert tree1d[] to tree2d[][]. In the 2D array, a value of 32767 means
+  uninited, a value >= numcodes is an address to another bit, a value < numcodes
+  is a code. The 2 rows are the 2 possible bit values (0 or 1), there are as
+  many columns as codes - 1.
+  A good huffman tree has N * 2 - 1 nodes, of which N - 1 are internal nodes.
+  Here, the internal nodes are stored (what their 0 and 1 option point to).
+  There is only memory for such good tree currently, if there are more nodes
+  (due to too long length codes), error 55 will happen
+  */
+  for(n = 0; n < tree->numcodes * 2; ++n) {
+    tree->tree2d[n] = 32767; /*32767 here means the tree2d isn't filled there yet*/
+  }
+
+  for(n = 0; n < tree->numcodes; ++n) /*the codes*/ {
+    for(i = 0; i != tree->lengths[n]; ++i) /*the bits for this code, most significant bit first*/ {
+      unsigned char bit = (unsigned char)((tree->tree1d[n] >> (tree->lengths[n] - i - 1)) & 1);
+      /*oversubscribed, see comment in lodepng_error_text*/
+      if(treepos > 2147483647 || treepos + 2 > tree->numcodes) return 55;
+      if(tree->tree2d[2 * treepos + bit] == 32767) /*not yet filled in*/ {
+        if(i + 1 == tree->lengths[n]) /*last bit*/ {
+          tree->tree2d[2 * treepos + bit] = n; /*put the current code in it*/
+          treepos = 0; /*the next code starts again at the root*/
+        } else {
+          /*put address of the next step in here, first that address has to be found of course
+          (it's just nodefilled + 1)...*/
+          ++nodefilled;
+          /*addresses encoded with numcodes added to it*/
+          tree->tree2d[2 * treepos + bit] = nodefilled + tree->numcodes;
+          treepos = nodefilled;
+        }
+      }
+      else treepos = tree->tree2d[2 * treepos + bit] - tree->numcodes; /*follow the existing branch*/
+    }
+  }
+
+  for(n = 0; n < tree->numcodes * 2; ++n) {
+    if(tree->tree2d[n] == 32767) tree->tree2d[n] = 0; /*remove possible remaining 32767's*/
+  }
+
+  return 0;
+}
+
+/*
+Second step for the ...makeFromLengths and ...makeFromFrequencies functions:
+assigns a code to every symbol (tree1d) from the code lengths, then builds the 2D tree.
+numcodes, lengths and maxbitlen must already be filled in correctly. return value is error.
+*/
+static unsigned HuffmanTree_makeFromLengths2(HuffmanTree* tree) {
+  uivector lencount; /*how many symbols use each code length*/
+  uivector codestart; /*the next code value to hand out, per code length*/
+  unsigned error = 0;
+  unsigned len, sym;
+
+  uivector_init(&lencount);
+  uivector_init(&codestart);
+
+  tree->tree1d = (unsigned*)lodepng_malloc(tree->numcodes * sizeof(unsigned));
+  if(tree->tree1d == NULL) error = 83; /*alloc fail*/
+
+  if(!uivector_resizev(&lencount, tree->maxbitlen + 1, 0)) error = 83; /*alloc fail*/
+  else if(!uivector_resizev(&codestart, tree->maxbitlen + 1, 0)) error = 83; /*alloc fail*/
+
+  if(error == 0) {
+    /*step 1: count number of instances of each code length*/
+    for(sym = 0; sym != tree->numcodes; ++sym) lencount.data[tree->lengths[sym]] += 1;
+    /*step 2: generate the first code value of each code length*/
+    for(len = 1; len <= tree->maxbitlen; ++len) {
+      codestart.data[len] = (codestart.data[len - 1] + lencount.data[len - 1]) << 1;
+    }
+    /*step 3: generate all the codes; symbols with length 0 get no code*/
+    for(sym = 0; sym != tree->numcodes; ++sym) {
+      unsigned symlen = tree->lengths[sym];
+      if(symlen != 0) tree->tree1d[sym] = codestart.data[symlen]++;
+    }
+  }
+
+  /*the two scratch vectors are released on success and on failure*/
+  uivector_cleanup(&lencount);
+  uivector_cleanup(&codestart);
+
+  return error ? error : HuffmanTree_make2DTree(tree);
+}
+
+/*
+Generates the tree as defined by Deflate from the given code lengths (as stored in the PNG file).
+The lengths are copied into the tree. maxbitlen is the maximum bits that a code in the tree can have.
+return value is error.
+*/
+static unsigned HuffmanTree_makeFromLengths(HuffmanTree* tree, const unsigned* bitlen,
+                                            size_t numcodes, unsigned maxbitlen) {
+  size_t k;
+  tree->lengths = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned));
+  if(tree->lengths == NULL) return 83; /*alloc fail*/
+  for(k = 0; k < numcodes; ++k) tree->lengths[k] = bitlen[k];
+  tree->maxbitlen = maxbitlen;
+  tree->numcodes = (unsigned)numcodes; /*number of symbols*/
+  return HuffmanTree_makeFromLengths2(tree);
+}
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/*BPM: Boundary Package Merge, see "A Fast and Space-Economical Algorithm for Length-Limited Coding",
+Jyrki Katajainen, Alistair Moffat, Andrew Turpin, 1995.*/
+
+/*chain node for boundary package merge*/
+typedef struct BPMNode {
+  int weight; /*the sum of all weights in this chain*/
+  unsigned index; /*index of this leaf node (called "count" in the paper)*/
+  struct BPMNode* tail; /*the next nodes in this chain (null if last)*/
+  int in_use; /*mark set by the garbage collection in bpmnode_create: 1 if reachable from a chain head*/
+} BPMNode;
+
+/*lists of chains*/
+typedef struct BPMLists {
+  /*memory pool*/
+  unsigned memsize; /*number of nodes in memory*/
+  BPMNode* memory; /*the nodes themselves*/
+  unsigned numfree; /*number of valid entries in freelist*/
+  unsigned nextfree; /*index in freelist of the next node to hand out*/
+  BPMNode** freelist; /*pointers to the nodes that may be handed out*/
+  /*two heads of lookahead chains per list*/
+  unsigned listsize; /*number of lists, i.e. entries in chains0 and chains1*/
+  BPMNode** chains0;
+  BPMNode** chains1;
+} BPMLists;
+
+/*creates a new chain node with the given parameters, from the memory in the lists. Returns the node taken from the free list*/
+static BPMNode* bpmnode_create(BPMLists* lists, int weight, unsigned index, BPMNode* tail) {
+  unsigned i;
+  BPMNode* result;
+
+  /*memory full, so garbage collect*/
+  if(lists->nextfree >= lists->numfree) {
+    /*mark only those that are in use: clear every mark, then walk both chains of every list*/
+    for(i = 0; i != lists->memsize; ++i) lists->memory[i].in_use = 0;
+    for(i = 0; i != lists->listsize; ++i) {
+      BPMNode* node;
+      for(node = lists->chains0[i]; node != 0; node = node->tail) node->in_use = 1;
+      for(node = lists->chains1[i]; node != 0; node = node->tail) node->in_use = 1;
+    }
+    /*collect those that are free*/
+    lists->numfree = 0;
+    for(i = 0; i != lists->memsize; ++i) {
+      if(!lists->memory[i].in_use) lists->freelist[lists->numfree++] = &lists->memory[i];
+    }
+    lists->nextfree = 0;
+  }
+
+  /*NOTE(review): there is no check that the collection above found a free node - confirm the pool size guarantees one*/
+  result = lists->freelist[lists->nextfree++];
+  result->weight = weight;
+  result->index = index;
+  result->tail = tail;
+  return result;
+}
+
+/*sort the leaves by weight, stably: mergesort, or an in-place insertion sort if the scratch buffer cannot be allocated*/
+static void bpmnode_sort(BPMNode* leaves, size_t num) {
+  BPMNode* mem = (BPMNode*)lodepng_malloc(sizeof(*leaves) * num);
+  size_t width, p, counter = 0;
+  for(width = 1; mem && width < num; width *= 2, ++counter) { /*each pass merges runs of `width` nodes from a into b*/
+    BPMNode* a = (counter & 1) ? mem : leaves;
+    BPMNode* b = (counter & 1) ? leaves : mem;
+    for(p = 0; p < num; p += 2 * width) {
+      size_t q = (p + width > num) ? num : (p + width);
+      size_t r = (p + 2 * width > num) ? num : (p + 2 * width);
+      size_t i = p, j = q, k;
+      for(k = p; k < r; k++) b[k] = (i < q && (j >= r || a[i].weight <= a[j].weight)) ? a[i++] : a[j++];
+    }
+  }
+  for(width = 1; !mem && width < num; ++width) { /*allocation failed: sort in place instead of dereferencing NULL*/
+    BPMNode key = leaves[width];
+    for(p = width; p != 0 && leaves[p - 1].weight > key.weight; --p) leaves[p] = leaves[p - 1]; /*strict > keeps it stable*/
+    leaves[p] = key;
+  }
+  if(mem && (counter & 1)) memcpy(leaves, mem, sizeof(*leaves) * num); /*odd number of passes: the result is in mem*/
+  lodepng_free(mem);
+}
+
+/*Boundary Package Merge step, numpresent is the amount of leaves, and c is the current chain. num is the counter passed by the caller, passed on unchanged when recursing.*/
+static void boundaryPM(BPMLists* lists, BPMNode* leaves, size_t numpresent, int c, int num) {
+  unsigned lastindex = lists->chains1[c]->index;
+
+  if(c == 0) {
+    if(lastindex >= numpresent) return; /*no leaves left to add to the first list*/
+    lists->chains0[c] = lists->chains1[c];
+    lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, 0);
+  } else {
+    /*sum of the weights of the head nodes of the previous lookahead chains.*/
+    int sum = lists->chains0[c - 1]->weight + lists->chains1[c - 1]->weight;
+    lists->chains0[c] = lists->chains1[c];
+    if(lastindex < numpresent && sum > leaves[lastindex].weight) {
+      lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, lists->chains1[c]->tail);
+      return;
+    }
+    lists->chains1[c] = bpmnode_create(lists, sum, lastindex, lists->chains1[c - 1]);
+    /*in the end we are only interested in the chain of the last list, so no
+    need to recurse if we're at the last one (this gives measurable speedup)*/
+    if(num + 1 < (int)(2 * numpresent - 2)) {
+      boundaryPM(lists, leaves, numpresent, c - 1, num);
+      boundaryPM(lists, leaves, numpresent, c - 1, num);
+    }
+  }
+}
+
+unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies,
+                                      size_t numcodes, unsigned maxbitlen) { /*fills lengths[0..numcodes-1] with code lengths of at most maxbitlen bits*/
+  unsigned error = 0; /*return value: 0, 80 (invalid parameters) or 83 (alloc fail)*/
+  unsigned i;
+  size_t numpresent = 0; /*number of symbols with non-zero frequency*/
+  BPMNode* leaves; /*the symbols, only those with > 0 frequency*/
+
+  if(numcodes == 0) return 80; /*error: a tree of 0 symbols is not supposed to be made*/
+  if((1u << maxbitlen) < (unsigned)numcodes) return 80; /*error: represent all symbols*/
+
+  leaves = (BPMNode*)lodepng_malloc(numcodes * sizeof(*leaves));
+  if(!leaves) return 83; /*alloc fail*/
+
+  for(i = 0; i != numcodes; ++i) {
+    if(frequencies[i] > 0) {
+      leaves[numpresent].weight = (int)frequencies[i];
+      leaves[numpresent].index = i;
+      ++numpresent;
+    }
+  }
+
+  for(i = 0; i != numcodes; ++i) lengths[i] = 0;
+
+  /*ensure at least two present symbols. RFC 1951 section 3.2.7 requires only one, but some decoders incorrectly
+  require two, so a second symbol gets a length too whenever the alphabet has room for it. The Package-Merge code
+  below also cannot handle a single symbol: it'd give it the theoretical 0 bits but in practice zlib wants 1 bit*/
+  if(numpresent == 0) {
+    lengths[0] = 1; /*note that for RFC 1951 section 3.2.7, only lengths[0] = 1 is needed*/
+    if(numcodes > 1) lengths[1] = 1; /*with a one-symbol alphabet lengths[1] would be out of bounds*/
+  } else if(numpresent == 1) {
+    lengths[leaves[0].index] = 1;
+    if(numcodes > 1) lengths[leaves[0].index == 0 ? 1 : 0] = 1;
+  } else {
+    BPMLists lists;
+    BPMNode* node;
+
+    bpmnode_sort(leaves, numpresent);
+
+    lists.listsize = maxbitlen;
+    lists.memsize = 2 * maxbitlen * (maxbitlen + 1);
+    lists.nextfree = 0;
+    lists.numfree = lists.memsize;
+    lists.memory = (BPMNode*)lodepng_malloc(lists.memsize * sizeof(*lists.memory));
+    lists.freelist = (BPMNode**)lodepng_malloc(lists.memsize * sizeof(BPMNode*));
+    lists.chains0 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*));
+    lists.chains1 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*));
+    if(!lists.memory || !lists.freelist || !lists.chains0 || !lists.chains1) error = 83; /*alloc fail*/
+
+    if(!error) {
+      for(i = 0; i != lists.memsize; ++i) lists.freelist[i] = &lists.memory[i];
+
+      bpmnode_create(&lists, leaves[0].weight, 1, 0);
+      bpmnode_create(&lists, leaves[1].weight, 2, 0);
+
+      for(i = 0; i != lists.listsize; ++i) {
+        lists.chains0[i] = &lists.memory[0];
+        lists.chains1[i] = &lists.memory[1];
+      }
+
+      /*each boundaryPM call adds one chain to the last list, and we need 2 * numpresent - 2 chains.*/
+      for(i = 2; i != 2 * numpresent - 2; ++i) boundaryPM(&lists, leaves, numpresent, (int)maxbitlen - 1, (int)i);
+
+      for(node = lists.chains1[maxbitlen - 1]; node; node = node->tail) {
+        for(i = 0; i != node->index; ++i) ++lengths[leaves[i].index];
+      }
+    }
+
+    lodepng_free(lists.memory);
+    lodepng_free(lists.freelist);
+    lodepng_free(lists.chains0);
+    lodepng_free(lists.chains1);
+  }
+
+  lodepng_free(leaves);
+  return error;
+}
+
+/*Create the Huffman tree given the symbol frequencies*/
+static unsigned HuffmanTree_makeFromFrequencies(HuffmanTree* tree, const unsigned* frequencies,
+                                                size_t mincodes, size_t numcodes, unsigned maxbitlen) {
+  unsigned error = 0;
+  while(!frequencies[numcodes - 1] && numcodes > mincodes) --numcodes; /*trim zeroes*/
+  tree->maxbitlen = maxbitlen;
+  tree->numcodes = (unsigned)numcodes; /*number of symbols*/
+  tree->lengths = (unsigned*)lodepng_realloc(tree->lengths, numcodes * sizeof(unsigned));
+  if(!tree->lengths) return 83; /*alloc fail*/
+  /*initialize all lengths to 0*/
+  memset(tree->lengths, 0, numcodes * sizeof(unsigned));
+
+  error = lodepng_huffman_code_lengths(tree->lengths, frequencies, numcodes, maxbitlen);
+  if(!error) error = HuffmanTree_makeFromLengths2(tree);
+  return error;
+}
+
+static unsigned HuffmanTree_getCode(const HuffmanTree* tree, unsigned index) { /*returns the entry of tree1d for symbol `index`; no bounds check*/
+  return tree->tree1d[index];
+}
+
+static unsigned HuffmanTree_getLength(const HuffmanTree* tree, unsigned index) { /*returns the entry of lengths for symbol `index`; no bounds check*/
+  return tree->lengths[index];
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/*get the literal and length code tree of a deflated block with fixed tree, as per the deflate specification*/
+static unsigned generateFixedLitLenTree(HuffmanTree* tree) {
+  unsigned i, error = 0;
+  unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned));
+  if(!bitlen) return 83; /*alloc fail*/
+
+  /*288 possible codes: 0-255=literals, 256=endcode, 257-285=lengthcodes, 286-287=unused*/
+  for(i =   0; i <= 143; ++i) bitlen[i] = 8;
+  for(i = 144; i <= 255; ++i) bitlen[i] = 9;
+  for(i = 256; i <= 279; ++i) bitlen[i] = 7;
+  for(i = 280; i <= 287; ++i) bitlen[i] = 8;
+
+  error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DEFLATE_CODE_SYMBOLS, 15);
+
+  lodepng_free(bitlen);
+  return error;
+}
+
+/*get the distance code tree of a deflated block with fixed tree, as specified in the deflate specification*/
+static unsigned generateFixedDistanceTree(HuffmanTree* tree) {
+  unsigned i, error = 0;
+  unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned));
+  if(!bitlen) return 83; /*alloc fail*/
+
+  /*there are 32 distance codes, but 30-31 are unused*/
+  for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen[i] = 5;
+  error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DISTANCE_SYMBOLS, 15);
+
+  lodepng_free(bitlen);
+  return error;
+}
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/*
+decodes one symbol from `in` starting at bit position *bp, which is advanced past the bits that were read.
+returns the code, or (unsigned)(-1) if error happened
+inbitlength is the length of the complete buffer, in bits (so its byte length times 8)
+*/
+static unsigned huffmanDecodeSymbol(const unsigned char* in, size_t* bp,
+                                    const HuffmanTree* codetree, size_t inbitlength) {
+  const unsigned numcodes = codetree->numcodes;
+  unsigned node = 0; /*position in the 2D tree, starting at the root*/
+  while(*bp < inbitlength) {
+    /*
+    take the branch selected by the next input bit. The bit is read with READBIT directly
+    instead of readBitFromStream because this is the biggest bottleneck while decoding
+    */
+    unsigned next = codetree->tree2d[(node << 1) + READBIT(*bp, in)];
+    ++(*bp);
+    if(next < numcodes) return next; /*the symbol is decoded, return it*/
+    node = next - numcodes; /*symbol not yet decoded, instead move tree position*/
+    if(node >= numcodes) return (unsigned)(-1); /*error: it appeared outside the codetree*/
+  }
+  return (unsigned)(-1); /*error: end of input memory reached without endcode*/
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Inflator (Decompressor)                                                / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Fills tree_ll and tree_d with the trees of a deflated block with fixed tree, as specified in the deflate specification*/
+static void getTreeInflateFixed(HuffmanTree* tree_ll, HuffmanTree* tree_d) {
+  /*TODO: check for out of memory errors; the error codes returned by both calls are discarded*/
+  (void)generateFixedLitLenTree(tree_ll);
+  (void)generateFixedDistanceTree(tree_d);
+}
+
+/*get the tree of a deflated block with dynamic tree, the tree itself is also Huffman compressed with a known tree. Reads from `in` at bit position *bp (advanced as it reads), inlength is in bytes. Returns an error code.*/
+static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d,
+                                      const unsigned char* in, size_t* bp, size_t inlength) {
+  /*make sure that length values that aren't filled in will be 0, or a wrong tree will be generated*/
+  unsigned error = 0;
+  unsigned n, HLIT, HDIST, HCLEN, i;
+  size_t inbitlength = inlength * 8;
+
+  /*see comments in deflateDynamic for explanation of the context and these variables, it is analogous*/
+  unsigned* bitlen_ll = 0; /*lit,len code lengths*/
+  unsigned* bitlen_d = 0; /*dist code lengths*/
+  /*code length code lengths ("clcl"), the bit lengths of the huffman tree used to compress bitlen_ll and bitlen_d*/
+  unsigned* bitlen_cl = 0;
+  HuffmanTree tree_cl; /*the code tree for code length codes (the huffman tree for compressed huffman trees)*/
+
+  if((*bp) + 14 > (inlength << 3)) return 49; /*error: the bit pointer is or will go past the memory*/
+
+  /*number of literal/length codes + 257. Unlike the spec, the value 257 is added to it here already*/
+  HLIT =  readBitsFromStream(bp, in, 5) + 257;
+  /*number of distance codes. Unlike the spec, the value 1 is added to it here already*/
+  HDIST = readBitsFromStream(bp, in, 5) + 1;
+  /*number of code length codes. Unlike the spec, the value 4 is added to it here already*/
+  HCLEN = readBitsFromStream(bp, in, 4) + 4;
+
+  if((*bp) + HCLEN * 3 > (inlength << 3)) return 50; /*error: the bit pointer is or will go past the memory*/
+
+  HuffmanTree_init(&tree_cl);
+
+  while(!error) { /*runs at most once: every path ends in a break, so the cleanup below is always reached*/
+    /*read the code length codes out of 3 * (amount of code length codes) bits*/
+
+    bitlen_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(unsigned));
+    if(!bitlen_cl) ERROR_BREAK(83 /*alloc fail*/);
+
+    for(i = 0; i != NUM_CODE_LENGTH_CODES; ++i) {
+      if(i < HCLEN) bitlen_cl[CLCL_ORDER[i]] = readBitsFromStream(bp, in, 3);
+      else bitlen_cl[CLCL_ORDER[i]] = 0; /*if not, it must stay 0*/
+    }
+
+    error = HuffmanTree_makeFromLengths(&tree_cl, bitlen_cl, NUM_CODE_LENGTH_CODES, 7);
+    if(error) break;
+
+    /*now we can use this tree to read the lengths for the tree that this function will return*/
+    bitlen_ll = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned));
+    bitlen_d = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned));
+    if(!bitlen_ll || !bitlen_d) ERROR_BREAK(83 /*alloc fail*/);
+    for(i = 0; i != NUM_DEFLATE_CODE_SYMBOLS; ++i) bitlen_ll[i] = 0;
+    for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen_d[i] = 0;
+
+    /*i is the current symbol we're reading in the part that contains the code lengths of lit/len and dist codes*/
+    i = 0;
+    while(i < HLIT + HDIST) {
+      unsigned code = huffmanDecodeSymbol(in, bp, &tree_cl, inbitlength);
+      if(code <= 15) /*a length code*/ {
+        if(i < HLIT) bitlen_ll[i] = code;
+        else bitlen_d[i - HLIT] = code;
+        ++i;
+      } else if(code == 16) /*repeat previous*/ {
+        unsigned replength = 3; /*read in the 2 bits that indicate repeat length (3-6)*/
+        unsigned value; /*set value to the previous code*/
+
+        if(i == 0) ERROR_BREAK(54); /*can't repeat previous if i is 0*/
+
+        if((*bp + 2) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
+        replength += readBitsFromStream(bp, in, 2);
+
+        if(i < HLIT + 1) value = bitlen_ll[i - 1];
+        else value = bitlen_d[i - HLIT - 1];
+        /*repeat this value in the next lengths*/
+        for(n = 0; n < replength; ++n) {
+          if(i >= HLIT + HDIST) ERROR_BREAK(13); /*error: i is larger than the amount of codes*/
+          if(i < HLIT) bitlen_ll[i] = value;
+          else bitlen_d[i - HLIT] = value;
+          ++i;
+        }
+      } else if(code == 17) /*repeat "0" 3-10 times*/ {
+        unsigned replength = 3; /*read in the bits that indicate repeat length*/
+        if((*bp + 3) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
+        replength += readBitsFromStream(bp, in, 3);
+
+        /*repeat this value in the next lengths*/
+        for(n = 0; n < replength; ++n) {
+          if(i >= HLIT + HDIST) ERROR_BREAK(14); /*error: i is larger than the amount of codes*/
+
+          if(i < HLIT) bitlen_ll[i] = 0;
+          else bitlen_d[i - HLIT] = 0;
+          ++i;
+        }
+      } else if(code == 18) /*repeat "0" 11-138 times*/ {
+        unsigned replength = 11; /*read in the bits that indicate repeat length*/
+        if((*bp + 7) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
+        replength += readBitsFromStream(bp, in, 7);
+
+        /*repeat this value in the next lengths*/
+        for(n = 0; n < replength; ++n) {
+          if(i >= HLIT + HDIST) ERROR_BREAK(15); /*error: i is larger than the amount of codes*/
+
+          if(i < HLIT) bitlen_ll[i] = 0;
+          else bitlen_d[i - HLIT] = 0;
+          ++i;
+        }
+      } else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/ {
+        if(code == (unsigned)(-1)) {
+          /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol (10=no endcode, 11=wrong jump outside of tree).
+          NOTE(review): huffmanDecodeSymbol never advances *bp beyond inbitlength, so the > test below looks like it can never select 10 - confirm*/
+          error = (*bp) > inbitlength ? 10 : 11;
+        }
+        else error = 16; /*unexisting code, this can never happen*/
+        break;
+      }
+    }
+    if(error) break;
+
+    if(bitlen_ll[256] == 0) ERROR_BREAK(64); /*the length of the end code 256 must be larger than 0*/
+
+    /*now we've finally got HLIT and HDIST, so generate the code trees, and the function is done*/
+    error = HuffmanTree_makeFromLengths(tree_ll, bitlen_ll, NUM_DEFLATE_CODE_SYMBOLS, 15);
+    if(error) break;
+    error = HuffmanTree_makeFromLengths(tree_d, bitlen_d, NUM_DISTANCE_SYMBOLS, 15);
+
+    break; /*end of error-while*/
+  }
+
+  lodepng_free(bitlen_cl);
+  lodepng_free(bitlen_ll);
+  lodepng_free(bitlen_d);
+  HuffmanTree_cleanup(&tree_cl);
+
+  return error;
+}
+
+/*inflate a block with dynamic or fixed Huffman tree*/
+static unsigned inflateHuffmanBlock(ucvector* out, const unsigned char* in, size_t* bp,
+                                    size_t* pos, size_t inlength, unsigned btype) {
+  unsigned error = 0;
+  HuffmanTree tree_ll; /*the huffman tree for literal and length codes*/
+  HuffmanTree tree_d; /*the huffman tree for distance codes*/
+  size_t inbitlength = inlength * 8;
+
+  HuffmanTree_init(&tree_ll);
+  HuffmanTree_init(&tree_d);
+
+  if(btype == 1) getTreeInflateFixed(&tree_ll, &tree_d);
+  else if(btype == 2) error = getTreeInflateDynamic(&tree_ll, &tree_d, in, bp, inlength);
+
+  while(!error) /*decode all symbols until end reached, breaks at end code*/ {
+    /*code_ll is literal, length or end code*/
+    unsigned code_ll = huffmanDecodeSymbol(in, bp, &tree_ll, inbitlength);
+    if(code_ll <= 255) /*literal symbol*/ {
+      /*ucvector_push_back would do the same, but for some reason the two lines below run 10% faster*/
+      if(!ucvector_resize(out, (*pos) + 1)) ERROR_BREAK(83 /*alloc fail*/);
+      out->data[*pos] = (unsigned char)code_ll;
+      ++(*pos);
+    } else if(code_ll >= FIRST_LENGTH_CODE_INDEX && code_ll <= LAST_LENGTH_CODE_INDEX) /*length code*/ {
+      unsigned code_d, distance;
+      unsigned numextrabits_l, numextrabits_d; /*extra bits for length and distance*/
+      size_t start, forward, backward, length;
+
+      /*part 1: get length base*/
+      length = LENGTHBASE[code_ll - FIRST_LENGTH_CODE_INDEX];
+
+      /*part 2: get extra bits and add the value of that to length*/
+      numextrabits_l = LENGTHEXTRA[code_ll - FIRST_LENGTH_CODE_INDEX];
+      if((*bp + numextrabits_l) > inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/
+      length += readBitsFromStream(bp, in, numextrabits_l);
+
+      /*part 3: get distance code*/
+      code_d = huffmanDecodeSymbol(in, bp, &tree_d, inbitlength);
+      if(code_d > 29) {
+        if(code_d == (unsigned)(-1)) /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/ {
+          /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
+          (10=no endcode, 11=wrong jump outside of tree)*/
+          error = (*bp) > inlength * 8 ? 10 : 11;
+        }
+        else error = 18; /*error: invalid distance code (30-31 are never used)*/
+        break;
+      }
+      distance = DISTANCEBASE[code_d];
+
+      /*part 4: get extra bits from distance*/
+      numextrabits_d = DISTANCEEXTRA[code_d];
+      if((*bp + numextrabits_d) > inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/
+      distance += readBitsFromStream(bp, in, numextrabits_d);
+
+      /*part 5: fill in all the out[n] values based on the length and dist*/
+      start = (*pos);
+      if(distance > start) ERROR_BREAK(52); /*too long backward distance*/
+      backward = start - distance;
+
+      if(!ucvector_resize(out, (*pos) + length)) ERROR_BREAK(83 /*alloc fail*/);
+      if (distance < length) {
+        for(forward = 0; forward < length; ++forward) {
+          out->data[(*pos)++] = out->data[backward++];
+        }
+      } else {
+        memcpy(out->data + *pos, out->data + backward, length);
+        *pos += length;
+      }
+    } else if(code_ll == 256) {
+      break; /*end code, break the loop*/
+    } else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/ {
+      /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
+      (10=no endcode, 11=wrong jump outside of tree)*/
+      error = ((*bp) > inlength * 8) ? 10 : 11;
+      break;
+    }
+  }
+
+  HuffmanTree_cleanup(&tree_ll);
+  HuffmanTree_cleanup(&tree_d);
+
+  return error;
+}
+
+/*
+Inflate a stored (uncompressed) block.
+out: vector receiving the bytes; it is resized to *pos + LEN and written starting at index *pos
+in, inlength: the deflate stream and its size in bytes
+bp: bit pointer into "in"; it is first moved up to the next byte boundary and ends just behind the block
+pos: byte position in out->data, advanced by the number of bytes copied
+returns 0 on success, 52 if the LEN/NLEN header does not fit in the input, 21 if NLEN is not the
+one's complement of LEN, 23 if the block data runs past the input, 83 if the output cannot be resized
+*/
+static unsigned inflateNoCompression(ucvector* out, const unsigned char* in, size_t* bp, size_t* pos, size_t inlength) {
+  size_t p;
+  unsigned LEN, NLEN, n;
+
+  /*go to first boundary of byte*/
+  while(((*bp) & 0x7) != 0) ++(*bp);
+  p = (*bp) / 8; /*byte position*/
+
+  /*read LEN (2 bytes) and NLEN (2 bytes). The bytes in[p] up to in[p + 3] are read, so the header fits
+  as long as p + 4 does not exceed inlength. A header that ends exactly at the end of the input is
+  valid: it is an empty stored block*/
+  if(p + 4 > inlength) return 52; /*error, bit pointer will jump past memory*/
+  LEN = in[p] + 256u * in[p + 1]; p += 2;
+  NLEN = in[p] + 256u * in[p + 1]; p += 2;
+
+  /*check if 16-bit NLEN is really the one's complement of LEN*/
+  if(LEN + NLEN != 65535) return 21; /*error: NLEN is not one's complement of LEN*/
+
+  /*validate the input range before touching the output, so a truncated block does not grow "out"*/
+  if(p + LEN > inlength) return 23; /*error: reading outside of in buffer*/
+
+  if(!ucvector_resize(out, (*pos) + LEN)) return 83; /*alloc fail*/
+
+  /*read the literal data: LEN bytes are now stored in the out buffer*/
+  for(n = 0; n < LEN; ++n) out->data[(*pos)++] = in[p++];
+
+  (*bp) = p * 8;
+
+  return 0;
+}
+
+/*
+Inflate a whole deflate stream of insize bytes from "in" into the vector "out".
+Blocks are decoded one after the other until a block whose BFINAL bit is set has been handled.
+Returns 0 on success, 52 if a block header would be read past the input, 20 for the invalid
+block type 3, or the error code of the block decoder that failed. "settings" is not used.
+*/
+static unsigned lodepng_inflatev(ucvector* out,
+                                 const unsigned char* in, size_t insize,
+                                 const LodePNGDecompressSettings* settings) {
+  /*position in "in" counted in bits: the byte is bitpos >> 3, the bit in it is bitpos & 0x7 (lsb first)*/
+  size_t bitpos = 0;
+  size_t outpos = 0; /*write position in the out buffer, in bytes*/
+  unsigned last_block = 0; /*BFINAL of the block header read most recently*/
+  unsigned status = 0;
+
+  (void)settings;
+
+  do {
+    unsigned type;
+
+    /*block header: 1 bit BFINAL, then the 2 bits of BTYPE, least significant first*/
+    if(bitpos + 2 >= insize * 8) return 52; /*error, bit pointer will jump past memory*/
+    last_block = readBitFromStream(&bitpos, in);
+    type = 1u * readBitFromStream(&bitpos, in);
+    type += 2u * readBitFromStream(&bitpos, in);
+
+    switch(type) {
+      case 3:
+        return 20; /*error: invalid BTYPE*/
+      case 0: /*no compression*/
+        status = inflateNoCompression(out, in, &bitpos, &outpos, insize);
+        break;
+      default: /*compression, BTYPE 01 or 10*/
+        status = inflateHuffmanBlock(out, in, &bitpos, &outpos, insize, type);
+        break;
+    }
+
+    if(status) return status;
+  } while(!last_block);
+
+  return status;
+}
+
+/*
+Inflate the insize bytes at "in". A vector is initialized from the buffer *out of size *outsize,
+the stream is inflated into it, and the vector's data pointer and size are handed back through
+out and outsize, also when an error occurred. Returns the error code of the inflation, 0 if none.
+*/
+unsigned lodepng_inflate(unsigned char** out, size_t* outsize,
+                         const unsigned char* in, size_t insize,
+                         const LodePNGDecompressSettings* settings) {
+  ucvector buffer;
+  unsigned result;
+
+  ucvector_init_buffer(&buffer, *out, *outsize);
+  result = lodepng_inflatev(&buffer, in, insize, settings);
+
+  /*report the resulting buffer back to the caller*/
+  *outsize = buffer.size;
+  *out = buffer.data;
+
+  return result;
+}
+
+/*
+Inflate through the function chosen by the settings: settings->custom_inflate when it is set,
+the built-in lodepng_inflate otherwise. All arguments are passed on unchanged and the result
+of the chosen function is returned.
+*/
+static unsigned inflate(unsigned char** out, size_t* outsize,
+                        const unsigned char* in, size_t insize,
+                        const LodePNGDecompressSettings* settings) {
+  if(!settings->custom_inflate) return lodepng_inflate(out, outsize, in, insize, settings);
+
+  return settings->custom_inflate(out, outsize, in, insize, settings);
+}
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Deflator (Compressor)                                                  / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*longest match length the encoder below searches for and emits; the match loop in encodeLZ77
+describes it as the maximum length supported by deflate. It also bounds countZeros and sizes Hash.headz*/
+static const size_t MAX_SUPPORTED_DEFLATE_LENGTH = 258;
+
+/*Appends one Huffman code to the compressed stream by forwarding to addBitsToStreamReversed.
+bp is the bit pointer of the stream, compressed is the output vector,
+code is the Huffman code to write, bitlen is the size in bits of the code*/
+static void addHuffmanSymbol(size_t* bp, ucvector* compressed, unsigned code, unsigned bitlen) {
+  addBitsToStreamReversed(bp, compressed, code, bitlen);
+}
+
+/*search the index in the array that has the largest value smaller than or equal to the given value,
+given array must be sorted. The entry at index 0 is never compared: index 0 is the result whenever no
+entry at index 1 or higher qualifies, so 0 is also returned when value is smaller than every entry*/
+static size_t searchCodeIndex(const unsigned* array, size_t array_size, size_t value) {
+  /*lower bound search on the half-open index range [lo, hi): lo ends on the first index >= 1 whose
+  entry is not smaller than value, or on array_size when there is no such entry*/
+  size_t lo = 1;
+  size_t hi = array_size;
+
+  while(lo < hi) {
+    size_t probe = lo + ((hi - lo) >> 1);
+    if(array[probe] < value) lo = probe + 1;
+    else hi = probe;
+  }
+
+  /*an exact hit is the answer itself, in every other case the answer is the entry just before*/
+  if(lo < array_size && array[lo] <= value) return lo;
+  return lo - 1;
+}
+
+/*
+Appends one length/distance pair to the LZ77 output as four consecutive values:
+the length code (257-285), the extra length bits, the distance code and the extra distance bits.
+For reference, the values in the encoded vector are those used by deflate:
+  0-255: literal bytes
+  256: end
+  257-285: length/distance pair (length code, followed by extra length bits, distance code, extra distance bits)
+  286-287: invalid
+*/
+static void addLengthDistance(uivector* values, size_t length, size_t distance) {
+  /*index of the base value each of the two numbers falls under*/
+  const size_t len_index = searchCodeIndex(LENGTHBASE, 29, length);
+  const size_t dist_index = searchCodeIndex(DISTANCEBASE, 30, distance);
+
+  /*the extra bits hold what is left after subtracting the base value*/
+  uivector_push_back(values, (unsigned)len_index + FIRST_LENGTH_CODE_INDEX);
+  uivector_push_back(values, (unsigned)(length - LENGTHBASE[len_index]));
+  uivector_push_back(values, (unsigned)dist_index);
+  uivector_push_back(values, (unsigned)(distance - DISTANCEBASE[dist_index]));
+}
+
+/*3 bytes of data get encoded into two bytes. The hash cannot use more than 3
+bytes as input because 3 is the minimum match length for deflate*/
+static const unsigned HASH_NUM_VALUES = 65536; /*number of entries hash_init allocates for Hash.head*/
+static const unsigned HASH_BIT_MASK = 65535; /*HASH_NUM_VALUES - 1, but C90 does not like that as initializer. getHash masks its result with it*/
+
+/*Lookup tables of the LZ77 encoder. hash_init allocates all six arrays and hash_cleanup frees them:
+head gets HASH_NUM_VALUES entries, headz MAX_SUPPORTED_DEFLATE_LENGTH + 1 entries and the
+other four windowsize entries each. -1 in head, headz or val marks an unused entry*/
+typedef struct Hash {
+  int* head; /*hash value to head circular pos - can be outdated if went around window*/
+  /*circular pos to prev circular pos*/
+  unsigned short* chain;
+  int* val; /*circular pos to hash value*/
+
+  /*TODO: do this not only for zeros but for any repeated byte. However for PNG
+  it's always going to be the zeros that dominate, so not important for PNG*/
+  int* headz; /*similar to head, but for chainz*/
+  unsigned short* chainz; /*those with same amount of zeros*/
+  unsigned short* zeros; /*length of zeros streak, used as a second hash chain*/
+} Hash;
+
+/*
+Allocates and initializes the six tables of the hash for a window of windowsize positions.
+Returns 0 on success. Returns 83 when any allocation fails; in that case every table that was
+allocated is freed again and all six pointers are set to 0, so nothing leaks when the caller
+simply returns, and a later hash_cleanup only sees null pointers.
+*/
+static unsigned hash_init(Hash* hash, unsigned windowsize) {
+  unsigned i;
+  hash->head = (int*)lodepng_malloc(sizeof(int) * HASH_NUM_VALUES);
+  hash->val = (int*)lodepng_malloc(sizeof(int) * windowsize);
+  hash->chain = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
+
+  hash->zeros = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
+  hash->headz = (int*)lodepng_malloc(sizeof(int) * (MAX_SUPPORTED_DEFLATE_LENGTH + 1));
+  hash->chainz = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
+
+  if(!hash->head || !hash->chain || !hash->val  || !hash->headz|| !hash->chainz || !hash->zeros) {
+    /*undo the allocations that did succeed, they would be lost otherwise*/
+    if(hash->head) lodepng_free(hash->head);
+    if(hash->val) lodepng_free(hash->val);
+    if(hash->chain) lodepng_free(hash->chain);
+    if(hash->zeros) lodepng_free(hash->zeros);
+    if(hash->headz) lodepng_free(hash->headz);
+    if(hash->chainz) lodepng_free(hash->chainz);
+    hash->head = 0;
+    hash->val = 0;
+    hash->chain = 0;
+    hash->zeros = 0;
+    hash->headz = 0;
+    hash->chainz = 0;
+    return 83; /*alloc fail*/
+  }
+
+  /*initialize hash table*/
+  for(i = 0; i != HASH_NUM_VALUES; ++i) hash->head[i] = -1;
+  for(i = 0; i != windowsize; ++i) hash->val[i] = -1;
+  for(i = 0; i != windowsize; ++i) hash->chain[i] = (unsigned short)i; /*same value as index indicates uninitialized*/
+
+  for(i = 0; i <= MAX_SUPPORTED_DEFLATE_LENGTH; ++i) hash->headz[i] = -1;
+  for(i = 0; i != windowsize; ++i) hash->chainz[i] = (unsigned short)i; /*same value as index indicates uninitialized*/
+
+  return 0;
+}
+
+/*Frees the six tables of the hash that hash_init allocated. The pointers themselves are left as they are*/
+static void hash_cleanup(Hash* hash) {
+  /*the tables of the hash chain*/
+  lodepng_free(hash->head);
+  lodepng_free(hash->chain);
+  lodepng_free(hash->val);
+
+  /*the tables of the chain of zero streaks*/
+  lodepng_free(hash->headz);
+  lodepng_free(hash->chainz);
+  lodepng_free(hash->zeros);
+}
+
+
+
+/*
+Hash of the bytes at data[pos], reduced to the range of HASH_BIT_MASK.
+With at least three bytes available the bytes at pos, pos + 1 and pos + 2 are combined; closer to
+the end of the data only the bytes that are left take part, and 0 is returned when pos is at or
+behind the end.
+*/
+static unsigned getHash(const unsigned char* data, size_t size, size_t pos) {
+  unsigned mixed = 0;
+
+  if(pos + 2 >= size) {
+    /*fewer than three bytes left: every remaining byte is shifted one byte further than the previous one*/
+    size_t k;
+    if(pos >= size) return 0;
+    for(k = pos; k != size; ++k) mixed ^= (unsigned)(data[k] << ((k - pos) * 8u));
+    return mixed & HASH_BIT_MASK;
+  }
+
+  /*A simple shift and xor hash is used. Since the data of PNGs is dominated
+  by zeroes due to the filters, a better hash does not have a significant
+  effect on speed in traversing the chain, and causes more time spend on
+  calculating the hash.*/
+  mixed = (unsigned)(data[pos + 0] << 0u)
+        ^ (unsigned)(data[pos + 1] << 4u)
+        ^ (unsigned)(data[pos + 2] << 8u);
+
+  return mixed & HASH_BIT_MASK;
+}
+
+/*
+Counts the zero bytes in a row that start at data[pos]. Counting stops at the first non-zero byte,
+at the end of the data, or after MAX_SUPPORTED_DEFLATE_LENGTH bytes, whichever comes first.
+Returns 0 when pos is at or behind the end of the data.
+The count is kept as an index instead of a pointer so that no pointer beyond the end of the
+buffer is ever formed, which is undefined behaviour even when the pointer is not dereferenced.
+*/
+static unsigned countZeros(const unsigned char* data, size_t size, size_t pos) {
+  size_t limit, count = 0;
+
+  if(pos >= size) return 0;
+
+  /*never look further than the remaining data or the longest supported match*/
+  limit = size - pos;
+  if(limit > MAX_SUPPORTED_DEFLATE_LENGTH) limit = MAX_SUPPORTED_DEFLATE_LENGTH;
+
+  while(count != limit && data[pos + count] == 0) ++count;
+
+  /*the max value is MAX_SUPPORTED_DEFLATE_LENGTH, so it fits in an unsigned*/
+  return (unsigned)count;
+}
+
+/*
+Registers the window position wpos (wpos = pos & (windowsize - 1)) in the hash.
+The position becomes the new head for its hash value and for its zero streak length; when there
+was an older head, that one is stored as the predecessor of wpos in the matching chain.
+*/
+static void updateHashChain(Hash* hash, size_t wpos, unsigned hashval, unsigned short numzeros) {
+  const int older = hash->head[hashval];
+  const int olderz = hash->headz[numzeros];
+
+  /*chain of equal hash values*/
+  hash->val[wpos] = (int)hashval;
+  if(older != -1) hash->chain[wpos] = (unsigned short)older;
+  hash->head[hashval] = (int)wpos;
+
+  /*chain of equal zero streak lengths*/
+  hash->zeros[wpos] = numzeros;
+  if(olderz != -1) hash->chainz[wpos] = (unsigned short)olderz;
+  hash->headz[numzeros] = (int)wpos;
+}
+
+/*
+LZ77-encode the data. Return value is error code. The input are raw bytes, the output
+is in the form of unsigned integers with codes representing for example literal bytes, or
+length/distance pairs.
+It uses a hash table technique to let it encode faster. When doing LZ77 encoding, a
+sliding window (of windowsize) is used, and all past bytes in that window can be used as
+the "dictionary". A brute force search through all possible distances would be slow, and
+this hash technique is one out of several ways to speed this up.
+
+out: receives one value per literal byte and four values per length/distance pair (see addLengthDistance)
+hash: the hash tables, updated for every position that is processed (presumably set up by hash_init
+      with the same windowsize - confirm at the caller)
+in, inpos, insize: the bytes in[inpos] up to in[insize - 1] are encoded; matches may refer to bytes before inpos
+windowsize: must be a power of two between 1 and 32768, else error 60 or 90 is returned
+minmatch: matches shorter than this are written as literals
+nicematch: the search stops at the first match of at least this length (capped to MAX_SUPPORTED_DEFLATE_LENGTH)
+lazymatching: if nonzero, a match is held back one byte to see if the next position gives a longer one
+Other error codes returned: 81, 83 (alloc fail), 86.
+*/
+static unsigned encodeLZ77(uivector* out, Hash* hash,
+                           const unsigned char* in, size_t inpos, size_t insize, unsigned windowsize,
+                           unsigned minmatch, unsigned nicematch, unsigned lazymatching) {
+  size_t pos;
+  unsigned i, error = 0;
+  /*for large window lengths, assume the user wants no compression loss. Otherwise, max hash chain length speedup.*/
+  unsigned maxchainlength = windowsize >= 8192 ? windowsize : windowsize / 8;
+  unsigned maxlazymatch = windowsize >= 8192 ? MAX_SUPPORTED_DEFLATE_LENGTH : 64;
+
+  unsigned usezeros = 1; /*not sure if setting it to false for windowsize < 8192 is better or worse*/
+  unsigned numzeros = 0;
+
+  unsigned offset; /*the offset represents the distance in LZ77 terminology*/
+  unsigned length;
+  unsigned lazy = 0;
+  unsigned lazylength = 0, lazyoffset = 0;
+  unsigned hashval;
+  unsigned current_offset, current_length;
+  unsigned prev_offset;
+  const unsigned char *lastptr, *foreptr, *backptr;
+  unsigned hashpos;
+
+  if(windowsize == 0 || windowsize > 32768) return 60; /*error: windowsize smaller/larger than allowed*/
+  if((windowsize & (windowsize - 1)) != 0) return 90; /*error: must be power of two*/
+
+  if(nicematch > MAX_SUPPORTED_DEFLATE_LENGTH) nicematch = MAX_SUPPORTED_DEFLATE_LENGTH;
+
+  for(pos = inpos; pos < insize; ++pos) {
+    size_t wpos = pos & (windowsize - 1); /*position for in 'circular' hash buffers*/
+    unsigned chainlength = 0;
+
+    hashval = getHash(in, insize, pos);
+
+    /*keep numzeros equal to the zero streak starting at pos: count it once, then only adjust it
+    while moving through the streak*/
+    if(usezeros && hashval == 0) {
+      if(numzeros == 0) numzeros = countZeros(in, insize, pos);
+      else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros;
+    } else {
+      numzeros = 0;
+    }
+
+    updateHashChain(hash, wpos, hashval, numzeros);
+
+    /*the length and offset found for the current position*/
+    length = 0;
+    offset = 0;
+
+    hashpos = hash->chain[wpos];
+
+    /*first byte that may no longer be part of a match starting at pos*/
+    lastptr = &in[insize < pos + MAX_SUPPORTED_DEFLATE_LENGTH ? insize : pos + MAX_SUPPORTED_DEFLATE_LENGTH];
+
+    /*search for the longest string*/
+    prev_offset = 0;
+    for(;;) {
+      if(chainlength++ >= maxchainlength) break;
+      current_offset = (unsigned)(hashpos <= wpos ? wpos - hashpos : wpos - hashpos + windowsize);
+
+      if(current_offset < prev_offset) break; /*stop when went completely around the circular buffer*/
+      prev_offset = current_offset;
+      if(current_offset > 0) {
+        /*test the next characters*/
+        foreptr = &in[pos];
+        backptr = &in[pos - current_offset];
+
+        /*common case in PNGs is lots of zeros. Quickly skip over them as a speedup*/
+        if(numzeros >= 3) {
+          unsigned skip = hash->zeros[hashpos];
+          if(skip > numzeros) skip = numzeros;
+          backptr += skip;
+          foreptr += skip;
+        }
+
+        while(foreptr != lastptr && *backptr == *foreptr) /*maximum supported length by deflate is max length*/ {
+          ++backptr;
+          ++foreptr;
+        }
+        current_length = (unsigned)(foreptr - &in[pos]);
+
+        if(current_length > length) {
+          length = current_length; /*the longest length*/
+          offset = current_offset; /*the offset that is related to this longest length*/
+          /*jump out once a length of max length is found (speed gain). This also jumps
+          out if length is MAX_SUPPORTED_DEFLATE_LENGTH*/
+          if(current_length >= nicematch) break;
+        }
+      }
+
+      /*a chain entry that points at itself is uninitialized (see hash_init): end of the chain*/
+      if(hashpos == hash->chain[hashpos]) break;
+
+      if(numzeros >= 3 && length > numzeros) {
+        hashpos = hash->chainz[hashpos];
+        if(hash->zeros[hashpos] != numzeros) break;
+      } else {
+        hashpos = hash->chain[hashpos];
+        /*outdated hash value, happens if particular value was not encountered in whole last window*/
+        if(hash->val[hashpos] != (int)hashval) break;
+      }
+    }
+
+    if(lazymatching) {
+      /*first time a match is found: remember it and look at the next byte before deciding*/
+      if(!lazy && length >= 3 && length <= maxlazymatch && length < MAX_SUPPORTED_DEFLATE_LENGTH) {
+        lazy = 1;
+        lazylength = length;
+        lazyoffset = offset;
+        continue; /*try the next byte*/
+      }
+      if(lazy) {
+        lazy = 0;
+        if(pos == 0) ERROR_BREAK(81);
+        if(length > lazylength + 1) {
+          /*push the previous character as literal*/
+          if(!uivector_push_back(out, in[pos - 1])) ERROR_BREAK(83 /*alloc fail*/);
+        } else {
+          /*the remembered match wins: go back one byte and encode that one*/
+          length = lazylength;
+          offset = lazyoffset;
+          hash->head[hashval] = -1; /*the same hashchain update will be done, this ensures no wrong alteration*/
+          hash->headz[numzeros] = -1; /*idem*/
+          --pos;
+        }
+      }
+    }
+    if(length >= 3 && offset > windowsize) ERROR_BREAK(86 /*too big (or overflown negative) offset*/);
+
+    /*encode it as length/distance pair or literal value*/
+    if(length < 3) /*only lengths of 3 or higher are supported as length/distance pair*/ {
+      if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/);
+    } else if(length < minmatch || (length == 3 && offset > 4096)) {
+      /*compensate for the fact that longer offsets have more extra bits, a
+      length of only 3 may be not worth it then*/
+      if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/);
+    } else {
+      addLengthDistance(out, length, offset);
+      /*the bytes covered by the match are skipped, but they still have to be entered in the hash*/
+      for(i = 1; i < length; ++i) {
+        ++pos;
+        wpos = pos & (windowsize - 1);
+        hashval = getHash(in, insize, pos);
+        if(usezeros && hashval == 0) {
+          if(numzeros == 0) numzeros = countZeros(in, insize, pos);
+          else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros;
+        } else {
+          numzeros = 0;
+        }
+        updateHashChain(hash, wpos, hashval, numzeros);
+      }
+    }
+  } /*end of the loop through each character of input*/
+
+  return error;
+}
+
+/* /////////////////////////////////////////////////////////////////////////// */
+
+/*
+Writes data as a sequence of stored (BTYPE 00) deflate blocks of at most 65535 bytes each.
+out: vector the blocks are appended to
+data, datasize: the bytes to store; for datasize 0 a single empty final block is written, because a
+deflate stream needs at least one block
+returns 0 on success, 83 if the output vector cannot be enlarged
+*/
+static unsigned deflateNoCompression(ucvector* out, const unsigned char* data, size_t datasize) {
+  /*non compressed deflate block data: 1 bit BFINAL,2 bits BTYPE,(5 bits): it jumps to start of next byte,
+  2 bytes LEN, 2 bytes NLEN, LEN bytes literal DATA*/
+
+  size_t i, j, numdeflateblocks = datasize / 65535 + (datasize % 65535 != 0);
+  size_t datapos = 0; /*size_t, not unsigned: datasize may exceed the range of unsigned*/
+
+  if(numdeflateblocks == 0) numdeflateblocks = 1;
+
+  for(i = 0; i != numdeflateblocks; ++i) {
+    unsigned BFINAL, LEN, NLEN;
+    size_t remaining = datasize - datapos;
+    size_t blockstart = out->size; /*index in out->data where this block begins*/
+
+    BFINAL = (i == numdeflateblocks - 1);
+
+    LEN = 65535;
+    if(remaining < 65535) LEN = (unsigned)remaining;
+    NLEN = 65535 - LEN;
+
+    /*room for the 5 header bytes and the data of this block*/
+    if(!ucvector_resize(out, blockstart + 5 + LEN)) return 83; /*alloc fail*/
+
+    /*first byte: BFINAL in bit 0, the two BTYPE bits after it are 0, the remaining bits are padding*/
+    out->data[blockstart + 0] = (unsigned char)BFINAL;
+    out->data[blockstart + 1] = (unsigned char)(LEN & 255);
+    out->data[blockstart + 2] = (unsigned char)(LEN >> 8);
+    out->data[blockstart + 3] = (unsigned char)(NLEN & 255);
+    out->data[blockstart + 4] = (unsigned char)(NLEN >> 8);
+
+    /*Decompressed data*/
+    for(j = 0; j != LEN; ++j) {
+      out->data[blockstart + 5 + j] = data[datapos++];
+    }
+  }
+
+  return 0;
+}
+
+/*
+Writes the lz77-encoded values to the compressed stream as Huffman codes.
+A value up to 256 is written as a single code of tree_ll. A larger value is a length code and is
+followed in lz77_encoded by three more values, so four values are consumed for it: the length code
+(tree_ll), its extra bits, the distance code (tree_d) and its extra bits.
+bp: the bit pointer of the stream, out: the compressed stream
+tree_ll: the tree for lit and len codes.
+tree_d: the tree for distance codes.
+*/
+static void writeLZ77data(size_t* bp, ucvector* out, const uivector* lz77_encoded,
+                          const HuffmanTree* tree_ll, const HuffmanTree* tree_d) {
+  const unsigned* values = lz77_encoded->data;
+  const size_t count = lz77_encoded->size;
+  size_t k = 0;
+
+  while(k != count) {
+    const unsigned symbol = values[k];
+    addHuffmanSymbol(bp, out, HuffmanTree_getCode(tree_ll, symbol), HuffmanTree_getLength(tree_ll, symbol));
+
+    if(symbol <= 256) {
+      /*literal or end code: nothing else belongs to it*/
+      ++k;
+    } else {
+      /*length code: the next three values belong to it*/
+      const unsigned length_extra = values[k + 1];
+      const unsigned dist_code = values[k + 2];
+      const unsigned dist_extra = values[k + 3];
+
+      addBitsToStream(bp, out, length_extra, LENGTHEXTRA[symbol - FIRST_LENGTH_CODE_INDEX]);
+      addHuffmanSymbol(bp, out, HuffmanTree_getCode(tree_d, dist_code),
+                       HuffmanTree_getLength(tree_d, dist_code));
+      addBitsToStream(bp, out, dist_extra, DISTANCEEXTRA[dist_code]);
+
+      k += 4;
+    }
+  }
+}
+
+/*Deflate for a block of type "dynamic", that is, with freely, optimally, created huffman trees
+out, bp: the compressed stream and its bit pointer, the block is appended there
+hash: the hash tables handed on to encodeLZ77
+data, datapos, dataend: the bytes data[datapos] up to data[dataend - 1] form this block
+settings: use_lz77, windowsize, minmatch, nicematch and lazymatching are read
+final: written as the BFINAL bit of the block
+returns 0 on success, else an error code (64, 83, or the one of encodeLZ77 or of the tree creation)*/
+static unsigned deflateDynamic(ucvector* out, size_t* bp, Hash* hash,
+                               const unsigned char* data, size_t datapos, size_t dataend,
+                               const LodePNGCompressSettings* settings, unsigned final) {
+  unsigned error = 0;
+
+  /*
+  A block is compressed as follows: The PNG data is lz77 encoded, resulting in
+  literal bytes and length/distance pairs. This is then huffman compressed with
+  two huffman trees. One huffman tree is used for the lit and len values ("ll"),
+  another huffman tree is used for the dist values ("d"). These two trees are
+  stored using their code lengths, and to compress even more these code lengths
+  are also run-length encoded and huffman compressed. This gives a huffman tree
+  of code lengths "cl". The code lengths used to describe this third tree are
+  the code length code lengths ("clcl").
+  */
+
+  /*The lz77 encoded data, represented with integers since there will also be length and distance codes in it*/
+  uivector lz77_encoded;
+  HuffmanTree tree_ll; /*tree for lit,len values*/
+  HuffmanTree tree_d; /*tree for distance codes*/
+  HuffmanTree tree_cl; /*tree for encoding the code lengths representing tree_ll and tree_d*/
+  uivector frequencies_ll; /*frequency of lit,len codes*/
+  uivector frequencies_d; /*frequency of dist codes*/
+  uivector frequencies_cl; /*frequency of code length codes*/
+  uivector bitlen_lld; /*lit,len,dist code lengths (in bits), literally (without repeat codes).*/
+  uivector bitlen_lld_e; /*bitlen_lld encoded with repeat codes (this is a rudimentary run length compression)*/
+  /*bitlen_cl is the code length code lengths ("clcl"). The bit lengths of codes to represent tree_cl
+  (these are written as is in the file, it would be crazy to compress these using yet another huffman
+  tree that needs to be represented by yet another set of code lengths)*/
+  uivector bitlen_cl;
+  size_t datasize = dataend - datapos;
+
+  /*
+  Due to the huffman compression of huffman tree representations ("two levels"), there are some analogies:
+  bitlen_lld is to tree_cl what data is to tree_ll and tree_d.
+  bitlen_lld_e is to bitlen_lld what lz77_encoded is to data.
+  bitlen_cl is to bitlen_lld_e what bitlen_lld is to lz77_encoded.
+  */
+
+  unsigned BFINAL = final;
+  size_t numcodes_ll, numcodes_d, i;
+  unsigned HLIT, HDIST, HCLEN;
+
+  uivector_init(&lz77_encoded);
+  HuffmanTree_init(&tree_ll);
+  HuffmanTree_init(&tree_d);
+  HuffmanTree_init(&tree_cl);
+  uivector_init(&frequencies_ll);
+  uivector_init(&frequencies_d);
+  uivector_init(&frequencies_cl);
+  uivector_init(&bitlen_lld);
+  uivector_init(&bitlen_lld_e);
+  uivector_init(&bitlen_cl);
+
+  /*This while loop never loops due to a break at the end, it is here to
+  allow breaking out of it to the cleanup phase on error conditions.*/
+  while(!error) {
+    if(settings->use_lz77) {
+      error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize,
+                         settings->minmatch, settings->nicematch, settings->lazymatching);
+      if(error) break;
+    } else {
+      if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83 /*alloc fail*/);
+      for(i = datapos; i < dataend; ++i) lz77_encoded.data[i - datapos] = data[i]; /*no LZ77, but still will be Huffman compressed*/
+    }
+
+    if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83 /*alloc fail*/);
+    if(!uivector_resizev(&frequencies_d, 30, 0)) ERROR_BREAK(83 /*alloc fail*/);
+
+    /*Count the frequencies of lit, len and dist codes*/
+    for(i = 0; i != lz77_encoded.size; ++i) {
+      unsigned symbol = lz77_encoded.data[i];
+      ++frequencies_ll.data[symbol];
+      if(symbol > 256) {
+        /*a length code is followed by its extra bits, the distance code and the distance extra bits*/
+        unsigned dist = lz77_encoded.data[i + 2];
+        ++frequencies_d.data[dist];
+        i += 3;
+      }
+    }
+    frequencies_ll.data[256] = 1; /*there will be exactly 1 end code, at the end of the block*/
+
+    /*Make both huffman trees, one for the lit and len codes, one for the dist codes*/
+    error = HuffmanTree_makeFromFrequencies(&tree_ll, frequencies_ll.data, 257, frequencies_ll.size, 15);
+    if(error) break;
+    /*2, not 1, is chosen for mincodes: some buggy PNG decoders require at least 2 symbols in the dist tree*/
+    error = HuffmanTree_makeFromFrequencies(&tree_d, frequencies_d.data, 2, frequencies_d.size, 15);
+    if(error) break;
+
+    numcodes_ll = tree_ll.numcodes; if(numcodes_ll > 286) numcodes_ll = 286;
+    numcodes_d = tree_d.numcodes; if(numcodes_d > 30) numcodes_d = 30;
+    /*store the code lengths of both generated trees in bitlen_lld*/
+    for(i = 0; i != numcodes_ll; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_ll, (unsigned)i));
+    for(i = 0; i != numcodes_d; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_d, (unsigned)i));
+
+    /*run-length compress bitlen_lld into bitlen_lld_e by using repeat codes 16 (copy length 3-6 times),
+    17 (3-10 zeroes), 18 (11-138 zeroes)*/
+    for(i = 0; i != (unsigned)bitlen_lld.size; ++i) {
+      unsigned j = 0; /*amount of repetitions*/
+      while(i + j + 1 < (unsigned)bitlen_lld.size && bitlen_lld.data[i + j + 1] == bitlen_lld.data[i]) ++j;
+
+      if(bitlen_lld.data[i] == 0 && j >= 2) /*repeat code for zeroes*/ {
+        ++j; /*include the first zero*/
+        if(j <= 10) /*repeat code 17 supports max 10 zeroes*/ {
+          uivector_push_back(&bitlen_lld_e, 17);
+          uivector_push_back(&bitlen_lld_e, j - 3);
+        } else /*repeat code 18 supports max 138 zeroes*/ {
+          if(j > 138) j = 138;
+          uivector_push_back(&bitlen_lld_e, 18);
+          uivector_push_back(&bitlen_lld_e, j - 11);
+        }
+        i += (j - 1);
+      } else if(j >= 3) /*repeat code for value other than zero*/ {
+        size_t k;
+        unsigned num = j / 6, rest = j % 6;
+        /*the value itself once, then as many full repeats of 6 as fit, then the remainder if it is at least 3*/
+        uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]);
+        for(k = 0; k < num; ++k) {
+          uivector_push_back(&bitlen_lld_e, 16);
+          uivector_push_back(&bitlen_lld_e, 6 - 3);
+        }
+        if(rest >= 3) {
+          uivector_push_back(&bitlen_lld_e, 16);
+          uivector_push_back(&bitlen_lld_e, rest - 3);
+        }
+        else j -= rest; /*a remainder below 3 is left for the next iterations*/
+        i += j;
+      } else /*too short to benefit from repeat code*/ {
+        uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]);
+      }
+    }
+
+    /*generate tree_cl, the huffmantree of huffmantrees*/
+
+    if(!uivector_resizev(&frequencies_cl, NUM_CODE_LENGTH_CODES, 0)) ERROR_BREAK(83 /*alloc fail*/);
+    for(i = 0; i != bitlen_lld_e.size; ++i) {
+      ++frequencies_cl.data[bitlen_lld_e.data[i]];
+      /*after a repeat code come the bits that specify the number of repetitions,
+      those don't need to be in the frequencies_cl calculation*/
+      if(bitlen_lld_e.data[i] >= 16) ++i;
+    }
+
+    error = HuffmanTree_makeFromFrequencies(&tree_cl, frequencies_cl.data,
+                                            frequencies_cl.size, frequencies_cl.size, 7);
+    if(error) break;
+
+    if(!uivector_resize(&bitlen_cl, tree_cl.numcodes)) ERROR_BREAK(83 /*alloc fail*/);
+    for(i = 0; i != tree_cl.numcodes; ++i) {
+      /*lengths of code length tree is in the order as specified by deflate*/
+      bitlen_cl.data[i] = HuffmanTree_getLength(&tree_cl, CLCL_ORDER[i]);
+    }
+    while(bitlen_cl.data[bitlen_cl.size - 1] == 0 && bitlen_cl.size > 4) {
+      /*remove zeros at the end, but minimum size must be 4*/
+      if(!uivector_resize(&bitlen_cl, bitlen_cl.size - 1)) ERROR_BREAK(83 /*alloc fail*/);
+    }
+    /*the ERROR_BREAK above only leaves the inner while loop, so the error is checked again here*/
+    if(error) break;
+
+    /*
+    Write everything into the output
+
+    After the BFINAL and BTYPE, the dynamic block consists out of the following:
+    - 5 bits HLIT, 5 bits HDIST, 4 bits HCLEN
+    - (HCLEN+4)*3 bits code lengths of code length alphabet
+    - HLIT + 257 code lengths of lit/length alphabet (encoded using the code length
+      alphabet, + possible repetition codes 16, 17, 18)
+    - HDIST + 1 code lengths of distance alphabet (encoded using the code length
+      alphabet, + possible repetition codes 16, 17, 18)
+    - compressed data
+    - 256 (end code)
+    */
+
+    /*Write block type*/
+    addBitToStream(bp, out, BFINAL);
+    addBitToStream(bp, out, 0); /*first bit of BTYPE "dynamic"*/
+    addBitToStream(bp, out, 1); /*second bit of BTYPE "dynamic"*/
+
+    /*write the HLIT, HDIST and HCLEN values*/
+    HLIT = (unsigned)(numcodes_ll - 257);
+    HDIST = (unsigned)(numcodes_d - 1);
+    HCLEN = (unsigned)bitlen_cl.size - 4;
+    /*trim zeroes for HCLEN. HLIT and HDIST were already trimmed at tree creation*/
+    while(!bitlen_cl.data[HCLEN + 4 - 1] && HCLEN > 0) --HCLEN;
+    addBitsToStream(bp, out, HLIT, 5);
+    addBitsToStream(bp, out, HDIST, 5);
+    addBitsToStream(bp, out, HCLEN, 4);
+
+    /*write the code lengths of the code length alphabet*/
+    for(i = 0; i != HCLEN + 4; ++i) addBitsToStream(bp, out, bitlen_cl.data[i], 3);
+
+    /*write the lengths of the lit/len AND the dist alphabet*/
+    for(i = 0; i != bitlen_lld_e.size; ++i) {
+      addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_cl, bitlen_lld_e.data[i]),
+                       HuffmanTree_getLength(&tree_cl, bitlen_lld_e.data[i]));
+      /*extra bits of repeat codes*/
+      if(bitlen_lld_e.data[i] == 16) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 2);
+      else if(bitlen_lld_e.data[i] == 17) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 3);
+      else if(bitlen_lld_e.data[i] == 18) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 7);
+    }
+
+    /*write the compressed data symbols*/
+    writeLZ77data(bp, out, &lz77_encoded, &tree_ll, &tree_d);
+    /*error: the length of the end code 256 must be larger than 0*/
+    if(HuffmanTree_getLength(&tree_ll, 256) == 0) ERROR_BREAK(64);
+
+    /*write the end code*/
+    addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, 256), HuffmanTree_getLength(&tree_ll, 256));
+
+    break; /*end of error-while*/
+  }
+
+  /*cleanup*/
+  uivector_cleanup(&lz77_encoded);
+  HuffmanTree_cleanup(&tree_ll);
+  HuffmanTree_cleanup(&tree_d);
+  HuffmanTree_cleanup(&tree_cl);
+  uivector_cleanup(&frequencies_ll);
+  uivector_cleanup(&frequencies_d);
+  uivector_cleanup(&frequencies_cl);
+  uivector_cleanup(&bitlen_lld_e);
+  uivector_cleanup(&bitlen_lld);
+  uivector_cleanup(&bitlen_cl);
+
+  return error;
+}
+
+/*
+Deflate for a block of type "fixed": the trees made by generateFixedLitLenTree and
+generateFixedDistanceTree are used and no trees are written to the stream.
+out, bp: the compressed stream and its bit pointer, the block is appended there
+hash: the hash tables handed on to encodeLZ77
+data, datapos, dataend: the bytes data[datapos] up to data[dataend - 1] form this block
+settings: use_lz77, windowsize, minmatch, nicematch and lazymatching are read
+final: written as the BFINAL bit of the block
+returns 0 on success, else the error code of encodeLZ77
+*/
+static unsigned deflateFixed(ucvector* out, size_t* bp, Hash* hash,
+                             const unsigned char* data,
+                             size_t datapos, size_t dataend,
+                             const LodePNGCompressSettings* settings, unsigned final) {
+  unsigned status = 0;
+  HuffmanTree lit_len_tree; /*tree for literal values and length codes*/
+  HuffmanTree dist_tree; /*tree for distance codes*/
+
+  HuffmanTree_init(&lit_len_tree);
+  HuffmanTree_init(&dist_tree);
+
+  generateFixedLitLenTree(&lit_len_tree);
+  generateFixedDistanceTree(&dist_tree);
+
+  /*block header: BFINAL, then the two bits of BTYPE*/
+  addBitToStream(bp, out, final);
+  addBitToStream(bp, out, 1); /*first bit of BTYPE*/
+  addBitToStream(bp, out, 0); /*second bit of BTYPE*/
+
+  if(!settings->use_lz77) /*no LZ77, but still will be Huffman compressed*/ {
+    size_t k = datapos;
+    while(k < dataend) {
+      const unsigned literal = data[k];
+      addHuffmanSymbol(bp, out, HuffmanTree_getCode(&lit_len_tree, literal),
+                       HuffmanTree_getLength(&lit_len_tree, literal));
+      ++k;
+    }
+  } else /*LZ77 encoded*/ {
+    uivector symbols;
+    uivector_init(&symbols);
+    status = encodeLZ77(&symbols, hash, data, datapos, dataend, settings->windowsize,
+                        settings->minmatch, settings->nicematch, settings->lazymatching);
+    if(!status) writeLZ77data(bp, out, &symbols, &lit_len_tree, &dist_tree);
+    uivector_cleanup(&symbols);
+  }
+
+  /*add END code*/
+  if(!status) {
+    addHuffmanSymbol(bp, out, HuffmanTree_getCode(&lit_len_tree, 256), HuffmanTree_getLength(&lit_len_tree, 256));
+  }
+
+  /*cleanup*/
+  HuffmanTree_cleanup(&lit_len_tree);
+  HuffmanTree_cleanup(&dist_tree);
+
+  return status;
+}
+
+/*
+Deflates in[0..insize) into out as one or more deflate blocks of the type selected by settings->btype:
+0 is delegated to deflateNoCompression, 1 uses deflateFixed, 2 uses deflateDynamic.
+Returns 0 on success, 61 for a btype above 2, or the error code of the failing helper.
+*/
+static unsigned lodepng_deflatev(ucvector* out, const unsigned char* in, size_t insize,
+                                 const LodePNGCompressSettings* settings) {
+  unsigned error = 0;
+  size_t i, blocksize, numdeflateblocks;
+  size_t bp = 0; /*the bit pointer*/
+  Hash hash;
+
+  if(settings->btype > 2) return 61;
+  else if(settings->btype == 0) return deflateNoCompression(out, in, insize);
+  /*one block spanning the whole input; never 0, so that empty input cannot divide by zero below*/
+  else if(settings->btype == 1) blocksize = insize ? insize : 1;
+  else /*if(settings->btype == 2)*/ {
+    /*on PNGs, deflate blocks of 65-262k seem to give most dense encoding*/
+    blocksize = insize / 8 + 8;
+    if(blocksize < 65536) blocksize = 65536;
+    if(blocksize > 262144) blocksize = 262144;
+  }
+
+  /*round up; empty input still produces one (empty) block*/
+  numdeflateblocks = (insize + blocksize - 1) / blocksize;
+  if(numdeflateblocks == 0) numdeflateblocks = 1;
+
+  error = hash_init(&hash, settings->windowsize);
+  if(error) return error;
+
+  for(i = 0; i != numdeflateblocks && !error; ++i) {
+    unsigned final = (i == numdeflateblocks - 1);
+    size_t start = i * blocksize;
+    size_t end = start + blocksize;
+    if(end > insize) end = insize; /*the last block may be shorter*/
+
+    if(settings->btype == 1) error = deflateFixed(out, &bp, &hash, in, start, end, settings, final);
+    else if(settings->btype == 2) error = deflateDynamic(out, &bp, &hash, in, start, end, settings, final);
+  }
+
+  hash_cleanup(&hash);
+
+  return error;
+}
+
+/*
+Public deflate entry point: wraps the caller's buffer (*out, *outsize) in a ucvector, runs
+lodepng_deflatev on it and stores the vector's data pointer and size back, also when an error occurred.
+Returns the error code of lodepng_deflatev.
+*/
+unsigned lodepng_deflate(unsigned char** out, size_t* outsize,
+                         const unsigned char* in, size_t insize,
+                         const LodePNGCompressSettings* settings) {
+  ucvector buffer;
+  unsigned result;
+
+  ucvector_init_buffer(&buffer, *out, *outsize);
+  result = lodepng_deflatev(&buffer, in, insize, settings);
+
+  /*hand the vector's storage back to the caller*/
+  *outsize = buffer.size;
+  *out = buffer.data;
+  return result;
+}
+
+/*Deflate dispatcher: calls settings->custom_deflate when it is set, the built-in lodepng_deflate otherwise.*/
+static unsigned deflate(unsigned char** out, size_t* outsize,
+                        const unsigned char* in, size_t insize,
+                        const LodePNGCompressSettings* settings) {
+  if(!settings->custom_deflate) return lodepng_deflate(out, outsize, in, insize, settings);
+  return settings->custom_deflate(out, outsize, in, insize, settings);
+}
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Adler32                                                                  */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*
+Continues an Adler-32 checksum: adler is the running value (1 for a fresh checksum), data[0..len-1]
+are the bytes to add. Returns the updated checksum, high sum in the upper 16 bits.
+*/
+static unsigned update_adler32(unsigned adler, const unsigned char* data, unsigned len) {
+  unsigned lo = adler & 0xffff;
+  unsigned hi = (adler >> 16) & 0xffff;
+
+  while(len != 0) {
+    /*at least 5552 sums can be done before the sums overflow, saving a lot of modulo divisions*/
+    unsigned run = (len < 5552) ? len : 5552;
+    unsigned k;
+    for(k = 0; k != run; ++k) {
+      lo += data[k];
+      hi += lo;
+    }
+    data += run;
+    len -= run;
+    lo %= 65521;
+    hi %= 65521;
+  }
+
+  return (hi << 16) | lo;
+}
+
+/*Returns the Adler-32 of data[0..len-1], i.e. update_adler32 started from the initial value 1.*/
+static unsigned adler32(const unsigned char* data, unsigned len) {
+  const unsigned initial = 1;
+  return update_adler32(initial, data, len);
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Zlib                                                                   / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/*
+Decompresses the zlib stream in[0..insize) into *out / *outsize.
+Checks the two header bytes (FCHECK, CM, CINFO, FDICT), inflates everything after them and, unless
+settings->ignore_adler32 is set, compares the Adler-32 of the output with the last 4 input bytes.
+Returns 0 on success, otherwise 53, 24, 25, 26 or 58 as commented below, or the error code of inflate.
+*/
+unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                                 size_t insize, const LodePNGDecompressSettings* settings) {
+  unsigned error = 0;
+  unsigned CM, CINFO, FDICT;
+
+  if(insize < 2) return 53; /*error, size of zlib data too small*/
+  /*read information from zlib header*/
+  if((in[0] * 256 + in[1]) % 31 != 0) {
+    /*error: 256 * in[0] + in[1] must be a multiple of 31, the FCHECK value is supposed to be made that way*/
+    return 24;
+  }
+
+  CM = in[0] & 15;
+  CINFO = (in[0] >> 4) & 15;
+  /*FCHECK = in[1] & 31;*/ /*FCHECK is already tested above*/
+  FDICT = (in[1] >> 5) & 1;
+  /*FLEVEL = (in[1] >> 6) & 3;*/ /*FLEVEL is not used here*/
+
+  if(CM != 8 || CINFO > 7) {
+    /*error: only compression method 8: inflate with sliding window of 32k is supported by the PNG spec*/
+    return 25;
+  }
+  if(FDICT != 0) {
+    /*error: the specification of PNG says about the zlib stream:
+      "The additional flags shall not specify a preset dictionary."*/
+    return 26;
+  }
+
+  error = inflate(out, outsize, in + 2, insize - 2, settings);
+  if(error) return error;
+
+  if(!settings->ignore_adler32) {
+    unsigned ADLER32, checksum;
+    /*2 header bytes + 4 checksum bytes are the minimum; in a shorter stream the 4 bytes read
+    below would overlap the header, so there is no complete checksum to compare against*/
+    if(insize < 6) return 58;
+    ADLER32 = lodepng_read32bitInt(&in[insize - 4]);
+    checksum = adler32(*out, (unsigned)(*outsize));
+    if(checksum != ADLER32) return 58; /*error, adler checksum not correct, data must be corrupted*/
+  }
+
+  return 0; /*no error*/
+}
+
+/*Zlib decompression dispatcher: calls settings->custom_zlib when set, else lodepng_zlib_decompress.*/
+static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                                size_t insize, const LodePNGDecompressSettings* settings) {
+  if(!settings->custom_zlib) return lodepng_zlib_decompress(out, outsize, in, insize, settings);
+  return settings->custom_zlib(out, outsize, in, insize, settings);
+}
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/*
+Writes a zlib stream for in[0..insize) to the buffer described by *out / *outsize: the 2 header
+bytes, the deflate data produced by deflate(), and the Adler-32 of the uncompressed input.
+*out and *outsize are updated even when an error is returned. Returns the error code of deflate().
+*/
+unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                               size_t insize, const LodePNGCompressSettings* settings) {
+  /*initially, *out must be NULL and outsize 0, if you just give some random *out
+  that's pointing to a non allocated buffer, this'll crash*/
+  ucvector outv;
+  size_t i;
+  unsigned error;
+  unsigned char* deflatedata = 0;
+  size_t deflatesize = 0;
+
+  /*zlib data: 1 byte CMF (CM+CINFO), 1 byte FLG, deflate data, 4 byte ADLER32 checksum of the Decompressed data*/
+  unsigned CMF = 120; /*0b01111000: CM 8, CINFO 7. With CINFO 7, any window size up to 32768 can be used.*/
+  unsigned FLEVEL = 0;
+  unsigned FDICT = 0;
+  unsigned CMFFLG = 256 * CMF + FDICT * 32 + FLEVEL * 64;
+  unsigned FCHECK = 31 - CMFFLG % 31; /*makes the 16-bit header a multiple of 31*/
+  CMFFLG += FCHECK;
+
+  /*ucvector-controlled version of the output buffer, for dynamic array*/
+  ucvector_init_buffer(&outv, *out, *outsize);
+
+  ucvector_push_back(&outv, (unsigned char)(CMFFLG >> 8));
+  ucvector_push_back(&outv, (unsigned char)(CMFFLG & 255));
+
+  error = deflate(&deflatedata, &deflatesize, in, insize, settings);
+
+  if(!error) {
+    unsigned ADLER32 = adler32(in, (unsigned)insize);
+    for(i = 0; i != deflatesize; ++i) ucvector_push_back(&outv, deflatedata[i]);
+    lodepng_add32bitInt(&outv, ADLER32);
+  }
+
+  /*released on every path: lodepng_deflate stores its buffer in deflatedata even when it reports
+  an error, so freeing only on success would leak it*/
+  lodepng_free(deflatedata);
+
+  *out = outv.data;
+  *outsize = outv.size;
+
+  return error;
+}
+
+/* Zlib compression dispatcher: calls settings->custom_zlib when set, else lodepng_zlib_compress. */
+static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                              size_t insize, const LodePNGCompressSettings* settings) {
+  if(!settings->custom_zlib) return lodepng_zlib_compress(out, outsize, in, insize, settings);
+  return settings->custom_zlib(out, outsize, in, insize, settings);
+}
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#else /*no LODEPNG_COMPILE_ZLIB*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*Variant used when the built-in zlib code is compiled out: only a user supplied custom_zlib can
+decompress; returns 87 when none is set.*/
+static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                                size_t insize, const LodePNGDecompressSettings* settings) {
+  if(settings->custom_zlib) {
+    return settings->custom_zlib(out, outsize, in, insize, settings);
+  }
+  return 87; /*no custom zlib function provided */
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Variant used when the built-in zlib code is compiled out: only a user supplied custom_zlib can
+compress; returns 87 when none is set.*/
+static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                              size_t insize, const LodePNGCompressSettings* settings) {
+  if(settings->custom_zlib) {
+    return settings->custom_zlib(out, outsize, in, insize, settings);
+  }
+  return 87; /*no custom zlib function provided */
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#endif /*LODEPNG_COMPILE_ZLIB*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/*this is a good tradeoff between speed and compression ratio*/
+#define DEFAULT_WINDOWSIZE 2048
+
+/*Fills settings with the default compression parameters and clears all custom hooks.*/
+void lodepng_compress_settings_init(LodePNGCompressSettings* settings) {
+  /*no user supplied replacements for zlib/deflate, and no context for them*/
+  settings->custom_context = 0;
+  settings->custom_deflate = 0;
+  settings->custom_zlib = 0;
+
+  /*LZ77 match finder parameters*/
+  settings->use_lz77 = 1;
+  settings->windowsize = DEFAULT_WINDOWSIZE;
+  settings->minmatch = 3;
+  settings->nicematch = 128;
+  settings->lazymatching = 1;
+
+  /*compress with dynamic huffman tree (not in the mathematical sense, just not the predefined one)*/
+  settings->btype = 2;
+}
+
+/*Positional initializer with the same nine values that lodepng_compress_settings_init assigns.
+NOTE(review): presumably in struct order btype, use_lz77, windowsize, minmatch, nicematch, lazymatching,
+custom_zlib, custom_deflate, custom_context - verify against the struct declaration.*/
+const LodePNGCompressSettings lodepng_default_compress_settings = {2, 1, DEFAULT_WINDOWSIZE, 3, 128, 1, 0, 0, 0};
+
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/*Fills settings with the defaults: the Adler-32 check stays enabled and all custom hooks are cleared.*/
+void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings) {
+  /*no user supplied replacements for zlib/inflate, and no context for them*/
+  settings->custom_context = 0;
+  settings->custom_inflate = 0;
+  settings->custom_zlib = 0;
+
+  settings->ignore_adler32 = 0;
+}
+
+/*All-zero positional initializer, matching what lodepng_decompress_settings_init assigns
+(ignore_adler32 and the three custom hook fields are all 0).*/
+const LodePNGDecompressSettings lodepng_default_decompress_settings = {0, 0, 0, 0};
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // End of Zlib related code. Begin of PNG related code.                 // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_PNG
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / CRC32                                                                  / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+
+#ifndef LODEPNG_NO_COMPILE_CRC
+/* CRC polynomial: 0xedb88320 */
+static unsigned lodepng_crc32_table[256] = {
+           0u, 1996959894u, 3993919788u, 2567524794u,  124634137u, 1886057615u, 3915621685u, 2657392035u,
+   249268274u, 2044508324u, 3772115230u, 2547177864u,  162941995u, 2125561021u, 3887607047u, 2428444049u,
+   498536548u, 1789927666u, 4089016648u, 2227061214u,  450548861u, 1843258603u, 4107580753u, 2211677639u,
+   325883990u, 1684777152u, 4251122042u, 2321926636u,  335633487u, 1661365465u, 4195302755u, 2366115317u,
+   997073096u, 1281953886u, 3579855332u, 2724688242u, 1006888145u, 1258607687u, 3524101629u, 2768942443u,
+   901097722u, 1119000684u, 3686517206u, 2898065728u,  853044451u, 1172266101u, 3705015759u, 2882616665u,
+   651767980u, 1373503546u, 3369554304u, 3218104598u,  565507253u, 1454621731u, 3485111705u, 3099436303u,
+   671266974u, 1594198024u, 3322730930u, 2970347812u,  795835527u, 1483230225u, 3244367275u, 3060149565u,
+  1994146192u,   31158534u, 2563907772u, 4023717930u, 1907459465u,  112637215u, 2680153253u, 3904427059u,
+  2013776290u,  251722036u, 2517215374u, 3775830040u, 2137656763u,  141376813u, 2439277719u, 3865271297u,
+  1802195444u,  476864866u, 2238001368u, 4066508878u, 1812370925u,  453092731u, 2181625025u, 4111451223u,
+  1706088902u,  314042704u, 2344532202u, 4240017532u, 1658658271u,  366619977u, 2362670323u, 4224994405u,
+  1303535960u,  984961486u, 2747007092u, 3569037538u, 1256170817u, 1037604311u, 2765210733u, 3554079995u,
+  1131014506u,  879679996u, 2909243462u, 3663771856u, 1141124467u,  855842277u, 2852801631u, 3708648649u,
+  1342533948u,  654459306u, 3188396048u, 3373015174u, 1466479909u,  544179635u, 3110523913u, 3462522015u,
+  1591671054u,  702138776u, 2966460450u, 3352799412u, 1504918807u,  783551873u, 3082640443u, 3233442989u,
+  3988292384u, 2596254646u,   62317068u, 1957810842u, 3939845945u, 2647816111u,   81470997u, 1943803523u,
+  3814918930u, 2489596804u,  225274430u, 2053790376u, 3826175755u, 2466906013u,  167816743u, 2097651377u,
+  4027552580u, 2265490386u,  503444072u, 1762050814u, 4150417245u, 2154129355u,  426522225u, 1852507879u,
+  4275313526u, 2312317920u,  282753626u, 1742555852u, 4189708143u, 2394877945u,  397917763u, 1622183637u,
+  3604390888u, 2714866558u,  953729732u, 1340076626u, 3518719985u, 2797360999u, 1068828381u, 1219638859u,
+  3624741850u, 2936675148u,  906185462u, 1090812512u, 3747672003u, 2825379669u,  829329135u, 1181335161u,
+  3412177804u, 3160834842u,  628085408u, 1382605366u, 3423369109u, 3138078467u,  570562233u, 1426400815u,
+  3317316542u, 2998733608u,  733239954u, 1555261956u, 3268935591u, 3050360625u,  752459403u, 1541320221u,
+  2607071920u, 3965973030u, 1969922972u,   40735498u, 2617837225u, 3943577151u, 1913087877u,   83908371u,
+  2512341634u, 3803740692u, 2075208622u,  213261112u, 2463272603u, 3855990285u, 2094854071u,  198958881u,
+  2262029012u, 4057260610u, 1759359992u,  534414190u, 2176718541u, 4139329115u, 1873836001u,  414664567u,
+  2282248934u, 4279200368u, 1711684554u,  285281116u, 2405801727u, 4167216745u, 1634467795u,  376229701u,
+  2685067896u, 3608007406u, 1308918612u,  956543938u, 2808555105u, 3495958263u, 1231636301u, 1047427035u,
+  2932959818u, 3654703836u, 1088359270u,  936918000u, 2847714899u, 3736837829u, 1202900863u,  817233897u,
+  3183342108u, 3401237130u, 1404277552u,  615818150u, 3134207493u, 3453421203u, 1423857449u,  601450431u,
+  3009837614u, 3294710456u, 1567103746u,  711928724u, 3020668471u, 3272380065u, 1510334235u,  755167117u
+};
+
+/*Returns the CRC of the bytes data[0..length-1], computed one byte at a time with lodepng_crc32_table.*/
+unsigned lodepng_crc32(const unsigned char* data, size_t length) {
+  unsigned crc = 0xffffffffu;
+  const unsigned char* p = data;
+  size_t remaining;
+  for(remaining = length; remaining != 0; --remaining) {
+    unsigned index = (crc ^ *p++) & 0xff;
+    crc = (crc >> 8) ^ lodepng_crc32_table[index];
+  }
+  return crc ^ 0xffffffffu;
+}
+#else /* !LODEPNG_NO_COMPILE_CRC */
+unsigned lodepng_crc32(const unsigned char* data, size_t length);
+#endif /* !LODEPNG_NO_COMPILE_CRC */
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Reading and writing single bits and bytes from/to stream for LodePNG   / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Reads the bit at position *bitpointer (bit 0 is the most significant bit of bitstream[0])
+and advances *bitpointer by one.*/
+static unsigned char readBitFromReversedStream(size_t* bitpointer, const unsigned char* bitstream) {
+  const size_t pos = (*bitpointer)++;
+  const size_t shift = 7 - (pos & 0x7);
+  return (unsigned char)((bitstream[pos >> 3] >> shift) & 1);
+}
+
+/*Reads nbits bits starting at *bitpointer, first bit read ending up most significant in the result,
+and advances *bitpointer by nbits.*/
+static unsigned readBitsFromReversedStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits) {
+  unsigned value = 0;
+  size_t remaining;
+  for(remaining = nbits; remaining != 0; --remaining) {
+    value = (value << 1) | (unsigned)readBitFromReversedStream(bitpointer, bitstream);
+  }
+  return value;
+}
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*Writes bit at position *bitpointer (bit 0 is the most significant bit of bitstream[0]) by OR-ing
+it in, and advances *bitpointer. The target bit must currently be 0 for this to work.*/
+static void setBitOfReversedStream0(size_t* bitpointer, unsigned char* bitstream, unsigned char bit) {
+  const size_t pos = (*bitpointer)++;
+  /*a zero bit needs no write because the target is already 0*/
+  if(bit) bitstream[pos >> 3] |= (bit << (7 - (pos & 0x7)));
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+/*Sets (bit != 0) or clears (bit == 0) the bit at position *bitpointer, whatever its current value,
+and advances *bitpointer. Bit 0 is the most significant bit of bitstream[0].*/
+static void setBitOfReversedStream(size_t* bitpointer, unsigned char* bitstream, unsigned char bit) {
+  const size_t pos = (*bitpointer)++;
+  unsigned char* target = &bitstream[pos >> 3];
+  const unsigned char mask = (unsigned char)(1 << (7 - (pos & 0x7)));
+  if(bit) *target |= mask;
+  else *target &= (unsigned char)(~mask);
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / PNG chunks                                                             / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Returns the length field stored at the very start of chunk, as decoded by lodepng_read32bitInt.*/
+unsigned lodepng_chunk_length(const unsigned char* chunk) {
+  const unsigned length = lodepng_read32bitInt(chunk);
+  return length;
+}
+
+/*Copies the 4 type bytes of chunk (offsets 4..7) into type and null-terminates it.*/
+void lodepng_chunk_type(char type[5], const unsigned char* chunk) {
+  const unsigned char* name = chunk + 4;
+  type[0] = (char)name[0];
+  type[1] = (char)name[1];
+  type[2] = (char)name[2];
+  type[3] = (char)name[3];
+  type[4] = 0; /*null termination char*/
+}
+
+/*Returns 1 when type is exactly 4 characters long and equals the type bytes of chunk
+(offsets 4..7), 0 otherwise.*/
+unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type) {
+  const unsigned char* name = chunk + 4;
+  unsigned k;
+  if(strlen(type) != 4) return 0;
+  for(k = 0; k != 4; ++k) {
+    if(name[k] != type[k]) return 0;
+  }
+  return 1;
+}
+
+/*Returns 1 when bit 5 (value 32) of the first type byte is set, 0 otherwise.*/
+unsigned char lodepng_chunk_ancillary(const unsigned char* chunk) {
+  return (unsigned char)((chunk[4] >> 5) & 1);
+}
+
+/*Returns 1 when bit 5 (value 32) of the third type byte is set, 0 otherwise.*/
+unsigned char lodepng_chunk_private(const unsigned char* chunk) {
+  return (unsigned char)((chunk[6] >> 5) & 1);
+}
+
+/*Returns 1 when bit 5 (value 32) of the fourth type byte is set, 0 otherwise.*/
+unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk) {
+  return (unsigned char)((chunk[7] >> 5) & 1);
+}
+
+/*Returns a pointer to the data of chunk, which starts after the 8 bytes of length and type.*/
+unsigned char* lodepng_chunk_data(unsigned char* chunk) {
+  return chunk + 8;
+}
+
+/*Const variant: returns a pointer to the data of chunk, 8 bytes past its start.*/
+const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk) {
+  return chunk + 8;
+}
+
+/*Compares the CRC stored after the data of chunk with a freshly computed one.
+Returns 0 when they match and 1 when they differ.*/
+unsigned lodepng_chunk_check_crc(const unsigned char* chunk) {
+  const unsigned length = lodepng_chunk_length(chunk);
+  const unsigned stored = lodepng_read32bitInt(&chunk[length + 8]);
+  /*the CRC is taken of the data and the 4 chunk type letters, not the length*/
+  const unsigned computed = lodepng_crc32(&chunk[4], length + 4);
+  return (stored != computed) ? 1 : 0;
+}
+
+/*Computes the CRC over the type and data bytes of chunk and stores it right after the data.*/
+void lodepng_chunk_generate_crc(unsigned char* chunk) {
+  const unsigned length = lodepng_chunk_length(chunk);
+  unsigned char* crc_field = chunk + 8 + length;
+  /*the CRC covers the 4 type bytes plus the data, not the length field*/
+  lodepng_set32bitInt(crc_field, lodepng_crc32(&chunk[4], length + 4));
+}
+
+/*Returns a pointer to what follows chunk: 8 bytes further when chunk points at the PNG magic header,
+otherwise past the chunk (its length field + 12 bytes of length, type and CRC).*/
+unsigned char* lodepng_chunk_next(unsigned char* chunk) {
+  static const unsigned char png_magic[8] = {0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a};
+  unsigned matched = 0;
+  unsigned total_chunk_length;
+
+  while(matched != 8 && chunk[matched] == png_magic[matched]) ++matched;
+  if(matched == 8) {
+    /* Is PNG magic header at start of PNG file. Jump to first actual chunk. */
+    return chunk + 8;
+  }
+
+  total_chunk_length = lodepng_chunk_length(chunk) + 12;
+  return chunk + total_chunk_length;
+}
+
+/*Const variant: returns a pointer to what follows chunk, 8 bytes further when chunk points at the
+PNG magic header, otherwise past the chunk (its length field + 12 bytes of length, type and CRC).*/
+const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk) {
+  static const unsigned char png_magic[8] = {0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a};
+  unsigned matched = 0;
+  unsigned total_chunk_length;
+
+  while(matched != 8 && chunk[matched] == png_magic[matched]) ++matched;
+  if(matched == 8) {
+    /* Is PNG magic header at start of PNG file. Jump to first actual chunk. */
+    return chunk + 8;
+  }
+
+  total_chunk_length = lodepng_chunk_length(chunk) + 12;
+  return chunk + total_chunk_length;
+}
+
+/*
+Walks the chunks starting at chunk and returns the first one whose type equals type.
+end is treated as one past the last byte of the buffer. Returns 0 when fewer than 12 bytes
+(the size of a chunk without data) remain before end.
+*/
+unsigned char* lodepng_chunk_find(unsigned char* chunk, const unsigned char* end, const char type[5]) {
+  for(;;) {
+    /*past the end only when chunk + 12 > end: a data-less chunk that ends exactly at end is still valid*/
+    if(chunk >= end || end - chunk < 12) return 0;
+    if(lodepng_chunk_type_equals(chunk, type)) return chunk;
+    chunk = lodepng_chunk_next(chunk);
+  }
+}
+
+/*
+Const variant: walks the chunks starting at chunk and returns the first one whose type equals type.
+end is treated as one past the last byte of the buffer. Returns 0 when fewer than 12 bytes
+(the size of a chunk without data) remain before end.
+*/
+const unsigned char* lodepng_chunk_find_const(const unsigned char* chunk, const unsigned char* end, const char type[5]) {
+  for(;;) {
+    /*past the end only when chunk + 12 > end: a data-less chunk that ends exactly at end is still valid*/
+    if(chunk >= end || end - chunk < 12) return 0;
+    if(lodepng_chunk_type_equals(chunk, type)) return chunk;
+    chunk = lodepng_chunk_next_const(chunk);
+  }
+}
+
+/*
+Appends a copy of the complete chunk (length field + 12 bytes) to the buffer *out of size *outlength,
+growing the buffer with lodepng_realloc and updating both on success.
+Returns 0 on success, 77 when the new size overflows, 83 when the allocation fails.
+*/
+unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk) {
+  size_t i;
+  /*widened before adding 12: in unsigned arithmetic a huge length field would wrap to a tiny total*/
+  size_t total_chunk_length = (size_t)lodepng_chunk_length(chunk) + 12;
+  unsigned char *chunk_start, *new_buffer;
+  size_t new_length = (*outlength) + total_chunk_length;
+  if(total_chunk_length < 12) return 77; /*integer overflow happened*/
+  if(new_length < total_chunk_length || new_length < (*outlength)) return 77; /*integer overflow happened*/
+
+  new_buffer = (unsigned char*)lodepng_realloc(*out, new_length);
+  if(!new_buffer) return 83; /*alloc fail*/
+  (*out) = new_buffer;
+  (*outlength) = new_length;
+  chunk_start = &(*out)[new_length - total_chunk_length];
+
+  for(i = 0; i != total_chunk_length; ++i) chunk_start[i] = chunk[i];
+
+  return 0;
+}
+
+/*
+Appends a new chunk to the buffer *out of size *outlength: the length field, the 4 type characters,
+length bytes copied from data, and the generated CRC. *out and *outlength are updated on success.
+Returns 0 on success, 77 when the new size overflows, 83 when the allocation fails.
+*/
+unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
+                              const char* type, const unsigned char* data) {
+  unsigned i;
+  unsigned char *chunk, *new_buffer;
+  /*widened before adding 12: computed in unsigned, length + 12 can wrap and defeat the check below*/
+  size_t total_chunk_length = (size_t)length + 12;
+  size_t new_length = (*outlength) + total_chunk_length;
+  if(total_chunk_length < 12) return 77; /*integer overflow happened*/
+  if(new_length < total_chunk_length || new_length < (*outlength)) return 77; /*integer overflow happened*/
+  new_buffer = (unsigned char*)lodepng_realloc(*out, new_length);
+  if(!new_buffer) return 83; /*alloc fail*/
+  (*out) = new_buffer;
+  (*outlength) = new_length;
+  chunk = &(*out)[new_length - total_chunk_length];
+
+  /*1: length*/
+  lodepng_set32bitInt(chunk, (unsigned)length);
+
+  /*2: chunk name (4 letters)*/
+  chunk[4] = (unsigned char)type[0];
+  chunk[5] = (unsigned char)type[1];
+  chunk[6] = (unsigned char)type[2];
+  chunk[7] = (unsigned char)type[3];
+
+  /*3: the data*/
+  for(i = 0; i != length; ++i) chunk[8 + i] = data[i];
+
+  /*4: CRC (of the chunkname characters and the data)*/
+  lodepng_chunk_generate_crc(chunk);
+
+  return 0;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Color types and such                                                   / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Checks a color type / bit depth (bd) combination. Returns a LodePNG error code:
+0 when the combination is allowed, 37 for a bit depth the color type does not allow,
+31 for a color type other than 0, 2, 3, 4 or 6.*/
+static unsigned checkColorValidity(LodePNGColorType colortype, unsigned bd) /*bd = bitdepth*/ {
+  const unsigned below8 = (bd == 1 || bd == 2 || bd == 4);
+  const unsigned is8or16 = (bd == 8 || bd == 16);
+  unsigned allowed;
+  switch(colortype) {
+    case 0: allowed = (below8 || is8or16); break; /*grey*/
+    case 3: allowed = (below8 || bd == 8); break; /*palette*/
+    case 2: /*RGB*/
+    case 4: /*grey + alpha*/
+    case 6: allowed = is8or16; break; /*RGBA*/
+    default: return 31;
+  }
+  return allowed ? 0 : 37; /*0: allowed color type / bits combination*/
+}
+
+/*Returns the number of channels per pixel for colortype, or 0 for a color type that does not exist.*/
+static unsigned getNumColorChannels(LodePNGColorType colortype) {
+  if(colortype == 0 || colortype == 3) return 1; /*grey, palette*/
+  if(colortype == 2) return 3; /*RGB*/
+  if(colortype == 4) return 2; /*grey + alpha*/
+  if(colortype == 6) return 4; /*RGBA*/
+  return 0; /*unexisting color type*/
+}
+
+/*Returns the bits per pixel: the channel count of colortype multiplied by the bits per channel.*/
+static unsigned lodepng_get_bpp_lct(LodePNGColorType colortype, unsigned bitdepth) {
+  const unsigned channels = getNumColorChannels(colortype);
+  return channels * bitdepth;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Initializes info to 8-bit RGBA with no color key and no palette.*/
+void lodepng_color_mode_init(LodePNGColorMode* info) {
+  info->colortype = LCT_RGBA;
+  info->bitdepth = 8;
+
+  info->palette = 0;
+  info->palettesize = 0;
+
+  info->key_defined = 0;
+  info->key_r = 0;
+  info->key_g = 0;
+  info->key_b = 0;
+}
+
+/*Releases what info owns; the palette is the only thing handled here, via lodepng_palette_clear.*/
+void lodepng_color_mode_cleanup(LodePNGColorMode* info) {
+  lodepng_palette_clear(info);
+}
+
+/*
+Makes dest a copy of source: dest is cleaned up first, all fields are copied, and the palette (if
+source has one) is duplicated into a freshly allocated 1024-byte buffer.
+Returns 0 on success or 83 when the palette allocation fails; dest then has no palette entries.
+*/
+unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source) {
+  size_t i;
+  lodepng_color_mode_cleanup(dest);
+  *dest = *source;
+  if(source->palette) {
+    dest->palette = (unsigned char*)lodepng_malloc(1024);
+    if(!dest->palette && source->palettesize) {
+      /*keep dest consistent: without a buffer it must not claim to hold entries*/
+      dest->palettesize = 0;
+      return 83; /*alloc fail*/
+    }
+    for(i = 0; i != source->palettesize * 4; ++i) dest->palette[i] = source->palette[i];
+  }
+  return 0;
+}
+
+/*Returns a color mode initialized by lodepng_color_mode_init and then set to the given
+colortype and bitdepth.*/
+LodePNGColorMode lodepng_color_mode_make(LodePNGColorType colortype, unsigned bitdepth) {
+  LodePNGColorMode mode;
+  lodepng_color_mode_init(&mode);
+  mode.bitdepth = bitdepth;
+  mode.colortype = colortype;
+  return mode;
+}
+
+/*Returns 1 when a and b have the same color type, bit depth, color key (the key values are only
+compared when a key is defined) and palette contents, 0 otherwise.*/
+static int lodepng_color_mode_equal(const LodePNGColorMode* a, const LodePNGColorMode* b) {
+  size_t i, palette_bytes;
+
+  if(a->colortype != b->colortype || a->bitdepth != b->bitdepth) return 0;
+  if(a->key_defined != b->key_defined) return 0;
+  if(a->key_defined && (a->key_r != b->key_r || a->key_g != b->key_g || a->key_b != b->key_b)) return 0;
+  if(a->palettesize != b->palettesize) return 0;
+
+  /*4 bytes per palette entry*/
+  palette_bytes = a->palettesize * 4;
+  for(i = 0; i != palette_bytes; ++i) {
+    if(a->palette[i] != b->palette[i]) return 0;
+  }
+  return 1;
+}
+
+/*Frees the palette buffer of info, if there is one, and resets it to an empty palette.*/
+void lodepng_palette_clear(LodePNGColorMode* info) {
+  if(info->palette) {
+    lodepng_free(info->palette);
+  }
+  info->palettesize = 0;
+  info->palette = 0;
+}
+
+/*
+Appends one RGBA entry to the palette of info, allocating the 1024-byte palette buffer
+(256 entries of 4 bytes each) on first use.
+Returns 0 on success, 83 when the allocation fails, 38 when the palette already holds 256 entries.
+*/
+unsigned lodepng_palette_add(LodePNGColorMode* info,
+                             unsigned char r, unsigned char g, unsigned char b, unsigned char a) {
+  unsigned char* entry;
+  if(!info->palette) /*allocate palette if empty*/ {
+    /*room for 256 colors with 4 bytes each; the buffer is never grown afterwards*/
+    unsigned char* data = (unsigned char*)lodepng_realloc(info->palette, 1024);
+    if(!data) return 83; /*alloc fail*/
+    info->palette = data;
+  }
+  /*the buffer holds exactly 256 entries: refuse to write past its end.
+  NOTE(review): 38 is assumed to be this library's "palette too big" code - confirm in lodepng_error_text*/
+  if(info->palettesize >= 256) return 38;
+
+  entry = &info->palette[4 * info->palettesize];
+  entry[0] = r;
+  entry[1] = g;
+  entry[2] = b;
+  entry[3] = a;
+  ++info->palettesize;
+  return 0;
+}
+
+/*Returns the bits per pixel of info: lodepng_get_bpp_lct applied to its colortype and bitdepth.*/
+unsigned lodepng_get_bpp(const LodePNGColorMode* info) {
+  return lodepng_get_bpp_lct(info->colortype, info->bitdepth);
+}
+
+/*Returns the number of channels of info's color type, as given by getNumColorChannels.*/
+unsigned lodepng_get_channels(const LodePNGColorMode* info) {
+  return getNumColorChannels(info->colortype);
+}
+
+/*Returns 1 when the color type of info is LCT_GREY or LCT_GREY_ALPHA, 0 otherwise.*/
+unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info) {
+  if(info->colortype == LCT_GREY) return 1;
+  if(info->colortype == LCT_GREY_ALPHA) return 1;
+  return 0;
+}
+
+/*Returns 1 when bit 2 (value 4) of the color type is set, i.e. for color types 4 and 6; 0 otherwise.*/
+unsigned lodepng_is_alpha_type(const LodePNGColorMode* info) {
+  if(info->colortype & 4) return 1; /*4 or 6*/
+  return 0;
+}
+
+/*Returns 1 when the color type of info is LCT_PALETTE, 0 otherwise.*/
+unsigned lodepng_is_palette_type(const LodePNGColorMode* info) {
+  if(info->colortype == LCT_PALETTE) return 1;
+  return 0;
+}
+
+/*Returns 1 when at least one palette entry has an alpha byte below 255, 0 otherwise
+(also 0 for an empty palette).*/
+unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info) {
+  size_t remaining = info->palettesize;
+  size_t offset = 3; /*alpha is the 4th byte of every 4-byte entry*/
+  for(; remaining != 0; --remaining, offset += 4) {
+    if(info->palette[offset] != 255) return 1;
+  }
+  return 0;
+}
+
+/*Returns 1 when info has a color key, an alpha color type, or a palette entry with alpha below 255;
+0 otherwise. The checks run in that order and stop at the first hit.*/
+unsigned lodepng_can_have_alpha(const LodePNGColorMode* info) {
+  if(info->key_defined) return 1;
+  if(lodepng_is_alpha_type(info)) return 1;
+  if(lodepng_has_palette_alpha(info)) return 1;
+  return 0;
+}
+
+/*Returns the number of bytes needed for w * h pixels of the given color type and bit depth,
+with the total number of bits rounded up to whole bytes.*/
+size_t lodepng_get_raw_size_lct(unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth) {
+  const size_t bpp = lodepng_get_bpp_lct(colortype, bitdepth);
+  const size_t pixels = (size_t)w * (size_t)h;
+  /*split into whole groups of 8 pixels and the remainder, so that pixels * bpp is never formed*/
+  const size_t group_bytes = (pixels / 8) * bpp;
+  const size_t rest_bytes = ((pixels & 7) * bpp + 7) / 8;
+  return group_bytes + rest_bytes;
+}
+
+/*Returns the raw byte size of a w x h image in the given color mode: lodepng_get_raw_size_lct
+applied to color's colortype and bitdepth.*/
+size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color) {
+  return lodepng_get_raw_size_lct(w, h, color->colortype, color->bitdepth);
+}
+
+
+#ifdef LODEPNG_COMPILE_PNG
+#ifdef LODEPNG_COMPILE_DECODER
+
+/*in an idat chunk, each scanline is a multiple of 8 bits, unlike the lodepng output buffer,
+and in addition has one extra byte per line: the filter byte. So this gives a larger
+result than lodepng_get_raw_size. */
+static size_t lodepng_get_raw_size_idat(unsigned w, unsigned h, const LodePNGColorMode* color) {
+  const size_t bpp = lodepng_get_bpp(color);
+  const size_t group_bytes = (size_t)(w / 8) * bpp; /*whole groups of 8 pixels*/
+  const size_t rest_bytes = ((w & 7) * bpp + 7) / 8; /*remaining pixels, padded up to a whole byte*/
+  /* + 1 for the filter byte */
+  const size_t line = group_bytes + 1 + rest_bytes;
+  return (size_t)h * line;
+}
+
+/* Stores a * b (with wrap-around) in *result and returns 1 when the multiplication overflowed,
+0 when it did not. Uses only well defined unsigned arithmetic. */
+static int lodepng_mulofl(size_t a, size_t b, size_t* result) {
+  const size_t product = a * b; /* Unsigned multiplication is well defined and safe in C90 */
+  *result = product;
+  if(a == 0) return 0; /* 0 * b cannot overflow, and a is the divisor below */
+  return (product / a != b) ? 1 : 0;
+}
+
+/* Stores a + b (with wrap-around) in *result and returns 1 when the addition overflowed,
+0 when it did not. Uses only well defined unsigned arithmetic. */
+static int lodepng_addofl(size_t a, size_t b, size_t* result) {
+  const size_t sum = a + b; /* Unsigned addition is well defined and safe in C90 */
+  *result = sum;
+  return (sum < a) ? 1 : 0; /* a wrapped sum is smaller than either operand */
+}
+
+/*Safely checks whether size_t overflow can be caused due to amount of pixels.
+This check is overcautious rather than precise. If this check indicates no overflow,
+you can safely compute in a size_t (but not an unsigned):
+-(size_t)w * (size_t)h * 8
+-amount of bytes in IDAT (including filter, padding and Adam7 bytes)
+-amount of bytes in raw color model
+pngcolor and rawcolor are the two color modes involved; the larger of their bits per pixel is used.
+Returns 1 if overflow possible, 0 if not.
+*/
+static int lodepng_pixel_overflow(unsigned w, unsigned h,
+                                  const LodePNGColorMode* pngcolor, const LodePNGColorMode* rawcolor) {
+  size_t bpp = LODEPNG_MAX(lodepng_get_bpp(pngcolor), lodepng_get_bpp(rawcolor));
+  size_t numpixels, total; /* only their overflow status matters; the values are not returned */
+  size_t line; /* bytes per line in worst case */
+
+  if(lodepng_mulofl((size_t)w, (size_t)h, &numpixels)) return 1;
+  if(lodepng_mulofl(numpixels, 8, &total)) return 1; /* bit pointer with 8-bit color, or 8 bytes per channel color */
+
+  /* Bytes per scanline with the expression "(w / 8) * bpp) + ((w & 7) * bpp + 7) / 8" */
+  if(lodepng_mulofl((size_t)(w / 8), bpp, &line)) return 1;
+  if(lodepng_addofl(line, ((w & 7) * bpp + 7) / 8, &line)) return 1;
+
+  if(lodepng_addofl(line, 5, &line)) return 1; /* 5 bytes overhead per line: 1 filterbyte, 4 for Adam7 worst case */
+  if(lodepng_mulofl(line, h, &total)) return 1; /* Total bytes in worst case */
+
+  return 0; /* no overflow */
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+
+/*Resets the three unknown-chunk slots of info to "no data, size 0".*/
+static void LodePNGUnknownChunks_init(LodePNGInfo* info) {
+  unsigned slot;
+  for(slot = 0; slot != 3; ++slot) {
+    info->unknown_chunks_data[slot] = 0;
+    info->unknown_chunks_size[slot] = 0;
+  }
+}
+
+/*Frees the data buffers of the three unknown-chunk slots of info; pointers and sizes are left as is.*/
+static void LodePNGUnknownChunks_cleanup(LodePNGInfo* info) {
+  lodepng_free(info->unknown_chunks_data[0]);
+  lodepng_free(info->unknown_chunks_data[1]);
+  lodepng_free(info->unknown_chunks_data[2]);
+}
+
+/*
+Replaces the three unknown-chunk buffers of dest with copies of those of src.
+Returns 0 on success or 83 when an allocation fails; slots not copied yet are then left empty.
+*/
+static unsigned LodePNGUnknownChunks_copy(LodePNGInfo* dest, const LodePNGInfo* src) {
+  unsigned i;
+
+  LodePNGUnknownChunks_cleanup(dest);
+  /*cleanup only frees: reset the slots so that a failure below leaves no pointer to freed memory*/
+  LodePNGUnknownChunks_init(dest);
+
+  for(i = 0; i != 3; ++i) {
+    size_t j;
+    dest->unknown_chunks_data[i] = (unsigned char*)lodepng_malloc(src->unknown_chunks_size[i]);
+    if(!dest->unknown_chunks_data[i] && src->unknown_chunks_size[i]) return 83; /*alloc fail*/
+    /*the size is only recorded once the buffer for it exists*/
+    dest->unknown_chunks_size[i] = src->unknown_chunks_size[i];
+    for(j = 0; j < src->unknown_chunks_size[i]; ++j) {
+      dest->unknown_chunks_data[i][j] = src->unknown_chunks_data[i][j];
+    }
+  }
+
+  return 0;
+}
+
+/******************************************************************************/
+
+/*Resets the text storage of info to empty: no key/string arrays and a count of 0.*/
+static void LodePNGText_init(LodePNGInfo* info) {
+  info->text_strings = NULL;
+  info->text_keys = NULL;
+  info->text_num = 0;
+}
+
+/*Cleans up every stored key and string with string_cleanup and frees the two arrays holding them.
+text_num and the array pointers themselves are not reset.*/
+static void LodePNGText_cleanup(LodePNGInfo* info) {
+  size_t index = 0;
+  while(index != info->text_num) {
+    string_cleanup(&info->text_keys[index]);
+    string_cleanup(&info->text_strings[index]);
+    ++index;
+  }
+  lodepng_free(info->text_keys);
+  lodepng_free(info->text_strings);
+}
+
+/*
+Makes the text entries of dest a copy of those of source by re-adding every key/string pair with
+lodepng_add_text. The text fields of dest are overwritten without being freed first.
+Returns 0 on success; a failing lodepng_add_text is handled by CERROR_TRY_RETURN
+(presumably by returning its error code - confirm against the macro).
+*/
+static unsigned LodePNGText_copy(LodePNGInfo* dest, const LodePNGInfo* source) {
+  size_t i = 0;
+  dest->text_keys = 0;
+  dest->text_strings = 0;
+  dest->text_num = 0;
+  for(i = 0; i != source->text_num; ++i) {
+    CERROR_TRY_RETURN(lodepng_add_text(dest, source->text_keys[i], source->text_strings[i]));
+  }
+  return 0;
+}
+
+/*Removes all text entries of info and leaves it in the empty state, so that entries can be added
+again and a later cleanup does not touch the freed arrays.*/
+void lodepng_clear_text(LodePNGInfo* info) {
+  LodePNGText_cleanup(info);
+  /*cleanup only frees; reset the count and pointers that still describe the freed arrays*/
+  LodePNGText_init(info);
+}
+
+/*
+Appends one key/string pair to the text entries of info; both are stored as copies made by alloc_string.
+Returns 0 on success or 83 when growing one of the two arrays fails; no entry is added then.
+*/
+unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str) {
+  char** new_keys = (char**)(lodepng_realloc(info->text_keys, sizeof(char*) * (info->text_num + 1)));
+  char** new_strings = (char**)(lodepng_realloc(info->text_strings, sizeof(char*) * (info->text_num + 1)));
+
+  /*a successful realloc may already have released the old block, so store each successful result
+  right away; freeing it instead would leave info pointing at memory that is gone when only one of
+  the two calls fails*/
+  if(new_keys) info->text_keys = new_keys;
+  if(new_strings) info->text_strings = new_strings;
+  if(!new_keys || !new_strings) return 83; /*alloc fail*/
+
+  ++info->text_num;
+
+  info->text_keys[info->text_num - 1] = alloc_string(key);
+  info->text_strings[info->text_num - 1] = alloc_string(str);
+
+  return 0;
+}
+
+/******************************************************************************/
+
+/*Puts the iTXt bookkeeping of info into the empty state: zero entries and none
+of the four parallel string arrays allocated.*/
+static void LodePNGIText_init(LodePNGInfo* info) {
+  info->itext_strings = NULL;
+  info->itext_transkeys = NULL;
+  info->itext_langtags = NULL;
+  info->itext_keys = NULL;
+  info->itext_num = 0;
+}
+
+/*Releases the key, language tag, translated key and string of every iTXt entry
+of info and then the four pointer arrays themselves. itext_num and the array
+pointers are not reset here.*/
+static void LodePNGIText_cleanup(LodePNGInfo* info) {
+  size_t n = 0;
+  while(n != info->itext_num) {
+    string_cleanup(&info->itext_keys[n]);
+    string_cleanup(&info->itext_langtags[n]);
+    string_cleanup(&info->itext_transkeys[n]);
+    string_cleanup(&info->itext_strings[n]);
+    ++n;
+  }
+  lodepng_free(info->itext_keys);
+  lodepng_free(info->itext_langtags);
+  lodepng_free(info->itext_transkeys);
+  lodepng_free(info->itext_strings);
+}
+
+/*Rebuilds the iTXt list of dest from source by calling lodepng_add_itext for
+every entry. The previous itext fields of dest are overwritten without being
+freed. Returns 0 when all entries were added; errors from lodepng_add_itext
+are handed to CERROR_TRY_RETURN.*/
+static unsigned LodePNGIText_copy(LodePNGInfo* dest, const LodePNGInfo* source) {
+  size_t n;
+  dest->itext_num = 0;
+  dest->itext_strings = 0;
+  dest->itext_transkeys = 0;
+  dest->itext_langtags = 0;
+  dest->itext_keys = 0;
+  for(n = 0; n != source->itext_num; ++n) {
+    CERROR_TRY_RETURN(lodepng_add_itext(dest, source->itext_keys[n], source->itext_langtags[n],
+                                        source->itext_transkeys[n], source->itext_strings[n]));
+  }
+  return 0;
+}
+
+/*Frees all iTXt entries of info and returns the itext fields to the empty
+state. The reset matters: LodePNGIText_cleanup only frees, so without it a
+later lodepng_add_itext or lodepng_info_cleanup would work on the already
+freed arrays with the stale itext_num (use after free / double free).*/
+void lodepng_clear_itext(LodePNGInfo* info) {
+  LodePNGIText_cleanup(info);
+  LodePNGIText_init(info);
+}
+
+/*Appends one iTXt entry (key, language tag, translated key, string) to info,
+storing copies made by alloc_string. Returns 0 on success and 83 when an
+allocation fails.
+
+Each of the four arrays is committed to info as soon as its own realloc
+succeeds: a successful realloc may already have released the old block, so the
+old pointer must not be kept, and the new block must not be freed, just because
+another realloc failed. itext_num is only incremented once all four arrays have
+room, so info stays consistent for lodepng_info_cleanup on every path.*/
+unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag,
+                           const char* transkey, const char* str) {
+  char** new_keys = (char**)(lodepng_realloc(info->itext_keys, sizeof(char*) * (info->itext_num + 1)));
+  char** new_langtags = (char**)(lodepng_realloc(info->itext_langtags, sizeof(char*) * (info->itext_num + 1)));
+  char** new_transkeys = (char**)(lodepng_realloc(info->itext_transkeys, sizeof(char*) * (info->itext_num + 1)));
+  char** new_strings = (char**)(lodepng_realloc(info->itext_strings, sizeof(char*) * (info->itext_num + 1)));
+
+  if(new_keys) info->itext_keys = new_keys;
+  if(new_langtags) info->itext_langtags = new_langtags;
+  if(new_transkeys) info->itext_transkeys = new_transkeys;
+  if(new_strings) info->itext_strings = new_strings;
+  if(!new_keys || !new_langtags || !new_transkeys || !new_strings) return 83; /*alloc fail*/
+
+  ++info->itext_num;
+  info->itext_keys[info->itext_num - 1] = alloc_string(key);
+  info->itext_langtags[info->itext_num - 1] = alloc_string(langtag);
+  info->itext_transkeys[info->itext_num - 1] = alloc_string(transkey);
+  info->itext_strings[info->itext_num - 1] = alloc_string(str);
+  /*a failed string copy leaves a NULL slot: report it instead of claiming success*/
+  if(!info->itext_keys[info->itext_num - 1] || !info->itext_langtags[info->itext_num - 1] ||
+     !info->itext_transkeys[info->itext_num - 1] || !info->itext_strings[info->itext_num - 1]) {
+    return 83; /*alloc fail*/
+  }
+
+  return 0;
+}
+
+/*Stores a copy of the ICC profile name and of the profile_size profile bytes in
+info, without freeing whatever info held before (lodepng_set_icc is the variant
+that clears first). Returns 0 on success and 83 if either allocation failed;
+in the failure case iccp_profile_size is not updated.*/
+static unsigned lodepng_assign_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size) {
+  info->iccp_name = alloc_string(name);
+  info->iccp_profile = (unsigned char*)lodepng_malloc(profile_size);
+
+  if(info->iccp_name && info->iccp_profile) {
+    memcpy(info->iccp_profile, profile, profile_size);
+    info->iccp_profile_size = profile_size;
+    return 0; /*ok*/
+  }
+
+  return 83; /*alloc fail*/
+}
+
+/*Replaces the ICC profile of info with a copy of the given name and profile
+bytes. An existing profile is released first, which is detected by iccp_name
+being non-NULL. Returns the result of lodepng_assign_icc.*/
+unsigned lodepng_set_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size) {
+  if(info->iccp_name != NULL) {
+    lodepng_clear_icc(info);
+  }
+  return lodepng_assign_icc(info, name, profile, profile_size);
+}
+
+/*Releases the ICC profile bytes and name held by info and resets the profile
+pointer to NULL and the profile size to 0. iccp_defined is not touched.*/
+void lodepng_clear_icc(LodePNGInfo* info) {
+  lodepng_free(info->iccp_profile);
+  info->iccp_profile_size = 0;
+  info->iccp_profile = NULL;
+  string_cleanup(&info->iccp_name);
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*Brings info into its default state: default color mode, all method fields 0
+and, when ancillary chunk support is compiled in, no background, texts, time,
+phys, gamma, chrm, srgb, ICC profile or unknown chunks.*/
+void lodepng_info_init(LodePNGInfo* info) {
+  lodepng_color_mode_init(&info->color);
+  info->compression_method = 0;
+  info->filter_method = 0;
+  info->interlace_method = 0;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  info->background_defined = 0;
+  info->background_r = 0;
+  info->background_g = 0;
+  info->background_b = 0;
+
+  info->time_defined = 0;
+  info->phys_defined = 0;
+  info->gama_defined = 0;
+  info->chrm_defined = 0;
+  info->srgb_defined = 0;
+
+  info->iccp_defined = 0;
+  info->iccp_name = NULL;
+  info->iccp_profile = NULL;
+
+  LodePNGText_init(info);
+  LodePNGIText_init(info);
+  LodePNGUnknownChunks_init(info);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+}
+
+/*Releases everything info owns: with ancillary chunk support the tEXt and iTXt
+lists, the ICC profile and the unknown chunks, and in all builds the color mode.*/
+void lodepng_info_cleanup(LodePNGInfo* info) {
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  LodePNGText_cleanup(info);
+  LodePNGIText_cleanup(info);
+  lodepng_clear_icc(info);
+  LodePNGUnknownChunks_cleanup(info);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+  lodepng_color_mode_cleanup(&info->color);
+}
+
+/*Makes dest a copy of source that owns its own memory. dest must be an
+initialized LodePNGInfo, because its current contents are cleaned up first.
+Returns 0 on success; errors from the copy helpers are handed to
+CERROR_TRY_RETURN.
+
+The struct assignment copies the owning pointers of source into dest verbatim.
+All of them are put back into the empty state straight away, before anything
+can fail, so that neither an early error return nor an ICC profile stored
+while iccp_defined is 0 leaves dest aliasing memory owned by source (which
+would be freed twice once both structs are cleaned up).*/
+unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source) {
+  lodepng_info_cleanup(dest);
+  *dest = *source;
+  lodepng_color_mode_init(&dest->color);
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  LodePNGText_init(dest);
+  LodePNGIText_init(dest);
+  dest->iccp_name = NULL;
+  dest->iccp_profile = NULL;
+  dest->iccp_profile_size = 0;
+  LodePNGUnknownChunks_init(dest);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+  CERROR_TRY_RETURN(lodepng_color_mode_copy(&dest->color, &source->color));
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  CERROR_TRY_RETURN(LodePNGText_copy(dest, source));
+  CERROR_TRY_RETURN(LodePNGIText_copy(dest, source));
+  if(source->iccp_defined) {
+    CERROR_TRY_RETURN(lodepng_assign_icc(dest, source->iccp_name, source->iccp_profile, source->iccp_profile_size));
+  }
+
+  CERROR_TRY_RETURN(LodePNGUnknownChunks_copy(dest, source));
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+  return 0;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Writes one bit group of 1, 2 or 4 bits into a packed octet array.
+index: bitgroup index, bits: bitgroup size (1, 2 or 4), in: bitgroup value
+(bits above the group size are ignored), out: octet array to add bits to.
+The first group of an octet overwrites the whole octet, later groups are OR-ed
+in, so groups must be added in increasing index order.*/
+static void addColorBits(unsigned char* out, size_t index, unsigned bits, unsigned in) {
+  unsigned char* octet = &out[index * bits / 8];
+  unsigned last_slot; /*8 / bits - 1*/
+  unsigned slot; /*position of the group inside its octet, 0 = most significant*/
+  unsigned shifted;
+
+  switch(bits) {
+    case 1: last_slot = 7; break;
+    case 2: last_slot = 3; break;
+    default: last_slot = 1; break;
+  }
+  slot = (unsigned)(index & last_slot);
+  shifted = (in & ((1u << bits) - 1u)) << (bits * (last_slot - slot));
+
+  if(slot == 0) *octet = shifted;
+  else *octet |= shifted;
+}
+
+typedef struct ColorTree ColorTree;
+
+/*
+One node of a color tree
+This is the data structure used to count the number of unique colors and to get a palette
+index for a color. It's like an octree, but because the alpha channel is used too, each
+node has 16 instead of 8 children. At every level one bit each of r, g, b and a selects the
+child (see color_tree_get), so a complete color sits 8 levels below the root.
+*/
+struct ColorTree {
+  ColorTree* children[16]; /*up to 16 pointers to ColorTree of next level, 0 where there is no child*/
+  int index; /*the payload. Only has a meaningful value if this is in the last level; color_tree_init sets -1*/
+};
+
+/*Resets a node to the empty state: no children and index -1.*/
+static void color_tree_init(ColorTree* tree) {
+  ColorTree** slot = tree->children;
+  ColorTree** end = slot + 16;
+  while(slot != end) *slot++ = 0;
+  tree->index = -1;
+}
+
+/*Recursively frees all descendants of tree. The node tree itself is not freed
+and its child pointers are not reset.*/
+static void color_tree_cleanup(ColorTree* tree) {
+  int slot = 0;
+  while(slot != 16) {
+    ColorTree* child = tree->children[slot++];
+    if(!child) continue;
+    color_tree_cleanup(child);
+    lodepng_free(child);
+  }
+}
+
+/*Looks up an RGBA color: walks 8 levels down, at each level picking the child
+from one bit of r, g, b and a (least significant bit first).
+returns -1 if color not present, its index otherwise*/
+static int color_tree_get(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) {
+  int level;
+  for(level = 0; level != 8; ++level) {
+    int child = (((r >> level) & 1) << 3) | (((g >> level) & 1) << 2)
+              | (((b >> level) & 1) << 1) | ((a >> level) & 1);
+    tree = tree->children[child];
+    if(!tree) return -1;
+  }
+  return tree->index;
+}
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Returns 1 if the color has an index >= 0 stored in the tree, 0 otherwise.*/
+static int color_tree_has(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) {
+  int found = color_tree_get(tree, r, g, b, a);
+  return found < 0 ? 0 : 1;
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/*Inserts a color with its palette index into the tree, creating the missing
+nodes of the 8 levels on the way down.
+color is not allowed to already exist.
+Index should be >= 0 (it's signed to be compatible with using -1 for "doesn't exist")
+If a node cannot be allocated the insertion is abandoned: the tree stays valid
+and the color simply remains absent (color_tree_get returns -1 for it), rather
+than the NULL result of lodepng_malloc being dereferenced.*/
+static void color_tree_add(ColorTree* tree,
+                           unsigned char r, unsigned char g, unsigned char b, unsigned char a, unsigned index) {
+  int bit;
+  for(bit = 0; bit < 8; ++bit) {
+    int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1);
+    if(!tree->children[i]) {
+      ColorTree* child = (ColorTree*)lodepng_malloc(sizeof(ColorTree));
+      if(!child) return; /*alloc fail: void function, so give up on this color only*/
+      color_tree_init(child);
+      tree->children[i] = child; /*link only a fully initialized node*/
+    }
+    tree = tree->children[i];
+  }
+  tree->index = (int)index;
+}
+
+/*Stores one pixel, given as 8-bit RGBA, at pixel index i of out in the color
+mode described by mode. For the grey types only r is used as grey value. For
+palette output the color is looked up in tree and 82 is returned if it is not
+present. 16-bit outputs get the 8-bit value duplicated into both bytes.
+Returns 0 otherwise, also for color type / bit depth combinations that are
+not handled here (out is then left untouched).*/
+static unsigned rgba8ToPixel(unsigned char* out, size_t i,
+                             const LodePNGColorMode* mode, ColorTree* tree /*for palette*/,
+                             unsigned char r, unsigned char g, unsigned char b, unsigned char a) {
+  switch(mode->colortype) {
+    case LCT_GREY: {
+      unsigned char grey = r; /*r, g and b are not averaged*/
+      if(mode->bitdepth == 8) {
+        out[i] = grey;
+      } else if(mode->bitdepth == 16) {
+        out[i * 2 + 0] = grey;
+        out[i * 2 + 1] = grey;
+      } else {
+        /*keep only the most significant bits of grey*/
+        grey = (grey >> (8 - mode->bitdepth)) & ((1 << mode->bitdepth) - 1);
+        addColorBits(out, i, mode->bitdepth, grey);
+      }
+      break;
+    }
+    case LCT_RGB: {
+      if(mode->bitdepth == 8) {
+        unsigned char* px = &out[i * 3];
+        px[0] = r;
+        px[1] = g;
+        px[2] = b;
+      } else {
+        unsigned char* px = &out[i * 6];
+        px[0] = px[1] = r;
+        px[2] = px[3] = g;
+        px[4] = px[5] = b;
+      }
+      break;
+    }
+    case LCT_PALETTE: {
+      int index = color_tree_get(tree, r, g, b, a);
+      if(index < 0) return 82; /*color not in palette*/
+      if(mode->bitdepth == 8) out[i] = index;
+      else addColorBits(out, i, mode->bitdepth, (unsigned)index);
+      break;
+    }
+    case LCT_GREY_ALPHA: {
+      unsigned char grey = r; /*r, g and b are not averaged*/
+      if(mode->bitdepth == 8) {
+        unsigned char* px = &out[i * 2];
+        px[0] = grey;
+        px[1] = a;
+      } else if(mode->bitdepth == 16) {
+        unsigned char* px = &out[i * 4];
+        px[0] = px[1] = grey;
+        px[2] = px[3] = a;
+      }
+      break;
+    }
+    case LCT_RGBA: {
+      if(mode->bitdepth == 8) {
+        unsigned char* px = &out[i * 4];
+        px[0] = r;
+        px[1] = g;
+        px[2] = b;
+        px[3] = a;
+      } else {
+        unsigned char* px = &out[i * 8];
+        px[0] = px[1] = r;
+        px[2] = px[3] = g;
+        px[4] = px[5] = b;
+        px[6] = px[7] = a;
+      }
+      break;
+    }
+    default:
+      break;
+  }
+
+  return 0; /*no error*/
+}
+
+/*Stores one pixel, given as 16-bit RGBA, at pixel index i of out, for the
+16-bit color types grey, RGB, grey+alpha and RGBA. Every channel is written as
+two bytes, most significant byte first. For the grey types only r is used.
+Other color types leave out untouched.*/
+static void rgba16ToPixel(unsigned char* out, size_t i,
+                         const LodePNGColorMode* mode,
+                         unsigned short r, unsigned short g, unsigned short b, unsigned short a) {
+  switch(mode->colortype) {
+    case LCT_GREY: {
+      unsigned char* px = &out[i * 2];
+      px[0] = (r >> 8) & 255;
+      px[1] = r & 255;
+      break;
+    }
+    case LCT_RGB: {
+      unsigned char* px = &out[i * 6];
+      px[0] = (r >> 8) & 255;
+      px[1] = r & 255;
+      px[2] = (g >> 8) & 255;
+      px[3] = g & 255;
+      px[4] = (b >> 8) & 255;
+      px[5] = b & 255;
+      break;
+    }
+    case LCT_GREY_ALPHA: {
+      unsigned char* px = &out[i * 4];
+      px[0] = (r >> 8) & 255;
+      px[1] = r & 255;
+      px[2] = (a >> 8) & 255;
+      px[3] = a & 255;
+      break;
+    }
+    case LCT_RGBA: {
+      unsigned char* px = &out[i * 8];
+      px[0] = (r >> 8) & 255;
+      px[1] = r & 255;
+      px[2] = (g >> 8) & 255;
+      px[3] = g & 255;
+      px[4] = (b >> 8) & 255;
+      px[5] = b & 255;
+      px[6] = (a >> 8) & 255;
+      px[7] = a & 255;
+      break;
+    }
+    default:
+      break;
+  }
+}
+
+/*Get RGBA8 color of pixel with index i (y * width + x) from the raw image with given color type.
+For 16-bit input only the first (most significant) byte of each channel is returned.
+For types without alpha channel, *a is 255, or 0 when the mode's color key is defined and
+matches the pixel. Palette pixels take all four values from the palette entry.
+If mode->colortype matches none of the handled types, the outputs are left untouched.*/
+static void getPixelColorRGBA8(unsigned char* r, unsigned char* g,
+                               unsigned char* b, unsigned char* a,
+                               const unsigned char* in, size_t i,
+                               const LodePNGColorMode* mode) {
+  if(mode->colortype == LCT_GREY) {
+    if(mode->bitdepth == 8) {
+      *r = *g = *b = in[i];
+      if(mode->key_defined && *r == mode->key_r) *a = 0;
+      else *a = 255;
+    } else if(mode->bitdepth == 16) {
+      *r = *g = *b = in[i * 2 + 0];
+      if(mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r) *a = 0; /*key is compared at full 16-bit precision*/
+      else *a = 255;
+    } else {
+      unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/
+      size_t j = i * mode->bitdepth; /*bit offset of pixel i in the packed stream*/
+      unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth);
+      *r = *g = *b = (value * 255) / highest; /*scale to the 0..255 range*/
+      if(mode->key_defined && value == mode->key_r) *a = 0; /*key is compared against the unscaled value*/
+      else *a = 255;
+    }
+  } else if(mode->colortype == LCT_RGB) {
+    if(mode->bitdepth == 8) {
+      *r = in[i * 3 + 0]; *g = in[i * 3 + 1]; *b = in[i * 3 + 2];
+      if(mode->key_defined && *r == mode->key_r && *g == mode->key_g && *b == mode->key_b) *a = 0;
+      else *a = 255;
+    } else {
+      *r = in[i * 6 + 0];
+      *g = in[i * 6 + 2];
+      *b = in[i * 6 + 4];
+      if(mode->key_defined && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r
+         && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g
+         && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0;
+      else *a = 255;
+    }
+  } else if(mode->colortype == LCT_PALETTE) {
+    unsigned index;
+    if(mode->bitdepth == 8) index = in[i];
+    else {
+      size_t j = i * mode->bitdepth; /*bit offset of pixel i in the packed stream*/
+      index = readBitsFromReversedStream(&j, in, mode->bitdepth);
+    }
+
+    if(index >= mode->palettesize) {
+      /*This is an error according to the PNG spec, but common PNG decoders make it black instead.
+      Done here too, slightly faster due to no error handling needed.*/
+      *r = *g = *b = 0;
+      *a = 255;
+    } else {
+      *r = mode->palette[index * 4 + 0]; /*palette entries are 4 bytes each: r, g, b, a*/
+      *g = mode->palette[index * 4 + 1];
+      *b = mode->palette[index * 4 + 2];
+      *a = mode->palette[index * 4 + 3];
+    }
+  } else if(mode->colortype == LCT_GREY_ALPHA) {
+    if(mode->bitdepth == 8) {
+      *r = *g = *b = in[i * 2 + 0];
+      *a = in[i * 2 + 1];
+    } else {
+      *r = *g = *b = in[i * 4 + 0]; /*any other bit depth is read as 16-bit, first byte only*/
+      *a = in[i * 4 + 2];
+    }
+  } else if(mode->colortype == LCT_RGBA) {
+    if(mode->bitdepth == 8) {
+      *r = in[i * 4 + 0];
+      *g = in[i * 4 + 1];
+      *b = in[i * 4 + 2];
+      *a = in[i * 4 + 3];
+    } else {
+      *r = in[i * 8 + 0]; /*any other bit depth is read as 16-bit, first byte only*/
+      *g = in[i * 8 + 2];
+      *b = in[i * 8 + 4];
+      *a = in[i * 8 + 6];
+    }
+  }
+}
+
+/*Similar to getPixelColorRGBA8, but with all the for loops inside of the color
+mode test cases, optimized to convert the colors much faster, when converting
+to RGBA or RGB with 8 bit per channel. buffer must be RGBA or RGB output with
+enough memory, if has_alpha is true the output is RGBA. mode has the color mode
+of the input buffer. numpixels pixels are converted, and buffer advances by
+4 bytes per pixel if has_alpha is true and by 3 bytes otherwise.*/
+static void getPixelColorsRGBA8(unsigned char* buffer, size_t numpixels,
+                                unsigned has_alpha, const unsigned char* in,
+                                const LodePNGColorMode* mode) {
+  unsigned num_channels = has_alpha ? 4 : 3; /*output bytes per pixel*/
+  size_t i;
+  if(mode->colortype == LCT_GREY) {
+    if(mode->bitdepth == 8) {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = buffer[1] = buffer[2] = in[i];
+        if(has_alpha) buffer[3] = mode->key_defined && in[i] == mode->key_r ? 0 : 255;
+      }
+    } else if(mode->bitdepth == 16) {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = buffer[1] = buffer[2] = in[i * 2]; /*first (most significant) byte only*/
+        if(has_alpha) buffer[3] = mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r ? 0 : 255;
+      }
+    } else {
+      unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/
+      size_t j = 0; /*bit cursor into in, advanced by readBitsFromReversedStream*/
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth);
+        buffer[0] = buffer[1] = buffer[2] = (value * 255) / highest; /*scale to the 0..255 range*/
+        if(has_alpha) buffer[3] = mode->key_defined && value == mode->key_r ? 0 : 255; /*key compared against the unscaled value*/
+      }
+    }
+  } else if(mode->colortype == LCT_RGB) {
+    if(mode->bitdepth == 8) {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = in[i * 3 + 0];
+        buffer[1] = in[i * 3 + 1];
+        buffer[2] = in[i * 3 + 2];
+        if(has_alpha) buffer[3] = mode->key_defined && buffer[0] == mode->key_r
+           && buffer[1]== mode->key_g && buffer[2] == mode->key_b ? 0 : 255;
+      }
+    } else {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = in[i * 6 + 0]; /*first (most significant) byte of each 16-bit channel*/
+        buffer[1] = in[i * 6 + 2];
+        buffer[2] = in[i * 6 + 4];
+        if(has_alpha) buffer[3] = mode->key_defined
+           && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r
+           && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g
+           && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b ? 0 : 255;
+      }
+    }
+  } else if(mode->colortype == LCT_PALETTE) {
+    unsigned index;
+    size_t j = 0; /*bit cursor into in, only used when bitdepth is not 8*/
+    for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+      if(mode->bitdepth == 8) index = in[i];
+      else index = readBitsFromReversedStream(&j, in, mode->bitdepth);
+
+      if(index >= mode->palettesize) {
+        /*This is an error according to the PNG spec, but most PNG decoders make it black instead.
+        Done here too, slightly faster due to no error handling needed.*/
+        buffer[0] = buffer[1] = buffer[2] = 0;
+        if(has_alpha) buffer[3] = 255;
+      } else {
+        buffer[0] = mode->palette[index * 4 + 0]; /*palette entries are 4 bytes each: r, g, b, a*/
+        buffer[1] = mode->palette[index * 4 + 1];
+        buffer[2] = mode->palette[index * 4 + 2];
+        if(has_alpha) buffer[3] = mode->palette[index * 4 + 3];
+      }
+    }
+  } else if(mode->colortype == LCT_GREY_ALPHA) {
+    if(mode->bitdepth == 8) {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = buffer[1] = buffer[2] = in[i * 2 + 0];
+        if(has_alpha) buffer[3] = in[i * 2 + 1];
+      }
+    } else {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = buffer[1] = buffer[2] = in[i * 4 + 0]; /*read as 16-bit, first byte only*/
+        if(has_alpha) buffer[3] = in[i * 4 + 2];
+      }
+    }
+  } else if(mode->colortype == LCT_RGBA) {
+    if(mode->bitdepth == 8) {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = in[i * 4 + 0];
+        buffer[1] = in[i * 4 + 1];
+        buffer[2] = in[i * 4 + 2];
+        if(has_alpha) buffer[3] = in[i * 4 + 3];
+      }
+    } else {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels) {
+        buffer[0] = in[i * 8 + 0]; /*read as 16-bit, first byte only*/
+        buffer[1] = in[i * 8 + 2];
+        buffer[2] = in[i * 8 + 4];
+        if(has_alpha) buffer[3] = in[i * 8 + 6];
+      }
+    }
+  }
+}
+
+/*Reads pixel i (y * width + x) of a raw image as RGBA16. The given color type
+must be 16-bit itself: every channel is read as two bytes, most significant
+first. The grey types copy the grey value into r, g and b. For types without
+alpha channel, *a is 65535, or 0 when the mode's color key is defined and
+matches the pixel. Color types not handled here leave the outputs untouched.*/
+static void getPixelColorRGBA16(unsigned short* r, unsigned short* g, unsigned short* b, unsigned short* a,
+                                const unsigned char* in, size_t i, const LodePNGColorMode* mode) {
+  switch(mode->colortype) {
+    case LCT_GREY: {
+      const unsigned char* px = &in[i * 2];
+      *r = *g = *b = 256 * px[0] + px[1];
+      *a = (mode->key_defined && 256U * px[0] + px[1] == mode->key_r) ? 0 : 65535;
+      break;
+    }
+    case LCT_RGB: {
+      const unsigned char* px = &in[i * 6];
+      *r = 256u * px[0] + px[1];
+      *g = 256u * px[2] + px[3];
+      *b = 256u * px[4] + px[5];
+      if(mode->key_defined
+         && 256u * px[0] + px[1] == mode->key_r
+         && 256u * px[2] + px[3] == mode->key_g
+         && 256u * px[4] + px[5] == mode->key_b) {
+        *a = 0;
+      } else {
+        *a = 65535;
+      }
+      break;
+    }
+    case LCT_GREY_ALPHA: {
+      const unsigned char* px = &in[i * 4];
+      *r = *g = *b = 256u * px[0] + px[1];
+      *a = 256u * px[2] + px[3];
+      break;
+    }
+    case LCT_RGBA: {
+      const unsigned char* px = &in[i * 8];
+      *r = 256u * px[0] + px[1];
+      *g = 256u * px[2] + px[3];
+      *b = 256u * px[4] + px[5];
+      *a = 256u * px[6] + px[7];
+      break;
+    }
+    default:
+      break;
+  }
+}
+
+unsigned lodepng_convert(unsigned char* out, const unsigned char* in,
+                         const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in,
+                         unsigned w, unsigned h) { /*Converts the w * h pixels of in (color mode mode_in)
+  into out (color mode mode_out). out must already be large enough, nothing is allocated for it here.
+  Returns 0 on success, or the error of rgba8ToPixel (82: color not in the output palette).*/
+  size_t i;
+  ColorTree tree; /*color -> palette index lookup, only initialized and used when mode_out is palette*/
+  size_t numpixels = (size_t)w * (size_t)h;
+  unsigned error = 0;
+
+  if(lodepng_color_mode_equal(mode_out, mode_in)) { /*identical modes: plain byte copy*/
+    size_t numbytes = lodepng_get_raw_size(w, h, mode_in);
+    for(i = 0; i != numbytes; ++i) out[i] = in[i];
+    return 0;
+  }
+
+  if(mode_out->colortype == LCT_PALETTE) {
+    size_t palettesize = mode_out->palettesize;
+    const unsigned char* palette = mode_out->palette;
+    size_t palsize = (size_t)1u << mode_out->bitdepth; /*number of indices representable at the output bit depth*/
+    /*if the user specified output palette but did not give the values, assume
+    they want the values of the input color type (assuming that one is palette).
+    Note that we never create a new palette ourselves.*/
+    if(palettesize == 0) {
+      palettesize = mode_in->palettesize;
+      palette = mode_in->palette;
+      /*if the input was also palette with same bitdepth, then the color types are also
+      equal, so copy literally. This to preserve the exact indices that were in the PNG
+      even in case there are duplicate colors in the palette.*/
+      if (mode_in->colortype == LCT_PALETTE && mode_in->bitdepth == mode_out->bitdepth) {
+        size_t numbytes = lodepng_get_raw_size(w, h, mode_in);
+        for(i = 0; i != numbytes; ++i) out[i] = in[i];
+        return 0; /*tree has not been initialized yet at this point, so there is nothing to clean up*/
+      }
+    }
+    if(palettesize < palsize) palsize = palettesize; /*only entries that exist and are addressable*/
+    color_tree_init(&tree);
+    for(i = 0; i != palsize; ++i) {
+      const unsigned char* p = &palette[i * 4]; /*4 bytes per palette entry: r, g, b, a*/
+      color_tree_add(&tree, p[0], p[1], p[2], p[3], (unsigned)i);
+    }
+  }
+
+  if(mode_in->bitdepth == 16 && mode_out->bitdepth == 16) { /*16-bit to 16-bit keeps full precision*/
+    for(i = 0; i != numpixels; ++i) {
+      unsigned short r = 0, g = 0, b = 0, a = 0;
+      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
+      rgba16ToPixel(out, i, mode_out, r, g, b, a);
+    }
+  } else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGBA) { /*bulk path for 8-bit RGBA output*/
+    getPixelColorsRGBA8(out, numpixels, 1, in, mode_in);
+  } else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGB) { /*bulk path for 8-bit RGB output*/
+    getPixelColorsRGBA8(out, numpixels, 0, in, mode_in);
+  } else { /*generic path: every pixel goes through 8-bit RGBA*/
+    unsigned char r = 0, g = 0, b = 0, a = 0;
+    for(i = 0; i != numpixels; ++i) {
+      getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in);
+      error = rgba8ToPixel(out, i, mode_out, &tree, r, g, b, a);
+      if (error) break; /*break instead of return so that the tree is still cleaned up below*/
+    }
+  }
+
+  if(mode_out->colortype == LCT_PALETTE) {
+    color_tree_cleanup(&tree);
+  }
+
+  return error;
+}
+
+
+/* Converts a single rgb color without alpha from the color mode mode_in to the
+color mode mode_out, truncating the color bits to the output bitdepth. The
+input is first scaled to 16 bits per channel and then shifted down. For single
+channel types (grey or palette) only the r channel is used: r_in is the grey
+value or palette index, and only *r_out is written. Returns 0 on success, 82 if
+a palette index is out of range or the color is not in the output palette, and
+31 for a color type that is not handled. Slow function, do not use it to
+process all pixels of an image. Alpha and the color key are ignored on purpose:
+this is meant for bKGD, which can use any palette index but has no alpha. */
+unsigned lodepng_convert_rgb(
+    unsigned* r_out, unsigned* g_out, unsigned* b_out,
+    unsigned r_in, unsigned g_in, unsigned b_in,
+    const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in) {
+  unsigned r16 = 0, g16 = 0, b16 = 0;
+  unsigned mul = 65535 / ((1u << mode_in->bitdepth) - 1u); /*65535, 21845, 4369, 257, 1*/
+  unsigned shift = 16 - mode_out->bitdepth;
+
+  /* first bring the input to 16 bits per channel */
+  switch(mode_in->colortype) {
+    case LCT_GREY:
+    case LCT_GREY_ALPHA:
+      r16 = g16 = b16 = r_in * mul;
+      break;
+    case LCT_RGB:
+    case LCT_RGBA:
+      r16 = r_in * mul;
+      g16 = g_in * mul;
+      b16 = b_in * mul;
+      break;
+    case LCT_PALETTE: {
+      const unsigned char* entry;
+      if(r_in >= mode_in->palettesize) return 82;
+      entry = &mode_in->palette[r_in * 4];
+      r16 = entry[0] * 257u;
+      g16 = entry[1] * 257u;
+      b16 = entry[2] * 257u;
+      break;
+    }
+    default:
+      return 31;
+  }
+
+  /* now convert to output format */
+  switch(mode_out->colortype) {
+    case LCT_GREY:
+    case LCT_GREY_ALPHA:
+      *r_out = r16 >> shift;
+      break;
+    case LCT_RGB:
+    case LCT_RGBA:
+      *r_out = r16 >> shift;
+      *g_out = g16 >> shift;
+      *b_out = b16 >> shift;
+      break;
+    case LCT_PALETTE: {
+      unsigned i;
+      unsigned r8 = r16 >> 8, g8 = g16 >> 8, b8 = b16 >> 8;
+      /* a 16-bit color cannot be in the palette */
+      if(r8 != (r16 & 255) || g8 != (g16 & 255) || b8 != (b16 & 255)) return 82;
+      for(i = 0; i < mode_out->palettesize; i++) {
+        const unsigned char* entry = &mode_out->palette[i * 4];
+        if(r8 == entry[0] && g8 == entry[1] && b8 == entry[2]) {
+          *r_out = i;
+          return 0;
+        }
+      }
+      return 82;
+    }
+    default:
+      return 31;
+  }
+
+  return 0;
+}
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/*Resets profile to the cheapest possible description: not colored, no color
+key, no alpha, zero colors, 1 bit and zero pixels seen.*/
+void lodepng_color_profile_init(LodePNGColorProfile* profile) {
+  profile->numpixels = 0;
+  profile->numcolors = 0;
+  profile->bits = 1;
+  profile->colored = 0;
+  profile->alpha = 0;
+  profile->key = 0;
+  profile->key_r = 0;
+  profile->key_g = 0;
+  profile->key_b = 0;
+}
+
+/*function used for debug purposes with C++*/
+/*void printColorProfile(LodePNGColorProfile* p) {
+  std::cout << "colored: " << (int)p->colored << ", ";
+  std::cout << "key: " << (int)p->key << ", ";
+  std::cout << "key_r: " << (int)p->key_r << ", ";
+  std::cout << "key_g: " << (int)p->key_g << ", ";
+  std::cout << "key_b: " << (int)p->key_b << ", ";
+  std::cout << "alpha: " << (int)p->alpha << ", ";
+  std::cout << "numcolors: " << (int)p->numcolors << ", ";
+  std::cout << "bits: " << (int)p->bits << std::endl;
+}*/
+
+/*Returns the smallest grey bit depth (1, 2, 4 or 8) that can represent the
+given 8-bit value exactly after scaling: 0 and 255 fit in 1 bit, other
+multiples of 85 in 2 bits, other multiples of 17 in 4 bits, the rest need 8.*/
+static unsigned getValueRequiredBits(unsigned char value) {
+  unsigned bits = 8;
+  if(value == 0 || value == 255) bits = 1;
+  else if(value % 85 == 0) bits = 2; /*85 and 170; every multiple of 85 is a multiple of 17 too*/
+  else if(value % 17 == 0) bits = 4;
+  return bits;
+}
+
+/*profile must already have been inited.
+It's ok to set some parameters of profile to done already.
+Scans the w * h pixels of in (color mode mode_in) and updates profile with what
+is needed to represent them: colored or grey, alpha channel or color key, the
+distinct colors seen (at most 256 are stored in profile->palette) and the
+minimum bit depth. profile->numpixels is increased by w * h.
+Always returns 0: error is never modified in this function.*/
+unsigned lodepng_get_color_profile(LodePNGColorProfile* profile,
+                                   const unsigned char* in, unsigned w, unsigned h,
+                                   const LodePNGColorMode* mode_in) {
+  unsigned error = 0;
+  size_t i;
+  ColorTree tree; /*set of the colors counted so far*/
+  size_t numpixels = (size_t)w * (size_t)h;
+
+  /* mark things as done already if it would be impossible to have a more expensive case */
+  unsigned colored_done = lodepng_is_greyscale_type(mode_in) ? 1 : 0;
+  unsigned alpha_done = lodepng_can_have_alpha(mode_in) ? 0 : 1;
+  unsigned numcolors_done = 0;
+  unsigned bpp = lodepng_get_bpp(mode_in);
+  unsigned bits_done = (profile->bits == 1 && bpp == 1) ? 1 : 0;
+  unsigned sixteen = 0; /* whether the input image is 16 bit */
+  unsigned maxnumcolors = 257; /*one more than fits a palette, so exceeding 256 can be detected*/
+  if(bpp <= 8) maxnumcolors = LODEPNG_MIN(257, profile->numcolors + (1u << bpp));
+
+  profile->numpixels += numpixels;
+
+  color_tree_init(&tree);
+
+  /*If the profile was already filled in from previous data, fill its palette in tree
+  and mark things as done already if we know they are the most expensive case already*/
+  if(profile->alpha) alpha_done = 1;
+  if(profile->colored) colored_done = 1;
+  if(profile->bits == 16) numcolors_done = 1;
+  if(profile->bits >= bpp) bits_done = 1;
+  if(profile->numcolors >= maxnumcolors) numcolors_done = 1;
+
+  if(!numcolors_done) {
+    for(i = 0; i < profile->numcolors; i++) {
+      const unsigned char* color = &profile->palette[i * 4]; /*4 bytes per stored color: r, g, b, a*/
+      color_tree_add(&tree, color[0], color[1], color[2], color[3], i);
+    }
+  }
+
+  /*Check if the 16-bit input is truly 16-bit*/
+  if(mode_in->bitdepth == 16 && !sixteen) {
+    unsigned short r, g, b, a;
+    for(i = 0; i != numpixels; ++i) {
+      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
+      if((r & 255) != ((r >> 8) & 255) || (g & 255) != ((g >> 8) & 255) ||
+         (b & 255) != ((b >> 8) & 255) || (a & 255) != ((a >> 8) & 255)) /*first and second byte differ*/ {
+        profile->bits = 16;
+        sixteen = 1;
+        bits_done = 1;
+        numcolors_done = 1; /*counting colors no longer useful, palette doesn't support 16-bit*/
+        break;
+      }
+    }
+  }
+
+  if(sixteen) {
+    unsigned short r = 0, g = 0, b = 0, a = 0;
+
+    for(i = 0; i != numpixels; ++i) {
+      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
+
+      if(!colored_done && (r != g || r != b)) {
+        profile->colored = 1;
+        colored_done = 1;
+      }
+
+      if(!alpha_done) {
+        unsigned matchkey = (r == profile->key_r && g == profile->key_g && b == profile->key_b);
+        if(a != 65535 && (a != 0 || (profile->key && !matchkey))) { /*partial alpha, or a second fully transparent color*/
+          profile->alpha = 1;
+          profile->key = 0;
+          alpha_done = 1;
+        } else if(a == 0 && !profile->alpha && !profile->key) { /*first fully transparent pixel becomes the color key*/
+          profile->key = 1;
+          profile->key_r = r;
+          profile->key_g = g;
+          profile->key_b = b;
+        } else if(a == 65535 && profile->key && matchkey) {
+          /* Color key cannot be used if an opaque pixel also has that RGB color. */
+          profile->alpha = 1;
+          profile->key = 0;
+          alpha_done = 1;
+        }
+      }
+      if(alpha_done && numcolors_done && colored_done && bits_done) break;
+    }
+
+    if(profile->key && !profile->alpha) { /*second pass: pixels before the key was chosen were not checked against it*/
+      for(i = 0; i != numpixels; ++i) {
+        getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
+        if(a != 0 && r == profile->key_r && g == profile->key_g && b == profile->key_b) {
+          /* Color key cannot be used if an opaque pixel also has that RGB color. */
+          profile->alpha = 1;
+          profile->key = 0;
+          alpha_done = 1;
+        }
+      }
+    }
+  } else /* < 16-bit */ {
+    unsigned char r = 0, g = 0, b = 0, a = 0;
+    for(i = 0; i != numpixels; ++i) {
+      getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in);
+
+      if(!bits_done && profile->bits < 8) {
+        /*only r is checked, < 8 bits is only relevant for greyscale*/
+        unsigned bits = getValueRequiredBits(r);
+        if(bits > profile->bits) profile->bits = bits;
+      }
+      bits_done = (profile->bits >= bpp);
+
+      if(!colored_done && (r != g || r != b)) {
+        profile->colored = 1;
+        colored_done = 1;
+        if(profile->bits < 8) profile->bits = 8; /*PNG has no colored modes with less than 8-bit per channel*/
+      }
+
+      if(!alpha_done) {
+        unsigned matchkey = (r == profile->key_r && g == profile->key_g && b == profile->key_b);
+        if(a != 255 && (a != 0 || (profile->key && !matchkey))) { /*partial alpha, or a second fully transparent color*/
+          profile->alpha = 1;
+          profile->key = 0;
+          alpha_done = 1;
+          if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
+        } else if(a == 0 && !profile->alpha && !profile->key) { /*first fully transparent pixel becomes the color key*/
+          profile->key = 1;
+          profile->key_r = r;
+          profile->key_g = g;
+          profile->key_b = b;
+        } else if(a == 255 && profile->key && matchkey) {
+          /* Color key cannot be used if an opaque pixel also has that RGB color. */
+          profile->alpha = 1;
+          profile->key = 0;
+          alpha_done = 1;
+          if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
+        }
+      }
+
+      if(!numcolors_done) {
+        if(!color_tree_has(&tree, r, g, b, a)) {
+          color_tree_add(&tree, r, g, b, a, profile->numcolors);
+          if(profile->numcolors < 256) { /*only the first 256 colors are stored, counting continues up to maxnumcolors*/
+            unsigned char* p = profile->palette;
+            unsigned n = profile->numcolors;
+            p[n * 4 + 0] = r;
+            p[n * 4 + 1] = g;
+            p[n * 4 + 2] = b;
+            p[n * 4 + 3] = a;
+          }
+          ++profile->numcolors;
+          numcolors_done = profile->numcolors >= maxnumcolors;
+        }
+      }
+
+      if(alpha_done && numcolors_done && colored_done && bits_done) break;
+    }
+
+    if(profile->key && !profile->alpha) { /*second pass: pixels before the key was chosen were not checked against it*/
+      for(i = 0; i != numpixels; ++i) {
+        getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in);
+        if(a != 0 && r == profile->key_r && g == profile->key_g && b == profile->key_b) {
+          /* Color key cannot be used if an opaque pixel also has that RGB color. */
+          profile->alpha = 1;
+          profile->key = 0;
+          alpha_done = 1;
+          if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
+        }
+      }
+    }
+
+    /*make the profile's key always 16-bit for consistency - repeat each byte twice
+    NOTE(review): this widening runs on every call that takes the < 16-bit branch, and the
+    matchkey tests above compare 8-bit r, g, b against key_r/key_g/key_b. Calling this function
+    again on a profile that already holds a key looks like it would widen the key a second
+    time - confirm how callers reuse a profile.*/
+    profile->key_r += (profile->key_r << 8);
+    profile->key_g += (profile->key_g << 8);
+    profile->key_b += (profile->key_b << 8);
+  }
+
+  color_tree_cleanup(&tree);
+  return error;
+}
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+/*Feeds one extra RGBA color into an already initialized color profile. Every channel is passed as a 16-bit value
+(an 8-bit value has its byte repeated twice; a fully opaque alpha is 65535). Internally this profiles a temporary
+1x1 16-bit RGBA image, which is costly: call it for a handful of extra colors only, never once per pixel.*/
+static unsigned lodepng_color_profile_add(LodePNGColorProfile* profile,
+                                          unsigned r, unsigned g, unsigned b, unsigned a) {
+  unsigned result;
+  LodePNGColorMode rgba16;
+  unsigned char pixel[8]; /*one 16-bit RGBA pixel, most significant byte of each channel first*/
+  pixel[0] = (unsigned char)(r >> 8); pixel[2] = (unsigned char)(g >> 8);
+  pixel[4] = (unsigned char)(b >> 8); pixel[6] = (unsigned char)(a >> 8); /*high bytes*/
+  pixel[1] = (unsigned char)r; pixel[3] = (unsigned char)g; pixel[5] = (unsigned char)b; pixel[7] = (unsigned char)a;
+  lodepng_color_mode_init(&rgba16);
+  rgba16.colortype = LCT_RGBA; rgba16.bitdepth = 16;
+  result = lodepng_get_color_profile(profile, pixel, 1, 1, &rgba16);
+  lodepng_color_mode_cleanup(&rgba16);
+  return result;
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*Autochoose color model given the computed profile; mode_in is only consulted to keep its palette (and so its
+palette order) when that palette already fits. mode_out is overwritten. Returns 0 if ok, otherwise an error code.*/
+static unsigned auto_choose_color_from_profile(LodePNGColorMode* mode_out,
+                                               const LodePNGColorMode* mode_in,
+                                               const LodePNGColorProfile* prof) {
+  unsigned error = 0;
+  unsigned palettebits, palette_ok;
+  size_t i, n;
+  size_t numpixels = prof->numpixels;
+
+  unsigned alpha = prof->alpha; /*local copies: the small-image rule below may override the profile*/
+  unsigned key = prof->key;
+  unsigned bits = prof->bits;
+
+  mode_out->key_defined = 0;
+
+  if(key && numpixels <= 16) {
+    alpha = 1; /*too few pixels to justify tRNS chunk overhead*/
+    key = 0;
+    if(bits < 8) bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
+  }
+  n = prof->numcolors;
+  palettebits = n <= 2 ? 1 : (n <= 4 ? 2 : (n <= 16 ? 4 : 8)); /*smallest palette bit depth that can index n colors*/
+  palette_ok = n <= 256 && bits <= 8;
+  if(numpixels < n * 2) palette_ok = 0; /*don't add palette overhead if image has only a few pixels*/
+  if(!prof->colored && bits <= palettebits) palette_ok = 0; /*grey is less overhead*/
+
+  if(palette_ok) {
+    const unsigned char* p = prof->palette; /*RGBA entries, 4 bytes each*/
+    lodepng_palette_clear(mode_out); /*remove potential earlier palette*/
+    for(i = 0; i != prof->numcolors; ++i) {
+      error = lodepng_palette_add(mode_out, p[i * 4 + 0], p[i * 4 + 1], p[i * 4 + 2], p[i * 4 + 3]);
+      if(error) break;
+    }
+
+    mode_out->colortype = LCT_PALETTE;
+    mode_out->bitdepth = palettebits;
+
+    if(!error && mode_in->colortype == LCT_PALETTE && mode_in->palettesize >= mode_out->palettesize
+        && mode_in->bitdepth == mode_out->bitdepth) {
+      /*Input presumably has the same palette colors: keep it to preserve its order. The copy can fail; report that*/
+      lodepng_color_mode_cleanup(mode_out);
+      error = lodepng_color_mode_copy(mode_out, mode_in);
+    }
+  } else /*8-bit or 16-bit per channel*/ {
+    mode_out->bitdepth = bits;
+    mode_out->colortype = alpha ? (prof->colored ? LCT_RGBA : LCT_GREY_ALPHA)
+                                : (prof->colored ? LCT_RGB : LCT_GREY);
+
+    if(key) {
+      unsigned mask = (1u << mode_out->bitdepth) - 1u; /*profile always uses 16-bit, mask converts it*/
+      mode_out->key_r = prof->key_r & mask;
+      mode_out->key_g = prof->key_g & mask;
+      mode_out->key_b = prof->key_b & mask;
+      mode_out->key_defined = 1;
+    }
+  }
+
+  return error;
+}
+
+/*Picks the color model that needs the fewest bits for the given image: e.g. grey
+when every pixel is greyscale, a palette when the number of distinct colors is
+small enough, a color key when a single color is transparent, ...
+image, w and h describe the pixels, stored in the mode_in color model. mode_out
+is overwritten with the chosen model. The return value is the error code.*/
+unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out,
+                                   const unsigned char* image, unsigned w, unsigned h,
+                                   const LodePNGColorMode* mode_in) {
+  LodePNGColorProfile stats;
+  unsigned status;
+  lodepng_color_profile_init(&stats);
+  status = lodepng_get_color_profile(&stats, image, w, h, mode_in);
+  /*only derive a color model from the profile when profiling succeeded*/
+  return status ? status : auto_choose_color_from_profile(mode_out, mode_in, &stats);
+}
+
+#endif /* #ifdef LODEPNG_COMPILE_ENCODER */
+
+/*
+Paeth predictor of PNG filter type 4: returns whichever of a (left), b (above) and
+c (upper left) lies closest to a + b - c; ties prefer a, then b. The arguments are
+bytes widened to short so that the intermediate differences cannot wrap around.
+*/
+static unsigned char paethPredictor(short a, short b, short c) {
+  short dist_a = abs(b - c); /*|p - a| with p = a + b - c*/
+  short dist_b = abs(a - c); /*|p - b|*/
+  short dist_c = abs(a + b - c - c); /*|p - c|*/
+  short best = a;
+  if(dist_c < dist_a && dist_c < dist_b) best = c;
+  else if(dist_b < dist_a) best = b;
+  return (unsigned char)best;
+}
+
+/*lookup tables shared by multiple Adam7 related functions; entry i belongs to the (i + 1)-th of the 7 passes*/
+
+static const unsigned ADAM7_IX[7] = { 0, 4, 0, 2, 0, 1, 0 }; /*x start values: column of the first pixel of a pass*/
+static const unsigned ADAM7_IY[7] = { 0, 0, 4, 0, 2, 0, 1 }; /*y start values: row of the first pixel of a pass*/
+static const unsigned ADAM7_DX[7] = { 8, 8, 4, 4, 2, 2, 1 }; /*x delta values: column step between pass pixels*/
+static const unsigned ADAM7_DY[7] = { 8, 8, 8, 4, 4, 2, 2 }; /*y delta values: row step between pass pixels*/
+
+/*
+Outputs various dimensions and positions in the image related to the Adam7 reduced images.
+passw: output containing the width in pixels of the 7 passes
+passh: output containing the height in pixels of the 7 passes
+filter_passstart: output containing the byte index of the start and end of each
+ reduced image with filter bytes
+padded_passstart: output containing the byte index of the start and end of each
+ reduced image when without filter bytes but with padded scanlines
+passstart: output containing the byte index of the start and end of each reduced
+ image without padding between scanlines, but still padding between the images
+w, h: width and height of non-interlaced image
+bpp: bits per pixel
+"padded" is only relevant if bpp is less than 8 and a scanline or image does not
+ end at a full byte. A pass without pixels gets width 0, height 0 and size 0.
+*/
+static void Adam7_getpassvalues(unsigned passw[7], unsigned passh[7], size_t filter_passstart[8],
+                                size_t padded_passstart[8], size_t passstart[8], unsigned w, unsigned h, unsigned bpp) {
+  /*the passstart values have 8 values: the 8th one indicates the byte after the end of the 7th (= last) pass*/
+  unsigned i;
+
+  /*calculate width and height in pixels of each pass*/
+  for(i = 0; i != 7; ++i) {
+    passw[i] = (w + ADAM7_DX[i] - ADAM7_IX[i] - 1) / ADAM7_DX[i];
+    passh[i] = (h + ADAM7_DY[i] - ADAM7_IY[i] - 1) / ADAM7_DY[i];
+    if(passw[i] == 0) passh[i] = 0;
+    if(passh[i] == 0) passw[i] = 0;
+  }
+
+  filter_passstart[0] = padded_passstart[0] = passstart[0] = 0;
+  for(i = 0; i != 7; ++i) {
+    size_t rows = passh[i], linebytes = ((size_t)passw[i] * bpp + 7u) / 8u; /*size_t: unsigned wraps on big passes*/
+    /*an empty pass takes 0 bytes; otherwise each scanline has 1 filtertype byte in front of its padded data*/
+    filter_passstart[i + 1] = filter_passstart[i] + ((passw[i] && passh[i]) ? rows * (1u + linebytes) : 0);
+    /*bits padded if needed to fill full byte at end of each scanline*/
+    padded_passstart[i + 1] = padded_passstart[i] + rows * linebytes;
+    /*only padded at end of reduced image*/
+    passstart[i + 1] = passstart[i] + (rows * passw[i] * bpp + 7u) / 8u;
+  }
+}
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / PNG Decoder                                                            / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*checks the PNG signature and IHDR chunk at the start of in, fills in state->info_png. return value is error*/
+unsigned lodepng_inspect(unsigned* w, unsigned* h, LodePNGState* state,
+                         const unsigned char* in, size_t insize) {
+  unsigned width, height;
+  LodePNGInfo* info = &state->info_png;
+  if(insize == 0 || in == 0) {
+    CERROR_RETURN_ERROR(state->error, 48); /*error: the given data is empty*/
+  }
+  if(insize < 33) { /*33 = 8 signature bytes + IHDR chunk: 4 length, 4 type, 13 data and 4 CRC bytes*/
+    CERROR_RETURN_ERROR(state->error, 27); /*error: the data length is smaller than the length of a PNG header*/
+  }
+
+  /*when decoding a new PNG image, make sure all parameters created after previous decoding are reset*/
+  /* TODO: remove this. One should use a new LodePNGState for new sessions */
+  lodepng_info_cleanup(info);
+  lodepng_info_init(info);
+
+  if(in[0] != 137 || in[1] != 80 || in[2] != 78 || in[3] != 71
+     || in[4] != 13 || in[5] != 10 || in[6] != 26 || in[7] != 10) {
+    CERROR_RETURN_ERROR(state->error, 28); /*error: the first 8 bytes are not the correct PNG signature*/
+  }
+  if(lodepng_chunk_length(in + 8) != 13) {
+    CERROR_RETURN_ERROR(state->error, 94); /*error: header size must be 13 bytes*/
+  }
+  if(!lodepng_chunk_type_equals(in + 8, "IHDR")) {
+    CERROR_RETURN_ERROR(state->error, 29); /*error: it doesn't start with a IHDR chunk!*/
+  }
+
+  /*read the values given in the header: the 13 IHDR data bytes are in[16] up to and including in[28]*/
+  width = lodepng_read32bitInt(&in[16]);
+  height = lodepng_read32bitInt(&in[20]);
+  info->color.bitdepth = in[24];
+  info->color.colortype = (LodePNGColorType)in[25];
+  info->compression_method = in[26];
+  info->filter_method = in[27];
+  info->interlace_method = in[28];
+
+  if(width == 0 || height == 0) {
+    CERROR_RETURN_ERROR(state->error, 93); /*error: the width or the height is 0*/
+  }
+
+  if(w) *w = width; /*w and h are optional outputs; note they are written before the checks below run*/
+  if(h) *h = height;
+
+  if(!state->decoder.ignore_crc) {
+    unsigned CRC = lodepng_read32bitInt(&in[29]); /*the CRC stored right after the IHDR data*/
+    unsigned checksum = lodepng_crc32(&in[12], 17); /*computed over the 4 type bytes and the 13 data bytes*/
+    if(CRC != checksum) {
+      CERROR_RETURN_ERROR(state->error, 57); /*invalid CRC*/
+    }
+  }
+
+  /*error: only compression method 0 is allowed in the specification*/
+  if(info->compression_method != 0) CERROR_RETURN_ERROR(state->error, 32);
+  /*error: only filter method 0 is allowed in the specification*/
+  if(info->filter_method != 0) CERROR_RETURN_ERROR(state->error, 33);
+  /*error: only interlace methods 0 and 1 exist in the specification*/
+  if(info->interlace_method > 1) CERROR_RETURN_ERROR(state->error, 34);
+
+  state->error = checkColorValidity(info->color.colortype, info->color.bitdepth); /*last check: also the result*/
+  return state->error;
+}
+
+static unsigned unfilterScanline(unsigned char* recon, const unsigned char* scanline, const unsigned char* precon,
+                                 size_t bytewidth, unsigned char filterType, size_t length) {
+  /*
+  For PNG filter method 0
+  Reverses the filter of one scanline of length bytes: scanline holds the filtered bytes (without the
+  filtertype byte, which is passed separately as filterType) and recon receives the reconstructed bytes.
+  bytewidth is the distance in bytes to the corresponding byte of the pixel to the left.
+  precon is the previous reconstructed scanline, or 0 if there is none, in which case it counts as all zeroes.
+  recon and scanline MAY be the same memory address! precon must be disjoint. Returns 0, or 36 for a bad filterType.
+  */
+
+  size_t i;
+  switch(filterType) {
+    case 0: /*"None" in the PNG specification: the bytes are copied as they are*/
+      for(i = 0; i != length; ++i) recon[i] = scanline[i];
+      break;
+    case 1: /*"Sub": adds the reconstructed byte bytewidth positions to the left*/
+      for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i]; /*the first pixel has no left neighbor*/
+      for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + recon[i - bytewidth];
+      break;
+    case 2: /*"Up": adds the byte at the same position in the previous scanline*/
+      if(precon) {
+        for(i = 0; i != length; ++i) recon[i] = scanline[i] + precon[i];
+      } else {
+        for(i = 0; i != length; ++i) recon[i] = scanline[i];
+      }
+      break;
+    case 3: /*"Average": adds the average, rounded down, of the left and the above byte*/
+      if(precon) {
+        for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i] + (precon[i] >> 1);
+        for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + ((recon[i - bytewidth] + precon[i]) >> 1);
+      } else {
+        for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i];
+        for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + (recon[i - bytewidth] >> 1);
+      }
+      break;
+    case 4: /*"Paeth": adds the Paeth prediction made from the left, above and upper left bytes*/
+      if(precon) {
+        for(i = 0; i != bytewidth; ++i) {
+          recon[i] = (scanline[i] + precon[i]); /*paethPredictor(0, precon[i], 0) is always precon[i]*/
+        }
+        for(i = bytewidth; i < length; ++i) {
+          recon[i] = (scanline[i] + paethPredictor(recon[i - bytewidth], precon[i], precon[i - bytewidth]));
+        }
+      } else {
+        for(i = 0; i != bytewidth; ++i) {
+          recon[i] = scanline[i]; /*no left and no above byte: the prediction is 0*/
+        }
+        for(i = bytewidth; i < length; ++i) {
+          /*paethPredictor(recon[i - bytewidth], 0, 0) is always recon[i - bytewidth]*/
+          recon[i] = (scanline[i] + recon[i - bytewidth]);
+        }
+      }
+      break;
+    default: return 36; /*error: unexisting filter type given*/
+  }
+  return 0;
+}
+
+static unsigned unfilter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) {
+  /*
+  For PNG filter method 0
+  this function unfilters a single image (e.g. without interlacing this is called once, with Adam7 seven times)
+  out must have enough bytes allocated already, in must have the scanlines + 1 filtertype byte per scanline
+  w and h are image dimensions or dimensions of reduced image, bpp is bits per pixel. return value is error
+  in and out are allowed to be the same memory address (but aren't the same size since in has the extra filter bytes)
+  */
+
+  unsigned y;
+  unsigned char* prevline = 0; /*0 for the first scanline: it has no previous line to predict from*/
+
+  /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/
+  size_t bytewidth = (bpp + 7) / 8;
+  size_t linebytes = ((size_t)w * bpp + 7u) / 8u; /*widened first: w * bpp can wrap around in unsigned arithmetic*/
+
+  for(y = 0; y < h; ++y) {
+    size_t outindex = linebytes * y;
+    size_t inindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
+    unsigned char filterType = in[inindex];
+
+    CERROR_TRY_RETURN(unfilterScanline(&out[outindex], &in[inindex + 1], prevline, bytewidth, filterType, linebytes));
+
+    prevline = &out[outindex]; /*the line just reconstructed is the reference for the next one*/
+  }
+
+  return 0;
+}
+
+/*
+in: Adam7 interlaced image, with no padding bits between scanlines, but between
+ reduced images so that each reduced image starts at a byte.
+out: the same pixels, but re-ordered so that they're now a non-interlaced image with size w*h
+bpp: bits per pixel
+out has the following size in bits: w * h * bpp.
+in is possibly bigger due to padding bits between reduced images.
+out must be big enough AND must be 0 everywhere if bpp < 8 in the current implementation
+(because that's likely a little bit faster)
+NOTE: comments about padding bits are only relevant if bpp < 8
+*/
+static void Adam7_deinterlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) {
+  unsigned passw[7], passh[7];
+  size_t filter_passstart[8], padded_passstart[8], passstart[8];
+  unsigned i;
+
+  Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
+
+  if(bpp >= 8) {
+    for(i = 0; i != 7; ++i) {
+      unsigned x, y, b;
+      size_t bytewidth = bpp / 8;
+      for(y = 0; y < passh[i]; ++y)
+      for(x = 0; x < passw[i]; ++x) {
+        /*offsets are computed in size_t: the products below can exceed the range of unsigned for big images*/
+        size_t pixelinstart = passstart[i] + ((size_t)y * passw[i] + x) * bytewidth;
+        size_t pixeloutstart = ((ADAM7_IY[i] + (size_t)y * ADAM7_DY[i]) * w
+                             + ADAM7_IX[i] + (size_t)x * ADAM7_DX[i]) * bytewidth;
+        for(b = 0; b < bytewidth; ++b) out[pixeloutstart + b] = in[pixelinstart + b];
+      }
+    }
+  } else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/ {
+    for(i = 0; i != 7; ++i) {
+      unsigned x, y, b;
+      size_t ilinebits = (size_t)bpp * passw[i]; /*bits per scanline of this pass; size_t so offsets cannot wrap*/
+      size_t olinebits = (size_t)bpp * w; /*bits per scanline of the full image*/
+      size_t obp, ibp; /*bit pointers (for out and in buffer)*/
+      for(y = 0; y < passh[i]; ++y)
+      for(x = 0; x < passw[i]; ++x) {
+        ibp = (8 * passstart[i]) + (y * ilinebits + (size_t)x * bpp);
+        obp = (ADAM7_IY[i] + (size_t)y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + (size_t)x * ADAM7_DX[i]) * bpp;
+        for(b = 0; b < bpp; ++b) {
+          unsigned char bit = readBitFromReversedStream(&ibp, in);
+          /*note that this function assumes the out buffer is completely 0, use setBitOfReversedStream otherwise*/
+          setBitOfReversedStream0(&obp, out, bit);
+        }
+      }
+    }
+  }
+}
+
+static void removePaddingBits(unsigned char* out, const unsigned char* in,
+                              size_t olinebits, size_t ilinebits, unsigned h) {
+  /*
+  Compacts h scanlines of ilinebits bits each into scanlines of olinebits bits each, dropping the
+  (ilinebits - olinebits) padding bits at the end of every input line. After unfiltering, scanlines whose
+  bit count is no multiple of 8 still carry such padding; the Adam7 code, the color convert code and the
+  output to the user want pure image buffers (padding may only remain after the last scanline).
+  in and out may be the same buffer, and in may start later than out while overlapping it; in must hold
+  >= ilinebits*h bits, out >= olinebits*h bits, and olinebits must be <= ilinebits.
+  Only useful if (ilinebits - olinebits) is a value in the range 1..7.
+  */
+  const size_t padding = ilinebits - olinebits;
+  size_t src = 0, dst = 0; /*bit positions in in and out, passed by address to the bit stream helpers*/
+  unsigned row;
+
+  for(row = 0; row != h; ++row) {
+    size_t left;
+    for(left = olinebits; left != 0; --left) {
+      setBitOfReversedStream(&dst, out, readBitFromReversedStream(&src, in));
+    }
+    src += padding; /*skip the padding bits that end this input line*/
+  }
+}
+
+/*out must be buffer big enough to contain full image, and in must contain the full decompressed data from
+the IDAT chunks (with filter index bytes and possible padding bits)
+return value is error*/
+static unsigned postProcessScanlines(unsigned char* out, unsigned char* in,
+                                     unsigned w, unsigned h, const LodePNGInfo* info_png) {
+  /*
+  This function converts the filtered-padded-interlaced data into pure 2D image buffer with the PNG's colortype.
+  Steps:
+  *) if no Adam7: 1) unfilter 2) remove padding bits (= possible extra bits per scanline if bpp < 8)
+  *) if adam7: 1) 7x unfilter 2) 7x remove padding bits 3) Adam7_deinterlace
+  NOTE: the in buffer will be overwritten with intermediate data!
+  */
+  unsigned bpp = lodepng_get_bpp(&info_png->color);
+  if(bpp == 0) return 31; /*error: invalid colortype*/
+  if(info_png->interlace_method == 0) {
+    size_t linebits = (size_t)w * bpp; /*bits per scanline, in size_t: w * bpp can wrap around in unsigned*/
+    if(bpp < 8 && linebits != ((linebits + 7u) / 8u) * 8u) {
+      CERROR_TRY_RETURN(unfilter(in, in, w, h, bpp));
+      removePaddingBits(out, in, linebits, ((linebits + 7u) / 8u) * 8u, h);
+    }
+    /*we can immediately filter into the out buffer, no other steps needed*/
+    else CERROR_TRY_RETURN(unfilter(out, in, w, h, bpp));
+  } else /*interlace_method is 1 (Adam7)*/ {
+    unsigned passw[7], passh[7]; size_t filter_passstart[8], padded_passstart[8], passstart[8];
+    unsigned i;
+
+    Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
+
+    for(i = 0; i != 7; ++i) {
+      CERROR_TRY_RETURN(unfilter(&in[padded_passstart[i]], &in[filter_passstart[i]], passw[i], passh[i], bpp));
+      /*TODO: possible efficiency improvement: if in this reduced image the bits fit nicely in 1 scanline,
+      move bytes instead of bits or move not at all*/
+      if(bpp < 8) {
+        /*remove padding bits in scanlines; after this there still may be padding
+        bits between the different reduced images: each reduced image still starts nicely at a byte*/
+        size_t linebits = (size_t)passw[i] * bpp; /*bits per scanline of this pass, widened like above*/
+        removePaddingBits(&in[passstart[i]], &in[padded_passstart[i]], linebits, ((linebits + 7u) / 8u) * 8u, passh[i]);
+      }
+    }
+
+    Adam7_deinterlace(out, in, w, h, bpp);
+  }
+
+  return 0;
+}
+
+static unsigned readChunk_PLTE(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength) {
+  /*stores the RGB triplets of a PLTE chunk as RGBA in color->palette. Returns 0 if ok, 38 or 83 on error*/
+  size_t i, count = chunkLength / 3; /*a trailing partial triplet is ignored*/
+  if(count > 256) return 38; /*error: palette too big; checked before freeing or allocating anything*/
+  if(color->palette) lodepng_free(color->palette);
+  color->palettesize = count;
+  color->palette = (unsigned char*)lodepng_malloc(4 * count);
+  if(!color->palette && count) {
+    color->palettesize = 0;
+    return 83; /*alloc fail*/
+  }
+  for(i = 0; i != count; ++i) {
+    color->palette[4 * i + 0] = data[3 * i + 0]; /*R*/
+    color->palette[4 * i + 1] = data[3 * i + 1]; /*G*/
+    color->palette[4 * i + 2] = data[3 * i + 2]; /*B*/
+    color->palette[4 * i + 3] = 255; /*alpha: opaque here, readChunk_tRNS may overwrite it*/
+  }
+
+  return 0; /* OK */
+}
+
+static unsigned readChunk_tRNS(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength) {
+  /*applies a tRNS chunk: one alpha value per palette entry, or a single 16-bit color key for grey and RGB*/
+  size_t k;
+  switch(color->colortype) {
+    case LCT_PALETTE:
+      /*error: more alpha values given than there are palette entries*/
+      if(chunkLength > color->palettesize) return 39;
+      for(k = 0; k != chunkLength; ++k) color->palette[4 * k + 3] = data[k];
+      return 0; /* OK */
+    case LCT_GREY:
+      /*error: this chunk must be 2 bytes for greyscale image*/
+      if(chunkLength != 2) return 30;
+      color->key_defined = 1;
+      color->key_r = color->key_g = color->key_b = 256u * data[0] + data[1];
+      return 0; /* OK */
+    case LCT_RGB:
+      /*error: this chunk must be 6 bytes for RGB image*/
+      if(chunkLength != 6) return 41;
+      color->key_defined = 1;
+      color->key_r = 256u * data[0] + data[1];
+      color->key_g = 256u * data[2] + data[3];
+      color->key_b = 256u * data[4] + data[5];
+      return 0; /* OK */
+    default: return 42; /*error: tRNS chunk not allowed for other color models*/
+  }
+}
+
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+/*background color chunk (bKGD): stores the background color in info; the return value is the error code*/
+static unsigned readChunk_bKGD(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  const LodePNGColorType type = info->color.colortype;
+  unsigned r, g, b;
+  if(type == LCT_PALETTE) {
+    /*error: this chunk must be 1 byte for indexed color image*/
+    if(chunkLength != 1) return 43;
+    /*error: invalid palette index, or maybe this chunk appeared before PLTE*/
+    if(data[0] >= info->color.palettesize) return 103;
+    r = g = b = data[0]; /*the palette index is stored in all three channels*/
+  } else if(type == LCT_GREY || type == LCT_GREY_ALPHA) {
+    /*error: this chunk must be 2 bytes for greyscale image*/
+    if(chunkLength != 2) return 44;
+    /*the values are truncated to bitdepth in the PNG file*/
+    r = g = b = 256u * data[0] + data[1];
+  } else if(type == LCT_RGB || type == LCT_RGBA) {
+    /*error: this chunk must be 6 bytes for RGB image*/
+    if(chunkLength != 6) return 45;
+    /*the values are truncated to bitdepth in the PNG file*/
+    r = 256u * data[0] + data[1];
+    g = 256u * data[2] + data[3];
+    b = 256u * data[4] + data[5];
+  } else {
+    return 0; /*any other color type value: nothing is stored and no error is reported*/
+  }
+  info->background_defined = 1;
+  info->background_r = r;
+  info->background_g = g;
+  info->background_b = b;
+  return 0; /* OK */
+}
+
+/*text chunk (tEXt): copies the keyword and the text after its null separator and passes both to lodepng_add_text*/
+static unsigned readChunk_tEXt(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  unsigned error = 0;
+  char *key = 0, *str = 0; /*temporary null terminated copies, freed at the end on every path*/
+  unsigned i;
+
+  while(!error) /*not really a while loop, only used to break on error*/ {
+    unsigned length, string2_begin;
+
+    length = 0; /*length of the keyword: everything up to the first null byte or the end of the chunk*/
+    while(length < chunkLength && data[length] != 0) ++length;
+    /*even though it's not allowed by the standard, a missing null terminator after the
+    keyword is not treated as an error: the text is then simply empty*/
+    if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
+
+    key = (char*)lodepng_malloc(length + 1);
+    if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
+
+    key[length] = 0;
+    for(i = 0; i != length; ++i) key[i] = (char)data[i];
+
+    string2_begin = length + 1; /*skip keyword null terminator*/
+
+    length = (unsigned)(chunkLength < string2_begin ? 0 : chunkLength - string2_begin); /*rest of chunk is the text*/
+    str = (char*)lodepng_malloc(length + 1);
+    if(!str) CERROR_BREAK(error, 83); /*alloc fail*/
+
+    str[length] = 0;
+    for(i = 0; i != length; ++i) str[i] = (char)data[string2_begin + i];
+
+    error = lodepng_add_text(info, key, str); /*presumably stores its own copies, given the frees below*/
+
+    break;
+  }
+
+  lodepng_free(key);
+  lodepng_free(str);
+
+  return error;
+}
+
+/*compressed text chunk (zTXt): keyword, null separator, compression method byte, then zlib compressed text*/
+static unsigned readChunk_zTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
+                               const unsigned char* data, size_t chunkLength) {
+  unsigned error = 0;
+  unsigned i;
+
+  unsigned length, string2_begin;
+  char *key = 0;
+  ucvector decoded; /*receives the decompressed text; cleaned up at the end on every path*/
+
+  ucvector_init(&decoded);
+
+  while(!error) /*not really a while loop, only used to break on error*/ {
+    for(length = 0; length < chunkLength && data[length] != 0; ++length) ;
+    if(length + 2 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/
+    if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
+
+    key = (char*)lodepng_malloc(length + 1);
+    if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
+
+    key[length] = 0;
+    for(i = 0; i != length; ++i) key[i] = (char)data[i];
+
+    if(data[length + 1] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/
+
+    string2_begin = length + 2; /*skip the keyword's null terminator and the compression method byte*/
+    if(string2_begin > chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/
+
+    length = (unsigned)chunkLength - string2_begin;
+    /*will fail if zlib error, e.g. if length is too small*/
+    error = zlib_decompress(&decoded.data, &decoded.size,
+                            (unsigned char*)(&data[string2_begin]),
+                            length, zlibsettings);
+    if(error) break;
+    if(!ucvector_push_back(&decoded, 0)) CERROR_BREAK(error, 83 /*alloc fail: text would lack its terminator*/);
+
+    error = lodepng_add_text(info, key, (char*)decoded.data); /*decoded.data is a null terminated string here*/
+
+    break;
+  }
+
+  lodepng_free(key);
+  ucvector_cleanup(&decoded);
+
+  return error;
+}
+
+/*international text chunk (iTXt): keyword, compression flag and method, language tag, translated keyword, text*/
+static unsigned readChunk_iTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
+                               const unsigned char* data, size_t chunkLength) {
+  unsigned error = 0;
+  unsigned i;
+
+  unsigned length, begin, compressed;
+  char *key = 0, *langtag = 0, *transkey = 0; /*temporary null terminated copies, freed at the end on every path*/
+  ucvector decoded;
+  ucvector_init(&decoded); /* TODO: only use in case of compressed text */
+
+  while(!error) /*not really a while loop, only used to break on error*/ {
+    /*Quick check if the chunk length isn't too small. Even without check
+    it'd still fail with other error checks below if it's too short. This just gives a different error code.*/
+    if(chunkLength < 5) CERROR_BREAK(error, 30); /*iTXt chunk too short*/
+
+    /*read the key*/
+    for(length = 0; length < chunkLength && data[length] != 0; ++length) ;
+    if(length + 3 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination char, corrupt?*/
+    if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
+
+    key = (char*)lodepng_malloc(length + 1);
+    if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
+
+    key[length] = 0;
+    for(i = 0; i != length; ++i) key[i] = (char)data[i];
+
+    /*read the compression flag and the compression method*/
+    compressed = data[length + 1];
+    if(data[length + 2] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/
+
+    /*even though it's not allowed by the standard, no error is thrown if
+    there's no null termination char, if the text is empty for the next 3 texts*/
+
+    /*read the langtag*/
+    begin = length + 3;
+    length = 0;
+    for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length;
+
+    langtag = (char*)lodepng_malloc(length + 1);
+    if(!langtag) CERROR_BREAK(error, 83); /*alloc fail*/
+
+    langtag[length] = 0;
+    for(i = 0; i != length; ++i) langtag[i] = (char)data[begin + i];
+
+    /*read the transkey*/
+    begin += length + 1;
+    length = 0;
+    for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length;
+
+    transkey = (char*)lodepng_malloc(length + 1);
+    if(!transkey) CERROR_BREAK(error, 83); /*alloc fail*/
+
+    transkey[length] = 0;
+    for(i = 0; i != length; ++i) transkey[i] = (char)data[begin + i];
+
+    /*read the actual text*/
+    begin += length + 1;
+
+    length = (unsigned)chunkLength < begin ? 0 : (unsigned)chunkLength - begin;
+
+    if(compressed) {
+      /*will fail if zlib error, e.g. if length is too small*/
+      error = zlib_decompress(&decoded.data, &decoded.size,
+                              (unsigned char*)(&data[begin]),
+                              length, zlibsettings);
+      if(error) break;
+      if(decoded.allocsize < decoded.size) decoded.allocsize = decoded.size;
+      if(!ucvector_push_back(&decoded, 0)) CERROR_BREAK(error, 83 /*alloc fail: text would lack its terminator*/);
+    } else {
+      if(!ucvector_resize(&decoded, length + 1)) CERROR_BREAK(error, 83 /*alloc fail*/);
+
+      decoded.data[length] = 0;
+      for(i = 0; i != length; ++i) decoded.data[i] = data[begin + i];
+    }
+
+    error = lodepng_add_itext(info, key, langtag, transkey, (char*)decoded.data); /*all four are terminated here*/
+
+    break;
+  }
+
+  lodepng_free(key);
+  lodepng_free(langtag);
+  lodepng_free(transkey);
+  ucvector_cleanup(&decoded);
+
+  return error;
+}
+
+static unsigned readChunk_tIME(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  if(chunkLength == 7) {
+    info->time_defined = 1;
+    info->time.year = ((unsigned)data[0] << 8) | data[1]; /*2 bytes, most significant one first*/
+    info->time.month = data[2];
+    info->time.day = data[3];
+    info->time.hour = data[4];
+    info->time.minute = data[5];
+    info->time.second = data[6];
+    return 0; /* OK */
+  }
+  return 73; /*invalid tIME chunk size*/
+}
+
+static unsigned readChunk_pHYs(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  /*pHYs: two 4-byte values, most significant byte first (x, then y), followed by a one byte unit*/
+  if(chunkLength != 9) return 74; /*invalid pHYs chunk size*/
+  info->phys_defined = 1;
+  info->phys_x = ((unsigned)data[0] << 24) | ((unsigned)data[1] << 16) | ((unsigned)data[2] << 8) | data[3];
+  info->phys_y = ((unsigned)data[4] << 24) | ((unsigned)data[5] << 16) | ((unsigned)data[6] << 8) | data[7];
+  info->phys_unit = data[8];
+
+  return 0; /* OK */
+}
+
+static unsigned readChunk_gAMA(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  /*gAMA holds a single 4-byte value, most significant byte first*/
+  if(chunkLength != 4) return 96; /*invalid gAMA chunk size*/
+  info->gama_defined = 1;
+  info->gama_gamma = ((unsigned)data[0] << 24) | ((unsigned)data[1] << 16) | ((unsigned)data[2] << 8) | data[3];
+
+  return 0; /* OK */
+}
+
+static unsigned readChunk_cHRM(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  unsigned k, v[8]; /*v: white x, white y, red x, red y, green x, green y, blue x, blue y*/
+  if(chunkLength != 32) return 97; /*invalid cHRM chunk size*/
+  /*the chunk is eight consecutive 4-byte values, each with its most significant byte first*/
+  for(k = 0; k != 8; ++k) {
+    const unsigned char* p = data + 4 * k;
+    v[k] = ((unsigned)p[0] << 24) | ((unsigned)p[1] << 16) | ((unsigned)p[2] << 8) | p[3];
+  }
+  info->chrm_defined = 1;
+  info->chrm_white_x = v[0]; info->chrm_white_y = v[1];
+  info->chrm_red_x   = v[2]; info->chrm_red_y   = v[3];
+  info->chrm_green_x = v[4]; info->chrm_green_y = v[5];
+  info->chrm_blue_x  = v[6]; info->chrm_blue_y  = v[7];
+  return 0; /* OK */
+}
+
+/*Reads an sRGB chunk: one byte holding the rendering intent.
+Returns 0 on success, 98 if the payload is not exactly 1 byte.*/
+static unsigned readChunk_sRGB(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
+  if(chunkLength == 1) {
+    info->srgb_defined = 1;
+    info->srgb_intent = data[0];
+    return 0; /* OK */
+  }
+
+  return 98; /*invalid sRGB chunk size (this one is never ignored)*/
+}
+
+/*
+Reads an iCCP chunk: a profile name of 1..79 bytes, a null terminator, a
+compression method byte (must be 0) and a zlib stream holding the ICC profile.
+The name is copied to info->iccp_name and the decompressed profile to
+info->iccp_profile / info->iccp_profile_size.
+Returns 0 on success, 75 if the layout is truncated, 89 on a bad name length,
+72 on a non-zero compression method, 83 on allocation failure, or the error
+code of zlib_decompress.
+*/
+static unsigned readChunk_iCCP(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
+                               const unsigned char* data, size_t chunkLength) {
+  unsigned error = 0;
+  unsigned i;
+
+  unsigned length, string2_begin;
+  ucvector decoded;
+
+  /*Drop a previously stored profile first and only then raise the flag, so the
+  flag survives even if lodepng_clear_icc resets it (NOTE(review): confirm in
+  lodepng_clear_icc).*/
+  if(info->iccp_name) lodepng_clear_icc(info);
+  info->iccp_defined = 1;
+
+  for(length = 0; length < chunkLength && data[length] != 0; ++length) ;
+  if(length + 2 >= chunkLength) return 75; /*no null termination, corrupt?*/
+  if(length < 1 || length > 79) return 89; /*keyword too short or long*/
+
+  info->iccp_name = (char*)lodepng_malloc(length + 1);
+  if(!info->iccp_name) return 83; /*alloc fail*/
+
+  info->iccp_name[length] = 0;
+  for(i = 0; i != length; ++i) info->iccp_name[i] = (char)data[i];
+
+  if(data[length + 1] != 0) return 72; /*the 0 byte indicating compression must be 0*/
+
+  string2_begin = length + 2;
+  if(string2_begin > chunkLength) return 75; /*no null termination, corrupt?*/
+
+  length = (unsigned)chunkLength - string2_begin;
+  ucvector_init(&decoded);
+  error = zlib_decompress(&decoded.data, &decoded.size,
+                          (unsigned char*)(&data[string2_begin]),
+                          length, zlibsettings);
+  if(!error) {
+    info->iccp_profile = (unsigned char*)lodepng_malloc(decoded.size);
+    if(info->iccp_profile) {
+      memcpy(info->iccp_profile, decoded.data, decoded.size);
+      /*publish the size only together with a valid buffer*/
+      info->iccp_profile_size = decoded.size;
+    } else {
+      /*never leave a non-zero size next to a NULL profile pointer*/
+      info->iccp_profile_size = 0;
+      error = 83; /* alloc fail */
+    }
+  }
+  ucvector_cleanup(&decoded);
+  return error;
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*
+Parses the single chunk that starts at byte offset pos of the PNG data
+in[0..insize) and, if its type is one of those handled below, stores its
+contents in state->info_png.
+Returns 0 on success and also for an unhandled chunk type, 30 if the chunk
+does not fit in the buffer, 63 if its length field exceeds 2147483647, 57 if
+the CRC of a handled chunk is wrong (unless state->decoder.ignore_crc is set),
+or otherwise the error code of the chunk reader.
+*/
+unsigned lodepng_inspect_chunk(LodePNGState* state, size_t pos,
+                               const unsigned char* in, size_t insize) {
+  const unsigned char* chunk;
+  unsigned chunkLength;
+  const unsigned char* data;
+  unsigned unhandled = 0;
+  unsigned error = 0;
+
+  /*Bounds are checked on sizes rather than on pointers: "pos + 4" could wrap
+  around, and forming a pointer past the end of the buffer is undefined.*/
+  if(pos > insize || insize - pos < 4) return 30;
+  chunk = in + pos;
+  chunkLength = lodepng_chunk_length(chunk);
+  if(chunkLength > 2147483647) return 63;
+  /*the whole chunk is 4 length + 4 type + chunkLength data + 4 CRC bytes*/
+  if((size_t)chunkLength + 12 > insize - pos) return 30;
+  data = lodepng_chunk_data_const(chunk);
+
+  if(lodepng_chunk_type_equals(chunk, "PLTE")) {
+    error = readChunk_PLTE(&state->info_png.color, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "tRNS")) {
+    error = readChunk_tRNS(&state->info_png.color, data, chunkLength);
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  } else if(lodepng_chunk_type_equals(chunk, "bKGD")) {
+    error = readChunk_bKGD(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "tEXt")) {
+    error = readChunk_tEXt(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "zTXt")) {
+    error = readChunk_zTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "iTXt")) {
+    error = readChunk_iTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "tIME")) {
+    error = readChunk_tIME(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "pHYs")) {
+    error = readChunk_pHYs(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "gAMA")) {
+    error = readChunk_gAMA(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "cHRM")) {
+    error = readChunk_cHRM(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "sRGB")) {
+    error = readChunk_sRGB(&state->info_png, data, chunkLength);
+  } else if(lodepng_chunk_type_equals(chunk, "iCCP")) {
+    error = readChunk_iCCP(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+  } else {
+    /* unhandled chunk is ok (is not an error) */
+    unhandled = 1;
+  }
+
+  /*the CRC is only verified for chunks that were parsed successfully*/
+  if(!error && !unhandled && !state->decoder.ignore_crc) {
+    if(lodepng_chunk_check_crc(chunk)) return 57; /*invalid CRC*/
+  }
+
+  return error;
+}
+
+/*read a PNG, the result will be in the same color type as the PNG (hence "generic"); the outcome is reported through state->error*/
+static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h,
+                          LodePNGState* state,
+                          const unsigned char* in, size_t insize) {
+  unsigned char IEND = 0; /*becomes 1 once the IEND chunk has been seen, which ends the chunk loop*/
+  const unsigned char* chunk;
+  size_t i;
+  ucvector idat; /*the data from idat chunks*/
+  ucvector scanlines;
+  size_t predict; /*expected byte size of the decompressed scanline data*/
+  size_t outsize = 0;
+
+  /*for unknown chunk order*/
+  unsigned unknown = 0; /*1 when the current chunk has a type that is not handled below*/
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  unsigned critical_pos = 1; /*1 = after IHDR, 2 = after PLTE, 3 = after IDAT*/
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+  /*provide some proper output values if error will happen*/
+  *out = 0;
+
+  state->error = lodepng_inspect(w, h, state, in, insize); /*reads header and resets other parameters in state->info_png*/
+  if(state->error) return;
+
+  if(lodepng_pixel_overflow(*w, *h, &state->info_png.color, &state->info_raw)) {
+    CERROR_RETURN(state->error, 92); /*overflow possible due to amount of pixels*/
+  }
+
+  ucvector_init(&idat);
+  chunk = &in[33]; /*first byte of the first chunk after the header*/
+
+  /*loop through the chunks, ignoring unknown chunks and stopping at IEND chunk.
+  The payload of every IDAT chunk is appended to the idat vector*/
+  while(!IEND && !state->error) {
+    unsigned chunkLength;
+    const unsigned char* data; /*the data in the chunk*/
+
+    /*error: size of the in buffer too small to contain next chunk*/
+    if((size_t)((chunk - in) + 12) > insize || chunk < in) {
+      if(state->decoder.ignore_end) break; /*other errors may still happen though*/
+      CERROR_BREAK(state->error, 30);
+    }
+
+    /*length of the data of the chunk, excluding the length bytes, chunk type and CRC bytes*/
+    chunkLength = lodepng_chunk_length(chunk);
+    /*error: chunk length larger than the max PNG chunk size*/
+    if(chunkLength > 2147483647) {
+      if(state->decoder.ignore_end) break; /*other errors may still happen though*/
+      CERROR_BREAK(state->error, 63);
+    }
+
+    if((size_t)((chunk - in) + chunkLength + 12) > insize || (chunk + chunkLength + 12) < in) {
+      CERROR_BREAK(state->error, 64); /*error: size of the in buffer too small to contain next chunk*/
+    }
+
+    data = lodepng_chunk_data_const(chunk);
+
+    unknown = 0; /*reset per chunk; set again in the final else branch*/
+
+    /*IDAT chunk, containing compressed image data*/
+    if(lodepng_chunk_type_equals(chunk, "IDAT")) {
+      size_t oldsize = idat.size;
+      size_t newsize;
+      if(lodepng_addofl(oldsize, chunkLength, &newsize)) CERROR_BREAK(state->error, 95); /*presumably: oldsize + chunkLength overflows; confirm in lodepng_addofl*/
+      if(!ucvector_resize(&idat, newsize)) CERROR_BREAK(state->error, 83 /*alloc fail*/);
+      for(i = 0; i != chunkLength; ++i) idat.data[oldsize + i] = data[i]; /*append this chunk's payload*/
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+      critical_pos = 3;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    } else if(lodepng_chunk_type_equals(chunk, "IEND")) {
+      /*IEND chunk*/
+      IEND = 1;
+    } else if(lodepng_chunk_type_equals(chunk, "PLTE")) {
+      /*palette chunk (PLTE)*/
+      state->error = readChunk_PLTE(&state->info_png.color, data, chunkLength);
+      if(state->error) break;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+      critical_pos = 2;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    } else if(lodepng_chunk_type_equals(chunk, "tRNS")) {
+      /*palette transparency chunk (tRNS). Even though this one is an ancillary chunk , it is still compiled
+      in without 'LODEPNG_COMPILE_ANCILLARY_CHUNKS' because it contains essential color information that
+      affects the alpha channel of pixels. */
+      state->error = readChunk_tRNS(&state->info_png.color, data, chunkLength);
+      if(state->error) break;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+      /*background color chunk (bKGD)*/
+    } else if(lodepng_chunk_type_equals(chunk, "bKGD")) {
+      state->error = readChunk_bKGD(&state->info_png, data, chunkLength);
+      if(state->error) break;
+    } else if(lodepng_chunk_type_equals(chunk, "tEXt")) {
+      /*text chunk (tEXt)*/
+      if(state->decoder.read_text_chunks) {
+        state->error = readChunk_tEXt(&state->info_png, data, chunkLength);
+        if(state->error) break;
+      }
+    } else if(lodepng_chunk_type_equals(chunk, "zTXt")) {
+      /*compressed text chunk (zTXt)*/
+      if(state->decoder.read_text_chunks) {
+        state->error = readChunk_zTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+        if(state->error) break;
+      }
+    } else if(lodepng_chunk_type_equals(chunk, "iTXt")) {
+      /*international text chunk (iTXt)*/
+      if(state->decoder.read_text_chunks) {
+        state->error = readChunk_iTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+        if(state->error) break;
+      }
+    } else if(lodepng_chunk_type_equals(chunk, "tIME")) {
+      state->error = readChunk_tIME(&state->info_png, data, chunkLength);
+      if(state->error) break;
+    } else if(lodepng_chunk_type_equals(chunk, "pHYs")) {
+      state->error = readChunk_pHYs(&state->info_png, data, chunkLength);
+      if(state->error) break;
+    } else if(lodepng_chunk_type_equals(chunk, "gAMA")) {
+      state->error = readChunk_gAMA(&state->info_png, data, chunkLength);
+      if(state->error) break;
+    } else if(lodepng_chunk_type_equals(chunk, "cHRM")) {
+      state->error = readChunk_cHRM(&state->info_png, data, chunkLength);
+      if(state->error) break;
+    } else if(lodepng_chunk_type_equals(chunk, "sRGB")) {
+      state->error = readChunk_sRGB(&state->info_png, data, chunkLength);
+      if(state->error) break;
+    } else if(lodepng_chunk_type_equals(chunk, "iCCP")) {
+      state->error = readChunk_iCCP(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+      if(state->error) break;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    } else /*it's not an implemented chunk type, so ignore it: skip over the data*/ {
+      /*error: unknown critical chunk (5th bit of first byte of chunk type is 0)*/
+      if(!state->decoder.ignore_critical && !lodepng_chunk_ancillary(chunk)) {
+        CERROR_BREAK(state->error, 69);
+      }
+
+      unknown = 1;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+      if(state->decoder.remember_unknown_chunks) {
+        state->error = lodepng_chunk_append(&state->info_png.unknown_chunks_data[critical_pos - 1],
+                                            &state->info_png.unknown_chunks_size[critical_pos - 1], chunk);
+        if(state->error) break;
+      }
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    }
+
+    if(!state->decoder.ignore_crc && !unknown) /*check CRC if wanted, only on known chunk types*/ {
+      if(lodepng_chunk_check_crc(chunk)) CERROR_BREAK(state->error, 57); /*invalid CRC*/
+    }
+
+    if(!IEND) chunk = lodepng_chunk_next_const(chunk); /*step to the chunk that follows this one*/
+  }
+
+  ucvector_init(&scanlines);
+  /*predict output size, to allocate exact size for output buffer to avoid more dynamic allocation.
+  If the decompressed size does not match the prediction, the image must be corrupt.*/
+  if(state->info_png.interlace_method == 0) {
+    predict = lodepng_get_raw_size_idat(*w, *h, &state->info_png.color);
+  } else {
+    /*Adam-7 interlaced: predicted size is the sum of the 7 sub-images sizes*/
+    const LodePNGColorMode* color = &state->info_png.color;
+    predict = 0;
+    predict += lodepng_get_raw_size_idat((*w + 7) >> 3, (*h + 7) >> 3, color);
+    if(*w > 4) predict += lodepng_get_raw_size_idat((*w + 3) >> 3, (*h + 7) >> 3, color);
+    predict += lodepng_get_raw_size_idat((*w + 3) >> 2, (*h + 3) >> 3, color);
+    if(*w > 2) predict += lodepng_get_raw_size_idat((*w + 1) >> 2, (*h + 3) >> 2, color);
+    predict += lodepng_get_raw_size_idat((*w + 1) >> 1, (*h + 1) >> 2, color);
+    if(*w > 1) predict += lodepng_get_raw_size_idat((*w + 0) >> 1, (*h + 1) >> 1, color);
+    predict += lodepng_get_raw_size_idat((*w + 0), (*h + 0) >> 1, color);
+  }
+  if(!state->error && !ucvector_reserve(&scanlines, predict)) state->error = 83; /*alloc fail*/
+  if(!state->error) {
+    state->error = zlib_decompress(&scanlines.data, &scanlines.size, idat.data,
+                                   idat.size, &state->decoder.zlibsettings);
+    if(!state->error && scanlines.size != predict) state->error = 91; /*decompressed size doesn't match prediction*/
+  }
+  ucvector_cleanup(&idat); /*the compressed data is not used past this point*/
+
+  if(!state->error) {
+    outsize = lodepng_get_raw_size(*w, *h, &state->info_png.color);
+    *out = (unsigned char*)lodepng_malloc(outsize);
+    if(!*out) state->error = 83; /*alloc fail*/
+  }
+  if(!state->error) {
+    for(i = 0; i < outsize; i++) (*out)[i] = 0; /*zero the output buffer before postProcessScanlines fills it*/
+    state->error = postProcessScanlines(*out, scanlines.data, *w, *h, &state->info_png);
+  }
+  ucvector_cleanup(&scanlines); /*runs on both the success and the error path*/
+}
+
+/*
+Decodes the PNG in in[0..insize) into a newly allocated pixel buffer *out and
+stores the dimensions in *w and *h.
+If state->decoder.color_convert is set and state->info_raw differs from the
+PNG's own color mode, the pixels are converted to state->info_raw; otherwise
+the buffer keeps the PNG's color mode (and, when color_convert is off,
+info_raw is overwritten with that mode).
+Returns 0 on success or an error code; the same code is left in state->error.
+*/
+unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h,
+                        LodePNGState* state,
+                        const unsigned char* in, size_t insize) {
+  *out = 0;
+  decodeGeneric(out, w, h, state, in, insize);
+  if(state->error) return state->error;
+  if(!state->decoder.color_convert || lodepng_color_mode_equal(&state->info_raw, &state->info_png.color)) {
+    /*same color type, no copying or converting of data needed*/
+    /*store the info_png color settings on the info_raw so that the info_raw still reflects what colortype
+    the raw image has to the end user*/
+    if(!state->decoder.color_convert) {
+      state->error = lodepng_color_mode_copy(&state->info_raw, &state->info_png.color);
+      if(state->error) return state->error;
+    }
+  } else {
+    /*color conversion needed; sort of copy of the data*/
+    unsigned char* data = *out;
+    size_t outsize;
+
+    /*TODO: check if this works according to the statement in the documentation: "The converter can convert
+    from greyscale input color type, to 8-bit greyscale or greyscale with alpha"*/
+    if(!(state->info_raw.colortype == LCT_RGB || state->info_raw.colortype == LCT_RGBA)
+       && !(state->info_raw.bitdepth == 8)) {
+      /*Release the unconverted pixels (as the alloc-fail path below does) and
+      record the code, so the return value and state->error agree.*/
+      lodepng_free(data);
+      *out = 0;
+      state->error = 56; /*unsupported color mode conversion*/
+      return state->error;
+    }
+
+    outsize = lodepng_get_raw_size(*w, *h, &state->info_raw);
+    *out = (unsigned char*)lodepng_malloc(outsize);
+    if(!(*out)) {
+      state->error = 83; /*alloc fail*/
+    }
+    else state->error = lodepng_convert(*out, data, &state->info_raw,
+                                        &state->info_png.color, *w, *h);
+    lodepng_free(data); /*the buffer in the PNG's color mode is no longer needed*/
+  }
+  return state->error;
+}
+
+/*Decodes a PNG held in memory to the requested raw color type and bit depth,
+using a temporary LodePNGState that is cleaned up before returning.
+Returns the error code of lodepng_decode.*/
+unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in,
+                               size_t insize, LodePNGColorType colortype, unsigned bitdepth) {
+  LodePNGState state;
+  unsigned result;
+
+  lodepng_state_init(&state);
+  /*ask for the caller's output format*/
+  state.info_raw.bitdepth = bitdepth;
+  state.info_raw.colortype = colortype;
+
+  result = lodepng_decode(out, w, h, &state, in, insize);
+  lodepng_state_cleanup(&state);
+
+  return result;
+}
+
+/*Convenience wrapper: decodes an in-memory PNG to 8-bit-per-channel RGBA.*/
+unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize) {
+  const LodePNGColorType colortype = LCT_RGBA;
+  const unsigned bitdepth = 8;
+  return lodepng_decode_memory(out, w, h, in, insize, colortype, bitdepth);
+}
+
+/*Convenience wrapper: decodes an in-memory PNG to 8-bit-per-channel RGB.*/
+unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize) {
+  const LodePNGColorType colortype = LCT_RGB;
+  const unsigned bitdepth = 8;
+  return lodepng_decode_memory(out, w, h, in, insize, colortype, bitdepth);
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Loads the file `filename` into memory and decodes it to the requested raw
+color type and bit depth. *out is set to 0 up front, so it holds a defined
+value even when the file cannot be loaded.
+Returns 0 on success, or the error code of lodepng_load_file or
+lodepng_decode_memory.
+*/
+unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename,
+                             LodePNGColorType colortype, unsigned bitdepth) {
+  unsigned char* buffer = 0;
+  size_t buffersize;
+  unsigned error;
+  /*safe output value in case loading fails before the decoder is reached*/
+  *out = 0;
+  error = lodepng_load_file(&buffer, &buffersize, filename);
+  if(!error) error = lodepng_decode_memory(out, w, h, buffer, buffersize, colortype, bitdepth);
+  lodepng_free(buffer);
+  return error;
+}
+
+/*Convenience wrapper: decodes a PNG file to 8-bit-per-channel RGBA.*/
+unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename) {
+  const LodePNGColorType colortype = LCT_RGBA;
+  const unsigned bitdepth = 8;
+  return lodepng_decode_file(out, w, h, filename, colortype, bitdepth);
+}
+
+/*Convenience wrapper: decodes a PNG file to 8-bit-per-channel RGB.*/
+unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename) {
+  const LodePNGColorType colortype = LCT_RGB;
+  const unsigned bitdepth = 8;
+  return lodepng_decode_file(out, w, h, filename, colortype, bitdepth);
+}
+#endif /*LODEPNG_COMPILE_DISK*/
+
+/*Fills in the default decoder settings: color conversion on, nothing ignored
+(CRC, unknown critical chunks, missing end), text chunks read and unknown
+chunks not remembered, plus the default zlib decompression settings.*/
+void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings) {
+  lodepng_decompress_settings_init(&settings->zlibsettings);
+
+  /*strict by default: none of the checks is skipped*/
+  settings->ignore_end = 0;
+  settings->ignore_critical = 0;
+  settings->ignore_crc = 0;
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  settings->remember_unknown_chunks = 0;
+  settings->read_text_chunks = 1;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+  settings->color_convert = 1;
+}
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER)
+
+/*Initializes every member of a LodePNGState: the raw and PNG color/info
+structures, the decoder and encoder settings (when compiled in), and the
+error field, which starts out as 1.*/
+void lodepng_state_init(LodePNGState* state) {
+  state->error = 1;
+  lodepng_info_init(&state->info_png);
+  lodepng_color_mode_init(&state->info_raw);
+#ifdef LODEPNG_COMPILE_ENCODER
+  lodepng_encoder_settings_init(&state->encoder);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+#ifdef LODEPNG_COMPILE_DECODER
+  lodepng_decoder_settings_init(&state->decoder);
+#endif /*LODEPNG_COMPILE_DECODER*/
+}
+
+/*Cleans up the two members of the state that have their own cleanup
+functions: info_png and info_raw.*/
+void lodepng_state_cleanup(LodePNGState* state) {
+  lodepng_info_cleanup(&state->info_png);
+  lodepng_color_mode_cleanup(&state->info_raw);
+}
+
+/*Makes dest a copy of source. dest is cleaned up first; after the plain struct
+assignment, info_raw and info_png are re-initialized and then copied with their
+own copy functions instead of sharing source's members.
+The outcome is stored in dest->error.*/
+void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source) {
+  lodepng_state_cleanup(dest);
+  *dest = *source;
+
+  /*reset the members copied by value above before filling them again*/
+  lodepng_color_mode_init(&dest->info_raw);
+  lodepng_info_init(&dest->info_png);
+
+  dest->error = lodepng_color_mode_copy(&dest->info_raw, &source->info_raw);
+  if(!dest->error) {
+    dest->error = lodepng_info_copy(&dest->info_png, &source->info_png);
+  }
+}
+
+#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / PNG Encoder                                                            / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*appends a chunk of the given type and data to out via lodepng_chunk_create; chunkName must be string of 4 characters; returns 0 here, errors are presumably returned by CERROR_TRY_RETURN (macro not visible, confirm)*/
+static unsigned addChunk(ucvector* out, const char* chunkName, const unsigned char* data, size_t length) {
+  CERROR_TRY_RETURN(lodepng_chunk_create(&out->data, &out->size, (unsigned)length, chunkName, data));
+  out->allocsize = out->size; /*fix the allocsize again: lodepng_chunk_create was handed out->data and out->size directly, not the ucvector*/
+  return 0;
+}
+
+/*Appends the fixed 8-byte PNG signature, aka the magic bytes, to out.*/
+static void writeSignature(ucvector* out) {
+  static const unsigned char signature[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+  size_t k;
+
+  for(k = 0; k != sizeof(signature); ++k) {
+    ucvector_push_back(out, signature[k]);
+  }
+}
+
+/*Builds the IHDR payload (width, height, bit depth, color type, compression
+method 0, filter method 0, interlace method) and appends it to out as a chunk.
+Returns the error code of addChunk.*/
+static unsigned addChunk_IHDR(ucvector* out, unsigned w, unsigned h,
+                              LodePNGColorType colortype, unsigned bitdepth, unsigned interlace_method) {
+  unsigned result;
+  ucvector ihdr;
+
+  ucvector_init(&ihdr);
+
+  /*image dimensions*/
+  lodepng_add32bitInt(&ihdr, w);
+  lodepng_add32bitInt(&ihdr, h);
+
+  /*pixel format*/
+  ucvector_push_back(&ihdr, (unsigned char)bitdepth);
+  ucvector_push_back(&ihdr, (unsigned char)colortype);
+
+  /*compression method and filter method are always 0*/
+  ucvector_push_back(&ihdr, 0);
+  ucvector_push_back(&ihdr, 0);
+
+  ucvector_push_back(&ihdr, interlace_method);
+
+  result = addChunk(out, "IHDR", ihdr.data, ihdr.size);
+  ucvector_cleanup(&ihdr);
+  return result;
+}
+
+/*Appends a PLTE chunk built from info->palette, which stores 4 bytes per
+entry: the first three bytes of every entry are written, the alpha byte is skipped.
+Returns the error code of addChunk.*/
+static unsigned addChunk_PLTE(ucvector* out, const LodePNGColorMode* info) {
+  unsigned result;
+  size_t entry, channel;
+  ucvector plte;
+
+  ucvector_init(&plte);
+  for(entry = 0; entry != info->palettesize; ++entry) {
+    /*channel 3 is the alpha channel and is not part of PLTE*/
+    for(channel = 0; channel != 3; ++channel) {
+      ucvector_push_back(&plte, info->palette[4 * entry + channel]);
+    }
+  }
+
+  result = addChunk(out, "PLTE", plte.data, plte.size);
+  ucvector_cleanup(&plte);
+  return result;
+}
+
+/*Appends a tRNS chunk. For palette images it holds the alpha byte of each
+palette entry, without the trailing run of fully opaque (255) entries; for
+grey and RGB images it holds the 16-bit color key when one is defined. Other
+color types produce an empty payload. Returns the error code of addChunk.*/
+static unsigned addChunk_tRNS(ucvector* out, const LodePNGColorMode* info) {
+  unsigned result;
+  size_t k;
+  ucvector trns;
+
+  ucvector_init(&trns);
+
+  switch(info->colortype) {
+    case LCT_PALETTE: {
+      /*the tail of palette values that all have 255 as alpha, does not have to be encoded*/
+      size_t count = info->palettesize;
+      while(count != 0 && info->palette[4 * (count - 1) + 3] == 255) --count;
+      /*add only alpha channel*/
+      for(k = 0; k != count; ++k) ucvector_push_back(&trns, info->palette[4 * k + 3]);
+      break;
+    }
+    case LCT_GREY:
+      if(info->key_defined) {
+        ucvector_push_back(&trns, (unsigned char)(info->key_r >> 8));
+        ucvector_push_back(&trns, (unsigned char)(info->key_r & 255));
+      }
+      break;
+    case LCT_RGB:
+      if(info->key_defined) {
+        ucvector_push_back(&trns, (unsigned char)(info->key_r >> 8));
+        ucvector_push_back(&trns, (unsigned char)(info->key_r & 255));
+        ucvector_push_back(&trns, (unsigned char)(info->key_g >> 8));
+        ucvector_push_back(&trns, (unsigned char)(info->key_g & 255));
+        ucvector_push_back(&trns, (unsigned char)(info->key_b >> 8));
+        ucvector_push_back(&trns, (unsigned char)(info->key_b & 255));
+      }
+      break;
+    default:
+      break; /*no transparency data for the remaining color types*/
+  }
+
+  result = addChunk(out, "tRNS", trns.data, trns.size);
+  ucvector_cleanup(&trns);
+  return result;
+}
+
+/*Compresses data with zlib_compress and appends the result to out as one IDAT chunk.
+Returns the error code of zlib_compress, or else that of addChunk.*/
+static unsigned addChunk_IDAT(ucvector* out, const unsigned char* data, size_t datasize,
+                              LodePNGCompressSettings* zlibsettings) {
+  unsigned result;
+  ucvector packed;
+
+  ucvector_init(&packed);
+  result = zlib_compress(&packed.data, &packed.size, data, datasize, zlibsettings);
+  if(result == 0) {
+    result = addChunk(out, "IDAT", packed.data, packed.size);
+  }
+  ucvector_cleanup(&packed);
+
+  return result;
+}
+
+static unsigned addChunk_IEND(ucvector* out) {
+  unsigned error = 0;
+  error = addChunk(out, "IEND", 0, 0);
+  return error;
+}
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+
+/*
+Appends a tEXt chunk: the keyword, a 0 terminator and the uncompressed text.
+The keyword must be 1..79 characters long, otherwise 89 is returned.
+Returns 0 on success, 89 on a bad keyword, or the error code of addChunk.
+*/
+static unsigned addChunk_tEXt(ucvector* out, const char* keyword, const char* textstring) {
+  unsigned error = 0;
+  size_t i;
+  ucvector text;
+  ucvector_init(&text);
+  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)keyword[i]);
+  if(i < 1 || i > 79) {
+    /*the keyword bytes were already pushed; release them before bailing out*/
+    ucvector_cleanup(&text);
+    return 89; /*error: invalid keyword size*/
+  }
+  ucvector_push_back(&text, 0); /*0 termination char*/
+  for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)textstring[i]);
+  error = addChunk(out, "tEXt", text.data, text.size);
+  ucvector_cleanup(&text);
+
+  return error;
+}
+
+/*
+Appends a zTXt chunk: the keyword, a 0 terminator, compression method 0 and
+the text compressed with zlib_compress.
+The keyword must be 1..79 characters long, otherwise 89 is returned.
+Returns 0 on success, 89 on a bad keyword, or the error code of zlib_compress
+or addChunk.
+*/
+static unsigned addChunk_zTXt(ucvector* out, const char* keyword, const char* textstring,
+                              LodePNGCompressSettings* zlibsettings) {
+  unsigned error = 0;
+  ucvector data, compressed;
+  size_t i, textsize = strlen(textstring);
+
+  ucvector_init(&data);
+  ucvector_init(&compressed);
+  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]);
+  if(i < 1 || i > 79) {
+    /*the keyword bytes were already pushed; release them before bailing out*/
+    ucvector_cleanup(&data);
+    return 89; /*error: invalid keyword size*/
+  }
+  ucvector_push_back(&data, 0); /*0 termination char*/
+  ucvector_push_back(&data, 0); /*compression method: 0*/
+
+  error = zlib_compress(&compressed.data, &compressed.size,
+                        (unsigned char*)textstring, textsize, zlibsettings);
+  if(!error) {
+    for(i = 0; i != compressed.size; ++i) ucvector_push_back(&data, compressed.data[i]);
+    error = addChunk(out, "zTXt", data.data, data.size);
+  }
+
+  ucvector_cleanup(&compressed);
+  ucvector_cleanup(&data);
+  return error;
+}
+
+/*
+Appends an iTXt chunk: keyword, 0 terminator, compression flag, compression
+method 0, language tag, 0 terminator, translated keyword, 0 terminator, and
+the text, compressed with zlib_compress when `compressed` is non-zero.
+The keyword must be 1..79 characters long, otherwise 89 is returned.
+Returns 0 on success, 89 on a bad keyword, or the error code of zlib_compress
+or addChunk.
+*/
+static unsigned addChunk_iTXt(ucvector* out, unsigned compressed, const char* keyword, const char* langtag,
+                              const char* transkey, const char* textstring, LodePNGCompressSettings* zlibsettings) {
+  unsigned error = 0;
+  ucvector data;
+  size_t i, textsize = strlen(textstring);
+
+  ucvector_init(&data);
+
+  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]);
+  if(i < 1 || i > 79) {
+    /*the keyword bytes were already pushed; release them before bailing out*/
+    ucvector_cleanup(&data);
+    return 89; /*error: invalid keyword size*/
+  }
+  ucvector_push_back(&data, 0); /*null termination char*/
+  ucvector_push_back(&data, compressed ? 1 : 0); /*compression flag*/
+  ucvector_push_back(&data, 0); /*compression method*/
+  for(i = 0; langtag[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)langtag[i]);
+  ucvector_push_back(&data, 0); /*null termination char*/
+  for(i = 0; transkey[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)transkey[i]);
+  ucvector_push_back(&data, 0); /*null termination char*/
+
+  if(compressed) {
+    ucvector compressed_data;
+    ucvector_init(&compressed_data);
+    error = zlib_compress(&compressed_data.data, &compressed_data.size,
+                          (unsigned char*)textstring, textsize, zlibsettings);
+    if(!error) {
+      for(i = 0; i != compressed_data.size; ++i) ucvector_push_back(&data, compressed_data.data[i]);
+    }
+    ucvector_cleanup(&compressed_data);
+  } else /*not compressed*/ {
+    for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)textstring[i]);
+  }
+
+  if(!error) error = addChunk(out, "iTXt", data.data, data.size);
+  ucvector_cleanup(&data);
+  return error;
+}
+
+/*Appends a bKGD chunk whose layout depends on info->color.colortype:
+one 16-bit value for grey types, three 16-bit values for RGB types, and a
+single palette index byte for palette images. Returns the error code of addChunk.*/
+static unsigned addChunk_bKGD(ucvector* out, const LodePNGInfo* info) {
+  unsigned result;
+  ucvector bkgd;
+
+  ucvector_init(&bkgd);
+
+  switch(info->color.colortype) {
+    case LCT_GREY:
+    case LCT_GREY_ALPHA:
+      ucvector_push_back(&bkgd, (unsigned char)(info->background_r >> 8));
+      ucvector_push_back(&bkgd, (unsigned char)(info->background_r & 255));
+      break;
+    case LCT_RGB:
+    case LCT_RGBA:
+      ucvector_push_back(&bkgd, (unsigned char)(info->background_r >> 8));
+      ucvector_push_back(&bkgd, (unsigned char)(info->background_r & 255));
+      ucvector_push_back(&bkgd, (unsigned char)(info->background_g >> 8));
+      ucvector_push_back(&bkgd, (unsigned char)(info->background_g & 255));
+      ucvector_push_back(&bkgd, (unsigned char)(info->background_b >> 8));
+      ucvector_push_back(&bkgd, (unsigned char)(info->background_b & 255));
+      break;
+    case LCT_PALETTE:
+      ucvector_push_back(&bkgd, (unsigned char)(info->background_r & 255)); /*palette index*/
+      break;
+    default:
+      break; /*any other color type yields an empty payload*/
+  }
+
+  result = addChunk(out, "bKGD", bkgd.data, bkgd.size);
+  ucvector_cleanup(&bkgd);
+  return result;
+}
+
+/*
+Appends a tIME chunk: a 2-byte big-endian year followed by month, day, hour,
+minute and second, one byte each.
+The 7-byte payload is built on the stack, so no allocation can fail here.
+Returns the error code of addChunk.
+*/
+static unsigned addChunk_tIME(ucvector* out, const LodePNGTime* time) {
+  unsigned char data[7];
+  data[0] = (unsigned char)(time->year >> 8);
+  data[1] = (unsigned char)(time->year & 255);
+  data[2] = (unsigned char)time->month;
+  data[3] = (unsigned char)time->day;
+  data[4] = (unsigned char)time->hour;
+  data[5] = (unsigned char)time->minute;
+  data[6] = (unsigned char)time->second;
+  return addChunk(out, "tIME", data, 7);
+}
+
+/*Appends a pHYs chunk: phys_x and phys_y as 32-bit integers followed by the
+unit byte. Returns the error code of addChunk.*/
+static unsigned addChunk_pHYs(ucvector* out, const LodePNGInfo* info) {
+  unsigned result;
+  ucvector phys;
+
+  ucvector_init(&phys);
+  lodepng_add32bitInt(&phys, info->phys_x);
+  lodepng_add32bitInt(&phys, info->phys_y);
+  ucvector_push_back(&phys, info->phys_unit);
+
+  result = addChunk(out, "pHYs", phys.data, phys.size);
+  ucvector_cleanup(&phys);
+  return result;
+}
+
+/*Appends a gAMA chunk holding info->gama_gamma as one 32-bit integer.
+Returns the error code of addChunk.*/
+static unsigned addChunk_gAMA(ucvector* out, const LodePNGInfo* info) {
+  unsigned result;
+  ucvector gama;
+
+  ucvector_init(&gama);
+  lodepng_add32bitInt(&gama, info->gama_gamma);
+
+  result = addChunk(out, "gAMA", gama.data, gama.size);
+  ucvector_cleanup(&gama);
+  return result;
+}
+
+/*Appends a cHRM chunk: eight 32-bit integers in the order white x/y, red x/y,
+green x/y, blue x/y. Returns the error code of addChunk.*/
+static unsigned addChunk_cHRM(ucvector* out, const LodePNGInfo* info) {
+  unsigned result;
+  ucvector chrm;
+
+  ucvector_init(&chrm);
+
+  /*white point*/
+  lodepng_add32bitInt(&chrm, info->chrm_white_x);
+  lodepng_add32bitInt(&chrm, info->chrm_white_y);
+  /*red, green and blue primaries*/
+  lodepng_add32bitInt(&chrm, info->chrm_red_x);
+  lodepng_add32bitInt(&chrm, info->chrm_red_y);
+  lodepng_add32bitInt(&chrm, info->chrm_green_x);
+  lodepng_add32bitInt(&chrm, info->chrm_green_y);
+  lodepng_add32bitInt(&chrm, info->chrm_blue_x);
+  lodepng_add32bitInt(&chrm, info->chrm_blue_y);
+
+  result = addChunk(out, "cHRM", chrm.data, chrm.size);
+  ucvector_cleanup(&chrm);
+  return result;
+}
+
+/*Appends an sRGB chunk whose single payload byte is info->srgb_intent.
+Returns the error code of addChunk.*/
+static unsigned addChunk_sRGB(ucvector* out, const LodePNGInfo* info) {
+  unsigned char intent;
+
+  intent = info->srgb_intent;
+  return addChunk(out, "sRGB", &intent, 1);
+}
+
+/*
+Appends an iCCP chunk: the profile name, a 0 terminator, compression method 0
+and the ICC profile compressed with zlib_compress.
+The name must be 1..79 characters long, otherwise 89 is returned.
+Returns 0 on success, 89 on a bad name, or the error code of zlib_compress or
+addChunk.
+*/
+static unsigned addChunk_iCCP(ucvector* out, const LodePNGInfo* info, LodePNGCompressSettings* zlibsettings) {
+  unsigned error = 0;
+  ucvector data, compressed;
+  size_t i;
+
+  ucvector_init(&data);
+  ucvector_init(&compressed);
+  for(i = 0; info->iccp_name[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)info->iccp_name[i]);
+  if(i < 1 || i > 79) {
+    /*the name bytes were already pushed; release them before bailing out*/
+    ucvector_cleanup(&data);
+    return 89; /*error: invalid keyword size*/
+  }
+  ucvector_push_back(&data, 0); /*0 termination char*/
+  ucvector_push_back(&data, 0); /*compression method: 0*/
+
+  error = zlib_compress(&compressed.data, &compressed.size,
+                        info->iccp_profile, info->iccp_profile_size, zlibsettings);
+  if(!error) {
+    for(i = 0; i != compressed.size; ++i) ucvector_push_back(&data, compressed.data[i]);
+    error = addChunk(out, "iCCP", data.data, data.size);
+  }
+
+  ucvector_cleanup(&compressed);
+  ucvector_cleanup(&data);
+  return error;
+}
+
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*Applies one PNG filter type (0 None, 1 Sub, 2 Up, 3 Average, 4 Paeth) to
+`length` bytes of scanline and writes the result to out. prevline is the
+previous scanline, or 0 when there is none, in which case its bytes count as 0.
+bytewidth is the distance in bytes to the corresponding byte of the pixel to
+the left. Any other filterType leaves out untouched.*/
+static void filterScanline(unsigned char* out, const unsigned char* scanline, const unsigned char* prevline,
+                           size_t length, size_t bytewidth, unsigned char filterType) {
+  size_t k;
+
+  if(filterType == 0) {
+    /*None: plain copy*/
+    for(k = 0; k != length; ++k) out[k] = scanline[k];
+  } else if(filterType == 1) {
+    /*Sub: difference with the byte to the left; the first pixel has no left neighbor*/
+    for(k = 0; k != bytewidth; ++k) out[k] = scanline[k];
+    for(k = bytewidth; k < length; ++k) out[k] = scanline[k] - scanline[k - bytewidth];
+  } else if(filterType == 2) {
+    /*Up: difference with the byte above*/
+    for(k = 0; k != length; ++k) {
+      out[k] = prevline ? scanline[k] - prevline[k] : scanline[k];
+    }
+  } else if(filterType == 3) {
+    /*Average: difference with the mean of left and above*/
+    for(k = 0; k != bytewidth; ++k) {
+      out[k] = prevline ? scanline[k] - (prevline[k] >> 1) : scanline[k];
+    }
+    for(k = bytewidth; k < length; ++k) {
+      const unsigned char left = scanline[k - bytewidth];
+      out[k] = prevline ? scanline[k] - ((left + prevline[k]) >> 1) : scanline[k] - (left >> 1);
+    }
+  } else if(filterType == 4) {
+    /*Paeth*/
+    if(!prevline) {
+      for(k = 0; k != bytewidth; ++k) out[k] = scanline[k];
+      /*paethPredictor(scanline[k - bytewidth], 0, 0) is always scanline[k - bytewidth]*/
+      for(k = bytewidth; k < length; ++k) out[k] = (scanline[k] - scanline[k - bytewidth]);
+    } else {
+      /*paethPredictor(0, prevline[k], 0) is always prevline[k]*/
+      for(k = 0; k != bytewidth; ++k) out[k] = (scanline[k] - prevline[k]);
+      for(k = bytewidth; k < length; ++k) {
+        out[k] = (scanline[k] - paethPredictor(scanline[k - bytewidth], prevline[k], prevline[k - bytewidth]));
+      }
+    }
+  }
+  /*unexisting filter type given: nothing is written*/
+}
+
+/* Approximate base-2 logarithm: the argument is first scaled down by 16 and then
+by 2 while counting the removed powers, and a cubic polynomial covers the rest. */
+static float flog2(float f) {
+  float whole = 0;
+  for(; f > 32; f /= 16) whole += 4;
+  for(; f > 2; f /= 2) ++whole;
+  return whole + 1.442695f * (f * f * f / 3 - 3 * f * f / 2 + 3 * f - 1.83333f);
+}
+
+/*
+Filters a whole (sub)image for PNG filter method 0 and writes the filter type byte in front of
+every scanline.
+out: destination buffer, see the size requirement below
+in: the unfiltered scanlines, each (w * bpp + 7) / 8 bytes, tightly packed
+w, h: dimensions of the (sub)image in pixels
+info: color mode of the data, determines the bits per pixel
+settings: selects the filter strategy (and, for LFS_PREDEFINED, the per-row filter types;
+  predefined_filters is indexed with every y below h)
+Returns 0 on success, 31 for an invalid color type, 83 if a work buffer could not be
+allocated and 88 for an unknown filter strategy.
+*/
+static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h,
+                       const LodePNGColorMode* info, const LodePNGEncoderSettings* settings) {
+  /*
+  For PNG filter method 0
+  out must be a buffer with as size: h + (w * h * bpp + 7) / 8, because there are
+  the scanlines with 1 extra byte per scanline
+  */
+
+  unsigned bpp = lodepng_get_bpp(info);
+  /*the width of a scanline in bytes, not including the filter type*/
+  size_t linebytes = (w * bpp + 7) / 8;
+  /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/
+  size_t bytewidth = (bpp + 7) / 8;
+  const unsigned char* prevline = 0;
+  unsigned x, y;
+  unsigned error = 0;
+  LodePNGFilterStrategy strategy = settings->filter_strategy;
+
+  /*
+  There is a heuristic called the minimum sum of absolute differences heuristic, suggested by the PNG standard:
+   *  If the image type is Palette, or the bit depth is smaller than 8, then do not filter the image (i.e.
+      use fixed filtering, with the filter None).
+   * (The other case) If the image type is Grayscale or RGB (with or without Alpha), and the bit depth is
+     not smaller than 8, then use adaptive filtering heuristic as follows: independently for each row, apply
+     all five filters and select the filter that produces the smallest sum of absolute values per row.
+  This heuristic is used if filter strategy is LFS_MINSUM and filter_palette_zero is true.
+
+  If filter_palette_zero is true and filter_strategy is not LFS_MINSUM, the above heuristic is followed,
+  but for "the other case", whatever strategy filter_strategy is set to instead of the minimum sum
+  heuristic is used.
+  */
+  if(settings->filter_palette_zero &&
+     (info->colortype == LCT_PALETTE || info->bitdepth < 8)) strategy = LFS_ZERO;
+
+  if(bpp == 0) return 31; /*error: invalid color type*/
+
+  if(strategy == LFS_ZERO) {
+    for(y = 0; y != h; ++y) {
+      size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
+      size_t inindex = linebytes * y;
+      out[outindex] = 0; /*filter type byte*/
+      filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, 0);
+      prevline = &in[inindex];
+    }
+  } else if(strategy == LFS_MINSUM) {
+    /*adaptive filtering*/
+    size_t sum[5];
+    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
+    size_t smallest = 0;
+    unsigned char type, bestType = 0;
+
+    /*Every slot gets assigned (possibly 0) so that the free loop below is always safe. A failed
+    allocation is only recorded here: returning immediately would leak the earlier buffers.*/
+    for(type = 0; type != 5; ++type) {
+      attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
+      if(!attempt[type]) error = 83; /*alloc fail*/
+    }
+
+    if(!error) {
+      for(y = 0; y != h; ++y) {
+        /*try the 5 filter types*/
+        for(type = 0; type != 5; ++type) {
+          filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
+
+          /*calculate the sum of the result*/
+          sum[type] = 0;
+          if(type == 0) {
+            for(x = 0; x != linebytes; ++x) sum[type] += (unsigned char)(attempt[type][x]);
+          } else {
+            for(x = 0; x != linebytes; ++x) {
+              /*For differences, each byte should be treated as signed, values above 127 are negative
+              (converted to signed char). Filtertype 0 isn't a difference though, so use unsigned there.
+              This means filtertype 0 is almost never chosen, but that is justified.*/
+              unsigned char s = attempt[type][x];
+              sum[type] += s < 128 ? s : (255U - s);
+            }
+          }
+
+          /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/
+          if(type == 0 || sum[type] < smallest) {
+            bestType = type;
+            smallest = sum[type];
+          }
+        }
+
+        prevline = &in[y * linebytes];
+
+        /*now fill the out values*/
+        out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
+        for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
+      }
+    }
+
+    for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
+  } else if(strategy == LFS_ENTROPY) {
+    float sum[5];
+    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
+    float smallest = 0;
+    unsigned type, bestType = 0;
+    unsigned count[256];
+
+    /*record an allocation failure instead of returning, so the buffers get freed below*/
+    for(type = 0; type != 5; ++type) {
+      attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
+      if(!attempt[type]) error = 83; /*alloc fail*/
+    }
+
+    if(!error) {
+      for(y = 0; y != h; ++y) {
+        /*try the 5 filter types*/
+        for(type = 0; type != 5; ++type) {
+          filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
+          /*histogram of the byte values of this attempt*/
+          for(x = 0; x != 256; ++x) count[x] = 0;
+          for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]];
+          ++count[type]; /*the filter type itself is part of the scanline*/
+          /*entropy estimate: sum of p * log2(1 / p) over all byte values that occur*/
+          sum[type] = 0;
+          for(x = 0; x != 256; ++x) {
+            float p = count[x] / (float)(linebytes + 1);
+            sum[type] += count[x] == 0 ? 0 : flog2(1 / p) * p;
+          }
+          /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/
+          if(type == 0 || sum[type] < smallest) {
+            bestType = type;
+            smallest = sum[type];
+          }
+        }
+
+        prevline = &in[y * linebytes];
+
+        /*now fill the out values*/
+        out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
+        for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
+      }
+    }
+
+    for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
+  } else if(strategy == LFS_PREDEFINED) {
+    for(y = 0; y != h; ++y) {
+      size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
+      size_t inindex = linebytes * y;
+      unsigned char type = settings->predefined_filters[y];
+      out[outindex] = type; /*filter type byte*/
+      filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, type);
+      prevline = &in[inindex];
+    }
+  } else if(strategy == LFS_BRUTE_FORCE) {
+    /*brute force filter chooser.
+    deflate the scanline after every filter attempt to see which one deflates best.
+    This is very slow and gives only slightly smaller, sometimes even larger, result*/
+    size_t size[5];
+    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
+    size_t smallest = 0;
+    unsigned type = 0, bestType = 0;
+    unsigned char* dummy;
+    LodePNGCompressSettings zlibsettings = settings->zlibsettings;
+    /*use fixed tree on the attempts so that the tree is not adapted to the filtertype on purpose,
+    to simulate the true case where the tree is the same for the whole image. Sometimes it gives
+    better result with dynamic tree anyway. Using the fixed tree sometimes gives worse, but in rare
+    cases better compression. It does make this a bit less slow, so it's worth doing this.*/
+    zlibsettings.btype = 1;
+    /*a custom encoder likely doesn't read the btype setting and is optimized for complete PNG
+    images only, so disable it*/
+    zlibsettings.custom_zlib = 0;
+    zlibsettings.custom_deflate = 0;
+    /*record an allocation failure instead of returning, so the buffers get freed below*/
+    for(type = 0; type != 5; ++type) {
+      attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
+      if(!attempt[type]) error = 83; /*alloc fail*/
+    }
+    if(!error) {
+      for(y = 0; y != h; ++y) /*try the 5 filter types*/ {
+        for(type = 0; type != 5; ++type) {
+          unsigned testsize = (unsigned)linebytes;
+          /*if(testsize > 8) testsize /= 8;*/ /*it already works good enough by testing a part of the row*/
+
+          filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
+          size[type] = 0;
+          dummy = 0;
+          /*only the compressed size is of interest, the compressed bytes are thrown away again*/
+          zlib_compress(&dummy, &size[type], attempt[type], testsize, &zlibsettings);
+          lodepng_free(dummy);
+          /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/
+          if(type == 0 || size[type] < smallest) {
+            bestType = type;
+            smallest = size[type];
+          }
+        }
+        prevline = &in[y * linebytes];
+        out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
+        for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
+      }
+    }
+    for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
+  }
+  else return 88; /* unknown filter strategy */
+
+  return error;
+}
+
+/*
+The opposite of the removePaddingBits function: copies h scanlines of ilinebits bits each from
+in to out, and appends zero bits to every scanline so that it occupies olinebits bits in out.
+olinebits must be >= ilinebits
+*/
+static void addPaddingBits(unsigned char* out, const unsigned char* in,
+                           size_t olinebits, size_t ilinebits, unsigned h) {
+  const size_t padbits = olinebits - ilinebits;
+  size_t inpos = 0, outpos = 0; /*bit pointers, advanced by the stream helpers*/
+  unsigned row;
+  for(row = 0; row != h; ++row) {
+    size_t left;
+    /*the actual pixel bits of this scanline*/
+    for(left = ilinebits; left != 0; --left) {
+      unsigned char bit = readBitFromReversedStream(&inpos, in);
+      setBitOfReversedStream(&outpos, out, bit);
+    }
+    /*The padding bits are written as well rather than skipped, to avoid a
+    "Use of uninitialised value of size ###" warning from valgrind*/
+    for(left = padbits; left != 0; --left) setBitOfReversedStream(&outpos, out, 0);
+  }
+}
+
+/*
+in: non-interlaced image with size w*h and without any padding bits, so it has
+ w * h * bpp bits.
+out: the same pixels, re-ordered into the 7 reduced images of PNG's Adam7 interlacing.
+ There are no padding bits between the scanlines of a reduced image, but every
+ reduced image starts at a whole byte (passstart), so out is possibly bigger than in
+ due to padding bits between reduced images.
+bpp: bits per pixel
+NOTE: comments about padding bits are only relevant if bpp < 8
+*/
+static void Adam7_interlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) {
+  unsigned passw[7], passh[7];
+  size_t filter_passstart[8], padded_passstart[8], passstart[8];
+  unsigned pass;
+
+  Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
+
+  if(bpp < 8) {
+    /*Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/
+    for(pass = 0; pass != 7; ++pass) {
+      unsigned col, row, n;
+      unsigned passlinebits = bpp * passw[pass]; /*bits per scanline of this reduced image*/
+      unsigned fulllinebits = bpp * w; /*bits per scanline of the full image*/
+      size_t dstbit, srcbit; /*bit pointers (for out and in buffer)*/
+      for(row = 0; row < passh[pass]; ++row) {
+        for(col = 0; col < passw[pass]; ++col) {
+          srcbit = (ADAM7_IY[pass] + row * ADAM7_DY[pass]) * fulllinebits + (ADAM7_IX[pass] + col * ADAM7_DX[pass]) * bpp;
+          dstbit = (8 * passstart[pass]) + (row * passlinebits + col * bpp);
+          for(n = 0; n < bpp; ++n) {
+            unsigned char bit = readBitFromReversedStream(&srcbit, in);
+            setBitOfReversedStream(&dstbit, out, bit);
+          }
+        }
+      }
+    }
+  } else {
+    /*whole bytes per pixel: copy pixel by pixel*/
+    size_t bytewidth = bpp / 8;
+    for(pass = 0; pass != 7; ++pass) {
+      unsigned col, row, n;
+      for(row = 0; row < passh[pass]; ++row) {
+        for(col = 0; col < passw[pass]; ++col) {
+          size_t src = ((ADAM7_IY[pass] + row * ADAM7_DY[pass]) * w + ADAM7_IX[pass] + col * ADAM7_DX[pass]) * bytewidth;
+          size_t dst = passstart[pass] + (row * passw[pass] + col) * bytewidth;
+          for(n = 0; n < bytewidth; ++n) {
+            out[dst + n] = in[src + n];
+          }
+        }
+      }
+    }
+  }
+}
+
+/*
+Turns the raw image (already in the PNG's color type) into the uncompressed IDAT data.
+out: receives a newly allocated buffer with the filtered data; it is left allocated when an
+  error is returned, so it must always be freed afterwards
+outsize: receives the size of that buffer in bytes
+in: must contain the full image
+return value is the error code, 0 if everything went ok
+*/
+static unsigned preProcessScanlines(unsigned char** out, size_t* outsize, const unsigned char* in,
+                                    unsigned w, unsigned h,
+                                    const LodePNGInfo* info_png, const LodePNGEncoderSettings* settings) {
+  /*
+  This function converts the pure 2D image with the PNG's colortype, into filtered-padded-interlaced data. Steps:
+  *) if no Adam7: 1) add padding bits (= possible extra bits per scanline if bpp < 8) 2) filter
+  *) if adam7: 1) Adam7_interlace 2) 7x add padding bits 3) 7x filter
+  */
+  unsigned bpp = lodepng_get_bpp(&info_png->color);
+  unsigned error = 0;
+
+  if(info_png->interlace_method == 0) {
+    /*bytes per scanline, rounded up to a whole byte*/
+    unsigned linebytes = (w * bpp + 7) / 8;
+
+    *outsize = h + (h * linebytes); /*image size plus an extra byte per scanline + possible padding bits*/
+    *out = (unsigned char*)lodepng_malloc(*outsize);
+    if(!(*out) && (*outsize)) error = 83; /*alloc fail*/
+
+    if(!error && bpp < 8 && w * bpp != linebytes * 8) {
+      /*non multiple of 8 bits per scanline, padding bits needed per scanline*/
+      unsigned char* padded = (unsigned char*)lodepng_malloc(h * linebytes);
+      if(padded) {
+        addPaddingBits(padded, in, linebytes * 8, w * bpp, h);
+        error = filter(*out, padded, w, h, &info_png->color, settings);
+      } else {
+        error = 83; /*alloc fail*/
+      }
+      lodepng_free(padded);
+    } else if(!error) {
+      /*we can immediately filter into the out buffer, no other steps needed*/
+      error = filter(*out, in, w, h, &info_png->color, settings);
+    }
+  } else /*interlace_method is 1 (Adam7)*/ {
+    unsigned passw[7], passh[7];
+    size_t filter_passstart[8], padded_passstart[8], passstart[8];
+    unsigned char* adam7;
+
+    Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
+
+    *outsize = filter_passstart[7]; /*image size plus an extra byte per scanline + possible padding bits*/
+    *out = (unsigned char*)lodepng_malloc(*outsize);
+    if(!(*out)) error = 83; /*alloc fail*/
+
+    /*holds the 7 reduced images before they get padded and filtered*/
+    adam7 = (unsigned char*)lodepng_malloc(passstart[7]);
+    if(!adam7 && passstart[7]) error = 83; /*alloc fail*/
+
+    if(!error) {
+      unsigned pass;
+
+      Adam7_interlace(adam7, in, w, h, bpp);
+      /*stops after the first pass that reports an error*/
+      for(pass = 0; pass != 7 && !error; ++pass) {
+        unsigned char* dest = &(*out)[filter_passstart[pass]];
+        if(bpp >= 8) {
+          /*no padding bits possible, filter straight from the interlaced data*/
+          error = filter(dest, &adam7[padded_passstart[pass]],
+                         passw[pass], passh[pass], &info_png->color, settings);
+        } else {
+          unsigned char* padded = (unsigned char*)lodepng_malloc(padded_passstart[pass + 1] - padded_passstart[pass]);
+          if(!padded) ERROR_BREAK(83); /*alloc fail*/
+          addPaddingBits(padded, &adam7[passstart[pass]],
+                         ((passw[pass] * bpp + 7) / 8) * 8, passw[pass] * bpp, passh[pass]);
+          error = filter(dest, padded, passw[pass], passh[pass], &info_png->color, settings);
+          lodepng_free(padded);
+        }
+      }
+    }
+
+    lodepng_free(adam7);
+  }
+
+  return error;
+}
+
+/*
+palette must have 4 * palettesize bytes allocated, and given in format RGBARGBARGBARGBA...
+returns 0 if the palette is opaque,
+returns 1 if the palette has a single color with alpha 0 ==> color key
+returns 2 if the palette is semi-translucent.
+
+A color key is only possible when every fully transparent entry has the same RGB value, no
+opaque entry has that RGB value, and no entry has an alpha other than 0 or 255. In all other
+cases with non-opaque entries the result is 2.
+*/
+static unsigned getPaletteTranslucency(const unsigned char* palette, size_t palettesize) {
+  size_t i;
+  unsigned key = 0;
+  unsigned r = 0, g = 0, b = 0; /*the value of the color with alpha 0, so long as color keying is possible*/
+
+  /*first pass: the first fully transparent entry, if any, is the candidate color key*/
+  for(i = 0; i != palettesize; ++i) {
+    if(palette[4 * i + 3] == 0) {
+      r = palette[4 * i + 0]; g = palette[4 * i + 1]; b = palette[4 * i + 2];
+      key = 1;
+      break;
+    }
+  }
+
+  /*second pass: check every entry, also the ones in front of the key, against the candidate*/
+  for(i = 0; i != palettesize; ++i) {
+    unsigned alpha = palette[4 * i + 3];
+    unsigned is_key_rgb = key && r == palette[4 * i + 0] && g == palette[4 * i + 1] && b == palette[4 * i + 2];
+    if(alpha == 0) {
+      /*the key entry itself (or a duplicate of it) is fine, a second transparent color is not*/
+      if(!is_key_rgb) return 2;
+    }
+    else if(alpha != 255) return 2;
+    /*when key, no opaque RGB may have key's RGB*/
+    else if(is_key_rgb) return 2;
+  }
+  return key;
+}
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+/*
+Appends the chunks stored back to back in data (datasize bytes in total) to the output vector.
+Returns 0 on success, otherwise the error code of the chunk append that failed.
+*/
+static unsigned addUnknownChunks(ucvector* out, unsigned char* data, size_t datasize) {
+  unsigned char* chunk;
+  for(chunk = data; (size_t)(chunk - data) < datasize; chunk = lodepng_chunk_next(chunk)) {
+    CERROR_TRY_RETURN(lodepng_chunk_append(&out->data, &out->size, chunk));
+    out->allocsize = out->size; /*fix the allocsize again*/
+  }
+  return 0;
+}
+
+/*
+Returns 1 if bytes 16-19 of the ICC profile are "GRAY", 0 otherwise (also when the profile
+has fewer than 20 bytes).
+
+An rgb profile has "RGB " in those bytes instead. No further parsing or validation of the
+profile is done. The check is needed only because the PNG specification requires using a
+non-grey color model if there is an ICC profile with "RGB " (sadly limiting compression
+opportunities if the input data is greyscale RGB data), and requires using a grey color
+model if it is "GRAY".
+*/
+static unsigned isGreyICCProfile(const unsigned char* profile, unsigned size) {
+  static const unsigned char signature[4] = {'G', 'R', 'A', 'Y'};
+  unsigned k;
+  if(size < 20) return 0;
+  for(k = 0; k != 4; ++k) {
+    if(profile[16 + k] != signature[k]) return 0;
+  }
+  return 1;
+}
+
+/*
+Returns 1 if bytes 16-19 of the ICC profile are "RGB " (with the trailing space), 0 otherwise
+(also when the profile has fewer than 20 bytes). Nothing else of the profile is inspected.
+*/
+static unsigned isRGBICCProfile(const unsigned char* profile, unsigned size) {
+  static const unsigned char signature[4] = {'R', 'G', 'B', ' '};
+  unsigned k;
+  if(size < 20) return 0;
+  for(k = 0; k != 4; ++k) {
+    if(profile[16 + k] != signature[k]) return 0;
+  }
+  return 1;
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*
+Encodes raw pixel data into a complete PNG file in memory.
+out, outsize: receive the output buffer and its size. They are set to whatever has been
+  written to the output vector, also when an error code is returned, so *out must be freed
+  by the caller in every case.
+image: the raw pixels, in the color mode described by state->info_raw
+w, h: dimensions of the image in pixels
+state: encoder settings (state->encoder), requested PNG properties (state->info_png) and the
+  raw color mode (state->info_raw); state->error receives the result
+Returns state->error: 0 on success, otherwise an error code.
+*/
+unsigned lodepng_encode(unsigned char** out, size_t* outsize,
+                        const unsigned char* image, unsigned w, unsigned h,
+                        LodePNGState* state) {
+  unsigned char* data = 0; /*uncompressed version of the IDAT chunk data*/
+  size_t datasize = 0;
+  ucvector outv;
+  LodePNGInfo info;
+
+  ucvector_init(&outv);
+  lodepng_info_init(&info);
+
+  /*provide some proper output values if error will happen*/
+  *out = 0;
+  *outsize = 0;
+  state->error = 0;
+
+  /*check input values validity*/
+  if((state->info_png.color.colortype == LCT_PALETTE || state->encoder.force_palette)
+      && (state->info_png.color.palettesize == 0 || state->info_png.color.palettesize > 256)) {
+    state->error = 68; /*invalid palette size, it is only allowed to be 1-256*/
+    goto cleanup;
+  }
+  if(state->encoder.zlibsettings.btype > 2) {
+    state->error = 61; /*error: unexisting btype*/
+    goto cleanup;
+  }
+  if(state->info_png.interlace_method > 1) {
+    state->error = 71; /*error: unexisting interlace mode*/
+    goto cleanup;
+  }
+  state->error = checkColorValidity(state->info_png.color.colortype, state->info_png.color.bitdepth);
+  if(state->error) goto cleanup; /*error: unexisting color type given*/
+  state->error = checkColorValidity(state->info_raw.colortype, state->info_raw.bitdepth);
+  if(state->error) goto cleanup; /*error: unexisting color type given*/
+
+  /* color convert and compute scanline filter types */
+  lodepng_info_copy(&info, &state->info_png);
+  if(state->encoder.auto_convert) {
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+    if(state->info_png.background_defined) {
+      /*the background color takes part in choosing the output color mode*/
+      unsigned bg_r = state->info_png.background_r;
+      unsigned bg_g = state->info_png.background_g;
+      unsigned bg_b = state->info_png.background_b;
+      unsigned r = 0, g = 0, b = 0;
+      LodePNGColorProfile prof;
+      LodePNGColorMode mode16 = lodepng_color_mode_make(LCT_RGB, 16);
+      lodepng_convert_rgb(&r, &g, &b, bg_r, bg_g, bg_b, &mode16, &state->info_png.color);
+      lodepng_color_profile_init(&prof);
+      state->error = lodepng_get_color_profile(&prof, image, w, h, &state->info_raw);
+      if(state->error) goto cleanup;
+      lodepng_color_profile_add(&prof, r, g, b, 65535);
+      state->error = auto_choose_color_from_profile(&info.color, &state->info_raw, &prof);
+      if(state->error) goto cleanup;
+      if(lodepng_convert_rgb(&info.background_r, &info.background_g, &info.background_b,
+          bg_r, bg_g, bg_b, &info.color, &state->info_png.color)) {
+        state->error = 104;
+        goto cleanup;
+      }
+    }
+    else
+#endif /* LODEPNG_COMPILE_ANCILLARY_CHUNKS */
+    {
+      state->error = lodepng_auto_choose_color(&info.color, image, w, h, &state->info_raw);
+      if(state->error) goto cleanup;
+    }
+  }
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  if(state->info_png.iccp_defined) {
+    unsigned grey_icc = isGreyICCProfile(state->info_png.iccp_profile, state->info_png.iccp_profile_size);
+    unsigned grey_png = info.color.colortype == LCT_GREY || info.color.colortype == LCT_GREY_ALPHA;
+    /* TODO: perhaps instead of giving errors or less optimal compression, we can automatically modify
+    the ICC profile here to say "GRAY" or "RGB " to match the PNG color type, unless this will require
+    non trivial changes to the rest of the ICC profile */
+    if(!grey_icc && !isRGBICCProfile(state->info_png.iccp_profile, state->info_png.iccp_profile_size)) {
+      state->error = 100; /* Disallowed profile color type for PNG */
+      goto cleanup;
+    }
+    if(!state->encoder.auto_convert && grey_icc != grey_png) {
+      /* Non recoverable: encoder not allowed to convert color type, and requested color type not
+      compatible with ICC color type */
+      state->error = 101;
+      goto cleanup;
+    }
+    if(grey_icc && !grey_png) {
+      /* Non recoverable: trying to set greyscale ICC profile while colored pixels were given */
+      state->error = 102;
+      goto cleanup;
+      /* NOTE: this relies on the fact that lodepng_auto_choose_color never returns palette for greyscale pixels */
+    }
+    if(!grey_icc && grey_png) {
+      /* Recoverable but an unfortunate loss in compression density: We have greyscale pixels but
+      are forced to store them in more expensive RGB format that will repeat each value 3 times
+      because the PNG spec does not allow an RGB ICC profile with internal greyscale color data */
+      if(info.color.colortype == LCT_GREY) info.color.colortype = LCT_RGB;
+      if(info.color.colortype == LCT_GREY_ALPHA) info.color.colortype = LCT_RGBA;
+      if(info.color.bitdepth < 8) info.color.bitdepth = 8;
+    }
+  }
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+  if(!lodepng_color_mode_equal(&state->info_raw, &info.color)) {
+    /*the raw pixels are in another color mode than the PNG will be: convert them first*/
+    unsigned char* converted;
+    size_t size = ((size_t)w * (size_t)h * (size_t)lodepng_get_bpp(&info.color) + 7) / 8;
+
+    converted = (unsigned char*)lodepng_malloc(size);
+    if(!converted && size) state->error = 83; /*alloc fail*/
+    if(!state->error) {
+      state->error = lodepng_convert(converted, image, &info.color, &state->info_raw, w, h);
+    }
+    /*keep the result of preProcessScanlines: if it fails, data is missing or incomplete and
+    must not be turned into an IDAT chunk*/
+    if(!state->error) {
+      state->error = preProcessScanlines(&data, &datasize, converted, w, h, &info, &state->encoder);
+    }
+    lodepng_free(converted);
+    if(state->error) goto cleanup;
+  }
+  else {
+    state->error = preProcessScanlines(&data, &datasize, image, w, h, &info, &state->encoder);
+    if(state->error) goto cleanup;
+  }
+
+  /* output all PNG chunks */ {
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+    size_t i;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    /*write signature and chunks*/
+    writeSignature(&outv);
+    /*IHDR*/
+    addChunk_IHDR(&outv, w, h, info.color.colortype, info.color.bitdepth, info.interlace_method);
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+    /*unknown chunks between IHDR and PLTE*/
+    if(info.unknown_chunks_data[0]) {
+      state->error = addUnknownChunks(&outv, info.unknown_chunks_data[0], info.unknown_chunks_size[0]);
+      if(state->error) goto cleanup;
+    }
+    /*color profile chunks must come before PLTE */
+    if(info.iccp_defined) addChunk_iCCP(&outv, &info, &state->encoder.zlibsettings);
+    if(info.srgb_defined) addChunk_sRGB(&outv, &info);
+    if(info.gama_defined) addChunk_gAMA(&outv, &info);
+    if(info.chrm_defined) addChunk_cHRM(&outv, &info);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    /*PLTE*/
+    if(info.color.colortype == LCT_PALETTE) {
+      addChunk_PLTE(&outv, &info.color);
+    }
+    if(state->encoder.force_palette && (info.color.colortype == LCT_RGB || info.color.colortype == LCT_RGBA)) {
+      addChunk_PLTE(&outv, &info.color);
+    }
+    /*tRNS*/
+    if(info.color.colortype == LCT_PALETTE && getPaletteTranslucency(info.color.palette, info.color.palettesize) != 0) {
+      addChunk_tRNS(&outv, &info.color);
+    }
+    if((info.color.colortype == LCT_GREY || info.color.colortype == LCT_RGB) && info.color.key_defined) {
+      addChunk_tRNS(&outv, &info.color);
+    }
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+    /*bKGD (must come between PLTE and the IDAT chunks)*/
+    if(info.background_defined) {
+      state->error = addChunk_bKGD(&outv, &info);
+      if(state->error) goto cleanup;
+    }
+    /*pHYs (must come before the IDAT chunks)*/
+    if(info.phys_defined) addChunk_pHYs(&outv, &info);
+
+    /*unknown chunks between PLTE and IDAT*/
+    if(info.unknown_chunks_data[1]) {
+      state->error = addUnknownChunks(&outv, info.unknown_chunks_data[1], info.unknown_chunks_size[1]);
+      if(state->error) goto cleanup;
+    }
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    /*IDAT (multiple IDAT chunks must be consecutive)*/
+    state->error = addChunk_IDAT(&outv, data, datasize, &state->encoder.zlibsettings);
+    if(state->error) goto cleanup;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+    /*tIME*/
+    if(info.time_defined) addChunk_tIME(&outv, &info.time);
+    /*tEXt and/or zTXt*/
+    for(i = 0; i != info.text_num; ++i) {
+      if(strlen(info.text_keys[i]) > 79) {
+        state->error = 66; /*text chunk too large*/
+        goto cleanup;
+      }
+      if(strlen(info.text_keys[i]) < 1) {
+        state->error = 67; /*text chunk too small*/
+        goto cleanup;
+      }
+      if(state->encoder.text_compression) {
+        addChunk_zTXt(&outv, info.text_keys[i], info.text_strings[i], &state->encoder.zlibsettings);
+      } else {
+        addChunk_tEXt(&outv, info.text_keys[i], info.text_strings[i]);
+      }
+    }
+    /*LodePNG version id in text chunk*/
+    if(state->encoder.add_id) {
+      unsigned already_added_id_text = 0;
+      for(i = 0; i != info.text_num; ++i) {
+        if(!strcmp(info.text_keys[i], "LodePNG")) {
+          already_added_id_text = 1;
+          break;
+        }
+      }
+      if(already_added_id_text == 0) {
+        addChunk_tEXt(&outv, "LodePNG", LODEPNG_VERSION_STRING); /*it's shorter as tEXt than as zTXt chunk*/
+      }
+    }
+    /*iTXt*/
+    for(i = 0; i != info.itext_num; ++i) {
+      if(strlen(info.itext_keys[i]) > 79) {
+        state->error = 66; /*text chunk too large*/
+        goto cleanup;
+      }
+      if(strlen(info.itext_keys[i]) < 1) {
+        state->error = 67; /*text chunk too small*/
+        goto cleanup;
+      }
+      addChunk_iTXt(&outv, state->encoder.text_compression,
+                    info.itext_keys[i], info.itext_langtags[i], info.itext_transkeys[i], info.itext_strings[i],
+                    &state->encoder.zlibsettings);
+    }
+
+    /*unknown chunks between IDAT and IEND*/
+    if(info.unknown_chunks_data[2]) {
+      state->error = addUnknownChunks(&outv, info.unknown_chunks_data[2], info.unknown_chunks_size[2]);
+      if(state->error) goto cleanup;
+    }
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    addChunk_IEND(&outv);
+  }
+
+cleanup:
+  lodepng_info_cleanup(&info);
+  lodepng_free(data);
+
+  /*instead of cleaning the vector up, give it to the output*/
+  *out = outv.data;
+  *outsize = outv.size;
+
+  return state->error;
+}
+
+/*
+Encodes raw pixels to a PNG in memory, with the same color type and bit depth for the raw
+data and for the PNG, using an otherwise freshly initialized encoder state.
+out, outsize: receive the output buffer and its size
+Returns the error code left in the state by lodepng_encode, 0 if everything went ok.
+*/
+unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize, const unsigned char* image,
+                               unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth) {
+  LodePNGState state;
+  unsigned result;
+
+  lodepng_state_init(&state);
+  /*the PNG gets the same color mode as the raw input*/
+  state.info_png.color.colortype = colortype;
+  state.info_png.color.bitdepth = bitdepth;
+  state.info_raw.colortype = colortype;
+  state.info_raw.bitdepth = bitdepth;
+
+  lodepng_encode(out, outsize, image, w, h, &state);
+  result = state.error; /*read before the state is cleaned up*/
+
+  lodepng_state_cleanup(&state);
+  return result;
+}
+
+/*Same as lodepng_encode_memory, with the color type fixed to LCT_RGBA and the bit depth to 8.*/
+unsigned lodepng_encode32(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h) {
+  const unsigned bitdepth = 8;
+  return lodepng_encode_memory(out, outsize, image, w, h, LCT_RGBA, bitdepth);
+}
+
+/*Same as lodepng_encode_memory, with the color type fixed to LCT_RGB and the bit depth to 8.*/
+unsigned lodepng_encode24(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h) {
+  const unsigned bitdepth = 8;
+  return lodepng_encode_memory(out, outsize, image, w, h, LCT_RGB, bitdepth);
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Encodes raw pixels with lodepng_encode_memory and, if that succeeded, saves the result to the
+file with the given name. The temporary buffer is freed in every case.
+Returns 0 on success, otherwise the error code of the step that failed.
+*/
+unsigned lodepng_encode_file(const char* filename, const unsigned char* image, unsigned w, unsigned h,
+                             LodePNGColorType colortype, unsigned bitdepth) {
+  unsigned char* png;
+  size_t pngsize;
+  unsigned status = lodepng_encode_memory(&png, &pngsize, image, w, h, colortype, bitdepth);
+  if(status == 0) status = lodepng_save_file(png, pngsize, filename);
+  lodepng_free(png);
+  return status;
+}
+
+/*Same as lodepng_encode_file, with the color type fixed to LCT_RGBA and the bit depth to 8.*/
+unsigned lodepng_encode32_file(const char* filename, const unsigned char* image, unsigned w, unsigned h) {
+  const unsigned bitdepth = 8;
+  return lodepng_encode_file(filename, image, w, h, LCT_RGBA, bitdepth);
+}
+
+/*Same as lodepng_encode_file, with the color type fixed to LCT_RGB and the bit depth to 8.*/
+unsigned lodepng_encode24_file(const char* filename, const unsigned char* image, unsigned w, unsigned h) {
+  const unsigned bitdepth = 8;
+  return lodepng_encode_file(filename, image, w, h, LCT_RGB, bitdepth);
+}
+#endif /*LODEPNG_COMPILE_DISK*/
+
+/*
+Fills in the default encoder settings: default zlib settings, LFS_MINSUM filtering with
+filter_palette_zero on, automatic color conversion on, no forced palette, no predefined
+filters and, when ancillary chunks are compiled in, no id text chunk and compressed text.
+*/
+void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings) {
+  lodepng_compress_settings_init(&settings->zlibsettings);
+
+  /*scanline filtering*/
+  settings->filter_strategy = LFS_MINSUM;
+  settings->filter_palette_zero = 1;
+  settings->predefined_filters = 0;
+
+  /*color handling*/
+  settings->auto_convert = 1;
+  settings->force_palette = 0;
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  /*text chunks*/
+  settings->text_compression = 1;
+  settings->add_id = 0;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+}
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+#ifdef LODEPNG_COMPILE_ERROR_TEXT
+/*
+This returns the description of a numerical error code in English. This is also
+the documentation of all the error codes.
+*/
+const char* lodepng_error_text(unsigned code) { /*maps a numerical error code to a constant English description*/
+  switch(code) {
+    case 0: return "no error, everything went ok";
+    case 1: return "nothing done yet"; /*the Encoder/Decoder has done nothing yet, error checking makes no sense yet*/
+    case 10: return "end of input memory reached without huffman end code"; /*while huffman decoding*/
+    case 11: return "error in code tree made it jump outside of huffman tree"; /*while huffman decoding*/
+    case 13: return "problem while processing dynamic deflate block";
+    case 14: return "problem while processing dynamic deflate block";
+    case 15: return "problem while processing dynamic deflate block";
+    case 16: return "unexisting code while processing dynamic deflate block";
+    case 17: return "end of out buffer memory reached while inflating";
+    case 18: return "invalid distance code while inflating";
+    case 19: return "end of out buffer memory reached while inflating";
+    case 20: return "invalid deflate block BTYPE encountered while decoding";
+    case 21: return "NLEN is not ones complement of LEN in a deflate block";
+
+    /*end of out buffer memory reached while inflating:
+    This can happen if the inflated deflate data is longer than the amount of bytes required to fill up
+    all the pixels of the image, given the color depth and image dimensions. Something that doesn't
+    happen in a normal, well encoded, PNG image.*/
+    case 22: return "end of out buffer memory reached while inflating";
+    case 23: return "end of in buffer memory reached while inflating";
+    case 24: return "invalid FCHECK in zlib header";
+    case 25: return "invalid compression method in zlib header";
+    case 26: return "FDICT encountered in zlib header while it's not used for PNG";
+    case 27: return "PNG file is smaller than a PNG header";
+    /*Checks the magic file header, the first 8 bytes of the PNG file*/
+    case 28: return "incorrect PNG signature, it's no PNG or corrupted";
+    case 29: return "first chunk is not the header chunk";
+    case 30: return "chunk length too large, chunk broken off at end of file";
+    case 31: return "illegal PNG color type or bpp";
+    case 32: return "illegal PNG compression method";
+    case 33: return "illegal PNG filter method";
+    case 34: return "illegal PNG interlace method";
+    case 35: return "chunk length of a chunk is too large or the chunk too small";
+    case 36: return "illegal PNG filter type encountered";
+    case 37: return "illegal bit depth for this color type given";
+    case 38: return "the palette is too big"; /*more than 256 colors*/
+    case 39: return "tRNS chunk before PLTE or has more entries than palette size";
+    case 40: return "tRNS chunk has wrong size for greyscale image";
+    case 41: return "tRNS chunk has wrong size for RGB image";
+    case 42: return "tRNS chunk appeared while it was not allowed for this color type";
+    case 43: return "bKGD chunk has wrong size for palette image";
+    case 44: return "bKGD chunk has wrong size for greyscale image";
+    case 45: return "bKGD chunk has wrong size for RGB image";
+    case 48: return "empty input buffer given to decoder. Maybe caused by non-existing file?";
+    case 49: return "jumped past memory while generating dynamic huffman tree";
+    case 50: return "jumped past memory while generating dynamic huffman tree";
+    case 51: return "jumped past memory while inflating huffman block";
+    case 52: return "jumped past memory while inflating";
+    case 53: return "size of zlib data too small";
+    case 54: return "repeat symbol in tree while there was no value symbol yet";
+    /*jumped past tree while generating huffman tree, this could be when the
+    tree will have more leaves than symbols after generating it out of the
+    given lengths. They call this an oversubscribed dynamic bit lengths tree in zlib.*/
+    case 55: return "jumped past tree while generating huffman tree";
+    case 56: return "given output image colortype or bitdepth not supported for color conversion";
+    case 57: return "invalid CRC encountered (checking CRC can be disabled)";
+    case 58: return "invalid ADLER32 encountered (checking ADLER32 can be disabled)";
+    case 59: return "requested color conversion not supported";
+    case 60: return "invalid window size given in the settings of the encoder (must be 0-32768)";
+    case 61: return "invalid BTYPE given in the settings of the encoder (only 0, 1 and 2 are allowed)";
+    /*LodePNG leaves the choice of RGB to greyscale conversion formula to the user.*/
+    case 62: return "conversion from color to greyscale not supported";
+    /*(2^31-1)*/
+    case 63: return "length of a chunk too long, max allowed for PNG is 2147483647 bytes per chunk";
+    /*this would result in the inability of a deflated block to ever contain an end code. It must be at least 1.*/
+    case 64: return "the length of the END symbol 256 in the Huffman tree is 0";
+    case 66: return "the length of a text chunk keyword given to the encoder is longer than the maximum of 79 bytes";
+    case 67: return "the length of a text chunk keyword given to the encoder is smaller than the minimum of 1 byte";
+    case 68: return "tried to encode a PLTE chunk with a palette that has less than 1 or more than 256 colors";
+    case 69: return "unknown chunk type with 'critical' flag encountered by the decoder";
+    case 71: return "unexisting interlace mode given to encoder (must be 0 or 1)";
+    case 72: return "while decoding, unexisting compression method encountering in zTXt or iTXt chunk (it must be 0)";
+    case 73: return "invalid tIME chunk size";
+    case 74: return "invalid pHYs chunk size";
+    /*length could be wrong, or data chopped off*/
+    case 75: return "no null termination char found while decoding text chunk";
+    case 76: return "iTXt chunk too short to contain required bytes";
+    case 77: return "integer overflow in buffer size";
+    case 78: return "failed to open file for reading"; /*file doesn't exist or couldn't be opened for reading*/
+    case 79: return "failed to open file for writing";
+    case 80: return "tried creating a tree of 0 symbols";
+    case 81: return "lazy matching at pos 0 is impossible";
+    case 82: return "color conversion to palette requested while a color isn't in palette, or index out of bounds";
+    case 83: return "memory allocation failed";
+    case 84: return "given image too small to contain all pixels to be encoded";
+    case 86: return "impossible offset in lz77 encoding (internal bug)";
+    case 87: return "must provide custom zlib function pointer if LODEPNG_COMPILE_ZLIB is not defined";
+    case 88: return "invalid filter strategy given for LodePNGEncoderSettings.filter_strategy";
+    case 89: return "text chunk keyword too short or long: must have size 1-79";
+    /*the windowsize in the LodePNGCompressSettings. Requiring POT(==> & instead of %) makes encoding 12% faster.*/
+    case 90: return "windowsize must be a power of two";
+    case 91: return "invalid decompressed idat size";
+    case 92: return "integer overflow due to too many pixels";
+    case 93: return "zero width or height is invalid";
+    case 94: return "header chunk must have a size of 13 bytes";
+    case 95: return "integer overflow with combined idat chunk size";
+    case 96: return "invalid gAMA chunk size";
+    case 97: return "invalid cHRM chunk size";
+    case 98: return "invalid sRGB chunk size";
+    case 99: return "invalid sRGB rendering intent";
+    case 100: return "invalid ICC profile color type, the PNG specification only allows RGB or GRAY";
+    case 101: return "PNG specification does not allow RGB ICC profile on grey color types and vice versa";
+    case 102: return "not allowed to set greyscale ICC profile with colored pixels by PNG specification";
+    case 103: return "Invalid palette index in bKGD chunk. Maybe it came before PLTE chunk?";
+    case 104: return "Invalid bKGD color while encoding (e.g. palette index out of range)";
+  }
+  return "unknown error code"; /*fallback for every code that has no case above*/
+}
+#endif /*LODEPNG_COMPILE_ERROR_TEXT*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // C++ Wrapper                                                          // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_CPP
+namespace lodepng {
+
+#ifdef LODEPNG_COMPILE_DISK
+unsigned load_file(std::vector<unsigned char>& buffer, const std::string& filename) {
+  const long numbytes = lodepng_filesize(filename.c_str());
+  if(numbytes < 0) return 78; /*negative size is reported as error 78, "failed to open file for reading"*/
+  buffer.resize((size_t)numbytes);
+  return numbytes > 0 ? lodepng_buffer_file(&buffer[0], (size_t)numbytes, filename.c_str()) : 0;
+}
+
+/*Saves the bytes of the buffer to the named file. An existing file is overwritten, never appended to.*/
+unsigned save_file(const std::vector<unsigned char>& buffer, const std::string& filename) {
+  return lodepng_save_file(buffer.size() ? &buffer[0] : 0, buffer.size(), filename.c_str());
+}
+#endif /* LODEPNG_COMPILE_DISK */
+
+#ifdef LODEPNG_COMPILE_ZLIB
+#ifdef LODEPNG_COMPILE_DECODER
+unsigned decompress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
+                    const LodePNGDecompressSettings& settings) {
+  size_t numbytes = 0;
+  unsigned char* data = 0;
+  const unsigned result = zlib_decompress(&data, &numbytes, in, insize, &settings);
+  if(data != 0) {
+    out.insert(out.end(), data, data + numbytes); /*appended whenever a buffer came back, whatever the error code*/
+    lodepng_free(data);
+  }
+  return result;
+}
+
+unsigned decompress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
+                    const LodePNGDecompressSettings& settings) {
+  return decompress(out, in.size() ? &in[0] : 0, in.size(), settings); /*empty input is passed as a null pointer*/
+}
+#endif /* LODEPNG_COMPILE_DECODER */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+unsigned compress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
+                  const LodePNGCompressSettings& settings) {
+  size_t numbytes = 0;
+  unsigned char* data = 0;
+  const unsigned result = zlib_compress(&data, &numbytes, in, insize, &settings);
+  if(data != 0) {
+    out.insert(out.end(), data, data + numbytes); /*appended whenever a buffer came back, whatever the error code*/
+    lodepng_free(data);
+  }
+  return result;
+}
+
+unsigned compress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
+                  const LodePNGCompressSettings& settings) {
+  return compress(out, in.size() ? &in[0] : 0, in.size(), settings); /*empty input is passed as a null pointer*/
+}
+#endif /* LODEPNG_COMPILE_ENCODER */
+#endif /* LODEPNG_COMPILE_ZLIB */
+
+
+#ifdef LODEPNG_COMPILE_PNG
+
+State::State() { /*default construction only runs the C-level initializer on this object*/
+  lodepng_state_init(this);
+}
+/*Copy constructor: initializes this object first, then copies the contents of other into it.*/
+State::State(const State& other) {
+  lodepng_state_init(this);
+  lodepng_state_copy(this, &other);
+}
+/*Destructor: passes this object to lodepng_state_cleanup.*/
+State::~State() {
+  lodepng_state_cleanup(this);
+}
+/*Copy assignment. Self-assignment is a no-op, so lodepng_state_copy never gets the same object as dest and source.*/
+State& State::operator=(const State& other) {
+  if(this != &other) lodepng_state_copy(this, &other);
+  return *this;
+}
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h, const unsigned char* in,
+                size_t insize, LodePNGColorType colortype, unsigned bitdepth) {
+  unsigned char* buffer = 0; /*defined value even if the decoder returns without writing the out pointer*/
+  unsigned error = lodepng_decode_memory(&buffer, &w, &h, in, insize, colortype, bitdepth);
+  if(buffer && !error) {
+    State state; /*only used to compute the raw size for the requested colortype and bitdepth*/
+    state.info_raw.colortype = colortype;
+    state.info_raw.bitdepth = bitdepth;
+    size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw);
+    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
+  }
+  lodepng_free(buffer); /*freed on the error path too, like the State overload of decode does*/
+  return error;
+}
+
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                const std::vector<unsigned char>& in, LodePNGColorType colortype, unsigned bitdepth) {
+  return decode(out, w, h, in.empty() ? 0 : &in[0], in.size(), colortype, bitdepth); /*size stays size_t: no truncation*/
+}
+
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                State& state,
+                const unsigned char* in, size_t insize) {
+  unsigned char* pixels = NULL;
+  const unsigned result = lodepng_decode(&pixels, &w, &h, &state, in, insize);
+  if(!result && pixels) {
+    const size_t numbytes = lodepng_get_raw_size(w, h, &state.info_raw);
+    out.insert(out.end(), pixels, pixels + numbytes); /*output is appended only when decoding succeeded*/
+  }
+  lodepng_free(pixels); /*unconditional: runs whether or not decoding succeeded*/
+  return result;
+}
+
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                State& state,
+                const std::vector<unsigned char>& in) {
+  return decode(out, w, h, state, in.size() ? &in[0] : 0, in.size()); /*empty input is passed as a null pointer*/
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h, const std::string& filename,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  std::vector<unsigned char> png; /*receives the contents of the file*/
+  const unsigned loaderror = load_file(png, filename);
+  /*a load error is returned as is; decoding is only attempted after a successful load*/
+  return loaderror ? loaderror : decode(out, w, h, png, colortype, bitdepth);
+}
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_DECODER */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+unsigned encode(std::vector<unsigned char>& out, const unsigned char* in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  unsigned char* buffer = 0; /*initialized like compress() does, so the check below never reads garbage*/
+  size_t buffersize = 0;
+  unsigned error = lodepng_encode_memory(&buffer, &buffersize, in, w, h, colortype, bitdepth);
+  if(buffer) {
+    out.insert(out.end(), &buffer[0], &buffer[buffersize]); /*appended whenever a buffer came back*/
+    lodepng_free(buffer);
+  }
+  return error;
+}
+
+unsigned encode(std::vector<unsigned char>& out,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  if(in.size() < lodepng_get_raw_size_lct(w, h, colortype, bitdepth)) return 84; /*input holds too few bytes*/
+  return encode(out, in.size() ? &in[0] : 0, w, h, colortype, bitdepth);
+}
+
+unsigned encode(std::vector<unsigned char>& out,
+                const unsigned char* in, unsigned w, unsigned h,
+                State& state) {
+  unsigned char* buffer = 0; /*initialized like compress() does, so the check below never reads garbage*/
+  size_t buffersize = 0;
+  unsigned error = lodepng_encode(&buffer, &buffersize, in, w, h, &state);
+  if(buffer) {
+    out.insert(out.end(), &buffer[0], &buffer[buffersize]); /*appended whenever a buffer came back*/
+    lodepng_free(buffer);
+  }
+  return error;
+}
+
+unsigned encode(std::vector<unsigned char>& out,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                State& state) {
+  if(in.size() < lodepng_get_raw_size(w, h, &state.info_raw)) return 84; /*input holds too few bytes*/
+  return encode(out, in.size() ? &in[0] : 0, w, h, state);
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+unsigned encode(const std::string& filename,
+                const unsigned char* in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  std::vector<unsigned char> png; /*receives the encoded image before it is written out*/
+  const unsigned result = encode(png, in, w, h, colortype, bitdepth);
+  /*the file is written only when encoding reported no error*/
+  return result ? result : save_file(png, filename);
+}
+
+unsigned encode(const std::string& filename,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  if(in.size() < lodepng_get_raw_size_lct(w, h, colortype, bitdepth)) return 84; /*input holds too few bytes*/
+  return encode(filename, in.size() ? &in[0] : 0, w, h, colortype, bitdepth);
+}
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_ENCODER */
+#endif /* LODEPNG_COMPILE_PNG */
+} /* namespace lodepng */
+#endif /*LODEPNG_COMPILE_CPP*/
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/lodepng.h b/codec/L2/demos/pikEnc/host/third_party/lodepng/lodepng.h
new file mode 100755
index 0000000000..6f161606a7
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/lodepng.h
@@ -0,0 +1,1909 @@
+/*
+LodePNG version 20181230
+
+Copyright (c) 2005-2018 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+#ifndef LODEPNG_H
+#define LODEPNG_H
+
+#include <string.h> /*for size_t*/
+
+extern const char* LODEPNG_VERSION_STRING;
+
+/*
+The following #defines are used to create code sections. They can be disabled
+to disable code sections, which can give faster compile time and smaller binary.
+The "NO_COMPILE" defines are designed to be used to pass as defines to the
+compiler command to disable them without modifying this header, e.g.
+-DLODEPNG_NO_COMPILE_ZLIB for gcc.
+In addition to those below, you can also define LODEPNG_NO_COMPILE_CRC to
+allow implementing a custom lodepng_crc32.
+*/
+/*deflate & zlib. If disabled, you must specify alternative zlib functions in
+the custom_zlib field of the compress and decompress settings*/
+#ifndef LODEPNG_NO_COMPILE_ZLIB
+#define LODEPNG_COMPILE_ZLIB
+#endif
+/*png encoder and png decoder*/
+#ifndef LODEPNG_NO_COMPILE_PNG
+#define LODEPNG_COMPILE_PNG
+#endif
+/*deflate&zlib decoder and png decoder*/
+#ifndef LODEPNG_NO_COMPILE_DECODER
+#define LODEPNG_COMPILE_DECODER
+#endif
+/*deflate&zlib encoder and png encoder*/
+#ifndef LODEPNG_NO_COMPILE_ENCODER
+#define LODEPNG_COMPILE_ENCODER
+#endif
+/*the optional built in harddisk file loading and saving functions*/
+#ifndef LODEPNG_NO_COMPILE_DISK
+#define LODEPNG_COMPILE_DISK
+#endif
+/*support for chunks other than IHDR, IDAT, PLTE, tRNS, IEND: ancillary and unknown chunks*/
+#ifndef LODEPNG_NO_COMPILE_ANCILLARY_CHUNKS
+#define LODEPNG_COMPILE_ANCILLARY_CHUNKS
+#endif
+/*ability to convert error numerical codes to English text string*/
+#ifndef LODEPNG_NO_COMPILE_ERROR_TEXT
+#define LODEPNG_COMPILE_ERROR_TEXT
+#endif
+/*Compile the default allocators (C's free, malloc and realloc). If you disable this,
+you can define the functions lodepng_free, lodepng_malloc and lodepng_realloc in your
+source files with custom allocators.*/
+#ifndef LODEPNG_NO_COMPILE_ALLOCATORS
+#define LODEPNG_COMPILE_ALLOCATORS
+#endif
+/*compile the C++ version (you can disable the C++ wrapper here even when compiling for C++)*/
+#ifdef __cplusplus
+#ifndef LODEPNG_NO_COMPILE_CPP
+#define LODEPNG_COMPILE_CPP
+#endif
+#endif
+
+#ifdef LODEPNG_COMPILE_CPP
+#include <vector>
+#include <string>
+#endif /*LODEPNG_COMPILE_CPP*/
+
+#ifdef LODEPNG_COMPILE_PNG
+/*The PNG color types (also used to describe raw pixel buffers). Each value's comment lists its bit depths.*/
+typedef enum LodePNGColorType {
+  LCT_GREY = 0, /*greyscale: 1,2,4,8,16 bit*/
+  LCT_RGB = 2, /*RGB: 8,16 bit*/
+  LCT_PALETTE = 3, /*palette: 1,2,4,8 bit*/
+  LCT_GREY_ALPHA = 4, /*greyscale with alpha: 8,16 bit*/
+  LCT_RGBA = 6 /*RGB with alpha: 8,16 bit*/
+} LodePNGColorType;
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*
+Converts PNG data in memory to raw pixel data.
+out: Output parameter. Pointer to buffer that will contain the raw pixel data.
+     After decoding, its size is w * h * (bytes per pixel) bytes larger than
+     initially. Bytes per pixel depends on colortype and bitdepth.
+     Must be freed after usage with free(*out).
+     Note: for 16-bit per channel colors, uses big endian format like PNG does.
+w: Output parameter. Pointer to width of pixel data.
+h: Output parameter. Pointer to height of pixel data.
+in: Memory buffer with the PNG file.
+insize: size of the in buffer.
+colortype: the desired color type for the raw output image. See explanation on PNG color types.
+bitdepth: the desired bit depth for the raw output image. See explanation on PNG color types.
+Return value: LodePNG error code (0 means no error).
+*/
+unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h,
+                               const unsigned char* in, size_t insize,
+                               LodePNGColorType colortype, unsigned bitdepth);
+
+/*Same as lodepng_decode_memory, but always decodes to 32-bit RGBA raw image*/
+unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h,
+                          const unsigned char* in, size_t insize);
+
+/*Same as lodepng_decode_memory, but always decodes to 24-bit RGB raw image*/
+unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h,
+                          const unsigned char* in, size_t insize);
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Load PNG from disk, from file with given name.
+Same as the other decode functions, but instead takes a filename as input.
+*/
+unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h,
+                             const char* filename,
+                             LodePNGColorType colortype, unsigned bitdepth);
+
+/*Same as lodepng_decode_file, but always decodes to 32-bit RGBA raw image.*/
+unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h,
+                               const char* filename);
+
+/*Same as lodepng_decode_file, but always decodes to 24-bit RGB raw image.*/
+unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h,
+                               const char* filename);
+#endif /*LODEPNG_COMPILE_DISK*/
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*
+Converts raw pixel data into a PNG image in memory. The colortype and bitdepth
+  of the output PNG image cannot be chosen, they are automatically determined
+  by the colortype, bitdepth and content of the input pixel data.
+  Note: for 16-bit per channel colors, needs big endian format like PNG does.
+out: Output parameter. Pointer to buffer that will contain the PNG image data.
+     Must be freed after usage with free(*out).
+outsize: Output parameter. Pointer to the size in bytes of the out buffer.
+image: The raw pixel data to encode. The size of this buffer should be
+       w * h * (bytes per pixel), bytes per pixel depends on colortype and bitdepth.
+w: width of the raw pixel data in pixels.
+h: height of the raw pixel data in pixels.
+colortype: the color type of the raw input image. See explanation on PNG color types.
+bitdepth: the bit depth of the raw input image. See explanation on PNG color types.
+Return value: LodePNG error code (0 means no error).
+*/
+unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize,
+                               const unsigned char* image, unsigned w, unsigned h,
+                               LodePNGColorType colortype, unsigned bitdepth);
+
+/*Same as lodepng_encode_memory, but always encodes from 32-bit RGBA raw image.*/
+unsigned lodepng_encode32(unsigned char** out, size_t* outsize,
+                          const unsigned char* image, unsigned w, unsigned h);
+
+/*Same as lodepng_encode_memory, but always encodes from 24-bit RGB raw image.*/
+unsigned lodepng_encode24(unsigned char** out, size_t* outsize,
+                          const unsigned char* image, unsigned w, unsigned h);
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Converts raw pixel data into a PNG file on disk.
+Same as the other encode functions, but instead takes a filename as output.
+NOTE: This overwrites existing files without warning!
+*/
+unsigned lodepng_encode_file(const char* filename,
+                             const unsigned char* image, unsigned w, unsigned h,
+                             LodePNGColorType colortype, unsigned bitdepth);
+
+/*Same as lodepng_encode_file, but always encodes from 32-bit RGBA raw image.*/
+unsigned lodepng_encode32_file(const char* filename,
+                               const unsigned char* image, unsigned w, unsigned h);
+
+/*Same as lodepng_encode_file, but always encodes from 24-bit RGB raw image.*/
+unsigned lodepng_encode24_file(const char* filename,
+                               const unsigned char* image, unsigned w, unsigned h);
+#endif /*LODEPNG_COMPILE_DISK*/
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+
+#ifdef LODEPNG_COMPILE_CPP
+namespace lodepng {
+#ifdef LODEPNG_COMPILE_DECODER
+/*Same as lodepng_decode_memory, but decodes to an std::vector. The colortype
+is the format to output the pixels to. Default is RGBA 8-bit per channel.*/
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                const unsigned char* in, size_t insize,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                const std::vector<unsigned char>& in,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Converts PNG file from disk to raw pixel data in memory.
+Same as the other decode functions, but instead takes a filename as input.
+*/
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                const std::string& filename,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_DECODER */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Same as lodepng_encode_memory, but encodes to an std::vector. colortype
+is that of the raw input data. The output PNG color type will be auto chosen.*/
+unsigned encode(std::vector<unsigned char>& out,
+                const unsigned char* in, unsigned w, unsigned h,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+unsigned encode(std::vector<unsigned char>& out,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Converts raw pixel data (32-bit RGBA unless colortype and bitdepth say otherwise) into a PNG file on disk.
+Same as the other encode functions, but instead takes a filename as output.
+NOTE: This overwrites existing files without warning!
+*/
+unsigned encode(const std::string& filename,
+                const unsigned char* in, unsigned w, unsigned h,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+unsigned encode(const std::string& filename,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_ENCODER */
+} /* namespace lodepng */
+#endif /*LODEPNG_COMPILE_CPP*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+#ifdef LODEPNG_COMPILE_ERROR_TEXT
+/*Returns an English description of the numerical error code.*/
+const char* lodepng_error_text(unsigned code);
+#endif /*LODEPNG_COMPILE_ERROR_TEXT*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*Settings for zlib decompression*/
+typedef struct LodePNGDecompressSettings LodePNGDecompressSettings;
+struct LodePNGDecompressSettings {
+  /* Check LodePNGDecoderSettings for more ignorable errors such as ignore_crc */
+  unsigned ignore_adler32; /*if 1, continue and don't give an error message if the Adler32 checksum is corrupted*/
+
+  /*use custom zlib decoder instead of built in one (default: null)*/
+  unsigned (*custom_zlib)(unsigned char**, size_t*,
+                          const unsigned char*, size_t,
+                          const LodePNGDecompressSettings*);
+  /*use custom inflate function (deflate decoder) instead of built in one (default: null)
+  if custom_zlib is used, custom_inflate is ignored since only the built in
+  zlib function will call custom_inflate*/
+  unsigned (*custom_inflate)(unsigned char**, size_t*,
+                             const unsigned char*, size_t,
+                             const LodePNGDecompressSettings*);
+
+  const void* custom_context; /*optional custom settings for custom functions*/
+};
+
+extern const LodePNGDecompressSettings lodepng_default_decompress_settings;
+void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*
+Settings for zlib compression. Tweaking these settings tweaks the balance
+between speed and compression ratio.
+*/
+typedef struct LodePNGCompressSettings LodePNGCompressSettings;
+struct LodePNGCompressSettings /*deflate = compress*/ {
+  /*LZ77 related settings*/
+  unsigned btype; /*the block type for LZ (0, 1 or 2; error 61 reports any other value). Should be 2 for proper compression.*/
+  unsigned use_lz77; /*whether or not to use LZ77. Should be 1 for proper compression.*/
+  unsigned windowsize; /*must be a power of two <= 32768. higher compresses more but is slower. Default value: 2048.*/
+  unsigned minmatch; /*minimum lz77 length. 3 is normally best, 6 can be better for some PNGs. Default: 0*/
+  unsigned nicematch; /*stop searching if >= this length found. Set to 258 for best compression. Default: 128*/
+  unsigned lazymatching; /*use lazy matching: better compression but a bit slower. Default: true*/
+
+  /*use custom zlib encoder instead of built in one (default: null)*/
+  unsigned (*custom_zlib)(unsigned char**, size_t*,
+                          const unsigned char*, size_t,
+                          const LodePNGCompressSettings*);
+  /*use custom deflate encoder instead of built in one (default: null)
+  if custom_zlib is used, custom_deflate is ignored since only the built in
+  zlib function will call custom_deflate*/
+  unsigned (*custom_deflate)(unsigned char**, size_t*,
+                             const unsigned char*, size_t,
+                             const LodePNGCompressSettings*);
+
+  const void* custom_context; /*optional custom settings for custom functions*/
+};
+
+extern const LodePNGCompressSettings lodepng_default_compress_settings;
+void lodepng_compress_settings_init(LodePNGCompressSettings* settings);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#ifdef LODEPNG_COMPILE_PNG
+/*
+Color mode of an image. Contains all information required to decode the pixel
+bits to RGBA colors. This information is the same as used in the PNG file
+format, and is used both for PNG and raw image data in LodePNG.
+*/
+typedef struct LodePNGColorMode {
+  /*header (IHDR)*/
+  LodePNGColorType colortype; /*color type, see PNG standard or documentation further in this header file*/
+  unsigned bitdepth;  /*bits per sample, see PNG standard or documentation further in this header file*/
+
+  /*
+  palette (PLTE and tRNS)
+
+  Dynamically allocated with the colors of the palette, including alpha.
+  When encoding a PNG, to store your colors in the palette of the LodePNGColorMode, first use
+  lodepng_palette_clear, then for each color use lodepng_palette_add.
+  If you encode an image without alpha with palette, don't forget to put value 255 in each A byte of the palette.
+
+  When decoding, by default you can ignore this palette, since LodePNG already
+  fills the palette colors in the pixels of the raw RGBA output.
+
+  The palette is only supported for color type 3.
+  */
+  unsigned char* palette; /*palette in RGBARGBA... order. Must be either 0 (not allocated), or allocated with size 1024*/
+  size_t palettesize; /*palette size in number of colors (amount of bytes is 4 * palettesize)*/
+
+  /*
+  transparent color key (tRNS)
+
+  This color uses the same bit depth as the bitdepth value in this struct, which can be 1-bit to 16-bit.
+  For greyscale PNGs, r, g and b will all 3 be set to the same value.
+
+  When decoding, by default you can ignore this information, since LodePNG sets
+  pixels with this key to transparent already in the raw RGBA output.
+
+  The color key is only supported for color types 0 and 2.
+  */
+  unsigned key_defined; /*is a transparent color key given? 0 = false, 1 = true*/
+  unsigned key_r;       /*red/greyscale component of color key*/
+  unsigned key_g;       /*green component of color key*/
+  unsigned key_b;       /*blue component of color key*/
+} LodePNGColorMode;
+
+/*init, cleanup and copy functions to use with this struct*/
+void lodepng_color_mode_init(LodePNGColorMode* info);
+void lodepng_color_mode_cleanup(LodePNGColorMode* info);
+/*return value is error code (0 means no error)*/
+unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source);
+/* Makes a temporary LodePNGColorMode that does not need cleanup (no palette) */
+LodePNGColorMode lodepng_color_mode_make(LodePNGColorType colortype, unsigned bitdepth);
+
+void lodepng_palette_clear(LodePNGColorMode* info);
+/*add 1 color to the palette*/
+unsigned lodepng_palette_add(LodePNGColorMode* info,
+                             unsigned char r, unsigned char g, unsigned char b, unsigned char a);
+
+/*get the total amount of bits per pixel, based on colortype and bitdepth in the struct*/
+unsigned lodepng_get_bpp(const LodePNGColorMode* info);
+/*get the amount of color channels used, based on colortype in the struct.
+If a palette is used, it counts as 1 channel.*/
+unsigned lodepng_get_channels(const LodePNGColorMode* info);
+/*is it a greyscale type? (only colortype 0 or 4)*/
+unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info);
+/*has it got an alpha channel? (only colortype 4 or 6)*/
+unsigned lodepng_is_alpha_type(const LodePNGColorMode* info);
+/*has it got a palette? (only colortype 3)*/
+unsigned lodepng_is_palette_type(const LodePNGColorMode* info);
+/*only returns true if there is a palette and there is a value in the palette with alpha < 255.
+Loops through the palette to check this.*/
+unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info);
+/*
+Check if the given color info indicates the possibility of having non-opaque pixels in the PNG image.
+Returns true if the image can have translucent or invisible pixels (it may still be opaque if it doesn't use such pixels).
+Returns false if the image can only have opaque pixels.
+In detail, it returns true only if it's a color type with alpha, or has a palette with non-opaque values,
+or if "key_defined" is true.
+*/
+unsigned lodepng_can_have_alpha(const LodePNGColorMode* info);
+/*Returns the byte size of a raw image buffer with given width, height and color mode*/
+size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color);
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+/*The information of a Time chunk in PNG.*/
+typedef struct LodePNGTime {
+  unsigned year;    /*2 bytes used (0-65535)*/
+  unsigned month;   /*1-12*/
+  unsigned day;     /*1-31*/
+  unsigned hour;    /*0-23*/
+  unsigned minute;  /*0-59*/
+  unsigned second;  /*0-60 (to allow for leap seconds)*/
+} LodePNGTime;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*Information about the PNG image, except pixels, width and height.*/
+typedef struct LodePNGInfo {
+  /*header (IHDR), palette (PLTE) and transparency (tRNS) chunks*/
+  unsigned compression_method;/*compression method of the original file. Always 0.*/
+  unsigned filter_method;     /*filter method of the original file*/
+  unsigned interlace_method;  /*interlace method of the original file: 0=none, 1=Adam7*/
+  LodePNGColorMode color;     /*color type and bits, palette and transparency of the PNG file*/
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  /*
+  Suggested background color chunk (bKGD)
+
+  This uses the same color mode and bit depth as the PNG (except no alpha channel),
+  with values truncated to the bit depth in the unsigned integer.
+
+  For greyscale and palette PNGs, the value is stored in background_r. The values
+  in background_g and background_b are then unused.
+
+  So when decoding, you may get these in a different color mode than the one you requested
+  for the raw pixels.
+
+  When encoding with auto_convert, you must use the color model defined in info_png.color for
+  these values. The encoder normally ignores info_png.color when auto_convert is on, but will
+  use it to interpret these values (and convert copies of them to its chosen color model).
+
+  When encoding, avoid setting this to an expensive color, such as a non-grey value
+  when the image is grey, or the compression will be worse since it will be forced to
+  write the PNG with a more expensive color mode (when auto_convert is on).
+
+  The decoder does not use this background color to edit the color of pixels. This is a
+  completely optional metadata feature.
+  */
+  unsigned background_defined; /*is a suggested background color given?*/
+  unsigned background_r;       /*red/grey/palette component of suggested background color*/
+  unsigned background_g;       /*green component of suggested background color*/
+  unsigned background_b;       /*blue component of suggested background color*/
+
+  /*
+  non-international text chunks (tEXt and zTXt)
+
+  The char** arrays each contain num strings. The actual messages are in
+  text_strings, while text_keys are keywords that give a short description what
+  the actual text represents, e.g. Title, Author, Description, or anything else.
+
+  All the string fields below including keys, names and language tags are null terminated.
+  The PNG specification uses null characters for the keys, names and tags, and forbids null
+  characters to appear in the main text which is why we can use null termination everywhere here.
+
+  A keyword is minimum 1 character and maximum 79 characters long. It's
+  discouraged to use a single line length longer than 79 characters for texts.
+
+  Don't allocate these text buffers yourself. Use the init/cleanup functions
+  correctly and use lodepng_add_text and lodepng_clear_text.
+  */
+  size_t text_num; /*the amount of texts in these char** buffers (there may be more texts in itext)*/
+  char** text_keys; /*the keyword of a text chunk (e.g. "Comment")*/
+  char** text_strings; /*the actual text*/
+
+  /*
+  international text chunks (iTXt)
+  Similar to the non-international text chunks, but with additional strings
+  "langtags" and "transkeys".
+  */
+  size_t itext_num; /*the amount of international texts in this PNG*/
+  char** itext_keys; /*the English keyword of the text chunk (e.g. "Comment")*/
+  char** itext_langtags; /*language tag for this text's language, ISO/IEC 646 string, e.g. ISO 639 language tag*/
+  char** itext_transkeys; /*keyword translated to the international language - UTF-8 string*/
+  char** itext_strings; /*the actual international text - UTF-8 string*/
+
+  /*time chunk (tIME)*/
+  unsigned time_defined; /*set to 1 to make the encoder generate a tIME chunk*/
+  LodePNGTime time;
+
+  /*phys chunk (pHYs)*/
+  unsigned phys_defined; /*if 0, there is no pHYs chunk and the values below are undefined, if 1 there is one*/
+  unsigned phys_x; /*pixels per unit in x direction*/
+  unsigned phys_y; /*pixels per unit in y direction*/
+  unsigned phys_unit; /*may be 0 (unknown unit) or 1 (metre)*/
+
+  /*
+  Color profile related chunks: gAMA, cHRM, sRGB, iCCP
+
+  LodePNG does not apply any color conversions on pixels in the encoder or decoder and does not interpret these color
+  profile values. It merely passes on the information. If you wish to use color profiles and convert colors, please
+  use these values with a color management library.
+
+  See the PNG, ICC and sRGB specifications for more information about the meaning of these values.
+  */
+
+  /* gAMA chunk: optional, overridden by sRGB or iCCP if those are present. */
+  unsigned gama_defined; /* Whether a gAMA chunk is present (0 = not present, 1 = present). */
+  unsigned gama_gamma;   /* Gamma exponent times 100000 */
+
+  /* cHRM chunk: optional, overridden by sRGB or iCCP if those are present. */
+  unsigned chrm_defined; /* Whether a cHRM chunk is present (0 = not present, 1 = present). */
+  unsigned chrm_white_x; /* White Point x times 100000 */
+  unsigned chrm_white_y; /* White Point y times 100000 */
+  unsigned chrm_red_x;   /* Red x times 100000 */
+  unsigned chrm_red_y;   /* Red y times 100000 */
+  unsigned chrm_green_x; /* Green x times 100000 */
+  unsigned chrm_green_y; /* Green y times 100000 */
+  unsigned chrm_blue_x;  /* Blue x times 100000 */
+  unsigned chrm_blue_y;  /* Blue y times 100000 */
+
+  /*
+  sRGB chunk: optional. May not appear at the same time as iCCP.
+  If gAMA is also present gAMA must contain value 45455.
+  If cHRM is also present cHRM must contain respectively 31270,32900,64000,33000,30000,60000,15000,6000.
+  */
+  unsigned srgb_defined; /* Whether an sRGB chunk is present (0 = not present, 1 = present). */
+  unsigned srgb_intent;  /* Rendering intent: 0=perceptual, 1=rel. colorimetric, 2=saturation, 3=abs. colorimetric */
+
+  /*
+  iCCP chunk: optional. May not appear at the same time as sRGB.
+
+  LodePNG does not parse or use the ICC profile (except its color space header field for an edge case); a
+  separate library that handles the ICC data format (not included in LodePNG) is needed to use it for color
+  management and conversions.
+
+  For encoding, if iCCP is present, gAMA and cHRM are recommended to be added as well with values that match the ICC
+  profile as closely as possible, if you wish to do this you should provide the correct values for gAMA and cHRM and
+  enable their '_defined' flags since LodePNG will not automatically compute them from the ICC profile.
+
+  For encoding, the ICC profile is required by the PNG specification to be an "RGB" profile for non-grey
+  PNG color types and a "GRAY" profile for grey PNG color types. If you disable auto_convert, you must ensure
+  the ICC profile type matches your requested color type, else the encoder gives an error. If auto_convert is
+  enabled (the default), and the ICC profile is not a good match for the pixel data, this will result in an encoder
+  error if the pixel data has non-grey pixels for a GRAY profile, or a silent less-optimal compression of the pixel
+  data if the pixels could be encoded as greyscale but the ICC profile is RGB.
+
+  To avoid this do not set an ICC profile in the image unless there is a good reason for it, and when doing so
+  make sure you compute it carefully to avoid the above problems.
+  */
+  unsigned iccp_defined;      /* Whether an iCCP chunk is present (0 = not present, 1 = present). */
+  char* iccp_name;            /* Null terminated string with profile name, 1-79 bytes */
+  /*
+  The ICC profile in iccp_profile_size bytes.
+  Don't allocate this buffer yourself. Use the init/cleanup functions
+  correctly and use lodepng_set_icc and lodepng_clear_icc.
+  */
+  unsigned char* iccp_profile;
+  unsigned iccp_profile_size; /* The size of iccp_profile in bytes */
+
+  /* End of color profile related chunks */
+
+
+  /*
+  unknown chunks: chunks not known by LodePNG, passed on byte for byte.
+
+  There are 3 buffers, one for each position in the PNG where unknown chunks can appear.
+  Each buffer contains all unknown chunks for that position consecutively.
+  The 3 positions are:
+  0: between IHDR and PLTE, 1: between PLTE and IDAT, 2: between IDAT and IEND.
+
+  For encoding, do not store critical chunks or known chunks that are enabled with a "_defined" flag
+  above in here, since the encoder will blindly follow this and could then encode an invalid PNG file
+  (such as one with two IHDR chunks or the disallowed combination of sRGB with iCCP). But do use
+  this if you wish to store an ancillary chunk that is not supported by LodePNG (such as sPLT or hIST),
+  or any non-standard PNG chunk.
+
+  Do not allocate or traverse this data yourself. Use the chunk traversing functions declared
+  later, such as lodepng_chunk_next and lodepng_chunk_append, to read/write this struct.
+  */
+  unsigned char* unknown_chunks_data[3];
+  size_t unknown_chunks_size[3]; /*size in bytes of the unknown chunks, given for protection*/
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+} LodePNGInfo;
+
+/*init, cleanup and copy functions to use with this struct*/
+void lodepng_info_init(LodePNGInfo* info);
+void lodepng_info_cleanup(LodePNGInfo* info);
+/*return value is error code (0 means no error)*/
+unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source);
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str); /*push back both texts at once*/
+void lodepng_clear_text(LodePNGInfo* info); /*use this to clear the texts again after you filled them in*/
+
+unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag,
+                           const char* transkey, const char* str); /*push back the 4 texts of 1 chunk at once*/
+void lodepng_clear_itext(LodePNGInfo* info); /*use this to clear the itexts again after you filled them in*/
+
+/*replaces if exists*/
+unsigned lodepng_set_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size);
+void lodepng_clear_icc(LodePNGInfo* info); /*use this to clear the ICC profile again after you filled it in*/
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*
+Converts raw buffer from one color type to another color type, based on
+LodePNGColorMode structs to describe the input and output color type.
+See the reference manual at the end of this header file to see which color conversions are supported.
+return value = LodePNG error code (0 if all went ok, an error if the conversion isn't supported)
+The out buffer must have size (w * h * bpp + 7) / 8, where bpp is the bits per pixel
+of the output color type (lodepng_get_bpp).
+For < 8 bpp images, there should not be padding bits at the end of scanlines.
+For 16-bit per channel colors, uses big endian format like PNG does.
+Return value is LodePNG error code
+*/
+unsigned lodepng_convert(unsigned char* out, const unsigned char* in,
+                         const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in,
+                         unsigned w, unsigned h);
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*
+Settings for the decoder. This contains settings for the PNG and the Zlib
+decoder, but not the Info settings from the Info structs.
+*/
+typedef struct LodePNGDecoderSettings {
+  LodePNGDecompressSettings zlibsettings; /*in here is the setting to ignore Adler32 checksums*/
+
+  /* Check LodePNGDecompressSettings for more ignorable errors such as ignore_adler32 */
+  unsigned ignore_crc; /*ignore CRC checksums*/
+  unsigned ignore_critical; /*ignore unknown critical chunks*/
+  unsigned ignore_end; /*ignore issues at end of file if possible (missing IEND chunk, too large chunk, ...)*/
+  /* TODO: make a system involving warnings with levels and a strict mode instead. Other potentially recoverable
+     errors: srgb rendering intent value, size of content of ancillary chunks, more than 79 characters for some
+     strings, placement/combination rules for ancillary chunks, crc of unknown chunks, allowed characters
+     in string keys, etc... */
+
+  unsigned color_convert; /*whether to convert the PNG to the color type you want. Default: yes*/
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  unsigned read_text_chunks; /*if false but remember_unknown_chunks is true, they're stored in the unknown chunks*/
+  /*store all bytes from unknown chunks in the LodePNGInfo (off by default, useful for a png editor)*/
+  unsigned remember_unknown_chunks;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+} LodePNGDecoderSettings;
+
+void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Strategy the encoder uses to choose the PNG filter type of each scanline.*/
+typedef enum LodePNGFilterStrategy {
+  /*every filter at zero*/
+  LFS_ZERO,
+  /*Use filter that gives minimum sum, as described in the official PNG filter heuristic.*/
+  LFS_MINSUM,
+  /*Use the filter type that gives smallest Shannon entropy for this scanline. Depending
+  on the image, this is better or worse than minsum.*/
+  LFS_ENTROPY,
+  /*
+  Brute-force-search PNG filters by compressing each filter for each scanline.
+  Experimental, very slow, and only rarely gives better compression than MINSUM.
+  */
+  LFS_BRUTE_FORCE,
+  /*use predefined_filters buffer: you specify the filter type for each scanline*/
+  LFS_PREDEFINED
+} LodePNGFilterStrategy;
+
+/*Gives characteristics about the integer RGBA colors of the image (count, alpha channel usage, bit depth, ...),
+which helps decide which color model to use for encoding.
+Used internally by default if "auto_convert" is enabled. Public because it's useful for custom algorithms.
+NOTE: This is not related to the ICC color profile, search "iccp_profile" instead to find the ICC/chromaticity/...
+fields in this header file.*/
+typedef struct LodePNGColorProfile {
+  unsigned colored; /*not greyscale*/
+  unsigned key; /*image is not opaque and color key is possible instead of full alpha*/
+  unsigned short key_r; /*key values, always as 16-bit, in 8-bit case the byte is duplicated, e.g. 65535 means 255*/
+  unsigned short key_g;
+  unsigned short key_b;
+  unsigned alpha; /*image is not opaque and alpha channel or alpha palette required*/
+  unsigned numcolors; /*amount of colors, up to 257. Not valid if bits == 16.*/
+  unsigned char palette[1024]; /*Remembers up to the first 256 RGBA colors, in no particular order*/
+  unsigned bits; /*bits per channel (not for palette). 1,2 or 4 for greyscale only. 16 if 16-bit per channel required.*/
+  size_t numpixels;
+} LodePNGColorProfile;
+
+void lodepng_color_profile_init(LodePNGColorProfile* profile);
+
+/*Get a LodePNGColorProfile of the image. The profile must already have been inited.
+NOTE: This is not related to the ICC color profile, search "iccp_profile" instead to find the ICC/chromaticity/...
+fields in this header file.*/
+unsigned lodepng_get_color_profile(LodePNGColorProfile* profile,
+                                   const unsigned char* image, unsigned w, unsigned h,
+                                   const LodePNGColorMode* mode_in);
+/*The function LodePNG uses internally to decide the PNG color with auto_convert.
+Chooses an optimal color model, e.g. grey if only grey pixels, palette if < 256 colors, ...*/
+unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out,
+                                   const unsigned char* image, unsigned w, unsigned h,
+                                   const LodePNGColorMode* mode_in);
+
+/*Settings for the encoder.*/
+typedef struct LodePNGEncoderSettings {
+  LodePNGCompressSettings zlibsettings; /*settings for the zlib encoder, such as window size, ...*/
+
+  unsigned auto_convert; /*automatically choose output PNG color type. Default: true*/
+
+  /*If true, follows the official PNG heuristic: if the PNG uses a palette or lower than
+  8 bit depth, set all filters to zero. Otherwise use the filter_strategy. Note that to
+  completely follow the official PNG heuristic, filter_palette_zero must be true and
+  filter_strategy must be LFS_MINSUM*/
+  unsigned filter_palette_zero;
+  /*Which filter strategy to use when not using zeroes due to filter_palette_zero.
+  Set filter_palette_zero to 0 to ensure always using your chosen strategy. Default: LFS_MINSUM*/
+  LodePNGFilterStrategy filter_strategy;
+  /*used if filter_strategy is LFS_PREDEFINED. In that case, this must point to a buffer with
+  the same length as the amount of scanlines in the image, and each value must be <= 5. You
+  have to cleanup this buffer, LodePNG will never free it. Don't forget that filter_palette_zero
+  must be set to 0 to ensure this is also used on palette or low bitdepth images.*/
+  const unsigned char* predefined_filters;
+
+  /*force creating a PLTE chunk if colortype is 2 or 6 (= a suggested palette).
+  If colortype is 3, PLTE is _always_ created.*/
+  unsigned force_palette;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  /*add LodePNG identifier and version as a text chunk, for debugging*/
+  unsigned add_id;
+  /*encode text chunks as zTXt chunks instead of tEXt chunks, and use compression in iTXt chunks*/
+  unsigned text_compression;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+} LodePNGEncoderSettings;
+
+void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+
+#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER)
+/*The settings, state and information for extended encoding and decoding.*/
+typedef struct LodePNGState {
+#ifdef LODEPNG_COMPILE_DECODER
+  LodePNGDecoderSettings decoder; /*the decoding settings*/
+#endif /*LODEPNG_COMPILE_DECODER*/
+#ifdef LODEPNG_COMPILE_ENCODER
+  LodePNGEncoderSettings encoder; /*the encoding settings*/
+#endif /*LODEPNG_COMPILE_ENCODER*/
+  LodePNGColorMode info_raw; /*specifies the format in which you would like to get the raw pixel buffer*/
+  LodePNGInfo info_png; /*info of the PNG image obtained after decoding*/
+  unsigned error;
+#ifdef LODEPNG_COMPILE_CPP
+  /* For the lodepng::State subclass. */
+  virtual ~LodePNGState(){}
+#endif
+} LodePNGState;
+
+/*init, cleanup and copy functions to use with this struct*/
+void lodepng_state_init(LodePNGState* state);
+void lodepng_state_cleanup(LodePNGState* state);
+void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source);
+#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*
+Same as lodepng_decode_memory, but uses a LodePNGState to allow custom settings and
+getting much more information about the PNG image and color mode.
+*/
+unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h,
+                        LodePNGState* state,
+                        const unsigned char* in, size_t insize);
+
+/*
+Read the PNG header, but not the actual data. This returns only the information
+that is in the IHDR chunk of the PNG, such as width, height and color type. The
+information is placed in the info_png field of the LodePNGState.
+*/
+unsigned lodepng_inspect(unsigned* w, unsigned* h,
+                         LodePNGState* state,
+                         const unsigned char* in, size_t insize);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+/*
+Reads one metadata chunk (other than IHDR) of the PNG file and outputs what it
+read in the state. Returns error code on failure.
+Use lodepng_inspect first with a new state, then e.g. lodepng_chunk_find_const
+to find the desired chunk type, and if non null use lodepng_inspect_chunk (with
+chunk_pointer - start_of_file as pos).
+Supports most metadata chunks from the PNG standard (gAMA, bKGD, tEXt, ...).
+Ignores unsupported, unknown, non-metadata or IHDR chunks (without error).
+Requirements: &in[pos] must point to start of a chunk, must use regular
+lodepng_inspect first since format of most other chunks depends on IHDR, and if
+there is a PLTE chunk, that one must be inspected before tRNS or bKGD.
+*/
+unsigned lodepng_inspect_chunk(LodePNGState* state, size_t pos,
+                               const unsigned char* in, size_t insize);
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*This function allocates the out buffer with standard malloc and stores the size in *outsize.*/
+unsigned lodepng_encode(unsigned char** out, size_t* outsize,
+                        const unsigned char* image, unsigned w, unsigned h,
+                        LodePNGState* state);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/*
+The lodepng_chunk functions are normally not needed, except to traverse the
+unknown chunks stored in the LodePNGInfo struct, or add new ones to it.
+It also allows traversing the chunks of an encoded PNG file yourself.
+
+The chunk pointer always points to the beginning of the chunk itself, that is
+the first byte of the 4 length bytes.
+
+In the PNG file format, chunks have the following format:
+-4 bytes length: length of the data of the chunk in bytes (chunk itself is 12 bytes longer)
+-4 bytes chunk type (ASCII a-z,A-Z only, see below)
+-length bytes of data (may be 0 bytes if length was 0)
+-4 bytes of CRC, computed on chunk name + data
+
+The first chunk starts at the 8th byte of the PNG file, the entire rest of the file
+consists of concatenated chunks with the above format.
+
+PNG standard chunk ASCII naming conventions:
+-First byte: uppercase = critical, lowercase = ancillary
+-Second byte: uppercase = public, lowercase = private
+-Third byte: must be uppercase
+-Fourth byte: uppercase = unsafe to copy, lowercase = safe to copy
+*/
+
+/*
+Gets the length of the data of the chunk. Total chunk length has 12 bytes more.
+There must be at least 4 bytes to read from. If the result value is too large,
+it may be corrupt data.
+*/
+unsigned lodepng_chunk_length(const unsigned char* chunk);
+
+/*puts the 4-byte type in null terminated string*/
+void lodepng_chunk_type(char type[5], const unsigned char* chunk);
+
+/*check if the type is the given type*/
+unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type);
+
+/*0: it's one of the critical chunk types, 1: it's an ancillary chunk (see PNG standard)*/
+unsigned char lodepng_chunk_ancillary(const unsigned char* chunk);
+
+/*0: public, 1: private (see PNG standard)*/
+unsigned char lodepng_chunk_private(const unsigned char* chunk);
+
+/*0: the chunk is unsafe to copy, 1: the chunk is safe to copy (see PNG standard)*/
+unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk);
+
+/*get pointer to the data of the chunk, where the input points to the header of the chunk*/
+unsigned char* lodepng_chunk_data(unsigned char* chunk);
+const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk);
+
+/*returns 0 if the crc is correct, 1 if it's incorrect (0 for OK as usual!)*/
+unsigned lodepng_chunk_check_crc(const unsigned char* chunk);
+
+/*generates the correct CRC from the data and puts it in the last 4 bytes of the chunk*/
+void lodepng_chunk_generate_crc(unsigned char* chunk);
+
+/*
+Iterate to next chunks, allows iterating through all chunks of the PNG file.
+Input must be at the beginning of a chunk (result of a previous lodepng_chunk_next call,
+or the 8th byte of a PNG file which always has the first chunk), or alternatively may
+point to the first byte of the PNG file (which is not a chunk but the magic header, the
+function will then skip over it and return the first real chunk).
+Expects at least 8 readable bytes of memory in the input pointer.
+Will output pointer to the start of the next chunk or the end of the file if there
+is no more chunk after this. Start this process at the 8th byte of the PNG file.
+In a non-corrupt PNG file, the last chunk should have name "IEND".
+*/
+unsigned char* lodepng_chunk_next(unsigned char* chunk);
+const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk);
+
+/*Finds the first chunk with the given type in the range [chunk, end), or returns NULL if not found.*/
+unsigned char* lodepng_chunk_find(unsigned char* chunk, const unsigned char* end, const char type[5]);
+const unsigned char* lodepng_chunk_find_const(const unsigned char* chunk, const unsigned char* end, const char type[5]);
+
+/*
+Appends chunk to the data in out. The given chunk should already have its chunk header.
+The out variable and outlength are updated to reflect the new reallocated buffer.
+Returns error code (0 if it went ok)
+*/
+unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk);
+
+/*
+Appends new chunk to out. The chunk to append is given by giving its length, type
+and data separately. The type is a 4-letter string.
+The out variable and outlength are updated to reflect the new reallocated buffer.
+Returns error code (0 if it went ok)
+*/
+unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
+                              const char* type, const unsigned char* data);
+
+
+/*Calculate CRC32 of buffer*/
+unsigned lodepng_crc32(const unsigned char* buf, size_t len);
+#endif /*LODEPNG_COMPILE_PNG*/
+
+
+#ifdef LODEPNG_COMPILE_ZLIB
+/*
+This zlib part can be used independently to zlib compress and decompress a
+buffer. It cannot be used to create gzip files however, and it only supports the
+part of zlib that is required for PNG, it does not support dictionaries.
+*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*Inflate a buffer. Inflate is the decompression step of deflate. Out buffer must be freed after use.*/
+unsigned lodepng_inflate(unsigned char** out, size_t* outsize,
+                         const unsigned char* in, size_t insize,
+                         const LodePNGDecompressSettings* settings);
+
+/*
+Decompresses Zlib data. Reallocates the out buffer and appends the data. The
+data must be according to the zlib specification.
+Either, *out must be NULL and *outsize must be 0, or, *out must be a valid
+buffer and *outsize its size in bytes. out must be freed by user after usage.
+*/
+unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize,
+                                 const unsigned char* in, size_t insize,
+                                 const LodePNGDecompressSettings* settings);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*
+Compresses data with Zlib. Reallocates the out buffer and appends the data.
+Zlib adds a small header and trailer around the deflate data.
+The data is output in the format of the zlib specification.
+Either, *out must be NULL and *outsize must be 0, or, *out must be a valid
+buffer and *outsize its size in bytes. out must be freed by user after usage.
+*/
+unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize,
+                               const unsigned char* in, size_t insize,
+                               const LodePNGCompressSettings* settings);
+
+/*
+Find length-limited Huffman code for given frequencies. This function is in the
+public interface only for tests, it's used internally by lodepng_deflate.
+*/
+unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies,
+                                      size_t numcodes, unsigned maxbitlen);
+
+/*Compress a buffer with deflate. See RFC 1951. Out buffer must be freed after use.*/
+unsigned lodepng_deflate(unsigned char** out, size_t* outsize,
+                         const unsigned char* in, size_t insize,
+                         const LodePNGCompressSettings* settings);
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+#endif /*LODEPNG_COMPILE_ZLIB*/
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Load a file from disk into buffer. The function allocates the out buffer, and
+after usage you should free it.
+out: output parameter, contains pointer to loaded buffer.
+outsize: output parameter, size of the allocated out buffer
+filename: the path to the file to load
+return value: error code (0 means ok)
+*/
+unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename);
+
+/*
+Save a file from buffer to disk. Warning, if it exists, this function overwrites
+the file without warning!
+buffer: the buffer to write
+buffersize: size of the buffer to write
+filename: the path to the file to save to
+return value: error code (0 means ok)
+*/
+unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename);
+#endif /*LODEPNG_COMPILE_DISK*/
+
+#ifdef LODEPNG_COMPILE_CPP
+/* The LodePNG C++ wrapper uses std::vectors instead of manually allocated memory buffers. */
+namespace lodepng {
+#ifdef LODEPNG_COMPILE_PNG
+class State : public LodePNGState {
+  public:
+    State();
+    State(const State& other);
+    virtual ~State();
+    State& operator=(const State& other);
+};
+
+#ifdef LODEPNG_COMPILE_DECODER
+/* Same as other lodepng::decode, but using a State for more settings and information. */
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                State& state,
+                const unsigned char* in, size_t insize);
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                State& state,
+                const std::vector<unsigned char>& in);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/* Same as other lodepng::encode, but using a State for more settings and information. */
+unsigned encode(std::vector<unsigned char>& out,
+                const unsigned char* in, unsigned w, unsigned h,
+                State& state);
+unsigned encode(std::vector<unsigned char>& out,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                State& state);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Load a file from disk into an std::vector.
+return value: error code (0 means ok)
+*/
+unsigned load_file(std::vector<unsigned char>& buffer, const std::string& filename);
+
+/*
+Save the binary data in an std::vector to a file on disk. The file is overwritten
+without warning.
+*/
+unsigned save_file(const std::vector<unsigned char>& buffer, const std::string& filename);
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_PNG */
+
+#ifdef LODEPNG_COMPILE_ZLIB
+#ifdef LODEPNG_COMPILE_DECODER
+/* Zlib-decompress an unsigned char buffer */
+unsigned decompress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
+                    const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings);
+
+/* Zlib-decompress an std::vector */
+unsigned decompress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
+                    const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings);
+#endif /* LODEPNG_COMPILE_DECODER */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/* Zlib-compress an unsigned char buffer of insize bytes; settings defaults to lodepng_default_compress_settings. */
+unsigned compress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
+                  const LodePNGCompressSettings& settings = lodepng_default_compress_settings);
+
+/* Zlib-compress an std::vector; settings defaults to lodepng_default_compress_settings. */
+unsigned compress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
+                  const LodePNGCompressSettings& settings = lodepng_default_compress_settings);
+#endif /* LODEPNG_COMPILE_ENCODER */
+#endif /* LODEPNG_COMPILE_ZLIB */
+} /* namespace lodepng */
+#endif /*LODEPNG_COMPILE_CPP*/
+
+/*
+TODO:
+[.] test if there are no memory leaks or security exploits - done a lot but needs to be checked often
+[.] check compatibility with various compilers  - done but needs to be redone for every newer version
+[X] converting color to 16-bit per channel types
+[X] support color profile chunk types (but never let them touch RGB values by default)
+[ ] support all public PNG chunk types
+[ ] make sure encoder generates no chunks with size > (2^31)-1
+[ ] partial decoding (stream processing)
+[X] let the "isFullyOpaque" function check color keys and transparent palettes too
+[X] better name for the variables "codes", "codesD", "codelengthcodes", "clcl" and "lldl"
+[ ] don't stop decoding on errors like 69, 57, 58 (make warnings)
+[ ] make warnings like: oob palette, checksum fail, data after iend, wrong/unknown crit chunk, no null terminator in text, ...
+[ ] errors with line numbers (and version)
+[ ] let the C++ wrapper catch exceptions coming from the standard library and return LodePNG error codes
+[ ] allow user to provide custom color conversion functions, e.g. for premultiplied alpha, padding bits or not, ...
+[ ] allow user to give data (void*) to custom allocator
+*/
+
+#endif /*LODEPNG_H inclusion guard*/
+
+/*
+LodePNG Documentation
+---------------------
+
+0. table of contents
+--------------------
+
+  1. about
+   1.1. supported features
+   1.2. features not supported
+  2. C and C++ version
+  3. security
+  4. decoding
+  5. encoding
+  6. color conversions
+    6.1. PNG color types
+    6.2. color conversions
+    6.3. padding bits
+    6.4. A note about 16-bits per channel and endianness
+  7. error values
+  8. chunks and PNG editing
+  9. compiler support
+  10. examples
+   10.1. decoder C++ example
+   10.2. decoder C example
+  11. state settings reference
+  12. changes
+  13. contact information
+
+
+1. about
+--------
+
+PNG is a file format to store raster images losslessly with good compression,
+supporting different color types and alpha channel.
+
+LodePNG is a PNG codec according to the Portable Network Graphics (PNG)
+Specification (Second Edition) - W3C Recommendation 10 November 2003.
+
+The specifications used are:
+
+*) Portable Network Graphics (PNG) Specification (Second Edition):
+     http://www.w3.org/TR/2003/REC-PNG-20031110
+*) RFC 1950 ZLIB Compressed Data Format version 3.3:
+     http://www.gzip.org/zlib/rfc-zlib.html
+*) RFC 1951 DEFLATE Compressed Data Format Specification ver 1.3:
+     http://www.gzip.org/zlib/rfc-deflate.html
+
+The most recent version of LodePNG can currently be found at
+http://lodev.org/lodepng/
+
+LodePNG works both in C (ISO C90) and C++, with a C++ wrapper that adds
+extra functionality.
+
+LodePNG consists of two files:
+-lodepng.h: the header file for both C and C++
+-lodepng.c(pp): give it the name lodepng.c or lodepng.cpp (or .cc) depending on your usage
+
+If you want to start using LodePNG right away without reading this doc, get the
+examples from the LodePNG website to see how to use it in code, or check the
+smaller examples in chapter 10 here.
+
+LodePNG is simple but only supports the basic requirements. To achieve
+simplicity, the following design choices were made: There are no dependencies
+on any external library. There are functions to decode and encode a PNG with
+a single function call, and extended versions of these functions taking a
+LodePNGState struct allowing to specify or get more information. By default
+the colors of the raw image are always RGB or RGBA, no matter what color type
+the PNG file uses. To read and write files, there are simple functions to
+convert the files to/from buffers in memory.
+
+This all makes LodePNG suitable for loading textures in games, demos and small
+programs, ... It's less suitable for full fledged image editors, loading PNGs
+over network (it requires all the image data to be available before decoding can
+begin), life-critical systems, ...
+
+1.1. supported features
+-----------------------
+
+The following features are supported by the decoder and encoder:
+
+*) decoding of PNGs with any color type, bit depth and interlace mode, to a 24- or 32-bit color raw image,
+   or the same color type as the PNG
+*) encoding of PNGs, from any raw image to 24- or 32-bit color, or the same color type as the raw image
+*) Adam7 interlace and deinterlace for any color type
+*) loading the image from harddisk or decoding it from a buffer from other sources than harddisk
+*) support for alpha channels, including RGBA color model, translucent palettes and color keying
+*) zlib decompression (inflate)
+*) zlib compression (deflate)
+*) CRC32 and ADLER32 checksums
+*) handling of unknown chunks, allowing making a PNG editor that stores custom and unknown chunks.
+*) the following chunks are supported (generated/interpreted) by both encoder and decoder:
+    IHDR: header information
+    PLTE: color palette
+    IDAT: pixel data
+    IEND: the final chunk
+    tRNS: transparency for palettized images
+    tEXt: textual information
+    zTXt: compressed textual information
+    iTXt: international textual information
+    bKGD: suggested background color
+    pHYs: physical dimensions
+    tIME: modification time
+
+1.2. features not supported
+---------------------------
+
+The following features are _not_ supported:
+
+*) some features needed to make a conformant PNG-Editor might be still missing.
+*) partial loading/stream processing. All data must be available and is processed in one call.
+*) The following public chunks are not supported but treated as unknown chunks by LodePNG
+    sBIT, hIST, sPLT
+   (cHRM, gAMA, iCCP and sRGB are supported since the 10 aug 2018 change, see chapter 12, but by
+   default LodePNG still provides the RGB values stored in the pixels, not values modified by them.)
+
+
+2. C and C++ version
+--------------------
+
+The C version uses buffers allocated with alloc that you need to free()
+yourself. You need to use init and cleanup functions for each struct whenever
+using a struct from the C version to avoid exploits and memory leaks.
+
+The C++ version has extra functions with std::vectors in the interface and the
+lodepng::State class which is a LodePNGState with constructor and destructor.
+
+These files work without modification for both C and C++ compilers because all
+the additional C++ code is in "#ifdef __cplusplus" blocks that make C-compilers
+ignore it, and the C code is made to compile both with strict ISO C90 and C++.
+
+To use the C++ version, you need to rename the source file to lodepng.cpp
+(instead of lodepng.c), and compile it with a C++ compiler.
+
+To use the C version, you need to rename the source file to lodepng.c (instead
+of lodepng.cpp), and compile it with a C compiler.
+
+
+3. Security
+-----------
+
+Even if carefully designed, it's always possible that LodePNG contains possible
+exploits. If you discover one, please let me know, and it will be fixed.
+
+When using LodePNG, care has to be taken with the C version of LodePNG, as well
+as the C-style structs when working with C++. The following conventions are used
+for all C-style structs:
+
+-if a struct has a corresponding init function, always call the init function when making a new one
+-if a struct has a corresponding cleanup function, call it before the struct disappears to avoid memory leaks
+-if a struct has a corresponding copy function, use the copy function instead of "=".
+ The destination must also be inited already.
+
+
+4. Decoding
+-----------
+
+Decoding converts a PNG compressed image to a raw pixel buffer.
+
+Most documentation on using the decoder is at its declarations in the header
+above. For C, simple decoding can be done with functions such as
+lodepng_decode32, and more advanced decoding can be done with the struct
+LodePNGState and lodepng_decode. For C++, all decoding can be done with the
+various lodepng::decode functions, and lodepng::State can be used for advanced
+features.
+
+When using the LodePNGState, it uses the following fields for decoding:
+*) LodePNGInfo info_png: it stores extra information about the PNG (the input) in here
+*) LodePNGColorMode info_raw: here you can say what color mode of the raw image (the output) you want to get
+*) LodePNGDecoderSettings decoder: you can specify a few extra settings for the decoder to use
+
+LodePNGInfo info_png
+--------------------
+
+After decoding, this contains extra information of the PNG image, except the actual
+pixels, width and height because these are already gotten directly from the decoder
+functions.
+
+It contains for example the original color type of the PNG image, text comments,
+suggested background color, etc... More details about the LodePNGInfo struct are
+at its declaration documentation.
+
+LodePNGColorMode info_raw
+-------------------------
+
+When decoding, here you can specify which color type you want
+the resulting raw image to be. If this is different from the colortype of the
+PNG, then the decoder will automatically convert the result. This conversion
+always works, except if you want it to convert a color PNG to greyscale or to
+a palette with missing colors.
+
+By default, 32-bit color is used for the result.
+
+LodePNGDecoderSettings decoder
+------------------------------
+
+The settings can be used to ignore the errors created by invalid CRC and Adler32
+checksums, and to disable the decoding of tEXt chunks.
+
+There's also a setting color_convert, true by default. If false, no conversion
+is done, the resulting data will be as it was in the PNG (after decompression)
+and you'll have to puzzle the colors of the pixels together yourself using the
+color type information in the LodePNGInfo.
+
+
+5. Encoding
+-----------
+
+Encoding converts a raw pixel buffer to a PNG compressed image.
+
+Most documentation on using the encoder is at its declarations in the header
+above. For C, simple encoding can be done with functions such as
+lodepng_encode32, and more advanced encoding can be done with the struct
+LodePNGState and lodepng_encode. For C++, all encoding can be done with the
+various lodepng::encode functions, and lodepng::State can be used for advanced
+features.
+
+Like the decoder, the encoder can also give errors. However it gives fewer errors
+since the encoder input is trusted, the decoder input (a PNG image that could
+be forged by anyone) is not trusted.
+
+When using the LodePNGState, it uses the following fields for encoding:
+*) LodePNGInfo info_png: here you specify how you want the PNG (the output) to be.
+*) LodePNGColorMode info_raw: here you say what color type the raw image (the input) has
+*) LodePNGEncoderSettings encoder: you can specify a few settings for the encoder to use
+
+LodePNGInfo info_png
+--------------------
+
+When encoding, you use this the opposite way as when decoding: for encoding,
+you fill in the values you want the PNG to have before encoding. By default it's
+not needed to specify a color type for the PNG since it's automatically chosen,
+but it's possible to choose it yourself given the right settings.
+
+The encoder will not always exactly match the LodePNGInfo struct you give,
+it tries as close as possible. Some things are ignored by the encoder. The
+encoder uses, for example, the following settings from it when applicable:
+colortype and bitdepth, text chunks, time chunk, the color key, the palette, the
+background color, the interlace method, unknown chunks, ...
+
+When encoding to a PNG with colortype 3, the encoder will generate a PLTE chunk.
+If the palette contains any colors for which the alpha channel is not 255 (so
+there are translucent colors in the palette), it'll add a tRNS chunk.
+
+LodePNGColorMode info_raw
+-------------------------
+
+You specify the color type of the raw image that you give to the input here,
+including a possible transparent color key and palette you happen to be using in
+your raw image data.
+
+By default, 32-bit color is assumed, meaning your input has to be in RGBA
+format with 4 bytes (unsigned chars) per pixel.
+
+LodePNGEncoderSettings encoder
+------------------------------
+
+The following settings are supported (some are in sub-structs):
+*) auto_convert: when this option is enabled, the encoder will
+automatically choose the smallest possible color mode (including color key) that
+can encode the colors of all pixels without information loss.
+*) btype: the block type for LZ77. 0 = uncompressed, 1 = fixed huffman tree,
+   2 = dynamic huffman tree (best compression). Should be 2 for proper
+   compression.
+*) use_lz77: whether or not to use LZ77 for compressed block types. Should be
+   true for proper compression.
+*) windowsize: the window size used by the LZ77 encoder (1 - 32768). Has value
+   2048 by default, but can be set to 32768 for better, but slow, compression.
+*) force_palette: if colortype is 2 or 6, you can make the encoder write a PLTE
+   chunk if force_palette is true. This can be used as a suggested palette to convert
+   to by viewers that don't support more than 256 colors (if those still exist)
+*) add_id: add text chunk "Encoder: LodePNG <version>" to the image.
+*) text_compression: default 1. If 1, it'll store texts as zTXt instead of tEXt chunks.
+  zTXt chunks use zlib compression on the text. This gives a smaller result on
+  large texts but a larger result on small texts (such as a single program name).
+  It's all tEXt or all zTXt though, there's no separate setting per text yet.
+
+
+6. color conversions
+--------------------
+
+An important thing to note about LodePNG, is that the color type of the PNG, and
+the color type of the raw image, are completely independent. By default, when
+you decode a PNG, you get the result as a raw image in the color type you want,
+no matter whether the PNG was encoded with a palette, greyscale or RGBA color.
+And if you encode an image, by default LodePNG will automatically choose the PNG
+color type that gives good compression based on the values of colors and amount
+of colors in the image. It can be configured to let you control it instead as
+well, though.
+
+To be able to do this, LodePNG does conversions from one color mode to another.
+It can convert from almost any color type to any other color type, except the
+following conversions: RGB to greyscale is not supported, and converting to a
+palette when the palette doesn't have a required color is not supported. This is
+not supported on purpose: this is information loss which requires a color
+reduction algorithm that is beyond the scope of a PNG encoder (yes, RGB to grey
+is easy, but there are multiple ways if you want to give some channels more
+weight).
+
+By default, when decoding, you get the raw image in 32-bit RGBA or 24-bit RGB
+color, no matter what color type the PNG has. And by default when encoding,
+LodePNG automatically picks the best color model for the output PNG, and expects
+the input image to be 32-bit RGBA or 24-bit RGB. So, unless you want to control
+the color format of the images yourself, you can skip this chapter.
+
+6.1. PNG color types
+--------------------
+
+A PNG image can have many color types, ranging from 1-bit color to 64-bit color,
+as well as palettized color modes. After the zlib decompression and unfiltering
+in the PNG image is done, the raw pixel data will have that color type and thus
+a certain amount of bits per pixel. If you want the output raw image after
+decoding to have another color type, a conversion is done by LodePNG.
+
+The PNG specification gives the following color types:
+
+0: greyscale, bit depths 1, 2, 4, 8, 16
+2: RGB, bit depths 8 and 16
+3: palette, bit depths 1, 2, 4 and 8
+4: greyscale with alpha, bit depths 8 and 16
+6: RGBA, bit depths 8 and 16
+
+Bit depth is the amount of bits per pixel per color channel. So the total amount
+of bits per pixel is: amount of channels * bitdepth.
+
+6.2. color conversions
+----------------------
+
+As explained in the sections about the encoder and decoder, you can specify
+color types and bit depths in info_png and info_raw to change the default
+behaviour.
+
+If, when decoding, you want the raw image to be something else than the default,
+you need to set the color type and bit depth you want in the LodePNGColorMode,
+or the parameters colortype and bitdepth of the simple decoding function.
+
+If, when encoding, you use another color type than the default in the raw input
+image, you need to specify its color type and bit depth in the LodePNGColorMode
+of the raw image, or use the parameters colortype and bitdepth of the simple
+encoding function.
+
+If, when encoding, you don't want LodePNG to choose the output PNG color type
+but control it yourself, you need to set auto_convert in the encoder settings
+to false, and specify the color type you want in the LodePNGInfo of the
+encoder (including palette: it can generate a palette if auto_convert is true,
+otherwise not).
+
+If the input and output color type differ (whether user chosen or auto chosen),
+LodePNG will do a color conversion, which follows the rules below, and may
+sometimes result in an error.
+
+To avoid some confusion:
+-the decoder converts from PNG to raw image
+-the encoder converts from raw image to PNG
+-the colortype and bitdepth in LodePNGColorMode info_raw, are those of the raw image
+-the colortype and bitdepth in the color field of LodePNGInfo info_png, are those of the PNG
+-when encoding, the color type in LodePNGInfo is ignored if auto_convert
+ is enabled, it is automatically generated instead
+-when decoding, the color type in LodePNGInfo is set by the decoder to that of the original
+ PNG image, but it can be ignored since the raw image has the color type you requested instead
+-if the color type of the LodePNGColorMode and PNG image aren't the same, a conversion
+ between the color types is done if the color types are supported. If it is not
+ supported, an error is returned. If the types are the same, no conversion is done.
+-even though some conversions aren't supported, LodePNG supports loading PNGs from any
+ colortype and saving PNGs to any colortype, sometimes it just requires preparing
+ the raw image correctly before encoding.
+-both encoder and decoder use the same color converter.
+
+Non supported color conversions:
+-color to greyscale: no error is thrown, but the result will look ugly because
+only the red channel is taken
+-anything to palette when that palette does not have that color in it: in this
+case an error is thrown
+
+Supported color conversions:
+-anything to 8-bit RGB, 8-bit RGBA, 16-bit RGB, 16-bit RGBA
+-any grey or grey+alpha, to grey or grey+alpha
+-anything to a palette, as long as the palette has the requested colors in it
+-removing alpha channel
+-higher to smaller bitdepth, and vice versa
+
+If you want no color conversion to be done (e.g. for speed or control):
+-In the encoder, you can make it save a PNG with any color type by giving the
+raw color mode and LodePNGInfo the same color mode, and setting auto_convert to
+false.
+-In the decoder, you can make it store the pixel data in the same color type
+as the PNG has, by setting the color_convert setting to false. Settings in
+info_raw are then ignored.
+
+The function lodepng_convert does the color conversion. It is available in the
+interface but normally isn't needed since the encoder and decoder already call
+it.
+
+6.3. padding bits
+-----------------
+
+In the PNG file format, if a less than 8-bit per pixel color type is used and the scanlines
+have a bit amount that isn't a multiple of 8, then padding bits are used so that each
+scanline starts at a fresh byte. But that is NOT true for the LodePNG raw input and output.
+The raw input image you give to the encoder, and the raw output image you get from the decoder
+will NOT have these padding bits, e.g. in the case of a 1-bit image with a width
+of 7 pixels, the first pixel of the second scanline will be the 8th bit of the first byte,
+not the first bit of a new byte.
+
+6.4. A note about 16-bits per channel and endianness
+----------------------------------------------------
+
+LodePNG uses unsigned char arrays for 16-bit per channel colors too, just like
+for any other color format. The 16-bit values are stored in big endian (most
+significant byte first) in these arrays. This is the opposite order of the
+little endian used by x86 CPUs.
+
+LodePNG always uses big endian because the PNG file format does so internally.
+Conversions to formats other than the one PNG uses internally are not supported by
+LodePNG on purpose, there are myriads of formats, including endianness of 16-bit
+colors, the order in which you store R, G, B and A, and so on. Supporting and
+converting to/from all that is outside the scope of LodePNG.
+
+This may mean that, depending on your use case, you may want to convert the big
+endian output of LodePNG to little endian with a for loop. This is certainly not
+always needed, many applications and libraries support big endian 16-bit colors
+anyway, but it means you cannot simply cast the unsigned char* buffer to an
+unsigned short* buffer on x86 CPUs.
+
+
+7. error values
+---------------
+
+All functions in LodePNG that return an error code, return 0 if everything went
+OK, or a non-zero code if there was an error.
+
+The meaning of the LodePNG error values can be retrieved with the function
+lodepng_error_text: given the numerical error code, it returns a description
+of the error in English as a string.
+
+Check the implementation of lodepng_error_text to see the meaning of each code.
+
+
+8. chunks and PNG editing
+-------------------------
+
+If you want to add extra chunks to a PNG you encode, or use LodePNG for a PNG
+editor that should follow the rules about handling of unknown chunks, or if your
+program is able to read other types of chunks than the ones handled by LodePNG,
+then that's possible with the chunk functions of LodePNG.
+
+A PNG chunk has the following layout:
+
+4 bytes length
+4 bytes type name
+length bytes data
+4 bytes CRC
+
+8.1. iterating through chunks
+-----------------------------
+
+If you have a buffer containing the PNG image data, then the first chunk (the
+IHDR chunk) starts at byte number 8 of that buffer. The first 8 bytes are the
+signature of the PNG and are not part of a chunk. But if you start at byte 8
+then you have a chunk, and can check the following things of it.
+
+NOTE: none of these functions check for memory buffer boundaries. To avoid
+exploits, always make sure the buffer contains all the data of the chunks.
+When using lodepng_chunk_next, make sure the returned value is within the
+allocated memory.
+
+unsigned lodepng_chunk_length(const unsigned char* chunk):
+
+Get the length of the chunk's data. The total chunk length is this length + 12.
+
+void lodepng_chunk_type(char type[5], const unsigned char* chunk):
+unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type):
+
+Get the type of the chunk or compare if it's a certain type
+
+unsigned char lodepng_chunk_critical(const unsigned char* chunk):
+unsigned char lodepng_chunk_private(const unsigned char* chunk):
+unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk):
+
+Check if the chunk is critical in the PNG standard (only IHDR, PLTE, IDAT and IEND are).
+Check if the chunk is private (public chunks are part of the standard, private ones not).
+Check if the chunk is safe to copy. If it's not, then, when modifying data in a critical
+chunk, unsafe to copy chunks of the old image may NOT be saved in the new one if your
+program doesn't handle that type of unknown chunk.
+
+unsigned char* lodepng_chunk_data(unsigned char* chunk):
+const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk):
+
+Get a pointer to the start of the data of the chunk.
+
+unsigned lodepng_chunk_check_crc(const unsigned char* chunk):
+void lodepng_chunk_generate_crc(unsigned char* chunk):
+
+Check if the crc is correct or generate a correct one.
+
+unsigned char* lodepng_chunk_next(unsigned char* chunk):
+const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk):
+
+Iterate to the next chunk. This works if you have a buffer with consecutive chunks. Note that these
+functions do no boundary checking of the allocated data whatsoever, so make sure there is enough
+data available in the buffer to be able to go to the next chunk.
+
+unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk):
+unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
+                              const char* type, const unsigned char* data):
+
+These functions are used to create new chunks that are appended to the data in *out that has
+length *outlength. The append function appends an existing chunk to the new data. The create
+function creates a new chunk with the given parameters and appends it. Type is the 4-letter
+name of the chunk.
+
+8.2. chunks in info_png
+-----------------------
+
+The LodePNGInfo struct contains fields with the unknown chunk in it. It has 3
+buffers (each with size) to contain 3 types of unknown chunks:
+the ones that come before the PLTE chunk, the ones that come between the PLTE
+and the IDAT chunks, and the ones that come after the IDAT chunks.
+It's necessary to make the distinction between these 3 cases because the PNG
+standard forces to keep the ordering of unknown chunks compared to the critical
+chunks, but does not force any other ordering rules.
+
+info_png.unknown_chunks_data[0] is the chunks before PLTE
+info_png.unknown_chunks_data[1] is the chunks after PLTE, before IDAT
+info_png.unknown_chunks_data[2] is the chunks after IDAT
+
+The chunks in these 3 buffers can be iterated through and read by using the same
+way described in the previous subchapter.
+
+When using the decoder to decode a PNG, you can make it store all unknown chunks
+if you set the option state.decoder.remember_unknown_chunks to 1. By default, this
+option is off (0).
+
+The encoder will always encode unknown chunks that are stored in the info_png.
+If you need it to add a particular chunk that isn't known by LodePNG, you can
+use lodepng_chunk_append or lodepng_chunk_create to add it to the chunk data in
+info_png.unknown_chunks_data[x].
+
+Chunks that are known by LodePNG should not be added in that way. E.g. to make
+LodePNG add a bKGD chunk, set background_defined to true and add the correct
+parameters there instead.
+
+
+9. compiler support
+-------------------
+
+No libraries other than the current standard C library are needed to compile
+LodePNG. For the C++ version, only the standard C++ library is needed on top.
+Add the files lodepng.c(pp) and lodepng.h to your project, include
+lodepng.h where needed, and your program can read/write PNG files.
+
+It is compatible with C90 and up, and C++03 and up.
+
+If performance is important, use optimization when compiling! For both the
+encoder and decoder, this makes a large difference.
+
+Make sure that LodePNG is compiled with the same compiler of the same version
+and with the same settings as the rest of the program, or the interfaces with
+std::vectors and std::strings in C++ can be incompatible.
+
+CHAR_BIT must be 8 or higher, because LodePNG uses unsigned chars for octets.
+
+*) gcc and g++
+
+LodePNG is developed in gcc so this compiler is natively supported. It gives no
+warnings with compiler options "-Wall -Wextra -pedantic -ansi", with gcc and g++
+version 4.7.1 on Linux, 32-bit and 64-bit.
+
+*) Clang
+
+Fully supported and warning-free.
+
+*) Mingw
+
+The Mingw compiler (a port of gcc for Windows) should be fully supported by
+LodePNG.
+
+*) Visual Studio and Visual C++ Express Edition
+
+LodePNG should be warning-free with warning level W4. Two warnings were disabled
+with pragmas though: warning 4244 about implicit conversions, and warning 4996
+where it wants to use a non-standard function fopen_s instead of the standard C
+fopen.
+
+Visual Studio may want "stdafx.h" files to be included in each source file and
+give an error "unexpected end of file while looking for precompiled header".
+This is not standard C++ and will not be added to the stock LodePNG. You can
+disable it for lodepng.cpp only by right clicking it, Properties, C/C++,
+Precompiled Headers, and set it to Not Using Precompiled Headers there.
+
+NOTE: Modern versions of VS should be fully supported, but old versions, e.g.
+VS6, are not guaranteed to work.
+
+*) Compilers on Macintosh
+
+LodePNG has been reported to work both with gcc and LLVM for Macintosh, both for
+C and C++.
+
+*) Other Compilers
+
+If you encounter problems on any compilers, feel free to let me know and I may
+try to fix it if the compiler is modern and standards compliant.
+
+
+10. examples
+------------
+
+This decoder example shows the most basic usage of LodePNG. More complex
+examples can be found on the LodePNG website.
+
+10.1. decoder C++ example
+-------------------------
+
+#include "lodepng.h"
+#include <iostream>
+
+int main(int argc, char *argv[]) {
+  const char* filename = argc > 1 ? argv[1] : "test.png";
+
+  //load and decode
+  std::vector<unsigned char> image;
+  unsigned width, height;
+  unsigned error = lodepng::decode(image, width, height, filename);
+
+  //if there's an error, display it
+  if(error) std::cout << "decoder error " << error << ": " << lodepng_error_text(error) << std::endl;
+
+  //the pixels are now in the vector "image", 4 bytes per pixel, ordered RGBARGBA..., use it as texture, draw it, ...
+}
+
+10.2. decoder C example
+-----------------------
+
+#include "lodepng.h"
+
+int main(int argc, char *argv[]) {
+  unsigned error;
+  unsigned char* image;
+  unsigned width, height;
+  const char* filename = argc > 1 ? argv[1] : "test.png";
+
+  error = lodepng_decode32_file(&image, &width, &height, filename);
+
+  if(error) printf("decoder error %u: %s\n", error, lodepng_error_text(error));
+
+  / * use image here * /
+
+  free(image);
+  return 0;
+}
+
+11. state settings reference
+----------------------------
+
+A quick reference of some settings to set on the LodePNGState
+
+For decoding:
+
+state.decoder.zlibsettings.ignore_adler32: ignore ADLER32 checksums
+state.decoder.zlibsettings.custom_...: use custom inflate function
+state.decoder.ignore_crc: ignore CRC checksums
+state.decoder.ignore_critical: ignore unknown critical chunks
+state.decoder.ignore_end: ignore missing IEND chunk. May fail if this corruption causes other errors
+state.decoder.color_convert: convert internal PNG color to chosen one
+state.decoder.read_text_chunks: whether to read in text metadata chunks
+state.decoder.remember_unknown_chunks: whether to read in unknown chunks
+state.info_raw.colortype: desired color type for decoded image
+state.info_raw.bitdepth: desired bit depth for decoded image
+state.info_raw....: more color settings, see struct LodePNGColorMode
+state.info_png....: no settings for decoder but output, see struct LodePNGInfo
+
+For encoding:
+
+state.encoder.zlibsettings.btype: disable compression by setting it to 0
+state.encoder.zlibsettings.use_lz77: use LZ77 in compression
+state.encoder.zlibsettings.windowsize: tweak LZ77 windowsize
+state.encoder.zlibsettings.minmatch: tweak min LZ77 length to match
+state.encoder.zlibsettings.nicematch: tweak LZ77 match where to stop searching
+state.encoder.zlibsettings.lazymatching: try one more LZ77 matching
+state.encoder.zlibsettings.custom_...: use custom deflate function
+state.encoder.auto_convert: choose optimal PNG color type, if 0 uses info_png
+state.encoder.filter_palette_zero: PNG filter strategy for palette
+state.encoder.filter_strategy: PNG filter strategy to encode with
+state.encoder.force_palette: add palette even if not encoding to one
+state.encoder.add_id: add LodePNG identifier and version as a text chunk
+state.encoder.text_compression: use compressed text chunks for metadata
+state.info_raw.colortype: color type of raw input image you provide
+state.info_raw.bitdepth: bit depth of raw input image you provide
+state.info_raw: more color settings, see struct LodePNGColorMode
+state.info_png.color.colortype: desired color type if auto_convert is false
+state.info_png.color.bitdepth: desired bit depth if auto_convert is false
+state.info_png.color....: more color settings, see struct LodePNGColorMode
+state.info_png....: more PNG related settings, see struct LodePNGInfo
+
+
+12. changes
+-----------
+
+The version number of LodePNG is the date of the change given in the format
+yyyymmdd.
+
+Some changes aren't backwards compatible. Those are indicated with a (!)
+symbol.
+
+*) 30 dec 2018: code style changes only: removed newlines before opening braces.
+*) 10 sep 2018: added way to inspect metadata chunks without full decoding.
+*) 19 aug 2018 (!): fixed color mode bKGD is encoded with and made it use
+   palette index in case of palette.
+*) 10 aug 2018 (!): added support for gAMA, cHRM, sRGB and iCCP chunks. This
+   change is backwards compatible unless you relied on unknown_chunks for those.
+*) 11 jun 2018: less restrictive check for pixel size integer overflow
+*) 14 jan 2018: allow optionally ignoring a few more recoverable errors
+*) 17 sep 2017: fix memory leak for some encoder input error cases
+*) 27 nov 2016: grey+alpha auto color model detection bugfix
+*) 18 apr 2016: Changed qsort to custom stable sort (for platforms w/o qsort).
+*) 09 apr 2016: Fixed colorkey usage detection, and better file loading (within
+   the limits of pure C90).
+*) 08 dec 2015: Made load_file function return error if file can't be opened.
+*) 24 okt 2015: Bugfix with decoding to palette output.
+*) 18 apr 2015: Boundary PM instead of just package-merge for faster encoding.
+*) 23 aug 2014: Reduced needless memory usage of decoder.
+*) 28 jun 2014: Removed fix_png setting, always support palette OOB for
+    simplicity. Made ColorProfile public.
+*) 09 jun 2014: Faster encoder by fixing hash bug and more zeros optimization.
+*) 22 dec 2013: Power of two windowsize required for optimization.
+*) 15 apr 2013: Fixed bug with LAC_ALPHA and color key.
+*) 25 mar 2013: Added an optional feature to ignore some PNG errors (fix_png).
+*) 11 mar 2013 (!): Bugfix with custom free. Changed from "my" to "lodepng_"
+    prefix for the custom allocators and made it possible with a new #define to
+    use custom ones in your project without needing to change lodepng's code.
+*) 28 jan 2013: Bugfix with color key.
+*) 27 okt 2012: Tweaks in text chunk keyword length error handling.
+*) 8 okt 2012 (!): Added new filter strategy (entropy) and new auto color mode.
+    (no palette). Better deflate tree encoding. New compression tweak settings.
+    Faster color conversions while decoding. Some internal cleanups.
+*) 23 sep 2012: Reduced warnings in Visual Studio a little bit.
+*) 1 sep 2012 (!): Removed #define's for giving custom (de)compression functions
+    and made it work with function pointers instead.
+*) 23 jun 2012: Added more filter strategies. Made it easier to use custom alloc
+    and free functions and toggle #defines from compiler flags. Small fixes.
+*) 6 may 2012 (!): Made plugging in custom zlib/deflate functions more flexible.
+*) 22 apr 2012 (!): Made interface more consistent, renaming a lot. Removed
+    redundant C++ codec classes. Reduced amount of structs. Everything changed,
+    but it is cleaner now imho and functionality remains the same. Also fixed
+    several bugs and shrunk the implementation code. Made new samples.
+*) 6 nov 2011 (!): By default, the encoder now automatically chooses the best
+    PNG color model and bit depth, based on the amount and type of colors of the
+    raw image. For this, autoLeaveOutAlphaChannel replaced by auto_choose_color.
+*) 9 okt 2011: simpler hash chain implementation for the encoder.
+*) 8 sep 2011: lz77 encoder lazy matching instead of greedy matching.
+*) 23 aug 2011: tweaked the zlib compression parameters after benchmarking.
+    A bug with the PNG filtertype heuristic was fixed, so that it chooses much
+    better ones (it's quite significant). A setting to do an experimental, slow,
+    brute force search for PNG filter types is added.
+*) 17 aug 2011 (!): changed some C zlib related function names.
+*) 16 aug 2011: made the code less wide (max 120 characters per line).
+*) 17 apr 2011: code cleanup. Bugfixes. Convert low to 16-bit per sample colors.
+*) 21 feb 2011: fixed compiling for C90. Fixed compiling with sections disabled.
+*) 11 dec 2010: encoding is made faster, based on suggestion by Peter Eastman
+    to optimize long sequences of zeros.
+*) 13 nov 2010: added LodePNG_InfoColor_hasPaletteAlpha and
+    LodePNG_InfoColor_canHaveAlpha functions for convenience.
+*) 7 nov 2010: added LodePNG_error_text function to get error code description.
+*) 30 okt 2010: made decoding slightly faster
+*) 26 okt 2010: (!) changed some C function and struct names (more consistent).
+     Reorganized the documentation and the declaration order in the header.
+*) 08 aug 2010: only changed some comments and external samples.
+*) 05 jul 2010: fixed bug thanks to warnings in the new gcc version.
+*) 14 mar 2010: fixed bug where too much memory was allocated for char buffers.
+*) 02 sep 2008: fixed bug where it could create empty tree that linux apps could
+    read by ignoring the problem but windows apps couldn't.
+*) 06 jun 2008: added more error checks for out of memory cases.
+*) 26 apr 2008: added a few more checks here and there to ensure more safety.
+*) 06 mar 2008: crash with encoding of strings fixed
+*) 02 feb 2008: support for international text chunks added (iTXt)
+*) 23 jan 2008: small cleanups, and #defines to divide code in sections
+*) 20 jan 2008: support for unknown chunks allowing using LodePNG for an editor.
+*) 18 jan 2008: support for tIME and pHYs chunks added to encoder and decoder.
+*) 17 jan 2008: ability to encode and decode compressed zTXt chunks added
+    Also various fixes, such as in the deflate and the padding bits code.
+*) 13 jan 2008: Added ability to encode Adam7-interlaced images. Improved
+    filtering code of encoder.
+*) 07 jan 2008: (!) changed LodePNG to use ISO C90 instead of C++. A
+    C++ wrapper around this provides an interface almost identical to before.
+    Having LodePNG be pure ISO C90 makes it more portable. The C and C++ code
+    are together in these files but it works both for C and C++ compilers.
+*) 29 dec 2007: (!) changed most integer types to unsigned int + other tweaks
+*) 30 aug 2007: bug fixed which makes this Borland C++ compatible
+*) 09 aug 2007: some VS2005 warnings removed again
+*) 21 jul 2007: deflate code placed in new namespace separate from zlib code
+*) 08 jun 2007: fixed bug with 2- and 4-bit color, and small interlaced images
+*) 04 jun 2007: improved support for Visual Studio 2005: crash with accessing
+    invalid std::vector element [0] fixed, and level 3 and 4 warnings removed
+*) 02 jun 2007: made the encoder add a tag with version by default
+*) 27 may 2007: zlib and png code separated (but still in the same file),
+    simple encoder/decoder functions added for more simple usage cases
+*) 19 may 2007: minor fixes, some code cleaning, new error added (error 69),
+    moved some examples from here to lodepng_examples.cpp
+*) 12 may 2007: palette decoding bug fixed
+*) 24 apr 2007: changed the license from BSD to the zlib license
+*) 11 mar 2007: very simple addition: ability to encode bKGD chunks.
+*) 04 mar 2007: (!) tEXt chunk related fixes, and support for encoding
+    palettized PNG images. Plus little interface change with palette and texts.
+*) 03 mar 2007: Made it encode dynamic Huffman shorter with repeat codes.
+    Fixed a bug where the end code of a block had length 0 in the Huffman tree.
+*) 26 feb 2007: Huffman compression with dynamic trees (BTYPE 2) now implemented
+    and supported by the encoder, resulting in smaller PNGs at the output.
+*) 27 jan 2007: Made the Adler-32 test faster so that a timewaste is gone.
+*) 24 jan 2007: gave encoder an error interface. Added color conversion from any
+    greyscale type to 8-bit greyscale with or without alpha.
+*) 21 jan 2007: (!) Totally changed the interface. It allows more color types
+    to convert to and is more uniform. See the manual for how it works now.
+*) 07 jan 2007: Some cleanup & fixes, and a few changes over the last days:
+    encode/decode custom tEXt chunks, separate classes for zlib & deflate, and
+    at last made the decoder give errors for incorrect Adler32 or Crc.
+*) 01 jan 2007: Fixed bug with encoding PNGs with less than 8 bits per channel.
+*) 29 dec 2006: Added support for encoding images without alpha channel, and
+    cleaned out code as well as making certain parts faster.
+*) 28 dec 2006: Added "Settings" to the encoder.
+*) 26 dec 2006: The encoder now does LZ77 encoding and produces much smaller files now.
+    Removed some code duplication in the decoder. Fixed little bug in an example.
+*) 09 dec 2006: (!) Placed output parameters of public functions as first parameter.
+    Fixed a bug of the decoder with 16-bit per color.
+*) 15 okt 2006: Changed documentation structure
+*) 09 okt 2006: Encoder class added. It encodes a valid PNG image from the
+    given image buffer, however for now it's not compressed.
+*) 08 sep 2006: (!) Changed to interface with a Decoder class
+*) 30 jul 2006: (!) LodePNG_InfoPng, width and height are now retrieved in a different
+    way. Renamed decodePNG to decodePNGGeneric.
+*) 29 jul 2006: (!) Changed the interface: image info is now returned as a
+    struct of type LodePNG::LodePNG_Info, instead of a vector, which was a bit clumsy.
+*) 28 jul 2006: Cleaned the code and added new error checks.
+    Corrected terminology "deflate" into "inflate".
+*) 23 jun 2006: Added SDL example in the documentation in the header, this
+    example allows easy debugging by displaying the PNG and its transparency.
+*) 22 jun 2006: (!) Changed way to obtain error value. Added
+    loadFile function for convenience. Made decodePNG32 faster.
+*) 21 jun 2006: (!) Changed type of info vector to unsigned.
+    Changed position of palette in info vector. Fixed an important bug that
+    happened on PNGs with an uncompressed block.
+*) 16 jun 2006: Internally changed unsigned into unsigned where
+    needed, and performed some optimizations.
+*) 07 jun 2006: (!) Renamed functions to decodePNG and placed them
+    in LodePNG namespace. Changed the order of the parameters. Rewrote the
+    documentation in the header. Renamed files to lodepng.cpp and lodepng.h
+*) 22 apr 2006: Optimized and improved some code
+*) 07 sep 2005: (!) Changed to std::vector interface
+*) 12 aug 2005: Initial release (C++, decoder only)
+
+
+13. contact information
+-----------------------
+
+Feel free to contact me with suggestions, problems, comments, ... concerning
+LodePNG. If you encounter a PNG image that doesn't work properly with this
+decoder, feel free to send it and I'll use it to find and fix the problem.
+
+My email address is (puzzle the account and domain together with an @ symbol):
+Domain: gmail dot com.
+Account: lode dot vandevenne.
+
+
+Copyright (c) 2005-2018 Lode Vandevenne
+*/
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/lodepng_benchmark.cpp b/codec/L2/demos/pikEnc/host/third_party/lodepng/lodepng_benchmark.cpp
new file mode 100755
index 0000000000..b327226687
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/lodepng_benchmark.cpp
@@ -0,0 +1,467 @@
+/*
+LodePNG Benchmark
+
+Copyright (c) 2005-2014 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+//g++ lodepng.cpp lodepng_benchmark.cpp -Wall -Wextra -pedantic -ansi -lSDL -O3
+//g++ lodepng.cpp lodepng_benchmark.cpp -Wall -Wextra -pedantic -ansi -lSDL -O3 && ./a.out
+
+#include "lodepng.h"
+
+#include <cmath>
+#include <string>
+#include <vector>
+#include <iostream>
+#include <sstream>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <SDL/SDL.h> //SDL is used for timing.
+
+#define NUM_DECODE 5 //set to 0 for not benchmarking decoding at all, 1 for normal, higher for decoding multiple times to measure better
+
+double total_dec_time = 0; //seconds spent decoding, summed over every image and all NUM_DECODE repetitions
+double total_enc_time = 0; //seconds spent encoding, summed over every image
+size_t total_enc_size = 0; //bytes of encoded PNG output, summed over every image
+size_t total_in_size = 0; // This is the uncompressed data in the raw color format
+
+bool verbose = false; //main() sets this from the "-v" argument; enables the per-image output
+
+////////////////////////////////////////////////////////////////////////////////
+
+double getTime() {
+  return static_cast<double>(SDL_GetTicks()) / 1000.0; //SDL ticks are milliseconds; report seconds
+}
+
+void fail() {
+  throw int(1); //an int exception is this file's failure signal; main() has no handler for it
+}
+
+template<typename T, typename U> //actual is cast to T before the two values are compared
+void assertEquals(const T& expected, const U& actual, const std::string& message = "") {
+  if(!(expected != (T)actual)) return;
+  //mismatch: print both values and the caller's message, then signal failure
+  std::cout << "Error: Not equal! Expected " << expected << " got " << actual << "." << std::endl;
+  std::cout << "Message: " << message << std::endl;
+  fail();
+}
+
+void assertTrue(bool value, const std::string& message = "") {
+  if(value) return; //condition holds, nothing to report
+  //condition violated: print the caller's message, then signal failure
+  std::cout << "Error: expected true." << std::endl;
+  std::cout << "Message: " << message << std::endl;
+  fail();
+}
+
+//Test image data: a raw pixel buffer plus the format description handed to the LodePNG encoder
+struct Image {
+  std::vector<unsigned char> data; //raw pixel bytes, laid out according to colorType and bitDepth
+  unsigned width; //image width in pixels
+  unsigned height; //image height in pixels
+  LodePNGColorType colorType; //color type of the bytes in data (e.g. LCT_RGB, LCT_RGBA, LCT_GREY)
+  unsigned bitDepth; //bit depth passed to the encoder and decoder together with colorType
+};
+
+//Debug helper: renders any stream-insertable value as a std::string.
+template<typename T>
+std::string valtostr(const T& val) {
+  std::ostringstream formatter;
+  formatter << val;
+  return std::string(formatter.str());
+}
+
+template<typename T> //prints "<name>: <value><unit>" followed by std::endl
+void printValue(const std::string& name, const T& value, const std::string& unit = "") {
+  std::cout << (name + ": ") << value << unit << std::endl;
+}
+//Two-value overload: prints "<name>: <value><s2><value2><unit>" followed by std::endl
+template<typename T, typename U>
+void printValue(const std::string& name, const T& value, const std::string& s2, const U& value2, const std::string& unit = "") {
+  std::cout << (name + ": ") << value << s2 << value2 << unit << std::endl;
+}
+
+//Encodes image with the LodePNG C interface, decodes the result NUM_DECODE times and adds timings and sizes to the global totals
+void doCodecTest(Image& image) {
+  unsigned char* encoded = 0;
+  size_t encoded_size = 0;
+  unsigned char* decoded = 0;
+  unsigned decoded_w = 0; //zero-initialized so they are never read as garbage when NUM_DECODE is 0
+  unsigned decoded_h = 0;
+  assertTrue(!image.data.empty(), "image has no pixel data"); //&image.data[0] below needs at least one element
+
+  double t_enc0 = getTime();
+  unsigned error_enc = lodepng_encode_memory(&encoded, &encoded_size, &image.data[0],
+                                             image.width, image.height, image.colorType, image.bitDepth);
+  double t_enc1 = getTime();
+
+  assertEquals(0, error_enc, "encoder error C");
+
+  double dec_time = 0; //sum of the decode calls only; releasing buffers between rounds is not timed
+  for(int i = 0; i < NUM_DECODE; i++) {
+    free(decoded); //every round allocates a fresh output buffer: release the previous one or it leaks
+    decoded = 0;
+    double t_dec0 = getTime();
+    unsigned error_dec = lodepng_decode_memory(&decoded, &decoded_w, &decoded_h,
+                                               encoded, encoded_size, image.colorType, image.bitDepth);
+    dec_time += getTime() - t_dec0;
+    assertEquals(0, error_dec, "decoder error C");
+  }
+  if(decoded) { //no decode ran when NUM_DECODE is 0, so there are no dimensions to compare
+    assertEquals(image.width, decoded_w);
+    assertEquals(image.height, decoded_h);
+  }
+
+  total_enc_size += encoded_size;
+  total_enc_time += (t_enc1 - t_enc0);
+  total_dec_time += dec_time;
+  LodePNGColorMode colormode; //only colortype and bitdepth are filled in before the raw size query
+  colormode.colortype = image.colorType;
+  colormode.bitdepth = image.bitDepth;
+  total_in_size += lodepng_get_raw_size(image.width, image.height, &colormode);
+
+  if(verbose) {
+    printValue("encoding time", t_enc1 - t_enc0, "s");
+    std::cout << "compression: " << ((double)(encoded_size) / (double)(image.data.size())) * 100 << "%"
+              << " ratio: " << ((double)(image.data.size()) / (double)(encoded_size))
+              << " size: " << encoded_size << std::endl;
+    if(NUM_DECODE> 0) printValue("decoding time", dec_time, "/", NUM_DECODE, " s");
+    std::cout << std::endl;
+  }
+
+  free(encoded);
+  free(decoded);
+}
+
+static const int IMGSIZE = 4096; //edge length in pixels of the largest generated patterns; the others use IMGSIZE / 2 or IMGSIZE / 8
+
+void testPatternSine() {
+  //8-bit RGBA benchmark image: each channel is a sine of the squared distance to one image corner.
+  //Author's note: it responds opposite to optimizations (gets worse when all other images improve).
+  if(verbose) std::cout << "sine pattern" << std::endl;
+
+  const int w = IMGSIZE / 2;
+  const int h = IMGSIZE / 2;
+  const double scale = w * h / 8.0; //divisor shared by every sine argument
+  Image image;
+  image.width = w;
+  image.height = h;
+  image.colorType = LCT_RGBA;
+  image.bitDepth = 8;
+  image.data.resize(w * h * 4);
+  for(int y = 0; y < h; y++) {
+    const int top = y * y, bottom = (h - y - 1) * (h - y - 1);
+    for(int x = 0; x < w; x++) {
+      const int left = x * x, right = (w - x - 1) * (w - x - 1);
+      unsigned char* px = &image.data[4 * w * y + 4 * x];
+      px[0] = (unsigned char)(127 * (1 + std::sin((left + top) / scale)));
+      px[1] = (unsigned char)(127 * (1 + std::sin((right + top) / scale)));
+      px[2] = (unsigned char)(127 * (1 + std::sin((left + bottom) / scale)));
+      px[3] = (unsigned char)(127 * (1 + std::sin((right + bottom) / scale)));
+    }
+  }
+
+  doCodecTest(image);
+}
+
+void testPatternSineNoAlpha() {
+  //8-bit RGB benchmark image: each channel is a sine of the squared distance to one image corner.
+  //Author's note: it responds opposite to optimizations (gets worse when all other images improve).
+  if(verbose) std::cout << "sine pattern w/o alpha" << std::endl;
+
+  const int w = IMGSIZE / 2;
+  const int h = IMGSIZE / 2;
+  const double scale = w * h / 8.0; //divisor shared by every sine argument
+  Image image;
+  image.width = w;
+  image.height = h;
+  image.colorType = LCT_RGB;
+  image.bitDepth = 8;
+  image.data.resize(w * h * 3);
+  for(int y = 0; y < h; y++) {
+    const int top = y * y, bottom = (h - y - 1) * (h - y - 1);
+    for(int x = 0; x < w; x++) {
+      const int left = x * x, right = (w - x - 1) * (w - x - 1);
+      unsigned char* px = &image.data[3 * w * y + 3 * x];
+      px[0] = (unsigned char)(127 * (1 + std::sin((left + top) / scale)));
+      px[1] = (unsigned char)(127 * (1 + std::sin((right + top) / scale)));
+      px[2] = (unsigned char)(127 * (1 + std::sin((left + bottom) / scale)));
+    }
+  }
+
+  doCodecTest(image);
+}
+
+void testPatternXor() {
+  //IMGSIZE x IMGSIZE 8-bit RGB benchmark image in which all three channels hold the low byte of x XOR y.
+  if(verbose) std::cout << "xor pattern" << std::endl;
+  const int w = IMGSIZE;
+  const int h = IMGSIZE;
+  Image image;
+  image.width = w;
+  image.height = h;
+  image.colorType = LCT_RGB;
+  image.bitDepth = 8;
+  image.data.resize(w * h * 3);
+  size_t pos = 0; //rows are filled in order, so a running byte index equals 3 * w * y + 3 * x + channel
+  for(int y = 0; y < h; y++)
+  for(int x = 0; x < w; x++) {
+    const unsigned char value = (unsigned char)(x ^ y);
+    for(int c = 0; c < 3; c++) image.data[pos++] = value;
+  }
+
+  doCodecTest(image);
+}
+
+static unsigned int m_w = 1;
+static unsigned int m_z = 2;
+
+//"Multiply-With-Carry" generator of G. Marsaglia: advances both state words and combines them
+unsigned int getRandomUint() {
+  m_z = (m_z >> 16) + 36969 * (m_z & 0xFFFFu);
+  m_w = (m_w >> 16) + 18000 * (m_w & 0xFFFFu);
+  return m_w + (m_z << 16); //unsigned arithmetic wraps, giving the 32-bit result
+}
+
+void testPatternPseudoRan() {
+  //8-bit RGB benchmark image, IMGSIZE / 2 pixels on each side, filled from getRandomUint(): one call per pixel.
+  if(verbose) std::cout << "pseudorandom pattern" << std::endl;
+  const int w = IMGSIZE / 2;
+  const int h = IMGSIZE / 2;
+  Image image;
+  image.width = w;
+  image.height = h;
+  image.colorType = LCT_RGB;
+  image.bitDepth = 8;
+  image.data.resize(w * h * 3);
+  for(int i = 0; i < w * h; i++) { //pixel i is (x, y) = (i % w, i / w), i.e. row-major order
+    const unsigned int bits = getRandomUint();
+    unsigned char* px = &image.data[3 * i];
+    px[0] = bits % 256;
+    px[1] = (bits >> 8) % 256;
+    px[2] = (bits >> 16) % 256;
+  }
+
+  doCodecTest(image);
+}
+
+void testPatternSineXor() {
+  //8-bit RGBA benchmark image: every channel is half its corner sine wave plus half the low byte of x XOR y.
+  if(verbose) std::cout << "sine+xor pattern" << std::endl;
+  const int w = IMGSIZE / 2, h = IMGSIZE / 2;
+  const double scale = w * h / 8.0; //divisor shared by every sine argument
+  Image image;
+  image.width = w;
+  image.height = h;
+  image.colorType = LCT_RGBA;
+  image.bitDepth = 8;
+  image.data.resize(w * h * 4);
+  for(int y = 0; y < h; y++) {
+    const int top = y * y, bottom = (h - y - 1) * (h - y - 1);
+    for(int x = 0; x < w; x++) {
+      const int left = x * x, right = (w - x - 1) * (w - x - 1);
+      const int half_xor = ((x ^ y) % 256) / 2;
+      unsigned char* px = &image.data[4 * w * y + 4 * x];
+      px[0] = (unsigned char)(127 * (1 + std::sin((left + top) / scale))) / 2 + half_xor;
+      px[1] = (unsigned char)(127 * (1 + std::sin((right + top) / scale))) / 2 + half_xor;
+      px[2] = (unsigned char)(127 * (1 + std::sin((left + bottom) / scale))) / 2 + half_xor;
+      px[3] = (unsigned char)(127 * (1 + std::sin((right + bottom) / scale))) / 2 + half_xor;
+    }
+  }
+
+  doCodecTest(image);
+}
+
+void testPatternGreyMandel() {
+  //8-bit greyscale benchmark image, IMGSIZE / 2 on each side: the pixel value is the Mandelbrot escape iteration modulo 256.
+  if(verbose) std::cout << "grey mandelbrot pattern" << std::endl;
+  const int w = IMGSIZE / 2;
+  const int h = IMGSIZE / 2;
+  Image image;
+  image.width = w;
+  image.height = h;
+  image.colorType = LCT_GREY;
+  image.bitDepth = 8;
+  image.data.resize(w * h);
+
+  // go to a position in the mandelbrot where there's lots of entropy
+  const double zoom = 1779.8, moveX = -0.7431533999637661, moveY = -0.1394057861346605;
+  const int maxIterations = 300;
+
+  for(int y = 0; y < h; y++) {
+    const double ci = (y - h / 2) / (0.5 * zoom * h) + moveY; //imaginary part of c, constant along a row
+    for(int x = 0; x < w; x++) {
+      const double cr = 1.5 * (x - w / 2) / (0.5 * zoom * w) + moveX; //real part of c
+      double re = 0, im = 0; //z starts at 0,0
+      int i = 0;
+      while(i < maxIterations) {
+        const double nextRe = re * re - im * im + cr;
+        const double nextIm = 2 * re * im + ci;
+        re = nextRe;
+        im = nextIm;
+        if((re * re + im * im) > 4) break; //escaped: i keeps the index of this iteration
+        i++;
+      }
+      image.data[w * y + x] = i % 256;
+    }
+  }
+
+  doCodecTest(image);
+}
+
+void testPatternGreyMandelSmall() {
+  //8-bit greyscale benchmark image, IMGSIZE / 8 on each side: the pixel value is the Mandelbrot escape iteration modulo 256.
+  if(verbose) std::cout << "grey mandelbrot pattern" << std::endl;
+  const int w = IMGSIZE / 8;
+  const int h = IMGSIZE / 8;
+  Image image;
+  image.width = w;
+  image.height = h;
+  image.colorType = LCT_GREY;
+  image.bitDepth = 8;
+  image.data.resize(w * h);
+
+  // go to a position in the mandelbrot where there's lots of entropy
+  const double zoom = 1779.8, moveX = -0.7431533999637661, moveY = -0.1394057861346605;
+  const int maxIterations = 300;
+
+  for(int y = 0; y < h; y++) {
+    const double ci = (y - h / 2) / (0.5 * zoom * h) + moveY; //imaginary part of c, constant along a row
+    for(int x = 0; x < w; x++) {
+      const double cr = 1.5 * (x - w / 2) / (0.5 * zoom * w) + moveX; //real part of c
+      double re = 0, im = 0; //z starts at 0,0
+      int i = 0;
+      while(i < maxIterations) {
+        const double nextRe = re * re - im * im + cr;
+        const double nextIm = 2 * re * im + ci;
+        re = nextRe;
+        im = nextIm;
+        if((re * re + im * im) > 4) break; //escaped: i keeps the index of this iteration
+        i++;
+      }
+      image.data[w * y + x] = i % 256;
+    }
+  }
+
+  doCodecTest(image);
+}
+
+void testPatternX() {
+  //IMGSIZE x IMGSIZE 8-bit greyscale benchmark image whose pixel value is the x coordinate modulo 256.
+  if(verbose) std::cout << "x pattern" << std::endl;
+  Image image;
+  int w = IMGSIZE;
+  int h = IMGSIZE;
+  image.width = w;
+  image.height = h;
+  image.colorType = LCT_GREY;
+  image.bitDepth = 8;
+  image.data.resize(w * h); //LCT_GREY at 8 bits is one byte per pixel; a larger buffer skews the ratio doCodecTest prints
+  for(int y = 0; y < h; y++)
+  for(int x = 0; x < w; x++) {
+    image.data[w * y + x + 0] = x % 256;
+  }
+
+  doCodecTest(image);
+}
+
+void testPatternY() {
+  //IMGSIZE x IMGSIZE 8-bit greyscale benchmark image whose pixel value is the y coordinate modulo 256.
+  if(verbose) std::cout << "y pattern" << std::endl;
+  Image image;
+  int w = IMGSIZE;
+  int h = IMGSIZE;
+  image.width = w;
+  image.height = h;
+  image.colorType = LCT_GREY;
+  image.bitDepth = 8;
+  image.data.resize(w * h); //LCT_GREY at 8 bits is one byte per pixel; a larger buffer skews the ratio doCodecTest prints
+  for(int y = 0; y < h; y++)
+  for(int x = 0; x < w; x++) {
+    image.data[w * y + x + 0] = y % 256;
+  }
+
+  doCodecTest(image);
+}
+
+void testPatternDisk(const std::string& filename) {
+  if(verbose) std::cout << "file " << filename << std::endl;
+  //Loads the PNG as 8-bit RGB and benchmarks it; a file that fails to load is reported and skipped.
+  Image image;
+  image.colorType = LCT_RGB;
+  image.bitDepth = 8;
+  unsigned error = lodepng::decode(image.data, image.width, image.height, filename, image.colorType, image.bitDepth);
+  if(error) std::cout << "decoder error " << error << ": " << lodepng_error_text(error) << std::endl;
+  else doCodecTest(image); //only benchmark when width, height and data were actually filled in
+}
+
+int main(int argc, char *argv[]) {
+  //Arguments: "-v" turns on verbose output, every other argument is a PNG file to benchmark.
+  //Without file arguments a built-in set of test images and generated patterns is benchmarked.
+
+  verbose = false;
+  std::vector<std::string> files;
+
+  for(int i = 1; i < argc; i++) {
+    const std::string arg(argv[i]);
+    if(!(arg == "-v")) files.push_back(arg);
+    else verbose = true;
+  }
+
+  std::cout << "NUM_DECODE: " << NUM_DECODE << std::endl;
+
+  if(!files.empty()) {
+    for(size_t i = 0; i < files.size(); i++) {
+      testPatternDisk(files[i]);
+    }
+  } else {
+    //Default benchmark set: the images under testdata/ followed by generated patterns.
+    static const char* const defaults[] = {
+      "testdata/Ecce_homo_by_Hieronymus_Bosch.png",
+      "testdata/ephyse_franco-chon-s-butchery.png",
+      "testdata/jwbalsley_subway-rats.png",
+      "testdata/Biomenace_complete.png",
+      "testdata/frymire.png",
+      "testdata/lena.png",
+      "testdata/linedrawing.png"
+    };
+    const size_t count = sizeof(defaults) / sizeof(defaults[0]);
+    for(size_t i = 0; i < count; i++) testPatternDisk(defaults[i]);
+
+    //Generated patterns. Left out of the default run: testPatternSine, testPatternSineNoAlpha,
+    //testPatternSineXor, testPatternX and testPatternY.
+    testPatternXor();
+    testPatternPseudoRan();
+    testPatternGreyMandel();
+  }
+
+  const double dec_time = total_dec_time / NUM_DECODE; //time of a single decode pass over all images
+  const double in_mb = total_in_size / 1024.0 / 1024.0;
+  std::cout << "Total decoding time: " << dec_time << "s (" << (in_mb / dec_time) << " MB/s)" << std::endl;
+  std::cout << "Total encoding time: " << total_enc_time << "s (" << (in_mb / (total_enc_time)) << " MB/s)" << std::endl;
+  std::cout << "Total uncompressed size  : " << total_in_size << std::endl;
+  std::cout << "Total encoded size: " << total_enc_size << " (" << (100.0 * total_enc_size / total_in_size) << "%)" << std::endl;
+
+  if(verbose) std::cout << "benchmark done" << std::endl;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/lodepng_unittest.cpp b/codec/L2/demos/pikEnc/host/third_party/lodepng/lodepng_unittest.cpp
new file mode 100755
index 0000000000..da996fdf89
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/lodepng_unittest.cpp
@@ -0,0 +1,2504 @@
+/*
+LodePNG Unit Test
+
+Copyright (c) 2005-2018 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+//g++ lodepng.cpp lodepng_util.cpp lodepng_unittest.cpp -Wall -Wextra -Wsign-conversion -pedantic -ansi -O3
+
+/*
+Testing instructions:
+
+*) Ensure no tests are commented out below and that there is no early return in doMain
+
+*) Compile with g++ or clang++ with all warnings and run the unit test
+g++ lodepng.cpp lodepng_util.cpp lodepng_unittest.cpp -Wall -Wextra -Wsign-conversion -Wshadow -pedantic -ansi -O3 && ./a.out
+clang++ lodepng.cpp lodepng_util.cpp lodepng_unittest.cpp -Wall -Wextra -Wsign-conversion -Wshadow -pedantic -ansi -O3 && ./a.out
+
+*) Compile with pure ISO C90 and all warnings:
+mv lodepng.cpp lodepng.c ; gcc -I ./ lodepng.c examples/example_decode.c -ansi -pedantic -Wall -Wextra -O3 ; mv lodepng.c lodepng.cpp
+
+*) Compile with C with -pedantic but not -ansi flag so it warns about // style comments in C++-only ifdefs
+mv lodepng.cpp lodepng.c ; gcc -I ./ lodepng.c examples/example_decode.c -pedantic -Wall -Wextra -O3 ; mv lodepng.c lodepng.cpp
+
+*) try lodepng_benchmark.cpp
+g++ lodepng.cpp lodepng_benchmark.cpp -Wall -Wextra -pedantic -ansi -lSDL -O3 && ./a.out
+g++ lodepng.cpp lodepng_benchmark.cpp -Wall -Wextra -pedantic -ansi -lSDL -O3 && ./a.out corpus/''*
+
+*) Check if all examples compile without warnings:
+g++ -I ./ lodepng.cpp examples/''*.cpp -W -Wall -ansi -pedantic -O3 -c
+mv lodepng.cpp lodepng.c ; gcc -I ./ lodepng.c examples/''*.c -W -Wall -ansi -pedantic -O3 -c ; mv lodepng.c lodepng.cpp
+
+*) Check pngdetail.cpp:
+g++ lodepng.cpp lodepng_util.cpp pngdetail.cpp -W -Wall -ansi -pedantic -O3 -o pngdetail
+./pngdetail testdata/PngSuite/basi0g01.png
+
+*) Test compiling with some code sections with #defines disabled, for unused static function warnings etc...
+g++ lodepng.cpp -W -Wall -ansi -pedantic -O3 -c -DLODEPNG_NO_COMPILE_ZLIB
+g++ lodepng.cpp -W -Wall -ansi -pedantic -O3 -c -DLODEPNG_NO_COMPILE_PNG
+g++ lodepng.cpp -W -Wall -ansi -pedantic -O3 -c -DLODEPNG_NO_COMPILE_DECODER
+g++ lodepng.cpp -W -Wall -ansi -pedantic -O3 -c -DLODEPNG_NO_COMPILE_ENCODER
+g++ lodepng.cpp -W -Wall -ansi -pedantic -O3 -c -DLODEPNG_NO_COMPILE_DISK
+g++ lodepng.cpp -W -Wall -ansi -pedantic -O3 -c -DLODEPNG_NO_COMPILE_ANCILLARY_CHUNKS
+g++ lodepng.cpp -W -Wall -ansi -pedantic -O3 -c -DLODEPNG_NO_COMPILE_ERROR_TEXT
+g++ lodepng.cpp -W -Wall -ansi -pedantic -O3 -c -DLODEPNG_NO_COMPILE_CPP
+g++ lodepng.cpp -W -Wall -ansi -pedantic -O3 -c -DLODEPNG_NO_COMPILE_ZLIB -DLODEPNG_NO_COMPILE_DECODER
+g++ lodepng.cpp -W -Wall -ansi -pedantic -O3 -c -DLODEPNG_NO_COMPILE_ZLIB -DLODEPNG_NO_COMPILE_ENCODER
+g++ lodepng.cpp -W -Wall -ansi -pedantic -O3 -c -DLODEPNG_NO_COMPILE_PNG -DLODEPNG_NO_COMPILE_DECODER
+g++ lodepng.cpp -W -Wall -ansi -pedantic -O3 -c -DLODEPNG_NO_COMPILE_PNG -DLODEPNG_NO_COMPILE_ENCODER
+rm *.o
+
+*) analyze with clang:
+clang++ lodepng.cpp --analyze
+More verbose:
+clang++ --analyze -Xanalyzer -analyzer-output=text lodepng.cpp
+Or html, look under lodepng.plist dir afterwards and find the numbered locations in the pages:
+clang++ --analyze -Xanalyzer -analyzer-output=html lodepng.cpp
+
+*) check for memory leaks and vulnerabilities with valgrind
+(DISABLE_SLOW disables a few tests that are very slow with valgrind)
+g++ -DDISABLE_SLOW lodepng.cpp lodepng_util.cpp lodepng_unittest.cpp -Wall -Wextra -pedantic -ansi -O3 -DLODEPNG_MAX_ALLOC=100000000 && valgrind --leak-check=full --track-origins=yes ./a.out
+
+*) Try with clang++ and address sanitizer (to get line numbers, make sure 'llvm' is also installed to get 'llvm-symbolizer')
+clang++ -fsanitize=address lodepng.cpp lodepng_util.cpp lodepng_unittest.cpp -Wall -Wextra -Wshadow -pedantic -ansi -O3 && ASAN_OPTIONS=allocator_may_return_null=1 ./a.out
+clang++ -fsanitize=address lodepng.cpp lodepng_util.cpp lodepng_unittest.cpp -Wall -Wextra -Wshadow -pedantic -ansi -g3 && ASAN_OPTIONS=allocator_may_return_null=1 ./a.out
+
+*) remove "#include <iostream>" from lodepng.cpp if it's still in there
+cat lodepng.cpp | grep iostream
+cat lodepng.cpp | grep "#include"
+
+*) check that no plain "free", "malloc" and "realloc" used, but the lodepng_* versions instead
+
+*) check version dates in copyright message and LODEPNG_VERSION_STRING
+
+*) check year in copyright message at top of all files as well as at bottom of lodepng.h
+
+*) check examples/example_sdl.cpp with the png test suite images (the "x" ones are expected to show an error)
+g++ -I ./ lodepng.cpp examples/example_sdl.cpp -Wall -Wextra -pedantic -ansi -O3 -lSDL -o showpng && ./showpng testdata/PngSuite/''*.png
+
+*) strip trailing spaces and ensure consistent newlines
+
+*) check diff of lodepng.cpp and lodepng.h before submitting
+git difftool -y
+
+*/
+
+#include "lodepng.h"
+#include "lodepng_util.h"
+
+#include <cmath>
+#include <map>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+////////////////////////////////////////////////////////////////////////////////
+
+void fail() { //aborts the running unit test: a failure is signalled by throwing an int
+  const int failureSignal = 1; throw failureSignal;
+}
+
+template<typename T, typename U> //fails the test unless actual, converted to T, compares equal to expected
+void assertEquals(const T& expected, const U& actual, const std::string& message = "") {
+  const T converted = (T)actual; //the comparison and the report both use the value as a T
+  if(!(expected != converted)) return;
+  std::cout << "Error: Not equal! Expected " << expected << " got " << converted << ". ";
+  std::cout << "Message: " << message << std::endl;
+  fail();
+}
+
+template<typename T, typename U> //fails the test if actual, converted to T, compares equal to expected
+void assertNotEquals(const T& expected, const U& actual, const std::string& message = "") {
+  const T converted = (T)actual; //the comparison and the report both use the value as a T
+  if(!(expected == converted)) return;
+  std::cout << "Error: Equal but expected not equal! Expected not " << expected << " got " << converted << ". ";
+  std::cout << "Message: " << message << std::endl;
+  fail();
+}
+
+void assertTrue(bool value, const std::string& message = "") {
+  if(value) return; //a true condition passes silently; anything else is reported and fails the test
+  const char* prefix = "Error: expected true. ";
+  std::cout << prefix << "Message: " << message << std::endl;
+  fail();
+}
+
+//assert that no error
+void assertNoPNGError(unsigned error, const std::string& message = "") {
+  if(error) {
+    std::string msg = (message == "") ? lodepng_error_text(error)
+                                      : message + std::string(": ") + lodepng_error_text(error);
+    assertEquals(0, error, msg);
+  }
+}
+
+void assertNoError(unsigned error) {
+  //zero passes silently; any other value is reported through assertEquals, which fails the test
+  if(error == 0) return;
+  assertEquals(0, error, "Expected no error");
+}
+
+//Helper macros. STR(x) stringifies x after macro expansion, so STR(__LINE__) gives the line number. The ASSERT_* macros call the assert* functions with a message that starts with the source line (ASSERT_EQUALS and ASSERT_NOT_EQUALS also add the text of their arguments).
+#define STR_EXPAND(s) #s
+#define STR(s) STR_EXPAND(s)
+#define ASSERT_EQUALS(e, v) {\
+  assertEquals(e, v, std::string() + "line " + STR(__LINE__) + ": " + STR(v) + " ASSERT_EQUALS(" + #e + ", " + #v + ")");\
+}
+#define ASSERT_NOT_EQUALS(e, v) {\
+  assertNotEquals(e, v, std::string() + "line " + STR(__LINE__) + ": " + STR(v) + " ASSERT_NOT_EQUALS(" + #e + ", " + #v + ")");\
+}
+#define ASSERT_STRING_EQUALS(e, v) ASSERT_EQUALS(std::string(e), std::string(v))
+#define ASSERT_NO_PNG_ERROR_MSG(error, message) assertNoPNGError(error, std::string("line ") + STR(__LINE__) + (std::string(message).empty() ? std::string("") : (": " + std::string(message))))
+#define ASSERT_NO_PNG_ERROR(error) ASSERT_NO_PNG_ERROR_MSG(error, std::string(""))
+//The 64 characters of the base64 alphabet, indexed by their 6-bit value
+static const std::string BASE64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+//Appends the base64 encoding of in to out, padded with '=' to a multiple of 4 characters. T and U can be std::string or std::vector<unsigned char>
+template<typename T, typename U>
+void toBase64(T& out, const U& in) {
+  for(size_t i = 0; i < in.size(); i += 3) {
+    //go through unsigned char: where char is signed, std::string bytes >= 128 would be negative and corrupt the 24-bit group
+    unsigned v = 65536u * static_cast<unsigned char>(in[i]);
+    if(i + 1 < in.size()) v += 256u * static_cast<unsigned char>(in[i + 1]);
+    if(i + 2 < in.size()) v += static_cast<unsigned char>(in[i + 2]);
+    out.push_back(BASE64[(v >> 18) & 0x3f]);
+    out.push_back(BASE64[(v >> 12) & 0x3f]);
+    if(i + 1 < in.size()) out.push_back(BASE64[(v >> 6) & 0x3f]);
+    else out.push_back('=');
+    if(i + 2 < in.size()) out.push_back(BASE64[(v >> 0) & 0x3f]);
+    else out.push_back('=');
+  }
+}
+
+int fromBase64(int v) { //returns the 6-bit value of base64 character v; anything else, including the padding '=', gives 0
+  if('A' <= v && v <= 'Z') return v - 'A';
+  else if('a' <= v && v <= 'z') return 26 + (v - 'a');
+  else if('0' <= v && v <= '9') return 52 + (v - '0');
+  else if(v == '+') return 62;
+  return (v == '/') ? 63 : 0;
+}
+
+//Appends the bytes decoded from base64 text in to out; trailing input shorter than 4 characters is ignored. T and U can be std::string or std::vector<unsigned char>
+template<typename T, typename U>
+void fromBase64(T& out, const U& in) {
+  for(size_t pos = 0; pos + 3 < in.size(); pos += 4) {
+    int group = 0; //the 24 bits encoded by the four characters starting at pos
+    for(size_t k = 0; k < 4; k++) group = 64 * group + fromBase64(in[pos + k]);
+    out.push_back((group >> 16) & 0xff);
+    if(in[pos + 2] != '=') out.push_back((group >> 8) & 0xff);
+    if(in[pos + 3] != '=') out.push_back((group >> 0) & 0xff);
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+//Test image data: raw pixel bytes together with the dimensions and the color mode used by the tests
+struct Image {
+  std::vector<unsigned char> data; //raw pixel bytes, handed to the LodePNG encode functions as-is
+  unsigned width; //in pixels
+  unsigned height; //in pixels
+  LodePNGColorType colorType; //color type passed to the encoder and decoder along with data
+  unsigned bitDepth; //bits per color channel (generateTestImage multiplies it by the channel count to get bits per pixel)
+};
+
+template<typename T> //formats val with operator<< and returns the text; utility for debug messages
+std::string valtostr(const T& val) {
+  std::ostringstream formatter;
+  formatter << val;
+  const std::string text = formatter.str();
+  return text;
+}
+
+//Get number of color channels for a given PNG color type; color types that do not exist give 0
+unsigned getNumColorChannels(unsigned colorType) {
+  static const unsigned channels[7] = { //indexed by the PNG color type value
+    1, 0, /*0: grey, 1: unexisting color type*/
+    3, 1, /*2: RGB, 3: palette*/
+    2, 0, /*4: grey + alpha, 5: unexisting color type*/
+    4     /*6: RGBA*/
+  };
+  if(colorType >= 7) return 0; /*unexisting color type*/
+  return channels[colorType];
+}
+
+//Fills image with the given dimensions and color mode and with compressible, non-uniform pixel data:
+//the data bytes simply count upwards starting at 128, wrapping around after 255.
+void generateTestImage(Image& image, unsigned width, unsigned height, LodePNGColorType colorType = LCT_RGBA, unsigned bitDepth = 8) {
+  const size_t bitsPerPixel = bitDepth * getNumColorChannels(colorType);
+  const size_t numBytes = (width * height * bitsPerPixel + 7) / 8; //total image size, rounded up to whole bytes
+
+  image.width = width;
+  image.height = height;
+  image.colorType = colorType;
+  image.bitDepth = bitDepth;
+  image.data.resize(numBytes);
+
+  for(size_t i = 0; i < numBytes; i++) {
+    image.data[i] = (unsigned char)(128 + i);
+  }
+}
+
+//Generate a 16-bit test image with minimal size that requires at minimum the given color type (bit depth, greyscaleness, ...)
+//If key is true, makes it such that exactly one color is transparent, so it can use a key. If false, adds a translucent color depending on
+//whether it's an alpha color type or not.
+void generateTestImageRequiringColorType16(Image& image, LodePNGColorType colorType, unsigned bitDepth, bool key) {
+  image.colorType = colorType;
+  image.bitDepth = bitDepth;
+  unsigned w = 1;
+  unsigned h = 1;
+  //Note: whatever colorType and bitDepth are, image.data is filled with 8 bytes per pixel (four channels of two bytes each)
+  bool grey = colorType == LCT_GREY || colorType == LCT_GREY_ALPHA;
+  bool alpha = colorType == LCT_RGBA || colorType == LCT_GREY_ALPHA;
+
+  if(colorType == LCT_PALETTE) {
+    w = 1u << bitDepth;
+    h = 256; // ensure it'll really choose palette, not omit it due to small image size
+    image.data.resize(w * h * 8);
+    for(size_t y = 0; y < h; y++) {
+      for(size_t x = 0; x < w; x++) {
+        size_t i = y * w * 8 + x * 8; // byte offset of pixel (x, y)
+        image.data[i + 0] = image.data[i + 1] = y; // NOTE(review): varies with y here but with x in generateTestImageRequiringColorType8 - confirm this is intended
+        image.data[i + 2] = image.data[i + 3] = 255;
+        image.data[i + 4] = image.data[i + 5] = 0;
+        image.data[i + 6] = image.data[i + 7] = (key && y == 0) ? 0 : 255; // with key, the pixels of row 0 are fully transparent
+      }
+    }
+  } else if(bitDepth == 16) {
+    // one color suffices for this model. But add one more to support key.
+    w = 2;
+    image.data.resize(w * h * 8);
+    image.data[0] = 10; image.data[1] = 20; // first pixel: the two bytes of each channel differ
+    image.data[2] = 10; image.data[3] = 20;
+    image.data[4] = grey ? 10 : 110; image.data[5] = grey ? 20 : 120; // third channel equals the others only for grey types
+    image.data[6] = alpha ? 128 : 255; image.data[7] = alpha ? 20 : 255; // translucent only for alpha color types
+
+    image.data[8] = 40; image.data[9] = 50; // second pixel
+    image.data[10] = 40; image.data[11] = 50;
+    image.data[12] = grey ? 40 : 140; image.data[13] = grey ? 50 : 150;
+    image.data[14] = key ? 0 : 255; image.data[15] = key ? 0 : 255; // fully transparent if key, else opaque
+  } else if(grey) {
+    w = 2;
+    unsigned v = 255u / ((1u << bitDepth) - 1u); // value that forces at least this bitdepth
+    image.data.resize(w * h * 8);
+    image.data[0] = v; image.data[1] = v; // first pixel: grey value v in all three color channels
+    image.data[2] = v; image.data[3] = v;
+    image.data[4] = v; image.data[5] = v;
+    image.data[6] = alpha ? v : 255; image.data[7] = alpha ? v : 255; // translucent only for alpha color types
+
+    image.data[8] = image.data[9] = 0; // second pixel: black
+    image.data[10] = image.data[11] = 0;
+    image.data[12] = image.data[13] = 0;
+    image.data[14] = image.data[15] = key ? 0 : 255; // fully transparent if key, else opaque
+  } else {
+    // now it's RGB or RGBA with bitdepth 8
+    w = 257; // must have at least more than 256 colors so it won't use palette
+    image.data.resize(w * h * 8);
+    for(size_t y = 0; y < h; y++) {
+      for(size_t x = 0; x < w; x++) {
+        size_t i = y * w * 8 + x * 8; // byte offset of pixel (x, y)
+        image.data[i + 0] = image.data[i + 1] = i / 2; // channel values are derived from the byte offset (truncated to a byte)
+        image.data[i + 2] = image.data[i + 3] = i / 3;
+        image.data[i + 4] = image.data[i + 5] = i / 5;
+        image.data[i + 6] = image.data[i + 7] = (key && y == 0) ? 0 : (alpha ? i : 255);
+      }
+    }
+  }
+
+  image.width = w;
+  image.height = h;
+}
+
+//Generate a 8-bit test image with minimal size that requires at minimum the given color type (bit depth, greyscaleness, ...). bitDepth max 8 here.
+//If key is true, makes it such that exactly one color is transparent, so it can use a key. If false, adds a translucent color depending on
+//whether it's an alpha color type or not.
+void generateTestImageRequiringColorType8(Image& image, LodePNGColorType colorType, unsigned bitDepth, bool key) {
+  image.colorType = colorType;
+  image.bitDepth = bitDepth;
+  unsigned w = 1;
+  unsigned h = 1;
+  //Note: whatever colorType and bitDepth are, image.data is filled with 4 bytes per pixel (four channels of one byte each)
+  bool grey = colorType == LCT_GREY || colorType == LCT_GREY_ALPHA;
+  bool alpha = colorType == LCT_RGBA || colorType == LCT_GREY_ALPHA;
+
+  if(colorType == LCT_PALETTE) {
+    w = 1u << bitDepth;
+    h = 256; // ensure it'll really choose palette, not omit it due to small image size
+    image.data.resize(w * h * 4);
+    for(size_t y = 0; y < h; y++) {
+      for(size_t x = 0; x < w; x++) {
+        size_t i = y * w * 4 + x * 4; // byte offset of pixel (x, y)
+        image.data[i + 0] = x; // one distinct color per column, so (1 << bitDepth) colors in total
+        image.data[i + 1] = 255;
+        image.data[i + 2] = 0;
+        image.data[i + 3] = (key && x == 0) ? 0 : 255; // with key, the color of column 0 is fully transparent
+      }
+    }
+  } else if(grey) {
+    w = 2;
+    unsigned v = 255u / ((1u << bitDepth) - 1u); // value that forces at least this bitdepth
+    image.data.resize(w * h * 4);
+    image.data[0] = v; // first pixel: grey value v in all three color channels
+    image.data[1] = v;
+    image.data[2] = v;
+    image.data[3] = alpha ? v : 255; // translucent only for alpha color types
+
+    image.data[4] = 0; // second pixel: black
+    image.data[5] = 0;
+    image.data[6] = 0;
+    image.data[7] = key ? 0 : 255; // fully transparent if key, else opaque
+  } else {
+    // now it's RGB or RGBA with bitdepth 8
+    w = 257; // must have at least more than 256 colors so it won't use palette
+    image.data.resize(w * h * 4);
+    for(size_t y = 0; y < h; y++) {
+      for(size_t x = 0; x < w; x++) {
+        size_t i = y * w * 4 + x * 4; // byte offset of pixel (x, y)
+        image.data[i + 0] = i / 2; // channel values are derived from the byte offset (truncated to a byte)
+        image.data[i + 1] = i / 3;
+        image.data[i + 2] = i / 5;
+        image.data[i + 3] = (key && x == 0) ? 0 : (alpha ? i : 255);
+      }
+    }
+  }
+
+  image.width = w;
+  image.height = h;
+}
+
+//Check that the decoded PNG pixels are the same as the pixels in the image. When the image's bit count is not a
+//multiple of 8, the padding bits at the end of the last byte may be random and are left out of the comparison.
+void assertPixels(Image& image, const unsigned char* decoded, const std::string& message) {
+  const size_t numBytes = image.data.size();
+  const size_t numbits = getNumColorChannels(image.colorType) * image.bitDepth * image.width * image.height;
+  const size_t usedBits = numbits % 8u; //bits of the last byte that hold pixel data; 0 means all 8 are used
+
+  //mask applied to the last byte of both buffers: zeroes its (8 - usedBits) lowest bits, which are the padding
+  int lastByteMask = 0xff;
+  if(usedBits != 0) lastByteMask &= ~((1 << (8u - usedBits)) - 1);
+
+  for(size_t i = 0; i < numBytes; i++) {
+    int byte_expected = image.data[i];
+    int byte_actual = decoded[i];
+    if(i + 1 == numBytes) {
+      byte_expected &= lastByteMask;
+      byte_actual &= lastByteMask;
+    }
+
+    assertEquals(byte_expected, byte_actual, message + " " + valtostr(i));
+  }
+}
+
+//Round-trips the image through the LodePNG C interface: encodes it to a PNG in memory, decodes that PNG again and compares
+void doCodecTestC(Image& image) {
+  unsigned char* encoded = 0;
+  unsigned char* decoded = 0;
+  size_t encoded_size = 0;
+  unsigned decoded_w, decoded_h;
+
+  //frees both buffers when this scope is left
+  struct BufferReleaser {
+    unsigned char** first;
+    unsigned char** second;
+    BufferReleaser(unsigned char** f, unsigned char** s) : first(f), second(s) {}
+    ~BufferReleaser() { free(*first); free(*second); }
+  } releaser(&encoded, &decoded);
+
+  unsigned char* pixels = &image.data[0];
+  unsigned error_enc = lodepng_encode_memory(&encoded, &encoded_size, pixels, image.width, image.height, image.colorType, image.bitDepth);
+
+  if(error_enc) std::cout << "Error: " << lodepng_error_text(error_enc) << std::endl;
+  ASSERT_NO_PNG_ERROR_MSG(error_enc, "encoder error C");
+
+  //a sufficiently large image must come out smaller than its raw data
+  if(image.data.size() > 512) assertTrue(image.data.size() > encoded_size, "compressed size");
+
+  //decode to the same color type and bit depth as the input, so that the raw bytes can be compared
+  unsigned error_dec = lodepng_decode_memory(&decoded, &decoded_w, &decoded_h, encoded, encoded_size, image.colorType, image.bitDepth);
+
+  if(error_dec) std::cout << "Error: " << lodepng_error_text(error_dec) << std::endl;
+  ASSERT_NO_PNG_ERROR_MSG(error_dec, "decoder error C");
+
+  ASSERT_EQUALS(image.width, decoded_w);
+  ASSERT_EQUALS(image.height, decoded_h);
+  assertPixels(image, decoded, "Pixels C");
+}
+
+//Round-trips the image through the LodePNG C++ interface: encodes it to a PNG in memory, decodes that PNG again and compares
+void doCodecTestCPP(Image& image) {
+  std::vector<unsigned char> png, decoded;
+  unsigned decoded_w, decoded_h;
+  const size_t rawSize = image.data.size();
+
+  //encode without an explicit lodepng::State
+  const unsigned encodeError = lodepng::encode(png, image.data, image.width, image.height,
+                                               image.colorType, image.bitDepth);
+
+  ASSERT_NO_PNG_ERROR_MSG(encodeError, "encoder error C++");
+
+  //a sufficiently large image must come out smaller than its raw data
+  if(rawSize > 512) assertTrue(png.size() < rawSize, "compressed size");
+
+  const unsigned decodeError = lodepng::decode(decoded, decoded_w, decoded_h, png, image.colorType, image.bitDepth);
+
+  ASSERT_NO_PNG_ERROR_MSG(decodeError, "decoder error C++");
+
+  ASSERT_EQUALS(image.width, decoded_w);
+  ASSERT_EQUALS(image.height, decoded_h);
+  ASSERT_EQUALS(image.data.size(), decoded.size());
+  assertPixels(image, &decoded[0], "Pixels C++");
+}
+
+
+void doCodecTestWithEncState(Image& image, lodepng::State& state) {
+  //Encodes the image with the settings in state, decodes the result without state and checks the round trip.
+  //The raw color mode stored in state is overwritten with that of the image.
+  std::vector<unsigned char> png, decoded;
+  unsigned decoded_w, decoded_h;
+
+  //the encoder has to read image.data in the image's own color mode
+  state.info_raw.bitdepth = image.bitDepth;
+  state.info_raw.colortype = image.colorType;
+  const unsigned encodeError = lodepng::encode(png, image.data, image.width, image.height, state);
+  ASSERT_NO_PNG_ERROR_MSG(encodeError, "encoder error uncompressed");
+
+  const unsigned decodeError = lodepng::decode(decoded, decoded_w, decoded_h, png, image.colorType, image.bitDepth);
+
+  ASSERT_NO_PNG_ERROR_MSG(decodeError, "decoder error uncompressed");
+
+  ASSERT_EQUALS(image.width, decoded_w);
+  ASSERT_EQUALS(image.height, decoded_h);
+  ASSERT_EQUALS(image.data.size(), decoded.size());
+  assertPixels(image, &decoded[0], "Pixels uncompressed");
+}
+
+
+//Round-trip test through the C++ interface with the encoder's zlib block type (btype) set to 0
+void doCodecTestUncompressed(Image& image) {
+  lodepng::State uncompressedState;
+  uncompressedState.encoder.zlibsettings.btype = 0;
+  doCodecTestWithEncState(image, uncompressedState);
+}
+
+void doCodecTestNoLZ77(Image& image) { //round-trip test with the encoder's use_lz77 setting switched off
+  lodepng::State noLz77State;
+  noLz77State.encoder.zlibsettings.use_lz77 = 0;
+  doCodecTestWithEncState(image, noLz77State);
+}
+
+//Round-trips the image through the C++ interface with interlace_method set to 1; one lodepng::State is used for both encoding and decoding
+void doCodecTestInterlaced(Image& image) {
+  std::vector<unsigned char> png, decoded;
+  unsigned decoded_w, decoded_h;
+  const size_t rawSize = image.data.size();
+
+  //encoder setup: interlaced output, raw data in the image's own color mode
+  lodepng::State state;
+  state.info_raw.colortype = image.colorType;
+  state.info_raw.bitdepth = image.bitDepth;
+  state.info_png.interlace_method = 1;
+
+  const unsigned encodeError = lodepng::encode(png, image.data, image.width, image.height, state);
+
+  ASSERT_NO_PNG_ERROR_MSG(encodeError, "encoder error interlaced");
+
+  //a sufficiently large image must come out smaller than its raw data
+  if(rawSize > 512) assertTrue(png.size() < rawSize, "compressed size");
+
+  state.info_raw.bitdepth = image.bitDepth; //the raw color mode is set again before decoding
+  state.info_raw.colortype = image.colorType;
+  const unsigned decodeError = lodepng::decode(decoded, decoded_w, decoded_h, state, png);
+
+  ASSERT_NO_PNG_ERROR_MSG(decodeError, "decoder error interlaced");
+
+  ASSERT_EQUALS(image.width, decoded_w);
+  ASSERT_EQUALS(image.height, decoded_h);
+  ASSERT_EQUALS(image.data.size(), decoded.size());
+  assertPixels(image, &decoded[0], "Pixels interlaced");
+}
+
+//Runs every encode/decode round-trip variant on the image, in this order: C, C++, interlaced, uncompressed, no LZ77
+void doCodecTest(Image& image) {
+  typedef void (*CodecTest)(Image&);
+  static const CodecTest variants[] = {
+    doCodecTestC, doCodecTestCPP, doCodecTestInterlaced, doCodecTestUncompressed, doCodecTestNoLZ77
+  };
+  for(size_t i = 0; i < sizeof(variants) / sizeof(variants[0]); i++) variants[i](image);
+}
+
+
+//Announces the test on stdout, then runs all codec round-trip tests on an image generated with the given parameters
+void codecTest(unsigned width, unsigned height, LodePNGColorType colorType = LCT_RGBA, unsigned bitDepth = 8) {
+  Image generated;
+  std::cout << "codec test " << width << " " << height << std::endl;
+  generateTestImage(generated, width, height, colorType, bitDepth);
+  doCodecTest(generated);
+}
+
+//Returns a copy of s with all space characters removed
+std::string removeSpaces(const std::string& s) {
+  std::string result;
+  for(size_t i = 0; i < s.size(); i++) if(s[i] != ' ') result += s[i];
+  return result;
+}
+
+//Packs a string of '0' and '1' characters (spaces are ignored) into bytes, most significant bit first.
+//bytes ends up with exactly ceil(number of bits / 8) elements; the unused low bits of the last byte are 0.
+void bitStringToBytes(std::vector<unsigned char>& bytes, const std::string& bits_) {
+  std::string bits = removeSpaces(bits_);
+  bytes.assign((bits.size() + 7) / 8, (unsigned char)0); //round the bit count up to whole bytes
+  for(size_t i = 0; i < bits.size(); i++) {
+    //bit i of the string goes to byte i / 8, counting bit positions down from 7
+    if(bits[i] == '1') bytes[i / 8] |= (unsigned char)(1 << (7 - i % 8));
+  }
+}
+
+/*
+Converts a single pixel from one color mode to another with lodepng_convert and
+compares the result with the expected bits. Pixel values are given as std::strings
+of 0's and 1's. Testing palette and testing color keys is not supported by this function.
+*/
+void colorConvertTest(const std::string& bits_in, LodePNGColorType colorType_in, unsigned bitDepth_in,
+                      const std::string& bits_out, LodePNGColorType colorType_out, unsigned bitDepth_out) {
+  std::cout << "color convert test " << bits_in << " - " << bits_out << std::endl;
+
+  std::vector<unsigned char> image, expected; //the source pixel and the expected conversion result
+  bitStringToBytes(image, bits_in);
+  bitStringToBytes(expected, bits_out);
+  std::vector<unsigned char> actual(expected.size());
+
+  LodePNGColorMode mode_in, mode_out;
+  lodepng_color_mode_init(&mode_in);
+  mode_in.colortype = colorType_in;
+  mode_in.bitdepth = bitDepth_in;
+  lodepng_color_mode_init(&mode_out);
+  mode_out.colortype = colorType_out;
+  mode_out.bitdepth = bitDepth_out;
+  unsigned error = lodepng_convert(&actual[0], &image[0], &mode_out, &mode_in, 1, 1);
+  ASSERT_NO_PNG_ERROR_MSG(error, "convert error");
+
+  for(size_t byteIndex = 0; byteIndex != expected.size(); ++byteIndex) {
+    assertEquals((int)expected[byteIndex], (int)actual[byteIndex], "byte " + valtostr(byteIndex));
+  }
+
+  lodepng_color_mode_cleanup(&mode_out);
+  lodepng_color_mode_cleanup(&mode_in);
+}
+
+void testOtherPattern1() {
+  std::cout << "codec other pattern 1" << std::endl;
+
+  //192x192 RGBA image; each channel is a sine of the squared distance to a different image corner
+  const size_t w = 192, h = 192;
+  Image image1;
+  image1.width = w; image1.height = h;
+  image1.colorType = LCT_RGBA;
+  image1.bitDepth = 8;
+  image1.data.resize(w * h * 4u);
+  for(size_t y = 0; y < h; y++)
+  for(size_t x = 0; x < w; x++) {
+    const size_t dl = x * x, dr = (w - x - 1) * (w - x - 1); //squared horizontal distance to the first/last column
+    const size_t dt = y * y, db = (h - y - 1) * (h - y - 1); //squared vertical distance to the first/last row
+    unsigned char* pixel = &image1.data[4u * w * y + 4u * x];
+    pixel[0] = (unsigned char)(127 * (1 + std::sin((dl + dt) / (w * h / 8.0))));
+    pixel[1] = (unsigned char)(127 * (1 + std::sin((dr + dt) / (w * h / 8.0))));
+    pixel[2] = (unsigned char)(127 * (1 + std::sin((dl + db) / (w * h / 8.0))));
+    pixel[3] = (unsigned char)(127 * (1 + std::sin((dr + db) / (w * h / 8.0))));
+  }
+  doCodecTest(image1);
+}
+
+void testOtherPattern2() {
+  //round-trips a 192x192 opaque RGBA image built from bitwise combinations of the pixel coordinates
+  std::cout << "codec other pattern 2" << std::endl;
+
+  const size_t w = 192, h = 192;
+  Image image1;
+  image1.bitDepth = 8;
+  image1.colorType = LCT_RGBA;
+  image1.width = w; image1.height = h;
+  image1.data.resize(w * h * 4u);
+  size_t offset = 0; //start of the current pixel; the pixels are visited in storage order
+  for(size_t y = 0; y < h; y++) {
+    for(size_t x = 0; x < w; x++, offset += 4u) {
+      image1.data[offset + 0u] = (x & y) ? 0 : 255;
+      image1.data[offset + 1u] = x ^ y;
+      image1.data[offset + 2u] = x | y;
+      image1.data[offset + 3u] = 255;
+    }
+  }
+  doCodecTest(image1);
+}
+
+void testSinglePixel(int r, int g, int b, int a) {
+  //round-trips a 1x1 RGBA image that holds just the given color
+  std::cout << "codec single pixel " << r << " " << g << " " << b << " " << a << std::endl;
+  const int rgba[4] = {r, g, b, a};
+
+  Image pixel;
+  pixel.width = 1;
+  pixel.height = 1;
+  pixel.bitDepth = 8;
+  pixel.colorType = LCT_RGBA;
+  pixel.data.resize(4);
+  for(size_t c = 0; c < 4; c++) pixel.data[c] = rgba[c];
+
+  doCodecTest(pixel);
+}
+
+//Round-trips 20x20 RGBA images based on the uniform color (r, g, b, a): the plain image, variants with one fully
+//transparent or semi transparent pixel, variants with 255 and 256 additional grey colors, and finally a single pixel.
+void testColor(int r, int g, int b, int a) {
+  std::cout << "codec test color " << r << " " << g << " " << b << " " << a << std::endl;
+  Image image;
+  image.width = 20;
+  image.height = 20;
+  image.colorType = LCT_RGBA;
+  image.bitDepth = 8;
+  image.data.resize(20 * 20 * 4);
+  for(size_t y = 0; y < 20; y++)
+  for(size_t x = 0; x < 20; x++) {
+    //every channel goes to its own byte of the pixel, in the order R, G, B, A
+    image.data[20 * 4 * y + 4 * x + 0] = r;
+    image.data[20 * 4 * y + 4 * x + 1] = g;
+    image.data[20 * 4 * y + 4 * x + 2] = b;
+    image.data[20 * 4 * y + 4 * x + 3] = a;
+  }
+
+  doCodecTest(image);
+
+  Image image2 = image;
+  image2.data[3] = 0; //one fully transparent pixel
+  doCodecTest(image2);
+  image2.data[3] = 128; //one semi transparent pixel
+  doCodecTest(image2);
+
+  Image image3 = image;
+  // add 255 different colors; they must go into image3, the copy that is tested below
+  for(size_t i = 0; i < 255; i++) {
+    image3.data[i * 4 + 0] = i;
+    image3.data[i * 4 + 1] = i;
+    image3.data[i * 4 + 2] = i;
+    image3.data[i * 4 + 3] = 255;
+  }
+  doCodecTest(image3);
+  // a 256th color: opaque white in pixel 255
+  for(size_t c = 0; c < 4; c++) image3.data[255 * 4 + c] = 255;
+  doCodecTest(image3);
+
+  testSinglePixel(r, g, b, a);
+}
+
+// Tests combinations of various colors in different orders: for every 4-tuple (i, j, k, l) out of six colors, color j
+// fills the diagonal (x == y), color i the rest, and colors k and l are put in the first and the last pixel.
+void testFewColors() {
+  std::cout << "codec test few colors " << std::endl;
+  static const unsigned char colors[] = {
+    0, 0, 0, 255, // black
+    255, 255, 255, 255, // white
+    128, 128, 128, 255, // grey
+    0, 0, 255, 255, // blue
+    255, 255, 255, 0, // transparent white
+    255, 255, 255, 1 // translucent white
+  };
+  const size_t numColors = sizeof(colors) / 4;
+
+  Image image;
+  image.width = image.height = 20;
+  image.colorType = LCT_RGBA;
+  image.bitDepth = 8;
+  image.data.resize(image.width * image.height * 4);
+  const size_t numPixels = image.data.size() / 4;
+  for(size_t i = 0; i < numColors; i++)
+  for(size_t j = 0; j < numColors; j++)
+  for(size_t k = 0; k < numColors; k++)
+  for(size_t l = 0; l < numColors; l++) {
+    for(size_t p = 0; p < numPixels; p++) {
+      const bool diagonal = (p % image.width) == (p / image.width);
+      const size_t color = (p == 0) ? k : (p + 1 == numPixels) ? l : diagonal ? j : i;
+      for(size_t c = 0; c < 4; c++) image.data[p * 4 + c] = colors[color * 4 + c];
+    }
+    doCodecTest(image);
+  }
+}
+
+//Round-trips a w x h RGBA image whose color depends on the coordinates (R = x, G = y, B = 255, A = 255)
+void testSize(unsigned w, unsigned h) {
+  std::cout << "codec test size " << w << " " << h << std::endl;
+  Image image;
+  image.width = w;
+  image.height = h;
+  image.colorType = LCT_RGBA;
+  image.bitDepth = 8;
+  image.data.resize(w * h * 4);
+  for(size_t y = 0; y < h; y++)
+  for(size_t x = 0; x < w; x++) {
+    image.data[w * 4 * y + 4 * x + 0] = x % 256;
+    image.data[w * 4 * y + 4 * x + 1] = y % 256;
+    image.data[w * 4 * y + 4 * x + 2] = 255;
+    image.data[w * 4 * y + 4 * x + 3] = 255;
+  }
+  doCodecTest(image);
+}
+
+void testPNGCodec() {
+  //generated images of various sizes and color modes
+  codecTest(1, 1);
+  codecTest(2, 2);
+  codecTest(1, 1, LCT_GREY, 1);
+  codecTest(7, 7, LCT_GREY, 1);
+#ifndef DISABLE_SLOW
+  codecTest(127, 127);
+  codecTest(127, 127, LCT_GREY, 1);
+  codecTest(500, 500);
+  codecTest(1, 10000);
+  codecTest(10000, 1);
+
+  testOtherPattern1();
+  testOtherPattern2();
+#endif // DISABLE_SLOW
+
+  //uniform colors as {r, g, b, a}, listed in the order in which they are tested
+  static const int colors[][4] = {
+    {255, 255, 255, 255}, {0, 0, 0, 255},
+    {1, 2, 3, 255}, {255, 0, 0, 255},
+    {0, 255, 0, 255}, {0, 0, 255, 255},
+    {0, 0, 0, 255}, {1, 1, 1, 255},
+    {1, 1, 1, 1}, {0, 0, 0, 128},
+    {255, 0, 0, 128}, {127, 127, 127, 255},
+    {128, 128, 128, 255}, {127, 127, 127, 128},
+    {128, 128, 128, 128},
+    //transparent single pixels
+    {0, 0, 0, 0}, {255, 0, 0, 0},
+    {1, 2, 3, 0}, {255, 255, 255, 0},
+    {254, 254, 254, 0}
+  };
+  const size_t numColors = sizeof(colors) / sizeof(colors[0]);
+  for(size_t i = 0; i < numColors; i++) {
+    testColor(colors[i][0], colors[i][1], colors[i][2], colors[i][3]);
+  }
+
+  // This is mainly to test the Adam7 interlacing
+  for(unsigned h = 1; h < 12; h++) {
+    for(unsigned w = 1; w < 12; w++) {
+      testSize(w, h);
+    }
+  }
+}
+
+//Tests some specific color conversions with specific color bit combinations.
+//Each colorConvertTest call passes a string of bits, a color type and a bit depth, followed by
+//a second string of bits, color type and bit depth. Presumably the first triple describes the
+//input pixels and the second the expected conversion result - colorConvertTest is defined
+//elsewhere, confirm there. Bit strings longer than one byte are mostly written in
+//space-separated groups of 8 bits.
+void testColorConvert() {
+  //test color conversions to RGBA8
+  colorConvertTest("1", LCT_GREY, 1, "11111111 11111111 11111111 11111111", LCT_RGBA, 8);
+  colorConvertTest("10", LCT_GREY, 2, "10101010 10101010 10101010 11111111", LCT_RGBA, 8);
+  colorConvertTest("1001", LCT_GREY, 4, "10011001 10011001 10011001 11111111", LCT_RGBA, 8);
+  colorConvertTest("10010101", LCT_GREY, 8, "10010101 10010101 10010101 11111111", LCT_RGBA, 8);
+  colorConvertTest("10010101 11111110", LCT_GREY_ALPHA, 8, "10010101 10010101 10010101 11111110", LCT_RGBA, 8);
+  //for the 16-bit cases, the 8-bit value in the second string equals the first byte of each 16-bit channel
+  colorConvertTest("10010101 00000001 11111110 00000001", LCT_GREY_ALPHA, 16, "10010101 10010101 10010101 11111110", LCT_RGBA, 8);
+  colorConvertTest("01010101 00000000 00110011", LCT_RGB, 8, "01010101 00000000 00110011 11111111", LCT_RGBA, 8);
+  colorConvertTest("01010101 00000000 00110011 10101010", LCT_RGBA, 8, "01010101 00000000 00110011 10101010", LCT_RGBA, 8);
+  colorConvertTest("10101010 01010101 11111111 00000000 11001100 00110011", LCT_RGB, 16, "10101010 11111111 11001100 11111111", LCT_RGBA, 8);
+  colorConvertTest("10101010 01010101 11111111 00000000 11001100 00110011 11100111 00011000", LCT_RGBA, 16, "10101010 11111111 11001100 11100111", LCT_RGBA, 8);
+
+  //test color conversions to RGB8
+  //same first strings as above, the second strings have no alpha byte
+  colorConvertTest("1", LCT_GREY, 1, "11111111 11111111 11111111", LCT_RGB, 8);
+  colorConvertTest("10", LCT_GREY, 2, "10101010 10101010 10101010", LCT_RGB, 8);
+  colorConvertTest("1001", LCT_GREY, 4, "10011001 10011001 10011001", LCT_RGB, 8);
+  colorConvertTest("10010101", LCT_GREY, 8, "10010101 10010101 10010101", LCT_RGB, 8);
+  colorConvertTest("10010101 11111110", LCT_GREY_ALPHA, 8, "10010101 10010101 10010101", LCT_RGB, 8);
+  colorConvertTest("10010101 00000001 11111110 00000001", LCT_GREY_ALPHA, 16, "10010101 10010101 10010101", LCT_RGB, 8);
+  colorConvertTest("01010101 00000000 00110011", LCT_RGB, 8, "01010101 00000000 00110011", LCT_RGB, 8);
+  colorConvertTest("01010101 00000000 00110011 10101010", LCT_RGBA, 8, "01010101 00000000 00110011", LCT_RGB, 8);
+  colorConvertTest("10101010 01010101 11111111 00000000 11001100 00110011", LCT_RGB, 16, "10101010 11111111 11001100", LCT_RGB, 8);
+  colorConvertTest("10101010 01010101 11111111 00000000 11001100 00110011 11100111 00011000", LCT_RGBA, 16, "10101010 11111111 11001100", LCT_RGB, 8);
+
+  //test color conversions to RGBA16
+  colorConvertTest("1", LCT_GREY, 1, "11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111", LCT_RGBA, 16);
+  colorConvertTest("10", LCT_GREY, 2, "10101010 10101010 10101010 10101010 10101010 10101010 11111111 11111111", LCT_RGBA, 16);
+
+  //test greyscale color conversions
+  //in these cases the bit pattern of the first string is repeated to fill the wider second string
+  colorConvertTest("1", LCT_GREY, 1, "11111111", LCT_GREY, 8);
+  colorConvertTest("1", LCT_GREY, 1, "1111111111111111", LCT_GREY, 16);
+  colorConvertTest("0", LCT_GREY, 1, "00000000", LCT_GREY, 8);
+  colorConvertTest("0", LCT_GREY, 1, "0000000000000000", LCT_GREY, 16);
+  colorConvertTest("11", LCT_GREY, 2, "11111111", LCT_GREY, 8);
+  colorConvertTest("11", LCT_GREY, 2, "1111111111111111", LCT_GREY, 16);
+  colorConvertTest("10", LCT_GREY, 2, "10101010", LCT_GREY, 8);
+  colorConvertTest("10", LCT_GREY, 2, "1010101010101010", LCT_GREY, 16);
+  colorConvertTest("1000", LCT_GREY, 4, "10001000", LCT_GREY, 8);
+  colorConvertTest("1000", LCT_GREY, 4, "1000100010001000", LCT_GREY, 16);
+  colorConvertTest("10110101", LCT_GREY, 8, "1011010110110101", LCT_GREY, 16);
+  colorConvertTest("1011010110110101", LCT_GREY, 16, "10110101", LCT_GREY, 8);
+
+  //others
+  //two RGB pixels, all bits set and all bits clear, paired with the two 1-bit grey pixels "1" and "0"
+  colorConvertTest("11111111 11111111 11111111 00000000 00000000 00000000", LCT_RGB, 8, "10", LCT_GREY, 1);
+  colorConvertTest("11111111 11111111 11111111 11111111 11111111 11111111 00000000 00000000 00000000 00000000 00000000 00000000", LCT_RGB, 16, "10", LCT_GREY, 1);
+}
+
+//This tests color conversions from any color model to any color model, with any bit depth.
+//It tests only with the colors black and white, because those are the only colors every single model supports.
+//For each (input, output) pair, 3x3 reference pixels are converted reference -> input type ->
+//output type -> reference type, and the result must equal the reference pixels.
+void testColorConvert2() {
+  std::cout << "testColorConvert2" << std::endl;
+  struct Combo {
+    LodePNGColorType colortype;
+    unsigned bitdepth;
+  };
+
+  Combo combos[15] = { { LCT_GREY, 1}, { LCT_GREY, 2}, { LCT_GREY, 4}, { LCT_GREY, 8}, { LCT_GREY, 16}, { LCT_RGB, 8}, { LCT_RGB, 16}, { LCT_PALETTE, 1}, { LCT_PALETTE, 2}, { LCT_PALETTE, 4}, { LCT_PALETTE, 8}, { LCT_GREY_ALPHA, 8}, { LCT_GREY_ALPHA, 16}, { LCT_RGBA, 8}, { LCT_RGBA, 16},
+  };
+  const size_t numCombos = sizeof(combos) / sizeof(combos[0]);
+
+  lodepng::State state;
+  LodePNGColorMode& mode_in = state.info_png.color;
+  LodePNGColorMode& mode_out = state.info_raw;
+  LodePNGColorMode mode_8; //color mode of the reference pixels, left as initialized
+  lodepng_color_mode_init(&mode_8);
+
+  //Grey palette for the LCT_PALETTE combos: entry 0 is black and entry 1 is white, so that
+  //both test colors are among the first two entries; every other entry i is the grey value i.
+  for(size_t i = 0; i < 256; i++) {
+    size_t j = i == 1 ? 255 : i;
+    lodepng_palette_add(&mode_in, j, j, j, 255);
+    lodepng_palette_add(&mode_out, j, j, j, 255);
+  }
+
+  for(size_t i = 0; i < numCombos; i++) {
+    mode_in.colortype = combos[i].colortype;
+    mode_in.bitdepth = combos[i].bitdepth;
+
+    for(size_t j = 0; j < numCombos; j++) {
+      //The output mode must come from combos[j]: taking it from combos[i] as well would only
+      //ever test conversions of a color mode to itself.
+      mode_out.colortype = combos[j].colortype;
+      mode_out.bitdepth = combos[j].bitdepth;
+
+      unsigned char eight[36] = {
+          0,0,0,255, 255,255,255,255,
+          0,0,0,255, 255,255,255,255,
+          255,255,255,255, 0,0,0,255,
+          255,255,255,255, 255,255,255,255,
+          0,0,0,255 }; //input in RGBA8
+      //72 bytes hold 3x3 pixels at up to 8 bytes per pixel
+      unsigned char in[72]; //custom input color type
+      unsigned char out[72]; //custom output color type
+      unsigned char eight2[36]; //back in RGBA8 after all conversions to check correctness
+      unsigned error = 0;
+
+      error |= lodepng_convert(in, eight, &mode_in, &mode_8, 3, 3);
+      if(!error) error |= lodepng_convert(out, in, &mode_out, &mode_in, 3, 3); //Test input to output type
+      if(!error) error |= lodepng_convert(eight2, out, &mode_8, &mode_out, 3, 3);
+
+      if(!error) {
+        for(size_t k = 0; k < 36; k++) {
+          if(eight[k] != eight2[k]) {
+            error = 99999; //marker value for a pixel mismatch, as opposed to a conversion error
+            break;
+          }
+        }
+      }
+
+      if(error) {
+        std::cout << "Error " << error << " i: " << i << " j: " << j
+          << " colortype i: " << combos[i].colortype
+          << " bitdepth i: " << combos[i].bitdepth
+          << " colortype j: " << combos[j].colortype
+          << " bitdepth j: " << combos[j].bitdepth
+          << std::endl;
+        if(error != 99999) ASSERT_NO_PNG_ERROR(error);
+        else fail();
+      }
+    }
+  }
+}
+
+//Compresses the text with lodepng_zlib_compress, decompresses the result with
+//lodepng_zlib_decompress and asserts that the original bytes come back.
+//If compressible is true, the test will also assert that the compressed data is smaller than the input.
+void testCompressStringZlib(const std::string& text, bool compressible) {
+  const size_t length = text.size();
+  if(length >= 500) std::cout << "compress test with text length: " << length << std::endl;
+  else std::cout << "compress test with text: " << text << std::endl;
+
+  //the zlib functions work on unsigned bytes, so copy the characters over
+  std::vector<unsigned char> original;
+  original.reserve(length);
+  for(std::string::const_iterator it = text.begin(); it != text.end(); ++it) {
+    original.push_back((unsigned char)*it);
+  }
+
+  unsigned char* compressed = 0;
+  size_t compressedSize = 0;
+  const unsigned char* source = original.empty() ? 0 : &original[0];
+  unsigned error = lodepng_zlib_compress(&compressed, &compressedSize, source, original.size(), &lodepng_default_compress_settings);
+  ASSERT_NO_PNG_ERROR(error);
+  if(compressible) assertTrue(compressedSize < original.size());
+
+  unsigned char* restored = 0;
+  size_t restoredSize = 0;
+  error = lodepng_zlib_decompress(&restored, &restoredSize, compressed, compressedSize, &lodepng_default_decompress_settings);
+  ASSERT_NO_PNG_ERROR(error);
+  ASSERT_EQUALS(restoredSize, original.size());
+  for(size_t pos = 0; pos != original.size(); ++pos) ASSERT_EQUALS(original[pos], restored[pos]);
+
+  //both buffers are released with free, as they were handed out by the zlib functions
+  free(compressed);
+  free(restored);
+}
+
+//Runs the zlib compress/decompress round trip on a set of fixed strings.
+//The boolean marks the strings for which the compressed result is additionally
+//asserted to be smaller than the input.
+void testCompressZlib() {
+  //the empty string and very short strings are not asserted to shrink
+  testCompressStringZlib("", false);
+  testCompressStringZlib("a", false);
+  testCompressStringZlib("aa", false);
+  testCompressStringZlib("ababababababababababababababababababababababababababababababababababababababababababab", true);
+  testCompressStringZlib("abaaaabaabbbaabbabbababbbbabababbbaabbbaaaabbbbabbbabbbaababbbbbaaabaabbabaaaabbbbbbab", true);
+  testCompressStringZlib("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", true);
+  testCompressStringZlib("omnomnomnomnomnomnomnomnomnomnom", true);
+  testCompressStringZlib("the quick brown fox jumps over the lazy dog. the quick brown fox jumps over the lazy dog.", true);
+  testCompressStringZlib("abracadabra", false);
+  testCompressStringZlib("hello hello hello hello hello hello hello hello hello hello hello?", true);
+  //one string literal continued over two source lines with a trailing backslash
+  testCompressStringZlib("WPgZX2D*um0H::,4/KU\"kt\"Ne\"#Qa.&#<aF9{jag]|{hv,IXez\
+\\DKn5zYdV{XxBi=n|1J-TwakWvp[b8|-kOcZ@QkAxJSMeZ0l&<*w0BP/CXM(LFH'", false);
+  testCompressStringZlib("asdfhlkhfafsduyfbasiuytfgbiasuidygiausygdifaubsydfsdf", false);
+  testCompressStringZlib("418541499849814614617987416457317375467441841687487", true);
+  testCompressStringZlib("3.141592653589793238462643383279502884197169399375105820974944592307816406286", true);
+  testCompressStringZlib("lodepng_zlib_decompress(&out2, &outsize2, out, outsize, &lodepng_default_decompress_settings);", true);
+}
+
+//Loads the file with the given name from disk and runs the zlib compress/decompress
+//round trip on its bytes. The compressed size is not asserted to be smaller.
+void testDiskCompressZlib(const std::string& filename) {
+  std::cout << "testDiskCompressZlib: File " << filename << std::endl;
+
+  std::vector<unsigned char> buffer;
+  //Check the load result: if the file cannot be read, the buffer stays empty and the
+  //round trip below would silently "pass" on an empty string.
+  unsigned error = lodepng::load_file(buffer, filename);
+  ASSERT_NO_PNG_ERROR(error);
+
+  //build the string in one step instead of appending byte by byte
+  std::string f(buffer.begin(), buffer.end());
+  testCompressStringZlib(f, false);
+}
+
+//Decodes the PNG file with the given name to 8-bit RGB and runs doCodecTest on the decoded image.
+void testDiskPNG(const std::string& filename) {
+  std::cout << "testDiskPNG: File " << filename << std::endl;
+
+  Image decoded;
+  decoded.bitDepth = 8;
+  decoded.colorType = LCT_RGB;
+
+  const unsigned error = lodepng::decode(decoded.data, decoded.width, decoded.height,
+                                         filename, decoded.colorType, decoded.bitDepth);
+  ASSERT_NO_PNG_ERROR(error);
+
+  doCodecTest(decoded);
+}
+
+//Parses whitespace-separated unsigned integers from the string into a vector.
+//Parsing stops at the first token that cannot be read as an unsigned integer.
+std::vector<unsigned> strtovector(const std::string& numbers) {
+  std::istringstream stream(numbers);
+  std::vector<unsigned> values;
+  for(unsigned value = 0; stream >> value;) values.push_back(value);
+  return values;
+}
+
+//Runs lodepng_huffman_code_lengths on the given counts and asserts that the resulting
+//values equal the expected ones.
+//expectedstr: the expected results, as whitespace-separated unsigned integers
+//counts: the input counts, as whitespace-separated unsigned integers, one per expected value
+//bitlength: passed on as the last argument of lodepng_huffman_code_lengths
+void doTestHuffmanCodeLengths(const std::string& expectedstr, const std::string& counts, size_t bitlength) {
+  std::vector<unsigned> expected = strtovector(expectedstr);
+  std::vector<unsigned> count = strtovector(counts);
+  std::cout << "doTestHuffmanCodeLengths: " << counts << std::endl;
+  //Guard against malformed test data: &count[0] and &result[0] below need non-empty
+  //vectors, and the comparison loop indexes expected with the indices of count.
+  assertTrue(!count.empty(), "counts must not be empty");
+  ASSERT_EQUALS(count.size(), expected.size());
+  std::vector<unsigned> result(count.size());
+  unsigned error = lodepng_huffman_code_lengths(&result[0], &count[0], count.size(), bitlength);
+  ASSERT_NO_PNG_ERROR_MSG(error, "errorcode");
+  //compare as strings so that a failure shows both complete lists
+  std::stringstream ss1, ss2;
+  for(size_t i = 0; i < count.size(); i++) {
+    ss1 << expected[i] << " ";
+    ss2 << result[i] << " ";
+  }
+  assertEquals(ss1.str(), ss2.str(), "value");
+}
+
+//Runs doTestHuffmanCodeLengths on a fixed list of (expected, counts, bitlength) cases.
+//The first group of cases depends on whether at least two symbols get a nonzero
+//length; the remaining cases are the same either way.
+void testHuffmanCodeLengths() {
+  struct Case {
+    const char* expected;
+    const char* counts;
+    size_t bitlength;
+  };
+
+  const bool atleasttwo = true; //LodePNG generates at least two, instead of at least one, symbol
+
+  static const Case twoSymbolCases[] = {
+    {"1 1", "0 0", 16},
+    {"1 1 0", "0 0 0", 16},
+    {"1 1", "1 0", 16},
+    {"1 1 0 0 0 0 0 0 0", "0 0 0 0 0 0 0 0 0", 16},
+    {"1 1 0 0 0 0 0 0 0", "1 0 0 0 0 0 0 0 0", 16},
+    {"1 1 0 0 0 0 0 0 0", "0 1 0 0 0 0 0 0 0", 16},
+    {"1 0 0 0 0 0 0 0 1", "0 0 0 0 0 0 0 0 1", 16},
+    {"0 0 0 0 0 0 0 1 1", "0 0 0 0 0 0 0 1 1", 16}
+  };
+  static const Case oneSymbolCases[] = {
+    {"1 0", "0 0", 16},
+    {"1 0 0", "0 0 0", 16},
+    {"1 0", "1 0", 16},
+    {"1", "1", 16},
+    {"1", "0", 16}
+  };
+  static const Case commonCases[] = {
+    {"1 1", "1 1", 16},
+    {"1 1", "1 100", 16},
+    {"2 2 1", "1 2 3", 16},
+    {"2 1 2", "2 3 1", 16},
+    {"1 2 2", "3 1 2", 16},
+    {"3 3 2 1", "1 30 31 32", 16},
+    {"2 2 2 2", "1 30 31 32", 2},
+    {"5 5 4 4 4 3 3 1", "1 2 3 4 5 6 7 500", 16}
+  };
+
+  const Case* variantCases = atleasttwo ? twoSymbolCases : oneSymbolCases;
+  const size_t numVariantCases = atleasttwo
+      ? sizeof(twoSymbolCases) / sizeof(twoSymbolCases[0])
+      : sizeof(oneSymbolCases) / sizeof(oneSymbolCases[0]);
+  for(size_t c = 0; c < numVariantCases; c++) {
+    doTestHuffmanCodeLengths(variantCases[c].expected, variantCases[c].counts, variantCases[c].bitlength);
+  }
+
+  const size_t numCommonCases = sizeof(commonCases) / sizeof(commonCases[0]);
+  for(size_t c = 0; c < numCommonCases; c++) {
+    doTestHuffmanCodeLengths(commonCases[c].expected, commonCases[c].counts, commonCases[c].bitlength);
+  }
+}
+
+/*
+Create a PNG image with all known chunks (except only one of tEXt or zTXt) plus
+unknown chunks, and a palette.
+The encoded PNG is written to the png parameter. The image is 16x17 pixels with
+8-bit palette indices; the values set below (background, texts, time, physical
+dimensions, unknown chunks) are what other tests in this file read back.
+*/
+void createComplexPNG(std::vector<unsigned char>& png) {
+  unsigned w = 16, h = 17;
+  //one palette index per pixel: pixel i uses index i % 256
+  std::vector<unsigned char> image(w * h);
+  for(size_t i = 0; i < w * h; i++) {
+    image[i] = i % 256;
+  }
+
+  lodepng::State state;
+  LodePNGInfo& info = state.info_png;
+  //same color mode for the PNG and for the raw input, with auto_convert disabled
+  info.color.colortype = LCT_PALETTE;
+  info.color.bitdepth = 8;
+  state.info_raw.colortype = LCT_PALETTE;
+  state.info_raw.bitdepth = 8;
+  state.encoder.auto_convert = false;
+  state.encoder.text_compression = 1;
+  //NOTE(review): add_id presumably makes the encoder add its own identification text - confirm in lodepng.h
+  state.encoder.add_id = 1;
+  //256 palette entries, entry i has red, green, blue and alpha all equal to i
+  for(size_t i = 0; i < 256; i++) {
+    lodepng_palette_add(&info.color, i, i, i, i);
+    lodepng_palette_add(&state.info_raw, i, i, i, i);
+  }
+
+  info.background_defined = 1;
+  info.background_r = 127;
+
+  lodepng_add_text(&info, "key0", "string0");
+  lodepng_add_text(&info, "key1", "string1");
+
+  lodepng_add_itext(&info, "ikey0", "ilangtag0", "itranskey0", "istring0");
+  lodepng_add_itext(&info, "ikey1", "ilangtag1", "itranskey1", "istring1");
+
+  info.time_defined = 1;
+  info.time.year = 2012;
+  info.time.month = 1;
+  info.time.day = 2;
+  info.time.hour = 3;
+  info.time.minute = 4;
+  info.time.second = 5;
+
+  info.phys_defined = 1;
+  info.phys_x = 1;
+  info.phys_y = 2;
+  info.phys_unit = 1;
+
+  //Unknown chunks with 3 bytes of data each: two "uNKa" chunks in slot 0, "uNKb" in slot 1, "uNKc" in slot 2.
+  //NOTE(review): the three slots presumably select where in the file the chunks are written - confirm in lodepng.h
+  lodepng_chunk_create(&info.unknown_chunks_data[0], &info.unknown_chunks_size[0], 3, "uNKa", (unsigned char*)"a00");
+  lodepng_chunk_create(&info.unknown_chunks_data[0], &info.unknown_chunks_size[0], 3, "uNKa", (unsigned char*)"a01");
+  lodepng_chunk_create(&info.unknown_chunks_data[1], &info.unknown_chunks_size[1], 3, "uNKb", (unsigned char*)"b00");
+  lodepng_chunk_create(&info.unknown_chunks_data[2], &info.unknown_chunks_size[2], 3, "uNKc", (unsigned char*)"c00");
+
+  unsigned error = lodepng::encode(png, &image[0], w, h, state);
+  ASSERT_NO_PNG_ERROR(error);
+}
+
+//Returns the type names of the chunks in the PNG, each preceded by a space, from the
+//first chunk after the 8 leading bytes up to and including IEND.
+//Asserts that a complete chunk header is available before each chunk is read, so that an
+//empty or truncated PNG fails the assertion instead of being read out of bounds.
+std::string extractChunkNames(const std::vector<unsigned char>& png) {
+  //8 leading bytes, plus 12 bytes that every chunk has besides its data
+  assertTrue(png.size() >= 20u, "png too small to contain chunks");
+  const unsigned char* chunk = &png[8];
+  const unsigned char* end = &png[0] + png.size();
+  char name[5];
+  std::string result = "";
+  for(;;) {
+    lodepng_chunk_type(name, chunk);
+    result += (std::string(" ") + name);
+    if(std::string(name) == "IEND") break;
+    chunk = lodepng_chunk_next_const(chunk);
+    //the next chunk must start inside the buffer and leave room for 12 bytes
+    assertTrue(chunk < end && (size_t)(end - chunk) >= 12u, "jumped out of chunks");
+  }
+  return result;
+}
+
+//Decodes the PNG made by createComplexPNG and checks that its size, background, time,
+//physical dimensions, chunk order, texts and international texts are as expected.
+//Then checks that no texts are returned when read_text_chunks is disabled.
+void testComplexPNG() {
+  std::cout << "testComplexPNG" << std::endl;
+
+  std::vector<unsigned char> png;
+  createComplexPNG(png);
+
+  // With unchanged decoder settings, everything including the texts is read back
+  {
+    lodepng::State decodeState;
+    LodePNGInfo& meta = decodeState.info_png;
+    unsigned width, height;
+    std::vector<unsigned char> pixels;
+    unsigned error = lodepng::decode(pixels, width, height, decodeState, &png[0], png.size());
+    ASSERT_NO_PNG_ERROR(error);
+
+    ASSERT_EQUALS(16, width);
+    ASSERT_EQUALS(17, height);
+
+    ASSERT_EQUALS(1, meta.background_defined);
+    ASSERT_EQUALS(127, meta.background_r);
+
+    ASSERT_EQUALS(1, meta.time_defined);
+    ASSERT_EQUALS(2012, meta.time.year);
+    ASSERT_EQUALS(1, meta.time.month);
+    ASSERT_EQUALS(2, meta.time.day);
+    ASSERT_EQUALS(3, meta.time.hour);
+    ASSERT_EQUALS(4, meta.time.minute);
+    ASSERT_EQUALS(5, meta.time.second);
+
+    ASSERT_EQUALS(1, meta.phys_defined);
+    ASSERT_EQUALS(1, meta.phys_x);
+    ASSERT_EQUALS(2, meta.phys_y);
+    ASSERT_EQUALS(1, meta.phys_unit);
+
+    // The chunk order in the encoded file
+    const std::string expectedNames = " IHDR uNKa uNKa PLTE tRNS bKGD pHYs uNKb IDAT tIME zTXt zTXt tEXt iTXt iTXt uNKc IEND";
+    const std::string actualNames = extractChunkNames(png);
+    ASSERT_EQUALS(expectedNames, actualNames);
+
+    // Two texts added by the test, plus one with the LodePNG version
+    ASSERT_EQUALS(3, meta.text_num);
+    ASSERT_STRING_EQUALS("key0", meta.text_keys[0]);
+    ASSERT_STRING_EQUALS("string0", meta.text_strings[0]);
+    ASSERT_STRING_EQUALS("key1", meta.text_keys[1]);
+    ASSERT_STRING_EQUALS("string1", meta.text_strings[1]);
+    ASSERT_STRING_EQUALS("LodePNG", meta.text_keys[2]);
+    ASSERT_STRING_EQUALS(LODEPNG_VERSION_STRING, meta.text_strings[2]);
+
+    ASSERT_EQUALS(2, meta.itext_num);
+    ASSERT_STRING_EQUALS("ikey0", meta.itext_keys[0]);
+    ASSERT_STRING_EQUALS("ilangtag0", meta.itext_langtags[0]);
+    ASSERT_STRING_EQUALS("itranskey0", meta.itext_transkeys[0]);
+    ASSERT_STRING_EQUALS("istring0", meta.itext_strings[0]);
+    ASSERT_STRING_EQUALS("ikey1", meta.itext_keys[1]);
+    ASSERT_STRING_EQUALS("ilangtag1", meta.itext_langtags[1]);
+    ASSERT_STRING_EQUALS("itranskey1", meta.itext_transkeys[1]);
+    ASSERT_STRING_EQUALS("istring1", meta.itext_strings[1]);
+
+    // TODO: test if unknown chunks listed too
+  }
+
+  // Test that if read_text_chunks is disabled, we do not get the texts
+  {
+    lodepng::State decodeState;
+    decodeState.decoder.read_text_chunks = 0;
+    unsigned width, height;
+    std::vector<unsigned char> pixels;
+    unsigned error = lodepng::decode(pixels, width, height, decodeState, &png[0], png.size());
+    ASSERT_NO_PNG_ERROR(error);
+
+    const LodePNGInfo& meta = decodeState.info_png;
+    ASSERT_EQUALS(0, meta.text_num);
+    ASSERT_EQUALS(0, meta.itext_num);
+
+    // But we should still get other values.
+    ASSERT_EQUALS(2012, meta.time.year);
+  }
+}
+
+// Tests lodepng_inspect_chunk, and also lodepng_chunk_find to find the chunk to inspect.
+// Text reading is disabled, so after lodepng_inspect the info has neither time nor texts;
+// each lodepng_inspect_chunk call is then expected to add the content of exactly one chunk.
+void testInspectChunk() {
+  std::cout << "testInspectChunk" << std::endl;
+
+  std::vector<unsigned char> png;
+  createComplexPNG(png);
+
+  const unsigned char* chunk;
+  lodepng::State state;
+  LodePNGInfo& info = state.info_png;
+  state.decoder.read_text_chunks = 0;
+  ASSERT_NO_PNG_ERROR(lodepng_inspect(0, 0, &state, png.data(), png.size()));
+  chunk = lodepng_chunk_find(png.data(), png.data() + png.size(), "tIME");
+  ASSERT_NOT_EQUALS((const unsigned char*)0, chunk);
+  ASSERT_EQUALS(0, info.time_defined);
+  ASSERT_NO_PNG_ERROR(lodepng_inspect_chunk(&state, (size_t)(chunk - png.data()), png.data(), png.size()));
+  ASSERT_EQUALS(1, info.time_defined);
+  ASSERT_EQUALS(2012, state.info_png.time.year);
+  ASSERT_EQUALS(1, info.time.month);
+  ASSERT_EQUALS(2, info.time.day);
+  ASSERT_EQUALS(3, info.time.hour);
+  ASSERT_EQUALS(4, info.time.minute);
+  ASSERT_EQUALS(5, info.time.second);
+
+  ASSERT_EQUALS(0, info.text_num);
+  chunk = lodepng_chunk_find_const(png.data(), png.data() + png.size(), "zTXt");
+  // a failed search must fail the test here, not produce a bogus offset below
+  ASSERT_NOT_EQUALS((const unsigned char*)0, chunk);
+  ASSERT_NO_PNG_ERROR(lodepng_inspect_chunk(&state, (size_t)(chunk - png.data()), png.data(), png.size()));
+  ASSERT_EQUALS(1, info.text_num);
+  // Continue the search behind the chunk that was just found. Searching from that chunk
+  // itself would presumably return the same chunk again (assumes lodepng_chunk_find_const
+  // tests its starting chunk first - confirm in lodepng.cpp), so the second zTXt chunk
+  // would never be inspected.
+  chunk = lodepng_chunk_find_const(lodepng_chunk_next_const(chunk), png.data() + png.size(), "zTXt");
+  ASSERT_NOT_EQUALS((const unsigned char*)0, chunk);
+  ASSERT_NO_PNG_ERROR(lodepng_inspect_chunk(&state, (size_t)(chunk - png.data()), png.data(), png.size()));
+  ASSERT_EQUALS(2, info.text_num);
+  // the two texts must come from two different chunks
+  ASSERT_STRING_EQUALS("key0", info.text_keys[0]);
+  ASSERT_STRING_EQUALS("key1", info.text_keys[1]);
+}
+
+//Checks that, by default, filter type zero is chosen for all scanlines if the image has a
+//palette: all 17 scanlines of the PNG made by createComplexPNG must report filter type 0.
+void testPaletteFilterTypesZero() {
+  std::cout << "testPaletteFilterTypesZero" << std::endl;
+
+  std::vector<unsigned char> encoded;
+  createComplexPNG(encoded);
+
+  std::vector<unsigned char> filters;
+  lodepng::getFilterTypes(filters, encoded);
+
+  ASSERT_EQUALS(17, filters.size());
+  for(size_t row = 0; row != 17; ++row) {
+    ASSERT_EQUALS(0, filters[row]);
+  }
+}
+
+//Checks that encoding works, without crashes, when the auto color chooser is enabled and
+//both the PNG and the raw color mode use a palette with translucent entries.
+void testPaletteToPaletteConvert() {
+  std::cout << "testPaletteToPaletteConvert" << std::endl;
+
+  //16x16 palette indices, pixel p uses index p % 256
+  const unsigned width = 16, height = 16;
+  std::vector<unsigned char> indices(width * height);
+  for(size_t p = 0; p < indices.size(); p++) indices[p] = p % 256;
+
+  lodepng::State state;
+  LodePNGColorMode& pngMode = state.info_png.color;
+  LodePNGColorMode& rawMode = state.info_raw;
+  rawMode.colortype = LCT_PALETTE;
+  pngMode.colortype = LCT_PALETTE;
+  rawMode.bitdepth = 8;
+  pngMode.bitdepth = 8;
+  ASSERT_EQUALS(true, state.encoder.auto_convert);
+
+  //both palettes get 256 entries, entry c has all four channels equal to c
+  for(size_t c = 0; c < 256; c++) lodepng_palette_add(&pngMode, c, c, c, c);
+  for(size_t c = 0; c < 256; c++) lodepng_palette_add(&rawMode, c, c, c, c);
+
+  std::vector<unsigned char> png;
+  const unsigned error = lodepng::encode(png, &indices[0], width, height, state);
+  ASSERT_NO_PNG_ERROR(error);
+}
+
+//Encodes an RGBA image made of the given colors, decodes it again and checks that the
+//pixels are unchanged and that the PNG has the expected color type. If a palette is
+//expected, also checks that the PNG palette holds exactly the given colors in order.
+//palette: size colors of 4 bytes (R, G, B, A) each
+//For this test, you have to choose palette colors that cause LodePNG to actually use a palette,
+//so don't use all greyscale colors for example.
+void doRGBAToPaletteTest(unsigned char* palette, size_t size, LodePNGColorType expectedType = LCT_PALETTE) {
+  std::cout << "testRGBToPaletteConvert " << size << std::endl;
+
+  unsigned width = size;
+  unsigned height = 257; /*LodePNG encodes no palette if image is too small*/
+  const size_t paletteBytes = size * 4;
+
+  //every row repeats the palette colors from left to right
+  std::vector<unsigned char> pixels(width * height * 4);
+  for(size_t pos = 0; pos < pixels.size(); pos++) pixels[pos] = palette[pos % paletteBytes];
+
+  std::vector<unsigned char> png;
+  unsigned error = lodepng::encode(png, &pixels[0], width, height);
+  ASSERT_NO_PNG_ERROR(error);
+
+  lodepng::State state;
+  std::vector<unsigned char> decoded;
+  error = lodepng::decode(decoded, width, height, state, png);
+  ASSERT_NO_PNG_ERROR(error);
+  ASSERT_EQUALS(pixels.size(), decoded.size());
+  for(size_t pos = 0; pos < pixels.size(); pos++) ASSERT_EQUALS(pixels[pos], decoded[pos]);
+
+  const LodePNGColorMode& pngMode = state.info_png.color;
+  ASSERT_EQUALS(expectedType, pngMode.colortype);
+  if(expectedType != LCT_PALETTE) return;
+
+  ASSERT_EQUALS(size, pngMode.palettesize);
+  for(size_t pos = 0; pos < paletteBytes; pos++) ASSERT_EQUALS(pngMode.palette[pos], pixels[pos]);
+}
+
+//Runs doRGBAToPaletteTest with 1, 2, 3 and 256 colors, for which a palette PNG is
+//expected, and with 257 colors, for which an RGBA PNG is expected.
+void testRGBToPaletteConvert() {
+  unsigned char oneColor[4] = {1,2,3,4};
+  doRGBAToPaletteTest(oneColor, 1);
+
+  unsigned char twoColors[8] = {1,2,3,4, 5,6,7,8};
+  doRGBAToPaletteTest(twoColors, 2);
+
+  unsigned char threeColors[12] = {1,1,1,255, 20,20,20,255, 20,20,21,255};
+  doRGBAToPaletteTest(threeColors, 3);
+
+  //256 colors that differ only in the red channel
+  std::vector<unsigned char> colors(256 * 4);
+  for(size_t c = 0; c < 256; c++) {
+    colors[c * 4 + 0] = (unsigned char)c;
+    colors[c * 4 + 1] = 5;
+    colors[c * 4 + 2] = 6;
+    colors[c * 4 + 3] = 128;
+  }
+  doRGBAToPaletteTest(&colors[0], 256);
+
+  //append a 257th color
+  const unsigned char extraColor[4] = {5, 6, 7, 8};
+  colors.insert(colors.end(), extraColor, extraColor + 4);
+  doRGBAToPaletteTest(&colors[0], 257, LCT_RGBA);
+}
+
+//Encodes a 32x32 RGBA image in which every pixel has a different color and only pixel 23
+//is fully transparent. Checks that the decoded PNG info has a color key with the color of
+//that pixel, and that the pixels are unchanged after decoding.
+void testColorKeyConvert() {
+  std::cout << "testColorKeyConvert" << std::endl;
+
+  unsigned w = 32, h = 32;
+  const size_t numPixels = w * h;
+  std::vector<unsigned char> original(numPixels * 4);
+  for(size_t p = 0; p < numPixels; p++) {
+    unsigned char* rgba = &original[p * 4];
+    rgba[0] = p % 256;
+    rgba[1] = p / 256;
+    rgba[2] = 0;
+    rgba[3] = (p == 23) ? 0 : 255; //the single transparent pixel
+  }
+
+  std::vector<unsigned char> png;
+  unsigned error = lodepng::encode(png, &original[0], w, h);
+  ASSERT_NO_PNG_ERROR(error);
+
+  lodepng::State state;
+  std::vector<unsigned char> decoded;
+  error = lodepng::decode(decoded, w, h, state, png);
+  ASSERT_NO_PNG_ERROR(error);
+  ASSERT_EQUALS(32, w);
+  ASSERT_EQUALS(32, h);
+
+  const LodePNGColorMode& pngMode = state.info_png.color;
+  ASSERT_EQUALS(1, pngMode.key_defined);
+  ASSERT_EQUALS(23, pngMode.key_r);
+  ASSERT_EQUALS(0, pngMode.key_g);
+  ASSERT_EQUALS(0, pngMode.key_b);
+
+  ASSERT_EQUALS(original.size(), decoded.size());
+  for(size_t pos = 0; pos < original.size(); pos++) ASSERT_EQUALS(original[pos], decoded[pos]);
+}
+
+//Encodes a 32x32 RGBA image with alternating black and white pixels that all have alpha 0,
+//with auto_convert disabled and RGBA 8-bit requested for the PNG. Checks that the decoded
+//PNG info reports RGBA with bit depth 8 and that the pixels are unchanged after decoding.
+void testNoAutoConvert() {
+  std::cout << "testNoAutoConvert" << std::endl;
+
+  unsigned w = 32, h = 32;
+  const size_t numPixels = w * h;
+  std::vector<unsigned char> original(numPixels * 4);
+  for(size_t p = 0; p < numPixels; p++) {
+    const unsigned char value = (p % 2) ? 255 : 0;
+    unsigned char* rgba = &original[p * 4];
+    rgba[0] = value;
+    rgba[1] = value;
+    rgba[2] = value;
+    rgba[3] = 0;
+  }
+
+  lodepng::State encodeState;
+  encodeState.encoder.auto_convert = false;
+  encodeState.info_png.color.bitdepth = 8;
+  encodeState.info_png.color.colortype = LCT_RGBA;
+  std::vector<unsigned char> png;
+  unsigned error = lodepng::encode(png, &original[0], w, h, encodeState);
+  ASSERT_NO_PNG_ERROR(error);
+
+  lodepng::State decodeState;
+  std::vector<unsigned char> decoded;
+  error = lodepng::decode(decoded, w, h, decodeState, png);
+  ASSERT_NO_PNG_ERROR(error);
+  ASSERT_EQUALS(32, w);
+  ASSERT_EQUALS(32, h);
+  ASSERT_EQUALS(LCT_RGBA, decodeState.info_png.color.colortype);
+  ASSERT_EQUALS(8, decodeState.info_png.color.bitdepth);
+
+  ASSERT_EQUALS(original.size(), decoded.size());
+  for(size_t pos = 0; pos < original.size(); pos++) ASSERT_EQUALS(original[pos], decoded[pos]);
+}
+
+//Returns c with the bit at position bitpos inverted, where position 0 is the least significant bit.
+unsigned char flipBit(unsigned char c, int bitpos) {
+  const int mask = 1 << bitpos;
+  return (unsigned char)(c ^ mask);
+}
+
+//Tests various broken inputs. The returned errors are not checked: what is tested is
+//that it doesn't crash, and, when run with valgrind, that no memory warnings are given.
+//First every byte of a valid PNG is corrupted in several ways, one byte at a time,
+//then the PNG is truncated byte by byte down to nothing.
+void testFuzzing() {
+  std::cout << "testFuzzing" << std::endl;
+  std::vector<unsigned char> original;
+  createComplexPNG(original);
+  std::vector<unsigned char> corrupted = original;
+  std::vector<unsigned char> decoded;
+  std::map<unsigned, unsigned> errorCounts; //error code -> how often it was returned
+  unsigned w, h;
+  unsigned code;
+
+  //checksum checks are disabled in the decoder settings for all decodes below
+  lodepng::State state;
+  state.decoder.ignore_crc = 1;
+  state.decoder.zlibsettings.ignore_adler32 = 1;
+
+  for(size_t pos = 0; pos < original.size(); pos++) {
+    const unsigned char good = original[pos];
+    decoded.clear();
+
+    //all bits inverted
+    corrupted[pos] = (unsigned char)~good;
+    code = lodepng::decode(decoded, w, h, state, corrupted);
+    ++errorCounts[code];
+
+    //all bits cleared
+    corrupted[pos] = 0;
+    code = lodepng::decode(decoded, w, h, state, corrupted);
+    ++errorCounts[code];
+
+    //each single bit inverted
+    for(int bit = 0; bit < 8; bit++) {
+      corrupted[pos] = flipBit(good, bit);
+      code = lodepng::decode(decoded, w, h, state, corrupted);
+      ++errorCounts[code];
+    }
+
+    //all bits set
+    corrupted[pos] = 255;
+    code = lodepng::decode(decoded, w, h, state, corrupted);
+    ++errorCounts[code];
+
+    corrupted[pos] = good; //fix it again for the next test
+  }
+
+  std::cout << "testFuzzing shrinking" << std::endl;
+  corrupted = original;
+  while(!corrupted.empty()) {
+    corrupted.pop_back();
+    code = lodepng::decode(decoded, w, h, state, corrupted);
+    ++errorCounts[code];
+  }
+
+  //For fun, print the number of each error
+  std::cout << "Fuzzing error code counts: ";
+  typedef std::map<unsigned, unsigned>::iterator CountIterator;
+  for(CountIterator entry = errorCounts.begin(); entry != errorCounts.end(); ++entry) {
+    std::cout << entry->first << ":" << entry->second << ", ";
+  }
+  std::cout << std::endl;
+}
+
+//Sets a custom zlib compress callback in the encoder settings and checks that encoding
+//returns the error code of that callback, which shows that the callback was called.
+//The callback itself checks that it receives the custom context.
+void testCustomZlibCompress() {
+  std::cout << "testCustomZlibCompress" << std::endl;
+
+  struct Callback {
+    static unsigned compress(unsigned char**, size_t*,
+                             const unsigned char*, size_t,
+                             const LodePNGCompressSettings* settings) {
+      ASSERT_EQUALS(5, *(int*)(settings->custom_context));
+      return 5555; //return a custom error code to prove this function was called
+    }
+  };
+
+  Image image;
+  generateTestImage(image, 5, 5, LCT_RGBA, 8);
+
+  int contextValue = 5;
+  lodepng::State state;
+  state.encoder.zlibsettings.custom_context = &contextValue;
+  state.encoder.zlibsettings.custom_zlib = Callback::compress;
+
+  std::vector<unsigned char> encoded;
+  const unsigned error = lodepng::encode(encoded, image.data, image.width, image.height, state);
+
+  ASSERT_EQUALS(5555, error);
+}
+
+//Sets lodepng_zlib_compress itself as the custom zlib compress callback and checks that
+//the encoded PNG decodes without error to a 5x5 image, with the CRC and adler32 checks
+//not ignored.
+void testCustomZlibCompress2() {
+  std::cout << "testCustomZlibCompress2" << std::endl;
+
+  Image image;
+  generateTestImage(image, 5, 5, LCT_RGBA, 8);
+
+  //the same state is used for encoding and for decoding
+  lodepng::State state;
+  state.encoder.zlibsettings.custom_zlib = lodepng_zlib_compress;
+
+  std::vector<unsigned char> encoded;
+  unsigned error = lodepng::encode(encoded, image.data, image.width, image.height, state);
+  ASSERT_NO_PNG_ERROR(error);
+
+  state.decoder.ignore_crc = 0;
+  state.decoder.zlibsettings.ignore_adler32 = 0;
+
+  unsigned w, h;
+  std::vector<unsigned char> decoded;
+  error = lodepng::decode(decoded, w, h, state, encoded);
+  ASSERT_NO_PNG_ERROR(error);
+  ASSERT_EQUALS(5, w);
+  ASSERT_EQUALS(5, h);
+}
+
+//Sets a custom deflate callback in the encoder settings and checks that encoding returns
+//the error code of that callback, which shows that the callback was called.
+//The callback itself checks that it receives the custom context.
+void testCustomDeflate() {
+  std::cout << "testCustomDeflate" << std::endl;
+
+  struct Callback {
+    static unsigned deflate(unsigned char**, size_t*,
+                            const unsigned char*, size_t,
+                            const LodePNGCompressSettings* settings) {
+      ASSERT_EQUALS(5, *(int*)(settings->custom_context));
+      return 5555; //return a custom error code to prove this function was called
+    }
+  };
+
+  Image image;
+  generateTestImage(image, 5, 5, LCT_RGBA, 8);
+
+  int contextValue = 5;
+  lodepng::State state;
+  state.encoder.zlibsettings.custom_context = &contextValue;
+  state.encoder.zlibsettings.custom_deflate = Callback::deflate;
+
+  std::vector<unsigned char> encoded;
+  const unsigned error = lodepng::encode(encoded, image.data, image.width, image.height, state);
+
+  ASSERT_EQUALS(5555, error);
+}
+
+//Encodes a test image normally, then sets a custom zlib decompress callback in the decoder
+//settings and checks that decoding returns the error code of that callback, which shows
+//that the callback was called. The callback itself checks that it receives the custom context.
+void testCustomZlibDecompress() {
+  std::cout << "testCustomZlibDecompress" << std::endl;
+
+  struct Callback {
+    static unsigned decompress(unsigned char**, size_t*,
+                               const unsigned char*, size_t,
+                               const LodePNGDecompressSettings* settings) {
+      ASSERT_EQUALS(5, *(int*)(settings->custom_context));
+      return 5555; //return a custom error code to prove this function was called
+    }
+  };
+
+  Image image;
+  generateTestImage(image, 5, 5, LCT_RGBA, 8);
+
+  std::vector<unsigned char> encoded;
+  const unsigned error_enc = lodepng::encode(encoded, image.data, image.width, image.height,
+                                             image.colorType, image.bitDepth);
+  ASSERT_NO_PNG_ERROR_MSG(error_enc, "encoder error not expected");
+
+  int contextValue = 5;
+  lodepng::State state;
+  LodePNGDecoderSettings& decoder = state.decoder;
+  decoder.zlibsettings.custom_zlib = Callback::decompress;
+  decoder.zlibsettings.custom_context = &contextValue;
+  decoder.zlibsettings.ignore_adler32 = 0;
+  decoder.ignore_crc = 0;
+
+  unsigned w, h;
+  std::vector<unsigned char> decoded;
+  const unsigned error = lodepng::decode(decoded, w, h, state, encoded);
+
+  ASSERT_EQUALS(5555, error);
+}
+
+//Encodes a test image normally, then sets a custom inflate callback in the decoder
+//settings and checks that decoding returns the error code of that callback, which shows
+//that the callback was called. The callback itself checks that it receives the custom context.
+void testCustomInflate() {
+  std::cout << "testCustomInflate" << std::endl;
+
+  struct Callback {
+    static unsigned inflate(unsigned char**, size_t*,
+                            const unsigned char*, size_t,
+                            const LodePNGDecompressSettings* settings) {
+      ASSERT_EQUALS(5, *(int*)(settings->custom_context));
+      return 5555; //return a custom error code to prove this function was called
+    }
+  };
+
+  Image image;
+  generateTestImage(image, 5, 5, LCT_RGBA, 8);
+
+  std::vector<unsigned char> encoded;
+  const unsigned error_enc = lodepng::encode(encoded, image.data, image.width, image.height,
+                                             image.colorType, image.bitDepth);
+  ASSERT_NO_PNG_ERROR_MSG(error_enc, "encoder error not expected");
+
+  int contextValue = 5;
+  lodepng::State state;
+  LodePNGDecoderSettings& decoder = state.decoder;
+  decoder.zlibsettings.custom_inflate = Callback::inflate;
+  decoder.zlibsettings.custom_context = &contextValue;
+  decoder.zlibsettings.ignore_adler32 = 0;
+  decoder.ignore_crc = 0;
+
+  unsigned w, h;
+  std::vector<unsigned char> decoded;
+  const unsigned error = lodepng::decode(decoded, w, h, state, encoded);
+
+  ASSERT_EQUALS(5555, error);
+}
+
+//Decodes the base64 encoded PNG and checks that it has the given width and height and
+//that its first pixel has the given r, g, b, a values. Then encodes the decoded pixels again
+//with auto_convert disabled, using the same state, and checks that decoding that
+//result gives the same pixels.
+void doPngSuiteTinyTest(const std::string& base64, unsigned w, unsigned h,
+                        unsigned char r, unsigned char g, unsigned char b, unsigned char a) {
+  std::vector<unsigned char> png;
+  fromBase64(png, base64);
+
+  lodepng::State state;
+  unsigned decodedW, decodedH;
+  std::vector<unsigned char> pixels;
+  unsigned error = lodepng::decode(pixels, decodedW, decodedH, state, png);
+  ASSERT_NO_PNG_ERROR(error);
+  ASSERT_EQUALS(w, decodedW);
+  ASSERT_EQUALS(h, decodedH);
+
+  //the four channels of the first pixel, compared as ints
+  const unsigned char firstPixel[4] = {r, g, b, a};
+  for(size_t c = 0; c < 4; c++) ASSERT_EQUALS((int)firstPixel[c], (int)pixels[c]);
+
+  state.encoder.auto_convert = false;
+  std::vector<unsigned char> reencoded;
+  error = lodepng::encode(reencoded, pixels, w, h, state);
+  ASSERT_NO_PNG_ERROR(error);
+
+  std::vector<unsigned char> pixelsAgain;
+  error = lodepng::decode(pixelsAgain, decodedW, decodedH, state, reencoded);
+  ASSERT_NO_PNG_ERROR(error);
+  for(size_t pos = 0; pos < pixels.size(); pos++) ASSERT_EQUALS(pixels[pos], pixelsAgain[pos]);
+}
+
+/*Checks that both png suite images have the exact same pixel content, e.g. to check that
+an interlaced and the corresponding non-interlaced png suite image decode equally.
+Both images are given base64 encoded. For every differing byte, the pixel position and
+channel are printed before the assertion on that byte.*/
+void doPngSuiteEqualTest(const std::string& base64a, const std::string& base64b) {
+  lodepng::State state; //used for both decodes
+
+  std::vector<unsigned char> firstPng, secondPng;
+  fromBase64(firstPng, base64a);
+  fromBase64(secondPng, base64b);
+
+  unsigned firstW, firstH, secondW, secondH;
+  std::vector<unsigned char> first, second;
+  ASSERT_NO_PNG_ERROR(lodepng::decode(first, firstW, firstH, state, firstPng));
+  ASSERT_NO_PNG_ERROR(lodepng::decode(second, secondW, secondH, state, secondPng));
+  ASSERT_EQUALS(firstW, secondW);
+  ASSERT_EQUALS(firstH, secondH);
+
+  //4 bytes per pixel
+  const size_t numBytes = firstW * firstH * 4;
+  for(size_t pos = 0; pos < numBytes; pos++) {
+    if(first[pos] == second[pos]) continue;
+    const size_t pixel = pos / 4;
+    std::cout << "x: " << (pixel % firstW) << " y: " << (pixel / firstW) << " c: " << pos % 4 << std::endl;
+    ASSERT_EQUALS((int)first[pos], (int)second[pos]);
+  }
+}
+
+//Runs a handful of small PNG suite images through doPngSuiteTinyTest (the last four arguments
+//are the r, g, b, a values expected in the first decoded pixel) and then checks with
+//doPngSuiteEqualTest that matching non-interlaced/interlaced variants decode identically.
+void testPngSuiteTiny() {
+  std::cout << "testPngSuiteTiny" << std::endl;
+
+  //base64 encoded PNG suite files, each defined once and shared by both kinds of check below
+  const char* const s01n3p01 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABAQMAAAFS3GZcAAAABGdBTUEAAYagMeiWXwAAAANzQklU"
+                               "BAQEd/i1owAAAANQTFRFAAD/injSVwAAAApJREFUeJxjYAAAAAIAAUivpHEAAAAASUVORK5CYII=";
+  const char* const s01i3p01 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABAQMAAAAl21bKAAAABGdBTUEAAYagMeiWXwAAAANzQklU"
+                               "BAQEd/i1owAAAANQTFRFAAD/injSVwAAAApJREFUeJxjYAAAAAIAAUivpHEAAAAASUVORK5CYII=";
+  const char* const s07n3p02 = "iVBORw0KGgoAAAANSUhEUgAAAAcAAAAHAgMAAAC5PL9AAAAABGdBTUEAAYagMeiWXwAAAANzQklU"
+                               "BAQEd/i1owAAAAxQTFRF/wB3AP93//8AAAD/G0OznAAAABpJREFUeJxj+P+H4WoMw605DDfmgEgg"
+                               "+/8fAHF5CrkeXW0HAAAAAElFTkSuQmCC";
+  const char* const s07i3p02 = "iVBORw0KGgoAAAANSUhEUgAAAAcAAAAHAgMAAAHOO4/WAAAABGdBTUEAAYagMeiWXwAAAANzQklU"
+                               "BAQEd/i1owAAAAxQTFRF/wB3AP93//8AAAD/G0OznAAAACVJREFUeJxjOMBwgOEBwweGDQyvGf4z"
+                               "/GFIAcI/DFdjGG7MAZIAweMMgVWC+YkAAAAASUVORK5CYII=";
+  const char* const basn3p02 = "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgAgMAAAAOFJJnAAAABGdBTUEAAYagMeiWXwAAAANzQklU"
+                               "AQEBfC53ggAAAAxQTFRFAP8A/wAA//8AAAD/ZT8rugAAACJJREFUeJxj+B+6igGEGfAw8MnBGKug"
+                               "LHwMqNL/+BiDzD0AvUl/geqJjhsAAAAASUVORK5CYII=";
+  const char* const basn3p01 = "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgAQMAAABJtOi3AAAABGdBTUEAAYagMeiWXwAAAAZQTFRF"
+                               "7v8iImb/bBrSJgAAABVJREFUeJxj4AcCBjTiAxCgEwOkDgC7Hz/Bk4JmWQAAAABJRU5ErkJggg==";
+  const char* const basn0g16 = "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgEAAAAAAGgflrAAAABGdBTUEAAYagMeiWXwAAAF5JREFU"
+                               "eJzV0jEKwDAMQ1E5W+9/xtygk8AoezLVKgSj2Y8/OICnuFcTE2OgOoJgHQiZAN2C9kDKBOgW3AZC"
+                               "JkC3oD2QMgG6BbeBkAnQLWgPpExgP28H7E/0GTjPfwAW2EvYX64rn9cAAAAASUVORK5CYII=";
+  const char* const basi0g16 = "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgEAAAAAFxhsn9AAAABGdBTUEAAYagMeiWXwAAAOJJREFU"
+                               "eJy1kTsOwjAQRMdJCqj4XYHD5DAcj1Okyg2okCyBRLOSC0BDERKCI7xJVmgaa/X8PFo7oESJEtka"
+                               "TeLDjdjjgCMe7eTE96FGd3AL7HvZsdNEaJMVo0GNGm775bgwW6Afj/SAjAY+JsYNXIHtz2xYxTXi"
+                               "UoOek4AbFcCnDYEK4NMGsgXcMrGHJytkBX5HIP8FAhVANIMVIBVANMPfgUAFEM3wAVyG5cxcecY5"
+                               "/dup3LVFa1HXmA61LY59f6Ygp1Eg1gZGQaBRILYGdxoFYmtAGgXx9YmCfPD+RMHwuuAFVpjuiRT/"
+                               "//4AAAAASUVORK5CYII=";
+
+  //single image checks
+  doPngSuiteTinyTest(s01n3p01, 1, 1, 0, 0, 255, 255);
+  doPngSuiteTinyTest(s01i3p01, 1, 1, 0, 0, 255, 255);
+  doPngSuiteTinyTest(s07n3p02, 7, 7, 0, 0, 255, 255);
+  doPngSuiteTinyTest(s07i3p02, 7, 7, 0, 0, 255, 255);
+  doPngSuiteTinyTest(basn3p02, 32, 32, 0, 0, 255, 255);
+  doPngSuiteTinyTest(basn3p01, 32, 32, 238, 255, 34, 255);
+  doPngSuiteTinyTest(basn0g16, 32, 32, 0, 0, 0, 255);
+  doPngSuiteTinyTest(basi0g16, 32, 32, 0, 0, 0, 255);
+
+  //non-interlaced versus interlaced pairs
+  doPngSuiteEqualTest(s01n3p01, s01i3p01);
+  doPngSuiteEqualTest(s07n3p02, s07i3p02);
+  doPngSuiteEqualTest(basn0g16, basi0g16);
+}
+
+//Checks lodepng::getChunks and lodepng::insertChunks: copies of three text chunks taken from a
+//test PNG are inserted back, one per insertion group, and the resulting chunk order is verified.
+void testChunkUtil() {
+  std::cout << "testChunkUtil" << std::endl;
+  std::vector<unsigned char> png;
+  createComplexPNG(png);
+
+  std::vector<std::string> names[3];
+  std::vector<std::vector<unsigned char> > chunks[3];
+  assertNoError(lodepng::getChunks(names, chunks, png));
+
+  //chunks[2][2], chunks[2][3] and chunks[2][4] are a zTXt, tEXt and iTXt chunk; group i gets the i-th one
+  std::vector<std::vector<unsigned char> > inserts[3];
+  for(size_t i = 0; i < 3; i++) inserts[i].push_back(chunks[2][i + 2]);
+  assertNoError(lodepng::insertChunks(png, inserts));
+
+  //                                        inserts[0]                    inserts[1]                                   inserts[2]
+  //                                             v                             v                                            v
+  std::string expectednames = " IHDR uNKa uNKa zTXt PLTE tRNS bKGD pHYs uNKb tEXt IDAT tIME zTXt zTXt tEXt iTXt iTXt uNKc iTXt IEND";
+  std::string chunknames = extractChunkNames(png);
+  ASSERT_EQUALS(expectednames, chunknames);
+
+  //the PNG with the extra chunks must still decode
+  unsigned w, h;
+  std::vector<unsigned char> image;
+  ASSERT_NO_PNG_ERROR(lodepng::decode(image, w, h, png));
+}
+
+//Test that 16-bit per channel output is always big endian, the convention used inside of PNG,
+//even though x86 CPU's are little endian. Each scenario below decodes with its own fresh state
+//and output buffer, with and without color conversion.
+void test16bitColorEndianness() {
+  std::cout << "test16bitColorEndianness" << std::endl;
+  unsigned w, h;
+
+  {
+    //basn0g16.png from the PNG test suite
+    std::string base64 = "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgEAAAAAAGgflrAAAABGdBTUEAAYagMeiWXwAAAF5JREFU"
+                         "eJzV0jEKwDAMQ1E5W+9/xtygk8AoezLVKgSj2Y8/OICnuFcTE2OgOoJgHQiZAN2C9kDKBOgW3AZC"
+                         "JkC3oD2QMgG6BbeBkAnQLWgPpExgP28H7E/0GTjPfwAW2EvYX64rn9cAAAAASUVORK5CYII=";
+    std::vector<unsigned char> png;
+    fromBase64(png, base64);
+
+    {
+      // Decode from 16-bit grey image to 16-bit per channel RGBA
+      lodepng::State state;
+      std::vector<unsigned char> image;
+      state.info_raw.bitdepth = 16;
+      ASSERT_NO_PNG_ERROR(lodepng::decode(image, w, h, state, png));
+      ASSERT_EQUALS(0x09, image[8]);
+      ASSERT_EQUALS(0x00, image[9]);
+    }
+    {
+      // Decode from 16-bit grey image to 16-bit grey raw image (no conversion)
+      lodepng::State state;
+      std::vector<unsigned char> image;
+      state.decoder.color_convert = false;
+      ASSERT_NO_PNG_ERROR(lodepng::decode(image, w, h, state, png));
+      ASSERT_EQUALS(0x09, image[2]);
+      ASSERT_EQUALS(0x00, image[3]);
+    }
+  }
+
+  {
+    //cs3n2c16.png
+    std::string base64 = "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgEAIAAACsiDHgAAAABGdBTUEAAYagMeiWXwAAAANzQklU"
+                         "DQ0N0DeNwQAAAH5JREFUeJztl8ENxEAIAwcJ6cpI+q8qKeNepAgelq2dCjz4AdQM1jRcf3WIDQ13"
+                         "qUNsiBBQZ1gR0cARUFIz3pug3586wo5+rOcfIaBOsCSggSOgpcB8D4D3R9DgfUyECIhDbAhp4Ajo"
+                         "KPD+CBq8P4IG72MiQkCdYUVEA0dAyQcwUyZpXH92ZwAAAABJRU5ErkJggg==";
+    std::vector<unsigned char> png;
+    fromBase64(png, base64);
+
+    {
+      // Decode from 16-bit per channel RGB image to 16-bit per channel RGBA
+      lodepng::State state;
+      std::vector<unsigned char> image;
+      state.info_raw.bitdepth = 16;
+      ASSERT_NO_PNG_ERROR(lodepng::decode(image, w, h, state, png));
+      ASSERT_EQUALS(0x1f, image[258]);
+      ASSERT_EQUALS(0xf9, image[259]);
+    }
+    {
+      // Decode from 16-bit per channel RGB image to 16-bit per channel RGBA raw image (no conversion)
+      lodepng::State state;
+      std::vector<unsigned char> image;
+      state.decoder.color_convert = false;
+      ASSERT_NO_PNG_ERROR(lodepng::decode(image, w, h, state, png));
+      ASSERT_EQUALS(0x1f, image[194]);
+      ASSERT_EQUALS(0xf9, image[195]);
+    }
+  }
+
+  {
+    // Decode from palette image to 16-bit per channel RGBA
+    //s07n3p02.png
+    std::string base64 = "iVBORw0KGgoAAAANSUhEUgAAAAcAAAAHAgMAAAC5PL9AAAAABGdBTUEAAYagMeiWXwAAAANzQklU"
+                         "BAQEd/i1owAAAAxQTFRF/wB3AP93//8AAAD/G0OznAAAABpJREFUeJxj+P+H4WoMw605DDfmgEgg"
+                         "+/8fAHF5CrkeXW0HAAAAAElFTkSuQmCC";
+    std::vector<unsigned char> png;
+    fromBase64(png, base64);
+
+    lodepng::State state;
+    std::vector<unsigned char> image;
+    state.info_raw.bitdepth = 16;
+    ASSERT_NO_PNG_ERROR(lodepng::decode(image, w, h, state, png));
+    ASSERT_EQUALS(0x77, image[84]);
+    ASSERT_EQUALS(0x77, image[85]);
+  }
+}
+
+//Encodes a generated 32x32 RGBA image with the LFS_PREDEFINED filter strategy, requesting filter
+//type 3 for every scanline, and verifies through lodepng::getFilterTypes that the PNG reports
+//exactly h filter types which are all 3.
+void testPredefinedFilters() {
+  std::cout << "testPredefinedFilters" << std::endl;
+  size_t w = 32, h = 32;
+  Image image;
+  generateTestImage(image, w, h, LCT_RGBA, 8);
+
+  // one requested filter type per scanline, everything set to '3'
+  std::vector<unsigned char> predefined(h, 3);
+
+  lodepng::State state;
+  state.encoder.filter_palette_zero = 0;
+  state.encoder.filter_strategy = LFS_PREDEFINED;
+  state.encoder.predefined_filters = &predefined[0];
+
+  std::vector<unsigned char> png;
+  assertNoError(lodepng::encode(png, &image.data[0], w, h, state));
+
+  // read back which filter type each scanline of the encoded PNG uses
+  std::vector<unsigned char> outfilters;
+  assertNoError(lodepng::getFilterTypes(outfilters, png));
+
+  ASSERT_EQUALS(outfilters.size(), h);
+  for(size_t row = 0; row != h; ++row) ASSERT_EQUALS(3, outfilters[row]);
+}
+
+void testEncoderErrors() {
+  std::cout << "testEncoderErrors" << std::endl;
+
+  std::vector<unsigned char> png;
+  unsigned w = 32, h = 32;
+  Image image;
+  generateTestImage(image, w, h);
+
+  lodepng::State def;
+
+  lodepng::State state;
+
+  ASSERT_EQUALS(0, lodepng::encode(png, &image.data[0], w, h, state));
+
+  // test window sizes
+  state.encoder.zlibsettings.windowsize = 0;
+  ASSERT_EQUALS(60, lodepng::encode(png, &image.data[0], w, h, state));
+  state.encoder.zlibsettings.windowsize = 65536;
+  ASSERT_EQUALS(60, lodepng::encode(png, &image.data[0], w, h, state));
+  state.encoder.zlibsettings.windowsize = 1000; // not power of two
+  ASSERT_EQUALS(90, lodepng::encode(png, &image.data[0], w, h, state));
+  state.encoder.zlibsettings.windowsize = 256;
+  ASSERT_EQUALS(0, lodepng::encode(png, &image.data[0], w, h, state));
+
+  state = def;
+  state.info_png.color.bitdepth = 3;
+  ASSERT_EQUALS(37, lodepng::encode(png, &image.data[0], w, h, state));
+
+  state = def;
+  state.info_png.color.colortype = (LodePNGColorType)5;
+  ASSERT_EQUALS(31, lodepng::encode(png, &image.data[0], w, h, state));
+
+  state = def;
+  state.info_png.color.colortype = LCT_PALETTE;
+  ASSERT_EQUALS(68, lodepng::encode(png, &image.data[0], w, h, state));
+
+  state = def;
+  state.info_png.interlace_method = 0;
+  ASSERT_EQUALS(0, lodepng::encode(png, &image.data[0], w, h, state));
+  state.info_png.interlace_method = 1;
+  ASSERT_EQUALS(0, lodepng::encode(png, &image.data[0], w, h, state));
+  state.info_png.interlace_method = 2;
+  ASSERT_EQUALS(71, lodepng::encode(png, &image.data[0], w, h, state));
+
+  state = def;
+  state.encoder.zlibsettings.btype = 0;
+  ASSERT_EQUALS(0, lodepng::encode(png, &image.data[0], w, h, state));
+  state.encoder.zlibsettings.btype = 1;
+  ASSERT_EQUALS(0, lodepng::encode(png, &image.data[0], w, h, state));
+  state.encoder.zlibsettings.btype = 2;
+  ASSERT_EQUALS(0, lodepng::encode(png, &image.data[0], w, h, state));
+  state.encoder.zlibsettings.btype = 3;
+  ASSERT_EQUALS(61, lodepng::encode(png, &image.data[0], w, h, state));
+}
+
+// Appends one 8-bit per channel color to colors as the four bytes r, g, b, a, in that order.
+void addColor(std::vector<unsigned char>& colors, unsigned char r, unsigned char g, unsigned char b, unsigned char a) {
+  const unsigned char rgba[4] = {r, g, b, a};
+  colors.insert(colors.end(), rgba, rgba + 4);
+}
+
+// Appends one 16-bit per channel color to colors as eight bytes: r, g, b, a in that order, and
+// for each channel first its low byte (value & 255) and then its high byte (value >> 8).
+void addColor16(std::vector<unsigned char>& colors, unsigned short r, unsigned short g, unsigned short b, unsigned short a) {
+  const unsigned short channels[4] = {r, g, b, a};
+  for(size_t c = 0; c < 4; c++) {
+    colors.push_back(channels[c] & 255);
+    colors.push_back((channels[c] >> 8) & 255);
+  }
+}
+
+// Encodes the given colors without specifying a PNG color model, then decodes the result and
+// checks which color type, bit depth and color key setting the encoder chose, and that the
+// decoded colors match the input.
+// colors: pixels in RGBA, inbitdepth bits per channel. inbitdepth must be 8 or 16, so each pixel
+//         takes 4 * (inbitdepth / 8) bytes. The pixels are repeated to fill a larger image.
+// colortype, bitdepth and key are the values the encoded PNG is expected to have.
+void testAutoColorModel(const std::vector<unsigned char>& colors, unsigned inbitdepth, LodePNGColorType colortype, unsigned bitdepth, bool key) {
+  std::cout << "testAutoColorModel " << inbitdepth << " " << colortype << " " << bitdepth << " " << key << std::endl;
+  size_t bytesperpixel = 4 * (inbitdepth / 8);
+  size_t innum = colors.size() / bytesperpixel; // amount of input pixels
+  size_t num = innum < 65536 ? 65536 : innum; // Make image bigger so the convert doesn't avoid palette due to small image.
+  std::vector<unsigned char> colors2(num * bytesperpixel);
+  for(size_t i = 0; i < colors2.size(); i++) colors2[i] = colors[i % colors.size()];
+
+  // a failing encode or decode must fail the test here rather than show up as confusing mismatches below
+  std::vector<unsigned char> png;
+  ASSERT_NO_PNG_ERROR(lodepng::encode(png, colors2, num, 1, LCT_RGBA, inbitdepth));
+
+  // now extract the color type it chose
+  unsigned w, h;
+  lodepng::State state;
+  std::vector<unsigned char> decoded;
+  ASSERT_NO_PNG_ERROR(lodepng::decode(decoded, w, h, state, png));
+  ASSERT_EQUALS(num, w);
+  ASSERT_EQUALS(1, h);
+  ASSERT_EQUALS(colortype, state.info_png.color.colortype);
+  ASSERT_EQUALS(bitdepth, state.info_png.color.bitdepth);
+  ASSERT_EQUALS(key, state.info_png.color.key_defined);
+  // also check that the PNG decoded correctly and has same colors as input
+  if(inbitdepth == 8) { for(size_t i = 0; i < colors.size(); i++) ASSERT_EQUALS(colors[i], decoded[i]); }
+  // 16-bit input: each decoded byte is compared with the first byte of the corresponding input channel
+  else { for(size_t i = 0; i < colors.size() / 2; i++) ASSERT_EQUALS(colors[i * 2], decoded[i]); }
+}
+
+// Runs testAutoColorModel on hand-made color sets. Each set is built so that the color model
+// chosen by the encoder should end up at one particular color type, bit depth and color key
+// setting, which is passed along as the expectation.
+void testAutoColorModels() {
+  // 1-bit grey
+  std::vector<unsigned char> grey1;
+  for(size_t i = 0; i < 2; i++) addColor(grey1, i * 255, i * 255, i * 255, 255);
+  testAutoColorModel(grey1, 8, LCT_GREY, 1, false);
+
+  // 2-bit grey
+  std::vector<unsigned char> grey2;
+  for(size_t i = 0; i < 4; i++) addColor(grey2, i * 85, i * 85, i * 85, 255);
+  testAutoColorModel(grey2, 8, LCT_GREY, 2, false);
+
+  // 4-bit grey
+  std::vector<unsigned char> grey4;
+  for(size_t i = 0; i < 16; i++) addColor(grey4, i * 17, i * 17, i * 17, 255);
+  testAutoColorModel(grey4, 8, LCT_GREY, 4, false);
+
+  // 8-bit grey
+  std::vector<unsigned char> grey8;
+  for(size_t i = 0; i < 256; i++) addColor(grey8, i, i, i, 255);
+  testAutoColorModel(grey8, 8, LCT_GREY, 8, false);
+
+  // 16-bit grey
+  std::vector<unsigned char> grey16;
+  for(size_t i = 0; i < 257; i++) addColor16(grey16, i, i, i, 65535);
+  testAutoColorModel(grey16, 16, LCT_GREY, 16, false);
+
+  // 8-bit grey+alpha
+  std::vector<unsigned char> grey8a;
+  for(size_t i = 0; i < 17; i++) addColor(grey8a, i, i, i, i);
+  testAutoColorModel(grey8a, 8, LCT_GREY_ALPHA, 8, false);
+
+  // 16-bit grey+alpha
+  std::vector<unsigned char> grey16a;
+  for(size_t i = 0; i < 257; i++) addColor16(grey16a, i, i, i, i);
+  testAutoColorModel(grey16a, 16, LCT_GREY_ALPHA, 16, false);
+
+
+  // various palette tests
+  std::vector<unsigned char> palette;
+  addColor(palette, 0, 0, 1, 255);
+  testAutoColorModel(palette, 8, LCT_PALETTE, 1, false);
+  addColor(palette, 0, 0, 2, 255);
+  testAutoColorModel(palette, 8, LCT_PALETTE, 1, false);
+  for(int i = 3; i <= 4; i++) addColor(palette, 0, 0, i, 255);
+  testAutoColorModel(palette, 8, LCT_PALETTE, 2, false);
+  for(int i = 5; i <= 7; i++) addColor(palette, 0, 0, i, 255);
+  testAutoColorModel(palette, 8, LCT_PALETTE, 4, false);
+  for(int i = 8; i <= 17; i++) addColor(palette, 0, 0, i, 255);
+  testAutoColorModel(palette, 8, LCT_PALETTE, 8, false);
+  addColor(palette, 0, 0, 18, 0); // transparent
+  testAutoColorModel(palette, 8, LCT_PALETTE, 8, false);
+  addColor(palette, 0, 0, 18, 1); // translucent
+  testAutoColorModel(palette, 8, LCT_PALETTE, 8, false);
+
+  // 1-bit grey + alpha not possible, becomes palette
+  std::vector<unsigned char> grey1a;
+  for(size_t i = 0; i < 2; i++) addColor(grey1a, i, i, i, 128);
+  testAutoColorModel(grey1a, 8, LCT_PALETTE, 1, false);
+
+  // 2-bit grey + alpha not possible, becomes palette
+  std::vector<unsigned char> grey2a;
+  for(size_t i = 0; i < 4; i++) addColor(grey2a, i, i, i, 128);
+  testAutoColorModel(grey2a, 8, LCT_PALETTE, 2, false);
+
+  // 4-bit grey + alpha not possible, becomes palette
+  std::vector<unsigned char> grey4a;
+  for(size_t i = 0; i < 16; i++) addColor(grey4a, i, i, i, 128);
+  testAutoColorModel(grey4a, 8, LCT_PALETTE, 4, false);
+
+  // 8-bit rgb
+  std::vector<unsigned char> rgb = grey8;
+  addColor(rgb, 255, 0, 0, 255);
+  testAutoColorModel(rgb, 8, LCT_RGB, 8, false);
+
+  // 8-bit rgb + key
+  std::vector<unsigned char> rgb_key = rgb;
+  addColor(rgb_key, 128, 0, 0, 0);
+  testAutoColorModel(rgb_key, 8, LCT_RGB, 8, true);
+
+  // 8-bit rgb, not key due to edge case: single key color, but opaque color has same RGB value
+  std::vector<unsigned char> rgb_key2 = rgb_key;
+  addColor(rgb_key2, 128, 0, 0, 255); // same color but opaque ==> no more key
+  testAutoColorModel(rgb_key2, 8, LCT_RGBA, 8, false);
+
+  // 8-bit rgb, not key due to semi translucent
+  std::vector<unsigned char> rgb_key3 = rgb_key;
+  addColor(rgb_key3, 128, 0, 0, 128); // semi-translucent ==> no more key
+  testAutoColorModel(rgb_key3, 8, LCT_RGBA, 8, false);
+
+  // 8-bit rgb, not key due to multiple transparent colors
+  std::vector<unsigned char> rgb_key4 = rgb_key;
+  // rgb_key already holds transparent 128, 0, 0; this adds a second, different transparent color ==> no more key
+  addColor(rgb_key4, 129, 0, 0, 0);
+  testAutoColorModel(rgb_key4, 8, LCT_RGBA, 8, false);
+
+  // 1-bit grey with key
+  std::vector<unsigned char> grey1_key = grey1;
+  grey1_key[7] = 0;
+  testAutoColorModel(grey1_key, 8, LCT_GREY, 1, true);
+
+  // 2-bit grey with key
+  std::vector<unsigned char> grey2_key = grey2;
+  grey2_key[7] = 0;
+  testAutoColorModel(grey2_key, 8, LCT_GREY, 2, true);
+
+  // 4-bit grey with key
+  std::vector<unsigned char> grey4_key = grey4;
+  grey4_key[7] = 0;
+  testAutoColorModel(grey4_key, 8, LCT_GREY, 4, true);
+
+  // 8-bit grey with key
+  std::vector<unsigned char> grey8_key = grey8;
+  grey8_key[7] = 0;
+  testAutoColorModel(grey8_key, 8, LCT_GREY, 8, true);
+
+  // 16-bit grey with key
+  std::vector<unsigned char> grey16_key = grey16;
+  grey16_key[14] = grey16_key[15] = 0;
+  testAutoColorModel(grey16_key, 16, LCT_GREY, 16, true);
+
+  // a single 16-bit color, can't become palette due to being 16-bit
+  std::vector<unsigned char> small16;
+  addColor16(small16, 1, 0, 0, 65535);
+  testAutoColorModel(small16, 16, LCT_RGB, 16, false);
+
+  std::vector<unsigned char> small16a;
+  addColor16(small16a, 1, 0, 0, 1);
+  testAutoColorModel(small16a, 16, LCT_RGBA, 16, false);
+
+  // what we provide as 16-bit is actually representable as 8-bit, so 8-bit palette expected for single color
+  std::vector<unsigned char> not16;
+  addColor16(not16, 257, 257, 257, 0);
+  testAutoColorModel(not16, 16, LCT_PALETTE, 1, false);
+
+  // the rgb color is representable as 8-bit, but the alpha channel only as 16-bit, so ensure it uses 16-bit and not palette for this single color
+  std::vector<unsigned char> alpha16;
+  addColor16(alpha16, 257, 0, 0, 10000);
+  testAutoColorModel(alpha16, 16, LCT_RGBA, 16, false);
+
+  // 1-bit grey, with attempt to get color key but can't do it due to opaque color with same value
+  std::vector<unsigned char> grey1k;
+  addColor(grey1k, 0, 0, 0, 255);
+  addColor(grey1k, 255, 255, 255, 255);
+  addColor(grey1k, 255, 255, 255, 0);
+  testAutoColorModel(grey1k, 8, LCT_PALETTE, 2, false);
+}
+
+// Decodes a 2x2 palette PNG with LCT_PALETTE, 8 bits requested as output format and checks the
+// size and the four output bytes, one byte per pixel.
+void testPaletteToPaletteDecode() {
+  std::cout << "testPaletteToPaletteDecode" << std::endl;
+  // It's a bit big for a 2x2 image... but this tests needs one with 256 palette entries in it.
+  std::string base64 = "iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAMAAABFaP0WAAAAA3NCSVQICAjb4U/gAAADAFBMVEUA"
+                       "AAAAADMAAGYAAJkAAMwAAP8AMwAAMzMAM2YAM5kAM8wAM/8AZgAAZjMAZmYAZpkAZswAZv8AmQAA"
+                       "mTMAmWYAmZkAmcwAmf8AzAAAzDMAzGYAzJkAzMwAzP8A/wAA/zMA/2YA/5kA/8wA//8zAAAzADMz"
+                       "AGYzAJkzAMwzAP8zMwAzMzMzM2YzM5kzM8wzM/8zZgAzZjMzZmYzZpkzZswzZv8zmQAzmTMzmWYz"
+                       "mZkzmcwzmf8zzAAzzDMzzGYzzJkzzMwzzP8z/wAz/zMz/2Yz/5kz/8wz//9mAABmADNmAGZmAJlm"
+                       "AMxmAP9mMwBmMzNmM2ZmM5lmM8xmM/9mZgBmZjNmZmZmZplmZsxmZv9mmQBmmTNmmWZmmZlmmcxm"
+                       "mf9mzABmzDNmzGZmzJlmzMxmzP9m/wBm/zNm/2Zm/5lm/8xm//+ZAACZADOZAGaZAJmZAMyZAP+Z"
+                       "MwCZMzOZM2aZM5mZM8yZM/+ZZgCZZjOZZmaZZpmZZsyZZv+ZmQCZmTOZmWaZmZmZmcyZmf+ZzACZ"
+                       "zDOZzGaZzJmZzMyZzP+Z/wCZ/zOZ/2aZ/5mZ/8yZ///MAADMADPMAGbMAJnMAMzMAP/MMwDMMzPM"
+                       "M2bMM5nMM8zMM//MZgDMZjPMZmbMZpnMZszMZv/MmQDMmTPMmWbMmZnMmczMmf/MzADMzDPMzGbM"
+                       "zJnMzMzMzP/M/wDM/zPM/2bM/5nM/8zM////AAD/ADP/AGb/AJn/AMz/AP//MwD/MzP/M2b/M5n/"
+                       "M8z/M///ZgD/ZjP/Zmb/Zpn/Zsz/Zv//mQD/mTP/mWb/mZn/mcz/mf//zAD/zDP/zGb/zJn/zMz/"
+                       "zP///wD//zP//2b//5n//8z///8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+                       "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+                       "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABlenwdAAABAHRSTlP/////////////////////////"
+                       "////////////////////////////////////////////////////////////////////////////"
+                       "////////////////////////////////////////////////////////////////////////////"
+                       "////////////////////////////////////////////////////////////////////////////"
+                       "//////////////////////////////////8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+                       "AAAAAAAAAAAAG8mZagAAAAlwSFlzAAAOTQAADpwB3vacVwAAAA5JREFUCJlj2CLHwHodAATjAa+k"
+                       "lTE5AAAAAElFTkSuQmCC";
+  std::vector<unsigned char> png;
+  fromBase64(png, base64);
+
+  unsigned width, height;
+  std::vector<unsigned char> image;
+  unsigned error = lodepng::decode(image, width, height, png, LCT_PALETTE, 8);
+  ASSERT_EQUALS(0, error);
+  ASSERT_EQUALS(2, width);
+  ASSERT_EQUALS(2, height);
+
+  // expected output byte of each of the four pixels
+  const int expected[4] = {180, 30, 5, 215};
+  for(size_t i = 0; i < 4; i++) ASSERT_EQUALS(expected[i], image[i]);
+}
+
+//Same kind of check as testPaletteToPaletteDecode but with a 2-bit palette image of 32x32
+//pixels. Additionally checks that requesting palette output with a user-specified palette that
+//differs from the one in the PNG gives error 82.
+void testPaletteToPaletteDecode2() {
+  std::cout << "testPaletteToPaletteDecode2" << std::endl;
+  std::string base64 = "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgAgMAAAAOFJJnAAAADFBMVEX/AAAA/wAAAP/////7AGD2AAAAE0lEQVR4AWMQhAKG3VCALDIqAgDl2WYBCQHY9gAAAABJRU5ErkJggg==";
+  std::vector<unsigned char> png;
+  fromBase64(png, base64);
+
+  unsigned width, height;
+  std::vector<unsigned char> image;
+  unsigned error = lodepng::decode(image, width, height, png, LCT_PALETTE, 8);
+  ASSERT_EQUALS(0, error);
+  ASSERT_EQUALS(32, width);
+  ASSERT_EQUALS(32, height);
+  ASSERT_EQUALS(0, image[0]);
+  ASSERT_EQUALS(1, image[1]);
+
+  //Now add a user-specified output palette, that differs from the input palette. That should give error 82.
+  LodePNGState state;
+  lodepng_state_init(&state);
+  state.info_raw.colortype = LCT_PALETTE;
+  state.info_raw.bitdepth = 8;
+  //four opaque entries (i, i, i, 255) for i = 0..3
+  for(unsigned char i = 0; i < 4; i++) lodepng_palette_add(&state.info_raw, i, i, i, 255);
+
+  unsigned char* image2 = 0;
+  unsigned error2 = lodepng_decode(&image2, &width, &height, &state, &png[0], png.size());
+  lodepng_state_cleanup(&state);
+  ASSERT_EQUALS(82, error2);
+  free(image2);
+}
+
+// Asserts that the gAMA, cHRM, sRGB and iCCP related fields in the info_png of both states are
+// equal. The values of a group are only compared when a has that group defined (the defined
+// flags themselves are always compared). The ICC profile name is not compared.
+void assertColorProfileDataEqual(const lodepng::State& a, const lodepng::State& b) {
+  const LodePNGInfo& ia = a.info_png;
+  const LodePNGInfo& ib = b.info_png;
+
+  // gAMA
+  ASSERT_EQUALS(ia.gama_defined, ib.gama_defined);
+  if(ia.gama_defined) {
+    ASSERT_EQUALS(ia.gama_gamma, ib.gama_gamma);
+  }
+
+  // cHRM
+  ASSERT_EQUALS(ia.chrm_defined, ib.chrm_defined);
+  if(ia.chrm_defined) {
+    ASSERT_EQUALS(ia.chrm_white_x, ib.chrm_white_x);
+    ASSERT_EQUALS(ia.chrm_white_y, ib.chrm_white_y);
+    ASSERT_EQUALS(ia.chrm_red_x, ib.chrm_red_x);
+    ASSERT_EQUALS(ia.chrm_red_y, ib.chrm_red_y);
+    ASSERT_EQUALS(ia.chrm_green_x, ib.chrm_green_x);
+    ASSERT_EQUALS(ia.chrm_green_y, ib.chrm_green_y);
+    ASSERT_EQUALS(ia.chrm_blue_x, ib.chrm_blue_x);
+    ASSERT_EQUALS(ia.chrm_blue_y, ib.chrm_blue_y);
+  }
+
+  // sRGB
+  ASSERT_EQUALS(ia.srgb_defined, ib.srgb_defined);
+  if(ia.srgb_defined) {
+    ASSERT_EQUALS(ia.srgb_intent, ib.srgb_intent);
+  }
+
+  // iCCP: profile size and every profile byte
+  ASSERT_EQUALS(ia.iccp_defined, ib.iccp_defined);
+  if(ia.iccp_defined) {
+    ASSERT_EQUALS(ia.iccp_profile_size, ib.iccp_profile_size);
+    for(size_t i = 0; i != ia.iccp_profile_size; ++i) {
+      ASSERT_EQUALS(ia.iccp_profile[i], ib.iccp_profile[i]);
+    }
+  }
+}
+
+// Encodes image (w x h pixels, given as 4 bytes per pixel) with the settings in state, decodes
+// the result with a fresh state and checks that the color profile data, the dimensions and all
+// pixel bytes are the same as what went in.
+static void checkColorProfileRoundTrip(lodepng::State& state, const std::vector<unsigned char>& image,
+                                       unsigned w, unsigned h) {
+  std::vector<unsigned char> png;
+  unsigned error = lodepng::encode(png, &image[0], w, h, state);
+  ASSERT_NO_PNG_ERROR(error);
+
+  lodepng::State state2;
+  std::vector<unsigned char> image2;
+  unsigned w2 = 0, h2 = 0;
+  error = lodepng::decode(image2, w2, h2, state2, png);
+  ASSERT_NO_PNG_ERROR(error);
+  assertColorProfileDataEqual(state, state2);
+  ASSERT_EQUALS(w, w2);
+  ASSERT_EQUALS(h, h2);
+  ASSERT_EQUALS(image.size(), image2.size());
+  for(size_t i = 0; i < image.size(); i++) ASSERT_EQUALS(image[i], image2[i]);
+}
+
+// Tests the gAMA, cHRM, sRGB, iCCP chunks: each case sets the corresponding fields in the
+// encoder state and checks that they and the pixels survive an encode/decode round trip.
+void testColorProfile() {
+  std::cout << "testColorProfile" << std::endl;
+  const unsigned w = 32, h = 32;
+
+  // test pattern used by the first three cases
+  std::vector<unsigned char> image(w * h * 4);
+  for(size_t i = 0; i < image.size(); i++) image[i] = i & 255;
+
+  // gAMA and cHRM
+  {
+    lodepng::State state;
+    state.info_png.gama_defined = 1;
+    state.info_png.gama_gamma = 12345;
+    state.info_png.chrm_defined = 1;
+    state.info_png.chrm_white_x = 10;
+    state.info_png.chrm_white_y = 20;
+    state.info_png.chrm_red_x = 30;
+    state.info_png.chrm_red_y = 40;
+    state.info_png.chrm_green_x = 100000;
+    state.info_png.chrm_green_y = 200000;
+    state.info_png.chrm_blue_x = 300000;
+    state.info_png.chrm_blue_y = 400000;
+    checkColorProfileRoundTrip(state, image, w, h);
+  }
+
+  // sRGB
+  {
+    lodepng::State state;
+    state.info_png.srgb_defined = 1;
+    state.info_png.srgb_intent = 2;
+    checkColorProfileRoundTrip(state, image, w, h);
+  }
+
+  // RGB ICC profile
+  {
+    lodepng::State state;
+    state.info_png.iccp_defined = 1;
+    std::string testprofile = "0123456789abcdefRGB fake iccp profile for testing";
+    testprofile[0] = testprofile[1] = 0;
+    lodepng_set_icc(&state.info_png, "test", (const unsigned char*)testprofile.c_str(), testprofile.size());
+    checkColorProfileRoundTrip(state, image, w, h);
+  }
+
+  // greyscale ICC profile
+  {
+    // every pixel gets r == g == b == a; the loop bound includes the last pixel
+    std::vector<unsigned char> grey(w * h * 4);
+    for(size_t i = 0; i + 4 <= grey.size(); i += 4) {
+      grey[i] = grey[i + 1] = grey[i + 2] = grey[i + 3] = (unsigned char)(i & 255);
+    }
+    lodepng::State state;
+    state.info_png.iccp_defined = 1;
+    std::string testprofile = "0123456789abcdefGRAYfake iccp profile for testing";
+    testprofile[0] = testprofile[1] = 0;
+    lodepng_set_icc(&state.info_png, "test", (const unsigned char*)testprofile.c_str(), testprofile.size());
+    checkColorProfileRoundTrip(state, grey, w, h);
+  }
+}
+
+// Encodes pixels with a background color set in the PNG info, decodes the result and checks
+// that the decoded background color and the pixels are as expected.
+// r, g, b is input background color to encoder, given in png color model
+// r2, g2, b2 is expected decoded background color, in color model it auto chose if auto_convert is on
+// pixels must be given in mode_raw color format
+// if expect_encoder_error is true, only checks that encoding fails and then returns
+void testBkgdChunk(unsigned r, unsigned g, unsigned b,
+                   unsigned r2, unsigned g2, unsigned b2,
+                   const std::vector<unsigned char>& pixels,
+                   unsigned w, unsigned h,
+                   const LodePNGColorMode& mode_raw,
+                   const LodePNGColorMode& mode_png,
+                   bool auto_convert, bool expect_encoder_error = false) {
+  // set up the encoder: color modes, auto_convert and the background color
+  lodepng::State state;
+  lodepng_color_mode_copy(&state.info_png.color, &mode_png);
+  lodepng_color_mode_copy(&state.info_raw, &mode_raw);
+  state.encoder.auto_convert = auto_convert;
+  state.info_png.background_defined = 1;
+  state.info_png.background_r = r;
+  state.info_png.background_g = g;
+  state.info_png.background_b = b;
+
+  std::vector<unsigned char> png;
+  unsigned error = lodepng::encode(png, pixels, w, h, state);
+  if(expect_encoder_error) {
+    ASSERT_NOT_EQUALS(0, error);
+    return;
+  }
+  ASSERT_NO_PNG_ERROR(error);
+
+  // decode to 16-bit RGBA
+  lodepng::State state2;
+  state2.info_raw.colortype = LCT_RGBA;
+  state2.info_raw.bitdepth = 16;
+  unsigned w2, h2;
+  std::vector<unsigned char> decoded;
+  error = lodepng::decode(decoded, w2, h2, state2, &png[0], png.size());
+  ASSERT_NO_PNG_ERROR(error);
+
+  ASSERT_EQUALS(w, w2);
+  ASSERT_EQUALS(h, h2);
+  ASSERT_EQUALS(1, state2.info_png.background_defined);
+  ASSERT_EQUALS(r2, state2.info_png.background_r);
+  ASSERT_EQUALS(g2, state2.info_png.background_g);
+  ASSERT_EQUALS(b2, state2.info_png.background_b);
+
+  // compare pixels in the "raw" color model: convert the 16-bit RGBA output back to mode_raw first
+  LodePNGColorMode mode_rgba16;
+  lodepng_color_mode_init(&mode_rgba16);
+  mode_rgba16.colortype = LCT_RGBA;
+  mode_rgba16.bitdepth = 16;
+  std::vector<unsigned char> converted((w * h * lodepng_get_bpp(&mode_raw) + 7) / 8);
+  error = lodepng_convert(converted.data(), decoded.data(), &mode_raw, &mode_rgba16, w, h);
+  ASSERT_NO_PNG_ERROR(error);
+  ASSERT_EQUALS(pixels.size(), converted.size());
+  for(size_t i = 0; i != converted.size(); ++i) {
+    ASSERT_EQUALS((int)converted[i], (int)pixels[i]);
+  }
+}
+
+// Variant that generates its own pixels: a test image is generated for type_pixels and
+// bitdepth_pixels, converted to the raw color mode and then handed to the pixel based
+// testBkgdChunk overload.
+// r, g, b is input background color to encoder, given in png color model
+// r2, g2, b2 is expected decoded background color, in color model it auto chose if auto_convert is on
+void testBkgdChunk(unsigned r, unsigned g, unsigned b,
+                   unsigned r2, unsigned g2, unsigned b2,
+                   LodePNGColorType type_pixels, unsigned bitdepth_pixels,
+                   LodePNGColorType type_raw, unsigned bitdepth_raw,
+                   LodePNGColorType type_png, unsigned bitdepth_png,
+                   bool auto_convert, bool expect_encoder_error = false) {
+  Image image;
+  generateTestImageRequiringColorType16(image, type_pixels, bitdepth_pixels, false);
+
+  LodePNGColorMode mode_raw;
+  lodepng_color_mode_init(&mode_raw);
+  mode_raw.colortype = type_raw;
+  mode_raw.bitdepth = bitdepth_raw;
+
+  LodePNGColorMode mode_png;
+  lodepng_color_mode_init(&mode_png);
+  mode_png.colortype = type_png;
+  mode_png.bitdepth = bitdepth_png;
+
+  // the generated image data is read as 16-bit RGBA by the conversion below
+  LodePNGColorMode mode_rgba16;
+  lodepng_color_mode_init(&mode_rgba16);
+  mode_rgba16.colortype = LCT_RGBA;
+  mode_rgba16.bitdepth = 16;
+
+  // replace the image data by its conversion to the raw color mode
+  std::vector<unsigned char> converted((image.width * image.height * lodepng_get_bpp(&mode_raw) + 7) / 8);
+  unsigned error = lodepng_convert(converted.data(), image.data.data(), &mode_raw, &mode_rgba16, image.width, image.height);
+  ASSERT_NO_PNG_ERROR(error);
+  image.data = converted;
+
+  testBkgdChunk(r, g, b, r2, g2, b2,
+                image.data, image.width, image.height,
+                mode_raw, mode_png, auto_convert, expect_encoder_error);
+}
+
+void testBkgdChunk() { // runs all bKGD (background color) chunk encode/decode cases
+  std::cout << "testBkgdChunk" << std::endl;
+  // after the two color triples, the (colortype, bitdepth) pairs are given in the order: generated, raw, png ( == bKGD)
+  // here generated means: what color values the pixels will get, so what auto_convert will make it choose
+  testBkgdChunk(255, 0, 0, 255, 0, 0, LCT_RGBA, 8, LCT_RGBA, 8, LCT_RGBA, 8, true);
+  testBkgdChunk(255, 0, 0, 255, 0, 0, LCT_RGBA, 8, LCT_RGB, 8, LCT_RGB, 8, true);
+  testBkgdChunk(255, 0, 0, 255, 0, 0, LCT_RGB, 8, LCT_RGB, 8, LCT_RGB, 8, true);
+  testBkgdChunk(255, 255, 255, 1, 1, 1, LCT_GREY, 1, LCT_RGB, 8, LCT_RGB, 8, true);
+  testBkgdChunk(255, 255, 255, 3, 3, 3, LCT_GREY, 2, LCT_RGB, 8, LCT_RGB, 8, true);
+  testBkgdChunk(255, 255, 255, 15, 15, 15, LCT_GREY, 4, LCT_RGB, 8, LCT_RGB, 8, true);
+  testBkgdChunk(255, 255, 255, 255, 255, 255, LCT_GREY, 8, LCT_RGB, 8, LCT_RGB, 8, true);
+  testBkgdChunk(255, 255, 255, 65535, 65535, 65535, LCT_GREY, 16, LCT_RGB, 16, LCT_RGB, 8, true);
+  testBkgdChunk(123, 0, 0, 123, 0, 0, LCT_GREY, 1, LCT_RGB, 8, LCT_RGB, 8, true);
+  testBkgdChunk(170, 170, 170, 2, 2, 2, LCT_GREY, 1, LCT_RGB, 8, LCT_RGB, 8, true); // 170 = value 2 in 2-bit
+
+  // without auto_convert. Note that it will still convert if different colortype is given for raw and png, it's just
+  // not automatic in that case.
+  testBkgdChunk(255, 0, 0, 255, 0, 0, LCT_RGBA, 8, LCT_RGBA, 8, LCT_RGBA, 8, false);
+  testBkgdChunk(60000, 0, 0, 60000, 0, 0, LCT_RGBA, 8, LCT_RGBA, 8, LCT_RGBA, 16, false);
+  testBkgdChunk(128, 128, 128, 128, 128, 128, LCT_GREY, 8, LCT_RGBA, 8, LCT_GREY, 8, false);
+ {
+    LodePNGColorMode pal; // 200-entry 8-bit palette, used below both as raw and as png color mode
+    lodepng_color_mode_init(&pal);
+    for(int i = 0; i < 200; i++) lodepng_palette_add(&pal, i, i / 2, 0, 255);
+    pal.colortype = LCT_PALETTE;
+    pal.bitdepth = 8;
+    unsigned w = 200;
+    unsigned h = 200;
+    std::vector<unsigned char> img(w * h); // one palette index per pixel: every index 0-199 occurs in each row
+    for(unsigned y = 0; y < h; y++)
+    for(unsigned x = 0; x < w; x++) {
+      img[y * w + x] = x;
+    }
+
+    testBkgdChunk(100, 0, 0, 100, 100, 100, img, w, h, pal, pal, true, false);
+    testBkgdChunk(100, 0, 0, 100, 100, 100, img, w, h, pal, pal, false, false);
+    testBkgdChunk(250, 0, 0, 250, 250, 250, img, w, h, pal, pal, true, true); // encoder error expected; presumably because index 250 is not in the 200-entry palette
+
+    std::vector<unsigned char> fourcolor(w * h); // image that only uses palette indices 0-3
+    for(unsigned y = 0; y < h; y++)
+    for(unsigned x = 0; x < w; x++) {
+      fourcolor[y * w + x] = x & 3;
+    }
+    // palette index 4 expected for output bKGD: auto_convert should turn the 200-sized
+    // palette into one of size 5: 4 values for the fourcolor image above, and then a 5th for
+    // the bkgd index. The other two 4's actually shouldn't matter: it's not defined what
+    // they should be, though currently lodepng sets them also to the palette index...
+    testBkgdChunk(100, 0, 0, 4, 4, 4, fourcolor, w, h, pal, pal, true, false);
+
+
+    std::vector<unsigned char> mini(4); // 2x2 image using palette indices 1-4
+    mini[0] = 1; mini[1] = 2; mini[2] = 3; mini[3] = 4;
+    // here we expect RGB color from the output image, since the image is tiny so it chooses to not add PLTE
+    testBkgdChunk(100, 0, 0, 100, 50, 0, mini, 2, 2, pal, pal, true, false); // palette entry 100 is (100, 50, 0)
+
+    lodepng_color_mode_cleanup(&pal);
+  }
+}
+
+void testBkgdChunk2() { // checks which PNG color mode the encoder picks depending on the bKGD color and auto_convert
+  std::cout << "testBkgdChunk2" << std::endl;
+  Image image;
+  generateTestImageRequiringColorType8(image, LCT_GREY, 2, false);
+
+  // without background, it should choose 2-bit grey for this PNG
+  std::vector<unsigned char> png0;
+  ASSERT_NO_PNG_ERROR(lodepng::encode(png0, image.data, image.width, image.height));
+  lodepng::State state0;
+  unsigned w0, h0;
+  ASSERT_NO_PNG_ERROR(lodepng_inspect(&w0, &h0, &state0, png0.data(), png0.size())); // fail here rather than on stale state
+  ASSERT_EQUALS(2, state0.info_png.color.bitdepth);
+  ASSERT_EQUALS(LCT_GREY, state0.info_png.color.colortype);
+
+  // red background, with auto_convert, it is forced to choose RGB
+  lodepng::State state;
+  LodePNGInfo& info = state.info_png;
+  info.background_defined = 1;
+  info.background_r = 255;
+  info.background_g = 0;
+  info.background_b = 0;
+  std::vector<unsigned char> png1;
+  ASSERT_NO_PNG_ERROR(lodepng::encode(png1, image.data, image.width, image.height, state));
+  lodepng::State state1;
+  unsigned w1, h1;
+  ASSERT_NO_PNG_ERROR(lodepng_inspect(&w1, &h1, &state1, png1.data(), png1.size()));
+  ASSERT_EQUALS(8, state1.info_png.color.bitdepth);
+  ASSERT_EQUALS(LCT_RGB, state1.info_png.color.colortype);
+
+  // grey output required, background color also interpreted as grey
+  state.info_raw.colortype = LCT_RGB;
+  state.info_png.color.colortype = LCT_GREY;
+  state.info_png.color.bitdepth = 1;
+  state.encoder.auto_convert = 0; // the PNG color mode set above must now be used as is
+  info.background_defined = 1;
+  info.background_r = 1;
+  info.background_g = 1;
+  info.background_b = 1;
+  std::vector<unsigned char> png2;
+  ASSERT_NO_PNG_ERROR(lodepng::encode(png2, image.data, image.width, image.height, state));
+  lodepng::State state2;
+  unsigned w2, h2;
+  ASSERT_NO_PNG_ERROR(lodepng_inspect(&w2, &h2, &state2, png2.data(), png2.size()));
+  ASSERT_EQUALS(1, state2.info_png.color.bitdepth);
+  ASSERT_EQUALS(LCT_GREY, state2.info_png.color.colortype);
+}
+
+void doMain() { // calls every test function in a fixed order; the success message is only reached if all of them return
+  //PNG
+  testPNGCodec();
+  testPngSuiteTiny();
+  testPaletteFilterTypesZero();
+  testComplexPNG();
+  testInspectChunk();
+  testPredefinedFilters();
+  testFuzzing();
+  testEncoderErrors();
+  testPaletteToPaletteDecode();
+  testPaletteToPaletteDecode2();
+  testColorProfile();
+  testBkgdChunk();
+  testBkgdChunk2();
+
+  //Colors
+#ifndef DISABLE_SLOW
+  testFewColors(); // only compiled in when DISABLE_SLOW is not defined
+#endif // DISABLE_SLOW
+  testColorKeyConvert();
+  testColorConvert();
+  testColorConvert2();
+  testPaletteToPaletteConvert();
+  testRGBToPaletteConvert();
+  test16bitColorEndianness();
+  testAutoColorModels();
+  testNoAutoConvert();
+
+  //Zlib
+  testCompressZlib();
+  testHuffmanCodeLengths();
+  testCustomZlibCompress();
+  testCustomZlibCompress2();
+  testCustomDeflate();
+  testCustomZlibDecompress();
+  testCustomInflate();
+
+  //lodepng_util
+  testChunkUtil();
+
+  std::cout << "\ntest successful" << std::endl;
+}
+
+int main() {
+  // Runs all tests. The exit status is 0 on success and 1 if any test threw, so scripts and CI can detect failures.
+  try {
+    doMain();
+  } catch(...) {
+    std::cout << "error!" << std::endl;
+    return 1; // previously fell through to "return 0", reporting success for a failed run
+  }
+  return 0;
+}
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/lodepng_util.cpp b/codec/L2/demos/pikEnc/host/third_party/lodepng/lodepng_util.cpp
new file mode 100755
index 0000000000..65a32b5282
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/lodepng_util.cpp
@@ -0,0 +1,750 @@
+/*
+LodePNG Utils
+
+Copyright (c) 2005-2018 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+#include "lodepng_util.h"
+#include <iostream>
+#include <cmath>
+
+namespace lodepng {
+
+LodePNGInfo getPNGHeaderInfo(const std::vector<unsigned char>& png) { // returns the info_png filled in by lodepng_inspect
+  unsigned w, h; // required by lodepng_inspect, not returned to the caller
+  lodepng::State state;
+  lodepng_inspect(&w, &h, &state, png.data(), png.size()); // data() is valid for an empty vector, unlike &png[0]; the error code is ignored as before
+  return state.info_png;
+}
+
+unsigned getChunkInfo(std::vector<std::string>& names, std::vector<size_t>& sizes,
+                      const std::vector<unsigned char>& png) {
+  // Listing chunks is based on the original file, not the decoded png info.
+  // Appends the type and data length of every chunk; returns 0 if ok, 1 if a corrupt chunk was encountered.
+  if(png.size() < 8) return 0; // nothing after the 8-byte signature; also avoids back()/front() on an empty vector
+  const unsigned char *chunk, *begin, *end, *next;
+  end = &png.back() + 1;
+  begin = chunk = &png.front() + 8;
+
+  while(chunk >= begin && end - chunk > 8) { // chunk never exceeds end, so this subtraction is well defined
+    char type[5];
+    lodepng_chunk_type(type, chunk);
+    if(std::string(type).size() != 4) return 1;
+    unsigned length = lodepng_chunk_length(chunk);
+    names.push_back(type);
+    sizes.push_back(length);
+    // the chunk (4 length + 4 type + data + 4 CRC bytes) must fit; compared as sizes so no pointer can overflow
+    if((size_t)(end - chunk) < 12 || length > (size_t)(end - chunk) - 12) return 1;
+    next = lodepng_chunk_next_const(chunk);
+    if (next <= chunk) return 1; // integer overflow
+    chunk = next;
+  }
+  return 0;
+}
+
+unsigned getChunks(std::vector<std::string> names[3],
+                   std::vector<std::vector<unsigned char> > chunks[3],
+                   const std::vector<unsigned char>& png) {
+  if(png.size() < 8) return 0; // nothing after the 8-byte signature; also avoids back()/front() on an empty vector
+  const unsigned char *chunk, *next, *begin, *end;
+  end = &png.back() + 1;
+  begin = chunk = &png.front() + 8;
+  int location = 0; // group that non-critical chunks are stored in: 0 = after IHDR, 1 = after PLTE, 2 = after IDAT
+
+  while(chunk + 8 < end && chunk >= begin) {
+    char type[5];
+    lodepng_chunk_type(type, chunk);
+    std::string name(type);
+    if(name.size() != 4) return 1;
+
+    next = lodepng_chunk_next_const(chunk);
+    if (next <= chunk) return 1; // integer overflow
+
+    if(name == "IHDR") {
+      location = 0;
+    } else if(name == "PLTE") {
+      location = 1;
+    } else if(name == "IDAT") {
+      location = 2;
+    } else if(name == "IEND") {
+      break; // anything after IEND is not part of the PNG or the 3 groups here.
+    } else {
+      if(next > end) return 1; // invalid chunk, content too far
+      names[location].push_back(name);
+      chunks[location].push_back(std::vector<unsigned char>(chunk, next)); // whole chunk, including length, type and CRC
+    }
+
+    chunk = next;
+  }
+  return 0;
+}
+
+
+unsigned insertChunks(std::vector<unsigned char>& png,
+                      const std::vector<std::vector<unsigned char> > chunks[3]) {
+  if(png.size() < 8) return 1; // not even a PNG signature; also avoids back()/front() on an empty vector
+  const unsigned char *chunk, *next, *begin, *end;
+  end = &png.back() + 1;
+  begin = chunk = &png.front() + 8;
+
+  long l0 = 0; //location 0: IHDR-l0-PLTE (or IHDR-l0-l1-IDAT)
+  long l1 = 0; //location 1: PLTE-l1-IDAT (or IHDR-l0-l1-IDAT)
+  long l2 = 0; //location 2: IDAT-l2-IEND
+
+  while(chunk + 8 < end && chunk >= begin) {
+    char type[5];
+    lodepng_chunk_type(type, chunk);
+    std::string name(type);
+    if(name.size() != 4) return 1;
+
+    next = lodepng_chunk_next_const(chunk);
+    if (next <= chunk) return 1; // integer overflow
+
+    if(name == "PLTE") {
+      if(l0 == 0) l0 = chunk - begin + 8;
+    } else if(name == "IDAT") {
+      if(l0 == 0) l0 = chunk - begin + 8;
+      if(l1 == 0) l1 = chunk - begin + 8;
+    } else if(name == "IEND") {
+      if(l2 == 0) l2 = chunk - begin + 8;
+    }
+    chunk = next;
+  }
+
+  if(l0 == 0 || l1 < l0 || l2 < l1) return 1; // IDAT or IEND missing or misordered: the ranges below would be invalid; png is left untouched
+  std::vector<unsigned char> result; // png with each chunk group spliced in at byte offsets l0, l1 and l2
+  result.insert(result.end(), png.begin(), png.begin() + l0);
+  for(size_t i = 0; i < chunks[0].size(); i++) result.insert(result.end(), chunks[0][i].begin(), chunks[0][i].end());
+  result.insert(result.end(), png.begin() + l0, png.begin() + l1);
+  for(size_t i = 0; i < chunks[1].size(); i++) result.insert(result.end(), chunks[1][i].begin(), chunks[1][i].end());
+  result.insert(result.end(), png.begin() + l1, png.begin() + l2);
+  for(size_t i = 0; i < chunks[2].size(); i++) result.insert(result.end(), chunks[2][i].begin(), chunks[2][i].end());
+  result.insert(result.end(), png.begin() + l2, png.end());
+  png.swap(result); // same outcome as assigning, without copying the whole buffer again
+  return 0;
+}
+
+unsigned getFilterTypesInterlaced(std::vector<std::vector<unsigned char> >& filterTypes,
+                                  const std::vector<unsigned char>& png) {
+  // Collects the filter type byte of every scanline: one vector for a non-interlaced image, seven (one per
+  // Adam7 pass, possibly empty) for an interlaced one. Returns 0 if ok, 1 on any error.
+  if(png.size() < 8) return 1; // too short for the PNG signature; also keeps the pointer setup below valid
+  //Get color type and interlace type
+  lodepng::State state;
+  unsigned w, h;
+  unsigned error = lodepng_inspect(&w, &h, &state, &png[0], png.size());
+  if(error) return 1;
+
+  //Read literal data from all IDAT chunks
+  const unsigned char *chunk, *begin, *end, *next;
+  end = &png.back() + 1;
+  begin = chunk = &png.front() + 8;
+  std::vector<unsigned char> zdata;
+
+  while(chunk + 8 < end && chunk >= begin) {
+    char type[5];
+    lodepng_chunk_type(type, chunk);
+    if(std::string(type).size() != 4) break; //Probably not a PNG file
+
+    if(std::string(type) == "IDAT") {
+      const unsigned char* cdata = lodepng_chunk_data_const(chunk);
+      unsigned clength = lodepng_chunk_length(chunk);
+      if(chunk + clength + 12 > end || clength > png.size() || chunk + clength + 12 < begin) {
+        return 1; // corrupt chunk length
+      }
+      zdata.insert(zdata.end(), cdata, cdata + clength);
+    }
+
+    next = lodepng_chunk_next_const(chunk);
+    if (next <= chunk) break; // integer overflow
+    chunk = next;
+  }
+
+  //Decompress all IDAT data (if the while loop ended early, this might fail)
+  if(zdata.empty()) return 1; // no IDAT data found; &zdata[0] below would be invalid
+  std::vector<unsigned char> data;
+  error = lodepng::decompress(data, &zdata[0], zdata.size());
+  if(error) return 1;
+
+  if(state.info_png.interlace_method == 0) {
+    filterTypes.resize(1);
+
+    //A line is 1 filter byte + all pixels
+    size_t linebytes = 1 + lodepng_get_raw_size(w, 1, &state.info_png.color);
+
+    for(size_t i = 0; i < data.size(); i += linebytes) {
+      filterTypes[0].push_back(data[i]);
+    }
+  } else {
+    //Interlaced
+    filterTypes.resize(7);
+    static const unsigned ADAM7_IX[7] = { 0, 4, 0, 2, 0, 1, 0 }; /*x start values*/
+    static const unsigned ADAM7_IY[7] = { 0, 0, 4, 0, 2, 0, 1 }; /*y start values*/
+    static const unsigned ADAM7_DX[7] = { 8, 8, 4, 4, 2, 2, 1 }; /*x delta values*/
+    static const unsigned ADAM7_DY[7] = { 8, 8, 8, 4, 4, 2, 2 }; /*y delta values*/
+    size_t pos = 0;
+    for(size_t j = 0; j < 7; j++) {
+      //A pass without pixels stores no scanlines, so it has no filter bytes in the stream either
+      if(ADAM7_IX[j] >= w || ADAM7_IY[j] >= h) continue;
+      unsigned w2 = (w - ADAM7_IX[j] + ADAM7_DX[j] - 1) / ADAM7_DX[j];
+      unsigned h2 = (h - ADAM7_IY[j] + ADAM7_DY[j] - 1) / ADAM7_DY[j];
+      size_t linebytes = 1 + lodepng_get_raw_size(w2, 1, &state.info_png.color);
+      for(size_t i = 0; i < h2; i++) {
+        if(pos >= data.size()) return 1; //decompressed data is shorter than the image dimensions require
+        filterTypes[j].push_back(data[pos]);
+        pos += linebytes;
+      }
+    }
+  }
+  return 0; /* OK */
+}
+
+unsigned getFilterTypes(std::vector<unsigned char>& filterTypes, const std::vector<unsigned char>& png) {
+  // Gives one filter type per scanline of the image, also for interlaced PNGs. Returns 0 if ok, else an error.
+  std::vector<std::vector<unsigned char> > passes;
+  unsigned error = getFilterTypesInterlaced(passes, png);
+  if(error) return error;
+
+  if(passes.size() == 1) {
+    filterTypes.swap(passes[0]);
+  } else {
+    lodepng::State state;
+    unsigned w, h;
+    if(lodepng_inspect(&w, &h, &state, &png[0], png.size())) return 1;
+    // Interlaced. Simplify it: alternate pass 6 (even rows) and pass 7 (odd rows), the filters closest to what a
+    // non-interlaced image would have. A 1 pixel wide image has no pass 6 (nor 2 and 4); its even rows are then
+    // found in pass 1, 3 or 5. The tables below are indexed with y % 8 and hold 0-based pass numbers and the
+    // right-shift that turns y into the row number inside that pass.
+    static const unsigned PASS_W1[8] = { 0, 6, 4, 6, 2, 6, 4, 6 };
+    static const unsigned SHIFT_W1[8] = { 3, 1, 2, 1, 3, 1, 2, 1 };
+    for(size_t i = 0; i < h; i++) {
+      const size_t pass = (w > 1) ? (i % 2 == 0 ? 5 : 6) : PASS_W1[i & 7];
+      const size_t row = (w > 1) ? i / 2 : i >> SHIFT_W1[i & 7];
+      if(row >= passes[pass].size()) return 1; // defensive: never index outside of the collected pass
+      filterTypes.push_back(passes[pass][row]);
+    }
+  }
+  return 0; /* OK */
+}
+
+int getPaletteValue(const unsigned char* data, size_t i, int bits) {
+  // Returns the i-th bits-wide sample of data for bits = 1, 2, 4 or 8, and 0 for any other bits value. Sample 0 is
+  // taken from the LOW bits of byte 0. NOTE(review): PNG packs the leftmost sample in the high bits - confirm callers.
+  if(bits != 1 && bits != 2 && bits != 4 && bits != 8) return 0;
+  const size_t perbyte = 8 / (size_t)bits; // number of samples stored in one byte
+  return (data[i / perbyte] >> ((i % perbyte) * (size_t)bits)) & ((1 << bits) - 1);
+}
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+// Multiplies the row-major 3x3 matrix m with the vector (x, y, z) and stores the product in (*x2, *y2, *z2).
+// x, y and z are passed by value, so the outputs may point at the same variables the inputs were read from.
+void mulMatrix(float* x2, float* y2, float* z2, const float* m, float x, float y, float z) {
+  float* const out[3] = {x2, y2, z2};
+  for(int row = 0; row < 3; row++) *out[row] = x * m[3 * row] + y * m[3 * row + 1] + z * m[3 * row + 2];
+}
+
+// Replaces the row-major 3x3 matrix by its inverse; there is no check for a zero determinant
+void invMatrix(float* matrix) {
+  const float a = matrix[0], b = matrix[1], c = matrix[2];
+  const float d = matrix[3], e = matrix[4], f = matrix[5];
+  const float g = matrix[6], h = matrix[7], k = matrix[8];
+  const float e0 = e * k - f * h;
+  const float e3 = f * g - d * k;
+  const float e6 = d * h - e * g;
+  const float invdet = 1.0f / (a * e0 + b * e3 + c * e6); // inverse determinant
+  matrix[0] = e0 * invdet;
+  matrix[1] = (c * h - b * k) * invdet;
+  matrix[2] = (b * f - c * e) * invdet;
+  matrix[3] = e3 * invdet;
+  matrix[4] = (a * k - c * g) * invdet;
+  matrix[5] = (d * c - a * f) * invdet;
+  matrix[6] = e6 * invdet;
+  matrix[7] = (g * b - a * h) * invdet;
+  matrix[8] = (a * e - d * b) * invdet;
+}
+
+// Fills m with the 3x3 matrix that goes from linear RGB to XYZ, given the RGB whitepoint and chromaticities in xy colorspace
+void getChrmMatrix(float* m, float wx, float wy, float rx, float ry, float gx, float gy, float bx, float by) {
+  const float white[3] = {wx / wy, 1, (1 - wx - wy) / wy}; // X, Y, Z of each xy point, with Y fixed to 1
+  const float red[3] = {rx / ry, 1, (1 - rx - ry) / ry};
+  const float green[3] = {gx / gy, 1, (1 - gx - gy) / gy};
+  const float blue[3] = {bx / by, 1, (1 - bx - by) / by};
+  float basis[9] = {red[0], green[0], blue[0], red[1], green[1], blue[1], red[2], green[2], blue[2]};
+  invMatrix(basis);
+  float scale[3]; // per-primary factor: the inverted primaries matrix applied to the white point
+  mulMatrix(&scale[0], &scale[1], &scale[2], basis, white[0], white[1], white[2]);
+  const float* const primary[3] = {red, green, blue};
+  for(int i = 0; i < 9; i++) m[i] = scale[i % 3] * primary[i % 3][i / 3]; // column i % 3 is the scaled primary
+}
+
+unsigned convertToXYZ(float* out, const unsigned char* in, // out receives 4 floats per pixel, in holds w * h pixels in mode_in
+                      unsigned w, unsigned h, const LodePNGColorMode* mode_in,
+                      const LodePNGInfo* info) { // returns 0 if ok, 1 if the color profile in info is not supported
+  if(info->iccp_defined && !info->gama_defined && !info->chrm_defined) {
+    return 1;  // fail: iCCP chunk not supported and no fallback gamma/chrm available
+  }
+
+  std::vector<unsigned char> data(w * h * 8); // the pixels as 16-bit RGBA: 8 bytes per pixel
+  LodePNGColorMode mode16 = lodepng_color_mode_make(LCT_RGBA, 16);
+  lodepng_convert(data.data(), in, &mode16, mode_in, w, h);
+
+  size_t n = w * h;
+  for(unsigned i = 0; i < n; i++) {
+    for(unsigned c = 0; c < 4; c++) {
+      size_t j = i * 8 + c * 2;
+      out[i * 4 + c] = (data[j + 0] * 256 + data[j + 1]) / 65535.0; // big endian 16-bit sample scaled to 0.0-1.0
+    }
+  }
+
+  if(info->gama_defined && !info->srgb_defined) {
+    float gamma = 100000.0f / info->gama_gamma;
+    for(unsigned i = 0; i < n; i++) {
+      for(unsigned c = 0; c < 3; c++) { // the 4th channel (alpha) is left as is
+        out[i * 4 + c] = std::pow(out[i * 4 + c], gamma);
+      }
+    }
+  } else {
+    for(unsigned i = 0; i < n; i++) {
+      for(unsigned c = 0; c < 3; c++) {
+        // sRGB gamma expand
+        float& v = out[i * 4 + c];
+        if(v < 0.04045) v = v / 12.92; // linear segment; this used to divide the channel index c instead of the value
+        else v = std::pow((v + 0.055) / 1.055, 2.4);
+      }
+    }
+  }
+
+  if(info->chrm_defined && !info->srgb_defined) {
+    float wx = info->chrm_white_x / 100000.0f, wy = info->chrm_white_y / 100000.0f;
+    float rx = info->chrm_red_x / 100000.0f, ry = info->chrm_red_y / 100000.0f;
+    float gx = info->chrm_green_x / 100000.0f, gy = info->chrm_green_y / 100000.0f;
+    float bx = info->chrm_blue_x / 100000.0f, by = info->chrm_blue_y / 100000.0f;
+    float m[9];
+    getChrmMatrix(m, wx, wy, rx, ry, gx, gy, bx, by);
+    for(unsigned i = 0; i < n; i++) {
+      size_t j = i * 4;
+      mulMatrix(&out[j + 0], &out[j + 1], &out[j + 2], m, out[j + 0], out[j + 1], out[j + 2]);
+    }
+  } else {
+    // linear sRGB to XYZ matrix
+    float m[9] = {0.4124564, 0.3575761, 0.1804375, 0.2126729, 0.7151522, 0.0721750, 0.0193339, 0.1191920, 0.9503041};
+    for(unsigned i = 0; i < n; i++) {
+      size_t j = i * 4;
+      mulMatrix(&out[j + 0], &out[j + 1], &out[j + 2], m, out[j + 0], out[j + 1], out[j + 2]);
+    }
+  }
+
+  return 0; // ok
+}
+
+unsigned convertFromXYZ(unsigned char* out, const float* in, // in holds 4 floats per pixel, out receives w * h pixels in mode_out
+                        unsigned w, unsigned h, const LodePNGColorMode* mode_out,
+                        const LodePNGInfo* info) { // returns 0 if ok, 1 if the color profile in info is not supported
+  std::vector<float> im(in, in + w * h * 4); // working copy, the input is not modified
+  std::vector<unsigned char> data(w * h * 8); // receives the result as 16-bit RGBA: 8 bytes per pixel
+
+  if(info->iccp_defined && !info->gama_defined && !info->chrm_defined) {
+    return 1;  // fail: iCCP chunk not supported and no fallback gamma/chrm available
+  }
+
+  size_t n = w * h;
+
+  if(info->chrm_defined && !info->srgb_defined) {
+    float wx = info->chrm_white_x / 100000.0f, wy = info->chrm_white_y / 100000.0f;
+    float rx = info->chrm_red_x / 100000.0f, ry = info->chrm_red_y / 100000.0f;
+    float gx = info->chrm_green_x / 100000.0f, gy = info->chrm_green_y / 100000.0f;
+    float bx = info->chrm_blue_x / 100000.0f, by = info->chrm_blue_y / 100000.0f;
+    float m[9];
+    getChrmMatrix(m, wx, wy, rx, ry, gx, gy, bx, by);
+    invMatrix(m); // getChrmMatrix gives RGB to XYZ, the opposite direction is needed here
+    for(unsigned i = 0; i < n; i++) {
+      size_t j = i * 4;
+      mulMatrix(&im[j + 0], &im[j + 1], &im[j + 2], m, im[j + 0], im[j + 1], im[j + 2]);
+    }
+  } else {
+    // XYZ to linear sRGB matrix
+    float m[9] = {3.2404542, -1.5371385, -0.4985314, -0.9692660, 1.8760108, 0.0415560,
+                  0.0556434, -0.2040259, 1.0572252};
+    for(unsigned i = 0; i < n; i++) {
+      size_t j = i * 4;
+      mulMatrix(&im[j + 0], &im[j + 1], &im[j + 2], m, im[j + 0], im[j + 1], im[j + 2]);
+    }
+  }
+
+  if(info->gama_defined && !info->srgb_defined) {
+    float gamma = info->gama_gamma / 100000.0f;
+    for(unsigned i = 0; i < n; i++) {
+      for(unsigned c = 0; c < 3; c++) { // the 4th channel (alpha) is left as is
+        im[i * 4 + c] = std::pow(im[i * 4 + c], gamma);
+      }
+    }
+  } else {
+    for(unsigned i = 0; i < n; i++) {
+      for(unsigned c = 0; c < 3; c++) {
+        // sRGB gamma compress
+        float& v = im[i * 4 + c];
+        if(v < 0.0031308) v *= 12.92;
+        else v = 1.055 * std::pow(v, 1/2.4) - 0.055;
+      }
+    }
+  }
+
+  for(unsigned i = 0; i < n; i++) {
+    for(unsigned c = 0; c < 4; c++) {
+      size_t j = i * 8 + c * 2;
+      int i16 = (int)(0.5 + 65535.0 * std::min(std::max(0.0f, im[i * 4 + c]), 1.0f)); // clamp to 0-1, scale to 16 bit, round
+      data[j + 0] = i16 >> 8; // stored big endian
+      data[j + 1] = i16 & 255;
+    }
+  }
+
+  LodePNGColorMode mode16 = lodepng_color_mode_make(LCT_RGBA, 16);
+  lodepng_convert(out, data.data(), mode_out, &mode16, w, h); // NOTE(review): the conversion error code is ignored
+
+  return 0; // ok
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+//This uses a stripped down version of picoPNG to extract detailed zlib information while decompressing. Deflate tables:
+static const unsigned long LENBASE[29] = {3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258}; //base length, indexed by length code - 257
+static const unsigned long LENEXTRA[29] = {0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,  4,  5,  5,  5,  5,  0}; //number of extra bits, indexed by length code - 257
+static const unsigned long DISTBASE[30] = {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577}; //presumably the base distance per distance code
+static const unsigned long DISTEXTRA[30] = {0,0,0,0,1,1,2, 2, 3, 3, 4, 4, 5, 5,  6,  6,  7,  7,  8,  8,   9,   9,  10,  10,  11,  11,  12,   12,   13,   13}; //presumably the number of extra bits per distance code
+static const unsigned long CLCL[19] = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; //order in which the code length code lengths are read from the stream
+
+struct ExtractZlib { // Zlib decompression and information extraction
+  std::vector<ZlibBlockInfo>* zlibinfo;
+  ExtractZlib(std::vector<ZlibBlockInfo>* info) : zlibinfo(info) {};
+  int error;
+
+  unsigned long readBitFromStream(size_t& bitp, const unsigned char* bits) {
+    //returns the bit at bit position bitp and advances bitp; within a byte the least significant bit comes first
+    const size_t at = bitp++;
+    return (unsigned long)((bits[at >> 3] >> (at & 0x7)) & 1);
+  }
+
+  unsigned long readBitsFromStream(size_t& bitp, const unsigned char* bits, size_t nbits) {
+    unsigned long value = 0; //the first bit read becomes the least significant bit of the value
+    for(size_t n = 0; n != nbits; ++n) value |= readBitFromStream(bitp, bits) << n;
+    return value;
+  }
+
+  struct HuffmanTree {
+    int makeFromLengths(const std::vector<unsigned long>& bitlen, unsigned long maxbitlen) { //make tree given the lengths; returns 0 if ok, 55 if the lengths do not fit in the tree
+      unsigned long numcodes = (unsigned long)(bitlen.size()), treepos = 0, nodefilled = 0;
+      std::vector<unsigned long> tree1d(numcodes), blcount(maxbitlen + 1, 0), nextcode(maxbitlen + 1, 0);
+      //count number of instances of each code length
+      for(unsigned long bits = 0; bits < numcodes; bits++) blcount[bitlen[bits]]++;
+      for(unsigned long bits = 1; bits <= maxbitlen; bits++) {
+        nextcode[bits] = (nextcode[bits - 1] + blcount[bits - 1]) << 1;
+      }
+      //generate all the codes
+      for(unsigned long n = 0; n < numcodes; n++) if(bitlen[n] != 0) tree1d[n] = nextcode[bitlen[n]]++;
+      tree2d.clear(); tree2d.resize(numcodes * 2, 32767); //32767 here means the tree2d isn't filled there yet
+      for(unsigned long n = 0; n < numcodes; n++) //the codes
+      for(unsigned long i = 0; i < bitlen[n]; i++) { //the bits for this code
+        unsigned long bit = (tree1d[n] >> (bitlen[n] - i - 1)) & 1; //most significant bit of the code first
+        if(treepos > numcodes - 2) return 55;
+        if(tree2d[2 * treepos + bit] == 32767) { //not yet filled in
+          if(i + 1 == bitlen[n]) {
+            //last bit
+            tree2d[2 * treepos + bit] = n;
+            treepos = 0;
+          } else {
+            //addresses are encoded as values > numcodes
+            tree2d[2 * treepos + bit] = ++nodefilled + numcodes;
+            treepos = nodefilled;
+          }
+        }
+        else treepos = tree2d[2 * treepos + bit] - numcodes; //subtract numcodes from address to get address value
+      }
+      return 0;
+    }
+    int decode(bool& decoded, unsigned long& result, size_t& treepos, unsigned long bit) const { //Decodes a symbol from the tree
+      //follows one bit from node treepos; returns 0 if ok, 11 if treepos is not a node of the tree
+      unsigned long numcodes = (unsigned long)tree2d.size() / 2;
+      if(treepos >= numcodes) return 11; //error: you appeared outside the codetree
+      result = tree2d[2 * treepos + bit];
+      decoded = (result < numcodes); //values below numcodes are symbols, others are node addresses
+      treepos = decoded ? 0 : result - numcodes;
+      return 0;
+    }
+    //2D representation of a huffman tree: one dimension is "0" or "1", the other contains all nodes and leaves.
+    std::vector<unsigned long> tree2d;
+  };
+
+  void inflate(std::vector<unsigned char>& out, const std::vector<unsigned char>& in, size_t inpos = 0) {
+    size_t bp = 0, pos = 0; //bit pointer and byte pointer, both relative to in[inpos]
+    if(inpos >= in.size()) { error = 52; return; } //error: nothing to read, &in[inpos] would be out of range
+    const size_t inlength = in.size() - inpos; //bytes available from in[inpos] on; in.size() overstated it by inpos
+    error = 0;
+    unsigned long BFINAL = 0;
+    while(!BFINAL && !error) {
+      size_t uncomprblockstart = pos, bpstart = bp;
+      if((bp + 2) >> 3 >= inlength) { error = 52; return; } //error, the 3 header bits would be read past memory
+      BFINAL = readBitFromStream(bp, &in[inpos]);
+      unsigned long BTYPE = readBitFromStream(bp, &in[inpos]); BTYPE += 2 * readBitFromStream(bp, &in[inpos]);
+      zlibinfo->resize(zlibinfo->size() + 1);
+      zlibinfo->back().btype = BTYPE;
+      if(BTYPE == 3) { error = 20; return; } //error: invalid BTYPE
+      else if(BTYPE == 0) inflateNoCompression(out, &in[inpos], bp, pos, inlength);
+      else inflateHuffmanBlock(out, &in[inpos], bp, pos, inlength, BTYPE);
+      zlibinfo->back().compressedbits = bp - bpstart;
+      zlibinfo->back().uncompressedbytes = pos - uncomprblockstart;
+    }
+  }
+
+  void generateFixedTrees(HuffmanTree& tree, HuffmanTree& treeD) { //get the tree of a deflated block with fixed tree
+    //literal/length code lengths: 8 bits for symbols 0-143, 9 for 144-255, 7 for 256-279, 8 for 280-287
+    std::vector<unsigned long> lengths(288), lengthsD(32, 5);
+    for(size_t sym = 0; sym < lengths.size(); sym++) lengths[sym] = sym < 144 ? 8 : (sym < 256 ? 9 : (sym < 280 ? 7 : 8));
+    tree.makeFromLengths(lengths, 15);
+    treeD.makeFromLengths(lengthsD, 15);
+  }
+
+  //the code tree for Huffman codes, dist codes, and code length codes
+  HuffmanTree codetree, codetreeD, codelengthcodetree;
+  unsigned long huffmanDecodeSymbol(const unsigned char* in, size_t& bp, const HuffmanTree& tree, size_t inlength) {
+    //decode a single symbol from given list of bits with given code tree. return value is the symbol
+    bool decoded; unsigned long ct;
+    for(size_t treepos = 0;;) {
+      //at a byte boundary the next byte must exist: index inlength is already past the end (this used to test ">")
+      if((bp & 0x07) == 0 && (bp >> 3) >= inlength) { error = 10; return 0; } //error: end reached without endcode
+      error = tree.decode(decoded, ct, treepos, readBitFromStream(bp, in));
+      if(error) return 0; //stop, an error happened
+      if(decoded) return ct;
+    }
+  }
+
+  void getTreeInflateDynamic(HuffmanTree& tree, HuffmanTree& treeD,
+                             const unsigned char* in, size_t& bp, size_t inlength) {
+    size_t bpstart = bp;
+    //get the tree of a deflated block with dynamic tree, the tree itself is also Huffman compressed with a known tree
+    std::vector<unsigned long> bitlen(288, 0), bitlenD(32, 0);
+    if((bp >> 3) + 2 >= inlength) { error = 49; return; } //the bit pointer is or will go past the memory (written so that a tiny inlength cannot underflow)
+    size_t HLIT =  readBitsFromStream(bp, in, 5) + 257; //number of literal/length codes + 257
+    size_t HDIST = readBitsFromStream(bp, in, 5) + 1; //number of dist codes + 1
+    size_t HCLEN = readBitsFromStream(bp, in, 4) + 4; //number of code length codes + 4
+    zlibinfo->back().hlit = HLIT - 257;
+    zlibinfo->back().hdist = HDIST - 1;
+    zlibinfo->back().hclen = HCLEN - 4;
+    std::vector<unsigned long> codelengthcode(19); //lengths of tree to decode the lengths of the dynamic tree
+    for(size_t i = 0; i < 19; i++) codelengthcode[CLCL[i]] = (i < HCLEN) ? readBitsFromStream(bp, in, 3) : 0;
+    //code length code lengths
+    for(size_t i = 0; i < codelengthcode.size(); i++) zlibinfo->back().clcl.push_back(codelengthcode[i]);
+    error = codelengthcodetree.makeFromLengths(codelengthcode, 7); if(error) return;
+    size_t i = 0, replength;
+    while(i < HLIT + HDIST) {
+      unsigned long code = huffmanDecodeSymbol(in, bp, codelengthcodetree, inlength); if(error) return;
+      zlibinfo->back().treecodes.push_back(code); //tree symbol code
+      if(code <= 15)  { if(i < HLIT) bitlen[i++] = code; else bitlenD[i++ - HLIT] = code; } //a length code
+      else if(code == 16) { //repeat previous
+        if(bp >> 3 >= inlength) { error = 50; return; } //error, bit pointer jumps past memory
+        replength = 3 + readBitsFromStream(bp, in, 2);
+        if(i == 0) { error = 54; return; } //error: nothing to repeat yet; i - 1 would wrap around and index out of bounds
+        //set value to the previous code
+        unsigned long value = ((i - 1) < HLIT) ? bitlen[i - 1] : bitlenD[i - HLIT - 1];
+        for(size_t n = 0; n < replength; n++) { //repeat this value in the next lengths
+          if(i >= HLIT + HDIST) { error = 13; return; } //error: i is larger than the amount of codes
+          if(i < HLIT) bitlen[i++] = value; else bitlenD[i++ - HLIT] = value;
+        }
+      } else if(code == 17) { //repeat "0" 3-10 times
+        if(bp >> 3 >= inlength) { error = 50; return; } //error, bit pointer jumps past memory
+        replength = 3 + readBitsFromStream(bp, in, 3);
+        zlibinfo->back().treecodes.push_back(replength); //tree symbol code repetitions
+        for(size_t n = 0; n < replength; n++) { //repeat this value in the next lengths
+          if(i >= HLIT + HDIST) { error = 14; return; } //error: i is larger than the amount of codes
+          if(i < HLIT) bitlen[i++] = 0; else bitlenD[i++ - HLIT] = 0;
+        }
+      } else if(code == 18) { //repeat "0" 11-138 times
+        if(bp >> 3 >= inlength) { error = 50; return; } //error, bit pointer jumps past memory
+        replength = 11 + readBitsFromStream(bp, in, 7);
+        zlibinfo->back().treecodes.push_back(replength); //tree symbol code repetitions
+        for(size_t n = 0; n < replength; n++) { //repeat this value in the next lengths
+          if(i >= HLIT + HDIST) { error = 15; return; } //error: i is larger than the amount of codes
+          if(i < HLIT) bitlen[i++] = 0; else bitlenD[i++ - HLIT] = 0;
+        }
+      }
+      else { error = 16; return; } //error: somehow an unexisting code appeared. This can never happen.
+    }
+    if(bitlen[256] == 0) { error = 64; return; } //the length of the end code 256 must be larger than 0
+    error = tree.makeFromLengths(bitlen, 15);
+    if(error) return; //now we've finally got HLIT and HDIST, so generate the code trees, and the function is done
+    error = treeD.makeFromLengths(bitlenD, 15);
+    if(error) return;
+    zlibinfo->back().treebits = bp - bpstart;
+    //lit/len/end symbol lengths
+    for(size_t j = 0; j < bitlen.size(); j++) zlibinfo->back().litlenlengths.push_back(bitlen[j]);
+    //dist lengths
+    for(size_t j = 0; j < bitlenD.size(); j++) zlibinfo->back().distlengths.push_back(bitlenD[j]);
+  }
+
+  /*
+  Inflates one Huffman-coded deflate block and logs every decoded symbol into zlibinfo->back().
+  out: receives the decompressed bytes (appended at the end).
+  in, inlength: the compressed stream and its size in bytes.
+  bp: bit position in the stream; advanced past everything that is consumed.
+  pos: output position; incremented once for every byte appended to out.
+  btype: 1 = use the fixed trees, 2 = first read the dynamic trees from the stream.
+  On failure the member "error" is set to a non-zero code and the function returns early.
+  */
+  void inflateHuffmanBlock(std::vector<unsigned char>& out,
+                           const unsigned char* in, size_t& bp, size_t& pos, size_t inlength, unsigned long btype) {
+    size_t numlit = 0, numlen = 0; //for logging
+    if(btype == 1) { generateFixedTrees(codetree, codetreeD); }
+    else if(btype == 2) { getTreeInflateDynamic(codetree, codetreeD, in, bp, inlength); if(error) return; }
+    for(;;) {
+      unsigned long code = huffmanDecodeSymbol(in, bp, codetree, inlength); if(error) return;
+      //every symbol gets one entry in each lz77_* vector; the placeholders pushed here
+      //are overwritten further below when the symbol turns out to be a length code
+      zlibinfo->back().lz77_lcode.push_back(code); //output code
+      zlibinfo->back().lz77_dcode.push_back(0);
+      zlibinfo->back().lz77_lbits.push_back(0);
+      zlibinfo->back().lz77_dbits.push_back(0);
+      zlibinfo->back().lz77_lvalue.push_back(0);
+      zlibinfo->back().lz77_dvalue.push_back(0);
+
+      if(code == 256) {
+        break; //end code
+      } else if(code <= 255) { //literal symbol
+        out.push_back((unsigned char)(code));
+        pos++;
+        numlit++;
+      } else if(code >= 257 && code <= 285) { //length code
+        size_t length = LENBASE[code - 257], numextrabits = LENEXTRA[code - 257];
+        if((bp >> 3) >= inlength) { error = 51; return; } //error, bit pointer will jump past memory
+        length += readBitsFromStream(bp, in, numextrabits);
+        unsigned long codeD = huffmanDecodeSymbol(in, bp, codetreeD, inlength); if(error) return;
+        if(codeD > 29) { error = 18; return; } //error: invalid dist code (30-31 are never used)
+        unsigned long dist = DISTBASE[codeD], numextrabitsD = DISTEXTRA[codeD];
+        if((bp >> 3) >= inlength) { error = 51; return; } //error, bit pointer will jump past memory
+        dist += readBitsFromStream(bp, in, numextrabitsD);
+        size_t start = pos;
+        //error: the distance reaches before the first output byte. Without this check "start - dist"
+        //wraps around and out[back] is read out of bounds. Reported with code 52, which this decoder
+        //already uses for accesses outside a buffer.
+        //NOTE(review): assumes pos equals the number of bytes in out on entry -- confirm in inflate()
+        if(dist > start) { error = 52; return; }
+        size_t back = start - dist; //backwards
+        for(size_t i = 0; i < length; i++) {
+          out.push_back(out[back++]);
+          pos++;
+          if(back >= start) back = start - dist; //length > dist: the copied run repeats
+        }
+        numlen++;
+        zlibinfo->back().lz77_dcode.back() = codeD; //output distance code
+        zlibinfo->back().lz77_lbits.back() = numextrabits; //output length extra bits
+        zlibinfo->back().lz77_dbits.back() = numextrabitsD; //output dist extra bits
+        zlibinfo->back().lz77_lvalue.back() = length; //output length
+        zlibinfo->back().lz77_dvalue.back() = dist; //output dist
+      }
+      //codes above 285 are logged in lz77_lcode but otherwise ignored
+    }
+    zlibinfo->back().numlit = numlit; //output number of literal symbols
+    zlibinfo->back().numlen = numlen; //output number of length symbols
+  }
+
+  /*
+  Copies one stored (uncompressed) deflate block to out.
+  out: receives the LEN literal bytes of the block (appended).
+  in, inlength: the compressed stream and its size in bytes.
+  bp: bit position in the stream; rounded up to a byte boundary first and left just past the block data.
+  pos: output position; incremented once for every byte appended to out.
+  On failure the member "error" is set (52, 21 or 23) and the function returns early.
+  */
+  void inflateNoCompression(std::vector<unsigned char>& out,
+                            const unsigned char* in, size_t& bp, size_t& pos, size_t inlength) {
+    while((bp & 0x7) != 0) bp++; //go to first boundary of byte
+    size_t p = bp / 8;
+    //error, bit pointer will jump past memory. The "inlength < 4" test keeps the unsigned
+    //subtraction from wrapping around, which would let the four header bytes be read out of bounds.
+    if(inlength < 4 || p >= inlength - 4) { error = 52; return; }
+    unsigned long LEN = in[p] + 256u * in[p + 1], NLEN = in[p + 2] + 256u * in[p + 3]; p += 4;
+    if(LEN + NLEN != 65535) { error = 21; return; } //error: NLEN is not one's complement of LEN
+    if(p + LEN > inlength) { error = 23; return; } //error: reading outside of in buffer
+    for(unsigned long n = 0; n < LEN; n++) {
+      out.push_back(in[p++]); //read LEN bytes of literal data
+      pos++;
+    }
+    bp = p * 8;
+  }
+
+  /*
+  Checks the two zlib header bytes of "in" and, when they are acceptable, inflates
+  the deflate data that starts at byte 2 into "out".
+  Returns 0 on success, otherwise an error code (53, 24, 25, 26 or whatever the
+  member "error" holds after inflating). The adler32 checksum is skipped and ignored.
+  */
+  int decompress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in) {
+    //error, size of zlib data too small
+    if(in.size() < 2) return 53;
+
+    const unsigned long cmf = in[0];
+    const unsigned long flg = in[1];
+
+    //error: 256 * in[0] + in[1] must be a multiple of 31, the FCHECK value is supposed to be made that way
+    if((cmf * 256 + flg) % 31 != 0) return 24;
+
+    const unsigned long method = cmf & 15;
+    const unsigned long windowInfo = (cmf >> 4) & 15;
+    const unsigned long presetDictionary = (flg >> 5) & 1;
+
+    //error: only compression method 8: inflate with sliding window of 32k is supported by the PNG spec
+    if(method != 8 || windowInfo > 7) return 25;
+
+    //error: the PNG spec says about the zlib stream: "The additional flags shall not specify a preset dictionary."
+    if(presetDictionary != 0) return 26;
+
+    inflate(out, in, 2);
+    return error;
+  }
+};
+
+/*
+PNG decoding and information extraction.
+Walks the chunks of a PNG file, concatenates the data of all IDAT chunks and runs it through
+ExtractZlib, which records the zlib block details into the vector given to the constructor.
+After decode(), "error" is 0 on success or a non-zero error code.
+*/
+struct ExtractPNG {
+  std::vector<ZlibBlockInfo>* zlibinfo;
+  int error;
+  ExtractPNG(std::vector<ZlibBlockInfo>* info) : zlibinfo(info), error(0) {}
+
+  //Decodes the PNG in in[0..size) and leaves the result code in "error".
+  void decode(const unsigned char* in, size_t size) {
+    error = 0;
+    if(size == 0 || in == 0) { error = 48; return; } //the given data is empty
+    readPngHeader(&in[0], size); if(error) return;
+    size_t pos = 33; //first byte of the first chunk after the header
+    std::vector<unsigned char> idat; //the data from idat chunks
+    bool IEND = false;
+    //loop through the chunks, ignoring unknown chunks and stopping at IEND chunk.
+    //IDAT data is appended to the idat vector
+    while(!IEND) {
+      //error: size of the in buffer too small to contain next chunk
+      if(pos + 8 >= size) { error = 30; return; }
+      size_t chunkLength = read32bitInt(&in[pos]); pos += 4;
+      if(chunkLength > 2147483647) { error = 63; return; }
+      //error: size of the in buffer too small to contain next chunk.
+      //pos now points at the 4-byte chunk type and the check above guarantees pos + 4 < size, so the
+      //subtraction cannot wrap; the chunk data that follows the type must fit in what remains.
+      if(chunkLength > size - pos - 4) { error = 35; return; }
+      //IDAT chunk, containing compressed image data
+      if(in[pos + 0] == 'I' && in[pos + 1] == 'D' && in[pos + 2] == 'A' && in[pos + 3] == 'T') {
+        idat.insert(idat.end(), &in[pos + 4], &in[pos + 4 + chunkLength]);
+        pos += (4 + chunkLength);
+      } else if(in[pos + 0] == 'I' && in[pos + 1] == 'E' && in[pos + 2] == 'N' && in[pos + 3] == 'D') {
+          pos += 4;
+          IEND = true;
+      } else { //it's not an implemented chunk type, so ignore it: skip over the data
+        pos += (chunkLength + 4); //skip 4 letters and uninterpreted data of unimplemented chunk
+      }
+      pos += 4; //step over CRC (which is ignored)
+    }
+    std::vector<unsigned char> out; //now the out buffer will be filled
+    ExtractZlib zlib(zlibinfo); //decompress with the Zlib decompressor
+    error = zlib.decompress(out, idat); //non-zero if the zlib decompressor returned an error
+  }
+
+  //checks the PNG signature and that the first chunk is IHDR; sets "error" to 27, 28 or 29 otherwise
+  void readPngHeader(const unsigned char* in, size_t inlength) {
+    if(inlength < 29) { error = 27; return; } //error: the data length is smaller than the length of the header
+    if(in[0] != 137 || in[1] != 80 || in[2] != 78 || in[3] != 71
+    || in[4] != 13 || in[5] != 10 || in[6] != 26 || in[7] != 10) { error = 28; return; } //no PNG signature
+    //error: it doesn't start with a IHDR chunk!
+    if(in[12] != 'I' || in[13] != 'H' || in[14] != 'D' || in[15] != 'R') { error = 29; return; }
+  }
+
+  //returns the bit at bit position bitp, counting from the most significant bit of each byte; advances bitp
+  unsigned long readBitFromReversedStream(size_t& bitp, const unsigned char* bits) {
+    unsigned long result = (bits[bitp >> 3] >> (7 - (bitp & 0x7))) & 1;
+    bitp++;
+    return result;
+  }
+
+  //reads nbits bits, the first bit read becoming the most significant bit of the result
+  unsigned long readBitsFromReversedStream(size_t& bitp, const unsigned char* bits, unsigned long nbits) {
+    unsigned long result = 0;
+    //counts down from nbits - 1 to 0; the loop ends when the unsigned i wraps around past 0
+    for(size_t i = nbits - 1; i < nbits; i--) result += ((readBitFromReversedStream(bitp, bits)) << i);
+    return result;
+  }
+
+  //ORs "bit" into the stream at bit position bitp (most significant bit first); advances bitp
+  void setBitOfReversedStream(size_t& bitp, unsigned char* bits, unsigned long bit) {
+    bits[bitp >> 3] |=  (bit << (7 - (bitp & 0x7))); bitp++;
+  }
+
+  //reads a big endian 32-bit unsigned integer from 4 bytes
+  unsigned long read32bitInt(const unsigned char* buffer) {
+    //widen before shifting: shifting the int-promoted byte left by 24 overflows a signed int for values >= 128
+    return ((unsigned long)buffer[0] << 24) | ((unsigned long)buffer[1] << 16)
+         | ((unsigned long)buffer[2] << 8) | (unsigned long)buffer[3];
+  }
+};
+
+//Extracts the zlib block information of the PNG file in "in" into zlibinfo.
+//When decoding fails, the numeric error code is printed to standard output.
+void extractZlibInfo(std::vector<ZlibBlockInfo>& zlibinfo, const std::vector<unsigned char>& in) {
+  ExtractPNG decoder(&zlibinfo);
+  //indexing an empty vector is undefined; pass a null pointer instead, which decode() reports as error 48
+  decoder.decode(in.empty() ? 0 : &in[0], in.size());
+
+  if(decoder.error) std::cout << "extract error: " << decoder.error << std::endl;
+}
+
+} // namespace lodepng
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/lodepng_util.h b/codec/L2/demos/pikEnc/host/third_party/lodepng/lodepng_util.h
new file mode 100755
index 0000000000..305951fcc2
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/lodepng_util.h
@@ -0,0 +1,206 @@
+/*
+LodePNG Utils
+
+Copyright (c) 2005-2018 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+/*
+Extra C++ utilities for LodePNG, for convenience.
+Not part of the stable API of lodepng, more loose separate utils.
+*/
+
+#ifndef LODEPNG_UTIL_H
+#define LODEPNG_UTIL_H
+
+#include <string>
+#include <vector>
+#include "lodepng.h"
+
+namespace lodepng {
+
+/*
+Returns info from the header of the PNG by value, purely for convenience.
+Does NOT check for errors. Returns bogus info if the PNG has an error.
+Does not require cleanup of allocated memory because no palette or text chunk
+info is in the LodePNGInfo object after checking only the header of the PNG.
+*/
+LodePNGInfo getPNGHeaderInfo(const std::vector<unsigned char>& png);
+
+/*
+Get the names and sizes of all chunks in the PNG file.
+Returns 0 if ok, non-0 if error happened.
+*/
+unsigned getChunkInfo(std::vector<std::string>& names, std::vector<size_t>& sizes,
+                      const std::vector<unsigned char>& png);
+
+/*
+Returns the names and full chunks (including the name and everything else that
+makes up the chunk) for all chunks except IHDR, PLTE, IDAT and IEND.
+It separates the chunks into 3 separate lists, representing the chunks between
+certain critical chunks: 0: IHDR-PLTE, 1: PLTE-IDAT, 2: IDAT-IEND
+Returns 0 if ok, non-0 if error happened.
+*/
+unsigned getChunks(std::vector<std::string> names[3],
+                   std::vector<std::vector<unsigned char> > chunks[3],
+                   const std::vector<unsigned char>& png);
+
+/*
+Inserts chunks into the given png file. The chunks must be fully encoded,
+including length, type, content and CRC.
+The array index determines where it goes:
+0: between IHDR and PLTE, 1: between PLTE and IDAT, 2: between IDAT and IEND.
+They're appended at the end of those locations within the PNG.
+Returns 0 if ok, non-0 if error happened.
+*/
+unsigned insertChunks(std::vector<unsigned char>& png,
+                      const std::vector<std::vector<unsigned char> > chunks[3]);
+
+/*
+Get the filtertypes of each scanline in this PNG file.
+Returns 0 if ok, 1 if PNG decoding error happened.
+
+For a non-interlaced PNG, it returns one filtertype per scanline, in order.
+
+For interlaced PNGs, it returns a result as if it's not interlaced. It returns
+one filtertype per scanline, in order. The values match pass 6 and 7 of the
+Adam7 interlacing, alternating between the two, so that the values correspond
+the most to their scanlines.
+*/
+unsigned getFilterTypes(std::vector<unsigned char>& filterTypes, const std::vector<unsigned char>& png);
+
+/*
+Get the filtertypes of each scanline in every interlace pass of this PNG file.
+Returns 0 if ok, 1 if PNG decoding error happened.
+
+For a non-interlaced PNG, it returns one filtertype per scanline, in order, in
+a single std::vector in filterTypes.
+
+For an interlaced PNG, it returns 7 std::vectors in filterTypes, one for each
+Adam7 pass. The amount of values per pass can be calculated as follows, where
+w and h are the size of the image and all divisions are integer divisions:
+pass 1: (h + 7) / 8
+pass 2: w <= 4 ? 0 : (h + 7) / 8
+pass 3: h <= 4 ? 0 : (h + 7) / 8
+pass 4: w <= 2 ? 0 : (h + 3) / 4
+pass 5: h <= 2 ? 0 : (h + 3) / 4
+pass 6: w <= 1 ? 0 : (h + 1) / 2
+pass 7: h <= 1 ? 0 : (h + 1) / 2
+*/
+unsigned getFilterTypesInterlaced(std::vector<std::vector<unsigned char> >& filterTypes,
+                                  const std::vector<unsigned char>& png);
+
+/*
+Returns the value of the i-th pixel in an image with 1, 2, 4 or 8-bit color.
+E.g. if bits is 4 and i is 5, it returns the 5th nibble (4-bit group), which
+is the second half of the 3rd byte, in big endian (PNG's endian order).
+*/
+int getPaletteValue(const unsigned char* data, size_t i, int bits);
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+/*
+Converts the RGB color to XYZ color given the color profile chunks in the PNG info.
+Supports the gAMA, cHRM and sRGB colorimetry chunks, but not iCCP. If no colorimetry chunks are present,
+it assumes the format is sRGB.
+For more information, see the chunk specifications in the PNG specification.
+Some background:
+A PNG image contains RGB data inside, but this data may use a specific RGB model (by default sRGB but
+different if colorimetry chunks are given).
+The computer display and/or operating system can have another RGB model (typically sRGB, but not necessarily).
+The PNG chunks describe what format the data inside has, not the format of the display. To correctly
+display a PNG image on a display, a conversion is needed if their models differ.
+Some options to achieve that are:
+*) If your use case already supports color management on its own, you can give it the RGB values straight from
+   the PNG image and give it the information from the cHRM, gAMA, sRGB and iCCP chunks (which you can find
+   in the LodePNGInfo), and the color management should then handle it correctly for you. You don't need
+   this function here in that case.
+*) If your use case does not support color management, you may instead want to give it the RGB values in a
+   consistent color model, such as sRGB, but the PNG does not necessarily have it in this desired model.
+   In that case, use the function below (or similar other function from elsewhere, e.g. one that supports
+   iCCP too) to convert it to the absolute color space XYZ, and then you can convert it from XYZ to sRGB
+   or any other desired color space easily (since XYZ is absolute), e.g. with the counterpart convertFromXYZ
+   further below.
+Parameters:
+*) out: 4 floats per pixel, X,Y,Z,alpha color format, in range 0-1 (normally), must be allocated to
+        have 4 * w * h floats available.
+*) in: input RGB color, in byte format given by mode_in and RGB model given by info
+*) w, h: image size
+*) mode_in: byte format of in (amount of channels, bit depth)
+*) info: PNG info with possibly an RGB color model in cHRM,gAMA and/or sRGB chunks
+*) return value: 0 if ok, positive value if error
+*/
+unsigned convertToXYZ(float* out, const unsigned char* in,
+                      unsigned w, unsigned h, const LodePNGColorMode* mode_in,
+                      const LodePNGInfo* info);
+
+/*
+Converts XYZ to RGB in the RGB color model given by info and byte format by mode_out.
+If info has no colorimetry chunks, converts to sRGB.
+Parameters:
+*) out: output color in the RGB model given by the color model in info, must have
+        enough bytes allocated to contain pixels in the mode_out format.
+*) in: 4 floats per pixel, X,Y,Z,alpha color format, in range 0-1 (normally).
+*) w, h: image size
+*) mode_out: byte format of out (amount of channels, bit depth)
+*) info: PNG info with possibly an RGB color model in cHRM,gAMA and/or sRGB chunks
+*) return value: 0 if ok, positive value if error
+*/
+unsigned convertFromXYZ(unsigned char* out, const float* in,
+                        unsigned w, unsigned h, const LodePNGColorMode* mode_out,
+                        const LodePNGInfo* info);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*
+The information for extractZlibInfo.
+*/
+// One record per zlib (deflate) block. The struct has no constructor, so the scalar
+// members hold indeterminate values until they are assigned.
+struct ZlibBlockInfo {
+  int btype; //block type (0-2)
+  size_t compressedbits; //size of compressed block in bits
+  size_t uncompressedbytes; //size of uncompressed block in bytes
+
+  // only filled in for block type 2
+  size_t treebits; //encoded tree size in bits
+  int hlit; //the HLIT value that was filled in for this tree
+  int hdist; //the HDIST value that was filled in for this tree
+  int hclen; //the HCLEN value that was filled in for this tree
+  std::vector<int> clcl; //19 code length code lengths (compressed tree's tree)
+  // Note: the repetition count that follows a code 16 is not recorded here, only those of 17 and 18.
+  std::vector<int> treecodes; //N tree codes, with values 0-18. Values 17 or 18 are followed by the repetition value.
+  std::vector<int> litlenlengths; //288 code lengths for lit/len symbols
+  std::vector<int> distlengths; //32 code lengths for dist symbols
+
+  // only filled in for block types 1 or 2
+  std::vector<int> lz77_lcode; //LZ77 codes. 0-255: literals. 256: end symbol. 257-285: length code of length/dist pairs
+  // the next vectors have the same size as lz77_lcode, but an element only has meaningful value if lz77_lcode contains a length code.
+  // (for every other symbol the element is 0)
+  std::vector<int> lz77_dcode; //distance code
+  std::vector<int> lz77_lbits; //number of extra bits of the length code
+  std::vector<int> lz77_dbits; //number of extra bits of the distance code
+  std::vector<int> lz77_lvalue; //match length, extra bits included
+  std::vector<int> lz77_dvalue; //match distance, extra bits included
+  size_t numlit; //number of lit codes in this block
+  size_t numlen; //number of len codes in this block
+};
+
+//Extracts all info needed from a PNG file to reconstruct the zlib compression exactly.
+void extractZlibInfo(std::vector<ZlibBlockInfo>& zlibinfo, const std::vector<unsigned char>& in);
+
+} // namespace lodepng
+
+#endif /*LODEPNG_UTIL_H inclusion guard*/
diff --git a/codec/L2/demos/pikEnc/host/third_party/lodepng/pngdetail.cpp b/codec/L2/demos/pikEnc/host/third_party/lodepng/pngdetail.cpp
new file mode 100755
index 0000000000..c0aa16999c
--- /dev/null
+++ b/codec/L2/demos/pikEnc/host/third_party/lodepng/pngdetail.cpp
@@ -0,0 +1,1177 @@
+/*
+LodePNG pngdetail
+
+Copyright (c) 2005-2018 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+//g++ pngdetail.cpp lodepng_util.cpp lodepng.cpp -ansi -pedantic -Wall -Wextra -o pngdetail -O3
+
+
+/*
+Utility program that shows a lot of information in the console about a PNG file,
+including color type, text chunks, the names and sizes of all chunks in the
+image, all the zlib compression blocks and symbols, etc...
+
+compression info:
+./pngdetail -sfczB image.png
+
+everything, 8-bit:
+./pngdetail -sPlLA#cfzB7 image.png
+
+everything, 16-bit:
+./pngdetail -sPlLA@cfzB7 image.png
+
+everything except huge output:
+./pngdetail -sPlAcfzB image.png
+*/
+
+#include "lodepng.h"
+#include "lodepng_util.h"
+#include <iostream>
+#include <iomanip>
+#include <map>
+#include <cmath>
+#include <sstream>
+#include <algorithm>
+#include <stdio.h>
+#include <inttypes.h>
+
+// Prints the program name, the lodepng version and the list of command line options to standard output.
+void showHelp() {
+  std::cout << "pngdetail by Lode Vandevenne" << std::endl;
+  std::cout << "version: " << LODEPNG_VERSION_STRING << std::endl;
+  // The "--size" line is listed once, next to "-r" and "--mode" which it belongs to; a second,
+  // misplaced copy that used to follow the "--format" values has been removed.
+  std::cout << "Shows detailed information about a PNG image, its compression and possible corruptions.\n"
+               "Usage: pngdetail [filename] [options]...\n"
+               "Without options shows a default set of stats. With options, shows only selected options.\n"
+               "E.g. 'pngdetail image.png -plc' to show png info, palette info and chunks\n"
+               "Options:\n"
+               "-s: show header summary on one line\n"
+               "-h: show header info\n"
+               "-p: show PNG file info\n"
+               "-i: show ICC profile in full (if any)\n"
+               "--format=<format>: hex display mode for -i:\n"
+               "    mix: Use printable ASCII characters, hex for others\n"
+               "    hex: Use only hex\n"
+               "-l: show palette (if any)\n"
+               "-r: render the PNG image in terminal (with --mode and --size)\n"
+               "--mode=<mode>: render mode for -r:\n"
+               "    ascii:   Letters ROYLGTCABVMF indicate hue (L=lime, T=turquoise, A=azure, F=fuchsia, ...).\n"
+               "    hex:     CSS hex notation for every pixel.\n"
+               "    hex16:   Like hex but shows 16 bits values per channel.\n"
+               "    palette: Shows palette index of each pixel, only for palette images.\n"
+               "--size=<width>: render width (not used by hex, hex16 or palette):\n"
+               "-c: show PNG chunks\n"
+               "-C: show PNG chunks (alternate format)\n"
+               "-f: show PNG filters\n"
+               "-z: show Zlib info\n"
+               "-b: show Zlib blocks\n"
+               "-B: show Zlib block symbol counts\n"
+               "-7: show all lz77 values (huge output)\n"
+               "-v: be more verbose\n"
+               "-x: print most integer numbers in hexadecimal (includes e.g. year, num unique colors, ...)\n"
+               "-?, --help: show this help" << std::endl;
+}
+
+// The ways an image can be rendered in the terminal; stored in Options::rendermode.
+// NOTE(review): presumably these correspond to the ascii / hex / hex16 / palette values of the
+// --mode option listed in showHelp() -- confirm against the argument parser.
+enum RenderMode {
+  RM_ASCII,
+  RM_HEX, // CSS
+  RM_HEX16,
+  RM_PAL // palette indices (only rendered if image is palette based)
+};
+
+// for displaying ICC profile; stored in Options::hexformat
+// NOTE(review): presumably selected with the --format option listed in showHelp() -- confirm
+enum HexFormat {
+  HF_HEX,
+  HF_MIX // hex and ascii
+};
+
+// Settings that select which reports are shown and how they are formatted.
+// A default constructed Options has every switch off, hexformat HF_MIX,
+// rendermode RM_ASCII and rendersize 80.
+struct Options {
+  bool verbose;
+  // filesize, pixels and color type on a single line
+  bool show_one_line_summary;
+  bool show_header;
+  // the ICC color profile in full
+  bool show_icc;
+  bool show_color_stats;
+  // things like filesize, width, height, palette size, ...
+  bool show_png_info;
+  // all palette values
+  bool show_palette;
+  // palette indices of pixels
+  bool show_palette_pixels;
+
+  HexFormat hexformat;
+
+  bool show_render;
+  RenderMode rendermode;
+  int rendersize;
+
+  // the PNG chunk names and their lengths
+  bool show_chunks;
+  // alternate form to print chunks
+  bool show_chunks2;
+  // the PNG filter of each scanline (not supported for interlaced PNGs currently)
+  bool show_filters;
+  // basic zlib info
+  bool zlib_info;
+  // type, tree info, code length summaries and sizes for each zlib block
+  bool zlib_blocks;
+  // in addition to the zlib_blocks info, counts of occurrences of all symbols
+  bool zlib_counts;
+  // in addition to the zlib_blocks info, all symbols, one per line (huge output)
+  bool zlib_full;
+  // some sizes or positions in hexadecimal
+  bool use_hex;
+
+  Options()
+    : verbose(false),
+      show_one_line_summary(false),
+      show_header(false),
+      show_icc(false),
+      show_color_stats(false),
+      show_png_info(false),
+      show_palette(false),
+      show_palette_pixels(false),
+      hexformat(HF_MIX),
+      show_render(false),
+      rendermode(RM_ASCII),
+      rendersize(80),
+      show_chunks(false),
+      show_chunks2(false),
+      show_filters(false),
+      zlib_info(false),
+      zlib_blocks(false),
+      zlib_counts(false),
+      zlib_full(false),
+      use_hex(false) {
+  }
+};
+
+/*
+Looks for the first chunk of the given type in [data, end) and, if there is one, lets
+lodepng_inspect_chunk parse it into state.
+Returns 0 when the chunk is absent (not considered an error), otherwise the result of
+lodepng_inspect_chunk.
+*/
+unsigned inspect_chunk_by_name(const unsigned char* data, const unsigned char* end,
+                               lodepng::State& state, const char type[5]) {
+  const unsigned char* p = lodepng_chunk_find_const(data, end, type);
+  if(!p) return 0; // chunk not present: nothing to inspect, and "p - data" would be meaningless
+  return lodepng_inspect_chunk(&state, p - data, data, end - data);
+}
+
+// Lazy loads the raw file, inspected header or entire image as needed.
+// Every load function leaves its result code in "error" (0 = ok).
+struct Data {
+  std::string filename;
+  std::vector<unsigned char> buffer; // raw bytes of the file
+  std::vector<unsigned char> pixels; // 16-bit
+  unsigned w;
+  unsigned h;
+  lodepng::State state;
+  unsigned error;
+  bool inspected;
+
+  Data(const std::string& filename) : filename(filename), w(0), h(0), error(0), inspected(false) {}
+
+
+  // Load the file if not already loaded
+  void loadFile() {
+    if(buffer.empty()) {
+      error = lodepng::load_file(buffer, filename); //load the image file with given filename
+    } else {
+      error = 0; // for reloadpixels, reset error if file was already successfully loaded
+    }
+  }
+
+  // Load header info (plus a few more nearby light chunks) if not already loaded, and the file if needed
+  void loadInspect() {
+    if(inspected) return;
+    inspected = true;
+    loadFile();
+    if(error) return;
+    const unsigned char* data = &buffer[0];
+    error = lodepng_inspect(&w, &h, &state, data, buffer.size());
+    if(error) return;
+    // end before first IDAT chunk: do not parse more than first part of file for all this.
+    const unsigned char* end = lodepng_chunk_find_const(data, data + buffer.size(), "IDAT");
+    if(!end) end = data + buffer.size(); // no IDAT, invalid PNG but extract info anyway
+    static const char* const HEADER_CHUNKS[] = {"PLTE", "cHRM", "gAMA", "sBIT", "bKGD", "hIST", "pHYs", "iCCP"};
+    for(size_t i = 0; i < sizeof(HEADER_CHUNKS) / sizeof(HEADER_CHUNKS[0]); i++) {
+      // a chunk that is not in the file is not an error, so only inspect the ones that are present
+      if(!lodepng_chunk_find_const(data, end, HEADER_CHUNKS[i])) continue;
+      // keep the result: it used to be discarded, which left the error checks without effect
+      error = inspect_chunk_by_name(data, end, state, HEADER_CHUNKS[i]);
+      if(error) return;
+    }
+  }
+
+  // Load the pixels if not already loaded, and the file if needed
+  void loadPixels() {
+    if(pixels.empty()) reloadPixels();
+  }
+
+  // Decode the image again as 16-bit RGBA, replacing the current contents of pixels
+  void reloadPixels() {
+    loadFile();
+    if(error) return;
+    inspected = true;
+    state.info_raw.colortype = LCT_RGBA;
+    state.info_raw.bitdepth = 16;
+    pixels.clear();
+    error = lodepng::decode(pixels, w, h, state, buffer);
+  }
+};
+
+// Returns the numeric value of the color type followed by its name in parentheses,
+// e.g. "<number> (RGBA)"; values without a name get "invalid".
+std::string colorTypeString(LodePNGColorType type) {
+  const char* label = "invalid";
+  if(type == LCT_GREY) label = "grey";
+  else if(type == LCT_RGB) label = "RGB";
+  else if(type == LCT_PALETTE) label = "palette";
+  else if(type == LCT_GREY_ALPHA) label = "grey+alpha";
+  else if(type == LCT_RGBA) label = "RGBA";
+
+  std::stringstream text;
+  text << type << " (" << std::string(label) << ")";
+  return text.str();
+}
+
+// Parses the start of s as a value of type T using stream extraction.
+// Returns a value-initialized T (0 for numbers, empty for strings) when nothing could be read.
+template<typename T>
+T strtoval(const std::string& s) {
+  std::istringstream sstream(s);
+  T val = T(); // extraction leaves val untouched when it cannot even start, e.g. for an empty string
+  sstream >> val;
+  return val;
+}
+
+
+/*
+Prints the type and data length of every chunk in the PNG file, in one of two
+layouts, followed by a message for each of IHDR, IDAT and IEND that is missing.
+Prints nothing when the file cannot be loaded.
+*/
+void displayChunkNames(Data& data, const Options& options) {
+  data.loadFile();
+  if(data.error) return;
+
+  if(options.use_hex) std::cout << std::hex;
+  else std::cout << std::dec;
+
+  std::vector<std::string> names;
+  std::vector<size_t> sizes;
+  const unsigned error = lodepng::getChunkInfo(names, sizes, data.buffer);
+  if(error) {
+    const bool stoppedAfterIEND = !names.empty() && names.back() == "IEND" && sizes.back() == 0;
+    if(stoppedAfterIEND) {
+      std::cout << "Corruption or superfluous data detected after the IEND chunk" << std::endl;
+    } else {
+      std::cout << "Error while identifying chunks. Listing identified chunks anyway." << std::endl;
+    }
+  }
+
+  const size_t count = names.size();
+  if(!options.show_chunks2) {
+    // default layout: one line per run of consecutive chunks of the same type
+    std::cout << "Chunks (type: lengths):";
+    std::string previous;
+    for(size_t k = 0; k < count; k++) {
+      const std::string& current = names[k];
+      if(previous != current) {
+        std::cout << std::endl;
+        std::cout << " " << current << ": ";
+      }
+      previous = current;
+      std::cout << sizes[k] << " ";
+    }
+    std::cout << std::endl;
+  } else {
+    // alternate layout: all types on one line, all sizes on the next
+    std::cout << "Chunk types: ";
+    for(size_t k = 0; k < count; k++) std::cout << names[k] << " ";
+    std::cout << std::endl;
+    std::cout << "Chunk sizes: ";
+    for(size_t k = 0; k < sizes.size(); k++) std::cout << sizes[k] << " ";
+    std::cout << std::endl;
+  }
+
+  // the missing chunk report is only meaningful when the whole chunk list was read
+  if(error) return;
+
+  std::map<std::string, bool> present;
+  for(size_t k = 0; k < count; k++) present[names[k]] = true;
+
+  if(!present["IHDR"]) std::cout << "Error: no IHDR chunk" << std::endl;
+  if(!present["IDAT"]) std::cout << "Error: no IDAT chunk" << std::endl;
+  if(!present["IEND"]) std::cout << "Error: no IEND chunk" << std::endl;
+}
+
+// Converts an 8-bit RGB color to hue, saturation and lightness, each written as a value in 0-255.
+// A color with r == g == b gets hue 0, saturation 0 and lightness r.
+void RGBtoHSL(unsigned char r, unsigned char g, unsigned char b, unsigned char* h, unsigned char* s, unsigned char* l) {
+  const int highest = std::max<int>(r, std::max<int>(g, b));
+  const int lowest = std::min<int>(r, std::min<int>(g, b));
+
+  if(highest == lowest) {
+    // greyscale: no hue and no saturation
+    *s = 0;
+    *h = 0;
+    *l = r;
+    return;
+  }
+
+  const int total = lowest + highest;
+  const int span = highest - lowest;
+
+  const int lightness = total / 2;
+  *l = lightness;
+
+  const int divisor = (lightness < 128) ? total : (512 - total);
+  *s = 255 * span / divisor;
+
+  // hue on a scale six times as fine as the result; the offset depends on the largest channel
+  int hue6;
+  if(r == highest) hue6 = 255 * (g - b) / span;
+  else if(g == highest) hue6 = 512 + 255 * (b - r) / span;
+  else hue6 = 1024 + 255 * (r - g) / span;
+  *h = ((hue6 / 6) & 255);
+}
+
+/*
+HCT: Hue, Chroma, Tone: describes a color as a linear combination of a pure hue and a greyscale value.
+*) Hue: taken from RGBtoHSL
+*) Chroma: The linear combination factor: 255 for pure hue, 0 for pure greyscale
+*) Tone: greyscale to mix with: 0 = black (shade), 255 = white (tint), in between = grey (tone)
+*/
+void RGBtoHCT(unsigned char r, unsigned char g, unsigned char b, unsigned char* h, unsigned char* c, unsigned char* t) {
+  // only the hue of this call is kept, *c and *t are overwritten below
+  RGBtoHSL(r, g, b, h, c, t);
+
+  const int highest = std::max<int>(r, std::max<int>(g, b));
+  const int lowest = std::min<int>(r, std::min<int>(g, b));
+  const int chroma = highest - lowest;
+
+  *c = chroma;
+  if(chroma == 255) *t = 0; // pure hue: no grey to mix with, and the division below would be by zero
+  else *t = 255 * lowest / (255 - chroma);
+}
+
+// Maps a hue to one of 12 capital letters, one per 30 degree step of the color wheel:
+// Red, Orange, Yellow, Lime (officially "chartreuse" but c is for cyan), Green,
+// Turquoise (officially "spring green" but that name overlaps green), Cyan, Azure, Blue,
+// Violet, Magenta, Fuchsia (officially "rose" but r is for red).
+// Add 32 to the result to get a small letter instead of a capital.
+char HueToLetter(int h) {
+  // upperBound[i] is the first hue that no longer maps to letters[i]
+  static const int upperBound[12] = {11, 32, 53, 74, 96, 117, 138, 159, 181, 202, 223, 244};
+  static const char letters[] = "ROYLGTCABVMF";
+  for(int i = 0; i < 12; i++) {
+    if(h < upperBound[i]) return letters[i];
+  }
+  return 'R'; // 244 and up wraps around to red again
+}
+
+// Maps a lightness value to one of 9 characters, from ' ' for the darkest values to '#' for the brightest.
+char lightnessToLetter(int l) {
+  // limits[i] is the first lightness that no longer maps to ramp[i]
+  static const int limits[8] = {16, 48, 80, 112, 144, 176, 208, 240};
+  // The + looks denser than the * in a terminal...
+  static const char ramp[] = " .:-!*+=#";
+  int level = 0;
+  while(level < 8 && l >= limits[level]) level++;
+  return ramp[level];
+}
+
+// Adds an ordered-dither offset, chosen by the pixel position (x, y), to v.
+// Both v and the result are assumed to be in range 0-255.
+// range is the size of an individual bucket: a value in roughly [-range/2, range/2) gets added to v.
+// E.g. if there are 12 hue letters, give 255/12 = 21 as range.
+// wrap selects what happens outside 0-255: true wraps around (for hue), false clamps.
+static inline int applyDither(int v, int range, int x, int y, bool wrap) {
+  // 4x4 ordered dithering matrix with values 0-15; multiplied by 17 they span 0-255
+  static const int matrix[4][4] = {{0, 8, 2, 10},
+                                   {12, 4, 14, 6},
+                                   {3, 11, 1, 9},
+                                   {15, 7, 13, 5}};
+  const int offset = matrix[y & 3][x & 3] * 17 - 128; // range: -128 to 127
+  const int dithered = v + offset * range / 256;
+  if(wrap) return dithered & 255;
+  if(dithered < 0) return 0;
+  if(dithered > 255) return 255;
+  return dithered;
+}
+
+// Picks the character that represents one RGBA pixel in the ASCII art preview.
+// The color is first multiplied by its alpha. Colorful pixels become a hue letter (capital for light,
+// small for dark), greyish pixels become a density character from lightnessToLetter.
+// x and y are the pixel position, used to select the dithering offset when dither is true.
+// inverted inverts black and white, for in case black text on white background is used (by default
+// it assumes white text on black background).
+char RGBtoLetter(unsigned char r, unsigned char g, unsigned char b, unsigned char a, unsigned x, unsigned y, bool dither = true, bool inverted = false) {
+  if(a < 255) {
+    r = a * r / 255;
+    g = a * g / 255;
+    b = a * b / 255;
+  }
+
+  if(!dither) {
+    unsigned char hue, sat, light;
+    RGBtoHSL(r, g, b, &hue, &sat, &light);
+    if(inverted) light = 255 - light;
+
+    // very dark, very light or unsaturated pixels are shown by lightness only
+    const bool colorful = light >= 24 && light <= 232 && sat >= 64;
+    if(!colorful) return lightnessToLetter(light);
+
+    const char capital = HueToLetter(hue);
+    return light < 128 ? capital + 32 : capital;
+  }
+
+  unsigned char hue, chroma, tone;
+  RGBtoHCT(r, g, b, &hue, &chroma, &tone);
+  int light = (std::max(std::max(r, g), b) + std::min(std::min(r, g), b)) / 2;
+  if(inverted) {
+    light = 255 - light;
+    tone = 255 - tone;
+  }
+
+  // dithering the chroma decides per pixel between the hue letter and the greyscale character
+  const bool use_hue = applyDither(chroma, 254, x, y, false) >= 128;
+  if(!use_hue) return lightnessToLetter(applyDither(light, 31, x, y, false));
+
+  const char capital = HueToLetter(applyDither(hue, 21, x, y, true));
+  // the small/capital decision uses a shifted dither position
+  const bool smallcaps = applyDither(light, 64, x+2, y+2, false) < 80;
+  return capital + (smallcaps ? 32 : 0);
+}
+
+// Resizes an interleaved 8-bit-per-sample image from w0 x h0 to w1 x h1 and returns the new samples.
+// The number of channels is derived from in.size() / (w0 * h0).
+// smooth = true uses a separable box filter (horizontal pass into a temporary, then vertical pass);
+// smooth = false picks the nearest source pixel.
+// Returns an empty vector if any of the four dimensions is not positive.
+std::vector<unsigned char> rescale(const std::vector<unsigned char>& in,
+                                   int w0, int h0, int w1, int h1, bool smooth) {
+  // Nothing to sample from or nothing to produce; this also protects the division below.
+  if(w0 <= 0 || h0 <= 0 || w1 <= 0 || h1 <= 0) return std::vector<unsigned char>();
+
+  int numchannels = in.size() / ((size_t)w0 * (size_t)h0);
+  std::vector<unsigned char> out(w1 * h1 * numchannels);
+  if(smooth) {
+    // box filter.
+    std::vector<unsigned char> temp(w1 * h0 * numchannels);
+    for (int c = 0; c < numchannels; c++) {
+      // horizontal pass: in (w0 x h0) -> temp (w1 x h0)
+      for (int x = 0; x < w1; x++) {
+        float xaf = x * 1.0 * w0 / w1;
+        float xbf = (x + 1.0) * w0 / w1;
+        int xa = (int)xaf;
+        int xb = (int)xbf;
+        double norm = 1.0 / (xbf - xaf);
+        // keep only the fractional parts: they give the partial coverage of the first and last source pixel
+        xaf -= std::floor(xaf);
+        xbf -= std::floor(xbf);
+        for (int y = 0; y < h0; y++) {
+          int index1 = x * numchannels + y * w1 * numchannels;
+          double val = 0;
+          // When the box ends exactly on the right image edge, xb equals w0: that sample has weight 0
+          // but lies outside the row, so it must not be read.
+          for(int x0 = xa; x0 <= xb && x0 < w0; x0++) {
+            int index0 = x0 * numchannels + y * w0 * numchannels;
+            double v = 1;
+            if(x0 == xa) v -= xaf;
+            if(x0 == xb) v -= (1 - xbf);
+            val += v * in[index0 + c];
+          }
+          temp[index1 + c] = val * norm;
+        }
+      }
+      // vertical pass: temp (w1 x h0) -> out (w1 x h1)
+      for (int y = 0; y < h1; y++) {
+        float yaf = y * 1.0 * h0 / h1;
+        float ybf = (y + 1.0) * h0 / h1;
+        int ya = (int)yaf;
+        int yb = (int)ybf;
+        double norm = 1.0 / (ybf - yaf);
+        yaf -= std::floor(yaf);
+        ybf -= std::floor(ybf);
+        for (int x = 0; x < w1; x++) {
+          int index1 = x * numchannels + y * w1 * numchannels;
+          double val = 0;
+          // Same edge case as above: yb can equal h0 for the last output row.
+          for(int y0 = ya; y0 <= yb && y0 < h0; y0++) {
+            int index0 = x * numchannels + y0 * w1 * numchannels;
+            double v = 1;
+            if(y0 == ya) v -= yaf;
+            if(y0 == yb) v -= (1 - ybf);
+            val += v * temp[index0 + c];
+          }
+          out[index1 + c] = val * norm;
+        }
+      }
+    }
+  } else {
+    // nearest neighbor: sample the source pixel under the center of each destination pixel
+    for(int y = 0; y < h1; y++) {
+      int y0 = (int)((y + 0.5) * h0 / h1 - 0.5);
+      for (int x = 0; x < w1; x++) {
+        int x0 = (int)((x + 0.5) * w0 / w1 - 0.5);
+        int index0 = x0 * numchannels + y0 * w0 * numchannels;
+        int index1 = x * numchannels + y * w1 * numchannels;
+        for (int c = 0; c < numchannels; c++) {
+          out[index1 + c] = in[index0 + c];
+        }
+      }
+    }
+  }
+  return out;
+}
+
+/*
+Prints an ASCII art preview of the image to std::cout, inside a frame of '+', '-' and '|' characters.
+image is given in 16-bit big endian RGBA (8 bytes per pixel); only the first byte of each channel is used.
+An image wider than asciiw is first shrunk to asciiw columns with rescale, keeping its aspect ratio.
+Nothing is printed when the resulting width or height is 0.
+*/
+void displayAsciiArt(const std::vector<unsigned char>& image, unsigned w, unsigned h, unsigned asciiw) {
+  std::vector<unsigned char> shrunk;
+  const std::vector<unsigned char>* pixels = &image;
+  if(asciiw < w) {
+    const unsigned neww = asciiw;
+    const unsigned newh = h * neww / w;
+    shrunk = rescale(image, w, h, neww, newh, true);
+    pixels = &shrunk;
+    w = neww;
+    h = newh;
+  }
+  if(w == 0 || h == 0) return;
+
+  const std::string border(w, '-');
+  std::cout << "ASCII Art Preview: " << std::endl;
+  const unsigned rows = 1 + ((h - 1) * 4) / 7; //compensate for non-square characters in terminal
+  std::cout << '+' << border << '+' << std::endl;
+  for(unsigned row = 0; row < rows; row++) {
+    // source line shown on this text row
+    const unsigned srcy = row * h / rows;
+    std::cout << "|";
+    for(unsigned col = 0; col < w; col++) {
+      const unsigned base = srcy * w * 8 + col * 8;
+      // offsets 0, 2, 4, 6 are the first bytes of the r, g, b, a samples
+      const int red = (*pixels)[base + 0];
+      const int green = (*pixels)[base + 2];
+      const int blue = (*pixels)[base + 4];
+      const int alpha = (*pixels)[base + 6];
+      std::cout << (char)RGBtoLetter(red, green, blue, alpha, col, row, true, false);
+    }
+    std::cout << "|";
+    std::cout << std::endl;
+  }
+  std::cout << '+' << border << '+' << std::endl;
+}
+
+// Prints every pixel as a CSS-style "#rrggbbaa" hex color, one image row per output line, each line
+// prefixed with its row number.
+// sixteen: print 16 bits (4 hex digits) per channel instead of 8 bits (2 hex digits)
+// The input image is ALWAYS given in 16-bit per channel RGBA (8 bytes per pixel); in 8-bit mode only
+// the first byte of each channel is printed.
+void displayColorsHex(const std::vector<unsigned char>& image, unsigned w, unsigned h, bool sixteen) {
+  const std::ios_base::fmtflags saved = std::cout.flags();
+
+  if(w > 0 && h > 0) {
+    std::cout << "Colors (CSS RGBA hex format):" << std::endl;
+
+    const int digits = sixteen ? 4 : 2;
+    for(unsigned row = 0; row < h; row++) {
+      std::cout.flags(saved); //print line numbers in hex or dec whatever it originally was
+      std::cout << row << ":";
+      for(unsigned col = 0; col < w; col++) {
+        const size_t index = row * w * 8 + col * 8;
+        std::cout << std::hex << std::setfill('0') << " #";
+        for(int channel = 0; channel < 4; channel++) {
+          int value = image[index + channel * 2];
+          if(sixteen) value = value * 256 + image[index + channel * 2 + 1];
+          std::cout << std::setw(digits) << value;
+        }
+      }
+      std::cout << std::endl;
+    }
+  }
+
+  std::cout.flags(saved);
+}
+
+
+/*
+Show the filter type of each scanline in this PNG image.
+When lodepng reports 7 lists of filter types the image is treated as Adam7 interlaced and each pass is
+printed on its own line; otherwise only the first list is printed, on a single line.
+*/
+void displayFilterTypes(Data& data, const Options& options) {
+  std::cout << (options.use_hex ? std::hex: std::dec);
+  data.loadFile();
+  if(data.error) return;
+
+  const std::vector<unsigned char>& png = data.buffer;
+  std::vector<std::vector<unsigned char> > types;
+  if(lodepng::getFilterTypesInterlaced(types, png)) {
+    std::cout << "Error getting filter types" << std::endl;
+    return;
+  }
+
+  const bool interlaced = (types.size() == 7);
+  if(interlaced) std::cout << "Filter types (Adam7 interlaced):" << std::endl;
+  else std::cout << "Filter types: ";
+
+  const size_t numpasses = interlaced ? 7 : 1;
+  for(size_t pass = 0; pass < numpasses; pass++) {
+    if(interlaced) std::cout << " Pass " << (pass + 1) << ": ";
+    const std::vector<unsigned char>& line_types = types[pass];
+    for(size_t line = 0; line < line_types.size(); line++) {
+      std::cout << (int)(line_types[line]);
+    }
+    std::cout << std::endl;
+  }
+}
+
+// Prints the palette size and every palette entry as "#rrggbbaa" in hex.
+// image type MUST be palette
+void displayPalette(Data& data, const Options& options) {
+  data.loadInspect();
+  if(data.error) return;
+  std::cout << (options.use_hex ? std::hex: std::dec);
+
+  const LodePNGColorMode& mode = data.state.info_png.color;
+
+  std::cout << "Palette size: " << mode.palettesize << std::endl;
+  std::cout << "Palette colors: ";
+
+  // the colors are always shown in hex; the previous number base is restored afterwards
+  const std::ios_base::fmtflags saved = std::cout.flags();
+  std::cout << std::hex << std::setfill('0');
+  for(size_t entry = 0; entry < mode.palettesize; entry++) {
+    // each palette entry is 4 bytes: r, g, b, a
+    const unsigned char* rgba = mode.palette + entry * 4;
+    std::cout << "#";
+    for(int channel = 0; channel < 4; channel++) std::cout << std::setw(2) << (int)rgba[channel];
+    std::cout << " ";
+  }
+  std::cout.flags(saved);
+  std::cout << std::endl;
+}
+
+// Decodes the PNG in buffer without color conversion and prints the palette index of every pixel,
+// one image row per output line. With options.show_color_stats it first prints how often each palette
+// index is used and how many pixels refer to an index beyond the palette size.
+// image type MUST be palette; for other color types only a notice is printed.
+// If decoding fails, the decoding error is printed and nothing else is shown.
+void displayPalettePixels(const std::vector<unsigned char>& buffer, const Options& options) {
+  unsigned w = 0, h = 0;
+  lodepng::State state;
+  std::vector<unsigned char> out;
+  std::cout << (options.use_hex ? std::hex: std::dec);
+
+  // keep the raw palette indices instead of converting the pixels to RGBA
+  state.decoder.color_convert = 0;
+
+  unsigned error = lodepng::decode(out, w, h, state, buffer);
+  if(error) {
+    // Without this check the loops below would index into an empty pixel buffer.
+    std::cout << "Decoding error " << error << ": " << lodepng_error_text(error) << std::endl;
+    return;
+  }
+
+  // computed in size_t so that large images do not overflow 32-bit arithmetic
+  const size_t numpixels = (size_t)w * (size_t)h;
+
+  if(state.info_png.color.colortype == LCT_PALETTE) {
+    if (options.show_color_stats) {
+      std::vector<size_t> count(256, 0);
+      size_t outofbounds = 0;
+
+      for(size_t i = 0; i < numpixels; i++) {
+        int value = lodepng::getPaletteValue(&out[0], i, state.info_raw.bitdepth);
+        if(value >= 0 && value < (int)count.size()) count[value]++;
+        if(value >= (int)state.info_raw.palettesize) outofbounds++;
+      }
+
+      std::cout << "Palette count: ";
+      for(size_t i = 0; i < state.info_raw.palettesize && i < count.size(); i++) {
+        std::cout << count[i] << " ";
+      }
+      std::cout << std::endl;
+
+      if(outofbounds > 0) std::cout << "Out of bounds palette values: " << outofbounds << std::endl;
+    }
+
+    std::cout << "Pixel palette indices:" << std::endl;
+    for(size_t i = 0; i < numpixels; i++) {
+      int value = lodepng::getPaletteValue(&out[0], i, state.info_raw.bitdepth);
+      std::cout << value << ", ";
+      // line break after the last pixel of each image row
+      if(i % w == w - 1) std::cout << std::endl;
+    }
+  } else {
+    std::cout << "Pixel palette indices: not shown, not a palette image\n" << std::endl;
+  }
+}
+
+// Prints information about the zlib (DEFLATE) blocks of the file in data.
+// - options.zlib_info: total compressed/uncompressed sizes, block count and, when there is more than
+//   one block, the size and boundary position of each block.
+// - options.zlib_blocks: per-block details. options.zlib_full and options.zlib_counts add symbol dumps
+//   and symbol histograms, but only inside this per-block section, so they have no effect unless
+//   options.zlib_blocks is set as well.
+// Returns without printing anything if data.loadFile() leaves data.error set.
+void printZlibInfo(Data& data, const Options& options) {
+  data.loadFile();
+  if(data.error) return;
+  const std::vector<unsigned char>& in = data.buffer;
+  std::cout << (options.use_hex ? std::hex: std::dec);
+
+  std::vector<lodepng::ZlibBlockInfo> zlibinfo;
+  lodepng::extractZlibInfo(zlibinfo, in);
+
+  if(options.zlib_info) {
+    //std::cout << "Zlib info: " << std::endl;
+    // running totals; the value after each block is recorded as that block's end boundary
+    size_t compressed = 0;
+    size_t uncompressed = 0;
+    std::vector<size_t> boundaries_compressed;
+    std::vector<size_t> boundaries_uncompressed;
+    for(size_t i = 0; i < zlibinfo.size(); i++) {
+      // compressed sizes are tracked in bits per block; whole bytes are used here (integer division)
+      compressed += zlibinfo[i].compressedbits / 8;
+      uncompressed += zlibinfo[i].uncompressedbytes;
+      boundaries_compressed.push_back(compressed);
+      boundaries_uncompressed.push_back(uncompressed);
+    }
+
+    std::cout << "IDAT zlib info: " << compressed << std::endl;
+    std::cout << "Compressed size: " << compressed << std::endl;
+    std::cout << "Uncompressed size: " << uncompressed << std::endl;
+    std::cout << "Amount of zlib blocks: " << zlibinfo.size() << std::endl;
+    if(zlibinfo.size() > 1) {
+      std::cout << "Block sizes (uncompressed): ";
+      for(size_t i = 0; i < zlibinfo.size(); i++)
+          std::cout << zlibinfo[i].uncompressedbytes << " ";
+      std::cout << std::endl;
+      std::cout << "Block sizes (compressed): ";
+      for(size_t i = 0; i < zlibinfo.size(); i++)
+          std::cout << (zlibinfo[i].compressedbits / 8) << " ";
+      std::cout << std::endl;
+      // the boundary loops stop one entry early: the last boundary equals the total printed above
+      std::cout << "Block boundaries (uncompressed): ";
+      for(size_t i = 0; i + 1 < boundaries_uncompressed.size(); i++)
+          std::cout << boundaries_uncompressed[i] << " ";
+      std::cout << std::endl;
+      std::cout << "Block boundaries (compressed): ";
+      for(size_t i = 0; i + 1 < boundaries_compressed.size(); i++)
+          std::cout << boundaries_compressed[i] << " ";
+      std::cout << std::endl;
+    }
+  }
+
+  if(options.zlib_blocks) {
+    for(size_t i = 0; i < zlibinfo.size(); i++) {
+      const lodepng::ZlibBlockInfo& info = zlibinfo[i];
+
+      std::cout << "Zlib block " << i << ":" << std::endl;
+      std::cout << " block type: " << info.btype << std::endl;
+
+      size_t compressedsize = info.compressedbits / 8;
+      size_t uncompressedsize = info.uncompressedbytes;
+      std::cout << " block compressed: " << compressedsize << " (" << compressedsize / 1024 << "K) (" << info.compressedbits << " bits)" << std::endl;
+      std::cout << " block uncompressed: " << uncompressedsize << " (" << uncompressedsize / 1024 << "K)" << std::endl;
+
+      // only block types 0, 1 and 2 are handled; anything else aborts the whole listing
+      if(info.btype > 2) {
+        std::cout << "Error: Invalid Block Type" << std::endl;
+        return;
+      }
+
+      // block type 2 carries its own encoded code length tables, which are shown here
+      if(info.btype == 2) {
+        std::cout << " encoded trees size: " << info.treebits / 8 << " (" << info.treebits << " bits)" << std::endl;
+        std::cout << " HLIT: " << info.hlit << std::endl;
+        std::cout << " HDIST: " << info.hdist << std::endl;
+        std::cout << " HCLEN: " << info.hclen << std::endl;
+        // code lengths are printed back to back without separator, always in hex
+        std::cout << std::hex;
+        std::cout << " code length code lengths: "; for(size_t j = 0; j < 19; j++) std::cout << info.clcl[j]; std::cout << std::endl;
+        // switch back to decimal unless the user asked for hex output
+        if(!options.use_hex) std::cout << std::dec;
+        if(options.zlib_full) {
+          for(size_t j = 0; j < info.treecodes.size(); j++) {
+            int code = info.treecodes[j];
+            if(code < 17) {
+               std::cout << " tree: " << code << std::endl;
+            } else {
+              // codes from 17 upwards are followed by a repeat value stored in the next element
+              j++;
+              std::cout << " tree: " << code << " rep: " << info.treecodes[j] << std::endl;
+            }
+
+          }
+        }
+
+        std::cout << std::hex;
+        std::cout << " lit code lengths 0-127  : "; for(size_t j = 0; j < 128; j++) std::cout << info.litlenlengths[j]; std::cout << std::endl;
+        std::cout << " lit code lengths 128-255: "; for(size_t j = 128; j < 256; j++) std::cout << info.litlenlengths[j]; std::cout << std::endl;
+        std::cout << " end code length         : "; std::cout << info.litlenlengths[256]; std::cout << std::endl;
+        std::cout << " len code lengths        : "; for(size_t j = 257; j < 288; j++) std::cout << info.litlenlengths[j]; std::cout << std::endl;
+        std::cout << " dist code lengths       : "; for(size_t j = 0; j < 32; j++) std::cout << info.distlengths[j]; std::cout << std::endl;
+        if(!options.use_hex) std::cout << std::dec;
+      }
+
+
+      // symbol statistics are shown for every block type except 0
+      if(info.btype != 0) {
+        std::cout << " code counts: lit: " << info.numlit << ", len/dist: " << info.numlen << ", total: " << (info.numlit + info.numlen + 1) << ", with dists: " << (info.numlit + 2 * info.numlen + 1) << std::endl;
+
+        if(options.zlib_full) {
+          // dump every symbol: 256 is the end code, below 256 a literal, above 256 a length/distance pair
+          for(size_t j = 0; j < info.lz77_lcode.size(); j++) {
+            int symbol = info.lz77_lcode[j];
+            if(symbol == 256) {
+              std::cout << " end" << std::endl;
+            } else if(symbol < 256) {
+              std::cout << " lit: " << symbol << std::endl;
+            } else {
+              std::cout << " len: " << info.lz77_lvalue[j] << ", dist: " << info.lz77_dvalue[j] << std::endl;
+            }
+          }
+        }
+
+        if(options.zlib_counts) {
+          // histograms of the literal/length codes (288 slots) and the distance codes (32 slots)
+          std::vector<size_t> ll_count(288, 0);
+          std::vector<size_t> d_count(32, 0);
+          for(size_t j = 0; j < info.lz77_lcode.size(); j++) {
+            int symbol = info.lz77_lcode[j];
+            if(symbol <= 256) {
+              ll_count[symbol]++;
+            } else {
+              // a length code is always paired with a distance code
+              ll_count[symbol]++;
+              d_count[info.lz77_dcode[j]]++;
+            }
+          }
+          std::cout << " lit code 0-63 counts   : "; for(size_t j = 0; j < 64; j++) std::cout << ll_count[j] << " "; std::cout << std::endl;
+          std::cout << " lit code 64-127 counts : "; for(size_t j = 64; j < 128; j++) std::cout << ll_count[j] << " "; std::cout << std::endl;
+          std::cout << " lit code 128-191 counts: "; for(size_t j = 128; j < 192; j++) std::cout << ll_count[j] << " "; std::cout << std::endl;
+          std::cout << " lit code 192-255 counts: "; for(size_t j = 192; j < 256; j++) std::cout << ll_count[j] << " "; std::cout << std::endl;
+          std::cout << " end code count         : "; std::cout << ll_count[256] << " "; std::cout << std::endl;
+          std::cout << " len code counts        : "; for(size_t j = 257; j < 288; j++) std::cout << ll_count[j] << " "; std::cout << std::endl;
+          std::cout << " dist code counts       : "; for(size_t j = 0; j < 32; j++) std::cout << d_count[j] << " "; std::cout << std::endl;
+        }
+      }
+    }
+  }
+}
+
+// returns number of unique RGBA colors in the image
+// also fills the number of unique r, g, b and a values in the output parameters ro, go, bo, ao
+// the input image is in 16-bit per channel big endian color, so 8 chars per pixel
+// The image is taken by const reference so that calling this function does not copy all pixels.
+size_t countColors(const std::vector<unsigned char>& image, unsigned w, unsigned h,
+    size_t* ro, size_t* go, size_t* bo, size_t* ao) {
+  typedef std::pair<std::pair<unsigned short, unsigned short>, std::pair<unsigned short, unsigned short> > RGBA;
+  // only the number of distinct keys is used in the end
+  std::map<RGBA, size_t> rgbam;
+  // one "seen" flag per possible 16-bit channel value
+  std::vector<unsigned char> rm(65536, 0);
+  std::vector<unsigned char> gm(65536, 0);
+  std::vector<unsigned char> bm(65536, 0);
+  std::vector<unsigned char> am(65536, 0);
+  for(unsigned y = 0; y < h; y++) {
+    for(unsigned x = 0; x < w; x++) {
+      // computed in size_t so that large images do not overflow 32-bit arithmetic
+      const size_t index = ((size_t)y * w + x) * 8;
+      unsigned short r = 256 * image[index + 0] + image[index + 1];
+      unsigned short g = 256 * image[index + 2] + image[index + 3];
+      unsigned short b = 256 * image[index + 4] + image[index + 5];
+      unsigned short a = 256 * image[index + 6] + image[index + 7];
+      RGBA rgba(std::make_pair(r, g), std::make_pair(b, a));
+      rgbam[rgba]++;
+      rm[r] = 1;
+      gm[g] = 1;
+      bm[b] = 1;
+      am[a] = 1;
+    }
+  }
+  // the flags are 0 or 1, so summing them counts the distinct values per channel
+  *ro = *go = *bo = *ao = 0;
+  for(size_t i = 0; i < rm.size(); i++) {
+    *ro += rm[i];
+    *go += gm[i];
+    *bo += bm[i];
+    *ao += am[i];
+  }
+
+  return rgbam.size();
+}
+
+
+void showError(Data& data, const Options& options) {
+  std::cout << (options.use_hex ? std::hex: std::dec);
+  std::string prefix = (options.use_hex ? "0x": "");
+  if(!data.error) {
+    std::cout << "No error" << std::endl;
+  }
+  std::cout << "Decoding error " << prefix << data.error << ": " << lodepng_error_text(data.error) << std::endl;
+}
+
+// Loads the pixels of data, and when decoding fails with one of the errors handled below, reports the
+// error, enables the matching "ignore" decoder setting and decodes again. This repeats until decoding
+// succeeds, an error that is not handled here shows up, or ignoring did not change the error code.
+// On return data.error holds the last decoding result.
+void loadWithErrorRecovery(Data& data, const Options& options) {
+  unsigned& error = data.error;
+  lodepng::State& state = data.state;
+
+  data.loadPixels();
+
+  // In case of checksum errors and some other ignorable errors, report it but ignore it and retry
+  while(error) {
+    // remembered to detect a retry that ends with the very same error
+    unsigned error2 = error;
+    if(error == 57) {
+      showError(data, options);
+      std::cerr << "Ignoring the error: enabling ignore_crc" << std::endl;
+      state.decoder.ignore_crc = 1;
+      data.reloadPixels();
+    } else if(error == 58) {
+      showError(data, options);
+      std::cerr << "Ignoring the error: enabling ignore_adler32" << std::endl;
+      state.decoder.zlibsettings.ignore_adler32 = 1;
+      data.reloadPixels();
+    } else if(error == 69) {
+      showError(data, options);
+      std::cerr << "Ignoring the error: enabling ignore_critical" << std::endl;
+      state.decoder.ignore_critical = 1;
+      data.reloadPixels();
+    } else if(error == 30 || error == 63) {
+      showError(data, options);
+      std::cerr << "Ignoring the error: enabling ignore_end" << std::endl;
+      state.decoder.ignore_end = 1;
+      data.reloadPixels();
+    } else {
+      // Other error that we cannot ignore. The error itself is not shown here, it is shown at the
+      // end of the program. (This message used to be guarded by a condition that could never be
+      // true inside this loop, so it was never printed.)
+      std::cerr << "This error is unrecoverable" << std::endl;
+      break;
+    }
+    if(error == 0) std::cerr << "Successfully ignored the error" << std::endl;
+    if(error == error2) {
+      std::cerr << "Failed to ignore the error" << std::endl;
+      break; // avoid infinite loop if ignoring did not fix the error code
+    }
+  }
+}
+
+
+
+
+// Prints file size, image dimensions, color type and bit depth of the PNG on a single line.
+// Prints nothing if data.loadInspect() leaves data.error set.
+void showSingleLineSummary(Data& data, const Options& options) {
+  data.loadInspect();
+  if(data.error) return;
+  std::cout << (options.use_hex ? std::hex: std::dec);
+
+  const size_t filesize = data.buffer.size();
+  const LodePNGColorMode& mode = data.state.info_png.color;
+
+  std::cout << "Filesize: " << filesize << " (" << filesize / 1024 << "K)" << ", ";
+  std::cout << data.w << "x" << data.h << ", ";
+  std::cout << "Color: " << colorTypeString(mode.colortype) << ", " << mode.bitdepth << " bit" << std::endl;
+}
+
+// Prints the PNG header information of data, controlled by options:
+// - options.show_header: file size, dimensions, interlace method, color type, bit depth, and the
+//   color key, background, gAMA, cHRM, sRGB, iCCP and physical size values that are defined.
+//   options.verbose adds further fields.
+// - options.show_icc: dumps the bytes of the ICC profile, if one is defined.
+// Prints nothing if data.loadInspect() leaves data.error set.
+void showHeaderInfo(Data& data, const Options& options) {
+  data.loadInspect();
+  if(data.error) return;
+  std::cout << (options.use_hex ? std::hex: std::dec);
+
+  const LodePNGInfo& info = data.state.info_png;
+  const LodePNGColorMode& color = info.color;
+  if(options.show_header) {
+    std::cout << "Filesize: " << data.buffer.size() << " (" << data.buffer.size() / 1024 << "K)" << std::endl;
+    std::cout << "Width: " << data.w << std::endl;
+    std::cout << "Height: " << data.h << std::endl;
+    std::cout << "Interlace method: " << info.interlace_method << std::endl;
+    if(options.verbose) {
+      std::cout << "Compression method: " << info.compression_method << std::endl;
+      std::cout << "Filter method: " << info.filter_method << std::endl;
+    }
+    std::cout << "Color type: " << colorTypeString(color.colortype) << std::endl;
+    std::cout << "Bit depth: " << color.bitdepth << std::endl;
+    if(options.verbose) {
+      std::cout << "Bits per pixel: " << lodepng_get_bpp(&color) << std::endl;
+      std::cout << "Channels per pixel: " << lodepng_get_channels(&color) << std::endl;
+      std::cout << "Is greyscale type: " << lodepng_is_greyscale_type(&color) << std::endl;
+      std::cout << "Can have alpha: " << lodepng_can_have_alpha(&color) << std::endl;
+      std::cout << "Has color key: " << color.key_defined << std::endl;
+    }
+    if (color.colortype == LCT_PALETTE) {
+      std::cout << "Palette size: " << color.palettesize << std::endl;
+    }
+    if(color.key_defined) {
+      std::cout << "Color key rgb: " << color.key_r
+                << ", " << color.key_g
+                << ", " << color.key_b << std::endl;
+    }
+    if(info.background_defined) {
+      // for palette images the background is shown as a single index, taken from background_r
+      if(color.colortype == LCT_PALETTE) {
+        std::cout << "Background index: " << info.background_r << std::endl;
+      } else {
+        std::cout << "Background rgb: " << info.background_r
+                  << ", " << info.background_g
+                  << ", " << info.background_b << std::endl;
+      }
+    }
+    if(info.gama_defined) {
+      // shown three ways: the stored integer, that value divided by 100000, and the reciprocal of that
+      std::cout << "gAMA defined: " << info.gama_gamma << " (" << (info.gama_gamma / 100000.0)
+                << ", " << (100000.0 / info.gama_gamma) << ")" << std::endl;
+    }
+    if(info.chrm_defined) {
+      // the stored chromaticity integers are divided by 100000 for display
+      std::cout << "cHRM defined: w: " << (info.chrm_white_x / 100000.0) << " " << (info.chrm_white_y / 100000.0)
+                << ", r: " << (info.chrm_red_x / 100000.0) << " " << (info.chrm_red_y / 100000.0)
+                << ", g: " << (info.chrm_green_x / 100000.0) << " " << (info.chrm_green_y / 100000.0)
+                << ", b: " << (info.chrm_blue_x / 100000.0) << " " << (info.chrm_blue_y / 100000.0)
+                << std::endl;
+    }
+    if(info.srgb_defined) {
+      std::cout << "sRGB defined: rendering intent: " << info.srgb_intent << std::endl;
+    }
+    if(info.iccp_defined) {
+      std::cout << "iCCP defined: (" << info.iccp_profile_size << " bytes), name: " << info.iccp_name << std::endl;
+      if(options.verbose && !options.show_icc) std::cout << "Use -i to show full ICC profile" << std::endl;
+    }
+  }
+  // The ICC profile dump does not depend on show_header.
+  if(info.iccp_defined && options.show_icc) {
+    for(size_t i = 0; i < info.iccp_profile_size; i++) {
+      unsigned char c = info.iccp_profile[i];
+      // in HF_MIX format, bytes between 32 and 127 (exclusive) are shown as the character itself
+      if(c > 32 && c < 127 && options.hexformat == HF_MIX) printf(" %c ", c);
+      else printf("%02x ", c);
+      // line break after every 40 bytes, except after the very last byte
+      if(i % 40 == 39 && i + 1 != info.iccp_profile_size) std::cout << std::endl;
+    }
+    std::cout << std::endl;
+  }
+
+  // The physical size values are printed after the ICC dump.
+  if(options.show_header) {
+    if(options.verbose) std::cout << "Physics defined: " << info.phys_defined << std::endl;
+    if(info.phys_defined) {
+      std::cout << "Physics: X: " << info.phys_x << ", Y: " << info.phys_y << ", unit: " << info.phys_unit << std::endl;
+    }
+  }
+}
+
+// A bit more PNG info, which is from chunks that can come after IDAT: the text entries, the
+// international text entries and the time stamp. showHeaderInfo shows most other stuff.
+// The image is loaded with error recovery; nothing is printed if an error remains afterwards.
+void showPNGInfo(Data& data, const Options& options) {
+  loadWithErrorRecovery(data, options);
+  if(data.error) return;
+  std::cout << (options.use_hex ? std::hex: std::dec);
+
+  const LodePNGInfo& png = data.state.info_png;
+
+  if(options.verbose) std::cout << "Texts: " << png.text_num << std::endl;
+  for(size_t t = 0; t != png.text_num; t++) {
+    std::cout << "Text: " << png.text_keys[t] << ": " << png.text_strings[t] << std::endl;
+  }
+
+  if(options.verbose) std::cout << "International texts: " << png.itext_num << std::endl;
+  for(size_t t = 0; t != png.itext_num; t++) {
+    std::cout << "Text: "
+              << png.itext_keys[t] << ", "
+              << png.itext_langtags[t] << ", "
+              << png.itext_transkeys[t] << ": "
+              << png.itext_strings[t] << std::endl;
+  }
+
+  if(options.verbose) std::cout << "Time defined: " << png.time_defined << std::endl;
+  if(!png.time_defined) return;
+
+  const LodePNGTime& stamp = png.time;
+  std::cout << "year: " << stamp.year << std::endl;
+  std::cout << "month: " << stamp.month << std::endl;
+  std::cout << "day: " << stamp.day << std::endl;
+  std::cout << "hour: " << stamp.hour << std::endl;
+  std::cout << "minute: " << stamp.minute << std::endl;
+  std::cout << "second: " << stamp.second << std::endl;
+}
+
+// Prints color statistics of the image: the number of unique colors and of unique values per channel,
+// followed by the average and the minimum-maximum range of each channel.
+// The pixels are 16-bit big endian RGBA; averages and ranges are divided by 257 for display.
+void showColorStats(Data& data, const Options& options) {
+  std::cout << (options.use_hex ? std::hex: std::dec);
+
+  data.loadPixels();
+  if(data.error) return;
+
+  const std::vector<unsigned char>& image = data.pixels;
+  const unsigned w = data.w;
+  const unsigned h = data.h;
+
+  if(options.verbose) std::cout << "Num pixels: " << w * h << std::endl;
+
+  size_t rc, gc, bc, ac;
+  const size_t unique = countColors(image, w, h, &rc, &gc, &bc, &ac);
+  std::cout << "Num unique colors: " << unique;
+  std::cout << " (r: " << rc << ", g: " << gc << ", b: " << bc << ", a: " << ac << ")";
+  std::cout << std::endl;
+
+  if(w == 0 || h == 0) return;
+
+  double sum[4] = {0, 0, 0, 0};
+  double lowest[4] = {999999, 999999, 999999, 999999};
+  double highest[4] = {0, 0, 0, 0};
+  for(unsigned y = 0; y < h; y++) {
+    for(unsigned x = 0; x < w; x++) {
+      const unsigned base = y * 8 * w + x * 8;
+      for(int c = 0; c < 4; c++) {
+        // combine the two bytes of the 16-bit sample
+        const double v = 256 * image[base + c * 2] + image[base + c * 2 + 1];
+        sum[c] += v;
+        if(v < lowest[c]) lowest[c] = v;
+        if(highest[c] < v) highest[c] = v;
+      }
+    }
+  }
+
+  // 65535 / 257 = 255, so dividing by 257 brings 16-bit values to the 0.0-255.0 range
+  double avg[4];
+  for(int c = 0; c < 4; c++) {
+    avg[c] = sum[c] / (w * h * 257.0);
+    lowest[c] /= 257.0;
+    highest[c] /= 257.0;
+  }
+
+  if(options.verbose) std::cout << "Ranges shown as 0.0-255.0, even for 16-bit data:" << std::endl;
+  std::cout << "Average color: " << avg[0] << ", " << avg[1] << ", " << avg[2] << ", " << avg[3] << std::endl;
+  std::cout << "Color ranges: " << lowest[0] << "-" << highest[0] << ", " << lowest[1] << "-" << highest[1] << ", " << lowest[2] << "-" << highest[2] << ", " << lowest[3] << "-" << highest[3] << std::endl;
+}
+
+// Renders the pixels of the image in the form selected by options.rendermode: ASCII art,
+// 8-bit hex colors, 16-bit hex colors or palette indices.
+// Nothing is rendered if data.loadPixels() leaves data.error set.
+void showRender(Data& data, const Options& options) {
+  data.loadPixels();
+  if(data.error) return;
+
+  if(options.rendermode == RM_ASCII) {
+    displayAsciiArt(data.pixels, data.w, data.h, options.rendersize);
+  } else if(options.rendermode == RM_HEX) {
+    displayColorsHex(data.pixels, data.w, data.h, false);
+  } else if(options.rendermode == RM_HEX16) {
+    displayColorsHex(data.pixels, data.w, data.h, true);
+  } else if(options.rendermode == RM_PAL) {
+    // the palette view decodes the original file bytes itself
+    displayPalettePixels(data.buffer, options);
+  }
+}
+
+
+// Runs every report that is enabled in options on data, in a fixed order, and finally shows the
+// decoding error if one is set in data.error.
+void showInfos(Data& data, const Options& options) {
+  const bool wants_header = options.show_header || options.show_icc;
+  const bool wants_chunks = options.show_chunks || options.show_chunks2;
+  const bool wants_zlib = options.zlib_info || options.zlib_blocks || options.zlib_counts || options.zlib_full;
+
+  if(options.show_one_line_summary) {
+    showSingleLineSummary(data, options);
+  }
+  if(wants_header) {
+    showHeaderInfo(data, options);
+  }
+  if(options.show_color_stats) {
+    showColorStats(data, options);
+  }
+  if(options.show_png_info) {
+    showPNGInfo(data, options);
+  }
+  if(options.show_palette) {
+    displayPalette(data, options);
+  }
+  if(wants_chunks) {
+    displayChunkNames(data, options);
+  }
+  if(options.show_filters) {
+    displayFilterTypes(data, options);
+  }
+  if(options.show_render) {
+    showRender(data, options);
+  }
+  if(wants_zlib) {
+    printZlibInfo(data, options);
+  }
+
+  if(data.error) {
+    showError(data, options);
+  }
+}
+
+// Entry point: parses the command line and prints the requested information for every given file.
+// - Arguments of the form "-abc" set one option per letter; "-?" shows the help.
+// - Arguments of the form "--key" or "--key=value" handle help, mode, size and format.
+// - Every other argument is taken as a filename.
+// If no flag other than "-x" or "-v" was given, the header is shown by default.
+// Returns 0 in all cases, including after showing the help for an unknown flag.
+int main(int argc, char *argv[]) {
+  Options options;
+  bool options_chosen = false;
+
+  std::vector<std::string> filenames;
+  for (int i = 1; i < argc; i++) {
+    std::string s = argv[i];
+    if(s.size() > 1 && s[0] == '-' && s[1] != '-') {
+      // anything that chooses actual set disables the defaults
+      if(s != "-x" && s != "-v") options_chosen = true;
+      for(size_t j = 1; j < s.size(); j++) {
+        char c = s[j];
+        if(c == '?') {
+          showHelp();
+          return 0;
+        }
+        else if(c == 'o') options.show_one_line_summary = true;
+        else if(c == 'h') options.show_header = true;
+        else if(c == 'i') options.show_icc = true;
+        else if(c == 'v') options.verbose = true;
+        else if(c == 's') options.show_color_stats = true;
+        else if(c == 'p') options.show_header = options.show_png_info = true;
+        else if(c == 'r') options.show_render = true;
+        else if(c == 'l') options.show_palette = true;
+        else if(c == 'L') options.show_palette_pixels = true;
+        else if(c == 'c') options.show_chunks = true;
+        else if(c == 'C') options.show_chunks2 = true;
+        else if(c == 'f') options.show_filters = true;
+        else if(c == 'z') options.zlib_info = true;
+        else if(c == 'b') options.zlib_blocks = true;
+        else if(c == 'B') {
+          options.zlib_blocks = true;
+          options.zlib_counts = true;
+        }
+        else if(c == '7') {
+          options.zlib_blocks = true;
+          options.zlib_full = true;
+        }
+        else if(c == 'x') {
+          options.use_hex = true;
+          std::cout << std::hex;
+        }
+        else if(c == '-') {
+          // The hint names --help because -h selects the header, not the help.
+          if(s != "--help") std::cout << "Unknown flag: " << s << ". Use --help for help" << std::endl;
+          showHelp();
+          return 0;
+        }
+        else {
+          std::cout << "Unknown flag: " << c << ". Use --help for help" << std::endl;
+          showHelp();
+          return 0;
+        }
+
+      }
+    } else if(s.size() > 1 && s[0] == '-' && s[1] == '-') {
+      // split "--key=value" into key and value; value stays empty when there is no '='
+      size_t eqpos = 2;
+      while(eqpos < s.size() && s[eqpos] != '=') eqpos++;
+      std::string key = s.substr(2, eqpos - 2);
+      std::string value = (eqpos + 1) < s.size() ? s.substr(eqpos + 1) : "";
+      if(key == "help") {
+        showHelp();
+        return 0;
+      }
+      if(key == "mode") {
+        if(value == "ascii") options.rendermode = RM_ASCII;
+        else if(value == "hex") options.rendermode = RM_HEX;
+        else if(value == "hex16") options.rendermode = RM_HEX16;
+        else if(value == "palette") options.rendermode = RM_PAL;
+      }
+      if(key == "size") {
+        int size = strtoval<int>(value);
+        // Validate the value that was just parsed; out-of-range values keep the previous size.
+        if(size >= 1 && size <= 4096) options.rendersize = size;
+      }
+      if(key == "format") {
+        if(value == "mix") options.hexformat = HF_MIX;
+        else if(value == "hex") options.hexformat = HF_HEX;
+      }
+    }
+    else filenames.push_back(s);
+  }
+
+  if(filenames.empty()) {
+    std::cout << "Please provide a filename to preview" << std::endl;
+    showHelp();
+    return 0;
+  }
+
+  if(!options_chosen) {
+    //fill in defaults
+    options.show_header = true;
+  }
+
+  for(size_t i = 0; i < filenames.size(); i++) {
+    // with several files, each report is preceded by its filename
+    if(filenames.size() > 1) {
+      if(i > 0 && !options.show_one_line_summary) std::cout << std::endl;
+      std::cout << filenames[i] << ":";
+      if(!options.show_one_line_summary) std::cout << std::endl; else std::cout << " ";
+    }
+    Data data(filenames[i]);
+    showInfos(data, options);
+  }
+}
+
diff --git a/codec/L2/demos/pikEnc/images/small32x32.png b/codec/L2/demos/pikEnc/images/small32x32.png
new file mode 100644
index 0000000000..e50f46a988
Binary files /dev/null and b/codec/L2/demos/pikEnc/images/small32x32.png differ
diff --git a/codec/L2/demos/pikEnc/images/t0.png b/codec/L2/demos/pikEnc/images/t0.png
new file mode 100644
index 0000000000..294bbaae40
Binary files /dev/null and b/codec/L2/demos/pikEnc/images/t0.png differ
diff --git a/codec/L2/demos/pikEnc/images/t1.png b/codec/L2/demos/pikEnc/images/t1.png
new file mode 100644
index 0000000000..3b0012f91f
Binary files /dev/null and b/codec/L2/demos/pikEnc/images/t1.png differ
diff --git a/codec/L2/demos/pikEnc/images/t2.png b/codec/L2/demos/pikEnc/images/t2.png
new file mode 100644
index 0000000000..da8ecb130a
Binary files /dev/null and b/codec/L2/demos/pikEnc/images/t2.png differ
diff --git a/codec/L2/demos/pikEnc/kernel/XAccPIKKernel1.cpp b/codec/L2/demos/pikEnc/kernel/XAccPIKKernel1.cpp
new file mode 100644
index 0000000000..503d7188cd
--- /dev/null
+++ b/codec/L2/demos/pikEnc/kernel/XAccPIKKernel1.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pikEnc/XAccPIKKernel1.hpp"
+
+namespace xf {
+namespace codec {
+// C-linkage kernel entry: loadConfig() is called on `config`, then kernel1_core() on the remaining buffers.
+extern "C" void pikEncKernel1Top(ap_uint<32> config[MAX_NUM_CONFIG],
+                                 ap_uint<AXI_WIDTH> rbuf[BUF_DEPTH / 2],
+                                 ap_uint<32> axi_out[AXI_OUT],
+                                 ap_uint<32> axi_cmap[AXI_CMAP],
+                                 ap_uint<32> axi_qf[AXI_QF]) { // each array: own m_axi bundle + s_axilite on "control"
+#pragma HLS INTERFACE m_axi offset = slave latency = 8 num_write_outstanding = 4 num_read_outstanding = \
+    4 max_write_burst_length = 8 max_read_burst_length = 8 bundle = gmem0_0 port = config
+#pragma HLS INTERFACE s_axilite port = config bundle = control
+// rbuf: AXI master bundle gmem0_1; its read burst length (256) is the largest of the five ports.
+#pragma HLS INTERFACE m_axi offset = slave latency = 64 num_write_outstanding = 4 num_read_outstanding = \
+    8 max_write_burst_length = 8 max_read_burst_length = 256 bundle = gmem0_1 port = rbuf
+#pragma HLS INTERFACE s_axilite port = rbuf bundle = control
+// axi_out: AXI master bundle gmem1_0, write burst length 128.
+#pragma HLS INTERFACE m_axi offset = slave latency = 64 num_write_outstanding = 16 num_read_outstanding = \
+    4 max_write_burst_length = 128 max_read_burst_length = 8 bundle = gmem1_0 port = axi_out
+#pragma HLS INTERFACE s_axilite port = axi_out bundle = control
+// axi_cmap: AXI master bundle gmem1_1, write burst length 32.
+#pragma HLS INTERFACE m_axi offset = slave latency = 64 num_write_outstanding = 16 num_read_outstanding = \
+    4 max_write_burst_length = 32 max_read_burst_length = 8 bundle = gmem1_1 port = axi_cmap
+#pragma HLS INTERFACE s_axilite port = axi_cmap bundle = control
+// axi_qf: AXI master bundle gmem1_2, write burst length 32.
+#pragma HLS INTERFACE m_axi offset = slave latency = 64 num_write_outstanding = 16 num_read_outstanding = \
+    4 max_write_burst_length = 32 max_read_burst_length = 8 bundle = gmem1_2 port = axi_qf
+#pragma HLS INTERFACE s_axilite port = axi_qf bundle = control
+// The return port is mapped to the same "control" AXI4-Lite bundle.
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+    // Locals are passed uninitialised to loadConfig(); presumably it fills them from `config` -- confirm in its header.
+    int len[3];
+    int offsets[3];
+    int xsize;
+    int ysize;
+    float quant_ac;
+
+    loadConfig(config, len, offsets, xsize, ysize, quant_ac);
+
+    kernel1_core(rbuf, len, offsets, xsize, ysize, quant_ac, axi_out, axi_cmap, axi_qf);
+}
+} // namespace codec
+} // namespace xf
\ No newline at end of file
diff --git a/codec/L2/demos/pikEnc/kernel/XAccPIKKernel2.cpp b/codec/L2/demos/pikEnc/kernel/XAccPIKKernel2.cpp
new file mode 100644
index 0000000000..12a19d1b55
--- /dev/null
+++ b/codec/L2/demos/pikEnc/kernel/XAccPIKKernel2.cpp
@@ -0,0 +1,4495 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pikEnc/XAccPIKKernel2.hpp"
+#include "pikEnc/dct.hpp"
+#include "pikEnc/dequant.hpp"
+
+#include "xf_utils_hw/axi_to_multi_stream.hpp"
+#include "xf_utils_hw/axi_to_stream.hpp"
+#include "xf_utils_hw/stream_to_axi.hpp"
+
+#include <iomanip>
+#include <iostream>
+
+#ifndef __SYNTHESIS__
+#define DEBUG true
+#ifdef DEBUG
+//#define DEBUG_ACSTRATEGY true
+//#define DEBUG_DCT true
+//#define DEBUG_IDCT true
+//#define DEBUG_QUANTIZER true
+//#define DEBUG_FRAMECACHE true
+//#define DEBUG_COEFFS true
+//#define DEBUG_REORDER true
+//#define DEBUG_COEFFS_ORDER true
+//#define DEBUG_SORT true
+#endif
+#endif
+
+#define USE_HLS_SQRT true
+
+float power(float a, float b) { // pipelined wrapper: a raised to the exponent b
+#pragma HLS PIPELINE
+    const float raised = hls::pow(a, b);
+    return raised;
+}
+
+#ifndef USE_HLS_SQRT
+
+float Inv(float x) {
+#pragma HLS INLINE
+    // Reciprocal of x; 1.0 is a double literal, so the quotient is narrowed to float on return.
+    return 1.0 / x;
+}
+
+float InvSqrt(float x) {
+#pragma HLS INLINE
+    // Approximates 1/sqrt(x): bit-level initial guess refined by two Newton steps.
+    float xhalf = 0.5f * x;
+    int i = *(int*)&x;              // reinterpret the float's bits as an int (assumes int and float have equal size -- TODO confirm)
+    i = 0x5f3759df - (i >> 1);      // magic-constant initial guess y0
+    x = *(float*)&i;                // reinterpret the bits back as a float
+    x = x * (1.5f - xhalf * x * x); // first Newton step
+    x = x * (1.5f - xhalf * x * x); // second Newton step, repeated for extra accuracy
+    return x;
+}
+
+float Sqrt2(float x) {
+#pragma HLS INTERFACE ap_ctrl_none port = return
+#pragma HLS INLINE off
+    // Square root assembled from the local helpers:
+    // the reciprocal of the approximate inverse square root.
+    const float inv_root = InvSqrt(x);
+    return Inv(inv_root);
+}
+
+float Sqrt4(float x) {
+#pragma HLS INTERFACE ap_ctrl_none port = return
+#pragma HLS INLINE off
+    // Fourth root: InvSqrt applied twice, x -> x^(-1/2) -> x^(1/4).
+    const float inv_root = InvSqrt(x);
+    return InvSqrt(inv_root);
+}
+
+float Sqrt8(float x) {
+#pragma HLS INTERFACE ap_ctrl_none port = return
+#pragma HLS INLINE off
+    // Eighth root: Sqrt2 followed by Sqrt4.
+    const float root2 = Sqrt2(x);
+    return Sqrt4(root2);
+}
+
+#else
+
+float Sqrt2(float x) {
+#pragma HLS INTERFACE ap_ctrl_none port = return
+#pragma HLS INLINE off
+    const float root2 = hls::sqrt(x); // square root from the HLS math library
+    return root2;
+}
+
+float Sqrt4(float x) {
+#pragma HLS INTERFACE ap_ctrl_none port = return
+#pragma HLS INLINE off
+    // Fourth root: the square root is taken twice in a row.
+    const float root2 = hls::sqrt(x);
+    return hls::sqrt(root2);
+}
+
+float Sqrt8(float x) {
+#pragma HLS INTERFACE ap_ctrl_none port = return
+#pragma HLS INLINE off
+    // Eighth root: the square root is taken three times in a row.
+    const float root2 = hls::sqrt(x);
+    const float root4 = hls::sqrt(root2);
+    return hls::sqrt(root4);
+}
+
+#endif
+
+void get4x4block(ap_uint<8> bx, ap_uint<8> by, float src32x32[3][1024], hls::stream<float> src4x4[3][16]) {
+#pragma HLS INLINE off
+    // Streams out 64 values per channel: the four 4x4 sub-blocks of the region selected by (bx, by).
+#ifdef DEBUG_ACSTRATEGY
+    std::cout << "get4x4block:" << std::endl;
+#endif
+
+    ap_uint<10> addr_i; // index into src32x32[c]
+    ap_uint<4> addr_o;  // index of the destination stream in src4x4[c]
+
+    for (ap_uint<8> block = 0; block < 4; block++)
+        for (ap_uint<8> dy = 0; dy < 4; dy++) {
+            for (ap_uint<8> dx = 0; dx < 4; dx++) {
+#pragma HLS pipeline II = 1
+                for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+                    // addr_i = by*256 + block[1]*128 + dy*32 + bx*8 + block[0]*4 + dx (low two bits of by/bx/dy/dx).
+                    addr_i(9, 8) = by(1, 0);
+                    addr_i[7] = block[1];
+                    addr_i(6, 5) = dy(1, 0);
+                    addr_i(4, 3) = bx(1, 0);
+                    addr_i[2] = block[0];
+                    addr_i(1, 0) = dx(1, 0);
+                    // addr_o = dy*4 + dx, so each of the 16 streams receives one value per sub-block.
+                    addr_o(3, 2) = dy(1, 0);
+                    addr_o(1, 0) = dx(1, 0);
+
+                    src4x4[c][addr_o].write(src32x32[c][addr_i]);
+
+#ifdef DEBUG_ACSTRATEGY
+                    std::cout << "id=" << addr_i << " src=" << src32x32[c][addr_i] << std::endl;
+#endif
+                }
+            }
+        }
+}
+
+void DCT_collection4x4(hls::stream<float> src_in[3][16], const float kColorWeights[3], hls::stream<float>& blockval) {
+#pragma HLS INLINE off
+#pragma HLS DATAFLOW
+    // For 4 tiles: per-channel sum of absolute neighbour differences, then one colour-weighted total per tile.
+    hls::stream<float> sum_stream[3];
+#pragma HLS stream variable = sum_stream depth = 8
+#pragma HLS BIND_STORAGE variable = sum_stream type = fifo impl = srl
+
+loop_sum:
+    for (ap_uint<8> ix = 0; ix < 4; ix++) {
+        for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS pipeline
+            // One value is read from each of the 16 streams of channel c: a 4x4 tile with index = 4 * row + col.
+            float sum = 0;
+            float src[16];
+            for (int i = 0; i < 16; i++) {
+#pragma HLS UNROLL
+                src[i] = src_in[c][i].read();
+            }
+
+            // differences between neighbours within a row (index i vs. i + 1)
+            sum += hls::fabs(src[0] - src[1]);
+            sum += hls::fabs(src[1] - src[2]);
+            sum += hls::fabs(src[2] - src[3]);
+
+            sum += hls::fabs(src[4] - src[5]);
+            sum += hls::fabs(src[5] - src[6]);
+            sum += hls::fabs(src[6] - src[7]);
+
+            sum += hls::fabs(src[8] - src[9]);
+            sum += hls::fabs(src[9] - src[10]);
+            sum += hls::fabs(src[10] - src[11]);
+
+            sum += hls::fabs(src[12] - src[13]);
+            sum += hls::fabs(src[13] - src[14]);
+            sum += hls::fabs(src[14] - src[15]);
+
+            // differences between neighbours within a column (index i vs. i + 4)
+            sum += hls::fabs(src[0] - src[4]);
+            sum += hls::fabs(src[1] - src[5]);
+            sum += hls::fabs(src[2] - src[6]);
+            sum += hls::fabs(src[3] - src[7]);
+
+            sum += hls::fabs(src[4] - src[8]);
+            sum += hls::fabs(src[5] - src[9]);
+            sum += hls::fabs(src[6] - src[10]);
+            sum += hls::fabs(src[7] - src[11]);
+
+            sum += hls::fabs(src[8] - src[12]);
+            sum += hls::fabs(src[9] - src[13]);
+            sum += hls::fabs(src[10] - src[14]);
+            sum += hls::fabs(src[11] - src[15]);
+
+#ifdef DEBUG_ACSTRATEGY
+            std::cout << "DCT_collection4x4:" << std::endl;
+            for (ap_uint<8> i = 0; i < 16; i++) {
+                std::cout << "c=" << c << " id=" << i << " src=" << src[i] << std::endl;
+            }
+            std::cout << "sum=" << sum << std::endl;
+#endif
+            sum_stream[c].write(sum);
+        }
+    }
+
+loop_total_sum:
+    for (ap_uint<8> ix = 0; ix < 4; ix++) {
+#pragma HLS pipeline
+        // Combine the three per-channel sums of one tile into a single weighted value.
+        float sum[3];
+        for (ap_uint<8> c = 0; c < 3; c++) {
+            sum[c] = sum_stream[c].read();
+        }
+        float total_sum = kColorWeights[0] * sum[0] + kColorWeights[1] * sum[1] + kColorWeights[2] * sum[2];
+        blockval.write(total_sum);
+    }
+}
+
+void DCT_normalize(hls::stream<float>& blockval, const float constant[3], hls::stream<bool>& result) {
+#pragma HLS INLINE off
+    // Reads 4 values from blockval, compares power-mean norms of them and writes exactly one bool to result.
+    float power2 = 0; // running sum of v^2
+    float power4 = 0; // running sum of v^4
+    float power8 = 0; // running sum of v^8
+
+    float norm2;
+    float norm4;
+    float norm8;
+    for (ap_uint<8> ix = 0; ix < 4; ix++) {
+#pragma HLS pipeline
+
+        float v = blockval.read();
+        float v2, v4, v8;
+
+        v2 = v * v;
+        power2 += v2;
+        v4 = v2 * v2;
+        power4 += v4;
+        v8 = v4 * v4;
+        power8 += v8;
+    }
+    // Mean over the 4 samples (factor 0.25), then the 2nd / 4th / 8th root respectively.
+    norm2 = Sqrt2(power2 * 0.25);
+    norm4 = Sqrt4(power4 * 0.25);
+    norm8 = Sqrt8(power8 * 0.25);
+    norm2 += 0.03; // constant offset added to the 2-norm only
+
+    float loss = constant[0] * norm8;
+    loss += constant[1] * norm4;
+    float loss_limit = constant[2] * norm2;
+    // result is true when the weighted 8-/4-norm mix reaches the weighted 2-norm.
+    bool result_tmp;
+    if (loss >= loss_limit) {
+        result_tmp = true;
+    } else {
+        result_tmp = false;
+    }
+    result.write(result_tmp);
+}
+
+void DCT_collection32x32(ap_uint<16> xblock,
+                         ap_uint<16> yblock,
+                         hls::stream<float> src[3],
+                         const float kColorWeights[3],
+                         const float constant[3],
+                         hls::stream<bool>& result) {
+#pragma HLS INLINE off
+    // For each of the yblock * xblock iterations: buffer 1024 values per channel, then write 16 bools to result.
+#ifdef DEBUG_ACSTRATEGY
+    std::cout << "kColorWeights:" << std::endl;
+    for (ap_uint<8> i = 0; i < 3; i++) {
+        std::cout << "id=" << i << " " << kColorWeights[i] << std::endl;
+    }
+
+    std::cout << "constant:" << std::endl;
+    for (ap_uint<8> i = 0; i < 3; i++) {
+        std::cout << "id=" << i << " " << constant[i] << std::endl;
+    }
+#endif
+
+// Process y
+loop_y:
+    for (ap_uint<16> ycnt = 0; ycnt < yblock; ycnt++) {
+    // Process x
+    loop_x:
+        for (ap_uint<16> xcnt = 0; xcnt < xblock; xcnt++) {
+#pragma HLS DATAFLOW
+            // load src_buffer 32x32
+            float src_buffer[3][1024];
+#pragma HLS ARRAY_PARTITION variable = src_buffer complete dim = 1
+            // The three channel streams are read in lock-step, one value each per iteration.
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+                for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+                    src_buffer[c][i] = src[c].read();
+                }
+            }
+
+            // split the 32x32 buffer into 16 blocks of 8x8
+            for (ap_uint<8> by = 0; by < 4; by++) {
+                for (ap_uint<8> bx = 0; bx < 4; bx++) {
+#pragma HLS DATAFLOW
+
+                    hls::stream<float> src4x4[3][16];
+#pragma HLS ARRAY_PARTITION variable = src4x4 complete
+#pragma HLS stream variable = src4x4 depth = 8
+#pragma HLS BIND_STORAGE variable = src4x4 type = fifo impl = srl
+                    hls::stream<float> blockval;
+#pragma HLS stream variable = blockval depth = 8
+#pragma HLS BIND_STORAGE variable = blockval type = fifo impl = srl
+
+                    // split each 8x8 block into 4 tiles of 4x4, score them, and emit one decision per 8x8 block
+                    get4x4block(bx, by, src_buffer, src4x4);
+                    DCT_collection4x4(src4x4, kColorWeights, blockval);
+                    DCT_normalize(blockval, constant, result);
+                }
+            }
+        }
+    }
+}
+
+void min_max_entropy(float src[3][1024],
+                     hls::stream<float> min_ext16_strm[3],
+                     hls::stream<float> max_ext16_strm[3],
+                     hls::stream<float> min_ext32_strm[3],
+                     hls::stream<float> max_ext32_strm[3]) {
+#pragma HLS INLINE off
+    // Per channel: value range (max - min) of every 8x8 tile, reduced to its min/max per 16x16 group and per 32x32 block.
+    float min_ext32[3];
+#pragma HLS ARRAY_PARTITION variable = min_ext32 complete
+    float max_ext32[3];
+#pragma HLS ARRAY_PARTITION variable = max_ext32 complete
+
+    float min_ext16[3];
+#pragma HLS ARRAY_PARTITION variable = min_ext16 complete
+    float max_ext16[3];
+#pragma HLS ARRAY_PARTITION variable = max_ext16 complete
+
+loop_min_mix:
+    for (ap_uint<8> by = 0; by < 2; by++) {
+        for (ap_uint<8> bx = 0; bx < 2; bx++) {
+            for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+                float min8x8;
+                float max8x8;
+                for (ap_uint<8> iy = 0; iy < 2; iy++) {
+                    for (ap_uint<8> ix = 0; ix < 2; ix++) {
+                        for (ap_uint<8> dy = 0; dy < 8; ++dy) {
+                            for (ap_uint<8> dx = 0; dx < 8; ++dx) {
+#pragma HLS pipeline
+
+                                // shift = (by*16 + iy*8 + dy) * 32 + (bx*16 + ix*8 + dx): position inside src[c]
+                                ap_uint<10> shift;
+                                shift[9] = by[0];
+                                shift[8] = iy[0];
+                                shift(7, 5) = dy(2, 0);
+                                shift[4] = bx[0];
+                                shift[3] = ix[0];
+                                shift(2, 0) = dx(2, 0);
+                                // Reset the accumulators at the first sample of each 8x8 / 16x16 / 32x32 scope.
+                                if (dy == 0 && dx == 0) {
+                                    min8x8 = 1e30;
+                                    max8x8 = -1e30;
+
+                                    if (iy == 0 && ix == 0 && dy == 0 && dx == 0) {
+                                        min_ext16[c] = 1e30;
+                                        max_ext16[c] = -1e30;
+
+                                        if (by == 0 && bx == 0) {
+                                            min_ext32[c] = 1e30;
+                                            max_ext32[c] = -1e30;
+                                        }
+                                    }
+                                }
+
+                                float v = src[c][shift];
+                                if (v < min8x8) min8x8 = v;
+                                if (v > max8x8) max8x8 = v;
+                                // Last sample of the 8x8 tile: fold its range into the 16x16 and 32x32 statistics.
+                                if (dy == 7 && dx == 7) {
+                                    float ext = max8x8 - min8x8;
+                                    if (ext < min_ext16[c]) min_ext16[c] = ext;
+                                    if (ext > max_ext16[c]) max_ext16[c] = ext;
+
+                                    if (ext < min_ext32[c]) min_ext32[c] = ext;
+                                    if (ext > max_ext32[c]) max_ext32[c] = ext;
+                                    // 16x16 results are written 4 times per call, 32x32 results once (per channel).
+                                    if (iy == 1 && ix == 1) {
+                                        min_ext16_strm[c].write(min_ext16[c]);
+                                        max_ext16_strm[c].write(max_ext16[c]);
+                                        if (by == 1 && bx == 1) {
+                                            min_ext32_strm[c].write(min_ext32[c]);
+                                            max_ext32_strm[c].write(max_ext32[c]);
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+void entropy_accum0(float power0[3][1024],
+                    float power1[3][1024],
+                    hls::stream<float> entropy16_strm[3],
+                    hls::stream<float> entropy32_strm[3]) {
+#pragma HLS INLINE off
+    // Accumulates per-channel entropy sums from power0/power1: 4 writes to entropy16_strm, 1 to entropy32_strm.
+    static const float kColorWeights[3] = {
+        0.65285453568125873, 2.4740163893371157, 2.0140216656143393,
+    }; // not referenced in this function
+
+    const float kDiff16 = 0.2494383590606063;
+    const float kDiff32 = 0.9539527585329598;
+
+    const float kPow = 0.99263297216052859;    // not referenced in this function
+    const float kPow2 = 0.018823021573462634;  // not referenced in this function
+    const float kExtremityWeight = 7.77;       // not referenced in this function
+
+    float entropy32[3];
+#pragma HLS ARRAY_PARTITION variable = entropy32 complete
+    float entropy16[3];
+#pragma HLS ARRAY_PARTITION variable = entropy16 complete
+
+    for (ap_uint<8> by = 0; by < 2; by++) {
+        for (ap_uint<8> bx = 0; bx < 2; bx++) {
+            for (ap_uint<8> iy = 0; iy < 2; iy++) {
+                for (ap_uint<8> ix = 0; ix < 2; ix++) {
+                    for (ap_uint<8> dy = 0; dy < 8; ++dy) {
+                        for (ap_uint<8> dx = 0; dx < 8; ++dx) {
+#pragma HLS pipeline
+                            for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+
+                                // Position (0, 0) of each 8x8 tile contributes nothing; it only resets the sums.
+                                if (dx == 0 && dy == 0) {
+                                    if (ix == 0 && iy == 0) {
+                                        entropy16[c] = 0;
+                                        if (by == 0 && bx == 0) {
+                                            entropy32[c] = 0;
+                                        }
+                                    }
+                                } else {
+                                    ap_uint<10> shift1;
+                                    // shift1 = (by*16 + iy*8 + dy) * 32 + (bx*16 + ix*8 + dx)
+                                    shift1[9] = by[0];
+                                    shift1[8] = iy[0];
+                                    shift1(7, 5) = dy(2, 0);
+                                    shift1[4] = bx[0];
+                                    shift1[3] = ix[0];
+                                    shift1(2, 0) = dx(2, 0);
+
+                                    float p0 = power0[c][shift1];
+                                    float p1 = power1[c][shift1];
+                                    entropy16[c] += 1 + kDiff16 - p0 - kDiff16 * p1;
+                                    entropy32[c] += 1 + kDiff32 - p0 - kDiff32 * p1;
+                                    // Emit after the last sample of each 16x16 group, and of the whole 32x32 block.
+                                    if (dy == 7 && dx == 7 && iy == 1 && ix == 1) {
+                                        entropy16_strm[c].write(entropy16[c]);
+                                        if (by == 1 && bx == 1) {
+                                            entropy32_strm[c].write(entropy32[c]);
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+void entropy_accum1(hls::stream<float> entropy16_strm[3],
+                    hls::stream<float> max_ext16_strm[3],
+                    hls::stream<float> min_ext16_strm[3],
+                    hls::stream<float>& reference_entropy16) {
+#pragma HLS INLINE off
+    // Four times: merge the three channels' entropy and range values into one weighted value and write it out.
+    static const float kColorWeights[3] = {
+        0.65285453568125873, 2.4740163893371157, 2.0140216656143393,
+    };
+    const float kExtremityWeight = 7.77;
+
+    float dct16x16_reference_entropy;
+    for (ap_uint<8> by = 0; by < 2; by++) {
+        for (ap_uint<8> bx = 0; bx < 2; bx++) {
+            for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS pipeline
+                // The accumulator is restarted at the first channel of every (by, bx) group.
+                if (c == 0) {
+                    dct16x16_reference_entropy = 0;
+                }
+
+                float entropy16_tmp = entropy16_strm[c].read();
+                float max_ext16_tmp = max_ext16_strm[c].read();
+                float min_ext16_tmp = min_ext16_strm[c].read();
+                entropy16_tmp -= kExtremityWeight * (max_ext16_tmp - min_ext16_tmp);
+                dct16x16_reference_entropy += kColorWeights[c] * entropy16_tmp;
+                // One output per group, once the last channel has been added.
+                if (c == 2) {
+                    reference_entropy16.write(dct16x16_reference_entropy);
+                }
+            }
+        }
+    }
+}
+
+void entropy_accum2(hls::stream<float> entropy32_strm[3],
+                    hls::stream<float> max_ext32_strm[3],
+                    hls::stream<float> min_ext32_strm[3],
+                    hls::stream<float>& reference_entropy32) {
+#pragma HLS INLINE off
+    // Reads one entropy/max/min triple per channel and writes a single colour-weighted value.
+    static const float kColorWeights[3] = {
+        0.65285453568125873, 2.4740163893371157, 2.0140216656143393,
+    };
+    const float kExtremityWeight = 7.77;
+
+    float dct32x32_reference_entropy = 0;
+    for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS pipeline
+        // The channel entropy is reduced by kExtremityWeight * (max - min) before weighting.
+        float entropy32_tmp = entropy32_strm[c].read();
+        float max_ext32_tmp = max_ext32_strm[c].read();
+        float min_ext32_tmp = min_ext32_strm[c].read();
+        entropy32_tmp -= kExtremityWeight * (max_ext32_tmp - min_ext32_tmp);
+        dct32x32_reference_entropy += kColorWeights[c] * entropy32_tmp;
+    }
+    reference_entropy32.write(dct32x32_reference_entropy);
+}
+
+void load_dequant_src(hls::stream<float>& quant_field_strm,
+                      hls::stream<float> src_strm[3],
+                      hls::stream<float> coeffs_strm[3],
+                      float quant_field[16],
+                      float src[3][1024],
+                      float coeffs[3][1024]) {
+#pragma HLS INLINE off
+#pragma HLS DATAFLOW
+    // Drains the input streams into arrays: 1024 values per channel for src and coeffs, 16 for quant_field.
+load_src:
+    for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+        for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+            src[c][i] = src_strm[c].read();
+        }
+    }
+
+load_coeffs:
+    for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+        for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+            coeffs[c][i] = coeffs_strm[c].read();
+        }
+    }
+
+load_qf:
+    for (ap_uint<16> i = 0; i < 16; i++) {
+#pragma HLS PIPELINE II = 1
+        quant_field[i] = quant_field_strm.read();
+    }
+}
+
+void dequant(float discretization_factor,
+             const float inv_dequant[3][64],
+             float quant_field[16],
+             float src[3][1024], // not read in this function
+             float coeffs[3][1024],
+             float power0[3][1024],
+             float power1[3][1024]) {
+#pragma HLS INLINE off
+    // Fills power0/power1 with kPow^v and kPow2^v, where v is the scaled magnitude of each dequantised coefficient.
+    // The quantized symbol distribution contracts with the increasing
+    // butteraugli_target.
+    static const float kColorWeights[3] = {
+        0.65285453568125873, 2.4740163893371157, 2.0140216656143393,
+    }; // not referenced in this function
+
+    const float kDiff16 = 0.2494383590606063; // not referenced in this function
+    const float kDiff32 = 0.9539527585329598; // not referenced in this function
+
+    const float kPow = 0.99263297216052859;
+    const float kPow2 = 0.018823021573462634;
+    const float kExtremityWeight = 7.77; // not referenced in this function
+
+loop_dequant:
+    for (ap_uint<8> by = 0; by < 2; by++) {
+        for (ap_uint<8> bx = 0; bx < 2; bx++) {
+            for (ap_uint<8> iy = 0; iy < 2; iy++) {
+                for (ap_uint<8> ix = 0; ix < 2; ix++) {
+                    for (ap_uint<8> dy = 0; dy < 8; ++dy) {
+                        for (ap_uint<8> dx = 0; dx < 8; ++dx) {
+#pragma HLS pipeline
+                            for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+                                // every position of every 8x8 tile is processed here; nothing is skipped
+                                ap_uint<10> shift1;
+                                ap_uint<6> shift2;
+                                ap_uint<4> shift3;
+                                // shift1 = (by*16 + iy*8 + dy) * 32 + (bx*16 + ix*8 + dx): index into coeffs/power
+                                shift1[9] = by[0];
+                                shift1[8] = iy[0];
+                                shift1(7, 5) = dy(2, 0);
+                                shift1[4] = bx[0];
+                                shift1[3] = ix[0];
+                                shift1(2, 0) = dx(2, 0);
+                                // shift2 = dy*8 + dx: index into the 64-entry inv_dequant table
+                                shift2(5, 3) = dy(2, 0);
+                                shift2(2, 0) = dx(2, 0);
+                                // shift3 = by*8 + iy*4 + bx*2 + ix: one quant_field entry per 8x8 tile
+                                shift3[3] = by[0];
+                                shift3[2] = iy[0];
+                                shift3[1] = bx[0];
+                                shift3[0] = ix[0];
+
+                                float val = coeffs[c][shift1] * inv_dequant[c][shift2];
+                                val *= quant_field[shift3];
+                                float v = std::fabs(val) * discretization_factor;
+                                power0[c][shift1] = std::pow(kPow, v);
+                                power1[c][shift1] = std::pow(kPow2, v);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+void reference_entropy(ap_uint<16> xblock,
+                       ap_uint<16> yblock,
+                       float discretization_factor,
+                       hls::stream<float>& quant_field_strm,
+                       const float inv_dequant[3][64],
+                       hls::stream<float> src_strm[3],
+                       hls::stream<float> coeffs_strm[3],
+                       hls::stream<float>& reference_entropy16,
+                       hls::stream<float>& reference_entropy32) {
+#pragma HLS INLINE off
+    // Per iteration (yblock * xblock in total): 4 values go to reference_entropy16 and 1 to reference_entropy32.
+// Process y
+loop_y:
+    for (ap_uint<16> ycnt = 0; ycnt < yblock; ycnt++) {
+    // Process x
+    loop_x:
+        for (ap_uint<16> xcnt = 0; xcnt < xblock; xcnt++) {
+#pragma HLS DATAFLOW
+            // Buffers handed from one stage of the chain below to the next.
+            float quant_field[16];
+#pragma HLS ARRAY_PARTITION variable = quant_field complete
+            float src[3][1024];
+#pragma HLS ARRAY_PARTITION variable = src complete dim = 1
+#pragma HLS BIND_STORAGE variable = src type = ram_2p impl = bram
+            float coeffs[3][1024];
+#pragma HLS ARRAY_PARTITION variable = coeffs complete dim = 1
+#pragma HLS BIND_STORAGE variable = coeffs type = ram_2p impl = bram
+
+            float power0[3][1024];
+#pragma HLS ARRAY_PARTITION variable = power0 complete dim = 1
+#pragma HLS BIND_STORAGE variable = power0 type = ram_2p impl = bram
+            float power1[3][1024];
+#pragma HLS ARRAY_PARTITION variable = power1 complete dim = 1
+#pragma HLS BIND_STORAGE variable = power1 type = ram_2p impl = bram
+            // The min/max range streams are declared much deeper (512) than the entropy streams (8).
+            hls::stream<float> min_ext32_strm[3];
+#pragma HLS stream variable = min_ext32_strm depth = 512
+#pragma HLS BIND_STORAGE variable = min_ext32_strm type = fifo impl = bram
+            hls::stream<float> max_ext32_strm[3];
+#pragma HLS stream variable = max_ext32_strm depth = 512
+#pragma HLS BIND_STORAGE variable = max_ext32_strm type = fifo impl = bram
+            hls::stream<float> entropy32_strm[3];
+#pragma HLS stream variable = entropy32_strm depth = 8
+#pragma HLS BIND_STORAGE variable = entropy32_strm type = fifo impl = srl
+
+            hls::stream<float> min_ext16_strm[3];
+#pragma HLS stream variable = min_ext16_strm depth = 512
+#pragma HLS BIND_STORAGE variable = min_ext16_strm type = fifo impl = bram
+            hls::stream<float> max_ext16_strm[3];
+#pragma HLS stream variable = max_ext16_strm depth = 512
+#pragma HLS BIND_STORAGE variable = max_ext16_strm type = fifo impl = bram
+            hls::stream<float> entropy16_strm[3];
+#pragma HLS stream variable = entropy16_strm depth = 8
+#pragma HLS BIND_STORAGE variable = entropy16_strm type = fifo impl = srl
+            // Stage 1: pull one block's worth of data from the input streams into the local arrays.
+            load_dequant_src(quant_field_strm, src_strm, coeffs_strm, quant_field, src, coeffs);
+            // Stage 2: value-range statistics of src.
+            min_max_entropy(src, min_ext16_strm, max_ext16_strm, min_ext32_strm, max_ext32_strm);
+            // Stage 3: NOTE(review): src is also passed here although dequant() never reads it -- confirm this is intended.
+            dequant(discretization_factor, inv_dequant, quant_field, src, coeffs, power0, power1);
+            // Stage 4: entropy sums from power0/power1.
+            entropy_accum0(power0, power1, entropy16_strm, entropy32_strm);
+            // Stage 5: combine entropy and range per 16x16 group.
+            entropy_accum1(entropy16_strm, max_ext16_strm, min_ext16_strm, reference_entropy16);
+            // Stage 6: combine entropy and range for the 32x32 block.
+            entropy_accum2(entropy32_strm, max_ext32_strm, min_ext32_strm, reference_entropy32);
+        }
+    }
+}
+
+void get_max_quant(float kMulInho16,
+                   float kMulInho32,
+                   float quant_field[16],
+                   hls::stream<float>& quant_field_strm, // not used in this function
+                   hls::stream<float>& inhomogeneity16,
+                   hls::stream<float>& quant16,
+                   hls::stream<float>& inhomogeneity32,
+                   hls::stream<float>& quant32) {
+#pragma HLS INLINE off
+    // Writes max and scaled (N * max - sum) for each group of 4 quant_field entries and for all 16 entries.
+    float quant_inhomogeneity32;
+    float max_quant32;
+    float quant_inhomogeneity16;
+    float max_quant16;
+
+block4x4:
+    for (ap_uint<8> by = 0; by < 2; by++) {
+        for (ap_uint<8> bx = 0; bx < 2; bx++) {
+            for (ap_uint<8> dy = 0; dy < 2; ++dy) {
+                for (ap_uint<8> dx = 0; dx < 2; ++dx) {
+#pragma HLS pipeline
+                    // shift = by*8 + dy*4 + bx*2 + dx
+                    ap_uint<4> shift;
+                    shift[3] = by[0];
+                    shift[2] = dy[0];
+                    shift[1] = bx[0];
+                    shift[0] = dx[0];
+                    // Restart the group accumulators at each (by, bx); the overall ones only at the very first entry.
+                    if ((dy == 0) && (dx == 0)) {
+                        quant_inhomogeneity16 = 0;
+                        max_quant16 = -1e30;
+                        if ((by == 0) && (bx == 0)) {
+                            quant_inhomogeneity32 = 0;
+                            max_quant32 = -1e30;
+                        }
+                    }
+                    // Track the running maximum and the negated running sum.
+                    float temp = quant_field[shift];
+                    max_quant16 = hls::max(max_quant16, temp);
+                    quant_inhomogeneity16 -= temp;
+
+                    max_quant32 = hls::max(max_quant32, temp);
+                    quant_inhomogeneity32 -= temp;
+                    // After the last entry of a group: 4 * max - sum (and 16 * max - sum over all 16 entries).
+                    if ((dy == 1) && (dx == 1)) {
+                        quant_inhomogeneity16 += 4 * max_quant16;
+                        inhomogeneity16.write(kMulInho16 * quant_inhomogeneity16);
+                        quant16.write(max_quant16);
+                        if ((by == 1) && (bx == 1)) {
+                            quant_inhomogeneity32 += 16 * max_quant32;
+                            inhomogeneity32.write(kMulInho32 * quant_inhomogeneity32);
+                            quant32.write(max_quant32);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+void get_quant_inhomogeneity(ap_uint<16> xblock,
+                             ap_uint<16> yblock,
+                             float kMulInho16,
+                             float kMulInho32,
+                             hls::stream<float>& quant_field_strm,
+                             hls::stream<float>& inhomogeneity16,
+                             hls::stream<float>& quant16,
+                             hls::stream<float>& inhomogeneity32,
+                             hls::stream<float>& quant32) {
+#pragma HLS INLINE off
+    // For each of the yblock * xblock iterations: read 16 quant-field values, then run get_max_quant() on them.
+// Process y
+loop_y:
+    for (ap_uint<16> ycnt = 0; ycnt < yblock; ycnt++) {
+    // Process x
+    loop_x:
+        for (ap_uint<16> xcnt = 0; xcnt < xblock; xcnt++) {
+#pragma HLS DATAFLOW
+
+            float quant_field[16];
+
+        load_qf:
+            for (ap_uint<16> i = 0; i < 16; i++) {
+                quant_field[i] = quant_field_strm.read();
+            }
+            // The stream is forwarded as well, but get_max_quant() only reads the quant_field array.
+            get_max_quant(kMulInho16, kMulInho32, quant_field, quant_field_strm, inhomogeneity16, quant16,
+                          inhomogeneity32, quant32);
+        }
+    }
+}
+
+float compute_entropy16_sub(float discretization_factor,
+                            ap_uint<8> by,
+                            ap_uint<8> bx,
+                            float max_quant,
+                            const float inv_dequant16[256],
+                            float coeffs16[1024]) {
+#pragma HLS INLINE off
+    // Returns an entropy estimate summed over the 16x16 window of coeffs16 selected by bit 0 of by/bx.
+    // Each coefficient is scaled by inv_dequant16, max_quant and discretization_factor before it is scored.
+    // Only the constants the formula actually uses are declared here.
+
+    const float kDiff = 0.2494383590606063;
+    const float kPow = 0.99263297216052859;
+    const float kPow2 = 0.018823021573462634;
+
+    float entropy = 0;
+    for (ap_uint<8> dy = 0; dy < 16; dy++) {
+        for (ap_uint<8> dx = 0; dx < 16; dx++) {
+#pragma HLS pipeline
+            if (dy < 2 && dx < 2) {
+                // Leave out the lowest frequencies.
+            } else {
+                ap_uint<10> shift1;
+                ap_uint<8> shift2;
+                // shift1 = by[0]*512 + dy*32 + bx[0]*16 + dx: position inside coeffs16.
+                shift1[9] = by[0];
+                shift1(8, 5) = dy(3, 0);
+                shift1[4] = bx[0];
+                shift1(3, 0) = dx(3, 0);
+                // shift2 = dy*16 + dx: position inside the 256-entry inv_dequant16 table.
+                shift2(7, 4) = dy(3, 0);
+                shift2(3, 0) = dx(3, 0);
+
+                float val = coeffs16[shift1] * inv_dequant16[shift2];
+                val *= max_quant;
+                float v = std::fabs(val) * discretization_factor;
+                entropy += 1 + kDiff - std::pow(kPow, v) - kDiff * std::pow(kPow2, v);
+
+#ifdef DEBUG_ACSTRATEGY
+                // Debug trace stays inside this branch: shift1, shift2 and v only exist in this scope.
+                std::cout << "entropy:" << entropy << std::endl;
+                std::cout << "coeff16=" << coeffs16[shift1] << " inv_dequant16=" << inv_dequant16[shift2]
+                          << " max_quant=" << max_quant << " discretization_factor=" << discretization_factor << " v=" << v
+                          << " entropy=" << entropy << std::endl;
+#endif
+            }
+        }
+    }
+    return entropy;
+}
+
+// Decides, for each of the four 16x16 blocks of a tile, whether the 16x16 DCT
+// is preferred over the 8x8 DCT, and writes one flag per block to enableDCT16.
+//
+// For block (by, bx) the 8x8 reference is entropy8[by][bx] +
+// quant_inhomogeneity[by][bx]; the 16x16 estimate is the kColorWeights-weighted
+// sum of compute_entropy16_sub() over the three planes. The flag is true when
+// the 16x16 estimate is below kFavor8x8Dct times the 8x8 reference.
+//
+// @param discretization_factor  forwarded to compute_entropy16_sub()
+// @param entropy8               per-block 8x8 entropy
+// @param quant_inhomogeneity    per-block term added to the 8x8 entropy
+// @param max_quant              per-block multiplier forwarded to the sub-routine
+// @param inv_dequant16          per-plane 16x16 weight tables
+// @param coeffs16               per-plane coefficients of the tile
+// @param enableDCT16            output, four flags written in (by, bx) order
+void compute_entropy16(float discretization_factor,
+                       float entropy8[2][2],
+                       float quant_inhomogeneity[2][2],
+                       float max_quant[2][2],
+                       const float inv_dequant16[3][256],
+                       float coeffs16[3][1024],
+                       hls::stream<bool>& enableDCT16) {
+#pragma HLS INLINE off
+#pragma HLS allocation function instance = compute_entropy16_sub limit = 3
+
+    const float kColorWeights[3] = {
+        0.65285453568125873, 2.4740163893371157, 2.0140216656143393,
+    };
+
+    const float kFavor8x8Dct = 0.978192691479985;
+
+    float dct16x16_entropy;
+    float dct8x8_entropy;
+    float entropy0, entropy1, entropy2;
+    bool result;
+
+    for (ap_uint<8> by = 0; by < 2; by++) {
+        for (ap_uint<8> bx = 0; bx < 2; bx++) {
+            dct8x8_entropy = entropy8[by][bx] + quant_inhomogeneity[by][bx];
+
+            entropy0 =
+                compute_entropy16_sub(discretization_factor, by, bx, max_quant[by][bx], inv_dequant16[0], coeffs16[0]);
+
+            entropy1 =
+                compute_entropy16_sub(discretization_factor, by, bx, max_quant[by][bx], inv_dequant16[1], coeffs16[1]);
+
+            entropy2 =
+                compute_entropy16_sub(discretization_factor, by, bx, max_quant[by][bx], inv_dequant16[2], coeffs16[2]);
+
+            dct16x16_entropy = kColorWeights[0] * entropy0 + kColorWeights[1] * entropy1 + kColorWeights[2] * entropy2;
+
+            if (dct16x16_entropy < kFavor8x8Dct * dct8x8_entropy) {
+                result = true;
+            } else {
+                result = false;
+            }
+            enableDCT16.write(result);
+
+#ifdef DEBUG_ACSTRATEGY
+            // Printed per block, while by and bx are both still in scope.
+            std::cout << "by=" << by << " bx=" << bx << " dct16x16_entropy=" << dct16x16_entropy
+                      << " dct8x8_entropy=" << dct8x8_entropy << std::endl;
+#endif
+        }
+    }
+}
+
+// Per-tile driver for the 16x16 DCT decision.
+//
+// For each of the yblock * xblock iterations this function
+//   1. reads four values (2x2) from each of entropy8_strm,
+//      quant_inhomogeneity_strm and max_quant_strm,
+//   2. reads 1024 coefficients from each of the three coeffs16_strm channels,
+//   3. calls compute_entropy16(), which writes four flags to enableDCT16.
+//
+// @param xblock, yblock            loop bounds (tiles per row / rows of tiles)
+// @param discretization_factor     forwarded to compute_entropy16()
+// @param entropy8_strm             input, 4 values per tile
+// @param quant_inhomogeneity_strm  input, 4 values per tile
+// @param max_quant_strm            input, 4 values per tile
+// @param inv_dequant16             per-plane weight tables, forwarded unchanged
+// @param coeffs16_strm             input, 1024 values per tile on each channel
+// @param enableDCT16               output flags
+void dct_entropy16x16(ap_uint<16> xblock,
+                      ap_uint<16> yblock,
+                      float discretization_factor,
+                      hls::stream<float>& entropy8_strm,
+                      hls::stream<float>& quant_inhomogeneity_strm,
+                      hls::stream<float>& max_quant_strm,
+                      const float inv_dequant16[3][256],
+                      hls::stream<float> coeffs16_strm[3],
+                      hls::stream<bool>& enableDCT16) {
+#pragma HLS INLINE off
+
+    // Tile-local buffers, declared once outside the tile loops.
+    float entropy8[2][2];
+#pragma HLS ARRAY_PARTITION variable = entropy8 complete
+    float quant_inhomogeneity[2][2];
+#pragma HLS ARRAY_PARTITION variable = quant_inhomogeneity complete
+    float max_quant[2][2];
+#pragma HLS ARRAY_PARTITION variable = max_quant complete
+    float coeffs16[3][1024];
+#pragma HLS ARRAY_PARTITION variable = coeffs16 complete dim = 1
+// Process y
+loop_y:
+    for (ap_uint<16> ycnt = 0; ycnt < yblock; ycnt++) {
+    // Process x
+    loop_x:
+        for (ap_uint<16> xcnt = 0; xcnt < xblock; xcnt++) {
+#pragma HLS DATAFLOW
+
+        // One value per 16x16 block from each control stream.
+        load_ctl:
+            for (ap_uint<8> by = 0; by < 2; by++) {
+                for (ap_uint<8> bx = 0; bx < 2; bx++) {
+#pragma HLS PIPELINE II = 1
+                    entropy8[by][bx] = entropy8_strm.read();
+                    quant_inhomogeneity[by][bx] = quant_inhomogeneity_strm.read();
+                    max_quant[by][bx] = max_quant_strm.read();
+                }
+            }
+
+        // The three channels are read side by side (inner loop unrolled).
+        load_coeffs:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+                for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+                    coeffs16[c][i] = coeffs16_strm[c].read();
+                }
+            }
+
+            compute_entropy16(discretization_factor, entropy8, quant_inhomogeneity, max_quant, inv_dequant16, coeffs16,
+                              enableDCT16);
+        }
+    }
+}
+
+// Entropy estimate for one 32x32 DCT block of a single colour plane.
+//
+// Both tables are indexed as {dy, dx} (32-wide row-major). Coefficients in the
+// 4x4 lowest-frequency corner do not contribute. Returns the accumulated sum.
+float compute_entropy32_sub(float discretization_factor,
+                            float max_quant,
+                            const float inv_dequant32[1024],
+                            float coeffs32[1024]) {
+#pragma HLS INLINE off
+
+    // Model constants; kExtremityWeight is declared but not referenced below.
+    const float kDiff = 0.9539527585329598;
+    const float kPow = 0.99263297216052859;
+    const float kPow2 = 0.018823021573462634;
+    const float kExtremityWeight = 7.77;
+
+    float acc = 0;
+
+compute_entropy32_sub:
+    for (ap_uint<8> row = 0; row < 32; row++) {
+        for (ap_uint<8> col = 0; col < 32; col++) {
+#pragma HLS PIPELINE
+            // Leave out the lowest frequencies.
+            if (row >= 4 || col >= 4) {
+                ap_uint<10> idx;
+                idx(9, 5) = row(4, 0);
+                idx(4, 0) = col(4, 0);
+
+                float scaled = coeffs32[idx] * inv_dequant32[idx];
+                scaled *= max_quant;
+                float v = std::fabs(scaled) * discretization_factor;
+                float pow_a = std::pow(kPow, v);
+                float pow_b = std::pow(kPow2, v);
+                acc += 1 + kDiff - pow_a - kDiff * pow_b;
+            }
+        }
+    }
+    return acc;
+}
+
+// Decides whether one 32x32 tile should use the 32x32 DCT.
+//
+// Reads one value from each of entropy8_strm, quant_inhomogeneity_strm and
+// max_quant_strm, evaluates compute_entropy32_sub() on the three planes, and
+// writes a single flag to enableDCT32: true when the colour-weighted 32x32
+// entropy is below kFavor8x8Dct * (entropy8 + quant_inhomogeneity).
+// kFavor8x8Dct depends on whether butteraugli_target is at least 6.0.
+void compute_entropy32(float discretization_factor,
+                       float butteraugli_target,
+                       hls::stream<float>& entropy8_strm,
+                       hls::stream<float>& quant_inhomogeneity_strm,
+                       hls::stream<float>& max_quant_strm,
+                       const float inv_dequant32[3][1024],
+                       float coeffs32[3][1024],
+                       hls::stream<bool>& enableDCT32) {
+#pragma HLS INLINE off
+#pragma HLS allocation function instance = compute_entropy32_sub limit = 3
+
+    static const float kColorWeights[3] = {
+        0.65285453568125873, 2.4740163893371157, 2.0140216656143393,
+    };
+
+    float kFavor8x8Dct = (butteraugli_target >= 6.0) ? 0.737101360945845 : 0.74742417168628905;
+
+    // Declared for parity with the sub-routine; not referenced in this function.
+    const float kDiff = 0.9539527585329598;
+    const float kPow = 0.99263297216052859;
+    const float kPow2 = 0.018823021573462634;
+    const float kExtremityWeight = 7.77;
+
+    // Control values for this tile, read in a fixed order.
+    float ref_entropy8 = entropy8_strm.read();
+    float ref_inhomogeneity = quant_inhomogeneity_strm.read();
+    float tile_max_quant = max_quant_strm.read();
+    float dct8x8_entropy = ref_entropy8 + ref_inhomogeneity;
+
+    // One call per colour plane.
+    float plane0 = compute_entropy32_sub(discretization_factor, tile_max_quant, inv_dequant32[0], coeffs32[0]);
+    float plane1 = compute_entropy32_sub(discretization_factor, tile_max_quant, inv_dequant32[1], coeffs32[1]);
+    float plane2 = compute_entropy32_sub(discretization_factor, tile_max_quant, inv_dequant32[2], coeffs32[2]);
+
+    float dct32x32_entropy = kColorWeights[0] * plane0 + kColorWeights[1] * plane1 + kColorWeights[2] * plane2;
+
+    bool use_dct32 = (dct32x32_entropy < kFavor8x8Dct * dct8x8_entropy);
+    enableDCT32.write(use_dct32);
+}
+
+// Per-tile driver for the 32x32 DCT decision.
+//
+// For each of the yblock * xblock iterations this function reads 1024
+// coefficients from each of the three coeffs32_strm channels into a local
+// buffer and calls compute_entropy32(), which reads one value from each of
+// entropy8_strm / quant_inhomogeneity_strm / max_quant_strm and writes one
+// flag to enableDCT32.
+//
+// @param xblock, yblock            loop bounds (tiles per row / rows of tiles)
+// @param discretization_factor     forwarded to compute_entropy32()
+// @param butteraugli_target        forwarded to compute_entropy32()
+// @param entropy8_strm             input, consumed by compute_entropy32()
+// @param quant_inhomogeneity_strm  input, consumed by compute_entropy32()
+// @param max_quant_strm            input, consumed by compute_entropy32()
+// @param inv_dequant32             per-plane weight tables, forwarded unchanged
+// @param coeffs32_strm             input, 1024 values per tile on each channel
+// @param enableDCT32               output, one flag per tile
+void dct_entropy32x32(ap_uint<16> xblock,
+                      ap_uint<16> yblock,
+                      float discretization_factor,
+                      float butteraugli_target,
+                      hls::stream<float>& entropy8_strm,
+                      hls::stream<float>& quant_inhomogeneity_strm,
+                      hls::stream<float>& max_quant_strm,
+                      const float inv_dequant32[3][1024],
+                      hls::stream<float> coeffs32_strm[3],
+                      hls::stream<bool>& enableDCT32) {
+#pragma HLS INLINE off
+
+// Process y
+loop_y:
+    for (ap_uint<16> ycnt = 0; ycnt < yblock; ycnt++) {
+    // Process x
+    loop_x:
+        for (ap_uint<16> xcnt = 0; xcnt < xblock; xcnt++) {
+#pragma HLS DATAFLOW
+
+            // Per-tile coefficient buffer, one bank per colour plane.
+            float coeffs32[3][1024];
+#pragma HLS ARRAY_PARTITION variable = coeffs32 complete dim = 1
+
+        // The three channels are read side by side (inner loop unrolled).
+        load_coeffs:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+                for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+                    coeffs32[c][i] = coeffs32_strm[c].read();
+                }
+            }
+
+            compute_entropy32(discretization_factor, butteraugli_target, entropy8_strm, quant_inhomogeneity_strm,
+                              max_quant_strm, inv_dequant32, coeffs32, enableDCT32);
+        }
+    }
+}
+
+// Runs the 4x4, 8x8, 16x16 and 32x32 DCTs on the same input and fans the
+// results out.
+//
+// Stage 1 copies every input sample (1024 per tile and channel) into four
+// internal streams. Stage 2 calls DCT4x4Top / DCT8x8Top / DCT16x16Top /
+// DCT32x32Top once per channel, each on its own copy. Stage 3 duplicates the
+// 8x8, 16x16 and 32x32 results into the *_tmp0 and *_tmp1 outputs; the 4x4
+// result goes directly to coeffs4_tmp.
+//
+// @param xblock, yblock   loop bounds (tiles per row / rows of tiles)
+// @param src              input samples, one stream per channel
+// @param coeffs4_tmp      output of DCT4x4Top
+// @param coeffs8_tmp0/1   two copies of the DCT8x8Top output
+// @param coeffs16_tmp0/1  two copies of the DCT16x16Top output
+// @param coeffs32_tmp0/1  two copies of the DCT32x32Top output
+void DCTWrapper(ap_uint<16> xblock,
+                ap_uint<16> yblock,
+                hls::stream<float> src[3],
+                hls::stream<float> coeffs4_tmp[3],
+                hls::stream<float> coeffs8_tmp0[3],
+                hls::stream<float> coeffs16_tmp0[3],
+                hls::stream<float> coeffs32_tmp0[3],
+                hls::stream<float> coeffs8_tmp1[3],
+                hls::stream<float> coeffs16_tmp1[3],
+                hls::stream<float> coeffs32_tmp1[3]) {
+#pragma HLS INLINE off
+#pragma HLS DATAFLOW
+
+    // One private copy of the input for each transform size.
+    hls::stream<float> src0[3];
+#pragma HLS stream variable = src0 depth = 8
+#pragma HLS BIND_STORAGE variable = src0 type = fifo impl = srl
+    hls::stream<float> src1[3];
+#pragma HLS stream variable = src1 depth = 8
+#pragma HLS BIND_STORAGE variable = src1 type = fifo impl = srl
+    hls::stream<float> src2[3];
+#pragma HLS stream variable = src2 depth = 8
+#pragma HLS BIND_STORAGE variable = src2 type = fifo impl = srl
+    hls::stream<float> src3[3];
+#pragma HLS stream variable = src3 depth = 8
+#pragma HLS BIND_STORAGE variable = src3 type = fifo impl = srl
+
+    // Transform outputs awaiting duplication. Each pragma pair names the stream
+    // declared directly above it (they previously repeated src1/src2/src3,
+    // leaving these three FIFOs unconfigured).
+    hls::stream<float> coeffs8[3];
+#pragma HLS stream variable = coeffs8 depth = 8
+#pragma HLS BIND_STORAGE variable = coeffs8 type = fifo impl = srl
+    hls::stream<float> coeffs16[3];
+#pragma HLS stream variable = coeffs16 depth = 8
+#pragma HLS BIND_STORAGE variable = coeffs16 type = fifo impl = srl
+    hls::stream<float> coeffs32[3];
+#pragma HLS stream variable = coeffs32 depth = 8
+#pragma HLS BIND_STORAGE variable = coeffs32 type = fifo impl = srl
+
+    // Stage 1: fan the input out to the four transforms.
+    // Process y
+    for (ap_uint<16> ycnt = 0; ycnt < yblock; ycnt++) {
+        // Process x
+        for (ap_uint<16> xcnt = 0; xcnt < xblock; xcnt++) {
+        duplicate_src:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+                for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+                    float tmp = src[c].read();
+
+                    src0[c].write(tmp);
+                    src1[c].write(tmp);
+                    src2[c].write(tmp);
+                    src3[c].write(tmp);
+                }
+            }
+        }
+    }
+
+    // Stage 2: one transform instance per channel and size.
+    for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+        DCT4x4Top(xblock, yblock, src0[c], coeffs4_tmp[c]);
+    }
+
+    for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+        DCT8x8Top(xblock, yblock, src1[c], coeffs8[c]);
+    }
+
+    for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+        DCT16x16Top(xblock, yblock, src2[c], coeffs16[c]);
+    }
+
+    for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+        DCT32x32Top(xblock, yblock, src3[c], coeffs32[c]);
+    }
+
+    // Stage 3: duplicate the 8x8 / 16x16 / 32x32 results for the two consumers.
+    // Process y
+    for (ap_uint<16> ycnt = 0; ycnt < yblock; ycnt++) {
+        // Process x
+        for (ap_uint<16> xcnt = 0; xcnt < xblock; xcnt++) {
+#pragma HLS DATAFLOW
+
+        duplicate_coeffs8:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+                for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+                    float tmp1 = coeffs8[c].read();
+                    coeffs8_tmp0[c].write(tmp1);
+                    coeffs8_tmp1[c].write(tmp1);
+                }
+            }
+
+        duplicate_coeffs16:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+                for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+                    float tmp2 = coeffs16[c].read();
+                    coeffs16_tmp0[c].write(tmp2);
+                    coeffs16_tmp1[c].write(tmp2);
+                }
+            }
+
+        duplicate_coeffs32:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+                for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+                    float tmp3 = coeffs32[c].read();
+                    coeffs32_tmp0[c].write(tmp3);
+                    coeffs32_tmp1[c].write(tmp3);
+                }
+            }
+        }
+    }
+}
+
+// Converts the raw 32-bit input words to float and broadcasts each sample to
+// four output streams.
+//
+// Per tile, 32 * 32 words are read from every src channel; each word is passed
+// through bitsToF<uint32_t, float>() and the result is written to the matching
+// channel of src0, src1, src2 and src3.
+void LoadAcStrategySrc(ap_uint<16> xblock,
+                       ap_uint<16> yblock,
+                       hls::stream<ap_uint<32> > src[3],
+                       hls::stream<float> src0[3],
+                       hls::stream<float> src1[3],
+                       hls::stream<float> src2[3],
+                       hls::stream<float> src3[3]) {
+#pragma HLS INLINE off
+
+// Tile rows
+loop_y:
+    for (ap_uint<16> tile_row = 0; tile_row < yblock; tile_row++) {
+    // Tile columns
+    loop_x:
+        for (ap_uint<16> tile_col = 0; tile_col < xblock; tile_col++) {
+        // One 32x32 region, all three channels per pipeline iteration
+        load32x32_region:
+            for (ap_uint<8> row = 0; row < 32; row++) {
+                for (ap_uint<8> col = 0; col < 32; col++) {
+#pragma HLS pipeline II = 1
+                    for (ap_uint<8> plane = 0; plane < 3; plane++) {
+#pragma HLS UNROLL
+                        ap_uint<32> raw = src[plane].read();
+                        float sample = bitsToF<uint32_t, float>(raw);
+
+                        src0[plane].write(sample);
+                        src1[plane].write(sample);
+                        src2[plane].write(sample);
+                        src3[plane].write(sample);
+
+#ifdef DEBUG_ACSTRATEGY
+                        std::cout << "y=" << row << " x=" << col << " " << sample << std::endl;
+#endif
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Duplicates the quantization field into two streams.
+//
+// The first two values of quant_field are read and dropped. After that, for
+// each of the yblock * xblock tiles, 4 * 4 values are read and each one is
+// written to both quant_field_strm0 and quant_field_strm1.
+//
+// @param xblock, yblock      loop bounds (tiles per row / rows of tiles)
+// @param quant_field         input stream
+// @param quant_field_strm0   first copy of the per-tile values
+// @param quant_field_strm1   second copy of the per-tile values
+void LoadAcStrategyQF(ap_uint<16> xblock,
+                      ap_uint<16> yblock,
+                      hls::stream<float>& quant_field,
+                      hls::stream<float>& quant_field_strm0,
+                      hls::stream<float>& quant_field_strm1) {
+#pragma HLS INLINE off
+
+    // Discard the first 2 values
+    quant_field.read();
+    quant_field.read();
+
+// Process y
+loop_y:
+    for (ap_uint<16> ycnt = 0; ycnt < yblock; ycnt++) {
+    // Process x
+    loop_x:
+        for (ap_uint<16> xcnt = 0; xcnt < xblock; xcnt++) {
+        // load quant field of 4x4 region
+        load4x4_region:
+            for (ap_uint<8> y = 0; y < 4; y++) {
+                for (ap_uint<8> x = 0; x < 4; x++) {
+#pragma HLS pipeline II = 1
+
+                    float temp = quant_field.read();
+                    quant_field_strm0.write(temp);
+                    quant_field_strm1.write(temp);
+                }
+            }
+        }
+    }
+}
+
+// Combines the per-block decision flags into one strategy code per 8x8 block.
+//
+// For every tile, 16 codes (4x4 blocks) are written to ac_strategy. Each
+// iteration reads one enable_dct4 and one disable_dct16 flag; enable_dct32 is
+// read once per tile (at x == 0, y == 0) and enable_dct16 once per 2x2 group
+// (at even x and even y). Codes written: 3 (DCT4X4), 5 (DCT32X32),
+// 4 (DCT16X16) or 0 (DCT), checked in that priority order.
+void JudgeAcStrategy(ap_uint<16> xblock,
+                     ap_uint<16> yblock,
+                     ap_uint<16> xsize,
+                     ap_uint<16> ysize,
+                     hls::stream<bool>& enable_dct4_strm,
+                     hls::stream<bool>& disable_dct16_strm,
+                     hls::stream<bool>& enable_dct16_strm,
+                     hls::stream<bool>& enable_dct32_strm,
+                     hls::stream<ap_uint<4> >& ac_strategy) {
+#pragma HLS INLINE off
+// Tile rows
+loop_y:
+    for (ap_uint<16> ycnt = 0; ycnt < yblock; ycnt++) {
+    // Tile columns
+    loop_x:
+        for (ap_uint<16> xcnt = 0; xcnt < xblock; xcnt++) {
+            // Flags latched across the 16 iterations of one tile.
+            bool want_dct4;
+            bool veto_large;
+            bool want_dct16[2][2];
+            bool want_dct32;
+
+        judge_ac_strategy:
+            for (ap_uint<8> y = 0; y < 4; y++) {
+                for (ap_uint<8> x = 0; x < 4; x++) {
+#pragma HLS pipeline II = 1
+
+                    want_dct4 = enable_dct4_strm.read();
+                    veto_large = disable_dct16_strm.read();
+
+                    // Top-left block of the tile: fetch the 32x32 flag.
+                    if (x == 0 && y == 0) want_dct32 = enable_dct32_strm.read();
+
+                    // Top-left block of a 2x2 group: fetch that group's 16x16 flag.
+                    if (x[0] == 0 && y[0] == 0) want_dct16[y[1]][x[1]] = enable_dct16_strm.read();
+
+                    ap_uint<16> bx32 = xcnt;
+                    ap_uint<16> by32 = ycnt;
+
+                    ap_uint<16> bx16 = xcnt << 1;
+                    ap_uint<16> by16 = ycnt << 1;
+
+                    uint8_t code = 0; // AcStrategy::Type::DCT 0
+                    if (want_dct4) {
+                        code = 3; // AcStrategy::Type::DCT4X4 3
+                    } else if (!veto_large) {
+                        if (x == 0 && y == 0 && bx32 < xsize(15, 5) && by32 < ysize(15, 5) && want_dct32) {
+                            code = 5; // AcStrategy::Type::DCT32X32 5
+                        } else if (x[0] == 0 && y[0] == 0 && bx16 < xsize(15, 4) && by16 < ysize(15, 4) &&
+                                   want_dct16[y[1]][x[1]]) {
+                            code = 4; // AcStrategy::Type::DCT16X16 4
+                        }
+                    }
+                    ac_strategy.write(code);
+                }
+            }
+        }
+    }
+}
+
+// Selects, per 8x8 block, which transform's coefficients are emitted.
+//
+// Per tile this function buffers 1024 coefficients per channel from each of
+// the four coefficient streams plus 16 strategy codes, then walks the tile
+// block by block (4x4 blocks of 8x8 samples). For every sample the chosen
+// coefficient is written to both dct[c] and ac_dec[c]; once per block (at
+// x == 0, y == 0) the chosen strategy code is written to ac_strategy0..3.
+//
+// Selection order: if kChooseAcStrategy is false, always the 8x8 coefficients
+// (code 0); otherwise 32x32 if strategy[0][0] == 5, else 16x16 if the top-left
+// entry of the block's 2x2 group == 4, else 8x8 if the block's own entry == 0,
+// else 4x4 (code 3).
+//
+// @param xblock, yblock     loop bounds (tiles per row / rows of tiles)
+// @param kChooseAcStrategy  false forces the 8x8 transform everywhere
+// @param strategy_strm      input, 16 codes per tile
+// @param coeffs4_strm..coeffs32_strm  inputs, 1024 values per tile and channel
+// @param ac_strategy0..3    outputs, four identical copies of the chosen code
+// @param dct, ac_dec        outputs, two identical copies of the coefficients
+void ComposeDCT(ap_uint<16> xblock,
+                ap_uint<16> yblock,
+                bool kChooseAcStrategy,
+                hls::stream<ap_uint<4> >& strategy_strm,
+                hls::stream<float> coeffs4_strm[3],
+                hls::stream<float> coeffs8_strm[3],
+                hls::stream<float> coeffs16_strm[3],
+                hls::stream<float> coeffs32_strm[3],
+                hls::stream<uint8_t>& ac_strategy0,
+                hls::stream<uint8_t>& ac_strategy1,
+                hls::stream<uint8_t>& ac_strategy2,
+                hls::stream<uint8_t>& ac_strategy3,
+                hls::stream<float> dct[3],
+                hls::stream<float> ac_dec[3]) {
+#pragma HLS INLINE off
+
+// Process y
+loop_y:
+    for (ap_uint<16> ycnt = 0; ycnt < yblock; ycnt++) {
+    // Process x
+    loop_x:
+        for (ap_uint<16> xcnt = 0; xcnt < xblock; xcnt++) {
+#pragma HLS DATAFLOW
+
+            ap_uint<4> strategy[4][4];
+#pragma HLS ARRAY_PARTITION variable = strategy complete
+            float coeffs4[3][1024];
+            float coeffs8[3][1024];
+            float coeffs16[3][1024];
+            float coeffs32[3][1024];
+
+        load_coeffs:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+                for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+                    coeffs4[c][i] = coeffs4_strm[c].read();
+                    coeffs8[c][i] = coeffs8_strm[c].read();
+                    coeffs16[c][i] = coeffs16_strm[c].read();
+                    coeffs32[c][i] = coeffs32_strm[c].read();
+                }
+            }
+
+        load4x4:
+            for (ap_uint<8> y = 0; y < 4; y++) {
+                for (ap_uint<8> x = 0; x < 4; x++) {
+                    strategy[y][x] = strategy_strm.read();
+                }
+            }
+
+        // compose dct
+        compose_dct:
+            for (ap_uint<8> by = 0; by < 4; by++) {
+                for (ap_uint<8> bx = 0; bx < 4; bx++) {
+                    for (ap_uint<8> y = 0; y < 8; y++) {
+                        for (ap_uint<8> x = 0; x < 8; x++) {
+#pragma HLS pipeline II = 1
+
+                            // Codes governing this block at 32x32, 16x16 and 8x8 granularity.
+                            ap_uint<8> strategy_block32 = strategy[0][0];
+                            ap_uint<8> strategy_block16 = strategy[by[1] << 1][bx[1] << 1];
+                            ap_uint<8> strategy_block8 = strategy[by][bx];
+
+                            // Tile address {by, y, bx, x}: row = by * 8 + y, column = bx * 8 + x.
+                            ap_uint<10> addr;
+                            addr(9, 8) = by(1, 0);
+                            addr(7, 5) = y(2, 0);
+                            addr(4, 3) = bx(1, 0);
+                            addr(2, 0) = x(2, 0);
+
+                            uint8_t strategy_tmp;
+                            for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+
+                                float dct_tmp;
+                                float ac_dec_tmp;
+
+                                if (!kChooseAcStrategy) {
+                                    strategy_tmp = 0;
+                                    dct_tmp = coeffs8[c][addr];
+                                    ac_dec_tmp = coeffs8[c][addr];
+                                } else if (strategy_block32 == 5) {
+                                    // DCT32
+                                    strategy_tmp = 5;
+                                    dct_tmp = coeffs32[c][addr];
+                                    ac_dec_tmp = coeffs32[c][addr];
+                                } else if (strategy_block16 == 4) {
+                                    // DCT16
+                                    strategy_tmp = 4;
+                                    dct_tmp = coeffs16[c][addr];
+                                    ac_dec_tmp = coeffs16[c][addr];
+                                } else if (strategy_block8 == 0) {
+                                    // DCT8
+                                    strategy_tmp = 0;
+                                    dct_tmp = coeffs8[c][addr];
+                                    ac_dec_tmp = coeffs8[c][addr];
+                                } else {
+                                    // DCT4
+                                    strategy_tmp = 3;
+                                    dct_tmp = coeffs4[c][addr];
+                                    ac_dec_tmp = coeffs4[c][addr];
+                                }
+
+                                dct[c].write(dct_tmp);
+                                ac_dec[c].write(ac_dec_tmp);
+
+#ifdef DEBUG_ACSTRATEGY
+                                // The channel index in this loop is c.
+                                std::cout << "acs=" << strategy[by][bx] << " c=" << c << " by=" << by << " bx=" << bx
+                                          << " y=" << y << " x=" << x << " coeff16=" << coeffs16[c][addr] << std::endl;
+#endif
+                            }
+                            // One strategy code per 8x8 block, emitted on its first sample.
+                            if (x == 0 && y == 0) {
+                                ac_strategy0.write(strategy_tmp);
+                                ac_strategy1.write(strategy_tmp);
+                                ac_strategy2.write(strategy_tmp);
+                                ac_strategy3.write(strategy_tmp);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Top-level DATAFLOW region that picks a transform strategy per block and
+// emits the matching DCT coefficients.
+//
+// Stages, in call order:
+//   LoadAcStrategySrc        - input samples -> src0..src3
+//   LoadAcStrategyQF         - quant_field -> quant_field_strm0/1
+//   DCT_collection32x32 (x2) - src0 -> disable_dct16, src1 -> enable_dct4
+//   DCTWrapper               - src2 -> coeffs4 and two copies of coeffs8/16/32
+//   reference_entropy        - produces entropy16 / entropy32
+//   get_quant_inhomogeneity  - produces inhomogeneity16/32 and max_quant16/32
+//   dct_entropy16x16 / dct_entropy32x32 - produce enable_dct16 / enable_dct32
+//   JudgeAcStrategy          - merges the four flag streams into strategy_temp
+//   ComposeDCT               - writes ac_strategy0..3, dct and ac_dec
+//
+// @param config            supplies xsize, ysize, xblock32, yblock32,
+//                          butteraugli_target, discretization_factor,
+//                          kMulInhomogeneity16x16/32x32 and kChooseAcStrategy
+// @param quant_field       input quantization field stream
+// @param inv_dequant8x8, inv_dequant16x16, inv_dequant32x32
+//                          per-plane weight tables for the entropy stages
+// @param src               input samples as raw 32-bit words, one stream per plane
+// @param ac_strategy0..3   outputs, four copies of the chosen strategy codes
+// @param dct, ac_dec       outputs, two copies of the selected coefficients
+void FindBestAcStrategy(Config config,
+                        hls::stream<float>& quant_field,
+
+                        const float inv_dequant8x8[3][64],
+                        const float inv_dequant16x16[3][256],
+                        const float inv_dequant32x32[3][1024],
+
+                        hls::stream<ap_uint<32> > src[3],
+                        hls::stream<uint8_t>& ac_strategy0,
+                        hls::stream<uint8_t>& ac_strategy1,
+                        hls::stream<uint8_t>& ac_strategy2,
+                        hls::stream<uint8_t>& ac_strategy3,
+                        hls::stream<float> dct[3],
+                        hls::stream<float> ac_dec[3]) {
+// TODO(veluca): this function does *NOT* know the actual quantization field
+// values, and thus is not able to make choices taking into account the actual
+// quantization matrix.
+#pragma HLS INLINE off
+#pragma HLS DATAFLOW
+
+    // Per-plane constants passed to the two DCT_collection32x32 calls below.
+    static const float kColorWeights16x16[3] = {0.60349588292079182, 1.5435289569786645, 0.33080849938060852};
+
+    static const float kMul16x16[3] = {0.86101693093148191, -0.18168363725368566, 1.0861540086721586};
+
+    static const float kColorWeights4x4[3] = {0.76084140985773008, 0.9344031093258709, 0.31536647913297183};
+
+    static const float kMul4x4[3] = {0.84695221371792806, -0.012220022434342694, 1.079485914917413};
+
+    // Scalars unpacked from config before the stages are launched.
+    ap_uint<16> xsize = config.xsize;
+    ap_uint<16> ysize = config.ysize;
+    ap_uint<16> xblock = config.xblock32;
+    ap_uint<16> yblock = config.yblock32;
+    float butteraugli_target = config.butteraugli_target;
+    float discretization_factor = config.discretization_factor;
+    float kMul16 = config.kMulInhomogeneity16x16;
+    float kMul32 = config.kMulInhomogeneity32x32;
+
+    // Four copies of the input samples, one per consumer stage.
+    hls::stream<float> src0[3]; // 3 plane of region 32x32
+#pragma HLS stream variable = src0 depth = 4096
+#pragma HLS BIND_STORAGE variable = src0 type = fifo impl = uram
+    hls::stream<float> src1[3];
+#pragma HLS stream variable = src1 depth = 4096
+#pragma HLS BIND_STORAGE variable = src1 type = fifo impl = uram
+    hls::stream<float> src2[3];
+#pragma HLS stream variable = src2 depth = 8192
+#pragma HLS BIND_STORAGE variable = src2 type = fifo impl = uram
+    hls::stream<float> src3[3];
+#pragma HLS stream variable = src3 depth = 16384
+#pragma HLS BIND_STORAGE variable = src3 type = fifo impl = uram
+
+    // DCT outputs: coeffs4 and the *tmp1 copies feed ComposeDCT; the *tmp0
+    // copies feed the entropy stages.
+    hls::stream<float> coeffs4[3];
+#pragma HLS stream variable = coeffs4 depth = 16384
+#pragma HLS BIND_STORAGE variable = coeffs4 type = fifo impl = uram
+
+    hls::stream<float> coeffs8tmp0[3];
+#pragma HLS stream variable = coeffs8tmp0 depth = 4096
+#pragma HLS BIND_STORAGE variable = coeffs8tmp0 type = fifo impl = uram
+    hls::stream<float> coeffs16tmp0[3];
+#pragma HLS stream variable = coeffs16tmp0 depth = 8192
+#pragma HLS BIND_STORAGE variable = coeffs16tmp0 type = fifo impl = uram
+    hls::stream<float> coeffs32tmp0[3];
+#pragma HLS stream variable = coeffs32tmp0 depth = 8192
+#pragma HLS BIND_STORAGE variable = coeffs32tmp0 type = fifo impl = uram
+
+    hls::stream<float> coeffs8tmp1[3];
+#pragma HLS stream variable = coeffs8tmp1 depth = 16384
+#pragma HLS BIND_STORAGE variable = coeffs8tmp1 type = fifo impl = uram
+    hls::stream<float> coeffs16tmp1[3];
+#pragma HLS stream variable = coeffs16tmp1 depth = 16384
+#pragma HLS BIND_STORAGE variable = coeffs16tmp1 type = fifo impl = uram
+    hls::stream<float> coeffs32tmp1[3];
+#pragma HLS stream variable = coeffs32tmp1 depth = 16384
+#pragma HLS BIND_STORAGE variable = coeffs32tmp1 type = fifo impl = uram
+
+    // Two copies of the quantization field.
+    hls::stream<float> quant_field_strm0("qf0");
+#pragma HLS stream variable = quant_field_strm0 depth = 512
+#pragma HLS BIND_STORAGE variable = quant_field_strm0 type = fifo impl = bram
+    hls::stream<float> quant_field_strm1("qf1");
+#pragma HLS stream variable = quant_field_strm1 depth = 1024
+#pragma HLS BIND_STORAGE variable = quant_field_strm1 type = fifo impl = bram
+
+    // Decision flags consumed by JudgeAcStrategy.
+    hls::stream<bool> enable_dct4("enable_dct4");
+#pragma HLS stream variable = enable_dct4 depth = 2048
+#pragma HLS BIND_STORAGE variable = enable_dct4 type = fifo impl = bram
+    hls::stream<bool> disable_dct16("disable_dct16");
+#pragma HLS stream variable = disable_dct16 depth = 2048
+#pragma HLS BIND_STORAGE variable = disable_dct16 type = fifo impl = bram
+
+    hls::stream<bool> enable_dct16("enable_dct16");
+#pragma HLS stream variable = enable_dct16 depth = 128
+#pragma HLS BIND_STORAGE variable = enable_dct16 type = fifo impl = srl
+    hls::stream<bool> enable_dct32("enable_dct32");
+#pragma HLS stream variable = enable_dct32 depth = 128
+#pragma HLS BIND_STORAGE variable = enable_dct32 type = fifo impl = srl
+
+    // Inputs of dct_entropy16x16.
+    hls::stream<float> entropy16("entropy16");
+#pragma HLS stream variable = entropy16 depth = 512
+#pragma HLS BIND_STORAGE variable = entropy16 type = fifo impl = bram
+    hls::stream<float> inhomogeneity16("inhomogeneity16");
+#pragma HLS stream variable = inhomogeneity16 depth = 512
+#pragma HLS BIND_STORAGE variable = inhomogeneity16 type = fifo impl = bram
+    hls::stream<float> max_quant16("max_quant16");
+#pragma HLS stream variable = max_quant16 depth = 512
+#pragma HLS BIND_STORAGE variable = max_quant16 type = fifo impl = bram
+
+    // Inputs of dct_entropy32x32.
+    hls::stream<float> entropy32("entropy32");
+#pragma HLS stream variable = entropy32 depth = 512
+#pragma HLS BIND_STORAGE variable = entropy32 type = fifo impl = bram
+    hls::stream<float> inhomogeneity32("inhomogeneity32");
+#pragma HLS stream variable = inhomogeneity32 depth = 512
+#pragma HLS BIND_STORAGE variable = inhomogeneity32 type = fifo impl = bram
+    hls::stream<float> max_quant32("max_quant32");
+#pragma HLS stream variable = max_quant32 depth = 512
+#pragma HLS BIND_STORAGE variable = max_quant32 type = fifo impl = bram
+
+    // Strategy codes from JudgeAcStrategy to ComposeDCT.
+    hls::stream<ap_uint<4> > strategy_temp("strategy_tmp");
+#pragma HLS stream variable = strategy_temp depth = 512
+#pragma HLS BIND_STORAGE variable = strategy_temp type = fifo impl = bram
+
+// Load data
+#ifdef DEBUG_ACSTRATEGY
+    std::cout << "Load input" << std::endl;
+#endif
+
+    LoadAcStrategySrc(xblock, yblock, src, src0, src1, src2, src3);
+
+    LoadAcStrategyQF(xblock, yblock, quant_field, quant_field_strm0, quant_field_strm1);
+
+// Disable large transform
+#ifdef DEBUG_ACSTRATEGY
+    std::cout << "Disable large transform" << std::endl;
+#endif
+    DCT_collection32x32(xblock, yblock, src0, kColorWeights16x16, kMul16x16, disable_dct16);
+
+// Enable DCT4x4
+#ifdef DEBUG_ACSTRATEGY
+    std::cout << "Enable DCT4x4" << std::endl;
+#endif
+    DCT_collection32x32(xblock, yblock, src1, kColorWeights4x4, kMul4x4, enable_dct4);
+
+// DCT
+#ifdef DEBUG_ACSTRATEGY
+    std::cout << "DCT" << std::endl;
+#endif
+    DCTWrapper(xblock, yblock, src2, coeffs4, coeffs8tmp0, coeffs16tmp0, coeffs32tmp0, coeffs8tmp1, coeffs16tmp1,
+               coeffs32tmp1);
+
+// Reference entropy
+#ifdef DEBUG_ACSTRATEGY
+    std::cout << "Reference entropy" << std::endl;
+#endif
+    reference_entropy(xblock, yblock, discretization_factor, quant_field_strm0, inv_dequant8x8, src3, coeffs8tmp0,
+                      entropy16, entropy32);
+
+    get_quant_inhomogeneity(xblock, yblock, kMul16, kMul32, quant_field_strm1, inhomogeneity16, max_quant16,
+                            inhomogeneity32, max_quant32);
+
+    dct_entropy16x16(xblock, yblock, discretization_factor, entropy16, inhomogeneity16, max_quant16, inv_dequant16x16,
+                     coeffs16tmp0, enable_dct16);
+
+    dct_entropy32x32(xblock, yblock, discretization_factor, butteraugli_target, entropy32, inhomogeneity32, max_quant32,
+                     inv_dequant32x32, coeffs32tmp0, enable_dct32);
+
+// Judge
+#ifdef DEBUG_ACSTRATEGY
+    std::cout << "Judge AcStrategy" << std::endl;
+#endif
+    JudgeAcStrategy(xblock, yblock, xsize, ysize, enable_dct4, disable_dct16, enable_dct16, enable_dct32,
+                    strategy_temp);
+
+#ifdef DEBUG_ACSTRATEGY
+    std::cout << "Compose DCT" << std::endl;
+#endif
+    ComposeDCT(xblock, yblock, config.kChooseAcStrategy, strategy_temp, coeffs4, coeffs8tmp1, coeffs16tmp1,
+               coeffs32tmp1, ac_strategy0, ac_strategy1, ac_strategy2, ac_strategy3, dct, ac_dec);
+}
+
+// Consumes one 4x4 group of (strategy, quant) pairs and derives the quant
+// maxima needed by AdjustValue.
+//
+// quant_field : in, 16 quant values read in raster order (row outer, column inner).
+// ac_strategy : in, 16 strategy codes read in lock-step with quant_field.
+// strategy    : out, strategy code of every cell.
+// max_quant8  : out, plain copy of the quant value of every cell.
+// max_quant16 : out, maximum quant of each 2x2 quadrant of cells.
+// max_quant32 : out, maximum quant of all 16 cells.
+void GetMaxQuant(hls::stream<float>& quant_field,
+                 hls::stream<uint8_t>& ac_strategy,
+                 ap_uint<8> strategy[4][4],
+                 float max_quant8[4][4],
+                 float max_quant16[2][2],
+                 float& max_quant32) {
+#pragma HLS INLINE off
+
+get_max_quant:
+    for (ap_uint<8> row = 0; row < 4; row++) {
+        for (ap_uint<8> col = 0; col < 4; col++) {
+#pragma HLS pipeline II = 1
+
+            strategy[row][col] = ac_strategy.read();
+            float cur = quant_field.read();
+
+            // Bit 1 of a coordinate picks the 2x2 quadrant, bit 0 the position inside it.
+            const bool quad_row = row[1];
+            const bool quad_col = col[1];
+            const bool starts_group = (col == 0 && row == 0);
+            const bool starts_quad = (col[0] == 0 && row[0] == 0);
+
+            // The first sample of the group / quadrant seeds the running maximum;
+            // every later sample is folded in with max().
+            max_quant32 = starts_group ? cur : hls::max(cur, max_quant32);
+            max_quant16[quad_row][quad_col] = starts_quad ? cur : hls::max(cur, max_quant16[quad_row][quad_col]);
+
+            max_quant8[row][col] = cur;
+        }
+    }
+}
+
+// Writes the adjusted quant value of every cell of one 4x4 group, in raster
+// order, to o_quant_field.
+//
+// Selection per cell:
+//   - strategy[0][0] == 5 (DCT32)                    -> max_quant32
+//   - top-left cell of the cell's 2x2 quadrant == 4  -> that quadrant's max_quant16
+//   - otherwise                                      -> the cell's own max_quant8
+void AdjustValue(ap_uint<8> strategy[4][4],
+                 float max_quant8[4][4],
+                 float max_quant16[2][2],
+                 float max_quant32,
+                 hls::stream<float>& o_quant_field) {
+#pragma HLS INLINE off
+
+adjust_quant_field:
+    for (ap_uint<8> row = 0; row < 4; row++) {
+        for (ap_uint<8> col = 0; col < 4; col++) {
+#pragma HLS pipeline II = 1
+
+            // Bit 1 of each coordinate identifies the 2x2 quadrant.
+            const bool quad_row = row[1];
+            const bool quad_col = col[1];
+            // Strategy stored at the quadrant's top-left cell (row/col 0 or 2).
+            const ap_uint<8> quad_code = strategy[quad_row ? 2 : 0][quad_col ? 2 : 0];
+
+            // Start from the cell's own value and override for the larger transforms.
+            float picked = max_quant8[row][col];
+            if (strategy[0][0] == 5) {
+                // DCT32
+                picked = max_quant32;
+            } else if (quad_code == 4) {
+                // DCT16
+                picked = max_quant16[quad_row][quad_col];
+            }
+            o_quant_field.write(picked);
+
+#ifdef DEBUG_QUANTIZER
+            std::cout << "qf_post=" << picked << std::endl;
+#endif
+        }
+    }
+}
+
+// Streams the quant field through a per-group adjustment: each iteration
+// collects 16 quant values and 16 strategy codes with GetMaxQuant and re-emits
+// 16 quant values with AdjustValue.
+//
+// xblock, yblock : loop bounds; the body runs xblock * yblock times.
+// i_quant_field  : in, quant values (16 consumed per iteration).
+// ac_strategy    : in, strategy codes (16 consumed per iteration).
+// o_quant_field  : out, adjusted quant values (16 produced per iteration).
+void AdjustQuantField(ap_uint<16> xblock,
+                      ap_uint<16> yblock,
+                      hls::stream<float>& i_quant_field,
+                      hls::stream<uint8_t>& ac_strategy,
+                      hls::stream<float>& o_quant_field) {
+// Replace the whole quant_field in non-8x8 blocks with the maximum of each
+// 8x8 block.
+#pragma HLS INLINE off
+
+    // NOTE(review): "by" is bounded by xblock and "bx" by yblock. Neither counter
+    // is used inside the body, so only the product of the two bounds matters here.
+    for (ap_uint<16> by = 0; by < xblock; ++by) {
+        for (ap_uint<16> bx = 0; bx < yblock; ++bx) {
+#pragma HLS DATAFLOW
+
+            // Per-iteration buffers handed from GetMaxQuant to AdjustValue.
+            ap_uint<8> strategy[4][4];
+#pragma HLS ARRAY_PARTITION variable = strategy complete
+            float max_quant8[4][4];
+#pragma HLS ARRAY_PARTITION variable = max_quant8 complete
+            float max_quant16[2][2];
+#pragma HLS ARRAY_PARTITION variable = max_quant16 complete
+            float max_quant32;
+
+            // get max quant
+            GetMaxQuant(i_quant_field, ac_strategy, strategy, max_quant8, max_quant16, max_quant32);
+
+            // adjust quant field
+            AdjustValue(strategy, max_quant8, max_quant16, max_quant32, o_quant_field);
+        }
+    }
+}
+
+// Limits val to the range [1, 256] and returns it converted to int.
+int ClampVal(float val) {
+#pragma HLS INLINE
+    const int kQuantMax = 256;
+    // Apply the lower bound first, then the upper bound.
+    const float at_least_one = hls::max<float>(1.0, val);
+    const float bounded = hls::min<float>((float)kQuantMax, at_least_one);
+    return bounded;
+}
+
+// Converts the float quant field to clamped integer levels and duplicates the
+// result onto two output streams.
+//
+// inv_global_scale_strm : in, a single scale factor read once before the loops.
+// xblock, yblock        : loop bounds; 16 values are processed per (block, block) pair.
+// quant_field           : in, float quant values.
+// quant_img_ac0/1       : out, identical copies of ClampVal(value * scale + 0.5).
+void SetQuantField(hls::stream<float>& inv_global_scale_strm,
+                   ap_uint<16> xblock,
+                   ap_uint<16> yblock,
+                   hls::stream<float>& quant_field,
+                   hls::stream<int32_t>& quant_img_ac0,
+                   hls::stream<int32_t>& quant_img_ac1) {
+#pragma HLS INLINE off
+
+    const float inv_scale = inv_global_scale_strm.read();
+
+    for (ap_uint<16> outer = 0; outer < xblock; ++outer) {
+        for (ap_uint<16> inner = 0; inner < yblock; ++inner) {
+            for (ap_uint<8> row = 0; row < 4; row++) {
+                for (ap_uint<8> col = 0; col < 4; col++) {
+#pragma HLS pipeline II = 1
+
+                    const float raw = quant_field.read();
+                    // Scale, add the rounding offset, then clamp to the legal level range.
+                    const float scaled = raw * inv_scale + 0.5f;
+                    const int level = ClampVal(scaled);
+
+                    quant_img_ac0.write(level);
+                    quant_img_ac1.write(level);
+
+#ifdef DEBUG_QUANTIZER
+                    std::cout << "qf=" << raw << " quant_qf=" << level << std::endl;
+#endif
+                }
+            }
+        }
+    }
+}
+
+// Derives the quantizer parameters from the quant-field statistics and then
+// forwards the quant field unchanged.
+//
+// xblock, yblock        : the pass-through loop forwards yblock * xblock * 16 values.
+// quant_dc              : requested DC quant (float).
+// i_quant_field         : in; the first two values are the median and the median
+//                         absolute deviation of the quant field, followed by the field itself.
+// inv_global_scale_strm : out, one value: kGlobalScaleDenom / global_scale.
+// scale_out_strm        : out, two values: global_scale, then quant_dc level.
+// quantizer[3]          : out, each stream receives the same five values:
+//                         quant_dc and global_scale (integer bit patterns carried in a
+//                         float via bitsToF), inv_quant_dc, inv_global_scale, global_scale_float.
+// o_quant_field         : out, copy of the quant field.
+void GetQuantizer(ap_uint<16> xblock,
+                  ap_uint<16> yblock,
+                  float quant_dc,
+                  hls::stream<float>& i_quant_field,
+                  hls::stream<float>& inv_global_scale_strm,
+                  hls::stream<ap_uint<32> >& scale_out_strm,
+                  hls::stream<float> quantizer[3],
+                  hls::stream<float>& o_quant_field) {
+#pragma HLS INLINE off
+
+    // origin quantizer
+    const int kGlobalScaleDenom = 1 << 16;
+
+    const int quant_dc_origin = 64;
+    const int global_scale = kGlobalScaleDenom / 64;
+
+    const float global_scale_float = (float)(global_scale) * (1.0 / (float)kGlobalScaleDenom);
+    const float inv_global_scale = 1.0 * (float)(kGlobalScaleDenom / global_scale);
+
+    const float qdc = global_scale_float * (float)(quant_dc_origin);
+    const float inv_quant_dc = 1.0f / qdc;
+
+    float quant_median = i_quant_field.read();
+    float quant_median_absd = i_quant_field.read();
+
+    // Target value for the median value in the quant field.
+    const float kQuantFieldTarget = 3.80987740592518214386;
+    // We reduce the median of the quant field by the median absolute
+    // deviation:
+    // higher resolution on highly varying quant fields.
+
+    // update new quantizer
+    int global_scale_;
+    int quant_dc_;
+    float global_scale_float_;
+    float inv_global_scale_;
+    float inv_quant_dc_;
+    // True as soon as either the global scale or the DC level differs from the
+    // origin quantizer; it must accumulate over both checks below.
+    bool changed = false;
+
+    int new_global_scale = (int)((float)kGlobalScaleDenom * (quant_median - quant_median_absd) / kQuantFieldTarget);
+
+    // Ensure that quant_dc_ will always be at least
+    // kGlobalScaleDenom/kGlobalScaleNumerator.
+    const int kGlobalScaleNumerator = 4096;
+
+    if (new_global_scale > quant_dc * kGlobalScaleNumerator) {
+        new_global_scale = quant_dc * kGlobalScaleNumerator;
+    }
+
+    // Ensure that new_global_scale is positive and no more than 1<<15.
+    if (new_global_scale <= 0) new_global_scale = 1;
+    if (new_global_scale > (1 << 15)) new_global_scale = 1 << 15;
+
+    if (new_global_scale != global_scale) {
+        global_scale_ = new_global_scale;
+        global_scale_float_ = (float)global_scale_ * (1.0 / (float)kGlobalScaleDenom);
+        inv_global_scale_ = 1.0 * ((float)kGlobalScaleDenom / (float)global_scale_);
+
+        changed = true;
+    } else {
+        global_scale_ = global_scale;
+        global_scale_float_ = global_scale_float;
+        inv_global_scale_ = inv_global_scale;
+    }
+
+    // The DC level is always the clamped integer value. When it equals
+    // quant_dc_origin this keeps the origin level; the float parameter quant_dc
+    // must not be stored here (it would be truncated to an unrelated integer).
+    const int val = ClampVal(quant_dc * inv_global_scale_ + 0.5f);
+    quant_dc_ = val;
+    if (val != quant_dc_origin) {
+        // Do not clear "changed" otherwise: a changed global scale alone already
+        // requires inv_quant_dc_ to be recomputed.
+        changed = true;
+    }
+
+    if (changed) {
+        float tmp = global_scale_float_ * (float)quant_dc_;
+        inv_quant_dc_ = 1.0f / tmp;
+    } else {
+        inv_quant_dc_ = inv_quant_dc;
+    }
+
+    inv_global_scale_strm.write(inv_global_scale_);
+    scale_out_strm.write(global_scale_);
+    scale_out_strm.write(quant_dc_);
+
+    // Every consumer stream gets the same five parameters, in this order.
+    for (ap_uint<8> i = 0; i < 3; i++) {
+        quantizer[i].write(bitsToF<int32_t, float>(quant_dc_));
+        quantizer[i].write(bitsToF<int32_t, float>(global_scale_));
+        quantizer[i].write(inv_quant_dc_);
+        quantizer[i].write(inv_global_scale_);
+        quantizer[i].write(global_scale_float_);
+    }
+
+#ifdef DEBUG_QUANTIZER
+    std::cout << "quant_dc=" << quant_dc_ << std::endl;
+    std::cout << "global_scale=" << global_scale_ << std::endl;
+    std::cout << "inv_quant_dc=" << inv_quant_dc_ << std::endl;
+    std::cout << "inv_global_scale=" << inv_global_scale_ << std::endl;
+    std::cout << "global_scale_float=" << global_scale_float_ << std::endl;
+#endif
+
+    // Forward the quant field itself, 16 values per block.
+    for (ap_uint<16> y = 0; y < yblock; y++) {
+        for (ap_uint<16> x = 0; x < xblock; x++) {
+            for (ap_uint<16> k = 0; k < 16; k++) {
+#pragma HLS pipeline II = 1
+
+                float tmp = i_quant_field.read();
+                o_quant_field.write(tmp);
+
+#ifdef DEBUG_QUANTIZER
+                std::cout << "qf_pre=" << tmp << std::endl;
+#endif
+            }
+        }
+    }
+}
+
+// Dataflow wrapper that chains the three quantizer stages:
+//   GetQuantizer     -> computes the quantizer parameters and forwards the quant field,
+//   AdjustQuantField -> adjusts the quant field using the AC strategy codes,
+//   SetQuantField    -> converts the adjusted field to integer levels on two streams.
+//
+// config        : supplies xblock32, yblock32 and quant_dc.
+// quant_field   : in, consumed by GetQuantizer.
+// ac_strategy   : in, consumed by AdjustQuantField.
+// quantizer[3]  : out, quantizer parameters written by GetQuantizer.
+// scale_out     : out, written by GetQuantizer.
+// quant_img_ac0 : out, integer quant levels.
+// quant_img_ac1 : out, second copy of the integer quant levels.
+void FindBestQuantizer(Config config,
+                       hls::stream<float>& quant_field,
+                       hls::stream<uint8_t>& ac_strategy,
+                       hls::stream<float> quantizer[3],
+                       hls::stream<ap_uint<32> >& scale_out,
+                       hls::stream<int32_t>& quant_img_ac0,
+                       hls::stream<int32_t>& quant_img_ac1) {
+#pragma HLS INLINE off
+#pragma HLS DATAFLOW
+
+    // Carries the inverse global scale from GetQuantizer to SetQuantField.
+    hls::stream<float> inv_global_scale_strm;
+#pragma HLS stream variable = inv_global_scale_strm depth = 4
+#pragma HLS resource variable = inv_global_scale_strm core = FIFO_SRL
+    // Quant field between GetQuantizer and AdjustQuantField.
+    hls::stream<float> quant_field_temp0;
+#pragma HLS stream variable = quant_field_temp0 depth = 16
+#pragma HLS resource variable = quant_field_temp0 core = FIFO_SRL
+    // Quant field between AdjustQuantField and SetQuantField.
+    hls::stream<float> quant_field_temp1;
+#pragma HLS stream variable = quant_field_temp1 depth = 16
+#pragma HLS resource variable = quant_field_temp1 core = FIFO_SRL
+
+    GetQuantizer(config.xblock32, config.yblock32, config.quant_dc, quant_field, inv_global_scale_strm, scale_out,
+                 quantizer, quant_field_temp0);
+
+    AdjustQuantField(config.xblock32, config.yblock32, quant_field_temp0, ac_strategy, quant_field_temp1);
+
+    SetQuantField(inv_global_scale_strm, config.xblock32, config.yblock32, quant_field_temp1, quant_img_ac0,
+                  quant_img_ac1);
+}
+
+// Copies the N elements of `in` to `out`, one element per pipelined iteration.
+//
+// type_t : element type.
+// N      : number of elements; any positive array size is supported.
+template <typename type_t, int N>
+void Delay_block(type_t in[N], type_t out[N]) {
+    // The counter is a plain int so that it can reach N for every N; an 8-bit
+    // counter would wrap at 256 and never terminate for N > 255.
+    for (int i = 0; i < N; i++) {
+#pragma HLS pipeline II = 1
+        out[i] = in[i];
+    }
+}
+
+// Fixed-point format used by the integer quantizer: SHIFT_SCL fractional bits.
+#define SHIFT_SCL (6)
+#define FACTOR_SCL (1 << SHIFT_SCL)
+// Converts a real value to that fixed-point format. The argument is scaled
+// first and truncated afterwards; casting it to int before the multiplication
+// would discard the fraction (0.6 would become 0 instead of 38). The argument
+// is parenthesized so that expressions can be passed safely.
+#define SCLF(a) ((int)((a) * FACTOR_SCL))
+
+// Signed / unsigned fraction-sized values and a wider signed fixed-point value.
+typedef ap_int<SHIFT_SCL + 1> ap_frac;
+typedef ap_uint<SHIFT_SCL + 1> apu_frac;
+typedef ap_int<SHIFT_SCL + 10> ap_frac16;
+
+// Quantizes one fixed-point coefficient to an integer while propagating a
+// rounding error from the left neighbour and from the previous row of an
+// 8-wide block.
+//
+// val_i              : coefficient in fixed point (presumably SHIFT_SCL fractional
+//                      bits, as produced by SCLF in the callers).
+// thres_i            : threshold on the fractional part below which a value whose
+//                      integer part is 0 (positive) or -1 (negative) is forced to zero.
+// k                  : coefficient index; k & 7 is the column, k > 7 means the
+//                      coefficient is not in the first row.
+// err_left           : in/out, error handed to the next column; reset to 0 at k == 0
+//                      and at column 7.
+// previous_row_err_i : in/out, one stored error per column.
+// Returns the quantized integer value.
+int16_t UpdateErr_int(ap_frac16 val_i, apu_frac thres_i, char k, ap_frac& err_left, ap_frac previous_row_err_i[8]) {
+#pragma HLS INLINE
+
+    int idx = k & 7;
+    short err_i;
+
+    // Incoming error: none for the very first coefficient, only the row error in
+    // column 0, left error (plus row error after the first row) elsewhere.
+    if (k == 0)
+        err_i = 0;
+    else if ((idx) == 0) {
+        err_i = previous_row_err_i[idx];
+    } else {
+        if (k > 7)
+            err_i = err_left + previous_row_err_i[idx];
+        else
+            err_i = err_left;
+    }
+    bool isPos = val_i > 0;
+
+    // Split the value into integer part (val_int) and non-negative fraction (val_frac).
+    int val_org_i = val_i;
+    bool isValOrg_1 = (val_org_i > FACTOR_SCL) || (0 - val_org_i > FACTOR_SCL);
+    apu_frac val_frac = val_i & (FACTOR_SCL - 1);
+    ap_frac16 val_int = (val_i - val_frac) >> SHIFT_SCL;
+    bool isValIntZero = val_int == 0;
+    bool isValNegOne = val_int == -1;
+
+    // isZero_u is only assigned and read when isUseErr is true, isZero_Nu only
+    // when it is false.
+    bool isZero_u;
+    bool isZero_Nu;
+    bool isUseErr = (err_i > 0);
+
+    // Candidate residuals. Naming: u / un = error used / not used,
+    // Z / Nz = result forced to zero / not zero, p / n = positive / non-positive value.
+    ap_frac gap_u_Z_p = (0 << SHIFT_SCL) + val_frac + err_i / 2;
+    ap_frac gap_u_Z_n = -(((-1) << SHIFT_SCL) + val_frac - err_i / 2);
+    ap_frac gap_u_Nz_p = val_frac + err_i / 2 - (((val_frac + err_i / 2 + FACTOR_SCL / 2) >> SHIFT_SCL) << SHIFT_SCL);
+    ap_frac gap_u_Nz_n = (((val_frac - err_i / 2 + FACTOR_SCL / 2) >> SHIFT_SCL) << SHIFT_SCL) - val_frac + err_i / 2;
+    ap_frac gap_un_Z_p = val_frac;
+    ap_frac gap_un_Z_n = (FACTOR_SCL - val_frac);
+    ap_frac gap_un_Nz_p = val_frac - (((val_frac + FACTOR_SCL / 2) >> SHIFT_SCL) << SHIFT_SCL);
+    ap_frac gap_un_Nz_n = (((val_frac + FACTOR_SCL / 2) >> SHIFT_SCL) << SHIFT_SCL) - val_frac;
+
+    // When the error is not used it is still carried forward, unless the
+    // magnitude of the value exceeds FACTOR_SCL.
+    ap_frac err_i_gap = isValOrg_1 ? 0 : err_i;
+    ap_frac err_i_gap_un_Z_p = err_i_gap + gap_un_Z_p;
+    ap_frac err_i_gap_un_Z_n = err_i_gap + gap_un_Z_n;
+    ap_frac err_i_gap_un_Nz_p = err_i_gap + gap_un_Nz_p;
+    ap_frac err_i_gap_un_Nz_n = err_i_gap + gap_un_Nz_n;
+
+    // Threshold tests on the fraction (with and without half of the incoming error).
+    bool NoCarry_u_p = val_frac < (thres_i - err_i / 2);
+    bool NoCarry_u_n = (FACTOR_SCL - val_frac) < (thres_i - err_i / 2);
+    bool NoCarry_Nu_p = val_frac < thres_i;
+    bool NoCarry_Nu_n = (FACTOR_SCL - val_frac) < thres_i;
+
+    if (isUseErr) {
+        if (isPos) {
+            if (isValIntZero && NoCarry_u_p)
+                isZero_u = true;
+            else
+                isZero_u = false;
+        } else {
+            if (isValNegOne && NoCarry_u_n)
+                isZero_u = true;
+            else
+                isZero_u = false;
+        }
+    }
+    if (!isUseErr) {
+        if (isPos) {
+            if (isValIntZero && NoCarry_Nu_p)
+                isZero_Nu = true;
+            else
+                isZero_Nu = false;
+        } else {
+            if (isValNegOne && NoCarry_Nu_n)
+                isZero_Nu = true;
+            else
+                isZero_Nu = false;
+        }
+    }
+
+    // Error for the next column: half of the selected residual; nothing is
+    // carried out of the first coefficient or out of the last column.
+    if (k == 0 || (idx) == 7) {
+        err_left = 0;
+    } else if (isUseErr) {
+        if (isZero_u) {
+            if (isPos)
+                err_left = gap_u_Z_p / 2;
+            else
+                err_left = gap_u_Z_n / 2;
+        } else {
+            if (isPos)
+                err_left = gap_u_Nz_p / 2;
+            else
+                err_left = gap_u_Nz_n / 2;
+        }
+    } else {
+        if (isZero_Nu) {
+            if (isPos)
+                err_left = err_i_gap_un_Z_p / 2; // + gap_un_Z_p;
+            else
+                err_left = err_i_gap_un_Z_n / 2; // + gap_un_Z_n;
+        } else {
+            if (isPos)
+                err_left = err_i_gap_un_Nz_p / 2; // + gap_un_Nz_p;
+            else
+                err_left = err_i_gap_un_Nz_n / 2; // + gap_un_Nz_n;
+        }
+    }
+
+    // Full selected residual, used below to update the per-column row error.
+    ap_frac err_new_i;
+    if (k == 0) {
+        err_new_i = 0;
+    } else if (isUseErr) {
+        if (isZero_u) {
+            if (isPos)
+                err_new_i = gap_u_Z_p;
+            else
+                err_new_i = gap_u_Z_n;
+        } else {
+            if (isPos)
+                err_new_i = gap_u_Nz_p;
+            else
+                err_new_i = gap_u_Nz_n;
+        }
+    } else {
+        if (isZero_Nu) {
+            if (isPos)
+                err_new_i = err_i_gap_un_Z_p; // + gap_un_Z_p;
+            else
+                err_new_i = err_i_gap_un_Z_n; // + gap_un_Z_n;
+        } else {
+            if (isPos)
+                err_new_i = err_i_gap_un_Nz_p; // + gap_un_Nz_p;
+            else
+                err_new_i = err_i_gap_un_Nz_n; // + gap_un_Nz_n;
+        }
+    }
+
+    // Column 7 has no right neighbour, so the whole residual goes to the next
+    // row; other columns keep half (the other half went to err_left).
+    if ((idx) == 7)
+        previous_row_err_i[idx] = err_new_i;
+    else
+        previous_row_err_i[idx] = err_new_i / 2;
+
+    // Final value: zero when forced, otherwise round to nearest (with half of the
+    // incoming error added or subtracted when it is used).
+    int16_t v_i;
+    if (isUseErr) {
+        if (isPos)
+            v_i = isZero_u ? 0 : (int16_t)(((val_i + err_i / 2 + FACTOR_SCL / 2) >> SHIFT_SCL));
+        else
+            v_i = isZero_u ? 0 : (int16_t)(((val_i - err_i / 2 + FACTOR_SCL / 2) >> SHIFT_SCL));
+    } else // err is not used
+        v_i = isZero_Nu ? 0 : (int16_t)(((val_i + FACTOR_SCL / 2) >> SHIFT_SCL));
+    // NOTE(review): v_i is an int16_t, so the upper check can never be true; the
+    // lower check only maps -32768 to -32767.
+    if (v_i > 32767) v_i = 32767;
+    if (v_i < -32767) v_i = -32767;
+
+    return v_i;
+}
+
+// Quantizes the 1024 coefficients in `cplane` into `quantized`, processing
+// them as 4x4 sub-blocks of 64 coefficients each.
+//
+// qac_strm            : in, one quant multiplier per sub-block (read at k == 0).
+// ac_strategy_strm    : in, one strategy code per sub-block (read at k == 0):
+//                       5 = DCT32, 4 = DCT16, 0 = DCT8, anything else = DCT4.
+// thres               : zeroing threshold passed on to UpdateErr_int.
+// InvDequantMatrix*   : inverse dequantization tables, one per transform size.
+// cplane              : in, 1024 coefficients.
+// quantized           : out, 1024 quantized coefficients, written at the same
+//                       address the coefficient was read from.
+void QuantizeBlockAC0(hls::stream<float>& qac_strm,
+                      hls::stream<uint8_t>& ac_strategy_strm,
+                      float thres,
+                      const float InvDequantMatrix4x4[64],
+                      const float InvDequantMatrix8x8[64],
+                      const float InvDequantMatrix16x16[256],
+                      const float InvDequantMatrix32x32[1024],
+                      float cplane[1024],
+                      int16_t quantized[1024]) {
+#pragma HLS INLINE off
+    // Done in a somewhat weird way to preserve the previous behaviour of
+    // dithering.
+    // TODO(jyrki): properly dither DCT blocks larger than 8.
+
+    for (ap_uint<8> iy = 0; iy < 4; iy++) {
+        for (ap_uint<8> ix = 0; ix < 4; ix++) {
+            // Error state of UpdateErr_int, restarted for every sub-block.
+            ap_frac err = 0;
+            ap_frac previous_row_err_i[8] = {0};
+#pragma HLS ARRAY_PARTITION variable = previous_row_err_i complete
+
+            ap_uint<8> strategy_block;
+            float qa;
+            for (ap_uint<8> k = 0; k < 64; ++k) {
+#pragma HLS pipeline II = 1
+
+                // One multiplier and one strategy code per sub-block.
+                if (k == 0) {
+                    qa = qac_strm.read();
+                    strategy_block = ac_strategy_strm.read();
+                }
+
+                // Address bits (MSB first): {iy, k[5:3], ix, k[2:0]}.
+                ap_uint<10> addr8x8;
+                addr8x8(9, 8) = iy(1, 0);
+                addr8x8(7, 5) = k(5, 3);
+                addr8x8(4, 3) = ix(1, 0);
+                addr8x8(2, 0) = k(2, 0);
+
+                // Address bits (MSB first): {k[5:3], iy, k[2:0], ix}.
+                ap_uint<10> addr32x32;
+                addr32x32(9, 7) = k(5, 3);
+                addr32x32[6] = iy[1];
+                addr32x32[5] = iy[0];
+                addr32x32(4, 2) = k(2, 0);
+                addr32x32[1] = ix[1];
+                addr32x32[0] = ix[0];
+
+                // Address bits (MSB first): {iy[1], k[5:3], iy[0], ix[1], k[2:0], ix[0]}.
+                ap_uint<10> addr16x16;
+                addr16x16[9] = iy[1];
+                addr16x16(8, 6) = k(5, 3);
+                addr16x16[5] = iy[0];
+                addr16x16[4] = ix[1];
+                addr16x16(3, 1) = k(2, 0);
+                addr16x16[0] = ix[0];
+
+                // Index into the 256-entry 16x16 table: {k[5:3], iy[0], k[2:0], ix[0]}.
+                ap_uint<8> addr_quant16;
+                addr_quant16(7, 5) = k(5, 3);
+                addr_quant16[4] = iy[0];
+                addr_quant16(3, 1) = k(2, 0);
+                addr_quant16[0] = ix[0];
+
+                // Pick the table entry and the coefficient for the active transform.
+                float qm, plane;
+                if (strategy_block == 5) {
+                    // DCT32
+                    qm = InvDequantMatrix32x32[addr32x32];
+                    plane = cplane[addr32x32];
+                } else if (strategy_block == 4) {
+                    // DCT16
+                    qm = InvDequantMatrix16x16[addr_quant16];
+                    plane = cplane[addr16x16];
+                } else if (strategy_block == 0) {
+                    // DCT8
+                    qm = InvDequantMatrix8x8[k];
+                    plane = cplane[addr8x8];
+                } else {
+                    // DCT4
+                    qm = InvDequantMatrix4x4[k];
+                    plane = cplane[addr8x8];
+                }
+
+                float val = plane * (qm * qa);
+
+                // Convert to the integer scale expected by UpdateErr_int and quantize.
+                ap_frac16 val_i = SCLF(val);
+                apu_frac thres_i = SCLF(thres);
+                int16_t v_i = UpdateErr_int(val_i, thres_i, k, err, previous_row_err_i);
+
+                // The result is stored at the address the coefficient came from.
+                ap_uint<10> addr_o;
+                if (strategy_block == 5) {
+                    // DCT32
+                    addr_o = addr32x32;
+                } else if (strategy_block == 4) {
+                    // DCT16
+                    addr_o = addr16x16;
+                } else if (strategy_block == 0) {
+                    // DCT8
+                    addr_o = addr8x8;
+                } else {
+                    // DCT4
+                    addr_o = addr8x8;
+                }
+                quantized[addr_o] = (int16_t)v_i;
+
+#ifdef DEBUG_COEFFS
+                if (ix == 0 && iy == 0) {
+                    std::cout << std::hex << "k2_qua: k=" << k << " cplane=" << (int&)plane << " val=" << (int&)val
+                              << " qm=" << (int&)qm << " qac=" << (int&)qa << " quantized=" << v_i << std::endl;
+
+                    std::cout << std::setprecision(16) << "k2_qua: k=" << k << " cplane=" << plane << " val=" << val
+                              << " qm=" << qm << " qac=" << qa << " quantized=" << v_i << std::endl;
+                }
+#endif
+            }
+        }
+    }
+}
+
+// Array-based variant of QuantizeBlockAC0: quantizes the 1024 coefficients in
+// `cplane` into `quantized`, taking the per-sub-block quant multiplier and
+// strategy code from 4x4 arrays instead of streams.
+//
+// qac               : in, quant multiplier per sub-block, indexed [iy][ix].
+// ac_strategy       : in, strategy code per sub-block, indexed [iy][ix]:
+//                     5 = DCT32, 4 = DCT16, 0 = DCT8, anything else = DCT4.
+// thres             : zeroing threshold passed on to UpdateErr_int.
+// InvDequantMatrix* : inverse dequantization tables, one per transform size.
+// cplane            : in, 1024 coefficients.
+// quantized         : out, 1024 quantized coefficients, written at the same
+//                     address the coefficient was read from.
+void QuantizeBlockAC1(float qac[4][4],
+                      uint8_t ac_strategy[4][4],
+                      float thres,
+                      const float InvDequantMatrix4x4[64],
+                      const float InvDequantMatrix8x8[64],
+                      const float InvDequantMatrix16x16[256],
+                      const float InvDequantMatrix32x32[1024],
+                      float cplane[1024],
+                      int16_t quantized[1024]) {
+#pragma HLS INLINE off
+    // Done in a somewhat weird way to preserve the previous behaviour of
+    // dithering.
+    // TODO(jyrki): properly dither DCT blocks larger than 8.
+
+    for (ap_uint<8> iy = 0; iy < 4; iy++) {
+        for (ap_uint<8> ix = 0; ix < 4; ix++) {
+            // Error state of UpdateErr_int, restarted for every sub-block.
+            ap_frac err = 0;
+            ap_frac previous_row_err_i[8] = {0};
+#pragma HLS ARRAY_PARTITION variable = previous_row_err_i complete
+            for (ap_uint<8> k = 0; k < 64; ++k) {
+#pragma HLS pipeline II = 1
+#pragma HLS dependence variable = ac_strategy inter false
+
+                // Address bits (MSB first): {iy, k[5:3], ix, k[2:0]}.
+                ap_uint<10> addr8x8;
+                addr8x8(9, 8) = iy(1, 0);
+                addr8x8(7, 5) = k(5, 3);
+                addr8x8(4, 3) = ix(1, 0);
+                addr8x8(2, 0) = k(2, 0);
+
+                // Address bits (MSB first): {k[5:3], iy, k[2:0], ix}.
+                ap_uint<10> addr32x32;
+                addr32x32(9, 7) = k(5, 3);
+                addr32x32[6] = iy[1];
+                addr32x32[5] = iy[0];
+                addr32x32(4, 2) = k(2, 0);
+                addr32x32[1] = ix[1];
+                addr32x32[0] = ix[0];
+
+                // Address bits (MSB first): {iy[1], k[5:3], iy[0], ix[1], k[2:0], ix[0]}.
+                ap_uint<10> addr16x16;
+                addr16x16[9] = iy[1];
+                addr16x16(8, 6) = k(5, 3);
+                addr16x16[5] = iy[0];
+                addr16x16[4] = ix[1];
+                addr16x16(3, 1) = k(2, 0);
+                addr16x16[0] = ix[0];
+
+                // Index into the 256-entry 16x16 table: {k[5:3], iy[0], k[2:0], ix[0]}.
+                ap_uint<8> addr_quant16;
+                addr_quant16(7, 5) = k(5, 3);
+                addr_quant16[4] = iy[0];
+                addr_quant16(3, 1) = k(2, 0);
+                addr_quant16[0] = ix[0];
+
+                ap_uint<8> strategy_block = ac_strategy[iy][ix];
+
+                // Pick the table entry and the coefficient for the active transform;
+                // the multiplier is qac[iy][ix] in every case.
+                float qm, qa, plane;
+                if (strategy_block == 5) {
+                    // DCT32
+                    qm = InvDequantMatrix32x32[addr32x32];
+                    plane = cplane[addr32x32];
+                    qa = qac[iy][ix];
+                } else if (strategy_block == 4) {
+                    // DCT16
+                    qm = InvDequantMatrix16x16[addr_quant16];
+                    plane = cplane[addr16x16];
+                    qa = qac[iy][ix];
+                } else if (strategy_block == 0) {
+                    // DCT8
+                    qm = InvDequantMatrix8x8[k];
+                    plane = cplane[addr8x8];
+                    qa = qac[iy][ix];
+                } else {
+                    // DCT4
+                    qm = InvDequantMatrix4x4[k];
+                    plane = cplane[addr8x8];
+                    qa = qac[iy][ix];
+                }
+
+                float val = plane * (qm * qa);
+
+                // Convert to the integer scale expected by UpdateErr_int and quantize.
+                ap_frac16 val_i = SCLF(val);
+                apu_frac thres_i = SCLF(thres);
+                int16_t v_i = UpdateErr_int(val_i, thres_i, k, err, previous_row_err_i);
+
+                // The result is stored at the address the coefficient came from.
+                ap_uint<10> addr_o;
+                if (strategy_block == 5) {
+                    // DCT32
+                    addr_o = addr32x32;
+                } else if (strategy_block == 4) {
+                    // DCT16
+                    addr_o = addr16x16;
+                } else if (strategy_block == 0) {
+                    // DCT8
+                    addr_o = addr8x8;
+                } else {
+                    // DCT4
+                    addr_o = addr8x8;
+                }
+                quantized[addr_o] = (int16_t)v_i;
+
+#ifdef DEBUG_COEFFS
+                if (ix == 0 && iy == 0) {
+                    std::cout << std::hex << "k2_qua: k=" << k << " cplane=" << (int&)plane << " val=" << (int&)val
+                              << " qm=" << (int&)qm << " qac=" << (int&)qa << " quantized=" << v_i << std::endl;
+
+                    std::cout << std::setprecision(16) << "k2_qua: k=" << k << " cplane=" << plane << " val=" << val
+                              << " qm=" << qm << " qac=" << qa << " quantized=" << v_i << std::endl;
+                }
+#endif
+            }
+        }
+    }
+}
+
+// Dequantizes the 1024 quantized coefficients of one block, applying a bias
+// correction to every non-zero value.
+//
+// acs1           : in, one strategy code per 64-coefficient sub-block (read at k == 0):
+//                  5 = DCT32, 4 = DCT16, 0 = DCT8, anything else = DCT4.
+// inv_quant      : in, one inverse quant multiplier per sub-block (read at k == 0).
+// DequantMatrix* : dequantization tables, one per transform size.
+// quantized      : in, 1024 quantized coefficients.
+// dec_ac_Y       : out, 1024 dequantized coefficients, written at the address the
+//                  quantized value was read from.
+void adjustAcDec(hls::stream<uint8_t>& acs1,
+                 hls::stream<float>& inv_quant,
+                 const float DequantMatrix4x4Y[64],
+                 const float DequantMatrix8x8Y[64],
+                 const float DequantMatrix16x16Y[256],
+                 const float DequantMatrix32x32Y[1024],
+                 int16_t quantized[1024],
+                 float dec_ac_Y[1024]) {
+#pragma HLS INLINE off
+
+adjust:
+    for (ap_uint<8> by = 0; by < 4; by++) {
+        for (ap_uint<8> bx = 0; bx < 4; bx++) {
+            ap_uint<8> strategy_block;
+            float inv_qac;
+            for (ap_uint<8> k = 0; k < 64; k++) {
+#pragma HLS pipeline II = 1
+
+                // Address bits (MSB first): {by, k[5:3], bx, k[2:0]}. Used for the
+                // quantized input, the output and the 32x32 table.
+                ap_uint<10> addr32x32;
+                addr32x32(9, 8) = by(1, 0);
+                addr32x32(7, 5) = k(5, 3);
+                addr32x32(4, 3) = bx(1, 0);
+                addr32x32(2, 0) = k(2, 0);
+
+                // Index into the 256-entry 16x16 table: {by[0], k[5:3], bx[0], k[2:0]}.
+                ap_uint<8> addr16x16;
+                addr16x16[7] = by[0];
+                addr16x16(6, 4) = k(5, 3);
+                addr16x16[3] = bx[0];
+                addr16x16(2, 0) = k(2, 0);
+
+                int16_t quantized_coeff = quantized[addr32x32];
+
+                // One strategy code and one multiplier per sub-block.
+                if (k == 0) {
+                    strategy_block = acs1.read();
+                    inv_qac = inv_quant.read();
+                }
+
+                float dequant;
+                if (strategy_block == 5) {
+                    // DCT32
+                    dequant = DequantMatrix32x32Y[addr32x32];
+                } else if (strategy_block == 4) {
+                    // DCT16
+                    dequant = DequantMatrix16x16Y[addr16x16];
+                } else if (strategy_block == 0) {
+                    // DCT8
+                    dequant = DequantMatrix8x8Y[k];
+                } else {
+                    // DCT4. Every remaining code falls back to the 4x4 table, the same
+                    // way QuantizeBlockAC0/AC1 select their table, so that dequant is
+                    // never left uninitialized.
+                    dequant = DequantMatrix4x4Y[k];
+                }
+
+                float kBiasNumerator = 0.145f;
+                float AdjustQuantBias;
+
+                // Bias correction: 0 stays 0, +/-1 use a fixed constant, larger
+                // magnitudes are pulled towards zero by kBiasNumerator / value.
+                if (quantized_coeff == 0)
+                    AdjustQuantBias = 0;
+                else if (quantized_coeff == 1)
+                    AdjustQuantBias = 1.0f - 0.07005449891748593f;
+                else if (quantized_coeff == -1)
+                    AdjustQuantBias = 0.07005449891748593f - 1.0f;
+                else
+                    AdjustQuantBias = quantized_coeff - kBiasNumerator / quantized_coeff;
+
+                float out = AdjustQuantBias * dequant * inv_qac;
+                dec_ac_Y[addr32x32] = out;
+
+#ifdef DEBUG_COEFFS
+                std::cout << "k2_quant_y=" << by << " bx=" << bx << " k=" << k << " quant_y=" << (int)quantized_coeff
+                          << " dec_ac_Y=" << out << std::endl;
+#endif
+            }
+        }
+    }
+}
+
+// Quantizes and immediately dequantizes the Y plane, one 1024-coefficient
+// block per iteration, and streams out the reconstructed coefficients.
+//
+// xblock, yblock       : number of blocks; the body runs yblock * xblock times.
+// quant                : in, quant multipliers consumed by QuantizeBlockAC0.
+// inv_quant            : in, inverse quant multipliers consumed by adjustAcDec.
+// ac_strategy          : in, 16 strategy codes per block.
+// DequantMatrix*Y      : dequantization tables used by adjustAcDec.
+// InvDequantMatrix*Y   : inverse dequantization tables used by QuantizeBlockAC0.
+// plane_y              : in, 1024 coefficients per block.
+// dec_ac_y             : out, 1024 reconstructed coefficients per block.
+void QuantizeRoundtripBlockAC(ap_uint<16> xblock,
+                              ap_uint<16> yblock,
+                              hls::stream<float>& quant,
+                              hls::stream<float>& inv_quant,
+                              hls::stream<uint8_t>& ac_strategy,
+
+                              const float DequantMatrix4x4Y[64],
+                              const float DequantMatrix8x8Y[64],
+                              const float DequantMatrix16x16Y[256],
+                              const float DequantMatrix32x32Y[1024],
+
+                              const float InvDequantMatrix4x4Y[64],
+                              const float InvDequantMatrix8x8Y[64],
+                              const float InvDequantMatrix16x16Y[256],
+                              const float InvDequantMatrix32x32Y[1024],
+
+                              hls::stream<float>& plane_y,
+                              hls::stream<float>& dec_ac_y) {
+#pragma HLS INLINE off
+
+    // Zeroing threshold handed to the quantizer.
+    const float thres = 0.6f;
+
+    for (ap_uint<16> y = 0; y < yblock; ++y) {
+        for (ap_uint<16> x = 0; x < xblock; ++x) {
+#pragma HLS DATAFLOW
+
+            // The strategy codes are needed by both the quantizer (acs0) and the
+            // dequantizer (acs1), so they are duplicated first.
+            hls::stream<uint8_t> acs0;
+#pragma HLS stream variable = acs0 depth = 512
+#pragma HLS resource variable = acs0 core = FIFO_BRAM
+            hls::stream<uint8_t> acs1;
+#pragma HLS stream variable = acs1 depth = 1024
+#pragma HLS resource variable = acs1 core = FIFO_BRAM
+
+        duplicate_acs:
+            for (ap_uint<8> dy = 0; dy < 4; dy++) {
+                for (ap_uint<8> dx = 0; dx < 4; dx++) {
+#pragma HLS pipeline II = 1
+
+                    uint8_t tmp = ac_strategy.read();
+                    acs0.write(tmp);
+                    acs1.write(tmp);
+                }
+            }
+
+            // Buffer the block so the quantizer can address it randomly.
+            float y_plane[1024];
+        load_src:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+                y_plane[i] = plane_y.read();
+            }
+
+            int16_t quantized[1024];
+            QuantizeBlockAC0(quant, acs0, thres, InvDequantMatrix4x4Y, InvDequantMatrix8x8Y, InvDequantMatrix16x16Y,
+                             InvDequantMatrix32x32Y, y_plane, quantized);
+
+            float dec_ac_Y[1024];
+            adjustAcDec(acs1, inv_quant, DequantMatrix4x4Y, DequantMatrix8x8Y, DequantMatrix16x16Y, DequantMatrix32x32Y,
+                        quantized, dec_ac_Y);
+
+            // Stream the reconstructed block out in linear address order.
+        feed:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+                dec_ac_y.write(dec_ac_Y[i]);
+            }
+        }
+    }
+}
+
+// "y_plane" may refer to plane#1 of "coeffs"; it is also organized in the
+// block layout (consecutive block coefficient "pixels").
+// Class Dequant applies color correlation maps back.
+//
+// Removes the Y-correlated part from the X and B planes, one 1024-coefficient
+// block per iteration:
+//   out_x = in_x - ytox * in_y
+//   out_b = in_b - ytob * in_y
+// The Y output is taken from coeffs_Y unchanged.
+//
+// xblock, yblock     : number of blocks; the body runs yblock * xblock times.
+// ytob_map, ytox_map : correlation factors, one entry per 2x2 group of blocks,
+//                      indexed by (y / 2) * ceil(xblock / 2) + (x / 2).
+// plane_x/y/b        : in, 1024 coefficients per block each.
+// coeffs_Y           : in, 1024 values per block, forwarded as the Y output.
+// coeffs_XYB[3]      : out, 1024 values per block on each stream (0 = X, 1 = Y, 2 = B).
+void UnapplyColorCorrelationAC(ap_uint<16> xblock,
+                               ap_uint<16> yblock,
+                               float ytob_map[16384],
+                               float ytox_map[16384],
+                               hls::stream<float>& plane_x,
+                               hls::stream<float>& plane_y,
+                               hls::stream<float>& plane_b,
+                               hls::stream<float>& coeffs_Y,
+                               hls::stream<float> coeffs_XYB[3]) {
+#pragma HLS INLINE off
+
+LOOP_Y:
+    for (ap_uint<16> y = 0; y < yblock; ++y) {
+    LOOP_X:
+        for (ap_uint<16> x = 0; x < xblock; ++x) {
+#pragma HLS DATAFLOW
+
+            // Per-block buffers between the LOAD, CALC and FEED stages.
+            float x_plane[1024];
+            float y_plane[1024];
+            float b_plane[1024];
+            float coeffs_y[1024];
+            float coeffs[3][1024];
+#pragma HLS ARRAY_PARTITION variable = coeffs complete dim = 1
+
+        LOAD:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+                x_plane[i] = plane_x.read();
+                y_plane[i] = plane_y.read();
+                b_plane[i] = plane_b.read();
+
+                coeffs_y[i] = coeffs_Y.read();
+            }
+
+        CALC:
+            for (ap_uint<8> by = 0; by < 4; by++) {
+                for (ap_uint<8> bx = 0; bx < 4; bx++) {
+                    for (ap_uint<8> k = 0; k < 64; k++) {
+#pragma HLS PIPELINE II = 1
+
+                        // Map row stride: xblock / 2 rounded up. The map index only
+                        // depends on the block position (x, y), not on by/bx/k.
+                        ap_uint<14> block64 = xblock[0] ? ap_uint<14>(xblock(15, 1) + 1) : xblock(15, 1);
+                        ap_uint<14> cmap_addr = y(15, 1) * block64 + x(15, 1);
+
+                        float ytob = ytob_map[cmap_addr];
+                        float ytox = ytox_map[cmap_addr];
+
+                        // Address bits (MSB first): {by, k[5:3], bx, k[2:0]}.
+                        ap_uint<10> addr;
+                        addr(9, 8) = by(1, 0);
+                        addr(7, 5) = k(5, 3);
+                        addr(4, 3) = bx(1, 0);
+                        addr(2, 0) = k(2, 0);
+
+                        float in_y = y_plane[addr];
+                        float in_b = b_plane[addr];
+                        float in_x = x_plane[addr];
+
+                        float y_tmp = coeffs_y[addr];
+
+                        float out_b = in_b - ytob * in_y;
+                        float out_x = in_x - ytox * in_y;
+
+                        coeffs[0][addr] = out_x;
+                        coeffs[1][addr] = y_tmp;
+                        coeffs[2][addr] = out_b;
+
+#ifdef DEBUG_COEFFS
+                        std::cout << std::setprecision(16) << "k2_corr_in by=" << by << " bx=" << bx << " k=" << k
+                                  << " x=" << in_x << " y=" << in_y << " b=" << in_b << std::endl;
+
+                        std::cout << std::setprecision(16) << "k2_corr_out by=" << by << " bx=" << bx << " k=" << k
+                                  << " x=" << out_x << " y=" << y_tmp << " b=" << out_b << std::endl;
+#endif
+                    }
+                }
+            }
+
+            // The three output streams are fed by unrolled copies of the inner loop.
+        FEED:
+            for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+                for (ap_uint<16> i = 0; i < 1024; i++) {
+                    coeffs_XYB[c].write(coeffs[c][i]);
+                }
+            }
+        }
+    }
+}
+
+// Reads the five quantizer parameters and then, for each of the
+// yblock * xblock blocks (4x4 entries per block), fans the per-entry side
+// information out to the downstream stages:
+//   - every ac_strategy byte is copied to ac_strategy0, ac_strategy1 and
+//     ac_strategy2;
+//   - inv_qac receives inv_global_scale / quant_field;
+//   - qac0 and qac1 both receive global_scale_float * quant_field.
+// The function name (including its spelling) is part of the interface and is
+// left unchanged.
+void InitializeComputeCoeffocients(ap_uint<16> xblock,
+                                   ap_uint<16> yblock,
+                                   hls::stream<float>& quantizer,
+                                   hls::stream<uint8_t>& ac_strategy,
+                                   hls::stream<int32_t>& quant_field,
+
+                                   hls::stream<uint8_t>& ac_strategy0,
+                                   hls::stream<uint8_t>& ac_strategy1,
+                                   hls::stream<uint8_t>& ac_strategy2,
+                                   hls::stream<float>& inv_qac,
+                                   hls::stream<float>& qac0,
+                                   hls::stream<float>& qac1) {
+#pragma HLS INLINE off
+
+    // The quantizer stream carries five words in this fixed order. All five are
+    // read; only inv_global_scale and global_scale_float are used below.
+    Quantizer q;
+    q.quant_dc = fToBits<float, int32_t>(quantizer.read());
+    q.global_scale = fToBits<float, int32_t>(quantizer.read());
+    q.inv_quant_dc = quantizer.read();
+    q.inv_global_scale = quantizer.read();
+    q.global_scale_float = quantizer.read();
+
+LOOP_Y:
+    for (ap_uint<16> by = 0; by < yblock; ++by) {
+    LOOP_X:
+        for (ap_uint<16> bx = 0; bx < xblock; ++bx) {
+        initialize:
+            for (ap_uint<8> y = 0; y < 4; y++) {
+                for (ap_uint<8> x = 0; x < 4; x++) {
+#pragma HLS pipeline II = 1
+
+                    // Keep the strategy as a byte: the input stream and all three
+                    // output streams are uint8_t, so routing it through a float
+                    // only added an int->float->int conversion.
+                    const uint8_t acs = ac_strategy.read();
+                    ac_strategy0.write(acs);
+                    ac_strategy1.write(acs);
+                    ac_strategy2.write(acs);
+
+                    const int32_t quant = quant_field.read();
+                    // Computed once and shared with the debug trace below.
+                    const float inv_scale = q.inv_global_scale / quant;
+                    inv_qac.write(inv_scale);
+
+                    const float scale = q.global_scale_float * quant;
+                    qac0.write(scale);
+                    qac1.write(scale);
+
+#ifdef DEBUG_COEFFS
+                    std::cout << "y=" << y << " x=" << x << " qac=" << scale << " inv_qac=" << inv_scale
+                              << " qf=" << quant << std::endl;
+#endif
+                }
+            }
+        }
+    }
+}
+
+// Loads one 4x4 tile of side information. For every (y, x) position a single
+// ac_strategy byte is read and stored into all three channel planes of acs,
+// and one DC value per channel is read and copied into dc0, dc1 and dc2 at
+// index {y[1:0], x[1:0]}.
+void LoadAcsDc4x4(hls::stream<uint8_t>& ac_strategy,
+                  uint8_t acs[3][4][4],
+                  hls::stream<float> dc[3],
+                  float dc0[3][16],
+                  float dc1[3][16],
+                  float dc2[3][16]) {
+#pragma HLS INLINE off
+load_acs_dc:
+    for (ap_uint<8> row = 0; row < 4; row++) {
+        for (ap_uint<8> col = 0; col < 4; col++) {
+#pragma HLS pipeline II = 1
+
+            // One strategy byte per position, shared by the three channels.
+            const uint8_t strategy = ac_strategy.read();
+
+            // Row-major position inside the tile; the same for every channel.
+            ap_uint<4> pos;
+            pos(1, 0) = col(1, 0);
+            pos(3, 2) = row(1, 0);
+
+            for (ap_uint<8> ch = 0; ch < 3; ch++) {
+#pragma HLS UNROLL
+
+                const float dc_val = dc[ch].read();
+
+                acs[ch][row][col] = strategy;
+
+                dc0[ch][pos] = dc_val;
+                dc1[ch][pos] = dc_val;
+                dc2[ch][pos] = dc_val;
+
+#ifdef DEBUG_COEFFS
+                std::cout << "c=" << ch << " y=" << row << " x=" << col << " dc_dec=" << dc_val << std::endl;
+#endif
+            }
+        }
+    }
+}
+
+// Reads 1024 values per channel from ac[] and stores them into ac_in[].
+// Values arrive in (by, bx, y, x) order and are written to index
+// {by[1:0], y[2:0], bx[1:0], x[2:0]}.
+void LoadAc32x32(hls::stream<float> ac[3], float ac_in[3][1024]) {
+#pragma HLS INLINE off
+
+load_ac:
+    for (ap_uint<8> tile_y = 0; tile_y < 4; tile_y++) {
+        for (ap_uint<8> tile_x = 0; tile_x < 4; tile_x++) {
+            for (ap_uint<8> row = 0; row < 8; row++) {
+                for (ap_uint<8> col = 0; col < 8; col++) {
+#pragma HLS pipeline II = 1
+
+                    // Destination index; identical for all three channels.
+                    ap_uint<10> dst;
+                    dst(2, 0) = col(2, 0);
+                    dst(4, 3) = tile_x(1, 0);
+                    dst(7, 5) = row(2, 0);
+                    dst(9, 8) = tile_y(1, 0);
+
+                    for (ap_uint<8> ch = 0; ch < 3; ch++) {
+#pragma HLS UNROLL
+
+                        ac_in[ch][dst] = ac[ch].read();
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Compute the lowest-frequency coefficients in the DCT block, then compose
+// it with ac (1x1 for DCT8, 2x2 for DCT16, etc.)
+//
+// For each of the yblock * xblock iterations this function:
+//   1. loads a 4x4 tile of strategies and DC values (LoadAcsDc4x4) and
+//      3 x 1024 AC values (LoadAc32x32);
+//   2. runs Delay_block, DCT2x2_block16 and DCT4x4_block16 on the three DC
+//      copies of every channel;
+//   3. builds plane[c][] by taking, at each position, either a transformed DC
+//      value or the AC value, depending on the strategy of the enclosing cell;
+//   4. streams plane[0] to plane_x, plane[1] to both plane_y0 and plane_y1,
+//      and plane[2] to plane_b.
+void ComputeLlf(ap_uint<16> xblock,
+                ap_uint<16> yblock,
+                hls::stream<uint8_t>& ac_strategy,
+                hls::stream<float> dc_in[3],
+                hls::stream<float> ac_in[3],
+                hls::stream<float>& plane_x,
+                hls::stream<float>& plane_y0,
+                hls::stream<float>& plane_y1,
+                hls::stream<float>& plane_b) {
+#pragma HLS INLINE off
+
+LOOP_Y:
+    for (ap_uint<16> dy = 0; dy < yblock; ++dy) {
+    LOOP_X:
+        for (ap_uint<16> dx = 0; dx < xblock; ++dx) {
+#pragma HLS DATAFLOW
+
+            // Per-block working buffers. dc0/dc1/dc2 are three copies of the same
+            // DC tile, one per transform below.
+            uint8_t acs[3][4][4];
+#pragma HLS ARRAY_PARTITION variable = acs complete
+            float dc0[3][16];
+#pragma HLS ARRAY_PARTITION variable = dc0 complete
+            float dc1[3][16];
+#pragma HLS ARRAY_PARTITION variable = dc1 complete
+            float dc2[3][16];
+#pragma HLS ARRAY_PARTITION variable = dc2 complete
+            float ac[3][1024];
+#pragma HLS ARRAY_PARTITION variable = ac complete dim = 1
+            float plane[3][1024];
+#pragma HLS ARRAY_PARTITION variable = plane complete dim = 1
+            float dct1_o[3][16];
+#pragma HLS ARRAY_PARTITION variable = dct1_o complete
+            float dct2_o[3][16];
+#pragma HLS ARRAY_PARTITION variable = dct2_o complete
+            float dct4_o[3][16];
+#pragma HLS ARRAY_PARTITION variable = dct4_o complete
+
+#ifdef DEBUG_COEFFS
+            std::cout << "Load data" << std::endl;
+#endif
+
+            LoadAcsDc4x4(ac_strategy, acs, dc_in, dc0, dc1, dc2);
+            LoadAc32x32(ac_in, ac);
+
+        // Transform the DC tile of every channel three ways; which result is
+        // used is decided per position in LOOP_C1.
+        LOOP_C0:
+            for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+
+#ifdef DEBUG_COEFFS
+                std::cout << "DCT" << std::endl;
+#endif
+
+                Delay_block<float, 16>(dc0[c], dct1_o[c]);
+                DCT2x2_block16(dc1[c], dct2_o[c]);
+                DCT4x4_block16(dc2[c], dct4_o[c]);
+
+#ifdef DEBUG_COEFFS
+                int cnt = 0;
+                for (int i = 0; i < 16; i++)
+                    std::cout << "c=" << c << " id=" << i << " dct1_i=" << dc0[c][i] << " dct2_i=" << dc1[c][i]
+                              << " dct4_i=" << dc2[c][i] << std::endl;
+
+                for (ap_uint<8> i = 0; i < 16; i++)
+                    std::cout << "c=" << c << " id=" << i << " dct1_o=" << dct1_o[c][i] << " dct2_o="
+                              << dct2_o[c][i] * DCTTotalScale<2>(i(1, 0), i(3, 2)) *
+                                     DCTInvTotalScale<16>(i(1, 0), i(3, 2))
+                              << " dct4_o="
+                              << dct4_o[c][i] * DCTTotalScale<4>(i(1, 0), i(3, 2)) *
+                                     DCTInvTotalScale<32>(i(1, 0), i(3, 2))
+                              << std::endl;
+#endif
+            }
+
+        // Compose the output plane of every channel from the transformed DC
+        // values and the AC values.
+        LOOP_C1:
+            for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+
+#ifdef DEBUG_COEFFS
+                std::cout << "Compose AC" << std::endl;
+#endif
+
+            judge:
+                for (ap_uint<8> by = 0; by < 4; by++) {
+                    for (ap_uint<8> bx = 0; bx < 4; bx++) {
+#ifdef DEBUG_COEFFS
+                        if (c == 0) std::cout << "by=" << by << " bx=" << bx << std::endl;
+#endif
+                        for (ap_uint<8> y = 0; y < 8; y++) {
+                            for (ap_uint<8> x = 0; x < 8; x++) {
+#pragma HLS pipeline II = 1
+
+                                ap_uint<8> strategy_block = acs[c][by][bx];
+
+                                // Index {by, y, bx, x}, i.e. (by * 8 + y) * 32 + bx * 8 + x.
+                                ap_uint<10> addr;
+                                addr(9, 8) = by(1, 0);
+                                addr(7, 5) = y(2, 0);
+                                addr(4, 3) = bx(1, 0);
+                                addr(2, 0) = x(2, 0);
+
+                                // Index {y[1:0], x[1:0]} into dct4_o.
+                                ap_uint<4> addr_dct32;
+                                addr_dct32(3, 2) = y(1, 0);
+                                addr_dct32(1, 0) = x(1, 0);
+
+                                // Index {by[1], y[0], bx[1], x[0]} into dct2_o.
+                                ap_uint<4> addr_dct16;
+                                addr_dct16[3] = by[1];
+                                addr_dct16[2] = y[0];
+                                addr_dct16[1] = bx[1];
+                                addr_dct16[0] = x[0];
+
+                                // Index {by, bx} into dct1_o.
+                                ap_uint<4> addr_dct8;
+                                addr_dct8(3, 2) = by(1, 0);
+                                addr_dct8(1, 0) = bx(1, 0);
+
+                                float tmp;
+                                if (strategy_block == 5) {
+                                    // DCT32
+                                    // Only the 4x4 corner of cell (0, 0) takes the scaled
+                                    // dct4_o value; every other position keeps its AC value.
+                                    if (by == 0 && bx == 0 && y < 4 && x < 4) {
+                                        tmp =
+                                            dct4_o[c][addr_dct32] * DCTTotalScale<4>(x, y) * DCTInvTotalScale<32>(x, y);
+#ifdef DEBUG_COEFFS
+                                        std::cout << "k2_dc_:" << std::setprecision(8) << tmp << std::endl;
+#endif
+                                    } else
+                                        tmp = ac[c][addr];
+                                } else if (strategy_block == 4) {
+                                    // DCT16
+                                    // Only the 2x2 corner of cells with even by and bx takes
+                                    // the scaled dct2_o value.
+                                    if (by[0] == 0 && bx[0] == 0 && y < 2 && x < 2) {
+                                        tmp =
+                                            dct2_o[c][addr_dct16] * DCTTotalScale<2>(x, y) * DCTInvTotalScale<16>(x, y);
+                                    } else
+                                        tmp = ac[c][addr];
+                                } else {
+                                    // DCT8 && DCT4
+                                    // Position (0, 0) of every cell takes the dct1_o value.
+                                    if (y == 0 && x == 0) {
+                                        tmp = dct1_o[c][addr_dct8];
+#ifdef DEBUG_COEFFS
+                                        std::cout << "k2_dc_:" << std::setprecision(8) << tmp << std::endl;
+#endif
+                                    } else
+                                        tmp = ac[c][addr];
+                                }
+
+                                plane[c][addr] = tmp;
+
+#ifdef DEBUG_COEFFS
+                                if (c == 0) {
+                                    std::cout << plane[c][addr] << ",";
+                                }
+#endif
+                            }
+                        }
+#ifdef DEBUG_COEFFS
+                        if (c == 0) std::cout << std::endl;
+#endif
+                    }
+                }
+            }
+
+#ifdef DEBUG_COEFFS
+            std::cout << "Feed Data" << std::endl;
+#endif
+        // Emit the composed planes; channel 1 is duplicated onto plane_y0 and
+        // plane_y1.
+        FEED:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+
+                plane_x.write(plane[0][i]);
+                plane_y0.write(plane[1][i]);
+                plane_y1.write(plane[1][i]);
+                plane_b.write(plane[2][i]);
+            }
+        }
+    }
+}
+
+// Streams the 1024 quantized coefficients of one block to ac_quantized.
+// Output positions are visited in (by, bx, y, x) order; for each position the
+// source index into quantized[] is chosen by the strategy stored at
+// ac_strategy[by][bx].
+void ScatterCoefficient(uint8_t ac_strategy[4][4], int16_t quantized[1024], hls::stream<int16_t>& ac_quantized) {
+#pragma HLS INLINE off
+
+    for (ap_uint<8> cell_y = 0; cell_y < 4; cell_y++) {
+        for (ap_uint<8> cell_x = 0; cell_x < 4; cell_x++) {
+            for (ap_uint<8> row = 0; row < 8; row++) {
+                for (ap_uint<8> col = 0; col < 8; col++) {
+#pragma HLS pipeline II = 1
+
+                    // Candidate source index for the 8x8 layout: {by, y, bx, x}.
+                    ap_uint<10> idx8;
+                    idx8(2, 0) = col(2, 0);
+                    idx8(4, 3) = cell_x(1, 0);
+                    idx8(7, 5) = row(2, 0);
+                    idx8(9, 8) = cell_y(1, 0);
+
+                    // 16x16 layout: {by[1], y, by[0], bx[1], x, bx[0]}.
+                    ap_uint<10> idx16;
+                    idx16[0] = cell_x[0];
+                    idx16(3, 1) = col(2, 0);
+                    idx16[4] = cell_x[1];
+                    idx16[5] = cell_y[0];
+                    idx16(8, 6) = row(2, 0);
+                    idx16[9] = cell_y[1];
+
+                    // 32x32 layout: {y, by, x, bx}.
+                    ap_uint<10> idx32;
+                    idx32[0] = cell_x[0];
+                    idx32[1] = cell_x[1];
+                    idx32(4, 2) = col(2, 0);
+                    idx32[5] = cell_y[0];
+                    idx32[6] = cell_y[1];
+                    idx32(9, 7) = row(2, 0);
+
+                    // Strategy 5 is DCT32 and 4 is DCT16; every other value
+                    // (DCT8 and DCT4) uses the 8x8 layout.
+                    const uint8_t strategy = ac_strategy[cell_y][cell_x];
+                    ap_uint<10> src;
+                    switch (strategy) {
+                        case 5:
+                            src = idx32;
+                            break;
+                        case 4:
+                            src = idx16;
+                            break;
+                        default:
+                            src = idx8;
+                            break;
+                    }
+
+                    ac_quantized.write(quantized[src]);
+
+#ifdef DEBUG_COEFFS
+                    std::cout << "scatter by=" << cell_y << " bx=" << cell_x << " y=" << row << " x=" << col
+                              << " addr=" << src << " v=" << quantized[src] << std::endl;
+#endif
+                }
+            }
+        }
+    }
+}
+
+// Quantizes the three coefficient planes of every block.
+//
+// For each of the yblock * xblock iterations: reads a 4x4 tile of
+// (ac_strategy, quant) pairs and replicates it for the three channels, reads
+// 1024 coefficients per channel from coeffs_in, calls QuantizeBlockAC1 with the
+// InvDequantMatrix* tables of that channel, and streams the result through
+// ScatterCoefficient to ac_quantized[c].
+//
+// The DequantMatrix* parameters are accepted but not referenced in this body.
+void QuantizeAc(ap_uint<16> xblock,
+                ap_uint<16> yblock,
+                hls::stream<float> coeffs_in[3],
+                hls::stream<uint8_t>& ac_strategy,
+                hls::stream<float>& quant,
+
+                const float DequantMatrix4x4[3][64],
+                const float DequantMatrix8x8[3][64],
+                const float DequantMatrix16x16[3][256],
+                const float DequantMatrix32x32[3][1024],
+
+                const float InvDequantMatrix4x4[3][64],
+                const float InvDequantMatrix8x8[3][64],
+                const float InvDequantMatrix16x16[3][256],
+                const float InvDequantMatrix32x32[3][1024],
+
+                hls::stream<int16_t> ac_quantized[3]) {
+#pragma HLS INLINE off
+
+    // One constant per channel, passed to QuantizeBlockAC1 below.
+    const float kZeroBiasDefault[3] = {0.65f, 0.6f, 0.7f};
+
+LOOP_Y:
+    for (ap_uint<16> by = 0; by < yblock; ++by) {
+    LOOP_X:
+        for (ap_uint<16> bx = 0; bx < xblock; ++bx) {
+#pragma HLS DATAFLOW
+
+            // acs0 feeds QuantizeBlockAC1 and acs1 feeds ScatterCoefficient; both
+            // hold the same strategy tile.
+            uint8_t acs0[3][4][4];
+#pragma HLS ARRAY_PARTITION variable = acs0 complete
+            uint8_t acs1[3][4][4];
+#pragma HLS ARRAY_PARTITION variable = acs1 complete
+            float qac[3][4][4];
+#pragma HLS ARRAY_PARTITION variable = qac complete
+            float coeffs[3][1024];
+#pragma HLS ARRAY_PARTITION variable = coeffs complete dim = 1
+            int16_t quantized[3][1024];
+#pragma HLS ARRAY_PARTITION variable = quantized complete dim = 1
+
+        // One strategy byte and one quant value are read per tile position and
+        // copied to all three channels.
+        load_acs:
+            for (ap_uint<8> y = 0; y < 4; y++) {
+                for (ap_uint<8> x = 0; x < 4; x++) {
+#pragma HLS pipeline II = 1
+
+                    uint8_t tmp0 = ac_strategy.read();
+                    float tmp1 = quant.read();
+
+                    for (ap_uint<8> c = 0; c < 3; c++) {
+                        acs0[c][y][x] = tmp0;
+                        acs1[c][y][x] = tmp0;
+                        qac[c][y][x] = tmp1;
+                    }
+                }
+            }
+
+        quantize_ac:
+            for (ap_uint<8> c = 0; c < 3; ++c) {
+#pragma HLS UNROLL
+
+            load_coeffs:
+                for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+                    coeffs[c][i] = coeffs_in[c].read();
+                }
+
+#ifdef DEBUG_COEFFS
+                std::cout << "QuantizeBlockAC c=" << c << std::endl;
+#endif
+
+                QuantizeBlockAC1(qac[c], acs0[c], kZeroBiasDefault[c], InvDequantMatrix4x4[c], InvDequantMatrix8x8[c],
+                                 InvDequantMatrix16x16[c], InvDequantMatrix32x32[c], coeffs[c], quantized[c]);
+
+                // Emit the quantized values in the order selected by the strategy.
+                ScatterCoefficient(acs1[c], quantized[c], ac_quantized[c]);
+            }
+        }
+    }
+}
+
+// Top-level dataflow region that turns DC/AC inputs into quantized AC output.
+//
+// Stages, connected by the local FIFOs declared below:
+//   1. InitializeComputeCoeffocients: reads quantizer, ac_strategy and
+//      quant_field and produces ac_strategy0/1/2, inv_qac, qac0 and qac1.
+//   2. ComputeLlf: consumes ac_strategy0, dc and ac and produces plane_x,
+//      plane_y0, plane_y1 and plane_b.
+//   3. QuantizeRoundtripBlockAC: consumes qac0, inv_qac, ac_strategy1 and
+//      plane_y0 with the single-channel "...Y" matrices, producing dec_ac_Y.
+//   4. UnapplyColorCorrelationAC: consumes ytob_map, ytox_map, plane_x,
+//      dec_ac_Y, plane_b and plane_y1, producing coeffs[3].
+//   5. QuantizeAc: consumes coeffs, ac_strategy2 and qac1 with the
+//      three-channel matrices, producing ac_quantized[3].
+//
+// All FIFO storage bindings use BIND_STORAGE so the function no longer mixes it
+// with the deprecated RESOURCE pragma.
+void ComputeCoefficients(Config config,
+                         hls::stream<float>& quantizer,
+                         hls::stream<float> dc[3],
+                         hls::stream<float> ac[3],
+                         hls::stream<uint8_t>& ac_strategy,
+                         hls::stream<int32_t>& quant_field,
+
+                         float ytob_map[16384],
+                         float ytox_map[16384],
+
+                         const float DequantMatrix4x4Y[64],
+                         const float DequantMatrix8x8Y[64],
+                         const float DequantMatrix16x16Y[256],
+                         const float DequantMatrix32x32Y[1024],
+
+                         const float InvDequantMatrix4x4Y[64],
+                         const float InvDequantMatrix8x8Y[64],
+                         const float InvDequantMatrix16x16Y[256],
+                         const float InvDequantMatrix32x32Y[1024],
+
+                         const float DequantMatrix4x4[3][64],
+                         const float DequantMatrix8x8[3][64],
+                         const float DequantMatrix16x16[3][256],
+                         const float DequantMatrix32x32[3][1024],
+
+                         const float InvDequantMatrix4x4[3][64],
+                         const float InvDequantMatrix8x8[3][64],
+                         const float InvDequantMatrix16x16[3][256],
+                         const float InvDequantMatrix32x32[3][1024],
+
+                         hls::stream<int16_t> ac_quantized[3]) {
+#pragma HLS INLINE off
+#pragma HLS DATAFLOW
+
+    // Three copies of the strategy stream, one per consumer stage
+    // (ComputeLlf, QuantizeRoundtripBlockAC, QuantizeAc).
+    hls::stream<uint8_t> ac_strategy0("acs0");
+#pragma HLS stream variable = ac_strategy0 depth = 512
+#pragma HLS BIND_STORAGE variable = ac_strategy0 type = fifo impl = bram
+    hls::stream<uint8_t> ac_strategy1("acs1");
+#pragma HLS stream variable = ac_strategy1 depth = 2048
+#pragma HLS BIND_STORAGE variable = ac_strategy1 type = fifo impl = bram
+    hls::stream<uint8_t> ac_strategy2("acs2");
+#pragma HLS stream variable = ac_strategy2 depth = 4096
+#pragma HLS BIND_STORAGE variable = ac_strategy2 type = fifo impl = uram
+
+    // Per-entry quantization scales produced by the initialize stage.
+    hls::stream<float> inv_qac("inv_qac");
+#pragma HLS stream variable = inv_qac depth = 512
+#pragma HLS BIND_STORAGE variable = inv_qac type = fifo impl = bram
+    hls::stream<float> qac0("qac0");
+#pragma HLS stream variable = qac0 depth = 512
+#pragma HLS BIND_STORAGE variable = qac0 type = fifo impl = bram
+    hls::stream<float> qac1("qac1");
+#pragma HLS stream variable = qac1 depth = 4096
+#pragma HLS BIND_STORAGE variable = qac1 type = fifo impl = uram
+
+    // Planes produced by ComputeLlf. plane_y0 is consumed by the roundtrip
+    // stage; plane_x, plane_y1 and plane_b are consumed one stage later.
+    hls::stream<float> plane_y0("plane_y0");
+#pragma HLS stream variable = plane_y0 depth = 1024
+#pragma HLS BIND_STORAGE variable = plane_y0 type = fifo impl = bram
+    hls::stream<float> plane_x("plane_x");
+#pragma HLS stream variable = plane_x depth = 8192
+#pragma HLS BIND_STORAGE variable = plane_x type = fifo impl = uram
+    hls::stream<float> plane_y1("plane_y1");
+#pragma HLS stream variable = plane_y1 depth = 8192
+#pragma HLS BIND_STORAGE variable = plane_y1 type = fifo impl = uram
+    hls::stream<float> plane_b("plane_b");
+#pragma HLS stream variable = plane_b depth = 8192
+#pragma HLS BIND_STORAGE variable = plane_b type = fifo impl = uram
+
+    hls::stream<float> dec_ac_Y("dec_ac_Y");
+#pragma HLS stream variable = dec_ac_Y depth = 1024
+#pragma HLS BIND_STORAGE variable = dec_ac_Y type = fifo impl = bram
+    // NOTE(review): depth 4192 is kept as-is; it is not a power of two and may
+    // have been intended as 4096 - confirm before changing.
+    hls::stream<float> coeffs[3];
+#pragma HLS stream variable = coeffs depth = 4192
+#pragma HLS BIND_STORAGE variable = coeffs type = fifo impl = uram
+
+    ap_uint<16> xblock = config.xblock32;
+    ap_uint<16> yblock = config.yblock32;
+
+// Pre-quantized, matches what decoder will see.
+
+#ifdef DEBUG_COEFFS
+    std::cout << "Initialize" << std::endl;
+#endif
+
+    InitializeComputeCoeffocients(xblock, yblock, quantizer, ac_strategy, quant_field, ac_strategy0, ac_strategy1,
+                                  ac_strategy2, inv_qac, qac0, qac1);
+
+#ifdef DEBUG_COEFFS
+    std::cout << "Compute low frequency" << std::endl;
+#endif
+
+    ComputeLlf(xblock, yblock, ac_strategy0, dc, ac, plane_x, plane_y0, plane_y1, plane_b);
+
+#ifdef DEBUG_COEFFS
+    std::cout << "Quantize Y" << std::endl;
+#endif
+
+    QuantizeRoundtripBlockAC(xblock, yblock, qac0, inv_qac, ac_strategy1, DequantMatrix4x4Y, DequantMatrix8x8Y,
+                             DequantMatrix16x16Y, DequantMatrix32x32Y, InvDequantMatrix4x4Y, InvDequantMatrix8x8Y,
+                             InvDequantMatrix16x16Y, InvDequantMatrix32x32Y, plane_y0, dec_ac_Y);
+
+#ifdef DEBUG_COEFFS
+    std::cout << "Unapply color correlation" << std::endl;
+#endif
+
+    UnapplyColorCorrelationAC(xblock, yblock, ytob_map, ytox_map, plane_x, dec_ac_Y, plane_b, plane_y1, coeffs);
+
+#ifdef DEBUG_COEFFS
+    std::cout << "Quantize AC" << std::endl;
+#endif
+
+    QuantizeAc(xblock, yblock, coeffs, ac_strategy2, qac1, DequantMatrix4x4, DequantMatrix8x8, DequantMatrix16x16,
+               DequantMatrix32x32, InvDequantMatrix4x4, InvDequantMatrix8x8, InvDequantMatrix16x16,
+               InvDequantMatrix32x32, ac_quantized);
+}
+
+// Loads the strategies of one row of blocks. For each of the xblock32 blocks,
+// 16 strategy bytes are read (4 rows of 4) and stored at
+// ac_strategy[y][{bx[7:0], x[1:0]}]. block[][] receives, at the same index:
+//   - {y[1:0], x[1:0]} when the first strategy of the block is 5;
+//   - {y[0], x[0]} when the strategy at the even-aligned position
+//     (y[1] << 1, x[1] << 1) is 4;
+//   - 0 otherwise.
+void Reorder_load_acs(ap_uint<16> xblock32,
+                      hls::stream<uint8_t>& ac_strategy32,
+                      uint8_t ac_strategy[4][1024],
+                      uint8_t block[4][1024]) {
+#pragma HLS INLINE off
+
+    // Strategies of the block currently being read.
+    uint8_t tile[4][4];
+Load_ac_strategy:
+    for (ap_uint<16> blk32 = 0; blk32 < xblock32; ++blk32) {
+        for (ap_uint<8> row = 0; row < 4; ++row) {
+            for (ap_uint<8> col = 0; col < 4; ++col) {
+#pragma HLS pipeline II = 1
+
+                const uint8_t strategy = ac_strategy32.read();
+
+                // Column in the row buffers: {blk32[7:0], col[1:0]}.
+                ap_uint<10> dst;
+                dst(1, 0) = col(1, 0);
+                dst(9, 2) = blk32(7, 0);
+
+                ac_strategy[row][dst] = strategy;
+                tile[row][col] = strategy;
+
+                uint8_t sub = 0;
+                if (tile[0][0] == 5) {
+                    sub = (row(1, 0), col(1, 0));
+                } else if (tile[row[1] << 1][col[1] << 1] == 4) {
+                    sub = ((ap_uint<1>)row[0], (ap_uint<1>)col[0]);
+                }
+                block[row][dst] = sub;
+
+#ifdef DEBUG_REORDER
+                std::cout << "acs_in y=" << row << " x=" << col << " acs=" << (uint32_t)strategy << std::endl;
+#endif
+            }
+        }
+    }
+}
+
+// Loads the quant field of one row of blocks: for each of the xblock32 blocks,
+// 16 values are read (4 rows of 4) and stored at
+// quant_field[y][{bx[7:0], x[1:0]}].
+void Reorder_load_qf(ap_uint<16> xblock32, hls::stream<int32_t>& quant_field32, int32_t quant_field[4][1024]) {
+#pragma HLS INLINE off
+
+Load_quant_field:
+    for (ap_uint<16> blk32 = 0; blk32 < xblock32; ++blk32) {
+        for (ap_uint<8> row = 0; row < 4; ++row) {
+            for (ap_uint<8> col = 0; col < 4; ++col) {
+#pragma HLS pipeline II = 1
+
+                // Column in the row buffer: {blk32[7:0], col[1:0]}.
+                ap_uint<10> dst;
+                dst(1, 0) = col(1, 0);
+                dst(9, 2) = blk32(7, 0);
+
+                quant_field[row][dst] = quant_field32.read();
+            }
+        }
+    }
+}
+
+// Loads the DC values of one row of blocks: for each of the xblock32 blocks and
+// each of the three channels, 16 values are read from dc32[c] (4 rows of 4)
+// and stored at dc[c][y][{bx[7:0], x[1:0]}].
+void Reorder_load_dc(ap_uint<16> xblock32, hls::stream<int16_t> dc32[3], int16_t dc[3][4][1024]) {
+#pragma HLS INLINE off
+
+Load_dc:
+    for (ap_uint<16> blk32 = 0; blk32 < xblock32; ++blk32) {
+        for (ap_uint<8> ch = 0; ch < 3; ++ch) {
+#pragma HLS UNROLL
+            for (ap_uint<8> row = 0; row < 4; ++row) {
+                for (ap_uint<8> col = 0; col < 4; ++col) {
+#pragma HLS pipeline II = 1
+
+                    // Column in the row buffer: {blk32[7:0], col[1:0]}.
+                    ap_uint<10> dst;
+                    dst(1, 0) = col(1, 0);
+                    dst(9, 2) = blk32(7, 0);
+
+                    dc[ch][row][dst] = dc32[ch].read();
+                }
+            }
+        }
+    }
+}
+
+// Loads the AC values of one row of blocks into the 64-bit wide buffer ac[].
+// Each pipeline iteration reads eight 16-bit samples from ac32[c] and stores
+// them as two words of four samples (first sample in the low 16 bits) at the
+// consecutive addresses {dy, y, bx[7:0], dx, 0} and {dy, y, bx[7:0], dx, 1}.
+void Reorder_load_ac(ap_uint<16> xblock32, hls::stream<int16_t> ac32[3], ap_uint<64> ac[3][65536]) {
+#pragma HLS INLINE off
+
+Load_ac:
+    for (ap_uint<16> blk32 = 0; blk32 < xblock32; ++blk32) {
+        for (ap_uint<8> ch = 0; ch < 3; ++ch) {
+#pragma HLS UNROLL
+            for (ap_uint<8> cell_y = 0; cell_y < 4; ++cell_y) {
+                for (ap_uint<8> cell_x = 0; cell_x < 4; ++cell_x) {
+                    for (ap_uint<8> row = 0; row < 8; ++row) {
+#pragma HLS pipeline II = 8
+
+                        // Address of the word holding samples 0..3 of this row.
+                        ap_uint<16> lo_addr;
+                        lo_addr[0] = 0;
+                        lo_addr(2, 1) = cell_x(1, 0);
+                        lo_addr(10, 3) = blk32(7, 0);
+                        lo_addr(15, 11) = (cell_y(1, 0), row(2, 0));
+
+                        // Samples 4..7 go to the next word.
+                        ap_uint<16> hi_addr = lo_addr;
+                        hi_addr[0] = 1;
+
+                        ap_int<16> s0 = ac32[ch].read();
+                        ap_int<16> s1 = ac32[ch].read();
+                        ap_int<16> s2 = ac32[ch].read();
+                        ap_int<16> s3 = ac32[ch].read();
+                        ap_uint<64> lo_word;
+                        lo_word = (s3, s2, s1, s0);
+
+                        ap_int<16> s4 = ac32[ch].read();
+                        ap_int<16> s5 = ac32[ch].read();
+                        ap_int<16> s6 = ac32[ch].read();
+                        ap_int<16> s7 = ac32[ch].read();
+                        ap_uint<64> hi_word;
+                        hi_word = (s7, s6, s5, s4);
+
+                        ac[ch][lo_addr] = lo_word;
+                        ac[ch][hi_addr] = hi_word;
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Emits the buffered strategy and block-position bytes of one row of blocks.
+// Rows are visited in order (4 per call, xblock8 entries each); a row is
+// written to o_ac_strategy / o_block only when its index {by[7:0], y[1:0]} is
+// below yblock8.
+void Reorder_feed_acs(ap_uint<16> by,
+                      ap_uint<16> xblock8,
+                      ap_uint<16> yblock8,
+                      uint8_t ac_strategy[4][1024],
+                      uint8_t block[4][1024],
+                      hls::stream<ap_uint<32> >& o_ac_strategy,
+                      hls::stream<ap_uint<32> >& o_block) {
+#pragma HLS INLINE off
+
+Feed_ac_strategy:
+    for (ap_uint<8> row = 0; row < 4; ++row) {
+        for (ap_uint<16> col = 0; col < xblock8; ++col) {
+#pragma HLS pipeline II = 1
+
+            // Row index {by[7:0], row[1:0]}, compared against yblock8.
+            ap_uint<10> abs_row;
+            abs_row(1, 0) = row(1, 0);
+            abs_row(9, 2) = by(7, 0);
+
+            const bool in_range = abs_row < yblock8;
+            if (in_range) {
+                o_ac_strategy.write(ac_strategy[row][col]);
+                o_block.write(block[row][col]);
+
+#ifdef DEBUG_REORDER
+                std::cout << "acs_out y=" << row << " x=" << col << " acs=" << (uint32_t)ac_strategy[row][col]
+                          << std::endl;
+#endif
+            }
+        }
+    }
+}
+
+// Emits the buffered quant-field values of one row of blocks. Rows are visited
+// in order (4 per call, xblock8 entries each); a row is written to
+// o_quant_field only when its index {by[7:0], y[1:0]} is below yblock8.
+void Reorder_feed_qf(ap_uint<16> by,
+                     ap_uint<16> xblock8,
+                     ap_uint<16> yblock8,
+                     int32_t quant_field[4][1024],
+                     hls::stream<ap_uint<32> >& o_quant_field) {
+#pragma HLS INLINE off
+
+Feed_quant_field:
+    for (ap_uint<8> row = 0; row < 4; ++row) {
+        for (ap_uint<16> col = 0; col < xblock8; ++col) {
+#pragma HLS pipeline II = 1
+            // Row index {by[7:0], row[1:0]}, compared against yblock8.
+            ap_uint<10> abs_row;
+            abs_row(1, 0) = row(1, 0);
+            abs_row(9, 2) = by(7, 0);
+
+            const bool in_range = abs_row < yblock8;
+            if (in_range) {
+                o_quant_field.write(quant_field[row][col]);
+            }
+        }
+    }
+}
+
+// Emits the buffered DC values of one row of blocks. For each of the 4 rows
+// the three channels are written one after another (xblock8 values each); a
+// row is written to o_dc only when its index {by[7:0], y[1:0]} is below
+// yblock8.
+void Reorder_feed_dc(
+    ap_uint<16> by, ap_uint<16> xblock8, ap_uint<16> yblock8, int16_t dc[3][4][1024], hls::stream<ap_uint<32> >& o_dc) {
+#pragma HLS INLINE off
+
+Feed_dc:
+    for (ap_uint<8> row = 0; row < 4; ++row) {
+        for (ap_uint<8> ch = 0; ch < 3; ++ch) {
+            for (ap_uint<16> col = 0; col < xblock8; ++col) {
+#pragma HLS pipeline II = 1
+                // Row index {by[7:0], row[1:0]}, compared against yblock8.
+                ap_uint<10> abs_row;
+                abs_row(1, 0) = row(1, 0);
+                abs_row(9, 2) = by(7, 0);
+
+                const bool in_range = abs_row < yblock8;
+                if (in_range) {
+                    o_dc.write(dc[ch][row][col]);
+                }
+            }
+        }
+    }
+}
+
+// Emits the buffered AC samples of one row of blocks, visiting them in
+// (dy, bx, c, y, x) order. Each sample is unpacked from the 64-bit word at
+// address {dy, y, bx[9:0], x[2]} (lane x[1:0]) and written to both o_ac0 and
+// o_ac1, but only when the row index {by[7:0], dy[1:0]} is below yblock8.
+void Reorder_feed_ac(ap_uint<16> by,
+                     ap_uint<16> xblock8,
+                     ap_uint<16> yblock8,
+                     ap_uint<64> ac[3][65536],
+                     hls::stream<int16_t>& o_ac0,
+                     hls::stream<ap_uint<32> >& o_ac1) {
+#pragma HLS INLINE off
+
+Feed_ac_order:
+    for (ap_uint<8> cell_y = 0; cell_y < 4; ++cell_y) {
+        for (ap_uint<16> blk8 = 0; blk8 < xblock8; ++blk8) {
+            for (ap_uint<8> ch = 0; ch < 3; ++ch) {
+                for (ap_uint<8> row = 0; row < 8; ++row) {
+                    for (ap_uint<8> col = 0; col < 8; ++col) {
+#pragma HLS pipeline II = 1
+                        // Row index {by[7:0], cell_y[1:0]}, compared against yblock8.
+                        ap_uint<10> abs_row;
+                        abs_row(1, 0) = cell_y(1, 0);
+                        abs_row(9, 2) = by(7, 0);
+
+                        // Word address; bit 0 selects the half of the 8-sample row.
+                        ap_uint<16> word_addr;
+                        word_addr[0] = col[2];
+                        word_addr(10, 1) = blk8(9, 0);
+                        word_addr(13, 11) = row(2, 0);
+                        word_addr(15, 14) = cell_y(1, 0);
+
+                        // The word is read and unpacked on every iteration, even
+                        // when nothing is emitted.
+                        ap_uint<64> word = ac[ch][word_addr];
+                        int16_t lane[4];
+                        lane[0] = word(15, 0);
+                        lane[1] = word(31, 16);
+                        lane[2] = word(47, 32);
+                        lane[3] = word(63, 48);
+
+                        const bool in_range = abs_row < yblock8;
+                        if (in_range) {
+                            int16_t sample = lane[col(1, 0)];
+
+                            o_ac0.write(sample);
+                            o_ac1.write(sample);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Converts 32x32-block scan order to 8x8-block scan order and discards the
+// redundant results.
+//
+// For each of the config.yblock32 rows of blocks, the four Reorder_load_*
+// stages buffer the strategy, quant-field, DC and AC inputs of the row
+// (config.xblock32 blocks), and the four Reorder_feed_* stages stream them out
+// again row by row, bounded by config.xblock8 / config.yblock8.
+void Reorder(Config config,
+             hls::stream<int16_t> dc32[3],
+             hls::stream<int16_t> ac32[3],
+             hls::stream<uint8_t>& ac_strategy32,
+             hls::stream<int32_t>& quant_field32,
+
+             hls::stream<ap_uint<32> >& o_dc,
+             hls::stream<int16_t>& o_ac0,
+             hls::stream<ap_uint<32> >& o_ac1,
+             hls::stream<ap_uint<32> >& o_quant_field,
+             hls::stream<ap_uint<32> >& o_ac_strategy,
+             hls::stream<ap_uint<32> >& o_block) {
+#pragma HLS INLINE off
+
+    // Row buffers shared between the load and feed stages.
+    int16_t dc[3][4][1024];
+    // The AC buffer is bound to two-port URAM. BIND_STORAGE replaces the
+    // deprecated RESOURCE pragma, which also carried a trailing comment on the
+    // pragma line.
+    ap_uint<64> ac[3][65536];
+#pragma HLS BIND_STORAGE variable = ac type = ram_2p impl = uram
+    uint8_t ac_strategy[4][1024];
+    uint8_t block[4][1024];
+    int32_t quant_field[4][1024];
+
+    ap_uint<16> xblock8 = config.xblock8;
+    ap_uint<16> yblock8 = config.yblock8;
+    ap_uint<16> xblock32 = config.xblock32;
+    ap_uint<16> yblock32 = config.yblock32;
+
+    for (ap_uint<16> by = 0; by < yblock32; ++by) {
+#pragma HLS DATAFLOW
+
+        Reorder_load_acs(xblock32, ac_strategy32, ac_strategy, block);
+
+        Reorder_load_qf(xblock32, quant_field32, quant_field);
+
+        Reorder_load_dc(xblock32, dc32, dc);
+
+        Reorder_load_ac(xblock32, ac32, ac);
+
+        Reorder_feed_acs(by, xblock8, yblock8, ac_strategy, block, o_ac_strategy, o_block);
+
+        Reorder_feed_qf(by, xblock8, yblock8, quant_field, o_quant_field);
+
+        Reorder_feed_dc(by, xblock8, yblock8, dc, o_dc);
+
+        Reorder_feed_ac(by, xblock8, yblock8, ac, o_ac0, o_ac1);
+    }
+}
+
+// In-place ascending bubble sort of arr[0..len), applying every swap to pld[]
+// as well so each payload stays paired with its key. Neighbours are exchanged
+// only when the left key is strictly greater, so equal keys keep their
+// original relative order.
+template <typename T, int len>
+void bubbleSort(T arr[len], T pld[len]) {
+#pragma HLS INLINE off
+
+    T keyHold, pldHold;
+
+    // Each pass moves the largest remaining key to the end of the unsorted
+    // prefix, which therefore shrinks by one element per pass.
+    for (int pass = 0; pass < len - 1; pass++) {
+        for (int lo = 0; lo < len - 1 - pass; lo++) {
+#pragma HLS PIPELINE
+
+            const int hi = lo + 1;
+            if (arr[lo] > arr[hi]) {
+                keyHold = arr[lo];
+                arr[lo] = arr[hi];
+                arr[hi] = keyHold;
+
+                pldHold = pld[lo];
+                pld[lo] = pld[hi];
+                pld[hi] = pldHold;
+            }
+        }
+    }
+}
+
+// Produces a coefficient order for every (group row, group column, channel).
+// For each of them the 64 zig-zag positions are keyed by num_zeros / 8 (read at
+// offset {by[3:0], bx[3:0], c[1:0], position[5:0]}), sorted by ascending key
+// with bubbleSort, and the resulting 64 positions are written to the order
+// stream.
+void SortOrder(ap_uint<16> xgroup,
+               ap_uint<16> ygroup,
+               uint32_t num_zeros[16 * 16 * 4 * 64],
+               hls::stream<ap_uint<32> >& order) {
+#pragma HLS INLINE off
+
+    // zig-zag addr
+    const ap_uint<8> zigzag[8 * 8] = {0,  1,  8,  16, 9,  2,  3,  10,
+                                      17, 24, 32, 25, 18, 11, 4,  5,
+                                      12, 19, 26, 33, 40, 48, 41, 34,
+                                      27, 20, 13, 6,  7,  14, 21, 28,
+                                      35, 42, 49, 56, 57, 50, 43, 36,
+                                      29, 22, 15, 23, 30, 37, 44, 51,
+                                      58, 59, 52, 45, 38, 31, 39, 46,
+                                      53, 60, 61, 54, 47, 55, 62, 63};
+
+    for (ap_uint<8> grp_y = 0; grp_y < ygroup; ++grp_y) {
+        for (ap_uint<8> grp_x = 0; grp_x < xgroup; ++grp_x) {
+            for (ap_uint<8> ch = 0; ch < 3; ++ch) {
+                ap_uint<32> key[64];
+                ap_uint<32> pld[64];
+
+                // Apply zig-zag order.
+                for (ap_uint<8> n = 0; n < 64; ++n) {
+#pragma HLS pipeline II = 1
+
+                    ap_uint<8> coeff_pos = zigzag[n];
+
+                    ap_uint<16> offset;
+                    offset(5, 0) = coeff_pos(5, 0);
+                    offset(7, 6) = ch(1, 0);
+                    offset(11, 8) = grp_x(3, 0);
+                    offset(15, 12) = grp_y(3, 0);
+
+                    // We don't care for the exact number -> quantize number of zeros,
+                    // to get less permuted order.
+                    key[n] = num_zeros[offset] / 8;
+                    pld[n] = coeff_pos;
+                }
+
+                // sort
+                bubbleSort<ap_uint<32>, 64>(key, pld);
+
+                // feed
+                for (ap_uint<8> n = 0; n < 64; n++) {
+#pragma HLS pipeline II = 1
+
+                    order.write(pld[n]);
+                }
+            }
+        }
+    }
+}
+
+// Counts zero-valued AC coefficients per group, channel and coefficient index.
+//
+// Pass 1 clears every counter that pass 2 can touch; pass 2 reads one value
+// from `ac` per (block row, block column, channel, k) - that is
+// yblock8 * xblock8 * 3 * 64 reads - and increments the matching counter when
+// the value is zero and k is not 0.
+//
+// Counters are addressed as {by[9:6], bx[9:6], c[1:0], k[5:0]}, so all blocks
+// that share by[9:6] and bx[9:6] accumulate into the same 3 * 64 counters.
+void ComputeCount(Config config, hls::stream<int16_t>& ac, uint32_t num_zeros[16 * 16 * 4 * 64]) {
+#pragma HLS INLINE off
+
+    ap_uint<16> blocksX = config.xblock8;
+    ap_uint<16> blocksY = config.yblock8;
+
+initilize:
+    for (ap_uint<16> by = 0; by < blocksY; ++by) {
+        for (ap_uint<16> bx = 0; bx < blocksX; ++bx) {
+            for (ap_uint<8> c = 0; c < 3; ++c) {
+                for (ap_uint<8> k = 0; k < 64; ++k) {
+#pragma HLS pipeline II = 1
+
+                    ap_uint<16> slot;
+                    slot(5, 0) = k(5, 0);
+                    slot(7, 6) = c(1, 0);
+                    slot(11, 8) = bx(9, 6);
+                    slot(15, 12) = by(9, 6);
+                    num_zeros[slot] = 0;
+                }
+            }
+        }
+    }
+
+Compute_count:
+    for (ap_uint<16> by = 0; by < blocksY; ++by) {
+        for (ap_uint<16> bx = 0; bx < blocksX; ++bx) {
+            for (ap_uint<8> c = 0; c < 3; ++c) {
+                for (ap_uint<8> k = 0; k < 64; ++k) {
+#pragma HLS pipeline II = 1
+
+                    ap_uint<16> slot;
+                    slot(5, 0) = k(5, 0);
+                    slot(7, 6) = c(1, 0);
+                    slot(11, 8) = bx(9, 6);
+                    slot(15, 12) = by(9, 6);
+
+                    // The stream is consumed on every iteration, counted or not.
+                    const int16_t coeff = ac.read();
+                    const bool countedZero = (coeff == 0) && (k != 0);
+                    if (countedZero) {
+                        ++num_zeros[slot];
+                    }
+
+#ifdef DEBUG_COEFFS_ORDER
+                    cnt++;
+#endif
+                }
+            }
+        }
+    }
+}
+
+// Derives the coefficient order from the quantized AC stream in two steps:
+// ComputeCount fills a local table of zero counts from `ac`, then SortOrder
+// turns that table into the words written to `order`.
+void ComputeCoeffOrder(Config config, hls::stream<int16_t>& ac, hls::stream<ap_uint<32> >& order) {
+#pragma HLS INLINE off
+
+    ap_uint<16> groupsX = config.xgroup;
+    ap_uint<16> groupsY = config.ygroup;
+
+    // Count number of zero coefficients, separately for each DCT band.
+    uint32_t zeroCount[16 * 16 * 4 * 64];
+#pragma HLS BIND_STORAGE variable = zeroCount type = ram_2p impl = uram
+
+    ComputeCount(config, ac, zeroCount);
+
+    SortOrder(groupsX, groupsY, zeroCount, order);
+}
+
+// Reads 4 x 4 blocks of 8 x 8 values (1024 reads) from `dcts` and keeps the
+// low-frequency entries needed by the three follow-up transforms:
+//
+//   coeffs0 : entry (0, 0) of every block, stored unscaled at {by[1:0], bx[1:0]}
+//   coeffs1 : entries with x < 2 and y < 2 of blocks whose by and bx are even,
+//             stored at {by[1], y[0], bx[1], x[0]} and multiplied by
+//             DCTInvTotalScale<2>(x, y) * DCTTotalScale<16>(x, y)
+//   coeffs2 : entries with x < 4 and y < 4 of block (0, 0), stored at
+//             {y[1:0], x[1:0]} and multiplied by
+//             DCTInvTotalScale<4>(x, y) * DCTTotalScale<32>(x, y)
+//
+// Every value is read from the stream whether or not it is stored.
+void loadIDCTs(hls::stream<float>& dcts, float coeffs0[16], float coeffs1[16], float coeffs2[16]) {
+#pragma HLS INLINE off
+
+LOOP_LOAD_DCTS:
+    for (ap_uint<8> by = 0; by < 4; ++by) {
+        for (ap_uint<8> bx = 0; bx < 4; ++bx) {
+            for (ap_uint<8> y = 0; y < 8; ++y) {
+                for (ap_uint<8> x = 0; x < 8; ++x) {
+#pragma HLS pipeline II = 1
+
+                    float value = dcts.read();
+
+                    // IDCT1x1: one entry per block.
+                    ap_uint<4> slot1x1 = (by(1, 0), bx(1, 0));
+                    const bool firstOfBlock = (y == 0 && x == 0);
+                    if (firstOfBlock) {
+                        coeffs0[slot1x1] = value;
+#ifdef DEBUG_IDCT
+                        std::cout << "idct1x1: id=" << slot1x1 << " value=" << value << " scaled=" << coeffs0[slot1x1]
+                                  << std::endl;
+#endif
+                    }
+
+                    // IDCT2x2: 2 x 2 corner of every second block in each direction.
+                    ap_uint<4> slot2x2;
+                    slot2x2[0] = x[0];
+                    slot2x2[1] = bx[1];
+                    slot2x2[2] = y[0];
+                    slot2x2[3] = by[1];
+                    const bool evenBlock = (by[0] == 0) && (bx[0] == 0);
+                    if (evenBlock && y < 2 && x < 2) {
+                        coeffs1[slot2x2] = value * DCTInvTotalScale<2>(x, y) * DCTTotalScale<16>(x, y);
+#ifdef DEBUG_IDCT
+                        std::cout << "idct2x2: id=" << slot2x2 << " value=" << value << " scaled=" << coeffs1[slot2x2]
+                                  << std::endl;
+#endif
+                    }
+
+                    // IDCT4x4: 4 x 4 corner of the first block only.
+                    ap_uint<4> slot4x4 = (y(1, 0), x(1, 0));
+                    const bool firstBlock = (by == 0 && bx == 0);
+                    if (firstBlock && y < 4 && x < 4) {
+                        coeffs2[slot4x4] = value * DCTInvTotalScale<4>(x, y) * DCTTotalScale<32>(x, y);
+#ifdef DEBUG_IDCT
+                        std::cout << "idct4x4: id=" << slot4x4 << " value=" << value << " scaled=" << coeffs2[slot4x4]
+                                  << std::endl;
+#endif
+                    }
+                } // x
+            }     // y
+        }         // bx
+    }             // by
+}
+
+void IDCT(float from0[16], float from1[16], float from2[16], float to0[16], float to1[16], float to2[16]) {
+#pragma HLS INLINE off
+#pragma HLS DATAFLOW
+    // Three independent 16-entry paths (fromN feeds only toN), all placed in this one DATAFLOW region.
+    Delay_block<float, 16>(from0, to0); // Delay: from0 -> to0 (helper body not visible here; presumably a plain copy - confirm)
+    IDCT2x2_block16(from1, to1);        // IDCT2x2: from1 -> to1
+    IDCT4x4_block16(from2, to2);        // IDCT4x4: from2 -> to2
+}
+
+// Writes 16 values to `dc`, one per (y, x) of a 4 x 4 grid in row-major
+// order. The source array is chosen per position by `ac_strategy[y][x]`:
+//   5     -> dcs2
+//   4     -> dcs1
+//   other -> dcs0
+// All three arrays are indexed with {y[1:0], x[1:0]}.
+void feedIDCTs(uint8_t ac_strategy[4][4], float dcs0[16], float dcs1[16], float dcs2[16], hls::stream<float>& dc) {
+#pragma HLS INLINE off
+
+Judge:
+    for (ap_uint<8> y = 0; y < 4; ++y) {
+        for (ap_uint<8> x = 0; x < 4; ++x) {
+#pragma HLS pipeline II = 1
+            ap_uint<4> addr;
+            addr(1, 0) = x(1, 0);
+            addr(3, 2) = y(1, 0);
+
+            float picked;
+            switch (ac_strategy[y][x]) {
+                case 5: // dct32
+                    picked = dcs2[addr];
+                    break;
+                case 4: // dct16
+                    picked = dcs1[addr];
+                    break;
+                default: // dct4:3 && dct8:0
+                    picked = dcs0[addr];
+                    break;
+            }
+
+#ifdef DEBUG_IDCT
+            std::cout << "dc=" << picked << " dct8x8=" << dcs0[addr] << " dct16x16=" << dcs1[addr]
+                      << " dct32x32=" << dcs2[addr] << std::endl;
+#endif
+
+            dc.write(picked);
+        }
+    }
+}
+
+// Quantizes the three DC channels and produces their dequantized
+// reconstruction.
+//
+// Five words are read from `quantizer` first, in this order: quant_dc,
+// global_scale, inv_quant_dc, inv_global_scale, global_scale_float.
+//
+// Then, for each of xblock32 * yblock32 * 16 samples, one value is read from
+// each of in[0], in[1], in[2] (labelled X, Y, B in the debug output):
+//   1. Y is rounded to an integer coefficient and dequantized again.
+//   2. ytox / ytob times the dequantized Y is subtracted from X / B.
+//   3. The X and B residuals are rounded to integer coefficients and dequantized.
+//   4. ytox / ytob times the dequantized Y is added back to dequantized X / B.
+// The integer coefficients go to dc[0..2] and the reconstructed values to
+// dc_dec[0..2], both in X, Y, B order.
+void QuantizeDC(ap_uint<16> xblock32,
+                ap_uint<16> yblock32,
+                hls::stream<float>& quantizer,
+                hls::stream<float> in[3],
+                const float ytox,
+                const float ytob,
+                hls::stream<int16_t> dc[3],
+                hls::stream<float> dc_dec[3]) {
+#pragma HLS INLINE off
+
+    Quantizer q;
+    q.quant_dc = fToBits<float, int32_t>(quantizer.read());
+    q.global_scale = fToBits<float, int32_t>(quantizer.read());
+    q.inv_quant_dc = quantizer.read();
+    q.inv_global_scale = quantizer.read();
+    q.global_scale_float = quantizer.read();
+
+    // Dequantization steps per channel.
+    float step_x = dequantDCx * q.inv_quant_dc;
+    float step_y = dequantDCy * q.inv_quant_dc;
+    float step_b = dequantDCb * q.inv_quant_dc;
+
+    // Quantization factors per channel.
+    float inv_step_x = invDequantDCx * q.quant_dc * q.global_scale_float;
+    float inv_step_y = invDequantDCy * q.quant_dc * q.global_scale_float;
+    float inv_step_b = invDequantDCb * q.quant_dc * q.global_scale_float;
+
+#ifdef DEBUG_FRAMECACHE
+    std::cout << "ytox=" << ytox << std::endl;
+    std::cout << "ytob=" << ytob << std::endl;
+
+    std::cout << "mul_x=" << step_x << std::endl;
+    std::cout << "mul_y=" << step_y << std::endl;
+    std::cout << "mul_b=" << step_b << std::endl;
+
+    std::cout << "inv_mul_x=" << inv_step_x << std::endl;
+    std::cout << "inv_mul_y=" << inv_step_y << std::endl;
+    std::cout << "inv_mul_b=" << inv_step_b << std::endl;
+#endif
+
+    // NOTE(review): the outer counter is bounded by xblock32 and the inner one by
+    // yblock32; neither counter is used in the body, only the product matters.
+LOOP_Y:
+    for (ap_uint<16> iy = 0; iy < xblock32; ++iy) {
+    LOOP_X:
+        for (ap_uint<16> ix = 0; ix < yblock32; ++ix) {
+        LOOP_QUANT_BY:
+            for (ap_uint<8> row = 0; row < 4; row++) {
+                for (ap_uint<8> col = 0; col < 4; col++) {
+#pragma HLS PIPELINE // II=3 is ok
+
+                    float src_x = in[0].read();
+                    float src_y = in[1].read();
+                    float src_b = in[2].read();
+
+                    // QuantizeRoundtripDC
+                    float q_y = hls::round(src_y * inv_step_y);
+                    float rec_y = q_y * step_y;
+
+                    // X path: ApplyColorCorrelationDC false, QuantizeCoeffsDC,
+                    // Dequant, ApplyColorCorrelationDC true
+                    float res_x = src_x - ytox * rec_y;
+                    float q_x = hls::round(res_x * inv_step_x);
+                    float rec_x = q_x * step_x;
+                    float full_x = rec_x + ytox * rec_y;
+
+                    // B path: same four steps with ytob
+                    float res_b = src_b - ytob * rec_y;
+                    float q_b = hls::round(res_b * inv_step_b);
+                    float rec_b = q_b * step_b;
+                    float full_b = rec_b + ytob * rec_y;
+
+                    dc[0].write((int16_t)q_x);
+                    dc[1].write((int16_t)q_y);
+                    dc[2].write((int16_t)q_b);
+
+                    dc_dec[0].write(full_x);
+                    dc_dec[1].write(rec_y);
+                    dc_dec[2].write(full_b);
+
+#ifdef DEBUG_FRAMECACHE
+                    std::cout << "dc_in: y=" << row << " x=" << col << " X=" << src_x << " Y=" << src_y << " B=" << src_b
+                              << std::endl;
+                    std::cout << "quantizeRound: y=" << row << " x=" << col << " X=" << src_x << " Y=" << rec_y
+                              << " B=" << src_b << std::endl;
+                    std::cout << "cor_false: y=" << row << " x=" << col << " X=" << res_x << " Y=" << rec_y
+                              << " B=" << res_b << std::endl;
+                    std::cout << "dc: y=" << row << " x=" << col << " X=" << q_x << " Y=" << q_y
+                              << " B=" << q_b << std::endl;
+                    std::cout << "cor_true: y=" << row << " x=" << col << " X=" << full_x << " Y=" << rec_y
+                              << " B=" << full_b << std::endl;
+#endif
+                }
+            }
+        }
+    }
+}
+
+// Forwards the quantized DC coefficients unchanged and passes the
+// dequantized DC values through a clamp before forwarding them.
+//
+// Five words are read from `quantizer` first (quant_dc, global_scale,
+// inv_quant_dc, inv_global_scale, global_scale_float). For every one of
+// xblock32 * yblock32 iterations and every channel c in 0..2, 16 values are
+// read from dc_dec_tmp[c] and 16 from dc_quantized[c]; each dequantized value
+// `v` is written to dc_dec[c] as max(v - h, min(v, v + h)), where
+// h = inv_quant_dc * dequantDC{x,y,b} * 0.5, and each quantized value is
+// copied to dc[c].
+void AdaptiveDCReconstruction(ap_uint<16> xblock32,
+                              ap_uint<16> yblock32,
+                              hls::stream<float>& quantizer,
+                              hls::stream<int16_t> dc_quantized[3],
+                              hls::stream<float> dc_dec_tmp[3],
+
+                              hls::stream<int16_t> dc[3],
+                              hls::stream<float> dc_dec[3]) {
+#pragma HLS INLINE off
+
+    Quantizer q;
+    q.quant_dc = fToBits<float, int32_t>(quantizer.read());
+    q.global_scale = fToBits<float, int32_t>(quantizer.read());
+    q.inv_quant_dc = quantizer.read();
+    q.inv_global_scale = quantizer.read();
+    q.global_scale_float = quantizer.read();
+
+    // Half of one dequantization step, per channel.
+    float half_step[3] = {q.inv_quant_dc * dequantDCx * 0.5f, q.inv_quant_dc * dequantDCy * 0.5f,
+                          q.inv_quant_dc * dequantDCb * 0.5f};
+
+#ifdef DEBUG_FRAMECACHE
+    std::cout << "k2_AdaptiveDCReconstruction:";
+#endif
+
+LOOP_Y:
+    for (ap_uint<16> iy = 0; iy < xblock32; ++iy) {
+    LOOP_X:
+        for (ap_uint<16> ix = 0; ix < yblock32; ++ix) {
+        LOOP_ADAPTIVE:
+            for (ap_uint<8> c = 0; c < 3; c++) {
+#pragma HLS UNROLL
+            Y:
+                for (ap_uint<8> sy = 0; sy < 4; sy++) {
+                X:
+                    for (ap_uint<8> sx = 0; sx < 4; sx++) {
+#pragma HLS PIPELINE II = 1
+
+                        float value = dc_dec_tmp[c].read();
+                        float lower = value - half_step[c];
+                        float upper = value + half_step[c];
+                        float out = hls::max(lower, hls::min(value, upper));
+
+                        int16_t coeff = dc_quantized[c].read();
+
+                        dc[c].write(coeff);
+                        dc_dec[c].write(out);
+
+#ifdef DEBUG_FRAMECACHE
+                        std::cout << std::setprecision(16) << out << ",";
+#endif
+                    } // sx
+                }     // sy
+            }         // c
+        }
+    }
+
+#ifdef DEBUG_FRAMECACHE
+    std::cout << std::endl;
+#endif
+}
+
+// Reads 16 strategy bytes from `ac_strategy` in row-major (y, x) order and
+// stores each one at [y][x] in all three planes of `strategy`, so the three
+// planes end up identical.
+void LoadAcStrategy4x4(hls::stream<uint8_t>& ac_strategy, uint8_t strategy[3][4][4]) {
+#pragma HLS INLINE off
+
+Load_ac_strategy:
+    for (ap_uint<8> y = 0; y < 4; ++y) {
+        for (ap_uint<8> x = 0; x < 4; ++x) {
+#pragma HLS pipeline II = 1
+
+            const uint8_t code = ac_strategy.read();
+            strategy[0][y][x] = code;
+            strategy[1][y][x] = code;
+            strategy[2][y][x] = code;
+        }
+    }
+}
+
+// Runs xblock32 * yblock32 iterations; each iteration is one DATAFLOW region
+// that
+//   1. loads a 4 x 4 strategy grid (replicated once per channel),
+//   2. extracts the coefficients of each channel from dcts[c] with loadIDCTs,
+//   3. passes them through IDCT, and
+//   4. lets feedIDCTs pick, per grid position, which result goes to dc_tmp[c].
+// The three channels use separate, fully partitioned buffers.
+void IDCTxyb(ap_uint<16> xblock32,
+             ap_uint<16> yblock32,
+             hls::stream<float> dcts[3],
+             hls::stream<uint8_t>& ac_strategy,
+             hls::stream<float> dc_tmp[3]) {
+#pragma HLS INLINE off
+
+IDCT:
+    for (ap_uint<16> iy = 0; iy < xblock32; ++iy) {
+        for (ap_uint<16> ix = 0; ix < yblock32; ++ix) {
+#pragma HLS DATAFLOW
+
+            // One strategy copy per channel so every feedIDCTs call has its own.
+            uint8_t acs[3][4][4];
+#pragma HLS ARRAY_PARTITION variable = acs complete
+
+            // Inputs of the three IDCT paths.
+            float in1x1[3][16];
+#pragma HLS ARRAY_PARTITION variable = in1x1 complete
+            float in2x2[3][16];
+#pragma HLS ARRAY_PARTITION variable = in2x2 complete
+            float in4x4[3][16];
+#pragma HLS ARRAY_PARTITION variable = in4x4 complete
+
+            // Outputs of the three IDCT paths.
+            float out1x1[3][16];
+#pragma HLS ARRAY_PARTITION variable = out1x1 complete
+            float out2x2[3][16];
+#pragma HLS ARRAY_PARTITION variable = out2x2 complete
+            float out4x4[3][16];
+#pragma HLS ARRAY_PARTITION variable = out4x4 complete
+
+            LoadAcStrategy4x4(ac_strategy, acs);
+
+        loopLoadIDCT:
+            for (ap_uint<8> c = 0; c < 3; ++c) {
+#pragma HLS UNROLL
+                loadIDCTs(dcts[c], in1x1[c], in2x2[c], in4x4[c]);
+            }
+
+        loopIDCT:
+            for (ap_uint<8> c = 0; c < 3; ++c) {
+#pragma HLS UNROLL
+                IDCT(in1x1[c], in2x2[c], in4x4[c], out1x1[c], out2x2[c], out4x4[c]);
+            }
+
+        loopFeedIDCT:
+            for (ap_uint<8> c = 0; c < 3; ++c) {
+#pragma HLS UNROLL
+                feedIDCTs(acs[c], out1x1[c], out2x2[c], out4x4[c], dc_tmp[c]);
+            }
+        }
+    }
+}
+
+// Three-stage DATAFLOW pipeline that turns DCT coefficients into quantized
+// DC values plus their reconstruction:
+//   IDCTxyb                  : dcts + ac_strategy        -> idct_out
+//   QuantizeDC               : idct_out + quantizer0     -> quant_out, recon_out
+//   AdaptiveDCReconstruction : quant_out, recon_out,
+//                              quantizer1                -> dc, dc_dec
+// The intermediate streams are 32 deep and bound to SRL FIFOs. Note the
+// parameter order here is (ytob, ytox) while QuantizeDC takes (ytox, ytob).
+void InitializeFrameEncCache(Config config,
+                             hls::stream<float>& quantizer0,
+                             hls::stream<float>& quantizer1,
+                             hls::stream<float> dcts[3],
+                             hls::stream<uint8_t>& ac_strategy,
+                             float ytob,
+                             float ytox,
+                             hls::stream<int16_t> dc[3],
+                             hls::stream<float> dc_dec[3]) {
+#pragma HLS INLINE off
+#pragma HLS dataflow
+
+    int blocks32X = config.xblock32;
+    int blocks32Y = config.yblock32;
+
+    hls::stream<float> idct_out[3];
+#pragma HLS stream variable = idct_out depth = 32
+#pragma HLS BIND_STORAGE variable = idct_out type = fifo impl = srl
+    hls::stream<int16_t> quant_out[3];
+#pragma HLS stream variable = quant_out depth = 32
+#pragma HLS BIND_STORAGE variable = quant_out type = fifo impl = srl
+    hls::stream<float> recon_out[3];
+#pragma HLS stream variable = recon_out depth = 32
+#pragma HLS BIND_STORAGE variable = recon_out type = fifo impl = srl
+
+    IDCTxyb(blocks32X, blocks32Y, dcts, ac_strategy, idct_out);
+
+    QuantizeDC(blocks32X, blocks32Y, quantizer0, idct_out, ytox, ytob, quant_out, recon_out);
+
+    AdaptiveDCReconstruction(blocks32X, blocks32Y, quantizer1, quant_out, recon_out, dc, dc_dec);
+}
+
+// Copies the 32-word configuration buffer `in` into a local array and fills
+// all eight entries of `config` with the same decoded values.
+//
+// Word layout as consumed below:
+//    0..7    xsize, ysize, xblock8, yblock8, xblock32, yblock32, xgroup, ygroup
+//    9..15   in_quant_field_num, cmap_num0, cmap_num1, ac_num, dc_num, acs_num,
+//            out_quant_field_num
+//    16      kChooseAcStrategy
+//    17..22  discretization_factor, kMulInhomogeneity16x16,
+//            kMulInhomogeneity32x32, butteraugli_target, intensity_multiplier,
+//            quant_dc (each converted with bitsToF<uint32_t, float>)
+//    25..27  src_num[0..2]
+//    28..30  src_offset[0..2]
+// Words 8, 23, 24 and 31 are not assigned to any field (word 8 is only
+// printed as "src_num" in the DEBUG dump).
+void load_config(ap_uint<32> in[32], Config config[8]) {
+#ifdef DEBUG
+    std::cout << "Config:" << std::endl;
+    std::cout << "xsize:" << in[0] << std::endl;
+    std::cout << "ysize:" << in[1] << std::endl;
+    std::cout << "xblock8:" << in[2] << std::endl;
+    std::cout << "yblock8:" << in[3] << std::endl;
+    std::cout << "xblock32:" << in[4] << std::endl;
+    std::cout << "yblock32:" << in[5] << std::endl;
+    std::cout << "xgroup:" << in[6] << std::endl;
+    std::cout << "ygroup:" << in[7] << std::endl;
+    std::cout << "src_num:" << in[8] << std::endl;
+    std::cout << "in_quant_field_num:" << in[9] << std::endl;
+    std::cout << "cmap_num0:" << in[10] << std::endl;
+    std::cout << "cmap_num1:" << in[11] << std::endl;
+    std::cout << "ac_num:" << in[12] << std::endl;
+    std::cout << "dc_num:" << in[13] << std::endl;
+    std::cout << "acs_num:" << in[14] << std::endl;
+    std::cout << "out_quant_field_num:" << in[15] << std::endl;
+    std::cout << "choose_ac_astrategy:" << in[16] << std::endl;
+    std::cout << "discretization_factor:" << bitsToF<uint32_t, float>(in[17]) << std::endl;
+    std::cout << "kmul16:" << bitsToF<uint32_t, float>(in[18]) << std::endl;
+    std::cout << "kmul32:" << bitsToF<uint32_t, float>(in[19]) << std::endl;
+    std::cout << "butteraugli:" << bitsToF<uint32_t, float>(in[20]) << std::endl;
+    std::cout << "intensity_mul:" << bitsToF<uint32_t, float>(in[21]) << std::endl;
+    std::cout << "quant_dc:" << bitsToF<uint32_t, float>(in[22]) << std::endl;
+#endif
+
+    // Stage the buffer locally so the fan-out loop below reads from `words`.
+    ap_uint<32> words[32];
+    for (ap_uint<8> w = 0; w < 32; w++) {
+#pragma HLS pipeline II = 1
+        words[w] = in[w];
+    }
+
+    // Every consumer gets its own identical copy.
+    for (ap_uint<8> n = 0; n < 8; n++) {
+        // Image and tiling geometry.
+        config[n].xsize = words[0];
+        config[n].ysize = words[1];
+        config[n].xblock8 = words[2];
+        config[n].yblock8 = words[3];
+        config[n].xblock32 = words[4];
+        config[n].yblock32 = words[5];
+        config[n].xgroup = words[6];
+        config[n].ygroup = words[7];
+
+        // Element counts (word 8 is skipped; src_num comes from words 25..27).
+        config[n].in_quant_field_num = words[9];
+        config[n].cmap_num0 = words[10];
+        config[n].cmap_num1 = words[11];
+        config[n].ac_num = words[12];
+        config[n].dc_num = words[13];
+        config[n].acs_num = words[14];
+        config[n].out_quant_field_num = words[15];
+
+        // Flag and float-encoded tuning values.
+        config[n].kChooseAcStrategy = words[16];
+        config[n].discretization_factor = bitsToF<uint32_t, float>(words[17]);
+        config[n].kMulInhomogeneity16x16 = bitsToF<uint32_t, float>(words[18]);
+        config[n].kMulInhomogeneity32x32 = bitsToF<uint32_t, float>(words[19]);
+        config[n].butteraugli_target = bitsToF<uint32_t, float>(words[20]);
+        config[n].intensity_multiplier = bitsToF<uint32_t, float>(words[21]);
+        config[n].quant_dc = bitsToF<uint32_t, float>(words[22]);
+
+        // Per-plane source sizes and offsets.
+        config[n].src_num[0] = words[25];
+        config[n].src_num[1] = words[26];
+        config[n].src_num[2] = words[27];
+        config[n].src_offset[0] = words[28];
+        config[n].src_offset[1] = words[29];
+        config[n].src_offset[2] = words[30];
+    }
+}
+
+// Converts a flagged stream of 32-bit words to floats and writes every value
+// to both outputs.
+//
+// `e_istrm` is read once up front and once after every data word; the loop
+// ends on the first `true`. Each word goes through bitsToF<uint32_t, float>
+// before being written to `ostrm0` and then `ostrm1`.
+void streamDup(hls::stream<ap_uint<32> >& istrm,
+               hls::stream<bool>& e_istrm,
+               hls::stream<float>& ostrm0,
+               hls::stream<float>& ostrm1) {
+#pragma HLS INLINE off
+
+    bool last = e_istrm.read();
+    while (!last) {
+        const float value = bitsToF<uint32_t, float>(istrm.read());
+        last = e_istrm.read();
+
+        ostrm0.write(value);
+        ostrm1.write(value);
+    }
+}
+
+// Drains `strm_end`: reads flags until the first `true` has been consumed,
+// discarding every value.
+void eliminate_strm_end(hls::stream<bool>& strm_end) {
+#pragma HLS INLINE off
+
+    bool finished;
+    do {
+        finished = strm_end.read();
+    } while (!finished);
+}
+
+// Decodes the colour-correlation map from `cmap_strm`.
+//
+// The first two words give the global factors: YtoX = (w0 - 128) / 256 and
+// YtoB = (w1 - 0) / 128. After that `cmap_num` pairs of words follow; pair i
+// is decoded the same way into ytox_map[i] and ytob_map[i].
+//
+// NOTE(review): `cmap_num` is not checked against the 16384-entry maps.
+void loadCMap(hls::stream<ap_uint<32> >& cmap_strm,
+              ap_uint<32> cmap_num,
+              float& YtoB,
+              float& YtoX,
+              float ytob_map[16384],
+              float ytox_map[16384]) {
+#pragma HLS INLINE off
+
+    // X channel: offset 128, factor 256.
+    const int32_t kColorFactorX = 256;
+    const int32_t kColorOffsetX = 128;
+    const float kColorScaleX = 1.0f / kColorFactorX;
+
+    // B channel: offset 0, factor 128.
+    const int32_t kColorFactorB = 128;
+    const int32_t kColorOffsetB = 0;
+    const float kColorScaleB = 1.0f / kColorFactorB;
+
+    int32_t globalX = cmap_strm.read();
+    int32_t globalB = cmap_strm.read();
+
+    YtoX = 1.0 * (globalX - kColorOffsetX) * kColorScaleX;
+    YtoB = 1.0 * (globalB - kColorOffsetB) * kColorScaleB;
+
+#ifdef DEBUG
+    std::cout << "x=" << globalX << " ytox=" << YtoX << std::endl;
+    std::cout << "b=" << globalB << " ytob=" << YtoB << std::endl;
+#endif
+
+    for (int idx = 0; idx < cmap_num; idx++) {
+#pragma HLS pipeline
+
+        // Each entry is an (x, b) pair, x first.
+        int tileX = cmap_strm.read();
+        int tileB = cmap_strm.read();
+
+        ytox_map[idx] = 1.0 * (tileX - kColorOffsetX) * kColorScaleX;
+        ytob_map[idx] = 1.0 * (tileB - kColorOffsetB) * kColorScaleB;
+
+#ifdef DEBUG
+        std::cout << "id=" << idx << " x=" << tileX << " ytox=" << ytox_map[idx] << std::endl;
+        std::cout << "id=" << idx << " b=" << tileB << " ytob=" << ytob_map[idx] << std::endl;
+#endif
+    }
+}
+
+// Writes the DC stream to `dc` as three consecutive planes.
+//
+// The stream delivers, for each row y, xblock8 words of channel 0, then of
+// channel 1, then of channel 2. Word x of row y of channel c is stored at
+// c * yblock8 * xblock8 + y * xblock8 + x.
+template <int _BurstLen, int _WData>
+void DcToAxi(ap_uint<32> xblock8,
+             ap_uint<32> yblock8,
+             ap_uint<_WData>* dc,
+             hls::stream<ap_uint<_WData> >& dc_quantized) {
+#pragma HLS INLINE off
+
+    for (int row = 0; row < yblock8; row++) {
+        for (int plane = 0; plane < 3; plane++) {
+            // Start of this row inside this channel's plane.
+            ap_uint<32> wr = plane * yblock8 * xblock8 + row * xblock8;
+            for (int col = 0; col < xblock8; col++) {
+#pragma HLS PIPELINE II = 1
+                dc[wr] = dc_quantized.read();
+                wr++;
+            }
+        }
+    }
+}
+
+// Writes the quantized AC stream to `ac`.
+//
+// The stream delivers 64 words per (block row by, block column bx, channel c),
+// iterated in that nesting order. The 64 words of one block are stored
+// contiguously, starting at
+//   c * xblock8 * yblock8 * 64
+//     + by[31:6] * 64 * xblock8 * 64
+//     + bx[31:6] * 4096
+//     + (by[5:0] * xblock8 + bx[5:0]) * 64
+//
+// Trace output for blocks outside the first 64 x 64 group is produced only
+// when DEBUG_AXI is defined; previously the per-word and end-of-line prints
+// were compiled unconditionally while only the "k2_group:" prefix was guarded.
+template <int _BurstLen, int _WData>
+void AcToAxi(ap_uint<32> xblock8,
+             ap_uint<32> yblock8,
+             ap_uint<_WData>* ac,
+             hls::stream<ap_uint<_WData> >& ac_quantized) {
+#pragma HLS INLINE off
+
+    for (ap_uint<32> by = 0; by < yblock8; by++) {
+        for (ap_uint<32> bx = 0; bx < xblock8; bx++) {
+            for (int c = 0; c < 3; c++) {
+                ap_uint<32> addr = c * xblock8 * yblock8 * 64 + by(31, 6) * 64 * xblock8 * 64 + bx(31, 6) * 4096 +
+                                   (by(5, 0) * xblock8 + bx(5, 0)) * 64;
+
+#ifdef DEBUG_AXI
+                // Only blocks beyond the first group in either direction are traced.
+                const bool traced = by(31, 6) > 0 || bx(31, 6) > 0;
+                if (traced) std::cout << "k2_group:";
+#endif
+
+                for (int i = 0; i < 64; i++) {
+#pragma HLS PIPELINE II = 1
+                    ap_uint<_WData> data = ac_quantized.read();
+                    ac[addr] = data;
+                    addr++;
+
+#ifdef DEBUG_AXI
+                    if (traced) std::cout << (int)data << ",";
+#endif
+                }
+
+#ifdef DEBUG_AXI
+                if (traced) std::cout << std::endl;
+#endif
+            }
+        }
+    }
+}
+
+// Writes yblock8 * xblock8 words from `qf_quantized` to `qf` in arrival
+// order, then appends two words read from `scale_strm` directly after them.
+template <int _BurstLen, int _WData>
+void QfToAxi(ap_uint<32> xblock8,
+             ap_uint<32> yblock8,
+             ap_uint<_WData>* qf,
+             hls::stream<ap_uint<_WData> >& qf_quantized,
+             hls::stream<ap_uint<_WData> >& scale_strm) {
+#pragma HLS INLINE off
+
+    ap_uint<32> wr = 0;
+    for (int row = 0; row < yblock8; row++) {
+        for (int col = 0; col < xblock8; col++) {
+#pragma HLS PIPELINE II = 1
+            qf[wr] = qf_quantized.read();
+            wr++;
+        }
+    }
+
+    // `wr` now points one past the field; the two scale words follow it.
+    qf[wr] = scale_strm.read();
+    qf[wr + 1] = scale_strm.read();
+}
+
+// Writes yblock8 * xblock8 word pairs to memory in arrival order: word i of
+// `acs_strm` goes to acs[i] and word i of `block_strm` goes to block[i].
+template <int _BurstLen, int _WData>
+void AcsToAxi(ap_uint<32> xblock8,
+              ap_uint<32> yblock8,
+              ap_uint<_WData>* acs,
+              hls::stream<ap_uint<_WData> >& acs_strm,
+              ap_uint<_WData>* block,
+              hls::stream<ap_uint<_WData> >& block_strm) {
+#pragma HLS INLINE off
+
+    ap_uint<32> wr = 0;
+    for (int row = 0; row < yblock8; row++) {
+        for (int col = 0; col < xblock8; col++) {
+#pragma HLS PIPELINE II = 1
+            // Both buffers share the same running index.
+            acs[wr] = acs_strm.read();
+            block[wr] = block_strm.read();
+            wr++;
+        }
+    }
+}
+
+// Writes ygroup * xgroup * 3 * 64 words from `order_strm` to `order` in
+// arrival order (3 * 64 words per group).
+template <int _BurstLen, int _WData>
+void OrderToAxi(ap_uint<32> xgroup,
+                ap_uint<32> ygroup,
+                ap_uint<_WData>* order,
+                hls::stream<ap_uint<_WData> >& order_strm) {
+#pragma HLS INLINE off
+
+    ap_uint<32> wr = 0;
+    for (int gy = 0; gy < ygroup; gy++) {
+        for (int gx = 0; gx < xgroup; gx++) {
+            for (int n = 0; n < 3 * 64; n++) {
+#pragma HLS PIPELINE II = 1
+                order[wr] = order_strm.read();
+                wr++;
+            }
+        }
+    }
+}
+
+// Output stage: five writers in one DATAFLOW region, each draining its own
+// stream(s) into its own buffer.
+//   DcToAxi    : dc_quantized              -> dc
+//   AcToAxi    : ac_quantized              -> ac
+//   QfToAxi    : qf_quantized, scale_strm  -> qf
+//   AcsToAxi   : acs_strm, block_strm      -> acs, block
+//   OrderToAxi : order_strm                -> order
+// The first four are sized by config.xblock8 / config.yblock8, the last one
+// by config.xgroup / config.ygroup.
+template <int _BurstLen, int _WData>
+void WriteToAxi(Config config,
+                ap_uint<_WData>* ac,
+                hls::stream<ap_uint<_WData> >& ac_quantized,
+                ap_uint<_WData>* dc,
+                hls::stream<ap_uint<_WData> >& dc_quantized,
+                ap_uint<_WData>* qf,
+                hls::stream<ap_uint<_WData> >& qf_quantized,
+                hls::stream<ap_uint<_WData> >& scale_strm,
+                ap_uint<_WData>* acs,
+                hls::stream<ap_uint<_WData> >& acs_strm,
+                ap_uint<_WData>* block,
+                hls::stream<ap_uint<_WData> >& block_strm,
+                ap_uint<_WData>* order,
+                hls::stream<ap_uint<_WData> >& order_strm) {
+#pragma HLS INLINE off
+#pragma HLS DATAFLOW
+
+    const ap_uint<32> blocksX = config.xblock8;
+    const ap_uint<32> blocksY = config.yblock8;
+    const ap_uint<32> groupsX = config.xgroup;
+    const ap_uint<32> groupsY = config.ygroup;
+
+    DcToAxi<_BurstLen, _WData>(blocksX, blocksY, dc, dc_quantized);
+
+    AcToAxi<_BurstLen, _WData>(blocksX, blocksY, ac, ac_quantized);
+
+    QfToAxi<_BurstLen, _WData>(blocksX, blocksY, qf, qf_quantized, scale_strm);
+
+    AcsToAxi<_BurstLen, _WData>(blocksX, blocksY, acs, acs_strm, block, block_strm);
+
+    OrderToAxi<_BurstLen, _WData>(groupsX, groupsY, order, order_strm);
+}
+
+void kernel2Wrapper(ap_uint<AXI_SZ> config[MAX_NUM_CONFIG],
+
+                    ap_uint<2 * AXI_SZ> src[AXI_OUT / 2],
+                    ap_uint<AXI_SZ> quant_field_in[AXI_QF],
+                    ap_uint<AXI_SZ> cmap[AXI_CMAP],
+
+                    ap_uint<AXI_SZ> ac[MAX_NUM_AC],
+                    ap_uint<AXI_SZ> dc[MAX_NUM_DC],
+
+                    ap_uint<AXI_SZ> quant_field_out[MAX_NUM_BLOCK88],
+                    ap_uint<AXI_SZ> ac_strategy[MAX_NUM_BLOCK88],
+                    ap_uint<AXI_SZ> block[MAX_NUM_BLOCK88],
+                    ap_uint<AXI_SZ> order[MAX_NUM_ORDER]) {
+#pragma HLS INLINE off
+#pragma HLS DATAFLOW
+
+    Config config_dev[8];
+#pragma HLS ARRAY_PARTITION variable = config_dev complete dim = 1
+#pragma HLS DISAGGREGATE variable = config_dev
+
+#ifdef DEBUG
+    std::cout << "==================Kernel2 Start================" << std::endl;
+    std::cout << "===================Load config=================" << std::endl;
+#endif
+
+    load_config(config, config_dev);
+
+// scan
+#ifdef DEBUG
+    std::cout << "======================Scan=====================" << std::endl;
+#endif
+
+    hls::stream<ap_uint<32> > src_strm[3];
+#pragma HLS stream variable = src_strm depth = 4096
+#pragma HLS BIND_STORAGE variable = src_strm type = fifo impl = uram
+
+    hls::stream<bool> e_src_strm[3];
+#pragma HLS stream variable = e_src_strm depth = 64
+#pragma HLS resource variable = e_src_strm core = FIFO_SRL
+
+    //  xf::common::utils_hw::axiToStream<64, 32, ap_uint<32> >(
+    //      src, config_dev[0].src_num, src_strm, e_src_strm);
+
+    xf::common::utils_hw::axiToMultiStream<1024, 64, ap_uint<32>, ap_uint<32>, ap_uint<32> >(
+        src, src_strm[0], e_src_strm[0], src_strm[1], e_src_strm[1], src_strm[2], e_src_strm[2], config_dev[0].src_num,
+        config_dev[0].src_offset);
+
+    hls::stream<ap_uint<32> > quant_field_strm("quant_field_strm");
+#pragma HLS stream variable = quant_field_strm depth = 512
+#pragma HLS resource variable = quant_field_strm core = FIFO_BRAM
+
+    hls::stream<bool> e_quant_field_strm;
+#pragma HLS stream variable = e_quant_field_strm depth = 64
+#pragma HLS resource variable = e_quant_field_strm core = FIFO_SRL
+
+    xf::common::utils_hw::axiToStream<64, 32, ap_uint<32> >(quant_field_in, config_dev[0].in_quant_field_num,
+                                                            quant_field_strm, e_quant_field_strm);
+
+    hls::stream<ap_uint<32> > cmap_strm("cmap_strm");
+#pragma HLS stream variable = cmap_strm depth = 8
+#pragma HLS resource variable = cmap_strm core = FIFO_SRL
+
+    hls::stream<bool> e_cmap_strm;
+#pragma HLS stream variable = e_cmap_strm depth = 8
+#pragma HLS resource variable = e_cmap_strm core = FIFO_SRL
+
+    xf::common::utils_hw::axiToStream<64, 32, ap_uint<32> >(cmap, config_dev[0].cmap_num0, cmap_strm, e_cmap_strm);
+
+// load
+#ifdef DEBUG
+    std::cout << "===================Load cmap===================" << std::endl;
+#endif
+
+    hls::stream<float> quant_field0("acs_quant_field");
+#pragma HLS stream variable = quant_field0 depth = 1024
+#pragma HLS resource variable = quant_field0 core = FIFO_BRAM
+    hls::stream<float> quant_field1("quantizer_quant_field");
+#pragma HLS stream variable = quant_field1 depth = 4096
+#pragma HLS BIND_STORAGE variable = quant_field1 type = fifo impl = uram
+
+    float ytob;
+    float ytox;
+
+    float ytob_map[16384];
+#pragma HLS BIND_STORAGE variable = ytob_map type = ram_2p impl = uram
+    float ytox_map[16384];
+#pragma HLS BIND_STORAGE variable = ytox_map type = ram_2p impl = uram
+
+    eliminate_strm_end(e_src_strm[0]);
+    eliminate_strm_end(e_src_strm[1]);
+    eliminate_strm_end(e_src_strm[2]);
+    eliminate_strm_end(e_cmap_strm);
+
+    streamDup(quant_field_strm, e_quant_field_strm, quant_field0, quant_field1);
+
+    loadCMap(cmap_strm, config_dev[0].cmap_num1, ytob, ytox, ytob_map, ytox_map);
+
+// Find Best AcStrategy
+#ifdef DEBUG
+    std::cout << "=============Find best ac_strategy=============" << std::endl;
+#endif
+
+    hls::stream<uint8_t> ac_strategy0("acs0");
+#pragma HLS stream variable = ac_strategy0 depth = 1024
+#pragma HLS resource variable = ac_strategy0 core = FIFO_BRAM
+    hls::stream<uint8_t> ac_strategy1("acs1");
+#pragma HLS stream variable = ac_strategy1 depth = 4096
+#pragma HLS BIND_STORAGE variable = ac_strategy1 type = fifo impl = uram
+    hls::stream<uint8_t> ac_strategy2("acs2");
+#pragma HLS stream variable = ac_strategy2 depth = 8192
+#pragma HLS BIND_STORAGE variable = ac_strategy2 type = fifo impl = uram
+    hls::stream<uint8_t> ac_strategy3("acs3");
+#pragma HLS stream variable = ac_strategy3 depth = 8192
+#pragma HLS BIND_STORAGE variable = ac_strategy3 type = fifo impl = uram
+    hls::stream<float> dct[3];
+#pragma HLS stream variable = dct depth = 4096
+#pragma HLS BIND_STORAGE variable = dct type = fifo impl = uram
+    hls::stream<float> ac_dec[3];
+#pragma HLS stream variable = ac_dec depth = 8192
+#pragma HLS BIND_STORAGE variable = ac_dec type = fifo impl = uram
+
+    FindBestAcStrategy(config_dev[1], quant_field0, inv_dequant0_matrix8x8, inv_dequant0_matrix16x16,
+                       inv_dequant0_matrix32x32, src_strm, ac_strategy0, ac_strategy1, ac_strategy2, ac_strategy3, dct,
+                       ac_dec);
+
+// Find Best Quantizer (quantize quant field)
+#ifdef DEBUG
+    std::cout << "==============Find best quantizer==============" << std::endl;
+#endif
+
+    hls::stream<float> quantizer[3];
+#pragma HLS stream variable = quantizer depth = 8
+#pragma HLS ARRAY_PARTITION variable = quantizer complete
+    hls::stream<ap_uint<32> > scale_strm;
+#pragma HLS stream variable = scale_strm depth = 4
+#pragma HLS resource variable = scale_strm core = FIFO_SRL
+    hls::stream<int32_t> quant_img_ac0;
+#pragma HLS stream variable = quant_img_ac0 depth = 4096
+#pragma HLS BIND_STORAGE variable = quant_img_ac0 type = fifo impl = uram
+    hls::stream<int32_t> quant_img_ac1;
+#pragma HLS stream variable = quant_img_ac1 depth = 4096
+#pragma HLS BIND_STORAGE variable = quant_img_ac1 type = fifo impl = uram
+
+    FindBestQuantizer(config_dev[2], quant_field1, ac_strategy0, quantizer, scale_strm, quant_img_ac0, quant_img_ac1);
+
+// Initialize FrameEncCache
+#ifdef DEBUG
+    std::cout << "===========Initialize Frame EncCache===========" << std::endl;
+#endif
+
+    hls::stream<float> dc_dec[3];
+#pragma HLS stream variable = dc_dec depth = 1024
+#pragma HLS resource variable = dc_dec core = FIFO_BRAM
+    hls::stream<int16_t> dc_quantized0[3];
+#pragma HLS stream variable = dc_quantized0 depth = 4096
+#pragma HLS BIND_STORAGE variable = dc_quantized0 type = fifo impl = uram
+
+    InitializeFrameEncCache(config_dev[3], quantizer[0], quantizer[1], dct, ac_strategy1, ytob, ytox, dc_quantized0,
+                            dc_dec);
+
+// Compute Coefficient
+#ifdef DEBUG
+    std::cout << "==============Compute coefficients=============" << std::endl;
+#endif
+
+    hls::stream<int16_t> ac_quantized0[3];
+#pragma HLS stream variable = ac_quantized0 depth = 2048
+#pragma HLS resource variable = ac_quantized0 core = FIFO_BRAM
+
+    ComputeCoefficients(config_dev[4], quantizer[2], dc_dec, ac_dec, ac_strategy2, quant_img_ac0,
+
+                        ytob_map, ytox_map,
+
+                        dequantY_matrix4x4, dequantY_matrix8x8, dequantY_matrix16x16, dequantY_matrix32x32,
+
+                        inv_dequantY_matrix4x4, inv_dequantY_matrix8x8, inv_dequantY_matrix16x16,
+                        inv_dequantY_matrix32x32,
+
+                        dequant1_matrix4x4, dequant1_matrix8x8, dequant1_matrix16x16, dequant1_matrix32x32,
+
+                        inv_dequant_matrix4x4, inv_dequant_matrix8x8, inv_dequant_matrix16x16, inv_dequant_matrix32x32,
+
+                        ac_quantized0);
+
+// Reorder
+#ifdef DEBUG
+    std::cout << "======================Reorder==================" << std::endl;
+#endif
+
+    // Packed DC output of Reorder, consumed by WriteToAxi below; backed by a BRAM FIFO like its sibling streams.
+    hls::stream<ap_uint<32> > dc_quantized("dc_quantized");
+#pragma HLS stream variable = dc_quantized depth = 4096
+#pragma HLS resource variable = dc_quantized core = FIFO_BRAM
+
+    hls::stream<int16_t> ac_quantized1("ac_quantized1");
+#pragma HLS stream variable = ac_quantized1 depth = 4096
+#pragma HLS resource variable = ac_quantized1 core = FIFO_BRAM
+
+    hls::stream<ap_uint<32> > ac_quantized2("ac_quantized");
+#pragma HLS stream variable = ac_quantized2 depth = 4096
+#pragma HLS resource variable = ac_quantized2 core = FIFO_BRAM
+
+    hls::stream<ap_uint<32> > quant_img_ac2("qf");
+#pragma HLS stream variable = quant_img_ac2 depth = 4096
+#pragma HLS resource variable = quant_img_ac2 core = FIFO_BRAM
+
+    hls::stream<ap_uint<32> > ac_strategy4("acs4");
+#pragma HLS stream variable = ac_strategy4 depth = 4096
+#pragma HLS resource variable = ac_strategy4 core = FIFO_BRAM
+
+    hls::stream<ap_uint<32> > ac_block("block");
+#pragma HLS stream variable = ac_block depth = 4096
+#pragma HLS resource variable = ac_block core = FIFO_BRAM
+
+    Reorder(config_dev[5], dc_quantized0, ac_quantized0, ac_strategy3, quant_img_ac1, dc_quantized, ac_quantized1,
+            ac_quantized2, quant_img_ac2, ac_strategy4, ac_block);
+
+// Compute Order
+#ifdef DEBUG
+    std::cout << "================Compute Coeff order============" << std::endl;
+#endif
+
+    hls::stream<ap_uint<32> > order_strm("order");
+#pragma HLS stream variable = order_strm depth = 4096
+#pragma HLS resource variable = order_strm core = FIFO_BRAM
+
+    ComputeCoeffOrder(config_dev[6], ac_quantized1, order_strm);
+
+// write out
+#ifdef DEBUG
+    std::cout << "=====================Write out=================" << std::endl;
+#endif
+
+    WriteToAxi<64, 32>(config_dev[7], ac, ac_quantized2, dc, dc_quantized, quant_field_out, quant_img_ac2, scale_strm,
+                       ac_strategy, ac_strategy4, block, ac_block, order, order_strm);
+
+#ifdef DEBUG
+    std::cout << "===================Kernel2 End=================" << std::endl;
+#endif
+}
+
+namespace xf {
+namespace codec {
+
+// Top-level (extern "C") entry point of PIK encoder kernel 2.
+//
+// The function body only declares the HLS interfaces and forwards every
+// argument, in order, to kernel2Wrapper.
+//
+// Interface layout established by the pragmas below:
+//   - every pointer argument gets its own m_axi bundle
+//     (gmem0_0..gmem0_3 for config/src/quant_field_in/cmap,
+//      gmem1_0..gmem1_5 for ac/dc/quant_field_out/ac_strategy/block/order);
+//   - every pointer argument and the return port are also attached to the
+//     s_axilite bundle "control".
+//
+// NOTE(review): config/src/quant_field_in/cmap look like inputs and
+// ac/dc/quant_field_out/ac_strategy/block/order look like outputs, judging
+// by the names only - confirm against kernel2Wrapper.
+extern "C" void pikEncKernel2Top(ap_uint<AXI_SZ> config[MAX_NUM_CONFIG],
+
+                                 ap_uint<2 * AXI_SZ> src[AXI_OUT / 2],
+                                 ap_uint<AXI_SZ> quant_field_in[AXI_QF],
+                                 ap_uint<AXI_SZ> cmap[AXI_CMAP],
+
+                                 ap_uint<AXI_SZ> ac[MAX_NUM_AC],
+                                 ap_uint<AXI_SZ> dc[MAX_NUM_DC],
+
+                                 ap_uint<AXI_SZ> quant_field_out[AXI_QF],
+                                 ap_uint<AXI_SZ> ac_strategy[MAX_NUM_BLOCK88],
+                                 ap_uint<AXI_SZ> block[MAX_NUM_BLOCK88],
+                                 ap_uint<AXI_SZ> order[MAX_NUM_ORDER]) {
+#pragma HLS INLINE off
+
+// clang-format off
+#pragma HLS INTERFACE m_axi offset = slave latency = 64 \
+    num_read_outstanding = 4 num_write_outstanding = 4 max_write_burst_length = 8 \
+    max_read_burst_length = 8 bundle = gmem0_0 port = config
+#pragma HLS INTERFACE s_axilite port = config bundle = control
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 64 \
+    num_read_outstanding = 8 num_write_outstanding = 8 max_write_burst_length = 32 \
+    max_read_burst_length = 32 bundle = gmem0_1 port = src 
+#pragma HLS INTERFACE s_axilite port = src bundle = control
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 64 \
+    num_read_outstanding = 8 num_write_outstanding = 8 max_write_burst_length = 32 \
+    max_read_burst_length = 32 bundle = gmem0_2 port = quant_field_in
+#pragma HLS INTERFACE s_axilite port = quant_field_in bundle = control
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 64 \
+    num_read_outstanding = 8 num_write_outstanding = 8 max_write_burst_length = 32 \
+    max_read_burst_length = 32 bundle = gmem0_3 port = cmap
+#pragma HLS INTERFACE s_axilite port = cmap bundle = control
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 64 \
+    num_read_outstanding = 8 num_write_outstanding = 8 max_write_burst_length = 32 \
+    max_read_burst_length = 32 bundle = gmem1_0 port = ac
+#pragma HLS INTERFACE s_axilite port = ac bundle = control
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 64 \
+    num_read_outstanding = 8 num_write_outstanding = 8 max_write_burst_length = 32 \
+    max_read_burst_length = 32 bundle = gmem1_1 port = dc
+#pragma HLS INTERFACE s_axilite port = dc bundle = control
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 64 \
+    num_read_outstanding = 8 num_write_outstanding = 8 max_write_burst_length = 32 \
+    max_read_burst_length = 32 bundle = gmem1_2 port = quant_field_out
+#pragma HLS INTERFACE s_axilite port = quant_field_out bundle = control
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 64 \
+    num_read_outstanding = 8 num_write_outstanding = 8 max_write_burst_length = 32 \
+    max_read_burst_length = 32 bundle = gmem1_3 port = ac_strategy 
+#pragma HLS INTERFACE s_axilite port = ac_strategy bundle = control
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 64 \
+    num_read_outstanding = 8 num_write_outstanding = 8 max_write_burst_length = 32 \
+    max_read_burst_length = 32 bundle = gmem1_4 port = block 
+#pragma HLS INTERFACE s_axilite port = block bundle = control
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 64 \
+    num_read_outstanding = 8 num_write_outstanding = 8 max_write_burst_length = 32 \
+    max_read_burst_length = 32 bundle = gmem1_5 port = order
+#pragma HLS INTERFACE s_axilite port = order bundle = control
+
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+    // clang-format on
+
+    // All the work happens in the wrapper; arguments are passed through unchanged.
+    kernel2Wrapper(config, src, quant_field_in, cmap, ac, dc, quant_field_out, ac_strategy, block, order);
+}
+} // namespace codec
+} // namespace xf
\ No newline at end of file
diff --git a/codec/L2/demos/pikEnc/kernel/XAccPIKKernel3.cpp b/codec/L2/demos/pikEnc/kernel/XAccPIKKernel3.cpp
new file mode 100644
index 0000000000..4c0738b124
--- /dev/null
+++ b/codec/L2/demos/pikEnc/kernel/XAccPIKKernel3.cpp
@@ -0,0 +1,1271 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pikEnc/XAccPIKKernel3.hpp"
+#include "xf_utils_hw/axi_to_stream.hpp"
+#include "xf_utils_hw/stream_to_axi.hpp"
+
+// ------------------------------------------------------------
+// Static lookup table with one byte per context (hls_kNumContexts entries).
+// The stored values lie in the range 0..23. ANS_top passes this table to
+// hls_WriteTokensTop as its context-map argument.
+// NOTE(review): presumably maps each AC context index to the index of the
+// static code table used for it - confirm against hls_WriteTokensTop.
+uint8_t ac_static_context_map[hls_kNumContexts] = {
+    0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,
+    1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,
+    2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,
+    0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  3,  3,  4,  4,  4,  4,  5,  5,  5,  5,  5,  6,  6,  6,  6,  6,
+    7,  7,  7,  7,  4,  4,  5,  5,  5,  5,  5,  6,  6,  6,  6,  6,  7,  7,  7,  7,  4,  4,  5,  5,  5,  5,  5,  6,
+    6,  6,  6,  6,  8,  8,  8,  8,  8,  8,  5,  5,  5,  5,  5,  6,  6,  6,  6,  6,  9,  9,  9,  9,  8,  8,  5,  5,
+    5,  5,  5,  6,  6,  6,  6,  9,  9,  9,  9,  8,  8,  5,  5,  5,  5,  5,  6,  6,  6,  9,  9,  9,  9,  8,  8,  5,
+    5,  5,  5,  5,  3,  10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 14, 14, 14, 14, 11, 11, 12,
+    12, 12, 12, 12, 13, 13, 13, 13, 13, 14, 14, 14, 14, 11, 11, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 15, 15, 15,
+    15, 15, 15, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 16, 16, 16, 16, 15, 15, 12, 12, 12, 12, 12, 13, 13, 13, 13,
+    16, 16, 16, 16, 15, 15, 12, 12, 12, 12, 12, 13, 13, 13, 16, 16, 16, 16, 15, 15, 12, 12, 12, 12, 12, 10, 17, 17,
+    18, 18, 18, 18, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 21, 21, 21, 21, 18, 18, 19, 19, 19, 19, 19, 20, 20, 20,
+    20, 20, 21, 21, 21, 21, 18, 18, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 22, 22, 22, 22, 22, 22, 19, 19, 19, 19,
+    19, 20, 20, 20, 20, 20, 23, 23, 23, 23, 22, 22, 19, 19, 19, 19, 19, 20, 20, 20, 20, 23, 23, 23, 23, 22, 22, 19,
+    19, 19, 19, 19, 20, 20, 20, 23, 23, 23, 23, 22, 22, 19, 19, 19, 19, 19, 17,
+};
+//----------------------------------------------------------
+// Drains the token streams into two on-chip RAMs, packing three 24-bit
+// tokens into every 72-bit word.
+//
+//   strm_token_symb / strm_token_bits : one entry of each is read per token
+//   strm_e_token                      : end flag; one `false` precedes every
+//                                       token, a final `true` ends the stream
+//   total_token                       : (out) number of tokens stored
+//   ram_symb                          : (out) packed {context[15:0], symbol[7:0]}
+//   ram_bits                          : (out) packed {nbits[7:0], bits[15:0]}
+void buffer_tokens_onboard(
+    // const int total_token,
+    hls::stream<hls_Token_symb>& strm_token_symb,
+    hls::stream<hls_Token_bits>& strm_token_bits,
+    hls::stream<bool>& strm_e_token,
+
+    int& total_token,
+    ap_uint<72> ram_symb[hls_kMaxBufSize], // hls_Token_symb*3
+    ap_uint<72> ram_bits[hls_kMaxBufSize]  // hls_Token_bits*3
+    ) {
+#pragma HLS INLINE OFF
+
+    ap_uint<72> word_symb; // word currently being assembled for ram_symb
+    ap_uint<72> word_bits; // word currently being assembled for ram_bits
+    ap_uint<2> slot = 0;   // 24-bit lane (0..2) the next token goes into
+    int word_idx = 0;      // next RAM word to write
+
+    bool last = strm_e_token.read();
+    while (!last) {
+#pragma HLS PIPELINE II = 1
+        last = strm_e_token.read();
+
+        const hls_Token_symb symb_in = strm_token_symb.read();
+        const hls_Token_bits bits_in = strm_token_bits.read();
+
+        ap_uint<16> ctx = symb_in.context;
+        ap_uint<8> sym = symb_in.symbol;
+        ap_uint<16> payload = bits_in.bits;
+        ap_uint<8> payload_len = bits_in.nbits;
+
+        // Bit range of the current lane inside the 72-bit word.
+        const int lo = 24 * slot.to_int();
+        const int hi = lo + 23;
+        word_symb(hi, lo) = (ctx, sym);
+        word_bits(hi, lo) = (payload_len, payload);
+
+        if (slot != 2) {
+            slot++;
+        } else {
+            // Third lane filled: commit the word and start a new one.
+            ram_symb[word_idx] = word_symb;
+            ram_bits[word_idx] = word_bits;
+            word_idx++;
+            slot = 0;
+        }
+    }
+
+    // Flush a partially filled word, if any.
+    if (slot > 0) {
+        ram_symb[word_idx] = word_symb;
+        ram_bits[word_idx] = word_bits;
+    }
+
+    total_token = 3 * word_idx + slot.to_int();
+}
+
+//----------------------------------------------------------
+// Streams the symbol tokens stored in ram_symb, processing them in chunks of
+// hls_kANSBufferSize tokens. Within each chunk [i, end) the tokens are
+// emitted in reverse order, i.e. token end-1 first and token i last.
+//
+//   total_token           : number of tokens stored in ram_symb
+//   ram_symb              : packed tokens, three 24-bit tokens per 72-bit word
+//                           (context in bits 23..8, symbol in bits 7..0)
+//   strm_ac_token_reverse : (out) unpacked tokens, one per write
+void read_token_symb(const int total_token,
+                     ap_uint<72> ram_symb[hls_kMaxBufSize], // hls_Token_symb*3
+
+                     hls::stream<hls_Token_symb>& strm_ac_token_reverse) {
+#pragma HLS INLINE OFF
+    for (int i = 0; i < total_token; i += hls_kANSBufferSize) {
+        // The last chunk may be shorter than hls_kANSBufferSize.
+        int left = total_token - i;
+        int end = hls_kANSBufferSize <= left ? (i + hls_kANSBufferSize) : total_token;
+
+        int addr_reverse = (end) / 3;
+
+        ap_uint<72> buffer_symb;
+
+        // tmp == end % 3: how many tokens of the chunk sit in word end/3.
+        int tmp = end - addr_reverse * 3; // 1,2,0
+        ap_uint<2> cnt_r;
+        if (!tmp) {
+            // Token end-1 is the top lane of the previous word.
+            cnt_r = 2;
+            buffer_symb = ram_symb[addr_reverse - 1];
+            addr_reverse -= 2;
+        } else {
+            // Token end-1 is lane tmp-1 of word end/3.
+            cnt_r = tmp - 1;
+            buffer_symb = ram_symb[addr_reverse];
+            addr_reverse--;
+        }
+        // From here on addr_reverse is the next (lower) word to fetch.
+
+        _XF_IMAGE_PRINT("---cnt_r = %d, addr_reverse =%d, start=%d, end=%d\n", (int)cnt_r, addr_reverse, i, end);
+
+        for (int j = i; j < end; j++) {
+#pragma HLS PIPELINE II = 1
+            // reverse sequence
+            ap_uint<24> token_symb = buffer_symb(24 * (cnt_r + 1) - 1, 24 * cnt_r);
+            hls_Token_symb ac_token_reverse;
+            ac_token_reverse.context = token_symb(23, 8);
+            ac_token_reverse.symbol = token_symb(7, 0);
+            strm_ac_token_reverse.write(ac_token_reverse);
+
+            if (cnt_r == 0) {
+                // Lowest lane consumed: step to the previous word, unless we
+                // are already at the start of the RAM.
+                if (addr_reverse >= 0) {
+                    buffer_symb = ram_symb[addr_reverse];
+                }
+                addr_reverse--;
+                cnt_r = 2;
+            } else {
+                cnt_r--;
+            }
+        }
+
+        _XF_IMAGE_PRINT("last---cnt_r = %d, addr_reverse =%d, start=%d, end=%d\n", (int)cnt_r, addr_reverse, i, end);
+    }
+}
+
+//----------------------------------------------------------
+// Streams the bit tokens stored in ram_bits in their original (forward)
+// order, processing them in chunks of hls_kANSBufferSize tokens.
+//
+//   total_token    : number of tokens stored in ram_bits
+//   ram_bits       : packed tokens, three 24-bit tokens per 72-bit word
+//                    (nbits in bits 23..16, bits in bits 15..0)
+//   strm_token_bit : (out) unpacked tokens, one per write
+void read_token_bits(const int total_token,
+                     ap_uint<72> ram_bits[hls_kMaxBufSize], // hls_Token_bits*3
+
+                     hls::stream<hls_Token_bits>& strm_token_bit) {
+#pragma HLS INLINE OFF
+    for (int i = 0; i < total_token; i += hls_kANSBufferSize) {
+        // The last chunk may be shorter than hls_kANSBufferSize.
+        int left = total_token - i;
+        int end = hls_kANSBufferSize <= left ? (i + hls_kANSBufferSize) : total_token;
+
+        // Word and lane holding the first token of this chunk.
+        int addr = i / 3;
+        ap_uint<72> buffer_bits;
+
+        int tmp3 = i - addr * 3;
+        ap_uint<2> cnt3 = tmp3;
+        _XF_IMAGE_PRINT("---  cnt3=%d, start=%d, end=%d\n", (int)cnt3, i, end);
+
+        buffer_bits = ram_bits[addr];
+        addr++;
+
+        for (int j = i; j < end; j++) {
+#pragma HLS PIPELINE II = 1
+            ap_uint<24> token_bits = buffer_bits(24 * (cnt3 + 1) - 1, 24 * cnt3);
+            hls_Token_bits token_bit_plain;
+            token_bit_plain.nbits = token_bits(23, 16);
+            token_bit_plain.bits = token_bits(15, 0);
+            strm_token_bit.write(token_bit_plain);
+
+            if (cnt3 == 2) {
+                // Fetch the next word only if this chunk still needs it. The
+                // word after the chunk's last token is never used here (the
+                // next chunk reloads it), and for the final chunk it may lie
+                // beyond the end of ram_bits.
+                if (j + 1 < end) {
+                    buffer_bits = ram_bits[addr];
+                }
+                addr++;
+                cnt3 = 0;
+            } else {
+                cnt3++;
+            }
+        }
+
+        _XF_IMAGE_PRINT("last---cnt3 = %d,  addr         =%d, start=%d, end=%d\n", (int)cnt3, addr, i, end);
+    }
+}
+
+//----------------------------------------------------------
+// Dataflow region that replays the buffered tokens and hands them to
+// hls_WriteTokensTop.
+//
+// Three processes run in the region:
+//   1. read_token_symb  : ram_symb -> strm_ac_token_reverse
+//   2. read_token_bits  : ram_bits -> strm_token_bit
+//   3. hls_WriteTokensTop consumes both streams
+//
+// is_dc, dc_context_map, hls_codes, len_ac, strm_ac_dc_byte and strm_ac_dc_e
+// are forwarded unchanged to hls_WriteTokensTop, together with the file-level
+// table ac_static_context_map; their meaning is defined by that function.
+void ANS_top(const bool is_dc,
+             uint8_t dc_context_map[MAX_NUM_COLOR],
+             const int total_token,
+             ap_uint<72> ram_symb[hls_kMaxBufSize], // hls_Token_symb*3
+             ap_uint<72> ram_bits[hls_kMaxBufSize], // hls_Token_bits*3
+             hls_ANSEncSymbolInfo hls_codes[hls_kNumStaticContexts][MAX_ALPHABET_SIZE],
+
+             // uint32_t& num_extra_bits,
+             int& len_ac,
+             hls::stream<uint16_t>& strm_ac_dc_byte,
+             hls::stream<bool>& strm_ac_dc_e) {
+#pragma HLS INLINE OFF
+#pragma HLS DATAFLOW
+
+    // Depth-32 LUTRAM FIFOs connecting the two readers to the writer.
+    hls::stream<hls_Token_symb> strm_ac_token_reverse;
+#pragma HLS RESOURCE variable = strm_ac_token_reverse core = FIFO_LUTRAM
+#pragma HLS STREAM variable = strm_ac_token_reverse depth = 32
+
+    hls::stream<hls_Token_bits> strm_token_bit;
+#pragma HLS RESOURCE variable = strm_token_bit core = FIFO_LUTRAM
+#pragma HLS STREAM variable = strm_token_bit depth = 32
+
+    read_token_symb(total_token, ram_symb, strm_ac_token_reverse);
+
+    read_token_bits(total_token, ram_bits, strm_token_bit);
+
+    hls_WriteTokensTop(total_token, strm_ac_token_reverse, strm_token_bit, hls_codes, ac_static_context_map, is_dc,
+                       dc_context_map, len_ac, strm_ac_dc_byte, strm_ac_dc_e);
+}
+
+// ------------------------------------------------------------
+
+// ------------------------------------------------------------
+
+#define DEBUGCONFIG
+// Copies the configuration words of kernel 3 from `in` into a local buffer
+// and fills four identical ConfigKernel3 records from words 0..15.
+//
+//   in     : MAX_NUM_CONFIG 32-bit configuration words
+//   config : (out) four copies of the decoded configuration
+//
+// In non-synthesis builds with DEBUGCONFIG defined, the 16 decoded words are
+// also printed to std::cout.
+void load_config_kernel3(ap_uint<32> in[MAX_NUM_CONFIG], ConfigKernel3 config[4]) {
+#pragma HLS INLINE OFF
+
+    // Read the whole configuration array once, then decode from the local copy.
+    ap_uint<32> tmp[MAX_NUM_CONFIG];
+    for (int i = 0; i < MAX_NUM_CONFIG; i++) {
+#pragma HLS PIPELINE II = 1
+        tmp[i] = in[i];
+    }
+
+    for (int i = 0; i < 4; i++) {
+#pragma HLS PIPELINE II = 1
+        config[i].xsize = tmp[0];
+        config[i].ysize = tmp[1];
+        config[i].xblock8 = tmp[2];
+        config[i].yblock8 = tmp[3];
+        config[i].xblock32 = tmp[4];
+        config[i].yblock32 = tmp[5];
+        config[i].xblock64 = tmp[6];
+        config[i].yblock64 = tmp[7];
+        config[i].ac_xgroup = tmp[8];
+        config[i].ac_ygroup = tmp[9];
+        config[i].dc_xgroup = tmp[10];
+        config[i].dc_ygroup = tmp[11];
+        config[i].ac_group = tmp[12];
+        config[i].dc_group = tmp[13];
+        config[i].num_dc = tmp[14];
+        config[i].num_ac = tmp[15];
+    }
+
+#ifndef __SYNTHESIS__
+#ifdef DEBUGCONFIG
+    std::cout << "k3 Config:" << std::endl;
+    std::cout << "xsize:" << tmp[0] << std::endl;
+    std::cout << "ysize:" << tmp[1] << std::endl;
+    std::cout << "xblock8:" << tmp[2] << std::endl;
+    std::cout << "yblock8:" << tmp[3] << std::endl;
+    std::cout << "xblock32:" << tmp[4] << std::endl;
+    std::cout << "yblock32:" << tmp[5] << std::endl;
+    std::cout << "xblock64:" << tmp[6] << std::endl;
+    std::cout << "yblock64:" << tmp[7] << std::endl;
+    std::cout << "ac_xgroup:" << tmp[8] << std::endl;
+    std::cout << "ac_ygroup:" << tmp[9] << std::endl;
+    std::cout << "dc_xgroup:" << tmp[10] << std::endl;
+    std::cout << "dc_ygroup:" << tmp[11] << std::endl;
+    std::cout << "ac_group:" << tmp[12] << std::endl;
+    std::cout << "dc_group:" << tmp[13] << std::endl;
+    std::cout << "num_dc:" << tmp[14] << std::endl;
+    std::cout << "num_ac:" << tmp[15] << std::endl;
+#endif
+#endif
+}
+
+// Converts a flag-terminated stream of _IStrm values into a stream of _TStrm
+// values using a C-style cast.
+//
+//   istrm     : input data, one element per `false` read from strm_e_in
+//   strm_e_in : end flag; one `false` precedes every element, a final
+//               `true` terminates the stream
+//   ostrm     : (out) converted data; no end flag is produced
+template <typename _IStrm, typename _TStrm>
+void streamRetype(hls::stream<_IStrm>& istrm, hls::stream<bool>& strm_e_in, hls::stream<_TStrm>& ostrm) {
+    bool e = strm_e_in.read();
+    while (!e) {
+#pragma HLS PIPELINE II = 1
+        e = strm_e_in.read();
+        _IStrm in = istrm.read();
+        _TStrm out = (_TStrm)in;
+        ostrm.write(out);
+    }
+}
+
+// Same conversion as the three-argument overload, but the end flag is
+// regenerated on the output side: a `false` is written to strm_e_o for every
+// element written to ostrm, followed by a single terminating `true`.
+template <typename _IStrm, typename _TStrm>
+void streamRetype(hls::stream<_IStrm>& istrm,
+                  hls::stream<bool>& strm_e_in,
+                  hls::stream<_TStrm>& ostrm,
+                  hls::stream<bool>& strm_e_o) {
+    bool last = strm_e_in.read();
+    while (!last) {
+#pragma HLS PIPELINE II = 1
+        // Fetch the flag for the following element before handling this one.
+        last = strm_e_in.read();
+        const _IStrm value = istrm.read();
+        ostrm.write((_TStrm)value);
+        strm_e_o.write(false);
+    }
+    // Close the output stream.
+    strm_e_o.write(true);
+}
+
+// Reads an AXI buffer as a stream of _TStrm1 values and casts every value to
+// _RStrm1.
+//
+// Implemented as a two-stage dataflow region:
+//   1. xf::common::utils_hw::axiToStream turns rbuf1 into a data stream plus
+//      an end-flag stream;
+//   2. streamRetype casts each element and drops the end flag.
+//
+//   rbuf1  : AXI buffer of _WAxi1-bit words
+//   num1   : passed through to axiToStream (presumably the number of
+//            elements to read - confirm against axiToStream)
+//   ostrm1 : (out) converted elements
+template <int _BurstLen, int _WAxi1, typename _TStrm1, typename _RStrm1>
+void axiToStreamRetype(ap_uint<_WAxi1>* rbuf1, const int num1, hls::stream<_RStrm1>& ostrm1) {
+#pragma HLS INLINE OFF
+#pragma HLS DATAFLOW
+
+    // Intermediate FIFOs between the AXI reader and the cast stage.
+    hls::stream<_TStrm1> strm_in1("strm_in1");
+#pragma HLS STREAM variable = strm_in1 depth = 32
+    hls::stream<bool> strm_e_in1("strm_e_in1");
+#pragma HLS STREAM variable = strm_e_in1 depth = 32
+
+    xf::common::utils_hw::axiToStream<_BurstLen, _WAxi1, _TStrm1>(rbuf1, num1, strm_in1, strm_e_in1);
+
+    streamRetype<_TStrm1, _RStrm1>(strm_in1, strm_e_in1, ostrm1);
+}
+
+// Copies `num` elements from each of two AXI buffers to two streams,
+// alternating between the buffers in blocks of _BurstLen elements:
+// _BurstLen elements of rbuf1 go to ostrm1, then _BurstLen elements of rbuf2
+// go to ostrm2, and so on. The remaining num % _BurstLen elements are handled
+// the same way at the end (rbuf1 first, then rbuf2).
+//
+// If _Padd1 (resp. _Padd2) is true the corresponding buffer is never read and
+// zeros are written to the stream instead.
+//
+// In non-synthesis builds every element is printed to std::cout.
+template <int _BurstLen, int _WAxi1, typename _RStrm1, bool _Padd1, int _WAxi2, typename _RStrm2, bool _Padd2>
+void axiToStreamRetype_serial(ap_uint<_WAxi1>* rbuf1,
+                              ap_uint<_WAxi2>* rbuf2,
+                              const int num,
+                              hls::stream<_RStrm1>& ostrm1,
+                              hls::stream<_RStrm2>& ostrm2) {
+#pragma HLS INLINE off
+#pragma HLS FUNCTION_INSTANTIATE variable = ostrm1
+
+    // Number of full blocks and size of the trailing partial block.
+    const int loop_num = num / _BurstLen;
+    const int fraction = num % _BurstLen;
+
+    // Running read positions in rbuf1 / rbuf2.
+    int addr1 = 0;
+    int addr2 = 0;
+base:
+    for (int i = 0; i < loop_num; i++) {
+        // One full block from rbuf1 ...
+        for (int j = 0; j < _BurstLen; j++) {
+#pragma HLS PIPELINE II = 1
+
+            _RStrm1 tmp1;
+            if (_Padd1)
+                tmp1 = 0;
+            else
+                tmp1 = rbuf1[addr1];
+            ostrm1.write(tmp1);
+#ifndef __SYNTHESIS__
+            std::cout << "base: addr1=" << addr1 << " strm1=" << (int)tmp1 << std::endl;
+#endif
+            addr1++;
+        }
+
+        // ... followed by one full block from rbuf2.
+        for (int j = 0; j < _BurstLen; j++) {
+#pragma HLS PIPELINE II = 1
+
+            _RStrm2 tmp2;
+            if (_Padd2)
+                tmp2 = 0;
+            else
+                tmp2 = rbuf2[addr2];
+#ifndef __SYNTHESIS__
+            std::cout << "base: addr2=" << addr2 << " strm2=" << (int)tmp2 << std::endl;
+#endif
+            ostrm2.write(tmp2);
+            addr2++;
+        }
+    }
+
+    // Trailing partial block of rbuf1.
+fraction1:
+    for (int i = 0; i < fraction; i++) {
+#pragma HLS PIPELINE II = 1
+
+        _RStrm1 tmp1;
+        if (_Padd1)
+            tmp1 = 0;
+        else
+            tmp1 = rbuf1[addr1];
+#ifndef __SYNTHESIS__
+        std::cout << "fraction: addr1=" << addr1 << " strm1=" << (int)tmp1 << std::endl;
+#endif
+        ostrm1.write(tmp1);
+        addr1++;
+    }
+    // Trailing partial block of rbuf2.
+fraction2:
+    for (int i = 0; i < fraction; i++) {
+#pragma HLS PIPELINE II = 1
+
+        _RStrm2 tmp2;
+        if (_Padd2)
+            tmp2 = 0;
+        else
+            tmp2 = rbuf2[addr2];
+#ifndef __SYNTHESIS__
+        std::cout << "fraction: addr2=" << addr2 << " strm2=" << (int)tmp2 << std::endl;
+#endif
+        ostrm2.write(tmp2);
+        addr2++;
+    }
+}
+
+// Copies `num` elements from each of two AXI buffers to two streams, reading
+// one element of rbuf1 and one element of rbuf2 in the same loop iteration.
+// The work is split into num / _BurstLen full blocks followed by the
+// remaining num % _BurstLen elements.
+//
+// If _Padd1 (resp. _Padd2) is true the corresponding buffer is never read and
+// zeros are written to the stream instead.
+//
+// In non-synthesis builds every element is printed to std::cout.
+template <int _BurstLen, int _WAxi1, typename _RStrm1, bool _Padd1, int _WAxi2, typename _RStrm2, bool _Padd2>
+void axiToStreamRetype_parallel(ap_uint<_WAxi1>* rbuf1,
+                                ap_uint<_WAxi2>* rbuf2,
+                                const int num,
+                                hls::stream<_RStrm1>& ostrm1,
+                                hls::stream<_RStrm2>& ostrm2) {
+#pragma HLS INLINE off
+
+    // Number of full blocks and size of the trailing partial block.
+    const int loop_num = num / _BurstLen;
+    const int fraction = num % _BurstLen;
+
+    // Running read positions in rbuf1 / rbuf2.
+    int addr1 = 0;
+    int addr2 = 0;
+base:
+    for (int i = 0; i < loop_num; i++) {
+        for (int j = 0; j < _BurstLen; j++) {
+#pragma HLS PIPELINE II = 1
+
+            // Element for ostrm1.
+            _RStrm1 tmp1;
+            if (_Padd1)
+                tmp1 = 0;
+            else
+                tmp1 = rbuf1[addr1];
+            ostrm1.write(tmp1);
+#ifndef __SYNTHESIS__
+            std::cout << "base: addr1=" << addr1 << " strm1=" << (int)tmp1 << std::endl;
+#endif
+            addr1++;
+
+            // Element for ostrm2.
+            _RStrm2 tmp2;
+            if (_Padd2)
+                tmp2 = 0;
+            else
+                tmp2 = rbuf2[addr2];
+#ifndef __SYNTHESIS__
+            std::cout << "base: addr2=" << addr2 << " strm2=" << (int)tmp2 << std::endl;
+#endif
+            ostrm2.write(tmp2);
+            addr2++;
+        }
+    }
+
+    // Trailing partial block, same per-iteration pattern as above.
+fraction:
+    for (int i = 0; i < fraction; i++) {
+#pragma HLS PIPELINE II = 1
+
+        _RStrm1 tmp1;
+        if (_Padd1)
+            tmp1 = 0;
+        else
+            tmp1 = rbuf1[addr1];
+#ifndef __SYNTHESIS__
+        std::cout << "fraction: addr1=" << addr1 << " strm1=" << (int)tmp1 << std::endl;
+#endif
+        ostrm1.write(tmp1);
+        addr1++;
+
+        _RStrm2 tmp2;
+        if (_Padd2)
+            tmp2 = 0;
+        else
+            tmp2 = rbuf2[addr2];
+#ifndef __SYNTHESIS__
+        std::cout << "fraction: addr2=" << addr2 << " strm2=" << (int)tmp2 << std::endl;
+#endif
+        ostrm2.write(tmp2);
+        addr2++;
+    }
+}
+
+// ------------------------------------------------------------
+// Streams the AC coefficients stored in `rbuf` in group/tile order.
+//
+// Traversal order (outermost first): AC group row (gy), AC group column (gx),
+// tile row inside the group (tby), tile column (tbx), colour plane (c), block
+// row inside the tile (by), block inside the row (bx), and finally the 64
+// coefficients of the block. Groups span up to 8x8 tiles and tiles up to
+// hls_kTileDimInBlocks blocks per side; both are clipped at the right and
+// bottom edges using xblock64/yblock64 and xblock8/yblock8.
+//
+// For every block row of a tile the coefficients are first copied from AXI
+// into a local buffer and then written to the stream, as a dataflow region.
+//
+//   rbuf       : AXI buffer; colour plane c starts at offset num_dc * 64 * c
+//   config_dev : image/block geometry
+//   strm_ac    : (out) coefficients, one per write
+//
+// _BurstLen is not used by the implementation.
+template <int _WAxi, int _BurstLen>
+void axiToPikAcStream(const ap_uint<_WAxi>* rbuf,
+                      ConfigKernel3 config_dev,
+
+                      hls::stream<dct_t>& strm_ac) {
+#pragma HLS INLINE OFF
+
+    const int xsize_blocks = config_dev.xblock8;
+    const int ysize_blocks = config_dev.yblock8;
+    const int xsize_tiles = config_dev.xblock64;
+    const int ysize_tiles = config_dev.yblock64;
+    const int element_size = config_dev.num_dc * 64;
+    int cnt = 0; // number of coefficients emitted (debug only)
+
+    for (int gy = 0; gy < config_dev.ac_ygroup; ++gy) {
+        for (int gx = 0; gx < config_dev.ac_xgroup; ++gx) {
+            hls_Rect group_rect;
+            group_rect.x0 = gx * 8;
+            group_rect.y0 = gy * 8;
+            group_rect.xsize = (group_rect.x0 + 8 <= xsize_tiles) ? 8 : (xsize_tiles - group_rect.x0);
+            group_rect.ysize = (group_rect.y0 + 8 <= ysize_tiles) ? 8 : (ysize_tiles - group_rect.y0);
+
+            for (int tby = 0; tby < group_rect.ysize; ++tby) {
+                for (int tbx = 0; tbx < group_rect.xsize; ++tbx) {
+                    hls_Rect tile_rect;
+                    tile_rect.x0 = (group_rect.x0 + tbx) * hls_kTileDimInBlocks;
+                    tile_rect.y0 = (group_rect.y0 + tby) * hls_kTileDimInBlocks;
+
+                    tile_rect.xsize = (tile_rect.x0 + hls_kTileDimInBlocks <= xsize_blocks)
+                                          ? hls_kTileDimInBlocks
+                                          : (xsize_blocks - tile_rect.x0);
+                    tile_rect.ysize = (tile_rect.y0 + hls_kTileDimInBlocks <= ysize_blocks)
+                                          ? hls_kTileDimInBlocks
+                                          : (ysize_blocks - tile_rect.y0);
+
+                    // Words needed for one block row of this tile. Only these
+                    // are consumed by the output loop, so only these are
+                    // loaded: a partial tile at the right edge no longer reads
+                    // past its row (and possibly past the end of rbuf).
+                    // Capped at 512, the size of the local buffer.
+                    const int row_len = (tile_rect.xsize < 8) ? tile_rect.xsize * 64 : 512;
+
+                    _XF_IMAGE_PRINT("-5 debug the axi(%d,%d... ,%d,%d) - E2B\n", tile_rect.x0, tile_rect.y0,
+                                    tile_rect.xsize, tile_rect.ysize);
+
+                    for (int c = 0; c < MAX_NUM_COLOR; ++c) {
+                        for (int by = 0; by < tile_rect.ysize; ++by) {
+#pragma HLS DATAFLOW
+                            // Start of this tile's block row `by` in plane c.
+                            const ap_uint<_WAxi>* vec_ptr = rbuf + element_size * c +
+                                                            xsize_blocks * (by + tby * 8 + gy * 64) * 64 + gx * 4096 +
+                                                            8 * tbx * 64;
+                            ap_uint<_WAxi> row_tile_ram[512]; // 512 = 8*64 for one row of a tile
+
+                            for (int n = 0; n < row_len; n++) {
+#pragma HLS loop_tripcount min = 1 max = 1
+#pragma HLS PIPELINE II = 1
+                                row_tile_ram[n] = vec_ptr[n];
+                            }
+
+                            for (int bx = 0; bx < tile_rect.xsize; bx++) { // OUTPUT
+                                for (ap_uint<7> i = 0; i < 64; i++) {
+#pragma HLS loop_tripcount min = 1 max = 1
+#pragma HLS PIPELINE II = 1
+                                    dct_t ac = row_tile_ram[bx * 64 + i];
+                                    //_XF_IMAGE_PRINT("%d,", (int)ac);
+                                    strm_ac.write(ac);
+                                    cnt++;
+                                }
+                                //_XF_IMAGE_PRINT("\n");
+                            }
+                        } // dataflow region
+                    }     // color
+                }         // tile x
+            }             // tile y
+        }                 // group x
+    }                     // group y
+#ifdef DEBUGAXItoPikAcStream
+    std::cout << "all_ac = " << cnt << std::endl;
+#endif
+}
+
+// ------------------------------------------------------------
+
+// Dataflow region that produces the AC coefficient stream and the
+// coefficient-order stream side by side.
+//
+//   ac_buf     : AXI buffer read by axiToPikAcStream
+//   order_buf  : AXI buffer holding the order table; ac_group * 3 * 64
+//                entries are requested from it
+//   config_dev : geometry / group counts
+//   strm_ac    : (out) AC coefficients
+//   strm_order : (out) order entries cast to int
+void axiToPikAcwithOrder(ap_uint<32>* ac_buf,
+                         ap_uint<32>* order_buf,
+                         ConfigKernel3 config_dev,
+
+                         hls::stream<dct_t>& strm_ac,
+                         hls::stream<int>& strm_order) {
+#pragma HLS INLINE OFF
+#pragma HLS DATAFLOW
+    axiToPikAcStream<32, 512>(ac_buf, config_dev, strm_ac); // 512 for 32w input//256 for 16w input
+
+    axiToStreamRetype<128, 32, ap_uint<32>, int>(order_buf, (config_dev.ac_group * 3 * 64), strm_order);
+}
+// ------------------------------------------------------------
+// Fills the local order table from the stream, row by row:
+// hls_kOrderContexts rows of 64 entries each, one stream read per entry.
+void localOrder(hls::stream<int>& strm_order, int hls_order[hls_kOrderContexts][64]) {
+    for (int ctx = 0; ctx < hls_kOrderContexts; ++ctx) {
+        for (int pos = 0; pos < 64; ++pos) {
+#pragma HLS PIPELINE II = 1
+            hls_order[ctx][pos] = strm_order.read();
+        }
+    }
+}
+
+// ------------------------------------------------------------
+// sequential
+// Loads every DDR input of kernel 3 into its stream, one transfer after the
+// other (seven steps executed in sequence).
+//
+// ddr_dc holds three planes of config_dev.num_dc words each, in the order
+// x, y, b. The y plane is read three times (strm_dc_y1/2/3) and ddr_block is
+// read three times (strm_strategy_block0/1/2). strm_arsigma receives num_dc
+// zeros; no buffer is read for it.
+void hls_burst_dc_ac_by_group2(ConfigKernel3 config_dev,
+
+                               ap_uint<32>* ddr_dc,
+                               ap_uint<32>* ddr_ac_strategy,
+                               ap_uint<32>* ddr_block,
+                               ap_uint<32>* ddr_quant_field,
+                               ap_uint<32>* ddr_ac,
+                               ap_uint<32>* ddr_order,
+
+                               hls::stream<dct_t>& strm_dc_y1,
+                               hls::stream<dct_t>& strm_dc_y2,
+                               hls::stream<dct_t>& strm_dc_y3,
+                               hls::stream<dct_t>& strm_dc_x,
+                               hls::stream<dct_t>& strm_dc_b,
+
+                               hls::stream<uint8_t>& strm_strategy,
+                               hls::stream<quant_t>& strm_quant_field,
+                               hls::stream<arsigma_t>& strm_arsigma,
+                               hls::stream<bool>& strm_strategy_block0,
+                               hls::stream<bool>& strm_strategy_block1,
+                               hls::stream<bool>& strm_strategy_block2,
+
+                               hls::stream<dct_t>& strm_ac,
+                               hls::stream<int>& strm_order) {
+#pragma HLS INLINE OFF
+    assert(hls_kDcGroupDimInBlocks % (hls_kGroupDim / 8) == 0);
+
+    // Base addresses of the three DC planes inside ddr_dc.
+    ap_uint<32>* plane_x = ddr_dc;
+    ap_uint<32>* plane_y = ddr_dc + config_dev.num_dc;
+    ap_uint<32>* plane_b = ddr_dc + 2 * config_dev.num_dc;
+
+    for (int step = 0; step < 7; step++) {
+        switch (step) {
+            case 0: // DC x together with the first copy of DC y
+                axiToStreamRetype_serial<64, 32, dct_t, false, 32, dct_t, false>(plane_x, plane_y, config_dev.num_dc,
+                                                                                 strm_dc_x, strm_dc_y1);
+                break;
+            case 1: // second copy of DC y
+                axiToStreamRetype<64, 32, ap_uint<32>, dct_t>(plane_y, config_dev.num_dc, strm_dc_y2);
+                break;
+            case 2: // DC b together with the third copy of DC y
+                axiToStreamRetype_serial<64, 32, dct_t, false, 32, dct_t, false>(plane_b, plane_y, config_dev.num_dc,
+                                                                                 strm_dc_b, strm_dc_y3);
+                break;
+            case 3: // AC strategy + block flags
+                axiToStreamRetype_parallel<64, 32, uint8_t, false, 32, bool, false>(
+                    ddr_ac_strategy, ddr_block, config_dev.num_dc, strm_strategy, strm_strategy_block0);
+                break;
+            case 4: // quant field + block flags
+                axiToStreamRetype_parallel<64, 32, quant_t, false, 32, bool, false>(
+                    ddr_quant_field, ddr_block, config_dev.num_dc, strm_quant_field, strm_strategy_block1);
+                break;
+            case 5: // zero-padded arsigma + block flags
+                axiToStreamRetype_parallel<64, 32, arsigma_t, true, 32, bool, false>(
+                    nullptr, ddr_block, config_dev.num_dc, strm_arsigma, strm_strategy_block2);
+                break;
+            default: // step 6: AC coefficients and coefficient order
+                axiToPikAcwithOrder(ddr_ac, ddr_order, config_dev, strm_ac, strm_order);
+                break;
+        }
+    }
+}
+
+void hls_tokenize_dc_ac_sequential(ConfigKernel3 config_dev,
+                                   // Purpose: tokenize every DC group (DC values, then control fields), then every AC group.
+                                   // ac
+                                   hls::stream<dct_t>& strm_coef_raster,
+                                   hls::stream<int>& strm_order,
+
+                                   // dc
+                                   hls::stream<dct_t>& strm_dc_y1,
+                                   hls::stream<dct_t>& strm_dc_y2,
+                                   hls::stream<dct_t>& strm_dc_y3,
+                                   hls::stream<dct_t>& strm_dc_x,
+                                   hls::stream<dct_t>& strm_dc_b,
+
+                                   hls::stream<uint8_t>& strm_strategy,
+                                   hls::stream<quant_t>& strm_quant_field,
+                                   hls::stream<arsigma_t>& strm_arsigma,
+                                   hls::stream<bool>& strm_strategy_block0,
+                                   hls::stream<bool>& strm_strategy_block1,
+                                   hls::stream<bool>& strm_strategy_block2,
+                                   // Token outputs; all passes below write into the same five streams.
+                                   hls::stream<addr_t>& strm_token_addr,
+                                   hls::stream<hls_Token_symb>& strm_token_symb,
+                                   hls::stream<hls_Token_bits>& strm_token_bits,
+                                   hls::stream<bool>& strm_e_addr,
+                                   hls::stream<bool>& strm_e_token) {
+#pragma HLS INLINE OFF
+    // Image extent in blocks (xblock8 / yblock8) and in tiles (xblock64 / yblock64), copied from the config.
+    const int xsize_blocks = config_dev.xblock8;
+    const int ysize_blocks = config_dev.yblock8;
+    const int xsize_tiles = config_dev.xblock64;
+    const int ysize_tiles = config_dev.yblock64;
+    // Pass 1: DC groups, each covering up to 256 x 256 blocks.
+    for (ap_uint<8> gy = 0; gy < config_dev.dc_ygroup; ++gy) {
+        for (ap_uint<8> gx = 0; gx < config_dev.dc_xgroup; ++gx) {
+            hls_Rect dc_rect;
+            // Clamp the group size so that a group on the right/bottom edge stops at the image boundary.
+            ap_uint<16> block_x0 = gx * 256;
+            ap_uint<16> block_y0 = gy * 256;
+            dc_rect.xsize = (block_x0 + 256 <= xsize_blocks) ? (int)(256) : (int)(xsize_blocks - block_x0);
+            dc_rect.ysize = (block_y0 + 256 <= ysize_blocks) ? (int)(256) : (int)(ysize_blocks - block_y0);
+
+            _XF_IMAGE_PRINT("dc_rect(%d,%d) \n", dc_rect.xsize, dc_rect.ysize);
+
+            //----------------interleaving encode------------------------
+
+            _XF_IMAGE_PRINT("\n************************************\n");
+            _XF_IMAGE_PRINT("-5 Tokenize DC by GROUP - E2B\n");
+            _XF_IMAGE_PRINT("**************************************\n");
+            hls_encode_dc_top(false, dc_rect, strm_dc_y1, strm_dc_y2, strm_dc_y3, strm_dc_x, strm_dc_b, strm_token_addr,
+                              strm_token_symb, strm_token_bits, strm_e_addr, strm_e_token);
+            _XF_IMAGE_PRINT("\n************************************\n");
+            _XF_IMAGE_PRINT("-5 Tokenize ctrl by GROUP - E2B\n");
+            _XF_IMAGE_PRINT("**************************************\n");
+            Xacc_TokenizeCtrlField_top(dc_rect, strm_strategy, strm_quant_field, strm_arsigma, strm_strategy_block0,
+                                       strm_strategy_block1, strm_strategy_block2, strm_token_addr, strm_token_symb,
+                                       strm_token_bits, strm_e_addr, strm_e_token);
+        } // gx
+    }     // gy
+    // Pass 2: AC groups, each covering up to 64 x 64 blocks (8 x 8 tiles).
+    for (ap_uint<8> gy = 0; gy < config_dev.ac_ygroup; ++gy) {
+        for (ap_uint<8> gx = 0; gx < config_dev.ac_xgroup; ++gx) {
+            group_rect ac_rect;
+            // Same edge clamping as for DC, computed once in blocks and once in tiles.
+            ap_uint<16> block_x0 = gx * 64;
+            ap_uint<16> block_y0 = gy * 64;
+            ac_rect.xsize_blocks = (block_x0 + 64 <= xsize_blocks) ? (int)64 : (int)(xsize_blocks - block_x0);
+            ac_rect.ysize_blocks = (block_y0 + 64 <= ysize_blocks) ? (int)64 : (int)(ysize_blocks - block_y0);
+
+            ap_uint<16> tile_x0 = gx * 8;
+            ap_uint<16> tile_y0 = gy * 8;
+
+            ac_rect.xsize_tiles = (tile_x0 + 8 <= xsize_tiles) ? (int)8 : (int)(xsize_tiles - tile_x0);
+            ac_rect.ysize_tiles = (tile_y0 + 8 <= ysize_tiles) ? (int)8 : (int)(ysize_tiles - tile_y0);
+
+            _XF_IMAGE_PRINT("ac_rect(%d,%d,%d,%d) \n", ac_rect.xsize_tiles, ac_rect.ysize_tiles, ac_rect.xsize_blocks,
+                            ac_rect.ysize_blocks);
+
+            // 5. Tokenize Coefficients by tiles=8   blocks =ysize_blocks/8=ysize/64
+            _XF_IMAGE_PRINT("\n************************************\n");
+            _XF_IMAGE_PRINT("-5 Tokenize AC by Tiles - E2B\n");
+            _XF_IMAGE_PRINT("**************************************\n");
+            int hls_order[hls_kOrderContexts][64];
+            // Pull this group's order table off the stream before tokenizing its coefficients.
+            localOrder(strm_order, hls_order);
+            XAcc_TokenizeCoefficients6(hls_order, ac_rect, strm_coef_raster, ac_static_context_map,
+
+                                       strm_token_addr, strm_token_symb, strm_token_bits, strm_e_addr, strm_e_token);
+        } // gx
+    }     // gy
+}
+
+void hls_encode_dc_ac(const int hls_dc_groups,
+                      const int hls_ac_groups,
+                      // Purpose: encode 2 * hls_dc_groups DC token groups, then hls_ac_groups AC token groups.
+                      hls::stream<ap_uint<13> >& strm_token_addr,
+                      hls::stream<hls_Token_symb>& strm_token_symb,
+                      hls::stream<hls_Token_bits>& strm_token_bits,
+                      hls::stream<bool>& strm_e_addr,
+                      hls::stream<bool>& strm_e_token,
+                      // Outputs: length table (histo_cfg), histogram and code byte streams, and their running offsets.
+                      hist_t hls_histograms[hls_NumHistograms],
+                      ap_uint<32> histo_cfg[2 * (2 * MAX_DC_GROUP + MAX_AC_GROUP)],
+                      hls::stream<int>& histo_offset,
+                      hls::stream<uint8_t>& strm_histo_byte,
+                      hls::stream<bool>& strm_histo_e,
+                      hls::stream<int>& ac_dc_offset,
+                      hls::stream<uint16_t>& strm_ac_dc_byte,
+                      hls::stream<bool>& strm_ac_dc_e) {
+#pragma HLS INLINE OFF
+    // Per-group lengths; they are copied into histo_cfg by the output_cfg loops at the end.
+    int len_dc_histo[2 * MAX_DC_GROUP] = {0};
+    int len_dc[2 * MAX_DC_GROUP] = {0};
+    int len_ac_histo[MAX_AC_GROUP] = {0};
+    int len_ac[MAX_AC_GROUP] = {0};
+    // Running totals, written to the two offset streams after every group.
+    int offset_dc_histo = 0;
+    int offset_dc = 0;
+    int offset_ac_histo = 0;
+    int offset_ac = 0;
+    // C simulation allocates the two token buffers on the heap; synthesis declares them as URAM-bound arrays.
+#ifndef __SYNTHESIS__
+
+    ap_uint<72>* ram_symb;
+    ap_uint<72>* ram_bits;
+    ram_symb = (ap_uint<72>*)malloc(hls_kMaxBufSize * sizeof(ap_uint<72>));
+    ram_bits = (ap_uint<72>*)malloc(hls_kMaxBufSize * sizeof(ap_uint<72>));
+    // NOTE(review): the malloc results are used without a NULL check.
+#else
+
+    ap_uint<72> ram_symb[hls_kMaxBufSize];
+#pragma HLS resource variable = ram_symb core = XPM_MEMORY uram
+    ap_uint<72> ram_bits[hls_kMaxBufSize];
+#pragma HLS resource variable = ram_bits core = XPM_MEMORY uram
+
+#endif
+    // The DC section of both offset streams starts with a leading 0.
+    ac_dc_offset.write(0);
+    histo_offset.write(0);
+encode_dc:
+    for (ap_uint<16> group_index = 0; group_index < 2 * hls_dc_groups; ++group_index) {
+        int total_token;
+        // buffer_tokens_onboard and XAcc_EncodeHistogramsFast_top start at the
+        // same time and both ping-pong with ANS_top;
+        // once they have both reached ap_done, ANS_top gets ap_start
+        buffer_tokens_onboard(strm_token_symb, strm_token_bits, strm_e_token, total_token, ram_symb, ram_bits);
+
+#ifndef __SYNTHESIS__
+        _XF_IMAGE_PRINT("size_token =%d\n", total_token);
+#endif
+
+        // 6. Build And Encode Histograms
+        hls_ANSEncSymbolInfo hls_codes[hls_kNumStaticContexts][MAX_ALPHABET_SIZE];
+        uint8_t dc_context_map[MAX_NUM_COLOR];
+        int pos = 0;
+
+#ifndef __SYNTHESIS__
+        _XF_IMAGE_PRINT("\n************************************\n");
+        _XF_IMAGE_PRINT("-6 BuildAndEncodeHistogramsFast - E2B\n");
+        _XF_IMAGE_PRINT("**************************************\n");
+#endif
+
+        XAcc_EncodeHistogramsFast_top(true, dc_context_map, strm_token_addr, strm_e_addr, hls_codes, hls_histograms,
+                                      pos, len_dc_histo[group_index], strm_histo_byte, strm_histo_e);
+
+        offset_dc_histo += len_dc_histo[group_index];
+
+#ifndef __SYNTHESIS__
+        _XF_IMAGE_PRINT("len_histo = %d", (int)len_dc_histo[group_index]);
+        _XF_IMAGE_PRINT("\n************************************\n");
+        _XF_IMAGE_PRINT("-7 Write AC Tokens - E2B\n");
+        _XF_IMAGE_PRINT("**************************************\n");
+#endif
+
+        ANS_top(true, dc_context_map, total_token, ram_symb, ram_bits, hls_codes, len_dc[group_index], strm_ac_dc_byte,
+                strm_ac_dc_e);
+        // Advance the code offset by half of the group length, rounded up.
+        offset_dc += (len_dc[group_index] + 1) / 2;
+
+        ac_dc_offset.write(offset_dc);
+        histo_offset.write(offset_dc_histo);
+    }
+    // Restart both offset streams at 0 for the AC section.
+    ac_dc_offset.write(0);
+    histo_offset.write(0);
+encode_ac:
+    for (ap_uint<16> group_index = 0; group_index < hls_ac_groups; ++group_index) {
+        int total_token;
+        // buffer_tokens_onboard and XAcc_EncodeHistogramsFast_top start at the
+        // same time and both ping-pong with ANS_top;
+        // once they have both reached ap_done, ANS_top gets ap_start
+        buffer_tokens_onboard(strm_token_symb, strm_token_bits, strm_e_token, total_token, ram_symb, ram_bits);
+
+#ifndef __SYNTHESIS__
+        _XF_IMAGE_PRINT("size_token =%d\n", total_token);
+#endif
+
+        // 6. Build And Encode Histograms
+        hls_ANSEncSymbolInfo hls_codes[hls_kNumStaticContexts][MAX_ALPHABET_SIZE];
+        uint8_t ac_context_map[MAX_NUM_COLOR];
+        int pos = 0;
+
+#ifndef __SYNTHESIS__
+        _XF_IMAGE_PRINT("\n************************************\n");
+        _XF_IMAGE_PRINT("-6 BuildAndEncodeHistogramsFast - E2B\n");
+        _XF_IMAGE_PRINT("**************************************\n");
+#endif
+
+        XAcc_EncodeHistogramsFast_top(false, ac_context_map, strm_token_addr, strm_e_addr, hls_codes, hls_histograms,
+                                      pos, len_ac_histo[group_index], strm_histo_byte, strm_histo_e);
+
+        offset_ac_histo += len_ac_histo[group_index];
+
+#ifndef __SYNTHESIS__
+        _XF_IMAGE_PRINT("len_histo = %d", (int)len_ac_histo[group_index]);
+        _XF_IMAGE_PRINT("\n************************************\n");
+        _XF_IMAGE_PRINT("-7 Write AC Tokens - E2B\n");
+        _XF_IMAGE_PRINT("**************************************\n");
+#endif
+
+        ANS_top(false, ac_context_map, total_token, ram_symb, ram_bits, hls_codes, len_ac[group_index], strm_ac_dc_byte,
+                strm_ac_dc_e);
+        // Advance the code offset by half of the group length, rounded up.
+        offset_ac += (len_ac[group_index] + 1) / 2;
+
+        ac_dc_offset.write(offset_ac);
+        histo_offset.write(offset_ac_histo);
+    }
+
+#ifndef __SYNTHESIS__
+    std::cout << "ac_dc_byte_size:" << strm_ac_dc_byte.size() << " " << strm_ac_dc_e.size() << std::endl;
+    free(ram_symb);
+    free(ram_bits);
+#endif
+    // Emit the four length tables back to back: DC histogram, AC histogram, DC code, AC code.
+    ap_uint<32> cfg_addr = 0;
+output_cfg0:
+    for (int group_index = 0; group_index < 2 * hls_dc_groups; ++group_index) {
+#pragma HLS PIPELINE II = 1
+        histo_cfg[cfg_addr] = len_dc_histo[group_index];
+        cfg_addr++;
+    }
+
+output_cfg1:
+    for (int group_index = 0; group_index < hls_ac_groups; ++group_index) {
+#pragma HLS PIPELINE II = 1
+        histo_cfg[cfg_addr] = len_ac_histo[group_index];
+        cfg_addr++;
+    }
+
+output_cfg2:
+    for (int group_index = 0; group_index < 2 * hls_dc_groups; ++group_index) {
+#pragma HLS PIPELINE II = 1
+        histo_cfg[cfg_addr] = len_dc[group_index];
+        cfg_addr++;
+    }
+
+output_cfg3:
+    for (int group_index = 0; group_index < hls_ac_groups; ++group_index) {
+#pragma HLS PIPELINE II = 1
+        histo_cfg[cfg_addr] = len_ac[group_index];
+        cfg_addr++;
+    }
+}
+
+template <int _BurstLen, int _WAxi, typename _IStrm, int _WStrm>
+void streamRetypeToAxi(ap_uint<_WAxi>* wbuf, hls::stream<_IStrm>& istrm, hls::stream<bool>& e_istrm) {
+#pragma HLS INLINE OFF
+    // Three dataflow stages: retype istrm to ap_uint<_WStrm>, countForBurst, then burstWrite into wbuf.
+    const int fifo_buf = 2 * _BurstLen;
+    // fifo_buf is the depth of axi_strm, i.e. room for two bursts of _BurstLen entries.
+#pragma HLS DATAFLOW
+
+    hls::stream<ap_uint<_WAxi> > axi_strm;
+    hls::stream<ap_uint<8> > nb_strm;
+#pragma HLS stream variable = nb_strm depth = 2
+#pragma HLS stream variable = axi_strm depth = fifo_buf
+
+    hls::stream<ap_uint<_WStrm> > istrm2;
+#pragma HLS stream variable = istrm2 depth = 32
+    hls::stream<bool> e_istrm2;
+#pragma HLS stream variable = e_istrm2 depth = 32
+
+    streamRetype<_IStrm, ap_uint<_WStrm> >(istrm, e_istrm, istrm2, e_istrm2);
+
+    xf::common::utils_hw::details::countForBurst<_WAxi, _WStrm, _BurstLen>(istrm2, e_istrm2, axi_strm, nb_strm);
+
+    xf::common::utils_hw::details::burstWrite<_WAxi, _WStrm, _BurstLen>(wbuf, axi_strm, nb_strm);
+}
+
+void hls_writeout_sub(hls::stream<uint8_t>& strm_histo_byte,
+                      hls::stream<bool>& strm_histo_e,
+                      // Purpose: write the histogram stream to histo_code_out, then the DC/AC stream to code_out.
+                      hls::stream<uint16_t>& strm_dc_ac_byte,
+                      hls::stream<bool>& strm_dc_ac_e,
+                      // Destination buffers for the two streams above.
+                      ap_uint<32>* histo_code_out,
+                      ap_uint<32>* code_out) {
+#pragma HLS INLINE OFF
+    for (int i = 0; i < 2; i++) { // i == 0: histogram stream, i == 1: DC/AC stream
+        if (i == 0) {
+#ifdef DEBUG
+            std::cout << "======================write out histo=====================" << std::endl;
+#endif
+            streamRetypeToAxi<64, 32, uint8_t, 32>(histo_code_out, strm_histo_byte, strm_histo_e);
+#ifdef DEBUG
+            std::cout << "=========================write done=======================" << std::endl;
+#endif
+        } else {
+#ifdef DEBUG
+            std::cout << "======================write out DC_AC=====================" << std::endl;
+#endif
+            streamRetypeToAxi<64, 32, uint16_t, 32>(code_out, strm_dc_ac_byte, strm_dc_ac_e);
+#ifdef DEBUG
+            std::cout << "=========================write done=======================" << std::endl;
+#endif
+        }
+    }
+}
+
+void hls_writeout(const int hls_dc_groups,
+                  const int hls_ac_groups,
+                  hls::stream<int>& histo_offset,
+                  hls::stream<uint8_t>& strm_histo_byte,
+                  hls::stream<bool>& strm_histo_e,
+                  hls::stream<int>& dc_ac_offset,
+                  hls::stream<uint16_t>& strm_dc_ac_byte,
+                  hls::stream<bool>& strm_dc_ac_e,
+                  // Output buffers; every group is written at the offsets read from the two offset streams.
+                  ap_uint<32> dc_histo_code_out[2 * MAX_DC_GROUP * MAX_DC_HISTO_SIZE],
+                  ap_uint<32> dc_code_out[2 * MAX_DC_GROUP * MAX_DC_SIZE],
+
+                  ap_uint<32> ac_histo_code_out[MAX_AC_GROUP * MAX_AC_HISTO_SIZE],
+                  ap_uint<32> ac_code_out[MAX_AC_GROUP * MAX_AC_SIZE]) {
+#pragma HLS INLINE OFF
+    // Writes 2 * hls_dc_groups DC groups, then hls_ac_groups AC groups, consuming one offset pair per group.
+#ifdef DEBUG
+    std::cout << "ac_goutps=" << hls_ac_groups << " dc_groups=" << hls_dc_groups << std::endl;
+#endif
+
+    for (int group_index = 0; group_index < 2 * hls_dc_groups; ++group_index) {
+#ifdef DEBUG
+        std::cout << "======================write out DC=====================" << std::endl;
+#endif
+
+        int offset0 = histo_offset.read();
+        int offset1 = dc_ac_offset.read();
+        hls_writeout_sub(strm_histo_byte, strm_histo_e, strm_dc_ac_byte, strm_dc_ac_e, (dc_histo_code_out + offset0),
+                         (dc_code_out + offset1));
+    }
+
+    // discard the extra offset pair that follows the last DC group
+    histo_offset.read();
+    dc_ac_offset.read();
+
+    for (int group_index = 0; group_index < hls_ac_groups; ++group_index) {
+#ifdef DEBUG
+        std::cout << "======================write out AC=====================" << std::endl;
+#endif
+
+        int offset0 = histo_offset.read();
+        int offset1 = dc_ac_offset.read();
+        hls_writeout_sub(strm_histo_byte, strm_histo_e, strm_dc_ac_byte, strm_dc_ac_e, (ac_histo_code_out + offset0),
+                         (ac_code_out + offset1));
+    }
+
+    // discard the extra offset pair that follows the last AC group
+    histo_offset.read();
+    dc_ac_offset.read();
+}
+
+void kernel3Wrapper(ap_uint<32> config[MAX_NUM_CONFIG],
+                    // Purpose: dataflow pipeline of four stages - load from DDR, tokenize, encode, write out.
+                    ap_uint<32> ddr_dc[MAX_NUM_DC],
+                    ap_uint<32> ddr_ac_strategy[MAX_NUM_BLOCK88],
+                    ap_uint<32> ddr_block[MAX_NUM_BLOCK88],
+                    ap_uint<32> ddr_quant_field[MAX_NUM_BLOCK88],
+                    ap_uint<32> ddr_ac[ALL_PIXEL],
+                    ap_uint<32> ddr_order[MAX_AC_GROUP * hls_kOrderContexts * 64],
+                    // Output buffers.
+                    ap_uint<32> histo_cfg[2 * (2 * MAX_DC_GROUP + MAX_AC_GROUP)],
+                    ap_uint<32> dc_histo_code_out[2 * MAX_DC_GROUP * MAX_DC_HISTO_SIZE],
+                    ap_uint<32> dc_code_out[2 * MAX_DC_GROUP * MAX_DC_SIZE],
+                    ap_uint<32> ac_histo_code_out[MAX_AC_GROUP * MAX_AC_HISTO_SIZE],
+                    ap_uint<32> ac_code_out[MAX_AC_GROUP * MAX_AC_SIZE]) {
+#pragma HLS INLINE off
+#pragma HLS DATAFLOW
+
+    _XF_IMAGE_PRINT("\n kernel 3  start! \n");
+    // One config_dev entry per stage: [0] load, [1] tokenize, [2] encode, [3] write out.
+    ConfigKernel3 config_dev[4];
+#pragma HLS ARRAY_PARTITION variable = config_dev complete dim = 1
+
+#ifdef DEBUG
+    std::cout << "===================Load config=================" << std::endl;
+#endif
+
+    load_config_kernel3(config, config_dev);
+
+#ifdef DEBUG
+    std::cout << "======================Scan=====================" << std::endl;
+#endif
+    // FIFOs from the load stage to the tokenize stage.
+    hls::stream<dct_t> strm_dc_x("strm_dc_x");
+#pragma HLS RESOURCE variable = strm_dc_x core = FIFO_BRAM
+#pragma HLS STREAM variable = strm_dc_x depth = 512
+    hls::stream<dct_t> strm_dc_y1;
+#pragma HLS RESOURCE variable = strm_dc_y1 core = FIFO_BRAM
+#pragma HLS STREAM variable = strm_dc_y1 depth = 512
+    hls::stream<dct_t> strm_dc_y2("strm_dc_y");
+#pragma HLS RESOURCE variable = strm_dc_y2 core = FIFO_BRAM
+#pragma HLS STREAM variable = strm_dc_y2 depth = 512
+    hls::stream<dct_t> strm_dc_y3;
+#pragma HLS RESOURCE variable = strm_dc_y3 core = FIFO_BRAM
+#pragma HLS STREAM variable = strm_dc_y3 depth = 512
+    hls::stream<dct_t> strm_dc_b("strm_dc_b");
+#pragma HLS RESOURCE variable = strm_dc_b core = FIFO_BRAM
+#pragma HLS STREAM variable = strm_dc_b depth = 512
+
+    hls::stream<uint8_t> strm_strategy("strm_strategy");
+#pragma HLS RESOURCE variable = strm_strategy core = FIFO_BRAM
+#pragma HLS STREAM variable = strm_strategy depth = 512
+    hls::stream<quant_t> strm_quant_field("strm_quant");
+#pragma HLS RESOURCE variable = strm_quant_field core = FIFO_BRAM
+#pragma HLS STREAM variable = strm_quant_field depth = 512
+    hls::stream<arsigma_t> strm_arsigma("strm_arsigma");
+#pragma HLS RESOURCE variable = strm_arsigma core = FIFO_BRAM
+#pragma HLS STREAM variable = strm_arsigma depth = 512
+    hls::stream<bool> strm_strategy_block0("strm_strategy_block0");
+#pragma HLS RESOURCE variable = strm_strategy_block0 core = FIFO_BRAM
+#pragma HLS STREAM variable = strm_strategy_block0 depth = 512
+    hls::stream<bool> strm_strategy_block1("strm_strategy_block1");
+#pragma HLS RESOURCE variable = strm_strategy_block1 core = FIFO_BRAM
+#pragma HLS STREAM variable = strm_strategy_block1 depth = 512
+    hls::stream<bool> strm_strategy_block2("strm_strategy_block2");
+#pragma HLS RESOURCE variable = strm_strategy_block2 core = FIFO_BRAM
+#pragma HLS STREAM variable = strm_strategy_block2 depth = 512
+    // AC coefficient FIFO and order FIFO; like every stream here, each gets its own RESOURCE and STREAM pragma.
+    hls::stream<dct_t> strm_coef_raster_syn("strm_ac_raster_syn");
+#pragma HLS RESOURCE variable = strm_coef_raster_syn core = FIFO_BRAM
+#pragma HLS STREAM variable = strm_coef_raster_syn depth = 1024
+    hls::stream<int> strm_order("strm_order");
+#pragma HLS RESOURCE variable = strm_order core = FIFO_BRAM
+#pragma HLS STREAM variable = strm_order depth = 512
+
+    hls_burst_dc_ac_by_group2(config_dev[0], ddr_dc, ddr_ac_strategy, ddr_block, ddr_quant_field, ddr_ac, ddr_order,
+
+                              strm_dc_y1, strm_dc_y2, strm_dc_y3, strm_dc_x, strm_dc_b, strm_strategy, strm_quant_field,
+                              strm_arsigma, strm_strategy_block0, strm_strategy_block1, strm_strategy_block2,
+                              strm_coef_raster_syn, strm_order);
+
+#ifdef DEBUG
+    std::cout << "======================tokenize=====================" << std::endl;
+#endif
+    // Token FIFOs from the tokenize stage to the encode stage.
+    hls::stream<addr_t> strm_token_addr("strm_token_addr");
+#pragma HLS RESOURCE variable = strm_token_addr core = FIFO_LUTRAM
+#pragma HLS STREAM variable = strm_token_addr depth = 1024
+    hls::stream<bool> strm_e_addr;
+#pragma HLS RESOURCE variable = strm_e_addr core = FIFO_LUTRAM
+#pragma HLS STREAM variable = strm_e_addr depth = 1024
+
+    hls::stream<hls_Token_symb> strm_token_symb("strm_token_symb");
+#pragma HLS RESOURCE variable = strm_token_symb core = FIFO_LUTRAM
+#pragma HLS STREAM variable = strm_token_symb depth = 1024
+    hls::stream<hls_Token_bits> strm_token_bits("strm_token_bits");
+#pragma HLS RESOURCE variable = strm_token_bits core = FIFO_LUTRAM
+#pragma HLS STREAM variable = strm_token_bits depth = 1024
+    hls::stream<bool> strm_e_token("strm_e_token");
+#pragma HLS RESOURCE variable = strm_e_token core = FIFO_LUTRAM
+#pragma HLS STREAM variable = strm_e_token depth = 1024
+
+    hls_tokenize_dc_ac_sequential(config_dev[1], strm_coef_raster_syn, strm_order, strm_dc_y1, strm_dc_y2, strm_dc_y3,
+                                  strm_dc_x, strm_dc_b, strm_strategy, strm_quant_field, strm_arsigma,
+                                  strm_strategy_block0, strm_strategy_block1, strm_strategy_block2,
+
+                                  strm_token_addr, strm_token_symb, strm_token_bits, strm_e_addr, strm_e_token);
+
+#ifdef DEBUG
+    std::cout << "======================encode=====================" << std::endl;
+#endif
+
+    hist_t hls_histograms[hls_NumHistograms];
+    // Offset and byte FIFOs from the encode stage to the write-out stage.
+    hls::stream<int> strm_histo_offset("strm_histo_offset");
+#pragma HLS RESOURCE variable = strm_histo_offset core = FIFO_LUTRAM
+#pragma HLS STREAM variable = strm_histo_offset depth = 32
+    hls::stream<uint8_t> strm_histo_byte("strm_histo_byte");
+#pragma HLS RESOURCE variable = strm_histo_byte core = FIFO_LUTRAM
+#pragma HLS STREAM variable = strm_histo_byte depth = 32
+    hls::stream<bool> strm_histo_e("strm_histo_e");
+#pragma HLS RESOURCE variable = strm_histo_e core = FIFO_LUTRAM
+#pragma HLS STREAM variable = strm_histo_e depth = 32
+
+    hls::stream<int> strm_dc_ac_offset("strm_dc_ac_offset");
+#pragma HLS RESOURCE variable = strm_dc_ac_offset core = FIFO_LUTRAM
+#pragma HLS STREAM variable = strm_dc_ac_offset depth = 32
+    hls::stream<uint16_t> strm_dc_ac_byte("dc_byte");
+#pragma HLS RESOURCE variable = strm_dc_ac_byte core = FIFO_LUTRAM
+#pragma HLS STREAM variable = strm_dc_ac_byte depth = 32
+    hls::stream<bool> strm_dc_ac_e("dc_e");
+#pragma HLS RESOURCE variable = strm_dc_ac_e core = FIFO_LUTRAM
+#pragma HLS STREAM variable = strm_dc_ac_e depth = 32
+
+    hls_encode_dc_ac(config_dev[2].dc_group, config_dev[2].ac_group, strm_token_addr, strm_token_symb, strm_token_bits,
+                     strm_e_addr, strm_e_token,
+
+                     hls_histograms, histo_cfg, strm_histo_offset, strm_histo_byte, strm_histo_e, strm_dc_ac_offset,
+                     strm_dc_ac_byte, strm_dc_ac_e);
+
+#ifdef DEBUG
+    std::cout << "======================start write out=====================" << std::endl;
+#endif
+
+    hls_writeout(config_dev[3].dc_group, config_dev[3].ac_group, strm_histo_offset, strm_histo_byte, strm_histo_e,
+                 strm_dc_ac_offset, strm_dc_ac_byte, strm_dc_ac_e, dc_histo_code_out, dc_code_out, ac_histo_code_out,
+                 ac_code_out);
+
+#ifdef DEBUG
+    std::cout << "======================kernel3 done=====================" << std::endl;
+#endif
+}
+
+namespace xf {
+namespace codec {
+extern "C" void pikEncKernel3Top(ap_uint<32>* config,
+                                 // Input buffers.
+                                 ap_uint<32>* ddr_ac,
+                                 ap_uint<32>* ddr_dc,
+                                 ap_uint<32>* ddr_quant_field,
+                                 ap_uint<32>* ddr_ac_strategy,
+                                 ap_uint<32>* ddr_block,
+                                 ap_uint<32>* hls_order,
+                                 // Output buffers.
+                                 ap_uint<32>* histo_cfg,
+                                 ap_uint<32>* dc_histo_code_out,
+                                 ap_uint<32>* dc_code_out,
+                                 ap_uint<32>* ac_histo_code_out,
+                                 ap_uint<32>* ac_code_out) {
+#pragma HLS INLINE off
+    // Purpose: top-level kernel entry; declares the AXI interfaces and forwards to kernel3Wrapper.
+    const int max_num_dc = MAX_NUM_DC;
+    const int max_num_block88 = MAX_NUM_BLOCK88;
+    const int max_pixel = ALL_PIXEL;
+    const int max_dc_histo = 2 * MAX_DC_GROUP * MAX_DC_HISTO_SIZE;
+    const int max_dc = 2 * MAX_DC_GROUP * MAX_DC_SIZE;
+    const int max_ac_histo = MAX_AC_GROUP * MAX_AC_HISTO_SIZE;
+    const int max_ac = MAX_AC_GROUP * MAX_AC_SIZE;
+    const int max_order = MAX_AC_GROUP * hls_kOrderContexts * 64;
+    // The constants above are used as the m_axi depth values in the interface pragmas below.
+// clang-format off
+
+// cfg (every pointer gets its own m_axi bundle plus an s_axilite entry on bundle "control")
+#pragma HLS INTERFACE m_axi offset = slave latency = 64                        \
+        num_write_outstanding = 4 max_write_burst_length = 8                   \
+        num_read_outstanding = 4 max_read_burst_length = 8 bundle =            \
+        gmem0_0 port = config depth = 32
+#pragma HLS INTERFACE s_axilite port = config bundle = control
+
+// dc
+#pragma HLS INTERFACE m_axi offset = slave latency = 64                        \
+        num_write_outstanding = 4 max_write_burst_length = 8                   \
+        num_read_outstanding = 8 max_read_burst_length = 64 bundle =           \
+        gmem0_1 port = ddr_dc depth = max_num_dc
+#pragma HLS INTERFACE s_axilite port = ddr_dc bundle = control
+
+// ac strategy
+#pragma HLS INTERFACE m_axi offset = slave latency = 64                        \
+        num_write_outstanding = 4 max_write_burst_length = 8                   \
+        num_read_outstanding = 8 max_read_burst_length = 64 bundle =           \
+        gmem0_2 port = ddr_ac_strategy depth = max_num_block88
+#pragma HLS INTERFACE s_axilite port = ddr_ac_strategy bundle = control
+
+// quant field
+#pragma HLS INTERFACE m_axi offset = slave latency = 64                        \
+        num_write_outstanding = 4 max_write_burst_length = 8                   \
+        num_read_outstanding = 8 max_read_burst_length = 64 bundle =           \
+        gmem0_3 port = ddr_quant_field depth = max_num_block88
+#pragma HLS INTERFACE s_axilite port = ddr_quant_field bundle = control
+
+// block
+#pragma HLS INTERFACE m_axi offset = slave latency = 64                        \
+        num_write_outstanding = 4 max_write_burst_length = 8                   \
+        num_read_outstanding = 8 max_read_burst_length = 64 bundle =           \
+        gmem0_4 port = ddr_block depth = max_num_block88
+#pragma HLS INTERFACE s_axilite port = ddr_block bundle = control
+
+// ac
+#pragma HLS INTERFACE m_axi offset = slave latency = 64                        \
+        num_write_outstanding = 4 max_write_burst_length = 8                   \
+        num_read_outstanding = 8 max_read_burst_length = 64 bundle =           \
+        gmem0_5 port = ddr_ac depth = max_pixel
+#pragma HLS INTERFACE s_axilite port = ddr_ac bundle = control
+
+// order
+#pragma HLS INTERFACE m_axi offset = slave latency = 64                        \
+        num_write_outstanding = 4 max_write_burst_length = 8                   \
+        num_read_outstanding = 8 max_read_burst_length = 64 bundle =           \
+        gmem0_6 port = hls_order depth = max_order
+#pragma HLS INTERFACE s_axilite port = hls_order bundle = control
+
+
+// outputs: these four bundles use the larger write burst settings (8 outstanding, burst length 64)
+#pragma HLS INTERFACE m_axi offset = slave latency = 64                        \
+        num_write_outstanding = 8 max_write_burst_length = 64                  \
+        num_read_outstanding = 4 max_read_burst_length = 8 bundle =            \
+        gmem1_0 port = dc_histo_code_out depth = max_dc_histo
+#pragma HLS INTERFACE s_axilite port = dc_histo_code_out bundle = control
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 64                        \
+        num_write_outstanding = 8 max_write_burst_length = 64                  \
+        num_read_outstanding = 4 max_read_burst_length = 8 bundle =            \
+        gmem1_1 port = dc_code_out depth = max_dc
+#pragma HLS INTERFACE s_axilite port = dc_code_out bundle = control
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 64                        \
+        num_write_outstanding = 8 max_write_burst_length = 64                  \
+        num_read_outstanding = 4 max_read_burst_length = 8 bundle =            \
+        gmem1_2 port = ac_histo_code_out depth = max_ac_histo
+#pragma HLS INTERFACE s_axilite port = ac_histo_code_out bundle = control
+
+#pragma HLS INTERFACE m_axi offset = slave latency = 64                        \
+        num_write_outstanding = 8 max_write_burst_length = 64                  \
+        num_read_outstanding = 4 max_read_burst_length = 8 bundle =            \
+        gmem1_3 port = ac_code_out depth = max_ac
+#pragma HLS INTERFACE s_axilite port = ac_code_out bundle = control
+// histo_cfg: an output as well, but declared with the small burst settings and a fixed depth of 1024
+#pragma HLS INTERFACE m_axi offset = slave latency = 64                        \
+        num_write_outstanding = 4 max_write_burst_length = 8                   \
+        num_read_outstanding = 4 max_read_burst_length = 8 bundle =            \
+        gmem1_4 port = histo_cfg depth = 1024
+#pragma HLS INTERFACE s_axilite port = histo_cfg bundle = control
+
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+
+    // clang-format on
+    // kernel3Wrapper takes the buffers in a different order than this entry point; they are matched by name here.
+    kernel3Wrapper(config, ddr_dc, ddr_ac_strategy, ddr_block, ddr_quant_field, ddr_ac, hls_order, histo_cfg,
+                   dc_histo_code_out, dc_code_out, ac_histo_code_out, ac_code_out);
+}
+} // namespace codec
+} // namespace xf
\ No newline at end of file
diff --git a/codec/L2/demos/pikEnc/kernel/kernel3/ac_tokenize.cpp b/codec/L2/demos/pikEnc/kernel/kernel3/ac_tokenize.cpp
new file mode 100755
index 0000000000..b1b2bb9e36
--- /dev/null
+++ b/codec/L2/demos/pikEnc/kernel/kernel3/ac_tokenize.cpp
@@ -0,0 +1,1241 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel3/ac_tokenize.hpp"
+
+// ------------------------------------------------------------
+/**
+ * @brief Predict the non-zero count of the current block from its left and top
+ * neighbours, then store the current count as the new neighbour state.
+ *
+ * Prediction rules:
+ *  - first column of the top row: constant 32
+ *  - first column of any other row: count stored at cnt_nz_abv[0]
+ *  - other columns of the top row: cnt_nz_left
+ *  - everywhere else: (cnt_nz_abv[x] + cnt_nz_left + 1) / 2
+ *
+ * @param cnt_nz_here non-zero count of the current block.
+ * @param x column index of the current block; used to index cnt_nz_abv.
+ * @param is_top_row true when the current block has no row above it.
+ * @param cnt_nz_left in: count of the left neighbour; out: cnt_nz_here.
+ * @param cnt_nz_abv counts of the previous row; entry x is overwritten with cnt_nz_here.
+ * @param predicted_nz out: the prediction.
+ */
+void XAcc_PredictFromTopAndLeft_nz(nzeros_t cnt_nz_here,
+                                   uint32_t x,
+                                   bool is_top_row,
+                                   nzeros_t& cnt_nz_left,
+                                   nzeros_t cnt_nz_abv[MAX_NUM_BLOCK88_W_TITLE],
+                                   nzeros_t& predicted_nz) {
+#pragma HLS INLINE
+
+    const bool first_col = (x == 0);
+    nzeros_t guess;
+
+    // Pick the prediction from whichever neighbours exist.
+    if (!first_col && !is_top_row) {
+        guess = (cnt_nz_abv[x] + cnt_nz_left + 1) / 2;
+    } else if (!first_col) {
+        guess = cnt_nz_left;
+    } else if (!is_top_row) {
+        guess = cnt_nz_abv[0];
+    } else {
+        nzeros_t no_neighbour_default = 32;
+        guess = no_neighbour_default;
+    }
+
+    // The neighbours were consumed above; now the current block becomes the
+    // "left" of the next block and the "above" of the block one row down.
+    cnt_nz_left = cnt_nz_here;
+    cnt_nz_abv[x] = cnt_nz_here;
+    predicted_nz = guess;
+}
+// ------------------------------------------------------------
+
+/**
+ * @brief Split (value + 1) into the position of its leading one bit and the
+ * bits below that leading one.
+ *
+ * @param value input value; 0 yields nbits = 0 and bits = 0.
+ * @param nbits out: index of the highest set bit of (value + 1).
+ * @param bits out: (value + 1) with its highest set bit removed.
+ */
+void XAcc_EncodeVarLenUint(uint16_t value, int& nbits, int& bits) {
+#pragma HLS INLINE
+    if (value == 0) {
+        nbits = 0;
+        bits = 0;
+        return;
+    }
+
+    // value is promoted to int before the addition, so value + 1 cannot wrap.
+    // __builtin_clz works on a 32-bit int, hence the "31 ^" to turn the
+    // leading-zero count into a bit index.
+    const int biased = value + 1;
+    uint8_t top_bit = 31 ^ __builtin_clz(biased);
+    nbits = top_bit;
+    bits = biased & ((1 << top_bit) - 1);
+}
+
+/**
+ * @brief Build a context index from a non-zero count and a block context.
+ *
+ * @param non_zeros non-zero count; only its value halved (>> 1) is used.
+ * @param block_ctx offset added to the scaled count.
+ * @return hls_kOrderContexts * (non_zeros >> 1) + block_ctx
+ */
+uint32_t NonZeroContext(uint32_t non_zeros, uint32_t block_ctx) {
+    const uint32_t halved = non_zeros >> 1;
+    return hls_kOrderContexts * halved + block_ctx;
+}
+
+// ------------------------------------------------------------
+
+// ------------------------------------------------------------
+/**
+ * @brief Reorder the raster-scan coefficients of each block into the
+ * per-channel scan order, one coefficient per loop iteration, using a
+ * two-bank (ping/pong) block buffer.
+ *
+ * While one block is being read from strm_coef_raster into one bank, the
+ * previously read block is emitted from the other bank through orders[c].
+ * In total 3 * rect.xsize * rect.ysize blocks are read and the same number
+ * of blocks is written.
+ *
+ * @param rect block counts; rect.xsize and rect.ysize are asserted non-zero.
+ * @param orders per-channel table; orders[c][k] is the buffer index emitted at
+ * output position k. An entry equal to 0 emits the constant 0.
+ * @param strm_coef_raster input coefficients, BLKDIM * BLKDIM per block.
+ * @param strm_coef_orderd output coefficients in table order.
+ */
+void hls_orderBlk(const hls_blksize rect,
+                  const int32_t orders[3][64],
+                  hls::stream<dct_t>& strm_coef_raster,
+                  hls::stream<dct_t>& strm_coef_orderd // dct_t coef_blk[64]//
+                  ) {
+#pragma HLS INLINE OFF
+
+#ifndef __SYNTHESIS__
+    dct_t tmp_max = 0;
+#endif
+
+    // Selects the bank being filled; the other bank (!ping) is the one drained.
+    bool ping = true;
+
+    dct_t coef_blk[2][64];
+#pragma HLS ARRAY_PARTITION variable = coef_blk dim = 1
+#pragma HLS RESOURCE variable = coef_blk core = RAM_2P_BRAM
+
+    assert(rect.ysize != 0);
+    assert(rect.xsize != 0);
+    const int nPingPong = rect.ysize * rect.xsize;
+    // Prologue: fill the first bank; nothing can be emitted yet.
+    for (int i = 0; i < BLKDIM; ++i) {     // 8
+        for (int j = 0; j < BLKDIM; ++j) { // 8
+#pragma HLS PIPELINE II = 1
+            dct_t tmp = strm_coef_raster.read();
+            coef_blk[ping][i * BLKDIM + j] = tmp;
+        }
+    }
+    ping = !ping;
+
+// Steady state: each iteration reads the next block while emitting the
+// previous one. The last channel runs one block short because the prologue
+// already consumed one block; the final block is emitted by the epilogue.
+HLS_READ_BLK:
+    for (int c = 0; c < 3; ++c) {
+        for (int bx = 0; bx < (c == 2 ? (nPingPong - 1) : nPingPong); ++bx) {
+            for (int i = 0; i < BLKDIM; ++i) {     // 8
+                for (int j = 0; j < BLKDIM; ++j) { // 8
+#pragma HLS PIPELINE II = 1
+                    dct_t tmp = strm_coef_raster.read();
+                    coef_blk[ping][i * BLKDIM + j] = tmp;
+
+                    int zig = orders[c][i * BLKDIM + j]; // no dataflow
+                    // A table entry of 0 forces a 0 output instead of reading index 0.
+                    strm_coef_orderd.write((zig ? coef_blk[!ping][zig] : 0));
+                }
+            }
+            ping = !ping;
+        }
+    } // end pingpang
+
+    // Epilogue: drain the last block, which belongs to channel 2.
+    for (int i = 0; i < BLKDIM; ++i) {     // 8
+        for (int j = 0; j < BLKDIM; ++j) { // 8
+#pragma HLS PIPELINE II = 1
+            int zig = orders[2][i * BLKDIM + j];
+            strm_coef_orderd.write((zig ? coef_blk[!ping][zig] : 0));
+        }
+    }
+
+    // tmp_max is never updated in this function, so this reports its initial value.
+    // NOTE(review): tmp_max only exists when __SYNTHESIS__ is undefined; presumably
+    // _XF_IMAGE_PRINT expands to nothing under synthesis - confirm.
+    _XF_IMAGE_PRINT("--dct coeff max=%d - Tokenize\n", tmp_max);
+}
+
+// ------------------------------------------------------------
+// goal: one coefficient per clock
+/**
+ * @brief Count the non-zero coefficients of every block while forwarding the
+ * coefficients to a bank of per-position streams.
+ *
+ * Processes 3 * rect.xsize * rect.ysize blocks of BLOCK_SIZE coefficients.
+ * Coefficient k of a block is forwarded to strm_o_coef[k]; after the last
+ * coefficient of a block the count is written to both count streams.
+ *
+ * @param rect block counts (xsize, ysize).
+ * @param strm_coef input coefficients.
+ * @param strm_o_coef output coefficient streams, one per position in the block.
+ * @param cnt_nz out: non-zero count, one entry per block.
+ * @param cnt_nz2 out: copy of the same count, one entry per block.
+ */
+void hls_count_ac_nz(hls_blksize rect,
+                     hls::stream<dct_t>& strm_coef,
+
+                     hls::stream<dct_t> strm_o_coef[64],
+                     hls::stream<nzeros_t>& cnt_nz,
+                     hls::stream<nzeros_t>& cnt_nz2) {
+#pragma HLS INLINE OFF
+    // Running non-zero count of the block currently being scanned.
+    nzeros_t nz_in_blk = 0;
+
+    for (int blk = 0; blk < 3 * rect.xsize * rect.ysize; ++blk) {
+        for (int k = 0; k < BLOCK_SIZE; k++) {
+#pragma HLS PIPELINE II = 1
+            dct_t coef = strm_coef.read();
+            if (coef != 0) {
+                nz_in_blk += 1;
+            }
+            strm_o_coef[k].write(coef);
+
+            // Last coefficient of the block: publish the count and restart it.
+            if (k == BLOCK_SIZE - 1) {
+                cnt_nz.write(nz_in_blk);
+                cnt_nz2.write(nz_in_blk);
+                _XF_IMAGE_PRINT("%d,", (int)nz_in_blk);
+                nz_in_blk = 0;
+            }
+        }
+    }
+}
+
+// ------------------------------------------------------------
+// goal: one coefficient per clock
+/**
+ * @brief Count the non-zero coefficients of every block while forwarding the
+ * coefficients unchanged on a single stream.
+ *
+ * Processes 3 * rect.xsize * rect.ysize blocks of BLOCK_SIZE coefficients.
+ * After the last coefficient of a block the count is written to both count
+ * streams.
+ *
+ * @param rect block counts (xsize, ysize).
+ * @param strm_coef input coefficients.
+ * @param strm_o_coef out: the same coefficients, in the same order.
+ * @param cnt_nz out: non-zero count, one entry per block.
+ * @param cnt_nz2 out: copy of the same count, one entry per block.
+ */
+void hls_CountAcNz(hls_blksize rect,
+                   hls::stream<dct_t>& strm_coef,
+
+                   hls::stream<dct_t>& strm_o_coef,
+                   hls::stream<nzeros_t>& cnt_nz,
+                   hls::stream<nzeros_t>& cnt_nz2) {
+#pragma HLS INLINE OFF
+    // Running non-zero count of the block currently being scanned.
+    nzeros_t nz_in_blk = 0;
+
+    for (int blk = 0; blk < 3 * rect.xsize * rect.ysize; ++blk) {
+        for (int k = 0; k < BLOCK_SIZE; k++) {
+#pragma HLS PIPELINE II = 1
+            dct_t coef = strm_coef.read();
+            if (coef != 0) {
+                nz_in_blk += 1;
+            }
+            strm_o_coef.write(coef);
+
+            // Last coefficient of the block: publish the count and restart it.
+            if (k == BLOCK_SIZE - 1) {
+                cnt_nz.write(nz_in_blk);
+                cnt_nz2.write(nz_in_blk);
+                _XF_IMAGE_PRINT("%d,", (int)nz_in_blk);
+                nz_in_blk = 0;
+            }
+        }
+    }
+}
+
+// ------------------------------------------------------------
+/**
+ * @brief Turn the per-block non-zero counts into tokens.
+ *
+ * For each of the 3 channels and each block (row by row), reads one count,
+ * predicts it from the left and top neighbours, and writes a token whose
+ * context is NonZeroContext(prediction, channel), whose symbol is the count
+ * itself, and whose nbits and bits are 0.
+ *
+ * @param rect block counts (xsize blocks per row, ysize rows).
+ * @param cnt_nz input non-zero counts, one per block.
+ * @param strm_nz_token output tokens, one per block.
+ */
+void hls_tokenize_nz(hls_blksize rect,
+                     hls::stream<nzeros_t>& cnt_nz,
+
+                     hls::stream<hls_Token>& strm_nz_token) {
+#pragma HLS INLINE OFF
+    nzeros_t cnt_nz_abv[MAX_NUM_BLOCK88_W_TITLE];
+#pragma HLS ARRAY_PARTITION variable = cnt_nz_abv complete dim = 1
+    nzeros_t cnt_nz_left;
+    nzeros_t predicted_nz;
+
+    hls_Token nz_token;
+
+    // Block column; wraps to 0 when a row of blocks is complete.
+    int col = 0;
+
+    for (int c = 0; c < 3; ++c) {
+        // The row index advances only when the column wraps, so this single
+        // loop walks every block of the channel.
+        for (int row = 0; row < rect.ysize;) {
+#pragma HLS PIPELINE II = 1
+
+            bool on_first_row = (row == 0);
+
+            nzeros_t here = cnt_nz.read();
+            XAcc_PredictFromTopAndLeft_nz(here, col, on_first_row, cnt_nz_left, cnt_nz_abv, predicted_nz);
+
+            int32_t predicted = predicted_nz;
+
+            // token = (context, symbol, nbits, bits)
+            nz_token.context = NonZeroContext(predicted, c);
+            nz_token.symbol = here;
+            nz_token.nbits = 0;
+            nz_token.bits = 0;
+
+            strm_nz_token.write(nz_token);
+
+            if (col != rect.xsize - 1) {
+                col++;
+            } else {
+                row++;
+                col = 0;
+            }
+        } // end rows
+    }     // end channels
+}
+
+//// ------------------------------------------------------------
+// void hls_orderblk_tokennz(const int32_t orders[3][64], // color dct_band
+//                               const hls_blksize rect,
+//							   //hls::stream<hls_blksize>
+//&rect,
+//                               hls::stream<dct_t> &strm_coef_raster,
+//
+//                               hls::stream<nzeros_t>& cnt_nz,
+//                               hls::stream<hls_Token>& strm_nz_token,
+//                               hls::stream<dct_t> strm_coef_orderd[64]
+//                               // hls::stream< hls_Tokenbit >& strm_token_bit
+//                               ) {
+//#pragma HLS INLINE
+//#pragma HLS DATAFLOW
+//
+//    static hls::stream<dct_t> strm_coef_ord("strm_coef_ord");
+////#pragma HLS RESOURCE variable = strm_coef_ord core = FIFO_LUTRAM
+//#pragma HLS STREAM variable = strm_coef_ord depth = 1024
+//
+//    // for predict
+//    hls::stream<nzeros_t> cnt_nz2("strm_cnt_nz2");
+//#pragma HLS RESOURCE variable = cnt_nz2 core = FIFO_LUTRAM
+//#pragma HLS STREAM variable = cnt_nz2 depth = 32
+//
+//    _XF_IMAGE_PRINT("\n --2 tmp_num_nzeros begin - Tokenize \n");
+//
+//
+//    hls_order_blk(rect, orders, strm_coef_raster, strm_coef_ord);
+//
+//    hls_count_ac_nz(rect, strm_coef_ord, strm_coef_orderd, cnt_nz, cnt_nz2);
+//
+//    hls_tokenize_nz(rect, cnt_nz2,  // ac_static_context_map,
+//                         strm_nz_token); //, strm_coef_orderd
+//}
+
+// ------------------------------------------------------------
+// goal: one token / clock
+/**
+ * @brief Scan ordered blocks and encode their non-zero coefficients as
+ * (symbol, nbits, bits) tokens.
+ *
+ * A block is loaded in one step from the 64 parallel coefficient streams
+ * together with its non-zero count. The scan starts at position 1 and stops
+ * as soon as the remaining non-zero count reaches 0. For every block the
+ * number of emitted tokens is written to strm_cnt_ac.
+ *
+ * Token layout: symbol[7:4] = run of zeros before the coefficient,
+ * symbol[3:0] = low 4 bits of nbits. A zero found while run == 15 emits a
+ * token with symbol[7:4] = 15, nbits = 0, bits = 0.
+ *
+ * The token's context field is not assigned here (the assignment is
+ * commented out).
+ *
+ * @param rect block counts (xsize blocks per row, ysize rows).
+ * @param strm_coef_orderd one stream per position in the block.
+ * @param cnt_nz non-zero count, one entry per block.
+ * @param strm_cnt_ac out: number of tokens emitted for each block.
+ * @param strm_ac_token out: the tokens.
+ */
+
+void tokenize_blk_syn_test_org(
+
+    const hls_blksize rect,
+    // hls::stream<hls_blksize> &rect,
+    hls::stream<dct_t> strm_coef_orderd[64],
+    hls::stream<nzeros_t>& cnt_nz,
+
+    hls::stream<nzeros_t>& strm_cnt_ac,
+    hls::stream<hls_Token>& strm_ac_token
+
+    ) {
+#pragma HLS INLINE OFF
+
+    nzeros_t reg_nz_cnt; // reg of last cnt
+    nzeros_t cnt;        // cnt for nz--
+
+    ap_uint<8> kSkipAndBits;
+    hls_Token ac_token;
+
+    nzeros_t len = 0;      // cnt for ac_tokens
+    nzeros_t run = 0;      // cnt for run
+    nzeros_t bpos = 1;     // cnt for pos in the block
+    nzeros_t last_pos = 0; // reg of last pos
+    // total_token = 4096;//init for nz tokens
+
+    dct_t blk_orderd[64];
+#pragma HLS ARRAY_PARTITION variable = blk_orderd complete dim = 1
+#pragma HLS ARRAY_PARTITION variable = hls_kCoeffNumNonzeroContext complete dim = 1
+#pragma HLS ARRAY_PARTITION variable = hls_kCoeffFreqContext complete dim = 1
+    int bx = 0;
+
+PICK_OUT_NZ_LOOP:
+    for (int c = 0; c < 3; ++c) {
+        // 1. init
+        // histo_offset is computed but not used in this variant.
+        ap_uint<9> histo_offset = 96 + 105 * c; // ZeroDensityContextsOffset(c);
+
+        // Pre-load the first block of the channel.
+        // NOTE(review): the guard is true when either dimension is non-zero; if only
+        // one of them is 0 the pre-load still reads from the streams - confirm intent.
+        if (rect.xsize || rect.ysize) {
+            reg_nz_cnt = cnt_nz.read();
+            cnt = reg_nz_cnt;
+
+            for (int j = 0; j < 64; j++) {
+#pragma HLS UNROLL
+                blk_orderd[j] = strm_coef_orderd[j].read();
+            }
+        }
+
+        // by advances only in step 3, once the last block of a row is finished.
+        for (int by = 0; by < rect.ysize;) { //++by// cnt for block// while(bx < rect.xsize){
+#pragma HLS PIPELINE II = 1
+
+            // 2. write out
+            if (cnt > 0) {
+                if (blk_orderd[bpos] != 0) { // find nz and encode nz token
+
+                    int nbits, bits;
+                    //  EncodeVarLenInt((int32_t)blk_orderd[bpos], &nbits,
+                    //  &bits);//(-1,1,0)
+                    XAcc_EncodeVarLenUint(hls_PackSigned((int16_t)blk_orderd[bpos]), nbits, bits);
+
+                    kSkipAndBits(7, 4) = run;
+                    kSkipAndBits(3, 0) = nbits;
+
+                    //                    ac_token.context =
+                    //                        histo_offset +
+                    //                        hls_kCoeffNumNonzeroContext[cnt] +
+                    //                        hls_kCoeffFreqContext[last_pos];
+                    ac_token.symbol = kSkipAndBits;
+                    ac_token.nbits = nbits;
+                    ac_token.bits = bits;
+
+                    len++;
+                    strm_ac_token.write(ac_token);
+
+                    run = 0;
+                    last_pos = bpos;
+                    cnt--;
+
+                } else {             // find 0 and run++
+                    if (run == 15) { // find ff and encode ff token
+                        ap_uint<4> nbits = 0;
+                        ap_uint<4> skip = 15;
+                        kSkipAndBits(7, 4) = skip;
+                        kSkipAndBits(3, 0) = nbits;
+
+                        //                        ac_token.context =
+                        //                            histo_offset +
+                        //                            hls_kCoeffNumNonzeroContext[cnt] +
+                        //                            hls_kCoeffFreqContext[last_pos];
+                        ac_token.symbol = kSkipAndBits;
+                        ac_token.nbits = nbits;
+                        ac_token.bits = 0;
+
+                        len++;
+                        strm_ac_token.write(ac_token);
+
+                        run = 0;
+                        last_pos = bpos;
+                    } else {
+                        run++;
+                    }
+                }
+
+                bpos++;
+            }
+
+            // 3. read new block
+            // Runs in the same iteration that emitted the block's last non-zero,
+            // and immediately for a block whose count is 0.
+            if (!cnt) {
+                strm_cnt_ac.write(len);
+                len = 0;
+                run = 0;
+                last_pos = 0;
+                bpos = 1;
+
+                if (bx == rect.xsize - 1) {
+                    // End of a row: load the next block unless this was the last row
+                    // of the channel (the next channel pre-loads its own first block).
+                    if (by < rect.ysize - 1) {
+                        reg_nz_cnt = cnt_nz.read();
+                        cnt = reg_nz_cnt;
+
+                        for (int j = 0; j < 64; j++) {
+#pragma HLS UNROLL
+                            blk_orderd[j] = strm_coef_orderd[j].read();
+                        }
+                    }
+
+                    bx = 0;
+                    by++; // break here
+                } else {
+                    reg_nz_cnt = cnt_nz.read();
+                    cnt = reg_nz_cnt;
+
+                    for (int j = 0; j < 64; j++) {
+#pragma HLS UNROLL
+                        blk_orderd[j] = strm_coef_orderd[j].read();
+                    }
+
+                    bx++;
+                }
+            }
+        }
+    }
+} // end tile
+
+/**
+ * @brief Scan ordered blocks and encode their non-zero coefficients as
+ * (symbol, nbits, bits) tokens, forwarding the values needed to compute each
+ * token's context on separate streams.
+ *
+ * A block is loaded in one step from the 64 parallel coefficient streams
+ * together with its non-zero count. The scan starts at position 1 and stops
+ * as soon as the remaining non-zero count reaches 0. The token's context
+ * field is not assigned here; for every token one entry is written to each of
+ * strm_histo_offset, strm_cnt_lookup and strm_last_pos.
+ *
+ * Token layout: symbol[7:4] = run of zeros before the coefficient,
+ * symbol[3:0] = low 4 bits of nbits. A zero found while run == 15 emits a
+ * token with symbol[7:4] = 15, nbits = 0, bits = 0.
+ *
+ * @param rect block counts (xsize blocks per row, ysize rows).
+ * @param strm_coef_orderd one stream per position in the block.
+ * @param cnt_nz non-zero count, one entry per block.
+ * @param strm_cnt_ac out: number of tokens emitted for each block.
+ * @param strm_histo_offset out, per token: 96 + 105 * channel.
+ * @param strm_cnt_lookup out, per token: remaining non-zero count before the token.
+ * @param strm_last_pos out, per token: position of the previous token in the block
+ * (0 at the start of a block).
+ * @param strm_ac_token out: the tokens.
+ */
+void tokenize_blk_syn_test(
+
+    const hls_blksize rect,
+    // hls::stream<hls_blksize> &rect,
+    hls::stream<dct_t> strm_coef_orderd[64],
+    hls::stream<nzeros_t>& cnt_nz,
+
+    hls::stream<nzeros_t>& strm_cnt_ac,
+    hls::stream<ap_uint<9> >& strm_histo_offset,
+    hls::stream<nzeros_t>& strm_cnt_lookup,
+    hls::stream<nzeros_t>& strm_last_pos,
+    // hls::stream<bool>& strm_e,
+    hls::stream<hls_Token>& strm_ac_token
+
+    ) {
+#pragma HLS INLINE OFF
+
+    nzeros_t reg_nz_cnt; // reg of last cnt
+    nzeros_t cnt;        // cnt for nz--
+
+    ap_uint<8> kSkipAndBits;
+    hls_Token ac_token;
+
+    nzeros_t len = 0;      // cnt for ac_tokens
+    nzeros_t run = 0;      // cnt for run
+    nzeros_t bpos = 1;     // cnt for pos in the block
+    nzeros_t last_pos = 0; // reg of last pos
+    // total_token = 4096;//init for nz tokens
+
+    dct_t blk_orderd[64];
+#pragma HLS ARRAY_PARTITION variable = blk_orderd complete dim = 1
+#pragma HLS ARRAY_PARTITION variable = hls_kCoeffNumNonzeroContext complete dim = 1
+#pragma HLS ARRAY_PARTITION variable = hls_kCoeffFreqContext complete dim = 1
+    int bx = 0;
+
+PICK_OUT_NZ_LOOP:
+    for (int c = 0; c < 3; ++c) {
+        // 1. init
+        ap_uint<9> histo_offset = 96 + 105 * c; // ZeroDensityContextsOffset(c);
+
+        // Pre-load the first block of the channel.
+        // NOTE(review): the guard is true when either dimension is non-zero; if only
+        // one of them is 0 the pre-load still reads from the streams - confirm intent.
+        if (rect.xsize || rect.ysize) {
+            reg_nz_cnt = cnt_nz.read();
+            cnt = reg_nz_cnt;
+
+            for (int j = 0; j < 64; j++) {
+#pragma HLS UNROLL
+                blk_orderd[j] = strm_coef_orderd[j].read();
+            }
+        }
+
+        // by advances only in step 3, once the last block of a row is finished.
+        for (int by = 0; by < rect.ysize;) { //++by// cnt for block// while(bx < rect.xsize){
+#pragma HLS PIPELINE II = 1
+
+            // 2. write out
+            if (cnt > 0) {
+                if (blk_orderd[bpos] != 0) { // find nz and encode nz token
+
+                    int nbits, bits;
+                    //  EncodeVarLenInt((int32_t)blk_orderd[bpos], &nbits,
+                    //  &bits);//(-1,1,0)
+                    XAcc_EncodeVarLenUint(hls_PackSigned((int16_t)blk_orderd[bpos]), nbits, bits);
+
+                    kSkipAndBits(7, 4) = run;
+                    kSkipAndBits(3, 0) = nbits;
+
+                    //                    ac_token.context =
+                    //                        histo_offset +
+                    //                        hls_kCoeffNumNonzeroContext[cnt] +
+                    //                        hls_kCoeffFreqContext[last_pos];
+                    ac_token.symbol = kSkipAndBits;
+                    ac_token.nbits = nbits;
+                    ac_token.bits = bits;
+
+                    len++;
+                    strm_ac_token.write(ac_token);
+                    // Side-band values written before cnt and last_pos are updated.
+                    strm_histo_offset.write(histo_offset);
+                    strm_cnt_lookup.write(cnt);
+                    strm_last_pos.write(last_pos);
+                    // strm_e.write(false);
+
+                    run = 0;
+                    last_pos = bpos;
+                    cnt--;
+
+                } else {             // find 0 and run++
+                    if (run == 15) { // find ff and encode ff token
+                        ap_uint<4> nbits = 0;
+                        ap_uint<4> skip = 15;
+                        kSkipAndBits(7, 4) = skip;
+                        kSkipAndBits(3, 0) = nbits;
+
+                        //                        ac_token.context =
+                        //                            histo_offset +
+                        //                            hls_kCoeffNumNonzeroContext[cnt] +
+                        //                            hls_kCoeffFreqContext[last_pos];
+                        ac_token.symbol = kSkipAndBits;
+                        ac_token.nbits = nbits;
+                        ac_token.bits = 0;
+
+                        len++;
+                        strm_ac_token.write(ac_token);
+                        strm_histo_offset.write(histo_offset);
+                        strm_cnt_lookup.write(cnt);
+                        strm_last_pos.write(last_pos);
+                        // strm_e.write(false);
+
+                        run = 0;
+                        last_pos = bpos;
+                    } else {
+                        run++;
+                    }
+                }
+
+                bpos++;
+            }
+
+            // 3. read new block
+            // Runs in the same iteration that emitted the block's last non-zero,
+            // and immediately for a block whose count is 0.
+            if (!cnt) {
+                strm_cnt_ac.write(len);
+                len = 0;
+                run = 0;
+                last_pos = 0;
+                bpos = 1;
+
+                if (bx == rect.xsize - 1) {
+                    // End of a row: load the next block unless this was the last row
+                    // of the channel (the next channel pre-loads its own first block).
+                    if (by < rect.ysize - 1) {
+                        reg_nz_cnt = cnt_nz.read();
+                        cnt = reg_nz_cnt;
+
+                        for (int j = 0; j < 64; j++) {
+#pragma HLS UNROLL
+                            blk_orderd[j] = strm_coef_orderd[j].read();
+                        }
+                    }
+
+                    bx = 0;
+                    by++; // break here
+                } else {
+                    reg_nz_cnt = cnt_nz.read();
+                    cnt = reg_nz_cnt;
+
+                    for (int j = 0; j < 64; j++) {
+#pragma HLS UNROLL
+                        blk_orderd[j] = strm_coef_orderd[j].read();
+                    }
+
+                    bx++;
+                }
+            }
+        }
+        // strm_e.write(true);
+    }
+
+} // end tile
+
+// ------------------------------------------------------------
+/**
+ * @brief Encode the non-zero coefficients of ordered blocks as
+ * (symbol, nbits, bits) tokens, reading one coefficient per loop iteration
+ * from a single stream.
+ *
+ * Every iteration reads exactly one coefficient. At position 0 of a block the
+ * coefficient is discarded and the block's non-zero count is read from cnt_nz.
+ * At the other positions a token is produced only while the remaining count
+ * is above 0; the rest of the block is still read from the stream. After the
+ * last position the number of tokens of the block is written to strm_cnt_ac.
+ *
+ * The token's context field is not assigned here; for every token one entry
+ * is written to each of strm_histo_offset, strm_cnt_lookup and strm_last_pos.
+ *
+ * Token layout: symbol[7:4] = run of zeros before the coefficient,
+ * symbol[3:0] = low 4 bits of nbits. A zero found while run == 15 emits a
+ * token with symbol[7:4] = 15, nbits = 0, bits = 0.
+ *
+ * @param rect block counts (xsize, ysize).
+ * @param strm_coef_orderd input coefficients, BLOCK_SIZE per block.
+ * @param cnt_nz non-zero count, one entry per block.
+ * @param strm_cnt_ac out: number of tokens emitted for each block.
+ * @param strm_histo_offset out, per token: 96 + 105 * channel.
+ * @param strm_cnt_lookup out, per token: remaining non-zero count before the token.
+ * @param strm_last_pos out, per token: position of the previous token in the block
+ * (0 at the start of a block).
+ * @param strm_ac_token out: the tokens.
+ */
+void hls_TokenizeBlk(
+
+    const hls_blksize rect,
+    hls::stream<dct_t>& strm_coef_orderd,
+    hls::stream<nzeros_t>& cnt_nz,
+
+    hls::stream<nzeros_t>& strm_cnt_ac,
+    hls::stream<ap_uint<9> >& strm_histo_offset,
+    hls::stream<nzeros_t>& strm_cnt_lookup,
+    hls::stream<nzeros_t>& strm_last_pos,
+    // hls::stream<bool>& strm_e,
+    hls::stream<hls_Token>& strm_ac_token
+
+    ) {
+#pragma HLS INLINE OFF
+
+    nzeros_t reg_nz_cnt; // reg of last cnt
+    nzeros_t cnt;        // cnt for nz--
+
+    ap_uint<8> kSkipAndBits;
+    hls_Token ac_token;
+
+    nzeros_t len = 0;      // cnt for ac_tokens
+    nzeros_t run = 0;      // cnt for run
+    nzeros_t bpos = 0;     // cnt for pos in the block
+    nzeros_t last_pos = 0; // reg of last pos
+                           // total_token = 4096;//init for nz tokens
+
+#pragma HLS ARRAY_PARTITION variable = hls_kCoeffNumNonzeroContext complete dim = 1
+#pragma HLS ARRAY_PARTITION variable = hls_kCoeffFreqContext complete dim = 1
+
+    dct_t ac;
+
+PICK_OUT_NZ_LOOP:
+    for (int c = 0; c < 3; ++c) {
+        // bx counts coefficients here, not blocks: one iteration per coefficient.
+        for (int bx = 0; bx < rect.xsize * rect.ysize * BLOCK_SIZE; bx++) { // while
+#pragma HLS PIPELINE II = 1
+
+            if (bpos == 0) { // pos == 0 no token
+                cnt = cnt_nz.read();
+                // Read only to consume the stream entry; the value is not used.
+                dct_t tmp = strm_coef_orderd.read();
+            } else {
+                ac = strm_coef_orderd.read();
+                ap_uint<9> histo_offset = 96 + 105 * c; // ZeroDensityContextsOffset(c);
+
+                if (cnt > 0) {
+                    if (ac != 0) { // find nz and encode nz token
+
+                        int nbits, bits;
+                        //  EncodeVarLenInt((int32_t)blk_orderd[bpos], &nbits,
+                        //  &bits);//(-1,1,0)
+                        XAcc_EncodeVarLenUint(hls_PackSigned((int16_t)ac), nbits, bits);
+
+                        kSkipAndBits(7, 4) = run;
+                        kSkipAndBits(3, 0) = nbits;
+
+                        //                    ac_token.context =
+                        //                        histo_offset +
+                        //                        hls_kCoeffNumNonzeroContext[cnt] +
+                        //                        hls_kCoeffFreqContext[last_pos];
+                        ac_token.symbol = kSkipAndBits;
+                        ac_token.nbits = nbits;
+                        ac_token.bits = bits;
+
+                        len++;
+                        strm_ac_token.write(ac_token);
+                        // Side-band values written before cnt and last_pos are updated.
+                        strm_histo_offset.write(histo_offset);
+                        strm_cnt_lookup.write(cnt);
+                        strm_last_pos.write(last_pos);
+                        // strm_e.write(false);
+
+                        run = 0;
+                        last_pos = bpos;
+                        cnt--;
+
+                    } else {             // find 0 and run++
+                        if (run == 15) { // find ff and encode ff token
+                            ap_uint<4> nbits = 0;
+                            ap_uint<4> skip = 15;
+                            kSkipAndBits(7, 4) = skip;
+                            kSkipAndBits(3, 0) = nbits;
+
+                            //                        ac_token.context =
+                            //                            histo_offset +
+                            //                            hls_kCoeffNumNonzeroContext[cnt] +
+                            //                            hls_kCoeffFreqContext[last_pos];
+                            ac_token.symbol = kSkipAndBits;
+                            ac_token.nbits = nbits;
+                            ac_token.bits = 0;
+
+                            len++;
+                            strm_ac_token.write(ac_token);
+                            strm_histo_offset.write(histo_offset);
+                            strm_cnt_lookup.write(cnt);
+                            strm_last_pos.write(last_pos);
+                            // strm_e.write(false);
+
+                            run = 0;
+                            last_pos = bpos;
+                        } else {
+                            run++;
+                        }
+                    }
+                }
+            } // end ac
+
+            // End of block: publish the token count and reset the per-block state.
+            if (bpos == BLOCK_SIZE - 1) {
+                strm_cnt_ac.write(len);
+                run = 0;
+                last_pos = 0;
+                len = 0;
+                bpos = 0;
+            } else {
+                ++bpos;
+            }
+
+        } // end inter loop
+    }
+
+} // end tile
+
+// ------------------------------------------------------------
+/**
+ * @brief Fill in the context of each token from the two lookup tables,
+ * driven by an end flag stream.
+ *
+ * For each of the 3 channels, one flag is read up front; while the flag is
+ * false, one entry is read from each input stream, the token's context is set
+ * to histo_offset + hls_kCoeffNumNonzeroContext[cnt] +
+ * hls_kCoeffFreqContext[last_pos], the token is written out, and the next
+ * flag is read.
+ *
+ * @param strm_histo_offset per-token base offset.
+ * @param strm_cnt_lookup per-token index into hls_kCoeffNumNonzeroContext.
+ * @param strm_last_pos per-token index into hls_kCoeffFreqContext.
+ * @param strm_ac_token input tokens.
+ * @param strm_e end flags; a true flag ends the current channel.
+ * @param strm_ac_token_out output tokens with context filled in.
+ */
+void tokenize_lookup_table1(hls::stream<ap_uint<9> >& strm_histo_offset,
+                            hls::stream<nzeros_t>& strm_cnt_lookup,
+                            hls::stream<nzeros_t>& strm_last_pos,
+                            hls::stream<hls_Token>& strm_ac_token,
+                            hls::stream<bool>& strm_e,
+
+                            hls::stream<hls_Token>& strm_ac_token_out
+
+                            ) {
+#pragma HLS INLINE OFF
+
+    for (int c = 0; c < 3; ++c) {
+        bool channel_done = strm_e.read();
+        while (!channel_done) {
+#pragma HLS PIPELINE II = 1
+            ap_uint<9> base = strm_histo_offset.read();
+            nzeros_t nz_idx = strm_cnt_lookup.read();
+            nzeros_t pos_idx = strm_last_pos.read();
+            hls_Token tok = strm_ac_token.read();
+            // Fetch the flag for the next iteration before finishing this token.
+            channel_done = strm_e.read();
+
+            tok.context = base + hls_kCoeffNumNonzeroContext[nz_idx] + hls_kCoeffFreqContext[pos_idx];
+
+            strm_ac_token_out.write(tok);
+        }
+    }
+}
+
+// ------------------------------------------------------------
+/**
+ * @brief Fill in the context of each token from the two lookup tables,
+ * driven by the per-block token counts.
+ *
+ * For every block a token count is read from strm_cnt_ac and forwarded to
+ * strm_cnt_ac_out. That many tokens are then read, each with one entry from
+ * strm_histo_offset, strm_cnt_lookup and strm_last_pos; the token's context is
+ * set to histo_offset + hls_kCoeffNumNonzeroContext[cnt] +
+ * hls_kCoeffFreqContext[last_pos] and the token is written out.
+ *
+ * @param rect block counts (xsize blocks per row, ysize rows).
+ * @param strm_cnt_ac number of tokens of each block.
+ * @param strm_histo_offset per-token base offset.
+ * @param strm_cnt_lookup per-token index into hls_kCoeffNumNonzeroContext.
+ * @param strm_last_pos per-token index into hls_kCoeffFreqContext.
+ * @param strm_ac_token input tokens.
+ * @param strm_cnt_ac_out out: copy of the per-block token counts.
+ * @param strm_ac_token_out out: tokens with context filled in.
+ */
+void tokenize_lookup_table(const hls_blksize rect,
+                           hls::stream<nzeros_t>& strm_cnt_ac,
+                           hls::stream<ap_uint<9> >& strm_histo_offset,
+                           hls::stream<nzeros_t>& strm_cnt_lookup,
+                           hls::stream<nzeros_t>& strm_last_pos,
+                           hls::stream<hls_Token>& strm_ac_token,
+
+                           hls::stream<nzeros_t>& strm_cnt_ac_out,
+                           hls::stream<hls_Token>& strm_ac_token_out
+
+                           ) {
+#pragma HLS INLINE OFF
+
+    // Tokens still to be processed for the current block.
+    nzeros_t pending = 0;
+    for (int c = 0; c < 3; ++c) {
+        for (int row = 0; row < rect.ysize; ++row) {
+            // One extra step past the last block lets the tokens of that block
+            // drain before the row ends; col advances only when pending is 0.
+            for (int col = 0; col < rect.xsize + 1;) {
+#pragma HLS PIPELINE II = 1
+
+                if (pending != 0) {
+                    hls_Token tok = strm_ac_token.read();
+                    ap_uint<9> base = strm_histo_offset.read();
+                    nzeros_t nz_idx = strm_cnt_lookup.read();
+                    nzeros_t pos_idx = strm_last_pos.read();
+
+                    tok.context =
+                        base + hls_kCoeffNumNonzeroContext[nz_idx] + hls_kCoeffFreqContext[pos_idx];
+
+                    strm_ac_token_out.write(tok);
+
+                    pending--;
+                } else {
+                    // No tokens left: fetch the count of the next block, if any.
+                    if (col < rect.xsize) {
+                        pending = strm_cnt_ac.read();
+                        strm_cnt_ac_out.write(pending);
+                    }
+
+                    col++; // the loop exits from here
+                }
+
+            } // end columns
+        }
+    } // end channels
+}
+
+// ------------------------------------------------------------
+/**
+ * @brief Scan ordered blocks and encode their non-zero coefficients as
+ * complete tokens (context, symbol, nbits, bits).
+ *
+ * A block is loaded in one step from the 64 parallel coefficient streams
+ * together with its non-zero count. The scan starts at position 1 and stops
+ * as soon as the remaining non-zero count reaches 0. For every block the
+ * number of emitted tokens is written to strm_cnt_ac.
+ *
+ * Token layout: context = histo_offset + hls_kCoeffNumNonzeroContext[cnt] +
+ * hls_kCoeffFreqContext[last_pos]; symbol[7:4] = run of zeros before the
+ * coefficient, symbol[3:0] = low 4 bits of nbits. A zero found while
+ * run == 15 emits a token with symbol[7:4] = 15, nbits = 0, bits = 0.
+ *
+ * @param rect block counts (xsize blocks per row, ysize rows).
+ * @param strm_coef_orderd one stream per position in the block.
+ * @param cnt_nz non-zero count, one entry per block.
+ * @param strm_cnt_ac out: number of tokens emitted for each block.
+ * @param strm_ac_token out: the tokens.
+ */
+void tokenize_blk(const hls_blksize rect,
+                  hls::stream<dct_t> strm_coef_orderd[64],
+                  hls::stream<nzeros_t>& cnt_nz,
+
+                  hls::stream<nzeros_t>& strm_cnt_ac,
+                  hls::stream<hls_Token>& strm_ac_token
+
+                  ) {
+    nzeros_t reg_nz_cnt; // reg of last cnt
+    nzeros_t cnt;        // cnt for nz--
+
+    ap_uint<8> kSkipAndBits;
+    hls_Token ac_token;
+
+    nzeros_t len = 0;      // cnt for ac_tokens
+    nzeros_t run = 0;      // cnt for run
+    nzeros_t bpos = 1;     // cnt for pos in the block
+    nzeros_t last_pos = 0; // reg of last pos
+    // total_token = 4096;//init for nz tokens
+
+    dct_t blk_orderd[64];
+#pragma HLS ARRAY_PARTITION variable = blk_orderd complete dim = 1
+#pragma HLS ARRAY_PARTITION variable = hls_kCoeffNumNonzeroContext complete dim = 1
+#pragma HLS ARRAY_PARTITION variable = hls_kCoeffFreqContext complete dim = 1
+    // 1. init
+
+    for (int c = 0; c < 3; ++c) {
+        for (int by = 0; by < rect.ysize; ++by) {
+            // Pre-load the first block of each row; the inner loop loads the rest.
+            if (rect.xsize) {
+                reg_nz_cnt = cnt_nz.read();
+                // strm_cnt_nz.write(reg_nz_cnt);
+                cnt = reg_nz_cnt;
+
+                for (int j = 0; j < 64; j++) {
+#pragma HLS UNROLL
+                    blk_orderd[j] = strm_coef_orderd[j].read();
+                }
+            }
+
+            ap_uint<9> histo_offset = 96 + 105 * c; // ZeroDensityContextsOffset(c);
+            int bx = 0;                             // cnt for block
+        PICK_OUT_NZ_LOOP:
+            // bx advances only in step 3, once the current block is finished.
+            while (bx < rect.xsize) {
+#pragma HLS PIPELINE II = 2
+
+                // 2. write out
+                if (cnt > 0) {
+                    if (blk_orderd[bpos] != 0) { // find nz and encode nz token
+
+                        int nbits, bits;
+                        XAcc_EncodeVarLenUint(hls_PackSigned((int16_t)blk_orderd[bpos]), nbits, bits);
+
+                        kSkipAndBits(7, 4) = run;
+                        kSkipAndBits(3, 0) = nbits;
+
+                        // Context uses cnt and last_pos as they were before this token.
+                        ac_token.context =
+                            histo_offset + hls_kCoeffNumNonzeroContext[cnt] + hls_kCoeffFreqContext[last_pos];
+                        ac_token.symbol = kSkipAndBits;
+                        ac_token.nbits = nbits;
+                        ac_token.bits = bits;
+
+                        len++;
+                        strm_ac_token.write(ac_token);
+
+                        run = 0;
+                        last_pos = bpos;
+                        cnt--;
+
+                    } else {             // find 0 and run++
+                        if (run == 15) { // find ff and encode ff token
+                            ap_uint<4> nbits = 0;
+                            ap_uint<4> skip = 15;
+                            kSkipAndBits(7, 4) = skip;
+                            kSkipAndBits(3, 0) = nbits;
+
+                            ac_token.context =
+                                histo_offset + hls_kCoeffNumNonzeroContext[cnt] + hls_kCoeffFreqContext[last_pos];
+                            ac_token.symbol = kSkipAndBits;
+                            ac_token.nbits = nbits;
+                            ac_token.bits = 0;
+
+                            len++;
+                            strm_ac_token.write(ac_token);
+
+                            run = 0;
+                            last_pos = bpos;
+                        } else {
+                            run++;
+                        }
+                    }
+
+                    bpos++;
+                }
+
+                // 3. read new block
+                // Runs in the same iteration that emitted the block's last non-zero,
+                // and immediately for a block whose count is 0.
+                if (!cnt) {
+                    // The last block of a row does not load a successor here; the
+                    // next row pre-loads its own first block.
+                    if (bx < rect.xsize - 1) {
+                        reg_nz_cnt = cnt_nz.read();
+                        cnt = reg_nz_cnt;
+
+                        for (int j = 0; j < 64; j++) {
+#pragma HLS UNROLL
+                            blk_orderd[j] = strm_coef_orderd[j].read();
+                        }
+                    }
+
+                    strm_cnt_ac.write(len);
+                    len = 0;
+                    run = 0;
+                    last_pos = 0;
+                    bpos = 1;
+                    bx++; // break here
+                }
+            }
+        }
+    } // end tile
+}
+// ------------------------------------------------------------
+
+// Merges, for one tile, the per-block token stream (strm_nz_token) with the
+// per-coefficient token stream (strm_ac_token) into a single output sequence.
+//
+// For each of 3 channels and each of rect.ysize block rows, the inner loop
+// alternates between two states driven by `len`:
+//   len == 0 : if a block is still pending in this row (bx < rect.xsize), read
+//              its token count from strm_cnt_ac and one token from
+//              strm_nz_token, then emit that token. bx advances on this path
+//              only.
+//   len != 0 : read one token from strm_ac_token, emit it, decrement len.
+// The loop bound is rect.xsize + 1 so that the tokens of the last block in the
+// row are drained before the row ends.
+//
+// Every emitted token produces:
+//   strm_token_addr : (ac_static_context_map[context] << 8) + symbol
+//   strm_token_symb : {context, symbol}
+//   strm_token_bits : {bits, nbits}
+//   strm_e_addr / strm_e_token : false
+// For tokens taken from strm_ac_token the symbol is first mapped through
+// hls_kSkipAndBitsSymbol. When e_tile is true, one final `true` is written to
+// both strm_e_addr and strm_e_token after all tokens.
+void collect_token_syn(const hls_blksize rect,
+                       // hls::stream<hls_blksize> &rect,
+                       uint8_t ac_static_context_map[hls_kNumContexts],
+                       bool e_tile,
+
+                       hls::stream<hls_Token>& strm_nz_token,
+                       hls::stream<nzeros_t>& strm_cnt_ac,
+                       hls::stream<hls_Token>& strm_ac_token,
+
+                       hls::stream<ap_uint<13> >& strm_token_addr,
+                       hls::stream<bool>& strm_e_addr,
+                       hls::stream<bool>& strm_e_token,
+                       hls::stream<hls_Token_symb>& strm_token_symb,
+                       hls::stream<hls_Token_bits>& strm_token_bits) {
+#pragma HLS INLINE OFF
+
+    hls_Token ac_token;
+    nzeros_t len = 0; // tokens still to be read from strm_ac_token for the current block
+
+#pragma HLS ARRAY_PARTITION variable = hls_kSkipAndBitsSymbol complete dim = 1
+
+    for (int c = 0; c < 3; ++c) {
+        for (int by = 0; by < rect.ysize; ++by) {
+            // for (int bx = 0; bx < xsize_blocks; ++bx) {
+
+            // No increment clause: bx is advanced inside the body, only when len == 0.
+            for (int bx = 0; bx < rect.xsize + 1;) {
+// while(bx < rect.xsize+1 ){
+#pragma HLS PIPELINE II = 1
+
+                if (len == 0) { // no tokens pending for the current block
+
+                    if (bx < rect.xsize) {
+                        // nzeros_t reg_cnt_nz = strm_cnt_nz.read();// to be removed,
+                        // unused
+                        len = strm_cnt_ac.read();
+                        ac_token = strm_nz_token.read();
+                        // Token out_token(0,0,0,0);
+
+                        // Symbol is used as-is here (no hls_kSkipAndBitsSymbol mapping).
+                        ap_uint<13> ac_token_addr =
+                            ((uint16_t)ac_static_context_map[ac_token.context] << 8) + ac_token.symbol;
+                        strm_token_addr.write(ac_token_addr);
+                        strm_e_addr.write(false);
+                        strm_e_token.write(false);
+
+                        hls_Token_symb token_symb;
+                        hls_Token_bits token_bits;
+                        token_symb.context = ac_token.context;
+                        token_symb.symbol = ac_token.symbol;
+                        token_bits.bits = ac_token.bits;
+                        token_bits.nbits = ac_token.nbits;
+
+                        strm_token_symb.write(token_symb);
+                        strm_token_bits.write(token_bits);
+                        _XF_IMAGE_PRINT("---write token(%d,%d,0,0,%d) \n", (int)(ac_token.context),
+                                        (int)(ac_token.symbol), (int)ac_token_addr.V.VAL);
+                        //_XF_IMAGE_PRINT("---len=%d \n",len);
+                    }
+
+                    bx++; // the only place bx advances; ends the loop once bx == rect.xsize + 1
+
+                } else {
+                    ac_token = strm_ac_token.read();
+                    // addr_c = ac_token.addr;
+                    // Symbol is remapped through hls_kSkipAndBitsSymbol on this path.
+                    ap_uint<13> ac_token_addr = ((uint16_t)ac_static_context_map[ac_token.context] << 8) +
+                                                hls_kSkipAndBitsSymbol[ac_token.symbol];
+                    strm_token_addr.write(ac_token_addr);
+                    strm_e_addr.write(false);
+                    strm_e_token.write(false);
+
+                    hls_Token_symb token_symb;
+                    hls_Token_bits token_bits;
+                    token_symb.context = ac_token.context;
+                    token_symb.symbol = hls_kSkipAndBitsSymbol[ac_token.symbol]; // ac_token.symbol;
+                    token_bits.bits = ac_token.bits;
+                    token_bits.nbits = ac_token.nbits;
+                    strm_token_symb.write(token_symb);
+                    strm_token_bits.write(token_bits);
+
+                    len--;
+                    _XF_IMAGE_PRINT("---write token(%d,%d,%d,%d,%d), skip_bits=\n", ac_token.context,
+                                    hls_kSkipAndBitsSymbol[ac_token.symbol], ac_token.nbits, ac_token.bits,
+                                    (int)ac_token_addr.V.VAL);
+                }
+
+            } // end bx
+        }
+    } // end tile
+
+    // End markers are emitted only for the tile flagged by the caller.
+    if (e_tile) {
+        strm_e_addr.write(true);
+        strm_e_token.write(true);
+    }
+}
+//
+////----------------------------------------------------------
+// void hls_read_config(hls::stream<hls_Rect>& strm_rect,
+//                     hls::stream<bool>& strm_e_tile,
+//					 hls::stream<hls_blksize>& rect_a,
+//					 hls::stream<hls_blksize>& rect_b,
+//					 hls::stream<hls_blksize>& rect_c,
+//					 hls::stream<bool>& rect_e_tile) {
+//#pragma HLS INLINE OFF
+//    bool e_tile = strm_e_tile.read();
+//
+//    hls_Rect tmp = strm_rect.read();
+//    hls_blksize tmp_out;
+//    tmp_out.xsize = tmp.xsize;
+//    tmp_out.ysize = tmp.ysize;
+//    rect_a.write(tmp_out);
+//    rect_b.write(tmp_out);
+//    rect_c.write(tmp_out);
+//    rect_e_tile.write(e_tile);
+//
+//}
+
+//----------------------------------------------------------
+// Pops one end-of-tile flag and one tile size from the input streams and
+// copies them into plain scalar outputs.
+//
+// strm_e_tile is read before strm_rect. Only the xsize and ysize members of
+// the incoming hls_blksize are carried over; all five rect_* outputs receive
+// the same value, and rect_e_tile receives the flag.
+void hls_read_config(hls::stream<hls_blksize>& strm_rect,
+                     hls::stream<bool>& strm_e_tile,
+                     hls_blksize& rect_a,
+                     hls_blksize& rect_b,
+                     hls_blksize& rect_c,
+                     hls_blksize& rect_d,
+                     hls_blksize& rect_e,
+                     bool& rect_e_tile) {
+#pragma HLS INLINE OFF
+#pragma HLS PIPELINE II = 1
+    const bool last_tile_flag = strm_e_tile.read();
+    const hls_blksize incoming = strm_rect.read();
+
+    // Rebuild the size record from its two dimensions only.
+    hls_blksize blk;
+    blk.xsize = incoming.xsize;
+    blk.ysize = incoming.ysize;
+
+    // Fan the same size out to every consumer.
+    rect_a = blk;
+    rect_b = blk;
+    rect_c = blk;
+    rect_d = blk;
+    rect_e = blk;
+
+    rect_e_tile = last_tile_flag;
+}
+
+//----------------------------------------------------------
+// Tokenizes the AC coefficients of one tile as a DATAFLOW region.
+//
+// Calls, in order:
+//   hls_read_config       pops one tile size and end-of-tile flag and fans
+//                         them out as scalars (rect_a..rect_e, rect_e_tile)
+//   hls_orderBlk          rect_a, orders, strm_coef_raster, strm_coef_ord
+//   hls_CountAcNz         rect_d, strm_coef_ord, strm_coef_orderd, cnt_nz, cnt_nz2
+//   hls_tokenize_nz       rect_e, cnt_nz2, strm_nz_token
+//   hls_TokenizeBlk       rect_b, strm_coef_orderd, cnt_nz, strm_cnt_ac,
+//                         strm_histo_offset, strm_cnt_lookup, strm_last_pos,
+//                         strm_ac_token
+//   tokenize_lookup_table rect_b, the five streams above, strm_cnt_ac2,
+//                         strm_ac_token2
+//   collect_token_syn     rect_c, strm_nz_token, strm_cnt_ac2, strm_ac_token2
+//                         and the five output streams of this function
+// NOTE(review): most callees are defined outside this view; which of their
+// stream arguments are inputs and which are outputs is presumed from argument
+// order and naming - confirm against their definitions.
+//
+// rect_b is passed to two calls (hls_TokenizeBlk and tokenize_lookup_table).
+// strm_e is declared below but is not passed to any live call in this function.
+void hls_tokenize_AC_tile_top(const int32_t orders[3][64], // color dct_band
+                              hls::stream<hls_blksize>& strm_rect,
+                              hls::stream<bool>& strm_e_tile,
+                              // hls::stream<dct_t> strm_coef_raster[8],
+                              hls::stream<dct_t>& strm_coef_raster,
+                              uint8_t ac_static_context_map[hls_kNumContexts],
+                              // hls::stream< hls_blksize > &strm_blk_size,
+
+                              hls::stream<ap_uint<13> >& strm_token_addr,
+                              hls::stream<hls_Token_symb>& strm_token_symb,
+                              hls::stream<hls_Token_bits>& strm_token_bits,
+                              hls::stream<bool>& strm_e_addr,
+                              hls::stream<bool>& strm_e_token) {
+#pragma HLS INLINE OFF
+#pragma HLS DATAFLOW
+
+    // clang-format off
+	// strm_cnt_ac outputs for every block: 1, 0, 0, 0, 1...
+	// strm_ac_token2 outputs only when there is an ac_token
+	  static hls::stream< nzeros_t > cnt_nz("cnt_nz");
+#pragma HLS RESOURCE  	  variable = cnt_nz core = FIFO_LUTRAM
+#pragma HLS STREAM    	  variable = cnt_nz depth = 32
+	  static hls::stream< hls_Token> strm_nz_token;
+#pragma HLS DATA_PACK 	  variable = strm_nz_token
+#pragma HLS RESOURCE  	  variable = strm_nz_token core = FIFO_LUTRAM
+#pragma HLS STREAM    	  variable = strm_nz_token depth = 1024
+//	  static hls::stream< dct_t >    strm_coef_orderd[64];
+//#pragma HLS RESOURCE  	  variable = strm_coef_orderd core = FIFO_LUTRAM
+//#pragma HLS ARRAY_PARTITION variable=strm_coef_orderd complete
+//#pragma HLS STREAM    	  variable = strm_coef_orderd depth = 16
+
+	  static hls::stream< dct_t >    strm_coef_orderd;
+#pragma HLS RESOURCE  	  variable = strm_coef_orderd core = FIFO_LUTRAM
+#pragma HLS STREAM    	  variable = strm_coef_orderd depth = 128
+
+	  static hls::stream< nzeros_t > strm_cnt_ac("len_token");
+#pragma HLS RESOURCE  	  variable = strm_cnt_ac core = FIFO_LUTRAM
+#pragma HLS STREAM    	  variable = strm_cnt_ac depth = 64
+	  static hls::stream< nzeros_t > strm_cnt_ac2("len_token2");
+#pragma HLS RESOURCE  	  variable = strm_cnt_ac2 core = FIFO_LUTRAM
+#pragma HLS STREAM    	  variable = strm_cnt_ac2 depth = 64
+	  static hls::stream< hls_Token> strm_ac_token("ac_token");
+#pragma HLS DATA_PACK 	  variable = strm_ac_token
+#pragma HLS RESOURCE  	  variable = strm_ac_token core = FIFO_LUTRAM
+#pragma HLS STREAM    	  variable = strm_ac_token depth = 64
+
+  	hls::stream<ap_uint<9> > 		 strm_histo_offset;
+#pragma HLS STREAM    	  variable = strm_histo_offset depth = 64
+  	hls::stream<nzeros_t> 			 strm_cnt_lookup;
+#pragma HLS STREAM    	  variable = strm_cnt_lookup depth = 64
+  	hls::stream<nzeros_t> 			 strm_last_pos;
+#pragma HLS STREAM    	  variable = strm_last_pos depth = 64
+  	hls::stream<bool> 				 strm_e;
+#pragma HLS STREAM    	  variable = strm_e depth = 64
+  	hls::stream<hls_Token> 			 strm_ac_token2;
+#pragma HLS STREAM    	  variable = strm_ac_token2 depth = 64
+    // clang-format on
+    // Scalar copies of the tile size, one per consuming call.
+    hls_blksize rect_a;
+    hls_blksize rect_b;
+    hls_blksize rect_c;
+    hls_blksize rect_d;
+    hls_blksize rect_e;
+    bool rect_e_tile;
+
+    hls_read_config(strm_rect, strm_e_tile, rect_a, rect_b, rect_c, rect_d, rect_e, rect_e_tile);
+
+    // hls_orderblk_tokennz(orders, rect_a, strm_coef_raster, //
+    // ac_static_context_map,
+    //                          cnt_nz, strm_nz_token, strm_coef_orderd);
+
+    static hls::stream<dct_t> strm_coef_ord("strm_coef_ord");
+//#pragma HLS RESOURCE variable = strm_coef_ord core = FIFO_LUTRAM
+#pragma HLS STREAM variable = strm_coef_ord depth = 1024
+
+    // for predict
+    hls::stream<nzeros_t> cnt_nz2("strm_cnt_nz2");
+#pragma HLS RESOURCE variable = cnt_nz2 core = FIFO_LUTRAM
+#pragma HLS STREAM variable = cnt_nz2 depth = 32
+
+    _XF_IMAGE_PRINT("\n --2 tmp_num_nzeros begin - Tokenize \n");
+
+    hls_orderBlk(rect_a, orders, strm_coef_raster, strm_coef_ord); // II=1
+
+    // hls_count_ac_nz(rect_d, strm_coef_ord, strm_coef_orderd, cnt_nz,
+    // cnt_nz2);//II=1/8 in64
+    hls_CountAcNz(rect_d, strm_coef_ord, strm_coef_orderd, cnt_nz, cnt_nz2);
+
+    hls_tokenize_nz(rect_e, cnt_nz2, strm_nz_token); // II=1/8 in64
+
+    // tokenize_blk(rect_b, strm_coef_orderd, cnt_nz, strm_cnt_ac, strm_ac_token);
+    // //3.3ns II=2
+
+    //    tokenize_blk_syn_test(rect_b, strm_coef_orderd, cnt_nz,
+    //    		strm_cnt_ac, strm_histo_offset, strm_cnt_lookup,
+    //    strm_last_pos,  strm_ac_token); //3.7ns II=1//strm_e,
+
+    // tokenize_lookup_table(strm_histo_offset, strm_cnt_lookup, strm_last_pos,
+    // strm_ac_token, strm_e, strm_ac_token2);
+
+    hls_TokenizeBlk(rect_b, strm_coef_orderd, cnt_nz, strm_cnt_ac, strm_histo_offset, strm_cnt_lookup, strm_last_pos,
+                    strm_ac_token);
+
+    tokenize_lookup_table(rect_b, strm_cnt_ac, strm_histo_offset, strm_cnt_lookup, strm_last_pos, strm_ac_token,
+                          strm_cnt_ac2, strm_ac_token2);
+
+    // Interleave the nz tokens and the ac tokens into the output streams.
+    collect_token_syn(rect_c, ac_static_context_map, rect_e_tile, // strm_cnt_nz, num_tile,
+                      strm_nz_token, strm_cnt_ac2, strm_ac_token2, strm_token_addr, strm_e_addr, strm_e_token,
+                      strm_token_symb, strm_token_bits);
+}
+
+//----------------------------------------------------------
+// Emits one size record and one end-of-tile flag per tile, row by row.
+//
+// A tile spans hls_kTileDimInBlocks blocks in each direction unless it would
+// run past rect.xsize_blocks / rect.ysize_blocks, in which case it is clipped
+// to the remaining blocks. The flag written to strm_e_tile is true only for
+// the tile in the last row and last column.
+void hls_config_gen(group_rect rect, hls::stream<hls_blksize>& strm_rect, hls::stream<bool>& strm_e_tile) {
+#pragma HLS INLINE OFF
+    for (int tile_row = 0; tile_row < rect.ysize_tiles; ++tile_row) {
+        for (int tile_col = 0; tile_col < rect.xsize_tiles; ++tile_col) {
+#pragma HLS PIPELINE II = 1
+            // First block covered by this tile in each direction.
+            int first_bx = tile_col * hls_kTileDimInBlocks;
+            int first_by = tile_row * hls_kTileDimInBlocks;
+
+            hls_blksize blk;
+            // Clip the tile when it would extend beyond the group.
+            blk.xsize = (first_bx + hls_kTileDimInBlocks > rect.xsize_blocks) ? (rect.xsize_blocks - first_bx)
+                                                                              : hls_kTileDimInBlocks;
+            blk.ysize = (first_by + hls_kTileDimInBlocks > rect.ysize_blocks) ? (rect.ysize_blocks - first_by)
+                                                                              : hls_kTileDimInBlocks;
+            strm_rect.write(blk);
+
+            const bool is_final_tile = (tile_row == rect.ysize_tiles - 1) && (tile_col == rect.xsize_tiles - 1);
+            strm_e_tile.write(is_final_tile);
+        }
+    }
+}
+
+//----------------------------------------------------------
+// Top-level tokenizer for one group: generates the per-tile configuration and
+// then runs the tile tokenizer once per tile.
+//
+// hls_config_gen writes rect.ysize_tiles * rect.xsize_tiles entries to
+// strm_rect and strm_e_tile; the nested loop below calls
+// hls_tokenize_AC_tile_top the same number of times, each call consuming one
+// entry from each of those streams. All remaining arguments are forwarded
+// unchanged to every call.
+void XAcc_TokenizeCoefficients6(const int32_t orders[3][64], // color dct_band
+                                const group_rect rect,
+                                // hls::stream<dct_t> strm_coef_raster[8],
+                                hls::stream<dct_t>& strm_coef_raster,
+                                uint8_t ac_static_context_map[hls_kNumContexts],
+                                // hls::stream< hls_blksize > &strm_blk_size,
+
+                                hls::stream<ap_uint<13> >& strm_token_addr,
+                                hls::stream<hls_Token_symb>& strm_token_symb,
+                                hls::stream<hls_Token_bits>& strm_token_bits,
+                                hls::stream<bool>& strm_e_addr,
+                                hls::stream<bool>& strm_e_token) {
+#pragma HLS INLINE OFF
+#pragma HLS DATAFLOW
+
+    // clang-format off
+	  static hls::stream< hls_blksize > strm_rect;
+#pragma HLS RESOURCE  	  variable = strm_rect core = FIFO_LUTRAM
+#pragma HLS STREAM    	  variable = strm_rect depth = 32
+	      static hls::stream< bool > strm_e_tile;
+#pragma HLS RESOURCE  	  variable = strm_e_tile core = FIFO_LUTRAM
+#pragma HLS STREAM    	  variable = strm_e_tile depth = 32
+    // clang-format on
+
+    // Producer: one (size, end flag) pair per tile.
+    hls_config_gen(rect, strm_rect, strm_e_tile);
+
+    // Consumer: same iteration space as hls_config_gen, one tile per call.
+    for (int tby = 0; tby < rect.ysize_tiles; ++tby) {
+        for (int tbx = 0; tbx < rect.xsize_tiles; ++tbx) { // block
+
+            hls_tokenize_AC_tile_top(orders, strm_rect, strm_e_tile, strm_coef_raster,
+                                     ac_static_context_map, // strm_blk_size,
+                                     strm_token_addr, strm_token_symb, strm_token_bits, strm_e_addr, strm_e_token);
+        }
+    }
+}
diff --git a/codec/L2/demos/pikEnc/kernel/kernel3/ans.cpp b/codec/L2/demos/pikEnc/kernel/kernel3/ans.cpp
new file mode 100755
index 0000000000..91504e0852
--- /dev/null
+++ b/codec/L2/demos/pikEnc/kernel/kernel3/ans.cpp
@@ -0,0 +1,572 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel3/ans.hpp"
+
+// ------------------------------------------------------------
+// Looks up the ANS symbol info for every token of one chunk.
+//
+// Reads (end - start) tokens from strm_ac_token_reverse. For each token the
+// histogram index is taken from dc_context_map when is_dc is true, otherwise
+// from context_map, indexed by token.context; codes[histogram][token.symbol]
+// is then written to strm_token_info inside an hls_TokenInfo.
+//
+// start, end       : half-open token index range [start, end) of the chunk
+// codes            : per-histogram symbol info table
+// context_map      : context -> histogram index, used when !is_dc
+// dc_context_map   : context -> histogram index, used when is_dc
+// strm_token_info  : one record per input token, same order as the input
+void ANS_LookupInfo( // input
+    const int start,
+    const int end,
+    hls::stream<hls_Token_symb>& strm_ac_token_reverse,
+    const hls_ANSEncSymbolInfo codes[hls_kNumStaticContexts][hls_alphabet_size],
+    const uint8_t context_map[hls_kNumContexts],
+    const bool is_dc,
+    uint8_t dc_context_map[MAX_NUM_COLOR],
+    // output
+    hls::stream<hls_TokenInfo>& strm_token_info) {
+#pragma HLS INLINE OFF
+    hls_TokenInfo token_info;
+
+    // One token in, one info record out per index.
+    for (int i = start; i < end; i++) {
+#pragma HLS PIPELINE II = 1
+        hls_Token_symb token = strm_ac_token_reverse.read();
+        const uint8_t histo_idx = is_dc ? dc_context_map[token.context] : context_map[token.context];
+        token_info.info = codes[histo_idx][token.symbol];
+        strm_token_info.write(token_info);
+    }
+}
+
+// Runs the ANS state update over one chunk of token infos and records every
+// renormalization event.
+//
+// The state starts at (hls_ANS_SIGNATURE << 16). For each of the (end - start)
+// records read from strm_token_info_reverse:
+//   - if (state >> (32 - hls_ANS_LOG_TAB_SIZE)) >= info.freq_, the low 16 bits
+//     of the state are taken as `bits` and the state is shifted right by 16;
+//   - the state is then advanced using info.ifreq_, info.freq_ and
+//     info.start_.
+// Each renormalization is stored as a 32-bit record
+//   ((end - i) << 16) | bits
+// Two records share one ram_runbit word: an even-numbered record is written
+// alone (and kept in runbit_reg); the following odd-numbered record rewrites
+// the same word with itself in the upper 32 bits and runbit_reg in the lower.
+//
+// After the loop the record count is written to cntInt and strm_cntInt, start
+// and end are echoed to strm_start / strm_end, and the final state goes to
+// strm_last_state.
+void ANS_Renormalize( // input
+    const int start,
+    const int end,
+    hls::stream<hls_TokenInfo>& strm_token_info_reverse,
+    // output
+
+    hls::stream<uint32_t>& strm_last_state,
+    hls_Runbit_t ram_runbit[hls_kANSBufferSize >> 1],
+    int& cntInt,
+    hls::stream<int>& strm_start,
+    hls::stream<int>& strm_end,
+    hls::stream<int>& strm_cntInt) {
+#pragma HLS INLINE OFF
+
+    // const int start = 0;
+    int size_ac_token = end - start; // only used by the debug print below
+    ap_uint<32> cnt = 0;             // number of renormalization records written so far
+    // private
+    uint32_t state_ = (hls_ANS_SIGNATURE << 16);
+    uint32_t runbit_reg; // last even-numbered record; set before it is ever read
+
+    _XF_IMAGE_PRINT("--1 start ac ANS loop %d- AC_ENCODE\n", (int)size_ac_token);
+
+    // 2. check all nbits = 16 // loop 50243 because the size_ac_token<kANSBufferSize //
+    // lookup all the token to form the strm_idx strm_bits
+    // short cntInt;
+
+    // for(int j = start; j <size_ac_token ; j+= hls_kANSBufferSize){
+    // when the timing is fixed the reset could be moved into the loop, but mind the index
+
+    // i runs start+1..end so that (end - i) counts down from size-1 to 0.
+    for (int i = start + 1; i <= end; i++) {
+#pragma HLS PIPELINE II = 6
+        hls_TokenInfo token_info = strm_token_info_reverse.read();
+        // hls_Token_bits token_bit_plain = strm_token_bits.read();
+        const hls_ANSEncSymbolInfo info = token_info.info;
+
+        bool do_shift;
+        uint16_t bits;
+
+        // AMS_PutSymbol(state_, info, do_shift, bits);//state+info = bool + bits + next_state
+        bits = 0;
+        do_shift = false;
+        if ((state_ >> (32 - hls_ANS_LOG_TAB_SIZE)) >= info.freq_) { // freq(0~1024)
+            bits = state_ & 0xffff;
+            state_ >>= 16;
+            do_shift = true;
+        }
+
+// NOTE(review): the #else branch below is compiled out and refers to
+// ANS_LOG_TAB_SIZE, while the live branch uses hls_ANS_LOG_TAB_SIZE.
+
+#if 1
+        // We use mult-by-reciprocal trick, but that requires 64b calc.
+        // NOTE(review): state_ is uint32_t, so the product is only 64-bit wide
+        // if info.ifreq_ is declared as a 64-bit type - confirm in the header.
+        const uint32_t v = (state_ * info.ifreq_) >> hls_RECIPROCAL_PRECISION;
+        const uint32_t offset = state_ - v * info.freq_ + info.start_;
+        state_ = (v << hls_ANS_LOG_TAB_SIZE) + offset;
+#else
+        state_ = ((state_ / info.freq_) << ANS_LOG_TAB_SIZE) + (state_ % info.freq_) + info.start_;
+#endif
+
+        //_XF_IMAGE_PRINT("--2 check all nbits=16, bits=%d, idx=%d - AC_ENCODE\n", bits, (int)(end-i));
+        if (do_shift) {
+            _XF_IMAGE_PRINT("--2 check all nbits=16, bits=%d, idx=%d - AC_ENCODE\n", bits, (int)(end - i));
+            uint16_t nRenormal = end - i;
+            uint32_t tmp = nRenormal;
+            tmp = (tmp << 16) | bits; // record layout: [31:16] = end - i, [15:0] = bits
+
+            if (!cnt[0]) {
+                // Even record: occupies the low half of a fresh word.
+                ram_runbit[cnt >> 1] = tmp;
+                runbit_reg = tmp;
+            } else { // write the same addr but with the new high 32bit
+                uint64_t runbit = tmp;
+                runbit = ((runbit << 32) | runbit_reg);
+                ram_runbit[cnt >> 1] = runbit;
+            }
+            cnt++;
+        }
+    } // end i
+
+    // last_state = state_;
+    cntInt = cnt;
+    strm_start.write(start);
+    strm_end.write(end);
+    strm_last_state.write(state_);
+    strm_cntInt.write(cnt);
+
+    // strm_e_run.write(true);
+    _XF_IMAGE_PRINT("--3 nbits = 16 write Tocken, last=%.4x - AC_ENCODE\n", state_);
+    _XF_IMAGE_PRINT("--flag cntInt=%d - AC_ENCODE\n", cntInt);
+}
+
+// read the ram in reverse
+// Streams the cntInt 32-bit records stored in ram_runbit to strm_runbit in
+// reverse order (the record stored last is sent first).
+//
+// Each ram_runbit word holds two records: the even-numbered one in bits 31..0
+// and the odd-numbered one in bits 63..32. A word is fetched once and both of
+// its halves are sent, upper half first; when cntInt is odd the top word only
+// contributes its lower half.
+//
+// cntInt      : number of valid records in ram_runbit
+// ram_runbit  : packed record storage, read only
+// strm_runbit : receives exactly cntInt records (none when cntInt <= 0)
+void ANS_read_ram(const int cntInt,
+                  hls_Runbit_t ram_runbit[hls_kANSBufferSize >> 1],
+                  hls::stream<hls_Runbit_t2>& strm_runbit) {
+#pragma HLS INLINE OFF
+    // With no records there is nothing to send. Without this guard the code
+    // below would index ram_runbit[(0 - 1) >> 1] and push one record that the
+    // consumer (which reads only when cntInt > 0) never pops.
+    if (cntInt <= 0) {
+        return;
+    }
+
+    ap_int<18> cnt = cntInt;
+
+    bool is_odd = cnt[0];
+
+    // Top word: holds the last record in its low half (odd count) or its high
+    // half (even count).
+    ap_uint<64> runbit_reverse = ram_runbit[(cntInt - 1) >> 1];
+
+    hls_Runbit_t2 runbit = is_odd ? runbit_reverse(31, 0) : runbit_reverse(63, 32);
+    strm_runbit.write(runbit);
+    is_odd = !is_odd;
+    cnt--;
+
+    while (cnt > 0) {
+#pragma HLS PIPELINE II = 1
+
+        if (is_odd) {
+            // Low half of the word fetched in the previous step.
+            runbit = runbit_reverse(31, 0);
+        } else {
+            // Move down to the next word and send its high half first.
+            runbit_reverse = ram_runbit[(cnt - 2) >> 1];
+            runbit = runbit_reverse(63, 32);
+        }
+
+        strm_runbit.write(runbit);
+        is_odd = !is_odd;
+        cnt--;
+    }
+}
+
+// Packs one chunk of the ANS bitstream into 16-bit words.
+//
+// Output order:
+//   1. the upper then the lower 16 bits of last_state (pos advances by 32);
+//   2. token extra bits (token.nbits bits of token.bits each, read from
+//      strm_token_bit) interleaved with 16-bit renormalization words taken
+//      from strm_runbit. A renormalization record carries its 16 data bits in
+//      [15:0] and, in [31:16], the token index after which it is inserted.
+// Bits are accumulated LSB-first in `buffer`; whenever 16 or more bits are
+// available the low 16 are written to strm_pos_byte together with a `false`
+// on strm_ac_e.
+//
+// start, end      : token index range of the chunk; end - start tokens are read
+// cntInt          : number of renormalization records for this chunk
+// last_state      : final ANS state of the chunk
+// pos             : running bit count, incremented by every bit appended here
+// cnt_buffer      : in: number of carried-over bits; out: bits left over
+// reg_buffer      : in: carried-over bits; out: the left-over bits
+//
+// The carry (reg_buffer, cnt_buffer) is always written back, including the
+// case where no bits are left over, so a carry received from a previous chunk
+// is never handed on a second time.
+void ANS_enc_Pushbit2( // input
+    const int start,
+    const int end,
+    hls::stream<hls_Token_bits>& strm_token_bit,
+    hls::stream<hls_Runbit_t2>& strm_runbit,
+    int cntInt,
+    uint32_t last_state,
+    // output
+    uint32_t& pos,
+
+    uint8_t& cnt_buffer,
+    uint16_t& reg_buffer,
+    hls::stream<uint16_t>& strm_pos_byte,
+    hls::stream<bool>& strm_ac_e) {
+#pragma HLS INLINE OFF
+
+    _XF_IMAGE_PRINT("--4 nbits < 16 write Tocken - AC_ENCODE\n");
+
+    int size_ac_token = end - start;
+    int tokenidx = 0;
+
+    uint16_t shortInt;
+    // size_t num_extra_bits = 0;
+    ap_int<18> cnt = cntInt; // there is 1 sign and 17 bit to represent 1~1<<16
+    bool is_odd = cnt[0];    //  (even 32 | odd 32)= 64bits
+    // loop 8668 // write out strm_bits
+
+    // read the end runbit
+    ap_uint<32> runbit;
+    uint16_t nRenormal = 0;
+    uint16_t Renormal_bits;
+
+    // Prefetch the first renormalization record, if there is one.
+    if (cntInt > 0) {
+        runbit = strm_runbit.read();
+        Renormal_bits = runbit(15, 0);
+        nRenormal = runbit(31, 16);
+    }
+
+    // for token loop
+    // Start with tokens only if the first record is not due at index 0.
+    bool is_extra_loop = nRenormal;
+    // uint16_t reg_nRenormal = nRenormal;
+    int i = 0;
+
+    // for write byte count
+    uint64_t storage_ix = 0;
+    ap_uint<32> buffer = reg_buffer;
+    uint8_t cnt16 = cnt_buffer; // number of valid bits currently held in buffer
+
+    // pos of the byte
+    // uint64_t pos = 32;
+    ap_uint<32> l_state = last_state;
+    _XF_IMAGE_PRINT("reg buffer = %.4x\n", reg_buffer);
+    _XF_IMAGE_PRINT("last state = %.4x\n", last_state);
+    // high
+    buffer(cnt16 + 15, cnt16) = l_state(31, 16);
+    shortInt = buffer(15, 0);
+    strm_pos_byte.write(shortInt);
+    strm_ac_e.write(false);
+    buffer >>= 16;
+    _XF_IMAGE_PRINT("state high buffer = %.4x\n", shortInt);
+    // low
+    buffer(cnt16 + 15, cnt16) = l_state(15, 0);
+    shortInt = buffer(15, 0);
+    strm_pos_byte.write(shortInt);
+    strm_ac_e.write(false);
+    buffer >>= 16;
+    _XF_IMAGE_PRINT("state low buffer = %.4x\n", shortInt);
+    pos += 32;
+
+    // cnt counts the renormalization records still to be written; it is driven
+    // to -1 once the last token has been consumed and no record remains.
+    while (cnt >= 0) {
+#pragma HLS PIPELINE II = 1
+
+        if (is_extra_loop) { // write some extra data ,usually not big than 16bits
+            // _XF_IMAGE_PRINT("--4 W from %d to %d - AC_ENCODE\n", tokenidx, nRenormal);
+
+            const hls_Token_bits token = strm_token_bit.read();
+            // !!!to improve:checkout the nbits=0 in the front module
+            if (token.nbits > 0) buffer(cnt16 + token.nbits - 1, cnt16) = token.bits;
+
+            // write out when at least 16 bits are buffered
+            if (token.nbits + cnt16 >= 16) {
+                shortInt = buffer(15, 0);
+                cnt16 = cnt16 + token.nbits - 16;
+                buffer >>= 16;
+                strm_pos_byte.write(shortInt);
+                strm_ac_e.write(false);
+                // storage_ix += 2;
+
+                if (token.nbits > 0)
+                    _XF_IMAGE_PRINT("---W--- n_bits=%ld, bits=%ld, pos=%ld\n", token.nbits, token.bits, pos);
+                pos += token.nbits;
+            } else {
+                cnt16 = cnt16 + token.nbits;
+
+                if (token.nbits > 0)
+                    _XF_IMAGE_PRINT("---W--- n_bits=%ld, bits=%ld, pos=%ld, cnt16=%d\n", token.nbits, token.bits, pos,
+                                    cnt16);
+                pos += token.nbits;
+            }
+            // num_extra_bits += token.nbits;
+
+            // Stop the token run when the next record is due or tokens are exhausted.
+            if (i == nRenormal - 1 || (i == size_ac_token - 1)) { // the last time not write out
+                tokenidx = i + 1;
+                is_extra_loop = false;
+
+                if (!cnt && (i == size_ac_token - 1)) {
+                    cnt--;
+                }
+            }
+            i++;
+
+        } else { // is_extra_loop=0 // write a 16bits Renormal_bits
+
+            if (cnt > 0) {
+                buffer(cnt16 + 15, cnt16) = Renormal_bits;
+
+                _XF_IMAGE_PRINT("---W--- n_bits=%ld, bits=%ld, pos=%ld\n", 16, Renormal_bits, pos);
+                pos += 16;
+                // read next renormal
+                if (cnt >= 2) {
+                    runbit = strm_runbit.read();
+                } else {
+                    runbit = 0;
+                }
+                Renormal_bits = runbit(15, 0);
+                nRenormal = runbit(31, 16);
+
+                // if(cnt == 1)
+                //  nRenormal = end;
+                cnt--;
+
+                shortInt = buffer(15, 0);
+                buffer >>= 16;
+                strm_pos_byte.write(shortInt);
+                strm_ac_e.write(false);
+                // storage_ix += 2;
+            }
+
+            is_extra_loop = true;
+            // reg_nRenormal = nRenormal;
+        }
+    } // end while
+
+    // Hand the left-over bits to the caller. This is done unconditionally:
+    // when cnt16 is 0 the carry must be cleared, otherwise a non-zero carry
+    // received on entry would still be visible to the next chunk and to the
+    // final flush. cnt16 stays below 16 here provided token.nbits never
+    // exceeds 16.
+    reg_buffer = buffer(15, 0);
+    cnt_buffer = cnt16;
+
+    if (cnt16 > 0 && (cnt16 <= 8)) { // can we change cnt16 to pos?
+        _XF_IMAGE_PRINT("1 reg buffer = %.4x\n", reg_buffer);
+    } else if (cnt16 > 8 && (cnt16 < 16)) {
+        _XF_IMAGE_PRINT("2 reg buffer = %.4x, cnt_buffer=%d\n", reg_buffer, cnt_buffer);
+    }
+}
+
+// DATAFLOW pair for one chunk: ANS_LookupInfo feeds ANS_Renormalize through
+// the local stream strm_token_info_reverse (depth 32).
+//
+// ANS_LookupInfo receives start, end, the token stream, the code table and the
+// two context maps. ANS_Renormalize receives start, end and the local stream,
+// and is handed strm_last_state, ram_runbit, cntInt, strm_start, strm_end and
+// strm_cntInt as its outputs.
+void ANS_runbitram( // input
+    const int start,
+    const int end,
+    hls::stream<hls_Token_symb>& strm_ac_token_reverse,
+    const hls_ANSEncSymbolInfo codes[hls_kNumStaticContexts][hls_alphabet_size],
+    const uint8_t context_map[hls_kNumContexts],
+    const bool is_dc,
+    uint8_t dc_context_map[MAX_NUM_COLOR],
+    // output
+    hls::stream<int>& strm_start,
+    hls::stream<int>& strm_end,
+    hls::stream<int>& strm_cntInt,
+    int& cntInt,
+    hls_Runbit_t ram_runbit[hls_kANSBufferSize >> 1],
+    hls::stream<uint32_t>& strm_last_state) {
+#pragma HLS INLINE OFF
+#pragma HLS DATAFLOW
+
+    // clang-format off
+  hls::stream< hls_TokenInfo > strm_token_info_reverse;
+#pragma HLS DATA_PACK 	  variable = strm_token_info_reverse
+#pragma HLS RESOURCE  	  variable = strm_token_info_reverse core = FIFO_LUTRAM
+#pragma HLS STREAM    	  variable = strm_token_info_reverse depth = 32
+    // clang-format on
+
+    // Stage 1: token -> symbol info.
+    ANS_LookupInfo(start, end, strm_ac_token_reverse, codes, context_map, is_dc, dc_context_map,
+                   strm_token_info_reverse);
+
+    // Stage 2: symbol info -> renormalization records and final state.
+    ANS_Renormalize(start, end, strm_token_info_reverse, strm_last_state, ram_runbit, cntInt, strm_start, strm_end,
+                    strm_cntInt);
+}
+// ------------------------------------------------------------
+// Processes ndataflow chunks, one per loop iteration, each as a DATAFLOW
+// region: ANS_runbitram fills a per-iteration ram_runbit buffer and cntInt,
+// then ANS_read_ram streams that buffer to strm_runbit.
+//
+// ndataflow     : number of chunks; start[i] / end[i] give the token range of
+//                 chunk i (both arrays are declared with 3 entries)
+// strm_start, strm_end, strm_cntInt, strm_last_state :
+//                 passed through to ANS_runbitram, one entry per chunk
+// strm_runbit   : written by ANS_read_ram for every chunk
+//
+// In C simulation (__SYNTHESIS__ undefined) ram_runbit is heap-allocated per
+// iteration and released at the end of the iteration; for synthesis it is a
+// local array bound to URAM.
+void ANS_runbitram2( // input
+    const int ndataflow,
+    const int start[3],
+    const int end[3],
+    hls::stream<hls_Token_symb>& strm_ac_token_reverse,
+    const hls_ANSEncSymbolInfo codes[hls_kNumStaticContexts][hls_alphabet_size],
+    const uint8_t context_map[hls_kNumContexts],
+    const bool is_dc,
+    uint8_t dc_context_map[MAX_NUM_COLOR],
+    // output
+    hls::stream<int>& strm_start,
+    hls::stream<int>& strm_end,
+    hls::stream<int>& strm_cntInt,
+
+    hls::stream<hls_Runbit_t2>& strm_runbit,
+    hls::stream<uint32_t>& strm_last_state) {
+#pragma HLS INLINE OFF
+
+LOOP_ANSBUFFER:
+    for (int i = 0; i < ndataflow; i++) { // 0~65535 65536~..
+#pragma HLS DATAFLOW
+
+        // clang-format off
+	  hls::stream< hls_TokenInfo > strm_token_info_reverse;
+	#pragma HLS DATA_PACK 	  variable = strm_token_info_reverse
+	#pragma HLS RESOURCE  	  variable = strm_token_info_reverse core = FIFO_LUTRAM
+	#pragma HLS STREAM    	  variable = strm_token_info_reverse depth = 32
+// clang-format on
+
+#ifndef __SYNTHESIS__
+        hls_Runbit_t* ram_runbit;
+        ram_runbit = (hls_Runbit_t*)malloc((hls_kANSBufferSize >> 1) * sizeof(hls_Runbit_t));
+#else
+        hls_Runbit_t ram_runbit[hls_kANSBufferSize >> 1];
+// or remove this pragma to use bram instead
+#pragma HLS RESOURCE variable = ram_runbit core = XPM_MEMORY uram
+#endif
+
+        int cntInt = 0;
+
+        ANS_runbitram(start[i], end[i], strm_ac_token_reverse, codes, context_map, is_dc, dc_context_map, strm_start,
+                      strm_end, strm_cntInt, cntInt, ram_runbit, strm_last_state);
+
+        ANS_read_ram(cntInt, ram_runbit, strm_runbit);
+
+#ifndef __SYNTHESIS__
+        // C simulation only: both users of the buffer have returned, so
+        // release it instead of leaking one allocation per chunk.
+        free(ram_runbit);
+#endif
+    }
+}
+
+// ------------------------------------------------------------
+
+// DATAFLOW pair for one chunk whose renormalization records are already in
+// ram_runbit: ANS_read_ram streams the cntInt records into the local stream
+// strm_runbit (depth 32) and ANS_enc_Pushbit2 consumes them together with
+// strm_token_bit to produce the packed output.
+//
+// pos, cnt_buffer, reg_buffer, strm_pos_byte and strm_ac_e are forwarded to
+// ANS_enc_Pushbit2 unchanged.
+void ANS_Rpushbit( // input
+    const int start,
+    const int end,
+    hls::stream<hls_Token_bits>& strm_token_bit,
+    hls_Runbit_t ram_runbit[hls_kANSBufferSize >> 1],
+    int cntInt,
+    uint32_t last_state,
+    // output
+    uint32_t& pos,
+    // uint32_t& num_extra_bits,
+
+    uint8_t& cnt_buffer,
+    uint16_t& reg_buffer,
+    // hls_PikImageSizeInfo& pik_info,
+    hls::stream<uint16_t>& strm_pos_byte,
+    hls::stream<bool>& strm_ac_e) { // structure
+#pragma HLS INLINE OFF
+#pragma HLS DATAFLOW
+    // clang-format off
+	hls::stream< hls_Runbit_t2 > strm_runbit;
+#pragma HLS RESOURCE  	  variable = strm_runbit core = FIFO_LUTRAM
+#pragma HLS STREAM    	  variable = strm_runbit depth = 32
+    // clang-format on
+
+    // Producer of the record stream.
+    ANS_read_ram(cntInt, ram_runbit, strm_runbit);
+
+    // Consumer: merges records and token bits into 16-bit words.
+    ANS_enc_Pushbit2(start, end, strm_token_bit, strm_runbit, cntInt, last_state, pos, cnt_buffer, // num_extra_bits,
+                     reg_buffer, strm_pos_byte, strm_ac_e);                                        // pik_info,
+}
+
+// pingpang ram_runbit
+// Splits the token range [0, total_token) into consecutive chunks of at most
+// hls_kANSBufferSize tokens.
+//
+// For chunk k, start[k] is its first token index and end[k] is one past its
+// last; the final chunk ends at total_token. ndataflow receives the number of
+// chunks (0 when total_token <= 0).
+//
+// NOTE(review): start and end are declared with 3 entries and the loop does
+// not check the chunk index against that size, so callers must keep
+// total_token within 3 * hls_kANSBufferSize.
+void hls_InitEnd(const int total_token, int& ndataflow, int start[3], int end[3]) {
+#pragma HLS INLINE OFF
+    int chunk = 0;
+LOOP_CONST_END:
+    for (int first = 0; first < total_token; first += hls_kANSBufferSize) {
+#pragma HLS PIPELINE II = 1
+        const int remaining = total_token - first;
+        start[chunk] = first;
+        if (hls_kANSBufferSize <= remaining) {
+            // A full chunk still fits.
+            end[chunk] = first + hls_kANSBufferSize;
+        } else {
+            // Last, partial chunk.
+            end[chunk] = total_token;
+        }
+        chunk++;
+    }
+    ndataflow = chunk;
+}
+
+// Runs ANS_enc_Pushbit2 once per chunk.
+//
+// Each of the ndataflow iterations pops one value from strm_start, strm_end,
+// strm_cntInt and strm_last_state (in that order) and passes them, together
+// with the shared token-bit and runbit streams, to ANS_enc_Pushbit2. pos,
+// cnt_buffer and reg_buffer are references shared by all iterations, so they
+// are carried from one chunk to the next.
+void ANS_enc_Pushbit3( // input
+    const int ndataflow,
+    hls::stream<int>& strm_start,
+    hls::stream<int>& strm_end,
+    hls::stream<hls_Token_bits>& strm_token_bit,
+    hls::stream<hls_Runbit_t2>& strm_runbit,
+    hls::stream<int>& strm_cntInt,
+    hls::stream<uint32_t>& strm_last_state,
+    // output
+    uint32_t& pos,
+    uint8_t& cnt_buffer,
+    uint16_t& reg_buffer,
+    hls::stream<uint16_t>& strm_pos_byte,
+    hls::stream<bool>& strm_ac_e) {
+PUSH_LOOP_ANSBUFFER:
+    for (int i = 0; i < ndataflow; i++) { // 0~65535 65536~..
+#pragma HLS DATAFLOW
+        // Per-chunk parameters, one entry per chunk on each stream.
+        int start = strm_start.read();
+        int end = strm_end.read();
+        int cntInt = strm_cntInt.read();
+        uint32_t last_state = strm_last_state.read();
+
+        ANS_enc_Pushbit2(start, end, strm_token_bit, strm_runbit, cntInt, last_state, pos,
+                         cnt_buffer, // num_extra_bits,
+                         reg_buffer, strm_pos_byte, strm_ac_e);
+    }
+}
+// ------------------------------------------------------------
+// pingpang ram_runbit
+// DATAFLOW wrapper connecting the two chunk loops of the ANS writer.
+//
+// ANS_runbitram2 is handed the token symbols, the code table and the context
+// maps, plus five local streams (strm_start, strm_end, strm_cntInt,
+// strm_runbit, strm_last_state, each of depth 32). ANS_enc_Pushbit3 is handed
+// the same five streams together with strm_token_bit and the output
+// arguments ans_pos, cnt_buffer, reg_buffer, strm_ac_byte and strm_ac_e.
+//
+// total_token is accepted but not referenced in this function.
+void XAcc_WriteTokens_wapper3(
+
+    // input
+    const int total_token,
+    const int ndataflow,
+    const int start[3],
+    const int end[3],
+    hls::stream<hls_Token_symb>& strm_ac_token_reverse,
+    hls::stream<hls_Token_bits>& strm_token_bit,
+    hls_ANSEncSymbolInfo hls_codes[hls_kNumStaticContexts][hls_alphabet_size],
+    uint8_t ac_static_context_map[hls_kNumContexts], // table
+    const bool is_dc,
+    uint8_t dc_context_map[MAX_NUM_COLOR],
+
+    // output
+    uint32_t& ans_pos,
+    uint8_t& cnt_buffer,
+    uint16_t& reg_buffer,
+    hls::stream<uint16_t>& strm_ac_byte,
+    hls::stream<bool>& strm_ac_e) {
+#pragma HLS INLINE OFF
+#pragma HLS DATAFLOW
+
+    // clang-format off
+		hls::stream< hls_Runbit_t2 > strm_runbit;
+#pragma HLS DATA_PACK 	  variable = strm_runbit
+#pragma HLS RESOURCE  	  variable = strm_runbit core = FIFO_LUTRAM
+#pragma HLS STREAM    	  variable = strm_runbit depth = 32
+			   hls::stream<uint32_t> strm_last_state;
+#pragma HLS STREAM    	  variable = strm_last_state depth = 32
+					hls::stream<int> strm_cntInt;
+#pragma HLS STREAM    	  variable = strm_cntInt depth = 32
+					hls::stream<int> strm_start;
+#pragma HLS STREAM    	  variable = strm_start depth = 32
+					hls::stream<int> strm_end;
+#pragma HLS STREAM    	  variable = strm_end depth = 32
+    // clang-format on
+
+    // First chunk loop: lookup + renormalization.
+    ANS_runbitram2(ndataflow, start, end, strm_ac_token_reverse, hls_codes, ac_static_context_map, is_dc,
+                   dc_context_map, strm_start, strm_end, strm_cntInt, strm_runbit, strm_last_state);
+
+    // Second chunk loop: bit packing.
+    ANS_enc_Pushbit3(ndataflow, strm_start, strm_end, strm_token_bit, strm_runbit, strm_cntInt, strm_last_state,
+                     ans_pos, cnt_buffer, reg_buffer, strm_ac_byte, strm_ac_e); // pik_info,
+}
+
+// ------------------------------------------------------------
+// pingpang ram_runbit
+// Entry point of the ANS token writer for one token sequence.
+//
+// Steps:
+//   1. hls_InitEnd splits [0, total_token) into chunks (at most 3 table
+//      entries are available).
+//   2. XAcc_WriteTokens_wapper3 encodes all chunks into strm_ac_byte /
+//      strm_ac_e and reports the bit position and any left-over bits.
+//   3. If bits are left over, they are written as one more 16-bit word with a
+//      `false` flag.
+//   4. A final `true` is written to strm_ac_e.
+//
+// len_ac receives the bit position rounded up to whole bytes,
+// (bit position + 7) >> 3.
+void hls_WriteTokensTop(
+
+    // input
+    const int total_token,
+    hls::stream<hls_Token_symb>& strm_ac_token_reverse,
+    hls::stream<hls_Token_bits>& strm_token_bit,
+    hls_ANSEncSymbolInfo hls_codes[hls_kNumStaticContexts][hls_alphabet_size],
+    uint8_t ac_static_context_map[hls_kNumContexts], // table
+    const bool is_dc,
+    uint8_t dc_context_map[MAX_NUM_COLOR],
+
+    // output
+    int& len_ac,
+    hls::stream<uint16_t>& strm_ac_byte,
+    hls::stream<bool>& strm_ac_e) {
+#pragma HLS INLINE OFF
+
+    // Chunk table; 3 entries is an empirical bound taken from detailed groups.
+    int num_chunks;
+    int chunk_first[3];
+    int chunk_last[3];
+    hls_InitEnd(total_token, num_chunks, chunk_first, chunk_last);
+
+    // Running output state shared by all chunks.
+    uint32_t bit_pos = 0;
+    uint8_t carry_cnt = 0;
+    uint16_t carry_bits = 0;
+
+    XAcc_WriteTokens_wapper3(total_token, num_chunks, chunk_first, chunk_last, strm_ac_token_reverse,
+                             strm_token_bit, hls_codes, ac_static_context_map, is_dc, dc_context_map, bit_pos,
+                             carry_cnt, carry_bits, strm_ac_byte, strm_ac_e);
+
+    // Flush bits that did not fill a whole 16-bit word.
+    const bool has_leftover = (carry_cnt != 0);
+    if (has_leftover) {
+        strm_ac_byte.write(carry_bits);
+        strm_ac_e.write(false);
+    }
+
+    // End-of-stream marker.
+    strm_ac_e.write(true);
+
+    len_ac = (bit_pos + 7) >> 3;
+}
diff --git a/codec/L2/demos/pikEnc/kernel/kernel3/build_cluster.cpp b/codec/L2/demos/pikEnc/kernel/kernel3/build_cluster.cpp
new file mode 100755
index 0000000000..87dd916247
--- /dev/null
+++ b/codec/L2/demos/pikEnc/kernel/kernel3/build_cluster.cpp
@@ -0,0 +1,637 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel3/build_cluster.hpp"
+
+struct hls_HistogramPair { // candidate merge of two clusters, as queued by hls_CompareAndPushToQueue
+    uint32_t idx1;     // first cluster index (the queueing code swaps so that idx1 < idx2)
+    uint32_t idx2;     // second cluster index
+    double cost_combo; // bit cost assigned to the merged histogram
+    double cost_diff;  // 0.5 * cluster-size term - both clusters' bit costs + cost_combo
+};
+
+float hls_ANSPopulationCost(const hist_t* data, int alphabet_size, int total_count) {
+#pragma HLS INLINE OFF
+    // Returns an estimated bit cost for histogram `data` (header bits + entropy bits); 7 if empty or single-symbol.
+    static hls::stream<float> strm_fsub;
+#pragma HLS RESOURCE variable = strm_fsub core = FIFO_BRAM
+#pragma HLS STREAM variable = strm_fsub depth = 1024
+    static hls::stream<float> strm_entropy_bits;
+#pragma HLS RESOURCE variable = strm_entropy_bits core = FIFO_LUTRAM
+#pragma HLS STREAM variable = strm_entropy_bits depth = 32
+    int num = 0; // number of float terms queued in strm_fsub for the final summation
+    float fsub;
+
+    if (total_count == 0) {
+        return 7;
+    }
+
+    float entropy_bits = total_count * hls_ANS_LOG_TAB_SIZE;
+    int histogram_bits = 0;
+    int count = 0;  // number of non-zero bins
+    int length = 0; // index of the last non-zero bin, incremented by one after the scan
+    // Totals above the table size: each bin is first scaled to the table size in 32-bit fixed point.
+    if (total_count > hls_ANS_TAB_SIZE) {
+        uint64_t total = total_count;
+        for (int i = 0; i < alphabet_size; ++i) {
+            if (data[i] > 0) {
+                ++count;    // 25    num_nz
+                length = i; // 24 max_nz_symbol
+            }
+        }
+        if (count == 1) {
+            return 7;
+        }
+        ++length;                                                                // 25
+        const uint64_t max0 = (total * length) >> hls_ANS_LOG_TAB_SIZE;          // 184
+        const uint64_t max1 = (max0 * length) >> hls_ANS_LOG_TAB_SIZE;           // 4
+        const uint32_t min_base = (total + max0 + max1) >> hls_ANS_LOG_TAB_SIZE; // 7
+        total += min_base * count;                                               // 7735
+        const int64_t kFixBits = 32;
+        const int64_t kFixOne = 1LL << kFixBits;                      // 4294967296
+        const int64_t kDescaleBits = kFixBits - hls_ANS_LOG_TAB_SIZE; // 22
+        const int64_t kDescaleOne = 1LL << kDescaleBits;              // 4194304
+        const int64_t kDescaleMask = kDescaleOne - 1;
+        const uint32_t mult = kFixOne / total;  /// 555264
+        const uint32_t error = kFixOne % total; // 256
+        uint32_t cumul = error;
+        if (error < kDescaleOne) {
+            cumul += (kDescaleOne - error) >> 1; // 2097280
+        }
+        // NOTE(review): the bare numbers in the trailing comments above appear to be sample values from a debug run.
+        _XF_IMAGE_PRINT("---cobo:  total = %d \n", total_count);
+        // Bin 0 is handled ahead of the pipelined loop; unlike later bins it adds nothing to histogram_bits.
+        if (data[0] > 0) { // 870
+            uint64_t c = (uint64_t)(data[0] + min_base) * mult + cumul;
+            _XF_IMAGE_PRINT("data[0]= %d, c = %ld\n", (int)(data[0]), (c >> kDescaleBits));
+            float log2count = hls_FastLog2(c >> kDescaleBits);
+            // Stands for "entropy_bits -= data[0] * log2count": the negated term is queued and summed later.
+            fsub = data[0] * log2count;
+            strm_fsub.write((-1 * fsub));
+
+            cumul = c & kDescaleMask; // 2524544; the low kDescaleBits of c carry over to the next bin
+
+            num++;
+        }
+        _XF_IMAGE_PRINT("\n : entropy_bits = %f\n", (entropy_bits));
+
+        for (int i = 1; i < length; ++i) {
+#pragma HLS PIPELINE II = 1
+            if (data[i] > 0) {
+                uint64_t c = (uint64_t)(data[i] + min_base) * mult + cumul;
+                _XF_IMAGE_PRINT("data[%d]= %d, c = %ld\n", i, (int)(data[i]), (c >> kDescaleBits));
+
+                float log2count = hls_FastLog2(c >> kDescaleBits); // 6.49
+                int log2floor = static_cast<int>(log2count);       // 6
+                // Stands for "entropy_bits -= data[i] * log2count" (sample value 65277); queued as above.
+                fsub = data[i] * log2count;
+                strm_fsub.write((-1 * fsub));
+                num++;
+
+                // Original author's note: if log2count were held in ap_uint<16>, a value such as
+                // 9.27 would truncate to 9, fsub would shrink and the returned cost would grow
+                // for every histogram.
+                // NOTE(review): the last word of the original note ("misable") is unclear.
+                _XF_IMAGE_PRINT("log2count= %f, log2floor = %d\n", log2count, (log2floor));
+
+                histogram_bits += log2floor;                              // 6
+                histogram_bits += hls_kLogCountBitLengths[log2floor + 1]; // 6+3
+                cumul = c & kDescaleMask;
+
+            } else {
+                histogram_bits += hls_kLogCountBitLengths[0];
+            }
+        } // end for
+
+        _XF_IMAGE_PRINT("\n");
+        // Totals that fit the table: log2count = hls_FastLog2(bin) plus a fixed normalisation term.
+    } else {
+        float log2norm = hls_ANS_LOG_TAB_SIZE - hls_FastLog2(total_count);
+        if (data[0] > 0) {
+            float log2count = hls_FastLog2(data[0]) + log2norm;
+            // Stands for "entropy_bits -= data[0] * log2count"; queued for the later summation.
+            fsub = data[0] * log2count;
+            strm_fsub.write((-1 * fsub));
+            num++;
+
+            length = 0;
+            ++count;
+        }
+        for (int i = 1; i < alphabet_size; ++i) {
+#pragma HLS PIPELINE II = 1
+            if (data[i] > 0) {
+                float log2count = hls_FastLog2(data[i]) + log2norm;
+                int log2floor = static_cast<int>(log2count);
+                // Stands for "entropy_bits -= data[i] * log2count"; queued for the later summation.
+                fsub = data[i] * log2count;
+                strm_fsub.write((-1 * fsub));
+                num++;
+
+                if (log2floor >= hls_ANS_LOG_TAB_SIZE) {
+                    log2floor = hls_ANS_LOG_TAB_SIZE - 1;
+                }
+                histogram_bits += (log2floor + 1) >> 1; // GetPopulationCountPrecision(log2floor);
+                histogram_bits += hls_kLogCountBitLengths[log2floor + 1];
+                length = i;
+                ++count;
+            } else {
+                histogram_bits += hls_kLogCountBitLengths[0];
+            }
+        }
+        ++length;
+    }
+    // Queue the positive base term last, so the summed stream equals base minus all per-bin terms.
+    if (num != 0) {
+        num++;
+        fsub = total_count * hls_ANS_LOG_TAB_SIZE;
+        strm_fsub.write((fsub));
+    } else {
+        num = 0;
+    }
+    // ADD_FP_strm drains exactly `num` values from strm_fsub and writes one sum (nothing when num == 0).
+    ADD_FP_strm(num, strm_fsub, strm_entropy_bits);
+
+    if (num != 0) {
+        entropy_bits = strm_entropy_bits.read();
+    }
+
+    if (count == 1) {
+        return 7;
+    }
+
+    if (count == 2) {
+        return static_cast<int>(entropy_bits) + 1 + 12 + hls_ANS_LOG_TAB_SIZE;
+    }
+    // General case: add the bits needed for (alphabet_size - 1), which is first narrowed to 8 bits.
+    uint8_t tmp = alphabet_size - 1;
+    int max_bits = 1 + (tmp == 0 ? -1 : hls_Log2FloorNonZero_32b((int)tmp));
+    histogram_bits += max_bits;
+
+    _XF_IMAGE_PRINT("\n histogram_bits= %d, entropy_bits = %d\n", (int)(histogram_bits),
+                    (static_cast<int>(entropy_bits)));
+
+    return histogram_bits + static_cast<int>(entropy_bits) + 1;
+}
+
+inline float hls_ClusterCostDiff(int size_a, int size_b) { // a*L(a) + b*L(b) - (a+b)*L(a+b), with L = hls_FastLog2
+    int size_c = size_a + size_b; // size of the two clusters taken together
+    return size_a * hls_FastLog2(size_a) + size_b * hls_FastLog2(size_b) - size_c * hls_FastLog2(size_c);
+}
+
+// Queue ordering: true when p1 has the larger cost_diff or, on a cost_diff tie, the larger index gap |idx1 - idx2|.
+inline bool comparePair(const hls_HistogramPair& p1, const hls_HistogramPair& p2) {
+    if (p1.cost_diff != p2.cost_diff) {
+        return p1.cost_diff > p2.cost_diff;
+    }
+    // Gap computed sign-safely on the unsigned indices, so csim and synthesis share one path and agree.
+    const uint32_t gap1 = (p1.idx1 > p1.idx2) ? (p1.idx1 - p1.idx2) : (p1.idx2 - p1.idx1);
+    const uint32_t gap2 = (p2.idx1 > p2.idx2) ? (p2.idx1 - p2.idx2) : (p2.idx2 - p2.idx1);
+    return gap1 > gap2;
+}
+
+void hls_CompareAndPushToQueue(const hist_t hls_clustgrams[hls_kNumStaticContexts][MAX_ALPHABET_SIZE],
+                               const uint16_t total_count[MAX_NUM_COLOR],  // token total per cluster
+                               const int max_loop,                         // number of bins added when combining
+                               const uint8_t max_nz_symbol[MAX_NUM_COLOR], // per-cluster alphabet extent
+
+                               const int cluster_size[MAX_NUM_COLOR], // per-cluster size for the cost-diff term
+                               const float bit_cost[MAX_NUM_COLOR],   // current bit cost of each cluster
+                               int idx1,                              // first cluster of the candidate pair
+                               int idx2,                              // second cluster of the candidate pair
+                               bool& pair_is_empty,                   // in/out: no priced pair accepted yet
+                               int& cnt,                              // in/out: number of entries in pairs
+                               hls_HistogramPair pairs[MAX_NUM_COLOR] // in/out queue, for {01}{02}{12}
+                               ) {
+#pragma HLS INLINE OFF
+    // Scores the merge of clusters idx1 and idx2 and, when it is worth keeping, stores the pair in `pairs`.
+    if (idx1 == idx2) {
+        return;
+    }
+    if (idx2 < idx1) {
+        int t = idx2;
+        idx2 = idx1;
+        idx1 = t;
+    }
+    bool store_pair = false;
+    hls_HistogramPair p;
+    p.idx1 = idx1;
+    p.idx2 = idx2;
+    _XF_IMAGE_PRINT("------cluster_size[idx1]=%d, cluster_size[idx2]=%d\n", cluster_size[idx1], cluster_size[idx2]);
+    p.cost_diff = 0.5f * hls_ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
+    _XF_IMAGE_PRINT("------cost_diff=%f\n", p.cost_diff); // format now matches its single argument
+    p.cost_diff -= bit_cost[idx1];
+    p.cost_diff -= bit_cost[idx2];
+
+    _XF_IMAGE_PRINT("-bit_cost[%d]=%f, bit_cost[%d]=%f\n", idx1, bit_cost[idx1], idx2, bit_cost[idx2]);
+    // An empty side makes the merge free to price: the combined cost is simply the other side's cost.
+    if (total_count[idx1] == 0) {
+        p.cost_combo = bit_cost[idx2];
+        store_pair = true;
+    } else if (total_count[idx2] == 0) {
+        p.cost_combo = bit_cost[idx1];
+        store_pair = true;
+    } else {
+        double zero = 0.0f;
+
+        float threshold;
+#ifndef __SYNTHESIS__
+        threshold = pair_is_empty ? std::numeric_limits<float>::max() : hls::max(zero, pairs[0].cost_diff);
+#else
+        threshold = hls::max(zero, pairs[0].cost_diff);
+#endif
+        // Build the element-wise sum of both cluster histograms (first max_loop bins only) and price
+        // it with hls_ANSPopulationCost; the alphabet extent passed is the larger of the two clusters'
+        // max_nz_symbol values.
+        hist_t hls_clustgrams12[MAX_ALPHABET_SIZE];
+
+        for (int n = 0; n < max_loop; ++n) { // 0~255
+#pragma HLS PIPELINE II = 1
+            hls_clustgrams12[n] = hls_clustgrams[idx1][n] + hls_clustgrams[idx2][n];
+        }
+
+        uint16_t total_count12 = total_count[idx1] + total_count[idx2];
+        int combo_size = hls::max(max_nz_symbol[idx1], max_nz_symbol[idx2]);
+        _XF_IMAGE_PRINT("-combo_size=%d, total_count12=%d\n", combo_size, (int)total_count12);
+        float cost_combo = hls_ANSPopulationCost(hls_clustgrams12, combo_size, (int)total_count12);
+
+        _XF_IMAGE_PRINT("-cost_combo=%f, p.cost_diff=%f, threshold=%f\n", cost_combo, p.cost_diff, threshold);
+        if (pair_is_empty || (cost_combo + p.cost_diff < threshold)) { // 2116  -25   -29266
+            // The first priced pair is always accepted; later ones must beat max(0, pairs[0].cost_diff).
+            p.cost_combo = cost_combo;
+            store_pair = true;
+            pair_is_empty = false;
+        }
+    }
+    if (store_pair) {
+        p.cost_diff += p.cost_combo;
+        // Insertion: an empty queue takes p at slot 0; otherwise p is compared with the current last entry.
+        if (cnt == 0) {
+            pairs[0].idx1 = p.idx1;
+            pairs[0].idx2 = p.idx2;
+            pairs[0].cost_diff = p.cost_diff;
+            pairs[0].cost_combo = p.cost_combo;
+            cnt++;
+        } else {
+            bool smaller = comparePair(pairs[cnt - 1], p);
+            if (smaller) { // the last entry outranks p in comparePair order: shift entries down, p goes first
+                if (cnt == 1) {
+                    pairs[cnt].idx1 = pairs[cnt - 1].idx1;
+                    pairs[cnt].idx2 = pairs[cnt - 1].idx2;
+                    pairs[cnt].cost_diff = pairs[cnt - 1].cost_diff;
+                    pairs[cnt].cost_combo = pairs[cnt - 1].cost_combo;
+                } else {
+                    pairs[cnt].idx1 = pairs[cnt - 1].idx1;
+                    pairs[cnt].idx2 = pairs[cnt - 1].idx2;
+                    pairs[cnt].cost_diff = pairs[cnt - 1].cost_diff;
+                    pairs[cnt].cost_combo = pairs[cnt - 1].cost_combo;
+                    pairs[cnt - 1].idx1 = pairs[cnt - 2].idx1;
+                    pairs[cnt - 1].idx2 = pairs[cnt - 2].idx2;
+                    pairs[cnt - 1].cost_diff = pairs[cnt - 2].cost_diff;
+                    pairs[cnt - 1].cost_combo = pairs[cnt - 2].cost_combo;
+                }
+                pairs[0].idx1 = p.idx1;
+                pairs[0].idx2 = p.idx2;
+                pairs[0].cost_diff = p.cost_diff;
+                pairs[0].cost_combo = p.cost_combo;
+            } else { // otherwise p is appended after the last entry
+                pairs[cnt].idx1 = p.idx1;
+                pairs[cnt].idx2 = p.idx2;
+                pairs[cnt].cost_diff = p.cost_diff;
+                pairs[cnt].cost_combo = p.cost_combo;
+            }
+            cnt++;
+        }
+    }
+}
+
+void smallCase_AddHistogram(const hist_t hls_clustgrams[hls_kNumStaticContexts][MAX_ALPHABET_SIZE],
+                            uint16_t total_count[MAX_NUM_COLOR],
+                            const int max_symbol,
+                            hist_t hls_clustgrams_out[hls_kNumStaticContexts][MAX_ALPHABET_SIZE]) {
+#pragma HLS INLINE OFF
+// Small-input shortcut: folds contexts 0, 1 and 2 into output row 0 without any cost evaluation.
+#pragma HLS ARRAY_PARTITION variable = hls_clustgrams complete dim = 1
+#pragma HLS ARRAY_PARTITION variable = total_count complete dim = 1
+
+    for (int sym = 0; sym < max_symbol; sym++) {
+#pragma HLS PIPELINE II = 1
+        // Per-symbol sum of the three input histograms.
+        const hist_t h0 = hls_clustgrams[0][sym];
+        const hist_t h1 = hls_clustgrams[1][sym];
+        const hist_t h2 = hls_clustgrams[2][sym];
+        hls_clustgrams_out[0][sym] = h0 + h1 + h2;
+
+        // Totals are folded once, on the first iteration only (so never when max_symbol <= 0).
+        if (sym == 0) {
+            const uint16_t t0 = total_count[0];
+            const uint16_t t1 = total_count[1];
+            const uint16_t t2 = total_count[2];
+            total_count[0] = t0 + t1 + t2;
+        }
+    }
+}
+
+void hls_HistogramCombine(const hist_t hls_clustgrams[hls_kNumStaticContexts][MAX_ALPHABET_SIZE],
+                          uint16_t total_count[MAX_NUM_COLOR],
+                          const int max_loop,
+                          uint8_t max_nz_symbol[MAX_NUM_COLOR], // rewrite by combo
+
+                          int cluster_size[MAX_NUM_COLOR], // 1,1,1
+                          float bit_cost[MAX_NUM_COLOR],
+                          uint8_t dc_context_map[MAX_NUM_COLOR],
+                          int symbols_size, // 3 (not used in this body)
+                          int max_clusters, // 64 (not used in this body)
+                          hist_t hls_clustgrams_out[hls_kNumStaticContexts][MAX_ALPHABET_SIZE],
+
+                          int& num_clusters) {
+#pragma HLS INLINE OFF
+    // Greedily merges the MAX_NUM_COLOR input histograms into clusters; rewrites dc_context_map and the outputs.
+    float cost_diff_threshold = 0.0f;
+    int min_cluster_size = 1;
+
+    // Uniquify the list of symbols after merging empty clusters.
+
+    int clusters[MAX_NUM_COLOR];
+    int cnt = 0;
+
+    int sum_of_totals = 0;
+    int first_zero_pop_count_symbol = -1;
+    for (int i = 0; i < MAX_NUM_COLOR; ++i) { // 3
+#pragma HLS PIPELINE II = 1
+        if (total_count[dc_context_map[i]] == 0) { // 3780
+            // Merge the zero pop count histograms into one.
+            if (first_zero_pop_count_symbol == -1) {
+                first_zero_pop_count_symbol = dc_context_map[i];
+                clusters[cnt] = dc_context_map[i];
+                cnt++;
+            } else {
+                dc_context_map[i] = first_zero_pop_count_symbol;
+            }
+        } else {
+            // Insert all histograms with non-zero pop counts.
+            clusters[cnt] = dc_context_map[i];
+            sum_of_totals += total_count[dc_context_map[i]]; // 11340 = total_token
+            cnt++;
+        }
+    }
+
+    if (sum_of_totals < 160) { // 12288
+        // Small case: everything collapses into a single cluster without evaluating any costs.
+        *cluster_size = 1;
+        smallCase_AddHistogram(hls_clustgrams, total_count, max_loop, hls_clustgrams_out);
+        // smallCase_AddHistogram sums rows 0..2 into output row 0 and folds the totals into
+        // total_count[0]; afterwards every context is pointed at the cluster of context 0.
+        //
+        //
+        //
+        for (int i = 1; i < MAX_NUM_COLOR; ++i) { // 1,2
+#pragma HLS UNROLL
+            dc_context_map[i] = dc_context_map[0];
+        }
+        // exactly one cluster remains
+        num_clusters = 1;
+        max_nz_symbol[0] = max_loop;
+
+    } else {
+        // copy every input row into the output array, which the merge loop below updates in place
+        for (int c = 0; c < MAX_NUM_COLOR; ++c) {
+            // Full-row copy with a compile-time bound (the declared extent) rather than max_nz_symbol[c].
+            for (int n = 0; n < MAX_ALPHABET_SIZE; ++n) { // whole row; constant trip count for synthesis
+#pragma HLS PIPELINE II = 1
+                hls_clustgrams_out[c][n] = hls_clustgrams[c][n];
+            }
+        }
+
+        bool pair_is_empty = true;
+        int pairs_size = 0;
+        hls_HistogramPair pairs[MAX_NUM_COLOR];
+        // sequential
+        // Seed the queue with every unordered pair of the surviving clusters.
+        for (int idx1 = 0; idx1 < cnt; ++idx1) {
+            for (int idx2 = idx1 + 1; idx2 < cnt; ++idx2) {
+                _XF_IMAGE_PRINT("org compair p.cost_diff to check (%d,%d)\n", idx1, idx2);
+                hls_CompareAndPushToQueue(hls_clustgrams, total_count, max_loop, max_nz_symbol, cluster_size, bit_cost,
+                                          clusters[idx1], clusters[idx2], pair_is_empty, pairs_size, pairs);
+            }
+        }
+
+        // debug
+        for (int i = 0; i < pairs_size; ++i) {
+            _XF_IMAGE_PRINT("\n debug : %d-pairs.cost_diff=%f, p.cost_combo=%f,idx1=%d,idx2=%d\n", i,
+                            pairs[i].cost_diff, pairs[i].cost_combo, pairs[i].idx1, pairs[i].idx2);
+        }
+        // num_clusters is read here before being written: the caller must pass in the starting count.
+        while (num_clusters > min_cluster_size) {
+            _XF_IMAGE_PRINT("pairs[0].cost_diff=%f, cost_diff_threshold=%f\n", pairs[0].cost_diff, cost_diff_threshold);
+            if (pairs[0].cost_diff < cost_diff_threshold) {
+                int best_idx1 = pairs[0].idx1; // 0
+                int best_idx2 = pairs[0].idx2; // 2
+                // Merge cluster best_idx2 into best_idx1, in place in the output array.
+
+                // (An unused scratch array formerly declared here has been removed.)
+
+                for (int n = 0; n < max_loop; ++n) { // 0~255 ii=n or unroll the 256*3 lut
+#pragma HLS PIPELINE
+                    hls_clustgrams_out[best_idx1][n] =
+                        hls_clustgrams_out[best_idx1][n] + hls_clustgrams_out[best_idx2][n];
+                }
+                if (max_nz_symbol[best_idx1] < max_nz_symbol[best_idx2]) {
+                    max_nz_symbol[best_idx1] = max_nz_symbol[best_idx2];
+                }
+                total_count[best_idx1] = total_count[best_idx1] + total_count[best_idx2];
+                bit_cost[best_idx1] = pairs[0].cost_combo; // 27268//28093
+
+                cluster_size[best_idx1] += cluster_size[best_idx2];
+                for (int i = 0; i < MAX_NUM_COLOR; ++i) {
+#pragma HLS PIPELINE II = 1
+                    if (dc_context_map[i] == best_idx2) { // 0,1,2->0,1,0
+                        dc_context_map[i] = best_idx1;
+                    }
+                }
+
+                if (best_idx2 == 1) { // others will be clusters[0] = 0; clusters[1] =
+                                      // 1;
+                    clusters[1] = 2;
+                    for (int n = 0; n < max_loop; ++n) {
+#pragma HLS PIPELINE II = 1
+                        hls_clustgrams_out[1][n] = hls_clustgrams_out[2][n];
+                    }
+                }
+
+                // because of the init is 3
+                pair_is_empty = true;
+                pairs_size = 0;
+                // The queue is rebuilt from scratch against the merged cluster.
+                // because init is 3 ,then if Remove pairs, left is 2
+                num_clusters = num_clusters - 1; // init is 3
+
+                for (int i = 0; i < num_clusters; ++i) {
+                    hls_CompareAndPushToQueue(hls_clustgrams_out, total_count, max_loop, max_nz_symbol, cluster_size,
+                                              bit_cost, best_idx1, clusters[i], pair_is_empty, pairs_size, pairs);
+                }
+            } else {
+                // No profitable merge left: raise the bound so the while condition fails.
+                const int kClustersLimit = 64;
+                min_cluster_size = kClustersLimit;
+            }
+        } // end while
+
+    } // endif
+}
+
+void CountDChisto_todo(const uint32_t histogram[MAX_ALPHABET_SIZE], // input: per-symbol counts
+                       uint8_t& max_nz_symbol,                      // output: last non-zero index + 1
+                       const uint16_t max_loop,                     // input: number of leading bins scanned
+
+                       uint16_t& total,                      // output: 16-bit sum of the scanned bins
+                       int hls_counts[MAX_ALPHABET_SIZE],    // output: copy of histogram
+                       int hls_counts2[MAX_ALPHABET_SIZE],   // output: second copy of histogram
+                       int hls_counts3[MAX_ALPHABET_SIZE],   // output: third copy of histogram
+                       int hls_countFlat[MAX_ALPHABET_SIZE], // output: hls_ANS_TAB_SIZE / max_loop in every slot
+                       int& num_symbols,                     // output: number of non-zero bins
+                       int scode_symbols[MAX_ALPHABET_SIZE]  // output: first non-zero symbol indices
+                       ) {
+#pragma HLS INLINE
+    // Single pass over histogram[0, max_loop): totals the bins, fans the counts out to three copies,
+    // fills the flat table and records the non-zero symbols.
+    total = 0; // 16 bits: the original note says there are at most 2^16 tokens
+    int max_symbol = 0;
+    int symbol_count = 0;
+    // Guarded so an empty scan (max_loop == 0) does not divide by zero; flat_cnt is unused in that case.
+    const int flat_cnt = (max_loop != 0) ? hls_ANS_TAB_SIZE / max_loop : 0;
+    // symbol_count counts only the non-zero bins; total accumulates every scanned bin.
+    // Only the first hls_kMaxNumSymbolsForSmallCode non-zero symbols are stored in scode_symbols,
+    // but all of them are counted.
+    for (int n = 0; n < max_loop; ++n) { // 0~255
+#pragma HLS PIPELINE II = 1
+        total += histogram[n];
+        hls_counts[n] = histogram[n];
+        hls_counts2[n] = histogram[n];
+        hls_counts3[n] = histogram[n];
+        hls_countFlat[n] = flat_cnt;
+
+        if (histogram[n] > 0) { // non-zero bin
+            if (symbol_count < hls_kMaxNumSymbolsForSmallCode) {
+                scode_symbols[symbol_count] = n;
+            }
+            ++symbol_count; // number of non-zero bins so far
+            max_symbol = n + 1;
+            _XF_IMAGE_PRINT("--historgrams[%d] = %d\n", n, (int)histogram[n]);
+        }
+    }
+
+    max_nz_symbol = max_symbol; // NOTE(review): narrowed to 8 bits, so a value of 256 would wrap to 0
+    // publish the non-zero bin count
+    num_symbols = symbol_count;
+}
+
+// Clusters MAX_NUM_COLOR input histograms. DC and control fields both supply 3 of them:
+// {x,y,b} and {quenter, acstrategy, arparameter}.
+void hls_ClusterHistograms_top(
+
+    const hist_t hls_clustgrams[hls_kNumStaticContexts][MAX_ALPHABET_SIZE],
+    int max_histograms,
+    int& num_clusters,
+    uint8_t max_nz_symbol[MAX_NUM_COLOR],
+
+    hist_t hls_clustgrams_out[hls_kNumStaticContexts][MAX_ALPHABET_SIZE],
+    uint8_t dc_context_map[MAX_NUM_COLOR] // histogram_symbols
+    ) {
+#pragma HLS INLINE OFF
+
+    // block_group_offsets is 0 forever in the origin codes;
+
+    // Steps: (1) per-histogram totals and extents, (2) per-histogram bit cost and identity context map,
+    // (3) hls_HistogramCombine, (4) renumber the surviving clusters for one specific map pattern.
+
+    // init
+    int num_symbols[MAX_NUM_COLOR];
+    int max_loop = 0; // int, not uint8_t: n + 1 must not be truncated when MAX_ALPHABET_SIZE exceeds 255
+    uint16_t total_count[MAX_NUM_COLOR];
+    float bit_cost[MAX_NUM_COLOR];
+    int cluster_size[MAX_NUM_COLOR] = {1, 1, 1}; // debug syn
+
+    int max_symbol = 0;
+    int symbol_count = 0;
+
+    // (Unused locals in_size, total_tmp and kMinClustersForHistogramRemap have been removed.)
+    for (int c = 0; c < MAX_NUM_COLOR; ++c) {
+#pragma HLS UNROLL
+        total_count[c] = 0;
+    }
+
+    for (int c = 0; c < MAX_NUM_COLOR; ++c) {
+        for (int n = 0; n < MAX_ALPHABET_SIZE; ++n) { // 0~255
+#pragma HLS PIPELINE II = 1
+            total_count[c] += hls_clustgrams[c][n];
+
+            if (hls_clustgrams[c][n] > 0) { // non-zero bin
+
+                ++symbol_count; // number of non-zero bins
+                max_symbol = n + 1;
+            }
+        }
+        max_nz_symbol[c] = max_symbol; // NOTE(review): uint8_t by interface, so 256 would still wrap to 0 here
+        num_symbols[c] = symbol_count;
+        max_loop = (max_loop < max_symbol) ? max_symbol : max_loop;
+        // clear for next loop
+        max_symbol = 0;
+        symbol_count = 0;
+    }
+    _XF_IMAGE_PRINT("--- total = %d \n", total_count[0]);
+
+    for (int i = 0; i < MAX_NUM_COLOR; ++i) {
+        _XF_IMAGE_PRINT(" counts.size()= %d, total = %d\n", (int)(max_nz_symbol[i]), total_count[i]);
+        bit_cost[i] = hls_ANSPopulationCost(hls_clustgrams[i], (int)max_nz_symbol[i], (int)total_count[i]);
+        dc_context_map[i] = i;
+    }
+
+    // Collapse similar histograms within a block type.
+
+    // The combine step reads num_clusters as its starting count, so it is set before the call.
+
+    num_clusters = 3;
+
+    // If we did not have block groups , we have to do one final round of
+    // clustering.
+
+    hls_HistogramCombine(hls_clustgrams, total_count, max_loop, max_nz_symbol,
+
+                         cluster_size, // 1,1,1
+                         bit_cost, dc_context_map,
+                         MAX_NUM_COLOR,  // 3
+                         max_histograms, // 64
+                         hls_clustgrams_out,
+
+                         num_clusters); // 64
+
+    if (dc_context_map[0] == 0 && (dc_context_map[1] == 0) && (dc_context_map[2] == 2)) { // map {0,0,2} -> {0,0,1}
+        dc_context_map[2] = 1;
+        max_nz_symbol[1] = max_nz_symbol[2];
+    }
+#ifndef __SYNTHESIS__
+    _XF_IMAGE_PRINT("num_clusters= %d\n", (int)(num_clusters));
+    _XF_IMAGE_PRINT("---cluster historgram after remap:\n");
+    for (int c = 0; c < num_clusters; c++) {
+        for (int i = 0; i < (max_nz_symbol[c]); i++) {
+            uint32_t tmp = hls_clustgrams_out[c][i];
+            _XF_IMAGE_PRINT("%d,", (int)(tmp));
+        }
+        _XF_IMAGE_PRINT("\n");
+    }
+
+#endif
+}
diff --git a/codec/L2/demos/pikEnc/kernel/kernel3/build_table_encode_histo.cpp b/codec/L2/demos/pikEnc/kernel/kernel3/build_table_encode_histo.cpp
new file mode 100755
index 0000000000..119afc79f8
--- /dev/null
+++ b/codec/L2/demos/pikEnc/kernel/kernel3/build_table_encode_histo.cpp
@@ -0,0 +1,1583 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel3/build_table_encode_histo.hpp"
+
+inline void hls_SmallestIncrement(int count, int& inc) {
+    const int log_count = (count != 0) ? (31 ^ __builtin_clz((uint32_t)count)) : -1; // floor(log2(count)); -1 for 0
+    const int kept_bits = (log_count + 1) >> 1; // same expression as GetPopulationCountPrecision(log_count)
+    inc = (1 << (log_count - kept_bits));       // 2^(dropped bits)
+}
+
+void hls_ANSBuildInfoTable_syn(const int counts[MAX_ALPHABET_SIZE],      // per-symbol counts (used unless use_flat)
+                               const int flat_counts[MAX_ALPHABET_SIZE], // per-symbol counts used when use_flat
+                               bool use_flat,                            // selects flat_counts over counts
+                               int alphabet_size,                        // number of info entries written
+                               uint16_t max_nz_symbol,                   // symbols at or above this get freq 0
+
+                               int histogram[MAX_ALPHABET_SIZE], // cleared to 0 for the next table
+                               hls_ANSEncSymbolInfo* info) {     // output: freq_, start_, ifreq_ per symbol
+#pragma HLS INLINE OFF
+    // Fills info[0, alphabet_size) with each symbol's frequency, cumulative start and reciprocal.
+    int Fs_start = 0; // running sum of the frequencies written so far
+
+    if (use_flat) {
+        _XF_IMAGE_PRINT("--5 ANSBuildInfoTable - BuildAndStoreANS\n");
+        _XF_IMAGE_PRINT("--6 RewindStorage - BuildAndStoreANS\n");
+        _XF_IMAGE_PRINT("--7 EncodeFlatHistogram - BuildAndStoreANS\n");
+    }
+    for (int s = 0; s < alphabet_size; ++s) {
+#pragma HLS PIPELINE II = 1
+        histogram[s] = 0; // clean to 0 for next table
+
+        int tmp = use_flat ? flat_counts[s] : counts[s];
+        const uint32_t freq = (s < max_nz_symbol) ? tmp : 0;
+        info[s].freq_ = freq;
+        info[s].start_ = Fs_start;
+        Fs_start += freq;
+        // ifreq_ = ceil(2^hls_RECIPROCAL_PRECISION / freq_), computed from the value stored in info[s].freq_.
+        // The reciprocal is always computed here (the USE_MULT_BY_RECIPROCAL guard is commented out).
+        if (freq != 0) {
+            info[s].ifreq_ = ((1ull << hls_RECIPROCAL_PRECISION) + info[s].freq_ - 1) / info[s].freq_;
+        } else {
+            info[s].ifreq_ = 1; // shouldn't matter (symbol shouldn't occur), but...
+        }
+        // end of the reciprocal computation
+    }
+}
+
+// ------------------------------------------------------------
+
+// ------------------------------------------------------------
+// Single pass over histogram[0, max_loop) that
+// 1. totals the bins and copies them to three count arrays,
+// 2. fills the flat table, and 3. records the non-zero symbols and the extent of the last one.
+void CountNZSymbol(const uint32_t histogram[MAX_ALPHABET_SIZE], // input: per-symbol counts
+                   uint8_t max_nz_symbol[2],                    // output: last non-zero index + 1, written twice
+                   const uint16_t max_loop,                     // input: number of leading bins scanned
+
+                   uint16_t& total,                      // output: 16-bit sum of the scanned bins
+                   int hls_counts[MAX_ALPHABET_SIZE],    // output: copy of histogram
+                   int hls_counts2[MAX_ALPHABET_SIZE],   // output: second copy of histogram
+                   int hls_counts3[MAX_ALPHABET_SIZE],   // output: third copy of histogram
+                   int hls_countFlat[MAX_ALPHABET_SIZE], // output: flat table summing to hls_ANS_TAB_SIZE
+                   int& num_symbols,                     // output: number of non-zero bins
+                   int scode_symbols[MAX_ALPHABET_SIZE]  // output: first non-zero symbol indices
+                   ) {
+#pragma HLS INLINE OFF
+    // Guarded below so an empty scan (max_loop == 0) does not divide by zero; flat_cnt and re_add
+    // are unused in that case because the loop body never runs.
+    total = 0; // 16 bits: the original note says there are at most 2^16 tokens
+    int max_symbol = 0;
+    int symbol_count = 0;
+    const int flat_cnt = (max_loop != 0) ? hls_ANS_TAB_SIZE / max_loop : 0;
+    const int re_add = hls_ANS_TAB_SIZE - flat_cnt * max_loop; // remainder spread over the first bins
+
+    // symbol_count counts only the non-zero bins; total accumulates every scanned bin.
+    // Only the first hls_kMaxNumSymbolsForSmallCode non-zero symbols are stored in scode_symbols,
+    // but all of them are counted.
+    for (int n = 0; n < max_loop; ++n) { // 0~255
+#pragma HLS PIPELINE II = 1
+        total += histogram[n];
+        hls_counts[n] = histogram[n];
+        hls_counts2[n] = histogram[n];
+        hls_counts3[n] = histogram[n];
+        if (n < re_add) { // the first re_add slots take one extra so the flat table sums to the table size
+            hls_countFlat[n] = flat_cnt + 1;
+        } else {
+            hls_countFlat[n] = flat_cnt;
+        }
+
+        if (histogram[n] > 0) { // non-zero bin
+            if (symbol_count < hls_kMaxNumSymbolsForSmallCode) {
+                scode_symbols[symbol_count] = n;
+            }
+            ++symbol_count; // number of non-zero bins so far
+            max_symbol = n + 1;
+            _XF_IMAGE_PRINT("--historgrams[%d] = %d\n", n, (int)histogram[n]);
+        }
+    }
+
+    max_nz_symbol[0] = max_symbol; // NOTE(review): narrowed to 8 bits, so a value of 256 would wrap to 0
+    max_nz_symbol[1] = max_symbol;
+    // publish the non-zero bin count
+    num_symbols = symbol_count;
+}
+
+void ADD_FP_strm(const int num_in,            // number of floats to read from strm_in
+                 hls::stream<float>& strm_in, // max 256 input
+
+                 hls::stream<float>& strm_sum) { // receives exactly one value, the sum, when num_in != 0
+#pragma HLS INLINE OFF
+    // Sums num_in floats from strm_in using 16 interleaved partial sums followed by a pairwise reduction.
+    // Nothing is read or written when num_in == 0. (An unused ap_uint<4> local was removed here.)
+    float sum[16];
+    const int DEP = 16;
+    const int line = (num_in + 15) >> 4; //(num_in+15)/16
+
+    if (num_in != 0) {
+    INIT_ACC:
+        for (int t = 0; t < DEP; t++) {
+#pragma HLS UNROLL
+            sum[t] = 0.0f;
+        }
+
+    CALC_ELEMENTS:
+        for (int i = 0; i < line; i++) {
+            for (int j = 0; j < DEP; j++) {
+#pragma HLS PIPELINE II = 1
+#pragma HLS LOOP_TRIPCOUNT min = DEP max = DEP
+#pragma HLS DEPENDENCE variable = sum inter false
+                float in = 0.0f; // positions past num_in in the last row contribute 0 and read nothing
+                if ((i << 4) + j < num_in) {
+                    in = strm_in.read();
+                }
+                sum[j] += in; // input k lands in sum[k % 16]; presumably so the float add can sustain II = 1
+            }
+        }
+
+        // sum 16 data to 8
+        float alpha_sum_tmp0[8] = {0};
+        for (int k = 0; k < 8; k++) {
+#pragma HLS PIPELINE
+            alpha_sum_tmp0[k] = sum[2 * k] + sum[2 * k + 1];
+        }
+
+        // sum 8 data to 4
+        float alpha_sum_tmp1[4] = {0};
+        for (int k = 0; k < 4; k++) {
+#pragma HLS PIPELINE
+            alpha_sum_tmp1[k] = alpha_sum_tmp0[2 * k] + alpha_sum_tmp0[2 * k + 1];
+        }
+        // sum 4 data to 2
+        float alpha_sum_tmp2[2] = {0};
+        for (int k = 0; k < 2; k++) {
+#pragma HLS PIPELINE
+            alpha_sum_tmp2[k] = alpha_sum_tmp1[2 * k] + alpha_sum_tmp1[2 * k + 1];
+        }
+        // sum 2 data to 1
+        float sum_out = 0.0f;
+        sum_out = alpha_sum_tmp2[0] + alpha_sum_tmp2[1];
+
+        strm_sum.write(sum_out);
+
+    } // endif
+}
+
+void ADD_FP(const int num_in,                  // number of leading entries of block_in to sum
+            float block_in[MAX_ALPHABET_SIZE], // max 256 input
+
+            int& num_out,      // always set to num_in
+            float& strm_sum) { // set to the sum when num_in != 0, otherwise left untouched
+#pragma HLS INLINE OFF
+    // Array-input variant of the 16-way interleaved float summation: partial sums, then pairwise reduction.
+    // (An unused ap_uint<4> local was removed here.)
+    float sum[16];
+    const int DEP = 16;
+    const int line = (num_in + 15) >> 4; //(num_in+15)/16
+
+    if (num_in != 0) {
+    INIT_ACC:
+        for (int t = 0; t < DEP; t++) {
+#pragma HLS UNROLL
+            sum[t] = 0.0f;
+        }
+
+    CALC_ELEMENTS:
+        for (int i = 0; i < line; i++) {
+            for (int j = 0; j < DEP; j++) {
+#pragma HLS PIPELINE II = 1
+#pragma HLS LOOP_TRIPCOUNT min = DEP max = DEP
+#pragma HLS DEPENDENCE variable = sum inter false
+                float in = 0.0f; // positions past num_in in the last row contribute 0 and are not read
+                if ((i << 4) + j < num_in) {
+                    in = block_in[i * DEP + j];
+                }
+                sum[j] += in; // element k lands in sum[k % 16]; presumably so the float add can sustain II = 1
+            }
+        }
+
+        // sum 16 data to 8
+        float alpha_sum_tmp0[8] = {0};
+        for (int k = 0; k < 8; k++) {
+#pragma HLS PIPELINE
+            alpha_sum_tmp0[k] = sum[2 * k] + sum[2 * k + 1];
+        }
+
+        // sum 8 data to 4
+        float alpha_sum_tmp1[4] = {0};
+        for (int k = 0; k < 4; k++) {
+#pragma HLS PIPELINE
+            alpha_sum_tmp1[k] = alpha_sum_tmp0[2 * k] + alpha_sum_tmp0[2 * k + 1];
+        }
+        // sum 4 data to 2
+        float alpha_sum_tmp2[2] = {0};
+        for (int k = 0; k < 2; k++) {
+#pragma HLS PIPELINE
+            alpha_sum_tmp2[k] = alpha_sum_tmp1[2 * k] + alpha_sum_tmp1[2 * k + 1];
+        }
+        // sum 2 data to 1
+        float sum_out = 0.0f;
+        sum_out = alpha_sum_tmp2[0] + alpha_sum_tmp2[1];
+
+        strm_sum = sum_out;
+
+    } // endif
+    num_out = num_in;
+}
+
+/**
+ * @brief Scale the targets that are >= 1.0 to integer counts and write the
+ *        result to three identical output arrays.
+ *
+ * When 1 < num_symbols <= hls_ANS_TAB_SIZE:
+ *  - every targets[n] >= 1.0 is multiplied by discount_ratio, truncated,
+ *    clamped, rounded using the increment returned by hls_SmallestIncrement
+ *    and added to the running sum;
+ *  - every other entry is copied unchanged from re_cnt1[n];
+ *  - the entry with the largest floor(log2(count)) (first one on ties) is then
+ *    adjusted by (table_size - sum) and its index is reported in omit_pos.
+ * Otherwise re_cnt1 is copied to the three outputs and omit_pos / mode are
+ * left untouched.
+ *
+ * @param targets     per-symbol target values
+ * @param max_symbol  number of entries processed
+ * @param table_size  value the counts are rebalanced towards (1024 per the caller comment)
+ * @param num_symbols number of symbols; selects the rebalance or copy path
+ * @param add_nround  non-rounded sum matching the incoming sum; read only when sum != 0
+ * @param re_cnt1     counts already assigned to the entries with targets < 1.0
+ * @param sum         running sum of the counts assigned so far (by value)
+ * @param omit_pos    out: index of the entry that absorbed the remainder
+ * @param re_counts1  out: resulting counts
+ * @param re_counts2  out: duplicate of re_counts1
+ * @param re_counts3  out: duplicate of re_counts1
+ * @param mode        out: true when the adjusted remainder entry is > 0
+ */
+void hls_Rebalance_by_sum(const float targets[MAX_ALPHABET_SIZE],
+                          const int max_symbol,
+                          const int table_size, // 1024
+                          const int num_symbols,
+
+                          // hls::stream<float>& strm_sum_nonrounded,
+                          const float add_nround,
+                          int re_cnt1[MAX_ALPHABET_SIZE],
+                          int sum,
+
+                          int& omit_pos,
+                          int re_counts1[MAX_ALPHABET_SIZE],
+                          int re_counts2[MAX_ALPHABET_SIZE],
+                          int re_counts3[MAX_ALPHABET_SIZE],
+                          bool& mode) {
+#pragma HLS INLINE OFF
+
+    if (num_symbols > 1 && (num_symbols <= hls_ANS_TAB_SIZE)) {
+        float sum_nonrounded = 0.0f;
+        if (sum != 0) {
+            sum_nonrounded = add_nround; // strm_sum_nonrounded.read();
+        }
+        // Share of the table still available, relative to the non-rounded mass
+        // still to be placed. Note this uses hls_ANS_TAB_SIZE, not table_size.
+        const float discount_ratio = (hls_ANS_TAB_SIZE - sum) / (hls_ANS_TAB_SIZE - sum_nonrounded);
+        assert(discount_ratio > 0);
+        assert(discount_ratio <= 1.0);
+        int remainder_pos = 0; // if all of them are handled in first loop
+        int remainder_log = -1;
+        int count;
+
+        // Invariant for minimize_error_of_sum == true:
+        // abs(sum - sum_nonrounded)
+        //   <= SmallestIncrement(max(targets[])) + max_symbol
+        for (int n = 0; n < max_symbol; ++n) { // 33
+#pragma HLS PIPELINE II = 1
+            if (targets[n] >= 1.0) {
+                // sum_nonrounded += targets[n];
+                count = static_cast<uint32_t>(targets[n] * discount_ratio); // truncate
+
+                // Keep the count inside [1, table_size - 1].
+                if (count == 0) count = 1;
+                if (count == table_size) count = table_size - 1;
+                // Round the count to the closest nonzero multiple of SmallestIncrement
+                // (when minimize_error_of_sum is false) or one of two closest so as to
+                // keep the sum as close as possible to sum_nonrounded.
+
+                int inc;
+                hls_SmallestIncrement(count, inc);
+
+                // Clear the low bits selected by (inc - 1); presumably inc is a
+                // power of two, making this a round-down to a multiple of inc.
+                count -= count & (inc - 1);
+                // TODO(robryk): Should we rescale targets[n]?
+
+                // Round up instead when the target is past the midpoint and the
+                // bumped count still fits below table_size.
+                const float target = targets[n];
+                if (count == 0 || (target > count + (inc >> 1) && (count + inc < table_size))) {
+                    count += inc;
+                }
+                sum += count;
+
+                re_counts1[n] = count; // duplicate
+                re_counts2[n] = count; // duplicate
+                re_counts3[n] = count; // duplicate
+
+                // floor(log2(count)); the strict '>' keeps the first entry on ties.
+                const int count_log = (31 ^ __builtin_clz((uint32_t)count)); // hls_Log2FloorNonZero_32b(re_counts1[n]);
+                if (count_log > remainder_log) {
+                    remainder_pos = n;
+                    remainder_log = count_log;
+                }
+
+            } else {
+                // Entries below 1.0 keep the count chosen earlier.
+                int tmp = re_cnt1[n];
+                re_counts1[n] = tmp; // duplicate
+                re_counts2[n] = tmp; // duplicate
+                re_counts3[n] = tmp; // duplicate
+            }
+        }
+
+        // debug dump of the counts before the remainder adjustment
+        for (int j = 0; j < max_symbol; ++j) {
+            _XF_IMAGE_PRINT("--historgrams_norm[%d] = %d\n", j, (int)re_counts3[j]);
+        }
+
+        // NOTE(review): remainder_pos starts at 0 and is only ever assigned n >= 0,
+        // so this assertion cannot fire.
+        assert(remainder_pos != -1);
+        // Let the selected entry absorb the difference between sum and table_size.
+        int tmp = re_counts1[remainder_pos] - sum + table_size;
+
+        _XF_IMAGE_PRINT("--remainder_pos = %d, tmp=%d \n", remainder_pos, (int)tmp);
+
+        re_counts1[remainder_pos] = tmp; // dup
+        re_counts2[remainder_pos] = tmp; // dup
+        re_counts3[remainder_pos] = tmp; // dup
+        omit_pos = remainder_pos;
+        mode = tmp > 0;
+
+    } else {
+        // Nothing to rebalance: pass the incoming counts through.
+        // strm_sum_nonrounded.read();//no read and no used
+        for (int n = 0; n < max_symbol; ++n) { // 33
+#pragma HLS PIPELINE II = 1
+            int tmp = re_cnt1[n];
+            re_counts1[n] = tmp; // dup
+            re_counts2[n] = tmp; // dup
+            re_counts3[n] = tmp; // dup
+        }
+    }
+}
+
+/**
+ * @brief Variant of the rebalance step that steers each rounding decision by
+ *        the running error (sum_nonrounded - sum) instead of the per-symbol target.
+ *
+ * Only acts when 1 < num_symbols <= hls_ANS_TAB_SIZE; otherwise no output is
+ * written. Entries with targets[n] < 1.0 are not touched.
+ *
+ * NOTE(review): the discount_ratio parameter is shadowed by a local of the
+ * same name declared inside the body, so the value passed by the caller is
+ * never read.
+ *
+ * @param targets        per-symbol target values
+ * @param max_symbol     number of entries processed
+ * @param table_size     value the counts are rebalanced towards
+ * @param num_symbols    number of symbols; gates the whole function
+ * @param sum            running sum of counts assigned so far (by value)
+ * @param sum_nonrounded running non-rounded sum (by value)
+ * @param discount_ratio unused, see the note above
+ * @param omit_pos       out: index of the entry that absorbed the remainder
+ * @param re_counts1     in/out: counts; entries with targets >= 1.0 are overwritten
+ * @param re_counts2     out: duplicate of the entries written to re_counts1
+ * @param re_counts3     out: duplicate of the entries written to re_counts1
+ * @param mode           out: true when the adjusted remainder entry is > 0
+ */
+void hls_RebalanceHistogram_minture(const float targets[MAX_ALPHABET_SIZE],
+                                    const int max_symbol,
+                                    const int table_size, // 1024
+                                    const int num_symbols,
+                                    // const ap_uint<16> total,
+                                    int sum,
+                                    float sum_nonrounded,
+                                    const float discount_ratio,
+                                    int& omit_pos,
+                                    int re_counts1[MAX_ALPHABET_SIZE],
+                                    int re_counts2[MAX_ALPHABET_SIZE],
+                                    int re_counts3[MAX_ALPHABET_SIZE],
+                                    bool& mode) {
+    if (num_symbols > 1 && (num_symbols <= hls_ANS_TAB_SIZE)) {
+        // Shadows the parameter: the ratio is always recomputed here.
+        const float discount_ratio = (hls_ANS_TAB_SIZE - sum) / (hls_ANS_TAB_SIZE - sum_nonrounded);
+        assert(discount_ratio > 0);
+        assert(discount_ratio <= 1.0);
+        // The input counts have been rebalanced once, e.g. from 303 to 72, 3 to 1.
+        // targets[] does not change, but the final target used for rounding
+        // becomes sum_nonrounded - sum.
+
+        int remainder_pos = 0; // if all of them are handled in first loop
+        int remainder_log = -1;
+
+        // Invariant for minimize_error_of_sum == true:
+        // abs(sum - sum_nonrounded)
+        //   <= SmallestIncrement(max(targets[])) + max_symbol
+        for (int n = 0; n < max_symbol; ++n) { // 33
+#pragma HLS PIPELINE II = 1
+            if (targets[n] >= 1.0) {
+                sum_nonrounded += targets[n];
+                re_counts1[n] = static_cast<uint32_t>(targets[n] * discount_ratio); // truncate
+
+                // Keep the count inside [1, table_size - 1].
+                if (re_counts1[n] == 0) re_counts1[n] = 1;
+                if (re_counts1[n] == table_size) re_counts1[n] = table_size - 1;
+                // Round the count to the closest nonzero multiple of SmallestIncrement
+                // (when minimize_error_of_sum is false) or one of two closest so as to
+                // keep the sum as close as possible to sum_nonrounded.
+
+                int inc;
+                hls_SmallestIncrement(re_counts1[n], inc);
+
+                // Clear the low bits selected by (inc - 1).
+                re_counts1[n] -= re_counts1[n] & (inc - 1);
+                // TODO(robryk): Should we rescale targets[n]?
+
+                // The rounding target is the error accumulated so far.
+                const float target = (sum_nonrounded - sum);
+                if (re_counts1[n] == 0 || (target > re_counts1[n] + (inc >> 1) && (re_counts1[n] + inc < table_size))) {
+                    re_counts1[n] += inc;
+                }
+                sum += re_counts1[n];
+                re_counts2[n] = re_counts1[n];
+                re_counts3[n] = re_counts1[n];
+
+                // floor(log2(count)); the strict '>' keeps the first entry on ties.
+                const int count_log =
+                    (31 ^ __builtin_clz((uint32_t)re_counts1[n])); // hls_Log2FloorNonZero_32b(re_counts1[n]);
+                if (count_log > remainder_log) {
+                    remainder_pos = n;
+                    remainder_log = count_log;
+                }
+            }
+        }
+
+        //    for (int j = 0; j < 100; ++j) {
+        //    	_XF_IMAGE_PRINT("--historgrams_norm[%d] = %d\n", j,
+        //    (int)re_counts1[j] );
+        //    }
+
+        // NOTE(review): remainder_pos starts at 0, so this assertion cannot fire.
+        assert(remainder_pos != -1);
+        // Let the selected entry absorb the difference between sum and table_size.
+        re_counts1[remainder_pos] -= sum - table_size;
+        re_counts2[remainder_pos] = re_counts1[remainder_pos];
+        re_counts3[remainder_pos] = re_counts1[remainder_pos];
+        omit_pos = remainder_pos;
+        mode = re_counts1[remainder_pos] > 0;
+
+    } // endif
+}
+
+/**
+ * @brief Compute the scaled targets of a histogram and pre-assign a count of 1
+ *        to every non-zero entry that is smaller than one table slot.
+ *
+ * Behaviour per entry n < max_nz_symbol:
+ *  - num_symbols == 0: re_cnt1[n] = 0
+ *  - num_symbols == 1: re_cnt1[n] = hls_ANS_TAB_SIZE for n == scode_symbol_0, else 0
+ *  - num_symbols > hls_ANS_TAB_SIZE: only a diagnostic is printed
+ *  - otherwise: targets[n] = hls_counts[n] * hls_ANS_TAB_SIZE / total; a non-zero
+ *    count below ceil(total / 1024) gets re_cnt1[n] = 1 and its target is
+ *    appended to add_targets; a zero count gets re_cnt1[n] = 0; larger counts
+ *    leave re_cnt1[n] untouched.
+ *
+ * @param hls_counts     raw histogram counts
+ * @param max_nz_symbol  number of entries processed
+ * @param total          sum of the histogram, used as the normalisation divisor
+ * @param num_symbols    number of symbols; selects the branch above (only read here)
+ * @param scode_symbol_0 index of the only symbol when num_symbols == 1
+ * @param sum            in/out: number of entries appended to add_targets; the
+ *                       caller provides the starting index
+ * @param add_targets    out: targets of the entries that were rounded up to 1
+ * @param targets        out: scaled target of every entry (general branch only)
+ * @param re_cnt1        out: pre-assigned counts, see above
+ */
+void ComputeTarget(int hls_counts[MAX_ALPHABET_SIZE],
+                   uint8_t max_nz_symbol,
+
+                   uint16_t total,
+                   int& num_symbols,
+                   const int scode_symbol_0,
+
+                   int& sum,
+                   // hls::stream<float> &strm_target,
+                   float add_targets[MAX_ALPHABET_SIZE],
+                   float targets[MAX_ALPHABET_SIZE],
+                   int re_cnt1[MAX_ALPHABET_SIZE] // output
+                   ) {
+#pragma HLS INLINE OFF
+
+    const float norm = 1.f * hls_ANS_TAB_SIZE / total;
+    // Split total into total / 1024 (bits 15..10) and total % 1024 (bits 9..0).
+    ap_uint<16> total_ap = total;
+    bool is_remd = total_ap(9, 0) != 0;
+    uint8_t diff = total_ap(15, 10);
+    // ceil(total / 1024): counts below this scale to a target below 1.
+    // Loop invariant, so it is computed once.
+    const int small_limit = is_remd ? (diff + 1) : diff;
+
+    for (int n = 0; n < max_nz_symbol; ++n) { // round the <1 to 1
+#pragma HLS PIPELINE II = 1
+
+        if (num_symbols == 0) {
+            re_cnt1[n] = 0;
+        } else if (num_symbols == 1) {
+            int tmp_counts = (n == scode_symbol_0) ? hls_ANS_TAB_SIZE : 0;
+            re_cnt1[n] = tmp_counts;
+        } else if (num_symbols > hls_ANS_TAB_SIZE) {
+            _XF_IMAGE_PRINT("Too many entries in an ANS histogram");
+        } else {
+            int tmp = hls_counts[n];
+            float target_tmp = norm * tmp;
+            targets[n] = target_tmp;
+
+            if (tmp != 0 && tmp < small_limit) {
+                re_cnt1[n] = 1; // round up a count that is smaller than one table slot
+                add_targets[sum] = target_tmp;
+                sum += 1; // add to sum
+            } else if (tmp == 0) {
+                re_cnt1[n] = 0; // clean the ram for the 0 num in the org_count
+            }
+        }
+    }
+}
+
+// Duplicate the first max_nz_symbol entries of targets and re_cnt into
+// targets_sub and re_cnt_sub respectively.
+void CopyRams(uint8_t max_nz_symbol,
+              float targets[MAX_ALPHABET_SIZE],
+              int re_cnt[MAX_ALPHABET_SIZE],
+
+              float targets_sub[MAX_ALPHABET_SIZE],
+              int re_cnt_sub[MAX_ALPHABET_SIZE]) {
+    const int entries = max_nz_symbol;
+    for (int idx = 0; idx < entries; idx++) {
+#pragma HLS PIPELINE II = 1
+        const float target_val = targets[idx];
+        const int count_val = re_cnt[idx];
+        targets_sub[idx] = target_val;
+        re_cnt_sub[idx] = count_val;
+    }
+}
+
+/**
+ * @brief Normalise a histogram to hls_ANS_TAB_SIZE and emit three identical copies.
+ *
+ * Chains four stages: ComputeTarget (scaled targets, small counts rounded to 1),
+ * ADD_FP (sum of the targets that were rounded to 1), CopyRams (second copy of
+ * the intermediate arrays) and hls_Rebalance_by_sum (final counts).
+ *
+ * @param mode            out: set by hls_Rebalance_by_sum on its rebalance path
+ * @param hls_counts      raw histogram counts
+ * @param omit_pos        out: set by hls_Rebalance_by_sum on its rebalance path
+ * @param max_nz_symbol   number of histogram entries processed
+ * @param total           sum of the histogram
+ * @param num_symbols     number of symbols in the histogram
+ * @param scode_symbol_0  index of the only symbol when num_symbols == 1
+ * @param re_counts1      out: normalised counts
+ * @param re_counts2      out: duplicate of re_counts1
+ * @param re_counts3      out: duplicate of re_counts1
+ */
+void SetTargetsWithRebalance(bool& mode, // todo
+                             int hls_counts[MAX_ALPHABET_SIZE],
+                             int& omit_pos,
+                             uint8_t max_nz_symbol,
+
+                             uint16_t total,
+                             int& num_symbols,
+                             const int scode_symbol_0,
+
+                             int re_counts1[MAX_ALPHABET_SIZE], // output
+                             int re_counts2[MAX_ALPHABET_SIZE], // output
+                             int re_counts3[MAX_ALPHABET_SIZE]  // output
+                             ) {
+// round to ANS_TAB_SIZE==1024
+#pragma HLS INLINE OFF
+#pragma HLS DATAFLOW
+    // NOTE(review): an earlier comment here said the loops are small enough not
+    // to need dataflow / ping-pong buffers, yet DATAFLOW is enabled above —
+    // confirm the intent.
+
+    float targets[MAX_ALPHABET_SIZE]; // hard code
+    float add_targets[MAX_ALPHABET_SIZE];
+#pragma HLS RESOURCE variable = add_targets core = RAM_2P_BRAM
+
+    // ADD_FP writes add_nround only when sum != 0; start from a defined value so
+    // that passing it by value below is always well defined.
+    float add_nround = 0.0f;
+    int sum_sub;
+    int sum = 0;                          // number of entries in add_targets
+    int re_cnt[MAX_ALPHABET_SIZE];        // ping-pong ram
+    float targets_sub[MAX_ALPHABET_SIZE]; // copy of targets for the last stage
+    int re_cnt_sub[MAX_ALPHABET_SIZE];    // copy of re_cnt for the last stage
+
+    // Stage 1: scaled targets; counts smaller than one slot are rounded to 1.
+    ComputeTarget(hls_counts, max_nz_symbol, total, num_symbols, scode_symbol_0,
+
+                  sum, add_targets, targets, re_cnt);
+
+    // Stage 2: non-rounded sum of the targets that were rounded to 1.
+    ADD_FP(sum, add_targets, sum_sub, add_nround); // once for one func
+
+    // Stage 3: second copy of the intermediate arrays.
+    CopyRams(max_nz_symbol, targets, re_cnt, targets_sub, re_cnt_sub);
+
+    // Stage 4: scale the remaining targets and fix up the total.
+    hls_Rebalance_by_sum(targets_sub, max_nz_symbol, hls_ANS_TAB_SIZE, num_symbols, add_nround, re_cnt_sub,
+                         sum_sub, omit_pos, re_counts1, re_counts2, re_counts3, mode);
+
+    // debug
+    for (int j = 0; j < max_nz_symbol; ++j) {
+        _XF_IMAGE_PRINT("--re_historgrams[%d] = %d\n", j, (int)re_counts3[j]);
+    }
+}
+
+/**
+ * @brief Emit n as a variable-length code of (nbits, bits) pairs.
+ *
+ * n == 0 is written as a single 0 bit. Any other value is written as three
+ * fields: a 1 bit, the 4-bit value nbits = hls_Log2FloorNonZero_32b(n), and
+ * the nbits-wide value n - 2^nbits.
+ *
+ * @param n          value to encode
+ * @param num_bits   in/out: running count of emitted bits
+ * @param num        in/out: running count of emitted pairs
+ * @param strm_nbits out: width of each emitted field
+ * @param strm_bits  out: value of each emitted field
+ */
+void hls_StoreVarLenUint16_build(uint32_t n,
+                                 // size_t* storage_ix, uint8_t* storage,
+                                 int& num_bits,
+                                 int& num,
+                                 hls::stream<nbits_t>& strm_nbits,
+                                 hls::stream<uint16_t>& strm_bits) {
+#pragma HLS INLINE OFF
+    if (n == 0) {
+        hls_WriteBits_strm(1, 0, num_bits, num, strm_nbits, strm_bits);
+    } else {
+        int nbits;
+        // One field per iteration; nbits is produced in step 0 and consumed in
+        // steps 1 and 2.
+        for (int i = 0; i < 3; i++) {
+#pragma HLS PIPELINE
+            if (i == 0) {
+                hls_WriteBits_strm_nodepend(1, 1, strm_nbits, strm_bits);
+                nbits = hls_Log2FloorNonZero_32b(n);
+            } else if (i == 1) {
+                hls_WriteBits_strm_nodepend(4, nbits, strm_nbits, strm_bits);
+            } else {
+                hls_WriteBits_strm_nodepend(nbits, n - (1ULL << nbits), strm_nbits, strm_bits);
+
+                // The counters are updated once for all three fields. The third
+                // field is counted as a pair only when nbits != 0 — presumably a
+                // zero-width write emits nothing; confirm in
+                // hls_WriteBits_strm_nodepend.
+                num_bits += 1 + 4 + nbits;
+                num += 2 + ((nbits == 0) ? 0 : 1);
+            }
+        }
+    }
+}
+
+/**
+ * @brief Serialise a normalised histogram as a sequence of (nbits, bits) pairs.
+ *
+ * histo_bits and num are always reset to 0. Nothing is emitted when do_encode
+ * is false. Otherwise:
+ *  - num_symbols <= 2 ("small tree"): marker bit 1, then the symbol count and
+ *    the symbols themselves, plus counts[symbols[0]] when there are two symbols;
+ *  - otherwise: marker bits 0, 0, the value max_nz_symbol - 3, one code per
+ *    entry for its log-count (runs longer than kMinReps are run-length coded),
+ *    and finally the extra precision bits of each count.
+ *
+ * NOTE(review): the original comments on this function disagreed on whether
+ * the use_flat flag affects this module's II — confirm.
+ *
+ * @param counts        normalised counts
+ * @param alphabet_size alphabet size; used to close the last run
+ * @param omit_pos      index of the entry whose count is not transmitted
+ * @param num_symbols   number of symbols in the histogram
+ * @param symbols       symbol indices, read on the small-tree path
+ * @param max_nz_symbol number of entries encoded on the general path
+ * @param do_encode     false disables all output
+ * @param histo_bits    out: number of bits emitted
+ * @param num           out: number of pairs emitted
+ * @param strm_nbits    out: width of each emitted field
+ * @param strm_bits     out: value of each emitted field
+ */
+void hls_EncodeCounts(const int counts[MAX_ALPHABET_SIZE],
+                      const int alphabet_size,
+                      const int omit_pos,
+                      const int num_symbols,
+                      const int symbols[MAX_ALPHABET_SIZE],
+                      const uint8_t max_nz_symbol,
+                      bool do_encode,
+
+                      int& histo_bits,
+                      int& num,
+                      hls::stream<nbits_t>& strm_nbits,
+                      hls::stream<uint16_t>& strm_bits) {
+
+#pragma HLS INLINE OFF
+
+    histo_bits = 0;
+    num = 0;
+    // same[k] holds the length of the run that starts at index k.
+    int same[MAX_ALPHABET_SIZE];
+#pragma HLS RESOURCE variable = same core = RAM_2P_LUTRAM
+
+    if (do_encode) {
+        _XF_IMAGE_PRINT("--tree size(num_symbols) = %d\n", (int)num_symbols);
+        if (num_symbols <= 2) { // Small tree
+
+            // Small tree marker to encode 1-2 symbols.
+            hls_WriteBits_strm(1, 1, histo_bits, num, strm_nbits, strm_bits);
+
+            if (num_symbols == 0) {
+                hls_WriteBits_strm(1, 0, histo_bits, num, strm_nbits, strm_bits);
+                hls_StoreVarLenUint16_build(0, histo_bits, num, strm_nbits, strm_bits);
+
+            } else {
+                // One bit for the symbol count (0 -> one symbol, 1 -> two symbols).
+                hls_WriteBits_strm(1, num_symbols - 1, histo_bits, num, strm_nbits, strm_bits);
+                for (int i = 0; i < num_symbols; ++i) {
+                    // A zero symbol is written directly as a single 0 bit; any
+                    // other value goes through the variable-length encoder.
+                    if (symbols[i] == 0) {
+                        hls_WriteBits_strm(1, 0, histo_bits, num, strm_nbits, strm_bits);
+                    } else {
+                        hls_StoreVarLenUint16_build(symbols[i], histo_bits, num, strm_nbits, strm_bits);
+                    }
+                }
+            }
+            if (num_symbols == 2) {
+                // Only the first symbol's count is written.
+                hls_WriteBits_strm(hls_ANS_LOG_TAB_SIZE, counts[symbols[0]], histo_bits, num, strm_nbits, strm_bits);
+            }
+
+        } else { // non-small tree
+            // Example trace of the emitted pairs (from the original comments):
+            //   n_bits=1,  bits=0
+            //   n_bits=1,  bits=0
+            //   hls_StoreVarLenUint16_build:
+            //     n_bits=1,  bits=1
+            //     n_bits=4,  bits=0   = hls_Log2FloorNonZero_32b(max_nz_symbol - 3)
+            //     n_bits=3,  bits=1   , pos=908
+            //   n_bits=3,  bits=1   , pos=911
+            //   n_bits=3,  bits=2
+            //   n_bits=4,  bits=3
+            //   n_bits=3,  bits=1
+            //   n_bits=2,  bits=0
+            //   n_bits=1,  bits=0
+            //   n_bits=1,  bits=1
+            //   n_bits=10, bits=256
+
+            // Mark non-small tree.
+            hls_WriteBits_strm(1, 0, histo_bits, num, strm_nbits, strm_bits);
+            // Mark non-flat histogram.
+            hls_WriteBits_strm(1, 0, histo_bits, num, strm_nbits, strm_bits);
+            // Since num_symbols >= 3, we know that length >= 3, therefore we encode
+            // length - 3.
+            _XF_IMAGE_PRINT("max_nz_symbol = %d\n", max_nz_symbol);
+            hls_StoreVarLenUint16_build(max_nz_symbol - 3, histo_bits, num, strm_nbits, strm_bits);
+
+            // todo Merge the first two loop to syn
+            // ------------------------------------------------------------
+            // init a RAM
+            // Precompute sequences for RLE encoding. Contains the number of identical
+            // values starting at a given index. Only contains the value at the first
+            // element of the series.
+            // std::vector<int> same(alphabet_size, 0);
+
+            same[0] = 0;
+            same[1] = 0;
+            //    for(int i = 0; i < max_nz_symbol; i++){
+            //    	same[i] = 0;
+            //    }
+            //    i = 1, 2, 3, 4, 5, 6, 7
+            // last = 0, 0, 3, 4, 4, 6, 7
+            int last = 0;
+            for (int i = 1; i < max_nz_symbol; i++) {
+#pragma HLS DEPENDENCE variable = same inter false
+#pragma HLS PIPELINE
+                // Store the sequence length once different symbol reached, or we're at
+                // the end, or the length is longer than we can encode, or we are at
+                // the omit_pos. We don't support including the omit_pos in an RLE
+                // sequence because this value may use a different amount of log2 bits
+                // than standard, it is too complex to handle in the decoder.
+
+                // Clear the entry lazily instead of zero-filling the array first.
+                same[i] = 0; // two port bram
+                if (counts[i] != counts[last] || i + 1 == alphabet_size || (i - last) >= 255 || i == omit_pos ||
+                    i == omit_pos + 1) {
+                    same[last] = (i - last);
+                    last = i + 1;
+                }
+            }
+
+            // debug
+            for (int j = 0; j < max_nz_symbol; ++j) {
+                _XF_IMAGE_PRINT("--same[%d] = %d\n", j, (int)same[j]);
+            }
+
+            // ------------------------------------------------------------
+            // init a RAM
+            // logcounts[i] = floor(log2(counts[i])) + 1 for positive counts, 0 for
+            // zero counts. The entry at omit_pos is filled in after the loop.
+            uint8_t logcounts[MAX_ALPHABET_SIZE];
+#pragma HLS RESOURCE variable = logcounts core = RAM_2P_LUTRAM
+            uint8_t omit_log = 0;
+
+            for (int i = 0; i < max_nz_symbol; ++i) {
+#pragma HLS PIPELINE II = 1
+                assert(counts[i] <= hls_ANS_TAB_SIZE);
+                assert(counts[i] >= 0);
+                if (i == omit_pos) {
+                } else if (counts[i] > 0) {
+                    logcounts[i] = (31 ^ __builtin_clz((uint32_t)counts[i])) + 1;
+                    // omit_log is the running maximum; entries before omit_pos
+                    // contribute logcounts + 1, entries after it contribute logcounts.
+                    if (i < omit_pos) {
+                        omit_log = (omit_log > logcounts[i] + 1) ? omit_log : (logcounts[i] + 1);
+                    } else {
+                        omit_log = (omit_log > logcounts[i]) ? omit_log : (logcounts[i]);
+                    }
+                } else {
+                    logcounts[i] = 0;
+                }
+            }
+            logcounts[omit_pos] = omit_log;
+
+            // The logcount values are encoded with a static Huffman code.
+            static const int kMinReps = 4;
+            // Index of the RLE marker in the log-count code tables.
+            int rep = hls_ANS_LOG_TAB_SIZE + 1;
+
+        // Pass 1: one code per entry, or an RLE marker plus an 8-bit run length.
+        logcount_loop1:
+            for (int i = 0; i < max_nz_symbol; ++i) {
+#pragma HLS PIPELINE II = 4
+                if (i > 0 && same[i - 1] > kMinReps) {
+                    // Encode the RLE symbol and skip the repeated ones.
+                    hls_WriteBits_strm(hls_kLogCountBitLengths[rep], hls_kLogCountSymbols[rep],
+                                       // pos,storage,
+                                       histo_bits, num, strm_nbits, strm_bits);
+                    hls_WriteBits_strm(8, same[i - 1], histo_bits, num, strm_nbits, strm_bits);
+                    // Together with the loop's ++i this advances i by same[i - 1] - 1.
+                    i += same[i - 1] - 2;
+
+                } else {
+                    hls_WriteBits_strm(hls_kLogCountBitLengths[logcounts[i]], hls_kLogCountSymbols[logcounts[i]],
+                                       histo_bits, num, strm_nbits, strm_bits);
+                }
+            }
+
+        // Pass 2: extra precision bits for entries that were not run-length coded.
+        logcount_loop2:
+            for (int i = 0; i < max_nz_symbol; ++i) {
+#pragma HLS PIPELINE II = 3
+                if (i > 0 && same[i - 1] > kMinReps) {
+                    // Skip symbols encoded by RLE.
+                    i += same[i - 1] - 2;
+
+                } else if (logcounts[i] > 1 && i != omit_pos) {
+                    // Keep bitcount bits below the leading one and drop the rest.
+                    int bitcount = (logcounts[i]) >> 1;
+                    int drop_bits = logcounts[i] - 1 - bitcount;
+                    hls_WriteBits_strm(bitcount, (counts[i] >> drop_bits) - (1 << bitcount), histo_bits, num,
+                                       strm_nbits, strm_bits);
+                }
+            }
+        } // end num_symbol if
+    }     // end do_encode if
+}
+
+/**
+ * @brief Estimate the number of bits needed to code a histogram with the
+ *        given normalised counts.
+ *
+ * Computes sum_i( -histogram[i] * hls_FastLog2(counts[i]) ) over the entries
+ * with histogram[i] > 0, adds total_histogram * hls_ANS_LOG_TAB_SIZE, and
+ * returns the result plus 1.0, truncated to int. Estimate is 0 when
+ * do_estimate is false.
+ *
+ * @param do_estimate false skips the computation
+ * @param histogram   raw histogram counts
+ * @param counts      normalised counts; must be > 0 wherever histogram is > 0
+ * @param num_symbol  number of symbols; 0 disables the accumulation
+ * @param len         number of entries processed
+ * @param Estimate    out: estimated size in bits
+ */
+void hls_EstimateDataBits( // debug
+    bool do_estimate,
+    const int* histogram,
+    const int counts[MAX_ALPHABET_SIZE],
+    const short num_symbol,
+    const short len, // alphabet_size
+    int& Estimate) {
+#pragma HLS INLINE OFF
+
+    float sum = 0.0f;
+    int total_histogram = 0;
+    int total_counts = 0;
+    Estimate = 0;
+
+    // Number of terms pushed into strm_fsub.
+    int num = 0;
+
+    hls::stream<float> strm_fsub;
+#pragma HLS RESOURCE variable = strm_fsub core = FIFO_BRAM
+#pragma HLS STREAM variable = strm_fsub depth = 1024
+    hls::stream<float> strm_sum_nonrounded;
+#pragma HLS RESOURCE variable = strm_sum_nonrounded core = FIFO_LUTRAM
+#pragma HLS STREAM variable = strm_sum_nonrounded depth = 32
+
+    if (do_estimate) {
+        _XF_IMAGE_PRINT("--4 EstimateDataBits - BuildAndStoreANS\n");
+
+#pragma HLS DATAFLOW
+
+        // Stage 1: produce one negative term per non-empty histogram entry.
+        for (int i = 0; i < len; ++i) { // alphabet_size
+#pragma HLS PIPELINE II = 1
+
+            if (num_symbol == 0) {
+                //}else if (num_symbol == 1){
+
+            } else {
+                total_histogram += histogram[i]; // already have 4096
+                total_counts += counts[i];       // already have 1024, rebalance from 4096 to 1024
+
+                if (histogram[i] > 0) {
+                    assert(counts[i] > 0);
+                    float tmp = hls_FastLog2(counts[i]); //
+                    float fsub = histogram[i] * tmp;
+                    strm_fsub.write((-1 * fsub));
+
+                    num++;
+                }
+            }
+        }
+
+        // Stage 2: reduce the num terms to a single sum.
+        ADD_FP_strm(num, strm_fsub, strm_sum_nonrounded);
+
+        // Stage 3: single-iteration loop that finalises the estimate.
+        for (int i = 0; i < 1; ++i) {
+#pragma HLS PIPELINE II = 1
+            // The sum is only read when at least one term was written.
+            if (num != 0) {
+                sum = strm_sum_nonrounded.read();
+            }
+
+            if (num_symbol == 1) total_counts = hls_ANS_TAB_SIZE;
+
+            _XF_IMAGE_PRINT("-- total_histogram = %d, max_symb=%d, sum=%f\n", total_histogram, len, sum);
+            if (total_histogram > 0) {
+                assert(total_counts == hls_ANS_TAB_SIZE);
+                sum += total_histogram * hls_ANS_LOG_TAB_SIZE;
+            }
+            Estimate = static_cast<int>(sum + 1.0f);
+        }
+    } // endif
+}
+
+// Estimated size in bits of total_histogram symbols coded with a flat
+// histogram: total_histogram * hls_FastLog2(len) + 1, truncated to int.
+inline int hls_EstimateDataBitsFlat(const short len, const uint16_t total_histogram) {
+    const float bits_per_symbol = hls_FastLog2(len); // 8
+    const double estimate = total_histogram * bits_per_symbol + 1.0;
+    return static_cast<int>(estimate);
+}
+
+/**
+ * @brief Pack one of two (nbits, bits) pair streams into bytes while draining
+ *        the other.
+ *
+ * Reads one value each from strm_num_pair, strm_num_fpair and strm_use_flat.
+ * The second pair stream (nbits2 / bits2) is packed when the flag is true,
+ * the first one otherwise. Both pair streams are read to completion, but only
+ * the selected pairs reach the output. Bits are appended after the partial
+ * byte carried in byte_tail; every completed byte is written to strm_byte
+ * together with a false on strm_histo_e.
+ *
+ * @param strm_num_pair  in: number of pairs in stream 1
+ * @param strm_nbits1    in: field widths of stream 1
+ * @param strm_bits1     in: field values of stream 1
+ * @param strm_use_flat  in: true selects stream 2
+ * @param strm_num_fpair in: number of pairs in stream 2
+ * @param strm_nbits2    in: field widths of stream 2
+ * @param strm_bits2     in: field values of stream 2
+ * @param pos            in/out: bit position; advanced by the width of every packed field
+ * @param byte_tail      in/out: bits of the last, still incomplete byte
+ * @param strm_byte      out: completed bytes
+ * @param strm_histo_e   out: one false per byte written
+ */
+void hls_WriteBitStreamWithConsume(hls::stream<int>& strm_num_pair,
+                                   hls::stream<nbits_t>& strm_nbits1,
+                                   hls::stream<uint16_t>& strm_bits1,
+
+                                   hls::stream<bool>& strm_use_flat,
+                                   hls::stream<int>& strm_num_fpair,
+                                   hls::stream<nbits_t>& strm_nbits2,
+                                   hls::stream<uint16_t>& strm_bits2,
+
+                                   int& pos,
+                                   uint8_t& byte_tail,
+                                   hls::stream<uint8_t>& strm_byte,
+                                   hls::stream<bool>& strm_histo_e) {
+#pragma HLS INLINE OFF
+
+    uint8_t ntail = pos & 7; // number of valid bits in the partial byte
+    uint8_t n_byte = 0; // n bytes to be write out
+    nbits_t pair_nbits = 0;
+    uint16_t pair_bits;
+    nbits_t nbits1 = 0;
+    uint16_t bits1;
+    nbits_t nbits2 = 0;
+    uint16_t bits2;
+    ap_uint<32> buffer = byte_tail; // pending bits, least significant first
+    int cnt_pair = 0;
+    int cnt_cnsm = 0; // NOTE(review): never read
+    int cnt_max = 0;
+
+    // init
+    int num_pair1 = strm_num_pair.read();
+    int num_pair2 = strm_num_fpair.read();
+    const bool use_pair2 = strm_use_flat.read();
+
+    _XF_IMAGE_PRINT("--byte_tail = %d , pos=%d\n", byte_tail, pos);
+    _XF_IMAGE_PRINT("--num_pair = %d , num_fpair=%d\n", num_pair1, num_pair2);
+
+    int num_pair = use_pair2 ? num_pair2 : num_pair1; // pairs that are packed
+    int num_cnsm = use_pair2 ? num_pair1 : num_pair2; // NOTE(review): never read
+    int num_max = (num_pair1 > num_pair2) ? num_pair1 : num_pair2;
+
+    // One fetch step per pair of the longer stream, plus one trailing step;
+    // iterations that flush bytes do not advance cnt_max.
+    while (cnt_max < num_max + 1) { // loopn and tail// ii=2 is not affected
+#pragma HLS PIPELINE II = 1
+
+        if (n_byte == 0) { // update num to write
+
+            // Read from each stream while it still has pairs left.
+            if (cnt_max < num_pair1) {
+                nbits1 = strm_nbits1.read();
+                bits1 = strm_bits1.read();
+            }
+            if (cnt_max < num_pair2) {
+                nbits2 = strm_nbits2.read();
+                bits2 = strm_bits2.read();
+            }
+
+            if (use_pair2) {
+                pair_nbits = nbits2;
+                pair_bits = bits2;
+            } else {
+                pair_nbits = nbits1;
+                pair_bits = bits1;
+            }
+
+            // Append the selected field after the ntail pending bits and work
+            // out how many whole bytes are now complete.
+            if (cnt_pair < num_pair) {
+                pos += pair_nbits;
+                n_byte = (ntail + pair_nbits) >> 3;
+                buffer(ntail + 16, ntail) = pair_bits;
+                ntail = (ntail + pair_nbits) & 7;
+            }
+
+            // n_byte = (ntail+pair_nbits)>>3;
+            cnt_pair++; // end here
+            cnt_max++;
+
+        } else { // write out
+
+            // Emit the lowest byte and shift the remaining bits down.
+            uint8_t byte = buffer(7, 0);
+
+            buffer = buffer >> 8;
+            strm_byte.write(byte);
+            strm_histo_e.write(false);
+            // num_byte++;
+            n_byte--;
+        }
+    } // end while
+
+    // Hand the incomplete byte back to the caller.
+    byte_tail = buffer(7, 0);
+}
+
+// Emit the header of a flat histogram as (nbits, bits) pairs: a 0 bit, a 1 bit
+// and the alphabet size in hls_ANS_LOG_TAB_SIZE bits. num_fpair receives the
+// number of pairs written and is 0 when do_encode is false.
+inline void hls_EncodeFlatHistogram(const int alphabet_size,
+                                    bool do_encode,
+                                    int& num_fpair,
+                                    hls::stream<nbits_t>& strm_flat_nbits,
+                                    hls::stream<uint16_t>& strm_flat_bits) {
+#pragma HLS INLINE OFF
+    num_fpair = 0;
+    if (!do_encode) {
+        return;
+    }
+
+    // Bit counter demanded by hls_WriteBits_strm; its value is discarded.
+    int bits_written = 0;
+
+    // Mark non-small tree.
+    hls_WriteBits_strm(1, 0, bits_written, num_fpair, strm_flat_nbits, strm_flat_bits);
+    // Mark uniform histogram.
+    hls_WriteBits_strm(1, 1, bits_written, num_fpair, strm_flat_nbits, strm_flat_bits);
+    // Encode alphabet size.
+    hls_WriteBits_strm(hls_ANS_LOG_TAB_SIZE, alphabet_size, bits_written, num_fpair, strm_flat_nbits,
+                       strm_flat_bits);
+}
+
+// ------------------------------------------------------------
+// Normalise one histogram, emit both its coded and its flat description as
+// pair streams, and decide which of the two is cheaper.
+//
+// The pair counts are written to strm_num_pair / strm_num_fpair and the
+// decision to use_flat / strm_use_flat. The decision can only be true when
+// do_encode is set and alphabet_size exceeds hls_kMaxNumSymbolsForSmallCode.
+void XAcc_BuildAndStoreANSEncodingData3(int histogram1[MAX_ALPHABET_SIZE],
+                                        int histogram2[MAX_ALPHABET_SIZE],
+                                        // const uint32_t histogram2[MAX_ALPHABET_SIZE],
+                                        const uint16_t alphabet_size,
+                                        // ANSEncSymbolInfo ans_table[MAX_ALPHABET_SIZE],
+                                        bool do_encode, // tmp cache int64
+
+                                        uint8_t& max_nz_symbol,
+                                        uint16_t& total,
+                                        int& num_symbols,
+                                        int scode_symbol[MAX_ALPHABET_SIZE],
+                                        int re_counts1[MAX_ALPHABET_SIZE],
+                                        // int hls_count_flat[MAX_ALPHABET_SIZE],
+
+                                        bool& use_flat,
+                                        hls::stream<bool>& strm_use_flat,
+
+                                        hls::stream<int>& strm_num_pair,
+                                        hls::stream<nbits_t>& strm_nbits,
+                                        hls::stream<uint16_t>& strm_bits,
+
+                                        hls::stream<int>& strm_num_fpair,
+                                        hls::stream<nbits_t>& strm_flat_nbits,
+                                        hls::stream<uint16_t>& strm_flat_bits) {
+#pragma HLS INLINE OFF
+    // The alphabet has to fit into the ANS table.
+    assert(alphabet_size <= hls_ANS_TAB_SIZE);
+
+    // The flat alternative is only evaluated for alphabets that cannot use the
+    // small-code form.
+    const bool do_estimate = do_encode && (alphabet_size > hls_kMaxNumSymbolsForSmallCode);
+
+    _XF_IMAGE_PRINT("--1 NormalizeCounts - BuildAndStoreANS\n");
+
+    // Step 1: normalise the counts; the result is produced in three copies so
+    // that each consumer below reads its own array.
+    bool mode = false;
+    int omit_pos = 0;
+    int re_counts2[MAX_ALPHABET_SIZE];
+    int re_counts3[MAX_ALPHABET_SIZE];
+    SetTargetsWithRebalance(mode, histogram1, omit_pos, max_nz_symbol, total, num_symbols, scode_symbol[0],
+                            re_counts1, re_counts2, re_counts3);
+
+    // Cost of the flat description: two marker bits plus the alphabet size.
+    const int flat_header_bits = hls_ANS_LOG_TAB_SIZE + 2;
+    const int flat_payload_bits = hls_EstimateDataBitsFlat(alphabet_size, total);
+
+    _XF_IMAGE_PRINT("--3 EncodeCounts - BuildAndStoreANS\n");
+
+    // Step 2: emit the coded histogram and the flat header.
+    int coded_header_bits = 0;
+    int coded_pairs = 0;
+    hls_EncodeCounts(re_counts2, alphabet_size, omit_pos, num_symbols, scode_symbol, max_nz_symbol, do_encode,
+                     coded_header_bits, coded_pairs, strm_nbits, strm_bits);
+
+    int flat_pairs = 0;
+    hls_EncodeFlatHistogram(alphabet_size, do_encode, flat_pairs, strm_flat_nbits, strm_flat_bits);
+
+    // Step 3: let's see if we can do better in terms of histogram size + data size.
+    int coded_payload_bits;
+    hls_EstimateDataBits(do_estimate, histogram2, re_counts3, num_symbols, max_nz_symbol, coded_payload_bits);
+
+    const int flat_cost = flat_header_bits + flat_payload_bits;
+    const int coded_cost = coded_header_bits + coded_payload_bits;
+    use_flat = do_estimate && (flat_cost < coded_cost);
+    _XF_IMAGE_PRINT(
+        "--histo_bits_flat = %d , data_bits_flat=%d, histo_bits = %d "
+        ", data_bits=%d\n",
+        flat_header_bits, flat_payload_bits, coded_header_bits, coded_payload_bits);
+
+    // Step 4: publish the pair counts and the decision.
+    strm_num_pair.write(coded_pairs);
+    strm_num_fpair.write(flat_pairs);
+    strm_use_flat.write(use_flat);
+}
+
+// hls_histo_bitstream_top(cluster_size, num_pair, strm_nbits, strm_bits,
+// use_flat, num_fpair, strm_flat_nbits,
+//                        strm_flat_bits, pos, tail_bits, strm_histo,
+//                        strm_histo_e);//);
+// Drains the histogram bit fields of every cluster into the output byte stream by
+// calling hls_WriteBitStreamWithConsume() exactly cluster_size times.
+//
+// cluster_size                         number of clusters, i.e. number of writer calls
+// num_pair, strm_nbits, strm_bits      inputs of the regular histogram encoding
+// use_flat, num_fpair,
+// strm_flat_nbits, strm_flat_bits      inputs of the flat histogram encoding
+// pos, tail_bits                       running writer state, forwarded by reference
+// strm_histo, strm_histo_e             output byte stream and its flag stream
+void hls_histo_bitstream_top(const int cluster_size,
+                             hls::stream<int>& num_pair,
+                             hls::stream<nbits_t>& strm_nbits,
+                             hls::stream<uint16_t>& strm_bits,
+
+                             hls::stream<bool>& use_flat,
+                             hls::stream<int>& num_fpair,
+                             hls::stream<nbits_t>& strm_flat_nbits,
+                             hls::stream<uint16_t>& strm_flat_bits,
+
+                             int& pos, // tmp cache int64
+                             uint8_t& tail_bits,
+                             hls::stream<uint8_t>& strm_histo,
+                             hls::stream<bool>& strm_histo_e) {
+#pragma HLS INLINE OFF
+    // One writer invocation per cluster; all state lives in the forwarded references.
+    for (int cluster = 0; cluster < cluster_size; ++cluster) {
+        hls_WriteBitStreamWithConsume(num_pair, strm_nbits, strm_bits,                  // regular encoding
+                                      use_flat, num_fpair, strm_flat_nbits, strm_flat_bits, // flat encoding
+                                      pos, tail_bits, strm_histo, strm_histo_e);
+    }
+}
+
+// For each of the cluster_size clusters: counts the non-zero symbols of the cluster
+// histogram, builds/encodes the ANS histogram description onto the output streams and
+// fills the cluster's row of ans_table.
+//
+// is_dc              selects the per-cluster alphabet size (alphabet_size_dc[c]) instead of
+//                    the shared alphabet_size
+// histogram          input counts, one row per cluster
+// alphabet_size      alphabet size used for every cluster when is_dc is false
+// cluster_size       number of clusters to process
+// alphabet_size_dc   per-cluster alphabet sizes, only read when is_dc is true
+// do_encode          forwarded to XAcc_BuildAndStoreANSEncodingData3
+// strm_*             output streams written by XAcc_BuildAndStoreANSEncodingData3
+// ans_table          output symbol table, row c written by hls_ANSBuildInfoTable_syn
+void hls_build_ans_encode_histo(const bool is_dc,
+                                uint32_t histogram[hls_kNumStaticContexts][MAX_ALPHABET_SIZE],
+                                const uint16_t alphabet_size,
+                                const int cluster_size,
+                                const uint16_t alphabet_size_dc[MAX_NUM_COLOR],
+                                const bool do_encode,
+
+                                hls::stream<int>& strm_num_pair,
+                                hls::stream<nbits_t>& strm_nbits,
+                                hls::stream<uint16_t>& strm_bits,
+
+                                hls::stream<bool>& strm_use_flat,
+                                hls::stream<int>& strm_num_fpair,
+                                hls::stream<nbits_t>& strm_flat_nbits,
+                                hls::stream<uint16_t>& strm_flat_bits,
+                                hls_ANSEncSymbolInfo ans_table[hls_kNumStaticContexts][MAX_ALPHABET_SIZE]
+
+                                ) {
+#pragma HLS INLINE OFF
+
+    for (int c = 0; c < cluster_size; ++c) {
+        //#pragma HLS DATAFLOW
+
+        // Alphabet size of this cluster, selected once instead of at every use.
+        const uint16_t cur_alphabet_size = is_dc ? alphabet_size_dc[c] : alphabet_size;
+
+        uint16_t total = 0;
+        uint8_t max_nz_symbol[2]; // two copies so each consumer below gets its own ("for dataflow")
+        max_nz_symbol[0] = 0;
+        max_nz_symbol[1] = 0;
+        int num_symbols = 0;
+
+        int scode_symbol[MAX_ALPHABET_SIZE]; // output
+        int hls_counts1[MAX_ALPHABET_SIZE];
+        int hls_counts2[MAX_ALPHABET_SIZE];
+        int hls_counts3[MAX_ALPHABET_SIZE];
+        int hls_count_flat[MAX_ALPHABET_SIZE];
+
+        // loop max_loop = alphabet_size
+        CountNZSymbol(histogram[c], max_nz_symbol, cur_alphabet_size,
+
+                      total, hls_counts1, hls_counts2, hls_counts3, hls_count_flat, num_symbols, scode_symbol);
+
+        // count rebalance and estimate
+        bool use_flat = false;
+        int re_counts1[MAX_ALPHABET_SIZE];
+        XAcc_BuildAndStoreANSEncodingData3(hls_counts1, hls_counts2, cur_alphabet_size,
+
+                                           do_encode, max_nz_symbol[0], total, num_symbols, scode_symbol,
+                                           // output
+                                           re_counts1, use_flat, strm_use_flat, strm_num_pair, strm_nbits, strm_bits,
+                                           strm_num_fpair, strm_flat_nbits, strm_flat_bits);
+
+        _XF_IMAGE_PRINT("-- ANSBuildInfoTable - BuildAndStoreANS\n");
+        hls_ANSBuildInfoTable_syn(re_counts1, hls_count_flat, use_flat, cur_alphabet_size, max_nz_symbol[1],
+
+                                  hls_counts3, ans_table[c]);
+
+// for print
+#ifndef __SYNTHESIS__
+        _XF_IMAGE_PRINT("c=%d \n", (int)c);
+        _XF_IMAGE_PRINT("alphabet_size=%d \n", (int)(cur_alphabet_size));
+        for (int i = 0; i < cur_alphabet_size; ++i) { // 0~255
+            hls_ANSEncSymbolInfo info = ans_table[c][i];
+            if (info.freq_ > 0) _XF_IMAGE_PRINT("info.freq=%d, info.start=%d \n", (int)info.freq_, (int)info.start_);
+        }
+#endif
+
+    } // end loop
+}
+
+// Dataflow top for histogram encoding: a producer (hls_build_ans_encode_histo) builds the
+// ANS tables and emits bit fields, and a consumer (hls_histo_bitstream_top) packs those
+// fields into the output byte stream. The two are connected only by the local FIFOs
+// declared below.
+//
+// is_dc, histogram, alphabet_size, cluster_size, alphabet_size_dc, do_encode
+//                      forwarded unchanged to hls_build_ans_encode_histo
+// ans_table            output table filled by hls_build_ans_encode_histo
+// pos, tail_bits       writer state, updated by hls_histo_bitstream_top
+// strm_histo(_e)       output byte stream and its flag stream
 void hls_build_and_encode_top(const bool is_dc,
+                              uint32_t histogram[hls_kNumStaticContexts][MAX_ALPHABET_SIZE],
+                              const uint16_t alphabet_size,
+                              const int cluster_size,
+                              const uint16_t alphabet_size_dc[MAX_NUM_COLOR],
+
+                              hls_ANSEncSymbolInfo ans_table[hls_kNumStaticContexts][MAX_ALPHABET_SIZE],
+                              int& pos, // tmp cache int64
+                              const bool do_encode,
+                              // uint8_t* storage
+                              hls::stream<uint8_t>& strm_histo,
+                              hls::stream<bool>& strm_histo_e,
+                              uint8_t& tail_bits) {
+#pragma HLS INLINE OFF
+#pragma HLS DATAFLOW
+
+    // Inter-process FIFOs: depth 2048 in BRAM for the data-carrying streams, depth 32 in
+    // LUTRAM for the use_flat flag.
+    // clang-format off
+	    hls::stream<int> num_pair;
+#pragma HLS RESOURCE  	  variable = num_pair core = FIFO_BRAM
+#pragma HLS STREAM    	  variable = num_pair depth = 2048
+		hls::stream<nbits_t> strm_nbits;
+#pragma HLS RESOURCE  	  variable = strm_nbits core = FIFO_BRAM
+#pragma HLS STREAM    	  variable = strm_nbits depth = 2048
+		hls::stream<uint16_t> strm_bits("strm_bits");
+#pragma HLS RESOURCE  	  variable = strm_bits core = FIFO_BRAM
+#pragma HLS STREAM    	  variable = strm_bits depth = 2048
+
+		 hls::stream<bool>    use_flat;
+#pragma HLS RESOURCE  	  variable = use_flat core = FIFO_LUTRAM
+#pragma HLS STREAM    	  variable = use_flat depth = 32
+		hls::stream<int>     num_fpair;
+#pragma HLS RESOURCE  	  variable = num_fpair core = FIFO_BRAM
+#pragma HLS STREAM    	  variable = num_fpair depth = 2048
+	    hls::stream<nbits_t> strm_flat_nbits;
+#pragma HLS RESOURCE  	  variable = strm_flat_nbits core = FIFO_BRAM
+#pragma HLS STREAM    	  variable = strm_flat_nbits depth = 2048
+	    hls::stream<uint16_t> strm_flat_bits("strm_flat_bits");
+#pragma HLS RESOURCE  	  variable = strm_flat_bits core = FIFO_BRAM
+#pragma HLS STREAM    	  variable = strm_flat_bits depth = 2048
+    // clang-format on
+
+    // Producer: per-cluster ANS table construction plus histogram bit fields.
+    hls_build_ans_encode_histo(is_dc, histogram, alphabet_size, cluster_size, alphabet_size_dc, do_encode, num_pair,
+                               strm_nbits, strm_bits, use_flat, num_fpair, strm_flat_nbits, strm_flat_bits, ans_table);
+
+    // Consumer: reads the FIFOs once per cluster and writes bytes to strm_histo.
+    hls_histo_bitstream_top(cluster_size, num_pair, strm_nbits, strm_bits, use_flat, num_fpair, strm_flat_nbits,
+                            strm_flat_bits, pos, tail_bits, strm_histo, strm_histo_e);
+}
+
+// Builds the token histograms from a stream of 13-bit histogram addresses.
+//
+// strm_token_addr   one address per token; bits above bit 7 select the histogram row,
+//                   bits 7..0 the symbol (see the hls_histograms2 write below)
+// strm_e_addr       end flags: one flag is read up front and one more after every address;
+//                   a true flag stops the loop
+// total             per-row token count, indexed by addr >> 8
+// hls_histograms    flat histogram, indexed directly by the address
+// hls_histograms2   same counts as a 2-D array [addr >> 8][addr(7, 0)]
+//
+// NOTE(review): the three output arrays are only cleared when the first end flag is
+// false, so an empty input leaves them untouched — confirm the caller expects that.
 void build_historgram_syn(hls::stream<ap_uint<13> >& strm_token_addr,
+                          hls::stream<bool>& strm_e_addr,
+                          hist_t total[hls_kMinClustersForHistogramRemap],
+                          hist_t hls_histograms[hls_NumHistograms],
+                          hist_t hls_histograms2[hls_kNumStaticContexts][MAX_ALPHABET_SIZE]
+
+                          ) {
+#pragma HLS INLINE OFF
+#pragma HLS RESOURCE variable = hls_histograms core = RAM_2P_BRAM
+
+    // init addr
+    // addr_r0..addr_r4 hold the addresses of the five most recent tokens (r0 = newest).
+    ap_uint<13> addr_c, addr_r0, addr_r1, addr_r2, addr_r3, addr_r4;
+    // 0x1fff (all 13 bits set) marks "no previous address"; presumably this address never
+    // occurs in the input, otherwise it would match the empty history — TODO confirm.
+    addr_r0 = addr_r1 = addr_r2 = addr_r3 = addr_r4 = 0x1fff;
+    addr_c = 0;
+    // init reg
+    // cnt_r0..cnt_r4 hold the counts written for addr_r0..addr_r4.
+
+    hist_t cnt;
+    hist_t cnt_r0 = 0; // max 4096?
+    hist_t cnt_r1 = 0;
+    hist_t cnt_r2 = 0;
+    hist_t cnt_r3 = 0;
+    hist_t cnt_r4 = 0;
+
+    bool e = strm_e_addr.read();
+
+    // 1.init: clear all histograms and totals (skipped when the input is empty)
+    if (!e) {
+        for (int i = 0; i < hls_kNumStaticContexts; ++i) {
+            for (int j = 0; j < MAX_ALPHABET_SIZE; ++j) {
+#pragma HLS PIPELINE II = 1
+                hls_histograms[i * MAX_ALPHABET_SIZE + j] = 0;
+                hls_histograms2[i][j] = 0;
+            }
+        }
+        for (int i = 0; i < hls_kMinClustersForHistogramRemap; ++i) {
+            total[i] = 0;
+        }
+    }
+
+AGGREGATE_TOKEN:
+    while (!e) {
+// The tool is told there is no inter-iteration dependence on hls_histograms; the
+// address/count history below supplies the value when a recent address repeats.
+#pragma HLS dependence variable = hls_histograms inter false
+#pragma HLS PIPELINE II = 1
+
+        // 1)Get data's address
+        // addr_c = ac_static_context_map[ac_token.context]<<8 + ac_token.symbol;
+        addr_c = strm_token_addr.read();
+        e = strm_e_addr.read();
+
+        total[addr_c >> 8]++;
+
+        // 2)Select the current count: newest matching history entry first, RAM otherwise
+        if (addr_c == addr_r0) { //&& cur_key == key_r0
+            cnt = cnt_r0;
+        } else if (addr_c == addr_r1) {
+            cnt = cnt_r1;
+        } else if (addr_c == addr_r2) {
+            cnt = cnt_r2;
+        } else if (addr_c == addr_r3) { // original note: needed to pass cosim with 2019.1
+            cnt = cnt_r3;
+        } else if (addr_c == addr_r4) { // original note: required for cosim with 2018.3
+            cnt = cnt_r4;
+        } else {
+            cnt = hls_histograms[addr_c];
+        }
+        // IMBS
+        cnt = cnt + 1;
+        // 3)Write back to RAM (both the flat and the 2-D copy)
+        hls_histograms[addr_c] = cnt;
+        hls_histograms2[addr_c >> 8][addr_c(7, 0)] = cnt;
+
+        // 4)Shift the count and address history by one entry; the current token becomes r0
+        cnt_r4 = cnt_r3;
+        cnt_r3 = cnt_r2;
+        cnt_r2 = cnt_r1;
+        cnt_r1 = cnt_r0;
+        cnt_r0 = cnt;
+
+        addr_r4 = addr_r3;
+        addr_r3 = addr_r2;
+        addr_r2 = addr_r1;
+        addr_r1 = addr_r0;
+        addr_r0 = addr_c;
+    }
+}
+
+// Writes the pre-computed encoding of the static context map to the histogram byte stream.
+//
+// pos             advanced by 901 bits: 56 table words * 16 bits = 896 bits written here as
+//                 112 bytes, plus 5 bits that are returned in tail_cxt_bits
+// strm_histo      receives the 112 table bytes, low byte of each 16-bit word first
+// strm_histo_e    receives one `false` flag per byte written
+// tail_cxt_bits   set to 0x11, the partial byte left for the caller to continue or flush
+void XAcc_EncodeStaticContextMap(int& pos,
+                                 hls::stream<uint8_t>& strm_histo,
+                                 hls::stream<bool>& strm_histo_e,
+                                 uint8_t& tail_cxt_bits) {
+#pragma HLS INLINE OFF
+
+    pos += 901;
+    static const uint16_t static_cxt_short[56] = {
+        0x8379, 0x2028, 0x7776, 0x6cdb, 0x557d, 0x3000, 0x0027, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x73a0, 0x97de,
+        0x8470, 0xeeeb, 0x3434, 0x7774, 0x1a1a, 0x496e, 0xee10, 0xd3bb, 0xddd0, 0x69dd, 0xeae8, 0x34ee, 0x8f1c, 0xee79,
+        0xf097, 0xde1a, 0xa776, 0xa1a1, 0xd3bb, 0xf0d0, 0x049e, 0x63e1, 0x1a77, 0xbbba, 0x0d3b, 0xdd5d, 0x869d, 0x73e3,
+        0xf6be, 0xf877, 0xbf09, 0xd3bb, 0xd0d0, 0x69dd, 0xf868, 0x824b, 0xbff0, 0x0d3b, 0xdddd, 0x869d, 0xeeae, 0xc34e};
+    // static const uint8_t
+    tail_cxt_bits = 0x11;
+
+    for (int i = 0; i < 56; i++) {
+        for (int j = 0; j < 2; j++) {
+#pragma HLS PIPELINE II = 1
+            // Load the word in both byte iterations: the variable is local to the loop body,
+            // so a value assigned only when j == 0 is not guaranteed to survive to j == 1.
+            ap_uint<16> shortInt = static_cxt_short[i];
+
+            uint8_t tmp = (j == 0) ? shortInt(7, 0) : shortInt(15, 8);
+            strm_histo.write(tmp);
+            strm_histo_e.write(false);
+        }
+    }
+}
+
+// ------------------------------------------------------------
+// Emits n as a variable-length field on the (strm_nbits, strm_bits) stream pair.
+//   n == 0 : a single 0 bit, written through hls_WriteBits_strm (which is handed
+//            num_bits and num).
+//   n  > 0 : a 1 bit, a 3-bit field holding nbits = hls_Log2FloorNonZero_32b(n), then an
+//            nbits-wide field holding n - 2^nbits. num_bits grows by 1 + 3 + nbits and
+//            num by 2, or by 3 when nbits is non-zero.
+void hls_StoreVarLenUint8_build(
+    uint32_t n, int& num_bits, int& num, hls::stream<nbits_t>& strm_nbits, hls::stream<uint16_t>& strm_bits) {
+#pragma HLS INLINE
+    if (n != 0) {
+        int nbits;
+        // The three fields are written by one pipelined three-step loop.
+        for (int step = 0; step < 3; step++) {
+#pragma HLS PIPELINE
+            switch (step) {
+                case 0:
+                    // marker bit: value is non-zero
+                    hls_WriteBits_strm_nodepend(1, 1, strm_nbits, strm_bits);
+                    nbits = hls_Log2FloorNonZero_32b(n);
+                    break;
+                case 1:
+                    // width of the remainder field
+                    hls_WriteBits_strm_nodepend(3, nbits, strm_nbits, strm_bits);
+                    break;
+                default:
+                    // remainder below the leading one bit, then the bookkeeping
+                    hls_WriteBits_strm_nodepend(nbits, n - (1ULL << nbits), strm_nbits, strm_bits);
+                    num_bits += 4 + nbits;
+                    num += (nbits == 0) ? 2 : 3;
+                    break;
+            }
+        }
+    } else {
+        hls_WriteBits_strm(1, 0, num_bits, num, strm_nbits, strm_bits);
+    }
+}
+
+// sequential
+// Emits the encoded context map as (nbits, bits) fields on strm_nbits / strm_bits.
+//
+// Only three context maps have a hard-coded encoding: {0,1,0}, {0,1,2} and {0,0,1}
+// (first three entries of context_map). Any other map with num_histograms != 1 only
+// triggers the "--ERROR" prints and gets no tree/symbol fields.
+//
+// context_map      context map; entries 0..2 are inspected
+// num_histograms   number of histograms; num_histograms - 1 is written first, and nothing
+//                  else is written when it equals 1
+// num              field counter handed to the bit-writing helpers
+// strm_nbits/bits  output field streams
+void hls_EncodeContextMap(const uint8_t context_map[MAX_NUM_COLOR],
+                          const int num_histograms,
+
+                          int& num,
+                          hls::stream<nbits_t>& strm_nbits,
+                          hls::stream<uint16_t>& strm_bits) {
+#pragma HLS INLINE OFF
+
+    _XF_IMAGE_PRINT("---start EncodeContextMap:\n");
+
+    int histo_bits = 0;
+    // 1. write to storage_ix
+    _XF_IMAGE_PRINT("--1 StoreVarLenUint8 - start EncodeContextMap\n");
+    // hls_StoreVarLenUint8(num_histograms - 1, storage_ix, storage);
+    hls_StoreVarLenUint8_build(num_histograms - 1, histo_bits, num, strm_nbits, strm_bits);
+
+    // context_map = 000 will not go into the if
+    if (num_histograms != 1) {
+        // The three supported maps, matched once and reused by every step below.
+        const bool map_is_010 = (context_map[0] == 0) && (context_map[1] == 1) && (context_map[2] == 0);
+        const bool map_is_012 = (context_map[0] == 0) && (context_map[1] == 1) && (context_map[2] == 2);
+        const bool map_is_001 = (context_map[0] == 0) && (context_map[1] == 0) && (context_map[2] == 1);
+
+        // 2. sort
+        _XF_IMAGE_PRINT("--2 MoveToFrontTransform - start EncodeContextMap\n");
+        // 3. encode runlength:  input [dc, 0*63] return v_out = [dc, prefix_code] ,
+        // extra = [64 - 1 - length of
+        // prefix_code]
+        _XF_IMAGE_PRINT("--3 RunLengthCodeZeros - start EncodeContextMap\n");
+        uint32_t max_run_length_prefix = 0;
+        // RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
+        // &rle_symbols,
+        //                     &extra_bits);
+        if (map_is_001) {
+            max_run_length_prefix = 1;
+        }
+        // 4. write a length
+        _XF_IMAGE_PRINT("--4 use_rle - start EncodeContextMap\n");
+
+        bool use_rle = max_run_length_prefix > 0;
+        hls_WriteBits_strm(1, use_rle, histo_bits, num, strm_nbits, strm_bits);
+        if (use_rle) {
+            hls_WriteBits_strm(4, max_run_length_prefix - 1, histo_bits, num, strm_nbits, strm_bits);
+        }
+        // 5. write huffman tree
+        _XF_IMAGE_PRINT("--5 BuildAndStoreHuffmanTree - start EncodeContextMap\n");
+        if (map_is_010) {
+            hls_WriteBits_strm(2, 1, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(2, 1, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(1, 0, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(1, 1, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(4, 0, histo_bits, num, strm_nbits, strm_bits);
+        } else if (map_is_012) {
+            hls_WriteBits_strm(2, 1, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(2, 2, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(1, 0, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(1, 1, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(4, 0, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(1, 1, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(4, 1, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(1, 0, histo_bits, num, strm_nbits, strm_bits);
+        } else if (map_is_001) {
+            hls_WriteBits_strm(2, 1, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(2, 1, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(1, 1, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(4, 0, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(1, 1, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(4, 1, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(1, 0, histo_bits, num, strm_nbits, strm_bits);
+        } else {
+            _XF_IMAGE_PRINT("--ERROR : new status to be added !!!\n");
+        }
+
+        // 6. move storage to storage_ix
+        _XF_IMAGE_PRINT("--6 move storage to storage_ix - start EncodeContextMap\n");
+        if (map_is_010) {
+            hls_WriteBits_strm(1, 0, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(1, 1, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(1, 1, histo_bits, num, strm_nbits, strm_bits);
+        } else if (map_is_012) {
+            hls_WriteBits_strm(1, 0, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(2, 1, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(2, 3, histo_bits, num, strm_nbits, strm_bits);
+        } else if (map_is_001) {
+            hls_WriteBits_strm(1, 0, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(1, 0, histo_bits, num, strm_nbits, strm_bits);
+            hls_WriteBits_strm(1, 1, histo_bits, num, strm_nbits, strm_bits);
+        } else {
+            _XF_IMAGE_PRINT("--ERROR : new status to be added !!!\n");
+        }
+        hls_WriteBits_strm(1, 1, histo_bits, num, strm_nbits, strm_bits); // use move-to-front
+    }                                                                     // end if (num_histograms != 1)
+}
+
+// Encodes the context map and packs it into bytes: hls_EncodeContextMap produces
+// (nbits, bits) fields on two local FIFOs and hls_WriteBitToStream consumes them.
+//
+// context_map, num_histograms   forwarded to hls_EncodeContextMap
+// tail_bits, pos                writer state forwarded to hls_WriteBitToStream
+// strm_histo_byte(_e)           output byte stream and its flag stream
+//
+// NOTE(review): `num` is filled by the first call and passed to the second inside a
+// DATAFLOW region — confirm the tool schedules this scalar hand-over as intended.
 void hls_EncodeContextMapByte(const uint8_t context_map[MAX_NUM_COLOR],
+                              const int num_histograms,
+                              uint8_t& tail_bits,
+                              int& pos,
+                              hls::stream<uint8_t>& strm_histo_byte,
+                              hls::stream<bool>& strm_histo_e) {
+#pragma HLS INLINE OFF
+#pragma HLS DATAFLOW
+
+    // Field counter: passed to the encoder by reference, then handed to the byte writer.
+    int num = 0;
+
+    // Local field FIFOs, depth 32 in LUTRAM.
+    // clang-format off
+		 hls::stream<nbits_t> strm_nbits("strm_nbits_cluster");
+#pragma HLS RESOURCE  	  variable = strm_nbits core = FIFO_LUTRAM
+#pragma HLS STREAM    	  variable = strm_nbits depth = 32
+		hls::stream<uint16_t> strm_bits("strm_bits_cluster");
+#pragma HLS RESOURCE  	  variable = strm_bits core = FIFO_LUTRAM
+#pragma HLS STREAM    	  variable = strm_bits depth = 32
+    // clang-format on
+
+    hls_EncodeContextMap(context_map, num_histograms, num, strm_nbits, strm_bits);
+
+    hls_WriteBitToStream(num, tail_bits, strm_nbits, strm_bits, pos, strm_histo_byte, strm_histo_e);
+}
+
+// Returns  total_count * log2(total_codes) - sum_i counts[i] * log2(codes[i]),
+// using hls_FastLog2 for the logarithms. Only indices with codes[i] > 0 contribute to
+// total_codes; of those, only indices that are < counts_len and have counts[i] > 0
+// contribute to the sum and to total_count. The first term is added only when
+// total_codes > 0.
+inline double CrossEntropy(const uint32_t* counts, const int counts_len, const uint32_t* codes, const int codes_len) {
+    double entropy = 0.0;
+    uint32_t count_acc = 0;
+    uint32_t code_acc = 0;
+    for (int idx = 0; idx < codes_len; ++idx) {
+#pragma HLS PIPELINE
+        const bool code_used = (codes[idx] > 0);
+        // counts[] is only touched for in-range indices of used codes.
+        const bool count_used = code_used && (idx < counts_len) && (counts[idx] > 0);
+        if (count_used) {
+            entropy -= counts[idx] * hls_FastLog2(codes[idx]);
+            count_acc += counts[idx];
+        }
+        if (code_used) {
+            code_acc += codes[idx];
+        }
+    }
+    if (code_acc > 0) {
+        entropy += count_acc * hls_FastLog2(code_acc);
+    }
+    return entropy;
+}
+
+// Entropy of a histogram: CrossEntropy() of the data against itself.
+inline double hls_ShannonEntropy(const uint32_t* data, const int data_size) {
+    const double self_entropy = CrossEntropy(data, data_size, data, data_size);
+    return self_entropy;
+}
+
+// Copies every entry of hls_histograms2 into hls_histograms_out, one element per
+// pipelined iteration.
+void CopyStaticHisto(const hist_t hls_histograms2[hls_kNumStaticContexts][MAX_ALPHABET_SIZE],
+                     hist_t hls_histograms_out[hls_kNumStaticContexts][MAX_ALPHABET_SIZE]) {
+#pragma HLS INLINE OFF
+
+    for (int ctx = 0; ctx < hls_kNumStaticContexts; ++ctx) {
+        for (int sym = 0; sym < MAX_ALPHABET_SIZE; ++sym) { // 0~255
+#pragma HLS PIPELINE II = 1
+            const hist_t value = hls_histograms2[ctx][sym];
+            hls_histograms_out[ctx][sym] = value;
+        }
+    }
+}
+
+#define DEBUG_CLUSTER (1)
+// sequential
+// Writes the context map and the encoded histograms to strm_histo_byte and fills the
+// ANS symbol tables in hls_codes.
+//
+//   is_dc == true  : histograms are clustered by hls_ClusterHistograms_top (which also
+//                    fills dc_context_map), then the context map is written by
+//                    hls_EncodeContextMapByte.
+//   is_dc == false : histograms are copied unchanged and the pre-computed static context
+//                    map is written by XAcc_EncodeStaticContextMap.
+//
+// Afterwards hls_build_and_encode_top encodes the histograms; a pending partial byte is
+// flushed when pos is not a multiple of 8, the end flag `true` is written, and pos is
+// passed to hls_WriteZeroesToByteBoundary.
+//
+// hls_histograms2    input histograms
+// hls_codes          output ANS symbol tables
+// dc_context_map     output of the clustering step (DC path only)
+// pos                running bit position, updated in place
+// strm_histo_byte    output bytes; strm_histo_e carries false per byte and a final true
+void hls_encode_histo_context(const bool is_dc,
+                              const hist_t hls_histograms2[hls_kNumStaticContexts][MAX_ALPHABET_SIZE],
+
+                              hls_ANSEncSymbolInfo hls_codes[hls_kNumStaticContexts][MAX_ALPHABET_SIZE],
+                              uint8_t dc_context_map[MAX_NUM_COLOR],
+                              int& pos,
+                              hls::stream<uint8_t>& strm_histo_byte,
+                              hls::stream<bool>& strm_histo_e) {
+#pragma HLS INLINE OFF
+
+    // prepare buffers
+    // const int max_out_size = hls_kNumStaticContexts * 1024;//24K
+    uint8_t tail_bits; // partial output byte; assigned on both paths below
+
+    int num_clusters = 0;
+    int cluster_dcac;
+
+    uint16_t alphabet_size_dc[MAX_NUM_COLOR]; // only filled on the DC path
+    hist_t hls_histograms_out[hls_kNumStaticContexts][MAX_ALPHABET_SIZE];
+    // 3. build and store ANS from histograms/counts to ans_table and storage
+
+    if (is_dc) {
+#ifdef DEBUG_DC_HISTO
+        _XF_IMAGE_PRINT("---org historgram:\n");
+        for (int c = 0; c < 3; c++) {
+            for (int i = 0; i < 256; i++) {
+                uint32_t tmp = hls_histograms2[c][i];
+                _XF_IMAGE_PRINT("%d,", (int)(tmp));
+            }
+            _XF_IMAGE_PRINT("\n");
+        }
+#endif
+
+        _XF_IMAGE_PRINT("---start ClusterHistograms:\n");
+
+        uint8_t max_nz_symbol[MAX_NUM_COLOR];
+
+        hls_ClusterHistograms_top(hls_histograms2, hls_kClustersLimit, num_clusters, max_nz_symbol, hls_histograms_out,
+                                  dc_context_map);
+
+#ifdef DEBUG_CLUSTER
+
+        _XF_IMAGE_PRINT("---remap historgram:\n");
+        for (int c = 0; c < num_clusters; c++) {
+            for (int i = 0; i < max_nz_symbol[c]; i++) {
+                uint32_t tmp = hls_histograms_out[c][i];
+                _XF_IMAGE_PRINT("%d,", (int)(tmp));
+            }
+            _XF_IMAGE_PRINT("\n");
+        }
+        _XF_IMAGE_PRINT("---dc / ctrl context_map:\n");
+        for (int c = 0; c < MAX_NUM_COLOR; ++c) {
+            _XF_IMAGE_PRINT("%d,", dc_context_map[c]);
+        }
+        _XF_IMAGE_PRINT("\n");
+
+        _XF_IMAGE_PRINT("-alphabet_size = %d, pos=%d\n", (int)max_nz_symbol[0], (int)(pos));
+        _XF_IMAGE_PRINT("-alphabet_size = %d, pos=%d\n", (int)max_nz_symbol[1], (int)(pos));
+        _XF_IMAGE_PRINT("-alphabet_size = %d, pos=%d\n", (int)max_nz_symbol[2], (int)(pos));
+
+#endif
+
+        tail_bits = 0;
+        hls_EncodeContextMapByte(dc_context_map, num_clusters, tail_bits, pos, strm_histo_byte, strm_histo_e);
+
+        // Per-cluster alphabet sizes come from the clustering step.
+        for (int c = 0; c < MAX_NUM_COLOR; c++) {
+            alphabet_size_dc[c] = max_nz_symbol[c];
+        }
+        cluster_dcac = num_clusters;
+        _XF_IMAGE_PRINT("---end EncodeContextMap\n");
+
+    } else {
+        cluster_dcac = hls_kNumStaticContexts;
+
+        CopyStaticHisto(hls_histograms2, hls_histograms_out);
+        // 3. Encode the ContextMap.
+        XAcc_EncodeStaticContextMap(pos, strm_histo_byte, strm_histo_e, tail_bits);
+    }
+
+    // Evaluated after the context map has been written, on both paths.
+    bool do_encode = (pos != 0);
+    _XF_IMAGE_PRINT("do_encode=%d \n", (int)do_encode);
+    hls_build_and_encode_top(is_dc, hls_histograms_out, 256, cluster_dcac,
+                             alphabet_size_dc, // use when dc
+                             hls_codes, pos, do_encode, strm_histo_byte, strm_histo_e, tail_bits);
+
+    // Flush the partial byte when the stream does not end on a byte boundary.
+    if (pos & (7)) {
+        strm_histo_byte.write(tail_bits);
+        strm_histo_e.write(false);
+    }
+    strm_histo_e.write(true);
+
+    // 4. Close the histogram bit stream.
+    // WriteZeroesToByteBoundary(&storage_ix, storage);
+    _XF_IMAGE_PRINT("storage_ix=%d \n", (int)pos);
+
+    hls_WriteZeroesToByteBoundary(&pos);
+
+    _XF_IMAGE_PRINT("storage_ix=%d \n", (int)pos);
+}
+
+// Dataflow top: builds the token histograms from the address stream, then encodes the
+// context map and histograms to the byte stream and reports the encoded length.
+//
+// is_dc, dc_context_map   forwarded to hls_encode_histo_context
+// strm_token_addr         token addresses consumed by build_historgram_syn
+// strm_e_addr             end flags for strm_token_addr
+// hls_codes               output ANS symbol tables
+// hls_histograms          flat histogram written by build_historgram_syn
+// pos                     starting bit position; taken by value, so the caller's copy is
+//                         not updated
+// len_histo               output: final bit position rounded up to whole bytes
+// strm_histo_byte(_e)     output byte stream and its flag stream
 void XAcc_EncodeHistogramsFast_top(const bool is_dc,
+                                   uint8_t dc_context_map[MAX_NUM_COLOR],
+                                   hls::stream<ap_uint<13> >& strm_token_addr,
+                                   hls::stream<bool>& strm_e_addr,
+
+                                   // hls_PikImageSizeInfo info,
+                                   hls_ANSEncSymbolInfo hls_codes[hls_kNumStaticContexts][MAX_ALPHABET_SIZE],
+                                   hist_t hls_histograms[hls_NumHistograms],
+                                   int pos,
+                                   int& len_histo,
+                                   hls::stream<uint8_t>& strm_histo_byte,
+                                   hls::stream<bool>& strm_histo_e) {
+#pragma HLS INLINE OFF
+#pragma HLS DATAFLOW
+
+    // 2-D copy of the histogram, handed from the build stage to the encode stage.
+    hist_t hls_histograms2[hls_kNumStaticContexts][MAX_ALPHABET_SIZE];
+    hist_t total[hls_kMinClustersForHistogramRemap];
+    // todo: just init once
+
+    // 2.build
+    build_historgram_syn(strm_token_addr, strm_e_addr, total, hls_histograms, hls_histograms2);
+
+    // Debug prints: first 10 counts at flat offsets 0, 256 and 512.
+    for (int j = 0; j < (10); ++j) {
+        _XF_IMAGE_PRINT("--historgrams[%d] = %d\n", j, (int)hls_histograms[j]);
+    }
+
+    for (int j = 256; j < (256 + 10); ++j) {
+        _XF_IMAGE_PRINT("--historgrams[%d] = %d\n", j, (int)hls_histograms[j]);
+    }
+    for (int j = 512; j < (512 + 10); ++j) {
+        _XF_IMAGE_PRINT("--historgrams[%d] = %d\n", j, (int)hls_histograms[j]);
+    }
+
+    // encode
+    hls_encode_histo_context(is_dc, hls_histograms2, hls_codes, dc_context_map, pos, strm_histo_byte, strm_histo_e); //
+
+    // Convert the final bit position to a byte count, rounding up.
+    int storage_ix = pos;
+    len_histo = (storage_ix + 7) >> 3;
+}
diff --git a/codec/L2/demos/pikEnc/kernel/kernel3/ctrl_tokenize.cpp b/codec/L2/demos/pikEnc/kernel/kernel3/ctrl_tokenize.cpp
new file mode 100755
index 0000000000..844eafa4c5
--- /dev/null
+++ b/codec/L2/demos/pikEnc/kernel/kernel3/ctrl_tokenize.cpp
@@ -0,0 +1,552 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel3/ctrl_tokenize.hpp"
+
+// Converts the AC strategy field into tokens. For each of the ysize * xsize positions
+// one flag and one strategy byte are read; a token {context 0, symbol = strategy,
+// nbits 0, bits 0} plus a `false` end flag is emitted only when the flag is false.
+// A single `true` end flag terminates the token stream.
+void hls_TokenizeAcStrategy(hls_Rect dc_rect,
+                            hls::stream<bool>& strm_strategy_block0,
+                            hls::stream<uint8_t>& strm_stragegy,
+
+                            hls::stream<hls_Token>& strm_strategy_token,
+                            hls::stream<bool>& strm_e) {
+#pragma HLS INLINE OFF
+    hls_Token token;
+
+    for (int row = 0; row < dc_rect.ysize; row++) {
+        for (int col = 0; col < dc_rect.xsize; col++) {
+#pragma HLS PIPELINE II = 1
+            // Both inputs are consumed for every position, token or not.
+            const bool block_flag = strm_strategy_block0.read();
+            const uint8_t strategy_code = strm_stragegy.read();
+            if (!block_flag) {
+                token.context = 0;
+                token.symbol = strategy_code;
+                token.nbits = 0;
+                token.bits = 0;
+                strm_strategy_token.write(token);
+                strm_e.write(false);
+            }
+        }
+    }
+    strm_e.write(true);
+}
+
+// Predicts the value at column x from the value above (abv1[x]) and the value to the left:
+//   first column, top row : 32
+//   first column          : abv1[x]
+//   top row               : left
+//   otherwise             : (abv1[x] + left + 1) >> 1
+// Afterwards `here` is stored into `left` and into abv2[x]. `ping` and `abv` are not
+// used by the body (abv only appears in the ARRAY_PARTITION pragma).
+inline void XAcc_PredictFromTopAndLeft_quant(quant_t here,
+                                             uint32_t x,
+                                             bool is_top_row,
+                                             bool ping,
+                                             quant_t abv1[hls_kDcGroupDimInBlocks],
+                                             quant_t abv2[hls_kDcGroupDimInBlocks],
+                                             quant_t& left,
+                                             quant_t abv[hls_kDcGroupDimInBlocks][2],
+                                             quant_t& predicted) {
+#pragma HLS INLINE
+#pragma HLS ARRAY_PARTITION variable = abv complete dim = 2
+    quant_t default_val = 32;
+    quant_t guess;
+
+    // Choose the predictor from the neighbours that exist at this position.
+    if (x == 0) {
+        if (is_top_row) {
+            guess = default_val;
+        } else {
+            guess = abv1[x];
+        }
+    } else {
+        if (is_top_row) {
+            guess = left;
+        } else {
+            guess = (abv1[x] + left + 1) >> 1;
+        }
+    }
+
+    // Record the current value for the next column and the next row.
+    left = here;
+    abv2[x] = here;
+
+    predicted = guess;
+}
+
+// Predicts the value at column x from the value above (abv) and the value to the left:
+//   first column, top row : 32
+//   first column          : abv
+//   top row               : left
+//   otherwise             : (abv + left + 1) >> 1
+// Afterwards `here` is stored into `left` for the next column.
+void XAcc_PredictFromTopAndLeft_q(
+    quant_t here, uint32_t x, bool is_top_row, quant_t& left, quant_t abv, quant_t& predicted) {
+#pragma HLS INLINE
+
+    quant_t default_val = 32;
+    quant_t guess;
+
+    // Choose the predictor from the neighbours that exist at this position.
+    if (x == 0) {
+        if (is_top_row) {
+            guess = default_val;
+        } else {
+            guess = abv;
+        }
+    } else {
+        if (is_top_row) {
+            guess = left;
+        } else {
+            guess = (abv + left + 1) >> 1;
+        }
+    }
+
+    left = here;
+    predicted = guess;
+}
+
+// Duplicates a stream: reads ysize * xsize values from istrm and writes each one to
+// ostrm0 and then to ostrm1.
+void streamDup(hls_Rect dc_rect,
+               hls::stream<quant_t>& istrm,
+               hls::stream<quant_t>& ostrm0,
+               hls::stream<quant_t>& ostrm1) {
+#pragma HLS INLINE OFF
+
+    for (int row = 0; row < dc_rect.ysize; row++) {
+        for (int col = 0; col < dc_rect.xsize; col++) {
+#pragma HLS PIPELINE II = 1
+            const quant_t value = istrm.read();
+
+            ostrm0.write(value);
+            ostrm1.write(value);
+        }
+    }
+}
+
+// Streams out the first dc_rect.xsize entries of array_above_ram, in index order.
+void hls_linebuffer_write(hls_Rect dc_rect,
+                          quant_t array_above_ram[hls_kDcGroupDimInBlocks],
+                          hls::stream<quant_t>& strm_out) {
+#pragma HLS INLINE OFF
+    for (int col = 0; col < dc_rect.xsize; col++) {
+#pragma HLS PIPELINE II = 1
+        const quant_t stored = array_above_ram[col];
+        strm_out.write(stored);
+    }
+}
+
+// Fills the first dc_rect.xsize entries of array_above_ram from strm_in, in index order.
+void hls_linebuffer_read(hls_Rect dc_rect,
+                         hls::stream<quant_t>& strm_in,
+                         quant_t array_above_ram[hls_kDcGroupDimInBlocks]) {
+#pragma HLS INLINE OFF
+    for (int col = 0; col < dc_rect.xsize; col++) {
+#pragma HLS PIPELINE II = 1
+        array_above_ram[col] = strm_in.read();
+    }
+}
+
+// One-row delay line. For every position the value currently stored for that column
+// is written to strm_out, then the slot is overwritten with the next value from
+// strm_in. The buffer is static and never cleared, so the values emitted during the
+// first row are whatever the buffer held before; the original note says they are not
+// used for row 0.
+void hls_linebuffer(hls_Rect dc_rect, hls::stream<quant_t>& strm_in, hls::stream<quant_t>& strm_out) {
+#pragma HLS INLINE OFF
+    static quant_t array_above_ram[hls_kDcGroupDimInBlocks];
+#pragma HLS RESOURCE variable = array_above_ram core = RAM_2P_BRAM
+
+    // no init, because the ram not used in the line[0]
+    for (int row = 0; row < dc_rect.ysize; row++) {
+        for (int col = 0; col < dc_rect.xsize; col++) {
+#pragma HLS PIPELINE II = 1
+            // Emit the stored value first, then replace it with the current one.
+            const quant_t above = array_above_ram[col];
+            strm_out.write(above);
+            array_above_ram[col] = strm_in.read();
+        }
+    }
+}
+
+// Tokenizes the quantization field. For each of the ysize * xsize positions it reads one
+// flag, the current value and the value above, predicts the current value with
+// XAcc_PredictFromTopAndLeft_q and, when the flag is false, emits a token in context
+// hls_QuantContext:
+//   q = hls_PackSigned_16b(here - predicted)
+//   q <  255 : one token with symbol q
+//   q >= 255 : a token with symbol 255, followed in the next loop iteration by a second
+//              token with symbol here - 1
+// Every token is accompanied by a `false` end flag; a final `true` flag closes the stream.
+//
+// dc_rect               xsize / ysize give the number of columns / rows
+// strm_strategy_block   per-position flag; a true flag suppresses the token
+// strm_quant_field      current values
+// strm_quant_abv        values of the row above, one per position
+// strm_quant_token      output tokens
+// strm_e                output end flags
+void hls_TokenizeQuantField_main2(hls_Rect dc_rect,
+                                  hls::stream<bool>& strm_strategy_block,
+                                  hls::stream<quant_t>& strm_quant_field,
+                                  hls::stream<quant_t>& strm_quant_abv,
+
+                                  hls::stream<hls_Token>& strm_quant_token,
+                                  hls::stream<bool>& strm_e) {
+#pragma HLS INLINE OFF
+    hls_Token out_token;
+
+    quant_t quant_left; // only read by the predictor for bx != 0, after it has been written
+    quant_t quant_here;
+    bool is_locked = false; // true while the second token of an escape is pending
+
+    int bx = 0;
+    int by = 0;
+    bool is_top_row = (by == 0);
+
+#ifdef DEBUG_QUANT
+    int cnt = 0;
+#endif
+
+    while (by < dc_rect.ysize || is_locked) { // for flatten for and while loop
+#pragma HLS PIPELINE II = 1
+
+        if (is_locked) { // because of the q>=255 need 2 clk to write the stream
+            // Second half of an escape: no input is consumed in this iteration.
+            is_locked = false;
+            out_token.context = hls_QuantContext;
+            out_token.symbol = quant_here - 1;
+            out_token.nbits = 0;
+            out_token.bits = 0;
+
+            strm_quant_token.write(out_token);
+            strm_e.write(false);
+
+        } else {
+            bool block_ = strm_strategy_block.read();
+
+            quant_here = strm_quant_field.read();
+            quant_t quant_abv = strm_quant_abv.read();
+
+#ifdef DEBUG_QUANT
+            cnt++;
+#endif
+
+            // The predictor also updates quant_left, so it runs for every position.
+            quant_t predicted_quant;
+            XAcc_PredictFromTopAndLeft_q(quant_here, bx, is_top_row, quant_left, quant_abv, predicted_quant);
+
+            if (!block_) {
+                assert(quant_here < 32768);
+                assert(quant_here > -32767);
+                uint16_t q = hls_PackSigned_16b((int16_t)(quant_here - predicted_quant));
+
+                out_token.context = hls_QuantContext;
+                if (q >= 255) {
+                    _XF_IMAGE_PRINT("---quant_here = %d, predicted_quant = %d\n", quant_here, predicted_quant);
+                    // Escape symbol now, the actual value in the next iteration.
+                    is_locked = true;
+                    out_token.symbol = 255;
+                } else {
+                    out_token.symbol = q;
+                }
+                out_token.nbits = 0;
+                out_token.bits = 0;
+
+                strm_quant_token.write(out_token);
+                strm_e.write(false);
+            }
+
+            // The column advances for every consumed position, token or not.
+            bx++;
+        }
+
+        // Row wrap is postponed while the second escape token is still pending.
+        if (bx == dc_rect.xsize && (!is_locked)) {
+            by++;
+            is_top_row = false;
+            bx = 0;
+        }
+    } // end by
+
+    strm_e.write(true);
+
+#ifdef DEBUG_QUANT
+    std::cout << "read quant_field:" << cnt << std::endl;
+#endif
+}
+
+// Tokenizes the quant field of a dc_rect.xsize x dc_rect.ysize block grid, row by row.
+//
+// Per block this reads one flag from strm_strategy_block, one value from strm_quant_field and one
+// value from strm_quant_abv. When the flag is false, the packed difference between the value and
+// its prediction is emitted as a token with context hls_QuantContext. A packed difference >= 255 is
+// escaped: symbol 255 is written first and, in the following iteration (which consumes no input),
+// a second token carrying quant_here - 1. Each token written to strm_quant_token is paired with a
+// `false` on strm_e; one `true` is written to strm_e after the last row.
+//
+// NOTE(review): quant_left is never assigned in this function; presumably
+// XAcc_PredictFromTopAndLeft_q receives it by reference and maintains it -- confirm.
+void hls_TokenizeQuantField_main(hls_Rect dc_rect,
+                                 hls::stream<bool>& strm_strategy_block,
+                                 hls::stream<quant_t>& strm_quant_field,
+                                 hls::stream<quant_t>& strm_quant_abv,
+
+                                 hls::stream<hls_Token>& strm_quant_token,
+                                 hls::stream<bool>& strm_e) {
+#pragma HLS INLINE OFF
+    hls_Token out_token;
+
+    quant_t quant_left;
+    quant_t quant_here;
+    bool is_locked = false; // true while the second token of an escape pair is still pending
+
+    int bx = 0;
+    bool is_top_row = true;
+
+#ifdef DEBUG_QUANT
+    int cnt = 0;
+#endif
+
+    for (int by = 0; by < dc_rect.ysize; by++) {
+        is_top_row = (by == 0);
+        bx = 0;
+
+        // The inner loop runs one extra iteration per escape, hence a while instead of a for.
+        while (bx < dc_rect.xsize || is_locked) { // for flatten for and while loop
+#pragma HLS PIPELINE II = 1
+
+            if (is_locked) { // because of the q>=255 need 2 clk to write the stream
+                is_locked = false;
+                out_token.context = hls_QuantContext;
+                out_token.symbol = quant_here - 1;
+                out_token.nbits = 0;
+                out_token.bits = 0;
+
+                strm_quant_token.write(out_token);
+                strm_e.write(false);
+
+            } else {
+                bool block_ = strm_strategy_block.read();
+
+                quant_here = strm_quant_field.read();
+                quant_t quant_abv = strm_quant_abv.read();
+
+#ifdef DEBUG_QUANT
+                cnt++;
+#endif
+
+                quant_t predicted_quant;
+
+                XAcc_PredictFromTopAndLeft_q(quant_here, bx, is_top_row, quant_left, quant_abv, predicted_quant);
+
+                if (!block_) {
+                    assert(quant_here < 32768);
+                    assert(quant_here > -32767);
+                    uint16_t q = hls_PackSigned_16b((int16_t)(quant_here - predicted_quant));
+
+                    if (q >= 255) {
+                        // Escape: emit 255 now, the follow-up token in the next iteration.
+                        _XF_IMAGE_PRINT("---quant_here = %d, predicted_quant = %d\n", quant_here, predicted_quant);
+                        is_locked = true;
+                        out_token.symbol = 255;
+                    } else {
+                        out_token.symbol = q;
+                    }
+
+                    out_token.context = hls_QuantContext;
+                    out_token.nbits = 0;
+                    out_token.bits = 0;
+
+                    strm_quant_token.write(out_token);
+                    strm_e.write(false);
+                }
+
+                // The column advances for every consumed block, whether or not a token was emitted.
+                bx++;
+            }
+        } // end bx
+    }     // end by
+
+    strm_e.write(true);
+
+#ifdef DEBUG_QUANT
+    std::cout << "read quant_field:" << cnt << std::endl;
+#endif
+}
+
+// goal: one Quant / clock
+//
+// Dataflow wrapper around the quant-field tokenizer. It wires three stages together through
+// depth-32 LUTRAM FIFOs:
+//   1. streamDup feeds quant_here and quant_here1 from quant_field (presumably duplicating every
+//      element into both -- confirm against streamDup),
+//   2. hls_linebuffer turns quant_here1 into quant_abv, the value one row above each position,
+//   3. hls_TokenizeQuantField_main consumes strm_strategy_block, quant_here and quant_abv and
+//      produces strm_quant_token / strm_e.
+void hls_TokenizeQuantField(hls_Rect dc_rect,
+                            hls::stream<bool>& strm_strategy_block,
+                            hls::stream<quant_t>& quant_field,
+
+                            hls::stream<hls_Token>& strm_quant_token,
+                            hls::stream<bool>& strm_e) {
+#pragma HLS INLINE OFF
+#pragma HLS DATAFLOW
+
+    // Current-position values for the tokenizer.
+    hls::stream<quant_t> quant_here("quant_here");
+#pragma HLS RESOURCE variable = quant_here core = FIFO_LUTRAM
+#pragma HLS STREAM variable = quant_here depth = 32
+    // Second copy of the input, consumed by the line buffer.
+    hls::stream<quant_t> quant_here1("quant_here1");
+#pragma HLS RESOURCE variable = quant_here1 core = FIFO_LUTRAM
+#pragma HLS STREAM variable = quant_here1 depth = 32
+    // Line-buffer output: the value stored for the same column one row earlier.
+    hls::stream<quant_t> quant_abv("quant_abv");
+#pragma HLS RESOURCE variable = quant_abv core = FIFO_LUTRAM
+#pragma HLS STREAM variable = quant_abv depth = 32
+
+    streamDup(dc_rect, quant_field, quant_here, quant_here1);
+    hls_linebuffer(dc_rect, quant_here1, quant_abv);
+    hls_TokenizeQuantField_main(dc_rect, strm_strategy_block, quant_here, quant_abv, strm_quant_token, strm_e);
+}
+
+// goal: one strategy / clock
+//
+// Walks the dc_rect grid and, for every position, consumes one flag from strm_strategy_block and
+// one value from strm_arsigma. Positions whose flag is false produce a token with context
+// hls_kARParamsContexts and the value as symbol (no extra bits), paired with a `false` on strm_e.
+// Positions whose flag is true are consumed silently. A final `true` on strm_e ends the run.
+void hls_TokenizeARParameters(hls_Rect dc_rect,
+                              hls::stream<bool>& strm_strategy_block,
+                              hls::stream<arsigma_t>& strm_arsigma,
+
+                              hls::stream<hls_Token>& strm_arsigma_token,
+                              hls::stream<bool>& strm_e) {
+#pragma HLS INLINE OFF
+
+    hls_Token out_token;
+
+    for (int row = 0; row < dc_rect.ysize; ++row) {
+        for (int col = 0; col < dc_rect.xsize; ++col) {
+#pragma HLS PIPELINE II = 1
+            // Both inputs are always read so the two streams stay in step.
+            const bool suppressed = strm_strategy_block.read();
+            const arsigma_t sigma = strm_arsigma.read();
+
+            if (!suppressed) {
+                out_token.context = hls_kARParamsContexts;
+                out_token.symbol = sigma;
+                out_token.nbits = 0;
+                out_token.bits = 0;
+
+                strm_arsigma_token.write(out_token);
+                strm_e.write(false);
+            }
+        }
+    }
+
+    strm_e.write(true);
+}
+
+// Forwards one end-flag-terminated run of tokens.
+//
+// Reads end flags from strm_e_strategy until one is true. For every false flag it reads a token
+// from strm_strategy_token, splits it into its (symbol, context) and (nbits, bits) halves, and
+// writes the address (context << 8) + symbol to strm_token_ct_addr. Each output is paired with a
+// `false` on the matching end-flag stream. The terminating `true` is consumed but not forwarded.
+static void collect_ctrl_token_run(hls::stream<hls_Token>& strm_strategy_token,
+                                   hls::stream<bool>& strm_e_strategy,
+                                   hls::stream<addr_t>& strm_token_ct_addr,
+                                   hls::stream<bool>& strm_e_ct_addr,
+                                   hls::stream<hls_Token_symb>& strm_token_symb,
+                                   hls::stream<hls_Token_bits>& strm_token_bits,
+                                   hls::stream<bool>& strm_e_ctrl) {
+#pragma HLS INLINE
+    hls_Token out_token;
+    hls_Token_symb out_s;
+    hls_Token_bits out_t;
+
+    bool e = strm_e_strategy.read();
+
+    while (!e) {
+#pragma HLS PIPELINE II = 1
+        out_token = strm_strategy_token.read();
+        e = strm_e_strategy.read();
+        out_s.symbol = out_token.symbol;
+        out_s.context = out_token.context;
+        out_t.nbits = out_token.nbits;
+        out_t.bits = out_token.bits;
+        strm_token_symb.write(out_s);
+        strm_token_bits.write(out_t);
+        strm_e_ctrl.write(false);
+
+        addr_t addr = (out_token.context << 8) + out_token.symbol;
+        strm_token_ct_addr.write(addr);
+        strm_e_ct_addr.write(false);
+        _XF_IMAGE_PRINT("---write token(%d,%d,%d,%d), %d \n", (int)(out_token.context), (int)(out_token.symbol),
+                        out_token.nbits, out_token.bits, (int)addr);
+    }
+}
+
+// goal: one token / cycle
+//
+// Merges three consecutive token runs arriving on the same pair of streams (strm_strategy_token /
+// strm_e_strategy) into one output run. Each input run ends with its own `true` end flag; the
+// outputs get a single terminating `true` on strm_e_ctrl and strm_e_ct_addr after the third run.
+// The debug messages label the runs as strategy, quant and arsigma tokens, in that order.
+void collect_ctrl_token(hls::stream<hls_Token>& strm_strategy_token,
+                        hls::stream<bool>& strm_e_strategy,
+                        hls::stream<addr_t>& strm_token_ct_addr,
+                        hls::stream<bool>& strm_e_ct_addr,
+                        hls::stream<hls_Token_symb>& strm_token_symb,
+                        hls::stream<hls_Token_bits>& strm_token_bits,
+                        hls::stream<bool>& strm_e_ctrl) {
+#pragma HLS INLINE OFF
+
+    _XF_IMAGE_PRINT("---read the strategy_token \n");
+    collect_ctrl_token_run(strm_strategy_token, strm_e_strategy, strm_token_ct_addr, strm_e_ct_addr, strm_token_symb,
+                           strm_token_bits, strm_e_ctrl);
+
+    _XF_IMAGE_PRINT("---read the quant_token \n");
+    collect_ctrl_token_run(strm_strategy_token, strm_e_strategy, strm_token_ct_addr, strm_e_ct_addr, strm_token_symb,
+                           strm_token_bits, strm_e_ctrl);
+
+    _XF_IMAGE_PRINT("---read the arsigma_token \n");
+    collect_ctrl_token_run(strm_strategy_token, strm_e_strategy, strm_token_ct_addr, strm_e_ct_addr, strm_token_symb,
+                           strm_token_bits, strm_e_ctrl);
+
+    strm_e_ctrl.write(true);
+    strm_e_ct_addr.write(true);
+}
+
+// Runs the three control-field tokenizers one after another, all writing into the same token
+// stream (strm_strategy_token) and the same end-flag stream (strm_e_strategy).
+//
+// Order of the runs: AC strategy, quant field, AR parameters. hls_TokenizeQuantField and
+// hls_TokenizeARParameters each end their run with a `true` end flag; hls_TokenizeAcStrategy is
+// not visible here and presumably does the same -- confirm. Each tokenizer gets its own copy of
+// the per-block flag stream (strm_strategy_block0/1/2).
+void hls_TokenizeCtrlField_warpper(hls_Rect dc_rect,
+                                   hls::stream<uint8_t>& strm_strategy,
+                                   hls::stream<quant_t>& strm_quant_field,
+                                   hls::stream<arsigma_t>& strm_arsigma,
+                                   hls::stream<bool>& strm_strategy_block0,
+                                   hls::stream<bool>& strm_strategy_block1,
+                                   hls::stream<bool>& strm_strategy_block2,
+
+                                   hls::stream<hls_Token>& strm_strategy_token,
+                                   hls::stream<bool>& strm_e_strategy) {
+// sequential: no DATAFLOW pragma here, the three calls are issued in program order
+#pragma HLS INLINE OFF
+
+    hls_TokenizeAcStrategy(dc_rect, strm_strategy_block0, strm_strategy, strm_strategy_token, strm_e_strategy);
+
+    hls_TokenizeQuantField(dc_rect, strm_strategy_block1, strm_quant_field, strm_strategy_token, strm_e_strategy);
+
+    hls_TokenizeARParameters(dc_rect, strm_strategy_block2, strm_arsigma, strm_strategy_token, strm_e_strategy);
+}
+
+// goal: Save storage resources with build histogram
+//
+// Top level of the control-field tokenizer. Two dataflow stages are connected by one token FIFO
+// and one end-flag FIFO (both depth 32, LUTRAM):
+//   1. hls_TokenizeCtrlField_warpper produces the strategy, quant and AR-parameter token runs
+//      back to back on strm_strategy_token / strm_e_strategy,
+//   2. collect_ctrl_token merges the three runs and splits every token into the address, symbol
+//      and bits output streams.
+void Xacc_TokenizeCtrlField_top(hls_Rect dc_rect,
+                                hls::stream<uint8_t>& strm_strategy,
+                                hls::stream<quant_t>& strm_quant_field,
+                                hls::stream<arsigma_t>& strm_arsigma,
+                                hls::stream<bool>& strm_strategy_block0,
+                                hls::stream<bool>& strm_strategy_block1,
+                                hls::stream<bool>& strm_strategy_block2,
+
+                                hls::stream<addr_t>& strm_token_ct_addr,
+                                hls::stream<hls_Token_symb>& strm_token_symb,
+                                hls::stream<hls_Token_bits>& strm_token_bits,
+                                hls::stream<bool>& strm_e_ct_addr,
+                                hls::stream<bool>& strm_e_ctrl) {
+#pragma HLS INLINE OFF
+#pragma HLS DATAFLOW
+
+    // All three tokenizers share this single token / end-flag pair, so no per-field streams exist.
+    static hls::stream<hls_Token> strm_strategy_token;
+#pragma HLS DATA_PACK variable = strm_strategy_token
+#pragma HLS RESOURCE variable = strm_strategy_token core = FIFO_LUTRAM
+#pragma HLS STREAM variable = strm_strategy_token depth = 32
+    static hls::stream<bool> strm_e_strategy;
+#pragma HLS RESOURCE variable = strm_e_strategy core = FIFO_LUTRAM
+#pragma HLS STREAM variable = strm_e_strategy depth = 32
+
+    hls_TokenizeCtrlField_warpper(dc_rect, strm_strategy, strm_quant_field, strm_arsigma, strm_strategy_block0,
+                                  strm_strategy_block1, strm_strategy_block2, strm_strategy_token, strm_e_strategy);
+
+    collect_ctrl_token(strm_strategy_token, strm_e_strategy, strm_token_ct_addr, strm_e_ct_addr, strm_token_symb,
+                       strm_token_bits, strm_e_ctrl);
+}
diff --git a/codec/L2/demos/pikEnc/kernel/kernel3/dc_shrink.cpp b/codec/L2/demos/pikEnc/kernel/kernel3/dc_shrink.cpp
new file mode 100755
index 0000000000..3d509bf089
--- /dev/null
+++ b/codec/L2/demos/pikEnc/kernel/kernel3/dc_shrink.cpp
@@ -0,0 +1,572 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel3/dc_shrink.hpp"
+
+// Row-0 pass of the Y shrink: a fixed left-neighbour predictor.
+//
+// Reads xsize DC values from strm_dc_y, stores them in line3_y[0][] and writes one residual per
+// value: the value itself for column 0, otherwise the difference to the previous column.
+void hls_shink_fixed(const int xsize,
+                     hls::stream<dct_t>& strm_dc_y,
+                     dct_t line3_y[3][MAX_NUM_BLOCK88_W],
+                     hls::stream<dct_t>& strm_dc_residuals) {
+#pragma HLS INLINE OFF
+
+    dct_t prev_dc; // left neighbour; first assigned at the end of column 0
+
+    for (int col = 0; col < xsize; ++col) {
+#pragma HLS PIPELINE II = 1
+        const dct_t cur_dc = strm_dc_y.read();
+        line3_y[0][col] = cur_dc;
+
+        if (col != 0) {
+            strm_dc_residuals.write(cur_dc - prev_dc);
+        } else {
+            // No left neighbour yet: the raw value is the residual.
+            strm_dc_residuals.write(cur_dc);
+        }
+
+        prev_dc = cur_dc;
+    }
+}
+
+// Stores residuals[x] = row_b[x] - pred and returns row_b[x].
+// row_b[x] is sampled once, before residuals[x] is written. `pred` is annotated "PixelV" by the
+// original author.
+inline dct_t hls_function(const int x, const dct_t pred, const dct_t* row_b, dct_t* residuals) {
+    dct_t actual = row_b[x];
+    residuals[x] = actual - pred;
+    return actual;
+}
+
+// Adds v0 and v1 through a 16-bit integer ap_fixed type declared with AP_RND / AP_SAT_SYM, so the
+// result is clamped by that type instead of wrapping, then converts it back to dct_t.
+inline dct_t hls_saturated_add_16b(const dct_t v0, const dct_t v1) {
+    typedef ap_fixed<16, 16, AP_RND, AP_SAT_SYM> sat16_t;
+
+    // The raw sum is narrowed into sat16_t here; that assignment is where saturation happens.
+    sat16_t sum = (sat16_t)v0 + (sat16_t)v1;
+
+    dct_t add;
+    add = sum;
+    return add;
+}
+
+// Returns the saturated sum of v0 and v1 shifted right by one bit.
+// Because the sum is saturated before the shift, the result is not the exact mean when v0 + v1
+// exceeds the range of hls_saturated_add_16b.
+inline dct_t hls_Average_16b(const dct_t v0, const dct_t v1) {
+    const dct_t sum = hls_saturated_add_16b(v0, v1);
+    return (sum >> 1);
+}
+
+// Computes v0 - v1 through a 16-bit integer ap_fixed type declared with AP_RND / AP_SAT_SYM, so
+// the result is clamped by that type instead of wrapping, then converts it back to dct_t.
+inline dct_t hls_saturated_subtract_16b(const dct_t v0, const dct_t v1) {
+    typedef ap_fixed<16, 16, AP_RND, AP_SAT_SYM> sat16_t;
+
+    // The raw difference is narrowed into sat16_t here; that assignment is where saturation happens.
+    sat16_t diff = (sat16_t)v0 - (sat16_t)v1;
+
+    dct_t subtract;
+    subtract = diff;
+    return subtract;
+}
+
+// Gradient predictor n + w - l (computed with the saturating helpers), clamped to the interval
+// [min(n, w, l), max(n, w, l)].
+inline dct_t hls_ClampedGradient(const dct_t n, const dct_t w, const dct_t l) {
+    const dct_t sum_nw = hls_saturated_add_16b(n, w);
+    const dct_t grad = hls_saturated_subtract_16b(sum_nw, l);
+
+    const dct_t vmin = hls::min(n, hls::min(w, l));
+    const dct_t vmax = hls::max(n, hls::max(w, l));
+
+    // Raise to the lower bound first, then cap at the upper bound.
+    const dct_t raised = hls::max(vmin, grad);
+    return hls::min(raised, vmax);
+}
+
+// Fills pred[0..7] with the eight Y-plane predictors built from the neighbours n, w, l and r.
+// (Original note: eight predictors for luminance decrease coded size by ~0.5% vs four.)
+//   pred[0] = avg(avg(n, w), r)   pred[1] = avg(w, n)   pred[2] = avg(n, r)   pred[3] = avg(w, l)
+//   pred[4] = avg(n, l)           pred[5] = w           pred[6] = clamped gradient   pred[7] = n
+inline void hls_Y_Predict(const dct_t n, const dct_t w, const dct_t l, const dct_t r, dct_t pred[hls_kNumPredictors]) {
+    // Plain neighbour copies.
+    pred[5] = w;
+    pred[7] = n;
+
+    // Two-input averages.
+    pred[1] = hls_Average_16b(w, n);
+    pred[2] = hls_Average_16b(n, r);
+    pred[3] = hls_Average_16b(w, l);
+    pred[4] = hls_Average_16b(n, l);
+
+    // Average of (n, w), averaged again with r.
+    const dct_t avg_nw = hls_Average_16b(n, w);
+    pred[0] = hls_Average_16b(avg_nw, r);
+
+    pred[6] = hls_ClampedGradient(n, w, l);
+}
+
+// Absolute value of the saturated difference c - pred.
+inline dct_t hls_AbsResidual(const dct_t c, const dct_t pred) {
+    const dct_t diff = hls_saturated_subtract_16b(c, pred);
+    return hls::abs(diff);
+}
+
+// Scores every Y predictor by how well it would have predicted two already-known values.
+//
+// costs[i] = |n_ - pred_n[i]| + |w_ - pred_w_[i]|, where pred_n is built here from the row-above
+// neighbours (tn_, l_, tl_, row_t[x + 1]) and pred_w_ is supplied by the caller. Afterwards the
+// row-above state is shifted one column: tl_ takes tn_, and tn_ takes row_t[x + 1].
+// row_m and row_b are accepted but not read.
+inline void hls_PredictorCosts(
+    // input
+    const int x,
+    const dct_t* row_m,
+    const dct_t* row_b,
+    const dct_t* row_t,
+    // store
+    dct_t& tl_,
+    dct_t& tn_,
+    const dct_t l_,
+    const dct_t n_,
+    const dct_t w_,
+    const dct_t pred_w_[hls_kNumPredictors],
+    // output
+    dct_t costs[hls_kNumPredictors]) {
+    const dct_t top_right = row_t[x + 1];
+
+    dct_t pred_top[hls_kNumPredictors];
+#pragma HLS ARRAY_PARTITION variable = pred_top complete
+    hls_Y_Predict(tn_, l_, tl_, top_right, pred_top);
+
+    for (int k = 0; k < hls_kNumPredictors; ++k) {
+#pragma HLS UNROLL
+        const dct_t cost_n = hls_AbsResidual(n_, pred_top[k]);
+        const dct_t cost_w = hls_AbsResidual(w_, pred_w_[k]);
+        costs[k] = cost_n + cost_w;
+    }
+
+    // Slide the row-above window one column to the right.
+    tl_ = tn_;
+    tn_ = top_right;
+}
+
+// check  timing TODO
+//
+// Returns the index of the smallest entry of abs_costs, compared as int16_t. The comparison is
+// strict, so on ties the lowest index wins. Per the original author the result must exactly match
+// minpos_epu16.
+inline uint8_t hls_IndexOfMinCost(const dct_t abs_costs[hls_kNumPredictors]) {
+    uint8_t best_idx = 0;
+    int16_t best_cost = abs_costs[0];
+
+    // Entry 0 is the initial best, so the scan starts at 1.
+    for (uint8_t k = 1; k < hls_kNumPredictors; ++k) { // check the timing
+#pragma HLS UNROLL
+        const int16_t candidate = abs_costs[k];
+        if (candidate < best_cost) {
+            best_idx = k;
+            best_cost = candidate;
+        }
+    }
+
+    return best_idx;
+}
+
+// Recomputes the Y predictors for the current position into pred_w_ (from n_, w_, l_, r) and
+// returns the one whose index has the lowest entry in costs.
+inline dct_t hls_Y_PredictC(const dct_t l_,
+                            const dct_t n_,
+                            const dct_t w_,
+                            const dct_t r,
+                            const dct_t costs[hls_kNumPredictors],
+                            dct_t pred_w_[hls_kNumPredictors]) {
+    // pred_w_ is an output too: the caller reuses it when scoring the next position.
+    hls_Y_Predict(n_, w_, l_, r, pred_w_);
+
+    const uint8_t best = hls_IndexOfMinCost(costs);
+    return pred_w_[best];
+}
+
+// Shifts the neighbour registers one column to the right.
+//   r: value that becomes the new n_
+//   c: value that becomes the new w_
+// The old n_ is moved into l_ before n_ is overwritten, so the assignment order matters.
+inline void hls_Y_Advance(const dct_t r,
+                          const dct_t c,
+
+                          dct_t& l_,
+                          dct_t& n_,
+                          dct_t& w_) {
+    l_ = n_; // must happen before n_ is replaced
+    n_ = r;
+    w_ = c;
+}
+
+// Adaptive prediction for the interior columns (x = 2 .. xsize - 2) of one Y row.
+//
+// line3_y is a three-row history: t_idx, m_idx and b_idx select the row two above, the row
+// directly above and the row currently being written. tl_, tn_, l_, n_, w_, wl and ww are the
+// caller-provided neighbour values at the start of the row; they are taken by value and used as
+// running registers inside the loop.
+//
+// Each iteration scores all predictors against already-known values, picks the cheapest one,
+// reads one DC value from strm_dc_y, writes (dc - prediction) to strm_dc_residuals and stores the
+// DC value in line3_y[b_idx][x].
+void hls_ForeachPrediction(const int xsize,
+
+                           const int t_idx,
+                           const int m_idx,
+                           const int b_idx,
+
+                           dct_t tl_,
+                           dct_t tn_, // row_t[2];
+                           dct_t l_,  // row_m[1];
+                           dct_t n_,  // row_m[2];
+                           dct_t w_,
+                           dct_t wl, // row_m[0];
+                           dct_t ww, // row_b[0];
+
+                           dct_t line3_y[3][MAX_NUM_BLOCK88_W],
+                           hls::stream<dct_t>& strm_dc_y,
+                           hls::stream<dct_t>& strm_dc_residuals) {
+#pragma HLS INLINE OFF
+
+    dct_t dc;
+    dct_t pred_w_[hls_kNumPredictors];
+#pragma HLS ARRAY_PARTITION variable = pred_w_ complete
+    dct_t costs[hls_kNumPredictors]; // 60 55 ...
+#pragma HLS ARRAY_PARTITION variable = costs complete
+
+    // Seed pred_w_ with the predictors for the position left of the first loop column.
+    hls_Y_Predict(l_, ww, wl, n_, pred_w_);
+
+    if (xsize >= 2) { // Avoid out of bounds reads.
+
+        // PixelNeighborsY uses w at x - 1 => two pixel margin.
+        for (int x = 2; x < xsize - 1; ++x) {
+#pragma HLS PIPELINE II = 1
+            dct_t r = line3_y[m_idx][x + 1]; // row_m[x + 1];
+
+            // Cost of each predictor, measured on the north (n_) and west (w_) values.
+            // This is the same computation as hls_PredictorCosts, written out inline.
+            {
+                const dct_t tr = line3_y[t_idx][x + 1]; // row_t[x + 1];
+                dct_t pred_n[hls_kNumPredictors];
+                hls_Y_Predict(tn_, l_, tl_, tr, pred_n);
+
+                for (int i = 0; i < hls_kNumPredictors; ++i) {
+#pragma HLS UNROLL
+                    costs[i] = hls_AbsResidual(n_, pred_n[i]) + hls_AbsResidual(w_, pred_w_[i]);
+                }
+
+                // Slide the row-above window one column to the right.
+                tl_ = tn_;
+                tn_ = tr;
+            }
+
+            // Refreshes pred_w_ for this column and returns the cheapest predictor's value.
+            const dct_t pred_c = hls_Y_PredictC(l_, n_, w_, r, costs, pred_w_);
+
+            {
+                dc = strm_dc_y.read();
+                _XF_IMAGE_PRINT("%d \n", (int)dc);
+                dct_t residuals = dc - pred_c;
+                strm_dc_residuals.write(residuals);
+                line3_y[b_idx][x] = dc;
+            }
+            // The value just read becomes the west neighbour of the next column.
+            hls_Y_Advance(r, dc, l_, n_, w_);
+        }
+    }
+}
+
+// Processes one Y row (any row after row 0) of the DC shrink.
+//
+// m_idx selects the row directly above inside the three-row history line3_y; the current row is
+// written to b_idx = (m_idx + 1) mod 3 and the row two above is t_idx = (m_idx - 1) mod 3, except
+// that t_idx is forced to 0 when is_by1 is set.
+//
+// Column handling:
+//   x = 0              residual against the value above (line3_y[m_idx][0])
+//   x = 1              residual against the left neighbour (only when xsize > 2)
+//   x = 2 .. xsize-2   adaptive prediction in hls_ForeachPrediction
+//   x = xsize-1        residual against the left neighbour (only when xsize >= 2)
+// The first read is unconditional; for xsize >= 1 exactly xsize values are read from strm_dc_y
+// and xsize residuals are written.
+void hls_shink_Y_adaptive(const int xsize,
+                          const bool is_by1,
+                          const int m_idx,
+                          dct_t line3_y[3][MAX_NUM_BLOCK88_W],
+                          hls::stream<dct_t>& strm_dc_y,
+                          hls::stream<dct_t>& strm_dc_residuals) {
+#pragma HLS INLINE OFF
+
+    const int t_idx = is_by1 ? 0 : ((m_idx == 0) ? 2 : (m_idx - 1)); // 0, 0, 1, 2, 0 , 1, 2
+    const int b_idx = (m_idx == 2) ? 0 : (m_idx + 1);                // 1, 2, 0 , 1, 2
+    dct_t residuals, dc;
+
+    {
+        // Column 0: predicted from the value directly above.
+        dc = strm_dc_y.read();
+        _XF_IMAGE_PRINT("%d \n", (int)dc);
+        // output and store line buffer
+        residuals = dc - line3_y[m_idx][0]; // row_b[0] - row_m[0];
+        strm_dc_residuals.write(residuals);
+        line3_y[b_idx][0] = dc;
+
+        dct_t dc_left = dc;
+        // Column 1: predicted from column 0. Skipped for xsize <= 2, where the final block
+        // below handles the last column instead.
+        if (xsize > 2) { // diff with the origin
+            dc = strm_dc_y.read();
+            _XF_IMAGE_PRINT("%d \n", (int)dc);
+            residuals = dc - dc_left; // row_b[0];
+            strm_dc_residuals.write(residuals);
+            line3_y[b_idx][1] = dc;
+        }
+    }
+
+    // Initial neighbour registers for the interior loop, taken from the history rows.
+    dct_t tl_ = line3_y[t_idx][1];
+    dct_t tn_ = line3_y[t_idx][2]; // row_t[2];
+    dct_t l_ = line3_y[m_idx][1];  // row_m[1];
+    dct_t n_ = line3_y[m_idx][2];  // row_m[2];
+    dct_t wl = line3_y[m_idx][0];  // row_m[0];
+    dct_t w_ = line3_y[b_idx][1];  // row_b[1];
+    dct_t ww = line3_y[b_idx][0];  // row_b[0];
+
+    hls_ForeachPrediction(xsize, t_idx, m_idx, b_idx, tl_, tn_, l_, n_, w_, wl, ww, line3_y, strm_dc_y,
+                          strm_dc_residuals);
+
+    {
+        // Last column: predicted from its left neighbour in the current row. Despite its name,
+        // row_m_last is read from the b_idx (current) row.
+        if (xsize >= 2) {
+            dc = strm_dc_y.read();
+            _XF_IMAGE_PRINT("%d \n", (int)dc);
+            dct_t row_m_last = line3_y[b_idx][xsize - 2];
+
+            residuals = dc - row_m_last; // row_m[xsize - 2];
+            strm_dc_residuals.write(residuals);
+            line3_y[b_idx][xsize - 1] = dc;
+        }
+    }
+}
+
+// Row-0 pass of the X/B shrink: a fixed left-neighbour predictor.
+//
+// Per column this reads one value from strm_dc_xb and one from strm_dc_y (in that order), stores
+// them in line3_xb[0][] and line2_y[0][], and writes one residual: the XB value itself for
+// column 0, otherwise its difference to the previous column's XB value.
+void hls_shink_xb_fixed(const int xsize,
+                        hls::stream<dct_t>& strm_dc_y,
+                        hls::stream<dct_t>& strm_dc_xb,
+                        dct_t line2_y[2][MAX_NUM_BLOCK88_W],
+                        dct_t line3_xb[3][MAX_NUM_BLOCK88_W],
+                        hls::stream<dct_t>& strm_dc_residuals) {
+#pragma HLS INLINE OFF
+
+    dct_t prev_xb; // left neighbour; first assigned at the end of column 0
+
+    for (int col = 0; col < xsize; ++col) {
+#pragma HLS PIPELINE II = 1
+        const dct_t cur_xb = strm_dc_xb.read();
+        const dct_t cur_y = strm_dc_y.read();
+        line3_xb[0][col] = cur_xb;
+        line2_y[0][col] = cur_y;
+
+        if (col != 0) {
+            strm_dc_residuals.write(cur_xb - prev_xb);
+        } else {
+            // No left neighbour yet: the raw value is the residual.
+            strm_dc_residuals.write(cur_xb);
+        }
+
+        prev_xb = cur_xb;
+    }
+}
+
+// Fills pred[0..7] with the eight predictors used by the X/B shrink path, built from the
+// neighbours n, w, l and r.
+//   pred[0] = clamped gradient   pred[1] = avg(n, w)   pred[2] = n   pred[3] = avg(n, r)
+//   pred[4] = w                  pred[5] = avg(w, l)   pred[6] = r   pred[7] = avg(avg(w, r), n)
+inline void hls_XB_Predict(const dct_t n, const dct_t w, const dct_t l, const dct_t r, dct_t pred[hls_kNumPredictors]) {
+    // Plain neighbour copies.
+    pred[2] = n;
+    pred[4] = w;
+    pred[6] = r;
+
+    // Two-input averages.
+    pred[1] = hls_Average_16b(n, w);
+    pred[3] = hls_Average_16b(n, r);
+    pred[5] = hls_Average_16b(w, l);
+
+    // Average of (w, r), averaged again with n.
+    const dct_t avg_wr = hls_Average_16b(w, r);
+    pred[7] = hls_Average_16b(avg_wr, n);
+
+    pred[0] = hls_ClampedGradient(n, w, l);
+}
+
+// Scores every X/B predictor by how well it predicts the co-located Y value.
+//
+// costs[i] = |yc - pred_y[i]|, with pred_y built from the Y neighbours (yn_, yw_, yl_, yr).
+// Afterwards the Y neighbour registers are shifted one column: yl_ takes yn_, yn_ takes yr and
+// yw_ takes yc. The parameters x, n_, w_, l_ and pred_w_ are accepted but not used.
+inline void hls_XB_PredictorCosts(
+    // input
+    const int x,
+
+    // store
+    dct_t& yn_,
+    dct_t& yw_,
+    dct_t& yl_,
+    dct_t& n_,
+    dct_t& w_,
+    dct_t& l_,
+    const dct_t yr,
+    const dct_t yc,
+
+    const dct_t pred_w_[hls_kNumPredictors],
+    // output
+    dct_t costs[hls_kNumPredictors]) {
+    dct_t pred_luma[hls_kNumPredictors];
+#pragma HLS ARRAY_PARTITION variable = pred_luma complete
+
+    hls_XB_Predict(yn_, yw_, yl_, yr, pred_luma);
+
+    for (int k = 0; k < hls_kNumPredictors; ++k) {
+#pragma HLS UNROLL
+        costs[k] = hls_AbsResidual(yc, pred_luma[k]);
+    }
+
+    // Slide the Y window; yl_ must take the old yn_ before yn_ is replaced.
+    yl_ = yn_;
+    yn_ = yr;
+    yw_ = yc;
+}
+
+// Computes the X/B predictors for the current position into pred_w_ (from n_, w_, l_, r) and
+// returns the one whose index has the lowest entry in costs.
+inline dct_t hls_XB_PredictC(const dct_t l_,
+                             const dct_t n_,
+                             const dct_t w_,
+                             const dct_t r,
+                             const dct_t costs[hls_kNumPredictors],
+                             dct_t pred_w_[hls_kNumPredictors]) {
+    hls_XB_Predict(n_, w_, l_, r, pred_w_);
+
+    const uint8_t best = hls_IndexOfMinCost(costs);
+    return pred_w_[best];
+}
+
+// Adaptive prediction for the interior columns (x = 2 .. xsize - 2) of one X/B row.
+//
+// line3_xb is the three-row history of the X/B plane (m_idx = row above, b_idx = current row) and
+// line2_y is the two-row history of the Y plane (ym_idx = row above, yb_idx = current row).
+// The predictor is chosen by how well each candidate predicts the co-located Y value
+// (see hls_XB_PredictorCosts) and is then applied to the X/B neighbours.
+//
+// Each iteration reads one value from strm_dc_y and one from strm_dc_xb, stores both in the
+// current history rows and writes (dc - prediction) to strm_dc_residuals.
+// t_idx is accepted but not used.
+//
+// NOTE(review): unlike hls_ForeachPrediction, the loop carries no PIPELINE pragma -- confirm
+// whether that is intentional.
+void hls_XB_ForeachPrediction(const int xsize,
+                              const bool ym_idx,
+                              const int yb_idx,
+                              const int t_idx,
+                              const int m_idx,
+                              const int b_idx,
+                              dct_t line2_y[2][MAX_NUM_BLOCK88_W],
+                              dct_t line3_xb[3][MAX_NUM_BLOCK88_W],
+                              hls::stream<dct_t>& strm_dc_y,
+                              hls::stream<dct_t>& strm_dc_xb,
+                              hls::stream<dct_t>& strm_dc_residuals) {
+#pragma HLS INLINE OFF
+
+    // Initial neighbour registers for column 2, taken from the history rows.
+    dct_t yn_ = line2_y[ym_idx][2]; // row_ym[2];
+    dct_t yw_ = line2_y[yb_idx][1]; // row_yb[1];
+    dct_t yl_ = line2_y[ym_idx][1]; // row_ym[1];
+    dct_t n_ = line3_xb[m_idx][2];  // row_m[2];
+    dct_t w_ = line3_xb[b_idx][1];  // row_b[1];
+    dct_t l_ = line3_xb[m_idx][1];  // row_m[1];
+    // pred_w_ is filled by hls_XB_PredictC; hls_XB_PredictorCosts receives it but does not read it.
+    dct_t pred_w_[hls_kNumPredictors];
+#pragma HLS ARRAY_PARTITION variable = pred_w_ complete
+    dct_t costs[hls_kNumPredictors]; // 60 55 ...
+#pragma HLS ARRAY_PARTITION variable = costs complete
+
+    if (xsize >= 2) { // Avoid out of bounds reads.
+
+        // PixelNeighborsY uses w at x - 1 => two pixel margin.
+        for (int x = 2; x < xsize - 1; ++x) {
+            const dct_t y = strm_dc_y.read();
+            const dct_t dc = strm_dc_xb.read(); // row_b[x];
+
+            const dct_t r = line3_xb[m_idx][x + 1];  // row_m[x + 1];//102
+            const dct_t yr = line2_y[ym_idx][x + 1]; // row_ym[x + 1];
+            const dct_t yc = y;                      // row_yb[x];
+            // Scores the predictors on the Y plane and advances yn_, yw_, yl_.
+            hls_XB_PredictorCosts(x,                 // row_ym, row_yb, row_t,
+                                  yn_, yw_, yl_, n_, w_, l_, yr, yc, pred_w_, costs);
+
+            // Applies the cheapest predictor to the X/B neighbours.
+            const dct_t pred_c = hls_XB_PredictC(l_, n_, w_, r, costs, pred_w_); // 188
+
+            dct_t residuals = dc - pred_c;
+
+            line2_y[yb_idx][x] = y;
+            line3_xb[b_idx][x] = dc;
+            strm_dc_residuals.write(residuals);
+            // Advances the X/B neighbour registers (shared helper with the Y path).
+            hls_Y_Advance(r, dc, l_, n_, w_);
+        }
+    }
+}
+
+// Processes one X/B row (any row after row 0) of the DC shrink.
+//
+// m_idx selects the row directly above inside the three-row X/B history line3_xb; the current
+// row is written to b_idx = (m_idx + 1) mod 3. ym_idx selects the row above inside the two-row Y
+// history line2_y; the current Y row is written to yb_idx = !ym_idx. t_idx is computed and
+// passed on, but the visible callee does not use it.
+//
+// Column handling:
+//   x = 0              residual against the X/B value above (line3_xb[m_idx][0])
+//   x = 1              residual against the left neighbour (only when xsize > 2)
+//   x = 2 .. xsize-2   adaptive prediction in hls_XB_ForeachPrediction
+//   x = xsize-1        residual against the left neighbour (only when xsize >= 2)
+// One Y value and one X/B value are read per processed column; the first pair is read
+// unconditionally.
+void hls_shink_XB_adaptive(const int xsize,
+                           const bool is_by1,
+
+                           const bool ym_idx, // 0,1,0,1
+                           const int m_idx,   // 0,1,2,0,1,2
+
+                           dct_t line2_y[2][MAX_NUM_BLOCK88_W],
+                           dct_t line3_xb[3][MAX_NUM_BLOCK88_W],
+
+                           hls::stream<dct_t>& strm_dc_y,
+                           hls::stream<dct_t>& strm_dc_xb,
+
+                           hls::stream<dct_t>& strm_dc_residuals) {
+#pragma HLS INLINE OFF
+
+    const int t_idx = is_by1 ? 0 : ((m_idx == 0) ? 2 : (m_idx - 1)); // 0, 0, 1, 2, 0 , 1, 2
+    const int b_idx = (m_idx == 2) ? 0 : (m_idx + 1);                // 1, 2, 0 , 1, 2
+    const bool yb_idx = !ym_idx;
+
+    // Column 0: predicted from the value directly above.
+    dct_t y = strm_dc_y.read();
+    dct_t dc = strm_dc_xb.read();
+
+    // output and store line buffer
+    dct_t residuals = dc - line3_xb[m_idx][0]; // row_b[0] - row_m[0];
+
+    line2_y[yb_idx][0] = y;
+    line3_xb[b_idx][0] = dc;
+    strm_dc_residuals.write(residuals);
+
+    dct_t dc_left = dc;
+    // Column 1: predicted from column 0. Skipped for xsize <= 2, where the final block below
+    // handles the last column instead.
+    if (xsize > 2) {
+        y = strm_dc_y.read();
+        dc = strm_dc_xb.read();
+        residuals = dc - dc_left; // row_b[0];
+
+        line2_y[yb_idx][1] = y;
+        line3_xb[b_idx][1] = dc;
+        strm_dc_residuals.write(residuals);
+    }
+
+    hls_XB_ForeachPrediction(xsize, ym_idx, yb_idx, t_idx, m_idx, b_idx, line2_y, line3_xb, strm_dc_y, strm_dc_xb,
+                             strm_dc_residuals);
+
+    {
+        // Last column: predicted from its left neighbour in the current row. Despite its name,
+        // row_m_last is read from the b_idx (current) row.
+        if (xsize >= 2) {
+            y = strm_dc_y.read();
+            dc = strm_dc_xb.read();
+            dct_t row_m_last = line3_xb[b_idx][xsize - 2];
+            residuals = dc - row_m_last; // row_m[xsize - 2];
+            line2_y[yb_idx][xsize - 1] = y;
+            line3_xb[b_idx][xsize - 1] = dc;
+            strm_dc_residuals.write(residuals);
+        }
+    }
+}
+
+// DC shrink for one X or B plane of size rect_in.xsize x rect_in.ysize.
+//
+// Row 0 always goes through the fixed left-neighbour pass; every further row goes through the
+// adaptive pass. Between rows the "row above" slot rotates through the three X/B history rows
+// (m_idx: 0, 1, 2, 0, ...) and toggles between the two Y history rows (ym_idx).
+void hls_ShrinkXB(const hls_Rect rect_in,
+                  hls::stream<dct_t>& strm_dc_xb,
+                  hls::stream<dct_t>& strm_dc_y,
+                  hls::stream<dct_t>& strm_dc_residuals) {
+#pragma HLS INLINE OFF
+
+    const int xsize = rect_in.xsize;
+    const int ysize = rect_in.ysize;
+
+    // Row histories, one memory per row.
+    dct_t line3_xb[3][MAX_NUM_BLOCK88_W];
+#pragma HLS RESOURCE variable = line3_xb core = RAM_2P_BRAM
+#pragma HLS ARRAY_PARTITION variable = line3_xb complete dim = 1
+
+    dct_t line2_y[2][MAX_NUM_BLOCK88_W];
+#pragma HLS RESOURCE variable = line2_y core = RAM_2P_BRAM
+#pragma HLS ARRAY_PARTITION variable = line2_y complete dim = 1
+
+    // Row 0.
+    hls_shink_xb_fixed(xsize, strm_dc_y, strm_dc_xb, line2_y, line3_xb, strm_dc_residuals);
+
+    _XF_IMAGE_PRINT("\n start XB row 1 \n");
+
+    bool ym_idx = false;
+    int m_idx = 0;
+
+    // Row 1: only one previous row exists, so row_t == row_m (is_by1 = true).
+    if (ysize >= 2) {
+        hls_shink_XB_adaptive(xsize, true, ym_idx, m_idx, line2_y, line3_xb, strm_dc_y, strm_dc_xb,
+                              strm_dc_residuals);
+    }
+
+    // Rows 2 and up: advance both history selectors before each row.
+    for (int row = 2; row < ysize; ++row) {
+        m_idx = (m_idx == 2) ? 0 : (m_idx + 1);
+        ym_idx = !ym_idx;
+
+        hls_shink_XB_adaptive(xsize, false, ym_idx, m_idx, line2_y, line3_xb, strm_dc_y, strm_dc_xb,
+                              strm_dc_residuals);
+    }
+}
+
+// DC shrink for the Y plane of size rect_in.xsize x rect_in.ysize.
+//
+// Row 0 always goes through the fixed left-neighbour pass; every further row goes through the
+// adaptive pass. Between rows the "row above" slot rotates through the three history rows
+// (m_idx: 0, 1, 2, 0, ...).
+void hls_ShrinkY(const hls_Rect rect_in, hls::stream<dct_t>& strm_dc_y, hls::stream<dct_t>& strm_dc_residuals) {
+#pragma HLS INLINE OFF
+
+    const int xsize = rect_in.xsize;
+    const int ysize = rect_in.ysize;
+
+    // Three-row history, one memory per row.
+    dct_t line3_y[3][MAX_NUM_BLOCK88_W];
+#pragma HLS RESOURCE variable = line3_y core = RAM_2P_BRAM
+#pragma HLS ARRAY_PARTITION variable = line3_y complete dim = 1
+
+    // Row 0.
+    hls_shink_fixed(xsize, strm_dc_y, line3_y, strm_dc_residuals);
+
+    _XF_IMAGE_PRINT("\n start Y row 0 \n");
+    _XF_IMAGE_PRINT("\n start Y row 1 \n");
+
+    int m_idx = 0;
+
+    // Row 1: only one previous row exists, so row_t == row_m (is_by1 = true).
+    if (ysize >= 2) {
+        hls_shink_Y_adaptive(xsize, true, m_idx, line3_y, strm_dc_y, strm_dc_residuals);
+        _XF_IMAGE_PRINT("\n start row 2 \n");
+    }
+
+    // Rows 2 and up: advance the history selector before each row.
+    for (int row = 2; row < ysize; ++row) {
+        m_idx = (m_idx == 2) ? 0 : (m_idx + 1);
+
+        hls_shink_Y_adaptive(xsize, false, m_idx, line3_y, strm_dc_y, strm_dc_residuals);
+
+        _XF_IMAGE_PRINT("\n start row %d \n", (int)row + 1);
+    }
+}
+
+// Top level of the DC shrink: processes the three planes one after another and writes all
+// residuals to the same output stream, in the order X, Y, B.
+//
+// The X and B passes each need their own copy of the Y plane (strm_dc_y1 and strm_dc_y3) in
+// addition to the copy consumed by the Y pass itself (strm_dc_y2), because each pass reads its
+// input streams to completion.
+void hls_ShrinkDC_top(const hls_Rect rect_dc,
+                      hls::stream<dct_t>& strm_dc_y1,
+                      hls::stream<dct_t>& strm_dc_y2,
+                      hls::stream<dct_t>& strm_dc_y3,
+                      hls::stream<dct_t>& strm_dc_x,
+                      hls::stream<dct_t>& strm_dc_b,
+                      hls::stream<dct_t>& strm_dc_residuals) {
+#pragma HLS INLINE OFF
+
+    // X plane, guided by Y copy 1.
+    hls_ShrinkXB(rect_dc, strm_dc_x, strm_dc_y1, strm_dc_residuals);
+
+    // Y plane.
+    hls_ShrinkY(rect_dc, strm_dc_y2, strm_dc_residuals);
+
+    _XF_IMAGE_PRINT("row_residuals of Y\n");
+
+    // B plane, guided by Y copy 3.
+    hls_ShrinkXB(rect_dc, strm_dc_b, strm_dc_y3, strm_dc_residuals);
+}
diff --git a/codec/L2/demos/pikEnc/kernel/kernel3/dc_tokenize.cpp b/codec/L2/demos/pikEnc/kernel/kernel3/dc_tokenize.cpp
new file mode 100755
index 0000000000..f003799857
--- /dev/null
+++ b/codec/L2/demos/pikEnc/kernel/kernel3/dc_tokenize.cpp
@@ -0,0 +1,174 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel3/dc_tokenize.hpp"
+// Hybrid var-len encode: values below the split token are the symbol itself; larger ones get a symbol plus n-1 raw bits.
+void hls_EncodeHybridVarLenUint(uint32_t value, int* symbol, int* nbits, int* bits) {
+#pragma HLS INLINE
+    if (value < hls_kHybridEncodingSplitToken) {
+        *symbol = value;
+        *nbits = 0;
+        *bits = 0;
+    } else {
+        uint32_t n = hls_Log2FloorNonZero_32b(value); // presumably floor(log2(value)); helper is defined elsewhere
+        uint32_t m = value - (1u << n);               // unsigned literal: a signed 1 << 31 does not fit in int
+        *symbol = hls_kHybridEncodingSplitToken + ((n - hls_kHybridEncodingDirectSplitExponent) << 1) + (m >> (n - 1));
+        *nbits = n - 1;
+        *bits = value & ((1u << (n - 1)) - 1); // low n-1 bits of value
+    }
+}
+
+// Signed variant: maps value through hls_PackSigned_32b and encodes the result with hls_EncodeHybridVarLenUint.
+void hls_EncodeHybridVarLenInt(int32_t value, int* symbol, int* nbits, int* bits) {
+#pragma HLS INLINE
+    hls_EncodeHybridVarLenUint(hls_PackSigned_32b(value), symbol, nbits, bits);
+}
+
+void Tokenize_DC_top(const bool rle,
+                     const hls_Rect rect,
+                     hls::stream<dct_t>& strm_dc_residuals,
+                     // outputs: every token writes one entry to addr/symb/bits and a false to both end-flag streams
+                     hls::stream<addr_t>& strm_token_addr,
+                     hls::stream<hls_Token_symb>& strm_token_symb,
+                     hls::stream<hls_Token_bits>& strm_token_bits,
+                     hls::stream<bool>& strm_e_addr,
+                     hls::stream<bool>& strm_e_dc) {
+#pragma HLS INLINE OFF
+    // Walks 3 planes (c) x ysize rows x xsize residuals and emits value tokens, plus zero-run tokens when rle is set.
+    const int xsize = rect.xsize;
+    const int ysize = rect.ysize;
+
+    int cnt = 0;
+    ap_uint<13> addr = 0;
+    // cnt is the pending zero-run length; it is cleared only when a run token is written, not at row boundaries.
+    for (int c = 0; c < 3; c++) {
+        for (int y = 0; y < ysize; y++) {
+            int x = 0;
+            dct_t dc = strm_dc_residuals.read();
+            while (x < xsize) {
+#pragma HLS PIPELINE II = 1
+                if (!rle || dc) { // non-zero values are always tokenized; with rle == false zeros are tokenized too
+                    if (cnt > 0) {
+                        int symbol, nbits, bits;
+                        // Flush the pending run first; x and dc are left alone, so the next pass emits the value token.
+                        hls_EncodeHybridVarLenUint(cnt - 1, &symbol, &nbits, &bits);
+                        hls_Token_symb out_s;
+                        hls_Token_bits out_t;
+                        out_s.context = c;
+                        out_s.symbol = hls_kRleSymStart + symbol;
+                        out_t.nbits = nbits;
+                        out_t.bits = bits;
+                        addr = (c << 8) + hls_kRleSymStart + symbol;
+
+                        strm_token_addr.write(addr);
+                        strm_token_symb.write(out_s);
+                        strm_token_bits.write(out_t);
+                        strm_e_addr.write(false);
+                        strm_e_dc.write(false);
+
+                        _XF_IMAGE_PRINT("---write cnt token(%d,%d,%d,%d), run_bits=%.2x \n", c,
+                                        hls_kRleSymStart + symbol, nbits, bits, cnt);
+
+                        cnt = 0;
+                    } else {
+                        int symbol, nbits, bits;
+                        hls_EncodeHybridVarLenInt(dc, &symbol, &nbits, &bits);
+                        assert(symbol < hls_kRleSymStart);
+                        hls_Token_symb out_s;
+                        hls_Token_bits out_t;
+                        out_s.context = c;
+                        out_s.symbol = symbol;
+                        out_t.nbits = nbits;
+                        out_t.bits = bits;
+                        addr = (c << 8) + symbol;
+
+                        strm_token_addr.write(addr);
+                        strm_token_symb.write(out_s);
+                        strm_token_bits.write(out_t);
+                        strm_e_addr.write(false);
+                        strm_e_dc.write(false);
+
+                        _XF_IMAGE_PRINT("---write token(%d,%d,%d,%d)\n", c, symbol, nbits, bits);
+
+                        // advance: fetch the next residual unless this was the last value of the row
+                        if (x < xsize - 1) dc = strm_dc_residuals.read();
+                        x++;
+                    }
+
+                } else { // rle && dc == 0: extend the zero run, emit nothing
+                    if (x < xsize - 1) dc = strm_dc_residuals.read();
+                    cnt++;
+                    x++;
+                }
+            } // bx
+        }     // by
+        // A run still pending at the end of a plane is flushed here, so a run never spans two values of c.
+        if (cnt > 0) {
+            int symbol, nbits, bits;
+            hls_EncodeHybridVarLenUint(cnt - 1, &symbol, &nbits, &bits);
+            hls_Token_symb out_s;
+            hls_Token_bits out_t;
+            out_s.context = c;
+            out_s.symbol = hls_kRleSymStart + symbol;
+            out_t.nbits = nbits;
+            out_t.bits = bits;
+            addr = (c << 8) + hls_kRleSymStart + symbol;
+
+            strm_token_addr.write(addr);
+            strm_token_symb.write(out_s);
+            strm_token_bits.write(out_t);
+            strm_e_addr.write(false);
+            strm_e_dc.write(false);
+
+            _XF_IMAGE_PRINT("---write cnt token(%d,%d,%d,%d), run_bits=%.2x \n", c, hls_kRleSymStart + symbol, nbits,
+                            bits, cnt);
+            cnt = 0;
+        } // per color
+    }
+    // All three planes done: write true once to each end-flag stream.
+    strm_e_addr.write(true);
+    strm_e_dc.write(true);
+}
+
+void hls_encode_dc_top(const bool rle,
+                       const hls_Rect rect_dc,
+                       hls::stream<dct_t>& strm_dc_y1,
+                       hls::stream<dct_t>& strm_dc_y2,
+                       hls::stream<dct_t>& strm_dc_y3,
+
+                       hls::stream<dct_t>& strm_dc_x,
+                       hls::stream<dct_t>& strm_dc_b,
+                       // token outputs, handed through unchanged to Tokenize_DC_top
+                       hls::stream<addr_t>& strm_token_addr,
+                       hls::stream<hls_Token_symb>& strm_token_symb,
+                       hls::stream<hls_Token_bits>& strm_token_bits,
+                       hls::stream<bool>& strm_e_addr,
+                       hls::stream<bool>& strm_e_dc) {
+#pragma HLS INLINE OFF
+#pragma HLS DATAFLOW
+    // clang-format off
+static hls::stream< dct_t >   strm_dc_residuals("dc_residuals");
+#pragma HLS RESOURCE variable=strm_dc_residuals core=FIFO_LUTRAM
+#pragma HLS STREAM   variable=strm_dc_residuals depth=32 dim=1
+    // clang-format on
+    // Two stages joined by strm_dc_residuals: it is handed to hls_ShrinkDC_top, then read by Tokenize_DC_top.
+    hls_ShrinkDC_top(rect_dc, strm_dc_y1, strm_dc_y2, strm_dc_y3, strm_dc_x, strm_dc_b,
+
+                     strm_dc_residuals);
+    // NOTE(review): the rle parameter is not forwarded; the tokenizer always runs with rle = false -- confirm intent.
+    Tokenize_DC_top(false, rect_dc, strm_dc_residuals, strm_token_addr, strm_token_symb, strm_token_bits, strm_e_addr,
+                    strm_e_dc);
+}
diff --git a/codec/L2/demos/pikEnc/kernel/kernel3/kernel3_common.cpp b/codec/L2/demos/pikEnc/kernel/kernel3/kernel3_common.cpp
new file mode 100755
index 0000000000..86313c332a
--- /dev/null
+++ b/codec/L2/demos/pikEnc/kernel/kernel3/kernel3_common.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel3/kernel3_common.hpp"
+
+void hls_WriteBits_strm(const nbits_t n_bits,
+                        uint16_t bits,
+
+                        int& num_bits,
+                        int& num,
+                        hls::stream<nbits_t>& strm_nbits,
+                        hls::stream<uint16_t>& strm_bits) {
+    // Queues one (bit-count, value) pair and keeps the caller's running totals in step; zero width is a no-op.
+    assert((bits >> n_bits) == 0);
+    assert(n_bits <= 56);
+
+    if (n_bits != 0) {
+        _XF_IMAGE_PRINT("---W--- n_bits=%d, bits=%d\n", (int)n_bits, bits);
+        nbits_t width = n_bits;
+        strm_nbits.write(width);
+        strm_bits.write(bits);
+        num_bits += width; // total number of bits queued so far
+        num++;             // number of pairs queued so far
+    }
+}
+
+void hls_WriteBits_strm_nodepend(const nbits_t n_bits,
+                                 uint16_t bits,
+                                 hls::stream<nbits_t>& strm_nbits,
+                                 hls::stream<uint16_t>& strm_bits) {
+    assert((bits >> n_bits) == 0);
+    assert(n_bits <= 56);
+    // Queues one (bit-count, value) pair without touching any running totals; zero width is a no-op.
+    if (n_bits != 0) {
+        _XF_IMAGE_PRINT("---W--- n_bits=%d, bits=%d\n", (int)n_bits, bits);
+        nbits_t width = n_bits;
+        strm_nbits.write(width);
+        strm_bits.write(bits);
+    }
+}
+
+void hls_StoreVarLenUint16(
+    uint32_t n, int& num_bits, int& num, hls::stream<nbits_t>& strm_nbits, hls::stream<uint16_t>& strm_bits) {
+    // Layout: 1 flag bit (0 when n == 0); else 4 bits of hls_Log2FloorNonZero_32b(n), then n minus (1 << that value).
+    const bool is_nonzero = (n != 0);
+    hls_WriteBits_strm(1, is_nonzero ? 1 : 0, num_bits, num, strm_nbits, strm_bits);
+    if (is_nonzero) {
+        const int exponent = hls_Log2FloorNonZero_32b(n);
+        hls_WriteBits_strm(4, exponent, num_bits, num, strm_nbits, strm_bits);
+        hls_WriteBits_strm(exponent, n - (1ULL << exponent), num_bits, num, strm_nbits, strm_bits);
+    }
+}
+// hls_kLog2Table[i] holds log2(i) for i = 1..255 (256 entries in total); entry 0 is stored as 0.
+static const float hls_kLog2Table[] = {
+    0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f, 1.5849625007211563f, 2.0000000000000000f,
+    2.3219280948873622f, 2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f, 3.1699250014423126f,
+    3.3219280948873626f, 3.4594316186372978f, 3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
+    3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f, 4.1699250014423122f, 4.2479275134435852f,
+    4.3219280948873626f, 4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f, 4.5849625007211570f,
+    4.6438561897747244f, 4.7004397181410926f, 4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
+    4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f, 5.0443941193584534f, 5.0874628412503400f,
+    5.1292830169449664f, 5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f, 5.2854022188622487f,
+    5.3219280948873626f, 5.3575520046180838f, 5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
+    5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f, 5.5849625007211570f, 5.6147098441152083f,
+    5.6438561897747244f, 5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f, 5.7548875021634691f,
+    5.7813597135246599f, 5.8073549220576046f, 5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
+    5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f, 5.9772799234999168f, 6.0000000000000000f,
+    6.0223678130284544f, 6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f, 6.1085244567781700f,
+    6.1292830169449672f, 6.1497471195046822f, 6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
+    6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f, 6.2854022188622487f, 6.3037807481771031f,
+    6.3219280948873617f, 6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f, 6.3923174227787598f,
+    6.4093909361377026f, 6.4262647547020979f, 6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
+    6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f, 6.5391588111080319f, 6.5545888516776376f,
+    6.5698556083309478f, 6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f, 6.6293566200796095f,
+    6.6438561897747253f, 6.6582114827517955f, 6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
+    6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f, 6.7548875021634691f, 6.7681843247769260f,
+    6.7813597135246599f, 6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f, 6.8328900141647422f,
+    6.8454900509443757f, 6.8579809951275719f, 6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
+    6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f, 6.9425145053392399f, 6.9541963103868758f,
+    6.9657842846620879f, 6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f, 7.0112272554232540f,
+    7.0223678130284544f, 7.0334230015374501f, 7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
+    7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f, 7.1085244567781700f, 7.1189410727235076f,
+    7.1292830169449664f, 7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f, 7.1699250014423130f,
+    7.1799090900149345f, 7.1898245588800176f, 7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
+    7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f, 7.2573878426926521f, 7.2667865406949019f,
+    7.2761244052742384f, 7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f, 7.3128829552843557f,
+    7.3219280948873617f, 7.3309168781146177f, 7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
+    7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f, 7.3923174227787607f, 7.4008794362821844f,
+    7.4093909361377026f, 7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f, 7.4429434958487288f,
+    7.4512111118323299f, 7.4594316186372973f, 7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
+    7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f, 7.5156998382840436f, 7.5235619560570131f,
+    7.5313814605163119f, 7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f, 7.5622424242210728f,
+    7.5698556083309478f, 7.5774288280357487f, 7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
+    7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f, 7.6293566200796095f, 7.6366246205436488f,
+    7.6438561897747244f, 7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f, 7.6724253419714952f,
+    7.6794800995054464f, 7.6865005271832185f, 7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
+    7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f, 7.7347096202258392f, 7.7414669864011465f,
+    7.7481928495894596f, 7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f, 7.7747870596011737f,
+    7.7813597135246608f, 7.7879025593914317f, 7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
+    7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f, 7.8328900141647422f, 7.8392037880969445f,
+    7.8454900509443757f, 7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f, 7.8703647195834048f,
+    7.8765169465650002f, 7.8826430493618425f, 7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
+    7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f, 7.9248125036057813f, 7.9307373375628867f,
+    7.9366379390025719f, 7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f, 7.9600019320680806f,
+    7.9657842846620870f, 7.9715435539507720f, 7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
+    7.9943534368588578f};
+
+float hls_FastLog2(int v) {
+    // Table lookup below 256, exactly 8.0f at 256, and an ap_ufixed<19, 16> log2 for anything larger.
+    // NOTE(review): a negative v would index in front of the table -- callers presumably pass v >= 0; confirm.
+    if (v < 256) return hls_kLog2Table[v];
+    if (v == 256) return 8.0f;
+
+#ifndef __SYNTHESIS__
+    float reference = std::log2(v);
+    ap_ufixed<19, 16> result = std::log2(v);
+    _XF_IMAGE_PRINT("-- debug log2(%d) = %f , golden = %f\n", v, (float)result, reference);
+#else
+    ap_ufixed<19, 16> fixed_in = v;
+    ap_ufixed<19, 16> result = hls::log2(fixed_in);
+#endif
+    return (float)result;
+}
+
+void hls_WriteBitToStream(const int num_pair,
+                          uint8_t& byte_tail,
+                          hls::stream<nbits_t>& strm_nbits,
+                          hls::stream<uint16_t>& strm_bits,
+                          int& pos,
+                          hls::stream<uint8_t>& strm_byte,
+                          hls::stream<bool>& strm_histo_e) {
+    uint8_t ntail = pos & 7; // pos mod 8: bit offset inside buffer where the next value is inserted
+    uint8_t n_byte = 0;      // completed bytes in buffer that still have to be written out
+    nbits_t nbits = 0;
+    uint16_t bits;
+    ap_uint<32> buffer = byte_tail;
+    int cnt = 0;
+    // Packs num_pair (nbits, bits) pairs into bytes; an iteration either loads one pair or writes one byte.
+    while (cnt < num_pair + 1) {
+#pragma HLS PIPELINE II = 1
+
+        if (n_byte == 0) { // update num to write
+            if (cnt < num_pair) {
+                nbits = strm_nbits.read();
+                bits = strm_bits.read();
+
+                pos += nbits;
+                n_byte = (ntail + nbits) >> 3;
+                buffer(ntail + 16, ntail) = bits;
+                ntail = (ntail + nbits) & 7;
+            }
+            // The extra pass with cnt == num_pair loads nothing; byte_tail then keeps the low byte left in buffer.
+            byte_tail = buffer(7, 0);
+            cnt++; // end here
+
+        } else { // write out
+
+            uint8_t byte = buffer(7, 0);
+
+            buffer = buffer >> 8;
+            strm_byte.write(byte);
+            strm_histo_e.write(false); // one false per byte; this function never writes true
+            n_byte--;
+        }
+    } // end while
+}
+
+void hls_WriteZeroesToByteBoundary(int* pos) {
+    // Round the bit position up to the next multiple of 8; only *pos changes, no stream is written here.
+    *pos = (*pos + 7) & ~7;
+    assert((*pos & 7) == 0);
+}
diff --git a/codec/L2/demos/pikEnc/opts.ini b/codec/L2/demos/pikEnc/opts.ini
new file mode 100755
index 0000000000..e54a567797
--- /dev/null
+++ b/codec/L2/demos/pikEnc/opts.ini
@@ -0,0 +1,2 @@
+[vivado]
+param=project.writeIntermediateCheckpoints=1
diff --git a/codec/L2/demos/pikEnc/utils.mk b/codec/L2/demos/pikEnc/utils.mk
new file mode 100644
index 0000000000..0ee80e90da
--- /dev/null
+++ b/codec/L2/demos/pikEnc/utils.mk
@@ -0,0 +1,270 @@
+#
+# Copyright 2019-2022 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# vitis makefile-generator v2.0.6
+#
+#+-------------------------------------------------------------------------------
+# The following parameters are assigned with default values. These parameters can
+# be overridden through the make command line
+#+-------------------------------------------------------------------------------
+
+REPORT := no
+PROFILE := no
+DEBUG := no
+
+#Any REPORT value other than 'no' adds both report options below:
+#'estimate' for estimate report generation, 'system' for system report generation
+ifneq ($(REPORT), no)
+VPP_LDFLAGS += --report estimate
+VPP_LDFLAGS += --report system
+endif
+
+#PROFILE=yes adds the --profile_kernel option (generates profile summary report)
+ifeq ($(PROFILE), yes)
+VPP_LDFLAGS += --profile_kernel data:all:all:all
+endif
+
+#DEBUG=yes adds the --dk protocol option (generates debug summary report)
+ifeq ($(DEBUG), yes)
+VPP_LDFLAGS += --dk protocol:all:all:all
+endif
+
+#Check environment setup: fall back to default install paths when XILINX_VITIS / XILINX_XRT are unset, and export them
+ifndef XILINX_VITIS
+  XILINX_VITIS = /opt/xilinx/Vitis/$(TOOL_VERSION)
+  export XILINX_VITIS
+endif
+ifndef XILINX_XRT
+  XILINX_XRT = /opt/xilinx/xrt
+  export XILINX_XRT
+endif
+# Warn if PLATFORM_NAME matches no PLATFORM_ALLOWLIST entry; fail if it matches a PLATFORM_BLOCKLIST entry (POSIX sh).
+.PHONY: check_device
+check_device:
+	@set -eu; \
+	inallowlist=False; inblocklist=False; \
+	for dev in $(PLATFORM_ALLOWLIST); \
+	    do if echo "$(PLATFORM_NAME)" | grep -q -e "$$dev"; \
+		then inallowlist=True; fi; \
+	done ;\
+	for dev in $(PLATFORM_BLOCKLIST); \
+	    do if echo "$(PLATFORM_NAME)" | grep -q -e "$$dev"; \
+		then inblocklist=True; fi; \
+	done ;\
+	if [ "$$inallowlist" = False ]; \
+	    then echo "[Warning]: The device $(PLATFORM_NAME) not in allowlist."; \
+	fi; \
+	if [ "$$inblocklist" = True ]; \
+	    then echo "[ERROR]: The device $(PLATFORM_NAME) in blocklist."; exit 1;\
+	fi;
+
+#get HOST_ARCH by PLATFORM; ':=' runs platforminfo once instead of once per conditional that reads the result
+ifneq (,$(PLATFORM))
+HOST_ARCH_temp := $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
+ifeq ($(HOST_ARCH_temp), x86)
+HOST_ARCH := x86
+else ifeq ($(HOST_ARCH_temp), cortex-a9)
+HOST_ARCH := aarch32
+else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
+HOST_ARCH := aarch64
+endif # HOST_ARCH is left untouched when the reported CPU type matches none of the above
+endif # PLATFORM is set
+
+
+#get suffix of kernel by PLATFORM: xsa for versal with v++ >= 2022.1, xclbin otherwise (':=' runs each tool once)
+VITIS_VER := $(shell v++ --version | grep 'v++' | sed 's/^[[:space:]]*//' | sed -e 's/^[*]* v++ v//g' | cut -d " " -f1)
+DEVICE_TYPE := $(shell platforminfo -p $(PLATFORM) | grep 'FPGA Family' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
+ifeq ($(DEVICE_TYPE), versal)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+LINK_TARGET_FMT := xsa
+else
+LINK_TARGET_FMT := xclbin
+endif
+else
+LINK_TARGET_FMT := xclbin
+endif
+
+#Checks for Device Family (assignments are indented with spaces: a leading tab can be read as a recipe line)
+ifeq ($(HOST_ARCH), aarch32)
+  DEV_FAM = 7Series
+else ifeq ($(HOST_ARCH), aarch64)
+  DEV_FAM = Ultrascale
+endif
+
+#Checks for Correct architecture: stops unless HOST_ARCH is one of aarch64, aarch32, x86
+ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
+$(error HOST_ARCH variable not set, please set correctly and rerun)
+endif
+.PHONY: check_version # prints the newest commit of XFLIB_DIR; the recipe is empty without git or a .git entry
+check_version:
+ifneq (, $(shell which git))
+ifneq (,$(wildcard $(XFLIB_DIR)/.git))
+	@cd $(XFLIB_DIR) && git log --graph --pretty=format:'%Cred%h%Creset -%C(yellow)%d%Creset %s %Cgreen(%cr) %C(bold blue)<%an>%Creset' --abbrev-commit -n 1 && cd -
+endif
+endif
+.PHONY: check_sysroot
+#Checks for SYSROOT: when HOST_ARCH is not x86 the target aborts unless SYSROOT is defined
+check_sysroot:
+ifneq ($(HOST_ARCH), x86)
+ifndef SYSROOT
+	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+
+#Checks for g++: picks CXX per HOST_ARCH; on x86 the major version of the installed g++ (__GNUG__) is checked first
+CXX := g++
+ifeq ($(HOST_ARCH), x86)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif # CXX_VER: minimum g++ version, chosen by VITIS_VER
+CXX_V := $(shell echo $(CXX_VER) | awk -F. '{print tolower($$1)}')
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
+ifndef XILINX_VIVADO
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
+else
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
+ifeq ($(LD_LIBRARY_PATH),)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
+else
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
+endif # LD_LIBRARY_PATH empty or not
+$(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
+endif # XILINX_VIVADO defined or not
+endif # installed g++ major version below CXX_V
+else ifeq ($(HOST_ARCH), aarch64)
+CXX := $(XILINX_VITIS)/gnu/aarch64/lin/aarch64-linux/bin/aarch64-linux-gnu-g++
+else ifeq ($(HOST_ARCH), aarch32)
+CXX := $(XILINX_VITIS)/gnu/aarch32/lin/gcc-arm-linux-gnueabi/bin/arm-linux-gnueabihf-g++
+endif # HOST_ARCH
+
+#Check OS and setting env for xrt c++ api
+# OSDIST / OSREL: lower-cased distributor id and release as reported by lsb_release.
+# ':=' runs lsb_release once per variable; with '=' every conditional that reads the value re-ran it.
+OSDIST := $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
+OSREL := $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
+
+# for centos and redhat
+# Both distros took identical branches, so they share one test: the two findstring results are
+# concatenated and the condition holds when either one is non-empty. The major release is the
+# first dot-separated field of OSREL. Release 7 on an x86 host gets the define
+# -D_GLIBCXX_USE_CXX11_ABI=0 appended to XRT_CXXFLAGS.
+ifneq ($(findstring centos,$(OSDIST))$(findstring redhat,$(OSDIST)),)
+ifeq (7,$(firstword $(subst ., ,$(OSREL))))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif # x86
+endif # release 7
+endif # centos / redhat
+
+#Setting VPP
+VPP := v++
+
+#Setting AIE compiler and simulator command names
+AIECXX := aiecompiler
+AIESIMULATOR := aiesimulator
+X86SIMULATOR := x86simulator
+# check_vivado: fails when $(XILINX_VIVADO)/bin/vivado does not exist (tested with wildcard while the makefile is read)
+.PHONY: check_vivado
+check_vivado:
+ifeq (,$(wildcard $(XILINX_VIVADO)/bin/vivado))
+	@echo "Cannot locate Vivado installation. Please set XILINX_VIVADO variable." && false
+endif
+# check_vpp: same test for $(XILINX_VITIS)/bin/v++
+.PHONY: check_vpp
+check_vpp:
+ifeq (,$(wildcard $(XILINX_VITIS)/bin/v++))
+	@echo "Cannot locate Vitis installation. Please set XILINX_VITIS variable." && false
+endif
+# check_xrt: same test for $(XILINX_XRT)/lib/libxilinxopencl.so
+.PHONY: check_xrt
+check_xrt:
+ifeq (,$(wildcard $(XILINX_XRT)/lib/libxilinxopencl.so))
+	@echo "Cannot locate XRT installation. Please set XILINX_XRT variable." && false
+endif
+# Tools go on PATH; for x86 hosts the XRT libraries go on LD_LIBRARY_PATH. Both are exported so recipes see them.
+export PATH := $(XILINX_VITIS)/bin:$(XILINX_XRT)/bin:$(PATH)
+ifeq ($(HOST_ARCH), x86)
+ifeq (,$(LD_LIBRARY_PATH))
+export LD_LIBRARY_PATH := $(XILINX_XRT)/lib
+else
+export LD_LIBRARY_PATH := $(XILINX_XRT)/lib:$(LD_LIBRARY_PATH)
+endif
+endif
+
+ifneq (,$(wildcard $(PLATFORM)))
+# PLATFORM names an existing path: use it as given
+XPLATFORM := $(PLATFORM)
+else
+# Otherwise use PLATFORM as a file name pattern; each later stage runs only while XPLATFORM is still empty
+# 1. search paths specified by variable (PLATFORM_REPO_PATHS, colon-separated)
+ifneq (,$(PLATFORM_REPO_PATHS))
+# 1.1 as exact name: <dir>/PLATFORM/PLATFORM.xpfm
+XPLATFORM := $(strip $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/$(PLATFORM)/$(PLATFORM).xpfm)))
+# 1.2 as a pattern: keep every <dir>/*/*.xpfm path that matches PLATFORM as an awk regular expression
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/*/*.xpfm))
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 1.2
+endif # 1
+# 2. search Vitis installation (XILINX_VITIS/platforms)
+ifeq (,$(XPLATFORM))
+# 2.1 as exact name
+XPLATFORM := $(strip $(wildcard $(XILINX_VITIS)/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 2.2 as a pattern
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard $(XILINX_VITIS)/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 2.2
+endif # 2
+# 3. search default locations (/opt/xilinx/platforms)
+ifeq (,$(XPLATFORM))
+# 3.1 as exact name
+XPLATFORM := $(strip $(wildcard /opt/xilinx/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 3.2 as a pattern; XPLATFORMS is overwritten by each pattern stage, so it holds only the last stage's candidates
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard /opt/xilinx/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 3.2
+endif # 3
+endif # PLATFORM is an existing path or not
+
+define MSG_PLATFORM
+No platform matched pattern '$(PLATFORM)'.
+Available platforms are: $(XPLATFORMS)
+To add more platform directories, set the PLATFORM_REPO_PATHS variable or point PLATFORM variable to the full path of platform .xpfm file.
+endef
+export MSG_PLATFORM
+# Exported so the recipe below can read the multi-line text from the environment as a shell variable.
+# check_platform: prints MSG_PLATFORM and fails when XPLATFORM is empty
+.PHONY: check_platform
+check_platform:
+ifeq (,$(XPLATFORM))
+	@echo "$${MSG_PLATFORM}" && false
+endif
+#Check ends
+
+#   PLATFORM_NAME - file name part of PLATFORM with a trailing .xpfm suffix removed
+#   (built-in notdir instead of a `basename` subprocess, which reports an error whenever PLATFORM is empty)
+PLATFORM_NAME = $(strip $(patsubst %.xpfm,%,$(notdir $(PLATFORM))))
+
+
+# Shell command shorthands: forced remove, recursive remove, move, recursive copy
+RM = rm -f
+RMDIR = rm -rf
+# ECHO carries the '@' prefix, so recipe lines that start with it are not echoed by make
+MV = mv -f
+CP = cp -rf
+ECHO:= @echo
diff --git a/codec/L2/demos/resize/Makefile b/codec/L2/demos/resize/Makefile
new file mode 100644
index 0000000000..96aa5680dd
--- /dev/null
+++ b/codec/L2/demos/resize/Makefile
@@ -0,0 +1,331 @@
+# Copyright 2019-2022 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# vitis makefile-generator v2.0.6
+
+############################## Help Section ##############################
+.PHONY: help
+
+help::
+	$(ECHO) "Makefile Usage:"
+	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to generate the design for specified Target and Shell."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to run application in emulation."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make xclbin TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to build xclbin application."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make host TARGET=<hw/hw_emu/sw_emu/>"
+	$(ECHO) "      Command to build host application."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
+	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) ""
+	$(ECHO) "  make clean "
+	$(ECHO) "      Command to remove the generated non-hardware files."
+	$(ECHO) ""
+	$(ECHO) "  make cleanall"
+	$(ECHO) "      Command to remove all the generated files."
+	$(ECHO) ""
+
+############################## Setting up Project Variables ##############################
+
+MK_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+XF_PROJ_ROOT ?= $(shell bash -c 'export MK_PATH=$(MK_PATH); echo $${MK_PATH%/L2/*}')
+CUR_DIR := $(patsubst %/,%,$(dir $(MK_PATH)))
+XFLIB_DIR = $(XF_PROJ_ROOT)
+
+# setting default value
+TARGET ?= sw_emu
+HOST_ARCH ?= x86
+
+#setting PLATFORM
+ifeq ($(PLATFORM),)
+PLATFORM := $(DEVICE)
+endif
+ifeq ($(PLATFORM),)
+PLATFORM := xilinx_u50_gen3x16_xdma_5_202210_1
+endif
+
+# #################### Checking if PLATFORM in allowlist ############################
+PLATFORM_ALLOWLIST +=  u50
+PLATFORM_BLOCKLIST +=  zc
+
+include ./utils.mk
+TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
+TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
+BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
+BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
+EMCONFIG := $(BUILD_DIR)/emconfig.json
+XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
+export XCL_BINDIR = $(XCLBIN_DIR)
+
+EXE_FILE_DEPS :=
+BINARY_CONTAINERS_DEPS :=
+RUN_DEPS :=
+
+# get global setting
+ifeq ($(HOST_ARCH), x86)
+CXXFLAGS += -fmessage-length=0 -I$(CUR_DIR)/src/ -I$(XILINX_XRT)/include -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
+LDFLAGS += -pthread -L$(XILINX_XRT)/lib -L$(XILINX_HLS)/lnx64/tools/fpo_v7_0  -Wl,--as-needed -lOpenCL -lxrt_coreutil -lgmp -lmpfr -lIp_floating_point_v7_0_bitacc_cmodel 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
+VPP_LDFLAGS += --optimize 2 -R 2 
+else ifeq ($(HOST_ARCH), aarch64)
+CXXFLAGS += -I$(CUR_DIR)/src/ -fmessage-length=0 --sysroot=$(SYSROOT)  -I$(SYSROOT)/usr/include/xrt -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
+LDFLAGS += -pthread -L$(SYSROOT)/usr/lib -L$(XILINX_VITIS_AIETOOLS)/lib/aarch64.o -Wl,--as-needed -lxilinxopencl -lxrt_coreutil 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
+VPP_LDFLAGS += --optimize 2 -R 2 
+endif
+CXXFLAGS += $(EXTRA_CXXFLAGS)
+VPP_FLAGS += $(EXTRA_VPP_FLAGS)
+
+########################## Setting up Host Variables ##########################
+ifeq ($(TARGET),sw_emu)
+CXXFLAGS += -D SW_EMU_TEST
+endif
+ifeq ($(TARGET),hw_emu)
+CXXFLAGS += -D HW_EMU_TEST
+endif
+
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
+#Include Required Host Source Files
+HOST_SRCS += $(CUR_DIR)/host/test_resize.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
+CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include/hw/resize -I $(XFLIB_DIR)/L2/demos/resize/host -I $(XFLIB_DIR)/L2/demos/resize/kernel -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
+CXXFLAGS += -O3 -std=c++11
+
+EXE_NAME := host.exe
+EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
+EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
+
+HOST_ARGS :=  -xclbin $(BUILD_DIR)/kernel_resize.xclbin -i $(CUR_DIR)/images/t0.raw -srcw 512 -srch 512 -dstw 64 -dsth 64
+ifneq ($(HOST_ARCH), x86)
+PKG_HOST_ARGS = $(foreach args,$(HOST_ARGS),$(subst $(dir $(patsubst %/,%,$(args))),,$(args)))
+endif
+
+########################## Kernel compiler global settings ##########################
+ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
+VPP_FLAGS +=   --config $(CUR_DIR)/conn_u50.cfg
+VPP_FLAGS +=  -I $(XFLIB_DIR)/L2/include/hw/resize -I $(XFLIB_DIR)/L2/demos/resize/kernel
+
+else 
+VPP_FLAGS +=  -I $(XFLIB_DIR)/L2/include/hw/resize -I $(XFLIB_DIR)/L2/demos/resize/kernel
+
+endif
+
+######################### binary container global settings ##########################
+VPP_FLAGS_kernel_resize +=  -D KERNEL_NAME=kernel_resize
+VPP_FLAGS_kernel_resize += --hls.clock 300000000:kernel_resize
+ifneq ($(HOST_ARCH), x86)
+VPP_LDFLAGS_kernel_resize += --clock.defaultFreqHz 300000000
+else
+VPP_LDFLAGS_kernel_resize += --kernel_frequency 300
+endif
+
+ifeq ($(HOST_ARCH), x86)
+BINARY_CONTAINERS += $(BUILD_DIR)/kernel_resize.xclbin
+else
+BINARY_CONTAINERS += $(BUILD_DIR)/kernel_resize_pkg.$(LINK_TARGET_FMT)
+BINARY_CONTAINERS_PKG += $(BUILD_DIR)/kernel_resize.xclbin
+endif
+
+# ################ Setting Rules for Binary Containers (Building Kernels) ################
+$(TEMP_DIR)/kernel_resize.xo: $(CUR_DIR)/kernel/kernel_resize.cpp 
+	$(ECHO) "Compiling Kernel: kernel_resize"
+	mkdir -p $(TEMP_DIR)
+	$(VPP) -c $(VPP_FLAGS_kernel_resize) $(VPP_FLAGS) -k kernel_resize -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
+BINARY_CONTAINER_kernel_resize_OBJS += $(TEMP_DIR)/kernel_resize.xo
+BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_resize_OBJS)
+$(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
+	mkdir -p $(BUILD_DIR)
+	$(VPP) -l $(VPP_FLAGS) --temp_dir $(TEMP_DIR) --report_dir $(BUILD_REPORT_DIR)/kernel_resize $(VPP_LDFLAGS)  $(VPP_LDFLAGS_kernel_resize) $(AIE_LDFLAGS)   -o $@ $^
+
+############################## Setting Rules for Host (Building Host Executable) ##############################
+ifeq ($(HOST_ARCH), x86)
+$(EXE_FILE): $(EXE_FILE_DEPS) |  check_xrt
+	mkdir -p $(BUILD_DIR)
+	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
+
+else
+$(EXE_FILE): $(EXE_FILE_DEPS) |  check_sysroot
+	mkdir -p $(BUILD_DIR)
+	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
+
+endif
+
+$(EMCONFIG):
+	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
+############################## Preparing sdcard folder ##############################
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE := $(SYSROOT)/../../uImage
+else
+K_IMAGE := $(SYSROOT)/../../Image
+endif
+RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
+$(RUN_SCRIPT):
+	rm -rf $(RUN_SCRIPT)
+	@echo 'export LD_LIBRARY_PATH=/mnt:/tmp:$(LIBRARY_PATH)' >> $(RUN_SCRIPT)
+ifneq ($(filter sw_emu hw_emu, $(TARGET)),)
+	@echo 'export XCL_EMULATION_MODE=$(TARGET)' >> $(RUN_SCRIPT)
+endif
+	@echo 'export XILINX_VITIS=/mnt' >> $(RUN_SCRIPT)
+	@echo 'export XILINX_XRT=/usr' >> $(RUN_SCRIPT)
+	@echo 'if [ -f platform_desc.txt  ]; then' >> $(RUN_SCRIPT)
+	@echo '        cp platform_desc.txt /etc/xocl.txt' >> $(RUN_SCRIPT)
+	@echo 'fi' >> $(RUN_SCRIPT)
+	@echo './$(EXE_NAME) $(PKG_HOST_ARGS)' >> $(RUN_SCRIPT)
+	@echo 'return_code=$$?' >> $(RUN_SCRIPT)
+	@echo 'if [ $$return_code -ne 0 ]; then' >> $(RUN_SCRIPT)
+	@echo '        echo "ERROR: Embedded host run failed, RC=$$return_code"' >> $(RUN_SCRIPT)
+	@echo 'else' >> $(RUN_SCRIPT)
+	@echo '        echo "INFO: TEST PASSED, RC=0"' >> $(RUN_SCRIPT)
+	@echo 'fi' >> $(RUN_SCRIPT)
+	@echo 'echo "INFO: Embedded host run completed."' >> $(RUN_SCRIPT)
+	@echo 'exit $$return_code' >> $(RUN_SCRIPT)
+DATA_FILE := $(CUR_DIR)//images/t0.raw 
+DATA_DIR := 
+SD_FILES += $(RUN_SCRIPT)
+SD_FILES += $(EXE_FILE)
+SD_FILES += $(EMCONFIG)
+SD_FILES += xrt.ini
+SD_FILES += $(DATA_FILE)# where define DATAFILE in json
+SD_FILES_WITH_PREFIX = $(foreach sd_file,$(SD_FILES), $(if $(filter $(sd_file),$(wildcard $(sd_file))), --package.sd_file $(sd_file)))
+SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
+PACKAGE_FILES := $(BINARY_CONTAINERS)
+PACKAGE_FILES += $(AIE_CONTAINER)
+SD_CARD := $(CUR_DIR)/package_$(TARGET)
+vck190_dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+	@echo "Generating sd_card folder...."
+	mkdir -p $(SD_CARD)
+	chmod a+rx $(BUILD_DIR)/run_script.sh
+ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	@echo "### ***** sd_card generation done! ***** ###"
+vck190_dfx_hw := true
+endif
+endif
+ifeq ($(vck190_dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+	@echo "### ***** sd_card generation done! ***** ###"
+endif
+
+.PHONY: sd_card
+sd_card: $(SD_CARD)
+endif
+############################## Setting Essential Checks and Building Rules ##############################
+RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
+RUN_DEPS += $(SD_CARD)
+
+.PHONY: mkflag all run
+mkflag:
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do echo $$var >> $(BUILD_DIR)/makefile_args.txt; done
+all: check_device  check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
+#hw_emu
+ifneq (,$(filter hw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	XCL_EMULATION_MODE=$(TARGET) $(EXE_FILE) $(HOST_ARGS)
+	
+else
+	@echo $(RUN_DEPS)
+	$(SD_CARD)/launch_$(TARGET).sh -no-reboot -run-app $(notdir $(RUN_SCRIPT)) 
+	grep "TEST PASSED, RC=0" $(SD_CARD)/qemu_output.log || exit 1
+	
+endif
+endif
+#sw_emu
+ifneq (,$(filter sw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	XCL_EMULATION_MODE=$(TARGET) $(EXE_FILE) $(HOST_ARGS) 
+	
+else
+	@echo $(RUN_DEPS)
+	$(SD_CARD)/launch_$(TARGET).sh -no-reboot -run-app $(notdir $(RUN_SCRIPT)) 
+	grep "TEST PASSED, RC=0" $(SD_CARD)/qemu_output.log || exit 1
+	
+endif
+endif
+#hw
+ifeq ($(TARGET), hw)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(HOST_ARGS)
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(HOST_ARGS)
+	
+else
+	$(ECHO) "Please copy the content of sd_card folder and data to an SD Card and run on the board"
+endif
+endif
+
+############################## Setting Targets ##############################
+
+.PHONY: clean cleanall emconfig
+emconfig: $(EMCONFIG)
+
+.PHONY: host
+ifeq ($(HOST_ARCH), x86)
+host:  check_xrt $(EXE_FILE)
+else
+host:  check_sysroot $(EXE_FILE)
+endif
+
+.PHONY: xclbin
+ifeq ($(HOST_ARCH), x86)
+xclbin:  check_vpp check_xrt $(BINARY_CONTAINERS) 
+else
+xclbin:  check_vpp check_sysroot $(BINARY_CONTAINERS) 
+endif
+
+############################## Cleaning Rules ##############################
+.PHONY: cleanh cleank
+cleanh:
+	-$(RMDIR) $(EXE_FILE) vitis_* TempConfig system_estimate.xtxt *.rpt .run/
+	-$(RMDIR) src/*.ll _xocc_* .Xil dltmp* xmltmp* *.log *.jou *.wcfg *.wdb sample_link.ini sample_compile.ini obj*  bin* *.csv *.jpg *.jpeg *.png
+# cleank removes kernel/emulation artifacts; cleanh (above) removes host-side build artifacts.
+cleank:
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*
+# cleanall runs both helpers, then removes build/report/package directories and remaining tool output.
+cleanall: cleanh cleank
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
+	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+
+clean: cleanh
\ No newline at end of file
diff --git a/codec/L2/demos/resize/README.md b/codec/L2/demos/resize/README.md
new file mode 100644
index 0000000000..09a25e5185
--- /dev/null
+++ b/codec/L2/demos/resize/README.md
@@ -0,0 +1,122 @@
+.. 
+   Copyright 2019 Xilinx, Inc.
+  
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+# Resize 
+
+The Resize example resides in the ``L2/demos/resize`` directory. This tutorial provides a step-by-step guide that covers the commands for building and running the kernel.
+
+## Executable Usage
+
+* **Work Directory(Step 1)**
+
+The steps for library download and environment setup can be found [here](https://github.com/Xilinx/Vitis_Libraries/tree/master/codec/L2/demos#building). To get the design,
+
+```
+   cd L2/demos/resize
+```   
+
+* **Build kernel(Step 2)**
+
+Run the following make command to build your XCLBIN and host binary targeting a specific device. Please note that this process takes a long time, possibly a couple of hours.
+
+```
+   make run TARGET=hw DEVICE=xilinx_u50_gen3x16_xdma_201920_3
+```   
+
+* **Run kernel(Step 3)**
+
+To get the benchmark results, please run the following command.
+
+```
+   ./build_dir.hw.xilinx_u50_gen3x16_xdma_201920_3/host.exe -xclbin build_dir.hw.xilinx_u50_gen3x16_xdma_201920_3/kernel_resize.xclbin -i images/t0.raw -srcw 512 -srch 512 -dstw 256 -dsth 256 
+```   
+
+Resize Input Arguments:
+
+```
+   Usage: host.exe -[-xclbin -i -srcw -srch -dstw -dsth]
+         -xclbin:           the path to the xclbin file
+         -i:                the input raw image file
+         -srcw:             the source image width
+         -srch:             the source image height
+         -dstw:             the destination width 
+         -dsth:             the destination height
+```         
+
+Note: Default arguments are set in the Makefile. The input is a raw image file that the host reads one byte per pixel (``-srcw`` * ``-srch`` pixels in total).
+
+* **Example output(Step 4)** 
+
+```
+    Read image successfully.
+    Found Platform
+    Platform Name: Xilinx
+    Info: Context created
+    Info: Command queue created
+    Found Device=xilinx_u50_gen3x16_xdma_201920_3
+    INFO: Importing build_dir.sw_emu.xilinx_u50_gen3x16_xdma_201920_3/kernel_resize.xclbin
+    Loading: 'build_dir.sw_emu.xilinx_u50_gen3x16_xdma_201920_3/kernel_resize.xclbin'
+    Info: Program created
+    Info: Kernel created
+    kernel has been created
+    INFO: kernel start------
+    INFO: kernel end------
+    INFO: Execution time 6334.86ms
+    Info: Time in host-to-device: 14.9204ms
+    Info: Time in kernel: 6319.06ms
+    Info: Time in device-to-host: 0.475648ms
+    The src image size is 512*512.
+    The dst image size is 89*27.
+    Image resized successfully.
+    PASS: no error found.
+    Info: Test passed
+```    
+    
+## Profiling
+
+The hardware resource utilizations are listed in the following table.
+Different tool versions may result in slightly different resource usage.
+
+Table 1 : Hardware resources for Resize 
+
+    +---------------------+----------+----------+----------+----------+---------+-----------------+
+    |    Kernel           |   BRAM   |   URAM   |    DSP   |    FF    |   LUT   | Frequency(MHz)  |
+    +---------------------+----------+----------+----------+----------+---------+-----------------+
+    |  kernel_resize(1x)  |    14    |    0     |    53    |   8635   |  6566   |      397.1      |
+    +---------------------+----------+----------+----------+----------+---------+-----------------+
+    |  kernel_resize(8x)  |    29    |    0     |    168   |   20824  |  15087  |      340.9      |
+    +---------------------+----------+----------+----------+----------+---------+-----------------+
+
+Table 2 : Resize FPGA acceleration benchmark 
+
+    +---------------+-----------+--------------------+-----------------+
+    |    Inputs     |   Size    |  FPGA 1x/8x (ms)   |   Fps 1x / 8x   |
+    +---------------+-----------+--------------------+-----------------+
+    |   7680*4320   |  512*512  |    84.30 / 12.55   |  11.86 / 79.67  |
+    +---------------+-----------+--------------------+-----------------+
+    |   7680*4320   | 1920*1080 |    84.35 / 12.43   |  11.86 / 80.46  | 
+    +---------------+-----------+--------------------+-----------------+
+    |   7680*4320   | 3840*2160 |    84.34 / 12.43   |  11.86 / 80.46  | 
+    +---------------+-----------+--------------------+-----------------+
+
+Note: This table is the result of each image resize down 8 times.
+
+.. Note::
+
+   1. Resize running on Intel(R) Xeon(R) Silver 4116 CPU @ 2.10GHz, cache(16896 KB), cores(12).
+   2. time unit: ms.
+
+.. toctree::
+    :maxdepth: 1
diff --git a/codec/L2/demos/resize/conn_u50.cfg b/codec/L2/demos/resize/conn_u50.cfg
new file mode 100644
index 0000000000..a8cc274ce3
--- /dev/null
+++ b/codec/L2/demos/resize/conn_u50.cfg
@@ -0,0 +1,6 @@
+[connectivity]
+sp=kernel_resize.m_axi_gmem0:HBM[0]
+sp=kernel_resize.m_axi_gmem1:HBM[1]
+sp=kernel_resize.m_axi_gmem2:HBM[2]
+slr=kernel_resize:SLR0
+nk=kernel_resize:1:kernel_resize
diff --git a/codec/L2/demos/resize/description.json b/codec/L2/demos/resize/description.json
new file mode 100644
index 0000000000..cbf4af4ea1
--- /dev/null
+++ b/codec/L2/demos/resize/description.json
@@ -0,0 +1,118 @@
+{
+    "gui": true, 
+    "name": "Xilinx Resize Test", 
+    "description": "An example for fast resize in codec cases", 
+    "flow": "vitis", 
+    "platform_allowlist": [
+        "u50" 
+    ], 
+    "platform_blocklist": [
+        "zc"
+    ], 
+    "platform_properties": {
+        "u50": {
+            "v++": {
+                "compiler": {
+                    "clflags": [
+                        "--config PROJECT/conn_u50.cfg"
+                    ]
+                }
+            }
+        } 
+    },
+    "data":[
+        "PROJECT/images/t0.raw",
+        "PROJECT/images/t0_resized.raw"
+    ],
+    "launch": [
+        {
+            "cmd_args": " -xclbin BUILD/kernel_resize.xclbin -i PROJECT/images/t0.raw -srcw 512 -srch 512 -dstw 64 -dsth 64", 
+            "name": "generic launch for all flows"
+        }
+    ], 
+    "host": {
+        "host_exe": "host.exe", 
+        "compiler": {
+            "sources": [
+                "host/test_resize.cpp", 
+                "LIB_DIR/ext/xcl2/xcl2.cpp"
+            ], 
+            "includepaths": [
+                "LIB_DIR/L2/include/hw/resize", 
+                "LIB_DIR/L2/demos/resize/host", 
+                "LIB_DIR/L2/demos/resize/kernel", 
+                "LIB_DIR/../utils/L1/include", 
+                "LIB_DIR/ext/xcl2"
+            ], 
+            "options": "-O3 -std=c++11"
+        }
+    }, 
+    "v++": {
+        "compiler": {
+            "includepaths": [
+                "LIB_DIR/L2/include/hw/resize", 
+                "LIB_DIR/L2/demos/resize/kernel"
+            ]
+        }
+    }, 
+    "containers": [
+        {
+            "accelerators": [
+                {
+                    "location": "kernel/kernel_resize.cpp", 
+                    "frequency": 300.0, 
+                    "clflags": " -D KERNEL_NAME=kernel_resize", 
+                    "name": "kernel_resize", 
+                    "num_compute_units": 1, 
+                    "compute_units": [
+                        {
+                            "name": "kernel_resize", 
+                            "slr": "SLR0", 
+                            "arguments": [
+                                {
+                                    "name": "gmem0", 
+                                    "memory": "HBM[0]"
+                                }, 
+                                {
+                                    "name": "gmem1", 
+                                    "memory": "HBM[1]"
+                                }, 
+                                {
+                                    "name": "gmem2", 
+                                    "memory": "HBM[2]"
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ], 
+            "frequency": 300.0, 
+            "name": "kernel_resize"
+        }
+    ], 
+    "testinfo": {
+        "disable": false, 
+        "jobs": [
+            {
+                "index": 0, 
+                "dependency": [], 
+                "env": "", 
+                "cmd": "", 
+                "max_memory_MB": 32768, 
+                "max_time_min": {
+                    "vitis_hw_build": 470, 
+                    "vitis_hw_emu": 300, 
+                    "vitis_sw_emu": 60, 
+                    "vitis_hw_run": 10
+                }
+            }
+        ], 
+        "targets": [
+            "vitis_sw_emu", 
+            "vitis_hw_emu", 
+            "vitis_hw_build", 
+            "vitis_hw_run"
+        ], 
+        "category": "canary"
+    }
+}
diff --git a/codec/L2/demos/resize/host/test_resize.cpp b/codec/L2/demos/resize/host/test_resize.cpp
new file mode 100644
index 0000000000..3bf5c89ea1
--- /dev/null
+++ b/codec/L2/demos/resize/host/test_resize.cpp
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <ap_int.h>
+#include <fstream>
+#include <hls_math.h>
+#include <iomanip>
+#include <iostream>
+#include "xcl2.hpp"
+#include "utils.hpp"
+#include "kernel_resize.hpp"
+#include "xf_utils_sw/logger.hpp"
+
+using namespace std;
+
+int main(int argc, const char* argv[]) {
+    int nerror = 0;
+    // Host flow: parse arguments, load the raw source image, run kernel_resize, then write the resized raw image.
+    xf::common::utils_sw::Logger logger(std::cout, std::cerr);
+    cl_int fail;
+
+    ArgParser parser(argc, argv);
+    std::string xclbin_path;
+    if (!parser.getCmdOption("-xclbin", xclbin_path)) {
+        std::cout << "ERROR:xclbin path is not set!\n";
+        return 1;
+    }
+
+    std::string infile;
+    if (!parser.getCmdOption("-i", infile)) {
+        std::cout << "ERROR: input file path is not set!\n";
+        return 1;
+    }
+
+    std::string outfile(infile);
+    std::size_t found = outfile.find_last_of("./"); // a '.' is an extension dot only if it follows the last '/'
+    outfile.insert((found == std::string::npos || outfile[found] != '.') ? outfile.size() : found, "_resized");
+
+    ap_uint<32> src_width = 0, src_height = 0; // 0 means "not provided"; rejected by the size check below
+    std::string input_width;
+    if (!parser.getCmdOption("-srcw", input_width)) {
+        std::cout << "INFO: image input width is not set!" << std::endl;
+    } else {
+        src_width = std::stoi(input_width);
+    }
+
+    std::string input_height;
+    if (!parser.getCmdOption("-srch", input_height)) {
+        std::cout << "INFO: image input height is not set!" << std::endl;
+    } else {
+        src_height = std::stoi(input_height);
+    }
+
+    ap_uint<32> dst_width = 0, dst_height = 0; // 0 means "not provided"; rejected by the size check below
+    std::string output_width;
+    if (!parser.getCmdOption("-dstw", output_width)) {
+        std::cout << "INFO: image output width is not set!" << std::endl;
+    } else {
+        dst_width = std::stoi(output_width);
+    }
+
+    std::string output_height;
+    if (!parser.getCmdOption("-dsth", output_height)) {
+        std::cout << "INFO: image output height is not set!" << std::endl;
+    } else {
+        dst_height = std::stoi(output_height);
+    }
+
+    if (dst_width == 0 || dst_height == 0 || src_width < dst_width || src_height < dst_height) {
+        std::cout << "WARNING: The output size is invaild!\n";
+        return 1;
+    }
+
+    ap_uint<WDATA> pixel_64 = 0;
+    ap_uint<WBIT> srcPixel;
+    ap_uint<WBIT> tmpDst;
+
+    ap_uint<32>* configs = aligned_alloc<ap_uint<32> >(4 + 1);
+    configs[0] = src_width;
+    configs[1] = src_height;
+    configs[2] = dst_width;
+    configs[3] = dst_height;
+
+    ap_uint<WDATA>* axi_src = aligned_alloc<ap_uint<WDATA> >(MAX_SRC);
+    ap_uint<WDATA>* axi_dst = aligned_alloc<ap_uint<WDATA> >(MAX_DST);
+
+    // read raw data
+    FILE* fp;
+
+    if ((fp = fopen(infile.c_str(), "rb")) == NULL) {
+        cout << "Error reading file fail." << '\n' << "Please check the path: " << infile << endl;
+        exit(1);
+    } else {
+        cout << endl << "Read image successfully." << endl;
+    }
+
+#if NPPC == 1
+    for (int i = 0; i < src_width * src_height; i++) {
+        fread(&srcPixel, 1, 1, fp);
+        axi_src[i] = srcPixel;
+    }
+#else
+    for (int i = 0; i < src_width * src_height; i++) {
+        fread(&srcPixel, 1, 1, fp); // std::cout<< "pixel:" << srcPixel << std::endl;
+        pixel_64.range((i % 8) * WBIT + WBIT - 1, (i % 8) * WBIT) = srcPixel.range(WBIT - 1, 0);
+        if ((i + 1) % 8 == 0 || (i + 1) == src_width * src_height) axi_src[i / 8] = pixel_64; // also flush the tail
+    }
+#endif
+    if (fp != NULL) fclose(fp);
+
+    // do pre-process on CPU
+    struct timeval start_time, end_time;
+    // platform related operations
+    std::vector<cl::Device> devices = xcl::get_xil_devices();
+    cl::Device device = devices[0];
+
+    // Creating Context and Command Queue for selected Device
+    cl::Context context(device, NULL, NULL, NULL, &fail);
+    logger.logCreateContext(fail);
+    cl::CommandQueue q(context, device, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &fail);
+    logger.logCreateCommandQueue(fail);
+    std::string devName = device.getInfo<CL_DEVICE_NAME>();
+    printf("Found Device=%s\n", devName.c_str());
+
+    cl::Program::Binaries xclBins = xcl::import_binary_file(xclbin_path);
+    devices.resize(1);
+    devices[0] = device;
+    cl::Program program(context, devices, xclBins, NULL, &fail);
+    logger.logCreateProgram(fail);
+    cl::Kernel resize;
+    resize = cl::Kernel(program, "kernel_resize", &fail);
+    logger.logCreateKernel(fail);
+    std::cout << "kernel has been created" << std::endl;
+
+    std::vector<cl_mem_ext_ptr_t> mext_o(3);
+    mext_o[0] = {(unsigned int)(0) | XCL_MEM_TOPOLOGY, configs, 0};
+    mext_o[1] = {(unsigned int)(1) | XCL_MEM_TOPOLOGY, axi_src, 0};
+    mext_o[2] = {(unsigned int)(2) | XCL_MEM_TOPOLOGY, axi_dst, 0};
+
+    // create device buffer and map dev buf to host buf
+    cl::Buffer configs_buf, axi_src_buf, axi_dst_buf;
+
+    configs_buf = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                             sizeof(ap_uint<32>) * 5, &mext_o[0]);
+    axi_src_buf = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
+                             sizeof(ap_int<WDATA>) * MAX_SRC, &mext_o[1]);
+    axi_dst_buf = cl::Buffer(context, CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                             sizeof(ap_int<WDATA>) * MAX_DST, &mext_o[2]);
+
+    std::vector<cl::Memory> init;
+    init.push_back(configs_buf);
+    init.push_back(axi_src_buf);
+    init.push_back(axi_dst_buf);
+
+    q.enqueueMigrateMemObjects(init, CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED, nullptr, nullptr);
+    q.finish();
+
+    std::vector<cl::Memory> ob_in;
+    std::vector<cl::Memory> ob_out;
+    std::vector<cl::Event> events_write(1);
+    std::vector<cl::Event> events_kernel(1);
+    std::vector<cl::Event> events_read(1);
+
+    ob_in.push_back(configs_buf);
+    ob_in.push_back(axi_src_buf);
+    q.enqueueMigrateMemObjects(ob_in, 0, nullptr, &events_write[0]);
+
+    ob_out.push_back(axi_dst_buf);
+    // launch kernel and calculate kernel execution time
+    std::cout << "INFO: kernel start------" << std::endl;
+    gettimeofday(&start_time, 0);
+    int j = 0;
+    resize.setArg(j++, configs_buf);
+    resize.setArg(j++, axi_src_buf);
+    resize.setArg(j++, axi_dst_buf);
+
+    q.enqueueTask(resize, &events_write, &events_kernel[0]);
+    q.enqueueMigrateMemObjects(ob_out, 1, &events_kernel, &events_read[0]);
+    q.finish();
+    gettimeofday(&end_time, 0);
+
+    std::cout << "INFO: kernel end------" << std::endl;
+    std::cout << "INFO: Execution time " << tvdiff(&start_time, &end_time) / 1000.0 << "ms" << std::endl;
+
+    cl_ulong ts, te;
+    events_write[0].getProfilingInfo(CL_PROFILING_COMMAND_START, &ts);
+    events_write[0].getProfilingInfo(CL_PROFILING_COMMAND_END, &te);
+    float elapsed = ((float)te - (float)ts) / 1000000.0;
+    logger.info(xf::common::utils_sw::Logger::Message::TIME_H2D_MS, elapsed);
+
+    events_kernel[0].getProfilingInfo(CL_PROFILING_COMMAND_START, &ts);
+    events_kernel[0].getProfilingInfo(CL_PROFILING_COMMAND_END, &te);
+    elapsed = ((float)te - (float)ts) / 1000000.0;
+    logger.info(xf::common::utils_sw::Logger::Message::TIME_KERNEL_MS, elapsed);
+
+    events_read[0].getProfilingInfo(CL_PROFILING_COMMAND_START, &ts);
+    events_read[0].getProfilingInfo(CL_PROFILING_COMMAND_END, &te);
+    elapsed = ((float)te - (float)ts) / 1000000.0;
+    logger.info(xf::common::utils_sw::Logger::Message::TIME_D2H_MS, elapsed);
+
+    FILE* outFile;
+    if ((outFile = fopen(outfile.c_str(), "wb")) == NULL) {
+        cout << "Error writing file fail." << '\n' << "Please check the path: " << outfile << endl;
+        exit(1);
+    }
+#if NPPC == 1
+    for (int j = 0; j < dst_width * dst_height; j++) {
+        tmpDst = axi_dst[j];
+        fwrite(&tmpDst, 1, 1, outFile);
+    }
+#else
+    for (int i = 0; i < DivCeil(dst_width * dst_height, WBIT); i++) {
+        pixel_64 = axi_dst[i];
+        for (int j = 0; j < WBIT; j++) {
+            tmpDst.range(WBIT - 1, 0) = pixel_64.range(j * WBIT + WBIT - 1, j * WBIT);
+            if ((i * 8 + j) < (dst_width * dst_height)) {
+                fwrite(&tmpDst, 1, 1, outFile);
+                // std::cout << (int)tmpDst << std::endl;
+            }
+        }
+    }
+#endif
+    if (outFile != NULL) fclose(outFile);
+
+    if (nerror) {
+        std::cout << "\nFAIL: nerror= " << nerror << " errors found.\n";
+        logger.error(xf::common::utils_sw::Logger::Message::TEST_FAIL);
+    } else {
+        std::cout << "The src image size is " << src_width << "*" << src_height << ".\nThe dst image size is "
+                  << dst_width << "*" << dst_height << ".\n"
+                  << "Image resized successfully." << std::endl;
+        std::cout << "PASS: no error found.\n";
+        logger.info(xf::common::utils_sw::Logger::Message::TEST_PASS);
+    }
+
+    free(configs);
+    free(axi_src);
+    free(axi_dst);
+    return nerror;
+}
diff --git a/codec/L2/demos/resize/host/utils.hpp b/codec/L2/demos/resize/host/utils.hpp
new file mode 100644
index 0000000000..76cffecea0
--- /dev/null
+++ b/codec/L2/demos/resize/host/utils.hpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2020 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef UTILS_H
+#define UTILS_H
+#include <sys/time.h>
+inline int tvdiff(struct timeval* tv0, struct timeval* tv1) {  // microseconds from *tv0 to *tv1, narrowed to int
+    return (tv1->tv_usec - tv0->tv_usec) + 1000000 * (tv1->tv_sec - tv0->tv_sec);
+}
+//--------------------------------------------------------------
+
+#include <new>
+
+#include <algorithm>
+#include <cstdlib>
+#include <iterator>
+#include <vector>
+
+// Stores the command-line arguments argv[1..argc-1] and looks up "option value" pairs among them.
+class ArgParser {
+   public:
+    ArgParser(int& argc, const char** argv) {
+        for (int idx = 1; idx < argc; ++idx) mTokens.push_back(std::string(argv[idx]));
+    }
+    // Copies the token after the first occurrence of `option` into `value`; false if absent or last.
+    bool getCmdOption(const std::string option, std::string& value) const {
+        const std::vector<std::string>::const_iterator last = mTokens.end();
+        std::vector<std::string>::const_iterator hit = std::find(mTokens.begin(), last, option);
+        if (hit == last || ++hit == last) return false;  // `value` is left untouched on failure
+        value = *hit;
+        return true;
+    }
+
+   private:
+    std::vector<std::string> mTokens;  // argv[1..argc-1], in their original order
+};
+
+template <typename T>  // 4096-byte-aligned storage for `num` objects of T; throws std::bad_alloc on failure
+T* aligned_alloc(std::size_t num) {
+    // Reject counts whose byte size would wrap std::size_t and silently under-allocate.
+    if (num > static_cast<std::size_t>(-1) / sizeof(T)) throw std::bad_alloc();
+    void* ptr = NULL;
+    if (posix_memalign(&ptr, 4096, num * sizeof(T))) throw std::bad_alloc();
+    return reinterpret_cast<T*>(ptr);
+}
+#endif
diff --git a/codec/L2/demos/resize/images/t0.raw b/codec/L2/demos/resize/images/t0.raw
new file mode 100644
index 0000000000..6d505cdb46
--- /dev/null
+++ b/codec/L2/demos/resize/images/t0.raw
@@ -0,0 +1 @@
+??????????????????????????>>>>>>=>>>????????@@@@@@@@@@@@????????>>>>>>>>@@@@@@@@?@@@@@@@@@@@@???AA@@?@@@@@@AAAAAAAAAAAAA@@@AAAAAAAAAAAAAAAAAAAAAAAAAABBBABBCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFGGGGGGHHHHHHGHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIFIMF<@]HHHHHHHHHIIIIIIIIIIIHHGGGGGGGGHHHGGHHGFFFGGGGGGGGGGGGGGGGHHHIJJJJJKKKKLLLKKKKKKKKKKKKKKKKKKKKKKKJKKKKKKKKLLLLKKKKLLLLLLLLLLLLLLLLKLLMMNNONNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPOOPPPPQQRRRRRRRRRRRRRRRRRRRRRRRRSSRRRRSSSSSSSSSSTTTTUUUUUUUUUUUUUUUUUUUUVVWWWWWXYYYYYYZZZZZZ[[[\\\\]]]]]^^^^^^__`____`abbbbbbbbbbbaaabbcbccccddddddddddd???????????????????????????>>>??>>>>???????@@@@@@@@@@@@@????????????????@@@@@@@@@@@@@@@@????@@@@A@?????@@@@AAAAAAAAAAAAA@@AAAAAAAAAAAAAAAAAAAAAAAAAABBBBABBCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFGGGGGGHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIKE<9;JJKHHHHHHHHIIIIIIIIIIIHHHGGGGGGGHHHGHHHGFFFGGGGGGGGHHHHHHHHHHHHIJJJKLLLLMMMKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKLLLLLLLLLLLLLLLLLMMLLLLLLMNNOONNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPOPPPPQQQRRRRRRRRRRRRRRRRRRRRRRRRSRRRRRRSSSSSSSSSTTTTUUUUUUUUUUUUUUUUUUUUVVWWWWWXYYYYYZZZZZZZ[[\\\\]]]]]^^^^^^________`aabbbbbbbbbbaaabbcccccdddddddddddd????????????????????????@@???????????????@@@@@@@@@@@@@@@????????@@@@@@@@@@@@@@@@@@@?????>>???@@@@??>>>??AAAAAAAAAAAAAAAA@AAAAAAAAAAAAAAAAAAAAAAAAAAABBBBABBCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEDDDDDDDDEEEEEEEEEEFFFFFFFFFFGGGGGGHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIJC9=EW6LHHHHHHHHIIIIIIIIIIIHHHGGGGGGHHHHGHHHGGFGGGGGGGGGHHHHHHHHHHHHIJJIJJJKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKLLLLLLLLLLLLLLLMMMMLLLLMMNNOONNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPPQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSSTTTTTUUUUUUUUUUUUUUUUUUUUVVWWWWWXYYYYYZZZZZZ[[[\\\]]]]]^^^^^^^________`aabbbbbbbbbbbaabccccccdddddddddddd????????????????????????A@@@?@@@????????@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@??>>>>>????@@??>>=>>>AAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAABBBBBABBCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEDDDDDDDDEEEEEEEEEEFFFFFFFFFFGGGGGGHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIGGEJJT3MHHHHHHHHIIIIIIIIIIIIHHHHGGGHHHHHHHIHGGGGHHHHHHHHHHHHHHHHHHHHIJJJIIIJJJJKKKKKKKKKKKKKKKKKLLLKKKKKKKKKKKKKKKLLLLLLLLLLLLLLMMMMMMMMMMMNNNOONNNNNNNNOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSTTTTTTUUUUUUUUUUUUUUUUUUUUVVWWWWWXYYYYYZZZZZ[[[\\\]]]]]^^^^^^^________```abbbbbbbbcbbbbbcccccccddddddddddd????????????????@@@@@@@@AA@@@@@@@@??????@@@@@AAA@@@@@@@@AAAAAAAA@@@@@@@@@@@@@@@@@@@??>>>??????????>>=>>>AAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBABBBBBBBABBCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFGGGGGGHHHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIKKI@HCPHHHHHHHHIIIIIIIIIIIIHHHHGGHHHHHHHIIHGGGHHHHHHHHHHHHHHHHHIHHIJJJJKKKKKLLLLLLLLLLLLLLLLLLLLLLLLKKKLLLLLLLLLLLLLLLLLLLLLLLLMNNNNNNMNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSTTTTTTTTUUUUUUUUUUUUVVVVVVVVVVWWWWWXYYYYZZZZZZ[[\\\\]]]]^^^^^^^________```aabbbbbbbbcbbbbbcdbbbcccccdddddddd????????????????@@@@@@@@AA@@@@@@????@@@@@@@AAAAAAAAAAAAAAAAAAAAA@@@@@@@@AAAAAAAAA@@@??>>@@@@????@??>>>??AAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBABBCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFGGGGGGHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJHLI>HVNIIIIIIIIIIIIIIIIIIIIIHHHGGHIIIHHIIIHGGHHHHHHHHHHIIIIIIIIIIIIKKKKJKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMLLLLKKLLLLLLLLNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPOOOOOOOOPPPPPPPPPPPPPPPPPQQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSRSSSSTTTTTTTTUUUUUUUUUUUUVVVVVVVVVVWWWWWXXYYYZZZZ[[[[\\]]]]]^^^^_^_________``aaaabbbbbbbbccbbbccdbbccccdddddddddd????????????????@@@@@@@@A@@@?@@@???@@@@@@AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA@@@@???@@@@@@@@A@?????@BBBAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBABBCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFGGGGGGHHHHHHIIIIIII
IIIIIIIIIIIIIIIIIIIIIIIIIIFMNET]LIIIIIIIIIIIIIIIIIIIIIIIIFGHIIIHHIIIHGGHIHHHHHHHHIIIIIIIIJJJJKLLKJJJJKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLNNOOOONNOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPOOOOOOOOPPPPPPPPPPPPPPPPQQQQQRRRRRRRRRRRRRRRRRRRSSSSSSSSSSTTTTSSSSSTTTTTTTTTUUUUVVVVVVVVVVVVVVVVVVWWWWWXXXYYZZZZ[[[\\\]]]]^^^^___________`aabbbabbbbbbbbccbbbcddcccddddedddddddd????????????????@@@@@@@@@@@???@@???@@@AAAAAAAAAAAAAAAAAABBBBBBBBAAAAAAAAAAAAAAAA@@@@@@@??@@@AAAAAA@@?@@@BBBAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBABBCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCDEEFEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFGGGGGGHHHHHHIIIIIIIJIIIIIIIIIIIIIIIIIIIIIIIIJFNPFZ\QJJJJJJJJIIIIIIIIIIIIIIIIFGHIIIHHIIIHGGHIHHHHHHHHIIIIIIIIKJJKLLLLKLLLLLMMLLLLLLLLLLLLLLLLMLLLLLLLLLLLLLLLLLLLLMMMLLLLLLLLNOOOOOONOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPOOOOOOOOPPPPPPPPPPPPPPPPQQQQRRRRRRRRRRRRRRRRRRRRTTTTTTTTSTTUUTTSSSTTTTTTTTTTUUUUVVVVVVVVVVVVVVVVVVWWWWWXXXYYZZZ[[[[\\]]]]^^^^___________``abbbbbbbbbbbbbcccbbcdddddeeeeedddddddd???ABB@>?@@@AA@@@@AAAA@@@@@@@AAAAA@@@AABAAAAAAAAAAAAAAAAABBBBBBAAAAAAAAAAAAAAAAAAAAAA@@@@@@AAAAAAAAAAAAAAAAAAAAABBBBAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFEEGGGGGGGGHHHHIIIIIIIIIIIIJJIIIIIIKKJIIIIIIIIIIIJJGIIGFVRNKFKJHLKLKJIHHIIHIIHIIIIHGHHIIIHHJIIHHHHHIIIIIIIIHIIJJIIIJJJKKKKLKKKKKKKKKKKLLLLMKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPOOOOOOPPPPPPPPPPOPPPPPQRRRRRRRRRSSRQQQQQSSSTTTTTSSSSSTTTTTTTTTTTTTTTUUUUVVVVVVVVWWWWVVVVVVWWWXXXXXXXXXXXXXXXYZ[\[[\\\\]]^^^^]^^^^___``aaaaabbbbbbbbbbccccccddddeeeeeeeeeeeeeeeee???@AA@?@@@AAAAA@@AAAA@@@@@@@AAAAA@@@AABAAAAAAAAAAAAAAAAABBBBBBAAAAAAAAAAAAAAAAAAAAAAA@@@@AAAAAAAAAAAAAAAAAAAAAABBBBAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFEGGGGGGGGHHHHIIIIIIIIIIIIJJIIIIJJKKJIIIIIIIIIIIJJIIHGGSMMKFKJHLKKKJIHIIIIIIIIIJIIGHHIIIHHIIIHHHIIIIIIIIIIIIIIIIIIJJJKKKLLKKKKKKKKKKKLLLLMLLLLLLLLLLLLLLL
LLLLLLLLLLLLLLLLLLLLMMMMMNNNNNNNNOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPOOOOOPPPPPPPPPPPOPPPPPQRRRRRRRRRSRRQQQRRSSTTTTTTSSSSTTTTTTTTTTTTTTTUUUUUVVVVVVVVVVVVVVVVVWWWXXXXYYYYYYYYYXXYYZ[\\\\]]]]]__^^^^___```aaaaaaabbbbbccccccccccddddeeeeeeeeeeeeeeeeee????@A@@AAAABAAA@@AAAA@@@@@AAAAAA@@@@AAAAAAAAAAABBAAAAAAABBBBBBABBBBBBBBBBBBBBBBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGHHHHIIIIIIIIIIIIIIIIIJJKKKJIIIIIIIIIIJJJHHFHIPJQKFKJHLKKKJIHIIIIJIIIJJIIHHHIIIIIIIIHIIIJIIIIIIIIIIIIIJJJJKKKKKLLKKKKKKKKKKKLLLLMLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMNNNNNNNNOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPOOOOOPPPPPPPPPPPPPPPPPQRRRRRRRRRRRQQRRRSSTTTTTTTSSTTTTTTTTTTTTTTTUUUUUUUVVVVVVVVVVVVVVVVWWWXXXYYZZZZZZZZYYYYZ[\\]]]]^^^^_______`aaaaaaaaaaabbbbbcccccdddccddddeeeeeeeeeeeeeeeeee?????@AAABBBBBBAAAAAAAAAAAAAAAAA@@@@AAAAAAAAAAAABBBAAAAAABBBBBBABBBBBBBBBBBBBBBBAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGHHHHIIIIIIIIIIIIIIIIJKKKKKJIIIIIIIIIIJJJGGFIGJDPKFKJHLKKKJIIIIIIJIIIJJJIHHHIIIIIIIIIIIIIIIIIIIIIJJIIJJKKKKKKKLLLKKKKKKKKKKKLLLLMMMMMMMMMLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPOOOOPPPPPPPPPPPPPPQPPPQRRRRRRRRRQQQRRSSSTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUVVVVVVVVVVVVVWWWWWXXYYYYZZZZZZZZZYYZZ[\]]]]^^^^________`aaaaaaaaaabbbbcccccccccccddddeeeeeeeeeeeeeeeeeee@@@??@AABBBBBBBBAAAAAAAAAAAAAAAA@@@AAAAAAAAAAAAABBBBAAAAABBBBBBAAAAAAAAAAAAAAAAAAAAABBBBBBBBAAAABBBBBBBBBBBBBBBBBBBBAAAAAAAABBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEGGGGGGGGGGGGGGGGFFGGGGFFGGGGGGGGHHHHIIIIIIIIIIIIIIIJKKKKKKJIIIIIIIIIJJJKJJJKCEBMJFKJHLKKLKIIIJJIJJIIJJJIHHHIIIJJJJJIIIIIJJJJJJJJKJJJJKLLKKKLLLLLKKKKKKKKKKKLLLLMMMMMMMMMMMMMMMMMMMMMMMMMLLLLLLLLLLMMMMNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPOOOPPPPPQQQQQQQQPQQQPQQRRRRRRRRRQQRRSSTTTTTTTTTTTTTTTTUUUUUUUUU
UUUUUUVVVVVVVVVVVVVWWWXXXXXXYYYZZZZZZZZZZZZZZ[\]^]]]^^^^___^^___`aaaaaaaaaabbbbccccccccccdddddeeeeeeeeeeeeeeeeeeeAAA@??@ABBBBBBBABBAAAABBBBBAAAAA@@AAAAAAAAAAAAAABBBBBBBBABBBBBBAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBABBBBBBBBBBBBBBBBBBBBAAAAAAAABBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFGGGGGGFGGGGGGGGHHHHIIIIIIIIIIIIIIJKKKKKKKJIIIIIIIIIJJKKKKOOEMMQJFKKILJKLKJIJJJJJJIJJKJJIIIIIJJKKKJJIIIIJJJJJJJJJJKKKLLLLLLLLLLLKKKKKKKKKKKLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMLMMMMNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPOPPPPPPPQQQQQQQQPQQQQQRSRRRRRRRRQQRSSTTTTTTTTTTSTTTTUUUUUUUUUUUUUUUVVVVVVVVVVVVVVWWWXXYYXYYYZZZZZZZZZZZZ[ZZ[[\]^]^^^^____^^^__````aaaaaaabbbbcccccccccccddddeeeeeeeeeeeeeeeeeeeeBBBA@?@ABBBBBBAABBAAAABBBBBBBAAA@@AAAAA@AAAAAAAAAAABBBBBABBBBBBAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBAAAAAAAABBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGHHHHIIIIIIIIIIIIIJJKKKKKKKJIIIIIIIIIJJKKKIPOEWYQJFLKILJJLKJIJJJJKJJJJKJJIIIIIJKKKJJJIIJJJJJJJJJJJKKLLLLLLLLLLLLLKKKKKKKKKKKLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPQQQQQQQQQQQQQQRSRRRRRRRRQRRSTTTSTTTTTTSSTTUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVWWXXYYYYYZZZ[[ZZZZZZZZ[[[[\]^^^^____``_____``a`aaabbbbbbbbbcccdddcccccddddeeeeeeeeeeeeeeeeeeeeBCCBA@@ABBBBBAAABBAAAABBBBBBBAAA@@AAAAA@AAAAAAAAAAAABBCCABBBBBBABBBBBBBBBBBBBBBBBBBBBBBCCBBBBBBBBBBBBBBBBBBBBBBBBBBBAAAAAAAABBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGHHHHIIIIIIIIIIIIJJKKKKKJKKJIIIIIIIIIJKKKLGNL@WYHJFLKILJJLKJJJJJJKJJJKKJJIIIIIJKKJJJJJJJKJJJJJJJJJKKLMMLLLLLLLLLLKKKKKKKKKKKLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPQQQQQQQQQQQQQQRSRRRRRRRRQRSSTTSSTTTTTSSSTUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVUVVWWXXYYYYZZ[[[ZZZZZZZZ[[[[\]^____```````````aaaaabbcccbbbbbcccdddddddcddddeeeeeeeeeeeeeeeeeeeeBBBBBAAABBBBBBB
BBBBBBBBBAAAAAAAA@@@@@@@@AABBBBBBAAAAAAAACCCCCCCCBBBBBBBBBBBBBBBBBBAAAAAABBBBBBBBBBBBBBBBAAAAAAAABBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCBBBBCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEFFFFFGGGGGGGGGGGGGGGGGGGGGHHHHHHGGGGGGGGGHIIIIIHIIIIIIIIKKKKKKKKJJJJJIIIHHIIJKKKJGFKFSAOIIIJJKKKJJJJJJJJJJJJJKKKJJIIIJKLJJJJJJJJJJJJJJJJKKKKLLLLLLLLLLLLKKKKKKKKKKKLLLMMKKKKKKKKMMLMMMNNMMLMMNMMNNMMMMMMMMMMMNNNOOOOOOOOOOPPPPQQPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQQQQQQQRRRRRRRRRRRRRRRRRRSSTTTTTTTTTTUUTTTTUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVWWXXXYYYZZ[[\[[[[[[[[\\[[\^_a___````_^___`aaabaaabbbabbbbccccddddddddeeeeeeeeffeeeeeeeeeeeeeeBBBBBBBBBBBBBBBBBBBBBBBBAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCCBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEFFFFFGGGGGGGGGGGGGGGGGGGGGHHHHHHGGGGGGGGHHIIIIIHIIIIIIIIJJJJJJJJJJJJIIIIHHIIJKKKKMKIFYHKIIJJKKKKJJJJJJJJJJJJJKKKKJJIJJKLJJJJJJJJKKKKKKKKKKKLLLLLLLLLLLLLKKKKKKKKKKLLLLMMLLLLLLLLMMLMMMNNMMLMNNNMNNMMMMMMMMNNNNNNOOOOOOOOOOPPPPQQPPPPPPPPQQQQQQQQQQQQQQQQPQQQQRRRRRRRRRRRSSSSSSSSSSSSSSSSSSTTTTTUTTTTTTTTTTTUUUUUUUUUUUUUUUUUVVVVWVVVVVVVWWWWXXYYYZZZ[[[[[[[[[[\\]]\\]^_`^_______^___`aaaaaaabcbbbbbbccccddddddddeeeeeeeeeeeeeeffeeeeeeeeCCCBBBBBBBBBBBBBBBBBBBBBBBBBBBBBAAAAAAAAABBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCCCCCCCCCCCCCCCCCCCCCCBBCCCCCCCDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHIIJIIHIIIIIIIIIIIIIIIIIIIIIIIIIIIJJKKKILMKDTILIJJKKKKKJJJJJJJJJJJJJKKKKKJJJKLLKKKKKKKKLLLLLLLLLLLLLMMMLLLLLLLLKKKKKKKKLLLLLMMMLLLLLLLLMMLMMMNNMMMMNNNMNNMMMMMMNNNNNNOOOOOOOOOOOPPPPQQQQQQQQQQQRRRRRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTUUUUTTTTTTUUUUUUUUUUUUUUUUUUVVVVVWWWWWWWWXXXXYYYZZZZ[[[[[[\\\\\\\^^^^^^__^^^____^^___`aaaa``abcccbbbbccccddddeeeeeeeeeeeeddddefggeeeeeeeeCCCCCBBBBBBBBBBBBBBBBBBBBBBBBBBBAAAAAAAABBBCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCCCCCCCCCCCCC
CBBCCCDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHIIJJIIIHHHHHHHHIIIIIIIIIIIIIIIIIIJJJKKKIIMOCFCNIJJKKLLLJJJJJJJJJJJJJKKKKKKKKLLMLLLLLLLLLLLLLLLLLLLMMMMMMLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMLMMMNNNMMMNNNNNNNMMMNNNOOOOOOOOOOOOOOOPPPPPQQQQQQQQQQQRRRRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSUUUTTTTTUUUUTTTTUUUUUUUUUUUUUUUUUVVVVWWWWWWWXXXXYYYYZZZZ[[[[[[[[\\\]]]]^______^^]]^^^^^^^___`aaaaa`abccbbbbbccccdddeeeeeeeeeeeeeddddefggeeeeeeeeCCCCCBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDBCCCDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFGGGGGGGGFFFFFGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHIIJJJJIIHHHHHHHHIIIIIIIIIIIIIIIIJJJJKKKKMJMODFHNIJJKKLLLJJJJJJJJJJJJJKKKKKKKLLLMLLLLLLLLLLLLLLLLLLLMMMMMMMMLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMLMMMNNNMMNNONNNNNNNNNNOOOOOOOOOOOOOPPPPPPPQQQQQQQQQQQQRRRRRRRRRRRRRRRRRRRRRSSSRRRRRRRRTTTTTTTTSSSSSSSSTTTTTTTTTTTTUUUUUUUUUVVVVVVVVVVVVVVVWWWWWWXXXYYYZZZZZ[[[[[[[[[[[\\]]^^__`````_^^]]^^^^^^^___`aaabaaabbbabbbbccccdddeeeefeeeeeeeeddddeeffeeeeeeeeCCCBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHIJJJJJIIIIIIIIIIJJJJJJJJIIIIIIIIKKKKKKKKKJLJGSXQIJJKKKKKJJJJJJJJJJJJJKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMMLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMLMMMNNNNMNOOONNNNNNNOOOOOOOOOOOOOOPPPPPPPPQQQQQQQQQQQQRRRRRRRRRRRRRRRRQQRRRRSSRRRRRRRRSSSSSSSSSSSSSSSSTTTTTTTTTTTUUUUUUUUVVVVVVVVVVVVVVVWWWWXXXXXYYZZZZZZZ[[[[[[\\\\\\]]]^__```````_^^]]^^^^^^^___`aaabbaabbbabbbbcccccdddeeeeeeeeeeeeeeeeeeefeeeeeeeeBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHJJJKJJIIIIIIIIIIIIIIIIIIIIIIJJJ
JKKKKKKKKIHJIEOZRIIJJKKKKJJJJJJJJJJJJJKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMLMMMNNNNNNOOONNNNNNNOOOOOOOOOOOOOOPPPPPPPQQQQQQQQQQQQQRRRRRRRRRRRRRRRRQRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTUUUUUUUUUUUUVVVVVVVVVVVVVVVWWWWWXXXXXXYZZ[[ZZZ[[[[[[[\\\]]]]]^__``a````__^^]]^^^^^^^___`aaabaaabbbbbbbbcccccccddeeeeeeeeeeeeeeeeeefeeeeeeeeBBBBBAAABBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHJJJKJJIIIIIIIIIIIIIIIIIIIIIJJJJJKKKKKKKKNHLL?=LMIIIJJKKKJJJJJJJJJJJJJKKKJKKLLLLLLLLLLLLLLLLLLLLLKLLLLLMMMMMMMLLLMMMMMMMMMMMMMMMMMMMMMMMMMMLMMMNNONNNOOONNNNNNOOONNOOOOOOOOOOPPPPPPPQQQQQQQQQQQQQRRRRRRRRRRRRRRRRRRRRSSSSTTTTTTTTSSSSSSSSTTTTTTTTTTTUUUVVUUUUUUTTVVVVVVVVVVVVVVVVWWWWXXXXXXYYZ[[[ZZZ[[[[[[[[\]]]^]^^_``aa```____^]^^^^^^^^___`aaaa``abccbbbbbcccccccdddeeeeeeeeeeeeeeeeffeeeeeeeeBBBCDCBBCCBBBBCDCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFGFFFFFFFFEFGGGGGGGGFFFGGHGGGGGGGGHHHHHHHHGHHHHHIJJJJJJIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJKKKKJN;AGI8RJJKKKKKJJJJJJJJJJJJJKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMNMMLLLLKKLMMMMNNNMMMMMMMMMMMMMMMMNNNNNNNNOOOOOOOOOONNNNOOOOOOOOOOPPPPPPPPPPPPQQQQQQQQQQQQPQRRRQRRRRRRRRSSSSSSSSSSTTTTTTTTSSSSSSSTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVUUVVVVWWVWWXXXXXYYYZZZZZZZZ[[\]]]]]]^^^^^^^_`aa````__^^]]]^^^^__]]^_`abcaaaaabbbbbbbccccddeeeeffddddddddeefggffeffffffffBBBCDCCBCCBBBCCDCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFGFFFFFFFFGGGGGGFFGGFFFGGGGGGGGGGGHHHHHHHHGHHHHHIJJJJJJIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJKKKKJKBGIK6KKKLLLLLLJJJJJJJJJJJJKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMNMMLLLLKKLLMMMMMNMMMMMMMMMMMMMMMNNNNNNNNNOOOOOOOOPOONNOO
POOOOOOOOPPPPPPPPPPPPQQQQPQQQQQQPPQRRRQRRRRRRRSSSSSSSSSSSTTTTTTTTSSSSSSSTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVUVVVVWWWWWXXXXXXYYZZZZZ[ZZ[[\\]]^^^^^^^^`___```______^]]]]]^^^^_^^__`abbaaaabbbbbbbbccccddddeeeeddddddddeeeeffggffffffffCCCCDCCBCCBBCCCDCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFGFFFFFFFFHHGGFFFFFFGGGGGGGGGGGGGGHHHHHHHHGHHHHHIJJJJJJIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJKKKKMIJLGL8GJJJKKKJJJJJJJJJJJJJKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMNMMMLLLLLLLLLMMMMMMMMMMMMMMMMMNNNNNNNNNNNOOOOOOOOPOOOOOOPOOOOOOOOPPPPPPPPPPPPQQQQPPQQQQPPQQRRRQRRRRRRRSSSSSSSSSSSTTTTTTTTSSSSSTTTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVVVVVVWWWXXXYYYYYYZZZZZ[[[[[\\]]]^^^^^^^^a`_____^____^^]]]]]^^^^^___``aaaaaaabbbbbbbccccccdddddeeddddddddeeeeefhhffffffffDCCCCDCBCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFGFFFFFFFFHHGFFFGGFFGHHGGGGGGGGGGGHHHHHHHHHHHHHHIJJJJJJIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJKKKKMENK@I?KIIIJJJJJKKKKKKKKKKKKKKKKLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMNNMMMMLLLLLLLLMMMMMMMMMMMMMMMNNNNNNNNNNNNNOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPQQQQQPPQQQQPPQRRRRQRRRRRRSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVVVVVWWWWXYYYZZZZZZZZZ[[[\\\]]]]^^^^_____a`_____^^^^^^]]]]]]]^^^^__``aaabaaabbbbcbbccccdddddddeeeeeeeeeeeeeeeffghffffffffDDCCCDCCCCCCDDDDDDDDDDDDCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFGGGGGGGGGGGGFGGHHFGGHHHGGGGGGGGGGHHHHHHHHHHIHHHIJJJJJJIIIIIIIIIIIJJJJJJJJIIIIIIIIJJJJKKKKJCOK<GHQKKKKKKKKLLLLLLLLKKKKKKKLLLLLLLLLLLLLLLLLMMMMMMMMMMMMNNNNMMMMMLLLLLLLMMMMMMMMMMMMNNNNNNNNOOOOOOOOOOOOOOOONOOOOOONPPPPPPPPPPPPPPPPPPPQQQQRPQQQQQQPQRSSRRRRRRRSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVVVVWWWWXYYYZZZZZZZZZ[[[
[\]]]]]]^]]^^_`````__``__^^^^^^]]]]]^^^^___``abbbabbbbccccccccdddddeeeeefeeeeeeeeeffgggffffffffffEDCCCDCCCCDDDDDDDDDDDDDDCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFGGGGGGGGGGGGGGGHHGGGGHGGGHHHHHHHHHHHHHHHHHHIIHHIJJJJJJIIIJJJJJJJJJJJJJJJJIIIIIIIIJJJJKKKKJGQOCKTVKKJJJKKKLLLLLLLLKKKLLLLLLLLLLLLLLLLLLLLLNNNNNNNNNNNNNNNNNNMMMMLLLLMMMMNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPQQQQRRQQQRRQQQRRSSRRRRRSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVVWWWWXXXYYZZZZZZZZZ[[[[\]]]]]]]]\]]^_`aa____```_^^^^^^^]^^^^_____``aabbbbbbbccccccccdddddeeeefffeeeeeeeeefggggfeffffffffFEDCCDDCCCDDDDDDDDDDDDDDDDDDDDDDDDDDCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDEEEEEFFFEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFGGGGGGGGGGGGHHGGGGGGGGGHHHHHHHHHHHHHHHHHHHHIIHIIJJJJJJIIIJJJJJJJJJJJJJJJJJJJJJJJJJJJJKKKKJLOPIM[TJJJIJJKKLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMNNNNNNNNNNNNNNNNNNNMMMMMMMMMNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOPPQQPPOPPPPPPPPPPPPPPPPPPQQQQRRRRRRRRRRRRSSRRRRSSSSSSSSSSSSSSSSTTTTTTTTUUUTTTTTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVWWWWWXXXYYYZZZZZZZ[[[[\\]]]]]]]]\]]^__``____```_^____^^^^^____````aaaabbbbbbcccccccddddddddeeeeeeeeeeeeeffffggggffffffffFEDCCDDCCCDEEDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDEEEEFFFFEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFGGGGGGGGGGHHHHGGFHGGGGGHHHHHHHHHHHHHHHHHHHHIIHIIJJJJJJIIIJJJJJJJJJJJJJJJJKKKKKKKKJJJJKKKKJMHKHJZMLLKKKLLMLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMNNNNNNNNNNNNNNNNNNNMMMMMMMMNNNNOOOOOOOOONNNNOOOOOOOOOOOOOOOOOOOOPQQRRQQPPPPPPPPPPPPPPPPPPQQQQQRRRRRSSRRRRSSSRRRRSSSSSSSSSSSSSSSSTTTTTTTTUUUUTTTTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVWWWWXXXXYYYZZZZZZ[[[[\\\]]]]]\\\]]]^^__``___```_______^^____````aaaaaaaabbbccccccccddddecddddeeeeeeeeeeeffeefghiffffffffEEEEDDEEEEEEEEEEEEEEEEEEEEEEEEE
EDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEFEDDDDEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEDDEEEEEEEEDDDEEEEEEEEEEEEEEEEEEEEEGGGGGGGGGGGGGGGGHHHHHHGGGGGGGGGGHHHHHHHHHHHHHHHHIIIIIIJJJJJJJJJJJJJJJJJJJJJKKLLLLLLLLLLLJJJJJJKKJJEJGGTRKKKKLLLLLLLLLLLLMMMMMMMMLLLLLLLLLLMMLLMMNNNNNNMMNNNNNNNNNNMMNNMLMMMMMMMMNNNNNNNNNNNNOOOOOOOOOOOOPONMOPQQRRRQQQQPQQQQPPPPPPPPOOOPRRRRRRRRSSSSSSSSRRRSSRRRSSRRRSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUVVUUUVVVVVVVVVVWWWWWXXXXXXYYZZZ[[[[[[[[\\\]]\\\\[[]]]]]]]]`````_____````__^^__`aaaaaaabbbbbbcccccccccccdeedddddeeedeeeeeffffffgggghhhhhhhhFEEEDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEFEEEEEEEEEEDDDDDEDDDDDDDDDDDDDDDDDDDDDDEEDDDDDDDDEEEEEEEEEEFFFFEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGHHHHHHGGGGGGGGGGHHHHHHHHHHHHHHHHIIIIIIJJJJJJJJJJJJJJJJJJJKKKLLMMLLLLLLLLKKJJJJKKKIHILJISKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMLLMMNNNNNNNMNNNNNNNNNNMMNNMLMMMMMMMMNNNNNNNNNNNNOOOOOOONNOOOPOMMNPQQQQQQQQQQPPPPPPPPPPQPPPPQRRRRRRRRSSSSSRRRRRRSSRRRSSRRSSTTTTTUUTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUVVVUVVVVWWWVVWWWWWWXXYYYYYYYZZZ[[[[\\\\\\\\]]]]\\\\]]]]]]]]``````____````__^__``aaaaaaabbbbbbccdddddccccdeeddddeeeeeeeeffffggggghhhhhhhhhhhFEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEFFEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEDDDDDDDDEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGHHHHHHGGGGGGGGGGHHHHHHHHHHHHHHHHIIIIIIJJJJJJJJJJJJJJJJJJKKKLLMMMMMMMMMMMLKKJJJJJKILIPLATKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMLMNNNNNNNNNNNNNNNNNNMMMNNMMMMMMMMMMOOOOOOOONNNNOOOOONNNNNNOONMMNOPPPPPPPPPPOOOOPPPPOPPPPPQQRRRRRRRRTSSSRRRRRRSSSSRRSSSSSSTTTUUUUUUTTTTTTTTTTTTTTTTTUUUUUUUVUUUUUVVVVVVVWWWWVVWWWXXXYYZZ[[ZZZZZ[[[\\\\]]]]]]^^]]]]\\]]]]]]]]```````__``````____``aaaaaaabbbbbccdddddddcccdeedddeeeeeeeefffffgghhhhiihhhhhhhhFEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDCDDDEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFF
FFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGHHHHHHGGGGGGGGGGHHHHHHHHHHHHHHHHIIIIIIJJJJJJJJJJJJJJJJJJKKKLLLLLLLLLLLLLLKKJJJJJJJNLLKHSKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMNNNNNNNNNNNNNNNNNNMMMNNMMNNNNNNNNOOOOOOOONNNNOOOONNNNNNNNONMMNOOPOOOPPPPPNOOOOOOOOOPPPPQRRRRRRRRRTTSSRRQQRRSSSSRRSSSSSTTTUUUUUUUUTTTTTTTTTTTTTTTTUUUUUUUUUUUUVVVVVVVVVWWWWWWXXYYYZZZ[[[[[ZZ[[\\\\]]]]]]]]^^]]]]\\]]]]]]]]__```aa`````````_```aaaaaaaabbbbbbccdddddddcddeedddddeeeeeeeffffgghhhhiihhhhhhhhFFEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEDDDDDDDDDDDDCCCCCCCCCCDDDEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGHHHHHHGGGGGGGGGGHHHHHHHHIIIIIIIIIIIIIIJJIIIIIIIIJJJJJJJJKKKKKKKKLLLLLLLLKKKJJIIIIKNPEIXRKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMNONNNNNNNNNNNNNNNNMMMNNNMNNNNNNNNOOOOOOOONNNNOOOONNNNNNNNONMMNOOOOOOOPPPPOOOOONNNNOPPPPQQRRRRRRRRTTSSRRQQRSSSSSSRSSSSTTTTUUUUUUUUTTTTTTTTTTTTTTTTTTTUUUUUUUUVVVVVUVVVVWWWWWXXYYZZZZZ[[[[[[[[\\\]]]]]]]]]]]]]]\\\\]]]]]]]]]^_``aaa```aa``````aaaaaaaaabbbbbbccccccedddddeeccdddddedeeeeeffgghhhhhihhhhhhhhFFEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEFFFFFFFFFFFEEFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFHHHHHHHHHHHHHHGGGGGGGGGGHHHHHHHHIIIIIIIIIIIIIIJJIIIIIIIIJJJJJJJJKKKKKKKKKKKKKKKKKKKJJJJJIJMPBJaQKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMNMMMMNOOONNNNNNNNNNNNNNMMMNNNMNNNNNNNNOOOOOOOONNNNOOOOONNNNNNOONNNOOOOOOOOOPPPOOOOONNNOPPPPPPQRRRRRRRRTSSSRRRRRSSSSSSRRSSTTTTTTUUUUUUTTTTTTTTTTTTTTTTTTTTTUUUUUVVVVVVVVVVVWWWWXXYYYZZZZZZ[[[[[[\\\]]]]]]]]]]]]]]]]\\\\]]]]]]]]]]^_`aaa``aaaa``aaaaaaaaaaaabbbbbbccddddeeddddeecdddddeeeeeeffffgghhhhiihhhhhhhhFFEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFEEEEEEEEEFFFFFEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFGGFFFFGGFFFFFFFFGGGFFFFFFFFFFFFFGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHGGGGGGGGGGHHHHHHHHIIIIIIIIIIIIIIJJJJJJJJJJKKKKKKKKLLLKKKKKKKKKKKKKLLKKKKLLLHKLGPZ
PKKKKLLLLLLLLLLLLLLLLLLLLMMMMMMMMLMNNMMMNOOONNNNNNNNNNNNNMMMMNNNMNNNNNNNNNNNNNNNNNNNNOOOOOOONNOOOOONOOPOOPPPPPOOOOOOOOOOOQQRQQQQRRRRRRRRRSSSSSRRRSSSSSSSSRSTTTTTTTTTUUTTTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVWWWXXXXXYYYZZZZZZZ[[[\[[\\\]]]^^^^^^]]]]^^]]]]\\]]]]]]]]]]^___```aaaaaa`aaaaaaaaaaaabbbbccddeeeefeeddeeeddeeeeffffffgggghhhiiiiihhhhhhhhFFFEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGGGGGEEEEEEEEFFFGGFEEFFFFFFFFFFFFFFFFGGGFFFFFEEEEEEEEFFFFFFFFHGGFFGGHGGGGGGGGHGGGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHGGGGGGGGGGHHHHHHHHIIIIIIIIIIIIIIJJKKKKKKKKLLLLLLLLMLLLLLKKLLLLLLLLLLLLLLMMNEJHLUPPKKKKLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMNNMMNNOOOONNNNNNNNNNNNMMMMNNNMNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOPOOOPPPOPPPPPOOONOOOOOPPRRSRRQRRRRRRRRRRSSSSSSSSSSSSSSSSRSTTUTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUVVVVVVVVVVVVVXXXYYYYYZZZZZZZZZ[[\\\\\\\\]]^^^___^^^^^^^^^]]]]]]]]]]]]^^^^__^^`aaaaaa`aaaaaaaaaaaabbbbddeefffefeeddeeeeeeffffgggggghhhiiiijjjjhhhhhhhhFFEDDEFFGGFFFEEEFEDDDDEFFEEEEEEFEEFFGFFEFFGGGGFFFFFFFFFFGFFEEFFGFFFFFFFFFGGGGGGGGGGGHHHHGGGGGGGGGGGGGGGGFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGGGGHHGGGGHHGGGGGGGGHGGGFFEEDEGHGGHHHHHHHIIIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIIJKKJKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLIDKLINQPKKJJKLLLLLMMMMMMMMMMMMMMNNNNNNNNMMMMMMMMOOPONNNOONNOOONNNNNNNNNNNNOPPONNNNNMMNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPQQQQRRRRRRRRRRRRRRRSSSSSSSSSSSSSTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVUUTTUUVWYYYYZZZZ[[[[\\\\\\\\\\\\\\\]]]]^]]]]]]]]_^]]]]]^]]]\\]]]^^^^^^^^__`aaaaaaaaaaaaaaabbcdddeeffggggeeeefffffffggghhhhhhiiiijjjjjjjjkkkkkkkkFFEDDEFFGGFFFFEEFFEDDEFFFFEEEEFFEFFGGGFFGGGHHGGGFFFFFFFFFFFEEFFFFFFFFFFFGGGGGGGGGGGHHHHHGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGHHGGGGHHGGGGGGGGGGGGFFEEEFGHGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIJKKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMNOLJJIMKKJJKLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMOOPONNNOONNOOONNNNNNNNNNNNOOOONNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPQQQRRRR
RRRRRRRRRRRSSSSSSSSSSSSSSTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUVVVVVVVVVVUUUVWWYYYZZZZ[[[[[\\\\]]]]]]]]\\\]]]]]]]]]]]]]^^]]]]]^]]]\\]]]^^^^^^^^___``aaaaaaaaaaabbbbcccceeefffeeeeeeefffffggghhhhhhhiiiijjjjjjjjkkkkkkkkFFEEEEFFFFFFFFFFGFFEEFFGFFFFFFFFFFGGGGGGGGHHHHGGGGFFFFFFFFEEEEFFFFFFFFGGGGGGGGHHHHHHHHHHHHHHHHHHGGGGGGGGHHHHHHHHGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGHHHHHHHHGGGGGGGGGGGGGFFEFGHHGGGHIIIIHHHHHHHHHHHHHHHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIJJJJJJJJJIIJKKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLFLMKKJLSLKJKKLLLLLMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNOOPOONOOONNOOONNNNNNNNNNNNOOOONNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPQRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUVVVVVVVVWWVVVVWWYYZZZZ[[[[[[\\\\]]]]]]]]\\\]]]]]]]]]]]]]^^^]]]]^]]]]]]]]^^^^^^^^_____`aaaaaaaaaabbbbccccdeeeeddddeeeefffgggghhhhhhhiiiiijjjjjjkkkkkkkkkkFFFFFFFFFFFFGGGGGGFFFFGGGFFFFFFGGGGGHHHHGGGGGGGGGGGGFFFFFFFEEFFFFFFFGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGGHHGFFHHHHHHHHIIIIIIHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJKKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMNNIHONLLKKLLLLLLMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNOOOOONOOONNOOONNNNNNNNNNNOOOOOONNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOPPPPPPPPOOPPPQQQRRRRRRRRRRRRRRRRSSSSSSTTRRRRRRRRTTTTTTTTTTTTTUUUUUUUUUUUVVVVVVVVVVVVVVVVXWWVVWWWYYYZZZZZ[[[[\\\\\\\\\\\\\\\]]]]]]]]]]]]]^^^]]]]]]]]]]]]]^^^^^^^^__^^_``aaaaabbbbbbbcccccddeeeeddeeffffgggghhhhhhhhiiiijjjjjjkkkkkkkkkkkkGGFFFFGGFFGGGGGGGGGGGGGGGGGGGGGGGGGGHHHHGGGGGGGGGGGGGGFFFFFFFFFFFFGGGGGGGGGHHHIIIIIIIIIIIIIIIIIIHHHHHHHHHHHHHHHHIIIIIIIIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGGHHGGFIIHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJKKJKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLPILRGJYOMLLLLLLKLLMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNOOOONOOONNOOONNNNNNNNNNOOOOOOOOONNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPOPPPQQRRRRRRRRRRSSSSSSSSSSSSTTTTSSSSSSSSTTTTTTTTTTTTUUUUUUUUUUUUVVVVVVVVVVVVVVVVWWVVVVWWYYYYZZZZ[[[[\\\\\\\\\\\\\]]]]]]]]]]]]]]]^^^^^]]]^^^^^^^
^^^^^^^^^__^^_``aaaabbbccbbbcddeeeeefffffffgggghhhhhhhhhhiiiiijjjjjkkkkkkkkkkkkkkGGGGGGGGGGGGGGGGGGGGGGGGHHGGGGHHHGGGGHHHGGFFFFGGGGGGGGGGFFGGGGFFGGGGGGGGGHHHIIIIIIIIIIIIIIIIIIIIHHHHHHHHHHHHHHHHJJJJJJJJIIIIIIIIIIIIIIIIIIIIIIIIHHHHHHHHHHHHHHHHGGGGGGHHJIHHIIIIIIIIIJJJIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJKKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLNGGPHMaUMLLLMMLKLLMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNOOOOOOPONNOOONNNNNNNNNNOOOOOOOOONNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPQQRRRSSSSSRRRSSSSSSSSSSTTTTTTSSSSSSSSTTTTTTTTTTTUUUUUUUUUUUUUVVVVVVVVWWWWWWWWWWVVVWXXYYYZZZZZ[[[[\\\\\\\\\\\\]]]]]]]]]]]]]]]]]^^^^]]]^^^^^^^^^^^^^^^^_____`aaaaabbcccbccddeffffgghhhhggghhhhhiiiiiiiiiiiijjjjkkkkkkkkkkkkkkkkGGGGGGGGHHHGGGGGGGGGGGGGHHHGGHHHHGGGGGHHGGGFFGGGHGGGGGGGFGGHHGGFGGGGGGGHHHHHIIJJJJJIIIIIJJJJJJJJIIIIIIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJGGHHHHGGHHHHHHHHHHGGGHHIJIHHIJJIIIIIIJJJIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJKLLLLKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLPRHKGESPMMMMMMLKLLMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNOOOOOOPONNOOONNNNNNNNNNOOOOOOOOOONNNNOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPQRRRSSSSSSSSRSSSSSSSSTTTTTTTTSSSSSSSSTTTTTTTTTTUUUUUUUUUUUUUUVVVVVVVVWWWWWWWWWWVWWXYZZZZ[[[[[[[[[\\\\\\\\\\\\]]]]]]]]]]]]]]]]]^^^^^]]^^^__^^^^^^^^^^^___``aaaaaabcccddddeefffgghhhhhhggghhhhhiiiiiiiiiiijjjjjkkkkkkklkkkkkkkkGGGHHGGGHHHHGGFFFGGGGGGFHHHHHHHHHGGFGGHHHHGGGGHHHHHGGGGGGGHHHHGGGGGGGHHHHHHIIIJJJJJJIIIIJJJJJJJJIIIIIIIIIIIIIIIIKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKGGHHHHGGHHHHHHHHHHGFFHIJJIHHIJJJJJJJIIIIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJKKKKKKKKKKKKLMMMKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLFSCBGALTMMMMMMLJLLMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNOOOOOOPONNOOONNNNNNNNNNPOOOOOOPOONNNNOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPQQRRSSSSSSSSSRSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUVVVVVVVVWWWWWWWWWWWWXYZ[[[[\\\\\[[[[\\\\]]]]]]]]]]]]]]]]]]]]]]]]]^^^^^]\^^^__^^^^^^^^^^^__`aaaaaaaabccddeeeeefffhhhhhhhggggghhhhiiiiiiiiiiijjjjkkkkkklllkkkkkkkkHHGGGGHHHHHGGGGGHHGGGGHHGGGGGGGGFGGGGHHHIIHHHHH
HHHHHHHHHHHGGGGHHHHHHHHHHIIIJJJJJJJJJJKKKLLLLLLLLIIIIIIIIIIJJKKJJKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKJJHHGGGGHIGHIIIHGGHHHGHHIJJJJJJJJJIIJJJJJJJJJKKKKKJJJJJJJJJJJJJJJJJJJJJJJJJJJJJKKKJKLLLLLLLLLLLLLLLLLLLLMMLLLLLLLLLLLLLLLLLLLLLMMMMNMKKMMLJP5=IT4JQMKLMLLMMMMMMMMMMMMNNNNONNNNNMMMNNNNNNNNNNNNOOOOOOOOOOOOOONNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPOOPPPPPPPPPPPPPPQQQQRRRRSSTTSSSTSSTTTTSSTTTTTTUUUUUUUUUUTTUUUUUUUUUUUUVVVVVVVWWWVVVVVVVVVWWWXXYYWXXXYZZZ\\\\\\\\\\]\\[\\\\]]]]]]]]]]]]]]]]]]]]]]]]]]^^^^]^^__^^]^___^^__```aaaabbccccddddddeeeeefffgghiihhhhhiiiiiiiiihhijjjjkkkkkkkklllllllllllHHHGGHHHHHHHHHGGHHGGGGHHHHHHHHHHGGGGHHHHIIIHHHHHHHHHHHHHHHGGGGHHHHHHHIIIIIIIJJJJJJJJKKKKKKKKKKKKJIIIIIIIJJJKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKJJHHGGGHHIHHIJIIHGIHHHHIJJJJJJJJJJIJJJJJJJIJJJKJJJJJJJJJJJJJJJJJJJIIIIIIIIJJJJJJJJJKLLLLLLLLLLLLLLLLLLLLMMLLLLLLLLLLLLLLLLLLLLMMMMMNNLLNNNO42MHM<_OMKLMLLMMMMMMMMMMMMNNNNNNNNNNNMMNNNNNNNNNNNNOOOOOOOOOOOOOONNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPQQQRRRRRSSTTSSSTSSTTTTSSTTTTTTUUUUUUUUUUTTUUUUUUUUUUUVVVVVVVVWWWVVVVVVVVWWWWXXXYXXXYYZZZ\\\\\\\\\\]\\[\\\\]]]]]]]]]]]]]]]]]]]]]]]]]^^^^^^^^__^^^^______```aaaabbbccccdddccddeeefggghhhiihhhhhhhhiiiiijjjjjjkkkkkkkkllllmllllllllHHHHHHHHHHHHHHIIHHHHHHHHHHHHHHHHGGGHHHHIIIIIHHHHHHHHHHHHHHGGGGHHIIIIIIIIIIIIJJJJJJKKKKKKKKKKKKKKJJJJJIIIKKKKKLLLKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKJHHHHHHIIIIJJJIIHIIIIIIJJJJJJJJJJJJJJJJJKIJJJJJJJJJJJJJJJJJJJJJJJIIIIIIIIJJJJJJJJJKLLLLLLLLLLLLLLLLLLLLMMLLLLLLLLLLLLLLLLLLLLMMMMMNNMMNONR;ETBI<XMLKMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOONNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPQQQQQQQQQQQQRRRRSSSTTSSSTSSTTTTSSTTTTTTUUUUUUUUUUTTUUUUUUUUUUUVVVVVVVVWWWVVVVVVVVWWWWXXXXXXYYZZ[[\\\\\\\\\\]\\[\\\\]]]]]]]]]]]]]]]]]]]]]]]]]^^^^___^^^^__^^____`a``aaaabbbccccdddcddeefgghhhhhhhhggghhhhhiiijjjkkjkkkkllllllllmmmllllllllHHIIIIHHHHHIIIIJIIIHHIIIHHHHHHHHGGGHHHHHIIIIIIIIHHHHHHHHIHHGGHHIIIIIIIIIIIIIIJJJKKKKKKKKKKKKKKKKJJJJJJJJKKKKKKLMKKKKKKKKKKKKKKKKKKKKKKKKKLLLLKKKIIIIIIJJJKKKKJJIIIIIIIJJJJJJJJJ
JJJIIIJJJJJKKKKKKKKKKKKKKKKKKKKKKJJJJJJJJJJJJJJJJJKLLLLLLLLLLLLLLLLLLLLMMLLLLLLLLLLLLLLLLLLLMMMMNLNMLLMMLCJQPGOGUKKLMMMMMMMMMMMMMMMMNNNNNNNNNNNNNOOOOOOOONNNNOOOOOOOOOOOOOONNNNOOOOOOOOOOOOOOOOOPOOOOOOOOOOOOPPPPPPPPQQQQRRRRRRRRQRRRRSSSSSTTSSSTSSSTTSSSTTTTTTUUUUUUUUUUTTUUUUUUUUUUVVVVVVVVVWWWWWWWWWWWXXXXXXXXXYYZZ[[[\\\\\\\\\\]\\[\\\\]]]]]]]]]]]]]]]]]]]]]]]^^^^_____^^^^__^^_```aa`aaaabbbbccccdddeeeffggghhhhhhhggghhhhiiiiijjjjjjjkkkkklkkllllmmllllllllHHIIIIHHHIIIIJJJIIIIIIIIGGGGGGGGGGGGHHHHIIIIIIIIIIIIIIIIIIHHHHIIJJIIIHHHIIIJJJJJKKKKKKLLKKKKKKKKKKKKKJJJKKJJJKLLKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLKKJJJJJJJJKKKKKJJJHHIIIJJJJJJJJJJJJJIIIIIJKKLLLLLLLLLLLLLLLLLLLLLLKKKKKKKKKJJJJJJIJKLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMLMMMMNNNLMMKKKKICLMVNKVXIKLMMMMMNNNNNNNNMNNNNNNNNNNNNNNNOOOOOOOONNNNOOOOOOOOOOOOOONNNNOOOOOOOOOOOOOOOPPPOOOOOOOOOOOOPPPPQQQQQQQQRRRRRRRRRRRRSSSSSSTTSSSTTSSSSSSTTTTTTTUUUUUUUUUUTTUUUUUUUUUVVVVVVVVVVWWWWWWWWWWWXXXXXXXXYYZZ[[\\\\\\\\\\\\]\\[\\\\]]]]]]]]]]]]]]]]]]]]]]^^^^_____^^]]^^_^_````abaaaaabbbbccccdddeeefffffhhhhgggfhhhiiiijiiiiiiiijjjjjkkkkkkkllllllllllllIIIIIIIIIIIIIIIIIJJJJJJIHHHHHHHHGGHHHHIIIIIIIIJJIIIIIIIIIIHHHHIIJJIIIHHHIJJJJKKKKKKKLLLLKKKKKKKKLKKKKKKKKKJKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLKKJJKKKKKKLLLKKJJJIIIJJJJJJJJJJJJJJJJIIIIILLLLMMLLLLLLLLLLLLLLLLLLLLLLLLLLKKKKJJJJJKLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMNNNNLMMKKKJHOKLXHDdPIKLLLMMMNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOONNNNOOOOOOOOOOOOOONNNNOOOOOOOOOOOOOOPPPPOOOOOOOOOOOOPPPPQQQQQRRRRRRRRRRRRRRSSSSSSSTTSSSTTTSSSSTTTTTTTTUUUUUUUUUUTTUUUUUUUVVVVVVVVVVVVWWWWWWWWWWWXXXYYYYYZZZ[[\\\\\\\\\\\\\]\\[\\\\]]]]]]]]]]]]]]]]]]]]]]^^^______^^^^^^__`````abaaaabbbbbccccdddddeeeeeehhhgggffhhiiijjjjiiiiiiiijjjjjkkkkkkklllllllllllJIIIIIIJIIIIIIIIIJJKKJJIIIIIIIIIHIIIIIJJIIIIJJJJIIIIIIIIIIHHHHIIIIIIIIIIJJJKKKKKKKLLLLLLKKKKKKKKLLLLKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLKJKKLLLKKLLKKKJJJJJKKLLKKKKKKKKKKKKKKKJJJKLLLMLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLKKKJJJKLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMNNNNMNNMLLKIKILNDHgNIKMKKLMMNNNNNNN
NNNNNNNNNNNNNOOOOOOOOOOOONNNNOOOOOOOOOOOOOONNNNOOOOOOOOOOOOOOPPPPOOOOOOOOOOOOPPPPQQQRRRRRRRRRRRRRRRRSSSSTSSTTSSSTTTSSSSTTTTTTTTUUUUUUUUUUTTUUUUUUVVVVVVVVVVVVVWWWWWWWWWWWXXYYZZZZZZ[[\\\]\\\\\\\\\\]\\[\\\\]]]]]]]]]]]]]]]]]]]]]]^^^____`___``___``aa``aaaaaabbbbbccccddddeeeffffhhhhgggghhhiiijjijjjjjjjjjjjkkkkkkklllllllllllllJJIIIIJJIIIIIHHHJJKKKKJJJJJJJJJJIJJJJJKKIIIIJJJJIIIIIIIIIIIHHIIIIIIJJKKKJKKKKLLLKLLLLLLLLLLLLLLLLLLLLLKKKKLMMMMLMMMMMMMMLLLLLLLLLLLLLLLLLLMMLLLLKKLLLLLKLLKKJJJJKKLLMMLLLLLLLLLLKKLLLKKJKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMLLKKJJJKLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMNNNNNMOONNNMKNONGKFSLILMKJLMLNNNNNNNNNNNNNNNNNNNNOOOOPPPPPPPPNNNNOOOOOOOOOOOOOONNNNOOOOOOOOOOOOOPPPPPOOOOOOOOOOOOPPPPQQRRRRRRRRRRRRRRRRSSSSTTSSTTSSSTTTSSSSTTTTTTTTUUUUUUUUUUTTUUUUUUVVVVVVVVVVVVVWWWWWWWWWWWXYYYZ[[[ZZ[[\\]]\\\\\\\\\\]\\[\\\\]]]]]]]]]]]]]]]]]]]]]]^^____`````aa````aaa``aaaaaabbbbbccccdddeffgghhihhhhhhhhgghhiiiiiijjjkkkjjkkkkllkllllmmmllllllllIJKKJJJJJJJJJKKKJJJJJJJJKKKJJJJJIJJJKKLLJJKKKJIIJJJJJJJJKKKKJIJJIJJJJKKKKKKKKKKKMMMMMMMMLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMLLLLLLLLLLLLLLLLLLLLKKKKJKKLLKIHIJKKJJJJLLLLLLLLLLLLLLLLLLLLKKKKJKKLLLKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLKKKKKKKKLLLLLLLLKLLLLLLLMMMMMMMMMMMMMMMMMMMMMNNNMMMMMMMMLLLPPLMQLJIILNNMMMNNNONNNNNNNNNNOOOOOOOOOOOOOOOOOONNNNOOOOOOOOPPONNNNONNNOPPPOOONNOPQQPPOOOOOOOOOOOOPPPQRRRRRRSSSSSSSSSSRRRSSSSSSTTUUTTSTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVXXXWWWVVVVVVVVVVYYZ[[[[ZZZ[[\]]^]]]]]]]]\\]]]]]]^^^^^^^^]]]]]]]]\\]]^^^^^^__^^__````aaaaa``aabbabbbbbbbbdddeeeffefghiiiijiiihhhhffghiiiijjkkkllllllmmmmmmmmmllllnnnnnnnnIJKKKJJJJJJJKKKKKKKKKKKKKKKKJJJJIIJJKKKKJJJKJJIIIIIJJJJJJKKJJIIJJJJJJKKKLLLLLLLLMMMMMMMMLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMLLLLLLLLLLLLLLLLLLLLLKKKKKKLKKJIJKKLKJJJLLLLLLLLLLLLLLLLLLLLLLKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMNNNMMMMMMMMONNQNJMOLJIJLMNMMNNNOOOONNNNNOOOOOOOOOOOOOOOOOOOOONNNNOOOOOOOOPPONNNNONNNOPPPOOOOOPPPPPPOOOOOOOOOPPPPPQQRRRRRSSSSSSSSSSSRRRSSSSSTTTUUTTTTTTTTTTTUUUUUUU
UUUUUUUUUUUUUUUUUVVVVVVVVXXWWWWWWWWWWWWWWYYZZ[[[[ZZ[[\\]]\\\\\\\\\]]]]]]]^^^^^^^^]]]]]]]]]]]]]^^^^______``aaaabbbaaaabbaabbbbbbbbddddeeeffghhiihhhhhggggfgghiiijjjjjkklllmmmmmmmmnnmmmmllnnnnnnnnIJKKKJJJJJKKKKKKKKKKKKKKKKKKKKJJIIJJJKKKJJJJJJIIIIIJJJKKJJJJIIIIJJJKKKKKMMMMMMMMMMMMMMMMLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMLLLLLLLLLLLLLLLLMLLLLKKKKKLLKKJJKLLLKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNOMPKEMNLKJJLMMMNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNNNNOOOOOOOPPONNNOONNNOPPPOOOPPPPPPPPOOOOOOOOQQQQQQQQRRRRRSSSSSSSSSSSRRSSSSTTTTTUUTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUVVVWWWWWWWWWWWWXXXXXXXXXXXXYYYZZZ[[[[[[\\\\\\\\\\\\]]]]]]]]^^^^^^^^]]]]]]]]^^]]]]^^_``````aaaabbbbbbbaabbaacccccccccdddeeeeghhhhggghhgggggghhiijjjjjjjkkkllmmmmmmmnnnnnnmmmnnnnnnnnIJKKKKKKKKKKKKKKLLLLLLLLKKKKKKKKIIJJJKKKJJJJJJJJIIJJJJKKIJJJIIIIKKKKKLLLMMMMMMMMMMMMMMMMLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMLLLLKLLLLLKKKKLLLKJJKLLLLLLLLLLLLLLLLMLLLLLLLMLLKKLLMLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMNNNNNNNNNNNNNNNNNNNMMMMMNNNNNNNNNNNMNLQJFRQLKKKLMMMNNOOOPOOOOOOPPPPOOOOOOOOOOOOOOOOONNNNNNOOOOOOOPPONNNOOONNOPPPOOPQQPOOPPQPPPPPPPPRRRRRRQQRRRRSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUVVVVVVWWWWWWWWWWWXXXYYYYYYYYYYZZYYYZ[[[[[[[[[[\\\\\\\\]]]]]]]]^^^^^^^^]]^^^^^^^^]]]^^_`aaa``aaaaabbbbbbbabbcbbccccccccccddeeeeggggggggiiiiiiiiiijjkkkkjjjkkkllmmmmmmmmooooonnnnnnnnnnnIJKLKKKKKKKKKKLLLLLLLLLLLLKKKKKKJJJJKKKKJJJJJJJJKKKKKJJJJJKJJIJJKKKLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMLLLLLLLLLLLLKLLKJJJKMMMMMMMMMMMMMMMMMMMMMMMMMMLLLLMMMMMMMMMMMMMMMMMMLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMNNNNNNNNNNNNNNNNNNNNNMMMMMNNNNNNNNNNNMOMTMK]YLLLLLMMMNNOOOPOOOOOPPPQQOOOOOOOOOOOOOOOONNNNNNNNOOOOOOPPONNNOOONNOOPOOPPRQPOOPQQQQQQQQQQSSRRRRRRRRRSSSSSSSSSSSSSSSSSTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVWWWWWWWWWVWWXXYYZYYYYYYYYZZYYZZ[[\\\\\\\\\\\\\\\\]]]]]]]^^^^^^^^^^^^^^^^_^^]]^_`a`abaaaabaaaabbbbbbabcddcddddddd
dddddeeffgggggghhjjjjjjjjjjjkkkkkjjjkkllllllmmmmmoooooooonnnnnnnnJKLLLKKKKKKKLLLLLLLLLLLLLLLLKKKKKKKKKKKKKKJJJKKKLLLKKKKKKKLKKKKLLLLLLMMMLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMLLLLMMMMMLLKKKKJJKLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMNNNNNNNNNNNNNNNNNNNNNMMMMMNNNNNNNNNNNMNKTLLaYLLMMMMMMNNNOOOOOOOOPPPQQOOOOOOOOOOOOOOOONNNNNNNNOOOOOOPPOONNOOONNOOOOOPQRQPPPPQRRRRRRRRRRRRRSSSSRSSSSSSSSSSSSSSSSSSTTTTTUTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUVVVWWWWWWWWWWWWWWWWXYYZZZZZZZZZZZZZZ[[[\\\\\\\\\\\\\\\\\]]]]]^^^^^^^^^^^^^^^_____^^__`aaaabbaabbaaaabbbbbbbcdeedddddddddddeeefffggggghiiiiiiijjjjkkkkkkkkkkklllmmmmmmmmmooooopppnnnnnnnnJKLLLKKKKKLLLLLLLLLLLLLLLLLLLLKKKKKKLLLLKKKKKKLLLLLLLLLLLLMMLLLMLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNMMMMLLLLMMNNMMLKKLKKKLMMMMMMMMMMMMMMMMMMMMMNNNNMMMMMMMLMMMMMMMMMMMMMMMMMMMMMMMMLLLLLLLLLLLLLLLLLLLLLLLLMMMMMNNNNNNNNNNNNNNNNNNNMMMMMNNNNNNNNNNNMNISJH_SLMMMMMMMMNNNOOOOOOOOPPQQOOOOOOOOOOOOOOOONNNNNNNNOOOOOOPPOONNOOONNOOOOOPQQQPPPQRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSTTTTUUUTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUWWWWWWWWWWWWWWWWWWXXYYYZZZZZZZZZZ[[[\\\\\\\]]]]]]]]]]]]]]]]]]^^^^^^^^^^^^^^____``````aaaaabbaabbabbbbccccbbcdeedeeeeeeeeeeefffggggggghijhiiiijjjkkkkkkkjkklllmmmnnnnnnnnooooppppnnnnnnnnJKLLLKKLKLLLLLLLLLLLLLLLLLLLLLLKLLLLLLLLLKKKKLLMLLLLMMMNMMNMMMMNLLLMMMMNNNNNNNNNMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNMMMMMMMMMMMMMMMMNNMMMMLLLMNOONMLKLLLLLMOMMMMMMMMMMMMMMMMMMMNNNNNMMNNNMMLMMMMMMMMMMMMMMMMNNNNNNNNLLLLLLLLMMMMMMMMLLLLLLLLMMMMMMMNNNNNNNNNNNNNNNNNMMMMMNNNNNNNNNNNQPISIF]OLMNNMLLMMMNNNONNNNOOPPPQOOOOOOOOOOOOOOOONNNMMNNNOOOOOOPPOONOOOONNOOOOOPQPPPPQQRRRRRRRRRRQRRRSSSSSSSSSSSSSSSSSSSSSSTTTTUUUTTSSTTUUUUUUUUUUUUUUUUUUUUUUUUUWWWWWWWWWWWWWWWWXXXXYYYZZZZZZZZZZ[\\\\\\\\]]]^^^]]]]]]]]]]]]^^^^^^^^^^^^^^^___```aaaaaaaaabbaabbbbbcccccdcccdeddeeeeeeeeeefffggghhgghhijiiijjjkkkkkkkkjjllllmmmnoooooooonnooopppnnnnnnnnKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMLLLLLLLLMMMMMMMMNNNNNNN
NLLMMMNNNNNNNNNNNNNNNMMMLMMMMMLLLNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNMNNNMMLLLMMNNONNMLLLLLMNOMNNNNNNNNNNNNNNNNNNNNNNMNNNNNNNNMNNNNNNNNNMMMMMNNNNNNNNMMMMMMLLLMMMMMMNNMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNMMNNNNNONNNNNNNNOPORNMUSTOLKKKKLLMNOOOOOOOOOPPPPPOOOOOOPOOOOOOOOOONNNNNMNNNNOOOOOOOOOOPPPOOOPPQRPPQQRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTUUUUTTTTTTTTUUUUUUUUUUUUVVVVWWWWVVVVVVVVYYYYYZZZYYZZZZZ[\\\\\\\\]]]]]]]]\\\\]]]]\\]]]]^^^^^^^^^]^^__``aabbbaaaa`aaaabbccdccbccddccccccddeeeeeeeeffgggghhggghiijjjjjjjjkkkkllllkkllmmmnnnooooooooooooooooppppppppKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMNNNNNNNNLMMMMNNNNNNNNNNNNNNNMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNMMMLLMMNNNNMMLLLLMNNONNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNMMMMNNNNNNNNNNNMMMMLLLMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNMMNNNNNNNNNNNNNNNPRUOOVOPMKKLKLNLMNOOOOOOOOOPPPPPPPPPOOOOOOOOOOOOONNNNNMNNNNOOOOOOOOPPPPOOOOPPQRPPQQRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTUUUUUTTTTTTUUUUUUUUUUUUUVWWWWWWWWWWWWWWWYYYYZZZZZZZZ[[[[\\\\\\\\]]]]]]]]]]]]]]]]\\]]]]^^^^_________``aabccbbbaaabbbbbccddddddddddddcdddefffffffffgggghhhhhhiijjkjjjjjkkkkkllllkkllmmnnnoooooooopooooppppppppppppLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMNNMMMMMMMMMMMMMMMMNNNNNNNNMMMMNNNNNNNNNNNNNNNNNMMMLMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNMMMMMMMMNNMMMLLLMMNOONNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOOONNNNNNNNNMMMMLLMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNMMMMNNNNNNNNNOSTKLTKMLLMMLMOLMNOOPOOOOOOPPPPPQQQPPONOOOOOOOOOONNNNNMNNNNOOOOOOPPPPPPOOOOPQRRPQQRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTUUUUUUUUTTTUUUUVVVVVVVVVWWWWWWWWWWWXXXYYYYZZZZZZZZZ[[[[[\\\\\\\\]]]]]]]]]]]]]^^^]]]]]^^^^__````````aabbbccccbbbbcbbbbcccddeeeeeeeddddeeegggggggggggghhhhiijjjjkkjjjkkkkkkklllllkllmmnnooooooooppppppppppppppppppLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNMMMMMMMMMMMMMMMMNNNNNNNNMNNNNNNNNNNNNNNNNNNNNNNNMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOONNNNNNNNNNNMMMMMMMMLLMMMMNNOONNNNNOOOOOOOOOOOOOONNNNNNNNNNNN
NNNNNNOOONNNNOOPPOOOOONNNNNNNMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNMMMMMMMMMMMMOMPSHHVSLLMNMKLNMMNOPPPOOOOOPPPPQQRRQPONOOOOOOOOPONNOONMNNNNOOOOOOPPPPPPOOOOPQRSQQQRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTUUUUUUUUTUUUUVVVVVVVVVVVVVWWWWWWWWWXXYYYZZZZZZZZZZZ[[[[[]]]]]]]]]]]]]]]]]]]]^^^^]]]^^^^^_``````````abbbcccccbbbbcccbbcccddefffeeeeeddeeeggggggggghhhhhiijjjjjjjjkkkkkkkklllllllllmmnnooooooooppppppqqqqqppppppppMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNMMMMMMMMOOOOOOOONNNNNNOONNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNNNNNNNNNNNMMMLLLMNNNOOOONNNOOOOOOOOOOOOOOOOOONNNNNNNNNNNNNNOOOOONNOOOPPPOOOOOOOOOONNNNMMMMMMMMMMMMMMMMMMNNNNNNOOOOOOOOOOOOOOOOOONNNNNMMMNNNNNNNNNKOUKHX^LLNONLLNMNNOPPPPOPPPPQQQQRRRRQOOOOOOOOOOPONNOONMNNNNOOOOOPPQQQPPPPPPPQRRQQRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSTTTTTTTTUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVWWWWVWWXXYYYZZZZZZ[[ZZZ[[[[[]]]]]]]]]]]]]]]]]]]]^^^^^^^^____``aa```_```aabbcbbbbbbbbccbbbbccddeeffeeeeeeeeeehhhhhhhhhhhiiiiijjjjjjjjkkkkkkkkllllllmmmmmnnoooooooppppqqqqqqqqppppppppMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNMMMNNNNNOOOOOOOONNNNNNNNOOOOOOOOOOOOOOOONNNNNNNNNNNNOOOOOOONNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNNNNOOONNMMLLLNNOOOPOOOOOOOOOOOOOOOOOOOOOOOOOONNNNNNNNOOOOOOOOOOOPPPPPPPPPOOOOOOONNNNNNNNNNMMMMMMMMMMMNNNNOOOOOOOOOOOOOOOOOOOOOONNNNNNNNNNNNNNOOORLFQYMMMOOMMOMNOPPPPPPPQQQQRRRRRRRQPPOOOOOOOOPOOOOONNNNNNOOOOOPQQRQPPQPPPPQQRRRRSSSRRRRRRRRRRRRRSSSSSSSSSSSSSTTTTTTTTTTTTTTTTUUUUUUUUUUUVVVVVUUUVVVVVVVVVVVVVVVWWWWWWWWWXXXYYZZZZ[[[[[[[[[\\\]]]]]]]]]]]]]]]]]]]^^^______````aaaaa``_``aabbccbbbbbbbbccccccddddeeffffffffefffhhhhhhhhhiiiijjjjjjjkkkkkkkkkllllllllmmnnnnnnooooooppppppqqqqqqqppppppppNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNMMNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNNNNNNNNNOOOOPOOOOOOONOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNNOOOOPONMMLMMNOOPPPPOOOOOOOOOOOOOOOOOOOOOOOOONNNNNNNNOOOOOOOOOPPQQPPPPPPPPPOOOOOONNNNOONNNNNNMMMMMMMMNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOTKHHFKPQNMNONNOMNOPPQPPQQQRRRRRRRRRRRR
QOOOOOOOOPOOOOONNNNNNOOOOOPQRRQPPRQQPPPQQRRRSSSSRRRRRRRRRRRRSSSSSSSSSSSSSTTTTTTTTTTTTTTTTUUUUUUUUUVVVVVVVUUUVVVVWWWWWWWWWWXXXXXXXXXXYYYYYZZ[[[[[[\\\\]]]]^^^^^^^^^^^^^^^^^^___```````aaaabbbbbaaaaabbcccdccccccccdddddeefeeeffggghhgggggghhhhhhhhiiiijjjjkkkkklllkkklllllmllllmnnooooonnnoopppppppppqqqqqppppppppNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNMMNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNNNNNNNNNOOOPPNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPOOOOOOOMNNNOOOOPPONMMMMNOOPPPPOOOOOOOOPPPPPPPPPPOOOOOOONNNNNNNNOOOOOOOPPPQQQPPOPPPPPPPPOOOOONNNOOOONNNNMMMMMMMMNOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPOOOOOOOOLUF=FLQSTQNNNMMMNNOPPQPPRRRRRSSSRRRRRRRROOOOOOOOPOOOOONNNNNNOOOOOPQRRQPPRRQPPPPPRRSSSSSRRRRRRRRRRRRSSSSSSSSSSSSSTTTTTTTTTTTTTTTTUUUUUUUUUVVVVVVVUUVVVVWWWWWWWWWWXXYYYYYYYYYYYYYYZ[[[[[[[]]]]^^^^^^^^^^^^^^^^^^^^__```aaa`aaaabbbbbbcccbbbbbccdddccccddddddeeefggfffffghiiiihhhhhhhhhhhhhiiijjjjjkklllmmmkkklllllmllllmnnpoooonnnopppppppppppqqqqppppppppOOOOOOOONNNNNNNNNNNNNNNNNNNNNNNNMNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPOONNOOPOOOOOOOOOOOOOOOOPOOOOONNOOOOOOOOPONOPQQQPPPONNMLPOOOOOONPPOOOPPQPPPPPPPPOONNNOOOPONNNOPQPPPPPPPPOPQRRRQPQPPOOPPQPPPPOOOONNOPPPPOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNNPMPQNDI?POPQPOOPOOPPPQQQPQQRRRRRRRRRRRRRRRQPOOOOOOOOOOOONMNPPNMNOPRSSSRQRRQQPPPQRRRRSSSSTSSRRSSTSSTTTTTTTTTTTTTTUUUUTTTTUUUUUUUUUUUUUUUUVVVVVVVVVVVVVWWWWWWWXXXXZYYXXYYZXXYYZZZZZZZ[[\\\[\]]]]^^```_____^^^^_______``aaaaabbbaa`bbbbbcccbbccccddeeeeeeeedefghhhhfffffghhhhhhhhhhhhhhhhhhiiiijjjjkkkkkkkklllllmmmmmmllmmnnopppppopppppqqqpppqqqqqqqqqqqqqOOOOOOOONNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONOOOOPPPPOOOOOOPOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOQPOOPQQPPPPONNMMPPOOPPOOOOOOOPPPPPPPPPPPOOOOOOOOPOONNOPPPPPPPPPPOPQRRRRRRQQPPPQQPPPOOOOOOOPPPPOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOQRQOQHNMQPPQQOOPOOOPPQQQPQQRRRRRRRRRRRRRRRQPPOOOPPPPPPPPONNPPNNOOPQSSSRQRRQQPPPQRRRSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVWWXWXXXXXXXZYYYYYY
ZYYYZZ[[[[[[\\]]][\]]]]^_________________````aaaaaabbbbbabbbbccccccccddddeeeeeeeedefghhhhgffffghhhhhggggghhhhhhhhijjjjkkklllllllllllllmmmmmmmmmnnoopppppoppppppqqpppqqqqqqqqqqqqqOOOOOOOONNNNNNNNNNNNNNNNNNNNNNNNOOOONNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNOOOOPPPOOOOPPOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPRQPPPPPPQPOONNNOQPOOPPPONOOPPQQQPPPPPPPPOOOOOOOOOOOOOPPPPPPPPPPPPPQQRSSSSRRQQQQQPPPOOOOOPPPPPOONOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPOOOOOOOOOOOPPPPPNRPMPHPURQQQQONONNOOPPQQPQQRRRRRRRRRRRRRRRQQQPOOQQQQQQQQPOOPPOOPPQQRRRRRRRRQPPPQSSSSSSRRSSSSSSSSSSTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUTTUUUUUVVVVVVVVWVVVVWWXXXXXXXYYYZZZYYZZZZZZZ[[\\]]]]]]]][\]^^^_`________````````aaaaaaaaaabbcccbbccccdddccddddeeeeeeeeeeeffghhhhhggffgghhhgggfffiiiiiiiijjjkkkkklllllllllllllmmmmmnnnnnnooppppppppppppppppqqqqrrqqqqqqqqOOOOOOOOOOOOOOOONNNNNNNNNNNNNNNNOOOOONNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNOOOOOPPPPPPPPOOOOOOOOOOOOOOOOOOOOOPPPQQQQQQQQQPPPPPPOQPOONOOPPPOOPPOOOPPQRRRRQQQQQQQQPPPPPPOOOOPPPPPPPPPPPPPPQQQQQRSSSSRQQPPPPPPPPPPPPPPPPPOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPOOOOOOOOOPPPQQRRNOSROHVVRPPQPOOONNOOPPQQPQQRRRRRRRRRRRRRQRRRQQPOQQQQQQQQPPOOOOPPQQQQRRSSQRRQPPPQSSSSSRRQRSSTTSSRSSTTTTTTTTTTTTTTSTTTUUUVUUUUUUUUTTTUUUVVVVVVVVWWVVVWWXYYYYYYYYYYZZZZZZZZZZZZ[[\\]]]]]]]]\]^^__`a````____aaaaaaaabbbbbaaaaaabbcccccccddddddddeeeeeeeeeeeefffghhhhhhggggghhhhggggfiiiiiiiijjjkkkkkllllllllllllmmmmmnooooooppppppppppppppppqqqqqrrrqqqqqqqqOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPOOOONNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNOOOOPPPPPPPPPOOOOOOOOPPPPPPPPOOOOOOOOPPPPPPPPPOOPPPPOPPOOOPQQPPOOPPOOOOPQRRRQQQQQQQQQPPQQQPPOOPPQQQPPQQQQQQQQQQPPPQQRRRRQQPPOPPPPPPPPPPPPPPQQPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQRRSTNWYOLa[RONPPOOPNOOOPPQQPQQRRRRRRRRRRRRRRRRRRQPPPPPPPPPPPPOOOOPPRRQQRRSSQRRRQPPQSSSTSSRQRSSTTSSRSSTTTTTTTTTTTTTTSTTTUUUVUUUUUUUUTTTUUUVVVVVVWWWWWWWWXXYYYYYYYYZZZZZZZZZZYYZZ[[[[]]]]]]]]]]^__`aaaa``__^^aaaaaabbbbbbbbbb`aaaabbccccddddddddeeeeffffffffffffggghhihhggghhiiiihhhhjjjjjjjjjjjjkkkkkkkkkkk
kllmmmmnnnoopppppqqppppppppppppqqqqqqrrrrqqqqqqqqOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPQQQQPPPPPPPPPPPPPPPPPPPPPOOOOOOOOOOOOOPOOPQQPPPPOOPQQRQPPPPPPOOOOPPPPPQQQQQQQQQQQRQQPOPPQQQQQPQQQQQQQQRQPPPPQQRRRRQQPPQQQQQQQQPPPPPQQRPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQRRRSVLSTLLb\RONOQPPPPPPPPPPPPQQRRRRRRRRRRRRRRRRRRQQQPPPPPPPPPPPOOPPPRRRRRRSSQRRRQQPQRRSTTSRQSSSSSSSSSSTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUTTUUUUUVVVVVWWWWXXXXXXYYYYYYYYZZZZZ[[ZZZYYZZ[[[[\\]]]]]]]^____`aaa``__^^aaaaaaaabbbbbbbbaaaaaabbccccdddddddeeeefffffffffffffffggihhhhhiiiiiijjjjjjjjjjjjjjjjkkkkllllllllmmmnnnnoooppqqqqqqqpppqqpqqqqqqqqqqrrrrrqqqqqqqqOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPOOOOOOOOPPPPPPPPPPPPPPPPOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPQQQQQQPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQRRQPOOPPQQRRRQQQQQQPQPPPPPPQQQQQQQQQQQRRRQPPPPQQQQQQQQQQQQQQQQQQQQQQQRRRRRQQQQRRRRRRQQPPPPQQQQQQQQQQPPPPPPPPPPPPPPPPQQQQQQQQPPPPPPPPQQQQRRRRQNMKMJXXTQOPQQQQQQQQPPPPPQQRRRRRSSSSSSSSSSRRRRRRQQQQQQQQPQQOOQQPRRRRRRRRQRSRRQPQQRSTTSRRSSSSSSSSSSTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUVVVWWWWWYYXXXXYYYYYYYYYYYZZZZZZYZZZ[[\\\\\\]]^^^^^_____```______``````aaaaaabbbbbbaaaabbcccccdddddddeeeeffffffffffeeeffgihhhhiijiiijjjkkkkkkkkkkjkkkklllmmmmmmmmnnnnooooppppqqrrrqqpppqqqqqqqrrrqqrrrrssqqqqqqqqOOOOOOOOPPPPPPPPOOOOOOOOOOOOOOOOOOOOPPPPOOOOOOOOPPPPPPPPPPPPPPPPOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPQQPQQRRQQPPPPPPPPPPPPPPPPPQQQQQRRRRRRRRRRRSSRSSRQPNOPQQRRRSRRRRRRQSSRQQQRRQQQQQQQQQRRSRQPPQQQQQQQQQQQQQQQQQQQQRRRRRRSSSSRRRRRRRSSSRRQPPPPQQQQQQQQQPPPPPPPPPPPPPPPPQQQQQQQQPPPPPPPPQQQQQRRRPUNJVNPVVSPQRRQQRRQQQPPPPQQRRRRRSSSSSSSSSSRRQRRRRRRRRRRRPRRPPRRPRRRRSRRRQRSSRQQQQQSTTTSRTSSRRSSTSSTTTTTTTTTTTTTTUUUUTTTTUUUUUUUUUUUUUUUUVVWWWWWWYYYXXXXXXYYYYYYYYYZZZZYY[[[[\\]]\\]]^^__^____^__^___````_````````aaabbbbccbaabbbbbccccddddddeeeeffffffffffeeeeffhhhhhijjhhiijkkkkkkkkkkkkkklllllnnnnnnnnnoooopppqpppqrrsrrqpppqqqrrrrrrrqqrrrrssqqqqqqqqOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNNPPPPPPPPPPPPPPPPQQQPPPPPPPPPPOOOPPPPPPPPOOOOOOO
OPPPOPPQQQQQQRRRRRRRRRRRRPQQQQPPOOOPPPPPPQQQRRRRRSRRRRRRSSSSSSRPOOPQRSSSSTTTSSSSSSSSRRQQQSSSSSSSSQRRRRRQPRRRRRQQPQQQQPPPPRRRRRRSSSSSSSSSSSSSSSSSSQPPPPPOOPPPQQQQQQOOQQPOPPPQQRQQPOPQQQQPPOOPPQQQQRRRRRRRRRORTSKUURPPRSRQQRQQPPQQRRRSTTTTSSSSSSSSSSRRRRRRSQRSSSSSSSSSSSRRRSSSSSSSSSSTTTRQPPQSTTTSSSSSSSTTTTTTTUUUUTTTTTTTTVVVVVVVVVVVVVVVVUUUVVVVVUUVVWWXYZZZYYYYYXXXXXXYYYYZZZ[[[[[[[[\\\\\]^]^^_``aa`___````aa``__`_____`abbcbbbccbbbbbbdddddddddddeeeeeefffffedeefffgffhhhiijjjijjjkkkklllllkkklllllmmmnnnnnnnnnnooopppqqqqqqqqqqqqqqqrrrrrrrrrrrssssssttttttttOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNPPPPPPPPPPPPPPPPQQQQQQQPQQPPPPPPPPPPPPPPOOOOOOOOPPOOOPPQQQQRRRRRRRRRRRRRPPQQQQQQPPPPPPPPPQQRRRRRSRRRRRRSSSSSSRQOPPQRSSSSTTTSSSSSSSSRRQQQSSSSSSSSRRRRRRQQRRRSRRQQPPPQQQQQRRRRRSSSSSSSSSSSSSSSSSSSSRRRRRQQRRRRRRRQTQPQQPPRQQRRRQQPPPQQQQPPOOPPQQQQRRRRRRRRSQSSRITTSQQSSRQRRRQQQQRRQRRSSSSSSSSSSSSSRRRRRRRRQRSSSSSSSSSSSRRRSSSSSSSSSSTTTSQPQQSTTTSSSSSSTTTTTTTUUUUUTTUUUUUUVVVVVVVVVVVVVVVVUUVVVVVVVVVVWXXYZZYYYXXXYYYYYYZZYZZZZ[[[[[[\\\]]\]^^^^_``aa`___````aaaa`_````_``aaabbbbbccbbbbcccccdddeeeeeeeeeefffgffeeffggghgghhhiiiiiijjkkkkkkkkkkkkkllllmmmmoooooooonoooopppqqqqqqqqqqqqqqrrrrrrrrrrrrsssttsttttttttOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPRQQQQQQQQQQQQQQQPPPPPPPPPPPPPPPPPOOOOPPPQRRRRRRRRRRRRRRRQQQQQQRRPPPPQQQQPQQRRSSSSSRRRRSSSSRRRRQPPQQRRSSSTTTSSSSSSSSRRRQQSSSSSSSSRRRRRRRRSSSSRRQQPPQQQRRRRRRSSSSSSSSSSSSSSSSSSSSSTSRRSSRQRRRRRQPPSQPQRQQRQRRRRQQPQQQQQPPOOOPPQQQPRRRRRRRRQPRQOGSUUSRRSRRSRRRRRRRRQRRSSSSSSSSSSSSSRRSSSSRRQRSSTSSSTTSSSSRRSSSSSSSSRSTTTSRRQRSTTTTSSSTTTTTTTTTUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWXYYZZYYXXWWYYYYYZZZZZZZ[[[[[\]]^^^]]^_____`aaa`__`a``aaaaaa`aaa```aaaaaabbbcccbbcccccddeeefffffeeeeffgggfffgghhiiihiiiiiiiijjjkkkkkkkkkkkkklllmmmmnooooooooooooopppqqqqqqqqqqqqqqrrrrrrrrrrrsssstttttttttttOOOOOOOOOOOOOOOOOOPPPPOONOOOOOPPPPPPPPPPPPPPPPPPQQQQQQQQQQQQQQQQPPPPPPPPQQQQQQQQPPPPPPQQRRRRRRRRRRRRRRRRRRQQQRRSQQQQQQQQQQRRSSRRSSRRRRSSSSRRRRQQQQRRRSSSSSSSSSSSSSSSRRRRSSSSSSSSSRRQQRRSSSSSSRRRQQRRRRRRSSSSSSS
SSSSSSSSSSSSSSSSSSRRRRRQQRRQQPPOOQPPQRRRQQQQQQQQQQQQQPPPOPPPQQQQPRRRRRRRRPPSQOIXZVSQRRQRSSSSRRSSSRSSSSSSSSSSSSSSSRRSTTSRRRRSSTTSSTTTSSSSSSSSSSSSSRSSTTTSSRSSTTTTTTTTTTTTTTUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWXXYYYYYYYXXXYYXXXYYY[[[[[[[[\\^____^__``__`aaaa`__`a`aaaaaaa`aaaa`aaaaaaabbcccccccdddeeeeffffffeeeeeffffffffhhhiiiiiiiiiiiihjjkkkkkkkkkkkkkkmmmmmnnnooooooooooooppppqqqqqqqqrqqqqrrrssssssssssssstttttttttttOOOOOOPPPPPPPPPPOOPQQPOOOOOOPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQQQQPPPPPPPPPPPQQQQQQQQQQQQQQQQRRRRRSSSRRRRRRRRSSRRQRRRQRRRRRRRSSSSSRRQSSSRRSSSSSRRRSRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSRRRRSSTTTSSSSSSSSSRRRRSSSSSSSSSSSSSSSSTTTTTTTTSRRRRRQQRQQQPPQQTSQOPQRRPPPPPPQQQQQPPPPPPQQQQQQQRRRRRRRRQRUSRM]_VSQQQQQSSSSSSSSSTTTTTTTTSSSSSSSSRSTTTTSRRRSTTTSSTTTTSSSSTTTTTTTTSSSTTTTSSSTTUUTTTTTTTTUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVWWWWXXXXXXYYYYYYYZZZYYYYYYYZ[[[[[[[[\]^_```_``a````aaaa`_`abaaabbbba`aaaa`aabaaaabbccccccdddgggffffffffeeeedffffffffghhiiiiijjiiiiiijjkklllkkkkkklllmmmnnnnoppppppppooopppppqqqqqqqqrrrqrrrssssssssstsssstuuttttttttOOOOOPPPPPPPPPPPOPQQQQPOOOOPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQQPPPQQQQQQQQQQQQQQQQQQRRRRRRRRRSSSSSSSSSSSSSTSSSRRRRRRRRSSSSTTSSSRQQSSSRRSSSSSRRRSSRSSRRRSSSSSSSSTTTSSSSSSSSSSSSSSSSSSRRRSSSUUTTSSSSTTTSSSRRSSSSSTTTTTTTTTTTTTTTTTTTTTSSSSSRSSRRSTUV\YTPOQSTQQQPPPQQQQPPPPQQQRRRRRRQRRRRRRRRQRTQPM[\URQQQQQSRSSSSSSRUTTTTTUUSSSSSSSSSSTTTTSSRRSTTTTSTTTTTSSSTTTTTTTTSSTTTTTTTTTUUUUTTTTTUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVWWWWWWXXXXXXXXXXYYZZ[[ZZZZZZ[[\\\[[[[[]^_``````aaa```aaaaa``abaabbbbbbaabaaaabbbbbbbcccccccdeehgggggfffffffeeeffffffggghhhiiiijjjjjjjjjkklllllkklllllmnnnnooooppppppppppppppppqqqqqqqqrrrrrrssssssssssttssstuuttttttttNOOOOPPPPPPPPPPPOPQRRQPOPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQRRQQQQQQQQQQQQQQQQQQQQQQQQRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTSSRRRSSSSSSSSSSRRRSSSTSSRRSSSSSSSSTTTSSSSSSTTSSSSSSSSRRSSSSSSUUTTSTTTTTTTTSSSSSSTTTTTTTTTTTTTUUUUUUUUUTTTTTSSTSSSTWZ\^]ZWVWWUTSRQQPPPPPPPPQQRRRSSSSRRRRRRRRRRPPROOKYXURQRRRRTRRSSSSRRUTTTSTTUSSSSSSSSTTTTTTTTRRSTTTT
SUUTTTTSSTTTTTTTTTTTTTTTTTTUUUUUUTTUUUUUUUUUUUUUVUUVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWXXXWWWXXXXYYYZZZZZZZZ[[[\\\\[[[[^^^_``aa`aaa````aabaaaababbbcbbbabbbbabbbbbcccccccccddefgggfffffffgggggghggggghhhhhiiiiijjjjkkkkkkklllllllllmmmmnnnoooooppppppppppppppppqqqqqqqqrrrrrrssssssssssttssstuvttttttttNOOOOPPPPPPPPPPPOPQRRQPOQQPPPPOOPPPPPPPPPPPPPPPPQQQQQQRRRRRRRRRRRRRRRRRRQQQQQQQQPQQRRRQQSSSSSSSSSSSSSSSSRRSTTTTSSSSSSSTTSSSSSSSRSSSSSSSSSSRRRSSSTSSRRSSSSSSSSTTTSSSSSTTTSSSSSSSSRRSSTSSSVUUTTTTTTTTTTTTTSSSTTTTTUUUUUUUUVVVVVVVVUTSSTTSRTSRSUX\_[^`__^ZUVUTRQPPPPPOOPQRRSSSSSSSSRRRRRRRRQQROPN[YTRQSSSSTRRSSSSRRTTSSSSTTSSSSSSSSTTTSSTTTRRSTTTTSUUTTTTSSTTTTTTTTUUTTTTTTUUUUUUUUTUUUUUUUUUUUUUVVVVVVVWWWVVVVVVVVVVVVVVVVWWWWWWWWXXWWWWWWXXXYYYYYZYYYYZZZ]\\\[[[[^^^_``ab`aaa`_```abbaaaabbbcccbbbcccbbccbbcdddccbcccdeefeeffffffggghhhhhihhhhhijhhiiijiijjjkklllkkkllllllllmmmnnnnooooppqqqqqqqqppppppppqqqqqqqqrrrrrrssttttttttuttsstuvttttttttPPOOOPQRRQONNNOPPQQRRQPOPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQRRRRRRRRQQQQQQQQQQQQQQQQRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSRRRRRTSSRRSSTSSRRRSTTSSSSSSSSTTTTTTTTSSSTTTTTTTSSSSRRRRSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUTSTTUVTUUWXYZZ^_``_][YYXWVTSRQOOPPPPQQSSRRRRRQRRRRRRRRUNLWWIYTUTTTTTTUTTTTSSRRSTTSRRSSTTTTUUUUUUUUUUUUTTTUUUTSTSSTUUUTUUTTTUUVUTTTTTTUTTTTUUUUUUUUUUUUUUUUUUUUVUUUUUVVUVVVVVVWVUUUUVWXWWWWWXXXXXXXXWWWXXXXXXXXYZZZYZ[[\]]\\\]]__`````abbaa``____^]^`acccccccbbdccccccceeeeddddeddddeeeeeeeeeeeggghhiiiiiiiiiiiiiiiijjjjjjkkkkklmmmlllmmmmmmmmmppooooppopppoooppppqqqqqqqqpppooqqrsstttsssstttttttttuuvvvvvvvvvPPOOOPQRRQPOOOPQQQRRRRQPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTSSSRRSSTSSSSSSTSSRRRSTUTTTSSSSSTTTTTTTTTTTTTUUUTTTSSSSRSSSTTTTTTTTTTTTTTTTTTTTTUTTTTTTTTTTTTTTTTTTUUUVVUUTTTUVVVVVWWXXX[\]]]\ZYYXWVUTSRQQQQQPPPRRRRRRRRRRRRRRRROXXTOK\QVUTTSSSSUUUUTTSRTTTSSRSTTTTUUUUUUUUUUUUUTTTUUUTTUTTUUUTTTTTTTTUUUUTTTTUUTTTTUUUUUUUUUUUUVVVVVVVVVVUUUUVVVVWWVVWWVVVVVWWXWWWWWXXXXXXXXXWWYYYYYYYYZZ[ZZZ[\]]]]\\]]______`abbaa```
``____abcccccccccccdddddddeeeedeeffeeeeeeeeeeeeffgghhhiiiiiiiiiiiiiiijjjjjjkkkklllmmmmmmnnnnnnnnnppooooppopppoooppppqqqqqqqqqppppqqrsstttssssttttuttttuvvvvvvvvvvPPOOOPQQRRQQPPQQRRRSSRQQQQQQQQQQQQQQQQQQPPPPPPPPOOPPPPQQRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTSSSSSSSTTSSSSTTSSSRSSTUTTTTTTSSTTTTTTTTUUUUUUUUTTTSSSSSSTTTTTTSTTTTTTTTUUUUUUUUUUTTTTTTTTTTTTTTTTTTUVWXVVUUUVVWXXXWWWVVXYYZZYYXXXWVUTTSRRRRQQQQQQQRRRRSSSSSSSSSQQOQRKXSVUTSSRSSUUUUUTTSTTUTTSTTTTTUUUUUUUUUUUUUTTTUUVUTVUUUUUTSSTTTTTTTUUTTTTUUTTUUUUUUVVVVVVVVVVVVVVVVVVUUUUVVVWWWWWWXWWWWWWXXWWWWWXXXYYXXXXXXZZZZZZZZZ[[[[[\]]]]]\\]^___^^_`aaaaaaaaa````aabcbbbbcccccdddeeffdeeeeeffggfeeeeeeeffffffghhhiiiiiiiiijjjiiijjjjkkkkkklllllmmmmnonnnnnnnnpoooooopopppoopppppqqqqqqqqqppppqrrsstttssssttttuutttuvwvvvvvvvvPPOOOPPPQQQQQQQQRRRRRRRQQQQQQQQQQQQQQQQQPPPPPPPPOOOPPPPQSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTSSSSSSSTTTSSTTTTSSSTTUUUUTTTTTTUUUUUUUUUUUUUUUUTTTTSSSSSTTTTTTTUUUUUUUUUUUUUUUUUUUTTTTTTTTTTTTTTTTTUVXXVVVVVVWWYXXWWVVVVWWWWWWWWWVVUUTTSSSRRRRRRRRRRRRRRRRRRRRRSRTVRCR[TTSSSSTTUUUVUUTSTUUUTTTUTUUUUUUUUUUUUUUUUTTUVVVUVUUUVVUTSSTTTTTSUUTTTTUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVWWVVWXXXXXXXXXWWWWWXXXYYYYYYYY[[[[[[[[[[[[[[\]]^^^]]^^___^]^`aaaaaaabb``aabbbbaabbccdddddeeeffdefeeeffggffffffeefffffghhhhiiiiijjjjjjjjjjjjkkkkkkllllmmmnnnnnonnnnnnnnoooooooooppppopppppqqqqqqqqqqqqqrrssttttssssttttvuutuvwwvvvvvvvvPPPPPPPPPPQQQQQPQRRRRRRRQQQQQQQQQQQQQQQQPPPPPPPPPPPPQQQQSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTSSSSTTTTTTTTTTTTTTTUUUUUUUUUTTUUUUUUUUUUUUUUUUUTTTTSSSSSTTUUUUUUUUUUUUUUUUUUUUUUUUTTTTTTTTTTTTUUUUVVWXWVVVWWWWXXXWWVVVWWWVVVVVVVVUUTTTSSSSSSSSSSSRRRRRRRRRRRRRQSTQUR^bTTSSSSTTTTUVUTSSTUUUUTUUUUUUUUUUUUUUUUUUUUTUVVVVVUUVWWVVSSTTTTTSVUUTTUUVUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVUVVVVVWWYYYYYYXXWWWWWXXXYYYZZZZZ[[[[[[[[[[\[[[\]^^_^^]^____^]^`aaaaabbbb`aabbbaaabbbcdddddddeeeeefffedeegfffffggeefffggghhiiiiiijjjjjjjjjjjkkkklllllmmmmnnoonnnoooooooooooppppoooppppopppppqqqqqqqqqqqq
qrrstttttsttttuuuvvuuuvwxvvvvvvvvQQQQQPPPPPPQQQQQRRRRRRRRQQQQQQQQQQQQQQQQQQQQQQQQQQQQRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTSSTTTTTTTUUTTTTTUUUUUVVVUUUUUUUUUUUUUUUUUUUUUTUUUTTTTSSSTUUUVVUUUUUUUUUUUUUUUUUUUUUTTTTTTTTTTTUUVVVVVVVVWWWWWWXWWWWWWWXWWWWVVVVVUUTTTSSSSSTTTTTTSSSSRRRRRRRRRRRPPMSMV`VUTSSRSSSTUVVUSSSTUVUUUUUUUUUUUUUUUUUUUUVUUUVWWWVVVVWWWVSTTTTTTTVVUUUUVVUUUUUUUUUUUUUVVVVVVVVVVVWWWWWVVUTUVVVVWWYYYYYYXXWWWWWXXXYZZZ[[[[\\\\\\\\\\\\\\]^____^^__``__^^`aaaaabbbbaabbbbbabbccdddeddeeeeeefggfeeefffffffggfffggghhiiiiiijjjjjjjkkkkkkkllllllmmmmmnoooonnnoppppppppppppppppppqppppqpppqqqqqqqqqrrrrssstttttttuuuuvvvvvvwwwxvvvvvvvvRRRRQQPPPQQQQRRRRRRRRRSSRRRRRRRRRRRRRRRRQQQQQQQQRRRSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUTTUUUUTTTUUUVVVVVVVVVUUUUUUUUUUUVVVUUUUUUUUTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUTTTTTTTTTTUUVWWWVVVVWWWWWWWWWWWWWWXXXXWWVVWVVUUTTTSSSSTTTTTTTTTTTTSSSSSSSSTWZMF8BWVUTTSSSSTUVWVUTSSTUVUUTTUUUUUUUVUUUUUUUUVUUUVWWWWVVWWXWVTTTTTTUUVVUUUUVVVVUUUUUUUUUVVVVWVVVVVVVVWWWWWVVUUUVVVVWXYYZZYYXWWWWWWXXXZZZ[[\\\]]]]]]]]]^^^]^^_____^^_````___``aabbbbbbbbbbbbbbcccdddeedeeeffffgghggfgggfffffffggghhiiiiiiiijjjjjjjkkkkkkklllllmmmmmnnnoopoooopppppppppppqqqqppppqppppqpppqqqqqqqqqrrrrssttttttuuuvvvvvvvwwxxwwvvvvvvvvRRRRRQQPQQQQRRSSSSSSSSTTSSSSSSSSSSSSSSSSRRRRRRRRSSSSTTTTSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUTTUUUUTTTUUVVVVVVVVVVVVVVVVVVVVVWVVVVVVVUUUUTTTTTUUUUUUUVVVVVVVVVVVVVVVVUUUUUUUTTTTTTTTTTUVWXWWVVVVWWWWWWWWWWWWWWWXXXWWVWWWVUUTTTTTTSSSSTTTTTUUUTTTTTTTTRSK:KVTLUTTTTTTUTUVWWVUTRTUVUUTTUUUUUUVVUUUUUUUUVUUUVWXXXWWWXWVVUUTTTUUVVVUUUUVVVVVUUUUUVVVVWWWWWWWWWWWWWWWXWWVUUVWWWWXXYYZZYYXWWWWWWXXXZZZ[[\\\]]]]]]]]^_____``__`____``````_``bbbbbbbbdcccccccdddddeeedeeffghhghhhhhhiggffeeeehhhiijjjiiiiijjjjjjjkkkkkkllllmmmmmmnnnnnooooopqpppppppppqqrrqqpppqqpppqpppqqqqqqqqrrrssssttttttvvvvvwwwvvwxxxwwvvvvvvvvQRRRRQPOQQRRRSSSSSRQQQRRSSSSSSSSTTTTTTTTRRRRRRRRSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTT
TUUUUUUUUUUVVVVUUVVVVVUUUUTTTTTTUTTTTUUUUUUVVUUVVUUUUVVVVVVVVVVVVVVVVVVVUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVUUUUUUUUUUUUVVVVVWYYXWWXXXXXVVXZXXWWXYYXXXXXWWWWXXWWVVUUUTTTSSRRSSSTTTTUTTTTTTTTWTXVUQWLWVUSSSTUUVVWWVVUVUTTTUUUVVVVVVVVVVVVWVUTVVVVVVVVWWWWWWVVVUTSSTUUVVVUUVVVWWWUTTUUVVVVVWWWXXXXXXXXXXWWWVVVVVVVVVVVZYYYXXWWWXYYYYYYZZ[[\\\\^^]]]^^^^^_``abb`__^^^__]_aa`_`abbbbcccccccbbcccccddeeffdeefhijjhhiiiihhhgfffghijjjjjjjjiiiiijjjjjjkkkkkkllllllmllmnooooooppqqpppppppqqqqqqqqqqqqqqqpppoppppqqqqqqqrrrrrssttttuuvvvvvvvvwwwxxxyyyyyyyyyyQRRSRRQPQQRRSSSSSSRQQRRRSSSSSSSSSSSSSSSSSSRRRRSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUVVVVUUVVVVVUUUUUTTTTUUUUUUUUUUUVVVVUVWVVVVVVVWVVVVVVVVVVVVVVVUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVUUUUUUUUUUVVVVWWZ\^^][[[XZ\\[[YXYXWXYZZYXXXXXWWWWWWWWVVVVVUUTSSRSSTTTTUUTTTTTTTTTVZYSJXSWVVUUUUUVVVVVVVVUUTUUVVUVVVVVVUUUUUVVVVUVVVVVVVVWWWWWWVVVUTTTTVVVVUUUUVVXXWVUUUVVVVWWWWWWWWWWWWWWWWWWWWVWWVVVVWWYYYYYXWWXYZZZZZZ[[[\\\]]^^^^^^___```aaaaa``_____]_aa```bbbbbccccccccccccdddeeeffeeefgghhhhhhhhhhhgggghhiiiiiiiiiiiiijjjjjkkkkkkkkkllkklllmmnooooooppqqppppppqqqqqqqqqqqrrrqqpppppppppqqqqqrrrrrsssttttuuvvvvvwwwwwwxxyyyyyyyyyyyRRSSSSRRQRRRSSSSSSRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUVVVVVVUUVVVVVVVVUUUUUUTTUUUUUUUUUTTUVVVVVVWWWVVVVWWVVVVVVVVVVWWVVVUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVUUUUUUUUVVVWWWWWY\_``_``_abbbb_[[YXXYZ[[YXXXXWWWVVVWWWXXXXWVUTTSTTTTTUUUUUUUUUUUQVUWRDXXUUVVVVUUWVVVVVVWUUUVWWWVVVVVVUUUUUUUVVVUWVVVVVVWWWWWWVVVVVUTUVWXVVUUUUVVXXXWVUVVWWWWWWXXVVVVVVVVVWWWWWWWWWWWWWWWYYZZYYXWYZ[[[[[[[[\\\]]]^^^^__``aaaaaa``ba``____^`aba`abbbbccccccccccccceeeeeeeeeeeeefffgggggggggghhhhiiiiiiiiiiiiijjjjkkkkkkkkkkllllllmmmnnooooooppqqppppppqqqqqqqqrrrrrrrqqqpppppppppqqrrrrrsssttttuuuvvvwwwwwwwxxxyyyyyyyyyyyRSSTTTTSRRRRSSSSSSRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTSSTTTTUUTTTTTTTTTTTTTTTTUUUUUUUUTUUUVVVWWVVUUVVWVVVVVUUUUUUUUUUUVVVUUUTTUVWVVVVWXWWVVWXXVVVVVVVVWWWWWVVUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVUUUUUUUUWWWWXXXXXZ\]]]^
_acccehgd`\YYZZZ[YYYXXXXWVVVWWXXXXXWWVUUTTTTUUUUUUUUUUUUUSVOTTG[[UUUUUUUUWWVVVVWWUUUVXXWVVVVVVVUUUTTUVWVVWWVVVVWWWWWWWWVVWVUUUWXYWVVVVVVWYYYXWVVWXXXXXXXXWWWWWWWWVVVWWXXXXXXXXXXXYYZZZYXXZZ[\[[[\[[[\\]]]^^___`aabbbaaaa`aaa`````_`abaaabbbccccdddccccccdeeeeeeeeffeeeeeeggggggggghhiiiiiiiiiiiiijjjjjkkkkkklllllmnnnnnnomnnoooooooppqqpppppqqqqrqqrrrrrsssrrrqqqppppppqqrrrrrssstttttuuuvvwwwwwwxxxxyyyyyyyyyyyySSSTTTTTRRRSSSSSSSRRRSSTSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVWWWVVVVVVWVVVVVUUUUUVVVVUUVVVVUUUTVVWWVVWWXXWWWWXXVVVVVVVVWWWWWVVUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWXXXXYZ[ZYXY\_^_achklkf`[ZZYYZYYYYXXXXWWWWWWWXWWWVVVUUTTUUUUVVUUUUUUUUVXQTXM^bXWUTTTUUXWWVVWWXVUVWXXWVUVVWWVVUUUUUVWWVWWWWWWWWXXXXWWVVWWVVVWYYXXWWWWXXYYZYXWWWXXXXXXXXWWWWWWWWVVWWXYYYYYYXXYYYYZZZZZYYYZ[[[[[\[[[\\\]]^^_```aabbbbaaaa```aaaaa``bbbbbbcccccdddddccccddffefffffffffffffggffffgghhijjjiijjjjjjjjjjjkkkklllllllllnnoonnoonnooooooooppqqpppqqqqrrrrrrrrssstsssrrrqppqqqqqqrrrrssssttttuuuuvvwwwxxxxxxyyyyyyyyyyyyyTTTTTTTURRSSSSSSSSSRRSTTSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVVWWWWVVVVWWVVVVVUUUUVVVVVVUVVVVVUUUVWWWWVWXXXXXWWWWWWWWWWWWWWWWWVVUVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWVVVVVVVVWWXXXXXYZYYXY]cgddfkmkijme^[[YYZYYYYYXXXXXXXWWWWVVVVVVVVUUUUVVVVVVVVVVVVTXWSVO[dZYWVUVWWWWWWWWWWVVVVWWWVUVWXXXWVVVUUVWWWWWWWWWWWXXXXXXWWXWVVVWXYYXXWWXXYYZZYXWWWXXXXXXXXXXXXXXXXWWXXYYZZZZYYYYZZZZZZZZZZYZ[[[[[\[[\\\]]]^__`````aaaaaaaa__`aabbb`abbcccbccccddddddddddddeeffffffffffggggggffffgghiijjjjjkkkkkkkkkkkkllllmmllllllmnnnnmnoooooooooooppqqppqqqqrrrrrrrssstttttsssrrqqqqqrrrrrrsssstttttuuuuvwwwxxxxxyyyyyyyyyyyyyyyTTTTTTTTSSSSSSSSSSSRSSTTSSSSSSSSTTTTTTTTTTSSSSTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVWWWWVVVVVVVUUUVVVWWVVVVVVVVVVVVWWWWWWXWWXXXWVVWWWWWWWWWWWWWVVUVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWVVVVVVVVWWWXXXXXWWXX\bjooklrqjhmri`^][Z\ZYYYYXXXXXXXXWWWWWWWVVVVUUUVVVVVVVVVVVVVUUYOUSU_ZYXWWWXXWWWXXWWWWWVVVWVVUVWXYYXXXWVVWWWWXWWWWWWXYYYYYXXXYXWVVWX
YYXXWWXXYYZZZXWWWXXXXXXXWXXXXXXXXXXYYYZZZZZZZZZZZ[ZZYZZ[\ZZ[\[[[\\\]]^^^^^__```__aaaaaaaa__`aaaaaaabccdcccccdddddddddddddeeeffgggfffffggghggffgghiijjjjjjkkkkkkkkkkklllllmmmmmlllmnnnmmnooopppoooooppqqppqqqqrrrrrrrsstttutttssssrrrrrrrrrrsssstttttuuuuuvwwwxxyyyyyyyyyyyyyyyyyyTTTTTTTTSSSSSSSSSSSSSSTTSSSSSSSSTTTTTTTTTTTTTTTTUUUUUUUUTTTTTTTTTTTUUUUVUUUUUUUUUUUUUUUUVVVVVVVVWWVVVVVVVVWWWWVVVVVVVUUUVVWWWWVVVVVVVVVVVWWWWWWXVWXXXWVUXXXXXXXXXXXWWVVUVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWVVVVVVVVWWWXXXXXZYYY\binpjjqqjluukb_^\\^ZZYYYYXXXXXXXXXXXXXWWVVUUUVVVVVWVVVVVVVVZUXKX\SYWWWXWWWWWWXXXXWWXWVUVVVVUVWYZZYYYXWVWWWWXXWWWWXXYYZYYYXXYXWVVWXXXXWWWWXXYYZZYWWWXXXXXWWWXXXXXXXXYYYZZZ[[[ZZZZZZ[[ZZYYZ[\Z[\\\[\\]]^^_______``___baaaa```_``aaa``aabcddcccccddddeeddddddeeeeffgggffffffffhggffgghijjjjkkkkkkkkkkkkkllllmmmmmmmmllnnoonnoooppppoooooppqqppqqqrrrrrrrrsstttuutttsssrrrrssssrssssttttttuuuuuwwwwxxyyyyyyyyyyyyyyyyyyTTTTTTTTSSSSSSSSTTTTTTSSSSSSSSSSTTTTUUUUUUUUUUUUSSSSSSSSTTTTTTTTUUUUUUUUUUUUUUVVWVVVVVVVVVVVVVVVWWWWWWWWVVVVVWWWVVVVVVVVWWVVVWWXVVVVVWWXVVVWWXXXVVWWWWVVVVVWWXXXXXXWUUWYWWWWWWXXWWWWWWWWWWVVVVWWXXXXXXXXWWWWWWWWWXXXYYYYXXY\bipsupmmkgimpqolfa]\[ZYXXXXXXXXXXXXXXXXXWWWWVVVVWWWWWWWWWWWWUTS_YWRX[ZXWWWWVXXXXXXXXVVVVUUVWWWWWWWXYWWWWWXZ\YZZYYXXYZZZYYZZZZYYXXXXX[ZYXXYZ[[ZZYYYXWZZZZYYXXZYXWWWXYYZZZZZZYZZZZZZZZ[[[[[[[[Z[\\\\]]______________^^a``````__``aaa``abbcccccbccccddddddddeeecdefeeeeeeefffffiihhhhiijjjjiijkikllkjjkkkllmmmnnmmllmmnooooooooonoopppooopqqqqqrrrrrrsssssttuuvvvvuutttsssssssssssssttttuuuvwwwyyxxxyzzyyyyyyyyzzzzzzzzTTTTTTTTSSSSSSSSTTTTTTSSSSSSSSSSTTTUUUUUUUUUUUUUTTTTTTTTTTTTTTTTUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVWWWWWWWWVVVVVWWWVVWWWWWWWWWWWWWWVVVVVWWXVVWWXXXYWWXXXXWWVWWXXXXXXXXVVVWXXXXXXXXXWWWWWWWWWVVVVWWXXXXXXXXXVVWWWXXXXXYYZZZ[ZZ[^cinpqjfghffiijkjfb^\Z[[ZZYXXXXXXXXXXXXXXWWWWVVVWWWWWWWWWWWWWWXZXXWZZZYXWXXWWYXXXXXXXWWWVVVWXXXXXWVWXWWWVVWY[Z[[ZYYYY[[ZZZZ[[[ZYXXXXXZZYXXYZ[[ZZZZYXWYYYYYYYXZYXWWXYYZZZZZZZZ[ZZZZZZ[[[[[[[[[Z[\]]]]^________________aa``aa`````aaa``bbbccccccccccdddddddeeeedefffff
fffggggggiihhhhiiijjjiijkjkkkkjkllllmmnnnmmlllmnoooooooooonnopppooopqqqqqrrrssssssssttuuuvvuuutttssssssssssssssttttuuvvwwxxxyyyzzzzzzzzzzzzzzzzzzTTTTTTTTTTTTTTTTTTTTTTSSSSSSSSSSTTTUUUUVUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWVVVVVVVWWWXXXXXXXWWWVVVVWWWWWWWXXXYYYXYYYYYYXWXXXYYYXYXWVWXXXXXXXXXXXWWWWWWWWVVVVWWXXXXXXXXXXVVWWXXYYYYYZ[[\\[[]`dgjkld^`ccbbabdffc_\Y[]\[YXXYYYYYYYYXXXXWWWWWWWWWWWWWWWWWWWWXVZSVOVXZYXXXXXXZYYXXXXYXXXWVWXXYYYXWVVWWWWWWXY[[[[[ZZZZ\[[[[[[\[[ZYYXXXZYYYYZ[\[[ZZZZYXXYYYYYYYYYXXXXYZ[ZZZZZZ[[[ZZZZ[[ZZ[[[[[[[\]^^^^_________________baaaaaa```aaaaaabbbcccccccccddddddddeeeedfgggffghhhhhhhhiiihhiiiijjjiijkkkkkkklmnnnnnnnnmmmmmnopoooooooonnnoppooooppqqqqrsssssssrssstuuuuuuuttttssssssssssssssttttuuvvwwxxxyzzzzzzzzzzzzzzzzzzzzTTTTTTTTTTTTTTTTTTTTTTSSSSSSSSSSTTTTTUUUUUUUUUUUVVVVVVVVUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWVVVVVVVWWXXXXXXXXWVVVVWXXWWWXXXXYYYYYYYYYYYYXYYYYYYYYWVWXYXWXXXXXXXXWWWWWWWWVVVWWXXYXXXXXXXXWWWXXYYYYYZZ[[\\Z[]`bdedga[\^^]\]]^abb`^Z[]\ZYYYYYYYYYYYYYXXXXXWXXXXXXXXWWWWWWWWXPTYYMR]ZYXXYYYXZYXXXXXYYYYXWXXYZZYXWVWWYYYYYYZ\[[\[ZZZ[\\\[[\\\\[[ZZYYYZYYYZ[\\[[ZZ[ZYYXXYYZZZYYYXXXYZZ[[ZZZZ[[[[[ZZ[[[ZZ[[[[[\\]^^^__`_____________```baaabbbaaaaabbbbbbcccdcccccddddddddeeeefefghhgghhhhhiiiiiiiiiiiiiijiiijklkkkklmnoooooooommmmnoopnnnnnnnnnnnoppoooooppqqqrrrrssssrssstuuuuuuuttttsssssssstssssttttuuvvwwwwxyzzzzzzzzzzzzzzzzzzzzzTTTTTTTTTTTTTTTTTTTTTTSSSSSSSSSSSSSTTTTTUUUUUUUUUUUUUUUUVVVVVVVVUUUUUUUUUUUVVVVVVVVVVWWWVVVVVVVVWWWWWWWWWWWWVVVVUUVVWWXXXXXXXWVVVWWXXXWWYYYYYYYYYYYXXYYYYYYYYYYXXWWXYZYWXXXXWWWWWWWWWWWWVVWWXXXYXXXXXXXXXXXXYYYYYYYZZ[[[Z[]`bcbab_\\\\\\^]\\^_`_[\[ZXXYZZZZZZZZZZZYYYYXXXXXXXXXXWWWWWWWWXSS_ZWYgZYYYZZYXYYXXXXYYZZZYYYYZZZZYXXXYZZZZZZ[\[[\[[Z[[\\[[[[\\\\\[[ZZZZZZZZ[\][[[[[[ZZXYYZZZZZYYYYYZ[[\[[ZZ[[\[[[[[[[[[[[[[\\\]^_____`____________````baaabcbbbbbbbbbbbcccddddccddddeedeeeeffffghhhhhhhhhhiiiiiiiiiiiiiijjijkkmlkklmnnooooooooonnnnoooooooooooonnopppooopppqqqrrrrrrsssssttuuuuuuuutttttttttttttttttuuuuvvwwxxwxyzzzz
zyyyyyyyyzzzzzzzzTTTTTTTTTTTTTTTTTTTTTTSSTTTTTTTTSSSTTTTTVVVVVVVVUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVWWWWWWWWWWWWWWWWWWWWWWWWVVVVVUUVVVWWWXXXXWWWVWWXXXXWWYYYYYYYYXXXWWXXXYYYYXXXWWWXYYZYXXXXWWWWWWWWWWWWWVWWXXXXXXXXXXXXXYYYYYYYYYYYZZZZZ[\]`bba`]]]]\\]^_][Z[]^_]\ZXXXZ[[[[[[[[[[[ZZZZYYYYYYYYXXWWWWWWWWX[WYRZ[aZZZZZZYXXYYYYYZZ[[[ZYYZ[ZZZZYYZZZZZZYYZ[[[\[[[[\\\\\\\\\\\\\\\[[ZZZ[[\\\\[[[\\[ZZZZZ[ZZZYYYZZ[[\\\[[[[\\\[[[[[[\[[[\\]]]^^_________________```aaaaaabccbbbbbbbccbccddddddddddeeeeeeefffffghihhhhhhhhiiiiiijjjjiiijjjjjklnmllmmnmnnoooooooooooooooooooooooooppqppppppqqrrrrrrssssssttuuvvvvvuuuuuuuuuuuuuuuuuuuuvvvvwwxxyxyyzzzzzyyyyyyyyzzzzzzzzTTTTTTTTTTTTTTTTTTTTTTSSTTTTTTTTTTTTUUUUVVVVVVVVUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVWWWWWWWWWWWWWWWWWWWWWVVVVVWWWVVVWWWWWWWWWWWWWXXXWWXXXYYYYYXXXXXWWWWXXXXXXXWWVVWYZZYYYYXXXXWWWWWWWWWWWWWXXXXXXXXXXXXXXYYYYZZZZZZZZZZYY[\]_```_[\]]]]]]]]\[Z[]^_\YXYZ[Z\\\\\\\\\\[[[[ZZYYYYYYYYXXXXXXXXX\YTPUWW[[ZZZZYXXYZ[[[[[\\\[ZZ[\[[[[ZZ[[Z[[[ZZZ[[\\\\[\\]]]\\]]][\\]]]\\[[[[[\\\\[[\\\\[[[[[[[ZZYYZZ[[\\[\\\\\\[\\[[[[\\\\\\]]^^^_``_^^_________^__``aaaaa`abccbccbbbccdcccdddddddddeeeeeeeefffffghihhhhiiiiiiiiiijjjjiijjkkkklmnmmmnnmlmmnnnooooooppppppppppppppoppqqqpqqqqqrsssssstttttttuuvvwvvvvvvvvvvvvvvvvvvuuuvvvwwwxxyyyzzyyyyzzzzzzzzzzzzzzzzzzTTTTTTTTTTTTTTTTTTTTTTSSTTTTTTTTUUUUVVVVVVVVVVVVVVVVVVVVUUUUUUUUUUUUUUUUVVVVVVVVVVVWWWWWXXXXXXXXWWWWWWWWVVVVVWWWWWWWWWWWWWVVVWWXYXXWWWXXYYYYXXXXXXWWWWXXXXXWWVVVUWZZZYYYYYYXXXXWWWWWWWWWWXXXXXXWXXXXXXXXXYYYZZ[[[[[ZZZYY[[\]^^^]\]]^^]\ZZ\]\[[[\`]YXZ\[Z\\\\\\\\\\\\[[[[ZZZYYYYYYYYYYYYYYVZW[UYY\[[[[ZXWXY[\]]\\]]\\[[[\\\\\[[[\[\\\[[[\\]]]\\]]^^]]]]^^[[\]]]]][\\\\\\\\[[\]]\[\\\\[[[ZYYZZ[\\\[\\]]\\[\\[[[[\\\\\]]^^^_```_^^^________^__``aaba``abccbcccbbcddccddddddddddeeeeeeeffffffghihhhhiiiiijjjijjkkjjijkkkkklmonmmnnmllmmmnnoonooppqqqpppppppppppqqrqqqqqqqrstttttuuuuttuuvvwwwvvvvvvvvvvvvvvvvvvvvvwwwwxxyyzz{zzyyyyzzzzzzzzzzzzzzzzzUUUUUUUUTTTTTTTTTTTTTTTTSTTTTUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWXWWWWWXXWVVUUVV
WWWWWWWXXXXWVVWXXYXXXXXYYZZZYYXXXWWWWXXXXYYXWVVVVYYYYXXXYYYXWWXXXXXXXXXXXXXXXXXXXXXWXYYYYYYYYZZ[\[[[[[[[[[[\]]]]]\\\\\\\\\\\\[[]_^\\]\ZZ]\\\\\]]]\\\\\\\\[ZZZZZZZZZZZZYYYX][`TVXX\[ZZ[\[ZYZ[\\\\\]]]\\[\]\]^^]\[[[[\\\[[[\\\\\\\\\]___^]]]]]^^^\[\\[Z[\\\Z[[\\\\\\\\\\\\\[[[[[\\\[[\\\\]]]\\\[\\\\]]]]]]^_`````__________^^_`abbc``aabcccccccbcccddddefeedeeeefffffffffffggghhhhhhhiiiijjkkkjjjjjllllllmmlmnnmmmnmmnnnnoooooppqqqppppoopqppppqqqqqqqqqrrruutttuuvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwxxxyyyyzz{{zyyzzzzzzzzzzz{{{{{{{{UUUUUUUUTTTTTTTTTTTTTTTTTTTUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWXXWWWWWXWWVVVVVWWWWWWWWWXWWVVWXXYXXXXXYYZZZYYXXXXXXXXXXXYYXWVVVVYZZZYXYYYYXXXXXYYYYYYYYYXXXXXXXXXXXXYYYYYYYYYZ[\[[[[[[[[Z[[\\\]]\\\\\\\\\\\[[[\^]\\]\Z[]\\\\]]]]\\\\\\\\\\[[[[[[[[[[[[ZZ][YXYZ]W\[[[\\\[YZ[\\\\\]]]]\\\]\]^^]\[\\\\\\\\[]]]]]]]]]^__^^]]]]]^__^\]\[[\\\\[[[\\\\\\\\\\\\\\\\\\\\\\\\]]]]]]]\\\\\\\]^^^^^___``````___________`aabb``aabcccccccccccddddefeeeeeeefffffffgggggghhhhiihhhiiiijjjjjjjkklllllllllmnnnnnonnnooooooooppppqppppoopqqqqqqrrrqqrrrrssuuuttuvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwxxxyyyyzz{zzzzzzzzzzzzzzz{{{{{{{{UUUUUUUUTTTTTTTTTTTTTTTTUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWWXXWWWWWWXXWWWWWWWWWWWWWWWWVVWWXYYXXXXYYYZZYYYXXXXXYYYYYYYYXWWVWWZZZZYYYZYYYXXXYYYYYYYYYYXXXXXXXXYXXYYZZYYYYYYZ[[[[[[[[[[ZZZ[[\\\]]]]]]]]\\\[[[[\\\\\\[[]\\]]]]]]]]]]]]]]]]]\\\\\\\\\\\\\YX]STNVX]\\\]]\\ZZ[\]]\\]]^^^]]]\]^^]\\\]]]]]]\\]]]]]]]]^^__^^^_^^^_``_^]\\\]]\\\\\\\\\\\\]]]]\\]]]]]]]]]]]]^^^^^]]]]]]]]]^^^__`___``aaa```````````aaabbaaabbcccccccccddedddefeeeeeffffffffghhhihhhhhiiihhhiiiijjjjjkkkklllllllllmnoooopooooooooppppppppopppopqrqqqrrrrsrrrrssssuutttuuvvvvvvvvvwwwwwwwwxxxxxxxxwwwwwxxxyyyyyzzzzzz{zzzzzzzzz{{{{{{{{{{{UUUUUUUUTTTTTTTTTTTTTTTTUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWWWXXXWWWWWXXXXXXXYWWWWWWVVVVVVWXXYXXXYYYYZZYYYYYYXXXYYYYYYYYXWWWWWYZZZYYYZYYYYXXXXYYYYYYYYYYYYYYYYYYYYZZZZZYYYYZ[[[[[[[[[[[[ZZ[[\]]]]]]]]]\]]\\[[
[[\\\[[\]]]]]]]]]]]]]]]]]^^]]\\\\\\\\]]]]^^e^ZRYg]\\\]]\\ZZ[\]]]]]^__^^]]]]^]]\]]]]]^]]]\]]]]]]]]___^^^_`___````_]]]]^^]]]]\\\\\\\\]]]]\\]]]]]]]]]]]^^^^__^^^^^^^^_____```````aabaaaaaaaaaaaaaabbaabbbcccddddddddedddeffefffffgggggghhiiihhhiiiijhiiiiijjjkkkkkkklllllllllmnoooppppoooonnppppppoooopppprsqqrrrrrsrrrsssssutttttuuvvvvvvvvwwwwwwwwxxxxxxxxxxxxxxxxyyyyzzzzzz{{{zzyzzzz{{{{{{{{{{{{UUUUUUUUTTTTTTTTUUUUUUUUTTUUUUVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWVVVVVVVVVVVVWWWWWWWWWWWWXXXXXXXXXXXXXWWWWXXYYYZZXXXWWWVVVVVWWXXYXXYYYZZZYYYYYYYYXXXXYYYYYYXWWWXXYYZYYYYZYYYYXXXXYYYYYYYYYYYYYYZZZYYZZ[[ZZZZYZZ[[[[[[[[[[[[[[[\]^]]]]]]]]^^^^^]\\[\\\[\]]]]]]]]^^]]]]]]]]^]]]\\]]\\\\]]]]]YY]ZYUh\\\\\\\\Z[\]]]]]]^___^]]]]^]]]]^]]]]]]]\^^^^^^^^____^_`aa```aa`__^]]^^^^^^]]\]]]\]]]]]]\]]]]]]]]]]]^^^^_``______``a`___`aa````aabbbbbbbbbbbbbbbbbbbbccccdddeeeeeeddeeffefffggggghhhhhhiiiiiijjjjiiijjjjjlkkkkkkklllllllllmnoooopoooonnnnqqpppooooppppprsqqqqrrrrrrrrrssstssssstuvvvvvvvvwwwwwwwwwwwwwwwwxxxxxxxxyyyzzzzzzz{{{{zyzzz{{{||{{{{{{{{UUUUUUUUTTTTTTTTUUUUUUUUTUUUUVVVUUUUUUUUUUVVVVWWWWWWWWWWWWWWWWWWVVVVVVVVVVVVWWWWWWWWWWWWXXXXXXXXXXXYYXXXWXXYYZZYYYXXWWWVVVWWWXXXXXYZZZZZYYYYYYYYXXXXYYYYYYXXXXXYXYYYYYYZYYYYYYXXYYYYYYYYYYYYZZZZZZZZ[[[[[[ZZZ[\\[[[[[[[[\\\\\]]^^^^^^^^^^^^_`_^]\]]\[\]]]]]]^^^^^^^^^^^^]]]\]]]]\\\\]]]]`^\\X[Qe\]]]\\\]Z[\]]]]]]^__^^]^]]^]]]^_]^^^^]]]^^^^^^^^_``___`bba``aa`_`_^^^_____^]]]]]]]]]]]]]^^^]]]]]]^^^^___aa``````aaaa____baaaaaaabbbbbbbbccbbbbbbccccccccddefffeeeddefffeggggghhhhhhhhhhhiijjjjjkjjjjkkkkllllkkkklllmmmmnmnoonnooooonnnnnppppppooppppppqsqqqqrrrrrrrrrssstsssssttvvvvvvvvwwwwwwwwwwwwwwwwxxxxxxxxyyzzzz{{{{{{{zzzzz{{{|||{{{{{{{{UUUUUUUUTTTTTTTTUUUUUUUUUUUVVVVVUUUUUUUUUUVVVVWWWWWWWWWWWWWWWWWWVVVVVVVVVVVWWWWWWWWWWWWWXXXXXXXXXXYYZZYYXYYYYYYXZZYYXWWWVWWWWWWWXXYZZZZZYYYYYYYYYYYYYYYYYYXXXXYYYYZZYYZZZZZ[ZZYYYYYYYYYYYYYZZZ[[[ZZZ[\\[\\[[[[\]\\\\\\\\[\\]]]]]^^^^^^^^^^_`a`_]]__]\]]]]]^^^^^^^^^^^^^^\\\]]^^_]]]]]]]]Z\[LMTJV\]^^]\]^[[\]^^]]^^_^^]]^]]^]]]_`^^__^^^^^^^^^^^^_`````abaa``aaa`a`^]^_```_^]]]]
^^^]\\]^^__^^^^^^^^____``baaaaaaa`aa`____aaaaaaaabbbbbbbbccccccccccccccccddeffffeeedeffffggghhhhhhhhhhiiijjjjjkkkkkkkklllllllllkkkllmnnoonopoonoooooooooopppppppppqqpppqrrrrrrsssrrsssstttttsttuuvvvvvvvvxxxxxxxxxxxxxxxxyyyxxxxxzzzzz{{{{{{zzzz{zz{{||||{{{{{{{{UUUUUUUUTTTTTTTTUUUUUUUUVVVVWWWWUUUUUUUUUUVVVVWWWWWWWWWWWWWWWWWWVVVVVVVVVVWWWWWWWWWWWWWWXXXXXXXXXXYZZZZZYZZZYXWWZZZYXXWWWWWWWWWWXXYZ[[ZZXXYYYYZZYYZZZZZZYYXXXXYYYZZZZZZ[[[[\\[ZZZZZZZZZZYYZZZ[[[[ZZ[\\\[\\\[[\\]\\\\\\\\[[\]]]]]^^^^^^^^]]^`aa_]^`_]\]^\]^^^^^^^^^^^^^^^\\]]^__`^^^]]]]]_]Y@S`TQ]^_^]]^_[[\]^^^]^^^^]]]^]]^]]^_`_______^_________`````aba```aaaab`^]^_`a``^]]]]^_^]\\]^________^___`````bbaaaaaa``a``_````abbbbbbbbbbbbbccccccccccccccccdeffggfeeedeffffggghhhhiggghhiiijjjjkkkkkkklllllkkklllllkllmnoppopppooooooooppppppppppppqqqpppqrrrrsssstssssttttuutttuvvvvvvvvvvxxxxxxxxyyyyyyyyyyyyxxxxzzzzz{{{|{{zzz{{zz{{|||}{{{{{{{{VVVVVUUUUUUUUUUUUUUUUUUUVVVVVVVWWWVVWWXYXXXXXXXXWWWWWWWWUVVVVVVVVVVWWWWWWWWWWWWWWWXXXYYYYYYYYYYYYYYYYYYYZZZZZZYYZZYZ[[YXXYZZYXYZYYZ[[ZYYZZZ[[\\\Z[[[[ZYXYYYYYYYYZZ[[[[[ZZZYXXYZ[[[[ZZZYY[[[[[[[[\\\\\\\\\\\\\\\\\\\\\]]]]]]]]]]]^_``_^^^]]]]]]]]^^_^^^]]\]]^^_``^^^_`aaab`_]\\]]__``]^ky�mb``]J^^_a`^\]\[\^_^^`___``___]^^^_^^^^`a__``]^``_^__^^_abaa```aabbaa`abcbbaaa`__^^_ab_`_^\\^`___``aaa_``aaaa`aa`___abbbbaaaaaaaaaaaaabbbbccccdddddddddddddddddeeeefffeeefghhghhhiiiiiiiiiijjjkkkkkkkkkkkkkkkkkklmnnnnmmlmmmnnmmmmmmmmmnnooppppppqqqqqqrrrrrrrsssssssssssssssstttttuuustuvwwwwxxxyzzyxyyyzzzzzzzyyyyzzyyyyyyyyz{{{{|}~}||||}}}}}}}}}}}VVVVVVUUUUUUUUUUUUUUUUUUVVVVVVVWWWVVWXXYXXXXXXXXWWWWWWWWVVVVVVVVVVWWWWWXWWWWWWWWXXXXYYYYYYYYYYYYYYYYYYYYZZZZZZYYZYYZZZYXYZZZYXXYXYZZ[ZZYZZZ[[[[\[[[[ZZYXYYYZZZYYZZ[[[[[Z[ZZYYZ[[\[[[[ZZZ[[[[[[[[\\\\\\\\\\\\\\\\\\\\]]]]]]]]]]]]^_``_^]^]]]]]]]]^^^^^^]][[\]^__`aaabdeeecca`___^adimmpz�����m]]``^^__^]]]\\^_^_`^__``__^^^^^^^^^^``__`_]^`a__``_^_abaa```aabbaa`abbbbaab`__^^_`a`a`_]]_a```aaabb``aabaaaaaa`__abbbbaaaabaaaaaaaabbbbccccddddddddddddddddeeeeffffffffghhhhhiiiiijiiiiijjjkkkkkkkkllllllllkllmnnn
nmmmmmmnnmmmmmmmmnnnooppqppqqqqqqrrrrrrrrsssssssssssssssssttttuuuttuvvwwwxxxxyyyxyyyyzzzzzzyyyyzzyyyyyyyy{{||{|}~}}|||}}}}}}}}}}}VVVVVVVVUUUUUUUUUUUUUUUUVVVVVVVWWWWWWXYYXXXXXXXXWWWWWWWWWWWWVVVVVWWWWWXXXXWWWWXXXXXXYYYYYYYYYYYYYYYYYYYYZZZZZZYYYYYYZYYXZ[[ZYXXXXYYYZZZZ[[[[[[[[[[[ZZZYYXYZ[[ZZYZZZ[[[ZZ[[ZZZ[\\\\\[[[[[[[[[[[[[\\\\\\\\\\\\\\\\\\]]]]]]^^^^^^^^^^__^]]]]]]]]]]]^^^^^^]][[\]^_``aaabcdedcccba`_^^gw���������uebkhc___^]^]\\^_^_`^__``__^^^^^^___^``__`_^]_a``a`_^`abaaaaaaabbaaaaabbbabba`_^^__`abb`__`a`aaabbbbaaabbbaaaaa```abbbbaaabbaaaaabbbbbcccccdddddddeeddddeeeeeeeffffgggfghiiihiiiiijjiiijjjjjkkkkkkkkllllllllkllmnnnnmmmmmnnonnnnnnnnnnooppqqqqqqqqqqrrrrrrrrsssssssssssssssssttttuuuuuuuvvwwxxwwxyxxxxyyyyzzzzyyyyzzzzzzzzzz{|||{{|}}}||}}}}}}}}}}}}WWVVVVVVUUUUUUUUUUUUUUUUVVVVVVVWWWWWXXYYXXXXXXXXWWWWWWWWXXWWWVVVWWWWWXXXYXWWWWXYYYYYYYYYYYYYYYYYYYYYYYYYZZZZZZYYYYYYYYXX[[[ZYYXYYYYYYZ[[[[[[[[ZZ\[ZYYYZZYZ[\[[ZYZZZZZZZZ[[[[[[\\\\\[[[[[\\\\\\\\\\\\\\\\]]]]]]]]]]]]]]]]^^^^^^^^]^__^]]]]]]]]]]]]]^^^^^]^^_``abb`__`abaaaaaa````dhqz�������zomgkqjc``_^^^\\^^^_a__````___________`````_^]_aaaba`_`abaaaaaabbbbaa`abbbabbaa`___``abba```aaaaabbbbaaaaaaaaaaaa``abbbbaaabbaabbbbbbcccccdddddddeeeedeeeeeffffffgggghgghhiihiiiiijjjiijjjjkkkkkkkkkkllllllllllmmnnnnnnnnnnoooooooooooooppqqqqqqqqqrrrrrrrssssssssssssssssssstttttuuuvuuuuvwwxwwwwxxxxxxyyyyyzzyyyyzzzzzzzzzz{||{{{{|}}}}}}}~}}}}}}}}WWWWVVVVUUUUUUUUVVVVVVVVVVVVVVVWWWWXXYYYXXXXXXXXXXXXXXXXYXXWWWWWWWWWXXXXYXXWWXXYYYYYYYYYYYYYYYYYYYYYYYYYZZZZZZYYYYYXXXXXZZZZZZZZYYYYYZ[\\\[[[ZZZ[[ZYYZZ[[[\\\[YXZZZZZ[[[[[[[[[\\[[[[[[[[\\\\\\\\]]]]]]]]]]]]]]]]]]]]]]^^^^^^^^^^]^__^]]]\\\\\\\\]]^^^^^^aaaabbbbaa``aa`____`bdghjkmquxz|{{�pdffpslea`_^^_]]^^^_a_``aa``_``___``````aa`__]`baabba_`abaaabbbbbbbbbaabbbabbbbaa``aaabbba```aaaaaaaaaaa```aaabbbaaabbbaaabbbbbbbbbbccccdddddddeeeeeeeeeeffffggggghhhhgghhiihiiiijjjjjjjjkkkklllllllllllllllllmmnnnnmnnnnnoopppppppppppppqqqqqqqqrrrrrrrsssssssssssssttttttttttuuuuuvwvvvvvwxxwwwwxxxxxxyyyyyzzyyyyzzzzzzzzzz{{{{zz{{}}}}}}}~}}}}}}}
}WWWWWWVVVVVVVVVVVVVVVVVVVVVVVVVWWWXXYYYYYYYYYYYYYYYYYYYYYYXWWWWXWWWXXXXYYYXXXXYYZZZZYYYYYYYYYYYYYYYYYYYYZZZZZZYYYYYYXXXYZYYZ[[[[ZZYYZ[[\[[[[[[ZZ[ZZYYZ[[\]]]\[YX[[[[[[\\[[[\\\\\[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]^^^^^^^^^^^^^^__^]]]]]]]]]]]]^^_____aaaaaaaaba````_^__`beilomnqtutpnmjlg`^cpkfba`_^^_^]^^^_a``aaaa``aa```aaba`abb```^abbbbba`abbaabbbbbbbbbbabbbbaaabbbbabbbaabbba`_aaaaaaaaaa``````abbbbabbbbaaabbbbbbcccccccddddeeeeeeeeeeeefffgggggghhhhiihhhiihhiiijjjjkjjjkkkklllllllllmmmmmmmmmmnnnnmmooooooppppppppppppqqqqrrqqrrrrrrssssssssssssssssttttttttuuuvvvvvwwwwwwwxxxwwxyxxxxyyyyzzzzyyyyzzzzzzzzzzz{{{zz{|}}}}}}~~}}}}}}}}WWWWWWWWVVVVVVVVVVVVVVVVVVVVVVVWWWXYYYYYYYYYYYYYZZZZZZZZYXXWWXXYWWXXXXYYYXXXXXXYZZZZYYYYYYYYYYYYYYYYYYYYZZZZZZYYYZZYXXYZZYYZ[\\[[[[[[[[\[[[[[[[[ZZZYZZ[[]]]\\[ZZ\\[[[\\]\\\]]]]\\\\\\\\\]]]]]]]]]]]]]]]]^^^^^^^^]]^^^^^^^^^^^^^^^_``_^]^^^^^^^^^^^__`````aaaaaaaba```__^aabcehjknkheegijeb[]__ccb`_``_^_`^]^^^`baabbbbaabbaaabbcbaacca`a_bcbabcb`abbaabccbbbbbbcbbcbaaaabbbbbbbbaabbcb`_bbbbbaaabaa```aaabccbbbbbaaaabbbccccccccddddeeeeeeeeeeefefffggghhhhhhiiijiiiijiiiijjjjkkjkkkklllllllllllmmmmmmmmmnnnnnmmoooooppqqqqqqqqqqqqqrrrrrrrrrrrrssssssssttttttttttttttttvvvvwwwwxxxxxxxxxxxxyyyxyyyyzzzzzzyyyyzz{{{{{{{{z{{{{{|}}}}}}}~~}}}}}}}}WWWWWWWWVVVVVVVVVVVVVVVVVVVVVVVWWWXYYYYYYYYYYYYYZZZZZZZZYXXWWXYZWXXXXYYYXXYYYYXX[[ZZZYYYYYYYYYYYYYYYYYYYZZZZZZYYYZZYXXYZZYYZ[\\[[[[[[[[[[[[[[[[[ZZZZZZ[[]]]\\[[[\\\[[\]]\]]]^]]]\\\\\\\]]]]]]]]]]]]]]]]]^^^^^^^^]^^^^^^^^^^^^^^^^_``_^^^^^^^^^^^^^_``````aaabbbbbbaaaa``bbbccccceffffedcai`\]`eW]]_aa_^^`^]^^^`babbccbbacbbaabcdcabdcaab`bcaabcb`abbaabcccbbbbccbccca```aaaabbbbbbbccba_ccccbbbbbbaaaaababccbbbbaaaaabbbccccccccddddeeeeeeeeeffffffggghhhhhhiiiikjijjjjiijjjjkkkkkkkklllllllllllmmmmmmmmnnnnnnmmoooopppqqqqqqqqqqqqrrrrrrrrrrrrrssssssstttttttttttttttttvvwwwwxxxxyyyyxxxxxyzzyxyyyzzzzzzzyyyyzz{{{{{{{{{{|||}~}}}}}}~~}}}}}}}}XXXWWWWWWWWWWWWWWWWWWWVVVVVVVWWWWXXYYYYYYYZZZZZYZZZZZZZZYXWWXXXXXXXXXXXXZZZZYYYXZZZZZZYYYYYYYYYYYYYYYYYYZZYYYYZZZZYYYY[\[[ZZZZ[
[\\\\\\\\ZZZ[[[\\[ZZZZ[\\]]]]\\\[\\\\\\\\]]]]]]]]\\\\]]]]\\\\\\\\^^^^^^^^^^^^^^^^^^^^^^^^________^^^^^^^^]]]^^^^^____````aaaa``abba`___``cddeeddcbcccddccdh^_d``Xd]a`^_]^```__```bbbcccccbccccbccdccccccbaacdddcccaacdcccccbccccbbccdcb`_aaaaabbcbbcdcba`acdbbccbcbaabbbbbbccccbbbbbbccccdddddddddddddeeefffffffffffgghhhhhhhiiiiiiiiiiijjjjjjkkkkkklllllllllmmmmmnnnnnnnnnnnoooooooooppqpqqrrrqqqqrrrrrrrrrrrrrrrrrrsssstttttttuttttuuuuvwwwwxxxxxxxyyyyyyyyyyyyyyyzzzzzzzzzzzzz{{{{{{{|}}}}}}}}}}}}}}}}}}}}}}}}XXXXXWWWWWWWWWWWWWWWWWVVVVVVWWWWWXXYYYYYYYZZZZZYZZZZZZZZYXWWXXXXXXXXXXXXZZZYYYYYZZZZZZZZYYYYYYYYYYYYYYYY[[ZZYYZZ[ZYYZ\\[[[[ZZ[[[\\\\\\\\ZZ[[[[\\[[[[[[\\]]]]\\\\\\\\\\\\]]]]]\\\\\\\\]]]]]]]]]]]^^^^^^^^^^^^^^^^^^^^^^^^________^^^^^^^^^^^^^^]]____```````___`abaa`````bccddccccccdddddahccaZ\\e^bb`b`a``````aabbbbccccccdccbccdccccccbabcddddccaacdccddccccccbbcddcba`baaaaabbbbcdcba`acdbbccbcbaabbbbbbccccbbbbbcccccddddddddddddeeeefffffffffffgghhhhhhhiiiiiiiiiijjjjjjkkkkkklllllmmmmmmmmmmmmnnnnnnnnnoooooooooppqpqqrrrqqqrrrrrrrrrrrrrrrrrrsssssttttttttttttuuuuvvvwwwwwwxxxxyyyyyyyyyyyyyzzzzzzzzzzzzzz{{{{||||}}}}}}}}}}}}}}}}}}}}}}}}YYXXXXWWWWWWWWWWWWWWWWVVVVVVWWWWWXXYYYYYYYZZZZZZZZZZZZZZXXWWXXXXYYYYYYYYYYYYYZZZ[[ZZZZZZZZZZZZZZZZZZZZZZ\\[[ZZZZ\ZXY\]\Z[[[[[[[[\\\\\\\\[[[[[\\\[[\\\\\\]]]\\\\\\\\\\\\\]]]\\\\\[\\\\]]]]]]]]]]]^^^^^^^^^^^^^^^^^^^^^^^^________^^^^^^^^^^^^^]]]___`````aaa```abaaaaa```aabbcbbbccdddddd`fcc_VY^d^ba`b`aaaaaabbcbbbbccccccddcccdedcccdccbbcddddddbbcdccdedcccccbbcdddcbbbbaaaaaabccddcbaacdcbccbccbbbccbccccccccccccccccdddddddddddeeeeeffffffffffggghhhhhhiiiiiiiiijjjjjjjjkkkkklllllmmnnnmmmmlmmmnnnnnnnooooopooooppqqqqqrrrqqrrrrrrrrrrrrrrrrrsssssssssttttttttttuuuuvvvwwwwwwwxxxxyyyyyyyyyyyzzzzzzzzzzzz{{{z{{{||}}}}}}}}}}}}}}}}}}}}}}}}}}YXXXXWWWWWWWWWWWWWWWWWVVVVVWWWWWWXXYYYYYYYYYZZZZZZZZZZZZXXWWXXXWYYYYYYYYXXYYZZZ[[[[[[[[[[[[[[[[[[[[[[[[[\\\[[[ZZ[ZZZ\\\[\[[[[[[\\\\\\\\\[[[[\\\\[\\]]]]\]]\\\\\\\\\\\\\\]\\\\\\\[\\\\]]]]]]]]]]]^^^^^^^^^^^^^^^^________________^^^^^^^^^^^^^^^^__````aaabbaaab
caabbba``aaabbbbbccddddccdc`ba[\be^bb`b`aaaabbccdbbcccccccdddccddedccddddbccddddddbbdedcdeedddccbccddeddcbbbbaaaaccdddccbbcdcccccdcbccdcccccccccccccddddddddddddddeeeefffffffffffgggghhhhhhiiiijjiijjjjjjjjjkkkkllllllmmmnnnnnnmmmmmnnnnnooooopppppppppqqqqqqrrrrrrrrrrrrrrrrrrrrsssssssssstttttttttuuuuuwwwwwxxxxxxxyyyyzzzzzzzzzzzzzzzzzzzz{{{{z{{||}~~~~~~~~~~}}}}}}}}}}}}}}}}XXXXXWWWWWWWWWWWWWWWWWVVVWWWWWWWWXXYYYYYYYYYYYZZZZZZZZZZXXWXXXXWXXXXXXXXXXXYZZ[[[[[[[[[[\\\\\\\\\\\\\\\\\\\\\[[[YZ\\[Z[\\\\\\\\\\\\\\\\\\\\\\\\\[\\]]]]]\\\\\\]]]]]]]]]]]]]\\\\\\\\\]]]]]]]]]]]]^^^^^^^^________________________^^^^^^^^^^^^____`````aaa````_`ababbcbb``aaabbccccccddcccfcbbc`akgaedbcabbbbbccdeccccccddddeddddefedddeeeccddddddeccdeddefeddddcbccdeeeeecccbbbbaccdddddcccddcccdedccddddddddddddddddddddeeeeeeeeeeeeffffffffffffggghhhhhiiiiijjjjjjjjjjjjkkkklllllllmmmmnnnnnnoonnnnnnnnoooppppppppppqqqqqqqqrrrrrrrrrrsssssssssssssstttttttttttuuuuuvvvxxxxxyyyyyyyyzzzzzzzzzzzzzzzzzzzzzz{{{||{{{|}}~~~~~~~~~~}}}}}}}}}}}}}}}}YYYXXXXWWWWWWWWWWWWWWWVVWWWWWWWWWXXYYYYYYYYYYYZZZZZZZZZZXXXYYYXXXXXXXXXXXXYYZZ[[[[[[\\\\\\\\\\\\\\\\\\\\[[\\\\[[WZ]][ZZ\\\\]]\\\\\\\\\\\\\\\\\\\\\\]]]]]\\\\\]]]]]]]]]]]]]]]]]\\\\]]]]^^^^^^^^^^^^^^^^^^________________________^^^^^^^^^^^__``a````aaaaaaa```abbbcccba`aaabbcddcdddddccbbd`^^`mgaddacaacbcccddddddddddddeeedddegfdddeeedddddeeefdcefedefeddeddcccddeeeecccccccbcdddeeeedddeecceeedddeeddddddddddddeeeeeeeeeeeeeeeffffgggggggggghhhhhhhhiiiijjjjjjjjjjkkkkkklllllllmmmmnnnnoooopnnooooooooppppqqpppppqqrqqqqqqrrrrrrrsssssssssssssstttttttttuuuuuuvvvvwwxxxyyyyzyyyzzzzzzzzzzzzzzzzzzzz{zz{{{||||||}}~~~~~~~~~~~}}}}}}}}ZZZYYYYYWWWWWWWWWWWWWWVVWWWWWWWWWXXYYYYYYYYXYYZZZZZZZZZZYYYYZZYXXXXXXXXXXXYYZZZ[[[\\\\\\\\\\\\\\\\\\\\\\[[\\\[[ZXZ]]\[[\\\]]]]\\\\\\\\\\\\\\\\\\\\\\\]]]\\\\]]]]]]]]]]]]^^]]]]]]]]]]^^^^^^^^^^^^^^^^^^^^________________________^^^^^^^^___``aaa```aaaaabbbbaabcbbcccbaaaaaabcdeeeeeedddcbc\\_\ff_cb`b`accccccddeeeeeeeedeeeddeegfeddeffeeddeeeefddefedefedeeedddddddeeeccdddddcdddeeeeeedeffdd
ffeddeeeeeeddddeeeeeeeeeefffffffffffffggghhhhhhhhhhhhhhhhiiijjjjjjjjjkkkkkkkkllllllmmmmnnooooooooooooppppppppqqqqppppqqrrrqqqqqrrrrrrrsssssssssssstttttttuuuuuuuvvvvwwwwxxxxyyyyyyyyyzzzzzzzzzzzzzzzzzz{{zz{{||||||}}}~~~~~~~~~~~}}}}}}}}[[[ZZZZZWWWWWWWWWWWWWWVVWWWWWWWWWXXYYYYYYYYXXYZZZZZZZZZZYYYZZZYXYYYYYYYYXYYYZZZZ[\\\\\\\\\\\\\\\\\\\\\\\\\\\\[ZYZ[\]^]\[\\]]]]\\\\\\\\\\\\\\\\\\\\\[\\]^[\\\]]]]]]]]]]]]^^^^^]]]]]^^^^__^^^^^^^^^^^^^^^^________________________^^^^^^^^````aaaa```aaaabaaa```abcccccbba```aacddfffffeeeidb[`e\^f`ddbdbcdccccccdeeeeefffdeeeedeehfedeeffeeeeeeeefedffeeeeedeeeedddddddddccdeeeeddddeeeffedegfddffeeeefeeeeddddeeeeeeeeffffffffffffffgggghhhhhhhhhhhhhhiiiiijjjjkjjjjkkkkkkkllllllmmmmnnnppppooooppppppppppppqqqqqpppqqrrrqqqqqrrrrrrssssssssssssttttttttuuuuvvvvwwwwxxxxxxxxxyyyxyyyyzzzzzzzzzzzzzzzz{{{zz{{|||}}}}}}}~~��������}}}}}}}}[[[ZZZZYYYXWWWXXYYXXWWWWWWWWWXXXZZZZZZZZZZZZZ[[\[ZZZZZZYYYYYYYYYXXYYYYYYYZZZZYZZ]]]]]]]]\\\\\\\\\\\\\\\\\\\\\[ZY[[[\\\\\]]]]]]]]]]\\\\]]\]^\\]][\\\\\\\\]]]]]]]]]]]]]]^^]]^^^^^^]]]]]]]]]]]]^^]]________^^____``````````a__``^^_`^]]^```````aaaaaaaaaaaa`bcb``acabcddcbacccb``bdb_NbeeihhchaiZc`geccefedgecceeedddeeeeeeeeeeefeedeeffgggfeeddeeeeeeffgffffffffffeedeeeeddcccdeeeeeddeefgfeeddeefgggfedefeeeeeeeeefgffeffffffggggggghhhhhhiiiiiiiiiiijjjjjjjjjkkkkkkkkkkkjkklllllmmllmmnnoooppooooopppppppqqrrrrrqqqqrrrrssssssssssssssttsssttttttttuuuvvuuvvwwxxwwwwxxyyxxxxyyyyyzzzzzz{{{{{{zzzzzz{{{{{||{{{|||~~~~~~~~~~~~~~��������[[[ZZZZZYYXXWXXXXXXWWWWWWWWWXXXXYZZZZZZZ[[ZZZ[[[\[[Z[ZZYZZZZZZZZYYYYYYYYYZZZZZZ[]]]]]]]]]]]]]]]]]]]]]]]]]]\\[[ZZ[[[\\\\\]]]]]]]]]]]\\]]]\]][[]][\\\\\\\\]]]]]]]]]]]]]]^^]]]]]^^^^^]]]\\\]]]^^^^]________^____```````````a__``^]_`^]]^```````aaaaaaaaaaaa`bcba`acabcddcbbcddcaacfjgWffdhegheha][_dbabdfffddccdeeeeeeeeeeefeeeefeeddefgggggffeeeffffffgfffggffffggeedeefeeddccdeeeeeeeefggfeeeeeffgggfeeefeeeeefffefggffffgggggggggghhhhhhiiiiiiiiiiijjjjjjjjjkkkkkkkkkkkkkkllmmmmnnmmmnnoppppppppppppooooppqqrrrrqqqqrrrrsssssss
sssssssttsstttttttttuuvvvvvvwwwxxwwwxxxyyxxxyyyyzyyyzz{{{}|||{{{{zzzzz{{{{{{{{{||~~~~~~~~~~~��������[[[[ZZZZZYYXXXXXWWWWWWWWWWWXXXXYYYZZZ[[[[[[ZZZZ[\\[[[ZZYZZZZZZZZZZZYYYYYXYZZZZZ[]]]]]]]]]]]]]]]]]]]]]]]]]]\\[[[[[[\\\\]]]]]]]]]]\]]]]]]\\]\[[\]\\\\\\\\\]]]]]]]]\]]]]]]]\\\]]]]]^^^]]\\\]]^^^^^^_____________```________a````^^^_^]]_``````aaaaaaaaaaaaaabcbaaababcddccbddddcceghh\eedgcfibeZZT]geccdeeeeeedcbcceeeefffffeeeeffecdeghhgggggffgghhgggggffggffffggfeeeffeeedcddeeeeeeeefghfeeeeffggggfeeffeeffffffffggfffghhhhhhhgghhhhhhhiiiiiiiiijjjjjjjjjjkkkkkllllllllllmmnnoooonnnnoopqqqqqqppppooooooopqqrrrqqqrrrrrssssssssssssssttstttttttttuuuvvvvvwwwwwwwxxxxxyyyyyyyzzzyzz{{|}}~~}}}||{zzzz{{{{{{{{{{||~~~~~~~~~����������������[[[[[ZZZZZYYYYYYXXXXXXXXXXXXXYYYYYZZ[[[[[[ZZYYYY[[ZZZZZYZZZZZZZZ[ZZZYYYYXYZZZZ[\]]]]]]]]^^^^^^^^]]]]]]]]]]\[[[[\\\\\\]]]\\\\\\\\\]]]]]]\\\[[[\]]]]]]]]]]]]]]]]]]\]]^^]]\\\\\\]]]^^]]]]]]^^^____^____________`````````````````___^^^^_```aaaaaaabaaaaaaaaabcbaabbbbcddccccdddddfgcf_bcdgcgfc^]ZWaigeddeeefffdbabdffffffffgfeeefffbdfghhhghggfgghhhhhhggghhggffgghffeffffefeddeefeeeeeefggffeeefggfgggfffgffffffggfghhgggghhhhhggghhhhhhhhiiiiijjjjjjjjjjjjkkkklllllllllllmmmnoopppoonnnnnpppooppppppoooonnoopqrrrqqrrrrssssssssssssssssttttttttttuuuuvvvvwwwwwwwwwxxxyyyyyyyzzzzz{|||}}~~~~~}}}}}{{{{|||||||||||}}}}}}}}}����������������\\[[[[ZZZZZZZYYYYYYYYYYZXXXYYYYZZZZZ[[[\[ZZYYYXXZYYYZ[ZZZZZZZZZZ[[[ZZYYYXYZZZZ[\]]]]]]]]]]]]]]]]]]]]]]]]]]\\[\\\\\\\]]]]\\\\\\\\\]]]]]]\\[[[[\]^]]]]]]]]]]]]]]]]]]]^^]]\\\\]]]]]]]]^^^^^^^______````````___````a````````__``aa`_^^_```aaaaaaabbbaaaaaaaabbbbbbbbbccdddcccccdddfffhecddfbgffafe_lfeddefggdeedccegfggggggggfeeefffcdfhiihgggfffgghghhhgghihhhgghhhggfggggfgfeeefffeeeeefgggfffffggfggggfggffgggggggghhhgghhhhhgggghhhhhhhhiiijjjjjjjjjjkkkkkkkllllllllllllnnnnooppoonmmmmnnnnmmnnnppppoooonoopqrrrrrrrrsssssssssssssssssttttttttttuuuvvvvvwwwwwwwwwxxyyyyyzzzzz{{{|||||}}}|}}}}}~~}}}}}}}}}}}}}}~~~~~~~~~~�������������������\\\[[[[[Z[[[ZZY
YZZZZZZZZYYYYZZZZ[[[[[[[[ZZZYYYYYYYYYZ[[[[[[[[[[[[[[ZZZZZYZ[[[[\\]]]]]]]]\\\\\\\\\\\\\\\\]]\\\\\]\\\]]]]]]]]]]]]]\]]]]]]\\[[\]\]_]]]]]]]]^^^^^^^^^^^^^^]]]]]]^^^^]]^^__``___````_````````_````aaaaaaaaaaa^`aaaa`_]_`aaaabbbbbbbbbbbbbbbbbbbbbbbbbcccdddddccccdeeegfgdgegdcgddchZmffeeddeefffedcdegggghhhhhfeeefggdefhiihhgffffgghghhhggijiiiiiiiihhhhhihhhgfefffffeeeffghhhgffffgffggggghgggggghhghiihhhhhhhhgggghhhhhhhijjjjjjjjjjjkkkkkkkllllmmmmmmmmmmoonnnoopnnmmlmmmnmmllmmnppppppppoopqqrrrrrrrsssssssssssssssssstttttttttuuvvvvvvvwwwwwwxxwxyyzzyyzzz{{{{{||{{{{{{|||}}~~~~~~~~~~~~~~~~~~~~~~~~~~���������������������\\\\[[[[[[[[[ZZY[ZZZZZYYYYYZZZZZ[[[[[[[[[ZZZZZZZZZZZ[\\[[[[[[[[[[[[[[[[[[[\\[[\\]]]]]]]]\\\\\\\\]]]]]]]]\\]]]]]\\\]]]]^^]]]]]]]]]]]\\]]]\[[]^]]_^^^^^^^^__________^^^^^^^^^^____^^___```__```````````````````aaaaaaaaaaa_abaaa`^]_abbbbbbbbbbbbbbbbbbbbbccbbcccbccdddddddccddeeefafcifggdfcWUUIaiihgfddcgedddddchhhhhhhhhgeeefggefghhhhhgffffghhhhihffhjijjkkjjijiiiijiihgffffffgfffgghiiihgffffefghhgghggghhhhhhhiihhhihhhhhhhhhhhhhhiijjjjjjjjjkkkkkkklllllmmmmmmmmmmmoonmmmnommmllmnnnnmllmnnpppqqqqqppqqrrrrrrrsssssssssssssssssssttttttttuuvvvvvvvvvvwwwxxxwxyzzzyyz{{{{|||{{{|||||||}}}~~~~~~~~~~~~~~}~~~�����������������������\\\\[[[[[[[\[[ZY[ZZZYYYYYYZZZZ[[\\\[[[ZZ[[[ZZZ[[\[[[\]\\[[[[[[[[ZZ[[[[[[\\]\\\\\]]]]]]]]]]]]]]]]^^^^^^^^\\]]]]]\\\]]]]^^]]]]]]]]]]\\\\]]\[\^_]^_`````````````````__^^^_________`__``````_```````````````````aaaa`````````bcaaa_\^`bccbbcbbbbbbbbbbbbbbbbccbbcccbcdddddddddddeeeeibgdkfggideEI<<Thhiiihhha``beghhhhhhhhiihgeeefggffgghhhhggffgghihiihffgiijkllkjijjiijjjiihgfffffhggghhijjihgffffefghhhhhhhhhhhhhhiiiihhihhhiiiiihhhhhiiijjjjjjjkkkkkkkkkllllmmmmmmmmmmmmponmlmmnmmmlmmnooonmmnoopppqqrrrpqqrrrrrrrrsssstsssssssssssssstttttttuuuvvvvvvvvvvwwxxxywxyzzzyy{{{{||||||}}}}~~}}}~~~~~~~~~~~~~~}}}}~~~������������������������\\\\\\\\[[[[[[[[ZZZZZZYYZZZZZZ[[\\\\\\\\\[[[[[[\[[[[[[[[]\\[[[ZYZZZZ[[[[\\\\\\\\\\\\\\\\]\\\\]^^]]]]]]]]\\\\]]^^]]^^^^__]]]]^^]\\]]]]]]\\]^____
_``````aa`````aaba``__^^]_____```````````_````aaaaa```````aaa```a```a````aaaaa``_bbbbbbbbbbbbbbbbbbbbbbbbccccccccddddddeedddeefffeefggfffeicYW`_Kcehijiggd]]cefhhghhhhhgghgggggghhgffghhhfggfefhjkjiihghkiijkkkjjkjjjjiiihhgfffgghhhhhijkiiiigefhhhggghhihhhhhhhhhhhhhhhhiihhiijkiiiiiiiijjjjjkkkkkkkllllmmmmmmmmonnnoonnpoonmmmmmmnnnnnnnnmmmnoopppqqqqqqqqrrrssssssssssssssssssrstuuutsuuuuuvvvwwvuuuvvvwxxyxxxyyyzzyyy{{{|||||||||||||}}}}}}}}}}~~~~~~~~~~~~~~~~~�������������������������]]]]]]]]\\\\[[[[Z[[[ZZYYZZZZ[[[[\\\\\\\\\\[[[[\\\\\\\\\\]\\[[[ZYYZZZ[[\\]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]\\\]]^^_^^]^^^__]^^^]]]]]]^^]]]]\]______```````a```_````a``__^^^____````````````_````aaaaaa`````aaba```````aaa``abba`_`abbbbbbbbbbbbbbbbbbbbbbbbccccccccddddddddddeeefffffgghgggeghfa^^Xddefgghiibbfffgghhhhhhgghgggggghhgfffghhgghgffhjkjiiighkijjkkkkkkkkkjjjjihhgggggihhhiijkjiiihfgihhhggghhhhhhhhhhhhhhhhhhiiihiijjiiiiiiiijjjjkkkkkkklllllmmmmmmmnonnnoonnooonmmmmnnnnnnnnnnnnnooppppqqqqqqqqrrsssssssssssssssssssrstuuttsuuuuvvvvvvuuuvwwwwxxxxxxyyzzzzyyzz{{{|{{||||||||}}}}}}}}}~~~~~~~~~~~���������������������������]]]]]]]]^]]\\[[[[[[[[ZZYZ[[[[[[[\\\\\\\\]]\\\\]]]]]]]]]]]\\\\[ZYYZZ[[\\]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]^^__^^]]]^_`^__^]\]]^^^^]]]\]^_`____`````````_______a```__^^____````````````_````aaaaaaa````bbba```````aaaaa`bca_^`cbbbbbbbbbbbbbbbbbbbbbbbbcccccccccccdddddddeeffffgghhhhhhghihaZWXggffeeeegbdgfehihhhgggghggghhggghgfefghighihggijkjijihhkijkklllllllkkkkjiihhggghiiiiiijjkjjjiggiiihggghhhhhhhhhhiiiiiiiiiiiiijjjiiiiiiiiijjkkkkklllllmmmmmmnnnnnonnnoonnoonnnmmmnnnnnnnoooooopppppqqqqrrqqrrrsssssssssssssssssssssstttttuuuvvvvwuuuuvwxyxxxxxxyyyyzzzzyyyzz{{{{{||||||||}}}}}}~~~~~~~~~~�����������������������������]]]]]]]]^]]]]\\\[[[[[[ZZ[[[[[\\\\\\\\\\\]]]]]]]]]]]]]]]]]\\\\\[ZZZ[[\\\]]]]]]]]]]]]]]]]]]^^^^]]\]]]]]]]]^^^^^____^^]]^``^``_]\]^_^^]]\\\^_```___``````````___^__````____````````_________````aaaaaaaaaaabbbba`````aabbbbabba``acbbbbbbbbbbbbbbbbcccccccccccccccccccccdddeeeefff
ghhhhhihhiif][\WVggghgfdbfcfheeijihhggghhgghhhhggihgeefhihijihhijkjjjihikijjkkllllllkkkkjjiiihhhhjjjjjjjjljjjjhhjjiihgghhhhhhhhhhiiiiiiiiiiiiijjjjjjjjjjjijklllkklllmmmmnnnnnnnnnonnnoonnnnnnnnnnnnnnooooppppppqqqqqqqrrrrrrrsssssssssssssssssssssssttuuuvvvvvwwwvuuvvwxyxxxxxyyzzzzzzzzzzzz{{{{{}}}}}}}}}}}}}~~~~~~~~~~~�������������������������������������]]]]]]]]]]]]]]]]\\\\\[[Z\\\\\\\\]]]]]]]]]]^^^^]]]]]]]]]]]\\\]]\[\\\\\\\\]]]]]]]]]]]]]]]]^^^^^^]\]]]]]]]]^^^^^_____^]^_`a_`a_]\]__^]]\\\\^_`a`___````````a``_____``````__````````_________````aaaaaaaaaaaabbbaaabaaabbbbbca``abbbbbbbbbbbbbbbbbbbccccccccddddddddccccddddeeefffgghhhhhhhhggf]^b^eddegiihhkikkfehhihhggghhgghhhhggiihfeegiijkjihijkjjkjhikiijjjkkllkkkkjjjjjjjjiiijjjjjjjjmkjkjiijjjihhhhhhhhhhhhhiiiiiiiiiiijjjjjjjjjjjjjijklllkkmmmmnnnnnnnoooooonnnoonnmnnnonnnoooooooopppqqqqqqqqqrrrrrrrsssssssssssssttttttttstttuuuvvvvwwwwxwwvvvwwxyxxxxyyzzzzzzzzzz{{|||||}}}}}}}}}}}}~~~~~~~~~~~����������������������������������������^^^^^^^^\\]]]]^^\\]\\\[[\\\\\]]]]]]]]]]]]]^^^^]]]]]]]]]]]\\]]]\\]\\\\\\\]]]]]]]]^^^^^^^^^^^^^^^]]]]]]]]]^^^^^_____^^^_`a```_]]^__^]]\\]]^_aa````````````a```````````````````````````````_````aaaaaaaaabbabbbaabbbbbbbbbbca_`bdcabbbbbbbbbbbbbbbbccccccccddddddddccccdddeeefffggghhggghhhgeieec_shfdegijkigjjffhghhhgggghggghhgggiiigefhiijkjihijkjkkkiikjjjjjjkklkkkkjjjkkkkkjjijjjkkkkjmkjkkiiijjihhhhiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjkkllllknnnnnooooooooooponnnoonnlmnooonnooooopppppqqqqqqqqqrrrrrrsssssssttttttttttttttttttuuvvvvwwwwxxxxxxwvvwwwxxxxyyzzzz{{{{zz{{||}}}}}}}}}}}}}}}}~~~~~~~~���������������������������������������^^^^^^^^]]]]]]]]]]]]]\\[\]]]]]]]]]]]]]]]]]^^^^]]]]]]]]]]]]\]^^]\\\\\]]]]^^^^^^^^^^^^^^^^^^^^^^^_]]]]]]]]^^^^^^________````_^^^_``_^]]^^_^_`aaaaaa``````````````a````````````````````````_````aaaaaaabbbbbbcbaaabccccbbaaba`acccbbbbbbbbbbbbbbbbbccccccccddddddddcccddeeeeeffgggghhggghhihgkcec\qlifefhiiedghegighhhhhhgghgggggghijjhffhiikkkihijkjklkiikkkjjjjkkllllkkkkkkllkkjjijjkllkkmkijkiiijiihhiijiiiiiii
iiiiiiiiiiijjkjjjjjjjjjjjkkklllllnnnooooooooopppponnnoonnlmnoooonooopppppppqqrqqqqqrrrrsssssssssstttttttttttttttttuvwwwwwwwwxxxxxxxwwvwwwxxyyyzzzzz{{{{zz{||}}}}}}}}}}}}}}}}}~~~~~~~�������������������������������������^^^^^^^^]]]]\\\\]]]]]\\\]]]]]]]]]]]]]]]]]]^^^^]]]]]]]]]]]]\]^^]]\\\]]]]^^^^^^^^^^^^^^^^^^^^]^^_`]]]]]]]]^^^^^^^______```a`^^__````_^^_``^_`aaaabaa``````____```a`````aaaaa``````````````_````aaaaaaabbbbcccbaaaaccccbbaa`abbbbccbbbbbbbbbbbbbbbbccccccccddddddddccddeeefefffggggihhhhhiigihY`i_jhfefhjihigiifghdghhhhhgghgggggghjjkigfhiikkkihhikjklkiikllkjjkkkmmmllllkkllllkjjiijkllllmkijkiiiiiihhijjiiiiiiiiiiiiiiiihijjkkjjkkkkkkkkkkkkkllmnnooooppoopppppponnnoonnllnoooonooppppppopqqrqqqqqrrrrsssssssssstttttttttttttttttuvxxxwwwwxxxxyyxxwwwwwxxxyzzzzyz{{{{{{z{|||}}}}}}}}}}}}}}}}~~~~~~������������������������������������^^^^^^^^]]]]]]]]]]]]]]]]]]\\\]]^]]]]]]]]]]^__^^^]]]]]]]]\\]]]]]]]^^^^___```__^^^_^^]]^^_________^^]\]]^_______^^__``___`_____^^^__^^^___a`__`a`_aaa``____^^^^__`aaa``____````aaaaaaaaaaa````aaaabbbbbbbbccccccccccdddcbbaabcccccbcccccccccccccccddddddddddddeeeeeddddeefggffeefghhhgghhhkghhakf`ihhhhhhhiiiihgfegghhhijjiiiiiiiihijihghijjjjjjiiiklkjkjikkkjjkklkkkkkkllmllllllkjkmmlkllkllkklkiijjhhjjiiiiiiiiijjjjjjjjiijjjkkkjkkkkkllllllmmmmooooooooppppppppooooooooooopppppoopqqqqpqqqqrrrrrrrrrrrrrrrrrsstttttttttttttuuvvvvvwwxxxxxxwwxxxxxyyxxxyxxyyz{{{zz{{{{{{}}}}}}}|}}}}}}}~~~~~~������������������������������������������^^^^^^^^]]]]]^^^]]]]]]]]]]]]]]]]]]]]]]]]]]^^_^^^]]]]]]]]]]]]]]]]]^^^^___``____^^_^^]]^^__________^]]]]^^`````__^__```__`_____^^^^^^^^__```_`````aaaaaaa```____``aaaaa```aaaaaaaaaaaaaaaa```aaaabbbbbbbbbccccccccccdddcbb`abcddccccccccccccccccccdddddddddddddeeeeeeefffgggffeefghhhgghhhjhijdia_gfggiiiiiiihhgffghiihiijjjjiiiiihijihhhijjkjjjiiiklkjkkikkkkkllmmllllllkmmllmmlljlmmmllljllklmljijjihjjiiiiiiiiijjjjjjjjjjjjkkkkkkkkllllllllmmmmooooooooooppppppooooooooooooppppoppqqqqqqqqqrrrrrrrrssssssssssttttttttuuuuuuuuvvvvwwwxxxxxxxxxx
xxyyyxxxyyyyyyz{||{{zz{||~~~~}}}}~}}}}~~~~~~~~������������������������������������������^^^^^^^^]]]]^^^_]]]]]]]]]^^^]]\\]]]]]]]]]]^^^^^]^^^^^^^^]]]]]]^^^^^^^___`______^__^^^^____________^]]]^^``````___````__```____^^]]^^__````a````aaaaaabbbbbaa````aaaaaaaabbbbbaaaaaaaaaaaaaaaabbbbbbbbbbbccccccccccdddcba`acdddcbccccccccccccccccddddddddccddddeedeeeffgggggfefghhhhhhhhhhiijeeY]fffhijiiiiihhggghiiiiiijjjjjjiiiiijjihiikkkkjjiiiklkkkkjkkkllmnnnnnmmlllnnmmnnmmklmnmmllkllllnmkhjkjiijjjjjjjjjjjjjjjjjjkkkkkkkkkkkllllllllmmmmmooooooooooooppppoonnnnoooooooopppppqqqqqqqqrrrrrqrrsstttttttttttttttuuuuvvvuuvvvvwwwxxxxxxxxxxxxyyzyyxyy{zyyyz{|}|{zz{||~~~~~~~~~~~~~~~�������������������������������������������________^^^^^^^^^^^^^^^^]]^^^^]]^^^^^^^^]]^^^^^]________^^^^^^^^^^^^^^_____________^^___````____^^^^^^^_aaaaa``__`aa```````_____]]^______aba`_`aaaaaabbbbbbaaaaaaaaabbbbbbbbbaaaaaaaaaaaaaabbbbbccccccccccccccccccdddcbbabcdedcbcccccdddddddddddddddddddccddddeecddeeffffggfffghihhhhhhigighcaVagghhiigfiihhhgggiijjiiijjjjjjjjjijjjjiiikkkkkjjihkmlllljkklmmnoooonnnnnnoonnoonnjkmnnnnnmlllmnmlhklljijkjjjjjjjjkkkkkkkkkkkkkkkkkkkklllllllmmmmmooooooooooooopppoonnnnooooooooppppppqqqqqqrrrrssrrssttttuuuuttttuuuuuuuuwwvvvvvvwwwwxxxxyxxxxxxyyzzzyyzz{{{zz{{|}}|{{{||~~~~~~~~~~~~~~��������������������������������������������____________^^^]^^^^^^^^\]]^^^^^^^^^^^^^]]^^_^^^________^^^^^^^^^^^^^^^^________________aaa```__]]^^^_``aaaaa```_`aa````````____^^_____^`aba```baaaaaaaaaaaabbbb``aaabbbbbbbaaaaaaaaaaaabbbbbcccccccccccddddddddcdddccbbbbcdedcccccddddddddddddddddddddddddddeeeddefffffffgggghiiihhhhiihjgfcd^kiiiiihgehhhhhhhgijjjiijjjjjjkkkkjjjkkjjikkkkkjjjhkmmlmlkkllmnoopnnoooooopooooponkklmoopponmmnnmmjkllkjjkkkkkkkkkkkkkkkkkkkkkkkkkjkkkkkllllmmmmnnnnnnnnnnnnooooppponnnnopooooppppooopppqqrrrrrsssttttttttuuuuuttsuuuuuvvvwwwvvvvwwwxxxxxxyyxxxxyyzz{{zzz{{{||||{{}}}}}||{}}}~~~~~~~~~~~~��������������������������������������������������������```````````__^]]^^^^^^^^\]]^^__
_^^^^^^^^]^____^^____________^^^^^^^^^^^^^______`````````baaa``__]]^^_``aaaaaa`````aaa``a`````___________`aaaa`aabbaaaaaa`aabbbbbaaaaaabbbbbbbbaabbbbbbbbbbbcccccccccccccddddddddddccbcccccdddddcddddddddddddddddeeeeeeeedddeeeefffghhhhgffgggghiiiihhiiikkigbigtiiiiiihghhiiihhgijjjjjjkjjkkkkkkjjkklkjikklkkkjjhkmmmmmkllmnoooonoooppppppooppoolllmnpqqponnonnnlkkllkklkkkkkkkkllllllllkkkkkkkkkkkkllllllmmmmnnnnnnnnnnnnnoooppppooooppppppppppppppppqqrrrrsssstttttttttuuuuuttuuvvvvvvwwvvvvwwxxxxxxyyyyyyyyyy{{{{{z{{z{|}}}{{|}~~~}|{}}}}}~~~~~~~~~~��������������������������������������������������������```````````__^^^^^^^^^^^]]]^^^__________^^_``_________________^^^^^^^^^^^^____``````````aa``__^^]]^__```aaaaa``_`aaaa``aaa````______```aaaaaabaaabbbbbbbaabbbbbabbbbbbbbbbbbbcccbbbbbbbbbccccdddddddddddddddddddedcbbbcdddccddddddddddddddddddddeeeeeeeeeeeeffffghhiihhgefghhhiiiiiiiiiikjig^hiojiiiijihghijjihgijjjjjkllllkkkjjkkklmljillllkkjjhkmnnnmlllmnooooooooppppppooppoonmllnpqqqonoonnpnkjlmlklllllllllllllllllkkkkllllllllmmmmlmmmmnnnnnnnnnnnmnnnoooppppppppppppqqqqqqqppqqrrrrrssssstttuuuuutuvvvvuuvvvvvvvvvvvvvvwwxxxxxyyyyyyyyyyy{|||{{{|{{|}}}|||}~~~~}}~~~~~~~~~~~~��������������������������������������������������������```````````______________^^^^^__________^__```_________________^^^^^^^^^^^^__``````````````_^^]]^^__````aaaaa``_`aaaa`aaaa````__`___`abbba`abba`aabbcccdbbcccbaaccbbbbbbbcccccddccccccccccccddddddddddddddddddddfecaabcdddcccddedddddddeeeeeeeeeeeeeeeeeeeefffffgghihhgfefghhhiiiiiiiiiijhieYcddkjjiiiihfgijjihgiijkjkllmmllkjiikkklmmkilllllkkjhkmnnnnlllmnoonnppppppooppooppoopnllmoqqqnmopooqokikmmllllllllllllllllllkkkkllmmmmmmmnnnmmmmmnnnnnnnnnnnmmnnoooppppqqpppqqqqqqqqrrqqqrssrrrssssttttuuvvvuuvwwwvvvvvvvvvwuuuvvvwwxxxxxyyyzyyyyyyz{|||{{||||||}}}}}}}}~~~~~~~~����������������������������������������������������������````````````````__``_^^]________________``````__________``aaa```__``````_____```aaaaaaaa`````___^_aa``abaaaaaaaaaaabbbbbaaa`````aabbbbbbbbbbbbbbbbbccccdddcbbbb
cbcdcbbcdbccccccdccccccccccddddddddddddeedcceecdfeedbabdgeeeeeeeeeeeeeeeeeeeeeeeefffeefffeffffghihhhhhhhhiiiiiiiijiiiiiijjheeehjaiijjjihhiiiiiiiijjjkklllllllllkkllfmrliiiikmmkklihjnonllimonnopoppppppppqqqqqqqqrqolmpqpppppponnonnnnnnmmnnonmlklllllmmmmmmllmnnkkllmmnnnoooooonmmmmmnooppppppppppqqqqqqqqqqqrrrrrrrrrrsrrssssstuuuuuuuuvvuuttsssttuuvwwwvuuuuvwxxwwxxyyxyyxxyz{zz{{||}}}}}}}}||}}~~~~~�����������������������������������������������������������������```````````````````___^^________````````````````________```````__```````___````aaaaaaaaaaaaaaa``_``aa`aaaaaaaaaaaabbbbbabaaaaaaabbbbbbbbbbbbbbbbbbbcccddddcbbbccccddccccbccccccdcccccccccddddddddddddeeeeddeecdedbaaceffeeeeeeeeeeeeeeeeeeeeeeeefffeefffffgfffghhhhhhhhhiiiiiiiijiiiiiijlfn`j^i_ijjjjjiiiiiiiiiijjkkklllllllllkkmkprdnuojjkmmkkliijmonmmjmonnopoppppppppqqqqqqqqrqomnpqppppppoonoonnnnmmnnoonmllllmmmmnnmmlllmmnlmmmnnnnooopoonnnnnnnooppppppppppqqqqqqqqqqqrrrrrrrrssssrsttsstuvvvvvvvvuuuuutttttuuvwwwxwvuuvwxyxxxxyzzyyyyyyz{{{{||}}~}}}}}}}}~~~~~~~~~~~~����������������������������������������������������������������```````````````````__```````````aaaaaaaaaaaa``````````````````__```aaaa````aaaaabbbaaaaaaaaaaaaa```aaba`aaaaabbb`abccbaabbaaabbcbbbbbbbcccccccccbcccccddddccccccccccddcbbccccccdccccccccddddeedddddeeeeefeefedcdda_`egfdddddddddfffffffffffffffffffeeffffggffffghhhhhhhhiiiiiiiiiijjjjiijikgd_adjjkkkjjjjjjjjjjjjkkklllllllllllkknmnkqihmlkmmkkmkjjmnnmnkmoooppoppppppppqqqqqqqqrqonnpqpppppppooponnnnmlnooonnmllmnnonnnmmmlmmmnnnnoooooopppponmnnnnooppppppppppqqqqqqqqqqrrrrrrrssttsssstttttuuvvvvvvvvuuuuuuuuuvvvwwxxxxwvvwxxyyyyyyz{zzzzyyz{{||}}}~~~~~~~~~~~~~}}}}~~~~����������������������������������������������������������������````````````````a````aabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa````aaaaaaaaaaabbbbbbbbbbaa````````ba`abba``aaabbbc`abccbaabbaaabccbbbbcccccccccccccccccddddddcccccdcabcdcbbccccccddddddddddddeeeeeeeeeeeeefeeffedcdb`begebeeeeeeeefffffffffffffffffffeefffffffffggiiiiiiiiiiiiiiiiiijjjjiihjgj_a\
gkkkkkkkkjjjjjjjjkkkkllllllllllllknlmmlvoomllmllnmkkmnnnomnooopppppppppppqqqqqqqqrqpoopppppppppppponnnonnooooonmmmnoopoonnnnnnnnnooooooooppppponmnnooopppppppppppqqqqqrrrrrrrrrrrrstuuttsstuuttuvvvvvvvvvuuuuvvvvwwwwxxxxyxxwwxxyyyyyyzz{{{{{zzz{|||}}}~~~~~~~~~~~~~~~~~~�������������������������������������������������������������������aaaaaaaa````````aaaaaabcbbbbbbbbbbbbbbbbaaabbbbbbbbbbbbbbbbbbbaaaaabbbbbaabbbbcccbbbbbbb````aaaabaaabca`aaabbbccaabbbbaaaaaaabbccccccccccccccccccccddddddddddcccdcaabccbbccccccdddddddddddeeeeeeeeeeeeeeeeeeffecbbbceeedfffffffffffffffffffffffffffeefffefffffghiiiiiiiiiiiiiiiiijjkkjjikflbg``ikkkkkkklkkkkkkkkkkllllllllmmmmmlnlqmj���qnllmmmnnlkmnnnpnnnoppppqqqqqqqqrrrrrrrrrqqppoppppppppqqpoonopqqpppooonnmnopppoooooooooooooooooopqqqponnnnnoopppqppppppqqqqqrrrrrrrrrrsssstuuttsstttttuuvvvvvvvvuuuvvwwwxxxxxxyyyyxxxxyyyyxxyyzz{|||{{{|||||}}}}}}}}~~~~~~~������������������������������������������������������������������������aaaaaaaaaaaaaaaabbbbbbccbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbcccccbbaabbbbbbbbbbbbccccccccbbbabbbbbbbbbbbbbaaaabbbbccbbbbbbaa``aabbbccccccdddddddddddddddddddddddddccdccccdccbccccccddddddddddeeeeeeeeeeeefffeffefgeb`bdeeefgfffffffffffffffffffffffffffeefffffgggghiiiiiiiiiiiiiiiiijjjkkjjjiikegleslkkkkkllllllllllllllllmmmmmnnnnmnpim���rolmnnnoomlmnnoppnnpqqpqqqqqqqqqrrrrrrrrrqqrqooqpppppqqrqpoopqrrqpppoooonooppppoppppppooooooooppppqqpponnnooppppqqppppqqqqqrrrssrrrrssssssttutttstuuttuvuuuuuuuuuvvvwwxxxxxyyyyyyyyxxyyyyyxxyyzz{|}}||||||||}}}}}}}}~~~~����������������������������������������������������������������������������bbbbbbbbaaaaaaaabbccccccccccccccbbbbbbbbbbbbcccccccccccccccccbaabbbccccbbbbbbcccccccccccbbbbbbbbbcccbaaabbbbbbbbcbbaaaab``bccccccccdddddddddddddddddddddddeeedccbdfggedcbccccccdddddddddeeeeffeeeeeffffffgffggeaaceeeefgfffffffffffffffffffffffffffeefffgghggghiiiiiiiiiiiiiiiiikkjjjjkkgnff]o^skkkkkklllllllllllllllmmmnnnnoonnmss���ʂrolmoooppnmnonopqonpqqqrrrrrrrrrrrrrrrrrqqrsqooqqppppqrrqpoopqrrqqppoooooooopppppppqppooo
ooopppqppqqqpoonoppqqqpqqppppqqqqqrrsssrrsssssssssttttttuuuuuvvvvvvvvvvvvwwwxxxyyyyyyyyyyyyyyyyzyyyyz{{{|}~}}}}}}}}}}}}}~~~~��������������������������������������������������������������������������������ccccccccbbbbbbbbbccddccbccccccccbbbbbbbbbbbcccccccccccccccccbbaabbccccccabbbbcccccccccccaaaaaaabbcdcbaabccbbbbbbcbaaaabb`abddddcccdddddddddddddddddddddddeeeedccaeikigdcbccccccdeeeeeeeeeeefffeeeeefffffghgfggd`deffeeeefffffffffffffffffffffffffffeefffhhhhgghiiiiiiiiiiiiiiiiikkjjjjkkilkYX\Rakkkjkklllllllllllllllmmmnnoooooonq����|romnoppppnmnonopronprqqrrrrrrrrrrrrrrrrrqqrsrooqqppppqrsrponopqqrqqppoooooooopppopppppoooooppqqqpppqqpppoopqrqqqqqppppqqqqqrrsssrsssssssssssstttuuvvvvvwvvvvvvvvwwwwwxxxyyyyyxxxyyyyyyyyzzzzzz{|{|}~~~~~~~~~~~~}~~~����������������������������������������������������������������������������������cccbbbbbbbbbbbbbaabbbbccccccccbbccccccccccccccccccccccccccccccccccccccccccccccccdddccccbcbabcdcbbbbbbbbbbbbbccccbbbaaabbbccdddddddddddddeeeeeeeefeeeedddccdeedccfbgtzshbdddddddddddddddddefghgffdeeefffffefgebbcffeeeeffggggggggffffffffffffffggfeeefgijiiiiiiiiiiiijjjjjjjjjjjjjjjjjjklnh_DYerGgknljkklllmmmmllkllmmnnnmkplnsmqs�����oopnmnppmpomlnqpmollpssqqqrrrrrrrrrrrrrrrqqrrrqporrssrqqqrrrqppqrrrrqrrssqrqqpopqqqqqqqqqppqqqqrrqqqqqpppqqqqrrrrppqqppqrqqqrrrrsssssssssttuutuuvvvvvvvvvvvvvvwwwwwwwwwwwxxxxxxxwyzzzyyyzzzzz{{{{|}~~~}}}~~�~�������������������������������������������������������������������������������������ccccccccbbbbbbbbbbbbccccccccccccccccccccddddddddddddddddddddddddccddddddccccccccdddcccccbbbbcdccccccccccbbbcccccccbbbbbbbcddddddddddddddeeeeeeeeeeeeddddcdeeeedcebentqkgeeeeeeeeeeeeeeeedefgggffeeeeffffgfffebbcgffeeeffffffffffffffffffffffffffffeefgiiiiiiiiiiiiiijjjjjjjjjjjjjjiijjkllbHPgdtMfmpmjllkllmmmmllllmmnnnnksnwpiuh����ͅvnpponnppnpomlnqpmpmlpssrqrrrrrrrrrrrrrrrrqrrrrqppqrssrqqqqqqqpppqrrrrrrssrrrqppqqqqqqqqqqqqqqqrrrrrrqqqqqqqqqrrrrpqqqqqqrqqrrrrsssssssssstuuutuvvvvvvvvvvvvvvwwwwwwwwwwxxxxxxxxxxyzzzyyyzzzz{{{{||}}~~~}~~~�������
�����������������������������������������������������������������������������dddddcccccccccccddccccccddddccccccccccccddddddddddddddddddddddddddddddddddddddddddddccccabccccdeddddddddbbcccdddeddcbbbbccddeeddeeeeeeeeeeeeeeeeeeeddddddeeffeedfddfhigdeeeeeeeeffffffffeeffffffeeeeffffhgffdbbdhggfeeffeeeeeeeeffffffffffffffeeffffghiiiiiiiiiiiiijjjjjjjjjjjjjiiiiijjkj\[`ffb\elomkmmklmmmmmmllmnnoooopqlmumo�����qoqqqpooqqoqpnmnqqoqnmorrrrrrrrrrrrrrrrrrrrrrrsrrqpqrssrqqqpqrqqppqrrrrrrssrssrqqqrrrrrrrrrqqqqrrrrrrrrrrrsqqrrrrrrqrrrrrrsrrrrsssssssssssstuuuuuvwvvvvvvvvvvwwwwwwwwwxxxxxxxyyyyyxyzzzyyzzzz{{|||}|}}~~~~~~~������������������������������������������������������������������������������������eeeeddddddddddddeedddddcddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddddddcccacddccefddddddddccccddddeeddccccddddeeeeeeeeeeeeeeeeeeeeeeeeeeddeeffffeeffebbeebffffffffffffffffffeeeffgefffffffhgfedcdfhggfeeeeeeeeeeeeffffffffffffffffggggghiiiiiiiiiiiijjjjjjjjjjjjjjjjjiiiiifbvg_bXbfgiklmmmmmmmmmmmmnnoppppsmonsp�����tvorrqpoopppqqpnnpqprpnoqrrrrrrrrsssrrrrrrrrrrrssrrrqrssrqqqqqrsrqqqrrrrrrssssssrrrsrrrrrrrrqqrrrrrsrrrrssssrrrrrrrrrsssrrssrrssssttttttttttuuuuuuvwwwwwwwwwwwwwwwwxxxxxxyyyyyyzzzyyzz{{zzz{z{{||}}}}}}}~~~~�������������������������������������������������������������������������������������eeeeeeeeeeeeeeeeeeeeeddddddeeeeeeeeeeeeeffffffffffffffffffffffffeeeeedddddddddddeeddddccbceeddefddddddddddddddeeeeddddddeeeeeeeeeeeeeeeeffffffffffffffffffffffffdggddhhfffffffffffffffffffeeeffgfffffffffffddefgggfeedeeffffffffffffffffeeeffggggggghhiiiiiiiiijjjjjjjjjjjjjjjjjkkjjiihhdjoeg`fbkdcjollommmmmmmmnnopppqqnqsyl����Љsqrpqppoooopqqqnnpqqsrpoprrrrrrssssssrrrrrrrrrrrsrrrpqrsrrrrrrstsrrsssssssrrsttssrstssssssssrrrrrsssrrrrssssrrrrrrrrssttssstssssttttttttttttuuvuuuvwwwwwwwwwwwwxxxxxxxxyyyyzyyzzzzzz{{|{{{{{{|||}}~~}}}~~~~������������������������������������������������������������������������������������������������ffeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffff
fffffffffffeeeeeddeeeeeeeeeeedddddddeeeeefeeeeeeeeeeeeeeddeeddddeefeeeeeefeeeeeeeeffffffffgggfffffffffffffeghffggdffffffffffffffffffeeeeffffffffeeeffddfhhgfeeddeeffffffffffffffffeeffghhihhhhhiiihiiiiijjjjjjjjjjjjjjjjjjkkkjjihgkhodlfglqgdkomlommmmmmmmnnoppqqqnsoi~�����wqsupqpppomnpprqnmoqrssqpprrrssssssssssrrrrrqrrrrrrsspqrsrrrrrsttssstssssssrrsttsssstttttttttrrrsssssrsssssssssssrrrrstttssstssttttuuuuuuuuuuuvvvuvwwxxxxxxxxxxxxxxxxxxyyyzzzzzz{{{{{||}||||}}}}}}~~~~~~~~~������������������������������������������������������������������������������������������������ffeeeeeeeeeeeeeeeeeeeeeeeeeeffffeeeeeeeefffffffffffffffffffffffffffffeeeeeeeeeeeeeeeddddeeeeefeeeeeeeeeefffeedddeeedddddffeeeeffffffffffffffffffffffffffffffffffgggfeecbffffffffffffffffffeeeeffgggfffeeeffddfhhgffeeefffffffffffffffffffffghhiihhhiiiihhhiiijjjjjjjjjjjjjjjjjjjkkkkjjihojscek`pnkikmmmnnmmmmmmnnnoppqqqrpot������yroxrqqpppomnpprqnlnqsstspprrqsssssssssssrrrqqqqqqrrsspqrsrrrrsstssstussssssrrsttsssttuuuuuuuurrsssstttttttssssssssrrrstttssstttttuuuuuuuuuuuuuvvvvvwxxxxxxxxxxxxxxxyyyyyyzz{{zz{{||||}}~}}}}~~~~~~~~~~~�������������������������������������������������������������������������������������������������eeeeeeeeeeeeeeeedddeeeeeeeefffffeeeeeeeeffffffffffffffffffffffffggggffffeeeeeeeeeeeeddddgedeffedffffffffggffedddffedddccffeeeeffffffffffffffffffffffffffffffffffgfefggggffffffffffffffffffeeeeeegggfffeeeggedfhggffffffgffffffffffffffffgggghhiihhiiiiihhhiiijjjjjjjjjjjjjjjjjjjjjkkkkjjiwca_ffchlmijnolnnmmmmnnnnoppqqqrqx�����ŉtqwolqqpppomnqorqnlnqsstsqprrqssssssstsssrrrqqqqqqqrsspqrsrrrsssssrstussstssrrsttsssttuuuuuuuursssstttuuutttttssssssrrstttssssttttuuuuuuuuuuuuuvvvvvwxxxxxxxxxxxxxxyyyyyyzz{{{z{{|||||}~~~}}~~~~~~~~~~���������������������������������������������������������������������������������������������������fgffeefgggffffffeeeeeeeedddeeffgeeffffggeeeeeeeeeeeeeeeefeeeeefffffffffffffffffffffdccefeeeffeeeffffffeefffeedddcdddeeffhgffggfeffffffffggggggggffffffffffffffffefffffffffeeeeefgggggghh
ggggfffffffhgeddeeeeefghfgghhhggfffffffffffffggghhhhhhhhiiiiiiiiiiiiiiiijjjjjkkkjjjjjjjjijjjkkkkoonceniaijkkklmnppooonnnlkpplpsnqo�����׈}srsrpqpqqpoooppqqpooprrrsrqqqqrrrrssssssssrrrrqrrstuspqqqqrrrrssrsstssttsssttutttttsssuuuuuuuuttssssstrsttttsssstsssrrsssttttustuvvvvvwwwwwwwwwwwwwwwwxxxyyyyyyyyyyyyyzzzzzzzzz{{{{{|}~~~~~~~~��������������������������������������������������������������������������������������������������������fggfeffgffffffffffeeeeffeeeeeeeeffffffffeeeeffffffffffffffeeeffgfffffeeefffffffffffdccdfeffffffeffffgggggfffeedddddeeeffgfffggffffffffffffffffffffffffffffffffffffffffffgffffghhgghhhhhhgggggffefefhhedeeeefgghighhhhggfffffffffffgghhhhhhhhhhhhiiiiiiiiiiiiiiiijjjjjjjjjjjiijjjjjjkkkllmmldfmhbjlnmjjkmpooonnnnmomnrqoun�������~vpqsrqrqqqqpppppqqpppqrrrrrqqqrrrrssssssssssrrrqssstutqrrrrrrsssrrsstssttsssttttttttttttttttuuuttssssstrsstttsssstttsssttttttttstuvvvvvvvvvvvvvxxxxxxxxxxxxyyyyyyyyzzzzzzzz{{{{{{|{{{||}}}~~~~��������������������������������������������������������������������������������������������������������fggfffggggffffffgffffffgfffeeeddffffffffffffffggffggggggggfffgggffffeeeeffffffffffedccdeeeffffeeeeffgghhggfffeeedddeefffffeffgffggffffffffffffffffffffffffffffffffffffffeeeefghhhhhhhhhhghhhhgeeeefhhfeeddefhiiihhhhhggfffffffffgghhiihhhhhhhiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiijjjjjjkklllljkffjebkmomjijmooonnnnnmmonmoqq������yuqoqsrqrsrqqrrqpppqqpqrsqrrrrqrrrrrsssstssssssssrststvusssrrrrrrsssstttsssssssttttttttttttttttttttssrsstsssttttsstttttttttttttttstuvvvvvvvvvvvvvxxxxxxxxxxxxxyyyyyyzz{{{zzz{{|||||||{{|||}}}~~���������������������������������������������������������������������������������������������������������������fgggffgggggggggggggggggggggfffeeggfffffeggggggggggghhhhhggggghhhggffffffffffffffffeddddeeeeeeeeeddeefffgfffffeeedeeeefffeeeefgfffffffffffffffffffffffffffffffffffffffgggeeeeefghhhhhhgggghhihgedeefiigefcdfghiiihhhhhggggggggggggghiiihhhhhhiiiiiiiiiiiiiiiiiiiiiiiiijjjjjiiiijjijjjkkllmjkgfgablmnmkjklnnnnnnnnmmnrqkw���
���΅vrqqssrqrsqpprrqoppqqqrssqqrrrrrsrsssstttssssssssrttstuussrrqqqqqttttuuutssssstttttttuuuuttttuuuuttssssstsssttttttttuuuuuuuutttttttuvvvvvwwwwwwwwxxxxxxxxxxxxxyyyzzzzzzzzzz{{||}}}}}||{||||}}~����������������������������������������������������������������������������������������������������������������fghggggghhhhhhhhhhhgghhhhhhhhggggggggfffggghhhhhhhhhhhhhhhhhhhhhhhhhggggffffffffffeeeeeeffggggffffffffffeeeeeeeeeeeeffffeeeefgffeeeffgggfffffffffffffffffffffffffffggggghhgggghhggggggggghiihgfeeefiigffeefghhhhhhhhhhhhggggggggfgghhhhhhhhiiijjiiiiiiiijjjjjjjjiiiiijjjjjiiiijjiijjkkllnkmiggcfnmklmmlknnnnnooopolqsm������ݜrvrrrsrqqrsqpprrqoppqqrsssqqrrrrrsssssttttssssssssrttsstuttsrrqqrrvuuuvvvussstttttttuuuuvvuuuvvvvvuttssstttttttttuuuuvvvvvvvuuuutttuvvwwwvxxxxxxxxxxxxxxxxxxxxyyyyzzzzzzzzz{{|}}~~}~~}|||}|}}~������������������������������������������������������������������������������������������������������������������fghhgggghhhiiiiihhhhhhhhhhhhiiiihhhhgggghhhhhhhhhhhhhgggghhiiihhihhhhhhhfffffffffeeefgffiiiiiiiihhhgggffeeeeeeeeeeffffffffeffgffddeffghhffffffffffffffffggggggggggggggggihggfffgffffggggghhihhgfgfghhgfggggghhhghhhhhhiiggggggggfffggghhhhiiijjjjjjjjjjjjjjjjjjjiiijjjjjjiiiiiijiijjklllompjhjhlpmkknnlinnnoooppslsoc������ꢁxqqqqqqqqrrqppqqpoqppqrsrrqrrrrqrrssstttttssstttttrttrrtuuuutssstuvuuvvwvvssttuuuuuuuuuuvvvvvwwwwwuutttttuuuutuuuvvvvwwwvvvvvvvvvvuvwwxxwwyyyyyyyyyyyyyyyyxxyyyyzzyyyz{{|||||}}~~~~~~}}}}~~~~�������������������������������������������������������������������������������������������������������������������fghhhggghhhhhhhhhhhhhhhhhhhiiiiihhhhhhhighhhhhhhihhggfffghhiiihhhhhhhhhhfffffffffeefghhgiijjjjiiiihhhgggeeeeefffffffffffgfffggffdeeffghhffffffffgggggggggggggggggggggggghhgfeeeffffggghhghhhhhhghgghhffgiihhhhiiiihhhhiihhhhhhhhggfffghhhhiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiijjjkklmmmnnqiejkonlklnomknnnoppqqqmur}������uywrpqqqqrrrrrrrqqqqqppqrrqprrrrqqqrsssttttustttttttsuusrtvvvutsttuvvuuuvvvusttuvvuuvvvvvuuuwwwwwwwwvvuttuuuvvuuuvvwwwwwwwwvwwwwwww
wvwxxxxxxyyyyyyyyyyyyyyyyyyyzzzzzzz{{|}}}}}}}}~~~}~~~}}~��������������������������������������������������������������������������������������������������������������������fghihgggggggghhhhhhgghhhhhhiiiiihhhiiiiigghhhhhhihhgfeeeghhiiihhhhhhhhhgfffffffffeefhiihhhiiiihhhhhgggffeeefffffffffffffhgffggfeeefffggggggggggggggggggghhhhhhhhggggggghiihgggggfffghhiighhhhhhhihghgffgjjiiijjkjjihhhhhhhhhhhhhhgfffghihhiijjjkkkkkkkkkjjjjjjjjjjjjjkkkiiiiiiiikkllmnnnmopfbhimkkklnoonnnoopqqqmyjx������҉vsmurrrrssrqrsssrqqrqppqrrqorrsrqqqqssttttuusttttttttvusrtvwutssstuvutttuvuustuvvvvvvvvvvuuuwwwwwwwwwvuuuuuvwvvvvvwwxxxxwwvvwwwwwxxxwwxyyyxxyyyyyyyyyyyyyyyyyyzzzz{{|||}}}}}}}}~~~~~}~~~~~������������������������������������������������������������������������������������������������������������������������gggggghhggghhiiijihgghijiiiiiiiiiiiiiiiihiijjjiigggggggghhhhhggfggggggggeghgfefhdefhiiiiiiiiiiiiiiiihhhhgggggggghhhhhhhhihhhhhhhgggggggggggggggfffffffffggggggggfgghhggfhhhhhggghhhhhiiihhhhhhhhkfhjefjhihkinncmjjjiggikmkgfghhfhhhhhhhhgggghijkjjjjjjjjjjjjjjjjjjjjjjjjjiihiijjlllmnnooriq`ckgaonnnooonkpolnrstsnt�������wttupssssssssssssrrrrrrqqrrqpqrrrqqrrttsttuttssrsttttttsrstuvrrsrrrtuutstvwxwvuttvwwwxwwwwwwxuvwxwwvwvvvwwwwwxxxxwwwvvvvvvvvvwwwwwwwwwwwwxxxxyyyzzzzzzzzzzzz{z{{{||}}}}}}}}}}}}}}~~~~��~���������������������������������������������������������������������������������������������������������������������hhhhghhhgghhiiijjjiiiijjiiiiiiiiiiiiiiiiiijjjjiigggggggghhhhhhggggggggggghihgfghefghiiiiiiiiiiiiiiiihhhhhhhhhhhhiiiiiiiiiihhhhhhgggggggghhgggggggggggggggggggggggggggggghhhhggggggghhiiihhhhhhhhlhggdeikjkoieklklkiikljfehjiggjniiiiiiiihhhhijjkjjjjjjjjjjjjjjjjjjjjjjjjjjjiijjklllmnnoomqpmpko_nnmnoooomjmpprrlns�������}rurpussssssssssssssrrrrqqrrqqqrrrrqrrttsttuutsssstttttssrrsttrsssrrsuvutuvwwvvuttvwwwxwwvvwwxvwxxxwwwwwxxxxxxyyyxxxwwwwwwwwwwxxxxxxxxwwwxxxxxyyyzzzzzzzzzzzzzzz{{|}}~}}}}~~~~}}}~~~~~����������������������������������������������������������������������������
����������������������������������������������jjiihhhihhiiijjjkkkjjkkkiiiiiiiiiiiiiiiijjjkjjihgggggggghhhhhhhhggggggggiijjihhgfghiiiihiiiiiiiiiiiiihhhiiiiiiiiiiiiiiiiiihhhhhhhhhhgggghhhhhggghhhhhhhhhhhhhhhhhhgggghhhhhhggggfgghhijjiiiiiiiiihdcfimtſ���umjoppmjiiimlkklkihjjjjjjjjiiiijjkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjkkklllmmnnnompomplnennmnoppprnrtprupt���������utotqrsssssssssssssssssrqqrrrqqrssrrrrttttuuuttssttuuttttssssstttssrstwvuuvvvvvuttuvwwwwvvvvwwwxxyxxxyyyyyzyyyzyyyyyyxyyyyyyyyyyyyyyyyxxxxxxxxyyyzzzzzyyzzzzzzyzz{|}~~}~~~~~~~~~~~~��������������������������������������������������������������������������������������������������������������������������������kjjjijjjiiijjjjjjkkkkkkjjjjjjjjjiiiiiiiijjkjjihhhhhhhhhhhhhiiihhggggggggjjjjjihghhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhiiihhhhhghhhhhhhhiiiiiiiiiiiiiiiiihhgghhiiiihhhhggghhiiijjjjjjjjjgiedjlnw�����­��vjinpoookhkopmjllkkkkkkjjjjkkllkkkkkkkkkkkkkkkkkkkkkkkkkkkkklllmmmmnnnnphsefm`honnnoppprnmpqpsz�������Ӆszyntvossssssssssssttttsrqqrrrrqrsssrrrttttuuuuttttuuuuuuvvuuttuuuttssswwvvvvvvvuuuuvwwwwvvvvwwwxxyxxyyzzzzzzzyyyyyyyyyzzzzzzzzzzzzzzzzyyyyyyyyyyzzzz{{zzz{{{{{zz{||}}}}}~~~~~~~~~���������������������������������������������������������������������������������������������������������������������������������jjjjjjkkjjjjjjjjjjkkkkjjjjjjjjjjiiiiiiiijjjjjihghhhhhhhhhhhiiiiihhhhhhhhkjjjjihgijjjjiiijjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiijiiihiiiiiiihhhhiiiiiiiiiiiiiiiijjjjjjjjiihhhhiijjjiiiiiiiiiiiiijjjjjjjjhkhhmkiox�������Һ��|oknpoppomormlllkkkkkkkkllllllllllllllllllllllkkkkkkjkkkllllmmmnnnnnohtihnZiponnopppsnirzpz�������ߙ|toxwqvsssssssssssssttttsrqqrrssqrstsrrrutttuvvuuttuuvvuuvvvvuutvvuuutssvvwwwwwwvvvvvvwxxwwvvwwxwxxxxxyyz{{{zzzyyyyyyyyyzzzzzzzzzzzzzzzzyyyyyyyyyyzzzz{{z{{||||||||||}}}}}}}~~~~������������������������������������������������������������������������������������������������������������������������������������jjjjjjkkjjjjjjjjjjkkkkjjjjjjjjjjjjjjjjjjjjkjjihhiiiiiiiiiiiiiiihhhhhhhhhjjjiiihhjjjjjjj
jjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiijjiiiiiijjjiiihhiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiikkjjjjiijjjjjjiijjjjjjjjfhikmjhjimj���������ү�uvqllllmpnmmmllkkklllllllllllllllllllllllllkkkkkjjkkllmllnnnnnnnnmollllhvqponopppspmstp��������uxynvzptssssssssssssssssstsqqrsssqstttsrruttuvvvvuuuuvvvvuuuvvuutwvvvvutsuvwxxwxxwwwwvwxyxxwwwwxxwxxxxxyz{{{{{zzyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzyyyzzzz{{{{{|}}}}}}}}}}}}}~~~~~~~~����������������������������������������������������������������������������������������������������������������������������������������jjiiijjjkkjjjjjjjjkkkkjjjjjjjjjjkkkkkkkkkkkkkjjijjjjjjjjiiiiiihhhhhhhhhhjkjihhikkkjjjkkkjjjjjjjjjjjjjjjjiiiiiiiijjjjjjjjjjiiiiiikjjjiiihiiiijjjjjjjjjjjjjjjjjjjjiijjjjiikkjjjjiijjjjkkkkjjjjjjjjeeimlkkjifojo����������˟��{yupooonnmmllkllmlllkkkkkkkkkkkkkkkkkkkkkkjjjkllmnnmmnnnnnnnnookjikf{qpnnopqqnopmq����������xpwxutsspssssssssssssssrrtsqqrsttqstutsrruttuvvvvvuuuvwwvuuuvvvvuwvvvwvusuvxxxxxyxxxxwwxzyxxxxxxyxxyxxyz{||||{{zzzzzzz{{{{{{{{{{{zzzzzzzzzzzzzzzzzzzzz{{{z{|}~}}}~~~~~~~~����������������������������������������������������������������������������������������������������������������������������������������jjiiiiiikkjjjjjikkkkkkkkkkkkkkkkllllllllkllllkkjjjjjjjjjjjjjiihhhhhhhhhhjkkjhhjmkkjjkkkljjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiikkjjjiiiiijjjjjjjjjjjjjjjjjjjjjjiijjjjiijjjjjiiijjkkkklljjjjjjjjkglokjlijlohknx�����������ɯ��trppponnmmkllmllkkkkkkkkkkkkkkkkkkkkkkjjjjllmnnonnnnnnnnnnpitkfkPmponnoqrrrtuq��������΅yywmurovrrssssssssssssrrrrtsqqrsttqstutssruttuvwwvvuuvwwwvvvvwwwwwwvvvwwutuwxyxwxyxyyxwwyzyyxxxxyyyyyyyy{|}}}||{zzzz{{{|||{{{{{{{{{{{{{{{{{{{zzzzzzzzzz{{{z{|}}}}|~~~~������������������������������������������������������������������������������������������������������������������������������������������������kkjjjjjkkkllmmmlnmkjjkkkkkkklllllmmlkkkkllkkkkjjiijjjjkkijjjiijjiijjjkkkllkkkklljkkkkkjjllkkkkjjkkjjjjjjkkkkkkkkkkkkkkkkkkkkjjjjlkkkklkkjjkkkkkkjjjjjjjjiiiijjjjjjjjjiiikkkkkkjjjjjkkkkkjkkkkjihhijkkkklkkkkklnox�����������
�ѵ�|vstsonpnmnqpljlklmmlkkkkkklllmmlkjjjjjjllllmmmmlllmmnnnmqoOLc]]skuqlssvn{g��������ޒwvxttttttsrrrrsssssuttssttuutrrrstutsrsttsrstvvvvuvvvvwwwwwxwwvvwwxxwwxyxvtuwxwxyywxyzyxwxyzzyyyyyyxyyzz{{|||||{{zzzzzz{{{{{{{{{{{{{{{{{{{{}}}}}|||{{{{{|}}~}}}}}}}~~~~~�����������������������������������������������������������������������������������������������������������������������������������������������llkkjkkkhijlnpqrqpnmmllkkkkkllllllmlkkkllllkkkkjijjjjkkkjkkjjjjkjkkkklllmlllllllkllllkjjkkkkkkkkkkkjjjjjllllllllkkkkkkkklkkkkjjjlkkklllkjkklllkkkkkkkkkkjjjjjjkkjjjjjjjjjjkkkkkkllkkkjjjkkkkkjihijkkkkklllmmmmoplieq������������Φ�vupnsprojimonkkllllkklllllllllkkjjjkklllmmmmnlmmmmnnnkrgcmsiQgqomqsokyj��������xqxstttttsssssstttttttsstttttsrrrtvtsrsuutsstvvvvvvvvvwwwwwxxwwwwxxxwxxyxwuuwxwxyyxxyzyxxxxyyyyxxyyyyzz{{||}}|||{{zz{{{{{||||||||||||||||||}}}}}}}}||||||}}~~~~~}}}~~~~~����������������������������������������������������������������������������������������������������������������������������������������������mlllkklljklmnoppqponnnmlllllllkkklllkkkllllkkkkkjjjjkkkkkllkkkklllllmmmmmllmnomllllllkkjkkkkkkkkkkkkjjjjlllllllllllllllllllkkkkkllkklllkkkklllkkkkkkkkkkkkkkkkkkkkkkjjjjjjjkkkkklllkkjjjkkkkkjjiijkkkklllmmmmmnopopmem����������������{spqqqrrnjpommmlkjllllllllllkkkkkkmmmmmnnnnnnnnnnnlrlmtri[muottmtss�����������rtwtsstttttttttttuuutttssttttuutrrtvssstvvutstvwvvvvvvwwwwxxxxxwwxxxxxxyyxwvvwxxxyyxxyyyyxxxyyyyyyyyzz{{|||}}}}|||{{{{{{|||}||||||||}}}}}}}}||}}}}~~}}}}}}}~~~~~~~~}~~~����������������������������������������������������������������������������������������������������������������������������������������������mlllllllmmmmmllkmmmmnnmlmmllllkkkkllkkllllllkkkkjkkkkkllllllkkllmmmmmmmmlkkmppnmllllkkkkjjkkkkllllkkkjjjlllllllllllllllllllkkkkklkkklllkkkkkkkkkkkkkkkkkkkkkkllllllkkkkkkkkkkkjjkkkkkkllkkkjjjjjjkllllllklmmlmmnmjjmooqs�������������׸��yqrrnovnmmmmmnmlllllmmmllllllllmmmnnnnnoonnnnnnqltifg\iqnnslkvpv��������Ȇ�rrxqsstttttttttttuuuttttttttsuvurqtvsssuvwvustv
wwvvvwwwwwxxxxwwwwwwxxxxyyxwwwwxxyyzyyyzzyyyyzzzzzzzy{{|||}}}}}}|||||||{{||}}||||||||~~~~~~~~}}}}}}~~~~~}}}}}~~~~~~~~������������������������������������������������������������������������������������������������������������������������������������������������lllllllljkkllllljjklmmmlmmmlllllkllllklmmllllkkkkkklllllllllkklmmmmmmmmmjjknppomkkkkkkkljjkkkkllllkkkkkklllllllllllllllllllkkkkklkkkklkkkkkkkkkkkkkkkkkkkkkkklllmllllllllllllkkkkkkkllllkkjiiijjkklmlllmllmmmmmnltsignohomw�������������ճ�~xrmlstttqonmllllmmmnllmmmmmmnnnnnoooooooonnnsiohhh[iqqrlourt��������و�wxuntttttttuustttttttttuuuutttvwvsrsvtssuvwwustvwwwwwwwwwxxxxwwwwwwwwxxyyyxxxxwxxyyzzzzzzzzzyz{{{{{{{|||}}}}}}}|||||}||{{{|}}||||||||}}}}}}}}~~~~}}}}~~~~}}}}~~~~������������������������������������������������������������������������������������������������������������������������������������������������������lmmmmllljjkklllllllmmmlkmmmmllllllmmlllmmmmllllkkllllmmmllmllllmnmmmmmmljkmnooonllkkkkllkkkkkkkkkkkkklllllllllllllllllllmllllkkklkkklllklkkkkkkkkkkkkkkkkkklllllmmmmmmmmkllmmmmmmmlllkkklkjiijjkklmmmlmmmnoonnnnqlknlhmwnpnik��������������ʤ�{vqqqqppoollmmmmmmlmmnnnnnnnnoooooppoooooopqknohkosvumtpv����������{zsvxlwttttttuussssssttuuuvvuuutvwwtssutsstvwwvsuvwwwwwwwwxxxxxwwxxxxwwxyyyxxxyyxxyyyz{{{{{|{{z{{|}}||{}}}}}}}}}||{{|}}||{{{|}~}}}}}}}}}}}}}}}}~~~~~}}}}}~~~~~}~~��������������������������������������������������������������������������������������������������������������������������������������������������������mmnnnmmllmmmmlllnnmmnmmlllmmmmmmmnnmlllmmmmlllllllllmmmmmmmmllmnnnnnmmmmlnpponnoonmlllllkkkkkkkkjkkkllmmmmmmmmmmmmmmmmmmnnmmmmllmlllmmmlmmllklllkkkkkkkkllllmmmmnnnnmmmmllmmnnnnmmmllkkkmlkjjklllmmmmmmmmnooonnnmmlnqsoioqutnku�������������������tnnrttnnnmmmllmmnooooonooooppppppppooontoolhpxpppqmq���������zo{tuqtuutttttussssstttvvvwwvvvuvwwuttsutssuvvvsuvwxwwxwwxxxxyyxxyyyyxxxyzyxxyyyxxyzyz|}|{|}}|{{{|}}}|{}}}}}}}}||{{{||}}}|{|}~~~~~~~~~~~~~~~~~~~~~~~~~}~~���������������
�����������������������������������������������������������������������������������������������������������������������������������������nnnoonmmllmnooopnmmmnnmllllmmmmmnnnnmllmmmmmlllllllmmmmmnnnnmmnooonnnmmmnprqnmmoqponmlllllkkkkjjjjkklmmmmmmmmmmmnnnnnnnnoonnnnmmnmmmnnmmnnmlllmmllllllllmmmmmmmnnnnnnnnmnnnnnmmmllllllllnmlkkkmnlmnnmmmmmmnnnmmmmmmmmnoprmkoommpnx��������������վ��yurpoonnmlkkmmnoppoooooopppppppppppppmujiphtoltwk���������Ćzlvy{popvuttttttssssttttvvwwwwvvvvwwvutsvtrstvvvsuvwxxxxwwxxxxyyyyzzzzyyxyzyxxyzzxxyzyz|}}||}}|{z{|}}||{}}}}}}}}|{{z{{|}~}|||}~~~~~~~~~}~~~~~}~���������~~��������������������������������������������������������������������������������������������������������������������������������������������������������oonmmnoonnooooonnnmllmmmnnnnnnnnppoonnmmoonnmmllkllllmnnnoooooonponmmmmmnoooooooopppoonnlkkklmkjllmmmmmnlmmnnoopoonnnnooooooooonooonnnnnmnnoonnmlmmllkllnnnnnnnnppppooonmnnmmnnmooonllmolmnooonmmmmlmmnnmmmnnnnnnnnnnnnnooooopppmprv���������������ڷ��vspnonkkmkkmpqonmknonmoqqoooooopprpoheigim|il���������׎vvttttrrtstuvvuuvvvvvvvvvvvvvvvvvuvvuuvtquxukwvvuvsty{ywywwxxyyyyxxyzyyxwzzzzz{{{{zyyz|}~|||}}}}}y|~~~}}}}|{{{}}}}||||~~}|}}����~����~����������������������������������������������������������������������������������������������������������������������������������������������������������������mmllllmmnooooonnnmmlllmmnnnnnnnnooooonnnoonnnnnnmmmmmmnnoooooooooonnmmmmooooooooooppponnmllllllkmmmnnmmmmmmnooppoonnnnoooooooonnooonnnnnmnnnnnnmmnnnmmmmnnnnnoooopppoonnmnomnoonpponnmnnlmnnoonnnnnnnnoonnnnnooopoooooonooooppppvqnoot����������������Ь�vrqonmopnllprsnppppqpnppoooppppqriipkgrts{����������v}vtsttrrttuvwvvvvvvvvvvvvvvvvvvvvvwwuuvusvywnwuvvvssxzxwyxxyzzzzzzzzzzyyyzz{{{||||{zzz|}}||}}}}}}x{~}}~}}}}||||||||||||}~}|~��~���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
����onnmmnnooooooonnnmmmmmmmnnnnnnnnnnnoooppoooooooonnnmmnnnooooooooooonnnmmooooooooooppponmmmmmlkllmnnonnmlmnnnooppoooooooooooooonnooonnnnnmnnnnnnmoooonnnonnoooooooooooonnmnonnppoppoooonmllmnooonoooppppoooooooooqpppoooooooppppqqopsojmuo����������������ǣ�xvvtpommqupgooooqrqpppppppqqqtvnnvohsjt���������vvwutstsrrsuvwwwvwwvvvvvvvvwwwwwwwwwwwvvwvttxxqwvxxwttwyxxzyzz{{{{{}|{{zzzz{{{||}}}|{{z{|}}}}}}}}}}xz|}}~~}}}||}}}}}}}}}}}~���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������poooooopooooonnnonnnnnnnoooooooonnnoopppooooonnnnnmmnnnnoooppooonnooonnmooooopppooppponmmmnmkklmmnooonmlnnoooppppppoopppooooooooooooonnnnnnnnnnnopppooopooooooppoooooonnlnnmnppopoooppnllmmnoooooppqqppooooooooopppppooopppppqqqnsusrttqvln������ȹ��������ۼ��~uutroopqrrqppqqqpooppqqqrswpnrkhrow�������컄z~sutstsrrsuvwwwvvwvvvvvvvvwwwwwwwwwwwvwwvurvwtvwz{yvuxzz{|zz{{{||{~}}|{{{{{{{|}}~~|||{{{|}}}}}}}}}z{|}~~~~}}|||}~~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������oonnnnooooooonnnpppppoooppppppppooooooopooonnnmmmmmmmnooooppppoonnoooonnooopppppooppponnlmnmkklmmnopponnooopppppppppppppoooooooopppooooooonnnnooppppooopooppppppppppoonnlnnmnppopooopomkmmnnooppoppqqppooooooooooooooppppppqqqqrpttppsrmtpquu����⿦���������ϸ��vrttssttsrqoopoooopqrrrptpkhdinvz�������Ňw|xxvtsttrrtuvwwvvvvvvvvvvvvwwwwwwwwxwvvwwvvruwwtwz{zxvwyz{|zzz{{{{{}}}}}|{z{{|||}}~|}}|{{|}~~~~}}}}||}~~~~~}}|||}~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ooooooooooonnooopqqqqpppppppppppppppooooooonnmmmllmmmnooppppppppnnoooooopppppppppppooonnmmmlllmnnnopppoopppppppppqqqqqqpppppppppqqqppp
pppoooooopppqppoppppppppqqpppppooomnonnppoooooonmkoooooppppppppppooooooooonoooppppqqqqrrrrroptvtsuqpopppty����Ҷ�����������Ϥ�zvttqpprrpooooppqqrsuqtrkfeopx������ؐvyuyuvututsstuvwwvvvvvvvvvvvvwwwwwwwwxvvwwwwxvvwyrwyyywvvxz{{zz{{{{{{||}~}|{z||||}}}}|}~}{{{}~~~~~~~}~}}~~}}}}}}}~~���}~~���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������oppppppoonnnnopppqqqqqpppppppppppppppooooooonnnnnnmnnnooppppppppooooooopppppppppppoonooonmmmnoppooppppooqqqqqqqqpqqrrqqppppqqqqqrrrqqqqpqppppppqpqqqpppqpppqqqqqqqqqpppooppooqqpppqponmmppppppppqqpppppppppppppooppppppqqqqrrrrrtrrturqrsvvuuurnmw����ˬ�~x��������޾��|}vpprrqqrrqqrrrsururnmmv{x������ݟuyxu|qwuuuussuvwxxwvvvwwwwwwwwxxxxxxxxywvwxxxyzww{qwyyywvvxz{{{||||||||}}}}}}|}}}}}}}}}~~}{z{|~~~~~~~~~|{}~}}}�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������mnnnnnnmnnnnnopqppqqqppoooooooooppppppppooooooooooonnooopppqqpppoonnoopppppppppqqponnnooonmmoqqqpppppoooqqqqqqqqqqrrrrqqqqqqqqqrrrrrrqqqqqppppqqqrrrqpqqqqqqqqqqqqqqqppppqqppqqppqrqomnnqqqppppqrqqpppqqqqqqqpppqqqqqqqqqqrrrrssqvvqqwwrssrqqsuvvnoy����Ψ|c_gy�������ҷ��xrsssrttssrrssqpsomrrwzq������yxuszq{wvuvuttuvwxxxwwwwwwwwwwwxxxxxxxxzwvxyxxz{wv|qy{zzxwwx{|{}}}~~~}}}}}}}~~~~~~~~~~}~~{z{|~~~~~~~}{z}~}}~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������nooppoonoooppppqppqrqponoooooooopqqqqqqpoooooooonnnnnnnnoopqqponnnoopqqqppppppqrpppppppqlmopqrqqrrqppoppopppqqqqrrrrrrrrqqqqqqqqssrrrrrrqqqqqqrrsrrqqqqqrrrrrrrrrrrrqqqqrrrqqqqqsppqpmotpppqqqqrrqqppqqrrrqqqqqqqqqqrrrrrrrrssssttttsssssssstuutwpptrp~���ՙaPRQUev������ѯ�squwu
svskrxoqpvtmgpgp������zyxwvvuuwwvvvvvvvwxxxxwwwwwwwwwwxxxxyyyyyyyyzzzzzvvyzxy|{xwyzz|~||||||||}}}}}}}}~~~}}}}}~~~~}|zy����|z{}~|������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������oppqqppoppppppqqpqqrqqpoppppooooppqqqqppoooooooooooooooopqqrrqonnooppqqqppppppqrqqppqqpplmnpqqqqrqppppppppqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrqqqqqqqqrrrqqqqqrrrrrrrrrrrrqqqqrrrrqqqqrrqonoqqppqqqqrrrrqqqqrrrrqqqqqqqqqrrrrrrrssssssttttssssssssttttssuvrmouv���xQLNMHLZ|�����ܴ�~}xrpstqqtuvrdgruk�������~yxxwvvvvwwwvvvvwvwxxxxwwwwwwwwwwxxxyyyyyxxyyyyzzzwvyywx{zyxxy{|}}}}}}}}}}}}}}}}}}}}~~~~~~~~~~|{z~~����}{{}~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������pqqrrqqpqqqqqqqqqqqqqpppqqqqpppoppqqqqppppppppppqqqqqqqqrrrrrqpooooppqqqppqpppqrrqqqrrqolmnoqqqqrqoopqppqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrqqqqqqqqrrrqqrrrrrrrrrrrrrsrqqqqrrrrqqqqrtrmmrsoqqqqqrrrrrrqqrrrrrrqqqqrrrrrrrrrssssstttttssssssssrstttstvuqopoliq����jTMQSMGOe������Ĝ�wqwqsxqlqrku|ie�����Έvvwwwwwwwxwwwwwwwvwxxyxwwwwwwwwwwxyyyyyyyxxxyyyyzzxxywuw{yzyvw{}|~~~~~~~~}}}}}}}}}}}~~~~~~~~~}||}}~�����~||}�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������qqrrrrqqrrrrrqqqrqqpppppqqqqqppppppqqpppqqqqqqqqqqqqqqqqqrrrrqpopppqqqqrpqqqppqrrqqrssqnmnopqrrrsqooqrqprrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrqqqqqrrrrrrrrrrsrrrrrrrrrssrrqqrrrrrrrrrsspnorrpqqrrrrrrrrrrrrrrrrrrrrrrrrrsssssttttttttssssssssrrrsttttturnotwvsupkw���sUIINLAIHUr�����̼�~zvpuxutmomj�����Ύv~uuvwxxwwxxxxxxwwuvwxyyyxxxxxxxxxyyyyyyyyyyyyzzzzzzyyvtvzy|zuu{~|~~~~~~~~~~~~~~~~}}}~~~~~~~~~~|}~�����~}|}~��
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������qqrrrrqqssrrrrrqqqpooopppppqqqqqqqqqqqqqqqqqqqqqrrrrrrrrqqqqqqppqqqqqrrrqqrrqqqqsrrrsrpooppqrrssrqppqrrqrrrrrrrrssssssssrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssrqrrrrrrrrrrtpnprqqsrrrrrssssssssssssrrrrrssssssssssttttttttrrrrssssrrrstuutqqrrsstuvuttrrw~�m]SQPDGBJJH_��������~|wut}xtl���ѫ�pvyuuvwxxxxxyyyyxxxtuwxyzzzyyyyyyyyyyyyyzzzzzzz{{{{{{zyvtvyy|{vu{~}~~~~~~~~~~~~~~~~}}~~~~~~~~~|}~������~}|~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������qqrrrrqqrrrrrrrrqqppoopppppqqqrrrqqqqqqrrrrrrrrrrrrrrrrrqqqqqqqqrrrrrrrrqrrrqqqqrssrrqpoqqrrrrrsrrqqqrsstssssssrsssssssssssssssssssssrrrrrssssssssrrrrssssssssssssssrrrrrrrrrrrrrnnrtqqtsssssssssssttsssssrrrsssssstttttutttttttsssssssssrrstuuurpqtspquotvtrsrormi^TNGGL@AIC?a�����ᯓ����nli{����xu~vtvwwxxxxxyyzzzyyxtuwxyzzzyyyyyyyyyyyzzzzzz{{{{|||{{{ywvvwz|{xwz}~~~~~~~~~~~~~~~~~~~~~~~~~}~��������~}}��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������qqrrrrqqrrrrrrrrrrqqqpqqqqqrrrrrrrrqqrrrrrrrrrrrrrrrrrrrrrrrqqqqrrrrrrrrqrssrqqqrssrpoopsrrrrqrrqsssrrsuuuuuttttssssssssssssssssttsssssssssstttttssrrrrrsssssssssstsrrrrrrrrrrrrnoqrssstssssssssssttttssssssssstttttttttttttttttttttuuuutttttuttsqqsssuwvstwsloxtywtmc[RKIIIGCCG^���������к�����ߜrxr{sxxxxxxxyyzz{zzyxuvwyyzyyyyyyyyyyyzzzzzzzzz{{{{||{{{zxxwv{{{zyz}~~~~~~~~~~~~~~~~~}}~~~����������~���������������������������������������������������������������������������������������������������������������������������������������
�����������������������������������������������������������������������������������qrrssrrqqqrrrrrssrrrrqqqsssrrrrrsrrrrrrsrrrrrrrrsssssssssssrrqqqsssrrrrrrrssrqqqqstromoqssrqqqqqqsutrqtwvvvvuuuusssssssssssssssstttttsssttttttttttssrrrrsssssssssttsrrrsrrrrrrsskpsrrtusttttsssssttuuttsssssssttttttttuuttttttssuuuvvvvvvuututtspqsuwwuqstssuwtoswqvxsrgdd\QORNEHOQk������ְ������|{|yt|zyyxxxxyyzz{{zyxvwxyyyyxyyyyyyyyzzzzzzzzzzzz{{{{|{{zzywu{{{|zy|�~~~~~~~~~~~~~~~~~�~}}}~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ttsssrrrrrssssrrqrrsssrrssssssssrrrrrrrrrrrrrrrsqqqrrrrrrrrrqqrrrsssssrqrssttssrqsrpprrqqrsssrrqprtuutttwwwwvvvvvuttsrrrssssstttttttttttsssssssssssssssssrrrrrrsrsttsrrrqqqrttrppqqrstttrsuutsssttttttttssrrrstttttttttttuuuutuvuuvvwvvuwvvvvusrvvvuuuttuttttuuvvutsstuusszpjxvgFFAAQq����c[q���}yzw{zyxxxyzzzzyyzzzyvwxyyzzzzzzzzzzzzzzzzzzz{{{{{{{{|{{{zyvtz}|z{z|�~~}~~��~~~~��~����������~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������sssssrrrrrsssssrrsssssrrssssssssrrrrrssssrrrrsssssrrrrrrsrrrrrrrssssssrrsstttsrrrssqqssrsttttsrqqsuvvuuuwwwwvvvvvuutssssssssssssuuuuuuuusssssssstsssssstssssssssssttsrrrrrrrssrprsssstttrtuutsssttttttttssrrrstttttttttuuuvvuuvvuvvwwwvvuvvvtsttwwvvvuuuuttttuvvwvuttttu{qqvwwjXFDBBEILL�uTAANx�ߔ~yx{wyzyyxxyzzzzyzz{zzwwxyyzzzzzzzzzzzzzzzzzzz{{{{{{{{{{{{{yvtx||z{z|�~~~����~~~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ssssssssrrsstttttttttsrrssssssssrrssssssssssssttt
tttssssssrrrrrrtssssssssstttssrsssrrstsuuvuutsrstvwvvvvwwwwvvvvvvuuttttssstttttuuuuuuuutttttttttsssssstssttttsstttssrsssrrrsrqqutttttttstuutssstttttttttssrsstttttuuuvvvvwwvvwwvwwxxwwvuvwusrtwwwwvvvuuuuuuuvvwwwvutttuvvvwlYLC=?BB?;=AN9CBHIQa��{{wyvzzyyyyyz{{zzz{{{{xxxxyyzzzzzzzzzzzzzzzzzz{{{{{{{{{{{||zwuw{|z{z{��������������������~~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ssssssssssttuuuuvvvvutsrssssssssrrssssssssssssttuuuuttttssrrrrrrssrrrrssssttttssssssstttvvvvuuttsuvwvvvwwwwwvvvvwvvuuuuuuuuuvvvvuuuuuuuuvvvvvvvvttttttttsstuutssuuttsssttssrrrrquutttttttuuuttsttttttttttttsstttttuuvvwwwwxxwwxxwxxxxxwvvwvussuwwwwvvvuuvvvvvwwwwwvvuuuuqypaM=>?HFA9338<B=DEJIGQs{w}yzw{zyyyyzz{{{z{{||{yyxxyyz{{{{{{{{{{{{{{{{{{{{{{{{{z{|||{xvw|}{|{{������������������������~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ssssstttttuuvvwwwwwvutsrssssssssrrsssssstsssstttuuuuuuvvsssrrrrssrrrrrssrsstuuttrrssttttuuvvuuuutuvvvvvvwwwwvvvvwvvvuuvvvvvvvvvvvvvvvvvvwwwwwwwwuuuttuuusttuuttsuuutsstuuttsrrrsttssstuuuvvvutttuuuuuuuuuuuuuuuuuuvvwwwxwxyxxxxyxyyyyxwwxwutuuvvwwvvvvvvvvwwwwxxwwwvvvvvyjPA?@CD53//9C?4AHHFGJCVs�vz{~yxzzzzz{{{|{{{|||{zyyyyyz{{{{{{{{{{{{{{{{{{{{{{{{{z{{||{yww}~}~|{~���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ttttttttuuuvvvwwwwwwvusssssssssssssssttttttttuuuuuuvvvvvtsssrsssrrrrrrssrsttuuutrrrtuttuuuuuuuuutuuvuuvvwwwwvvvvvvvvvvvvuuuuuuvvvvvvvvvvvvvvvvvvvvvuuvvvtuuuuuutuvvutttuuutssstut
tssstuvwwwwvuuvvvvvvvvvvvvvvvvvvvvwwxxxxyyyyxyzyyyyyxwvwvuvwwwvwwwwwvvvwwxxxxxxwwwwwwww{U;7@C73/58:CI?.8;DBDGIgyzzy{{zzzzz{{{{||{{||{zzzyyyzzz{{{{{{{{||||||||{{{{{{{{{{{|||zyw|~~|{����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������vvuuuuttvvuuuvvwwwwwvutsttttttttttttuuuuvvvuvvvwvvvvvvvvttsssssssssstttsttuuuttssrsuvuuvwvvvvvvvtuvvuuvvwwwwvvvvvvvuvvvvuuuuuuvvvvvvvvvvvvvvvvvvwwwvvwwwvvvvvvvvuvvvuttuvvutssuwuttstuvwxxxwvvvwwwwwwwwwwwwwwwwvwwwxxxxxxyyyyyyzyyyyxwwvtuwxyxxxxxxxxxxxwxxyyyyyxxxxxxwwwOB78=+'1:ABCFB;88AEOS��x||xw{{zzz{{|||}|{{{{zyzzzzzzzy{{{{{{{{||||||||{{{{{{{{{{{{||{{t{}~}|���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������wvvvuuutvuuttuuvwwwwvutsuuuuuuuuuuuuvvvvwwwwwwwxwvvvvvuuttsssssssstuuuttuuuuutsrtstvwuuwxxwvvvuuuuvvvvvwwwwwvvvvvvuuuvvvvvvvvvvvwwwwwwwwwwwwwwwwxwwwwwwxxwwwwwwxuvwvvuttvvvtstvxvuuttuwxyyyxwvwwxxxxxxxxxxxxxxwwxxxxxxxxxyyyyyyzyyyyxwvvquxzyyyzyyyyyyyywxyyzyyyyyyyxwwvzTK0*?53BCDDEEA=4:4@T\�͘z{yzz}wzz{{||||}}|{{zyyzzz{{zyy{{{{{{{{}}}}}}}}{{{{{{{{{{{{||||ry|~~}�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xxwwvuutuuuuuvwwyyxwvuutvvvvvvvvvuuuuuuvuwxwvvwxwwwwwwwwvvutttuuvvvvvvvvxvtstuutvvvvvvvvxxxxxwvuvvwwxxxxxwwwwvvvvvvvvvvvwvvvwwwwwxxxxxxxwwwwwwwwwwvvvwwxwwwvvwwwuuvwwvvvwvttvvvvvwxwwwwxxxxwwwwxyyyxxxxxyxxxxwwwxxxxxxxxyyyyyyyyzyyz{yvsyxxxxyyzyyyyyyyyyyyyyyyyyyyxxwwvzcC,;KAE<CHEE@9?A:=JFS���{zzy|yzz{{{{{{||{
{{zzz{z{|||{zzy{}|{{|~}}}}}||{||||||||{|}}}|||xq}{�~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yxxwwvvvvvvvvwwxxwwwvvvvwwwwwwwwuuuuuvvwvwwwwwwxwwwwwwvvvvuttuvvvvvvwwwwxwutttuuvvvvvvwwxxxxxwvvvvwwxxxxwwwwwwwwvvvvvvvvwwvvwxxwwwxxxxxxwwwwwwwwwwwwvwwwvvvuuvvvvvwwwvvuvutuvwwvvwxxwwwxxxxwvvwxxxxxxxxyyyxxxxwwyyyyyyyyyyyyyyyyyzzzywvuzyyxxyyyyyyyyyyyyyyyyyyyyyyxxxwwtnSACDBDJJF@?@?C@E@FL6K��{yzx{{{z{{{{{{|||{{{{{{zz{||{{zy{}}|{|~}}}}}|||{{||||}}}~~~}|||zs}~|}|��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yyyyxxxxxxwwwwxxwwwwwwwwxxxxxxxxuuuvvwxxxwwwxxxwwxxxwwvuvvvvvvwxwwwwxxxxxxvtssuvvvwwwwwwxxxxxwwvwwxxxxxxwwwwwxxxvvvvvvvvxwwwwxxwwwwwxxxxwwwwwwwwwwwwwwvvvvvvvvvvwwwwwvuuttuvwwwvvwxxxwwwxxxwwvwxxxxyyyyyyyyyxxxxyyyyyyyyzzzzzzzzyz{ywuvx{{zzyyyyzzzzzzzzyyyyyyyyyyyyxxxxyxZHC>IMK@415>B=D=KKSZZ�ωy|y{|{{{||{{||||{{{{{|zz{{{{{{z{}}}|}~~~~~~}}|{{||}}~~~~~}}~~~w|~}�}{������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yyyyyyyyyyxxxxxxwwxxxyyyyyyyyyyyvvvvwwxxyxwwyyyxxxxxxwvuwwwwwxyyxxxxyyyyxxwussuwwwwwwxxxyyyyxxwwxxxxyxxxwwwxxxyywwwwwwwwyxwwxxxxwwwwxxxxxxxxxxxxwwxxwwvvxxxyyxxxxxxxwvuutuvwxwwvvwxxxwwwxyyxxwxxyyyyyyyyzzyyyyxxyyyyyyyyzzzzzzzzyyyxvvxy{{{{zzzz{{{{{{{{zzzzzzzzyyyyyyyy{{hZO?4,%
-=JRKVG]dnaR�߬~�|}|z{||||{|}||||||||zz{{{|||{|}~~}}~~~~}}||}}~~~~}}~~}~�z{}��}�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yyyyyyyyyyyyxxxwyyyyyyzzyyyyyyyyxxwwwxxxyxxxzzyxxxyxxwvvwwwxxyzzyyyyyzzzxxxvttvxxxxxxxxxzyyyyxxxyyyyyyxxwwxxyyyzxxxxxxxxyyxxxyyxxxxxxxxxxxxxxxxxwxxxxwwvyyzzzzyyyyyyxwuuuvwxxwvvwxxyxxxxyzzzyyyyzzzzyyyyzzzzyyyyyyyyyyyyzzzzzzzzzxwwxyzz{{{{{{{{{{{{{{{{zzzzzzzzzzzzzzzzxy���mfO8)6CLZYfl�xxbQ}�،�||||}}}|||}|||||}}}{{{{{|}}|}~~~~~~~~~~~~~~~}}~}}~�~|x|���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yyyyyyyyyyyyyyxxzzzzzzzzyyyyyyyyyyxxxxyyzyyyzzzyyyxxwwwwwxxyyyzzzzzzzzzzxxwwvvwxxxyyyyyyzzyyyyyyzzzzzyyxxxxyyyzzyyyyyyyyzyxxyyyyyyxxxxxxxxxxxxxxxxxyyxxxxyyyyyyxzzzyxwvuvwyyyxwwxyyyxxyzz{{{zzzz{zzzzyyy{{zzzzyyyyyyyyyyzzzzzzzzzxwwy{{zz{{|||||{{{{{{{{{{{{{{{{zzzzzzzzwrzwy�zP(6KOZ[hz|wsTFa����{|�}}~}}|}}|||}}}~~}||{{|}~}~~~~�����~~~~|}vz����~�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zzzyyyyyyzzzzzyyyyyyyyzzyyyyyyyyyyyyyyzzyzzzzzz{yyxwwwxxxxyyyzzyzzzzzzzzxwwwxxxxyyyyyyzz{zzyyyzzzzzzzyyyyyyyyzzzzzzzzzzzzzyyyyyyyyyyyyyyxxxxxxxxyyyyyyzzyxxxxxxyzzzzyxwvvxzzyyyyzzyyxyz{z{{{{zzzzzzzzzzz{{{{zzzzzzzzzzzzyyyyyyyyzzyzzzzz{{||||{{{{{{{{{{{{{{{{{{{{{{zzzyzx|{yz{|�S':PJZk�˜mogHc��ŉ|y�}~~~}}}}}}}}}~~}||}~~~~~����������������~~~}�vz���~����������������������������������������������
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{{zzzyyyzz{{{{{zxxxxyyyyyyyyyyyyxxyyyz{{yz{{zyz|yyxwwxyyxxyyyyyyzzzzzzzzwvvwyyywyyyyzzzz{zzyyyzz{{{zzyyyyyyyzzzzyyyyyyyy{zyyyzyyzzzyyyyyxxxxxxxxyyyyyz{{yyxxxxyyzzzzyxwvvxzzzyz{{zzyxy{|yz{{zzzzzzzzz{{{{{{{zzzzzzzzzzzzyyyyyyyyy{||zyyz{||||{zz{{{{{{{{{{{{{{{{||{{zzyyyzu{yv~x}T*8IQ���syU]��ݎ��v~~~~~}}}~}}}}~~�}||}~�~}~������������������������y|���}�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zz{{{{||{{{{{zzz{{{{zzzzyyyyxxxxvyzzz{{yz|}{z{zy|yvvwyyyyxz{{yy{{||{zz{{wwxzzxxy{yzzxy|{yyyyzzzz{{{zzzzzzyyxxyyzyyzz{{zzzzzzzzzzyyyyyyyy{zzzzyyyyyyyzzzzzyyyyyz{zyyxxyzzyyyyyyyyzyyyyyz{{{{{{{{{{{{{{{{{zzzzzzzzzzzzzzzz|{yy{~zuxyz{zyyy||{{{{{z{{{{zzzz{{{{{{{{{{{{{{{{zz{||{{}}T+<Er����wuj^{�私�z}~}~~{|~~~~~�~~~~~}~�����������������������������x{���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zzz{{{{|{{{{zzzz{{{{{{{{zzzyyyyy{{zyz}}{xyyxy||{|ywvxz{{||}~|zyy|{zzzzyxuvxzywvwzwxzyzzwzz{{zzzzzzyyyyyyzzzyyzzzzz{{||{{zzzzzzzzzzzzzzzzzzyyyyxxyyyyzzzzyyyyyz{{zzyxxyyzyyyyyyyyzyyyyzzzzz{{{{{{||||||||zzzzzzzz{{{{{{{{{yz}}{yyz{{|{zz{{{{{{zzz{{{{{{{{{{{{{{{{{{{{{{{{{z{||z{}�I.?Ux����xozv\c�ﴇ�~�~}~}||~~~~~~���~~��~~~�����������������������������zz��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�������������������������������������������������������������������������������zzz{{{{{|{{{{zzz{{{{|||||{{zzzzz{|�������}yz||z{yxxz|}~~}~}{z{|zyyzzxxxxy{zyz{}yz{z||xyz{{yxyyyyyyyyzzzz{{{{zz{{|}}}||{{{{{{{{zzzzzzzzzzzzyyyyzzzzzyyyyyyzz{{|{zyxxyyzzzzzzzzzzzyyyzzzzzz{{{{z||||||||{{{{{{{{||||||||{y{�~xx~{|}}||||zzzzz{{{|||||||||||||||||||||||||{{|{z{}�P(>RY����un{~YQ��Ȏ���~}~~~|}~~~~~����~~��������������������������������������~y���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zzz{{{{{|||{{{zz{{{{||||||{{{{||x�����������~yyzz|~�}|}~}{{}}||||{z{{zyzzz|�}||y{}|y{|{yxyzzzzz{{{{{{|}}|{{{|}}~}}}{{{{{{{{{{{{{{{{||{{{{zz{{zzzzyyyzzz{||||{zyyyzzzzzzzzzzzzzzzz{{yzzz{{{{}}}}}}}}||||||||}}}}}}}}|{|~}xy~|}~}}}}}zz{{{{|||}}}}}~~||||||||}}}}}}}}}|{|{z{}�P'=LR{���}wx]Mz�ڕ��~~~~~~}}~~���������������������������������������������y��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{{{{{||||||||{{{{{{{{||||||||}~{��������������~z|������~}}|{{}|}~�~||~~}~~}}�~��|{|z|}}|{z{|zz{{{{||{|}~~}|{{|}}~}}}||||||||{{{{{{{{||||{{{{{{{{zzzzzz{{||}}|{{zzzzzzzzzzzzz{{{{{{{{zzz{{{{{}}}}}}}}}}}}}}}}~~~~~~~~}}|zz{{{}}~}}}}~||||}}}}}}~~~~~}}}}}}}}}}}}}}}}~}||{z{}�C2;H`g��}w�eOT��~��~�������������������������������������������������|~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{{{||||}||}}}}}}||||||||||||~�������������������~�������
�����||}||��������������������}~}|{zz{{{{{{{{{{|}~~}|{|}}}}}}||||||||||||||||||{{{{zz||||{{{{{{||}}}|{||||||{{{{{{{{{{|||}|||{{{||}}}}}}}}}}}}}}}}}}}~~~~~~~~}}{xz}}z}~~}}}~~}~~~~~~~~~~~~}}}}}}}}~~~~~~~~~}}}|{|~�D/;CTX����|zv|hWE�󾃃������������������������������������������������������������~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������||||}}}}|}}}~~~~~~}}}}||||~��������������������������������������������������������������|z{{{z|||||{{{}}~~}}||}~~~}}}}}}}}}}}}}}}}}}||||{{{{|||||}}}||}}}}||{|}~~~}|}}}}}}}}|}}~~~}}||}}~~~~~~~~~~~~~~~~~~~~~~~~~~~~}{z{}~}}~~~~}}~~~~~~~~~~~~~~~~~~~}~~|}yJ(<DG`m���{{vwndG��ᘂ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������||}}}}~~|}}~~�~~~}}||}���������������������������������������������������������������}~~}~~}}}}}|}~~~~~~}|}}~~~~~}}}}}}}}}}}}}}}}}}}}||||||}}}}~~}}}~}}||z|~�}|~~~~~~~~}}~~~~~}|}}~~~~~~~~~~~~~~~~~~~~~~~~~~|xz�}|�~~~�~~~~~~~~~~~~~~~~~}~}}�G,5@FP\����ywymLw�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}}~~�����������������������������������������������������������������������������������~~}}}~~~~}}}~~~~~~~~~~~~~~~~~~~~}}}}}}}}{|~~~~}}~{z{z|~~~~��~~~����~~~~~
~~~~~~~~~~~~~~}~�~||~~~~~~~~~~~~~~����~}}��I-1AQPV��{t~}uLc��ҕ����~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~���������������������������������������������������������������������������������������~~���~~����~~~~~~~~~~~~}}}}}}}}}~�~~}~~|{{|}~��~~~�������~~~~}~���������������~}~��I*3?TPV��wtv�x]T��髆����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~~~~~~}}}}}}}}~�}||}~������������������������������������~}~~�����������������������������������������~~���H(5>WQWz�ojnv^thK������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~~~~~~~~~~~~~~������~}}������������������������������������������~||~����������������������������������������������������~���D*6AVTWhکrpv|]orLv��Ԡ�������������������������������
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~~~~~~�������~���������������������������������������������������~|}����������������������������������������������������>-6HRXY_ũ~~��k��V\��立�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�����������������������������������������������������~~����������������������������������������������������������9/:NPZ]c���}}g��jR���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~��������������������������������������������������������������������������������������������}}�������������������������������������������������������������������������������������������������������������������6/AOPYab��}tzwhy�yV���֙������������������������������������������������������������������������������������������������������������������������������������������������������������������
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|~�������������������������������������������������������������������������������������������|}�����������������������������������������������������������������������������������������������������������������~6-GOQXde��{q{wm��zYw�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|}���������������������������������������������������~��������������������������������������������������������������z16GUU]bf|�|�t|~��hY�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������������������������������������������������������������������������������������������������������������������}7:FTZdf`v�{xy�����tM���ژ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|9;CQ\gfew��s�������M���򩅍��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z8;DS\gffp~{mr}���b`��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z8?LY^himn�~z��r���{P���ٙ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������x9CP[^hjkhz|�m����T���먄��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������u;FOX^iipm�}~}�s~����i��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������t>ILT_ljjj|zww{v�����|x���И�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�������������������������������������������o?CQVfhjnh��{w~w������h���栊�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������pAHNYemnmi��{{�{������k���쯎�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������qALNZ^hmnj��wz�|������u����Ŕ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������pAMR\bggok�|rx�x�����������ڜ��������������������������������������������������������������
�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������mFNUd��jmk�v}�tz������q���驍������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jJQUo���ok�����s}������s���󼒌�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jEUWt�ߩyp�����q������������ћ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������l=WZr��ƅv��{~�n~�������u���࢐�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������iJRYq���v��v}~y�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������kNTWp���|��~��z�������������ɒ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������hRWWp���Ӆ��~��{��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cSYXs�������y����������������騐����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cSZYt����zeru[u����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
���������������������������������������������������������������������������������������������������������������������������������fSZYq����{w��lr�������������˕����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������eQ[Zm���󷋄���s|�������������ۣ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������aM[[k���������s��������������崓���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������dLZ]r���
�ɜ�v��q���������������Ę���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������eN\\p����נ����x���������������ʙ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������dO][o����楒�������������������٠���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������aN[[q�����������������������譓�����������������������������������������������������������������������������������������������������������������
���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������`NZZu����𯑀������������������𼖒�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������`OZZw�����}��|����������������͜��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������_QZYv����򹔅��x����������������ߣ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������]PZXu����򻖏��y��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������`MYZ{����������}������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������_OY[|����������z�����������������Л�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������^PZ\}����������{�����������������墝����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������\RZ]~�����������������������������������������������������������������������������������������������������������Ƿ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ZTY\�����ś�������������������������������������������������������������������������������������������������������ԧ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�����������������������������������������������������������������XVX\�����ƚ���|������������������͜�������������������������������������������������������������������������������೤������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������WXY]�����ƚ���s������������������ݨ��������������������������������������������������������������������������������ȫ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������WYY]������Ǜ���o������������������汖�������������������������������������������������������������������������������ׯ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������UXY]������Ƙ���u��������������������������������������������������������������
���������������������������������������ɦ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȗ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������TWY\������Ř���u�������������������Ĝ��������������������������������������������������������������������������������۰����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ș�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������RVX[�����������w�������������������ќ��������������������������������������������������������������������������������뿪���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ș�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������PVX[������ʗ���~�������������������ݢ���������������������������������������������������������������������������������Э���������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������Ț�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������NUY[������Ù�����������������������鬜��������������������������������������������������������������������������������Ấ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ț�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������LUZ\������Ɲ����������������������󺙜��������������������������������������������������������������������������������Щ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ț�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������LU[]������ʃ\v}ax�������������������ʚ���������������������������������������������������������������������������������㶬�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ț������������
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������KV\^�������no��ai�������������������נ����������������������������������������������������������������������������������²�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ȟ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������NXZa�������y���mt�������������������⠥���������������������������������������������������������������������������������ଦ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˞�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������OWY`������ě���~~�������������������魢���������������������������������������������������������������������������������⽮������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˞����������������������������������������������������������������������������������������������������������������������������������������������������������
���������������������������������������������������������������������������������������������������������������������PWXa������ɟ���t���������������������������������������������������������������������������������������������������������ϰ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˞������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������PWYb������ș���t���������������������џ����������������������������������������������������������������������������������ٱ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˞������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|OXZc������ʞ�������������������������ߡ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˟������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zPX[d������Λ����������
���������������騞����������������������������������������������������������������������������������צ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ˠ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yRXYc������Қ�������������������������ﱟ����������������������������������������������������������������������������������߷�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ˡ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yTXXb������ϟ�����������������������񺢠����������������������������������������������������������������������������������ϰ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ˡ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������uVUVe������Р�������������������������å�����������������������������������������������������������������������������������齦�������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������΢������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������vMUYd������͞��������������������������ˤ�����������������������������������������������������������������������������������ɬ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Σ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������uRZZd������ӟ��������������������������դ����������������������������������������������������������������������������������d�ҳ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Σ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������tSQN`������і���x����������������������ԥ����������������������������������������������������������������������������������gsʼ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������΢������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������qRPI]������՞��������������������������������������������������������������������������������������������������������������sW�ǭ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������΢������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������oQYS`������Ϣ��������������������������������������������������������������������������������������������������������������Q�ʬ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ρ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������pKWTb������С���������������������������������������������������������������������������������������������������������������Sf����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������΢���������������������������������������������������������������������������������������������
���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������qSYR_������ʐ���{�����������������������������������������������������������������������������������������������������������RW�ɰ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Σ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������nSU[d������țwwn������������������������������������������������������������������������������������������������������������V�߱�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ѣ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������mQU\f������͞���~������������������������������������������������������������������������������������������������������������m^�ï������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ѣ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
����������������������������������������mMRZc������Ҧ�����������������������������������������������������������������������������������������������������������������T�ʲ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ѥ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������mJRYa������ӣ���n�������������������������������������������������������������������������������������������������������������vvֺ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ѯ�������ż��������������������������������������������������������������������������������������������������������ľ�����������������������������������������������������������������������������������������������������������������������������������������������������������mNX\b������қ���Xv�������������������������������������������������������������������������������������������������������������Q�ĳ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ѽ��������ľ���������������������������������������������������������������������������������������Ŀ����������������ż���������������������������������������������������������������������������������������������������������������������������������������������������������lO[_d������ѡ���[p�������������������������������������������������������������������������������������
������������������������h�ɹ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ĺ�������������������������������������������������������������������������������������������������������ƺ��������������������������������������������������������������������������������������������������������������������������������������������������������jM\_c������ө���_n�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ĺ�������������������������������������������������������������������������������������������������������Ź�������������������������������������������������������������������������������������������������������������������������������������������������������iL\_b������֤���Wm�������������������������������������������������������������������������������������������������������������oY�ǽ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������dPX\i������ܑx��N}����������������������������������������������������������������������������������������������������������pQO*N��׼��ñ���������������������������������������������������������������������������������������������������������
���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ѿ���������������������������û������������������������������������������������������������������������������������������������������������������������������������������������������bR[^k�������rWcf?{��������������������������������������������������������������������������������������������������������jMHP>5UO����Ÿ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������þ�������������������������������������������������������������������������¾õ����������������������������Ƚ��������������������������������������������������������������������������������������������������������������������������������������������������_S\`l�������cAx�T��������������������������������������������������������������������������������������������������������p=HB0C6@ECq��������ſ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ÿ�������������������������������������������������������������������������ұ��������������������������������ʼ������������������������������������������������������������������������������������������������������������������������������������������������]P[^j�������fN�呧��������������������������������������������������������������������������������������������������������C8LJK7MFM}j_���������¾���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������þ�������������������������
�����������������������������������������������������������ݼ���������������������������������̼�����������������������������������������������������������������������������������������������������������������������������������������������\NY]i�������cA������������������������������������������������������������������������������������������������������������oTNN�pS����������������ƽ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ּ�������������������������������������������������������������������������������������������޸���������������������������������ķ�������������������������������������������������������������������������������������������������������������������Ÿ�������������������������ZO[^i�������fA��s������������������������������������������������������������������������������������������������������������}�떞��������������������ý��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ַ��������������������������������������������������������������������������������������������ֲ��������������������������������½����������������������������������������������������������������������������������������������������������������������Ǵ����������������������XR]`i�������pfEkb��������������������������������������������������������������������������������������������������������������Ɵ�����·�����������������ÿ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ַ���������������������������������������������������������������������������������������������ɱ������������������������������ƿ½���������������������������������������������������
��������������������������������������������������������������������ž��������������������WT_`i������ו�puv��������������������������������������������������������������������������������������������������������������s��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������֯���������������������������������������������������������������������������������������������彬�����������������������������������������������������������������������������������������������������������������Ǽ��������������������������������������������Ƽ�������������PW\ch������ђ���e{������������������������������������������������������������������������������������������������������������n��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ٯ����������������������������������������������������������������������������������������������ݷ������������������������������������������������������������������������������������������������������������������ɼ��������������������������������������������ķ������������QX\bi���а��[a��PE��k|��������������������������������������������������������������������������������������������������������~hz�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ٮ�����������������������������������������������������������������������������������������������ѭ������������������������������������������������������������������������������������������������������������������ķ�������������������������������������������ɼ������������PZ]af�������u���"!vpV�����������������������������������
���������������������������������������������������������������������f���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ٮ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������½�������������������������ɾ������������QV\em������ۈ���DF��z�������������������������������������������������������������������������������������������������������ؓa��ľ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������٭������������������������������������������������������������������������������������������������߫���������������������������������������������������������������������������������������������������������������ý�������������������������������¿������������ƽ������������V\^`f�����οy}yj@>��n�������������������������������������������������������������������������������������������������������Ɋ_���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ٮ�������������������������������������������������������������������������������������������������Ӱ�������������������������������������������������������������������������������������������������������������ſ������������������������������������������������������������WU`hj���ěp�hns=#*KZX�����������������������������������¿����������������������������������������������������������������ؽ�^�����������������������������������������������������������������������������
����������������������������������������������������������������������������������������������������������������������������������������������������ٮ�����������������������������������������������������������������������������������������������»߾���������������������������������������������������������������������������������������������������������������������������������������������������������������������������VY`fi������rdd%%/3z�������������������������������������û���������������������������������¾���������������������������Ǹ�`�����½¼������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ٮ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ź�����������������������������������������������������������Va[`p������ހ�z*!$
&?�����������������������������������������������������������ͼ����������������������������ǻ����������Ծ��b�����»��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ٯ���������������������������������������������������������������������������������������������������Ѷ������������������������������������������������������������������������������������������������������������þ���������¼������������������������������������������������V]`am�����ôhgN102199������������������������������������������������������������������������������������������������������Ľ�b���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ܯ��������������������������������������������������������������������������������������������������tϼ������������������������������������������������������������������������������������������������������������¿�����������������������������������������������������������W\acp���ߠ�vDFE7/0>9:�����������������������������������������ɿ�����������������������������������������������ſ�������������b������¿�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ܮ��������������������������������������������������������������������������������������������������g�޴��������������������������������������������������������Ŀ����þ������������������������������������������������������¿������������������������������������������������WZ`bs������ԓ���L����������������������������������������������������������������������������������������������ſ�����������Ȑf���������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ܮ���������������������������������������������������������������������������������������������������^�ҹ�������������������������������������������������������������ľ��������������������������������������������������������������������������������������������������������WZ_`s������֞���N����������������������������������������������������������������������������������������������žǿ���������̒i���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ܯ���������������������������������������������������������������������������������������������������ns�ͳ��������������������������������������������¿������������������������������������������������������������������������������������������������������������������������X[__t������֜���f����������������������������������������������������������������������������������������������ſ�����������ʑj���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ܯ����������������������������������������������������������������������������������������������������r�׻����������������������������������������������������������������������ż������������������þ��������������������������������������������������������������������������YZ__y������❟��bu����������������������������������������������������������������������������������������������������ʽ����ɑk��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�������������������������������ܮ�����������������������������������������������������������������������������������������������������x�Ƚ����������������������������������������������������������������������ľ�������������������þ������������������������������������������������������������������������YY^^|������漴�������������������������������������������������������������������������������������������������������ѿ�����ɑi���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ܮ��������������������������������������������������������������������������������������������������������Ķ�����������������������������������������þ�����������������������������������������������ǿ�����������������������������������������������������������������������}W\^\z������������������������������������������ÿ���������������������������������������������������������������������������ȏe���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ܯ���������������������������������������������������������������������������������������������������������μ��������������������������Ȼ�����������¿������������������������������������������������¾�����������������������������������������������������������������������yX[\_y����������������������������������������������������������Ľ��������������������������������������������������ٿ�ƽ����Ñi���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߯������������������������������������������������������������������������������������������������������Me���������������
�������������������������������������������������������ÿ�������������������û������������������������������������������������������������������������xW\_`x�������ĸ�����������������������������������������������������������������������������������������������������ʼ�������Ēi���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߰�����������������������������������������������������������������������������������������������������VLEU�η��������������������������������������������������������������������������������������ɸ�������������������������������������������������������������������������uV]aau������徰�����ÿ�������������������������������������������������������������������������������������������������������ēj���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߱�����������������������������������������������������������������������������������������������������DGYOe���������������������������������������������������������������������������������������⼶�������������������������������������������������������������������������sV]cbs�����������z������������������������������������������������������������������������������������������������ѿ�Ľ������Ĕj���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߱�����������������������������������������������������������������������������������������������������{f�b����������������������������������������������������������������������������������������β������������������������������������������������������������������������
��rW]caq������盇��k��½�������������������������������������������½����������������������������������������������������������ŕj���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߱������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������⼰�����������ÿ�������������������������������������������������������������qX\b`q������趨�����ý�����������������������������������ſ������������������������������������������������������������������ǖi���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߱������������������������������������������������������������������������������������������������������ۺ����������������������������������������������������������������������������������������Ѷ���������������������������������������������������������������������������pX\``s������Ḫ�������������������������������������������������¿�����������������������������������������������������������ʗh}��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߱��������������������������������������������������������������������������������������������������������¼�������������������������������������������������������������������������������������伵���������������������������������������������������������������������������oX\`_t������빩����������������������������������������������¾��¼����������������������������������������������������������̘g{���������������������������
�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߰��������������������������������������������������������������������������������������������������������򸵹�����������������������������������������������������������������������������������ĺ���������������¿�����������������������������������������������������������k[^da|������峓���������������������������������������������������ļ��»�����������������������������������������������������Ԛnp��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ⰰ�������������������������������������������������������������������������������������������������������𷵹����������������������������������������������������������������������������������߿�����������������������������������������������������������������������������jZ^db~������賤���������������������������������������������������Ǿ��¼�����������������������������������������������������ԛno��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ⱱ����������������������������������������������������������������������������������������������������ޫ������������������������������������������������������������������������������������ȹ�����������������������������������������������������������������������������hY]dc�������谪��������������������������������������������������������������������������������������������������������������Қmn������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������Ⲳ�������������������������������������������������������������������������������������������������������칸����������������������������������������������������������������������������������ڸ������������������������������������������������������������������������������gY]dc�������괩��������������������������������������������������������������������������������������������������������������Ιkl��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ⳳ���������������������������������������������������������������������������������������������������ꕈ�껺����������������������������������������������������������������������������������ȴ������������������������������������������������������������������������������fZ^cb�������汘�����������������������������������������¿�������������������������������������������������������������������̚kl��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ⴴ���������������������������������������������������������������������������������������������������Ȁ��绽���������������������������������������������������������������������������������ܿ�������������������������������������������������������������������������������eZ^cb�������紓�����������������������������������������������ƾ�������������������������������������������������������������˜ll��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ⴴ���������������������������������������������������������������������������������������������������x��彿���������������
������������������������������������������������������������������Ȼ�������������������������������������������������������������������������������dY^bb���������������������������������������������������������ù�����ž������������������������������������������������Ľ����ɝkk��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ⴴ����������������������������������������������������������������������������������������������������v������������������������������������������������������������������������������������ḹ�������������������������������������������������������������������������������cX]bb�������橡�����������������������������������������������·�����»������������������������������������������������������Ȝji��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ⳳ����������������������������������������������������������������������������������������������������d��߾����������������������������������������������������������������������ǿ����ǻ��ڽ���������������������������������������������������������������������������������[]__b�������롂��������������������������������������������������������������������������������������������������������������ʡmi��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ⲳ�������������������������������������������������������������������������������������������������ɶ�d��ݾ������������������¾��������������������������������¿������������������������ǯܻ���������������������������������������������������������������������������������bU^^b��������
|��������������������������������������������������������������������������������������������������������������ɠlh��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ⱱ������������������������������������������������������������������������������������������������ں��`��۽��������������������������������Ƽ�������¿�����������������������������}ly�~oR`�ô��������������������������������������������������������������������������������iY]^a�������﵎��������������������������������������������������������������������������������������������������������������ɠlg��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ⰰ����������������������������������������������������������������������������������������������������X��׽������������ǿ����������������������ƶ��¿���������������������������ž������aZy��Ľ�������������������������������������������������������������������������������\\^^b�������䭓��������������������������������������������������������������������������������������������������������������ȡlf��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������⯯�����������������������������������������������������������������������������������������������˷�®T��ҽ��������������ź�������¾��������������¼��������������������������������������ƙ����������������������������������������������������������������������������������T\_`c�������첡�������������������������������������������������������������ż�����������������������������������������������ơh_������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������⯯����������������������������������������������������������������������������������������������庻���W��ͽ������������������������¿���º��˵�����������������������������������������Ĕ�ܹ����������������������������������������������������������������������������������[Z_`c�������뫙�������������������������������������������������������������Ǽ������������������������������������������¿���ácY��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������⮯����������������������������������������������������������������������������������������������ʴ����[��ʾ��������������������������������¼����������������������������������������������ǜ�׾������������������������������������������������������������������������������ZV^_b�������觏�������������������������������������������������������������ƻ������������������������������������������¾���£e[��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������⮮���������������������������������������������������������������������������������������������۶�����\��Ⱦ���������������������ƽ�������ѽ����������������������������������������������{�����ü�����������������������������������������������������������������������������WY\]a�������貚�������������������������������������������������������������Ź������������������������������������������½���§jc�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
���������ⱱ���������������������������������������������������������������������������������������������²�����d��ǽ������������������������»��ۻ������������������������������������������������u�����ҵ�����������������������������������������������������������������������������VY[^`�������嵤�������������������������������������������������������������Ƹ������������������������������������������¾���ãqe������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߰���������������������������������������������������������������������������������������������չ������i��ǽ���������������������������ͽ�������������������������������������������������m������Ÿ����������������������������������������������������������������������������TX[_b�����������������������������������������������������������������������Ŵ��������������������������;��������������ÿ���ĥre����������������������������������ӭZPm�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߱����������������������������������������������������������������������������������������������������zk��ƽ����������������������������������������������������������������������������
�������۷����������������������������������������������������������������������������RW[ae��������������������������������������������������������������������ų��������������������������վ�������������������ħrd����������������������������������͵�gLb����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߲�������������������������������������������������������������������������������������������޴�������lm��Ž����������������������Ĩdqɯ�������������������������������������������������¾������ƺ���������������������������������������������������������������������������RX[af�������쮝��������������������������������������������������������������������������������������������������������������ĩrd������������������������������������̤a�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߲�������������������������������������������������������������������������������������������ϴ�������bu��ļ��������������������������Ӻ�����������������������������������������������˷|�̽�����ٻ���������������������������������������������������������Ķ����������������SZ[af������������������������������������������������������������������������������������������������������������������������©rd������������������������������������˺�ε��ξ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߱����������������������������������������������������������������������������������������������������[~��¼��������������������������꽶���������������������������������������������з�~�۾����������������������������������������������������������������Ƶ����������������T\[ae���������������������������������������������������������������������������
����������������������Ŀ�����������������������qe������������������������������������зѽ����ͯ¿�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߲����������������������������������������������������������������������������������������������������U��������������������������������ƾ������������������������������������������������~�߽������к��������������������������������������������������������ȳ����������������T[]ad�������밟������������������������������������������������������������������������������������ÿ��������������������������rf������������������������������������λy������̩���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߳����������������¸������������������¾��������������������������������������������������������������R��������������������������������غ�������������������������������������������ླྀĳ}�Ҿ������޿��������������������������������������������������������ɲ����������������S[^ad�������䐀������������������������������������������������������������������������������������Ŀ��������������������������sg������������������������������������Ҽw������̪����������������������������������������������������������������������̵���������������������������������������������������������������������������������������������������������߯�Ľ�������������ɰ������������������Ƹ����������������������ų��������������������������������������S��꿽������������������������������������������������������������������������ż���z�ؽ�������ӷ�������������������������������������������������������ǵ����������������\Y^`g�������������������������������������������������������������������������������������������ƽ�����������������¿�¿����up������������������ſ����������������ӽx�������ō���������������������������������������������������������������������Ǭ�����������
����������������������������������������������������������������������������������������������ݼ���������������ͻ�����������������®������������������������Ų�����������ư���ҳ��������������������O��������������������������������ۻ������������������������������������������Ӵ����~�Ի�������Ḷ������������������������������������������������������¹����������������[Y]`h����������������������������������������������������������������������}��ۿ��������������������ï����������������¿�������un�������������������������������������ڄ�������͛���������������������������������������������������������������������ra^c�������������������������������������������������������������������������������������������������������ݾŵ�������������г�����������ú������������������������������ź�����������ļ��˶���������������������L��彺���������������������������߽����������������������Ⱦ���������ø�·���׾�����}�ּ��������°������������������������������������������������������������������������YY]`j���������������������������������������������������������������������������ɿ�������������������ù�iz���������������������ul��������������������������������������|�������ү��������������������������������������������������������������������ܩ�g��Ϻ�����Լ���������������������������������������������������������������������������������������������ݱƯ������������ȿ�������������ɬ������������������������������Ⱦ�������ȳ���ʿ�����������������������K��㽺���������ǽ���������ƺó���ཱ���������������������ǹ����������κ�����´�����}�ع��������̱�����������������������������������������������������������������������}XZ]`k�������𨚣�������������������������������������������������������ɾ���������ļ������������������ů�����������������������tj��������������������ʼ����������������������̾�������������������������������������������������������������ʽ������ݫ���ض�����ҵ���������������������������������������������������������������������������������������������ݧı�������������̾��������ĳ��ȸ�Ǻ����������������������������ž����̽ȼ���Ĵ����������¾�����������O��ຸ���������ʮ
�ж��������ȱ���⼷��������������������ȹ���������������������������պ��������Ʒ���������������������ƺ������������������������������������������������}X\^`j�������񧔬������������������������������������������������������ƻ������������Ļ������������������ŵ�������ƾ������������rj�������������������ķ���������������ž�ͤ������ĵ������������������������������������������������������������Ǒ������ӵ���ټ�����δ���������������������������������������������������������������������������������������������ݫ���������������įӴ������º�����ñ�����������������Ĺ�����������������»���ҵ�������¾��ľ�����ƾ���U��ี��������Ӽ������������ß���仸�����ô������������Ƕ������˿��������Xz³����������ƺ��Ư�������������������������Ŵ������������������������������������������������~X[^`j���������������������������������������������������������������ÿ��������������п�����������������У�Ž�����ڽ������������qk����ǿ��¿���������ǫ����������������Ç�˪�����Ů��������������������������������������������������������������g������ö���������ϼ���������������������������������������������������������������������������������������������ݿ�����������������ի������þ�����Ÿ�����������������ķ������������׷����Ŷ��͵��������û¿������ú���[��㸳���������´���Ž������ѝ���߷��Ľ���´�����������������������������v�˰�������������ɽ����������������������������������������������������������������������������XZ]_i�������뫞�������������������������������ø����������������������������µ�������»����������������ך�¿�������������������ol����ȿ��������������ţ���������������Ď�͙�����ȳ�������������������������������������������������������������ɱ�����ɼ���̾��þ�Ʒ����������������������������������������������������������������������������������������������ׯ�������������ǻ�Ͳ�������������±��������������������������î���׳���������÷��������϶�����������~]��޴�����������������������Ӛ��ݻ��ξ�����������������±���̻����������k��ó�������غ���ȭ���������������������������������������������������������������¼������������WY\^i�����������������������������������������ź����������������������������õ�����������������
��������Ǡ�Ƽ�������������������nn��������������������Ƒ�����������ÿ�̿��Ж�����ǰ�������������������������������������������������������������ʻ��������������ƽ�ʻ����������������������������������������������������������������������������������������������ˬ�������������͸���������­��������������������������������������۱�����ǲ�Ŵ�����Ʒ��ó�����������ye��ظ��������ֺ���Ȱ�����º�Ǥ��ؼ�������������������·�����θ����������b��ν�������ٽ���β����������������������������������������������������������y����������������ÃWU^_f�������힞�������������������������������Ⱥ����������������������������ɳ�������������������������Ɲ�ɻ�������������������rk�������������������ż����������¿����ň�Ԟ�����Ű�������������������������������������������������������������͸�����������Ȓ�����ǫ���������������������������������������������������������������������������������������������ylionnqptsqxxvv��r|p{uw{~}��z���|}{xwxxwvvvvvwwwwuvz~}zxxyz~���������������������������������������tg��޷���������ư�ɶ����»������۴ϵ�������������������ʼ����Ĺ�����������b��Ӻ�������׼���Ŵ�������������������������������������������������������¸���������ö��������|UX^ah�������ꪛ�������������������������������������������������������������˵�����������������������û���˼�������������������qj�����������������¾�����¿������������ӱ�����ǲ�������������������������������������������������������������ͺ�����н����̓�����Ǫ���������������������������������������������������������������������������������������������__b`ZZ[WWVQY^Y^�mTXW^YV[\T]Zo~d^^[[_^ZY]ZZZYZZ[\ZXXZ\\[[[[ZXYZYXXYZY]V`_��T�w]bcffdpgiiwlikprs|���~~^i��˘������������Ĭ������ȮƸ��ݺκ�������������������˼���˾������������y��ҹ����|��ۻ���¶������������������������������������������������������´���ɲ�����¾��������sSY^cj�������ﱙ�������������������������������������������������������������̶����������������������������ͼ�������������������pj����������������������������������������ճͽ���¯�������������������������������������������������������������ͺ�����ͻ����Г����� 
���������������������������������������������������������������������������������������������XYZWVWUOJLHIORP}YSXURUQPXQUQ��XYYUUYXSRUVVUUUVWW[ZYYZ[[\^^]ZVUVWX]Z^[T_c��Q�nYXVYYV\UYX_\YXYXW\d_\Y^No���nggeghgx�}zotwvwy}}�����p�Ȓ���~����������������������������������������¥���v��ѭ����������������������������������������������������������������̮���������������mQY]ck�����������������������������������������������������������������������̵����������������������������Ͻ�������������������pi�������������������þ����ľ��¿������È�ڲͿ���Ĵ�������������������������������������������������������������ι�����̿����Ӗ����ξ����������������������������������������������������������������������������������������������XWTRRRPPNOROMVNTHLRPLSRNPQRD��LYYWWWWVVWTTSSTTUUVVUUVWWVPRTTRRUXV[^^YY[`�~Ph^ZXZXYWZVZY[WTSTSQRTLPTXKz���f\\WXYVc�b^ZYVWZa]_b_iZtfT��dcZYWzmRYVZPONNNNPQPTUUOVVR��vPR\UUUVVUXW��_��fd[aZ��bei�plnquz�������������������������������������������������������ζ���������������kQW\bj�������𢔚������������������������������������������������������������˴����������������������������н�������������������qi����ǿ�������������������������������Ō�ݳ�����Ļ�������������������������������������������������������������θ������ŕ���֜����������������������������������������������������������������������������������������������������MOLIHDFNTQZYQTOIQMMPMNNMIHI;�}CHHIHGHKLLRRRSSSSSRRQQRTSQSTUVTSTWVUZVUZST��XTVXUVRSRSQRQQWUSQNIFDKDEEC���`Z\YZ[Yd�ZWPPUUUYUSXUURUTT��bhogq��dfdjjhffffgigb`dced`��t[ah^e`ca^_X�vY��a\VXX��Z^_�iTTTSPOOPTVXZ]ckq��������������������������������������������Ϻ���������������kQU\`g��������LU=����������������������������������������������������������̵����������������������������Ѽ�������������������qg|���Ǿ�����������������¿������������Č�ܬ�Ǻ�����������������������������������������������������������������θ������Ɠ���զ�����Ţ���������������������������������������������������������������������������������������������PWVUXXZcjgnnjhebcZXZXQUTWQPH�xLHLLKKLMOONOPPQPPPRQPPSUTPRONMLJJLRNPLOOMM{xNIKIIEI
IJJJIHHMQUZ_dhkbRRGD���SNNKKLMe�RNHEMMTQSQPQOVWYR��^co�������������������������������������{��������}d�쭒�������zqia]WUQOMMNNJS_�qx{������������������������������������ϸ���������������jRV^_e��������zs����������������������������������������������������������͵����������������������������ѻ�������������������pcw���Ǽ��������������������������������ݨ������ǜ������������������������������������������������������������̵���������Ю���ſǤ���������������������������������������������������������������������������������������������pwtottprrssqvtnnlknljenlb\\N{`QVRPOPOMMONOQRRQQPMLJIMPOJJFCDFGIKMKKKOJTW��X_`]hdiilnqprtx{~~~�����pV���a]^_dinz�ppxorw|t��wvnu|~k��������}������������������������������������������_�꫖��������������~zwrkfa]X�]LT[ROTY]dhtcu�������������������������ҺŬ�������������hRW_^d��������| !l����������������������������������������������������������͵����������������������������к�������������������n`r���ƻ�����������������������������������ֿ����Ş������������������������������������������������������������˱�����ȼ����˱��ƿ�ǣ��������������������������������������������������������������������������������������������؂����~{yywvxzxtqovrqlmtj`WOLLMNOOOMIHJJHKKKJGEEEDDBFT\[YZ\_bejptzxxz~|y�����}�~~�����������������Y����{�~~{���~��������������xѸ��������������������}||{yy��������������ë�������b�뭆��������������������}ywqd_dhcfaaYT^[Xeuz{����������������������ȭġ�������������cTU\]g��������~!
p����������������������������������������������������������ɼ����������������������������Ժ�������������������n\j�����������������������������������������ƿ���¯��ż�������ľż�����½�����¾�������������������������������ȵ�����������Ȫ��ɕ������������������������������������������������������������������������������������������������z{{||||{xvttvwxyw{y|{wq\GIKID@CINNKGEFFDDDDDEHKN\beflrw||}~~}}�{xz��������|�}�������������������~Z��򻎋����yiwilzz~|~���������{Թ������������������������~up�wv|}�������~����������e�䧌��{������������������������������~��q}��~}�{�������������������������������������[OW]^g��������u%!k����������������������������������������������������������ʸ����������������������������ӹ�������������������t`o���¾��½������������Ŷ��������ý������ޫ�þ�����y������¿����þ����ÿ��������������������»�����������������ǰ���������俶����pv}f~��������������������������������������������������������������������������������������������||||}}}}{|~~}|{z}~ynZJLPQOIFEHHHFGJKKVX[`gnuy}��~xw|������~}}~}}��������������������������������{V��쮊������|u{�z|z}}}}}~��~{{׷����������~}{{|��������}yz}~�����}~�������|`�쬄|����������������������������������n�~}~y��}���������������������������xz~��QOXX\g��������kNXImpqqoopppnnnopnjhfhegfchhiegtiuuptqu}����������������������ί����������������������������֮�������������������nWg��詙�����������~�|������������������{�ץҲ������t�����������������������������¼���������»�����������������Ư��Ⱦ�����ђ�wlrqtque{}~�~���������������������������������������������������������������������������������������}|{zxvuvuxz{|{xuwwwvsstnopqssojfghiknruvuwy|������}ww|~��������{~����{v�~t�����������������������|[��񲎌�������~�~w}~}{zz|~�}�᷇���}zy|��}}{xwy�~|{}�����������~~y|Ș�����{b�⤈||vy������������������������~~���~d{������������������������w����}~}|{|}~wxuqtQTX^`g��������eXbZjccbabba_^adda^_`_`\_^Z]\\WXaX^_\ZZailr������������������~��������������������������q����ʍ�����~|{||}�����~sgUh��蝄zw{{{|||{z����~��}}��|{zz}|{���o�ؖŐ������g���������������������������
�����������������������������������®����o�ƃ~sk|tunl_qru|{�}|��������¾����������������������������������������������½��������������������������ŀ�~zyzsrqquxxvpptrlmqssvyxutuvvvwxz{|}zz{{{{zy~{}|wx~{}~~}||}~����~}~~}������������������������u[����|z{z{|}yy��}{yxyz}~�y�箇�}xzzyxzzyzz{yxx{~yww{�����~����|�xҔ~}z}~�|i�ܜvk}�}}~~~~�������������������������m|���������������������|��}����{}|zyz{}~���xORZU^k��������bV[^hbabdffdbddbaabehghde_W]c`YX`XXXYXW\`^als���������������{�����������������������|mdz}l��ysyxyvsmkjlnopqrqstmk^m��뜁~zurrrqqsuv����v��{}��~~~��~���o�ؘǉ�}{���no�|��}�|~|~�|��{�|vu���������������������������������������tmez�n�͌�|m�qkni]jlq}}ywz{{���������������������������������������������������������������������������������ooqttqruxwusttpjaamqprqrxxwuvxxxvvvxxvuvuttuxz||}��tsz{|}~~}}~�������������������������������j\��驪����}�|{|{{~}vzzywwxz||r�㬆}ytwywvwvswz|}{ywvvqoswzzz{~|{}|{~z}v�sz��w{{~ztcwm_y�}}����~|{{|}~|zyyxstxxttvszw~~}�yzv|�~���~�~�����}}���~~~~���}}{||}}|{{~{~�o75CO_o������߻kglllcceilljihecehhb]W^ch^QW_]WV\XUUYYX\acgjhqsw{~�����������h������������~~~~}yvyyleuxm��tnqruppsnigilnpqrtupi`g���{wrlxvrmjmsxrjpyt��v|�~}���������r�֗Ə������ni�����������{|�����~|~rx�~�|��~���������������������������}w��pke~�o�ҋ�pYokpkf]ekoy~vrw}}xtw�����������������������������������������������~yuuv}��������������������������popsqoptuwxusqjbUVeklqqvwusswywrsrtwwustusssuvwwwt|~rqyytvxyyz{|}���~~���~�����������������������pl��נ�����}}}}~{xoomid`_`cau��|wzruvssuvsvwxxwtpnuqptwvojnqpqtnhlnljtln�n_efhf_YMD@BUebf_]\ZWSQPPRSRNLMNNORY]]`c``_jigkjmkhchloupihppqsmnnprrrqrspqsu|rtxxy{|{ywuzvuze407N[[jx������oYQZgllnrtrqqrrswyumg_cgpk_aecaacb`acdekot{}wuwxyyz{}|�������g������������~qx�}{ywqpe]hof�Ă|snsmllhc^\^`afghlladf���}xsmhiifcdjptqupi�s{{|yw}������y�ܞɛ������zi�����������wz��������}����������������������������������{vp��qkb{�t�ԋ|rXkikkh_cmnuyyxustzvx�������������������������������������������xnsuwxx{��������������������������xutsojkorvwvw
z{xmhogZXYdemttrrtuooquurppijlptwwvyrwzpototvxyyxyy~~}}�����||}{~�����������������{s���wgj_beehhikfb^Z`^]YQF=98235<ES\cbhllmnmijheca`^]ND<>IT[^befkpf^biciqrpfkjnhgedhehZST^]miklkkihhhlmkhgilnijnompqldcpokpnsuuttrx�x{x}xvwoqsrtx|{uopv}rs{}~}~~~|{yqqntr_c``qopio|����^9/Catsuwwtrstw{{yy|{qhorlmmoqsoqrtrsx}{y~�|z{|}}~����������p~��wndt��������nz��������u{�r��|�zw�zxvy{yuojgqpmopbmn���unha\dlonnqutx~{|�av�zx��������xݕ���������i��������������������������������������z��}�����|������}{yr��nj\nvm�ՋulXqqkki__lkp}{wsqsutr}yy||}������������������������������������zttvvurvurty{y�����������������������wy{ztmlppqqqswzz{~{siaYRS^jnopqrvvzzstwqjpstw|~}~~~��wlvuxxsu}���}||{zzsuqrwoowlpywtusuvy{yvvy{usspkrsuwwtnjigfjqwxwopsutqprliffilljlikoleflfc`aeikkiryzz���~|y|�y����sw}xx|}vuxpxrrqvpqwspuuru|glyrpvsyyzzxwx{zxzvv{y{wx}|}|uxyysuxtxqy}~}wtxvy|}|{{{pwx}{����~zsy~vu}x~�������x:#P_�����z��u�zzu}����~}~|dwl~{sVeo{iwy�{wx��~|vqu|�������y�i��z|j~z��~~fher������{}���{���~������������������������}�~��������������}gby���~�yyz��v}՞���������d�������������~��������|~�����������������{sz}����|zvvxrg��pgXhog}ԇuqPnonkmeTlhnx{uignmkpuuxwx{z{zw}�������sty|����������������yssutsuuuxtosoqwswu|~~zy������������������wz}|wssvoponqtuswxz{}}ytfoy}}}~}}ywx{~{siov{~~{xvy|~{uomorxxuy��{wvvusonuqprnstns}|zyurpqvxvsrmmjgeghiccdfhjkkkkklnnljolloqnjhomlmpsrqqrrswzwr|����������������������y�������}z{}~��{y�����������|}{y}�~{��qz��~xqlpnrpptrvxtuz}~}yqprpu{y���zy}}yxy}{trw}{{turyxyryzvuvwx}wx�������|M*Jn��}������}u�{{~{~��h~��~xj{��|utvsx�{{|}}~����������Rq������~����w~z|�tpx{�����{uv{�}|��zx}����������~��~v}�������}����������������yyy��}~���~}�����������o����������������������xn�����������������������������������~��rrkx~spՐytVkjgke^Wofdfj{yiirsphs~{sqt{|xtuz~~}yoilqqqs|���~~|vz���~wtqqsuuuxstnmqotvy|�~wvvw~��������������y}�||�zywvwyws���������~xrouvsnljffmmnrssrssrstpjkqruxvrtwtwuqosxxvssxtqrpuxstwutroonprrnjhfdcefeeegd`\Z\`cdfhjkkjjqruz}}{{�������������������
�����������������������wx|~t}~~|}��������|}�����zyy|pjuvs}u{}yuvyy{wzxvvu{|}�}~�}|z}~|���|yy|~�~~}zw}~y{x����youxz�}~xx�{���xwb3@t���������w�}zwuuvz��}k|{{{}}���v~����}|x|�������������C_{{|w�{x~�~�~��������������������������������~����|��������|��������������������������������������{u~��������������������{i~��������������������������������������|u��|u����gs{~qjf^mjmlyoozygbquvututqkyywssuvtqrporusnsvruy|xzwvy}}zyqmnouxvrqtqprpssruz}{yyyuw|~����������{~|{��}zxyzvp{}�wnotwwvsnic`musnmjkr|vrststxurqpolmphgknlnrswtpnqvupwsqnnommkf`][]_^_dhieabdd_^eloprosx{zwsryz}�����������������������������������������������������ziq}~�}z�����������}~|}{�~~}{zwvd^q|y{qv{|zxy{~z}{xww|}{~{{�yyx{zw�||{yw{��t{��zux}u{|~vwuyu|~���sjrtwy��z||zhrtLLz��������~x�~��w�y|�x����������y~z~���������|��{�����Vi{x|us���������|~������������������~{uqsx{{{tz}}yy{xz�|{��xkpsrrqzv����{zv}��~ttz�}rn��twyxx{�������{���{�����|u������{~{�x{~���~|}}qek{wty����~�������������������������������sx��������}��|z��ywz~��~��~|{{zy{ytuyzvry|��rnnsz|}��}y�wsrpruruttpmllnjpyzz}zzuqturvzv��������yyvstx~�vtqprrniilooh`agdegjmprsnpljonlqqoorspoptvvspnkghdiqpmpspponptqlfdaaa``]YXUTW[]`glqsstwz}ywx{}��������������������������������������������������������������|wvy}l_munnpu~�{w~����zz}uz����~~~}~zux���������}~�~���������}wniotptv{{x��{zwtwxr�|vw|�~{|��{|y{~vtuv|}uqosrw|uw}vp��pn������|oapkg|�|}����|��������������������~}~������y|�����v}����zx{ytmimyst��~zz�~}~~|yxuz{vqnickd_`dffekorppqsyzwyxz}z~{w}o`o||��~}�{�orvvxy}~x��{�����������������u�����������|}rrt���|}��~�vs��}~�����~||����������mox���~|�����|{�ziWZiu��|z��������|\KM`}�|yy||yz~������������l9$+>]svx~��~|}}{z{���~������tju{�y{~|y�����������tqonoqvzssqqstrnlkkmnljiihhknpsumnkjjiimklnqrqrsrvtkefgggbgomhfgbgjijmkfkpproijfquw|���������������������������������������������������������������������{{����~}}{wscfjv{y�����������}uyqx}}}�z|}����}�����yuqnmost{xtphghmruwtsvwtsni`TVfhrvs{��y}�|usy�zumpotrsunswqu{uyt{
x|�z{xrz��srz������|��������zvvu|x|��wxv}}����������|~������tdivurv{zv�~�{|y|}�|��|v��{{}wwxz|zvs}y{��yrrllnonmnpjikmtupuzxzvtqmxzzr~vcljhkrjluouut{�~}�����sx�������~�{x�|zu�xxtz}�z|��������|���}��������������������~��yufYvvgr�q|�}|�wrz�������|kp|���������xx{}�yzy~�����||}}||}~~rtzsopiv���nix������|����������j]kt~����|�����������liinomnrwwwvxyxuvtrqssmgjgedcbbc_^addglolnmjilquttqmlosuutx||}~z������|w������������������}~{unsx{|}~~wz~��������������������������}��������������~}��ztmkloyzyvy}xmmvy|zw}}�����}wr{�}{}��}|��|��~y|�������}wz�~yrtxrmka]fcgnrttqvz����yyhn||opxt}���|{}~xopknlopprtrqqpfboos}tskft|ofggsssmjms{o|zv{mslklrjoqfagltvyuz}��~}�|vtv|||vmdhsy}���zz|�|�uqrt~{��{}|��umd~��������y{���}�|~������vsutytglrodgmjoxmoluxvznltxuz{y{sqifY_f���~llpzw|yoia_my{}}{z~�vwx{v�|{w|������~��|���������{}��~}z{���}z{|}}�zp_jhfuaUbxs~ybmy�����������wx�������~ab{�����|�����������������}q����������x������~s}�������������������{z~~��������dadnoigkuuusstsqnpokihc]_^_bdegkxkiqw}~v{~}wsvz|ytv��������}{��{���������������������������������|y���}����������������������������~y������������{wwyyyyvsqpmkis�����v��������~zxzzxuuut{{vz~��~����|y|~yw}ywnen�wy{yx~zx|pnsv|}vqw���}pmgjw|uvvielpj`_kxw{{wxy�}}tqzzv{umvqoxmllgu~vvzzsppmmoorex{|tz��}zvotpumpnqtvz|yy|}{rrzq{|~����w`ryw|�����rem�t��{xv}uwspury|y|u�����������������}�}}�~|~w��sN\wz�zz~}ogq{yv�z}�����xzyzjmrusztxsqfomvtknrv~����������������y���������x{������~��v}|z~|w{��~yy{}}��|t���{yuin}�}xvyx}{u||zrr|��z}{|xsxxkr~x�~�rtz��~~}|��������t����������z�������~��}���������������������������������hYammlimesognqmpnorrsyzsx{�������xz��������~}{uI\���~���|���}�����������������������������������������������������������������y����~������}vvrw�~xz|xvx{|{{{�����{����������}pkryrspnvwq||{qqy{v��wwvzopqurkluyvxvnmrmionnonnosxpsluii`ira^gnzv�|u{~qQb|vyvty||}xqrmtt����r^u|{���y��~���|ssqg_Wn����rn|�}vwxqlo}u{����pmz{xxsuurw��wo[ipkiputVnfenfhnviXcptmugY\Wbfhcglgdfch{wxyth]^ekmjeirsnuy|yrkikopV
]w{�~}yvvtx�xu�vmas}|qvojejoghsrmnmqwvv|���}}�������������������������������}��x~�������������������oeLOi�������z}q^o�����s���zx{w�|z}������|raPn�x}~����������������~kfdifhpld|zkb_z|}zww����������������������������c[dhhdchmsjbmz����������������������������������w_u��������������������������������������������������ww�����������~�������������~��~{����yywu\Q]p}����|yxy{�������|x�|~�~yvstqsvtswzyvwutusz�}umhkqvvtkoorknnty{|{xtsywrrnkpmqtx|v_Gqufdryr{v{}|tvptr|�|���sfr|��zrpx{xvvgfVkqpfO[r|ge[\ihchaabglihnnmeejfQQY`glsutvs|tjkluy|jfklqunfpl`dqpfdea\]cgfa^]Y`]Shc`UZ]a`jn\gxgmpqwlmohlusyuqoljmriiklkls|pjhqqoknilHLswqqlv{{}zrn`kwlefeqwrkiev|~wxwy~wow{�������������~}kgsigpqqx�{u{��������|������|}������������������������������{����no������������������������������������������������|�������~t_n�����������tnrootxz{{��|wiec��������}y�vyoos��z����������������������������~������rn����{{�z���������}}uq�~|�����������������������~���������|tz�{��zv����|u|���������|x|yxhfpv{|}��}yvrmhfingcemsrqswsw~���~�}vtuvrlunsvz~{yzvuvrlqztsrzyxqrtvz}~|{|�{�zv{yvw|�zppxvnruuqr|qljroginhajpo[gdafnpptfqml`N`_VWLQDI`ea`XYedZW_^^cjlq{mvoliv}{vtty}|z{{|ujtr{xjhjhacg`oigknolgnqqi^Z`gnhf\_f^dcjeb]ekw�y~�mz~~�vx|vvwowz���zz}����~y}�����yoly}q2*czz�zwx��xopz|umq{}~w}~~rx~�ymrusw{~}~}|{ww|�x�yuv}yuvtgnm�pqqu||{�|���}��z��~�����v����������x���|��������������|~����������������������������~�����yrz��}|����������������������l�����������������������������|�����������z���������������������������}��}xy�tmrzsmrxpu}z|~uw|t����~~�~������������}������������������������������������}xttowvu����x|{gjy|�~�������||����������{z������������������|u|urpw���wsw�yy�uwy�������}|~����~���|{vv{|z���t|�v|uT`yvwolljoi^\]]gjjaZ_gf`a`hjicbmfnfZc_cih[fgeq|xqrvutyyqjrek{rj|vvtvvpkkkgqupmepsvstvx}znfy��~������}lhu���{xwvzkdtyzrwtuqxzivzww{}xxvcRUklr|��|zy}}z|��~~�qik~�K?l}��}y�}wz�|qszuk`gnpughotsirvsy���ztz�zu|pbnzrllpstqgcqkhqurjghiiijrqq}�z�|x}yzv{������|�}���vrz�z����yf���|�vrv{|}�|��
��}��t~����������rt|��|]`o}ros��|u��������������|��}��xxpv`ay�����}}�������������s���������~p������������������}�|miz�vfmx~��{��|~~ww||x�|�w}ns�~vw��������zx|��~����urvysnoqzxtruzzx�������������������~||}�{}x}��y����nJVw�~�������}|��������������������������}yuf|�xstqmlx����w}|urw{zz}�������}��|y{zuopuwtqtxrnt������|��x{r\g|yxkgkomdeehljabdjkc]_icnnhrstrnmkcjfkr{ymjopmqpf^dmopvzuXFDfeju��wrtsuqgnnomgrrsspqvxs��uy�{puwrry���~��w{t}�xrm{x}�~��yz}ytyy`LQ7SrzjVRWlnoou~�����~|�z�ofrywuWelo|�yj{�yusry~knn�~zsjjcilknifogdp}{maP;E]inkhmileb[\aXdkfemsomisv{����~�|�}yxvxuz~~�������l�����������ku{�������������������qKJWp�����~������������������~��������������s������wpmp������zvttvz~�~�hUh����������{xl|��|����~}���������~����~~��������}�|xvy~uy����{u~z~��{z��|wyy}qhjrspruvxuruvplorssrqv|~{|�����������������pmnrz}���������{�������������mw�|�������{vuprtuwyzyqpsustwvy�|u{yutxxy������|������������������{x}��~��~|yzztm`sz�y�{�x���|���~��}{wurmnsux����k^��vywyqgqjuwwvqihr~wz~��{w{�vmn�olrilw{vx{sy{y�ttz����������{}zsmjossqu||u�|�|���������������n���rak������������������}�������������rmuukdsyy}zfQ_x�������nxuw~���~||tupqrnmtreSAHYhqoml^gk\6Bbt}�~y����z�������~r|��tyzw}���{����r_q��������z�����}oJJYiq~�{wzzx�w�{��z{~������}����������~neq���������wz{��������j}�����|x����}tz|}�{tokkosjjurbas���������omsnrz{xxvskeky�����||}y{��||�~}{tu}zyz{|x�{zyv}|wyqvrnonqzyxzwyz}|z|~~��}����������������������������|}yw}tvqov������x���~xslmslpsgpqsptqkovxv�{vtrqrtsuy�|z{}{}~}~}oj^^hfbksny~���������������|�n^b|��������wuy|��z|������}�y���nvhyzz�������������������WXy��~���zvpsouurtsieo{vusqmb]a^Y]homekbZ_fgnupvqpxrox{~xt|��������������}qjkpu���zx�����������uypZ}���pt�������������}{}�}vw���|wp{��xms��{sttw}zspmvurvosuul|upy{}��{zxu�yx~udkyuuoqsx|xyuwxyyx����������z����otvxqsy�zwzuuuuwsnwxtx|xqqx�����|y}{�sv{znruvwz�y����lr������|}~����}n`t�~~�y{|zvsuxsvzisv�w}zrpzyx�w~�{�~��������������������{���}v��������jnzwnrttsx}xoott����~zumg][cktsgmmu
zwy|wy~{z��zx{uu���{|x�wqy}y{|��������}|���������������������~{~vztpy{us]auv~vourp|~z~sr~ztsnrykmrdjntuunknmoyyupmmnrw~wtwtmnuyz���zpd]aad]GHfkg|y]^hasrx~{sv�vwpo�pgxtx~��}|}~vrnlv}w}yvx������~�zw}�tdt��������������������|v����w�tvmdfklmpk_Z\]cfb^chfdjERtY][`e`kqdamrxsuoaVny���{yxx}trpy�yu}uz�������{uqn{~�}y{lihhlp{kinvsmplmwtknwuqonmkicinponjeinzhfZmvkql\\jjd]]SJOZ`aibh_^a[lonfzujlf{|x�{p|wky�zu|vyutqx|rnr|x|y��������}xtsinouqanZjdoc\hmtyrjhlqjknddxyh`jqsondiY_jkt_gnqrxrs}yt}rucef`e^_`VYRUZ]]gpvuwve[otafjpm{snoollpgfgVgnyntz{q~liac[dfljknxz����z�������������������������{y}��~|���{|~{�~zztquyw|�~{~�}~~}{zyxxtpx�zv{{{�}~z���������~��������}w{���������~~���x�����{{}}yu|zw~{sl������~����t��������������zqv}~z}��umvwrvxmu]l|wrtxwx~�����{qopy��yx~���uiewsske^dendZekirpxu|}~}qv�|���������������������}xvoxvw�qmid_ieje]\gjjjefnlpll�|tv{��uq~|~�svyteYjiurt{wrnhGTkqndmopujxt}{mvvndr�|�xw~��oblnbdn|y{}|tr|{i[S\WD@OYW]RI\bfc^S]flacgkehbclimrkgZ_rpgpglcpm`F,BUadvfg^V]YQFN_S:8WU\V]\jjlopnlkjtxeglnicac^qtonxppz}uqtrm}wtcdabgb/7uchorcehng\hkspmbeTX`jgj`dgml`adclWdZUhvywkruphhovtbgjsf]ghmrrnlqxosaU]ingr}~{ghle_ktsqrpk[XX\TNRMQcs^JGSPRKNQMTchloc`adZYU\a^Z[^__XRS[emqp}x~~x�y_\dmrmskkkkkkkk�����������wr�������������������������������~v�|�}zv|����������������������}�������~{��wu}{px}~|{�����||��p}����~�z~qs}�vy���{|~�~~~~�����������{�����|wz}}~�����~wprsrooquv|�~��~��vtrmonwpgbXZ\`ggXXbvwv~x\Uhqs�������{sppbeb^^][^gulofa`ef\\PY`bbdhhd_ce]OTT^UTWY]bgbkyvwupsojdaqvlvz�~gp|uzmi~���vrt������~{zxitgcelpkmxtkrxxwd`TKORE66CQVTSX\W\aj^SSUTUZXX\`\ab^eg\`llsrswpkslpmmd^YSdilckdiVQcjcNIUQJKgif[\\khglprsrd^^`jink[__t{kdjnsxwoggkpikpqpicbipfCEd]\`ce`[WXchj]mWRVRRVWW_]aWZZSWYTZQT]X[jjf^chikmopsy{hknlfgb^]]^ae_^[[[aeXcd`hbhnkdbabgjjhm`UYYY[P\[_Z^]_XUTW]]VRWTZXXSVV[UZ^^\[\\\YUUW[^`ZTUZ]PCQPACIRZXZkkkkkkkk������������������������~�������������������������������������������������������������������~z}~���~~����|��~�������������������������������{qpu�������������������
������������{~}zusqokxwy��}zxrojdfivrea^qouxvifgsvrwz\Qff_knx{y|xiaegilkhgc`c\d_d`dcb\^gahid\hhieab`YahgfWPNIHLTWakd\W]d`^`bmifnhowgjlegahui_n\g[dbhdbnmnohfijcfijcXWgdbhaehp_dhgimgYdjfYQU]`olloojssuqtntw{qjonwvllrx{|{xtsuvnqosunejeh`glu\bfcfecc\TPajea`bjhiottokwppuscjcXZLjo`col{�~phfehgcgffnllrsi__ac__\d[[W]afdRmVT\c_c\]]YWUSST[_Z`D;UUQirupqponmkihpode^_Xa^\[Z[dof_`e`fqec^[lkjhdic]\\^emec__XU\[_WUU_XVUSTS\f]Va]][`XWRQ[YVUSQSVTVXXXZ_bYZcbZPN[_[acdhb^kkkkkkkk�����������������������~jw���������������������������������������������}������������~�~����������������������������������mq�����������ydbjs|������������������|w~�|}����~����{wx~|smn~|���������~tpwtqmlvkimiwzqkjilrlakdY]^dbX_jgiol`ghdcb_`^`ghstvhihi]`blpvpssnljjgls~xoolt}�xuzseisywxxvx��|�mmt}z����x~k^�tq{wqokntsni~vx|~zomttuykow����y������xw���}|���z�������~wy�ywqpmgibbb\]fknftlnpolgdf`fdaJS`ksrf__NFMbeljjjilrvtokuyxvmbrbiiWdi_[ZYdje_\ZV_^^]Y\eifjdaig^\cb[ZX`acfqienmfopb`U[]eki`^]__[a]Rdgbu����~vomnntsq{�mli^afjfacimmpvstyksrp�~xqmgecbbgq{ikilib`Y\_`^jimqkjhltrqywrs~wxrmlga]XQOQZ\[UPQV[`W[[X[ZZ_]^ZW]aakkkkkkkk����xvx�����������������������������������������{�����������������������������������������������������������|n�����������n{����������������������������������������������~~~~����xx~yxxtu||~|sbNQht|x|{slshimkgmokpxuttptxrgan|{{|vt~}tqtttomqffcdZjinmqinorpwqlsrssv~����~w��}{|{v}~xuvspohy~{y}~o{vqnsulib^sytw|�gp��rszyzw~���������������{tko|�������}~��|�{�x��}���xu����uyzvwt}�rgjmsjwlhmtupqqkocRHSfy~vllwsmo���{u{{}��������~{z�}zywx�������������~�}}���������zwpkdgcaZNZUYPcbdqgnmtlnt�wxwtsszolnrrtxuqrqnmnppw||����~�}z|���u{snquwp~�z�����~||��������si^qyxp��������������������������������{|������xkp~|||ypklhkkkkkkkk������|y������yy����������������������������������������������������������}z{|yux{��}�����������������������~�������������������������������������������������������������}wsqutkfhklsupknvuku{wvjlsrtpspqim_ib]`lpkslstsmkspfl}�|tsq~��uprtvxsiggxuy�xrrxovv}��{���������oqvu��unt|vgdijoqpet
vy�tv~{{|||}�~������sq~zlpo`Zgz���������}�|vvrrtxywrtusru{�wmkquxjnlcvn}qpv|~�}xx}}uvytu~�yu��}~�x}��~�~rw���������������}�������������z��������������������}�{����~}��~rsv�{}}uwtl_jojynojgadoniqsoqv~wyqv}{|zrrsv{|wqu{~|�}������~��znt~����|���|sy|}������������������������������xyv���}~������������������|wnelrx~|{~wkkkkkkkk��������������o�������������������������������������������{�������������{wuz~���������������������������������������z���������~~�xrv}}tkyt�����zxuxswqrqpjiptqmgfehmpja]flkilonirmmoopbilfmhlkcjgbdfcjjb]^]gpnuoorojknuow��{xx}��}xst{�xjgkwmt��zx{r|z����������}����ptkuwXJVvynouw~|ulwnq�~xkPJau|�}��}}���upv�~tyzzz��~vxutotpsputyqgTHRcmohstiemqmipoohii_kmemg|zx�����y{zyz{xul^s|���}�{�����������������������������������x{�����zomlqwwroorrqxpfhgjkmkdahqsulhilgedopolkrwy}���|v{zv~vy����������������������������yke`[nvwwuw�~s}uwurjhkrz}}���������������~|�z|��������vx|�����������������������|��v{}tmls~tkkkkkkkk���������uIQ����������������������}kq������������������������~~z���}�z~�����������������������������������}��xqomst}{wx{|tqrttqptyvsrrpnryvgsw{�oc^qu|�yvztnryvonisywy���z|���}{}w�~}tgi[Yhnzrx~~|�z~mYORO`xz�����wsox�w~������������qjrjkrjvqlw|����z���{����{~�}xy~tzt|�xx|ebx}������~��}~rijb\NTltuyt��o�}��vtw~y|{mnjprhhtspcebox|tyleI0;[s~~}zxyyxsqdil{{o|��yp�����~��������~{�����y�y��~~{�|s�yvu��{|������|osy|zwx||y�}�yvyx�nv{{ypty{zz����}yy}vutynqzlklroqmtmgqthifcYbkw}xhlfjqmurkr|�~z������������������������lNFR]_z{��}z��}������������������������������������yvw���������zx���������������y~������{v|jkkkkkkkk���������\9^����������������������wt{������~��~���{wzyy�����~����}�{~yv{~���������~���������������y~yuhkpijf\TUbfnnhgjlopqtvspquwyuzrpszxurv}r}|�RY��z�������}o}}}�}}�qq~������������~�������x}~{vu{�������vv}r}�������xy��~����������~qququ|yw{�mizwwm���}xxsurts{��|uikjcKKasZt��y}��pdgxkciobhk\Wb_\b^edemeljgohhin{�vwwk`_miokdVmy}vlmw�z������������������{|�tW]y����wtz}szuv|tuu|wohozvonmoouvvvoowxnvw�~���wpuvx�
��~|y���~���vu�~���������������������{rxuz}����������y{~sxjfry��nlkkgv{yx��{y��|�����������������������}����~|nv}~�������vuqtwgubl]u�~��y��������������������w�u|uz��{����qxk{��m\mq{xm~{{x��|z�x|vvvvvvvv����������������������������yvv��||zxx{}||��xx�~�zkgjq|�zyz|��{yq|��qep��������{��~���������������P=d�{�r|{nxlgjw|{im}����������������}��������lw�}���������������������{v����zqppnnkz������umow}~����������xqp}���������������ob^iiquorz}yxszu`k���|~x|����tt�������{rfoy�{{��~��~wnlrlxgmroffim]iqmbiqkv}�y}�{y{z���������}{���������~�~����������~����yz}��tw~�yuihbefpopxvvyvxyxru�vptvruxmfhgjhmmt����}tqu{������wttz��z���f5Km�����}z{}�{�}|�wursssory}xuupqrsv|}xso~������������~���qk{�|w�~vzxr{��y{�~w�tlrk\o������������wx~}~�~�vqusrwv�~�z������}w�~s��usnyyz~�|y~xt|tz�u}��{x|��un`rwoglzvz����}t��y��|�vvvvvvvv������wu����|yxxw��|qhstkljgeehjhb_eieemsjoytov~~�|qs|�������{pv��������������{|�|req��������fT~�������tihu���ch����������|uz|����zwwpkii��}����������ymwz}{y}z}�~{��������vu~���zu{q{soppmpv����}��{|u{v|���������}ibqxw~{�yvutqmzyxy�m[{����}cau�}��uow����~��|���x��������������zrwtpydovyo{}{�������|z~���y{��~�~�}������|�{omz�tpxy�ztyyxwuroor~vr�{wy�vrrujrvsqlislolpjmrrnqxyx|}oiqtwnsrpu}�nv���|��~~�zyxt������xf^`oxwv{�{rkkixywtws{zx}lgnr~vvsztstv~�xijwotvukrwwvm|�y��rGAqywv�~wz|y���{{|�w��{vuv�����{{~��z~z{~����|xyvysuz�~�~zz�}{|}u�vsxqpmnllsxob^gXciqlpc_rxvyzxw{xu��{������z�|x�|tw|xvvvvvvvv��������������z|~�}zwzsvmdbfijkrolpurrz���������������������������������~vy���|~�{minj``|����������������z~�ynwnjcy}nokz������������|y{���}}rdjp�������������������������}��wsvvz��������o^gjxz~|slr}�������tri��������}������{gXi��{z}�ykceu�q��~zor���rx��|}|�z}ur|xu~xrmpy��q����{srg_gmvqpkmuotxv�}���}�snkrqyznjpvuqmoilpfnu�psmrpwtrou|}lqsnprm\grtkjhhiklmnvxv}sslnlouzlprovk^[RXcngkrvurokpnnghqoonsg_kdgnqy���r{wrrropuwtruuvo{zkoqlebehlhhgidnlsec`dimurlmovqrqe_cebkuqxy}uvwzwwsuz�tu�����nZu{rnzz|}��������z|}~xwvosx|ov}��{u}tsroxtv��~{~yx
zvv||�������u������vyxx{~wooicZf}}y�������stvwv|~}������~vbmu}t|towvvvvvvvv��������������}�������|v��yz{vqn�����������������������������������������zqu~zu|xuuw{��ynhmw������t������y��{�}pvtpjihnbki[Vapvdu��{yI-Tyz}�������|zv����������������~���������}|~~~|v{��{���������������������������������������������������|�������������~jhgZao`QSLWWT^eogaefnShmurfeiha^h]egmpd[vv{zyv����yyz�~�kfdkvzvslht��~nhlh`fiunkvphdTbihtlijpeirknpmjkqwkwuoincYmw|�~~sp����~vzx{ytpllv�zttrstpqr|jgorlobbqouqqdkmbSU_ck]UV[Uaaequj`cp{�yqif_kimcntkow~�~z}wtmmwrvtjlqlmqmpmrw~txvyuoo�{zw{v����~x}wutuwly�{�}ezvvprpquepspm~�yzt���u������}�~{z{xzwx~�����x����pdknuz{zwty����y~{ty����}nnedfcdfgd`Y`_rxvskgyrmqoVvvvvvvvv���������������z��|�z|y|�}x{}|zz�����������������������������������������{vz|xuwpghprrx}tpormaYYUUleiqgcfi_]]`llpqxm{t��}��vorrmfkojhrXHD`�z{~�~������~�~����xw|qu���������������������������������yvy{|sjrw�{�������������|����z�����}������yjfgbmrlw}mxxvoqsrjnnlfhhabTVdkg`]ag_hmpn]^fnikq��r�jutun\^afg`l`nyvf[Tafw�wvqoqnkmwoqyuwnqylh_kqx{xojsijgh^pvmonvsvwtokmpsqxskty{�mvy����������}��~����~{��~||�~�wxyxkhnlyxw{laioof`hnjnkkkpo{xsonsxyxxyy~������|��{{utzyswoqknnlrvtvvooukpihhpvwjik|zzuztx~{{tv����wttxoy���|u��tlv���rzzvyzpyz�������y{}zz|ww~����rv}�����~���yjpz�����������������vpqurphommihhlbhfptp}v�w_kwdQvvvvvvvv��lojq��|sumr}tt�zx�����������������������������������zcxw�������zqy|sy}}ww||vtwphimklpoihjoog^YfUda_kumopxot���xz~x���~���zwyumwlopnyv�lmxtuvooqooy�uosv�xp|rw�vu������x���}������������w}uv{y}~}xqnrwxqorvnnu��zuqwx{�|wpsuyd{����nh}sy��wyzudgqov{itoqztnjmrnfmpddnbb_fkilnvmfpn�ouoxwxjt~uphsmokiqiiqy�susg_ddibdakkjovsr||zx{x}{���~qvv{��o]dnupmoz~|~t}}{||zyyxv}wlipgltttsep|�{uwywuwu~yuxuqtjp{{wx}�����}pklmgmmgpsfbcedhswpjoxyxq}}�wu|����}wvx����yw{�{�{xtvwyz|zyz|{xsoq{ttrmoqepz{w|���vjpqsrlhl{oriovjnou��{t�|~��z|}�ustsvsxz����ϸ���������yqt�r�q]U{�������������������Ǿ�����������vz�}�tyojgpzxgSVk}}���}qo|tjvvvvvvvv�������sganpw������������������rtw����������w}�w|����
�����������xwm{vqsxxppx{toptqtvu���������������|�zktkt|~�tw������z]fosy��{{oonlwmwyt{���x�~��~�|sidpfb|tpumk~�xxsov}r�tv~|srprskdfn[aYktjouu{��}��������������w||w|}v{loz||~yw�zx}zpsxqlksxnlob`\\jhkc^nsbbmli_qp]_ihgajr_eWXNAHJZ����zdqyr~����hk|tv�vlnnrjwxr�{x���z�rnfhmxt�ytw~x{nu�{mekty{u��|���~z����������t|UL^qq|}i��ӓ�qvoqnpw��|zxyv�������zuro]hsylkqozfciqpkmuw}vy~zhkjojjottsroigipp}{w�zw~|hyuopmtoojmpomliilfsqksmusthpz��{���u~�g>EVQiotpegqwwoir{z}{�v�vwuy��ƨ�jz����op�����в���k[unnZ�����rdXVZV_}������������pc��������xoyi]W]bls^bmxshy��|o_rvvvvvvvv����������������������������������������������~s~{����������������x������ujpzzwvz���~�{���������yxtpolxfmu|��~|���xl�sirukgqee\auxsl��������������vv��hd�������}q|uupzmi|lgcnj||f\[ppzkuq�{��������s��xwpz}�wyvrswqlwwljt{wywifwy�}���}utv��q~�w�~olzofnmh``V`b_keokakfbgowc{~uoiP>GQRZdaib{�sqlhkvxo�plwk�{�wu]odegktbevg_hbhgge]jrla\Sf\bffo~sqh`[ahg_kdjqd_pkfi|pu|y|ljomzx��x���}~�w}��~�nv|{w{�{yp��uxvlplbtvtdnws|ymqyoj^be]`fbqwupnujvm\W`kjS[g^lwk~rknfa[^k_h^geUCFynbldbjlckrkjef[cG?FHRdji_heKF3*3<Vqpwyutmsloq�vx|zyk]dqx�xy�ѣMQMB:K_dHB;V�zkr���޳���T@|g`ZEEAGDEEI=ARVdfb]`������TGc���ಙ�����zz���wzvehlfkuwuqgbrrrrrrrr�����zx�����������������������������������������������������������|~{x���{x}��}}|�������y��~�������{~�uh}������������������kV:@OIj�����~���������������}s�����������|}}�o`knFK[_{s����u~��z��w{�q{~oWVT^cijsrqosuklnw���~~z{yo^kkqc\bibhZ`ommhqloqijmjajk`_ligcctoridwrrptlXgjhhplgpvwxutrlmmkgmk`_ff`Z_^Z]jbeskV\Y[`d^]ao_ek`\kkea[XQjkjgeppsjmkljlkng_SQQ]UQSYZYWain_dkgkgosuvkb[YeXS_YT_okdegYZfljnmhegbhsknilorh_pgilqqorrjjmlmijkrmpnsmnslhgl\biaorf^Yh`hegkfifc_bYbdcmbf][gkhigspvplmnpnXT_V\_[]mpoa`aRKOMcbTgkgej_Z[ifdaW]i_imskeeppk`ZA1A<;?@KO9C@5EG:1FRp���XF?TEBI98483338.-64>;831/?R[n�t`Yua9y������{PQq�����|plsjpsqv��{rrrrrrrr�}|zz{x|���{suuz���ggm�������������������������{����wwtvxtwpy|����ys}z������{wv����~}zsssqv}y}{��yw�ynkp~�ommtystr���s|��twyxytuyjlhojv{~��|gWfiqsqjdkfphhjcoyv}jZmec]`[keQarzxr��ujrckoc[x
iVWjXdskURS[`abjnkhy�{~{���~�|wsqoiidnjegl`oilnfiiknjrqwtzninkmqrr|{~lrwomhe\e`\a]ZV[YYbhmd_kltg`Qc`\`aTMUUWdacdaY_^NWg\hg\iQLJb_WYqd^SGN?KSY]beqtoz{v|vrvstlnnqnhfhmjfafkgozwvv|xxwveGFH9Kao^S_mdjpoelnijojgl^dqrmehmmf`k\`qphknpjbY\`kryytusysvv�|rokkdkmttjajgih`qnnqsvugofr�pix�ozylhuvkhnxxqv}zt|sopilurpxumgano^qqjg{|szutf`n�jnlswlqvvvvjQHJTWQ82;:<=7+%) ,4@ORPT]d`[_`hXSNRJEAD:A=.0*/1./=������iR:*Oua����T=.-��˹����{|{{��~urrrrrrrr��}y���{t�zwuv���u7:U�����������ux��z{�kj~�xqyqrxw{�}{xx}y��������������rlpx~���������������������~��{t{�tsz|~x���m������y�xm]imtn|rcchbUQXakdeg]dU`fkiUYaZY[g~[k�u�����~z|}wvndfij_Q?JRM3-,Klk[XYR[Vfeuq{����tqievwwx{zxz�~tuqjb^g\_ftwjr�~wyvyz�|oyz|vwvqvzcbgd]\XPXZ`e^ffjebecb]V\[lln^aafy�xu�rmdcn\NXTebbofjcYXIAOiYl`l_a\Rab\`\b[PX\kqwt{ojip}~�y}yzt~��xz�w}���}}u�x��mmmxyw�{uw�w��z���~����|s��xv||vtqltv�~sx�|vsnvrusyutogosq[Tamox}x�}|yslPiksqvqhlnwtrg_fqk__wp{�}~�xt{zukcsqim_dkk^^]Ygurqvxwovmv~�~���~���{����rqw�����{pnnz\evt{|`QVjto[[c``gjfepnqeVUabehpjgppiLCFFBYbHe���`A
T���|o?4+&fuSћ������zow}{{{rrrrrrrr{yux}}zsxxpoustz{siXck}������������|}�|u|����|y������}�����������������������|}��������������}~�}uw{�y}�{��w}~��}ulv|vl`imvsfjookmsg\iopykqwou|qt��|}�|���y�����ehlmx|wftmcft|yhhrjeld:,3Suxx~�~�}��������`[^Zme^cpvvw|{|z{{yxtdfju{u��~xoirrtp|vvuvnicjimgedghga_`khkli\\hmqsifglmnhjtrkg]W_tUTdSNUQT`YPU``hZS]SMZSUTUUY[X\g]_Yngaepqfihjgomqsjigagadirr|{|uvuz|srut�h`lmjYhYXTUh_Y^d^`Tdpkdf`]hg_[SZ`mvrtwpjlgjddo]bfjvolhpxyriryy[WexcP_ieuomniKn�mxcb]ddmqxoshhhwpy{vzy{q~|ximwkfkqilymy�wim{{��svspmnWJqy�������rzwvvuimptqscagfgjjhlwkx}uqaYpzu�����vhpqrztrrqzv}zxv|u�tuowxm�xx{qstduVRE@Ii�P.0BFHu�W���ؽ�ywlamtprqrrrrrrrrngeu|usrxxpsyz~}{urwz�������������xy}ux���lcj�������������������������������}~~������{�����������vv|{t��||v{sorxypt~xupum��ssskcZqul~�~uzw}�������{y�~�{vpwxm^ckn[^h`p}ochqpjt�|xzzohlqj^Yamfjppj[UTa\fqmlsd`fbb\[bgjmqb^b]fkjgmwtungqof\eda[hhlelkiedgdWUU_`bZZZ]XX[db\gnfjpiee]Zd]XQQLNHGMMKNMMMYUSTSVZY`ZYAETWM@AGOFNSbaej^cbb\qiieoj_cfeirltmb[]Z_[^ef^cklaecbfbfhkm^Zblk`pfqvrrihp``fcinnmklThcm^b``gspo{}svmvwt{ebbhvwvwxtqruspkt}onwzqww{vd_czu�eglspsdsy}ryhwzmtszz{ohppb_js}mpko|s{}�tlsrmqnkbfefrf<bkju}�xoqeiXZW[TW[WYnslhpnlv�nkvhwph`]nx}��ysqnnwxw�pustyqupnkrlkxz~vy{uxt~�ow�wxpus[agsmrnuht�t�������qumrtnm_rrrrrrrr�{qw{z������~yxi]q}xvx{{��������������~xcx}xnz{y}yxvz�yu|}���{r~|~{x��w{�����������������|}�{����ypttumeekh{�|yusm\dgluvubgv}xzkM]jz}||�}��}jb^dw{uz}ml`hdopmfqWcbaUOb_ljuyokeptncsptvsme`bjVRZh^a`\s`]Ui\aaX^akhffbXSZdcbfgnab^msrqnsoxzvylhfjorpt{rgfolb[keeeXTZib__afeomjnsqhmqiiiaY^akf\WOLQTOMQWW^a][bNLH@TRRMUbK+16*)>S[linvikjjiokrmrkprsqwtlleg`feghiomjhptupsmnmtqsoktvwrrvmqtrhlmf_fn{vrrrix]kbnbv|wnnf_ik\]VZ^^_UTIL\lib^hbajg\ni\hlox_OKVdc\Q[Wenogbea^^`pmpv~pjqghqwurjkkfjmikpsplfe[`aXaajqjigdbqkhl|ibjgkglkr`bogi^YYgkhecinuqrfafrmnx�ylrolt{��rszxeqqjmelflkchffdjc`aQUbvzukvffrm{w|pxnoq`ef�~yyvujr{g{rhw|pppnulrrrrrrrrf{~{ux��{s[MM[dP`o|xxoprxtz�}wmmibl~�|�rrig^gny�zsjr~vlmhurtuvoqtrsy�t{����~utw����}woinooghh
_^eecoobfhacfn{nqlqtxqxhun��toSRixssjG\\lmgQ`kdZe_gecj\JOMSN]W`[`[\U^Zbj]kk{n�yvzynsriugotrpxyz|xmhnxfca`ahwz~okleiYSadegn[[jryzqn`c\bbfpuhrwx{x���y{{ujvxxtxvvjtunpoonxpouv}krhissplsujled^\p�yRUdrqhgpvrri^uoxnfje{uggbsQQ`PYRcnotnlmdjrqvlp}}k{pecnfdg\c]jmjihvuqhuz~qwjddqlmo_hs{wu`Zh^\dngpxsj}xog]OgX]]\Pbnsjd_]aX]XRJKOMQM3+:[aa[[W\kldysny�~lYjj`r\OQSdd`Ypgg\`d`iKWh^qnjqgrsutpauz�}okcjjpe\S[^J`]_hfjofcg_eR^d_Y^ck\\dghjfe^Zgg`eysspphgrxTUNNOS]^WXUKITN\gbmrdf^\ZYVj[V_bfdh`\T8:OfcY_d_ZXhu^grsiomeIJVRX]_omfhe]`]Xadibkh\b^rrrrrrrr~xkecm�wysgam~�����skkwn}spkiduxyup�}qyxrxxsqtwuqu|nkotw���~���������}rjhihprrmgeinosokijjmutxy���|y~��|y��y��|q{pptkruw�qhcbgkh`ZWccaWZZc[]YXVJCJMRZWU[`hirppnwsmnbae`_`]]]bjk]U[QSXaXot|{jie_[VRPT\fgitveeUbkf_lgrppaZibemr}zuzotuxvtrpyvpx{}npt�w|}fksnnn|�runolxic@\jpfoqjdtniu�tty`[P]nli~re\O`�}~|}vjvik`frHA4-0&9Gcysjr{x|zmizwpxoubmqhohiwyutgmlxxlw�p]WX[hlVL_sjeK[E3>4GDO[VPUVPYfc^[\_[nre[wmlg`]\]]`_ZSV`inbchhgieg^grmkxs_^ccTGoth\_\\_PEKKday{sqj~PPP<9Wh~mqkbeeddd_VU`lbkhgd`gmcecibV[ZSXaRbcf^gYT^jpk`dh^^gX`\`RVXYkdwijgeW[QA,.AIIKIFVPJR\`ffjgfcmhoxdS]fd\ed^XQQXchhdeeWPShbejf]bb\h_?J[OR\V^d[Z]aia\Tcd\]TPllllllllz�yokr��w{{qs�������w\S`oiyyvi^Rdvyvrqp�~z��~vgjis���triglpqmhlmgknpz{qpja_gs|ssuz��������������z}{xllhjqmmy|{wyk]ilch__[^uyf_XRNKIJQ_ih^ORYPRONOIFLS[ed^_dllpnmdgcbWX`b^cmoX\ZUUVWWMHONQamqmihaYW[_gsxrkotphILurhfafontjaopqxvuvwxtmggotsvzvx��x�~��{vxqzuypppjdjlpu�y_klmlmrrusvx�~p{�qrnniWUr{allw�}vxrmkohnpmcBB>64/@R_ljcjy�ytvnmnkpjqgj]gikmkozyv|yww}tj|tSXS``zj]HekqlIPM9;=LQ`kpjbnmlqvyrkwvnjnaNZdkmnpa[[nqfa\ghinvadg]NRUNWZ^maPMMGIVMJQR]VMN=)&$-0bnemqiUUSWF:E]``Q_kYRY]XVbje_a]ab]ckglbgeVYWV[\]afabcZUSNNU[N`]VZV`S[`_TEPUaVRONILC3 
$-BIWPYUYULR`[JJQ[VZ`qvkbfjiY[arjfe`\bmgkbZ\W]`hgbNFVbfdE?LPSZ^_`^fbUZ]hqmeficVllllllllUeaXWY^]aeeTUa\[_W[lq`V^c`lrqvqhU_qto~��xtfcd`^X]]\UXm|we`[YULHMNUQQWetvrtrnlpy������������|�����{kjjnivmkeaddcfbcgcbmtsvlh]U`ammopnf_[aZV]i[]TXZYX\[Z]\[\_ccb`lfaiipmk^[XRP\gh]dd\Z^\UZUYTU]^Zdb[dptneryxomtsjdZWdepxn\b]gonsqcmsqwwkitpjppq{tvtzykt�s~|�{m|ppymjlqprphjnx|{qoskpmqmm_gmoojor]Jnyl^di`]igfdkbiegohcfk]hYbg`adhtxzshjsxxv{pvwxurrioif]quvtuuz�y�~�zm}rdcrtinvtaplehmmaXaOG[vqtqypeiqvzruoupilkW^dhjijSSTkoaTDVg\d~lyyeK_X`e_WSXUVR=FJ31:>LH>EA3B6DRtngvf==Q^]55M^[PbfTQYa\Wbg[ag]W`kogdgX[^Xab__^cecag[UVYWVZ]e\U]_RZXRcfljnme_XVSSSQHGHPL^`[V`W\YUYZb]QV\fppZZY]`gY[hpvukcdileujgg[Xjmhtm_krgurkhf^\ddcclh\bkz����wu{llllllllQ]SNTTTTNT_Y]g`enjht��wn}xukaekjMZqyv}x_`aZ^_ROTPORON`uycgfdc\\girl\[hnebekrw{����~zxy{|~{u{yyxvv{z~wwtumj`gyvqa]ZOMSRPTbeigaa\^[Z[]]ZVfbZ\]S^[Y\]_eijkqi_dlnkcg]UZW_^Ya[PFDKQPKPX^ac_XXZW]Yojnmwt}|slnrnkgkpmmtws|����}�xtxupgwqqhhmgkakjmcbo]YY^jho�{��~zly}{ffnedakr}yu|yxvmndgb]V[crzn|}pt^N][abb[ff`XPYmhmfolZWY[UJQalghhXPZdefopgVY^]ionejspibW?R]w�xlpzvukjg[g]]cg`QNd`SacXUQ=0?J0#5Y[[_kc^a\Zde_`hVNT_M`^`jq{o�|{q]=%,RQQhY_a\^^XMZ_Ya_imnMSXHJEK[T:5EOWK`cabg_bYhpiB
7ER\]gngp_cmmbbkngumadlm`VV_c_^\W`\`]j^gkoe_^`fhdhc`db^e^V`did_dgkfg^]V\]a_na]SWXW_zs`acsaT^_LQkp`_a`gbdefqsjisskpzkmng\d`O[f^`c_cyztljkggb]]ajn|z|��XO^lllllllldgZX`]aiZ[ipvvmplqomqrjbsqi_X[cdhnmeac`X^fhtynhiqdhy{yyy������������~�zz{||}�~ztnigf}wtnthggegljokjdgdkku{g\_\XNOXY\Xknhd``_]]]^aeiknkagchmewsnhb`]YXWNPTU[VXVRM<CHF@FLPRSUY[X\bekldowvyo{mmhploaWTe~wqkqvpvmhlwwstgr|zwurw�}mqmdbZZ^lr|qjsaQ]_dcfuvr~{xolhmzz�uvhbmeddvqxytiqrj[MBUswoqmdfa^heosiS[WPYZmtkgYpjaedVYdcgrocQ20>Ralrk[[YWcaZRQNgjWRXPa]c[WZ\ZMWXXSUeWLMJiVK9CVJFHM)(65>8Oabhmrqj_^aT[_`OPVgj[]aho|{�����fG^VWdYQV]rf[lVQ^Pda]hWWXTX\STZXY\TY<ESSS__cjgm�rW]pb]Yc_UfbcieVQ[e_jki_QQUhWj_JWXUVY][j_jgagnmiicW[^go_QTX[Y_`dffjjgibcZ^bfhr�RUTfiKS^ejadlX\g^eoeichccdcPGJPW\_]YS^[_X^a_cSWab`akpwwzyuujehdZZgpsjboxvsjhllllllllfkglnfjsuigpngc`]ceegffjhkimqpqkfvyttrosw{sqv{}w�wmn{��{uxurvz{{smory����|ssuvz~�����{~��jpnm_efbPQRTJGFO_mk]]ZZZ_aZ^jiqj`bdabbltvspqrmopqamtxmgeb\]_^W_X[_ernkhggZekeLU]eje`apllhelnbkt|vyzrqumuu|owwmcZlzkhovuszx|z}{z{xqu�~sssoqswxus|vlqigvqfhqqfSZmikmfxoeku�jpxus{ovrrfqppw~ejd_gXZi\[YkmbaeVRXPUNWYdkQljisp]`NV][@$%/.2G`jhb\]aWYLIGIMK=)!%3Q[[V^_PHGOTROVXTP]IY[]VW\`WR]LC=GWWi]af^fbcagmqsiafihj]V\\bc]^fkfojruiqbnhnqjbaWRXitnhebfa`ia\cg_\db_\]`XWHGYeZWbTb]^_aaZnX^^bfebdlj\TUWWX^[\\W[^cTpcIVUONYV`d`_X_bd_]dg`XRXgZHERWVcj}�ykhjlnwutuvpimZ����YI^pskmi]VVQah]]W_ZWZW;7AMU]cb]_glj\fpcklnkoqoxtmywotloquplnnshbstr}vllllllllllqszyswz|pjunk|vlpqtwvv{wztwwmhb^q|}����z~{xvy}tz�ym|�������||�~wzxz}�yustrot}}~~{vrolx{cpyoprvola\TZ\UV]_em]baszkls|widmmehijh`YWXZlVUSTc`]WSWZUSVUYhb_^ezyvigqmyxkovrlkd[Y\X^aZ\`Zjonbgoouzpsup�puvjc_tkkfpsw�lmwyrsuwvog_mmpyvrtojccdefcsvxpu�|dG>E55<TdiiwyvvddeckdakhhpbZSf|po_Y\X`og]a_lsyqcbejprokss^ji]ghc][\[XSHDNVT]hf__`S_ZVU\c[P3
%-.=L`abw�r`Z``TOXHU\a]U_W]f^X[\bfcfuq[`ceWOTVXW`b[\_NWRKLVXZZ\adjhzlob\YU[e_V_`\a`bTZVUS[[KK^jjg`ei][]ORdnpf]ppghkbUZ\Xd^giaf^hgikj[OefX[b]W]c\[enpgc^dVDQ^__jfqvolb]Z[_ceaXT\XVWd[UTVXX]VZSHOOV`fcg_daT`��{|rtn_ciaUWU\^haX^\a[[XUEUbjkqytgSX_bfslomi^csuurskuuqqnuvvsgeheikihffllllllllifabdirpnmlwe]wnu�}||{}x|svumtxpwzvv|{pt{��������wt�w|nflz~|xquy~rq|wtuuvumfmz���|roszpkok{t~�ngqyycair��{vpofhm_lvigad`XYedbsf`WQSY]\F0DBFPT[TR^ieabaYrm^KMlurcfws{zpUimdaacigZdqe[bhu�����{}zqyum�xypilkqjZnrv�ult}WaitpqzmSd~h[iebi\ipjt��{�w�yjm\B#[YQXmmi�~mbf|ox�}wo\gdnwz�jm]mnf{mdZjq��lV_aU[Wbon[bqbhb]A`y�sdWKTY\ceckx~hknutrb`J@Zw�xUflb\\X_iy~ld~x��wuoxw\r[hZeaVQ`XjQGLN?B=EPP\npeVNNZqh[T_^T]pwbxhyg\_cWufj{�ukUgdmwgpmS\^[ji{qbbdlcLUmokfi\X]RNURMD6Nf�iSMWYSa]ZXOlpfjj`]V`bgg]YU`mcXOP_^SWX^q_g\UNOX]_bbgnhfek[YPG9CM;IDBMILOXW_dl\VRaWd`_fgY\ja]^WadeZ_^ad_hcegb^\`aYKABZnaQJ_bYOLJRecVnzeYWU`\XU_i|�{qts]\ebefllllllll_snqamqtopcIBNZfrxupjslrutyzqoqlUexz��{hbiimfljmj`\bZNPNPNQZ`^hYQK_e_jmiaR_l[Yjnlnsofldljmru}tvmhbbnj]af_kmdjnb[hZZQRdd_lrdW]fVfgb[icMKBIL[ORTVXohlqrtqe\crF'@iupihok^`rsvrosuz��y��uNAM\{�uvyqkkkgplieqrlqnjifr}pznnm��yqloshfz|h�wo�tn`]jSHm~y|ucUZfOF1&!-.6Xa`Z_mj\k^dnnxzourecmeoquybastvhovrxcPWVYUVaY`glga]fcapoljo{et`dm{�y}��}z���{zz}qopqkhw{mbScRWas^f[[ZZeeM=CL\eROa]TOQZSe_Y\YSVUTUbak{�suao�cEVjonfetzrlrpswojj\RSnsgprhvu`vraxiagVcflsqnrqiisl\digZ__gYOd\_`db_VOOGLSbc^ekWOYbNV[ZY[SKLa^eeZcWPUPUYMZ\\_X\ejg`WdUO[ZXORP[eZ]_cZYYSTOZVO^ivv{u__hleYZgjaYVW[ZVY`df^TW__\WMZJR\F5FX^NHG?DDIOOX`VNMeyvpwfcb_ceckafbchcjghc````````>Q]ebbika>=G?FZ^Wdjic`Z]Z`v�t`[\Qapnvroj]ZV^[]Z^\^SQURRSK\fW\cdYTLY^^mk]XTajb`fc`inmbffprmqspwz]lfbf`X\]XURXdZ[`]WPMO]ganqsfgleeehbjg\]OPT]\adcb_YamlilnmjqhV`lmgfejrssv~s{�~��~���{xpjssow�~w�z{zmfcnlefb`bfupjxkomjtwqvz}vrqrr{�zutr|lhYFXciZ`dipv_YZM[[MQOZaSN\ebsiakmsqfijgc[R[[^imb^jr����{��yffl{rrkmkffjY][d]Z^buulcqg\hhcSKWP^^^Xgikbke\\_fmjipc[[o`Y\^KKJG4,)789>EX_IIOX\YetnnzuoqqnebZa_^\e`\fmugbm_`ghd`_dm`plknc_fndtjghnf��abrtpvssdOenloadZobuvkfie`nkizmfWWV`g[PRY
XUTYYl_dbcY`_S\TXJQO[JNd_``XHXbb^fl\Wdf`g[ecgx^ccYjfjbjtmh^_^me_XWZ\TWRNRUWZYc``fnnf_Y\^_`b^Xb_WQMIJQLDMMRL@4.4PZJOS@IOZdXVnypnq{thZd`[X`dmfhfd_`bbdm````````X\hkvlwuZ<KeUK]e`v}tvukU[cpo`XSJKTX[kf][VQLRTXUWfjXT^XQUbenepl\`TKTVRYYPQVY[_eggab\hovpghfgm^\fcV\adZNNIMOHCE;DG\ZLLNXnmno{haj}}gigqtqwmlmhjghkkc_dke^agemnxs}zpdjllmkecprxpbn~wuo^`Y^ahuqd`jnnsrrg^VVV\f[YZ_b`engn`kp^mqnvrvwyiekdkrotphF/7@PUZ`_\]]ecoeFKVMVPiT^ekkdl`Q]gmgcgeiXNBejwvrv|n}�����yt�~���{xnsjPdc[QgjZoltj^jpljbL=LO]lqbXOGMkiltod]TSle[]mnghH:8GNCHIFA55E^iZC6FUYiqgjypdgjibd[^]\ZceVfgpwqp^_bikc[YYXiefl^V_[ZhhggigccOO\sdge\cLazzkckrfhi]^IKSUfjuhWUVR]qfteWVW_^Y^_icl`edSWUbW^VXd_fZ[^b]d[]TT[\VMXYcZggoiihd`kehpmhZTR\aedlfe\`[QMDKMOXHVTPS`puqY\]\]^]Z[[VPLHMY`^baP222HNY\PWlmocaxqgvga]R^`]UVLSWe`\Y\Wa\`^c^d````````rhd_j_cX]ly�vfm~��zgknxpibYON[^QCIL[uqbbdd^ZZcikylVXb^dsn\\bl\TaZNRWQOQPPZXWfnpwoR,>[gM%<`cflr`RVUSUNFF@AJE=<HNMYYW__[dfkx�yhk|}qps�{g^TYedrkfecimrrqswxxoi_anreinrrjcclhgkj]awZrtiU[q�xrottjm�{|qechkojffikjjmcWSJPWIOTejQHGZYhki\Xaa\acfhcTX_nkfg^\]jXSKZa}zr}�utwt]NWiuqnqnS115PXlX]qXUaO7aoodh^NZbXmumbjbc]apgdTW\e^Ztt_lqtum\Rec_RZcW`PYf^er|�oVUTZPJLZ[_bTQX\Zcjmuwrb\q��nrslngacc]\ZX]\_a_[cotpklinz|�uoqtplg\fa^fhcb\edZd^kdleejeWadpbXmU1FWc_`{eSLRcSViqMW_]lbmokixsyrijada`Y`^EW]kjl][^]]cdVSPXWbemoSXXVV`^BCCDLT_hYb^lZj�{proddiibb_da`]finkVJJPQBMQQOVbeaNLPURLQ[W]YQPTY_i^Z\[KKDUUMTcjpoja^omecJDYUTJN]hjh^mld[RXa_`^YLN````````{xrszxvkj��wtklstmonjU`uzfZXV[^ZQZ[fwsejR]ntridbg]Yfh`cf_URSSM]]aZY_bdbXKY\brqjsgbOXantnhregdYNUYOIU\ZXOHJGFFd`bd_jsrf^imyvsq}���~sqeXYZZ^RZTW_bbhkifehlz|xxpTYaa^Z]fhb_f]V[`Z_r`s}qkikailpsoeaet||qkjqzj]SZfje\`\Z]Z\_YdnZI\bd\d\]\[kp]e_R@RYjgux}��ulnla^��|rzx�yi^ckgVNT\XPHB4)9FA:VVB8'4>2/KDMSf]Qa^ckjc^lecdU\fet�syimwuy�w|xrojqeYgkyno^_T\^UO\]Vh]e`^SVRVdUXWX`X[h_[hi]^ai\^\ZZWcWOUb\W_XNWQJR]ZW^afU\RJ[chtvmjkgj^TZ`chalfOXWd[_\V[akagsua^G5Qf^bd_jokU_mvbgVahbeaggnimqprrvlmfbaccZogghe[`g`UY^_aXXSXSQZH@<KEERNKFDJHKS<_SYMERGOOLKNVXVYURWTY[]^aVGPO<<PMPX[UPOIIQZXPR\Y\YUUTRRMEKJXbc[T]hniirupmnooqrsup[
`]XY[bg`f[U_ga]\Z_UTZ````````murxuxzxnxvumipe~uxuqbatkffbWTWS]`WYabVWQNV[]V`mh_XWNLWWSQRWXVgXXdgfglm]T^dluneknl`ceossprfQ8@RMHFIWYTUPXffYJdea\XgprgXdt{p}��ng]_b`[]`^\ZQQOOOMNRV\\W^ofbaai\__adZOUac^afndVdskmqxdenx{|�vv}vestuqvt|��uis��ve`_]_WUg_]sa[gb^ccYQddi�w`ms\_bkfjlm{tmf__cr|plarm^U`gqpkkTLQZ`^`D5+@NIPQ50/<OKS[PWixcd~kkglkbecfWXSfxqaZcX_^Thoidfgpid`hxsmh[bea^jeRQRJQMLPcb`]em\^YV_JFRMKUUQTQWRSYa`Z^TOS\VYaUT\UUYWR_KSdVZMHTR[hja_hqei__hedeegb^dajlnNKZi^ohfme;CJb]]m^v�eakel\nZ]qlcfe_hin�vtolgmjc[]\Y]OO^_XOZMPKS]f^MV[bW\VWLGFJQR[QWOPCLSY_aLS^[PMOYPA8>PV[bVXUV][QFUR@Ua^\_fje]QSVY[\[Z\XY`aUMRHLYNR[]TY_]U]a`qi_YOLMO_ebXfli]Xcd^d_]WU_beW]eoh````````tyvwjcfla[`vqn{k]cl_]dF:>EPRNTZSTWNMQ[UQQMQR\\dgU\\^VIIKOMSYb]_XTejjfelgkkmqsrsumpogX[Zc\T`pkem`]_]^WT\\[tvk]_qaV^hpsgVV\[P]f[VYPRVPINTW]XTFFNX_]YTVYSTaijmhmqeXdnmfjtxwvu��yrwsje]9Bikdpddptqsyxont�~�mvyznnj~uk_cbokkt\efZMR]^WWD<[gbq�|nj`aiiepie]MNVg|wecvj`bnjv}�m\SPYcbkTNHSOBQU;-HX__oDEIS`TOekf_po_Y^`e_^hgTSO\NFMM]W[NURYZ]apzmX\]ibc^VWNKXTDTLM\\ZUUYPPVT^VWROQRD>D=LJMY]`c[kj_\`eljch]cf^Vmnnoim`psvmfhigp�pz{zvqrnhooba[qtJDdzrbmbdg`Eb\\SaaVWzckp�p`sbiyifjg`aecyipteZ[dbajnum\S`X\R[V\IDJYWMXX`YUSegPONGO\OOLYR[P^cmql`mtdY]K81@`eji``a^]fcaldSXgmlekun[TRRSTTRP[Z]eeXT^^^_`jkiT4EL8>C>_aa]Q@=IMQL@EXgG*0?NUPVW`]gnaXen`````````dfhnfVY_ZH@Za^eTTZbLG[:04-9OY\][Xfe_T][ZSX\TckfPQ_cmhG;HOV`RY[Yd`bamohqvqlkkks{z|}}lW[]ogklfdYccUYY_djl_Yb`pvXr[]dQHIDGCY[_eaK\lwqtn_TT[^amZW[frvqc`ntoo~tql_aRR`dis~~wtzvh`hlheB?PMI^SeNQ^hd_emlfmx�offdhempjjctcYKiqrrst_tja_lh_^RT^^j[QT^UaQ_ki^c\^YE>YO\d^q`ONW]P^_VKR^_iqnz_MRe^S`E+<ccgUdG1acS_fensftj\Y`U`e_U\AFMbk]`[_XhjiQPdxslwq_VVLL_cP`bNXaPSTX\_ebW[^arnry�se__QP_]adlqdl����ojxy|�{|ikofWjlxux�y�y�|v��uiq{w��ns�smvvyvuyncikfhdamcVX_spfddgtht�kqxzu|tpie`__cel[`hfbNMWe`hZPecWJXipfUW]]ZV`T[YW\gjdamqrbgM\YQ`kga[f_x|ljmogcrmhaid`eenh[AI[US^gl`UUROQVVRURW_[LGL[ab^YTU^_feu�w�r\PbSHXQW\c_]Vdx],*#
,D1#0383/CIXVQ[iWW_f````````nvwpmjebib^Yb_`_\]_YZ\UWT_ddijzpkqemZ]]ffYY`kjYZsdOVPR`hdgcc[abievopxcmwvrl`hbbcfpwzj`ydW]bcahrkfieikapggfdbafgafj\[nqhejdhtXfValtowvc`dgfac[T`_hlnbfl`a_hii[\^gljd_suizog`jdjkfb[P]_[nreqsokmww���z�~r����sqlt��wlpph|vplxupuqwi`pidZmkmjScqhY\knkYgdl`cVJ^alpl`VN^YZZWQRRMS^[ZT]il^EUZWVPabE=B?`pgabegmd`b[TSIFHUe^a_YWc\ULOJQW__]`bjghkik_a`glnorioqrvxvsxvroigdntqz�|��vmagjjbhlrX`c_b\gbO[cccaT\kc_elekunkqvpjaeqk^jrvlngiplvwp}xmhk_cxnoz~onztkrmebovmtmhb`fjgmfhZ\\W_ZVR]ZdVLWcikXKEbcQZUTT\_XW[ZU]ihgj`YW\UZezcbjsf`ma]WX[UXbj_g^Zdoipe\V[XX[_V>=Rcc_`a^bYVdNKVPcYVb`UVVSh��rf[ajTVadgjkmr}nkfMV\[l[\X\N@LSd[S^E<74,8;IbYYWX]]XVlllllllldwxcY]^Z_^dbd]^[baa^a`Y[dgqggiqnjnfmcb\[^YZX\_[dnh`j`bpvl}�{sv|{x�}rujlnhed`kmv{spvqsh��bpwts|~rqvkhffkee_alront`\`jpw{sokud\da^abZ^ihfgcljol^Yca`_Z]^YZakmurug``hjirmahojnvy|�sytwpjuonorprhw~���}y|z}��|nsuvwz{}}{t�xz�yWMVkhFGDUR_]cmieoxy}soqors�xjNTim{�idXPZ[^\PKW_VUZVXUgv|`L^icjdURTSWPNNLWmfeiivo[ROVR\_od_RXP]\WPMQ`lUWgci|u}�zyv��o��r�yuy�yldb]\_]dbbbeibgmtz~vms|aYYmmlki\TaYJRLa`_XWY[dPa]P^]YJbf^QOW\Yi\hvuktoppegrcdgtfguwz�~jnvqlqy�wvqlvwnaij`m`hedabgiX_c]a[IRQ[QZ[\FSSYMMVX^^TPW_kgmkc_]aVOIKPkjk`hj_hbjid`YU\j]^Zaidhld^VWVZZZ\S[bghfec`fbc\^SU\VWZba^a\Uc�}g^\g``amg_QVacoqvoZ]_[[]S`[ZVXdmlprf`NPUdea_fd]bebjqllllllllO^^SWcbXPScfc\b___ca^XPUWZog`[PKFTZ]US\b]cjdfiinnifm`dop_fqmrqz{tzkgqvopljpryz{trz�~}g��}xqmkfejfuojad[[dkrvp_]mYTY`^^`_RVgU[][TXZ\Z[[V[Qcjg]jb^amcPN\bVipxz~yyzmy|�xtuvryqpn~�ywmqrrvirpxz�wyptm~�vyznloxklqtprw��}t|}qc^-4FB3
),5677Ehlrt~y�����y�o�yee^gp��ulfeeitmt}�vi\U`\Yf�x][`Y\cYUUKQ@GQCA_ZXTK_defQNKPO`mk^eZ_a_^U]quzplov~�~~~}y}ovqeUorhTi^[de^[`Y\Z\`jo^\V^crxgf\RXU[a]bnhhlZXmfadba^]gobheWhacVSb][^akng_Zguksulypqvchc_cfdekkfda_bkgez�}qikwmXswn`aamnoqobdaX[^\R9?8BShXSdZ`ec_^dga]`f[bijjbZSIJIDUZV[W`SX[d]eclfZb[gttuhm`]h]TQ^b\[UIKQX\\[\\T[T`\T[Z\VSSX[OJOW\ZSS]R\Ua[T=DBBK\hha^^XTaWd\b_V_Y[cWY`V]W[k{yxcbtpjskllllllllVTOR[\UQKJ[]XS\YNT__ZRNZTVc_XUNKPVYY]Y`\Zdkelmkgfbftmssicfllsvzypsmoosl{volhmxzkhokqiYcjh_^gk]R^\qwqjh]eboppvocb\ZROVLBPMOSTQTLUYV`]YVTcfQ]oXeffelilgaklnq�}�mw�x}y�}vzz�zykednw~zsqcXaebu���yvdyqwqagofpq{rx�{ex|��{gium\SblIIG($&&-$ Pbfl}nl�ulqknmhusovijes�h_^\adxvz���}p`bYJNpgVbn`Y]``[U_RMQDG\dah\babgahprqffalzwpruqu�vwwq^l|n�f]Zdissc{pmeiX_aQOfljitrbTOFMacVXZRXe]U\FG\aTPab_YSLYj[\`XXZ^f[qomkvffbeiPOVSbilgNOmqsfbknpr`ehftma\mgdmlmnhWSctsrxpxxvegmveZafnxvxxdYlYMT?<IC7:Tm[`CGaT]gqthbjmdgio|udkippmnnimdqjgsvmm^lreh[cjmvpn`[eb`bfXIQYX_hlib`aZQX`U``big]\dop^XXQaod]^V\RXQOIQXYQSTTVJWT^^\UPR\SVMRYMV`a^Mq��ͩdabc`]llllllllgfed_TS\WQ[ZUPWRILVY[[]l``[^VNOIU[dhsmskdjldnnqjkfirluqjjvo_Zqzxwuorpn\]QTUSVjna_YM[]S\m`[[\b_U]NYaX[RMVTjv|��ogg`YZfaUdkh`d^gfwbJFIPX^phPWl]lfalsha[\q|ffgbognxyqcrjglnlihgflcetkelquzxv������{|yrlju}�������t~��xeS[mxt|wzt]^iVE?-,+'FVW^cZRjqlj`keps{lz�phom^PPZecmftvkW\mlizqhe_YiaW^[ZY]e`mTO[fTa`d[U^ge`][eon`bd`pjnx�tnwq`[nbcnqyW[]mlUH>Oq�NSURP=>`ijiruqrpVGRWJ\a]WX_YUXC?GWjbmnf^[^aZY[RTYcjYNXbs}wurceS]fX[`^fmd[]shoW^jrbd~��f^ViX`^Y\dbbjn_V]sf^g}�zvthi^q{}ymKB^bGQN`oiYI6II^vZputnpwurwmmtpmqh]U[]dpty�vt��pzxvsgswhmcd`in`Q_`[WUNG7.:B@KVZZZ]_^b`eTWcfYWT\digVWZN]k]UT\\[^RLVWLYRLKS`RU[ndg[^`RTPLRTQYXKY�����꽊_]bWjllllllll]injc\\bXSZYYU[XZUSRX^_ihh^h`X[T]cjii_ijinpkuovq}}��vxsxptg\P[XTYa`cil\LM]ee`jkd\^_]kejc[YgjcO688>GL^\ad^hmmjejyjX_hkmfhegicgknhF0&(4GWbZhb`frVE[gQ47Ro}goZVXjfpveQZU]dnbjrtpwiqjppf^guxpvmwoxx|pvqptsohejr^UTTLm~�}wnx�|v`rweo{�u{t`QTK]]bcT^\\ZZONW]ghc\\ebY_]_SR[ZRXSl^Z\dmhedebag[`PO^^\TS[NbXXjqP[Zad_dges�uq^aMljc`cmz�maflk`x{oh~j_e[abI@DBMiZ[RO?/4Y_dageed`
RMWVPTT``[c\VUDLLEl~ijmqm_Zbd`^]Y`lfXY^sthfiwtccaT[e\Y]VVRURlMYdrk`p�q]fco_uuichipufb^`h`\cq�nmmtutrlvxnbillfni_ibWUVWTYcgesj`dx�vgikvxvl_^acTV]YV^\[af^b\YbfokYbfe^_]RAVZR[ULIHJQKZZWQNMLKYdZWgXXUXVV]\Y[TU^TYaXX\XRTa]PSC?TTMINVLS]d\Z]^eZd[X[Z`bUx���������}fnoollllllllT^`[Z[WUTTYVXT\_ib[RTXW]XSP[X\`e]YV[a^hk_aehwioidny{rmammebf]H73H]fgigohdlhf\_`hbhkdlnrfg]idN9,:=B=@DBHANKSch]UV^N[bcf\_hhk\a_]@%$!7KEIUINZH*&BA=G[\]ym{oqno]c`UEHCUUbdlxqmodpkieYT^b^^a^k^W\kkxz}{z��jaT5,''(K^lz�pstnrm][RWfp^_V`]ljYLUVQ`d]f`SQS[TWWd]^qbmkjhhl]W_`t^bmpsuwouqxt_dlrmjh`]_Z__WdmjmkqjgnyuhjlsgdTgjnT]etvj]bab_n`ec^P_\Y[]Z]bSLVWVUYZF>TJOPWOSQTY]a[fTKT]a`V_`XZaXqtfUVdie\Vbcj^NWde^TT_bV^bimmjhjr{`bhZ^TL`k[ehtumj�ldcbbTbejhihgja^_]X^adcmwfhciitsbc`]^h`Z`hhclmcQkihkZeNQ\ejt�zg_bb\^YTbc_Q``]TQJGEJ[TTPKNMWPTVMIIPSTXPJU_cb[Zdbd\TOJDBBY[PMidXQXWV[YV_a^e^Z^^_aWVPdcUNDFTWXWONRac\VV_X^Xb\X[`hggsvdx��je]|�jeoVllllllll]_XRTZ_dfgi_]V^fggdYVXX`SP]ibbU[Y\]`XNYegabj�pvogjf^VUNetkia^Tansqglxnvhgicligbokb^pg`]grp|fG?;EZcPM@DSNPGIQRMLONOVXghYkhZN=?FK1&@?* 
3@$3A<DQH@7ONVk�rSebbcj}jakz}z{t�w}onxmxvefacjf\Z[_q]N]ibXOh|��xr��rm_<0.=S]TGWaIWgYged|pqd[fbmcg`m]MSTcfdfrcealuio�{ocm_{ajoqznqsopnytkwxibhWbg`[_�promsh^afbrmnalofjcc\HS]ddV]USQXbev||rxt~ablzmm^mRXYMKKYlVNUD2V^NFT>;35BIIOW_q}t_ZTWe_Ub]\OVO^]o]_`Y`aTO^rbUo~bcdU^_k[ggeXVhx�]Tlc[\X_k[X[bcrzze`P`leaW_TXec]SaTOQ^OLSR`Yahd^gg^VIJBNYQISQR:GILZ`WNcRepl]HGaodVkmXOJHY_UKd]]RDUTOG^DKL<EJbTAEITYXK[_VNW^a_USZXXRRWULLS]SMJK_[_TRQWUSXVfhaX\aZSUdVeWIHWLMOUVD>OSNII[e^_`fbXYbgdjVVOPSVOY^NWZSccllllllllZXAO]dejbfdb]appadpleebjcgbezqlicbjniikf^cir��tp}qjusknonmfb`buvohiken]bqipc`Y`yz}o^VWcxw|o[VTS[`Y\]fpTIZURPWPTV_XX_cdba_SKQMN;Con^ZNB9<?:RSgmgkb[lo�rlahtejrbpsjlnkotn`\fq�^L^f_con\d\U_bZmXPUIMd_`ee]XniRaSQPZZeYYiZY]W^g]b[WWdjsfggcox^L\h\{��qrw{}�ytsuf`nnpc^iqq�ujw|ydSXWW\O?T^JKS_cZ\QU__[Zd_lPFI[]uue\SSs`cx}\hxfbistigaxe]]_`gce^epXDSa^JI?O\ud`npcZQKVP[Yd_Y_eb\CRc\LS`_Kb_^U[e_bYTJUTly`\t��zhdueZn[kienhdcrr[\adZNibcUZWZlX\Z\de]iQRU\mjk^WOVZYWDUbhejbldnijheaVIL\^RZ]FQWNKLdeVc`fldUKa^^egkiYYU=8@FZdlkXRKgmtpZ\Y]KJLHFN`\_Wedb_dbdcVQa_E1<GKIKR[]YPXOL[SS\addUWM\]UWOSh]V\Z\aiZ\YQNQLOYQSIJ<<<AB@@>@68D9::DGPXONRNBJV\cYSSSSSSSS^]][d`il_cgprrywseekpzpkovtrxvpaxvwvrureqvx~�|xe|�vklojbprl^Xajhk_iaaeb]dpfd\fgfsoogdffkvmoj`]_gg^PY^\`PSW[OPS_\X`]XYW\YW]\Zb`fZho}gVWP>JRiiqobgkjphnbXP[c]`_adaghghhgf^[\j]QF]W]]e]\npmZc^bg[V]X]MO_lkJX\YeUSSa_hb]bevnfbju|swpyx||nqusrmbZZXjnlY]mxomlohdaSR`W`wf]eovoslrj^d]`Y\TJQO@FBQLWRnca]akm}t[Ud]g`X[[^vnt}�vwd]^_ekjmmlcded_Y_fbbnhVOWNPPIh|omd]llXSTUQVUa^XUZ\XUUcY[ZIQJRW\]]llfYMNebaeeuquvwzu��\S`_[WY^\_hz�{xkoceieb_[]ZMZbdXXX^T]abhivrhbe\QVTbcf\c]icxzpklpmaVQYW\[TS\\WY\WNepn``TR^c\\eh[QW_a^^Y\cicY]XUfiuxcmnp{vdZZelkZ\\adil^\aba\SAEJRSX[_jiODVQR_TWWOKCOSPPQWORfeRZ[PW\UU]WQRQSZRZZ^\TOHMWZP\@CA=A943DTNMROOTMIXdSSSSSSSSabqiebii`gmxxpstk^aogeO@isrpipnY`lvtmopidjkw|~wjkt{sjrytmfceadkgmerighebgnjjglmbqkjhoti`korihomkcd]edX]__RSQ\ejVO]jeihqdT_hjpnvrtgxjaiouuqvsqk]LZemnroaT`][\W_EU^_drn[Rf_ZgYc^mfdWc]``XUYcdbcXX*3?15Pmwmmrwradhtv{ysq~�xglpturlnxniq^efioxla^bi\\OTcn_X]iiail[Sgs�f_
sw�w�une_ibQ\^\^iiVK[VPbNo����x��zzehhhgclbkuzjc[edlnmmnwWerjc^ajkh\QYp{vrklqwwtmdoZSTOZWSVWY_X^c[^[[Xb_ih_aV[]Tjfea^^C?WZSPRyxobf^dh\^cY`[[Ye]ZVcf_aajusi_`UZg^djlmbnqbXhlmoksogd]RQ[YdjVM`\\Qhotf`dhli]Tbfjs__^flklVVdjol]bhvgZcgZY\epdkqlkiahw^XfWXbM_Of}racc`osfedd`[a_\ZZ\[WILQ_\b_`_\j[TXbV[UYW^YPGYTX]WWdi\TRL\VKGR[\ZSOTSONIQV_Vak_Y`[WUKNWXQUVQZkTMOOQRSSSSSSSSSdcljZjjiitwwmadi]Zer]_cfs{ssimj_efghjmg\]`]mx��}ru{pcekoiZ_fif_giloqrn|ojgkuuppejmspqulfehh\]lqsgvohc\`frnghaosp`hrdeafMQYede_ef^be]bf`n}wqnia]WgjeVLJ_bhbJJRmyP=BI[nhPAGMTYZb]bimbaQJW__hhfG:SB(+*)Bl��ov������xwoq��~htwqeztho\j]VZVaUk��jlllkikdbbiadeirfbtoXRgyiZaieku}{slkomq``lottvj�jf�{�����������ushdirfei^OW_`Y^`]UR[FQmehw�nmrqkx�ysbck^Ytlk\ZSK^g]}zfdqye]dlclkmjt~pWOPH_cnbDXae_eko^pu|pmaUQSWO[cdijma\_lbl`cR^WPPYRZ[EJspoorprxppw�|wuv�zpj`NIO[P^_WWiovmmnlqp`mvijnVUTjjmwnlfb_caaTadebc`a`iqbE^aaa]hnnrbSNQKKNVc]VYYWV`^aa\WSllaXdsqfPQUbZ``bPM`Vik_TYRYQWYWZcVRLEGLRP]]IZk]DLWUTTUWT^\QLRXOY[X]SMEW]Z_XQVPJUVGIEFTTRSSSSSSSSbbhcXkihjzzri`abljqw`bqyvzkomdZ_^dgc[V\fe`TX\cXYe__^dhgsmaiaefQ`Y[[]^\k]ga`jmowsfmtjadglkchkfbdppud[fkiicompmts|wkg_hfsgfhroqknpt�x`lmV[YUTTTSXcXA3/AGQIkg_Z>OR>G?EPNC8-!>F=9BEZ_sui[Obhl~\98#5;&/2=Vr{vjn|�|}�vWadk}{x\lcfZ`nyvTYQWQ^kYZpr[^^b^db`\W^Ya\]d\NVeh]Zgg]lyw�w}zzug{~oex}szult{v{y��������xymhq�|���caZcmc^WVXSSZTjcaosafbheZgxttxhcZabrfZ`dZ^hdUWQaidef�}r^_^lshtiaGCC;HRYOe_aa[Ub^cfxqnmi`[`OW`__gb\^qvw�lh_cUQYUNRL=?IHL`abtri_ahhbbbgruojifgaUZaUa^Yfdihcjk\bnjztfhf[jwhV`WW^YXXR]ilXNRKDPRJP^Z[\UTMUfY[UV`io[Z`\TQ\X]ZXUMTZX\__[Yfzumhi[^_\VJ]MNHM]XR[RVSMMNTbe_WJDPSRSRRISLYVMJNVWS_fZ^[X`dVL`t�mUX[RRYOHRQNVNDIJPSSSSSSSSZerhkf`X\nlhjgc^gbhrqorgacXerg]fa_]cljfh[XRS\`Yb`[chmg[ihZdbc\GX\S^TV^^Zcc[^boxtkijegonphbr{tnklwpbcuvlm_gkguoos~fZ[dZfjmhqqxtuxnujZbh^^\QFFNVMO8*,#)1EUVSirG;,#9)*0,-2/$MSROf`u�{X214B4BIK%$
&&2>O^i~xtpmpwiXbnqtag`droYZ_fx[MJZP^h^U[ecXUb]NLSZ_kZUXomgighiiWileo^[mdgY`nfxll_exsc[g]b`hZ]p{��xgZ_xmmiqppp|���|yokpnol^`leX_mplwlaq[bmkhtuckyhe]aPXQM_[Ncm\\SFNLC@IfjafZV\kt�qypc^UQZ\GW^nnk_lrhdladsbS_wiswgYioohstv�ebndYM^YVXNFA?BD^\^fP^RNLRRODMSX[^gel^W^_P]WY_^^ZZfmedh^cRQ]e\VZUXl[Z�m`cjwvk[R`]SVMHYQLSVYht[fijWPPnmba^Z[Y\`edaaU\b^LHV_ZX]i_ab^aaQSRQ8HNHSb[\V\WRLOUVND==?4:50+41@=IJHKU^V[_dUXRRVVRYdz�p\V_YXYMGIHEKQROP]SSSSSSSSShlhwi\UTb]\db_]bYVU`]cS`_[g{xuqflorxtoqcbbZ`\Xge`txnf_jdPWe_X[b]UbY]hkhmpbghsqnohhkw���obihdw�{~vopvndjrstgd]hrkgadibjpoenoyvrt���spoqqjfYW^gB-#52395>:'B]_]R>55;FR_ghavwuv\GOM3,=EB,!@M-4/*'00JZ^[^^_glteg`YRY`peprikvlQOTZdNMRR^a_Y[[cZTYTDIVY^p[M\xkhtvnQTY`]W_VbYlsfa_V\\meYr�qjztmpil�izpwig^PZdotxtxv��{|}�~wvjt|rntldx�ww�t^nx��}~�}gidSVUQJYZQW_TWfboe^e]VQdbcSTHCJWbiMKUNM[WNVZc[is_fw|hfpstUIWJZbjo\O\li\Z[\b[ag`^^iZIDCGGbgdk\Y]ZaXYX`_aZRMMSRTKWRGOGCNVeSY\ZY[YRWZY[Z[]`[OMVXXJSfcVVM[WTWXhrlc]WVFHJAF\qd`^a_iZn]bdTN[\TNW]alfkgqdfpoiebphqif`\@;@^XHVbWBQbdaQSPNPC=2"$$(! 
%'#&?00/:NegMYNLLRTPTc^hPXNW]ej[QSUUK[cac`QScSSSSSSSSRhU\prdgZdZW\WY`fgeZd]ohmebcprs^__XYioieabh\c\]rweusbfhlteai^l�~{|zwvu�w~|ivvynrsqrkejs��qljbq}vungdhiirWSVc[`ZR[rsmmszzvippxrikj�yprkmqpzofexYHED`[ZdUZ_]��}�ji�����xxi`ZK_lY310$GdbE)2/+6>FNLej^V\]aotxTTZnhqhkat�unpvki]_bOPCP___UdT[hi_^bjraYqf\fkZcly�kfggleXQ[gci`DEVU^}}jhc^d[dzqon��r�qvwmtprljp�yvv�o|j}oxpw�sljrhdalZX[Jhrmcgle\eFO\JK]VafYZ[_gNVlqbazfSLND=KPcc_\XRMNRVZZfd^Wbda[We`a_gy~nrf^P_WRTVaYZ_\dbIKDZclwq]SROMZVVgji_l`SVWWLXa]bb]V^PSZHVR`XNNP^ecb`]\[XVPfg_]f\L^mpd]YcVZKc]^^__i`RYWUGSQCDGIU\NJG\U\QNRLIRVYRWWUfiofremypeeg\Xh]]ZbJHQ]ffVJ[W[UOK@LK??1>B5857DL72*,65;<84DRD@8@UVUJO_[\SNG<EB`bTHMXO[iZTVJHJSSSSSSSSWkVQ\]cgfYR[]]inmlnjaeopjmbbilgR_\OSagg`ddhbnijyw{�r|��tiinQTlursxuwho�gmhlxs|zrw{tx�qs�~�szotnrssjkoswrVX\ty|sg^xx~rzrznnqpvrympkrx��rvv�tuuvu}u�|~��x|�wyv�x�st��mslzhhh[qte\S=IbW02:FO&//?IUOSeihmg`agjq\W]aZi]_YagZoal`fzai\ePR[TY]`b^k^Yledu`Z\Y[djdopkn~yxwV>7-36ITQWKS^iPWqgepbb^iod~�|pp�ozrkrt}s�~uv}lgmtfni]lnyzmlsrjrildb_bd_UkPW_lfMPbCSbZTPUBWYUN@ZVDEU]QG@@WW_f``X[`\\_\TQj|scamb`^aeSUMbbU`i`NVVLW`PKTZNRe`VPFU`mcbb_OVafRSZT[YUR[JXVZcW`]ZV`VJTUed^POSTWe[TNONQ]^TfsNabe_glehhmVJFM^_WV\TT\[[\ZUSZ^TE>IMOPSaZU]QSJPd]Y\_WadYL\NX]`\WZUTUTSVU`NMY\^QWETZ]PVQ[QTEA95MVcm\\QIAF>GUVNYZYK@:6<C>CV\YHFTSUHCN@EJBGQJLLJMSSRTMMMMMMMM\lc]fUYV`ha]_ajoqoljinroo~vigpp\^fOMiniq|px��������nFAGQfpffdad]gzhiejz{�xnoprnh^jkluwzknluym_ipqqpku}~worl_qophhgnsquosuzuxxwz}�xyk���{}xx{tf[b\`mtb]fsq}�nqv}~s|pmxutel^eof^hnvi`hdl`f[[fcc]aaX_nda\XVZIQLNeSZU[RVbolVY]h`SecY`Vm_[_[ghimfgYYQFPRUZrr_\]mtkG6=8.<1Xskigru}qaqw��}woxpkyymofsbiwvg^zy�wwyvkforqrpctvtu||vtrvsndU^ifb\t`ghnu{p^OQRVCHVLTRMJ=HFGKHKO?8CHMX\UBDNQE>ADKDLV\^URHTUQ^b\<OYVVccZQK>=HGFJDPNNK]cMO^eVSX\V`^c\a`QRW^VQGWSZU\]]d_`[^WY\TYW[aa^e^VSV]]]nj]fVW^[U`^LP]kYRQWghaK`[PV^[LVVWXYXODHO[bYTGJOVaVTaWJ\\S^geSSDJURQGTRSPQLTQWLJXTUJJITRUNZXaUVPGIJTU^^b_`\JJFIOV[XXN=1+;.7AKnnW[Y]Y_XMXXSWIPQTSOHGMNLOMMMMMMMMiprmweqom}vnnhgkqwzz}|tloyxusuyswsdcnsurmopou�ztnwvouplodSLhui\\_^bd`eopoli
tsvswqruojcojow��|}���{ituumiu{�~ygmkougjenu~}�|���}���{~xrj{}vpvuwtk_[a`\]loiuwy}uon|�t{v|�tj_�s_Y[fvrul_UMT[ce`^\bY^bLFZZ\eb[aOZJ@MP\_fcghaggd\WWO\jf]`fbZa]gqqe_k]toap�}ikVglxYhKGLS\lXo�ulnyw~�rp����uhuf^UYmbr{q}�iaTvxoijpjYW]\echbnbbikoofg``beU]biec}lfadYodHFCAKZOLINVRMF@5EUI=>9?YJFNVVE?>>:;CIBHRTY^]doiecc[\TW_`[ilmZbXIMQJNGWYRMfnUVZd^]ad_h^\\cbVU`a]VS_`un[VQXTTQWb^[XbXUXSW]ZSURVYZ`b]_UVYXWcdVYdb^ec^gnp``Z_c`eka^ZQMX^VRW_cUI53)GcbUVZXloje\WLGRQJSACFGF?D;JCA=.=;DHGSXVZWaWTED`QUXUS\Y[S\\FGJLMSV6,$ &*)!,$5w�f]J[da[WXcef`\PMHNU_f]MEMMMMMMMMnnpjnfurfjlwxlkpu~{vz}|}ccgotttsunhlnrwmmc]_]igj`jbbj`YdcpankXTaefmjhdZakkiqnps�zzpluebfhh��vvvx��rsmrkbiehv�wtpz�y���������}|ys{ssps~zmgkkgijvvujmnxpkn~nnkz�wzuqus~yqqala[dfcruqlrznlc[Z`^bgahkWWnv^[YhcYW]RZmjidbtvb^hpsr^Vpdmbe[c^d^fkeUeliuwnk{l~piug]ah^ok~��}kmqpsxylvvi}_gxwz�iksfeajadyz_k]pm^]Xbefjfae`iofk`^cM9CG==L\]hjf`^sg^`j^jmbSLFCFDJUPVVWV^WY^QJNJP[RT[[MD><>@ADGPIFIWZRV_cie\\ede`[bideRddYY\P]eMRRNXYIKYc_YZ^dribacghcgXWZZ^]fX^[NKKWRKUOYggMLSMZ_]WaVT\^SSZUOTU[Z[dhgegfqmbdhkki\WUU^bc[ZXRZc]ZYWYZcSJELV`]X\[^ei_LJORKGB]VZOGH;A6JDCH;?BGOFHB>EMYPMHL\TUVRRXYJBJMFO[dWSO.%$.'
)KVTDZ\NQ\bai\e`dWJOT[]SC<MMMMMMMM`dda_bha^Ydwodmo`hd_hsuvadfeiojbgl^]jgchhimqjphhtuagznaeoqbgWYopuw|y�owgecsz}w|xyss|omnnf�}~titwntx��~mmfx��rhm|{�|�v}�r��pqkovrsxt|l_s}����hfjmqhdactjkp~~tssibclf��si{x_\[VGRW]dijanocP[\afpzpam~�u`_i]]PXVcja`e\jjwcip\kk_�acf\_]]b]ikgfij`ax{yu~vpfkuniue_umtu{Z\prqnssrxs�_fpxplgmzpzjbWZSXXjbfcVZR[]bndaWNTbmblzdSMNI@C;?MV^ZRVRS\akojcGJIHLKLXKHA>ZZIEPPJEHWPWTVWFOSVRH@DNKIGDLPNXZ[baV\fhcYPhmln`^^ZZ`P\jSOUZ[[XWU`ggkgfiecc^bkdZs`YMTcdZ]`UPQc]PVP`n`IVZX^\[ZbY]ZWSVTPZ\_fbW]hbX[RX[\b_YTWROOT^[HAM\[VTLZVRSYjedg\TZYRSPJJUVOPZZhPII]WcZ]NPFYWXbIDPKOGA=;;?F;977CHFFHHEKA:97=CHSK?;325'$%BZS\\[[T`WaRQP^LKLJIIC?@MMMMMMMM`efdbmpja`knUPb^_c`^kxukiccbb`Z]fcXWYW[[hqrebqqpqwefsaXYeiamll|{���������qalw�xrqs��zx�xjdw{��{ispn|���wu�{}{|mdrylnYxmnubu{momv}vv~}hyw~���scsx�nf^`juy�xqtq{g]TYjr���lrveQ]WP[Tcp]mbiloejYfep|phplxycjcgqg{gqqtevjpih\fih|yzunljkiwsrfnllss|hbsnxblsdp�yvt|vwnlip]hsigagjhhkoaglsSJb^UXyordZPPNZYZ]TVSUMOa\cVLR`c[Tg}nUTTSLN89ELOUQMSX\RVi`YITNUi_OWSURNPPHEB?MYMg]h\Ye_]dd]PEGUORTS`nf]gageT\fmgaObbhrjjba\gWUZRKU`\`jkb`]W^^__V[\VU`dZ_X_Zcqf\V\Z[Yb\V[SU[XR`RTOLQWWP]\_\^kfil^df`acVOSGLPSZWRNNIIEFMGJBIX[XWTMKLOFHHW]XYZUPJDA?LPKDHGLCCJ_X]W\UWW^WRTUM[QKD@CD;9=;BD>HOHILLGRZUQHPOIMSC97.2#&(!,8)%+@TOSV^ZJJISUCMUJOQPNLGDGMMMMMMMMjkmehlrrrz�x`_pmjdZRZlpefUW`aZTaaWY`UT]Q^kn\^bZT[okeiPTY^b\bmgqx|xuko��lkhsurst��tkvhefov��~qsmklte`RUfdbc]XYLS__gRupx}mloqjmxien�y�zfsz���~lml�rqtzp}r�ngr�jnhev��ot}��hvvmrgp~m{iTT_o{lzpmtu{mr[dk����qu�vZ^RclbYVDPUisWckbf[jcjekqxxkxqmjfofpvd��}y^rhcdhkcfbpp_c[bdc^`b]dkr`W_]^`Ojq[R_aSRKITSPPSJP]ZdaVcl_JHYfnl^`YZ^L?DDGOW\e\_[N_^cc^HF`bVWPIIIaRKVXU_cWdWZ`endjopqqf]_U_micfc_^eqj^wxk}}lqh_mhh_gXb\VWTU[a`cmocbe`e`]WZ[Y]XXd_Y__[bkf^TYX\Y^WTZ\RR_bpgXUSQ^[RZcffdvqhiccfdkna^`VZWOQRSOWUML[dTWSMIKSVTTW]aUSRbfPNS[\I>MOZVPFMOLPANQQFIRRR[UMF@QNZ^[X[[\PKHGRYQNJFPLIGRYV\TZ_baZLF@58>=>3+AOO[aZ[^f[XY`HJMWN_dbYXSNKECFMMMMMMMM``ldoiquv~ugaijnje]^othdW[ajnehjompm]SL`dcT_\UVOhniq[gjn^cgdfxlvh\fpfevzz||gktw|}���ljkho
nqy{zthggYfetkaccfoXU\c`XieZqqtztdg{lux�ssq�dkjg�~kvvvnvlekmr���w��tn}�puss|�||rnrzpvfugmi_`bzlYhceovefhr}��mlVSVcgojbabuSR]_SX`NRVTTbco^ns[a_S`\W\_OYU^[Q^Xdjj[wsu\\V`X^WYoT[ZecSXHJe[USeT[_^ZUKFVu\Vcpq`VVNC9ITMOWYgg`ceXkongb_afbWQFPWL8EFUPXa`?FYt`V^f\PKaaSXb^]Ypdgvrlsq`ieXclytpnggnkglnadhhkfatUMTN[SKfor}vXeddaqTTUTZZ_YYab_ZZ]d]_Y_^b]Yi`LRJPZMP]b\LNRLMNYRIK\NDJBZmkohTad]YJA\^X[eYYNE<J\URWKMMJQTV_SS`^V[[irjYX`]QORTUPWPRjD>CLQJUQPSHE@ILX`X_ofXUYXP\QQTO\Z\if[ZK\SPGALYSWKQi]TPVNGUMLXhcQJU^ddfLGMWPW`ZmjXXg_XOTVXZ^dbUIFJMPTPJKMMMMMMMM`ekYZgcoonmbdcYec[a_[dfbbjhbmfigirigvn]chd\`]d_^hs|r�wqxvmiojv�}ufjtrnpx�k`mmrq~y}pt{r}�z}}��tlqqjo_y�uymsmsb\l|jyrrty����dixgp}z�xlcnjly�}rzyrrwxhmv���~{��wpqnqqodpuyx�|ttr|yz�t�zn{pu~vigimzu�rprjgijXgxshqov|]T[fZUDRUbtonnjcdryfg^XVTY]_RWIX`a^_QMSTYNE]aSN]NP]^]\V\WMVX[W]eeW^UXdXRQ`�ubmpb\sjYMOSckPgZjlj^_`lcoskhf_bYUN^UWTIFXR[\SL^Y`c\[_m^VT_kvc]wtwnag�nqkjn�lfk^Wkphnzyqpiacgbleiti`lRQ^OGIFGNYSjbqnm`qsy|yp^r�a[[eucMYP_agMYd`YXPLL\K\PXZPXIT]Va[ZFEMUKLUWDAMT^^RZSYZeaV^aZQP``d`OXPBOXXYT]_ZYUVddaa_lwmgk]XZTiY\FGIPMb[KORHEPSOYSV[]XXfSYV`Z^aV`V^YaWT@VSHh\X]QUdn\f[WMO\RVYSZQPGDP]ba`TY\YT_\^VRSWPFIVXP^\LUYEMORFJXPHVVVVVVVVb`ggjkdlidddipja_ae`\WORcjnsxfeglsx{zoc_bbgf`_gkmlvnvhlr�yrlvtpmylbjm_`qojjaiggamkp\ljgmx{v|���psmry~vnt�~v~~|trx���������xffjhhlluqeewprumbowywkmjb_k~krdgofpkgcgysNN^trqvu�shlfeotvb]ZT]kuog\}ocvvmtp�}zq{xkdosstfaZ[dcTPGJ\bkaUYRpzV_cKMNU^hv`Ycv__`edefc_pcTUXJETX\YT[dfe_XRaodcaO[RVbYq�laltg`sh]bh^eqghSNPU[[Xeinli``R`aq]ZjieaR\^bbRO[Y\c_Zeqhi`gktcRepmnhjyw{yhaxgOPXReqq~{rpo]Ycdgjjn{rnpglkthe^^_`e\RIFEV`iejipyshmmgfkd\YaafV^d^el^XZ`T_UUUJODNbgcXW[WixkdVbYLJP]eVSWZ[JU]Oadc_nmdUQ]RI[[NSMSRR]ZP_SHWbXXdaQofQX]^]SUPWUcTOKJKKIGJDWEbRTXUTLE[\XZ^VS\WaZZLMV[^X[afndY\b\SWY^WTNKWWPQMHFIJGNLRQQOLOPLLONIGJXS[_SVb\RX]QPZSJVVVVVVVViemwyvtveZZeinn]TXRHKIFXXZbv�tsvpp~�vpehdrw{sulf[ec`S_dc\eck_FBS_RS_\^bfoo_fd`U[awo~sjmsykoy����}v�|�vy�tw�������������~��|vmYblzsqtqdrqoecypnnilZZ^fe^L0+47\ecXI\M"!6LafnfyoUU_ULZsieeeg^cbhe|yuq|y�q�tiZ^YVYYYZjl]MS\V
H722?JE^IPMOFAa\EcihhiRaTHRhh]]h_hklZRPFWYSJUYbcc\]_YYULR[T]XbdLPK_JGZe_\RVoaZljh~e]misiWJV`VXb]tahk��ydsSE_doketnk`Vc]ihgv�{sznkifqsirvpimd`acfVJ:>EddZouorlhhrge]_`sqqejp^zqnjppfnE?;6<3;@KEBViV[V^WD7[zojXgf^eh\ddnedb_hgcMYx{qpynZ\qaip]LS[YJM\ekka[XZLSTYbeqb[\VYTZNK\YOHMXMEWb\b[CCWUVedxj^eZWDP]TVPeeNIISUE>JNJDiVXEIFIDMKMVdg]WOcgkbfR[freSgfUIRWNKXdZURJJONNY\SMOMFEGOCHLJC:DMNKNV\`khaZ`k\WUWRORRWVVVVVVVVdgkqoqzvc[Vfidjd^e`RSSP]vniu�zrj|svzrkdXbOMS_gqli^bebR]d`Uaicf^Udt\NQT[QT^RV]e_adhux}|lqywdw��ywx�w{lrxzxpqvv�s|ljirw{rjv{y{sWgpkrnsmfZopmss{pq|efjj_`jj=)#8Om{vV+0# A[m``cW_\V]kve_WX^\lnndmgkr�v�{agi[]ZWQHHQcfYPUMS]W\RKM[[Kc_UNDMNYukbaYG^ZdmibPJYPgr~sgVL\a\SQOcfiXNLHURZcab[[��jlO[UTFY\b]XcUXfcefXP]|nrih\bd_db`^UP`t�td{kXmrxso|taVTfggW[XX\ixyvqpqdRWihepgiZaVOU@9:>QTWS7JWKW`URQUUS^i]ir^th`W`fm�bQMJdaodaSIUPGI\ca]QTZOfhhaaisnsgdZWZW^^ZTfj]cz�mZavkdigZUUYPT_XVWXOYY`fjaa`vfV][cSUQOVSWTRYPMadU`k]MZ[\Y`\OWT\cajcZShccVUTP`m^RWF<ER\ul]STINXb[XfaXPce]QjZ`emqalheiYYKW]_QUXQPOKTZ]YVTOIPRRIJSMFQ_cVIIOR]\SejW\_LOUWY\]^VVVVVVVVffgkilsmbg_fokopw��qfimvwtkq{��osmfde_[]_VT\Y]`dgb[`cT[d^`\lepnbgiZZYWbW^aOZehcgvorruwhhqwj|ztk}z}��~~��zswkxtkmkfadcwkrqvstxfx{nskmjhgnwlxvorqp^^jgd_ihEBIU]cagVA@'/F;86<C;.Ph`am{x~rjkinlpjeZEFM_qppmltp^_^]SPRYUPX]QRZdYZMIOQSISOCTbPQ`X[__]^\_T\bTXOWJXT\`dYOSbdf_[b^b]\`^jedlbkT=kpwj`Ua\Q^`_]Z\OSXP[YSU^ekqjj\`c`[^kdcVZ\oih{�}���~vsm[YWUGWVPVc[gZSRMV`UXHSMPWU`haZZXLM5HNFBcLUYRhg[OPIDU_bS\ov�aZORWjtshZ^hvuhLMVgjhTWVXROQbgld[Ybejhn`ic]gZZTORWLKajlRR[^bPQ\TJM\\XUJKLRR]QLSaY][nfdncaQY][YS[]X]YW`ZLRY]WVP\ZOUTW`kYYXoxhgTSL]_T_uvj^`J;P`Xl_[bciig]dbig_jnlh_QPb^f^Y_elUSPaaTO]bVSVZXSSWTNPWeWO\UTDMFKIBDKI?HMDW_JLSR^ddee]NVVVVVVVVpghprnkjeridpvtor~�rcgrjqkjgtufejffomo~qoiod`]hmodjqiryx�t~��vn}ynkdemahh^_mcdZb]m]`Z]a\qu��vo|d|yy|r�yr�|�{ztviejfymhejkkr^c`ckZV\[_pylvyjfHHT^c`rj`ibYg_T]QNP]X-$0%*1>:=3Pecgu|y���fl}�vi]PU^WSWjqv~ysztrgg[XLGSbUV\abe]XUScXQ\UYniame|lQVQbgSONNRVVRc^][`a\Thjj_U\W_ddhkor_jeus_aht^bi^Zab^IOX\VU\LLLT]imp~zl_jwr[_cdcmlqXZ^Ze[avukbY]Z`RKJKGPOKSWQ
SNG]n`hTULPSRbb`YW^_gW^ijVnSRTF^ZVNOD=OUVJM_vrcdc[Wf`oc]\]Vbh_N_ctmUOW\[ebfgaaYeyyla`Ygd[`MLKIRWRT^T[\`f^kVLN?DCPWUTTXUWZ]jhmd[WO``gpn]Taa]a_`\WZWSUUZWUa^VNdaTPRF*NqNTimYa`i`_la]gnpd`a[]`jrc^\WYape_]b^W`mro`NC`TWVQa``_\cfc\X[_XTZ\WSYaZNRb]RI]TW@GLKGELSK=BZRJPRUQUNKUWOTYVVVVVVVVd`edgibjkkibfonnaosgbZTb\b_aXeqpvywx}xx�x�����y�y}x�~�����|�����|hYYclmrsnt~r{smky_[\n|�loxxqmwagdciqxvlestujglibiuqtoagimwz]TWYhlkg_urpjnyl_06<Q_`b^_ebMXPYkhg_mmebZJM^hTTNPWaOQkwykm�}y�qztd]bgoj_T`gaQKWZknna^WWWaab`Vbkuzu{��prx��oulhYXebf[`cJSWlcj{{p^ZRVQgnfi]TU]aTS]UUN_\[cagte`ZjgqIOdWdf]cgO\RLPZ[bkggg]NUY]V`R]XXRRUY\^VU_c[YXPQa_\RSUPSQUVjqT>Uop~wkeicqrpbU]YYUab^>@COYERKLQKD@QH@.1Ilr`gq`Wgl{he[^F`lsXik{w_ZUJVgib]I@KWlvh\UW]b^XIKXSHUYNRZcW_aajaXYMOJKUZ_e^ZVMJ`jkUbUKX^gktaVb_Vbc_^VRS]_Y`dcqmhZfZd_NH:6TMalus�zua\oic_dsm^X`eadX`RWYQLa`_]``]akdSVb[UI[jgk`_ecba]daV[\WZSX\eh]NNVPLFTR\HDGIJIJLKHRWOMVY\fxgY`WBL`VVVVVVVVQZdSSbapm\bc\aivhm`Xe]Oea^XaXf|��~{}}wtwinpoyggx_cdm^^n\\WmhrnwwnbajbZl}q�{r{����xmvtx��tqqgff`pci]Y|lodUPZ`[SXYblpaOSXaVWltWNES]YYcQZsgjclefJPWjfgiopdS?ABehmulp_goig`gnmvlV^Xagnonfv�xt|ezs~�zquwnmkm`RRMZ[\ei]b\][ae[icfphnd[__kWdXPhnYl�djim^krt}�qzzoabV]U`gUf\Y[`bQQ]JBGP]fk^l]adgnaa]Vo]lmbptqqOVP=9igaYOPMWNbWJGASLTJOUcUORWoRBOBCOUcb[^`d]n���qcbZRp�ym`okznY\ZV[dUZ[hYP]QBLLBEVIJKGEPHJLQON]vbW\ffVXe`KOhpagd\cT`GHSPRWkbRRNUfb^SYP`j]VVi\jTX[PEVbe_aUQMVVLOScjoue^[LNEHPY`_WSVdZVcSa`U]]VR[ceiaVbZOUYcVecfZ`Qag_n}pmnzgieQ\W[^dtjbk`ac\e\aY`d[_VXV\a_]jk]S\_]a_WQi]e_d_qs]dY`aX]VS\cb\YY[WJGUZ\LMJKJHEEIM\ZM@MZUUT_adYJPSVVVVVVVV\]WZb\hZeZU[bfjmknbZ]Xe[^W]]RO_mwsup_Yafdbbee]g\VaRLU`USX\_ckwq�{__bibeimnvzuu}��tj|sok�{mpn`Xjfcytq]Z|xid_aaja_\ekkc[kVVd]bomnnaSThslb`rpxraqqmu}{gtzvseK83AVeteqtus_Ucobjp�~k_]q}xz}rqxw|y|s���y{vt|��~zkmll]Oclcmmnmbfb\f^ejfkb[_ePWWNPYWcsjqk`TcPWYgqm]XeUTNRMQRZWZadTWHNacb[MffclhcesxvslcYZdlod[fhjRgzW`baagQLT`_`g^YZYISNROZPXQUaZY[aOJKZaMLWJEMde^kfaVPQZ\`\ZdaShj]]^�h[__\_]W][YYmbQIOOVfXXQFOUQVKQYcaRR\bZetebf_minrX^VX`hoj^VSavnbd\OdkT\MdRa[_aW_VUTWXhmTYZWOKT^JLYWWTXYlYc^`
]W]ZMG[W_RER^X[f[k__\Y[ZXNXVkc_]VK<digzvonw~wiQ_qkeiccbom_U_pm]g`^Z_VSOVhmqrhqif^\_baXYYZahb`vf\a_lrVS`b]ZW\_]WONXOc[QOVE@WNNRXZPIHKPJHO_VRHGINNNNNNNNXb`cc\llil[[n^Of`YPekdb_^ZX\RRYZluk_a_`niga\b]kaT]]]X]ZZic`cgjgnyp_|w�kdq}xq|�~{txtmfvwufusosrbitnh`mjbabd]^RSOONY^San[`Venmlup]`ibbkh`_cb^e`dimllx~langgqrvovRhmw��nu�ssp���~e����|{|����|zqwy{o���sptw��~xtqWOF>JZaXOXIFW[i_gf^Z]WBA[YPIMNPrZSHN_\]Yfttka`RS_^KXrrap~�pkYWeejiejhbaokev�xg_`Y]b_Ubdga[W^sik~bn]\g^c`exeZRdQERQ_^JU^KQOWaZ_Rj��`LQJEF<<:PUSJEBWOMWTU\g^`xqg��[Z\^a_Y[lssyjfjlhljV^ecb[RWYbb_S]UXSGQdi`efso�v__\Xceb`b\aV_sk][SXTH`^gT_gpig{ueXMJP```VLfi`^MNMQ__dq�]WPQQNU\[[[SUMEOUS]nXUWcVcZej^aYjgbSQB9IZc^^`fnuy{``srsiirmlaYWapojdY]a]V_ZWcfuznlzpjgdhj_[kjf_idhqm{tml\^fdiHUUKSNX[dd]EJQRJDIBG@FD@ISTQGEK[SMBEMNNNNNNNNP\U[ZRTWgka_eVIURUNgfmcageW]S\gdceiiaZ_imql_dWe_PVbl]]\Wjbbimmuryww�t|�ry|l`q�zlmks]akb][��ootvwgXa`_dgge_S]g{uiircSYmoaVgthcfo\RSV\]S[]^W_`[_[dp{vt^Xko}�zqlt^wlasqj}yolev��~�w���rfl~����~m`_dUXigpwpkgtddfv�mWTWcW:B@',6+90:54D\YOEY]ZW^ZCfcUdejyelgeZaggccq��}jbbpr�|eYbhytn^pyhoh[`yuxpgabk]NXb^TMRVicRZ[]TaacgOFdirca[UYVS]YRO^ig]]SWAC`|ogSN;;PPN]]VPM_toipmgeRp``b\Ri`^ac]Y^d^ekvtz{plxub[U`dSLT\bbf``lydT^^[a]\TOdbX[_O_jdRIAZY^nf\cnsj[a^`cgahglsus^Ye^[[^]ehPL^V]]_ppt��`YNPWXXaab]USQS[XX^[WORZVVPUXU`SXWdXYU[imdcbcgjic]`asiodggd`]ehhiam^R_yrcbhchXadcflfhidhlf\_]h[acn~ttfZZRRV[^E_[OaWTYXMLIMLJRSLJSDKZTHEFD9DUcRPYZSNNNNNNNNMWIMONHJZad^WXUNISTa]j]Y_aS^NU]Ybeki__ee[gmfnZik_[[_V]^Z`\`fhetgjx��yx��tptrjr}yigZqcgjidWtvf]bicuujg`[gbbacdsoqvk|�]XZUhUYevddddl\Rac\eceeVZ[]YQV__RSRYh`aMOLW][ndSZZZlxpmfptws{svmt`TVo|y~~wzypxwkgZb^sbLZIUWko{��ke`]PQM\\EB;OPLJMFY\e`bd`mcjcfk~����`bartiovgrywgklhrpjttjgdhnr_u�p__jl{hecmddvllhv}aaZatussZbuoRembf]ZjvgqeYVZv`rng{{jirZ]JOZWQTXYYRYY]YV\`eltrlruf\\Ykjjcvppx�ykg`[[aqqmgV^a`hhS^a_Z[]VT[UQ`mUPddheceVQVcak|c^``VP`nvoXO^hlpcXYZScm^epucckhWYYXU^\�z\YeR\goroqomX^TVde]hdejbTOWhgn\P^`ojwjej`O[ZXS[^_c[koqlprpqpf[skgU[_pv`lyqcffgpus�~xwijv`ecfl_ckmmkhm_^Z^WerplplbbcVNCQCJYNL[WTYOGPTNZPUXOSa]JU
LEP^`UMHNPSQLONNNNNNNNMXICBHFLKX\URURLVYinmk^_tn^fSVWVZm_KW`\aaedagQ[_]dbecg\SZ[\\bco\h�uosh��xkkrpqurjf]dslu~e[hsqce^`etvsofdgqwwt|j^nn��qwra[Q[ff^aeboaWhjermgkbZU^VWU]X\`b`hb_Wcoi\eeZT\`\Ztke`fckmxtned\T\bgft}sqlwhmqkdrlsiYl`lbhnwu{|}lfkjid`fghZfkkmdplhhnufjj`sdnpv�}nhfero^ho[gdjongvrjfg^Tbqhtvb[Y_QYjf}t`Sg^ekgia`tjja^dga`WjpcWintuk`bhdxj}aVkMV_j|m^ebk[]Xl\pW]QW``YTS^^qqxukswl[;9Uhmei`SM]jouk__ac[QQHWSSjrZed\[]e^_bO_]haaaT]hhYYNKY]gvd]afc_fcjn[Woxzssu{o}}xyvylorw`NQWUf[dmWHRQemsijj[]`d`_heWc[^^_PHM_Z_VaWWk\dWXokKN]b^[[^k_UWdbgkkhgkope^ac_fpggcb][]UfuzkpknpngkU`abc`dbapl^m`OLQSP_YViYCFKOVHPDJMGQVMTMDTWILXQU\VTUWTQFNZSQUOQPEEQLDNNNNNNNNPVPNT]]`_XbdUW]SYc~ha][are\cXZSP\kic`TN\jeX\e^hikpg_Zehl`fcalppez�m_YYc~�ulotuqlfrlmvwnylnkfijzyxjqxv|{rtxsjgvojpvnq{wijm_ZgXg_^ffeegea]fZ^eZOWO[Ydbnmg`ahbge}lcndUMQXSH^ZYZ_Yeizi\aifZdpeYZeafidW`e_epfjg`h]bXUfuphfd^dkdihdmehroc`ouwpdd\akny}rtgpsr�ptihosny}fnkh_ccir]XUQPbkqm]]TI[ROkemeRHUXojkgkUerkrmpxVN_fY[RP`\d^NZU]ZKIV^ROZe^hdbQSUNLOQeffcdb]efbddlglopllgZZWOJLjugV\UJU__bU]`\TTNNFQRVgm\bVbhmwikp\e_a^a_PPlkLcW[]cecejstmfed\dg[_n}�tz~�sr~�~y{vxwrqinphud\e^OWWTD\S[_T_i[]\dbWdSWV`WLDRJPG]LZVV]D>OZPOPNSYXbekTUQ_ZZ_\TT]oh[[YXZZk`SYitpZffe__\\[f_cQXZY\VOEFcgVhS;<LT?OMUpdVYW\b^XaLPTccMF@IXWFLKJQ\\\TZJMELSHIWUKJPMOP`NNNNNNNN_\afqvz~lW^^IQcVX`wTMSSYd\hlfiYS_`im\B>Jzvfkjik`^cjm^dikhxxtvo`iwz�o^[Mu|upz��qjhvuyp}`c^fml}{k^_apxjbhghZLLVld^fpZYo`]qeMIPH\ZRWbqp[JJRnkfqppobllonsphozlb[mmqkuqfkpi`]ceeg_jj_deX_c__imvkg\bndgic^oxvtmf^VTUSZgmcchqtj^\c]firgnkgijdnlnl`plhjnlng\he__`rqnluxvicin��hd]i_PUpu`\H^kfi\_k[\gom[AUZ`grchVT\`h{jglomjP>TLUSKU;;<OXGdiZ]xeLftZ@PObcgfqe�sgadfeh_c^]V[`d\KMiY`hlv�lbgYYZXc`cga^fVOJOLQ_mkkTLRVfW^pefjmmmgR>Z\Roswkiuj�xsqwytxcbm][v}dn|�ycjsowyogc[Y]m�{wffceQMPSRIBKWW_cRUV]_]oY_hqhZERTfSXWhVPY<+'8JSSNYQY^TYJZZd^Z\\YSPZbcbhc\FZ]^fknocY]egYV[_MUgbba_k[JEHY]NZCIJJQVl]OYNSWVfgb^iYZTabTLUmlgRPD>?@<MR[QRBFWUQTCIID@PQUNNNNNNNNqhng`VcsvsdSSalnwesfpteivkud\ifkujW_YMUajencfbOOOdu`agac����nWtdfsoiT]xqepr`bkameglWQXYff��kmdejwsgd[ZW
TSMVXdZfWS^M`vnVdXWZkegw{woXShjwgkv�~ilrv��{ck�wzp���yk�p}��|krpfaXc^ZWb[gWU]kgl^db\]chdghmfqqebVWQWS^TUR`ot__fbcXdoicjifdeYojmyUYcmrlnmqh{lcujsedff_S\^fktswkbobWe|riqVYadef_aWTKIWILITIYUWSYjbaPMVV\acWb^SOZMGLILLYTJYieei^\cacXTT``elUNkq`UYenujuztV\f^[UNrnUJSZabcsknqhib^bdjrTO]UWchigupiaYk\`n`kbZafhkfb[bevqif\Nskluyk{�rrxd[mjt^^]fjZlr^^_c`]TACRjgbfliwebpus^V[krnpjida\YlT^[lqoSUSe^ZbjuZdcla^YXb_a`\S_eXXZbjfWS]_VKMV_sim]c\TWYXWLHIMRENQMKNVNIPXqr[_\UL:<>IDEFRnlRRSg_RaXMYVlhR_g_nkw{tR\ZU\WFY[WTTHNVD=KNPNKENFDNNNNNNNN^TQTFGGV^fmhoyvuzmZj�vt||wY^jg_md[VWg[bbg_miWYLV]fneie^d|�{rlihYq{xx]f�wX]ZpdZ``Vj|n`cdkfjbozxhlmklzmbWT]YRLTMWMWYVQ]oh_kwkm`kbkhrsn^ckgjgdagwxpnjqpiaz�slcrjprdb`kik��tovega`^in_]YPZlliTW[bfdgikibYTUU``cWQX^QJ]UYbXfi`^]Z\]gb]Ze[YSNS\MHX]_WISXO]qm{ghj`Pad^okntuol\mjiefpotb_gYVhl\Ycm^NV[OSY_Zbgadmspl�{wd[TdedihbbbUXRUGLSdeZ_blihlba\cV^Y^NPzsUTa_squisZP_HY_ZjXLJO[a[UMbfTUTBT^]X]ZKSY_kspid`irllg^[NNXO^eZKWTF@LNYZ^DEfoekh_lwu�uwoqdvUJMaVFGdjie\bfkhXcqkiehrolgvy|hsgchfhbgpb[qheinbqlpebS]ia\fbW_UeZTlgbZbf\Xa_ZaWUTcgddTQMGIT\bijgdZ[UZ_V\X[Z[TXMFHVBNTV]]LTX``SEIG=FFNXQUYXXKE[]Y^{_Zc`egd][g_ed_Y``]MT\VPSXgWNKHJE@DLGJJJJJJJJ][WSJMGO[bmmdeiviotuuplwr|vaSccjkYTI[ccf^haggTYc_ep~uokpuszrqlnckn�gWqnblgrjbi_Zj��qzz�n]b^V]`h`TZnug_PMUWGSTOKT]^QP]_[hirgfehcZs|j]lndhjcPdgkyrletqimfhXPgd^hiXRVY\fu\axsda\drndWYj`Xhdbc]b^q`meSV_^[XT`cfWUac]TWSOMK[_]a[IDFYWTQ^^caXX^W\imfZUZf[Wf\{jV^]efbvwelsdmso^px�sa_bsyrk_dfopkb`dQBTK\YgXffkZecY^tthlkffe_kfV_\c_FMJNUXY_aZZb\R`npp[g`dS\knYVY]]hoi_bgQIN_XUZ^\]ejgilo_\``]SP\ZgQN]c_gkq{y_imfjo|pjUQSj_UYZrUT\an`SXjnniPSVLZl��edZiwlZWQRTI]mo^QW[\aYastl`Qb``_kprheW[h`UVV[^^mi]bba_brhRVi^ZX]`c^e]ZQUiWYe\WYZSMNDQTUWTL?=KRLECKVHDLIit~pn}��VX^jnXW^JHRUnvhcYcmeRRTWSTZa`SPLMUW[b]`oMS_LIXbbahXTVJCBHOCENQIDKV^WFHQUSV[XJJJJJJJJekf]WWP\^amwhkabfls{kTaypu|�uristmncf__e^jiknZRgton|ujgygWa_ecl\a[PY^Rl�]^nqpiexg\dojfe]a[L_J4EXh`[cp~bXM\f^NORbiekmcYVWS]\rhoq}~mwt]QdjgffbF_Wjsgkk��qc[fPA\whQYVahttdiZ\gb_`_l|e_ni`QUUMT]_fXhbgXPi~vieZYV_XPOS]a\aQ<<KanuteSAMV[RPNYdcd
dZZXd_f_T_[T`US^ICFMRfX[VY[I[]^SZ\fbX^dmwl]YmjfnqlaobQNBSU\ET]\GVJ<IXTTWHGTg^gsqc`eIEGVMXVONGESTPOcZXaS_mce_fkf```V[\Qdjscumncnlinzzl`lgvjaj��~ikjxd_f`W`[[l�uw~o[Xq^]V`SbeU]o}kilow{l`ronxjnpi`pvtgUdls�~pdXPFEX\JEOY]edfsy|�}�}|ypaR9@X^SUWPNJPQYeXXRkiint\c^_mjf^hie^aiRTXKWKIOS\b`bYWF<RaW9@@>FOQPGGM?HTkm]Gl�d@=GXXmp]YaV^VK[\\]PNHLZMMJGE>NUTfjYAPV`JTd\VXX_gnWKH?G:8ACMhgUDD@SMGDDHGNTTJJJJJJJJfoj][UPfsojk]qi_`b[rhL_g�qsv�xtoersud]^mgijjqgT^m`TaaWPbZPSMMQ`\`eTc[Cag\MPhqoexl\aSWpdWZa`lZM\iwmjabpab[caWWVP^d^dhpxtschnwypvyqpnd`PQYNQNUBE6RUFS`|ojok^YX[hpfptqp��o^fhhdmo`agikmmiYQZXdNQ[`qidZVZ`gp`hm]binpdghV^WIMT^YX]]TEHHKKOMJOda]Y^\okae^\SNXW`SILbecPVbbd]Se]YVTJMMGNVWcebXbZ[W`nZaZ[VcfXTi{zdcqaW_eX[g]WemYXWhfe`PeleTaZQVR^ah`RVT\`pnuosipnecichccWU^flxxpkqf\g{{ni`Slpd]orollg`]ZbcT_fh_c[`__ii\OjRSUh[Tid_fXUWOf\dej_]nd[ebsjabQkftdYW]CSXPcmf]W_`ZPPY`o��{��zrumbMLXUZRRWfm}qaSKHZWZTlb^UF]\beXokhiGNaYYSMSbf]VYWKU_ST@NWY[XPLPMKUQYWWR^\\fdXUbBJkj^\\ke_fXKSR[SQWQQLGVS^UBIL?LO:?FOTT]cb_RSZnYCK>BIFFQTRFCBMNKPSYXYL@JJJJJJJJ`lfW^\Uelhb_Nfhf^cdnnfg]wosv�zpXftorc^ix{nlenynkc^YhlcXbUejkcd]]deii[UY^fYRb^WPemg`SVkiXOKQQV\fhnekYV^]d_T\]XZZR_]dZf~{{nh`PURfhhli[^Y^edjZYYOKgdOHGE@JfzffagfsmdxnfppjgstbNRYSW`YQVfYOQdceJP]_blbhna\kyqus[X\__ZV_T\`\Y^XOY_UQR]X]bc]RYYaed]Qaa_dkcdk]T_OZZTg^gaVh^R[WYSMNVXS\[[XYc^b``OSgW`^bRU[b[\bjltjYYYgj]ih`us[S]n��dlftYP_QLZLLXYSUJQaboaH_L[\XS\\QWSU[ZZ[aewwk`klgb\\fe]a`dXVcdfhdaeabTHIS]MKUP_aera\^QYhwlfqZ\fUFPFHDKMNH?XPQS\adx_jgyqg`Odh{la`]^XP_XQFO]YW]dnptsfltoT_`MS[Xe\L_aeokcltqgc`l_cbO][baWNfbVGFVJYWQ\cYPbKSVNVVROTSLD=>FPOJQKRPT^YY\aZUQhUWq]OM]aqsk[Ta[_^VHMNUZ_ZZMBDHLJTLLBBPSXdbaVTT\_FJCGGAAUUWLL\F88EFHGRMHJJJJJJJJ[hcVkrgid`bketu{qjwrnn`jwoo�ypkidspf]Kaiuhd]fxsjWZ\hfaZaYrv|ngLDwxlf[aigab[[MGGX[dW_^Xj_OFRQZ\dj]Xh`_YTQTQ^`b]YXWPO:GcRLKTUTTQODALehc]bYam^QZV^ivdPNJYZfz~xj��q\>XWYZRcylzs_WSQWNROYgcbf^a[YYg`aoektssqfrd]T^]Z\^RSPS`if\YQSSNT\WZeg[TJ[\foth\WRIN^Va|tpbjWKLjmYYXPh[OUTWNKXVKDOI[VU\WOK_OKRNdcXWV_XYQSLbk^[dRZghf_QedXURIbmh�gQE8DDP]MSXLG\QS_\iaPcGS[UWg\R@CASgRY[`cpdRk
yn_ULE_cnnihfmcTURmllkde^^^OCMJIIYtZTKPajhmhbTZ^YGV]Q\X]BJLMKTejjjkRR`n\hp`[bdTZZY_[QY^cV[fbc^][_[YV_fF812=INSSQB\\IW_U[gr~`Rng[]PMTbRi_RPHI@FDPNN\XJJHDDJRMJXLJFFIC;;MJF=TVP?JWPTVZS_^dal^aQMQZY^e]_RU`_KOKU`RKHFJKHRZXXZJO^IQc]YV_`\MBGHJHNXYahOD^IBCVTQELFGJJJJJJJJ]^\Vmnei^XZaitpqymlkgZWpbak�yfbb_sojeShfqokl{�tiZWV]UVUZqsml][OJNaYda_aPQOIQWYY]kofi_PYYdhsxl\`gbY]_\YYU]ZXTf^QVPDI@XrYVajfhbc[PZZd]N<<7AL\UTSQFURP_k�tV\aj�fORWUKMXX^oajcXSPWdj_ZsV?AXUihi]]bjamjZSTROb\_Y[TWeUSPUPOKAM[\YUPMGSS[^]bQXNFFYcp\LO]jRAO]hZx]MYZ@;HYZbXOZS>=CYUMDDWg]_YZW^p`]ZU`ZL_D?DKALXJXdmw_OLEJNJPSUdMMVjgixsmTHEPTMeoq_WUft\U[b[JMRLX]SN5@EPPE\[XCX^FOZ_``TBEPl�nocYSNUPe_fuwnZ]]bZ_Ydietn\@QGXQGOVAU[XXamou^WFQ]SY_`ON\o~LThnaet\JW`gb_ht{wkqTUZZv~XUjfeaM6*OXY_OVbVdricYTjgUSTVihSMVOidF>0?P?ADCHW_]^l^WVH@D<CFD><:;@IRR8JK_kZTU\OTQFRXU[QIJdUUJKP9=DISYSOMPTWNMNRV\kfg\RA:KIDKF\s{eHEF@LTX`fWeuZ?@@QBI<;4BCQJJJJJJJJi\Z\jYTgWalltubTpx_jmXhnbdkhd_jV^nYcb`h[djck��i__UPXQSSSKBJSNRZWLT]YR\JMJOT^\QWgf\eWR[Q_brt�o[^]^VQ_Y]bcpdj^TMV\^Tecu�gotsgaachcmokdgPFPUMliV[VJHPX_]k^TJHL_nk_\eef]]\UcPPMV_\]buucf,;KOCZadhRE)=fm\[\]khfj`a[[bdeTYTSSQQUOORRWY`gqmfnZ]OIFMG\PRO\d\S]kj`c`ZbVVPZd_[ZM[QXPDMP^YJMY[sgb[dp\af\OGHZOGUU]WUHKWNUWKDVNFJMVS`aJKMPEOb^VI<A?7J^dVLUU_LKX]WTsfffUPH:I_[UXhcf_jfdTPSOLOOADF_glU[[ba\_WGQlnokjbiojk][`\_YT]KPVQQR_Leb^qXVAYUTP\]eoxpxtVXROMeVah]X[TUZksZfnWjtdx{�pxZP\P:2.&,/5Sc[l_L^ZSPVQ^ecggde^X\_[UVFfiGXcYY^ZUPTabTRRFHLMN?6DKHGFBALUM>F?CWRKGDXTROZ\PFLGHZKXSQZEIPROR]U\]ZUR\XGBK[NWCKaLBQXRBQWSON?UOWPN\hragns^STUVGPLM;@JJJJJJJJT]]`h[dl]itx}rnagotnt[_b_[PbpYYM`__`[lkfchabf{d]_XTlaVDMLBTONNSTO[YRabMU]SP^]VGRlbdM?CUW_bhf]dgPFIR]Y[lplpfXYKPZmds|vnegei]_`f|xhhaax_WSU]ZVjlmd\\bbQWWW\\bW\kcfhcfSM\XWQUdV^tglqa^K6IL\TJ-F`UP37[ZZNNcogdpojf_vi`b}toqdY[QRaU\a``d^WTQQX^QRa[RY\c`]goqt_MFRRPAG]LNT`VNJSHHKMLaWDEMJY]XSmPVZOXJO^GBRQ]Od[FXZ^ZKDVWV?QTZagi]SWNHRFOSMEPGKK^KGEKLNY`S]Gnal|kdjYNZVWb`annlxTsnidGTRJRUPSgUwpi_LRRORlsiywpownbaYT][CLdgTYPWFROOS]RSSNJ]WEA@PML`jgbX[YROUYbeRO[k_S[aci_VapkfnnlgdfX]V[\GPXeZheZrdei^gu^e]hipr]Y
[kkqliPPMGblVZUY_Ud\YPX]=EGGQ?<HGCCDIKM\G@E9836A;EONUNU[XOFEJR`]bOVYPDVPK`S_^SEQXVPK<SLNLPWPOa_SD[ZRTJFORNI`girwakhddUQEFOYeNN[[[[[[[[fZ^e`]sqadhhjaihr~rR[fhS_YTclSSR[`_\Vgnq|viaZwnc`ZYej^QQLYZU`TLLKYcZdnjl�~rg^aUV[Zijdde\LO\dWUUTSNQfecimlnia[WV`jix}vojtjbgci\jkbla]kede`Z^aktp_YQXd\TI\``c]asq~mguqfb_l\ZcabwgrsifI&-(*-(#,6/2;[dfMVts^^rndYZup^N[TZdZXejT_Zg`kYmcTd^TX`_be^`mh^]mwh^hYWIUINRaXUZWabXBLNIDI<EWRNMEQVTR_YBREGGOXOLRP^JKbJ]sjbVPZVWLSgp^mudYU\QNWVWNAUVQ[cjamUSRdhd_Q]ewump~wkbIV`ior^RX9NHKUP\ZRO^`Q\[shUUGJJX^gsqpvpvlgszmr_BKRP@C?GBNIFFTT]cQAGHJLDL43BHR\^^[`XLK_odbvbYaZxvXon[^cUMjgdcdW^]]je\^cheqbcS\jeuuea]dca]\chmgtoaelscRQ[lZWh_lQVW_cXZ[ZNLLMMNTXX[OULEM>B@FD<UQCBCDABSPNGLVJDHRQHKXW^i]WacdaXRHLbZagY\XSGZNTZbWIJK[QGPR]^]eoegegneSISOS`]^[[[[[[[[kY[\TWj^^^][\VemzhZXlwl]WW[\_QRXTXSSYgjlolkgYrlYY_gbmbeiMcUHOFKNLJTILNY^_^\]^d_fg`bieeb[[QWeecLIY\Y_X\a`_`hmiunsd\\dpqqzs}kg]\WZjb]`biupjwsgywgj\WXZb_}xphegwowziu��uiqoifbYl`lgWVD" $,(7Lc\�z|~sqx}hdU]^^beY\mZ`lUV[ojg]dkmVS`ocKYLLOUTNP]]VOT]XU^W^PeUSLM`a`PN[fZOVRGRKDP>?C;EO[gaedaPE=\XnwdIVS\]`ezzmX\a_XacZV^\eZYN]N?DJ\XEXe_Z[mdwbdfVcaZVWfvvtngeu|dAQc][PRRRXKO\ca]YSZ\PMGKTFXXZYe_hft|�gsoNZ^WQ`VLLDQ]]QTcJS]YYMJIMLXVPHAPO>IVb\]b^e\OP_i_f�m]fPtxlrj`f\Jpf]]aY\ZcWP]]IR\egi^]ebu{kii^MA@GPRIUSB]zqghSG\ZFFIaTM?LPKOOOKXJS_WSTPYZQSOYJJFEJF]MLKIHLVo`XFBDI>KHHIa]X]_h[`YQYULMH\Y]aPOYSBJMJR[ggRJ[YMV]iri[gmcafrsh`jVQV\Y[[[[[[[[i]SKPSYQUVWSTMX[ZMQZ`cXQKUaPPRSVINJQ[\[]]ZVTL`bYYbj_f]it`f_VVZZHJKXPXQZYST^joj^cYbkvbWUYZV\cis\b_`\[]]ebdU[[]tnkmrv~~i^qpoif[_ZW^dcdh`cqtbjm`nhjk]^]Y_OYoohebkZX]TbpmaZdafpoei\[M.,6'
"*2&*<GT}��{bkhfz�~plSaq��{skyqtyjenyv[g`cHR[oeQZTRQRUYQPeJSQKOSTYQVXqlgUQVZ_\UXaY`Y_]a`SPUQPCIMVa[Oc`kTEUdjeVObXO[jcbcbZXXkjlvnltkd\_Q]YM=I`dSV]Z\WWINTU_izcdZbagxvzzqqzxox�wkajgovtsgd_lfkb^gc[Za_ljhlng_`g`f\wp][_cnnQD@MebZdb^NWbXXPNPLR[^ed\cVT]YbabW\`]`a\[ZT`VIXUZeqx|jhqhh]chgbZXU_gU\^fjjkiinlsfcai[]ia[UG?FPJKJAFoka]d_G=KMJ=CEIFOH[PEPQOQYNR^RSbSTLIGOTMWV[HRVKFLQQCLNFO_AE?MHPQNG:GEW[CFYX\VQ^d_QBaXQPNHO^ktUVNQDIJO\P]gyjc^_dddhXVWUT[[[[[[[[pZSR[Z_^cge]\VZRZMQ[Z_^WR]jSNNGGFOPWXJNV\TEGMWVUUVTPRLOXfil[NXVPJV]O[U`dcab]aa[]SVUf^YZ^`\^\`hYgnWQZkVXYVXsfX_dfrjat�ufrreQc[TWZadolueabl\ftmga��kffW^[abmgZUgmtv[PPJDK\TXlohXVUZ=>SN?HQORU\ZhWxji[ke^oyvpt]^aqoo}otrlu��njoNOHXMZauq^YDKRTRVMNsSUUY[SUeWQcihe_onjaghkmac[`cde[`]MD@T]\\_OIRaF[\[gok]]Y\kbcTDSkVJGRT]^okRLZe\\YFADLVXPPPTPQRPYR\szYbW^RWclunfmpjxmy�{eegebelb\Terq`Z^XZ`guundaXd^]]^eezqgYXWqyjii`ntnmjrghm`cb^_bfealsljbSV^\INWX^bjeWZc_[^[^l]bf^tvgrtae^TTaofX^u^LQdigobbg`nh^ek\PaV]`S@@KKEC?CYMQMU]PBURL?JEBPMNaOMa]YNMAENDKHGML8=CMTUVTMUZK?>A11AN?KdGGHcaaNF:F]WKB;-@NFRRPVVN<SMU_XUWljmKSOI@OTLYVVZqjdYSYR^\XYWLV[[[[[[[[rR_mi`d`hlg]^\_SUOTXU]oaGOa\^ZV^UZWZZKRR[UGScaVPPONSIEEMcf]D=KM_^`cekUbyifnpumfjc]P`g`\UVVbfolfueRIN]HLR[h�|mlsqXaey�fWa|p[`bU\MWh�qpbdd^`fkqmcyjZbjXabTWc^NTk{�fNRPHN]XPbkfL^ib`cmjZecJMU^Vhlh]hNQWXahl[]^gfnhW`k}e]loZ\`PDT[PQ_os{jLZSVXSYUUy]ZZfmcduj]qfdaZkosccflk]R`TK^tnsjXPOdidad^Xa\Tai^bincUOSiX_TJRdW^PUIQIYjiYZUHMWJHEFR\QV]ZIHHF9=Lpoespg^f_b`PPcdYn^`qoheffWU__YS\lVQ`[^geUood^UMn\USjvmmcnqxeahct{���xjcl�wujZYdptnkllkjnmj\STPZ_chd]^]Y`freph_ddgovti]\a`UO_is[[_^pYIF^WgeXdcahjbTf[bhTFBDLHGC?ZUbYJFNCV_RPDDJQANSDZiohZWRMC<MOPO[KPKJSWZUYgfOUWbRSOXLCUEB@POTWTOb^D5>+9:LIS[SSbaZOLQfa\SqpmRNgklmh\fdXR[YXTQ[Ob``[XRb[[[[[[[[dXnrh^[STVTNSPULJcqcQY|aUZe`_SRY^c__d\aN\VP\dce^XW]cVX^ckbTV\ib^^Ybzzahv`i���u^[ibNMOGW[L]qibOU`QUPVYSWbzvtkmyyhimeo|qjkmoc]cmlQ[h{migkqfbTPipaUOUZ^LRXGHQSJZ_`PE@IZVFEKSUhwq\ouaqollbmdRdSWkkog\P[Pc~kgXUWY`lehXZp_afJDK[ba`mkYRebp�wa^U]ZWZ^u]eltzrnvnfpikn^\_hJACO[]NaQGUrupbY]_onhhW]beov^Y`_cjcSMMPKPMQQIOIW[8AN
L\bOMVKUccZLGKLBSXPL_lnOTYhjtwtleikafig_TWYYYfds{�pijke[dmp\WcW`^SUqrV``h�ioccpuwfm{�y\c[w{nny�k^USYnjoj^bV[bkoqrkhij�sc_d__agfecSRZ\jjvsabqrj^ix]W^ff\a[cbKSNaa[SVVldfl]`gc^Ve_ZaPKHEPJJM9g�{AFN/&15/B9;HUQ\WK\Z]^PGJMF?C^TBPNPRPITWRMTK?OU]ROJRRJD?@NWQPSPNb^C.,2POT_[^dhmgeQMHSTTG`e\cXQaqh_\fcc[WXWZVYSbig_ajp[[[[[[[[Wkv]\[RPTVWX]VZS_W]ml]|cPY_UUOV[UekijflOQS\f^]lc`UZ`\engf_ezgkvdga]e`eqcWg|vyrkge^P\skoajw~hh_jaW]Xpk`Q\^oroajnfdrmourttikchg�tfrecdnytw�}rnypvz]_U[JFYaaa]JXU\QTYaklie[^hnq_Vga`qdZaYeZW^Vb]kkigh[f_f}|}qbYWjq]UfX^ge__K^hwgdlohf]^x�uuYNdaLRbunqpoj`dwhhb]]i]RUcLS]^\Z_\XbNO`akdkr�~pj`w\J_]dXRTZdeXQJLBIKERGKSL^STh^c}^ShUX_l�bQONOjePQUZSBU]\enf^njiXgmUNchhZcft^]`vm_Y\iiqhel`SG[RSQsUkpslnoeencih`bk�iuUi|{us�x|swef^mo]Ye__fa[[UZbb~rpjia^ckfdj`Ybh]XcghhjZ`a^lcb^bg^`U^`GZ]YhaZHXrYZYNjk^US`f\gSQOMWHHUC]`Q'-)"<LM\VbSJBGKJUXK?::ADSRL__PYZX^cpZODXKUXTVWRKQIRKTSPR[]Z]]WYZeZe`ghjswbSQTWXUMG/@RKiRWXhc`UUTTXZiovle]aog]atg[[[[[[[[OcTSNR\U_YZWTakdnlma^QilaRchYL[YTpyal�{UW^rqbbipdc]f�~{bZr~bhofabgem`\W`P]cackcYVQp��|upnooinp]_nvvmXThidogY_WlwuyspuorfTfe^lpufgsvk|vo��|obitg]c^]UO[a[STWT[gkkold`k_VT^USZQTkWiikUYOPV\a[uvc[ZWSdTJT^jul_\hjeZkoqpkjha]lfc_ed^~pd[_fc\Znsd^V\`\ltlcgobkgVS^R\ZbBJ[^lf[VQ`HOX[]h{���upjqMR\_cfWS_QT\YUY]omw�nld\`b]Zcyn`bf]a[^bROQU^bVUGOWSOSfetvkda]^erhZWjymfbvzc]b`]ZRRGLIQ[XOEGRXVP\|^ljtXKSSKSafp`gsz_no^v|rapvwtw�l^k_c\mq}d`bdFJW^gkjjVRQe^VelgZgrdV\cffeb[jomcXjpd]^\YXL\u}sine^kg\QYYTOG\IXZ]Xi^UX^RSZ:+-A<34$ 
00>NDBCGMBLCU^KCMTIPFILLKEPEYd_b]POJEPPb[POWUce]gij_XYPW`]badijh^TG\kGBEMYMNJZPNSVLPNKKWZVXV\MD`nwngmglW]\dYddddddddPZKYZOOQ^[_a]boukg^WUQ^Z`Zab^QUWbux^`sspqznZUY_`dgowxvow{emphic][^TTUf]`[ZTSLN[Xb^`iirlmsmddcY`mjnk[ef^fb^mk�sdaa``exqagaT]fig^adbtl^knmi`\[MM_hc`edZMJPUPPUSUX]aah[VZbab^MRm_hc]NY_hgkdbloebYWVh_WQE[kqlhclslz|zuoqpe_hgquz|u�zphkdWYT]`]]_^hblvphinb[X\ku^dst[[b_cWg[Ulknjaelvvtqltan[ahgglf[\XUZPOUaoov~utl_ZUPR^tncfgZ\SSRLNNQduouiee^USXZhyxf]beXdc\cu}ymgrr_Y]]laZVOWVS\\XSSURMWYpZe`_A8964G[bmT`py\a`WUW\f{slnhjXejjj\gr�pmhjKDLOW\`e[VXe[Xorkgbcdjtrg`^XO\ce^_baZW`kpwecjmnnvsmrqh]cYP[[gTc\PR`\_fgZ_zN+"=IE@.#!()2>PD:CKHRJIULLV\NMYQQ[`QXXU[V_bVVS^TI_e^Y^U\e`bdi[YWLNQMRS[ahrrfMV[CBHLRPUYnfa]S@?KMDKVRRZVWXadjh`fh^TI\][dddddddd\eRfp^TZ]YX\\W^n_^UUVbib_b`]f\TW]il[W_mnhgk^KGJS\\ckqhlgn_ajdnh_b^U_]WTUNQQKTRPIPVFKYotajkmnj]ZfgjymrwZZ`^mtszhnrefhhbhzvmhcSSddla^`apfUNWX\^\\XX_dSSeb][VZ[SSWVNFHZgo\SRZcdZNTegli\QS\dEer|dUYc_bfohq~}mhhhqcpyorpy��udpjiftxz�~~|vkrdV[bortflYqghqk`\^eTR]lr_u�{rquz~qwh\fg]\\MOTPRV^nVc`_]VNWdXT\RRLSVvwa[dbN49KYbffl~kZM?P^jhfhc\jpcli]ab]`WPVnu]S`fJNOFR]X_^SQX]bebf]f\Vcqne^SMPTX^nel]^[XGFLQXhi]`V^ciQUTU\VZgzk`biW<ULZgcalmdef~m[^Z[Z\a_HN^`cjWJYZ_]XbsycaYMX]a]T\i_A;ShhXSSXbioxtovvrkbdWTAZ`aYVFITai�ubL-'$$(J9.&492/0/7[aWM?HRPU\eZSXOLWeVW^ebWbj`a_im`\^ig[SQ^^`izuZ[Y^[RPNQTYio`<9926DGES\aysofWFEGPGOfgjyams_V_omeshbRga[dddddddd`qYeui]`c\V_lhhxttm^X^bX_da_he[YZbgc_]\Y\akdVSXd`TV_d_eZ`WXcek[RV[Tgab\VTZ]X`bfXV^KN^y|dp��wwrlxlu�zy{c`hcjnkmWXadkfd_abprihXQgc^ZZYVZRDIUOLTVZ\\U^QPZOV^WUQHKSTHJQ\]b[_[drndilgov~tpjqubie}znreLVim]g��webk�jnhiifkptyryxpfjfewxoytame`���qna�v�}u~viabhUU[`bY��yzuv��~qg^bi_r�jb`VXTRYLTZVUSKX\Z[cUOWeh|wzsgrq]Y\]bjjm}wql_ktxtpnnekcNaiY]edjXYYfiYUcme]ZHWYJ[mokf`[cleYhN@GXMKOU_g^TUZVddZND<GQX^g_S__`WXKTQSMILVpsrpfV?]MP^do|rjaSjYwxolca`ddUNOUZIOT^kofm{y``ZLTVYTUJTa\blf]OLMPWY]ro[abcw]eegrbjfoih]a`ZPZ_bgXTKHKXZ[JFHACIEFjb[ZXSWRTXig_GJFF[WVZe_LQXTXU`]WUNKMMIFOLRZjrZ_jsnb]ZkjesxfB;@DDTVG\cf}qnh]TXSYNQ^ZX`gs{THTqveyyfacbQdddddddd]tZarj_b`ZQY
llitoor[\`i]_acddhbX`fgebb[TT^jdWQUegUT\b`g[a\ZbeiXTT_Te[daSUW]ZY^iPU`QS^v{hk��ptvkwjw��}widjeghehYZakpdc`f_kuhk[Md^^ab_[WRHFYRJRWXVWN]VTYIT]XYULNUUJPUXU^[`^bsmall_jp{srkuyhjgyvcukQSkpbg�secl�jqiihgkkpzw|}sgf`\txiztZgee���wr_�t�~q}tf_cgSV\cfX~�v{qn}�{qe]_j[m�e\[T]WTWOOXSWXS`X]bbWI]hs�vvapqWVZ[blig}zrm_jvxukekckbR_o\_hflW]dfaZYdrl^[GYYI`eojf^WdodRdJBP^KOQQYbZU^USbm]PCBHRX]f^Vk^bVUMZQMNKMTlsrh`VE`\OZhh{qpfShZwxpmdd_jcTLU^^EKP^jmfo|l__ZLRSTPPK[aT[jebOKOQSU\xw\a]aw]f_codfdwji_ba_V[]]d\[MBAQ\cNBG=ADAFo`VTTYXPOQhlcCKICY[X]fcPNUV]YeVTUSOMKJNUP]]au[\jrkcabondqtaB;?FBSYBZ]n�plf[WZQRNU[]b`enyRJSmrh|{_a]bPdddddddd_s\fvj_fc`W\kkittpsX]`eW_^dh_fdU]eec\`\YWbmfZRUheUV_c_g^ZYZaehXWR`VdZddNSU\\Y`oRV[IO`z~jo��rxvhznx��}sh]efkqmm]Y`fj`cZeajwhiZMd_]`]Y[TSHBYTKT\^ZYS_QNYOWYUVRILTTOOORXh^Wa_urbhjfqu}vripvhgh{�jscUNgk_g��vh`h�itlhdfpsvzn}{nijf_vzjyv\hfa���qn_�s��q�uf`gfSX[biY��u}sr��ne_cm_p�i__TZPQXVQXRVSPXX^]]VE^`f�xuoizm[XZX`mml{zrpbjuvwoipbh`UZkZ]geiYSdi`ZX`oi\[F[YL`psfec]eedTgK@MXIMQQ[e[S^\SZdWOCDGRY`m`O`\bYVKXROPGKXpuvncV?Q[LYkp�ql`Qg`twmiae]kaVKNU`HJSdom^i�ubb[MSUXUQMZ_Vbph^IGNRSVarrZa]dxbcbipgjcsgg_b^]SRa`g^YICJX]_KDQ@?H>Ak`Z[ZVXPNRjlaGIJFZYUa`bQNSU\UcWXONSRGDKQJ]__z\Zjpf_]`jmdru`B:=D?R_G^am�nni[UVSRNSUY`[jnyTNXmsg�xdabdUdddddddd^pZftg^c]XR[jkmxspr[[ceZ_`bh_faV[egh_a\[T_icYRThbUU]d^d]^]^eghUQP\Vg_dgMT[[Z[]iQXaQS]u{ip��rwuf{lr��|zkagffjfgZV`fkdi\gajwih[Pec^a\W]SSIGXRINTYWUU_TQZOO]YXQHMWWGPVXVb]^_]tpahihkqzxskqxffevxjphQMnp`d�tg_p�mpagehnhl{z~ymkjg`uxkvu_ih_���tnb�r��o�ud]ehSYZ^eW��y}srz�{pg`ccYm�j_aT\QT\QOXUXRU[[bZcYJacnu|tcqr[Y\\bkhh}ymm_gwznliq`i_Ran__jfdYVblfZYdnh^\GZVNZgqig]VgkaUiM?MWPJQQZf]T_YS_j^SC=HRUZjaSg`aWWHTRSRFIVkprkbWCW^P[fi}rncVc_w|qjah[jcXNRT`IKQ^jrfmk`_YKQTVROMZ_W^icbNLRTSS\wt^d\cr`fciidibtmh^a]]WY]Y]WYMDEQ[cK<J<DL@GmbWXQTYROUlk`HEHFXWR^bbRQVUZVjRXXSIKOLLSHW\_w]Ynuia]_lodtwaC9BGBP^CW\imok[XWRWPRZ^aajmvQGVjtb�sgac_Sdddddddd`s]dpjbagYQ\kjirpqr_Ua`]_bafbh_X]ccg__XXV`id\SRegWR[gae\^YX`diYVV\Sd]_gPP]XW`_hWU]LPa|~fn��qtwizos��w|f`fggmkl^X`hk_e\i]kxkg[Pa_\b_Y^QQGHTQN
RV\[YUYRQWQSYWYTJLTRJMRWXc\[`bwl`njaosywpjpydjh{wnljTMlk_i��vi]n�hqjlcdnpt{o~yonfc^uxovu^fi`}��xmb�t��m�ve^fiOV[djV~�y{qsz�~qf\dear�i\_T_SSVPQYVWLTYXbVgUF\dj�yyqftpYW[[ajjl{npcgvxrqjn_ndR_i\^khc\]\feUWgkg^\FXVS[nrfe^YigeUbIASUMJRRZf\R]YUaeXOECEUY\j_Qfc]T[MUPPNGNWlvwj`VCXUM]gj~tmaXb_qyojbl[jcRJWU[FMT_jrcg�s``[NUVVQPMX_X^jj`MJNQTV^vp\bZdqcg`hkhh`wkc\d_\TW^`g\XKCEU[`LAQ>BG=Hl`V]WU\QKSki`HEJFX\W]d`NQWT[ZcS\QOQQFIEUOX`ds\Vksgb_bnn_ouaE:@DCO^DYbm�nohXWXLYON^^Z_go{UGXjtg�udc_`Sddddddddbbbbbbbbbbbbbbbb________________________________ccccccccccccccccZZZZZZZZZZZZZZZZrrrrrrrrrrrrrrrrffffffffffffffffddddddddddddddddZZZZZZZZZZZZZZZZUUUUUUUUUUUUUUUUccccccccccccccccjjjjjjjjjjjjjjjjmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmqqqqqqqqqqqqqqqqaaaaaaaaaaaaaaaaffffffffffffffffYYYYYYYYYYYYYYYYppppppppppppppppkkkkkkkkkkkkkkkk````````````````\\\\\\\\\\\\\\\\VVVVVVVVVVVVVVVVYYYYYYYYYYYYYYYY[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\eeeeeeeeeeeeeeeehhhhhhhhhhhhhhhh\\\\\\\\\\\\\\\\hhhhhhhhhhhhhhhh^^^^^^^^^^^^^^^^BBBBBBBBBBBBBBBBXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYY\\\\\\\\\\\\\\\\________________bbbbbbbbbbbbbbbbddddddddddddddddddddddddddddddddbbbbbbbbbbbbbbbb________________________________ccccccccccccccccZZZZZZZZZZZZZZZZrrrrrrrrrrrrrrrrffffffffffffffffddddddddddddddddZZZZZZZZZZZZZZZZUUUUUUUUUUUUUUUUccccccccccccccccjjjjjjjjjjjjjjjjmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmqqqqqqqqqqqqqqqqaaaaaaaaaaaaaaaaffffffffffffffffYYYYYYYYYYYYYYYYppppppppppppppppkkkkkkkkkkkkkkkk````````````````\\\\\\\\\\\\\\\\VVVVVVVVVVVVVVVVYYYYYYYYYYYYYYYY[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\eeeeeeeeeeeeeeeehhhhhhhhhhhhhhhh\\\\\\\\\\\\\\\\hhhhhhhhhhhhhhhh^^^^^^^^^^^^^^^^BBBBBBBBBBBBBBBBXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYY\\\\\\\\\\\\\\\\________________bbbbbbbbbbbbbbbbddddddddddddddddddddddddddddddddbbbbbbbbbbbbbbbb________________________________ccccccccccccccccZZZZZZZZZZZZZZZZrrrrrrrrrrrrrrrrffffffffffffffffddddddddddddddddZZZZZZZZZZZZZZZZUUUUUUUUUUUUUUUUccccccccccccccccjjjjjjjjjjjjjjjjmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmqqqqqqqqqqqqqqqqaaaaaaaaaaaaaaaaffffffffffff
ffffYYYYYYYYYYYYYYYYppppppppppppppppkkkkkkkkkkkkkkkk````````````````\\\\\\\\\\\\\\\\VVVVVVVVVVVVVVVVYYYYYYYYYYYYYYYY[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\eeeeeeeeeeeeeeeehhhhhhhhhhhhhhhh\\\\\\\\\\\\\\\\hhhhhhhhhhhhhhhh^^^^^^^^^^^^^^^^BBBBBBBBBBBBBBBBXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYY\\\\\\\\\\\\\\\\________________bbbbbbbbbbbbbbbbddddddddddddddddddddddddddddddddbbbbbbbbbbbbbbbb________________________________ccccccccccccccccZZZZZZZZZZZZZZZZrrrrrrrrrrrrrrrrffffffffffffffffddddddddddddddddZZZZZZZZZZZZZZZZUUUUUUUUUUUUUUUUccccccccccccccccjjjjjjjjjjjjjjjjmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmqqqqqqqqqqqqqqqqaaaaaaaaaaaaaaaaffffffffffffffffYYYYYYYYYYYYYYYYppppppppppppppppkkkkkkkkkkkkkkkk````````````````\\\\\\\\\\\\\\\\VVVVVVVVVVVVVVVVYYYYYYYYYYYYYYYY[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\eeeeeeeeeeeeeeeehhhhhhhhhhhhhhhh\\\\\\\\\\\\\\\\hhhhhhhhhhhhhhhh^^^^^^^^^^^^^^^^BBBBBBBBBBBBBBBBXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYY\\\\\\\\\\\\\\\\________________bbbbbbbbbbbbbbbbddddddddddddddddddddddddddddddddbbbbbbbbbbbbbbbb________________________________ccccccccccccccccZZZZZZZZZZZZZZZZrrrrrrrrrrrrrrrrffffffffffffffffddddddddddddddddZZZZZZZZZZZZZZZZUUUUUUUUUUUUUUUUccccccccccccccccjjjjjjjjjjjjjjjjmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmqqqqqqqqqqqqqqqqaaaaaaaaaaaaaaaaffffffffffffffffYYYYYYYYYYYYYYYYppppppppppppppppkkkkkkkkkkkkkkkk````````````````\\\\\\\\\\\\\\\\VVVVVVVVVVVVVVVVYYYYYYYYYYYYYYYY[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\eeeeeeeeeeeeeeeehhhhhhhhhhhhhhhh\\\\\\\\\\\\\\\\hhhhhhhhhhhhhhhh^^^^^^^^^^^^^^^^BBBBBBBBBBBBBBBBXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYY\\\\\\\\\\\\\\\\________________bbbbbbbbbbbbbbbbddddddddddddddddddddddddddddddddbbbbbbbbbbbbbbbb________________________________ccccccccccccccccZZZZZZZZZZZZZZZZrrrrrrrrrrrrrrrrffffffffffffffffddddddddddddddddZZZZZZZZZZZZZZZZUUUUUUUUUUUUUUUUccccccccccccccccjjjjjjjjjjjjjjjjmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmqqqqqqqqqqqqqqqqaaaaaaaaaaaaaaaaffffffffffffffffYYYYYYYYYYYYYYYYppppppppppppppppkkkkkkkkkkkkkkkk````````````````\\\\\\\\\\\\\\\\VVVVVVVVVVVVVVVVYYYYYYYYYYYYYYYY[[[[[[[[[[[[
[[[[\\\\\\\\\\\\\\\\eeeeeeeeeeeeeeeehhhhhhhhhhhhhhhh\\\\\\\\\\\\\\\\hhhhhhhhhhhhhhhh^^^^^^^^^^^^^^^^BBBBBBBBBBBBBBBBXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYY\\\\\\\\\\\\\\\\________________bbbbbbbbbbbbbbbbddddddddddddddddddddddddddddddddbbbbbbbbbbbbbbbb________________________________ccccccccccccccccZZZZZZZZZZZZZZZZrrrrrrrrrrrrrrrrffffffffffffffffddddddddddddddddZZZZZZZZZZZZZZZZUUUUUUUUUUUUUUUUccccccccccccccccjjjjjjjjjjjjjjjjmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmqqqqqqqqqqqqqqqqaaaaaaaaaaaaaaaaffffffffffffffffYYYYYYYYYYYYYYYYppppppppppppppppkkkkkkkkkkkkkkkk````````````````\\\\\\\\\\\\\\\\VVVVVVVVVVVVVVVVYYYYYYYYYYYYYYYY[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\eeeeeeeeeeeeeeeehhhhhhhhhhhhhhhh\\\\\\\\\\\\\\\\hhhhhhhhhhhhhhhh^^^^^^^^^^^^^^^^BBBBBBBBBBBBBBBBXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYY\\\\\\\\\\\\\\\\________________bbbbbbbbbbbbbbbbddddddddddddddddddddddddddddddddbbbbbbbbbbbbbbbb________________________________ccccccccccccccccZZZZZZZZZZZZZZZZrrrrrrrrrrrrrrrrffffffffffffffffddddddddddddddddZZZZZZZZZZZZZZZZUUUUUUUUUUUUUUUUccccccccccccccccjjjjjjjjjjjjjjjjmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmqqqqqqqqqqqqqqqqaaaaaaaaaaaaaaaaffffffffffffffffYYYYYYYYYYYYYYYYppppppppppppppppkkkkkkkkkkkkkkkk````````````````\\\\\\\\\\\\\\\\VVVVVVVVVVVVVVVVYYYYYYYYYYYYYYYY[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\eeeeeeeeeeeeeeeehhhhhhhhhhhhhhhh\\\\\\\\\\\\\\\\hhhhhhhhhhhhhhhh^^^^^^^^^^^^^^^^BBBBBBBBBBBBBBBBXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYY\\\\\\\\\\\\\\\\________________bbbbbbbbbbbbbbbbdddddddddddddddddddddddddddddddd��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~|����������������������������|������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
����~~~~~~~~�����~~������������������������~~�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{{{{{{{{|||}~~}}}~~~~~~~~~}}}}~~~~~~~~~~~z{~��}{||||||||��~}}}}||||||||||||||||||}~~~~~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yyyyyyyyyyz{{|}}}}}}}}}}||}}}|{zzzzz{{{{zzzzzzzzy{~��~|z{{{{{{{{~}{zyyyzzzzzzzz{{{{{{{{xyz{|||{||||}}}}~~~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xxxxxxxxxxyyzz{{|||||{{{yz{{{yxwwwwxxxxxwwwwwwwwy{}~{y{{{{{{{{}|{yxxwwyyyyyyyyzzzzzzzzvwyz{{zzzzzzzzzzzzzz{{{{||||||||||||||||}|||~�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xxxxxxxxxxxxyyzzzzyyyyxxwxyzyxvtuuuuuuuuuuuuuuuuyz|}}{zyzzzzzzzz|{zxwwwwwwwwwwwwxxxxxxxxuvxz{{zyyyyyyyyyxxyyyyzzxxxxxxxxxxxxxxxxxxvwx{����~}}~~~}}}||~~�������~~}}}~~~~~~~~~~�����������������������������������������������������������������������������������������������������������������������������xxxxxxxxxxxxxyyywwwwvvvvvwxyxwusttttttttttttttttyyzzzyyxxxxxxxxx{{yxwwwwvvvvvvvvwwwwwwwwtvxz{{zyyyyyyxxxxyyyyzzzvvvvvvvvuuuuuuuuvussuy|�|zzz{|yyyyxxxwyyzz{||}~}}||{zzxxz{{{zzyyyyyyyyz{{|}}~~����������������������������������������������������������������������������������������������������������������������wwwwwwwwvvvvvvvvvvvvvvvvwwwwwwwwuuuuuuuuuuuuuuuuvvvvvvvvuuuvvvvvxwtrqrssttttttttuuuuuuuutttuuuvvvvvvvvvvttuuvvwwtttttttttttt
ttttttttttttvvvvwwwwuuuuuuuuxwwwvuuuqrstuvvvuuuuuuuuuuvvvvuuwwwwwwwwyyyyyyyyxxxxxxxxwwwwwwwwyyyyyyyywwwwwwwwxxxxxxxxyyyyyyyyxyyz{|}}}}~�����������������������������������������������������vvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvttttttttuuuuuuuuuuuuuuuuttuuuuvvvusqppqrrrrrrrrrrrrrrrrrssttttuutttttttttttuuvvvttttttttttttttttttttttttuuvvvvwwuuuuuuuuwwvvuuttqqstuuuuuuuuuuuuuuvvvvuuwwwwwwwwvvvvvvvvwwwwwwwwvvvvvvvvwwwwwwwwvvvvvvvvwwwwwwwwvvvvvvvvvvwwxyzzzz{|}~������������������������������������������������vvvvvvvvvvvvvvvvvvvvvvvvuuuuuuuuttttttttsssssssssssssssssssttttttsqpoopppppppppppppppppprrsssssssssssssssssttuuutttttttttttttttttttttttttuuuuvvvuuuuuuuuvvvuuttspqrsttttuuuuuuuutuuvvuutvvvvvvvvttttttttvvvvvvvvuuuuuuuuttttttttuuuuuuuuvvvvvvvvttttttttssttuvvwvvvwwxxxwwxyyyxxxxxxyyyyyyyyyyyy{{||||}}||||||||||||||||uuuuuuuuttttttttttttttttssssssssttttttttrrrrrrrrqqqqqqqqrrrrsssstsrqppqqpppppppppppppppprrrrrrrrqqqqqqqqrrsstttuttttttttttttttttttttttttttttuuuuuuuuuuuuuuuttssspqrsttssuuuuuuuutuuvvuutuuuuuuuuttttttttuuuuuuuuttttttttttttttttttttttttuuuuuuuussssssssrssttuuvttttttttrrstttssssssttttttttttttuuvvvvwwvvvvvvvvvvvvvvvvssssssssqqqqqqqqqqqqqqqqrrrrrrrrssssssssqqqqqqqqppppppppqqqrrrrruutssrssrrrrrrrrrrrrrrrrrrrqqqqqqqqqqqqqrrrsstttuuuuuuuuttttttttttttttttsttttuuuuuuuuuuuuuuttssrpqrsssssttttttttttuuuuttttttttttuuuuuuuuttttttttttttttttuuuuuuuuttttttttttttttttuuuuuuuuttuuuvvvuuuuttttrrsttttsssstttttttttttttsttttuuuuuuuuuuuttttttttrrrrrrrrpppppppppppppppprrrrrrrrrrrrrrrrqqqqqqqqqqqqqqqqqqqqqrrruuttttssssssssssssssssssrrrqqqqqqqqqqqqqrrrstttuuuuuuuuuttttttttttttttttttttuuuuuuuuuuuuuuuttsssqrstttssttttttttttuuuuttttttttttvvvvvvvvttttttttttttttttvvvvvvvvttttttttttttttttvvvvvvvvvvvvwwwwvvvvuuuustuvvvuuuuuuuvvvvvvvvvvvtttuuuuvvvvvvvvvttttttttrrrrrrrrqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrqqqqqqqqqqqqqqqqqqqqrrrrssssssrrssssssssrrrrrrrrrrrrrqqqrrrrrrrrrssstuuuuuuuuuuutttttttttttttttttttuuuuvuuuuuuuuvvuuttssrsttuttsttttttttsttuuttsttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvuvvv
vvwwvvvvvvuustuuvvuutuuuuvvvvvvvvvvvuuuvvvvvuuuuuuuuuuuuuuuuqqqqqqqqssssssssssssssssrrrrrrrrrrrrrrrrqqqqqqqqrrrrrrrrqqqqrrrrqqrrqqqprrrrrrrrqqqqqqqqssrrrqqqsssssssssssttuuuuuuuuuuuttttttttttttttttuuuuuvvvuuuuuuuuvvvuuttssstuuuttttttttttsttuuttsuuuuuuuussssssssuuuuuuuuuuuuuuuuttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvuuursttuttsssttttuuttttttttuvvvvwwwttttttttuuuuuuuuppqqqqrrtssrrqqpqqqqqqqqqqqqqqqqqqqqqrrrssrrrrqqppppqqqrqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrssssssssqrrstvvwttuuuuvvuuuuuuuuvvuuuuttuuuuuuuuuuuuuuuutttuuuuuuuuuuuuuttttttttttttttttttttsssssssssssssssssssstuwxxwutttttttttttuuuuvvuuuuuuuuttttttttvwwwxxxxxwwwwwvvwwvvvvvvtuuuuvvvvvvwwxxyxxxxxxxxwwwvvvuuvvvvvvvvuuuuuuuupppqqqqrrrqqpoooppppppppqqqqqqqqqqqqqrrrrrrrrqqqpppqqqqrqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrssssssssrrsstuuvtttuuuuuuuuuuuuuvvuuuuttttttttttuuuuuuuuttttuuuuuuuuuuuuttttttttttttttttttttsssssssssssssssssssstuvwwvutttttttttttuuuuvvuuuuuuuuttttttttuuvvvwwwvvvvuuuuvvvvuuuuuuvvvvwwvvvvvvwwwwwwwwwwwwwwvvvvuuuuuuuuuuuuuuuupppqqqqqpppoonnnnnnnnnnnqqqqqqqqqqqqrrrrrrrrqqqqpqqqqqqrqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrssssssssssssttttttttuuuuuuuuuuuuuuuuutttrrrrrrrrttttttttttttuuuuuuuuuuuuttttttttttttttttttttssssssssssssssssssssstuvvutssssssssstttuuuuuuuuuuuuuttttttttttttttuuttttsssstttttuuuvvvvvwwwuuuuutttvvvvvvvvuuuvvvvvttttttttuuuuuuuuppppqqqqqppppoooooooooooppppppppppqqrrrsrrrrqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrsssssssstttssssssssttttuuuuuuuuuuuuuttttqqqqqqqqsssssssssttttuuuuuuuuuuuttttttttttttttttttttssssssssssssssssssssrsttttsrrrrrrrrrttttuuuuuuuuuuuuttttttttrrrrrsssrrrrqqqqrsssttttuuuuuvvvvuuttsrrttttttttssssttttttttttttuuuuuuuuoppppqqqrrrrqqqqqqqqqqqqpppppppppppqrrssssrrrrqqrrrrqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrssssssssttsssssrssssstttuuuuuuuuuuutttttqqqqqqqqrrrrrrrrsssttttuuuuuuuuuttttttttttttttttttttssssssssssssssssssssrssssssrqqqqqqqqtttttuuuuuuuuuuuttttttttrrrrrqqqqqqpppppqqrssttussttttuuvvutssrrrrrrrrrrrrrrrrrrssssssssuuuuuuuuoooppppqrrrrrrrrqqqqqqqqppppppppoppqrrssttssssrrrrrrrqqqqqqqqqqqqqqq
qqqqqqqqrrrrrrrrrrrrssssssssssssssssrrrsssstttttttttuuttttssqqqqqqqqrrrrrrrrssssttttuuuuuuuuttttttttttttttttttttssssssssssssssssssssssssssssqqqqqqqqssttttuuuuuuuuuuttttttttrrrrrqqqqqqppppppqqrstuutttuuuuuwvvuttssttttttttssssrrrrttttttttuuuuuuuuooopppppqqqqqqqqppppppppppppppppoppqrsstuuttttssssrrrrqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrssssssssrrrsssssrrrrsssstttttttttttttsssssssssssrrrrrrrrssssttttuuuuuuuuttttttttttttttttttttssssssssssssssssssssssssssssqqqqqqqqssstttttuuuuuuuuttttttttssssrrrrrqqqqppppqqstuvvwwwxxxxxwwwvvvuuxxxxxxxxxxwvuuttttttttttuuuuuuuuooooppppooooooppooooooooppppppppoopqrsstuuuuutttsssrrrqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrssssssssrrrrssssrrrrrssstttttttttttttsssssssssssssssssssssssstttuuuuuuuuttttttttttttttttttttssssssssssssssssssssttssssttrrrrrrrrssstttttuuuuuuuutttttttttttsssrrrrrqqqqqpqrstuvwzzzz{{{{xxwwwwww|||||||||{zyxwwvttttttttuuuuuuuuppppppppqqpponnnnnnnnooooooooooopqqrrsssssssssssssrrqqpppppoppqrqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrsssssssssssrrrrqrstuuuutttttttsssssssssssssssssssssssssstttuuuuuvvuttttuttttttttttttttttuttsssssttttttttssssssssssssssssssstttttttttttttttttttttttttttttrrrrrrrroopprsuvz{|}~~~~|||{{{{{~~~~~~~~~~~~~~~~vwyyyxvtuuuuuuuuppppppppppppooonnnooooppppppppppppqqrrsssssssssssrrqqpppqppppqqrrrrrrrrrrrrrrrrrsssssssssssssssssssssssssssrrrrrrsstutttsssssssssssssssssssssssssssssssstttuuvvvvvuttttuttttttttttttttttutttssssttttttttssssssssssssssssssstttttttttttttttttttttttttttttssssssssqqqqrtuvxyz{{{{zyyyyxxxxzzzzzzzz{{{{{{{{vwxyywvuuuuuuuuupppppppppppppppooooppppqqqqqqqqqooppqqrrrrrrrrrrrrqqpppoqqpppqrrssssssssssssssssttttttttttttttttttttttttssssrrrrrrstttssrrrrsssssssssssssssssssssssssssstttuvwwwvuttsttuttttttttttttttttttttttssttttttttssssssssssssssssssstttuuuuuuuuuuttttttttttttttttttttttttssssstuvvwwxxwwvvvvvuuuuvvvvvvvvwwwwwwwwvwxyxwvuuuuuuuuuppppppppppppppppppppqqqqrrrrrrrroooppqqrqqqqqqqqqqqqpppprqqqqrrsttttttttttttttttttttttttttttttttttttttttsssssrrrqrssssrrrrrrrrrssssssssssssssssssssssssssttuvwxxvutsssttttttttttttttttttttuuuutttttt
ttttsssssssssssssssssstttuuuuuuuuuuuuuuuuuuuttttttttuuuuuuuuvuuttuvvvwwwwvutuuuuutttuuuuuuuuuuuuuuuuvvwxxwvvuuuuuuuuqqqqqqqqqqqqqqqppppqqqqrrrrrrrrroooppqqrqqqqqqqqqqqqqppprrqqqrstuuuuuuuuttttttttuuuuuuuuttttttttttttttttttssssrrrrssssrqqqrrrssssssssssssssssssssssssssssstuvwxxuutsssttttttttttttttttttttuvvvutttttttttttttttttssssssssssttuuvvvvvvvvvvuuuuuuuuttttttttuuuuuuuuwvvuuvvwxxyyxwvuwwvvvvuuuuuuuuuuvvvvvvvvvvwwwwvvvvvvvvvvssssssssssrrqqppppppqqqqrrrrrrrrooppqqrrqqqqqqqqrrrqqqqqsrrrrsstuuuuuuuutttttttttttttttttttttttttttttttttttssssssstttsrrqqrrssttssssssssssssssssssssssssssttuvvwuutsssstttttttttttttttttttvwwvuuttttttttttttttttttttttttssttuvvwvvvvvvvvuuuuuuuuttttttttuuuuuuuuwvvuvvwwyyzzyxwwxxwwwwwvvvvvvvvvwwwwwwwwvvvvwwwwvvvvvvvvttttttttttsrqppooppppqqqqqqqqqqqppqqrrssrrrrrrrrrrrrrrrrssrrrsttttttttttssssssssttttttttssssssssssssssssttttsssstttutssrrrrstttussssssssssssssssssssssssssstttuuutssrsstttttttttttttttttstvxxwvuttttttttttttttttttttttttsstuuvwwwwwwwwwwuuuuuuuuttttttttttttttttvvuuvvwxxxyyyxxwwwwwwvvvvvvvvvvvwwwwwwwwuuvvvvwwvvvvvvvvuuuuuuuuuutsqpooooopppppqqqqqqqqpqqrrsssrrrrrrrrssssssssssrrrstuttttttttssssssssttttttttrrrrrrrrssssssssttttsssstuuuutsrrrsstuuussssssssssssssssssssssssssssssssutssrsssttttttttttttttttsuvxxwvuttttttttttttttttttttttttsstuvvwwwwwwwwwwuuuuuuuuttttttttttttttttuuuuvwxxvwxxxxwwwwvvvvuuvvvvvvvvvvvvvvvvuuuuvvwwvvvvvvvvrrrrrrrrppqrrrrqqqqrrrssttttttttuuuuuuuutsssstuuuuuuttttttuuutsrsssssssstttssssssssssssssssrrrqqrrrrssssuuttsttusstuuuttssssttttttttttttsssttuuuuuuuuuuuttttttttqrrrssstsssssssstttuuvvwvvvvvvvvvuttstttttttttttttttttttttttuuuuwwxxxwvuvvvvvvvvuuuuuuuuuuuuuuuuttuuvwxxwwwwwwwwwwwwwwwwvvwwwwxxyxxwwvvuvwwwwwvuvvvvvvvvqqqqqqqqpqrsssrrqrrrrsssttttttttuuuuuuuutsssstuuuuuuttttttuuutsrsssssssstttssssssssssssstssrrqpprrrrrsssuttsstuurssttssrssssttttttttttttuuuvvwwwvvvvvvvvttttttttssssssstsssssssstttuuvvwvvvvvvvvvuttstttttttttttttttttttttttuuuuwwxxwwvuvvvvvvvvuuuuuuuuuuuuuuuuttuuvwxxwwwwwwwwwwwwwwwwvvwwwwxxxxxwwvvuvvwxxwwvvvvvvvvvqqqqqqqqqrss
ttsssssssttttttttttttttttttttsssstuuuuuuttttttuuutsstttttttttttssssssssssssstssrqpooqqqrrrrrssssstuvstttsrqqssssttttuuuuuuuuvwwxxyyyvvvvvvvvuuuuuuuutttttttstttttttttttuuvvwvvvvvvvvvuttstttttttttttttttttttttttuuuuvvwwwvvuvvvvvvvvuuuuuuuuuuuuuuuuttuuvwxxwwwwwwwwwwwwwwwwvvwwwwxxxxxwwvvvvvwxyyxxwwwwwwwwrrrrrrrrrsstuttsttttttttuuuuuuuutttttttttsssstuuuuuutttttttttttttttttttttttssssssssssssstssrqponqqqqrrrrrrrrstuvvvvuusrqssssttttuuuuuuuuvvwwxxyywwwwwwwwvvvvvvvvvvuutttttttttttttttuuvvwvvvvvvvvvuttstttttttttttttttttttttttuuuuvvvvvvvuvvvvvvvvuuuuuuuuuuuuuuuutuuvvwwwwwwwwwwwwwwwwwwwvvwwwwxxxxwwwvvvvvwyyzyyxxxxxxxxrrrrrrrrrstuuuttttttttttttttttttsssssssstsssstuuuuuuttttttttttttuuuuuuuutttssssssssssssstssrqpooqqqrrrrsrqqqrsuuxxxxvutsssssttttuuuuuuuutttuvvvwwwwwwwwwvvvvvvvvvvvuuttttttttttttttuuvvwvvvvvvvvvuttstttttttttttttttttttttttuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvuuuvvwwwwwwwwwwwwwwwwwwwvvwwwwxxwwwwwwwvvvwyyzyyxxxxxxxxttttttttrsttuttsuttttttsttttttttsssssssstsssstuuuuuuttttttssstuuuuuuuuuutttssssssssssssssssrrqqqrrrrssssrqqqrrstxxxxwvtsssssttttttttttttrssttuuuvvvvvvvvvvvvvvvvuuuuuttttttttttttttuuvvwvvvvvvvvvuttstttttttttttttttttttttttuuuuuutttuuvwwwwwwwwvvvvvvvvvvvvvvvvuuuvvvwwwwwwwwwwwwwwwwwwvvwwwwxxwwwwwwwwvvwxyyxxxxxxxxxxuuuuuuuurrstttssttttssssssssssssrrrrrrrrtsssstuuuuuuttttttssstuvvvvvvvvvtttsssssssssssssssssssssssssttttrrqqqqrsvvvvutsrssssttttssssssssssstuuuvvvvvvvvvuuuuuuuutttttuuuuuuuuuuutttuuvvwvvvvvvvvvuttstttttttttttttttttttttttuuuuttssttuvwwwwwwwwvvvvvvvvvvvvvvvvuuvvvvvvwwwwwwwwwwwwwwwwvvwwwwxxwwwwwwwwvvwxxwwvxxxxxxxxvvvvvvvvqrsstssstttsssrrrrrrrrrrrrrrrrrrtsssstuuuuuuttttttssstuvvvvvvvvvtttsssssssssssssssssttttssttttuusrqqqqqrsttttsrqssssttttrrrrrrrrttuuvvvwuuuuuuuuuuuuuuuusssttuuuuuuuuuuutttuuvvwvvvvvvvvvuttstttttttttttttttttttttttuuuuttssstuvwwwwwwwwvvvvvvvvvvvvvvvvuvvvvvvvwwwwwwwwwwwwwwwwvvwwwwxxwwwwwwwwvwwwwwvuwwwwwwwwttttttttssssssssttttttttttttttttssssssssssssssssttsssssssstttuuuuuuuuuuuttttttttsssssssssstuwxyyyxvutsstuuuuuuuusssssssssssssssssssssssstttt
ttttrrsstttuuuuutttsrssttuuvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuuuuuuuuuuuuttuuuuttttttttttuuuuuuuuuvwwxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwvvwwwwxxwwxxwvuuwwwwwwwwwwwwwwwwttttttttssssssssttttttttttttttttsssssssssssssssssssssssstttttttuuuuuuuuuvvvvvvvvuuuuuuuustuvwxyyyxwuttttuuuuuuuussssssssssssssssssssssssttttttttqqrrsttuuuuuttttssstuuvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuuuuuuuuuuuutuuvvuutuuuuuuuuuuuuuuuuuvwwxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxyxwvvwwwwwwwwwwwwwwwwttttttttssssssssttttttttttttttttssssssssssssssssrrsssttttttttsssssssssssvvvvvvvvvvvvvvvvtuuvwxyyxxwuuuuuuuuuuuuuttttttttssssssssssssssssssssssssppqrsttuuuutttttstttuuvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuuuuuuuuuuuuuuvwwvuuuuuuuuuuuuuuuuuuuvwwxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxyyxyyzzyxxwwwwwwwwwwwwwwwwttttttttssssssssttttttttttttttttssssssssssssssssqrrsttuutttsssrrrrrrrrrrttttttttuuuuuuuuuuvvwxxxxwvuuuuvuuuuuuuuttttttttssssssssssssssssrrrrrrrroopqrsttuuttttttttuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuuuuuuuuuuuuvvwwwwvvvvvvvvvvuuuuuuuuuvwwxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxyyyyyyz{{{zzwwwwwwwwwwwwwwwwttttttttttttttttttttttttttttttttssssssssrrrrrrrrqrrsttuuttssrrrqqqqqqqqqqqqqqqqqssssssssuuuvvwwwvvuutuuvuuuuuuuuttttttttssssssssssssssssrrrrrrrroppqrsttttttttuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuuuuvvwxxwvvvvvvvvvvvvvvvvvvuvwwxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxyyyyyz{{||{{wwwwwwwwwwwwwwwwttttttttttttttttttttttttttttttttssssssssrrrrrrrrrrssstttsrrrrrrrqqqqqqqqppppppppqqqqqqqquuuuuuuuuttsttuvuuuuuuuuttttttttssssssssssssssssssssssssqqqrrssstttttuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuuuuvvwwwwvvwwwwwwwwvvvvvvvvuvwwxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxyyyyz|||||wwwwwwwwwwwwwwwwttttttttttttttttttttttttttttttttssssssssrrrrrrrrssssssssqqqrrrrrqqqqqqqqppppppppqqqqqqqqttttttttssrrsstuuuuuuuuuuuuuuuuussssssssssssssssttttttttssssssssttttuuuuwwwwwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuuuuuvvwwvvuwwwwwwwwvvvvvvvvuvwwxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww
wwwwwwwwwwwwwwwwxxxxxyz{|}}}wwwwwwwwwwwwwwwwttttttttttttttttttttttttttttttttssssssssrrrrrrrrttsssssspqqqrrrsqqqqqqqqqqqqqqqqrrrrrrrrtttsssssrrrqrstuuuuuuuuuuuuuuuuusssssssssssssssstttttttttttsssssstttuuuuwwwwwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuuuuuuvwwvuuwwwwwwwwvvvvvvvvuvwwxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwvvwwwwxxwxz{|}}}wwwwwwwwwwwwwwwwuuuuttttttttttttttttttttttttttttuuuuuuuussssssssssssssssqqqrrrrrrrrrrrrrrrrrrrrrrsuvvusrttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuurrrrrrrruuuuuuuussssssssrstuuuttuwyxvuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwvvvvwwwwwvvvuuvvvuvvwxwwwwwwwwvvwwwwxxwwwwwwwwuuuttttsttttttttttttttttttttttttttttttttssssssssssssssssqqrrrrrssssssssstttttttttuwxxwutttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuussssssssuuuuuuuussssssssrstuuutttvxxvuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwvvvvwwwwwvvvvuwvvuvvwxwwwwwwwwvwwwwxxxwwwwwwwwtttttsssssssssssssssssssttttttttssssssssssssssssssssssssqrrrrrssttttttttvvvvvvvvvwxyyxwvttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuttttttttuuuuuuuussssssssssttuutttvxxvuvwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwvuuvwwwwwvvvvvwvvvvwxxwwwwwwwwwwwwwxxxwwwwwwwwttttssssrrrrrrrrssssssssttttttttssssssssssssssssssssssssrrrrrsssttttttttvvvvvvvvvvwxxwvvttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuttttttttsstttttttvxwvuvxvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwvvuuvvwvvvvvvvvwwvvvwxxxxxxxxxxwwwwxxxxxxxxxxxxtttttsssrrrrrrrrssssssssssssssssrrrrrrrrssssssssssssssssrrrrssssuuuuuuuuttttttttttuuuuttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuttttttttttttttuuuvxwuuvxvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwvuuuuvwvvvvvvvvwwvvvwxyxxxxxxxxwwwxxxxyxxxxxxxxuuuuttttssssssssssssssssssssssssssssssssssssssssssssssssrrrsssstttttttttssssssssssss
ssssttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuttssstuuvwxwutvwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuvvvvvvvwwwxwwvwwxyxxxxxxxxwxxxxyyyyyyyyyyyvvvuuuuuttttttttttttttttssssssssssssssssssssssssssssssssrrssssttttttttttttttttttssssssssttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuttssstuvxyywtsuwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuttuvvuvvvvwwwxwwwwxyyxxxxxxxxxxxxxyyyyyyyyyyywvvvvuuuttttttttttttttttssssssssssssssssssssssssssssssssrsssstttttttttttuuuuuuuuttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuttssstuvyyywtstvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuttuvvuuvvvwwwxwwwwxyyxxxxxxxxxxxxyyyyzzzzzzzzuuuuuuuuuuutttttrrrrrrrrrrrrrrrrtttttttttttttttttttsssssssssssssttttttttttttttttttttttttsssrssssttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuutttuvwxxwwvuuuuuuvvvwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwvuuuvwvvvwwxxxwwwvvvuuvvvvwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxuuuuuuuusssssrrrrrrrrrrrssssssssttttttttttttttttttttssssssssssssttttttttttttttttttttttttssstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuutttuvwxxwwvuuuuuuvvvwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwvuuuuvwvvwwwwxxwwwwvvvvvvvvwwwwwwwwwwwwxxxxxxxxxxxxxxxxyyyyyyyyuuuuuuuurrrrrqqqrrrrrrrrttttttttttttttttttttttttttttssssssssssssttttttttttttttttttttttttttuvvwwwuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuutttuvwwwwvvvuuuuvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuvvvwwwwwwxxxxwwwwwvvvvwwwwwwwwwwwwxxxxxxxxxxxxxxxxyyyyyyyyuuuuuuuussssrrrrssssssssttttttttttttttttuuuuuuuuuttttssssssssssstttttttttttttttttttttttttuvwxyyyuuuuuuuuuuuuuuuuuuuuuuuuttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuutttuvwwwwvvvuuvvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvv
vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuvvvwwwwwwwwxxxxxxxxvvvvwwwwwwwwwwwwxxxxxxxxxxxxxxxxyyyyyyyyuuuuuuuuuuuuutttssssssssttttttttttttttttuuuuuuuuuuuttttssssssssstttttttttttttttttttttttttuwxyzzyuuuuuuuuuuuuuuuuuuuuuuuusstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuutttuvwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuvvvwwwwwwwwwwwxxxxxxxxvvvvwwwwwwwwwwwwxxxxxxxxxxxxxxxxyyyyyyyyuuuuuuuuvvvvuuuussssssssttttttttttttttttuuuuuuuuuuuuttttsssssssstttttttttttttttttttttttttuwxyyxxvvvvvvvvuuuuuuuuuuuuuuuusssttuuuuuuuuuuuuuuuuuuuuuuuuuuuuutttuvwvvvvvvvvvvvvvvuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuvwwxxxxxwwwwwwvwwwwwxxxvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxuuuuuuuuuuuuttttssssssssssssssssttttttttuuuuuuuuuuuuttttsssssssstttttttttttttttttttttttttuwxxxvvvvvvvvvvuuuuuuuuuuuuuuuurssstuuuuuuuuuuuuuuuuuuuuuuuuuuuuutttuvwvvvvvvvvwwvvvuuuttttttttvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvxyyyyyxxwwwwvvvvvwwwwwvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxuuuuuuuuttssssrrttttttttrrrrrrrrttttttttuuuuuuuuuuuuutttssssssssttttttttttttttttttttttttsuvxxwutvvvvvvvvuuuuuuuuuuuuuuuurrsstuuuuuuuuuuuuuuuuuuuuuuuuuuuuutttuvwvvvvvvvwwwvvvuuussssssssvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwxzzzzyxxxwwvvvuuvvvwwwvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwuuuuuuuuuuuuttttttttssssssssssssuuuuuuuuttttttttssttttssssttttuuttttttttttttuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvuuuuutsrrrrstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvutsrsttutttuvvwwvvuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuwxyz{{{{wwwwwwwwvvvvvwwwvvvvvvvvwwwwwwwwwwvuuvwwwwwwvvvvvvvvvvvvuuuuuuuuuuuuttttttttssssssssssssuuuuuuuuttttttttssttttssssttttuuttttttttttttuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvuuuuutsrrrrstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvutsrsttutttuvvwwvvuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuvwxyz{zzwwwwwwwwvvvvvwwwvvvvvvvvwwwwwwwwwwvuuvwwwwwwvvvvvvvvvvvvuuuuuuuuuuuuttttttttssssssss
ssssuuuuuuuuttttttttssttttssssttttuuttttttttttttuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvuuuuutsrrrrstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvutsrsttutttuvvwwvvuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuvwyyzzywwwwwwwwvvvvvwwwvvvvvvvvwwwwwwwwwwvuuvwwwwwwvvvvvvvvvvvvuuuuuuuuuuuuttttttttssssssssssssuuuuuuuuttttttttssttttssssttttuuttttttttttttuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvuuuuutsrrrrstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvutsrsttutttuvvwwvvuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuuwxyyyywwwwwwwwvvvvvwwwvvvvvvvvwwwwwwwwwwvuuvwwwwwwvvvvvvvvvvvvuuuuuuuuuuuuttttttttssssssssssssuuuuuuuuttttttttssttttssssttttuuttttttttttttuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvuuuuutsrrrrstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvutsrsttutttuvvwwvvuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuutuvxxyyxwwwwwwwwvvvvvwwwvvvvvvvvwwwwwwwwwwvuuvwwwwwwvvvvvvvvvvvvuuuuuuuuuuuuttttttttssssssssssssuuuuuuuuttttttttssttttssssttttuuttttttttttttuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvuuuuutsrrrrstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvutsrsttutttuvvwwvvuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuutuwxyyyywwwwwwwwvvvvvwwwvvvvvvvvwwwwwwwwwwvuuvwwwwwwvvvvvvvvvvvvuuuuuuuuuuuuttttttttssssssssssssuuuuuuuuttttttttssttttssssttttuuttttttttttttuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvuuuuutsrrrrstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvutsrsttutttuvvwwvvuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuvwxyyyywwwwwwwwvvvvvwwwvvvvvvvvwwwwwwwwwwvuuvwwwwwwvvvvvvvvvvvvuuuuuuuuuuuuttttttttssssssssssssuuuuuuuuttttttttssttttssssttttuuttttttttttttuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvuuuuutsrrrrstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvutsrsttutttuvvwwvvuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuvwyyzzywwwwwwwwvvvvvwwwvvvvvvvvwwwwwwwwwwvuuvwwwwwwvvvvvvvvvvvvoooooooooooooooooooooooooooooooonnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmnnoopponmmlmmmmmmm
mmmmmmmmmmmmmmmmllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkjkklmmmmmmmmmllkjjklllllllllllkkkjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiikkkjjjjjjjjjjjjjoooooooooooooooooooooooooooooooonnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmnnoopponmmlmmmmmmmmmmmmmmmmmmmmmmmllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkjkklmmmmmmmmmllkkkkllllllllllkkkkjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiikkkjjjjjjjjjjjjjoooooooooooooooooooooooooooooooonnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmnnoopponmmlmmmmmmmmmmmmmmmmmmmmmmmllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkjkklmmmmmmmmmmllkkllmllllllllkkkkkjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiikkkjjjjjjjjjjjjjoooooooooooooooooooooooooooooooonnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmnnoopponmmlmmmmmmmmmmmmmmmmmmmmmmmllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkjkklmmmmmmmmmmmllllmmllllllllkkkkkkjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiikkkjjjjjiiiiiiiioooooooooooooooooooooooooooooooonnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmnnoopponmmlmmmmmmmmmmmmmmmmmmmmmmmllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkjkklmmmmmmmmmmllkkllmlllllllljjkkkkkkkkkkkkkkiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiikkkjjjjjiiiiiiiioooooooooooooooooooooooooooooooonnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmnnoopponmmlmmmmmmmmmmmmmmmmmmmmmmmllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkjkklmmmmmmmmmllkjjkllkkkkkkkkjjjkkkkkkkkkkkkkiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiikkkjjjjjiiiiiiiioooooooooooooooooooooooooooooooonnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmnnoopponmmlmmmmmmmmmmmmmmmmmmmmmmmllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkjkklmmmmmmmmmkkjjjjkkjjjjjjjjjjjjkkkkkkkkkkkkiiiiiiiiiiiiiiiiiiiiiiiijjjj
jjjjiiiiiiiikkkjjjjjiiiiiiiioooooooooooooooooooooooooooooooonnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmnnoopponmmlmmmmmmmmmmmmmmmmmmmmmmmllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkjkklmmmmmmmmmkjjiijjkiiiiiiiijjjjkkklkkkkkkkkiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiikkkjjjjjhhhhhhhhnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnooponmmmmmmmmmllllmmmmmmmmkkkkkkkkkkkkkkkkkllmlkjikkkkkkkkkkkkkkkkllllllllmmllllkkkkkkkkkkkkkkkkkkjjkkkjiijjjjjjjjiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnooponmmmmmmmmlllllllllllllkkkkkkkkkkkkkkkkkllllkjjkkkkkkkkllllllllllllllllmmllllkkkkkkkkkkkkkkkkkkjjkkkjiijjjjjjjjiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnooponmmmmmmmmllllkkkkkkkkkkkkkkkkkkkkkkkkkkllllkkjkkkkkkkkllllllllmmmmmmmmlllllkkkkkkkkkkkkkkkkkkkjjkkkjiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnooponmmmmmmllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkklllkkkllllllllmmmmmmmmmmmmmmmmllllkkkkkkkkkkkkkkkkkkkkjjkkkjiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnooponmmmmmllllkkkkjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkllllllllllmmmmmmmmmmmmmmmmlllkkkkkkkkkkkkkkkkkkkkkjjkkkjiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjhhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmm
mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnooponmmmmmlllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkklmmmmmmmmmmmmmmmmmmmmmmmmmllkkkkjjkkkkkkkkkkkkkkkkjjkkkjiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjhhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnooponmmmmmlllkkkkjkkkkkkkkkkkkkkkkkkkkkkkklkkjkklmmmmmmmmmmmmmmmmmllllllllkkkkkjjjkkkkkkkkkkkkkkkkjjkkkjiijjjjjjjjiiiiiiiiiiiiiiiijjjjjjjjhhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnooponmmmmmllkkkkjjkkkkkkkkkkkkkkkkkkkkkkkklkjjjkmmnnnnnnnnllllllllllllllllkkkkkjjjkkkkkkkkkkkkkkkkjjkkkjiijjjjjjjjiiiiiiiiiiiiiiiijjjjjjjjhhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnooooonnnnmmkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkklllllkkkmmmmmmmmkjjjjiiikkkkkkkkkkkkkkkkkkkkkkkkiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhggggggggnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmlmmnnoooonnnnmmmkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllkkkkllllllllkjjjjiiikkkkkkkkkkkkkkkkjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhggggggggnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllmmnnoonnnnnmmmkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllkkkkkkkkkkkkkjjjjiiikkkkkkkkkkkkkkkkjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhggggggggnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnoooooooonnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllllmmnnoonnnnmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkklllkkkkjjjjjjjjjkjjjjiiijjjj
jjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhiiiiiiiiggggggggggggggggnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllkkkkkkkklllmmnnnnnnmmmmlkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkklkkkkjjjjjjjjjjjkjjjjiiijjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhggggggggggggggggnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllkkkkkkkkkllmmnnnnmmmmlllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjkjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllkkkkkkkkkkllmmnnmmmmmlllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjkkkkkkkkkjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllkkkkkkkkjjjjjjjjkkllmmnnmmmmllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjkkkkkkkkkjjjjiiihhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggnnnnnnnnnnnnnnnnnnnnmmmmnnnnnnnnnnnnnnnnllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkllmnoonmmmmlllkkkkkkkkjkkkllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjkkkkkkkkkkkkjjjjiiiiiiiiiiijjjkkiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkllmnoonmmmmlllkkkkkkkkkkkkllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjkkkkkkkkkkkkjjjjiiiiiiiiiiijjjjkiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmllll
llllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkllmnoonmmmmlllkkkkkkkkkkkkklllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjkkkkkkkkkkkjjjjjiiiiiiiiiiiijjjjiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggnnnnnnnnnnnnnnnnnnmmmmllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkllmnoonmmmmlllkkkkkkkkkkkkkkllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjkkjjjjiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggnnnnnnnnnnnnnnnnmmmmmlllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkllmnoonmmmmlllkkkkkkkkllkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggnnnnnnnnnnnnnnnnmmmmllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkllmnoonmmmmllllllllllllllkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiihhhiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggnnnnnnnnnnnnnnnnmmmlllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkllmnoonmmmmlllmmmmmmmmllllkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiihhhhiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggnnnnnnnnnnnnnnnnmmmllllknnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkllmnoonmmmmlllnnnnnnnnllllkkkjkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiihiiiiiiiiiiiihhhgiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmlkkjjkkkkkkkkkkkkkkkkkkkkkkkkkllmmnnnllkkkkllmmmmmmmmllllllllkkkk
kkkkjjjjjjjjjjjjjjjjkkkkkkkkmllkkjiiiiiiiiiihhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmlkkjjkkkkkkkkkkkkkkkkkkkkkkkkkllmmnnnllkkkkllmmmmmmmmllllllllkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjlllkjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllkkkjkkkkkkkkkkkkkkkkkkkkkkkkkllmmnnnllkkkkllllllllllkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjlkkkjjiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllmllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkllmmnnnllkkkkllllllllllkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjkkjjjiiiiiiiiiiijjjjjjjjiiiiiiiihhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmlllllllllllllllllllllllllllllllllllllllllllllkkkllllllllllllllllllllllllkllmmnnnllkkkkllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjiiiiiiiijjjjjjjjjjjiiiiiiiiiiiiijjjjjjjjiiiiiiiihhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllkllmmnnnllkkkkllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllkllmmnnnllkkkklljjjjjjjjjjjjjjjjkkkkkkkkjjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhiiiiiiiiiiiiiiiihhhhhhhhggggggggffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggg
ggggggggggggnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllllllllllllllllllkkkkkkkkklllllllllllllllllllllllllllllllkllmmnnnllkkkklljjjjjjjjjjjjjjjjkkkkkkkkjjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhggggggggffffffffggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkklmnnmlkllllllllllllllllkkkllmmnmlllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjiiiijiiiiiiiiiiiiiiiijihgghijiiiiiiiihhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggffeeefggggggggggeeeeeeeeggggggggmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkklmmmmlkllllllllllllllllkkkllmmnllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjiiiijiiiiiiiiiiiiiiiiiihgghiiiiiiiiiihhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggeeedeefgggggggggffffffffggggggggmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllkllllllllllllllllkkkllmmnlllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjiiiijiiiiiiiiiiiiiiiiiihhhhiiiiiiiiiihhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggeddddeffggggggggffffffffggggggggmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllkkkllllllllllllllllkkkllmmnkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjiiiijiiiiiiiiiiiiiiiiihhhhhhiiiiiiiiihhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggddcccdefggggggggffffffffggggggggmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjkkkkkkkkkkkkkkkkkkkkkkllmmnkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjiiiijiiiiiiiiiiiiiiiihhhiihhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggddcccdefggggggggffffffffggggggggmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjj
jjjkkkkkkkkkkkkkkkkkkkkllmmnjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjiiiijiiiiiiiihhhhhhhhhhiiiihhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggffffffffeddddeffggggggggggggggggggggggggmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjiiiijkkkkkkkkkkkkkkkkkkkkllmmnjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjiiiijiiiiiiiihhhhhhhhghiiiihghhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggeeeeeeeeeeedeefgggggggggggggggggggggggggmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjihhijkkkkkkkkkkkkkkkkkkkkllmmnijjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjiiiijiiiiiiiihhhhhhhhghijjihghhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggddddddddffeeefggggggggggggggggggggggggggmmmmmmmmmmmmmmmmllllllllkkkkkkkkkkkkkkkkjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkllllllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjkkllkkjjjjjjjjjkkkkkkkkjjjjjkmmkkkkkkkkkkkkkkkkjjjjjjjjkkkkkkkkiiiiiiiijjjiiiihhhgggghhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggffffggggefggggfeddddddddddeeeeffggggggggggggggggggggggggmmmmmmmmmmmmmmmmllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjkkllkkjjjjjjjjjkkkkkkkkjjjjjkmmkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjiiiiiiiijjjiiiihhhgggghhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggffffffffeffggffeddddddddeeeeffffggggggggggggggggggggggggmmmmmmmmmmmmmmmmllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjkkllkkjjjjjjjjjjjjjjjjjjjjjjkmmkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjiiiiiiiijjjiiiihhhgggghhhhhhhhhhgggggggggggggggggggggggggggggggggggggggggggggggggfffeeddeeffffeeeeeeedddeeffffggggggggggggggggggggggggggmmmmmmmmmmmmmmmmllllllllkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjkkllkkjjjjjjjjjiiiiiiiijjjjjkmmjjjjjjjjkkkkkkkkjjjjjjjjjjjjjjjjiiiiiiiijjjiiiihhhgggghhgggggggggggggggggggggggggggggggggggggggggggg
gggggggggggggffeedccdeeffeedffeeeeeeffffggggggggggggffffffffggggggggmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjkkllkkjjjjjjjjjiiiiiiiijjjjjkmmjjjjjjjjjjjjjjjjiiiiiiiijjjjjjjjiiiiiiiijjjiiiihhhgggghhggggggggggggggggggggggggggggggggggggggggggggggggffffffffgffeedcceeffffeegfffeeeeffffggggggggggggffffffffggggggggmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkmmmmmmmmllllllllllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjkkllkkjjjjjjjjjjjjjjjjjjjjjjkmmjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiijjjiiiihhhgggghhggggggggggggggggggggggggggggggggggggggggggggggggffffffffgfffeeddefggggfegggffeeeeeffffggggggggggffffffffggggggggmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkmmmmmmmmllllllllllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjkkllkkjjjjjjjjjkkkkkkkkjjjjjkmmjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiijjjiiiihhhgggghhggggggggggggggggggggggggggggggggggggggggggggggggfffffffffffffffffgghhggfhhggffeeeeeeffffggggggggffffffffggggggggmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkmmmmmmmmllllllllllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjkkllkkjjjjjjjjjkkkkkkkkjjjjjkmmjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiijjjiiiihhhgggghhggggggggggggggggggggggggggggggggggggggggggggggggffffffffffffgggggghhhhgghhhgffeeddeeeeffggggggggeeeeeeeeggggggggmmmmmmmmmmmmmmmmkkkkkkkkjjkkkkllmmmmmmmmnnnnnnnnmmmmmmmmnnnmmmmlmmmmmmmmjjjjjjjjkkkkkkkkjjjjjjjjiiiiiiiikkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkiijjkklliiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiiihhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggffeeddcceeefffffggggggggffffffffddddddddddddddddffffffffggggggggmmmmmmmmmmmmmmmmkkkkkkkkkkkkllllmmmmmmmmmmmmmmmmmmmmmmmmnnmmmmllmmmmmmmmkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkiijjkklliiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiihhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggfffeeddceffffgggggggggggffffffffddddddddeeeeeeeeffffffffggggggggmmmmmmmmmmmmmmmmkkkkkkkkkkllllmmmmmmmmmmmmmmmmmmmmmmmmmmnmmm
mlllmmmmmmmmkkkkkkkkkkkkkkkkjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjkkllliiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiihhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggfffeedddfffgggghggggggggffffffffddddddddeeeeeeeeffffffffggggggggmmmmmmmmmmmmmmmmkkkkkkkkllllmmmmmmmmmmmmllllllllmmmmmmmmmmmmllllllllllllllllllllkkkkkkkkjjjjjjjjkkkkkkkkjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkjjkkllmmiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggfffedddggggghhhggggggggffffffffeeeeeeeeeeeeeeeeffffffffggggggggmmmmmmmmmmmmmmmmkkkkkkkkllllmmmmmmmmmmmmllllllllllllllllmmmlllllkkkkkkkkllllllllkkkkkkkkjjjjjjjjkkkkkkkkjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkllmmmjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhihhhhgggggggggggggggggggggggggggggggggggggggggggggggggggffffffffggffeeddggggghhhggggggggffffffffeeeeeeeeffffffffggggggggggggggggmmmmmmmmmmmmmmmmkkkkkkkkkkllllmmmmmmmmmmkkkkkkkkllllllllmmlllllkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjkkkkkkkkjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkllmmnnjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggffffffffgggfeeedfffgggghggggggggffffffffffffffffggggggggggggggggggggggggmmmmmmmmmmmmmmmmkkkkkkkkkkkkllllmmmmmmmmkkkkkkkkllllllllmlllllkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkklllmmnnnjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggffffffffgggffeeeeffffgggggggggggffffffffffffffffggggggggggggggggggggggggmmmmmmmmmmmmmmmmkkkkkkkkjjkkkkllmmmmmmmmkkkkkkkklllllllllllllkkkjjjjjjjjjjjjjjjjkkkkkkkkjjjjjjjjiiiiiiiijjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkklllmmnnojjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhgggggggggggggggggggggggggggggggggggggggggggggggggggggffffffffgggffeeeeeefffffggggggggffffffffffffffffgggggggggggggggggggggggglllmmmmnmmmmmmmmkkkkkkkkkkkkkkkklllllkkkjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjkjjjjiiijjjjjjjjjjjjjjjjkkkkkkkkjjjjjjjjiijjjkkkjklmnnmlkkkjjiihiiiiiiiihhhhhhhhiiiihhhhgggggggghhgf
fghhggggggggggggggggggggggggggggggggggffffeeggggggggeeeeffffggggggggffffffffffffffffddddeeeeeeeeeeeegggggggggggggggghhhhhhhhlllmmmmmmmmmmmmmkkkkkkkkkkkkkkkklllllllljjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjkjjjjiiijjjjjjjjjjjjjjjjkkkkkkkkjjjjjjjjijjjjkkkjklmnmmmkkkjjiihiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhgffghhggggggggggggggggggggggggggggggggggffffeeffffffffeeeeffffggggggggggggggggggggggggdeeeefffffffffffggggggggggggggggggggggggllllmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkklllmmkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjkjjjjiiijjjjjjjjjjjjjjjjkkkkkkkkjjjjjjjjjjjjjkkkjkllmmmmkkkjjiihiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhgffghhggggggggggggggggggggggggggggggggfffffeeeeeeeeeeeeeefffffggggggggggggggggggggggggeeffffggffffffffgggggggggggggggggggggggglllllmmmmmmmmmmmkkkkkkkkkkkkkkkkjjkklmmnkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjkjjjjiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkllmmnkkkjjiihiiiiiiiijjjjjjjjiiiihhhhiiiiiiiihhgffghhggggggggggggggggggggggggggggggggffffeeeeddddddddeeffffggggggggggggggggggggggggggfffgggggffffffffggggggggffffffffffffffffkkllllmmmmmmmmmmkkkkkkkkkkkkkkkkjjkklmmnkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjkjjjjiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjklmnnkkkjjiihiiiiiiiijjjjjjjjiiiihhhhiiiiiiiihhgffghhggggggggggggggggggggggggggggggggfffeeeeeddddddddfffffgggggggggggggggggggggggggggffgggghhffffffffffffffffeeeeeeeeffffffffkkklllllmmmmmmmmkkkkkkkkkkkkkkkkkkklllmmkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjkjjjjiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkjjjjjiijjkmnokkkjjiihiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhgffghhggggggggggggggggggggggggggggggggffeeeeddddddddddffffggggggggggggfffffffffffffffffffgggghggggggggffffffffeeeeeeeeeeeeeeeekkkkllllmmmmmmmmkkkkkkkkkkkkkkkkllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjkjjjjiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkjjjjiiiijklnokkkjjiihiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhgffghhggggggggggggggggggggggggggggggggeeeeedddeeeeeeeefffgggggggggggggfffffffffffffffffffffgggggggggggffffffffeeeeeeeeeeeeeeeekkkk
llllmmmmmmmmkkkkkkkkkkkkkkkklllllkkkllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjkjjjjiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkjjjiiiiiiklnokkkjjiihiiiiiiiihhhhhhhhiiiihhhhgggggggghhgffghhggggggggggggggggggggggggggggggggeeeeedddeeeeeeeefffgggghggggggggeeeeeeeeeeeeeeeeeeffffggggggggggffffffffddddddddeeeeeeeellkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjkkkkkkkkiiiijlmnlkjiiiiihhhhhhhhhhhhhhhhiihhhhgghhhhhhhhggggggggggggggggggggggggggggggggffffeeeeeeeeeeeefffffffffgghhggfggggggggfeeeddddeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddddddddddeeeekkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjiiiijlmnlkjiiiiihhhhhhhhhhhhhhhhiihhhhgghhhhhhhhggggggggggggggggggggggggggggggggfffeeeddfffffffffffffffffgghhggfggggggggeeeeddddeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddddddddddeeeekkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiijlmnlkjihhiihhhhhhhhhhhhhhhhiihhhhgghhhhhhhhggggggggggggggggggggggggggggggggeeeeedddfffffffffffffffffgghhggfffffffffeeeeedddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddeeeejjjkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiijlmnkkjihhhihhhhhhhhhhhhhhhhiihhhhgghhhhhhhhggggggggggggggggggggggggggggggggeeedddddggggggggfffffffffgghhggfffffffffeeeeeeddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddeeeejjjjkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiijlmnkjihhhhhhhhhhhhhhhhhhhhhiihhhhgghhhhhhhhggggggggggggggggggggggggffffffffeeeeeeeeggggggggggggggggfgghhggfeeeeeeeeddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddeeeeiijjjkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
iiiiiiiijlmnkjihhhhhhhhhhhhhhhhhhhhhiihhhhgghhhhhhhhggggggggggggggggggggggggffffffffeeeeffffggggggggggggggggfgghhggfeeeeeeeedddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddeeeeiiijjkklkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiijlmnkjihgghhhhhhhhhhhhhhhhhhiihhhhgghhhhhhhhggggggggggggggggggggggggffffffffeffffgggggggggggggggggggfgghhggfddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddddddddddeeeehiijjkklkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiijlmnkjihgghhhhhhhhhhhhhhhhhhiihhhhgghhhhhhhhggggggggggggggggggggggggfffffffffffggghhffffffffggggggggfgghhggfddddddddddddeeefeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddddddddddeeeeiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiijjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjgghhhiiijjjjjjjjjjjjjjjjiiiiiiiiiiiijklmjjjihhhggikkjijlhgghijhfhhhhhhhhggggggggggggggggggggggggfffffeeeddddddddffffggggffffffffgggggggghhggfeeeddddddddddddddddeeeeeeeeeeeeeeeeddddddddddddddddeeeeeeeeeeeeeeeejjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiijjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjhhhiiiijjjjjjjjjjjjjjjjjiiiiiiiiiiiijklmjjjihhhggikjihikjkkkihhhhhhhhhhhggggggggggggggggggggggggfffffeeeddddddddffffgggggggggggggggggggghggffeeeddddddddddddddddeeeeeeeeeeeeeeeeddddddddddddddddeeeeeeeeeeeeeeeejjjjjjjjkkkkkkkkjjjjjjjjjjjjjjjjjjjjiiiijjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiijjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiijklmjjjihhhghijigfhjmproifhkhhhhhhhhggggggggggggggggggggggggfffffeeeddddddddffffggggggggggggggggggggggfffeeeddddddddddddddddeeeeeeeeeeeeeeeeddddddddddddddddeeeeeeeeeeeeeeeekkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjiiiijjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiijklmjjjihhhghiihffhjquvqiehmhhhhhhhhggggggggggggggggggggggggfffffeeeddddddddffffggggggggggggfffffffffffeeeeddddddddddddd
ddddeeeeeeeeeeeeeeeeddddddddddddddddeeeeeeeeeeeeeeeekkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiijklmjjjihhhgiiihfgknuwvpiehlhhhhhhhhggggggggggggggggggggggggfffffeeeeeeeeeeeffffggggggggggggeeeeeeeeeeeeedddddddddddddddddddeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeeeeeeeeejjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiijjjjjjjjiiiiiiiiiiiiiiiiiiiijklmjjjihhhghiihhjosxuqlihhihhhhhhhhggggggggggggggggggggggggfffffeeeeeeeeeeeffffggggffffffffeeeeeeeeddddddddddddddddddddddddeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeeeeeeeeejjjjjjjjiiiiiiiijjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiihhhjjjjjjjjiiiiiiiiiiiiiiiiiiiijklmjjjihhhghiiijntyzrjgjkiehhhhhhhhggggggggggggggggggggggggfffffeeeeeeeeeeeffffggggffffffffeeeeeeeeddddddddddddddddddddddddeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiihhhgjjjjjjjjiiiiiiiiiiiiiiiiiiiijklmjjjihhhghhiikpw|{oedjnichhhhhhhhggggggggggggggggggggggggfffffeeeeeeeeeeeffffggggeeeeeeeeddddddddcccdddddddddddddddddddddeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiijjiiihhhhhhhhhhhhhhhhhhhiiiiiiiijjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiijjkkkkkjhhhggfffhhhhhhhhiiiiiiiihhhhhhhhiijjklmmkjhggghihfgltz{zjigffgijggggggggggggggggffffffffggggggggggggggggeeefffffgghhhgffeeeeeeeeddddddddddddddddeeeeeeeeeeeeeeeeddddddddddddddddeeeeeeeeddddddddddddddddddddddddiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiijiiiihhhhhhhhhhhhhhhhhhhiiiiiiiijjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiijjjiiihhhhhhhhhhiiiiiiiihhhhhhhhhiijklmmjjihggghghkqxzxujigffgijggggggggggggggggggggggggggggggggffffffffeeefffffggghggfeeeeeeeeeddddddddddddddddeeeeeeeeeeeeeeeeddddddddddddddddeeeeeeeeddddddddddddddddddddddddiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiii
iiiiiiiiiiiiiiiiiiiihhggghhhkkkjjjjjhhhhhhhhiiiiiiiihhhhhhhhhiijkklliiiihggfgjqx|ysmihgffghiggggggggggggggggggggggggggggggggeeeeeeeeeeefffffffggffedddddddddddddddddddddddddeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeddddddddddddddddddddddddiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiihhhhhiijiiiiiihhhhhhhhhhiiiiiiiihhhhhhhhhhiijkkkhhiiihgfhnw}}wmfhhgffghhggggggggggggggggggggggggggggggggddddddddeeefffffeefffeddccccccccddddddddddddddddeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeddddddddddddddddddddddddiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiihhhhhhhhjjijklnoihhhggffhhhhhhhhhhhhhhhhhhhhhhhhhhiiijjjhhiiiiihnsz}zrichgggggghggggggggggggggggggggggggffffffffddddddddeeefffffeeffeedcccccccccddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddddddddddddddddddddddiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhjjjkloqsllkjihgghhhhhhhhhhhhhhhhhhhhhhhhhhhiiiijjihghjlmvx{yslgeggggggggggggggggggggggggffffffffffffffffddddddddeeefffffeefffeddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddddddddddddddddddddddiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhiiiijhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhijmpsuttsqonmlhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiligegkps~}zrkfgjggggggggggggggggggggggggffffffffffffffffeeeeeeeeeeefffffeffffeedeeeeeeeeddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddddddddddddddddddddddddddddddiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhiiijjhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhffgilptv{zywusrqhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhimjfdfkrv��xnecgmffggggffggggggggggggggggeeeeeeeeffffffffeeeeeeeeeeefffffffgggfedeeeeeeeeddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddddddddddddddddddddddddddddddiiiiiiiijjjjjjjjhhhhhhhhffggghhhhhhiiijjiiiiiiiihhhhgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhtvy{|zwusqmjhghhghijjjjifgghiijjgghijjkknifhow|}|xqkgfffgggghhiiggggggggefffggggffffffffeeeeeeeedddd
ddddffffffffeeeeeeeeeeeeeeeeddddddddeeeeeeeeddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiijjjjjjjjiiiiiiiiggghhhiihhhiiiijiiiiiiiihhhhgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgjnswz{|{yvsomlkffggghhhiiiiiihhgghijjkklihksy|}xtojgefggggggghhggggggggfffffgggffffffffeeeeeeeeddddddddffffffffeeeeeeeeddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiijjjjjjjjiiiiiiiihhhiiiiihhhiiiiiiiiiiiiihhhhgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhbdgkotwy�~|yvsqjigfeffgkkjihhgggghiijkkhilqx|||rokhfefgggggffffggggggggffffffggffffffffeeeeeeeeddddddddffffffffeeeeeeeeddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiijjjjjjjjjjjjjjjjiiiiijjjiiiiiiiiiiiiiiiihhhhgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhjihghjlnxz|}}|zytqnkhgghhhhhhhgghhhiijjkfjpw|}{yljhfefghgggffeeeggggggggffffffffffffffffeeeeeeeeddddddddffffffffeeeeeeeeddddddddddddddddccccccccddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiihhhhhhhhhhhhgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhomigffghilqvz|}~~|xtpmlkeefghhiihhhiijjjflu{}{wthgfeefgggggffeeeffffffffffffffeeffffffffddddddddddddddddeeeeeeeeeeeeeeeeddddddddddddddddccccccccddddddddccccccccddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiijjjjjjjjiiiiiiiijjjjiiiiiiiiihhhhhhhhhhhhhhhgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiihggijkachmrvyz~|yurphhhiiijjhhiiijjjhoy~|wqngfffffggggggfffffffffffffffeeeeeeeeeeeeeddddddddddddddddeeeeeeeeeeeeeeeeccccccccddddddddddddddddddddddddccccccccddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiijjjjjjjjiiiiiiiijjiiiihhjiiiihhhhhhhhhhhhhhhgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhffghijjkeefhjmoqwy|~~{xvqponlkiiiiiiiijjls{~yrkhgggggggfgggggghhfffffffffffeeeddddddddddddddddddddddddddeeeeeeeeeeeeeeeeccccccccddddddddddddddddddddddddccccccccddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiijjjjjjjjhhhh
hhhhjiiihhhhjjiiihhhhhhhhhhhhhhhgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiihgedmkifefghmqx}|zzxvrokigiiiiiiijnu}~wnhehhhhhggfgggghhiifffffffffffeeeddccccccccddddddddddddddddeeeeeeeeeeeeeeeeccccccccddddddddeeeeeeeeddddddddccccccccddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiihhhhhhhhhhhhhhhhiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhggggggggggggggggijjmpuy{zzzzwtpnjhgffhjkwyzunhghgggggggghhhhhhhhggggffffffffeeeeddddddddddddddddeeeeeeeeeefggfeedddefgggcddddddeeeeeeeeecccdddddccccccccddddddddeeeeeeeeddddddddeedccdeeeeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiihhhhhhhhhhhhhhhhiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhgggggggggggggggghhhjlpsu{|~~}{zrqomllmnwxwrkgghgggggggghhhhhhhhggggffffffffeeeeddddddddddddddddeeeeeeeeeefggfeedddeefffcccdddddddddddddccddddeeddddddddddddddddddddddddddddddddeddccddeeeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiihhhhhhhhhhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggffffhjmnuvy|~��|zxvtssswvsmhfgigggggggghhhhhhhhggggffffffffeeeeddddddddeeeeeeeeddddddddeefggfeeddeeeeeeccccccccdddddddddddddeeeddddddddddddddddddddddddddddddddeddccddeeeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiihhhhhhhhhhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggffeefghijknqtvxy~}|{{zzvsnieegiggggggggggggggggggggffffffffeeeeddddddddeeeeeeeeddddddddeefggfeeeeeeddddddcccbbbccccccccdddddeeeddddddddddddddddccccccccddddddddddccccddeeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggghggfffggffhjloqs{|}~��upjedegiggggggggggggggggggggffffffffeeeeeeeeeeeeeeeeeeeeddddddddeefggfeeeeeeedddeeddcbbacccccccccccdddddddddddddccccccccccccccccddddddddddccccddeeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggghhhhggghiijknpsuyz|����rmhd
efggggggggggggggggggggggffffffffeeeeeeeeeeeeeeeeeeeeddddddddeefggfeeeeeeefffffedccbabbbbbbbbbbbcccccccccccccccccccccbbbbbbbbdddddddddccbbccdeeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggghhhhhhhhggggggggggggggggggghhhhhiijloswzz{~��~pkfeghgeggggggggggggggggggggffffffffeeeeeeeeeeeeffffffffeeeeeeeeeefggfeeeeffgggghggedcbbbbbbbbbbaaaabbbbccccccccccccccccbbbbbbbbdddddddddccbbccdeeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggghhhhhhhhggggggggggggggggeffgghhheeginsx{}~���}{njfehigdggggggggggggggggggggffffffffeeeeeeeeeeeeffffffffeeeeeeeeeefggfeeeeffghhiihgfedcbbbbbbbbb``aaaaabbbbbbbbbccccccccbbbbbbbbdddddddddcbbbbcdeeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhhhhhgggfhhhhhggghhgggggghhhiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggggggggggggggggggghkosxz|}���}{okfdefffggggggggggggggggggffeeddeffggffeffffffffffffffffffffffffeeeeeeeedeeffgghihhgfeddcbbbaaaabbbbbbbbccccbbbbbbbbbbbbbbbccccccccbbbbbbbbbcccceeeeeeeefeeeeeeeddddddddhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggiiiiiiiiiiihhhhhhhhhiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggggggggggggggggggghjmqtwy{~��~|zplgeefffggggggggggggggggggffeeddeffggffeffffffffeeeeeeeeeeeeeeeeeeeeeeeedeeffggggffedccbcbbbbaaabbbbbbbbccccbbbbbbbbbbbbbbbcccccccccbbbbbbbccccceeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhihhhhhggjjjjjjjjiiiiiiiihhhhiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggggggggggggggggghgghjmoqux|~}{yyrnheefffggggggggggggggggggffeeddeffggffeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeefffggeedccbaacbbbbbbaccccccccccccbbbbbbbbbbbbbbbcccccccccccccbbccccddeeeeeeeeccdddeeeffffffffhhhhhhhhhhhhhhhhhiiiihhgiiiihhhhiiiiijjjhhhhhhhhghhhhiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggggggggggggggggghggghiklpu{}zxwwupjfefffggggggggggggggggggffeeddeffggffeddddddddddddddddddddddddeeeeeeeeeeeeffffeeddccbbbbbbbbbbccccccccccccbbbbbbbbbbbbbbbcccccbccc
ccccccccddddeeeeeeeebbcddeffffffffffhhhhhhhhhhhhhhhhijkkjjhhiiiiiiiigghhhiiigggggfffggghhhhiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggggggggggggggggggggggghhnt{|xutvxslgffffffffffffggggggggggffeeddeffggffeddddddddddddddddddddddddeeeeeeeeeeeeeffffffeedddbbbbccccddddddddccccbbbbbbbbbbbbbbbcccccbccccdddcddddeeeeeeeeeeebccdeeffgggggggghhhhhhhhhhhhhhhhkllmlkihiiiijjjjgghhiiijiihgffeegggghhhhiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggmu|}wrru{umhffffffffffffggggggggggffeeddeffggffeddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffeeeebbbccccceeeeeeeeccccbbbbbbbbbbbbbbbcccccbbccddeedddeeeeeeeeeeeeedddeefffffffffffhhhhhhhhhhhhhhhhlmnonlkiiiijjjkkiijjklllllkjhgffgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggffgghhhhmv~~wqqt}woiffffffffffffggggggggggffeeddeffggffeddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeedddddccbbccccddffffffffccccbbbbbbbbbbbbbbbcccccbbccdeeeddeeeeffeeeeeeeeffffffffffffffffhhhhhhhhhhhhhhhhmnopomkjiijjjkkkkllmmnnoponljiggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggefghhiiinw�wqqt~xoigfffffffffffggggggggggffeeddeffggffeeeeeeeeeffffffffffffffffeeeeeeeeeeeeeeddccbbbbbbbbcccdddffffffffccccbbbbbbbbbbbbbbbcccccbbcddeeeeeeeefffeeeeeeeeggggffffffffffffhhhhhhhhhhhhhhhhlllllllliijjklllpppqrrsssrpnkhfegggggggggggggggghhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhqxxrprz}xmimi_ffffffffffffffffggggggggggggggggeeeeeeeeeeeeeeeeeeeeeeeeeddddccceeeeeeeebbbbbbbbddddddddeeeeeeeeeddccbaabbbbbbbbbccccdddeeeeeeeeeeeeeeeeeeeeeeeeffffffffeeeeffffhhhhhhhhhhhhhhhhiiiiiiiihiijjkkknnnooppqqqomjhgfgggggggggggggggghhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhqx~wqpqw|zoijhaffffffffffffffffggggggggggggggggeeeeeeeeeeeeeeeeeeeeeeeeeddddcccddddddddccccccccddddddddeeeeeeeeeddccbbbbbbbbbbbcccccdddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffhhhhhhhhggggggggffffffffhhhiijjjkkkllmmmnnlkjhgggggggggggggggggghhhhhhhhgggggggggggggggggggg
ggggggggggggggggggggggggggggggggggggggggghhhqx~}vpnprz}tjggeffffffffffffffffggggggggffffffffeeeeeeeeeeeeeeeeeeeeeeeeeddddcccddddddddccccccccddddddddeeeeeeeeedddccbbbbbbbbbbccccddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeefffffhhhhhhhhffffffffffffffffggghhhiihhhiijjjkkjiihhgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhqw}|tnmnmw~xleeiffffffffffffffffggggggggffffffffffffffffeeeeeeeeeeeeeeeeeddddcccccccccccddddddddddddddddeeeeeeeeeedddccccccccccccccdddddeeeeeeeeeeeeeeeeeeeeeeeeddddddddeeffffgghhhhhhhhffffffffggggggggggggghhhggggghhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhqx}{smlmjs|{pfejffffffffffffffffggggggggffffffffffffffffeeeeeeeeeeeeeeeeeddddcccccccccccddddddddeeeeeeeeeeeeeeeeeeeeddddccccccccccddddeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddfffffggghhhhhhhhgggggggghhhhhhhhgggggggggggggggggghhhhhiggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhrx}{slkmjox}uiehffffffffeeeeeeeeggggggggffffffffffffffffeeeeeeeeeeeeeeeeeddddcccddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeccccccccdddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhggghhhhhhhhgghhhhiiggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhty~{slkmkks}zmefffffffffeeeeeeeeggggggggggggggggffffffffeeeeeeeeeeeeeeeeeddddcccddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffccccccccddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeefffggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiihhhhhiiiggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhtz~{slkmlip}}pedffffffffeeeeeeeeggggggggggggggggffffffffeeeeeeeeeeeeeeeeeddddccceeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeefffffccccccccddddeeeeeeeeeeeeeeeeeeeeeeeeeeeefffffffffffgggghhhhhhhhhhhhhhhhhgggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghijvz}zsmklkikt|{peffffffffffffeeeeeefffgggggggggggffffffffeeeeeeeeddeeeeffggggffffeeee
eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeggggggggiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghijuy|zsmkkjhjr{zpfffffffffffffeeeeeefffgggggggggggffffffffeeeeeeeeeeeeffffgggfffffeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffggggggggiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghijtx|zsmjkjghpyyqgfffffffffffffeeeffffffggggggggggeeeeeeeeeeeeeeeeeeffffgggggffffeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffggggggggjjjjjjjjiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghijsx{zsmjjjffmvxqiggggggggggffffeeffffffffffffffffeeeeeeeeeeeeeeeeffffgggggffffeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeggggggggggggggggjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghijsw|ztnkkjfekuxslgggggggggggfffffffffffffffffffffeeeeeeeeffffffffffffggggffffeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeggggggggggggggggiiiiiiiijjjjjjjjjjjjjjjjiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghijsx|{uolkkfdisxuoggggggggggggffffggffffffffffffffffffffffffffffffeeffffggfffeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffggggggggiiiiiiiijjjjjjjjjjjjjjjjiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghijsx}}wpmllfchsyvqgggggggggggggfffgggfffeeffffffffggggggggffffffffeeeefffffffeeeedeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffgggggggghhhhhhhhkkkkkkkkjjjjjjjjiiiiiiiihhhh
hhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghijsy~}wqmmmgchrywshhhhhhhhhggggfffgggfffeeeeeeeeeeggggggggffffffffddeeeeffffeeeeddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeggggggggjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggghhhhhhhhgggggggggggggggggggggggggggggggggggggggghgggghiivy}|xsomjjihinv|kiffghgfggggggggggggggggggggggggggggffffddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeejjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggghhhhhhhhgggggggggggggggggggggggggggggggggggggggghgggghiivy}}ysnljjihinu{ljgfghgfggggggggggggggggggggggggggggffffdddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffeeeeeeeeeeeeeeeeeeeeeeeeffffffffjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggghhhhhhhhgggggggggggggggggggggggggggggggggggggggghgggghiivy}}ysnljjihimtynkgfghgfggggggggggggggggggggggggggggffffdeeeefffeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffeeeeeeeeeeeeeeeeffffffffffffffffjjjjjjjjjjjjjjjjhhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhgggggggghhhhhhhhgggggggggggggggggggggggggggggggggggggggghgggghiivz~~ysnkjjihhlrwqmhffgggggggggggggggggggggggggggggggffffeefffgggffffffffffffffffeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffggggggggjjjjjjjjiiiiiiiihhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhgggggggghhhhhhhhggggggggggggggggffffffffgggggggggggggggghgggghiivz~~zsmjjjihgkpusoiffgggggggggggggggggggggggggggggggffffggggggggffffffffffffffffeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffggggggggggggggggjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggffffffffgggggggggggggggghgggghiivz~zsmiijigginsvpjfefgggggg
ggggggggggggggggggggggggffffiihhgggfffffffffffffffffeeeeeeeeffffffffggggggggggggggggggggggggffffffffffffffffffffffffffffffffgggggggggggggggghhhhhhhhjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggffffffffgggggggggggggggghgggghiivzzsliijigfhmqwrjfefghggggggggggggggggggggggggggggffffkkjihgfeffffffffffffffffeeeeeeeeffffffffggggggggggggggggggggggggffffffffffffffffffffffffffffffffgggggggghhhhhhhhhhhhhhhhjjjjjjjjiiiiiiiijjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggffffffffgggggggggggggggghgggghiivzzslhijigfhlpxrkedfghggggggggggggggggggggggggggggffffllkihfeeffffffffffffffffeeeeeeeeffffffffggggggggggggggggggggggggffffffffffffffffffffffffffffffffhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiihhhhhhhhhiiiijjjiiiiiiiijjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggfhihffhjw{~zsmjhhggghhiuwqfchieggggggggggggggggggggggggffffffffhkmkfdgjeeeeeeeeffffffffeeeeeeeeggggggggffffffffggggggggggggggggffffffffffffffffffffffffgggggggghhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiijjjiiiiiiiijjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggfhihffhjw{~zsmjihgggghhtvqgcgieggggggggggggggggggggggggffffffffgkmlifgiffffffffffffffffffffffffffffffffffffffffggggggggggggggggffffffffffffffffffffffffgggggggggggggggghhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiijjjjiiiiiiiijjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggfhhgffhkw{~~zsmjjihgggggrurhdgieggggggggggggggggggggggggfffffffffjnomiggffffffffffffffffffffffffffffffffffffffffggggggggggggggggffffffffffffffffffffffffgggggggggggggggghhhhhhhhhhhhhhhhjjjjjjjjjjjjjjjjiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggggggggggggggggggghgffilvz~~zsnkjjhgfffgoutjeghfggggggggggggggggggggggggffffffffeinqplhfffffffffffffffffffffffffffffffffffffffffggggggggggggggggggggggggffffffffggggggggggggggggffff
ffffhhhhhhhhhhhhhhhhkkkkkkkkjjjjjjjjiijjjjkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggghfefilvz~~zsnkjjhgfffgltuleghfggggggggggggggggggggggggggggggggfhmqqnieffffffffffffffffffffffffffffffffggggggggggggggggggggggggggggggggggggggggggggggggggggggggffffffffhhhhhhhhhhhhhhhhkkkkkkkkkkkkkkkkjjjjjkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggggfefjmvz~~zsnkjihgggggisvnfghgggggggggggggggggggggggggggggggggghkoqojfggggggggffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhkkkkkkkkkkkkkkkkjjjjkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggggeefjnuy~~ztnlihgggghhgrwogfhggggggggggggggggggggggggggggggggghghlpokgggggggggffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhllllllllkkkkkkkkjjjjkkkkkkkkkkkkjjjjjjjjkkkkkkkkjjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggggedfjnuy~~ztolhhggghhierxpgfhgggggggggggggggggggggggggggggggggiggjnolgggggggggffffffffgggggggghhhhhhhhgggggggggggggggggggggggghhhhhhhhgggggggghhhhhhhhgggggggghhhhhhhhhhhhhhhhhhhhhhhhmmlkjjiikkkkkkkkkkkkkkkkkkkkkkkkjjjjjkkkjjjkkkllkkjjjiiiijjkkjihjjjjjkkkjjjjjjjjjjjjjjjjhhhhhhhhhhhiiijjiiiiiiiihhhhhhhhhhhhhhhhghigfgjmvy|}ztmikjigffffgmqoiefiggggggggggggggggffgggghhggggggggheejqsnheffffffeffffffffgggggggggggggggghhhhhhhhgggggggggggggggghhhhhhhhhhhhhhhhffgghhhhhhhhiiiiffgggfeehhhhhggghhhhhhhhnmmllkkkllllllllkkkkkkkkkkkkkkkkjjjjkkkkllllllllllkkkkjjijjkkjiijjjjkkkkjjjjjjjjjjjjjjjjhhhhhhhhhhhhiiiiiiiiiiiihhhhhhhhhhhhhhhhhhihfgjmwy|}ztmikjiggffffkpniefiggggggggggggggggfgggghhhhhhhhhhhjggkrtpkfgggggggffffffffgggggggggggggggghhhhhhhhgggggggggggggggghhhhhhhhhhhhhhhhgggghhhhghhhhiiigghhhhggiiiihhhhiiiiiiiimmmmmmmmlllllllllllllllllllllllljjkkkkllmmmmmmmmlllllkkkjjjkkjjjjjjjkkkkjjjjjjjjjjjjjjjjiiiiiiiihhhhhiiiiiii
iiiiiiiiiiiihhhhhhhhhiihghkmwy|}ztnikjihggggeimlhfgihhhhhhhhhhhhhhhhgggghhhhhhhhhhhhkihlqtqnggghhhiigggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiihhhhhgggggghhhhhhhijkkjjjjjjjiiiiiiiiiiilmmnnnmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkllllmmmmllllllllllllkkkkkkkjjjjkkkklkkkkkkkkkkkkkkkkiiiiiiiihhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiijighknxz|}ztnjkjihggggehkjhfgihhhhhhhhhhhhhhhhghhhhhiihhhhhhhhkihkoqqoggggghhigggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiihhggggggghhhhiiklmmmmllkkkkjjjjjjjjjjllmnnnmlmmmmmmmmmmmmmmmmmmmmmmmmkkllllmmllllllllllllllllnnmllkkkjkkkklllkkkkkkkkkkkkkkkkjjjjjjjjiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiijjihilnyz||ztokkjihghhhfgiihghihhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiihghknppggfeefgghhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhiiiiiiiiiiihhhggggghhhhhijlmnooomllllkkkjjjjjjjjnnppponmllllllllllllllllllllllllllllmmmmmmmnnnnonnoooooorqpnmlllkkkklllllllllllllllllllljjjjjjjjkjjjjjiiiiiiiiiijjjjjjjjjjjjjjjjjjkjhiloy{||ztokjjihhhhihhhhhhhhhhhhhhhhhhhhhhhhhiiiijjjjjjjjjjjihggilorihgeeefghhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhiiiiiiiiiiihhhhhhhhhiiiijklmnoonmmmllllkkkkkkkkkrstutsqpllllllllkkkkkkkkkkkkkkkkllmmmmnnoppqrrsssssttttuvtrpnmllkkkkllllllllllllllllllllkkkkkkkkllkkkkjjiiiiiiiikkkkkkkkjjjjjjjjjkkjijmoz{||yuoljjihhhiijihhiihgiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjihhlqumljhfgghiiiiiiiihhhhhhhhjjjjjjjjiiiiiiiijjjjjjjjjjjjjjjjhhhhhhhhhhhhhhhhhhhhiiiiiiiiijjjjklmmnmmmmllllkkkkkkkkkkvwxxwvsrkkkkkkkkkkkkkkkkkkkkkkkklmmmmnnnrrstuvwwwwwxxxyyxvtqnmllkkklllllllllllllllllllllkkkkkkkkmmlllkkkiiiiiiiikkkkkkkkkkkkkkkkjkkjijmpz{||yuoljjihhiiikihhijigiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjllkiilrwqoljhhiiiiiiiiiihhhhhhhhjjjjjjjjiiiiiiiijjjjjjjjjjjjjjjjhhhhhhhhhhhhhhhhhhhhiijjiiijjjjkjjklmmllmllllkkkkkkkkkkk|{{zyxwvoonnmmmlmmmmmmmmllllllllmmmmmmmmopruwxxxvvwxxyyyvwwvuromlkkjkklmmmmmmmmmlllmmnnnllllllllmmmmmlkjhiijjjkjkjjjjjjjklllllmmhiihghknz{}~|yvspmjijkjiiiiiiiiiiiihhhgghhhhiiiiiiiiiiiiklmmmmlknljlpuxzxurponkiljgffhkmjjjjjjjjjjjjjjjjjjjj
jjjjkkkkkkkkkkkkkkkkiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjmmmmmmmmllllllllkkkkkkkkzyyxwvuuppoonmmmllllllllllllllllmmmmmmmmnprtvwwwtuuvwwxxxxyywusqmmlkkllmmmmmmmmmlllllmmmllllllllllmmmmlkiiiiiiiijjjkkllmmmmlllllijjihiknyz|}|zvtomjijkjjiiiiiiiijjjjiiiijjjjjkkkkkkkkkkkkllmmllkmkjlpuyzzwuuvvutpomkijjkjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkiiiiiiiijjjjjjjjiiiijjjjkkkkkkkkmmmmmmmmllllllllkkkkkkkkwvvuutssrqqponmmllllllllllllllllmmmmmmmmnoqrtuvvrrsttuvwyzzzzxvuoommllmmmmmmmmmmllllkkkkkkkkkkkkllnoooookkjjiihhjklmopqrqqpoonmmklljiilnxz|}}zwuoliijkkjjjjjjjjjjjjkkkkkkkklllllllllllllkkllllkkmkjlqvyzvtstwyyyutsqomlkkkkjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjijjjjkkkkkkkkkkkmmmmmmmmlllllllllllllllltsssrrrqssrqponnllllllllmmmmmmmmmmmmmmmmnopqrsttqqqqrstuxxyyyxwvqponmmmmlllllllllllkkjjjkkkkkkkknoprstttponmkkjjmnoqtvwxxwwutsrrnnnljjmowy|}}zwupmjiklkkkkkkkkkkijjjkkllkkkllllmlllllllllllllllllkknswyyomlmorrruuvvtrpommlkkkkkllllllllllllllllkkkkkkkkkkkkkkkkllllllllkkkkkkkkjjkkkkllkkkkkkkkmmmmmmmmllllllllllllllllqqqqqqqpssrqqpoommmmmmmmnnnnnnnnmmmmmmmmnoopqrssqppppqsttuuuvuutqqonnmnnmmmmmmmmmlllkkkjllllllllprtvwyyyvtrqonnnqrsuwy{{}||{yxxwoppnllnqwy|}}zwuroljkllkkkkkkkkkiijjkkklkkkkllllkkkkkkkkmllllllmmmmpuxyypmjjklllqrtvvvvuqpnmlkllmmmmmmmmmmmmmmmmllllllllllllllllllllllllllllllllkkklllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmppppppqqrrrqqpppnnnnnnnnnnnnnnnnnnnnnnnnoooopqrsrrpppprsqqqrrrrqqponnnnnmmmmmmmmmmmmmmmmmmmmmmmmrsuwy{{|yxusrqqrttuvwxyz{{{zzyyypqqomnqsxz|}}zwutqnlmmlklllllllllllllmmmllllmmmmllllllllnnmmmmnnoopswyyxvsommnmmmnprtvxyutqomlllmmmmmmmmmmmmmmmmllllllllllllllllllllllllmmmmmmmmllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmopppqqqqqqqqqqqqoooooooonnnnnnnnppppppppppoopqrrutrpppqrppppppppoonnnnoonnnnnnnnnnnooooonnnnnnnnqrtvxyzzyxusrrrsttttttttttttuuvvpqqpopsuyz|}|zvtwtpnnnlkmmmmmmmmppppppppooooppppoooooooooonmmnoorqruxzywvsommnnnmmmnpsvwywtqnmllmmmmmmmmmmmmmmmmllllllllllllllllllllllllmmmmmmmmlmmmmnnnmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnppppqqrrppppqqqqooooooooooooooooppppppppqppopqrrvusq
ppqrpppoooooonnmmnoooooooooooooppqqqoooooooooqruvxxxxwtrqqrstssrqpoommnopqqrpqrqpqtwz{}~|yvsyvroonlkmmmmmmmmtttsssrrqrrrrrssqqqqqqqqpponnoppsstvyzywqmihjkllomlkloru|zvromllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmnnnnmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnqqqqqqqqssrqqqqrqqqqppppooooooooqqqqrrrrppppppppsssrqonmoopppponooooooooqqqqrrrrrrrrrrrrtsqppqsttuuvvwwwwwwvvuuuqqpppooonnnnnooonopoopswxz}~}{xvuspnmmmnppqrtuvv{zzyyxxxwwwwwwwwwwwwwwwwuutssrqqtwyzxvvvspmmnoonnmmmnoqrsrpnmlllmmmmmmmmllllllllmmmnnnnollllllllnnnnnnnnlllllllljjklmnoooooooooonnnnmmmmnnnoooppppppppppqqqqqqqqrrqqqqrsrrqqqqqpppppppppqqqrrrrsppppppppssssrqpoooppppooppppppppssttuuuuvvvvvvvvtsrqqrstuuuuvvvvuuuuttttppppoooonnnnoooonopoopsvxz}~}{xvusqonnoorstuvwxxzzzyyxxxwwwwwwwwwwwwwwwwwvvuuttswxzywwwytqnmoponnmmmmnpprqonmllmmmmmmmmmllllllllllllmmmmmmmmnnnnppppppppoooooooollmmnnnnoooooooooonnnmmmnooooppppppppppprrrrrrrrqqqqqrstssrrrrqqqqqqqqqqrrrrrrssqqqqqqqqsstttsrrppqrrqqprrrrrrrrvvvwxxyyyyyyyyyyutsrrstuvvvuttssrrrrrsssppppoooonnnooooonopoopsvxz}~}{xvutrqpqqruuvwxyyzzzyyxxwwwwwwwwwwvvvvvvvvxxxwwvvvyzywvvy|vspoppponnmmmmnnpoonmmmmnnnnnnnnmmmmmmmmkkkkllllnnoppqrrrrrrrrrrrrrrrrrrppoonnnnnnnnnnnnooooonnnoopppppppppppppprrrrrrrrqqqqrsuuttttssrrrrrrrrrrrrrsssssrrrrrrrrrsuvvvutsstuuuttttttttttvvwxyyzzzzzzzzzzvutsstuvwwvutsrqopppqqrrppppppppooooppppopqpopsvxz}~}{xvuutsssttvvvwxxyyyyyxxwwwwwwwwwwwvvvvvvvvxxxwwwwwyywtsuy}xuqpqrqpooonnnmmoonnnnnnoooooooonnnnnnnnmmmmmnnnppqrstuuuuuuuuuuuuuuuuuusrqponnmnnnnnnnnpppppppppppqqqqqqqqqqqqqssssssssqqqrsuvwvvvuttssssssssssssssssssssssssssrtvwxwvuvvwxxxxwuuuuuuuuuvvwwxxyxxxxxxxxvuuttuuvwwvutsrqooppqqqqpppqqqqqpppqqqqrqrrqpqtwxz}~}{xvvuuuuuuvuuuvvwwwyyxxwwvvwwwwwwwwuuuuuuuuvvvwwwwwxwurqsx|xurqrsrqrqqqppoopppppppoppppppppoooooooopppqqqqqrrstuvvwvvvvvvvvuuuuuuuutssqponnooooooooqqqqqqqqrrrqqqqqqqqqqqqqssssssssssstuvwxxxwvvutttttttttttttttsssssssssssrtvxyxwvwwxyzzyyvvvvvvvvuuuvvvvvvvvvvvvvuuuuuuuuvvvuttssqqqqrrrrqqrrrrrrrrrrsssssstsrruxxz}~}{xvvvvvvvvvuuuuuuuuxxxwwvvvvvvvvvvvuuuu
uuuuvvvwwxxxvvusrsx{xurqrssrttuuttsrrrrsrrqqppppppppppppppppssssttttssttuvvwuuuuuuuuttttttttssrqqpooppppppppqqrrrrrrssrrrrrrrrrrrrrrttttttttuuuuvwxyyyxwwvuuttttttttuuutttssttttttttstwyyxvuuvwxxxxxvvvvvvvvwwvvvvvuuuuuuuuuuuuuuuuuuuuuvvvvtttsssssrrsssttttttttuuutuutstwyxz}~}{xvwwwwvvuuvvvvvvvvxxwwvvuuuuuuuuuuttttttttwxxyyzz{vwwvuvy|vtqqrttswwxyxxwvtuuuutsrqqqqqqqqqqqqqqqqssttttuuttttuuuvttttttttrrrrrrrrrqqqqqppqqqqqqqqrrrrsssstssssrrrrrrrrrrrttttttttwwvvwxyyzyyxwvuuuuuuuuuuuuutttssttttttttsuwyyxvtssuvvvvvvvvvvvvvxxxwwvvvuuuuuuuuttuuuutttuuvvwwwvvuutttsssstttuuuuuuuvvvuvvuttwzxz}~}{xvxwwwvvutxxwwwwwwxxwwvvuuttttttttttttttttyyz{{|}}wyzzyy{}uspprtutxyz{{zyyvwwwwutsqqqqqqqqrrrrrrrrsssstttttttttttussssssssqqqqqqqqpppqqqqqrrrrrrrrrrrssttttttsssrrrrrrrrrrttttttttuuuvwwwxxxxxxxxxxwvuuuuuuuuuuuuuttttttttrvz{xusrsttuuttsuuuvvvvvuuuuuuuuwwvvuuuttssstuwxwvvuvwxxvvvvvvvvvvvvvvvvuuvvvwwwwvvvvwxxy{}~~{xvwvutttuuvvvvvvvvwwxxxxyyzywvutuuvvvvwwwxyyyzzzz{yyyyy{|}yvsrtvwvxyyzzyyxwwwwvutsssssssssssssssssrrrrssttrrsstuuuuuttttsssrrrrsttrrrrrrrrsssssssssttttuuutuuuutsstttttttttttttttttttuuvvvwwwwwwwwxwvuttuutttttttttttttttttw{{yvtstuuvvuutuuvvvvwwvvvvvvvvwwvvvvvvuuttuvwxwvvvvwxxvvvvvvvvvvvvvvvvvvvvvwwwvvvuvvwxy{}~~{xvwvutttuuvvvvvvvvwwxxxxyyzywvuuuuvvvvwwwwxyyyyyzzyyxyy{|}zwsrtvvvwxyyyyxwwwwwwuttttttttttssssssssssssssssrssttuuuuuuttttsssrrrsttsssssssstttttttttttttuuuuuvvvuttuuuuuuuuuuuuuuuussstuuuvvvvvvvvvwwvutttuttttttttttttttttuy|}zwvuvvwwwwvvvvvvwwwwwwwwwwwwwwwwwxxxwwwvvwwxwvvvvwwxvvvvvvvvwwwwwwwwwwwvvvvvvvuuuvwwy{}~~{xvwvutttuuvvvvvvvvwwwxxxxxyxwvvuuuvvvwwwwwwwwxxxxyxxxxy{|}|xtstuuuwwxxxxwwwwwwwvutttttttttttttttttttttssssssttuuuvuuutttttssrrsstuuuuuuuuuuuuuuuuutttuuuuuvvwwwvuuvvvvvvvvuuuuuuuutuuuvvwwxxxxxxxxwwvutttuttttttttuuuuuuuuwz}~{ywwwxxyyxxwwwwwwxxxyyyyxxxxwwxxyyyyzyyxxxxxvvvvwwwxvvvvvvvvwwwwwwwwwwwvvvuuvuuuuvwwy{}~~{xvwvutttuuvvvvvvvvwwwwxxxxxxwwvvvvvvwwwwwwvvvvwwwwxxxxyz|}}zuttuutvvwxxwvvwwwwwvuuttttttttuuuuuuuuuuuuttsstttuuuvvuuuutttttsssstuuwwwwwwwwvvvvvvvvuuuuuvvvwwxxxwvvwwww
wwwwvvvvvvvvwwxxyyyzzzzzzzzzxwvuuuuuuuuuuuuuuuuuuuuuy|~~|zyyxyyzzyyxwxxxxyyyzzzzyyyxxxxyyyzz{{zzyyxxvvvwwwwwvvvvvvvvwwwwwwwwwwwvvvuuvvuuuvwwy{}~~{xvwvutttuuvvvvvvvvwwwwwxxxwwwwwwwwwwwwwwvvuuuvvvvvwwwxyz|}~zvtuvuuvvwwwwvvvwwxwwvuuuuuuuuuvvvvvvvvvvvuuuttuuuuuvvvvuuuuttttttsttuvxxxxxxxxwwwwwwwwuuuvvvvvxxxyxxwvwwwwwwwwvvvvvvvvxyyyz{{{||||||||yxwvvvvvvvvvvvvvvvvvvvvvz||zyzyyzzzzyyxxyyyyzz{{zzyyyxxxyyyyyyz{{{zzyxuvvwwwwwwwwwwwwwwwwwwwwwwwwvvvvvwvvvvwxxy{}~~{xvwvutttuuvvvvvvvvvvwwwwxxvwwxxxwwwwwwwvvvuuuuvvvvvvwwyz|}~{wuuvvuvvwwwwvvvwwxxwwvvvvvvvvvvvvvvvvvvvvvvvvuvvvvvvvvvvvuuuutuutttuvvwwwwwwwwwwwwwwwwvvvvvwwwxxxyxxwvwwwwwwwwwwwwwwwwxxxyyzzz{{{{{{{{zyxwwwwwwwwwwwwwvvvvvvvvz}~|zzzyyz{{zyyyyyyzzzz{{zzyyxxyyyyxxxxzz{{{zyyuvwwxwwvwwwwwwwwvvvvvvvvvvvvvwwwxwwwwxxyy{}~~{xvwvutttuuvvvvvvvvvvvwwwwwvvwxyyxxwwwwvvvvuuuuvvvvvvvwyz|}~{wuvwwvvwwxxwwvvwwxxxwwvvvvvvvvvvvvvvvvvvvvwwwwvvvvvvvvvvvuuuuuuutttuvwwwwwwwwwvvvvvvvvvvvwwwwwwxxxxwwvwwwwwwwwwwwwwwwwvwwwxyyyzzzzzzzz{zyxwwxxwwwwwwwwwwwwwwwwz}~~|zzzyyz{{zyyyyyzzzz{{{zzyxxxyyyyxxwwyyz{{{zyuvwxxwwvwwwwwwwwvvvvvvvvuuvvvwwwxxwwwxyzy{}~~{xvwvutttuuvvvvvvvvvvvwwwwwuvwyyyxxxwwwvvvvuuuvvvvvvvvwxz|}~zwuvwxwwwxxxxwwvwwxxxwwvvvvvvvvuuuuuuuuvvvwwwxxwwwwvvvvvvvvuuuuuuutuuvwvvvvvvvvvvvvvvvvvvvwwwwxwwxxxwvvwwwwwwwwwwwwwwwwxxxyyyyyzzzzzzzzzzzyyyyywwwwwwwwxxwwxxyz{|}}}{zxyyyyyyyyyyyzz{{{zzzyyyyxyyyyyyyyyzz{||{{zyxwvvwwwwwwwwwwvvvvvvvvuuvvvwwwwwwwwxyz{{}}}|zywwwwwvvvvvvvvvvvwwwwwwwwvwxyzzyyxxwwwwvvuvvwwvuuwwvwxz|~~{xvwwwvwwwxxxxxxxxxxwvuwwwwwwwwvvvvuuuuuuvvwxxxwwwvwwxywwwvvvvvuuuuuuuuwwwwwwwwvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxyyyyzzzzzzzzzzyyyyxxwwwwwwwwxxxwxyyz{|}}}{zxyyyyyyyyyyzzz{{{zzzzzyyyyyyyyyyyyzz{|{{{zyxwvvwwwwwwwwwwvvvvvvvvvvvvwwwwwwwwwxyz{{}}}|zywwwwwvvvvvvvvvvvwwwwwwwwvwxyzzyyxxwwwwvvuvwwwvvuxwwwxz|}~{xvwxwvwwwxxxxxyyyyywvvwwwwwwwwvvvvvuuuuuvvwwxxwwwvwwxyxwwwvvvvuuuuuuuuwwwwwwwwvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxyyyzzzzzzzzyyyyxxxxwwwwwwwwyyxxxyzz{|}}}{zxyyyyyyyyzzzzz{{{{{{zzzzzyyyyyyyyyzz{{{{zzyxwwwwwwwwwwwwwvvvvvvvvvvvv
wwwwwwwwwxyz{{}}}|zyxxwwwwvvvvvvvvvvwwwwwwwwvwxyzzyyxxwwwwvvvvwwwwvvxwwwxz|}~{xwwxwvwwwxxxxx{{{zzxwvwwwwwwwwwvvvvuuuuvvvwwxxwwwvwwxyxxxxwwwvuuuuuuuuwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxyyyyyyyyyyyyyyxxxxxwwwwwxxxzzyxyyzz{|}}}{zxyyyyyyyyzz{{{{{{|{{{{zzzyyyyyyyyyzz{{{zyzyxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxyz{{}}}|zyxxxwwwwwvvvvvvvvwwwwwwwwvwxyzzyyxxwwwwvvvvwxxxwwxxwwxy{}~{xwwxwvwwwxxxxx||||zyxwwwwwwwwwwwwvvvvuvvvvwwwxwwwvwwxyxxyyyxwwvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxyyyyyzzzzzzzzyyyyxxxxwwxxxyyy{zzyyyz{{|}}}{zxyyyyyyyy{{{{{{{{|{{{{zzzyyyyyyyyyzz{{zyyzzyxwwwxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxyz{{}}}|zyxxxxwwwwvvvvvvvvwwwwwwwwvwxyzzyyxxwwwwvvvwwxyxxxyxwwxy{||xwxxxwwwwxxxxx||||{zxxwwwwwwwwwwwwvvvvvvvwwwwwwwwvwwxyyyzzzyxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxyyyyyyyyyyyzzzzzzzzzzzzzzzzyyyyyxxyyzzz{|{zzyzz{{|}}}{zxyyyyyyyy||{{{{{{{{{zzzzzzzzzzzzzyzz{zzyx{zyxwwxxwwwwwwwwxxxxxxxxxxxxwwwwwwwwwxyz{{}}}|zyxxxxxwwwvvvvvvvvwwwwwwwwvwxyzzyyxxwwwwvvvwxyyyyyyxwwxy{||ywxyxwwwwxxxxx{|||{zyywwwwwwwwwwwwwvvvvvwwwwwwwwwvwwxyyz{{{zxxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxx{{{{{{{{zzz{{{{{{{{{{{{{{{{zzzzyyyzz{{||||{zzzz{{|}}}{zxyyyyyyyy||||{{{{zzzzzyyyzzzzzzzzyzzzzyxx{zyxxxxxwwwwwwwwxxxxxxxxxxxxwwwwwwwwwxyz{{}}}|zyyyxxxxwwvvvvvvvvwwwwwwwwvwxyzzyyxxwwwwvvvwxyzzyyyxwwwy{||ywxyxwwwwxxxxxzz{{{{zzwwwwwwwwxwwwwvvvwwwwwwwwwwwvwwxyyz{||{yxwwwwwwwwwwwwwwwwxxxxxxxxwwwwwwwwwwwwwwwwxxxxxxxx{{{{{{{{{{{{||||||||||||{{{{{zzzzzz{||}}}|{zzz{{{|}}}{zxyyyyyyyy||||{{{zzzzyyyyxzzzzzzzzyzzzzyxx{zyxxxxxwwwwwwwwxxxxxxxxyyxxxwwwwwwwwxyz{{}}}|zyyyxxxxwwvvvvvvvvwwwwwwwwvwxyzzyyxxwwwwvvvwxyzzzzyyxwwy{||yxxyxwwwwxxxxxyyz{{{zzwwwwwwwwxxwwwwvvwwwwwvvvwwwvwwxyyz|}|{yxxxxxxxxxwwwwwwwwxxxxxxxxwwwwwwwwwwwwwwwwxxxxxxxx�������������������������������~~~~~~~~}}}}}}}}{{{{{{{{~}}}||||||||||||||||||||}||||{{{{{{zzzzz{{{zyyxxxxyyyyzzzzzzzzzzxwwwxy{||}~~}{yxxxxxxwwwvvvwxxyyxwvuuvwxwxxxyyyyxxxxxxxxxxxyz{{{yyyyyyyy}|zyxwwwvvvwwxxxwxyz{{zyvvvvvvvvxxxxxxxxwwwwwwwwwwvv
vwxxzz{{{zyxwwwwwwwwwwwwxxxxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww������������������������������������������������~~~~~~~~}~~~~~~~~~~~~~~~~~~~~~~~~~~~~}}}}}}}|||||{{zzyyxxxxxyyyyyyyyyyyyyxxxxxy{{|}~~}{zxxxxwwwwwxxyyzzzxwvvvvwxwwwxxxyyxxxxxxxxwxxyyzz{{{{{{{{{~}|zyxxxxxxxxxyyxyz{{{{zxxxxxxxxyyyyyyyyxxxxxxxxxxwwwxyyzzz{zzyxxxxxxxxxvvwwwxxxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww������������������������������������������������������������������������������������������������|||{{zzzyyzzzz{{zzzzzzzzyyyyyz{||}~~||yyxxxxwwyyzz{{{|xxwwwwxxwwwxxxxxyyyyyyyywxxxyyyz{{{{{{{{�}{zyyyzzzyyyyyyzz{{{{zzzzzzzzzyyyyyyyyxxxxxxxxyxxxxyyzyzz{zzyxxxxxxxxxvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww�����������������������������������������������������������������������������������������������������������~~}}~~~~}}}}}}}}|||||||}z{|}~}}|zzzzyyyyzzz{{{||zzyyyyzzxxxyyyyyyyyyyyyyyyyyyyyy{{{{{{{{�}{zyxxzzzyyyxxyyzzzzzzxxxxxxxxxxxxxxxxwwwwwwwwxxwwwxyyzz{{{zyxyyyyyyyyvvvvvwwwwwwwwwwwwwwwwwwwvvvvvvvvwwwwwwww�����������������������������������������������������������������������������������������������������������������������������������z{|}}}}}}||||{{{{{{{{{||||}}}}||{{{{{{{{zzzzzzzz||{{{{{{zzzzzzzz~}{zyxxxzzzyyxxxzyyyyyyywwwwwwwwwwwwwwwwvvvvvvvvwwwvwwxy{{{|{{zyyyyyyyyywwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww����������������������������������������������������������������������������������������������������������������������������������������}~~���~}}}~~~~~��������~~~~~~~~~~~��~~~{{{{{{{{}|{zzzzz{{{{{{zz||{{{{{{xxxxxxxxyyyyyyyyxxxxxxxxyxxxxyzz||}}}|{zzzzzzzzzxxxxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~}||}~~~~~���~�~~~~~~~~}}}}}}}}||||||||||{{{|}}}}~~~}||zzzzzzzzyyyxxxxwwwwwwwwwwwwwwwwwxxxxxxxxwwwwwwww��������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~��~~~}}zzzzzzzzzzyyyxxxwwwwwwwwwwwwwwwwyyyyyyyywwwwwwww��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~}}}}}}}}}~~~}||{{zzzzzzzzzzzzzzzzyyyyyyyywwwwwwww���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~~~~~~}}}}}}}}����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
\ No newline at end of file
diff --git a/codec/L2/demos/resize/kernel/kernel_resize.cpp b/codec/L2/demos/resize/kernel/kernel_resize.cpp
new file mode 100644
index 0000000000..8fba9feea7
--- /dev/null
+++ b/codec/L2/demos/resize/kernel/kernel_resize.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file kernel_resize.cpp
+ *
+ * @brief HLS top function of the resize demo: declares the AXI interfaces and forwards to xf::codec::resizeTop.
+ */
+
+#include "kernel_resize.hpp"
+#include "bicubicinterpolator.hpp"
+
+extern "C" void kernel_resize(ap_uint<32>* configs, ap_uint<WDATA>* axi_src, ap_uint<WDATA>* axi_dst) {
+// clang-format off
+#pragma HLS INTERFACE m_axi offset = slave bundle = gmem0 port = configs latency = 32 num_read_outstanding = \
+    64 max_read_burst_length = 64 num_write_outstanding = 64 max_write_burst_length = 32 depth = 5
+// axi_src gets its own m_axi bundle (gmem1); configs above is on gmem0
+#pragma HLS INTERFACE m_axi offset = slave bundle = gmem1 port = axi_src latency = 32 num_read_outstanding = \
+    64 max_read_burst_length = 64 num_write_outstanding = 64 max_write_burst_length = 32 depth = 128
+// axi_dst gets a third m_axi bundle (gmem2)
+#pragma HLS INTERFACE m_axi offset = slave bundle = gmem2 port = axi_dst latency = 32 num_read_outstanding = \
+    64 max_read_burst_length = 64 num_write_outstanding = 64 max_write_burst_length = 32 depth = 128
+// every pointer argument and the return are also mapped to the s_axilite bundle "control"
+#pragma HLS INTERFACE s_axilite port = configs bundle = control
+#pragma HLS INTERFACE s_axilite port = axi_src bundle = control
+#pragma HLS INTERFACE s_axilite port = axi_dst bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+    // clang-format on
+    // all processing is delegated to xf::codec::resizeTop, which is not defined in this file
+    xf::codec::resizeTop(configs, axi_src, axi_dst);
+}
diff --git a/codec/L2/demos/resize/kernel/kernel_resize.hpp b/codec/L2/demos/resize/kernel/kernel_resize.hpp
new file mode 100644
index 0000000000..31b1293c9c
--- /dev/null
+++ b/codec/L2/demos/resize/kernel/kernel_resize.hpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file kernel_resize.hpp
+ *
+ * @brief This file contains top function of test case.
+ */
+
+#ifndef _XF_CODEC_KERNEL_RESIZE_HPP_
+#define _XF_CODEC_KERNEL_RESIZE_HPP_
+
+#include <ap_fixed.h>
+#include <ap_int.h>
+#include <hls_math.h>
+#include <hls_stream.h>
+
+/* Fixed-point format used for interpolation: fixed_t below is ap_fixed<W, I> */
+#define W 33 // first ap_fixed argument (total width); 36, 23 and 24 look like earlier trial values
+#define I 14 // second ap_fixed argument (integer bits); original note: "8K"
+
+/* Pixel and AXI word widths */
+#define WBIT 8              // bits per pixel -- presumably, since WDATA is WBIT * NPPC (TODO confirm)
+#define NPPC 8              // 1-pixel/8-pixel implementation (see the two notes below)
+#define WDATA (WBIT * NPPC) // axi data width
+// 1 - 1-pixel / clock Interpolation
+// 8 - 8-pixel / clock Interpolation
+
+#define MAX_SRC (8192 * 8192 / NPPC) // 64M / NPPC, i.e. 8M when NPPC is 8
+#define MAX_DST (8192 * 8192 / NPPC) // 64M / NPPC, i.e. 8M when NPPC is 8
+
+/* Data types for resize: data_t streams WDATA-bit words, data_8x_t streams 72-bit words, fixed_t is ap_fixed<W, I> */
+typedef hls::stream<ap_uint<WDATA> > data_t;
+typedef hls::stream<ap_uint<72> > data_8x_t;
+typedef ap_fixed<W, I> fixed_t;
+/* Ceiling division: returns (a + b - 1) / b, i.e. a / b rounded up for positive integer operands; b == 0 is not guarded. */
+template <typename T>
+T DivCeil(T a, int b) {
+    return (a + b - 1) / b; // adding b - 1 before dividing rounds the integer quotient up
+}
+/* Kernel entry point with C linkage; the definition is in kernel_resize.cpp. */
+extern "C" void kernel_resize(ap_uint<32>* configs, ap_uint<WDATA>* axi_src, ap_uint<WDATA>* axi_dst);
+
+#endif // _XF_CODEC_KERNEL_RESIZE_HPP_
diff --git a/codec/L2/demos/resize/utils.mk b/codec/L2/demos/resize/utils.mk
new file mode 100644
index 0000000000..0ee80e90da
--- /dev/null
+++ b/codec/L2/demos/resize/utils.mk
@@ -0,0 +1,270 @@
+#
+# Copyright 2019-2022 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# vitis makefile-generator v2.0.6
+#
+#+-------------------------------------------------------------------------------
+# The following parameters are assigned with default values. These parameters can
+# be overridden through the make command line
+#+-------------------------------------------------------------------------------
+
+REPORT := no
+PROFILE := no
+DEBUG := no
+
+# REPORT: any value other than "no" requests both the 'estimate' and the
+# 'system' report through VPP_LDFLAGS.
+ifneq (no,$(REPORT))
+# a single append carries both --report switches
+VPP_LDFLAGS += --report estimate --report system
+endif
+
+# PROFILE=yes: generate the profile summary report
+ifeq (yes,$(PROFILE))
+VPP_LDFLAGS += --profile_kernel data:all:all:all
+endif
+
+# DEBUG=yes: generate the debug summary report
+ifeq (yes,$(DEBUG))
+VPP_LDFLAGS += --dk protocol:all:all:all
+endif
+
+# Fall back to the default install locations when the tool variables are unset or empty
+ifndef XILINX_VITIS
+  # recursive assignment: TOOL_VERSION is expanded whenever XILINX_VITIS is used
+  export XILINX_VITIS = /opt/xilinx/Vitis/$(TOOL_VERSION)
+endif
+ifndef XILINX_XRT
+  # default XRT install prefix
+  export XILINX_XRT = /opt/xilinx/xrt
+endif
+# check_device: warn when PLATFORM_NAME matches no PLATFORM_ALLOWLIST entry, fail when it matches a PLATFORM_BLOCKLIST entry. Uses POSIX [ ] so it also works when /bin/sh is not bash.
+check_device:
+	@set -eu; \
+	inallowlist=False; \
+	inblocklist=False; \
+	for dev in $(PLATFORM_ALLOWLIST); \
+	    do if [ -n "$$(echo $(PLATFORM_NAME) | grep $$dev)" ]; \
+		then inallowlist=True; fi; \
+	done ;\
+	for dev in $(PLATFORM_BLOCKLIST); \
+	    do if [ -n "$$(echo $(PLATFORM_NAME) | grep $$dev)" ]; \
+		then inblocklist=True; fi; \
+	done ;\
+	if [ "$$inallowlist" = False ]; \
+	    then echo "[Warning]: The device $(PLATFORM_NAME) not in allowlist."; \
+	fi; \
+	if [ "$$inblocklist" = True ]; \
+	    then echo "[ERROR]: The device $(PLATFORM_NAME) in blocklist."; exit 1;\
+	fi;
+
+# Derive HOST_ARCH from the 'CPU Type' reported by platforminfo for $(PLATFORM); := makes the query run once instead of once per comparison below
+ifneq (,$(PLATFORM))
+HOST_ARCH_temp := $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
+ifeq ($(HOST_ARCH_temp), x86)
+HOST_ARCH := x86
+else ifeq ($(HOST_ARCH_temp), cortex-a9)
+HOST_ARCH := aarch32
+else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
+HOST_ARCH := aarch64
+endif
+endif
+
+
+# Choose LINK_TARGET_FMT: xsa for versal devices with Vitis >= 2022.1, xclbin otherwise. := keeps v++ and platforminfo from being re-run on every reference.
+VITIS_VER := $(shell v++ --version | grep 'v++' | sed 's/^[[:space:]]*//' | sed -e 's/^[*]* v++ v//g' | cut -d " " -f1)
+DEVICE_TYPE := $(shell platforminfo -p $(PLATFORM) | grep 'FPGA Family' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
+ifeq ($(DEVICE_TYPE), versal)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+LINK_TARGET_FMT := xsa
+else
+LINK_TARGET_FMT := xclbin
+endif
+else
+LINK_TARGET_FMT := xclbin
+endif
+
+# Map the host architecture to a device family name (space-indented: these are assignments, not recipe lines)
+ifeq (aarch32,$(HOST_ARCH))
+  DEV_FAM = 7Series
+else ifeq (aarch64,$(HOST_ARCH))
+  DEV_FAM = Ultrascale
+endif
+
+# Stop unless HOST_ARCH is exactly one word out of aarch64, aarch32, x86; an empty HOST_ARCH is rejected as well
+ifeq (,$(and $(filter 1,$(words $(HOST_ARCH))),$(filter aarch64 aarch32 x86,$(HOST_ARCH))))
+$(error HOST_ARCH variable not set, please set correctly and rerun)
+endif
+# check_version: when git is found by `which` and $(XFLIB_DIR)/.git exists, print the latest commit of XFLIB_DIR; otherwise the recipe is empty
+check_version:
+ifneq (, $(shell which git))
+ifneq (,$(wildcard $(XFLIB_DIR)/.git))
+	@cd $(XFLIB_DIR) && git log --graph --pretty=format:'%Cred%h%Creset -%C(yellow)%d%Creset %s %Cgreen(%cr) %C(bold blue)<%an>%Creset' --abbrev-commit -n 1 && cd -
+endif
+endif
+
+# check_sysroot: for a non-x86 HOST_ARCH, abort with $(error) if SYSROOT is unset or empty; for x86 the recipe is empty
+check_sysroot:
+ifneq ($(HOST_ARCH), x86)
+ifndef SYSROOT
+	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+
+# Select CXX. x86: the system g++ major version (__GNUG__) must be >= the major of CXX_VER (8.3.0 for Vitis >= 2022.1, else 6.2.0); if not, use the g++ under XILINX_VIVADO, or stop when XILINX_VIVADO is unset. aarch64/aarch32: cross g++ under XILINX_VITIS.
+CXX := g++
+ifeq ($(HOST_ARCH), x86)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif
+CXX_V := $(shell echo $(CXX_VER) | awk -F. '{print tolower($$1)}')
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
+ifndef XILINX_VIVADO
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
+else
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
+ifeq ($(LD_LIBRARY_PATH),)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
+else
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
+endif
+$(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
+endif
+endif
+else ifeq ($(HOST_ARCH), aarch64)
+CXX := $(XILINX_VITIS)/gnu/aarch64/lin/aarch64-linux/bin/aarch64-linux-gnu-g++
+else ifeq ($(HOST_ARCH), aarch32)
+CXX := $(XILINX_VITIS)/gnu/aarch32/lin/gcc-arm-linux-gnueabi/bin/arm-linux-gnueabihf-g++
+endif
+
+# Detect the OS for the XRT C++ API flags. OSDIST uses := so lsb_release is forked once, not once per findstring test below.
+OSDIST := $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
+OSREL = $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
+
+# CentOS 7 or Red Hat 7 with an x86 host: add -D_GLIBCXX_USE_CXX11_ABI=0 to XRT_CXXFLAGS
+ifneq ($(findstring centos,$(OSDIST)),)
+ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+else ifneq ($(findstring redhat,$(OSDIST)),)
+ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+endif
+
+# Command name of the v++ compiler/linker
+VPP := v++
+
+# Command names of the AI Engine compiler and the two simulators
+AIECXX := aiecompiler
+AIESIMULATOR := aiesimulator
+X86SIMULATOR := x86simulator
+# check_vivado: fail with a hint when $(XILINX_VIVADO)/bin/vivado does not exist (test is made while the makefile is parsed)
+.PHONY: check_vivado
+check_vivado:
+ifeq (,$(wildcard $(XILINX_VIVADO)/bin/vivado))
+	@echo "Cannot locate Vivado installation. Please set XILINX_VIVADO variable." && false
+endif
+# check_vpp: same test for $(XILINX_VITIS)/bin/v++
+.PHONY: check_vpp
+check_vpp:
+ifeq (,$(wildcard $(XILINX_VITIS)/bin/v++))
+	@echo "Cannot locate Vitis installation. Please set XILINX_VITIS variable." && false
+endif
+# check_xrt: same test for $(XILINX_XRT)/lib/libxilinxopencl.so
+.PHONY: check_xrt
+check_xrt:
+ifeq (,$(wildcard $(XILINX_XRT)/lib/libxilinxopencl.so))
+	@echo "Cannot locate XRT installation. Please set XILINX_XRT variable." && false
+endif
+# Put the Vitis and XRT tools on PATH; on x86 also put XRT's lib dir first in LD_LIBRARY_PATH. The export is required for recipes to see it when the variable is not already in the environment.
+export PATH := $(XILINX_VITIS)/bin:$(XILINX_XRT)/bin:$(PATH)
+ifeq ($(HOST_ARCH), x86)
+ifeq (,$(LD_LIBRARY_PATH))
+export LD_LIBRARY_PATH := $(XILINX_XRT)/lib
+else
+export LD_LIBRARY_PATH := $(XILINX_XRT)/lib:$(LD_LIBRARY_PATH)
+endif
+endif
+# Resolve XPLATFORM (.xpfm path) from PLATFORM: an existing path is used as is; otherwise each location below is tried first by exact name, then by awk regex match on the .xpfm paths. NOTE(review): a pattern matching several files leaves several words in XPLATFORM.
+ifneq (,$(wildcard $(PLATFORM)))
+# Use PLATFORM as a file path
+XPLATFORM := $(PLATFORM)
+else
+# Use PLATFORM as a file name pattern
+# 1. search paths specified by variable
+ifneq (,$(PLATFORM_REPO_PATHS))
+# 1.1 as exact name
+XPLATFORM := $(strip $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/$(PLATFORM)/$(PLATFORM).xpfm)))
+# 1.2 as a pattern
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/*/*.xpfm))
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 1.2
+endif # 1
+# 2. search Vitis installation
+ifeq (,$(XPLATFORM))
+# 2.1 as exact name
+XPLATFORM := $(strip $(wildcard $(XILINX_VITIS)/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 2.2 as a pattern
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard $(XILINX_VITIS)/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 2.2
+endif # 2
+# 3. search default locations
+ifeq (,$(XPLATFORM))
+# 3.1 as exact name
+XPLATFORM := $(strip $(wildcard /opt/xilinx/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 3.2 as a pattern
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard /opt/xilinx/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 3.2
+endif # 3
+endif
+# Multi-line message for check_platform; exported so the recipe can read it as the shell variable MSG_PLATFORM
+define MSG_PLATFORM
+No platform matched pattern '$(PLATFORM)'.
+Available platforms are: $(XPLATFORMS)
+To add more platform directories, set the PLATFORM_REPO_PATHS variable or point PLATFORM variable to the full path of platform .xpfm file.
+endef
+export MSG_PLATFORM
+
+# check_platform: print MSG_PLATFORM and fail when no platform file was resolved (XPLATFORM is empty)
+.PHONY: check_platform
+check_platform:
+ifeq (,$(XPLATFORM))
+	@echo "$${MSG_PLATFORM}" && false
+endif
+# end of the environment and platform checks
+
+# PLATFORM_NAME: file name part of $(PLATFORM) with a trailing .xpfm removed.
+# Assigned with := so `basename` is forked once instead of on every reference.
+PLATFORM_NAME := $(strip $(patsubst %.xpfm, % , $(shell basename $(PLATFORM))))
+
+
+# Shell command shortcuts used by recipes (constant values, so simple assignment)
+RM    := rm -f
+RMDIR := rm -rf
+
+MV   := mv -f
+CP   := cp -rf
+ECHO := @echo
diff --git a/codec/L2/demos/resize_sc/Makefile b/codec/L2/demos/resize_sc/Makefile
new file mode 100644
index 0000000000..b2e0aa661c
--- /dev/null
+++ b/codec/L2/demos/resize_sc/Makefile
@@ -0,0 +1,268 @@
+# Copyright 2019-2021 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# sc makefile-generator v1.0.0
+
+############################## Help Section ##############################
+.PHONY: help
+# help: print the supported invocations (double-colon rule, so further help:: rules can coexist)
+help::
+	$(ECHO) "Makefile Usage:"
+	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to generate the design for specified Target and Shell."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to run application in emulation."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make host HOST_ARCH=<x86>"
+	$(ECHO) "      Command to build host application."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make clean "
+	$(ECHO) "      Command to remove the generated non-hardware files."
+	$(ECHO) ""
+	$(ECHO) "  make cleanall"
+	$(ECHO) "      Command to remove all the generated files."
+	$(ECHO) ""
+
+############################## Setting up Project Variables ##############################
+# MK_PATH: absolute path of this Makefile. XF_PROJ_ROOT: MK_PATH with its shortest trailing "/L2/..." part removed (unless already set). CUR_DIR: directory of this Makefile.
+MK_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+XF_PROJ_ROOT ?= $(shell bash -c 'export MK_PATH=$(MK_PATH); echo $${MK_PATH%/L2/*}')
+CUR_DIR := $(patsubst %/,%,$(dir $(MK_PATH)))
+XFLIB_DIR = $(XF_PROJ_ROOT)
+
+# default values, used only when TARGET / HOST_ARCH are not already defined
+TARGET ?= sw_emu
+HOST_ARCH ?= x86
+
+# PLATFORM: when empty, take DEVICE; when still empty, fall back to the u50 platform below
+ifeq ($(PLATFORM),)
+PLATFORM := $(DEVICE)
+endif
+ifeq ($(PLATFORM),)
+PLATFORM := xilinx_u50_gen3x16_xdma_5_202120_1
+endif
+
+# #################### Platform allowlist / blocklist patterns (see check_device) ####################
+PLATFORM_ALLOWLIST +=  u50
+PLATFORM_BLOCKLIST +=  zc
+# utils.mk is included from this Makefile's own directory (CUR_DIR) instead of the current working directory, so "make -f <dir>/Makefile" finds it too
+GCC_INTOOL := 8.3.0
+BINUTILS_INTOOL := 2.37
+include $(CUR_DIR)/utils.mk
+TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
+TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
+BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
+EMCONFIG := $(BUILD_DIR)/emconfig.json
+XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
+export XCL_BINDIR = $(XCLBIN_DIR)
+
+EXE_FILE_DEPS :=
+BINARY_CONTAINERS_DEPS :=
+RUN_DEPS :=
+
+# Append -O3 to CXXFLAGS unless debug=yes is given
+ifneq ($(debug),yes)
+CXXFLAGS += -O3
+endif
+
+# Forward XILINX_SC_PFM_CONFIG / XILINX_SC_PFM_EXT as -D defines when they are set, then add the per-HOST_ARCH flags (the aarch64 branch appends nothing) and the EXTRA_* user flags
+ifdef XILINX_SC_PFM_CONFIG
+CXXFLAGS += -DXILINX_SC_PFM_CONFIG=$(XILINX_SC_PFM_CONFIG)
+endif
+ifdef XILINX_SC_PFM_EXT
+CXXFLAGS += -DXILINX_SC_PFM_EXT=$(XILINX_SC_PFM_EXT)
+endif
+ifeq ($(HOST_ARCH), x86)
+CXXFLAGS += -I $(XILINX_VITIS)/system_compiler/include -I $(XILINX_HLS)/include 
+LDFLAGS += -L$(XILINX_XRT)/lib -L$(XILINX_VITIS)/system_compiler/lib/x86 -lvpp_acc -l$(LIB_XRT) -lxrt_coreutil  -Wl,-rpath=$(XILINX_VITIS)/system_compiler/lib/x86:$(XILINX_XRT)/lib:$(GCC_HOME)/lib64  -Wl,--enable-new-dtags -lpthread 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --temp_dir $(TEMP_DIR) --save-temps -g -I $(XILINX_VITIS)/system_compiler/include 
+VPP_LDFLAGS += 
+else ifeq ($(HOST_ARCH), aarch64)
+CXXFLAGS += 
+LDFLAGS += 
+VPP_FLAGS += 
+VPP_LDFLAGS += 
+endif
+CXXFLAGS += $(EXTRA_CXXFLAGS)
+VPP_FLAGS += $(EXTRA_VPP_FLAGS)
+# Per-TARGET settings: LIB_XRT (XRT library linked by LDFLAGS) and HOST_PREAMBLE (environment prefix of the host run). The -g test reads EXTRA_VPP_FLAGS, the name used above, and still accepts the older EXTRA_VPPFLAGS spelling; CURDIR replaces PWD so "make -C" is handled.
+ifeq ($(TARGET),sw)
+  $(error Error: The sw target is not supported anymore. Please use sw_emu instead)
+else ifeq ($(TARGET),sw_emu)
+  LIB_XRT  := xrt_swemu
+  HOST_PREAMBLE := XCL_EMULATION_MODE=sw_emu
+else ifeq ($(TARGET),hw_emu)
+  LIB_XRT  := xrt_hwemu
+  HOST_PREAMBLE := XCL_EMULATION_MODE=hw_emu
+  ifneq (,$(findstring -g,$(EXTRA_VPP_FLAGS) $(EXTRA_VPPFLAGS) $(CXXFLAGS)))
+    # for sourcing pre/post xsim scripts
+    ifneq ($(XILINX_SC_HW_EMU),0)
+      HOST_PREAMBLE += XILINX_SC_HW_EMU=1 XILINX_SC_BUILD_DIR=$(CURDIR)/$(BUILD_DIR)
+    endif
+  endif
+else ifeq ($(TARGET),hw)
+  LIB_XRT  := xrt_core
+endif
+
+########################## Setting up Host Variables ##########################
+
+# Host sources (the kernel source is part of the host build too) and their include paths
+HOST_SRCS += $(CUR_DIR)/host/test_resize.cpp $(CUR_DIR)/kernel/kernel_resize.cpp
+CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include/hw/resize -I $(XFLIB_DIR)/L2/demos/resize_sc/host -I $(XFLIB_DIR)/L2/demos/resize -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/demos/resize_sc/kernel
+# No unconditional -O3 here: the debug switch above already adds -O3 unless debug=yes, and a second -O3 would defeat it
+
+EXE_NAME := host.exe
+EXE_OBJS := $(addprefix $(TEMP_DIR)/, $(addsuffix .o,$(basename $(HOST_SRCS))))
+EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
+EXE_FILE_DEPS := $(EXE_OBJS)
+MAKEDEPEND = $(CXX) $< -MM -MP -MF $(basename $@).d -MT $@  $(CXXFLAGS)
+# Arguments of the host run; PKG_HOST_ARGS (non-x86 only) strips the directory part from each argument
+HOST_ARGS := -i $(CUR_DIR)/images/t0.raw -srcw 512 -srch 512 -dstw 64 -dsth 64
+ifneq ($(HOST_ARCH), x86)
+PKG_HOST_ARGS = $(foreach args,$(HOST_ARGS),$(subst $(dir $(patsubst %/,%,$(args))),,$(args)))
+endif
+
+########################## Kernel compiler global settings ##########################
+VPP_FLAGS +=  -I $(XFLIB_DIR)/L2/include/hw/resize -I $(XFLIB_DIR)/L2/demos/resize_sc/kernel
+
+######################### binary container global settings ##########################
+VPP_FLAGS_kernel_resize += --hls.clock 300000000:kernel_resize
+ifeq ($(HOST_ARCH), x86)
+VPP_LDFLAGS_kernel_resize += --kernel_frequency 200
+else
+VPP_LDFLAGS_kernel_resize += --clock.defaultFreqHz 300000000
+endif
+# x86: $(TARGET).o is the target of the link rule and $(TARGET).xclbin its -o output; sw_emu gets no link target
+ifeq ($(HOST_ARCH), x86)
+BINARY_CONTAINERS := $(BUILD_DIR)/$(TARGET).xclbin
+BINARY_CONTAINERS_TMP := $(BUILD_DIR)/$(TARGET).o
+ifeq ($(TARGET),sw_emu)
+  BINARY_CONTAINERS_TMP :=
+endif
+else
+# placeholder for non_x86
+endif
+
+.SECONDEXPANSION:
+# Kernel build rules. Each kernel source is compiled with "v++ -c" into an object under $(TEMP_DIR); $(BINARY_CONTAINERS_TMP) is the "v++ -l" link step, whose -o output is $(BINARY_CONTAINERS). The $$(@D)/.f prerequisite (second expansion) creates the object's directory through the %/.f rule.
+ACC_SRCS_kernel_resize += $(CUR_DIR)/kernel/kernel_resize.cpp
+ACC_OBJS_kernel_resize  := $(addprefix $(TEMP_DIR)/, $(addsuffix .o,$(basename $(ACC_SRCS_kernel_resize))))
+$(ACC_OBJS_kernel_resize): $(TEMP_DIR)/%.o : %.cpp $$(@D)/.f
+	@echo "--> Making $@ from: $?"
+	$(MAKEDEPEND)
+	$(VPP) $(VPP_FLAGS) $(VPP_FLAGS_kernel_resize) -o $@  -c $<
+BINARY_CONTAINERS_DEPS  += $(ACC_OBJS_kernel_resize) 
+$(BINARY_CONTAINERS_TMP) : $(BINARY_CONTAINERS_DEPS)
+	@echo "--> Making $@ from: $?"
+	$(VPP) $(VPP_FLAGS) $(VPP_LDFLAGS) $(VPP_LDFLAGS_kernel_resize) -o $(BINARY_CONTAINERS) -l $^
+EXE_FILE_DEPS += $(BINARY_CONTAINERS_TMP)
+EXE_FILE_DEPS += $(BINARY_CONTAINERS_DEPS)
+
+############################## Host rules (x86 only): compile host objects with $(CXX), then link $(EXE_FILE) ##############################
+ifeq ($(HOST_ARCH), x86)
+$(TEMP_DIR)/%.o : %.cpp $$(@D)/.f
+	@echo "--> Making $@ from: $?"
+	mkdir -p $(BUILD_DIR)
+	$(MAKEDEPEND)
+	$(CXX) -o $@ $(CXXFLAGS)  -I . -c $<
+$(EXE_FILE): $(EXE_FILE_DEPS)  
+	mkdir -p $(BUILD_DIR)
+	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
+else
+# place holder for arch64
+endif
+# emconfig.json is generated by emconfigutil into $(BUILD_DIR) for the resolved platform
+$(EMCONFIG):
+	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
+# Directory stamp: create the directory of the requested .f file, then the empty .f file itself
+%/.f:
+	mkdir -p $(dir $@)
+	touch $@
+# .PRECIOUS keeps make from deleting the .f stamps
+.PRECIOUS: %/.f
+
+RUN_DEPS += $(EXE_FILE) $(EMCONFIG)
+# run: after check_device and RUN_DEPS, start the host executable with HOST_ARGS. The three TARGET sections below issue the same command; only x86 has a recipe. NOTE(review): LIBRARY_PATH is not assigned in this Makefile.
+run: check_device  $(RUN_DEPS)
+# TARGET contains sw_emu
+ifneq (,$(filter sw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(HOST_PREAMBLE) $(EXE_FILE) $(HOST_ARGS)
+	
+else
+# place holder for arch64
+endif
+endif
+
+# TARGET contains hw_emu
+ifneq (,$(filter hw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(HOST_PREAMBLE) $(EXE_FILE) $(HOST_ARGS)
+	
+else
+# place holder for arch64
+endif
+endif
+
+# TARGET is exactly hw
+ifeq ($(TARGET), hw)
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(HOST_PREAMBLE) $(EXE_FILE) $(HOST_ARGS)
+	
+else
+# place holder for arch64
+endif
+endif
+
+############################## Setting Targets ##############################
+# run, cleanh and cleank are command targets as well, so they are declared phony together with the others
+.PHONY: all clean cleanall emconfig run cleanh cleank
+emconfig: $(EMCONFIG)
+ifeq ($(HOST_ARCH), x86)
+all:  check_vpp check_platform check_xrt $(EXE_FILE) $(BINARY_CONTAINERS) emconfig
+else
+all:  check_vpp check_platform check_sysroot $(EXE_FILE) $(BINARY_CONTAINERS) emconfig sd_card
+endif
+
+.PHONY: host xclbin
+ifeq ($(HOST_ARCH), x86)
+host:  check_xrt $(EXE_FILE)
+else
+host:  check_sysroot $(EXE_FILE)
+endif
+xclbin: $(BINARY_CONTAINERS_TMP)
+
+############################## Cleaning Rules ##############################
+cleanh:
+	-$(RMDIR) $(EXE_FILE) vitis_* TempConfig system_estimate.xtxt *.rpt .run/
+	-$(RMDIR) src/*.ll _xocc_* .Xil dltmp* xmltmp* *.log *.jou *.wcfg *.wdb sample_link.ini sample_compile.ini obj*  bin* *.csv *.jpg *.jpeg *.png
+# cleank: remove kernel and emulation build products. "pl*start_simulation.sh" must stay one word, otherwise rm -rf also targets a path named "sh".
+cleank:
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
+	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
+	-$(RMDIR) _x_temp.*
+# cleanall: cleanh + cleank, then the build, report and temporary directories
+cleanall: cleanh cleank
+	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
+	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+# clean: host-side products only
+clean: cleanh
diff --git a/codec/L2/demos/resize_sc/README.md b/codec/L2/demos/resize_sc/README.md
new file mode 100644
index 0000000000..e0d32d12d6
--- /dev/null
+++ b/codec/L2/demos/resize_sc/README.md
@@ -0,0 +1,122 @@
+.. 
+   Copyright 2019 Xilinx, Inc.
+  
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+# Resize 
+
+The Resize example resides in the ``L2/demos/resize_sc`` directory. This tutorial provides a step-by-step guide that covers the commands for building and running the kernel.
+
+## Executable Usage
+
+* **Work Directory(Step 1)**
+
+The steps for library download and environment setup can be found [here](https://github.com/Xilinx/Vitis_Libraries/tree/master/codec/L2/demos#building). To get the design,
+
+```
+   cd L2/demos/resize_sc
+```   
+
+* **Build kernel(Step 2)**
+
+Run the following make command to build your XCLBIN and host binary targeting a specific device. Please note that this process takes a long time, possibly a couple of hours.
+
+```
+   make run TARGET=hw DEVICE=xilinx_u50_gen3x16_xdma_201920_3
+```   
+
+* **Run kernel(Step 3)**
+
+To get the benchmark results, please run the following command.
+
+```
+   ./build_dir.hw.xilinx_u50_gen3x16_xdma_201920_3/host.exe -xclbin build_dir.hw.xilinx_u50_gen3x16_xdma_201920_3/kernel_resize.xclbin -i images/t0.raw -srcw 512 -srch 512 -dstw 256 -dsth 256 
+```   
+
+Resize Input Arguments:
+
+```
+   Usage: host.exe -[-xclbin -i -srcw -srch -dstw -dsth]
+         -xclbin:           the kernel name
+         -i:                the input bin file
+         -srcw:             the source image width
+         -srch:             the source image height
+         -dstw:             the destination width 
+         -dsth:             the destination height
+```         
+
+Note: Default arguments are set in the Makefile.
+
+* **Example output(Step 4)** 
+
+```
+    Read image successfully.
+    Found Platform
+    Platform Name: Xilinx
+    Info: Context created
+    Info: Command queue created
+    Found Device=xilinx_u50_gen3x16_xdma_201920_3
+    INFO: Importing build_dir.sw_emu.xilinx_u50_gen3x16_xdma_201920_3/kernel_resize.xclbin
+    Loading: 'build_dir.sw_emu.xilinx_u50_gen3x16_xdma_201920_3/kernel_resize.xclbin'
+    Info: Program created
+    Info: Kernel created
+    kernel has been created
+    INFO: kernel start------
+    INFO: kernel end------
+    INFO: Execution time 6334.86ms
+    Info: Time in host-to-device: 14.9204ms
+    Info: Time in kernel: 6319.06ms
+    Info: Time in device-to-host: 0.475648ms
+    The src image size is 512*512.
+    The dst image size is 89*27.
+    Image resized successfully.
+    PASS: no error found.
+    Info: Test passed
+```    
+    
+## Profiling
+
+The hardware resource utilizations are listed in the following table.
+Different tool versions may result in slightly different resource usage.
+
+Table 1 : Hardware resources for Resize 
+
+    +---------------------+----------+----------+----------+----------+---------+-----------------+
+    |    Kernel           |   BRAM   |   URAM   |    DSP   |    FF    |   LUT   | Frequency(MHz)  |
+    +---------------------+----------+----------+----------+----------+---------+-----------------+
+    |  kernel_resize(1x)  |    14    |    0     |    53    |   8635   |  6566   |      397.1      |
+    +---------------------+----------+----------+----------+----------+---------+-----------------+
+    |  kernel_resize(8x)  |    29    |    0     |    168   |   20824  |  15087  |      340.9      |
+    +---------------------+----------+----------+----------+----------+---------+-----------------+
+
+Table 2 : Resize FPGA acceleration benchmark 
+
+    +---------------+-----------+--------------------+-----------------+
+    |    Inputs     |   Size    |  FPGA 1x/8x (ms)   |   Fps 1x / 8x   |
+    +---------------+-----------+--------------------+-----------------+
+    |   7680*4320   |  512*512  |    84.30 / 12.55   |  11.86 / 79.67  |
+    +---------------+-----------+--------------------+-----------------+
+    |   7680*4320   | 1920*1080 |    84.35 / 12.43   |  11.86 / 80.46  | 
+    +---------------+-----------+--------------------+-----------------+
+    |   7680*4320   | 3840*2160 |    84.34 / 12.43   |  11.86 / 80.46  | 
+    +---------------+-----------+--------------------+-----------------+
+
+Note: This table is the result of each image resize down 8 times.
+
+.. Note::
+
+   1. Resize running on Intel(R) Xeon(R) Silver 4116 CPU @ 2.10GHz, cache(16896 KB), cores(12).
+   2. time unit: ms.
+
+.. toctree::
+    :maxdepth: 1
diff --git a/codec/L2/demos/resize_sc/description.json b/codec/L2/demos/resize_sc/description.json
new file mode 100755
index 0000000000..06cbb45b6a
--- /dev/null
+++ b/codec/L2/demos/resize_sc/description.json
@@ -0,0 +1,81 @@
+{
+    "gui": false, 
+    "name": "Xilinx Resize (SC) Test", 
+    "description": "A SystemCompiler example for fast resize in codec cases.", 
+    "flow": "vitis", 
+    "platform_allowlist": [
+        "u50" 
+    ], 
+    "platform_blocklist": [
+        "zc"
+    ], 
+    "launch": [
+        {
+            "cmd_args": "-i PROJECT/images/t0.raw -srcw 512 -srch 512 -dstw 64 -dsth 64", 
+            "name": "generic launch for all flows"
+        }
+    ], 
+    "host": {
+        "host_exe": "host.exe", 
+        "compiler": {
+            "sources": [
+                "host/test_resize.cpp",
+		        "kernel/kernel_resize.cpp"
+            ], 
+            "includepaths": [
+                "LIB_DIR/L2/include/hw/resize", 
+                "LIB_DIR/L2/demos/resize_sc/host", 
+                "LIB_DIR/L2/demos/resize", 
+                "LIB_DIR/../utils/L1/include",
+                "LIB_DIR/L2/demos/resize_sc/kernel" 
+            ],
+            "options": "-O3"
+        }
+    }, 
+    "v++": {
+        "compiler": {
+            "includepaths": [
+                "LIB_DIR/L2/include/hw/resize", 
+                "LIB_DIR/L2/demos/resize_sc/kernel"
+            ]
+        }
+    }, 
+    "containers": [
+        {
+            "accelerators": [
+                {
+                    "location": "kernel/kernel_resize.cpp", 
+                    "frequency": 300.0, 
+                    "name": "kernel_resize"
+                }
+            ], 
+            "frequency": 200.0, 
+            "name": "kernel_resize"
+        }
+    ], 
+    "testinfo": {
+        "disable": false, 
+        "jobs": [
+            {
+                "index": 0, 
+                "dependency": [], 
+                "env": "", 
+                "cmd": "", 
+                "max_memory_MB": 32768, 
+                "max_time_min": {
+                    "vitis_hw_build": 470, 
+                    "vitis_hw_emu": 300, 
+                    "vitis_sw_emu": 60, 
+                    "vitis_hw_run": 10
+                }
+            }
+        ], 
+        "targets": [
+            "vitis_sw_emu", 
+            "vitis_hw_emu", 
+            "vitis_hw_build", 
+            "vitis_hw_run"
+        ], 
+        "category": "canary"
+    }
+}
diff --git a/codec/L2/demos/resize_sc/host/test_resize.cpp b/codec/L2/demos/resize_sc/host/test_resize.cpp
new file mode 100644
index 0000000000..7b6ba5a15e
--- /dev/null
+++ b/codec/L2/demos/resize_sc/host/test_resize.cpp
@@ -0,0 +1,191 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <ap_int.h>
+#include <fstream>
+#include <hls_math.h>
+#include <iomanip>
+#include <iostream>
+//#include "xcl2.hpp"
+#include "utils.hpp"
+#include "kernel_resize.hpp"
+#include "xf_utils_sw/logger.hpp"
+
+using namespace std;
+
+int main(int argc, char* argv[]) {
+    // Host driver: parses -i/-srcw/-srch/-dstw/-dsth, loads the raw image, runs resize_acc once and
+    // writes the result to "<input stem>_resized<ext>". Returns nerror (0 on success), 1 on bad arguments.
+    int nerror = 0;
+    xf::common::utils_sw::Logger logger(std::cout, std::cerr);
+
+    ArgParser parser(argc, (const char**)argv);
+
+    std::string infile;
+    if (!parser.getCmdOption("-i", infile)) {
+        std::cout << "ERROR: input file path is not set!\n";
+        return 1;
+    }
+
+    std::string outfile(infile);
+    std::size_t found = outfile.find_last_of(".");
+    outfile.insert(found == std::string::npos ? outfile.size() : found, "_resized"); // no '.': append
+
+    ap_uint<32> src_width = 0, src_height = 0; // 0 = "not given"; rejected by the size check below
+    std::string input_width;
+    if (!parser.getCmdOption("-srcw", input_width)) {
+        std::cout << "INFO: image input width is not set!" << std::endl;
+    } else {
+        src_width = std::stoi(input_width);
+    }
+
+    std::string input_height;
+    if (!parser.getCmdOption("-srch", input_height)) {
+        std::cout << "INFO: image input height is not set!" << std::endl;
+    } else {
+        src_height = std::stoi(input_height);
+    }
+
+    ap_uint<32> dst_width = 0, dst_height = 0; // same convention as the source dimensions
+    std::string output_width;
+    if (!parser.getCmdOption("-dstw", output_width)) {
+        std::cout << "INFO: image output width is not set!" << std::endl;
+    } else {
+        dst_width = std::stoi(output_width);
+    }
+
+    std::string output_height;
+    if (!parser.getCmdOption("-dsth", output_height)) {
+        std::cout << "INFO: image output height is not set!" << std::endl;
+    } else {
+        dst_height = std::stoi(output_height);
+    }
+
+    if (dst_width == 0 || dst_height == 0 || src_width < dst_width || src_height < dst_height) {
+        std::cout << "WARNING: The output size is invaild!\n";
+        return 1;
+    }
+
+    // pixel_64 packs several pixels into one AXI word (NPPC != 1 path); srcPixel/tmpDst hold one pixel.
+    ap_uint<WDATA> pixel_64 = 0;
+    ap_uint<WBIT> srcPixel = 0;
+    ap_uint<WBIT> tmpDst;
+
+    ap_uint<32>* configs = aligned_alloc<ap_uint<32> >(4 + 1);
+    configs[0] = src_width;
+    configs[1] = src_height;
+    configs[2] = dst_width;
+    configs[3] = dst_height;
+    configs[4] = 0; // spare word: copied to the accelerator below, so give it a defined value
+    ap_uint<WDATA>* axi_src = aligned_alloc<ap_uint<WDATA> >(MAX_SRC);
+    ap_uint<WDATA>* axi_dst = aligned_alloc<ap_uint<WDATA> >(MAX_DST);
+
+    // Read the raw image, one byte per pixel, into the source buffer.
+    FILE* fp;
+
+    if ((fp = fopen(infile.c_str(), "rb")) == NULL) {
+        cout << "Error reading file fail." << '\n' << "Please check the path: " << infile << endl;
+        exit(1);
+    } else {
+        cout << endl << "Read image successfully." << endl;
+    }
+
+#if NPPC == 1
+    for (int i = 0; i < src_width * src_height; i++) {
+        if (fread(&srcPixel, 1, 1, fp) != 1) { srcPixel = 0; nerror = 1; } // short file: zero-pad, fail test
+        axi_src[i] = srcPixel;
+    }
+#else
+    for (int i = 0; i < src_width * src_height; i++) {
+        if (fread(&srcPixel, 1, 1, fp) != 1) { srcPixel = 0; nerror = 1; } // short file: zero-pad, fail test
+        pixel_64.range((i % 8) * WBIT + WBIT - 1, (i % 8) * WBIT) = srcPixel.range(WBIT - 1, 0);
+        if ((i + 1) % 8 == 0 || !((i + 1) < src_width * src_height)) axi_src[i / 8] = pixel_64; // flush tail too
+    }
+#endif
+    if (fp != NULL) fclose(fp);
+
+    // Queue one task: allocate accelerator buffers, copy configs and source pixels in, launch compute.
+    auto configs_pool = resize_acc::create_bufpool(vpp::input);
+    auto axi_src_pool = resize_acc::create_bufpool(vpp::input);
+    auto axi_dst_pool = resize_acc::create_bufpool(vpp::output);
+    resize_acc::send_while([&]() -> bool {
+        ap_uint<32>* acc_configs = (ap_uint<32>*)resize_acc::alloc_buf(configs_pool, sizeof(ap_uint<32>) * 5);
+        ap_uint<WDATA>* acc_axi_src =
+            (ap_uint<WDATA>*)resize_acc::alloc_buf(axi_src_pool, sizeof(ap_uint<WDATA>) * MAX_SRC);
+        ap_uint<WDATA>* acc_axi_dst =
+            (ap_uint<WDATA>*)resize_acc::alloc_buf(axi_dst_pool, sizeof(ap_uint<WDATA>) * MAX_DST);
+
+        memcpy(acc_configs, configs, sizeof(ap_uint<32>) * 5);
+        memcpy(acc_axi_src, axi_src, sizeof(ap_uint<WDATA>) * MAX_SRC);
+
+        resize_acc::compute(acc_configs, acc_axi_src, acc_axi_dst);
+
+        return 0; // presumably tells send_while to stop after this single task -- confirm in vpp_acc docs
+    });
+
+    // Register the receive callback: copy the accelerator's output buffer back into axi_dst.
+    resize_acc::receive_all_in_order([&]() {
+        ap_uint<WDATA>* acc_axi_dst = (ap_uint<WDATA>*)resize_acc::get_buf(axi_dst_pool);
+        memcpy(axi_dst, acc_axi_dst, sizeof(ap_uint<WDATA>) * MAX_DST);
+    });
+
+    struct timeval start_time, end_time;
+    gettimeofday(&start_time, 0);
+    resize_acc::join(); // the reported execution time covers only this call
+    gettimeofday(&end_time, 0);
+
+    std::cout << "INFO: kernel end------" << std::endl;
+    std::cout << "INFO: Execution time " << tvdiff(&start_time, &end_time) / 1000.0 << "ms" << std::endl;
+
+    FILE* outFile;
+    if ((outFile = fopen(outfile.c_str(), "wb")) == NULL) {
+        cout << "Error writing file fail." << '\n' << "Please check the path: " << outfile << endl;
+        exit(1);
+    }
+#if NPPC == 1
+    for (int j = 0; j < dst_width * dst_height; j++) {
+        tmpDst = axi_dst[j];
+        fwrite(&tmpDst, 1, 1, outFile);
+    }
+#else
+    // Unpack each AXI word back into single pixels, skipping padding past the last destination pixel.
+    // NOTE(review): the loop bounds use WBIT as pixels-per-word while the pixel index uses 8;
+    // the two agree only when WBIT == 8 -- confirm against kernel_resize.hpp.
+    for (int i = 0; i < DivCeil(dst_width * dst_height, WBIT); i++) {
+        pixel_64 = axi_dst[i];
+        for (int j = 0; j < WBIT; j++) {
+            tmpDst.range(WBIT - 1, 0) = pixel_64.range(j * WBIT + WBIT - 1, j * WBIT);
+            if ((i * 8 + j) < (dst_width * dst_height)) fwrite(&tmpDst, 1, 1, outFile);
+        }
+    }
+#endif
+    if (outFile != NULL) fclose(outFile);
+
+    if (nerror) {
+        std::cout << "\nFAIL: nerror= " << nerror << " errors found.\n";
+        logger.error(xf::common::utils_sw::Logger::Message::TEST_FAIL);
+    } else {
+        std::cout << "The src image size is " << src_width << "*" << src_height << ".\nThe dst image size is "
+                  << dst_width << "*" << dst_height << ".\n"
+                  << "Image resized successfully." << std::endl;
+        std::cout << "PASS: no error found.\n";
+        logger.info(xf::common::utils_sw::Logger::Message::TEST_PASS);
+    }
+
+    free(configs);
+    free(axi_src);
+    free(axi_dst);
+    return nerror;
+}
diff --git a/codec/L2/demos/resize_sc/host/utils.hpp b/codec/L2/demos/resize_sc/host/utils.hpp
new file mode 100644
index 0000000000..76cffecea0
--- /dev/null
+++ b/codec/L2/demos/resize_sc/host/utils.hpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2020 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef UTILS_H
+#define UTILS_H
+#include <sys/time.h>
+inline int tvdiff(struct timeval* tv0, struct timeval* tv1) { // microseconds elapsed from *tv0 to *tv1
+    return (tv1->tv_usec - tv0->tv_usec) + (tv1->tv_sec - tv0->tv_sec) * 1000000; // narrowed to int on return
+}
+//--------------------------------------------------------------
+
+#include <new>
+
+#include <algorithm>
+#include <cstdlib>
+#include <iterator>
+#include <vector>
+
+// Minimal "-flag value" command-line lookup over the tokens of argv.
+class ArgParser {
+   public:
+    ArgParser(int& argc, const char** argv) {
+        for (int idx = 1; idx < argc; ++idx) mTokens.emplace_back(argv[idx]); // argv[0] is skipped
+    }
+    // Copies the token following the first occurrence of `option` into `value` and returns true;
+    // returns false, leaving `value` untouched, if `option` is absent or is the last token.
+    bool getCmdOption(const std::string option, std::string& value) const {
+        const auto flagPos = std::find(mTokens.begin(), mTokens.end(), option);
+        if (flagPos == mTokens.end() || std::next(flagPos) == mTokens.end()) return false;
+        value = *std::next(flagPos);
+        return true;
+    }
+
+   private:
+    std::vector<std::string> mTokens;
+};
+
+template <typename T>
+T* aligned_alloc(std::size_t num) {
+    // Allocates num * sizeof(T) bytes at a 4096-byte-aligned address via posix_memalign;
+    // throws std::bad_alloc when posix_memalign reports failure (non-zero return).
+    void* raw = nullptr;
+    const int rc = posix_memalign(&raw, 4096, num * sizeof(T));
+    if (rc != 0) throw std::bad_alloc();
+    return reinterpret_cast<T*>(raw);
+}
+#endif
diff --git a/codec/L2/demos/resize_sc/images/t0.raw b/codec/L2/demos/resize_sc/images/t0.raw
new file mode 100644
index 0000000000..6d505cdb46
--- /dev/null
+++ b/codec/L2/demos/resize_sc/images/t0.raw
@@ -0,0 +1 @@
+??????????????????????????>>>>>>=>>>????????@@@@@@@@@@@@????????>>>>>>>>@@@@@@@@?@@@@@@@@@@@@???AA@@?@@@@@@AAAAAAAAAAAAA@@@AAAAAAAAAAAAAAAAAAAAAAAAAABBBABBCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFGGGGGGHHHHHHGHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIFIMF<@]HHHHHHHHHIIIIIIIIIIIHHGGGGGGGGHHHGGHHGFFFGGGGGGGGGGGGGGGGHHHIJJJJJKKKKLLLKKKKKKKKKKKKKKKKKKKKKKKJKKKKKKKKLLLLKKKKLLLLLLLLLLLLLLLLKLLMMNNONNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPOOPPPPQQRRRRRRRRRRRRRRRRRRRRRRRRSSRRRRSSSSSSSSSSTTTTUUUUUUUUUUUUUUUUUUUUVVWWWWWXYYYYYYZZZZZZ[[[\\\\]]]]]^^^^^^__`____`abbbbbbbbbbbaaabbcbccccddddddddddd???????????????????????????>>>??>>>>???????@@@@@@@@@@@@@????????????????@@@@@@@@@@@@@@@@????@@@@A@?????@@@@AAAAAAAAAAAAA@@AAAAAAAAAAAAAAAAAAAAAAAAAABBBBABBCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFGGGGGGHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIKE<9;JJKHHHHHHHHIIIIIIIIIIIHHHGGGGGGGHHHGHHHGFFFGGGGGGGGHHHHHHHHHHHHIJJJKLLLLMMMKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKLLLLLLLLLLLLLLLLLMMLLLLLLMNNOONNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPOPPPPQQQRRRRRRRRRRRRRRRRRRRRRRRRSRRRRRRSSSSSSSSSTTTTUUUUUUUUUUUUUUUUUUUUVVWWWWWXYYYYYZZZZZZZ[[\\\\]]]]]^^^^^^________`aabbbbbbbbbbaaabbcccccdddddddddddd????????????????????????@@???????????????@@@@@@@@@@@@@@@????????@@@@@@@@@@@@@@@@@@@?????>>???@@@@??>>>??AAAAAAAAAAAAAAAA@AAAAAAAAAAAAAAAAAAAAAAAAAAABBBBABBCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEDDDDDDDDEEEEEEEEEEFFFFFFFFFFGGGGGGHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIJC9=EW6LHHHHHHHHIIIIIIIIIIIHHHGGGGGGHHHHGHHHGGFGGGGGGGGGHHHHHHHHHHHHIJJIJJJKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKLLLLLLLLLLLLLLLMMMMLLLLMMNNOONNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPPQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSSTTTTTUUUUUUUUUUUUUUUUUUUUVVWWWWWXYYYYYZZZZZZ[[[\\\]]]]]^^^^^^^________`aabbbbbbbbbbbaabccccccdddddddddddd????????????????????????A@@@?@@@????????@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@??>>>>>????@@??>>=>>>AAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAABBBBBABBCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEDDDDDDDDEEEEEEEEEEFFFFFFFFFFGGGGGGHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIGGEJJT3MHHHHHHHHIIIIIIIIIIIIHHHHGGGHHHHHHHIHGGGGHHHHHHHHHHHHHHHHHHHHIJJJIIIJJJJKKKKKKKKKKKKKKKKKLLLKKKKKKKKKKKKKKKLLLLLLLLLLLLLLMMMMMMMMMMMNNNOONNNNNNNNOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSTTTTTTUUUUUUUUUUUUUUUUUUUUVVWWWWWXYYYYYZZZZZ[[[\\\]]]]]^^^^^^^________```abbbbbbbbcbbbbbcccccccddddddddddd????????????????@@@@@@@@AA@@@@@@@@??????@@@@@AAA@@@@@@@@AAAAAAAA@@@@@@@@@@@@@@@@@@@??>>>??????????>>=>>>AAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBABBBBBBBABBCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFGGGGGGHHHHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIKKI@HCPHHHHHHHHIIIIIIIIIIIIHHHHGGHHHHHHHIIHGGGHHHHHHHHHHHHHHHHHIHHIJJJJKKKKKLLLLLLLLLLLLLLLLLLLLLLLLKKKLLLLLLLLLLLLLLLLLLLLLLLLMNNNNNNMNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSTTTTTTTTUUUUUUUUUUUUVVVVVVVVVVWWWWWXYYYYZZZZZZ[[\\\\]]]]^^^^^^^________```aabbbbbbbbcbbbbbcdbbbcccccdddddddd????????????????@@@@@@@@AA@@@@@@????@@@@@@@AAAAAAAAAAAAAAAAAAAAA@@@@@@@@AAAAAAAAA@@@??>>@@@@????@??>>>??AAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBABBCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFGGGGGGHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJHLI>HVNIIIIIIIIIIIIIIIIIIIIIHHHGGHIIIHHIIIHGGHHHHHHHHHHIIIIIIIIIIIIKKKKJKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMLLLLKKLLLLLLLLNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPOOOOOOOOPPPPPPPPPPPPPPPPPQQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSRSSSSTTTTTTTTUUUUUUUUUUUUVVVVVVVVVVWWWWWXXYYYZZZZ[[[[\\]]]]]^^^^_^_________``aaaabbbbbbbbccbbbccdbbccccdddddddddd????????????????@@@@@@@@A@@@?@@@???@@@@@@AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA@@@@???@@@@@@@@A@?????@BBBAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBABBCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFGGGGGGHHHHHHIIIIIII
IIIIIIIIIIIIIIIIIIIIIIIIIIFMNET]LIIIIIIIIIIIIIIIIIIIIIIIIFGHIIIHHIIIHGGHIHHHHHHHHIIIIIIIIJJJJKLLKJJJJKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLNNOOOONNOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPOOOOOOOOPPPPPPPPPPPPPPPPQQQQQRRRRRRRRRRRRRRRRRRRSSSSSSSSSSTTTTSSSSSTTTTTTTTTUUUUVVVVVVVVVVVVVVVVVVWWWWWXXXYYZZZZ[[[\\\]]]]^^^^___________`aabbbabbbbbbbbccbbbcddcccddddedddddddd????????????????@@@@@@@@@@@???@@???@@@AAAAAAAAAAAAAAAAAABBBBBBBBAAAAAAAAAAAAAAAA@@@@@@@??@@@AAAAAA@@?@@@BBBAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBABBCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCDEEFEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFGGGGGGHHHHHHIIIIIIIJIIIIIIIIIIIIIIIIIIIIIIIIJFNPFZ\QJJJJJJJJIIIIIIIIIIIIIIIIFGHIIIHHIIIHGGHIHHHHHHHHIIIIIIIIKJJKLLLLKLLLLLMMLLLLLLLLLLLLLLLLMLLLLLLLLLLLLLLLLLLLLMMMLLLLLLLLNOOOOOONOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPOOOOOOOOPPPPPPPPPPPPPPPPQQQQRRRRRRRRRRRRRRRRRRRRTTTTTTTTSTTUUTTSSSTTTTTTTTTTUUUUVVVVVVVVVVVVVVVVVVWWWWWXXXYYZZZ[[[[\\]]]]^^^^___________``abbbbbbbbbbbbbcccbbcdddddeeeeedddddddd???ABB@>?@@@AA@@@@AAAA@@@@@@@AAAAA@@@AABAAAAAAAAAAAAAAAAABBBBBBAAAAAAAAAAAAAAAAAAAAAA@@@@@@AAAAAAAAAAAAAAAAAAAAABBBBAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFEEGGGGGGGGHHHHIIIIIIIIIIIIJJIIIIIIKKJIIIIIIIIIIIJJGIIGFVRNKFKJHLKLKJIHHIIHIIHIIIIHGHHIIIHHJIIHHHHHIIIIIIIIHIIJJIIIJJJKKKKLKKKKKKKKKKKLLLLMKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPOOOOOOPPPPPPPPPPOPPPPPQRRRRRRRRRSSRQQQQQSSSTTTTTSSSSSTTTTTTTTTTTTTTTUUUUVVVVVVVVWWWWVVVVVVWWWXXXXXXXXXXXXXXXYZ[\[[\\\\]]^^^^]^^^^___``aaaaabbbbbbbbbbccccccddddeeeeeeeeeeeeeeeee???@AA@?@@@AAAAA@@AAAA@@@@@@@AAAAA@@@AABAAAAAAAAAAAAAAAAABBBBBBAAAAAAAAAAAAAAAAAAAAAAA@@@@AAAAAAAAAAAAAAAAAAAAAABBBBAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFEGGGGGGGGHHHHIIIIIIIIIIIIJJIIIIJJKKJIIIIIIIIIIIJJIIHGGSMMKFKJHLKKKJIHIIIIIIIIIJIIGHHIIIHHIIIHHHIIIIIIIIIIIIIIIIIIJJJKKKLLKKKKKKKKKKKLLLLMLLLLLLLLLLLLLLL
LLLLLLLLLLLLLLLLLLLLMMMMMNNNNNNNNOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPOOOOOPPPPPPPPPPPOPPPPPQRRRRRRRRRSRRQQQRRSSTTTTTTSSSSTTTTTTTTTTTTTTTUUUUUVVVVVVVVVVVVVVVVVWWWXXXXYYYYYYYYYXXYYZ[\\\\]]]]]__^^^^___```aaaaaaabbbbbccccccccccddddeeeeeeeeeeeeeeeeee????@A@@AAAABAAA@@AAAA@@@@@AAAAAA@@@@AAAAAAAAAAABBAAAAAAABBBBBBABBBBBBBBBBBBBBBBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGHHHHIIIIIIIIIIIIIIIIIJJKKKJIIIIIIIIIIJJJHHFHIPJQKFKJHLKKKJIHIIIIJIIIJJIIHHHIIIIIIIIHIIIJIIIIIIIIIIIIIJJJJKKKKKLLKKKKKKKKKKKLLLLMLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMNNNNNNNNOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPOOOOOPPPPPPPPPPPPPPPPPQRRRRRRRRRRRQQRRRSSTTTTTTTSSTTTTTTTTTTTTTTTUUUUUUUVVVVVVVVVVVVVVVVWWWXXXYYZZZZZZZZYYYYZ[\\]]]]^^^^_______`aaaaaaaaaaabbbbbcccccdddccddddeeeeeeeeeeeeeeeeee?????@AAABBBBBBAAAAAAAAAAAAAAAAA@@@@AAAAAAAAAAAABBBAAAAAABBBBBBABBBBBBBBBBBBBBBBAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGHHHHIIIIIIIIIIIIIIIIJKKKKKJIIIIIIIIIIJJJGGFIGJDPKFKJHLKKKJIIIIIIJIIIJJJIHHHIIIIIIIIIIIIIIIIIIIIIJJIIJJKKKKKKKLLLKKKKKKKKKKKLLLLMMMMMMMMMLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPOOOOPPPPPPPPPPPPPPQPPPQRRRRRRRRRQQQRRSSSTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUVVVVVVVVVVVVVWWWWWXXYYYYZZZZZZZZZYYZZ[\]]]]^^^^________`aaaaaaaaaabbbbcccccccccccddddeeeeeeeeeeeeeeeeeee@@@??@AABBBBBBBBAAAAAAAAAAAAAAAA@@@AAAAAAAAAAAAABBBBAAAAABBBBBBAAAAAAAAAAAAAAAAAAAAABBBBBBBBAAAABBBBBBBBBBBBBBBBBBBBAAAAAAAABBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEGGGGGGGGGGGGGGGGFFGGGGFFGGGGGGGGHHHHIIIIIIIIIIIIIIIJKKKKKKJIIIIIIIIIJJJKJJJKCEBMJFKJHLKKLKIIIJJIJJIIJJJIHHHIIIJJJJJIIIIIJJJJJJJJKJJJJKLLKKKLLLLLKKKKKKKKKKKLLLLMMMMMMMMMMMMMMMMMMMMMMMMMLLLLLLLLLLMMMMNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPOOOPPPPPQQQQQQQQPQQQPQQRRRRRRRRRQQRRSSTTTTTTTTTTTTTTTTUUUUUUUUU
UUUUUUVVVVVVVVVVVVVWWWXXXXXXYYYZZZZZZZZZZZZZZ[\]^]]]^^^^___^^___`aaaaaaaaaabbbbccccccccccdddddeeeeeeeeeeeeeeeeeeeAAA@??@ABBBBBBBABBAAAABBBBBAAAAA@@AAAAAAAAAAAAAABBBBBBBBABBBBBBAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBABBBBBBBBBBBBBBBBBBBBAAAAAAAABBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFGGGGGGFGGGGGGGGHHHHIIIIIIIIIIIIIIJKKKKKKKJIIIIIIIIIJJKKKKOOEMMQJFKKILJKLKJIJJJJJJIJJKJJIIIIIJJKKKJJIIIIJJJJJJJJJJKKKLLLLLLLLLLLKKKKKKKKKKKLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMLMMMMNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPOPPPPPPPQQQQQQQQPQQQQQRSRRRRRRRRQQRSSTTTTTTTTTTSTTTTUUUUUUUUUUUUUUUVVVVVVVVVVVVVVWWWXXYYXYYYZZZZZZZZZZZZ[ZZ[[\]^]^^^^____^^^__````aaaaaaabbbbcccccccccccddddeeeeeeeeeeeeeeeeeeeeBBBA@?@ABBBBBBAABBAAAABBBBBBBAAA@@AAAAA@AAAAAAAAAAABBBBBABBBBBBAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBAAAAAAAABBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGHHHHIIIIIIIIIIIIIJJKKKKKKKJIIIIIIIIIJJKKKIPOEWYQJFLKILJJLKJIJJJJKJJJJKJJIIIIIJKKKJJJIIJJJJJJJJJJJKKLLLLLLLLLLLLLKKKKKKKKKKKLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPQQQQQQQQQQQQQQRSRRRRRRRRQRRSTTTSTTTTTTSSTTUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVWWXXYYYYYZZZ[[ZZZZZZZZ[[[[\]^^^^____``_____``a`aaabbbbbbbbbcccdddcccccddddeeeeeeeeeeeeeeeeeeeeBCCBA@@ABBBBBAAABBAAAABBBBBBBAAA@@AAAAA@AAAAAAAAAAAABBCCABBBBBBABBBBBBBBBBBBBBBBBBBBBBBCCBBBBBBBBBBBBBBBBBBBBBBBBBBBAAAAAAAABBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGHHHHIIIIIIIIIIIIJJKKKKKJKKJIIIIIIIIIJKKKLGNL@WYHJFLKILJJLKJJJJJJKJJJKKJJIIIIIJKKJJJJJJJKJJJJJJJJJKKLMMLLLLLLLLLLKKKKKKKKKKKLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPQQQQQQQQQQQQQQRSRRRRRRRRQRSSTTSSTTTTTSSSTUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVUVVWWXXYYYYZZ[[[ZZZZZZZZ[[[[\]^____```````````aaaaabbcccbbbbbcccdddddddcddddeeeeeeeeeeeeeeeeeeeeBBBBBAAABBBBBBB
BBBBBBBBBAAAAAAAA@@@@@@@@AABBBBBBAAAAAAAACCCCCCCCBBBBBBBBBBBBBBBBBBAAAAAABBBBBBBBBBBBBBBBAAAAAAAABBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCBBBBCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEFFFFFGGGGGGGGGGGGGGGGGGGGGHHHHHHGGGGGGGGGHIIIIIHIIIIIIIIKKKKKKKKJJJJJIIIHHIIJKKKJGFKFSAOIIIJJKKKJJJJJJJJJJJJJKKKJJIIIJKLJJJJJJJJJJJJJJJJKKKKLLLLLLLLLLLLKKKKKKKKKKKLLLMMKKKKKKKKMMLMMMNNMMLMMNMMNNMMMMMMMMMMMNNNOOOOOOOOOOPPPPQQPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQQQQQQQRRRRRRRRRRRRRRRRRRSSTTTTTTTTTTUUTTTTUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVWWXXXYYYZZ[[\[[[[[[[[\\[[\^_a___````_^___`aaabaaabbbabbbbccccddddddddeeeeeeeeffeeeeeeeeeeeeeeBBBBBBBBBBBBBBBBBBBBBBBBAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCCBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEFFFFFGGGGGGGGGGGGGGGGGGGGGHHHHHHGGGGGGGGHHIIIIIHIIIIIIIIJJJJJJJJJJJJIIIIHHIIJKKKKMKIFYHKIIJJKKKKJJJJJJJJJJJJJKKKKJJIJJKLJJJJJJJJKKKKKKKKKKKLLLLLLLLLLLLLKKKKKKKKKKLLLLMMLLLLLLLLMMLMMMNNMMLMNNNMNNMMMMMMMMNNNNNNOOOOOOOOOOPPPPQQPPPPPPPPQQQQQQQQQQQQQQQQPQQQQRRRRRRRRRRRSSSSSSSSSSSSSSSSSSTTTTTUTTTTTTTTTTTUUUUUUUUUUUUUUUUUVVVVWVVVVVVVWWWWXXYYYZZZ[[[[[[[[[[\\]]\\]^_`^_______^___`aaaaaaabcbbbbbbccccddddddddeeeeeeeeeeeeeeffeeeeeeeeCCCBBBBBBBBBBBBBBBBBBBBBBBBBBBBBAAAAAAAAABBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCCCCCCCCCCCCCCCCCCCCCCBBCCCCCCCDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHIIJIIHIIIIIIIIIIIIIIIIIIIIIIIIIIIJJKKKILMKDTILIJJKKKKKJJJJJJJJJJJJJKKKKKJJJKLLKKKKKKKKLLLLLLLLLLLLLMMMLLLLLLLLKKKKKKKKLLLLLMMMLLLLLLLLMMLMMMNNMMMMNNNMNNMMMMMMNNNNNNOOOOOOOOOOOPPPPQQQQQQQQQQQRRRRRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTUUUUTTTTTTUUUUUUUUUUUUUUUUUUVVVVVWWWWWWWWXXXXYYYZZZZ[[[[[[\\\\\\\^^^^^^__^^^____^^___`aaaa``abcccbbbbccccddddeeeeeeeeeeeeddddefggeeeeeeeeCCCCCBBBBBBBBBBBBBBBBBBBBBBBBBBBAAAAAAAABBBCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCCCCCCCCCCCCC
CBBCCCDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHIIJJIIIHHHHHHHHIIIIIIIIIIIIIIIIIIJJJKKKIIMOCFCNIJJKKLLLJJJJJJJJJJJJJKKKKKKKKLLMLLLLLLLLLLLLLLLLLLLMMMMMMLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMLMMMNNNMMMNNNNNNNMMMNNNOOOOOOOOOOOOOOOPPPPPQQQQQQQQQQQRRRRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSUUUTTTTTUUUUTTTTUUUUUUUUUUUUUUUUUVVVVWWWWWWWXXXXYYYYZZZZ[[[[[[[[\\\]]]]^______^^]]^^^^^^^___`aaaaa`abccbbbbbccccdddeeeeeeeeeeeeeddddefggeeeeeeeeCCCCCBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDBCCCDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFGGGGGGGGFFFFFGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHIIJJJJIIHHHHHHHHIIIIIIIIIIIIIIIIJJJJKKKKMJMODFHNIJJKKLLLJJJJJJJJJJJJJKKKKKKKLLLMLLLLLLLLLLLLLLLLLLLMMMMMMMMLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMLMMMNNNMMNNONNNNNNNNNNOOOOOOOOOOOOOPPPPPPPQQQQQQQQQQQQRRRRRRRRRRRRRRRRRRRRRSSSRRRRRRRRTTTTTTTTSSSSSSSSTTTTTTTTTTTTUUUUUUUUUVVVVVVVVVVVVVVVWWWWWWXXXYYYZZZZZ[[[[[[[[[[[\\]]^^__`````_^^]]^^^^^^^___`aaabaaabbbabbbbccccdddeeeefeeeeeeeeddddeeffeeeeeeeeCCCBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHIJJJJJIIIIIIIIIIJJJJJJJJIIIIIIIIKKKKKKKKKJLJGSXQIJJKKKKKJJJJJJJJJJJJJKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMMLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMLMMMNNNNMNOOONNNNNNNOOOOOOOOOOOOOOPPPPPPPPQQQQQQQQQQQQRRRRRRRRRRRRRRRRQQRRRRSSRRRRRRRRSSSSSSSSSSSSSSSSTTTTTTTTTTTUUUUUUUUVVVVVVVVVVVVVVVWWWWXXXXXYYZZZZZZZ[[[[[[\\\\\\]]]^__```````_^^]]^^^^^^^___`aaabbaabbbabbbbcccccdddeeeeeeeeeeeeeeeeeeefeeeeeeeeBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHJJJKJJIIIIIIIIIIIIIIIIIIIIIIJJJ
JKKKKKKKKIHJIEOZRIIJJKKKKJJJJJJJJJJJJJKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMLMMMNNNNNNOOONNNNNNNOOOOOOOOOOOOOOPPPPPPPQQQQQQQQQQQQQRRRRRRRRRRRRRRRRQRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTUUUUUUUUUUUUVVVVVVVVVVVVVVVWWWWWXXXXXXYZZ[[ZZZ[[[[[[[\\\]]]]]^__``a````__^^]]^^^^^^^___`aaabaaabbbbbbbbcccccccddeeeeeeeeeeeeeeeeeefeeeeeeeeBBBBBAAABBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCDDCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHJJJKJJIIIIIIIIIIIIIIIIIIIIIJJJJJKKKKKKKKNHLL?=LMIIIJJKKKJJJJJJJJJJJJJKKKJKKLLLLLLLLLLLLLLLLLLLLLKLLLLLMMMMMMMLLLMMMMMMMMMMMMMMMMMMMMMMMMMMLMMMNNONNNOOONNNNNNOOONNOOOOOOOOOOPPPPPPPQQQQQQQQQQQQQRRRRRRRRRRRRRRRRRRRRSSSSTTTTTTTTSSSSSSSSTTTTTTTTTTTUUUVVUUUUUUTTVVVVVVVVVVVVVVVVWWWWXXXXXXYYZ[[[ZZZ[[[[[[[[\]]]^]^^_``aa```____^]^^^^^^^^___`aaaa``abccbbbbbcccccccdddeeeeeeeeeeeeeeeeffeeeeeeeeBBBCDCBBCCBBBBCDCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFGFFFFFFFFEFGGGGGGGGFFFGGHGGGGGGGGHHHHHHHHGHHHHHIJJJJJJIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJKKKKJN;AGI8RJJKKKKKJJJJJJJJJJJJJKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMNMMLLLLKKLMMMMNNNMMMMMMMMMMMMMMMMNNNNNNNNOOOOOOOOOONNNNOOOOOOOOOOPPPPPPPPPPPPQQQQQQQQQQQQPQRRRQRRRRRRRRSSSSSSSSSSTTTTTTTTSSSSSSSTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVUUVVVVWWVWWXXXXXYYYZZZZZZZZ[[\]]]]]]^^^^^^^_`aa````__^^]]]^^^^__]]^_`abcaaaaabbbbbbbccccddeeeeffddddddddeefggffeffffffffBBBCDCCBCCBBBCCDCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFGFFFFFFFFGGGGGGFFGGFFFGGGGGGGGGGGHHHHHHHHGHHHHHIJJJJJJIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJKKKKJKBGIK6KKKLLLLLLJJJJJJJJJJJJKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMNMMLLLLKKLLMMMMMNMMMMMMMMMMMMMMMNNNNNNNNNOOOOOOOOPOONNOO
POOOOOOOOPPPPPPPPPPPPQQQQPQQQQQQPPQRRRQRRRRRRRSSSSSSSSSSSTTTTTTTTSSSSSSSTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVUVVVVWWWWWXXXXXXYYZZZZZ[ZZ[[\\]]^^^^^^^^`___```______^]]]]]^^^^_^^__`abbaaaabbbbbbbbccccddddeeeeddddddddeeeeffggffffffffCCCCDCCBCCBBCCCDCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFGFFFFFFFFHHGGFFFFFFGGGGGGGGGGGGGGHHHHHHHHGHHHHHIJJJJJJIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJKKKKMIJLGL8GJJJKKKJJJJJJJJJJJJJKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMNMMMLLLLLLLLLMMMMMMMMMMMMMMMMMNNNNNNNNNNNOOOOOOOOPOOOOOOPOOOOOOOOPPPPPPPPPPPPQQQQPPQQQQPPQQRRRQRRRRRRRSSSSSSSSSSSTTTTTTTTSSSSSTTTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVVVVVVWWWXXXYYYYYYZZZZZ[[[[[\\]]]^^^^^^^^a`_____^____^^]]]]]^^^^^___``aaaaaaabbbbbbbccccccdddddeeddddddddeeeeefhhffffffffDCCCCDCBCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFGFFFFFFFFHHGFFFGGFFGHHGGGGGGGGGGGHHHHHHHHHHHHHHIJJJJJJIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJKKKKMENK@I?KIIIJJJJJKKKKKKKKKKKKKKKKLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMNNMMMMLLLLLLLLMMMMMMMMMMMMMMMNNNNNNNNNNNNNOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPQQQQQPPQQQQPPQRRRRQRRRRRRSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVVVVVWWWWXYYYZZZZZZZZZ[[[\\\]]]]^^^^_____a`_____^^^^^^]]]]]]]^^^^__``aaabaaabbbbcbbccccdddddddeeeeeeeeeeeeeeeffghffffffffDDCCCDCCCCCCDDDDDDDDDDDDCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFGGGGGGGGGGGGFGGHHFGGHHHGGGGGGGGGGHHHHHHHHHHIHHHIJJJJJJIIIIIIIIIIIJJJJJJJJIIIIIIIIJJJJKKKKJCOK<GHQKKKKKKKKLLLLLLLLKKKKKKKLLLLLLLLLLLLLLLLLMMMMMMMMMMMMNNNNMMMMMLLLLLLLMMMMMMMMMMMMNNNNNNNNOOOOOOOOOOOOOOOONOOOOOONPPPPPPPPPPPPPPPPPPPQQQQRPQQQQQQPQRSSRRRRRRRSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVVVVWWWWXYYYZZZZZZZZZ[[[
[\]]]]]]^]]^^_`````__``__^^^^^^]]]]]^^^^___``abbbabbbbccccccccdddddeeeeefeeeeeeeeeffgggffffffffffEDCCCDCCCCDDDDDDDDDDDDDDCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFGGGGGGGGGGGGGGGHHGGGGHGGGHHHHHHHHHHHHHHHHHHIIHHIJJJJJJIIIJJJJJJJJJJJJJJJJIIIIIIIIJJJJKKKKJGQOCKTVKKJJJKKKLLLLLLLLKKKLLLLLLLLLLLLLLLLLLLLLNNNNNNNNNNNNNNNNNNMMMMLLLLMMMMNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPQQQQRRQQQRRQQQRRSSRRRRRSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVVWWWWXXXYYZZZZZZZZZ[[[[\]]]]]]]]\]]^_`aa____```_^^^^^^^]^^^^_____``aabbbbbbbccccccccdddddeeeefffeeeeeeeeefggggfeffffffffFEDCCDDCCCDDDDDDDDDDDDDDDDDDDDDDDDDDCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDEEEEEFFFEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFGGGGGGGGGGGGHHGGGGGGGGGHHHHHHHHHHHHHHHHHHHHIIHIIJJJJJJIIIJJJJJJJJJJJJJJJJJJJJJJJJJJJJKKKKJLOPIM[TJJJIJJKKLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMNNNNNNNNNNNNNNNNNNNMMMMMMMMMNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOPPQQPPOPPPPPPPPPPPPPPPPPPQQQQRRRRRRRRRRRRSSRRRRSSSSSSSSSSSSSSSSTTTTTTTTUUUTTTTTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVWWWWWXXXYYYZZZZZZZ[[[[\\]]]]]]]]\]]^__``____```_^____^^^^^____````aaaabbbbbbcccccccddddddddeeeeeeeeeeeeeffffggggffffffffFEDCCDDCCCDEEDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCDDDDDDDDEEEEFFFFEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFGGGGGGGGGGHHHHGGFHGGGGGHHHHHHHHHHHHHHHHHHHHIIHIIJJJJJJIIIJJJJJJJJJJJJJJJJKKKKKKKKJJJJKKKKJMHKHJZMLLKKKLLMLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMNNNNNNNNNNNNNNNNNNNMMMMMMMMNNNNOOOOOOOOONNNNOOOOOOOOOOOOOOOOOOOOPQQRRQQPPPPPPPPPPPPPPPPPPQQQQQRRRRRSSRRRRSSSRRRRSSSSSSSSSSSSSSSSTTTTTTTTUUUUTTTTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVWWWWXXXXYYYZZZZZZ[[[[\\\]]]]]\\\]]]^^__``___```_______^^____````aaaaaaaabbbccccccccddddecddddeeeeeeeeeeeffeefghiffffffffEEEEDDEEEEEEEEEEEEEEEEEEEEEEEEE
EDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEFEDDDDEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEDDEEEEEEEEDDDEEEEEEEEEEEEEEEEEEEEEGGGGGGGGGGGGGGGGHHHHHHGGGGGGGGGGHHHHHHHHHHHHHHHHIIIIIIJJJJJJJJJJJJJJJJJJJJJKKLLLLLLLLLLLJJJJJJKKJJEJGGTRKKKKLLLLLLLLLLLLMMMMMMMMLLLLLLLLLLMMLLMMNNNNNNMMNNNNNNNNNNMMNNMLMMMMMMMMNNNNNNNNNNNNOOOOOOOOOOOOPONMOPQQRRRQQQQPQQQQPPPPPPPPOOOPRRRRRRRRSSSSSSSSRRRSSRRRSSRRRSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUVVUUUVVVVVVVVVVWWWWWXXXXXXYYZZZ[[[[[[[[\\\]]\\\\[[]]]]]]]]`````_____````__^^__`aaaaaaabbbbbbcccccccccccdeedddddeeedeeeeeffffffgggghhhhhhhhFEEEDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEFEEEEEEEEEEDDDDDEDDDDDDDDDDDDDDDDDDDDDDEEDDDDDDDDEEEEEEEEEEFFFFEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGHHHHHHGGGGGGGGGGHHHHHHHHHHHHHHHHIIIIIIJJJJJJJJJJJJJJJJJJJKKKLLMMLLLLLLLLKKJJJJKKKIHILJISKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMLLMMNNNNNNNMNNNNNNNNNNMMNNMLMMMMMMMMNNNNNNNNNNNNOOOOOOONNOOOPOMMNPQQQQQQQQQQPPPPPPPPPPQPPPPQRRRRRRRRSSSSSRRRRRRSSRRRSSRRSSTTTTTUUTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUVVVUVVVVWWWVVWWWWWWXXYYYYYYYZZZ[[[[\\\\\\\\]]]]\\\\]]]]]]]]``````____````__^__``aaaaaaabbbbbbccdddddccccdeeddddeeeeeeeeffffggggghhhhhhhhhhhFEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEFFEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEDDDDDDDDEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGHHHHHHGGGGGGGGGGHHHHHHHHHHHHHHHHIIIIIIJJJJJJJJJJJJJJJJJJKKKLLMMMMMMMMMMMLKKJJJJJKILIPLATKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMLMNNNNNNNNNNNNNNNNNNMMMNNMMMMMMMMMMOOOOOOOONNNNOOOOONNNNNNOONMMNOPPPPPPPPPPOOOOPPPPOPPPPPQQRRRRRRRRTSSSRRRRRRSSSSRRSSSSSSTTTUUUUUUTTTTTTTTTTTTTTTTTUUUUUUUVUUUUUVVVVVVVWWWWVVWWWXXXYYZZ[[ZZZZZ[[[\\\\]]]]]]^^]]]]\\]]]]]]]]```````__``````____``aaaaaaabbbbbccdddddddcccdeedddeeeeeeeefffffgghhhhiihhhhhhhhFEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDCDDDEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFF
FFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGHHHHHHGGGGGGGGGGHHHHHHHHHHHHHHHHIIIIIIJJJJJJJJJJJJJJJJJJKKKLLLLLLLLLLLLLLKKJJJJJJJNLLKHSKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMNNNNNNNNNNNNNNNNNNMMMNNMMNNNNNNNNOOOOOOOONNNNOOOONNNNNNNNONMMNOOPOOOPPPPPNOOOOOOOOOPPPPQRRRRRRRRRTTSSRRQQRRSSSSRRSSSSSTTTUUUUUUUUTTTTTTTTTTTTTTTTUUUUUUUUUUUUVVVVVVVVVWWWWWWXXYYYZZZ[[[[[ZZ[[\\\\]]]]]]]]^^]]]]\\]]]]]]]]__```aa`````````_```aaaaaaaabbbbbbccdddddddcddeedddddeeeeeeeffffgghhhhiihhhhhhhhFFEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEDDDDDDDDDDDDCCCCCCCCCCDDDEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGGGGGGGGHHHHHHGGGGGGGGGGHHHHHHHHIIIIIIIIIIIIIIJJIIIIIIIIJJJJJJJJKKKKKKKKLLLLLLLLKKKJJIIIIKNPEIXRKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMNONNNNNNNNNNNNNNNNMMMNNNMNNNNNNNNOOOOOOOONNNNOOOONNNNNNNNONMMNOOOOOOOPPPPOOOOONNNNOPPPPQQRRRRRRRRTTSSRRQQRSSSSSSRSSSSTTTTUUUUUUUUTTTTTTTTTTTTTTTTTTTUUUUUUUUVVVVVUVVVVWWWWWXXYYZZZZZ[[[[[[[[\\\]]]]]]]]]]]]]]\\\\]]]]]]]]]^_``aaa```aa``````aaaaaaaaabbbbbbccccccedddddeeccdddddedeeeeeffgghhhhhihhhhhhhhFFEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEFFFFFFFFFFFEEFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFHHHHHHHHHHHHHHGGGGGGGGGGHHHHHHHHIIIIIIIIIIIIIIJJIIIIIIIIJJJJJJJJKKKKKKKKKKKKKKKKKKKJJJJJIJMPBJaQKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMNMMMMNOOONNNNNNNNNNNNNNMMMNNNMNNNNNNNNOOOOOOOONNNNOOOOONNNNNNOONNNOOOOOOOOOPPPOOOOONNNOPPPPPPQRRRRRRRRTSSSRRRRRSSSSSSRRSSTTTTTTUUUUUUTTTTTTTTTTTTTTTTTTTTTUUUUUVVVVVVVVVVVWWWWXXYYYZZZZZZ[[[[[[\\\]]]]]]]]]]]]]]]]\\\\]]]]]]]]]]^_`aaa``aaaa``aaaaaaaaaaaabbbbbbccddddeeddddeecdddddeeeeeeffffgghhhhiihhhhhhhhFFEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFEEEEEEEEEFFFFFEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFGGFFFFGGFFFFFFFFGGGFFFFFFFFFFFFFGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHGGGGGGGGGGHHHHHHHHIIIIIIIIIIIIIIJJJJJJJJJJKKKKKKKKLLLKKKKKKKKKKKKKLLKKKKLLLHKLGPZ
PKKKKLLLLLLLLLLLLLLLLLLLLMMMMMMMMLMNNMMMNOOONNNNNNNNNNNNNMMMMNNNMNNNNNNNNNNNNNNNNNNNNOOOOOOONNOOOOONOOPOOPPPPPOOOOOOOOOOOQQRQQQQRRRRRRRRRSSSSSRRRSSSSSSSSRSTTTTTTTTTUUTTTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVWWWXXXXXYYYZZZZZZZ[[[\[[\\\]]]^^^^^^]]]]^^]]]]\\]]]]]]]]]]^___```aaaaaa`aaaaaaaaaaaabbbbccddeeeefeeddeeeddeeeeffffffgggghhhiiiiihhhhhhhhFFFEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFGGGGGEEEEEEEEFFFGGFEEFFFFFFFFFFFFFFFFGGGFFFFFEEEEEEEEFFFFFFFFHGGFFGGHGGGGGGGGHGGGGGGGGGGGGGGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHGGGGGGGGGGHHHHHHHHIIIIIIIIIIIIIIJJKKKKKKKKLLLLLLLLMLLLLLKKLLLLLLLLLLLLLLMMNEJHLUPPKKKKLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMNNMMNNOOOONNNNNNNNNNNNMMMMNNNMNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOPOOOPPPOPPPPPOOONOOOOOPPRRSRRQRRRRRRRRRRSSSSSSSSSSSSSSSSRSTTUTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUVVVVVVVVVVVVVXXXYYYYYZZZZZZZZZ[[\\\\\\\\]]^^^___^^^^^^^^^]]]]]]]]]]]]^^^^__^^`aaaaaa`aaaaaaaaaaaabbbbddeefffefeeddeeeeeeffffgggggghhhiiiijjjjhhhhhhhhFFEDDEFFGGFFFEEEFEDDDDEFFEEEEEEFEEFFGFFEFFGGGGFFFFFFFFFFGFFEEFFGFFFFFFFFFGGGGGGGGGGGHHHHGGGGGGGGGGGGGGGGFFFFFFFFFFFFFFFFGGGGGGGGGGGGGGGGGGGGGGGGHHGGGGHHGGGGGGGGHGGGFFEEDEGHGGHHHHHHHIIIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIIJKKJKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLIDKLINQPKKJJKLLLLLMMMMMMMMMMMMMMNNNNNNNNMMMMMMMMOOPONNNOONNOOONNNNNNNNNNNNOPPONNNNNMMNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPQQQQRRRRRRRRRRRRRRRSSSSSSSSSSSSSTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUVVVVVVVVUUTTUUVWYYYYZZZZ[[[[\\\\\\\\\\\\\\\]]]]^]]]]]]]]_^]]]]]^]]]\\]]]^^^^^^^^__`aaaaaaaaaaaaaaabbcdddeeffggggeeeefffffffggghhhhhhiiiijjjjjjjjkkkkkkkkFFEDDEFFGGFFFFEEFFEDDEFFFFEEEEFFEFFGGGFFGGGHHGGGFFFFFFFFFFFEEFFFFFFFFFFFGGGGGGGGGGGHHHHHGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGHHGGGGHHGGGGGGGGGGGGFFEEEFGHGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIIIIJKKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMNOLJJIMKKJJKLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMOOPONNNOONNOOONNNNNNNNNNNNOOOONNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPQQQRRRR
RRRRRRRRRRRSSSSSSSSSSSSSSTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUVVVVVVVVVVUUUVWWYYYZZZZ[[[[[\\\\]]]]]]]]\\\]]]]]]]]]]]]]^^]]]]]^]]]\\]]]^^^^^^^^___``aaaaaaaaaaabbbbcccceeefffeeeeeeefffffggghhhhhhhiiiijjjjjjjjkkkkkkkkFFEEEEFFFFFFFFFFGFFEEFFGFFFFFFFFFFGGGGGGGGHHHHGGGGFFFFFFFFEEEEFFFFFFFFGGGGGGGGHHHHHHHHHHHHHHHHHHGGGGGGGGHHHHHHHHGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGHHHHHHHHGGGGGGGGGGGGGFFEFGHHGGGHIIIIHHHHHHHHHHHHHHHHHHHHHHHHHHHHIIIIIIIIIIIIIIIIJJJJJJJJJIIJKKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLFLMKKJLSLKJKKLLLLLMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNOOPOONOOONNOOONNNNNNNNNNNNOOOONNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPQRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUVVVVVVVVWWVVVVWWYYZZZZ[[[[[[\\\\]]]]]]]]\\\]]]]]]]]]]]]]^^^]]]]^]]]]]]]]^^^^^^^^_____`aaaaaaaaaabbbbccccdeeeeddddeeeefffgggghhhhhhhiiiiijjjjjjkkkkkkkkkkFFFFFFFFFFFFGGGGGGFFFFGGGFFFFFFGGGGGHHHHGGGGGGGGGGGGFFFFFFFEEFFFFFFFGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGGHHGFFHHHHHHHHIIIIIIHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJKKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMNNIHONLLKKLLLLLLMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNOOOOONOOONNOOONNNNNNNNNNNOOOOOONNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOPPPPPPPPOOPPPQQQRRRRRRRRRRRRRRRRSSSSSSTTRRRRRRRRTTTTTTTTTTTTTUUUUUUUUUUUVVVVVVVVVVVVVVVVXWWVVWWWYYYZZZZZ[[[[\\\\\\\\\\\\\\\]]]]]]]]]]]]]^^^]]]]]]]]]]]]]^^^^^^^^__^^_``aaaaabbbbbbbcccccddeeeeddeeffffgggghhhhhhhhiiiijjjjjjkkkkkkkkkkkkGGFFFFGGFFGGGGGGGGGGGGGGGGGGGGGGGGGGHHHHGGGGGGGGGGGGGGFFFFFFFFFFFFGGGGGGGGGHHHIIIIIIIIIIIIIIIIIIHHHHHHHHHHHHHHHHIIIIIIIIHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGGHHGGFIIHHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJKKJKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLPILRGJYOMLLLLLLKLLMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNOOOONOOONNOOONNNNNNNNNNOOOOOOOOONNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPOPPPQQRRRRRRRRRRSSSSSSSSSSSSTTTTSSSSSSSSTTTTTTTTTTTTUUUUUUUUUUUUVVVVVVVVVVVVVVVVWWVVVVWWYYYYZZZZ[[[[\\\\\\\\\\\\\]]]]]]]]]]]]]]]^^^^^]]]^^^^^^^
^^^^^^^^^__^^_``aaaabbbccbbbcddeeeeefffffffgggghhhhhhhhhhiiiiijjjjjkkkkkkkkkkkkkkGGGGGGGGGGGGGGGGGGGGGGGGHHGGGGHHHGGGGHHHGGFFFFGGGGGGGGGGFFGGGGFFGGGGGGGGGHHHIIIIIIIIIIIIIIIIIIIIHHHHHHHHHHHHHHHHJJJJJJJJIIIIIIIIIIIIIIIIIIIIIIIIHHHHHHHHHHHHHHHHGGGGGGHHJIHHIIIIIIIIIJJJIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJKKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLNGGPHMaUMLLLMMLKLLMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNOOOOOOPONNOOONNNNNNNNNNOOOOOOOOONNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPQQRRRSSSSSRRRSSSSSSSSSSTTTTTTSSSSSSSSTTTTTTTTTTTUUUUUUUUUUUUUVVVVVVVVWWWWWWWWWWVVVWXXYYYZZZZZ[[[[\\\\\\\\\\\\]]]]]]]]]]]]]]]]]^^^^]]]^^^^^^^^^^^^^^^^_____`aaaaabbcccbccddeffffgghhhhggghhhhhiiiiiiiiiiiijjjjkkkkkkkkkkkkkkkkGGGGGGGGHHHGGGGGGGGGGGGGHHHGGHHHHGGGGGHHGGGFFGGGHGGGGGGGFGGHHGGFGGGGGGGHHHHHIIJJJJJIIIIIJJJJJJJJIIIIIIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJGGHHHHGGHHHHHHHHHHGGGHHIJIHHIJJIIIIIIJJJIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIJJJJJJJJJJJJJJJJJJJKLLLLKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLPRHKGESPMMMMMMLKLLMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNOOOOOOPONNOOONNNNNNNNNNOOOOOOOOOONNNNOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPQRRRSSSSSSSSRSSSSSSSSTTTTTTTTSSSSSSSSTTTTTTTTTTUUUUUUUUUUUUUUVVVVVVVVWWWWWWWWWWVWWXYZZZZ[[[[[[[[[\\\\\\\\\\\\]]]]]]]]]]]]]]]]]^^^^^]]^^^__^^^^^^^^^^^___``aaaaaabcccddddeefffgghhhhhhggghhhhhiiiiiiiiiiijjjjjkkkkkkklkkkkkkkkGGGHHGGGHHHHGGFFFGGGGGGFHHHHHHHHHGGFGGHHHHGGGGHHHHHGGGGGGGHHHHGGGGGGGHHHHHHIIIJJJJJJIIIIJJJJJJJJIIIIIIIIIIIIIIIIKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKGGHHHHGGHHHHHHHHHHGFFHIJJIHHIJJJJJJJIIIIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJKKKKKKKKKKKKLMMMKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLFSCBGALTMMMMMMLJLLMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNOOOOOOPONNOOONNNNNNNNNNPOOOOOOPOONNNNOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPQQRRSSSSSSSSSRSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUVVVVVVVVWWWWWWWWWWWWXYZ[[[[\\\\\[[[[\\\\]]]]]]]]]]]]]]]]]]]]]]]]]^^^^^]\^^^__^^^^^^^^^^^__`aaaaaaaabccddeeeeefffhhhhhhhggggghhhhiiiiiiiiiiijjjjkkkkkklllkkkkkkkkHHGGGGHHHHHGGGGGHHGGGGHHGGGGGGGGFGGGGHHHIIHHHHH
HHHHHHHHHHHGGGGHHHHHHHHHHIIIJJJJJJJJJJKKKLLLLLLLLIIIIIIIIIIJJKKJJKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKJJHHGGGGHIGHIIIHGGHHHGHHIJJJJJJJJJIIJJJJJJJJJKKKKKJJJJJJJJJJJJJJJJJJJJJJJJJJJJJKKKJKLLLLLLLLLLLLLLLLLLLLMMLLLLLLLLLLLLLLLLLLLLLMMMMNMKKMMLJP5=IT4JQMKLMLLMMMMMMMMMMMMNNNNONNNNNMMMNNNNNNNNNNNNOOOOOOOOOOOOOONNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPOOPPPPPPPPPPPPPPQQQQRRRRSSTTSSSTSSTTTTSSTTTTTTUUUUUUUUUUTTUUUUUUUUUUUUVVVVVVVWWWVVVVVVVVVWWWXXYYWXXXYZZZ\\\\\\\\\\]\\[\\\\]]]]]]]]]]]]]]]]]]]]]]]]]]^^^^]^^__^^]^___^^__```aaaabbccccddddddeeeeefffgghiihhhhhiiiiiiiiihhijjjjkkkkkkkklllllllllllHHHGGHHHHHHHHHGGHHGGGGHHHHHHHHHHGGGGHHHHIIIHHHHHHHHHHHHHHHGGGGHHHHHHHIIIIIIIJJJJJJJJKKKKKKKKKKKKJIIIIIIIJJJKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKJJHHGGGHHIHHIJIIHGIHHHHIJJJJJJJJJJIJJJJJJJIJJJKJJJJJJJJJJJJJJJJJJJIIIIIIIIJJJJJJJJJKLLLLLLLLLLLLLLLLLLLLMMLLLLLLLLLLLLLLLLLLLLMMMMMNNLLNNNO42MHM<_OMKLMLLMMMMMMMMMMMMNNNNNNNNNNNMMNNNNNNNNNNNNOOOOOOOOOOOOOONNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPPPPPQQQRRRRRSSTTSSSTSSTTTTSSTTTTTTUUUUUUUUUUTTUUUUUUUUUUUVVVVVVVVWWWVVVVVVVVWWWWXXXYXXXYYZZZ\\\\\\\\\\]\\[\\\\]]]]]]]]]]]]]]]]]]]]]]]]]^^^^^^^^__^^^^______```aaaabbbccccdddccddeeefggghhhiihhhhhhhhiiiiijjjjjjkkkkkkkkllllmllllllllHHHHHHHHHHHHHHIIHHHHHHHHHHHHHHHHGGGHHHHIIIIIHHHHHHHHHHHHHHGGGGHHIIIIIIIIIIIIJJJJJJKKKKKKKKKKKKKKJJJJJIIIKKKKKLLLKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKJHHHHHHIIIIJJJIIHIIIIIIJJJJJJJJJJJJJJJJJKIJJJJJJJJJJJJJJJJJJJJJJJIIIIIIIIJJJJJJJJJKLLLLLLLLLLLLLLLLLLLLMMLLLLLLLLLLLLLLLLLLLLMMMMMNNMMNONR;ETBI<XMLKMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOONNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPQQQQQQQQQQQQRRRRSSSTTSSSTSSTTTTSSTTTTTTUUUUUUUUUUTTUUUUUUUUUUUVVVVVVVVWWWVVVVVVVVWWWWXXXXXXYYZZ[[\\\\\\\\\\]\\[\\\\]]]]]]]]]]]]]]]]]]]]]]]]]^^^^___^^^^__^^____`a``aaaabbbccccdddcddeefgghhhhhhhhggghhhhhiiijjjkkjkkkkllllllllmmmllllllllHHIIIIHHHHHIIIIJIIIHHIIIHHHHHHHHGGGHHHHHIIIIIIIIHHHHHHHHIHHGGHHIIIIIIIIIIIIIIJJJKKKKKKKKKKKKKKKKJJJJJJJJKKKKKKLMKKKKKKKKKKKKKKKKKKKKKKKKKLLLLKKKIIIIIIJJJKKKKJJIIIIIIIJJJJJJJJJ
JJJIIIJJJJJKKKKKKKKKKKKKKKKKKKKKKJJJJJJJJJJJJJJJJJKLLLLLLLLLLLLLLLLLLLLMMLLLLLLLLLLLLLLLLLLLMMMMNLNMLLMMLCJQPGOGUKKLMMMMMMMMMMMMMMMMNNNNNNNNNNNNNOOOOOOOONNNNOOOOOOOOOOOOOONNNNOOOOOOOOOOOOOOOOOPOOOOOOOOOOOOPPPPPPPPQQQQRRRRRRRRQRRRRSSSSSTTSSSTSSSTTSSSTTTTTTUUUUUUUUUUTTUUUUUUUUUUVVVVVVVVVWWWWWWWWWWWXXXXXXXXXYYZZ[[[\\\\\\\\\\]\\[\\\\]]]]]]]]]]]]]]]]]]]]]]]^^^^_____^^^^__^^_```aa`aaaabbbbccccdddeeeffggghhhhhhhggghhhhiiiiijjjjjjjkkkkklkkllllmmllllllllHHIIIIHHHIIIIJJJIIIIIIIIGGGGGGGGGGGGHHHHIIIIIIIIIIIIIIIIIIHHHHIIJJIIIHHHIIIJJJJJKKKKKKLLKKKKKKKKKKKKKJJJKKJJJKLLKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLKKJJJJJJJJKKKKKJJJHHIIIJJJJJJJJJJJJJIIIIIJKKLLLLLLLLLLLLLLLLLLLLLLKKKKKKKKKJJJJJJIJKLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMLMMMMNNNLMMKKKKICLMVNKVXIKLMMMMMNNNNNNNNMNNNNNNNNNNNNNNNOOOOOOOONNNNOOOOOOOOOOOOOONNNNOOOOOOOOOOOOOOOPPPOOOOOOOOOOOOPPPPQQQQQQQQRRRRRRRRRRRRSSSSSSTTSSSTTSSSSSSTTTTTTTUUUUUUUUUUTTUUUUUUUUUVVVVVVVVVVWWWWWWWWWWWXXXXXXXXYYZZ[[\\\\\\\\\\\\]\\[\\\\]]]]]]]]]]]]]]]]]]]]]]^^^^_____^^]]^^_^_````abaaaaabbbbccccdddeeefffffhhhhgggfhhhiiiijiiiiiiiijjjjjkkkkkkkllllllllllllIIIIIIIIIIIIIIIIIJJJJJJIHHHHHHHHGGHHHHIIIIIIIIJJIIIIIIIIIIHHHHIIJJIIIHHHIJJJJKKKKKKKLLLLKKKKKKKKLKKKKKKKKKJKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLKKJJKKKKKKLLLKKJJJIIIJJJJJJJJJJJJJJJJIIIIILLLLMMLLLLLLLLLLLLLLLLLLLLLLLLLLKKKKJJJJJKLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMNNNNLMMKKKJHOKLXHDdPIKLLLMMMNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOONNNNOOOOOOOOOOOOOONNNNOOOOOOOOOOOOOOPPPPOOOOOOOOOOOOPPPPQQQQQRRRRRRRRRRRRRRSSSSSSSTTSSSTTTSSSSTTTTTTTTUUUUUUUUUUTTUUUUUUUVVVVVVVVVVVVWWWWWWWWWWWXXXYYYYYZZZ[[\\\\\\\\\\\\\]\\[\\\\]]]]]]]]]]]]]]]]]]]]]]^^^______^^^^^^__`````abaaaabbbbbccccdddddeeeeeehhhgggffhhiiijjjjiiiiiiiijjjjjkkkkkkklllllllllllJIIIIIIJIIIIIIIIIJJKKJJIIIIIIIIIHIIIIIJJIIIIJJJJIIIIIIIIIIHHHHIIIIIIIIIIJJJKKKKKKKLLLLLLKKKKKKKKLLLLKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLKJKKLLLKKLLKKKJJJJJKKLLKKKKKKKKKKKKKKKJJJKLLLMLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLKKKJJJKLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMNNNNMNNMLLKIKILNDHgNIKMKKLMMNNNNNNN
NNNNNNNNNNNNNOOOOOOOOOOOONNNNOOOOOOOOOOOOOONNNNOOOOOOOOOOOOOOPPPPOOOOOOOOOOOOPPPPQQQRRRRRRRRRRRRRRRRSSSSTSSTTSSSTTTSSSSTTTTTTTTUUUUUUUUUUTTUUUUUUVVVVVVVVVVVVVWWWWWWWWWWWXXYYZZZZZZ[[\\\]\\\\\\\\\\]\\[\\\\]]]]]]]]]]]]]]]]]]]]]]^^^____`___``___``aa``aaaaaabbbbbccccddddeeeffffhhhhgggghhhiiijjijjjjjjjjjjjkkkkkkklllllllllllllJJIIIIJJIIIIIHHHJJKKKKJJJJJJJJJJIJJJJJKKIIIIJJJJIIIIIIIIIIIHHIIIIIIJJKKKJKKKKLLLKLLLLLLLLLLLLLLLLLLLLLKKKKLMMMMLMMMMMMMMLLLLLLLLLLLLLLLLLLMMLLLLKKLLLLLKLLKKJJJJKKLLMMLLLLLLLLLLKKLLLKKJKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMLLKKJJJKLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMNNNNNMOONNNMKNONGKFSLILMKJLMLNNNNNNNNNNNNNNNNNNNNOOOOPPPPPPPPNNNNOOOOOOOOOOOOOONNNNOOOOOOOOOOOOOPPPPPOOOOOOOOOOOOPPPPQQRRRRRRRRRRRRRRRRSSSSTTSSTTSSSTTTSSSSTTTTTTTTUUUUUUUUUUTTUUUUUUVVVVVVVVVVVVVWWWWWWWWWWWXYYYZ[[[ZZ[[\\]]\\\\\\\\\\]\\[\\\\]]]]]]]]]]]]]]]]]]]]]]^^____`````aa````aaa``aaaaaabbbbbccccdddeffgghhihhhhhhhhgghhiiiiiijjjkkkjjkkkkllkllllmmmllllllllIJKKJJJJJJJJJKKKJJJJJJJJKKKJJJJJIJJJKKLLJJKKKJIIJJJJJJJJKKKKJIJJIJJJJKKKKKKKKKKKMMMMMMMMLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMLLLLLLLLLLLLLLLLLLLLKKKKJKKLLKIHIJKKJJJJLLLLLLLLLLLLLLLLLLLLKKKKJKKLLLKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLKKKKKKKKLLLLLLLLKLLLLLLLMMMMMMMMMMMMMMMMMMMMMNNNMMMMMMMMLLLPPLMQLJIILNNMMMNNNONNNNNNNNNNOOOOOOOOOOOOOOOOOONNNNOOOOOOOOPPONNNNONNNOPPPOOONNOPQQPPOOOOOOOOOOOOPPPQRRRRRRSSSSSSSSSSRRRSSSSSSTTUUTTSTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVXXXWWWVVVVVVVVVVYYZ[[[[ZZZ[[\]]^]]]]]]]]\\]]]]]]^^^^^^^^]]]]]]]]\\]]^^^^^^__^^__````aaaaa``aabbabbbbbbbbdddeeeffefghiiiijiiihhhhffghiiiijjkkkllllllmmmmmmmmmllllnnnnnnnnIJKKKJJJJJJJKKKKKKKKKKKKKKKKJJJJIIJJKKKKJJJKJJIIIIIJJJJJJKKJJIIJJJJJJKKKLLLLLLLLMMMMMMMMLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMLLLLLLLLLLLLLLLLLLLLLKKKKKKLKKJIJKKLKJJJLLLLLLLLLLLLLLLLLLLLLLKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMNNNMMMMMMMMONNQNJMOLJIJLMNMMNNNOOOONNNNNOOOOOOOOOOOOOOOOOOOOONNNNOOOOOOOOPPONNNNONNNOPPPOOOOOPPPPPPOOOOOOOOOPPPPPQQRRRRRSSSSSSSSSSSRRRSSSSSTTTUUTTTTTTTTTTTUUUUUUU
UUUUUUUUUUUUUUUUUVVVVVVVVXXWWWWWWWWWWWWWWYYZZ[[[[ZZ[[\\]]\\\\\\\\\]]]]]]]^^^^^^^^]]]]]]]]]]]]]^^^^______``aaaabbbaaaabbaabbbbbbbbddddeeeffghhiihhhhhggggfgghiiijjjjjkklllmmmmmmmmnnmmmmllnnnnnnnnIJKKKJJJJJKKKKKKKKKKKKKKKKKKKKJJIIJJJKKKJJJJJJIIIIIJJJKKJJJJIIIIJJJKKKKKMMMMMMMMMMMMMMMMLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMLLLLLLLLLLLLLLLLMLLLLKKKKKLLKKJJKLLLKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNOMPKEMNLKJJLMMMNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNNNNOOOOOOOPPONNNOONNNOPPPOOOPPPPPPPPOOOOOOOOQQQQQQQQRRRRRSSSSSSSSSSSRRSSSSTTTTTUUTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUVVVWWWWWWWWWWWWXXXXXXXXXXXXYYYZZZ[[[[[[\\\\\\\\\\\\]]]]]]]]^^^^^^^^]]]]]]]]^^]]]]^^_``````aaaabbbbbbbaabbaacccccccccdddeeeeghhhhggghhgggggghhiijjjjjjjkkkllmmmmmmmnnnnnnmmmnnnnnnnnIJKKKKKKKKKKKKKKLLLLLLLLKKKKKKKKIIJJJKKKJJJJJJJJIIJJJJKKIJJJIIIIKKKKKLLLMMMMMMMMMMMMMMMMLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMLLLLKLLLLLKKKKLLLKJJKLLLLLLLLLLLLLLLLMLLLLLLLMLLKKLLMLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMNNNNNNNNNNNNNNNNNNNMMMMMNNNNNNNNNNNMNLQJFRQLKKKLMMMNNOOOPOOOOOOPPPPOOOOOOOOOOOOOOOOONNNNNNOOOOOOOPPONNNOOONNOPPPOOPQQPOOPPQPPPPPPPPRRRRRRQQRRRRSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUVVVVVVWWWWWWWWWWWXXXYYYYYYYYYYZZYYYZ[[[[[[[[[[\\\\\\\\]]]]]]]]^^^^^^^^]]^^^^^^^^]]]^^_`aaa``aaaaabbbbbbbabbcbbccccccccccddeeeeggggggggiiiiiiiiiijjkkkkjjjkkkllmmmmmmmmooooonnnnnnnnnnnIJKLKKKKKKKKKKLLLLLLLLLLLLKKKKKKJJJJKKKKJJJJJJJJKKKKKJJJJJKJJIJJKKKLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMLLLLLLLLLLLLKLLKJJJKMMMMMMMMMMMMMMMMMMMMMMMMMMLLLLMMMMMMMMMMMMMMMMMMLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMNNNNNNNNNNNNNNNNNNNNNMMMMMNNNNNNNNNNNMOMTMK]YLLLLLMMMNNOOOPOOOOOPPPQQOOOOOOOOOOOOOOOONNNNNNNNOOOOOOPPONNNOOONNOOPOOPPRQPOOPQQQQQQQQQQSSRRRRRRRRRSSSSSSSSSSSSSSSSSTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVWWWWWWWWWVWWXXYYZYYYYYYYYZZYYZZ[[\\\\\\\\\\\\\\\\]]]]]]]^^^^^^^^^^^^^^^^_^^]]^_`a`abaaaabaaaabbbbbbabcddcddddddd
dddddeeffgggggghhjjjjjjjjjjjkkkkkjjjkkllllllmmmmmoooooooonnnnnnnnJKLLLKKKKKKKLLLLLLLLLLLLLLLLKKKKKKKKKKKKKKJJJKKKLLLKKKKKKKLKKKKLLLLLLMMMLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMLLLLMMMMMLLKKKKJJKLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMNNNNNNNNNNNNNNNNNNNNNMMMMMNNNNNNNNNNNMNKTLLaYLLMMMMMMNNNOOOOOOOOPPPQQOOOOOOOOOOOOOOOONNNNNNNNOOOOOOPPOONNOOONNOOOOOPQRQPPPPQRRRRRRRRRRRRRSSSSRSSSSSSSSSSSSSSSSSSTTTTTUTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUVVVWWWWWWWWWWWWWWWWXYYZZZZZZZZZZZZZZ[[[\\\\\\\\\\\\\\\\\]]]]]^^^^^^^^^^^^^^^_____^^__`aaaabbaabbaaaabbbbbbbcdeedddddddddddeeefffggggghiiiiiiijjjjkkkkkkkkkkklllmmmmmmmmmooooopppnnnnnnnnJKLLLKKKKKLLLLLLLLLLLLLLLLLLLLKKKKKKLLLLKKKKKKLLLLLLLLLLLLMMLLLMLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNMMMMLLLLMMNNMMLKKLKKKLMMMMMMMMMMMMMMMMMMMMMNNNNMMMMMMMLMMMMMMMMMMMMMMMMMMMMMMMMLLLLLLLLLLLLLLLLLLLLLLLLMMMMMNNNNNNNNNNNNNNNNNNNMMMMMNNNNNNNNNNNMNISJH_SLMMMMMMMMNNNOOOOOOOOPPQQOOOOOOOOOOOOOOOONNNNNNNNOOOOOOPPOONNOOONNOOOOOPQQQPPPQRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSTTTTUUUTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUWWWWWWWWWWWWWWWWWWXXYYYZZZZZZZZZZ[[[\\\\\\\]]]]]]]]]]]]]]]]]]^^^^^^^^^^^^^^____``````aaaaabbaabbabbbbccccbbcdeedeeeeeeeeeeefffggggggghijhiiiijjjkkkkkkkjkklllmmmnnnnnnnnooooppppnnnnnnnnJKLLLKKLKLLLLLLLLLLLLLLLLLLLLLLKLLLLLLLLLKKKKLLMLLLLMMMNMMNMMMMNLLLMMMMNNNNNNNNNMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNMMMMMMMMMMMMMMMMNNMMMMLLLMNOONMLKLLLLLMOMMMMMMMMMMMMMMMMMMMNNNNNMMNNNMMLMMMMMMMMMMMMMMMMNNNNNNNNLLLLLLLLMMMMMMMMLLLLLLLLMMMMMMMNNNNNNNNNNNNNNNNNMMMMMNNNNNNNNNNNQPISIF]OLMNNMLLMMMNNNONNNNOOPPPQOOOOOOOOOOOOOOOONNNMMNNNOOOOOOPPOONOOOONNOOOOOPQPPPPQQRRRRRRRRRRQRRRSSSSSSSSSSSSSSSSSSSSSSTTTTUUUTTSSTTUUUUUUUUUUUUUUUUUUUUUUUUUWWWWWWWWWWWWWWWWXXXXYYYZZZZZZZZZZ[\\\\\\\\]]]^^^]]]]]]]]]]]]^^^^^^^^^^^^^^^___```aaaaaaaaabbaabbbbbcccccdcccdeddeeeeeeeeeefffggghhgghhijiiijjjkkkkkkkkjjllllmmmnoooooooonnooopppnnnnnnnnKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMLLLLLLLLMMMMMMMMNNNNNNN
NLLMMMNNNNNNNNNNNNNNNMMMLMMMMMLLLNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNMNNNMMLLLMMNNONNMLLLLLMNOMNNNNNNNNNNNNNNNNNNNNNNMNNNNNNNNMNNNNNNNNNMMMMMNNNNNNNNMMMMMMLLLMMMMMMNNMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNMMNNNNNONNNNNNNNOPORNMUSTOLKKKKLLMNOOOOOOOOOPPPPPOOOOOOPOOOOOOOOOONNNNNMNNNNOOOOOOOOOOPPPOOOPPQRPPQQRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTUUUUTTTTTTTTUUUUUUUUUUUUVVVVWWWWVVVVVVVVYYYYYZZZYYZZZZZ[\\\\\\\\]]]]]]]]\\\\]]]]\\]]]]^^^^^^^^^]^^__``aabbbaaaa`aaaabbccdccbccddccccccddeeeeeeeeffgggghhggghiijjjjjjjjkkkkllllkkllmmmnnnooooooooooooooooppppppppKKKKKKKKLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMMMMMMMMMMMMMMMMMMNNNNNNNNLMMMMNNNNNNNNNNNNNNNMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNMMMLLMMNNNNMMLLLLMNNONNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNMMMMNNNNNNNNNNNMMMMLLLMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNMMNNNNNNNNNNNNNNNPRUOOVOPMKKLKLNLMNOOOOOOOOOPPPPPPPPPOOOOOOOOOOOOONNNNNMNNNNOOOOOOOOPPPPOOOOPPQRPPQQRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTUUUUUTTTTTTUUUUUUUUUUUUUVWWWWWWWWWWWWWWWYYYYZZZZZZZZ[[[[\\\\\\\\]]]]]]]]]]]]]]]]\\]]]]^^^^_________``aabccbbbaaabbbbbccddddddddddddcdddefffffffffgggghhhhhhiijjkjjjjjkkkkkllllkkllmmnnnoooooooopooooppppppppppppLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLMMMMNNMMMMMMMMMMMMMMMMNNNNNNNNMMMMNNNNNNNNNNNNNNNNNMMMLMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNMMMMMMMMNNMMMLLLMMNOONNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOOONNNNNNNNNMMMMLLMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNMMMMNNNNNNNNNOSTKLTKMLLMMLMOLMNOOPOOOOOOPPPPPQQQPPONOOOOOOOOOONNNNNMNNNNOOOOOOPPPPPPOOOOPQRRPQQRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTUUUUUUUUTTTUUUUVVVVVVVVVWWWWWWWWWWWXXXYYYYZZZZZZZZZ[[[[[\\\\\\\\]]]]]]]]]]]]]^^^]]]]]^^^^__````````aabbbccccbbbbcbbbbcccddeeeeeeeddddeeegggggggggggghhhhiijjjjkkjjjkkkkkkklllllkllmmnnooooooooppppppppppppppppppLLLLLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNMMMMMMMMMMMMMMMMNNNNNNNNMNNNNNNNNNNNNNNNNNNNNNNNMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOONNNNNNNNNNNMMMMMMMMLLMMMMNNOONNNNNOOOOOOOOOOOOOONNNNNNNNNNNN
NNNNNNOOONNNNOOPPOOOOONNNNNNNMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNMMMMMMMMMMMMOMPSHHVSLLMNMKLNMMNOPPPOOOOOPPPPQQRRQPONOOOOOOOOPONNOONMNNNNOOOOOOPPPPPPOOOOPQRSQQQRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTUUUUUUUUTUUUUVVVVVVVVVVVVVWWWWWWWWWXXYYYZZZZZZZZZZZ[[[[[]]]]]]]]]]]]]]]]]]]]^^^^]]]^^^^^_``````````abbbcccccbbbbcccbbcccddefffeeeeeddeeeggggggggghhhhhiijjjjjjjjkkkkkkkklllllllllmmnnooooooooppppppqqqqqppppppppMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNMMMMMMMMOOOOOOOONNNNNNOONNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNNNNNNNNNNNMMMLLLMNNNOOOONNNOOOOOOOOOOOOOOOOOONNNNNNNNNNNNNNOOOOONNOOOPPPOOOOOOOOOONNNNMMMMMMMMMMMMMMMMMMNNNNNNOOOOOOOOOOOOOOOOOONNNNNMMMNNNNNNNNNKOUKHX^LLNONLLNMNNOPPPPOPPPPQQQQRRRRQOOOOOOOOOOPONNOONMNNNNOOOOOPPQQQPPPPPPPQRRQQRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSTTTTTTTTUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVWWWWVWWXXYYYZZZZZZ[[ZZZ[[[[[]]]]]]]]]]]]]]]]]]]]^^^^^^^^____``aa```_```aabbcbbbbbbbbccbbbbccddeeffeeeeeeeeeehhhhhhhhhhhiiiiijjjjjjjjkkkkkkkkllllllmmmmmnnoooooooppppqqqqqqqqppppppppMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNMMMNNNNNOOOOOOOONNNNNNNNOOOOOOOOOOOOOOOONNNNNNNNNNNNOOOOOOONNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNNNNOOONNMMLLLNNOOOPOOOOOOOOOOOOOOOOOOOOOOOOOONNNNNNNNOOOOOOOOOOOPPPPPPPPPOOOOOOONNNNNNNNNNMMMMMMMMMMMNNNNOOOOOOOOOOOOOOOOOOOOOONNNNNNNNNNNNNNOOORLFQYMMMOOMMOMNOPPPPPPPQQQQRRRRRRRQPPOOOOOOOOPOOOOONNNNNNOOOOOPQQRQPPQPPPPQQRRRRSSSRRRRRRRRRRRRRSSSSSSSSSSSSSTTTTTTTTTTTTTTTTUUUUUUUUUUUVVVVVUUUVVVVVVVVVVVVVVVWWWWWWWWWXXXYYZZZZ[[[[[[[[[\\\]]]]]]]]]]]]]]]]]]]^^^______````aaaaa``_``aabbccbbbbbbbbccccccddddeeffffffffefffhhhhhhhhhiiiijjjjjjjkkkkkkkkkllllllllmmnnnnnnooooooppppppqqqqqqqppppppppNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNMMNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNNNNNNNNNOOOOPOOOOOOONOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNNOOOOPONMMLMMNOOPPPPOOOOOOOOOOOOOOOOOOOOOOOOONNNNNNNNOOOOOOOOOPPQQPPPPPPPPPOOOOOONNNNOONNNNNNMMMMMMMMNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOTKHHFKPQNMNONNOMNOPPQPPQQQRRRRRRRRRRRR
QOOOOOOOOPOOOOONNNNNNOOOOOPQRRQPPRQQPPPQQRRRSSSSRRRRRRRRRRRRSSSSSSSSSSSSSTTTTTTTTTTTTTTTTUUUUUUUUUVVVVVVVUUUVVVVWWWWWWWWWWXXXXXXXXXXYYYYYZZ[[[[[[\\\\]]]]^^^^^^^^^^^^^^^^^^___```````aaaabbbbbaaaaabbcccdccccccccdddddeefeeeffggghhgggggghhhhhhhhiiiijjjjkkkkklllkkklllllmllllmnnooooonnnoopppppppppqqqqqppppppppNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNMMNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNNNNNNNNNOOOPPNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPOOOOOOOMNNNOOOOPPONMMMMNOOPPPPOOOOOOOOPPPPPPPPPPOOOOOOONNNNNNNNOOOOOOOPPPQQQPPOPPPPPPPPOOOOONNNOOOONNNNMMMMMMMMNOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPOOOOOOOOLUF=FLQSTQNNNMMMNNOPPQPPRRRRRSSSRRRRRRRROOOOOOOOPOOOOONNNNNNOOOOOPQRRQPPRRQPPPPPRRSSSSSRRRRRRRRRRRRSSSSSSSSSSSSSTTTTTTTTTTTTTTTTUUUUUUUUUVVVVVVVUUVVVVWWWWWWWWWWXXYYYYYYYYYYYYYYZ[[[[[[[]]]]^^^^^^^^^^^^^^^^^^^^__```aaa`aaaabbbbbbcccbbbbbccdddccccddddddeeefggfffffghiiiihhhhhhhhhhhhhiiijjjjjkklllmmmkkklllllmllllmnnpoooonnnopppppppppppqqqqppppppppOOOOOOOONNNNNNNNNNNNNNNNNNNNNNNNMNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPOONNOOPOOOOOOOOOOOOOOOOPOOOOONNOOOOOOOOPONOPQQQPPPONNMLPOOOOOONPPOOOPPQPPPPPPPPOONNNOOOPONNNOPQPPPPPPPPOPQRRRQPQPPOOPPQPPPPOOOONNOPPPPOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNNPMPQNDI?POPQPOOPOOPPPQQQPQQRRRRRRRRRRRRRRRQPOOOOOOOOOOOONMNPPNMNOPRSSSRQRRQQPPPQRRRRSSSSTSSRRSSTSSTTTTTTTTTTTTTTUUUUTTTTUUUUUUUUUUUUUUUUVVVVVVVVVVVVVWWWWWWWXXXXZYYXXYYZXXYYZZZZZZZ[[\\\[\]]]]^^```_____^^^^_______``aaaaabbbaa`bbbbbcccbbccccddeeeeeeeedefghhhhfffffghhhhhhhhhhhhhhhhhhiiiijjjjkkkkkkkklllllmmmmmmllmmnnopppppopppppqqqpppqqqqqqqqqqqqqOOOOOOOONNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONOOOOPPPPOOOOOOPOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOQPOOPQQPPPPONNMMPPOOPPOOOOOOOPPPPPPPPPPPOOOOOOOOPOONNOPPPPPPPPPPOPQRRRRRRQQPPPQQPPPOOOOOOOPPPPOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOQRQOQHNMQPPQQOOPOOOPPQQQPQQRRRRRRRRRRRRRRRQPPOOOPPPPPPPPONNPPNNOOPQSSSRQRRQQPPPQRRRSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVWWXWXXXXXXXZYYYYYY
ZYYYZZ[[[[[[\\]]][\]]]]^_________________````aaaaaabbbbbabbbbccccccccddddeeeeeeeedefghhhhgffffghhhhhggggghhhhhhhhijjjjkkklllllllllllllmmmmmmmmmnnoopppppoppppppqqpppqqqqqqqqqqqqqOOOOOOOONNNNNNNNNNNNNNNNNNNNNNNNOOOONNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNOOOOPPPOOOOPPOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPRQPPPPPPQPOONNNOQPOOPPPONOOPPQQQPPPPPPPPOOOOOOOOOOOOOPPPPPPPPPPPPPQQRSSSSRRQQQQQPPPOOOOOPPPPPOONOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPOOOOOOOOOOOPPPPPNRPMPHPURQQQQONONNOOPPQQPQQRRRRRRRRRRRRRRRQQQPOOQQQQQQQQPOOPPOOPPQQRRRRRRRRQPPPQSSSSSSRRSSSSSSSSSSTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUTTUUUUUVVVVVVVVWVVVVWWXXXXXXXYYYZZZYYZZZZZZZ[[\\]]]]]]]][\]^^^_`________````````aaaaaaaaaabbcccbbccccdddccddddeeeeeeeeeeeffghhhhhggffgghhhgggfffiiiiiiiijjjkkkkklllllllllllllmmmmmnnnnnnooppppppppppppppppqqqqrrqqqqqqqqOOOOOOOOOOOOOOOONNNNNNNNNNNNNNNNOOOOONNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNOOOOOPPPPPPPPOOOOOOOOOOOOOOOOOOOOOPPPQQQQQQQQQPPPPPPOQPOONOOPPPOOPPOOOPPQRRRRQQQQQQQQPPPPPPOOOOPPPPPPPPPPPPPPQQQQQRSSSSRQQPPPPPPPPPPPPPPPPPOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPOOOOOOOOOPPPQQRRNOSROHVVRPPQPOOONNOOPPQQPQQRRRRRRRRRRRRRQRRRQQPOQQQQQQQQPPOOOOPPQQQQRRSSQRRQPPPQSSSSSRRQRSSTTSSRSSTTTTTTTTTTTTTTSTTTUUUVUUUUUUUUTTTUUUVVVVVVVVWWVVVWWXYYYYYYYYYYZZZZZZZZZZZZ[[\\]]]]]]]]\]^^__`a````____aaaaaaaabbbbbaaaaaabbcccccccddddddddeeeeeeeeeeeefffghhhhhhggggghhhhggggfiiiiiiiijjjkkkkkllllllllllllmmmmmnooooooppppppppppppppppqqqqqrrrqqqqqqqqOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPOOOONNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNOOOOPPPPPPPPPOOOOOOOOPPPPPPPPOOOOOOOOPPPPPPPPPOOPPPPOPPOOOPQQPPOOPPOOOOPQRRRQQQQQQQQQPPQQQPPOOPPQQQPPQQQQQQQQQQPPPQQRRRRQQPPOPPPPPPPPPPPPPPQQPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQRRSTNWYOLa[RONPPOOPNOOOPPQQPQQRRRRRRRRRRRRRRRRRRQPPPPPPPPPPPPOOOOPPRRQQRRSSQRRRQPPQSSSTSSRQRSSTTSSRSSTTTTTTTTTTTTTTSTTTUUUVUUUUUUUUTTTUUUVVVVVVWWWWWWWWXXYYYYYYYYZZZZZZZZZZYYZZ[[[[]]]]]]]]]]^__`aaaa``__^^aaaaaabbbbbbbbbb`aaaabbccccddddddddeeeeffffffffffffggghhihhggghhiiiihhhhjjjjjjjjjjjjkkkkkkkkkkk
kllmmmmnnnoopppppqqppppppppppppqqqqqqrrrrqqqqqqqqOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPQQQQPPPPPPPPPPPPPPPPPPPPPOOOOOOOOOOOOOPOOPQQPPPPOOPQQRQPPPPPPOOOOPPPPPQQQQQQQQQQQRQQPOPPQQQQQPQQQQQQQQRQPPPPQQRRRRQQPPQQQQQQQQPPPPPQQRPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQRRRSVLSTLLb\RONOQPPPPPPPPPPPPQQRRRRRRRRRRRRRRRRRRQQQPPPPPPPPPPPOOPPPRRRRRRSSQRRRQQPQRRSTTSRQSSSSSSSSSSTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUTTUUUUUVVVVVWWWWXXXXXXYYYYYYYYZZZZZ[[ZZZYYZZ[[[[\\]]]]]]]^____`aaa``__^^aaaaaaaabbbbbbbbaaaaaabbccccdddddddeeeefffffffffffffffggihhhhhiiiiiijjjjjjjjjjjjjjjjkkkkllllllllmmmnnnnoooppqqqqqqqpppqqpqqqqqqqqqqrrrrrqqqqqqqqOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPOOOOOOOOPPPPPPPPPPPPPPPPOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPQQQQQQPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQRRQPOOPPQQRRRQQQQQQPQPPPPPPQQQQQQQQQQQRRRQPPPPQQQQQQQQQQQQQQQQQQQQQQQRRRRRQQQQRRRRRRQQPPPPQQQQQQQQQQPPPPPPPPPPPPPPPPQQQQQQQQPPPPPPPPQQQQRRRRQNMKMJXXTQOPQQQQQQQQPPPPPQQRRRRRSSSSSSSSSSRRRRRRQQQQQQQQPQQOOQQPRRRRRRRRQRSRRQPQQRSTTSRRSSSSSSSSSSTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUVVVWWWWWYYXXXXYYYYYYYYYYYZZZZZZYZZZ[[\\\\\\]]^^^^^_____```______``````aaaaaabbbbbbaaaabbcccccdddddddeeeeffffffffffeeeffgihhhhiijiiijjjkkkkkkkkkkjkkkklllmmmmmmmmnnnnooooppppqqrrrqqpppqqqqqqqrrrqqrrrrssqqqqqqqqOOOOOOOOPPPPPPPPOOOOOOOOOOOOOOOOOOOOPPPPOOOOOOOOPPPPPPPPPPPPPPPPOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPQQPQQRRQQPPPPPPPPPPPPPPPPPQQQQQRRRRRRRRRRRSSRSSRQPNOPQQRRRSRRRRRRQSSRQQQRRQQQQQQQQQRRSRQPPQQQQQQQQQQQQQQQQQQQQRRRRRRSSSSRRRRRRRSSSRRQPPPPQQQQQQQQQPPPPPPPPPPPPPPPPQQQQQQQQPPPPPPPPQQQQQRRRPUNJVNPVVSPQRRQQRRQQQPPPPQQRRRRRSSSSSSSSSSRRQRRRRRRRRRRRPRRPPRRPRRRRSRRRQRSSRQQQQQSTTTSRTSSRRSSTSSTTTTTTTTTTTTTTUUUUTTTTUUUUUUUUUUUUUUUUVVWWWWWWYYYXXXXXXYYYYYYYYYZZZZYY[[[[\\]]\\]]^^__^____^__^___````_````````aaabbbbccbaabbbbbccccddddddeeeeffffffffffeeeeffhhhhhijjhhiijkkkkkkkkkkkkkklllllnnnnnnnnnoooopppqpppqrrsrrqpppqqqrrrrrrrqqrrrrssqqqqqqqqOOOOOOOOOOOOOOOOOOOOOOOOOOOONNNNPPPPPPPPPPPPPPPPQQQPPPPPPPPPPOOOPPPPPPPPOOOOOOO
OPPPOPPQQQQQQRRRRRRRRRRRRPQQQQPPOOOPPPPPPQQQRRRRRSRRRRRRSSSSSSRPOOPQRSSSSTTTSSSSSSSSRRQQQSSSSSSSSQRRRRRQPRRRRRQQPQQQQPPPPRRRRRRSSSSSSSSSSSSSSSSSSQPPPPPOOPPPQQQQQQOOQQPOPPPQQRQQPOPQQQQPPOOPPQQQQRRRRRRRRRORTSKUURPPRSRQQRQQPPQQRRRSTTTTSSSSSSSSSSRRRRRRSQRSSSSSSSSSSSRRRSSSSSSSSSSTTTRQPPQSTTTSSSSSSSTTTTTTTUUUUTTTTTTTTVVVVVVVVVVVVVVVVUUUVVVVVUUVVWWXYZZZYYYYYXXXXXXYYYYZZZ[[[[[[[[\\\\\]^]^^_``aa`___````aa``__`_____`abbcbbbccbbbbbbdddddddddddeeeeeefffffedeefffgffhhhiijjjijjjkkkklllllkkklllllmmmnnnnnnnnnnooopppqqqqqqqqqqqqqqqrrrrrrrrrrrssssssttttttttOOOOOOOOOOOOOOOOOOOOOOOOOOOOOONNPPPPPPPPPPPPPPPPQQQQQQQPQQPPPPPPPPPPPPPPOOOOOOOOPPOOOPPQQQQRRRRRRRRRRRRRPPQQQQQQPPPPPPPPPQQRRRRRSRRRRRRSSSSSSRQOPPQRSSSSTTTSSSSSSSSRRQQQSSSSSSSSRRRRRRQQRRRSRRQQPPPQQQQQRRRRRSSSSSSSSSSSSSSSSSSSSRRRRRQQRRRRRRRQTQPQQPPRQQRRRQQPPPQQQQPPOOPPQQQQRRRRRRRRSQSSRITTSQQSSRQRRRQQQQRRQRRSSSSSSSSSSSSSRRRRRRRRQRSSSSSSSSSSSRRRSSSSSSSSSSTTTSQPQQSTTTSSSSSSTTTTTTTUUUUUTTUUUUUUVVVVVVVVVVVVVVVVUUVVVVVVVVVVWXXYZZYYYXXXYYYYYYZZYZZZZ[[[[[[\\\]]\]^^^^_``aa`___````aaaa`_````_``aaabbbbbccbbbbcccccdddeeeeeeeeeefffgffeeffggghgghhhiiiiiijjkkkkkkkkkkkkkllllmmmmoooooooonoooopppqqqqqqqqqqqqqqrrrrrrrrrrrrsssttsttttttttOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPPRQQQQQQQQQQQQQQQPPPPPPPPPPPPPPPPPOOOOPPPQRRRRRRRRRRRRRRRQQQQQQRRPPPPQQQQPQQRRSSSSSRRRRSSSSRRRRQPPQQRRSSSTTTSSSSSSSSRRRQQSSSSSSSSRRRRRRRRSSSSRRQQPPQQQRRRRRRSSSSSSSSSSSSSSSSSSSSSTSRRSSRQRRRRRQPPSQPQRQQRQRRRRQQPQQQQQPPOOOPPQQQPRRRRRRRRQPRQOGSUUSRRSRRSRRRRRRRRQRRSSSSSSSSSSSSSRRSSSSRRQRSSTSSSTTSSSSRRSSSSSSSSRSTTTSRRQRSTTTTSSSTTTTTTTTTUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWXYYZZYYXXWWYYYYYZZZZZZZ[[[[[\]]^^^]]^_____`aaa`__`a``aaaaaa`aaa```aaaaaabbbcccbbcccccddeeefffffeeeeffgggfffgghhiiihiiiiiiiijjjkkkkkkkkkkkkklllmmmmnooooooooooooopppqqqqqqqqqqqqqqrrrrrrrrrrrsssstttttttttttOOOOOOOOOOOOOOOOOOPPPPOONOOOOOPPPPPPPPPPPPPPPPPPQQQQQQQQQQQQQQQQPPPPPPPPQQQQQQQQPPPPPPQQRRRRRRRRRRRRRRRRRRQQQRRSQQQQQQQQQQRRSSRRSSRRRRSSSSRRRRQQQQRRRSSSSSSSSSSSSSSSRRRRSSSSSSSSSRRQQRRSSSSSSRRRQQRRRRRRSSSSSSS
SSSSSSSSSSSSSSSSSSRRRRRQQRRQQPPOOQPPQRRRQQQQQQQQQQQQQPPPOPPPQQQQPRRRRRRRRPPSQOIXZVSQRRQRSSSSRRSSSRSSSSSSSSSSSSSSSRRSTTSRRRRSSTTSSTTTSSSSSSSSSSSSSRSSTTTSSRSSTTTTTTTTTTTTTTUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWXXYYYYYYYXXXYYXXXYYY[[[[[[[[\\^____^__``__`aaaa`__`a`aaaaaaa`aaaa`aaaaaaabbcccccccdddeeeeffffffeeeeeffffffffhhhiiiiiiiiiiiihjjkkkkkkkkkkkkkkmmmmmnnnooooooooooooppppqqqqqqqqrqqqqrrrssssssssssssstttttttttttOOOOOOPPPPPPPPPPOOPQQPOOOOOOPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQQQQPPPPPPPPPPPQQQQQQQQQQQQQQQQRRRRRSSSRRRRRRRRSSRRQRRRQRRRRRRRSSSSSRRQSSSRRSSSSSRRRSRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSRRRRSSTTTSSSSSSSSSRRRRSSSSSSSSSSSSSSSSTTTTTTTTSRRRRRQQRQQQPPQQTSQOPQRRPPPPPPQQQQQPPPPPPQQQQQQQRRRRRRRRQRUSRM]_VSQQQQQSSSSSSSSSTTTTTTTTSSSSSSSSRSTTTTSRRRSTTTSSTTTTSSSSTTTTTTTTSSSTTTTSSSTTUUTTTTTTTTUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVWWWWXXXXXXYYYYYYYZZZYYYYYYYZ[[[[[[[[\]^_```_``a````aaaa`_`abaaabbbba`aaaa`aabaaaabbccccccdddgggffffffffeeeedffffffffghhiiiiijjiiiiiijjkklllkkkkkklllmmmnnnnoppppppppooopppppqqqqqqqqrrrqrrrssssssssstsssstuuttttttttOOOOOPPPPPPPPPPPOPQQQQPOOOOPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQQPPPQQQQQQQQQQQQQQQQQQRRRRRRRRRSSSSSSSSSSSSSTSSSRRRRRRRRSSSSTTSSSRQQSSSRRSSSSSRRRSSRSSRRRSSSSSSSSTTTSSSSSSSSSSSSSSSSSSRRRSSSUUTTSSSSTTTSSSRRSSSSSTTTTTTTTTTTTTTTTTTTTTSSSSSRSSRRSTUV\YTPOQSTQQQPPPQQQQPPPPQQQRRRRRRQRRRRRRRRQRTQPM[\URQQQQQSRSSSSSSRUTTTTTUUSSSSSSSSSSTTTTSSRRSTTTTSTTTTTSSSTTTTTTTTSSTTTTTTTTTUUUUTTTTTUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVWWWWWWXXXXXXXXXXYYZZ[[ZZZZZZ[[\\\[[[[[]^_``````aaa```aaaaa``abaabbbbbbaabaaaabbbbbbbcccccccdeehgggggfffffffeeeffffffggghhhiiiijjjjjjjjjkklllllkklllllmnnnnooooppppppppppppppppqqqqqqqqrrrrrrssssssssssttssstuuttttttttNOOOOPPPPPPPPPPPOPQRRQPOPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQRRQQQQQQQQQQQQQQQQQQQQQQQQRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTSSRRRSSSSSSSSSSRRRSSSTSSRRSSSSSSSSTTTSSSSSSTTSSSSSSSSRRSSSSSSUUTTSTTTTTTTTSSSSSSTTTTTTTTTTTTTUUUUUUUUUTTTTTSSTSSSTWZ\^]ZWVWWUTSRQQPPPPPPPPQQRRRSSSSRRRRRRRRRRPPROOKYXURQRRRRTRRSSSSRRUTTTSTTUSSSSSSSSTTTTTTTTRRSTTTT
SUUTTTTSSTTTTTTTTTTTTTTTTTTUUUUUUTTUUUUUUUUUUUUUVUUVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWXXXWWWXXXXYYYZZZZZZZZ[[[\\\\[[[[^^^_``aa`aaa````aabaaaababbbcbbbabbbbabbbbbcccccccccddefgggfffffffgggggghggggghhhhhiiiiijjjjkkkkkkklllllllllmmmmnnnoooooppppppppppppppppqqqqqqqqrrrrrrssssssssssttssstuvttttttttNOOOOPPPPPPPPPPPOPQRRQPOQQPPPPOOPPPPPPPPPPPPPPPPQQQQQQRRRRRRRRRRRRRRRRRRQQQQQQQQPQQRRRQQSSSSSSSSSSSSSSSSRRSTTTTSSSSSSSTTSSSSSSSRSSSSSSSSSSRRRSSSTSSRRSSSSSSSSTTTSSSSSTTTSSSSSSSSRRSSTSSSVUUTTTTTTTTTTTTTSSSTTTTTUUUUUUUUVVVVVVVVUTSSTTSRTSRSUX\_[^`__^ZUVUTRQPPPPPOOPQRRSSSSSSSSRRRRRRRRQQROPN[YTRQSSSSTRRSSSSRRTTSSSSTTSSSSSSSSTTTSSTTTRRSTTTTSUUTTTTSSTTTTTTTTUUTTTTTTUUUUUUUUTUUUUUUUUUUUUUVVVVVVVWWWVVVVVVVVVVVVVVVVWWWWWWWWXXWWWWWWXXXYYYYYZYYYYZZZ]\\\[[[[^^^_``ab`aaa`_```abbaaaabbbcccbbbcccbbccbbcdddccbcccdeefeeffffffggghhhhhihhhhhijhhiiijiijjjkklllkkkllllllllmmmnnnnooooppqqqqqqqqppppppppqqqqqqqqrrrrrrssttttttttuttsstuvttttttttPPOOOPQRRQONNNOPPQQRRQPOPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQRRRRRRRRQQQQQQQQQQQQQQQQRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSRRRRRTSSRRSSTSSRRRSTTSSSSSSSSTTTTTTTTSSSTTTTTTTSSSSRRRRSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUTSTTUVTUUWXYZZ^_``_][YYXWVTSRQOOPPPPQQSSRRRRRQRRRRRRRRUNLWWIYTUTTTTTTUTTTTSSRRSTTSRRSSTTTTUUUUUUUUUUUUTTTUUUTSTSSTUUUTUUTTTUUVUTTTTTTUTTTTUUUUUUUUUUUUUUUUUUUUVUUUUUVVUVVVVVVWVUUUUVWXWWWWWXXXXXXXXWWWXXXXXXXXYZZZYZ[[\]]\\\]]__`````abbaa``____^]^`acccccccbbdccccccceeeeddddeddddeeeeeeeeeeeggghhiiiiiiiiiiiiiiiijjjjjjkkkkklmmmlllmmmmmmmmmppooooppopppoooppppqqqqqqqqpppooqqrsstttsssstttttttttuuvvvvvvvvvPPOOOPQRRQPOOOPQQQRRRRQPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTSSSRRSSTSSSSSSTSSRRRSTUTTTSSSSSTTTTTTTTTTTTTUUUTTTSSSSRSSSTTTTTTTTTTTTTTTTTTTTTUTTTTTTTTTTTTTTTTTTUUUVVUUTTTUVVVVVWWXXX[\]]]\ZYYXWVUTSRQQQQQPPPRRRRRRRRRRRRRRRROXXTOK\QVUTTSSSSUUUUTTSRTTTSSRSTTTTUUUUUUUUUUUUUTTTUUUTTUTTUUUTTTTTTTTUUUUTTTTUUTTTTUUUUUUUUUUUUVVVVVVVVVVUUUUVVVVWWVVWWVVVVVWWXWWWWWXXXXXXXXXWWYYYYYYYYZZ[ZZZ[\]]]]\\]]______`abbaa```
``____abcccccccccccdddddddeeeedeeffeeeeeeeeeeeeffgghhhiiiiiiiiiiiiiiijjjjjjkkkklllmmmmmmnnnnnnnnnppooooppopppoooppppqqqqqqqqqppppqqrsstttssssttttuttttuvvvvvvvvvvPPOOOPQQRRQQPPQQRRRSSRQQQQQQQQQQQQQQQQQQPPPPPPPPOOPPPPQQRRRRRRRRRRRRRRRRRRRRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTSSSSSSSTTSSSSTTSSSRSSTUTTTTTTSSTTTTTTTTUUUUUUUUTTTSSSSSSTTTTTTSTTTTTTTTUUUUUUUUUUTTTTTTTTTTTTTTTTTTUVWXVVUUUVVWXXXWWWVVXYYZZYYXXXWVUTTSRRRRQQQQQQQRRRRSSSSSSSSSQQOQRKXSVUTSSRSSUUUUUTTSTTUTTSTTTTTUUUUUUUUUUUUUTTTUUVUTVUUUUUTSSTTTTTTTUUTTTTUUTTUUUUUUVVVVVVVVVVVVVVVVVVUUUUVVVWWWWWWXWWWWWWXXWWWWWXXXYYXXXXXXZZZZZZZZZ[[[[[\]]]]]\\]^___^^_`aaaaaaaaa````aabcbbbbcccccdddeeffdeeeeeffggfeeeeeeeffffffghhhiiiiiiiiijjjiiijjjjkkkkkklllllmmmmnonnnnnnnnpoooooopopppoopppppqqqqqqqqqppppqrrsstttssssttttuutttuvwvvvvvvvvPPOOOPPPQQQQQQQQRRRRRRRQQQQQQQQQQQQQQQQQPPPPPPPPOOOPPPPQSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTSSSSSSSTTTSSTTTTSSSTTUUUUTTTTTTUUUUUUUUUUUUUUUUTTTTSSSSSTTTTTTTUUUUUUUUUUUUUUUUUUUTTTTTTTTTTTTTTTTTUVXXVVVVVVWWYXXWWVVVVWWWWWWWWWVVUUTTSSSRRRRRRRRRRRRRRRRRRRRRSRTVRCR[TTSSSSTTUUUVUUTSTUUUTTTUTUUUUUUUUUUUUUUUUTTUVVVUVUUUVVUTSSTTTTTSUUTTTTUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVWWVVWXXXXXXXXXWWWWWXXXYYYYYYYY[[[[[[[[[[[[[[\]]^^^]]^^___^]^`aaaaaaabb``aabbbbaabbccdddddeeeffdefeeeffggffffffeefffffghhhhiiiiijjjjjjjjjjjjkkkkkkllllmmmnnnnnonnnnnnnnoooooooooppppopppppqqqqqqqqqqqqqrrssttttssssttttvuutuvwwvvvvvvvvPPPPPPPPPPQQQQQPQRRRRRRRQQQQQQQQQQQQQQQQPPPPPPPPPPPPQQQQSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTSSSSTTTTTTTTTTTTTTTUUUUUUUUUTTUUUUUUUUUUUUUUUUUTTTTSSSSSTTUUUUUUUUUUUUUUUUUUUUUUUUTTTTTTTTTTTTUUUUVVWXWVVVWWWWXXXWWVVVWWWVVVVVVVVUUTTTSSSSSSSSSSSRRRRRRRRRRRRRQSTQUR^bTTSSSSTTTTUVUTSSTUUUUTUUUUUUUUUUUUUUUUUUUUTUVVVVVUUVWWVVSSTTTTTSVUUTTUUVUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVUVVVVVWWYYYYYYXXWWWWWXXXYYYZZZZZ[[[[[[[[[[\[[[\]^^_^^]^____^]^`aaaaabbbb`aabbbaaabbbcdddddddeeeeefffedeegfffffggeefffggghhiiiiiijjjjjjjjjjjkkkklllllmmmmnnoonnnoooooooooooppppoooppppopppppqqqqqqqqqqqq
qrrstttttsttttuuuvvuuuvwxvvvvvvvvQQQQQPPPPPPQQQQQRRRRRRRRQQQQQQQQQQQQQQQQQQQQQQQQQQQQRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTSSTTTTTTTUUTTTTTUUUUUVVVUUUUUUUUUUUUUUUUUUUUUTUUUTTTTSSSTUUUVVUUUUUUUUUUUUUUUUUUUUUTTTTTTTTTTTUUVVVVVVVVWWWWWWXWWWWWWWXWWWWVVVVVUUTTTSSSSSTTTTTTSSSSRRRRRRRRRRRPPMSMV`VUTSSRSSSTUVVUSSSTUVUUUUUUUUUUUUUUUUUUUUVUUUVWWWVVVVWWWVSTTTTTTTVVUUUUVVUUUUUUUUUUUUUVVVVVVVVVVVWWWWWVVUTUVVVVWWYYYYYYXXWWWWWXXXYZZZ[[[[\\\\\\\\\\\\\\]^____^^__``__^^`aaaaabbbbaabbbbbabbccdddeddeeeeeefggfeeefffffffggfffggghhiiiiiijjjjjjjkkkkkkkllllllmmmmmnoooonnnoppppppppppppppppppqppppqpppqqqqqqqqqrrrrssstttttttuuuuvvvvvvwwwxvvvvvvvvRRRRQQPPPQQQQRRRRRRRRRSSRRRRRRRRRRRRRRRRQQQQQQQQRRRSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUTTUUUUTTTUUUVVVVVVVVVUUUUUUUUUUUVVVUUUUUUUUTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUTTTTTTTTTTUUVWWWVVVVWWWWWWWWWWWWWWXXXXWWVVWVVUUTTTSSSSTTTTTTTTTTTTSSSSSSSSTWZMF8BWVUTTSSSSTUVWVUTSSTUVUUTTUUUUUUUVUUUUUUUUVUUUVWWWWVVWWXWVTTTTTTUUVVUUUUVVVVUUUUUUUUUVVVVWVVVVVVVVWWWWWVVUUUVVVVWXYYZZYYXWWWWWWXXXZZZ[[\\\]]]]]]]]]^^^]^^_____^^_````___``aabbbbbbbbbbbbbbcccdddeedeeeffffgghggfgggfffffffggghhiiiiiiiijjjjjjjkkkkkkklllllmmmmmnnnoopoooopppppppppppqqqqppppqppppqpppqqqqqqqqqrrrrssttttttuuuvvvvvvvwwxxwwvvvvvvvvRRRRRQQPQQQQRRSSSSSSSSTTSSSSSSSSSSSSSSSSRRRRRRRRSSSSTTTTSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUTTUUUUTTTUUVVVVVVVVVVVVVVVVVVVVVWVVVVVVVUUUUTTTTTUUUUUUUVVVVVVVVVVVVVVVVUUUUUUUTTTTTTTTTTUVWXWWVVVVWWWWWWWWWWWWWWWXXXWWVWWWVUUTTTTTTSSSSTTTTTUUUTTTTTTTTRSK:KVTLUTTTTTTUTUVWWVUTRTUVUUTTUUUUUUVVUUUUUUUUVUUUVWXXXWWWXWVVUUTTTUUVVVUUUUVVVVVUUUUUVVVVWWWWWWWWWWWWWWWXWWVUUVWWWWXXYYZZYYXWWWWWWXXXZZZ[[\\\]]]]]]]]^_____``__`____``````_``bbbbbbbbdcccccccdddddeeedeeffghhghhhhhhiggffeeeehhhiijjjiiiiijjjjjjjkkkkkkllllmmmmmmnnnnnooooopqpppppppppqqrrqqpppqqpppqpppqqqqqqqqrrrssssttttttvvvvvwwwvvwxxxwwvvvvvvvvQRRRRQPOQQRRRSSSSSRQQQRRSSSSSSSSTTTTTTTTRRRRRRRRSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTT
TUUUUUUUUUUVVVVUUVVVVVUUUUTTTTTTUTTTTUUUUUUVVUUVVUUUUVVVVVVVVVVVVVVVVVVVUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVUUUUUUUUUUUUVVVVVWYYXWWXXXXXVVXZXXWWXYYXXXXXWWWWXXWWVVUUUTTTSSRRSSSTTTTUTTTTTTTTWTXVUQWLWVUSSSTUUVVWWVVUVUTTTUUUVVVVVVVVVVVVWVUTVVVVVVVVWWWWWWVVVUTSSTUUVVVUUVVVWWWUTTUUVVVVVWWWXXXXXXXXXXWWWVVVVVVVVVVVZYYYXXWWWXYYYYYYZZ[[\\\\^^]]]^^^^^_``abb`__^^^__]_aa`_`abbbbcccccccbbcccccddeeffdeefhijjhhiiiihhhgfffghijjjjjjjjiiiiijjjjjjkkkkkkllllllmllmnooooooppqqpppppppqqqqqqqqqqqqqqqpppoppppqqqqqqqrrrrrssttttuuvvvvvvvvwwwxxxyyyyyyyyyyQRRSRRQPQQRRSSSSSSRQQRRRSSSSSSSSSSSSSSSSSSRRRRSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUVVVVUUVVVVVUUUUUTTTTUUUUUUUUUUUVVVVUVWVVVVVVVWVVVVVVVVVVVVVVVUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVUUUUUUUUUUVVVVWWZ\^^][[[XZ\\[[YXYXWXYZZYXXXXXWWWWWWWWVVVVVUUTSSRSSTTTTUUTTTTTTTTTVZYSJXSWVVUUUUUVVVVVVVVUUTUUVVUVVVVVVUUUUUVVVVUVVVVVVVVWWWWWWVVVUTTTTVVVVUUUUVVXXWVUUUVVVVWWWWWWWWWWWWWWWWWWWWVWWVVVVWWYYYYYXWWXYZZZZZZ[[[\\\]]^^^^^^___```aaaaa``_____]_aa```bbbbbccccccccccccdddeeeffeeefgghhhhhhhhhhhgggghhiiiiiiiiiiiiijjjjjkkkkkkkkkllkklllmmnooooooppqqppppppqqqqqqqqqqqrrrqqpppppppppqqqqqrrrrrsssttttuuvvvvvwwwwwwxxyyyyyyyyyyyRRSSSSRRQRRRSSSSSSRRRRRSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUVVVVVVUUVVVVVVVVUUUUUUTTUUUUUUUUUTTUVVVVVVWWWVVVVWWVVVVVVVVVVWWVVVUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVUUUUUUUUVVVWWWWWY\_``_``_abbbb_[[YXXYZ[[YXXXXWWWVVVWWWXXXXWVUTTSTTTTTUUUUUUUUUUUQVUWRDXXUUVVVVUUWVVVVVVWUUUVWWWVVVVVVUUUUUUUVVVUWVVVVVVWWWWWWVVVVVUTUVWXVVUUUUVVXXXWVUVVWWWWWWXXVVVVVVVVVWWWWWWWWWWWWWWWYYZZYYXWYZ[[[[[[[[\\\]]]^^^^__``aaaaaa``ba``____^`aba`abbbbccccccccccccceeeeeeeeeeeeefffgggggggggghhhhiiiiiiiiiiiiijjjjkkkkkkkkkkllllllmmmnnooooooppqqppppppqqqqqqqqrrrrrrrqqqpppppppppqqrrrrrsssttttuuuvvvwwwwwwwxxxyyyyyyyyyyyRSSTTTTSRRRRSSSSSSRRRRSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTSSTTTTUUTTTTTTTTTTTTTTTTUUUUUUUUTUUUVVVWWVVUUVVWVVVVVUUUUUUUUUUUVVVUUUTTUVWVVVVWXWWVVWXXVVVVVVVVWWWWWVVUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVUUUUUUUUWWWWXXXXXZ\]]]^
_acccehgd`\YYZZZ[YYYXXXXWVVVWWXXXXXWWVUUTTTTUUUUUUUUUUUUUSVOTTG[[UUUUUUUUWWVVVVWWUUUVXXWVVVVVVVUUUTTUVWVVWWVVVVWWWWWWWWVVWVUUUWXYWVVVVVVWYYYXWVVWXXXXXXXXWWWWWWWWVVVWWXXXXXXXXXXXYYZZZYXXZZ[\[[[\[[[\\]]]^^___`aabbbaaaa`aaa`````_`abaaabbbccccdddccccccdeeeeeeeeffeeeeeeggggggggghhiiiiiiiiiiiiijjjjjkkkkkklllllmnnnnnnomnnoooooooppqqpppppqqqqrqqrrrrrsssrrrqqqppppppqqrrrrrssstttttuuuvvwwwwwwxxxxyyyyyyyyyyyySSSTTTTTRRRSSSSSSSRRRSSTSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVWWWVVVVVVWVVVVVUUUUUVVVVUUVVVVUUUTVVWWVVWWXXWWWWXXVVVVVVVVWWWWWVVUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWXXXXYZ[ZYXY\_^_achklkf`[ZZYYZYYYYXXXXWWWWWWWXWWWVVVUUTTUUUUVVUUUUUUUUVXQTXM^bXWUTTTUUXWWVVWWXVUVWXXWVUVVWWVVUUUUUVWWVWWWWWWWWXXXXWWVVWWVVVWYYXXWWWWXXYYZYXWWWXXXXXXXXWWWWWWWWVVWWXYYYYYYXXYYYYZZZZZYYYZ[[[[[\[[[\\\]]^^_```aabbbbaaaa```aaaaa``bbbbbbcccccdddddccccddffefffffffffffffggffffgghhijjjiijjjjjjjjjjjkkkklllllllllnnoonnoonnooooooooppqqpppqqqqrrrrrrrrssstsssrrrqppqqqqqqrrrrssssttttuuuuvvwwwxxxxxxyyyyyyyyyyyyyTTTTTTTURRSSSSSSSSSRRSTTSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVVWWWWVVVVWWVVVVVUUUUVVVVVVUVVVVVUUUVWWWWVWXXXXXWWWWWWWWWWWWWWWWWVVUVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWVVVVVVVVWWXXXXXYZYYXY]cgddfkmkijme^[[YYZYYYYYXXXXXXXWWWWVVVVVVVVUUUUVVVVVVVVVVVVTXWSVO[dZYWVUVWWWWWWWWWWVVVVWWWVUVWXXXWVVVUUVWWWWWWWWWWWXXXXXXWWXWVVVWXYYXXWWXXYYZZYXWWWXXXXXXXXXXXXXXXXWWXXYYZZZZYYYYZZZZZZZZZZYZ[[[[[\[[\\\]]]^__`````aaaaaaaa__`aabbb`abbcccbccccddddddddddddeeffffffffffggggggffffgghiijjjjjkkkkkkkkkkkkllllmmllllllmnnnnmnoooooooooooppqqppqqqqrrrrrrrssstttttsssrrqqqqqrrrrrrsssstttttuuuuvwwwxxxxxyyyyyyyyyyyyyyyTTTTTTTTSSSSSSSSSSSRSSTTSSSSSSSSTTTTTTTTTTSSSSTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVWWWWVVVVVVVUUUVVVWWVVVVVVVVVVVVWWWWWWXWWXXXWVVWWWWWWWWWWWWWVVUVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWVVVVVVVVWWWXXXXXWWXX\bjooklrqjhmri`^][Z\ZYYYYXXXXXXXXWWWWWWWVVVVUUUVVVVVVVVVVVVVUUYOUSU_ZYXWWWXXWWWXXWWWWWVVVWVVUVWXYYXXXWVVWWWWXWWWWWWXYYYYYXXXYXWVVWX
YYXXWWXXYYZZZXWWWXXXXXXXWXXXXXXXXXXYYYZZZZZZZZZZZ[ZZYZZ[\ZZ[\[[[\\\]]^^^^^__```__aaaaaaaa__`aaaaaaabccdcccccdddddddddddddeeeffgggfffffggghggffgghiijjjjjjkkkkkkkkkkklllllmmmmmlllmnnnmmnooopppoooooppqqppqqqqrrrrrrrsstttutttssssrrrrrrrrrrsssstttttuuuuuvwwwxxyyyyyyyyyyyyyyyyyyTTTTTTTTSSSSSSSSSSSSSSTTSSSSSSSSTTTTTTTTTTTTTTTTUUUUUUUUTTTTTTTTTTTUUUUVUUUUUUUUUUUUUUUUVVVVVVVVWWVVVVVVVVWWWWVVVVVVVUUUVVWWWWVVVVVVVVVVVWWWWWWXVWXXXWVUXXXXXXXXXXXWWVVUVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWVVVVVVVVWWWXXXXXZYYY\binpjjqqjluukb_^\\^ZZYYYYXXXXXXXXXXXXXWWVVUUUVVVVVWVVVVVVVVZUXKX\SYWWWXWWWWWWXXXXWWXWVUVVVVUVWYZZYYYXWVWWWWXXWWWWXXYYZYYYXXYXWVVWXXXXWWWWXXYYZZYWWWXXXXXWWWXXXXXXXXYYYZZZ[[[ZZZZZZ[[ZZYYZ[\Z[\\\[\\]]^^_______``___baaaa```_``aaa``aabcddcccccddddeeddddddeeeeffgggffffffffhggffgghijjjjkkkkkkkkkkkkkllllmmmmmmmmllnnoonnoooppppoooooppqqppqqqrrrrrrrrsstttuutttsssrrrrssssrssssttttttuuuuuwwwwxxyyyyyyyyyyyyyyyyyyTTTTTTTTSSSSSSSSTTTTTTSSSSSSSSSSTTTTUUUUUUUUUUUUSSSSSSSSTTTTTTTTUUUUUUUUUUUUUUVVWVVVVVVVVVVVVVVVWWWWWWWWVVVVVWWWVVVVVVVVWWVVVWWXVVVVVWWXVVVWWXXXVVWWWWVVVVVWWXXXXXXWUUWYWWWWWWXXWWWWWWWWWWVVVVWWXXXXXXXXWWWWWWWWWXXXYYYYXXY\bipsupmmkgimpqolfa]\[ZYXXXXXXXXXXXXXXXXXWWWWVVVVWWWWWWWWWWWWUTS_YWRX[ZXWWWWVXXXXXXXXVVVVUUVWWWWWWWXYWWWWWXZ\YZZYYXXYZZZYYZZZZYYXXXXX[ZYXXYZ[[ZZYYYXWZZZZYYXXZYXWWWXYYZZZZZZYZZZZZZZZ[[[[[[[[Z[\\\\]]______________^^a``````__``aaa``abbcccccbccccddddddddeeecdefeeeeeeefffffiihhhhiijjjjiijkikllkjjkkkllmmmnnmmllmmnooooooooonoopppooopqqqqqrrrrrrsssssttuuvvvvuutttsssssssssssssttttuuuvwwwyyxxxyzzyyyyyyyyzzzzzzzzTTTTTTTTSSSSSSSSTTTTTTSSSSSSSSSSTTTUUUUUUUUUUUUUTTTTTTTTTTTTTTTTUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVWWWWWWWWVVVVVWWWVVWWWWWWWWWWWWWWVVVVVWWXVVWWXXXYWWXXXXWWVWWXXXXXXXXVVVWXXXXXXXXXWWWWWWWWWVVVVWWXXXXXXXXXVVWWWXXXXXYYZZZ[ZZ[^cinpqjfghffiijkjfb^\Z[[ZZYXXXXXXXXXXXXXXWWWWVVVWWWWWWWWWWWWWWXZXXWZZZYXWXXWWYXXXXXXXWWWVVVWXXXXXWVWXWWWVVWY[Z[[ZYYYY[[ZZZZ[[[ZYXXXXXZZYXXYZ[[ZZZZYXWYYYYYYYXZYXWWXYYZZZZZZZZ[ZZZZZZ[[[[[[[[[Z[\]]]]^________________aa``aa`````aaa``bbbccccccccccdddddddeeeedefffff
fffggggggiihhhhiiijjjiijkjkkkkjkllllmmnnnmmlllmnoooooooooonnopppooopqqqqqrrrssssssssttuuuvvuuutttssssssssssssssttttuuvvwwxxxyyyzzzzzzzzzzzzzzzzzzTTTTTTTTTTTTTTTTTTTTTTSSSSSSSSSSTTTUUUUVUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWVVVVVVVWWWXXXXXXXWWWVVVVWWWWWWWXXXYYYXYYYYYYXWXXXYYYXYXWVWXXXXXXXXXXXWWWWWWWWVVVVWWXXXXXXXXXXVVWWXXYYYYYZ[[\\[[]`dgjkld^`ccbbabdffc_\Y[]\[YXXYYYYYYYYXXXXWWWWWWWWWWWWWWWWWWWWXVZSVOVXZYXXXXXXZYYXXXXYXXXWVWXXYYYXWVVWWWWWWXY[[[[[ZZZZ\[[[[[[\[[ZYYXXXZYYYYZ[\[[ZZZZYXXYYYYYYYYYXXXXYZ[ZZZZZZ[[[ZZZZ[[ZZ[[[[[[[\]^^^^_________________baaaaaa```aaaaaabbbcccccccccddddddddeeeedfgggffghhhhhhhhiiihhiiiijjjiijkkkkkkklmnnnnnnnnmmmmmnopoooooooonnnoppooooppqqqqrsssssssrssstuuuuuuuttttssssssssssssssttttuuvvwwxxxyzzzzzzzzzzzzzzzzzzzzTTTTTTTTTTTTTTTTTTTTTTSSSSSSSSSSTTTTTUUUUUUUUUUUVVVVVVVVUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWVVVVVVVWWXXXXXXXXWVVVVWXXWWWXXXXYYYYYYYYYYYYXYYYYYYYYWVWXYXWXXXXXXXXWWWWWWWWVVVWWXXYXXXXXXXXWWWXXYYYYYZZ[[\\Z[]`bdedga[\^^]\]]^abb`^Z[]\ZYYYYYYYYYYYYYXXXXXWXXXXXXXXWWWWWWWWXPTYYMR]ZYXXYYYXZYXXXXXYYYYXWXXYZZYXWVWWYYYYYYZ\[[\[ZZZ[\\\[[\\\\[[ZZYYYZYYYZ[\\[[ZZ[ZYYXXYYZZZYYYXXXYZZ[[ZZZZ[[[[[ZZ[[[ZZ[[[[[\\]^^^__`_____________```baaabbbaaaaabbbbbbcccdcccccddddddddeeeefefghhgghhhhhiiiiiiiiiiiiiijiiijklkkkklmnoooooooommmmnoopnnnnnnnnnnnoppoooooppqqqrrrrssssrssstuuuuuuuttttsssssssstssssttttuuvvwwwwxyzzzzzzzzzzzzzzzzzzzzzTTTTTTTTTTTTTTTTTTTTTTSSSSSSSSSSSSSTTTTTUUUUUUUUUUUUUUUUVVVVVVVVUUUUUUUUUUUVVVVVVVVVVWWWVVVVVVVVWWWWWWWWWWWWVVVVUUVVWWXXXXXXXWVVVWWXXXWWYYYYYYYYYYYXXYYYYYYYYYYXXWWXYZYWXXXXWWWWWWWWWWWWVVWWXXXYXXXXXXXXXXXXYYYYYYYZZ[[[Z[]`bcbab_\\\\\\^]\\^_`_[\[ZXXYZZZZZZZZZZZYYYYXXXXXXXXXXWWWWWWWWXSS_ZWYgZYYYZZYXYYXXXXYYZZZYYYYZZZZYXXXYZZZZZZ[\[[\[[Z[[\\[[[[\\\\\[[ZZZZZZZZ[\][[[[[[ZZXYYZZZZZYYYYYZ[[\[[ZZ[[\[[[[[[[[[[[[[\\\]^_____`____________````baaabcbbbbbbbbbbbcccddddccddddeedeeeeffffghhhhhhhhhhiiiiiiiiiiiiiijjijkkmlkklmnnooooooooonnnnoooooooooooonnopppooopppqqqrrrrrrsssssttuuuuuuuutttttttttttttttttuuuuvvwwxxwxyzzzz
zyyyyyyyyzzzzzzzzTTTTTTTTTTTTTTTTTTTTTTSSTTTTTTTTSSSTTTTTVVVVVVVVUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVWWWWWWWWWWWWWWWWWWWWWWWWVVVVVUUVVVWWWXXXXWWWVWWXXXXWWYYYYYYYYXXXWWXXXYYYYXXXWWWXYYZYXXXXWWWWWWWWWWWWWVWWXXXXXXXXXXXXXYYYYYYYYYYYZZZZZ[\]`bba`]]]]\\]^_][Z[]^_]\ZXXXZ[[[[[[[[[[[ZZZZYYYYYYYYXXWWWWWWWWX[WYRZ[aZZZZZZYXXYYYYYZZ[[[ZYYZ[ZZZZYYZZZZZZYYZ[[[\[[[[\\\\\\\\\\\\\\\[[ZZZ[[\\\\[[[\\[ZZZZZ[ZZZYYYZZ[[\\\[[[[\\\[[[[[[\[[[\\]]]^^_________________```aaaaaabccbbbbbbbccbccddddddddddeeeeeeefffffghihhhhhhhhiiiiiijjjjiiijjjjjklnmllmmnmnnoooooooooooooooooooooooooppqppppppqqrrrrrrssssssttuuvvvvvuuuuuuuuuuuuuuuuuuuuvvvvwwxxyxyyzzzzzyyyyyyyyzzzzzzzzTTTTTTTTTTTTTTTTTTTTTTSSTTTTTTTTTTTTUUUUVVVVVVVVUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVWWWWWWWWWWWWWWWWWWWWWVVVVVWWWVVVWWWWWWWWWWWWWXXXWWXXXYYYYYXXXXXWWWWXXXXXXXWWVVWYZZYYYYXXXXWWWWWWWWWWWWWXXXXXXXXXXXXXXYYYYZZZZZZZZZZYY[\]_```_[\]]]]]]]]\[Z[]^_\YXYZ[Z\\\\\\\\\\[[[[ZZYYYYYYYYXXXXXXXXX\YTPUWW[[ZZZZYXXYZ[[[[[\\\[ZZ[\[[[[ZZ[[Z[[[ZZZ[[\\\\[\\]]]\\]]][\\]]]\\[[[[[\\\\[[\\\\[[[[[[[ZZYYZZ[[\\[\\\\\\[\\[[[[\\\\\\]]^^^_``_^^_________^__``aaaaa`abccbccbbbccdcccdddddddddeeeeeeeefffffghihhhhiiiiiiiiiijjjjiijjkkkklmnmmmnnmlmmnnnooooooppppppppppppppoppqqqpqqqqqrsssssstttttttuuvvwvvvvvvvvvvvvvvvvvvuuuvvvwwwxxyyyzzyyyyzzzzzzzzzzzzzzzzzzTTTTTTTTTTTTTTTTTTTTTTSSTTTTTTTTUUUUVVVVVVVVVVVVVVVVVVVVUUUUUUUUUUUUUUUUVVVVVVVVVVVWWWWWXXXXXXXXWWWWWWWWVVVVVWWWWWWWWWWWWWVVVWWXYXXWWWXXYYYYXXXXXXWWWWXXXXXWWVVVUWZZZYYYYYYXXXXWWWWWWWWWWXXXXXXWXXXXXXXXXYYYZZ[[[[[ZZZYY[[\]^^^]\]]^^]\ZZ\]\[[[\`]YXZ\[Z\\\\\\\\\\\\[[[[ZZZYYYYYYYYYYYYYYVZW[UYY\[[[[ZXWXY[\]]\\]]\\[[[\\\\\[[[\[\\\[[[\\]]]\\]]^^]]]]^^[[\]]]]][\\\\\\\\[[\]]\[\\\\[[[ZYYZZ[\\\[\\]]\\[\\[[[[\\\\\]]^^^_```_^^^________^__``aaba``abccbcccbbcddccddddddddddeeeeeeeffffffghihhhhiiiiijjjijjkkjjijkkkkklmonmmnnmllmmmnnoonooppqqqpppppppppppqqrqqqqqqqrstttttuuuuttuuvvwwwvvvvvvvvvvvvvvvvvvvvvwwwwxxyyzz{zzyyyyzzzzzzzzzzzzzzzzzUUUUUUUUTTTTTTTTTTTTTTTTSTTTTUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWXWWWWWXXWVVUUVV
WWWWWWWXXXXWVVWXXYXXXXXYYZZZYYXXXWWWWXXXXYYXWVVVVYYYYXXXYYYXWWXXXXXXXXXXXXXXXXXXXXXWXYYYYYYYYZZ[\[[[[[[[[[[\]]]]]\\\\\\\\\\\\[[]_^\\]\ZZ]\\\\\]]]\\\\\\\\[ZZZZZZZZZZZZYYYX][`TVXX\[ZZ[\[ZYZ[\\\\\]]]\\[\]\]^^]\[[[[\\\[[[\\\\\\\\\]___^]]]]]^^^\[\\[Z[\\\Z[[\\\\\\\\\\\\\[[[[[\\\[[\\\\]]]\\\[\\\\]]]]]]^_`````__________^^_`abbc``aabcccccccbcccddddefeedeeeefffffffffffggghhhhhhhiiiijjkkkjjjjjllllllmmlmnnmmmnmmnnnnoooooppqqqppppoopqppppqqqqqqqqqrrruutttuuvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwxxxyyyyzz{{zyyzzzzzzzzzzz{{{{{{{{UUUUUUUUTTTTTTTTTTTTTTTTTTTUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWXXWWWWWXWWVVVVVWWWWWWWWWXWWVVWXXYXXXXXYYZZZYYXXXXXXXXXXXYYXWVVVVYZZZYXYYYYXXXXXYYYYYYYYYXXXXXXXXXXXXYYYYYYYYYZ[\[[[[[[[[Z[[\\\]]\\\\\\\\\\\[[[\^]\\]\Z[]\\\\]]]]\\\\\\\\\\[[[[[[[[[[[[ZZ][YXYZ]W\[[[\\\[YZ[\\\\\]]]]\\\]\]^^]\[\\\\\\\\[]]]]]]]]]^__^^]]]]]^__^\]\[[\\\\[[[\\\\\\\\\\\\\\\\\\\\\\\\]]]]]]]\\\\\\\]^^^^^___``````___________`aabb``aabcccccccccccddddefeeeeeeefffffffgggggghhhhiihhhiiiijjjjjjjkklllllllllmnnnnnonnnooooooooppppqppppoopqqqqqqrrrqqrrrrssuuuttuvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwxxxyyyyzz{zzzzzzzzzzzzzzz{{{{{{{{UUUUUUUUTTTTTTTTTTTTTTTTUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWWXXWWWWWWXXWWWWWWWWWWWWWWWWVVWWXYYXXXXYYYZZYYYXXXXXYYYYYYYYXWWVWWZZZZYYYZYYYXXXYYYYYYYYYYXXXXXXXXYXXYYZZYYYYYYZ[[[[[[[[[[ZZZ[[\\\]]]]]]]]\\\[[[[\\\\\\[[]\\]]]]]]]]]]]]]]]]]\\\\\\\\\\\\\YX]STNVX]\\\]]\\ZZ[\]]\\]]^^^]]]\]^^]\\\]]]]]]\\]]]]]]]]^^__^^^_^^^_``_^]\\\]]\\\\\\\\\\\\]]]]\\]]]]]]]]]]]]^^^^^]]]]]]]]]^^^__`___``aaa```````````aaabbaaabbcccccccccddedddefeeeeeffffffffghhhihhhhhiiihhhiiiijjjjjkkkklllllllllmnoooopooooooooppppppppopppopqrqqqrrrrsrrrrssssuutttuuvvvvvvvvvwwwwwwwwxxxxxxxxwwwwwxxxyyyyyzzzzzz{zzzzzzzzz{{{{{{{{{{{UUUUUUUUTTTTTTTTTTTTTTTTUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWWWXXXWWWWWXXXXXXXYWWWWWWVVVVVVWXXYXXXYYYYZZYYYYYYXXXYYYYYYYYXWWWWWYZZZYYYZYYYYXXXXYYYYYYYYYYYYYYYYYYYYZZZZZYYYYZ[[[[[[[[[[[[ZZ[[\]]]]]]]]]\]]\\[[
[[\\\[[\]]]]]]]]]]]]]]]]]^^]]\\\\\\\\]]]]^^e^ZRYg]\\\]]\\ZZ[\]]]]]^__^^]]]]^]]\]]]]]^]]]\]]]]]]]]___^^^_`___````_]]]]^^]]]]\\\\\\\\]]]]\\]]]]]]]]]]]^^^^__^^^^^^^^_____```````aabaaaaaaaaaaaaaabbaabbbcccddddddddedddeffefffffgggggghhiiihhhiiiijhiiiiijjjkkkkkkklllllllllmnoooppppoooonnppppppoooopppprsqqrrrrrsrrrsssssutttttuuvvvvvvvvwwwwwwwwxxxxxxxxxxxxxxxxyyyyzzzzzz{{{zzyzzzz{{{{{{{{{{{{UUUUUUUUTTTTTTTTUUUUUUUUTTUUUUVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWVVVVVVVVVVVVWWWWWWWWWWWWXXXXXXXXXXXXXWWWWXXYYYZZXXXWWWVVVVVWWXXYXXYYYZZZYYYYYYYYXXXXYYYYYYXWWWXXYYZYYYYZYYYYXXXXYYYYYYYYYYYYYYZZZYYZZ[[ZZZZYZZ[[[[[[[[[[[[[[[\]^]]]]]]]]^^^^^]\\[\\\[\]]]]]]]]^^]]]]]]]]^]]]\\]]\\\\]]]]]YY]ZYUh\\\\\\\\Z[\]]]]]]^___^]]]]^]]]]^]]]]]]]\^^^^^^^^____^_`aa```aa`__^]]^^^^^^]]\]]]\]]]]]]\]]]]]]]]]]]^^^^_``______``a`___`aa````aabbbbbbbbbbbbbbbbbbbbccccdddeeeeeeddeeffefffggggghhhhhhiiiiiijjjjiiijjjjjlkkkkkkklllllllllmnoooopoooonnnnqqpppooooppppprsqqqqrrrrrrrrrssstssssstuvvvvvvvvwwwwwwwwwwwwwwwwxxxxxxxxyyyzzzzzzz{{{{zyzzz{{{||{{{{{{{{UUUUUUUUTTTTTTTTUUUUUUUUTUUUUVVVUUUUUUUUUUVVVVWWWWWWWWWWWWWWWWWWVVVVVVVVVVVVWWWWWWWWWWWWXXXXXXXXXXXYYXXXWXXYYZZYYYXXWWWVVVWWWXXXXXYZZZZZYYYYYYYYXXXXYYYYYYXXXXXYXYYYYYYZYYYYYYXXYYYYYYYYYYYYZZZZZZZZ[[[[[[ZZZ[\\[[[[[[[[\\\\\]]^^^^^^^^^^^^_`_^]\]]\[\]]]]]]^^^^^^^^^^^^]]]\]]]]\\\\]]]]`^\\X[Qe\]]]\\\]Z[\]]]]]]^__^^]^]]^]]]^_]^^^^]]]^^^^^^^^_``___`bba``aa`_`_^^^_____^]]]]]]]]]]]]]^^^]]]]]]^^^^___aa``````aaaa____baaaaaaabbbbbbbbccbbbbbbccccccccddefffeeeddefffeggggghhhhhhhhhhhiijjjjjkjjjjkkkkllllkkkklllmmmmnmnoonnooooonnnnnppppppooppppppqsqqqqrrrrrrrrrssstsssssttvvvvvvvvwwwwwwwwwwwwwwwwxxxxxxxxyyzzzz{{{{{{{zzzzz{{{|||{{{{{{{{UUUUUUUUTTTTTTTTUUUUUUUUUUUVVVVVUUUUUUUUUUVVVVWWWWWWWWWWWWWWWWWWVVVVVVVVVVVWWWWWWWWWWWWWXXXXXXXXXXYYZZYYXYYYYYYXZZYYXWWWVWWWWWWWXXYZZZZZYYYYYYYYYYYYYYYYYYXXXXYYYYZZYYZZZZZ[ZZYYYYYYYYYYYYYZZZ[[[ZZZ[\\[\\[[[[\]\\\\\\\\[\\]]]]]^^^^^^^^^^_`a`_]]__]\]]]]]^^^^^^^^^^^^^^\\\]]^^_]]]]]]]]Z\[LMTJV\]^^]\]^[[\]^^]]^^_^^]]^]]^]]]_`^^__^^^^^^^^^^^^_`````abaa``aaa`a`^]^_```_^]]]]
^^^]\\]^^__^^^^^^^^____``baaaaaaa`aa`____aaaaaaaabbbbbbbbccccccccccccccccddeffffeeedeffffggghhhhhhhhhhiiijjjjjkkkkkkkklllllllllkkkllmnnoonopoonoooooooooopppppppppqqpppqrrrrrrsssrrsssstttttsttuuvvvvvvvvxxxxxxxxxxxxxxxxyyyxxxxxzzzzz{{{{{{zzzz{zz{{||||{{{{{{{{UUUUUUUUTTTTTTTTUUUUUUUUVVVVWWWWUUUUUUUUUUVVVVWWWWWWWWWWWWWWWWWWVVVVVVVVVVWWWWWWWWWWWWWWXXXXXXXXXXYZZZZZYZZZYXWWZZZYXXWWWWWWWWWWXXYZ[[ZZXXYYYYZZYYZZZZZZYYXXXXYYYZZZZZZ[[[[\\[ZZZZZZZZZZYYZZZ[[[[ZZ[\\\[\\\[[\\]\\\\\\\\[[\]]]]]^^^^^^^^]]^`aa_]^`_]\]^\]^^^^^^^^^^^^^^^\\]]^__`^^^]]]]]_]Y@S`TQ]^_^]]^_[[\]^^^]^^^^]]]^]]^]]^_`_______^_________`````aba```aaaab`^]^_`a``^]]]]^_^]\\]^________^___`````bbaaaaaa``a``_````abbbbbbbbbbbbbccccccccccccccccdeffggfeeedeffffggghhhhiggghhiiijjjjkkkkkkklllllkkklllllkllmnoppopppooooooooppppppppppppqqqpppqrrrrsssstssssttttuutttuvvvvvvvvvvxxxxxxxxyyyyyyyyyyyyxxxxzzzzz{{{|{{zzz{{zz{{|||}{{{{{{{{VVVVVUUUUUUUUUUUUUUUUUUUVVVVVVVWWWVVWWXYXXXXXXXXWWWWWWWWUVVVVVVVVVVWWWWWWWWWWWWWWWXXXYYYYYYYYYYYYYYYYYYYZZZZZZYYZZYZ[[YXXYZZYXYZYYZ[[ZYYZZZ[[\\\Z[[[[ZYXYYYYYYYYZZ[[[[[ZZZYXXYZ[[[[ZZZYY[[[[[[[[\\\\\\\\\\\\\\\\\\\\\]]]]]]]]]]]^_``_^^^]]]]]]]]^^_^^^]]\]]^^_``^^^_`aaab`_]\\]]__``]^ky�mb``]J^^_a`^\]\[\^_^^`___``___]^^^_^^^^`a__``]^``_^__^^_abaa```aabbaa`abcbbaaa`__^^_ab_`_^\\^`___``aaa_``aaaa`aa`___abbbbaaaaaaaaaaaaabbbbccccdddddddddddddddddeeeefffeeefghhghhhiiiiiiiiiijjjkkkkkkkkkkkkkkkkkklmnnnnmmlmmmnnmmmmmmmmmnnooppppppqqqqqqrrrrrrrsssssssssssssssstttttuuustuvwwwwxxxyzzyxyyyzzzzzzzyyyyzzyyyyyyyyz{{{{|}~}||||}}}}}}}}}}}VVVVVVUUUUUUUUUUUUUUUUUUVVVVVVVWWWVVWXXYXXXXXXXXWWWWWWWWVVVVVVVVVVWWWWWXWWWWWWWWXXXXYYYYYYYYYYYYYYYYYYYYZZZZZZYYZYYZZZYXYZZZYXXYXYZZ[ZZYZZZ[[[[\[[[[ZZYXYYYZZZYYZZ[[[[[Z[ZZYYZ[[\[[[[ZZZ[[[[[[[[\\\\\\\\\\\\\\\\\\\\]]]]]]]]]]]]^_``_^]^]]]]]]]]^^^^^^]][[\]^__`aaabdeeecca`___^adimmpz�����m]]``^^__^]]]\\^_^_`^__``__^^^^^^^^^^``__`_]^`a__``_^_abaa```aabbaa`abbbbaab`__^^_`a`a`_]]_a```aaabb``aabaaaaaa`__abbbbaaaabaaaaaaaabbbbccccddddddddddddddddeeeeffffffffghhhhhiiiiijiiiiijjjkkkkkkkkllllllllkllmnnn
nmmmmmmnnmmmmmmmmnnnooppqppqqqqqqrrrrrrrrsssssssssssssssssttttuuuttuvvwwwxxxxyyyxyyyyzzzzzzyyyyzzyyyyyyyy{{||{|}~}}|||}}}}}}}}}}}VVVVVVVVUUUUUUUUUUUUUUUUVVVVVVVWWWWWWXYYXXXXXXXXWWWWWWWWWWWWVVVVVWWWWWXXXXWWWWXXXXXXYYYYYYYYYYYYYYYYYYYYZZZZZZYYYYYYZYYXZ[[ZYXXXXYYYZZZZ[[[[[[[[[[[ZZZYYXYZ[[ZZYZZZ[[[ZZ[[ZZZ[\\\\\[[[[[[[[[[[[[\\\\\\\\\\\\\\\\\\]]]]]]^^^^^^^^^^__^]]]]]]]]]]]^^^^^^]][[\]^_``aaabcdedcccba`_^^gw���������uebkhc___^]^]\\^_^_`^__``__^^^^^^___^``__`_^]_a``a`_^`abaaaaaaabbaaaaabbbabba`_^^__`abb`__`a`aaabbbbaaabbbaaaaa```abbbbaaabbaaaaabbbbbcccccdddddddeeddddeeeeeeeffffgggfghiiihiiiiijjiiijjjjjkkkkkkkkllllllllkllmnnnnmmmmmnnonnnnnnnnnnooppqqqqqqqqqqrrrrrrrrsssssssssssssssssttttuuuuuuuvvwwxxwwxyxxxxyyyyzzzzyyyyzzzzzzzzzz{|||{{|}}}||}}}}}}}}}}}}WWVVVVVVUUUUUUUUUUUUUUUUVVVVVVVWWWWWXXYYXXXXXXXXWWWWWWWWXXWWWVVVWWWWWXXXYXWWWWXYYYYYYYYYYYYYYYYYYYYYYYYYZZZZZZYYYYYYYYXX[[[ZYYXYYYYYYZ[[[[[[[[ZZ\[ZYYYZZYZ[\[[ZYZZZZZZZZ[[[[[[\\\\\[[[[[\\\\\\\\\\\\\\\\]]]]]]]]]]]]]]]]^^^^^^^^]^__^]]]]]]]]]]]]]^^^^^]^^_``abb`__`abaaaaaa````dhqz�������zomgkqjc``_^^^\\^^^_a__````___________`````_^]_aaaba`_`abaaaaaabbbbaa`abbbabbaa`___``abba```aaaaabbbbaaaaaaaaaaaa``abbbbaaabbaabbbbbbcccccdddddddeeeedeeeeeffffffgggghgghhiihiiiiijjjiijjjjkkkkkkkkkkllllllllllmmnnnnnnnnnnoooooooooooooppqqqqqqqqqrrrrrrrssssssssssssssssssstttttuuuvuuuuvwwxwwwwxxxxxxyyyyyzzyyyyzzzzzzzzzz{||{{{{|}}}}}}}~}}}}}}}}WWWWVVVVUUUUUUUUVVVVVVVVVVVVVVVWWWWXXYYYXXXXXXXXXXXXXXXXYXXWWWWWWWWWXXXXYXXWWXXYYYYYYYYYYYYYYYYYYYYYYYYYZZZZZZYYYYYXXXXXZZZZZZZZYYYYYZ[\\\[[[ZZZ[[ZYYZZ[[[\\\[YXZZZZZ[[[[[[[[[\\[[[[[[[[\\\\\\\\]]]]]]]]]]]]]]]]]]]]]]^^^^^^^^^^]^__^]]]\\\\\\\\]]^^^^^^aaaabbbbaa``aa`____`bdghjkmquxz|{{�pdffpslea`_^^_]]^^^_a_``aa``_``___``````aa`__]`baabba_`abaaabbbbbbbbbaabbbabbbbaa``aaabbba```aaaaaaaaaaa```aaabbbaaabbbaaabbbbbbbbbbccccdddddddeeeeeeeeeeffffggggghhhhgghhiihiiiijjjjjjjjkkkklllllllllllllllllmmnnnnmnnnnnoopppppppppppppqqqqqqqqrrrrrrrsssssssssssssttttttttttuuuuuvwvvvvvwxxwwwwxxxxxxyyyyyzzyyyyzzzzzzzzzz{{{{zz{{}}}}}}}~}}}}}}}
}WWWWWWVVVVVVVVVVVVVVVVVVVVVVVVVWWWXXYYYYYYYYYYYYYYYYYYYYYYXWWWWXWWWXXXXYYYXXXXYYZZZZYYYYYYYYYYYYYYYYYYYYZZZZZZYYYYYYXXXYZYYZ[[[[ZZYYZ[[\[[[[[[ZZ[ZZYYZ[[\]]]\[YX[[[[[[\\[[[\\\\\[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]^^^^^^^^^^^^^^__^]]]]]]]]]]]]^^_____aaaaaaaaba````_^__`beilomnqtutpnmjlg`^cpkfba`_^^_^]^^^_a``aaaa``aa```aaba`abb```^abbbbba`abbaabbbbbbbbbbabbbbaaabbbbabbbaabbba`_aaaaaaaaaa``````abbbbabbbbaaabbbbbbcccccccddddeeeeeeeeeeeefffgggggghhhhiihhhiihhiiijjjjkjjjkkkklllllllllmmmmmmmmmmnnnnmmooooooppppppppppppqqqqrrqqrrrrrrssssssssssssssssttttttttuuuvvvvvwwwwwwwxxxwwxyxxxxyyyyzzzzyyyyzzzzzzzzzzz{{{zz{|}}}}}}~~}}}}}}}}WWWWWWWWVVVVVVVVVVVVVVVVVVVVVVVWWWXYYYYYYYYYYYYYZZZZZZZZYXXWWXXYWWXXXXYYYXXXXXXYZZZZYYYYYYYYYYYYYYYYYYYYZZZZZZYYYZZYXXYZZYYZ[\\[[[[[[[[\[[[[[[[[ZZZYZZ[[]]]\\[ZZ\\[[[\\]\\\]]]]\\\\\\\\\]]]]]]]]]]]]]]]]^^^^^^^^]]^^^^^^^^^^^^^^^_``_^]^^^^^^^^^^^__`````aaaaaaaba```__^aabcehjknkheegijeb[]__ccb`_``_^_`^]^^^`baabbbbaabbaaabbcbaacca`a_bcbabcb`abbaabccbbbbbbcbbcbaaaabbbbbbbbaabbcb`_bbbbbaaabaa```aaabccbbbbbaaaabbbccccccccddddeeeeeeeeeeefefffggghhhhhhiiijiiiijiiiijjjjkkjkkkklllllllllllmmmmmmmmmnnnnnmmoooooppqqqqqqqqqqqqqrrrrrrrrrrrrssssssssttttttttttttttttvvvvwwwwxxxxxxxxxxxxyyyxyyyyzzzzzzyyyyzz{{{{{{{{z{{{{{|}}}}}}}~~}}}}}}}}WWWWWWWWVVVVVVVVVVVVVVVVVVVVVVVWWWXYYYYYYYYYYYYYZZZZZZZZYXXWWXYZWXXXXYYYXXYYYYXX[[ZZZYYYYYYYYYYYYYYYYYYYZZZZZZYYYZZYXXYZZYYZ[\\[[[[[[[[[[[[[[[[[ZZZZZZ[[]]]\\[[[\\\[[\]]\]]]^]]]\\\\\\\]]]]]]]]]]]]]]]]]^^^^^^^^]^^^^^^^^^^^^^^^^_``_^^^^^^^^^^^^^_``````aaabbbbbbaaaa``bbbccccceffffedcai`\]`eW]]_aa_^^`^]^^^`babbccbbacbbaabcdcabdcaab`bcaabcb`abbaabcccbbbbccbccca```aaaabbbbbbbccba_ccccbbbbbbaaaaababccbbbbaaaaabbbccccccccddddeeeeeeeeeffffffggghhhhhhiiiikjijjjjiijjjjkkkkkkkklllllllllllmmmmmmmmnnnnnnmmoooopppqqqqqqqqqqqqrrrrrrrrrrrrrssssssstttttttttttttttttvvwwwwxxxxyyyyxxxxxyzzyxyyyzzzzzzzyyyyzz{{{{{{{{{{|||}~}}}}}}~~}}}}}}}}XXXWWWWWWWWWWWWWWWWWWWVVVVVVVWWWWXXYYYYYYYZZZZZYZZZZZZZZYXWWXXXXXXXXXXXXZZZZYYYXZZZZZZYYYYYYYYYYYYYYYYYYZZYYYYZZZZYYYY[\[[ZZZZ[
[\\\\\\\\ZZZ[[[\\[ZZZZ[\\]]]]\\\[\\\\\\\\]]]]]]]]\\\\]]]]\\\\\\\\^^^^^^^^^^^^^^^^^^^^^^^^________^^^^^^^^]]]^^^^^____````aaaa``abba`___``cddeeddcbcccddccdh^_d``Xd]a`^_]^```__```bbbcccccbccccbccdccccccbaacdddcccaacdcccccbccccbbccdcb`_aaaaabbcbbcdcba`acdbbccbcbaabbbbbbccccbbbbbbccccdddddddddddddeeefffffffffffgghhhhhhhiiiiiiiiiiijjjjjjkkkkkklllllllllmmmmmnnnnnnnnnnnoooooooooppqpqqrrrqqqqrrrrrrrrrrrrrrrrrrsssstttttttuttttuuuuvwwwwxxxxxxxyyyyyyyyyyyyyyyzzzzzzzzzzzzz{{{{{{{|}}}}}}}}}}}}}}}}}}}}}}}}XXXXXWWWWWWWWWWWWWWWWWVVVVVVWWWWWXXYYYYYYYZZZZZYZZZZZZZZYXWWXXXXXXXXXXXXZZZYYYYYZZZZZZZZYYYYYYYYYYYYYYYY[[ZZYYZZ[ZYYZ\\[[[[ZZ[[[\\\\\\\\ZZ[[[[\\[[[[[[\\]]]]\\\\\\\\\\\\]]]]]\\\\\\\\]]]]]]]]]]]^^^^^^^^^^^^^^^^^^^^^^^^________^^^^^^^^^^^^^^]]____```````___`abaa`````bccddccccccdddddahccaZ\\e^bb`b`a``````aabbbbccccccdccbccdccccccbabcddddccaacdccddccccccbbcddcba`baaaaabbbbcdcba`acdbbccbcbaabbbbbbccccbbbbbcccccddddddddddddeeeefffffffffffgghhhhhhhiiiiiiiiiijjjjjjkkkkkklllllmmmmmmmmmmmmnnnnnnnnnoooooooooppqpqqrrrqqqrrrrrrrrrrrrrrrrrrsssssttttttttttttuuuuvvvwwwwwwxxxxyyyyyyyyyyyyyzzzzzzzzzzzzzz{{{{||||}}}}}}}}}}}}}}}}}}}}}}}}YYXXXXWWWWWWWWWWWWWWWWVVVVVVWWWWWXXYYYYYYYZZZZZZZZZZZZZZXXWWXXXXYYYYYYYYYYYYYZZZ[[ZZZZZZZZZZZZZZZZZZZZZZ\\[[ZZZZ\ZXY\]\Z[[[[[[[[\\\\\\\\[[[[[\\\[[\\\\\\]]]\\\\\\\\\\\\\]]]\\\\\[\\\\]]]]]]]]]]]^^^^^^^^^^^^^^^^^^^^^^^^________^^^^^^^^^^^^^]]]___`````aaa```abaaaaa```aabbcbbbccdddddd`fcc_VY^d^ba`b`aaaaaabbcbbbbccccccddcccdedcccdccbbcddddddbbcdccdedcccccbbcdddcbbbbaaaaaabccddcbaacdcbccbccbbbccbccccccccccccccccdddddddddddeeeeeffffffffffggghhhhhhiiiiiiiiijjjjjjjjkkkkklllllmmnnnmmmmlmmmnnnnnnnooooopooooppqqqqqrrrqqrrrrrrrrrrrrrrrrrsssssssssttttttttttuuuuvvvwwwwwwwxxxxyyyyyyyyyyyzzzzzzzzzzzz{{{z{{{||}}}}}}}}}}}}}}}}}}}}}}}}}}YXXXXWWWWWWWWWWWWWWWWWVVVVVWWWWWWXXYYYYYYYYYZZZZZZZZZZZZXXWWXXXWYYYYYYYYXXYYZZZ[[[[[[[[[[[[[[[[[[[[[[[[[\\\[[[ZZ[ZZZ\\\[\[[[[[[\\\\\\\\\[[[[\\\\[\\]]]]\]]\\\\\\\\\\\\\\]\\\\\\\[\\\\]]]]]]]]]]]^^^^^^^^^^^^^^^^________________^^^^^^^^^^^^^^^^__````aaabbaaab
caabbba``aaabbbbbccddddccdc`ba[\be^bb`b`aaaabbccdbbcccccccdddccddedccddddbccddddddbbdedcdeedddccbccddeddcbbbbaaaaccdddccbbcdcccccdcbccdcccccccccccccddddddddddddddeeeefffffffffffgggghhhhhhiiiijjiijjjjjjjjjkkkkllllllmmmnnnnnnmmmmmnnnnnooooopppppppppqqqqqqrrrrrrrrrrrrrrrrrrrrsssssssssstttttttttuuuuuwwwwwxxxxxxxyyyyzzzzzzzzzzzzzzzzzzzz{{{{z{{||}~~~~~~~~~~}}}}}}}}}}}}}}}}XXXXXWWWWWWWWWWWWWWWWWVVVWWWWWWWWXXYYYYYYYYYYYZZZZZZZZZZXXWXXXXWXXXXXXXXXXXYZZ[[[[[[[[[[\\\\\\\\\\\\\\\\\\\\\[[[YZ\\[Z[\\\\\\\\\\\\\\\\\\\\\\\\\[\\]]]]]\\\\\\]]]]]]]]]]]]]\\\\\\\\\]]]]]]]]]]]]^^^^^^^^________________________^^^^^^^^^^^^____`````aaa````_`ababbcbb``aaabbccccccddcccfcbbc`akgaedbcabbbbbccdeccccccddddeddddefedddeeeccddddddeccdeddefeddddcbccdeeeeecccbbbbaccdddddcccddcccdedccddddddddddddddddddddeeeeeeeeeeeeffffffffffffggghhhhhiiiiijjjjjjjjjjjjkkkklllllllmmmmnnnnnnoonnnnnnnnoooppppppppppqqqqqqqqrrrrrrrrrrsssssssssssssstttttttttttuuuuuvvvxxxxxyyyyyyyyzzzzzzzzzzzzzzzzzzzzzz{{{||{{{|}}~~~~~~~~~~}}}}}}}}}}}}}}}}YYYXXXXWWWWWWWWWWWWWWWVVWWWWWWWWWXXYYYYYYYYYYYZZZZZZZZZZXXXYYYXXXXXXXXXXXXYYZZ[[[[[[\\\\\\\\\\\\\\\\\\\\[[\\\\[[WZ]][ZZ\\\\]]\\\\\\\\\\\\\\\\\\\\\\]]]]]\\\\\]]]]]]]]]]]]]]]]]\\\\]]]]^^^^^^^^^^^^^^^^^^________________________^^^^^^^^^^^__``a````aaaaaaa```abbbcccba`aaabbcddcdddddccbbd`^^`mgaddacaacbcccddddddddddddeeedddegfdddeeedddddeeefdcefedefeddeddcccddeeeecccccccbcdddeeeedddeecceeedddeeddddddddddddeeeeeeeeeeeeeeeffffgggggggggghhhhhhhhiiiijjjjjjjjjjkkkkkklllllllmmmmnnnnoooopnnooooooooppppqqpppppqqrqqqqqqrrrrrrrsssssssssssssstttttttttuuuuuuvvvvwwxxxyyyyzyyyzzzzzzzzzzzzzzzzzzzz{zz{{{||||||}}~~~~~~~~~~~}}}}}}}}ZZZYYYYYWWWWWWWWWWWWWWVVWWWWWWWWWXXYYYYYYYYXYYZZZZZZZZZZYYYYZZYXXXXXXXXXXXYYZZZ[[[\\\\\\\\\\\\\\\\\\\\\\[[\\\[[ZXZ]]\[[\\\]]]]\\\\\\\\\\\\\\\\\\\\\\\]]]\\\\]]]]]]]]]]]]^^]]]]]]]]]]^^^^^^^^^^^^^^^^^^^^________________________^^^^^^^^___``aaa```aaaaabbbbaabcbbcccbaaaaaabcdeeeeeedddcbc\\_\ff_cb`b`accccccddeeeeeeeedeeeddeegfeddeffeeddeeeefddefedefedeeedddddddeeeccdddddcdddeeeeeedeffdd
ffeddeeeeeeddddeeeeeeeeeefffffffffffffggghhhhhhhhhhhhhhhhiiijjjjjjjjjkkkkkkkkllllllmmmmnnooooooooooooppppppppqqqqppppqqrrrqqqqqrrrrrrrsssssssssssstttttttuuuuuuuvvvvwwwwxxxxyyyyyyyyyzzzzzzzzzzzzzzzzzz{{zz{{||||||}}}~~~~~~~~~~~}}}}}}}}[[[ZZZZZWWWWWWWWWWWWWWVVWWWWWWWWWXXYYYYYYYYXXYZZZZZZZZZZYYYZZZYXYYYYYYYYXYYYZZZZ[\\\\\\\\\\\\\\\\\\\\\\\\\\\\[ZYZ[\]^]\[\\]]]]\\\\\\\\\\\\\\\\\\\\\[\\]^[\\\]]]]]]]]]]]]^^^^^]]]]]^^^^__^^^^^^^^^^^^^^^^________________________^^^^^^^^````aaaa```aaaabaaa```abcccccbba```aacddfffffeeeidb[`e\^f`ddbdbcdccccccdeeeeefffdeeeedeehfedeeffeeeeeeeefedffeeeeedeeeedddddddddccdeeeeddddeeeffedegfddffeeeefeeeeddddeeeeeeeeffffffffffffffgggghhhhhhhhhhhhhhiiiiijjjjkjjjjkkkkkkkllllllmmmmnnnppppooooppppppppppppqqqqqpppqqrrrqqqqqrrrrrrssssssssssssttttttttuuuuvvvvwwwwxxxxxxxxxyyyxyyyyzzzzzzzzzzzzzzzz{{{zz{{|||}}}}}}}~~��������}}}}}}}}[[[ZZZZYYYXWWWXXYYXXWWWWWWWWWXXXZZZZZZZZZZZZZ[[\[ZZZZZZYYYYYYYYYXXYYYYYYYZZZZYZZ]]]]]]]]\\\\\\\\\\\\\\\\\\\\\[ZY[[[\\\\\]]]]]]]]]]\\\\]]\]^\\]][\\\\\\\\]]]]]]]]]]]]]]^^]]^^^^^^]]]]]]]]]]]]^^]]________^^____``````````a__``^^_`^]]^```````aaaaaaaaaaaa`bcb``acabcddcbacccb``bdb_NbeeihhchaiZc`geccefedgecceeedddeeeeeeeeeeefeedeeffgggfeeddeeeeeeffgffffffffffeedeeeeddcccdeeeeeddeefgfeeddeefgggfedefeeeeeeeeefgffeffffffggggggghhhhhhiiiiiiiiiiijjjjjjjjjkkkkkkkkkkkjkklllllmmllmmnnoooppooooopppppppqqrrrrrqqqqrrrrssssssssssssssttsssttttttttuuuvvuuvvwwxxwwwwxxyyxxxxyyyyyzzzzzz{{{{{{zzzzzz{{{{{||{{{|||~~~~~~~~~~~~~~��������[[[ZZZZZYYXXWXXXXXXWWWWWWWWWXXXXYZZZZZZZ[[ZZZ[[[\[[Z[ZZYZZZZZZZZYYYYYYYYYZZZZZZ[]]]]]]]]]]]]]]]]]]]]]]]]]]\\[[ZZ[[[\\\\\]]]]]]]]]]]\\]]]\]][[]][\\\\\\\\]]]]]]]]]]]]]]^^]]]]]^^^^^]]]\\\]]]^^^^]________^____```````````a__``^]_`^]]^```````aaaaaaaaaaaa`bcba`acabcddcbbcddcaacfjgWffdhegheha][_dbabdfffddccdeeeeeeeeeeefeeeefeeddefgggggffeeeffffffgfffggffffggeedeefeeddccdeeeeeeeefggfeeeeeffgggfeeefeeeeefffefggffffgggggggggghhhhhhiiiiiiiiiiijjjjjjjjjkkkkkkkkkkkkkkllmmmmnnmmmnnoppppppppppppooooppqqrrrrqqqqrrrrsssssss
sssssssttsstttttttttuuvvvvvvwwwxxwwwxxxyyxxxyyyyzyyyzz{{{}|||{{{{zzzzz{{{{{{{{{||~~~~~~~~~~~��������[[[[ZZZZZYYXXXXXWWWWWWWWWWWXXXXYYYZZZ[[[[[[ZZZZ[\\[[[ZZYZZZZZZZZZZZYYYYYXYZZZZZ[]]]]]]]]]]]]]]]]]]]]]]]]]]\\[[[[[[\\\\]]]]]]]]]]\]]]]]]\\]\[[\]\\\\\\\\\]]]]]]]]\]]]]]]]\\\]]]]]^^^]]\\\]]^^^^^^_____________```________a````^^^_^]]_``````aaaaaaaaaaaaaabcbaaababcddccbddddcceghh\eedgcfibeZZT]geccdeeeeeedcbcceeeefffffeeeeffecdeghhgggggffgghhgggggffggffffggfeeeffeeedcddeeeeeeeefghfeeeeffggggfeeffeeffffffffggfffghhhhhhhgghhhhhhhiiiiiiiiijjjjjjjjjjkkkkkllllllllllmmnnoooonnnnoopqqqqqqppppooooooopqqrrrqqqrrrrrssssssssssssssttstttttttttuuuvvvvvwwwwwwwxxxxxyyyyyyyzzzyzz{{|}}~~}}}||{zzzz{{{{{{{{{{||~~~~~~~~~����������������[[[[[ZZZZZYYYYYYXXXXXXXXXXXXXYYYYYZZ[[[[[[ZZYYYY[[ZZZZZYZZZZZZZZ[ZZZYYYYXYZZZZ[\]]]]]]]]^^^^^^^^]]]]]]]]]]\[[[[\\\\\\]]]\\\\\\\\\]]]]]]\\\[[[\]]]]]]]]]]]]]]]]]]\]]^^]]\\\\\\]]]^^]]]]]]^^^____^____________`````````````````___^^^^_```aaaaaaabaaaaaaaaabcbaabbbbcddccccdddddfgcf_bcdgcgfc^]ZWaigeddeeefffdbabdffffffffgfeeefffbdfghhhghggfgghhhhhhggghhggffgghffeffffefeddeefeeeeeefggffeeefggfgggfffgffffffggfghhgggghhhhhggghhhhhhhhiiiiijjjjjjjjjjjjkkkklllllllllllmmmnoopppoonnnnnpppooppppppoooonnoopqrrrqqrrrrssssssssssssssssttttttttttuuuuvvvvwwwwwwwwwxxxyyyyyyyzzzzz{|||}}~~~~~}}}}}{{{{|||||||||||}}}}}}}}}����������������\\[[[[ZZZZZZZYYYYYYYYYYZXXXYYYYZZZZZ[[[\[ZZYYYXXZYYYZ[ZZZZZZZZZZ[[[ZZYYYXYZZZZ[\]]]]]]]]]]]]]]]]]]]]]]]]]]\\[\\\\\\\]]]]\\\\\\\\\]]]]]]\\[[[[\]^]]]]]]]]]]]]]]]]]]]^^]]\\\\]]]]]]]]^^^^^^^______````````___````a````````__``aa`_^^_```aaaaaaabbbaaaaaaaabbbbbbbbbccdddcccccdddfffhecddfbgffafe_lfeddefggdeedccegfggggggggfeeefffcdfhiihgggfffgghghhhgghihhhgghhhggfggggfgfeeefffeeeeefgggfffffggfggggfggffgggggggghhhgghhhhhgggghhhhhhhhiiijjjjjjjjjjkkkkkkkllllllllllllnnnnooppoonmmmmnnnnmmnnnppppoooonoopqrrrrrrrrsssssssssssssssssttttttttttuuuvvvvvwwwwwwwwwxxyyyyyzzzzz{{{|||||}}}|}}}}}~~}}}}}}}}}}}}}}~~~~~~~~~~�������������������\\\[[[[[Z[[[ZZY
YZZZZZZZZYYYYZZZZ[[[[[[[[ZZZYYYYYYYYYZ[[[[[[[[[[[[[[ZZZZZYZ[[[[\\]]]]]]]]\\\\\\\\\\\\\\\\]]\\\\\]\\\]]]]]]]]]]]]]\]]]]]]\\[[\]\]_]]]]]]]]^^^^^^^^^^^^^^]]]]]]^^^^]]^^__``___````_````````_````aaaaaaaaaaa^`aaaa`_]_`aaaabbbbbbbbbbbbbbbbbbbbbbbbbcccdddddccccdeeegfgdgegdcgddchZmffeeddeefffedcdegggghhhhhfeeefggdefhiihhgffffgghghhhggijiiiiiiiihhhhhihhhgfefffffeeeffghhhgffffgffggggghgggggghhghiihhhhhhhhgggghhhhhhhijjjjjjjjjjjkkkkkkkllllmmmmmmmmmmoonnnoopnnmmlmmmnmmllmmnppppppppoopqqrrrrrrrsssssssssssssssssstttttttttuuvvvvvvvwwwwwwxxwxyyzzyyzzz{{{{{||{{{{{{|||}}~~~~~~~~~~~~~~~~~~~~~~~~~~���������������������\\\\[[[[[[[[[ZZY[ZZZZZYYYYYZZZZZ[[[[[[[[[ZZZZZZZZZZZ[\\[[[[[[[[[[[[[[[[[[[\\[[\\]]]]]]]]\\\\\\\\]]]]]]]]\\]]]]]\\\]]]]^^]]]]]]]]]]]\\]]]\[[]^]]_^^^^^^^^__________^^^^^^^^^^____^^___```__```````````````````aaaaaaaaaaa_abaaa`^]_abbbbbbbbbbbbbbbbbbbbbccbbcccbccdddddddccddeeefafcifggdfcWUUIaiihgfddcgedddddchhhhhhhhhgeeefggefghhhhhgffffghhhhihffhjijjkkjjijiiiijiihgffffffgfffgghiiihgffffefghhgghggghhhhhhhiihhhihhhhhhhhhhhhhhiijjjjjjjjjkkkkkkklllllmmmmmmmmmmmoonmmmnommmllmnnnnmllmnnpppqqqqqppqqrrrrrrrsssssssssssssssssssttttttttuuvvvvvvvvvvwwwxxxwxyzzzyyz{{{{|||{{{|||||||}}}~~~~~~~~~~~~~~}~~~�����������������������\\\\[[[[[[[\[[ZY[ZZZYYYYYYZZZZ[[\\\[[[ZZ[[[ZZZ[[\[[[\]\\[[[[[[[[ZZ[[[[[[\\]\\\\\]]]]]]]]]]]]]]]]^^^^^^^^\\]]]]]\\\]]]]^^]]]]]]]]]]\\\\]]\[\^_]^_`````````````````__^^^_________`__``````_```````````````````aaaa`````````bcaaa_\^`bccbbcbbbbbbbbbbbbbbbbccbbcccbcdddddddddddeeeeibgdkfggideEI<<Thhiiihhha``beghhhhhhhhiihgeeefggffgghhhhggffgghihiihffgiijkllkjijjiijjjiihgfffffhggghhijjihgffffefghhhhhhhhhhhhhhiiiihhihhhiiiiihhhhhiiijjjjjjjkkkkkkkkkllllmmmmmmmmmmmmponmlmmnmmmlmmnooonmmnoopppqqrrrpqqrrrrrrrrsssstsssssssssssssstttttttuuuvvvvvvvvvvwwxxxywxyzzzyy{{{{||||||}}}}~~}}}~~~~~~~~~~~~~~}}}}~~~������������������������\\\\\\\\[[[[[[[[ZZZZZZYYZZZZZZ[[\\\\\\\\\[[[[[[\[[[[[[[[]\\[[[ZYZZZZ[[[[\\\\\\\\\\\\\\\\]\\\\]^^]]]]]]]]\\\\]]^^]]^^^^__]]]]^^]\\]]]]]]\\]^____
_``````aa`````aaba``__^^]_____```````````_````aaaaa```````aaa```a```a````aaaaa``_bbbbbbbbbbbbbbbbbbbbbbbbccccccccddddddeedddeefffeefggfffeicYW`_Kcehijiggd]]cefhhghhhhhgghgggggghhgffghhhfggfefhjkjiihghkiijkkkjjkjjjjiiihhgfffgghhhhhijkiiiigefhhhggghhihhhhhhhhhhhhhhhhiihhiijkiiiiiiiijjjjjkkkkkkkllllmmmmmmmmonnnoonnpoonmmmmmmnnnnnnnnmmmnoopppqqqqqqqqrrrssssssssssssssssssrstuuutsuuuuuvvvwwvuuuvvvwxxyxxxyyyzzyyy{{{|||||||||||||}}}}}}}}}}~~~~~~~~~~~~~~~~~�������������������������]]]]]]]]\\\\[[[[Z[[[ZZYYZZZZ[[[[\\\\\\\\\\[[[[\\\\\\\\\\]\\[[[ZYYZZZ[[\\]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]\\\]]^^_^^]^^^__]^^^]]]]]]^^]]]]\]______```````a```_````a``__^^^____````````````_````aaaaaa`````aaba```````aaa``abba`_`abbbbbbbbbbbbbbbbbbbbbbbbccccccccddddddddddeeefffffgghgggeghfa^^Xddefgghiibbfffgghhhhhhgghgggggghhgfffghhgghgffhjkjiiighkijjkkkkkkkkkjjjjihhgggggihhhiijkjiiihfgihhhggghhhhhhhhhhhhhhhhhhiiihiijjiiiiiiiijjjjkkkkkkklllllmmmmmmmnonnnoonnooonmmmmnnnnnnnnnnnnnooppppqqqqqqqqrrsssssssssssssssssssrstuuttsuuuuvvvvvvuuuvwwwwxxxxxxyyzzzzyyzz{{{|{{||||||||}}}}}}}}}~~~~~~~~~~~���������������������������]]]]]]]]^]]\\[[[[[[[[ZZYZ[[[[[[[\\\\\\\\]]\\\\]]]]]]]]]]]\\\\[ZYYZZ[[\\]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]^^__^^]]]^_`^__^]\]]^^^^]]]\]^_`____`````````_______a```__^^____````````````_````aaaaaaa````bbba```````aaaaa`bca_^`cbbbbbbbbbbbbbbbbbbbbbbbbcccccccccccdddddddeeffffgghhhhhhghihaZWXggffeeeegbdgfehihhhgggghggghhggghgfefghighihggijkjijihhkijkklllllllkkkkjiihhggghiiiiiijjkjjjiggiiihggghhhhhhhhhhiiiiiiiiiiiiijjjiiiiiiiiijjkkkkklllllmmmmmmnnnnnonnnoonnoonnnmmmnnnnnnnoooooopppppqqqqrrqqrrrsssssssssssssssssssssstttttuuuvvvvwuuuuvwxyxxxxxxyyyyzzzzyyyzz{{{{{||||||||}}}}}}~~~~~~~~~~�����������������������������]]]]]]]]^]]]]\\\[[[[[[ZZ[[[[[\\\\\\\\\\\]]]]]]]]]]]]]]]]]\\\\\[ZZZ[[\\\]]]]]]]]]]]]]]]]]]^^^^]]\]]]]]]]]^^^^^____^^]]^``^``_]\]^_^^]]\\\^_```___``````````___^__````____````````_________````aaaaaaaaaaabbbba`````aabbbbabba``acbbbbbbbbbbbbbbbbcccccccccccccccccccccdddeeeefff
ghhhhhihhiif][\WVggghgfdbfcfheeijihhggghhgghhhhggihgeefhihijihhijkjjjihikijjkkllllllkkkkjjiiihhhhjjjjjjjjljjjjhhjjiihgghhhhhhhhhhiiiiiiiiiiiiijjjjjjjjjjjijklllkklllmmmmnnnnnnnnnonnnoonnnnnnnnnnnnnnooooppppppqqqqqqqrrrrrrrsssssssssssssssssssssssttuuuvvvvvwwwvuuvvwxyxxxxxyyzzzzzzzzzzzz{{{{{}}}}}}}}}}}}}~~~~~~~~~~~�������������������������������������]]]]]]]]]]]]]]]]\\\\\[[Z\\\\\\\\]]]]]]]]]]^^^^]]]]]]]]]]]\\\]]\[\\\\\\\\]]]]]]]]]]]]]]]]^^^^^^]\]]]]]]]]^^^^^_____^]^_`a_`a_]\]__^]]\\\\^_`a`___````````a``_____``````__````````_________````aaaaaaaaaaaabbbaaabaaabbbbbca``abbbbbbbbbbbbbbbbbbbccccccccddddddddccccddddeeefffgghhhhhhhhggf]^b^eddegiihhkikkfehhihhggghhgghhhhggiihfeegiijkjihijkjjkjhikiijjjkkllkkkkjjjjjjjjiiijjjjjjjjmkjkjiijjjihhhhhhhhhhhhhiiiiiiiiiiijjjjjjjjjjjjjijklllkkmmmmnnnnnnnoooooonnnoonnmnnnonnnoooooooopppqqqqqqqqqrrrrrrrsssssssssssssttttttttstttuuuvvvvwwwwxwwvvvwwxyxxxxyyzzzzzzzzzz{{|||||}}}}}}}}}}}}~~~~~~~~~~~����������������������������������������^^^^^^^^\\]]]]^^\\]\\\[[\\\\\]]]]]]]]]]]]]^^^^]]]]]]]]]]]\\]]]\\]\\\\\\\]]]]]]]]^^^^^^^^^^^^^^^]]]]]]]]]^^^^^_____^^^_`a```_]]^__^]]\\]]^_aa````````````a```````````````````````````````_````aaaaaaaaabbabbbaabbbbbbbbbbca_`bdcabbbbbbbbbbbbbbbbccccccccddddddddccccdddeeefffggghhggghhhgeieec_shfdegijkigjjffhghhhgggghggghhgggiiigefhiijkjihijkjkkkiikjjjjjjkklkkkkjjjkkkkkjjijjjkkkkjmkjkkiiijjihhhhiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjkkllllknnnnnooooooooooponnnoonnlmnooonnooooopppppqqqqqqqqqrrrrrrsssssssttttttttttttttttttuuvvvvwwwwxxxxxxwvvwwwxxxxyyzzzz{{{{zz{{||}}}}}}}}}}}}}}}}~~~~~~~~���������������������������������������^^^^^^^^]]]]]]]]]]]]]\\[\]]]]]]]]]]]]]]]]]^^^^]]]]]]]]]]]]\]^^]\\\\\]]]]^^^^^^^^^^^^^^^^^^^^^^^_]]]]]]]]^^^^^^________````_^^^_``_^]]^^_^_`aaaaaa``````````````a````````````````````````_````aaaaaaabbbbbbcbaaabccccbbaaba`acccbbbbbbbbbbbbbbbbbccccccccddddddddcccddeeeeeffgggghhggghhihgkcec\qlifefhiiedghegighhhhhhgghgggggghijjhffhiikkkihijkjklkiikkkjjjjkkllllkkkkkkllkkjjijjkllkkmkijkiiijiihhiijiiiiiii
iiiiiiiiiiijjkjjjjjjjjjjjkkklllllnnnooooooooopppponnnoonnlmnoooonooopppppppqqrqqqqqrrrrsssssssssstttttttttttttttttuvwwwwwwwwxxxxxxxwwvwwwxxyyyzzzzz{{{{zz{||}}}}}}}}}}}}}}}}}~~~~~~~�������������������������������������^^^^^^^^]]]]\\\\]]]]]\\\]]]]]]]]]]]]]]]]]]^^^^]]]]]]]]]]]]\]^^]]\\\]]]]^^^^^^^^^^^^^^^^^^^^]^^_`]]]]]]]]^^^^^^^______```a`^^__````_^^_``^_`aaaabaa``````____```a`````aaaaa``````````````_````aaaaaaabbbbcccbaaaaccccbbaa`abbbbccbbbbbbbbbbbbbbbbccccccccddddddddccddeeefefffggggihhhhhiigihY`i_jhfefhjihigiifghdghhhhhgghgggggghjjkigfhiikkkihhikjklkiikllkjjkkkmmmllllkkllllkjjiijkllllmkijkiiiiiihhijjiiiiiiiiiiiiiiiihijjkkjjkkkkkkkkkkkkkllmnnooooppoopppppponnnoonnllnoooonooppppppopqqrqqqqqrrrrsssssssssstttttttttttttttttuvxxxwwwwxxxxyyxxwwwwwxxxyzzzzyz{{{{{{z{|||}}}}}}}}}}}}}}}}~~~~~~������������������������������������^^^^^^^^]]]]]]]]]]]]]]]]]]\\\]]^]]]]]]]]]]^__^^^]]]]]]]]\\]]]]]]]^^^^___```__^^^_^^]]^^_________^^]\]]^_______^^__``___`_____^^^__^^^___a`__`a`_aaa``____^^^^__`aaa``____````aaaaaaaaaaa````aaaabbbbbbbbccccccccccdddcbbaabcccccbcccccccccccccccddddddddddddeeeeeddddeefggffeefghhhgghhhkghhakf`ihhhhhhhiiiihgfegghhhijjiiiiiiiihijihghijjjjjjiiiklkjkjikkkjjkklkkkkkkllmllllllkjkmmlkllkllkklkiijjhhjjiiiiiiiiijjjjjjjjiijjjkkkjkkkkkllllllmmmmooooooooppppppppooooooooooopppppoopqqqqpqqqqrrrrrrrrrrrrrrrrrsstttttttttttttuuvvvvvwwxxxxxxwwxxxxxyyxxxyxxyyz{{{zz{{{{{{}}}}}}}|}}}}}}}~~~~~~������������������������������������������^^^^^^^^]]]]]^^^]]]]]]]]]]]]]]]]]]]]]]]]]]^^_^^^]]]]]]]]]]]]]]]]]^^^^___``____^^_^^]]^^__________^]]]]^^`````__^__```__`_____^^^^^^^^__```_`````aaaaaaa```____``aaaaa```aaaaaaaaaaaaaaaa```aaaabbbbbbbbbccccccccccdddcbb`abcddccccccccccccccccccdddddddddddddeeeeeeefffgggffeefghhhgghhhjhijdia_gfggiiiiiiihhgffghiihiijjjjiiiiihijihhhijjkjjjiiiklkjkkikkkkkllmmllllllkmmllmmlljlmmmllljllklmljijjihjjiiiiiiiiijjjjjjjjjjjjkkkkkkkkllllllllmmmmooooooooooppppppooooooooooooppppoppqqqqqqqqqrrrrrrrrssssssssssttttttttuuuuuuuuvvvvwwwxxxxxxxxxx
xxyyyxxxyyyyyyz{||{{zz{||~~~~}}}}~}}}}~~~~~~~~������������������������������������������^^^^^^^^]]]]^^^_]]]]]]]]]^^^]]\\]]]]]]]]]]^^^^^]^^^^^^^^]]]]]]^^^^^^^___`______^__^^^^____________^]]]^^``````___````__```____^^]]^^__````a````aaaaaabbbbbaa````aaaaaaaabbbbbaaaaaaaaaaaaaaaabbbbbbbbbbbccccccccccdddcba`acdddcbccccccccccccccccddddddddccddddeedeeeffgggggfefghhhhhhhhhhiijeeY]fffhijiiiiihhggghiiiiiijjjjjjiiiiijjihiikkkkjjiiiklkkkkjkkkllmnnnnnmmlllnnmmnnmmklmnmmllkllllnmkhjkjiijjjjjjjjjjjjjjjjjjkkkkkkkkkkkllllllllmmmmmooooooooooooppppoonnnnoooooooopppppqqqqqqqqrrrrrqrrsstttttttttttttttuuuuvvvuuvvvvwwwxxxxxxxxxxxxyyzyyxyy{zyyyz{|}|{zz{||~~~~~~~~~~~~~~~�������������������������������������������________^^^^^^^^^^^^^^^^]]^^^^]]^^^^^^^^]]^^^^^]________^^^^^^^^^^^^^^_____________^^___````____^^^^^^^_aaaaa``__`aa```````_____]]^______aba`_`aaaaaabbbbbbaaaaaaaaabbbbbbbbbaaaaaaaaaaaaaabbbbbccccccccccccccccccdddcbbabcdedcbcccccdddddddddddddddddddccddddeecddeeffffggfffghihhhhhhigighcaVagghhiigfiihhhgggiijjiiijjjjjjjjjijjjjiiikkkkkjjihkmlllljkklmmnoooonnnnnnoonnoonnjkmnnnnnmlllmnmlhklljijkjjjjjjjjkkkkkkkkkkkkkkkkkkkklllllllmmmmmooooooooooooopppoonnnnooooooooppppppqqqqqqrrrrssrrssttttuuuuttttuuuuuuuuwwvvvvvvwwwwxxxxyxxxxxxyyzzzyyzz{{{zz{{|}}|{{{||~~~~~~~~~~~~~~��������������������������������������������____________^^^]^^^^^^^^\]]^^^^^^^^^^^^^]]^^_^^^________^^^^^^^^^^^^^^^^________________aaa```__]]^^^_``aaaaa```_`aa````````____^^_____^`aba```baaaaaaaaaaaabbbb``aaabbbbbbbaaaaaaaaaaaabbbbbcccccccccccddddddddcdddccbbbbcdedcccccddddddddddddddddddddddddddeeeddefffffffgggghiiihhhhiihjgfcd^kiiiiihgehhhhhhhgijjjiijjjjjjkkkkjjjkkjjikkkkkjjjhkmmlmlkkllmnoopnnoooooopooooponkklmoopponmmnnmmjkllkjjkkkkkkkkkkkkkkkkkkkkkkkkkjkkkkkllllmmmmnnnnnnnnnnnnooooppponnnnopooooppppooopppqqrrrrrsssttttttttuuuuuttsuuuuuvvvwwwvvvvwwwxxxxxxyyxxxxyyzz{{zzz{{{||||{{}}}}}||{}}}~~~~~~~~~~~~��������������������������������������������������������```````````__^]]^^^^^^^^\]]^^__
_^^^^^^^^]^____^^____________^^^^^^^^^^^^^______`````````baaa``__]]^^_``aaaaaa`````aaa``a`````___________`aaaa`aabbaaaaaa`aabbbbbaaaaaabbbbbbbbaabbbbbbbbbbbcccccccccccccddddddddddccbcccccdddddcddddddddddddddddeeeeeeeedddeeeefffghhhhgffgggghiiiihhiiikkigbigtiiiiiihghhiiihhgijjjjjjkjjkkkkkkjjkklkjikklkkkjjhkmmmmmkllmnoooonoooppppppooppoolllmnpqqponnonnnlkkllkklkkkkkkkkllllllllkkkkkkkkkkkkllllllmmmmnnnnnnnnnnnnnoooppppooooppppppppppppppppqqrrrrsssstttttttttuuuuuttuuvvvvvvwwvvvvwwxxxxxxyyyyyyyyyy{{{{{z{{z{|}}}{{|}~~~}|{}}}}}~~~~~~~~~~��������������������������������������������������������```````````__^^^^^^^^^^^]]]^^^__________^^_``_________________^^^^^^^^^^^^____``````````aa``__^^]]^__```aaaaa``_`aaaa``aaa````______```aaaaaabaaabbbbbbbaabbbbbabbbbbbbbbbbbbcccbbbbbbbbbccccdddddddddddddddddddedcbbbcdddccddddddddddddddddddddeeeeeeeeeeeeffffghhiihhgefghhhiiiiiiiiiikjig^hiojiiiijihghijjihgijjjjjkllllkkkjjkkklmljillllkkjjhkmnnnmlllmnooooooooppppppooppoonmllnpqqqonoonnpnkjlmlklllllllllllllllllkkkkllllllllmmmmlmmmmnnnnnnnnnnnmnnnoooppppppppppppqqqqqqqppqqrrrrrssssstttuuuuutuvvvvuuvvvvvvvvvvvvvvwwxxxxxyyyyyyyyyyy{|||{{{|{{|}}}|||}~~~~}}~~~~~~~~~~~~��������������������������������������������������������```````````______________^^^^^__________^__```_________________^^^^^^^^^^^^__``````````````_^^]]^^__````aaaaa``_`aaaa`aaaa````__`___`abbba`abba`aabbcccdbbcccbaaccbbbbbbbcccccddccccccccccccddddddddddddddddddddfecaabcdddcccddedddddddeeeeeeeeeeeeeeeeeeeefffffgghihhgfefghhhiiiiiiiiiijhieYcddkjjiiiihfgijjihgiijkjkllmmllkjiikkklmmkilllllkkjhkmnnnnlllmnoonnppppppooppooppoopnllmoqqqnmopooqokikmmllllllllllllllllllkkkkllmmmmmmmnnnmmmmmnnnnnnnnnnnmmnnoooppppqqpppqqqqqqqqrrqqqrssrrrssssttttuuvvvuuvwwwvvvvvvvvvwuuuvvvwwxxxxxyyyzyyyyyyz{|||{{||||||}}}}}}}}~~~~~~~~����������������������������������������������������������````````````````__``_^^]________________``````__________``aaa```__``````_____```aaaaaaaa`````___^_aa``abaaaaaaaaaaabbbbbaaa`````aabbbbbbbbbbbbbbbbbccccdddcbbbb
cbcdcbbcdbccccccdccccccccccddddddddddddeedcceecdfeedbabdgeeeeeeeeeeeeeeeeeeeeeeeefffeefffeffffghihhhhhhhhiiiiiiiijiiiiiijjheeehjaiijjjihhiiiiiiiijjjkklllllllllkkllfmrliiiikmmkklihjnonllimonnopoppppppppqqqqqqqqrqolmpqpppppponnonnnnnnmmnnonmlklllllmmmmmmllmnnkkllmmnnnoooooonmmmmmnooppppppppppqqqqqqqqqqqrrrrrrrrrrsrrssssstuuuuuuuuvvuuttsssttuuvwwwvuuuuvwxxwwxxyyxyyxxyz{zz{{||}}}}}}}}||}}~~~~~�����������������������������������������������������������������```````````````````___^^________````````````````________```````__```````___````aaaaaaaaaaaaaaa``_``aa`aaaaaaaaaaaabbbbbabaaaaaaabbbbbbbbbbbbbbbbbbbcccddddcbbbccccddccccbccccccdcccccccccddddddddddddeeeeddeecdedbaaceffeeeeeeeeeeeeeeeeeeeeeeeefffeefffffgfffghhhhhhhhhiiiiiiiijiiiiiijlfn`j^i_ijjjjjiiiiiiiiiijjkkklllllllllkkmkprdnuojjkmmkkliijmonmmjmonnopoppppppppqqqqqqqqrqomnpqppppppoonoonnnnmmnnoonmllllmmmmnnmmlllmmnlmmmnnnnooopoonnnnnnnooppppppppppqqqqqqqqqqqrrrrrrrrssssrsttsstuvvvvvvvvuuuuutttttuuvwwwxwvuuvwxyxxxxyzzyyyyyyz{{{{||}}~}}}}}}}}~~~~~~~~~~~~����������������������������������������������������������������```````````````````__```````````aaaaaaaaaaaa``````````````````__```aaaa````aaaaabbbaaaaaaaaaaaaa```aaba`aaaaabbb`abccbaabbaaabbcbbbbbbbcccccccccbcccccddddccccccccccddcbbccccccdccccccccddddeedddddeeeeefeefedcdda_`egfdddddddddfffffffffffffffffffeeffffggffffghhhhhhhhiiiiiiiiiijjjjiijikgd_adjjkkkjjjjjjjjjjjjkkklllllllllllkknmnkqihmlkmmkkmkjjmnnmnkmoooppoppppppppqqqqqqqqrqonnpqpppppppooponnnnmlnooonnmllmnnonnnmmmlmmmnnnnoooooopppponmnnnnooppppppppppqqqqqqqqqqrrrrrrrssttsssstttttuuvvvvvvvvuuuuuuuuuvvvwwxxxxwvvwxxyyyyyyz{zzzzyyz{{||}}}~~~~~~~~~~~~~}}}}~~~~����������������������������������������������������������������````````````````a````aabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa````aaaaaaaaaaabbbbbbbbbbaa````````ba`abba``aaabbbc`abccbaabbaaabccbbbbcccccccccccccccccddddddcccccdcabcdcbbccccccddddddddddddeeeeeeeeeeeeefeeffedcdb`begebeeeeeeeefffffffffffffffffffeefffffffffggiiiiiiiiiiiiiiiiiijjjjiihjgj_a\
gkkkkkkkkjjjjjjjjkkkkllllllllllllknlmmlvoomllmllnmkkmnnnomnooopppppppppppqqqqqqqqrqpoopppppppppppponnnonnooooonmmmnoopoonnnnnnnnnooooooooppppponmnnooopppppppppppqqqqqrrrrrrrrrrrrstuuttsstuuttuvvvvvvvvvuuuuvvvvwwwwxxxxyxxwwxxyyyyyyzz{{{{{zzz{|||}}}~~~~~~~~~~~~~~~~~~�������������������������������������������������������������������aaaaaaaa````````aaaaaabcbbbbbbbbbbbbbbbbaaabbbbbbbbbbbbbbbbbbbaaaaabbbbbaabbbbcccbbbbbbb````aaaabaaabca`aaabbbccaabbbbaaaaaaabbccccccccccccccccccccddddddddddcccdcaabccbbccccccdddddddddddeeeeeeeeeeeeeeeeeeffecbbbceeedfffffffffffffffffffffffffffeefffefffffghiiiiiiiiiiiiiiiiijjkkjjikflbg``ikkkkkkklkkkkkkkkkkllllllllmmmmmlnlqmj���qnllmmmnnlkmnnnpnnnoppppqqqqqqqqrrrrrrrrrqqppoppppppppqqpoonopqqpppooonnmnopppoooooooooooooooooopqqqponnnnnoopppqppppppqqqqqrrrrrrrrrrsssstuuttsstttttuuvvvvvvvvuuuvvwwwxxxxxxyyyyxxxxyyyyxxyyzz{|||{{{|||||}}}}}}}}~~~~~~~������������������������������������������������������������������������aaaaaaaaaaaaaaaabbbbbbccbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbcccccbbaabbbbbbbbbbbbccccccccbbbabbbbbbbbbbbbbaaaabbbbccbbbbbbaa``aabbbccccccdddddddddddddddddddddddddccdccccdccbccccccddddddddddeeeeeeeeeeeefffeffefgeb`bdeeefgfffffffffffffffffffffffffffeefffffgggghiiiiiiiiiiiiiiiiijjjkkjjjiikegleslkkkkkllllllllllllllllmmmmmnnnnmnpim���rolmnnnoomlmnnoppnnpqqpqqqqqqqqqrrrrrrrrrqqrqooqpppppqqrqpoopqrrqpppoooonooppppoppppppooooooooppppqqpponnnooppppqqppppqqqqqrrrssrrrrssssssttutttstuuttuvuuuuuuuuuvvvwwxxxxxyyyyyyyyxxyyyyyxxyyzz{|}}||||||||}}}}}}}}~~~~����������������������������������������������������������������������������bbbbbbbbaaaaaaaabbccccccccccccccbbbbbbbbbbbbcccccccccccccccccbaabbbccccbbbbbbcccccccccccbbbbbbbbbcccbaaabbbbbbbbcbbaaaab``bccccccccdddddddddddddddddddddddeeedccbdfggedcbccccccdddddddddeeeeffeeeeeffffffgffggeaaceeeefgfffffffffffffffffffffffffffeefffgghggghiiiiiiiiiiiiiiiiikkjjjjkkgnff]o^skkkkkklllllllllllllllmmmnnnnoonnmss���ʂrolmoooppnmnonopqonpqqqrrrrrrrrrrrrrrrrrqqrsqooqqppppqrrqpoopqrrqqppoooooooopppppppqppooo
ooopppqppqqqpoonoppqqqpqqppppqqqqqrrsssrrsssssssssttttttuuuuuvvvvvvvvvvvvwwwxxxyyyyyyyyyyyyyyyyzyyyyz{{{|}~}}}}}}}}}}}}}~~~~��������������������������������������������������������������������������������ccccccccbbbbbbbbbccddccbccccccccbbbbbbbbbbbcccccccccccccccccbbaabbccccccabbbbcccccccccccaaaaaaabbcdcbaabccbbbbbbcbaaaabb`abddddcccdddddddddddddddddddddddeeeedccaeikigdcbccccccdeeeeeeeeeeefffeeeeefffffghgfggd`deffeeeefffffffffffffffffffffffffffeefffhhhhgghiiiiiiiiiiiiiiiiikkjjjjkkilkYX\Rakkkjkklllllllllllllllmmmnnoooooonq����|romnoppppnmnonopronprqqrrrrrrrrrrrrrrrrrqqrsrooqqppppqrsrponopqqrqqppoooooooopppopppppoooooppqqqpppqqpppoopqrqqqqqppppqqqqqrrsssrsssssssssssstttuuvvvvvwvvvvvvvvwwwwwxxxyyyyyxxxyyyyyyyyzzzzzz{|{|}~~~~~~~~~~~~}~~~����������������������������������������������������������������������������������cccbbbbbbbbbbbbbaabbbbccccccccbbccccccccccccccccccccccccccccccccccccccccccccccccdddccccbcbabcdcbbbbbbbbbbbbbccccbbbaaabbbccdddddddddddddeeeeeeeefeeeedddccdeedccfbgtzshbdddddddddddddddddefghgffdeeefffffefgebbcffeeeeffggggggggffffffffffffffggfeeefgijiiiiiiiiiiiijjjjjjjjjjjjjjjjjjklnh_DYerGgknljkklllmmmmllkllmmnnnmkplnsmqs�����oopnmnppmpomlnqpmollpssqqqrrrrrrrrrrrrrrrqqrrrqporrssrqqqrrrqppqrrrrqrrssqrqqpopqqqqqqqqqppqqqqrrqqqqqpppqqqqrrrrppqqppqrqqqrrrrsssssssssttuutuuvvvvvvvvvvvvvvwwwwwwwwwwwxxxxxxxwyzzzyyyzzzzz{{{{|}~~~}}}~~�~�������������������������������������������������������������������������������������ccccccccbbbbbbbbbbbbccccccccccccccccccccddddddddddddddddddddddddccddddddccccccccdddcccccbbbbcdccccccccccbbbcccccccbbbbbbbcddddddddddddddeeeeeeeeeeeeddddcdeeeedcebentqkgeeeeeeeeeeeeeeeedefgggffeeeeffffgfffebbcgffeeeffffffffffffffffffffffffffffeefgiiiiiiiiiiiiiijjjjjjjjjjjjjjiijjkllbHPgdtMfmpmjllkllmmmmllllmmnnnnksnwpiuh����ͅvnpponnppnpomlnqpmpmlpssrqrrrrrrrrrrrrrrrrqrrrrqppqrssrqqqqqqqpppqrrrrrrssrrrqppqqqqqqqqqqqqqqqrrrrrrqqqqqqqqqrrrrpqqqqqqrqqrrrrsssssssssstuuutuvvvvvvvvvvvvvvwwwwwwwwwwxxxxxxxxxxyzzzyyyzzzz{{{{||}}~~~}~~~�������
�����������������������������������������������������������������������������dddddcccccccccccddccccccddddccccccccccccddddddddddddddddddddddddddddddddddddddddddddccccabccccdeddddddddbbcccdddeddcbbbbccddeeddeeeeeeeeeeeeeeeeeeeddddddeeffeedfddfhigdeeeeeeeeffffffffeeffffffeeeeffffhgffdbbdhggfeeffeeeeeeeeffffffffffffffeeffffghiiiiiiiiiiiiijjjjjjjjjjjjjiiiiijjkj\[`ffb\elomkmmklmmmmmmllmnnoooopqlmumo�����qoqqqpooqqoqpnmnqqoqnmorrrrrrrrrrrrrrrrrrrrrrrsrrqpqrssrqqqpqrqqppqrrrrrrssrssrqqqrrrrrrrrrqqqqrrrrrrrrrrrsqqrrrrrrqrrrrrrsrrrrsssssssssssstuuuuuvwvvvvvvvvvvwwwwwwwwwxxxxxxxyyyyyxyzzzyyzzzz{{|||}|}}~~~~~~~������������������������������������������������������������������������������������eeeeddddddddddddeedddddcddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddddddcccacddccefddddddddccccddddeeddccccddddeeeeeeeeeeeeeeeeeeeeeeeeeeddeeffffeeffebbeebffffffffffffffffffeeeffgefffffffhgfedcdfhggfeeeeeeeeeeeeffffffffffffffffggggghiiiiiiiiiiiijjjjjjjjjjjjjjjjjiiiiifbvg_bXbfgiklmmmmmmmmmmmmnnoppppsmonsp�����tvorrqpoopppqqpnnpqprpnoqrrrrrrrrsssrrrrrrrrrrrssrrrqrssrqqqqqrsrqqqrrrrrrssssssrrrsrrrrrrrrqqrrrrrsrrrrssssrrrrrrrrrsssrrssrrssssttttttttttuuuuuuvwwwwwwwwwwwwwwwwxxxxxxyyyyyyzzzyyzz{{zzz{z{{||}}}}}}}~~~~�������������������������������������������������������������������������������������eeeeeeeeeeeeeeeeeeeeeddddddeeeeeeeeeeeeeffffffffffffffffffffffffeeeeedddddddddddeeddddccbceeddefddddddddddddddeeeeddddddeeeeeeeeeeeeeeeeffffffffffffffffffffffffdggddhhfffffffffffffffffffeeeffgfffffffffffddefgggfeedeeffffffffffffffffeeeffggggggghhiiiiiiiiijjjjjjjjjjjjjjjjjkkjjiihhdjoeg`fbkdcjollommmmmmmmnnopppqqnqsyl����Љsqrpqppoooopqqqnnpqqsrpoprrrrrrssssssrrrrrrrrrrrsrrrpqrsrrrrrrstsrrsssssssrrsttssrstssssssssrrrrrsssrrrrssssrrrrrrrrssttssstssssttttttttttttuuvuuuvwwwwwwwwwwwwxxxxxxxxyyyyzyyzzzzzz{{|{{{{{{|||}}~~}}}~~~~������������������������������������������������������������������������������������������������ffeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffff
fffffffffffeeeeeddeeeeeeeeeeedddddddeeeeefeeeeeeeeeeeeeeddeeddddeefeeeeeefeeeeeeeeffffffffgggfffffffffffffeghffggdffffffffffffffffffeeeeffffffffeeeffddfhhgfeeddeeffffffffffffffffeeffghhihhhhhiiihiiiiijjjjjjjjjjjjjjjjjjkkkjjihgkhodlfglqgdkomlommmmmmmmnnoppqqqnsoi~�����wqsupqpppomnpprqnmoqrssqpprrrssssssssssrrrrrqrrrrrrsspqrsrrrrrsttssstssssssrrsttsssstttttttttrrrsssssrsssssssssssrrrrstttssstssttttuuuuuuuuuuuvvvuvwwxxxxxxxxxxxxxxxxxxyyyzzzzzz{{{{{||}||||}}}}}}~~~~~~~~~������������������������������������������������������������������������������������������������ffeeeeeeeeeeeeeeeeeeeeeeeeeeffffeeeeeeeefffffffffffffffffffffffffffffeeeeeeeeeeeeeeeddddeeeeefeeeeeeeeeefffeedddeeedddddffeeeeffffffffffffffffffffffffffffffffffgggfeecbffffffffffffffffffeeeeffgggfffeeeffddfhhgffeeefffffffffffffffffffffghhiihhhiiiihhhiiijjjjjjjjjjjjjjjjjjjkkkkjjihojscek`pnkikmmmnnmmmmmmnnnoppqqqrpot������yroxrqqpppomnpprqnlnqsstspprrqsssssssssssrrrqqqqqqrrsspqrsrrrrsstssstussssssrrsttsssttuuuuuuuurrsssstttttttssssssssrrrstttssstttttuuuuuuuuuuuuuvvvvvwxxxxxxxxxxxxxxxyyyyyyzz{{zz{{||||}}~}}}}~~~~~~~~~~~�������������������������������������������������������������������������������������������������eeeeeeeeeeeeeeeedddeeeeeeeefffffeeeeeeeeffffffffffffffffffffffffggggffffeeeeeeeeeeeeddddgedeffedffffffffggffedddffedddccffeeeeffffffffffffffffffffffffffffffffffgfefggggffffffffffffffffffeeeeeegggfffeeeggedfhggffffffgffffffffffffffffgggghhiihhiiiiihhhiiijjjjjjjjjjjjjjjjjjjjjkkkkjjiwca_ffchlmijnolnnmmmmnnnnoppqqqrqx�����ŉtqwolqqpppomnqorqnlnqsstsqprrqssssssstsssrrrqqqqqqqrsspqrsrrrsssssrstussstssrrsttsssttuuuuuuuursssstttuuutttttssssssrrstttssssttttuuuuuuuuuuuuuvvvvvwxxxxxxxxxxxxxxyyyyyyzz{{{z{{|||||}~~~}}~~~~~~~~~~���������������������������������������������������������������������������������������������������fgffeefgggffffffeeeeeeeedddeeffgeeffffggeeeeeeeeeeeeeeeefeeeeefffffffffffffffffffffdccefeeeffeeeffffffeefffeedddcdddeeffhgffggfeffffffffggggggggffffffffffffffffefffffffffeeeeefgggggghh
ggggfffffffhgeddeeeeefghfgghhhggfffffffffffffggghhhhhhhhiiiiiiiiiiiiiiiijjjjjkkkjjjjjjjjijjjkkkkoonceniaijkkklmnppooonnnlkpplpsnqo�����׈}srsrpqpqqpoooppqqpooprrrsrqqqqrrrrssssssssrrrrqrrstuspqqqqrrrrssrsstssttsssttutttttsssuuuuuuuuttssssstrsttttsssstsssrrsssttttustuvvvvvwwwwwwwwwwwwwwwwxxxyyyyyyyyyyyyyzzzzzzzzz{{{{{|}~~~~~~~~��������������������������������������������������������������������������������������������������������fggfeffgffffffffffeeeeffeeeeeeeeffffffffeeeeffffffffffffffeeeffgfffffeeefffffffffffdccdfeffffffeffffgggggfffeedddddeeeffgfffggffffffffffffffffffffffffffffffffffffffffffgffffghhgghhhhhhgggggffefefhhedeeeefgghighhhhggfffffffffffgghhhhhhhhhhhhiiiiiiiiiiiiiiiijjjjjjjjjjjiijjjjjjkkkllmmldfmhbjlnmjjkmpooonnnnmomnrqoun�������~vpqsrqrqqqqpppppqqpppqrrrrrqqqrrrrssssssssssrrrqssstutqrrrrrrsssrrsstssttsssttttttttttttttttuuuttssssstrsstttsssstttsssttttttttstuvvvvvvvvvvvvvxxxxxxxxxxxxyyyyyyyyzzzzzzzz{{{{{{|{{{||}}}~~~~��������������������������������������������������������������������������������������������������������fggfffggggffffffgffffffgfffeeeddffffffffffffffggffggggggggfffgggffffeeeeffffffffffedccdeeeffffeeeeffgghhggfffeeedddeefffffeffgffggffffffffffffffffffffffffffffffffffffffeeeefghhhhhhhhhhghhhhgeeeefhhfeeddefhiiihhhhhggfffffffffgghhiihhhhhhhiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiijjjjjjkklllljkffjebkmomjijmooonnnnnmmonmoqq������yuqoqsrqrsrqqrrqpppqqpqrsqrrrrqrrrrrsssstssssssssrststvusssrrrrrrsssstttsssssssttttttttttttttttttttssrsstsssttttsstttttttttttttttstuvvvvvvvvvvvvvxxxxxxxxxxxxxyyyyyyzz{{{zzz{{|||||||{{|||}}}~~���������������������������������������������������������������������������������������������������������������fgggffgggggggggggggggggggggfffeeggfffffeggggggggggghhhhhggggghhhggffffffffffffffffeddddeeeeeeeeeddeefffgfffffeeedeeeefffeeeefgfffffffffffffffffffffffffffffffffffffffgggeeeeefghhhhhhgggghhihgedeefiigefcdfghiiihhhhhggggggggggggghiiihhhhhhiiiiiiiiiiiiiiiiiiiiiiiiijjjjjiiiijjijjjkkllmjkgfgablmnmkjklnnnnnnnnmmnrqkw���
���΅vrqqssrqrsqpprrqoppqqqrssqqrrrrrsrsssstttssssssssrttstuussrrqqqqqttttuuutssssstttttttuuuuttttuuuuttssssstsssttttttttuuuuuuuutttttttuvvvvvwwwwwwwwxxxxxxxxxxxxxyyyzzzzzzzzzz{{||}}}}}||{||||}}~����������������������������������������������������������������������������������������������������������������fghggggghhhhhhhhhhhgghhhhhhhhggggggggfffggghhhhhhhhhhhhhhhhhhhhhhhhhggggffffffffffeeeeeeffggggffffffffffeeeeeeeeeeeeffffeeeefgffeeeffgggfffffffffffffffffffffffffffggggghhgggghhggggggggghiihgfeeefiigffeefghhhhhhhhhhhhggggggggfgghhhhhhhhiiijjiiiiiiiijjjjjjjjiiiiijjjjjiiiijjiijjkkllnkmiggcfnmklmmlknnnnnooopolqsm������ݜrvrrrsrqqrsqpprrqoppqqrsssqqrrrrrsssssttttssssssssrttsstuttsrrqqrrvuuuvvvussstttttttuuuuvvuuuvvvvvuttssstttttttttuuuuvvvvvvvuuuutttuvvwwwvxxxxxxxxxxxxxxxxxxxxyyyyzzzzzzzzz{{|}}~~}~~}|||}|}}~������������������������������������������������������������������������������������������������������������������fghhgggghhhiiiiihhhhhhhhhhhhiiiihhhhgggghhhhhhhhhhhhhgggghhiiihhihhhhhhhfffffffffeeefgffiiiiiiiihhhgggffeeeeeeeeeeffffffffeffgffddeffghhffffffffffffffffggggggggggggggggihggfffgffffggggghhihhgfgfghhgfggggghhhghhhhhhiiggggggggfffggghhhhiiijjjjjjjjjjjjjjjjjjjiiijjjjjjiiiiiijiijjklllompjhjhlpmkknnlinnnoooppslsoc������ꢁxqqqqqqqqrrqppqqpoqppqrsrrqrrrrqrrssstttttssstttttrttrrtuuuutssstuvuuvvwvvssttuuuuuuuuuuvvvvvwwwwwuutttttuuuutuuuvvvvwwwvvvvvvvvvvuvwwxxwwyyyyyyyyyyyyyyyyxxyyyyzzyyyz{{|||||}}~~~~~~}}}}~~~~�������������������������������������������������������������������������������������������������������������������fghhhggghhhhhhhhhhhhhhhhhhhiiiiihhhhhhhighhhhhhhihhggfffghhiiihhhhhhhhhhfffffffffeefghhgiijjjjiiiihhhgggeeeeefffffffffffgfffggffdeeffghhffffffffgggggggggggggggggggggggghhgfeeeffffggghhghhhhhhghgghhffgiihhhhiiiihhhhiihhhhhhhhggfffghhhhiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiijjjkklmmmnnqiejkonlklnomknnnoppqqqmur}������uywrpqqqqrrrrrrrqqqqqppqrrqprrrrqqqrsssttttustttttttsuusrtvvvutsttuvvuuuvvvusttuvvuuvvvvvuuuwwwwwwwwvvuttuuuvvuuuvvwwwwwwwwvwwwwwww
wvwxxxxxxyyyyyyyyyyyyyyyyyyyzzzzzzz{{|}}}}}}}}~~~}~~~}}~��������������������������������������������������������������������������������������������������������������������fghihgggggggghhhhhhgghhhhhhiiiiihhhiiiiigghhhhhhihhgfeeeghhiiihhhhhhhhhgfffffffffeefhiihhhiiiihhhhhgggffeeefffffffffffffhgffggfeeefffggggggggggggggggggghhhhhhhhggggggghiihgggggfffghhiighhhhhhhihghgffgjjiiijjkjjihhhhhhhhhhhhhhgfffghihhiijjjkkkkkkkkkjjjjjjjjjjjjjkkkiiiiiiiikkllmnnnmopfbhimkkklnoonnnoopqqqmyjx������҉vsmurrrrssrqrsssrqqrqppqrrqorrsrqqqqssttttuusttttttttvusrtvwutssstuvutttuvuustuvvvvvvvvvvuuuwwwwwwwwwvuuuuuvwvvvvvwwxxxxwwvvwwwwwxxxwwxyyyxxyyyyyyyyyyyyyyyyyyzzzz{{|||}}}}}}}}~~~~~}~~~~~������������������������������������������������������������������������������������������������������������������������gggggghhggghhiiijihgghijiiiiiiiiiiiiiiiihiijjjiigggggggghhhhhggfggggggggeghgfefhdefhiiiiiiiiiiiiiiiihhhhgggggggghhhhhhhhihhhhhhhgggggggggggggggfffffffffggggggggfgghhggfhhhhhggghhhhhiiihhhhhhhhkfhjefjhihkinncmjjjiggikmkgfghhfhhhhhhhhgggghijkjjjjjjjjjjjjjjjjjjjjjjjjjiihiijjlllmnnooriq`ckgaonnnooonkpolnrstsnt�������wttupssssssssssssrrrrrrqqrrqpqrrrqqrrttsttuttssrsttttttsrstuvrrsrrrtuutstvwxwvuttvwwwxwwwwwwxuvwxwwvwvvvwwwwwxxxxwwwvvvvvvvvvwwwwwwwwwwwwxxxxyyyzzzzzzzzzzzz{z{{{||}}}}}}}}}}}}}}~~~~��~���������������������������������������������������������������������������������������������������������������������hhhhghhhgghhiiijjjiiiijjiiiiiiiiiiiiiiiiiijjjjiigggggggghhhhhhggggggggggghihgfghefghiiiiiiiiiiiiiiiihhhhhhhhhhhhiiiiiiiiiihhhhhhgggggggghhgggggggggggggggggggggggggggggghhhhggggggghhiiihhhhhhhhlhggdeikjkoieklklkiikljfehjiggjniiiiiiiihhhhijjkjjjjjjjjjjjjjjjjjjjjjjjjjjjiijjklllmnnoomqpmpko_nnmnoooomjmpprrlns�������}rurpussssssssssssssrrrrqqrrqqqrrrrqrrttsttuutsssstttttssrrsttrsssrrsuvutuvwwvvuttvwwwxwwvvwwxvwxxxwwwwwxxxxxxyyyxxxwwwwwwwwwwxxxxxxxxwwwxxxxxyyyzzzzzzzzzzzzzzz{{|}}~}}}}~~~~}}}~~~~~����������������������������������������������������������������������������
����������������������������������������������jjiihhhihhiiijjjkkkjjkkkiiiiiiiiiiiiiiiijjjkjjihgggggggghhhhhhhhggggggggiijjihhgfghiiiihiiiiiiiiiiiiihhhiiiiiiiiiiiiiiiiiihhhhhhhhhhgggghhhhhggghhhhhhhhhhhhhhhhhhgggghhhhhhggggfgghhijjiiiiiiiiihdcfimtſ���umjoppmjiiimlkklkihjjjjjjjjiiiijjkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjkkklllmmnnnompomplnennmnoppprnrtprupt���������utotqrsssssssssssssssssrqqrrrqqrssrrrrttttuuuttssttuuttttssssstttssrstwvuuvvvvvuttuvwwwwvvvvwwwxxyxxxyyyyyzyyyzyyyyyyxyyyyyyyyyyyyyyyyxxxxxxxxyyyzzzzzyyzzzzzzyzz{|}~~}~~~~~~~~~~~~��������������������������������������������������������������������������������������������������������������������������������kjjjijjjiiijjjjjjkkkkkkjjjjjjjjjiiiiiiiijjkjjihhhhhhhhhhhhhiiihhggggggggjjjjjihghhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhiiihhhhhghhhhhhhhiiiiiiiiiiiiiiiiihhgghhiiiihhhhggghhiiijjjjjjjjjgiedjlnw�����­��vjinpoookhkopmjllkkkkkkjjjjkkllkkkkkkkkkkkkkkkkkkkkkkkkkkkkklllmmmmnnnnphsefm`honnnoppprnmpqpsz�������Ӆszyntvossssssssssssttttsrqqrrrrqrsssrrrttttuuuuttttuuuuuuvvuuttuuuttssswwvvvvvvvuuuuvwwwwvvvvwwwxxyxxyyzzzzzzzyyyyyyyyyzzzzzzzzzzzzzzzzyyyyyyyyyyzzzz{{zzz{{{{{zz{||}}}}}~~~~~~~~~���������������������������������������������������������������������������������������������������������������������������������jjjjjjkkjjjjjjjjjjkkkkjjjjjjjjjjiiiiiiiijjjjjihghhhhhhhhhhhiiiiihhhhhhhhkjjjjihgijjjjiiijjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiijiiihiiiiiiihhhhiiiiiiiiiiiiiiiijjjjjjjjiihhhhiijjjiiiiiiiiiiiiijjjjjjjjhkhhmkiox�������Һ��|oknpoppomormlllkkkkkkkkllllllllllllllllllllllkkkkkkjkkkllllmmmnnnnnohtihnZiponnopppsnirzpz�������ߙ|toxwqvsssssssssssssttttsrqqrrssqrstsrrrutttuvvuuttuuvvuuvvvvuutvvuuutssvvwwwwwwvvvvvvwxxwwvvwwxwxxxxxyyz{{{zzzyyyyyyyyyzzzzzzzzzzzzzzzzyyyyyyyyyyzzzz{{z{{||||||||||}}}}}}}~~~~������������������������������������������������������������������������������������������������������������������������������������jjjjjjkkjjjjjjjjjjkkkkjjjjjjjjjjjjjjjjjjjjkjjihhiiiiiiiiiiiiiiihhhhhhhhhjjjiiihhjjjjjjj
jjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiijjiiiiiijjjiiihhiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiikkjjjjiijjjjjjiijjjjjjjjfhikmjhjimj���������ү�uvqllllmpnmmmllkkklllllllllllllllllllllllllkkkkkjjkkllmllnnnnnnnnmollllhvqponopppspmstp��������uxynvzptssssssssssssssssstsqqrsssqstttsrruttuvvvvuuuuvvvvuuuvvuutwvvvvutsuvwxxwxxwwwwvwxyxxwwwwxxwxxxxxyz{{{{{zzyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzyyyzzzz{{{{{|}}}}}}}}}}}}}~~~~~~~~����������������������������������������������������������������������������������������������������������������������������������������jjiiijjjkkjjjjjjjjkkkkjjjjjjjjjjkkkkkkkkkkkkkjjijjjjjjjjiiiiiihhhhhhhhhhjkjihhikkkjjjkkkjjjjjjjjjjjjjjjjiiiiiiiijjjjjjjjjjiiiiiikjjjiiihiiiijjjjjjjjjjjjjjjjjjjjiijjjjiikkjjjjiijjjjkkkkjjjjjjjjeeimlkkjifojo����������˟��{yupooonnmmllkllmlllkkkkkkkkkkkkkkkkkkkkkkjjjkllmnnmmnnnnnnnnookjikf{qpnnopqqnopmq����������xpwxutsspssssssssssssssrrtsqqrsttqstutsrruttuvvvvvuuuvwwvuuuvvvvuwvvvwvusuvxxxxxyxxxxwwxzyxxxxxxyxxyxxyz{||||{{zzzzzzz{{{{{{{{{{{zzzzzzzzzzzzzzzzzzzzz{{{z{|}~}}}~~~~~~~~����������������������������������������������������������������������������������������������������������������������������������������jjiiiiiikkjjjjjikkkkkkkkkkkkkkkkllllllllkllllkkjjjjjjjjjjjjjiihhhhhhhhhhjkkjhhjmkkjjkkkljjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiikkjjjiiiiijjjjjjjjjjjjjjjjjjjjjjiijjjjiijjjjjiiijjkkkklljjjjjjjjkglokjlijlohknx�����������ɯ��trppponnmmkllmllkkkkkkkkkkkkkkkkkkkkkkjjjjllmnnonnnnnnnnnnpitkfkPmponnoqrrrtuq��������΅yywmurovrrssssssssssssrrrrtsqqrsttqstutssruttuvwwvvuuvwwwvvvvwwwwwwvvvwwutuwxyxwxyxyyxwwyzyyxxxxyyyyyyyy{|}}}||{zzzz{{{|||{{{{{{{{{{{{{{{{{{{zzzzzzzzzz{{{z{|}}}}|~~~~������������������������������������������������������������������������������������������������������������������������������������������������kkjjjjjkkkllmmmlnmkjjkkkkkkklllllmmlkkkkllkkkkjjiijjjjkkijjjiijjiijjjkkkllkkkklljkkkkkjjllkkkkjjkkjjjjjjkkkkkkkkkkkkkkkkkkkkjjjjlkkkklkkjjkkkkkkjjjjjjjjiiiijjjjjjjjjiiikkkkkkjjjjjkkkkkjkkkkjihhijkkkklkkkkklnox�����������
�ѵ�|vstsonpnmnqpljlklmmlkkkkkklllmmlkjjjjjjllllmmmmlllmmnnnmqoOLc]]skuqlssvn{g��������ޒwvxttttttsrrrrsssssuttssttuutrrrstutsrsttsrstvvvvuvvvvwwwwwxwwvvwwxxwwxyxvtuwxwxyywxyzyxwxyzzyyyyyyxyyzz{{|||||{{zzzzzz{{{{{{{{{{{{{{{{{{{{}}}}}|||{{{{{|}}~}}}}}}}~~~~~�����������������������������������������������������������������������������������������������������������������������������������������������llkkjkkkhijlnpqrqpnmmllkkkkkllllllmlkkkllllkkkkjijjjjkkkjkkjjjjkjkkkklllmlllllllkllllkjjkkkkkkkkkkkjjjjjllllllllkkkkkkkklkkkkjjjlkkklllkjkklllkkkkkkkkkkjjjjjjkkjjjjjjjjjjkkkkkkllkkkjjjkkkkkjihijkkkkklllmmmmoplieq������������Φ�vupnsprojimonkkllllkklllllllllkkjjjkklllmmmmnlmmmmnnnkrgcmsiQgqomqsokyj��������xqxstttttsssssstttttttsstttttsrrrtvtsrsuutsstvvvvvvvvvwwwwwxxwwwwxxxwxxyxwuuwxwxyyxxyzyxxxxyyyyxxyyyyzz{{||}}|||{{zz{{{{{||||||||||||||||||}}}}}}}}||||||}}~~~~~}}}~~~~~����������������������������������������������������������������������������������������������������������������������������������������������mlllkklljklmnoppqponnnmlllllllkkklllkkkllllkkkkkjjjjkkkkkllkkkklllllmmmmmllmnomllllllkkjkkkkkkkkkkkkjjjjlllllllllllllllllllkkkkkllkklllkkkklllkkkkkkkkkkkkkkkkkkkkkkjjjjjjjkkkkklllkkjjjkkkkkjjiijkkkklllmmmmmnopopmem����������������{spqqqrrnjpommmlkjllllllllllkkkkkkmmmmmnnnnnnnnnnnlrlmtri[muottmtss�����������rtwtsstttttttttttuuutttssttttuutrrtvssstvvutstvwvvvvvvwwwwxxxxxwwxxxxxxyyxwvvwxxxyyxxyyyyxxxyyyyyyyyzz{{|||}}}}|||{{{{{{|||}||||||||}}}}}}}}||}}}}~~}}}}}}}~~~~~~~~}~~~����������������������������������������������������������������������������������������������������������������������������������������������mlllllllmmmmmllkmmmmnnmlmmllllkkkkllkkllllllkkkkjkkkkkllllllkkllmmmmmmmmlkkmppnmllllkkkkjjkkkkllllkkkjjjlllllllllllllllllllkkkkklkkklllkkkkkkkkkkkkkkkkkkkkkkllllllkkkkkkkkkkkjjkkkkkkllkkkjjjjjjkllllllklmmlmmnmjjmooqs�������������׸��yqrrnovnmmmmmnmlllllmmmllllllllmmmnnnnnoonnnnnnqltifg\iqnnslkvpv��������Ȇ�rrxqsstttttttttttuuuttttttttsuvurqtvsssuvwvustv
wwvvvwwwwwxxxxwwwwwwxxxxyyxwwwwxxyyzyyyzzyyyyzzzzzzzy{{|||}}}}}}|||||||{{||}}||||||||~~~~~~~~}}}}}}~~~~~}}}}}~~~~~~~~������������������������������������������������������������������������������������������������������������������������������������������������lllllllljkkllllljjklmmmlmmmlllllkllllklmmllllkkkkkklllllllllkklmmmmmmmmmjjknppomkkkkkkkljjkkkkllllkkkkkklllllllllllllllllllkkkkklkkkklkkkkkkkkkkkkkkkkkkkkkkklllmllllllllllllkkkkkkkllllkkjiiijjkklmlllmllmmmmmnltsignohomw�������������ճ�~xrmlstttqonmllllmmmnllmmmmmmnnnnnoooooooonnnsiohhh[iqqrlourt��������و�wxuntttttttuustttttttttuuuutttvwvsrsvtssuvwwustvwwwwwwwwwxxxxwwwwwwwwxxyyyxxxxwxxyyzzzzzzzzzyz{{{{{{{|||}}}}}}}|||||}||{{{|}}||||||||}}}}}}}}~~~~}}}}~~~~}}}}~~~~������������������������������������������������������������������������������������������������������������������������������������������������������lmmmmllljjkklllllllmmmlkmmmmllllllmmlllmmmmllllkkllllmmmllmllllmnmmmmmmljkmnooonllkkkkllkkkkkkkkkkkkklllllllllllllllllllmllllkkklkkklllklkkkkkkkkkkkkkkkkkklllllmmmmmmmmkllmmmmmmmlllkkklkjiijjkklmmmlmmmnoonnnnqlknlhmwnpnik��������������ʤ�{vqqqqppoollmmmmmmlmmnnnnnnnnoooooppoooooopqknohkosvumtpv����������{zsvxlwttttttuussssssttuuuvvuuutvwwtssutsstvwwvsuvwwwwwwwwxxxxxwwxxxxwwxyyyxxxyyxxyyyz{{{{{|{{z{{|}}||{}}}}}}}}}||{{|}}||{{{|}~}}}}}}}}}}}}}}}}~~~~~}}}}}~~~~~}~~��������������������������������������������������������������������������������������������������������������������������������������������������������mmnnnmmllmmmmlllnnmmnmmlllmmmmmmmnnmlllmmmmlllllllllmmmmmmmmllmnnnnnmmmmlnpponnoonmlllllkkkkkkkkjkkkllmmmmmmmmmmmmmmmmmmnnmmmmllmlllmmmlmmllklllkkkkkkkkllllmmmmnnnnmmmmllmmnnnnmmmllkkkmlkjjklllmmmmmmmmnooonnnmmlnqsoioqutnku�������������������tnnrttnnnmmmllmmnooooonooooppppppppooontoolhpxpppqmq���������zo{tuqtuutttttussssstttvvvwwvvvuvwwuttsutssuvvvsuvwxwwxwwxxxxyyxxyyyyxxxyzyxxyyyxxyzyz|}|{|}}|{{{|}}}|{}}}}}}}}||{{{||}}}|{|}~~~~~~~~~~~~~~~~~~~~~~~~~}~~���������������
�����������������������������������������������������������������������������������������������������������������������������������������nnnoonmmllmnooopnmmmnnmllllmmmmmnnnnmllmmmmmlllllllmmmmmnnnnmmnooonnnmmmnprqnmmoqponmlllllkkkkjjjjkklmmmmmmmmmmmnnnnnnnnoonnnnmmnmmmnnmmnnmlllmmllllllllmmmmmmmnnnnnnnnmnnnnnmmmllllllllnmlkkkmnlmnnmmmmmmnnnmmmmmmmmnoprmkoommpnx��������������վ��yurpoonnmlkkmmnoppoooooopppppppppppppmujiphtoltwk���������Ćzlvy{popvuttttttssssttttvvwwwwvvvvwwvutsvtrstvvvsuvwxxxxwwxxxxyyyyzzzzyyxyzyxxyzzxxyzyz|}}||}}|{z{|}}||{}}}}}}}}|{{z{{|}~}|||}~~~~~~~~~}~~~~~}~���������~~��������������������������������������������������������������������������������������������������������������������������������������������������������oonmmnoonnooooonnnmllmmmnnnnnnnnppoonnmmoonnmmllkllllmnnnoooooonponmmmmmnoooooooopppoonnlkkklmkjllmmmmmnlmmnnoopoonnnnooooooooonooonnnnnmnnoonnmlmmllkllnnnnnnnnppppooonmnnmmnnmooonllmolmnooonmmmmlmmnnmmmnnnnnnnnnnnnnooooopppmprv���������������ڷ��vspnonkkmkkmpqonmknonmoqqoooooopprpoheigim|il���������׎vvttttrrtstuvvuuvvvvvvvvvvvvvvvvvuvvuuvtquxukwvvuvsty{ywywwxxyyyyxxyzyyxwzzzzz{{{{zyyz|}~|||}}}}}y|~~~}}}}|{{{}}}}||||~~}|}}����~����~����������������������������������������������������������������������������������������������������������������������������������������������������������������mmllllmmnooooonnnmmlllmmnnnnnnnnooooonnnoonnnnnnmmmmmmnnoooooooooonnmmmmooooooooooppponnmllllllkmmmnnmmmmmmnooppoonnnnoooooooonnooonnnnnmnnnnnnmmnnnmmmmnnnnnoooopppoonnmnomnoonpponnmnnlmnnoonnnnnnnnoonnnnnooopoooooonooooppppvqnoot����������������Ь�vrqonmopnllprsnppppqpnppoooppppqriipkgrts{����������v}vtsttrrttuvwvvvvvvvvvvvvvvvvvvvvvwwuuvusvywnwuvvvssxzxwyxxyzzzzzzzzzzyyyzz{{{||||{zzz|}}||}}}}}}x{~}}~}}}}||||||||||||}~}|~��~���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
����onnmmnnooooooonnnmmmmmmmnnnnnnnnnnnoooppoooooooonnnmmnnnooooooooooonnnmmooooooooooppponmmmmmlkllmnnonnmlmnnnooppoooooooooooooonnooonnnnnmnnnnnnmoooonnnonnoooooooooooonnmnonnppoppoooonmllmnooonoooppppoooooooooqpppoooooooppppqqopsojmuo����������������ǣ�xvvtpommqupgooooqrqpppppppqqqtvnnvohsjt���������vvwutstsrrsuvwwwvwwvvvvvvvvwwwwwwwwwwwvvwvttxxqwvxxwttwyxxzyzz{{{{{}|{{zzzz{{{||}}}|{{z{|}}}}}}}}}}xz|}}~~}}}||}}}}}}}}}}}~���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������poooooopooooonnnonnnnnnnoooooooonnnoopppooooonnnnnmmnnnnoooppooonnooonnmooooopppooppponmmmnmkklmmnooonmlnnoooppppppoopppooooooooooooonnnnnnnnnnnopppooopooooooppoooooonnlnnmnppopoooppnllmmnoooooppqqppooooooooopppppooopppppqqqnsusrttqvln������ȹ��������ۼ��~uutroopqrrqppqqqpooppqqqrswpnrkhrow�������컄z~sutstsrrsuvwwwvvwvvvvvvvvwwwwwwwwwwwvwwvurvwtvwz{yvuxzz{|zz{{{||{~}}|{{{{{{{|}}~~|||{{{|}}}}}}}}}z{|}~~~~}}|||}~~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������oonnnnooooooonnnpppppoooppppppppooooooopooonnnmmmmmmmnooooppppoonnoooonnooopppppooppponnlmnmkklmmnopponnooopppppppppppppoooooooopppooooooonnnnooppppooopooppppppppppoonnlnnmnppopooopomkmmnnooppoppqqppooooooooooooooppppppqqqqrpttppsrmtpquu����⿦���������ϸ��vrttssttsrqoopoooopqrrrptpkhdinvz�������Ňw|xxvtsttrrtuvwwvvvvvvvvvvvvwwwwwwwwxwvvwwvvruwwtwz{zxvwyz{|zzz{{{{{}}}}}|{z{{|||}}~|}}|{{|}~~~~}}}}||}~~~~~}}|||}~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ooooooooooonnooopqqqqpppppppppppppppooooooonnmmmllmmmnooppppppppnnoooooopppppppppppooonnmmmlllmnnnopppoopppppppppqqqqqqpppppppppqqqppp
pppoooooopppqppoppppppppqqpppppooomnonnppoooooonmkoooooppppppppppooooooooonoooppppqqqqrrrrroptvtsuqpopppty����Ҷ�����������Ϥ�zvttqpprrpooooppqqrsuqtrkfeopx������ؐvyuyuvututsstuvwwvvvvvvvvvvvvwwwwwwwwxvvwwwwxvvwyrwyyywvvxz{{zz{{{{{{||}~}|{z||||}}}}|}~}{{{}~~~~~~~}~}}~~}}}}}}}~~���}~~���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������oppppppoonnnnopppqqqqqpppppppppppppppooooooonnnnnnmnnnooppppppppooooooopppppppppppoonooonmmmnoppooppppooqqqqqqqqpqqrrqqppppqqqqqrrrqqqqpqppppppqpqqqpppqpppqqqqqqqqqpppooppooqqpppqponmmppppppppqqpppppppppppppooppppppqqqqrrrrrtrrturqrsvvuuurnmw����ˬ�~x��������޾��|}vpprrqqrrqqrrrsururnmmv{x������ݟuyxu|qwuuuussuvwxxwvvvwwwwwwwwxxxxxxxxywvwxxxyzww{qwyyywvvxz{{{||||||||}}}}}}|}}}}}}}}}~~}{z{|~~~~~~~~~|{}~}}}�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������mnnnnnnmnnnnnopqppqqqppoooooooooppppppppooooooooooonnooopppqqpppoonnoopppppppppqqponnnooonmmoqqqpppppoooqqqqqqqqqqrrrrqqqqqqqqqrrrrrrqqqqqppppqqqrrrqpqqqqqqqqqqqqqqqppppqqppqqppqrqomnnqqqppppqrqqpppqqqqqqqpppqqqqqqqqqqrrrrssqvvqqwwrssrqqsuvvnoy����Ψ|c_gy�������ҷ��xrsssrttssrrssqpsomrrwzq������yxuszq{wvuvuttuvwxxxwwwwwwwwwwwxxxxxxxxzwvxyxxz{wv|qy{zzxwwx{|{}}}~~~}}}}}}}~~~~~~~~~~}~~{z{|~~~~~~~}{z}~}}~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������nooppoonoooppppqppqrqponoooooooopqqqqqqpoooooooonnnnnnnnoopqqponnnoopqqqppppppqrpppppppqlmopqrqqrrqppoppopppqqqqrrrrrrrrqqqqqqqqssrrrrrrqqqqqqrrsrrqqqqqrrrrrrrrrrrrqqqqrrrqqqqqsppqpmotpppqqqqrrqqppqqrrrqqqqqqqqqqrrrrrrrrssssttttsssssssstuutwpptrp~���ՙaPRQUev������ѯ�squwu
svskrxoqpvtmgpgp������zyxwvvuuwwvvvvvvvwxxxxwwwwwwwwwwxxxxyyyyyyyyzzzzzvvyzxy|{xwyzz|~||||||||}}}}}}}}~~~}}}}}~~~~}|zy����|z{}~|������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������oppqqppoppppppqqpqqrqqpoppppooooppqqqqppoooooooooooooooopqqrrqonnooppqqqppppppqrqqppqqpplmnpqqqqrqppppppppqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrqqqqqqqqrrrqqqqqrrrrrrrrrrrrqqqqrrrrqqqqrrqonoqqppqqqqrrrrqqqqrrrrqqqqqqqqqrrrrrrrssssssttttssssssssttttssuvrmouv���xQLNMHLZ|�����ܴ�~}xrpstqqtuvrdgruk�������~yxxwvvvvwwwvvvvwvwxxxxwwwwwwwwwwxxxyyyyyxxyyyyzzzwvyywx{zyxxy{|}}}}}}}}}}}}}}}}}}}}~~~~~~~~~~|{z~~����}{{}~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������pqqrrqqpqqqqqqqqqqqqqpppqqqqpppoppqqqqppppppppppqqqqqqqqrrrrrqpooooppqqqppqpppqrrqqqrrqolmnoqqqqrqoopqppqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrqqqqqqqqrrrqqrrrrrrrrrrrrrsrqqqqrrrrqqqqrtrmmrsoqqqqqrrrrrrqqrrrrrrqqqqrrrrrrrrrssssstttttssssssssrstttstvuqopoliq����jTMQSMGOe������Ĝ�wqwqsxqlqrku|ie�����Έvvwwwwwwwxwwwwwwwvwxxyxwwwwwwwwwwxyyyyyyyxxxyyyyzzxxywuw{yzyvw{}|~~~~~~~~}}}}}}}}}}}~~~~~~~~~}||}}~�����~||}�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������qqrrrrqqrrrrrqqqrqqpppppqqqqqppppppqqpppqqqqqqqqqqqqqqqqqrrrrqpopppqqqqrpqqqppqrrqqrssqnmnopqrrrsqooqrqprrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrqqqqqrrrrrrrrrrsrrrrrrrrrssrrqqrrrrrrrrrsspnorrpqqrrrrrrrrrrrrrrrrrrrrrrrrrsssssttttttttssssssssrrrsttttturnotwvsupkw���sUIINLAIHUr�����̼�~zvpuxutmomj�����Ύv~uuvwxxwwxxxxxxwwuvwxyyyxxxxxxxxxyyyyyyyyyyyyzzzzzzyyvtvzy|zuu{~|~~~~~~~~~~~~~~~~}}}~~~~~~~~~~|}~�����~}|}~��
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������qqrrrrqqssrrrrrqqqpooopppppqqqqqqqqqqqqqqqqqqqqqrrrrrrrrqqqqqqppqqqqqrrrqqrrqqqqsrrrsrpooppqrrssrqppqrrqrrrrrrrrssssssssrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrssssssssssssssrqrrrrrrrrrrtpnprqqsrrrrrssssssssssssrrrrrssssssssssttttttttrrrrssssrrrstuutqqrrsstuvuttrrw~�m]SQPDGBJJH_��������~|wut}xtl���ѫ�pvyuuvwxxxxxyyyyxxxtuwxyzzzyyyyyyyyyyyyyzzzzzzz{{{{{{zyvtvyy|{vu{~}~~~~~~~~~~~~~~~~}}~~~~~~~~~|}~������~}|~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������qqrrrrqqrrrrrrrrqqppoopppppqqqrrrqqqqqqrrrrrrrrrrrrrrrrrqqqqqqqqrrrrrrrrqrrrqqqqrssrrqpoqqrrrrrsrrqqqrsstssssssrsssssssssssssssssssssrrrrrssssssssrrrrssssssssssssssrrrrrrrrrrrrrnnrtqqtsssssssssssttsssssrrrsssssstttttutttttttsssssssssrrstuuurpqtspquotvtrsrormi^TNGGL@AIC?a�����ᯓ����nli{����xu~vtvwwxxxxxyyzzzyyxtuwxyzzzyyyyyyyyyyyzzzzzz{{{{|||{{{ywvvwz|{xwz}~~~~~~~~~~~~~~~~~~~~~~~~~}~��������~}}��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������qqrrrrqqrrrrrrrrrrqqqpqqqqqrrrrrrrrqqrrrrrrrrrrrrrrrrrrrrrrrqqqqrrrrrrrrqrssrqqqrssrpoopsrrrrqrrqsssrrsuuuuuttttssssssssssssssssttsssssssssstttttssrrrrrsssssssssstsrrrrrrrrrrrrnoqrssstssssssssssttttssssssssstttttttttttttttttttttuuuutttttuttsqqsssuwvstwsloxtywtmc[RKIIIGCCG^���������к�����ߜrxr{sxxxxxxxyyzz{zzyxuvwyyzyyyyyyyyyyyzzzzzzzzz{{{{||{{{zxxwv{{{zyz}~~~~~~~~~~~~~~~~~}}~~~����������~���������������������������������������������������������������������������������������������������������������������������������������
�����������������������������������������������������������������������������������qrrssrrqqqrrrrrssrrrrqqqsssrrrrrsrrrrrrsrrrrrrrrsssssssssssrrqqqsssrrrrrrrssrqqqqstromoqssrqqqqqqsutrqtwvvvvuuuusssssssssssssssstttttsssttttttttttssrrrrsssssssssttsrrrsrrrrrrsskpsrrtusttttsssssttuuttsssssssttttttttuuttttttssuuuvvvvvvuututtspqsuwwuqstssuwtoswqvxsrgdd\QORNEHOQk������ְ������|{|yt|zyyxxxxyyzz{{zyxvwxyyyyxyyyyyyyyzzzzzzzzzzzz{{{{|{{zzywu{{{|zy|�~~~~~~~~~~~~~~~~~�~}}}~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ttsssrrrrrssssrrqrrsssrrssssssssrrrrrrrrrrrrrrrsqqqrrrrrrrrrqqrrrsssssrqrssttssrqsrpprrqqrsssrrqprtuutttwwwwvvvvvuttsrrrssssstttttttttttsssssssssssssssssrrrrrrsrsttsrrrqqqrttrppqqrstttrsuutsssttttttttssrrrstttttttttttuuuutuvuuvvwvvuwvvvvusrvvvuuuttuttttuuvvutsstuusszpjxvgFFAAQq����c[q���}yzw{zyxxxyzzzzyyzzzyvwxyyzzzzzzzzzzzzzzzzzzz{{{{{{{{|{{{zyvtz}|z{z|�~~}~~��~~~~��~����������~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������sssssrrrrrsssssrrsssssrrssssssssrrrrrssssrrrrsssssrrrrrrsrrrrrrrssssssrrsstttsrrrssqqssrsttttsrqqsuvvuuuwwwwvvvvvuutssssssssssssuuuuuuuusssssssstsssssstssssssssssttsrrrrrrrssrprsssstttrtuutsssttttttttssrrrstttttttttuuuvvuuvvuvvwwwvvuvvvtsttwwvvvuuuuttttuvvwvuttttu{qqvwwjXFDBBEILL�uTAANx�ߔ~yx{wyzyyxxyzzzzyzz{zzwwxyyzzzzzzzzzzzzzzzzzzz{{{{{{{{{{{{{yvtx||z{z|�~~~����~~~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ssssssssrrsstttttttttsrrssssssssrrssssssssssssttt
tttssssssrrrrrrtssssssssstttssrsssrrstsuuvuutsrstvwvvvvwwwwvvvvvvuuttttssstttttuuuuuuuutttttttttsssssstssttttsstttssrsssrrrsrqqutttttttstuutssstttttttttssrsstttttuuuvvvvwwvvwwvwwxxwwvuvwusrtwwwwvvvuuuuuuuvvwwwvutttuvvvwlYLC=?BB?;=AN9CBHIQa��{{wyvzzyyyyyz{{zzz{{{{xxxxyyzzzzzzzzzzzzzzzzzz{{{{{{{{{{{||zwuw{|z{z{��������������������~~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ssssssssssttuuuuvvvvutsrssssssssrrssssssssssssttuuuuttttssrrrrrrssrrrrssssttttssssssstttvvvvuuttsuvwvvvwwwwwvvvvwvvuuuuuuuuuvvvvuuuuuuuuvvvvvvvvttttttttsstuutssuuttsssttssrrrrquutttttttuuuttsttttttttttttsstttttuuvvwwwwxxwwxxwxxxxxwvvwvussuwwwwvvvuuvvvvvwwwwwvvuuuuqypaM=>?HFA9338<B=DEJIGQs{w}yzw{zyyyyzz{{{z{{||{yyxxyyz{{{{{{{{{{{{{{{{{{{{{{{{{z{|||{xvw|}{|{{������������������������~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ssssstttttuuvvwwwwwvutsrssssssssrrsssssstsssstttuuuuuuvvsssrrrrssrrrrrssrsstuuttrrssttttuuvvuuuutuvvvvvvwwwwvvvvwvvvuuvvvvvvvvvvvvvvvvvvwwwwwwwwuuuttuuusttuuttsuuutsstuuttsrrrsttssstuuuvvvutttuuuuuuuuuuuuuuuuuuvvwwwxwxyxxxxyxyyyyxwwxwutuuvvwwvvvvvvvvwwwwxxwwwvvvvvyjPA?@CD53//9C?4AHHFGJCVs�vz{~yxzzzzz{{{|{{{|||{zyyyyyz{{{{{{{{{{{{{{{{{{{{{{{{{z{{||{yww}~}~|{~���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ttttttttuuuvvvwwwwwwvusssssssssssssssttttttttuuuuuuvvvvvtsssrsssrrrrrrssrsttuuutrrrtuttuuuuuuuuutuuvuuvvwwwwvvvvvvvvvvvvuuuuuuvvvvvvvvvvvvvvvvvvvvvuuvvvtuuuuuutuvvutttuuutssstut
tssstuvwwwwvuuvvvvvvvvvvvvvvvvvvvvwwxxxxyyyyxyzyyyyyxwvwvuvwwwvwwwwwvvvwwxxxxxxwwwwwwww{U;7@C73/58:CI?.8;DBDGIgyzzy{{zzzzz{{{{||{{||{zzzyyyzzz{{{{{{{{||||||||{{{{{{{{{{{|||zyw|~~|{����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������vvuuuuttvvuuuvvwwwwwvutsttttttttttttuuuuvvvuvvvwvvvvvvvvttsssssssssstttsttuuuttssrsuvuuvwvvvvvvvtuvvuuvvwwwwvvvvvvvuvvvvuuuuuuvvvvvvvvvvvvvvvvvvwwwvvwwwvvvvvvvvuvvvuttuvvutssuwuttstuvwxxxwvvvwwwwwwwwwwwwwwwwvwwwxxxxxxyyyyyyzyyyyxwwvtuwxyxxxxxxxxxxxwxxyyyyyxxxxxxwwwOB78=+'1:ABCFB;88AEOS��x||xw{{zzz{{|||}|{{{{zyzzzzzzzy{{{{{{{{||||||||{{{{{{{{{{{{||{{t{}~}|���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������wvvvuuutvuuttuuvwwwwvutsuuuuuuuuuuuuvvvvwwwwwwwxwvvvvvuuttsssssssstuuuttuuuuutsrtstvwuuwxxwvvvuuuuvvvvvwwwwwvvvvvvuuuvvvvvvvvvvvwwwwwwwwwwwwwwwwxwwwwwwxxwwwwwwxuvwvvuttvvvtstvxvuuttuwxyyyxwvwwxxxxxxxxxxxxxxwwxxxxxxxxxyyyyyyzyyyyxwvvquxzyyyzyyyyyyyywxyyzyyyyyyyxwwvzTK0*?53BCDDEEA=4:4@T\�͘z{yzz}wzz{{||||}}|{{zyyzzz{{zyy{{{{{{{{}}}}}}}}{{{{{{{{{{{{||||ry|~~}�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xxwwvuutuuuuuvwwyyxwvuutvvvvvvvvvuuuuuuvuwxwvvwxwwwwwwwwvvutttuuvvvvvvvvxvtstuutvvvvvvvvxxxxxwvuvvwwxxxxxwwwwvvvvvvvvvvvwvvvwwwwwxxxxxxxwwwwwwwwwwvvvwwxwwwvvwwwuuvwwvvvwvttvvvvvwxwwwwxxxxwwwwxyyyxxxxxyxxxxwwwxxxxxxxxyyyyyyyyzyyz{yvsyxxxxyyzyyyyyyyyyyyyyyyyyyyxxwwvzcC,;KAE<CHEE@9?A:=JFS���{zzy|yzz{{{{{{||{
{{zzz{z{|||{zzy{}|{{|~}}}}}||{||||||||{|}}}|||xq}{�~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yxxwwvvvvvvvvwwxxwwwvvvvwwwwwwwwuuuuuvvwvwwwwwwxwwwwwwvvvvuttuvvvvvvwwwwxwutttuuvvvvvvwwxxxxxwvvvvwwxxxxwwwwwwwwvvvvvvvvwwvvwxxwwwxxxxxxwwwwwwwwwwwwvwwwvvvuuvvvvvwwwvvuvutuvwwvvwxxwwwxxxxwvvwxxxxxxxxyyyxxxxwwyyyyyyyyyyyyyyyyyzzzywvuzyyxxyyyyyyyyyyyyyyyyyyyyyyxxxwwtnSACDBDJJF@?@?C@E@FL6K��{yzx{{{z{{{{{{|||{{{{{{zz{||{{zy{}}|{|~}}}}}|||{{||||}}}~~~}|||zs}~|}|��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yyyyxxxxxxwwwwxxwwwwwwwwxxxxxxxxuuuvvwxxxwwwxxxwwxxxwwvuvvvvvvwxwwwwxxxxxxvtssuvvvwwwwwwxxxxxwwvwwxxxxxxwwwwwxxxvvvvvvvvxwwwwxxwwwwwxxxxwwwwwwwwwwwwwwvvvvvvvvvvwwwwwvuuttuvwwwvvwxxxwwwxxxwwvwxxxxyyyyyyyyyxxxxyyyyyyyyzzzzzzzzyz{ywuvx{{zzyyyyzzzzzzzzyyyyyyyyyyyyxxxxyxZHC>IMK@415>B=D=KKSZZ�ωy|y{|{{{||{{||||{{{{{|zz{{{{{{z{}}}|}~~~~~~}}|{{||}}~~~~~}}~~~w|~}�}{������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yyyyyyyyyyxxxxxxwwxxxyyyyyyyyyyyvvvvwwxxyxwwyyyxxxxxxwvuwwwwwxyyxxxxyyyyxxwussuwwwwwwxxxyyyyxxwwxxxxyxxxwwwxxxyywwwwwwwwyxwwxxxxwwwwxxxxxxxxxxxxwwxxwwvvxxxyyxxxxxxxwvuutuvwxwwvvwxxxwwwxyyxxwxxyyyyyyyyzzyyyyxxyyyyyyyyzzzzzzzzyyyxvvxy{{{{zzzz{{{{{{{{zzzzzzzzyyyyyyyy{{hZO?4,%
-=JRKVG]dnaR�߬~�|}|z{||||{|}||||||||zz{{{|||{|}~~}}~~~~}}||}}~~~~}}~~}~�z{}��}�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yyyyyyyyyyyyxxxwyyyyyyzzyyyyyyyyxxwwwxxxyxxxzzyxxxyxxwvvwwwxxyzzyyyyyzzzxxxvttvxxxxxxxxxzyyyyxxxyyyyyyxxwwxxyyyzxxxxxxxxyyxxxyyxxxxxxxxxxxxxxxxxwxxxxwwvyyzzzzyyyyyyxwuuuvwxxwvvwxxyxxxxyzzzyyyyzzzzyyyyzzzzyyyyyyyyyyyyzzzzzzzzzxwwxyzz{{{{{{{{{{{{{{{{zzzzzzzzzzzzzzzzxy���mfO8)6CLZYfl�xxbQ}�،�||||}}}|||}|||||}}}{{{{{|}}|}~~~~~~~~~~~~~~~}}~}}~�~|x|���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yyyyyyyyyyyyyyxxzzzzzzzzyyyyyyyyyyxxxxyyzyyyzzzyyyxxwwwwwxxyyyzzzzzzzzzzxxwwvvwxxxyyyyyyzzyyyyyyzzzzzyyxxxxyyyzzyyyyyyyyzyxxyyyyyyxxxxxxxxxxxxxxxxxyyxxxxyyyyyyxzzzyxwvuvwyyyxwwxyyyxxyzz{{{zzzz{zzzzyyy{{zzzzyyyyyyyyyyzzzzzzzzzxwwy{{zz{{|||||{{{{{{{{{{{{{{{{zzzzzzzzwrzwy�zP(6KOZ[hz|wsTFa����{|�}}~}}|}}|||}}}~~}||{{|}~}~~~~�����~~~~|}vz����~�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zzzyyyyyyzzzzzyyyyyyyyzzyyyyyyyyyyyyyyzzyzzzzzz{yyxwwwxxxxyyyzzyzzzzzzzzxwwwxxxxyyyyyyzz{zzyyyzzzzzzzyyyyyyyyzzzzzzzzzzzzzyyyyyyyyyyyyyyxxxxxxxxyyyyyyzzyxxxxxxyzzzzyxwvvxzzyyyyzzyyxyz{z{{{{zzzzzzzzzzz{{{{zzzzzzzzzzzzyyyyyyyyzzyzzzzz{{||||{{{{{{{{{{{{{{{{{{{{{{zzzyzx|{yz{|�S':PJZk�˜mogHc��ŉ|y�}~~~}}}}}}}}}~~}||}~~~~~����������������~~~}�vz���~����������������������������������������������
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{{zzzyyyzz{{{{{zxxxxyyyyyyyyyyyyxxyyyz{{yz{{zyz|yyxwwxyyxxyyyyyyzzzzzzzzwvvwyyywyyyyzzzz{zzyyyzz{{{zzyyyyyyyzzzzyyyyyyyy{zyyyzyyzzzyyyyyxxxxxxxxyyyyyz{{yyxxxxyyzzzzyxwvvxzzzyz{{zzyxy{|yz{{zzzzzzzzz{{{{{{{zzzzzzzzzzzzyyyyyyyyy{||zyyz{||||{zz{{{{{{{{{{{{{{{{||{{zzyyyzu{yv~x}T*8IQ���syU]��ݎ��v~~~~~}}}~}}}}~~�}||}~�~}~������������������������y|���}�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zz{{{{||{{{{{zzz{{{{zzzzyyyyxxxxvyzzz{{yz|}{z{zy|yvvwyyyyxz{{yy{{||{zz{{wwxzzxxy{yzzxy|{yyyyzzzz{{{zzzzzzyyxxyyzyyzz{{zzzzzzzzzzyyyyyyyy{zzzzyyyyyyyzzzzzyyyyyz{zyyxxyzzyyyyyyyyzyyyyyz{{{{{{{{{{{{{{{{{zzzzzzzzzzzzzzzz|{yy{~zuxyz{zyyy||{{{{{z{{{{zzzz{{{{{{{{{{{{{{{{zz{||{{}}T+<Er����wuj^{�私�z}~}~~{|~~~~~�~~~~~}~�����������������������������x{���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zzz{{{{|{{{{zzzz{{{{{{{{zzzyyyyy{{zyz}}{xyyxy||{|ywvxz{{||}~|zyy|{zzzzyxuvxzywvwzwxzyzzwzz{{zzzzzzyyyyyyzzzyyzzzzz{{||{{zzzzzzzzzzzzzzzzzzyyyyxxyyyyzzzzyyyyyz{{zzyxxyyzyyyyyyyyzyyyyzzzzz{{{{{{||||||||zzzzzzzz{{{{{{{{{yz}}{yyz{{|{zz{{{{{{zzz{{{{{{{{{{{{{{{{{{{{{{{{{z{||z{}�I.?Ux����xozv\c�ﴇ�~�~}~}||~~~~~~���~~��~~~�����������������������������zz��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�������������������������������������������������������������������������������zzz{{{{{|{{{{zzz{{{{|||||{{zzzzz{|�������}yz||z{yxxz|}~~}~}{z{|zyyzzxxxxy{zyz{}yz{z||xyz{{yxyyyyyyyyzzzz{{{{zz{{|}}}||{{{{{{{{zzzzzzzzzzzzyyyyzzzzzyyyyyyzz{{|{zyxxyyzzzzzzzzzzzyyyzzzzzz{{{{z||||||||{{{{{{{{||||||||{y{�~xx~{|}}||||zzzzz{{{|||||||||||||||||||||||||{{|{z{}�P(>RY����un{~YQ��Ȏ���~}~~~|}~~~~~����~~��������������������������������������~y���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zzz{{{{{|||{{{zz{{{{||||||{{{{||x�����������~yyzz|~�}|}~}{{}}||||{z{{zyzzz|�}||y{}|y{|{yxyzzzzz{{{{{{|}}|{{{|}}~}}}{{{{{{{{{{{{{{{{||{{{{zz{{zzzzyyyzzz{||||{zyyyzzzzzzzzzzzzzzzz{{yzzz{{{{}}}}}}}}||||||||}}}}}}}}|{|~}xy~|}~}}}}}zz{{{{|||}}}}}~~||||||||}}}}}}}}}|{|{z{}�P'=LR{���}wx]Mz�ڕ��~~~~~~}}~~���������������������������������������������y��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{{{{{||||||||{{{{{{{{||||||||}~{��������������~z|������~}}|{{}|}~�~||~~}~~}}�~��|{|z|}}|{z{|zz{{{{||{|}~~}|{{|}}~}}}||||||||{{{{{{{{||||{{{{{{{{zzzzzz{{||}}|{{zzzzzzzzzzzzz{{{{{{{{zzz{{{{{}}}}}}}}}}}}}}}}~~~~~~~~}}|zz{{{}}~}}}}~||||}}}}}}~~~~~}}}}}}}}}}}}}}}}~}||{z{}�C2;H`g��}w�eOT��~��~�������������������������������������������������|~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{{{||||}||}}}}}}||||||||||||~�������������������~�������
�����||}||��������������������}~}|{zz{{{{{{{{{{|}~~}|{|}}}}}}||||||||||||||||||{{{{zz||||{{{{{{||}}}|{||||||{{{{{{{{{{|||}|||{{{||}}}}}}}}}}}}}}}}}}}~~~~~~~~}}{xz}}z}~~}}}~~}~~~~~~~~~~~~}}}}}}}}~~~~~~~~~}}}|{|~�D/;CTX����|zv|hWE�󾃃������������������������������������������������������������~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������||||}}}}|}}}~~~~~~}}}}||||~��������������������������������������������������������������|z{{{z|||||{{{}}~~}}||}~~~}}}}}}}}}}}}}}}}}}||||{{{{|||||}}}||}}}}||{|}~~~}|}}}}}}}}|}}~~~}}||}}~~~~~~~~~~~~~~~~~~~~~~~~~~~~}{z{}~}}~~~~}}~~~~~~~~~~~~~~~~~~~}~~|}yJ(<DG`m���{{vwndG��ᘂ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������||}}}}~~|}}~~�~~~}}||}���������������������������������������������������������������}~~}~~}}}}}|}~~~~~~}|}}~~~~~}}}}}}}}}}}}}}}}}}}}||||||}}}}~~}}}~}}||z|~�}|~~~~~~~~}}~~~~~}|}}~~~~~~~~~~~~~~~~~~~~~~~~~~|xz�}|�~~~�~~~~~~~~~~~~~~~~~}~}}�G,5@FP\����ywymLw�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}}~~�����������������������������������������������������������������������������������~~}}}~~~~}}}~~~~~~~~~~~~~~~~~~~~}}}}}}}}{|~~~~}}~{z{z|~~~~��~~~����~~~~~
~~~~~~~~~~~~~~}~�~||~~~~~~~~~~~~~~����~}}��I-1AQPV��{t~}uLc��ҕ����~������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~���������������������������������������������������������������������������������������~~���~~����~~~~~~~~~~~~}}}}}}}}}~�~~}~~|{{|}~��~~~�������~~~~}~���������������~}~��I*3?TPV��wtv�x]T��髆����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~~~~~~}}}}}}}}~�}||}~������������������������������������~}~~�����������������������������������������~~���H(5>WQWz�ojnv^thK������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~~~~~~~~~~~~~~������~}}������������������������������������������~||~����������������������������������������������������~���D*6AVTWhکrpv|]orLv��Ԡ�������������������������������
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~~~~~~�������~���������������������������������������������������~|}����������������������������������������������������>-6HRXY_ũ~~��k��V\��立�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�����������������������������������������������������~~����������������������������������������������������������9/:NPZ]c���}}g��jR���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~��������������������������������������������������������������������������������������������}}�������������������������������������������������������������������������������������������������������������������6/AOPYab��}tzwhy�yV���֙������������������������������������������������������������������������������������������������������������������������������������������������������������������
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|~�������������������������������������������������������������������������������������������|}�����������������������������������������������������������������������������������������������������������������~6-GOQXde��{q{wm��zYw�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|}���������������������������������������������������~��������������������������������������������������������������z16GUU]bf|�|�t|~��hY�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~��������������������������������������������������������������������������������������������������������������������}7:FTZdf`v�{xy�����tM���ژ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|9;CQ\gfew��s�������M���򩅍��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z8;DS\gffp~{mr}���b`��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������z8?LY^himn�~z��r���{P���ٙ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������x9CP[^hjkhz|�m����T���먄��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������u;FOX^iipm�}~}�s~����i��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������t>ILT_ljjj|zww{v�����|x���И�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�������������������������������������������o?CQVfhjnh��{w~w������h���栊�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������pAHNYemnmi��{{�{������k���쯎�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������qALNZ^hmnj��wz�|������u����Ŕ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������pAMR\bggok�|rx�x�����������ڜ��������������������������������������������������������������
�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������mFNUd��jmk�v}�tz������q���驍������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jJQUo���ok�����s}������s���󼒌�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jEUWt�ߩyp�����q������������ћ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������l=WZr��ƅv��{~�n~�������u���࢐�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������iJRYq���v��v}~y�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������kNTWp���|��~��z�������������ɒ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������hRWWp���Ӆ��~��{��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cSYXs�������y����������������騐����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������cSZYt����zeru[u����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
���������������������������������������������������������������������������������������������������������������������������������fSZYq����{w��lr�������������˕����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������eQ[Zm���󷋄���s|�������������ۣ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������aM[[k���������s��������������崓���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������dLZ]r���
�ɜ�v��q���������������Ę���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������eN\\p����נ����x���������������ʙ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������dO][o����楒�������������������٠���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������aN[[q�����������������������譓�����������������������������������������������������������������������������������������������������������������
���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������`NZZu����𯑀������������������𼖒�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������`OZZw�����}��|����������������͜��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������_QZYv����򹔅��x����������������ߣ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������]PZXu����򻖏��y��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������`MYZ{����������}������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������_OY[|����������z�����������������Л�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������^PZ\}����������{�����������������墝����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������\RZ]~�����������������������������������������������������������������������������������������������������������Ƿ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ZTY\�����ś�������������������������������������������������������������������������������������������������������ԧ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�����������������������������������������������������������������XVX\�����ƚ���|������������������͜�������������������������������������������������������������������������������೤������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������WXY]�����ƚ���s������������������ݨ��������������������������������������������������������������������������������ȫ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������WYY]������Ǜ���o������������������汖�������������������������������������������������������������������������������ׯ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������UXY]������Ƙ���u��������������������������������������������������������������
���������������������������������������ɦ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ȗ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������TWY\������Ř���u�������������������Ĝ��������������������������������������������������������������������������������۰����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ș�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������RVX[�����������w�������������������ќ��������������������������������������������������������������������������������뿪���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ș�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������PVX[������ʗ���~�������������������ݢ���������������������������������������������������������������������������������Э���������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������Ț�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������NUY[������Ù�����������������������鬜��������������������������������������������������������������������������������Ấ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ț�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������LUZ\������Ɲ����������������������󺙜��������������������������������������������������������������������������������Щ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ț�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������LU[]������ʃ\v}ax�������������������ʚ���������������������������������������������������������������������������������㶬�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ț������������
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������KV\^�������no��ai�������������������נ����������������������������������������������������������������������������������²�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ȟ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������NXZa�������y���mt�������������������⠥���������������������������������������������������������������������������������ଦ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˞�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������OWY`������ě���~~�������������������魢���������������������������������������������������������������������������������⽮������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˞����������������������������������������������������������������������������������������������������������������������������������������������������������
���������������������������������������������������������������������������������������������������������������������PWXa������ɟ���t���������������������������������������������������������������������������������������������������������ϰ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˞������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������PWYb������ș���t���������������������џ����������������������������������������������������������������������������������ٱ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˞������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|OXZc������ʞ�������������������������ߡ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������˟������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������zPX[d������Λ����������
���������������騞����������������������������������������������������������������������������������צ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ˠ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yRXYc������Қ�������������������������ﱟ����������������������������������������������������������������������������������߷�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ˡ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yTXXb������ϟ�����������������������񺢠����������������������������������������������������������������������������������ϰ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ˡ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������uVUVe������Р�������������������������å�����������������������������������������������������������������������������������齦�������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������΢������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������vMUYd������͞��������������������������ˤ�����������������������������������������������������������������������������������ɬ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Σ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������uRZZd������ӟ��������������������������դ����������������������������������������������������������������������������������d�ҳ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Σ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������tSQN`������і���x����������������������ԥ����������������������������������������������������������������������������������gsʼ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������΢������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������qRPI]������՞��������������������������������������������������������������������������������������������������������������sW�ǭ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������΢������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������oQYS`������Ϣ��������������������������������������������������������������������������������������������������������������Q�ʬ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ρ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������pKWTb������С���������������������������������������������������������������������������������������������������������������Sf����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������΢���������������������������������������������������������������������������������������������
���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������qSYR_������ʐ���{�����������������������������������������������������������������������������������������������������������RW�ɰ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Σ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������nSU[d������țwwn������������������������������������������������������������������������������������������������������������V�߱�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ѣ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������mQU\f������͞���~������������������������������������������������������������������������������������������������������������m^�ï������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ѣ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
����������������������������������������mMRZc������Ҧ�����������������������������������������������������������������������������������������������������������������T�ʲ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ѥ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������mJRYa������ӣ���n�������������������������������������������������������������������������������������������������������������vvֺ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ѯ�������ż��������������������������������������������������������������������������������������������������������ľ�����������������������������������������������������������������������������������������������������������������������������������������������������������mNX\b������қ���Xv�������������������������������������������������������������������������������������������������������������Q�ĳ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ѽ��������ľ���������������������������������������������������������������������������������������Ŀ����������������ż���������������������������������������������������������������������������������������������������������������������������������������������������������lO[_d������ѡ���[p�������������������������������������������������������������������������������������
������������������������h�ɹ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ĺ�������������������������������������������������������������������������������������������������������ƺ��������������������������������������������������������������������������������������������������������������������������������������������������������jM\_c������ө���_n�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ĺ�������������������������������������������������������������������������������������������������������Ź�������������������������������������������������������������������������������������������������������������������������������������������������������iL\_b������֤���Wm�������������������������������������������������������������������������������������������������������������oY�ǽ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������dPX\i������ܑx��N}����������������������������������������������������������������������������������������������������������pQO*N��׼��ñ���������������������������������������������������������������������������������������������������������
���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ѿ���������������������������û������������������������������������������������������������������������������������������������������������������������������������������������������bR[^k�������rWcf?{��������������������������������������������������������������������������������������������������������jMHP>5UO����Ÿ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������þ�������������������������������������������������������������������������¾õ����������������������������Ƚ��������������������������������������������������������������������������������������������������������������������������������������������������_S\`l�������cAx�T��������������������������������������������������������������������������������������������������������p=HB0C6@ECq��������ſ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ÿ�������������������������������������������������������������������������ұ��������������������������������ʼ������������������������������������������������������������������������������������������������������������������������������������������������]P[^j�������fN�呧��������������������������������������������������������������������������������������������������������C8LJK7MFM}j_���������¾���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������þ�������������������������
�����������������������������������������������������������ݼ���������������������������������̼�����������������������������������������������������������������������������������������������������������������������������������������������\NY]i�������cA������������������������������������������������������������������������������������������������������������oTNN�pS����������������ƽ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ּ�������������������������������������������������������������������������������������������޸���������������������������������ķ�������������������������������������������������������������������������������������������������������������������Ÿ�������������������������ZO[^i�������fA��s������������������������������������������������������������������������������������������������������������}�떞��������������������ý��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ַ��������������������������������������������������������������������������������������������ֲ��������������������������������½����������������������������������������������������������������������������������������������������������������������Ǵ����������������������XR]`i�������pfEkb��������������������������������������������������������������������������������������������������������������Ɵ�����·�����������������ÿ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ַ���������������������������������������������������������������������������������������������ɱ������������������������������ƿ½���������������������������������������������������
��������������������������������������������������������������������ž��������������������WT_`i������ו�puv��������������������������������������������������������������������������������������������������������������s��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������֯���������������������������������������������������������������������������������������������彬�����������������������������������������������������������������������������������������������������������������Ǽ��������������������������������������������Ƽ�������������PW\ch������ђ���e{������������������������������������������������������������������������������������������������������������n��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ٯ����������������������������������������������������������������������������������������������ݷ������������������������������������������������������������������������������������������������������������������ɼ��������������������������������������������ķ������������QX\bi���а��[a��PE��k|��������������������������������������������������������������������������������������������������������~hz�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ٮ�����������������������������������������������������������������������������������������������ѭ������������������������������������������������������������������������������������������������������������������ķ�������������������������������������������ɼ������������PZ]af�������u���"!vpV�����������������������������������
���������������������������������������������������������������������f���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ٮ�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������½�������������������������ɾ������������QV\em������ۈ���DF��z�������������������������������������������������������������������������������������������������������ؓa��ľ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������٭������������������������������������������������������������������������������������������������߫���������������������������������������������������������������������������������������������������������������ý�������������������������������¿������������ƽ������������V\^`f�����οy}yj@>��n�������������������������������������������������������������������������������������������������������Ɋ_���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ٮ�������������������������������������������������������������������������������������������������Ӱ�������������������������������������������������������������������������������������������������������������ſ������������������������������������������������������������WU`hj���ěp�hns=#*KZX�����������������������������������¿����������������������������������������������������������������ؽ�^�����������������������������������������������������������������������������
����������������������������������������������������������������������������������������������������������������������������������������������������ٮ�����������������������������������������������������������������������������������������������»߾���������������������������������������������������������������������������������������������������������������������������������������������������������������������������VY`fi������rdd%%/3z�������������������������������������û���������������������������������¾���������������������������Ǹ�`�����½¼������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ٮ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ź�����������������������������������������������������������Va[`p������ހ�z*!$
&?�����������������������������������������������������������ͼ����������������������������ǻ����������Ծ��b�����»��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ٯ���������������������������������������������������������������������������������������������������Ѷ������������������������������������������������������������������������������������������������������������þ���������¼������������������������������������������������V]`am�����ôhgN102199������������������������������������������������������������������������������������������������������Ľ�b���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ܯ��������������������������������������������������������������������������������������������������tϼ������������������������������������������������������������������������������������������������������������¿�����������������������������������������������������������W\acp���ߠ�vDFE7/0>9:�����������������������������������������ɿ�����������������������������������������������ſ�������������b������¿�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ܮ��������������������������������������������������������������������������������������������������g�޴��������������������������������������������������������Ŀ����þ������������������������������������������������������¿������������������������������������������������WZ`bs������ԓ���L����������������������������������������������������������������������������������������������ſ�����������Ȑf���������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ܮ���������������������������������������������������������������������������������������������������^�ҹ�������������������������������������������������������������ľ��������������������������������������������������������������������������������������������������������WZ_`s������֞���N����������������������������������������������������������������������������������������������žǿ���������̒i���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ܯ���������������������������������������������������������������������������������������������������ns�ͳ��������������������������������������������¿������������������������������������������������������������������������������������������������������������������������X[__t������֜���f����������������������������������������������������������������������������������������������ſ�����������ʑj���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ܯ����������������������������������������������������������������������������������������������������r�׻����������������������������������������������������������������������ż������������������þ��������������������������������������������������������������������������YZ__y������❟��bu����������������������������������������������������������������������������������������������������ʽ����ɑk��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�������������������������������ܮ�����������������������������������������������������������������������������������������������������x�Ƚ����������������������������������������������������������������������ľ�������������������þ������������������������������������������������������������������������YY^^|������漴�������������������������������������������������������������������������������������������������������ѿ�����ɑi���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ܮ��������������������������������������������������������������������������������������������������������Ķ�����������������������������������������þ�����������������������������������������������ǿ�����������������������������������������������������������������������}W\^\z������������������������������������������ÿ���������������������������������������������������������������������������ȏe���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ܯ���������������������������������������������������������������������������������������������������������μ��������������������������Ȼ�����������¿������������������������������������������������¾�����������������������������������������������������������������������yX[\_y����������������������������������������������������������Ľ��������������������������������������������������ٿ�ƽ����Ñi���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߯������������������������������������������������������������������������������������������������������Me���������������
�������������������������������������������������������ÿ�������������������û������������������������������������������������������������������������xW\_`x�������ĸ�����������������������������������������������������������������������������������������������������ʼ�������Ēi���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߰�����������������������������������������������������������������������������������������������������VLEU�η��������������������������������������������������������������������������������������ɸ�������������������������������������������������������������������������uV]aau������徰�����ÿ�������������������������������������������������������������������������������������������������������ēj���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߱�����������������������������������������������������������������������������������������������������DGYOe���������������������������������������������������������������������������������������⼶�������������������������������������������������������������������������sV]cbs�����������z������������������������������������������������������������������������������������������������ѿ�Ľ������Ĕj���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߱�����������������������������������������������������������������������������������������������������{f�b����������������������������������������������������������������������������������������β������������������������������������������������������������������������
��rW]caq������盇��k��½�������������������������������������������½����������������������������������������������������������ŕj���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߱������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������⼰�����������ÿ�������������������������������������������������������������qX\b`q������趨�����ý�����������������������������������ſ������������������������������������������������������������������ǖi���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߱������������������������������������������������������������������������������������������������������ۺ����������������������������������������������������������������������������������������Ѷ���������������������������������������������������������������������������pX\``s������Ḫ�������������������������������������������������¿�����������������������������������������������������������ʗh}��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߱��������������������������������������������������������������������������������������������������������¼�������������������������������������������������������������������������������������伵���������������������������������������������������������������������������oX\`_t������빩����������������������������������������������¾��¼����������������������������������������������������������̘g{���������������������������
�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߰��������������������������������������������������������������������������������������������������������򸵹�����������������������������������������������������������������������������������ĺ���������������¿�����������������������������������������������������������k[^da|������峓���������������������������������������������������ļ��»�����������������������������������������������������Ԛnp��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ⰰ�������������������������������������������������������������������������������������������������������𷵹����������������������������������������������������������������������������������߿�����������������������������������������������������������������������������jZ^db~������賤���������������������������������������������������Ǿ��¼�����������������������������������������������������ԛno��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ⱱ����������������������������������������������������������������������������������������������������ޫ������������������������������������������������������������������������������������ȹ�����������������������������������������������������������������������������hY]dc�������谪��������������������������������������������������������������������������������������������������������������Қmn������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������Ⲳ�������������������������������������������������������������������������������������������������������칸����������������������������������������������������������������������������������ڸ������������������������������������������������������������������������������gY]dc�������괩��������������������������������������������������������������������������������������������������������������Ιkl��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ⳳ���������������������������������������������������������������������������������������������������ꕈ�껺����������������������������������������������������������������������������������ȴ������������������������������������������������������������������������������fZ^cb�������汘�����������������������������������������¿�������������������������������������������������������������������̚kl��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ⴴ���������������������������������������������������������������������������������������������������Ȁ��绽���������������������������������������������������������������������������������ܿ�������������������������������������������������������������������������������eZ^cb�������紓�����������������������������������������������ƾ�������������������������������������������������������������˜ll��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ⴴ���������������������������������������������������������������������������������������������������x��彿���������������
������������������������������������������������������������������Ȼ�������������������������������������������������������������������������������dY^bb���������������������������������������������������������ù�����ž������������������������������������������������Ľ����ɝkk��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ⴴ����������������������������������������������������������������������������������������������������v������������������������������������������������������������������������������������ḹ�������������������������������������������������������������������������������cX]bb�������橡�����������������������������������������������·�����»������������������������������������������������������Ȝji��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ⳳ����������������������������������������������������������������������������������������������������d��߾����������������������������������������������������������������������ǿ����ǻ��ڽ���������������������������������������������������������������������������������[]__b�������롂��������������������������������������������������������������������������������������������������������������ʡmi��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Ⲳ�������������������������������������������������������������������������������������������������ɶ�d��ݾ������������������¾��������������������������������¿������������������������ǯܻ���������������������������������������������������������������������������������bU^^b��������
|��������������������������������������������������������������������������������������������������������������ɠlh��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ⱱ������������������������������������������������������������������������������������������������ں��`��۽��������������������������������Ƽ�������¿�����������������������������}ly�~oR`�ô��������������������������������������������������������������������������������iY]^a�������﵎��������������������������������������������������������������������������������������������������������������ɠlg��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ⰰ����������������������������������������������������������������������������������������������������X��׽������������ǿ����������������������ƶ��¿���������������������������ž������aZy��Ľ�������������������������������������������������������������������������������\\^^b�������䭓��������������������������������������������������������������������������������������������������������������ȡlf��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������⯯�����������������������������������������������������������������������������������������������˷�®T��ҽ��������������ź�������¾��������������¼��������������������������������������ƙ����������������������������������������������������������������������������������T\_`c�������첡�������������������������������������������������������������ż�����������������������������������������������ơh_������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������⯯����������������������������������������������������������������������������������������������庻���W��ͽ������������������������¿���º��˵�����������������������������������������Ĕ�ܹ����������������������������������������������������������������������������������[Z_`c�������뫙�������������������������������������������������������������Ǽ������������������������������������������¿���ácY��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������⮯����������������������������������������������������������������������������������������������ʴ����[��ʾ��������������������������������¼����������������������������������������������ǜ�׾������������������������������������������������������������������������������ZV^_b�������觏�������������������������������������������������������������ƻ������������������������������������������¾���£e[��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������⮮���������������������������������������������������������������������������������������������۶�����\��Ⱦ���������������������ƽ�������ѽ����������������������������������������������{�����ü�����������������������������������������������������������������������������WY\]a�������貚�������������������������������������������������������������Ź������������������������������������������½���§jc�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
���������ⱱ���������������������������������������������������������������������������������������������²�����d��ǽ������������������������»��ۻ������������������������������������������������u�����ҵ�����������������������������������������������������������������������������VY[^`�������嵤�������������������������������������������������������������Ƹ������������������������������������������¾���ãqe������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߰���������������������������������������������������������������������������������������������չ������i��ǽ���������������������������ͽ�������������������������������������������������m������Ÿ����������������������������������������������������������������������������TX[_b�����������������������������������������������������������������������Ŵ��������������������������;��������������ÿ���ĥre����������������������������������ӭZPm�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߱����������������������������������������������������������������������������������������������������zk��ƽ����������������������������������������������������������������������������
�������۷����������������������������������������������������������������������������RW[ae��������������������������������������������������������������������ų��������������������������վ�������������������ħrd����������������������������������͵�gLb����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߲�������������������������������������������������������������������������������������������޴�������lm��Ž����������������������Ĩdqɯ�������������������������������������������������¾������ƺ���������������������������������������������������������������������������RX[af�������쮝��������������������������������������������������������������������������������������������������������������ĩrd������������������������������������̤a�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߲�������������������������������������������������������������������������������������������ϴ�������bu��ļ��������������������������Ӻ�����������������������������������������������˷|�̽�����ٻ���������������������������������������������������������Ķ����������������SZ[af������������������������������������������������������������������������������������������������������������������������©rd������������������������������������˺�ε��ξ�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߱����������������������������������������������������������������������������������������������������[~��¼��������������������������꽶���������������������������������������������з�~�۾����������������������������������������������������������������Ƶ����������������T\[ae���������������������������������������������������������������������������
����������������������Ŀ�����������������������qe������������������������������������зѽ����ͯ¿�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߲����������������������������������������������������������������������������������������������������U��������������������������������ƾ������������������������������������������������~�߽������к��������������������������������������������������������ȳ����������������T[]ad�������밟������������������������������������������������������������������������������������ÿ��������������������������rf������������������������������������λy������̩���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������߳����������������¸������������������¾��������������������������������������������������������������R��������������������������������غ�������������������������������������������ླྀĳ}�Ҿ������޿��������������������������������������������������������ɲ����������������S[^ad�������䐀������������������������������������������������������������������������������������Ŀ��������������������������sg������������������������������������Ҽw������̪����������������������������������������������������������������������̵���������������������������������������������������������������������������������������������������������߯�Ľ�������������ɰ������������������Ƹ����������������������ų��������������������������������������S��꿽������������������������������������������������������������������������ż���z�ؽ�������ӷ�������������������������������������������������������ǵ����������������\Y^`g�������������������������������������������������������������������������������������������ƽ�����������������¿�¿����up������������������ſ����������������ӽx�������ō���������������������������������������������������������������������Ǭ�����������
����������������������������������������������������������������������������������������������ݼ���������������ͻ�����������������®������������������������Ų�����������ư���ҳ��������������������O��������������������������������ۻ������������������������������������������Ӵ����~�Ի�������Ḷ������������������������������������������������������¹����������������[Y]`h����������������������������������������������������������������������}��ۿ��������������������ï����������������¿�������un�������������������������������������ڄ�������͛���������������������������������������������������������������������ra^c�������������������������������������������������������������������������������������������������������ݾŵ�������������г�����������ú������������������������������ź�����������ļ��˶���������������������L��彺���������������������������߽����������������������Ⱦ���������ø�·���׾�����}�ּ��������°������������������������������������������������������������������������YY]`j���������������������������������������������������������������������������ɿ�������������������ù�iz���������������������ul��������������������������������������|�������ү��������������������������������������������������������������������ܩ�g��Ϻ�����Լ���������������������������������������������������������������������������������������������ݱƯ������������ȿ�������������ɬ������������������������������Ⱦ�������ȳ���ʿ�����������������������K��㽺���������ǽ���������ƺó���ཱ���������������������ǹ����������κ�����´�����}�ع��������̱�����������������������������������������������������������������������}XZ]`k�������𨚣�������������������������������������������������������ɾ���������ļ������������������ů�����������������������tj��������������������ʼ����������������������̾�������������������������������������������������������������ʽ������ݫ���ض�����ҵ���������������������������������������������������������������������������������������������ݧı�������������̾��������ĳ��ȸ�Ǻ����������������������������ž����̽ȼ���Ĵ����������¾�����������O��ຸ���������ʮ
�ж��������ȱ���⼷��������������������ȹ���������������������������պ��������Ʒ���������������������ƺ������������������������������������������������}X\^`j�������񧔬������������������������������������������������������ƻ������������Ļ������������������ŵ�������ƾ������������rj�������������������ķ���������������ž�ͤ������ĵ������������������������������������������������������������Ǒ������ӵ���ټ�����δ���������������������������������������������������������������������������������������������ݫ���������������įӴ������º�����ñ�����������������Ĺ�����������������»���ҵ�������¾��ľ�����ƾ���U��ี��������Ӽ������������ß���仸�����ô������������Ƕ������˿��������Xz³����������ƺ��Ư�������������������������Ŵ������������������������������������������������~X[^`j���������������������������������������������������������������ÿ��������������п�����������������У�Ž�����ڽ������������qk����ǿ��¿���������ǫ����������������Ç�˪�����Ů��������������������������������������������������������������g������ö���������ϼ���������������������������������������������������������������������������������������������ݿ�����������������ի������þ�����Ÿ�����������������ķ������������׷����Ŷ��͵��������û¿������ú���[��㸳���������´���Ž������ѝ���߷��Ľ���´�����������������������������v�˰�������������ɽ����������������������������������������������������������������������������XZ]_i�������뫞�������������������������������ø����������������������������µ�������»����������������ך�¿�������������������ol����ȿ��������������ţ���������������Ď�͙�����ȳ�������������������������������������������������������������ɱ�����ɼ���̾��þ�Ʒ����������������������������������������������������������������������������������������������ׯ�������������ǻ�Ͳ�������������±��������������������������î���׳���������÷��������϶�����������~]��޴�����������������������Ӛ��ݻ��ξ�����������������±���̻����������k��ó�������غ���ȭ���������������������������������������������������������������¼������������WY\^i�����������������������������������������ź����������������������������õ�����������������
��������Ǡ�Ƽ�������������������nn��������������������Ƒ�����������ÿ�̿��Ж�����ǰ�������������������������������������������������������������ʻ��������������ƽ�ʻ����������������������������������������������������������������������������������������������ˬ�������������͸���������­��������������������������������������۱�����ǲ�Ŵ�����Ʒ��ó�����������ye��ظ��������ֺ���Ȱ�����º�Ǥ��ؼ�������������������·�����θ����������b��ν�������ٽ���β����������������������������������������������������������y����������������ÃWU^_f�������힞�������������������������������Ⱥ����������������������������ɳ�������������������������Ɲ�ɻ�������������������rk�������������������ż����������¿����ň�Ԟ�����Ű�������������������������������������������������������������͸�����������Ȓ�����ǫ���������������������������������������������������������������������������������������������ylionnqptsqxxvv��r|p{uw{~}��z���|}{xwxxwvvvvvwwwwuvz~}zxxyz~���������������������������������������tg��޷���������ư�ɶ����»������۴ϵ�������������������ʼ����Ĺ�����������b��Ӻ�������׼���Ŵ�������������������������������������������������������¸���������ö��������|UX^ah�������ꪛ�������������������������������������������������������������˵�����������������������û���˼�������������������qj�����������������¾�����¿������������ӱ�����ǲ�������������������������������������������������������������ͺ�����н����̓�����Ǫ���������������������������������������������������������������������������������������������__b`ZZ[WWVQY^Y^�mTXW^YV[\T]Zo~d^^[[_^ZY]ZZZYZZ[\ZXXZ\\[[[[ZXYZYXXYZY]V`_��T�w]bcffdpgiiwlikprs|���~~^i��˘������������Ĭ������ȮƸ��ݺκ�������������������˼���˾������������y��ҹ����|��ۻ���¶������������������������������������������������������´���ɲ�����¾��������sSY^cj�������ﱙ�������������������������������������������������������������̶����������������������������ͼ�������������������pj����������������������������������������ճͽ���¯�������������������������������������������������������������ͺ�����ͻ����Г����� 
���������������������������������������������������������������������������������������������XYZWVWUOJLHIORP}YSXURUQPXQUQ��XYYUUYXSRUVVUUUVWW[ZYYZ[[\^^]ZVUVWX]Z^[T_c��Q�nYXVYYV\UYX_\YXYXW\d_\Y^No���nggeghgx�}zotwvwy}}�����p�Ȓ���~����������������������������������������¥���v��ѭ����������������������������������������������������������������̮���������������mQY]ck�����������������������������������������������������������������������̵����������������������������Ͻ�������������������pi�������������������þ����ľ��¿������È�ڲͿ���Ĵ�������������������������������������������������������������ι�����̿����Ӗ����ξ����������������������������������������������������������������������������������������������XWTRRRPPNOROMVNTHLRPLSRNPQRD��LYYWWWWVVWTTSSTTUUVVUUVWWVPRTTRRUXV[^^YY[`�~Ph^ZXZXYWZVZY[WTSTSQRTLPTXKz���f\\WXYVc�b^ZYVWZa]_b_iZtfT��dcZYWzmRYVZPONNNNPQPTUUOVVR��vPR\UUUVVUXW��_��fd[aZ��bei�plnquz�������������������������������������������������������ζ���������������kQW\bj�������𢔚������������������������������������������������������������˴����������������������������н�������������������qi����ǿ�������������������������������Ō�ݳ�����Ļ�������������������������������������������������������������θ������ŕ���֜����������������������������������������������������������������������������������������������������MOLIHDFNTQZYQTOIQMMPMNNMIHI;�}CHHIHGHKLLRRRSSSSSRRQQRTSQSTUVTSTWVUZVUZST��XTVXUVRSRSQRQQWUSQNIFDKDEEC���`Z\YZ[Yd�ZWPPUUUYUSXUURUTT��bhogq��dfdjjhffffgigb`dced`��t[ah^e`ca^_X�vY��a\VXX��Z^_�iTTTSPOOPTVXZ]ckq��������������������������������������������Ϻ���������������kQU\`g��������LU=����������������������������������������������������������̵����������������������������Ѽ�������������������qg|���Ǿ�����������������¿������������Č�ܬ�Ǻ�����������������������������������������������������������������θ������Ɠ���զ�����Ţ���������������������������������������������������������������������������������������������PWVUXXZcjgnnjhebcZXZXQUTWQPH�xLHLLKKLMOONOPPQPPPRQPPSUTPRONMLJJLRNPLOOMM{xNIKIIEI
IJJJIHHMQUZ_dhkbRRGD���SNNKKLMe�RNHEMMTQSQPQOVWYR��^co�������������������������������������{��������}d�쭒�������zqia]WUQOMMNNJS_�qx{������������������������������������ϸ���������������jRV^_e��������zs����������������������������������������������������������͵����������������������������ѻ�������������������pcw���Ǽ��������������������������������ݨ������ǜ������������������������������������������������������������̵���������Ю���ſǤ���������������������������������������������������������������������������������������������pwtottprrssqvtnnlknljenlb\\N{`QVRPOPOMMONOQRRQQPMLJIMPOJJFCDFGIKMKKKOJTW��X_`]hdiilnqprtx{~~~�����pV���a]^_dinz�ppxorw|t��wvnu|~k��������}������������������������������������������_�꫖��������������~zwrkfa]X�]LT[ROTY]dhtcu�������������������������ҺŬ�������������hRW_^d��������| !l����������������������������������������������������������͵����������������������������к�������������������n`r���ƻ�����������������������������������ֿ����Ş������������������������������������������������������������˱�����ȼ����˱��ƿ�ǣ��������������������������������������������������������������������������������������������؂����~{yywvxzxtqovrqlmtj`WOLLMNOOOMIHJJHKKKJGEEEDDBFT\[YZ\_bejptzxxz~|y�����}�~~�����������������Y����{�~~{���~��������������xѸ��������������������}||{yy��������������ë�������b�뭆��������������������}ywqd_dhcfaaYT^[Xeuz{����������������������ȭġ�������������cTU\]g��������~!
p����������������������������������������������������������ɼ����������������������������Ժ�������������������n\j�����������������������������������������ƿ���¯��ż�������ľż�����½�����¾�������������������������������ȵ�����������Ȫ��ɕ������������������������������������������������������������������������������������������������z{{||||{xvttvwxyw{y|{wq\GIKID@CINNKGEFFDDDDDEHKN\beflrw||}~~}}�{xz��������|�}�������������������~Z��򻎋����yiwilzz~|~���������{Թ������������������������~up�wv|}�������~����������e�䧌��{������������������������������~��q}��~}�{�������������������������������������[OW]^g��������u%!k����������������������������������������������������������ʸ����������������������������ӹ�������������������t`o���¾��½������������Ŷ��������ý������ޫ�þ�����y������¿����þ����ÿ��������������������»�����������������ǰ���������俶����pv}f~��������������������������������������������������������������������������������������������||||}}}}{|~~}|{z}~ynZJLPQOIFEHHHFGJKKVX[`gnuy}��~xw|������~}}~}}��������������������������������{V��쮊������|u{�z|z}}}}}~��~{{׷����������~}{{|��������}yz}~�����}~�������|`�쬄|����������������������������������n�~}~y��}���������������������������xz~��QOXX\g��������kNXImpqqoopppnnnopnjhfhegfchhiegtiuuptqu}����������������������ί����������������������������֮�������������������nWg��詙�����������~�|������������������{�ץҲ������t�����������������������������¼���������»�����������������Ư��Ⱦ�����ђ�wlrqtque{}~�~���������������������������������������������������������������������������������������}|{zxvuvuxz{|{xuwwwvsstnopqssojfghiknruvuwy|������}ww|~��������{~����{v�~t�����������������������|[��񲎌�������~�~w}~}{zz|~�}�᷇���}zy|��}}{xwy�~|{}�����������~~y|Ș�����{b�⤈||vy������������������������~~���~d{������������������������w����}~}|{|}~wxuqtQTX^`g��������eXbZjccbabba_^adda^_`_`\_^Z]\\WXaX^_\ZZailr������������������~��������������������������q����ʍ�����~|{||}�����~sgUh��蝄zw{{{|||{z����~��}}��|{zz}|{���o�ؖŐ������g���������������������������
�����������������������������������®����o�ƃ~sk|tunl_qru|{�}|��������¾����������������������������������������������½��������������������������ŀ�~zyzsrqquxxvpptrlmqssvyxutuvvvwxz{|}zz{{{{zy~{}|wx~{}~~}||}~����~}~~}������������������������u[����|z{z{|}yy��}{yxyz}~�y�箇�}xzzyxzzyzz{yxx{~yww{�����~����|�xҔ~}z}~�|i�ܜvk}�}}~~~~�������������������������m|���������������������|��}����{}|zyz{}~���xORZU^k��������bV[^hbabdffdbddbaabehghde_W]c`YX`XXXYXW\`^als���������������{�����������������������|mdz}l��ysyxyvsmkjlnopqrqstmk^m��뜁~zurrrqqsuv����v��{}��~~~��~���o�ؘǉ�}{���no�|��}�|~|~�|��{�|vu���������������������������������������tmez�n�͌�|m�qkni]jlq}}ywz{{���������������������������������������������������������������������������������ooqttqruxwusttpjaamqprqrxxwuvxxxvvvxxvuvuttuxz||}��tsz{|}~~}}~�������������������������������j\��驪����}�|{|{{~}vzzywwxz||r�㬆}ytwywvwvswz|}{ywvvqoswzzz{~|{}|{~z}v�sz��w{{~ztcwm_y�}}����~|{{|}~|zyyxstxxttvszw~~}�yzv|�~���~�~�����}}���~~~~���}}{||}}|{{~{~�o75CO_o������߻kglllcceilljihecehhb]W^ch^QW_]WV\XUUYYX\acgjhqsw{~�����������h������������~~~~}yvyyleuxm��tnqruppsnigilnpqrtupi`g���{wrlxvrmjmsxrjpyt��v|�~}���������r�֗Ə������ni�����������{|�����~|~rx�~�|��~���������������������������}w��pke~�o�ҋ�pYokpkf]ekoy~vrw}}xtw�����������������������������������������������~yuuv}��������������������������popsqoptuwxusqjbUVeklqqvwusswywrsrtwwustusssuvwwwt|~rqyytvxyyz{|}���~~���~�����������������������pl��נ�����}}}}~{xoomid`_`cau��|wzruvssuvsvwxxwtpnuqptwvojnqpqtnhlnljtln�n_efhf_YMD@BUebf_]\ZWSQPPRSRNLMNNORY]]`c``_jigkjmkhchloupihppqsmnnprrrqrspqsu|rtxxy{|{ywuzvuze407N[[jx������oYQZgllnrtrqqrrswyumg_cgpk_aecaacb`acdekot{}wuwxyyz{}|�������g������������~qx�}{ywqpe]hof�Ă|snsmllhc^\^`afghlladf���}xsmhiifcdjptqupi�s{{|yw}������y�ܞɛ������zi�����������wz��������}����������������������������������{vp��qkb{�t�ԋ|rXkikkh_cmnuyyxustzvx�������������������������������������������xnsuwxx{��������������������������xutsojkorvwvw
z{xmhogZXYdemttrrtuooquurppijlptwwvyrwzpototvxyyxyy~~}}�����||}{~�����������������{s���wgj_beehhikfb^Z`^]YQF=98235<ES\cbhllmnmijheca`^]ND<>IT[^befkpf^biciqrpfkjnhgedhehZST^]miklkkihhhlmkhgilnijnompqldcpokpnsuuttrx�x{x}xvwoqsrtx|{uopv}rs{}~}~~~|{yqqntr_c``qopio|����^9/Catsuwwtrstw{{yy|{qhorlmmoqsoqrtrsx}{y~�|z{|}}~����������p~��wndt��������nz��������u{�r��|�zw�zxvy{yuojgqpmopbmn���unha\dlonnqutx~{|�av�zx��������xݕ���������i��������������������������������������z��}�����|������}{yr��nj\nvm�ՋulXqqkki__lkp}{wsqsutr}yy||}������������������������������������zttvvurvurty{y�����������������������wy{ztmlppqqqswzz{~{siaYRS^jnopqrvvzzstwqjpstw|~}~~~��wlvuxxsu}���}||{zzsuqrwoowlpywtusuvy{yvvy{usspkrsuwwtnjigfjqwxwopsutqprliffilljlikoleflfc`aeikkiryzz���~|y|�y����sw}xx|}vuxpxrrqvpqwspuuru|glyrpvsyyzzxwx{zxzvv{y{wx}|}|uxyysuxtxqy}~}wtxvy|}|{{{pwx}{����~zsy~vu}x~�������x:#P_�����z��u�zzu}����~}~|dwl~{sVeo{iwy�{wx��~|vqu|�������y�i��z|j~z��~~fher������{}���{���~������������������������}�~��������������}gby���~�yyz��v}՞���������d�������������~��������|~�����������������{sz}����|zvvxrg��pgXhog}ԇuqPnonkmeTlhnx{uignmkpuuxwx{z{zw}�������sty|����������������yssutsuuuxtosoqwswu|~~zy������������������wz}|wssvoponqtuswxz{}}ytfoy}}}~}}ywx{~{siov{~~{xvy|~{uomorxxuy��{wvvusonuqprnstns}|zyurpqvxvsrmmjgeghiccdfhjkkkkklnnljolloqnjhomlmpsrqqrrswzwr|����������������������y�������}z{}~��{y�����������|}{y}�~{��qz��~xqlpnrpptrvxtuz}~}yqprpu{y���zy}}yxy}{trw}{{turyxyryzvuvwx}wx�������|M*Jn��}������}u�{{~{~��h~��~xj{��|utvsx�{{|}}~����������Rq������~����w~z|�tpx{�����{uv{�}|��zx}����������~��~v}�������}����������������yyy��}~���~}�����������o����������������������xn�����������������������������������~��rrkx~spՐytVkjgke^Wofdfj{yiirsphs~{sqt{|xtuz~~}yoilqqqs|���~~|vz���~wtqqsuuuxstnmqotvy|�~wvvw~��������������y}�||�zywvwyws���������~xrouvsnljffmmnrssrssrstpjkqruxvrtwtwuqosxxvssxtqrpuxstwutroonprrnjhfdcefeeegd`\Z\`cdfhjkkjjqruz}}{{�������������������
�����������������������wx|~t}~~|}��������|}�����zyy|pjuvs}u{}yuvyy{wzxvvu{|}�}~�}|z}~|���|yy|~�~~}zw}~y{x����youxz�}~xx�{���xwb3@t���������w�}zwuuvz��}k|{{{}}���v~����}|x|�������������C_{{|w�{x~�~�~��������������������������������~����|��������|��������������������������������������{u~��������������������{i~��������������������������������������|u��|u����gs{~qjf^mjmlyoozygbquvututqkyywssuvtqrporusnsvruy|xzwvy}}zyqmnouxvrqtqprpssruz}{yyyuw|~����������{~|{��}zxyzvp{}�wnotwwvsnic`musnmjkr|vrststxurqpolmphgknlnrswtpnqvupwsqnnommkf`][]_^_dhieabdd_^eloprosx{zwsryz}�����������������������������������������������������ziq}~�}z�����������}~|}{�~~}{zwvd^q|y{qv{|zxy{~z}{xww|}{~{{�yyx{zw�||{yw{��t{��zux}u{|~vwuyu|~���sjrtwy��z||zhrtLLz��������~x�~��w�y|�x����������y~z~���������|��{�����Vi{x|us���������|~������������������~{uqsx{{{tz}}yy{xz�|{��xkpsrrqzv����{zv}��~ttz�}rn��twyxx{�������{���{�����|u������{~{�x{~���~|}}qek{wty����~�������������������������������sx��������}��|z��ywz~��~��~|{{zy{ytuyzvry|��rnnsz|}��}y�wsrpruruttpmllnjpyzz}zzuqturvzv��������yyvstx~�vtqprrniilooh`agdegjmprsnpljonlqqoorspoptvvspnkghdiqpmpspponptqlfdaaa``]YXUTW[]`glqsstwz}ywx{}��������������������������������������������������������������|wvy}l_munnpu~�{w~����zz}uz����~~~}~zux���������}~�~���������}wniotptv{{x��{zwtwxr�|vw|�~{|��{|y{~vtuv|}uqosrw|uw}vp��pn������|oapkg|�|}����|��������������������~}~������y|�����v}����zx{ytmimyst��~zz�~}~~|yxuz{vqnickd_`dffekorppqsyzwyxz}z~{w}o`o||��~}�{�orvvxy}~x��{�����������������u�����������|}rrt���|}��~�vs��}~�����~||����������mox���~|�����|{�ziWZiu��|z��������|\KM`}�|yy||yz~������������l9$+>]svx~��~|}}{z{���~������tju{�y{~|y�����������tqonoqvzssqqstrnlkkmnljiihhknpsumnkjjiimklnqrqrsrvtkefgggbgomhfgbgjijmkfkpproijfquw|���������������������������������������������������������������������{{����~}}{wscfjv{y�����������}uyqx}}}�z|}����}�����yuqnmost{xtphghmruwtsvwtsni`TVfhrvs{��y}�|usy�zumpotrsunswqu{uyt{
x|�z{xrz��srz������|��������zvvu|x|��wxv}}����������|~������tdivurv{zv�~�{|y|}�|��|v��{{}wwxz|zvs}y{��yrrllnonmnpjikmtupuzxzvtqmxzzr~vcljhkrjluouut{�~}�����sx�������~�{x�|zu�xxtz}�z|��������|���}��������������������~��yufYvvgr�q|�}|�wrz�������|kp|���������xx{}�yzy~�����||}}||}~~rtzsopiv���nix������|����������j]kt~����|�����������liinomnrwwwvxyxuvtrqssmgjgedcbbc_^addglolnmjilquttqmlosuutx||}~z������|w������������������}~{unsx{|}~~wz~��������������������������}��������������~}��ztmkloyzyvy}xmmvy|zw}}�����}wr{�}{}��}|��|��~y|�������}wz�~yrtxrmka]fcgnrttqvz����yyhn||opxt}���|{}~xopknlopprtrqqpfboos}tskft|ofggsssmjms{o|zv{mslklrjoqfagltvyuz}��~}�|vtv|||vmdhsy}���zz|�|�uqrt~{��{}|��umd~��������y{���}�|~������vsutytglrodgmjoxmoluxvznltxuz{y{sqifY_f���~llpzw|yoia_my{}}{z~�vwx{v�|{w|������~��|���������{}��~}z{���}z{|}}�zp_jhfuaUbxs~ybmy�����������wx�������~ab{�����|�����������������}q����������x������~s}�������������������{z~~��������dadnoigkuuusstsqnpokihc]_^_bdegkxkiqw}~v{~}wsvz|ytv��������}{��{���������������������������������|y���}����������������������������~y������������{wwyyyyvsqpmkis�����v��������~zxzzxuuut{{vz~��~����|y|~yw}ywnen�wy{yx~zx|pnsv|}vqw���}pmgjw|uvvielpj`_kxw{{wxy�}}tqzzv{umvqoxmllgu~vvzzsppmmoorex{|tz��}zvotpumpnqtvz|yy|}{rrzq{|~����w`ryw|�����rem�t��{xv}uwspury|y|u�����������������}�}}�~|~w��sN\wz�zz~}ogq{yv�z}�����xzyzjmrusztxsqfomvtknrv~����������������y���������x{������~��v}|z~|w{��~yy{}}��|t���{yuin}�}xvyx}{u||zrr|��z}{|xsxxkr~x�~�rtz��~~}|��������t����������z�������~��}���������������������������������hYammlimesognqmpnorrsyzsx{�������xz��������~}{uI\���~���|���}�����������������������������������������������������������������y����~������}vvrw�~xz|xvx{|{{{�����{����������}pkryrspnvwq||{qqy{v��wwvzopqurkluyvxvnmrmionnonnosxpsluii`ira^gnzv�|u{~qQb|vyvty||}xqrmtt����r^u|{���y��~���|ssqg_Wn����rn|�}vwxqlo}u{����pmz{xxsuurw��wo[ipkiputVnfenfhnviXcptmugY\Wbfhcglgdfch{wxyth]^ekmjeirsnuy|yrkikopV
]w{�~}yvvtx�xu�vmas}|qvojejoghsrmnmqwvv|���}}�������������������������������}��x~�������������������oeLOi�������z}q^o�����s���zx{w�|z}������|raPn�x}~����������������~kfdifhpld|zkb_z|}zww����������������������������c[dhhdchmsjbmz����������������������������������w_u��������������������������������������������������ww�����������~�������������~��~{����yywu\Q]p}����|yxy{�������|x�|~�~yvstqsvtswzyvwutusz�}umhkqvvtkoorknnty{|{xtsywrrnkpmqtx|v_Gqufdryr{v{}|tvptr|�|���sfr|��zrpx{xvvgfVkqpfO[r|ge[\ihchaabglihnnmeejfQQY`glsutvs|tjkluy|jfklqunfpl`dqpfdea\]cgfa^]Y`]Shc`UZ]a`jn\gxgmpqwlmohlusyuqoljmriiklkls|pjhqqoknilHLswqqlv{{}zrn`kwlefeqwrkiev|~wxwy~wow{�������������~}kgsigpqqx�{u{��������|������|}������������������������������{����no������������������������������������������������|�������~t_n�����������tnrootxz{{��|wiec��������}y�vyoos��z����������������������������~������rn����{{�z���������}}uq�~|�����������������������~���������|tz�{��zv����|u|���������|x|yxhfpv{|}��}yvrmhfingcemsrqswsw~���~�}vtuvrlunsvz~{yzvuvrlqztsrzyxqrtvz}~|{|�{�zv{yvw|�zppxvnruuqr|qljroginhajpo[gdafnpptfqml`N`_VWLQDI`ea`XYedZW_^^cjlq{mvoliv}{vtty}|z{{|ujtr{xjhjhacg`oigknolgnqqi^Z`gnhf\_f^dcjeb]ekw�y~�mz~~�vx|vvwowz���zz}����~y}�����yoly}q2*czz�zwx��xopz|umq{}~w}~~rx~�ymrusw{~}~}|{ww|�x�yuv}yuvtgnm�pqqu||{�|���}��z��~�����v����������x���|��������������|~����������������������������~�����yrz��}|����������������������l�����������������������������|�����������z���������������������������}��}xy�tmrzsmrxpu}z|~uw|t����~~�~������������}������������������������������������}xttowvu����x|{gjy|�~�������||����������{z������������������|u|urpw���wsw�yy�uwy�������}|~����~���|{vv{|z���t|�v|uT`yvwolljoi^\]]gjjaZ_gf`a`hjicbmfnfZc_cih[fgeq|xqrvutyyqjrek{rj|vvtvvpkkkgqupmepsvstvx}znfy��~������}lhu���{xwvzkdtyzrwtuqxzivzww{}xxvcRUklr|��|zy}}z|��~~�qik~�K?l}��}y�}wz�|qszuk`gnpughotsirvsy���ztz�zu|pbnzrllpstqgcqkhqurjghiiijrqq}�z�|x}yzv{������|�}���vrz�z����yf���|�vrv{|}�|��
��}��t~����������rt|��|]`o}ros��|u��������������|��}��xxpv`ay�����}}�������������s���������~p������������������}�|miz�vfmx~��{��|~~ww||x�|�w}ns�~vw��������zx|��~����urvysnoqzxtruzzx�������������������~||}�{}x}��y����nJVw�~�������}|��������������������������}yuf|�xstqmlx����w}|urw{zz}�������}��|y{zuopuwtqtxrnt������|��x{r\g|yxkgkomdeehljabdjkc]_icnnhrstrnmkcjfkr{ymjopmqpf^dmopvzuXFDfeju��wrtsuqgnnomgrrsspqvxs��uy�{puwrry���~��w{t}�xrm{x}�~��yz}ytyy`LQ7SrzjVRWlnoou~�����~|�z�ofrywuWelo|�yj{�yusry~knn�~zsjjcilknifogdp}{maP;E]inkhmileb[\aXdkfemsomisv{����~�|�}yxvxuz~~�������l�����������ku{�������������������qKJWp�����~������������������~��������������s������wpmp������zvttvz~�~�hUh����������{xl|��|����~}���������~����~~��������}�|xvy~uy����{u~z~��{z��|wyy}qhjrspruvxuruvplorssrqv|~{|�����������������pmnrz}���������{�������������mw�|�������{vuprtuwyzyqpsustwvy�|u{yutxxy������|������������������{x}��~��~|yzztm`sz�y�{�x���|���~��}{wurmnsux����k^��vywyqgqjuwwvqihr~wz~��{w{�vmn�olrilw{vx{sy{y�ttz����������{}zsmjossqu||u�|�|���������������n���rak������������������}�������������rmuukdsyy}zfQ_x�������nxuw~���~||tupqrnmtreSAHYhqoml^gk\6Bbt}�~y����z�������~r|��tyzw}���{����r_q��������z�����}oJJYiq~�{wzzx�w�{��z{~������}����������~neq���������wz{��������j}�����|x����}tz|}�{tokkosjjurbas���������omsnrz{xxvskeky�����||}y{��||�~}{tu}zyz{|x�{zyv}|wyqvrnonqzyxzwyz}|z|~~��}����������������������������|}yw}tvqov������x���~xslmslpsgpqsptqkovxv�{vtrqrtsuy�|z{}{}~}~}oj^^hfbksny~���������������|�n^b|��������wuy|��z|������}�y���nvhyzz�������������������WXy��~���zvpsouurtsieo{vusqmb]a^Y]homekbZ_fgnupvqpxrox{~xt|��������������}qjkpu���zx�����������uypZ}���pt�������������}{}�}vw���|wp{��xms��{sttw}zspmvurvosuul|upy{}��{zxu�yx~udkyuuoqsx|xyuwxyyx����������z����otvxqsy�zwzuuuuwsnwxtx|xqqx�����|y}{�sv{znruvwz�y����lr������|}~����}n`t�~~�y{|zvsuxsvzisv�w}zrpzyx�w~�{�~��������������������{���}v��������jnzwnrttsx}xoott����~zumg][cktsgmmu
zwy|wy~{z��zx{uu���{|x�wqy}y{|��������}|���������������������~{~vztpy{us]auv~vourp|~z~sr~ztsnrykmrdjntuunknmoyyupmmnrw~wtwtmnuyz���zpd]aad]GHfkg|y]^hasrx~{sv�vwpo�pgxtx~��}|}~vrnlv}w}yvx������~�zw}�tdt��������������������|v����w�tvmdfklmpk_Z\]cfb^chfdjERtY][`e`kqdamrxsuoaVny���{yxx}trpy�yu}uz�������{uqn{~�}y{lihhlp{kinvsmplmwtknwuqonmkicinponjeinzhfZmvkql\\jjd]]SJOZ`aibh_^a[lonfzujlf{|x�{p|wky�zu|vyutqx|rnr|x|y��������}xtsinouqanZjdoc\hmtyrjhlqjknddxyh`jqsondiY_jkt_gnqrxrs}yt}rucef`e^_`VYRUZ]]gpvuwve[otafjpm{snoollpgfgVgnyntz{q~liac[dfljknxz����z�������������������������{y}��~|���{|~{�~zztquyw|�~{~�}~~}{zyxxtpx�zv{{{�}~z���������~��������}w{���������~~���x�����{{}}yu|zw~{sl������~����t��������������zqv}~z}��umvwrvxmu]l|wrtxwx~�����{qopy��yx~���uiewsske^dendZekirpxu|}~}qv�|���������������������}xvoxvw�qmid_ieje]\gjjjefnlpll�|tv{��uq~|~�svyteYjiurt{wrnhGTkqndmopujxt}{mvvndr�|�xw~��oblnbdn|y{}|tr|{i[S\WD@OYW]RI\bfc^S]flacgkehbclimrkgZ_rpgpglcpm`F,BUadvfg^V]YQFN_S:8WU\V]\jjlopnlkjtxeglnicac^qtonxppz}uqtrm}wtcdabgb/7uchorcehng\hkspmbeTX`jgj`dgml`adclWdZUhvywkruphhovtbgjsf]ghmrrnlqxosaU]ingr}~{ghle_ktsqrpk[XX\TNRMQcs^JGSPRKNQMTchloc`adZYU\a^Z[^__XRS[emqp}x~~x�y_\dmrmskkkkkkkk�����������wr�������������������������������~v�|�}zv|����������������������}�������~{��wu}{px}~|{�����||��p}����~�z~qs}�vy���{|~�~~~~�����������{�����|wz}}~�����~wprsrooquv|�~��~��vtrmonwpgbXZ\`ggXXbvwv~x\Uhqs�������{sppbeb^^][^gulofa`ef\\PY`bbdhhd_ce]OTT^UTWY]bgbkyvwupsojdaqvlvz�~gp|uzmi~���vrt������~{zxitgcelpkmxtkrxxwd`TKORE66CQVTSX\W\aj^SSUTUZXX\`\ab^eg\`llsrswpkslpmmd^YSdilckdiVQcjcNIUQJKgif[\\khglprsrd^^`jink[__t{kdjnsxwoggkpikpqpicbipfCEd]\`ce`[WXchj]mWRVRRVWW_]aWZZSWYTZQT]X[jjf^chikmopsy{hknlfgb^]]^ae_^[[[aeXcd`hbhnkdbabgjjhm`UYYY[P\[_Z^]_XUTW]]VRWTZXXSVV[UZ^^\[\\\YUUW[^`ZTUZ]PCQPACIRZXZkkkkkkkk������������������������~�������������������������������������������������������������������~z}~���~~����|��~�������������������������������{qpu�������������������
������������{~}zusqokxwy��}zxrojdfivrea^qouxvifgsvrwz\Qff_knx{y|xiaegilkhgc`c\d_d`dcb\^gahid\hhieab`YahgfWPNIHLTWakd\W]d`^`bmifnhowgjlegahui_n\g[dbhdbnmnohfijcfijcXWgdbhaehp_dhgimgYdjfYQU]`olloojssuqtntw{qjonwvllrx{|{xtsuvnqosunejeh`glu\bfcfecc\TPajea`bjhiottokwppuscjcXZLjo`col{�~phfehgcgffnllrsi__ac__\d[[W]afdRmVT\c_c\]]YWUSST[_Z`D;UUQirupqponmkihpode^_Xa^\[Z[dof_`e`fqec^[lkjhdic]\\^emec__XU\[_WUU_XVUSTS\f]Va]][`XWRQ[YVUSQSVTVXXXZ_bYZcbZPN[_[acdhb^kkkkkkkk�����������������������~jw���������������������������������������������}������������~�~����������������������������������mq�����������ydbjs|������������������|w~�|}����~����{wx~|smn~|���������~tpwtqmlvkimiwzqkjilrlakdY]^dbX_jgiol`ghdcb_`^`ghstvhihi]`blpvpssnljjgls~xoolt}�xuzseisywxxvx��|�mmt}z����x~k^�tq{wqokntsni~vx|~zomttuykow����y������xw���}|���z�������~wy�ywqpmgibbb\]fknftlnpolgdf`fdaJS`ksrf__NFMbeljjjilrvtokuyxvmbrbiiWdi_[ZYdje_\ZV_^^]Y\eifjdaig^\cb[ZX`acfqienmfopb`U[]eki`^]__[a]Rdgbu����~vomnntsq{�mli^afjfacimmpvstyksrp�~xqmgecbbgq{ikilib`Y\_`^jimqkjhltrqywrs~wxrmlga]XQOQZ\[UPQV[`W[[X[ZZ_]^ZW]aakkkkkkkk����xvx�����������������������������������������{�����������������������������������������������������������|n�����������n{����������������������������������������������~~~~����xx~yxxtu||~|sbNQht|x|{slshimkgmokpxuttptxrgan|{{|vt~}tqtttomqffcdZjinmqinorpwqlsrssv~����~w��}{|{v}~xuvspohy~{y}~o{vqnsulib^sytw|�gp��rszyzw~���������������{tko|�������}~��|�{�x��}���xu����uyzvwt}�rgjmsjwlhmtupqqkocRHSfy~vllwsmo���{u{{}��������~{z�}zywx�������������~�}}���������zwpkdgcaZNZUYPcbdqgnmtlnt�wxwtsszolnrrtxuqrqnmnppw||����~�}z|���u{snquwp~�z�����~||��������si^qyxp��������������������������������{|������xkp~|||ypklhkkkkkkkk������|y������yy����������������������������������������������������������}z{|yux{��}�����������������������~�������������������������������������������������������������}wsqutkfhklsupknvuku{wvjlsrtpspqim_ib]`lpkslstsmkspfl}�|tsq~��uprtvxsiggxuy�xrrxovv}��{���������oqvu��unt|vgdijoqpet
vy�tv~{{|||}�~������sq~zlpo`Zgz���������}�|vvrrtxywrtusru{�wmkquxjnlcvn}qpv|~�}xx}}uvytu~�yu��}~�x}��~�~rw���������������}�������������z��������������������}�{����~}��~rsv�{}}uwtl_jojynojgadoniqsoqv~wyqv}{|zrrsv{|wqu{~|�}������~��znt~����|���|sy|}������������������������������xyv���}~������������������|wnelrx~|{~wkkkkkkkk��������������o�������������������������������������������{�������������{wuz~���������������������������������������z���������~~�xrv}}tkyt�����zxuxswqrqpjiptqmgfehmpja]flkilonirmmoopbilfmhlkcjgbdfcjjb]^]gpnuoorojknuow��{xx}��}xst{�xjgkwmt��zx{r|z����������}����ptkuwXJVvynouw~|ulwnq�~xkPJau|�}��}}���upv�~tyzzz��~vxutotpsputyqgTHRcmohstiemqmipoohii_kmemg|zx�����y{zyz{xul^s|���}�{�����������������������������������x{�����zomlqwwroorrqxpfhgjkmkdahqsulhilgedopolkrwy}���|v{zv~vy����������������������������yke`[nvwwuw�~s}uwurjhkrz}}���������������~|�z|��������vx|�����������������������|��v{}tmls~tkkkkkkkk���������uIQ����������������������}kq������������������������~~z���}�z~�����������������������������������}��xqomst}{wx{|tqrttqptyvsrrpnryvgsw{�oc^qu|�yvztnryvonisywy���z|���}{}w�~}tgi[Yhnzrx~~|�z~mYORO`xz�����wsox�w~������������qjrjkrjvqlw|����z���{����{~�}xy~tzt|�xx|ebx}������~��}~rijb\NTltuyt��o�}��vtw~y|{mnjprhhtspcebox|tyleI0;[s~~}zxyyxsqdil{{o|��yp�����~��������~{�����y�y��~~{�|s�yvu��{|������|osy|zwx||y�}�yvyx�nv{{ypty{zz����}yy}vutynqzlklroqmtmgqthifcYbkw}xhlfjqmurkr|�~z������������������������lNFR]_z{��}z��}������������������������������������yvw���������zx���������������y~������{v|jkkkkkkkk���������\9^����������������������wt{������~��~���{wzyy�����~����}�{~yv{~���������~���������������y~yuhkpijf\TUbfnnhgjlopqtvspquwyuzrpszxurv}r}|�RY��z�������}o}}}�}}�qq~������������~�������x}~{vu{�������vv}r}�������xy��~����������~qququ|yw{�mizwwm���}xxsurts{��|uikjcKKasZt��y}��pdgxkciobhk\Wb_\b^edemeljgohhin{�vwwk`_miokdVmy}vlmw�z������������������{|�tW]y����wtz}szuv|tuu|wohozvonmoouvvvoowxnvw�~���wpuvx�
��~|y���~���vu�~���������������������{rxuz}����������y{~sxjfry��nlkkgv{yx��{y��|�����������������������}����~|nv}~�������vuqtwgubl]u�~��y��������������������w�u|uz��{����qxk{��m\mq{xm~{{x��|z�x|vvvvvvvv����������������������������yvv��||zxx{}||��xx�~�zkgjq|�zyz|��{yq|��qep��������{��~���������������P=d�{�r|{nxlgjw|{im}����������������}��������lw�}���������������������{v����zqppnnkz������umow}~����������xqp}���������������ob^iiquorz}yxszu`k���|~x|����tt�������{rfoy�{{��~��~wnlrlxgmroffim]iqmbiqkv}�y}�{y{z���������}{���������~�~����������~����yz}��tw~�yuihbefpopxvvyvxyxru�vptvruxmfhgjhmmt����}tqu{������wttz��z���f5Km�����}z{}�{�}|�wursssory}xuupqrsv|}xso~������������~���qk{�|w�~vzxr{��y{�~w�tlrk\o������������wx~}~�~�vqusrwv�~�z������}w�~s��usnyyz~�|y~xt|tz�u}��{x|��un`rwoglzvz����}t��y��|�vvvvvvvv������wu����|yxxw��|qhstkljgeehjhb_eieemsjoytov~~�|qs|�������{pv��������������{|�|req��������fT~�������tihu���ch����������|uz|����zwwpkii��}����������ymwz}{y}z}�~{��������vu~���zu{q{soppmpv����}��{|u{v|���������}ibqxw~{�yvutqmzyxy�m[{����}cau�}��uow����~��|���x��������������zrwtpydovyo{}{�������|z~���y{��~�~�}������|�{omz�tpxy�ztyyxwuroor~vr�{wy�vrrujrvsqlislolpjmrrnqxyx|}oiqtwnsrpu}�nv���|��~~�zyxt������xf^`oxwv{�{rkkixywtws{zx}lgnr~vvsztstv~�xijwotvukrwwvm|�y��rGAqywv�~wz|y���{{|�w��{vuv�����{{~��z~z{~����|xyvysuz�~�~zz�}{|}u�vsxqpmnllsxob^gXciqlpc_rxvyzxw{xu��{������z�|x�|tw|xvvvvvvvv��������������z|~�}zwzsvmdbfijkrolpurrz���������������������������������~vy���|~�{minj``|����������������z~�ynwnjcy}nokz������������|y{���}}rdjp�������������������������}��wsvvz��������o^gjxz~|slr}�������tri��������}������{gXi��{z}�ykceu�q��~zor���rx��|}|�z}ur|xu~xrmpy��q����{srg_gmvqpkmuotxv�}���}�snkrqyznjpvuqmoilpfnu�psmrpwtrou|}lqsnprm\grtkjhhiklmnvxv}sslnlouzlprovk^[RXcngkrvurokpnnghqoonsg_kdgnqy���r{wrrropuwtruuvo{zkoqlebehlhhgidnlsec`dimurlmovqrqe_cebkuqxy}uvwzwwsuz�tu�����nZu{rnzz|}��������z|}~xwvosx|ov}��{u}tsroxtv��~{~yx
zvv||�������u������vyxx{~wooicZf}}y�������stvwv|~}������~vbmu}t|towvvvvvvvv��������������}�������|v��yz{vqn�����������������������������������������zqu~zu|xuuw{��ynhmw������t������y��{�}pvtpjihnbki[Vapvdu��{yI-Tyz}�������|zv����������������~���������}|~~~|v{��{���������������������������������������������������|�������������~jhgZao`QSLWWT^eogaefnShmurfeiha^h]egmpd[vv{zyv����yyz�~�kfdkvzvslht��~nhlh`fiunkvphdTbihtlijpeirknpmjkqwkwuoincYmw|�~~sp����~vzx{ytpllv�zttrstpqr|jgorlobbqouqqdkmbSU_ck]UV[Uaaequj`cp{�yqif_kimcntkow~�~z}wtmmwrvtjlqlmqmpmrw~txvyuoo�{zw{v����~x}wutuwly�{�}ezvvprpquepspm~�yzt���u������}�~{z{xzwx~�����x����pdknuz{zwty����y~{ty����}nnedfcdfgd`Y`_rxvskgyrmqoVvvvvvvvv���������������z��|�z|y|�}x{}|zz�����������������������������������������{vz|xuwpghprrx}tpormaYYUUleiqgcfi_]]`llpqxm{t��}��vorrmfkojhrXHD`�z{~�~������~�~����xw|qu���������������������������������yvy{|sjrw�{�������������|����z�����}������yjfgbmrlw}mxxvoqsrjnnlfhhabTVdkg`]ag_hmpn]^fnikq��r�jutun\^afg`l`nyvf[Tafw�wvqoqnkmwoqyuwnqylh_kqx{xojsijgh^pvmonvsvwtokmpsqxskty{�mvy����������}��~����~{��~||�~�wxyxkhnlyxw{laioof`hnjnkkkpo{xsonsxyxxyy~������|��{{utzyswoqknnlrvtvvooukpihhpvwjik|zzuztx~{{tv����wttxoy���|u��tlv���rzzvyzpyz�������y{}zz|ww~����rv}�����~���yjpz�����������������vpqurphommihhlbhfptp}v�w_kwdQvvvvvvvv��lojq��|sumr}tt�zx�����������������������������������zcxw�������zqy|sy}}ww||vtwphimklpoihjoog^YfUda_kumopxot���xz~x���~���zwyumwlopnyv�lmxtuvooqooy�uosv�xp|rw�vu������x���}������������w}uv{y}~}xqnrwxqorvnnu��zuqwx{�|wpsuyd{����nh}sy��wyzudgqov{itoqztnjmrnfmpddnbb_fkilnvmfpn�ouoxwxjt~uphsmokiqiiqy�susg_ddibdakkjovsr||zx{x}{���~qvv{��o]dnupmoz~|~t}}{||zyyxv}wlipgltttsep|�{uwywuwu~yuxuqtjp{{wx}�����}pklmgmmgpsfbcedhswpjoxyxq}}�wu|����}wvx����yw{�{�{xtvwyz|zyz|{xsoq{ttrmoqepz{w|���vjpqsrlhl{oriovjnou��{t�|~��z|}�ustsvsxz����ϸ���������yqt�r�q]U{�������������������Ǿ�����������vz�}�tyojgpzxgSVk}}���}qo|tjvvvvvvvv�������sganpw������������������rtw����������w}�w|����
�����������xwm{vqsxxppx{toptqtvu���������������|�zktkt|~�tw������z]fosy��{{oonlwmwyt{���x�~��~�|sidpfb|tpumk~�xxsov}r�tv~|srprskdfn[aYktjouu{��}��������������w||w|}v{loz||~yw�zx}zpsxqlksxnlob`\\jhkc^nsbbmli_qp]_ihgajr_eWXNAHJZ����zdqyr~����hk|tv�vlnnrjwxr�{x���z�rnfhmxt�ytw~x{nu�{mekty{u��|���~z����������t|UL^qq|}i��ӓ�qvoqnpw��|zxyv�������zuro]hsylkqozfciqpkmuw}vy~zhkjojjottsroigipp}{w�zw~|hyuopmtoojmpomliilfsqksmusthpz��{���u~�g>EVQiotpegqwwoir{z}{�v�vwuy��ƨ�jz����op�����в���k[unnZ�����rdXVZV_}������������pc��������xoyi]W]bls^bmxshy��|o_rvvvvvvvv����������������������������������������������~s~{����������������x������ujpzzwvz���~�{���������yxtpolxfmu|��~|���xl�sirukgqee\auxsl��������������vv��hd�������}q|uupzmi|lgcnj||f\[ppzkuq�{��������s��xwpz}�wyvrswqlwwljt{wywifwy�}���}utv��q~�w�~olzofnmh``V`b_keokakfbgowc{~uoiP>GQRZdaib{�sqlhkvxo�plwk�{�wu]odegktbevg_hbhgge]jrla\Sf\bffo~sqh`[ahg_kdjqd_pkfi|pu|y|ljomzx��x���}~�w}��~�nv|{w{�{yp��uxvlplbtvtdnws|ymqyoj^be]`fbqwupnujvm\W`kjS[g^lwk~rknfa[^k_h^geUCFynbldbjlckrkjef[cG?FHRdji_heKF3*3<Vqpwyutmsloq�vx|zyk]dqx�xy�ѣMQMB:K_dHB;V�zkr���޳���T@|g`ZEEAGDEEI=ARVdfb]`������TGc���ಙ�����zz���wzvehlfkuwuqgbrrrrrrrr�����zx�����������������������������������������������������������|~{x���{x}��}}|�������y��~�������{~�uh}������������������kV:@OIj�����~���������������}s�����������|}}�o`knFK[_{s����u~��z��w{�q{~oWVT^cijsrqosuklnw���~~z{yo^kkqc\bibhZ`ommhqloqijmjajk`_ligcctoridwrrptlXgjhhplgpvwxutrlmmkgmk`_ff`Z_^Z]jbeskV\Y[`d^]ao_ek`\kkea[XQjkjgeppsjmkljlkng_SQQ]UQSYZYWain_dkgkgosuvkb[YeXS_YT_okdegYZfljnmhegbhsknilorh_pgilqqorrjjmlmijkrmpnsmnslhgl\biaorf^Yh`hegkfifc_bYbdcmbf][gkhigspvplmnpnXT_V\_[]mpoa`aRKOMcbTgkgej_Z[ifdaW]i_imskeeppk`ZA1A<;?@KO9C@5EG:1FRp���XF?TEBI98483338.-64>;831/?R[n�t`Yua9y������{PQq�����|plsjpsqv��{rrrrrrrr�}|zz{x|���{suuz���ggm�������������������������{����wwtvxtwpy|����ys}z������{wv����~}zsssqv}y}{��yw�ynkp~�ommtystr���s|��twyxytuyjlhojv{~��|gWfiqsqjdkfphhjcoyv}jZmec]`[keQarzxr��ujrckoc[x
iVWjXdskURS[`abjnkhy�{~{���~�|wsqoiidnjegl`oilnfiiknjrqwtzninkmqrr|{~lrwomhe\e`\a]ZV[YYbhmd_kltg`Qc`\`aTMUUWdacdaY_^NWg\hg\iQLJb_WYqd^SGN?KSY]beqtoz{v|vrvstlnnqnhfhmjfafkgozwvv|xxwveGFH9Kao^S_mdjpoelnijojgl^dqrmehmmf`k\`qphknpjbY\`kryytusysvv�|rokkdkmttjajgih`qnnqsvugofr�pix�ozylhuvkhnxxqv}zt|sopilurpxumgano^qqjg{|szutf`n�jnlswlqvvvvjQHJTWQ82;:<=7+%) ,4@ORPT]d`[_`hXSNRJEAD:A=.0*/1./=������iR:*Oua����T=.-��˹����{|{{��~urrrrrrrr��}y���{t�zwuv���u7:U�����������ux��z{�kj~�xqyqrxw{�}{xx}y��������������rlpx~���������������������~��{t{�tsz|~x���m������y�xm]imtn|rcchbUQXakdeg]dU`fkiUYaZY[g~[k�u�����~z|}wvndfij_Q?JRM3-,Klk[XYR[Vfeuq{����tqievwwx{zxz�~tuqjb^g\_ftwjr�~wyvyz�|oyz|vwvqvzcbgd]\XPXZ`e^ffjebecb]V\[lln^aafy�xu�rmdcn\NXTebbofjcYXIAOiYl`l_a\Rab\`\b[PX\kqwt{ojip}~�y}yzt~��xz�w}���}}u�x��mmmxyw�{uw�w��z���~����|s��xv||vtqltv�~sx�|vsnvrusyutogosq[Tamox}x�}|yslPiksqvqhlnwtrg_fqk__wp{�}~�xt{zukcsqim_dkk^^]Ygurqvxwovmv~�~���~���{����rqw�����{pnnz\evt{|`QVjto[[c``gjfepnqeVUabehpjgppiLCFFBYbHe���`A
T���|o?4+&fuSћ������zow}{{{rrrrrrrr{yux}}zsxxpoustz{siXck}������������|}�|u|����|y������}�����������������������|}��������������}~�}uw{�y}�{��w}~��}ulv|vl`imvsfjookmsg\iopykqwou|qt��|}�|���y�����ehlmx|wftmcft|yhhrjeld:,3Suxx~�~�}��������`[^Zme^cpvvw|{|z{{yxtdfju{u��~xoirrtp|vvuvnicjimgedghga_`khkli\\hmqsifglmnhjtrkg]W_tUTdSNUQT`YPU``hZS]SMZSUTUUY[X\g]_Yngaepqfihjgomqsjigagadirr|{|uvuz|srut�h`lmjYhYXTUh_Y^d^`Tdpkdf`]hg_[SZ`mvrtwpjlgjddo]bfjvolhpxyriryy[WexcP_ieuomniKn�mxcb]ddmqxoshhhwpy{vzy{q~|ximwkfkqilymy�wim{{��svspmnWJqy�������rzwvvuimptqscagfgjjhlwkx}uqaYpzu�����vhpqrztrrqzv}zxv|u�tuowxm�xx{qstduVRE@Ii�P.0BFHu�W���ؽ�ywlamtprqrrrrrrrrngeu|usrxxpsyz~}{urwz�������������xy}ux���lcj�������������������������������}~~������{�����������vv|{t��||v{sorxypt~xupum��ssskcZqul~�~uzw}�������{y�~�{vpwxm^ckn[^h`p}ochqpjt�|xzzohlqj^Yamfjppj[UTa\fqmlsd`fbb\[bgjmqb^b]fkjgmwtungqof\eda[hhlelkiedgdWUU_`bZZZ]XX[db\gnfjpiee]Zd]XQQLNHGMMKNMMMYUSTSVZY`ZYAETWM@AGOFNSbaej^cbb\qiieoj_cfeirltmb[]Z_[^ef^cklaecbfbfhkm^Zblk`pfqvrrihp``fcinnmklThcm^b``gspo{}svmvwt{ebbhvwvwxtqruspkt}onwzqww{vd_czu�eglspsdsy}ryhwzmtszz{ohppb_js}mpko|s{}�tlsrmqnkbfefrf<bkju}�xoqeiXZW[TW[WYnslhpnlv�nkvhwph`]nx}��ysqnnwxw�pustyqupnkrlkxz~vy{uxt~�ow�wxpus[agsmrnuht�t�������qumrtnm_rrrrrrrr�{qw{z������~yxi]q}xvx{{��������������~xcx}xnz{y}yxvz�yu|}���{r~|~{x��w{�����������������|}�{����ypttumeekh{�|yusm\dgluvubgv}xzkM]jz}||�}��}jb^dw{uz}ml`hdopmfqWcbaUOb_ljuyokeptncsptvsme`bjVRZh^a`\s`]Ui\aaX^akhffbXSZdcbfgnab^msrqnsoxzvylhfjorpt{rgfolb[keeeXTZib__afeomjnsqhmqiiiaY^akf\WOLQTOMQWW^a][bNLH@TRRMUbK+16*)>S[linvikjjiokrmrkprsqwtlleg`feghiomjhptupsmnmtqsoktvwrrvmqtrhlmf_fn{vrrrix]kbnbv|wnnf_ik\]VZ^^_UTIL\lib^hbajg\ni\hlox_OKVdc\Q[Wenogbea^^`pmpv~pjqghqwurjkkfjmikpsplfe[`aXaajqjigdbqkhl|ibjgkglkr`bogi^YYgkhecinuqrfafrmnx�ylrolt{��rszxeqqjmelflkchffdjc`aQUbvzukvffrm{w|pxnoq`ef�~yyvujr{g{rhw|pppnulrrrrrrrrf{~{ux��{s[MM[dP`o|xxoprxtz�}wmmibl~�|�rrig^gny�zsjr~vlmhurtuvoqtrsy�t{����~utw����}woinooghh
_^eecoobfhacfn{nqlqtxqxhun��toSRixssjG\\lmgQ`kdZe_gecj\JOMSN]W`[`[\U^Zbj]kk{n�yvzynsriugotrpxyz|xmhnxfca`ahwz~okleiYSadegn[[jryzqn`c\bbfpuhrwx{x���y{{ujvxxtxvvjtunpoonxpouv}krhissplsujled^\p�yRUdrqhgpvrri^uoxnfje{uggbsQQ`PYRcnotnlmdjrqvlp}}k{pecnfdg\c]jmjihvuqhuz~qwjddqlmo_hs{wu`Zh^\dngpxsj}xog]OgX]]\Pbnsjd_]aX]XRJKOMQM3+:[aa[[W\kldysny�~lYjj`r\OQSdd`Ypgg\`d`iKWh^qnjqgrsutpauz�}okcjjpe\S[^J`]_hfjofcg_eR^d_Y^ck\\dghjfe^Zgg`eysspphgrxTUNNOS]^WXUKITN\gbmrdf^\ZYVj[V_bfdh`\T8:OfcY_d_ZXhu^grsiomeIJVRX]_omfhe]`]Xadibkh\b^rrrrrrrr~xkecm�wysgam~�����skkwn}spkiduxyup�}qyxrxxsqtwuqu|nkotw���~���������}rjhihprrmgeinosokijjmutxy���|y~��|y��y��|q{pptkruw�qhcbgkh`ZWccaWZZc[]YXVJCJMRZWU[`hirppnwsmnbae`_`]]]bjk]U[QSXaXot|{jie_[VRPT\fgitveeUbkf_lgrppaZibemr}zuzotuxvtrpyvpx{}npt�w|}fksnnn|�runolxic@\jpfoqjdtniu�tty`[P]nli~re\O`�}~|}vjvik`frHA4-0&9Gcysjr{x|zmizwpxoubmqhohiwyutgmlxxlw�p]WX[hlVL_sjeK[E3>4GDO[VPUVPYfc^[\_[nre[wmlg`]\]]`_ZSV`inbchhgieg^grmkxs_^ccTGoth\_\\_PEKKday{sqj~PPP<9Wh~mqkbeeddd_VU`lbkhgd`gmcecibV[ZSXaRbcf^gYT^jpk`dh^^gX`\`RVXYkdwijgeW[QA,.AIIKIFVPJR\`ffjgfcmhoxdS]fd\ed^XQQXchhdeeWPShbejf]bb\h_?J[OR\V^d[Z]aia\Tcd\]TPllllllllz�yokr��w{{qs�������w\S`oiyyvi^Rdvyvrqp�~z��~vgjis���triglpqmhlmgknpz{qpja_gs|ssuz��������������z}{xllhjqmmy|{wyk]ilch__[^uyf_XRNKIJQ_ih^ORYPRONOIFLS[ed^_dllpnmdgcbWX`b^cmoX\ZUUVWWMHONQamqmihaYW[_gsxrkotphILurhfafontjaopqxvuvwxtmggotsvzvx��x�~��{vxqzuypppjdjlpu�y_klmlmrrusvx�~p{�qrnniWUr{allw�}vxrmkohnpmcBB>64/@R_ljcjy�ytvnmnkpjqgj]gikmkozyv|yww}tj|tSXS``zj]HekqlIPM9;=LQ`kpjbnmlqvyrkwvnjnaNZdkmnpa[[nqfa\ghinvadg]NRUNWZ^maPMMGIVMJQR]VMN=)&$-0bnemqiUUSWF:E]``Q_kYRY]XVbje_a]ab]ckglbgeVYWV[\]afabcZUSNNU[N`]VZV`S[`_TEPUaVRONILC3 
$-BIWPYUYULR`[JJQ[VZ`qvkbfjiY[arjfe`\bmgkbZ\W]`hgbNFVbfdE?LPSZ^_`^fbUZ]hqmeficVllllllllUeaXWY^]aeeTUa\[_W[lq`V^c`lrqvqhU_qto~��xtfcd`^X]]\UXm|we`[YULHMNUQQWetvrtrnlpy������������|�����{kjjnivmkeaddcfbcgcbmtsvlh]U`ammopnf_[aZV]i[]TXZYX\[Z]\[\_ccb`lfaiipmk^[XRP\gh]dd\Z^\UZUYTU]^Zdb[dptneryxomtsjdZWdepxn\b]gonsqcmsqwwkitpjppq{tvtzykt�s~|�{m|ppymjlqprphjnx|{qoskpmqmm_gmoojor]Jnyl^di`]igfdkbiegohcfk]hYbg`adhtxzshjsxxv{pvwxurrioif]quvtuuz�y�~�zm}rdcrtinvtaplehmmaXaOG[vqtqypeiqvzruoupilkW^dhjijSSTkoaTDVg\d~lyyeK_X`e_WSXUVR=FJ31:>LH>EA3B6DRtngvf==Q^]55M^[PbfTQYa\Wbg[ag]W`kogdgX[^Xab__^cecag[UVYWVZ]e\U]_RZXRcfljnme_XVSSSQHGHPL^`[V`W\YUYZb]QV\fppZZY]`gY[hpvukcdileujgg[Xjmhtm_krgurkhf^\ddcclh\bkz����wu{llllllllQ]SNTTTTNT_Y]g`enjht��wn}xukaekjMZqyv}x_`aZ^_ROTPORON`uycgfdc\\girl\[hnebekrw{����~zxy{|~{u{yyxvv{z~wwtumj`gyvqa]ZOMSRPTbeigaa\^[Z[]]ZVfbZ\]S^[Y\]_eijkqi_dlnkcg]UZW_^Ya[PFDKQPKPX^ac_XXZW]Yojnmwt}|slnrnkgkpmmtws|����}�xtxupgwqqhhmgkakjmcbo]YY^jho�{��~zly}{ffnedakr}yu|yxvmndgb]V[crzn|}pt^N][abb[ff`XPYmhmfolZWY[UJQalghhXPZdefopgVY^]ionejspibW?R]w�xlpzvukjg[g]]cg`QNd`SacXUQ=0?J0#5Y[[_kc^a\Zde_`hVNT_M`^`jq{o�|{q]=%,RQQhY_a\^^XMZ_Ya_imnMSXHJEK[T:5EOWK`cabg_bYhpiB
7ER\]gngp_cmmbbkngumadlm`VV_c_^\W`\`]j^gkoe_^`fhdhc`db^e^V`did_dgkfg^]V\]a_na]SWXW_zs`acsaT^_LQkp`_a`gbdefqsjisskpzkmng\d`O[f^`c_cyztljkggb]]ajn|z|��XO^lllllllldgZX`]aiZ[ipvvmplqomqrjbsqi_X[cdhnmeac`X^fhtynhiqdhy{yyy������������~�zz{||}�~ztnigf}wtnthggegljokjdgdkku{g\_\XNOXY\Xknhd``_]]]^aeiknkagchmewsnhb`]YXWNPTU[VXVRM<CHF@FLPRSUY[X\bekldowvyo{mmhploaWTe~wqkqvpvmhlwwstgr|zwurw�}mqmdbZZ^lr|qjsaQ]_dcfuvr~{xolhmzz�uvhbmeddvqxytiqrj[MBUswoqmdfa^heosiS[WPYZmtkgYpjaedVYdcgrocQ20>Ralrk[[YWcaZRQNgjWRXPa]c[WZ\ZMWXXSUeWLMJiVK9CVJFHM)(65>8Oabhmrqj_^aT[_`OPVgj[]aho|{�����fG^VWdYQV]rf[lVQ^Pda]hWWXTX\STZXY\TY<ESSS__cjgm�rW]pb]Yc_UfbcieVQ[e_jki_QQUhWj_JWXUVY][j_jgagnmiicW[^go_QTX[Y_`dffjjgibcZ^bfhr�RUTfiKS^ejadlX\g^eoeichccdcPGJPW\_]YS^[_X^a_cSWab`akpwwzyuujehdZZgpsjboxvsjhllllllllfkglnfjsuigpngc`]ceegffjhkimqpqkfvyttrosw{sqv{}w�wmn{��{uxurvz{{smory����|ssuvz~�����{~��jpnm_efbPQRTJGFO_mk]]ZZZ_aZ^jiqj`bdabbltvspqrmopqamtxmgeb\]_^W_X[_ernkhggZekeLU]eje`apllhelnbkt|vyzrqumuu|owwmcZlzkhovuszx|z}{z{xqu�~sssoqswxus|vlqigvqfhqqfSZmikmfxoeku�jpxus{ovrrfqppw~ejd_gXZi\[YkmbaeVRXPUNWYdkQljisp]`NV][@$%/.2G`jhb\]aWYLIGIMK=)!%3Q[[V^_PHGOTROVXTP]IY[]VW\`WR]LC=GWWi]af^fbcagmqsiafihj]V\\bc]^fkfojruiqbnhnqjbaWRXitnhebfa`ia\cg_\db_\]`XWHGYeZWbTb]^_aaZnX^^bfebdlj\TUWWX^[\\W[^cTpcIVUONYV`d`_X_bd_]dg`XRXgZHERWVcj}�ykhjlnwutuvpimZ����YI^pskmi]VVQah]]W_ZWZW;7AMU]cb]_glj\fpcklnkoqoxtmywotloquplnnshbstr}vllllllllllqszyswz|pjunk|vlpqtwvv{wztwwmhb^q|}����z~{xvy}tz�ym|�������||�~wzxz}�yustrot}}~~{vrolx{cpyoprvola\TZ\UV]_em]baszkls|widmmehijh`YWXZlVUSTc`]WSWZUSVUYhb_^ezyvigqmyxkovrlkd[Y\X^aZ\`Zjonbgoouzpsup�puvjc_tkkfpsw�lmwyrsuwvog_mmpyvrtojccdefcsvxpu�|dG>E55<TdiiwyvvddeckdakhhpbZSf|po_Y\X`og]a_lsyqcbejprokss^ji]ghc][\[XSHDNVT]hf__`S_ZVU\c[P3
%-.=L`abw�r`Z``TOXHU\a]U_W]f^X[\bfcfuq[`ceWOTVXW`b[\_NWRKLVXZZ\adjhzlob\YU[e_V_`\a`bTZVUS[[KK^jjg`ei][]ORdnpf]ppghkbUZ\Xd^giaf^hgikj[OefX[b]W]c\[enpgc^dVDQ^__jfqvolb]Z[_ceaXT\XVWd[UTVXX]VZSHOOV`fcg_daT`��{|rtn_ciaUWU\^haX^\a[[XUEUbjkqytgSX_bfslomi^csuurskuuqqnuvvsgeheikihffllllllllifabdirpnmlwe]wnu�}||{}x|svumtxpwzvv|{pt{��������wt�w|nflz~|xquy~rq|wtuuvumfmz���|roszpkok{t~�ngqyycair��{vpofhm_lvigad`XYedbsf`WQSY]\F0DBFPT[TR^ieabaYrm^KMlurcfws{zpUimdaacigZdqe[bhu�����{}zqyum�xypilkqjZnrv�ult}WaitpqzmSd~h[iebi\ipjt��{�w�yjm\B#[YQXmmi�~mbf|ox�}wo\gdnwz�jm]mnf{mdZjq��lV_aU[Wbon[bqbhb]A`y�sdWKTY\ceckx~hknutrb`J@Zw�xUflb\\X_iy~ld~x��wuoxw\r[hZeaVQ`XjQGLN?B=EPP\npeVNNZqh[T_^T]pwbxhyg\_cWufj{�ukUgdmwgpmS\^[ji{qbbdlcLUmokfi\X]RNURMD6Nf�iSMWYSa]ZXOlpfjj`]V`bgg]YU`mcXOP_^SWX^q_g\UNOX]_bbgnhfek[YPG9CM;IDBMILOXW_dl\VRaWd`_fgY\ja]^WadeZ_^ad_hcegb^\`aYKABZnaQJ_bYOLJRecVnzeYWU`\XU_i|�{qts]\ebefllllllll_snqamqtopcIBNZfrxupjslrutyzqoqlUexz��{hbiimfljmj`\bZNPNPNQZ`^hYQK_e_jmiaR_l[Yjnlnsofldljmru}tvmhbbnj]af_kmdjnb[hZZQRdd_lrdW]fVfgb[icMKBIL[ORTVXohlqrtqe\crF'@iupihok^`rsvrosuz��y��uNAM\{�uvyqkkkgplieqrlqnjifr}pznnm��yqloshfz|h�wo�tn`]jSHm~y|ucUZfOF1&!-.6Xa`Z_mj\k^dnnxzourecmeoquybastvhovrxcPWVYUVaY`glga]fcapoljo{et`dm{�y}��}z���{zz}qopqkhw{mbScRWas^f[[ZZeeM=CL\eROa]TOQZSe_Y\YSVUTUbak{�suao�cEVjonfetzrlrpswojj\RSnsgprhvu`vraxiagVcflsqnrqiisl\digZ__gYOd\_`db_VOOGLSbc^ekWOYbNV[ZY[SKLa^eeZcWPUPUYMZ\\_X\ejg`WdUO[ZXORP[eZ]_cZYYSTOZVO^ivv{u__hleYZgjaYVW[ZVY`df^TW__\WMZJR\F5FX^NHG?DDIOOX`VNMeyvpwfcb_ceckafbchcjghc````````>Q]ebbika>=G?FZ^Wdjic`Z]Z`v�t`[\Qapnvroj]ZV^[]Z^\^SQURRSK\fW\cdYTLY^^mk]XTajb`fc`inmbffprmqspwz]lfbf`X\]XURXdZ[`]WPMO]ganqsfgleeehbjg\]OPT]\adcb_YamlilnmjqhV`lmgfejrssv~s{�~��~���{xpjssow�~w�z{zmfcnlefb`bfupjxkomjtwqvz}vrqrr{�zutr|lhYFXciZ`dipv_YZM[[MQOZaSN\ebsiakmsqfijgc[R[[^imb^jr����{��yffl{rrkmkffjY][d]Z^buulcqg\hhcSKWP^^^Xgikbke\\_fmjipc[[o`Y\^KKJG4,)789>EX_IIOX\YetnnzuoqqnebZa_^\e`\fmugbm_`ghd`_dm`plknc_fndtjghnf��abrtpvssdOenloadZobuvkfie`nkizmfWWV`g[PRY
XUTYYl_dbcY`_S\TXJQO[JNd_``XHXbb^fl\Wdf`g[ecgx^ccYjfjbjtmh^_^me_XWZ\TWRNRUWZYc``fnnf_Y\^_`b^Xb_WQMIJQLDMMRL@4.4PZJOS@IOZdXVnypnq{thZd`[X`dmfhfd_`bbdm````````X\hkvlwuZ<KeUK]e`v}tvukU[cpo`XSJKTX[kf][VQLRTXUWfjXT^XQUbenepl\`TKTVRYYPQVY[_eggab\hovpghfgm^\fcV\adZNNIMOHCE;DG\ZLLNXnmno{haj}}gigqtqwmlmhjghkkc_dke^agemnxs}zpdjllmkecprxpbn~wuo^`Y^ahuqd`jnnsrrg^VVV\f[YZ_b`engn`kp^mqnvrvwyiekdkrotphF/7@PUZ`_\]]ecoeFKVMVPiT^ekkdl`Q]gmgcgeiXNBejwvrv|n}�����yt�~���{xnsjPdc[QgjZoltj^jpljbL=LO]lqbXOGMkiltod]TSle[]mnghH:8GNCHIFA55E^iZC6FUYiqgjypdgjibd[^]\ZceVfgpwqp^_bikc[YYXiefl^V_[ZhhggigccOO\sdge\cLazzkckrfhi]^IKSUfjuhWUVR]qfteWVW_^Y^_icl`edSWUbW^VXd_fZ[^b]d[]TT[\VMXYcZggoiihd`kehpmhZTR\aedlfe\`[QMDKMOXHVTPS`puqY\]\]^]Z[[VPLHMY`^baP222HNY\PWlmocaxqgvga]R^`]UVLSWe`\Y\Wa\`^c^d````````rhd_j_cX]ly�vfm~��zgknxpibYON[^QCIL[uqbbdd^ZZcikylVXb^dsn\\bl\TaZNRWQOQPPZXWfnpwoR,>[gM%<`cflr`RVUSUNFF@AJE=<HNMYYW__[dfkx�yhk|}qps�{g^TYedrkfecimrrqswxxoi_anreinrrjcclhgkj]awZrtiU[q�xrottjm�{|qechkojffikjjmcWSJPWIOTejQHGZYhki\Xaa\acfhcTX_nkfg^\]jXSKZa}zr}�utwt]NWiuqnqnS115PXlX]qXUaO7aoodh^NZbXmumbjbc]apgdTW\e^Ztt_lqtum\Rec_RZcW`PYf^er|�oVUTZPJLZ[_bTQX\Zcjmuwrb\q��nrslngacc]\ZX]\_a_[cotpklinz|�uoqtplg\fa^fhcb\edZd^kdleejeWadpbXmU1FWc_`{eSLRcSViqMW_]lbmokixsyrijada`Y`^EW]kjl][^]]cdVSPXWbemoSXXVV`^BCCDLT_hYb^lZj�{proddiibb_da`]finkVJJPQBMQQOVbeaNLPURLQ[W]YQPTY_i^Z\[KKDUUMTcjpoja^omecJDYUTJN]hjh^mld[RXa_`^YLN````````{xrszxvkj��wtklstmonjU`uzfZXV[^ZQZ[fwsejR]ntridbg]Yfh`cf_URSSM]]aZY_bdbXKY\brqjsgbOXantnhregdYNUYOIU\ZXOHJGFFd`bd_jsrf^imyvsq}���~sqeXYZZ^RZTW_bbhkifehlz|xxpTYaa^Z]fhb_f]V[`Z_r`s}qkikailpsoeaet||qkjqzj]SZfje\`\Z]Z\_YdnZI\bd\d\]\[kp]e_R@RYjgux}��ulnla^��|rzx�yi^ckgVNT\XPHB4)9FA:VVB8'4>2/KDMSf]Qa^ckjc^lecdU\fet�syimwuy�w|xrojqeYgkyno^_T\^UO\]Vh]e`^SVRVdUXWX`X[h_[hi]^ai\^\ZZWcWOUb\W_XNWQJR]ZW^afU\RJ[chtvmjkgj^TZ`chalfOXWd[_\V[akagsua^G5Qf^bd_jokU_mvbgVahbeaggnimqprrvlmfbaccZogghe[`g`UY^_aXXSXSQZH@<KEERNKFDJHKS<_SYMERGOOLKNVXVYURWTY[]^aVGPO<<PMPX[UPOIIQZXPR\Y\YUUTRRMEKJXbc[T]hniirupmnooqrsup[
`]XY[bg`f[U_ga]\Z_UTZ````````murxuxzxnxvumipe~uxuqbatkffbWTWS]`WYabVWQNV[]V`mh_XWNLWWSQRWXVgXXdgfglm]T^dluneknl`ceossprfQ8@RMHFIWYTUPXffYJdea\XgprgXdt{p}��ng]_b`[]`^\ZQQOOOMNRV\\W^ofbaai\__adZOUac^afndVdskmqxdenx{|�vv}vestuqvt|��uis��ve`_]_WUg_]sa[gb^ccYQddi�w`ms\_bkfjlm{tmf__cr|plarm^U`gqpkkTLQZ`^`D5+@NIPQ50/<OKS[PWixcd~kkglkbecfWXSfxqaZcX_^Thoidfgpid`hxsmh[bea^jeRQRJQMLPcb`]em\^YV_JFRMKUUQTQWRSYa`Z^TOS\VYaUT\UUYWR_KSdVZMHTR[hja_hqei__hedeegb^dajlnNKZi^ohfme;CJb]]m^v�eakel\nZ]qlcfe_hin�vtolgmjc[]\Y]OO^_XOZMPKS]f^MV[bW\VWLGFJQR[QWOPCLSY_aLS^[PMOYPA8>PV[bVXUV][QFUR@Ua^\_fje]QSVY[\[Z\XY`aUMRHLYNR[]TY_]U]a`qi_YOLMO_ebXfli]Xcd^d_]WU_beW]eoh````````tyvwjcfla[`vqn{k]cl_]dF:>EPRNTZSTWNMQ[UQQMQR\\dgU\\^VIIKOMSYb]_XTejjfelgkkmqsrsumpogX[Zc\T`pkem`]_]^WT\\[tvk]_qaV^hpsgVV\[P]f[VYPRVPINTW]XTFFNX_]YTVYSTaijmhmqeXdnmfjtxwvu��yrwsje]9Bikdpddptqsyxont�~�mvyznnj~uk_cbokkt\efZMR]^WWD<[gbq�|nj`aiiepie]MNVg|wecvj`bnjv}�m\SPYcbkTNHSOBQU;-HX__oDEIS`TOekf_po_Y^`e_^hgTSO\NFMM]W[NURYZ]apzmX\]ibc^VWNKXTDTLM\\ZUUYPPVT^VWROQRD>D=LJMY]`c[kj_\`eljch]cf^Vmnnoim`psvmfhigp�pz{zvqrnhooba[qtJDdzrbmbdg`Eb\\SaaVWzckp�p`sbiyifjg`aecyipteZ[dbajnum\S`X\R[V\IDJYWMXX`YUSegPONGO\OOLYR[P^cmql`mtdY]K81@`eji``a^]fcaldSXgmlekun[TRRSTTRP[Z]eeXT^^^_`jkiT4EL8>C>_aa]Q@=IMQL@EXgG*0?NUPVW`]gnaXen`````````dfhnfVY_ZH@Za^eTTZbLG[:04-9OY\][Xfe_T][ZSX\TckfPQ_cmhG;HOV`RY[Yd`bamohqvqlkkks{z|}}lW[]ogklfdYccUYY_djl_Yb`pvXr[]dQHIDGCY[_eaK\lwqtn_TT[^amZW[frvqc`ntoo~tql_aRR`dis~~wtzvh`hlheB?PMI^SeNQ^hd_emlfmx�offdhempjjctcYKiqrrst_tja_lh_^RT^^j[QT^UaQ_ki^c\^YE>YO\d^q`ONW]P^_VKR^_iqnz_MRe^S`E+<ccgUdG1acS_fensftj\Y`U`e_U\AFMbk]`[_XhjiQPdxslwq_VVLL_cP`bNXaPSTX\_ebW[^arnry�se__QP_]adlqdl����ojxy|�{|ikofWjlxux�y�y�|v��uiq{w��ns�smvvyvuyncikfhdamcVX_spfddgtht�kqxzu|tpie`__cel[`hfbNMWe`hZPecWJXipfUW]]ZV`T[YW\gjdamqrbgM\YQ`kga[f_x|ljmogcrmhaid`eenh[AI[US^gl`UUROQVVRURW_[LGL[ab^YTU^_feu�w�r\PbSHXQW\c_]Vdx],*#
,D1#0383/CIXVQ[iWW_f````````nvwpmjebib^Yb_`_\]_YZ\UWT_ddijzpkqemZ]]ffYY`kjYZsdOVPR`hdgcc[abievopxcmwvrl`hbbcfpwzj`ydW]bcahrkfieikapggfdbafgafj\[nqhejdhtXfValtowvc`dgfac[T`_hlnbfl`a_hii[\^gljd_suizog`jdjkfb[P]_[nreqsokmww���z�~r����sqlt��wlpph|vplxupuqwi`pidZmkmjScqhY\knkYgdl`cVJ^alpl`VN^YZZWQRRMS^[ZT]il^EUZWVPabE=B?`pgabegmd`b[TSIFHUe^a_YWc\ULOJQW__]`bjghkik_a`glnorioqrvxvsxvroigdntqz�|��vmagjjbhlrX`c_b\gbO[cccaT\kc_elekunkqvpjaeqk^jrvlngiplvwp}xmhk_cxnoz~onztkrmebovmtmhb`fjgmfhZ\\W_ZVR]ZdVLWcikXKEbcQZUTT\_XW[ZU]ihgj`YW\UZezcbjsf`ma]WX[UXbj_g^Zdoipe\V[XX[_V>=Rcc_`a^bYVdNKVPcYVb`UVVSh��rf[ajTVadgjkmr}nkfMV\[l[\X\N@LSd[S^E<74,8;IbYYWX]]XVlllllllldwxcY]^Z_^dbd]^[baa^a`Y[dgqggiqnjnfmcb\[^YZX\_[dnh`j`bpvl}�{sv|{x�}rujlnhed`kmv{spvqsh��bpwts|~rqvkhffkee_alront`\`jpw{sokud\da^abZ^ihfgcljol^Yca`_Z]^YZakmurug``hjirmahojnvy|�sytwpjuonorprhw~���}y|z}��|nsuvwz{}}{t�xz�yWMVkhFGDUR_]cmieoxy}soqors�xjNTim{�idXPZ[^\PKW_VUZVXUgv|`L^icjdURTSWPNNLWmfeiivo[ROVR\_od_RXP]\WPMQ`lUWgci|u}�zyv��o��r�yuy�yldb]\_]dbbbeibgmtz~vms|aYYmmlki\TaYJRLa`_XWY[dPa]P^]YJbf^QOW\Yi\hvuktoppegrcdgtfguwz�~jnvqlqy�wvqlvwnaij`m`hedabgiX_c]a[IRQ[QZ[\FSSYMMVX^^TPW_kgmkc_]aVOIKPkjk`hj_hbjid`YU\j]^Zaidhld^VWVZZZ\S[bghfec`fbc\^SU\VWZba^a\Uc�}g^\g``amg_QVacoqvoZ]_[[]S`[ZVXdmlprf`NPUdea_fd]bebjqllllllllO^^SWcbXPScfc\b___ca^XPUWZog`[PKFTZ]US\b]cjdfiinnifm`dop_fqmrqz{tzkgqvopljpryz{trz�~}g��}xqmkfejfuojad[[dkrvp_]mYTY`^^`_RVgU[][TXZ\Z[[V[Qcjg]jb^amcPN\bVipxz~yyzmy|�xtuvryqpn~�ywmqrrvirpxz�wyptm~�vyznloxklqtprw��}t|}qc^-4FB3
),5677Ehlrt~y�����y�o�yee^gp��ulfeeitmt}�vi\U`\Yf�x][`Y\cYUUKQ@GQCA_ZXTK_defQNKPO`mk^eZ_a_^U]quzplov~�~~~}y}ovqeUorhTi^[de^[`Y\Z\`jo^\V^crxgf\RXU[a]bnhhlZXmfadba^]gobheWhacVSb][^akng_Zguksulypqvchc_cfdekkfda_bkgez�}qikwmXswn`aamnoqobdaX[^\R9?8BShXSdZ`ec_^dga]`f[bijjbZSIJIDUZV[W`SX[d]eclfZb[gttuhm`]h]TQ^b\[UIKQX\\[\\T[T`\T[Z\VSSX[OJOW\ZSS]R\Ua[T=DBBK\hha^^XTaWd\b_V_Y[cWY`V]W[k{yxcbtpjskllllllllVTOR[\UQKJ[]XS\YNT__ZRNZTVc_XUNKPVYY]Y`\Zdkelmkgfbftmssicfllsvzypsmoosl{volhmxzkhokqiYcjh_^gk]R^\qwqjh]eboppvocb\ZROVLBPMOSTQTLUYV`]YVTcfQ]oXeffelilgaklnq�}�mw�x}y�}vzz�zykednw~zsqcXaebu���yvdyqwqagofpq{rx�{ex|��{gium\SblIIG($&&-$ Pbfl}nl�ulqknmhusovijes�h_^\adxvz���}p`bYJNpgVbn`Y]``[U_RMQDG\dah\babgahprqffalzwpruqu�vwwq^l|n�f]Zdissc{pmeiX_aQOfljitrbTOFMacVXZRXe]U\FG\aTPab_YSLYj[\`XXZ^f[qomkvffbeiPOVSbilgNOmqsfbknpr`ehftma\mgdmlmnhWSctsrxpxxvegmveZafnxvxxdYlYMT?<IC7:Tm[`CGaT]gqthbjmdgio|udkippmnnimdqjgsvmm^lreh[cjmvpn`[eb`bfXIQYX_hlib`aZQX`U``big]\dop^XXQaod]^V\RXQOIQXYQSTTVJWT^^\UPR\SVMRYMV`a^Mq��ͩdabc`]llllllllgfed_TS\WQ[ZUPWRILVY[[]l``[^VNOIU[dhsmskdjldnnqjkfirluqjjvo_Zqzxwuorpn\]QTUSVjna_YM[]S\m`[[\b_U]NYaX[RMVTjv|��ogg`YZfaUdkh`d^gfwbJFIPX^phPWl]lfalsha[\q|ffgbognxyqcrjglnlihgflcetkelquzxv������{|yrlju}�������t~��xeS[mxt|wzt]^iVE?-,+'FVW^cZRjqlj`keps{lz�phom^PPZecmftvkW\mlizqhe_YiaW^[ZY]e`mTO[fTa`d[U^ge`][eon`bd`pjnx�tnwq`[nbcnqyW[]mlUH>Oq�NSURP=>`ijiruqrpVGRWJ\a]WX_YUXC?GWjbmnf^[^aZY[RTYcjYNXbs}wurceS]fX[`^fmd[]shoW^jrbd~��f^ViX`^Y\dbbjn_V]sf^g}�zvthi^q{}ymKB^bGQN`oiYI6II^vZputnpwurwmmtpmqh]U[]dpty�vt��pzxvsgswhmcd`in`Q_`[WUNG7.:B@KVZZZ]_^b`eTWcfYWT\digVWZN]k]UT\\[^RLVWLYRLKS`RU[ndg[^`RTPLRTQYXKY�����꽊_]bWjllllllll]injc\\bXSZYYU[XZUSRX^_ihh^h`X[T]cjii_ijinpkuovq}}��vxsxptg\P[XTYa`cil\LM]ee`jkd\^_]kejc[YgjcO688>GL^\ad^hmmjejyjX_hkmfhegicgknhF0&(4GWbZhb`frVE[gQ47Ro}goZVXjfpveQZU]dnbjrtpwiqjppf^guxpvmwoxx|pvqptsohejr^UTTLm~�}wnx�|v`rweo{�u{t`QTK]]bcT^\\ZZONW]ghc\\ebY_]_SR[ZRXSl^Z\dmhedebag[`PO^^\TS[NbXXjqP[Zad_dges�uq^aMljc`cmz�maflk`x{oh~j_e[abI@DBMiZ[RO?/4Y_dageed`
RMWVPTT``[c\VUDLLEl~ijmqm_Zbd`^]Y`lfXY^sthfiwtccaT[e\Y]VVRURlMYdrk`p�q]fco_uuichipufb^`h`\cq�nmmtutrlvxnbillfni_ibWUVWTYcgesj`dx�vgikvxvl_^acTV]YV^\[af^b\YbfokYbfe^_]RAVZR[ULIHJQKZZWQNMLKYdZWgXXUXVV]\Y[TU^TYaXX\XRTa]PSC?TTMINVLS]d\Z]^eZd[X[Z`bUx���������}fnoollllllllT^`[Z[WUTTYVXT\_ib[RTXW]XSP[X\`e]YV[a^hk_aehwioidny{rmammebf]H73H]fgigohdlhf\_`hbhkdlnrfg]idN9,:=B=@DBHANKSch]UV^N[bcf\_hhk\a_]@%$!7KEIUINZH*&BA=G[\]ym{oqno]c`UEHCUUbdlxqmodpkieYT^b^^a^k^W\kkxz}{z��jaT5,''(K^lz�pstnrm][RWfp^_V`]ljYLUVQ`d]f`SQS[TWWd]^qbmkjhhl]W_`t^bmpsuwouqxt_dlrmjh`]_Z__WdmjmkqjgnyuhjlsgdTgjnT]etvj]bab_n`ec^P_\Y[]Z]bSLVWVUYZF>TJOPWOSQTY]a[fTKT]a`V_`XZaXqtfUVdie\Vbcj^NWde^TT_bV^bimmjhjr{`bhZ^TL`k[ehtumj�ldcbbTbejhihgja^_]X^adcmwfhciitsbc`]^h`Z`hhclmcQkihkZeNQ\ejt�zg_bb\^YTbc_Q``]TQJGEJ[TTPKNMWPTVMIIPSTXPJU_cb[Zdbd\TOJDBBY[PMidXQXWV[YV_a^e^Z^^_aWVPdcUNDFTWXWONRac\VV_X^Xb\X[`hggsvdx��je]|�jeoVllllllll]_XRTZ_dfgi_]V^fggdYVXX`SP]ibbU[Y\]`XNYegabj�pvogjf^VUNetkia^Tansqglxnvhgicligbokb^pg`]grp|fG?;EZcPM@DSNPGIQRMLONOVXghYkhZN=?FK1&@?* 
3@$3A<DQH@7ONVk�rSebbcj}jakz}z{t�w}onxmxvefacjf\Z[_q]N]ibXOh|��xr��rm_<0.=S]TGWaIWgYged|pqd[fbmcg`m]MSTcfdfrcealuio�{ocm_{ajoqznqsopnytkwxibhWbg`[_�promsh^afbrmnalofjcc\HS]ddV]USQXbev||rxt~ablzmm^mRXYMKKYlVNUD2V^NFT>;35BIIOW_q}t_ZTWe_Ub]\OVO^]o]_`Y`aTO^rbUo~bcdU^_k[ggeXVhx�]Tlc[\X_k[X[bcrzze`P`leaW_TXec]SaTOQ^OLSR`Yahd^gg^VIJBNYQISQR:GILZ`WNcRepl]HGaodVkmXOJHY_UKd]]RDUTOG^DKL<EJbTAEITYXK[_VNW^a_USZXXRRWULLS]SMJK_[_TRQWUSXVfhaX\aZSUdVeWIHWLMOUVD>OSNII[e^_`fbXYbgdjVVOPSVOY^NWZSccllllllllZXAO]dejbfdb]appadpleebjcgbezqlicbjniikf^cir��tp}qjusknonmfb`buvohiken]bqipc`Y`yz}o^VWcxw|o[VTS[`Y\]fpTIZURPWPTV_XX_cdba_SKQMN;Con^ZNB9<?:RSgmgkb[lo�rlahtejrbpsjlnkotn`\fq�^L^f_con\d\U_bZmXPUIMd_`ee]XniRaSQPZZeYYiZY]W^g]b[WWdjsfggcox^L\h\{��qrw{}�ytsuf`nnpc^iqq�ujw|ydSXWW\O?T^JKS_cZ\QU__[Zd_lPFI[]uue\SSs`cx}\hxfbistigaxe]]_`gce^epXDSa^JI?O\ud`npcZQKVP[Yd_Y_eb\CRc\LS`_Kb_^U[e_bYTJUTly`\t��zhdueZn[kienhdcrr[\adZNibcUZWZlX\Z\de]iQRU\mjk^WOVZYWDUbhejbldnijheaVIL\^RZ]FQWNKLdeVc`fldUKa^^egkiYYU=8@FZdlkXRKgmtpZ\Y]KJLHFN`\_Wedb_dbdcVQa_E1<GKIKR[]YPXOL[SS\addUWM\]UWOSh]V\Z\aiZ\YQNQLOYQSIJ<<<AB@@>@68D9::DGPXONRNBJV\cYSSSSSSSS^]][d`il_cgprrywseekpzpkovtrxvpaxvwvrureqvx~�|xe|�vklojbprl^Xajhk_iaaeb]dpfd\fgfsoogdffkvmoj`]_gg^PY^\`PSW[OPS_\X`]XYW\YW]\Zb`fZho}gVWP>JRiiqobgkjphnbXP[c]`_adaghghhgf^[\j]QF]W]]e]\npmZc^bg[V]X]MO_lkJX\YeUSSa_hb]bevnfbju|swpyx||nqusrmbZZXjnlY]mxomlohdaSR`W`wf]eovoslrj^d]`Y\TJQO@FBQLWRnca]akm}t[Ud]g`X[[^vnt}�vwd]^_ekjmmlcded_Y_fbbnhVOWNPPIh|omd]llXSTUQVUa^XUZ\XUUcY[ZIQJRW\]]llfYMNebaeeuquvwzu��\S`_[WY^\_hz�{xkoceieb_[]ZMZbdXXX^T]abhivrhbe\QVTbcf\c]icxzpklpmaVQYW\[TS\\WY\WNepn``TR^c\\eh[QW_a^^Y\cicY]XUfiuxcmnp{vdZZelkZ\\adil^\aba\SAEJRSX[_jiODVQR_TWWOKCOSPPQWORfeRZ[PW\UU]WQRQSZRZZ^\TOHMWZP\@CA=A943DTNMROOTMIXdSSSSSSSSabqiebii`gmxxpstk^aogeO@isrpipnY`lvtmopidjkw|~wjkt{sjrytmfceadkgmerighebgnjjglmbqkjhoti`korihomkcd]edX]__RSQ\ejVO]jeihqdT_hjpnvrtgxjaiouuqvsqk]LZemnroaT`][\W_EU^_drn[Rf_ZgYc^mfdWc]``XUYcdbcXX*3?15Pmwmmrwradhtv{ysq~�xglpturlnxniq^efioxla^bi\\OTcn_X]iiail[Sgs�f_
sw�w�une_ibQ\^\^iiVK[VPbNo����x��zzehhhgclbkuzjc[edlnmmnwWerjc^ajkh\QYp{vrklqwwtmdoZSTOZWSVWY_X^c[^[[Xb_ih_aV[]Tjfea^^C?WZSPRyxobf^dh\^cY`[[Ye]ZVcf_aajusi_`UZg^djlmbnqbXhlmoksogd]RQ[YdjVM`\\Qhotf`dhli]Tbfjs__^flklVVdjol]bhvgZcgZY\epdkqlkiahw^XfWXbM_Of}racc`osfedd`[a_\ZZ\[WILQ_\b_`_\j[TXbV[UYW^YPGYTX]WWdi\TRL\VKGR[\ZSOTSONIQV_Vak_Y`[WUKNWXQUVQZkTMOOQRSSSSSSSSSdcljZjjiitwwmadi]Zer]_cfs{ssimj_efghjmg\]`]mx��}ru{pcekoiZ_fif_giloqrn|ojgkuuppejmspqulfehh\]lqsgvohc\`frnghaosp`hrdeafMQYede_ef^be]bf`n}wqnia]WgjeVLJ_bhbJJRmyP=BI[nhPAGMTYZb]bimbaQJW__hhfG:SB(+*)Bl��ov������xwoq��~htwqeztho\j]VZVaUk��jlllkikdbbiadeirfbtoXRgyiZaieku}{slkomq``lottvj�jf�{�����������ushdirfei^OW_`Y^`]UR[FQmehw�nmrqkx�ysbck^Ytlk\ZSK^g]}zfdqye]dlclkmjt~pWOPH_cnbDXae_eko^pu|pmaUQSWO[cdijma\_lbl`cR^WPPYRZ[EJspoorprxppw�|wuv�zpj`NIO[P^_WWiovmmnlqp`mvijnVUTjjmwnlfb_caaTadebc`a`iqbE^aaa]hnnrbSNQKKNVc]VYYWV`^aa\WSllaXdsqfPQUbZ``bPM`Vik_TYRYQWYWZcVRLEGLRP]]IZk]DLWUTTUWT^\QLRXOY[X]SMEW]Z_XQVPJUVGIEFTTRSSSSSSSSbbhcXkihjzzri`abljqw`bqyvzkomdZ_^dgc[V\fe`TX\cXYe__^dhgsmaiaefQ`Y[[]^\k]ga`jmowsfmtjadglkchkfbdppud[fkiicompmts|wkg_hfsgfhroqknpt�x`lmV[YUTTTSXcXA3/AGQIkg_Z>OR>G?EPNC8-!>F=9BEZ_sui[Obhl~\98#5;&/2=Vr{vjn|�|}�vWadk}{x\lcfZ`nyvTYQWQ^kYZpr[^^b^db`\W^Ya\]d\NVeh]Zgg]lyw�w}zzug{~oex}szult{v{y��������xymhq�|���caZcmc^WVXSSZTjcaosafbheZgxttxhcZabrfZ`dZ^hdUWQaidef�}r^_^lshtiaGCC;HRYOe_aa[Ub^cfxqnmi`[`OW`__gb\^qvw�lh_cUQYUNRL=?IHL`abtri_ahhbbbgruojifgaUZaUa^Yfdihcjk\bnjztfhf[jwhV`WW^YXXR]ilXNRKDPRJP^Z[\UTMUfY[UV`io[Z`\TQ\X]ZXUMTZX\__[Yfzumhi[^_\VJ]MNHM]XR[RVSMMNTbe_WJDPSRSRRISLYVMJNVWS_fZ^[X`dVL`t�mUX[RRYOHRQNVNDIJPSSSSSSSSZerhkf`X\nlhjgc^gbhrqorgacXerg]fa_]cljfh[XRS\`Yb`[chmg[ihZdbc\GX\S^TV^^Zcc[^boxtkijegonphbr{tnklwpbcuvlm_gkguoos~fZ[dZfjmhqqxtuxnujZbh^^\QFFNVMO8*,#)1EUVSirG;,#9)*0,-2/$MSROf`u�{X214B4BIK%$
&&2>O^i~xtpmpwiXbnqtag`droYZ_fx[MJZP^h^U[ecXUb]NLSZ_kZUXomgighiiWileo^[mdgY`nfxll_exsc[g]b`hZ]p{��xgZ_xmmiqppp|���|yokpnol^`leX_mplwlaq[bmkhtuckyhe]aPXQM_[Ncm\\SFNLC@IfjafZV\kt�qypc^UQZ\GW^nnk_lrhdladsbS_wiswgYioohstv�ebndYM^YVXNFA?BD^\^fP^RNLRRODMSX[^gel^W^_P]WY_^^ZZfmedh^cRQ]e\VZUXl[Z�m`cjwvk[R`]SVMHYQLSVYht[fijWPPnmba^Z[Y\`edaaU\b^LHV_ZX]i_ab^aaQSRQ8HNHSb[\V\WRLOUVND==?4:50+41@=IJHKU^V[_dUXRRVVRYdz�p\V_YXYMGIHEKQROP]SSSSSSSSShlhwi\UTb]\db_]bYVU`]cS`_[g{xuqflorxtoqcbbZ`\Xge`txnf_jdPWe_X[b]UbY]hkhmpbghsqnohhkw���obihdw�{~vopvndjrstgd]hrkgadibjpoenoyvrt���spoqqjfYW^gB-#52395>:'B]_]R>55;FR_ghavwuv\GOM3,=EB,!@M-4/*'00JZ^[^^_glteg`YRY`peprikvlQOTZdNMRR^a_Y[[cZTYTDIVY^p[M\xkhtvnQTY`]W_VbYlsfa_V\\meYr�qjztmpil�izpwig^PZdotxtxv��{|}�~wvjt|rntldx�ww�t^nx��}~�}gidSVUQJYZQW_TWfboe^e]VQdbcSTHCJWbiMKUNM[WNVZc[is_fw|hfpstUIWJZbjo\O\li\Z[\b[ag`^^iZIDCGGbgdk\Y]ZaXYX`_aZRMMSRTKWRGOGCNVeSY\ZY[YRWZY[Z[]`[OMVXXJSfcVVM[WTWXhrlc]WVFHJAF\qd`^a_iZn]bdTN[\TNW]alfkgqdfpoiebphqif`\@;@^XHVbWBQbdaQSPNPC=2"$$(! 
%'#&?00/:NegMYNLLRTPTc^hPXNW]ej[QSUUK[cac`QScSSSSSSSSRhU\prdgZdZW\WY`fgeZd]ohmebcprs^__XYioieabh\c\]rweusbfhlteai^l�~{|zwvu�w~|ivvynrsqrkejs��qljbq}vungdhiirWSVc[`ZR[rsmmszzvippxrikj�yprkmqpzofexYHED`[ZdUZ_]��}�ji�����xxi`ZK_lY310$GdbE)2/+6>FNLej^V\]aotxTTZnhqhkat�unpvki]_bOPCP___UdT[hi_^bjraYqf\fkZcly�kfggleXQ[gci`DEVU^}}jhc^d[dzqon��r�qvwmtprljp�yvv�o|j}oxpw�sljrhdalZX[Jhrmcgle\eFO\JK]VafYZ[_gNVlqbazfSLND=KPcc_\XRMNRVZZfd^Wbda[We`a_gy~nrf^P_WRTVaYZ_\dbIKDZclwq]SROMZVVgji_l`SVWWLXa]bb]V^PSZHVR`XNNP^ecb`]\[XVPfg_]f\L^mpd]YcVZKc]^^__i`RYWUGSQCDGIU\NJG\U\QNRLIRVYRWWUfiofremypeeg\Xh]]ZbJHQ]ffVJ[W[UOK@LK??1>B5857DL72*,65;<84DRD@8@UVUJO_[\SNG<EB`bTHMXO[iZTVJHJSSSSSSSSWkVQ\]cgfYR[]]inmlnjaeopjmbbilgR_\OSagg`ddhbnijyw{�r|��tiinQTlursxuwho�gmhlxs|zrw{tx�qs�~�szotnrssjkoswrVX\ty|sg^xx~rzrznnqpvrympkrx��rvv�tuuvu}u�|~��x|�wyv�x�st��mslzhhh[qte\S=IbW02:FO&//?IUOSeihmg`agjq\W]aZi]_YagZoal`fzai\ePR[TY]`b^k^Yledu`Z\Y[djdopkn~yxwV>7-36ITQWKS^iPWqgepbb^iod~�|pp�ozrkrt}s�~uv}lgmtfni]lnyzmlsrjrildb_bd_UkPW_lfMPbCSbZTPUBWYUN@ZVDEU]QG@@WW_f``X[`\\_\TQj|scamb`^aeSUMbbU`i`NVVLW`PKTZNRe`VPFU`mcbb_OVafRSZT[YUR[JXVZcW`]ZV`VJTUed^POSTWe[TNONQ]^TfsNabe_glehhmVJFM^_WV\TT\[[\ZUSZ^TE>IMOPSaZU]QSJPd]Y\_WadYL\NX]`\WZUTUTSVU`NMY\^QWETZ]PVQ[QTEA95MVcm\\QIAF>GUVNYZYK@:6<C>CV\YHFTSUHCN@EJBGQJLLJMSSRTMMMMMMMM\lc]fUYV`ha]_ajoqoljinroo~vigpp\^fOMiniq|px��������nFAGQfpffdad]gzhiejz{�xnoprnh^jkluwzknluym_ipqqpku}~worl_qophhgnsquosuzuxxwz}�xyk���{}xx{tf[b\`mtb]fsq}�nqv}~s|pmxutel^eof^hnvi`hdl`f[[fcc]aaX_nda\XVZIQLNeSZU[RVbolVY]h`SecY`Vm_[_[ghimfgYYQFPRUZrr_\]mtkG6=8.<1Xskigru}qaqw��}woxpkyymofsbiwvg^zy�wwyvkforqrpctvtu||vtrvsndU^ifb\t`ghnu{p^OQRVCHVLTRMJ=HFGKHKO?8CHMX\UBDNQE>ADKDLV\^URHTUQ^b\<OYVVccZQK>=HGFJDPNNK]cMO^eVSX\V`^c\a`QRW^VQGWSZU\]]d_`[^WY\TYW[aa^e^VSV]]]nj]fVW^[U`^LP]kYRQWghaK`[PV^[LVVWXYXODHO[bYTGJOVaVTaWJ\\S^geSSDJURQGTRSPQLTQWLJXTUJJITRUNZXaUVPGIJTU^^b_`\JJFIOV[XXN=1+;.7AKnnW[Y]Y_XMXXSWIPQTSOHGMNLOMMMMMMMMiprmweqom}vnnhgkqwzz}|tloyxusuyswsdcnsurmopou�ztnwvouplodSLhui\\_^bd`eopoli
tsvswqruojcojow��|}���{ituumiu{�~ygmkougjenu~}�|���}���{~xrj{}vpvuwtk_[a`\]loiuwy}uon|�t{v|�tj_�s_Y[fvrul_UMT[ce`^\bY^bLFZZ\eb[aOZJ@MP\_fcghaggd\WWO\jf]`fbZa]gqqe_k]toap�}ikVglxYhKGLS\lXo�ulnyw~�rp����uhuf^UYmbr{q}�iaTvxoijpjYW]\echbnbbikoofg``beU]biec}lfadYodHFCAKZOLINVRMF@5EUI=>9?YJFNVVE?>>:;CIBHRTY^]doiecc[\TW_`[ilmZbXIMQJNGWYRMfnUVZd^]ad_h^\\cbVU`a]VS_`un[VQXTTQWb^[XbXUXSW]ZSURVYZ`b]_UVYXWcdVYdb^ec^gnp``Z_c`eka^ZQMX^VRW_cUI53)GcbUVZXloje\WLGRQJSACFGF?D;JCA=.=;DHGSXVZWaWTED`QUXUS\Y[S\\FGJLMSV6,$ &*)!,$5w�f]J[da[WXcef`\PMHNU_f]MEMMMMMMMMnnpjnfurfjlwxlkpu~{vz}|}ccgotttsunhlnrwmmc]_]igj`jbbj`YdcpankXTaefmjhdZakkiqnps�zzpluebfhh��vvvx��rsmrkbiehv�wtpz�y���������}|ys{ssps~zmgkkgijvvujmnxpkn~nnkz�wzuqus~yqqala[dfcruqlrznlc[Z`^bgahkWWnv^[YhcYW]RZmjidbtvb^hpsr^Vpdmbe[c^d^fkeUeliuwnk{l~piug]ah^ok~��}kmqpsxylvvi}_gxwz�iksfeajadyz_k]pm^]Xbefjfae`iofk`^cM9CG==L\]hjf`^sg^`j^jmbSLFCFDJUPVVWV^WY^QJNJP[RT[[MD><>@ADGPIFIWZRV_cie\\ede`[bideRddYY\P]eMRRNXYIKYc_YZ^dribacghcgXWZZ^]fX^[NKKWRKUOYggMLSMZ_]WaVT\^SSZUOTU[Z[dhgegfqmbdhkki\WUU^bc[ZXRZc]ZYWYZcSJELV`]X\[^ei_LJORKGB]VZOGH;A6JDCH;?BGOFHB>EMYPMHL\TUVRRXYJBJMFO[dWSO.%$.'
)KVTDZ\NQ\bai\e`dWJOT[]SC<MMMMMMMM`dda_bha^Ydwodmo`hd_hsuvadfeiojbgl^]jgchhimqjphhtuagznaeoqbgWYopuw|y�owgecsz}w|xyss|omnnf�}~titwntx��~mmfx��rhm|{�|�v}�r��pqkovrsxt|l_s}����hfjmqhdactjkp~~tssibclf��si{x_\[VGRW]dijanocP[\afpzpam~�u`_i]]PXVcja`e\jjwcip\kk_�acf\_]]b]ikgfij`ax{yu~vpfkuniue_umtu{Z\prqnssrxs�_fpxplgmzpzjbWZSXXjbfcVZR[]bndaWNTbmblzdSMNI@C;?MV^ZRVRS\akojcGJIHLKLXKHA>ZZIEPPJEHWPWTVWFOSVRH@DNKIGDLPNXZ[baV\fhcYPhmln`^^ZZ`P\jSOUZ[[XWU`ggkgfiecc^bkdZs`YMTcdZ]`UPQc]PVP`n`IVZX^\[ZbY]ZWSVTPZ\_fbW]hbX[RX[\b_YTWROOT^[HAM\[VTLZVRSYjedg\TZYRSPJJUVOPZZhPII]WcZ]NPFYWXbIDPKOGA=;;?F;977CHFFHHEKA:97=CHSK?;325'$%BZS\\[[T`WaRQP^LKLJIIC?@MMMMMMMM`efdbmpja`knUPb^_c`^kxukiccbb`Z]fcXWYW[[hqrebqqpqwefsaXYeiamll|{���������qalw�xrqs��zx�xjdw{��{ispn|���wu�{}{|mdrylnYxmnubu{momv}vv~}hyw~���scsx�nf^`juy�xqtq{g]TYjr���lrveQ]WP[Tcp]mbiloejYfep|phplxycjcgqg{gqqtevjpih\fih|yzunljkiwsrfnllss|hbsnxblsdp�yvt|vwnlip]hsigagjhhkoaglsSJb^UXyordZPPNZYZ]TVSUMOa\cVLR`c[Tg}nUTTSLN89ELOUQMSX\RVi`YITNUi_OWSURNPPHEB?MYMg]h\Ye_]dd]PEGUORTS`nf]gageT\fmgaObbhrjjba\gWUZRKU`\`jkb`]W^^__V[\VU`dZ_X_Zcqf\V\Z[Yb\V[SU[XR`RTOLQWWP]\_\^kfil^df`acVOSGLPSZWRNNIIEFMGJBIX[XWTMKLOFHHW]XYZUPJDA?LPKDHGLCCJ_X]W\UWW^WRTUM[QKD@CD;9=;BD>HOHILLGRZUQHPOIMSC97.2#&(!,8)%+@TOSV^ZJJISUCMUJOQPNLGDGMMMMMMMMjkmehlrrrz�x`_pmjdZRZlpefUW`aZTaaWY`UT]Q^kn\^bZT[okeiPTY^b\bmgqx|xuko��lkhsurst��tkvhefov��~qsmklte`RUfdbc]XYLS__gRupx}mloqjmxien�y�zfsz���~lml�rqtzp}r�ngr�jnhev��ot}��hvvmrgp~m{iTT_o{lzpmtu{mr[dk����qu�vZ^RclbYVDPUisWckbf[jcjekqxxkxqmjfofpvd��}y^rhcdhkcfbpp_c[bdc^`b]dkr`W_]^`Ojq[R_aSRKITSPPSJP]ZdaVcl_JHYfnl^`YZ^L?DDGOW\e\_[N_^cc^HF`bVWPIIIaRKVXU_cWdWZ`endjopqqf]_U_micfc_^eqj^wxk}}lqh_mhh_gXb\VWTU[a`cmocbe`e`]WZ[Y]XXd_Y__[bkf^TYX\Y^WTZ\RR_bpgXUSQ^[RZcffdvqhiccfdkna^`VZWOQRSOWUML[dTWSMIKSVTTW]aUSRbfPNS[\I>MOZVPFMOLPANQQFIRRR[UMF@QNZ^[X[[\PKHGRYQNJFPLIGRYV\TZ_baZLF@58>=>3+AOO[aZ[^f[XY`HJMWN_dbYXSNKECFMMMMMMMM``ldoiquv~ugaijnje]^othdW[ajnehjompm]SL`dcT_\UVOhniq[gjn^cgdfxlvh\fpfevzz||gktw|}���ljkho
nqy{zthggYfetkaccfoXU\c`XieZqqtztdg{lux�ssq�dkjg�~kvvvnvlekmr���w��tn}�puss|�||rnrzpvfugmi_`bzlYhceovefhr}��mlVSVcgojbabuSR]_SX`NRVTTbco^ns[a_S`\W\_OYU^[Q^Xdjj[wsu\\V`X^WYoT[ZecSXHJe[USeT[_^ZUKFVu\Vcpq`VVNC9ITMOWYgg`ceXkongb_afbWQFPWL8EFUPXa`?FYt`V^f\PKaaSXb^]Ypdgvrlsq`ieXclytpnggnkglnadhhkfatUMTN[SKfor}vXeddaqTTUTZZ_YYab_ZZ]d]_Y_^b]Yi`LRJPZMP]b\LNRLMNYRIK\NDJBZmkohTad]YJA\^X[eYYNE<J\URWKMMJQTV_SS`^V[[irjYX`]QORTUPWPRjD>CLQJUQPSHE@ILX`X_ofXUYXP\QQTO\Z\if[ZK\SPGALYSWKQi]TPVNGUMLXhcQJU^ddfLGMWPW`ZmjXXg_XOTVXZ^dbUIFJMPTPJKMMMMMMMM`ekYZgcoonmbdcYec[a_[dfbbjhbmfigirigvn]chd\`]d_^hs|r�wqxvmiojv�}ufjtrnpx�k`mmrq~y}pt{r}�z}}��tlqqjo_y�uymsmsb\l|jyrrty����dixgp}z�xlcnjly�}rzyrrwxhmv���~{��wpqnqqodpuyx�|ttr|yz�t�zn{pu~vigimzu�rprjgijXgxshqov|]T[fZUDRUbtonnjcdryfg^XVTY]_RWIX`a^_QMSTYNE]aSN]NP]^]\V\WMVX[W]eeW^UXdXRQ`�ubmpb\sjYMOSckPgZjlj^_`lcoskhf_bYUN^UWTIFXR[\SL^Y`c\[_m^VT_kvc]wtwnag�nqkjn�lfk^Wkphnzyqpiacgbleiti`lRQ^OGIFGNYSjbqnm`qsy|yp^r�a[[eucMYP_agMYd`YXPLL\K\PXZPXIT]Va[ZFEMUKLUWDAMT^^RZSYZeaV^aZQP``d`OXPBOXXYT]_ZYUVddaa_lwmgk]XZTiY\FGIPMb[KORHEPSOYSV[]XXfSYV`Z^aV`V^YaWT@VSHh\X]QUdn\f[WMO\RVYSZQPGDP]ba`TY\YT_\^VRSWPFIVXP^\LUYEMORFJXPHVVVVVVVVb`ggjkdlidddipja_ae`\WORcjnsxfeglsx{zoc_bbgf`_gkmlvnvhlr�yrlvtpmylbjm_`qojjaiggamkp\ljgmx{v|���psmry~vnt�~v~~|trx���������xffjhhlluqeewprumbowywkmjb_k~krdgofpkgcgysNN^trqvu�shlfeotvb]ZT]kuog\}ocvvmtp�}zq{xkdosstfaZ[dcTPGJ\bkaUYRpzV_cKMNU^hv`Ycv__`edefc_pcTUXJETX\YT[dfe_XRaodcaO[RVbYq�laltg`sh]bh^eqghSNPU[[Xeinli``R`aq]ZjieaR\^bbRO[Y\c_Zeqhi`gktcRepmnhjyw{yhaxgOPXReqq~{rpo]Ycdgjjn{rnpglkthe^^_`e\RIFEV`iejipyshmmgfkd\YaafV^d^el^XZ`T_UUUJODNbgcXW[WixkdVbYLJP]eVSWZ[JU]Oadc_nmdUQ]RI[[NSMSRR]ZP_SHWbXXdaQofQX]^]SUPWUcTOKJKKIGJDWEbRTXUTLE[\XZ^VS\WaZZLMV[^X[afndY\b\SWY^WTNKWWPQMHFIJGNLRQQOLOPLLONIGJXS[_SVb\RX]QPZSJVVVVVVVViemwyvtveZZeinn]TXRHKIFXXZbv�tsvpp~�vpehdrw{sulf[ec`S_dc\eck_FBS_RS_\^bfoo_fd`U[awo~sjmsykoy����}v�|�vy�tw�������������~��|vmYblzsqtqdrqoecypnnilZZ^fe^L0+47\ecXI\M"!6LafnfyoUU_ULZsieeeg^cbhe|yuq|y�q�tiZ^YVYYYZjl]MS\V
H722?JE^IPMOFAa\EcihhiRaTHRhh]]h_hklZRPFWYSJUYbcc\]_YYULR[T]XbdLPK_JGZe_\RVoaZljh~e]misiWJV`VXb]tahk��ydsSE_doketnk`Vc]ihgv�{sznkifqsirvpimd`acfVJ:>EddZouorlhhrge]_`sqqejp^zqnjppfnE?;6<3;@KEBViV[V^WD7[zojXgf^eh\ddnedb_hgcMYx{qpynZ\qaip]LS[YJM\ekka[XZLSTYbeqb[\VYTZNK\YOHMXMEWb\b[CCWUVedxj^eZWDP]TVPeeNIISUE>JNJDiVXEIFIDMKMVdg]WOcgkbfR[freSgfUIRWNKXdZURJJONNY\SMOMFEGOCHLJC:DMNKNV\`khaZ`k\WUWRORRWVVVVVVVVdgkqoqzvc[Vfidjd^e`RSSP]vniu�zrj|svzrkdXbOMS_gqli^bebR]d`Uaicf^Udt\NQT[QT^RV]e_adhux}|lqywdw��ywx�w{lrxzxpqvv�s|ljirw{rjv{y{sWgpkrnsmfZopmss{pq|efjj_`jj=)#8Om{vV+0# A[m``cW_\V]kve_WX^\lnndmgkr�v�{agi[]ZWQHHQcfYPUMS]W\RKM[[Kc_UNDMNYukbaYG^ZdmibPJYPgr~sgVL\a\SQOcfiXNLHURZcab[[��jlO[UTFY\b]XcUXfcefXP]|nrih\bd_db`^UP`t�td{kXmrxso|taVTfggW[XX\ixyvqpqdRWihepgiZaVOU@9:>QTWS7JWKW`URQUUS^i]ir^th`W`fm�bQMJdaodaSIUPGI\ca]QTZOfhhaaisnsgdZWZW^^ZTfj]cz�mZavkdigZUUYPT_XVWXOYY`fjaa`vfV][cSUQOVSWTRYPMadU`k]MZ[\Y`\OWT\cajcZShccVUTP`m^RWF<ER\ul]STINXb[XfaXPce]QjZ`emqalheiYYKW]_QUXQPOKTZ]YVTOIPRRIJSMFQ_cVIIOR]\SejW\_LOUWY\]^VVVVVVVVffgkilsmbg_fokopw��qfimvwtkq{��osmfde_[]_VT\Y]`dgb[`cT[d^`\lepnbgiZZYWbW^aOZehcgvorruwhhqwj|ztk}z}��~~��zswkxtkmkfadcwkrqvstxfx{nskmjhgnwlxvorqp^^jgd_ihEBIU]cagVA@'/F;86<C;.Ph`am{x~rjkinlpjeZEFM_qppmltp^_^]SPRYUPX]QRZdYZMIOQSISOCTbPQ`X[__]^\_T\bTXOWJXT\`dYOSbdf_[b^b]\`^jedlbkT=kpwj`Ua\Q^`_]Z\OSXP[YSU^ekqjj\`c`[^kdcVZ\oih{�}���~vsm[YWUGWVPVc[gZSRMV`UXHSMPWU`haZZXLM5HNFBcLUYRhg[OPIDU_bS\ov�aZORWjtshZ^hvuhLMVgjhTWVXROQbgld[Ybejhn`ic]gZZTORWLKajlRR[^bPQ\TJM\\XUJKLRR]QLSaY][nfdncaQY][YS[]X]YW`ZLRY]WVP\ZOUTW`kYYXoxhgTSL]_T_uvj^`J;P`Xl_[bciig]dbig_jnlh_QPb^f^Y_elUSPaaTO]bVSVZXSSWTNPWeWO\UTDMFKIBDKI?HMDW_JLSR^ddee]NVVVVVVVVpghprnkjeridpvtor~�rcgrjqkjgtufejffomo~qoiod`]hmodjqiryx�t~��vn}ynkdemahh^_mcdZb]m]`Z]a\qu��vo|d|yy|r�yr�|�{ztviejfymhejkkr^c`ckZV\[_pylvyjfHHT^c`rj`ibYg_T]QNP]X-$0%*1>:=3Pecgu|y���fl}�vi]PU^WSWjqv~ysztrgg[XLGSbUV\abe]XUScXQ\UYniame|lQVQbgSONNRVVRc^][`a\Thjj_U\W_ddhkor_jeus_aht^bi^Zab^IOX\VU\LLLT]imp~zl_jwr[_cdcmlqXZ^Ze[avukbY]Z`RKJKGPOKSWQ
SNG]n`hTULPSRbb`YW^_gW^ijVnSRTF^ZVNOD=OUVJM_vrcdc[Wf`oc]\]Vbh_N_ctmUOW\[ebfgaaYeyyla`Ygd[`MLKIRWRT^T[\`f^kVLN?DCPWUTTXUWZ]jhmd[WO``gpn]Taa]a_`\WZWSUUZWUa^VNdaTPRF*NqNTimYa`i`_la]gnpd`a[]`jrc^\WYape_]b^W`mro`NC`TWVQa``_\cfc\X[_XTZ\WSYaZNRb]RI]TW@GLKGELSK=BZRJPRUQUNKUWOTYVVVVVVVVd`edgibjkkibfonnaosgbZTb\b_aXeqpvywx}xx�x�����y�y}x�~�����|�����|hYYclmrsnt~r{smky_[\n|�loxxqmwagdciqxvlestujglibiuqtoagimwz]TWYhlkg_urpjnyl_06<Q_`b^_ebMXPYkhg_mmebZJM^hTTNPWaOQkwykm�}y�qztd]bgoj_T`gaQKWZknna^WWWaab`Vbkuzu{��prx��oulhYXebf[`cJSWlcj{{p^ZRVQgnfi]TU]aTS]UUN_\[cagte`ZjgqIOdWdf]cgO\RLPZ[bkggg]NUY]V`R]XXRRUY\^VU_c[YXPQa_\RSUPSQUVjqT>Uop~wkeicqrpbU]YYUab^>@COYERKLQKD@QH@.1Ilr`gq`Wgl{he[^F`lsXik{w_ZUJVgib]I@KWlvh\UW]b^XIKXSHUYNRZcW_aajaXYMOJKUZ_e^ZVMJ`jkUbUKX^gktaVb_Vbc_^VRS]_Y`dcqmhZfZd_NH:6TMalus�zua\oic_dsm^X`eadX`RWYQLa`_]``]akdSVb[UI[jgk`_ecba]daV[\WZSX\eh]NNVPLFTR\HDGIJIJLKHRWOMVY\fxgY`WBL`VVVVVVVVQZdSSbapm\bc\aivhm`Xe]Oea^XaXf|��~{}}wtwinpoyggx_cdm^^n\\WmhrnwwnbajbZl}q�{r{����xmvtx��tqqgff`pci]Y|lodUPZ`[SXYblpaOSXaVWltWNES]YYcQZsgjclefJPWjfgiopdS?ABehmulp_goig`gnmvlV^Xagnonfv�xt|ezs~�zquwnmkm`RRMZ[\ei]b\][ae[icfphnd[__kWdXPhnYl�djim^krt}�qzzoabV]U`gUf\Y[`bQQ]JBGP]fk^l]adgnaa]Vo]lmbptqqOVP=9igaYOPMWNbWJGASLTJOUcUORWoRBOBCOUcb[^`d]n���qcbZRp�ym`okznY\ZV[dUZ[hYP]QBLLBEVIJKGEPHJLQON]vbW\ffVXe`KOhpagd\cT`GHSPRWkbRRNUfb^SYP`j]VVi\jTX[PEVbe_aUQMVVLOScjoue^[LNEHPY`_WSVdZVcSa`U]]VR[ceiaVbZOUYcVecfZ`Qag_n}pmnzgieQ\W[^dtjbk`ac\e\aY`d[_VXV\a_]jk]S\_]a_WQi]e_d_qs]dY`aX]VS\cb\YY[WJGUZ\LMJKJHEEIM\ZM@MZUUT_adYJPSVVVVVVVV\]WZb\hZeZU[bfjmknbZ]Xe[^W]]RO_mwsup_Yafdbbee]g\VaRLU`USX\_ckwq�{__bibeimnvzuu}��tj|sok�{mpn`Xjfcytq]Z|xid_aaja_\ekkc[kVVd]bomnnaSThslb`rpxraqqmu}{gtzvseK83AVeteqtus_Ucobjp�~k_]q}xz}rqxw|y|s���y{vt|��~zkmll]Oclcmmnmbfb\f^ejfkb[_ePWWNPYWcsjqk`TcPWYgqm]XeUTNRMQRZWZadTWHNacb[MffclhcesxvslcYZdlod[fhjRgzW`baagQLT`_`g^YZYISNROZPXQUaZY[aOJKZaMLWJEMde^kfaVPQZ\`\ZdaShj]]^�h[__\_]W][YYmbQIOOVfXXQFOUQVKQYcaRR\bZetebf_minrX^VX`hoj^VSavnbd\OdkT\MdRa[_aW_VUTWXhmTYZWOKT^JLYWWTXYlYc^`
]W]ZMG[W_RER^X[f[k__\Y[ZXNXVkc_]VK<digzvonw~wiQ_qkeiccbom_U_pm]g`^Z_VSOVhmqrhqif^\_baXYYZahb`vf\a_lrVS`b]ZW\_]WONXOc[QOVE@WNNRXZPIHKPJHO_VRHGINNNNNNNNXb`cc\llil[[n^Of`YPekdb_^ZX\RRYZluk_a_`niga\b]kaT]]]X]ZZic`cgjgnyp_|w�kdq}xq|�~{txtmfvwufusosrbitnh`mjbabd]^RSOONY^San[`Venmlup]`ibbkh`_cb^e`dimllx~langgqrvovRhmw��nu�ssp���~e����|{|����|zqwy{o���sptw��~xtqWOF>JZaXOXIFW[i_gf^Z]WBA[YPIMNPrZSHN_\]Yfttka`RS_^KXrrap~�pkYWeejiejhbaokev�xg_`Y]b_Ubdga[W^sik~bn]\g^c`exeZRdQERQ_^JU^KQOWaZ_Rj��`LQJEF<<:PUSJEBWOMWTU\g^`xqg��[Z\^a_Y[lssyjfjlhljV^ecb[RWYbb_S]UXSGQdi`efso�v__\Xceb`b\aV_sk][SXTH`^gT_gpig{ueXMJP```VLfi`^MNMQ__dq�]WPQQNU\[[[SUMEOUS]nXUWcVcZej^aYjgbSQB9IZc^^`fnuy{``srsiirmlaYWapojdY]a]V_ZWcfuznlzpjgdhj_[kjf_idhqm{tml\^fdiHUUKSNX[dd]EJQRJDIBG@FD@ISTQGEK[SMBEMNNNNNNNNP\U[ZRTWgka_eVIURUNgfmcageW]S\gdceiiaZ_imql_dWe_PVbl]]\Wjbbimmuryww�t|�ry|l`q�zlmks]akb][��ootvwgXa`_dgge_S]g{uiircSYmoaVgthcfo\RSV\]S[]^W_`[_[dp{vt^Xko}�zqlt^wlasqj}yolev��~�w���rfl~����~m`_dUXigpwpkgtddfv�mWTWcW:B@',6+90:54D\YOEY]ZW^ZCfcUdejyelgeZaggccq��}jbbpr�|eYbhytn^pyhoh[`yuxpgabk]NXb^TMRVicRZ[]TaacgOFdirca[UYVS]YRO^ig]]SWAC`|ogSN;;PPN]]VPM_toipmgeRp``b\Ri`^ac]Y^d^ekvtz{plxub[U`dSLT\bbf``lydT^^[a]\TOdbX[_O_jdRIAZY^nf\cnsj[a^`cgahglsus^Ye^[[^]ehPL^V]]_ppt��`YNPWXXaab]USQS[XX^[WORZVVPUXU`SXWdXYU[imdcbcgjic]`asiodggd`]ehhiam^R_yrcbhchXadcflfhidhlf\_]h[acn~ttfZZRRV[^E_[OaWTYXMLIMLJRSLJSDKZTHEFD9DUcRPYZSNNNNNNNNMWIMONHJZad^WXUNISTa]j]Y_aS^NU]Ybeki__ee[gmfnZik_[[_V]^Z`\`fhetgjx��yx��tptrjr}yigZqcgjidWtvf]bicuujg`[gbbacdsoqvk|�]XZUhUYevddddl\Rac\eceeVZ[]YQV__RSRYh`aMOLW][ndSZZZlxpmfptws{svmt`TVo|y~~wzypxwkgZb^sbLZIUWko{��ke`]PQM\\EB;OPLJMFY\e`bd`mcjcfk~����`bartiovgrywgklhrpjttjgdhnr_u�p__jl{hecmddvllhv}aaZatussZbuoRembf]ZjvgqeYVZv`rng{{jirZ]JOZWQTXYYRYY]YV\`eltrlruf\\Ykjjcvppx�ykg`[[aqqmgV^a`hhS^a_Z[]VT[UQ`mUPddheceVQVcak|c^``VP`nvoXO^hlpcXYZScm^epucckhWYYXU^\�z\YeR\goroqomX^TVde]hdejbTOWhgn\P^`ojwjej`O[ZXS[^_c[koqlprpqpf[skgU[_pv`lyqcffgpus�~xwijv`ecfl_ckmmkhm_^Z^WerplplbbcVNCQCJYNL[WTYOGPTNZPUXOSa]JU
LEP^`UMHNPSQLONNNNNNNNMXICBHFLKX\URURLVYinmk^_tn^fSVWVZm_KW`\aaedagQ[_]dbecg\SZ[\\bco\h�uosh��xkkrpqurjf]dslu~e[hsqce^`etvsofdgqwwt|j^nn��qwra[Q[ff^aeboaWhjermgkbZU^VWU]X\`b`hb_Wcoi\eeZT\`\Ztke`fckmxtned\T\bgft}sqlwhmqkdrlsiYl`lbhnwu{|}lfkjid`fghZfkkmdplhhnufjj`sdnpv�}nhfero^ho[gdjongvrjfg^Tbqhtvb[Y_QYjf}t`Sg^ekgia`tjja^dga`WjpcWintuk`bhdxj}aVkMV_j|m^ebk[]Xl\pW]QW``YTS^^qqxukswl[;9Uhmei`SM]jouk__ac[QQHWSSjrZed\[]e^_bO_]haaaT]hhYYNKY]gvd]afc_fcjn[Woxzssu{o}}xyvylorw`NQWUf[dmWHRQemsijj[]`d`_heWc[^^_PHM_Z_VaWWk\dWXokKN]b^[[^k_UWdbgkkhgkope^ac_fpggcb][]UfuzkpknpngkU`abc`dbapl^m`OLQSP_YViYCFKOVHPDJMGQVMTMDTWILXQU\VTUWTQFNZSQUOQPEEQLDNNNNNNNNPVPNT]]`_XbdUW]SYc~ha][are\cXZSP\kic`TN\jeX\e^hikpg_Zehl`fcalppez�m_YYc~�ulotuqlfrlmvwnylnkfijzyxjqxv|{rtxsjgvojpvnq{wijm_ZgXg_^ffeegea]fZ^eZOWO[Ydbnmg`ahbge}lcndUMQXSH^ZYZ_Yeizi\aifZdpeYZeafidW`e_epfjg`h]bXUfuphfd^dkdihdmehroc`ouwpdd\akny}rtgpsr�ptihosny}fnkh_ccir]XUQPbkqm]]TI[ROkemeRHUXojkgkUerkrmpxVN_fY[RP`\d^NZU]ZKIV^ROZe^hdbQSUNLOQeffcdb]efbddlglopllgZZWOJLjugV\UJU__bU]`\TTNNFQRVgm\bVbhmwikp\e_a^a_PPlkLcW[]cecejstmfed\dg[_n}�tz~�sr~�~y{vxwrqinphud\e^OWWTD\S[_T_i[]\dbWdSWV`WLDRJPG]LZVV]D>OZPOPNSYXbekTUQ_ZZ_\TT]oh[[YXZZk`SYitpZffe__\\[f_cQXZY\VOEFcgVhS;<LT?OMUpdVYW\b^XaLPTccMF@IXWFLKJQ\\\TZJMELSHIWUKJPMOP`NNNNNNNN_\afqvz~lW^^IQcVX`wTMSSYd\hlfiYS_`im\B>Jzvfkjik`^cjm^dikhxxtvo`iwz�o^[Mu|upz��qjhvuyp}`c^fml}{k^_apxjbhghZLLVld^fpZYo`]qeMIPH\ZRWbqp[JJRnkfqppobllonsphozlb[mmqkuqfkpi`]ceeg_jj_deX_c__imvkg\bndgic^oxvtmf^VTUSZgmcchqtj^\c]firgnkgijdnlnl`plhjnlng\he__`rqnluxvicin��hd]i_PUpu`\H^kfi\_k[\gom[AUZ`grchVT\`h{jglomjP>TLUSKU;;<OXGdiZ]xeLftZ@PObcgfqe�sgadfeh_c^]V[`d\KMiY`hlv�lbgYYZXc`cga^fVOJOLQ_mkkTLRVfW^pefjmmmgR>Z\Roswkiuj�xsqwytxcbm][v}dn|�ycjsowyogc[Y]m�{wffceQMPSRIBKWW_cRUV]_]oY_hqhZERTfSXWhVPY<+'8JSSNYQY^TYJZZd^Z\\YSPZbcbhc\FZ]^fknocY]egYV[_MUgbba_k[JEHY]NZCIJJQVl]OYNSWVfgb^iYZTabTLUmlgRPD>?@<MR[QRBFWUQTCIID@PQUNNNNNNNNqhng`VcsvsdSSalnwesfpteivkud\ifkujW_YMUajencfbOOOdu`agac����nWtdfsoiT]xqepr`bkameglWQXYff��kmdejwsgd[ZW
TSMVXdZfWS^M`vnVdXWZkegw{woXShjwgkv�~ilrv��{ck�wzp���yk�p}��|krpfaXc^ZWb[gWU]kgl^db\]chdghmfqqebVWQWS^TUR`ot__fbcXdoicjifdeYojmyUYcmrlnmqh{lcujsedff_S\^fktswkbobWe|riqVYadef_aWTKIWILITIYUWSYjbaPMVV\acWb^SOZMGLILLYTJYieei^\cacXTT``elUNkq`UYenujuztV\f^[UNrnUJSZabcsknqhib^bdjrTO]UWchigupiaYk\`n`kbZafhkfb[bevqif\Nskluyk{�rrxd[mjt^^]fjZlr^^_c`]TACRjgbfliwebpus^V[krnpjida\YlT^[lqoSUSe^ZbjuZdcla^YXb_a`\S_eXXZbjfWS]_VKMV_sim]c\TWYXWLHIMRENQMKNVNIPXqr[_\UL:<>IDEFRnlRRSg_RaXMYVlhR_g_nkw{tR\ZU\WFY[WTTHNVD=KNPNKENFDNNNNNNNN^TQTFGGV^fmhoyvuzmZj�vt||wY^jg_md[VWg[bbg_miWYLV]fneie^d|�{rlihYq{xx]f�wX]ZpdZ``Vj|n`cdkfjbozxhlmklzmbWT]YRLTMWMWYVQ]oh_kwkm`kbkhrsn^ckgjgdagwxpnjqpiaz�slcrjprdb`kik��tovega`^in_]YPZlliTW[bfdgikibYTUU``cWQX^QJ]UYbXfi`^]Z\]gb]Ze[YSNS\MHX]_WISXO]qm{ghj`Pad^okntuol\mjiefpotb_gYVhl\Ycm^NV[OSY_Zbgadmspl�{wd[TdedihbbbUXRUGLSdeZ_blihlba\cV^Y^NPzsUTa_squisZP_HY_ZjXLJO[a[UMbfTUTBT^]X]ZKSY_kspid`irllg^[NNXO^eZKWTF@LNYZ^DEfoekh_lwu�uwoqdvUJMaVFGdjie\bfkhXcqkiehrolgvy|hsgchfhbgpb[qheinbqlpebS]ia\fbW_UeZTlgbZbf\Xa_ZaWUTcgddTQMGIT\bijgdZ[UZ_V\X[Z[TXMFHVBNTV]]LTX``SEIG=FFNXQUYXXKE[]Y^{_Zc`egd][g_ed_Y``]MT\VPSXgWNKHJE@DLGJJJJJJJJ][WSJMGO[bmmdeiviotuuplwr|vaSccjkYTI[ccf^haggTYc_ep~uokpuszrqlnckn�gWqnblgrjbi_Zj��qzz�n]b^V]`h`TZnug_PMUWGSTOKT]^QP]_[hirgfehcZs|j]lndhjcPdgkyrletqimfhXPgd^hiXRVY\fu\axsda\drndWYj`Xhdbc]b^q`meSV_^[XT`cfWUac]TWSOMK[_]a[IDFYWTQ^^caXX^W\imfZUZf[Wf\{jV^]efbvwelsdmso^px�sa_bsyrk_dfopkb`dQBTK\YgXffkZecY^tthlkffe_kfV_\c_FMJNUXY_aZZb\R`npp[g`dS\knYVY]]hoi_bgQIN_XUZ^\]ejgilo_\``]SP\ZgQN]c_gkq{y_imfjo|pjUQSj_UYZrUT\an`SXjnniPSVLZl��edZiwlZWQRTI]mo^QW[\aYastl`Qb``_kprheW[h`UVV[^^mi]bba_brhRVi^ZX]`c^e]ZQUiWYe\WYZSMNDQTUWTL?=KRLECKVHDLIit~pn}��VX^jnXW^JHRUnvhcYcmeRRTWSTZa`SPLMUW[b]`oMS_LIXbbahXTVJCBHOCENQIDKV^WFHQUSV[XJJJJJJJJekf]WWP\^amwhkabfls{kTaypu|�uristmncf__e^jiknZRgton|ujgygWa_ecl\a[PY^Rl�]^nqpiexg\dojfe]a[L_J4EXh`[cp~bXM\f^NORbiekmcYVWS]\rhoq}~mwt]QdjgffbF_Wjsgkk��qc[fPA\whQYVahttdiZ\gb_`_l|e_ni`QUUMT]_fXhbgXPi~vieZYV_XPOS]a\aQ<<KanuteSAMV[RPNYdcd
dZZXd_f_T_[T`US^ICFMRfX[VY[I[]^SZ\fbX^dmwl]YmjfnqlaobQNBSU\ET]\GVJ<IXTTWHGTg^gsqc`eIEGVMXVONGESTPOcZXaS_mce_fkf```V[\Qdjscumncnlinzzl`lgvjaj��~ikjxd_f`W`[[l�uw~o[Xq^]V`SbeU]o}kilow{l`ronxjnpi`pvtgUdls�~pdXPFEX\JEOY]edfsy|�}�}|ypaR9@X^SUWPNJPQYeXXRkiint\c^_mjf^hie^aiRTXKWKIOS\b`bYWF<RaW9@@>FOQPGGM?HTkm]Gl�d@=GXXmp]YaV^VK[\\]PNHLZMMJGE>NUTfjYAPV`JTd\VXX_gnWKH?G:8ACMhgUDD@SMGDDHGNTTJJJJJJJJfoj][UPfsojk]qi_`b[rhL_g�qsv�xtoersud]^mgijjqgT^m`TaaWPbZPSMMQ`\`eTc[Cag\MPhqoexl\aSWpdWZa`lZM\iwmjabpab[caWWVP^d^dhpxtschnwypvyqpnd`PQYNQNUBE6RUFS`|ojok^YX[hpfptqp��o^fhhdmo`agikmmiYQZXdNQ[`qidZVZ`gp`hm]binpdghV^WIMT^YX]]TEHHKKOMJOda]Y^\okae^\SNXW`SILbecPVbbd]Se]YVTJMMGNVWcebXbZ[W`nZaZ[VcfXTi{zdcqaW_eX[g]WemYXWhfe`PeleTaZQVR^ah`RVT\`pnuosipnecichccWU^flxxpkqf\g{{ni`Slpd]orollg`]ZbcT_fh_c[`__ii\OjRSUh[Tid_fXUWOf\dej_]nd[ebsjabQkftdYW]CSXPcmf]W_`ZPPY`o��{��zrumbMLXUZRRWfm}qaSKHZWZTlb^UF]\beXokhiGNaYYSMSbf]VYWKU_ST@NWY[XPLPMKUQYWWR^\\fdXUbBJkj^\\ke_fXKSR[SQWQQLGVS^UBIL?LO:?FOTT]cb_RSZnYCK>BIFFQTRFCBMNKPSYXYL@JJJJJJJJ`lfW^\Uelhb_Nfhf^cdnnfg]wosv�zpXftorc^ix{nlenynkc^YhlcXbUejkcd]]deii[UY^fYRb^WPemg`SVkiXOKQQV\fhnekYV^]d_T\]XZZR_]dZf~{{nh`PURfhhli[^Y^edjZYYOKgdOHGE@JfzffagfsmdxnfppjgstbNRYSW`YQVfYOQdceJP]_blbhna\kyqus[X\__ZV_T\`\Y^XOY_UQR]X]bc]RYYaed]Qaa_dkcdk]T_OZZTg^gaVh^R[WYSMNVXS\[[XYc^b``OSgW`^bRU[b[\bjltjYYYgj]ih`us[S]n��dlftYP_QLZLLXYSUJQaboaH_L[\XS\\QWSU[ZZ[aewwk`klgb\\fe]a`dXVcdfhdaeabTHIS]MKUP_aera\^QYhwlfqZ\fUFPFHDKMNH?XPQS\adx_jgyqg`Odh{la`]^XP_XQFO]YW]dnptsfltoT_`MS[Xe\L_aeokcltqgc`l_cbO][baWNfbVGFVJYWQ\cYPbKSVNVVROTSLD=>FPOJQKRPT^YY\aZUQhUWq]OM]aqsk[Ta[_^VHMNUZ_ZZMBDHLJTLLBBPSXdbaVTT\_FJCGGAAUUWLL\F88EFHGRMHJJJJJJJJ[hcVkrgid`bketu{qjwrnn`jwoo�ypkidspf]Kaiuhd]fxsjWZ\hfaZaYrv|ngLDwxlf[aigab[[MGGX[dW_^Xj_OFRQZ\dj]Xh`_YTQTQ^`b]YXWPO:GcRLKTUTTQODALehc]bYam^QZV^ivdPNJYZfz~xj��q\>XWYZRcylzs_WSQWNROYgcbf^a[YYg`aoektssqfrd]T^]Z\^RSPS`if\YQSSNT\WZeg[TJ[\foth\WRIN^Va|tpbjWKLjmYYXPh[OUTWNKXVKDOI[VU\WOK_OKRNdcXWV_XYQSLbk^[dRZghf_QedXURIbmh�gQE8DDP]MSXLG\QS_\iaPcGS[UWg\R@CASgRY[`cpdRk
yn_ULE_cnnihfmcTURmllkde^^^OCMJIIYtZTKPajhmhbTZ^YGV]Q\X]BJLMKTejjjkRR`n\hp`[bdTZZY_[QY^cV[fbc^][_[YV_fF812=INSSQB\\IW_U[gr~`Rng[]PMTbRi_RPHI@FDPNN\XJJHDDJRMJXLJFFIC;;MJF=TVP?JWPTVZS_^dal^aQMQZY^e]_RU`_KOKU`RKHFJKHRZXXZJO^IQc]YV_`\MBGHJHNXYahOD^IBCVTQELFGJJJJJJJJ]^\Vmnei^XZaitpqymlkgZWpbak�yfbb_sojeShfqokl{�tiZWV]UVUZqsml][OJNaYda_aPQOIQWYY]kofi_PYYdhsxl\`gbY]_\YYU]ZXTf^QVPDI@XrYVajfhbc[PZZd]N<<7AL\UTSQFURP_k�tV\aj�fORWUKMXX^oajcXSPWdj_ZsV?AXUihi]]bjamjZSTROb\_Y[TWeUSPUPOKAM[\YUPMGSS[^]bQXNFFYcp\LO]jRAO]hZx]MYZ@;HYZbXOZS>=CYUMDDWg]_YZW^p`]ZU`ZL_D?DKALXJXdmw_OLEJNJPSUdMMVjgixsmTHEPTMeoq_WUft\U[b[JMRLX]SN5@EPPE\[XCX^FOZ_``TBEPl�nocYSNUPe_fuwnZ]]bZ_Ydietn\@QGXQGOVAU[XXamou^WFQ]SY_`ON\o~LThnaet\JW`gb_ht{wkqTUZZv~XUjfeaM6*OXY_OVbVdricYTjgUSTVihSMVOidF>0?P?ADCHW_]^l^WVH@D<CFD><:;@IRR8JK_kZTU\OTQFRXU[QIJdUUJKP9=DISYSOMPTWNMNRV\kfg\RA:KIDKF\s{eHEF@LTX`fWeuZ?@@QBI<;4BCQJJJJJJJJi\Z\jYTgWalltubTpx_jmXhnbdkhd_jV^nYcb`h[djck��i__UPXQSSSKBJSNRZWLT]YR\JMJOT^\QWgf\eWR[Q_brt�o[^]^VQ_Y]bcpdj^TMV\^Tecu�gotsgaachcmokdgPFPUMliV[VJHPX_]k^TJHL_nk_\eef]]\UcPPMV_\]buucf,;KOCZadhRE)=fm\[\]khfj`a[[bdeTYTSSQQUOORRWY`gqmfnZ]OIFMG\PRO\d\S]kj`c`ZbVVPZd_[ZM[QXPDMP^YJMY[sgb[dp\af\OGHZOGUU]WUHKWNUWKDVNFJMVS`aJKMPEOb^VI<A?7J^dVLUU_LKX]WTsfffUPH:I_[UXhcf_jfdTPSOLOOADF_glU[[ba\_WGQlnokjbiojk][`\_YT]KPVQQR_Leb^qXVAYUTP\]eoxpxtVXROMeVah]X[TUZksZfnWjtdx{�pxZP\P:2.&,/5Sc[l_L^ZSPVQ^ecggde^X\_[UVFfiGXcYY^ZUPTabTRRFHLMN?6DKHGFBALUM>F?CWRKGDXTROZ\PFLGHZKXSQZEIPROR]U\]ZUR\XGBK[NWCKaLBQXRBQWSON?UOWPN\hragns^STUVGPLM;@JJJJJJJJT]]`h[dl]itx}rnagotnt[_b_[PbpYYM`__`[lkfchabf{d]_XTlaVDMLBTONNSTO[YRabMU]SP^]VGRlbdM?CUW_bhf]dgPFIR]Y[lplpfXYKPZmds|vnegei]_`f|xhhaax_WSU]ZVjlmd\\bbQWWW\\bW\kcfhcfSM\XWQUdV^tglqa^K6IL\TJ-F`UP37[ZZNNcogdpojf_vi`b}toqdY[QRaU\a``d^WTQQX^QRa[RY\c`]goqt_MFRRPAG]LNT`VNJSHHKMLaWDEMJY]XSmPVZOXJO^GBRQ]Od[FXZ^ZKDVWV?QTZagi]SWNHRFOSMEPGKK^KGEKLNY`S]Gnal|kdjYNZVWb`annlxTsnidGTRJRUPSgUwpi_LRRORlsiywpownbaYT][CLdgTYPWFROOS]RSSNJ]WEA@PML`jgbX[YROUYbeRO[k_S[aci_VapkfnnlgdfX]V[\GPXeZheZrdei^gu^e]hipr]Y
[kkqliPPMGblVZUY_Ud\YPX]=EGGQ?<HGCCDIKM\G@E9836A;EONUNU[XOFEJR`]bOVYPDVPK`S_^SEQXVPK<SLNLPWPOa_SD[ZRTJFORNI`girwakhddUQEFOYeNN[[[[[[[[fZ^e`]sqadhhjaihr~rR[fhS_YTclSSR[`_\Vgnq|viaZwnc`ZYej^QQLYZU`TLLKYcZdnjl�~rg^aUV[Zijdde\LO\dWUUTSNQfecimlnia[WV`jix}vojtjbgci\jkbla]kede`Z^aktp_YQXd\TI\``c]asq~mguqfb_l\ZcabwgrsifI&-(*-(#,6/2;[dfMVts^^rndYZup^N[TZdZXejT_Zg`kYmcTd^TX`_be^`mh^]mwh^hYWIUINRaXUZWabXBLNIDI<EWRNMEQVTR_YBREGGOXOLRP^JKbJ]sjbVPZVWLSgp^mudYU\QNWVWNAUVQ[cjamUSRdhd_Q]ewump~wkbIV`ior^RX9NHKUP\ZRO^`Q\[shUUGJJX^gsqpvpvlgszmr_BKRP@C?GBNIFFTT]cQAGHJLDL43BHR\^^[`XLK_odbvbYaZxvXon[^cUMjgdcdW^]]je\^cheqbcS\jeuuea]dca]\chmgtoaelscRQ[lZWh_lQVW_cXZ[ZNLLMMNTXX[OULEM>B@FD<UQCBCDABSPNGLVJDHRQHKXW^i]WacdaXRHLbZagY\XSGZNTZbWIJK[QGPR]^]eoegegneSISOS`]^[[[[[[[[kY[\TWj^^^][\VemzhZXlwl]WW[\_QRXTXSSYgjlolkgYrlYY_gbmbeiMcUHOFKNLJTILNY^_^\]^d_fg`bieeb[[QWeecLIY\Y_X\a`_`hmiunsd\\dpqqzs}kg]\WZjb]`biupjwsgywgj\WXZb_}xphegwowziu��uiqoifbYl`lgWVD" $,(7Lc\�z|~sqx}hdU]^^beY\mZ`lUV[ojg]dkmVS`ocKYLLOUTNP]]VOT]XU^W^PeUSLM`a`PN[fZOVRGRKDP>?C;EO[gaedaPE=\XnwdIVS\]`ezzmX\a_XacZV^\eZYN]N?DJ\XEXe_Z[mdwbdfVcaZVWfvvtngeu|dAQc][PRRRXKO\ca]YSZ\PMGKTFXXZYe_hft|�gsoNZ^WQ`VLLDQ]]QTcJS]YYMJIMLXVPHAPO>IVb\]b^e\OP_i_f�m]fPtxlrj`f\Jpf]]aY\ZcWP]]IR\egi^]ebu{kii^MA@GPRIUSB]zqghSG\ZFFIaTM?LPKOOOKXJS_WSTPYZQSOYJJFEJF]MLKIHLVo`XFBDI>KHHIa]X]_h[`YQYULMH\Y]aPOYSBJMJR[ggRJ[YMV]iri[gmcafrsh`jVQV\Y[[[[[[[[i]SKPSYQUVWSTMX[ZMQZ`cXQKUaPPRSVINJQ[\[]]ZVTL`bYYbj_f]it`f_VVZZHJKXPXQZYST^joj^cYbkvbWUYZV\cis\b_`\[]]ebdU[[]tnkmrv~~i^qpoif[_ZW^dcdh`cqtbjm`nhjk]^]Y_OYoohebkZX]TbpmaZdafpoei\[M.,6'
"*2&*<GT}��{bkhfz�~plSaq��{skyqtyjenyv[g`cHR[oeQZTRQRUYQPeJSQKOSTYQVXqlgUQVZ_\UXaY`Y_]a`SPUQPCIMVa[Oc`kTEUdjeVObXO[jcbcbZXXkjlvnltkd\_Q]YM=I`dSV]Z\WWINTU_izcdZbagxvzzqqzxox�wkajgovtsgd_lfkb^gc[Za_ljhlng_`g`f\wp][_cnnQD@MebZdb^NWbXXPNPLR[^ed\cVT]YbabW\`]`a\[ZT`VIXUZeqx|jhqhh]chgbZXU_gU\^fjjkiinlsfcai[]ia[UG?FPJKJAFoka]d_G=KMJ=CEIFOH[PEPQOQYNR^RSbSTLIGOTMWV[HRVKFLQQCLNFO_AE?MHPQNG:GEW[CFYX\VQ^d_QBaXQPNHO^ktUVNQDIJO\P]gyjc^_dddhXVWUT[[[[[[[[pZSR[Z_^cge]\VZRZMQ[Z_^WR]jSNNGGFOPWXJNV\TEGMWVUUVTPRLOXfil[NXVPJV]O[U`dcab]aa[]SVUf^YZ^`\^\`hYgnWQZkVXYVXsfX_dfrjat�ufrreQc[TWZadolueabl\ftmga��kffW^[abmgZUgmtv[PPJDK\TXlohXVUZ=>SN?HQORU\ZhWxji[ke^oyvpt]^aqoo}otrlu��njoNOHXMZauq^YDKRTRVMNsSUUY[SUeWQcihe_onjaghkmac[`cde[`]MD@T]\\_OIRaF[\[gok]]Y\kbcTDSkVJGRT]^okRLZe\\YFADLVXPPPTPQRPYR\szYbW^RWclunfmpjxmy�{eegebelb\Terq`Z^XZ`guundaXd^]]^eezqgYXWqyjii`ntnmjrghm`cb^_bfealsljbSV^\INWX^bjeWZc_[^[^l]bf^tvgrtae^TTaofX^u^LQdigobbg`nh^ek\PaV]`S@@KKEC?CYMQMU]PBURL?JEBPMNaOMa]YNMAENDKHGML8=CMTUVTMUZK?>A11AN?KdGGHcaaNF:F]WKB;-@NFRRPVVN<SMU_XUWljmKSOI@OTLYVVZqjdYSYR^\XYWLV[[[[[[[[rR_mi`d`hlg]^\_SUOTXU]oaGOa\^ZV^UZWZZKRR[UGScaVPPONSIEEMcf]D=KM_^`cekUbyifnpumfjc]P`g`\UVVbfolfueRIN]HLR[h�|mlsqXaey�fWa|p[`bU\MWh�qpbdd^`fkqmcyjZbjXabTWc^NTk{�fNRPHN]XPbkfL^ib`cmjZecJMU^Vhlh]hNQWXahl[]^gfnhW`k}e]loZ\`PDT[PQ_os{jLZSVXSYUUy]ZZfmcduj]qfdaZkosccflk]R`TK^tnsjXPOdidad^Xa\Tai^bincUOSiX_TJRdW^PUIQIYjiYZUHMWJHEFR\QV]ZIHHF9=Lpoespg^f_b`PPcdYn^`qoheffWU__YS\lVQ`[^geUood^UMn\USjvmmcnqxeahct{���xjcl�wujZYdptnkllkjnmj\STPZ_chd]^]Y`freph_ddgovti]\a`UO_is[[_^pYIF^WgeXdcahjbTf[bhTFBDLHGC?ZUbYJFNCV_RPDDJQANSDZiohZWRMC<MOPO[KPKJSWZUYgfOUWbRSOXLCUEB@POTWTOb^D5>+9:LIS[SSbaZOLQfa\SqpmRNgklmh\fdXR[YXTQ[Ob``[XRb[[[[[[[[dXnrh^[STVTNSPULJcqcQY|aUZe`_SRY^c__d\aN\VP\dce^XW]cVX^ckbTV\ib^^Ybzzahv`i���u^[ibNMOGW[L]qibOU`QUPVYSWbzvtkmyyhimeo|qjkmoc]cmlQ[h{migkqfbTPipaUOUZ^LRXGHQSJZ_`PE@IZVFEKSUhwq\ouaqollbmdRdSWkkog\P[Pc~kgXUWY`lehXZp_afJDK[ba`mkYRebp�wa^U]ZWZ^u]eltzrnvnfpikn^\_hJACO[]NaQGUrupbY]_onhhW]beov^Y`_cjcSMMPKPMQQIOIW[8AN
L\bOMVKUccZLGKLBSXPL_lnOTYhjtwtleikafig_TWYYYfds{�pijke[dmp\WcW`^SUqrV``h�ioccpuwfm{�y\c[w{nny�k^USYnjoj^bV[bkoqrkhij�sc_d__agfecSRZ\jjvsabqrj^ix]W^ff\a[cbKSNaa[SVVldfl]`gc^Ve_ZaPKHEPJJM9g�{AFN/&15/B9;HUQ\WK\Z]^PGJMF?C^TBPNPRPITWRMTK?OU]ROJRRJD?@NWQPSPNb^C.,2POT_[^dhmgeQMHSTTG`e\cXQaqh_\fcc[WXWZVYSbig_ajp[[[[[[[[Wkv]\[RPTVWX]VZS_W]ml]|cPY_UUOV[UekijflOQS\f^]lc`UZ`\engf_ezgkvdga]e`eqcWg|vyrkge^P\skoajw~hh_jaW]Xpk`Q\^oroajnfdrmourttikchg�tfrecdnytw�}rnypvz]_U[JFYaaa]JXU\QTYaklie[^hnq_Vga`qdZaYeZW^Vb]kkigh[f_f}|}qbYWjq]UfX^ge__K^hwgdlohf]^x�uuYNdaLRbunqpoj`dwhhb]]i]RUcLS]^\Z_\XbNO`akdkr�~pj`w\J_]dXRTZdeXQJLBIKERGKSL^STh^c}^ShUX_l�bQONOjePQUZSBU]\enf^njiXgmUNchhZcft^]`vm_Y\iiqhel`SG[RSQsUkpslnoeencih`bk�iuUi|{us�x|swef^mo]Ye__fa[[UZbb~rpjia^ckfdj`Ybh]XcghhjZ`a^lcb^bg^`U^`GZ]YhaZHXrYZYNjk^US`f\gSQOMWHHUC]`Q'-)"<LM\VbSJBGKJUXK?::ADSRL__PYZX^cpZODXKUXTVWRKQIRKTSPR[]Z]]WYZeZe`ghjswbSQTWXUMG/@RKiRWXhc`UUTTXZiovle]aog]atg[[[[[[[[OcTSNR\U_YZWTakdnlma^QilaRchYL[YTpyal�{UW^rqbbipdc]f�~{bZr~bhofabgem`\W`P]cackcYVQp��|upnooinp]_nvvmXThidogY_WlwuyspuorfTfe^lpufgsvk|vo��|obitg]c^]UO[a[STWT[gkkold`k_VT^USZQTkWiikUYOPV\a[uvc[ZWSdTJT^jul_\hjeZkoqpkjha]lfc_ed^~pd[_fc\Znsd^V\`\ltlcgobkgVS^R\ZbBJ[^lf[VQ`HOX[]h{���upjqMR\_cfWS_QT\YUY]omw�nld\`b]Zcyn`bf]a[^bROQU^bVUGOWSOSfetvkda]^erhZWjymfbvzc]b`]ZRRGLIQ[XOEGRXVP\|^ljtXKSSKSafp`gsz_no^v|rapvwtw�l^k_c\mq}d`bdFJW^gkjjVRQe^VelgZgrdV\cffeb[jomcXjpd]^\YXL\u}sine^kg\QYYTOG\IXZ]Xi^UX^RSZ:+-A<34$ 
00>NDBCGMBLCU^KCMTIPFILLKEPEYd_b]POJEPPb[POWUce]gij_XYPW`]badijh^TG\kGBEMYMNJZPNSVLPNKKWZVXV\MD`nwngmglW]\dYddddddddPZKYZOOQ^[_a]boukg^WUQ^Z`Zab^QUWbux^`sspqznZUY_`dgowxvow{emphic][^TTUf]`[ZTSLN[Xb^`iirlmsmddcY`mjnk[ef^fb^mk�sdaa``exqagaT]fig^adbtl^knmi`\[MM_hc`edZMJPUPPUSUX]aah[VZbab^MRm_hc]NY_hgkdbloebYWVh_WQE[kqlhclslz|zuoqpe_hgquz|u�zphkdWYT]`]]_^hblvphinb[X\ku^dst[[b_cWg[Ulknjaelvvtqltan[ahgglf[\XUZPOUaoov~utl_ZUPR^tncfgZ\SSRLNNQduouiee^USXZhyxf]beXdc\cu}ymgrr_Y]]laZVOWVS\\XSSURMWYpZe`_A8964G[bmT`py\a`WUW\f{slnhjXejjj\gr�pmhjKDLOW\`e[VXe[Xorkgbcdjtrg`^XO\ce^_baZW`kpwecjmnnvsmrqh]cYP[[gTc\PR`\_fgZ_zN+"=IE@.#!()2>PD:CKHRJIULLV\NMYQQ[`QXXU[V_bVVS^TI_e^Y^U\e`bdi[YWLNQMRS[ahrrfMV[CBHLRPUYnfa]S@?KMDKVRRZVWXadjh`fh^TI\][dddddddd\eRfp^TZ]YX\\W^n_^UUVbib_b`]f\TW]il[W_mnhgk^KGJS\\ckqhlgn_ajdnh_b^U_]WTUNQQKTRPIPVFKYotajkmnj]ZfgjymrwZZ`^mtszhnrefhhbhzvmhcSSddla^`apfUNWX\^\\XX_dSSeb][VZ[SSWVNFHZgo\SRZcdZNTegli\QS\dEer|dUYc_bfohq~}mhhhqcpyorpy��udpjiftxz�~~|vkrdV[bortflYqghqk`\^eTR]lr_u�{rquz~qwh\fg]\\MOTPRV^nVc`_]VNWdXT\RRLSVvwa[dbN49KYbffl~kZM?P^jhfhc\jpcli]ab]`WPVnu]S`fJNOFR]X_^SQX]bebf]f\Vcqne^SMPTX^nel]^[XGFLQXhi]`V^ciQUTU\VZgzk`biW<ULZgcalmdef~m[^Z[Z\a_HN^`cjWJYZ_]XbsycaYMX]a]T\i_A;ShhXSSXbioxtovvrkbdWTAZ`aYVFITai�ubL-'$$(J9.&492/0/7[aWM?HRPU\eZSXOLWeVW^ebWbj`a_im`\^ig[SQ^^`izuZ[Y^[RPNQTYio`<9926DGES\aysofWFEGPGOfgjyams_V_omeshbRga[dddddddd`qYeui]`c\V_lhhxttm^X^bX_da_he[YZbgc_]\Y\akdVSXd`TV_d_eZ`WXcek[RV[Tgab\VTZ]X`bfXV^KN^y|dp��wwrlxlu�zy{c`hcjnkmWXadkfd_abprihXQgc^ZZYVZRDIUOLTVZ\\U^QPZOV^WUQHKSTHJQ\]b[_[drndilgov~tpjqubie}znreLVim]g��webk�jnhiifkptyryxpfjfewxoytame`���qna�v�}u~viabhUU[`bY��yzuv��~qg^bi_r�jb`VXTRYLTZVUSKX\Z[cUOWeh|wzsgrq]Y\]bjjm}wql_ktxtpnnekcNaiY]edjXYYfiYUcme]ZHWYJ[mokf`[cleYhN@GXMKOU_g^TUZVddZND<GQX^g_S__`WXKTQSMILVpsrpfV?]MP^do|rjaSjYwxolca`ddUNOUZIOT^kofm{y``ZLTVYTUJTa\blf]OLMPWY]ro[abcw]eegrbjfoih]a`ZPZ_bgXTKHKXZ[JFHACIEFjb[ZXSWRTXig_GJFF[WVZe_LQXTXU`]WUNKMMIFOLRZjrZ_jsnb]ZkjesxfB;@DDTVG\cf}qnh]TXSYNQ^ZX`gs{THTqveyyfacbQdddddddd]tZarj_b`ZQY
llitoor[\`i]_acddhbX`fgebb[TT^jdWQUegUT\b`g[a\ZbeiXTT_Te[daSUW]ZY^iPU`QS^v{hk��ptvkwjw��}widjeghehYZakpdc`f_kuhk[Md^^ab_[WRHFYRJRWXVWN]VTYIT]XYULNUUJPUXU^[`^bsmall_jp{srkuyhjgyvcukQSkpbg�secl�jqiihgkkpzw|}sgf`\txiztZgee���wr_�t�~q}tf_cgSV\cfX~�v{qn}�{qe]_j[m�e\[T]WTWOOXSWXS`X]bbWI]hs�vvapqWVZ[blig}zrm_jvxukekckbR_o\_hflW]dfaZYdrl^[GYYI`eojf^WdodRdJBP^KOQQYbZU^USbm]PCBHRX]f^Vk^bVUMZQMNKMTlsrh`VE`\OZhh{qpfShZwxpmdd_jcTLU^^EKP^jmfo|l__ZLRSTPPK[aT[jebOKOQSU\xw\a]aw]f_codfdwji_ba_V[]]d\[MBAQ\cNBG=ADAFo`VTTYXPOQhlcCKICY[X]fcPNUV]YeVTUSOMKJNUP]]au[\jrkcabondqtaB;?FBSYBZ]n�plf[WZQRNU[]b`enyRJSmrh|{_a]bPdddddddd_s\fvj_fc`W\kkittpsX]`eW_^dh_fdU]eec\`\YWbmfZRUheUV_c_g^ZYZaehXWR`VdZddNSU\\Y`oRV[IO`z~jo��rxvhznx��}sh]efkqmm]Y`fj`cZeajwhiZMd_]`]Y[TSHBYTKT\^ZYS_QNYOWYUVRILTTOOORXh^Wa_urbhjfqu}vripvhgh{�jscUNgk_g��vh`h�itlhdfpsvzn}{nijf_vzjyv\hfa���qn_�s��q�uf`gfSX[biY��u}sr��ne_cm_p�i__TZPQXVQXRVSPXX^]]VE^`f�xuoizm[XZX`mml{zrpbjuvwoipbh`UZkZ]geiYSdi`ZX`oi\[F[YL`psfec]eedTgK@MXIMQQ[e[S^\SZdWOCDGRY`m`O`\bYVKXROPGKXpuvncV?Q[LYkp�ql`Qg`twmiae]kaVKNU`HJSdom^i�ubb[MSUXUQMZ_Vbph^IGNRSVarrZa]dxbcbipgjcsgg_b^]SRa`g^YICJX]_KDQ@?H>Ak`Z[ZVXPNRjlaGIJFZYUa`bQNSU\UcWXONSRGDKQJ]__z\Zjpf_]`jmdru`B:=D?R_G^am�nni[UVSRNSUY`[jnyTNXmsg�xdabdUdddddddd^pZftg^c]XR[jkmxspr[[ceZ_`bh_faV[egh_a\[T_icYRThbUU]d^d]^]^eghUQP\Vg_dgMT[[Z[]iQXaQS]u{ip��rwuf{lr��|zkagffjfgZV`fkdi\gajwih[Pec^a\W]SSIGXRINTYWUU_TQZOO]YXQHMWWGPVXVb]^_]tpahihkqzxskqxffevxjphQMnp`d�tg_p�mpagehnhl{z~ymkjg`uxkvu_ih_���tnb�r��o�ud]ehSYZ^eW��y}srz�{pg`ccYm�j_aT\QT\QOXUXRU[[bZcYJacnu|tcqr[Y\\bkhh}ymm_gwznliq`i_Ran__jfdYVblfZYdnh^\GZVNZgqig]VgkaUiM?MWPJQQZf]T_YS_j^SC=HRUZjaSg`aWWHTRSRFIVkprkbWCW^P[fi}rncVc_w|qjah[jcXNRT`IKQ^jrfmk`_YKQTVROMZ_W^icbNLRTSS\wt^d\cr`fciidibtmh^a]]WY]Y]WYMDEQ[cK<J<DL@GmbWXQTYROUlk`HEHFXWR^bbRQVUZVjRXXSIKOLLSHW\_w]Ynuia]_lodtwaC9BGBP^CW\imok[XWRWPRZ^aajmvQGVjtb�sgac_Sdddddddd`s]dpjbagYQ\kjirpqr_Ua`]_bafbh_X]ccg__XXV`id\SRegWR[gae\^YX`diYVV\Sd]_gPP]XW`_hWU]LPa|~fn��qtwizos��w|f`fggmkl^X`hk_e\i]kxkg[Pa_\b_Y^QQGHTQN
RV\[YUYRQWQSYWYTJLTRJMRWXc\[`bwl`njaosywpjpydjh{wnljTMlk_i��vi]n�hqjlcdnpt{o~yonfc^uxovu^fi`}��xmb�t��m�ve^fiOV[djV~�y{qsz�~qf\dear�i\_T_SSVPQYVWLTYXbVgUF\dj�yyqftpYW[[ajjl{npcgvxrqjn_ndR_i\^khc\]\feUWgkg^\FXVS[nrfe^YigeUbIASUMJRRZf\R]YUaeXOECEUY\j_Qfc]T[MUPPNGNWlvwj`VCXUM]gj~tmaXb_qyojbl[jcRJWU[FMT_jrcg�s``[NUVVQPMX_X^jj`MJNQTV^vp\bZdqcg`hkhh`wkc\d_\TW^`g\XKCEU[`LAQ>BG=Hl`V]WU\QKSki`HEJFX\W]d`NQWT[ZcS\QOQQFIEUOX`ds\Vksgb_bnn_ouaE:@DCO^DYbm�nohXWXLYON^^Z_go{UGXjtg�udc_`Sddddddddbbbbbbbbbbbbbbbb________________________________ccccccccccccccccZZZZZZZZZZZZZZZZrrrrrrrrrrrrrrrrffffffffffffffffddddddddddddddddZZZZZZZZZZZZZZZZUUUUUUUUUUUUUUUUccccccccccccccccjjjjjjjjjjjjjjjjmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmqqqqqqqqqqqqqqqqaaaaaaaaaaaaaaaaffffffffffffffffYYYYYYYYYYYYYYYYppppppppppppppppkkkkkkkkkkkkkkkk````````````````\\\\\\\\\\\\\\\\VVVVVVVVVVVVVVVVYYYYYYYYYYYYYYYY[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\eeeeeeeeeeeeeeeehhhhhhhhhhhhhhhh\\\\\\\\\\\\\\\\hhhhhhhhhhhhhhhh^^^^^^^^^^^^^^^^BBBBBBBBBBBBBBBBXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYY\\\\\\\\\\\\\\\\________________bbbbbbbbbbbbbbbbddddddddddddddddddddddddddddddddbbbbbbbbbbbbbbbb________________________________ccccccccccccccccZZZZZZZZZZZZZZZZrrrrrrrrrrrrrrrrffffffffffffffffddddddddddddddddZZZZZZZZZZZZZZZZUUUUUUUUUUUUUUUUccccccccccccccccjjjjjjjjjjjjjjjjmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmqqqqqqqqqqqqqqqqaaaaaaaaaaaaaaaaffffffffffffffffYYYYYYYYYYYYYYYYppppppppppppppppkkkkkkkkkkkkkkkk````````````````\\\\\\\\\\\\\\\\VVVVVVVVVVVVVVVVYYYYYYYYYYYYYYYY[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\eeeeeeeeeeeeeeeehhhhhhhhhhhhhhhh\\\\\\\\\\\\\\\\hhhhhhhhhhhhhhhh^^^^^^^^^^^^^^^^BBBBBBBBBBBBBBBBXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYY\\\\\\\\\\\\\\\\________________bbbbbbbbbbbbbbbbddddddddddddddddddddddddddddddddbbbbbbbbbbbbbbbb________________________________ccccccccccccccccZZZZZZZZZZZZZZZZrrrrrrrrrrrrrrrrffffffffffffffffddddddddddddddddZZZZZZZZZZZZZZZZUUUUUUUUUUUUUUUUccccccccccccccccjjjjjjjjjjjjjjjjmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmqqqqqqqqqqqqqqqqaaaaaaaaaaaaaaaaffffffffffff
ffffYYYYYYYYYYYYYYYYppppppppppppppppkkkkkkkkkkkkkkkk````````````````\\\\\\\\\\\\\\\\VVVVVVVVVVVVVVVVYYYYYYYYYYYYYYYY[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\eeeeeeeeeeeeeeeehhhhhhhhhhhhhhhh\\\\\\\\\\\\\\\\hhhhhhhhhhhhhhhh^^^^^^^^^^^^^^^^BBBBBBBBBBBBBBBBXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYY\\\\\\\\\\\\\\\\________________bbbbbbbbbbbbbbbbddddddddddddddddddddddddddddddddbbbbbbbbbbbbbbbb________________________________ccccccccccccccccZZZZZZZZZZZZZZZZrrrrrrrrrrrrrrrrffffffffffffffffddddddddddddddddZZZZZZZZZZZZZZZZUUUUUUUUUUUUUUUUccccccccccccccccjjjjjjjjjjjjjjjjmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmqqqqqqqqqqqqqqqqaaaaaaaaaaaaaaaaffffffffffffffffYYYYYYYYYYYYYYYYppppppppppppppppkkkkkkkkkkkkkkkk````````````````\\\\\\\\\\\\\\\\VVVVVVVVVVVVVVVVYYYYYYYYYYYYYYYY[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\eeeeeeeeeeeeeeeehhhhhhhhhhhhhhhh\\\\\\\\\\\\\\\\hhhhhhhhhhhhhhhh^^^^^^^^^^^^^^^^BBBBBBBBBBBBBBBBXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYY\\\\\\\\\\\\\\\\________________bbbbbbbbbbbbbbbbddddddddddddddddddddddddddddddddbbbbbbbbbbbbbbbb________________________________ccccccccccccccccZZZZZZZZZZZZZZZZrrrrrrrrrrrrrrrrffffffffffffffffddddddddddddddddZZZZZZZZZZZZZZZZUUUUUUUUUUUUUUUUccccccccccccccccjjjjjjjjjjjjjjjjmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmqqqqqqqqqqqqqqqqaaaaaaaaaaaaaaaaffffffffffffffffYYYYYYYYYYYYYYYYppppppppppppppppkkkkkkkkkkkkkkkk````````````````\\\\\\\\\\\\\\\\VVVVVVVVVVVVVVVVYYYYYYYYYYYYYYYY[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\eeeeeeeeeeeeeeeehhhhhhhhhhhhhhhh\\\\\\\\\\\\\\\\hhhhhhhhhhhhhhhh^^^^^^^^^^^^^^^^BBBBBBBBBBBBBBBBXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYY\\\\\\\\\\\\\\\\________________bbbbbbbbbbbbbbbbddddddddddddddddddddddddddddddddbbbbbbbbbbbbbbbb________________________________ccccccccccccccccZZZZZZZZZZZZZZZZrrrrrrrrrrrrrrrrffffffffffffffffddddddddddddddddZZZZZZZZZZZZZZZZUUUUUUUUUUUUUUUUccccccccccccccccjjjjjjjjjjjjjjjjmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmqqqqqqqqqqqqqqqqaaaaaaaaaaaaaaaaffffffffffffffffYYYYYYYYYYYYYYYYppppppppppppppppkkkkkkkkkkkkkkkk````````````````\\\\\\\\\\\\\\\\VVVVVVVVVVVVVVVVYYYYYYYYYYYYYYYY[[[[[[[[[[[[
[[[[\\\\\\\\\\\\\\\\eeeeeeeeeeeeeeeehhhhhhhhhhhhhhhh\\\\\\\\\\\\\\\\hhhhhhhhhhhhhhhh^^^^^^^^^^^^^^^^BBBBBBBBBBBBBBBBXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYY\\\\\\\\\\\\\\\\________________bbbbbbbbbbbbbbbbddddddddddddddddddddddddddddddddbbbbbbbbbbbbbbbb________________________________ccccccccccccccccZZZZZZZZZZZZZZZZrrrrrrrrrrrrrrrrffffffffffffffffddddddddddddddddZZZZZZZZZZZZZZZZUUUUUUUUUUUUUUUUccccccccccccccccjjjjjjjjjjjjjjjjmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmqqqqqqqqqqqqqqqqaaaaaaaaaaaaaaaaffffffffffffffffYYYYYYYYYYYYYYYYppppppppppppppppkkkkkkkkkkkkkkkk````````````````\\\\\\\\\\\\\\\\VVVVVVVVVVVVVVVVYYYYYYYYYYYYYYYY[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\eeeeeeeeeeeeeeeehhhhhhhhhhhhhhhh\\\\\\\\\\\\\\\\hhhhhhhhhhhhhhhh^^^^^^^^^^^^^^^^BBBBBBBBBBBBBBBBXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYY\\\\\\\\\\\\\\\\________________bbbbbbbbbbbbbbbbddddddddddddddddddddddddddddddddbbbbbbbbbbbbbbbb________________________________ccccccccccccccccZZZZZZZZZZZZZZZZrrrrrrrrrrrrrrrrffffffffffffffffddddddddddddddddZZZZZZZZZZZZZZZZUUUUUUUUUUUUUUUUccccccccccccccccjjjjjjjjjjjjjjjjmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmqqqqqqqqqqqqqqqqaaaaaaaaaaaaaaaaffffffffffffffffYYYYYYYYYYYYYYYYppppppppppppppppkkkkkkkkkkkkkkkk````````````````\\\\\\\\\\\\\\\\VVVVVVVVVVVVVVVVYYYYYYYYYYYYYYYY[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\eeeeeeeeeeeeeeeehhhhhhhhhhhhhhhh\\\\\\\\\\\\\\\\hhhhhhhhhhhhhhhh^^^^^^^^^^^^^^^^BBBBBBBBBBBBBBBBXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYY\\\\\\\\\\\\\\\\________________bbbbbbbbbbbbbbbbdddddddddddddddddddddddddddddddd��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~|����������������������������|������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������}~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
����~~~~~~~~�����~~������������������������~~�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{{{{{{{{|||}~~}}}~~~~~~~~~}}}}~~~~~~~~~~~z{~��}{||||||||��~}}}}||||||||||||||||||}~~~~~����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yyyyyyyyyyz{{|}}}}}}}}}}||}}}|{zzzzz{{{{zzzzzzzzy{~��~|z{{{{{{{{~}{zyyyzzzzzzzz{{{{{{{{xyz{|||{||||}}}}~~~��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xxxxxxxxxxyyzz{{|||||{{{yz{{{yxwwwwxxxxxwwwwwwwwy{}~{y{{{{{{{{}|{yxxwwyyyyyyyyzzzzzzzzvwyz{{zzzzzzzzzzzzzz{{{{||||||||||||||||}|||~�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������xxxxxxxxxxxxyyzzzzyyyyxxwxyzyxvtuuuuuuuuuuuuuuuuyz|}}{zyzzzzzzzz|{zxwwwwwwwwwwwwxxxxxxxxuvxz{{zyyyyyyyyyxxyyyyzzxxxxxxxxxxxxxxxxxxvwx{����~}}~~~}}}||~~�������~~}}}~~~~~~~~~~�����������������������������������������������������������������������������������������������������������������������������xxxxxxxxxxxxxyyywwwwvvvvvwxyxwusttttttttttttttttyyzzzyyxxxxxxxxx{{yxwwwwvvvvvvvvwwwwwwwwtvxz{{zyyyyyyxxxxyyyyzzzvvvvvvvvuuuuuuuuvussuy|�|zzz{|yyyyxxxwyyzz{||}~}}||{zzxxz{{{zzyyyyyyyyz{{|}}~~����������������������������������������������������������������������������������������������������������������������wwwwwwwwvvvvvvvvvvvvvvvvwwwwwwwwuuuuuuuuuuuuuuuuvvvvvvvvuuuvvvvvxwtrqrssttttttttuuuuuuuutttuuuvvvvvvvvvvttuuvvwwtttttttttttt
ttttttttttttvvvvwwwwuuuuuuuuxwwwvuuuqrstuvvvuuuuuuuuuuvvvvuuwwwwwwwwyyyyyyyyxxxxxxxxwwwwwwwwyyyyyyyywwwwwwwwxxxxxxxxyyyyyyyyxyyz{|}}}}~�����������������������������������������������������vvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvttttttttuuuuuuuuuuuuuuuuttuuuuvvvusqppqrrrrrrrrrrrrrrrrrssttttuutttttttttttuuvvvttttttttttttttttttttttttuuvvvvwwuuuuuuuuwwvvuuttqqstuuuuuuuuuuuuuuvvvvuuwwwwwwwwvvvvvvvvwwwwwwwwvvvvvvvvwwwwwwwwvvvvvvvvwwwwwwwwvvvvvvvvvvwwxyzzzz{|}~������������������������������������������������vvvvvvvvvvvvvvvvvvvvvvvvuuuuuuuuttttttttsssssssssssssssssssttttttsqpoopppppppppppppppppprrsssssssssssssssssttuuutttttttttttttttttttttttttuuuuvvvuuuuuuuuvvvuuttspqrsttttuuuuuuuutuuvvuutvvvvvvvvttttttttvvvvvvvvuuuuuuuuttttttttuuuuuuuuvvvvvvvvttttttttssttuvvwvvvwwxxxwwxyyyxxxxxxyyyyyyyyyyyy{{||||}}||||||||||||||||uuuuuuuuttttttttttttttttssssssssttttttttrrrrrrrrqqqqqqqqrrrrsssstsrqppqqpppppppppppppppprrrrrrrrqqqqqqqqrrsstttuttttttttttttttttttttttttttttuuuuuuuuuuuuuuuttssspqrsttssuuuuuuuutuuvvuutuuuuuuuuttttttttuuuuuuuuttttttttttttttttttttttttuuuuuuuussssssssrssttuuvttttttttrrstttssssssttttttttttttuuvvvvwwvvvvvvvvvvvvvvvvssssssssqqqqqqqqqqqqqqqqrrrrrrrrssssssssqqqqqqqqppppppppqqqrrrrruutssrssrrrrrrrrrrrrrrrrrrrqqqqqqqqqqqqqrrrsstttuuuuuuuuttttttttttttttttsttttuuuuuuuuuuuuuuttssrpqrsssssttttttttttuuuuttttttttttuuuuuuuuttttttttttttttttuuuuuuuuttttttttttttttttuuuuuuuuttuuuvvvuuuuttttrrsttttsssstttttttttttttsttttuuuuuuuuuuuttttttttrrrrrrrrpppppppppppppppprrrrrrrrrrrrrrrrqqqqqqqqqqqqqqqqqqqqqrrruuttttssssssssssssssssssrrrqqqqqqqqqqqqqrrrstttuuuuuuuuuttttttttttttttttttttuuuuuuuuuuuuuuuttsssqrstttssttttttttttuuuuttttttttttvvvvvvvvttttttttttttttttvvvvvvvvttttttttttttttttvvvvvvvvvvvvwwwwvvvvuuuustuvvvuuuuuuuvvvvvvvvvvvtttuuuuvvvvvvvvvttttttttrrrrrrrrqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrqqqqqqqqqqqqqqqqqqqqrrrrssssssrrssssssssrrrrrrrrrrrrrqqqrrrrrrrrrssstuuuuuuuuuuutttttttttttttttttttuuuuvuuuuuuuuvvuuttssrsttuttsttttttttsttuuttsttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvuvvv
vvwwvvvvvvuustuuvvuutuuuuvvvvvvvvvvvuuuvvvvvuuuuuuuuuuuuuuuuqqqqqqqqssssssssssssssssrrrrrrrrrrrrrrrrqqqqqqqqrrrrrrrrqqqqrrrrqqrrqqqprrrrrrrrqqqqqqqqssrrrqqqsssssssssssttuuuuuuuuuuuttttttttttttttttuuuuuvvvuuuuuuuuvvvuuttssstuuuttttttttttsttuuttsuuuuuuuussssssssuuuuuuuuuuuuuuuuttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvuuursttuttsssttttuuttttttttuvvvvwwwttttttttuuuuuuuuppqqqqrrtssrrqqpqqqqqqqqqqqqqqqqqqqqqrrrssrrrrqqppppqqqrqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrssssssssqrrstvvwttuuuuvvuuuuuuuuvvuuuuttuuuuuuuuuuuuuuuutttuuuuuuuuuuuuuttttttttttttttttttttsssssssssssssssssssstuwxxwutttttttttttuuuuvvuuuuuuuuttttttttvwwwxxxxxwwwwwvvwwvvvvvvtuuuuvvvvvvwwxxyxxxxxxxxwwwvvvuuvvvvvvvvuuuuuuuupppqqqqrrrqqpoooppppppppqqqqqqqqqqqqqrrrrrrrrqqqpppqqqqrqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrssssssssrrsstuuvtttuuuuuuuuuuuuuvvuuuuttttttttttuuuuuuuuttttuuuuuuuuuuuuttttttttttttttttttttsssssssssssssssssssstuvwwvutttttttttttuuuuvvuuuuuuuuttttttttuuvvvwwwvvvvuuuuvvvvuuuuuuvvvvwwvvvvvvwwwwwwwwwwwwwwvvvvuuuuuuuuuuuuuuuupppqqqqqpppoonnnnnnnnnnnqqqqqqqqqqqqrrrrrrrrqqqqpqqqqqqrqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrssssssssssssttttttttuuuuuuuuuuuuuuuuutttrrrrrrrrttttttttttttuuuuuuuuuuuuttttttttttttttttttttssssssssssssssssssssstuvvutssssssssstttuuuuuuuuuuuuuttttttttttttttuuttttsssstttttuuuvvvvvwwwuuuuutttvvvvvvvvuuuvvvvvttttttttuuuuuuuuppppqqqqqppppoooooooooooppppppppppqqrrrsrrrrqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrsssssssstttssssssssttttuuuuuuuuuuuuuttttqqqqqqqqsssssssssttttuuuuuuuuuuuttttttttttttttttttttssssssssssssssssssssrsttttsrrrrrrrrrttttuuuuuuuuuuuuttttttttrrrrrsssrrrrqqqqrsssttttuuuuuvvvvuuttsrrttttttttssssttttttttttttuuuuuuuuoppppqqqrrrrqqqqqqqqqqqqpppppppppppqrrssssrrrrqqrrrrqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrssssssssttsssssrssssstttuuuuuuuuuuutttttqqqqqqqqrrrrrrrrsssttttuuuuuuuuuttttttttttttttttttttssssssssssssssssssssrssssssrqqqqqqqqtttttuuuuuuuuuuuttttttttrrrrrqqqqqqpppppqqrssttussttttuuvvutssrrrrrrrrrrrrrrrrrrssssssssuuuuuuuuoooppppqrrrrrrrrqqqqqqqqppppppppoppqrrssttssssrrrrrrrqqqqqqqqqqqqqqq
qqqqqqqqrrrrrrrrrrrrssssssssssssssssrrrsssstttttttttuuttttssqqqqqqqqrrrrrrrrssssttttuuuuuuuuttttttttttttttttttttssssssssssssssssssssssssssssqqqqqqqqssttttuuuuuuuuuuttttttttrrrrrqqqqqqppppppqqrstuutttuuuuuwvvuttssttttttttssssrrrrttttttttuuuuuuuuooopppppqqqqqqqqppppppppppppppppoppqrsstuuttttssssrrrrqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrssssssssrrrsssssrrrrsssstttttttttttttsssssssssssrrrrrrrrssssttttuuuuuuuuttttttttttttttttttttssssssssssssssssssssssssssssqqqqqqqqssstttttuuuuuuuuttttttttssssrrrrrqqqqppppqqstuvvwwwxxxxxwwwvvvuuxxxxxxxxxxwvuuttttttttttuuuuuuuuooooppppooooooppooooooooppppppppoopqrsstuuuuutttsssrrrqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrssssssssrrrrssssrrrrrssstttttttttttttsssssssssssssssssssssssstttuuuuuuuuttttttttttttttttttttssssssssssssssssssssttssssttrrrrrrrrssstttttuuuuuuuutttttttttttsssrrrrrqqqqqpqrstuvwzzzz{{{{xxwwwwww|||||||||{zyxwwvttttttttuuuuuuuuppppppppqqpponnnnnnnnooooooooooopqqrrsssssssssssssrrqqpppppoppqrqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrsssssssssssrrrrqrstuuuutttttttsssssssssssssssssssssssssstttuuuuuvvuttttuttttttttttttttttuttsssssttttttttssssssssssssssssssstttttttttttttttttttttttttttttrrrrrrrroopprsuvz{|}~~~~|||{{{{{~~~~~~~~~~~~~~~~vwyyyxvtuuuuuuuuppppppppppppooonnnooooppppppppppppqqrrsssssssssssrrqqpppqppppqqrrrrrrrrrrrrrrrrrsssssssssssssssssssssssssssrrrrrrsstutttsssssssssssssssssssssssssssssssstttuuvvvvvuttttuttttttttttttttttutttssssttttttttssssssssssssssssssstttttttttttttttttttttttttttttssssssssqqqqrtuvxyz{{{{zyyyyxxxxzzzzzzzz{{{{{{{{vwxyywvuuuuuuuuupppppppppppppppooooppppqqqqqqqqqooppqqrrrrrrrrrrrrqqpppoqqpppqrrssssssssssssssssttttttttttttttttttttttttssssrrrrrrstttssrrrrsssssssssssssssssssssssssssstttuvwwwvuttsttuttttttttttttttttttttttssttttttttssssssssssssssssssstttuuuuuuuuuuttttttttttttttttttttttttssssstuvvwwxxwwvvvvvuuuuvvvvvvvvwwwwwwwwvwxyxwvuuuuuuuuuppppppppppppppppppppqqqqrrrrrrrroooppqqrqqqqqqqqqqqqpppprqqqqrrsttttttttttttttttttttttttttttttttttttttttsssssrrrqrssssrrrrrrrrrssssssssssssssssssssssssssttuvwxxvutsssttttttttttttttttttttuuuutttttt
ttttsssssssssssssssssstttuuuuuuuuuuuuuuuuuuuttttttttuuuuuuuuvuuttuvvvwwwwvutuuuuutttuuuuuuuuuuuuuuuuvvwxxwvvuuuuuuuuqqqqqqqqqqqqqqqppppqqqqrrrrrrrrroooppqqrqqqqqqqqqqqqqppprrqqqrstuuuuuuuuttttttttuuuuuuuuttttttttttttttttttssssrrrrssssrqqqrrrssssssssssssssssssssssssssssstuvwxxuutsssttttttttttttttttttttuvvvutttttttttttttttttssssssssssttuuvvvvvvvvvvuuuuuuuuttttttttuuuuuuuuwvvuuvvwxxyyxwvuwwvvvvuuuuuuuuuuvvvvvvvvvvwwwwvvvvvvvvvvssssssssssrrqqppppppqqqqrrrrrrrrooppqqrrqqqqqqqqrrrqqqqqsrrrrsstuuuuuuuutttttttttttttttttttttttttttttttttttssssssstttsrrqqrrssttssssssssssssssssssssssssssttuvvwuutsssstttttttttttttttttttvwwvuuttttttttttttttttttttttttssttuvvwvvvvvvvvuuuuuuuuttttttttuuuuuuuuwvvuvvwwyyzzyxwwxxwwwwwvvvvvvvvvwwwwwwwwvvvvwwwwvvvvvvvvttttttttttsrqppooppppqqqqqqqqqqqppqqrrssrrrrrrrrrrrrrrrrssrrrsttttttttttssssssssttttttttssssssssssssssssttttsssstttutssrrrrstttussssssssssssssssssssssssssstttuuutssrsstttttttttttttttttstvxxwvuttttttttttttttttttttttttsstuuvwwwwwwwwwwuuuuuuuuttttttttttttttttvvuuvvwxxxyyyxxwwwwwwvvvvvvvvvvvwwwwwwwwuuvvvvwwvvvvvvvvuuuuuuuuuutsqpooooopppppqqqqqqqqpqqrrsssrrrrrrrrssssssssssrrrstuttttttttssssssssttttttttrrrrrrrrssssssssttttsssstuuuutsrrrsstuuussssssssssssssssssssssssssssssssutssrsssttttttttttttttttsuvxxwvuttttttttttttttttttttttttsstuvvwwwwwwwwwwuuuuuuuuttttttttttttttttuuuuvwxxvwxxxxwwwwvvvvuuvvvvvvvvvvvvvvvvuuuuvvwwvvvvvvvvrrrrrrrrppqrrrrqqqqrrrssttttttttuuuuuuuutsssstuuuuuuttttttuuutsrsssssssstttssssssssssssssssrrrqqrrrrssssuuttsttusstuuuttssssttttttttttttsssttuuuuuuuuuuuttttttttqrrrssstsssssssstttuuvvwvvvvvvvvvuttstttttttttttttttttttttttuuuuwwxxxwvuvvvvvvvvuuuuuuuuuuuuuuuuttuuvwxxwwwwwwwwwwwwwwwwvvwwwwxxyxxwwvvuvwwwwwvuvvvvvvvvqqqqqqqqpqrsssrrqrrrrsssttttttttuuuuuuuutsssstuuuuuuttttttuuutsrsssssssstttssssssssssssstssrrqpprrrrrsssuttsstuurssttssrssssttttttttttttuuuvvwwwvvvvvvvvttttttttssssssstsssssssstttuuvvwvvvvvvvvvuttstttttttttttttttttttttttuuuuwwxxwwvuvvvvvvvvuuuuuuuuuuuuuuuuttuuvwxxwwwwwwwwwwwwwwwwvvwwwwxxxxxwwvvuvvwxxwwvvvvvvvvvqqqqqqqqqrss
ttsssssssttttttttttttttttttttsssstuuuuuuttttttuuutsstttttttttttssssssssssssstssrqpooqqqrrrrrssssstuvstttsrqqssssttttuuuuuuuuvwwxxyyyvvvvvvvvuuuuuuuutttttttstttttttttttuuvvwvvvvvvvvvuttstttttttttttttttttttttttuuuuvvwwwvvuvvvvvvvvuuuuuuuuuuuuuuuuttuuvwxxwwwwwwwwwwwwwwwwvvwwwwxxxxxwwvvvvvwxyyxxwwwwwwwwrrrrrrrrrsstuttsttttttttuuuuuuuutttttttttsssstuuuuuutttttttttttttttttttttttssssssssssssstssrqponqqqqrrrrrrrrstuvvvvuusrqssssttttuuuuuuuuvvwwxxyywwwwwwwwvvvvvvvvvvuutttttttttttttttuuvvwvvvvvvvvvuttstttttttttttttttttttttttuuuuvvvvvvvuvvvvvvvvuuuuuuuuuuuuuuuutuuvvwwwwwwwwwwwwwwwwwwwvvwwwwxxxxwwwvvvvvwyyzyyxxxxxxxxrrrrrrrrrstuuuttttttttttttttttttsssssssstsssstuuuuuuttttttttttttuuuuuuuutttssssssssssssstssrqpooqqqrrrrsrqqqrsuuxxxxvutsssssttttuuuuuuuutttuvvvwwwwwwwwwvvvvvvvvvvvuuttttttttttttttuuvvwvvvvvvvvvuttstttttttttttttttttttttttuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvuuuvvwwwwwwwwwwwwwwwwwwwvvwwwwxxwwwwwwwvvvwyyzyyxxxxxxxxttttttttrsttuttsuttttttsttttttttsssssssstsssstuuuuuuttttttssstuuuuuuuuuutttssssssssssssssssrrqqqrrrrssssrqqqrrstxxxxwvtsssssttttttttttttrssttuuuvvvvvvvvvvvvvvvvuuuuuttttttttttttttuuvvwvvvvvvvvvuttstttttttttttttttttttttttuuuuuutttuuvwwwwwwwwvvvvvvvvvvvvvvvvuuuvvvwwwwwwwwwwwwwwwwwwvvwwwwxxwwwwwwwwvvwxyyxxxxxxxxxxuuuuuuuurrstttssttttssssssssssssrrrrrrrrtsssstuuuuuuttttttssstuvvvvvvvvvtttsssssssssssssssssssssssssttttrrqqqqrsvvvvutsrssssttttssssssssssstuuuvvvvvvvvvuuuuuuuutttttuuuuuuuuuuutttuuvvwvvvvvvvvvuttstttttttttttttttttttttttuuuuttssttuvwwwwwwwwvvvvvvvvvvvvvvvvuuvvvvvvwwwwwwwwwwwwwwwwvvwwwwxxwwwwwwwwvvwxxwwvxxxxxxxxvvvvvvvvqrsstssstttsssrrrrrrrrrrrrrrrrrrtsssstuuuuuuttttttssstuvvvvvvvvvtttsssssssssssssssssttttssttttuusrqqqqqrsttttsrqssssttttrrrrrrrrttuuvvvwuuuuuuuuuuuuuuuusssttuuuuuuuuuuutttuuvvwvvvvvvvvvuttstttttttttttttttttttttttuuuuttssstuvwwwwwwwwvvvvvvvvvvvvvvvvuvvvvvvvwwwwwwwwwwwwwwwwvvwwwwxxwwwwwwwwvwwwwwvuwwwwwwwwttttttttssssssssttttttttttttttttssssssssssssssssttsssssssstttuuuuuuuuuuuttttttttsssssssssstuwxyyyxvutsstuuuuuuuusssssssssssssssssssssssstttt
ttttrrsstttuuuuutttsrssttuuvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuuuuuuuuuuuuttuuuuttttttttttuuuuuuuuuvwwxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwvvwwwwxxwwxxwvuuwwwwwwwwwwwwwwwwttttttttssssssssttttttttttttttttsssssssssssssssssssssssstttttttuuuuuuuuuvvvvvvvvuuuuuuuustuvwxyyyxwuttttuuuuuuuussssssssssssssssssssssssttttttttqqrrsttuuuuuttttssstuuvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuuuuuuuuuuuutuuvvuutuuuuuuuuuuuuuuuuuvwwxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxyxwvvwwwwwwwwwwwwwwwwttttttttssssssssttttttttttttttttssssssssssssssssrrsssttttttttsssssssssssvvvvvvvvvvvvvvvvtuuvwxyyxxwuuuuuuuuuuuuuttttttttssssssssssssssssssssssssppqrsttuuuutttttstttuuvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuuuuuuuuuuuuuuvwwvuuuuuuuuuuuuuuuuuuuvwwxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxyyxyyzzyxxwwwwwwwwwwwwwwwwttttttttssssssssttttttttttttttttssssssssssssssssqrrsttuutttsssrrrrrrrrrrttttttttuuuuuuuuuuvvwxxxxwvuuuuvuuuuuuuuttttttttssssssssssssssssrrrrrrrroopqrsttuuttttttttuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuuuuuuuuuuuuvvwwwwvvvvvvvvvvuuuuuuuuuvwwxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxyyyyyyz{{{zzwwwwwwwwwwwwwwwwttttttttttttttttttttttttttttttttssssssssrrrrrrrrqrrsttuuttssrrrqqqqqqqqqqqqqqqqqssssssssuuuvvwwwvvuutuuvuuuuuuuuttttttttssssssssssssssssrrrrrrrroppqrsttttttttuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuuuuvvwxxwvvvvvvvvvvvvvvvvvvuvwwxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxyyyyyz{{||{{wwwwwwwwwwwwwwwwttttttttttttttttttttttttttttttttssssssssrrrrrrrrrrssstttsrrrrrrrqqqqqqqqppppppppqqqqqqqquuuuuuuuuttsttuvuuuuuuuuttttttttssssssssssssssssssssssssqqqrrssstttttuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuuuuvvwwwwvvwwwwwwwwvvvvvvvvuvwwxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxyyyyz|||||wwwwwwwwwwwwwwwwttttttttttttttttttttttttttttttttssssssssrrrrrrrrssssssssqqqrrrrrqqqqqqqqppppppppqqqqqqqqttttttttssrrsstuuuuuuuuuuuuuuuuussssssssssssssssttttttttssssssssttttuuuuwwwwwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuuuuuvvwwvvuwwwwwwwwvvvvvvvvuvwwxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww
wwwwwwwwwwwwwwwwxxxxxyz{|}}}wwwwwwwwwwwwwwwwttttttttttttttttttttttttttttttttssssssssrrrrrrrrttsssssspqqqrrrsqqqqqqqqqqqqqqqqrrrrrrrrtttsssssrrrqrstuuuuuuuuuuuuuuuuusssssssssssssssstttttttttttsssssstttuuuuwwwwwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuuuuuuvwwvuuwwwwwwwwvvvvvvvvuvwwxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwvvwwwwxxwxz{|}}}wwwwwwwwwwwwwwwwuuuuttttttttttttttttttttttttttttuuuuuuuussssssssssssssssqqqrrrrrrrrrrrrrrrrrrrrrrsuvvusrttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuurrrrrrrruuuuuuuussssssssrstuuuttuwyxvuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwvvvvwwwwwvvvuuvvvuvvwxwwwwwwwwvvwwwwxxwwwwwwwwuuuttttsttttttttttttttttttttttttttttttttssssssssssssssssqqrrrrrssssssssstttttttttuwxxwutttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuussssssssuuuuuuuussssssssrstuuutttvxxvuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwvvvvwwwwwvvvvuwvvuvvwxwwwwwwwwvwwwwxxxwwwwwwwwtttttsssssssssssssssssssttttttttssssssssssssssssssssssssqrrrrrssttttttttvvvvvvvvvwxyyxwvttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuttttttttuuuuuuuussssssssssttuutttvxxvuvwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwvuuvwwwwwvvvvvwvvvvwxxwwwwwwwwwwwwwxxxwwwwwwwwttttssssrrrrrrrrssssssssttttttttssssssssssssssssssssssssrrrrrsssttttttttvvvvvvvvvvwxxwvvttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuttttttttsstttttttvxwvuvxvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwvvuuvvwvvvvvvvvwwvvvwxxxxxxxxxxwwwwxxxxxxxxxxxxtttttsssrrrrrrrrssssssssssssssssrrrrrrrrssssssssssssssssrrrrssssuuuuuuuuttttttttttuuuuttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuttttttttttttttuuuvxwuuvxvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwvuuuuvwvvvvvvvvwwvvvwxyxxxxxxxxwwwxxxxyxxxxxxxxuuuuttttssssssssssssssssssssssssssssssssssssssssssssssssrrrsssstttttttttssssssssssss
ssssttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuttssstuuvwxwutvwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuvvvvvvvwwwxwwvwwxyxxxxxxxxwxxxxyyyyyyyyyyyvvvuuuuuttttttttttttttttssssssssssssssssssssssssssssssssrrssssttttttttttttttttttssssssssttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuttssstuvxyywtsuwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuttuvvuvvvvwwwxwwwwxyyxxxxxxxxxxxxxyyyyyyyyyyywvvvvuuuttttttttttttttttssssssssssssssssssssssssssssssssrsssstttttttttttuuuuuuuuttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuttssstuvyyywtstvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuttuvvuuvvvwwwxwwwwxyyxxxxxxxxxxxxyyyyzzzzzzzzuuuuuuuuuuutttttrrrrrrrrrrrrrrrrtttttttttttttttttttsssssssssssssttttttttttttttttttttttttsssrssssttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuutttuvwxxwwvuuuuuuvvvwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwvuuuvwvvvwwxxxwwwvvvuuvvvvwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxuuuuuuuusssssrrrrrrrrrrrssssssssttttttttttttttttttttssssssssssssttttttttttttttttttttttttssstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuutttuvwxxwwvuuuuuuvvvwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwvuuuuvwvvwwwwxxwwwwvvvvvvvvwwwwwwwwwwwwxxxxxxxxxxxxxxxxyyyyyyyyuuuuuuuurrrrrqqqrrrrrrrrttttttttttttttttttttttttttttssssssssssssttttttttttttttttttttttttttuvvwwwuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuutttuvwwwwvvvuuuuvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuvvvwwwwwwxxxxwwwwwvvvvwwwwwwwwwwwwxxxxxxxxxxxxxxxxyyyyyyyyuuuuuuuussssrrrrssssssssttttttttttttttttuuuuuuuuuttttssssssssssstttttttttttttttttttttttttuvwxyyyuuuuuuuuuuuuuuuuuuuuuuuuttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuutttuvwwwwvvvuuvvvvvvvvwwwwwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvv
vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuvvvwwwwwwwwxxxxxxxxvvvvwwwwwwwwwwwwxxxxxxxxxxxxxxxxyyyyyyyyuuuuuuuuuuuuutttssssssssttttttttttttttttuuuuuuuuuuuttttssssssssstttttttttttttttttttttttttuwxyzzyuuuuuuuuuuuuuuuuuuuuuuuusstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuutttuvwwwwvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuvvvwwwwwwwwwwwxxxxxxxxvvvvwwwwwwwwwwwwxxxxxxxxxxxxxxxxyyyyyyyyuuuuuuuuvvvvuuuussssssssttttttttttttttttuuuuuuuuuuuuttttsssssssstttttttttttttttttttttttttuwxyyxxvvvvvvvvuuuuuuuuuuuuuuuusssttuuuuuuuuuuuuuuuuuuuuuuuuuuuuutttuvwvvvvvvvvvvvvvvuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuvwwxxxxxwwwwwwvwwwwwxxxvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxuuuuuuuuuuuuttttssssssssssssssssttttttttuuuuuuuuuuuuttttsssssssstttttttttttttttttttttttttuwxxxvvvvvvvvvvuuuuuuuuuuuuuuuurssstuuuuuuuuuuuuuuuuuuuuuuuuuuuuutttuvwvvvvvvvvwwvvvuuuttttttttvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvxyyyyyxxwwwwvvvvvwwwwwvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxuuuuuuuuttssssrrttttttttrrrrrrrrttttttttuuuuuuuuuuuuutttssssssssttttttttttttttttttttttttsuvxxwutvvvvvvvvuuuuuuuuuuuuuuuurrsstuuuuuuuuuuuuuuuuuuuuuuuuuuuuutttuvwvvvvvvvwwwvvvuuussssssssvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwxzzzzyxxxwwvvvuuvvvwwwvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwuuuuuuuuuuuuttttttttssssssssssssuuuuuuuuttttttttssttttssssttttuuttttttttttttuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvuuuuutsrrrrstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvutsrsttutttuvvwwvvuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuwxyz{{{{wwwwwwwwvvvvvwwwvvvvvvvvwwwwwwwwwwvuuvwwwwwwvvvvvvvvvvvvuuuuuuuuuuuuttttttttssssssssssssuuuuuuuuttttttttssttttssssttttuuttttttttttttuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvuuuuutsrrrrstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvutsrsttutttuvvwwvvuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuvwxyz{zzwwwwwwwwvvvvvwwwvvvvvvvvwwwwwwwwwwvuuvwwwwwwvvvvvvvvvvvvuuuuuuuuuuuuttttttttssssssss
ssssuuuuuuuuttttttttssttttssssttttuuttttttttttttuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvuuuuutsrrrrstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvutsrsttutttuvvwwvvuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuvwyyzzywwwwwwwwvvvvvwwwvvvvvvvvwwwwwwwwwwvuuvwwwwwwvvvvvvvvvvvvuuuuuuuuuuuuttttttttssssssssssssuuuuuuuuttttttttssttttssssttttuuttttttttttttuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvuuuuutsrrrrstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvutsrsttutttuvvwwvvuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuuwxyyyywwwwwwwwvvvvvwwwvvvvvvvvwwwwwwwwwwvuuvwwwwwwvvvvvvvvvvvvuuuuuuuuuuuuttttttttssssssssssssuuuuuuuuttttttttssttttssssttttuuttttttttttttuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvuuuuutsrrrrstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvutsrsttutttuvvwwvvuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuutuvxxyyxwwwwwwwwvvvvvwwwvvvvvvvvwwwwwwwwwwvuuvwwwwwwvvvvvvvvvvvvuuuuuuuuuuuuttttttttssssssssssssuuuuuuuuttttttttssttttssssttttuuttttttttttttuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvuuuuutsrrrrstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvutsrsttutttuvvwwvvuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuutuwxyyyywwwwwwwwvvvvvwwwvvvvvvvvwwwwwwwwwwvuuvwwwwwwvvvvvvvvvvvvuuuuuuuuuuuuttttttttssssssssssssuuuuuuuuttttttttssttttssssttttuuttttttttttttuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvuuuuutsrrrrstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvutsrsttutttuvvwwvvuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuvwxyyyywwwwwwwwvvvvvwwwvvvvvvvvwwwwwwwwwwvuuvwwwwwwvvvvvvvvvvvvuuuuuuuuuuuuttttttttssssssssssssuuuuuuuuttttttttssttttssssttttuuttttttttttttuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvuuuuutsrrrrstttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvutsrsttutttuvvwwvvuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvuuuuuvwyyzzywwwwwwwwvvvvvwwwvvvvvvvvwwwwwwwwwwvuuvwwwwwwvvvvvvvvvvvvoooooooooooooooooooooooooooooooonnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmnnoopponmmlmmmmmmm
mmmmmmmmmmmmmmmmllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkjkklmmmmmmmmmllkjjklllllllllllkkkjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiikkkjjjjjjjjjjjjjoooooooooooooooooooooooooooooooonnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmnnoopponmmlmmmmmmmmmmmmmmmmmmmmmmmllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkjkklmmmmmmmmmllkkkkllllllllllkkkkjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiikkkjjjjjjjjjjjjjoooooooooooooooooooooooooooooooonnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmnnoopponmmlmmmmmmmmmmmmmmmmmmmmmmmllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkjkklmmmmmmmmmmllkkllmllllllllkkkkkjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiikkkjjjjjjjjjjjjjoooooooooooooooooooooooooooooooonnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmnnoopponmmlmmmmmmmmmmmmmmmmmmmmmmmllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkjkklmmmmmmmmmmmllllmmllllllllkkkkkkjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiikkkjjjjjiiiiiiiioooooooooooooooooooooooooooooooonnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmnnoopponmmlmmmmmmmmmmmmmmmmmmmmmmmllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkjkklmmmmmmmmmmllkkllmlllllllljjkkkkkkkkkkkkkkiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiikkkjjjjjiiiiiiiioooooooooooooooooooooooooooooooonnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmnnoopponmmlmmmmmmmmmmmmmmmmmmmmmmmllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkjkklmmmmmmmmmllkjjkllkkkkkkkkjjjkkkkkkkkkkkkkiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiikkkjjjjjiiiiiiiioooooooooooooooooooooooooooooooonnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmnnoopponmmlmmmmmmmmmmmmmmmmmmmmmmmllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkjkklmmmmmmmmmkkjjjjkkjjjjjjjjjjjjkkkkkkkkkkkkiiiiiiiiiiiiiiiiiiiiiiiijjjj
jjjjiiiiiiiikkkjjjjjiiiiiiiioooooooooooooooooooooooooooooooonnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmnnoopponmmlmmmmmmmmmmmmmmmmmmmmmmmllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkjkklmmmmmmmmmkjjiijjkiiiiiiiijjjjkkklkkkkkkkkiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiikkkjjjjjhhhhhhhhnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnooponmmmmmmmmmllllmmmmmmmmkkkkkkkkkkkkkkkkkllmlkjikkkkkkkkkkkkkkkkllllllllmmllllkkkkkkkkkkkkkkkkkkjjkkkjiijjjjjjjjiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnooponmmmmmmmmlllllllllllllkkkkkkkkkkkkkkkkkllllkjjkkkkkkkkllllllllllllllllmmllllkkkkkkkkkkkkkkkkkkjjkkkjiijjjjjjjjiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnooponmmmmmmmmllllkkkkkkkkkkkkkkkkkkkkkkkkkkllllkkjkkkkkkkkllllllllmmmmmmmmlllllkkkkkkkkkkkkkkkkkkkjjkkkjiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnooponmmmmmmllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkklllkkkllllllllmmmmmmmmmmmmmmmmllllkkkkkkkkkkkkkkkkkkkkjjkkkjiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnooponmmmmmllllkkkkjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkllllllllllmmmmmmmmmmmmmmmmlllkkkkkkkkkkkkkkkkkkkkkjjkkkjiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjhhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmm
mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnooponmmmmmlllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkklmmmmmmmmmmmmmmmmmmmmmmmmmllkkkkjjkkkkkkkkkkkkkkkkjjkkkjiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjhhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnooponmmmmmlllkkkkjkkkkkkkkkkkkkkkkkkkkkkkklkkjkklmmmmmmmmmmmmmmmmmllllllllkkkkkjjjkkkkkkkkkkkkkkkkjjkkkjiijjjjjjjjiiiiiiiiiiiiiiiijjjjjjjjhhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnooponmmmmmllkkkkjjkkkkkkkkkkkkkkkkkkkkkkkklkjjjkmmnnnnnnnnllllllllllllllllkkkkkjjjkkkkkkkkkkkkkkkkjjkkkjiijjjjjjjjiiiiiiiiiiiiiiiijjjjjjjjhhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnooooonnnnmmkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkklllllkkkmmmmmmmmkjjjjiiikkkkkkkkkkkkkkkkkkkkkkkkiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhggggggggnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmlmmnnoooonnnnmmmkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllkkkkllllllllkjjjjiiikkkkkkkkkkkkkkkkjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhggggggggnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllmmnnoonnnnnmmmkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllkkkkkkkkkkkkkjjjjiiikkkkkkkkkkkkkkkkjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhggggggggnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnoooooooonnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllllmmnnoonnnnmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkklllkkkkjjjjjjjjjkjjjjiiijjjj
jjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhiiiiiiiiggggggggggggggggnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllkkkkkkkklllmmnnnnnnmmmmlkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkklkkkkjjjjjjjjjjjkjjjjiiijjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhggggggggggggggggnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllkkkkkkkkkllmmnnnnmmmmlllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjkjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllkkkkkkkkkkllmmnnmmmmmlllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjkkkkkkkkkjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllkkkkkkkkjjjjjjjjkkllmmnnmmmmllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjkkkkkkkkkjjjjiiihhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggnnnnnnnnnnnnnnnnnnnnmmmmnnnnnnnnnnnnnnnnllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkllmnoonmmmmlllkkkkkkkkjkkkllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjkkkkkkkkkkkkjjjjiiiiiiiiiiijjjkkiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggnnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkllmnoonmmmmlllkkkkkkkkkkkkllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjkkkkkkkkkkkkjjjjiiiiiiiiiiijjjjkiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggnnnnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmllll
llllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkllmnoonmmmmlllkkkkkkkkkkkkklllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjkkkkkkkkkkkjjjjjiiiiiiiiiiiijjjjiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggnnnnnnnnnnnnnnnnnnmmmmllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkllmnoonmmmmlllkkkkkkkkkkkkkkllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjkkjjjjiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggnnnnnnnnnnnnnnnnmmmmmlllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkllmnoonmmmmlllkkkkkkkkllkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggnnnnnnnnnnnnnnnnmmmmllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkllmnoonmmmmllllllllllllllkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiihhhiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggnnnnnnnnnnnnnnnnmmmlllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkllmnoonmmmmlllmmmmmmmmllllkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiihhhhiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggnnnnnnnnnnnnnnnnmmmllllknnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkllmnoonmmmmlllnnnnnnnnllllkkkjkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiihiiiiiiiiiiiihhhgiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmlkkjjkkkkkkkkkkkkkkkkkkkkkkkkkllmmnnnllkkkkllmmmmmmmmllllllllkkkk
kkkkjjjjjjjjjjjjjjjjkkkkkkkkmllkkjiiiiiiiiiihhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmlkkjjkkkkkkkkkkkkkkkkkkkkkkkkkllmmnnnllkkkkllmmmmmmmmllllllllkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjlllkjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllkkkjkkkkkkkkkkkkkkkkkkkkkkkkkllmmnnnllkkkkllllllllllkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjlkkkjjiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggnnnnnnnnnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllmllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkllmmnnnllkkkkllllllllllkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjkkjjjiiiiiiiiiiijjjjjjjjiiiiiiiihhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmlllllllllllllllllllllllllllllllllllllllllllllkkkllllllllllllllllllllllllkllmmnnnllkkkkllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjiiiiiiiijjjjjjjjjjjiiiiiiiiiiiiijjjjjjjjiiiiiiiihhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllkllmmnnnllkkkkllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllkllmmnnnllkkkklljjjjjjjjjjjjjjjjkkkkkkkkjjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhiiiiiiiiiiiiiiiihhhhhhhhggggggggffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggg
ggggggggggggnnnnnnnnmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllllllllllllllllllkkkkkkkkklllllllllllllllllllllllllllllllkllmmnnnllkkkklljjjjjjjjjjjjjjjjkkkkkkkkjjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhggggggggffffffffggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkklmnnmlkllllllllllllllllkkkllmmnmlllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjiiiijiiiiiiiiiiiiiiiijihgghijiiiiiiiihhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggffeeefggggggggggeeeeeeeeggggggggmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkklmmmmlkllllllllllllllllkkkllmmnllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjiiiijiiiiiiiiiiiiiiiiiihgghiiiiiiiiiihhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggeeedeefgggggggggffffffffggggggggmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllkllllllllllllllllkkkllmmnlllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjiiiijiiiiiiiiiiiiiiiiiihhhhiiiiiiiiiihhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggeddddeffggggggggffffffffggggggggmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllkkkllllllllllllllllkkkllmmnkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjiiiijiiiiiiiiiiiiiiiiihhhhhhiiiiiiiiihhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggddcccdefggggggggffffffffggggggggmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjkkkkkkkkkkkkkkkkkkkkkkllmmnkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjiiiijiiiiiiiiiiiiiiiihhhiihhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggddcccdefggggggggffffffffggggggggmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjj
jjjkkkkkkkkkkkkkkkkkkkkllmmnjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjiiiijiiiiiiiihhhhhhhhhhiiiihhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggffffffffeddddeffggggggggggggggggggggggggmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjiiiijkkkkkkkkkkkkkkkkkkkkllmmnjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjiiiijiiiiiiiihhhhhhhhghiiiihghhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggeeeeeeeeeeedeefgggggggggggggggggggggggggmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjihhijkkkkkkkkkkkkkkkkkkkkllmmnijjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjiiiijiiiiiiiihhhhhhhhghijjihghhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggddddddddffeeefggggggggggggggggggggggggggmmmmmmmmmmmmmmmmllllllllkkkkkkkkkkkkkkkkjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkllllllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjkkllkkjjjjjjjjjkkkkkkkkjjjjjkmmkkkkkkkkkkkkkkkkjjjjjjjjkkkkkkkkiiiiiiiijjjiiiihhhgggghhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggffffggggefggggfeddddddddddeeeeffggggggggggggggggggggggggmmmmmmmmmmmmmmmmllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjkkllkkjjjjjjjjjkkkkkkkkjjjjjkmmkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjiiiiiiiijjjiiiihhhgggghhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggffffffffeffggffeddddddddeeeeffffggggggggggggggggggggggggmmmmmmmmmmmmmmmmllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjkkllkkjjjjjjjjjjjjjjjjjjjjjjkmmkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjiiiiiiiijjjiiiihhhgggghhhhhhhhhhgggggggggggggggggggggggggggggggggggggggggggggggggfffeeddeeffffeeeeeeedddeeffffggggggggggggggggggggggggggmmmmmmmmmmmmmmmmllllllllkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjkkllkkjjjjjjjjjiiiiiiiijjjjjkmmjjjjjjjjkkkkkkkkjjjjjjjjjjjjjjjjiiiiiiiijjjiiiihhhgggghhgggggggggggggggggggggggggggggggggggggggggggg
gggggggggggggffeedccdeeffeedffeeeeeeffffggggggggggggffffffffggggggggmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjkkllkkjjjjjjjjjiiiiiiiijjjjjkmmjjjjjjjjjjjjjjjjiiiiiiiijjjjjjjjiiiiiiiijjjiiiihhhgggghhggggggggggggggggggggggggggggggggggggggggggggggggffffffffgffeedcceeffffeegfffeeeeffffggggggggggggffffffffggggggggmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkmmmmmmmmllllllllllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjkkllkkjjjjjjjjjjjjjjjjjjjjjjkmmjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiijjjiiiihhhgggghhggggggggggggggggggggggggggggggggggggggggggggggggffffffffgfffeeddefggggfegggffeeeeeffffggggggggggffffffffggggggggmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkmmmmmmmmllllllllllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjkkllkkjjjjjjjjjkkkkkkkkjjjjjkmmjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiijjjiiiihhhgggghhggggggggggggggggggggggggggggggggggggggggggggggggfffffffffffffffffgghhggfhhggffeeeeeeffffggggggggffffffffggggggggmmmmmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkkkkkkkmmmmmmmmllllllllllllllllllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjkkllkkjjjjjjjjjkkkkkkkkjjjjjkmmjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiijjjiiiihhhgggghhggggggggggggggggggggggggggggggggggggggggggggggggffffffffffffgggggghhhhgghhhgffeeddeeeeffggggggggeeeeeeeeggggggggmmmmmmmmmmmmmmmmkkkkkkkkjjkkkkllmmmmmmmmnnnnnnnnmmmmmmmmnnnmmmmlmmmmmmmmjjjjjjjjkkkkkkkkjjjjjjjjiiiiiiiikkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkiijjkklliiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiiihhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggffeeddcceeefffffggggggggffffffffddddddddddddddddffffffffggggggggmmmmmmmmmmmmmmmmkkkkkkkkkkkkllllmmmmmmmmmmmmmmmmmmmmmmmmnnmmmmllmmmmmmmmkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkiijjkklliiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiihhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggfffeeddceffffgggggggggggffffffffddddddddeeeeeeeeffffffffggggggggmmmmmmmmmmmmmmmmkkkkkkkkkkllllmmmmmmmmmmmmmmmmmmmmmmmmmmnmmm
mlllmmmmmmmmkkkkkkkkkkkkkkkkjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjkkllliiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiihhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggfffeedddfffgggghggggggggffffffffddddddddeeeeeeeeffffffffggggggggmmmmmmmmmmmmmmmmkkkkkkkkllllmmmmmmmmmmmmllllllllmmmmmmmmmmmmllllllllllllllllllllkkkkkkkkjjjjjjjjkkkkkkkkjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkjjkkllmmiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggfffedddggggghhhggggggggffffffffeeeeeeeeeeeeeeeeffffffffggggggggmmmmmmmmmmmmmmmmkkkkkkkkllllmmmmmmmmmmmmllllllllllllllllmmmlllllkkkkkkkkllllllllkkkkkkkkjjjjjjjjkkkkkkkkjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkllmmmjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhihhhhgggggggggggggggggggggggggggggggggggggggggggggggggggffffffffggffeeddggggghhhggggggggffffffffeeeeeeeeffffffffggggggggggggggggmmmmmmmmmmmmmmmmkkkkkkkkkkllllmmmmmmmmmmkkkkkkkkllllllllmmlllllkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjkkkkkkkkjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkllmmnnjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggffffffffgggfeeedfffgggghggggggggffffffffffffffffggggggggggggggggggggggggmmmmmmmmmmmmmmmmkkkkkkkkkkkkllllmmmmmmmmkkkkkkkkllllllllmlllllkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkklllmmnnnjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggffffffffgggffeeeeffffgggggggggggffffffffffffffffggggggggggggggggggggggggmmmmmmmmmmmmmmmmkkkkkkkkjjkkkkllmmmmmmmmkkkkkkkklllllllllllllkkkjjjjjjjjjjjjjjjjkkkkkkkkjjjjjjjjiiiiiiiijjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkklllmmnnojjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhgggggggggggggggggggggggggggggggggggggggggggggggggggggffffffffgggffeeeeeefffffggggggggffffffffffffffffgggggggggggggggggggggggglllmmmmnmmmmmmmmkkkkkkkkkkkkkkkklllllkkkjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjkjjjjiiijjjjjjjjjjjjjjjjkkkkkkkkjjjjjjjjiijjjkkkjklmnnmlkkkjjiihiiiiiiiihhhhhhhhiiiihhhhgggggggghhgf
fghhggggggggggggggggggggggggggggggggggffffeeggggggggeeeeffffggggggggffffffffffffffffddddeeeeeeeeeeeegggggggggggggggghhhhhhhhlllmmmmmmmmmmmmmkkkkkkkkkkkkkkkklllllllljjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjkjjjjiiijjjjjjjjjjjjjjjjkkkkkkkkjjjjjjjjijjjjkkkjklmnmmmkkkjjiihiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhgffghhggggggggggggggggggggggggggggggggggffffeeffffffffeeeeffffggggggggggggggggggggggggdeeeefffffffffffggggggggggggggggggggggggllllmmmmmmmmmmmmkkkkkkkkkkkkkkkkkkklllmmkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjkjjjjiiijjjjjjjjjjjjjjjjkkkkkkkkjjjjjjjjjjjjjkkkjkllmmmmkkkjjiihiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhgffghhggggggggggggggggggggggggggggggggfffffeeeeeeeeeeeeeefffffggggggggggggggggggggggggeeffffggffffffffgggggggggggggggggggggggglllllmmmmmmmmmmmkkkkkkkkkkkkkkkkjjkklmmnkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjkjjjjiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkllmmnkkkjjiihiiiiiiiijjjjjjjjiiiihhhhiiiiiiiihhgffghhggggggggggggggggggggggggggggggggffffeeeeddddddddeeffffggggggggggggggggggggggggggfffgggggffffffffggggggggffffffffffffffffkkllllmmmmmmmmmmkkkkkkkkkkkkkkkkjjkklmmnkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjkjjjjiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjklmnnkkkjjiihiiiiiiiijjjjjjjjiiiihhhhiiiiiiiihhgffghhggggggggggggggggggggggggggggggggfffeeeeeddddddddfffffgggggggggggggggggggggggggggffgggghhffffffffffffffffeeeeeeeeffffffffkkklllllmmmmmmmmkkkkkkkkkkkkkkkkkkklllmmkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjkjjjjiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkjjjjjiijjkmnokkkjjiihiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhgffghhggggggggggggggggggggggggggggggggffeeeeddddddddddffffggggggggggggfffffffffffffffffffgggghggggggggffffffffeeeeeeeeeeeeeeeekkkkllllmmmmmmmmkkkkkkkkkkkkkkkkllllllllllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjkjjjjiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkjjjjiiiijklnokkkjjiihiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhgffghhggggggggggggggggggggggggggggggggeeeeedddeeeeeeeefffgggggggggggggfffffffffffffffffffffgggggggggggffffffffeeeeeeeeeeeeeeeekkkk
llllmmmmmmmmkkkkkkkkkkkkkkkklllllkkkllllllllkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjkjjjjiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkjjjiiiiiiklnokkkjjiihiiiiiiiihhhhhhhhiiiihhhhgggggggghhgffghhggggggggggggggggggggggggggggggggeeeeedddeeeeeeeefffgggghggggggggeeeeeeeeeeeeeeeeeeffffggggggggggffffffffddddddddeeeeeeeellkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjkkkkkkkkiiiijlmnlkjiiiiihhhhhhhhhhhhhhhhiihhhhgghhhhhhhhggggggggggggggggggggggggggggggggffffeeeeeeeeeeeefffffffffgghhggfggggggggfeeeddddeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddddddddddeeeekkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjiiiijlmnlkjiiiiihhhhhhhhhhhhhhhhiihhhhgghhhhhhhhggggggggggggggggggggggggggggggggfffeeeddfffffffffffffffffgghhggfggggggggeeeeddddeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddddddddddeeeekkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiijlmnlkjihhiihhhhhhhhhhhhhhhhiihhhhgghhhhhhhhggggggggggggggggggggggggggggggggeeeeedddfffffffffffffffffgghhggfffffffffeeeeedddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddeeeejjjkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiijlmnkkjihhhihhhhhhhhhhhhhhhhiihhhhgghhhhhhhhggggggggggggggggggggggggggggggggeeedddddggggggggfffffffffgghhggfffffffffeeeeeeddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddeeeejjjjkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiijlmnkjihhhhhhhhhhhhhhhhhhhhhiihhhhgghhhhhhhhggggggggggggggggggggggggffffffffeeeeeeeeggggggggggggggggfgghhggfeeeeeeeeddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddeeeeiijjjkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
iiiiiiiijlmnkjihhhhhhhhhhhhhhhhhhhhhiihhhhgghhhhhhhhggggggggggggggggggggggggffffffffeeeeffffggggggggggggggggfgghhggfeeeeeeeedddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddeeeeiiijjkklkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiijlmnkjihgghhhhhhhhhhhhhhhhhhiihhhhgghhhhhhhhggggggggggggggggggggggggffffffffeffffgggggggggggggggggggfgghhggfddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddddddddddeeeehiijjkklkkkkkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiijlmnkjihgghhhhhhhhhhhhhhhhhhiihhhhgghhhhhhhhggggggggggggggggggggggggfffffffffffggghhffffffffggggggggfgghhggfddddddddddddeeefeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddddddddddeeeeiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiijjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjgghhhiiijjjjjjjjjjjjjjjjiiiiiiiiiiiijklmjjjihhhggikkjijlhgghijhfhhhhhhhhggggggggggggggggggggggggfffffeeeddddddddffffggggffffffffgggggggghhggfeeeddddddddddddddddeeeeeeeeeeeeeeeeddddddddddddddddeeeeeeeeeeeeeeeejjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiijjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjhhhiiiijjjjjjjjjjjjjjjjjiiiiiiiiiiiijklmjjjihhhggikjihikjkkkihhhhhhhhhhhggggggggggggggggggggggggfffffeeeddddddddffffgggggggggggggggggggghggffeeeddddddddddddddddeeeeeeeeeeeeeeeeddddddddddddddddeeeeeeeeeeeeeeeejjjjjjjjkkkkkkkkjjjjjjjjjjjjjjjjjjjjiiiijjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjiiiijjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiijklmjjjihhhghijigfhjmproifhkhhhhhhhhggggggggggggggggggggggggfffffeeeddddddddffffggggggggggggggggggggggfffeeeddddddddddddddddeeeeeeeeeeeeeeeeddddddddddddddddeeeeeeeeeeeeeeeekkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjiiiijjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiijklmjjjihhhghiihffhjquvqiehmhhhhhhhhggggggggggggggggggggggggfffffeeeddddddddffffggggggggggggfffffffffffeeeeddddddddddddd
ddddeeeeeeeeeeeeeeeeddddddddddddddddeeeeeeeeeeeeeeeekkkkkkkkkkkkkkkkjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiijklmjjjihhhgiiihfgknuwvpiehlhhhhhhhhggggggggggggggggggggggggfffffeeeeeeeeeeeffffggggggggggggeeeeeeeeeeeeedddddddddddddddddddeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeeeeeeeeejjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiijjjjjjjjiiiiiiiiiiiiiiiiiiiijklmjjjihhhghiihhjosxuqlihhihhhhhhhhggggggggggggggggggggggggfffffeeeeeeeeeeeffffggggffffffffeeeeeeeeddddddddddddddddddddddddeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeeeeeeeeejjjjjjjjiiiiiiiijjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiihhhjjjjjjjjiiiiiiiiiiiiiiiiiiiijklmjjjihhhghiiijntyzrjgjkiehhhhhhhhggggggggggggggggggggggggfffffeeeeeeeeeeeffffggggffffffffeeeeeeeeddddddddddddddddddddddddeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiihhhgjjjjjjjjiiiiiiiiiiiiiiiiiiiijklmjjjihhhghhiikpw|{oedjnichhhhhhhhggggggggggggggggggggggggfffffeeeeeeeeeeeffffggggeeeeeeeeddddddddcccdddddddddddddddddddddeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiijjiiihhhhhhhhhhhhhhhhhhhiiiiiiiijjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiijjkkkkkjhhhggfffhhhhhhhhiiiiiiiihhhhhhhhiijjklmmkjhggghihfgltz{zjigffgijggggggggggggggggffffffffggggggggggggggggeeefffffgghhhgffeeeeeeeeddddddddddddddddeeeeeeeeeeeeeeeeddddddddddddddddeeeeeeeeddddddddddddddddddddddddiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiijiiiihhhhhhhhhhhhhhhhhhhiiiiiiiijjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiijjjiiihhhhhhhhhhiiiiiiiihhhhhhhhhiijklmmjjihggghghkqxzxujigffgijggggggggggggggggggggggggggggggggffffffffeeefffffggghggfeeeeeeeeeddddddddddddddddeeeeeeeeeeeeeeeeddddddddddddddddeeeeeeeeddddddddddddddddddddddddiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiii
iiiiiiiiiiiiiiiiiiiihhggghhhkkkjjjjjhhhhhhhhiiiiiiiihhhhhhhhhiijkklliiiihggfgjqx|ysmihgffghiggggggggggggggggggggggggggggggggeeeeeeeeeeefffffffggffedddddddddddddddddddddddddeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeddddddddddddddddddddddddiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiihhhhhiijiiiiiihhhhhhhhhhiiiiiiiihhhhhhhhhhiijkkkhhiiihgfhnw}}wmfhhgffghhggggggggggggggggggggggggggggggggddddddddeeefffffeefffeddccccccccddddddddddddddddeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeddddddddddddddddddddddddiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiihhhhhhhhjjijklnoihhhggffhhhhhhhhhhhhhhhhhhhhhhhhhhiiijjjhhiiiiihnsz}zrichgggggghggggggggggggggggggggggggffffffffddddddddeeefffffeeffeedcccccccccddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddddddddddddddddddddddiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhjjjkloqsllkjihgghhhhhhhhhhhhhhhhhhhhhhhhhhhiiiijjihghjlmvx{yslgeggggggggggggggggggggggggffffffffffffffffddddddddeeefffffeefffeddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddddddddddddddddddddddiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhiiiijhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhijmpsuttsqonmlhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiligegkps~}zrkfgjggggggggggggggggggggggggffffffffffffffffeeeeeeeeeeefffffeffffeedeeeeeeeeddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddddddddddddddddddddddddddddddiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhiiijjhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhffgilptv{zywusrqhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhimjfdfkrv��xnecgmffggggffggggggggggggggggeeeeeeeeffffffffeeeeeeeeeeefffffffgggfedeeeeeeeeddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeddddddddddddddddddddddddddddddddddddddddiiiiiiiijjjjjjjjhhhhhhhhffggghhhhhhiiijjiiiiiiiihhhhgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhtvy{|zwusqmjhghhghijjjjifgghiijjgghijjkknifhow|}|xqkgfffgggghhiiggggggggefffggggffffffffeeeeeeeedddd
ddddffffffffeeeeeeeeeeeeeeeeddddddddeeeeeeeeddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiijjjjjjjjiiiiiiiiggghhhiihhhiiiijiiiiiiiihhhhgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgjnswz{|{yvsomlkffggghhhiiiiiihhgghijjkklihksy|}xtojgefggggggghhggggggggfffffgggffffffffeeeeeeeeddddddddffffffffeeeeeeeeddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiijjjjjjjjiiiiiiiihhhiiiiihhhiiiiiiiiiiiiihhhhgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhbdgkotwy�~|yvsqjigfeffgkkjihhgggghiijkkhilqx|||rokhfefgggggffffggggggggffffffggffffffffeeeeeeeeddddddddffffffffeeeeeeeeddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiijjjjjjjjjjjjjjjjiiiiijjjiiiiiiiiiiiiiiiihhhhgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhjihghjlnxz|}}|zytqnkhgghhhhhhhgghhhiijjkfjpw|}{yljhfefghgggffeeeggggggggffffffffffffffffeeeeeeeeddddddddffffffffeeeeeeeeddddddddddddddddccccccccddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiihhhhhhhhhhhhgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhomigffghilqvz|}~~|xtpmlkeefghhiihhhiijjjflu{}{wthgfeefgggggffeeeffffffffffffffeeffffffffddddddddddddddddeeeeeeeeeeeeeeeeddddddddddddddddccccccccddddddddccccccccddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiijjjjjjjjiiiiiiiijjjjiiiiiiiiihhhhhhhhhhhhhhhgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiihggijkachmrvyz~|yurphhhiiijjhhiiijjjhoy~|wqngfffffggggggfffffffffffffffeeeeeeeeeeeeeddddddddddddddddeeeeeeeeeeeeeeeeccccccccddddddddddddddddddddddddccccccccddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiijjjjjjjjiiiiiiiijjiiiihhjiiiihhhhhhhhhhhhhhhgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhffghijjkeefhjmoqwy|~~{xvqponlkiiiiiiiijjls{~yrkhgggggggfgggggghhfffffffffffeeeddddddddddddddddddddddddddeeeeeeeeeeeeeeeeccccccccddddddddddddddddddddddddccccccccddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeiiiiiiiijjjjjjjjhhhh
hhhhjiiihhhhjjiiihhhhhhhhhhhhhhhgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiihgedmkifefghmqx}|zzxvrokigiiiiiiijnu}~wnhehhhhhggfgggghhiifffffffffffeeeddccccccccddddddddddddddddeeeeeeeeeeeeeeeeccccccccddddddddeeeeeeeeddddddddccccccccddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiihhhhhhhhhhhhhhhhiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhggggggggggggggggijjmpuy{zzzzwtpnjhgffhjkwyzunhghgggggggghhhhhhhhggggffffffffeeeeddddddddddddddddeeeeeeeeeefggfeedddefgggcddddddeeeeeeeeecccdddddccccccccddddddddeeeeeeeeddddddddeedccdeeeeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiihhhhhhhhhhhhhhhhiiiiiiiihhhhhhhhiiiiiiiihhhhhhhhgggggggggggggggghhhjlpsu{|~~}{zrqomllmnwxwrkgghgggggggghhhhhhhhggggffffffffeeeeddddddddddddddddeeeeeeeeeefggfeedddeefffcccdddddddddddddccddddeeddddddddddddddddddddddddddddddddeddccddeeeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiihhhhhhhhhhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggffffhjmnuvy|~��|zxvtssswvsmhfgigggggggghhhhhhhhggggffffffffeeeeddddddddeeeeeeeeddddddddeefggfeeddeeeeeeccccccccdddddddddddddeeeddddddddddddddddddddddddddddddddeddccddeeeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiihhhhhhhhhhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggffeefghijknqtvxy~}|{{zzvsnieegiggggggggggggggggggggffffffffeeeeddddddddeeeeeeeeddddddddeefggfeeeeeeddddddcccbbbccccccccdddddeeeddddddddddddddddccccccccddddddddddccccddeeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggghggfffggffhjloqs{|}~��upjedegiggggggggggggggggggggffffffffeeeeeeeeeeeeeeeeeeeeddddddddeefggfeeeeeeedddeeddcbbacccccccccccdddddddddddddccccccccccccccccddddddddddccccddeeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggghhhhggghiijknpsuyz|����rmhd
efggggggggggggggggggggggffffffffeeeeeeeeeeeeeeeeeeeeddddddddeefggfeeeeeeefffffedccbabbbbbbbbbbbcccccccccccccccccccccbbbbbbbbdddddddddccbbccdeeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggghhhhhhhhggggggggggggggggggghhhhhiijloswzz{~��~pkfeghgeggggggggggggggggggggffffffffeeeeeeeeeeeeffffffffeeeeeeeeeefggfeeeeffgggghggedcbbbbbbbbbbaaaabbbbccccccccccccccccbbbbbbbbdddddddddccbbccdeeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggghhhhhhhhggggggggggggggggeffgghhheeginsx{}~���}{njfehigdggggggggggggggggggggffffffffeeeeeeeeeeeeffffffffeeeeeeeeeefggfeeeeffghhiihgfedcbbbbbbbbb``aaaaabbbbbbbbbccccccccbbbbbbbbdddddddddcbbbbcdeeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhhhhhgggfhhhhhggghhgggggghhhiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggggggggggggggggggghkosxz|}���}{okfdefffggggggggggggggggggffeeddeffggffeffffffffffffffffffffffffeeeeeeeedeeffgghihhgfeddcbbbaaaabbbbbbbbccccbbbbbbbbbbbbbbbccccccccbbbbbbbbbcccceeeeeeeefeeeeeeeddddddddhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggiiiiiiiiiiihhhhhhhhhiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggggggggggggggggggghjmqtwy{~��~|zplgeefffggggggggggggggggggffeeddeffggffeffffffffeeeeeeeeeeeeeeeeeeeeeeeedeeffggggffedccbcbbbbaaabbbbbbbbccccbbbbbbbbbbbbbbbcccccccccbbbbbbbccccceeeeeeeeeeeeeeeeeeeeeeeehhhhhhhhhhhhhhhhhhhhhhhhihhhhhggjjjjjjjjiiiiiiiihhhhiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggggggggggggggggghgghjmoqux|~}{yyrnheefffggggggggggggggggggffeeddeffggffeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeefffggeedccbaacbbbbbbaccccccccccccbbbbbbbbbbbbbbbcccccccccccccbbccccddeeeeeeeeccdddeeeffffffffhhhhhhhhhhhhhhhhhiiiihhgiiiihhhhiiiiijjjhhhhhhhhghhhhiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggggggggggggggggghggghiklpu{}zxwwupjfefffggggggggggggggggggffeeddeffggffeddddddddddddddddddddddddeeeeeeeeeeeeffffeeddccbbbbbbbbbbccccccccccccbbbbbbbbbbbbbbbcccccbccc
ccccccccddddeeeeeeeebbcddeffffffffffhhhhhhhhhhhhhhhhijkkjjhhiiiiiiiigghhhiiigggggfffggghhhhiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggggggggggggggggggggggghhnt{|xutvxslgffffffffffffggggggggggffeeddeffggffeddddddddddddddddddddddddeeeeeeeeeeeeeffffffeedddbbbbccccddddddddccccbbbbbbbbbbbbbbbcccccbccccdddcddddeeeeeeeeeeebccdeeffgggggggghhhhhhhhhhhhhhhhkllmlkihiiiijjjjgghhiiijiihgffeegggghhhhiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggmu|}wrru{umhffffffffffffggggggggggffeeddeffggffeddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffeeeebbbccccceeeeeeeeccccbbbbbbbbbbbbbbbcccccbbccddeedddeeeeeeeeeeeeedddeefffffffffffhhhhhhhhhhhhhhhhlmnonlkiiiijjjkkiijjklllllkjhgffgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggffgghhhhmv~~wqqt}woiffffffffffffggggggggggffeeddeffggffeddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeedddddccbbccccddffffffffccccbbbbbbbbbbbbbbbcccccbbccdeeeddeeeeffeeeeeeeeffffffffffffffffhhhhhhhhhhhhhhhhmnopomkjiijjjkkkkllmmnnoponljiggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggefghhiiinw�wqqt~xoigfffffffffffggggggggggffeeddeffggffeeeeeeeeeffffffffffffffffeeeeeeeeeeeeeeddccbbbbbbbbcccdddffffffffccccbbbbbbbbbbbbbbbcccccbbcddeeeeeeeefffeeeeeeeeggggffffffffffffhhhhhhhhhhhhhhhhlllllllliijjklllpppqrrsssrpnkhfegggggggggggggggghhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhqxxrprz}xmimi_ffffffffffffffffggggggggggggggggeeeeeeeeeeeeeeeeeeeeeeeeeddddccceeeeeeeebbbbbbbbddddddddeeeeeeeeeddccbaabbbbbbbbbccccdddeeeeeeeeeeeeeeeeeeeeeeeeffffffffeeeeffffhhhhhhhhhhhhhhhhiiiiiiiihiijjkkknnnooppqqqomjhgfgggggggggggggggghhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhqx~wqpqw|zoijhaffffffffffffffffggggggggggggggggeeeeeeeeeeeeeeeeeeeeeeeeeddddcccddddddddccccccccddddddddeeeeeeeeeddccbbbbbbbbbbbcccccdddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffhhhhhhhhggggggggffffffffhhhiijjjkkkllmmmnnlkjhgggggggggggggggggghhhhhhhhgggggggggggggggggggg
ggggggggggggggggggggggggggggggggggggggggghhhqx~}vpnprz}tjggeffffffffffffffffggggggggffffffffeeeeeeeeeeeeeeeeeeeeeeeeeddddcccddddddddccccccccddddddddeeeeeeeeedddccbbbbbbbbbbccccddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeefffffhhhhhhhhffffffffffffffffggghhhiihhhiijjjkkjiihhgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhqw}|tnmnmw~xleeiffffffffffffffffggggggggffffffffffffffffeeeeeeeeeeeeeeeeeddddcccccccccccddddddddddddddddeeeeeeeeeedddccccccccccccccdddddeeeeeeeeeeeeeeeeeeeeeeeeddddddddeeffffgghhhhhhhhffffffffggggggggggggghhhggggghhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhqx}{smlmjs|{pfejffffffffffffffffggggggggffffffffffffffffeeeeeeeeeeeeeeeeeddddcccccccccccddddddddeeeeeeeeeeeeeeeeeeeeddddccccccccccddddeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddfffffggghhhhhhhhgggggggghhhhhhhhgggggggggggggggggghhhhhiggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhrx}{slkmjox}uiehffffffffeeeeeeeeggggggggffffffffffffffffeeeeeeeeeeeeeeeeeddddcccddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeccccccccdddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffgggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhggghhhhhhhhgghhhhiiggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhty~{slkmkks}zmefffffffffeeeeeeeeggggggggggggggggffffffffeeeeeeeeeeeeeeeeeddddcccddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffccccccccddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeefffggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiihhhhhiiiggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhtz~{slkmlip}}pedffffffffeeeeeeeeggggggggggggggggffffffffeeeeeeeeeeeeeeeeeddddccceeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeefffffccccccccddddeeeeeeeeeeeeeeeeeeeeeeeeeeeefffffffffffgggghhhhhhhhhhhhhhhhhgggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghijvz}zsmklkikt|{peffffffffffffeeeeeefffgggggggggggffffffffeeeeeeeeddeeeeffggggffffeeee
eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeggggggggiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghijuy|zsmkkjhjr{zpfffffffffffffeeeeeefffgggggggggggffffffffeeeeeeeeeeeeffffgggfffffeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffggggggggiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghijtx|zsmjkjghpyyqgfffffffffffffeeeffffffggggggggggeeeeeeeeeeeeeeeeeeffffgggggffffeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffggggggggjjjjjjjjiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghijsx{zsmjjjffmvxqiggggggggggffffeeffffffffffffffffeeeeeeeeeeeeeeeeffffgggggffffeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeggggggggggggggggjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghijsw|ztnkkjfekuxslgggggggggggfffffffffffffffffffffeeeeeeeeffffffffffffggggffffeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeggggggggggggggggiiiiiiiijjjjjjjjjjjjjjjjiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghijsx|{uolkkfdisxuoggggggggggggffffggffffffffffffffffffffffffffffffeeffffggfffeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffggggggggiiiiiiiijjjjjjjjjjjjjjjjiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghijsx}}wpmllfchsyvqgggggggggggggfffgggfffeeffffffffggggggggffffffffeeeefffffffeeeedeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffgggggggghhhhhhhhkkkkkkkkjjjjjjjjiiiiiiiihhhh
hhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghijsy~}wqmmmgchrywshhhhhhhhhggggfffgggfffeeeeeeeeeeggggggggffffffffddeeeeffffeeeeddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeggggggggjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggghhhhhhhhgggggggggggggggggggggggggggggggggggggggghgggghiivy}|xsomjjihinv|kiffghgfggggggggggggggggggggggggggggffffddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeejjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggghhhhhhhhgggggggggggggggggggggggggggggggggggggggghgggghiivy}}ysnljjihinu{ljgfghgfggggggggggggggggggggggggggggffffdddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffeeeeeeeeeeeeeeeeeeeeeeeeffffffffjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggghhhhhhhhgggggggggggggggggggggggggggggggggggggggghgggghiivy}}ysnljjihimtynkgfghgfggggggggggggggggggggggggggggffffdeeeefffeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffeeeeeeeeeeeeeeeeffffffffffffffffjjjjjjjjjjjjjjjjhhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhiiiiiiiihhhhhhhhhhhhhhhhgggggggghhhhhhhhgggggggggggggggggggggggggggggggggggggggghgggghiivz~~ysnkjjihhlrwqmhffgggggggggggggggggggggggggggggggffffeefffgggffffffffffffffffeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffggggggggjjjjjjjjiiiiiiiihhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhgggggggghhhhhhhhggggggggggggggggffffffffgggggggggggggggghgggghiivz~~zsmjjjihgkpusoiffgggggggggggggggggggggggggggggggffffggggggggffffffffffffffffeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffggggggggggggggggjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggffffffffgggggggggggggggghgggghiivz~zsmiijigginsvpjfefgggggg
ggggggggggggggggggggggggffffiihhgggfffffffffffffffffeeeeeeeeffffffffggggggggggggggggggggggggffffffffffffffffffffffffffffffffgggggggggggggggghhhhhhhhjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggffffffffgggggggggggggggghgggghiivzzsliijigfhmqwrjfefghggggggggggggggggggggggggggggffffkkjihgfeffffffffffffffffeeeeeeeeffffffffggggggggggggggggggggggggffffffffffffffffffffffffffffffffgggggggghhhhhhhhhhhhhhhhjjjjjjjjiiiiiiiijjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggffffffffgggggggggggggggghgggghiivzzslhijigfhlpxrkedfghggggggggggggggggggggggggggggffffllkihfeeffffffffffffffffeeeeeeeeffffffffggggggggggggggggggggggggffffffffffffffffffffffffffffffffhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiihhhhhhhhhiiiijjjiiiiiiiijjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggfhihffhjw{~zsmjhhggghhiuwqfchieggggggggggggggggggggggggffffffffhkmkfdgjeeeeeeeeffffffffeeeeeeeeggggggggffffffffggggggggggggggggffffffffffffffffffffffffgggggggghhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiijjjiiiiiiiijjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggfhihffhjw{~zsmjihgggghhtvqgcgieggggggggggggggggggggggggffffffffgkmlifgiffffffffffffffffffffffffffffffffffffffffggggggggggggggggffffffffffffffffffffffffgggggggggggggggghhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiijjjjiiiiiiiijjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggfhhgffhkw{~~zsmjjihgggggrurhdgieggggggggggggggggggggggggfffffffffjnomiggffffffffffffffffffffffffffffffffffffffffggggggggggggggggffffffffffffffffffffffffgggggggggggggggghhhhhhhhhhhhhhhhjjjjjjjjjjjjjjjjiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggggggggggggggggggghgffilvz~~zsnkjjhgfffgoutjeghfggggggggggggggggggggggggffffffffeinqplhfffffffffffffffffffffffffffffffffffffffffggggggggggggggggggggggggffffffffggggggggggggggggffff
ffffhhhhhhhhhhhhhhhhkkkkkkkkjjjjjjjjiijjjjkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggghfefilvz~~zsnkjjhgfffgltuleghfggggggggggggggggggggggggggggggggfhmqqnieffffffffffffffffffffffffffffffffggggggggggggggggggggggggggggggggggggggggggggggggggggggggffffffffhhhhhhhhhhhhhhhhkkkkkkkkkkkkkkkkjjjjjkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggggfefjmvz~~zsnkjihgggggisvnfghgggggggggggggggggggggggggggggggggghkoqojfggggggggffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhkkkkkkkkkkkkkkkkjjjjkkkkjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggggeefjnuy~~ztnlihgggghhgrwogfhggggggggggggggggggggggggggggggggghghlpokgggggggggffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhllllllllkkkkkkkkjjjjkkkkkkkkkkkkjjjjjjjjkkkkkkkkjjjjjjjjiiiiiiiiiiiiiiiihhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhgggggggggggggggggggedfjnuy~~ztolhhggghhierxpgfhgggggggggggggggggggggggggggggggggiggjnolgggggggggffffffffgggggggghhhhhhhhgggggggggggggggggggggggghhhhhhhhgggggggghhhhhhhhgggggggghhhhhhhhhhhhhhhhhhhhhhhhmmlkjjiikkkkkkkkkkkkkkkkkkkkkkkkjjjjjkkkjjjkkkllkkjjjiiiijjkkjihjjjjjkkkjjjjjjjjjjjjjjjjhhhhhhhhhhhiiijjiiiiiiiihhhhhhhhhhhhhhhhghigfgjmvy|}ztmikjigffffgmqoiefiggggggggggggggggffgggghhggggggggheejqsnheffffffeffffffffgggggggggggggggghhhhhhhhgggggggggggggggghhhhhhhhhhhhhhhhffgghhhhhhhhiiiiffgggfeehhhhhggghhhhhhhhnmmllkkkllllllllkkkkkkkkkkkkkkkkjjjjkkkkllllllllllkkkkjjijjkkjiijjjjkkkkjjjjjjjjjjjjjjjjhhhhhhhhhhhhiiiiiiiiiiiihhhhhhhhhhhhhhhhhhihfgjmwy|}ztmikjiggffffkpniefiggggggggggggggggfgggghhhhhhhhhhhjggkrtpkfgggggggffffffffgggggggggggggggghhhhhhhhgggggggggggggggghhhhhhhhhhhhhhhhgggghhhhghhhhiiigghhhhggiiiihhhhiiiiiiiimmmmmmmmlllllllllllllllllllllllljjkkkkllmmmmmmmmlllllkkkjjjkkjjjjjjjkkkkjjjjjjjjjjjjjjjjiiiiiiiihhhhhiiiiiii
iiiiiiiiiiiihhhhhhhhhiihghkmwy|}ztnikjihggggeimlhfgihhhhhhhhhhhhhhhhgggghhhhhhhhhhhhkihlqtqnggghhhiigggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiihhhhhgggggghhhhhhhijkkjjjjjjjiiiiiiiiiiilmmnnnmmmmmmmmmmmmmmmmmmmmmmmmmmkkkkllllmmmmllllllllllllkkkkkkkjjjjkkkklkkkkkkkkkkkkkkkkiiiiiiiihhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiijighknxz|}ztnjkjihggggehkjhfgihhhhhhhhhhhhhhhhghhhhhiihhhhhhhhkihkoqqoggggghhigggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiihhggggggghhhhiiklmmmmllkkkkjjjjjjjjjjllmnnnmlmmmmmmmmmmmmmmmmmmmmmmmmkkllllmmllllllllllllllllnnmllkkkjkkkklllkkkkkkkkkkkkkkkkjjjjjjjjiiiiiiiiiiiiiiiijjjjjjjjiiiiiiiiijjihilnyz||ztokkjihghhhfgiihghihhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiihghknppggfeefgghhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhiiiiiiiiiiihhhggggghhhhhijlmnooomllllkkkjjjjjjjjnnppponmllllllllllllllllllllllllllllmmmmmmmnnnnonnoooooorqpnmlllkkkklllllllllllllllllllljjjjjjjjkjjjjjiiiiiiiiiijjjjjjjjjjjjjjjjjjkjhiloy{||ztokjjihhhhihhhhhhhhhhhhhhhhhhhhhhhhhiiiijjjjjjjjjjjihggilorihgeeefghhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihhhhhhhhiiiiiiiiiiihhhhhhhhhiiiijklmnoonmmmllllkkkkkkkkkrstutsqpllllllllkkkkkkkkkkkkkkkkllmmmmnnoppqrrsssssttttuvtrpnmllkkkkllllllllllllllllllllkkkkkkkkllkkkkjjiiiiiiiikkkkkkkkjjjjjjjjjkkjijmoz{||yuoljjihhhiijihhiihgiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjihhlqumljhfgghiiiiiiiihhhhhhhhjjjjjjjjiiiiiiiijjjjjjjjjjjjjjjjhhhhhhhhhhhhhhhhhhhhiiiiiiiiijjjjklmmnmmmmllllkkkkkkkkkkvwxxwvsrkkkkkkkkkkkkkkkkkkkkkkkklmmmmnnnrrstuvwwwwwxxxyyxvtqnmllkkklllllllllllllllllllllkkkkkkkkmmlllkkkiiiiiiiikkkkkkkkkkkkkkkkjkkjijmpz{||yuoljjihhiiikihhijigiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjllkiilrwqoljhhiiiiiiiiiihhhhhhhhjjjjjjjjiiiiiiiijjjjjjjjjjjjjjjjhhhhhhhhhhhhhhhhhhhhiijjiiijjjjkjjklmmllmllllkkkkkkkkkkk|{{zyxwvoonnmmmlmmmmmmmmllllllllmmmmmmmmopruwxxxvvwxxyyyvwwvuromlkkjkklmmmmmmmmmlllmmnnnllllllllmmmmmlkjhiijjjkjkjjjjjjjklllllmmhiihghknz{}~|yvspmjijkjiiiiiiiiiiiihhhgghhhhiiiiiiiiiiiiklmmmmlknljlpuxzxurponkiljgffhkmjjjjjjjjjjjjjjjjjjjj
jjjjkkkkkkkkkkkkkkkkiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjmmmmmmmmllllllllkkkkkkkkzyyxwvuuppoonmmmllllllllllllllllmmmmmmmmnprtvwwwtuuvwwxxxxyywusqmmlkkllmmmmmmmmmlllllmmmllllllllllmmmmlkiiiiiiiijjjkkllmmmmlllllijjihiknyz|}|zvtomjijkjjiiiiiiiijjjjiiiijjjjjkkkkkkkkkkkkllmmllkmkjlpuyzzwuuvvutpomkijjkjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkiiiiiiiijjjjjjjjiiiijjjjkkkkkkkkmmmmmmmmllllllllkkkkkkkkwvvuutssrqqponmmllllllllllllllllmmmmmmmmnoqrtuvvrrsttuvwyzzzzxvuoommllmmmmmmmmmmllllkkkkkkkkkkkkllnoooookkjjiihhjklmopqrqqpoonmmklljiilnxz|}}zwuoliijkkjjjjjjjjjjjjkkkkkkkklllllllllllllkkllllkkmkjlqvyzvtstwyyyutsqomlkkkkjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkjjjjjjjjijjjjkkkkkkkkkkkmmmmmmmmlllllllllllllllltsssrrrqssrqponnllllllllmmmmmmmmmmmmmmmmnopqrsttqqqqrstuxxyyyxwvqponmmmmlllllllllllkkjjjkkkkkkkknoprstttponmkkjjmnoqtvwxxwwutsrrnnnljjmowy|}}zwupmjiklkkkkkkkkkkijjjkkllkkkllllmlllllllllllllllllkknswyyomlmorrruuvvtrpommlkkkkkllllllllllllllllkkkkkkkkkkkkkkkkllllllllkkkkkkkkjjkkkkllkkkkkkkkmmmmmmmmllllllllllllllllqqqqqqqpssrqqpoommmmmmmmnnnnnnnnmmmmmmmmnoopqrssqppppqsttuuuvuutqqonnmnnmmmmmmmmmlllkkkjllllllllprtvwyyyvtrqonnnqrsuwy{{}||{yxxwoppnllnqwy|}}zwuroljkllkkkkkkkkkiijjkkklkkkkllllkkkkkkkkmllllllmmmmpuxyypmjjklllqrtvvvvuqpnmlkllmmmmmmmmmmmmmmmmllllllllllllllllllllllllllllllllkkklllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmppppppqqrrrqqpppnnnnnnnnnnnnnnnnnnnnnnnnoooopqrsrrpppprsqqqrrrrqqponnnnnmmmmmmmmmmmmmmmmmmmmmmmmrsuwy{{|yxusrqqrttuvwxyz{{{zzyyypqqomnqsxz|}}zwutqnlmmlklllllllllllllmmmllllmmmmllllllllnnmmmmnnoopswyyxvsommnmmmnprtvxyutqomlllmmmmmmmmmmmmmmmmllllllllllllllllllllllllmmmmmmmmllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmopppqqqqqqqqqqqqoooooooonnnnnnnnppppppppppoopqrrutrpppqrppppppppoonnnnoonnnnnnnnnnnooooonnnnnnnnqrtvxyzzyxusrrrsttttttttttttuuvvpqqpopsuyz|}|zvtwtpnnnlkmmmmmmmmppppppppooooppppoooooooooonmmnoorqruxzywvsommnnnmmmnpsvwywtqnmllmmmmmmmmmmmmmmmmllllllllllllllllllllllllmmmmmmmmlmmmmnnnmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnppppqqrrppppqqqqooooooooooooooooppppppppqppopqrrvusq
ppqrpppoooooonnmmnoooooooooooooppqqqoooooooooqruvxxxxwtrqqrstssrqpoommnopqqrpqrqpqtwz{}~|yvsyvroonlkmmmmmmmmtttsssrrqrrrrrssqqqqqqqqpponnoppsstvyzywqmihjkllomlkloru|zvromllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmnnnnmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnqqqqqqqqssrqqqqrqqqqppppooooooooqqqqrrrrppppppppsssrqonmoopppponooooooooqqqqrrrrrrrrrrrrtsqppqsttuuvvwwwwwwvvuuuqqpppooonnnnnooonopoopswxz}~}{xvuspnmmmnppqrtuvv{zzyyxxxwwwwwwwwwwwwwwwwuutssrqqtwyzxvvvspmmnoonnmmmnoqrsrpnmlllmmmmmmmmllllllllmmmnnnnollllllllnnnnnnnnlllllllljjklmnoooooooooonnnnmmmmnnnoooppppppppppqqqqqqqqrrqqqqrsrrqqqqqpppppppppqqqrrrrsppppppppssssrqpoooppppooppppppppssttuuuuvvvvvvvvtsrqqrstuuuuvvvvuuuuttttppppoooonnnnoooonopoopsvxz}~}{xvusqonnoorstuvwxxzzzyyxxxwwwwwwwwwwwwwwwwwvvuuttswxzywwwytqnmoponnmmmmnpprqonmllmmmmmmmmmllllllllllllmmmmmmmmnnnnppppppppoooooooollmmnnnnoooooooooonnnmmmnooooppppppppppprrrrrrrrqqqqqrstssrrrrqqqqqqqqqqrrrrrrssqqqqqqqqsstttsrrppqrrqqprrrrrrrrvvvwxxyyyyyyyyyyutsrrstuvvvuttssrrrrrsssppppoooonnnooooonopoopsvxz}~}{xvutrqpqqruuvwxyyzzzyyxxwwwwwwwwwwvvvvvvvvxxxwwvvvyzywvvy|vspoppponnmmmmnnpoonmmmmnnnnnnnnmmmmmmmmkkkkllllnnoppqrrrrrrrrrrrrrrrrrrppoonnnnnnnnnnnnooooonnnoopppppppppppppprrrrrrrrqqqqrsuuttttssrrrrrrrrrrrrrsssssrrrrrrrrrsuvvvutsstuuuttttttttttvvwxyyzzzzzzzzzzvutsstuvwwvutsrqopppqqrrppppppppooooppppopqpopsvxz}~}{xvuutsssttvvvwxxyyyyyxxwwwwwwwwwwwvvvvvvvvxxxwwwwwyywtsuy}xuqpqrqpooonnnmmoonnnnnnoooooooonnnnnnnnmmmmmnnnppqrstuuuuuuuuuuuuuuuuuusrqponnmnnnnnnnnpppppppppppqqqqqqqqqqqqqssssssssqqqrsuvwvvvuttssssssssssssssssssssssssssrtvwxwvuvvwxxxxwuuuuuuuuuvvwwxxyxxxxxxxxvuuttuuvwwvutsrqooppqqqqpppqqqqqpppqqqqrqrrqpqtwxz}~}{xvvuuuuuuvuuuvvwwwyyxxwwvvwwwwwwwwuuuuuuuuvvvwwwwwxwurqsx|xurqrsrqrqqqppoopppppppoppppppppoooooooopppqqqqqrrstuvvwvvvvvvvvuuuuuuuutssqponnooooooooqqqqqqqqrrrqqqqqqqqqqqqqssssssssssstuvwxxxwvvutttttttttttttttsssssssssssrtvxyxwvwwxyzzyyvvvvvvvvuuuvvvvvvvvvvvvvuuuuuuuuvvvuttssqqqqrrrrqqrrrrrrrrrrsssssstsrruxxz}~}{xvvvvvvvvvuuuuuuuuxxxwwvvvvvvvvvvvuuuu
uuuuvvvwwxxxvvusrsx{xurqrssrttuuttsrrrrsrrqqppppppppppppppppssssttttssttuvvwuuuuuuuuttttttttssrqqpooppppppppqqrrrrrrssrrrrrrrrrrrrrrttttttttuuuuvwxyyyxwwvuuttttttttuuutttssttttttttstwyyxvuuvwxxxxxvvvvvvvvwwvvvvvuuuuuuuuuuuuuuuuuuuuuvvvvtttsssssrrsssttttttttuuutuutstwyxz}~}{xvwwwwvvuuvvvvvvvvxxwwvvuuuuuuuuuuttttttttwxxyyzz{vwwvuvy|vtqqrttswwxyxxwvtuuuutsrqqqqqqqqqqqqqqqqssttttuuttttuuuvttttttttrrrrrrrrrqqqqqppqqqqqqqqrrrrsssstssssrrrrrrrrrrrttttttttwwvvwxyyzyyxwvuuuuuuuuuuuuutttssttttttttsuwyyxvtssuvvvvvvvvvvvvvxxxwwvvvuuuuuuuuttuuuutttuuvvwwwvvuutttsssstttuuuuuuuvvvuvvuttwzxz}~}{xvxwwwvvutxxwwwwwwxxwwvvuuttttttttttttttttyyz{{|}}wyzzyy{}uspprtutxyz{{zyyvwwwwutsqqqqqqqqrrrrrrrrsssstttttttttttussssssssqqqqqqqqpppqqqqqrrrrrrrrrrrssttttttsssrrrrrrrrrrttttttttuuuvwwwxxxxxxxxxxwvuuuuuuuuuuuuuttttttttrvz{xusrsttuuttsuuuvvvvvuuuuuuuuwwvvuuuttssstuwxwvvuvwxxvvvvvvvvvvvvvvvvuuvvvwwwwvvvvwxxy{}~~{xvwvutttuuvvvvvvvvwwxxxxyyzywvutuuvvvvwwwxyyyzzzz{yyyyy{|}yvsrtvwvxyyzzyyxwwwwvutsssssssssssssssssrrrrssttrrsstuuuuuttttsssrrrrsttrrrrrrrrsssssssssttttuuutuuuutsstttttttttttttttttttuuvvvwwwwwwwwxwvuttuutttttttttttttttttw{{yvtstuuvvuutuuvvvvwwvvvvvvvvwwvvvvvvuuttuvwxwvvvvwxxvvvvvvvvvvvvvvvvvvvvvwwwvvvuvvwxy{}~~{xvwvutttuuvvvvvvvvwwxxxxyyzywvuuuuvvvvwwwwxyyyyyzzyyxyy{|}zwsrtvvvwxyyyyxwwwwwwuttttttttttssssssssssssssssrssttuuuuuuttttsssrrrsttsssssssstttttttttttttuuuuuvvvuttuuuuuuuuuuuuuuuussstuuuvvvvvvvvvwwvutttuttttttttttttttttuy|}zwvuvvwwwwvvvvvvwwwwwwwwwwwwwwwwwxxxwwwvvwwxwvvvvwwxvvvvvvvvwwwwwwwwwwwvvvvvvvuuuvwwy{}~~{xvwvutttuuvvvvvvvvwwwxxxxxyxwvvuuuvvvwwwwwwwwxxxxyxxxxy{|}|xtstuuuwwxxxxwwwwwwwvutttttttttttttttttttttssssssttuuuvuuutttttssrrsstuuuuuuuuuuuuuuuuutttuuuuuvvwwwvuuvvvvvvvvuuuuuuuutuuuvvwwxxxxxxxxwwvutttuttttttttuuuuuuuuwz}~{ywwwxxyyxxwwwwwwxxxyyyyxxxxwwxxyyyyzyyxxxxxvvvvwwwxvvvvvvvvwwwwwwwwwwwvvvuuvuuuuvwwy{}~~{xvwvutttuuvvvvvvvvwwwwxxxxxxwwvvvvvvwwwwwwvvvvwwwwxxxxyz|}}zuttuutvvwxxwvvwwwwwvuuttttttttuuuuuuuuuuuuttsstttuuuvvuuuutttttsssstuuwwwwwwwwvvvvvvvvuuuuuvvvwwxxxwvvwwww
wwwwvvvvvvvvwwxxyyyzzzzzzzzzxwvuuuuuuuuuuuuuuuuuuuuuy|~~|zyyxyyzzyyxwxxxxyyyzzzzyyyxxxxyyyzz{{zzyyxxvvvwwwwwvvvvvvvvwwwwwwwwwwwvvvuuvvuuuvwwy{}~~{xvwvutttuuvvvvvvvvwwwwwxxxwwwwwwwwwwwwwwvvuuuvvvvvwwwxyz|}~zvtuvuuvvwwwwvvvwwxwwvuuuuuuuuuvvvvvvvvvvvuuuttuuuuuvvvvuuuuttttttsttuvxxxxxxxxwwwwwwwwuuuvvvvvxxxyxxwvwwwwwwwwvvvvvvvvxyyyz{{{||||||||yxwvvvvvvvvvvvvvvvvvvvvvz||zyzyyzzzzyyxxyyyyzz{{zzyyyxxxyyyyyyz{{{zzyxuvvwwwwwwwwwwwwwwwwwwwwwwwwvvvvvwvvvvwxxy{}~~{xvwvutttuuvvvvvvvvvvwwwwxxvwwxxxwwwwwwwvvvuuuuvvvvvvwwyz|}~{wuuvvuvvwwwwvvvwwxxwwvvvvvvvvvvvvvvvvvvvvvvvvuvvvvvvvvvvvuuuutuutttuvvwwwwwwwwwwwwwwwwvvvvvwwwxxxyxxwvwwwwwwwwwwwwwwwwxxxyyzzz{{{{{{{{zyxwwwwwwwwwwwwwvvvvvvvvz}~|zzzyyz{{zyyyyyyzzzz{{zzyyxxyyyyxxxxzz{{{zyyuvwwxwwvwwwwwwwwvvvvvvvvvvvvvwwwxwwwwxxyy{}~~{xvwvutttuuvvvvvvvvvvvwwwwwvvwxyyxxwwwwvvvvuuuuvvvvvvvwyz|}~{wuvwwvvwwxxwwvvwwxxxwwvvvvvvvvvvvvvvvvvvvvwwwwvvvvvvvvvvvuuuuuuutttuvwwwwwwwwwvvvvvvvvvvvwwwwwwxxxxwwvwwwwwwwwwwwwwwwwvwwwxyyyzzzzzzzz{zyxwwxxwwwwwwwwwwwwwwwwz}~~|zzzyyz{{zyyyyyzzzz{{{zzyxxxyyyyxxwwyyz{{{zyuvwxxwwvwwwwwwwwvvvvvvvvuuvvvwwwxxwwwxyzy{}~~{xvwvutttuuvvvvvvvvvvvwwwwwuvwyyyxxxwwwvvvvuuuvvvvvvvvwxz|}~zwuvwxwwwxxxxwwvwwxxxwwvvvvvvvvuuuuuuuuvvvwwwxxwwwwvvvvvvvvuuuuuuutuuvwvvvvvvvvvvvvvvvvvvvwwwwxwwxxxwvvwwwwwwwwwwwwwwwwxxxyyyyyzzzzzzzzzzzyyyyywwwwwwwwxxwwxxyz{|}}}{zxyyyyyyyyyyyzz{{{zzzyyyyxyyyyyyyyyzz{||{{zyxwvvwwwwwwwwwwvvvvvvvvuuvvvwwwwwwwwxyz{{}}}|zywwwwwvvvvvvvvvvvwwwwwwwwvwxyzzyyxxwwwwvvuvvwwvuuwwvwxz|~~{xvwwwvwwwxxxxxxxxxxwvuwwwwwwwwvvvvuuuuuuvvwxxxwwwvwwxywwwvvvvvuuuuuuuuwwwwwwwwvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxyyyyzzzzzzzzzzyyyyxxwwwwwwwwxxxwxyyz{|}}}{zxyyyyyyyyyyzzz{{{zzzzzyyyyyyyyyyyyzz{|{{{zyxwvvwwwwwwwwwwvvvvvvvvvvvvwwwwwwwwwxyz{{}}}|zywwwwwvvvvvvvvvvvwwwwwwwwvwxyzzyyxxwwwwvvuvwwwvvuxwwwxz|}~{xvwxwvwwwxxxxxyyyyywvvwwwwwwwwvvvvvuuuuuvvwwxxwwwvwwxyxwwwvvvvuuuuuuuuwwwwwwwwvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxyyyzzzzzzzzyyyyxxxxwwwwwwwwyyxxxyzz{|}}}{zxyyyyyyyyzzzzz{{{{{{zzzzzyyyyyyyyyzz{{{{zzyxwwwwwwwwwwwwwvvvvvvvvvvvv
wwwwwwwwwxyz{{}}}|zyxxwwwwvvvvvvvvvvwwwwwwwwvwxyzzyyxxwwwwvvvvwwwwvvxwwwxz|}~{xwwxwvwwwxxxxx{{{zzxwvwwwwwwwwwvvvvuuuuvvvwwxxwwwvwwxyxxxxwwwvuuuuuuuuwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxyyyyyyyyyyyyyyxxxxxwwwwwxxxzzyxyyzz{|}}}{zxyyyyyyyyzz{{{{{{|{{{{zzzyyyyyyyyyzz{{{zyzyxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxyz{{}}}|zyxxxwwwwwvvvvvvvvwwwwwwwwvwxyzzyyxxwwwwvvvvwxxxwwxxwwxy{}~{xwwxwvwwwxxxxx||||zyxwwwwwwwwwwwwvvvvuvvvvwwwxwwwvwwxyxxyyyxwwvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxyyyyyzzzzzzzzyyyyxxxxwwxxxyyy{zzyyyz{{|}}}{zxyyyyyyyy{{{{{{{{|{{{{zzzyyyyyyyyyzz{{zyyzzyxwwwxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxyz{{}}}|zyxxxxwwwwvvvvvvvvwwwwwwwwvwxyzzyyxxwwwwvvvwwxyxxxyxwwxy{||xwxxxwwwwxxxxx||||{zxxwwwwwwwwwwwwvvvvvvvwwwwwwwwvwwxyyyzzzyxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxyyyyyyyyyyyzzzzzzzzzzzzzzzzyyyyyxxyyzzz{|{zzyzz{{|}}}{zxyyyyyyyy||{{{{{{{{{zzzzzzzzzzzzzyzz{zzyx{zyxwwxxwwwwwwwwxxxxxxxxxxxxwwwwwwwwwxyz{{}}}|zyxxxxxwwwvvvvvvvvwwwwwwwwvwxyzzyyxxwwwwvvvwxyyyyyyxwwxy{||ywxyxwwwwxxxxx{|||{zyywwwwwwwwwwwwwvvvvvwwwwwwwwwvwwxyyz{{{zxxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxx{{{{{{{{zzz{{{{{{{{{{{{{{{{zzzzyyyzz{{||||{zzzz{{|}}}{zxyyyyyyyy||||{{{{zzzzzyyyzzzzzzzzyzzzzyxx{zyxxxxxwwwwwwwwxxxxxxxxxxxxwwwwwwwwwxyz{{}}}|zyyyxxxxwwvvvvvvvvwwwwwwwwvwxyzzyyxxwwwwvvvwxyzzyyyxwwwy{||ywxyxwwwwxxxxxzz{{{{zzwwwwwwwwxwwwwvvvwwwwwwwwwwwvwwxyyz{||{yxwwwwwwwwwwwwwwwwxxxxxxxxwwwwwwwwwwwwwwwwxxxxxxxx{{{{{{{{{{{{||||||||||||{{{{{zzzzzz{||}}}|{zzz{{{|}}}{zxyyyyyyyy||||{{{zzzzyyyyxzzzzzzzzyzzzzyxx{zyxxxxxwwwwwwwwxxxxxxxxyyxxxwwwwwwwwxyz{{}}}|zyyyxxxxwwvvvvvvvvwwwwwwwwvwxyzzyyxxwwwwvvvwxyzzzzyyxwwy{||yxxyxwwwwxxxxxyyz{{{zzwwwwwwwwxxwwwwvvwwwwwvvvwwwvwwxyyz|}|{yxxxxxxxxxwwwwwwwwxxxxxxxxwwwwwwwwwwwwwwwwxxxxxxxx�������������������������������~~~~~~~~}}}}}}}}{{{{{{{{~}}}||||||||||||||||||||}||||{{{{{{zzzzz{{{zyyxxxxyyyyzzzzzzzzzzxwwwxy{||}~~}{yxxxxxxwwwvvvwxxyyxwvuuvwxwxxxyyyyxxxxxxxxxxxyz{{{yyyyyyyy}|zyxwwwvvvwwxxxwxyz{{zyvvvvvvvvxxxxxxxxwwwwwwwwwwvv
vwxxzz{{{zyxwwwwwwwwwwwwxxxxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww������������������������������������������������~~~~~~~~}~~~~~~~~~~~~~~~~~~~~~~~~~~~~}}}}}}}|||||{{zzyyxxxxxyyyyyyyyyyyyyxxxxxy{{|}~~}{zxxxxwwwwwxxyyzzzxwvvvvwxwwwxxxyyxxxxxxxxwxxyyzz{{{{{{{{{~}|zyxxxxxxxxxyyxyz{{{{zxxxxxxxxyyyyyyyyxxxxxxxxxxwwwxyyzzz{zzyxxxxxxxxxvvwwwxxxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww������������������������������������������������������������������������������������������������|||{{zzzyyzzzz{{zzzzzzzzyyyyyz{||}~~||yyxxxxwwyyzz{{{|xxwwwwxxwwwxxxxxyyyyyyyywxxxyyyz{{{{{{{{�}{zyyyzzzyyyyyyzz{{{{zzzzzzzzzyyyyyyyyxxxxxxxxyxxxxyyzyzz{zzyxxxxxxxxxvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww�����������������������������������������������������������������������������������������������������������~~}}~~~~}}}}}}}}|||||||}z{|}~}}|zzzzyyyyzzz{{{||zzyyyyzzxxxyyyyyyyyyyyyyyyyyyyyy{{{{{{{{�}{zyxxzzzyyyxxyyzzzzzzxxxxxxxxxxxxxxxxwwwwwwwwxxwwwxyyzz{{{zyxyyyyyyyyvvvvvwwwwwwwwwwwwwwwwwwwvvvvvvvvwwwwwwww�����������������������������������������������������������������������������������������������������������������������������������z{|}}}}}}||||{{{{{{{{{||||}}}}||{{{{{{{{zzzzzzzz||{{{{{{zzzzzzzz~}{zyxxxzzzyyxxxzyyyyyyywwwwwwwwwwwwwwwwvvvvvvvvwwwvwwxy{{{|{{zyyyyyyyyywwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww����������������������������������������������������������������������������������������������������������������������������������������}~~���~}}}~~~~~��������~~~~~~~~~~~��~~~{{{{{{{{}|{zzzzz{{{{{{zz||{{{{{{xxxxxxxxyyyyyyyyxxxxxxxxyxxxxyzz||}}}|{zzzzzzzzzxxxxwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~}||}~~~~~���~�~~~~~~~~}}}}}}}}||||||||||{{{|}}}}~~~}||zzzzzzzzyyyxxxxwwwwwwwwwwwwwwwwwxxxxxxxxwwwwwwww��������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~��~~~}}zzzzzzzzzzyyyxxxwwwwwwwwwwwwwwwwyyyyyyyywwwwwwww��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~}}}}}}}}}~~~}||{{zzzzzzzzzzzzzzzzyyyyyyyywwwwwwww���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������~~~~~~~~}}}}}}}}����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
\ No newline at end of file
diff --git a/codec/L2/demos/resize_sc/kernel/kernel_resize.cpp b/codec/L2/demos/resize_sc/kernel/kernel_resize.cpp
new file mode 100644
index 0000000000..945593416b
--- /dev/null
+++ b/codec/L2/demos/resize_sc/kernel/kernel_resize.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file kernel_resize.cpp
+ *
+ * @brief This file contains top function of test case.
+ */
+
+#include "kernel_resize.hpp"
+#include "bicubicinterpolator.hpp"
+
+void resize_acc::compute(ap_uint<32>* configs, ap_uint<WDATA>* axi_src, ap_uint<WDATA>* axi_dst) {
+    kernel_resize(configs, axi_src, axi_dst); // forward all three buffers unchanged to the kernel body
+}
+
+void resize_acc::kernel_resize(ap_uint<32>* configs, ap_uint<WDATA>* axi_src, ap_uint<WDATA>* axi_dst) {
+// clang-format off
+#pragma HLS INTERFACE m_axi offset = direct bundle = gmem0 port = configs latency = 32 num_read_outstanding = \
+    64 max_read_burst_length = 64 num_write_outstanding = 64 max_write_burst_length = 32 depth = 5
+// Each pointer argument gets its own m_axi bundle: configs -> gmem0 (above), axi_src -> gmem1, axi_dst -> gmem2.
+#pragma HLS INTERFACE m_axi offset = direct bundle = gmem1 port = axi_src latency = 32 num_read_outstanding = \
+    64 max_read_burst_length = 64 num_write_outstanding = 64 max_write_burst_length = 32 depth = 128
+// NOTE(review): depth is 5 for configs and 128 for both image ports; presumably sized for co-simulation - confirm.
+#pragma HLS INTERFACE m_axi offset = direct bundle = gmem2 port = axi_dst latency = 32 num_read_outstanding = \
+    64 max_read_burst_length = 64 num_write_outstanding = 64 max_write_burst_length = 32 depth = 128
+    // clang-format on
+    // All work is delegated to xf::codec::resizeTop, which is not defined in this file.
+    xf::codec::resizeTop(configs, axi_src, axi_dst);
+}
diff --git a/codec/L2/demos/resize_sc/kernel/kernel_resize.hpp b/codec/L2/demos/resize_sc/kernel/kernel_resize.hpp
new file mode 100644
index 0000000000..ebeee5c550
--- /dev/null
+++ b/codec/L2/demos/resize_sc/kernel/kernel_resize.hpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file kernel_resize.hpp
+ *
+ * @brief This file contains top function of test case.
+ */
+
+#ifndef _XF_CODEC_KERNEL_RESIZE_SC_HPP_
+#define _XF_CODEC_KERNEL_RESIZE_SC_HPP_
+
+#pragma once
+
+#include "vpp_acc.hpp"
+
+#include <ap_fixed.h>
+#include <ap_int.h>
+#include <hls_math.h>
+#include <hls_stream.h>
+
+/* Fixed-point format used for interpolation: fixed_t is ap_fixed<W, I> */
+#define W 32 // total bits of fixed_t (36, 23 and 24 are other values noted by the author)
+#define I 13 // integer bits of fixed_t; 2^13 = 8K
+
+/* Pixel and port widths */
+#define WBIT 8              // bits per pixel
+#define NPPC 8              // pixels per clock: 1 or 8 (selects the resize_acc variant below)
+#define WDATA (WBIT * NPPC) // axi data width in bits
+// NPPC = 1 - 1-pixel / clock Interpolation
+// NPPC = 8 - 8-pixel / clock Interpolation
+
+#define MAX_SRC (8192 * 8192 / NPPC) // 64M pixels (8192 x 8192) divided by pixels per word
+#define MAX_DST (8192 * 8192 / NPPC) // 64M pixels (8192 x 8192) divided by pixels per word
+
+/* Fixed-point data type used by the resize computation */
+typedef ap_fixed<W, I> fixed_t;
+
+template <typename T>
+T DivCeil(T a, int b) {
+    return (a + b - 1) / b; // a / b rounded up (for positive integer a and b)
+}
+
+#if NPPC == 1
+// 1 pixel per clock: the image ports are 8 bits wide (WBIT * NPPC with NPPC == 1).
+class resize_acc : public VPP_ACC<resize_acc, 1> {
+    // port bindings (NOTE(review): ZERO_COPY / SYS_PORT presumably come from vpp_acc.hpp - confirm)
+    ZERO_COPY(configs);
+    ZERO_COPY(axi_src);
+    ZERO_COPY(axi_dst);
+    // NOTE(review): SYS_PORT presumably ties each port to the named HBM bank - confirm.
+    SYS_PORT(configs, HBM[0]);
+    SYS_PORT(axi_src, HBM[1]);
+    SYS_PORT(axi_dst, HBM[2]);
+
+   public:
+    static void compute(ap_uint<32>* configs, ap_uint<8>* axi_src, ap_uint<8>* axi_dst);
+    static void kernel_resize(ap_uint<32>* configs, ap_uint<8>* axi_src, ap_uint<8>* axi_dst);
+};
+
+#else
+// Any other NPPC: image ports are hard-coded to 64 bits, which equals WDATA only when NPPC == 8.
+class resize_acc : public VPP_ACC<resize_acc, 1> {
+    // port bindings (NOTE(review): ZERO_COPY / SYS_PORT presumably come from vpp_acc.hpp - confirm)
+    ZERO_COPY(configs);
+    ZERO_COPY(axi_src);
+    ZERO_COPY(axi_dst);
+    // NOTE(review): SYS_PORT presumably ties each port to the named HBM bank - confirm.
+    SYS_PORT(configs, HBM[0]);
+    SYS_PORT(axi_src, HBM[1]);
+    SYS_PORT(axi_dst, HBM[2]);
+
+   public:
+    static void compute(ap_uint<32>* configs, ap_uint<64>* axi_src, ap_uint<64>* axi_dst);
+    static void kernel_resize(ap_uint<32>* configs, ap_uint<64>* axi_src, ap_uint<64>* axi_dst);
+};
+
+#endif
+
+#endif // _XF_CODEC_KERNEL_RESIZE_SC_HPP_
diff --git a/codec/L2/demos/resize_sc/utils.mk b/codec/L2/demos/resize_sc/utils.mk
new file mode 100644
index 0000000000..1937b53d2b
--- /dev/null
+++ b/codec/L2/demos/resize_sc/utils.mk
@@ -0,0 +1,239 @@
+#
+# Copyright 2019-2021 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# sc makefile-generator v1.0.0
+#
+#+-------------------------------------------------------------------------------
+# The following parameters are assigned with default values. These parameters can
+# be overridden through the make command line
+#+-------------------------------------------------------------------------------
+
+REPORT := no
+PROFILE := no
+DEBUG := no
+
+#Any REPORT value other than 'no' appends both report requests to VPP_LDFLAGS:
+#'estimate' for estimate report generation, 'system' for system report generation
+ifneq ($(REPORT), no)
+VPP_LDFLAGS += --report estimate
+VPP_LDFLAGS += --report system
+endif
+
+#PROFILE=yes only: generates profile summary report
+ifeq ($(PROFILE), yes)
+VPP_LDFLAGS += --profile_kernel data:all:all:all
+endif
+
+#DEBUG=yes only: generates debug summary report
+ifeq ($(DEBUG), yes)
+VPP_LDFLAGS += --dk protocol:all:all:all
+endif
+
+#Check environment setup: export default install paths when XILINX_VITIS / XILINX_XRT are unset or empty
+ifndef XILINX_VITIS
+  XILINX_VITIS = /opt/xilinx/Vitis/$(TOOL_VERSION)
+  export XILINX_VITIS
+endif
+ifndef XILINX_XRT
+  XILINX_XRT = /opt/xilinx/xrt
+  export XILINX_XRT
+endif
+
+# check_device: warn when PLATFORM_NAME matches no PLATFORM_ALLOWLIST entry, fail when it matches a PLATFORM_BLOCKLIST entry.
+# Only POSIX sh constructs are used (no bash [[ ]]), so the checks also work when make runs recipes with a non-bash /bin/sh.
+.PHONY: check_device
+check_device:
+	@set -eu; \
+	inallowlist=False; inblocklist=False; \
+	for dev in $(PLATFORM_ALLOWLIST); do \
+	    if echo "$(PLATFORM_NAME)" | grep -q -- "$$dev"; then inallowlist=True; fi; \
+	done; \
+	for dev in $(PLATFORM_BLOCKLIST); do \
+	    if echo "$(PLATFORM_NAME)" | grep -q -- "$$dev"; then inblocklist=True; fi; \
+	done; \
+	if [ "$$inallowlist" = False ]; then \
+	    echo "[Warning]: The device $(PLATFORM_NAME) not in allowlist."; \
+	fi; \
+	if [ "$$inblocklist" = True ]; then \
+	    echo "[ERROR]: The device $(PLATFORM_NAME) in blocklist."; exit 1; \
+	fi
+
+#get HOST_ARCH by PLATFORM: x86 -> x86, cortex-a9 -> aarch32, any other cortex-a* -> aarch64 (':=' so platforminfo runs only once)
+ifneq (,$(PLATFORM))
+HOST_ARCH_temp := $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
+ifeq ($(HOST_ARCH_temp), x86)
+HOST_ARCH := x86
+else ifeq ($(HOST_ARCH_temp), cortex-a9)
+HOST_ARCH := aarch32
+else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
+HOST_ARCH := aarch64
+endif
+endif
+
+#Checks for Device Family (no tab indentation: a tab-indented assignment can be taken as a recipe line of the preceding rule)
+ifeq ($(HOST_ARCH), aarch32)
+DEV_FAM = 7Series
+else ifeq ($(HOST_ARCH), aarch64)
+DEV_FAM = Ultrascale
+endif
+
+#Checks for Correct architecture: values outside aarch64/aarch32/x86 abort; NOTE(review): an empty HOST_ARCH appears to pass (both sides expand to empty) - confirm
+ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
+$(error HOST_ARCH variable not set, please set correctly and rerun)
+endif
+#check_version: print the latest commit of XFLIB_DIR when git is on PATH and XFLIB_DIR contains .git; otherwise the recipe is empty
+check_version:
+ifneq (, $(shell which git))
+ifneq (,$(wildcard $(XFLIB_DIR)/.git))
+	@cd $(XFLIB_DIR) && git log --graph --pretty=format:'%Cred%h%Creset -%C(yellow)%d%Creset %s %Cgreen(%cr) %C(bold blue)<%an>%Creset' --abbrev-commit -n 1 && cd -
+endif
+endif
+
+#Checks for SYSROOT: for a non-x86 HOST_ARCH, running this target aborts make when SYSROOT is not set
+check_sysroot:
+ifneq ($(HOST_ARCH), x86)
+ifndef SYSROOT
+	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+
+#Checks for g++: on x86 keep the system g++ when its major version (__GNUG__) is >= the major version of GCC_INTOOL, otherwise use the g++ under XILINX_VIVADO (error if XILINX_VIVADO is unset); aarch64/aarch32 use the cross compilers under XILINX_VITIS
+CXX := g++
+CXX_REQ := $(shell echo $(GCC_INTOOL) | cut -f 1 -d ".")
+ifeq ($(HOST_ARCH), x86)
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_REQ)), 1)
+ifndef XILINX_VIVADO
+$(error [ERROR]: g++ version too old. Please use $(CXX_REQ) or above)
+else
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/bin/g++
+ifeq ($(LD_LIBRARY_PATH),)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64
+else
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64:$(LD_LIBRARY_PATH)
+endif
+$(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
+endif
+endif
+else ifeq ($(HOST_ARCH), aarch64)
+CXX := $(XILINX_VITIS)/gnu/aarch64/lin/aarch64-linux/bin/aarch64-linux-gnu-g++
+else ifeq ($(HOST_ARCH), aarch32)
+CXX := $(XILINX_VITIS)/gnu/aarch32/lin/gcc-arm-linux-gnueabi/bin/arm-linux-gnueabihf-g++
+endif
+
+#check binutils: when the system ld does not compare >= BINUTILS_INTOOL, put the binutils under XILINX_VIVADO first on PATH; NOTE(review): assumes 'ld -v' prints the version as its 4th space-separated field and that expr compares dotted versions as intended - confirm
+BINUTILS := $(shell ld -v | cut -f 4 -d " " | cut -f 1 -d "-")
+BINUTILS_REQ := $(BINUTILS_INTOOL)
+ifneq ($(shell expr $(BINUTILS) \>= $(BINUTILS_REQ)), 1)
+export PATH := $(XILINX_VIVADO)/tps/lnx64/binutils-$(BINUTILS_INTOOL)/bin:$(PATH)
+endif
+
+#Setting VPP
+VPP := v++
+
+#Setting aiecompiler and simulators
+AIECXX := aiecompiler
+AIESIMULATOR := aiesimulator
+X86SIMULATOR := x86simulator
+#check_vivado: print a hint and fail when XILINX_VIVADO/bin/vivado does not exist
+.PHONY: check_vivado
+check_vivado:
+ifeq (,$(wildcard $(XILINX_VIVADO)/bin/vivado))
+	@echo "Cannot locate Vivado installation. Please set XILINX_VIVADO variable." && false
+endif
+#check_vpp: print a hint and fail when XILINX_VITIS/bin/v++ does not exist
+.PHONY: check_vpp
+check_vpp:
+ifeq (,$(wildcard $(XILINX_VITIS)/bin/v++))
+	@echo "Cannot locate Vitis installation. Please set XILINX_VITIS variable." && false
+endif
+#check_xrt: print a hint and fail when XILINX_XRT/lib/libxilinxopencl.so does not exist
+.PHONY: check_xrt
+check_xrt:
+ifeq (,$(wildcard $(XILINX_XRT)/lib/libxilinxopencl.so))
+	@echo "Cannot locate XRT installation. Please set XILINX_XRT variable." && false
+endif
+#Put the Vitis and XRT tools on PATH; on x86 also export the XRT libraries on LD_LIBRARY_PATH so recipe commands see them even when the variable was not in the environment
+export PATH := $(XILINX_VITIS)/bin:$(XILINX_XRT)/bin:$(PATH)
+ifeq ($(HOST_ARCH), x86)
+ifeq (,$(LD_LIBRARY_PATH))
+export LD_LIBRARY_PATH := $(XILINX_XRT)/lib
+else
+export LD_LIBRARY_PATH := $(XILINX_XRT)/lib:$(LD_LIBRARY_PATH)
+endif
+endif
+
+ifneq (,$(wildcard $(PLATFORM)))
+# Use PLATFORM as a file path (it names something that exists on disk)
+XPLATFORM := $(PLATFORM)
+else
+# Use PLATFORM as a file name pattern; each later search step runs only while XPLATFORM is still empty
+# 1. search paths specified by variable (PLATFORM_REPO_PATHS, colon-separated)
+ifneq (,$(PLATFORM_REPO_PATHS))
+# 1.1 as exact name (<repo>/PLATFORM/PLATFORM.xpfm)
+XPLATFORM := $(strip $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/$(PLATFORM)/$(PLATFORM).xpfm)))
+# 1.2 as a pattern (awk regex match of PLATFORM against every <repo>/*/*.xpfm path)
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/*/*.xpfm))
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 1.2
+endif # 1
+# 2. search Vitis installation (XILINX_VITIS/platforms)
+ifeq (,$(XPLATFORM))
+# 2.1 as exact name
+XPLATFORM := $(strip $(wildcard $(XILINX_VITIS)/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 2.2 as a pattern (same awk regex match as 1.2)
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard $(XILINX_VITIS)/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 2.2
+endif # 2
+# 3. search default locations (/opt/xilinx/platforms)
+ifeq (,$(XPLATFORM))
+# 3.1 as exact name
+XPLATFORM := $(strip $(wildcard /opt/xilinx/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 3.2 as a pattern (same awk regex match as 1.2)
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard /opt/xilinx/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 3.2
+endif # 3
+endif
+
+define MSG_PLATFORM
+No platform matched pattern '$(PLATFORM)'.
+Available platforms are: $(XPLATFORMS)
+To add more platform directories, set the PLATFORM_REPO_PATHS variable or point PLATFORM variable to the full path of platform .xpfm file.
+endef
+export MSG_PLATFORM
+
+#check_platform: when XPLATFORM is empty, print MSG_PLATFORM (exported above so the shell can read it) and fail
+.PHONY: check_platform
+check_platform:
+ifeq (,$(XPLATFORM))
+	@echo "$${MSG_PLATFORM}" && false
+endif
+#Check ends
+
+#   PLATFORM_NAME - file name of PLATFORM without its directory part and without the .xpfm suffix
+#   Computed with make built-ins (no shell call); expands to empty when PLATFORM is empty
+PLATFORM_NAME = $(strip $(patsubst %.xpfm,%,$(notdir $(PLATFORM))))
+
+
+# Command shorthands: forced remove, recursive remove, forced move, recursive copy, and an echo that make does not print itself
+RM = rm -f
+RMDIR = rm -rf
+
+MV = mv -f
+CP = cp -rf
+ECHO:= @echo
diff --git a/codec/L2/demos/webpEnc/Makefile b/codec/L2/demos/webpEnc/Makefile
new file mode 100644
index 0000000000..76912a59d4
--- /dev/null
+++ b/codec/L2/demos/webpEnc/Makefile
@@ -0,0 +1,319 @@
+# Copyright 2019-2021 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# vitis makefile-generator v2.0.5
+
+############################## Help Section ##############################
+.PHONY: help
+# help: print the usage summary. Declared with '::' so other makefiles may append their own help text.
+help::
+	$(ECHO) "Makefile Usage:"
+	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to generate the design for specified Target and Shell."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to run application in emulation."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make xclbin TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
+	$(ECHO) "      Command to build xclbin application."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make host HOST_ARCH=<x86>"
+	$(ECHO) "      Command to build host application."
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
+	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) ""
+	$(ECHO) "  make clean "
+	$(ECHO) "      Command to remove the generated non-hardware files."
+	$(ECHO) ""
+	$(ECHO) "  make cleanall"
+	$(ECHO) "      Command to remove all the generated files."
+	$(ECHO) ""
+
+############################## Setting up Project Variables ##############################
+# MK_PATH: absolute path of this Makefile. XF_PROJ_ROOT: MK_PATH cut at its last "/L2/" unless already set. XFLIB_DIR is bound once (:=) so the bash helper is not re-run on every expansion.
+MK_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+XF_PROJ_ROOT ?= $(shell bash -c 'export MK_PATH=$(MK_PATH); echo $${MK_PATH%/L2/*}')
+CUR_DIR := $(patsubst %/,%,$(dir $(MK_PATH)))
+XFLIB_DIR := $(XF_PROJ_ROOT)
+
+# Default build target and host architecture, used only when not already set.
+TARGET ?= sw_emu
+HOST_ARCH ?= x86
+
+# PLATFORM selection: a non-empty PLATFORM wins, then DEVICE, then the u200 default below.
+ifeq ($(PLATFORM),)
+PLATFORM := $(DEVICE)
+endif
+ifeq ($(PLATFORM),)
+# Alternative platform names kept for reference: xilinx_u200_gen3x16_xdma_1_202110_1, xilinx_u200_gen3x16_xdma_base_2.
+PLATFORM := xilinx_u200_gen3x16_xdma_2_202110_1
+PLATFORM_NEW := xilinx_u200_gen3x16_xdma_base_2
+# NOTE(review): PLATFORM_NEW is set only on this default path but is used in -DWEBPDSA below; with PLATFORM or DEVICE supplied, WEBPDSA expands to an empty string - confirm the host code expects that.
+endif
+
+# #################### Checking if PLATFORM in whitelist ############################
+PLATFORM_ALLOWLIST +=  u200
+PLATFORM_BLOCKLIST +=  zc
+
+GCC_INTOOL := 8.3.0
+include ./utils.mk
+TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
+TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
+BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
+EMCONFIG := $(BUILD_DIR)/emconfig.json
+XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
+export XCL_BINDIR = $(XCLBIN_DIR)
+
+EXE_FILE_DEPS :=
+BINARY_CONTAINERS_DEPS :=
+RUN_DEPS :=
+
+# get global setting
+ifeq ($(HOST_ARCH), x86)
+CXXFLAGS += -fmessage-length=0 -I$(CUR_DIR)/src/ -I$(XILINX_XRT)/include -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
+LDFLAGS += -pthread -L$(XILINX_XRT)/lib -L$(XILINX_HLS)/lnx64/tools/fpo_v7_0  -Wl,--as-needed -lOpenCL -lxrt_coreutil -lgmp -lmpfr -lIp_floating_point_v7_0_bitacc_cmodel 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
+VPP_LDFLAGS += --optimize 2 -R 2 
+else ifeq ($(HOST_ARCH), aarch64)
+CXXFLAGS += -I$(CUR_DIR)/src/ -fmessage-length=0 --sysroot=$(SYSROOT)  -I$(SYSROOT)/usr/include/xrt -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
+LDFLAGS += -pthread -L$(SYSROOT)/usr/lib -L$(XILINX_VITIS_AIETOOLS)/lib/aarch64.o -Wl,--as-needed -lxilinxopencl -lxrt_coreutil 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
+VPP_LDFLAGS += --optimize 2 -R 2 
+endif
+CXXFLAGS += $(EXTRA_CXXFLAGS)
+VPP_FLAGS += $(EXTRA_VPP_FLAGS)
+
+########################## Setting up Host Variables ##########################
+ifeq ($(TARGET),sw_emu)
+CXXFLAGS += -D SW_EMU_TEST
+endif
+ifeq ($(TARGET),hw_emu)
+CXXFLAGS += -D HW_EMU_TEST
+endif
+
+ifneq (,$(findstring OPENCV_INCLUDE,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
+#Include Required Host Source Files
+HOST_SRCS += $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dec/alpha.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dec/buffer.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dec/frame.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dec/idec.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dec/io.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dec/quant.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dec/tree.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dec/vp8.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dec/vp8l.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dec/webp.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/demux/anim_decode.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/demux/demux.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/alpha_processing.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/alpha_processing_mips_dsp_r2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/alpha_processing_sse2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/alpha_processing_sse41.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/cpu.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/dec.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/dec_clip_tables.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/dec_mips32.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/dec_mips_dsp_r2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/dec_neon.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/dec_sse2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/dec_sse41.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/filters.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/filters_mips_dsp_r2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/filters_sse2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/lossless.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/lossless_mips_dsp_r2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/lossless_neon.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/lossless_sse2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/rescaler.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/rescaler_mips32.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/rescaler_mips_dsp_r2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/rescaler_neon.c 
$(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/rescaler_sse2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/upsampling.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/upsampling_mips_dsp_r2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/upsampling_neon.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/upsampling_sse2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/yuv.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/yuv_mips32.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/argb.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/argb_mips_dsp_r2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/argb_sse2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/yuv_mips_dsp_r2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/yuv_sse2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/cost.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/cost_mips32.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/cost_mips_dsp_r2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/cost_sse2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/enc.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/enc_avx2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/enc_mips32.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/enc_mips_dsp_r2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/enc_neon.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/enc_sse2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/enc_sse41.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/lossless_enc.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/lossless_enc_mips32.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/lossless_enc_mips_dsp_r2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/lossless_enc_neon.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/lossless_enc_sse2.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/lossless_enc_sse41.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/alpha.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/analysis.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/backward_references.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/config.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/cost.c 
$(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/delta_palettization.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/filter.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/frame.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/histogram.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/iterator.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/near_lossless.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/picture.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/picture_csp.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/picture_psnr.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/picture_rescale.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/picture_tools.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/quant.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/syntax.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/token.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/tree.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/vp8l.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/webpenc.c $(XFLIB_DIR)/L2/demos/webpEnc/host/jpegdec.c $(XFLIB_DIR)/L2/demos/webpEnc/host/metadata.c $(XFLIB_DIR)/L2/demos/webpEnc/host/pngdec.c $(XFLIB_DIR)/L2/demos/webpEnc/host/webpdec.c $(XFLIB_DIR)/L2/demos/webpEnc/host/example_util.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/mux/anim_encode.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/mux/muxedit.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/mux/muxinternal.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/mux/muxread.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/utils/bit_reader.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/utils/color_cache.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/utils/filters.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/utils/huffman.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/utils/quant_levels_dec.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/utils/random.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/utils/rescaler.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/utils/thread.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/utils/utils.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/utils/bit_writer.c 
$(XFLIB_DIR)/L2/demos/webpEnc/host/src/utils/huffman_encode.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/utils/quant_levels.c $(XFLIB_DIR)/L2/demos/webpEnc/host/cwebp.c $(XFLIB_DIR)/L2/demos/webpEnc/host/wicdec.c $(XFLIB_DIR)/L2/demos/webpEnc/host/create_kernel.c $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/kernel/oclHelper.cpp $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/kernel/oclErrorCodes.cpp $(XFLIB_DIR)/L2/demos/webpEnc/host/src/utils/profiling.c 
+CXXFLAGS +=  -I $(XFLIB_DIR)/../utils/L1/include/ -I $(XFLIB_DIR)/L2/include/hw/webpEnc/ -I $(XFLIB_DIR)/L2/demos/webpEnc/host/ -I $(XFLIB_DIR)/L2/demos/webpEnc/kernel/ -I $(XFLIB_DIR)/L2/demos/webpEnc/host/src/ -I $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dec/ -I $(XFLIB_DIR)/L2/demos/webpEnc/host/src/dsp/ -I $(XFLIB_DIR)/L2/demos/webpEnc/host/src/enc/ -I $(XFLIB_DIR)/L2/demos/webpEnc/host/src/mux/ -I $(XFLIB_DIR)/L2/demos/webpEnc/host/src/webp/ -I $(XFLIB_DIR)/L2/demos/webpEnc/host/src/utils/
+CXXFLAGS += -O3 -fpermissive -DWEBP_NBINSTANCES=1 -DWEBPDSA=\"$(PLATFORM_NEW)\" -DHAVE_MALLOC_H -DHAVE_PTHREAD -DWEBP_HAVE_PNG -DWEBP_HAVE_TIFF -DWEBP_USE_THREAD -DWEBP_HAVE_GIF -DFPGA_DEVICE 
+LDFLAGS +=  -lrt -Wno-narrowing -DVERBOSE -L$(XILINX_VITIS)/lib/lnx64.o -lxilinxopencl -pthread -lpng
+
+EXE_NAME := cwebp
+EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
+EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
+
+HOST_ARGS :=  -xclbin $(BUILD_DIR)/kernel.xclbin list.rst -use_ocl -q 80 -o images
+ifneq ($(HOST_ARCH), x86)
+PKG_HOST_ARGS = $(foreach args,$(HOST_ARGS),$(subst $(dir $(patsubst %/,%,$(args))),,$(args)))
+endif
+
+########################## Kernel compiler global settings ##########################
+# u200 platforms additionally get conn_u200.cfg. $(findstring) replaces the former
+# `echo | awk` substring test, so no shell is spawned while the makefile is parsed.
+ifneq (,$(findstring u200,$(XPLATFORM)))
+VPP_FLAGS +=   --config $(CUR_DIR)/conn_u200.cfg
+endif
+# Kernel include paths are identical for every platform, so they are added once.
+VPP_FLAGS +=  -I $(XFLIB_DIR)/../utils/L1/include/ -I $(XFLIB_DIR)/L2/include/hw/webpEnc -I $(XFLIB_DIR)/L2/demos/webpEnc/kernel
+
+
+######################### binary container global settings ##########################
+VPP_FLAGS_webp_IntraPredLoop2_NoOut_1 +=  -D KERNEL_NAME=webp_IntraPredLoop2_NoOut_1
+VPP_FLAGS_webp_IntraPredLoop2_NoOut_1 += --hls.clock 250000000:webp_IntraPredLoop2_NoOut_1
+VPP_FLAGS_webp_2_ArithmeticCoding_1 +=  -D KERNEL_NAME=webp_2_ArithmeticCoding_1
+VPP_FLAGS_webp_2_ArithmeticCoding_1 += --hls.clock 250000000:webp_2_ArithmeticCoding_1
+ifneq ($(HOST_ARCH), x86)
+VPP_LDFLAGS_kernel += --clock.defaultFreqHz 250000000
+else
+VPP_LDFLAGS_kernel += --kernel_frequency 250
+endif
+
+ifeq ($(HOST_ARCH), x86)
+BINARY_CONTAINERS += $(BUILD_DIR)/kernel.xclbin
+else
+BINARY_CONTAINERS += $(BUILD_DIR)/kernel_pkg.$(LINK_TARGET_FMT)
+BINARY_CONTAINERS_PKG += $(BUILD_DIR)/kernel.xclbin
+endif
+
+# ################ Setting Rules for Binary Containers (Building Kernels) ################
+$(TEMP_DIR)/webp_IntraPredLoop2_NoOut_1.xo: $(XFLIB_DIR)/L2/demos/webpEnc/kernel/vp8_hls_pred.cpp 
+	$(ECHO) "Compiling Kernel: webp_IntraPredLoop2_NoOut_1"
+	mkdir -p $(TEMP_DIR)
+	$(VPP) -c $(VPP_FLAGS_webp_IntraPredLoop2_NoOut_1) $(VPP_FLAGS) -k webp_IntraPredLoop2_NoOut_1 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+BINARY_CONTAINER_kernel_OBJS += $(TEMP_DIR)/webp_IntraPredLoop2_NoOut_1.xo
+$(TEMP_DIR)/webp_2_ArithmeticCoding_1.xo: $(XFLIB_DIR)/L2/demos/webpEnc/kernel/vp8_hls_ac.cpp 
+	$(ECHO) "Compiling Kernel: webp_2_ArithmeticCoding_1"
+	mkdir -p $(TEMP_DIR)
+	$(VPP) -c $(VPP_FLAGS_webp_2_ArithmeticCoding_1) $(VPP_FLAGS) -k webp_2_ArithmeticCoding_1 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+BINARY_CONTAINER_kernel_OBJS += $(TEMP_DIR)/webp_2_ArithmeticCoding_1.xo
+BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_OBJS)
+$(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
+	mkdir -p $(BUILD_DIR)
+	$(VPP) -l $(VPP_FLAGS) --temp_dir $(TEMP_DIR) --report_dir $(BUILD_REPORT_DIR)/kernel $(VPP_LDFLAGS)  $(VPP_LDFLAGS_kernel) $(AIE_LDFLAGS)   -o $@ $^
+
+############################## Setting Rules for Host (Building Host Executable) ##############################
+ifeq ($(HOST_ARCH), x86)
+$(EXE_FILE): $(EXE_FILE_DEPS) |  check_xrt
+	mkdir -p $(BUILD_DIR)
+	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
+
+else
+$(EXE_FILE): $(EXE_FILE_DEPS) |  check_sysroot
+	mkdir -p $(BUILD_DIR)
+	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
+
+endif
+
+$(EMCONFIG):
+	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
+############################## Preparing sdcard folder ##############################
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE := $(SYSROOT)/../../uImage
+else
+K_IMAGE := $(SYSROOT)/../../Image
+endif
+RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
+# Generates the on-target launcher script. The rule has no prerequisites, so it creates $(BUILD_DIR) itself instead of relying on another rule having run first (direct invocation, make -j).
+$(RUN_SCRIPT):
+	mkdir -p $(BUILD_DIR) && rm -rf $(RUN_SCRIPT)
+	@echo 'export LD_LIBRARY_PATH=/mnt:/tmp:$(LIBRARY_PATH)' >> $(RUN_SCRIPT)
+ifneq ($(filter sw_emu hw_emu, $(TARGET)),)
+	@echo 'export XCL_EMULATION_MODE=$(TARGET)' >> $(RUN_SCRIPT)
+endif
+	@printf '%s\n' 'export XILINX_VITIS=/mnt' 'export XILINX_XRT=/usr' >> $(RUN_SCRIPT)
+	@echo 'if [ -f platform_desc.txt  ]; then' >> $(RUN_SCRIPT)
+	@echo '        cp platform_desc.txt /etc/xocl.txt' >> $(RUN_SCRIPT)
+	@echo 'fi' >> $(RUN_SCRIPT)
+	@echo './$(EXE_NAME) $(PKG_HOST_ARGS)' >> $(RUN_SCRIPT)
+	@echo 'return_code=$$?' >> $(RUN_SCRIPT)
+	@echo 'if [ $$return_code -ne 0 ]; then' >> $(RUN_SCRIPT)
+	@echo '        echo "ERROR: Embedded host run failed, RC=$$return_code"' >> $(RUN_SCRIPT)
+	@echo 'else' >> $(RUN_SCRIPT)
+	@echo '        echo "INFO: TEST PASSED, RC=0"' >> $(RUN_SCRIPT)
+	@echo 'fi' >> $(RUN_SCRIPT)
+	@echo 'echo "INFO: Embedded host run completed."' >> $(RUN_SCRIPT)
+	@echo 'exit $$return_code' >> $(RUN_SCRIPT)
+DATA_FILE := 
+DATA_DIR := $(CUR_DIR)/images 
+SD_FILES += $(RUN_SCRIPT)
+SD_FILES += $(EXE_FILE)
+SD_FILES += $(EMCONFIG)
+SD_FILES += xrt.ini
+SD_FILES += $(DATA_FILE)# where define DATAFILE in json
+SD_FILES_WITH_PREFIX = $(foreach sd_file,$(SD_FILES), $(if $(filter $(sd_file),$(wildcard $(sd_file))), --package.sd_file $(sd_file)))
+SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
+PACKAGE_FILES := $(BINARY_CONTAINERS)
+PACKAGE_FILES += $(AIE_CONTAINER)
+SD_CARD := $(CUR_DIR)/package_$(TARGET)
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+	@echo "Generating sd_card folder...."
+	mkdir -p $(SD_CARD)
+	chmod a+rx $(BUILD_DIR)/run_script.sh
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+	@echo "### ***** sd_card generation done! ***** ###"
+
+.PHONY: sd_card
+sd_card: $(SD_CARD)
+endif
+############################## Setting Essential Checks and Building Rules ##############################
+RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
+RUN_DEPS += $(SD_CARD)
+run: check_device  $(RUN_DEPS)
+#hw_emu
+ifneq (,$(filter hw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	XCL_EMULATION_MODE=$(TARGET) $(EXE_FILE) $(HOST_ARGS)
+	
+else
+	@echo $(RUN_DEPS)
+	$(SD_CARD)/launch_$(TARGET).sh -no-reboot -run-app $(notdir $(RUN_SCRIPT)) 
+	grep "TEST PASSED, RC=0" $(SD_CARD)/qemu_output.log || exit 1
+	
+endif
+endif
+#sw_emu
+ifneq (,$(filter sw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	XCL_EMULATION_MODE=$(TARGET) $(EXE_FILE) $(HOST_ARGS) 
+	
+else
+	@echo $(RUN_DEPS)
+	$(SD_CARD)/launch_$(TARGET).sh -no-reboot -run-app $(notdir $(RUN_SCRIPT)) 
+	grep "TEST PASSED, RC=0" $(SD_CARD)/qemu_output.log || exit 1
+	
+endif
+endif
+#hw
+ifeq ($(TARGET), hw)
+ifeq ($(HOST_ARCH), x86)
+	$(EXE_FILE) $(HOST_ARGS)
+	
+else
+	$(ECHO) "Please copy the content of sd_card folder and data to an SD Card and run on the board"
+endif
+endif
+
+############################## Setting Targets ##############################
+# None of these names is a file the build creates; cleanh, cleank and run are listed too so a stray file with that name cannot mask the rule.
+.PHONY: all clean cleanall emconfig cleanh cleank run
+emconfig: $(EMCONFIG)
+ifeq ($(HOST_ARCH), x86)
+all:  check_vpp check_platform check_xrt $(EXE_FILE) $(BINARY_CONTAINERS) emconfig
+else
+all:  check_vpp check_platform check_sysroot $(EXE_FILE) $(BINARY_CONTAINERS) emconfig sd_card
+endif
+
+.PHONY: host
+ifeq ($(HOST_ARCH), x86)
+host:  check_xrt $(EXE_FILE)
+else
+host:  check_sysroot $(EXE_FILE)
+endif
+
+.PHONY: xclbin
+ifeq ($(HOST_ARCH), x86)
+xclbin:  check_vpp check_xrt $(BINARY_CONTAINERS) 
+else
+xclbin:  check_vpp check_sysroot $(BINARY_CONTAINERS) 
+endif
+
+############################## Cleaning Rules ##############################
+# cleanh: remove the host executable, tool logs/reports and generated image/csv files from the working directory.
+cleanh:
+	-$(RMDIR) $(EXE_FILE) vitis_* TempConfig system_estimate.xtxt *.rpt .run/
+	-$(RMDIR) src/*.ll _xocc_* .Xil dltmp* xmltmp* *.log *.jou *.wcfg *.wdb sample_link.ini sample_compile.ini obj*  bin* *.csv *.jpg *.jpeg *.png
+# cleank: remove kernel and emulation products. The pattern is "pl*start_simulation.sh"; the former "pl*start_simulation. sh" also deleted anything named "sh".
+cleank:
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
+	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
+	-$(RMDIR) _x_temp.*
+# cleanall: cleanh + cleank plus build directories, reports and packaging output. clean is an alias for cleanh.
+cleanall: cleanh cleank
+	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
+	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+clean: cleanh
\ No newline at end of file
diff --git a/codec/L2/demos/webpEnc/README.md b/codec/L2/demos/webpEnc/README.md
new file mode 100644
index 0000000000..b4a2fe619a
--- /dev/null
+++ b/codec/L2/demos/webpEnc/README.md
@@ -0,0 +1,131 @@
+Webp Encoder
+============
+
+The WebP encoder demo resides in the ``L2/demos/webpEnc`` directory. This tutorial provides a step-by-step guide that covers the commands for building and running the kernel.
+
+Executable Usage
+----------------
+
+* **Work Directory(Step 1)**
+
+The steps for library download and environment setup can be found [here](https://github.com/Xilinx/Vitis_Libraries/tree/master/codec/L2/demos#building). To get the design, run:
+
+```
+   cd L2/demos/webpEnc
+```   
+
+* **Build kernel(Step 2)**
+
+Run the following make command to build your XCLBIN and host binary targeting a specific device. Please note that this process takes a long time, possibly a couple of hours.
+
+```
+   make run TARGET=hw DEVICE=xilinx_u200_xdma_201830_2
+```   
+
+* **Run kernel(Step 3)**
+
+To get the benchmark results, please run the following command.
+
+```
+   ./build_dir.hw.xilinx_u200_xdma_201830_2/cwebp list.rst -use_ocl -q 80 -o output
+```   
+
+Webp Input Arguments:
+
+```
+   Usage: cwebp -[-use_ocl -q -o]
+          list.rst:     the input list
+          -use_ocl:     should be kept
+          -q:           compression quality
+          -o:           output directory
+```          
+
+Note: Default arguments are set in Makefile, you can use other [pictures](https://github.com/Xilinx/Vitis_Libraries/tree/master/codec/L2/demos#pictures) listed in the table.
+
+* **Example output(Step 4)** 
+
+```
+   INFO: CreateKernel start.
+   INFO: Number of Platforms: 1
+   INFO: Selected Platform: Xilinx
+   INFO: Number of devices for platform 0: 1
+   INFO: target_device found:   xilinx_u200_xdma_201830_2
+   INFO: target_device chosen:  xilinx_u200_xdma_201830_2
+   INFO: OpenCL Version: 1.-48
+   INFO: Loading kernel.xclbin
+   INFO: Loading kernel.xclbin Finished
+
+   ...
+
+   *** Picture: 1 - 1,  Buffer: 0, Instance: 0, Event: 0 ***
+   INFO: Host2Device finished. Computation time is 0.480000 (ms)
+   INFO: PredKernel Finished. Computation time is 0.042000 (ms)
+   INFO: ACKernel Finished. Computation time is 0.012000 (ms)
+   INFO: Device2Host finished. Computation time is 0.005000 (ms)
+   INFO: Loop of Pictures Finished. Computation time is 16.500000 (ms)
+   INFO: VP8EncTokenLoopAsync Finished. Computation time is 22.676000 (ms)
+   INFO: WebPEncodeAsync Finished. Computation time is 47.519000 (ms)
+   INFO: Release Kernel.
+```   
+
+Profiling
+---------
+
+The hardware resource utilizations are listed in the following table.
+Different tool versions may result in slightly different resource usage.
+
+
+Table 1 Hardware resources for webp kernels
+
+|    Kernel    |   BRAM   |   URAM   |    DSP   |    FF    |   LUT   | Frequency(MHz) |
+|--------------|----------|----------|----------|----------|---------|----------------|
+|    Kernel1   |    72    |    10    |   410    |   56498  |  48301  |      250       |
+|    Kernel2   |    11    |    0     |    5     |   23073  |  16375  |      250       |
+
+
+* One instance achieves about 6~14 times acceleration. Here are some examples:
+
+
+##### Table 2 Performance of WebP Encoder for FPGA 
+
+|   Kernel   | Width (pix) | Height (pix) | -q |  latency (ms) |  Throughput B(MB/s) | Throughput P(MB/s) | FPs (fps) |
+|------------|-------------|--------------|----|---------------|---------------------|--------------------|-----------|
+|   Kernel1  |    1920     |     1080     | 80 |     21.18     |       146.83        |       97.88        |   47.20   |
+|   Kernel2  |    1920     |     1080     | 80 |     14.57     |       213.54        |       142.36       |   68.65   |
+|   Kernel1  |    512      |     512      | 80 |     3.22      |       122.03        |       81.35        |   310.33  |
+|   Kernel2  |    512      |     512      | 80 |     2.92      |       134.65        |       89.77        |   342.43  |
+|   Kernel1  |    1920     |     1080     | 90 |     21.03     |       147.87        |       98.58        |   47.54   |
+|   Kernel2  |    1920     |     1080     | 90 |     15.92     |       195.43        |       130.29       |   62.83   |
+|   Kernel1  |    512      |     512      | 90 |     4.73      |       83.12         |       55.41        |   211.39  |
+|   Kernel2  |    512      |     512      | 90 |     4.93      |       79.73         |       53.16        |   202.78  |
+
+
+* Platform: FPGA U200, CPU details are listed below (single thread)
+
+##### Note
+```
+    | 1. Kernels running on platform with Intel(R) Xeon(R) CPU E5-2603 v3 @ 1.60GHz, 48 Threads.
+    | 2. time unit: ms.
+    | 3. "-" Indicates that the result could not be obtained due to insufficient memory.
+    | 4. FPGA time is the kernel runtime by adding data transfer and executed with webp encoder.
+```    
+
+## License
+
+Licensed using the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0).
+
+    Copyright 2022 Xilinx, Inc.
+    
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+    
+        http://www.apache.org/licenses/LICENSE-2.0
+    
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+    Copyright 2022 Xilinx, Inc.
+
diff --git a/codec/L2/demos/webpEnc/conn_u200.cfg b/codec/L2/demos/webpEnc/conn_u200.cfg
new file mode 100755
index 0000000000..a6f7fdcc17
--- /dev/null
+++ b/codec/L2/demos/webpEnc/conn_u200.cfg
@@ -0,0 +1,16 @@
+[connectivity]
+sp=webp_IntraPredLoop2_NoOut_1_1.m_axi_gmem0:DDR[3]
+sp=webp_IntraPredLoop2_NoOut_1_1.m_axi_gmem1:DDR[3]
+sp=webp_IntraPredLoop2_NoOut_1_1.m_axi_gmem2:DDR[3]
+sp=webp_2_ArithmeticCoding_1_1.m_axi_gmem0:DDR[3]
+sp=webp_2_ArithmeticCoding_1_1.m_axi_gmem1:DDR[3]
+sp=webp_2_ArithmeticCoding_1_1.m_axi_gmem2:DDR[3]
+
+[vivado]
+prop=run.impl_1.STEPS.OPT_DESIGN.ARGS.DIRECTIVE=Explore
+prop=run.impl_1.STEPS.PLACE_DESIGN.ARGS.DIRECTIVE=ExtraNetDelay_low
+prop=run.impl_1.STEPS.PHYS_OPT_DESIGN.IS_ENABLED=true
+prop=run.impl_1.STEPS.PHYS_OPT_DESIGN.ARGS.DIRECTIVE=AggressiveExplore
+prop=run.impl_1.STEPS.ROUTE_DESIGN.ARGS.DIRECTIVE=NoTimingRelaxation
+prop=run.impl_1.{STEPS.ROUTE_DESIGN.ARGS.MORE OPTIONS}={-tns_cleanup}
+prop=run.impl_1.STEPS.POST_ROUTE_PHYS_OPT_DESIGN.IS_ENABLED=true
diff --git a/codec/L2/demos/webpEnc/description.json b/codec/L2/demos/webpEnc/description.json
new file mode 100644
index 0000000000..55a6e7bd7a
--- /dev/null
+++ b/codec/L2/demos/webpEnc/description.json
@@ -0,0 +1,267 @@
+{
+    "gui": false,
+    "name": "Xilinx WebP Demo",
+    "description": "",
+    "flow": "vitis",
+    "platform_allowlist": [
+        "u200"
+    ],
+    "platform_blocklist": [
+        "zc"
+    ],
+    "platform_properties": {
+        "u200": {
+            "v++": {
+                "compiler": {
+                    "clflags": [
+                        "--config PROJECT/conn_u200.cfg"
+                    ]
+                }
+            }
+        }
+    },
+    "data": [
+        "./images"
+    ],
+    "launch": [
+        {
+            "cmd_args": " -xclbin BUILD/kernel.xclbin list.rst -use_ocl -q 80 -o images",
+            "name": "generic launch for all flows"
+        }
+    ],
+    "host": {
+        "host_exe": "cwebp",
+        "compiler": {
+            "sources": [
+                "LIB_DIR/L2/demos/webpEnc/host/src/dec/alpha.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dec/buffer.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dec/frame.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dec/idec.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dec/io.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dec/quant.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dec/tree.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dec/vp8.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dec/vp8l.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dec/webp.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/demux/anim_decode.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/demux/demux.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/alpha_processing.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/alpha_processing_mips_dsp_r2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/alpha_processing_sse2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/alpha_processing_sse41.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/cpu.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/dec.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/dec_clip_tables.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/dec_mips32.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/dec_mips_dsp_r2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/dec_neon.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/dec_sse2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/dec_sse41.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/filters.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/filters_mips_dsp_r2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/filters_sse2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/lossless.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/lossless_mips_dsp_r2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/lossless_neon.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/lossless_sse2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/rescaler.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/rescaler_mips32.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/rescaler_mips_dsp_r2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/rescaler_neon.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/rescaler_sse2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/upsampling.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/upsampling_mips_dsp_r2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/upsampling_neon.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/upsampling_sse2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/yuv.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/yuv_mips32.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/argb.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/argb_mips_dsp_r2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/argb_sse2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/yuv_mips_dsp_r2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/yuv_sse2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/cost.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/cost_mips32.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/cost_mips_dsp_r2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/cost_sse2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/enc.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/enc_avx2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/enc_mips32.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/enc_mips_dsp_r2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/enc_neon.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/enc_sse2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/enc_sse41.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/lossless_enc.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/lossless_enc_mips32.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/lossless_enc_mips_dsp_r2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/lossless_enc_neon.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/lossless_enc_sse2.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/lossless_enc_sse41.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/alpha.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/analysis.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/backward_references.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/config.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/cost.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/delta_palettization.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/filter.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/frame.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/histogram.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/iterator.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/near_lossless.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/picture.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/picture_csp.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/picture_psnr.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/picture_rescale.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/picture_tools.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/quant.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/syntax.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/token.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/tree.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/vp8l.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/webpenc.c",
+                "LIB_DIR/L2/demos/webpEnc/host/jpegdec.c",
+                "LIB_DIR/L2/demos/webpEnc/host/metadata.c",
+                "LIB_DIR/L2/demos/webpEnc/host/pngdec.c",
+                "LIB_DIR/L2/demos/webpEnc/host/webpdec.c",
+                "LIB_DIR/L2/demos/webpEnc/host/example_util.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/mux/anim_encode.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/mux/muxedit.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/mux/muxinternal.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/mux/muxread.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/utils/bit_reader.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/utils/color_cache.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/utils/filters.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/utils/huffman.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/utils/quant_levels_dec.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/utils/random.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/utils/rescaler.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/utils/thread.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/utils/utils.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/utils/bit_writer.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/utils/huffman_encode.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/utils/quant_levels.c",
+                "LIB_DIR/L2/demos/webpEnc/host/cwebp.c",
+                "LIB_DIR/L2/demos/webpEnc/host/wicdec.c",
+                "LIB_DIR/L2/demos/webpEnc/host/create_kernel.c",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/kernel/oclHelper.cpp",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/kernel/oclErrorCodes.cpp",
+                "LIB_DIR/L2/demos/webpEnc/host/src/utils/profiling.c"
+            ],
+            "includepaths": [
+                "LIB_DIR/../utils/L1/include/",
+                "LIB_DIR/L2/include/hw/webpEnc/",
+                "LIB_DIR/L2/demos/webpEnc/host/",
+                "LIB_DIR/L2/demos/webpEnc/kernel/",
+                "LIB_DIR/L2/demos/webpEnc/host/src/",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dec/",
+                "LIB_DIR/L2/demos/webpEnc/host/src/dsp/",
+                "LIB_DIR/L2/demos/webpEnc/host/src/enc/",
+                "LIB_DIR/L2/demos/webpEnc/host/src/mux/",
+                "LIB_DIR/L2/demos/webpEnc/host/src/webp/",
+                "LIB_DIR/L2/demos/webpEnc/host/src/utils/"
+            ],
+            "options": "-O3 -fpermissive -DWEBP_NBINSTANCES=1 -DWEBPDSA=\"$(DEVICE)\" -DHAVE_MALLOC_H -DHAVE_PTHREAD -DWEBP_HAVE_PNG -DWEBP_HAVE_TIFF -DWEBP_USE_THREAD -DWEBP_HAVE_GIF -DFPGA_DEVICE "
+        },
+        "linker": {
+            "options": " -lrt -Wno-narrowing -DVERBOSE -L$(XILINX_VITIS)/lib/lnx64.o -lxilinxopencl -pthread -lpng"
+        }
+    },
+    "v++": {
+        "compiler": {
+            "includepaths": [
+                "LIB_DIR/../utils/L1/include/",
+                "LIB_DIR/L2/include/hw/webpEnc",
+                "LIB_DIR/L2/demos/webpEnc/kernel"
+            ]
+        }
+    },
+    "containers": [
+        {
+            "name": "kernel",
+            "accelerators": [
+                {
+                    "location": "LIB_DIR/L2/demos/webpEnc/kernel/vp8_hls_pred.cpp",
+                    "frequency": 250.0,
+                    "clflags": " -D KERNEL_NAME=webp_kernel_IntraPredLoop2_NoOut_1",
+                    "name": "webp_kernel_IntraPredLoop2_NoOut_1",
+                    "num_compute_units": 1,
+                    "compute_units": [
+                        {
+                            "name": "webp_kernel_IntraPredLoop2_NoOut_1",
+                            "arguments": [
+                                {
+                                    "name": "m_axi_gmem0",
+                                    "memory": "DDR[3]"
+                                },
+                                {
+                                    "name": "m_axi_gmem1",
+                                    "memory": "DDR[3]"
+                                },
+                                {
+                                    "name": "m_axi_gmem2",
+                                    "memory": "DDR[3]"
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "location": "LIB_DIR/L2/demos/webpEnc/kernel/vp8_hls_ac.cpp",
+                    "frequency": 250.0,
+                    "clflags": " -D KERNEL_NAME=webp_kernel_2_ArithmeticCoding_1",
+                    "name": "webp_kernel_2_ArithmeticCoding_1",
+                    "num_compute_units": 1,
+                    "compute_units": [
+                        {
+                            "name": "webp_kernel_2_ArithmeticCoding_1",
+                            "arguments": [
+                                {
+                                    "name": "m_axi_gmem0",
+                                    "memory": "DDR[3]"
+                                },
+                                {
+                                    "name": "m_axi_gmem1",
+                                    "memory": "DDR[3]"
+                                },
+                                {
+                                    "name": "m_axi_gmem2",
+                                    "memory": "DDR[3]"
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ],
+            "frequency": 250
+        }
+    ],
+    "testinfo": {
+        "disable": false,
+        "jobs": [
+            {
+                "index": 0,
+                "dependency": [],
+                "env": "",
+                "cmd": "",
+                "max_memory_MB": {
+                    "vitis_hw_build": 40960,
+                    "vitis_hw_emu": 28672,
+                    "vitis_sw_emu": 10240,
+                    "vitis_hw_run": 10240
+                },
+                "max_time_min": {
+                    "vitis_hw_build": 3600,
+                    "vitis_hw_emu": 300,
+                    "vitis_sw_emu": 60,
+                    "vitis_hw_run": 10
+                }
+            }
+        ],
+        "targets": [
+            "vitis_sw_emu",
+            "vitis_hw_emu",
+            "vitis_hw"
+        ],
+        "category": "canary"
+    }
+}
\ No newline at end of file
diff --git a/codec/L2/demos/webpEnc/host/Android.mk b/codec/L2/demos/webpEnc/host/Android.mk
new file mode 100644
index 0000000000..5489f03bc2
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/Android.mk
@@ -0,0 +1,71 @@
+# Directory that holds this Android.mk; relative paths below are resolved from it.
+LOCAL_PATH := $(call my-dir)
+
+################################################################################
+# Static library: example_util
+#
+# NOTE(review): LOCAL_C_INCLUDES points at $(LOCAL_PATH)/../src, while other
+# files in this demo reference the codec sources under host/src/ -- confirm the
+# include path resolves in this tree.
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := example_util
+LOCAL_SRC_FILES := example_util.c
+LOCAL_C_INCLUDES := $(LOCAL_PATH)/../src
+LOCAL_CFLAGS := $(WEBP_CFLAGS)
+
+include $(BUILD_STATIC_LIBRARY)
+
+################################################################################
+# Executable: cwebp
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := cwebp
+
+# Note: jpeg/png encoding can be enabled by using the sources from AOSP with
+# minor modification to their Android.mk files.
+LOCAL_SRC_FILES := cwebp.c jpegdec.c metadata.c pngdec.c tiffdec.c webpdec.c
+
+LOCAL_C_INCLUDES := $(LOCAL_PATH)/../src
+LOCAL_CFLAGS := $(WEBP_CFLAGS)
+LOCAL_STATIC_LIBRARIES := example_util webp
+
+include $(BUILD_EXECUTABLE)
+
+################################################################################
+# Executable: dwebp
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := dwebp
+LOCAL_SRC_FILES := dwebp.c
+LOCAL_C_INCLUDES := $(LOCAL_PATH)/../src
+LOCAL_CFLAGS := $(WEBP_CFLAGS)
+LOCAL_STATIC_LIBRARIES := example_util webp
+
+include $(BUILD_EXECUTABLE)
+
+################################################################################
+# Executable: webpmux_example (built from webpmux.c)
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := webpmux_example
+LOCAL_SRC_FILES := webpmux.c
+LOCAL_C_INCLUDES := $(LOCAL_PATH)/../src
+LOCAL_CFLAGS := $(WEBP_CFLAGS)
+LOCAL_STATIC_LIBRARIES := example_util webpmux webp
+
+include $(BUILD_EXECUTABLE)
diff --git a/codec/L2/demos/webpEnc/host/Makefile.am b/codec/L2/demos/webpEnc/host/Makefile.am
new file mode 100644
index 0000000000..81970ce1e2
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/Makefile.am
@@ -0,0 +1,66 @@
+# Add the 'src' directory of both the build tree and the source tree to the
+# preprocessor include path. AM_CPPFLAGS is appended to here, so it is
+# presumably already defined elsewhere (e.g. by configure) -- TODO confirm.
+AM_CPPFLAGS += -I$(top_builddir)/src -I$(top_srcdir)/src
+
+# Installed programs. dwebp and cwebp are always built; vwebp, webpmux and
+# gif2webp are appended only when their Automake conditional is true.
+bin_PROGRAMS = dwebp cwebp
+if BUILD_VWEBP
+  bin_PROGRAMS += vwebp
+endif
+if WANT_MUX
+  bin_PROGRAMS += webpmux
+endif
+
+if BUILD_GIF2WEBP
+  bin_PROGRAMS += gif2webp
+endif
+
+# Convenience libtool library (noinst_: built but not installed) that the
+# programs below add to their LDADD.
+noinst_LTLIBRARIES = libexampleutil.la
+
+libexampleutil_la_SOURCES = example_util.c example_util.h stopwatch.h
+
+# anim_diff is built only when BUILD_ANIMDIFF is true and is never installed.
+if BUILD_ANIMDIFF
+  noinst_PROGRAMS = anim_diff
+endif
+
+# Per-program sources, preprocessor flags and link libraries.
+# The order of the '+=' lines for each *_LDADD variable determines the order of
+# the libraries on the link line, so keep it when editing.
+
+# anim_diff: needs the demux library, the example utilities, the GIF library
+# and libm.
+anim_diff_SOURCES = anim_diff.c anim_util.c anim_util.h
+anim_diff_CPPFLAGS = $(AM_CPPFLAGS) $(GIF_INCLUDES)
+anim_diff_LDADD  = ../src/demux/libwebpdemux.la
+anim_diff_LDADD += libexampleutil.la
+anim_diff_LDADD += $(GIF_LIBS) -lm
+
+# dwebp: uses the PNG and JPEG include paths and libraries.
+dwebp_SOURCES = dwebp.c stopwatch.h
+dwebp_CPPFLAGS  = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
+dwebp_CPPFLAGS += $(JPEG_INCLUDES) $(PNG_INCLUDES)
+dwebp_LDADD = libexampleutil.la $(PNG_LIBS) $(JPEG_LIBS)
+
+# cwebp: one reader module per input format (jpeg, png, tiff, webp, wic); it
+# always links the full libwebp.
+cwebp_SOURCES  = cwebp.c metadata.c metadata.h stopwatch.h
+cwebp_SOURCES += jpegdec.c jpegdec.h
+cwebp_SOURCES += pngdec.c pngdec.h
+cwebp_SOURCES += tiffdec.c tiffdec.h
+cwebp_SOURCES += webpdec.c webpdec.h
+cwebp_SOURCES += wicdec.c wicdec.h
+cwebp_CPPFLAGS  = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
+cwebp_CPPFLAGS += $(JPEG_INCLUDES) $(PNG_INCLUDES) $(TIFF_INCLUDES)
+cwebp_LDADD  = libexampleutil.la ../src/libwebp.la
+cwebp_LDADD += $(JPEG_LIBS) $(PNG_LIBS) $(TIFF_LIBS)
+
+# gif2webp: links the mux library, libwebp and the GIF library.
+gif2webp_SOURCES = gif2webp.c gifdec.c gifdec.h
+gif2webp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(GIF_INCLUDES)
+gif2webp_LDADD  = libexampleutil.la ../src/mux/libwebpmux.la ../src/libwebp.la
+gif2webp_LDADD += $(GIF_LIBS)
+
+# webpmux: links the mux library and libwebp.
+webpmux_SOURCES = webpmux.c
+webpmux_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
+webpmux_LDADD = libexampleutil.la ../src/mux/libwebpmux.la ../src/libwebp.la
+
+# vwebp: links the demux library and the GL libraries.
+vwebp_SOURCES = vwebp.c
+vwebp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(GL_INCLUDES)
+vwebp_LDADD = libexampleutil.la ../src/demux/libwebpdemux.la $(GL_LIBS)
+
+# anim_diff, dwebp and vwebp link against libwebpdecoder.la when
+# BUILD_LIBWEBPDECODER is true, and against the full libwebp.la otherwise.
+# These entries are appended after the libraries already listed in each
+# program's LDADD.
+if BUILD_LIBWEBPDECODER
+  anim_diff_LDADD += ../src/libwebpdecoder.la
+  dwebp_LDADD += ../src/libwebpdecoder.la
+  vwebp_LDADD += ../src/libwebpdecoder.la
+else
+  anim_diff_LDADD += ../src/libwebp.la
+  dwebp_LDADD += ../src/libwebp.la
+  vwebp_LDADD += ../src/libwebp.la
+endif
diff --git a/codec/L2/demos/webpEnc/host/anim_diff.c b/codec/L2/demos/webpEnc/host/anim_diff.c
new file mode 100644
index 0000000000..d44ae779df
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/anim_diff.c
@@ -0,0 +1,210 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Checks if given pair of animated GIF/WebP images are identical:
+// That is: their reconstructed canvases match pixel-by-pixel and their other
+// animation properties (loop count etc) also match.
+//
+// example: anim_diff foo.gif bar.webp
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h> // for 'strtod'.
+#include <string.h> // for 'strcmp'.
+
+#include "./anim_util.h"
+
+#if defined(_MSC_VER) && _MSC_VER < 1900
+#define snprintf _snprintf
+#endif
+
+// Reports whether the signed sum 'a + b' would exceed INT_MAX.
+// Only overflow in the positive direction is detected: any 'b <= 0' yields 0.
+static int AdditionWillOverflow(int a, int b) {
+    if (b <= 0) return 0;
+    return a > INT_MAX - b;
+}
+
+// Collapses runs of consecutive frames whose RGBA buffers are byte-identical
+// into a single frame carrying the summed duration. A pair of frames is left
+// untouched when adding their durations would overflow 'int'.
+static void MinimizeAnimationFrames(AnimatedImage* const img) {
+    uint32_t cur = 1;
+    while (cur < img->num_frames) {
+        DecodedFrame* const kept = &img->frames[cur - 1];
+        DecodedFrame* const candidate = &img->frames[cur];
+        const int mergeable = !AdditionWillOverflow(kept->duration, candidate->duration) &&
+                              !memcmp(kept->rgba, candidate->rgba, img->canvas_width * 4 * img->canvas_height);
+        if (!mergeable) {
+            ++cur;
+            continue;
+        }
+        // Fold 'candidate' into 'kept', then close the gap it leaves in the array.
+        kept->duration += candidate->duration;
+        if (cur + 1 < img->num_frames) {
+            memmove(candidate, candidate + 1, (img->num_frames - cur - 1) * sizeof(*img->frames));
+        }
+        --img->num_frames;
+        // 'cur' is deliberately not advanced: the frame that just slid into this
+        // slot is compared against 'kept' on the next iteration.
+    }
+}
+
+// Compares two unsigned 32-bit values.
+// Returns 1 when they are equal. Otherwise prints
+// "<output_str>: <a> vs <b>" to stderr and returns 0.
+static int CompareValues(uint32_t a, uint32_t b, const char* output_str) {
+    if (a != b) {
+        // Both values are uint32_t, so they are printed with "%u"; "%d" would show
+        // values above INT_MAX (e.g. a background color) as negative numbers.
+        fprintf(stderr, "%s: %u vs %u\n", output_str, a, b);
+        return 0;
+    }
+    return 1;
+}
+
+// Compares two decoded animations; every mismatch found is reported on stderr.
+// Returns 1 when the images are considered equal, 0 otherwise.
+//
+// Canvas width, canvas height and frame count must match, otherwise the
+// comparison stops immediately. Loop count, background color and per-frame
+// durations are compared only when 'img1' has more than one frame.
+// Each frame pair is then checked with GetDiffAndPSNR(): when 'min_psnr' > 0 a
+// frame fails if its PSNR is below 'min_psnr'; otherwise a frame fails if its
+// maximum pixel difference is non-zero. 'premultiply' is forwarded unchanged
+// to GetDiffAndPSNR().
+//
+// Note: As long as frame durations and reconstructed frames are identical, it
+// is OK for other aspects like offsets, dispose/blend method to vary.
+static int CompareAnimatedImagePair(const AnimatedImage* const img1,
+                                    const AnimatedImage* const img2,
+                                    int premultiply,
+                                    double min_psnr) {
+    int ok = 1;
+    const int is_multi_frame_image = (img1->num_frames > 1);
+    uint32_t i;
+
+    ok = CompareValues(img1->canvas_width, img2->canvas_width, "Canvas width mismatch") && ok;
+    ok = CompareValues(img1->canvas_height, img2->canvas_height, "Canvas height mismatch") && ok;
+    ok = CompareValues(img1->num_frames, img2->num_frames, "Frame count mismatch") && ok;
+    if (!ok) return 0; // These are fatal failures, can't proceed.
+
+    if (is_multi_frame_image) { // Checks relevant for multi-frame images only.
+        ok = CompareValues(img1->loop_count, img2->loop_count, "Loop count mismatch") && ok;
+        ok = CompareValues(img1->bgcolor, img2->bgcolor, "Background color mismatch") && ok;
+    }
+
+    for (i = 0; i < img1->num_frames; ++i) {
+        // Pixel-by-pixel comparison.
+        const uint8_t* const rgba1 = img1->frames[i].rgba;
+        const uint8_t* const rgba2 = img2->frames[i].rgba;
+        int max_diff;
+        double psnr;
+        if (is_multi_frame_image) { // Check relevant for multi-frame images only.
+            // The frame index is uint32_t, hence "%u". The 8 spare bytes plus the
+            // 2 bytes taken by the "%u" directive itself leave room for all 10
+            // decimal digits of a 32-bit value.
+            const char format[] = "Frame #%u, duration mismatch";
+            char tmp[sizeof(format) + 8];
+            ok = ok && (snprintf(tmp, sizeof(tmp), format, i) >= 0);
+            ok = ok && CompareValues(img1->frames[i].duration, img2->frames[i].duration, tmp);
+        }
+        GetDiffAndPSNR(rgba1, rgba2, img1->canvas_width, img1->canvas_height, premultiply, &max_diff, &psnr);
+        if (min_psnr > 0.) {
+            if (psnr < min_psnr) {
+                fprintf(stderr, "Frame #%u, psnr = %.2lf (min_psnr = %f)\n", i, psnr, min_psnr);
+                ok = 0;
+            }
+        } else {
+            if (max_diff != 0) {
+                fprintf(stderr, "Frame #%u, max pixel diff: %d\n", i, max_diff);
+                ok = 0;
+            }
+        }
+    }
+    return ok;
+}
+
+// Writes the command-line usage summary to stdout.
+static void Help(void) {
+    fputs(
+        "Usage: anim_diff <image1> <image2> [options]\n"
+        "\nOptions:\n"
+        "  -dump_frames <folder> dump decoded frames in PAM format\n"
+        "  -min_psnr <float> ... minimum per-frame PSNR\n"
+        "  -raw_comparison ..... if this flag is not used, RGB is\n"
+        "                        premultiplied before comparison\n",
+        stdout);
+}
+
+// Entry point: anim_diff <image1> <image2> [options]
+//
+// Decodes both input files, merges identical consecutive frames in each, and
+// compares the two resulting animations.
+// Exit status:
+//    0  the two files are identical,
+//   -1  bad command line (usage is printed),
+//   -2  one of the files could not be decoded,
+//   -3  the files differ.
+int main(int argc, const char* argv[]) {
+    int return_code = -1;
+    int dump_frames = 0;
+    const char* dump_folder = NULL;
+    double min_psnr = 0.;
+    int got_input1 = 0;
+    int got_input2 = 0;
+    int premultiply = 1;
+    int i, c;
+    const char* files[2] = {NULL, NULL};
+    AnimatedImage images[2];
+
+    if (argc < 3) {
+        Help();
+        return -1;
+    }
+
+    // Parse the command line. Anything that is not a recognized option is taken
+    // as an input file; a third such argument is an error.
+    for (c = 1; c < argc; ++c) {
+        int parse_error = 0;
+        if (!strcmp(argv[c], "-dump_frames")) {
+            // Requires a following argument: the destination folder.
+            if (c < argc - 1) {
+                dump_frames = 1;
+                dump_folder = argv[++c];
+            } else {
+                parse_error = 1;
+            }
+        } else if (!strcmp(argv[c], "-min_psnr")) {
+            // Requires a following argument that starts with a parsable number.
+            if (c < argc - 1) {
+                const char* const v = argv[++c];
+                char* end = NULL;
+                const double d = strtod(v, &end);
+                // strtod() leaves 'end == v' when no conversion was performed.
+                if (end == v) {
+                    parse_error = 1;
+                    fprintf(stderr, "Error! '%s' is not a floating point number.\n", v);
+                }
+                min_psnr = d;
+            } else {
+                parse_error = 1;
+            }
+        } else if (!strcmp(argv[c], "-raw_comparison")) {
+            premultiply = 0;
+        } else {
+            if (!got_input1) {
+                files[0] = argv[c];
+                got_input1 = 1;
+            } else if (!got_input2) {
+                files[1] = argv[c];
+                got_input2 = 1;
+            } else {
+                parse_error = 1;
+            }
+        }
+        if (parse_error) {
+            Help();
+            return -1;
+        }
+    }
+    // Both input files are mandatory.
+    if (!got_input2) {
+        Help();
+        return -1;
+    }
+
+    if (dump_frames) {
+        printf("Dumping decoded frames in: %s\n", dump_folder);
+    }
+
+    // Zero both images up front so that the cleanup at 'End' is safe even when
+    // decoding fails before an image has been filled in.
+    memset(images, 0, sizeof(images));
+    for (i = 0; i < 2; ++i) {
+        printf("Decoding file: %s\n", files[i]);
+        if (!ReadAnimatedImage(files[i], &images[i], dump_frames, dump_folder)) {
+            fprintf(stderr, "Error decoding file: %s\n Aborting.\n", files[i]);
+            return_code = -2;
+            goto End;
+        } else {
+            MinimizeAnimationFrames(&images[i]);
+        }
+    }
+
+    if (!CompareAnimatedImagePair(&images[0], &images[1], premultiply, min_psnr)) {
+        fprintf(stderr, "\nFiles %s and %s differ.\n", files[0], files[1]);
+        return_code = -3;
+    } else {
+        printf("\nFiles %s and %s are identical.\n", files[0], files[1]);
+        return_code = 0;
+    }
+End:
+    // Common exit path: release both images.
+    ClearAnimatedImage(&images[0]);
+    ClearAnimatedImage(&images[1]);
+    return return_code;
+}
diff --git a/codec/L2/demos/webpEnc/host/anim_util.c b/codec/L2/demos/webpEnc/host/anim_util.c
new file mode 100644
index 0000000000..fe5c9f9657
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/anim_util.c
@@ -0,0 +1,721 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Utilities for animated images
+
+#include "./anim_util.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+
+#ifdef WEBP_HAVE_GIF
+#include <gif_lib.h>
+#endif
+#include "webp/format_constants.h"
+#include "webp/decode.h"
+#include "webp/demux.h"
+#include "./example_util.h"
+
+#if defined(_MSC_VER) && _MSC_VER < 1900
+#define snprintf _snprintf
+#endif
+
+static const int kNumChannels = 4;
+
+// -----------------------------------------------------------------------------
+// Common utilities.
+
+// Returns 1 when a 'width' x 'height' frame has exactly the canvas dimensions,
+// 0 otherwise.
+static int IsFullFrame(int width, int height, int canvas_width, int canvas_height) {
+    if (width != canvas_width) return 0;
+    return height == canvas_height;
+}
+
+// Allocates storage for 'num_frames' decoded frames of 'image'.
+//
+// One contiguous pixel buffer (stored in image->raw_mem) holds all frames, each
+// canvas_width * kNumChannels * canvas_height bytes long, and frames[i].rgba
+// points at the i-th slice of it. image->canvas_width and image->canvas_height
+// must be set before the call. Every frame starts with duration 0 and
+// is_key_frame 0.
+//
+// Returns 1 on success. Returns 0, leaving 'image' untouched, when a size
+// computation would overflow size_t or when an allocation fails.
+static int AllocateFrames(AnimatedImage* const image, uint32_t num_frames) {
+    uint32_t i;
+    const size_t kMaxSize = ~(size_t)0;
+    const size_t width = image->canvas_width;
+    const size_t height = image->canvas_height;
+    size_t rgba_size;
+    uint8_t* mem;
+    DecodedFrame* frames;
+
+    // The dimensions and the frame count come from the input file. Compute the
+    // sizes in size_t and refuse any product that does not fit, instead of
+    // letting the multiplication wrap and under-allocate.
+    if (width > kMaxSize / (size_t)kNumChannels) return 0;
+    rgba_size = width * (size_t)kNumChannels;
+    if (height != 0 && rgba_size > kMaxSize / height) return 0;
+    rgba_size *= height;
+    if (rgba_size != 0 && num_frames > kMaxSize / rgba_size) return 0;
+    if (num_frames > kMaxSize / sizeof(*frames)) return 0;
+
+    mem = (uint8_t*)malloc(num_frames * rgba_size * sizeof(*mem));
+    frames = (DecodedFrame*)malloc(num_frames * sizeof(*frames));
+
+    if (mem == NULL || frames == NULL) {
+        free(mem);
+        free(frames);
+        return 0;
+    }
+    // NOTE(review): only the previous pixel buffer is released here; a previous
+    // image->frames array is overwritten without being freed. Callers appear to
+    // zero the struct first -- confirm.
+    free(image->raw_mem);
+    image->num_frames = num_frames;
+    image->frames = frames;
+    for (i = 0; i < num_frames; ++i) {
+        frames[i].rgba = mem + i * rgba_size;
+        frames[i].duration = 0;
+        frames[i].is_key_frame = 0;
+    }
+    image->raw_mem = mem;
+    return 1;
+}
+
+// Releases the pixel buffer and the frame array owned by 'image' and resets
+// the corresponding fields. A NULL 'image' is ignored.
+void ClearAnimatedImage(AnimatedImage* const image) {
+    if (image == NULL) return;
+
+    free(image->raw_mem);
+    image->raw_mem = NULL;
+
+    free(image->frames);
+    image->frames = NULL;
+    image->num_frames = 0;
+}
+
+// Sets every byte of a 'canvas_width' x 'canvas_height' canvas
+// (kNumChannels bytes per pixel) to zero, i.e. fully transparent.
+static void ZeroFillCanvas(uint8_t* rgba, uint32_t canvas_width, uint32_t canvas_height) {
+    const size_t num_bytes = canvas_width * kNumChannels * canvas_height;
+    memset(rgba, 0, num_bytes);
+}
+
+// Zeroes (makes transparent) the 'width' x 'height' rectangle whose top-left
+// pixel is at ('x_offset', 'y_offset') inside a buffer with rows of
+// 'rgba_stride' bytes.
+static void ZeroFillFrameRect(uint8_t* rgba, int rgba_stride, int x_offset, int y_offset, int width, int height) {
+    uint8_t* row;
+    int rows_left;
+    assert(width * kNumChannels <= rgba_stride);
+    // First byte of the rectangle's top row.
+    row = rgba + (y_offset * rgba_stride + x_offset * kNumChannels);
+    for (rows_left = height; rows_left > 0; --rows_left, row += rgba_stride) {
+        memset(row, 0, width * kNumChannels);
+    }
+}
+
+// Copies a whole 'width' x 'height' canvas (kNumChannels bytes per pixel) from
+// 'src' to 'dst'. Both pointers must be non-NULL.
+static void CopyCanvas(const uint8_t* src, uint8_t* dst, uint32_t width, uint32_t height) {
+    const size_t num_bytes = width * kNumChannels * height;
+    assert(src != NULL && dst != NULL);
+    memcpy(dst, src, num_bytes);
+}
+
+// Copies the 'width' x 'height' rectangle located at ('x_offset', 'y_offset')
+// from 'src' to the same position in 'dst'. Both buffers use rows of 'stride'
+// bytes.
+static void CopyFrameRectangle(
+    const uint8_t* src, uint8_t* dst, int stride, int x_offset, int y_offset, int width, int height) {
+    const int width_in_bytes = width * kNumChannels;
+    const size_t first_byte = y_offset * stride + x_offset * kNumChannels;
+    const uint8_t* from;
+    uint8_t* to;
+    int rows_left;
+    assert(width_in_bytes <= stride);
+    from = src + first_byte;
+    to = dst + first_byte;
+    // Copy one row at a time, stepping both cursors by a full stride.
+    for (rows_left = height; rows_left > 0; --rows_left) {
+        memcpy(to, from, width_in_bytes);
+        from += stride;
+        to += stride;
+    }
+}
+
+// Rewrites every pixel whose top byte (bits 24..31, the alpha value as read
+// here) is zero to the all-zero pixel, so that fully transparent pixels
+// compare equal regardless of their color bits.
+static void CleanupTransparentPixels(uint32_t* rgba, uint32_t width, uint32_t height) {
+    const uint32_t num_pixels = width * height;
+    uint32_t n;
+    for (n = 0; n < num_pixels; ++n) {
+        if ((rgba[n] >> 24) == 0) rgba[n] = 0;
+    }
+}
+
+// Writes one decoded frame as a PAM (P7, RGB_ALPHA) file named
+// "<dump_folder>/<basename of filename>_frame_<frame_num>.pam".
+//
+//   filename      path of the source image; only the part after the last '/'
+//                 is used.
+//   dump_folder   directory that receives the file.
+//   frame_num     frame index embedded in the file name.
+//   rgba          canvas_width * canvas_height pixels, kNumChannels bytes each.
+//
+// Returns 1 on success. Returns 0 if the name cannot be built, the file cannot
+// be opened, or a write fails; a message is printed to stderr in each case
+// except when allocating the file name fails.
+static int DumpFrame(const char filename[],
+                     const char dump_folder[],
+                     uint32_t frame_num,
+                     const uint8_t rgba[],
+                     int canvas_width,
+                     int canvas_height) {
+    int ok = 0;
+    size_t max_len;
+    int y;
+    int name_len;
+    const size_t row_bytes = (size_t)canvas_width * kNumChannels;
+    const char* base_name = NULL;
+    char* file_name = NULL;
+    FILE* f = NULL;
+
+    base_name = strrchr(filename, '/');
+    base_name = (base_name == NULL) ? filename : base_name + 1;
+    // '/' separator + fixed name parts + up to 10 decimal digits of a 32-bit
+    // frame number + the terminating NUL.
+    max_len = strlen(dump_folder) + 1 + strlen(base_name) + strlen("_frame_") + strlen(".pam") + 10 + 1;
+    file_name = (char*)malloc(max_len * sizeof(*file_name));
+    if (file_name == NULL) goto End;
+
+    // 'frame_num' is uint32_t, hence "%u". A result >= max_len would mean the
+    // name was truncated, which is treated as an error as well.
+    name_len = snprintf(file_name, max_len, "%s/%s_frame_%u.pam", dump_folder, base_name, frame_num);
+    if (name_len < 0 || (size_t)name_len >= max_len) {
+        fprintf(stderr, "Error while generating file name\n");
+        goto End;
+    }
+
+    f = fopen(file_name, "wb");
+    if (f == NULL) {
+        fprintf(stderr, "Error opening file for writing: %s\n", file_name);
+        goto End;
+    }
+    if (fprintf(f,
+                "P7\nWIDTH %d\nHEIGHT %d\n"
+                "DEPTH 4\nMAXVAL 255\nTUPLTYPE RGB_ALPHA\nENDHDR\n",
+                canvas_width, canvas_height) < 0) {
+        fprintf(stderr, "Write error for file %s\n", file_name);
+        goto End;
+    }
+    // Write the pixels one row at a time; the row offset is computed in size_t
+    // so that it cannot overflow 'int' on large canvases.
+    for (y = 0; y < canvas_height; ++y) {
+        if (fwrite((const char*)(rgba) + (size_t)y * row_bytes, row_bytes, 1, f) != 1) {
+            fprintf(stderr, "Error writing to file: %s\n", file_name);
+            goto End;
+        }
+    }
+    ok = 1;
+End:
+    if (f != NULL) fclose(f);
+    free(file_name);
+    return ok;
+}
+
+// -----------------------------------------------------------------------------
+// WebP Decoding.
+
+// Returns 1 when WebPGetInfo() accepts the bytes in 'webp_data' as a WebP
+// bitstream, 0 otherwise.
+static int IsWebP(const WebPData* const webp_data) {
+    const int info_status = WebPGetInfo(webp_data->bytes, webp_data->size, NULL, NULL);
+    return info_status != 0;
+}
+
+// Decodes the animated WebP bitstream held in 'webp_data' into 'image'.
+//
+//   filename     only used for diagnostics and for naming dumped frames.
+//   webp_data    the complete encoded file.
+//   image        cleared first, then filled with the canvas properties and one
+//                full-canvas RGBA buffer per frame.
+//   dump_frames  if true, each frame is also written to 'dump_folder'.
+//
+// Returns true on success. A frame-dump failure is reported once, decoding
+// still runs to the end, and the function then returns false. Whatever was
+// already stored in 'image' when a failure occurs is left in place.
+static int ReadAnimatedWebP(const char filename[],
+                            const WebPData* const webp_data,
+                            AnimatedImage* const image,
+                            int dump_frames,
+                            const char dump_folder[]) {
+    int ok = 0;
+    int dump_ok = 1;
+    uint32_t frame_index = 0;
+    int prev_frame_timestamp = 0;
+    WebPAnimDecoder* dec;
+    WebPAnimInfo anim_info;
+
+    memset(image, 0, sizeof(*image));
+
+    dec = WebPAnimDecoderNew(webp_data, NULL);
+    if (dec == NULL) {
+        fprintf(stderr, "Error parsing image: %s\n", filename);
+        goto End;
+    }
+
+    if (!WebPAnimDecoderGetInfo(dec, &anim_info)) {
+        fprintf(stderr, "Error getting global info about the animation\n");
+        goto End;
+    }
+
+    // Animation properties.
+    image->canvas_width = anim_info.canvas_width;
+    image->canvas_height = anim_info.canvas_height;
+    image->loop_count = anim_info.loop_count;
+    image->bgcolor = anim_info.bgcolor;
+
+    // Allocate frames. Leave through 'End' so the decoder is released on
+    // failure as well.
+    if (!AllocateFrames(image, anim_info.frame_count)) goto End;
+
+    // Decode frames.
+    while (WebPAnimDecoderHasMoreFrames(dec)) {
+        DecodedFrame* curr_frame;
+        uint8_t* curr_rgba;
+        uint8_t* frame_rgba;
+        int timestamp;
+
+        if (!WebPAnimDecoderGetNext(dec, &frame_rgba, &timestamp)) {
+            fprintf(stderr, "Error decoding frame #%u\n", frame_index);
+            goto End;
+        }
+        curr_frame = &image->frames[frame_index];
+        curr_rgba = curr_frame->rgba;
+        // The decoder reports a running timestamp; the per-frame duration is
+        // its difference with the previous frame's timestamp.
+        curr_frame->duration = timestamp - prev_frame_timestamp;
+        curr_frame->is_key_frame = 0; // Unused.
+        memcpy(curr_rgba, frame_rgba, image->canvas_width * kNumChannels * image->canvas_height);
+
+        // Needed only because we may want to compare with GIF later.
+        CleanupTransparentPixels((uint32_t*)curr_rgba, image->canvas_width, image->canvas_height);
+
+        if (dump_frames && dump_ok) {
+            dump_ok =
+                DumpFrame(filename, dump_folder, frame_index, curr_rgba, image->canvas_width, image->canvas_height);
+            if (!dump_ok) { // Print error once, but continue decode loop.
+                fprintf(stderr, "Error dumping frames to %s\n", dump_folder);
+            }
+        }
+
+        ++frame_index;
+        prev_frame_timestamp = timestamp;
+    }
+    ok = dump_ok;
+
+End:
+    WebPAnimDecoderDelete(dec);
+    return ok;
+}
+
+// -----------------------------------------------------------------------------
+// GIF Decoding.
+
+// Tells whether 'data' is longer than a GIF signature and starts with one of
+// the GIF_STAMP, GIF87_STAMP or GIF89_STAMP signatures.
+static int IsGIF(const WebPData* const data) {
+    if (!(data->size > GIF_STAMP_LEN)) return 0;
+    if (memcmp(GIF_STAMP, data->bytes, GIF_STAMP_LEN) == 0) return 1;
+    if (memcmp(GIF87_STAMP, data->bytes, GIF_STAMP_LEN) == 0) return 1;
+    return memcmp(GIF89_STAMP, data->bytes, GIF_STAMP_LEN) == 0;
+}
+
+#ifdef WEBP_HAVE_GIF
+
+// GIFLIB_MAJOR is only defined in libgif >= 4.2.0.
+//
+// LOCAL_GIF_VERSION packs the library version as (major << 8) | minor, and
+// LOCAL_GIF_PREREQ(maj, min) is true when that version is at least maj.min.
+#if defined(GIFLIB_MAJOR) && defined(GIFLIB_MINOR)
+#define LOCAL_GIF_VERSION ((GIFLIB_MAJOR << 8) | GIFLIB_MINOR)
+#define LOCAL_GIF_PREREQ(maj, min) (LOCAL_GIF_VERSION >= (((maj) << 8) | (min)))
+#else
+// Version macros unavailable: report version 0 so every PREREQ check fails.
+#define LOCAL_GIF_VERSION 0
+#define LOCAL_GIF_PREREQ(maj, min) 0
+#endif
+
+#if !LOCAL_GIF_PREREQ(5, 0)
+
+// Added in v5.0
+// Local definition of the graphics control block record, compiled only when
+// the libgif in use predates v5.0 (see the enclosing !LOCAL_GIF_PREREQ(5, 0)).
+// The DISPOSE_* / DISPOSAL_* macros are the values stored in DisposalMode.
+typedef struct {
+    int DisposalMode;
+#define DISPOSAL_UNSPECIFIED 0 // No disposal specified
+#define DISPOSE_DO_NOT 1       // Leave image in place
+#define DISPOSE_BACKGROUND 2   // Set area to background color
+#define DISPOSE_PREVIOUS 3     // Restore to previous content
+    int UserInputFlag;         // User confirmation required before disposal
+    int DelayTime;             // Pre-display delay in 0.01sec units
+    int TransparentColor;      // Palette index for transparency, -1 if none
+#define NO_TRANSPARENT_COLOR -1
+} GraphicsControlBlock;
+
+// Unpacks the payload of a graphics control extension into 'gcb'.
+// Byte 0 carries the flag bits (disposal mode, user-input flag, transparency
+// flag), bytes 1-2 the little-endian delay, byte 3 the transparent index.
+// Returns GIF_ERROR, leaving 'gcb' untouched, unless the payload is exactly
+// 4 bytes long; returns GIF_OK otherwise.
+static int DGifExtensionToGCB(const size_t GifExtensionLength,
+                              const GifByteType* GifExtension,
+                              GraphicsControlBlock* gcb) {
+    if (GifExtensionLength == 4) {
+        const GifByteType packed = GifExtension[0];
+        gcb->DisposalMode = (packed >> 2) & 0x07;
+        gcb->UserInputFlag = (packed & 0x02) != 0;
+        gcb->DelayTime = GifExtension[1] | (GifExtension[2] << 8);
+        gcb->TransparentColor = (packed & 0x01) ? (int)GifExtension[3] : NO_TRANSPARENT_COLOR;
+        return GIF_OK;
+    }
+    return GIF_ERROR;
+}
+
+// Finds the first graphics control extension attached to saved image
+// 'ImageIndex' and decodes it into 'gcb'.
+// Returns GIF_ERROR without touching 'gcb' when the index is out of range.
+// Otherwise 'gcb' is first reset to defaults (unspecified disposal, no delay,
+// no transparency); the result is that of DGifExtensionToGCB() for the first
+// matching block, or GIF_ERROR when the image has no such block.
+static int DGifSavedExtensionToGCB(GifFileType* GifFile, int ImageIndex, GraphicsControlBlock* gcb) {
+    const SavedImage* saved;
+    int block;
+
+    if (ImageIndex < 0 || ImageIndex > GifFile->ImageCount - 1) return GIF_ERROR;
+
+    gcb->DisposalMode = DISPOSAL_UNSPECIFIED;
+    gcb->UserInputFlag = 0;
+    gcb->DelayTime = 0;
+    gcb->TransparentColor = NO_TRANSPARENT_COLOR;
+
+    saved = &GifFile->SavedImages[ImageIndex];
+    for (block = 0; block < saved->ExtensionBlockCount; block++) {
+        ExtensionBlock* const ext = &saved->ExtensionBlocks[block];
+        if (ext->Function != GRAPHICS_EXT_FUNC_CODE) continue;
+        return DGifExtensionToGCB(ext->ByteCount, (const GifByteType*)ext->Bytes, gcb);
+    }
+    return GIF_ERROR;
+}
+
+#define CONTINUE_EXT_FUNC_CODE 0x00
+
+// Signature was changed in v5.0
+#define DGifOpenFileName(a, b) DGifOpenFileName(a)
+
+#endif // !LOCAL_GIF_PREREQ(5, 0)
+
+// Signature changed in v5.1
+#if !LOCAL_GIF_PREREQ(5, 1)
+#define DGifCloseFile(a, b) DGifCloseFile(a)
+#endif
+
+// Prints a GIFLib error to stderr. How the message text is obtained depends
+// on the library version:
+//   >= 5.0: GifErrorString() is given 'gif_error' when 'gif' is NULL, else
+//           the error stored in 'gif'.
+//   >= 4.2: GifErrorString() takes no argument; 'gif' is unused.
+//   older : PrintGifError() prints the text itself; 'gif' is unused.
+// In every case the number printed is 'gif_error'.
+static void GIFDisplayError(const GifFileType* const gif, int gif_error) {
+// libgif 4.2.0 has retired PrintGifError() and added GifErrorString().
+#if LOCAL_GIF_PREREQ(4, 2)
+#if LOCAL_GIF_PREREQ(5, 0)
+    const char* error_str = GifErrorString((gif == NULL) ? gif_error : gif->Error);
+#else
+    const char* error_str = GifErrorString();
+    (void)gif;
+#endif
+    // The library may have no text for this code.
+    if (error_str == NULL) error_str = "Unknown error";
+    fprintf(stderr, "GIFLib Error %d: %s\n", gif_error, error_str);
+#else
+    (void)gif;
+    fprintf(stderr, "GIFLib Error %d: ", gif_error);
+    PrintGifError();
+    fprintf(stderr, "\n");
+#endif
+}
+
+// Decides whether the current frame can be rebuilt without the previous
+// canvas. This is the case when there is no previous frame, or when the
+// previous frame is disposed to background and either covered the whole
+// canvas or was itself a key-frame.
+static int IsKeyFrameGIF(const GifImageDesc* prev_desc,
+                         int prev_dispose,
+                         const DecodedFrame* const prev_frame,
+                         int canvas_width,
+                         int canvas_height) {
+    if (prev_frame == NULL) return 1;
+    if (prev_dispose != DISPOSE_BACKGROUND) return 0;
+    return IsFullFrame(prev_desc->Width, prev_desc->Height, canvas_width, canvas_height) ||
+           prev_frame->is_key_frame;
+}
+
+// Returns the TransparentColor field of the graphics control block looked up
+// for the first saved image. The lookup status is ignored, so the value is
+// whatever the lookup left in the zero-initialized block.
+static int GetTransparentIndexGIF(GifFileType* gif) {
+    GraphicsControlBlock gcb0;
+    memset(&gcb0, 0, sizeof(gcb0));
+    (void)DGifSavedExtensionToGCB(gif, 0, &gcb0);
+    return gcb0.TransparentColor;
+}
+
+// Returns the animation's background color packed as 0xAARRGGBB.
+//   - Background index equal to the first frame's transparent index:
+//     transparent white (0x00ffffff).
+//   - No usable global color map, or background index outside it:
+//     opaque white (0xffffffff).
+//   - Otherwise: the opaque color found at the background index.
+static uint32_t GetBackgroundColorGIF(GifFileType* gif) {
+    const int transparent_index = GetTransparentIndexGIF(gif);
+    const ColorMapObject* const color_map = gif->SColorMap;
+    if (transparent_index != NO_TRANSPARENT_COLOR && gif->SBackGroundColor == transparent_index) {
+        return 0x00ffffff; // Special case: transparent white.
+    } else if (color_map == NULL || color_map->Colors == NULL || gif->SBackGroundColor < 0 ||
+               gif->SBackGroundColor >= color_map->ColorCount) {
+        return 0xffffffff; // Invalid: assume white.
+    } else {
+        const GifColorType color = color_map->Colors[gif->SBackGroundColor];
+        // Shift in unsigned arithmetic: 0xff << 24 does not fit in a signed
+        // int and would be undefined behaviour.
+        return (0xffu << 24) | ((uint32_t)color.Red << 16) | ((uint32_t)color.Green << 8) |
+               ((uint32_t)color.Blue << 0);
+    }
+}
+
+// Scans every saved image for an application extension block whose 11-byte
+// signature is "NETSCAPE2.0" or "ANIMEXTS1.0" and which is directly followed
+// by a continuation block of at least 3 bytes whose first byte is 1. The loop
+// count is the little-endian 16-bit value in bytes 1-2 of that continuation
+// block. Returns 0 when no such pair of blocks exists.
+static uint32_t GetLoopCountGIF(const GifFileType* const gif) {
+    int img;
+    for (img = 0; img < gif->ImageCount; ++img) {
+        const SavedImage* const saved = &gif->SavedImages[img];
+        int blk;
+        for (blk = 0; (blk + 1) < saved->ExtensionBlockCount; ++blk) {
+            const ExtensionBlock* const app = saved->ExtensionBlocks + blk;
+            const ExtensionBlock* const sub = saved->ExtensionBlocks + blk + 1;
+            const char* const signature = (const char*)app->Bytes;
+
+            if (app->Function != APPLICATION_EXT_FUNC_CODE || app->ByteCount != 11) continue;
+            if (memcmp(signature, "NETSCAPE2.0", 11) != 0 && memcmp(signature, "ANIMEXTS1.0", 11) != 0) {
+                continue;
+            }
+            if (sub->Function != CONTINUE_EXT_FUNC_CODE || sub->ByteCount < 3) continue;
+            if (sub->Bytes[0] == 1) {
+                const uint32_t low = (uint32_t)(sub->Bytes[1]);
+                const uint32_t high = (uint32_t)(sub->Bytes[2]);
+                return (high << 8) + low;
+            }
+        }
+    }
+    return 0; // Default.
+}
+
+// Returns the display duration of frame 'n' in milliseconds. The graphics
+// control block stores the delay in hundredths of a second, hence the
+// scaling by 10. The lookup status is ignored; the block starts zeroed.
+static int GetFrameDurationGIF(GifFileType* gif, int n) {
+    GraphicsControlBlock frame_gcb;
+    int delay_centiseconds;
+    memset(&frame_gcb, 0, sizeof(frame_gcb));
+    (void)DGifSavedExtensionToGCB(gif, n, &frame_gcb);
+    delay_centiseconds = frame_gcb.DelayTime;
+    return delay_centiseconds * 10;
+}
+
+// Returns true when the rectangle of 'covered' lies entirely inside the
+// rectangle of 'target', checked separately along each axis.
+static int CoversFrameGIF(const GifImageDesc* const target, const GifImageDesc* const covered) {
+    const int spans_columns =
+        target->Left <= covered->Left && covered->Left + covered->Width <= target->Left + target->Width;
+    const int spans_rows =
+        target->Top <= covered->Top && covered->Top + covered->Height <= target->Top + target->Height;
+    return spans_columns && spans_rows;
+}
+
+// Converts 'len' palette indices from 'src' into 4-byte R,G,B,A pixels in
+// 'dst', looking the colors up in 'cmap' and writing an alpha of 0xff.
+// Indices equal to 'transparent_color' leave the destination pixel untouched,
+// so that the corresponding pixel of an earlier frame shows through.
+static void RemapPixelsGIF(
+    const uint8_t* const src, const ColorMapObject* const cmap, int transparent_color, int len, uint8_t* dst) {
+    int x;
+    for (x = 0; x < len; ++x) {
+        const uint8_t index = src[x];
+        uint8_t* px;
+        GifColorType rgb;
+
+        if (index == transparent_color) continue;
+
+        rgb = cmap->Colors[index];
+        px = dst + 4 * x;
+        px[0] = rgb.Red;
+        px[1] = rgb.Green;
+        px[2] = rgb.Blue;
+        px[3] = 0xff;
+    }
+}
+
+// Paints the raster of 'gif_image' onto the canvas 'dst' at the frame's
+// Left/Top position. The frame's own color map is used when present, 'cmap'
+// otherwise. 'out_stride' is the canvas row size in bytes.
+// Returns false, after printing a diagnostic, when no color map is available
+// or its color count is not 2^BitsPerPixel; returns true otherwise.
+// NOTE(review): the frame rectangle is not checked against the canvas size
+// here - confirm that callers guarantee it fits.
+static int ReadFrameGIF(const SavedImage* const gif_image,
+                        const ColorMapObject* cmap,
+                        int transparent_color,
+                        int out_stride,
+                        uint8_t* const dst) {
+    const GifImageDesc* const desc = &gif_image->ImageDesc;
+    const ColorMapObject* const palette = desc->ColorMap ? desc->ColorMap : cmap;
+    const uint8_t* src_row;
+    uint8_t* dst_row;
+    int row;
+
+    if (palette == NULL || palette->ColorCount != (1 << palette->BitsPerPixel)) {
+        fprintf(stderr, "Potentially corrupt color map.\n");
+        return 0;
+    }
+
+    src_row = (const uint8_t*)gif_image->RasterBits;
+    dst_row = dst + desc->Top * out_stride + desc->Left * kNumChannels;
+
+    for (row = 0; row < desc->Height; ++row, src_row += desc->Width, dst_row += out_stride) {
+        RemapPixelsGIF(src_row, palette, transparent_color, desc->Width, dst_row);
+    }
+    return 1;
+}
+
+// Reads the animated GIF stored in file 'filename' into 'image'.
+//
+// The file is opened and fully parsed with libgif, the canvas properties are
+// copied into 'image', and every frame is reconstructed as a full RGBA canvas:
+// a key-frame starts from a transparent canvas, any other frame starts from a
+// copy of the previous canvas with the previous frame's disposal applied, and
+// the frame's own pixels are then painted on top. If 'dump_frames' is true
+// each reconstructed canvas is also written to 'dump_folder'.
+//
+// Returns true on success. On failure false is returned, the GIF handle is
+// closed, and whatever was already stored in 'image' is left in place.
+static int ReadAnimatedGIF(const char filename[],
+                           AnimatedImage* const image,
+                           int dump_frames,
+                           const char dump_folder[]) {
+    uint32_t frame_count;
+    uint32_t canvas_width, canvas_height;
+    uint32_t i;
+    int gif_error;
+    GifFileType* gif;
+
+    gif = DGifOpenFileName(filename, NULL);
+    if (gif == NULL) {
+        fprintf(stderr, "Could not read file: %s.\n", filename);
+        return 0;
+    }
+
+    gif_error = DGifSlurp(gif);
+    if (gif_error != GIF_OK) {
+        fprintf(stderr, "Could not parse image: %s.\n", filename);
+        GIFDisplayError(gif, gif_error);
+        DGifCloseFile(gif, NULL);
+        return 0;
+    }
+
+    // Animation properties.
+    image->canvas_width = (uint32_t)gif->SWidth;
+    image->canvas_height = (uint32_t)gif->SHeight;
+    if (image->canvas_width > MAX_CANVAS_SIZE || image->canvas_height > MAX_CANVAS_SIZE) {
+        // The dimensions are unsigned, hence %u.
+        fprintf(stderr, "Invalid canvas dimension: %u x %u\n", image->canvas_width, image->canvas_height);
+        DGifCloseFile(gif, NULL);
+        return 0;
+    }
+    image->loop_count = GetLoopCountGIF(gif);
+    image->bgcolor = GetBackgroundColorGIF(gif);
+
+    frame_count = (uint32_t)gif->ImageCount;
+    if (frame_count == 0) {
+        DGifCloseFile(gif, NULL);
+        return 0;
+    }
+
+    // A zero-sized screen falls back to the first frame's dimensions, with
+    // that frame moved to the canvas origin.
+    if (image->canvas_width == 0 || image->canvas_height == 0) {
+        image->canvas_width = gif->SavedImages[0].ImageDesc.Width;
+        image->canvas_height = gif->SavedImages[0].ImageDesc.Height;
+        gif->SavedImages[0].ImageDesc.Left = 0;
+        gif->SavedImages[0].ImageDesc.Top = 0;
+        if (image->canvas_width == 0 || image->canvas_height == 0) {
+            fprintf(stderr, "Invalid canvas size in GIF.\n");
+            DGifCloseFile(gif, NULL);
+            return 0;
+        }
+    }
+    // Allocate frames. The loop below writes through image->frames, so a
+    // failed allocation must stop here.
+    if (!AllocateFrames(image, frame_count)) {
+        DGifCloseFile(gif, NULL);
+        return 0;
+    }
+
+    canvas_width = image->canvas_width;
+    canvas_height = image->canvas_height;
+
+    // Decode and reconstruct frames.
+    for (i = 0; i < frame_count; ++i) {
+        const int canvas_width_in_bytes = canvas_width * kNumChannels;
+        const SavedImage* const curr_gif_image = &gif->SavedImages[i];
+        GraphicsControlBlock curr_gcb;
+        DecodedFrame* curr_frame;
+        uint8_t* curr_rgba;
+
+        memset(&curr_gcb, 0, sizeof(curr_gcb));
+        DGifSavedExtensionToGCB(gif, i, &curr_gcb);
+
+        curr_frame = &image->frames[i];
+        curr_rgba = curr_frame->rgba;
+        curr_frame->duration = GetFrameDurationGIF(gif, i);
+
+        if (i == 0) { // Initialize as transparent.
+            curr_frame->is_key_frame = 1;
+            ZeroFillCanvas(curr_rgba, canvas_width, canvas_height);
+        } else {
+            DecodedFrame* const prev_frame = &image->frames[i - 1];
+            const GifImageDesc* const prev_desc = &gif->SavedImages[i - 1].ImageDesc;
+            GraphicsControlBlock prev_gcb;
+            memset(&prev_gcb, 0, sizeof(prev_gcb));
+            DGifSavedExtensionToGCB(gif, i - 1, &prev_gcb);
+
+            curr_frame->is_key_frame =
+                IsKeyFrameGIF(prev_desc, prev_gcb.DisposalMode, prev_frame, canvas_width, canvas_height);
+
+            if (curr_frame->is_key_frame) { // Initialize as transparent.
+                ZeroFillCanvas(curr_rgba, canvas_width, canvas_height);
+            } else {
+                int prev_frame_disposed, curr_frame_opaque;
+                int prev_frame_completely_covered;
+                // Initialize with previous canvas.
+                uint8_t* const prev_rgba = image->frames[i - 1].rgba;
+                CopyCanvas(prev_rgba, curr_rgba, canvas_width, canvas_height);
+
+                // Dispose previous frame rectangle.
+                prev_frame_disposed =
+                    (prev_gcb.DisposalMode == DISPOSE_BACKGROUND || prev_gcb.DisposalMode == DISPOSE_PREVIOUS);
+                curr_frame_opaque = (curr_gcb.TransparentColor == NO_TRANSPARENT_COLOR);
+                prev_frame_completely_covered =
+                    curr_frame_opaque && CoversFrameGIF(&curr_gif_image->ImageDesc, prev_desc);
+
+                // Disposal can be skipped when the current frame is opaque
+                // and overwrites the whole previous rectangle anyway.
+                if (prev_frame_disposed && !prev_frame_completely_covered) {
+                    switch (prev_gcb.DisposalMode) {
+                        case DISPOSE_BACKGROUND: {
+                            ZeroFillFrameRect(curr_rgba, canvas_width_in_bytes, prev_desc->Left, prev_desc->Top,
+                                              prev_desc->Width, prev_desc->Height);
+                            break;
+                        }
+                        case DISPOSE_PREVIOUS: {
+                            // Walk back to the most recent frame that is not
+                            // itself disposed to "previous".
+                            int src_frame_num = i - 2;
+                            while (src_frame_num >= 0) {
+                                GraphicsControlBlock src_frame_gcb;
+                                memset(&src_frame_gcb, 0, sizeof(src_frame_gcb));
+                                DGifSavedExtensionToGCB(gif, src_frame_num, &src_frame_gcb);
+                                if (src_frame_gcb.DisposalMode != DISPOSE_PREVIOUS) break;
+                                --src_frame_num;
+                            }
+                            if (src_frame_num >= 0) {
+                                // Restore pixels inside previous frame rectangle to
+                                // corresponding pixels in source canvas.
+                                uint8_t* const src_frame_rgba = image->frames[src_frame_num].rgba;
+                                CopyFrameRectangle(src_frame_rgba, curr_rgba, canvas_width_in_bytes, prev_desc->Left,
+                                                   prev_desc->Top, prev_desc->Width, prev_desc->Height);
+                            } else {
+                                // Source canvas doesn't exist. So clear previous frame
+                                // rectangle to background.
+                                ZeroFillFrameRect(curr_rgba, canvas_width_in_bytes, prev_desc->Left, prev_desc->Top,
+                                                  prev_desc->Width, prev_desc->Height);
+                            }
+                            break;
+                        }
+                        default:
+                            break; // Nothing to do.
+                    }
+                }
+            }
+        }
+
+        // Decode current frame.
+        if (!ReadFrameGIF(curr_gif_image, gif->SColorMap, curr_gcb.TransparentColor, canvas_width_in_bytes,
+                          curr_rgba)) {
+            DGifCloseFile(gif, NULL);
+            return 0;
+        }
+
+        if (dump_frames) {
+            if (!DumpFrame(filename, dump_folder, i, curr_rgba, canvas_width, canvas_height)) {
+                DGifCloseFile(gif, NULL);
+                return 0;
+            }
+        }
+    }
+    DGifCloseFile(gif, NULL);
+    return 1;
+}
+
+#else
+
+static int ReadAnimatedGIF(const char filename[],
+                           AnimatedImage* const image,
+                           int dump_frames,
+                           const char dump_folder[]) {
+    (void)filename;
+    (void)image;
+    (void)dump_frames;
+    (void)dump_folder;
+    fprintf(stderr,
+            "GIF support not compiled. Please install the libgif-dev "
+            "package before building.\n");
+    return 0;
+}
+
+#endif // WEBP_HAVE_GIF
+
+// -----------------------------------------------------------------------------
+
+// Loads the animation stored in 'filename' into 'image'. The file contents
+// decide which reader runs: the WebP reader when the data is recognized as
+// WebP, the GIF reader when it starts with a GIF signature, and an error for
+// anything else. 'image' is zeroed up front and cleared again when decoding
+// fails. Returns the reader's status, or false when the file cannot be read
+// or its type is unknown.
+int ReadAnimatedImage(const char filename[], AnimatedImage* const image, int dump_frames, const char dump_folder[]) {
+    WebPData file_data;
+    int status;
+
+    WebPDataInit(&file_data);
+    memset(image, 0, sizeof(*image));
+
+    if (!ExUtilReadFile(filename, &file_data.bytes, &file_data.size)) {
+        fprintf(stderr, "Error reading file: %s\n", filename);
+        return 0;
+    }
+
+    if (IsWebP(&file_data)) {
+        status = ReadAnimatedWebP(filename, &file_data, image, dump_frames, dump_folder);
+    } else if (!IsGIF(&file_data)) {
+        fprintf(stderr, "Unknown file type: %s. Supported file types are WebP and GIF\n", filename);
+        status = 0;
+    } else {
+        status = ReadAnimatedGIF(filename, image, dump_frames, dump_folder);
+    }
+
+    if (status == 0) ClearAnimatedImage(image);
+    WebPDataClear(&file_data);
+    return status;
+}
+
+// Folds the absolute difference between 'v1' and 'v2' into two running
+// statistics: '*max_diff' keeps the largest difference seen so far and
+// '*sse' accumulates the sum of squared differences.
+static void Accumulate(double v1, double v2, double* const max_diff, double* const sse) {
+    const double delta = fabs(v1 - v2);
+    if (*max_diff < delta) {
+        *max_diff = delta;
+    }
+    *sse = *sse + delta * delta;
+}
+
+// Compares two RGBA buffers of 'width' x 'height' pixels, rows stored back
+// to back. '*max_diff' receives the largest per-channel absolute difference,
+// truncated to an integer. '*psnr' receives 99 when that value is 0, and
+// otherwise the PSNR derived from the mean squared error over all channels.
+// The alpha channel is always compared as is. When 'premultiply' is true the
+// other channels are scaled by their pixel's alpha / 255 before comparison.
+void GetDiffAndPSNR(const uint8_t rgba1[],
+                    const uint8_t rgba2[],
+                    uint32_t width,
+                    uint32_t height,
+                    int premultiply,
+                    int* const max_diff,
+                    double* const psnr) {
+    const uint32_t stride = width * kNumChannels;
+    const int kAlphaChannel = kNumChannels - 1;
+    double worst = 0.;
+    double sum_sq = 0.;
+    uint32_t row, col;
+    int worst_int;
+
+    for (row = 0; row < height; ++row) {
+        for (col = 0; col < stride; col += kNumChannels) {
+            const size_t offset = row * stride + col;
+            const uint8_t* const px1 = rgba1 + offset;
+            const uint8_t* const px2 = rgba2 + offset;
+            const int a1 = px1[kAlphaChannel];
+            const int a2 = px2[kAlphaChannel];
+            int c;
+
+            Accumulate(a1, a2, &worst, &sum_sq);
+            for (c = 0; c < kAlphaChannel; ++c) {
+                if (premultiply) {
+                    // Weight each color channel by its own pixel's alpha.
+                    Accumulate(px1[c] * a1 / 255., px2[c] * a2 / 255., &worst, &sum_sq);
+                } else {
+                    Accumulate(px1[c], px2[c], &worst, &sum_sq);
+                }
+            }
+        }
+    }
+
+    worst_int = (int)worst;
+    *max_diff = worst_int;
+    if (worst_int != 0) {
+        sum_sq /= stride * height;
+        *psnr = 4.3429448 * log(255. * 255. / sum_sq);
+    } else {
+        *psnr = 99.; // PSNR when images are identical.
+    }
+}
diff --git a/codec/L2/demos/webpEnc/host/anim_util.h b/codec/L2/demos/webpEnc/host/anim_util.h
new file mode 100644
index 0000000000..933cb80059
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/anim_util.h
@@ -0,0 +1,66 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Utilities for animated images
+
+#ifndef WEBP_EXAMPLES_ANIM_UTIL_H_
+#define WEBP_EXAMPLES_ANIM_UTIL_H_
+
+#ifdef HAVE_CONFIG_H
+#include "webp/config.h"
+#endif
+
+#include "webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// One frame of an animation, stored as a complete canvas.
+typedef struct {
+    uint8_t* rgba;    // Decoded and reconstructed full frame.
+    int duration;     // Frame duration in milliseconds.
+    int is_key_frame; // True if this frame is a key-frame.
+} DecodedFrame;
+
+// A fully decoded animation, as filled in by ReadAnimatedImage().
+typedef struct {
+    uint32_t canvas_width;  // Canvas width, in pixels.
+    uint32_t canvas_height; // Canvas height, in pixels.
+    uint32_t bgcolor;       // Background color reported by the source file.
+    uint32_t loop_count;    // Loop count reported by the source file.
+    DecodedFrame* frames;   // Array of decoded frames, indexed by frame number.
+    uint32_t num_frames;    // Presumably the number of entries in 'frames' - TODO confirm.
+    void* raw_mem;          // Presumably the backing storage for the frame buffers - TODO confirm.
+} AnimatedImage;
+
+// Deallocate everything in 'image' (but not the object itself).
+void ClearAnimatedImage(AnimatedImage* const image);
+
+// Read animated image file into 'AnimatedImage' struct.
+// If 'dump_frames' is true, dump frames to 'dump_folder'.
+// Previous content of 'image' is obliterated.
+// Upon successful return, content of 'image' must be deleted by
+// calling 'ClearAnimatedImage'.
+int ReadAnimatedImage(const char filename[], AnimatedImage* const image, int dump_frames, const char dump_folder[]);
+
+// Given two RGBA buffers, calculate max pixel difference and PSNR.
+// Both buffers are read as 'height' rows of 'width' pixels.
+// If 'premultiply' is true, R/G/B values will be pre-multiplied by the
+// transparency before comparison.
+// '*max_diff' receives the largest per-channel difference truncated to an
+// integer; '*psnr' is set to 99 when that value is 0.
+void GetDiffAndPSNR(const uint8_t rgba1[],
+                    const uint8_t rgba2[],
+                    uint32_t width,
+                    uint32_t height,
+                    int premultiply,
+                    int* const max_diff,
+                    double* const psnr);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // WEBP_EXAMPLES_ANIM_UTIL_H_
diff --git a/codec/L2/demos/webpEnc/host/create_kernel.c b/codec/L2/demos/webpEnc/host/create_kernel.c
new file mode 100644
index 0000000000..2d163f4ef5
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/create_kernel.c
@@ -0,0 +1,984 @@
+/**********
+
+  Copyright (c) 2017, Xilinx, Inc.
+  All rights reserved.
+  Redistribution and use in source and binary forms, with or without modification,
+  are permitted provided that the following conditions are met:
+
+  1. Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+
+  2. Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+  3. Neither the name of the copyright holder nor the names of its contributors
+  may be used to endorse or promote products derived from this software
+  without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+  THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+**********/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "./create_kernel.h"
+#include "../src/enc/vp8enci.h"
+#include "../src/dec/common.h"
+#include "../src/utils/utils.h"
+
+#include "vp8_hls_syn.h"
+#include "vp8_AsyncConfig.h"
+
+#include "../src/utils/profiling.h"
+
+//#include <CL/cl_ext.h>//before 17.3
+#include <CL/cl_ext_xilinx.h>
+
+NearLosslessPara nearpara;
+ResidualPara residualpara;
+AnalyzePara analyzepara;
+EncLoopPara enclooppara;
+EncLoopPara* encloopparaAsync;
+
+oclHardware hardware;
+oclSoftware software;
+
+oclKernelInfo nearlossless;
+oclKernelInfo residualimage;
+oclKernelInfo analyze;
+
+oclKernelInfo encloop;
+
+// Allocates uninitialized storage for 'num' objects of type T, aligned to a
+// 4096-byte boundary. The memory comes from posix_memalign(), so it must be
+// released with free(). Throws std::bad_alloc when the byte count would
+// overflow std::size_t or when the allocation itself fails.
+template <typename T>
+T* aligned_allocator(std::size_t num) {
+    void* ptr = nullptr;
+    // Reject element counts whose byte size would wrap around: a wrapped size
+    // would yield a buffer far smaller than the caller asked for.
+    if (num > static_cast<std::size_t>(-1) / sizeof(T)) throw std::bad_alloc();
+    if (posix_memalign(&ptr, 4096, num * sizeof(T))) throw std::bad_alloc();
+    return reinterpret_cast<T*>(ptr);
+}
+
+// Rounds 'value' up to the nearest multiple of 'mutiple'. A value that is
+// already a multiple is returned unchanged, and so is any value when
+// 'mutiple' is 0, since there is no multiple to round to.
+uint32_t RoundUp(uint32_t value, uint32_t mutiple) {
+    uint32_t remain_size;
+    if (mutiple == 0) return value; // Avoid a modulo by zero.
+    remain_size = value % mutiple;
+    if (remain_size != 0) {
+        value += (mutiple - remain_size);
+    }
+    return value;
+}
+
+// Derives the near-lossless kernel geometry for an xsize x ysize image.
+// Stores the device buffer size in bytes in '*device_size' and fills the
+// global 'nearpara' with the image size, the local tile size and the width
+// of the last (edge) tile. The rounding granularity depends on the build
+// configuration (HANDLE_MULTI_PIXELS_PER_ITEM, USE_VECTOR, or neither).
+void GenNearlosslessInfo(int xsize, int ysize, int* device_size) {
+    int device_width;
+    int global_width;
+    int device_height;
+#ifdef HANDLE_MULTI_PIXELS_PER_ITEM
+    device_width = RoundUp(xsize, GRX_SIZE * PIXELS_PER_ITEM);
+    global_width = RoundUp(xsize - PADDING_SIZE, GRX_SIZE * PIXELS_PER_ITEM);
+    device_height = RoundUp(ysize, GRY_SIZE);
+#elif defined USE_VECTOR
+    // Images wider than IMAGE_4K use the *_4K group sizes.
+    if (xsize > IMAGE_4K) {
+        device_width = RoundUp(xsize, VECTOR_GRX_SIZE_4K * VECTOR_LENGTH);
+        global_width = RoundUp(xsize - VECTOR_WIDTH_PADDING, VECTOR_GRX_SIZE_4K * VECTOR_LENGTH);
+        device_height = RoundUp(ysize, VECTOR_GRY_SIZE_4K);
+    } else {
+        device_width = RoundUp(xsize, VECTOR_GRX_SIZE * VECTOR_LENGTH);
+        global_width = RoundUp(xsize - VECTOR_WIDTH_PADDING, VECTOR_GRX_SIZE * VECTOR_LENGTH);
+        device_height = RoundUp(ysize, VECTOR_GRY_SIZE);
+    }
+#else
+    device_width = RoundUp(xsize, GRX_SIZE);
+    global_width = RoundUp(xsize - PADDING_SIZE, GRX_SIZE);
+    device_height = RoundUp(ysize, GRY_SIZE);
+#endif
+    // One 32-bit word per pixel, with PADDING_SIZE extra rows.
+    *device_size = device_width * (device_height + PADDING_SIZE) * sizeof(uint32_t);
+
+    nearpara.width = xsize;
+    nearpara.height = ysize;
+    // NOTE(review): in a USE_VECTOR build the #else branch below is taken, so
+    // the tile sizes come from GRX_SIZE rather than the VECTOR_* sizes used
+    // for the rounding above - confirm this is intended.
+#ifdef HANDLE_MULTI_PIXELS_PER_ITEM
+    nearpara.lwidth = GRX_SIZE * PIXELS_PER_ITEM + PADDING_SIZE;
+    nearpara.edgewidth = GRX_SIZE * PIXELS_PER_ITEM - (global_width - xsize);
+#else
+    nearpara.lwidth = GRX_SIZE + PADDING_SIZE;
+    nearpara.edgewidth = GRX_SIZE - (global_width - xsize);
+#endif
+    nearpara.lheight = GRY_SIZE + PADDING_SIZE;
+}
+
+// Creates the input and output device buffers of the near-lossless kernel
+// ('device_size' bytes each) and sets the kernel's scalar arguments from the
+// global 'nearpara'.
+// Returns 0 on success. On any OpenCL error the failing call is reported on
+// stderr, the kernel, program, buffers and hardware context are released,
+// and -1 is returned.
+int SetNearlosslessArg(int device_size) {
+    // Scalar arguments start at index 2.
+    // NOTE(review): arguments 0 and 1 are not set in this function -
+    // presumably the two buffers are bound elsewhere; confirm.
+    int arg = 2;
+    int status = 0;
+    cl_int err;
+
+    nearpara.input_argb =
+        clCreateBuffer(hardware.mContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, device_size, NULL, &err);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    nearpara.output_argb =
+        clCreateBuffer(hardware.mContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, device_size, NULL, &err);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    // set args
+    // Order: width, height, lwidth, lheight, edgewidth.
+
+    err = clSetKernelArg(nearlossless.mKernel, arg++, sizeof(int), &(nearpara.width));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    err = clSetKernelArg(nearlossless.mKernel, arg++, sizeof(int), &(nearpara.height));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    err = clSetKernelArg(nearlossless.mKernel, arg++, sizeof(int), &(nearpara.lwidth));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    err = clSetKernelArg(nearlossless.mKernel, arg++, sizeof(int), &(nearpara.lheight));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    err = clSetKernelArg(nearlossless.mKernel, arg++, sizeof(int), &(nearpara.edgewidth));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    // Wait for the command queue to drain before reporting success.
+    err = clFinish(hardware.mQueue);
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    return status;
+
+Err:
+    // NOTE(review): when the first clCreateBuffer fails, output_argb still
+    // holds whatever it held before this call and is released below -
+    // confirm that this cannot release a stale handle.
+    releaseKernel(nearlossless);
+    releaseSoftware(software);
+    clReleaseMemObject(nearpara.input_argb);
+    clReleaseMemObject(nearpara.output_argb);
+    releaseHardware(hardware);
+
+    return status;
+}
+
+// Fills the global 'residualpara' with the geometry of the residual-image
+// kernel for a width x height image and stores a frame buffer size in
+// '*frame_size2'.
+//   residual_size: image size with each dimension rounded up to a whole
+//                  number of groups, times kNumPredModes, times 4 bytes.
+//   *frame_size2 : residual_size / kNumPredModes, plus (width + 1) 32-bit
+//                  words.
+void GenResidualImageInfo(int width, int height, int* frame_size2) {
+    const int kNumPredModes = 14;
+
+    residualpara.width = width;
+    residualpara.height = height;
+    residualpara.group_width = GRX_SIZE_RESIDUAL;
+    residualpara.group_height = GRY_SIZE_RESIDUAL;
+    // (x + g - 1) / g * g rounds x up to the next multiple of g.
+    residualpara.residual_size = ((width + residualpara.group_width - 1) / residualpara.group_width) *
+                                 residualpara.group_width *
+                                 ((height + residualpara.group_height - 1) / residualpara.group_height) *
+                                 residualpara.group_height * kNumPredModes * sizeof(uint32_t);
+    *frame_size2 = (residualpara.residual_size / kNumPredModes) + (width + 1) * sizeof(uint32_t);
+}
+
+// Creates the device buffers of the residual-image kernel - an input buffer
+// of 'frame_size2' bytes and an output buffer of 'residual_size' bytes - and
+// sets the kernel arguments in this order: width, group_height, input
+// buffer, output buffer.
+// Returns 0 on success. On any OpenCL error the failing call is reported on
+// stderr, the kernel, program, buffers and hardware context are released,
+// and -1 is returned.
+int SetResidualImageArg(int residual_size, int frame_size2) {
+    int arg = 0;
+    int result = 0;
+    cl_int status;
+
+    // Input buffer: flagged as read-only and host-write-only.
+    residualpara.buffer_argb =
+        clCreateBuffer(hardware.mContext, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, frame_size2, NULL, &status);
+    if (CL_SUCCESS != status) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(status));
+        result = -1;
+        goto Error;
+    }
+
+    // Output buffer: flagged as write-only and host-read-only.
+    residualpara.buffer_residual =
+        clCreateBuffer(hardware.mContext, CL_MEM_WRITE_ONLY | CL_MEM_HOST_READ_ONLY /* | CL_MEM_ALLOC_HOST_PTR*/,
+                       residual_size, NULL, &status);
+    if (CL_SUCCESS != status) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(status));
+        result = -1;
+        goto Error;
+    }
+
+    status = clSetKernelArg(residualimage.mKernel, arg++, sizeof(int), &(residualpara.width));
+    if (status != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(status));
+        result = -1;
+        goto Error;
+    }
+
+    // NOTE(review): the second argument is group_height, not the image
+    // height - confirm against the kernel signature.
+    status = clSetKernelArg(residualimage.mKernel, arg++, sizeof(int), &(residualpara.group_height));
+    if (status != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(status));
+        result = -1;
+        goto Error;
+    }
+
+    status = clSetKernelArg(residualimage.mKernel, arg++, sizeof(cl_mem), &(residualpara.buffer_argb));
+    if (status != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(status));
+        result = -1;
+        goto Error;
+    }
+
+    status = clSetKernelArg(residualimage.mKernel, arg++, sizeof(cl_mem), &(residualpara.buffer_residual));
+    if (status != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(status));
+        result = -1;
+        goto Error;
+    }
+
+    return result;
+
+Error:
+    // NOTE(review): when the first clCreateBuffer fails, buffer_residual
+    // still holds whatever it held before this call and is released below -
+    // confirm that this cannot release a stale handle.
+    releaseKernel(residualimage);
+    releaseSoftware(software);
+    clReleaseMemObject(residualpara.buffer_argb);
+    clReleaseMemObject(residualpara.buffer_residual);
+    releaseHardware(hardware);
+
+    return result;
+}
+
+/**
+ * Allocates the device buffers used by the analyze kernel and binds its nine arguments.
+ *
+ * Sizes are derived from the picture dimensions: mb_w x mb_h counts 16x16 blocks, the u/v
+ * planes use half the width and height (rounded up), and the y/u/v buffers get extra rows
+ * so that their heights reach RoundUp(ysize, 16) and RoundUp(uv_height, 8).
+ * The same values are passed to the kernel by value in an AnalyzeInputInfo (last argument).
+ *
+ * @param xsize picture width
+ * @param ysize picture height
+ * @return 0 on success; -1 if an OpenCL call fails, after the failure was printed to stderr
+ *         and the kernel, program, buffers and hardware were passed to their release helpers.
+ */
+int SetAnalyzeArg(int xsize, int ysize) {
+    int arg = 0;
+    int status = 0;
+    cl_int err;
+
+    AnalyzeInputInfo input_info;
+    const int mb_w = (xsize + 15) >> 4;
+    const int mb_h = (ysize + 15) >> 4;
+    const int preds_w = 4 * mb_w + 1;
+    const int preds_h = 4 * mb_h + 1;
+    const int y_stride = xsize;
+    const int uv_width = (xsize + 1) >> 1;
+    const int uv_height = (ysize + 1) >> 1;
+    const int uv_stride = uv_width;
+    const int expand_yheight = RoundUp(ysize, 16);
+    const int expand_uvheight = RoundUp(uv_height, 8);
+
+    int mb_size;
+    int preds_size;
+    int nz_size;
+    int output_size;
+    int alphas_size;
+    uint64_t y_size;
+    uint64_t u_size;
+    uint64_t v_size;
+
+    int expand_y_size = 0;
+    int expand_uv_size = 0;
+    if (expand_yheight > ysize) {
+        expand_y_size = (expand_yheight - ysize) * xsize;
+    }
+
+    if (expand_uvheight > uv_height) {
+        expand_uv_size = (expand_uvheight - uv_height) * uv_width;
+    }
+
+    input_info.width = xsize;
+    input_info.height = ysize;
+    input_info.mb_w = mb_w;
+    input_info.mb_h = mb_h;
+    input_info.y_stride = y_stride;
+    input_info.uv_stride = uv_stride;
+    input_info.preds_w = preds_w;
+    input_info.top_stride = mb_w * 16;
+
+    // Byte sizes of the buffers allocated below.
+    mb_size = mb_w * mb_h * sizeof(uint8_t);
+    preds_size = preds_w * preds_h * sizeof(uint8_t);
+    nz_size = (mb_w + 1) * sizeof(uint32_t) + WEBP_ALIGN_CST;
+    y_size = (uint64_t)y_stride * ysize;
+    u_size = (uint64_t)uv_stride * uv_height;
+    v_size = (uint64_t)uv_stride * uv_height;
+    // output_alpha/output_uvalpha: one int per block row (mb_h); alphas: (MAX_ALPHA + 1) ints per block row.
+    output_size = mb_h * sizeof(int);
+    alphas_size = mb_h * (MAX_ALPHA + 1) * sizeof(int);
+
+    input_info.mb_size = mb_size;
+    input_info.preds_size = preds_size;
+    input_info.nz_size = nz_size;
+    input_info.y_size = y_size;
+    input_info.u_size = u_size;
+    input_info.v_size = v_size;
+
+    // Buffers written by the kernel (CL_MEM_WRITE_ONLY).
+    analyzepara.mb_info =
+        clCreateBuffer(hardware.mContext, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, 3 * mb_size, NULL, &err);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    analyzepara.preds =
+        clCreateBuffer(hardware.mContext, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, preds_size, NULL, &err);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    // Input planes (CL_MEM_READ_ONLY), each sized to include its extra bottom rows.
+    analyzepara.y =
+        clCreateBuffer(hardware.mContext, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, y_size + expand_y_size, NULL, &err);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    analyzepara.u = clCreateBuffer(hardware.mContext, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, u_size + expand_uv_size,
+                                   NULL, &err);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    analyzepara.v = clCreateBuffer(hardware.mContext, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, v_size + expand_uv_size,
+                                   NULL, &err);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    analyzepara.output_alpha =
+        clCreateBuffer(hardware.mContext, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, output_size, NULL, &err);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    analyzepara.output_uvalpha =
+        clCreateBuffer(hardware.mContext, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, output_size, NULL, &err);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    analyzepara.alphas =
+        clCreateBuffer(hardware.mContext, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, alphas_size, NULL, &err);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    // Bind arguments 0..8: y, u, v, mb_info, preds, output_alpha, output_uvalpha, alphas, input_info.
+    err = clSetKernelArg(analyze.mKernel, arg++, sizeof(cl_mem), &(analyzepara.y));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    err = clSetKernelArg(analyze.mKernel, arg++, sizeof(cl_mem), &(analyzepara.u));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    err = clSetKernelArg(analyze.mKernel, arg++, sizeof(cl_mem), &(analyzepara.v));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    err = clSetKernelArg(analyze.mKernel, arg++, sizeof(cl_mem), &(analyzepara.mb_info));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    err = clSetKernelArg(analyze.mKernel, arg++, sizeof(cl_mem), &(analyzepara.preds));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    err = clSetKernelArg(analyze.mKernel, arg++, sizeof(cl_mem), &(analyzepara.output_alpha));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    err = clSetKernelArg(analyze.mKernel, arg++, sizeof(cl_mem), &(analyzepara.output_uvalpha));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    err = clSetKernelArg(analyze.mKernel, arg++, sizeof(cl_mem), &(analyzepara.alphas));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    err = clSetKernelArg(analyze.mKernel, arg++, sizeof(AnalyzeInputInfo), &input_info);
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    return status;
+
+Err:
+    releaseKernel(analyze);
+    releaseSoftware(software);
+    clReleaseMemObject(analyzepara.mb_info);
+    clReleaseMemObject(analyzepara.preds);
+    clReleaseMemObject(analyzepara.y);
+    clReleaseMemObject(analyzepara.u);
+    clReleaseMemObject(analyzepara.v);
+    clReleaseMemObject(analyzepara.output_alpha);
+    clReleaseMemObject(analyzepara.output_uvalpha);
+    // alphas is created above like the other buffers, so it must be released here as well.
+    clReleaseMemObject(analyzepara.alphas);
+    releaseHardware(hardware);
+
+    return status;
+}
+
+// Allocates the five device buffers of the encoder-loop stage (input, y, u, v, output) with the fixed
+// SIZE_P_* sizes and prints how long that took. xsize/ysize are currently unused.
+// Returns 0 on success; on an OpenCL failure returns -1 after passing the kernel, program, buffers
+// and hardware to their release helpers.
+int SetEncLoopArg(int xsize, int ysize) {
+    int status = 0;
+    cl_int err;
+
+    StopProfilingWatch watch;
+    double watch_time;
+    int watch_count;
+
+    /* Disabled per-picture size computation, kept for reference; the buffers below use fixed sizes. */
+    /* const int mb_w = (xsize + 15) >> 4; */
+    /* const int mb_h = (ysize + 15) >> 4; */
+    /* const int preds_w = 4 * mb_w + 1; */
+    /* const int preds_h = 4 * mb_h + 1; */
+
+    /* const int y_width = xsize; */
+    /* const int y_height = ysize; */
+
+    /* const int uv_width = (xsize + 1) >> 1; */
+    /* const int uv_height = (ysize + 1) >> 1; */
+
+    /* const int y_stride = y_width; */
+    /* const int uv_stride = uv_width; */
+
+    /* const int expand_yheight = RoundUp(ysize, 16); */
+    /* const int expand_uvheight = RoundUp(uv_height, 8); */
+
+    /* uint64_t y_size = 0; */
+    /* uint64_t uv_size = 0; */
+
+    /* int mb_size = 0; */
+    /* int preds_size = 0; */
+    /* int nz_size = 0; */
+    /* int top_data_size = 0; */
+    /* int quant_matrix_size = 0; */
+    /* int coeffs_size = 0; */
+    /* int stats_size = 0; */
+    /* int level_cost_size = 0; */
+    /* int bw_buf_size = 0; */
+    /* int sse_size = 0; */
+    /* int block_count_size = 0; */
+    /* int extra_info_size = 0; */
+    /* int max_edge_size = 0; */
+    /* int bit_count_size = 0; */
+    /* int expand_y_size = 0; */
+    /* int expand_uv_size = 0; */
+    /* uint64_t output_size = 0; */
+    /* int output_tokens_size = 0; */
+
+    /* y_size = y_width * y_height * sizeof(uint8_t); */
+    /* uv_size = uv_width * uv_height * sizeof(uint8_t); */
+    /* output_size = MAX_NUM_MB_W * MAX_NUM_MB_H * 512 * sizeof(uint16_t); */
+
+    /* mb_size = mb_w * mb_h * sizeof(uint8_t); */
+    /* preds_size = preds_w * preds_h * sizeof(uint8_t) + preds_w + 1; */
+    /* nz_size = (mb_w + 1 + 1) * sizeof(uint32_t)/\* + WEBP_ALIGN_CST*\/; */
+    /* top_data_size = mb_w * 16 * sizeof(uint8_t); */
+    /* quant_matrix_size = sizeof(VP8EncMatrix); */
+    /* coeffs_size = NUM_CTX * NUM_PROBAS * NUM_TYPES * NUM_BANDS * sizeof(uint8_t); */
+    /* stats_size = NUM_CTX * NUM_PROBAS * NUM_TYPES * NUM_BANDS * sizeof(uint32_t); */
+    /* level_cost_size = NUM_CTX * NUM_PROBAS * NUM_TYPES * (MAX_VARIABLE_LEVEL + 1) * sizeof(uint16_t); */
+    /* bw_buf_size = 408000 * sizeof(uint8_t); */
+    /* sse_size = 4 * sizeof(uint64_t); */
+    /* block_count_size = 3 * sizeof(int); */
+    /* extra_info_size = mb_w * mb_h * sizeof(uint8_t); */
+    /* max_edge_size = NUM_MB_SEGMENTS * sizeof(int); */
+    /* bit_count_size = 4 * 3 * sizeof(uint64_t); */
+    /* output_tokens_size = sizeof(uint16_t) * PAGE_COUNT * TOKENS_COUNT_PER_PAGE;  */
+
+    /* if (expand_yheight > y_height) { */
+    /* 	expand_y_size = (expand_yheight - y_height) * y_width; */
+    /* } */
+
+    /* if (expand_uvheight > uv_height) { */
+    /* 	expand_uv_size = (expand_uvheight - uv_height) * uv_width; */
+    /* } */
+
+    StartProfiling(&watch); // must run before StopProfiling below, otherwise the printed time is meaningless
+
+    enclooppara.input =
+        clCreateBuffer(hardware.mContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, SIZE_P_INFO, NULL, &err);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+    printf("INFO: Buffer .input created \n");
+
+    enclooppara.y =
+        clCreateBuffer(hardware.mContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, SIZE_P_YSRC, NULL, &err);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+    printf("INFO: Buffer .y created \n");
+
+    enclooppara.u =
+        clCreateBuffer(hardware.mContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, SIZE_P_USRC, NULL, &err);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+    printf("INFO: Buffer .u created \n");
+
+    enclooppara.v =
+        clCreateBuffer(hardware.mContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, SIZE_P_VSRC, NULL, &err);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+    printf("INFO: Buffer .v created \n");
+
+    enclooppara.output =
+        clCreateBuffer(hardware.mContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, SIZE_P_OUT, NULL, &err);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+    printf("INFO: Buffer .output created \n");
+
+    watch_time = 0.0;
+    StopProfiling(&watch, &watch_time, &watch_count);
+    printf("INFO: Create buffers finished. Computation time is %f (ms) \n\n", watch_time);
+
+    return status;
+
+Err:
+    releaseKernel(encloop);
+    releaseSoftware(software);
+    clReleaseMemObject(enclooppara.input);
+    clReleaseMemObject(enclooppara.y);
+    clReleaseMemObject(enclooppara.u);
+    clReleaseMemObject(enclooppara.v);
+    clReleaseMemObject(enclooppara.output);
+    releaseHardware(hardware);
+
+    return status;
+}
+
+// Opens the accelerator device named by WEBPDSA, passes the xclbin path to getOclSoftware and creates
+// one webp_IntraPredLoop2_NoOut_<n> and one webp_2_ArithmeticCoding_<n> kernel object per (instance,
+// async slot) pair, stored in encloop.mKernelPred / encloop.mKernelAC at instance * NasyncDepth + slot.
+// Returns 0 on success, -1 if the device cannot be opened or a kernel cannot be created (objects
+// created before the failure are not released here).
+int CreateKernel(const char* xclbinpath) {
+    using namespace xf::common::utils_sw;
+    Logger logger(std::cout, std::cerr);
+
+    StopProfilingWatch watch;
+    double watch_time;
+    int watch_count;
+
+    fprintf(stderr, "INFO: CreateKernel start. \n");
+    StartProfiling(&watch);
+
+    int status = 0;
+    cl_int err = CL_SUCCESS;
+
+    const cl_device_type deviceType = CL_DEVICE_TYPE_ACCELERATOR;
+    char target_device_name[101] = WEBPDSA;
+
+    hardware = getOclHardware(deviceType, target_device_name);
+    if (!hardware.mQueue) {
+        fprintf(stderr, "%s %d getOclHardware\n", __func__, __LINE__);
+        return -1;
+    }
+
+    // NOTE(review): assumes software.mFileName is large enough to hold xclbinpath - confirm.
+    strcpy(software.mFileName, xclbinpath);
+    getOclSoftware(software, hardware);
+
+    encloop.mKernelPred = new cl_kernel[NasyncDepth * Ninstances];
+    encloop.mKernelAC = new cl_kernel[NasyncDepth * Ninstances];
+
+    for (int i = 0; i < Ninstances; i++) {
+        const std::string namepred = "webp_IntraPredLoop2_NoOut_" + std::to_string(i + 1);
+        const std::string nameac = "webp_2_ArithmeticCoding_" + std::to_string(i + 1);
+        for (int j = 0; j < NasyncDepth; j++) {
+            // Pass &err so the status logged and printed below is the one OpenCL actually reported.
+            encloop.mKernelPred[i * NasyncDepth + j] = clCreateKernel(software.mProgram, namepred.c_str(), &err);
+            logger.logCreateKernel(err);
+            if (encloop.mKernelPred[i * NasyncDepth + j] == 0) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                status = -1;
+                return status;
+            }
+
+            encloop.mKernelAC[i * NasyncDepth + j] = clCreateKernel(software.mProgram, nameac.c_str(), &err);
+            logger.logCreateKernel(err);
+            if (encloop.mKernelAC[i * NasyncDepth + j] == 0) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                status = -1;
+                return status;
+            }
+        }
+    }
+
+    watch_time = 0.0;
+    StopProfiling(&watch, &watch_time, &watch_count);
+    fprintf(stderr, "INFO: CreateKernel finished. Computation time is %f (ms) \n\n", watch_time);
+
+    return status;
+}
+
+int ReleaseKernel() {
+    // Releases every kernel handle stored in encloop, then frees both handle arrays; always returns 0.
+    fprintf(stderr, "INFO: Release Kernel. \n");
+    const auto kernel_count = Ninstances * NasyncDepth;
+    for (int k = 0; k < kernel_count; ++k) {
+        clReleaseKernel(encloop.mKernelPred[k]);
+        clReleaseKernel(encloop.mKernelAC[k]);
+    }
+
+    delete[] encloop.mKernelPred;
+    delete[] encloop.mKernelAC;
+    return 0;
+}
+
+int SetKernelArg(int xsize, int ysize) {
+    // Only the encoder-loop stage is set up here; the result of SetEncLoopArg
+    // (0 on success, -1 on failure) is returned unchanged.
+    //
+    // The other stages are currently disabled:
+    //   int frame_size2;
+    //   GenNearlosslessInfo(xsize, ysize, &device_size);
+    //   status = SetNearlosslessArg(device_size);
+    //   GenResidualImageInfo(xsize, ysize, &frame_size2);
+    //   status = SetResidualImageArg(residualpara.residual_size, frame_size2);
+    //   status = SetAnalyzeArg(xsize, ysize);
+
+    const int enc_loop_status = SetEncLoopArg(xsize, ysize);
+
+    // printf("INFO: SetKernelArg() finished \n\n");
+
+    return enc_loop_status;
+}
+
+// Create device buffers.
+// For every (instance, async slot) pair this allocates the host staging areas with aligned_allocator and
+// the matching device buffers in the DDR bank chosen for the instance. Numbatch scales the input and
+// prob buffers. Failures are reported on stderr and make the function return -1 (0 otherwise); the
+// remaining buffers are still attempted, as before.
+int CreateDeviceBuffers(const int Numbatch) {
+    int status = 0;
+    cl_int err;
+
+    StopProfilingWatch watch;
+    double watch_time;
+    int watch_count;
+
+    fprintf(stderr, "INFO: Create buffers started.\n");
+
+    StartProfiling(&watch);
+
+    encloopparaAsync = new EncLoopPara[Ninstances * NasyncDepth];
+
+    for (int i = 0; i < Ninstances; i++) {
+        for (int j = 0; j < NasyncDepth; j++) {
+            cl_mem_ext_ptr_t bankmem_input = {};
+            cl_mem_ext_ptr_t bankmem_y = {};
+            cl_mem_ext_ptr_t bankmem_u = {};
+            cl_mem_ext_ptr_t bankmem_v = {};
+            cl_mem_ext_ptr_t bankmem_output = {};
+            cl_mem_ext_ptr_t bankmem_prob = {};
+            cl_mem_ext_ptr_t bankmem_bw = {};
+            cl_mem_ext_ptr_t bankmem_ret = {};
+            cl_mem_ext_ptr_t bankmem_pred = {};
+
+            // Bank per instance: 0,1 -> BANK3; 2,3 -> BANK0; 4 -> BANK1; 5 -> BANK2. Others keep the zeroed flags.
+            if (i == 0) {
+                bankmem_input.flags = XCL_MEM_DDR_BANK3;
+                bankmem_y.flags = XCL_MEM_DDR_BANK3;
+                bankmem_u.flags = XCL_MEM_DDR_BANK3;
+                bankmem_v.flags = XCL_MEM_DDR_BANK3;
+                bankmem_output.flags = XCL_MEM_DDR_BANK3;
+                bankmem_prob.flags = XCL_MEM_DDR_BANK3;
+                bankmem_bw.flags = XCL_MEM_DDR_BANK3;
+                bankmem_ret.flags = XCL_MEM_DDR_BANK3;
+                bankmem_pred.flags = XCL_MEM_DDR_BANK3;
+            } else if (i == 1) {
+                bankmem_input.flags = XCL_MEM_DDR_BANK3;
+                bankmem_y.flags = XCL_MEM_DDR_BANK3;
+                bankmem_u.flags = XCL_MEM_DDR_BANK3;
+                bankmem_v.flags = XCL_MEM_DDR_BANK3;
+                bankmem_output.flags = XCL_MEM_DDR_BANK3;
+                bankmem_prob.flags = XCL_MEM_DDR_BANK3;
+                bankmem_bw.flags = XCL_MEM_DDR_BANK3;
+                bankmem_ret.flags = XCL_MEM_DDR_BANK3;
+                bankmem_pred.flags = XCL_MEM_DDR_BANK3;
+            } else if (i == 2) {
+                bankmem_input.flags = XCL_MEM_DDR_BANK0;
+                bankmem_y.flags = XCL_MEM_DDR_BANK0;
+                bankmem_u.flags = XCL_MEM_DDR_BANK0;
+                bankmem_v.flags = XCL_MEM_DDR_BANK0;
+                bankmem_output.flags = XCL_MEM_DDR_BANK0;
+                bankmem_prob.flags = XCL_MEM_DDR_BANK0;
+                bankmem_bw.flags = XCL_MEM_DDR_BANK0;
+                bankmem_ret.flags = XCL_MEM_DDR_BANK0;
+                bankmem_pred.flags = XCL_MEM_DDR_BANK0;
+            } else if (i == 3) {
+                bankmem_input.flags = XCL_MEM_DDR_BANK0;
+                bankmem_y.flags = XCL_MEM_DDR_BANK0;
+                bankmem_u.flags = XCL_MEM_DDR_BANK0;
+                bankmem_v.flags = XCL_MEM_DDR_BANK0;
+                bankmem_output.flags = XCL_MEM_DDR_BANK0;
+                bankmem_prob.flags = XCL_MEM_DDR_BANK0;
+                bankmem_bw.flags = XCL_MEM_DDR_BANK0;
+                bankmem_ret.flags = XCL_MEM_DDR_BANK0;
+                bankmem_pred.flags = XCL_MEM_DDR_BANK0;
+            } else if (i == 4) {
+                bankmem_input.flags = XCL_MEM_DDR_BANK1;
+                bankmem_y.flags = XCL_MEM_DDR_BANK1;
+                bankmem_u.flags = XCL_MEM_DDR_BANK1;
+                bankmem_v.flags = XCL_MEM_DDR_BANK1;
+                bankmem_output.flags = XCL_MEM_DDR_BANK1;
+                bankmem_prob.flags = XCL_MEM_DDR_BANK1;
+                bankmem_bw.flags = XCL_MEM_DDR_BANK1;
+                bankmem_ret.flags = XCL_MEM_DDR_BANK1;
+                bankmem_pred.flags = XCL_MEM_DDR_BANK1;
+            } else if (i == 5) {
+                bankmem_input.flags = XCL_MEM_DDR_BANK2;
+                bankmem_y.flags = XCL_MEM_DDR_BANK2;
+                bankmem_u.flags = XCL_MEM_DDR_BANK2;
+                bankmem_v.flags = XCL_MEM_DDR_BANK2;
+                bankmem_output.flags = XCL_MEM_DDR_BANK2;
+                bankmem_prob.flags = XCL_MEM_DDR_BANK2;
+                bankmem_bw.flags = XCL_MEM_DDR_BANK2;
+                bankmem_ret.flags = XCL_MEM_DDR_BANK2;
+                bankmem_pred.flags = XCL_MEM_DDR_BANK2;
+            }
+
+            bankmem_input.param = 0;
+            bankmem_y.param = 0;
+            bankmem_u.param = 0;
+            bankmem_v.param = 0;
+            bankmem_output.param = 0;
+            bankmem_prob.param = 0;
+            bankmem_bw.param = 0;
+            bankmem_ret.param = 0;
+            bankmem_pred.param = 0;
+
+            const uint32_t offset_info = Get_Busoffset_info_32bits() * sizeof(uint32_t);
+            encloopparaAsync[i * NasyncDepth + j].inputcpu = aligned_allocator<uint8_t>(offset_info * Numbatch);
+            encloopparaAsync[i * NasyncDepth + j].ycpu = aligned_allocator<uint8_t>(SIZE32_MEM_YSRC * 4);
+            encloopparaAsync[i * NasyncDepth + j].ucpu = aligned_allocator<uint8_t>(SIZE32_MEM_UVSRC * 4);
+            encloopparaAsync[i * NasyncDepth + j].vcpu = aligned_allocator<uint8_t>(SIZE32_MEM_UVSRC * 4);
+            encloopparaAsync[i * NasyncDepth + j].probcpu = aligned_allocator<uint8_t>(SIZE8_MEM_PROB * Numbatch);
+            encloopparaAsync[i * NasyncDepth + j].bwcpu = aligned_allocator<uint8_t>(SIZE8_MEM_BW);
+            encloopparaAsync[i * NasyncDepth + j].retcpu = aligned_allocator<uint8_t>(SIZE8_MEM_RET);
+            encloopparaAsync[i * NasyncDepth + j].predcpu = aligned_allocator<uint8_t>(SIZE8_MEM_PRED);
+
+            bankmem_input.obj = encloopparaAsync[i * NasyncDepth + j].inputcpu;
+            bankmem_y.obj = encloopparaAsync[i * NasyncDepth + j].ycpu;
+            bankmem_u.obj = encloopparaAsync[i * NasyncDepth + j].ucpu;
+            bankmem_v.obj = encloopparaAsync[i * NasyncDepth + j].vcpu;
+            bankmem_output.obj = NULL;
+            bankmem_prob.obj = encloopparaAsync[i * NasyncDepth + j].probcpu;
+            bankmem_bw.obj = encloopparaAsync[i * NasyncDepth + j].bwcpu;
+            bankmem_ret.obj = encloopparaAsync[i * NasyncDepth + j].retcpu;
+            bankmem_pred.obj = encloopparaAsync[i * NasyncDepth + j].predcpu;
+
+            // input
+            encloopparaAsync[i * NasyncDepth + j].input =
+                clCreateBuffer(hardware.mContext, CL_MEM_READ_WRITE | CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR,
+                               offset_info * Numbatch, &bankmem_input, &err);
+            if (CL_SUCCESS != err) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                status = -1;
+            }
+
+            // y
+            encloopparaAsync[i * NasyncDepth + j].y =
+                clCreateBuffer(hardware.mContext, CL_MEM_READ_WRITE | CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR,
+                               SIZE32_MEM_YSRC * 4, &bankmem_y, &err);
+            if (CL_SUCCESS != err) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                status = -1;
+            }
+
+            // u
+            encloopparaAsync[i * NasyncDepth + j].u =
+                clCreateBuffer(hardware.mContext, CL_MEM_READ_WRITE | CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR,
+                               SIZE32_MEM_UVSRC * 4, &bankmem_u, &err);
+            if (CL_SUCCESS != err) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                status = -1;
+            }
+
+            // v
+            encloopparaAsync[i * NasyncDepth + j].v =
+                clCreateBuffer(hardware.mContext, CL_MEM_READ_WRITE | CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR,
+                               SIZE32_MEM_UVSRC * 4, &bankmem_v, &err);
+            if (CL_SUCCESS != err) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                status = -1;
+            }
+
+            // output
+            encloopparaAsync[i * NasyncDepth + j].output =
+                clCreateBuffer(hardware.mContext, CL_MEM_READ_WRITE | CL_MEM_EXT_PTR_XILINX, SIZE32_MEM_LEVEL * 4,
+                               &bankmem_output, &err);
+            if (CL_SUCCESS != err) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                status = -1;
+            }
+
+            // output_prob
+            encloopparaAsync[i * NasyncDepth + j].output_prob =
+                clCreateBuffer(hardware.mContext, CL_MEM_READ_WRITE | CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR,
+                               SIZE8_MEM_PROB * Numbatch, &bankmem_prob, &err);
+            if (CL_SUCCESS != err) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                status = -1;
+            }
+
+            // output_bw
+            encloopparaAsync[i * NasyncDepth + j].output_bw =
+                clCreateBuffer(hardware.mContext, CL_MEM_READ_WRITE | CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR,
+                               SIZE8_MEM_BW, &bankmem_bw, &err);
+            if (CL_SUCCESS != err) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                status = -1;
+            }
+
+            // output_ret
+            encloopparaAsync[i * NasyncDepth + j].output_ret =
+                clCreateBuffer(hardware.mContext, CL_MEM_READ_WRITE | CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR,
+                               SIZE8_MEM_RET, &bankmem_ret, &err);
+            if (CL_SUCCESS != err) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                status = -1;
+            }
+
+            // output_pred
+            encloopparaAsync[i * NasyncDepth + j].output_pred =
+                clCreateBuffer(hardware.mContext, CL_MEM_READ_WRITE | CL_MEM_EXT_PTR_XILINX | CL_MEM_USE_HOST_PTR,
+                               SIZE8_MEM_PRED, &bankmem_pred, &err);
+            if (CL_SUCCESS != err) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                status = -1;
+            }
+        }
+    }
+
+    err = clFinish(hardware.mQueue);
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+    }
+
+    watch_time = 0.0;
+    StopProfiling(&watch, &watch_time, &watch_count);
+    fprintf(stderr, "INFO: Create buffers finished. Computation time is %f (ms) \n\n", watch_time);
+
+    return status;
+}
+
+// Releases every device buffer and host staging area held in encloopparaAsync, then the array itself.
+int ReleaseDeviceBuffers() {
+    for (int idx = 0; idx < Ninstances * NasyncDepth; ++idx) {
+        EncLoopPara& slot = encloopparaAsync[idx]; // one (instance, async slot) entry
+        clReleaseMemObject(slot.input);
+        clReleaseMemObject(slot.y);
+        clReleaseMemObject(slot.u);
+        clReleaseMemObject(slot.v);
+        clReleaseMemObject(slot.output);
+        clReleaseMemObject(slot.output_prob);
+        clReleaseMemObject(slot.output_pred);
+        clReleaseMemObject(slot.output_bw);
+        clReleaseMemObject(slot.output_ret);
+
+        free(slot.inputcpu);
+        free(slot.ycpu);
+        free(slot.ucpu);
+        free(slot.vcpu);
+        free(slot.probcpu);
+        free(slot.bwcpu);
+        free(slot.predcpu);
+        free(slot.retcpu);
+    }
+
+    delete[] encloopparaAsync;
+    return 0;
+}
diff --git a/codec/L2/demos/webpEnc/host/create_kernel.h b/codec/L2/demos/webpEnc/host/create_kernel.h
new file mode 100644
index 0000000000..9cc353eafa
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/create_kernel.h
@@ -0,0 +1,286 @@
+/**********
+
+  Copyright (c) 2017, Xilinx, Inc.
+  All rights reserved.
+  Redistribution and use in source and binary forms, with or without modification,
+  are permitted provided that the following conditions are met:
+
+  1. Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+
+  2. Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+  3. Neither the name of the copyright holder nor the names of its contributors
+  may be used to endorse or promote products derived from this software
+  without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+  THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+**********/
+
+#ifndef WEBP_CREAT_NEARLERNEL_H_
+#define WEBP_CREAT_NEARLERNEL_H_
+
+#include "../src/webp/types.h"
+#include "../src/enc/kernel/oclHelper.h"
+#include "../src/dec/common.h"
+#include "xf_utils_sw/logger.hpp"
+
+#include "vp8_hls_syn.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Macros for the nearlossless kernel
+#define GRX_SIZE 256 // Work group size for kernel nearlossless
+#define GRY_SIZE 16  // Work group size for kernel nearlossless
+#define PADDING_SIZE 4
+
+// #define HANDLE_MULTI_PIXELS_PER_ITEM        // ON  : handle multiple pixels per item
+//                                             // OFF : handle one pixel per item
+#define PIXELS_PER_ITEM 4
+
+#define USE_VECTOR // ON  : The input parameter type is uint16
+#define VECTOR_GRX_SIZE 128
+#define VECTOR_GRY_SIZE 16
+#define VECTOR_WIDTH_PADDING 16
+#define VECTOR_HEIGHT_PADDING 2
+#define VECTOR_LENGTH 16
+
+#define VECTOR_GRX_SIZE_4K 256
+#define VECTOR_GRY_SIZE_4K 8
+#define IMAGE_4K 2048
+
+// Macros for the residualimage kernel
+#define GRX_SIZE_RESIDUAL 256 // Work group size for kernel residualimage
+#define GRY_SIZE_RESIDUAL 16  // Work group size for kernel residualimage
+
+#define MAX_ALPHA 255 // 8b of precision for susceptibilities.
+
+#define ANALYZE_GRX_SIZE 240 // Support only under 4k image
+#define ENCLOOP_GRX_SIZE 135 // Support only under 4k image
+
+#define USE_C_KERNEL
+
+typedef struct NearLosslessPara { // Argument bundle named for the nearlossless kernel: two cl_mem buffers + scalars.
+    cl_mem input_argb;
+    cl_mem output_argb;
+    cl_int width;
+    cl_int height;
+    cl_int lwidth;  // NOTE(review): meaning of the 'l' prefix is not visible here - confirm against the kernel
+    cl_int lheight;
+    cl_int edgewidth;
+    cl_int limitbits;
+} NearLosslessPara;
+
+typedef struct ResidualPara { // Argument bundle named for the residualimage kernel: two cl_mem buffers + sizes.
+    cl_mem buffer_argb;
+    cl_mem buffer_residual;
+    cl_int width;
+    cl_int height;
+    cl_int exact;
+    int group_width;   // plain int (not cl_int), unlike the fields above
+    int group_height;
+    int residual_size;
+} ResidualPara;
+
+typedef struct AnalyzePara { // cl_mem buffers plus a method selector, named for the analyze kernel.
+    cl_mem mb_info;
+    cl_mem preds;
+    cl_mem y;
+    cl_mem u;
+    cl_mem v;
+    cl_mem output_data;
+    cl_mem output_alpha;
+    cl_mem output_uvalpha;
+    cl_mem alphas;
+    cl_int method;
+} AnalyzePara;
+
+typedef struct AnalyzeInputInfo { // Scalar dimensions, strides and sizes; NOTE(review): units inferred from names only.
+    cl_int width;
+    cl_int height;
+    cl_int mb_w;
+    cl_int mb_h;
+    cl_int y_stride;
+    cl_int uv_stride;
+    cl_int preds_w;
+    cl_int top_stride;
+    size_t mb_size;    // size_t width is host dependent
+    size_t preds_size;
+    size_t nz_size;
+    uint64_t y_size;   // fixed 64-bit, unlike the size_t fields above
+    uint64_t u_size;
+    uint64_t v_size;
+} AnalyzeInputInfo;
+
+typedef struct AnalyzeOutput { // Pair of int results; presumably produced by the analyze step - confirm.
+    int alpha;
+    int uv_alpha;
+} AnalyzeOutput;
+
+typedef struct EncLoopPara { // OpenCL buffers plus host-side (*cpu) pointers used with the encloop kernels.
+    cl_mem input;
+    cl_mem y;       // input/output width * height
+    cl_mem u;       // input/output (width + 1) / 2 * (height + 1) / 2
+    cl_mem v;       // input/output (width + 1) / 2 * (height + 1) / 2
+    cl_mem mb_info; // No longer used:// input/output 4 * ((width + 15) / 16) * ((height + 15) / 16)
+    cl_mem preds;   // No longer used:// input/output (4 * ((width + 15) / 16) + 1) * (4 * ((height + 15) / 16) + 1)
+    cl_mem nz;      // No longer used:// output (((width + 15) / 16) + 1) * 4 + 31
+    cl_mem y_top;   // No longer used:// output ((width + 15) / 16)*16
+    cl_mem uv_top;  // No longer used:// output ((width + 15) / 16)*16
+    cl_mem quant_matrix; // No longer used:// input 3 * (96 * 3 + 192 * 2) VP8Matrix
+    cl_mem coeffs;       // No longer used:// input 4 * 8 * 3 * 11
+    cl_mem stats;        // No longer used:// input 4 * 4 * 8 * 3 * 11
+    cl_mem level_cost;   // No longer used:// input 2 * 4 * 8 * 3 * 68
+    cl_mem segment;      // No longer used:// input
+    cl_mem bw_buf;       // No longer used:// output 1 * 408000B
+    cl_mem sse;          // No longer used:// output 4 * 8
+    cl_mem block_count;  // No longer used:// output 3 * 4
+    cl_mem extra_info;   // No longer used:// output ((width + 15) / 16) * ((height + 15) / 16)
+    cl_mem max_edge;     // No longer used:// output 4 * 5
+    cl_mem bit_count;    // No longer used:// output 8 * 4 * 3
+    cl_mem sse_count;    // No longer used:
+    cl_mem output;       // Output of new kernel-1, used by kernel-2
+    cl_mem output_prob;  // Output of new kernel-1, used for probability table passed to kernel-2, also used for
+                         // enc->prob_.coeff_
+    cl_mem output_bw;    // Output of kernel-2, used for AC
+    cl_mem output_ret;  // Output of kernel-2, used for propagating the return value of kernel-1's intra-prediction,
+                        // hidden in pout_level
+    cl_mem output_pred; // Output of kernel-2, used for propagating the return value of kernel-1's intra-prediction,
+                        // hidden in pout_level
+    cl_mem output_data; // No longer used: output
+    cl_mem output_tokens; // No longer used:uint16_t tokens_[PAGE_COUNT * TOKENS_COUNT_PER_PAGE];
+
+    // AllPicInfo* inputcpu;
+    uint8_t* inputcpu;
+    uint8_t* ycpu;
+    uint8_t* ucpu;
+    uint8_t* vcpu;
+    uint8_t* probcpu;
+    uint8_t* predcpu;
+    uint8_t* bwcpu;
+    uint8_t* retcpu;
+
+    cl_mem ysub;
+    cl_mem usub;
+    cl_mem vsub;
+} EncLoopPara; // the declarator is required: without it the typedef names nothing and is ignored
+
+typedef struct EncloopInputData { // cl_int scalar settings named for the encloop kernel input.
+    cl_int width;
+    cl_int height;
+    cl_int filter_sharpness;
+    cl_int show_compressed;
+    cl_int extra_info_type;
+    cl_int stats_add;
+    cl_int simple;
+    cl_int num_parts;
+    cl_int max_i4_header_bits;
+    cl_int lf_stats_status;
+    cl_int use_skip_proba;
+    cl_int method;
+    cl_int rd_opt;
+} EncloopInputData;
+
+typedef struct EncloopSegmentData { // Per-segment int tables, each holding NUM_MB_SEGMENTS entries.
+    int quant[NUM_MB_SEGMENTS];
+    int fstrength[NUM_MB_SEGMENTS];
+
+    int max_edge[NUM_MB_SEGMENTS];
+
+    int min_disto[NUM_MB_SEGMENTS];
+    int lambda_i16[NUM_MB_SEGMENTS];
+    int lambda_i4[NUM_MB_SEGMENTS];
+    int lambda_uv[NUM_MB_SEGMENTS];
+    int lambda_mode[NUM_MB_SEGMENTS];
+    int tlambda[NUM_MB_SEGMENTS];
+    int lambda_trellis_i16[NUM_MB_SEGMENTS];
+    int lambda_trellis_i4[NUM_MB_SEGMENTS];
+    int lambda_trellis_uv[NUM_MB_SEGMENTS];
+} EncloopSegmentData;
+
+typedef struct VP8EncMatrix { // Five 16-entry uint32_t tables describing one quantization matrix.
+    uint32_t q_[16];       // quantizer steps
+    uint32_t iq_[16];      // reciprocals, fixed point.
+    uint32_t bias_[16];    // rounding bias
+    uint32_t zthresh_[16]; // value below which a coefficient is zeroed
+    uint32_t sharpen_[16]; // frequency boosters for slight sharpening
+} VP8EncMatrix;
+
+typedef struct EncLoopOutputData { // int32_t state fields; the second group is annotated as token-buffer state.
+    int32_t range;
+    int32_t value;
+    int32_t run;
+    int32_t nb_bits;
+    int32_t pos;
+    int32_t max_pos;
+    int32_t error;
+    int32_t max_i4_header_bits;
+    // for token buf
+    int32_t cur_page_;
+    int32_t page_count_;
+    int32_t left_;      // how many free tokens left before the page is full
+    int32_t page_size_; // number of tokens per page
+    int32_t error_;     // true in case of malloc error
+
+    // uint64_t sse_count;
+} EncLoopOutputData;
+
+extern NearLosslessPara nearpara;
+extern ResidualPara residualpara;
+extern AnalyzePara analyzepara;
+extern EncLoopPara enclooppara;
+extern EncLoopPara* encloopparaAsync; // array; ReleaseDeviceBuffers() walks Ninstances * NasyncDepth entries
+extern oclHardware hardware;
+extern oclSoftware software;
+extern oclKernelInfo nearlossless;
+extern oclKernelInfo residualimage;
+extern oclKernelInfo analyze;
+extern oclKernelInfo encloop;
+
+uint32_t RoundUp(uint32_t value, uint32_t mutiple);
+
+// presumably sets kernel arguments for an xsize-by-ysize input - confirm against the definition
+int SetKernelArg(int xsize, int ysize);
+
+// create / release the kernel and the device buffers
+int CreateKernel(const char* xclbinpath);
+int ReleaseKernel();
+int CreateDeviceBuffers(const int);
+int ReleaseDeviceBuffers();
+
+// set arguments for nearlossless kernel.
+int SetNearlosslessArg(int device_size);
+
+// Generate information for nearlossless kernel.
+void GenNearlosslessInfo(int xsize, int ysize, int* device_size);
+
+// set arguments for residualimage kernel.
+int SetResidualImageArg(int residual_size, int frame_size2);
+
+// Generate information for residualimage kernel.
+void GenResidualImageInfo(int xsize, int ysize, int* frame_size2);
+
+// set arguments for analyze kernel.
+int SetAnalyzeArg(int xsize, int ysize);
+
+// set arguments for encloop kernel.
+int SetEncLoopArg(int xsize, int ysize);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // WEBP_CREAT_NEARLERNEL_H_
diff --git a/codec/L2/demos/webpEnc/host/cwebp.c b/codec/L2/demos/webpEnc/host/cwebp.c
new file mode 100644
index 0000000000..281364b66b
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/cwebp.c
@@ -0,0 +1,1385 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  simple command line calling the WebPEncode function.
+//  Encodes a raw .YUV into WebP bitstream
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string>
+#include <fstream>
+#include <iostream>
+
+#ifdef HAVE_CONFIG_H
+#include "webp/config.h"
+#endif
+
+#ifdef WEBP_USE_THREAD
+#include <sys/sysinfo.h>
+#endif
+
+#include "xf_utils_sw/logger.hpp"
+
+#include "webp/encode.h"
+
+#include "./example_util.h"
+#include "./metadata.h"
+#include "./stopwatch.h"
+#include "./create_kernel.h"
+
+#include "./jpegdec.h"
+#include "./pngdec.h"
+#include "./webpdec.h"
+#include "./wicdec.h"
+#include "../src/utils/profiling.h"
+
+#include "vp8_AsyncConfig.h"
+
+#ifndef WEBP_DLL
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void* VP8GetCPUInfo; // opaque forward declaration.
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+#endif // WEBP_DLL
+
+//------------------------------------------------------------------------------
+
+static int verbose = 0;
+
+// Copies a tightly packed planar Y, U, V buffer into 'pic'. Returns 0 on size mismatch or allocation failure.
+static int ReadYUV(const uint8_t* const data, size_t data_size, WebPPicture* const pic) {
+    const int use_argb = pic->use_argb;
+    const int uv_width = (pic->width + 1) / 2;
+    const int uv_height = (pic->height + 1) / 2;
+    const size_t uv_plane_size = (size_t)uv_width * uv_height;
+    const size_t expected_data_size = (size_t)pic->width * pic->height + 2 * uv_plane_size;
+
+    if (data_size != expected_data_size) {
+        fprintf(stderr, "input data doesn't have the expected size (%d instead of %d)\n", (int)data_size,
+                (int)expected_data_size);
+        return 0;
+    }
+
+    pic->use_argb = 0;
+    if (!WebPPictureAlloc(pic)) return 0;
+
+    for (int y = 0; y < pic->height; ++y) {
+        memcpy(pic->y + y * pic->y_stride, data + y * pic->width, pic->width * sizeof(*pic->y));
+    }
+    for (int y = 0; y < uv_height; ++y) {
+        // Source chroma follows the packed luma plane (width * height); y_stride only describes the destination.
+        const uint8_t* const uv_data = data + (size_t)pic->width * pic->height;
+        memcpy(pic->u + y * pic->uv_stride, uv_data + y * uv_width, uv_width * sizeof(*uv_data));
+        memcpy(pic->v + y * pic->uv_stride, uv_data + y * uv_width + uv_plane_size, uv_width * sizeof(*uv_data));
+    }
+    return use_argb ? WebPPictureYUVAToARGB(pic) : 1;
+}
+
+#ifdef HAVE_WINCODEC_H
+
+static int ReadPicture(const char* const filename, WebPPicture* const pic, int keep_alpha, Metadata* const metadata) {
+    int ok = 0; // return value: non-zero only once 'pic' has been filled from 'filename'
+    const uint8_t* data = NULL; // file contents when read via ExUtilReadFile(); freed before returning
+    size_t data_size = 0;
+    if (pic->width != 0 && pic->height != 0) {
+        // A caller-supplied size means the file is treated as raw YUV.
+        ok = ExUtilReadFile(filename, &data, &data_size);
+        ok = ok && ReadYUV(data, data_size, pic);
+    } else {
+        // If no size specified, try to decode it using WIC.
+        ok = ReadPictureWithWIC(filename, pic, keep_alpha, metadata);
+        if (!ok) {
+            ok = ExUtilReadFile(filename, &data, &data_size);
+            ok = ok && ReadWebP(data, data_size, pic, keep_alpha, metadata);
+        }
+    }
+    if (!ok) {
+        fprintf(stderr, "Error! Could not process file %s\n", filename);
+    }
+    free((void*)data);
+    return ok;
+}
+
+#else // !HAVE_WINCODEC_H
+
+typedef enum { // Input container formats distinguished by GuessImageType().
+    PNG_ = 0,
+    JPEG_,
+    TIFF_, // 'TIFF' clashes with libtiff
+    WEBP_,
+    UNSUPPORTED // returned when no known signature matches
+} InputFileFormat;
+
+static uint32_t GetBE32(const uint8_t buf[]) { // big-endian read of buf[0..3]; buf[0] is the most significant byte
+    return buf[3] | (buf[2] << 8) | (buf[1] << 16) | ((uint32_t)buf[0] << 24);
+}
+
+// Classifies a file by its leading signature bytes; 'buf' must hold at least 12 readable bytes.
+static InputFileFormat GuessImageType(const uint8_t buf[12]) {
+    const uint32_t head = GetBE32(buf + 0);
+    const uint32_t form = GetBE32(buf + 8);
+
+    // 0x89 'P' 'N' 'G'
+    if (head == 0x89504E47U) return PNG_;
+    // 0xFF 0xD8 0xFF followed by any fourth byte
+    if (head >= 0xFFD8FF00U && head <= 0xFFD8FFFFU) return JPEG_;
+    // "II*\0" or "MM\0*"
+    if (head == 0x49492A00 || head == 0x4D4D002A) return TIFF_;
+    // "RIFF" at offset 0 and "WEBP" at offset 8
+    if (head == 0x52494646 && form == 0x57454250) return WEBP_;
+    return UNSUPPORTED;
+}
+
+static int ReadPicture(const char* const filename, WebPPicture* const pic, int keep_alpha, Metadata* const metadata) {
+    const uint8_t* data = NULL; // file contents from ExUtilReadFile(); freed at End
+    size_t data_size = 0;
+    int ok = 0; // return value: non-zero only if the file was read and decoded into 'pic'
+
+    ok = ExUtilReadFile(filename, &data, &data_size);
+    if (!ok) goto End;
+
+    if (pic->width == 0 || pic->height == 0) {
+        ok = 0; // stays 0 for inputs shorter than 12 bytes, unsupported formats, and TIFF (reader disabled below)
+        if (data_size >= 12) {
+            const InputFileFormat format = GuessImageType(data);
+            if (format == PNG_) {
+                ok = ReadPNG(data, data_size, pic, keep_alpha, metadata);
+            } else if (format == JPEG_) {
+                ok = ReadJPEG(data, data_size, pic, metadata);
+            } else if (format == TIFF_) {
+                ; // ok = ReadTIFF(data, data_size, pic, keep_alpha, metadata);
+            } else if (format == WEBP_) {
+                ok = ReadWebP(data, data_size, pic, keep_alpha, metadata);
+            }
+        }
+    } else {
+        // If image size is specified, infer it as YUV format.
+        ok = ReadYUV(data, data_size, pic);
+    }
+End: // single exit: report failure, release the file buffer
+    if (!ok) {
+        fprintf(stderr, "Error! Could not process file %s\n", filename);
+    }
+    free((void*)data);
+    return ok;
+}
+
+#endif // !HAVE_WINCODEC_H
+
+static void AllocExtraInfo(WebPPicture* const pic) { // one extra_info entry per 16x16 block, dimensions rounded up
+    const int blocks_across = (pic->width + 15) / 16;
+    const int blocks_down = (pic->height + 15) / 16;
+    pic->extra_info = (uint8_t*)malloc(blocks_across * blocks_down * sizeof(*pic->extra_info));
+}
+
+// Prints four byte counts as table cells on stderr, then their sum and its percentage of 'total_size'.
+static void PrintByteCount(const int bytes[4], int total_size, int* const totals) {
+    int sum = 0;
+    for (int seg = 0; seg < 4; ++seg) {
+        fprintf(stderr, "| %7d ", bytes[seg]);
+        sum += bytes[seg];
+        if (totals != NULL) totals[seg] += bytes[seg]; // optional running per-segment totals
+    }
+    fprintf(stderr, "| %7d  (%.1f%%)\n", sum, 100.f * sum / total_size);
+}
+
+// Prints each of the four counts as an integer percentage of 'total' on stderr, then 'total' itself.
+static void PrintPercents(const int counts[4], int total) {
+    for (int seg = 0; seg < 4; ++seg) {
+        fprintf(stderr, "|      %2d%%", 100 * counts[seg] / total);
+    }
+    fprintf(stderr, "| %7d\n", total);
+}
+
+// Prints the four values as "| %7d " cells on stderr and closes the row with "|".
+static void PrintValues(const int values[4]) {
+    for (int seg = 0; seg < 4; ++seg) {
+        fprintf(stderr, "| %7d ", values[seg]);
+    }
+    fprintf(stderr, "|\n");
+}
+
+static void PrintFullLosslessInfo(const WebPAuxStats* const stats, const char* const description) {
+    // 'description' only labels the first line ("Lossless-<description> compressed size").
+    fprintf(stderr, "Lossless-%s compressed size: %d bytes\n", description, stats->lossless_size);
+    fprintf(stderr, "  * Header size: %d bytes, image data size: %d\n", stats->lossless_hdr_size,
+            stats->lossless_data_size);
+    if (stats->lossless_features) { // bit mask: each set bit names one feature below
+        fprintf(stderr, "  * Lossless features used:");
+        if (stats->lossless_features & 1) fprintf(stderr, " PREDICTION");
+        if (stats->lossless_features & 2) fprintf(stderr, " CROSS-COLOR-TRANSFORM");
+        if (stats->lossless_features & 4) fprintf(stderr, " SUBTRACT-GREEN");
+        if (stats->lossless_features & 8) fprintf(stderr, " PALETTE");
+        fprintf(stderr, "\n");
+    }
+    fprintf(stderr, "  * Precision Bits: histogram=%d transform=%d cache=%d\n", stats->histogram_bits,
+            stats->transform_bits, stats->cache_bits);
+    if (stats->palette_size > 0) { fprintf(stderr, "  * Palette size:   %d\n", stats->palette_size); }
+}
+
+static void PrintExtraInfoLossless(const WebPPicture* const pic, int short_output, const char* const file_name) {
+    const WebPAuxStats* const stats = pic->stats; // dereferenced unconditionally below
+    if (short_output) { // one line: coded size and PSNR[3]
+        fprintf(stderr, "%7d %2.2f\n", stats->coded_size, stats->PSNR[3]);
+    } else { // full report on stderr
+        fprintf(stderr, "File:      %s\n", file_name);
+        fprintf(stderr, "Dimension: %d x %d\n", pic->width, pic->height);
+        fprintf(stderr, "Output:    %d bytes\n", stats->coded_size);
+        PrintFullLosslessInfo(stats, "ARGB");
+    }
+}
+
+// Prints lossy-encoding statistics from pic->stats to stderr: a one-line summary when 'short_output' is set,
+// otherwise a full report (the per-segment residual byte rows only when 'full_details' is set).
+static void PrintExtraInfoLossy(const WebPPicture* const pic, int short_output, int full_details,
+                                const char* const file_name) {
+    const WebPAuxStats* const stats = pic->stats;
+    if (short_output) {
+        fprintf(stderr, "%7d %2.2f\n", stats->coded_size, stats->PSNR[3]);
+    } else {
+        const int num_i4 = stats->block_count[0];
+        const int num_i16 = stats->block_count[1];
+        const int num_skip = stats->block_count[2];
+        const int total = num_i4 + num_i16;
+        fprintf(stderr, "File:      %s\n", file_name);
+        fprintf(stderr, "Dimension: %d x %d%s\n", pic->width, pic->height, stats->alpha_data_size ? " (with alpha)" : "");
+        fprintf(stderr,
+                "Output:    "
+                "%d bytes Y-U-V-All-PSNR %2.2f %2.2f %2.2f   %2.2f dB\n",
+                stats->coded_size, stats->PSNR[0], stats->PSNR[1], stats->PSNR[2], stats->PSNR[3]);
+        if (total > 0) {
+            int totals[4] = {0, 0, 0, 0};
+            fprintf(stderr,
+                    "block count:  intra4: %d\n"
+                    "              intra16: %d  (-> %.2f%%)\n",
+                    num_i4, num_i16, 100.f * num_i16 / total);
+            fprintf(stderr, "              skipped block: %d (%.2f%%)\n", num_skip, 100.f * num_skip / total);
+            fprintf(stderr,
+                    "bytes used:  header:         %6d  (%.1f%%)\n"
+                    "             mode-partition: %6d  (%.1f%%)\n",
+                    stats->header_bytes[0], 100.f * stats->header_bytes[0] / stats->coded_size, stats->header_bytes[1],
+                    100.f * stats->header_bytes[1] / stats->coded_size);
+            if (stats->alpha_data_size > 0) {
+                fprintf(stderr, "             transparency:   %6d (%.1f dB)\n", stats->alpha_data_size, stats->PSNR[4]);
+            }
+            fprintf(stderr,
+                    " Residuals bytes  "
+                    "|segment 1|segment 2|segment 3"
+                    "|segment 4|  total\n");
+            if (full_details) {
+                fprintf(stderr, "  intra4-coeffs:  ");
+                PrintByteCount(stats->residual_bytes[0], stats->coded_size, totals);
+                fprintf(stderr, " intra16-coeffs:  ");
+                PrintByteCount(stats->residual_bytes[1], stats->coded_size, totals);
+                fprintf(stderr, "  chroma coeffs:  ");
+                PrintByteCount(stats->residual_bytes[2], stats->coded_size, totals);
+            }
+            fprintf(stderr, "    macroblocks:  ");
+            PrintPercents(stats->segment_size, total);
+            fprintf(stderr, "      quantizer:  ");
+            PrintValues(stats->segment_quant);
+            fprintf(stderr, "   filter level:  ");
+            PrintValues(stats->segment_level);
+            if (full_details) {
+                fprintf(stderr, "------------------+---------");
+                fprintf(stderr, "+---------+---------+---------+-----------------\n");
+                fprintf(stderr, " segments total:  ");
+                PrintByteCount(totals, stats->coded_size, NULL);
+            }
+        }
+        if (stats->lossless_size > 0) {
+            PrintFullLosslessInfo(stats, "alpha");
+        }
+    }
+}
+
+// Prints pic->extra_info to stderr as a text map: one cell per 16x16 block, one output line per block row.
+// The cell format is selected by pic->extra_info_type. Does nothing when no extra_info is attached.
+static void PrintMapInfo(const WebPPicture* const pic) {
+    if (pic->extra_info == NULL) return;
+
+    const int cols = (pic->width + 15) / 16;
+    const int rows = (pic->height + 15) / 16;
+    const int kind = pic->extra_info_type;
+
+    for (int row = 0; row < rows; ++row) {
+        for (int col = 0; col < cols; ++col) {
+            const int c = pic->extra_info[col + row * cols];
+            // The entry is used as a glyph index or printed as a number, depending on the map type.
+            switch (kind) {
+                case 1: fprintf(stderr, "%c", "+."[c]); break;   // intra4/intra16
+                case 2: fprintf(stderr, "%c", ".-*X"[c]); break; // segments
+                case 3: fprintf(stderr, "%.2d ", c); break;      // quantizers
+                case 6:
+                case 7: fprintf(stderr, "%3d ", c); break;
+                default: fprintf(stderr, "0x%.2x ", c); break;
+            }
+        }
+        fprintf(stderr, "\n");
+    }
+}
+
+//------------------------------------------------------------------------------
+
+static int MyWriter(const uint8_t* data, size_t data_size, const WebPPicture* const pic) {
+    if (data_size == 0) return 1; // an empty write counts as success
+    return fwrite(data, data_size, 1, (FILE*)pic->custom_ptr) == 1; // custom_ptr carries the output FILE*
+}
+
+// Dumps a picture as a PGM file using the IMC4 layout. Returns 1 on success, 0 on any I/O failure.
+static int DumpPicture(const WebPPicture* const picture, const char* PGM_name) {
+    const int uv_width = (picture->width + 1) / 2;
+    const int uv_height = (picture->height + 1) / 2;
+    const int stride = (picture->width + 1) & ~1;
+    const int alpha_height = WebPPictureHasTransparency(picture) ? picture->height : 0;
+    const int height = picture->height + uv_height + alpha_height;
+    FILE* const f = fopen(PGM_name, "wb");
+    if (f == NULL) return 0;
+    // Track failure in 'ok' rather than returning early, so the stream is always closed below.
+    int ok = (fprintf(f, "P5\n%d %d\n255\n", stride, height) > 0);
+    for (int y = 0; ok && y < picture->height; ++y) {
+        ok = (fwrite(picture->y + y * picture->y_stride, picture->width, 1, f) == 1);
+        if (ok && (picture->width & 1)) fputc(0, f); // pad
+    }
+    for (int y = 0; ok && y < uv_height; ++y) {
+        ok = (fwrite(picture->u + y * picture->uv_stride, uv_width, 1, f) == 1);
+        ok = ok && (fwrite(picture->v + y * picture->uv_stride, uv_width, 1, f) == 1);
+    }
+    for (int y = 0; ok && y < alpha_height; ++y) {
+        ok = (fwrite(picture->a + y * picture->a_stride, picture->width, 1, f) == 1);
+        if (ok && (picture->width & 1)) fputc(0, f); // pad
+    }
+    if (fclose(f) != 0) ok = 0; // a failed flush means the file is incomplete
+    return ok;
+}
+
+// -----------------------------------------------------------------------------
+// Metadata writing.
+
+enum { // Bit flags naming the metadata kinds; combined with | and tested with &.
+    METADATA_EXIF = (1 << 0),
+    METADATA_ICC = (1 << 1),
+    METADATA_XMP = (1 << 2),
+    METADATA_ALL = METADATA_EXIF | METADATA_ICC | METADATA_XMP
+};
+
+static const int kChunkHeaderSize = 8; // used below as tag (kTagSize) + 32-bit size field
+static const int kTagSize = 4;         // length of a four-character chunk tag
+
+static void PrintMetadataInfo(const Metadata* const metadata, int metadata_written) {
+    if (metadata == NULL || metadata_written == 0) return; // nothing was written: print nothing
+
+    fprintf(stderr, "Metadata:\n");
+    if (metadata_written & METADATA_ICC) { // 'metadata_written' is a mask of METADATA_* bits
+        fprintf(stderr, "  * ICC profile:  %6d bytes\n", (int)metadata->iccp.size);
+    }
+    if (metadata_written & METADATA_EXIF) {
+        fprintf(stderr, "  * EXIF data:    %6d bytes\n", (int)metadata->exif.size);
+    }
+    if (metadata_written & METADATA_XMP) {
+        fprintf(stderr, "  * XMP data:     %6d bytes\n", (int)metadata->xmp.size);
+    }
+}
+
+// Writes the 'num' low-order bytes of 'val' to 'out', least significant byte first.
+// 'num' must not exceed 4, the size of the local buffer. Returns 1 on success, 0 otherwise.
+static int WriteLE(FILE* const out, uint32_t val, int num) {
+    uint8_t bytes[4];
+    for (int k = 0; k < num; ++k, val >>= 8) {
+        bytes[k] = (uint8_t)(val & 0xff);
+    }
+
+    return (fwrite(bytes, num, 1, out) == 1);
+}
+
+static int WriteLE24(FILE* const out, uint32_t val) { // writes the low 3 bytes of 'val', little endian
+    return WriteLE(out, val, 3);
+}
+
+static int WriteLE32(FILE* const out, uint32_t val) { // writes all 4 bytes of 'val', little endian
+    return WriteLE(out, val, 4);
+}
+
+static int WriteMetadataChunk(FILE* const out, const char fourcc[4], const MetadataPayload* const payload) {
+    const uint8_t zero = 0;
+    const size_t need_padding = payload->size & 1; // 1 when the payload length is odd, else 0
+    int ok = (fwrite(fourcc, kTagSize, 1, out) == 1); // chunk tag
+    ok = ok && WriteLE32(out, (uint32_t)payload->size); // payload size, little endian
+    ok = ok && (fwrite(payload->bytes, payload->size, 1, out) == 1); // fails for an empty payload (fwrite returns 0)
+    return ok && (fwrite(&zero, need_padding, need_padding, out) == need_padding); // one pad byte if odd, else 0 == 0
+}
+
+// When 'keep' is set and 'payload' is non-empty: sets 'flag' in 'vp8x_flags', adds the chunk's size
+// (header + payload + one padding byte for odd sizes) to 'metadata_size' and returns 1.
+// Otherwise leaves both outputs untouched and returns 0.
+static int UpdateFlagsAndSize(
+    const MetadataPayload* const payload, int keep, int flag, uint32_t* vp8x_flags, uint64_t* metadata_size) {
+    if (!(keep && payload->bytes != NULL && payload->size > 0)) return 0;
+
+    *vp8x_flags |= flag;
+    *metadata_size += kChunkHeaderSize + payload->size + (payload->size & 1);
+    return 1;
+}
+
+// Writes the encoded image held in 'memory_writer' to 'out', wrapping it with the metadata chunks
+// selected by 'keep_metadata' that are actually present in 'metadata'; 'metadata_written' receives
+// the METADATA_* bits of the chunks handled. Returns true on success, false on I/O error or size overflow.
+// For details see doc/webp-container-spec.txt#extended-file-format.
+static int WriteWebPWithMetadata(FILE* const out,
+                                 const WebPPicture* const picture,
+                                 const WebPMemoryWriter* const memory_writer,
+                                 const Metadata* const metadata,
+                                 int keep_metadata,
+                                 int* const metadata_written) {
+    const char kVP8XHeader[] = "VP8X\x0a\x00\x00\x00";
+    const int kAlphaFlag = 0x10;
+    const int kEXIFFlag = 0x08;
+    const int kICCPFlag = 0x20;
+    const int kXMPFlag = 0x04;
+    const size_t kRiffHeaderSize = 12;
+    const uint64_t kMaxChunkPayload = 0xffffffffu - kChunkHeaderSize - 1; // RIFF sizes are 32-bit, even on 64-bit hosts
+    const size_t kMinSize = kRiffHeaderSize + kChunkHeaderSize;
+    uint32_t flags = 0;
+    uint64_t metadata_size = 0;
+    const int write_exif =
+        UpdateFlagsAndSize(&metadata->exif, !!(keep_metadata & METADATA_EXIF), kEXIFFlag, &flags, &metadata_size);
+    const int write_iccp =
+        UpdateFlagsAndSize(&metadata->iccp, !!(keep_metadata & METADATA_ICC), kICCPFlag, &flags, &metadata_size);
+    const int write_xmp =
+        UpdateFlagsAndSize(&metadata->xmp, !!(keep_metadata & METADATA_XMP), kXMPFlag, &flags, &metadata_size);
+    uint8_t* webp = memory_writer->mem;
+    size_t webp_size = memory_writer->size;
+
+    *metadata_written = 0;
+
+    if (webp_size < kMinSize) return 0;
+    if (webp_size - kChunkHeaderSize + metadata_size > kMaxChunkPayload) {
+        fprintf(stderr,
+                "Error! Addition of metadata would exceed "
+                "container size limit.\n");
+        return 0;
+    }
+
+    if (metadata_size > 0) {
+        const int kVP8XChunkSize = 18;
+        const int has_vp8x = !memcmp(webp + kRiffHeaderSize, "VP8X", kTagSize);
+        const uint32_t riff_size =
+            (uint32_t)(webp_size - kChunkHeaderSize + (has_vp8x ? 0 : kVP8XChunkSize) + metadata_size);
+        // RIFF
+        int ok = (fwrite(webp, kTagSize, 1, out) == 1);
+        // RIFF size (file header size is not recorded)
+        ok = ok && WriteLE32(out, riff_size);
+        webp += kChunkHeaderSize;
+        webp_size -= kChunkHeaderSize;
+        // WEBP
+        ok = ok && (fwrite(webp, kTagSize, 1, out) == 1);
+        webp += kTagSize;
+        webp_size -= kTagSize;
+        if (has_vp8x) { // update the existing VP8X flags
+            webp[kChunkHeaderSize] |= (uint8_t)(flags & 0xff);
+            ok = ok && (fwrite(webp, kVP8XChunkSize, 1, out) == 1);
+            webp += kVP8XChunkSize;
+            webp_size -= kVP8XChunkSize;
+        } else {
+            const int is_lossless = !memcmp(webp, "VP8L", kTagSize);
+            if (is_lossless) {
+                // Presence of alpha is stored in the 29th bit of VP8L data.
+                if (webp[kChunkHeaderSize + 3] & (1 << 5)) flags |= kAlphaFlag;
+            }
+            ok = ok && (fwrite(kVP8XHeader, kChunkHeaderSize, 1, out) == 1);
+            ok = ok && WriteLE32(out, flags);
+            ok = ok && WriteLE24(out, picture->width - 1);
+            ok = ok && WriteLE24(out, picture->height - 1);
+        }
+        if (write_iccp) {
+            ok = ok && WriteMetadataChunk(out, "ICCP", &metadata->iccp);
+            *metadata_written |= METADATA_ICC;
+        }
+        // Image
+        ok = ok && (fwrite(webp, webp_size, 1, out) == 1);
+        if (write_exif) {
+            ok = ok && WriteMetadataChunk(out, "EXIF", &metadata->exif);
+            *metadata_written |= METADATA_EXIF;
+        }
+        if (write_xmp) {
+            ok = ok && WriteMetadataChunk(out, "XMP ", &metadata->xmp);
+            *metadata_written |= METADATA_XMP;
+        }
+        return ok;
+    } else {
+        // No metadata, just write the original image file.
+        return (fwrite(webp, webp_size, 1, out) == 1);
+    }
+}
+
+//------------------------------------------------------------------------------
+
+static int ProgressReport(int percent, const WebPPicture* const picture) {
+    fprintf(stderr, "[%s]: %3d %%      \r", (char*)picture->user_data, percent); // '\r' rewrites the same line
+    return 1; // all ok
+}
+
+//------------------------------------------------------------------------------
+
+static void HelpShort(void) { // prints the brief usage text to stdout
+    printf("Usage:\n\n"
+           "   cwebp [options] -q quality input.png -o output.webp\n\n"
+           "where quality is between 0 (poor) to 100 (very good).\n"
+           "Typical value is around 80.\n\n"
+           "Try -longhelp for an exhaustive list of advanced options.\n");
+}
+
+static void HelpLong(void) { // complete option list on stdout; main() also prints it after an option error
+    printf("Usage:\n");
+    printf(" cwebp [-preset <...>] [options] in_list [-o out_dir]\n\n");
+    printf(
+        "in_list is a text file naming one input picture per line.\n"
+        "If input size (-s) for an image is not specified, it is\n"
+        "assumed to be a PNG, JPEG, TIFF or WebP file.\n");
+#ifdef HAVE_WINCODEC_H
+    printf("Windows builds can take as input any of the files handled by WIC.\n");
+#endif
+    printf("\nOptions:\n");
+    printf("  -h / -help  ............ short help\n");
+    printf("  -H / -longhelp  ........ long help\n");
+    printf("  -q <float> ............. quality factor (0:small..100:big)\n");
+    printf("  -alpha_q <int> ......... transparency-compression quality (0..100)\n");
+    printf("  -preset <string> ....... preset setting, one of:\n");
+    printf("                            default, photo, picture,\n");
+    printf("                            drawing, icon, text\n");
+    printf("     -preset must come first, as it overwrites other parameters\n");
+    printf(
+        "  -z <int> ............... activates lossless preset with given\n"
+        "                           level in [0:fast, ..., 9:slowest]\n");
+    printf("\n");
+    printf("  -m <int> ............... compression method (0=fast, 6=slowest)\n");
+    printf("  -segments <int> ........ number of segments to use (1..4)\n");
+    printf("  -size <int> ............ target size (in bytes)\n");
+    printf("  -psnr <float> .......... target PSNR (in dB. typically: 42)\n");
+    printf("\n");
+    printf("  -s <int> <int> ......... input size (width x height) for YUV\n");
+    printf("  -sns <int> ............. spatial noise shaping (0:off, 100:max)\n");
+    printf("  -f <int> ............... filter strength (0=off..100)\n");
+    printf("  -sharpness <int> ....... filter sharpness (0:most .. 7:least sharp)\n");
+    printf("  -strong ................ use strong filter instead of simple (default)\n");
+    printf("  -nostrong .............. use simple filter instead of strong\n");
+    printf("  -partition_limit <int> . limit quality to fit the 512k limit on\n");
+    printf(
+        "                           "
+        "the first partition (0=no degradation ... 100=full)\n");
+    printf("  -pass <int> ............ analysis pass number (1..10)\n");
+    printf("  -crop <x> <y> <w> <h> .. crop picture with the given rectangle\n");
+    printf("  -resize <w> <h> ........ resize picture (after any cropping)\n");
+    printf("  -mt .................... use multi-threading if available\n");
+#ifdef WEBP_USE_THREAD
+    printf("  -n <int> ............... thread number (capped at the processor count)\n");
+#endif
+    printf("  -low_memory ............ reduce memory usage (slower encoding)\n");
+    printf("  -map <int> ............. print map of extra info\n");
+    printf("  -print_psnr ............ prints averaged PSNR distortion\n");
+    printf("  -print_ssim ............ prints averaged SSIM distortion\n");
+    printf("  -print_lsim ............ prints local-similarity distortion\n");
+    printf("  -d <file.pgm> .......... dump the compressed output (PGM file)\n");
+    printf("  -alpha_method <int> .... transparency-compression method (0..1)\n");
+    printf("  -alpha_filter <string> . predictive filtering for alpha plane,\n");
+    printf("                           one of: none, fast (default) or best\n");
+    printf("  -exact ................. preserve RGB values in transparent area\n");
+    printf(
+        "  -blend_alpha <hex> ..... blend colors against background color\n"
+        "                           expressed as RGB values written in\n"
+        "                           hexadecimal, e.g. 0xc0e0d0 for red=0xc0\n"
+        "                           green=0xe0 and blue=0xd0\n");
+    printf("  -noalpha ............... discard any transparency information\n");
+    printf("  -lossless .............. encode image losslessly\n");
+    printf(
+        "  -near_lossless <int> ... use near-lossless image\n"
+        "                           preprocessing (0..100=off)\n");
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+    printf("  -delta_palettization ... use delta palettization\n");
+#endif // WEBP_EXPERIMENTAL_FEATURES
+    printf("  -hint <string> ......... specify image characteristics hint,\n");
+    printf("                           one of: photo, picture or graph\n");
+
+    printf("\n");
+    printf("  -metadata <string> ..... comma separated list of metadata to\n");
+    printf("                           copy from the input to the output if present.\n");
+    printf(
+        "                           "
+        "Valid values: all, none (default), exif, icc, xmp\n");
+
+    printf("\n");
+    printf("  -numpic <int> .......... number of pictures to read from in_list (default 1)\n");
+    printf("  -numbatch <int> ........ batch value passed to device-buffer creation\n");
+    printf("                           and to the asynchronous encoder (default 1)\n");
+    printf("  -use_ocl ............... create the OpenCL kernel and device buffers\n");
+    printf("  -xclbin <file> ......... xclbin file given to kernel creation (with -use_ocl)\n");
+
+    printf("\n");
+    printf("  -short ................. condense printed message\n");
+    printf("  -quiet ................. don't print anything\n");
+    printf("  -version ............... print version number and exit\n");
+#ifndef WEBP_DLL
+    printf("  -noasm ................. disable all assembly optimizations\n");
+#endif
+    printf("  -v ..................... verbose, e.g. print encoding/decoding times\n");
+    printf("  -progress .............. report encoding progress\n");
+    printf("\n");
+    printf("Experimental Options:\n");
+    printf("  -jpeg_like ............. roughly match expected JPEG size\n");
+    printf("  -af .................... auto-adjust filter strength\n");
+    printf("  -pre <int> ............. pre-processing filter\n");
+    printf("\n");
+}
+
+//------------------------------------------------------------------------------
+// Error messages
+
+static const char* const kErrorMessages[VP8_ENC_ERROR_LAST] = { // one message per slot, in the order listed below
+    "OK",
+    "OUT_OF_MEMORY: Out of memory allocating objects",
+    "BITSTREAM_OUT_OF_MEMORY: Out of memory re-allocating byte buffer",
+    "NULL_PARAMETER: NULL parameter passed to function",
+    "INVALID_CONFIGURATION: configuration is invalid",
+    "BAD_DIMENSION: Bad picture dimension. Maximum width and height allowed is 16383 pixels.",
+    "PARTITION0_OVERFLOW: Partition #0 is too big to fit 512k.\nTo reduce the size of this partition, try using less segments with the -segments option, "
+    "and eventually reduce the number of header bits using -partition_limit. More details are available in the manual (`man cwebp`)",
+    "PARTITION_OVERFLOW: Partition is too big to fit 16M",
+    "BAD_WRITE: Picture writer returned an I/O error",
+    "FILE_TOO_BIG: File would be too big to fit in 4G",
+    "USER_ABORT: encoding abort requested by user"};
+
+//------------------------------------------------------------------------------
+// #define RUN_XINLINX_HW
+
+#ifdef RUN_XINLINX_HW
+
+#define TEST_CASE_NUM 1  // number of canned command lines in test_argv
+#define TEST_PARA_NUM 10 // slots per command line, including the terminating NULL
+const char* test_argv[TEST_CASE_NUM][TEST_PARA_NUM] = { // const: holds string literals and is passed to org_main(int, const char*[])
+    {
+        "cwebp.exe", "-near_lossless", "0", "-use_ocl", "-lossless", "../../1920x1080.png", "-o", "../../lossless.webp",
+        NULL,
+    } //,
+      //{
+      //  "cwebp.exe",
+      //  "-q",
+      //  "75",
+      //  "../../1920x1080.png",
+      //  "-o",
+      //  "../../q75.webp",
+      //  NULL,
+      //},
+      //{
+      //  "cwebp.exe",
+      //  "-m",
+      //  "0",
+      //  "-q",
+      //  "75",
+      //  "../../1920x1080.png",
+      //  "-o",
+      //  "../../q75m0.webp",
+      //  NULL,
+      //}
+};
+
+int org_main(int argc, const char* argv[]); // the real entry point, defined below
+
+// Test harness: replays every canned command line of test_argv through org_main().
+int main(int argc, const char* argv[]) {
+    for (int case_idx = 0; case_idx < TEST_CASE_NUM; ++case_idx) {
+        // Count the arguments up to (not including) the terminating NULL;
+        // slot 0, the program name, is taken to be present.
+        int case_argc = 1;
+        while (test_argv[case_idx][case_argc] != NULL) {
+            ++case_argc;
+        }
+
+        const int status = org_main(case_argc, test_argv[case_idx]);
+        if (status != 0) {
+            return status; // stop at the first failing case
+        }
+        ResetEncodeProfilingData();
+    }
+
+    return 0;
+}
+
+int org_main(int argc, const char* argv[]) {
+    for (int i = 0; i < argc; i++) fprintf(stderr, "%s ", argv[i]); // echo the replayed command line
+    fprintf(stderr, "\n");
+#else
+int main(int argc, const char* argv[]) {
+#endif
+    // Batch cwebp front-end: parses the options, reads `Numpic` picture names from
+    // the list file `in_file`, loads them and passes the set to WebPEncodeAsync().
+    // Returns 0 on success, 1 through the Error label, -1 on an early option/init failure.
+    for (int i = 0; i < argc; i++) printf("%s \n", argv[i]);
+    int return_value = -1;
+    const char *in_file = NULL, *out_dir = NULL, *dump_file = NULL;
+    WebPPicture* picture = NULL;
+    std::string* PicPoolList = NULL;
+    std::ifstream PicRead1, PicRead2;
+    int sTotal = 0;
+    std::string stmp;
+    FILE** out = NULL;
+    int c;
+    int short_output = 0;
+    int quiet = 0;
+    int keep_alpha = 1;
+    int blend_alpha = 0;
+    uint32_t background_color = 0xffffffu;
+    int crop = 0, crop_x = 0, crop_y = 0, crop_w = 0, crop_h = 0;
+    int resize_w = 0, resize_h = 0;
+    int lossless_preset = 6;
+    int use_lossless_preset = -1; // -1=unset, 0=don't use, 1=use it
+    int show_progress = 0;
+    int keep_metadata = 0;
+    int metadata_written = 0;
+
+    int print_distortion = -1; // -1=off, 0=PSNR, 1=SSIM, 2=LSIM
+
+    int Numpic = 1;
+    int Numbatch = 1;
+
+    WebPPicture original_picture; // when PSNR or SSIM is requested
+
+    WebPConfig config;
+    WebPAuxStats stats;
+    WebPMemoryWriter memory_writer;
+
+    Metadata metadata;
+    Stopwatch stop_watch;
+
+    int kernel_status = 0;
+    const char* xclbinpath = NULL; // set by -xclbin; must be non-NULL when -use_ocl is given
+
+    StopProfilingWatch watch;
+    double watch_time;
+    int watch_count;
+    xf::common::utils_sw::Logger logger(std::cout, std::cerr); // built before any `goto Error`: a jump may not skip this initialization
+    MetadataInit(&metadata);
+    WebPMemoryWriterInit(&memory_writer);
+    WebPConfigInit(&config);
+
+    if (argc == 1) {
+        HelpShort();
+        return 0;
+    }
+
+// configure default number of threads
+#ifdef WEBP_USE_THREAD
+    config.thread_number = 0; // get_nprocs() > 16 ? 16 : get_nprocs();
+#endif
+
+    for (c = 1; c < argc; ++c) {
+        int parse_error = 0;
+        if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
+            HelpShort();
+            return 0;
+        } else if (!strcmp(argv[c], "-H") || !strcmp(argv[c], "-longhelp")) {
+            HelpLong();
+            return 0;
+        } else if (!strcmp(argv[c], "-o") && c < argc - 1) {
+            out_dir = argv[++c];
+        } else if (!strcmp(argv[c], "-d") && c < argc - 1) {
+            dump_file = argv[++c];
+            config.show_compressed = 1;
+        } else if (!strcmp(argv[c], "-print_psnr")) {
+            config.show_compressed = 1;
+            print_distortion = 0;
+        } else if (!strcmp(argv[c], "-print_ssim")) {
+            config.show_compressed = 1;
+            print_distortion = 1;
+        } else if (!strcmp(argv[c], "-print_lsim")) {
+            config.show_compressed = 1;
+            print_distortion = 2;
+        } else if (!strcmp(argv[c], "-short")) {
+            ++short_output;
+        } else if (!strcmp(argv[c], "-s") && c < argc - 2) {
+            c += 2; // the size itself is unused here, but <width> <height> must not be taken for the input list
+            /* picture[0].width = ExUtilGetInt(argv[++c], 0, &parse_error); */
+            /* picture[0].height = ExUtilGetInt(argv[++c], 0, &parse_error); */
+            /* if (picture[0].width > WEBP_MAX_DIMENSION || picture[0].width < 0 || */
+            /*  picture[0].height > WEBP_MAX_DIMENSION ||  picture[0].height < 0) { */
+            /*   fprintf(stderr, "Specified dimension (%d x %d) is out of range.\n", */
+            /*        picture.width, picture.height); */
+            /*   goto Error; */
+            /* } */
+        } else if (!strcmp(argv[c], "-m") && c < argc - 1) {
+            config.method = ExUtilGetInt(argv[++c], 0, &parse_error);
+            use_lossless_preset = 0; // disable -z option
+        } else if (!strcmp(argv[c], "-n") && c < argc - 1) {
+#ifdef WEBP_USE_THREAD
+            config.thread_number = ExUtilGetInt(argv[++c], 0, &parse_error);
+            config.thread_number = get_nprocs() > config.thread_number ? config.thread_number : get_nprocs();
+#endif
+        } else if (!strcmp(argv[c], "-q") && c < argc - 1) {
+            config.quality = ExUtilGetFloat(argv[++c], &parse_error);
+            use_lossless_preset = 0; // disable -z option
+        } else if (!strcmp(argv[c], "-numpic") && c < argc - 1) {
+            Numpic = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if (!strcmp(argv[c], "-numbatch") && c < argc - 1) {
+            Numbatch = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if (!strcmp(argv[c], "-z") && c < argc - 1) {
+            lossless_preset = ExUtilGetInt(argv[++c], 0, &parse_error);
+            if (use_lossless_preset != 0) use_lossless_preset = 1;
+        } else if (!strcmp(argv[c], "-alpha_q") && c < argc - 1) {
+            config.alpha_quality = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if (!strcmp(argv[c], "-alpha_method") && c < argc - 1) {
+            config.alpha_compression = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if (!strcmp(argv[c], "-alpha_cleanup")) {
+            // This flag is obsolete, does opposite of -exact.
+            config.exact = 0;
+        } else if (!strcmp(argv[c], "-exact")) {
+            config.exact = 1;
+        } else if (!strcmp(argv[c], "-xclbin") && c < argc - 1) {
+            xclbinpath = argv[++c];
+        } else if (!strcmp(argv[c], "-blend_alpha") && c < argc - 1) {
+            blend_alpha = 1;
+            // background color is given in hex with an optional '0x' prefix
+            background_color = ExUtilGetInt(argv[++c], 16, &parse_error);
+            background_color = background_color & 0x00ffffffu;
+        } else if (!strcmp(argv[c], "-alpha_filter") && c < argc - 1) {
+            ++c;
+            if (!strcmp(argv[c], "none")) {
+                config.alpha_filtering = 0;
+            } else if (!strcmp(argv[c], "fast")) {
+                config.alpha_filtering = 1;
+            } else if (!strcmp(argv[c], "best")) {
+                config.alpha_filtering = 2;
+            } else {
+                fprintf(stderr, "Error! Unrecognized alpha filter: %s\n", argv[c]);
+                goto Error;
+            }
+        } else if (!strcmp(argv[c], "-noalpha")) {
+            keep_alpha = 0;
+        } else if (!strcmp(argv[c], "-lossless")) {
+            config.lossless = 1;
+        } else if (!strcmp(argv[c], "-near_lossless") && c < argc - 1) {
+            config.near_lossless = ExUtilGetInt(argv[++c], 0, &parse_error);
+            config.lossless = 1; // use near-lossless only with lossless
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+        } else if (!strcmp(argv[c], "-delta_palettization")) {
+            config.delta_palettization = 1;
+            config.lossless = 1; // use delta-palettization only with lossless
+#endif                           // WEBP_EXPERIMENTAL_FEATURES
+        } else if (!strcmp(argv[c], "-hint") && c < argc - 1) {
+            ++c;
+            if (!strcmp(argv[c], "photo")) {
+                config.image_hint = WEBP_HINT_PHOTO;
+            } else if (!strcmp(argv[c], "picture")) {
+                config.image_hint = WEBP_HINT_PICTURE;
+            } else if (!strcmp(argv[c], "graph")) {
+                config.image_hint = WEBP_HINT_GRAPH;
+            } else {
+                fprintf(stderr, "Error! Unrecognized image hint: %s\n", argv[c]);
+                goto Error;
+            }
+        } else if (!strcmp(argv[c], "-size") && c < argc - 1) {
+            config.target_size = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if (!strcmp(argv[c], "-psnr") && c < argc - 1) {
+            config.target_PSNR = ExUtilGetFloat(argv[++c], &parse_error);
+        } else if (!strcmp(argv[c], "-sns") && c < argc - 1) {
+            config.sns_strength = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if (!strcmp(argv[c], "-f") && c < argc - 1) {
+            config.filter_strength = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if (!strcmp(argv[c], "-af")) {
+            config.autofilter = 1;
+        } else if (!strcmp(argv[c], "-jpeg_like")) {
+            config.emulate_jpeg_size = 1;
+        } else if (!strcmp(argv[c], "-mt")) {
+            ++config.thread_level; // increase thread level
+        } else if (!strcmp(argv[c], "-low_memory")) {
+            config.low_memory = 1;
+        } else if (!strcmp(argv[c], "-strong")) {
+            config.filter_type = 1;
+        } else if (!strcmp(argv[c], "-nostrong")) {
+            config.filter_type = 0;
+        } else if (!strcmp(argv[c], "-sharpness") && c < argc - 1) {
+            config.filter_sharpness = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if (!strcmp(argv[c], "-pass") && c < argc - 1) {
+            config.pass = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if (!strcmp(argv[c], "-pre") && c < argc - 1) {
+            config.preprocessing = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if (!strcmp(argv[c], "-segments") && c < argc - 1) {
+            config.segments = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if (!strcmp(argv[c], "-partition_limit") && c < argc - 1) {
+            config.partition_limit = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if (!strcmp(argv[c], "-use_ocl")) {
+            config.use_ocl = 1;
+        } else if (!strcmp(argv[c], "-map") && c < argc - 1) {
+            ++c; // skip <int> so it is not taken for the input list (was: picture.extra_info_type = ExUtilGetInt(argv[++c], 0, &parse_error);)
+        } else if (!strcmp(argv[c], "-crop") && c < argc - 4) {
+            crop = 1;
+            crop_x = ExUtilGetInt(argv[++c], 0, &parse_error);
+            crop_y = ExUtilGetInt(argv[++c], 0, &parse_error);
+            crop_w = ExUtilGetInt(argv[++c], 0, &parse_error);
+            crop_h = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if (!strcmp(argv[c], "-resize") && c < argc - 2) {
+            resize_w = ExUtilGetInt(argv[++c], 0, &parse_error);
+            resize_h = ExUtilGetInt(argv[++c], 0, &parse_error);
+#ifndef WEBP_DLL
+        } else if (!strcmp(argv[c], "-noasm")) {
+            VP8GetCPUInfo = NULL;
+#endif
+        } else if (!strcmp(argv[c], "-version")) {
+            const int version = WebPGetEncoderVersion();
+            printf("%d.%d.%d\n", (version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
+            return 0;
+        } else if (!strcmp(argv[c], "-progress")) {
+            show_progress = 1;
+        } else if (!strcmp(argv[c], "-quiet")) {
+            quiet = 1;
+        } else if (!strcmp(argv[c], "-preset") && c < argc - 1) {
+            WebPPreset preset;
+            ++c;
+            if (!strcmp(argv[c], "default")) {
+                preset = WEBP_PRESET_DEFAULT;
+            } else if (!strcmp(argv[c], "photo")) {
+                preset = WEBP_PRESET_PHOTO;
+            } else if (!strcmp(argv[c], "picture")) {
+                preset = WEBP_PRESET_PICTURE;
+            } else if (!strcmp(argv[c], "drawing")) {
+                preset = WEBP_PRESET_DRAWING;
+            } else if (!strcmp(argv[c], "icon")) {
+                preset = WEBP_PRESET_ICON;
+            } else if (!strcmp(argv[c], "text")) {
+                preset = WEBP_PRESET_TEXT;
+            } else {
+                fprintf(stderr, "Error! Unrecognized preset: %s\n", argv[c]);
+                goto Error;
+            }
+            if (!WebPConfigPreset(&config, preset, config.quality)) {
+                fprintf(stderr, "Error! Could not initialize configuration with preset.\n");
+                goto Error;
+            }
+        } else if (!strcmp(argv[c], "-metadata") && c < argc - 1) {
+            static const struct {
+                const char* option;
+                int flag;
+            } kTokens[] = {
+                {"all", METADATA_ALL}, {"none", 0},           {"exif", METADATA_EXIF},
+                {"icc", METADATA_ICC}, {"xmp", METADATA_XMP},
+            };
+            const size_t kNumTokens = sizeof(kTokens) / sizeof(kTokens[0]);
+            const char* start = argv[++c];
+            const char* const end = start + strlen(start);
+
+            while (start < end) {
+                size_t i;
+                const char* token = strchr(start, ',');
+                if (token == NULL) token = end;
+
+                for (i = 0; i < kNumTokens; ++i) {
+                    if ((size_t)(token - start) == strlen(kTokens[i].option) &&
+                        !strncmp(start, kTokens[i].option, strlen(kTokens[i].option))) {
+                        if (kTokens[i].flag != 0) {
+                            keep_metadata |= kTokens[i].flag;
+                        } else {
+                            keep_metadata = 0;
+                        }
+                        break;
+                    }
+                }
+                if (i == kNumTokens) {
+                    fprintf(stderr, "Error! Unknown metadata type '%.*s'\n", (int)(token - start), start);
+                    HelpLong();
+                    return -1;
+                }
+                start = token + 1;
+            }
+#ifdef HAVE_WINCODEC_H
+            if (keep_metadata != 0 && keep_metadata != METADATA_ICC) {
+                // TODO(jzern): remove when -metadata is supported on all platforms.
+                fprintf(stderr,
+                        "Warning: only ICC profile extraction is currently"
+                        " supported on this platform!\n");
+            }
+#endif
+        } else if (!strcmp(argv[c], "-v")) {
+            verbose = 1;
+        } else if (!strcmp(argv[c], "--")) {
+            if (c < argc - 1) in_file = argv[++c];
+            break;
+        } else if (argv[c][0] == '-') {
+            fprintf(stderr, "Error! Unknown option '%s'\n", argv[c]);
+            HelpLong();
+            return -1;
+        } else {
+            in_file = argv[c];
+        }
+
+        if (parse_error) {
+            HelpLong();
+            return -1;
+        }
+    }
+
+    if (in_file == NULL) {
+        fprintf(stderr, "No input file specified!\n");
+        HelpShort();
+        goto Error;
+    }
+    if (Numpic < 1) { // Numpic sizes the arrays allocated below
+        fprintf(stderr, "Error! -numpic must be at least 1.\n");
+        goto Error;
+    }
+
+    picture = new WebPPicture[Numpic];
+
+    // Init pictures
+    for (int i = 0; i < Numpic; i++) {
+        if (!WebPPictureInit(&picture[i]) || !WebPPictureInit(&original_picture)) {
+            fprintf(stderr, "Error! Version mismatch!\n");
+            return -1;
+        }
+    };
+
+    // Read pictures name list from file to PicPoolList
+    PicPoolList = new std::string[Numpic];
+    PicRead1 = std::ifstream(in_file); // Get number of pictures in list "in_file"
+    // Test getline() itself: a !eof() loop never ends when the list cannot be opened and over-counts after a final newline.
+    while (std::getline(PicRead1, stmp)) sTotal++;
+    PicRead1.close();
+
+    if (sTotal < Numpic) { // also taken when the list could not be opened (sTotal stays 0)
+        fprintf(stderr, "ERROR: There are not enough pictures in %s\n", in_file);
+        goto Error;
+    }
+
+    PicRead2 = std::ifstream(in_file); // Get pictures names from list "in_file"
+    for (int i = 0; i < Numpic; i++) {
+        std::getline(PicRead2, PicPoolList[i]);
+    };
+    PicRead2.close();
+
+    for (int i = 0; i < Numpic; i++) std::cout << PicPoolList[i] << std::endl;
+
+    if (use_lossless_preset == 1) {
+        if (!WebPConfigLosslessPreset(&config, lossless_preset)) {
+            fprintf(stderr, "Invalid lossless preset (-z %d)\n", lossless_preset);
+            goto Error;
+        }
+    }
+
+    // Check for unsupported command line options for lossless mode and log
+    // warning for such options.
+    if (!quiet && config.lossless == 1) {
+        if (config.target_size > 0 || config.target_PSNR > 0) {
+            fprintf(stderr,
+                    "Encoding for specified size or PSNR is not supported"
+                    " for lossless encoding. Ignoring such option(s)!\n");
+        }
+        if (config.partition_limit > 0) {
+            fprintf(stderr,
+                    "Partition limit option is not required for lossless"
+                    " encoding. Ignoring this option!\n");
+        }
+    }
+
+    if (!WebPValidateConfig(&config)) {
+        fprintf(stderr, "Error! Invalid configuration.\n");
+        goto Error;
+    }
+
+    // Read the input. We need to decide if we prefer ARGB or YUVA
+    // samples, depending on the expected compression mode (this saves
+    // some conversion steps).
+    for (int i = 0; i < Numpic; i++) {
+        picture[i].use_argb = (config.lossless || config.preprocessing > 0 || crop || (resize_w | resize_h) > 0);
+        if (verbose) {
+            StopwatchReset(&stop_watch);
+        }
+    }
+
+    // Read pictures
+    for (int i = 0; i < Numpic; i++) {
+        int ok = ReadPicture(PicPoolList[i].c_str(), &picture[i], keep_alpha, (keep_metadata == 0) ? NULL : &metadata);
+        if (!ok) {
+            fprintf(stderr, "Error! Cannot read input picture file '%s'\n", PicPoolList[i].c_str());
+            goto Error;
+        }
+
+        picture[i].progress_hook = (show_progress && !quiet) ? ProgressReport : NULL;
+
+        if (blend_alpha) {
+            WebPBlendAlpha(&picture[i], background_color);
+        }
+    };
+
+    if (verbose) {
+        const double read_time = StopwatchReadAndReset(&stop_watch);
+        fprintf(stderr, "Time to read input: %.3fs\n", read_time);
+    }
+
+    // Open the output
+    if (out_dir != NULL) {
+        // No FILE* array is allocated here: the fopen() calls below are disabled
+        // and nothing reads `out`, so the former allocation was only leaked.
+        /* const int use_stdout = !strcmp(out_file, "-"); */
+        /* out = use_stdout ? ExUtilSetBinaryMode(stdout) : fopen(out_file, "wb"); */
+
+        for (int i = 0; i < Numpic; i++) {
+            // output file name: <out_dir>/<input base name without its last ".png">.webp
+            const std::string::size_type sl = PicPoolList[i].find_last_of("/\\");
+            const std::string base = PicPoolList[i].substr(sl == std::string::npos ? 0 : sl + 1);
+            std::string out_file = std::string(out_dir) + "/" + base.substr(0, base.rfind(".png")) + ".webp";
+
+            // std::cout << out_file << std::endl;
+            // NOTE(review): unbounded copy - confirm custom_ptr_name can hold any out_dir + file name.
+            strcpy(picture[i].custom_ptr_name, out_file.c_str());
+
+            /* out[i] = fopen(out_file.c_str(),"wb"); */
+            /* if (out[i] == NULL) { */
+            /*   fprintf(stderr, "Error! Cannot open output file '%s'\n", out_file.c_str()); */
+            /*   goto Error; */
+            /* }  */
+            /* else { */
+            /*   if (!short_output && !quiet) { */
+            /*  fprintf(stderr, "INFO: output file is '%s'\n", out_file.c_str()); */
+            /*   } */
+            /* } */
+
+            // Select the writer for this picture; the enclosing loop visits every
+            // picture, so no nested loop over all of them is needed.
+            if (keep_metadata == 0) {
+                picture[i].writer = MyWriter;
+                // picture[i].custom_ptr = (void*)out[i];
+            } else {
+                // NOTE(review): every picture is pointed at the single memory_writer -
+                // confirm that sharing it is intended when Numpic > 1.
+                picture[i].writer = WebPMemoryWrite;
+                picture[i].custom_ptr = (void*)&memory_writer;
+            }
+        }; // end of loop
+    } else {
+        out = NULL;
+        if (!quiet && !short_output) {
+            fprintf(stderr, "No output dir specified (no -o flag). Encoding will\n");
+            fprintf(stderr, "be performed, but its results discarded.\n\n");
+        }
+    }
+
+    if (!quiet) {
+        for (int i = 0; i < Numpic; i++) {
+            picture[i].stats = &stats;
+            picture[i].user_data = (void*)PicPoolList[i].c_str();
+        }
+    }
+
+    /* // Crop & resize. */
+    /* if (verbose) { */
+    /*   StopwatchReset(&stop_watch); */
+    /* } */
+    /* if (crop != 0) { */
+    /*   // We use self-cropping using a view. */
+    /*   if (!WebPPictureView(&picture, crop_x, crop_y, crop_w, crop_h, &picture)) { */
+    /*  fprintf(stderr, "Error! Cannot crop picture\n"); */
+    /*  goto Error; */
+    /*   } */
+    /* } */
+    /* if ((resize_w | resize_h) > 0) { */
+    /*   if (!WebPPictureRescale(&picture, resize_w, resize_h)) { */
+    /*  fprintf(stderr, "Error! Cannot resize picture\n"); */
+    /*  goto Error; */
+    /*   } */
+    /* } */
+    /* if (verbose && (crop != 0 || (resize_w | resize_h) > 0)) { */
+    /*   const double preproc_time = StopwatchReadAndReset(&stop_watch); */
+    /*   fprintf(stderr, "Time to crop/resize picture: %.3fs\n", preproc_time); */
+    /* } */
+
+    /* if (picture.extra_info_type > 0) { */
+    /*   AllocExtraInfo(&picture); */
+    /* } */
+    /* if (print_distortion >= 0) {  // Save original picture for later comparison */
+    /*   WebPPictureCopy(&picture, &original_picture); */
+    /* } */
+
+    /* // Compress. */
+    /* if (verbose) { */
+    /*   StopwatchReset(&stop_watch); */
+    /* } */
+
+    // Device setup (./create_kernel.c, ./create_kernel.h): the kernel is created
+    // first, then the device buffers; both only when -use_ocl was given.
+    if (config.use_ocl) {
+        if (xclbinpath == NULL) { // without -xclbin there is no path to hand to CreateKernel()
+            fprintf(stderr, "Error! -use_ocl requires -xclbin <file>.\n");
+            goto Error;
+        }
+        kernel_status = CreateKernel(xclbinpath); // create_kernel.c
+        if (kernel_status) {
+            fprintf(stderr, "Create kernel1 failed!\n");
+            goto Error;
+        }
+
+        kernel_status = CreateDeviceBuffers(Numbatch);
+        if (kernel_status) {
+            fprintf(stderr, "Create buffers failed!\n");
+            goto Error;
+        }
+    }
+
+    //
+    fprintf(stderr, "INFO: WebPEncodeAsync Starts... \n");
+    StartProfiling(&watch);
+
+    /*int ok = */ WebPEncodeAsync(Numpic, Numbatch, &config, &picture);
+    /* if (ok) {  */
+    /*   /\* fprintf(stderr, "Error! Cannot encode picture as WebP\n"); *\/ */
+    /*   /\* fprintf(stderr, "Error code: %d (%s)\n", *\/ */
+    /*   /\*          picture.error_code, kErrorMessages[picture.error_code]); *\/ */
+    /*   /\* goto Error; *\/ */
+    /* } */
+
+    watch_time = 0.0;
+    StopProfiling(&watch, &watch_time, &watch_count);
+    fprintf(stderr, "INFO: WebPEncodeAsync Finished. Computation time is %f (ms) \n\n", watch_time);
+
+    // ********************************************************************************************************************
+    // //
+
+    ReleaseDeviceBuffers();
+
+    if (config.use_ocl) {
+        releaseSoftware(software);
+        releaseHardware(hardware);
+    }
+
+    ReleaseKernel();
+
+    // display all profiling data
+    // DisplayEncodeProfilingResult();
+
+    // fprintf(stderr, "stop profiling.......\n");
+
+    /* if (verbose) { */
+    /*   const double encode_time = StopwatchReadAndReset(&stop_watch); */
+    /*   fprintf(stderr, "Time to encode picture: %.3fs\n", encode_time); */
+    /* } */
+
+    /* // Write info */
+    /* if (dump_file) { */
+    /*   if (picture.use_argb) { */
+    /*  fprintf(stderr, "Warning: can't dump file (-d option) in lossless mode."); */
+    /*   } else if (!DumpPicture(&picture, dump_file)) { */
+    /*  fprintf(stderr, "Warning, couldn't dump picture %s\n", dump_file); */
+    /*   } */
+    /* } */
+
+    /* if (keep_metadata != 0) { */
+    /*   if (out != NULL) { */
+    /*  if (!WriteWebPWithMetadata(out, &picture, &memory_writer, */
+    /*                             &metadata, keep_metadata, &metadata_written)) { */
+    /*    fprintf(stderr, "Error writing WebP file with metadata!\n"); */
+    /*    goto Error; */
+    /*  } */
+    /*   } else {  // output is disabled, just display the metadata stats. */
+    /*  const struct { */
+    /*    const MetadataPayload* const payload; */
+    /*    int flag; */
+    /*  } *iter, info[] = { */
+    /*    { &metadata.exif, METADATA_EXIF }, */
+    /*    { &metadata.iccp, METADATA_ICC }, */
+    /*    { &metadata.xmp, METADATA_XMP }, */
+    /*    { NULL, 0 } */
+    /*  }; */
+    /*  uint32_t unused1 = 0; */
+    /*  uint64_t unused2 = 0; */
+
+    /*  for (iter = info; iter->payload != NULL; ++iter) { */
+    /*    if (UpdateFlagsAndSize(iter->payload, !!(keep_metadata & iter->flag), */
+    /*                           0, &unused1, &unused2)) { */
+    /*      metadata_written |= iter->flag; */
+    /*    } */
+    /*  } */
+    /*   } */
+    /* } */
+
+    /* if (!quiet) { */
+    /*   if (!short_output || print_distortion < 0) { */
+    /*  if (config.lossless) { */
+    /*    PrintExtraInfoLossless(&picture, short_output, in_file); */
+    /*  } else { */
+    /*    PrintExtraInfoLossy(&picture, short_output, config.low_memory, in_file); */
+    /*  } */
+    /*   } */
+    /*   if (!short_output && picture.extra_info_type > 0) { */
+    /*  PrintMapInfo(&picture); */
+    /*   } */
+    /*   if (print_distortion >= 0) {    // print distortion */
+    /*  static const char* distortion_names[] = { "PSNR", "SSIM", "LSIM" }; */
+    /*  float values[5]; */
+    /*  if (picture.use_argb != original_picture.use_argb) { */
+    /*    // Somehow, the WebPEncode() call converted the original picture. */
+    /*    // We need to make both match before calling WebPPictureDistortion(). */
+    /*    int ok = 0; */
+    /*    if (picture.use_argb) { */
+    /*      ok = WebPPictureYUVAToARGB(&original_picture); */
+    /*    } else { */
+    /*      ok = WebPPictureARGBToYUVA(&original_picture, WEBP_YUV420A); */
+    /*    } */
+    /*    if (!ok) { */
+    /*      fprintf(stderr, "Error while converting original picture.\n"); */
+    /*      goto Error; */
+    /*    } */
+    /*  } */
+    /*  if (!WebPPictureDistortion(&picture, &original_picture, */
+    /*                             print_distortion, values)) { */
+    /*    fprintf(stderr, "Error while computing the distortion.\n"); */
+    /*    goto Error; */
+    /*  } */
+    /*  if (!short_output) { */
+    /*    fprintf(stderr, "%s: ", distortion_names[print_distortion]); */
+    /*    if (picture.use_argb) { */
+    /*      fprintf(stderr, "B:%.2f G:%.2f R:%.2f A:%.2f  Total:%.2f\n", */
+    /*              values[0], values[1], values[2], values[3], values[4]); */
+    /*    } else { */
+    /*      fprintf(stderr, "Y:%.2f U:%.2f V:%.2f A:%.2f  Total:%.2f\n", */
+    /*              values[0], values[1], values[2], values[3], values[4]); */
+    /*    } */
+    /*  } else { */
+    /*    fprintf(stderr, "%7d %.4f\n", picture.stats->coded_size, values[4]); */
+    /*  } */
+    /*   } */
+    /*   if (!short_output) { */
+    /*  PrintMetadataInfo(&metadata, metadata_written); */
+    /*   } */
+    /* } */
+    /* return_value = 0; */
+
+    using namespace xf::common::utils_sw;
+    // Success: report TEST_PASS through the logger constructed at the top of the function.
+    return_value = 0;
+    logger.info(Logger::Message::TEST_PASS);
+    return return_value;
+
+Error:
+    WebPMemoryWriterClear(&memory_writer);
+    // free(picture.extra_info);
+    MetadataFree(&metadata);
+    /* WebPPictureFree(&picture); */
+    /* WebPPictureFree(&original_picture); */
+    /* if (out != NULL && out != stdout) { */
+    /*   fclose(out); */
+    /* } */
+
+    return_value = 1;
+    logger.error(Logger::Message::TEST_FAIL);
+    return return_value;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/dwebp.c b/codec/L2/demos/webpEnc/host/dwebp.c
new file mode 100644
index 0000000000..3fe962f864
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/dwebp.c
@@ -0,0 +1,805 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  Command-line tool for decoding a WebP image.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+//#include <sys/sysinfo.h>
+
+#ifdef HAVE_CONFIG_H
+#include "webp/config.h"
+#endif
+
+#ifdef WEBP_HAVE_PNG
+#include <png.h>
+#include <setjmp.h> // note: this must be included *after* png.h
+#endif
+
+#ifdef HAVE_WINCODEC_H
+#ifdef __MINGW32__
+#define INITGUID // Without this GUIDs are declared extern and fail to link
+#endif
+#define CINTERFACE
+#define COBJMACROS
+#define _WIN32_IE 0x500 // Workaround bug in shlwapi.h when compiling C++
+                        // code with COBJMACROS.
+#include <ole2.h>       // CreateStreamOnHGlobal()
+#include <shlwapi.h>
+#include <windows.h>
+#include <wincodec.h>
+#endif
+
+#include "webp/decode.h"
+#include "./example_util.h"
+#include "./stopwatch.h"
+#include "../src/utils/profiling.h"
+
+static int verbose = 0;
+static int quiet = 0;
+#ifndef WEBP_DLL
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void* VP8GetCPUInfo; // opaque forward declaration.
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+#endif // WEBP_DLL
+
+//------------------------------------------------------------------------------
+
+// Output types
+typedef enum {
+    PNG = 0,         // initial value of 'format' in main() when no format flag is given
+    PAM,             // -pam: raw RGBA samples (written by WritePPM with alpha = 1)
+    PPM,             // -ppm: raw RGB samples (written by WritePPM with alpha = 0)
+    PGM,             // -pgm: Y/U/V(/A) planes as one grayscale image, IMC4 layout
+    BMP,             // -bmp: uncompressed BMP
+    TIFF,            // -tiff: uncompressed TIFF
+    YUV,             // -yuv: Y/U/V(/A) planes written sequentially without header
+    ALPHA_PLANE_ONLY // -alpha: alpha plane only; this is for experimenting only
+} OutputFileFormat;
+
+#ifdef HAVE_WINCODEC_H
+
+#define IFS(fn)                                                         \
+    do {                                                                \
+        if (SUCCEEDED(hr)) {                                            \
+            hr = (fn);                                                  \
+            if (FAILED(hr)) fprintf(stderr, #fn " failed %08lx\n", hr); \
+        }                                                               \
+    } while (0)
+
+#ifdef __cplusplus
+#define MAKE_REFGUID(x) (x)
+#else
+#define MAKE_REFGUID(x) &(x)
+#endif
+
+static HRESULT CreateOutputStream(const char* out_file_name, int write_to_mem, IStream** stream) {
+    HRESULT hr = S_OK;
+    if (!write_to_mem) {
+        // Regular case: create (or overwrite) the named file and stream into it.
+        IFS(SHCreateStreamOnFileA(out_file_name, STGM_WRITE | STGM_CREATE, stream));
+    } else {
+        IFS(CreateStreamOnHGlobal(NULL, TRUE, stream)); // memory buffer, freed when 'stream' is released
+    }
+    if (!SUCCEEDED(hr)) {
+        fprintf(stderr, "Error opening output file %s (%08lx)\n", out_file_name, hr);
+    }
+    return hr;
+}
+
+static HRESULT WriteUsingWIC(const char* out_file_name,
+                             int use_stdout,
+                             REFGUID container_guid,
+                             uint8_t* rgb,
+                             int stride,
+                             uint32_t width,
+                             uint32_t height,
+                             int has_alpha) {
+    HRESULT hr = S_OK;
+    IWICImagingFactory* factory = NULL;
+    IWICBitmapFrameEncode* frame = NULL;
+    IWICBitmapEncoder* encoder = NULL;
+    IStream* stream = NULL;
+    WICPixelFormatGUID pixel_format = has_alpha ? GUID_WICPixelFormat32bppBGRA : GUID_WICPixelFormat24bppBGR;
+    // Encodes 'rgb' with the WIC encoder chosen by 'container_guid'; each IFS() step is skipped once 'hr' has failed.
+    IFS(CoInitialize(NULL));
+    IFS(CoCreateInstance(MAKE_REFGUID(CLSID_WICImagingFactory), NULL, CLSCTX_INPROC_SERVER,
+                         MAKE_REFGUID(IID_IWICImagingFactory), (LPVOID*)&factory));
+    if (hr == REGDB_E_CLASSNOTREG) {
+        fprintf(stderr,
+                "Couldn't access Windows Imaging Component (are you running "
+                "Windows XP SP3 or newer?). PNG support not available. "
+                "Use -ppm or -pgm for available PPM and PGM formats.\n");
+    }
+    IFS(CreateOutputStream(out_file_name, use_stdout, &stream)); // use_stdout selects the in-memory stream
+    IFS(IWICImagingFactory_CreateEncoder(factory, container_guid, NULL, &encoder));
+    IFS(IWICBitmapEncoder_Initialize(encoder, stream, WICBitmapEncoderNoCache));
+    IFS(IWICBitmapEncoder_CreateNewFrame(encoder, &frame, NULL));
+    IFS(IWICBitmapFrameEncode_Initialize(frame, NULL));
+    IFS(IWICBitmapFrameEncode_SetSize(frame, width, height));
+    IFS(IWICBitmapFrameEncode_SetPixelFormat(frame, &pixel_format));
+    IFS(IWICBitmapFrameEncode_WritePixels(frame, height, stride, height * stride, rgb));
+    IFS(IWICBitmapFrameEncode_Commit(frame));
+    IFS(IWICBitmapEncoder_Commit(encoder));
+    // For stdout output the encoded image now sits in the memory stream: copy it to the standard output handle.
+    if (SUCCEEDED(hr) && use_stdout) {
+        HGLOBAL image;
+        IFS(GetHGlobalFromStream(stream, &image));
+        if (SUCCEEDED(hr)) {
+            HANDLE std_output = GetStdHandle(STD_OUTPUT_HANDLE);
+            DWORD mode;
+            const BOOL update_mode = GetConsoleMode(std_output, &mode);
+            const void* const image_mem = GlobalLock(image);
+            DWORD bytes_written = 0;
+
+            // Clear output processing if necessary, then output the image.
+            if (update_mode) SetConsoleMode(std_output, 0);
+            if (!WriteFile(std_output, image_mem, (DWORD)GlobalSize(image), &bytes_written, NULL) ||
+                bytes_written != GlobalSize(image)) {
+                hr = E_FAIL; // failed or short write
+            }
+            if (update_mode) SetConsoleMode(std_output, mode); // restore the saved console mode
+            GlobalUnlock(image);
+        }
+    }
+    // Release whichever COM objects were created; this also runs after a failure.
+    if (frame != NULL) IUnknown_Release(frame);
+    if (encoder != NULL) IUnknown_Release(encoder);
+    if (factory != NULL) IUnknown_Release(factory);
+    if (stream != NULL) IUnknown_Release(stream);
+    return hr;
+}
+
+static int WritePNG(const char* out_file_name, int use_stdout, const WebPDecBuffer* const buffer) {
+    // PNG output through WIC; alpha is present only when the buffer is MODE_BGRA.
+    const int with_alpha = (buffer->colorspace == MODE_BGRA);
+    const HRESULT result = WriteUsingWIC(out_file_name, use_stdout, MAKE_REFGUID(GUID_ContainerFormatPng),
+                                         buffer->u.RGBA.rgba, buffer->u.RGBA.stride,
+                                         (uint32_t)buffer->width, (uint32_t)buffer->height, with_alpha);
+
+    // Collapse the HRESULT into the 1 (success) / 0 (failure) convention of the other writers.
+    return SUCCEEDED(result);
+}
+
+#elif defined(WEBP_HAVE_PNG) // !HAVE_WINCODEC_H
+static void PNGAPI PNGErrorFunction(png_structp png, png_const_charp message) {
+    (void)message;               // the error text passed by libpng is not used
+    longjmp(png_jmpbuf(png), 1); // jump back to the setjmp() armed on this png struct
+}
+
+static int WritePNG(FILE* out_file, const WebPDecBuffer* const buffer) {
+    const uint32_t width = buffer->width;
+    const uint32_t height = buffer->height;
+    uint8_t* const rgb = buffer->u.RGBA.rgba;
+    const int stride = buffer->u.RGBA.stride;
+    const int has_alpha = (buffer->colorspace == MODE_RGBA);
+    volatile png_structp png;
+    volatile png_infop info;
+    png_uint_32 y;
+    // Writes 'buffer' as an 8-bit RGB/RGBA PNG to 'out_file' with libpng. Returns 1 on success, 0 on failure.
+    png = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, PNGErrorFunction, NULL);
+    if (png == NULL) {
+        return 0;
+    }
+    info = png_create_info_struct(png);
+    if (info == NULL) {
+        png_destroy_write_struct((png_structpp)&png, NULL);
+        return 0;
+    }
+    if (setjmp(png_jmpbuf(png))) { // non-zero when PNGErrorFunction longjmp()s back here
+        png_destroy_write_struct((png_structpp)&png, (png_infopp)&info);
+        return 0;
+    }
+    png_init_io(png, out_file);
+    png_set_IHDR(png, info, width, height, 8, has_alpha ? PNG_COLOR_TYPE_RGBA : PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE,
+                 PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
+    png_write_info(png, info);
+    for (y = 0; y < height; ++y) { // one row per call; rows are 'stride' bytes apart in 'rgb'
+        png_bytep row = rgb + y * stride;
+        png_write_rows(png, &row, 1);
+    }
+    png_write_end(png, info);
+    png_destroy_write_struct((png_structpp)&png, (png_infopp)&info);
+    return 1;
+}
+#else                        // !HAVE_WINCODEC_H && !WEBP_HAVE_PNG
+static int WritePNG(FILE* out_file, const WebPDecBuffer* const buffer) {
+    // Stub for builds without WIC or libpng: prints guidance on stderr and reports failure.
+    (void)out_file, (void)buffer;
+    fputs(
+        "PNG support not compiled. Please install the libpng "
+        "development package before building.\n", stderr);
+    fputs("You can run with -ppm flag to decode in PPM format.\n", stderr);
+    return 0;
+}
+#endif
+
+static int WritePPM(FILE* fout, const WebPDecBuffer* const buffer, int alpha) {
+    // Writes a PAM (P7, RGBA) header when 'alpha' is set, a PPM (P6, RGB) header otherwise,
+    // then the raw sample rows. Returns 1 on success, 0 as soon as a row is not fully written.
+    const uint32_t cols = buffer->width;
+    const uint32_t rows = buffer->height;
+    const uint8_t* const pixels = buffer->u.RGBA.rgba;
+    const int row_step = buffer->u.RGBA.stride;
+    const size_t channels = alpha ? 4 : 3;
+    uint32_t y;
+    if (!alpha) {
+        fprintf(fout, "P6\n%u %u\n255\n", cols, rows);
+    } else {
+        fprintf(fout,
+                "P7\nWIDTH %u\nHEIGHT %u\nDEPTH 4\nMAXVAL 255\n"
+                "TUPLTYPE RGB_ALPHA\nENDHDR\n",
+                cols, rows);
+    }
+    for (y = 0; y != rows; ++y) {
+        const uint8_t* const row = pixels + y * row_step;
+        if (fwrite(row, cols, channels, fout) != channels) return 0;
+    }
+    return 1;
+}
+
+static void PutLE16(uint8_t* const dst, uint32_t value) {
+    dst[0] = (uint8_t)(value & 0xff);        // bits 0..7 first (little-endian)
+    dst[1] = (uint8_t)((value >> 8) & 0xff); // then bits 8..15; higher bits are dropped
+}
+
+static void PutLE32(uint8_t* const dst, uint32_t value) {
+    PutLE16(dst, value & 0xffff);  // low 16 bits at offsets 0..1
+    PutLE16(dst + 2, value >> 16); // high 16 bits at offsets 2..3
+}
+
+#define BMP_HEADER_SIZE 54
+static int WriteBMP(FILE* fout, const WebPDecBuffer* const buffer) {
+    // Writes 'buffer' as an uncompressed BMP: a BMP_HEADER_SIZE-byte header, then each row
+    // padded with zeroes to a multiple of 4 bytes. Returns 1 on success, 0 on a failed write.
+    const uint32_t bytes_per_px = (buffer->colorspace == MODE_BGR) ? 3 : 4;
+    const uint32_t width = buffer->width;
+    const uint32_t height = buffer->height;
+    const uint8_t* const pixels = buffer->u.RGBA.rgba;
+    const int src_stride = buffer->u.RGBA.stride;
+    const uint32_t row_bytes = bytes_per_px * width;
+    const uint32_t padded_row_bytes = (row_bytes + 3) & ~3; // pad to 4
+    const uint32_t pad_bytes = padded_row_bytes - row_bytes;
+    const uint8_t padding[3] = {0};
+    uint8_t header[BMP_HEADER_SIZE] = {0};
+    uint32_t y;
+
+    // bitmap file header
+    PutLE16(header + 0, 0x4d42);                                      // signature 'BM'
+    PutLE32(header + 2, padded_row_bytes * height + BMP_HEADER_SIZE); // size including header
+    PutLE32(header + 6, 0);                                           // reserved
+    PutLE32(header + 10, BMP_HEADER_SIZE);                            // offset to pixel array
+    // bitmap info header
+    PutLE32(header + 14, 40);               // DIB header size
+    PutLE32(header + 18, width);            // dimensions
+    PutLE32(header + 22, -(int)height);     // vertical flip!
+    PutLE16(header + 26, 1);                // number of planes
+    PutLE16(header + 28, bytes_per_px * 8); // bits per pixel
+    PutLE32(header + 30, 0);                // no compression (BI_RGB)
+    PutLE32(header + 34, 0);                // image size (dummy)
+    PutLE32(header + 38, 2400);             // x pixels/meter
+    PutLE32(header + 42, 2400);             // y pixels/meter
+    PutLE32(header + 46, 0);                // number of palette colors
+    PutLE32(header + 50, 0);                // important color count
+
+    // TODO(skal): color profile
+
+    // write header
+    if (fwrite(header, sizeof(header), 1, fout) != 1) {
+        return 0;
+    }
+
+    // write pixel array, one source row at a time
+    for (y = 0; y < height; ++y) {
+        if (fwrite(pixels + y * src_stride, row_bytes, 1, fout) != 1) {
+            return 0;
+        }
+        // write padding zeroes (only when the row is not already 4-byte aligned)
+        if (pad_bytes != 0 && fwrite(padding, pad_bytes, 1, fout) != 1) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+#undef BMP_HEADER_SIZE
+
+#define NUM_IFD_ENTRIES 15
+#define EXTRA_DATA_SIZE 16
+// 10b for signature/header + n * 12b entries + 4b for IFD terminator:
+#define EXTRA_DATA_OFFSET (10 + 12 * NUM_IFD_ENTRIES + 4)
+#define TIFF_HEADER_SIZE (EXTRA_DATA_OFFSET + EXTRA_DATA_SIZE)
+
+static int WriteTIFF(FILE* fout, const WebPDecBuffer* const buffer) {
+    const int has_alpha = (buffer->colorspace != MODE_RGB);
+    const uint32_t width = buffer->width;
+    const uint32_t height = buffer->height;
+    const uint8_t* const rgba = buffer->u.RGBA.rgba;
+    const int stride = buffer->u.RGBA.stride;
+    const uint8_t bytes_per_px = has_alpha ? 4 : 3;
+    // For non-alpha case, we omit tag 0x152 (ExtraSamples).
+    const uint8_t num_ifd_entries = has_alpha ? NUM_IFD_ENTRIES : NUM_IFD_ENTRIES - 1;
+    uint8_t tiff_header[TIFF_HEADER_SIZE] = {
+        0x49, 0x49, 0x2a, 0x00, // little endian signature
+        8, 0, 0, 0,             // offset to the unique IFD that follows
+        // IFD (offset = 8). Entries must be written in increasing tag order.
+        // Each entry is 12 bytes: tag (2), type (2), count (4), value or offset (4).
+        num_ifd_entries, 0,                       // Number of entries in the IFD (12 bytes each).
+        0x00, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, //  10: Width  (TBD)
+        0x01, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, //  22: Height (TBD)
+        0x02, 0x01, 3, 0, bytes_per_px, 0, 0, 0,  //  34: BitsPerSample: 8888 (tag, type, count)
+        EXTRA_DATA_OFFSET + 0, 0, 0, 0, 0x03, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0, //  (offset field of entry 34) | 46: Compression: none
+        0x06, 0x01, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0,                                 //  58: Photometric: RGB
+        0x11, 0x01, 4, 0, 1, 0, 0, 0,                                             //  70: Strips offset:
+        TIFF_HEADER_SIZE, 0, 0, 0,                                                //      data follows header
+        0x12, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0,                                 //  82: Orientation: topleft
+        0x15, 0x01, 3, 0, 1, 0, 0, 0,                                             //  94: SamplesPerPixels (tag, type, count)
+        bytes_per_px, 0, 0, 0, 0x16, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0,          // (value field of entry 94) | 106: Rows per strip (TBD)
+        0x17, 0x01, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0,                                 // 118: StripByteCount (TBD)
+        0x1a, 0x01, 5, 0, 1, 0, 0, 0,                                             // 130: X-resolution (tag, type, count)
+        EXTRA_DATA_OFFSET + 8, 0, 0, 0, 0x1b, 0x01, 5, 0, 1, 0, 0, 0,             // (offset field of entry 130) | 142: Y-resolution
+        EXTRA_DATA_OFFSET + 8, 0, 0, 0, 0x1c, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0, // (offset field of entry 142) | 154: PlanarConfiguration
+        0x28, 0x01, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0,                                 // 166: ResolutionUnit (inch)
+        0x52, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0,                                 // 178: ExtraSamples: rgbA
+        0, 0, 0, 0,                                                               // 190: IFD terminator
+        // EXTRA_DATA_OFFSET:
+        8, 0, 8, 0, 8, 0, 8, 0, // BitsPerSample
+        72, 0, 0, 0, 1, 0, 0, 0 // 72 pixels/inch, for X/Y-resolution
+    };
+    uint32_t y;
+    // Writes 'buffer' as an uncompressed single-strip TIFF. Returns 1 on success, 0 on a failed write.
+    // Fill placeholders in IFD (the value field sits 8 bytes into each 12-byte entry):
+    PutLE32(tiff_header + 10 + 8, width);
+    PutLE32(tiff_header + 22 + 8, height);
+    PutLE32(tiff_header + 106 + 8, height);
+    PutLE32(tiff_header + 118 + 8, width * bytes_per_px * height);
+    if (!has_alpha) PutLE32(tiff_header + 178, 0); // IFD terminator, placed where the omitted ExtraSamples entry starts
+
+    // write header
+    if (fwrite(tiff_header, sizeof(tiff_header), 1, fout) != 1) {
+        return 0;
+    }
+    // write pixel values
+    for (y = 0; y < height; ++y) {
+        if (fwrite(rgba + y * stride, bytes_per_px, width, fout) != width) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+#undef TIFF_HEADER_SIZE
+#undef EXTRA_DATA_OFFSET
+#undef EXTRA_DATA_SIZE
+#undef NUM_IFD_ENTRIES
+
+static int WriteAlphaPlane(FILE* fout, const WebPDecBuffer* const buffer) {
+    // Dumps the alpha plane as a binary PGM (P5). Returns 1 on success, 0 on a failed row write.
+    const WebPYUVABuffer* const planes = &buffer->u.YUVA;
+    const uint32_t cols = buffer->width;
+    const uint32_t rows = buffer->height;
+    uint32_t row = 0;
+    assert(planes->a != NULL);
+    fprintf(fout, "P5\n%u %u\n255\n", cols, rows);
+    while (row < rows) {
+        const uint8_t* const src = planes->a + row * planes->a_stride;
+        if (fwrite(src, cols, 1, fout) != 1) return 0;
+        ++row;
+    }
+    return 1;
+}
+
+// format=PGM: save a grayscale PGM file using the IMC4 layout
+// (http://www.fourcc.org/yuv.php#IMC4). This is a very convenient format for
+// viewing the samples, esp. for odd dimensions.
+// format=YUV: just save the Y/U/V/A planes sequentially without header.
+static int WritePGMOrYUV(FILE* fout, const WebPDecBuffer* const buffer, OutputFileFormat format) {
+    // Returns 1 when every plane row was written, 0 after the first failed fwrite().
+    const WebPYUVABuffer* const planes = &buffer->u.YUVA;
+    const int is_pgm = (format == PGM);
+    const int luma_w = buffer->width;
+    const int luma_h = buffer->height;
+    const int chroma_w = (luma_w + 1) / 2;
+    const int chroma_h = (luma_h + 1) / 2;
+    const int alpha_h = (planes->a != NULL) ? luma_h : 0;
+    const int pad_row = is_pgm && (luma_w & 1); // PGM rows get one padding byte when the width is odd
+    int success = 1;
+    int row;
+
+    if (is_pgm) {
+        fprintf(fout, "P5\n%d %d\n255\n", (luma_w + 1) & ~1, luma_h + chroma_h + alpha_h);
+    }
+    for (row = 0; success && row < luma_h; ++row) {
+        success &= (fwrite(planes->y + row * planes->y_stride, luma_w, 1, fout) == 1);
+        if (pad_row) fputc(0, fout); // padding byte
+    }
+    if (!is_pgm) {
+        // flat layout: the whole U plane, then the whole V plane
+        for (row = 0; success && row < chroma_h; ++row) {
+            success &= (fwrite(planes->u + row * planes->u_stride, chroma_w, 1, fout) == 1);
+        }
+        for (row = 0; success && row < chroma_h; ++row) {
+            success &= (fwrite(planes->v + row * planes->v_stride, chroma_w, 1, fout) == 1);
+        }
+    } else {
+        // IMC4 layout: each U row is immediately followed by the matching V row
+        for (row = 0; success && row < chroma_h; ++row) {
+            success &= (fwrite(planes->u + row * planes->u_stride, chroma_w, 1, fout) == 1);
+            success &= (fwrite(planes->v + row * planes->v_stride, chroma_w, 1, fout) == 1);
+        }
+    }
+    for (row = 0; success && row < alpha_h; ++row) {
+        success &= (fwrite(planes->a + row * planes->a_stride, luma_w, 1, fout) == 1);
+        if (pad_row) fputc(0, fout); // padding byte
+    }
+    return success;
+}
+
+static int SaveOutput(const WebPDecBuffer* const buffer, OutputFileFormat format, const char* const out_file) {
+    FILE* fout = NULL;
+    int needs_open_file = 1;
+    const int use_stdout = !strcmp(out_file, "-");
+    int ok = 1;
+    Stopwatch stop_watch;
+    // Writes 'buffer' to 'out_file' ("-" selects stdout) in 'format'. Returns 1 on success, 0 on failure.
+    if (verbose) {
+        StopwatchReset(&stop_watch);
+    }
+
+#ifdef HAVE_WINCODEC_H
+    needs_open_file = (format != PNG); // the WIC PNG writer creates its own output stream
+#endif
+
+    if (needs_open_file) {
+        fout = use_stdout ? ExUtilSetBinaryMode(stdout) : fopen(out_file, "wb");
+        if (fout == NULL) {
+            fprintf(stderr, "Error opening output file %s\n", out_file);
+            return 0;
+        }
+    }
+
+    if (format == PNG) {
+#ifdef HAVE_WINCODEC_H
+        ok &= WritePNG(out_file, use_stdout, buffer);
+#else
+        ok &= WritePNG(fout, buffer);
+#endif
+    } else if (format == PAM) {
+        ok &= WritePPM(fout, buffer, 1);
+    } else if (format == PPM) {
+        ok &= WritePPM(fout, buffer, 0);
+    } else if (format == BMP) {
+        ok &= WriteBMP(fout, buffer);
+    } else if (format == TIFF) {
+        ok &= WriteTIFF(fout, buffer);
+    } else if (format == PGM || format == YUV) {
+        ok &= WritePGMOrYUV(fout, buffer, format);
+    } else if (format == ALPHA_PLANE_ONLY) {
+        ok &= WriteAlphaPlane(fout, buffer);
+    }
+    if (fout != NULL) { // buffered bytes are only committed on flush/close, so a failure there is a write error
+        ok &= (((fout == stdout) ? fflush(fout) : fclose(fout)) == 0);
+    }
+    if (ok) {
+        if (!quiet) {
+            if (use_stdout) {
+                fprintf(stderr, "Saved to stdout\n");
+            } else {
+                fprintf(stderr, "Saved file %s\n", out_file);
+            }
+        }
+        if (verbose) {
+            const double write_time = StopwatchReadAndReset(&stop_watch);
+            fprintf(stderr, "Time to write output: %.3fs\n", write_time);
+        }
+    } else {
+        if (use_stdout) {
+            fprintf(stderr, "Error writing to stdout !!\n");
+        } else {
+            fprintf(stderr, "Error writing file %s !!\n", out_file);
+        }
+    }
+    return ok;
+}
+
+static void Help(void) { // Prints the command-line usage summary to stdout.
+    printf( // NOTE(review): main() also parses "-n <value>", which is not listed below.
+        "Usage: dwebp in_file [options] [-o out_file]\n\n"
+        "Decodes the WebP image file to PNG format [Default]\n"
+        "Use following options to convert into alternate image formats:\n"
+        "  -pam ......... save the raw RGBA samples as a color PAM\n"
+        "  -ppm ......... save the raw RGB samples as a color PPM\n"
+        "  -bmp ......... save as uncompressed BMP format\n"
+        "  -tiff ........ save as uncompressed TIFF format\n"
+        "  -pgm ......... save the raw YUV samples as a grayscale PGM\n"
+        "                 file with IMC4 layout\n"
+        "  -yuv ......... save the raw YUV samples in flat layout\n"
+        "\n"
+        " Other options are:\n"
+        "  -version  .... print version number and exit\n"
+        "  -nofancy ..... don't use the fancy YUV420 upscaler\n"
+        "  -nofilter .... disable in-loop filtering\n"
+        "  -nodither .... disable dithering\n"
+        "  -dither <d> .. dithering strength (in 0..100)\n"
+        "  -alpha_dither  use alpha-plane dithering if needed\n"
+        "  -mt .......... use multi-threading\n"
+        "  -crop <x> <y> <w> <h> ... crop output with the given rectangle\n"
+        "  -resize <w> <h> ......... scale the output (*after* any cropping)\n"
+        "  -flip ........ flip the output vertically\n"
+        "  -alpha ....... only save the alpha plane\n"
+        "  -incremental . use incremental decoding (useful for tests)\n"
+        "  -h     ....... this help message\n"
+        "  -v     ....... verbose (e.g. print encoding/decoding times)\n"
+        "  -quiet ....... quiet mode, don't print anything\n"
+#ifndef WEBP_DLL
+        "  -noasm ....... disable all assembly optimizations\n"
+#endif
+        );
+}
+
+static const char* const kFormatType[] = {"unspecified", "lossy", "lossless"};
+
+// #define RUN_XINLINX_HW
+
+#ifdef RUN_XINLINX_HW
+
+#define TEST_CASE_NUM 4
+#define TEST_PARA_NUM 10
+char* test_argv[TEST_CASE_NUM][TEST_PARA_NUM] = {
+    {
+        "dwebp.exe", "../../lossless.webp", "-o", "../../lossless.png", NULL,
+    },
+    {
+        "dwebp.exe", "-n", "4", "../../lossless.webp", "-o", "../../lossless_mt.png", NULL,
+    },
+    {
+        "dwebp.exe", "../../lossy.webp", "-o", "../../lossy.png", NULL,
+    },
+    {
+        "dwebp.exe", "-mt", "../../lossy.webp", "-o", "../../lossy-mt.png", NULL,
+    }};
+
+int org_main(int argc, const char* argv[]);
+
+int main(int argc, const char* argv[]) {
+    // Batch driver: runs org_main() once per canned command line in test_argv and returns the
+    // first non-zero result, or 0 when every case passed. The real argc/argv are not used.
+    int index_case;
+    (void)argc, (void)argv;
+    for (index_case = 0; index_case < TEST_CASE_NUM; index_case++) {
+        int _argc = 0;
+        int ret;
+        // Each row is NULL-terminated; count its entries (slot 0 holds the program name).
+        while (test_argv[index_case][++_argc] != NULL) {
+        }
+        ret = org_main(_argc, (const char**)test_argv[index_case]); // char** does not convert implicitly
+        if (ret != 0) {
+            return ret;
+        }
+        ResetDecodeProfilingData(); // called between cases, before the next run
+    }
+    return 0;
+}
+
+int org_main(int argc, const char* argv[]) {
+    int i = 0;
+    for (i = 0; i < argc; i++) {
+        fprintf(stderr, "%s ", argv[i]);
+    }
+    fprintf(stderr, "\n");
+
+#else
+int main(int argc, const char* argv[]) {
+#endif
+    // Parses the options, decodes 'in_file' and, with -o, saves it. Returns 0 on success or help/version, -1 on error.
+    int ok = 0;
+    const char* in_file = NULL;
+    const char* out_file = NULL;
+
+    WebPDecoderConfig config;
+    WebPDecBuffer* const output_buffer = &config.output;
+    WebPBitstreamFeatures* const bitstream = &config.input;
+    OutputFileFormat format = PNG;
+    int incremental = 0;
+    int c;
+
+    if (!WebPInitDecoderConfig(&config)) {
+        fprintf(stderr, "Library version mismatch!\n");
+        return -1;
+    }
+
+    for (c = 1; c < argc; ++c) {
+        int parse_error = 0;
+        if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
+            Help();
+            return 0;
+        } else if (!strcmp(argv[c], "-o") && c < argc - 1) {
+            out_file = argv[++c];
+        } else if (!strcmp(argv[c], "-alpha")) {
+            format = ALPHA_PLANE_ONLY;
+        } else if (!strcmp(argv[c], "-nofancy")) {
+            config.options.no_fancy_upsampling = 1;
+        } else if (!strcmp(argv[c], "-nofilter")) {
+            config.options.bypass_filtering = 1;
+        } else if (!strcmp(argv[c], "-pam")) {
+            format = PAM;
+        } else if (!strcmp(argv[c], "-ppm")) {
+            format = PPM;
+        } else if (!strcmp(argv[c], "-bmp")) {
+            format = BMP;
+        } else if (!strcmp(argv[c], "-tiff")) {
+            format = TIFF;
+        } else if (!strcmp(argv[c], "-quiet")) {
+            quiet = 1;
+        } else if (!strcmp(argv[c], "-version")) {
+            const int version = WebPGetDecoderVersion();
+            printf("%d.%d.%d\n", (version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
+            return 0;
+        } else if (!strcmp(argv[c], "-pgm")) {
+            format = PGM;
+        } else if (!strcmp(argv[c], "-yuv")) {
+            format = YUV;
+        } else if (!strcmp(argv[c], "-mt")) {
+            config.options.use_threads = 1;
+        } else if (!strcmp(argv[c], "-alpha_dither")) {
+            config.options.alpha_dithering_strength = 100;
+        } else if (!strcmp(argv[c], "-nodither")) {
+            config.options.dithering_strength = 0;
+        } else if (!strcmp(argv[c], "-dither") && c < argc - 1) {
+            config.options.dithering_strength = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if (!strcmp(argv[c], "-n") && c < argc - 1) {
+            ++c; // always consume the value, so it is never mistaken for the input file name
+#ifdef WEBP_USE_THREAD
+            config.options.thread_number = ExUtilGetInt(argv[c], 0, &parse_error);
+#endif
+        } else if (!strcmp(argv[c], "-crop") && c < argc - 4) {
+            config.options.use_cropping = 1;
+            config.options.crop_left = ExUtilGetInt(argv[++c], 0, &parse_error);
+            config.options.crop_top = ExUtilGetInt(argv[++c], 0, &parse_error);
+            config.options.crop_width = ExUtilGetInt(argv[++c], 0, &parse_error);
+            config.options.crop_height = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if ((!strcmp(argv[c], "-scale") || !strcmp(argv[c], "-resize")) &&
+                   c < argc - 2) { // '-scale' is left for compatibility
+            config.options.use_scaling = 1;
+            config.options.scaled_width = ExUtilGetInt(argv[++c], 0, &parse_error);
+            config.options.scaled_height = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if (!strcmp(argv[c], "-flip")) {
+            config.options.flip = 1;
+        } else if (!strcmp(argv[c], "-v")) {
+            verbose = 1;
+#ifndef WEBP_DLL
+        } else if (!strcmp(argv[c], "-noasm")) {
+            VP8GetCPUInfo = NULL;
+#endif
+        } else if (!strcmp(argv[c], "-incremental")) {
+            incremental = 1;
+        } else if (!strcmp(argv[c], "--")) {
+            if (c < argc - 1) in_file = argv[++c];
+            break;
+        } else if (argv[c][0] == '-') {
+            fprintf(stderr, "Unknown option '%s'\n", argv[c]);
+            Help();
+            return -1;
+        } else {
+            in_file = argv[c]; // any other argument is the input file; the last one wins
+        }
+
+        if (parse_error) {
+            Help();
+            return -1;
+        }
+    }
+
+    if (in_file == NULL) {
+        fprintf(stderr, "missing input file!!\n");
+        Help();
+        return -1;
+    }
+
+    if (quiet) verbose = 0;
+
+    {
+        VP8StatusCode status = VP8_STATUS_OK;
+        size_t data_size = 0;
+        const uint8_t* data = NULL;
+        if (!ExUtilLoadWebP(in_file, &data, &data_size, bitstream)) {
+            return -1;
+        }
+
+        switch (format) { // pick the decoder colorspace that the selected writer expects
+            case PNG:
+#ifdef HAVE_WINCODEC_H
+                output_buffer->colorspace = bitstream->has_alpha ? MODE_BGRA : MODE_BGR;
+#else
+                output_buffer->colorspace = bitstream->has_alpha ? MODE_RGBA : MODE_RGB;
+#endif
+                break;
+            case PAM:
+                output_buffer->colorspace = MODE_RGBA;
+                break;
+            case PPM:
+                output_buffer->colorspace = MODE_RGB; // drops alpha for PPM
+                break;
+            case BMP:
+                output_buffer->colorspace = bitstream->has_alpha ? MODE_BGRA : MODE_BGR;
+                break;
+            case TIFF: // note: force pre-multiplied alpha
+                output_buffer->colorspace = bitstream->has_alpha ? MODE_rgbA : MODE_RGB;
+                break;
+            case PGM:
+            case YUV:
+                output_buffer->colorspace = bitstream->has_alpha ? MODE_YUVA : MODE_YUV;
+                break;
+            case ALPHA_PLANE_ONLY:
+                output_buffer->colorspace = MODE_YUVA;
+                break;
+            default:
+                free((void*)data);
+                return -1;
+        }
+
+        fprintf(stderr, "start profiling.......\n");
+
+        if (incremental) {
+            status = ExUtilDecodeWebPIncremental(data, data_size, verbose, &config);
+        } else {
+            status = ExUtilDecodeWebP(data, data_size, verbose, &config);
+        }
+
+        // display all profiling data
+        DisplayDecodeProfilingResult();
+
+        fprintf(stderr, "stop profiling.......\n");
+
+        free((void*)data);
+        ok = (status == VP8_STATUS_OK);
+        if (!ok) {
+            ExUtilPrintWebPError(in_file, status);
+            goto Exit;
+        }
+    }
+
+    if (out_file != NULL) {
+        if (!quiet) {
+            fprintf(stderr,
+                    "Decoded %s. Dimensions: %d x %d %s. Format: %s. "
+                    "Now saving...\n",
+                    in_file, output_buffer->width, output_buffer->height, bitstream->has_alpha ? " (with alpha)" : "",
+                    kFormatType[bitstream->format]);
+        }
+        ok = SaveOutput(output_buffer, format, out_file);
+    } else {
+        if (!quiet) {
+            fprintf(stderr,
+                    "File %s can be decoded "
+                    "(dimensions: %d x %d %s. Format: %s).\n",
+                    in_file, output_buffer->width, output_buffer->height, bitstream->has_alpha ? " (with alpha)" : "",
+                    kFormatType[bitstream->format]);
+            fprintf(stderr,
+                    "Nothing written; "
+                    "use -o flag to save the result as e.g. PNG.\n");
+        }
+    }
+Exit:
+    WebPFreeDecBuffer(output_buffer);
+    return ok ? 0 : -1;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/example_util.c b/codec/L2/demos/webpEnc/host/example_util.c
new file mode 100644
index 0000000000..e246e6bafa
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/example_util.c
@@ -0,0 +1,268 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  Utility functions used by the example programs.
+//
+
+#include "./example_util.h"
+
+#if defined(_WIN32)
+#include <fcntl.h> // for _O_BINARY
+#include <io.h>    // for _setmode()
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "webp/decode.h"
+#include "./stopwatch.h"
+
+//------------------------------------------------------------------------------
+// String parsing
+
+// Parses 'v' as an unsigned integer in the given 'base' using strtoul().
+// When nothing could be consumed (this includes v == NULL), 'error' is
+// non-NULL and '*error' is not already set, '*error' becomes 1 and a message
+// is printed to stderr. Returns the parsed value (0 if nothing was parsed).
+uint32_t ExUtilGetUInt(const char* const v, int base, int* const error) {
+    char* stop = NULL;
+    uint32_t value = 0u;
+
+    if (v != NULL) {
+        value = (uint32_t)strtoul(v, &stop, base);
+    }
+    // strtoul() leaves 'stop' equal to 'v' when it consumed no characters; for
+    // a NULL input both pointers are NULL, which is reported the same way.
+    if (stop != v || error == NULL || *error) {
+        return value;
+    }
+    *error = 1;
+    fprintf(stderr, "Error! '%s' is not an integer.\n", (v != NULL) ? v : "(null)");
+    return value;
+}
+
+// Signed counterpart of ExUtilGetUInt(): the text is parsed as an unsigned
+// value and then converted to int; '*error' is reported the same way.
+int ExUtilGetInt(const char* const v, int base, int* const error) {
+    const uint32_t parsed = ExUtilGetUInt(v, base, error);
+    return (int)parsed;
+}
+
+// Parses 'v' as a floating point number using strtod(), narrowed to float.
+// When nothing could be consumed (this includes v == NULL), 'error' is
+// non-NULL and '*error' is not already set, '*error' becomes 1 and a message
+// is printed to stderr. Returns the parsed value (0 if nothing was parsed).
+float ExUtilGetFloat(const char* const v, int* const error) {
+    char* stop = NULL;
+    float value = 0.f;
+
+    if (v != NULL) {
+        value = (float)strtod(v, &stop);
+    }
+    // 'stop' still equal to 'v' means strtod() consumed no characters.
+    if (stop != v || error == NULL || *error) {
+        return value;
+    }
+    *error = 1;
+    fprintf(stderr, "Error! '%s' is not a floating point number.\n", (v != NULL) ? v : "(null)");
+    return value;
+}
+
+// -----------------------------------------------------------------------------
+// File I/O
+
+// Puts 'file' into binary (O_BINARY) mode. Only Windows builds do any work
+// here; on other platforms 'file' is handed back untouched.
+// Returns 'file' on success, NULL if the mode switch failed.
+FILE* ExUtilSetBinaryMode(FILE* file) {
+#if defined(_WIN32)
+    const int previous_mode = _setmode(_fileno(file), _O_BINARY);
+    if (previous_mode == -1) {
+        fprintf(stderr, "Failed to reopen file in O_BINARY mode.\n");
+        file = NULL;
+    }
+#endif
+    return file;
+}
+
+// Reads stdin until EOF into a malloc()'ed buffer that grows by doubling.
+// On success stores the buffer in '*data' and the byte count in '*data_size'
+// and returns 1; the caller releases '*data' with free(). On failure returns 0
+// and, when both out-pointers were valid, leaves them at NULL / 0.
+int ExUtilReadFromStdin(const uint8_t** data, size_t* data_size) {
+    static const size_t kBlockSize = 16384; // default initial size
+    uint8_t* buffer = NULL;
+    size_t capacity = 0;
+    size_t used = 0;
+    int failed = 0;
+
+    if (data == NULL || data_size == NULL) return 0;
+    *data = NULL;
+    *data_size = 0;
+
+    if (!ExUtilSetBinaryMode(stdin)) return 0;
+
+    for (;;) {
+        size_t grow_by;
+        void* grown;
+
+        if (feof(stdin)) break;
+        // First round allocates kBlockSize, every later round doubles.
+        grow_by = (capacity == 0) ? kBlockSize : capacity;
+        grown = realloc(buffer, capacity + grow_by);
+        if (grown == NULL) {
+            failed = 1;
+            break;
+        }
+        buffer = (uint8_t*)grown;
+        capacity += grow_by;
+        used += fread(buffer + used, 1, grow_by, stdin);
+        // A short read means EOF or an error; ferror() below tells them apart.
+        if (used < capacity) break;
+    }
+    if (!failed && ferror(stdin)) failed = 1;
+
+    if (failed) {
+        free(buffer);
+        fprintf(stderr, "Could not read from stdin\n");
+        return 0;
+    }
+    *data = buffer;
+    *data_size = used;
+    return 1;
+}
+
+// Reads the whole of 'file_name' into a freshly malloc()'ed buffer.
+// A NULL 'file_name' or "-" delegates to ExUtilReadFromStdin().
+// On success stores the buffer in '*data' and its size in '*data_size' and
+// returns 1; the caller releases '*data' with free(). On failure returns 0
+// with '*data' / '*data_size' left at NULL / 0, and the input file is closed
+// on every path.
+int ExUtilReadFile(const char* const file_name, const uint8_t** data, size_t* data_size) {
+    int ok;
+    long end_pos;
+    void* file_data;
+    size_t file_size;
+    FILE* in;
+    const int from_stdin = (file_name == NULL) || !strcmp(file_name, "-");
+
+    if (from_stdin) return ExUtilReadFromStdin(data, data_size);
+
+    if (data == NULL || data_size == NULL) return 0;
+    *data = NULL;
+    *data_size = 0;
+
+    in = fopen(file_name, "rb");
+    if (in == NULL) {
+        fprintf(stderr, "cannot open input file '%s'\n", file_name);
+        return 0;
+    }
+    // ftell() returns -1 on failure (e.g. a non-seekable input); converting
+    // that to size_t unchecked would request an enormous allocation.
+    if (fseek(in, 0, SEEK_END) != 0 || (end_pos = ftell(in)) < 0 || fseek(in, 0, SEEK_SET) != 0) {
+        fprintf(stderr, "cannot determine the size of input file '%s'\n", file_name);
+        fclose(in);
+        return 0;
+    }
+    file_size = (size_t)end_pos;
+    file_data = malloc(file_size);
+    if (file_data == NULL) {
+        // The stream used to be leaked on this path.
+        fclose(in);
+        fprintf(stderr, "memory allocation failure when reading file %s\n", file_name);
+        return 0;
+    }
+    ok = (fread(file_data, file_size, 1, in) == 1);
+    fclose(in);
+
+    if (!ok) {
+        fprintf(stderr, "Could not read %d bytes of data from file %s\n", (int)file_size, file_name);
+        free(file_data);
+        return 0;
+    }
+    *data = (uint8_t*)file_data;
+    *data_size = file_size;
+    return 1;
+}
+
+// Writes 'data_size' bytes from 'data' to 'file_name'; a NULL 'file_name' or
+// "-" writes to stdout, which is first switched to binary mode so the bytes
+// are not translated on Windows (mirrors what ExUtilReadFromStdin() does for
+// stdin). Returns 1 on success and 0 when 'data' is NULL, the destination
+// cannot be opened, the write is short, or closing the file reports an error.
+int ExUtilWriteFile(const char* const file_name, const uint8_t* data, size_t data_size) {
+    int ok;
+    FILE* out;
+    const int to_stdout = (file_name == NULL) || !strcmp(file_name, "-");
+
+    if (data == NULL) {
+        return 0;
+    }
+    out = to_stdout ? ExUtilSetBinaryMode(stdout) : fopen(file_name, "wb");
+    if (out == NULL) {
+        // 'file_name' may be NULL on the stdout path; never pass NULL to %s.
+        fprintf(stderr, "Error! Cannot open output file '%s'\n", to_stdout ? "-" : file_name);
+        return 0;
+    }
+    ok = (fwrite(data, data_size, 1, out) == 1);
+    // Buffered data is only flushed by fclose(), so a failure there (e.g. a
+    // full disk) means the file is incomplete even though fwrite() succeeded.
+    if (!to_stdout && fclose(out) != 0) ok = 0;
+    return ok;
+}
+
+//------------------------------------------------------------------------------
+// WebP decoding
+
+// Printable names for decoder status codes, indexed by the status value
+// itself (see ExUtilPrintWebPError()); the table is sized to cover codes up to
+// and including VP8_STATUS_NOT_ENOUGH_DATA.
+// NOTE(review): the entries are presumably in VP8StatusCode declaration order
+// - confirm against webp/decode.h.
+static const char* const kStatusMessages[VP8_STATUS_NOT_ENOUGH_DATA + 1] = {
+    "OK",        "OUT_OF_MEMORY", "INVALID_PARAM",  "BITSTREAM_ERROR", "UNSUPPORTED_FEATURE",
+    "SUSPENDED", "USER_ABORT",    "NOT_ENOUGH_DATA"};
+
+// Prints a message on stderr when 'config->input' flags the stream as
+// animated. This helper only reports; it does not abort anything itself.
+static void PrintAnimationWarning(const WebPDecoderConfig* const config) {
+    if (!config->input.has_animation) return;
+
+    fprintf(stderr,
+            "Error! Decoding of an animated WebP file is not supported.\n"
+            "       Use webpmux to extract the individual frames or\n"
+            "       vwebp to view this image.\n");
+}
+
+// Reports a decode failure for 'in_file' on stderr: the numeric 'status' is
+// always printed, followed by its name from kStatusMessages when the value
+// lies within [VP8_STATUS_OK, VP8_STATUS_NOT_ENOUGH_DATA].
+void ExUtilPrintWebPError(const char* const in_file, int status) {
+    const int has_name = !(status < VP8_STATUS_OK || status > VP8_STATUS_NOT_ENOUGH_DATA);
+
+    fprintf(stderr, "Decoding of %s failed.\n", in_file);
+    fprintf(stderr, "Status: %d", status);
+    if (has_name) {
+        fprintf(stderr, "(%s)", kStatusMessages[status]);
+    }
+    fprintf(stderr, "\n");
+}
+
+// Loads 'in_file' via ExUtilReadFile() and runs WebPGetFeatures() on the
+// bytes. The features are written to 'bitstream' when it is non-NULL, else to
+// a local scratch structure. Returns 1 on success. If the feature probe fails
+// the buffer is freed, '*data' / '*data_size' are reset to NULL / 0, the error
+// is printed and 0 is returned; 0 is also returned when the read itself fails.
+int ExUtilLoadWebP(const char* const in_file,
+                   const uint8_t** data,
+                   size_t* data_size,
+                   WebPBitstreamFeatures* bitstream) {
+    WebPBitstreamFeatures scratch_features;
+    WebPBitstreamFeatures* const features = (bitstream != NULL) ? bitstream : &scratch_features;
+    VP8StatusCode probe_status;
+
+    if (!ExUtilReadFile(in_file, data, data_size)) return 0;
+
+    probe_status = WebPGetFeatures(*data, *data_size, features);
+    if (probe_status == VP8_STATUS_OK) return 1;
+
+    // Not a usable WebP: hand nothing back to the caller.
+    free((void*)*data);
+    *data = NULL;
+    *data_size = 0;
+    ExUtilPrintWebPError(in_file, probe_status);
+    return 0;
+}
+
+//------------------------------------------------------------------------------
+
+// Decodes 'data' with a single WebPDecode() call using 'config'.
+// Returns VP8_STATUS_INVALID_PARAM when 'config' is NULL, otherwise the status
+// reported by WebPDecode(). With 'verbose' set, the time spent in the decode
+// call is printed to stderr.
+VP8StatusCode ExUtilDecodeWebP(const uint8_t* const data,
+                               size_t data_size,
+                               int verbose,
+                               WebPDecoderConfig* const config) {
+    Stopwatch timer;
+    VP8StatusCode result;
+
+    if (config == NULL) return VP8_STATUS_INVALID_PARAM;
+
+    PrintAnimationWarning(config);
+
+    StopwatchReset(&timer);
+
+    // Decoding call.
+    result = WebPDecode(data, data_size, config);
+
+    if (!verbose) return result;
+
+    {
+        const double decode_time = StopwatchReadAndReset(&timer);
+        fprintf(stderr, "Time to decode picture: %.3fs\n", decode_time);
+    }
+    return result;
+}
+
+// Decodes 'data' through the incremental decoder API: an incremental decoder
+// is created with WebPIDecode(), fed with WebPIUpdate() and released with
+// WebPIDelete(). When WEBP_EXPERIMENTAL_FEATURES is defined the input is fed
+// in randomly sized, growing prefixes; otherwise it is fed in one call.
+// Returns VP8_STATUS_INVALID_PARAM when 'config' is NULL,
+// VP8_STATUS_OUT_OF_MEMORY when the decoder could not be created, otherwise
+// the status of the last WebPIUpdate() call. With 'verbose' set, the elapsed
+// decode time is printed to stderr.
+VP8StatusCode ExUtilDecodeWebPIncremental(const uint8_t* const data,
+                                          size_t data_size,
+                                          int verbose,
+                                          WebPDecoderConfig* const config) {
+    Stopwatch stop_watch;
+    VP8StatusCode status = VP8_STATUS_OK;
+    WebPIDecoder* idec;
+    if (config == NULL) return VP8_STATUS_INVALID_PARAM;
+
+    PrintAnimationWarning(config);
+
+    StopwatchReset(&stop_watch);
+
+    // Decoding call.
+    idec = WebPIDecode(data, data_size, config);
+    if (idec == NULL) {
+        // Name the call that actually failed (the message used to mention
+        // WebPINewDecoder(), which this function never calls).
+        fprintf(stderr, "Failed during WebPIDecode().\n");
+        return VP8_STATUS_OUT_OF_MEMORY;
+    }
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+    {
+        size_t size = 0;
+        const size_t incr = 2 + (data_size / 20);
+        while (size < data_size) {
+            // Grow the visible prefix by a random step, capped at the full input.
+            size_t next_size = size + (rand() % incr);
+            if (next_size > data_size) next_size = data_size;
+            status = WebPIUpdate(idec, data, next_size);
+            if (status != VP8_STATUS_OK && status != VP8_STATUS_SUSPENDED) break;
+            size = next_size;
+        }
+    }
+#else
+    status = WebPIUpdate(idec, data, data_size);
+#endif
+    WebPIDelete(idec);
+
+    if (verbose) {
+        const double decode_time = StopwatchReadAndReset(&stop_watch);
+        fprintf(stderr, "Time to decode picture: %.3fs\n", decode_time);
+    }
+    return status;
+}
+
+// -----------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/example_util.h b/codec/L2/demos/webpEnc/host/example_util.h
new file mode 100644
index 0000000000..78b3ab2397
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/example_util.h
@@ -0,0 +1,91 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  Utility functions used by the example programs.
+//
+
+#ifndef WEBP_EXAMPLES_EXAMPLE_UTIL_H_
+#define WEBP_EXAMPLES_EXAMPLE_UTIL_H_
+
+#include <stdio.h>
+#include "webp/decode.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// String parsing
+
+// Parses 'v' using strtoul() or strtod(). If error is non-NULL, '*error' is set
+// to true on failure while on success it is left unmodified to allow chaining
+// of calls. An error is only printed on the first occurrence.
+uint32_t ExUtilGetUInt(const char* const v, int base, int* const error);
+int ExUtilGetInt(const char* const v, int base, int* const error);
+float ExUtilGetFloat(const char* const v, int* const error);
+
+//------------------------------------------------------------------------------
+// File I/O
+
+// Switches 'file' to binary (O_BINARY) mode on Windows; no-op on other platforms.
+// Returns 'file' on success, NULL otherwise.
+FILE* ExUtilSetBinaryMode(FILE* file);
+
+// Allocates storage for entire file 'file_name' and returns contents and size
+// in 'data' and 'data_size'. Returns 1 on success, 0 otherwise. '*data' should
+// be deleted using free().
+// If 'file_name' is NULL or equal to "-", input is read from stdin by calling
+// the function ExUtilReadFromStdin().
+int ExUtilReadFile(const char* const file_name, const uint8_t** data, size_t* data_size);
+
+// Same as ExUtilReadFile(), but reads until EOF from stdin instead.
+int ExUtilReadFromStdin(const uint8_t** data, size_t* data_size);
+
+// Write a data segment into a file named 'file_name'. Returns true if ok.
+// If 'file_name' is NULL or equal to "-", output is written to stdout.
+int ExUtilWriteFile(const char* const file_name, const uint8_t* data, size_t data_size);
+
+//------------------------------------------------------------------------------
+// WebP decoding
+
+// Prints an informative error message regarding decode failure of 'in_file'.
+// 'status' is treated as a VP8StatusCode and if valid will be printed as a
+// text string.
+void ExUtilPrintWebPError(const char* const in_file, int status);
+
+// Reads a WebP from 'in_file', returning the contents and size in 'data' and
+// 'data_size'. If not NULL, 'bitstream' is populated using WebPGetFeatures().
+// Returns true on success.
+int ExUtilLoadWebP(const char* const in_file,
+                   const uint8_t** data,
+                   size_t* data_size,
+                   WebPBitstreamFeatures* bitstream);
+
+// Decodes the WebP contained in 'data'.
+// 'config' is a structure previously initialized by WebPInitDecoderConfig().
+// 'config->output' should have the desired colorspace selected. 'verbose' will
+// cause decode timing to be reported.
+// Returns the decoder status. On success 'config->output' will contain the
+// decoded picture.
+VP8StatusCode ExUtilDecodeWebP(const uint8_t* const data,
+                               size_t data_size,
+                               int verbose,
+                               WebPDecoderConfig* const config);
+
+// Same as ExUtilDecodeWebP(), but using the incremental decoder.
+VP8StatusCode ExUtilDecodeWebPIncremental(const uint8_t* const data,
+                                          size_t data_size,
+                                          int verbose,
+                                          WebPDecoderConfig* const config);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // WEBP_EXAMPLES_EXAMPLE_UTIL_H_
diff --git a/codec/L2/demos/webpEnc/host/gif2webp.c b/codec/L2/demos/webpEnc/host/gif2webp.c
new file mode 100644
index 0000000000..3b2ebb526e
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/gif2webp.c
@@ -0,0 +1,542 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  simple tool to convert animated GIFs to WebP
+//
+// Authors: Skal (pascal.massimino@gmail.com)
+//          Urvang (urvang@google.com)
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef HAVE_CONFIG_H
+#include "webp/config.h"
+#endif
+
+#ifdef WEBP_HAVE_GIF
+
+#include <gif_lib.h>
+#include "webp/encode.h"
+#include "webp/mux.h"
+#include "./example_util.h"
+#include "./gifdec.h"
+
+//------------------------------------------------------------------------------
+
+// Transparent palette index that applies to the next frame. It is filled in
+// from graphics control extensions and reset to GIF_INDEX_INVALID in main()
+// after every image record.
+static int transparent_index = GIF_INDEX_INVALID; // Opaque by default.
+
+// Names of the WebPMuxError failure codes, indexed by the negated error code
+// (see ErrorString()); sized to reach WEBP_MUX_NOT_ENOUGH_DATA.
+static const char* const kErrorMessages[-WEBP_MUX_NOT_ENOUGH_DATA + 1] = {
+    "WEBP_MUX_NOT_FOUND", "WEBP_MUX_INVALID_ARGUMENT", "WEBP_MUX_BAD_DATA", "WEBP_MUX_MEMORY_ERROR",
+    "WEBP_MUX_NOT_ENOUGH_DATA"};
+
+// Returns the name of a WebPMuxError failure code. 'err' must lie between
+// WEBP_MUX_NOT_ENOUGH_DATA and WEBP_MUX_NOT_FOUND inclusive (asserted).
+static const char* ErrorString(WebPMuxError err) {
+    const int table_index = -err;
+    assert(err >= WEBP_MUX_NOT_ENOUGH_DATA && err <= WEBP_MUX_NOT_FOUND);
+    return kErrorMessages[table_index];
+}
+
+// Bit flags selecting which metadata chunks are copied from the GIF input.
+enum {
+    METADATA_ICC = 1 << 0,                     // ICC profile
+    METADATA_XMP = 1 << 1,                     // XMP data
+    METADATA_ALL = METADATA_ICC | METADATA_XMP // every supported kind
+};
+
+//------------------------------------------------------------------------------
+
+// Prints the command line usage of gif2webp to stdout.
+static void Help(void) {
+    // The help text, kept as the individual pieces that get written in order.
+    static const char* const kHelpText[] = {
+        "Usage:\n",
+        " gif2webp [options] gif_file -o webp_file\n",
+        "Options:\n",
+        "  -h / -help  ............ this help\n",
+        "  -lossy ................. encode image using lossy compression\n",
+        "  -mixed ................. for each frame in the image, pick lossy\n"
+        "                           or lossless compression heuristically\n",
+        "  -q <float> ............. quality factor (0:small..100:big)\n",
+        "  -m <int> ............... compression method (0=fast, 6=slowest)\n",
+        "  -min_size .............. minimize output size (default:off)\n"
+        "                           lossless compression by default; can be\n"
+        "                           combined with -q, -m, -lossy or -mixed\n"
+        "                           options\n",
+        "  -kmin <int> ............ min distance between key frames\n",
+        "  -kmax <int> ............ max distance between key frames\n",
+        "  -f <int> ............... filter strength (0=off..100)\n",
+        "  -metadata <string> ..... comma separated list of metadata to\n",
+        "                           ",
+        "copy from the input to the output if present\n",
+        "                           "
+        "Valid values: all, none, icc, xmp (default)\n",
+        "  -mt .................... use multi-threading if available\n",
+        "\n",
+        "  -version ............... print version number and exit\n",
+        "  -v ..................... verbose\n",
+        "  -quiet ................. don't print anything\n",
+        "\n",
+    };
+    const size_t piece_count = sizeof(kHelpText) / sizeof(kHelpText[0]);
+    size_t piece;
+
+    // None of the pieces contains a conversion specifier, so writing them
+    // verbatim produces the same output as using them as printf() formats.
+    for (piece = 0; piece < piece_count; ++piece) {
+        fputs(kHelpText[piece], stdout);
+    }
+}
+
+//------------------------------------------------------------------------------
+
+// gif2webp entry point: converts an (animated) GIF into an animated WebP.
+//
+// Steps: parse the command line, open the GIF, walk its records (each image
+// record is composed onto a canvas and handed to the WebP animation encoder;
+// extension records supply frame timing/disposal, loop count and optional
+// ICC/XMP metadata), assemble the WebP, optionally re-mux it to add loop count
+// and metadata, then write the result to the file given with -o.
+//
+// Returns 0 on success (and after -h / -version), 1 when the conversion fails
+// after option parsing, and -1 for command line errors or a library version
+// mismatch.
+int main(int argc, const char* argv[]) {
+    int verbose = 0;
+    int gif_error = GIF_ERROR;
+    WebPMuxError err = WEBP_MUX_OK;
+    int ok = 0;
+    const char *in_file = NULL, *out_file = NULL;
+    GifFileType* gif = NULL;
+    int frame_duration = 0;
+    int frame_timestamp = 0;
+    GIFDisposeMethod orig_dispose = GIF_DISPOSE_NONE;
+
+    WebPPicture frame;               // Frame rectangle only (not disposed).
+    WebPPicture curr_canvas;         // Not disposed.
+    WebPPicture prev_canvas;         // Disposed.
+    WebPPicture prev_to_prev_canvas; // Disposed.
+
+    WebPAnimEncoder* enc = NULL;
+    WebPAnimEncoderOptions enc_options;
+    WebPConfig config;
+
+    int is_first_frame = 1; // Whether we are processing the first frame.
+    int done;
+    int c;
+    int quiet = 0;
+    WebPData webp_data;
+
+    int keep_metadata = METADATA_XMP; // ICC not output by default.
+    WebPData icc_data;
+    int stored_icc = 0; // Whether we have already stored an ICC profile.
+    WebPData xmp_data;
+    int stored_xmp = 0; // Whether we have already stored an XMP profile.
+    int loop_count = 0;
+    int stored_loop_count = 0; // Whether we have found an explicit loop count.
+    WebPMux* mux = NULL;
+
+    int default_kmin = 1; // Whether to use default kmin value.
+    int default_kmax = 1;
+
+    if (!WebPConfigInit(&config) || !WebPAnimEncoderOptionsInit(&enc_options) || !WebPPictureInit(&frame) ||
+        !WebPPictureInit(&curr_canvas) || !WebPPictureInit(&prev_canvas) || !WebPPictureInit(&prev_to_prev_canvas)) {
+        fprintf(stderr, "Error! Version mismatch!\n");
+        return -1;
+    }
+    config.lossless = 1; // Use lossless compression by default.
+
+    WebPDataInit(&webp_data);
+    WebPDataInit(&icc_data);
+    WebPDataInit(&xmp_data);
+
+    if (argc == 1) {
+        Help();
+        return 0;
+    }
+
+    for (c = 1; c < argc; ++c) {
+        int parse_error = 0;
+        if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
+            Help();
+            return 0;
+        } else if (!strcmp(argv[c], "-o") && c < argc - 1) {
+            out_file = argv[++c];
+        } else if (!strcmp(argv[c], "-lossy")) {
+            config.lossless = 0;
+        } else if (!strcmp(argv[c], "-mixed")) {
+            enc_options.allow_mixed = 1;
+            config.lossless = 0;
+        } else if (!strcmp(argv[c], "-q") && c < argc - 1) {
+            config.quality = ExUtilGetFloat(argv[++c], &parse_error);
+        } else if (!strcmp(argv[c], "-m") && c < argc - 1) {
+            config.method = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if (!strcmp(argv[c], "-min_size")) {
+            enc_options.minimize_size = 1;
+        } else if (!strcmp(argv[c], "-kmax") && c < argc - 1) {
+            enc_options.kmax = ExUtilGetInt(argv[++c], 0, &parse_error);
+            default_kmax = 0;
+        } else if (!strcmp(argv[c], "-kmin") && c < argc - 1) {
+            enc_options.kmin = ExUtilGetInt(argv[++c], 0, &parse_error);
+            default_kmin = 0;
+        } else if (!strcmp(argv[c], "-f") && c < argc - 1) {
+            config.filter_strength = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if (!strcmp(argv[c], "-metadata") && c < argc - 1) {
+            static const struct {
+                const char* option;
+                int flag;
+            } kTokens[] = {
+                {"all", METADATA_ALL}, {"none", 0}, {"icc", METADATA_ICC}, {"xmp", METADATA_XMP},
+            };
+            const size_t kNumTokens = sizeof(kTokens) / sizeof(*kTokens);
+            const char* start = argv[++c];
+            const char* const end = start + strlen(start);
+
+            // Walk the comma separated list; each token must match one entry
+            // of kTokens exactly.
+            keep_metadata = 0;
+            while (start < end) {
+                size_t i;
+                const char* token = strchr(start, ',');
+                if (token == NULL) token = end;
+
+                for (i = 0; i < kNumTokens; ++i) {
+                    if ((size_t)(token - start) == strlen(kTokens[i].option) &&
+                        !strncmp(start, kTokens[i].option, strlen(kTokens[i].option))) {
+                        if (kTokens[i].flag != 0) {
+                            keep_metadata |= kTokens[i].flag;
+                        } else {
+                            keep_metadata = 0;
+                        }
+                        break;
+                    }
+                }
+                if (i == kNumTokens) {
+                    fprintf(stderr, "Error! Unknown metadata type '%.*s'\n", (int)(token - start), start);
+                    Help();
+                    return -1;
+                }
+                start = token + 1;
+            }
+        } else if (!strcmp(argv[c], "-mt")) {
+            ++config.thread_level;
+        } else if (!strcmp(argv[c], "-version")) {
+            const int enc_version = WebPGetEncoderVersion();
+            const int mux_version = WebPGetMuxVersion();
+            printf("WebP Encoder version: %d.%d.%d\nWebP Mux version: %d.%d.%d\n", (enc_version >> 16) & 0xff,
+                   (enc_version >> 8) & 0xff, enc_version & 0xff, (mux_version >> 16) & 0xff, (mux_version >> 8) & 0xff,
+                   mux_version & 0xff);
+            return 0;
+        } else if (!strcmp(argv[c], "-quiet")) {
+            quiet = 1;
+        } else if (!strcmp(argv[c], "-v")) {
+            verbose = 1;
+            enc_options.verbose = 1;
+        } else if (!strcmp(argv[c], "--")) {
+            if (c < argc - 1) in_file = argv[++c];
+            break;
+        } else if (argv[c][0] == '-') {
+            fprintf(stderr, "Error! Unknown option '%s'\n", argv[c]);
+            Help();
+            return -1;
+        } else {
+            in_file = argv[c];
+        }
+
+        if (parse_error) {
+            Help();
+            return -1;
+        }
+    }
+
+    // Appropriate default kmin, kmax values for lossy and lossless.
+    if (default_kmin) {
+        enc_options.kmin = config.lossless ? 9 : 3;
+    }
+    if (default_kmax) {
+        enc_options.kmax = config.lossless ? 17 : 5;
+    }
+
+    if (!WebPValidateConfig(&config)) {
+        fprintf(stderr, "Error! Invalid configuration.\n");
+        goto End;
+    }
+
+    if (in_file == NULL) {
+        fprintf(stderr, "No input file specified!\n");
+        Help();
+        goto End;
+    }
+
+// Start the decoder object
+#if LOCAL_GIF_PREREQ(5, 0)
+    gif = DGifOpenFileName(in_file, &gif_error);
+#else
+    gif = DGifOpenFileName(in_file);
+#endif
+    if (gif == NULL) goto End;
+
+    // Loop over GIF images
+    done = 0;
+    do {
+        GifRecordType type;
+        if (DGifGetRecordType(gif, &type) == GIF_ERROR) goto End;
+
+        switch (type) {
+            case IMAGE_DESC_RECORD_TYPE: {
+                GIFFrameRect gif_rect;
+                GifImageDesc* const image_desc = &gif->Image;
+
+                if (!DGifGetImageDesc(gif)) goto End;
+
+                if (is_first_frame) {
+                    if (verbose) {
+                        printf("Canvas screen: %d x %d\n", gif->SWidth, gif->SHeight);
+                    }
+                    // Fix some broken GIF global headers that report
+                    // 0 x 0 screen dimension.
+                    if (gif->SWidth == 0 || gif->SHeight == 0) {
+                        image_desc->Left = 0;
+                        image_desc->Top = 0;
+                        gif->SWidth = image_desc->Width;
+                        gif->SHeight = image_desc->Height;
+                        if (gif->SWidth <= 0 || gif->SHeight <= 0) {
+                            goto End;
+                        }
+                        if (verbose) {
+                            printf("Fixed canvas screen dimension to: %d x %d\n", gif->SWidth, gif->SHeight);
+                        }
+                    }
+                    // Allocate current buffer.
+                    frame.width = gif->SWidth;
+                    frame.height = gif->SHeight;
+                    frame.use_argb = 1;
+                    if (!WebPPictureAlloc(&frame)) goto End;
+                    GIFClearPic(&frame, NULL);
+                    WebPPictureCopy(&frame, &curr_canvas);
+                    WebPPictureCopy(&frame, &prev_canvas);
+                    WebPPictureCopy(&frame, &prev_to_prev_canvas);
+
+                    // Background color.
+                    GIFGetBackgroundColor(gif->SColorMap, gif->SBackGroundColor, transparent_index,
+                                          &enc_options.anim_params.bgcolor);
+
+                    // Initialize encoder.
+                    enc = WebPAnimEncoderNew(curr_canvas.width, curr_canvas.height, &enc_options);
+                    if (enc == NULL) {
+                        fprintf(stderr,
+                                "Error! Could not create encoder object. Possibly due to "
+                                "a memory error.\n");
+                        goto End;
+                    }
+                    is_first_frame = 0;
+                }
+
+                // Some even more broken GIFs can have a sub-rect with zero width/height.
+                if (image_desc->Width == 0 || image_desc->Height == 0) {
+                    image_desc->Width = gif->SWidth;
+                    image_desc->Height = gif->SHeight;
+                }
+
+                if (!GIFReadFrame(gif, transparent_index, &gif_rect, &frame)) {
+                    goto End;
+                }
+                // Blend frame rectangle with previous canvas to compose full canvas.
+                // Note that 'curr_canvas' is same as 'prev_canvas' at this point.
+                GIFBlendFrames(&frame, &gif_rect, &curr_canvas);
+
+                // A frame that cannot be added is reported but does not stop
+                // the conversion.
+                if (!WebPAnimEncoderAdd(enc, &curr_canvas, frame_timestamp, &config)) {
+                    fprintf(stderr, "%s\n", WebPAnimEncoderGetError(enc));
+                }
+
+                // Update canvases.
+                GIFCopyPixels(&prev_canvas, &prev_to_prev_canvas);
+                GIFDisposeFrame(orig_dispose, &gif_rect, &prev_canvas, &curr_canvas);
+                GIFCopyPixels(&curr_canvas, &prev_canvas);
+
+                // Update timestamp (for next frame).
+                frame_timestamp += frame_duration;
+
+                // In GIF, graphic control extensions are optional for a frame, so we
+                // may not get one before reading the next frame. To handle this case,
+                // we reset frame properties to reasonable defaults for the next frame.
+                orig_dispose = GIF_DISPOSE_NONE;
+                frame_duration = 0;
+                transparent_index = GIF_INDEX_INVALID;
+                break;
+            }
+            case EXTENSION_RECORD_TYPE: {
+                int extension;
+                GifByteType* data = NULL;
+                if (DGifGetExtension(gif, &extension, &data) == GIF_ERROR) {
+                    goto End;
+                }
+                // An extension without any data sub-block leaves 'data' NULL.
+                // There is nothing to parse or to skip then, and the handlers
+                // below would dereference it, so move on to the next record.
+                if (data == NULL) continue;
+
+                switch (extension) {
+                    case COMMENT_EXT_FUNC_CODE: {
+                        break; // Do nothing for now.
+                    }
+                    case GRAPHICS_EXT_FUNC_CODE: {
+                        if (!GIFReadGraphicsExtension(data, &frame_duration, &orig_dispose, &transparent_index)) {
+                            goto End;
+                        }
+                        break;
+                    }
+                    case PLAINTEXT_EXT_FUNC_CODE: {
+                        break;
+                    }
+                    case APPLICATION_EXT_FUNC_CODE: {
+                        if (data[0] != 11) break; // Chunk is too short
+                        if (!memcmp(data + 1, "NETSCAPE2.0", 11) || !memcmp(data + 1, "ANIMEXTS1.0", 11)) {
+                            if (!GIFReadLoopCount(gif, &data, &loop_count)) {
+                                goto End;
+                            }
+                            if (verbose) {
+                                fprintf(stderr, "Loop count: %d\n", loop_count);
+                            }
+                            stored_loop_count = (loop_count != 0);
+                        } else { // An extension containing metadata.
+                            // We only store the first encountered chunk of each type, and
+                            // only if requested by the user.
+                            const int is_xmp =
+                                (keep_metadata & METADATA_XMP) && !stored_xmp && !memcmp(data + 1, "XMP DataXMP", 11);
+                            const int is_icc =
+                                (keep_metadata & METADATA_ICC) && !stored_icc && !memcmp(data + 1, "ICCRGBG1012", 11);
+                            if (is_xmp || is_icc) {
+                                if (!GIFReadMetadata(gif, &data, is_xmp ? &xmp_data : &icc_data)) {
+                                    goto End;
+                                }
+                                if (is_icc) {
+                                    stored_icc = 1;
+                                } else if (is_xmp) {
+                                    stored_xmp = 1;
+                                }
+                            }
+                        }
+                        break;
+                    }
+                    default: {
+                        break; // skip
+                    }
+                }
+                // Consume whatever sub-blocks of this extension are left.
+                while (data != NULL) {
+                    if (DGifGetExtensionNext(gif, &data) == GIF_ERROR) goto End;
+                }
+                break;
+            }
+            case TERMINATE_RECORD_TYPE: {
+                done = 1;
+                break;
+            }
+            default: {
+                if (verbose) {
+                    fprintf(stderr, "Skipping over unknown record type %d\n", type);
+                }
+                break;
+            }
+        }
+    } while (!done);
+
+    // The encoder is only created when the first image record is seen. A GIF
+    // without any image leaves it NULL; stop here instead of passing a NULL
+    // encoder on and printing its (NULL) error string.
+    if (enc == NULL) {
+        fprintf(stderr, "Error! No image frames found in '%s'.\n", in_file);
+        goto End;
+    }
+
+    // Last NULL frame.
+    if (!WebPAnimEncoderAdd(enc, NULL, frame_timestamp, NULL)) {
+        fprintf(stderr, "Error flushing WebP muxer.\n");
+        fprintf(stderr, "%s\n", WebPAnimEncoderGetError(enc));
+    }
+
+    if (!WebPAnimEncoderAssemble(enc, &webp_data)) {
+        fprintf(stderr, "%s\n", WebPAnimEncoderGetError(enc));
+        goto End;
+    }
+
+    if (stored_loop_count || stored_icc || stored_xmp) {
+        // Re-mux to add loop count and/or metadata as needed.
+        mux = WebPMuxCreate(&webp_data, 1);
+        if (mux == NULL) {
+            fprintf(stderr, "ERROR: Could not re-mux to add loop count/metadata.\n");
+            goto End;
+        }
+        WebPDataClear(&webp_data);
+
+        if (stored_loop_count) { // Update loop count.
+            WebPMuxAnimParams new_params;
+            err = WebPMuxGetAnimationParams(mux, &new_params);
+            if (err != WEBP_MUX_OK) {
+                fprintf(stderr, "ERROR (%s): Could not fetch loop count.\n", ErrorString(err));
+                goto End;
+            }
+            new_params.loop_count = loop_count;
+            err = WebPMuxSetAnimationParams(mux, &new_params);
+            if (err != WEBP_MUX_OK) {
+                fprintf(stderr, "ERROR (%s): Could not update loop count.\n", ErrorString(err));
+                goto End;
+            }
+        }
+
+        if (stored_icc) { // Add ICCP chunk.
+            err = WebPMuxSetChunk(mux, "ICCP", &icc_data, 1);
+            if (verbose) {
+                fprintf(stderr, "ICC size: %d\n", (int)icc_data.size);
+            }
+            if (err != WEBP_MUX_OK) {
+                fprintf(stderr, "ERROR (%s): Could not set ICC chunk.\n", ErrorString(err));
+                goto End;
+            }
+        }
+
+        if (stored_xmp) { // Add XMP chunk.
+            err = WebPMuxSetChunk(mux, "XMP ", &xmp_data, 1);
+            if (verbose) {
+                fprintf(stderr, "XMP size: %d\n", (int)xmp_data.size);
+            }
+            if (err != WEBP_MUX_OK) {
+                fprintf(stderr, "ERROR (%s): Could not set XMP chunk.\n", ErrorString(err));
+                goto End;
+            }
+        }
+
+        err = WebPMuxAssemble(mux, &webp_data);
+        if (err != WEBP_MUX_OK) {
+            fprintf(stderr,
+                    "ERROR (%s): Could not assemble when re-muxing to add "
+                    "loop count/metadata.\n",
+                    ErrorString(err));
+            goto End;
+        }
+    }
+
+    if (out_file != NULL) {
+        if (!ExUtilWriteFile(out_file, webp_data.bytes, webp_data.size)) {
+            fprintf(stderr, "Error writing output file: %s\n", out_file);
+            goto End;
+        }
+        if (!quiet) {
+            fprintf(stderr, "Saved output file (%d bytes): %s\n", (int)webp_data.size, out_file);
+        }
+    } else {
+        if (!quiet) {
+            fprintf(stderr,
+                    "Nothing written; use -o flag to save the result "
+                    "(%d bytes).\n",
+                    (int)webp_data.size);
+        }
+    }
+
+    // All OK.
+    ok = 1;
+    gif_error = GIF_OK;
+
+End:
+    // Shared cleanup for the success path and for every failure after option
+    // parsing.
+    WebPDataClear(&icc_data);
+    WebPDataClear(&xmp_data);
+    WebPMuxDelete(mux);
+    WebPDataClear(&webp_data);
+    WebPPictureFree(&frame);
+    WebPPictureFree(&curr_canvas);
+    WebPPictureFree(&prev_canvas);
+    WebPPictureFree(&prev_to_prev_canvas);
+    WebPAnimEncoderDelete(enc);
+
+    if (gif_error != GIF_OK) {
+        GIFDisplayError(gif, gif_error);
+    }
+    if (gif != NULL) {
+#if LOCAL_GIF_PREREQ(5, 1)
+        DGifCloseFile(gif, &gif_error);
+#else
+        DGifCloseFile(gif);
+#endif
+    }
+
+    return !ok;
+}
+
+#else // !WEBP_HAVE_GIF
+
+// Entry point used when WEBP_HAVE_GIF is not defined: reports on stderr that
+// GIF support is missing and exits with status 0.
+int main(int argc, const char* argv[]) {
+    const char* const program_name = argv[0];
+    (void)argc;
+    fprintf(stderr, "GIF support not enabled in %s.\n", program_name);
+    return 0;
+}
+
+#endif
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/gifdec.c b/codec/L2/demos/webpEnc/host/gifdec.c
new file mode 100644
index 0000000000..65acefba35
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/gifdec.c
@@ -0,0 +1,374 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// GIF decode.
+
+#include "./gifdec.h"
+
+#include <stdio.h>
+
+#ifdef WEBP_HAVE_GIF
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "webp/encode.h"
+#include "webp/mux_types.h"
+
+#define GIF_TRANSPARENT_COLOR 0x00ffffff // Pixel value whose top (alpha) byte is zero; marks transparent pixels.
+#define GIF_WHITE_COLOR 0xffffffff       // Pixel value used when the background color index is invalid.
+#define GIF_TRANSPARENT_MASK 0x01        // Graphics-extension flag bit: buf[4] holds a transparent color index.
+#define GIF_DISPOSE_MASK 0x07            // Mask applied to the shifted flags to obtain the raw dispose method.
+#define GIF_DISPOSE_SHIFT 2              // Right shift applied to the flags before GIF_DISPOSE_MASK.
+
+// from utils/utils.h
+extern void WebPCopyPlane(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height);
+extern void WebPCopyPixels(const WebPPicture* const src, WebPPicture* const dst);
+
+// Resolves the GIF background color into '*bgcolor' as 0xAARRGGBB; falls back to white on an invalid index.
+void GIFGetBackgroundColor(const ColorMapObject* const color_map, int bgcolor_index,
+                           int transparent_index, uint32_t* const bgcolor) {
+    if (transparent_index != GIF_INDEX_INVALID && bgcolor_index == transparent_index) {
+        *bgcolor = GIF_TRANSPARENT_COLOR; // Special case: the background is the transparent color.
+    } else if (color_map == NULL || color_map->Colors == NULL || bgcolor_index < 0 ||
+               bgcolor_index >= color_map->ColorCount) { // Negative indices are rejected as well.
+        *bgcolor = GIF_WHITE_COLOR;
+        fprintf(stderr, "GIF decode warning: invalid background color index. Assuming "
+                        "white background.\n");
+    } else {
+        const GifColorType color = color_map->Colors[bgcolor_index];
+        // 0xffu keeps the alpha shift unsigned: '0xff << 24' overflows a signed int.
+        *bgcolor = (0xffu << 24) | (color.Red << 16) | (color.Green << 8) | (color.Blue << 0);
+    }
+}
+
+// Parses a graphics control extension block: frame duration (ms), dispose method and transparent index.
+// Returns 1 on success; returns 0 without touching the outputs when the size byte buf[0] is not 4.
+int GIFReadGraphicsExtension(const GifByteType* const buf, int* const duration,
+                             GIFDisposeMethod* const dispose, int* const transparent_index) {
+    int flags, dispose_raw;
+    if (buf[0] != 4) return 0; // Validate the declared size before reading the payload bytes buf[1..4].
+    flags = buf[1];
+    dispose_raw = (flags >> GIF_DISPOSE_SHIFT) & GIF_DISPOSE_MASK;
+    *duration = (buf[2] | (buf[3] << 8)) * 10; // Stored in 10 ms units, low byte first; reported in 1 ms units.
+    switch (dispose_raw) {
+        case 3:
+            *dispose = GIF_DISPOSE_RESTORE_PREVIOUS;
+            break;
+        case 2:
+            *dispose = GIF_DISPOSE_BACKGROUND;
+            break;
+        case 1:
+        case 0:
+        default:
+            *dispose = GIF_DISPOSE_NONE;
+            break;
+    }
+    *transparent_index = (flags & GIF_TRANSPARENT_MASK) ? buf[4] : GIF_INDEX_INVALID;
+    return 1;
+}
+
+// Expands 'len' palette indices from 'src' into ARGB pixels in 'dst'. The transparent index maps to
+// GIF_TRANSPARENT_COLOR; an index past the end of the palette (corrupt stream) maps to GIF_WHITE_COLOR.
+static void Remap(
+    const GifFileType* const gif, const uint8_t* const src, int len, int transparent_index, uint32_t* dst) {
+    int i;
+    const ColorMapObject* const cmap = gif->Image.ColorMap ? gif->Image.ColorMap : gif->SColorMap;
+    if (cmap == NULL || cmap->Colors == NULL) return; // No usable palette: 'dst' is left untouched.
+    for (i = 0; i < len; ++i) {
+        const GifColorType* const c = (src[i] < cmap->ColorCount) ? &cmap->Colors[src[i]] : NULL;
+        dst[i] = (src[i] == transparent_index) ? GIF_TRANSPARENT_COLOR
+                 : (c == NULL)                 ? GIF_WHITE_COLOR
+                 : (c->Blue | (c->Green << 8) | (c->Red << 16) | (0xffu << 24));
+    }
+}
+
+int GIFReadFrame(GifFileType* const gif, // Decodes the current frame of 'gif' into its rectangle of 'picture'.
+                 int transparent_index,
+                 GIFFrameRect* const gif_rect,
+                 WebPPicture* const picture) {
+    WebPPicture sub_image;
+    const GifImageDesc* const image_desc = &gif->Image;
+    uint32_t* dst = NULL;
+    uint8_t* tmp = NULL; // Scratch buffer for one row of palette indices.
+    int ok = 0;
+    GIFFrameRect rect = {image_desc->Left, image_desc->Top, image_desc->Width, image_desc->Height};
+    *gif_rect = rect; // Handed back to the caller before any check, so it is set even on failure.
+
+    // Use a view for the sub-picture:
+    if (!WebPPictureView(picture, rect.x_offset, rect.y_offset, rect.width, rect.height, &sub_image)) {
+        fprintf(stderr, "Sub-image %dx%d at position %d,%d is invalid!\n", rect.width, rect.height, rect.x_offset,
+                rect.y_offset);
+        return 0;
+    }
+    dst = sub_image.argb;
+
+    tmp = (uint8_t*)malloc(rect.width * sizeof(*tmp));
+    if (tmp == NULL) goto End;
+
+    if (image_desc->Interlace) { // Interlaced image.
+        // We need 4 passes, with the following offsets and jumps.
+        const int interlace_offsets[] = {0, 4, 2, 1};
+        const int interlace_jumps[] = {8, 8, 4, 2};
+        int pass;
+        for (pass = 0; pass < 4; ++pass) {
+            int y; // Destination row; rows are read in pass order, not top to bottom.
+            for (y = interlace_offsets[pass]; y < rect.height; y += interlace_jumps[pass]) {
+                if (DGifGetLine(gif, tmp, rect.width) == GIF_ERROR) goto End;
+                Remap(gif, tmp, rect.width, transparent_index, dst + y * sub_image.argb_stride);
+            }
+        }
+    } else { // Non-interlaced image.
+        int y;
+        for (y = 0; y < rect.height; ++y) {
+            if (DGifGetLine(gif, tmp, rect.width) == GIF_ERROR) goto End;
+            Remap(gif, tmp, rect.width, transparent_index, dst + y * sub_image.argb_stride);
+        }
+    }
+    ok = 1;
+
+End:
+    if (!ok) picture->error_code = sub_image.error_code; // Surface the view's error code on the caller's picture.
+    WebPPictureFree(&sub_image);
+    free(tmp);
+    return ok;
+}
+
+// Reads the animation loop count from the sub-block that follows a NETSCAPE2.0 / ANIMEXTS1.0
+// application extension header in '*buf'. On success stores it in '*loop_count' and returns 1;
+// returns 0 if the sub-block cannot be read, is missing, or has the wrong size/marker.
+int GIFReadLoopCount(GifFileType* const gif, GifByteType** const buf, int* const loop_count) {
+    const GifByteType* block;
+    // The caller is expected to have matched the application identifier already.
+    assert(!memcmp(*buf + 1, "NETSCAPE2.0", 11) || !memcmp(*buf + 1, "ANIMEXTS1.0", 11));
+    if (DGifGetExtensionNext(gif, buf) == GIF_ERROR) return 0;
+    block = *buf;
+    if (block == NULL) return 0;                 // Loop count sub-block missing.
+    if (block[0] < 3 || block[1] != 1) return 0; // wrong size/marker
+    *loop_count = block[2] | (block[3] << 8);    // 16-bit value, low byte first.
+    return 1;
+}
+
+// Appends the payload of every sub-block that follows the XMP / ICC application extension header in
+// '*buf' to 'metadata', growing metadata->bytes with realloc(). Returns 1 on success, 0 if a sub-block
+// cannot be read or memory runs out (bytes accumulated so far stay in 'metadata').
+int GIFReadMetadata(GifFileType* const gif, GifByteType** const buf, WebPData* const metadata) {
+    const int is_xmp = !memcmp(*buf + 1, "XMP DataXMP", 11);
+    const int is_icc = !memcmp(*buf + 1, "ICCRGBG1012", 11);
+    // XMP padding data is 0x01, 0xff, 0xfe ... 0x01, 0x00.
+    const size_t xmp_padding_size = 257;
+
+    assert(is_xmp || is_icc);
+    (void)is_icc; // silence unused warning.
+
+    // Sub-block layout:
+    //  - Usual case (including ICC profile): the first byte of each sub-block is its
+    //    size in bytes (0 to 255) and the remaining bytes are the data.
+    //  - XMP: the size byte is itself part of the XMP payload, and the stream ends
+    //    with 257 bytes of padding. See the XMP specification for details.
+    for (;;) {
+        const uint8_t* chunk_bytes;
+        size_t chunk_size;
+        uint8_t* grown;
+        if (DGifGetExtensionNext(gif, buf) == GIF_ERROR) return 0;
+        if (*buf == NULL) break; // Finished.
+        chunk_size = is_xmp ? (*buf)[0] + 1 : (*buf)[0];
+        assert(chunk_size > 0);
+        chunk_bytes = is_xmp ? *buf : *buf + 1;
+        // Keep realloc()'s result in a local first so the old buffer in
+        // metadata->bytes is not lost if the allocation fails.
+        grown = (uint8_t*)realloc((void*)metadata->bytes, metadata->size + chunk_size);
+        if (grown == NULL) return 0;
+        memcpy(grown + metadata->size, chunk_bytes, chunk_size);
+        metadata->bytes = grown;
+        metadata->size += chunk_size;
+    }
+
+    // Drop the trailing XMP padding, but only when there is more data than padding.
+    if (is_xmp && metadata->size > xmp_padding_size) {
+        metadata->size -= xmp_padding_size;
+    }
+    return 1;
+}
+
+// Sets every pixel of the width x height rectangle at (left, top) in 'picture' to GIF_TRANSPARENT_COLOR.
+static void ClearRectangle(WebPPicture* const picture, int left, int top, int width, int height) {
+    const int x_end = left + width;
+    const int y_end = top + height;
+    int x, y;
+    for (y = top; y < y_end; ++y) {
+        uint32_t* const row = picture->argb + y * picture->argb_stride;
+        for (x = left; x < x_end; ++x) row[x] = GIF_TRANSPARENT_COLOR;
+    }
+}
+
+void GIFClearPic(WebPPicture* const pic, const GIFFrameRect* const rect) {
+    if (rect == NULL) { // No rectangle given: clear the entire picture.
+        ClearRectangle(pic, 0, 0, pic->width, pic->height);
+        return;
+    }
+    ClearRectangle(pic, rect->x_offset, rect->y_offset, rect->width, rect->height); // Clear only 'rect'.
+}
+
+void GIFCopyPixels(const WebPPicture* const src, WebPPicture* const dst) {
+    WebPCopyPixels(src, dst); // Pure forwarder to the extern helper declared near the top of this file.
+}
+
+// Applies 'dispose' to the 'rect' area of 'curr_canvas': BACKGROUND clears it to the transparent color,
+// RESTORE_PREVIOUS copies that area back from 'prev_canvas'; any other method leaves the canvas untouched.
+void GIFDisposeFrame(GIFDisposeMethod dispose, const GIFFrameRect* const rect,
+                     const WebPPicture* const prev_canvas, WebPPicture* const curr_canvas) {
+    assert(rect != NULL);
+    assert(dispose != GIF_DISPOSE_RESTORE_PREVIOUS || prev_canvas != NULL); // Checked before any dereference.
+    if (dispose == GIF_DISPOSE_BACKGROUND) {
+        GIFClearPic(curr_canvas, rect);
+    } else if (dispose == GIF_DISPOSE_RESTORE_PREVIOUS) {
+        const int src_stride = prev_canvas->argb_stride; // In pixels; WebPCopyPlane() below takes bytes (x4).
+        const uint32_t* const src = prev_canvas->argb + rect->x_offset + rect->y_offset * src_stride;
+        const int dst_stride = curr_canvas->argb_stride;
+        uint32_t* const dst = curr_canvas->argb + rect->x_offset + rect->y_offset * dst_stride;
+        WebPCopyPlane((uint8_t*)src, 4 * src_stride, (uint8_t*)dst, 4 * dst_stride, 4 * rect->width, rect->height);
+    }
+}
+
+// Copies every pixel of 'src' inside 'rect' whose top (alpha) byte is non-zero onto 'dst'.
+void GIFBlendFrames(const WebPPicture* const src, const GIFFrameRect* const rect, WebPPicture* const dst) {
+    const int x_end = rect->x_offset + rect->width;
+    const int y_end = rect->y_offset + rect->height;
+    int x, y;
+    assert(src->width == dst->width && src->height == dst->height);
+    for (y = rect->y_offset; y < y_end; ++y) {
+        for (x = rect->x_offset; x < x_end; ++x) {
+            const uint32_t pixel = src->argb[y * src->argb_stride + x];
+            if ((pixel >> 24) == 0) continue; // Transparent source pixel: keep what 'dst' already has.
+            dst->argb[y * dst->argb_stride + x] = pixel;
+        }
+    }
+}
+
+void GIFDisplayError(const GifFileType* const gif, int gif_error) { // Prints a GIFLib error message to stderr.
+// libgif 4.2.0 has retired PrintGifError() and added GifErrorString().
+#if LOCAL_GIF_PREREQ(4, 2)
+#if LOCAL_GIF_PREREQ(5, 0)
+    // The string is static, hence the const char* cast. gif->Error is used when a handle is available.
+    const char* error_str = (const char*)GifErrorString((gif == NULL) ? gif_error : gif->Error);
+#else
+    const char* error_str = (const char*)GifErrorString();
+    (void)gif;
+#endif
+    if (error_str == NULL) error_str = "Unknown error"; // Handle a NULL result from GifErrorString().
+    fprintf(stderr, "GIFLib Error %d: %s\n", gif_error, error_str);
+#else
+    (void)gif; // Unused in this branch.
+    fprintf(stderr, "GIFLib Error %d: ", gif_error);
+    PrintGifError();
+    fprintf(stderr, "\n");
+#endif
+}
+
+#else // !WEBP_HAVE_GIF
+
+// Reports on stderr that this build has no GIF support; called by each stub in this #else branch.
+static void ErrorGIFNotAvailable(void) {
+    fprintf(stderr,
+            "GIF support not compiled. Please install the libgif-dev package before building.\n");
+}
+
+void GIFGetBackgroundColor(const struct ColorMapObject* const color_map, // Stub used when WEBP_HAVE_GIF is unset.
+                           int bgcolor_index,
+                           int transparent_index,
+                           uint32_t* const bgcolor) {
+    (void)color_map;
+    (void)bgcolor_index;
+    (void)transparent_index;
+    (void)bgcolor; // Note: '*bgcolor' is not written by this stub.
+    ErrorGIFNotAvailable();
+}
+
+int GIFReadGraphicsExtension(const GifByteType* const data, // Stub used when WEBP_HAVE_GIF is unset.
+                             int* const duration,
+                             GIFDisposeMethod* const dispose,
+                             int* const transparent_index) {
+    (void)data;
+    (void)duration;
+    (void)dispose;
+    (void)transparent_index;
+    ErrorGIFNotAvailable();
+    return 0; // Always reports failure; no output parameter is written.
+}
+
+int GIFReadFrame(struct GifFileType* const gif, // Stub used when WEBP_HAVE_GIF is unset.
+                 int transparent_index,
+                 GIFFrameRect* const gif_rect,
+                 struct WebPPicture* const picture) {
+    (void)gif;
+    (void)transparent_index;
+    (void)gif_rect;
+    (void)picture;
+    ErrorGIFNotAvailable();
+    return 0; // Always reports failure; 'gif_rect' and 'picture' are left untouched.
+}
+
+int GIFReadLoopCount(struct GifFileType* const gif, GifByteType** const buf, int* const loop_count) {
+    (void)gif; // Stub used when WEBP_HAVE_GIF is unset: all arguments are ignored.
+    (void)buf;
+    (void)loop_count;
+    ErrorGIFNotAvailable();
+    return 0; // Always reports failure.
+}
+
+int GIFReadMetadata(struct GifFileType* const gif, GifByteType** const buf, struct WebPData* const metadata) {
+    (void)gif; // Stub used when WEBP_HAVE_GIF is unset: all arguments are ignored.
+    (void)buf;
+    (void)metadata;
+    ErrorGIFNotAvailable();
+    return 0; // Always reports failure.
+}
+
+void GIFDisposeFrame(GIFDisposeMethod dispose, // Stub used when WEBP_HAVE_GIF is unset.
+                     const GIFFrameRect* const rect,
+                     const struct WebPPicture* const prev_canvas,
+                     struct WebPPicture* const curr_canvas) {
+    (void)dispose;
+    (void)rect;
+    (void)prev_canvas;
+    (void)curr_canvas;
+    ErrorGIFNotAvailable(); // Only prints the error; the canvases are not modified.
+}
+
+void GIFBlendFrames(const struct WebPPicture* const src, // Stub used when WEBP_HAVE_GIF is unset.
+                    const GIFFrameRect* const rect,
+                    struct WebPPicture* const dst) {
+    (void)src;
+    (void)rect;
+    (void)dst;
+    ErrorGIFNotAvailable(); // Only prints the error; 'dst' is not modified.
+}
+
+void GIFDisplayError(const struct GifFileType* const gif, int gif_error) {
+    (void)gif; // Stub used when WEBP_HAVE_GIF is unset: 'gif_error' is not reported.
+    (void)gif_error;
+    ErrorGIFNotAvailable();
+}
+
+void GIFClearPic(struct WebPPicture* const pic, const GIFFrameRect* const rect) {
+    (void)pic; // Stub used when WEBP_HAVE_GIF is unset: 'pic' is not modified.
+    (void)rect;
+    ErrorGIFNotAvailable();
+}
+
+void GIFCopyPixels(const struct WebPPicture* const src, struct WebPPicture* const dst) {
+    (void)src; // Stub used when WEBP_HAVE_GIF is unset: nothing is copied.
+    (void)dst;
+    ErrorGIFNotAvailable();
+}
+
+#endif // WEBP_HAVE_GIF
+
+// -----------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/gifdec.h b/codec/L2/demos/webpEnc/host/gifdec.h
new file mode 100644
index 0000000000..a7484816b2
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/gifdec.h
@@ -0,0 +1,112 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// GIF decode.
+
+#ifndef WEBP_EXAMPLES_GIFDEC_H_
+#define WEBP_EXAMPLES_GIFDEC_H_
+
+#include <stdio.h>
+#include "webp/types.h"
+
+#ifdef HAVE_CONFIG_H
+#include "webp/config.h"
+#endif
+
+#ifdef WEBP_HAVE_GIF
+#include <gif_lib.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// GIFLIB_MAJOR is only defined in libgif >= 4.2.0.
+#if defined(GIFLIB_MAJOR) && defined(GIFLIB_MINOR)
+#define LOCAL_GIF_VERSION ((GIFLIB_MAJOR << 8) | GIFLIB_MINOR) // Major above bit 8, minor in the low bits.
+#define LOCAL_GIF_PREREQ(maj, min) (LOCAL_GIF_VERSION >= (((maj) << 8) | (min))) // True if libgif >= maj.min.
+#else
+#define LOCAL_GIF_VERSION 0
+#define LOCAL_GIF_PREREQ(maj, min) 0 // Version macros unavailable: every version check evaluates to false.
+#endif
+
+#define GIF_INDEX_INVALID (-1) // Stands for "no transparent color index".
+
+typedef enum GIFDisposeMethod {
+    GIF_DISPOSE_NONE,            // Canvas is left as is.
+    GIF_DISPOSE_BACKGROUND,      // Frame rectangle is cleared to the transparent color.
+    GIF_DISPOSE_RESTORE_PREVIOUS // Frame rectangle is restored from the previous canvas.
+} GIFDisposeMethod;
+
+typedef struct { int x_offset, y_offset, width, height; } GIFFrameRect; // Frame rectangle within the canvas.
+
+struct WebPData;
+struct WebPPicture;
+
+#ifndef WEBP_HAVE_GIF
+struct ColorMapObject;
+struct GifFileType;
+typedef unsigned char GifByteType;
+#endif
+
+// Given the index of background color and transparent color, returns the
+// corresponding background color (in BGRA format) in 'bgcolor'.
+void GIFGetBackgroundColor(const struct ColorMapObject* const color_map,
+                           int bgcolor_index,
+                           int transparent_index,
+                           uint32_t* const bgcolor);
+
+// Parses the given graphics extension data to get frame duration (in 1ms
+// units), dispose method and transparent color index.
+// Returns true on success.
+int GIFReadGraphicsExtension(const GifByteType* const buf,
+                             int* const duration,
+                             GIFDisposeMethod* const dispose,
+                             int* const transparent_index);
+
+// Reads the next GIF frame from 'gif' into 'picture'. Also, returns the GIF
+// frame dimensions and offsets in 'gif_rect'.
+// Returns true on success.
+int GIFReadFrame(struct GifFileType* const gif,
+                 int transparent_index,
+                 GIFFrameRect* const gif_rect,
+                 struct WebPPicture* const picture);
+
+// Parses loop count from the given Netscape extension data. Returns true on success.
+int GIFReadLoopCount(struct GifFileType* const gif, GifByteType** const buf, int* const loop_count);
+
+// Parses the given ICC or XMP extension data and stores it into 'metadata'.
+// Returns true on success.
+int GIFReadMetadata(struct GifFileType* const gif, GifByteType** const buf, struct WebPData* const metadata);
+
+// Dispose the pixels within 'rect' of 'curr_canvas' based on 'dispose' method
+// and 'prev_canvas'.
+void GIFDisposeFrame(GIFDisposeMethod dispose,
+                     const GIFFrameRect* const rect,
+                     const struct WebPPicture* const prev_canvas,
+                     struct WebPPicture* const curr_canvas);
+
+// Given 'src' picture and its frame rectangle 'rect', blend it into 'dst'.
+void GIFBlendFrames(const struct WebPPicture* const src, const GIFFrameRect* const rect, struct WebPPicture* const dst);
+
+// Prints an error string based on 'gif_error'.
+void GIFDisplayError(const struct GifFileType* const gif, int gif_error);
+
+// In the given 'pic', clear the pixels in 'rect' to transparent color.
+void GIFClearPic(struct WebPPicture* const pic, const GIFFrameRect* const rect);
+
+// Copy pixels from 'src' to 'dst' honoring strides. 'src' and 'dst' are assumed
+// to be already allocated.
+void GIFCopyPixels(const struct WebPPicture* const src, struct WebPPicture* const dst);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // WEBP_EXAMPLES_GIFDEC_H_
diff --git a/codec/L2/demos/webpEnc/host/jpegdec.c b/codec/L2/demos/webpEnc/host/jpegdec.c
new file mode 100644
index 0000000000..08b1f7841e
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/jpegdec.c
@@ -0,0 +1,337 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// JPEG decode.
+
+#include "./jpegdec.h"
+
+#ifdef HAVE_CONFIG_H
+#include "webp/config.h"
+#endif
+
+#include <stdio.h>
+
+#ifdef WEBP_HAVE_JPEG
+#include <jpeglib.h>
+#include <jerror.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "webp/encode.h"
+#include "./example_util.h"
+#include "./metadata.h"
+
+// -----------------------------------------------------------------------------
+// Metadata processing
+
+#ifndef JPEG_APP1
+#define JPEG_APP1 (JPEG_APP0 + 1)
+#endif
+#ifndef JPEG_APP2
+#define JPEG_APP2 (JPEG_APP0 + 2)
+#endif
+
+typedef struct {
+    const uint8_t* data; // Segment payload; points into the saved marker's data, so it is not owned here.
+    size_t data_length;  // Payload size in bytes; 0 marks a slot that StoreICCP() has not filled.
+    int seq; // this segment's sequence number [1, 255] for use in reassembly.
+} ICCPSegment;
+
+static void SaveMetadataMarkers(j_decompress_ptr dinfo) { // Called by ReadJPEG() before jpeg_read_header().
+    const unsigned int max_marker_length = 0xffff; // Length limit passed for each saved marker.
+    jpeg_save_markers(dinfo, JPEG_APP1, max_marker_length); // Exif/XMP
+    jpeg_save_markers(dinfo, JPEG_APP2, max_marker_length); // ICC profile
+}
+
+static int CompareICCPSegments(const void* a, const void* b) { // qsort() comparator: ascending 'seq'.
+    const int lhs_seq = ((const ICCPSegment*)a)->seq;
+    const int rhs_seq = ((const ICCPSegment*)b)->seq;
+    return lhs_seq - rhs_seq;
+}
+
+// Extract ICC profile segments from the marker list in 'dinfo', reassembling
+// and storing them in 'iccp' (left untouched when the image has no ICC segment).
+// Returns true on success and false for memory errors and corrupt profiles.
+static int StoreICCP(j_decompress_ptr dinfo, MetadataPayload* const iccp) {
+    // ICC.1:2010-12 (4.3.0.0) Annex B.4 Embedding ICC Profiles in JPEG files
+    static const char kICCPSignature[] = "ICC_PROFILE";
+    static const size_t kICCPSignatureLength = 12; // signature includes '\0'
+    static const size_t kICCPSkipLength = 14;      // signature + seq & count
+    int expected_count = 0; // 'count' announced by the first segment seen.
+    int actual_count = 0;   // Number of ICC segments actually found.
+    int seq_max = 0;
+    size_t total_size = 0;
+    ICCPSegment iccp_segments[255]; // Slot 'seq - 1' holds the segment with sequence number 'seq'.
+    jpeg_saved_marker_ptr marker;
+
+    memset(iccp_segments, 0, sizeof(iccp_segments));
+    for (marker = dinfo->marker_list; marker != NULL; marker = marker->next) {
+        if (marker->marker == JPEG_APP2 && marker->data_length > kICCPSkipLength &&
+            !memcmp(marker->data, kICCPSignature, kICCPSignatureLength)) {
+            // ICC_PROFILE\0<seq><count>; 'seq' starts at 1.
+            const int seq = marker->data[kICCPSignatureLength];
+            const int count = marker->data[kICCPSignatureLength + 1];
+            const size_t segment_size = marker->data_length - kICCPSkipLength;
+            ICCPSegment* segment;
+
+            if (segment_size == 0 || count == 0 || seq == 0) {
+                // Arguments follow the order of the labels in the format string: size, count, sequence number.
+                fprintf(stderr,
+                        "[ICCP] size (%d) / count (%d) / sequence number (%d)"
+                        " cannot be 0!\n", (int)segment_size, count, seq);
+                return 0;
+            }
+
+            if (expected_count == 0) {
+                expected_count = count;
+            } else if (expected_count != count) {
+                fprintf(stderr, "[ICCP] Inconsistent segment count (%d / %d)!\n", expected_count, count);
+                return 0;
+            }
+
+            segment = iccp_segments + seq - 1;
+            if (segment->data_length != 0) { // Slot already filled by an earlier marker.
+                fprintf(stderr, "[ICCP] Duplicate segment number (%d)!\n", seq);
+                return 0;
+            }
+
+            segment->data = marker->data + kICCPSkipLength;
+            segment->data_length = segment_size;
+            segment->seq = seq;
+            total_size += segment_size;
+            if (seq > seq_max) seq_max = seq;
+            ++actual_count;
+        }
+    }
+
+    if (actual_count == 0) return 1; // No ICC profile present: success, nothing stored.
+    if (seq_max != actual_count) {
+        fprintf(stderr, "[ICCP] Discontinuous segments, expected: %d actual: %d!\n", actual_count, seq_max);
+        return 0;
+    }
+    if (expected_count != actual_count) {
+        fprintf(stderr, "[ICCP] Segment count: %d does not match expected: %d!\n", actual_count, expected_count);
+        return 0;
+    }
+
+    // The segments may appear out of order in the file, sort them based on
+    // sequence number before assembling the payload.
+    qsort(iccp_segments, actual_count, sizeof(*iccp_segments), CompareICCPSegments);
+
+    iccp->bytes = (uint8_t*)malloc(total_size);
+    if (iccp->bytes == NULL) return 0;
+    iccp->size = total_size;
+
+    {
+        int i;
+        size_t offset = 0; // Write position inside the reassembled profile.
+        for (i = 0; i < seq_max; ++i) {
+            memcpy(iccp->bytes + offset, iccp_segments[i].data, iccp_segments[i].data_length);
+            offset += iccp_segments[i].data_length;
+        }
+    }
+    return 1;
+}
+
+// Returns true on success and false for memory errors and corrupt profiles.
+// The caller must use MetadataFree() on 'metadata' in all cases.
+static int ExtractMetadataFromJPEG(j_decompress_ptr dinfo, Metadata* const metadata) {
+    static const struct {
+        int marker;              // JPEG marker code to match.
+        const char* signature;   // Bytes the marker payload must start with.
+        size_t signature_length; // Number of bytes compared, terminating NUL byte(s) included.
+        size_t storage_offset;   // Byte offset of the destination MetadataPayload inside Metadata.
+    } kJPEGMetadataMap[] = {
+        // Exif 2.2 Section 4.7.2 Interoperability Structure of APP1 ...
+        {JPEG_APP1, "Exif\0", 6, METADATA_OFFSET(exif)},
+        // XMP Specification Part 3 Section 3 Embedding XMP Metadata ... #JPEG
+        // TODO(jzern) Add support for 'ExtendedXMP'
+        {JPEG_APP1, "http://ns.adobe.com/xap/1.0/", 29, METADATA_OFFSET(xmp)},
+        {0, NULL, 0, 0}, // Sentinel: marker == 0 ends the lookup loop below.
+    };
+    jpeg_saved_marker_ptr marker;
+    // Treat ICC profiles separately as they may be segmented and out of order.
+    if (!StoreICCP(dinfo, &metadata->iccp)) return 0;
+
+    for (marker = dinfo->marker_list; marker != NULL; marker = marker->next) {
+        int i;
+        for (i = 0; kJPEGMetadataMap[i].marker != 0; ++i) {
+            if (marker->marker == kJPEGMetadataMap[i].marker &&
+                marker->data_length > kJPEGMetadataMap[i].signature_length &&
+                !memcmp(marker->data, kJPEGMetadataMap[i].signature, kJPEGMetadataMap[i].signature_length)) {
+                MetadataPayload* const payload =
+                    (MetadataPayload*)((uint8_t*)metadata + kJPEGMetadataMap[i].storage_offset);
+
+                if (payload->bytes == NULL) { // Only the first marker of each kind is stored.
+                    const char* marker_data = (const char*)marker->data + kJPEGMetadataMap[i].signature_length;
+                    const size_t marker_data_length = marker->data_length - kJPEGMetadataMap[i].signature_length;
+                    if (!MetadataCopy(marker_data, marker_data_length, payload)) return 0;
+                } else {
+                    fprintf(stderr, "Ignoring additional '%s' marker\n", kJPEGMetadataMap[i].signature);
+                }
+            }
+        }
+    }
+    return 1;
+}
+
+#undef JPEG_APP1
+#undef JPEG_APP2
+
+// -----------------------------------------------------------------------------
+// JPEG decoding
+
+struct my_error_mgr {
+    struct jpeg_error_mgr pub; // First member, so my_error_exit() can cast the 'err' pointer back to this struct.
+    jmp_buf setjmp_buffer;     // Jump target armed by setjmp() in ReadJPEG().
+};
+
+static void my_error_exit(j_common_ptr dinfo) { // error_exit hook installed by ReadJPEG().
+    struct my_error_mgr* myerr = (struct my_error_mgr*)dinfo->err;
+    dinfo->err->output_message(dinfo); // Emit the library's message first.
+    longjmp(myerr->setjmp_buffer, 1);  // Then jump back to the setjmp() point instead of returning.
+}
+
+typedef struct {
+    struct jpeg_source_mgr pub; // First member: ContextSetup() stores this struct as cinfo->src.
+    const uint8_t* data;        // Start of the in-memory JPEG stream.
+    size_t data_size;           // Size of 'data' in bytes.
+} JPEGReadContext;
+
+static void ContextInit(j_decompress_ptr cinfo) { // init_source hook: exposes the whole buffer at once.
+    JPEGReadContext* const ctx = (JPEGReadContext*)cinfo->src;
+    ctx->pub.next_input_byte = ctx->data;
+    ctx->pub.bytes_in_buffer = ctx->data_size;
+}
+
+static int ContextFill(j_decompress_ptr cinfo) { // fill_input_buffer hook: raises a read error.
+    // we shouldn't get here.
+    ERREXIT(cinfo, JERR_FILE_READ);
+    return 0;
+}
+
+// skip_input_data hook: advances the read position by 'jump_size' bytes, clamped to the bytes remaining.
+static void ContextSkip(j_decompress_ptr cinfo, long jump_size) {
+    JPEGReadContext* const ctx = (JPEGReadContext*)cinfo->src;
+    // libjpeg treats a zero or negative count as a no-op; a blind size_t cast would skip the whole buffer.
+    size_t jump = (jump_size > 0) ? (size_t)jump_size : 0;
+    if (jump > ctx->pub.bytes_in_buffer) jump = ctx->pub.bytes_in_buffer; // Don't overflow the buffer.
+    ctx->pub.bytes_in_buffer -= jump;
+    ctx->pub.next_input_byte += jump;
+}
+
+static void ContextTerm(j_decompress_ptr cinfo) { // term_source hook: nothing to release.
+    (void)cinfo;
+}
+
+static void ContextSetup(volatile struct jpeg_decompress_struct* const cinfo, JPEGReadContext* const ctx) {
+    cinfo->src = (struct jpeg_source_mgr*)ctx; // Install 'ctx' as the decompressor's data source.
+    ctx->pub.init_source = ContextInit;
+    ctx->pub.fill_input_buffer = ContextFill;
+    ctx->pub.skip_input_data = ContextSkip;
+    ctx->pub.resync_to_restart = jpeg_resync_to_restart; // Library-provided resync routine.
+    ctx->pub.term_source = ContextTerm;
+    ctx->pub.bytes_in_buffer = 0; // Both fields are filled in later by ContextInit().
+    ctx->pub.next_input_byte = NULL;
+}
+
+// Decodes the JPEG in 'data' to RGB, imports it into 'pic' and, if 'metadata' is non-NULL, fills it too.
+int ReadJPEG(const uint8_t* const data, size_t data_size, WebPPicture* const pic, Metadata* const metadata) {
+    volatile int ok = 0; // volatile: modified after setjmp() and read after a possible longjmp().
+    int stride, width, height;
+    volatile struct jpeg_decompress_struct dinfo;
+    struct my_error_mgr jerr;
+    uint8_t* volatile rgb = NULL;
+    JSAMPROW buffer[1];
+    JPEGReadContext ctx;
+
+    memset(&ctx, 0, sizeof(ctx));
+    ctx.data = data;
+    ctx.data_size = data_size;
+
+    memset((j_decompress_ptr)&dinfo, 0, sizeof(dinfo)); // for setjmp sanity
+    dinfo.err = jpeg_std_error(&jerr.pub);
+    jerr.pub.error_exit = my_error_exit;
+
+    if (setjmp(jerr.setjmp_buffer)) { // Non-zero only after my_error_exit() called longjmp().
+    Error:
+        ok = 0; // A libjpeg error may arrive after metadata extraction has already set 'ok'.
+        MetadataFree(metadata);
+        jpeg_destroy_decompress((j_decompress_ptr)&dinfo);
+        goto End;
+    }
+
+    jpeg_create_decompress((j_decompress_ptr)&dinfo);
+    ContextSetup(&dinfo, &ctx);
+    if (metadata != NULL) SaveMetadataMarkers((j_decompress_ptr)&dinfo);
+    jpeg_read_header((j_decompress_ptr)&dinfo, TRUE);
+
+    dinfo.out_color_space = JCS_RGB;
+    dinfo.do_fancy_upsampling = TRUE;
+
+    jpeg_start_decompress((j_decompress_ptr)&dinfo);
+
+    if (dinfo.output_components != 3) {
+        goto Error;
+    }
+
+    width = dinfo.output_width;
+    height = dinfo.output_height;
+    stride = dinfo.output_width * dinfo.output_components * sizeof(*rgb);
+
+    rgb = (uint8_t*)malloc((size_t)stride * (size_t)height); // size_t math: 'stride * height' can overflow int.
+    if (rgb == NULL) goto Error; // Error (not End) so that the decompressor is destroyed as well.
+    buffer[0] = (JSAMPLE*)rgb;
+
+    while (dinfo.output_scanline < dinfo.output_height) {
+        if (jpeg_read_scanlines((j_decompress_ptr)&dinfo, buffer, 1) != 1) {
+            goto Error; // Same cleanup as every other decode failure.
+        }
+        buffer[0] += stride;
+    }
+
+    if (metadata != NULL) {
+        ok = ExtractMetadataFromJPEG((j_decompress_ptr)&dinfo, metadata);
+        if (!ok) {
+            fprintf(stderr, "Error extracting JPEG metadata!\n");
+            goto Error;
+        }
+    }
+
+    jpeg_finish_decompress((j_decompress_ptr)&dinfo);
+    jpeg_destroy_decompress((j_decompress_ptr)&dinfo);
+
+    // WebP conversion.
+    pic->width = width;
+    pic->height = height;
+    ok = WebPPictureImportRGB(pic, rgb, stride);
+    if (!ok) goto Error;
+
+End:
+    free(rgb);
+    return ok;
+}
+#else  // !WEBP_HAVE_JPEG
+int ReadJPEG(const uint8_t* const data, // Stub used when WEBP_HAVE_JPEG is unset.
+             size_t data_size,
+             struct WebPPicture* const pic,
+             struct Metadata* const metadata) {
+    (void)data;
+    (void)data_size;
+    (void)pic;
+    (void)metadata;
+    fprintf(stderr,
+            "JPEG support not compiled. Please install the libjpeg "
+            "development package before building.\n");
+    return 0; // Always reports failure; 'pic' and 'metadata' are left untouched.
+}
+#endif // WEBP_HAVE_JPEG
+
+// -----------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/jpegdec.h b/codec/L2/demos/webpEnc/host/jpegdec.h
new file mode 100644
index 0000000000..678c2a2a9e
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/jpegdec.h
@@ -0,0 +1,37 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// JPEG decode.
+
+#ifndef WEBP_EXAMPLES_JPEGDEC_H_
+#define WEBP_EXAMPLES_JPEGDEC_H_
+
+#include <stdio.h>
+#include "webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct Metadata;
+struct WebPPicture;
+
+// Reads a JPEG from 'data', returning the decoded output in 'pic'.
+// The output is RGB or YUV depending on pic->use_argb value.
+// Returns true on success.
+int ReadJPEG(const uint8_t* const data,
+             size_t data_size,
+             struct WebPPicture* const pic,
+             struct Metadata* const metadata);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // WEBP_EXAMPLES_JPEGDEC_H_
diff --git a/codec/L2/demos/webpEnc/host/metadata.c b/codec/L2/demos/webpEnc/host/metadata.c
new file mode 100644
index 0000000000..80f3a68063
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/metadata.c
@@ -0,0 +1,48 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  Metadata types and functions.
+//
+
+#include "./metadata.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "webp/types.h"
+
+void MetadataInit(Metadata* const metadata) {
+    // Zero-fills the whole struct (all three payloads); a NULL 'metadata' is ignored.
+    if (metadata != NULL) memset(metadata, 0, sizeof(*metadata));
+}
+
+void MetadataPayloadDelete(MetadataPayload* const payload) {  // frees the buffer, leaves 'payload' empty
+    if (payload == NULL) return;  // a NULL payload is ignored
+    free(payload->bytes);
+    payload->bytes = NULL;  // reset so the payload can be reused or deleted again safely
+    payload->size = 0;
+}
+
+void MetadataFree(Metadata* const metadata) {  // releases the exif, iccp and xmp payloads
+    if (metadata == NULL) return;  // a NULL 'metadata' is ignored
+    MetadataPayloadDelete(&metadata->exif);
+    MetadataPayloadDelete(&metadata->iccp);
+    MetadataPayloadDelete(&metadata->xmp);
+}
+
+int MetadataCopy(const char* metadata, size_t metadata_len, MetadataPayload* const payload) {
+    if (metadata == NULL || metadata_len == 0 || payload == NULL) return 0;  // empty input counts as failure
+    payload->bytes = (uint8_t*)malloc(metadata_len);  // any previous 'bytes' value is overwritten, not freed
+    if (payload->bytes == NULL) return 0;
+    payload->size = metadata_len;
+    memcpy(payload->bytes, metadata, metadata_len);
+    return 1;  // 'payload' now holds its own copy of the input bytes
+}
+
+// -----------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/metadata.h b/codec/L2/demos/webpEnc/host/metadata.h
new file mode 100644
index 0000000000..d690a7ee1e
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/metadata.h
@@ -0,0 +1,46 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  Metadata types and functions.
+//
+
+#ifndef WEBP_EXAMPLES_METADATA_H_
+#define WEBP_EXAMPLES_METADATA_H_
+
+#include "webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct MetadataPayload {  // one metadata blob
+    uint8_t* bytes;  // malloc'ed buffer, or NULL when empty; released by MetadataPayloadDelete()
+    size_t size;  // number of bytes stored in 'bytes'
+} MetadataPayload;
+
+typedef struct Metadata {  // the three payloads set up by MetadataInit() and released by MetadataFree()
+    MetadataPayload exif;
+    MetadataPayload iccp;  // ICC profile bytes (pngdec.c fills this from png_get_iCCP())
+    MetadataPayload xmp;
+} Metadata;
+
+#define METADATA_OFFSET(x) offsetof(Metadata, x)
+
+void MetadataInit(Metadata* const metadata);
+void MetadataPayloadDelete(MetadataPayload* const payload);
+void MetadataFree(Metadata* const metadata);
+
+// Stores 'metadata' to 'payload->bytes', returns false on allocation error.
+int MetadataCopy(const char* metadata, size_t metadata_len, MetadataPayload* const payload);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // WEBP_EXAMPLES_METADATA_H_
diff --git a/codec/L2/demos/webpEnc/host/pngdec.c b/codec/L2/demos/webpEnc/host/pngdec.c
new file mode 100644
index 0000000000..427134cb39
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/pngdec.c
@@ -0,0 +1,312 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// PNG decode.
+
+#include "./pngdec.h"
+
+#ifdef HAVE_CONFIG_H
+#include "webp/config.h"
+#endif
+
+#include <stdio.h>
+
+#ifdef WEBP_HAVE_PNG
+#include <assert.h>
+#include <png.h>
+#include <setjmp.h> // note: this must be included *after* png.h
+#include <stdlib.h>
+#include <string.h>
+
+#include "webp/encode.h"
+#include "./example_util.h"
+#include "./metadata.h"
+
+static void PNGAPI error_function(png_structp png, png_const_charp error) {  // installed via png_set_error_fn()
+    if (error != NULL) fprintf(stderr, "libpng error: %s\n", error);
+    longjmp(png_jmpbuf(png), 1);  // does not return: control resumes at the setjmp() in ReadPNG()
+}
+
+// Converts the NUL-terminated 'hexstring', a sequence of two-character hex
+// pairs (e.g. 7af2...) optionally separated by newlines, to raw bytes.
+// Returns a malloc'ed buffer of exactly 'expected_length' bytes that the caller
+// must free, or NULL if allocation fails, the input ends early, or a pair holds
+// anything but [0-9A-Fa-f]. Digits are decoded by hand: strtol() would also
+// accept whitespace or a sign inside a pair (" 5", "-1").
+static uint8_t* HexStringToBytes(const char* hexstring, size_t expected_length) {
+    const char* src = hexstring;
+    size_t actual_length = 0;
+    uint8_t* const raw_data = (uint8_t*)malloc(expected_length);
+
+    if (raw_data == NULL) return NULL;
+    for (; actual_length < expected_length && *src != '\0'; ++src) {
+        int value = 0;
+        int i;
+        if (*src == '\n') continue;
+        for (i = 0; i < 2; ++i) {  // src[1] is at worst the terminating NUL
+            const char c = src[i];
+            const int digit = (c >= '0' && c <= '9') ? c - '0'
+                            : (c >= 'a' && c <= 'f') ? c - 'a' + 10
+                            : (c >= 'A' && c <= 'F') ? c - 'A' + 10 : -1;
+            if (digit < 0) break;
+            value = (value << 4) | digit;
+        }
+        if (i != 2) break;  // not a full pair of hex digits
+        raw_data[actual_length++] = (uint8_t)value;
+        ++src;  // together with the loop increment, steps past both digits
+    }
+
+    if (actual_length != expected_length) {
+        free(raw_data);
+        return NULL;
+    }
+    return raw_data;
+}
+
+// Decodes an ImageMagick-style 'raw profile' text chunk into 'payload'. Returns 1 on success, 0 on
+// malformed input or allocation failure. 'profile' is read as a NUL-terminated string.
+static int ProcessRawProfile(const char* profile, size_t profile_len, MetadataPayload* const payload) {
+    const char* src = profile;
+    char* end;
+    long expected_length;  // kept as long: an (int) cast would silently truncate oversized lengths
+
+    if (profile == NULL || profile_len == 0) return 0;
+
+    // ImageMagick formats 'raw profiles' as
+    // '\n<name>\n<length>(%8lu)\n<hex payload>\n'.
+    if (*src != '\n') {
+        fprintf(stderr, "Malformed raw profile, expected '\\n' got '\\x%.2X'\n", (unsigned char)*src);
+        return 0;
+    }
+    ++src;
+    // skip the profile name and extract the length.
+    while (*src != '\0' && *src++ != '\n') {}
+    expected_length = strtol(src, &end, 10);
+    if (*end != '\n') {
+        fprintf(stderr, "Malformed raw profile, expected '\\n' got '\\x%.2X'\n", (unsigned char)*end);
+        return 0;
+    }
+    if (expected_length < 0) return 0;  // a negative length would wrap to a huge size_t below
+    ++end;  // 'end' now points to the profile payload.
+    payload->bytes = HexStringToBytes(end, (size_t)expected_length);
+    if (payload->bytes == NULL) return 0;
+    payload->size = (size_t)expected_length;
+    return 1;
+}
+
+static const struct {  // maps a PNG text-chunk key to its parser and destination payload
+    const char* name;  // text chunk key; a NULL name terminates the table
+    int (*process)(const char* profile, size_t profile_len, MetadataPayload* const payload);
+    size_t storage_offset;  // byte offset of the target MetadataPayload inside Metadata
+} kPNGMetadataMap[] = {
+    // http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
+    // See also: ExifTool on CPAN.
+    {"Raw profile type exif", ProcessRawProfile, METADATA_OFFSET(exif)},
+    {"Raw profile type xmp", ProcessRawProfile, METADATA_OFFSET(xmp)},
+    // Exiftool puts exif data in APP1 chunk, too.
+    {"Raw profile type APP1", ProcessRawProfile, METADATA_OFFSET(exif)},
+    // XMP Specification Part 3, Section 3 #PNG
+    {"XML:com.adobe.xmp", MetadataCopy, METADATA_OFFSET(xmp)},
+    {NULL, NULL, 0},
+};
+
+// Looks for metadata at both the beginning and end of the PNG file, giving
+// preference to the head.
+// Returns true on success. The caller must use MetadataFree() on 'metadata' in
+// all cases.
+static int ExtractMetadataFromPNG(png_structp png,
+                                  png_infop const head_info,
+                                  png_infop const end_info,
+                                  Metadata* const metadata) {
+    int p;
+
+    for (p = 0; p < 2; ++p) {  // pass 0 scans 'head_info', pass 1 scans 'end_info'
+        png_infop const info = (p == 0) ? head_info : end_info;
+        png_textp text = NULL;
+        const png_uint_32 num = png_get_text(png, info, &text, NULL);
+        png_uint_32 i;
+        // Look for EXIF / XMP metadata.
+        for (i = 0; i < num; ++i, ++text) {
+            int j;
+            for (j = 0; kPNGMetadataMap[j].name != NULL; ++j) {
+                if (!strcmp(text->key, kPNGMetadataMap[j].name)) {
+                    MetadataPayload* const payload =
+                        (MetadataPayload*)((uint8_t*)metadata + kPNGMetadataMap[j].storage_offset);
+                    png_size_t text_length;
+                    switch (text->compression) {  // the length lives in a different field for iTXt chunks
+#ifdef PNG_iTXt_SUPPORTED
+                        case PNG_ITXT_COMPRESSION_NONE:
+                        case PNG_ITXT_COMPRESSION_zTXt:
+                            text_length = text->itxt_length;
+                            break;
+#endif
+                        case PNG_TEXT_COMPRESSION_NONE:
+                        case PNG_TEXT_COMPRESSION_zTXt:
+                        default:
+                            text_length = text->text_length;
+                            break;
+                    }
+                    if (payload->bytes != NULL) {  // already filled by an earlier chunk: the first one wins
+                        fprintf(stderr, "Ignoring additional '%s'\n", text->key);
+                    } else if (!kPNGMetadataMap[j].process(text->text, text_length, payload)) {
+                        fprintf(stderr, "Failed to process: '%s'\n", text->key);
+                        return 0;
+                    }
+                    break;  // key matched: stop searching the table for this chunk
+                }
+            }
+        }
+        // Look for an ICC profile.
+        {
+            png_charp name;
+            int comp_type;
+#if ((PNG_LIBPNG_VER_MAJOR << 8) | PNG_LIBPNG_VER_MINOR << 0) < ((1 << 8) | (5 << 0))
+            png_charp profile;
+#else // >= libpng 1.5.0
+            png_bytep profile;
+#endif
+            png_uint_32 len;
+
+            if (png_get_iCCP(png, info, &name, &comp_type, &profile, &len) == PNG_INFO_iCCP) {
+                if (!MetadataCopy((const char*)profile, len, &metadata->iccp)) return 0;
+            }
+        }
+    }
+
+    return 1;
+}
+
+typedef struct {  // read cursor over the in-memory PNG, registered with png_set_read_fn()
+    const uint8_t* data;  // start of the encoded PNG bytes
+    size_t data_size;  // total number of bytes in 'data'
+    png_size_t offset;  // bytes already handed out by ReadFunc()
+} PNGReadContext;
+
+static void ReadFunc(png_structp png_ptr, png_bytep data, png_size_t length) {  // bounds-checked even under NDEBUG
+    PNGReadContext* const ctx = (PNGReadContext*)png_get_io_ptr(png_ptr);
+    if (ctx->data_size - ctx->offset < length) png_error(png_ptr, "ReadFunc: read past end of data");
+    memcpy(data, ctx->data + ctx->offset, length);  // safe: png_error() above does not return
+    ctx->offset += length;
+}
+
+// Decodes the in-memory PNG 'data' and imports the pixels into 'pic' (RGBA when alpha is kept,
+// RGB otherwise); fills 'metadata' when it is non-NULL. Returns non-zero on success. libpng
+// errors longjmp() to the setjmp() below, hence the volatile locals used on the cleanup path.
+int ReadPNG(const uint8_t* const data,
+            size_t data_size,
+            struct WebPPicture* const pic,
+            int keep_alpha,
+            struct Metadata* const metadata) {
+    volatile png_structp png = NULL;
+    volatile png_infop info = NULL;
+    volatile png_infop end_info = NULL;
+    PNGReadContext context = {NULL, 0, 0};
+    int color_type, bit_depth, interlaced;
+    int has_alpha, num_passes, p;
+    volatile int ok = 0;
+    png_uint_32 width, height, y, stride;
+    uint8_t* volatile rgb = NULL;
+
+    context.data = data;
+    context.data_size = data_size;
+
+    png = png_create_read_struct(PNG_LIBPNG_VER_STRING, 0, 0, 0);
+    if (png == NULL) goto End;
+
+    png_set_error_fn(png, 0, error_function, NULL);
+    if (setjmp(png_jmpbuf(png))) {
+    Error:
+        MetadataFree(metadata);
+        goto End;
+    }
+
+    info = png_create_info_struct(png);
+    if (info == NULL) goto Error;
+    end_info = png_create_info_struct(png);
+    if (end_info == NULL) goto Error;
+
+    png_set_read_fn(png, &context, ReadFunc);
+    png_read_info(png, info);
+    if (!png_get_IHDR(png, info, &width, &height, &bit_depth, &color_type, &interlaced, NULL, NULL)) goto Error;
+
+    png_set_strip_16(png);
+    png_set_packing(png);
+    if (color_type == PNG_COLOR_TYPE_PALETTE) png_set_palette_to_rgb(png);
+    if (color_type == PNG_COLOR_TYPE_GRAY || color_type == PNG_COLOR_TYPE_GRAY_ALPHA) {
+        if (bit_depth < 8) png_set_expand_gray_1_2_4_to_8(png);
+        png_set_gray_to_rgb(png);
+    }
+    if (png_get_valid(png, info, PNG_INFO_tRNS)) {
+        png_set_tRNS_to_alpha(png);
+        has_alpha = 1;
+    } else {
+        has_alpha = !!(color_type & PNG_COLOR_MASK_ALPHA);
+    }
+
+    if (!keep_alpha) {
+        png_set_strip_alpha(png);
+        has_alpha = 0;
+    }
+
+    num_passes = png_set_interlace_handling(png);
+    png_read_update_info(png, info);
+    {
+        // 64-bit sizing: a 32-bit 'stride * height' can wrap; 'stride' is also cast to int below.
+        const uint64_t stride64 = (uint64_t)(has_alpha ? 4 : 3) * width * sizeof(*rgb);
+        const uint64_t total64 = stride64 * height;
+        if (stride64 > 0x7fffffffu || total64 != (uint64_t)(size_t)total64) goto Error;
+        stride = (png_uint_32)stride64;
+        rgb = (uint8_t*)malloc((size_t)total64);
+    }
+    if (rgb == NULL) goto Error;
+    for (p = 0; p < num_passes; ++p) {
+        for (y = 0; y < height; ++y) {
+            png_bytep row = (png_bytep)(rgb + (size_t)y * stride);
+            png_read_rows(png, &row, NULL, 1);
+        }
+    }
+    png_read_end(png, end_info);
+
+    if (metadata != NULL && !ExtractMetadataFromPNG(png, info, end_info, metadata)) {
+        fprintf(stderr, "Error extracting PNG metadata!\n");
+        goto Error;
+    }
+
+    pic->width = (int)width;
+    pic->height = (int)height;
+    ok = has_alpha ? WebPPictureImportRGBA(pic, rgb, (int)stride) : WebPPictureImportRGB(pic, rgb, (int)stride);
+
+    if (!ok) goto Error;
+
+End:
+    if (png != NULL) {
+        png_destroy_read_struct((png_structpp)&png, (png_infopp)&info, (png_infopp)&end_info);
+    }
+    free(rgb);
+    return ok;
+}
+#else  // !WEBP_HAVE_PNG
+int ReadPNG(const uint8_t* const data,  // stub compiled when WEBP_HAVE_PNG is not defined
+            size_t data_size,
+            struct WebPPicture* const pic,
+            int keep_alpha,
+            struct Metadata* const metadata) {
+    (void)data;  // the casts to void mark the parameters as intentionally unused
+    (void)data_size;
+    (void)pic;
+    (void)keep_alpha;
+    (void)metadata;
+    fprintf(stderr,
+            "PNG support not compiled. Please install the libpng "
+            "development package before building.\n");
+    return 0;  // always reports failure
+}
+#endif // WEBP_HAVE_PNG
+
+// -----------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/pngdec.h b/codec/L2/demos/webpEnc/host/pngdec.h
new file mode 100644
index 0000000000..ccdb7ab706
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/pngdec.h
@@ -0,0 +1,39 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// PNG decode.
+
+#ifndef WEBP_EXAMPLES_PNGDEC_H_
+#define WEBP_EXAMPLES_PNGDEC_H_
+
+#include "webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct Metadata;
+struct WebPPicture;
+
+// Reads a PNG from 'data', returning the decoded output in 'pic'.
+// Output is RGBA or YUVA, depending on pic->use_argb value.
+// If 'keep_alpha' is true and the PNG has an alpha channel, the output is RGBA
+// or YUVA. Otherwise, alpha channel is dropped and output is RGB or YUV.
+// Returns true on success.
+int ReadPNG(const uint8_t* const data,
+            size_t data_size,
+            struct WebPPicture* const pic,
+            int keep_alpha,
+            struct Metadata* const metadata);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // WEBP_EXAMPLES_PNGDEC_H_
diff --git a/codec/L2/demos/webpEnc/host/src/dec/Makefile.am b/codec/L2/demos/webpEnc/host/src/dec/Makefile.am
new file mode 100644
index 0000000000..7a0b4e8112
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dec/Makefile.am
@@ -0,0 +1,28 @@
+noinst_LTLIBRARIES = libwebpdecode.la
+# Sources and internal headers that make up libwebpdecode.la.
+libwebpdecode_la_SOURCES =
+libwebpdecode_la_SOURCES += alpha.c
+libwebpdecode_la_SOURCES += alphai.h
+libwebpdecode_la_SOURCES += buffer.c
+libwebpdecode_la_SOURCES += common.h
+libwebpdecode_la_SOURCES += decode_vp8.h
+libwebpdecode_la_SOURCES += frame.c
+libwebpdecode_la_SOURCES += idec.c
+libwebpdecode_la_SOURCES += io.c
+libwebpdecode_la_SOURCES += quant.c
+libwebpdecode_la_SOURCES += tree.c
+libwebpdecode_la_SOURCES += vp8.c
+libwebpdecode_la_SOURCES += vp8i.h
+libwebpdecode_la_SOURCES += vp8l.c
+libwebpdecode_la_SOURCES += vp8li.h
+libwebpdecode_la_SOURCES += webp.c
+libwebpdecode_la_SOURCES += webpi.h
+# Headers installed into libwebpdecodeincludedir (set on the last line), then uninstalled ones.
+libwebpdecodeinclude_HEADERS =
+libwebpdecodeinclude_HEADERS += ../webp/decode.h
+libwebpdecodeinclude_HEADERS += ../webp/types.h
+noinst_HEADERS =
+noinst_HEADERS += ../webp/format_constants.h
+# Preprocessor flags for this library, and the install directory of its headers.
+libwebpdecode_la_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
+libwebpdecodeincludedir = $(includedir)/webp
diff --git a/codec/L2/demos/webpEnc/host/src/dec/alpha.c b/codec/L2/demos/webpEnc/host/src/dec/alpha.c
new file mode 100644
index 0000000000..b80b602663
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dec/alpha.c
@@ -0,0 +1,160 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Alpha-plane decompression.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+#include "./alphai.h"
+#include "./vp8i.h"
+#include "./vp8li.h"
+#include "../dsp/dsp.h"
+#include "../utils/quant_levels_dec.h"
+#include "../utils/utils.h"
+#include "../webp/format_constants.h"
+
+//------------------------------------------------------------------------------
+// ALPHDecoder object.
+
+ALPHDecoder* ALPHNew(void) {
+    // One zero-initialized decoder; the allocator's result is returned unchecked (may be NULL).
+    return (ALPHDecoder*)WebPSafeCalloc(1ULL, sizeof(ALPHDecoder));
+}
+
+void ALPHDelete(ALPHDecoder* const dec) {
+    if (dec == NULL) return;  // deleting NULL is a no-op
+    // Release the lossless decoder held by 'dec' first, then 'dec' itself.
+    VP8LDelete(dec->vp8l_dec_);
+    dec->vp8l_dec_ = NULL;
+    WebPSafeFree(dec);
+}
+
+//------------------------------------------------------------------------------
+// Decoding.
+
+// Initialize alpha decoding by parsing the alpha header and decoding the image
+// header for alpha data stored using lossless compression.
+// Returns false in case of error in alpha header (data too short, invalid
+// compression method or filter, error in lossless header data etc).
+static int ALPHInit(
+    ALPHDecoder* const dec, const uint8_t* data, size_t data_size, int width, int height, uint8_t* output) {
+    const uint8_t* payload;  // alpha bytes that follow the ALPHA_HEADER_LEN-byte header
+    size_t payload_size;
+    int header, reserved, ok;
+
+    assert(width > 0 && height > 0);
+    assert(data != NULL && output != NULL);
+
+    dec->width_ = width;
+    dec->height_ = height;
+
+    if (data_size <= ALPHA_HEADER_LEN) return 0;  // nothing beyond the header: no alpha data
+    payload = data + ALPHA_HEADER_LEN;
+    payload_size = data_size - ALPHA_HEADER_LEN;
+
+    // First byte, 2-bit fields from the LSB up: method, filter, pre-processing, reserved.
+    header = data[0];
+    dec->method_ = (header >> 0) & 0x03;
+    dec->filter_ = (header >> 2) & 0x03;
+    dec->pre_processing_ = (header >> 4) & 0x03;
+    reserved = (header >> 6) & 0x03;
+    if (dec->method_ < ALPHA_NO_COMPRESSION || dec->method_ > ALPHA_LOSSLESS_COMPRESSION) return 0;
+    if (dec->filter_ >= WEBP_FILTER_LAST || dec->pre_processing_ > ALPHA_PREPROCESSED_LEVELS) return 0;
+    if (reserved != 0) return 0;
+
+    if (dec->method_ != ALPHA_NO_COMPRESSION) {
+        assert(dec->method_ == ALPHA_LOSSLESS_COMPRESSION);
+        ok = VP8LDecodeAlphaHeader(dec, payload, payload_size, output);
+    } else {  // raw alpha: the payload must hold at least width * height bytes
+        const size_t alpha_decoded_size = dec->width_ * dec->height_;
+        ok = (payload_size >= alpha_decoded_size);
+    }
+    VP8FiltersInit();
+    return ok;
+}
+
+// Decodes, unfilters and dequantizes *at least* 'num_rows' rows of alpha
+// starting from row number 'row'. It assumes that rows up to (row - 1) have
+// already been decoded.
+// Returns false in case of bitstream error.
+static int ALPHDecode(VP8Decoder* const dec, int row, int num_rows) {
+    ALPHDecoder* const alpha = dec->alph_dec_;
+    const int w = alpha->width_;
+    const int h = alpha->height_;
+    const WebPUnfilterFunc unfilter = WebPUnfilters[alpha->filter_];
+    uint8_t* const plane = dec->alpha_plane_;
+
+    if (alpha->method_ != ALPHA_NO_COMPRESSION) {  // i.e. ALPHA_LOSSLESS_COMPRESSION
+        assert(alpha->vp8l_dec_ != NULL);
+        if (!VP8LDecodeAlphaImageStream(alpha, row + num_rows)) return 0;
+    } else {
+        // Uncompressed alpha: the rows are copied straight out of the chunk,
+        // right after its ALPHA_HEADER_LEN-byte header.
+        const size_t first = row * w;
+        const size_t count = num_rows * w;
+        assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN + first + count);
+        memcpy(plane + first, dec->alpha_data_ + ALPHA_HEADER_LEN + first, count);
+    }
+
+    // Run the unfilter function selected by the header's filter field, if there is one.
+    if (unfilter != NULL) unfilter(w, h, w, row, num_rows, plane);
+
+    // Reaching the picture's last row marks the whole alpha plane as decoded.
+    if (row + num_rows == dec->pic_hdr_.height_) dec->is_alpha_decoded_ = 1;
+
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+// Main entry point.
+
+const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec, int row, int num_rows) {
+    const int width = dec->pic_hdr_.width_;
+    const int height = dec->pic_hdr_.height_;
+
+    if (row < 0 || num_rows <= 0 || row + num_rows > height) {
+        return NULL; // sanity check.
+    }
+
+    if (row == 0) {  // a request starting at row 0 creates and initializes the alpha decoder
+        // Initialize decoding.
+        assert(dec->alpha_plane_ != NULL);
+        dec->alph_dec_ = ALPHNew();
+        if (dec->alph_dec_ == NULL) return NULL;
+        if (!ALPHInit(dec->alph_dec_, dec->alpha_data_, dec->alpha_data_size_, width, height, dec->alpha_plane_)) {
+            ALPHDelete(dec->alph_dec_);
+            dec->alph_dec_ = NULL;
+            return NULL;
+        }
+        // if we allowed use of alpha dithering, check whether it's needed at all
+        if (dec->alph_dec_->pre_processing_ != ALPHA_PREPROCESSED_LEVELS) {
+            dec->alpha_dithering_ = 0; // disable dithering
+        } else {
+            num_rows = height; // decode everything in one pass
+        }
+    }
+
+    if (!dec->is_alpha_decoded_) {  // skipped once the whole plane has been decoded
+        int ok = 0;
+        assert(dec->alph_dec_ != NULL);
+        ok = ALPHDecode(dec, row, num_rows);
+        if (ok && dec->alpha_dithering_ > 0) {
+            ok = WebPDequantizeLevels(dec->alpha_plane_, width, height, dec->alpha_dithering_);
+        }
+        if (!ok || dec->is_alpha_decoded_) {  // error or last row: the alpha decoder is no longer needed
+            ALPHDelete(dec->alph_dec_);
+            dec->alph_dec_ = NULL;
+        }
+        if (!ok) return NULL; // Error.
+    }
+
+    // Return a pointer to the current decoded row.
+    return dec->alpha_plane_ + row * width;
+}
diff --git a/codec/L2/demos/webpEnc/host/src/dec/alphai.h b/codec/L2/demos/webpEnc/host/src/dec/alphai.h
new file mode 100644
index 0000000000..936d9c1030
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dec/alphai.h
@@ -0,0 +1,55 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Alpha decoder: internal header.
+//
+// Author: Urvang (urvang@google.com)
+
+#ifndef WEBP_DEC_ALPHAI_H_
+#define WEBP_DEC_ALPHAI_H_
+
+#include "./webpi.h"
+#include "../utils/filters.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP8LDecoder; // Defined in dec/vp8li.h.
+
+typedef struct ALPHDecoder ALPHDecoder;
+struct ALPHDecoder {
+    int width_;  // picture dimensions, stored by ALPHInit() in alpha.c
+    int height_;
+    int method_;  // alpha header bits 0-1: ALPHA_NO_COMPRESSION or ALPHA_LOSSLESS_COMPRESSION
+    WEBP_FILTER_TYPE filter_;  // alpha header bits 2-3; indexes WebPUnfilters[] while decoding
+    int pre_processing_;  // alpha header bits 4-5
+    struct VP8LDecoder* vp8l_dec_;  // lossless decoder, released by ALPHDelete()
+    VP8Io io_;
+    int use_8b_decode; // Although alpha channel requires only 1 byte per
+                       // pixel, sometimes VP8LDecoder may need to allocate
+                       // 4 bytes per pixel internally during decode.
+};
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+// Allocates a new alpha decoder instance.
+ALPHDecoder* ALPHNew(void);
+
+// Clears and deallocates an alpha decoder instance.
+void ALPHDelete(ALPHDecoder* const dec);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_DEC_ALPHAI_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/dec/buffer.c b/codec/L2/demos/webpEnc/host/src/dec/buffer.c
new file mode 100644
index 0000000000..7eb0b5ffcb
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dec/buffer.c
@@ -0,0 +1,252 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Everything about WebPDecBuffer
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+
+#include "./vp8i.h"
+#include "./webpi.h"
+#include "../utils/utils.h"
+
+//------------------------------------------------------------------------------
+// WebPDecBuffer
+
+// Number of bytes per pixel for the different color-spaces.
+static const int kModeBpp[MODE_LAST] = {3, 4, 3, 4, 4, 2, 2, 4, 4, 4, 2, // 4, 4, 4, 2: pre-multiplied modes (presumably)
+                                        1, 1};
+
+// Check that webp_csp_mode is within the bounds of WEBP_CSP_MODE.
+// Convert to an integer to handle both the unsigned/signed enum cases
+// without the need for casting to remove type limit warnings.
+static int IsValidColorspace(int webp_csp_mode) {
+    return !(webp_csp_mode < MODE_RGB || webp_csp_mode >= MODE_LAST);  // in [MODE_RGB, MODE_LAST)
+}
+
+// Minimum plane size: the very last (or first, if flipped) row doesn't require padding.
+#define MIN_BUFFER_SIZE(WIDTH, HEIGHT, STRIDE) ((uint64_t)(STRIDE) * ((HEIGHT)-1) + (WIDTH))
+// Checks colorspace, plane pointers, strides and sizes; VP8_STATUS_INVALID_PARAM if any is off.
+static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
+    int ok = 1;
+    const WEBP_CSP_MODE mode = buffer->colorspace;
+    const int width = buffer->width;
+    const int height = buffer->height;
+    if (!IsValidColorspace(mode)) {
+        ok = 0;
+    } else if (!WebPIsRGBMode(mode)) { // YUV checks
+        const WebPYUVABuffer* const buf = &buffer->u.YUVA;
+        const int uv_width = (width + 1) / 2;
+        const int uv_height = (height + 1) / 2;
+        const int y_stride = abs(buf->y_stride);
+        const int u_stride = abs(buf->u_stride);
+        const int v_stride = abs(buf->v_stride);
+        const int a_stride = abs(buf->a_stride);
+        const uint64_t y_size = MIN_BUFFER_SIZE(width, height, y_stride);
+        const uint64_t u_size = MIN_BUFFER_SIZE(uv_width, uv_height, u_stride);
+        const uint64_t v_size = MIN_BUFFER_SIZE(uv_width, uv_height, v_stride);
+        const uint64_t a_size = MIN_BUFFER_SIZE(width, height, a_stride);
+        ok &= (y_size <= buf->y_size);
+        ok &= (u_size <= buf->u_size);
+        ok &= (v_size <= buf->v_size);
+        ok &= (y_stride >= width);
+        ok &= (u_stride >= uv_width);
+        ok &= (v_stride >= uv_width);
+        ok &= (buf->y != NULL);
+        ok &= (buf->u != NULL);
+        ok &= (buf->v != NULL);
+        if (mode == MODE_YUVA) {
+            ok &= (a_stride >= width);
+            ok &= (a_size <= buf->a_size);
+            ok &= (buf->a != NULL);
+        }
+    } else { // RGB checks
+        const WebPRGBABuffer* const buf = &buffer->u.RGBA;
+        const int stride = abs(buf->stride);
+        // A row spans width * bpp bytes; plain 'width' would under-estimate the last row.
+        const uint64_t size = MIN_BUFFER_SIZE((uint64_t)width * kModeBpp[mode], height, stride);
+        ok &= (size <= buf->size);
+        ok &= (stride >= width * kModeBpp[mode]);
+        ok &= (buf->rgba != NULL);
+    }
+    return ok ? VP8_STATUS_OK : VP8_STATUS_INVALID_PARAM;
+}
+#undef MIN_BUFFER_SIZE
+
+// Allocates the planes of 'buffer' in one block unless memory is external or already there, then validates.
+static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
+    const int w = buffer->width;
+    const int h = buffer->height;
+    const WEBP_CSP_MODE mode = buffer->colorspace;
+
+    if (w <= 0 || h <= 0 || !IsValidColorspace(mode)) return VP8_STATUS_INVALID_PARAM;
+
+    if (!buffer->is_external_memory && buffer->private_memory == NULL) {
+        uint8_t* output;
+        int uv_stride = 0, a_stride = 0;
+        uint64_t uv_size = 0, a_size = 0, total_size;
+        // We need memory and it hasn't been allocated yet.
+        // => initialize output buffer, now that dimensions are known.
+        // The row size is first computed in 64 bits: 'w * bpp' can overflow a signed int.
+        const uint64_t stride64 = (uint64_t)w * kModeBpp[mode];
+        const int stride = (int)stride64;
+        const uint64_t size = stride64 * h;
+
+        if (stride64 >= (1ULL << 31)) return VP8_STATUS_INVALID_PARAM;
+        if (!WebPIsRGBMode(mode)) {
+            uv_stride = (w + 1) / 2;
+            uv_size = (uint64_t)uv_stride * ((h + 1) / 2);
+            if (mode == MODE_YUVA) {
+                a_stride = w;
+                a_size = (uint64_t)a_stride * h;
+            }
+        }
+        total_size = size + 2 * uv_size + a_size;
+
+        // Security/sanity checks
+        output = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*output));
+        if (output == NULL) return VP8_STATUS_OUT_OF_MEMORY;
+        buffer->private_memory = output;
+
+        if (!WebPIsRGBMode(mode)) { // YUVA initialization
+            WebPYUVABuffer* const buf = &buffer->u.YUVA;
+            buf->y = output;
+            buf->y_stride = stride;
+            buf->y_size = (size_t)size;
+            buf->u = output + size;
+            buf->u_stride = uv_stride;
+            buf->u_size = (size_t)uv_size;
+            buf->v = output + size + uv_size;
+            buf->v_stride = uv_stride;
+            buf->v_size = (size_t)uv_size;
+            if (mode == MODE_YUVA) {
+                buf->a = output + size + 2 * uv_size;
+            }
+            buf->a_size = (size_t)a_size;
+            buf->a_stride = a_stride;
+        } else { // RGBA initialization
+            WebPRGBABuffer* const buf = &buffer->u.RGBA;
+            buf->rgba = output;
+            buf->stride = stride;
+            buf->size = (size_t)size;
+        }
+    }
+    return CheckDecBuffer(buffer);
+}
+
+// Flips 'buffer' vertically without moving pixels: each plane pointer is moved to its last row and
+// the stride is negated. Offsets use 64-bit math so that 'rows * stride' cannot overflow an int.
+VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer) {
+    if (buffer == NULL) return VP8_STATUS_INVALID_PARAM;
+    if (WebPIsRGBMode(buffer->colorspace)) {
+        WebPRGBABuffer* const buf = &buffer->u.RGBA;
+        buf->rgba += (int64_t)(buffer->height - 1) * buf->stride;
+        buf->stride = -buf->stride;
+    } else {
+        WebPYUVABuffer* const buf = &buffer->u.YUVA;
+        const int64_t H = buffer->height;
+        buf->y += (H - 1) * buf->y_stride;
+        buf->y_stride = -buf->y_stride;
+        buf->u += ((H - 1) >> 1) * buf->u_stride;
+        buf->u_stride = -buf->u_stride;
+        buf->v += ((H - 1) >> 1) * buf->v_stride;
+        buf->v_stride = -buf->v_stride;
+        if (buf->a != NULL) {
+            buf->a += (H - 1) * buf->a_stride;
+            buf->a_stride = -buf->a_stride;
+        }
+    }
+    return VP8_STATUS_OK;
+}
+
+// Applies crop/scale options to (w, h), allocates 'out' and flips it if requested; 'options' may be NULL.
+VP8StatusCode WebPAllocateDecBuffer(int w, int h, const WebPDecoderOptions* const options, WebPDecBuffer* const out) {
+    VP8StatusCode status;
+    if (out == NULL || w <= 0 || h <= 0) return VP8_STATUS_INVALID_PARAM;
+    if (options != NULL) { // First, apply options if there is any.
+        if (options->use_cropping) {
+            const int cw = options->crop_width;
+            const int ch = options->crop_height;
+            const int x = options->crop_left & ~1;
+            const int y = options->crop_top & ~1;
+            // 'cw > w - x' rather than 'x + cw > w': the sum can overflow int for huge crop values.
+            if (x < 0 || y < 0 || cw <= 0 || ch <= 0 || cw > w - x || ch > h - y) {
+                return VP8_STATUS_INVALID_PARAM; // out of frame boundary.
+            }
+            w = cw;
+            h = ch;
+        }
+        if (options->use_scaling) {
+            int scaled_width = options->scaled_width;
+            int scaled_height = options->scaled_height;
+            if (!WebPRescalerGetScaledDimensions(w, h, &scaled_width, &scaled_height)) {
+                return VP8_STATUS_INVALID_PARAM;
+            }
+            w = scaled_width;
+            h = scaled_height;
+        }
+    }
+    out->width = w;
+    out->height = h;
+
+    // Then, allocate buffer for real.
+    status = AllocateBuffer(out);
+    if (status != VP8_STATUS_OK) return status;
+
+    // Use the stride trick if vertical flip is needed.
+    if (options != NULL && options->flip) {
+        status = WebPFlipBuffer(out);
+    }
+    return status;
+}
+
+//------------------------------------------------------------------------------
+// constructors / destructors
+
+int WebPInitDecBufferInternal(WebPDecBuffer* buffer, int version) {
+    // Returns 0 on an ABI version mismatch or a NULL buffer; otherwise zero-fills
+    // the buffer and returns 1.
+    const int abi_mismatch = WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION);
+    if (abi_mismatch || buffer == NULL) return 0;
+    memset(buffer, 0, sizeof(*buffer));
+    return 1;
+}
+
+void WebPFreeDecBuffer(WebPDecBuffer* buffer) {
+    if (buffer == NULL) return;
+    // Memory is released only when the buffer is not flagged as external;
+    // externally supplied memory is left alone. The private pointer is
+    // cleared in every case.
+    if (!buffer->is_external_memory) WebPSafeFree(buffer->private_memory);
+    buffer->private_memory = NULL;
+}
+
+void WebPCopyDecBuffer(const WebPDecBuffer* const src, WebPDecBuffer* const dst) {
+    if (src == NULL || dst == NULL) return;
+    *dst = *src;
+    // A copy never owns memory: if src holds private memory, dst is flagged as
+    // external and its private pointer cleared, so freeing dst releases nothing.
+    if (src->private_memory == NULL) return;
+    dst->is_external_memory = 1;
+    dst->private_memory = NULL;
+}
+
+// Copy and transfer ownership from src to dst (beware of parameter order!)
+void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst) {
+    if (src == NULL || dst == NULL) return;
+    *dst = *src;
+    // If src owned private memory, ownership moves to dst: src is re-flagged as
+    // external and forgets the pointer, so freeing src will not release it.
+    if (src->private_memory == NULL) return;
+    src->is_external_memory = 1;
+    src->private_memory = NULL;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/dec/common.h b/codec/L2/demos/webpEnc/host/src/dec/common.h
new file mode 100644
index 0000000000..ff84629a84
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dec/common.h
@@ -0,0 +1,59 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Definitions and macros common to encoding and decoding
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_DEC_COMMON_H_
+#define WEBP_DEC_COMMON_H_
+
+// intra prediction modes. All enumerators live in one anonymous enum, so the three groups alias each other's values.
+enum {
+    B_DC_PRED = 0, // 4x4 modes
+    B_TM_PRED = 1,
+    B_VE_PRED = 2,
+    B_HE_PRED = 3,
+    B_RD_PRED = 4,
+    B_VR_PRED = 5,
+    B_LD_PRED = 6,
+    B_VL_PRED = 7,
+    B_HD_PRED = 8,
+    B_HU_PRED = 9,
+    NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED, // = 10
+
+    // Luma16 or UV modes (aliases of the first four 4x4 values)
+    DC_PRED = B_DC_PRED, // = 0
+    V_PRED = B_VE_PRED,  // = 2
+    H_PRED = B_HE_PRED,  // = 3
+    TM_PRED = B_TM_PRED, // = 1
+    B_PRED = NUM_BMODES, // refined I4x4 mode (= 10)
+    NUM_PRED_MODES = 4,
+
+    // special modes: DC variants that CheckMode() in frame.c substitutes for B_DC_PRED (they reuse values 4..6)
+    B_DC_PRED_NOTOP = 4,     // mb_y == 0, mb_x > 0
+    B_DC_PRED_NOLEFT = 5,    // mb_x == 0, mb_y > 0
+    B_DC_PRED_NOTOPLEFT = 6, // mb_x == 0 and mb_y == 0
+    NUM_B_DC_MODES = 7
+};
+
+enum { // named counts shared by the decoder sources
+    MB_FEATURE_TREE_PROBS = 3,
+    NUM_MB_SEGMENTS = 4, // number of segments; bounds the per-segment loops in frame.c
+    NUM_REF_LF_DELTAS = 4,
+    NUM_MODE_LF_DELTAS = 4, // I4x4, ZERO, *, SPLIT
+    MAX_NUM_PARTITIONS = 8,
+    // Probabilities
+    NUM_TYPES = 4, // 0: i16-AC,  1: i16-DC,  2:chroma-AC,  3:i4-AC
+    NUM_BANDS = 8,
+    NUM_CTX = 3,
+    NUM_PROBAS = 11
+};
+
+#endif // WEBP_DEC_COMMON_H_
diff --git a/codec/L2/demos/webpEnc/host/src/dec/decode_vp8.h b/codec/L2/demos/webpEnc/host/src/dec/decode_vp8.h
new file mode 100644
index 0000000000..c4e35d2a27
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dec/decode_vp8.h
@@ -0,0 +1,186 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  Low-level API for VP8 decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_WEBP_DECODE_VP8_H_
+#define WEBP_WEBP_DECODE_VP8_H_
+
+#include "../webp/decode.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Lower-level API
+//
+// These functions provide fine-grained control of the decoding process.
+// The call flow should resemble:
+//
+//   VP8Io io;
+//   VP8InitIo(&io);
+//   io.data = data;
+//   io.data_size = size;
+//   /* customize io's functions (setup()/put()/teardown()) if needed. */
+//
+//   VP8Decoder* dec = VP8New();
+//   bool ok = VP8Decode(dec, &io);
+//   if (!ok) printf("Error: %s\n", VP8StatusMessage(dec));
+//   VP8Delete(dec);
+//   return ok;
+
+// Input / Output
+typedef struct VP8Io VP8Io;
+typedef int (*VP8IoPutHook)(const VP8Io* io);
+typedef int (*VP8IoSetupHook)(VP8Io* io);
+typedef void (*VP8IoTeardownHook)(const VP8Io* io);
+
+struct VP8Io {
+    // set by VP8GetHeaders()
+    int width, height; // picture dimensions, in pixels (invariable).
+                       // These are the original, uncropped dimensions.
+                       // The actual area passed to put() is stored
+                       // in mb_w / mb_h fields.
+
+    // set before calling put()
+    int mb_y;                 // first row of the sample (in pixels, relative to crop_top)
+    int mb_w;                 // number of columns in the sample (crop_right - crop_left)
+    int mb_h;                 // number of rows in the sample
+    const uint8_t *y, *u, *v; // rows to copy (in yuv420 format)
+    int y_stride;             // row stride for luma
+    int uv_stride;            // row stride for chroma
+
+    void* opaque; // user data
+
+    // called when fresh samples are available. Currently, samples are in
+    // YUV420 format, and can be up to width x 24 in size (depending on the
+    // in-loop filtering level, e.g.). Should return false in case of error
+    // or abort request. The actual size of the area to update is mb_w x mb_h
+    // in size, taking cropping into account.
+    VP8IoPutHook put;
+
+    // called just before starting to decode the blocks.
+    // Must return false in case of setup error, true otherwise. If false is
+    // returned, teardown() will NOT be called. But if the setup succeeded
+    // and true is returned, then teardown() will always be called afterward.
+    VP8IoSetupHook setup;
+
+    // Called just after block decoding is finished (or when an error occurred
+    // during put()). Is NOT called if setup() failed.
+    VP8IoTeardownHook teardown;
+
+    // this is a recommendation for the user-side yuv->rgb converter. This flag
+    // is set when calling setup() hook and can be overwritten by it. It then
+    // can be taken into consideration during the put() method.
+    int fancy_upsampling;
+
+    // Input buffer.
+    size_t data_size;
+    const uint8_t* data;
+
+    // If true, in-loop filtering will not be performed even if present in the
+    // bitstream. Switching off filtering may speed up decoding at the expense
+    // of more visible blocking. Note that output will also be non-compliant
+    // with the VP8 specifications.
+    int bypass_filtering;
+
+    // Cropping parameters (in pixels of the uncropped picture).
+    int use_cropping;
+    int crop_left, crop_right, crop_top, crop_bottom;
+
+    // Scaling parameters.
+    int use_scaling;
+    int scaled_width, scaled_height;
+
+    // If non NULL, pointer to the alpha data (if present) corresponding to the
+    // start of the current row (That is: it is pre-offset by mb_y and takes
+    // cropping into account).
+    const uint8_t* a;
+};
+
+// Internal, version-checked, entry point
+int VP8InitIoInternal(VP8Io* const, int);
+
+// Set the custom IO function pointers and user-data. The setter for IO hooks
+// should be called before initiating incremental decoding. Returns true if
+// WebPIDecoder object is successfully modified, false otherwise.
+int WebPISetIOHooks(
+    WebPIDecoder* const idec, VP8IoPutHook put, VP8IoSetupHook setup, VP8IoTeardownHook teardown, void* user_data);
+
+// Main decoding object. This is an opaque structure.
+typedef struct VP8Decoder VP8Decoder;
+
+// Create a new decoder object.
+VP8Decoder* VP8New(void);
+
+// Must be called to make sure 'io' is initialized properly. Forwards to
+// VP8InitIoInternal() with WEBP_DECODER_ABI_VERSION and returns false in case of version
+// mismatch; upon such failure, no other decoding function should be called.
+static WEBP_INLINE int VP8InitIo(VP8Io* const io) {
+    return VP8InitIoInternal(io, WEBP_DECODER_ABI_VERSION);
+}
+
+// Decode the VP8 frame header. Returns true if ok.
+// Note: 'io->data' must be pointing to the start of the VP8 frame header.
+int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io);
+
+// Decode a picture. Will call VP8GetHeaders() if it wasn't done already.
+// Returns false in case of error.
+int VP8Decode(VP8Decoder* const dec, VP8Io* const io);
+
+// Return current status of the decoder:
+VP8StatusCode VP8Status(VP8Decoder* const dec);
+
+// return readable string corresponding to the last status.
+const char* VP8StatusMessage(VP8Decoder* const dec);
+
+// Resets the decoder in its initial state, reclaiming memory.
+// Not a mandatory call between calls to VP8Decode().
+void VP8Clear(VP8Decoder* const dec);
+
+// Destroy the decoder object.
+void VP8Delete(VP8Decoder* const dec);
+
+//------------------------------------------------------------------------------
+// Miscellaneous VP8/VP8L bitstream probing functions.
+
+// Returns true if the next 3 bytes in data contain the VP8 signature.
+WEBP_EXTERN(int) VP8CheckSignature(const uint8_t* const data, size_t data_size);
+
+// Validates the VP8 data-header and retrieves basic header information viz
+// width and height. Returns 0 in case of formatting error. *width/*height
+// can be passed NULL.
+WEBP_EXTERN(int)
+VP8GetInfo(const uint8_t* data,
+           size_t data_size,  // data available so far
+           size_t chunk_size, // total data size expected in the chunk
+           int* const width,
+           int* const height);
+
+// Returns true if the next byte(s) in data is a VP8L signature.
+WEBP_EXTERN(int) VP8LCheckSignature(const uint8_t* const data, size_t size);
+
+// Validates the VP8L data-header and retrieves basic header information viz
+// width, height and alpha. Returns 0 in case of formatting error.
+// width/height/has_alpha can be passed NULL.
+WEBP_EXTERN(int)
+VP8LGetInfo(const uint8_t* data,
+            size_t data_size, // data available so far
+            int* const width,
+            int* const height,
+            int* const has_alpha);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_WEBP_DECODE_VP8_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/dec/frame.c b/codec/L2/demos/webpEnc/host/src/dec/frame.c
new file mode 100644
index 0000000000..1d195499fa
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dec/frame.c
@@ -0,0 +1,815 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Frame-reconstruction function. Memory allocation.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+#include "./vp8i.h"
+#include "../utils/utils.h"
+#include "../utils/profiling.h"
+
+//------------------------------------------------------------------------------
+// Main reconstruction function.
+// kScan[n]: offset of the n-th 4x4 luma sub-block (raster order) from the top-left sample, with row stride BPS.
+static const int kScan[16] = {0 + 0 * BPS,  4 + 0 * BPS,  8 + 0 * BPS,  12 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS,
+                              8 + 4 * BPS,  12 + 4 * BPS, 0 + 8 * BPS,  4 + 8 * BPS,  8 + 8 * BPS, 12 + 8 * BPS,
+                              0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS};
+
+// Replaces B_DC_PRED by the variant matching the position of macroblock
+// (mb_x, mb_y) on the first row and/or first column; other modes pass through.
+static int CheckMode(int mb_x, int mb_y, int mode) {
+    const int first_col = (mb_x == 0);
+    const int first_row = (mb_y == 0);
+    // Only the DC mode has border-specific variants.
+    if (mode != B_DC_PRED) return mode;
+    if (first_col) return first_row ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;
+    return first_row ? B_DC_PRED_NOTOP : B_DC_PRED;
+}
+
+static void Copy32b(uint8_t* const dst, const uint8_t* const src) {
+    memcpy(dst, src, sizeof(uint32_t)); // 32 bits, i.e. four uint8_t samples
+}
+
+// Dispatches the coefficients in 'src' to the transform selected by the two
+// most significant bits of 'bits' (a uint32_t, so the selector is within
+// [0, 3]): 3 -> VP8Transform, 2 -> VP8TransformAC3, 1 -> VP8TransformDC.
+static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src, uint8_t* const dst) {
+    const uint32_t selector = bits >> 30;
+    if (selector == 3) {
+        VP8Transform(src, dst, 0);
+    } else if (selector == 2) {
+        VP8TransformAC3(src, dst);
+    } else if (selector == 1) {
+        VP8TransformDC(src, dst);
+    } else {
+        // selector == 0: no transform is applied, 'dst' is left as is
+    }
+}
+
+// Chroma transform dispatch, driven by the low 8 bits of 'bits'.
+static void DoUVTransform(uint32_t bits, const int16_t* const src, uint8_t* const dst) {
+    if ((bits & 0xff) == 0) return; // no non-zero coeff at all
+    if ((bits & 0xaa) != 0) {
+        VP8TransformUV(src, dst); // some AC coefficient is non-zero (no AC3 variant for U/V)
+    } else {
+        VP8TransformDCUV(src, dst);
+    }
+}
+
+// Rebuilds macroblock row ctx->mb_y_: every macroblock is predicted and has its residuals added in the
+// dec->yuv_b_ work area, then is copied into the cache_y_/u_/v_ planes at the band selected by ctx->id_.
+static void ReconstructRow(const VP8Decoder* const dec, const VP8ThreadContext* ctx) {
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    int j;
+    int mb_x;
+    const int mb_y = ctx->mb_y_;
+    const int cache_id = ctx->id_;
+    uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
+    uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
+    uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
+
+    // Initialize left-most block.
+    for (j = 0; j < 16; ++j) {
+        y_dst[j * BPS - 1] = 129;
+    }
+    for (j = 0; j < 8; ++j) {
+        u_dst[j * BPS - 1] = 129;
+        v_dst[j * BPS - 1] = 129;
+    }
+
+    // Init top-left sample on left column too.
+    if (mb_y > 0) {
+        y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129;
+    } else {
+        // we only need to do this init once at block (0,0).
+        // Afterward, it remains valid for the whole topmost row.
+        memset(y_dst - BPS - 1, 127, 16 + 4 + 1);
+        memset(u_dst - BPS - 1, 127, 8 + 1);
+        memset(v_dst - BPS - 1, 127, 8 + 1);
+    }
+
+    // Reconstruct one row.
+    for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
+        const VP8MBData* const block = ctx->mb_data_ + mb_x;
+
+        // Rotate in the left samples from previously decoded block. We move four
+        // pixels at a time for alignment reason, and because of in-loop filter.
+        if (mb_x > 0) {
+            for (j = -1; j < 16; ++j) {
+                Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
+            }
+            for (j = -1; j < 8; ++j) {
+                Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]);
+                Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
+            }
+        }
+        {
+            // bring top samples into the cache
+            VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x;
+            const int16_t* const coeffs = block->coeffs_;
+            uint32_t bits = block->non_zero_y_;
+            int n;
+
+            if (mb_y > 0) {
+                memcpy(y_dst - BPS, top_yuv[0].y, 16);
+                memcpy(u_dst - BPS, top_yuv[0].u, 8);
+                memcpy(v_dst - BPS, top_yuv[0].v, 8);
+            }
+            // predict and add residuals
+            if (block->is_i4x4_) { // 4x4
+                uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
+                if (mb_y > 0) {
+                    if (mb_x >= dec->mb_w_ - 1) { // on rightmost border
+                        memset(top_right, top_yuv[0].y[15], sizeof(*top_right));
+                    } else {
+                        memcpy(top_right, top_yuv[1].y, sizeof(*top_right));
+                    }
+                }
+                // replicate the top-right pixels below (top_right is a uint32_t*: each BPS step is 4 rows down)
+                top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];
+
+                // predict and add residuals for all 4x4 blocks in turn.
+                for (n = 0; n < 16; ++n, bits <<= 2) {
+                    uint8_t* const dst = y_dst + kScan[n];
+                    VP8PredLuma4[block->imodes_[n]](dst);
+                    DoTransform(bits, coeffs + n * 16, dst);
+                }
+            } else { // 16x16
+                const int pred_func = CheckMode(mb_x, mb_y, block->imodes_[0]);
+                VP8PredLuma16[pred_func](y_dst);
+                if (bits != 0) {
+                    for (n = 0; n < 16; ++n, bits <<= 2) {
+                        DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]);
+                    }
+                }
+            }
+            {
+                // Chroma
+                const uint32_t bits_uv = block->non_zero_uv_;
+                const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_);
+                VP8PredChroma8[pred_func](u_dst);
+                VP8PredChroma8[pred_func](v_dst);
+                DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst);
+                DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst);
+            }
+
+            // stash away the bottom samples: they are the top samples of the macroblock row below
+            if (mb_y < dec->mb_h_ - 1) {
+                memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16);
+                memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8);
+                memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8);
+            }
+        }
+        // Transfer reconstructed samples from yuv_b_ cache to final destination.
+        {
+            const int y_offset = cache_id * 16 * dec->cache_y_stride_;
+            const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;
+            uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset;
+            uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset;
+            uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset;
+            for (j = 0; j < 16; ++j) {
+                memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);
+            }
+            for (j = 0; j < 8; ++j) {
+                memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);
+                memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);
+            }
+        }
+    }
+
+    StopProfiling(&stop_watch, &timeReconstructRow, &countReconstructRow);
+}
+
+//------------------------------------------------------------------------------
+// Filtering
+
+// kFilterExtraRows[] = How many extra lines are needed on the MB boundary
+// for caching. Indexed by dec->filter_type_ (0: none, 1: simple, 2: complex).
+// Simple filter:  up to 2 luma samples are read and 1 is written.
+// Complex filter: up to 4 luma samples are read and 3 are written. Same for
+//                 U/V, so it's 8 samples total (because of the 2x upsampling).
+static const uint8_t kFilterExtraRows[3] = {0, 2, 8};
+
+// In-loop filters macroblock (mb_x, mb_y) in place inside the row cache, using
+// the strengths stored in ctx->f_info_[mb_x]. The filters for the left/top
+// macroblock edge use 'limit + 4' and are skipped for mb_x == 0 / mb_y == 0;
+// the 'i' variants use 'limit' and are gated by f_inner_.
+static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
+    const VP8ThreadContext* const ctx = &dec->thread_ctx_;
+    const VP8FInfo* const f_info = &ctx->f_info_[mb_x];
+    const int limit = f_info->f_limit_;
+    const int ilevel = f_info->f_ilevel_;
+    const int y_bps = dec->cache_y_stride_;
+    uint8_t* const y_dst = dec->cache_y_ + ctx->id_ * 16 * y_bps + mb_x * 16;
+    const int has_left = (mb_x > 0);
+    const int has_top = (mb_y > 0);
+    if (limit == 0) return; // this macroblock is not filtered
+    assert(limit >= 3);
+    // The H filters run first (macroblock edge, then 'i' variant), then the V filters.
+    if (dec->filter_type_ == 1) {
+        // simple: only the luma plane is touched
+        if (has_left) VP8SimpleHFilter16(y_dst, y_bps, limit + 4);
+        if (f_info->f_inner_) VP8SimpleHFilter16i(y_dst, y_bps, limit);
+        if (has_top) VP8SimpleVFilter16(y_dst, y_bps, limit + 4);
+        if (f_info->f_inner_) VP8SimpleVFilter16i(y_dst, y_bps, limit);
+        return;
+    }
+    {
+        // complex: luma plus both chroma planes
+        const int uv_bps = dec->cache_uv_stride_;
+        uint8_t* const u_dst = dec->cache_u_ + ctx->id_ * 8 * uv_bps + mb_x * 8;
+        uint8_t* const v_dst = dec->cache_v_ + ctx->id_ * 8 * uv_bps + mb_x * 8;
+        const int hev_thresh = f_info->hev_thresh_;
+        if (has_left) {
+            VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
+            VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
+        }
+        if (f_info->f_inner_) {
+            VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
+            VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
+        }
+        if (has_top) {
+            VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
+            VP8VFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
+        }
+        if (f_info->f_inner_) {
+            VP8VFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
+            VP8VFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
+        }
+    }
+}
+
+// Runs DoFilter() on each macroblock of the current row within [tl_mb_x_, br_mb_x_).
+static void FilterRow(const VP8Decoder* const dec) {
+    const int row = dec->thread_ctx_.mb_y_;
+    int col = dec->tl_mb_x_;
+    assert(dec->thread_ctx_.filter_row_);
+    while (col < dec->br_mb_x_) {
+        DoFilter(dec, col++, row);
+    }
+}
+
+//------------------------------------------------------------------------------
+// Precompute the filtering strength for each segment and each i4x4/i16x16 mode.
+
+// Fills dec->fstrengths_[segment][is_i4x4] for every segment. Does nothing
+// when dec->filter_type_ is not positive.
+static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
+    const VP8FilterHeader* const hdr = &dec->filter_hdr_;
+    int s;
+    if (dec->filter_type_ <= 0) return;
+    for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
+        int i4x4;
+        // Base level of the segment: the frame level, unless the segment header
+        // supplies its own strength (taken as is, or added to the frame level).
+        int base_level = hdr->level_;
+        if (dec->segment_hdr_.use_segment_) {
+            base_level = dec->segment_hdr_.filter_strength_[s];
+            if (!dec->segment_hdr_.absolute_delta_) {
+                base_level += hdr->level_;
+            }
+        }
+        for (i4x4 = 0; i4x4 <= 1; ++i4x4) {
+            VP8FInfo* const info = &dec->fstrengths_[s][i4x4];
+            int level = base_level;
+            int ilevel;
+            if (hdr->use_lf_delta_) {
+                level += hdr->ref_lf_delta_[0];
+                if (i4x4) {
+                    level += hdr->mode_lf_delta_[0];
+                }
+            }
+            // Clamp to [0, 63].
+            if (level < 0) level = 0;
+            if (level > 63) level = 63;
+            info->f_inner_ = i4x4;
+            if (level == 0) {
+                info->f_limit_ = 0; // no filtering
+                continue;
+            }
+            // ilevel shrinks as sharpness_ grows, but never drops below 1.
+            ilevel = level;
+            if (hdr->sharpness_ > 0) {
+                ilevel >>= (hdr->sharpness_ > 4) ? 2 : 1;
+                if (ilevel > 9 - hdr->sharpness_) {
+                    ilevel = 9 - hdr->sharpness_;
+                }
+            }
+            if (ilevel < 1) ilevel = 1;
+            info->f_ilevel_ = ilevel;
+            info->f_limit_ = 2 * level + ilevel;
+            info->hev_thresh_ = (level >= 40) ? 2 : (level >= 15) ? 1 : 0;
+        }
+    }
+}
+
+//------------------------------------------------------------------------------
+// Dithering
+
+#define DITHER_AMP_TAB_SIZE 12
+static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
+    // indexed by a segment's uv_quant_ (negative values use entry 0); roughly, it's dqm->uv_mat_[1]
+    8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1};
+
+// Derives the per-segment dither amplitudes (dqm->dither_) and the clamped alpha
+// dithering strength from 'options'. A NULL 'options' leaves 'dec' untouched.
+void VP8InitDithering(const WebPDecoderOptions* const options, VP8Decoder* const dec) {
+    const int max_amp = (1 << VP8_RANDOM_DITHER_FIX) - 1;
+    int d, f;
+    assert(dec != NULL);
+    if (options == NULL) return;
+    // Map the strength from [0, 100] onto [0, max_amp], saturating outside that range.
+    d = options->dithering_strength;
+    f = (d < 0) ? 0 : (d > 100) ? max_amp : (d * max_amp / 100);
+    if (f > 0) {
+        int seg;
+        int amp_union = 0;
+        for (seg = 0; seg < NUM_MB_SEGMENTS; ++seg) {
+            VP8QuantMatrix* const dqm = &dec->dqm_[seg];
+            if (dqm->uv_quant_ < DITHER_AMP_TAB_SIZE) {
+                // TODO(skal): should we specially dither more for uv_quant_ < 0?
+                const int tab_idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_;
+                dqm->dither_ = (f * kQuantToDitherAmp[tab_idx]) >> 3;
+            }
+            amp_union |= dqm->dither_;
+        }
+        if (amp_union != 0) {
+            VP8InitRandom(&dec->dithering_rg_, 1.0f);
+            dec->dither_ = 1;
+        }
+    }
+    // potentially allow alpha dithering
+    dec->alpha_dithering_ = options->alpha_dithering_strength;
+    if (dec->alpha_dithering_ > 100) dec->alpha_dithering_ = 100;
+    else if (dec->alpha_dithering_ < 0) dec->alpha_dithering_ = 0;
+}
+
+// minimal amp that will provide a non-zero dithering effect
+#define MIN_DITHER_AMP 4 // DitherRow() skips macroblocks whose dither_ is below this
+#define DITHER_DESCALE 4 // right shift applied to the random value in Dither8x8()
+#define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1)) // added just before that shift
+#define DITHER_AMP_BITS 8 // Dither8x8() asks VP8RandomBits2() for DITHER_AMP_BITS + 1 bits
+#define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS) // subtracted from the value VP8RandomBits2() returns
+
+// Adds a VP8RandomBits2()-drawn delta to each sample of an 8x8 block (row stride 'bps'), saturating to [0, 255].
+static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) {
+    int row, col;
+    for (row = 0; row < 8; ++row, dst += bps) {
+        for (col = 0; col < 8; ++col) {
+            const int noise = VP8RandomBits2(rg, DITHER_AMP_BITS + 1, amp) - DITHER_AMP_CENTER;
+            // Convert to range: [-2,2] for dither=50, [-4,4] for dither=100
+            const int sample = (int)dst[col] + ((noise + DITHER_DESCALE_ROUNDER) >> DITHER_DESCALE);
+            if (sample < 0) dst[col] = 0;
+            else if (sample > 255) dst[col] = 255u;
+            else dst[col] = (uint8_t)sample;
+        }
+    }
+}
+
+// Dithers the 8x8 U and V blocks of each macroblock in [tl_mb_x_, br_mb_x_) of
+// the current row, skipping those whose amplitude is below MIN_DITHER_AMP.
+static void DitherRow(VP8Decoder* const dec) {
+    const VP8ThreadContext* const ctx = &dec->thread_ctx_;
+    const int uv_bps = dec->cache_uv_stride_;
+    int mb_x;
+    assert(dec->dither_);
+    for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {
+        const VP8MBData* const data = &ctx->mb_data_[mb_x];
+        if (data->dither_ < MIN_DITHER_AMP) continue;
+        uint8_t* const u_dst = dec->cache_u_ + ctx->id_ * 8 * uv_bps + mb_x * 8;
+        uint8_t* const v_dst = dec->cache_v_ + ctx->id_ * 8 * uv_bps + mb_x * 8;
+        Dither8x8(&dec->dithering_rg_, u_dst, uv_bps, data->dither_);
+        Dither8x8(&dec->dithering_rg_, v_dst, uv_bps, data->dither_);
+    }
+}
+
+//------------------------------------------------------------------------------
+// This function is called after a row of macroblocks is finished decoding.
+// It also takes into account the following restrictions:
+//  * In case of in-loop filtering, we must hold off sending some of the bottom
+//    pixels as they are yet unfiltered. They will be when the next macroblock
+//    row is decoded. Meanwhile, we must preserve them by rotating them in the
+//    cache area. This doesn't hold for the very bottom row of the uncropped
+//    picture of course.
+//  * we must clip the remaining pixels against the cropping area. The VP8Io
+//    fields y/u/v/a, mb_y, mb_w and mb_h must be set correctly before put().
+
+#define MACROBLOCK_VPOS(mb_y) ((mb_y)*16) // vertical position of a MB
+
+// Finalize and transmit a complete row. Return false in case of user-abort.
+// Runs the deferred reconstruction (mt_method_ == 2), filtering and dithering, then passes the rows to io->put().
+static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+    int ok = 1;
+    const VP8ThreadContext* const ctx = &dec->thread_ctx_;
+    const int cache_id = ctx->id_;
+    const int extra_y_rows = kFilterExtraRows[dec->filter_type_];
+    const int ysize = extra_y_rows * dec->cache_y_stride_;
+    const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_;
+    const int y_offset = cache_id * 16 * dec->cache_y_stride_;
+    const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;
+    uint8_t* const ydst = dec->cache_y_ - ysize + y_offset; // starts extra_y_rows rows above this band
+    uint8_t* const udst = dec->cache_u_ - uvsize + uv_offset;
+    uint8_t* const vdst = dec->cache_v_ - uvsize + uv_offset;
+    const int mb_y = ctx->mb_y_;
+    const int is_first_row = (mb_y == 0);
+    const int is_last_row = (mb_y >= dec->br_mb_y_ - 1);
+    // With mt_method_ == 2, VP8ProcessRow() does not reconstruct the row itself: it is done here.
+    if (dec->mt_method_ == 2) {
+        ReconstructRow(dec, ctx);
+    }
+
+    if (ctx->filter_row_) {
+        FilterRow(dec);
+    }
+
+    if (dec->dither_) {
+        DitherRow(dec);
+    }
+
+    if (io->put != NULL) {
+        int y_start = MACROBLOCK_VPOS(mb_y);
+        int y_end = MACROBLOCK_VPOS(mb_y + 1);
+        if (!is_first_row) {
+            y_start -= extra_y_rows;
+            io->y = ydst;
+            io->u = udst;
+            io->v = vdst;
+        } else {
+            io->y = dec->cache_y_ + y_offset;
+            io->u = dec->cache_u_ + uv_offset;
+            io->v = dec->cache_v_ + uv_offset;
+        }
+
+        if (!is_last_row) {
+            y_end -= extra_y_rows;
+        }
+        if (y_end > io->crop_bottom) {
+            y_end = io->crop_bottom; // make sure we don't overflow on last row.
+        }
+        io->a = NULL;
+        if (dec->alpha_data_ != NULL && y_start < y_end) {
+            // TODO(skal): testing presence of alpha with dec->alpha_data_ is not a
+            // good idea.
+            io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start);
+            if (io->a == NULL) { // NOTE(review): returning here bypasses StopProfiling() below
+                return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, "Could not decode alpha data.");
+            }
+        }
+        if (y_start < io->crop_top) {
+            const int delta_y = io->crop_top - y_start;
+            y_start = io->crop_top;
+            assert(!(delta_y & 1));
+            io->y += dec->cache_y_stride_ * delta_y;
+            io->u += dec->cache_uv_stride_ * (delta_y >> 1);
+            io->v += dec->cache_uv_stride_ * (delta_y >> 1);
+            if (io->a != NULL) {
+                io->a += io->width * delta_y;
+            }
+        }
+        if (y_start < y_end) {
+            io->y += io->crop_left;
+            io->u += io->crop_left >> 1;
+            io->v += io->crop_left >> 1;
+            if (io->a != NULL) {
+                io->a += io->crop_left;
+            }
+            io->mb_y = y_start - io->crop_top;
+            io->mb_w = io->crop_right - io->crop_left;
+            io->mb_h = y_end - y_start;
+            ok = io->put(io);
+        }
+    }
+    // rotate top samples if needed
+    if (cache_id + 1 == dec->num_caches_) {
+        if (!is_last_row) {
+            memcpy(dec->cache_y_ - ysize, ydst + 16 * dec->cache_y_stride_, ysize);
+            memcpy(dec->cache_u_ - uvsize, udst + 8 * dec->cache_uv_stride_, uvsize);
+            memcpy(dec->cache_v_ - uvsize, vdst + 8 * dec->cache_uv_stride_, uvsize);
+        }
+    }
+
+    StopProfiling(&stop_watch, &timeFinishRow, &countFinishRow);
+    return ok;
+}
+
+#undef MACROBLOCK_VPOS
+
+//------------------------------------------------------------------------------
+
+// Handles the freshly decoded macroblock row dec->mb_y_. With mt_method_ == 0 the row is reconstructed and
+// finished inline; otherwise the previous worker job is synced and a new one is launched. Returns 'ok' (0 on failure).
+int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+    int ok = 1;
+    VP8ThreadContext* const ctx = &dec->thread_ctx_;
+    const int filter_row = (dec->filter_type_ > 0) && (dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_);
+    if (dec->mt_method_ == 0) {
+        // ctx->id_ and ctx->f_info_ are already set
+        ctx->mb_y_ = dec->mb_y_;
+        ctx->filter_row_ = filter_row;
+        ReconstructRow(dec, ctx);
+        ok = FinishRow(dec, io);
+    } else {
+        WebPWorker* const worker = &dec->worker_;
+        // Finish previous job *before* updating context
+        ok &= WebPGetWorkerInterface()->Sync(worker);
+        assert(worker->status_ == OK);
+        if (ok) { // spawn a new deblocking/output job
+            ctx->io_ = *io;
+            ctx->id_ = dec->cache_id_;
+            ctx->mb_y_ = dec->mb_y_;
+            ctx->filter_row_ = filter_row;
+            if (dec->mt_method_ == 2) { // swap macroblock data
+                VP8MBData* const tmp = ctx->mb_data_;
+                ctx->mb_data_ = dec->mb_data_;
+                dec->mb_data_ = tmp;
+            } else {
+                // perform reconstruction directly in main thread
+                ReconstructRow(dec, ctx);
+            }
+            if (filter_row) { // swap filter info
+                VP8FInfo* const tmp = ctx->f_info_;
+                ctx->f_info_ = dec->f_info_;
+                dec->f_info_ = tmp;
+            }
+            // (reconstruct)+filter in parallel
+            WebPGetWorkerInterface()->Launch(worker);
+            if (++dec->cache_id_ == dec->num_caches_) { // wrap around to the first cache band
+                dec->cache_id_ = 0;
+            }
+        }
+    }
+
+    StopProfiling(&stop_watch, &timeVP8ProcessRow, &countVP8ProcessRow);
+    return ok;
+}
+
+//------------------------------------------------------------------------------
+// Finish setting up the decoding parameter once user's setup() is called.
+
+VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
+    // 'Extra pixels' needed around a macroblock by the selected filter type.
+    int margin;
+
+    // Run the user's setup() hook before anything else: it may switch on extra
+    // decoding features in 'io'. From here on teardown() has to be called,
+    // whatever the outcome.
+    if (io->setup != NULL) {
+        if (!io->setup(io)) {
+            VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed");
+            return dec->status_;
+        }
+    }
+
+    // Honor the user's request to skip in-loop filtering.
+    if (io->bypass_filtering) dec->filter_type_ = 0;
+    // TODO(skal): filter type / strength / sharpness forcing
+
+    // Compute the macroblock window [tl_mb_*, br_mb_*] that must be filtered
+    // when the output is cropped.
+    //
+    // The 'simple' filter reads two luma samples outside of the macroblock and
+    // filters one; chroma samples are not filtered. Filtering can therefore be
+    // skipped before the crop_top/crop_left position. The 'complex' filter
+    // reads 3 samples and filters up to 3, which creates a dependency chain
+    // reaching back to the top-left corner of the picture (MB #0): all the
+    // previous macroblocks have to be filtered.
+    // TODO(skal): add an 'approximate_decoding' option, that won't produce
+    // a 1:1 bit-exactness for complex filtering?
+    margin = kFilterExtraRows[dec->filter_type_];
+    if (dec->filter_type_ != 2) {
+        // Not the complex filter: start 'margin' pixels before the crop
+        // origin, since filtering the previous macroblock can modify some
+        // abutting pixels. Negative positions are clamped to 0.
+        dec->tl_mb_x_ = (io->crop_left - margin) >> 4;
+        dec->tl_mb_y_ = (io->crop_top - margin) >> 4;
+        if (dec->tl_mb_x_ < 0) dec->tl_mb_x_ = 0;
+        if (dec->tl_mb_y_ < 0) dec->tl_mb_y_ = 0;
+    } else {
+        // Complex filter: preserve the whole dependency chain.
+        dec->tl_mb_x_ = 0;
+        dec->tl_mb_y_ = 0;
+    }
+
+    // Some 'extra' pixels are needed on the right/bottom as well; the result
+    // is capped to dec->mb_w_ / dec->mb_h_.
+    dec->br_mb_y_ = (io->crop_bottom + 15 + margin) >> 4;
+    dec->br_mb_x_ = (io->crop_right + 15 + margin) >> 4;
+    if (dec->br_mb_x_ > dec->mb_w_) dec->br_mb_x_ = dec->mb_w_;
+    if (dec->br_mb_y_ > dec->mb_h_) dec->br_mb_y_ = dec->mb_h_;
+
+    PrecomputeFilterStrengths(dec);
+    return VP8_STATUS_OK;
+}
+
+int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
+    // Wait for the worker's pending job when threading is in use, and keep
+    // its status as the return value. Without threading there is nothing to
+    // wait for, so success (1) is reported.
+    const int synced = (dec->mt_method_ > 0) ? WebPGetWorkerInterface()->Sync(&dec->worker_) : 1;
+    // The user's teardown() hook runs regardless of the worker's status.
+    if (io->teardown != NULL) {
+        io->teardown(io);
+    }
+    return synced;
+}
+
+//------------------------------------------------------------------------------
+// For multi-threaded decoding we need to use 3 rows of 16 pixels as delay line.
+//
+// Reason is: the deblocking filter cannot deblock the bottom horizontal edges
+// immediately, and needs to wait for first few rows of the next macroblock to
+// be decoded. Hence, deblocking is lagging behind by 4 or 8 pixels (depending
+// on strength).
+// With two threads, the vertical positions of the rows being decoded are:
+// Decode:  [ 0..15][16..31][32..47][48..63][64..79][...
+// Deblock:         [ 0..11][12..27][28..43][44..59][...
+// If we use two threads and two caches of 16 pixels, the sequence would be:
+// Decode:  [ 0..15][16..31][ 0..15!!][16..31][ 0..15][...
+// Deblock:         [ 0..11][12..27!!][-4..11][12..27][...
+// The problem occurs during row [12..15!!] that both the decoding and
+// deblocking threads are writing simultaneously.
+// With 3 cache lines, one gets a safe write pattern:
+// Decode:  [ 0..15][16..31][32..47][ 0..15][16..31][32..47][0..
+// Deblock:         [ 0..11][12..27][28..43][-4..11][12..27][28...
+// Note that multi-threaded output _without_ deblocking can make use of two
+// cache lines of 16 pixels only, since there's no lagging behind. The decoding
+// and output process have non-concurrent writing:
+// Decode:  [ 0..15][16..31][ 0..15][16..31][...
+// io->put:         [ 0..15][16..31][ 0..15][...
+
+#define MT_CACHE_LINES 3 // 3 cache rows when decoding and deblocking run in parallel (see above)
+#define ST_CACHE_LINES 1 // 1 cache row only for single-threaded case
+
+// Initialize multi/single-thread worker
+static int InitThreadContext(VP8Decoder* const dec) {
+    WebPWorker* const job = &dec->worker_;
+    dec->cache_id_ = 0;
+    if (dec->mt_method_ <= 0) { // single-threaded: no worker to set up
+        dec->num_caches_ = ST_CACHE_LINES;
+        return 1;
+    }
+    if (!WebPGetWorkerInterface()->Reset(job)) {
+        return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY, "thread initialization failed.");
+    }
+    job->hook = (WebPWorkerHook)FinishRow; // FinishRow is what the worker executes
+    job->data1 = dec;
+    job->data2 = (void*)&dec->thread_ctx_.io_;
+    dec->num_caches_ = (dec->filter_type_ > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1;
+    return 1;
+}
+
+int VP8GetThreadMethod(const WebPDecoderOptions* const options,
+                       const WebPHeaderStructure* const headers,
+                       int width,
+                       int height) {
+    (void)headers; // these three are not read in every build configuration
+    (void)width;
+    (void)height;
+    // Threading has to be requested explicitly through the options.
+    if (options == NULL) return 0;
+    if (options->use_threads == 0) return 0;
+    assert(headers == NULL || !headers->is_lossless);
+#if !defined(WEBP_USE_THREAD)
+    return 0; // built without WEBP_USE_THREAD
+#else
+    if (width < MIN_WIDTH_FOR_THREADS) return 0; // below the minimum width for threads
+    // TODO(skal): tune the heuristic further
+#if 0
+  if (height < 2 * width) return 2;
+#endif
+    return 2;
+#endif
+}
+
+#undef MT_CACHE_LINES // only needed by the thread-context code above
+#undef ST_CACHE_LINES
+
+//------------------------------------------------------------------------------
+// Memory setup
+
+static int AllocateMemory(VP8Decoder* const dec) {
+    const int num_caches = dec->num_caches_;
+    const int mb_w = dec->mb_w_;
+    // Note: we use 'size_t' when there's no overflow risk, uint64_t otherwise.
+    const size_t intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);
+    const size_t top_size = sizeof(VP8TopSamples) * mb_w;
+    const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB); // +1: entry addressed as mb_info_[-1]
+    const size_t f_info_size = (dec->filter_type_ > 0) ? mb_w * (dec->mt_method_ > 0 ? 2 : 1) * sizeof(VP8FInfo) : 0;
+    const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
+    const size_t mb_data_size = (dec->mt_method_ == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data_);
+    const size_t cache_height = (16 * num_caches + kFilterExtraRows[dec->filter_type_]) * 3 / 2;
+    const size_t cache_size = top_size * cache_height;
+    // alpha_size is the only one that scales as width x height.
+    const uint64_t alpha_size =
+        (dec->alpha_data_ != NULL) ? (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL;
+    const uint64_t needed = (uint64_t)intra_pred_mode_size + top_size + mb_info_size + f_info_size + yuv_size +
+                            mb_data_size + cache_size + alpha_size + WEBP_ALIGN_CST;
+    uint8_t* mem;
+    // A single allocation (dec->mem_) holds all the buffers; it is reused when already large enough.
+    if (needed != (size_t)needed) return 0; // check for overflow
+    if (needed > dec->mem_size_) {
+        WebPSafeFree(dec->mem_);
+        dec->mem_size_ = 0; // stays 0 if the allocation below fails
+        dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t));
+        if (dec->mem_ == NULL) {
+            return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY, "no memory during frame initialization.");
+        }
+        // down-cast is ok, thanks to WebPSafeMalloc() above.
+        dec->mem_size_ = (size_t)needed;
+    }
+    // Carve the sub-buffers out of 'mem', in the same order as the sizes summed in 'needed'.
+    mem = (uint8_t*)dec->mem_;
+    dec->intra_t_ = (uint8_t*)mem;
+    mem += intra_pred_mode_size;
+
+    dec->yuv_t_ = (VP8TopSamples*)mem;
+    mem += top_size;
+    // Skip one entry so that mb_info_[-1] is a valid element.
+    dec->mb_info_ = ((VP8MB*)mem) + 1;
+    mem += mb_info_size;
+
+    dec->f_info_ = f_info_size ? (VP8FInfo*)mem : NULL;
+    mem += f_info_size;
+    dec->thread_ctx_.id_ = 0;
+    dec->thread_ctx_.f_info_ = dec->f_info_;
+    if (dec->mt_method_ > 0) {
+        // secondary cache line. The deblocking process need to make use of the
+        // filtering strength from previous macroblock row, while the new ones
+        // are being decoded in parallel. We'll just swap the pointers.
+        dec->thread_ctx_.f_info_ += mb_w;
+    }
+    // NOTE(review): the WEBP_ALIGN_CST term in 'needed' presumably pays for this alignment step.
+    mem = (uint8_t*)WEBP_ALIGN(mem);
+    assert((yuv_size & WEBP_ALIGN_CST) == 0);
+    dec->yuv_b_ = (uint8_t*)mem;
+    mem += yuv_size;
+
+    dec->mb_data_ = (VP8MBData*)mem;
+    dec->thread_ctx_.mb_data_ = (VP8MBData*)mem;
+    if (dec->mt_method_ == 2) {
+        dec->thread_ctx_.mb_data_ += mb_w; // second row of mb_data, for the thread context
+    }
+    mem += mb_data_size;
+
+    dec->cache_y_stride_ = 16 * mb_w;
+    dec->cache_uv_stride_ = 8 * mb_w;
+    {
+        const int extra_rows = kFilterExtraRows[dec->filter_type_];
+        const int extra_y = extra_rows * dec->cache_y_stride_;
+        const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
+        dec->cache_y_ = ((uint8_t*)mem) + extra_y; // 'extra_rows' rows are kept in front of each plane
+        dec->cache_u_ = dec->cache_y_ + 16 * num_caches * dec->cache_y_stride_ + extra_uv;
+        dec->cache_v_ = dec->cache_u_ + 8 * num_caches * dec->cache_uv_stride_ + extra_uv;
+        dec->cache_id_ = 0;
+    }
+    mem += cache_size;
+
+    // alpha plane (present only when there is alpha data, i.e. alpha_size != 0)
+    dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
+    mem += alpha_size;
+    assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);
+
+    // note: left/top-info is initialized once for all.
+    memset(dec->mb_info_ - 1, 0, mb_info_size);
+    VP8InitScanline(dec); // initialize left too.
+
+    // initialize top
+    memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size);
+
+    return 1;
+}
+
+static void InitIo(VP8Decoder* const dec, VP8Io* io) {
+    // Point 'io' at the decoder's cache planes, starting from row 0.
+    io->y_stride = dec->cache_y_stride_;
+    io->uv_stride = dec->cache_uv_stride_;
+    io->y = dec->cache_y_;
+    io->u = dec->cache_u_;
+    io->v = dec->cache_v_;
+    io->a = NULL; // no alpha pointer set at this point
+    io->mb_y = 0;
+}
+
+int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io) {
+    // InitThreadContext() has to run first: it sets dec->num_caches_.
+    if (!InitThreadContext(dec) || !AllocateMemory(dec)) return 0;
+    InitIo(dec, io);
+    VP8DspInit(); // critical function pointers and look-up tables
+    return 1;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/dec/idec.c b/codec/L2/demos/webpEnc/host/src/dec/idec.c
new file mode 100644
index 0000000000..855c3ef996
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dec/idec.c
@@ -0,0 +1,844 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Incremental decoding
+//
+// Author: somnath@google.com (Somnath Banerjee)
+
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "./alphai.h"
+#include "./webpi.h"
+#include "./vp8i.h"
+#include "../utils/utils.h"
+
+// In append mode, buffer allocations increase as multiples of this value.
+// Needs to be a power of 2.
+#define CHUNK_SIZE 4096 // allocation granularity used by AppendToMemBuffer()
+#define MAX_MB_SIZE 4096 // with more bytes than this buffered, a failed macroblock decode is an error
+
+//------------------------------------------------------------------------------
+// Data structures for memory and states
+
+// Decoding states. State normally flows as:
+// WEBP_HEADER->VP8_HEADER->VP8_PARTS0->VP8_DATA->DONE for a lossy image, and
+// WEBP_HEADER->VP8L_HEADER->VP8L_DATA->DONE for a lossless image.
+// If there is any error the decoder goes into state ERROR.
+typedef enum {
+    STATE_WEBP_HEADER, // All the data before that of the VP8/VP8L chunk.
+    STATE_VP8_HEADER,  // The VP8 Frame header (within the VP8 chunk).
+    STATE_VP8_PARTS0,  // Partition #0 of a lossy image (see DecodePartition0()).
+    STATE_VP8_DATA,    // Remaining partitions of a lossy image (see DecodeRemaining()).
+    STATE_VP8L_HEADER, // Header of a lossless image (see DecodeVP8LHeader()).
+    STATE_VP8L_DATA,   // Data of a lossless image (see DecodeVP8LData()).
+    STATE_DONE,        // Set by FinishDecoding().
+    STATE_ERROR        // Set by IDecError().
+} DecState;
+
+// Operating mode of the MemBuffer: not chosen yet, owned and growing (APPEND), or caller-provided (MAP).
+typedef enum { MEM_MODE_NONE = 0, MEM_MODE_APPEND, MEM_MODE_MAP } MemBufferMode;
+
+// storage for partition #0 and partial data (in a rolling fashion)
+typedef struct {
+    MemBufferMode mode_; // Operation mode
+    size_t start_;       // start location of the data to be decoded (offset into buf_)
+    size_t end_;         // end location (offset into buf_, just past the last byte received)
+    size_t buf_size_;    // size of the allocated buffer
+    uint8_t* buf_;       // We don't own this buffer in case WebPIUpdate()
+
+    size_t part0_size_;        // size of partition #0, VP8 frame header included
+    const uint8_t* part0_buf_; // buffer to store partition #0 (own copy, made in APPEND mode only)
+} MemBuffer;
+
+struct WebPIDecoder {
+    DecState state_;       // current decoding state
+    WebPDecParams params_; // Params to store output info
+    int is_lossless_;      // for down-casting 'dec_'.
+    void* dec_;            // either a VP8Decoder or a VP8LDecoder instance
+    VP8Io io_;             // input window (data, data_size) and user hooks given to the decoder
+
+    MemBuffer mem_;        // input memory buffer.
+    WebPDecBuffer output_; // output buffer (when no external one is supplied)
+    size_t chunk_size_;    // Compressed VP8/VP8L size extracted from Header.
+
+    int last_mb_y_; // last row reached for intra-mode decoding (-1 before the first row)
+};
+
+// MB context to restore in case VP8DecodeMB() fails
+typedef struct {
+    VP8MB left_;            // copy of dec->mb_info_[-1]
+    VP8MB info_;            // copy of dec->mb_info_[dec->mb_x_]
+    VP8BitReader token_br_; // copy of the token bit-reader of the current partition
+} MBContext;
+
+//------------------------------------------------------------------------------
+// MemBuffer: incoming data handling
+
+static WEBP_INLINE size_t MemDataSize(const MemBuffer* mem) {
+    return mem->end_ - mem->start_; // bytes held between start_ and end_
+}
+
+// Check if we need to preserve the compressed alpha data, as it may not have
+// been decoded yet.
+static int NeedCompressedAlpha(const WebPIDecoder* const idec) {
+    const VP8Decoder* lossy_dec;
+    // Headers not parsed yet: lossy vs. lossless is still unknown, and the
+    // ALPH chunk has not been parsed either.
+    if (idec->state_ == STATE_WEBP_HEADER) return 0;
+    // ALPH chunk is not present for lossless images.
+    if (idec->is_lossless_) return 0;
+
+    lossy_dec = (const VP8Decoder*)idec->dec_;
+    // Must be true as idec->state_ != STATE_WEBP_HEADER.
+    assert(lossy_dec != NULL);
+    if (lossy_dec->alpha_data_ == NULL) return 0;
+    return !lossy_dec->is_alpha_decoded_;
+}
+
+static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
+    // Re-anchors everything that points into the input buffer after it has
+    // moved and/or grown. 'offset' is the displacement, in bytes, of the
+    // buffered data: new position minus old position (0 if it did not move).
+    MemBuffer* const mem = &idec->mem_;
+    const uint8_t* const new_base = mem->buf_ + mem->start_;
+    // note: for VP8, setting up idec->io_ is only really needed at the beginning
+    // of the decoding, till partition #0 is complete.
+    idec->io_.data = new_base;
+    idec->io_.data_size = MemDataSize(mem);
+
+    if (idec->dec_ == NULL) return; // no decoder yet: nothing else to remap
+    if (idec->is_lossless_) { // Resize lossless bitreader
+        VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
+        VP8LBitReaderSetBuffer(&dec->br_, new_base, MemDataSize(mem));
+    } else {
+        VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+        const int last_part = dec->num_parts_ - 1;
+        // Validate the index *before* it is used to address dec->parts_[].
+        assert(last_part >= 0);
+        if (offset != 0) {
+            int p;
+            for (p = 0; p <= last_part; ++p) {
+                VP8RemapBitReader(dec->parts_ + p, offset);
+            }
+            // Remap partition #0 data pointer to new offset, but only in MAP
+            // mode (in APPEND mode, partition #0 is copied into a fixed memory).
+            if (mem->mode_ == MEM_MODE_MAP) {
+                VP8RemapBitReader(&dec->br_, offset);
+            }
+        }
+        {
+            const uint8_t* const last_start = dec->parts_[last_part].buf_;
+            VP8BitReaderSetBuffer(&dec->parts_[last_part], last_start, mem->buf_ + mem->end_ - last_start);
+        }
+        if (NeedCompressedAlpha(idec)) {
+            ALPHDecoder* const alph_dec = dec->alph_dec_;
+            dec->alpha_data_ += offset;
+            // ALPHA_NO_COMPRESSION needs nothing special; only the lossless method has a bit-reader to update.
+            if (alph_dec != NULL && alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION) {
+                VP8LDecoder* const alph_vp8l_dec = alph_dec->vp8l_dec_;
+                assert(alph_vp8l_dec != NULL);
+                assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN);
+                VP8LBitReaderSetBuffer(&alph_vp8l_dec->br_, dec->alpha_data_ + ALPHA_HEADER_LEN,
+                                       dec->alpha_data_size_ - ALPHA_HEADER_LEN);
+            }
+        }
+    }
+}
+
+// Appends 'data' to the end of MemBuffer->buf_, growing the allocation when needed and
+// updating the VP8BitReader's afterwards. Returns 1 on success, 0 on failure.
+static int AppendToMemBuffer(WebPIDecoder* const idec, const uint8_t* const data, size_t data_size) {
+    VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+    MemBuffer* const mem = &idec->mem_;
+    const int need_compressed_alpha = NeedCompressedAlpha(idec);
+    const uint8_t* const old_start = mem->buf_ + mem->start_;
+    const uint8_t* const old_base = need_compressed_alpha ? dec->alpha_data_ : old_start;
+    assert(mem->mode_ == MEM_MODE_APPEND);
+    if (data_size > MAX_CHUNK_PAYLOAD) {
+        // security safeguard: asking for more than the format allows for a chunk is suspicious.
+        return 0;
+    }
+    // Data is preserved from 'old_base' on: the compressed alpha if still needed, else the unread input.
+    if (mem->end_ + data_size > mem->buf_size_) { // Need some free memory
+        const size_t new_mem_start = old_start - old_base; // bytes kept in front of the unread data
+        const size_t current_size = MemDataSize(mem) + new_mem_start;
+        const uint64_t new_size = (uint64_t)current_size + data_size;
+        const uint64_t extra_size = (new_size + CHUNK_SIZE - 1) & ~(CHUNK_SIZE - 1); // round up to CHUNK_SIZE
+        uint8_t* const new_buf = (uint8_t*)WebPSafeMalloc(extra_size, sizeof(*new_buf));
+        if (new_buf == NULL) return 0;
+        // 'old_base' is NULL on the very first append: memcpy() must not be given a NULL source.
+        if (old_base != NULL) memcpy(new_buf, old_base, current_size);
+        WebPSafeFree(mem->buf_);
+        mem->buf_ = new_buf;
+        mem->buf_size_ = (size_t)extra_size;
+        mem->start_ = new_mem_start;
+        mem->end_ = current_size;
+    }
+
+    memcpy(mem->buf_ + mem->end_, data, data_size);
+    mem->end_ += data_size;
+    assert(mem->end_ <= mem->buf_size_);
+    // Pass the displacement of the unread data (new position minus old one) to DoRemap().
+    DoRemap(idec, mem->buf_ + mem->start_ - old_start);
+    return 1;
+}
+
+static int RemapMemBuffer(WebPIDecoder* const idec, const uint8_t* const data, size_t data_size) {
+    MemBuffer* const buffer = &idec->mem_;
+    // Where the pending data started in the previous mapping.
+    const uint8_t* const prev_start = buffer->buf_ + buffer->start_;
+    assert(buffer->mode_ == MEM_MODE_MAP);
+
+    // can't remap to a shorter buffer!
+    if (data_size < buffer->buf_size_) return 0;
+    buffer->buf_ = (uint8_t*)data;
+    buffer->buf_size_ = data_size;
+    buffer->end_ = data_size;
+    DoRemap(idec, buffer->buf_ + buffer->start_ - prev_start);
+    return 1;
+}
+
+static void InitMemBuffer(MemBuffer* const mem) {
+    mem->buf_ = NULL; // no input buffer yet
+    mem->buf_size_ = 0;
+    mem->part0_buf_ = NULL; // no copy of partition #0 yet
+    mem->part0_size_ = 0;
+    mem->mode_ = MEM_MODE_NONE; // chosen later, see CheckMemBufferMode()
+}
+
+static void ClearMemBuffer(MemBuffer* const mem) {
+    assert(mem);
+    // The buffers are released in APPEND mode only.
+    if (mem->mode_ != MEM_MODE_APPEND) return;
+    WebPSafeFree(mem->buf_);
+    WebPSafeFree((void*)mem->part0_buf_);
+}
+
+static int CheckMemBufferMode(MemBuffer* const mem, MemBufferMode expected) {
+    // Returns 1 if 'mem' can be used in the 'expected' mode, 0 otherwise.
+    // First use: switch to the expected mode.
+    if (mem->mode_ == MEM_MODE_NONE) mem->mode_ = expected;
+    // we mixed the modes => error
+    if (mem->mode_ != expected) return 0;
+    assert(mem->mode_ == expected);
+    return 1;
+}
+
+// To be called last.
+static VP8StatusCode FinishDecoding(WebPIDecoder* const idec) {
+    const WebPDecoderOptions* const opts = idec->params_.options;
+    int must_flip = 0;
+
+    // Mark the decoder as done, then apply the flip if the options ask for
+    // one; in that case the flip's status is the result.
+    idec->state_ = STATE_DONE;
+    if (opts != NULL) must_flip = (opts->flip != 0);
+    if (!must_flip) return VP8_STATUS_OK;
+    return WebPFlipBuffer(idec->params_.output);
+}
+
+//------------------------------------------------------------------------------
+// Macroblock-decoding contexts
+
+static void SaveContext(const VP8Decoder* dec, const VP8BitReader* token_br, MBContext* const context) {
+    context->token_br_ = *token_br;             // token bit-reader state
+    context->info_ = dec->mb_info_[dec->mb_x_]; // entry of the current macroblock
+    context->left_ = dec->mb_info_[-1];         // 'left' entry
+}
+
+static void RestoreContext(const MBContext* context, VP8Decoder* const dec, VP8BitReader* const token_br) {
+    *token_br = context->token_br_;             // token bit-reader state
+    dec->mb_info_[dec->mb_x_] = context->info_; // entry of the current macroblock
+    dec->mb_info_[-1] = context->left_;         // 'left' entry
+}
+
+//------------------------------------------------------------------------------
+
+static VP8StatusCode IDecError(WebPIDecoder* const idec, VP8StatusCode error) {
+    if (idec->state_ == STATE_VP8_DATA) {
+        // Synchronize the worker thread (if any) before teardown(): a job
+        // launched for a previous row may still be running and must not race
+        // with the user's teardown() hook. VP8ExitCritical() does both.
+        VP8ExitCritical((VP8Decoder*)idec->dec_, &idec->io_);
+    }
+    idec->state_ = STATE_ERROR; // 'error' is handed back unchanged
+    return error;
+}
+
+static void ChangeState(WebPIDecoder* const idec, DecState new_state, size_t consumed_bytes) {
+    // Skip the consumed bytes, then expose what remains through idec->io_.
+    idec->mem_.start_ += consumed_bytes;
+    assert(idec->mem_.start_ <= idec->mem_.end_);
+    idec->io_.data_size = MemDataSize(&idec->mem_);
+    idec->io_.data = idec->mem_.buf_ + idec->mem_.start_;
+    idec->state_ = new_state;
+}
+
+// Headers
+static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) {
+    MemBuffer* const mem = &idec->mem_;
+    WebPHeaderStructure hdrs;
+    VP8StatusCode parse_status;
+
+    // Parse what has been buffered so far, flagged as not being all the data.
+    hdrs.data = mem->buf_ + mem->start_;
+    hdrs.data_size = MemDataSize(mem);
+    hdrs.have_all_data = 0;
+    parse_status = WebPParseHeaders(&hdrs);
+    if (parse_status == VP8_STATUS_NOT_ENOUGH_DATA) {
+        return VP8_STATUS_SUSPENDED; // We haven't found a VP8 chunk yet.
+    }
+    if (parse_status != VP8_STATUS_OK) return IDecError(idec, parse_status);
+
+    idec->chunk_size_ = hdrs.compressed_size;
+    idec->is_lossless_ = hdrs.is_lossless;
+    // Create the matching decoder and move past the bytes of the headers.
+    if (idec->is_lossless_) {
+        VP8LDecoder* const dec = VP8LNew();
+        if (dec == NULL) {
+            return VP8_STATUS_OUT_OF_MEMORY;
+        }
+        idec->dec_ = dec;
+        ChangeState(idec, STATE_VP8L_HEADER, hdrs.offset);
+    } else {
+        VP8Decoder* const dec = VP8New();
+        if (dec == NULL) {
+            return VP8_STATUS_OUT_OF_MEMORY;
+        }
+        idec->dec_ = dec;
+        // Hand the alpha data location found by the parser to the decoder.
+        dec->alpha_data_ = hdrs.alpha_data;
+        dec->alpha_data_size_ = hdrs.alpha_data_size;
+        ChangeState(idec, STATE_VP8_HEADER, hdrs.offset);
+    }
+    return VP8_STATUS_OK;
+}
+
+static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) {
+    MemBuffer* const mem = &idec->mem_;
+    const uint8_t* const hdr = mem->buf_ + mem->start_;
+    const size_t avail = MemDataSize(mem);
+    int width, height; // filled by VP8GetInfo(), not used afterwards
+    uint32_t frame_tag;
+
+    // Not enough data bytes to extract VP8 Frame Header: wait for more.
+    if (avail < VP8_FRAME_HEADER_SIZE) return VP8_STATUS_SUSPENDED;
+    if (!VP8GetInfo(hdr, avail, idec->chunk_size_, &width, &height)) {
+        return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
+    }
+
+    // The first three bytes form a 24-bit little-endian value; what lies above
+    // its 5 low bits, plus the frame header size, is stored as part0_size_.
+    frame_tag = hdr[0] | (hdr[1] << 8) | (hdr[2] << 16);
+    mem->part0_size_ = (frame_tag >> 5) + VP8_FRAME_HEADER_SIZE;
+    idec->state_ = STATE_VP8_PARTS0;
+    idec->io_.data = hdr;
+    idec->io_.data_size = avail;
+    return VP8_STATUS_OK;
+}
+
+// Partition #0
+static VP8StatusCode CopyParts0Data(WebPIDecoder* const idec) {
+    VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+    VP8BitReader* const br = &dec->br_;
+    MemBuffer* const mem = &idec->mem_;
+    const size_t part_size = br->buf_end_ - br->buf_;
+    uint8_t* copy;
+
+    assert(!idec->is_lossless_);
+    assert(mem->part0_buf_ == NULL);
+    // the following is a format limitation, no need for runtime check:
+    assert(part_size <= mem->part0_size_);
+    // can't have zero-size partition #0
+    if (part_size == 0) return VP8_STATUS_BITSTREAM_ERROR;
+
+    // In APPEND mode the data is copied into a buffer owned by 'mem', and the
+    // bit-reader is redirected to that copy. Otherwise the pointers already
+    // held by dec->br_ are simply kept.
+    if (mem->mode_ == MEM_MODE_APPEND) {
+        copy = (uint8_t*)WebPSafeMalloc(1ULL, part_size);
+        if (copy == NULL) return VP8_STATUS_OUT_OF_MEMORY;
+        memcpy(copy, br->buf_, part_size);
+        mem->part0_buf_ = copy;
+        VP8BitReaderSetBuffer(br, copy, part_size);
+    }
+    mem->start_ += part_size; // step over the partition #0 bytes
+    return VP8_STATUS_OK;
+}
+
+static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
+    VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+    VP8Io* const io = &idec->io_;
+    const WebPDecParams* const params = &idec->params_;
+    WebPDecBuffer* const output = params->output;
+    // Handler for STATE_VP8_PARTS0; once setup has succeeded the state becomes STATE_VP8_DATA.
+    // Wait till we have enough data for the whole partition #0
+    if (MemDataSize(&idec->mem_) < idec->mem_.part0_size_) {
+        return VP8_STATUS_SUSPENDED;
+    }
+
+    if (!VP8GetHeaders(dec, io)) {
+        const VP8StatusCode status = dec->status_;
+        if (status == VP8_STATUS_SUSPENDED || status == VP8_STATUS_NOT_ENOUGH_DATA) {
+            // treating NOT_ENOUGH_DATA as SUSPENDED state
+            return VP8_STATUS_SUSPENDED;
+        }
+        return IDecError(idec, status);
+    }
+
+    // Allocate/Verify output buffer now
+    dec->status_ = WebPAllocateDecBuffer(io->width, io->height, params->options, output);
+    if (dec->status_ != VP8_STATUS_OK) {
+        return IDecError(idec, dec->status_);
+    }
+    // This change must be done before calling VP8InitFrame()
+    dec->mt_method_ = VP8GetThreadMethod(params->options, NULL, io->width, io->height);
+    VP8InitDithering(params->options, dec);
+    // In APPEND mode this takes a private copy of partition #0 (see CopyParts0Data()).
+    dec->status_ = CopyParts0Data(idec);
+    if (dec->status_ != VP8_STATUS_OK) {
+        return IDecError(idec, dec->status_);
+    }
+
+    // Finish setting up the decoding parameters. Will call io->setup().
+    if (VP8EnterCritical(dec, io) != VP8_STATUS_OK) {
+        return IDecError(idec, dec->status_);
+    }
+
+    // Note: past this point, teardown() must always be called
+    // in case of error.
+    idec->state_ = STATE_VP8_DATA; // IDecError() triggers teardown() only in this state
+    // Allocate memory and prepare everything.
+    if (!VP8InitFrame(dec, io)) {
+        return IDecError(idec, dec->status_);
+    }
+    return VP8_STATUS_OK;
+}
+
+// Remaining partitions
+static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
+    VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+    VP8Io* const io = &idec->io_;
+    // Resumable: dec->mb_x_ / dec->mb_y_ are not reset here, so a later call restarts at the failed macroblock.
+    assert(dec->ready_);
+    for (; dec->mb_y_ < dec->mb_h_; ++dec->mb_y_) {
+        if (idec->last_mb_y_ != dec->mb_y_) { // intra modes of this row not parsed yet
+            if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
+                // note: normally, error shouldn't occur since we already have the whole
+                // partition0 available here in DecodeRemaining(). Reaching EOF while
+                // reading intra modes really means a BITSTREAM_ERROR.
+                return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
+            }
+            idec->last_mb_y_ = dec->mb_y_;
+        }
+        for (; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) {
+            VP8BitReader* const token_br = &dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)];
+            MBContext context;
+            SaveContext(dec, token_br, &context); // snapshot, restored below if the decode fails
+            if (!VP8DecodeMB(dec, token_br)) {
+                // We shouldn't fail when MAX_MB data was available
+                if (dec->num_parts_ == 1 && MemDataSize(&idec->mem_) > MAX_MB_SIZE) {
+                    return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
+                }
+                RestoreContext(&context, dec, token_br);
+                return VP8_STATUS_SUSPENDED; // retried from the same macroblock on the next call
+            }
+            // Release buffer only if there is only one partition
+            if (dec->num_parts_ == 1) {
+                idec->mem_.start_ = token_br->buf_ - idec->mem_.buf_;
+                assert(idec->mem_.start_ <= idec->mem_.end_);
+            }
+        }
+        VP8InitScanline(dec); // Prepare for next scanline
+
+        // Reconstruct, filter and emit the row.
+        if (!VP8ProcessRow(dec, io)) {
+            return IDecError(idec, VP8_STATUS_USER_ABORT);
+        }
+    }
+    // Synchronize the thread and check for errors.
+    if (!VP8ExitCritical(dec, io)) {
+        return IDecError(idec, VP8_STATUS_USER_ABORT);
+    }
+    dec->ready_ = 0;
+    return FinishDecoding(idec); // sets STATE_DONE
+}
+
+static VP8StatusCode ErrorStatusLossless(WebPIDecoder* const idec, VP8StatusCode status) {
+    // NOT_ENOUGH_DATA and SUSPENDED both come out as SUSPENDED and leave the
+    // decoder state untouched; anything else goes through IDecError().
+    const int needs_more_data = (status == VP8_STATUS_NOT_ENOUGH_DATA) || (status == VP8_STATUS_SUSPENDED);
+    return needs_more_data ? VP8_STATUS_SUSPENDED : IDecError(idec, status);
+}
+
+static VP8StatusCode DecodeVP8LHeader(WebPIDecoder* const idec) {
+    VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
+    VP8Io* const io = &idec->io_;
+    const WebPDecParams* const params = &idec->params_;
+    WebPDecBuffer* const output = params->output;
+    const size_t available = MemDataSize(&idec->mem_);
+    assert(idec->is_lossless_);
+
+    // Do not try before 1/8th of the chunk size has been buffered.
+    if (available < (idec->chunk_size_ >> 3)) {
+        dec->status_ = VP8_STATUS_SUSPENDED;
+        return ErrorStatusLossless(idec, dec->status_);
+    }
+    if (!VP8LDecodeHeader(dec, io)) {
+        // A bitstream error reported while the chunk is still incomplete is
+        // turned into a suspension instead of a failure.
+        const int chunk_is_partial = (available < idec->chunk_size_);
+        if (chunk_is_partial && dec->status_ == VP8_STATUS_BITSTREAM_ERROR) {
+            dec->status_ = VP8_STATUS_SUSPENDED;
+        }
+        return ErrorStatusLossless(idec, dec->status_);
+    }
+
+    // Allocate/verify output buffer now.
+    dec->status_ = WebPAllocateDecBuffer(io->width, io->height, params->options, output);
+    if (dec->status_ != VP8_STATUS_OK) return IDecError(idec, dec->status_);
+    idec->state_ = STATE_VP8L_DATA;
+    return VP8_STATUS_OK;
+}
+
+static VP8StatusCode DecodeVP8LData(WebPIDecoder* const idec) {
+    VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
+    assert(idec->is_lossless_);
+
+    // Switch to incremental decoding if we don't have all the bytes available.
+    dec->incremental_ = (MemDataSize(&idec->mem_) < idec->chunk_size_);
+    if (!VP8LDecodeImage(dec)) return ErrorStatusLossless(idec, dec->status_);
+
+    assert(dec->status_ == VP8_STATUS_OK || dec->status_ == VP8_STATUS_SUSPENDED);
+    if (dec->status_ == VP8_STATUS_SUSPENDED) {
+        return VP8_STATUS_SUSPENDED; // decoding is not complete yet
+    }
+    return FinishDecoding(idec);
+}
+
+// Main decoding loop
+static VP8StatusCode IDecode(WebPIDecoder* idec) {
+    VP8StatusCode status = VP8_STATUS_SUSPENDED;
+
+    // Every stage past the WebP headers requires a decoder instance; we
+    // can't continue if we have no decoder.
+    if (idec->state_ != STATE_WEBP_HEADER) {
+        if (idec->dec_ == NULL) {
+            return VP8_STATUS_SUSPENDED;
+        }
+    } else {
+        status = DecodeWebPHeaders(idec);
+    }
+
+    // The state is re-tested before each stage: a stage that completes moves
+    // idec->state_ forward, so the following stage runs within the same call.
+    // Lossy (VP8) stages.
+    if (idec->state_ == STATE_VP8_HEADER) status = DecodeVP8FrameHeader(idec);
+    if (idec->state_ == STATE_VP8_PARTS0) status = DecodePartition0(idec);
+    if (idec->state_ == STATE_VP8_DATA) status = DecodeRemaining(idec);
+
+    // Lossless (VP8L) stages.
+    if (idec->state_ == STATE_VP8L_HEADER) status = DecodeVP8LHeader(idec);
+    if (idec->state_ == STATE_VP8L_DATA) status = DecodeVP8LData(idec);
+
+    // Status of the last stage that ran, or VP8_STATUS_SUSPENDED if none ran.
+    return status;
+}
+
+//------------------------------------------------------------------------------
+// Public functions
+
+WebPIDecoder* WebPINewDecoder(WebPDecBuffer* output_buffer) {
+    WebPIDecoder* const created = (WebPIDecoder*)WebPSafeCalloc(1ULL, sizeof(*created));
+    if (created == NULL) return NULL;
+
+    // Decoding starts at the WebP headers, with no macroblock row reached yet.
+    created->last_mb_y_ = -1;
+    created->chunk_size_ = 0;
+    created->state_ = STATE_WEBP_HEADER;
+
+    // Put the embedded sub-objects into their initial state.
+    InitMemBuffer(&created->mem_);
+    WebPInitDecBuffer(&created->output_);
+    VP8InitIo(&created->io_);
+    WebPResetDecParams(&created->params_);
+
+    // Decode into the caller's buffer when one is supplied, into the
+    // decoder's own 'output_' otherwise.
+    created->params_.output = (output_buffer == NULL) ? &created->output_ : output_buffer;
+    WebPInitCustomIo(&created->params_, &created->io_); // Plug the I/O functions.
+    return created;
+}
+
+WebPIDecoder* WebPIDecode(const uint8_t* data, size_t data_size, WebPDecoderConfig* config) {
+    const int has_config = (config != NULL);
+    WebPIDecoder* decoder;
+
+    // Fill config->input from the bitstream when both data and a config are supplied.
+    if (has_config && data != NULL && data_size > 0) {
+        const VP8StatusCode features_status = WebPGetFeatures(data, data_size, &config->input);
+        if (features_status != VP8_STATUS_OK) {
+            return NULL;
+        }
+    }
+    // Create the incremental decoder; it writes to config->output when a config is given.
+    decoder = WebPINewDecoder(has_config ? &config->output : NULL);
+    if (decoder == NULL) {
+        return NULL;
+    }
+    // Finish initialization
+    if (has_config) decoder->params_.options = &config->options;
+    return decoder;
+}
+
+void WebPIDelete(WebPIDecoder* idec) { // releases the decoder and everything it owns; NULL is accepted
+    if (idec == NULL) return;
+    if (idec->dec_ != NULL) {
+        if (idec->is_lossless_) {
+            VP8LDelete((VP8LDecoder*)idec->dec_);
+        } else {
+            VP8Decoder* const lossy_dec = (VP8Decoder*)idec->dec_;
+            if (idec->state_ == STATE_VP8_DATA) {
+                VP8ExitCritical(lossy_dec, &idec->io_); // Synchronize the thread, clean-up and check for errors.
+            }
+            VP8Delete(lossy_dec);
+        }
+    }
+    ClearMemBuffer(&idec->mem_);
+    WebPFreeDecBuffer(&idec->output_);
+    WebPSafeFree(idec);
+}
+
+//------------------------------------------------------------------------------
+// Wrappers around WebPINewDecoder
+
+WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer, size_t output_buffer_size, int output_stride) {
+    const int has_user_buffer = (output_buffer != NULL);
+    WebPIDecoder* decoder;
+
+    if (mode >= MODE_YUV) return NULL; // modes at or past MODE_YUV are rejected
+    if (has_user_buffer) {
+        // A buffer was passed. Validate the other params.
+        if (output_buffer_size == 0 || output_stride == 0) return NULL;
+    } else {
+        // No buffer: overwrite parameters to sane values.
+        output_stride = 0;
+        output_buffer_size = 0;
+    }
+    decoder = WebPINewDecoder(NULL);
+    if (decoder == NULL) return NULL;
+    decoder->output_.u.RGBA.rgba = output_buffer;
+    decoder->output_.u.RGBA.size = output_buffer_size;
+    decoder->output_.u.RGBA.stride = output_stride;
+    decoder->output_.is_external_memory = has_user_buffer;
+    decoder->output_.colorspace = mode;
+    return decoder;
+}
+
+WebPIDecoder* WebPINewYUVA(uint8_t* luma,
+                           size_t luma_size,
+                           int luma_stride,
+                           uint8_t* u,
+                           size_t u_size,
+                           int u_stride,
+                           uint8_t* v,
+                           size_t v_size,
+                           int v_stride,
+                           uint8_t* a,
+                           size_t a_size,
+                           int a_stride) {
+    const int has_user_buffers = (luma != NULL);
+    WEBP_CSP_MODE colorspace = MODE_YUVA;
+    WebPIDecoder* decoder;
+
+    if (has_user_buffers) {
+        // A luma buffer was passed. Validate the other parameters.
+        if (u == NULL || v == NULL) return NULL;
+        if (luma_size == 0 || u_size == 0 || v_size == 0) return NULL;
+        if (luma_stride == 0 || u_stride == 0 || v_stride == 0) return NULL;
+        if (a != NULL && (a_size == 0 || a_stride == 0)) return NULL;
+        // Alpha is optional: without an alpha plane the output mode is MODE_YUV.
+        if (a == NULL) colorspace = MODE_YUV;
+    } else {
+        // No external memory: overwrite parameters to sane values.
+        u = v = a = NULL;
+        luma_size = u_size = v_size = a_size = 0;
+        luma_stride = u_stride = v_stride = a_stride = 0;
+    }
+
+    decoder = WebPINewDecoder(NULL);
+    if (decoder == NULL) return NULL;
+
+    decoder->output_.colorspace = colorspace;
+    decoder->output_.is_external_memory = has_user_buffers;
+    decoder->output_.u.YUVA.y = luma;
+    decoder->output_.u.YUVA.y_size = luma_size;
+    decoder->output_.u.YUVA.y_stride = luma_stride;
+    decoder->output_.u.YUVA.u = u;
+    decoder->output_.u.YUVA.u_size = u_size;
+    decoder->output_.u.YUVA.u_stride = u_stride;
+    decoder->output_.u.YUVA.v = v;
+    decoder->output_.u.YUVA.v_size = v_size;
+    decoder->output_.u.YUVA.v_stride = v_stride;
+    decoder->output_.u.YUVA.a = a;
+    decoder->output_.u.YUVA.a_size = a_size;
+    decoder->output_.u.YUVA.a_stride = a_stride;
+    return decoder;
+}
+
+WebPIDecoder* WebPINewYUV(uint8_t* luma,
+                          size_t luma_size,
+                          int luma_stride,
+                          uint8_t* u,
+                          size_t u_size,
+                          int u_stride,
+                          uint8_t* v,
+                          size_t v_size,
+                          int v_stride) { // forwards to WebPINewYUVA() with no alpha plane (NULL, 0, 0)
+    return WebPINewYUVA(luma, luma_size, luma_stride, u, u_size, u_stride, v, v_size, v_stride, NULL, 0, 0);
+}
+
+//------------------------------------------------------------------------------
+
+static VP8StatusCode IDecCheckStatus(const WebPIDecoder* const idec) { // maps the decoder state to a status code
+    assert(idec);
+    switch (idec->state_) {
+        case STATE_ERROR: return VP8_STATUS_BITSTREAM_ERROR;
+        case STATE_DONE: return VP8_STATUS_OK;
+        default: break;
+    }
+    // Every other state is reported as suspended.
+    return VP8_STATUS_SUSPENDED;
+}
+
+VP8StatusCode WebPIAppend(WebPIDecoder* idec, const uint8_t* data, size_t data_size) {
+    VP8StatusCode current;
+    if (data == NULL || idec == NULL) return VP8_STATUS_INVALID_PARAM;
+
+    // A decoder in the error or done state just reports that outcome again.
+    current = IDecCheckStatus(idec);
+    if (current != VP8_STATUS_SUSPENDED) return current;
+
+    // Check mixed calls between RemapMemBuffer and AppendToMemBuffer.
+    if (!CheckMemBufferMode(&idec->mem_, MEM_MODE_APPEND)) {
+        return VP8_STATUS_INVALID_PARAM;
+    }
+    // Append data to memory buffer
+    if (!AppendToMemBuffer(idec, data, data_size)) {
+        return VP8_STATUS_OUT_OF_MEMORY;
+    }
+    // Run the decoding loop on what is now buffered.
+    return IDecode(idec);
+}
+
+VP8StatusCode WebPIUpdate(WebPIDecoder* idec, const uint8_t* data, size_t data_size) {
+    VP8StatusCode current;
+    if (data == NULL || idec == NULL) return VP8_STATUS_INVALID_PARAM;
+
+    // A decoder in the error or done state just reports that outcome again.
+    current = IDecCheckStatus(idec);
+    if (current != VP8_STATUS_SUSPENDED) return current;
+
+    // Check mixed calls between RemapMemBuffer and AppendToMemBuffer.
+    if (!CheckMemBufferMode(&idec->mem_, MEM_MODE_MAP)) {
+        return VP8_STATUS_INVALID_PARAM;
+    }
+    // Make the memory buffer point to the new buffer
+    if (!RemapMemBuffer(idec, data, data_size)) {
+        return VP8_STATUS_INVALID_PARAM;
+    }
+    // Run the decoding loop on the remapped data.
+    return IDecode(idec);
+}
+
+//------------------------------------------------------------------------------
+
+static const WebPDecBuffer* GetOutputBuffer(const WebPIDecoder* const idec) {
+    if (idec == NULL) return NULL;
+    // The output buffer is only exposed once a decoder exists and the state
+    // has advanced past STATE_VP8_PARTS0.
+    if (idec->dec_ != NULL && idec->state_ > STATE_VP8_PARTS0) {
+        return idec->params_.output;
+    }
+    return NULL;
+}
+
+const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec, int* left, int* top, int* width, int* height) {
+    const WebPDecBuffer* const buffer = GetOutputBuffer(idec);
+    const int has_buffer = (buffer != NULL);
+
+    // The reported area always starts at the top-left corner.
+    if (left != NULL) *left = 0;
+    if (top != NULL) *top = 0;
+    // With no output buffer available the area is empty; otherwise it spans
+    // the full buffer width and the rows counted by params_.last_y.
+    if (width != NULL) *width = has_buffer ? buffer->width : 0;
+    if (height != NULL) *height = has_buffer ? idec->params_.last_y : 0;
+    return buffer;
+}
+
+uint8_t* WebPIDecGetRGB(const WebPIDecoder* idec, int* last_y, int* width, int* height, int* stride) {
+    const WebPDecBuffer* const buffer = GetOutputBuffer(idec);
+    // Fail when there is no output yet, or when its colorspace is MODE_YUV or beyond.
+    if (buffer == NULL || buffer->colorspace >= MODE_YUV) {
+        return NULL;
+    }
+
+    // Every out-parameter is optional.
+    if (last_y != NULL) *last_y = idec->params_.last_y;
+    if (width != NULL) *width = buffer->width;
+    if (height != NULL) *height = buffer->height;
+    if (stride != NULL) *stride = buffer->u.RGBA.stride;
+    return buffer->u.RGBA.rgba;
+}
+
+uint8_t* WebPIDecGetYUVA(const WebPIDecoder* idec,
+                         int* last_y,
+                         uint8_t** u,
+                         uint8_t** v,
+                         uint8_t** a,
+                         int* width,
+                         int* height,
+                         int* stride,
+                         int* uv_stride,
+                         int* a_stride) {
+    const WebPDecBuffer* const buffer = GetOutputBuffer(idec);
+    // Fail when there is no output yet, or when its colorspace is below MODE_YUV.
+    if (buffer == NULL || buffer->colorspace < MODE_YUV) {
+        return NULL;
+    }
+
+    if (last_y != NULL) *last_y = idec->params_.last_y;
+    if (u != NULL) *u = buffer->u.YUVA.u;
+    if (v != NULL) *v = buffer->u.YUVA.v;
+    if (a != NULL) *a = buffer->u.YUVA.a;
+    if (width != NULL) *width = buffer->width;
+    if (height != NULL) *height = buffer->height;
+    if (stride != NULL) *stride = buffer->u.YUVA.y_stride;
+    if (uv_stride != NULL) *uv_stride = buffer->u.YUVA.u_stride; // the U stride is reported for both chroma planes
+    if (a_stride != NULL) *a_stride = buffer->u.YUVA.a_stride;
+
+    return buffer->u.YUVA.y;
+}
+
+int WebPISetIOHooks(
+    WebPIDecoder* const idec, VP8IoPutHook put, VP8IoSetupHook setup, VP8IoTeardownHook teardown, void* user_data) {
+    // The hooks can only be replaced while the decoder has not advanced
+    // beyond STATE_WEBP_HEADER. Returns 1 when installed, 0 otherwise.
+    if (idec != NULL && idec->state_ <= STATE_WEBP_HEADER) {
+        idec->io_.opaque = user_data;
+        idec->io_.teardown = teardown;
+        idec->io_.setup = setup;
+        idec->io_.put = put;
+        return 1;
+    }
+    return 0;
+}
diff --git a/codec/L2/demos/webpEnc/host/src/dec/io.c b/codec/L2/demos/webpEnc/host/src/dec/io.c
new file mode 100644
index 0000000000..8c0de67784
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dec/io.c
@@ -0,0 +1,571 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// functions for sample output.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include "../dec/vp8i.h"
+#include "./webpi.h"
+#include "../dsp/dsp.h"
+#include "../dsp/yuv.h"
+#include "../utils/utils.h"
+
+//------------------------------------------------------------------------------
+// Main YUV<->RGB conversion functions
+
+static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) { // copies the band's Y/U/V rows unchanged
+    const WebPYUVABuffer* const planes = &p->output->u.YUVA;
+    const int chroma_row = io->mb_y >> 1;
+    uint8_t* const luma_out = planes->y + io->mb_y * planes->y_stride;
+    uint8_t* const u_out = planes->u + chroma_row * planes->u_stride;
+    uint8_t* const v_out = planes->v + chroma_row * planes->v_stride;
+    const int luma_w = io->mb_w;
+    const int luma_h = io->mb_h;
+    const int chroma_w = (luma_w + 1) / 2;
+    const int chroma_h = (luma_h + 1) / 2;
+    int row;
+    for (row = 0; row < luma_h; ++row) {
+        memcpy(luma_out + row * planes->y_stride, io->y + row * io->y_stride, luma_w);
+    }
+    for (row = 0; row < chroma_h; ++row) {
+        memcpy(u_out + row * planes->u_stride, io->u + row * io->uv_stride, chroma_w);
+        memcpy(v_out + row * planes->v_stride, io->v + row * io->uv_stride, chroma_w);
+    }
+    return io->mb_h;
+}
+
+// Point-sampling U/V sampler: converts the band with the sampler registered for the output colorspace.
+static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
+    WebPDecBuffer* const out = p->output;
+    WebPRGBABuffer* const rgb = &out->u.RGBA;
+    uint8_t* const first_row = rgb->rgba + io->mb_y * rgb->stride;
+    WebPSamplerProcessPlane(io->y, io->y_stride, io->u, io->v, io->uv_stride, first_row, rgb->stride, io->mb_w,
+                            io->mb_h, WebPSamplers[out->colorspace]);
+    return io->mb_h;
+}
+
+//------------------------------------------------------------------------------
+// Fancy upsampling
+
+#ifdef FANCY_UPSAMPLING
+static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) { // returns the number of output rows completed
+    int num_lines_out = io->mb_h; // a priori guess, adjusted by +1/-1 below
+    const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+    uint8_t* dst = buf->rgba + io->mb_y * buf->stride;
+    WebPUpsampleLinePairFunc upsample = WebPUpsamplers[p->output->colorspace]; // picked by output colorspace
+    const uint8_t* cur_y = io->y;
+    const uint8_t* cur_u = io->u;
+    const uint8_t* cur_v = io->v;
+    const uint8_t* top_u = p->tmp_u; // chroma rows saved at the end of the previous call
+    const uint8_t* top_v = p->tmp_v;
+    int y = io->mb_y;
+    const int y_end = io->mb_y + io->mb_h;
+    const int mb_w = io->mb_w;
+    const int uv_w = (mb_w + 1) / 2; // chroma width, rounded up
+
+    if (y == 0) {
+        // First line is special cased. We mirror the u/v samples at boundary.
+        upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, mb_w);
+    } else {
+        // We can finish the left-over line from previous call.
+        upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v, dst - buf->stride, dst, mb_w);
+        ++num_lines_out;
+    }
+    // Loop over each pair of output rows.
+    for (; y + 2 < y_end; y += 2) {
+        top_u = cur_u;
+        top_v = cur_v;
+        cur_u += io->uv_stride;
+        cur_v += io->uv_stride;
+        dst += 2 * buf->stride;
+        cur_y += 2 * io->y_stride;
+        upsample(cur_y - io->y_stride, cur_y, top_u, top_v, cur_u, cur_v, dst - buf->stride, dst, mb_w);
+    }
+    // move to last row
+    cur_y += io->y_stride;
+    if (io->crop_top + y_end < io->crop_bottom) {
+        // Save the unfinished samples for next call (as we're not done yet).
+        memcpy(p->tmp_y, cur_y, mb_w * sizeof(*p->tmp_y));
+        memcpy(p->tmp_u, cur_u, uv_w * sizeof(*p->tmp_u));
+        memcpy(p->tmp_v, cur_v, uv_w * sizeof(*p->tmp_v));
+        // The fancy upsampler leaves a row unfinished behind
+        // (except for the very last row)
+        num_lines_out--;
+    } else {
+        // Process the very last row of even-sized picture
+        if (!(y_end & 1)) {
+            upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst + buf->stride, NULL, mb_w);
+        }
+    }
+    return num_lines_out;
+}
+
+#endif /* FANCY_UPSAMPLING */
+
+//------------------------------------------------------------------------------
+
+static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p, int expected_num_lines_out) {
+    const WebPYUVABuffer* const planes = &p->output->u.YUVA;
+    const uint8_t* alpha_src = io->a;
+    const int num_cols = io->mb_w;
+    const int num_rows = io->mb_h;
+    uint8_t* alpha_out = planes->a + io->mb_y * planes->a_stride;
+    int row;
+    // The expected count is only cross-checked by the assert below.
+    (void)expected_num_lines_out;
+    assert(expected_num_lines_out == num_rows);
+    if (alpha_src != NULL) {
+        // Copy the alpha rows; consecutive source rows are io->width bytes apart.
+        for (row = 0; row < num_rows; ++row, alpha_src += io->width, alpha_out += planes->a_stride) {
+            memcpy(alpha_out, alpha_src, num_cols * sizeof(*alpha_out));
+        }
+    } else if (planes->a != NULL) {
+        // the user requested alpha, but there is none, set it to opaque.
+        for (row = 0; row < num_rows; ++row) {
+            memset(alpha_out, 0xff, num_cols * sizeof(*alpha_out));
+            alpha_out += planes->a_stride;
+        }
+    }
+    return 0;
+}
+
+static int GetAlphaSourceRow(const VP8Io* const io, const uint8_t** alpha, int* const num_rows) { // returns the first row to process
+    int start_y = io->mb_y;
+    *num_rows = io->mb_h; // default: one row per row of the band
+
+    // Compensate for the 1-line delay of the fancy upscaler.
+    // This is similar to EmitFancyRGB().
+    if (io->fancy_upsampling) {
+        if (start_y == 0) {
+            // We don't process the last row yet. It'll be done during the next call.
+            --*num_rows;
+        } else {
+            --start_y;
+            // Fortunately, *alpha data is persistent, so we can go back
+            // one row and finish alpha blending, now that the fancy upscaler
+            // completed the YUV->RGB interpolation.
+            *alpha -= io->width; // alpha rows are io->width bytes apart
+        }
+        if (io->crop_top + io->mb_y + io->mb_h == io->crop_bottom) {
+            // If it's the very last call, we process all the remaining rows!
+            *num_rows = io->crop_bottom - io->crop_top - start_y;
+        }
+    }
+    return start_y;
+}
+
+static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p, int expected_num_lines_out) { // always returns 0
+    const uint8_t* alpha = io->a;
+    if (alpha != NULL) { // nothing is written when the band carries no alpha
+        const int mb_w = io->mb_w;
+        const WEBP_CSP_MODE colorspace = p->output->colorspace;
+        const int alpha_first = (colorspace == MODE_ARGB || colorspace == MODE_Argb);
+        const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+        int num_rows;
+        const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows); // may move 'alpha' back by one row
+        uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
+        uint8_t* const dst = base_rgba + (alpha_first ? 0 : 3); // alpha sits at byte offset 0 or 3
+        const int has_alpha = WebPDispatchAlpha(alpha, io->width, mb_w, num_rows, dst, buf->stride);
+        (void)expected_num_lines_out; // only used by the assert below
+        assert(expected_num_lines_out == num_rows);
+        // has_alpha is true if there's non-trivial alpha to premultiply with.
+        if (has_alpha && WebPIsPremultipliedMode(colorspace)) {
+            WebPApplyAlphaMultiply(base_rgba, alpha_first, mb_w, num_rows, buf->stride);
+        }
+    }
+    return 0;
+}
+
+static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p, int expected_num_lines_out) { // always returns 0
+    const uint8_t* alpha = io->a;
+    if (alpha != NULL) { // nothing is written when the band carries no alpha
+        const int mb_w = io->mb_w;
+        const WEBP_CSP_MODE colorspace = p->output->colorspace;
+        const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+        int num_rows;
+        const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows); // may move 'alpha' back by one row
+        uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
+#ifdef WEBP_SWAP_16BIT_CSP
+        uint8_t* alpha_dst = base_rgba; // alpha nibble is in the first byte of each 2-byte pixel
+#else
+        uint8_t* alpha_dst = base_rgba + 1; // alpha nibble is in the second byte of each 2-byte pixel
+#endif
+        uint32_t alpha_mask = 0x0f; // stays 0x0f only if every alpha nibble written is 0x0f
+        int i, j;
+        for (j = 0; j < num_rows; ++j) {
+            for (i = 0; i < mb_w; ++i) {
+                // Fill in the alpha value (converted to 4 bits).
+                const uint32_t alpha_value = alpha[i] >> 4;
+                alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value; // keep the high nibble, replace the low one
+                alpha_mask &= alpha_value;
+            }
+            alpha += io->width;
+            alpha_dst += buf->stride;
+        }
+        (void)expected_num_lines_out; // only used by the assert below
+        assert(expected_num_lines_out == num_rows);
+        if (alpha_mask != 0x0f && WebPIsPremultipliedMode(colorspace)) {
+            WebPApplyAlphaMultiply4444(base_rgba, mb_w, num_rows, buf->stride);
+        }
+    }
+    return 0;
+}
+
+//------------------------------------------------------------------------------
+// YUV rescaling (no final RGB conversion needed)
+
+static int Rescale(const uint8_t* src, int src_stride, int new_lines, WebPRescaler* const wrk) {
+    int total_exported = 0;
+    while (new_lines > 0) { // import new contributions of source rows.
+        const int consumed = WebPRescalerImport(wrk, new_lines, src, src_stride);
+        new_lines -= consumed;
+        src += consumed * src_stride;
+        total_exported += WebPRescalerExport(wrk); // emit output row(s)
+    }
+    return total_exported;
+}
+
+static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) { // returns the luma rows produced
+    const int luma_rows = io->mb_h;
+    const int chroma_rows = (luma_rows + 1) >> 1;
+    int luma_rows_out;
+
+    if (WebPIsAlphaMode(p->output->colorspace) && io->a != NULL) {
+        // Before rescaling, we premultiply the luma directly into the io->y
+        // internal buffer. This is OK since these samples are not used for
+        // intra-prediction (the top samples are saved in cache_y_/u_/v_).
+        // But we need to cast the const away, though.
+        WebPMultRows((uint8_t*)io->y, io->y_stride, io->a, io->width, io->mb_w, luma_rows, 0);
+    }
+    luma_rows_out = Rescale(io->y, io->y_stride, luma_rows, &p->scaler_y);
+    Rescale(io->u, io->uv_stride, chroma_rows, &p->scaler_u);
+    Rescale(io->v, io->uv_stride, chroma_rows, &p->scaler_v);
+    return luma_rows_out;
+}
+
+static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p, int expected_num_lines_out) {
+    if (io->a != NULL) { // nothing to do when the band carries no alpha
+        const WebPYUVABuffer* const planes = &p->output->u.YUVA;
+        uint8_t* const luma_rows = planes->y + p->last_y * planes->y_stride;
+        const uint8_t* const alpha_rows = planes->a + p->last_y * planes->a_stride;
+        const int rows_out = Rescale(io->a, io->width, io->mb_h, &p->scaler_a);
+        (void)expected_num_lines_out;
+        assert(expected_num_lines_out == rows_out);
+        if (rows_out > 0) { // unmultiply the Y
+            WebPMultRows(luma_rows, planes->y_stride, alpha_rows, planes->a_stride, p->scaler_a.dst_width, rows_out, 1);
+        }
+    }
+    return 0;
+}
+
+static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {
+    const int has_alpha = WebPIsAlphaMode(p->output->colorspace);
+    const WebPYUVABuffer* const buf = &p->output->u.YUVA;
+    const int out_width = io->scaled_width;
+    const int out_height = io->scaled_height;
+    const int uv_out_width = (out_width + 1) >> 1;
+    const int uv_out_height = (out_height + 1) >> 1;
+    const int uv_in_width = (io->mb_w + 1) >> 1;
+    const int uv_in_height = (io->mb_h + 1) >> 1;
+    const size_t work_size = 2 * out_width;       // scratch memory for luma rescaler
+    const size_t uv_work_size = 2 * uv_out_width; // and for each u/v ones
+    size_t tmp_size;
+    rescaler_t* work;
+
+    tmp_size = (work_size + 2 * uv_work_size) * sizeof(*work);
+    if (has_alpha) {
+        tmp_size += work_size * sizeof(*work);
+    }
+    p->memory = WebPSafeMalloc(1ULL, tmp_size);
+    if (p->memory == NULL) {
+        return 0; // memory error
+    }
+    work = (rescaler_t*)p->memory;
+    WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h, buf->y, out_width, out_height, buf->y_stride, 1, work);
+    WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height, buf->u, uv_out_width, uv_out_height, buf->u_stride, 1,
+                     work + work_size);
+    WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height, buf->v, uv_out_width, uv_out_height, buf->v_stride, 1,
+                     work + work_size + uv_work_size);
+    p->emit = EmitRescaledYUV;
+
+    if (has_alpha) {
+        WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h, buf->a, out_width, out_height, buf->a_stride, 1,
+                         work + work_size + 2 * uv_work_size);
+        p->emit_alpha = EmitRescaledAlphaYUV;
+        WebPInitAlphaProcessing();
+    }
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+// RGBA rescaling
+
+static int ExportRGB(WebPDecParams* const p, int y_pos) { // returns the number of rows written from y_pos on
+    const WebPYUV444Converter to_rgb = WebPYUV444Converters[p->output->colorspace];
+    const WebPRGBABuffer* const rgb = &p->output->u.RGBA;
+    uint8_t* out_row = rgb->rgba + y_pos * rgb->stride;
+    int rows_out = 0;
+    // For RGB rescaling, because of the YUV420, current scan position
+    // U/V can be +1/-1 line from the Y one.  Hence the double test.
+    while (WebPRescalerHasPendingOutput(&p->scaler_y) && WebPRescalerHasPendingOutput(&p->scaler_u)) {
+        assert(y_pos + rows_out < p->output->height);
+        assert(p->scaler_u.y_accum == p->scaler_v.y_accum);
+        WebPRescalerExportRow(&p->scaler_y);
+        WebPRescalerExportRow(&p->scaler_u);
+        WebPRescalerExportRow(&p->scaler_v);
+        to_rgb(p->scaler_y.dst, p->scaler_u.dst, p->scaler_v.dst, out_row, p->scaler_y.dst_width);
+        ++rows_out;
+        out_row += rgb->stride;
+    }
+    return rows_out;
+}
+
+static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) { // returns the number of RGB rows exported
+    const int mb_h = io->mb_h;
+    const int uv_mb_h = (mb_h + 1) >> 1; // chroma row count, rounded up
+    int j = 0, uv_j = 0; // luma / chroma source rows imported so far
+    int num_lines_out = 0;
+    while (j < mb_h) { // loop until every luma row of the band has been imported
+        const int y_lines_in = WebPRescalerImport(&p->scaler_y, mb_h - j, io->y + j * io->y_stride, io->y_stride);
+        j += y_lines_in;
+        if (WebPRescaleNeededLines(&p->scaler_u, uv_mb_h - uv_j)) {
+            const int u_lines_in =
+                WebPRescalerImport(&p->scaler_u, uv_mb_h - uv_j, io->u + uv_j * io->uv_stride, io->uv_stride);
+            const int v_lines_in =
+                WebPRescalerImport(&p->scaler_v, uv_mb_h - uv_j, io->v + uv_j * io->uv_stride, io->uv_stride);
+            (void)v_lines_in; // remove a gcc warning
+            assert(u_lines_in == v_lines_in);
+            uv_j += u_lines_in;
+        }
+        num_lines_out += ExportRGB(p, p->last_y + num_lines_out); // write below the rows already output
+    }
+    return num_lines_out;
+}
+
+static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) {
+    const WEBP_CSP_MODE mode = p->output->colorspace;
+    const int premultiply = WebPIsPremultipliedMode(mode);
+    const int alpha_first = (mode == MODE_ARGB || mode == MODE_Argb);
+    const WebPRGBABuffer* const rgb = &p->output->u.RGBA;
+    uint8_t* const first_row = rgb->rgba + y_pos * rgb->stride;
+    uint8_t* alpha_out = first_row + (alpha_first ? 0 : 3); // alpha sits at byte offset 0 or 3
+    const int out_width = p->scaler_a.dst_width;
+    uint32_t saw_non_opaque = 0;
+    int rows_out = 0;
+
+    while (WebPRescalerHasPendingOutput(&p->scaler_a) && rows_out < max_lines_out) {
+        assert(y_pos + rows_out < p->output->height);
+        WebPRescalerExportRow(&p->scaler_a);
+        saw_non_opaque |= WebPDispatchAlpha(p->scaler_a.dst, 0, out_width, 1, alpha_out, 0);
+        ++rows_out;
+        alpha_out += rgb->stride;
+    }
+    if (saw_non_opaque && premultiply) {
+        WebPApplyAlphaMultiply(first_row, alpha_first, out_width, rows_out, rgb->stride);
+    }
+    return rows_out;
+}
+
+static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos, int max_lines_out) { // returns the rows written
+    const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+    uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride;
+#ifdef WEBP_SWAP_16BIT_CSP
+    uint8_t* alpha_dst = base_rgba; // alpha nibble is in the first byte of each 2-byte pixel
+#else
+    uint8_t* alpha_dst = base_rgba + 1; // alpha nibble is in the second byte of each 2-byte pixel
+#endif
+    int num_lines_out = 0;
+    const WEBP_CSP_MODE colorspace = p->output->colorspace;
+    const int width = p->scaler_a.dst_width;
+    const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
+    uint32_t alpha_mask = 0x0f; // stays 0x0f only if every alpha nibble written is 0x0f
+
+    while (WebPRescalerHasPendingOutput(&p->scaler_a) && num_lines_out < max_lines_out) {
+        int i;
+        assert(y_pos + num_lines_out < p->output->height);
+        WebPRescalerExportRow(&p->scaler_a);
+        for (i = 0; i < width; ++i) {
+            // Fill in the alpha value (converted to 4 bits).
+            const uint32_t alpha_value = p->scaler_a.dst[i] >> 4;
+            alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value; // keep the high nibble, replace the low one
+            alpha_mask &= alpha_value;
+        }
+        alpha_dst += buf->stride;
+        ++num_lines_out;
+    }
+    if (is_premult_alpha && alpha_mask != 0x0f) {
+        WebPApplyAlphaMultiply4444(base_rgba, width, num_lines_out, buf->stride);
+    }
+    return num_lines_out;
+}
+
+static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p, int expected_num_out_lines) { // always returns 0
+    if (io->a != NULL) { // nothing to do when the band carries no alpha
+        WebPRescaler* const scaler = &p->scaler_a;
+        int lines_left = expected_num_out_lines; // output rows still to be written
+        const int y_end = p->last_y + lines_left;
+        while (lines_left > 0) {
+            const int row_offset = scaler->src_y - io->mb_y; // scaler position relative to the start of this band
+            WebPRescalerImport(scaler, io->mb_h + io->mb_y - scaler->src_y, io->a + row_offset * io->width, io->width);
+            lines_left -= p->emit_alpha_row(p, y_end - lines_left, lines_left); // hook chosen in InitRGBRescaler()
+        }
+    }
+    return 0;
+}
+
+static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
+    const int has_alpha = WebPIsAlphaMode(p->output->colorspace);
+    const int out_width = io->scaled_width;
+    const int out_height = io->scaled_height;
+    const int uv_in_width = (io->mb_w + 1) >> 1;
+    const int uv_in_height = (io->mb_h + 1) >> 1;
+    const size_t work_size = 2 * out_width; // scratch memory for one rescaler
+    rescaler_t* work;                       // rescalers work area
+    uint8_t* tmp;                           // tmp storage for scaled YUV444 samples before RGB conversion
+    size_t tmp_size1, tmp_size2, total_size;
+
+    tmp_size1 = 3 * work_size;
+    tmp_size2 = 3 * out_width;
+    if (has_alpha) {
+        tmp_size1 += work_size;
+        tmp_size2 += out_width;
+    }
+    total_size = tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp);
+    p->memory = WebPSafeMalloc(1ULL, total_size);
+    if (p->memory == NULL) {
+        return 0; // memory error
+    }
+    work = (rescaler_t*)p->memory;
+    tmp = (uint8_t*)(work + tmp_size1);
+    WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h, tmp + 0 * out_width, out_width, out_height, 0, 1,
+                     work + 0 * work_size);
+    WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height, tmp + 1 * out_width, out_width, out_height, 0, 1,
+                     work + 1 * work_size);
+    WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height, tmp + 2 * out_width, out_width, out_height, 0, 1,
+                     work + 2 * work_size);
+    p->emit = EmitRescaledRGB;
+    WebPInitYUV444Converters();
+
+    if (has_alpha) {
+        WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h, tmp + 3 * out_width, out_width, out_height, 0, 1,
+                         work + 3 * work_size);
+        p->emit_alpha = EmitRescaledAlphaRGB;
+        if (p->output->colorspace == MODE_RGBA_4444 || p->output->colorspace == MODE_rgbA_4444) {
+            p->emit_alpha_row = ExportAlphaRGBA4444;
+        } else {
+            p->emit_alpha_row = ExportAlpha;
+        }
+        WebPInitAlphaProcessing();
+    }
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+// Default custom functions
+
+static int CustomSetup(VP8Io* io) { // picks the emit hooks and scratch memory; returns 1 on success, 0 on failure
+    WebPDecParams* const p = (WebPDecParams*)io->opaque;
+    const WEBP_CSP_MODE colorspace = p->output->colorspace;
+    const int is_rgb = WebPIsRGBMode(colorspace);
+    const int is_alpha = WebPIsAlphaMode(colorspace);
+
+    p->memory = NULL;
+    p->emit = NULL;
+    p->emit_alpha = NULL;
+    p->emit_alpha_row = NULL;
+    if (!WebPIoInitFromOptions(p->options, io, is_alpha ? MODE_YUV : MODE_YUVA)) { // NOTE(review): mode looks inverted vs is_alpha - confirm
+        return 0;
+    }
+    if (is_alpha && WebPIsPremultipliedMode(colorspace)) {
+        WebPInitUpsamplers();
+    }
+    if (io->use_scaling) {
+        const int ok = is_rgb ? InitRGBRescaler(io, p) : InitYUVRescaler(io, p); // these also set emit/emit_alpha
+        if (!ok) {
+            return 0; // memory error
+        }
+    } else {
+        if (is_rgb) {
+            WebPInitSamplers();
+            p->emit = EmitSampledRGB; // default
+            if (io->fancy_upsampling) {
+#ifdef FANCY_UPSAMPLING
+                const int uv_width = (io->mb_w + 1) >> 1;
+                p->memory = WebPSafeMalloc(1ULL, (size_t)(io->mb_w + 2 * uv_width)); // one Y row plus one U and one V row
+                if (p->memory == NULL) {
+                    return 0; // memory error.
+                }
+                p->tmp_y = (uint8_t*)p->memory;
+                p->tmp_u = p->tmp_y + io->mb_w;
+                p->tmp_v = p->tmp_u + uv_width;
+                p->emit = EmitFancyRGB;
+                WebPInitUpsamplers();
+#endif
+            }
+        } else {
+            p->emit = EmitYUV;
+        }
+        if (is_alpha) { // need transparency output
+            p->emit_alpha = (colorspace == MODE_RGBA_4444 || colorspace == MODE_rgbA_4444)
+                                ? EmitAlphaRGBA4444
+                                : is_rgb ? EmitAlphaRGB : EmitAlphaYUV;
+            if (is_rgb) {
+                WebPInitAlphaProcessing();
+            }
+        }
+    }
+
+    if (is_rgb) {
+        VP8YUVInit();
+    }
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+
+static int CustomPut(const VP8Io* io) {
+    WebPDecParams* const params = (WebPDecParams*)io->opaque;
+    int rows_emitted;
+    assert(!(io->mb_y & 1));
+
+    // Reject a band with a non-positive width or height.
+    if (io->mb_w <= 0 || io->mb_h <= 0) {
+        return 0;
+    }
+    // Emit the color rows first; the alpha emitter (if any) is told how many came out.
+    rows_emitted = params->emit(io, params);
+    if (params->emit_alpha != NULL) {
+        params->emit_alpha(io, params, rows_emitted);
+    }
+    params->last_y += rows_emitted;
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+
+static void CustomTeardown(const VP8Io* io) {
+    WebPDecParams* const params = (WebPDecParams*)io->opaque;
+    WebPSafeFree(params->memory); // release the scratch memory, if any
+    params->memory = NULL;
+}
+
+//------------------------------------------------------------------------------
+// Main entry point
+
+void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io) { // installs the default hooks above
+    io->opaque = params;
+    io->setup = CustomSetup;
+    io->put = CustomPut;
+    io->teardown = CustomTeardown;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/dec/quant.c b/codec/L2/demos/webpEnc/host/src/dec/quant.c
new file mode 100644
index 0000000000..3d94832404
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dec/quant.c
@@ -0,0 +1,87 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Quantizer initialization
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./vp8i.h"
+
+static WEBP_INLINE int clip(int v, int M) { // clamps v to the range [0, M]
+    return v < 0 ? 0 : v > M ? M : v; // the negative check comes first, so v < 0 always yields 0
+}
+
+// Paragraph 14.1: DC dequantization factors; VP8ParseQuant() indexes this table with a clipped quantizer index
+static const uint8_t kDcTable[128] = {
+    4,   5,   6,   7,   8,   9,   10,  10,  11,  12,  13,  14,  15,  16,  17,  17,  18,  19,  20,  20,  21,  21,
+    22,  22,  23,  23,  24,  25,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,
+    40,  41,  42,  43,  44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,
+    61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  76,  77,  78,  79,  80,  81,
+    82,  83,  84,  85,  86,  87,  88,  89,  91,  93,  95,  96,  98,  100, 101, 102, 104, 106, 108, 110, 112, 114,
+    116, 118, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157};
+
+static const uint16_t kAcTable[128] = { // AC dequantization factors, indexed like kDcTable; uint16_t as values exceed 255
+    4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
+    26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
+    48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,  78,  80,
+    82,  84,  86,  88,  90,  92,  94,  96,  98,  100, 102, 104, 106, 108, 110, 112, 114, 116, 119, 122, 125, 128,
+    131, 134, 137, 140, 143, 146, 149, 152, 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201,
+    205, 209, 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284};
+
+//------------------------------------------------------------------------------
+// Paragraph 9.6
+
+void VP8ParseQuant(VP8Decoder* const dec) { // reads the quantizer header from dec->br_ and fills dec->dqm_[] per segment
+    VP8BitReader* const br = &dec->br_;
+    const int base_q0 = VP8GetValue(br, 7); // 7-bit base quantizer index
+    const int dqy1_dc = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0; // each delta: one flag bit, then a signed value; 0 if absent
+    const int dqy2_dc = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;
+    const int dqy2_ac = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;
+    const int dquv_dc = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;
+    const int dquv_ac = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;
+
+    const VP8SegmentHeader* const hdr = &dec->segment_hdr_;
+    int i;
+
+    for (i = 0; i < NUM_MB_SEGMENTS; ++i) { // one quantization matrix per segment
+        int q;
+        if (hdr->use_segment_) {
+            q = hdr->quantizer_[i];
+            if (!hdr->absolute_delta_) { // per-segment value is relative to the base index
+                q += base_q0;
+            }
+        } else {
+            if (i > 0) { // segmentation is off: every segment reuses the matrix computed for segment 0
+                dec->dqm_[i] = dec->dqm_[0];
+                continue;
+            } else {
+                q = base_q0;
+            }
+        }
+        {
+            VP8QuantMatrix* const m = &dec->dqm_[i]; // in each *_mat_ pair, [0] is the DC factor and [1] the AC factor
+            m->y1_mat_[0] = kDcTable[clip(q + dqy1_dc, 127)];
+            m->y1_mat_[1] = kAcTable[clip(q + 0, 127)];
+
+            m->y2_mat_[0] = kDcTable[clip(q + dqy2_dc, 127)] * 2; // y2 DC factor is doubled
+            // For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
+            // The smallest precision for that is '(x*6349) >> 12' but 16 is a good
+            // word size.
+            m->y2_mat_[1] = (kAcTable[clip(q + dqy2_ac, 127)] * 101581) >> 16;
+            if (m->y2_mat_[1] < 8) m->y2_mat_[1] = 8; // y2 AC factor has a floor of 8
+
+            m->uv_mat_[0] = kDcTable[clip(q + dquv_dc, 117)]; // index capped at 117, so this never exceeds kDcTable[117] == 132
+            m->uv_mat_[1] = kAcTable[clip(q + dquv_ac, 127)];
+
+            m->uv_quant_ = q + dquv_ac; // for dithering strength evaluation
+        }
+    }
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/dec/tree.c b/codec/L2/demos/webpEnc/host/src/dec/tree.c
new file mode 100644
index 0000000000..9fca2dd271
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dec/tree.c
@@ -0,0 +1,452 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Coding trees and probas
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./vp8i.h"
+#include "../utils/bit_reader_inl.h"
+#include "../utils/profiling.h"
+
+#define USE_GENERIC_TREE
+
+#ifdef USE_GENERIC_TREE // tree for 4x4 intra modes: entries > 0 are node indices, entries <= 0 are leaves (negated mode)
+static const int8_t kYModesIntra4[18] = {-B_DC_PRED, 1, -B_TM_PRED, 2, -B_VE_PRED, 3,
+                                         4,          6, -B_HE_PRED, 5, -B_RD_PRED, -B_VR_PRED,
+                                         -B_LD_PRED, 7, -B_VL_PRED, 8, -B_HD_PRED, -B_HU_PRED};
+#endif // USE_GENERIC_TREE
+
+//------------------------------------------------------------------------------
+// Default probabilities
+
+// Paragraph 13.5 (VP8 spec, RFC 6386): default coefficient probabilities, indexed [type][band][context][proba]
+static const uint8_t CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
+    {{{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}},
+     {{253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128},
+      {189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128},
+      {106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128}},
+     {
+         {1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128},
+         {181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128},
+         {78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128},
+     },
+     {
+         {1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128},
+         {184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128},
+         {77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128},
+     },
+     {{1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128},
+      {170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128},
+      {37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128}},
+     {{1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128},
+      {207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128},
+      {102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128}},
+     {{1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128},
+      {177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128},
+      {80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128}},
+     {{1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      {246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      {255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}}},
+    {{{198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62},
+      {131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1},
+      {68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128}},
+     {{1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128},
+      {184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128},
+      {81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128}},
+     {{1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128},
+      {99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128},
+      {23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128}},
+     {{1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128},
+      {109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128},
+      {44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128}},
+     {{1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128},
+      {94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128},
+      {22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128}},
+     {{1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128},
+      {124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128},
+      {35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128}},
+     {{1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128},
+      {121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128},
+      {45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128}},
+     {{1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128},
+      {203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128},
+      {137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128}}},
+    {{{253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128},
+      {175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128},
+      {73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128}},
+     {{1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128},
+      {239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128},
+      {155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128}},
+     {{1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128},
+      {201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128},
+      {69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128}},
+     {{1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128},
+      {223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128},
+      {141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128}},
+     {{1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128},
+      {190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128},
+      {149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128}},
+     {{1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      {247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      {240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128}},
+     {{1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128},
+      {213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128},
+      {55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128}},
+     {{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}}},
+    {{{202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255},
+      {126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128},
+      {61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128}},
+     {{1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128},
+      {166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128},
+      {39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128}},
+     {{1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128},
+      {124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128},
+      {24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128}},
+     {{1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128},
+      {149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128},
+      {28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128}},
+     {{1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128},
+      {123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128},
+      {20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128}},
+     {{1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128},
+      {168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128},
+      {47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128}},
+     {{1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128},
+      {141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128},
+      {42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128}},
+     {{1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      {244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      {238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128}}}};
+
+// Paragraph 11.5 (VP8 spec, RFC 6386): 4x4 intra-mode probabilities, indexed [top mode][left mode][tree node]
+static const uint8_t kBModesProba[NUM_BMODES][NUM_BMODES][NUM_BMODES - 1] = {
+    {{231, 120, 48, 89, 115, 113, 120, 152, 112},
+     {152, 179, 64, 126, 170, 118, 46, 70, 95},
+     {175, 69, 143, 80, 85, 82, 72, 155, 103},
+     {56, 58, 10, 171, 218, 189, 17, 13, 152},
+     {114, 26, 17, 163, 44, 195, 21, 10, 173},
+     {121, 24, 80, 195, 26, 62, 44, 64, 85},
+     {144, 71, 10, 38, 171, 213, 144, 34, 26},
+     {170, 46, 55, 19, 136, 160, 33, 206, 71},
+     {63, 20, 8, 114, 114, 208, 12, 9, 226},
+     {81, 40, 11, 96, 182, 84, 29, 16, 36}},
+    {{134, 183, 89, 137, 98, 101, 106, 165, 148},
+     {72, 187, 100, 130, 157, 111, 32, 75, 80},
+     {66, 102, 167, 99, 74, 62, 40, 234, 128},
+     {41, 53, 9, 178, 241, 141, 26, 8, 107},
+     {74, 43, 26, 146, 73, 166, 49, 23, 157},
+     {65, 38, 105, 160, 51, 52, 31, 115, 128},
+     {104, 79, 12, 27, 217, 255, 87, 17, 7},
+     {87, 68, 71, 44, 114, 51, 15, 186, 23},
+     {47, 41, 14, 110, 182, 183, 21, 17, 194},
+     {66, 45, 25, 102, 197, 189, 23, 18, 22}},
+    {{88, 88, 147, 150, 42, 46, 45, 196, 205},
+     {43, 97, 183, 117, 85, 38, 35, 179, 61},
+     {39, 53, 200, 87, 26, 21, 43, 232, 171},
+     {56, 34, 51, 104, 114, 102, 29, 93, 77},
+     {39, 28, 85, 171, 58, 165, 90, 98, 64},
+     {34, 22, 116, 206, 23, 34, 43, 166, 73},
+     {107, 54, 32, 26, 51, 1, 81, 43, 31},
+     {68, 25, 106, 22, 64, 171, 36, 225, 114},
+     {34, 19, 21, 102, 132, 188, 16, 76, 124},
+     {62, 18, 78, 95, 85, 57, 50, 48, 51}},
+    {{193, 101, 35, 159, 215, 111, 89, 46, 111},
+     {60, 148, 31, 172, 219, 228, 21, 18, 111},
+     {112, 113, 77, 85, 179, 255, 38, 120, 114},
+     {40, 42, 1, 196, 245, 209, 10, 25, 109},
+     {88, 43, 29, 140, 166, 213, 37, 43, 154},
+     {61, 63, 30, 155, 67, 45, 68, 1, 209},
+     {100, 80, 8, 43, 154, 1, 51, 26, 71},
+     {142, 78, 78, 16, 255, 128, 34, 197, 171},
+     {41, 40, 5, 102, 211, 183, 4, 1, 221},
+     {51, 50, 17, 168, 209, 192, 23, 25, 82}},
+    {{138, 31, 36, 171, 27, 166, 38, 44, 229},
+     {67, 87, 58, 169, 82, 115, 26, 59, 179},
+     {63, 59, 90, 180, 59, 166, 93, 73, 154},
+     {40, 40, 21, 116, 143, 209, 34, 39, 175},
+     {47, 15, 16, 183, 34, 223, 49, 45, 183},
+     {46, 17, 33, 183, 6, 98, 15, 32, 183},
+     {57, 46, 22, 24, 128, 1, 54, 17, 37},
+     {65, 32, 73, 115, 28, 128, 23, 128, 205},
+     {40, 3, 9, 115, 51, 192, 18, 6, 223},
+     {87, 37, 9, 115, 59, 77, 64, 21, 47}},
+    {{104, 55, 44, 218, 9, 54, 53, 130, 226},
+     {64, 90, 70, 205, 40, 41, 23, 26, 57},
+     {54, 57, 112, 184, 5, 41, 38, 166, 213},
+     {30, 34, 26, 133, 152, 116, 10, 32, 134},
+     {39, 19, 53, 221, 26, 114, 32, 73, 255},
+     {31, 9, 65, 234, 2, 15, 1, 118, 73},
+     {75, 32, 12, 51, 192, 255, 160, 43, 51},
+     {88, 31, 35, 67, 102, 85, 55, 186, 85},
+     {56, 21, 23, 111, 59, 205, 45, 37, 192},
+     {55, 38, 70, 124, 73, 102, 1, 34, 98}},
+    {{125, 98, 42, 88, 104, 85, 117, 175, 82},
+     {95, 84, 53, 89, 128, 100, 113, 101, 45},
+     {75, 79, 123, 47, 51, 128, 81, 171, 1},
+     {57, 17, 5, 71, 102, 57, 53, 41, 49},
+     {38, 33, 13, 121, 57, 73, 26, 1, 85},
+     {41, 10, 67, 138, 77, 110, 90, 47, 114},
+     {115, 21, 2, 10, 102, 255, 166, 23, 6},
+     {101, 29, 16, 10, 85, 128, 101, 196, 26},
+     {57, 18, 10, 102, 102, 213, 34, 20, 43},
+     {117, 20, 15, 36, 163, 128, 68, 1, 26}},
+    {{102, 61, 71, 37, 34, 53, 31, 243, 192},
+     {69, 60, 71, 38, 73, 119, 28, 222, 37},
+     {68, 45, 128, 34, 1, 47, 11, 245, 171},
+     {62, 17, 19, 70, 146, 85, 55, 62, 70},
+     {37, 43, 37, 154, 100, 163, 85, 160, 1},
+     {63, 9, 92, 136, 28, 64, 32, 201, 85},
+     {75, 15, 9, 9, 64, 255, 184, 119, 16},
+     {86, 6, 28, 5, 64, 255, 25, 248, 1},
+     {56, 8, 17, 132, 137, 255, 55, 116, 128},
+     {58, 15, 20, 82, 135, 57, 26, 121, 40}},
+    {{164, 50, 31, 137, 154, 133, 25, 35, 218},
+     {51, 103, 44, 131, 131, 123, 31, 6, 158},
+     {86, 40, 64, 135, 148, 224, 45, 183, 128},
+     {22, 26, 17, 131, 240, 154, 14, 1, 209},
+     {45, 16, 21, 91, 64, 222, 7, 1, 197},
+     {56, 21, 39, 155, 60, 138, 23, 102, 213},
+     {83, 12, 13, 54, 192, 255, 68, 47, 28},
+     {85, 26, 85, 85, 128, 128, 32, 146, 171},
+     {18, 11, 7, 63, 144, 171, 4, 4, 246},
+     {35, 27, 10, 146, 174, 171, 12, 26, 128}},
+    {{190, 80, 35, 99, 180, 80, 126, 54, 45},
+     {85, 126, 47, 87, 176, 51, 41, 20, 32},
+     {101, 75, 128, 139, 118, 146, 116, 128, 85},
+     {56, 41, 15, 176, 236, 85, 37, 9, 62},
+     {71, 30, 17, 119, 118, 255, 17, 18, 138},
+     {101, 38, 60, 138, 55, 70, 43, 26, 142},
+     {146, 36, 19, 30, 171, 255, 97, 27, 20},
+     {138, 45, 61, 62, 219, 1, 81, 188, 64},
+     {32, 41, 20, 117, 151, 142, 20, 21, 163},
+     {112, 19, 12, 61, 195, 128, 48, 4, 24}}};
+
+void VP8ResetProba(VP8Proba* const proba) { // resets the segment probabilities only
+    memset(proba->segments_, 255u, sizeof(proba->segments_)); // every byte of segments_ becomes 255
+    // proba->bands_[][] is initialized later
+}
+
+static void ParseIntraMode(VP8BitReader* const br, VP8Decoder* const dec, int mb_x) { // parses one macroblock's intra modes
+    uint8_t* const top = dec->intra_t_ + 4 * mb_x; // four 'top' context modes for this macroblock column
+    uint8_t* const left = dec->intra_l_; // 'left' context modes, updated in place below
+    VP8MBData* const block = dec->mb_data_ + mb_x;
+
+    // Note: we don't save segment map (yet), as we don't expect
+    // to decode more than 1 keyframe.
+    if (dec->segment_hdr_.update_map_) {
+        // Hardcoded tree parsing
+        block->segment_ = !VP8GetBit(br, dec->proba_.segments_[0]) ? VP8GetBit(br, dec->proba_.segments_[1])
+                                                                   : 2 + VP8GetBit(br, dec->proba_.segments_[2]);
+    } else {
+        block->segment_ = 0; // default for intra
+    }
+    if (dec->use_skip_proba_) block->skip_ = VP8GetBit(br, dec->skip_p_); // skip_ is left untouched when the flag is off
+
+    block->is_i4x4_ = !VP8GetBit(br, 145); // decide for B_PRED first
+    if (!block->is_i4x4_) {
+        // Hardcoded 16x16 intra-mode decision tree.
+        const int ymode =
+            VP8GetBit(br, 156) ? (VP8GetBit(br, 128) ? TM_PRED : H_PRED) : (VP8GetBit(br, 163) ? V_PRED : DC_PRED);
+        block->imodes_[0] = ymode;
+        memset(top, ymode, 4 * sizeof(*top)); // the single 16x16 mode becomes the context for all four positions
+        memset(left, ymode, 4 * sizeof(*left));
+    } else {
+        uint8_t* modes = block->imodes_; // receives 16 modes, four per row
+        int y;
+        for (y = 0; y < 4; ++y) {
+            int ymode = left[y]; // starts as the left context, then tracks the previous mode in this row
+            int x;
+            for (x = 0; x < 4; ++x) {
+                const uint8_t* const prob = kBModesProba[top[x]][ymode]; // probabilities selected by (top, left) context
+#ifdef USE_GENERIC_TREE
+                // Generic tree-parsing
+                int i = kYModesIntra4[VP8GetBit(br, prob[0])];
+                while (i > 0) { // positive entries are inner nodes; stop at the first entry <= 0
+                    i = kYModesIntra4[2 * i + VP8GetBit(br, prob[i])];
+                }
+                ymode = -i; // leaves store the negated mode value
+#else
+                // Hardcoded tree parsing
+                ymode = !VP8GetBit(br, prob[0])
+                            ? B_DC_PRED
+                            : !VP8GetBit(br, prob[1])
+                                  ? B_TM_PRED
+                                  : !VP8GetBit(br, prob[2])
+                                        ? B_VE_PRED
+                                        : !VP8GetBit(br, prob[3])
+                                              ? (!VP8GetBit(br, prob[4])
+                                                     ? B_HE_PRED
+                                                     : (!VP8GetBit(br, prob[5]) ? B_RD_PRED : B_VR_PRED))
+                                              : (!VP8GetBit(br, prob[6])
+                                                     ? B_LD_PRED
+                                                     : (!VP8GetBit(br, prob[7])
+                                                            ? B_VL_PRED
+                                                            : (!VP8GetBit(br, prob[8]) ? B_HD_PRED : B_HU_PRED)));
+#endif // USE_GENERIC_TREE
+                top[x] = ymode; // becomes the 'top' context for the next row at this column
+            }
+            memcpy(modes, top, 4 * sizeof(*top)); // after the row, top[] holds exactly this row's four modes
+            modes += 4;
+            left[y] = ymode; // last mode of the row is the 'left' context for the next macroblock
+        }
+    }
+    // Hardcoded UVMode decision tree
+    block->uvmode_ =
+        !VP8GetBit(br, 142) ? DC_PRED : !VP8GetBit(br, 114) ? V_PRED : VP8GetBit(br, 183) ? TM_PRED : H_PRED;
+}
+
+int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec) { // parses intra modes for dec->mb_w_ macroblocks
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch); // profiling instrumentation wraps the whole row
+
+    int mb_x;
+    for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
+        ParseIntraMode(br, dec, mb_x);
+    }
+
+    StopProfiling(&stop_watch, &timeVP8ParseIntraModeRow, &countVP8ParseIntraModeRow);
+
+    return !dec->br_.eof_; // 0 once dec->br_ reports eof; note this tests dec->br_, not the 'br' argument
+}
+
+//------------------------------------------------------------------------------
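+// Paragraph 13 (VP8 spec, RFC 6386): per-entry probability that a coefficient probability is updated in the header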
+
+static const uint8_t CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
+    {{{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255},
+      {249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255},
+      {234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255},
+      {250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255},
+      {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}},
+    {{{217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255},
+      {234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255}},
+     {{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+      {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}},
+    {{{186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255},
+      {234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255},
+      {251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255}},
+     {{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255}},
+     {{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}},
+    {{{248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255},
+      {248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+      {246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+      {252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255}},
+     {{255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255},
+      {248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+      {253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+      {252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+      {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}}};
+
+// Paragraph 9.9
+
+static const int kBands[16 + 1] = { // maps index b in [0, 16] to a band index into proba->bands_[t][]
+    0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+    0 // extra entry as sentinel
+};
+
+void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) { // fills dec->proba_ bands and the skip-proba fields
+    VP8Proba* const proba = &dec->proba_;
+    int t, b, c, p;
+    for (t = 0; t < NUM_TYPES; ++t) {
+        for (b = 0; b < NUM_BANDS; ++b) {
+            for (c = 0; c < NUM_CTX; ++c) {
+                for (p = 0; p < NUM_PROBAS; ++p) {
+                    const int v = // update bit set: read a new 8-bit value; otherwise use the default from CoeffsProba0
+                        VP8GetBit(br, CoeffsUpdateProba[t][b][c][p]) ? VP8GetValue(br, 8) : CoeffsProba0[t][b][c][p];
+                    proba->bands_[t][b].probas_[c][p] = v;
+                }
+            }
+        }
+        for (b = 0; b < 16 + 1; ++b) { // 17 pointers per type, including the kBands sentinel entry
+            proba->bands_ptr_[t][b] = &proba->bands_[t][kBands[b]];
+        }
+    }
+    dec->use_skip_proba_ = VP8Get(br);
+    if (dec->use_skip_proba_) { // skip_p_ is only read from the stream when the flag is set
+        dec->skip_p_ = VP8GetValue(br, 8);
+    }
+}
diff --git a/codec/L2/demos/webpEnc/host/src/dec/vp8.c b/codec/L2/demos/webpEnc/host/src/dec/vp8.c
new file mode 100644
index 0000000000..b941f48836
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dec/vp8.c
@@ -0,0 +1,675 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// main entry for the decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+
+#include "./alphai.h"
+#include "./vp8i.h"
+#include "./vp8li.h"
+#include "./webpi.h"
+#include "../utils/bit_reader_inl.h"
+#include "../utils/utils.h"
+#include "../utils/profiling.h"
+
+//------------------------------------------------------------------------------
+
+int WebPGetDecoderVersion(void) { // version packed as (major << 16) | (minor << 8) | revision
+    return DEC_REV_VERSION | (DEC_MIN_VERSION << 8) | (DEC_MAJ_VERSION << 16);
+}
+
+//------------------------------------------------------------------------------
+// VP8Decoder
+
+static void SetOk(VP8Decoder* const dec) {
+    dec->status_ = VP8_STATUS_OK;
+    dec->error_msg_ = "OK";
+}
+
+int VP8InitIoInternal(VP8Io* const io, int version) {
+    // Returns 0 when 'version' is ABI-incompatible with this decoder.
+    // Otherwise returns 1, after zero-filling 'io' when one is supplied.
+    if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) return 0; // mismatch error
+    if (io != NULL) {
+        memset(io, 0, sizeof(*io));
+    }
+    return 1;
+}
+
+VP8Decoder* VP8New(void) {
+    // Allocates a decoder with WebPSafeCalloc() and initializes it; returns NULL if the allocation fails.
+    VP8Decoder* const dec = (VP8Decoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
+    if (dec == NULL) return NULL;
+    SetOk(dec);
+    WebPGetWorkerInterface()->Init(&dec->worker_);
+    dec->ready_ = 0;
+    dec->num_parts_ = 1;
+    return dec;
+}
+
+VP8StatusCode VP8Status(VP8Decoder* const dec) {
+    // A NULL decoder is reported as an invalid parameter.
+    return (dec == NULL) ? VP8_STATUS_INVALID_PARAM : dec->status_;
+}
+
+const char* VP8StatusMessage(VP8Decoder* const dec) {
+    // Never returns NULL: fixed strings stand in for a missing decoder or a missing message.
+    if (dec == NULL) return "no object";
+    return (dec->error_msg_ != NULL) ? dec->error_msg_ : "OK";
+}
+
+void VP8Delete(VP8Decoder* const dec) {
+    // Deleting a NULL decoder is a no-op.
+    if (dec == NULL) return;
+    VP8Clear(dec); // release what the decoder holds before freeing the object itself
+    WebPSafeFree(dec);
+}
+
+int VP8SetError(VP8Decoder* const dec, VP8StatusCode error, const char* const msg) {
+    // Always returns 0, so callers can write 'return VP8SetError(...)'.
+    // Only the first error is kept: once status_ is not OK, later calls leave the decoder untouched.
+    if (dec->status_ != VP8_STATUS_OK) return 0;
+    dec->ready_ = 0;
+    dec->error_msg_ = msg;
+    dec->status_ = error;
+    return 0;
+}
+
+//------------------------------------------------------------------------------
+
+int VP8CheckSignature(const uint8_t* const data, size_t data_size) {
+    return !(data_size < 3 || data[0] != 0x9d || data[1] != 0x01 || data[2] != 0x2a); // signature: 9d 01 2a
+}
+
+int VP8GetInfo(const uint8_t* data, size_t data_size, size_t chunk_size, int* const width, int* const height) {
+    // Validates the VP8 frame header found at 'data' and reports the picture size.
+    // Returns 1 when the header is acceptable and 0 otherwise; 'width' and
+    // 'height' are optional outputs that are only written on success.
+    uint32_t frame_tag;
+    int pic_w, pic_h;
+
+    if (data == NULL || data_size < VP8_FRAME_HEADER_SIZE) {
+        return 0; // not enough data
+    }
+    if (!VP8CheckSignature(data + 3, data_size - 3)) {
+        return 0; // Wrong signature.
+    }
+
+    // 3-byte little-endian frame tag; the two 14-bit dimensions follow the signature.
+    frame_tag = data[0] | (data[1] << 8) | (data[2] << 16);
+    pic_w = ((data[7] << 8) | data[6]) & 0x3fff;
+    pic_h = ((data[9] << 8) | data[8]) & 0x3fff;
+
+    if (frame_tag & 1) {
+        return 0; // Not a keyframe.
+    }
+    if (((frame_tag >> 1) & 7) > 3) {
+        return 0; // unknown profile
+    }
+    if (((frame_tag >> 4) & 1) == 0) {
+        return 0; // first frame is invisible!
+    }
+    if ((frame_tag >> 5) >= chunk_size) { // partition_length
+        return 0;                         // inconsistent size information.
+    }
+    if (pic_w == 0 || pic_h == 0) {
+        return 0; // a zero width or a zero height is not supported
+    }
+
+    if (width != NULL) *width = pic_w;
+    if (height != NULL) *height = pic_h;
+
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+// Header parsing
+
+static void ResetSegmentHeader(VP8SegmentHeader* const hdr) {
+    // Defaults: segmentation and map update off, absolute_delta_ set to 1, per-segment values zeroed.
+    assert(hdr != NULL);
+    memset(hdr->filter_strength_, 0, sizeof(hdr->filter_strength_));
+    memset(hdr->quantizer_, 0, sizeof(hdr->quantizer_));
+    hdr->absolute_delta_ = 1;
+    hdr->update_map_ = hdr->use_segment_ = 0;
+}
+
+// Paragraph 9.3
+static int ParseSegmentHeader(VP8BitReader* br, VP8SegmentHeader* hdr, VP8Proba* proba) {
+    // Reads the segmentation fields of the frame header; returns 0 if the bit reader reached end-of-data.
+    int i;
+    assert(br != NULL);
+    assert(hdr != NULL);
+    hdr->use_segment_ = VP8Get(br);
+    if (!hdr->use_segment_) {
+        hdr->update_map_ = 0;
+        return !br->eof_;
+    }
+    hdr->update_map_ = VP8Get(br);
+    if (VP8Get(br)) { // update data: each entry has a presence bit, absent entries become 0
+        hdr->absolute_delta_ = VP8Get(br);
+        for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
+            hdr->quantizer_[i] = VP8Get(br) ? VP8GetSignedValue(br, 7) : 0;
+        }
+        for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
+            hdr->filter_strength_[i] = VP8Get(br) ? VP8GetSignedValue(br, 6) : 0;
+        }
+    }
+    if (hdr->update_map_) { // absent tree probabilities become 255
+        for (i = 0; i < MB_FEATURE_TREE_PROBS; ++i) {
+            proba->segments_[i] = VP8Get(br) ? VP8GetValue(br, 8) : 255u;
+        }
+    }
+    return !br->eof_;
+}
+
+// Paragraph 9.5
+// This function returns VP8_STATUS_SUSPENDED if we don't have all the
+// necessary data in 'buf'.
+// This case is not necessarily an error (for incremental decoding).
+// Still, no bitreader is ever initialized to make it possible to read
+// unavailable memory.
+// If we don't even have the partitions' sizes, then VP8_STATUS_NOT_ENOUGH_DATA
+// is returned, and this is an unrecoverable error.
+// If the partitions were positioned ok, VP8_STATUS_OK is returned.
+static VP8StatusCode ParsePartitions(VP8Decoder* const dec, const uint8_t* buf, size_t size) {
+    VP8BitReader* const br = &dec->br_;
+    const uint8_t* const buf_end = buf + size;
+    const uint8_t* size_entry = buf; // walks the table of 3-byte partition sizes
+    const uint8_t* part_start;
+    size_t remaining = size;
+    size_t last_part;
+    size_t p;
+
+    dec->num_parts_ = 1 << VP8GetValue(br, 2);
+    last_part = dec->num_parts_ - 1;
+    if (size < 3 * last_part) {
+        // we can't even read the size table! That's a failure.
+        return VP8_STATUS_NOT_ENOUGH_DATA;
+    }
+    // Every partition but the last has a 24-bit little-endian size entry; the data follows the table.
+    part_start = buf + last_part * 3;
+    remaining -= last_part * 3;
+    for (p = 0; p < last_part; ++p, size_entry += 3) {
+        size_t psize = size_entry[0] | (size_entry[1] << 8) | (size_entry[2] << 16);
+        if (psize > remaining) psize = remaining; // clamp to the bytes actually available
+        VP8InitBitReader(dec->parts_ + p, part_start, psize);
+        part_start += psize;
+        remaining -= psize;
+    }
+    VP8InitBitReader(dec->parts_ + last_part, part_start, remaining); // the last partition gets the rest
+    return (part_start < buf_end) ? VP8_STATUS_OK : VP8_STATUS_SUSPENDED; // Init is ok, but there's not enough data
+}
+
+// Paragraph 9.4
+static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) {
+    // Reads the loop-filter settings and derives dec->filter_type_; returns 0 on end-of-data.
+    VP8FilterHeader* const hdr = &dec->filter_hdr_;
+    int i;
+    hdr->simple_ = VP8Get(br);
+    hdr->level_ = VP8GetValue(br, 6);
+    hdr->sharpness_ = VP8GetValue(br, 3);
+    hdr->use_lf_delta_ = VP8Get(br);
+    // Deltas are read only when enabled and flagged for update; each has its own presence bit.
+    if (hdr->use_lf_delta_ && VP8Get(br)) {
+        for (i = 0; i < NUM_REF_LF_DELTAS; ++i) {
+            if (VP8Get(br)) hdr->ref_lf_delta_[i] = VP8GetSignedValue(br, 6);
+        }
+        for (i = 0; i < NUM_MODE_LF_DELTAS; ++i) {
+            if (VP8Get(br)) hdr->mode_lf_delta_[i] = VP8GetSignedValue(br, 6);
+        }
+    }
+    if (hdr->level_ == 0) {
+        dec->filter_type_ = 0; // off
+    } else {
+        dec->filter_type_ = hdr->simple_ ? 1 : 2; // simple : complex
+    }
+    return !br->eof_;
+}
+
+// Topmost call: parses the frame headers found in io->data. Returns 1 on success, 0 on failure.
+int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
+    const uint8_t* buf;
+    size_t buf_size;
+    VP8FrameHeader* frm_hdr;
+    VP8PictureHeader* pic_hdr;
+    VP8BitReader* br;
+    VP8StatusCode status;
+
+    if (dec == NULL) {
+        return 0;
+    }
+    SetOk(dec); // reset the status: VP8SetError() only records an error while status_ is OK
+    if (io == NULL) {
+        return VP8SetError(dec, VP8_STATUS_INVALID_PARAM, "null VP8Io passed to VP8GetHeaders()");
+    }
+    buf = io->data;
+    buf_size = io->data_size;
+    if (buf_size < 4) {
+        return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, "Truncated header.");
+    }
+
+    // Paragraph 9.1
+    {
+        const uint32_t bits = buf[0] | (buf[1] << 8) | (buf[2] << 16); // 24-bit little-endian frame tag
+        frm_hdr = &dec->frm_hdr_;
+        frm_hdr->key_frame_ = !(bits & 1); // bit 0 clear means key frame
+        frm_hdr->profile_ = (bits >> 1) & 7;
+        frm_hdr->show_ = (bits >> 4) & 1;
+        frm_hdr->partition_length_ = (bits >> 5);
+        if (frm_hdr->profile_ > 3)
+            return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, "Incorrect keyframe parameters.");
+        if (!frm_hdr->show_) return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE, "Frame not displayable.");
+        buf += 3; // skip the frame tag
+        buf_size -= 3;
+    }
+
+    pic_hdr = &dec->pic_hdr_;
+    if (frm_hdr->key_frame_) {
+        // Paragraph 9.2
+        if (buf_size < 7) { // 3 signature bytes + 2 bytes of width + 2 bytes of height
+            return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, "cannot parse picture header");
+        }
+        if (!VP8CheckSignature(buf, buf_size)) {
+            return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, "Bad code word");
+        }
+        pic_hdr->width_ = ((buf[4] << 8) | buf[3]) & 0x3fff;
+        pic_hdr->xscale_ = buf[4] >> 6; // ratio: 1, 5/4 5/3 or 2
+        pic_hdr->height_ = ((buf[6] << 8) | buf[5]) & 0x3fff;
+        pic_hdr->yscale_ = buf[6] >> 6;
+        buf += 7;
+        buf_size -= 7;
+
+        dec->mb_w_ = (pic_hdr->width_ + 15) >> 4; // size in 16-pixel macroblock units, rounded up
+        dec->mb_h_ = (pic_hdr->height_ + 15) >> 4;
+        // Setup default output area (can be later modified during io->setup())
+        io->width = pic_hdr->width_;
+        io->height = pic_hdr->height_;
+        io->use_scaling = 0;
+        io->use_cropping = 0;
+        io->crop_top = 0;
+        io->crop_left = 0;
+        io->crop_right = io->width;
+        io->crop_bottom = io->height;
+        io->mb_w = io->width;  // sanity check
+        io->mb_h = io->height; // ditto
+
+        VP8ResetProba(&dec->proba_);
+        ResetSegmentHeader(&dec->segment_hdr_);
+    }
+
+    // Check if we have all the partition #0 available, and initialize dec->br_
+    // to read this partition (and this partition only).
+    if (frm_hdr->partition_length_ > buf_size) {
+        return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, "bad partition length");
+    }
+
+    br = &dec->br_;
+    VP8InitBitReader(br, buf, frm_hdr->partition_length_);
+    buf += frm_hdr->partition_length_; // what follows partition #0 is handed to ParsePartitions() below
+    buf_size -= frm_hdr->partition_length_;
+
+    if (frm_hdr->key_frame_) {
+        pic_hdr->colorspace_ = VP8Get(br);
+        pic_hdr->clamp_type_ = VP8Get(br);
+    }
+    if (!ParseSegmentHeader(br, &dec->segment_hdr_, &dec->proba_)) {
+        return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, "cannot parse segment header");
+    }
+    // Filter specs
+    if (!ParseFilterHeader(br, dec)) {
+        return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, "cannot parse filter header");
+    }
+    status = ParsePartitions(dec, buf, buf_size);
+    if (status != VP8_STATUS_OK) { // VP8_STATUS_SUSPENDED is reported as a failure here too
+        return VP8SetError(dec, status, "cannot parse partitions");
+    }
+
+    // quantizer change
+    VP8ParseQuant(dec);
+
+    // Frame buffer marking
+    if (!frm_hdr->key_frame_) {
+        return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE, "Not a key frame.");
+    }
+
+    VP8Get(br); // ignore the value of update_proba_
+
+    VP8ParseProba(br, dec);
+
+    // sanitized state
+    dec->ready_ = 1; // lets VP8Decode() skip header parsing
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+// Residual decoding (Paragraph 13.2 / 13.3)
+
+static const uint8_t kCat3[] = {173, 148, 140, 0}; // one VP8GetBit() argument per extra bit; 0 ends the list
+static const uint8_t kCat4[] = {176, 155, 140, 135, 0};
+static const uint8_t kCat5[] = {180, 157, 141, 134, 130, 0};
+static const uint8_t kCat6[] = {254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0};
+static const uint8_t* const kCat3456[] = {kCat3, kCat4, kCat5, kCat6}; // indexed by 'cat' in GetLargeValue()
+static const uint8_t kZigzag[16] = {0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15}; // decode order -> out[] index
+
+// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2
+static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) { // returns a magnitude >= 2
+    int v;
+    if (!VP8GetBit(br, p[3])) {
+        if (!VP8GetBit(br, p[4])) {
+            v = 2;
+        } else {
+            v = 3 + VP8GetBit(br, p[5]); // 3 or 4
+        }
+    } else {
+        if (!VP8GetBit(br, p[6])) {
+            if (!VP8GetBit(br, p[7])) {
+                v = 5 + VP8GetBit(br, 159); // 5 or 6
+            } else {
+                v = 7 + 2 * VP8GetBit(br, 165); // 7..10: two extra bits, high bit first
+                v += VP8GetBit(br, 145);
+            }
+        } else {
+            const uint8_t* tab;
+            const int bit1 = VP8GetBit(br, p[8]);
+            const int bit0 = VP8GetBit(br, p[9 + bit1]);
+            const int cat = 2 * bit1 + bit0; // 0..3, selects kCat3..kCat6
+            v = 0;
+            for (tab = kCat3456[cat]; *tab; ++tab) { // tables are 0-terminated
+                v += v + VP8GetBit(br, *tab); // shift in one extra bit, most significant first
+            }
+            v += 3 + (8 << cat); // add the category's base value
+        }
+    }
+    return v;
+}
+
+// Returns the position of the last non-zero coeff plus one
+static int GetCoeffs(
+    VP8BitReader* const br, const VP8BandProbas* const prob[], int ctx, const quant_t dq, int n, int16_t* out) {
+    // StopProfilingWatch stop_watch;
+    // StartProfiling(&stop_watch);
+
+    const uint8_t* p = prob[n]->probas_[ctx]; // 'n' is the first position to decode, 'ctx' its context
+    for (; n < 16; ++n) {
+        if (!VP8GetBit(br, p[0])) {
+            // StopProfiling(&stop_watch, &timeGetCoeffs, &countGetCoeffs);
+            return n; // previous coeff was last non-zero coeff
+        }
+        while (!VP8GetBit(br, p[1])) { // sequence of zero coeffs
+            p = prob[++n]->probas_[0]; // may read prob[16]; callers pass rows of bands_ptr_, which have 16 + 1 entries
+
+            if (n == 16) {
+                // StopProfiling(&stop_watch, &timeGetCoeffs, &countGetCoeffs);
+                return 16;
+            }
+        }
+        { // non zero coeff
+            const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0]; // contexts of the next position
+            int v;
+            if (!VP8GetBit(br, p[2])) {
+                v = 1;
+                p = p_ctx[1]; // next context is 1 after a magnitude of 1
+            } else {
+                v = GetLargeValue(br, p);
+                p = p_ctx[2]; // next context is 2 after a larger magnitude
+            }
+            out[kZigzag[n]] = VP8GetSigned(br, v) * dq[n > 0]; // dq[0] scales position 0, dq[1] all the others
+        }
+    }
+
+    // StopProfiling(&stop_watch, &timeGetCoeffs, &countGetCoeffs);
+
+    return 16;
+}
+
+static WEBP_INLINE uint32_t NzCodeBits(uint32_t nz_coeffs, int nz, int dc_nz) {
+    // Appends a 2-bit code to 'nz_coeffs': 3 when nz > 3, 2 when nz > 1, otherwise dc_nz.
+    const int code = (nz > 3) ? 3 : (nz > 1) ? 2 : dc_nz;
+    return (nz_coeffs << 2) | code;
+}
+
+static int ParseResiduals(VP8Decoder* const dec, VP8MB* const mb, VP8BitReader* const token_br) {
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    const VP8BandProbas*(*const bands)[16 + 1] = dec->proba_.bands_ptr_; // bands[type][position]
+    const VP8BandProbas* const* ac_proba;
+    VP8MBData* const block = dec->mb_data_ + dec->mb_x_;
+    const VP8QuantMatrix* const q = &dec->dqm_[block->segment_];
+    int16_t* dst = block->coeffs_;
+    VP8MB* const left_mb = dec->mb_info_ - 1; // left context is the entry just before mb_info_[0]
+    uint8_t tnz, lnz;
+    uint32_t non_zero_y = 0;
+    uint32_t non_zero_uv = 0;
+    int x, y, ch;
+    uint32_t out_t_nz, out_l_nz;
+    int first;
+
+    // StopProfilingWatch stop_watch_if1;
+    // StartProfiling(&stop_watch_if1);
+    memset(dst, 0, 384 * sizeof(*dst)); // clears all of coeffs_[384]
+    if (!block->is_i4x4_) { // parse DC
+        int16_t dc[16] = {0};
+        const int ctx = mb->nz_dc_ + left_mb->nz_dc_;
+        const int nz = GetCoeffs(token_br, bands[1], ctx, q->y2_mat_, 0, dc); // type 1 = Intra16-DC
+        mb->nz_dc_ = left_mb->nz_dc_ = (nz > 0);
+        if (nz > 1) { // more than just the DC -> perform the full transform
+            // StopProfilingWatch stop_watch_VP8TransformWHT;
+            // StartProfiling(&stop_watch_VP8TransformWHT);
+
+            VP8TransformWHT(dc, dst);
+            // StopProfiling(&stop_watch_VP8TransformWHT, &timeVP8TransformWHT, &countVP8TransformWHT);
+        } else { // only DC is non-zero -> inlined simplified transform
+            int i;
+            const int dc0 = (dc[0] + 3) >> 3;
+            for (i = 0; i < 16 * 16; i += 16) dst[i] = dc0; // same value into position 0 of each of the 16 luma blocks
+        }
+        first = 1; // luma blocks are then parsed from position 1 on
+        ac_proba = bands[0]; // type 0 = Intra16-AC
+    } else {
+        first = 0;
+        ac_proba = bands[3]; // type 3 = Intra4
+    }
+    // StopProfiling(&stop_watch_if1, &timeParseResidualsIf1, &countParseResidualsIf1);
+
+    // StopProfilingWatch stop_watch_loop1;
+    // StartProfiling(&stop_watch_loop1);
+    tnz = mb->nz_ & 0x0f; // low 4 bits hold the luma flags
+    lnz = left_mb->nz_ & 0x0f;
+    for (y = 0; y < 4; ++y) {
+        int l = lnz & 1;
+        uint32_t nz_coeffs = 0;
+        for (x = 0; x < 4; ++x) {
+            const int ctx = l + (tnz & 1);
+            const int nz = GetCoeffs(token_br, ac_proba, ctx, q->y1_mat_, first, dst);
+            l = (nz > first);
+            tnz = (tnz >> 1) | (l << 7);
+            nz_coeffs = NzCodeBits(nz_coeffs, nz, dst[0] != 0);
+            dst += 16; // next 4x4 block
+        }
+        tnz >>= 4;
+        lnz = (lnz >> 1) | (l << 7);
+        non_zero_y = (non_zero_y << 8) | nz_coeffs; // four 2-bit codes per row of blocks
+    }
+    out_t_nz = tnz;
+    out_l_nz = lnz >> 4;
+    // StopProfiling(&stop_watch_loop1, &timeParseResidualsLoop1, &countParseResidualsLoop1);
+
+    // StopProfilingWatch stop_watch_loop2;
+    // StartProfiling(&stop_watch_loop2);
+    for (ch = 0; ch < 4; ch += 2) { // two passes of 2x2 blocks -- presumably U then V, confirm against the caller
+        uint32_t nz_coeffs = 0;
+        tnz = mb->nz_ >> (4 + ch);
+        lnz = left_mb->nz_ >> (4 + ch);
+        for (y = 0; y < 2; ++y) {
+            int l = lnz & 1;
+            for (x = 0; x < 2; ++x) {
+                const int ctx = l + (tnz & 1);
+                const int nz = GetCoeffs(token_br, bands[2], ctx, q->uv_mat_, 0, dst); // type 2 = Chroma
+                l = (nz > 0);
+                tnz = (tnz >> 1) | (l << 3);
+                nz_coeffs = NzCodeBits(nz_coeffs, nz, dst[0] != 0);
+                dst += 16;
+            }
+            tnz >>= 2;
+            lnz = (lnz >> 1) | (l << 5);
+        }
+        // Note: we don't really need the per-4x4 details for U/V blocks.
+        non_zero_uv |= nz_coeffs << (4 * ch);
+        out_t_nz |= (tnz << 4) << ch;
+        out_l_nz |= (lnz & 0xf0) << ch;
+    }
+    mb->nz_ = out_t_nz;
+    left_mb->nz_ = out_l_nz;
+    // StopProfiling(&stop_watch_loop2, &timeParseResidualsLoop2, &countParseResidualsLoop2);
+
+    block->non_zero_y_ = non_zero_y;
+    block->non_zero_uv_ = non_zero_uv;
+
+    // We look at the mode-code of each block and check if some blocks have less
+    // than three non-zero coeffs (code < 2). This is to avoid dithering flat and
+    // empty blocks.
+    block->dither_ = (non_zero_uv & 0xaaaa) ? 0 : q->dither_; // 0xaaaa tests the high bit of each 2-bit code
+
+    StopProfiling(&stop_watch, &timeParseResiduals, &countParseResiduals);
+
+    return !(non_zero_y | non_zero_uv); // will be used for further optimization
+}
+
+//------------------------------------------------------------------------------
+// Main loop
+
+int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    VP8MBData* const block = dec->mb_data_ + dec->mb_x_;
+    VP8MB* const mb = dec->mb_info_ + dec->mb_x_;
+    VP8MB* const left = dec->mb_info_ - 1; // the left context is the entry just before mb_info_[0]
+    int skip;
+
+    if (dec->use_skip_proba_ && block->skip_) {
+        // Skipped macroblock: nothing is parsed, so the non-zero contexts are cleared.
+        skip = block->skip_;
+        left->nz_ = mb->nz_ = 0;
+        if (!block->is_i4x4_) left->nz_dc_ = mb->nz_dc_ = 0;
+        block->non_zero_y_ = 0;
+        block->non_zero_uv_ = 0;
+        block->dither_ = 0;
+    } else {
+        skip = ParseResiduals(dec, mb, token_br);
+    }
+
+    if (dec->filter_type_ > 0) { // store filter info
+        VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_;
+        *finfo = dec->fstrengths_[block->segment_][block->is_i4x4_];
+        finfo->f_inner_ |= !skip;
+    }
+
+    StopProfiling(&stop_watch, &timeVP8DecodeMB, &countVP8DecodeMB);
+
+    return !token_br->eof_;
+}
+
+void VP8InitScanline(VP8Decoder* const dec) {
+    // Resets the left-edge contexts and rewinds mb_x_ to the first macroblock of a row.
+    VP8MB* const left_ctx = dec->mb_info_ - 1;
+    dec->mb_x_ = 0;
+    memset(dec->intra_l_, B_DC_PRED, sizeof(dec->intra_l_));
+    left_ctx->nz_ = left_ctx->nz_dc_ = 0;
+}
+
+static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+    for (dec->mb_y_ = 0; dec->mb_y_ < dec->br_mb_y_; ++dec->mb_y_) {
+        // The token partition of a row is picked by masking the row index with num_parts_ - 1.
+        const int part_idx = dec->mb_y_ & (dec->num_parts_ - 1);
+        VP8BitReader* const token_br = &dec->parts_[part_idx];
+        // Intra modes of the whole row are read from partition 0 (dec->br_).
+        if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
+            return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, "Premature end-of-partition0 encountered.");
+        }
+        while (dec->mb_x_ < dec->mb_w_) {
+            if (!VP8DecodeMB(dec, token_br)) {
+                return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, "Premature end-of-file encountered.");
+            }
+            ++dec->mb_x_;
+        }
+        VP8InitScanline(dec); // Prepare for next scanline
+        // Reconstruct, filter and emit the row.
+        if (!VP8ProcessRow(dec, io)) {
+            return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted.");
+        }
+    }
+    if (dec->mt_method_ > 0 && !WebPGetWorkerInterface()->Sync(&dec->worker_)) return 0;
+
+    StopProfiling(&stop_watch, &timeParseFrame, &countParseFrame);
+
+    return 1;
+}
+
+// Main entry point
+int VP8Decode(VP8Decoder* const dec, VP8Io* const io) {
+    // Decodes one picture. Returns 0 on failure; except for NULL arguments, the decoder is then cleared.
+    int ok;
+    if (dec == NULL) {
+        return 0;
+    }
+    if (io == NULL) {
+        return VP8SetError(dec, VP8_STATUS_INVALID_PARAM, "NULL VP8Io parameter in VP8Decode().");
+    }
+
+    // Parse the headers now unless an earlier VP8GetHeaders() call left the decoder ready.
+    if (!dec->ready_ && !VP8GetHeaders(dec, io)) {
+        return 0;
+    }
+    assert(dec->ready_);
+
+    // Finish setting up the decoding parameter. Will call io->setup().
+    ok = (VP8EnterCritical(dec, io) == VP8_STATUS_OK);
+    if (ok) { // good to go.
+        // Will allocate memory and prepare everything.
+        ok = VP8InitFrame(dec, io);
+
+        // Main decoding loop
+        if (ok) ok = ParseFrame(dec, io);
+
+        // Exit.
+        ok &= VP8ExitCritical(dec, io);
+    }
+
+    if (!ok) {
+        VP8Clear(dec);
+        return 0;
+    }
+
+    dec->ready_ = 0;
+    return ok;
+}
+
+void VP8Clear(VP8Decoder* const dec) {
+    // Ends the worker, frees the alpha decoder and mem_, and marks the decoder not ready. NULL is ignored.
+    if (dec != NULL) {
+        WebPGetWorkerInterface()->End(&dec->worker_);
+        ALPHDelete(dec->alph_dec_);
+        dec->alph_dec_ = NULL;
+        WebPSafeFree(dec->mem_);
+        dec->mem_ = NULL;
+        dec->mem_size_ = 0;
+        memset(&dec->br_, 0, sizeof(dec->br_));
+        dec->ready_ = 0;
+    }
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/dec/vp8i.h b/codec/L2/demos/webpEnc/host/src/dec/vp8i.h
new file mode 100644
index 0000000000..cb648127e1
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dec/vp8i.h
@@ -0,0 +1,313 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// VP8 decoder: internal header.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_DEC_VP8I_H_
+#define WEBP_DEC_VP8I_H_
+
+#include <string.h> // for memcpy()
+#include "./common.h"
+#include "./vp8li.h"
+#include "../utils/bit_reader.h"
+#include "../utils/random.h"
+#include "../utils/thread.h"
+#include "../dsp/dsp.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Various defines and enums
+
+// version numbers
+#define DEC_MAJ_VERSION 0
+#define DEC_MIN_VERSION 5
+#define DEC_REV_VERSION 0
+
+// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
+// Constraints are: We need to store one 16x16 block of luma samples (y),
+// and two 8x8 chroma blocks (u/v). These should preferably be 16-byte aligned,
+// in order to be SIMD-friendly. We also need to store the top, left and
+// top-left samples (from previously decoded blocks), along with four
+// extra top-right samples for luma (intra4x4 prediction only).
+// One possible layout is, using 32 * (17 + 9) bytes:
+//
+//   .+------   <- only 1 pixel high
+//   .|yyyyt.
+//   .|yyyyt.
+//   .|yyyyt.
+//   .|yyyy..
+//   .+--.+--   <- only 1 pixel high
+//   .|uu.|vv
+//   .|uu.|vv
+//
+// Every character is a 4x4 block, with legend:
+//  '.' = unused
+//  'y' = y-samples   'u' = u-samples     'v' = v-samples
+//  '|' = left sample,   '-' = top sample,    '+' = top-left sample
+//  't' = extra top-right sample for 4x4 modes
+#define YUV_SIZE (BPS * 17 + BPS * 9)
+#define Y_SIZE (BPS * 17)
+#define Y_OFF (BPS * 1 + 8)
+#define U_OFF (Y_OFF + BPS * 16 + BPS)
+#define V_OFF (U_OFF + 16)
+
+// minimal width under which lossy multi-threading is always disabled
+#define MIN_WIDTH_FOR_THREADS 512
+
+//------------------------------------------------------------------------------
+// Headers
+
+typedef struct {
+    uint8_t key_frame_; // 1 when bit 0 of the frame tag is clear
+    uint8_t profile_; // bits 1..3 of the frame tag
+    uint8_t show_; // bit 4 of the frame tag
+    uint32_t partition_length_; // remaining bits of the 24-bit frame tag: size in bytes of partition #0
+} VP8FrameHeader;
+
+typedef struct {
+    uint16_t width_; // low 14 bits of the 16-bit width field
+    uint16_t height_; // low 14 bits of the 16-bit height field
+    uint8_t xscale_; // top 2 bits of the width field
+    uint8_t yscale_; // top 2 bits of the height field
+    uint8_t colorspace_; // 0 = YCbCr
+    uint8_t clamp_type_; // 1-bit flag read right after colorspace_
+} VP8PictureHeader;
+
+// segment features
+typedef struct {
+    int use_segment_;
+    int update_map_;                          // whether to update the segment map or not
+    int absolute_delta_;                      // absolute or delta values for quantizer and filter
+    int8_t quantizer_[NUM_MB_SEGMENTS];       // quantization changes
+    int8_t filter_strength_[NUM_MB_SEGMENTS]; // filter strength for segments
+} VP8SegmentHeader;
+
+// probas associated to one of the contexts
+typedef uint8_t VP8ProbaArray[NUM_PROBAS];
+
+typedef struct { // all the probas associated to one band
+    VP8ProbaArray probas_[NUM_CTX];
+} VP8BandProbas;
+
+// Struct collecting all frame-persistent probabilities.
+typedef struct {
+    uint8_t segments_[MB_FEATURE_TREE_PROBS];
+    // Type: 0:Intra16-AC  1:Intra16-DC   2:Chroma   3:Intra4
+    VP8BandProbas bands_[NUM_TYPES][NUM_BANDS];
+    const VP8BandProbas* bands_ptr_[NUM_TYPES][16 + 1];
+} VP8Proba;
+
+// Filter parameters
+typedef struct {
+    int simple_;    // 0=complex, 1=simple
+    int level_;     // [0..63]
+    int sharpness_; // [0..7]
+    int use_lf_delta_;
+    int ref_lf_delta_[NUM_REF_LF_DELTAS];
+    int mode_lf_delta_[NUM_MODE_LF_DELTAS];
+} VP8FilterHeader;
+
+//------------------------------------------------------------------------------
+// Information about the macroblocks.
+
+typedef struct {         // filter specs
+    uint8_t f_limit_;    // filter limit in [3..189], or 0 if no filtering
+    uint8_t f_ilevel_;   // inner limit in [1..63]
+    uint8_t f_inner_;    // do inner filtering?
+    uint8_t hev_thresh_; // high edge variance threshold in [0..2]
+} VP8FInfo;
+
+typedef struct {    // Top/Left Contexts used for syntax-parsing
+    uint8_t nz_;    // non-zero AC/DC coeffs (4bit for luma + 4bit for chroma)
+    uint8_t nz_dc_; // non-zero DC coeff (1bit)
+} VP8MB;
+
+// Dequantization matrices
+typedef int quant_t[2]; // [DC / AC].  Can be 'uint16_t[2]' too (~slower).
+typedef struct {
+    quant_t y1_mat_, y2_mat_, uv_mat_;
+
+    int uv_quant_; // U/V quantizer value
+    int dither_;   // dithering amplitude (0 = off, max=255)
+} VP8QuantMatrix;
+
+// Data needed to reconstruct a macroblock
+typedef struct {
+    int16_t coeffs_[384]; // 384 coeffs = (16+4+4) * 4*4
+    uint8_t is_i4x4_;     // true if intra4x4
+    uint8_t imodes_[16];  // one 16x16 mode (#0) or sixteen 4x4 modes
+    uint8_t uvmode_;      // chroma prediction mode
+    // bit-wise info about the content of each sub-4x4 blocks (in decoding order).
+    // Each of the 4x4 blocks for y/u/v is associated with a 2b code according to:
+    //   code=0 -> no coefficient
+    //   code=1 -> only DC
+    //   code=2 -> first three coefficients are non-zero
+    //   code=3 -> more than three coefficients are non-zero
+    // This allows to call specialized transform functions.
+    uint32_t non_zero_y_;
+    uint32_t non_zero_uv_;
+    uint8_t dither_; // local dithering strength (deduced from non_zero_*)
+    uint8_t skip_;
+    uint8_t segment_;
+} VP8MBData;
+
+// Persistent information needed by the parallel processing
+typedef struct {
+    int id_;             // cache row to process (in [0..2])
+    int mb_y_;           // macroblock position of the row
+    int filter_row_;     // true if row-filtering is needed
+    VP8FInfo* f_info_;   // filter strengths (swapped with dec->f_info_)
+    VP8MBData* mb_data_; // reconstruction data (swapped with dec->mb_data_)
+    VP8Io io_;           // copy of the VP8Io to pass to put()
+} VP8ThreadContext;
+
+// Saved top samples, per macroblock. Fits into a cache-line.
+typedef struct { uint8_t y[16], u[8], v[8]; } VP8TopSamples;
+
+//------------------------------------------------------------------------------
+// VP8Decoder: the main opaque structure handed over to user
+
+struct VP8Decoder {
+    VP8StatusCode status_;
+    int ready_;             // true if ready to decode a picture with VP8Decode()
+    const char* error_msg_; // set when status_ is not OK.
+
+    // Main data source
+    VP8BitReader br_;
+
+    // headers
+    VP8FrameHeader frm_hdr_;
+    VP8PictureHeader pic_hdr_;
+    VP8FilterHeader filter_hdr_;
+    VP8SegmentHeader segment_hdr_;
+
+    // Worker
+    WebPWorker worker_;
+    int mt_method_;               // multi-thread method: 0=off, 1=[parse+recon][filter]
+                                  // 2=[parse][recon+filter]
+    int cache_id_;                // current cache row
+    int num_caches_;              // number of cached rows of 16 pixels (1, 2 or 3)
+    VP8ThreadContext thread_ctx_; // Thread context
+
+    // dimension, in macroblock units.
+    int mb_w_, mb_h_;
+
+    // Macroblock to process/filter, depending on cropping and filter_type.
+    int tl_mb_x_, tl_mb_y_; // top-left MB that must be in-loop filtered
+    int br_mb_x_, br_mb_y_; // last bottom-right MB that must be decoded
+
+    // number of partitions.
+    int num_parts_;
+    // per-partition boolean decoders.
+    VP8BitReader parts_[MAX_NUM_PARTITIONS];
+
+    // Dithering strength, deduced from decoding options
+    int dither_;             // whether to use dithering or not
+    VP8Random dithering_rg_; // random generator for dithering
+
+    // dequantization (one set of DC/AC dequant factor per segment)
+    VP8QuantMatrix dqm_[NUM_MB_SEGMENTS];
+
+    // probabilities
+    VP8Proba proba_;
+    int use_skip_proba_;
+    uint8_t skip_p_;
+
+    // Boundary data cache and persistent buffers.
+    uint8_t* intra_t_;   // top intra modes values: 4 * mb_w_
+    uint8_t intra_l_[4]; // left intra modes values
+
+    VP8TopSamples* yuv_t_; // top y/u/v samples
+
+    VP8MB* mb_info_;   // contextual macroblock info (mb_w_ + 1)
+    VP8FInfo* f_info_; // filter strength info
+    uint8_t* yuv_b_;   // main block for Y/U/V (size = YUV_SIZE)
+
+    uint8_t* cache_y_; // macroblock row for storing unfiltered samples
+    uint8_t* cache_u_;
+    uint8_t* cache_v_;
+    int cache_y_stride_;
+    int cache_uv_stride_;
+
+    // main memory chunk for the above data. Persistent.
+    void* mem_;
+    size_t mem_size_;
+
+    // Per macroblock non-persistent infos.
+    int mb_x_, mb_y_;    // current position, in macroblock units
+    VP8MBData* mb_data_; // parsed reconstruction data
+
+    // Filtering side-info
+    int filter_type_;                         // 0=off, 1=simple, 2=complex
+    VP8FInfo fstrengths_[NUM_MB_SEGMENTS][2]; // precalculated per-segment/type
+
+    // Alpha
+    struct ALPHDecoder* alph_dec_; // alpha-plane decoder object
+    const uint8_t* alpha_data_;    // compressed alpha data (if present)
+    size_t alpha_data_size_;
+    int is_alpha_decoded_; // true if alpha_data_ is decoded in alpha_plane_
+    uint8_t* alpha_plane_; // output. Persistent, contains the whole data.
+    int alpha_dithering_;  // derived from decoding options (0=off, 100=full).
+};
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+// in vp8.c
+int VP8SetError(VP8Decoder* const dec, VP8StatusCode error, const char* const msg);
+
+// in tree.c
+void VP8ResetProba(VP8Proba* const proba);
+void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec);
+// parses one row of intra mode data in partition 0, returns !eof
+int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec);
+
+// in quant.c
+void VP8ParseQuant(VP8Decoder* const dec);
+
+// in frame.c
+int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io);
+// Call io->setup() and finish setting up scan parameters.
+// After this call returns, one must always call VP8ExitCritical() with the
+// same parameters. Both functions should be used in pair. Returns VP8_STATUS_OK
+// if ok, otherwise sets and returns the error status on *dec.
+VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io);
+// Must always be called in pair with VP8EnterCritical().
+// Returns false in case of error.
+int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
+// Return the multi-threading method to use (0=off), depending
+// on options and bitstream size. Only for lossy decoding.
+int VP8GetThreadMethod(const WebPDecoderOptions* const options,
+                       const WebPHeaderStructure* const headers,
+                       int width,
+                       int height);
+// Initialize dithering post-process if needed.
+void VP8InitDithering(const WebPDecoderOptions* const options, VP8Decoder* const dec);
+// Process the last decoded row (filtering + output).
+int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io);
+// To be called at the start of a new scanline, to initialize predictors.
+void VP8InitScanline(VP8Decoder* const dec);
+// Decode one macroblock. Returns false if there is not enough data.
+int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br);
+
+// in alpha.c
+const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec, int row, int num_rows);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_DEC_VP8I_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/dec/vp8l.c b/codec/L2/demos/webpEnc/host/src/dec/vp8l.c
new file mode 100644
index 0000000000..04b32af3ce
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dec/vp8l.c
@@ -0,0 +1,1601 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// main entry for the decoder
+//
+// Authors: Vikas Arora (vikaas.arora@gmail.com)
+//          Jyrki Alakuijala (jyrki@google.com)
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "./alphai.h"
+#include "./vp8li.h"
+#include "../dsp/dsp.h"
+#include "../dsp/lossless.h"
+#include "../dsp/yuv.h"
+#include "../utils/endian_inl.h"
+#include "../utils/huffman.h"
+#include "../utils/utils.h"
+#include "../utils/profiling.h"
+
+#define NUM_ARGB_CACHE_ROWS 16
+
+static const int kCodeLengthLiterals = 16;   // code-length symbols below this value are literal lengths
+static const int kCodeLengthRepeatCode = 16; // repeat symbol that re-uses the previous non-zero length
+static const int kCodeLengthExtraBits[3] = {2, 3, 7};      // extra bits read for repeat symbols 16, 17, 18
+static const int kCodeLengthRepeatOffsets[3] = {3, 3, 11}; // value added to those extra bits to get the repeat count
+
+// -----------------------------------------------------------------------------
+//  Five Huffman codes are used at each meta code, listed in HuffIndex order:
+//  1. green + length prefix codes + color cache codes,
+//  2. red,
+//  3. blue,
+//  4. alpha, and,
+//  5. distance prefix codes.
+typedef enum { GREEN = 0, RED = 1, BLUE = 2, ALPHA = 3, DIST = 4 } HuffIndex; // slot of each code in a group's htrees[]
+
+static const uint16_t kAlphabetSize[HUFFMAN_CODES_PER_META_CODE] = { // base alphabet size per HuffIndex slot
+    NUM_LITERAL_CODES + NUM_LENGTH_CODES, NUM_LITERAL_CODES, NUM_LITERAL_CODES, NUM_LITERAL_CODES, NUM_DISTANCE_CODES};
+
+static const uint8_t kLiteralMap[HUFFMAN_CODES_PER_META_CODE] = {0, 1, 1, 1, 0}; // 1 = slot tested for a trivial literal
+
+#define NUM_CODE_LENGTH_CODES 19
+static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = {17, 18, 0, 1,  2,  3,  4,  5,  16, 6,
+                                                                    7,  8,  9, 10, 11, 12, 13, 14, 15}; // read order
+
+#define CODE_TO_PLANE_CODES 120
+static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = { // entry: high nibble = row offset, low nibble = 8 - column offset
+    0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a, 0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b,
+    0x36, 0x3a, 0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b, 0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45,
+    0x4b, 0x34, 0x3c, 0x03, 0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c, 0x55, 0x5b, 0x33, 0x3d,
+    0x68, 0x02, 0x67, 0x69, 0x12, 0x1e, 0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b, 0x32, 0x3e,
+    0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f, 0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b,
+    0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41, 0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d,
+    0x51, 0x5f, 0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70};
+
+// Memory needed for lookup tables of one Huffman tree group. Red, blue, alpha
+// and distance alphabets are constant (256 for red, blue and alpha, 40 for
+// distance) and lookup table sizes for them in worst case are 630 and 410
+// respectively. Size of green alphabet depends on color cache size and is equal
+// to 256 (green component values) + 24 (length prefix values)
+// + color_cache_size (between 0 and 2048).
+// All values computed for 8-bit first level lookup with Mark Adler's tool:
+// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c
+#define FIXED_TABLE_SIZE (630 * 3 + 410)
+static const int kTableSize[12] = {FIXED_TABLE_SIZE + 654,  FIXED_TABLE_SIZE + 656,  FIXED_TABLE_SIZE + 658,
+                                   FIXED_TABLE_SIZE + 662,  FIXED_TABLE_SIZE + 670,  FIXED_TABLE_SIZE + 686,
+                                   FIXED_TABLE_SIZE + 718,  FIXED_TABLE_SIZE + 782,  FIXED_TABLE_SIZE + 912,
+                                   FIXED_TABLE_SIZE + 1168, FIXED_TABLE_SIZE + 1680, FIXED_TABLE_SIZE + 2704}; // indexed by color_cache_bits
+
+static int DecodeImageStream(
+    int xsize, int ysize, int is_level0, VP8LDecoder* const dec, uint32_t** const decoded_data);
+
+//------------------------------------------------------------------------------
+
+int VP8LCheckSignature(const uint8_t* const data, size_t size) { // 1 if 'data' starts like a lossless frame header
+    return !(size < VP8L_FRAME_HEADER_SIZE || data[0] != VP8L_MAGIC_BYTE || (data[4] >> 5) != 0); // top 3 bits: version
+}
+
+static int ReadImageInfo(VP8LBitReader* const br, int* const width, int* const height, int* const has_alpha) {
+    const int magic_ok = (VP8LReadBits(br, 8) == VP8L_MAGIC_BYTE); // the signature byte comes first
+    if (!magic_ok) return 0;
+    *width = 1 + VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS); // dimensions are stored minus one
+    *height = 1 + VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS);
+    *has_alpha = VP8LReadBits(br, 1);
+    return (VP8LReadBits(br, VP8L_VERSION_BITS) == 0) ? !br->eos_ : 0; // only version 0 is accepted
+}
+
+int VP8LGetInfo(const uint8_t* data, size_t data_size, int* const width, int* const height, int* const has_alpha) {
+    // Probes the lossless header in 'data'. Returns 1 and fills whichever of
+    // 'width', 'height' and 'has_alpha' are non-NULL on success, 0 otherwise.
+    int img_w, img_h, img_alpha;
+    VP8LBitReader reader;
+    if (data == NULL || data_size < VP8L_FRAME_HEADER_SIZE) {
+        return 0; // not enough data
+    }
+    if (!VP8LCheckSignature(data, data_size)) {
+        return 0; // bad signature
+    }
+    VP8LInitBitReader(&reader, data, data_size);
+    if (!ReadImageInfo(&reader, &img_w, &img_h, &img_alpha)) return 0;
+    if (width != NULL) *width = img_w;
+    if (height != NULL) *height = img_h;
+    if (has_alpha != NULL) *has_alpha = img_alpha;
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE int GetCopyDistance(int distance_symbol, VP8LBitReader* const br) {
+    // Symbols 0..3 stand for the values 1..4; larger ones add extra bits to a base.
+    if (distance_symbol >= 4) {
+        const int num_extra = (distance_symbol - 2) >> 1;
+        const int base = (2 + (distance_symbol & 1)) << num_extra;
+        return base + VP8LReadBits(br, num_extra) + 1;
+    }
+    return distance_symbol + 1;
+}
+
+static WEBP_INLINE int GetCopyLength(int length_symbol, VP8LBitReader* const br) {
+    const int length = GetCopyDistance(length_symbol, br); // lengths share the distance prefix coding
+    return length;
+}
+
+static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) {
+    int packed, dy, dx, dist;
+    // Codes above the table size are plain distances, shifted by the table size.
+    if (plane_code > CODE_TO_PLANE_CODES) return plane_code - CODE_TO_PLANE_CODES;
+    // Table entry: high nibble is the row offset, low nibble is (8 - column offset).
+    packed = kCodeToPlane[plane_code - 1];
+    dy = packed >> 4;
+    dx = 8 - (packed & 0xf);
+    dist = dy * xsize + dx;
+    return (dist < 1) ? 1 : dist; // dist<1 can happen if xsize is very small
+}
+
+//------------------------------------------------------------------------------
+// Decodes the next Huffman code from bit-stream.
+// FillBitWindow(br) needs to be called at minimum every second call
+// to ReadSymbol, in order to pre-fetch enough bits.
+static WEBP_INLINE int ReadSymbol(const HuffmanCode* table, VP8LBitReader* const br) {
+    const HuffmanCode* entry = table + (VP8LPrefetchBits(br) & HUFFMAN_TABLE_MASK);
+    const int second_level_bits = entry->bits - HUFFMAN_TABLE_BITS;
+    if (second_level_bits > 0) {
+        // The root entry points at a second-level table: skip the root bits and
+        // index that table with the next 'second_level_bits' bits.
+        uint32_t next_bits;
+        VP8LSetBitPos(br, br->bit_pos_ + HUFFMAN_TABLE_BITS);
+        next_bits = VP8LPrefetchBits(br);
+        entry += entry->value + (next_bits & ((1 << second_level_bits) - 1));
+    }
+    VP8LSetBitPos(br, br->bit_pos_ + entry->bits); // consume the bits of the decoded code
+    return entry->value;
+}
+
+// Reads packed symbol depending on GREEN channel
+#define BITS_SPECIAL_MARKER 0x100 // something large enough (and a bit-mask)
+#define PACKED_NON_LITERAL_CODE 0 // must be < NUM_LITERAL_CODES
+static WEBP_INLINE int ReadPackedSymbols(const HTreeGroup* group, VP8LBitReader* const br, uint32_t* const dst) {
+    const uint32_t index = VP8LPrefetchBits(br) & (HUFFMAN_PACKED_TABLE_SIZE - 1);
+    const HuffmanCode32 packed = group->packed_table[index];
+    assert(group->use_packed_table);
+    if (packed.bits >= BITS_SPECIAL_MARKER) { // marked entry: a lone non-literal green symbol, 'dst' untouched
+        VP8LSetBitPos(br, br->bit_pos_ + packed.bits - BITS_SPECIAL_MARKER);
+        assert(packed.value >= NUM_LITERAL_CODES);
+        return packed.value;
+    }
+    // Unmarked entry: a whole pixel was packed; emit it and consume all its bits.
+    VP8LSetBitPos(br, br->bit_pos_ + packed.bits);
+    *dst = packed.value;
+    return PACKED_NON_LITERAL_CODE;
+}
+
+static int AccumulateHCode(HuffmanCode hcode, int shift, HuffmanCode32* const huff) {
+    huff->value |= (uint32_t)hcode.value << shift; // place the symbol at bit position 'shift'
+    huff->bits += hcode.bits;                      // running total of bits used by the packed codes
+    assert(huff->bits <= HUFFMAN_TABLE_BITS);
+    return hcode.bits;
+}
+
+static void BuildPackedTable(HTreeGroup* const htree_group) { // Fills packed_table[] from the four channel trees.
+    uint32_t code;
+    for (code = 0; code < HUFFMAN_PACKED_TABLE_SIZE; ++code) { // one entry per possible bit pattern
+        uint32_t bits = code;
+        HuffmanCode32* const huff = &htree_group->packed_table[bits];
+        HuffmanCode hcode = htree_group->htrees[GREEN][bits];
+        if (hcode.value >= NUM_LITERAL_CODES) {
+            huff->bits = hcode.bits + BITS_SPECIAL_MARKER; // marker flags a non-literal green symbol
+            huff->value = hcode.value;
+        } else {
+            huff->bits = 0;
+            huff->value = 0;
+            bits >>= AccumulateHCode(hcode, 8, huff); // green goes to bits 8..15; drop the bits it used
+            bits >>= AccumulateHCode(htree_group->htrees[RED][bits], 16, huff); // red goes to bits 16..23
+            bits >>= AccumulateHCode(htree_group->htrees[BLUE][bits], 0, huff); // blue goes to bits 0..7
+            bits >>= AccumulateHCode(htree_group->htrees[ALPHA][bits], 24, huff); // alpha goes to bits 24..31
+            (void)bits;
+        }
+    }
+}
+
+static int ReadHuffmanCodeLengths(VP8LDecoder* const dec,
+                                  const int* const code_length_code_lengths,
+                                  int num_symbols,
+                                  int* const code_lengths) {
+    int ok = 0; // Result: 1 once all code lengths were decoded, 0 on any bitstream error.
+    VP8LBitReader* const br = &dec->br_;
+    int symbol;
+    int max_symbol;
+    int prev_code_len = DEFAULT_CODE_LENGTH;
+    HuffmanCode table[1 << LENGTHS_TABLE_BITS];
+
+    if (!VP8LBuildHuffmanTable(table, LENGTHS_TABLE_BITS, code_length_code_lengths, NUM_CODE_LENGTH_CODES)) {
+        goto End;
+    }
+
+    if (VP8LReadBits(br, 1)) { // an explicit cap on the number of codes to read is present
+        const int length_nbits = 2 + 2 * VP8LReadBits(br, 3);
+        max_symbol = 2 + VP8LReadBits(br, length_nbits);
+        if (max_symbol > num_symbols) {
+            goto End;
+        }
+    } else {
+        max_symbol = num_symbols;
+    }
+
+    symbol = 0;
+    while (symbol < num_symbols) {
+        const HuffmanCode* p;
+        int code_len;
+        if (max_symbol-- == 0) break; // cap reached: the remaining entries are left as they are
+        VP8LFillBitWindow(br);
+        p = &table[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK];
+        VP8LSetBitPos(br, br->bit_pos_ + p->bits);
+        code_len = p->value;
+        if (code_len < kCodeLengthLiterals) {
+            code_lengths[symbol++] = code_len;
+            if (code_len != 0) prev_code_len = code_len; // only non-zero lengths are remembered for repeats
+        } else {
+            const int use_prev = (code_len == kCodeLengthRepeatCode);
+            const int slot = code_len - kCodeLengthLiterals;
+            const int extra_bits = kCodeLengthExtraBits[slot];
+            const int repeat_offset = kCodeLengthRepeatOffsets[slot];
+            int repeat = VP8LReadBits(br, extra_bits) + repeat_offset;
+            if (symbol + repeat > num_symbols) { // the run would write past the alphabet
+                goto End;
+            } else {
+                const int length = use_prev ? prev_code_len : 0; // the other repeat symbols emit runs of zeros
+                while (repeat-- > 0) code_lengths[symbol++] = length;
+            }
+        }
+    }
+    ok = 1;
+
+End:
+    if (!ok) dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+    return ok;
+}
+
+// Reads one Huffman code and builds 'table'; returns the built table's size, or
+// 0 on error. 'code_lengths' is a pre-allocated scratch buffer for the lengths.
+static int ReadHuffmanCode(int alphabet_size,
+                           VP8LDecoder* const dec,
+                           int* const code_lengths,
+                           HuffmanCode* const table) {
+    int ok = 0;
+    int size = 0;
+    VP8LBitReader* const br = &dec->br_;
+    const int simple_code = VP8LReadBits(br, 1);
+
+    memset(code_lengths, 0, alphabet_size * sizeof(*code_lengths));
+
+    if (simple_code) { // Read symbols, codes & code lengths directly.
+        const int num_symbols = VP8LReadBits(br, 1) + 1;
+        const int first_symbol_len_code = VP8LReadBits(br, 1);
+        // The first symbol is stored on either 1 bit or 8 bits.
+        int symbol = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8);
+        code_lengths[symbol] = 1;
+        // The second symbol (if present) is always stored on 8 bits.
+        if (num_symbols == 2) {
+            symbol = VP8LReadBits(br, 8);
+            code_lengths[symbol] = 1;
+        }
+        ok = 1;
+    } else { // Decode Huffman-coded code lengths.
+        int i;
+        int code_length_code_lengths[NUM_CODE_LENGTH_CODES] = {0};
+        const int num_codes = VP8LReadBits(br, 4) + 4;
+        if (num_codes > NUM_CODE_LENGTH_CODES) {
+            dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+            return 0;
+        }
+
+        for (i = 0; i < num_codes; ++i) { // lengths arrive in kCodeLengthCodeOrder, 3 bits each
+            code_length_code_lengths[kCodeLengthCodeOrder[i]] = VP8LReadBits(br, 3);
+        }
+        ok = ReadHuffmanCodeLengths(dec, code_length_code_lengths, alphabet_size, code_lengths);
+    }
+
+    ok = ok && !br->eos_; // running out of input also counts as a failure
+    if (ok) {
+        size = VP8LBuildHuffmanTable(table, HUFFMAN_TABLE_BITS, code_lengths, alphabet_size);
+    }
+    if (!ok || size == 0) {
+        dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+        return 0;
+    }
+    return size;
+}
+
+static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, int color_cache_bits, int allow_recursion) {
+    int i, j; // Returns 1 and stores the groups/tables in dec->hdr_ on success; 0 after freeing everything otherwise.
+    VP8LBitReader* const br = &dec->br_;
+    VP8LMetadata* const hdr = &dec->hdr_;
+    uint32_t* huffman_image = NULL;
+    HTreeGroup* htree_groups = NULL;
+    HuffmanCode* huffman_tables = NULL;
+    HuffmanCode* next = NULL;
+    int num_htree_groups = 1;
+    int max_alphabet_size = 0;
+    int* code_lengths = NULL;
+    const int table_size = kTableSize[color_cache_bits]; // assumes color_cache_bits is within 0..11 — TODO confirm at callers
+
+    if (allow_recursion && VP8LReadBits(br, 1)) {
+        // use meta Huffman codes.
+        const int huffman_precision = VP8LReadBits(br, 3) + 2;
+        const int huffman_xsize = VP8LSubSampleSize(xsize, huffman_precision);
+        const int huffman_ysize = VP8LSubSampleSize(ysize, huffman_precision);
+        const int huffman_pixs = huffman_xsize * huffman_ysize;
+        if (!DecodeImageStream(huffman_xsize, huffman_ysize, 0, dec, &huffman_image)) {
+            goto Error;
+        }
+        hdr->huffman_subsample_bits_ = huffman_precision;
+        for (i = 0; i < huffman_pixs; ++i) {
+            // The huffman data is stored in red and green bytes.
+            const int group = (huffman_image[i] >> 8) & 0xffff;
+            huffman_image[i] = group; // keep only the group index in the meta image
+            if (group >= num_htree_groups) {
+                num_htree_groups = group + 1; // group count is the largest index seen plus one
+            }
+        }
+    }
+
+    if (br->eos_) goto Error;
+
+    // Find maximum alphabet size for the htree group.
+    for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
+        int alphabet_size = kAlphabetSize[j];
+        if (j == 0 && color_cache_bits > 0) {
+            alphabet_size += 1 << color_cache_bits; // the green alphabet also holds the color cache codes
+        }
+        if (max_alphabet_size < alphabet_size) {
+            max_alphabet_size = alphabet_size;
+        }
+    }
+
+    huffman_tables = (HuffmanCode*)WebPSafeMalloc(num_htree_groups * table_size, sizeof(*huffman_tables));
+    htree_groups = VP8LHtreeGroupsNew(num_htree_groups);
+    code_lengths = (int*)WebPSafeCalloc((uint64_t)max_alphabet_size, sizeof(*code_lengths));
+
+    if (htree_groups == NULL || code_lengths == NULL || huffman_tables == NULL) {
+        dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+        goto Error;
+    }
+
+    next = huffman_tables; // tables of all groups are laid out back to back in this one buffer
+    for (i = 0; i < num_htree_groups; ++i) {
+        HTreeGroup* const htree_group = &htree_groups[i];
+        HuffmanCode** const htrees = htree_group->htrees;
+        int size;
+        int total_size = 0;
+        int is_trivial_literal = 1;
+        int max_bits = 0;
+        for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
+            int alphabet_size = kAlphabetSize[j];
+            htrees[j] = next;
+            if (j == 0 && color_cache_bits > 0) {
+                alphabet_size += 1 << color_cache_bits;
+            }
+            size = ReadHuffmanCode(alphabet_size, dec, code_lengths, next);
+            if (size == 0) {
+                goto Error;
+            }
+            if (is_trivial_literal && kLiteralMap[j] == 1) {
+                is_trivial_literal = (next->bits == 0); // a zero-bit first entry marks a trivial channel
+            }
+            total_size += next->bits;
+            next += size;
+            if (j <= ALPHA) { // only the four color channels count towards max_bits
+                int local_max_bits = code_lengths[0];
+                int k;
+                for (k = 1; k < alphabet_size; ++k) {
+                    if (code_lengths[k] > local_max_bits) {
+                        local_max_bits = code_lengths[k];
+                    }
+                }
+                max_bits += local_max_bits;
+            }
+        }
+        htree_group->is_trivial_literal = is_trivial_literal;
+        htree_group->is_trivial_code = 0;
+        if (is_trivial_literal) {
+            const int red = htrees[RED][0].value;
+            const int blue = htrees[BLUE][0].value;
+            const int alpha = htrees[ALPHA][0].value;
+            htree_group->literal_arb = ((uint32_t)alpha << 24) | (red << 16) | blue; // pixel value without green
+            if (total_size == 0 && htrees[GREEN][0].value < NUM_LITERAL_CODES) {
+                htree_group->is_trivial_code = 1;
+                htree_group->literal_arb |= htrees[GREEN][0].value << 8; // green is constant too: full pixel known
+            }
+        }
+        htree_group->use_packed_table = !htree_group->is_trivial_code && (max_bits < HUFFMAN_PACKED_BITS);
+        if (htree_group->use_packed_table) BuildPackedTable(htree_group);
+    }
+    WebPSafeFree(code_lengths);
+
+    // All OK. Finalize pointers and return.
+    hdr->huffman_image_ = huffman_image;
+    hdr->num_htree_groups_ = num_htree_groups;
+    hdr->htree_groups_ = htree_groups;
+    hdr->huffman_tables_ = huffman_tables;
+    return 1;
+
+Error: // release everything allocated so far; nothing is stored in 'hdr' on this path except the subsample bits
+    WebPSafeFree(code_lengths);
+    WebPSafeFree(huffman_image);
+    WebPSafeFree(huffman_tables);
+    VP8LHtreeGroupsFree(htree_groups);
+    return 0;
+}
+
+//------------------------------------------------------------------------------
+// Scaling.
+
+static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) { // Returns 1 on success, 0 if allocation fails.
+    const int num_channels = 4;
+    const int in_width = io->mb_w;
+    const int out_width = io->scaled_width;
+    const int in_height = io->mb_h;
+    const int out_height = io->scaled_height;
+    const uint64_t work_size = 2 * num_channels * (uint64_t)out_width;
+    rescaler_t* work; // Rescaler work area.
+    const uint64_t scaled_data_size = (uint64_t)out_width;
+    uint32_t* scaled_data; // Temporary storage for scaled BGRA data.
+    const uint64_t memory_size = // one allocation: [WebPRescaler][work area][one scaled output row]
+        sizeof(*dec->rescaler) + work_size * sizeof(*work) + scaled_data_size * sizeof(*scaled_data);
+    uint8_t* memory = (uint8_t*)WebPSafeMalloc(memory_size, sizeof(*memory));
+    if (memory == NULL) {
+        dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+        return 0;
+    }
+    assert(dec->rescaler_memory == NULL);
+    dec->rescaler_memory = memory; // base pointer of the whole allocation is kept here
+
+    dec->rescaler = (WebPRescaler*)memory; // carve the three regions out of the buffer, in order
+    memory += sizeof(*dec->rescaler);
+    work = (rescaler_t*)memory;
+    memory += work_size * sizeof(*work);
+    scaled_data = (uint32_t*)memory;
+
+    WebPRescalerInit(dec->rescaler, in_width, in_height, (uint8_t*)scaled_data, out_width, out_height, 0, num_channels,
+                     work);
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+// Export to ARGB
+
+// We have special "export" function since we need to convert from BGRA
+static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace, int rgba_stride, uint8_t* const rgba) {
+    uint32_t* const scaled_row = (uint32_t*)rescaler->dst; // row buffer the rescaler exports into
+    const int row_width = rescaler->dst_width;
+    int rows_written;
+    // Drain every row the rescaler has ready, converting each one into 'rgba'.
+    for (rows_written = 0; WebPRescalerHasPendingOutput(rescaler); ++rows_written) {
+        uint8_t* const out_row = rgba + rows_written * rgba_stride;
+        WebPRescalerExportRow(rescaler);
+        WebPMultARGBRow(scaled_row, row_width, 1);
+        VP8LConvertFromBGRA(scaled_row, row_width, colorspace, out_row);
+    }
+    return rows_written;
+}
+
+// Emit scaled rows.
+static int EmitRescaledRowsRGBA(
+    const VP8LDecoder* const dec, uint8_t* in, int in_stride, int mb_h, uint8_t* const out, int out_stride) {
+    WebPRescaler* const rescaler = dec->rescaler;
+    const WEBP_CSP_MODE colorspace = dec->output_->colorspace;
+    int rows_fed = 0;     // input rows handed to the rescaler so far
+    int rows_emitted = 0; // output rows written to 'out' so far (the return value)
+    while (rows_fed < mb_h) {
+        uint8_t* const src_row = in + rows_fed * in_stride;
+        const int rows_remaining = mb_h - rows_fed;
+        const int rows_wanted = WebPRescaleNeededLines(rescaler, rows_remaining);
+        assert(rows_wanted > 0 && rows_wanted <= rows_remaining);
+        WebPMultARGBRows(src_row, in_stride, rescaler->src_width, rows_wanted, 0);
+        WebPRescalerImport(rescaler, rows_remaining, src_row, in_stride);
+        rows_fed += rows_wanted;
+        rows_emitted += Export(rescaler, colorspace, out_stride, out + rows_emitted * out_stride);
+    }
+    return rows_emitted;
+}
+
+// Emit rows without any scaling.
+static int EmitRows(WEBP_CSP_MODE colorspace,
+                    const uint8_t* row_in,
+                    int in_stride,
+                    int mb_w,
+                    int mb_h,
+                    uint8_t* const out,
+                    int out_stride) {
+    uint8_t* dst_row = out;
+    int y;
+    for (y = 0; y < mb_h; ++y) {
+        VP8LConvertFromBGRA((const uint32_t*)row_in, mb_w, colorspace, dst_row);
+        row_in += in_stride;
+        dst_row += out_stride;
+    }
+    return mb_h; // one output row per input row
+}
+
+//------------------------------------------------------------------------------
+// Export to YUVA
+
+static void ConvertToYUVA(const uint32_t* const src, int width, int y_pos, const WebPDecBuffer* const output) {
+    const WebPYUVABuffer* const yuva = &output->u.YUVA;
+    const int uv_row = y_pos >> 1; // chroma planes are addressed with one row per two luma rows
+    uint8_t* const y_dst = yuva->y + y_pos * yuva->y_stride;
+    uint8_t* const u_dst = yuva->u + uv_row * yuva->u_stride;
+    uint8_t* const v_dst = yuva->v + uv_row * yuva->v_stride;
+    const int is_even_row = !(y_pos & 1);
+
+    // Luma first.
+    WebPConvertARGBToY(src, y_dst, width);
+
+    // Chroma next: even rows store their values, odd rows are averaged with
+    // the values already stored for the row pair.
+    WebPConvertARGBToUV(src, u_dst, v_dst, width, is_even_row);
+    // Alpha last, only when the output buffer has an alpha plane.
+    if (yuva->a != NULL) {
+        uint8_t* const a_dst = yuva->a + y_pos * yuva->a_stride;
+#if defined(WORDS_BIGENDIAN)
+        WebPExtractAlpha((uint8_t*)src + 0, 0, width, 1, a_dst, 0);
+#else
+        WebPExtractAlpha((uint8_t*)src + 3, 0, width, 1, a_dst, 0);
+#endif
+    }
+}
+
+static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) {
+    // Drains the rescaler's pending rows into the YUVA output, starting at
+    // output row 'y_pos'. Returns how many rows were written.
+    WebPRescaler* const scaler = dec->rescaler;
+    uint32_t* const scaled_row = (uint32_t*)scaler->dst;
+    const int row_width = scaler->dst_width;
+    const int first_row = y_pos;
+    for (; WebPRescalerHasPendingOutput(scaler); ++y_pos) {
+        WebPRescalerExportRow(scaler);
+        WebPMultARGBRow(scaled_row, row_width, 1);
+        ConvertToYUVA(scaled_row, row_width, y_pos, dec->output_);
+    }
+    return y_pos - first_row;
+}
+
+static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec, uint8_t* in, int in_stride, int mb_h) {
+    WebPRescaler* const scaler = dec->rescaler;
+    int out_row = dec->last_out_row_; // next output row to write; its final value is returned
+    int rows_remaining = mb_h;        // input rows not yet handed to the rescaler
+    while (rows_remaining > 0) {
+        const int rows_wanted = WebPRescaleNeededLines(scaler, rows_remaining);
+        WebPMultARGBRows(in, in_stride, scaler->src_width, rows_wanted, 0);
+        WebPRescalerImport(scaler, rows_remaining, in, in_stride);
+        rows_remaining -= rows_wanted;
+        in += rows_wanted * in_stride;
+        out_row += ExportYUVA(dec, out_row);
+    }
+    return out_row;
+}
+
+static int EmitRowsYUVA(const VP8LDecoder* const dec, const uint8_t* in, int in_stride, int mb_w, int num_rows) {
+    int out_row = dec->last_out_row_; // unscaled path: one output row per input row
+    int n;
+    for (n = 0; n < num_rows; ++n, ++out_row) {
+        ConvertToYUVA((const uint32_t*)in, mb_w, out_row, dec->output_);
+        in += in_stride;
+    }
+    return out_row; // row index just past the last one written
+}
+
+//------------------------------------------------------------------------------
+// Cropping.
+
+// Sets io->mb_y, io->mb_h & io->mb_w according to start row, end row and
+// crop options. Also updates the input data pointer, so that it points to the
+// start of the cropped window. Note that pixels are in ARGB format even if
+// 'in_data' is uint8_t*.
+// Returns true if the crop window is not empty.
+static int SetCropWindow(VP8Io* const io, int y_start, int y_end, uint8_t** const in_data, int pixel_stride) {
+    const int first_row = (y_start < io->crop_top) ? io->crop_top : y_start;
+    const int end_row = (y_end > io->crop_bottom) ? io->crop_bottom : y_end; // never run past the last cropped row
+    assert(y_start < y_end);
+    assert(io->crop_left < io->crop_right);
+
+    // Skip the input rows lying above the crop window. This is done even when
+    // the window then turns out to be empty.
+    if (first_row != y_start) *in_data += (first_row - y_start) * pixel_stride;
+    if (first_row >= end_row) return 0; // Crop window is empty.
+
+    // Skip the pixels left of the crop window: sizeof(uint32_t) bytes per pixel.
+    *in_data += io->crop_left * sizeof(uint32_t);
+
+    // Publish the visible window, with rows counted from the top of the crop.
+    io->mb_y = first_row - io->crop_top;
+    io->mb_w = io->crop_right - io->crop_left;
+    io->mb_h = end_row - first_row;
+    return 1; // Non-empty crop window.
+}
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE int GetMetaIndex(const uint32_t* const image, int xsize, int bits, int x, int y) {
+    // With no sub-sampling (bits == 0) index 0 is returned and 'image' is not read.
+    return (bits != 0) ? (int)image[xsize * (y >> bits) + (x >> bits)] : 0;
+}
+
+static WEBP_INLINE HTreeGroup* GetHtreeGroupForPos(VP8LMetadata* const hdr, int x, int y) {
+    const int group = GetMetaIndex(hdr->huffman_image_, hdr->huffman_xsize_, hdr->huffman_subsample_bits_, x, y);
+    assert(group < hdr->num_htree_groups_); // the meta image must only name existing groups
+    return &hdr->htree_groups_[group];
+}
+
+//------------------------------------------------------------------------------
+// Main loop, with custom row-processing function
+
+typedef void (*ProcessRowsFunc)(VP8LDecoder* const dec, int row);
+
+static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows, const uint32_t* const rows) {
+    StopProfilingWatch stop_watch0; // times the whole function
+    StartProfiling(&stop_watch0);
+    int n = dec->next_transform_; // transforms are undone from the last one recorded back to the first
+    const int cache_pixs = dec->width_ * num_rows;
+    const int start_row = dec->last_row_;
+    const int end_row = start_row + num_rows;
+    const uint32_t* rows_in = rows;
+    uint32_t* const rows_out = dec->argb_cache_;
+
+    // Inverse transforms.
+    // TODO: most transforms only need to operate on the cropped region only.
+    StopProfilingWatch stop_watch; // times only the initial copy into the cache
+    StartProfiling(&stop_watch);
+    if (0 == dec->thread_number_) { // NOTE(review): copy is skipped for other thread numbers — confirm who fills argb_cache_ then
+        memcpy(rows_out, rows_in, cache_pixs * sizeof(*rows_out));
+    }
+
+    StopProfiling(&stop_watch, &timeProcessRowsCopy1, &countProcessRowsCopy1);
+    while (n-- > 0) {
+        VP8LTransform* const transform = &dec->transforms_[n];
+        VP8LInverseTransform(transform, start_row, end_row, rows_in, rows_out, dec->thread_number_);
+        rows_in = rows_out; // after the first pass, later transforms read from the cache
+    }
+    StopProfiling(&stop_watch0, &timeApplyInverseTransforms, &countApplyInverseTransforms);
+}
+
+// Special method for paletted alpha data.
+static void ApplyInverseTransformsAlpha(VP8LDecoder* const dec, int num_rows, const uint8_t* const rows) {
+    VP8LTransform* const palette_transform = &dec->transforms_[0];
+    const int first_row = dec->last_row_;
+    const int last_row = first_row + num_rows;
+    // Output goes straight into the byte buffer behind io->opaque, 'width' bytes per row.
+    uint8_t* const out = (uint8_t*)dec->io_->opaque + dec->io_->width * first_row;
+    assert(dec->next_transform_ == 1);
+    assert(palette_transform->type_ == COLOR_INDEXING_TRANSFORM);
+    VP8LColorIndexInverseTransformAlpha(palette_transform, first_row, last_row, rows, out);
+}
+
+// Processes (transforms, scales & color-converts) the rows decoded after the
+// last call.
+static void ProcessRows(VP8LDecoder* const dec, int row) { // 'row' is one past the last decoded row to process
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    // fprintf(stderr, "dec->width_:%d row:%d dec->last_row_:%d %s %d\n",
+    //         dec->width_, row, dec->last_row_, __FUNCTION__, __LINE__);
+    const uint32_t* const rows = dec->pixels_ + dec->width_ * dec->last_row_; // first row not yet processed
+    const int num_rows = row - dec->last_row_;
+
+    if (num_rows <= 0) return; // Nothing to be done. NOTE(review): skips the StopProfiling() below — confirm intended.
+    ApplyInverseTransforms(dec, num_rows, rows);
+
+    // Emit output.
+    {
+        VP8Io* const io = dec->io_;
+        uint8_t* rows_data = (uint8_t*)dec->argb_cache_; // transformed rows are read back from the cache
+        const int in_stride = io->width * sizeof(uint32_t); // in unit of RGBA
+        if (!SetCropWindow(io, dec->last_row_, row, &rows_data, in_stride)) {
+            // Nothing to output (this time).
+        } else {
+            const WebPDecBuffer* const output = dec->output_;
+            if (WebPIsRGBMode(output->colorspace)) { // convert to RGBA
+                const WebPRGBABuffer* const buf = &output->u.RGBA;
+                uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride;
+                const int num_rows_out =
+                    io->use_scaling
+                        ? EmitRescaledRowsRGBA(dec, rows_data, in_stride, io->mb_h, rgba, buf->stride)
+                        : EmitRows(output->colorspace, rows_data, in_stride, io->mb_w, io->mb_h, rgba, buf->stride);
+                // Update 'last_out_row_'.
+                dec->last_out_row_ += num_rows_out;
+            } else { // convert to YUVA; these helpers return the new absolute output row, not a count
+                dec->last_out_row_ = io->use_scaling ? EmitRescaledRowsYUVA(dec, rows_data, in_stride, io->mb_h)
+                                                     : EmitRowsYUVA(dec, rows_data, in_stride, io->mb_w, io->mb_h);
+            }
+            assert(dec->last_out_row_ <= output->height);
+        }
+    }
+
+    // Update 'last_row_'.
+    dec->last_row_ = row;
+    assert(dec->last_row_ <= dec->height_);
+
+    StopProfiling(&stop_watch, &timeProcessRows, &countProcessRows);
+}
+
+// Row-processing for the special case when alpha data contains only one
+// transform (color indexing), and trivial non-green literals.
+static int Is8bOptimizable(const VP8LMetadata* const hdr) {
+    int g;
+    if (hdr->color_cache_size_ > 0) return 0;
+    // Only green may carry information: in every group the first red, blue
+    // and alpha table entries must have a zero bit length.
+    for (g = 0; g < hdr->num_htree_groups_; ++g) {
+        HuffmanCode** const trees = hdr->htree_groups_[g].htrees;
+        const int non_trivial =
+            (trees[RED][0].bits > 0) || (trees[BLUE][0].bits > 0) || (trees[ALPHA][0].bits > 0);
+        if (non_trivial) return 0;
+    }
+    return 1;
+}
+
+static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int row) {
+    const int first_row = dec->last_row_;
+    const int pending_rows = row - first_row;
+    // 'pixels_' is addressed as bytes here, with 'width_' bytes per row.
+    const uint8_t* const src = (uint8_t*)dec->pixels_ + dec->width_ * first_row;
+    if (pending_rows > 0) ApplyInverseTransformsAlpha(dec, pending_rows, src);
+    dec->last_row_ = dec->last_out_row_ = row; // both counters advance even when nothing was pending
+}
+
+//------------------------------------------------------------------------------
+// Helper functions for fast pattern copy (8b and 32b)
+
+// cyclic rotation of pattern word
+static WEBP_INLINE uint32_t Rotate8b(uint32_t V) {
+#if defined(WORDS_BIGENDIAN)
+    return (V << 8) | (V >> 24); // rotate left by one byte
+#else
+    return (V >> 8) | (V << 24); // rotate right by one byte
+#endif
+}
+
+// copy 1, 2 or 4-bytes pattern
+static WEBP_INLINE void CopySmallPattern8b(const uint8_t* src, uint8_t* dst, int length, uint32_t pattern) {
+    int num_words, w, b;
+    // Byte-copy until 'dst' sits on a 4-byte boundary, rotating 'pattern' so
+    // that it stays in phase with the bytes already written.
+    for (; ((uintptr_t)dst & 3) != 0; --length) {
+        *dst++ = *src++;
+        pattern = Rotate8b(pattern);
+    }
+    // Bulk phase: store the 4-byte pattern one whole word at a time.
+    num_words = length >> 2;
+    for (w = 0; w < num_words; ++w) {
+        ((uint32_t*)dst)[w] = pattern;
+    }
+    // Tail: the remaining bytes (fewer than four) are copied one by one from 'src'.
+    for (b = w << 2; b < length; ++b) {
+        dst[b] = src[b];
+    }
+}
+
+// Copies 'length' bytes to 'dst' from the position 'dist' bytes before it.
+// Runs of at least 8 bytes whose distance is 1, 2 or 4 are expanded through a
+// replicated 32-bit pattern; everything else uses memcpy() when source and
+// destination cannot overlap, and a forward byte loop when they do.
+static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) {
+    const uint8_t* src = dst - dist;
+    if (length >= 8 && (dist == 1 || dist == 2 || dist == 4)) {
+        uint32_t pattern = 0;
+        if (dist == 1) {
+            // Replicate one byte into all four byte lanes.
+            pattern = src[0];
+#if defined(__arm__) || defined(_M_ARM) // arm doesn't like multiply that much
+            pattern |= pattern << 8;
+            pattern |= pattern << 16;
+#elif defined(WEBP_USE_MIPS_DSP_R2)
+            __asm__ volatile("replv.qb %0, %0" : "+r"(pattern));
+#else
+            pattern = 0x01010101u * pattern;
+#endif
+        } else if (dist == 2) {
+            // Replicate one 16-bit pair into both halves of the word.
+            memcpy(&pattern, src, sizeof(uint16_t));
+#if defined(__arm__) || defined(_M_ARM)
+            pattern |= pattern << 16;
+#elif defined(WEBP_USE_MIPS_DSP_R2)
+            __asm__ volatile("replv.ph %0, %0" : "+r"(pattern));
+#else
+            pattern = 0x00010001u * pattern;
+#endif
+        } else {
+            // dist == 4: the four source bytes already form the full pattern.
+            memcpy(&pattern, src, sizeof(uint32_t));
+        }
+        CopySmallPattern8b(src, dst, length, pattern);
+        return;
+    }
+    if (dist < length) {
+        // Overlapping ranges: a forward byte loop re-reads bytes just written.
+        int i;
+        for (i = 0; i < length; ++i) dst[i] = src[i];
+    } else {
+        // no overlap -> use memcpy()
+        memcpy(dst, src, length * sizeof(*dst));
+    }
+}
+
+// Fills 'length' 32-bit pixels at 'dst' from a repeating pattern of one or two
+// pixels. 'pattern' is the 64-bit word holding two consecutive pixels as seen
+// at 'src'.
+static WEBP_INLINE void CopySmallPattern32b(const uint32_t* src, uint32_t* dst, int length, uint64_t pattern) {
+    int pair;
+    if (((uintptr_t)dst & 4) != 0) {
+        // Copy one pixel to reach an 8-byte boundary and swap the two halves
+        // of the pattern so that it stays in phase.
+        *dst++ = *src++;
+        pattern = (pattern << 32) | (pattern >> 32);
+        --length;
+    }
+    assert(0 == ((uintptr_t)dst & 7));
+    {
+        // Bulk part: store two pixels (8 bytes) per iteration.
+        uint64_t* const dst64 = (uint64_t*)dst;
+        for (pair = 0; pair < (length >> 1); ++pair) {
+            dst64[pair] = pattern;
+        }
+    }
+    if ((length & 1) != 0) {
+        // Odd count: one pixel is left after the pairs.
+        const int tail = pair << 1;
+        dst[tail] = src[tail];
+    }
+}
+
+// Copies 'length' 32-bit pixels to 'dst' from the position 'dist' pixels
+// before it. Distances up to 2 with at least 4 pixels and a 4-byte aligned
+// 'dst' go through the 64-bit pattern fill; otherwise memcpy() is used when
+// the ranges cannot overlap and a forward pixel loop when they do.
+static WEBP_INLINE void CopyBlock32b(uint32_t* const dst, int dist, int length) {
+    const uint32_t* const src = dst - dist;
+    const int dst_is_aligned = (((uintptr_t)dst & 3) == 0);
+    if (dist <= 2 && length >= 4 && dst_is_aligned) {
+        uint64_t pattern;
+        if (dist != 1) {
+            // Two-pixel period: load both pixels at once.
+            memcpy(&pattern, src, sizeof(pattern));
+        } else {
+            // One-pixel period: duplicate the pixel into both halves.
+            pattern = (uint64_t)src[0];
+            pattern |= pattern << 32;
+        }
+        CopySmallPattern32b(src, dst, length, pattern);
+        return;
+    }
+    if (dist < length) {
+        // Overlapping ranges: a forward loop re-reads pixels just written.
+        int i;
+        for (i = 0; i < length; ++i) dst[i] = src[i];
+    } else {
+        // no overlap
+        memcpy(dst, src, length * sizeof(*dst));
+    }
+}
+
+//------------------------------------------------------------------------------
+
+// Decodes entropy-coded data into the byte buffer 'data' (one byte per pixel),
+// resuming at dec->last_pixel_ and stopping once 'width * last_row' pixels are
+// available or the bit reader reports end-of-stream. Literals are read from the
+// GREEN tree only, which is why Is8bOptimizable(hdr) is asserted.
+// Completed rows are passed to ExtractPalettedAlphaRows() each time the row
+// counter reaches a multiple of 'split_rows', and once more after the loop.
+// Returns 1 and stores the new position in dec->last_pixel_ on success.
+// Returns 0 otherwise, with dec->status_ set to VP8_STATUS_SUSPENDED when the
+// reader is at end-of-stream and to VP8_STATUS_BITSTREAM_ERROR in other cases.
+static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data, int width, int height, int last_row) {
+    int ok = 1;
+    int row = dec->last_pixel_ / width;
+    int col = dec->last_pixel_ % width;
+    int split_rows = 0;
+    VP8LBitReader* const br = &dec->br_;
+    VP8LMetadata* const hdr = &dec->hdr_;
+    const HTreeGroup* htree_group = GetHtreeGroupForPos(hdr, col, row);
+    int pos = dec->last_pixel_;        // current position
+    const int end = width * height;    // End of data
+    const int last = width * last_row; // Last pixel to decode
+    const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES;
+    const int mask = hdr->huffman_mask_;
+    assert(htree_group != NULL);
+    assert(pos < end);
+    assert(last_row <= height);
+    assert(Is8bOptimizable(hdr));
+
+    // Row-flush granularity: the whole image when a non-zero thread number is
+    // set on the decoder, NUM_ARGB_CACHE_ROWS otherwise.
+    if (dec->thread_number_ != 0) {
+        split_rows = height;
+    } else {
+        split_rows = NUM_ARGB_CACHE_ROWS;
+    }
+
+    while (!br->eos_ && pos < last) {
+        int code;
+        // Only update when changing tile.
+        if ((col & mask) == 0) {
+            htree_group = GetHtreeGroupForPos(hdr, col, row);
+        }
+        VP8LFillBitWindow(br);
+        code = ReadSymbol(htree_group->htrees[GREEN], br);
+        if (code < NUM_LITERAL_CODES) { // Literal
+            data[pos] = code;
+            ++pos;
+            ++col;
+            if (col >= width) {
+                col = 0;
+                ++row;
+                if (row % split_rows == 0) {
+                    ExtractPalettedAlphaRows(dec, row);
+                }
+            }
+        } else if (code < len_code_limit) { // Backward reference
+            int dist_code, dist;
+            const int length_sym = code - NUM_LITERAL_CODES;
+            const int length = GetCopyLength(length_sym, br);
+            const int dist_symbol = ReadSymbol(htree_group->htrees[DIST], br);
+            VP8LFillBitWindow(br);
+            dist_code = GetCopyDistance(dist_symbol, br);
+            dist = PlaneCodeToDistance(width, dist_code);
+            // The copy must start inside the buffer and must not run past its end.
+            if (pos >= dist && end - pos >= length) {
+                CopyBlock8b(data + pos, dist, length);
+            } else {
+                ok = 0;
+                goto End;
+            }
+            pos += length;
+            col += length;
+            // A copy may span several rows: wrap the column and flush as needed.
+            while (col >= width) {
+                col -= width;
+                ++row;
+                if (row % split_rows == 0) {
+                    ExtractPalettedAlphaRows(dec, row);
+                }
+            }
+            // Landing in the middle of a tile: the top-of-loop test (col & mask) == 0
+            // will not fire, so refresh the group here.
+            if (pos < last && (col & mask)) {
+                htree_group = GetHtreeGroupForPos(hdr, col, row);
+            }
+        } else { // Not reached
+            ok = 0;
+            goto End;
+        }
+        assert(br->eos_ == VP8LIsEndOfStream(br));
+    }
+    // Process the remaining rows corresponding to last row-block.
+    ExtractPalettedAlphaRows(dec, row);
+
+End:
+    // Running out of bits before the whole image is decoded counts as a failure
+    // here; the status tells the caller whether more data could help.
+    if (!ok || (br->eos_ && pos < end)) {
+        ok = 0;
+        dec->status_ = br->eos_ ? VP8_STATUS_SUSPENDED : VP8_STATUS_BITSTREAM_ERROR;
+    } else {
+        dec->last_pixel_ = pos;
+    }
+    return ok;
+}
+
+// Takes a checkpoint for incremental decoding: stores a copy of the bit
+// reader, the pixel position 'last_pixel' and, when a color cache is in use,
+// a copy of that cache into the decoder's 'saved' fields.
+static void SaveState(VP8LDecoder* const dec, int last_pixel) {
+    VP8LMetadata* const hdr = &dec->hdr_;
+    assert(dec->incremental_);
+    dec->saved_last_pixel_ = last_pixel;
+    dec->saved_br_ = dec->br_;
+    if (hdr->color_cache_size_ > 0) {
+        VP8LColorCacheCopy(&hdr->color_cache_, &hdr->saved_color_cache_);
+    }
+}
+
+// Rolls the decoder back to the last checkpoint taken by SaveState() and marks
+// it as suspended. Expects the bit reader to be at end-of-stream on entry.
+static void RestoreState(VP8LDecoder* const dec) {
+    VP8LMetadata* const hdr = &dec->hdr_;
+    assert(dec->br_.eos_);
+    dec->status_ = VP8_STATUS_SUSPENDED;
+    dec->last_pixel_ = dec->saved_last_pixel_;
+    dec->br_ = dec->saved_br_;
+    if (hdr->color_cache_size_ > 0) {
+        VP8LColorCacheCopy(&hdr->saved_color_cache_, &hdr->color_cache_);
+    }
+}
+
+#define SYNC_EVERY_N_ROWS 8 // minimum number of rows between check-points
+// Decodes 32-bit pixels into 'data', resuming at dec->last_pixel_ and stopping
+// once 'width * last_row' pixels are available or the bit reader reports
+// end-of-stream.
+// 'process_func', when not NULL, is called each time the row counter reaches a
+// multiple of 'split_rows' and once more after the loop on success.
+// In incremental mode a checkpoint is taken with SaveState() at least every
+// SYNC_EVERY_N_ROWS rows; if the stream ends before the whole image is decoded
+// the checkpoint is restored (status becomes VP8_STATUS_SUSPENDED) and the
+// function still returns 1.
+// Returns 1 on success or suspension, 0 with dec->status_ set to
+// VP8_STATUS_BITSTREAM_ERROR on a bitstream error.
+// NOTE(review): StopProfiling() is only reached on the 'return 1' path; the
+// Error path returns without it - confirm this is intended.
+static int DecodeImageData(
+    VP8LDecoder* const dec, uint32_t* const data, int width, int height, int last_row, ProcessRowsFunc process_func) {
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    int row = dec->last_pixel_ / width;
+    int col = dec->last_pixel_ % width;
+    int split_rows = 0;
+    VP8LBitReader* const br = &dec->br_;
+    VP8LMetadata* const hdr = &dec->hdr_;
+    HTreeGroup* htree_group = GetHtreeGroupForPos(hdr, col, row);
+    uint32_t* src = data + dec->last_pixel_;
+    uint32_t* last_cached = src;                        // first pixel not yet inserted in the color cache
+    uint32_t* const src_end = data + width * height;    // End of data
+    uint32_t* const src_last = data + width * last_row; // Last pixel to decode
+    const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES;
+    const int color_cache_limit = len_code_limit + hdr->color_cache_size_;
+    int next_sync_row = dec->incremental_ ? row : 1 << 24; // 1 << 24: effectively never checkpoint
+    VP8LColorCache* const color_cache = (hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL;
+    const int mask = hdr->huffman_mask_;
+    assert(htree_group != NULL);
+    assert(src < src_end);
+    assert(src_last <= src_end);
+
+    // Row-flush granularity: the whole image when a non-zero thread number is
+    // set on the decoder, NUM_ARGB_CACHE_ROWS otherwise.
+    if (dec->thread_number_ != 0) {
+        split_rows = height;
+    } else {
+        split_rows = NUM_ARGB_CACHE_ROWS;
+    }
+
+    while (src < src_last) {
+        int code;
+        if (row >= next_sync_row) {
+            SaveState(dec, (int)(src - data));
+            next_sync_row = row + SYNC_EVERY_N_ROWS;
+        }
+        // Only update when changing tile. Note we could use this test:
+        // if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed
+        // but that's actually slower and needs storing the previous col/row.
+        if ((col & mask) == 0) htree_group = GetHtreeGroupForPos(hdr, col, row);
+        // Trivial group: the pixel value is fixed, no bits are read at all.
+        if (htree_group->is_trivial_code) {
+            *src = htree_group->literal_arb;
+            goto AdvanceByOne;
+        }
+        VP8LFillBitWindow(br);
+        if (htree_group->use_packed_table) {
+            code = ReadPackedSymbols(htree_group, br, src);
+            if (code == PACKED_NON_LITERAL_CODE) goto AdvanceByOne;
+        } else {
+            code = ReadSymbol(htree_group->htrees[GREEN], br);
+        }
+        if (br->eos_) break;            // early out
+        if (code < NUM_LITERAL_CODES) { // Literal
+            if (htree_group->is_trivial_literal) {
+                // Only green is coded; the other channels come from 'literal_arb'.
+                *src = htree_group->literal_arb | (code << 8);
+            } else {
+                int red, blue, alpha;
+                red = ReadSymbol(htree_group->htrees[RED], br);
+                VP8LFillBitWindow(br);
+                blue = ReadSymbol(htree_group->htrees[BLUE], br);
+                alpha = ReadSymbol(htree_group->htrees[ALPHA], br);
+                if (br->eos_) break;
+                *src = ((uint32_t)alpha << 24) | (red << 16) | (code << 8) | blue;
+            }
+        // Shared single-pixel epilogue; also entered by goto from the trivial,
+        // packed and color-cache paths.
+        AdvanceByOne:
+            ++src;
+            ++col;
+            if (col >= width) {
+                col = 0;
+                ++row;
+                if ((row % split_rows == 0) && (process_func != NULL)) {
+                    process_func(dec, row);
+                }
+                // Cache insertion is deferred and done in bulk at each end of row.
+                if (color_cache != NULL) {
+                    while (last_cached < src) {
+                        VP8LColorCacheInsert(color_cache, *last_cached++);
+                    }
+                }
+            }
+        } else if (code < len_code_limit) { // Backward reference
+            int dist_code, dist;
+            const int length_sym = code - NUM_LITERAL_CODES;
+            const int length = GetCopyLength(length_sym, br);
+            const int dist_symbol = ReadSymbol(htree_group->htrees[DIST], br);
+            VP8LFillBitWindow(br);
+            dist_code = GetCopyDistance(dist_symbol, br);
+            dist = PlaneCodeToDistance(width, dist_code);
+            if (br->eos_) break;
+            // The copy must start inside the buffer and must not run past its end.
+            if (src - data < (ptrdiff_t)dist || src_end - src < (ptrdiff_t)length) {
+                goto Error;
+            } else {
+                CopyBlock32b(src, dist, length);
+            }
+            src += length;
+            col += length;
+            // A copy may span several rows: wrap the column and flush as needed.
+            while (col >= width) {
+                col -= width;
+                ++row;
+                if ((row % split_rows == 0) && (process_func != NULL)) {
+                    process_func(dec, row);
+                }
+            }
+            // Because of the check done above (before 'src' was incremented by
+            // 'length'), the following holds true.
+            assert(src <= src_end);
+            if (col & mask) htree_group = GetHtreeGroupForPos(hdr, col, row);
+            if (color_cache != NULL) {
+                while (last_cached < src) {
+                    VP8LColorCacheInsert(color_cache, *last_cached++);
+                }
+            }
+        } else if (code < color_cache_limit) { // Color cache
+            const int key = code - len_code_limit;
+            assert(color_cache != NULL);
+            // Bring the cache up to date before looking the key up.
+            while (last_cached < src) {
+                VP8LColorCacheInsert(color_cache, *last_cached++);
+            }
+            *src = VP8LColorCacheLookup(color_cache, key);
+            goto AdvanceByOne;
+        } else { // Not reached
+            goto Error;
+        }
+        assert(br->eos_ == VP8LIsEndOfStream(br));
+    }
+
+    if (dec->incremental_ && br->eos_ && src < src_end) {
+        // Out of data mid-image: roll back to the last checkpoint and suspend.
+        RestoreState(dec);
+    } else if (!br->eos_) {
+        // Process the remaining rows corresponding to last row-block.
+        if (process_func != NULL) {
+            process_func(dec, row);
+        }
+        dec->status_ = VP8_STATUS_OK;
+        dec->last_pixel_ = (int)(src - data); // end-of-scan marker
+    } else {
+        // if not incremental, and we are past the end of buffer (eos_=1), then this
+        // is a real bitstream error.
+        goto Error;
+    }
+
+    StopProfiling(&stop_watch, &timeDecodeImageData, &countDecodeImageData);
+
+    return 1;
+
+Error:
+    dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+    return 0;
+}
+
+// -----------------------------------------------------------------------------
+// VP8LTransform
+
+// Releases the data buffer owned by 'transform' and leaves the pointer NULL.
+static void ClearTransform(VP8LTransform* const transform) {
+    uint32_t* const owned = transform->data_;
+    transform->data_ = NULL;
+    WebPSafeFree(owned);
+}
+
+// Replaces transform->data_ (a delta-coded palette of 'num_colors' entries) by
+// a newly allocated palette of 1 << (8 >> bits_) entries. For security
+// reasons the map must cover every index a bundled pixel can produce, not just
+// 'num_colors': the extra entries are zeroed.
+// Returns 0 if the allocation fails (transform->data_ is then left untouched),
+// 1 otherwise.
+static int ExpandColorMap(int num_colors, VP8LTransform* const transform) {
+    const int final_num_colors = 1 << (8 >> transform->bits_);
+    uint32_t* const new_color_map = (uint32_t*)WebPSafeMalloc((uint64_t)final_num_colors, sizeof(*new_color_map));
+    const uint8_t* old_bytes;
+    uint8_t* new_bytes;
+    int i;
+
+    if (new_color_map == NULL) {
+        return 0;
+    }
+    old_bytes = (const uint8_t*)transform->data_;
+    new_bytes = (uint8_t*)new_color_map;
+
+    // The first color is stored as-is; each following byte is the sum, modulo
+    // 256, of its delta and the same channel of the previous color.
+    // Equivalent to AddPixelEq(), on a byte-basis.
+    new_color_map[0] = transform->data_[0];
+    for (i = 4; i < 4 * num_colors; ++i) {
+        new_bytes[i] = (old_bytes[i] + new_bytes[i - 4]) & 0xff;
+    }
+    // black tail.
+    while (i < 4 * final_num_colors) {
+        new_bytes[i] = 0;
+        ++i;
+    }
+    WebPSafeFree(transform->data_);
+    transform->data_ = new_color_map;
+    return 1;
+}
+
+// Reads one transform header from the bit reader into the next free slot of
+// dec->transforms_, together with the transform's own data where it has any.
+// '*xsize' is updated for the color-indexing transform, whose pixel bundling
+// shrinks the width of the coded image. '*ysize' is only read.
+// Returns 0 if the same transform type was already seen or if reading the
+// transform data failed, non-zero otherwise.
+static int ReadTransform(int* const xsize, int const* ysize, VP8LDecoder* const dec) {
+    VP8LBitReader* const br = &dec->br_;
+    VP8LTransform* transform = &dec->transforms_[dec->next_transform_];
+    const VP8LImageTransformType type = (VP8LImageTransformType)VP8LReadBits(br, 2);
+    const unsigned int type_bit = 1U << type;
+    int ok = 1;
+
+    // Each transform type can only be present once in the stream.
+    if (dec->transforms_seen_ & type_bit) {
+        return 0; // Already there, let's not accept the second same transform.
+    }
+    dec->transforms_seen_ |= type_bit;
+
+    transform->type_ = type;
+    transform->xsize_ = *xsize;
+    transform->ysize_ = *ysize;
+    transform->data_ = NULL;
+    ++dec->next_transform_;
+    assert(dec->next_transform_ <= NUM_TRANSFORMS);
+
+    if (type == PREDICTOR_TRANSFORM || type == CROSS_COLOR_TRANSFORM) {
+        // Both carry a sub-sampled image of per-block parameters.
+        transform->bits_ = VP8LReadBits(br, 3) + 2;
+        ok = DecodeImageStream(VP8LSubSampleSize(transform->xsize_, transform->bits_),
+                               VP8LSubSampleSize(transform->ysize_, transform->bits_), 0, dec, &transform->data_);
+    } else if (type == COLOR_INDEXING_TRANSFORM) {
+        const int num_colors = VP8LReadBits(br, 8) + 1;
+        int bits;
+        // Small palettes pack several indices per pixel: pick the packing level.
+        if (num_colors > 16) {
+            bits = 0;
+        } else if (num_colors > 4) {
+            bits = 1;
+        } else if (num_colors > 2) {
+            bits = 2;
+        } else {
+            bits = 3;
+        }
+        *xsize = VP8LSubSampleSize(transform->xsize_, bits);
+        transform->bits_ = bits;
+        // The palette is coded as a 'num_colors' x 1 image, then expanded.
+        ok = DecodeImageStream(num_colors, 1, 0, dec, &transform->data_) && ExpandColorMap(num_colors, transform);
+    } else if (type != SUBTRACT_GREEN) {
+        // SUBTRACT_GREEN carries no data; any other value is impossible since
+        // 'type' is read from two bits.
+        assert(0); // can't happen
+    }
+
+    return ok;
+}
+
+// -----------------------------------------------------------------------------
+// VP8LMetadata
+
+// Resets every field of 'hdr' to zero. 'hdr' must not be NULL.
+static void InitMetadata(VP8LMetadata* const hdr) {
+    assert(hdr != NULL);
+    memset(hdr, 0, sizeof(VP8LMetadata));
+}
+
+// Releases everything owned by 'hdr' (Huffman image, Huffman tables, tree
+// groups and both color caches), then zeroes the structure so that it can be
+// reused. 'hdr' must not be NULL.
+static void ClearMetadata(VP8LMetadata* const hdr) {
+    assert(hdr != NULL);
+
+    // Huffman-related buffers.
+    WebPSafeFree(hdr->huffman_image_);
+    WebPSafeFree(hdr->huffman_tables_);
+    VP8LHtreeGroupsFree(hdr->htree_groups_);
+
+    // Live color cache and its incremental-decoding snapshot.
+    VP8LColorCacheClear(&hdr->color_cache_);
+    VP8LColorCacheClear(&hdr->saved_color_cache_);
+
+    // Drop the now-dangling pointers along with all other fields.
+    InitMetadata(hdr);
+}
+
+// -----------------------------------------------------------------------------
+// VP8LDecoder
+
+// Allocates a zero-initialized lossless decoder, sets its status to
+// VP8_STATUS_OK and its state to READ_DIM, and initializes the DSP function
+// pointers. Returns NULL if the allocation fails.
+VP8LDecoder* VP8LNew(void) {
+    VP8LDecoder* const dec = (VP8LDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
+    if (dec != NULL) {
+        dec->status_ = VP8_STATUS_OK;
+        dec->state_ = READ_DIM;
+
+        VP8LDspInit(); // Init critical function pointers.
+    }
+    return dec;
+}
+
+// Releases everything the decoder owns (metadata, pixel buffer, transform
+// data, rescaler memory) and resets the matching fields. The decoder object
+// itself is not freed. A NULL 'dec' is accepted and ignored.
+void VP8LClear(VP8LDecoder* const dec) {
+    int t;
+    if (dec == NULL) {
+        return;
+    }
+    ClearMetadata(&dec->hdr_);
+
+    WebPSafeFree(dec->pixels_);
+    dec->pixels_ = NULL;
+
+    // Only the first 'next_transform_' slots were ever filled in.
+    t = 0;
+    while (t < dec->next_transform_) {
+        ClearTransform(&dec->transforms_[t]);
+        ++t;
+    }
+    dec->next_transform_ = 0;
+    dec->transforms_seen_ = 0;
+
+    WebPSafeFree(dec->rescaler_memory);
+    dec->rescaler_memory = NULL;
+
+    dec->output_ = NULL; // leave no trace behind
+}
+
+// Clears the decoder with VP8LClear() and frees the object itself.
+// A NULL 'dec' is accepted and ignored.
+void VP8LDelete(VP8LDecoder* const dec) {
+    if (dec == NULL) {
+        return;
+    }
+    VP8LClear(dec);
+    WebPSafeFree(dec);
+}
+
+// Stores the dimensions of the image about to be decoded and derives the
+// Huffman-image geometry from hdr->huffman_subsample_bits_: the sub-sampled
+// width and the column mask used to detect tile changes.
+static void UpdateDecoder(VP8LDecoder* const dec, int width, int height) {
+    VP8LMetadata* const hdr = &dec->hdr_;
+    const int num_bits = hdr->huffman_subsample_bits_;
+
+    dec->width_ = width;
+    dec->height_ = height;
+
+    hdr->huffman_xsize_ = VP8LSubSampleSize(width, num_bits);
+    if (num_bits == 0) {
+        // All bits set: (col & mask) is zero only for column 0.
+        hdr->huffman_mask_ = ~0;
+    } else {
+        hdr->huffman_mask_ = (1 << num_bits) - 1;
+    }
+}
+
+// Reads one image stream of 'xsize' x 'ysize' pixels from dec->br_.
+// With 'is_level0' set (the main image), the transforms, the optional color
+// cache and the Huffman codes are read, the decoder is set up through
+// UpdateDecoder() and dec->state_ becomes READ_HDR; no pixel is decoded and no
+// pixel buffer is allocated.
+// Otherwise (sub-images used by transforms and, presumably, by
+// ReadHuffmanCodes() - see the "may recurse" notes), a buffer of xsize * ysize
+// 32-bit pixels is allocated, fully decoded, and handed over through
+// '*decoded_data'; the temporary metadata is cleared afterwards.
+// 'decoded_data' may be NULL only at level 0 (asserted).
+// Returns non-zero on success. On failure returns 0 with dec->status_ set,
+// after freeing the pixel buffer and clearing the metadata.
+static int DecodeImageStream(
+    int xsize, int ysize, int is_level0, VP8LDecoder* const dec, uint32_t** const decoded_data) {
+    int ok = 1;
+    int transform_xsize = xsize;
+    int transform_ysize = ysize;
+    VP8LBitReader* const br = &dec->br_;
+    VP8LMetadata* const hdr = &dec->hdr_;
+    uint32_t* data = NULL;
+    int color_cache_bits = 0;
+
+    // Read the transforms (may recurse).
+    // ReadTransform() may shrink 'transform_xsize' (color indexing).
+    if (is_level0) {
+        while (ok && VP8LReadBits(br, 1)) {
+            ok = ReadTransform(&transform_xsize, &transform_ysize, dec);
+        }
+    }
+
+    // Color cache
+    if (ok && VP8LReadBits(br, 1)) {
+        color_cache_bits = VP8LReadBits(br, 4);
+        ok = (color_cache_bits >= 1 && color_cache_bits <= MAX_CACHE_BITS);
+        if (!ok) {
+            dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+            goto End;
+        }
+    }
+
+    // Read the Huffman codes (may recurse).
+    // A failed ReadTransform() above also ends up here, through 'ok'.
+    ok = ok && ReadHuffmanCodes(dec, transform_xsize, transform_ysize, color_cache_bits, is_level0);
+    if (!ok) {
+        dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+        goto End;
+    }
+
+    // Finish setting up the color-cache
+    if (color_cache_bits > 0) {
+        hdr->color_cache_size_ = 1 << color_cache_bits;
+        if (!VP8LColorCacheInit(&hdr->color_cache_, color_cache_bits)) {
+            dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+            ok = 0;
+            goto End;
+        }
+    } else {
+        hdr->color_cache_size_ = 0;
+    }
+    UpdateDecoder(dec, transform_xsize, transform_ysize);
+
+    if (is_level0) { // level 0 complete
+        dec->state_ = READ_HDR;
+        goto End;
+    }
+
+    {
+        // 64-bit product: WebPSafeMalloc() receives the exact element count.
+        const uint64_t total_size = (uint64_t)transform_xsize * transform_ysize;
+        data = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*data));
+        if (data == NULL) {
+            dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+            ok = 0;
+            goto End;
+        }
+    }
+
+    // Use the Huffman trees to decode the LZ77 encoded data.
+    ok = DecodeImageData(dec, data, transform_xsize, transform_ysize, transform_ysize, NULL);
+    // A sub-image must be complete: reaching end-of-stream is a failure here.
+    ok = ok && !br->eos_;
+
+End:
+    if (!ok) {
+        WebPSafeFree(data);
+        ClearMetadata(hdr);
+    } else {
+        if (decoded_data != NULL) {
+            // The caller now owns 'data' (NULL at level 0).
+            *decoded_data = data;
+        } else {
+            // We allocate image data in this function only for transforms. At level 0
+            // (that is: not the transforms), we shouldn't have allocated anything.
+            assert(data == NULL);
+            assert(is_level0);
+        }
+        dec->last_pixel_ = 0;               // Reset for future DECODE_DATA_FUNC() calls.
+        if (!is_level0) ClearMetadata(hdr); // Clean up temporary data behind.
+    }
+    return ok;
+}
+
+//------------------------------------------------------------------------------
+// Allocates dec->pixels_ as a single block made of three areas and points
+// dec->argb_cache_ into it:
+//   - width_ * height_ pixels of decoded data,
+//   - one scratch row ('final_width' truncated to 16 bits) used as the
+//     top-prediction row when transforming the first row of a row-block,
+//   - a temporary BGRA area of 'final_width' pixels per cached row.
+// When a non-zero thread number is set on the decoder the BGRA area covers
+// every row of the image and argb_cache_ is set to the start of pixels_;
+// otherwise it covers NUM_ARGB_CACHE_ROWS rows and argb_cache_ points just
+// past the scratch row.
+// Returns 1 on success. On allocation failure returns 0 with argb_cache_ set
+// to NULL and the status set to VP8_STATUS_OUT_OF_MEMORY.
+static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) {
+    const int whole_image_cache = (dec->thread_number_ != 0);
+    const uint64_t num_pixels = (uint64_t)dec->width_ * dec->height_;
+    const uint64_t cache_top_pixels = (uint16_t)final_width;
+    const uint64_t cache_pixels =
+        whole_image_cache ? (uint64_t)final_width * dec->height_ : (uint64_t)final_width * NUM_ARGB_CACHE_ROWS;
+    const uint64_t total_num_pixels = num_pixels + cache_top_pixels + cache_pixels;
+
+    assert(dec->width_ <= final_width);
+    dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint32_t));
+    if (dec->pixels_ == NULL) {
+        dec->argb_cache_ = NULL; // for sanity check
+        dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+        return 0;
+    }
+
+    dec->argb_cache_ = whole_image_cache ? dec->pixels_ : dec->pixels_ + num_pixels + cache_top_pixels;
+    return 1;
+}
+
+// Allocates dec->pixels_ with one byte per pixel (width_ * height_ bytes) for
+// the 8-bit alpha path; argb_cache_ is not used on that path and is set to
+// NULL. Returns 1 on success, 0 with the status set to
+// VP8_STATUS_OUT_OF_MEMORY otherwise.
+static int AllocateInternalBuffers8b(VP8LDecoder* const dec) {
+    const uint64_t num_bytes = (uint64_t)dec->width_ * dec->height_;
+    dec->argb_cache_ = NULL; // for sanity check
+    dec->pixels_ = (uint32_t*)WebPSafeMalloc(num_bytes, sizeof(uint8_t));
+    if (dec->pixels_ != NULL) {
+        return 1;
+    }
+    dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+    return 0;
+}
+
+//------------------------------------------------------------------------------
+
+// Row-processing callback that keeps only the alpha plane: runs
+// ApplyInverseTransforms() on the rows in [dec->last_row_, row), then copies
+// bits 8..15 (the green channel, where alpha is stored) of each pixel found in
+// dec->argb_cache_ to the byte plane at io_->opaque. Does nothing when there
+// is no new row.
+static void ExtractAlphaRows(VP8LDecoder* const dec, int row) {
+    const int num_rows = row - dec->last_row_;
+    const uint32_t* const in = dec->pixels_ + dec->width_ * dec->last_row_;
+
+    if (num_rows > 0) {
+        ApplyInverseTransforms(dec, num_rows, in);
+
+        // Extract alpha (which is stored in the green plane).
+        {
+            const int width = dec->io_->width; // the final width (!= dec->width_)
+            const int count = width * num_rows;
+            uint8_t* const alpha_out = (uint8_t*)dec->io_->opaque + width * dec->last_row_;
+            const uint32_t* const argb = dec->argb_cache_;
+            int i = 0;
+            while (i < count) {
+                alpha_out[i] = (argb[i] >> 8) & 0xff;
+                ++i;
+            }
+        }
+        dec->last_row_ = dec->last_out_row_ = row;
+    }
+}
+
+// Creates the lossless decoder used for an alpha plane, parses the stream
+// header found in 'data' / 'data_size' and allocates the working buffers.
+// 'output' is stored in the decoder's io as the destination of the alpha
+// bytes. On success alph_dec->use_8b_decode tells which decoding path to use.
+// Returns 1 on success. On failure returns 0 with the decoder deleted and
+// alph_dec->vp8l_dec_ left NULL.
+int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec,
+                          const uint8_t* const data,
+                          size_t data_size,
+                          uint8_t* const output) {
+    VP8LDecoder* dec;
+    VP8Io* io;
+    assert(alph_dec != NULL);
+
+    dec = VP8LNew();
+    alph_dec->vp8l_dec_ = dec;
+    if (dec == NULL) {
+        return 0;
+    }
+
+    io = &alph_dec->io_;
+    dec->width_ = alph_dec->width_;
+    dec->height_ = alph_dec->height_;
+    dec->io_ = io;
+
+    VP8InitIo(io);
+    WebPInitCustomIo(NULL, io); // Just a sanity Init. io won't be used.
+    io->opaque = output;
+    io->width = alph_dec->width_;
+    io->height = alph_dec->height_;
+
+    dec->status_ = VP8_STATUS_OK;
+    VP8LInitBitReader(&dec->br_, data, data_size);
+
+    if (DecodeImageStream(alph_dec->width_, alph_dec->height_, 1, dec, NULL)) {
+        int buffers_ok;
+        // Special case: if alpha data uses only the color indexing transform and
+        // doesn't use color cache (a frequent case), we will use DecodeAlphaData()
+        // method that only needs allocation of 1 byte per pixel (alpha channel).
+        if (dec->next_transform_ == 1 && dec->transforms_[0].type_ == COLOR_INDEXING_TRANSFORM &&
+            Is8bOptimizable(&dec->hdr_)) {
+            alph_dec->use_8b_decode = 1;
+            buffers_ok = AllocateInternalBuffers8b(dec);
+        } else {
+            // Allocate internal buffers (note that dec->width_ may have changed here).
+            alph_dec->use_8b_decode = 0;
+            buffers_ok = AllocateInternalBuffers32b(dec, alph_dec->width_);
+        }
+        if (buffers_ok) {
+            return 1;
+        }
+    }
+
+    // Header parsing or buffer allocation failed: tear the decoder down.
+    VP8LDelete(alph_dec->vp8l_dec_);
+    alph_dec->vp8l_dec_ = NULL;
+    return 0;
+}
+
+// Decodes the alpha plane up to row 'last_row', using the path selected by
+// VP8LDecodeAlphaHeader(). Returns 1 right away when every pixel has already
+// been decoded; otherwise returns the result of the selected decoding routine.
+int VP8LDecodeAlphaImageStream(ALPHDecoder* const alph_dec, int last_row) {
+    VP8LDecoder* const dec = alph_dec->vp8l_dec_;
+    assert(dec != NULL);
+    assert(last_row <= dec->height_);
+
+    if (dec->last_pixel_ == dec->width_ * dec->height_) {
+        return 1; // done
+    }
+
+    // Decode (with special row processing).
+    if (alph_dec->use_8b_decode) {
+        return DecodeAlphaData(dec, (uint8_t*)dec->pixels_, dec->width_, dec->height_, last_row);
+    }
+    return DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_, last_row, ExtractAlphaRows);
+}
+
+//------------------------------------------------------------------------------
+
+// Parses the image dimensions and the level-0 stream header from io->data.
+// On success io->width / io->height hold the image size and 1 is returned.
+// Returns 0 if 'dec' is NULL, if 'io' is NULL (status
+// VP8_STATUS_INVALID_PARAM) or if parsing fails; in the last case the decoder
+// is cleared and its status tells why.
+int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) {
+    int width, height, has_alpha;
+
+    if (dec == NULL) {
+        return 0;
+    }
+    if (io == NULL) {
+        dec->status_ = VP8_STATUS_INVALID_PARAM;
+        return 0;
+    }
+
+    dec->io_ = io;
+    dec->status_ = VP8_STATUS_OK;
+    VP8LInitBitReader(&dec->br_, io->data, io->data_size);
+
+    if (ReadImageInfo(&dec->br_, &width, &height, &has_alpha)) {
+        dec->state_ = READ_DIM;
+        io->width = width;
+        io->height = height;
+        if (DecodeImageStream(width, height, 1, dec, NULL)) {
+            return 1;
+        }
+    } else {
+        dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+    }
+
+    // Failure: release whatever was set up so far.
+    VP8LClear(dec);
+    assert(dec->status_ != VP8_STATUS_OK);
+    return 0;
+}
+
+// Decodes the pixels of the main image, after VP8LDecodeHeader() succeeded.
+// On the first call (state_ != READ_DATA) the output, the io, the internal
+// buffers, the optional rescaler and the color-conversion helpers are set up;
+// in incremental mode the snapshot color cache is allocated as well.
+// Returns 1 on success (or suspension in incremental mode), with
+// params->last_y set to the last output row. Returns 0 if 'dec' is NULL or on
+// failure; in the latter case the decoder is cleared and dec->status_ tells
+// why.
+int VP8LDecodeImage(VP8LDecoder* const dec) {
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    VP8Io* io = NULL;
+    WebPDecParams* params = NULL;
+
+    // Sanity checks.
+    if (dec == NULL) return 0;
+
+    assert(dec->hdr_.huffman_tables_ != NULL);
+    assert(dec->hdr_.htree_groups_ != NULL);
+    assert(dec->hdr_.num_htree_groups_ > 0);
+
+    io = dec->io_;
+    assert(io != NULL);
+    params = (WebPDecParams*)io->opaque;
+    assert(params != NULL);
+
+    // Initialization.
+    if (dec->state_ != READ_DATA) {
+        dec->output_ = params->output;
+        // 'options' is optional (it is forwarded as-is to WebPIoInitFromOptions()
+        // below): without options, fall back to thread number 0, which selects
+        // the regular NUM_ARGB_CACHE_ROWS row cache.
+        dec->thread_number_ = (params->options != NULL) ? params->options->thread_number : 0;
+        assert(dec->output_ != NULL);
+
+        if (!WebPIoInitFromOptions(params->options, io, MODE_BGRA)) {
+            dec->status_ = VP8_STATUS_INVALID_PARAM;
+            goto Err;
+        }
+
+        if (!AllocateInternalBuffers32b(dec, io->width)) goto Err;
+
+        if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err;
+
+        if (io->use_scaling || WebPIsPremultipliedMode(dec->output_->colorspace)) {
+            // need the alpha-multiply functions for premultiplied output or rescaling
+            WebPInitAlphaProcessing();
+        }
+        if (!WebPIsRGBMode(dec->output_->colorspace)) {
+            WebPInitConvertARGBToYUV();
+            if (dec->output_->u.YUVA.a != NULL) WebPInitAlphaProcessing();
+        }
+        if (dec->incremental_) {
+            // The snapshot cache used by SaveState()/RestoreState() is created
+            // once, with the same number of hash bits as the live cache.
+            if (dec->hdr_.color_cache_size_ > 0 && dec->hdr_.saved_color_cache_.colors_ == NULL) {
+                if (!VP8LColorCacheInit(&dec->hdr_.saved_color_cache_, dec->hdr_.color_cache_.hash_bits_)) {
+                    dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+                    goto Err;
+                }
+            }
+        }
+        dec->state_ = READ_DATA;
+    }
+
+    // Decode.
+    if (!DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_, dec->height_, ProcessRows)) {
+        goto Err;
+    }
+
+    params->last_y = dec->last_out_row_;
+
+    StopProfiling(&stop_watch, &timeVP8LDecodeImage, &countVP8LDecodeImage);
+
+    return 1;
+
+Err:
+    VP8LClear(dec);
+    assert(dec->status_ != VP8_STATUS_OK);
+    return 0;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/dec/vp8li.h b/codec/L2/demos/webpEnc/host/src/dec/vp8li.h
new file mode 100644
index 0000000000..d832841c64
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dec/vp8li.h
@@ -0,0 +1,138 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Lossless decoder: internal header.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+//         Vikas Arora(vikaas.arora@gmail.com)
+
+#ifndef WEBP_DEC_VP8LI_H_
+#define WEBP_DEC_VP8LI_H_
+
+#include <string.h> // for memcpy()
+#include "./webpi.h"
+#include "../utils/bit_reader.h"
+#include "../utils/color_cache.h"
+#include "../utils/huffman.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum { READ_DATA = 0, READ_HDR = 1, READ_DIM = 2 } VP8LDecodeState; // values stored in VP8LDecoder.state_
+
+typedef struct VP8LTransform VP8LTransform; // One image transform; VP8LDecoder holds NUM_TRANSFORMS of these.
+struct VP8LTransform {
+    VP8LImageTransformType type_; // transform type.
+    int bits_;                    // subsampling bits defining transform window.
+    int xsize_;                   // transform window X index. NOTE(review): name suggests a width -- confirm.
+    int ysize_;                   // transform window Y index. NOTE(review): name suggests a height -- confirm.
+    uint32_t* data_;              // transform data.
+};
+
+typedef struct { // Lossless metadata; embedded in VP8LDecoder as 'hdr_'.
+    int color_cache_size_;             // tested for > 0 before saved_color_cache_ is set up for incremental decoding
+    VP8LColorCache color_cache_;       // its hash_bits_ is reused to initialize saved_color_cache_
+    VP8LColorCache saved_color_cache_; // for incremental decoding
+
+    int huffman_mask_;
+    int huffman_subsample_bits_;
+    int huffman_xsize_;
+    uint32_t* huffman_image_;
+    int num_htree_groups_;
+    HTreeGroup* htree_groups_;
+    HuffmanCode* huffman_tables_;
+} VP8LMetadata;
+
+typedef struct VP8LDecoder VP8LDecoder; // State of one lossless decoder instance (see VP8LNew / VP8LDelete).
+struct VP8LDecoder {
+    VP8StatusCode status_;  // set to a non-OK code on the decode error paths; VP8LClear() preserves it
+    VP8LDecodeState state_; // set to READ_DATA once the image-decode setup has completed
+    VP8Io* io_;
+
+    const WebPDecBuffer* output_; // shortcut to io->opaque->output
+
+    uint32_t* pixels_;     // Internal data: either uint8_t* for alpha
+                           // or uint32_t* for BGRA.
+    uint32_t* argb_cache_; // Scratch buffer for temporary BGRA storage.
+
+    VP8LBitReader br_;
+    int incremental_;        // if true, incremental decoding is expected
+    VP8LBitReader saved_br_; // note: could be local variables too
+    int saved_last_pixel_;
+
+    int width_;        // handed to DecodeImageData() together with height_
+    int height_;
+    int last_row_;     // last input row decoded so far.
+    int last_pixel_;   // last pixel decoded so far. However, it may
+                       // not be transformed, scaled and
+                       // color-converted yet.
+    int last_out_row_; // last row output so far; reported back as params->last_y.
+
+    VP8LMetadata hdr_;
+
+    int next_transform_;
+    VP8LTransform transforms_[NUM_TRANSFORMS];
+    // OR'ed bitset storing the transform types.
+    uint32_t transforms_seen_;
+
+    uint8_t* rescaler_memory; // Working memory for rescaling work.
+    WebPRescaler* rescaler;   // Common rescaler for all channels.
+    int thread_number_;       // multi-thread for lossless; copied from params->options->thread_number
+    // NOTE(review): the members below are commented out (disabled) -- confirm whether they can be dropped.
+    // WebPWorker *workers;
+    // TileWorkerData *tile_worker_data;
+    // TileBuffer tile_buffers[64];
+    // int num_tile_workers;
+};
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+struct ALPHDecoder; // Defined in dec/alphai.h.
+
+// in vp8l.c
+
+// Decodes image header for alpha data stored using lossless compression.
+// Returns false in case of error.
+int VP8LDecodeAlphaHeader(struct ALPHDecoder* const alph_dec,
+                          const uint8_t* const data,
+                          size_t data_size,
+                          uint8_t* const output);
+
+// Decodes *at least* 'last_row' rows of alpha. If some of the initial rows are
+// already decoded in previous call(s), it will resume decoding from where it
+// was paused.
+// Returns false in case of bitstream error.
+int VP8LDecodeAlphaImageStream(struct ALPHDecoder* const alph_dec, int last_row);
+
+// Allocates and initializes a new lossless decoder instance.
+VP8LDecoder* VP8LNew(void);
+
+// Decodes the image header. Returns false in case of error.
+int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io);
+
+// Decodes an image. It's required to decode the lossless header before calling
+// this function. Returns false in case of error, with updated dec->status_.
+int VP8LDecodeImage(VP8LDecoder* const dec);
+
+// Resets the decoder in its initial state, reclaiming memory.
+// Preserves the dec->status_ value.
+void VP8LClear(VP8LDecoder* const dec);
+
+// Clears and deallocates a lossless decoder instance.
+void VP8LDelete(VP8LDecoder* const dec);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_DEC_VP8LI_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/dec/webp.c b/codec/L2/demos/webpEnc/host/src/dec/webp.c
new file mode 100644
index 0000000000..c48593d1fc
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dec/webp.c
@@ -0,0 +1,829 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Main decoding functions for WEBP images.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "./vp8i.h"
+#include "./vp8li.h"
+#include "./webpi.h"
+#include "../utils/utils.h"
+#include "../utils/profiling.h"
+#include "../webp/mux_types.h" // ALPHA_FLAG
+
+//------------------------------------------------------------------------------
+// RIFF layout is:
+//   Offset  tag
+//   0...3   "RIFF" 4-byte tag
+//   4...7   size of image data (including metadata) starting at offset 8
+//   8...11  "WEBP"   our form-type signature
+// The RIFF container (12 bytes) is followed by appropriate chunks:
+//   12..15  "VP8 ": 4-bytes tags, signaling the use of VP8 video format
+//   16..19  size of the raw VP8 image data, starting at offset 20
+//   20....  the VP8 bytes
+// Or,
+//   12..15  "VP8L": 4-bytes tags, signaling the use of VP8L lossless format
+//   16..19  size of the raw VP8L image data, starting at offset 20
+//   20....  the VP8L bytes
+// Or,
+//   12..15  "VP8X": 4-bytes tags, describing the extended-VP8 chunk.
+//   16..19  size of the VP8X chunk starting at offset 20.
+//   20..23  VP8X flags bit-map corresponding to the chunk-types present.
+//   24..26  Width of the Canvas Image.
+//   27..29  Height of the Canvas Image.
+// There can be extra chunks after the "VP8X" chunk (ICCP, FRGM, ANMF, VP8,
+// VP8L, XMP, EXIF  ...)
+// All sizes are in little-endian order.
+// Note: chunk data size must be padded to multiple of 2 when written.
+
+// Checks for a RIFF container at the start of '*data' and steps past it.
+// Without a complete "RIFF" header the input is left untouched and
+// VP8_STATUS_OK is returned with *riff_size set to 0. With one, returns:
+//     VP8_STATUS_BITSTREAM_ERROR when the form type or the size is invalid,
+//     VP8_STATUS_NOT_ENOUGH_DATA when have_all_data is set but the buffer is
+//     shorter than the declared size, and VP8_STATUS_OK otherwise, with
+//     *riff_size set to the declared size and RIFF_HEADER_SIZE bytes consumed.
+static VP8StatusCode ParseRIFF(const uint8_t** const data,
+                               size_t* const data_size,
+                               int have_all_data,
+                               size_t* const riff_size) {
+    uint32_t declared_size;
+    assert(data != NULL);
+    assert(data_size != NULL);
+    assert(riff_size != NULL);
+
+    *riff_size = 0; // Remains 0 unless a RIFF container is accepted below.
+    if (*data_size < RIFF_HEADER_SIZE || memcmp(*data, "RIFF", TAG_SIZE) != 0) {
+        return VP8_STATUS_OK; // No complete RIFF header: nothing to skip.
+    }
+    if (memcmp(*data + 8, "WEBP", TAG_SIZE) != 0) {
+        return VP8_STATUS_BITSTREAM_ERROR; // Wrong image file signature.
+    }
+    declared_size = GetLE32(*data + TAG_SIZE);
+    // Needs at least one chunk (i.e "WEBP" + "VP8?nnnn") and a legal size.
+    if (declared_size < TAG_SIZE + CHUNK_HEADER_SIZE) {
+        return VP8_STATUS_BITSTREAM_ERROR;
+    }
+    if (declared_size > MAX_CHUNK_PAYLOAD) {
+        return VP8_STATUS_BITSTREAM_ERROR;
+    }
+    if (have_all_data && (declared_size > *data_size - CHUNK_HEADER_SIZE)) {
+        return VP8_STATUS_NOT_ENOUGH_DATA; // Truncated bitstream.
+    }
+    *riff_size = declared_size;
+    *data += RIFF_HEADER_SIZE;
+    *data_size -= RIFF_HEADER_SIZE;
+    return VP8_STATUS_OK;
+}
+
+// Looks for a "VP8X" chunk at the start of '*data' and steps past it.
+// Returns VP8_STATUS_BITSTREAM_ERROR when the chunk has a wrong payload size
+//         or describes a canvas that is too large,
+//         VP8_STATUS_NOT_ENOUGH_DATA when too few bytes are available, and
+//         VP8_STATUS_OK otherwise (including when no VP8X chunk is present).
+// *found_vp8x tells whether a chunk was consumed; if so, the non-NULL ones of
+// width_ptr, height_ptr and flags_ptr receive the values read from it.
+static VP8StatusCode ParseVP8X(const uint8_t** const data,
+                               size_t* const data_size,
+                               int* const found_vp8x,
+                               int* const width_ptr,
+                               int* const height_ptr,
+                               uint32_t* const flags_ptr) {
+    const uint32_t header_and_payload = CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE;
+    const uint8_t* chunk;
+    uint32_t feature_flags;
+    int canvas_w, canvas_h;
+    assert(data != NULL);
+    assert(data_size != NULL);
+    assert(found_vp8x != NULL);
+
+    *found_vp8x = 0;
+    if (*data_size < CHUNK_HEADER_SIZE) {
+        return VP8_STATUS_NOT_ENOUGH_DATA; // Cannot even read a chunk header.
+    }
+    chunk = *data;
+    if (memcmp(chunk, "VP8X", TAG_SIZE) != 0) {
+        return VP8_STATUS_OK; // Not a VP8X chunk: leave the input untouched.
+    }
+
+    if (GetLE32(chunk + TAG_SIZE) != VP8X_CHUNK_SIZE) {
+        return VP8_STATUS_BITSTREAM_ERROR; // Wrong chunk size.
+    }
+    if (*data_size < header_and_payload) {
+        return VP8_STATUS_NOT_ENOUGH_DATA; // Payload not fully available.
+    }
+
+    feature_flags = GetLE32(chunk + 8);
+    canvas_w = 1 + GetLE24(chunk + 12); // field value plus one
+    canvas_h = 1 + GetLE24(chunk + 15); // field value plus one
+    if (canvas_w * (uint64_t)canvas_h >= MAX_IMAGE_AREA) {
+        return VP8_STATUS_BITSTREAM_ERROR; // image is too large
+    }
+
+    if (flags_ptr != NULL) *flags_ptr = feature_flags;
+    if (width_ptr != NULL) *width_ptr = canvas_w;
+    if (height_ptr != NULL) *height_ptr = canvas_h;
+    // Consume the chunk header and its payload.
+    *data += header_and_payload;
+    *data_size -= header_and_payload;
+    *found_vp8x = 1;
+    return VP8_STATUS_OK;
+}
+
+// Walks the chunks starting at '*data' until a "VP8 " or "VP8L" chunk header
+// is reached, leaving '*data' / '*data_size' positioned on that header.
+// Returns VP8_STATUS_BITSTREAM_ERROR on an invalid chunk size (also when the
+//         chunks walked exceed a non-zero 'riff_size'),
+//         VP8_STATUS_NOT_ENOUGH_DATA when the input ends first, and
+//         VP8_STATUS_OK otherwise.
+// The payload of an "ALPH" chunk met on the way is reported via *alpha_data / *alpha_size.
+static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
+                                         size_t* const data_size,
+                                         size_t const riff_size,
+                                         const uint8_t** const alpha_data,
+                                         size_t* const alpha_size) {
+    const uint8_t* cursor;
+    size_t remaining;
+    // RIFF payload bytes accounted for so far: "WEBP" + "VP8Xnnnn" + data.
+    uint32_t consumed = TAG_SIZE + CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE;
+    assert(data != NULL);
+    assert(data_size != NULL);
+    cursor = *data;
+    remaining = *data_size;
+
+    assert(alpha_data != NULL);
+    assert(alpha_size != NULL);
+    *alpha_data = NULL;
+    *alpha_size = 0;
+
+    for (;;) {
+        uint32_t payload_size;
+        uint32_t padded_size; // chunk header + payload, with padding
+
+        // Publish the current position before any of the returns below.
+        *data = cursor;
+        *data_size = remaining;
+
+        if (remaining < CHUNK_HEADER_SIZE) {
+            return VP8_STATUS_NOT_ENOUGH_DATA; // No room for a chunk header.
+        }
+
+        payload_size = GetLE32(cursor + TAG_SIZE);
+        if (payload_size > MAX_CHUNK_PAYLOAD) {
+            return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size.
+        }
+        // An odd-sized payload is followed by one padding byte.
+        padded_size = (CHUNK_HEADER_SIZE + payload_size + 1) & ~1;
+        consumed += padded_size;
+
+        // The chunks walked so far must fit in the RIFF size, when known.
+        if (riff_size > 0 && (consumed > riff_size)) {
+            return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size.
+        }
+
+        // A VP8/VP8L chunk, even an incomplete one, ends the optional
+        // chunks. This test has to come before the
+        // 'remaining < padded_size' test below so that partially received
+        // VP8/VP8L chunks are still accepted.
+        if (!memcmp(cursor, "VP8 ", TAG_SIZE) || !memcmp(cursor, "VP8L", TAG_SIZE)) {
+            return VP8_STATUS_OK;
+        }
+
+        if (remaining < padded_size) {
+            return VP8_STATUS_NOT_ENOUGH_DATA; // Chunk not fully available.
+        }
+
+        if (!memcmp(cursor, "ALPH", TAG_SIZE)) { // Remember the alpha payload.
+            *alpha_data = cursor + CHUNK_HEADER_SIZE;
+            *alpha_size = payload_size;
+        }
+
+        // Step over this complete chunk.
+        cursor += padded_size;
+        remaining -= padded_size;
+    }
+}
+
+// Steps over a "VP8 nnnn" / "VP8Lnnnn" chunk header when one is present.
+// Returns VP8_STATUS_BITSTREAM_ERROR when the chunk payload cannot fit in
+//         'riff_size',
+//         VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and
+//         VP8_STATUS_OK otherwise.
+// *chunk_size receives the payload size from the chunk header or, for input
+// without such a header, the whole remaining '*data_size'.
+// *is_lossless flags a VP8L chunk, or raw data accepted by VP8LCheckSignature().
+static VP8StatusCode ParseVP8Header(const uint8_t** const data_ptr,
+                                    size_t* const data_size,
+                                    int have_all_data,
+                                    size_t riff_size,
+                                    size_t* const chunk_size,
+                                    int* const is_lossless) {
+    const uint8_t* const chunk = *data_ptr;
+    const int has_vp8_tag = (memcmp(chunk, "VP8 ", TAG_SIZE) == 0);
+    const int has_vp8l_tag = (memcmp(chunk, "VP8L", TAG_SIZE) == 0);
+    // Smallest RIFF payload with an image: "WEBP" + "VP8 nnnn" / "VP8Lnnnn".
+    const uint32_t min_riff_payload = TAG_SIZE + CHUNK_HEADER_SIZE;
+    uint32_t payload_size;
+
+    assert(chunk != NULL);
+    assert(data_size != NULL);
+    assert(chunk_size != NULL);
+    assert(is_lossless != NULL);
+
+    if (*data_size < CHUNK_HEADER_SIZE) {
+        return VP8_STATUS_NOT_ENOUGH_DATA; // Insufficient data.
+    }
+
+    if (!has_vp8_tag && !has_vp8l_tag) {
+        // Raw VP8/VP8L bitstream (no header): everything left is payload.
+        *is_lossless = VP8LCheckSignature(chunk, *data_size);
+        *chunk_size = *data_size;
+        return VP8_STATUS_OK;
+    }
+    payload_size = GetLE32(chunk + TAG_SIZE); // Size from the chunk header.
+    if ((riff_size >= min_riff_payload) && (payload_size > riff_size - min_riff_payload)) {
+        return VP8_STATUS_BITSTREAM_ERROR; // Inconsistent size information.
+    }
+    if (have_all_data && (payload_size > *data_size - CHUNK_HEADER_SIZE)) {
+        return VP8_STATUS_NOT_ENOUGH_DATA; // Truncated bitstream.
+    }
+    // Step over the chunk header only.
+    *chunk_size = payload_size;
+    *data_ptr += CHUNK_HEADER_SIZE;
+    *data_size -= CHUNK_HEADER_SIZE;
+    *is_lossless = has_vp8l_tag;
+    return VP8_STATUS_OK;
+}
+
+//------------------------------------------------------------------------------
+
+// Fetch '*width', '*height', '*has_alpha' and fill out 'headers' based on
+// 'data'. All the output parameters may be NULL. If 'headers' is NULL only the
+// minimal amount will be read to fetch the remaining parameters.
+// If 'headers' is non-NULL this function will attempt to locate both alpha
+// data (with or without a VP8X chunk) and the bitstream chunk (VP8/VP8L).
+// Note: The following chunk sequences (before the raw VP8/VP8L data) are
+// considered valid by this function:
+// RIFF + VP8(L)
+// RIFF + VP8X + (optional chunks) + VP8(L)
+// ALPH + VP8 <-- Not a valid WebP format: only allowed for internal purpose.
+// VP8(L)     <-- Not a valid WebP format: only allowed for internal purpose.
+static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
+                                          size_t data_size,
+                                          int* const width,
+                                          int* const height,
+                                          int* const has_alpha,
+                                          int* const has_animation,
+                                          int* const format,
+                                          WebPHeaderStructure* const headers) {
+    int canvas_width = 0; // canvas dimensions read from the VP8X chunk, if one is found
+    int canvas_height = 0;
+    int image_width = 0; // dimensions reported back through 'width' / 'height'
+    int image_height = 0;
+    int found_riff = 0;
+    int found_vp8x = 0;
+    int animation_present = 0;
+    int fragments_present = 0;
+    const int have_all_data = (headers != NULL) ? headers->have_all_data : 0; // 0 when 'headers' is NULL
+
+    VP8StatusCode status;
+    WebPHeaderStructure hdrs; // local working copy; copied to '*headers' only when parsing fully succeeds
+
+    if (data == NULL || data_size < RIFF_HEADER_SIZE) {
+        return VP8_STATUS_NOT_ENOUGH_DATA;
+    }
+    memset(&hdrs, 0, sizeof(hdrs));
+    hdrs.data = data;
+    hdrs.data_size = data_size;
+
+    // Skip over RIFF header.
+    status = ParseRIFF(&data, &data_size, have_all_data, &hdrs.riff_size);
+    if (status != VP8_STATUS_OK) {
+        return status; // Wrong RIFF header / insufficient data.
+    }
+    found_riff = (hdrs.riff_size > 0); // ParseRIFF leaves riff_size at 0 when it skipped no RIFF container
+
+    // Skip over VP8X.
+    {
+        uint32_t flags = 0;
+        status = ParseVP8X(&data, &data_size, &found_vp8x, &canvas_width, &canvas_height, &flags);
+        if (status != VP8_STATUS_OK) {
+            return status; // Wrong VP8X / insufficient data.
+        }
+        animation_present = !!(flags & ANIMATION_FLAG);
+        fragments_present = !!(flags & FRAGMENTS_FLAG);
+        if (!found_riff && found_vp8x) {
+            // Note: This restriction may be removed in the future, if it becomes
+            // necessary to send VP8X chunk to the decoder.
+            return VP8_STATUS_BITSTREAM_ERROR;
+        }
+        if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG);
+        if (has_animation != NULL) *has_animation = animation_present;
+        if (format != NULL) *format = 0; // default = undefined
+
+        image_width = canvas_width; // provisional; VP8GetInfo / VP8LGetInfo below receive these as outputs
+        image_height = canvas_height;
+        if (found_vp8x && (animation_present || fragments_present) && headers == NULL) {
+            status = VP8_STATUS_OK;
+            goto ReturnWidthHeight; // Just return features from VP8X header.
+        }
+    }
+
+    if (data_size < TAG_SIZE) {
+        status = VP8_STATUS_NOT_ENOUGH_DATA;
+        goto ReturnWidthHeight;
+    }
+
+    // Skip over optional chunks if data started with "RIFF + VP8X" or "ALPH".
+    if ((found_riff && found_vp8x) || (!found_riff && !found_vp8x && !memcmp(data, "ALPH", TAG_SIZE))) {
+        status = ParseOptionalChunks(&data, &data_size, hdrs.riff_size, &hdrs.alpha_data, &hdrs.alpha_data_size);
+        if (status != VP8_STATUS_OK) {
+            goto ReturnWidthHeight; // Invalid chunk size / insufficient data.
+        }
+    }
+
+    // Skip over VP8/VP8L header.
+    status = ParseVP8Header(&data, &data_size, have_all_data, hdrs.riff_size, &hdrs.compressed_size, &hdrs.is_lossless);
+    if (status != VP8_STATUS_OK) {
+        goto ReturnWidthHeight; // Wrong VP8/VP8L chunk-header / insufficient data.
+    }
+    if (hdrs.compressed_size > MAX_CHUNK_PAYLOAD) {
+        return VP8_STATUS_BITSTREAM_ERROR;
+    }
+
+    if (format != NULL && !(animation_present || fragments_present)) {
+        *format = hdrs.is_lossless ? 2 : 1; // 2 = lossless (VP8L), 1 = lossy (VP8)
+    }
+
+    if (!hdrs.is_lossless) {
+        if (data_size < VP8_FRAME_HEADER_SIZE) {
+            status = VP8_STATUS_NOT_ENOUGH_DATA;
+            goto ReturnWidthHeight;
+        }
+        // Validates raw VP8 data.
+        if (!VP8GetInfo(data, data_size, (uint32_t)hdrs.compressed_size, &image_width, &image_height)) {
+            return VP8_STATUS_BITSTREAM_ERROR;
+        }
+    } else {
+        if (data_size < VP8L_FRAME_HEADER_SIZE) {
+            status = VP8_STATUS_NOT_ENOUGH_DATA;
+            goto ReturnWidthHeight;
+        }
+        // Validates raw VP8L data.
+        if (!VP8LGetInfo(data, data_size, &image_width, &image_height, has_alpha)) {
+            return VP8_STATUS_BITSTREAM_ERROR;
+        }
+    }
+    // Validates image size coherency.
+    if (found_vp8x) {
+        if (canvas_width != image_width || canvas_height != image_height) {
+            return VP8_STATUS_BITSTREAM_ERROR;
+        }
+    }
+    if (headers != NULL) {
+        *headers = hdrs;
+        headers->offset = data - headers->data; // bytes consumed before the raw VP8/VP8L data
+        assert((uint64_t)(data - headers->data) < MAX_CHUNK_PAYLOAD);
+        assert(headers->offset == headers->data_size - data_size);
+    }
+ReturnWidthHeight: // Also reached with NOT_ENOUGH_DATA; see the condition below.
+    if (status == VP8_STATUS_OK || (status == VP8_STATUS_NOT_ENOUGH_DATA && found_vp8x && headers == NULL)) {
+        if (has_alpha != NULL) {
+            // If the data did not contain a VP8X/VP8L chunk the only definitive way
+            // to set this is by looking for alpha data (from an ALPH chunk).
+            *has_alpha |= (hdrs.alpha_data != NULL);
+        }
+        if (width != NULL) *width = image_width;
+        if (height != NULL) *height = image_height;
+        return VP8_STATUS_OK; // NOT_ENOUGH_DATA with a VP8X chunk and no 'headers' is reported as OK
+    } else {
+        return status;
+    }
+}
+
+VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
+    // Fills 'headers' from headers->data. Animated input is reported as
+    // VP8_STATUS_UNSUPPORTED_FEATURE instead of OK / NOT_ENOUGH_DATA.
+    int animated = 0;
+    VP8StatusCode result;
+    assert(headers != NULL);
+    // The width, height, has_alpha and format outputs are not requested.
+    result = ParseHeadersInternal(headers->data, headers->data_size, NULL, NULL, NULL, &animated, NULL, headers);
+    // TODO(jzern): full support of animation frames will require API additions.
+    if (animated && (result == VP8_STATUS_OK || result == VP8_STATUS_NOT_ENOUGH_DATA)) {
+        result = VP8_STATUS_UNSUPPORTED_FEATURE;
+    }
+    return result;
+}
+
+//------------------------------------------------------------------------------
+// WebPDecParams
+
+void WebPResetDecParams(WebPDecParams* const params) {
+    // Zero-fills the whole structure; a NULL 'params' is ignored.
+    if (params == NULL) return;
+    memset(params, 0, sizeof(*params));
+}
+
+//------------------------------------------------------------------------------
+// "Into" decoding variants
+
+// Main flow: parses the container headers of 'data', decodes the VP8 (lossy)
+// or VP8L (lossless) payload into params->output and returns the resulting
+// status. When decoding fails, params->output is released again.
+static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size, WebPDecParams* const params) {
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    VP8StatusCode status;
+    VP8Io io;
+    WebPHeaderStructure headers;
+
+    headers.data = data;
+    headers.data_size = data_size;
+    headers.have_all_data = 1;
+    status = WebPParseHeaders(&headers); // Process Pre-VP8 chunks.
+    if (status != VP8_STATUS_OK) {
+        StopProfiling(&stop_watch, &timeDecodeInto, &countDecodeInto);
+        return status;
+    }
+
+    assert(params != NULL);
+    VP8InitIo(&io);
+    io.data = headers.data + headers.offset;
+    io.data_size = headers.data_size - headers.offset;
+    WebPInitCustomIo(params, &io); // Plug the I/O functions.
+
+    if (!headers.is_lossless) {
+        VP8Decoder* const dec = VP8New();
+        if (dec == NULL) {
+            StopProfiling(&stop_watch, &timeDecodeInto, &countDecodeInto);
+            return VP8_STATUS_OUT_OF_MEMORY;
+        }
+        dec->alpha_data_ = headers.alpha_data;
+        dec->alpha_data_size_ = headers.alpha_data_size;
+        // Decode bitstream header, update io->width/io->height.
+        if (!VP8GetHeaders(dec, &io)) {
+            status = dec->status_; // An error occurred. Grab error status.
+        } else {
+            // Allocate/check output buffers.
+            status = WebPAllocateDecBuffer(io.width, io.height, params->options, params->output);
+            if (status == VP8_STATUS_OK) { // Decode
+                // This change must be done before calling VP8Decode()
+                dec->mt_method_ = VP8GetThreadMethod(params->options, &headers, io.width, io.height);
+                VP8InitDithering(params->options, dec);
+                if (!VP8Decode(dec, &io)) {
+                    status = dec->status_;
+                }
+            }
+        }
+        VP8Delete(dec);
+    } else {
+        VP8LDecoder* const dec = VP8LNew();
+        if (dec == NULL) {
+            StopProfiling(&stop_watch, &timeDecodeInto, &countDecodeInto); // same as every other exit
+            return VP8_STATUS_OUT_OF_MEMORY;
+        }
+        if (!VP8LDecodeHeader(dec, &io)) {
+            status = dec->status_; // An error occurred. Grab error status.
+        } else {
+            // Allocate/check output buffers.
+            status = WebPAllocateDecBuffer(io.width, io.height, params->options, params->output);
+            if (status == VP8_STATUS_OK) { // Decode
+                if (!VP8LDecodeImage(dec)) {
+                    status = dec->status_;
+                }
+            }
+        }
+        VP8LDelete(dec);
+    }
+
+    if (status != VP8_STATUS_OK) {
+        WebPFreeDecBuffer(params->output);
+    } else if (params->options != NULL && params->options->flip) {
+        // Flip only after a successful decode, so an earlier error status is kept.
+        status = WebPFlipBuffer(params->output);
+    }
+
+    StopProfiling(&stop_watch, &timeDecodeInto, &countDecodeInto);
+    return status;
+}
+
+// Helpers: decodes 'data' into the caller-provided 'rgba' memory; NULL on failure.
+static uint8_t* DecodeIntoRGBABuffer(WEBP_CSP_MODE colorspace,
+                                     const uint8_t* const data,
+                                     size_t data_size,
+                                     uint8_t* const rgba,
+                                     int stride,
+                                     size_t size) {
+    WebPDecBuffer target;
+    WebPDecParams dec_params;
+
+    if (rgba == NULL) return NULL; // No destination memory was supplied.
+
+    // Describe the caller's memory as an external output buffer.
+    WebPInitDecBuffer(&target);
+    target.colorspace = colorspace;
+    target.u.RGBA.rgba = rgba;
+    target.u.RGBA.stride = stride;
+    target.u.RGBA.size = size;
+    target.is_external_memory = 1;
+
+    WebPResetDecParams(&dec_params);
+    dec_params.output = &target;
+    // Hand the caller's pointer back on success only.
+    return (DecodeInto(data, data_size, &dec_params) == VP8_STATUS_OK) ? rgba : NULL;
+}
+// Decodes 'data' as MODE_RGB into the caller's 'output' memory; returns 'output', or NULL on failure.
+uint8_t* WebPDecodeRGBInto(const uint8_t* data, size_t data_size, uint8_t* output, size_t size, int stride) {
+    return DecodeIntoRGBABuffer(MODE_RGB, data, data_size, output, stride, size);
+}
+// Decodes 'data' as MODE_RGBA into the caller's 'output' memory; returns 'output', or NULL on failure.
+uint8_t* WebPDecodeRGBAInto(const uint8_t* data, size_t data_size, uint8_t* output, size_t size, int stride) {
+    return DecodeIntoRGBABuffer(MODE_RGBA, data, data_size, output, stride, size);
+}
+// Decodes 'data' as MODE_ARGB into the caller's 'output' memory; returns 'output', or NULL on failure.
+uint8_t* WebPDecodeARGBInto(const uint8_t* data, size_t data_size, uint8_t* output, size_t size, int stride) {
+    return DecodeIntoRGBABuffer(MODE_ARGB, data, data_size, output, stride, size);
+}
+// Decodes 'data' as MODE_BGR into the caller's 'output' memory; returns 'output', or NULL on failure.
+uint8_t* WebPDecodeBGRInto(const uint8_t* data, size_t data_size, uint8_t* output, size_t size, int stride) {
+    return DecodeIntoRGBABuffer(MODE_BGR, data, data_size, output, stride, size);
+}
+// Decodes 'data' as MODE_BGRA into the caller's 'output' memory; returns 'output', or NULL on failure.
+uint8_t* WebPDecodeBGRAInto(const uint8_t* data, size_t data_size, uint8_t* output, size_t size, int stride) {
+    return DecodeIntoRGBABuffer(MODE_BGRA, data, data_size, output, stride, size);
+}
+
+uint8_t* WebPDecodeYUVInto(const uint8_t* data,
+                           size_t data_size,
+                           uint8_t* luma,
+                           size_t luma_size,
+                           int luma_stride,
+                           uint8_t* u,
+                           size_t u_size,
+                           int u_stride,
+                           uint8_t* v,
+                           size_t v_size,
+                           int v_stride) {
+    WebPDecBuffer output;
+    WebPDecParams params;
+    WebPYUVABuffer* const planes = &output.u.YUVA;
+
+    if (luma == NULL) return NULL; // Only the luma pointer is checked here.
+    // Describe the caller's planes as an external MODE_YUV output buffer.
+    WebPInitDecBuffer(&output);
+    output.colorspace = MODE_YUV;
+    output.is_external_memory = 1;
+    planes->y = luma;
+    planes->y_stride = luma_stride;
+    planes->y_size = luma_size;
+    planes->u = u;
+    planes->u_stride = u_stride;
+    planes->u_size = u_size;
+    planes->v = v;
+    planes->v_stride = v_stride;
+    planes->v_size = v_size;
+    WebPResetDecParams(&params);
+    params.output = &output;
+    return (DecodeInto(data, data_size, &params) == VP8_STATUS_OK) ? luma : NULL;
+}
+
+//------------------------------------------------------------------------------
+
+static uint8_t* Decode(WEBP_CSP_MODE mode,
+                       const uint8_t* const data,
+                       size_t data_size,
+                       int* const width,
+                       int* const height,
+                       WebPDecBuffer* const keep_info) {
+    WebPDecBuffer output;
+    WebPDecParams params;
+    uint8_t* samples;
+
+    WebPInitDecBuffer(&output);
+    output.colorspace = mode;
+    WebPResetDecParams(&params);
+    params.output = &output;
+
+    // Read the picture dimensions from the bitstream and report them back.
+    if (!WebPGetInfo(data, data_size, &output.width, &output.height)) return NULL;
+    if (width != NULL) *width = output.width;
+    if (height != NULL) *height = output.height;
+
+    // Decode into 'output'; optionally keep track of the side-info.
+    if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) return NULL;
+    if (keep_info != NULL) WebPCopyDecBuffer(&output, keep_info);
+    // return decoded samples (don't clear 'output'!)
+    if (WebPIsRGBMode(mode)) {
+        samples = output.u.RGBA.rgba;
+    } else {
+        samples = output.u.YUVA.y;
+    }
+    return samples;
+}
+// Returns the MODE_RGB samples produced by Decode(), or NULL on failure; non-NULL width/height get the size.
+uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size, int* width, int* height) {
+    return Decode(MODE_RGB, data, data_size, width, height, NULL);
+}
+// Returns the MODE_RGBA samples produced by Decode(), or NULL on failure; non-NULL width/height get the size.
+uint8_t* WebPDecodeRGBA(const uint8_t* data, size_t data_size, int* width, int* height) {
+    return Decode(MODE_RGBA, data, data_size, width, height, NULL);
+}
+// Returns the MODE_ARGB samples produced by Decode(), or NULL on failure; non-NULL width/height get the size.
+uint8_t* WebPDecodeARGB(const uint8_t* data, size_t data_size, int* width, int* height) {
+    return Decode(MODE_ARGB, data, data_size, width, height, NULL);
+}
+// Returns the MODE_BGR samples produced by Decode(), or NULL on failure; non-NULL width/height get the size.
+uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size, int* width, int* height) {
+    return Decode(MODE_BGR, data, data_size, width, height, NULL);
+}
+// Returns the MODE_BGRA samples produced by Decode(), or NULL on failure; non-NULL width/height get the size.
+uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size, int* width, int* height) {
+    return Decode(MODE_BGRA, data, data_size, width, height, NULL);
+}
+
+uint8_t* WebPDecodeYUV(const uint8_t* data,
+                       size_t data_size,
+                       int* width,
+                       int* height,
+                       uint8_t** u,
+                       uint8_t** v,
+                       int* stride,
+                       int* uv_stride) {
+    WebPDecBuffer side_info; // receives the side-infos copied by Decode()
+    uint8_t* const luma = Decode(MODE_YUV, data, data_size, width, height, &side_info);
+    if (luma == NULL) {
+        return NULL; // Decoding failed: u, v and the strides are not written.
+    }
+    // Hand back the chroma planes and the strides of the decoded picture.
+    *u = side_info.u.YUVA.u;
+    *v = side_info.u.YUVA.v;
+    *stride = side_info.u.YUVA.y_stride;
+    *uv_stride = side_info.u.YUVA.u_stride;
+    assert(side_info.u.YUVA.u_stride == side_info.u.YUVA.v_stride);
+    return luma;
+}
+// Resets '*features' to all-zero bytes; 'features' must not be NULL (asserted).
+static void DefaultFeatures(WebPBitstreamFeatures* const features) {
+    assert(features != NULL);
+    memset(features, 0, sizeof(*features));
+}
+
+static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size, WebPBitstreamFeatures* const features) {
+    // Both pointers are mandatory; 'features' is zeroed before the header parse fills it in.
+    const int args_ok = (data != NULL) && (features != NULL);
+    if (!args_ok) return VP8_STATUS_INVALID_PARAM;
+
+    DefaultFeatures(features);
+    // Parse just enough of the data to retrieve the features.
+    return ParseHeadersInternal(data, data_size, &features->width, &features->height, &features->has_alpha,
+                                &features->has_animation, &features->format, NULL);
+}
+
+//------------------------------------------------------------------------------
+// WebPGetInfo()
+
+int WebPGetInfo(const uint8_t* data, size_t data_size, int* width, int* height) {
+    // Reads the picture dimensions from the headers of 'data'. Returns 1 on
+    // success and 0 when GetFeatures() reports anything but VP8_STATUS_OK, in
+    // which case neither output is written.
+    WebPBitstreamFeatures parsed;
+    const VP8StatusCode status = GetFeatures(data, data_size, &parsed);
+
+    if (status == VP8_STATUS_OK) {
+        // Each output pointer is optional and skipped when NULL.
+        if (width != NULL) *width = parsed.width;
+        if (height != NULL) *height = parsed.height;
+        return 1;
+    }
+
+    return 0;
+}
+
+//------------------------------------------------------------------------------
+// Advanced decoding API
+
+int WebPInitDecoderConfigInternal(WebPDecoderConfig* config, int version) {
+    // Returns 0, leaving 'config' untouched, on an ABI version mismatch or a NULL
+    // 'config'. Otherwise zeroes the whole structure, initializes its output
+    // buffer and returns 1.
+    const int abi_mismatch = WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION);
+    if (abi_mismatch || config == NULL) return 0;
+
+    memset(config, 0, sizeof(*config));
+    DefaultFeatures(&config->input);
+    WebPInitDecBuffer(&config->output);
+    return 1;
+}
+
+VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data,
+                                      size_t data_size,
+                                      WebPBitstreamFeatures* features,
+                                      int version) {
+    // ABI-checked entry point around GetFeatures(): an incompatible 'version' or
+    // a NULL 'features' yields VP8_STATUS_INVALID_PARAM without looking at 'data'.
+    const int abi_ok = !WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION);
+    if (abi_ok && features != NULL) {
+        return GetFeatures(data, data_size, features);
+    }
+    return VP8_STATUS_INVALID_PARAM;
+}
+
+VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size, WebPDecoderConfig* config) {
+    StopProfilingWatch stop_watch; // stopwatch handed to the profiling helpers below
+    StartProfiling(&stop_watch);
+    // Parses the headers into config->input, then decodes 'data' into config->output.
+    WebPDecParams params;
+    VP8StatusCode status;
+
+    if (config == NULL) { // NOTE(review): every early return below skips StopProfiling() - confirm this is intended
+        return VP8_STATUS_INVALID_PARAM;
+    }
+
+    status = GetFeatures(data, data_size, &config->input);
+    if (status != VP8_STATUS_OK) {
+        if (status == VP8_STATUS_NOT_ENOUGH_DATA) {
+            return VP8_STATUS_BITSTREAM_ERROR; // Not-enough-data treated as error.
+        }
+        return status;
+    }
+
+    WebPResetDecParams(&params);
+    params.output = &config->output; // decode straight into the caller's buffer...
+    params.options = &config->options; // ...honoring the caller's options
+    status = DecodeInto(data, data_size, &params);
+
+    StopProfiling(&stop_watch, &timeWebPDecode, &countWebPDecode); // reached whether or not DecodeInto() succeeded
+
+    return status;
+}
+
+//------------------------------------------------------------------------------
+// Cropping and rescaling.
+
+int WebPIoInitFromOptions(const WebPDecoderOptions* const options, VP8Io* const io, WEBP_CSP_MODE src_colorspace) {
+    const int W = io->width;
+    const int H = io->height;
+    int x = 0, y = 0, w = W, h = H;
+    // Configures 'io' from 'options' (NULL: defaults). Returns 1, or 0 if the crop/scale request is rejected.
+    // Cropping: the full W x H frame is used unless options->use_cropping > 0.
+    io->use_cropping = (options != NULL) && (options->use_cropping > 0);
+    if (io->use_cropping) {
+        w = options->crop_width;
+        h = options->crop_height;
+        x = options->crop_left;
+        y = options->crop_top;
+        if (!WebPIsRGBMode(src_colorspace)) { // only snap for YUV420
+            x &= ~1;
+            y &= ~1;
+        }
+        if (x < 0 || y < 0 || w <= 0 || h <= 0 || x > W || y > H || w > W - x || h > H - y) {
+            return 0; // out of frame boundary error (tested without forming x + w / y + h, which can overflow int)
+        }
+    }
+    io->crop_left = x;
+    io->crop_top = y;
+    io->crop_right = x + w;
+    io->crop_bottom = y + h;
+    io->mb_w = w;
+    io->mb_h = h;
+
+    // Scaling: the requested size is resolved against the (possibly cropped) w x h window.
+    io->use_scaling = (options != NULL) && (options->use_scaling > 0);
+    if (io->use_scaling) {
+        int scaled_width = options->scaled_width;
+        int scaled_height = options->scaled_height;
+        if (!WebPRescalerGetScaledDimensions(w, h, &scaled_width, &scaled_height)) {
+            return 0;
+        }
+        io->scaled_width = scaled_width;
+        io->scaled_height = scaled_height;
+    }
+
+    // Filter
+    io->bypass_filtering = options && options->bypass_filtering;
+
+// Fancy upsampler
+#ifdef FANCY_UPSAMPLING
+    io->fancy_upsampling = (options == NULL) || (!options->no_fancy_upsampling);
+#endif
+
+    if (io->use_scaling) {
+        // Also disable the filter for large downscaling ratios; '|=' keeps a bypass the caller asked for.
+        io->bypass_filtering |= (io->scaled_width < W * 3 / 4) && (io->scaled_height < H * 3 / 4);
+        io->fancy_upsampling = 0;
+    }
+    return 1;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/dec/webpi.h b/codec/L2/demos/webpEnc/host/src/dec/webpi.h
new file mode 100644
index 0000000000..dfc32dbb91
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dec/webpi.h
@@ -0,0 +1,120 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Internal header: WebP decoding parameters and custom IO on buffer
+//
+// Author: somnath@google.com (Somnath Banerjee)
+
+#ifndef WEBP_DEC_WEBPI_H_
+#define WEBP_DEC_WEBPI_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "../utils/rescaler.h"
+#include "./decode_vp8.h"
+
+//------------------------------------------------------------------------------
+// WebPDecParams: Decoding output parameters. Transient internal object.
+
+typedef struct WebPDecParams WebPDecParams;
+typedef int (*OutputFunc)(const VP8Io* const io, WebPDecParams* const p);
+typedef int (*OutputAlphaFunc)(const VP8Io* const io, WebPDecParams* const p, int expected_num_out_lines);
+typedef int (*OutputRowFunc)(WebPDecParams* const p, int y_pos, int max_out_lines);
+
+struct WebPDecParams {
+    WebPDecBuffer* output;          // output buffer (e.g. WebPDecode() points it at config->output).
+    uint8_t *tmp_y, *tmp_u, *tmp_v; // cache for the fancy upsampler
+                                    // or used for tmp rescaling
+
+    int last_y;                        // coordinate of the line that was last output
+    const WebPDecoderOptions* options; // if not NULL, use alt decoding features (WebPDecode() passes &config->options)
+    // rescalers
+    WebPRescaler scaler_y, scaler_u, scaler_v, scaler_a;
+    void* memory; // overall scratch memory for the output work.
+
+    OutputFunc emit;              // output RGB or YUV samples
+    OutputAlphaFunc emit_alpha;   // output alpha channel
+    OutputRowFunc emit_alpha_row; // output one line of rescaled alpha values
+};
+
+// Should be called first, before any use of the WebPDecParams object.
+void WebPResetDecParams(WebPDecParams* const params);
+
+//------------------------------------------------------------------------------
+// Header parsing helpers
+
+// Structure storing a description of the RIFF headers.
+typedef struct {
+    const uint8_t* data;       // input buffer
+    size_t data_size;          // input buffer size
+    int have_all_data;         // true if all data is known to be available
+    size_t offset;             // offset to main data chunk (VP8 or VP8L)
+    const uint8_t* alpha_data; // points to alpha chunk (if present)
+    size_t alpha_data_size;    // alpha chunk size
+    size_t compressed_size;    // VP8/VP8L compressed data size
+    size_t riff_size;          // size of the riff payload (or 0 if absent)
+    int is_lossless;           // true if a VP8L chunk is present
+} WebPHeaderStructure;
+
+// Skips over all valid chunks prior to the first VP8/VP8L frame header.
+// Returns: VP8_STATUS_OK, VP8_STATUS_BITSTREAM_ERROR (invalid header/chunk),
+// VP8_STATUS_NOT_ENOUGH_DATA (partial input) or VP8_STATUS_UNSUPPORTED_FEATURE
+// in the case of non-decodable features (animation for instance).
+// In 'headers', the compressed_size, offset, alpha_data, alpha_data_size and
+// is_lossless fields are updated appropriately upon success.
+VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers);
+
+//------------------------------------------------------------------------------
+// Misc utils
+
+// Initializes VP8Io with custom setup, io and teardown functions. The default
+// hooks will use the supplied 'params' as io->opaque handle.
+void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io);
+
+// Setup crop_xxx, mb_w, mb_h and the scaling / filtering fields in io; returns 0 if the crop or
+// scaling request is rejected. 'src_colorspace' refers to the *compressed* format, not the output one.
+int WebPIoInitFromOptions(const WebPDecoderOptions* const options, VP8Io* const io, WEBP_CSP_MODE src_colorspace);
+
+//------------------------------------------------------------------------------
+// Internal functions regarding WebPDecBuffer memory (in buffer.c).
+// Don't really need to be externally visible for now.
+
+// Prepare 'buffer' with the requested initial dimensions width/height.
+// If no external storage is supplied, initializes buffer by allocating output
+// memory and setting up the stride information. Validate the parameters. Return
+// an error code in case of problem (no memory, or invalid stride / size /
+// dimension / etc.). If 'options' is not NULL, also verify that the options'
+// parameters are valid and apply them to the width/height dimensions of the
+// output buffer. This takes cropping / scaling / rotation into account.
+// Also incorporates the options->flip flag to flip the buffer parameters if
+// needed.
+VP8StatusCode WebPAllocateDecBuffer(int width,
+                                    int height,
+                                    const WebPDecoderOptions* const options,
+                                    WebPDecBuffer* const buffer);
+
+// Flip buffer vertically by negating the various strides.
+VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer);
+
+// Copy 'src' into 'dst' buffer, making sure 'dst' is not marked as owner of the
+// memory (still held by 'src').
+void WebPCopyDecBuffer(const WebPDecBuffer* const src, WebPDecBuffer* const dst);
+
+// Copy and transfer ownership from src to dst (beware of parameter order!)
+void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_DEC_WEBPI_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/demux/Makefile.am b/codec/L2/demos/webpEnc/host/src/demux/Makefile.am
new file mode 100644
index 0000000000..d7ce525f2f
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/demux/Makefile.am
@@ -0,0 +1,14 @@
+lib_LTLIBRARIES = libwebpdemux.la
+# Sources compiled into libwebpdemux.la.
+libwebpdemux_la_SOURCES =
+libwebpdemux_la_SOURCES += anim_decode.c demux.c
+# Headers installed into $(libwebpdemuxincludedir), defined below.
+libwebpdemuxinclude_HEADERS =
+libwebpdemuxinclude_HEADERS += ../webp/demux.h
+libwebpdemuxinclude_HEADERS += ../webp/mux_types.h
+libwebpdemuxinclude_HEADERS += ../webp/types.h
+# The library is linked against ../libwebp.la; -version-info carries the libtool interface version.
+libwebpdemux_la_LIBADD = ../libwebp.la
+libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:0:0
+libwebpdemuxincludedir = $(includedir)/webp
+pkgconfig_DATA = libwebpdemux.pc
diff --git a/codec/L2/demos/webpEnc/host/src/demux/anim_decode.c b/codec/L2/demos/webpEnc/host/src/demux/anim_decode.c
new file mode 100644
index 0000000000..1989eb4ab4
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/demux/anim_decode.c
@@ -0,0 +1,442 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  AnimDecoder implementation.
+//
+
+#ifdef HAVE_CONFIG_H
+#include "../webp/config.h"
+#endif
+
+#include <assert.h>
+#include <string.h>
+
+#include "../utils/utils.h"
+#include "../webp/decode.h"
+#include "../webp/demux.h"
+
+#define NUM_CHANNELS 4
+
+typedef void (*BlendRowFunc)(uint32_t* const, const uint32_t* const, int);
+static void BlendPixelRowNonPremult(uint32_t* const src,
+                                    const uint32_t* const dst, int num_pixels);
+static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst,
+                                 int num_pixels);
+
+struct WebPAnimDecoder {
+  WebPDemuxer* demux_;             // Demuxer created from given WebP bitstream.
+  WebPDecoderConfig config_;       // Decoder config, reused for every frame.
+  // Note: we use a pointer to a function blending multiple pixels at a time to
+  // allow possible inlining of per-pixel blending function.
+  BlendRowFunc blend_func_;        // Pointer to the chosen blend row function.
+  WebPAnimInfo info_;              // Global info about the animation.
+  uint8_t* curr_frame_;            // Current canvas (not disposed).
+  uint8_t* prev_frame_disposed_;   // Previous canvas (properly disposed).
+  int prev_frame_timestamp_;       // Previous frame timestamp (milliseconds).
+  WebPIterator prev_iter_;         // Iterator object for previous frame.
+  int prev_frame_was_keyframe_;    // True if previous frame was a keyframe.
+  int next_frame_;                 // Index of the next frame to be decoded
+                                   // (starting from 1).
+};
+
+static void DefaultDecoderOptions(WebPAnimDecoderOptions* const dec_options) {
+  dec_options->color_mode = MODE_RGBA;  // default output mode
+  dec_options->use_threads = 0;         // default: use_threads option off
+}
+
+int WebPAnimDecoderOptionsInitInternal(WebPAnimDecoderOptions* dec_options,
+                                       int abi_version) {
+  // Writes the default options into '*dec_options' and returns 1. Returns 0,
+  // writing nothing, for a NULL pointer or an incompatible ABI version.
+  const int ok = dec_options != NULL &&
+                 !WEBP_ABI_IS_INCOMPATIBLE(abi_version, WEBP_DEMUX_ABI_VERSION);
+  if (ok) DefaultDecoderOptions(dec_options);
+  return ok;
+}
+
+static int ApplyDecoderOptions(const WebPAnimDecoderOptions* const dec_options,
+                               WebPAnimDecoder* const dec) {
+  // Validates 'dec_options' and configures 'dec'; returns 1 on success, else 0.
+  WEBP_CSP_MODE mode;
+  WebPDecoderConfig* config = &dec->config_;
+  assert(dec_options != NULL);
+
+  mode = dec_options->color_mode;
+  if (mode != MODE_RGBA && mode != MODE_BGRA &&
+      mode != MODE_rgbA && mode != MODE_bgrA) {
+    return 0;  // only these four output color modes are supported
+  }
+  dec->blend_func_ = (mode == MODE_RGBA || mode == MODE_BGRA)
+                         ? &BlendPixelRowNonPremult : &BlendPixelRowPremult;
+  if (!WebPInitDecoderConfig(config)) return 0;  // e.g. ABI version mismatch
+  config->output.colorspace = mode;
+  config->output.is_external_memory = 1;  // frames are decoded into the canvas
+  config->options.use_threads = dec_options->use_threads;
+  // Note: config->output.u.RGBA is set at the time of decoding each frame.
+  return 1;
+}
+
+WebPAnimDecoder* WebPAnimDecoderNewInternal(
+    const WebPData* webp_data, const WebPAnimDecoderOptions* dec_options,
+    int abi_version) {
+  // Creates an animation decoder for 'webp_data'. Returns NULL on a NULL input,
+  // an ABI mismatch, unsupported options, a demux error or a failed allocation.
+  WebPAnimDecoderOptions options;
+  WebPAnimDecoder* dec = NULL;
+  if (webp_data == NULL ||
+      WEBP_ABI_IS_INCOMPATIBLE(abi_version, WEBP_DEMUX_ABI_VERSION)) {
+    return NULL;
+  }
+
+  // Note: calloc() so that the pointer members are initialized to NULL.
+  dec = (WebPAnimDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
+  if (dec == NULL) goto Error;
+
+  DefaultDecoderOptions(&options);
+  if (dec_options != NULL) options = *dec_options;  // caller's options win
+  if (!ApplyDecoderOptions(&options, dec)) goto Error;
+
+  dec->demux_ = WebPDemux(webp_data);
+  if (dec->demux_ == NULL) goto Error;
+
+  dec->info_.canvas_width = WebPDemuxGetI(dec->demux_, WEBP_FF_CANVAS_WIDTH);
+  dec->info_.canvas_height = WebPDemuxGetI(dec->demux_, WEBP_FF_CANVAS_HEIGHT);
+  dec->info_.loop_count = WebPDemuxGetI(dec->demux_, WEBP_FF_LOOP_COUNT);
+  dec->info_.bgcolor = WebPDemuxGetI(dec->demux_, WEBP_FF_BACKGROUND_COLOR);
+  dec->info_.frame_count = WebPDemuxGetI(dec->demux_, WEBP_FF_FRAME_COUNT);
+
+  {
+    // 64-bit row size x row count: a 32-bit width * 4 * height could wrap.
+    const uint64_t row_bytes = (uint64_t)dec->info_.canvas_width * NUM_CHANNELS;
+    // Note: calloc() because we fill frame with zeroes as well.
+    dec->curr_frame_ = WebPSafeCalloc(row_bytes, dec->info_.canvas_height);
+    if (dec->curr_frame_ == NULL) goto Error;
+    dec->prev_frame_disposed_ =
+        WebPSafeCalloc(row_bytes, dec->info_.canvas_height);
+    if (dec->prev_frame_disposed_ == NULL) goto Error;
+  }
+
+  WebPAnimDecoderReset(dec);
+
+  return dec;
+
+ Error:
+  WebPAnimDecoderDelete(dec);
+  return NULL;
+}
+
+int WebPAnimDecoderGetInfo(const WebPAnimDecoder* dec, WebPAnimInfo* info) {
+  const int ok = (dec != NULL) && (info != NULL);  // both pointers are required
+  if (ok) *info = dec->info_;                      // struct copy of the global info
+  return ok;
+}
+
+// Tells whether a width x height frame has exactly the canvas dimensions.
+static int IsFullFrame(int width, int height, int canvas_width,
+                       int canvas_height) {
+  return !(width != canvas_width || height != canvas_height);
+}
+
+// Clear the canvas to transparent (byte count computed in size_t, not 32 bits).
+static void ZeroFillCanvas(uint8_t* buf, uint32_t canvas_width,
+                           uint32_t canvas_height) {
+  memset(buf, 0, (size_t)canvas_width * NUM_CHANNELS * canvas_height);
+}
+
+// Clear given frame rectangle to transparent; offsets and sizes are in pixels.
+static void ZeroFillFrameRect(uint8_t* buf, int buf_stride, int x_offset,
+                              int y_offset, int width, int height) {
+  int j;
+  assert(width * NUM_CHANNELS <= buf_stride);  // one rect row fits in a row
+  buf += (size_t)y_offset * buf_stride + (size_t)x_offset * NUM_CHANNELS;
+  for (j = 0; j < height; ++j) {
+    memset(buf, 0, width * NUM_CHANNELS);
+    buf += buf_stride;  // 'buf_stride' is the byte distance between rows
+  }
+}
+
+// Copy width * height pixels from 'src' to 'dst' (byte count kept in size_t).
+static void CopyCanvas(const uint8_t* src, uint8_t* dst,
+                       uint32_t width, uint32_t height) {
+  assert(src != NULL && dst != NULL);
+  memcpy(dst, src, (size_t)width * NUM_CHANNELS * height);
+}
+
+// Returns non-zero if 'curr' is a key-frame, i.e. one of the cases below holds.
+static int IsKeyFrame(const WebPIterator* const curr,
+                      const WebPIterator* const prev,
+                      int prev_frame_was_key_frame,
+                      int canvas_width, int canvas_height) {
+  const int curr_overwrites = !curr->has_alpha ||
+                              curr->blend_method == WEBP_MUX_NO_BLEND;
+  if (curr->frame_num == 1) return 1;  // the very first frame
+  // A full-canvas frame that has no alpha or is not blended.
+  if (curr_overwrites && IsFullFrame(curr->width, curr->height,
+                                     canvas_width, canvas_height)) {
+    return 1;
+  }
+  // Previous frame disposed to background, being full-canvas or a key-frame.
+  if (prev->dispose_method != WEBP_MUX_DISPOSE_BACKGROUND) return 0;
+  return IsFullFrame(prev->width, prev->height, canvas_width, canvas_height) ||
+         prev_frame_was_key_frame;
+}
+
+
+// Mixes the 8-bit channel located at bit 'shift' of 'src' and 'dst', weighting
+// each by its alpha; inputs are assumed to be NOT pre-multiplied by alpha.
+static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a,
+                                      uint32_t dst, uint8_t dst_a,
+                                      uint32_t scale, int shift) {
+  const uint32_t weighted_src = ((src >> shift) & 0xff) * src_a;
+  const uint32_t weighted_dst = ((dst >> shift) & 0xff) * dst_a;
+  const uint32_t weighted_sum = weighted_src + weighted_dst;
+  assert(weighted_sum < (1ULL << 32) / scale);  // product below fits 32 bits
+  return (weighted_sum * scale) >> 24;
+}
+
+// Composite 'src' over 'dst', both holding NON-pre-multiplied color with the
+// alpha in the top byte. A fully transparent 'src' yields 'dst' unchanged.
+static uint32_t BlendPixelNonPremult(uint32_t src, uint32_t dst) {
+  const uint8_t src_a = (src >> 24) & 0xff;
+
+  if (src_a != 0) {
+    const uint8_t dst_a = (dst >> 24) & 0xff;
+    // Integer approximation of the exact weight
+    // dst_factor_a = (dst_a * (255 - src_a)) / 255.
+    const uint8_t dst_factor_a = (dst_a * (256 - src_a)) >> 8;
+    const uint8_t blend_a = src_a + dst_factor_a;
+    const uint32_t scale = (1UL << 24) / blend_a;
+
+    // The three color channels sit at bits 0, 8 and 16.
+    const uint8_t blend_lo =
+        BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 0);
+    const uint8_t blend_mid =
+        BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 8);
+    const uint8_t blend_hi =
+        BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 16);
+    assert(src_a + dst_factor_a < 256);
+    return ((uint32_t)blend_a << 24) |
+           (blend_hi << 16) |
+           (blend_mid << 8) |
+           (blend_lo << 0);
+  }
+  return dst;
+}
+
+// In-place row blend for NON-pre-multiplied pixels: every src[i] that is not
+// fully opaque is replaced by itself composited over dst[i].
+static void BlendPixelRowNonPremult(uint32_t* const src,
+                                    const uint32_t* const dst, int num_pixels) {
+  int i = 0;
+  while (i < num_pixels) {
+    const uint32_t pixel = src[i];
+    // Fully opaque pixels hide 'dst' entirely and are kept as they are.
+    if ((pixel >> 24) != 0xff) src[i] = BlendPixelNonPremult(pixel, dst[i]);
+    ++i;
+  }
+}
+
+// Scales all four 8-bit channels of 'pix' by scale / 256, two at a time.
+static WEBP_INLINE uint32_t ChannelwiseMultiply(uint32_t pix, uint32_t scale) {
+  const uint32_t kEvenBytes = 0x00FF00FF;  // selects bytes 0 and 2
+  const uint32_t even = (((pix & kEvenBytes) * scale) >> 8) & kEvenBytes;
+  const uint32_t odd = (((pix >> 8) & kEvenBytes) * scale) & ~kEvenBytes;
+  return even | odd;
+}
+
+// Pre-multiplied "over": 'src' plus 'dst' scaled by (256 - src alpha) / 256.
+static uint32_t BlendPixelPremult(uint32_t src, uint32_t dst) {
+  const uint32_t dst_weight = 256 - (src >> 24);
+  return ChannelwiseMultiply(dst, dst_weight) + src;
+}
+
+// In-place row blend for pre-multiplied pixels: every src[i] that is not fully
+// opaque is replaced by itself composited over dst[i].
+static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst,
+                                 int num_pixels) {
+  int i = 0;
+  while (i < num_pixels) {
+    const uint32_t pixel = src[i];
+    // Fully opaque pixels hide 'dst' entirely and are kept as they are.
+    if ((pixel >> 24) != 0xff) src[i] = BlendPixelPremult(pixel, dst[i]);
+    ++i;
+  }
+}
+
+// For canvas row 'canvas_y', reports the (up to two) horizontal spans of 'src'
+// not covered by 'dst' as <left, width> pairs; a width of 0 means empty.
+static void FindBlendRangeAtRow(const WebPIterator* const src,
+                                const WebPIterator* const dst, int canvas_y,
+                                int* const left1, int* const width1,
+                                int* const left2, int* const width2) {
+  const int src_left = src->x_offset, src_right = src_left + src->width;
+  const int dst_left = dst->x_offset, dst_right = dst_left + dst->width;
+  const int row_in_dst = (canvas_y >= dst->y_offset) &&
+                         (canvas_y < dst->y_offset + dst->height);
+  const int overlap_x = (src_left < dst_right) && (src_right > dst_left);
+  assert(canvas_y >= src->y_offset && canvas_y < (src->y_offset + src->height));
+  // Default: both spans empty.
+  *left1 = -1;
+  *width1 = 0;
+  *left2 = -1;
+  *width2 = 0;
+
+  if (!(row_in_dst && overlap_x)) {  // no overlap on this row: all of 'src'
+    *left1 = src_left;
+    *width1 = src->width;
+    return;
+  }
+  if (src_left < dst_left) {  // part of 'src' sticking out on the left
+    *left1 = src_left;
+    *width1 = dst_left - src_left;
+  }
+  if (src_right > dst_right) {  // part of 'src' sticking out on the right
+    *left2 = dst_right;
+    *width2 = src_right - dst_right;
+  }
+}
+
+int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
+                           uint8_t** buf_ptr, int* timestamp_ptr) {
+  // Decodes the next frame onto the canvas. On success returns 1 and stores the
+  // canvas (owned by 'dec') in *buf_ptr and its timestamp in *timestamp_ptr.
+  WebPIterator iter;
+  uint32_t width;
+  uint32_t height;
+  int is_key_frame;
+  int timestamp;
+  BlendRowFunc blend_row;
+
+  if (dec == NULL || buf_ptr == NULL || timestamp_ptr == NULL) return 0;
+  if (!WebPAnimDecoderHasMoreFrames(dec)) return 0;
+
+  width = dec->info_.canvas_width;
+  height = dec->info_.canvas_height;
+  blend_row = dec->blend_func_;
+
+  // Get compressed frame.
+  if (!WebPDemuxGetFrame(dec->demux_, dec->next_frame_, &iter)) return 0;
+  timestamp = dec->prev_frame_timestamp_ + iter.duration;
+
+  // Initialize.
+  is_key_frame = IsKeyFrame(&iter, &dec->prev_iter_,
+                            dec->prev_frame_was_keyframe_, width, height);
+  if (is_key_frame) {
+    ZeroFillCanvas(dec->curr_frame_, width, height);
+  } else {
+    CopyCanvas(dec->prev_frame_disposed_, dec->curr_frame_, width, height);
+  }
+
+  // Decode. Byte offsets/sizes are computed in size_t to avoid 32-bit wrap.
+  {
+    const uint8_t* in = iter.fragment.bytes;
+    const size_t in_size = iter.fragment.size;
+    const size_t out_offset =
+        ((size_t)iter.y_offset * width + iter.x_offset) * NUM_CHANNELS;
+    WebPDecoderConfig* const config = &dec->config_;
+    WebPRGBABuffer* const buf = &config->output.u.RGBA;
+    buf->stride = NUM_CHANNELS * width;
+    buf->size = (size_t)buf->stride * iter.height;
+    buf->rgba = dec->curr_frame_ + out_offset;
+
+    if (WebPDecode(in, in_size, config) != VP8_STATUS_OK) {
+      goto Error;
+    }
+  }
+
+  // During the decoding of current frame, we may have set some pixels to be
+  // transparent (i.e. alpha < 255). However, the value of each of these
+  // pixels should have been determined by blending it against the value of
+  // that pixel in the previous frame if the blending method is WEBP_MUX_BLEND.
+  if (iter.frame_num > 1 && iter.blend_method == WEBP_MUX_BLEND &&
+      !is_key_frame) {
+    if (dec->prev_iter_.dispose_method == WEBP_MUX_DISPOSE_NONE) {
+      int y;
+      // Blend transparent pixels with pixels in previous canvas.
+      for (y = 0; y < iter.height; ++y) {
+        const size_t offset =
+            (size_t)(iter.y_offset + y) * width + iter.x_offset;
+        blend_row((uint32_t*)dec->curr_frame_ + offset,
+                  (uint32_t*)dec->prev_frame_disposed_ + offset, iter.width);
+      }
+    } else {
+      int y;
+      assert(dec->prev_iter_.dispose_method == WEBP_MUX_DISPOSE_BACKGROUND);
+      // We need to blend a transparent pixel with its value just after
+      // initialization. That is, blend it with:
+      // * Fully transparent pixel if it belongs to prevRect <-- No-op.
+      // * The pixel in the previous canvas otherwise <-- Need alpha-blending.
+      for (y = 0; y < iter.height; ++y) {
+        const int canvas_y = iter.y_offset + y;
+        int left1, width1, left2, width2;
+        FindBlendRangeAtRow(&iter, &dec->prev_iter_, canvas_y, &left1, &width1,
+                            &left2, &width2);
+        if (width1 > 0) {
+          const size_t offset1 = (size_t)canvas_y * width + left1;
+          blend_row((uint32_t*)dec->curr_frame_ + offset1,
+                    (uint32_t*)dec->prev_frame_disposed_ + offset1, width1);
+        }
+        if (width2 > 0) {
+          const size_t offset2 = (size_t)canvas_y * width + left2;
+          blend_row((uint32_t*)dec->curr_frame_ + offset2,
+                    (uint32_t*)dec->prev_frame_disposed_ + offset2, width2);
+        }
+      }
+    }
+  }
+
+  // Update info of the previous frame and dispose it for the next iteration.
+  dec->prev_frame_timestamp_ = timestamp;
+  dec->prev_iter_ = iter;
+  dec->prev_frame_was_keyframe_ = is_key_frame;
+  CopyCanvas(dec->curr_frame_, dec->prev_frame_disposed_, width, height);
+  if (dec->prev_iter_.dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) {
+    ZeroFillFrameRect(dec->prev_frame_disposed_, width * NUM_CHANNELS,
+                      dec->prev_iter_.x_offset, dec->prev_iter_.y_offset,
+                      dec->prev_iter_.width, dec->prev_iter_.height);
+  }
+  ++dec->next_frame_;
+
+  // All OK, fill in the values.
+  *buf_ptr = dec->curr_frame_;
+  *timestamp_ptr = timestamp;
+  return 1;
+
+ Error:
+  WebPDemuxReleaseIterator(&iter);
+  return 0;
+}
+
+int WebPAnimDecoderHasMoreFrames(const WebPAnimDecoder* dec) {
+  // next_frame_ is 1-based, so frames remain while it is <= the frame count.
+  return (dec != NULL) && (dec->next_frame_ <= (int)dec->info_.frame_count);
+}
+
+void WebPAnimDecoderReset(WebPAnimDecoder* dec) {
+  // Rewind to frame 1 and clear the previous-frame state; NULL is ignored.
+  if (dec == NULL) return;
+  dec->next_frame_ = 1;
+  dec->prev_frame_was_keyframe_ = 0;
+  dec->prev_frame_timestamp_ = 0;
+  memset(&dec->prev_iter_, 0, sizeof(dec->prev_iter_));
+}
+
+const WebPDemuxer* WebPAnimDecoderGetDemuxer(const WebPAnimDecoder* dec) {
+  // The demuxer stays owned by 'dec'; a NULL decoder yields NULL.
+  return (dec != NULL) ? dec->demux_ : NULL;
+}
+
+void WebPAnimDecoderDelete(WebPAnimDecoder* dec) {
+  // Frees the demuxer, both canvases and 'dec' itself; NULL is a no-op.
+  if (dec == NULL) return;
+  WebPDemuxDelete(dec->demux_);
+  WebPSafeFree(dec->curr_frame_);
+  WebPSafeFree(dec->prev_frame_disposed_);
+  WebPSafeFree(dec);
+}
diff --git a/codec/L2/demos/webpEnc/host/src/demux/demux.c b/codec/L2/demos/webpEnc/host/src/demux/demux.c
new file mode 100644
index 0000000000..0d2989f6f4
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/demux/demux.c
@@ -0,0 +1,966 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  WebP container demux.
+//
+
+#ifdef HAVE_CONFIG_H
+#include "../webp/config.h"
+#endif
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../utils/utils.h"
+#include "../webp/decode.h"     // WebPGetFeatures
+#include "../webp/demux.h"
+#include "../webp/format_constants.h"
+
+#define DMUX_MAJ_VERSION 0
+#define DMUX_MIN_VERSION 3
+#define DMUX_REV_VERSION 0
+
+typedef struct {
+  size_t start_;        // start location of the data (read cursor into buf_)
+  size_t end_;          // end location (one past the last available byte)
+  size_t riff_end_;     // riff chunk end location, can be > end_.
+  size_t buf_size_;     // size of the buffer
+  const uint8_t* buf_;  // input bytes being parsed; never freed by the demuxer
+} MemBuffer;
+
+typedef struct {
+  size_t offset_;  // byte offset into MemBuffer::buf_ where the data starts
+  size_t size_;    // number of bytes recorded from that offset
+} ChunkData;
+
+typedef struct Frame {
+  int x_offset_, y_offset_;  // position of the frame on the canvas
+  int width_, height_;
+  int has_alpha_;
+  int duration_;
+  WebPMuxAnimDispose dispose_method_;
+  WebPMuxAnimBlend blend_method_;
+  int frame_num_;  // 1-based; assigned once an image or alpha chunk is stored
+  int complete_;   // img_components_ contains a full image.
+  ChunkData img_components_[2];  // 0=VP8{,L} 1=ALPH
+  struct Frame* next_;  // next frame in the demuxer's singly linked list
+} Frame;
+
+typedef struct Chunk {
+  ChunkData data_;      // location of the chunk inside the input buffer
+  struct Chunk* next_;  // next stored chunk, NULL at the tail
+} Chunk;
+
+struct WebPDemuxer {
+  MemBuffer mem_;
+  WebPDemuxState state_;
+  int is_ext_format_;       // set to 1 when a VP8X chunk is parsed
+  uint32_t feature_flags_;  // VP8X flag byte; ALPHA_FLAG may be or'ed in later
+  int canvas_width_, canvas_height_;  // -1 until known (see InitDemux)
+  int loop_count_;
+  uint32_t bgcolor_;
+  int num_frames_;
+  Frame* frames_;
+  Frame** frames_tail_;  // address of the link to fill on the next append
+  Chunk* chunks_;  // non-image chunks
+  Chunk** chunks_tail_;  // address of the link to fill on the next append
+};
+
+typedef enum {
+  PARSE_OK,              // parsing succeeded with the data at hand
+  PARSE_NEED_MORE_DATA,  // the input ended before the element was complete
+  PARSE_ERROR            // malformed input or allocation failure
+} ParseStatus;
+
+typedef struct ChunkParser {
+  uint8_t id[4];  // fourcc matched against the first chunk after the header
+  ParseStatus (*parse)(WebPDemuxer* const dmux);  // NULL terminates the table
+  int (*valid)(const WebPDemuxer* const dmux);    // run after a non-error parse
+} ChunkParser;
+
+static ParseStatus ParseSingleImage(WebPDemuxer* const dmux);
+static ParseStatus ParseVP8X(WebPDemuxer* const dmux);
+static int IsValidSimpleFormat(const WebPDemuxer* const dmux);
+static int IsValidExtendedFormat(const WebPDemuxer* const dmux);
+
+static const ChunkParser kMasterChunks[] = {
+  { { 'V', 'P', '8', ' ' }, ParseSingleImage, IsValidSimpleFormat },
+  { { 'V', 'P', '8', 'L' }, ParseSingleImage, IsValidSimpleFormat },
+  { { 'V', 'P', '8', 'X' }, ParseVP8X,        IsValidExtendedFormat },
+  { { '0', '0', '0', '0' }, NULL,             NULL },  // sentinel: parse==NULL
+};
+
+//------------------------------------------------------------------------------
+
+int WebPGetDemuxVersion(void) {  // packed as (major << 16)|(minor << 8)|rev
+  return (DMUX_MAJ_VERSION << 16) | (DMUX_MIN_VERSION << 8) | DMUX_REV_VERSION;
+}
+
+// -----------------------------------------------------------------------------
+// MemBuffer
+
+static int RemapMemBuffer(MemBuffer* const mem,
+                          const uint8_t* data, size_t size) {
+  // A remap may keep or grow the buffer size, never shrink it.
+  if (mem->buf_size_ > size) return 0;
+  mem->end_ = mem->buf_size_ = size;
+  mem->buf_ = data;
+  return 1;
+}
+
+static int InitMemBuffer(MemBuffer* const mem,
+                         const uint8_t* data, size_t size) {
+  memset(mem, 0, sizeof(*mem));  // buf_size_ == 0, so the remap cannot fail
+  return RemapMemBuffer(mem, data, size);
+}
+
+// Return the remaining data size available in 'mem' (assumes start_ <= end_).
+static WEBP_INLINE size_t MemDataSize(const MemBuffer* const mem) {
+  return (mem->end_ - mem->start_);
+}
+
+// Return true if 'size' bytes from start_ would pass the end of the RIFF chunk.
+static WEBP_INLINE int SizeIsInvalid(const MemBuffer* const mem, size_t size) {
+  return (size > mem->riff_end_ - mem->start_);
+}
+
+static WEBP_INLINE void Skip(MemBuffer* const mem, size_t size) {
+  mem->start_ += size;  // advance the read cursor; no bounds check here
+}
+
+static WEBP_INLINE void Rewind(MemBuffer* const mem, size_t size) {
+  mem->start_ -= size;  // move the read cursor back; no underflow check here
+}
+
+static WEBP_INLINE const uint8_t* GetBuffer(MemBuffer* const mem) {
+  return mem->buf_ + mem->start_;  // address of the next unread byte
+}
+
+// Read from 'mem' and skip the read bytes. No bounds checking is done here.
+static WEBP_INLINE uint8_t ReadByte(MemBuffer* const mem) {
+  const uint8_t byte = mem->buf_[mem->start_];
+  Skip(mem, 1);
+  return byte;
+}
+
+static WEBP_INLINE int ReadLE16s(MemBuffer* const mem) {
+  const uint8_t* const data = mem->buf_ + mem->start_;  // unchecked position
+  const int val = GetLE16(data);
+  Skip(mem, 2);  // consume the two bytes just decoded
+  return val;
+}
+
+static WEBP_INLINE int ReadLE24s(MemBuffer* const mem) {
+  const uint8_t* const data = mem->buf_ + mem->start_;  // unchecked position
+  const int val = GetLE24(data);
+  Skip(mem, 3);  // consume the three bytes just decoded
+  return val;
+}
+
+static WEBP_INLINE uint32_t ReadLE32(MemBuffer* const mem) {
+  const uint8_t* const data = mem->buf_ + mem->start_;  // unchecked position
+  const uint32_t val = GetLE32(data);
+  Skip(mem, 4);  // consume the four bytes just decoded
+  return val;
+}
+
+// -----------------------------------------------------------------------------
+// Secondary chunk parsing
+
+static void AddChunk(WebPDemuxer* const dmux, Chunk* const chunk) {
+  *dmux->chunks_tail_ = chunk;         // link 'chunk' after the current tail
+  chunk->next_ = NULL;                 // 'chunk' is the new end of the list
+  dmux->chunks_tail_ = &chunk->next_;  // the next append fills this link
+}
+
+// Add a frame to the end of the list. Returns true on success, false if the
+// entry currently found in the tail slot is an incomplete frame.
+static int AddFrame(WebPDemuxer* const dmux, Frame* const frame) {
+  const Frame* const last_frame = *dmux->frames_tail_;
+  if (last_frame != NULL && !last_frame->complete_) return 0;
+  // NOTE(review): the tail slot is always a NULL link here, so the guard
+  *dmux->frames_tail_ = frame;  // above looks unreachable -- TODO confirm.
+  frame->next_ = NULL;
+  dmux->frames_tail_ = &frame->next_;
+  return 1;
+}
+
+static void SetFrameInfo(size_t start_offset, size_t size,
+                         int frame_num, int complete,
+                         const WebPBitstreamFeatures* const features,
+                         Frame* const frame) {
+  frame->img_components_[0].offset_ = start_offset;  // slot 0 = VP8/VP8L data
+  frame->img_components_[0].size_ = size;
+  frame->width_ = features->width;    // dimensions come from the bitstream
+  frame->height_ = features->height;
+  frame->has_alpha_ |= features->has_alpha;  // keeps a flag that is already set
+  frame->frame_num_ = frame_num;
+  frame->complete_ = complete;
+}
+
+// Store image bearing chunks (ALPH, VP8, VP8L) read from 'mem' to 'frame'.
+static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
+                              MemBuffer* const mem, Frame* const frame) {
+  int alpha_chunks = 0;
+  int image_chunks = 0;
+  int done = (MemDataSize(mem) < CHUNK_HEADER_SIZE ||  // always read below,
+              MemDataSize(mem) < min_size);            // even if min_size == 0
+  ParseStatus status = PARSE_OK;
+  if (done) return PARSE_NEED_MORE_DATA;
+
+  do {
+    const size_t chunk_start_offset = mem->start_;
+    const uint32_t fourcc = ReadLE32(mem);
+    const uint32_t payload_size = ReadLE32(mem);
+    const uint32_t payload_size_padded = payload_size + (payload_size & 1);
+    const size_t payload_available = (payload_size_padded > MemDataSize(mem))
+                                   ? MemDataSize(mem) : payload_size_padded;
+    const size_t chunk_size = CHUNK_HEADER_SIZE + payload_available;
+
+    if (payload_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
+    if (SizeIsInvalid(mem, payload_size_padded)) return PARSE_ERROR;
+    if (payload_size_padded > MemDataSize(mem)) status = PARSE_NEED_MORE_DATA;
+
+    switch (fourcc) {
+      case MKFOURCC('A', 'L', 'P', 'H'):
+        if (alpha_chunks == 0) {
+          ++alpha_chunks;
+          frame->img_components_[1].offset_ = chunk_start_offset;
+          frame->img_components_[1].size_ = chunk_size;
+          frame->has_alpha_ = 1;
+          frame->frame_num_ = frame_num;
+          Skip(mem, payload_available);
+        } else {
+          goto Done;  // a second ALPH chunk belongs to the next frame
+        }
+        break;
+      case MKFOURCC('V', 'P', '8', 'L'):
+        if (alpha_chunks > 0) return PARSE_ERROR;  // VP8L has its own alpha
+        // fall through
+      case MKFOURCC('V', 'P', '8', ' '):
+        if (image_chunks == 0) {
+          // Extract the bitstream features, tolerating failures when the data
+          // is incomplete.
+          WebPBitstreamFeatures features;
+          const VP8StatusCode vp8_status =
+              WebPGetFeatures(mem->buf_ + chunk_start_offset, chunk_size,
+                              &features);
+          if (status == PARSE_NEED_MORE_DATA &&
+              vp8_status == VP8_STATUS_NOT_ENOUGH_DATA) {
+            return PARSE_NEED_MORE_DATA;
+          } else if (vp8_status != VP8_STATUS_OK) {
+            // We have enough data, and yet WebPGetFeatures() failed.
+            return PARSE_ERROR;
+          }
+          ++image_chunks;
+          SetFrameInfo(chunk_start_offset, chunk_size, frame_num,
+                       status == PARSE_OK, &features, frame);
+          Skip(mem, payload_available);
+        } else {
+          goto Done;  // a second image chunk belongs to the next frame
+        }
+        break;
+ Done:
+      default:
+        // Restore fourcc/size when moving up one level in parsing.
+        Rewind(mem, CHUNK_HEADER_SIZE);
+        done = 1;
+        break;
+    }
+
+    if (mem->start_ == mem->riff_end_) {
+      done = 1;
+    } else if (MemDataSize(mem) < CHUNK_HEADER_SIZE) {
+      status = PARSE_NEED_MORE_DATA;  // next header not fully available yet
+    }
+  } while (!done && status == PARSE_OK);
+
+  return status;
+}
+
+// Creates a new Frame if 'actual_size' is within bounds and 'mem' contains
+// enough data ('min_size') to parse the payload.
+// Returns PARSE_OK on success with *frame pointing to the new Frame.
+// Returns PARSE_NEED_MORE_DATA with insufficient data, PARSE_ERROR otherwise.
+static ParseStatus NewFrame(const MemBuffer* const mem,
+                            uint32_t min_size, uint32_t actual_size,
+                            Frame** frame) {
+  if (SizeIsInvalid(mem, min_size)) return PARSE_ERROR;
+  if (actual_size < min_size) return PARSE_ERROR;  // chunk too small
+  if (MemDataSize(mem) < min_size)  return PARSE_NEED_MORE_DATA;
+
+  *frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(**frame));
+  return (*frame == NULL) ? PARSE_ERROR : PARSE_OK;  // alloc failure: error
+}
+
+// Parse a 'ANMF' chunk and any image bearing chunks that immediately follow.
+// 'frame_chunk_size' is the previously validated, padded chunk size.
+static ParseStatus ParseAnimationFrame(
+    WebPDemuxer* const dmux, uint32_t frame_chunk_size) {
+  const int is_animation = !!(dmux->feature_flags_ & ANIMATION_FLAG);
+  const uint32_t anmf_payload_size = frame_chunk_size - ANMF_CHUNK_SIZE;
+  int added_frame = 0;
+  int bits;
+  MemBuffer* const mem = &dmux->mem_;
+  const size_t anmf_end = mem->start_ + frame_chunk_size;  // end of this ANMF
+  Frame* frame;
+  ParseStatus status =
+      NewFrame(mem, ANMF_CHUNK_SIZE, frame_chunk_size, &frame);
+  if (status != PARSE_OK) return status;
+
+  frame->x_offset_       = 2 * ReadLE24s(mem);
+  frame->y_offset_       = 2 * ReadLE24s(mem);
+  frame->width_          = 1 + ReadLE24s(mem);
+  frame->height_         = 1 + ReadLE24s(mem);
+  frame->duration_       = ReadLE24s(mem);
+  bits = ReadByte(mem);
+  frame->dispose_method_ =
+      (bits & 1) ? WEBP_MUX_DISPOSE_BACKGROUND : WEBP_MUX_DISPOSE_NONE;
+  frame->blend_method_ = (bits & 2) ? WEBP_MUX_NO_BLEND : WEBP_MUX_BLEND;
+  if (frame->width_ * (uint64_t)frame->height_ >= MAX_IMAGE_AREA) {
+    WebPSafeFree(frame);
+    return PARSE_ERROR;
+  }
+  // Store a frame only if the animation flag is set and some data for this
+  // frame is available. Nested chunks may not extend past the ANMF payload.
+  status = StoreFrame(dmux->num_frames_ + 1, anmf_payload_size, mem, frame);
+  if (mem->start_ > anmf_end) status = PARSE_ERROR;
+  if (status != PARSE_ERROR && is_animation && frame->frame_num_ > 0) {
+    added_frame = AddFrame(dmux, frame);
+    if (added_frame) {
+      ++dmux->num_frames_;
+    } else {
+      status = PARSE_ERROR;
+    }
+  }
+  if (!added_frame) WebPSafeFree(frame);  // not owned by the list: release it
+  return status;
+}
+
+// Records a non-image chunk so it can later be looked up by its fourcc.
+// 'start_offset' locates the chunk header in the buffer; 'size' counts the
+// header plus the unpadded payload.
+// Returns true on success, false if the list node cannot be allocated.
+static int StoreChunk(WebPDemuxer* const dmux,
+                      size_t start_offset, uint32_t size) {
+  Chunk* const node = (Chunk*)WebPSafeCalloc(1ULL, sizeof(*node));
+  if (node != NULL) {
+    node->data_.offset_ = start_offset;
+    node->data_.size_ = size;
+    AddChunk(dmux, node);
+  }
+  return (node != NULL);
+}
+
+// -----------------------------------------------------------------------------
+// Primary chunk parsing
+
+static ParseStatus ReadHeader(MemBuffer* const mem) {
+  const size_t min_size = RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE;
+  uint32_t riff_size;
+
+  // Basic file level validation: "RIFF" tag first, "WEBP" tag after the size.
+  if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;
+  if (memcmp(GetBuffer(mem), "RIFF", CHUNK_SIZE_BYTES) ||
+      memcmp(GetBuffer(mem) + CHUNK_HEADER_SIZE, "WEBP", CHUNK_SIZE_BYTES)) {
+    return PARSE_ERROR;
+  }
+
+  riff_size = GetLE32(GetBuffer(mem) + TAG_SIZE);  // size field follows the tag
+  if (riff_size < CHUNK_HEADER_SIZE) return PARSE_ERROR;
+  if (riff_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
+
+  // There's no point in reading past the end of the RIFF chunk: clamp to it.
+  mem->riff_end_ = riff_size + CHUNK_HEADER_SIZE;
+  if (mem->buf_size_ > mem->riff_end_) {
+    mem->buf_size_ = mem->end_ = mem->riff_end_;
+  }
+
+  Skip(mem, RIFF_HEADER_SIZE);  // leave 'mem' at the first chunk header
+  return PARSE_OK;
+}
+
+static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
+  const size_t min_size = CHUNK_HEADER_SIZE;
+  MemBuffer* const mem = &dmux->mem_;
+  Frame* frame;
+  ParseStatus status;
+  int image_added = 0;
+
+  if (dmux->frames_ != NULL) return PARSE_ERROR;  // an image was already stored
+  if (SizeIsInvalid(mem, min_size)) return PARSE_ERROR;
+  if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;
+
+  frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(*frame));
+  if (frame == NULL) return PARSE_ERROR;
+
+  // For the single image case we allow parsing of a partial frame, but we need
+  // at least CHUNK_HEADER_SIZE for parsing.
+  status = StoreFrame(1, CHUNK_HEADER_SIZE, &dmux->mem_, frame);
+  if (status != PARSE_ERROR) {
+    const int has_alpha = !!(dmux->feature_flags_ & ALPHA_FLAG);
+    // Clear any alpha when the alpha flag is missing.
+    if (!has_alpha && frame->img_components_[1].size_ > 0) {
+      frame->img_components_[1].offset_ = 0;
+      frame->img_components_[1].size_ = 0;
+      frame->has_alpha_ = 0;
+    }
+
+    // Use the frame width/height as the canvas values for non-vp8x files.
+    // Also, set ALPHA_FLAG if this is a lossless image with alpha.
+    if (!dmux->is_ext_format_ && frame->width_ > 0 && frame->height_ > 0) {
+      dmux->state_ = WEBP_DEMUX_PARSED_HEADER;
+      dmux->canvas_width_ = frame->width_;
+      dmux->canvas_height_ = frame->height_;
+      dmux->feature_flags_ |= frame->has_alpha_ ? ALPHA_FLAG : 0;
+    }
+    if (!AddFrame(dmux, frame)) {
+      status = PARSE_ERROR;  // last frame was left incomplete
+    } else {
+      image_added = 1;  // the frame list owns 'frame' from here on
+      dmux->num_frames_ = 1;
+    }
+  }
+
+  if (!image_added) WebPSafeFree(frame);
+  return status;
+}
+
+static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
+  const int is_animation = !!(dmux->feature_flags_ & ANIMATION_FLAG);
+  MemBuffer* const mem = &dmux->mem_;
+  int anim_chunks = 0;
+  ParseStatus status = PARSE_OK;
+
+  do {
+    int store_chunk = 1;  // chunks without a dedicated case are stored
+    const size_t chunk_start_offset = mem->start_;
+    const uint32_t fourcc = ReadLE32(mem);
+    const uint32_t chunk_size = ReadLE32(mem);
+    const uint32_t chunk_size_padded = chunk_size + (chunk_size & 1);
+
+    if (chunk_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
+    if (SizeIsInvalid(mem, chunk_size_padded)) return PARSE_ERROR;
+
+    switch (fourcc) {
+      case MKFOURCC('V', 'P', '8', 'X'): {
+        return PARSE_ERROR;  // a VP8X chunk is never accepted at this level
+      }
+      case MKFOURCC('A', 'L', 'P', 'H'):
+      case MKFOURCC('V', 'P', '8', ' '):
+      case MKFOURCC('V', 'P', '8', 'L'): {
+        // check that this isn't an animation (all frames should be in an ANMF).
+        if (anim_chunks > 0 || is_animation) return PARSE_ERROR;
+
+        Rewind(mem, CHUNK_HEADER_SIZE);  // ParseSingleImage re-reads the header
+        status = ParseSingleImage(dmux);
+        break;
+      }
+      case MKFOURCC('A', 'N', 'I', 'M'): {
+        if (chunk_size_padded < ANIM_CHUNK_SIZE) return PARSE_ERROR;
+
+        if (MemDataSize(mem) < chunk_size_padded) {
+          status = PARSE_NEED_MORE_DATA;
+        } else if (anim_chunks == 0) {
+          ++anim_chunks;
+          dmux->bgcolor_ = ReadLE32(mem);
+          dmux->loop_count_ = ReadLE16s(mem);
+          Skip(mem, chunk_size_padded - ANIM_CHUNK_SIZE);
+        } else {
+          store_chunk = 0;  // additional ANIM chunks are skipped, not stored
+          goto Skip;
+        }
+        break;
+      }
+      case MKFOURCC('A', 'N', 'M', 'F'): {
+        if (anim_chunks == 0) return PARSE_ERROR;  // 'ANIM' precedes frames.
+        status = ParseAnimationFrame(dmux, chunk_size_padded);
+        break;
+      }
+      case MKFOURCC('I', 'C', 'C', 'P'): {
+        store_chunk = !!(dmux->feature_flags_ & ICCP_FLAG);
+        goto Skip;
+      }
+      case MKFOURCC('E', 'X', 'I', 'F'): {
+        store_chunk = !!(dmux->feature_flags_ & EXIF_FLAG);
+        goto Skip;
+      }
+      case MKFOURCC('X', 'M', 'P', ' '): {
+        store_chunk = !!(dmux->feature_flags_ & XMP_FLAG);
+        goto Skip;
+      }
+ Skip:
+      default: {
+        if (chunk_size_padded <= MemDataSize(mem)) {
+          if (store_chunk) {
+            // Store only the chunk header and unpadded size as only the payload
+            // will be returned to the user.
+            if (!StoreChunk(dmux, chunk_start_offset,
+                            CHUNK_HEADER_SIZE + chunk_size)) {
+              return PARSE_ERROR;
+            }
+          }
+          Skip(mem, chunk_size_padded);
+        } else {
+          status = PARSE_NEED_MORE_DATA;
+        }
+      }
+    }
+
+    if (mem->start_ == mem->riff_end_) {
+      break;  // consumed everything up to the end of the RIFF chunk
+    } else if (MemDataSize(mem) < CHUNK_HEADER_SIZE) {
+      status = PARSE_NEED_MORE_DATA;  // next header not fully available yet
+    }
+  } while (status == PARSE_OK);
+
+  return status;
+}
+
+static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
+  MemBuffer* const mem = &dmux->mem_;
+  uint32_t vp8x_size;
+
+  if (MemDataSize(mem) < CHUNK_HEADER_SIZE) return PARSE_NEED_MORE_DATA;
+
+  dmux->is_ext_format_ = 1;
+  Skip(mem, TAG_SIZE);  // VP8X
+  vp8x_size = ReadLE32(mem);
+  if (vp8x_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
+  if (vp8x_size < VP8X_CHUNK_SIZE) return PARSE_ERROR;
+  vp8x_size += vp8x_size & 1;  // round an odd size up to the next even value
+  if (SizeIsInvalid(mem, vp8x_size)) return PARSE_ERROR;
+  if (MemDataSize(mem) < vp8x_size) return PARSE_NEED_MORE_DATA;
+
+  dmux->feature_flags_ = ReadByte(mem);
+  Skip(mem, 3);  // Reserved.
+  dmux->canvas_width_  = 1 + ReadLE24s(mem);  // stored value is size minus one
+  dmux->canvas_height_ = 1 + ReadLE24s(mem);
+  if (dmux->canvas_width_ * (uint64_t)dmux->canvas_height_ >= MAX_IMAGE_AREA) {
+    return PARSE_ERROR;  // image final dimension is too large
+  }
+  Skip(mem, vp8x_size - VP8X_CHUNK_SIZE);  // skip any trailing data.
+  dmux->state_ = WEBP_DEMUX_PARSED_HEADER;
+
+  if (SizeIsInvalid(mem, CHUNK_HEADER_SIZE)) return PARSE_ERROR;
+  if (MemDataSize(mem) < CHUNK_HEADER_SIZE) return PARSE_NEED_MORE_DATA;
+
+  return ParseVP8XChunks(dmux);  // parse the chunks that follow VP8X
+}
+
+// -----------------------------------------------------------------------------
+// Format validation
+
+static int IsValidSimpleFormat(const WebPDemuxer* const dmux) {
+  const Frame* const frame = dmux->frames_;
+  if (dmux->state_ == WEBP_DEMUX_PARSING_HEADER) return 1;  // nothing to check
+
+  if (dmux->canvas_width_ <= 0 || dmux->canvas_height_ <= 0) return 0;
+  if (dmux->state_ == WEBP_DEMUX_DONE && frame == NULL) return 0;
+  // NOTE(review): assumes frames_ != NULL whenever state_ is PARSED_HEADER.
+  if (frame->width_ <= 0 || frame->height_ <= 0) return 0;
+  return 1;
+}
+
+// If 'exact' is true, the frame must match the canvas: zero offsets and the
+// same dimensions. Otherwise the frame rectangle must fit inside the canvas.
+static int CheckFrameBounds(const Frame* const frame, int exact,
+                            int canvas_width, int canvas_height) {
+  if (exact) {
+    // Both conditions are required for an exact match.
+    const int at_origin = (frame->x_offset_ == 0 && frame->y_offset_ == 0);
+    const int same_size =
+        (frame->width_ == canvas_width && frame->height_ == canvas_height);
+    return at_origin && same_size;
+  } else {
+    // Far edges of the frame rectangle, in canvas coordinates.
+    const int right = frame->x_offset_ + frame->width_;
+    const int bottom = frame->y_offset_ + frame->height_;
+    if (frame->x_offset_ < 0 || frame->y_offset_ < 0) return 0;
+    return (right <= canvas_width) && (bottom <= canvas_height);
+  }
+}
+
+// Returns true if the VP8X canvas, loop count and stored frames are coherent.
+static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
+  const int is_animation = !!(dmux->feature_flags_ & ANIMATION_FLAG);
+  const int is_fragmented = !!(dmux->feature_flags_ & FRAGMENTS_FLAG);
+  const Frame* f = dmux->frames_;
+
+  if (dmux->state_ == WEBP_DEMUX_PARSING_HEADER) return 1;
+
+  if (dmux->canvas_width_ <= 0 || dmux->canvas_height_ <= 0) return 0;
+  if (dmux->loop_count_ < 0) return 0;
+  if (dmux->state_ == WEBP_DEMUX_DONE && dmux->frames_ == NULL) return 0;
+  // Files flagged as fragmented are always rejected.
+  if (is_fragmented) return 0;
+
+  while (f != NULL) {
+    // Frames sharing a frame_num_ are walked together as one set.
+    const int cur_frame_set = f->frame_num_;
+
+    // Check frame properties.
+    for (; f != NULL && f->frame_num_ == cur_frame_set; f = f->next_) {
+      const ChunkData* const image = f->img_components_;
+      const ChunkData* const alpha = f->img_components_ + 1;
+
+      if (!is_animation && f->frame_num_ > 1) return 0;
+
+      if (f->complete_) {
+        if (alpha->size_ == 0 && image->size_ == 0) return 0;
+        // Ensure alpha precedes image bitstream.
+        if (alpha->size_ > 0 && alpha->offset_ > image->offset_) {
+          return 0;
+        }
+
+        if (f->width_ <= 0 || f->height_ <= 0) return 0;
+      } else {
+        // There shouldn't be a partial frame in a complete file.
+        if (dmux->state_ == WEBP_DEMUX_DONE) return 0;
+
+        // Ensure alpha precedes image bitstream.
+        if (alpha->size_ > 0 && image->size_ > 0 &&
+            alpha->offset_ > image->offset_) {
+          return 0;
+        }
+        // There shouldn't be any frames after an incomplete one.
+        if (f->next_ != NULL) return 0;
+      }
+
+      if (f->width_ > 0 && f->height_ > 0 &&
+          !CheckFrameBounds(f, !is_animation,
+                            dmux->canvas_width_, dmux->canvas_height_)) {
+        return 0;
+      }
+    }
+  }
+  return 1;
+}
+
+// -----------------------------------------------------------------------------
+// WebPDemuxer object
+
+static void InitDemux(WebPDemuxer* const dmux, const MemBuffer* const mem) {
+  dmux->state_ = WEBP_DEMUX_PARSING_HEADER;
+  dmux->loop_count_ = 1;  // kept unless an ANIM chunk provides another value
+  dmux->bgcolor_ = 0xFFFFFFFF;  // White background by default.
+  dmux->canvas_width_ = -1;   // -1: canvas size not known yet
+  dmux->canvas_height_ = -1;
+  dmux->frames_tail_ = &dmux->frames_;  // empty list: the tail is the head
+  dmux->chunks_tail_ = &dmux->chunks_;
+  dmux->mem_ = *mem;  // struct copy; other fields are not initialized here
+}
+
+static ParseStatus CreateRawImageDemuxer(MemBuffer* const mem,
+                                         WebPDemuxer** demuxer) {
+  WebPBitstreamFeatures features;
+  const VP8StatusCode status =
+      WebPGetFeatures(mem->buf_, mem->buf_size_, &features);
+  *demuxer = NULL;  // stays NULL on every failure path
+  if (status != VP8_STATUS_OK) {
+    return (status == VP8_STATUS_NOT_ENOUGH_DATA) ? PARSE_NEED_MORE_DATA
+                                                  : PARSE_ERROR;
+  }
+
+  {
+    WebPDemuxer* const dmux = (WebPDemuxer*)WebPSafeCalloc(1ULL, sizeof(*dmux));
+    Frame* const frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(*frame));
+    if (dmux == NULL || frame == NULL) goto Error;
+    InitDemux(dmux, mem);
+    SetFrameInfo(0, mem->buf_size_, 1 /*frame_num*/, 1 /*complete*/, &features,
+                 frame);  // the whole buffer is recorded as the image data
+    if (!AddFrame(dmux, frame)) goto Error;
+    dmux->state_ = WEBP_DEMUX_DONE;
+    dmux->canvas_width_ = frame->width_;  // the canvas is the image itself
+    dmux->canvas_height_ = frame->height_;
+    dmux->feature_flags_ |= frame->has_alpha_ ? ALPHA_FLAG : 0;
+    dmux->num_frames_ = 1;
+    assert(IsValidSimpleFormat(dmux));
+    *demuxer = dmux;
+    return PARSE_OK;
+
+ Error:
+    WebPSafeFree(dmux);
+    WebPSafeFree(frame);
+    return PARSE_ERROR;
+  }
+}
+
+WebPDemuxer* WebPDemuxInternal(const WebPData* data, int allow_partial,
+                               WebPDemuxState* state, int version) {
+  const ChunkParser* parser;
+  int partial;
+  ParseStatus status = PARSE_ERROR;
+  MemBuffer mem;
+  WebPDemuxer* dmux;
+
+  if (state != NULL) *state = WEBP_DEMUX_PARSE_ERROR;  // until proven otherwise
+
+  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DEMUX_ABI_VERSION)) return NULL;
+  if (data == NULL || data->bytes == NULL || data->size == 0) return NULL;
+
+  if (!InitMemBuffer(&mem, data->bytes, data->size)) return NULL;
+  status = ReadHeader(&mem);
+  if (status != PARSE_OK) {
+    // If parsing of the webp file header fails attempt to handle a raw
+    // VP8/VP8L frame. Note 'allow_partial' is ignored in this case.
+    if (status == PARSE_ERROR) {
+      status = CreateRawImageDemuxer(&mem, &dmux);
+      if (status == PARSE_OK) {
+        if (state != NULL) *state = WEBP_DEMUX_DONE;
+        return dmux;
+      }
+    }
+    if (state != NULL) {
+      *state = (status == PARSE_NEED_MORE_DATA) ? WEBP_DEMUX_PARSING_HEADER
+                                                : WEBP_DEMUX_PARSE_ERROR;
+    }
+    return NULL;
+  }
+
+  partial = (mem.buf_size_ < mem.riff_end_);  // buffer ends before the RIFF
+  if (!allow_partial && partial) return NULL;
+
+  dmux = (WebPDemuxer*)WebPSafeCalloc(1ULL, sizeof(*dmux));
+  if (dmux == NULL) return NULL;
+  InitDemux(dmux, &mem);
+
+  status = PARSE_ERROR;  // stays an error when no known chunk tag matches
+  for (parser = kMasterChunks; parser->parse != NULL; ++parser) {
+    if (!memcmp(parser->id, GetBuffer(&dmux->mem_), TAG_SIZE)) {
+      status = parser->parse(dmux);
+      if (status == PARSE_OK) dmux->state_ = WEBP_DEMUX_DONE;
+      if (status == PARSE_NEED_MORE_DATA && !partial) status = PARSE_ERROR;
+      if (status != PARSE_ERROR && !parser->valid(dmux)) status = PARSE_ERROR;
+      break;
+    }
+  }
+  if (status == PARSE_ERROR) dmux->state_ = WEBP_DEMUX_PARSE_ERROR;
+  if (state != NULL) *state = dmux->state_;
+
+  if (status == PARSE_ERROR) {
+    WebPDemuxDelete(dmux);
+    return NULL;
+  }
+  return dmux;
+}
+
+void WebPDemuxDelete(WebPDemuxer* dmux) {
+  Frame* frame;
+  Chunk* chunk;
+  if (dmux == NULL) return;
+  // Read each 'next_' link before the node holding it is released.
+  for (frame = dmux->frames_; frame != NULL;) {
+    Frame* const following = frame->next_;
+    WebPSafeFree(frame);
+    frame = following;
+  }
+  for (chunk = dmux->chunks_; chunk != NULL;) {
+    Chunk* const following = chunk->next_;
+    WebPSafeFree(chunk);
+    chunk = following;
+  }
+  WebPSafeFree(dmux);
+}
+
+// -----------------------------------------------------------------------------
+
+uint32_t WebPDemuxGetI(const WebPDemuxer* dmux, WebPFormatFeature feature) {
+  if (dmux == NULL) return 0;  // no demuxer: every feature reads as 0
+
+  switch (feature) {
+    case WEBP_FF_FORMAT_FLAGS:     return dmux->feature_flags_;
+    case WEBP_FF_CANVAS_WIDTH:     return (uint32_t)dmux->canvas_width_;
+    case WEBP_FF_CANVAS_HEIGHT:    return (uint32_t)dmux->canvas_height_;
+    case WEBP_FF_LOOP_COUNT:       return (uint32_t)dmux->loop_count_;
+    case WEBP_FF_BACKGROUND_COLOR: return dmux->bgcolor_;
+    case WEBP_FF_FRAME_COUNT:      return (uint32_t)dmux->num_frames_;
+  }
+  return 0;  // 'feature' matched none of the cases above
+}
+
+// -----------------------------------------------------------------------------
+// Frame iteration
+
+static const Frame* GetFrame(const WebPDemuxer* const dmux, int frame_num) {
+  const Frame* f = dmux->frames_;
+  // Linear scan of the frame list; 'f' ends up NULL when no frame carries
+  // the requested number.
+  while (f != NULL && f->frame_num_ != frame_num) f = f->next_;
+  return f;
+}
+
+static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,
+                                      const Frame* const frame,
+                                      size_t* const data_size) {
+  *data_size = 0;  // reported size when 'frame' is NULL
+  if (frame != NULL) {
+    const ChunkData* const image = frame->img_components_;
+    const ChunkData* const alpha = frame->img_components_ + 1;
+    size_t start_offset = image->offset_;
+    *data_size = image->size_;
+
+    // if alpha exists it precedes image, update the size allowing for
+    // intervening chunks.
+    if (alpha->size_ > 0) {
+      const size_t inter_size = (image->offset_ > 0)
+                              ? image->offset_ - (alpha->offset_ + alpha->size_)
+                              : 0;
+      start_offset = alpha->offset_;  // the payload then starts at the alpha
+      *data_size  += alpha->size_ + inter_size;
+    }
+    return mem_buf + start_offset;
+  }
+  return NULL;  // no frame, no payload
+}
+
+// Create a whole 'frame' from VP8 (+ alpha) or lossless.
+static int SynthesizeFrame(const WebPDemuxer* const dmux,
+                           const Frame* const frame,
+                           WebPIterator* const iter) {
+  const uint8_t* const mem_buf = dmux->mem_.buf_;
+  size_t payload_size = 0;
+  const uint8_t* const payload = GetFramePayload(mem_buf, frame, &payload_size);
+  if (payload == NULL) return 0;  // GetFramePayload: NULL for a NULL frame
+  assert(frame != NULL);
+
+  iter->frame_num      = frame->frame_num_;
+  iter->num_frames     = dmux->num_frames_;
+  iter->x_offset       = frame->x_offset_;
+  iter->y_offset       = frame->y_offset_;
+  iter->width          = frame->width_;
+  iter->height         = frame->height_;
+  iter->has_alpha      = frame->has_alpha_;
+  iter->duration       = frame->duration_;
+  iter->dispose_method = frame->dispose_method_;
+  iter->blend_method   = frame->blend_method_;
+  iter->complete       = frame->complete_;
+  iter->fragment.bytes = payload;  // points into the demuxer's input buffer
+  iter->fragment.size  = payload_size;
+  return 1;
+}
+
+static int SetFrame(int frame_num, WebPIterator* const iter) {
+  const Frame* frame;
+  const WebPDemuxer* const dmux = (WebPDemuxer*)iter->private_;
+  if (dmux == NULL || frame_num < 0) return 0;
+  if (frame_num > dmux->num_frames_) return 0;  // past the last frame
+  if (frame_num == 0) frame_num = dmux->num_frames_;  // 0 selects the last one
+
+  frame = GetFrame(dmux, frame_num);
+  if (frame == NULL) return 0;
+
+  return SynthesizeFrame(dmux, frame, iter);
+}
+
+int WebPDemuxGetFrame(const WebPDemuxer* dmux, int frame, WebPIterator* iter) {
+  if (iter == NULL) return 0;
+
+  memset(iter, 0, sizeof(*iter));  // start from a clean iterator
+  iter->private_ = (void*)dmux;    // SetFrame() reads the demuxer from here
+  return SetFrame(frame, iter);
+}
+
+int WebPDemuxNextFrame(WebPIterator* iter) {
+  if (iter == NULL) return 0;
+  return SetFrame(iter->frame_num + 1, iter);  // 0 once past the last frame
+}
+
+int WebPDemuxPrevFrame(WebPIterator* iter) {
+  if (iter == NULL) return 0;
+  if (iter->frame_num <= 1) return 0;  // no frame precedes frame #1
+  return SetFrame(iter->frame_num - 1, iter);
+}
+
+void WebPDemuxReleaseIterator(WebPIterator* iter) {
+  (void)iter;  // nothing to release; the cast silences the unused warning
+}
+
+// -----------------------------------------------------------------------------
+// Chunk iteration
+
+static int ChunkCount(const WebPDemuxer* const dmux, const char fourcc[4]) {
+  const uint8_t* const base = dmux->mem_.buf_;
+  const Chunk* node = dmux->chunks_;
+  int matches = 0;
+  // Tally the stored chunks whose leading tag equals 'fourcc'.
+  for (; node != NULL; node = node->next_) {
+    if (memcmp(base + node->data_.offset_, fourcc, TAG_SIZE) == 0) ++matches;
+  }
+  return matches;
+}
+
+static const Chunk* GetChunk(const WebPDemuxer* const dmux,
+                             const char fourcc[4], int chunk_num) {
+  const uint8_t* const mem_buf = dmux->mem_.buf_;
+  const Chunk* c;
+  int count = 0;  // number of chunks matching 'fourcc' seen so far
+  for (c = dmux->chunks_; c != NULL; c = c->next_) {
+    const uint8_t* const header = mem_buf + c->data_.offset_;
+    if (!memcmp(header, fourcc, TAG_SIZE)) ++count;
+    if (count == chunk_num) break;  // stop once 'chunk_num' matches were seen
+  }
+  return c;  // NULL when the list ends first
+}
+
+static int SetChunk(const char fourcc[4], int chunk_num,
+                    WebPChunkIterator* const iter) {
+  const WebPDemuxer* const dmux = (WebPDemuxer*)iter->private_;
+  int count;
+
+  if (dmux == NULL || fourcc == NULL || chunk_num < 0) return 0;
+  count = ChunkCount(dmux, fourcc);
+  if (count == 0) return 0;  // no stored chunk carries this fourcc
+  if (chunk_num == 0) chunk_num = count;  // 0 selects the last match
+
+  if (chunk_num <= count) {
+    const uint8_t* const mem_buf = dmux->mem_.buf_;
+    const Chunk* const chunk = GetChunk(dmux, fourcc, chunk_num);
+    iter->chunk.bytes = mem_buf + chunk->data_.offset_ + CHUNK_HEADER_SIZE;
+    iter->chunk.size  = chunk->data_.size_ - CHUNK_HEADER_SIZE;  // payload only
+    iter->num_chunks  = count;
+    iter->chunk_num   = chunk_num;
+    return 1;
+  }
+  return 0;  // 'chunk_num' exceeds the number of matching chunks
+}
+
+int WebPDemuxGetChunk(const WebPDemuxer* dmux,
+                      const char fourcc[4], int chunk_num,
+                      WebPChunkIterator* iter) {
+  if (iter == NULL) return 0;
+
+  memset(iter, 0, sizeof(*iter));  // start from a clean iterator
+  iter->private_ = (void*)dmux;    // SetChunk() reads the demuxer from here
+  return SetChunk(fourcc, chunk_num, iter);
+}
+
+int WebPDemuxNextChunk(WebPChunkIterator* iter) {
+  if (iter != NULL) {
+    const char* const fourcc =  // step back from the payload to the header
+        (const char*)iter->chunk.bytes - CHUNK_HEADER_SIZE;
+    return SetChunk(fourcc, iter->chunk_num + 1, iter);
+  }
+  return 0;
+}
+
+int WebPDemuxPrevChunk(WebPChunkIterator* iter) {
+  if (iter != NULL && iter->chunk_num > 1) {  // nothing precedes chunk #1
+    const char* const fourcc =  // step back from the payload to the header
+        (const char*)iter->chunk.bytes - CHUNK_HEADER_SIZE;
+    return SetChunk(fourcc, iter->chunk_num - 1, iter);
+  }
+  return 0;
+}
+
+void WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter) {
+  (void)iter;  // nothing to release; the cast silences the unused warning
+}
+
diff --git a/codec/L2/demos/webpEnc/host/src/demux/libwebpdemux.pc.in b/codec/L2/demos/webpEnc/host/src/demux/libwebpdemux.pc.in
new file mode 100644
index 0000000000..6dfbbbdeee
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/demux/libwebpdemux.pc.in
@@ -0,0 +1,11 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+# pkg-config metadata for libwebpdemux; the values above come from this .in template.
+Name: libwebpdemux
+Description: Library for parsing the WebP graphics format container
+Version: @PACKAGE_VERSION@
+Requires: libwebp >= 0.2.0
+Cflags: -I${includedir}
+Libs: -L${libdir} -lwebpdemux
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/Makefile.am b/codec/L2/demos/webpEnc/host/src/dsp/Makefile.am
new file mode 100644
index 0000000000..01ca685d36
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/Makefile.am
@@ -0,0 +1,117 @@
+noinst_LTLIBRARIES = libwebpdsp.la libwebpdsp_avx2.la
+noinst_LTLIBRARIES += libwebpdsp_sse2.la libwebpdspdecode_sse2.la
+noinst_LTLIBRARIES += libwebpdsp_sse41.la libwebpdspdecode_sse41.la
+# libwebpdspdecode.la is only built when BUILD_LIBWEBPDECODER is enabled.
+if BUILD_LIBWEBPDECODER
+  noinst_LTLIBRARIES += libwebpdspdecode.la
+endif
+# Header listed for installation under $(includedir)/webp.
+common_HEADERS = ../webp/types.h
+commondir = $(includedir)/webp
+# Sources used by both libwebpdsp.la and libwebpdspdecode.la.
+COMMON_SOURCES =
+COMMON_SOURCES += alpha_processing.c
+COMMON_SOURCES += alpha_processing_mips_dsp_r2.c
+COMMON_SOURCES += cpu.c
+COMMON_SOURCES += dec.c
+COMMON_SOURCES += dec_clip_tables.c
+COMMON_SOURCES += dec_mips32.c
+COMMON_SOURCES += dec_mips_dsp_r2.c
+COMMON_SOURCES += dec_neon.c
+COMMON_SOURCES += dsp.h
+COMMON_SOURCES += filters.c
+COMMON_SOURCES += filters_mips_dsp_r2.c
+COMMON_SOURCES += lossless.c
+COMMON_SOURCES += lossless.h
+COMMON_SOURCES += lossless_mips_dsp_r2.c
+COMMON_SOURCES += lossless_neon.c
+COMMON_SOURCES += mips_macro.h
+COMMON_SOURCES += neon.h
+COMMON_SOURCES += rescaler.c
+COMMON_SOURCES += rescaler_mips32.c
+COMMON_SOURCES += rescaler_mips_dsp_r2.c
+COMMON_SOURCES += rescaler_neon.c
+COMMON_SOURCES += upsampling.c
+COMMON_SOURCES += upsampling_mips_dsp_r2.c
+COMMON_SOURCES += upsampling_neon.c
+COMMON_SOURCES += yuv.c
+COMMON_SOURCES += yuv.h
+COMMON_SOURCES += yuv_mips32.c
+COMMON_SOURCES += yuv_mips_dsp_r2.c
+# Encoder-only sources: part of libwebpdsp.la, not of libwebpdspdecode.la.
+ENC_SOURCES =
+ENC_SOURCES += argb.c
+ENC_SOURCES += argb_mips_dsp_r2.c
+ENC_SOURCES += cost.c
+ENC_SOURCES += cost_mips32.c
+ENC_SOURCES += cost_mips_dsp_r2.c
+ENC_SOURCES += enc.c
+ENC_SOURCES += enc_mips32.c
+ENC_SOURCES += enc_mips_dsp_r2.c
+ENC_SOURCES += enc_neon.c
+ENC_SOURCES += lossless_enc.c
+ENC_SOURCES += lossless_enc_mips32.c
+ENC_SOURCES += lossless_enc_mips_dsp_r2.c
+ENC_SOURCES += lossless_enc_neon.c
+# AVX2 encoder code, compiled with $(AVX2_FLAGS).
+libwebpdsp_avx2_la_SOURCES =
+libwebpdsp_avx2_la_SOURCES += enc_avx2.c
+libwebpdsp_avx2_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
+libwebpdsp_avx2_la_CFLAGS = $(AM_CFLAGS) $(AVX2_FLAGS)
+# SSE4.1 code that is also linked into the decoder-only library.
+libwebpdspdecode_sse41_la_SOURCES =
+libwebpdspdecode_sse41_la_SOURCES += alpha_processing_sse41.c
+libwebpdspdecode_sse41_la_SOURCES += dec_sse41.c
+libwebpdspdecode_sse41_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
+libwebpdspdecode_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_FLAGS)
+# SSE2 code that is also linked into the decoder-only library; it reuses the libwebpdsp_sse2 flags.
+libwebpdspdecode_sse2_la_SOURCES =
+libwebpdspdecode_sse2_la_SOURCES += alpha_processing_sse2.c
+libwebpdspdecode_sse2_la_SOURCES += dec_sse2.c
+libwebpdspdecode_sse2_la_SOURCES += filters_sse2.c
+libwebpdspdecode_sse2_la_SOURCES += lossless_sse2.c
+libwebpdspdecode_sse2_la_SOURCES += rescaler_sse2.c
+libwebpdspdecode_sse2_la_SOURCES += upsampling_sse2.c
+libwebpdspdecode_sse2_la_SOURCES += yuv_sse2.c
+libwebpdspdecode_sse2_la_CPPFLAGS = $(libwebpdsp_sse2_la_CPPFLAGS)
+libwebpdspdecode_sse2_la_CFLAGS = $(libwebpdsp_sse2_la_CFLAGS)
+# Encoder-side SSE2 code; LIBADD folds in libwebpdspdecode_sse2.la.
+libwebpdsp_sse2_la_SOURCES =
+libwebpdsp_sse2_la_SOURCES += argb_sse2.c
+libwebpdsp_sse2_la_SOURCES += cost_sse2.c
+libwebpdsp_sse2_la_SOURCES += enc_sse2.c
+libwebpdsp_sse2_la_SOURCES += lossless_enc_sse2.c
+libwebpdsp_sse2_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
+libwebpdsp_sse2_la_CFLAGS = $(AM_CFLAGS) $(SSE2_FLAGS)
+libwebpdsp_sse2_la_LIBADD = libwebpdspdecode_sse2.la
+# Encoder-side SSE4.1 code; LIBADD folds in libwebpdspdecode_sse41.la.
+libwebpdsp_sse41_la_SOURCES =
+libwebpdsp_sse41_la_SOURCES += enc_sse41.c
+libwebpdsp_sse41_la_SOURCES += lossless_enc_sse41.c
+libwebpdsp_sse41_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
+libwebpdsp_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_FLAGS)
+libwebpdsp_sse41_la_LIBADD = libwebpdspdecode_sse41.la
+# Full DSP library: common plus encoder sources, with the SIMD libraries added via LIBADD.
+libwebpdsp_la_SOURCES = $(COMMON_SOURCES) $(ENC_SOURCES)
+
+noinst_HEADERS =
+noinst_HEADERS += ../dec/decode_vp8.h
+noinst_HEADERS += ../webp/decode.h
+# Preprocessor flags; the other libraries in this file reuse them via $(libwebpdsp_la_CPPFLAGS).
+libwebpdsp_la_CPPFLAGS =
+libwebpdsp_la_CPPFLAGS += $(AM_CPPFLAGS)
+libwebpdsp_la_CPPFLAGS += $(USE_EXPERIMENTAL_CODE) $(USE_SWAP_16BIT_CSP)
+libwebpdsp_la_LDFLAGS = -lm
+libwebpdsp_la_LIBADD =
+libwebpdsp_la_LIBADD += libwebpdsp_avx2.la libwebpdsp_sse2.la
+libwebpdsp_la_LIBADD += libwebpdsp_sse41.la
+# Decoder-only variant: common sources and the two libwebpdspdecode_* SIMD libraries.
+if BUILD_LIBWEBPDECODER
+  libwebpdspdecode_la_SOURCES = $(COMMON_SOURCES)
+
+  libwebpdspdecode_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
+  libwebpdspdecode_la_LDFLAGS = $(libwebpdsp_la_LDFLAGS)
+  libwebpdspdecode_la_LIBADD =
+  libwebpdspdecode_la_LIBADD += libwebpdspdecode_sse2.la
+  libwebpdspdecode_la_LIBADD += libwebpdspdecode_sse41.la
+endif
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/alpha_processing.c b/codec/L2/demos/webpEnc/host/src/dsp/alpha_processing.c
new file mode 100644
index 0000000000..39478dd7de
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/alpha_processing.c
@@ -0,0 +1,341 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Utilities for processing transparent channel.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include "./dsp.h"
+
+// Tables can be faster on some platforms but incur some extra binary size (~2k).
+// #define USE_TABLES_FOR_ALPHA_MULT
+
+// -----------------------------------------------------------------------------
+
+#define MFIX 24 // 24bit fixed-point arithmetic
+#define HALF ((1u << MFIX) >> 1)
+#define KINV_255 ((1u << MFIX) / 255u)
+
+static uint32_t Mult(uint8_t x, uint32_t mult) { // fixed-point multiply, MFIX fractional bits
+    const uint32_t v = (x * mult + HALF) >> MFIX; // adding HALF rounds to nearest
+    assert(v <= 255); // <- 24bit precision is enough to ensure that.
+    return v;
+}
+
+#ifdef USE_TABLES_FOR_ALPHA_MULT
+
+static const uint32_t kMultTables[2][256] = {
+    {// (255u << MFIX) / alpha
+     0x00000000, 0xff000000, 0x7f800000, 0x55000000, 0x3fc00000, 0x33000000, 0x2a800000, 0x246db6db, 0x1fe00000,
+     0x1c555555, 0x19800000, 0x172e8ba2, 0x15400000, 0x139d89d8, 0x1236db6d, 0x11000000, 0x0ff00000, 0x0f000000,
+     0x0e2aaaaa, 0x0d6bca1a, 0x0cc00000, 0x0c249249, 0x0b9745d1, 0x0b1642c8, 0x0aa00000, 0x0a333333, 0x09cec4ec,
+     0x0971c71c, 0x091b6db6, 0x08cb08d3, 0x08800000, 0x0839ce73, 0x07f80000, 0x07ba2e8b, 0x07800000, 0x07492492,
+     0x07155555, 0x06e45306, 0x06b5e50d, 0x0689d89d, 0x06600000, 0x063831f3, 0x06124924, 0x05ee23b8, 0x05cba2e8,
+     0x05aaaaaa, 0x058b2164, 0x056cefa8, 0x05500000, 0x05343eb1, 0x05199999, 0x05000000, 0x04e76276, 0x04cfb2b7,
+     0x04b8e38e, 0x04a2e8ba, 0x048db6db, 0x0479435e, 0x04658469, 0x045270d0, 0x04400000, 0x042e29f7, 0x041ce739,
+     0x040c30c3, 0x03fc0000, 0x03ec4ec4, 0x03dd1745, 0x03ce540f, 0x03c00000, 0x03b21642, 0x03a49249, 0x03976fc6,
+     0x038aaaaa, 0x037e3f1f, 0x03722983, 0x03666666, 0x035af286, 0x034fcace, 0x0344ec4e, 0x033a5440, 0x03300000,
+     0x0325ed09, 0x031c18f9, 0x0312818a, 0x03092492, 0x03000000, 0x02f711dc, 0x02ee5846, 0x02e5d174, 0x02dd7baf,
+     0x02d55555, 0x02cd5cd5, 0x02c590b2, 0x02bdef7b, 0x02b677d4, 0x02af286b, 0x02a80000, 0x02a0fd5c, 0x029a1f58,
+     0x029364d9, 0x028ccccc, 0x0286562d, 0x02800000, 0x0279c952, 0x0273b13b, 0x026db6db, 0x0267d95b, 0x026217ec,
+     0x025c71c7, 0x0256e62a, 0x0251745d, 0x024c1bac, 0x0246db6d, 0x0241b2f9, 0x023ca1af, 0x0237a6f4, 0x0232c234,
+     0x022df2df, 0x02293868, 0x02249249, 0x02200000, 0x021b810e, 0x021714fb, 0x0212bb51, 0x020e739c, 0x020a3d70,
+     0x02061861, 0x02020408, 0x01fe0000, 0x01fa0be8, 0x01f62762, 0x01f25213, 0x01ee8ba2, 0x01ead3ba, 0x01e72a07,
+     0x01e38e38, 0x01e00000, 0x01dc7f10, 0x01d90b21, 0x01d5a3e9, 0x01d24924, 0x01cefa8d, 0x01cbb7e3, 0x01c880e5,
+     0x01c55555, 0x01c234f7, 0x01bf1f8f, 0x01bc14e5, 0x01b914c1, 0x01b61eed, 0x01b33333, 0x01b05160, 0x01ad7943,
+     0x01aaaaaa, 0x01a7e567, 0x01a5294a, 0x01a27627, 0x019fcbd2, 0x019d2a20, 0x019a90e7, 0x01980000, 0x01957741,
+     0x0192f684, 0x01907da4, 0x018e0c7c, 0x018ba2e8, 0x018940c5, 0x0186e5f0, 0x01849249, 0x018245ae, 0x01800000,
+     0x017dc11f, 0x017b88ee, 0x0179574e, 0x01772c23, 0x01750750, 0x0172e8ba, 0x0170d045, 0x016ebdd7, 0x016cb157,
+     0x016aaaaa, 0x0168a9b9, 0x0166ae6a, 0x0164b8a7, 0x0162c859, 0x0160dd67, 0x015ef7bd, 0x015d1745, 0x015b3bea,
+     0x01596596, 0x01579435, 0x0155c7b4, 0x01540000, 0x01523d03, 0x01507eae, 0x014ec4ec, 0x014d0fac, 0x014b5edc,
+     0x0149b26c, 0x01480a4a, 0x01466666, 0x0144c6af, 0x01432b16, 0x0141938b, 0x01400000, 0x013e7063, 0x013ce4a9,
+     0x013b5cc0, 0x0139d89d, 0x01385830, 0x0136db6d, 0x01356246, 0x0133ecad, 0x01327a97, 0x01310bf6, 0x012fa0be,
+     0x012e38e3, 0x012cd459, 0x012b7315, 0x012a150a, 0x0128ba2e, 0x01276276, 0x01260dd6, 0x0124bc44, 0x01236db6,
+     0x01222222, 0x0120d97c, 0x011f93bc, 0x011e50d7, 0x011d10c4, 0x011bd37a, 0x011a98ef, 0x0119611a, 0x01182bf2,
+     0x0116f96f, 0x0115c988, 0x01149c34, 0x0113716a, 0x01124924, 0x01112358, 0x01100000, 0x010edf12, 0x010dc087,
+     0x010ca458, 0x010b8a7d, 0x010a72f0, 0x01095da8, 0x01084a9f, 0x010739ce, 0x01062b2e, 0x01051eb8, 0x01041465,
+     0x01030c30, 0x01020612, 0x01010204, 0x01000000},
+    {// alpha * KINV_255
+     0x00000000, 0x00010101, 0x00020202, 0x00030303, 0x00040404, 0x00050505, 0x00060606, 0x00070707, 0x00080808,
+     0x00090909, 0x000a0a0a, 0x000b0b0b, 0x000c0c0c, 0x000d0d0d, 0x000e0e0e, 0x000f0f0f, 0x00101010, 0x00111111,
+     0x00121212, 0x00131313, 0x00141414, 0x00151515, 0x00161616, 0x00171717, 0x00181818, 0x00191919, 0x001a1a1a,
+     0x001b1b1b, 0x001c1c1c, 0x001d1d1d, 0x001e1e1e, 0x001f1f1f, 0x00202020, 0x00212121, 0x00222222, 0x00232323,
+     0x00242424, 0x00252525, 0x00262626, 0x00272727, 0x00282828, 0x00292929, 0x002a2a2a, 0x002b2b2b, 0x002c2c2c,
+     0x002d2d2d, 0x002e2e2e, 0x002f2f2f, 0x00303030, 0x00313131, 0x00323232, 0x00333333, 0x00343434, 0x00353535,
+     0x00363636, 0x00373737, 0x00383838, 0x00393939, 0x003a3a3a, 0x003b3b3b, 0x003c3c3c, 0x003d3d3d, 0x003e3e3e,
+     0x003f3f3f, 0x00404040, 0x00414141, 0x00424242, 0x00434343, 0x00444444, 0x00454545, 0x00464646, 0x00474747,
+     0x00484848, 0x00494949, 0x004a4a4a, 0x004b4b4b, 0x004c4c4c, 0x004d4d4d, 0x004e4e4e, 0x004f4f4f, 0x00505050,
+     0x00515151, 0x00525252, 0x00535353, 0x00545454, 0x00555555, 0x00565656, 0x00575757, 0x00585858, 0x00595959,
+     0x005a5a5a, 0x005b5b5b, 0x005c5c5c, 0x005d5d5d, 0x005e5e5e, 0x005f5f5f, 0x00606060, 0x00616161, 0x00626262,
+     0x00636363, 0x00646464, 0x00656565, 0x00666666, 0x00676767, 0x00686868, 0x00696969, 0x006a6a6a, 0x006b6b6b,
+     0x006c6c6c, 0x006d6d6d, 0x006e6e6e, 0x006f6f6f, 0x00707070, 0x00717171, 0x00727272, 0x00737373, 0x00747474,
+     0x00757575, 0x00767676, 0x00777777, 0x00787878, 0x00797979, 0x007a7a7a, 0x007b7b7b, 0x007c7c7c, 0x007d7d7d,
+     0x007e7e7e, 0x007f7f7f, 0x00808080, 0x00818181, 0x00828282, 0x00838383, 0x00848484, 0x00858585, 0x00868686,
+     0x00878787, 0x00888888, 0x00898989, 0x008a8a8a, 0x008b8b8b, 0x008c8c8c, 0x008d8d8d, 0x008e8e8e, 0x008f8f8f,
+     0x00909090, 0x00919191, 0x00929292, 0x00939393, 0x00949494, 0x00959595, 0x00969696, 0x00979797, 0x00989898,
+     0x00999999, 0x009a9a9a, 0x009b9b9b, 0x009c9c9c, 0x009d9d9d, 0x009e9e9e, 0x009f9f9f, 0x00a0a0a0, 0x00a1a1a1,
+     0x00a2a2a2, 0x00a3a3a3, 0x00a4a4a4, 0x00a5a5a5, 0x00a6a6a6, 0x00a7a7a7, 0x00a8a8a8, 0x00a9a9a9, 0x00aaaaaa,
+     0x00ababab, 0x00acacac, 0x00adadad, 0x00aeaeae, 0x00afafaf, 0x00b0b0b0, 0x00b1b1b1, 0x00b2b2b2, 0x00b3b3b3,
+     0x00b4b4b4, 0x00b5b5b5, 0x00b6b6b6, 0x00b7b7b7, 0x00b8b8b8, 0x00b9b9b9, 0x00bababa, 0x00bbbbbb, 0x00bcbcbc,
+     0x00bdbdbd, 0x00bebebe, 0x00bfbfbf, 0x00c0c0c0, 0x00c1c1c1, 0x00c2c2c2, 0x00c3c3c3, 0x00c4c4c4, 0x00c5c5c5,
+     0x00c6c6c6, 0x00c7c7c7, 0x00c8c8c8, 0x00c9c9c9, 0x00cacaca, 0x00cbcbcb, 0x00cccccc, 0x00cdcdcd, 0x00cecece,
+     0x00cfcfcf, 0x00d0d0d0, 0x00d1d1d1, 0x00d2d2d2, 0x00d3d3d3, 0x00d4d4d4, 0x00d5d5d5, 0x00d6d6d6, 0x00d7d7d7,
+     0x00d8d8d8, 0x00d9d9d9, 0x00dadada, 0x00dbdbdb, 0x00dcdcdc, 0x00dddddd, 0x00dedede, 0x00dfdfdf, 0x00e0e0e0,
+     0x00e1e1e1, 0x00e2e2e2, 0x00e3e3e3, 0x00e4e4e4, 0x00e5e5e5, 0x00e6e6e6, 0x00e7e7e7, 0x00e8e8e8, 0x00e9e9e9,
+     0x00eaeaea, 0x00ebebeb, 0x00ececec, 0x00ededed, 0x00eeeeee, 0x00efefef, 0x00f0f0f0, 0x00f1f1f1, 0x00f2f2f2,
+     0x00f3f3f3, 0x00f4f4f4, 0x00f5f5f5, 0x00f6f6f6, 0x00f7f7f7, 0x00f8f8f8, 0x00f9f9f9, 0x00fafafa, 0x00fbfbfb,
+     0x00fcfcfc, 0x00fdfdfd, 0x00fefefe, 0x00ffffff}};
+
+static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
+    return kMultTables[!inverse][a]; // row 0: (255u << MFIX) / alpha, row 1: alpha * KINV_255
+}
+
+#else
+
+static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
+    return inverse ? (255u << MFIX) / a : a * KINV_255; // 'a' must be non-zero when 'inverse' is set
+}
+
+#endif // USE_TABLES_FOR_ALPHA_MULT
+
+// Scales the three low bytes of each pixel by its top (alpha) byte, using the
+// factor from GetScale(): alpha * KINV_255, or (255u << MFIX) / alpha if 'inverse'.
+void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse) {
+    int i;
+    for (i = 0; i < width; ++i) {
+        const uint32_t pixel = ptr[i];
+        const uint32_t a = pixel >> 24;
+        if (a == 0xff) continue; // fully opaque: left untouched
+        if (a == 0) {
+            ptr[i] = 0; // fully transparent: the whole pixel is cleared
+        } else {
+            const uint32_t scale = GetScale(a, inverse);
+            const uint32_t c0 = Mult(pixel >> 0, scale);
+            const uint32_t c1 = Mult(pixel >> 8, scale);
+            const uint32_t c2 = Mult(pixel >> 16, scale);
+            ptr[i] = (pixel & 0xff000000u) | c0 | (c1 << 8) | (c2 << 16);
+        }
+    }
+}
+
+// Scales every sample ptr[i] by the matching alpha[i], using the factor from
+// GetScale(): alpha * KINV_255, or (255u << MFIX) / alpha when 'inverse' is set.
+void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha, int width, int inverse) {
+    int i;
+    for (i = 0; i < width; ++i) {
+        const uint32_t a = alpha[i];
+        if (a == 255) continue; // opaque: sample is left untouched
+        if (a == 0) {
+            ptr[i] = 0; // transparent: sample is cleared
+            continue;
+        }
+        ptr[i] = Mult(ptr[i], GetScale(a, inverse));
+    }
+}
+
+#undef KINV_255
+#undef HALF
+#undef MFIX
+
+void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);
+void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha, int width, int inverse);
+
+//------------------------------------------------------------------------------
+// Generic per-plane calls
+
+// Calls WebPMultARGBRow on 'num_rows' rows that start 'stride' bytes apart.
+void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows, int inverse) {
+    int row;
+    for (row = 0; row < num_rows; ++row, ptr += stride) {
+        WebPMultARGBRow((uint32_t*)ptr, width, inverse);
+    }
+}
+
+// Calls WebPMultRow on 'num_rows' rows, stepping each plane by its own stride.
+void WebPMultRows(
+    uint8_t* ptr, int stride, const uint8_t* alpha, int alpha_stride, int width, int num_rows, int inverse) {
+    for (; num_rows > 0; --num_rows) {
+        WebPMultRow(ptr, alpha, width, inverse);
+        ptr += stride;
+        alpha += alpha_stride;
+    }
+}
+
+//------------------------------------------------------------------------------
+// Premultiplied modes
+
+// non-dithered modes
+
+// (x * a * 32897) >> 23 is bit-wise equivalent to (int)(x * a / 255.)
+// for all 8bit x or a. For bit-wise equivalence to (int)(x * a / 255. + .5),
+// one can use instead: (x * a * 65793 + (1 << 23)) >> 24
+#if 1 // (int)(x * a / 255.)
+#define MULTIPLIER(a) ((a)*32897U)
+#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
+#else // (int)(x * a / 255. + .5)
+#define MULTIPLIER(a) ((a)*65793U)
+#define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24)
+#endif
+
+// Premultiplies, in place, the three colour bytes of each 4-byte pixel by its alpha byte.
+static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first, int w, int h, int stride) {
+    const int rgb_off = alpha_first ? 1 : 0; // first colour byte within a pixel
+    const int a_off = alpha_first ? 0 : 3;   // alpha byte within a pixel
+    int x, y;
+    for (y = 0; y < h; ++y, rgba += stride) {
+        for (x = 0; x < w; ++x) {
+            uint8_t* const px = rgba + 4 * x + rgb_off;
+            const uint32_t a = rgba[4 * x + a_off];
+            const uint32_t mult = MULTIPLIER(a);
+            if (a == 0xff) continue; // opaque pixels are left unchanged
+            px[0] = PREMULTIPLY(px[0], mult);
+            px[1] = PREMULTIPLY(px[1], mult);
+            px[2] = PREMULTIPLY(px[2], mult);
+        }
+    }
+}
+#undef MULTIPLIER
+#undef PREMULTIPLY
+
+// rgbA4444
+
+#define MULTIPLIER(a) ((a)*0x1111) // 0x1111 ~= (1 << 16) / 15
+
+static WEBP_INLINE uint8_t dither_hi(uint8_t x) {
+    return (x & 0xf0) | (x >> 4); // copy the high nibble into the low nibble
+}
+
+static WEBP_INLINE uint8_t dither_lo(uint8_t x) {
+    return (x & 0x0f) | (x << 4); // copy the low nibble into the high nibble
+}
+
+static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
+    return (x * m) >> 16; // product shifted down 16 bits, then truncated to uint8_t
+}
+
+static WEBP_INLINE void ApplyAlphaMultiply4444(
+    uint8_t* rgba4444, int w, int h, int stride, int rg_byte_pos /* 0 or 1 */) {
+    while (h-- > 0) { // one row of 'w' two-byte pixels per iteration
+        int i;
+        for (i = 0; i < w; ++i) {
+            const uint32_t rg = rgba4444[2 * i + rg_byte_pos];
+            const uint32_t ba = rgba4444[2 * i + (rg_byte_pos ^ 1)];
+            const uint8_t a = ba & 0x0f; // alpha is the low nibble of the 'ba' byte
+            const uint32_t mult = MULTIPLIER(a);
+            const uint8_t r = multiply(dither_hi(rg), mult);
+            const uint8_t g = multiply(dither_lo(rg), mult);
+            const uint8_t b = multiply(dither_hi(ba), mult);
+            rgba4444[2 * i + rg_byte_pos] = (r & 0xf0) | ((g >> 4) & 0x0f); // repack high nibbles of r and g
+            rgba4444[2 * i + (rg_byte_pos ^ 1)] = (b & 0xf0) | a; // alpha nibble is written back unchanged
+        }
+        rgba4444 += stride;
+    }
+}
+#undef MULTIPLIER
+
+static void ApplyAlphaMultiply_16b(uint8_t* rgba4444, int w, int h, int stride) {
+#ifdef WEBP_SWAP_16BIT_CSP
+    ApplyAlphaMultiply4444(rgba4444, w, h, stride, 1); // 'rg' byte is the second byte of each pixel
+#else
+    ApplyAlphaMultiply4444(rgba4444, w, h, stride, 0); // 'rg' byte is the first byte of each pixel
+#endif
+}
+
+// Copies each alpha value into every 4th byte of 'dst' (dst[0], dst[4], ...),
+// row by row. All values are AND-ed together along the way; the function
+// returns non-zero when that AND differs from 0xff, i.e. when at least one
+// alpha value is not 0xff.
+static int DispatchAlpha(const uint8_t* alpha, int alpha_stride, int width, int height, uint8_t* dst, int dst_stride) {
+    uint32_t and_of_all = 0xff;
+    int x, y;
+    for (y = 0; y < height; ++y, alpha += alpha_stride, dst += dst_stride) {
+        for (x = 0; x < width; ++x) {
+            const uint32_t a = alpha[x];
+            dst[4 * x] = a;
+            and_of_all &= a;
+        }
+    }
+    return (and_of_all != 0xff);
+}
+
+// Stores each alpha value shifted into bits 8..15 of the matching 32-bit 'dst'
+// entry; all other bits are written as zero. 'dst_stride' counts uint32_t's.
+static void DispatchAlphaToGreen(
+    const uint8_t* alpha, int alpha_stride, int width, int height, uint32_t* dst, int dst_stride) {
+    int x, y;
+    for (y = 0; y < height; ++y, alpha += alpha_stride, dst += dst_stride) {
+        for (x = 0; x < width; ++x) {
+            dst[x] = alpha[x] << 8;
+        }
+    }
+}
+
+// Gathers every 4th byte of 'argb' (argb[0], argb[4], ...) into 'alpha', row
+// by row, AND-ing the gathered values together as it goes.
+// Returns non-zero only when all gathered values equal 0xff.
+static int ExtractAlpha(const uint8_t* argb, int argb_stride, int width, int height, uint8_t* alpha, int alpha_stride) {
+    uint8_t and_of_all = 0xff;
+    int x, y;
+    for (y = 0; y < height; ++y, argb += argb_stride, alpha += alpha_stride) {
+        for (x = 0; x < width; ++x) {
+            const uint8_t a = argb[4 * x];
+            alpha[x] = a;
+            and_of_all &= a;
+        }
+    }
+    return (and_of_all == 0xff);
+}
+
+void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
+void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
+int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
+void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int);
+int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
+
+//------------------------------------------------------------------------------
+// Init function
+
+extern void WebPInitAlphaProcessingMIPSdspR2(void);
+extern void WebPInitAlphaProcessingSSE2(void);
+extern void WebPInitAlphaProcessingSSE41(void);
+
+static volatile VP8CPUInfo alpha_processing_last_cpuinfo_used = (VP8CPUInfo)&alpha_processing_last_cpuinfo_used;
+// Initial value is the variable's own address, presumably so the first comparison below never matches.
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
+    if (alpha_processing_last_cpuinfo_used == VP8GetCPUInfo) return; // already set up for this VP8GetCPUInfo
+    // Default to the plain-C implementations from this file.
+    WebPMultARGBRow = WebPMultARGBRowC;
+    WebPMultRow = WebPMultRowC;
+    WebPApplyAlphaMultiply = ApplyAlphaMultiply;
+    WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b;
+    WebPDispatchAlpha = DispatchAlpha;
+    WebPDispatchAlphaToGreen = DispatchAlphaToGreen;
+    WebPExtractAlpha = ExtractAlpha;
+
+    // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+    if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+        if (VP8GetCPUInfo(kSSE2)) {
+            WebPInitAlphaProcessingSSE2();
+#if defined(WEBP_USE_SSE41)
+            if (VP8GetCPUInfo(kSSE4_1)) { // SSE4.1 is only tried once SSE2 was reported
+                WebPInitAlphaProcessingSSE41();
+            }
+#endif
+        }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+        if (VP8GetCPUInfo(kMIPSdspR2)) {
+            WebPInitAlphaProcessingMIPSdspR2();
+        }
+#endif
+    }
+    alpha_processing_last_cpuinfo_used = VP8GetCPUInfo; // remember which VP8GetCPUInfo this setup used
+}
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/alpha_processing_mips_dsp_r2.c b/codec/L2/demos/webpEnc/host/src/dsp/alpha_processing_mips_dsp_r2.c
new file mode 100644
index 0000000000..46f00ec09b
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/alpha_processing_mips_dsp_r2.c
@@ -0,0 +1,132 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Utilities for processing transparent channel.
+//
+// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
+//            Djordje Pesut  (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+static int DispatchAlpha(const uint8_t* alpha, int alpha_stride, int width, int height, uint8_t* dst, int dst_stride) {
+    uint32_t alpha_mask = 0xffffffff;
+    int i, j, temp0;
+    // Copies alpha[] to every 4th byte of dst while AND-ing the loaded alpha values into alpha_mask.
+    for (j = 0; j < height; ++j) {
+        uint8_t* pdst = dst;
+        const uint8_t* palpha = alpha;
+        for (i = 0; i < (width >> 2); ++i) {
+            int temp1, temp2, temp3;
+            // Four alpha bytes per iteration: one word load, four byte stores 4 bytes apart.
+            __asm__ volatile(
+                "ulw    %[temp0],      0(%[palpha])                \n\t"
+                "addiu  %[palpha],     %[palpha],     4            \n\t"
+                "addiu  %[pdst],       %[pdst],       16           \n\t"
+                "srl    %[temp1],      %[temp0],      8            \n\t"
+                "srl    %[temp2],      %[temp0],      16           \n\t"
+                "srl    %[temp3],      %[temp0],      24           \n\t"
+                "and    %[alpha_mask], %[alpha_mask], %[temp0]     \n\t"
+                "sb     %[temp0],      -16(%[pdst])                \n\t"
+                "sb     %[temp1],      -12(%[pdst])                \n\t"
+                "sb     %[temp2],      -8(%[pdst])                 \n\t"
+                "sb     %[temp3],      -4(%[pdst])                 \n\t"
+                : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
+                  [palpha] "+r"(palpha), [pdst] "+r"(pdst), [alpha_mask] "+r"(alpha_mask)
+                :
+                : "memory");
+        }
+        // NOTE(review): 'lbu' zero-extends, so the 'and' below clears bits 8..31 of alpha_mask - verify.
+        for (i = 0; i < (width & 3); ++i) {
+            __asm__ volatile(
+                "lbu    %[temp0],      0(%[palpha])                \n\t"
+                "addiu  %[palpha],     %[palpha],     1            \n\t"
+                "sb     %[temp0],      0(%[pdst])                  \n\t"
+                "and    %[alpha_mask], %[alpha_mask], %[temp0]     \n\t"
+                "addiu  %[pdst],       %[pdst],       4            \n\t"
+                : [temp0] "=&r"(temp0), [palpha] "+r"(palpha), [pdst] "+r"(pdst), [alpha_mask] "+r"(alpha_mask)
+                :
+                : "memory");
+        }
+        alpha += alpha_stride;
+        dst += dst_stride;
+    }
+    // AND the four bytes of alpha_mask together: 16-bit halves first, then the two remaining bytes.
+    __asm__ volatile(
+        "ext    %[temp0],      %[alpha_mask], 0, 16            \n\t"
+        "srl    %[alpha_mask], %[alpha_mask], 16               \n\t"
+        "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
+        "ext    %[temp0],      %[alpha_mask], 0, 8             \n\t"
+        "srl    %[alpha_mask], %[alpha_mask], 8                \n\t"
+        "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
+        : [temp0] "=&r"(temp0), [alpha_mask] "+r"(alpha_mask)
+        :);
+
+    return (alpha_mask != 0xff); // NOTE(review): looks always true once the tail loop has run - confirm
+}
+
+static void MultARGBRow(uint32_t* const ptr, int width, int inverse) {
+    int x;
+    const uint32_t c_00ffffff = 0x00ffffffu; // masks the replicated alpha bytes down to the low three
+    const uint32_t c_ff000000 = 0xff000000u; // dividend of the 'divu' used when 'inverse' is set
+    const uint32_t c_8000000 = 0x00800000u;  // NOTE(review): name reads 0x8000000, value is 0x00800000
+    const uint32_t c_8000080 = 0x00800080u;  // added to temp1 after the first pack in the asm below
+    for (x = 0; x < width; ++x) {
+        const uint32_t argb = ptr[x];
+        if (argb < 0xff000000u) {      // alpha < 255
+            if (argb <= 0x00ffffffu) { // alpha == 0
+                ptr[x] = 0;
+            } else {
+                int temp0, temp1, temp2, temp3, alpha;
+                __asm__ volatile(
+                    "srl          %[alpha],   %[argb],       24                \n\t"
+                    "replv.qb     %[temp0],   %[alpha]                         \n\t"
+                    "and          %[temp0],   %[temp0],      %[c_00ffffff]     \n\t"
+                    "beqz         %[inverse], 0f                               \n\t"
+                    "divu         $zero,      %[c_ff000000], %[alpha]          \n\t"
+                    "mflo         %[temp0]                                     \n\t"
+                    "0:                                                          \n\t"
+                    "andi         %[temp1],   %[argb],       0xff              \n\t"
+                    "ext          %[temp2],   %[argb],       8,             8  \n\t"
+                    "ext          %[temp3],   %[argb],       16,            8  \n\t"
+                    "mul          %[temp1],   %[temp1],      %[temp0]          \n\t"
+                    "mul          %[temp2],   %[temp2],      %[temp0]          \n\t"
+                    "mul          %[temp3],   %[temp3],      %[temp0]          \n\t"
+                    "precrq.ph.w  %[temp1],   %[temp2],      %[temp1]          \n\t"
+                    "addu         %[temp3],   %[temp3],      %[c_8000000]      \n\t"
+                    "addu         %[temp1],   %[temp1],      %[c_8000080]      \n\t"
+                    "precrq.ph.w  %[temp3],   %[argb],       %[temp3]          \n\t"
+                    "precrq.qb.ph %[temp1],   %[temp3],      %[temp1]          \n\t"
+                    : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
+                      [alpha] "=&r"(alpha)
+                    : [inverse] "r"(inverse), [c_00ffffff] "r"(c_00ffffff), [c_8000000] "r"(c_8000000),
+                      [c_8000080] "r"(c_8000080), [c_ff000000] "r"(c_ff000000), [argb] "r"(argb)
+                    : "memory", "hi", "lo");
+                ptr[x] = temp1; // temp1 holds the pixel assembled by the asm block
+            }
+        }
+    }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPInitAlphaProcessingMIPSdspR2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingMIPSdspR2(void) {
+    WebPDispatchAlpha = DispatchAlpha; // point the shared hooks at this file's implementations
+    WebPMultARGBRow = MultARGBRow;
+}
+
+#else // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingMIPSdspR2)
+
+#endif // WEBP_USE_MIPS_DSP_R2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/alpha_processing_sse2.c b/codec/L2/demos/webpEnc/host/src/dsp/alpha_processing_sse2.c
new file mode 100644
index 0000000000..857b47dca4
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/alpha_processing_sse2.c
@@ -0,0 +1,288 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Utilities for processing transparent channel.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+#include <emmintrin.h>
+
+//------------------------------------------------------------------------------
+// Writes alpha[] into byte 0 of every 4-byte dst pixel; returns non-zero if any alpha value is not 0xff.
+static int DispatchAlpha(const uint8_t* alpha, int alpha_stride, int width, int height, uint8_t* dst, int dst_stride) {
+    // alpha_and accumulates the 'and' of the alpha[] values seen by the scalar tail loop,
+    // all_alphas does the same for the SIMD loop. The merged value is 0xff only if every alpha[] is 0xff.
+    uint32_t alpha_and = 0xff;
+    int i, j;
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i rgb_mask = _mm_set1_epi32(0xffffff00u); // to preserve RGB
+    const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u); // 0xff in the low 8 bytes only
+    __m128i all_alphas = all_0xff;
+
+    // We must be able to access 3 extra bytes after the last written byte
+    // 'dst[4 * width - 4]', because we don't know if alpha is the first or the
+    // last byte of the quadruplet.
+    const int limit = (width - 1) & ~7;
+
+    for (j = 0; j < height; ++j) {
+        __m128i* out = (__m128i*)dst;
+        for (i = 0; i < limit; i += 8) {
+            // load 8 alpha bytes
+            const __m128i a0 = _mm_loadl_epi64((const __m128i*)&alpha[i]);
+            const __m128i a1 = _mm_unpacklo_epi8(a0, zero); // zero-extend to 8 x 16 bits
+            const __m128i a2_lo = _mm_unpacklo_epi16(a1, zero); // zero-extend to 4 x 32 bits
+            const __m128i a2_hi = _mm_unpackhi_epi16(a1, zero);
+            // load 8 dst pixels (32 bytes)
+            const __m128i b0_lo = _mm_loadu_si128(out + 0);
+            const __m128i b0_hi = _mm_loadu_si128(out + 1);
+            // mask dst alpha values
+            const __m128i b1_lo = _mm_and_si128(b0_lo, rgb_mask);
+            const __m128i b1_hi = _mm_and_si128(b0_hi, rgb_mask);
+            // combine
+            const __m128i b2_lo = _mm_or_si128(b1_lo, a2_lo);
+            const __m128i b2_hi = _mm_or_si128(b1_hi, a2_hi);
+            // store
+            _mm_storeu_si128(out + 0, b2_lo);
+            _mm_storeu_si128(out + 1, b2_hi);
+            // accumulate eight alpha 'and' in parallel
+            all_alphas = _mm_and_si128(all_alphas, a0);
+            out += 2;
+        }
+        for (; i < width; ++i) { // scalar tail: remaining pixels of the row
+            const uint32_t alpha_value = alpha[i];
+            dst[4 * i] = alpha_value;
+            alpha_and &= alpha_value;
+        }
+        alpha += alpha_stride;
+        dst += dst_stride; // dst is uint8_t*, so this stride is in bytes
+    }
+    // Combine the eight alpha 'and' into a 8-bit mask.
+    alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
+    return (alpha_and != 0xff);
+}
+// Stores alpha[i] << 8 into dst[i] (alpha in bits 8..15, every other bit zero) for each pixel of each row.
+static void DispatchAlphaToGreen(
+    const uint8_t* alpha, int alpha_stride, int width, int height, uint32_t* dst, int dst_stride) {
+    int i, j;
+    const __m128i zero = _mm_setzero_si128();
+    const int limit = width & ~15; // number of pixels handled 16 at a time by the SIMD loop
+    for (j = 0; j < height; ++j) {
+        for (i = 0; i < limit; i += 16) { // process 16 alpha bytes
+            const __m128i a0 = _mm_loadu_si128((const __m128i*)&alpha[i]);
+            const __m128i a1 = _mm_unpacklo_epi8(zero, a0); // note the 'zero' first!
+            const __m128i b1 = _mm_unpackhi_epi8(zero, a0); // same for the upper 8 alpha bytes
+            const __m128i a2_lo = _mm_unpacklo_epi16(a1, zero);
+            const __m128i b2_lo = _mm_unpacklo_epi16(b1, zero);
+            const __m128i a2_hi = _mm_unpackhi_epi16(a1, zero);
+            const __m128i b2_hi = _mm_unpackhi_epi16(b1, zero);
+            _mm_storeu_si128((__m128i*)&dst[i + 0], a2_lo);
+            _mm_storeu_si128((__m128i*)&dst[i + 4], a2_hi);
+            _mm_storeu_si128((__m128i*)&dst[i + 8], b2_lo);
+            _mm_storeu_si128((__m128i*)&dst[i + 12], b2_hi);
+        }
+        for (; i < width; ++i) dst[i] = alpha[i] << 8; // scalar tail
+        alpha += alpha_stride;
+        dst += dst_stride; // dst is uint32_t*, so this stride is in 32-bit elements
+    }
+}
+// Copies byte 0 of every 4-byte argb pixel into alpha[]; returns non-zero if every copied value is 0xff.
+static int ExtractAlpha(const uint8_t* argb, int argb_stride, int width, int height, uint8_t* alpha, int alpha_stride) {
+    // alpha_and accumulates the 'and' of the values copied by the scalar tail loop,
+    // all_alphas does the same for the SIMD loop. The merged value is 0xff only if every value is 0xff.
+    uint32_t alpha_and = 0xff;
+    int i, j;
+    const __m128i a_mask = _mm_set1_epi32(0xffu); // to preserve alpha
+    const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u); // 0xff in the low 8 bytes only
+    __m128i all_alphas = all_0xff;
+
+    // We must be able to access 3 extra bytes after the last read byte
+    // 'src[4 * width - 4]', because we don't know if alpha is the first or the
+    // last byte of the quadruplet.
+    const int limit = (width - 1) & ~7;
+
+    for (j = 0; j < height; ++j) {
+        const __m128i* src = (const __m128i*)argb;
+        for (i = 0; i < limit; i += 8) {
+            // load 32 argb bytes
+            const __m128i a0 = _mm_loadu_si128(src + 0);
+            const __m128i a1 = _mm_loadu_si128(src + 1);
+            const __m128i b0 = _mm_and_si128(a0, a_mask);
+            const __m128i b1 = _mm_and_si128(a1, a_mask);
+            const __m128i c0 = _mm_packs_epi32(b0, b1); // 8 x 16-bit values
+            const __m128i d0 = _mm_packus_epi16(c0, c0); // the 8 bytes, duplicated in both halves
+            // store
+            _mm_storel_epi64((__m128i*)&alpha[i], d0);
+            // accumulate eight alpha 'and' in parallel
+            all_alphas = _mm_and_si128(all_alphas, d0);
+            src += 2;
+        }
+        for (; i < width; ++i) { // scalar tail: remaining pixels of the row
+            const uint32_t alpha_value = argb[4 * i];
+            alpha[i] = alpha_value;
+            alpha_and &= alpha_value;
+        }
+        argb += argb_stride;
+        alpha += alpha_stride;
+    }
+    // Combine the eight alpha 'and' into a 8-bit mask.
+    alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
+    return (alpha_and == 0xff);
+}
+
+//------------------------------------------------------------------------------
+// Non-dither premultiplied modes
+
+#define MULTIPLIER(a) ((a)*0x8081) // 0x8081 / 2^23 is approximately 1 / 255
+#define PREMULTIPLY(x, m) (((x) * (m)) >> 23) // ~ x * alpha / 255 when m == MULTIPLIER(alpha)
+
+// SHUFFLE must be an immediate for _mm_shufflexx_epi16(), so it can't be a 'const int': we really need a macro here.
+// APPLY_ALPHA premultiplies the two pixels at RGBX in place and expects a zeroed __m128i named 'zero' in scope.
+#define APPLY_ALPHA(RGBX, SHUFFLE, MASK, MULT)                       \
+    do {                                                             \
+        const __m128i argb0 = _mm_loadl_epi64((__m128i*)&(RGBX));    \
+        const __m128i argb1 = _mm_unpacklo_epi8(argb0, zero);        \
+        const __m128i alpha0 = _mm_and_si128(argb1, MASK);           \
+        const __m128i alpha1 = _mm_shufflelo_epi16(alpha0, SHUFFLE); \
+        const __m128i alpha2 = _mm_shufflehi_epi16(alpha1, SHUFFLE); \
+        /* alpha2 = [0 a0 a0 a0][0 a1 a1 a1] */                      \
+        const __m128i scale0 = _mm_mullo_epi16(alpha2, MULT);        \
+        const __m128i scale1 = _mm_mulhi_epu16(alpha2, MULT);        \
+        const __m128i argb2 = _mm_mulhi_epu16(argb1, scale0);        \
+        const __m128i argb3 = _mm_mullo_epi16(argb1, scale1);        \
+        const __m128i argb4 = _mm_adds_epu16(argb2, argb3);          \
+        const __m128i argb5 = _mm_srli_epi16(argb4, 7);              \
+        const __m128i argb6 = _mm_or_si128(argb5, alpha0);           \
+        const __m128i argb7 = _mm_packus_epi16(argb6, zero);         \
+        _mm_storel_epi64((__m128i*)&(RGBX), argb7);                  \
+    } while (0)
+// Premultiplies the three color bytes of every pixel by its alpha byte (byte 0 if alpha_first, else byte 3), in place.
+static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first, int w, int h, int stride) {
+    const __m128i zero = _mm_setzero_si128(); // referenced by name inside APPLY_ALPHA
+    const int kSpan = 2; // pixels handled per APPLY_ALPHA invocation
+    const int w2 = w & ~(kSpan - 1);
+    while (h-- > 0) {
+        uint32_t* const rgbx = (uint32_t*)rgba;
+        int i;
+        if (!alpha_first) {
+            const __m128i kMask = _mm_set_epi16(0xff, 0, 0, 0, 0xff, 0, 0, 0); // alpha is 16-bit lane 3 of each pixel
+            const __m128i kMult = _mm_set_epi16(0, 0x8081, 0x8081, 0x8081, 0, 0x8081, 0x8081, 0x8081);
+            for (i = 0; i < w2; i += kSpan) {
+                APPLY_ALPHA(rgbx[i], _MM_SHUFFLE(0, 3, 3, 3), kMask, kMult);
+            }
+        } else {
+            const __m128i kMask = _mm_set_epi16(0, 0, 0, 0xff, 0, 0, 0, 0xff); // alpha is 16-bit lane 0 of each pixel
+            const __m128i kMult = _mm_set_epi16(0x8081, 0x8081, 0x8081, 0, 0x8081, 0x8081, 0x8081, 0);
+            for (i = 0; i < w2; i += kSpan) {
+                APPLY_ALPHA(rgbx[i], _MM_SHUFFLE(0, 0, 0, 3), kMask, kMult);
+            }
+        }
+        // Finish with left-overs.
+        for (; i < w; ++i) {
+            uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
+            const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
+            const uint32_t a = alpha[4 * i];
+            if (a != 0xff) { // fully opaque pixels are left untouched
+                const uint32_t mult = MULTIPLIER(a);
+                rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult);
+                rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult);
+                rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult);
+            }
+        }
+        rgba += stride; // rgba is uint8_t*, so this stride is in bytes
+    }
+}
+#undef MULTIPLIER
+#undef PREMULTIPLY
+
+// -----------------------------------------------------------------------------
+// Apply alpha value to rows
+
+// We use: kINV255 = (1 << 24) / 255 = 0x010101
+// So: a * kINV255 = (a << 16) | [(a << 8) | a]
+// -> _mm_mullo_epi16(x, a) takes care of the (a<<16) part,
+// and _mm_mulhi_epu16(x, a * 0x0101) takes care of the "(a << 8) | a" one.
+// Scales the three low bytes of each 32-bit pixel by its top byte / 255; inverse mode and odd tails go to WebPMultARGBRowC.
+static void MultARGBRow(uint32_t* const ptr, int width, int inverse) {
+    int x = 0;
+    if (!inverse) {
+        const int kSpan = 2; // pixels handled per SIMD iteration
+        const __m128i zero = _mm_setzero_si128();
+        const __m128i kRound = _mm_set_epi16(0, 1 << 7, 1 << 7, 1 << 7, 0, 1 << 7, 1 << 7, 1 << 7);
+        const __m128i kMult = _mm_set_epi16(0, 0x0101, 0x0101, 0x0101, 0, 0x0101, 0x0101, 0x0101);
+        const __m128i kOne64 = _mm_set_epi16(1u << 8, 0, 0, 0, 1u << 8, 0, 0, 0);
+        const int w2 = width & ~(kSpan - 1);
+        for (x = 0; x < w2; x += kSpan) {
+            const __m128i argb0 = _mm_loadl_epi64((__m128i*)&ptr[x]);
+            const __m128i argb1 = _mm_unpacklo_epi8(argb0, zero);
+            const __m128i tmp0 = _mm_shufflelo_epi16(argb1, _MM_SHUFFLE(3, 3, 3, 3));
+            const __m128i tmp1 = _mm_shufflehi_epi16(tmp0, _MM_SHUFFLE(3, 3, 3, 3)); // top byte broadcast to all 4 lanes
+            const __m128i tmp2 = _mm_srli_epi64(tmp1, 16); // lanes [a a a 0] per pixel
+            const __m128i scale0 = _mm_mullo_epi16(tmp1, kMult); // a * 0x0101 in lanes 0..2, 0 in lane 3
+            const __m128i scale1 = _mm_or_si128(tmp2, kOne64); // lanes [a a a 0x100]: lane 3 survives the >> 8 unchanged
+            const __m128i argb2 = _mm_mulhi_epu16(argb1, scale0);
+            const __m128i argb3 = _mm_mullo_epi16(argb1, scale1);
+            const __m128i argb4 = _mm_adds_epu16(argb2, argb3);
+            const __m128i argb5 = _mm_adds_epu16(argb4, kRound);
+            const __m128i argb6 = _mm_srli_epi16(argb5, 8);
+            const __m128i argb7 = _mm_packus_epi16(argb6, zero);
+            _mm_storel_epi64((__m128i*)&ptr[x], argb7);
+        }
+    }
+    width -= x; // pixels not handled above (all of them when inverse != 0)
+    if (width > 0) WebPMultARGBRowC(ptr + x, width, inverse);
+}
+// Scales ptr[i] by alpha[i] / 255 in place, 8 bytes per SIMD step; inverse mode and the tail go to WebPMultRowC.
+static void MultRow(uint8_t* const ptr, const uint8_t* const alpha, int width, int inverse) {
+    int x = 0;
+    if (!inverse) {
+        const int kSpan = 8; // bytes handled per SIMD iteration
+        const __m128i zero = _mm_setzero_si128();
+        const __m128i kRound = _mm_set1_epi16(1 << 7);
+        const int w2 = width & ~(kSpan - 1);
+        for (x = 0; x < w2; x += kSpan) {
+            const __m128i v0 = _mm_loadl_epi64((__m128i*)&ptr[x]);
+            const __m128i v1 = _mm_unpacklo_epi8(v0, zero);
+            const __m128i alpha0 = _mm_loadl_epi64((const __m128i*)&alpha[x]);
+            const __m128i alpha1 = _mm_unpacklo_epi8(alpha0, zero); // each 16-bit lane = alpha
+            const __m128i alpha2 = _mm_unpacklo_epi8(alpha0, alpha0); // each 16-bit lane = alpha * 0x0101
+            const __m128i v2 = _mm_mulhi_epu16(v1, alpha2);
+            const __m128i v3 = _mm_mullo_epi16(v1, alpha1);
+            const __m128i v4 = _mm_adds_epu16(v2, v3);
+            const __m128i v5 = _mm_adds_epu16(v4, kRound);
+            const __m128i v6 = _mm_srli_epi16(v5, 8);
+            const __m128i v7 = _mm_packus_epi16(v6, zero);
+            _mm_storel_epi64((__m128i*)&ptr[x], v7);
+        }
+    }
+    width -= x; // bytes not handled above (all of them when inverse != 0)
+    if (width > 0) WebPMultRowC(ptr + x, alpha + x, width, inverse);
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPInitAlphaProcessingSSE2(void);
+// Points the six alpha-processing function pointers below at the SSE2 routines defined in this file.
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE2(void) {
+    WebPMultARGBRow = MultARGBRow;
+    WebPMultRow = MultRow;
+    WebPApplyAlphaMultiply = ApplyAlphaMultiply;
+    WebPDispatchAlpha = DispatchAlpha;
+    WebPDispatchAlphaToGreen = DispatchAlphaToGreen;
+    WebPExtractAlpha = ExtractAlpha;
+}
+
+#else // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingSSE2) // NOTE(review): presumably defines a no-op init; confirm in dsp.h
+
+#endif // WEBP_USE_SSE2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/alpha_processing_sse41.c b/codec/L2/demos/webpEnc/host/src/dsp/alpha_processing_sse41.c
new file mode 100644
index 0000000000..5867f36f20
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/alpha_processing_sse41.c
@@ -0,0 +1,86 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Utilities for processing transparent channel, SSE4.1 variant.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE41)
+
+#include <smmintrin.h>
+
+//------------------------------------------------------------------------------
+// Copies byte 0 of every 4-byte argb pixel into alpha[], 16 pixels per step; returns non-zero if all values are 0xff.
+static int ExtractAlpha(const uint8_t* argb, int argb_stride, int width, int height, uint8_t* alpha, int alpha_stride) {
+    // alpha_and accumulates the 'and' of the values copied by the scalar tail loop,
+    // all_alphas does the same for the SIMD loop. Any value other than 0xff makes the final test fail.
+    uint32_t alpha_and = 0xff;
+    int i, j;
+    const __m128i all_0xff = _mm_set1_epi32(~0u); // all 16 bytes are 0xff
+    __m128i all_alphas = all_0xff;
+
+    // We must be able to access 3 extra bytes after the last read byte
+    // 'src[4 * width - 4]', because we don't know if alpha is the first or the
+    // last byte of the quadruplet.
+    const int limit = (width - 1) & ~15;
+    const __m128i kCstAlpha0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0); // -> bytes 0..3
+    const __m128i kCstAlpha1 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0, -1, -1, -1, -1); // -> bytes 4..7
+    const __m128i kCstAlpha2 = _mm_set_epi8(-1, -1, -1, -1, 12, 8, 4, 0, -1, -1, -1, -1, -1, -1, -1, -1); // -> bytes 8..11
+    const __m128i kCstAlpha3 = _mm_set_epi8(12, 8, 4, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); // -> bytes 12..15
+    for (j = 0; j < height; ++j) {
+        const __m128i* src = (const __m128i*)argb;
+        for (i = 0; i < limit; i += 16) {
+            // load 64 argb bytes
+            const __m128i a0 = _mm_loadu_si128(src + 0);
+            const __m128i a1 = _mm_loadu_si128(src + 1);
+            const __m128i a2 = _mm_loadu_si128(src + 2);
+            const __m128i a3 = _mm_loadu_si128(src + 3);
+            const __m128i b0 = _mm_shuffle_epi8(a0, kCstAlpha0); // picks bytes 0, 4, 8, 12; other output bytes are 0
+            const __m128i b1 = _mm_shuffle_epi8(a1, kCstAlpha1);
+            const __m128i b2 = _mm_shuffle_epi8(a2, kCstAlpha2);
+            const __m128i b3 = _mm_shuffle_epi8(a3, kCstAlpha3);
+            const __m128i c0 = _mm_or_si128(b0, b1);
+            const __m128i c1 = _mm_or_si128(b2, b3);
+            const __m128i d0 = _mm_or_si128(c0, c1); // the 16 extracted bytes, in pixel order
+            // store
+            _mm_storeu_si128((__m128i*)&alpha[i], d0);
+            // accumulate sixteen alpha 'and' in parallel
+            all_alphas = _mm_and_si128(all_alphas, d0);
+            src += 4;
+        }
+        for (; i < width; ++i) { // scalar tail: remaining pixels of the row
+            const uint32_t alpha_value = argb[4 * i];
+            alpha[i] = alpha_value;
+            alpha_and &= alpha_value;
+        }
+        argb += argb_stride;
+        alpha += alpha_stride;
+    }
+    // Combine the sixteen alpha 'and' into a 16-bit mask.
+    alpha_and |= 0xff00u; // pretend the upper bits [8..15] were tested ok.
+    alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
+    return (alpha_and == 0xffffu);
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPInitAlphaProcessingSSE41(void);
+// Points WebPExtractAlpha at the SSE4.1 ExtractAlpha defined in this file.
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE41(void) {
+    WebPExtractAlpha = ExtractAlpha;
+}
+
+#else // !WEBP_USE_SSE41
+
+WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingSSE41) // NOTE(review): presumably defines a no-op init; confirm in dsp.h
+
+#endif // WEBP_USE_SSE41
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/argb.c b/codec/L2/demos/webpEnc/host/src/dsp/argb.c
new file mode 100644
index 0000000000..38be7a2149
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/argb.c
@@ -0,0 +1,63 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   ARGB making functions.
+//
+// Author: Djordje Pesut (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) { // callers in this file pass 8-bit values
+    return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b); // laid out as 0xAARRGGBB
+}
+// Builds len ARGB words in out[] from four channel pointers, reading each channel at a fixed stride of 4 bytes.
+static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g, const uint8_t* b, int len, uint32_t* out) {
+    int i;
+    for (i = 0; i < len; ++i) {
+        out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
+    }
+}
+// Builds len ARGB words in out[] with alpha fixed to 0xff; consecutive samples of r/g/b are 'step' bytes apart.
+static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b, int len, int step, uint32_t* out) {
+    int i, offset = 0;
+    for (i = 0; i < len; ++i) {
+        out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
+        offset += step;
+    }
+}
+// Function pointers assigned by VP8EncDspARGBInit() below.
+void (*VP8PackARGB)(const uint8_t*, const uint8_t*, const uint8_t*, const uint8_t*, int, uint32_t*);
+void (*VP8PackRGB)(const uint8_t*, const uint8_t*, const uint8_t*, int, int, uint32_t*);
+
+extern void VP8EncDspARGBInitMIPSdspR2(void);
+extern void VP8EncDspARGBInitSSE2(void);
+
+static volatile VP8CPUInfo argb_last_cpuinfo_used = (VP8CPUInfo)&argb_last_cpuinfo_used; // starts as its own address
+// Installs the C PackARGB/PackRGB, then lets CPU-specific inits override them; returns early if VP8GetCPUInfo is unchanged.
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInit(void) {
+    if (argb_last_cpuinfo_used == VP8GetCPUInfo) return;
+
+    VP8PackARGB = PackARGB;
+    VP8PackRGB = PackRGB;
+
+    // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+    if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+        if (VP8GetCPUInfo(kSSE2)) {
+            VP8EncDspARGBInitSSE2();
+        }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+        if (VP8GetCPUInfo(kMIPSdspR2)) {
+            VP8EncDspARGBInitMIPSdspR2();
+        }
+#endif
+    }
+    argb_last_cpuinfo_used = VP8GetCPUInfo; // remember which CPU-info function this setup was done for
+}
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/argb_mips_dsp_r2.c b/codec/L2/demos/webpEnc/host/src/dsp/argb_mips_dsp_r2.c
new file mode 100644
index 0000000000..e886bf87ca
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/argb_mips_dsp_r2.c
@@ -0,0 +1,103 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   ARGB making functions (mips version).
+//
+// Author: Djordje Pesut (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+// Builds len ARGB words in out[] (a in the top byte, then r, g, b) from four channel pointers read 4 bytes apart.
+static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g, const uint8_t* b, int len, uint32_t* out) {
+    int temp0, temp1, temp2, temp3, offset;
+    const int rest = len & 1; // 1 when len is odd: that last pixel is handled after the loop
+    const uint32_t* const loop_end = out + len - rest;
+    const int step = 4; // byte distance between consecutive samples of one channel
+    __asm__ volatile(
+        "xor          %[offset],   %[offset], %[offset]    \n\t" // offset = 0
+        "beq          %[loop_end], %[out],    0f           \n\t" // skip the loop when it has nothing to do
+        "2:                                                  \n\t"
+        "lbux         %[temp0],    %[offset](%[a])         \n\t" // temp0 = a[offset]
+        "lbux         %[temp1],    %[offset](%[r])         \n\t" // temp1 = r[offset]
+        "lbux         %[temp2],    %[offset](%[g])         \n\t" // temp2 = g[offset]
+        "lbux         %[temp3],    %[offset](%[b])         \n\t" // temp3 = b[offset]
+        "ins          %[temp1],    %[temp0],  16,     16   \n\t" // temp1 = (a << 16) | r
+        "ins          %[temp3],    %[temp2],  16,     16   \n\t" // temp3 = (g << 16) | b
+        "addiu        %[out],      %[out],    4            \n\t" // advance out by one 32-bit word
+        "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t" // temp0 = bytes a, r, g, b (a on top)
+        "sw           %[temp0],    -4(%[out])              \n\t" // store into the word just stepped over
+        "addu         %[offset],   %[offset], %[step]      \n\t"
+        "bne          %[loop_end], %[out],    2b           \n\t"
+        "0:                                                  \n\t"
+        "beq          %[rest],     $zero,     1f           \n\t" // no odd pixel left
+        "lbux         %[temp0],    %[offset](%[a])         \n\t"
+        "lbux         %[temp1],    %[offset](%[r])         \n\t"
+        "lbux         %[temp2],    %[offset](%[g])         \n\t"
+        "lbux         %[temp3],    %[offset](%[b])         \n\t"
+        "ins          %[temp1],    %[temp0],  16,     16   \n\t"
+        "ins          %[temp3],    %[temp2],  16,     16   \n\t"
+        "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
+        "sw           %[temp0],    0(%[out])               \n\t"
+        "1:                                                  \n\t"
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
+          [offset] "=&r"(offset), [out] "+&r"(out)
+        : [a] "r"(a), [r] "r"(r), [g] "r"(g), [b] "r"(b), [step] "r"(step), [loop_end] "r"(loop_end), [rest] "r"(rest)
+        : "memory");
+}
+// Builds len ARGB words in out[] with the top byte fixed to 0xff; samples of r/g/b are 'step' bytes apart.
+static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b, int len, int step, uint32_t* out) {
+    int temp0, temp1, temp2, offset;
+    const int rest = len & 1; // 1 when len is odd: that last pixel is handled after the loop
+    const int a = 0xff; // constant alpha inserted into every output word
+    const uint32_t* const loop_end = out + len - rest;
+    __asm__ volatile(
+        "xor          %[offset],   %[offset], %[offset]    \n\t" // offset = 0
+        "beq          %[loop_end], %[out],    0f           \n\t" // skip the loop when it has nothing to do
+        "2:                                                  \n\t"
+        "lbux         %[temp0],    %[offset](%[r])         \n\t" // temp0 = r[offset]
+        "lbux         %[temp1],    %[offset](%[g])         \n\t" // temp1 = g[offset]
+        "lbux         %[temp2],    %[offset](%[b])         \n\t" // temp2 = b[offset]
+        "ins          %[temp0],    %[a],      16,     16   \n\t" // temp0 = (0xff << 16) | r
+        "ins          %[temp2],    %[temp1],  16,     16   \n\t" // temp2 = (g << 16) | b
+        "addiu        %[out],      %[out],    4            \n\t" // advance out by one 32-bit word
+        "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t" // temp0 = bytes 0xff, r, g, b (0xff on top)
+        "sw           %[temp0],    -4(%[out])              \n\t" // store into the word just stepped over
+        "addu         %[offset],   %[offset], %[step]      \n\t"
+        "bne          %[loop_end], %[out],    2b           \n\t"
+        "0:                                                  \n\t"
+        "beq          %[rest],     $zero,     1f           \n\t" // no odd pixel left
+        "lbux         %[temp0],    %[offset](%[r])         \n\t"
+        "lbux         %[temp1],    %[offset](%[g])         \n\t"
+        "lbux         %[temp2],    %[offset](%[b])         \n\t"
+        "ins          %[temp0],    %[a],      16,     16   \n\t"
+        "ins          %[temp2],    %[temp1],  16,     16   \n\t"
+        "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
+        "sw           %[temp0],    0(%[out])               \n\t"
+        "1:                                                  \n\t"
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [offset] "=&r"(offset), [out] "+&r"(out)
+        : [a] "r"(a), [r] "r"(r), [g] "r"(g), [b] "r"(b), [step] "r"(step), [loop_end] "r"(loop_end), [rest] "r"(rest)
+        : "memory");
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspARGBInitMIPSdspR2(void);
+// Points VP8PackARGB and VP8PackRGB at the MIPS DSP R2 routines defined in this file.
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitMIPSdspR2(void) {
+    VP8PackARGB = PackARGB;
+    VP8PackRGB = PackRGB;
+}
+
+#else // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(VP8EncDspARGBInitMIPSdspR2) // NOTE(review): presumably defines a no-op init; confirm in dsp.h
+
+#endif // WEBP_USE_MIPS_DSP_R2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/argb_sse2.c b/codec/L2/demos/webpEnc/host/src/dsp/argb_sse2.c
new file mode 100644
index 0000000000..d1b9b4d37d
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/argb_sse2.c
@@ -0,0 +1,66 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   ARGB making functions (SSE2 version).
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+
+#include <assert.h>
+#include <emmintrin.h>
+#include <string.h>
+
+static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) { // callers in this file pass 8-bit values
+    return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b); // laid out as 0xAARRGGBB
+}
+// Packs interleaved 4-byte pixels into ARGB words: swaps R and B for R,G,B,A input, plain memcpy for B,G,R,A input.
+static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g, const uint8_t* b, int len, uint32_t* out) {
+    if (g == r + 1) { // RGBA input order. Need to swap R and B.
+        int i = 0;
+        const int len_max = len & ~3; // max length processed in main loop
+        const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ffu);
+        assert(b == r + 2);
+        assert(a == r + 3);
+        for (; i < len_max; i += 4) {
+            const __m128i A = _mm_loadu_si128((const __m128i*)(r + 4 * i));
+            const __m128i B = _mm_and_si128(A, red_blue_mask);    // R 0 B 0
+            const __m128i C = _mm_andnot_si128(red_blue_mask, A); // 0 G 0 A
+            const __m128i D = _mm_shufflelo_epi16(B, _MM_SHUFFLE(2, 3, 0, 1)); // swap R and B in pixels 0 and 1
+            const __m128i E = _mm_shufflehi_epi16(D, _MM_SHUFFLE(2, 3, 0, 1)); // swap R and B in pixels 2 and 3
+            const __m128i F = _mm_or_si128(E, C);
+            _mm_storeu_si128((__m128i*)(out + i), F);
+        }
+        for (; i < len; ++i) { // scalar tail: up to 3 remaining pixels
+            out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
+        }
+    } else {
+        assert(g == b + 1);
+        assert(r == b + 2);
+        assert(a == b + 3);
+        memcpy(out, b, len * 4); // input bytes are already B,G,R,A, so they are copied unchanged
+    }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspARGBInitSSE2(void);
+// Points VP8PackARGB at the SSE2 PackARGB defined in this file.
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitSSE2(void) {
+    VP8PackARGB = PackARGB;
+}
+
+#else // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(VP8EncDspARGBInitSSE2) // NOTE(review): presumably defines a no-op init; confirm in dsp.h
+
+#endif // WEBP_USE_SSE2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/cost.c b/codec/L2/demos/webpEnc/host/src/dsp/cost.c
new file mode 100644
index 0000000000..b6ceca9667
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/cost.c
@@ -0,0 +1,247 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+#include "../enc/cost.h"
+
+//------------------------------------------------------------------------------
+// Boolean-cost cost table
+
+const uint16_t VP8EntropyCost[256] = {
+    1792, 1792, 1792, 1536, 1536, 1408, 1366, 1280, 1280, 1216, 1178, 1152, 1110, 1076, 1061, 1024, 1024, 992, 968, 951,
+    939,  911,  896,  878,  871,  854,  838,  820,  811,  794,  786,  768,  768,  752,  740,  732,  720,  709, 704, 690,
+    683,  672,  666,  655,  647,  640,  631,  622,  615,  607,  598,  592,  586,  576,  572,  564,  559,  555, 547, 541,
+    534,  528,  522,  512,  512,  504,  500,  494,  488,  483,  477,  473,  467,  461,  458,  452,  448,  443, 438, 434,
+    427,  424,  419,  415,  410,  406,  403,  399,  394,  390,  384,  384,  377,  374,  370,  366,  362,  359, 355, 351,
+    347,  342,  342,  336,  333,  330,  326,  323,  320,  316,  312,  308,  305,  302,  299,  296,  293,  288, 287, 283,
+    280,  277,  274,  272,  268,  266,  262,  256,  256,  256,  251,  248,  245,  242,  240,  237,  234,  232, 228, 226,
+    223,  221,  218,  216,  214,  211,  208,  205,  203,  201,  198,  196,  192,  191,  188,  187,  183,  181, 179, 176,
+    175,  171,  171,  168,  165,  163,  160,  159,  156,  154,  152,  150,  148,  146,  144,  142,  139,  138, 135, 133,
+    131,  128,  128,  125,  123,  121,  119,  117,  115,  113,  111,  110,  107,  105,  103,  102,  100,  98,  96,  94,
+    92,   91,   89,   86,   86,   83,   82,   80,   77,   76,   74,   73,   71,   69,   67,   66,   64,   63,  61,  59,
+    57,   55,   54,   52,   51,   49,   47,   46,   44,   43,   41,   40,   38,   36,   35,   33,   32,   30,  29,  27,
+    25,   24,   22,   21,   19,   18,   16,   15,   13,   12,   10,   9,    7,    6,    4,    3};
+
+//------------------------------------------------------------------------------
+// Level cost tables
+
+// fixed costs for coding levels, deduced from the coding tree.
+// This is only the part that doesn't depend on the probability state.
+const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1] = {
+    0,    256,  256,  256,  256,  432,  618,  630,  731,  640,  640,  828,  901,  948,  1021, 1101, 1174, 1221, 1294,
+    1042, 1085, 1115, 1158, 1202, 1245, 1275, 1318, 1337, 1380, 1410, 1453, 1497, 1540, 1570, 1613, 1280, 1295, 1317,
+    1332, 1358, 1373, 1395, 1410, 1454, 1469, 1491, 1506, 1532, 1547, 1569, 1584, 1601, 1616, 1638, 1653, 1679, 1694,
+    1716, 1731, 1775, 1790, 1812, 1827, 1853, 1868, 1890, 1905, 1727, 1733, 1742, 1748, 1759, 1765, 1774, 1780, 1800,
+    1806, 1815, 1821, 1832, 1838, 1847, 1853, 1878, 1884, 1893, 1899, 1910, 1916, 1925, 1931, 1951, 1957, 1966, 1972,
+    1983, 1989, 1998, 2004, 2027, 2033, 2042, 2048, 2059, 2065, 2074, 2080, 2100, 2106, 2115, 2121, 2132, 2138, 2147,
+    2153, 2178, 2184, 2193, 2199, 2210, 2216, 2225, 2231, 2251, 2257, 2266, 2272, 2283, 2289, 2298, 2304, 2168, 2174,
+    2183, 2189, 2200, 2206, 2215, 2221, 2241, 2247, 2256, 2262, 2273, 2279, 2288, 2294, 2319, 2325, 2334, 2340, 2351,
+    2357, 2366, 2372, 2392, 2398, 2407, 2413, 2424, 2430, 2439, 2445, 2468, 2474, 2483, 2489, 2500, 2506, 2515, 2521,
+    2541, 2547, 2556, 2562, 2573, 2579, 2588, 2594, 2619, 2625, 2634, 2640, 2651, 2657, 2666, 2672, 2692, 2698, 2707,
+    2713, 2724, 2730, 2739, 2745, 2540, 2546, 2555, 2561, 2572, 2578, 2587, 2593, 2613, 2619, 2628, 2634, 2645, 2651,
+    2660, 2666, 2691, 2697, 2706, 2712, 2723, 2729, 2738, 2744, 2764, 2770, 2779, 2785, 2796, 2802, 2811, 2817, 2840,
+    2846, 2855, 2861, 2872, 2878, 2887, 2893, 2913, 2919, 2928, 2934, 2945, 2951, 2960, 2966, 2991, 2997, 3006, 3012,
+    3023, 3029, 3038, 3044, 3064, 3070, 3079, 3085, 3096, 3102, 3111, 3117, 2981, 2987, 2996, 3002, 3013, 3019, 3028,
+    3034, 3054, 3060, 3069, 3075, 3086, 3092, 3101, 3107, 3132, 3138, 3147, 3153, 3164, 3170, 3179, 3185, 3205, 3211,
+    3220, 3226, 3237, 3243, 3252, 3258, 3281, 3287, 3296, 3302, 3313, 3319, 3328, 3334, 3354, 3360, 3369, 3375, 3386,
+    3392, 3401, 3407, 3432, 3438, 3447, 3453, 3464, 3470, 3479, 3485, 3505, 3511, 3520, 3526, 3537, 3543, 3552, 3558,
+    2816, 2822, 2831, 2837, 2848, 2854, 2863, 2869, 2889, 2895, 2904, 2910, 2921, 2927, 2936, 2942, 2967, 2973, 2982,
+    2988, 2999, 3005, 3014, 3020, 3040, 3046, 3055, 3061, 3072, 3078, 3087, 3093, 3116, 3122, 3131, 3137, 3148, 3154,
+    3163, 3169, 3189, 3195, 3204, 3210, 3221, 3227, 3236, 3242, 3267, 3273, 3282, 3288, 3299, 3305, 3314, 3320, 3340,
+    3346, 3355, 3361, 3372, 3378, 3387, 3393, 3257, 3263, 3272, 3278, 3289, 3295, 3304, 3310, 3330, 3336, 3345, 3351,
+    3362, 3368, 3377, 3383, 3408, 3414, 3423, 3429, 3440, 3446, 3455, 3461, 3481, 3487, 3496, 3502, 3513, 3519, 3528,
+    3534, 3557, 3563, 3572, 3578, 3589, 3595, 3604, 3610, 3630, 3636, 3645, 3651, 3662, 3668, 3677, 3683, 3708, 3714,
+    3723, 3729, 3740, 3746, 3755, 3761, 3781, 3787, 3796, 3802, 3813, 3819, 3828, 3834, 3629, 3635, 3644, 3650, 3661,
+    3667, 3676, 3682, 3702, 3708, 3717, 3723, 3734, 3740, 3749, 3755, 3780, 3786, 3795, 3801, 3812, 3818, 3827, 3833,
+    3853, 3859, 3868, 3874, 3885, 3891, 3900, 3906, 3929, 3935, 3944, 3950, 3961, 3967, 3976, 3982, 4002, 4008, 4017,
+    4023, 4034, 4040, 4049, 4055, 4080, 4086, 4095, 4101, 4112, 4118, 4127, 4133, 4153, 4159, 4168, 4174, 4185, 4191,
+    4200, 4206, 4070, 4076, 4085, 4091, 4102, 4108, 4117, 4123, 4143, 4149, 4158, 4164, 4175, 4181, 4190, 4196, 4221,
+    4227, 4236, 4242, 4253, 4259, 4268, 4274, 4294, 4300, 4309, 4315, 4326, 4332, 4341, 4347, 4370, 4376, 4385, 4391,
+    4402, 4408, 4417, 4423, 4443, 4449, 4458, 4464, 4475, 4481, 4490, 4496, 4521, 4527, 4536, 4542, 4553, 4559, 4568,
+    4574, 4594, 4600, 4609, 4615, 4626, 4632, 4641, 4647, 3515, 3521, 3530, 3536, 3547, 3553, 3562, 3568, 3588, 3594,
+    3603, 3609, 3620, 3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698, 3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
+    3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847, 3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920, 3926, 3935, 3941,
+    3966, 3972, 3981, 3987, 3998, 4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071, 4077, 4086, 4092, 3956, 3962, 3971,
+    3977, 3988, 3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061, 4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139, 4145,
+    4154, 4160, 4180, 4186, 4195, 4201, 4212, 4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288, 4294, 4303, 4309, 4329,
+    4335, 4344, 4350, 4361, 4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439, 4445, 4454, 4460, 4480, 4486, 4495, 4501,
+    4512, 4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360, 4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433, 4439, 4448,
+    4454, 4479, 4485, 4494, 4500, 4511, 4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584, 4590, 4599, 4605, 4628, 4634,
+    4643, 4649, 4660, 4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733, 4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
+    4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884, 4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801, 4807, 4816, 4822,
+    4842, 4848, 4857, 4863, 4874, 4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952, 4958, 4967, 4973, 4993, 4999, 5008,
+    5014, 5025, 5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101, 5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174, 5180,
+    5189, 5195, 5220, 5226, 5235, 5241, 5252, 5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325, 5331, 5340, 5346, 4604,
+    4610, 4619, 4625, 4636, 4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709, 4715, 4724, 4730, 4755, 4761, 4770, 4776,
+    4787, 4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860, 4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936, 4942, 4951,
+    4957, 4977, 4983, 4992, 4998, 5009, 5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087, 5093, 5102, 5108, 5128, 5134,
+    5143, 5149, 5160, 5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077, 5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
+    5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228, 5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301, 5307, 5316, 5322,
+    5345, 5351, 5360, 5366, 5377, 5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450, 5456, 5465, 5471, 5496, 5502, 5511,
+    5517, 5528, 5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601, 5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449, 5455,
+    5464, 5470, 5490, 5496, 5505, 5511, 5522, 5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600, 5606, 5615, 5621, 5641,
+    5647, 5656, 5662, 5673, 5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749, 5755, 5764, 5770, 5790, 5796, 5805, 5811,
+    5822, 5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900, 5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973, 5979, 5988,
+    5994, 5858, 5864, 5873, 5879, 5890, 5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963, 5969, 5978, 5984, 6009, 6015,
+    6024, 6030, 6041, 6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114, 6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
+    6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263, 6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341, 6347, 6356, 6362,
+    6382, 6388, 6397, 6403, 6414, 6420, 6429, 6435, 3515, 3521, 3530, 3536, 3547, 3553, 3562, 3568, 3588, 3594, 3603,
+    3609, 3620, 3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698, 3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771, 3777,
+    3786, 3792, 3815, 3821, 3830, 3836, 3847, 3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920, 3926, 3935, 3941, 3966,
+    3972, 3981, 3987, 3998, 4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071, 4077, 4086, 4092, 3956, 3962, 3971, 3977,
+    3988, 3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061, 4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139, 4145, 4154,
+    4160, 4180, 4186, 4195, 4201, 4212, 4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288, 4294, 4303, 4309, 4329, 4335,
+    4344, 4350, 4361, 4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439, 4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
+    4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360, 4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433, 4439, 4448, 4454,
+    4479, 4485, 4494, 4500, 4511, 4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584, 4590, 4599, 4605, 4628, 4634, 4643,
+    4649, 4660, 4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733, 4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811, 4817,
+    4826, 4832, 4852, 4858, 4867, 4873, 4884, 4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801, 4807, 4816, 4822, 4842,
+    4848, 4857, 4863, 4874, 4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952, 4958, 4967, 4973, 4993, 4999, 5008, 5014,
+    5025, 5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101, 5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174, 5180, 5189,
+    5195, 5220, 5226, 5235, 5241, 5252, 5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325, 5331, 5340, 5346, 4604, 4610,
+    4619, 4625, 4636, 4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709, 4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
+    4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860, 4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936, 4942, 4951, 4957,
+    4977, 4983, 4992, 4998, 5009, 5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087, 5093, 5102, 5108, 5128, 5134, 5143,
+    5149, 5160, 5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077, 5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150, 5156,
+    5165, 5171, 5196, 5202, 5211, 5217, 5228, 5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301, 5307, 5316, 5322, 5345,
+    5351, 5360, 5366, 5377, 5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450, 5456, 5465, 5471, 5496, 5502, 5511, 5517,
+    5528, 5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601, 5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449, 5455, 5464,
+    5470, 5490, 5496, 5505, 5511, 5522, 5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600, 5606, 5615, 5621, 5641, 5647,
+    5656, 5662, 5673, 5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749, 5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
+    5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900, 5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973, 5979, 5988, 5994,
+    5858, 5864, 5873, 5879, 5890, 5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963, 5969, 5978, 5984, 6009, 6015, 6024,
+    6030, 6041, 6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114, 6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190, 6196,
+    6205, 6211, 6231, 6237, 6246, 6252, 6263, 6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341, 6347, 6356, 6362, 6382,
+    6388, 6397, 6403, 6414, 6420, 6429, 6435, 5303, 5309, 5318, 5324, 5335, 5341, 5350, 5356, 5376, 5382, 5391, 5397,
+    5408, 5414, 5423, 5429, 5454, 5460, 5469, 5475, 5486, 5492, 5501, 5507, 5527, 5533, 5542, 5548, 5559, 5565, 5574,
+    5580, 5603, 5609, 5618, 5624, 5635, 5641, 5650, 5656, 5676, 5682, 5691, 5697, 5708, 5714, 5723, 5729, 5754, 5760,
+    5769, 5775, 5786, 5792, 5801, 5807, 5827, 5833, 5842, 5848, 5859, 5865, 5874, 5880, 5744, 5750, 5759, 5765, 5776,
+    5782, 5791, 5797, 5817, 5823, 5832, 5838, 5849, 5855, 5864, 5870, 5895, 5901, 5910, 5916, 5927, 5933, 5942, 5948,
+    5968, 5974, 5983, 5989, 6000, 6006, 6015, 6021, 6044, 6050, 6059, 6065, 6076, 6082, 6091, 6097, 6117, 6123, 6132,
+    6138, 6149, 6155, 6164, 6170, 6195, 6201, 6210, 6216, 6227, 6233, 6242, 6248, 6268, 6274, 6283, 6289, 6300, 6306,
+    6315, 6321, 6116, 6122, 6131, 6137, 6148, 6154, 6163, 6169, 6189, 6195, 6204, 6210, 6221, 6227, 6236, 6242, 6267,
+    6273, 6282, 6288, 6299, 6305, 6314, 6320, 6340, 6346, 6355, 6361, 6372, 6378, 6387, 6393, 6416, 6422, 6431, 6437,
+    6448, 6454, 6463, 6469, 6489, 6495, 6504, 6510, 6521, 6527, 6536, 6542, 6567, 6573, 6582, 6588, 6599, 6605, 6614,
+    6620, 6640, 6646, 6655, 6661, 6672, 6678, 6687, 6693, 6557, 6563, 6572, 6578, 6589, 6595, 6604, 6610, 6630, 6636,
+    6645, 6651, 6662, 6668, 6677, 6683, 6708, 6714, 6723, 6729, 6740, 6746, 6755, 6761, 6781, 6787, 6796, 6802, 6813,
+    6819, 6828, 6834, 6857, 6863, 6872, 6878, 6889, 6895, 6904, 6910, 6930, 6936, 6945, 6951, 6962, 6968, 6977, 6983,
+    7008, 7014, 7023, 7029, 7040, 7046, 7055, 7061, 7081, 7087, 7096, 7102, 7113, 7119, 7128, 7134, 6392, 6398, 6407,
+    6413, 6424, 6430, 6439, 6445, 6465, 6471, 6480, 6486, 6497, 6503, 6512, 6518, 6543, 6549, 6558, 6564, 6575, 6581,
+    6590, 6596, 6616, 6622, 6631, 6637, 6648, 6654, 6663, 6669, 6692, 6698, 6707, 6713, 6724, 6730, 6739, 6745, 6765,
+    6771, 6780, 6786, 6797, 6803, 6812, 6818, 6843, 6849, 6858, 6864, 6875, 6881, 6890, 6896, 6916, 6922, 6931, 6937,
+    6948, 6954, 6963, 6969, 6833, 6839, 6848, 6854, 6865, 6871, 6880, 6886, 6906, 6912, 6921, 6927, 6938, 6944, 6953,
+    6959, 6984, 6990, 6999, 7005, 7016, 7022, 7031, 7037, 7057, 7063, 7072, 7078, 7089, 7095, 7104, 7110, 7133, 7139,
+    7148, 7154, 7165, 7171, 7180, 7186, 7206, 7212, 7221, 7227, 7238, 7244, 7253, 7259, 7284, 7290, 7299, 7305, 7316,
+    7322, 7331, 7337, 7357, 7363, 7372, 7378, 7389, 7395, 7404, 7410, 7205, 7211, 7220, 7226, 7237, 7243, 7252, 7258,
+    7278, 7284, 7293, 7299, 7310, 7316, 7325, 7331, 7356, 7362, 7371, 7377, 7388, 7394, 7403, 7409, 7429, 7435, 7444,
+    7450, 7461, 7467, 7476, 7482, 7505, 7511, 7520, 7526, 7537, 7543, 7552, 7558, 7578, 7584, 7593, 7599, 7610, 7616,
+    7625, 7631, 7656, 7662, 7671, 7677, 7688, 7694, 7703, 7709, 7729, 7735, 7744, 7750, 7761};
+
+//------------------------------------------------------------------------------
+// Tables for level coding
+
+const uint8_t VP8EncBands[16 + 1] = { // maps a coefficient position to the band index used as the first index of res->prob[] in GetResidualCost
+    0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, // entries for positions 0..15
+    0 // sentinel
+};
+
+//------------------------------------------------------------------------------
+// Mode costs
+
+static int GetResidualCost(int ctx0, const VP8Residual* const res) { // Returns the accumulated cost of res->coeffs[res->first..res->last], starting in context ctx0.
+    int n = res->first;
+    // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
+    const int p0 = res->prob[n][ctx0][0];
+    CostArrayPtr const costs = res->costs;
+    const uint16_t* t = costs[n][ctx0]; // cost table for the current position and context
+    // bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
+    // (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
+    // be missing during the loop.
+    int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
+    // res->last < 0 is what SetResidualCoeffs stores when every coefficient is zero.
+    if (res->last < 0) {
+        return VP8BitCost(0, p0);
+    }
+    for (; n < res->last; ++n) { // every coefficient before the last non-zero one
+        const int v = abs(res->coeffs[n]);
+        const int ctx = (v >= 2) ? 2 : v; // context for the next position: magnitude clamped to 0..2
+        cost += VP8LevelCost(t, v);
+        t = costs[n + 1][ctx]; // switch to the table for the next position
+    }
+    // Last coefficient is always non-zero
+    {
+        const int v = abs(res->coeffs[n]);
+        assert(v != 0);
+        cost += VP8LevelCost(t, v);
+        if (n < 15) { // not at position 15: add VP8BitCost(0, ...) for the following position (NOTE(review): presumably the end-of-block signal - confirm)
+            const int b = VP8EncBands[n + 1];
+            const int ctx = (v == 1) ? 1 : 2; // v is non-zero here, so the context is 1 or 2
+            const int last_p0 = res->prob[b][ctx][0];
+            cost += VP8BitCost(0, last_p0);
+        }
+    }
+    return cost;
+}
+
+static void SetResidualCoeffs(const int16_t* const coeffs, VP8Residual* const res) { // Stores coeffs in res and records the index of its last non-zero entry.
+    int n;
+    res->last = -1; // stays -1 when all 16 coefficients are zero
+    assert(res->first == 0 || coeffs[0] == 0); // when first != 0, coeffs[0] is expected to be zero
+    for (n = 15; n >= 0; --n) { // scan backwards over the 16 coefficients
+        if (coeffs[n]) {
+            res->last = n; // highest index holding a non-zero value
+            break;
+        }
+    }
+    res->coeffs = coeffs; // the pointer is stored; the data is not copied
+}
+
+//------------------------------------------------------------------------------
+// init function
+
+VP8GetResidualCostFunc VP8GetResidualCost; // function pointers assigned by VP8EncDspCostInit() below
+VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
+// Architecture-specific initialisers; each is only called under its matching WEBP_USE_* guard below.
+extern void VP8EncDspCostInitMIPS32(void);
+extern void VP8EncDspCostInitMIPSdspR2(void);
+extern void VP8EncDspCostInitSSE2(void);
+
+static volatile VP8CPUInfo cost_last_cpuinfo_used = (VP8CPUInfo)&cost_last_cpuinfo_used; // starts as its own address, a value VP8GetCPUInfo is not expected to hold
+// Installs the C implementations from this file, then lets each enabled CPU-specific initialiser override them.
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInit(void) {
+    if (cost_last_cpuinfo_used == VP8GetCPUInfo) return; // already initialised with this VP8GetCPUInfo value
+
+    VP8GetResidualCost = GetResidualCost;
+    VP8SetResidualCoeffs = SetResidualCoeffs;
+
+    // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+    if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_MIPS32)
+        if (VP8GetCPUInfo(kMIPS32)) {
+            VP8EncDspCostInitMIPS32();
+        }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+        if (VP8GetCPUInfo(kMIPSdspR2)) {
+            VP8EncDspCostInitMIPSdspR2();
+        }
+#endif
+#if defined(WEBP_USE_SSE2)
+        if (VP8GetCPUInfo(kSSE2)) {
+            VP8EncDspCostInitSSE2();
+        }
+#endif
+    }
+    // Remember which VP8GetCPUInfo value was used so that a repeated call returns early.
+    cost_last_cpuinfo_used = VP8GetCPUInfo;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/cost_mips32.c b/codec/L2/demos/webpEnc/host/src/dsp/cost_mips32.c
new file mode 100644
index 0000000000..1c45626eae
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/cost_mips32.c
@@ -0,0 +1,148 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Djordje Pesut (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS32)
+
+#include "../enc/cost.h"
+
+static int GetResidualCost(int ctx0, const VP8Residual* const res) { // MIPS32 variant: the loop over coefficients is written in inline assembly.
+    int temp0, temp1;
+    int v_reg, ctx_reg;
+    int n = res->first;
+    // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
+    int p0 = res->prob[n][ctx0][0];
+    CostArrayPtr const costs = res->costs;
+    const uint16_t* t = costs[n][ctx0];
+    // bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
+    // (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
+    // be missing during the loop.
+    int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
+    const int16_t* res_coeffs = res->coeffs;
+    const int res_last = res->last;
+    const int const_max_level = MAX_VARIABLE_LEVEL; // upper clamp applied to v before indexing t[]
+    const int const_2 = 2; // upper clamp applied to the context
+    const uint16_t** p_costs = &costs[n][0];
+    const size_t inc_p_costs = NUM_CTX * sizeof(*p_costs); // byte step added to p_costs once per coefficient (NUM_CTX pointers)
+    // A negative res->last takes the early return below and skips the assembly entirely.
+    if (res->last < 0) {
+        return VP8BitCost(0, p0);
+    }
+    // Loop over coefficients n..res_last-1: cost += VP8LevelFixedCosts[v] + t[min(v, MAX_VARIABLE_LEVEL)], then t = p_costs[min(v, 2)] for the next position.
+    __asm__ volatile(
+        ".set      push                                                        \n\t"
+        ".set      noreorder                                                   \n\t"
+        "subu      %[temp1],        %[res_last],        %[n]                   \n\t"
+        "sll       %[temp0],        %[n],               1                      \n\t" // byte offset of coeffs[n]
+        "blez      %[temp1],        2f                                         \n\t" // nothing to do when res_last <= n
+        " addu     %[res_coeffs],   %[res_coeffs],      %[temp0]               \n\t" // branch delay slot: res_coeffs = &coeffs[n]
+        "1:                                                                      \n\t"
+        "lh        %[v_reg],        0(%[res_coeffs])                           \n\t" // signed 16-bit coefficient
+        "addiu     %[n],            %[n],               1                      \n\t"
+        "negu      %[temp0],        %[v_reg]                                   \n\t"
+        "slti      %[temp1],        %[v_reg],           0                      \n\t"
+        "movn      %[v_reg],        %[temp0],           %[temp1]               \n\t" // v_reg = abs(coefficient)
+        "sltiu     %[temp0],        %[v_reg],           2                      \n\t"
+        "move      %[ctx_reg],      %[v_reg]                                   \n\t"
+        "movz      %[ctx_reg],      %[const_2],         %[temp0]               \n\t" // ctx_reg = (v >= 2) ? 2 : v
+        "sll       %[temp1],        %[v_reg],           1                      \n\t"
+        "addu      %[temp1],        %[temp1],           %[VP8LevelFixedCosts]  \n\t"
+        "lhu       %[temp1],        0(%[temp1])                                \n\t" // temp1 = VP8LevelFixedCosts[v]
+        "slt       %[temp0],        %[v_reg],           %[const_max_level]     \n\t"
+        "movz      %[v_reg],        %[const_max_level], %[temp0]               \n\t" // clamp v to const_max_level
+        "addu      %[cost],         %[cost],            %[temp1]               \n\t"
+        "sll       %[v_reg],        %[v_reg],           1                      \n\t"
+        "sll       %[ctx_reg],      %[ctx_reg],         2                      \n\t" // context scaled by 4 bytes per pointer
+        "addu      %[v_reg],        %[v_reg],           %[t]                   \n\t"
+        "lhu       %[temp0],        0(%[v_reg])                                \n\t" // temp0 = t[clamped v]
+        "addu      %[p_costs],      %[p_costs],         %[inc_p_costs]         \n\t" // advance to the next position's row
+        "addu      %[t],            %[p_costs],         %[ctx_reg]             \n\t"
+        "addu      %[cost],         %[cost],            %[temp0]               \n\t"
+        "addiu     %[res_coeffs],   %[res_coeffs],      2                      \n\t"
+        "bne       %[n],            %[res_last],        1b                     \n\t"
+        " lw       %[t],            0(%[t])                                    \n\t" // branch delay slot: t = p_costs[ctx]
+        "2:                                                                      \n\t"
+        ".set      pop                                                         \n\t"
+        : [cost] "+&r"(cost), [t] "+&r"(t), [n] "+&r"(n), [v_reg] "=&r"(v_reg), [ctx_reg] "=&r"(ctx_reg),
+          [p_costs] "+&r"(p_costs), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [res_coeffs] "+&r"(res_coeffs)
+        : [const_2] "r"(const_2), [const_max_level] "r"(const_max_level), [VP8LevelFixedCosts] "r"(VP8LevelFixedCosts),
+          [res_last] "r"(res_last), [inc_p_costs] "r"(inc_p_costs)
+        : "memory");
+    // The assembly leaves n and t updated; the tail below reads res->coeffs[n] with them.
+    // Last coefficient is always non-zero
+    {
+        const int v = abs(res->coeffs[n]);
+        assert(v != 0);
+        cost += VP8LevelCost(t, v);
+        if (n < 15) { // not at position 15: add VP8BitCost(0, ...) for the following position
+            const int b = VP8EncBands[n + 1];
+            const int ctx = (v == 1) ? 1 : 2;
+            const int last_p0 = res->prob[b][ctx][0];
+            cost += VP8BitCost(0, last_p0);
+        }
+    }
+    return cost;
+}
+
+static void SetResidualCoeffs(const int16_t* const coeffs, VP8Residual* const res) { // MIPS32 variant: stores coeffs and the index of its last non-zero entry in res.
+    const int16_t* p_coeffs = (int16_t*)coeffs;
+    int temp0, temp1, temp2, n, n1;
+    assert(res->first == 0 || coeffs[0] == 0);
+    // Checks coefficients in pairs, one unaligned 32-bit load each, from (14,15) down to (0,1); temp2 ends as the last non-zero index or -1.
+    __asm__ volatile(
+        ".set     push                                      \n\t"
+        ".set     noreorder                                 \n\t"
+        "addiu    %[p_coeffs],   %[p_coeffs],    28         \n\t" // byte offset 28 = &coeffs[14]
+        "li       %[n],          15                         \n\t"
+        "li       %[temp2],      -1                         \n\t" // result when every pair is zero
+        "0:                                                   \n\t"
+        "ulw      %[temp0],      0(%[p_coeffs])             \n\t" // load coeffs[n-1] and coeffs[n] as one word
+        "beqz     %[temp0],      1f                         \n\t" // both zero: move on to the previous pair
+#if defined(WORDS_BIGENDIAN)
+        " sll     %[temp1],      %[temp0],       16         \n\t"
+#else
+        " srl     %[temp1],      %[temp0],       16         \n\t"
+#endif
+        "addiu    %[n1],         %[n],           -1         \n\t"
+        "movz     %[temp0],      %[n1],          %[temp1]   \n\t" // temp1 == 0: coeffs[n] is zero, so the answer is n-1
+        "movn     %[temp0],      %[n],           %[temp1]   \n\t" // temp1 != 0: coeffs[n] is non-zero, so the answer is n
+        "j        2f                                        \n\t"
+        " addiu   %[temp2],      %[temp0],       0          \n\t" // delay slot: temp2 = chosen index
+        "1:                                                   \n\t"
+        "addiu    %[n],          %[n],           -2         \n\t"
+        "bgtz     %[n],          0b                         \n\t" // n runs 15, 13, ..., 1
+        " addiu   %[p_coeffs],   %[p_coeffs],    -4         \n\t" // delay slot: step back one pair
+        "2:                                                   \n\t"
+        ".set     pop                                       \n\t"
+        : [p_coeffs] "+&r"(p_coeffs), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [n] "=&r"(n),
+          [n1] "=&r"(n1)
+        :
+        : "memory");
+    res->last = temp2;
+    res->coeffs = coeffs; // the pointer is stored; the data is not copied
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspCostInitMIPS32(void);
+// Points both cost function pointers at the MIPS32 implementations defined above.
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPS32(void) {
+    VP8GetResidualCost = GetResidualCost;
+    VP8SetResidualCoeffs = SetResidualCoeffs;
+}
+
+#else // !WEBP_USE_MIPS32
+// NOTE(review): WEBP_DSP_INIT_STUB is defined outside this file; presumably it emits an empty VP8EncDspCostInitMIPS32 - confirm.
+WEBP_DSP_INIT_STUB(VP8EncDspCostInitMIPS32)
+
+#endif // WEBP_USE_MIPS32
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/cost_mips_dsp_r2.c b/codec/L2/demos/webpEnc/host/src/dsp/cost_mips_dsp_r2.c
new file mode 100644
index 0000000000..7532c9fe4e
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/cost_mips_dsp_r2.c
@@ -0,0 +1,104 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Djordje Pesut (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+#include "../enc/cost.h"
+
+static int GetResidualCost(int ctx0, const VP8Residual* const res) { // MIPS DSP R2 variant: the loop over coefficients is written in inline assembly.
+    int temp0, temp1;
+    int v_reg, ctx_reg;
+    int n = res->first;
+    // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
+    int p0 = res->prob[n][ctx0][0];
+    CostArrayPtr const costs = res->costs;
+    const uint16_t* t = costs[n][ctx0];
+    // bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
+    // (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
+    // be missing during the loop.
+    int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
+    const int16_t* res_coeffs = res->coeffs;
+    const int res_last = res->last;
+    const int const_max_level = MAX_VARIABLE_LEVEL; // upper clamp applied to v before indexing t[]
+    const int const_2 = 2; // upper clamp applied to the context
+    const uint16_t** p_costs = &costs[n][0];
+    const size_t inc_p_costs = NUM_CTX * sizeof(*p_costs); // byte step added to p_costs once per coefficient (NUM_CTX pointers)
+    // A negative res->last takes the early return below and skips the assembly entirely.
+    if (res->last < 0) {
+        return VP8BitCost(0, p0);
+    }
+    // Loop over coefficients n..res_last-1: cost += VP8LevelFixedCosts[v] + t[min(v, MAX_VARIABLE_LEVEL)], then t = p_costs[min(v, 2)] for the next position.
+    __asm__ volatile(
+        ".set      push                                                     \n\t"
+        ".set      noreorder                                                \n\t"
+        "subu      %[temp1],        %[res_last],        %[n]                \n\t"
+        "blez      %[temp1],        2f                                      \n\t" // nothing to do when res_last <= n
+        " nop                                                               \n\t" // branch delay slot
+        "1:                                                                   \n\t"
+        "sll       %[temp0],        %[n],               1                   \n\t" // byte offset of coeffs[n]
+        "lhx       %[v_reg],        %[temp0](%[res_coeffs])                 \n\t" // indexed halfword load of coeffs[n]
+        "addiu     %[n],            %[n],               1                   \n\t"
+        "absq_s.w  %[v_reg],        %[v_reg]                                \n\t" // v_reg = absolute value of the coefficient
+        "sltiu     %[temp0],        %[v_reg],           2                   \n\t"
+        "move      %[ctx_reg],      %[v_reg]                                \n\t"
+        "movz      %[ctx_reg],      %[const_2],         %[temp0]            \n\t" // ctx_reg = (v >= 2) ? 2 : v
+        "sll       %[temp1],        %[v_reg],           1                   \n\t"
+        "lhx       %[temp1],        %[temp1](%[VP8LevelFixedCosts])         \n\t" // temp1 = VP8LevelFixedCosts[v]
+        "slt       %[temp0],        %[v_reg],           %[const_max_level]  \n\t"
+        "movz      %[v_reg],        %[const_max_level], %[temp0]            \n\t" // clamp v to const_max_level
+        "addu      %[cost],         %[cost],            %[temp1]            \n\t"
+        "sll       %[v_reg],        %[v_reg],           1                   \n\t"
+        "sll       %[ctx_reg],      %[ctx_reg],         2                   \n\t" // context scaled by 4 bytes per pointer
+        "lhx       %[temp0],        %[v_reg](%[t])                          \n\t" // temp0 = t[clamped v]
+        "addu      %[p_costs],      %[p_costs],         %[inc_p_costs]      \n\t" // advance to the next position's row
+        "addu      %[t],            %[p_costs],         %[ctx_reg]          \n\t"
+        "addu      %[cost],         %[cost],            %[temp0]            \n\t"
+        "bne       %[n],            %[res_last],        1b                  \n\t"
+        " lw       %[t],            0(%[t])                                 \n\t" // branch delay slot: t = p_costs[ctx]
+        "2:                                                                   \n\t"
+        ".set      pop                                                      \n\t"
+        : [cost] "+&r"(cost), [t] "+&r"(t), [n] "+&r"(n), [v_reg] "=&r"(v_reg), [ctx_reg] "=&r"(ctx_reg),
+          [p_costs] "+&r"(p_costs), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1)
+        : [const_2] "r"(const_2), [const_max_level] "r"(const_max_level), [VP8LevelFixedCosts] "r"(VP8LevelFixedCosts),
+          [res_last] "r"(res_last), [res_coeffs] "r"(res_coeffs), [inc_p_costs] "r"(inc_p_costs)
+        : "memory");
+    // The assembly leaves n and t updated; the tail below reads res->coeffs[n] with them.
+    // Last coefficient is always non-zero
+    {
+        const int v = abs(res->coeffs[n]);
+        assert(v != 0);
+        cost += VP8LevelCost(t, v);
+        if (n < 15) { // not at position 15: add VP8BitCost(0, ...) for the following position
+            const int b = VP8EncBands[n + 1];
+            const int ctx = (v == 1) ? 1 : 2;
+            const int last_p0 = res->prob[b][ctx][0];
+            cost += VP8BitCost(0, last_p0);
+        }
+    }
+    return cost;
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspCostInitMIPSdspR2(void);
+// Points VP8GetResidualCost at the DSP R2 implementation above; VP8SetResidualCoeffs is not assigned here.
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPSdspR2(void) {
+    VP8GetResidualCost = GetResidualCost;
+}
+
+#else // !WEBP_USE_MIPS_DSP_R2
+// NOTE(review): WEBP_DSP_INIT_STUB is defined outside this file; presumably it emits an empty VP8EncDspCostInitMIPSdspR2 - confirm.
+WEBP_DSP_INIT_STUB(VP8EncDspCostInitMIPSdspR2)
+
+#endif // WEBP_USE_MIPS_DSP_R2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/cost_sse2.c b/codec/L2/demos/webpEnc/host/src/dsp/cost_sse2.c
new file mode 100644
index 0000000000..0bb1a912a2
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/cost_sse2.c
@@ -0,0 +1,118 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 version of cost functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+#include <emmintrin.h>
+
+#include "../enc/cost.h"
+#include "../enc/vp8enci.h"
+#include "../utils/utils.h"
+
+//------------------------------------------------------------------------------
+
+// Stores 'coeffs' in 'res' and sets res->last to the index of the last
+// non-zero coefficient among the 16 values, or to -1 when all are zero.
+static void SetResidualCoeffsSSE2(const int16_t* const coeffs, VP8Residual* const res) {
+    const __m128i kZero = _mm_setzero_si128();
+    const __m128i lo = _mm_loadu_si128((const __m128i*)(coeffs + 0));
+    const __m128i hi = _mm_loadu_si128((const __m128i*)(coeffs + 8));
+    // Pack the sixteen 16-bit values to sixteen 8-bit ones so that a single
+    // byte-wise comparison against zero covers every coefficient.
+    const __m128i packed = _mm_packs_epi16(lo, hi);
+    const __m128i is_zero = _mm_cmpeq_epi8(packed, kZero);
+    // movemask yields one bit per coefficient, set where it equals zero;
+    // flipping the low 16 bits gives the positions of the non-zero entries.
+    // No masking against res->first is needed: coeffs[0] is 0 whenever
+    // res->first > 0 (checked by the assert below).
+    const uint32_t non_zero_bits = 0x0000ffffu ^ (uint32_t)_mm_movemask_epi8(is_zero);
+    assert(res->first == 0 || coeffs[0] == 0);
+    // The highest set bit is the position of the last non-zero coefficient.
+    if (non_zero_bits != 0) {
+        res->last = BitsLog2Floor(non_zero_bits);
+    } else {
+        res->last = -1;
+    }
+    res->coeffs = coeffs;
+}
+
+// Returns the cost of coding the coefficients of 'res' from res->first up to
+// res->last, starting in context 'ctx0'. The cost is accumulated from the
+// res->costs tables, VP8LevelFixedCosts[] and VP8BitCost().
+static int GetResidualCostSSE2(int ctx0, const VP8Residual* const res) {
+    uint8_t clamped[16], contexts[16];
+    uint16_t magnitudes[16];
+    int n = res->first;
+    // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
+    const int p0 = res->prob[n][ctx0][0];
+    CostArrayPtr const costs = res->costs;
+    const uint16_t* t = costs[n][ctx0];
+    // The t[] tables already contain bit_cost(1, p0), but only for ctx != 0
+    // (as required by the syntax). With ctx0 == 0 it has to be added here,
+    // otherwise it would be missing from the loop below.
+    int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
+
+    if (res->last < 0) {
+        return VP8BitCost(0, p0);
+    }
+
+    { // precompute, for all 16 positions: |coeff|, clamped level and context
+        const __m128i kZero = _mm_setzero_si128();
+        const __m128i kTwo = _mm_set1_epi8(2);
+        const __m128i kMaxLevel = _mm_set1_epi8(MAX_VARIABLE_LEVEL);
+        const __m128i lo = _mm_loadu_si128((const __m128i*)&res->coeffs[0]);
+        const __m128i hi = _mm_loadu_si128((const __m128i*)&res->coeffs[8]);
+        const __m128i neg_lo = _mm_sub_epi16(kZero, lo);
+        const __m128i neg_hi = _mm_sub_epi16(kZero, hi);
+        const __m128i abs_lo = _mm_max_epi16(lo, neg_lo); // abs(v), 16b
+        const __m128i abs_hi = _mm_max_epi16(hi, neg_hi);
+        const __m128i abs_8b = _mm_packs_epi16(abs_lo, abs_hi);
+        const __m128i ctx_8b = _mm_min_epu8(abs_8b, kTwo);        // context = 0,1,2
+        const __m128i level_8b = _mm_min_epu8(abs_8b, kMaxLevel); // clamped level
+
+        _mm_storeu_si128((__m128i*)&contexts[0], ctx_8b);
+        _mm_storeu_si128((__m128i*)&clamped[0], level_8b);
+
+        _mm_storeu_si128((__m128i*)&magnitudes[0], abs_lo);
+        _mm_storeu_si128((__m128i*)&magnitudes[8], abs_hi);
+    }
+    while (n < res->last) {
+        const int flevel = magnitudes[n]; // full level
+        // simplified VP8LevelCost()
+        cost += VP8LevelFixedCosts[flevel] + t[clamped[n]];
+        t = costs[n + 1][contexts[n]];
+        ++n;
+    }
+    // Last coefficient is always non-zero
+    {
+        const int flevel = magnitudes[n];
+        assert(flevel != 0);
+        cost += VP8LevelFixedCosts[flevel] + t[clamped[n]];
+        if (n < 15) {
+            const int last_p0 = res->prob[VP8EncBands[n + 1]][contexts[n]][0];
+            cost += VP8BitCost(0, last_p0);
+        }
+    }
+    return cost;
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+// Forward declaration of the entry point defined just below.
+extern void VP8EncDspCostInitSSE2(void);
+
+// Points the VP8SetResidualCoeffs and VP8GetResidualCost hooks at the SSE2
+// implementations defined in this file.
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitSSE2(void) {
+    VP8SetResidualCoeffs = SetResidualCoeffsSSE2;
+    VP8GetResidualCost = GetResidualCostSSE2;
+}
+
+#else // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(VP8EncDspCostInitSSE2)
+
+#endif // WEBP_USE_SSE2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/cpu.c b/codec/L2/demos/webpEnc/host/src/dsp/cpu.c
new file mode 100644
index 0000000000..d32f985e87
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/cpu.c
@@ -0,0 +1,151 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// CPU detection
+//
+// Author: Christian Duvivier (cduvivier@google.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_ANDROID_NEON)
+#include <cpu-features.h>
+#endif
+
+//------------------------------------------------------------------------------
+// SSE2 detection.
+//
+
+// GetCPUInfo(cpu_info, info_type): executes cpuid with eax = info_type and
+// ecx = 0, and stores the resulting eax/ebx/ecx/edx in cpu_info[0..3].
+// One variant is selected per toolchain / build mode; if none matches,
+// GetCPUInfo is left undefined.
+//
+// apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
+#if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
+// 32-bit PIC variant: ebx is copied to edi before cpuid and swapped back
+// afterwards, so ebx keeps its original value and cpuid's ebx result is
+// returned through edi ("=D"). Presumably ebx is reserved as the PIC register
+// in this configuration.
+static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
+    __asm__ volatile(
+        "mov %%ebx, %%edi\n"
+        "cpuid\n"
+        "xchg %%edi, %%ebx\n"
+        : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+        : "a"(info_type), "c"(0));
+}
+#elif defined(__x86_64__) && (defined(__code_model_medium__) || defined(__code_model_large__)) && defined(__PIC__)
+// 64-bit PIC variant for the medium/large code models: rbx is exchanged with
+// a scratch register around cpuid, so rbx is restored and the ebx result ends
+// up in that scratch register (operand 1).
+static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
+    __asm__ volatile(
+        "xchg{q}\t{%%rbx}, %q1\n"
+        "cpuid\n"
+        "xchg{q}\t{%%rbx}, %q1\n"
+        : "=a"(cpu_info[0]), "=&r"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+        : "a"(info_type), "c"(0));
+}
+#elif defined(__i386__) || defined(__x86_64__)
+// Plain variant: cpuid writes its four result registers directly.
+static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
+    __asm__ volatile("cpuid\n"
+                     : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+                     : "a"(info_type), "c"(0));
+}
+#elif (defined(_M_X64) || defined(_M_IX86)) && defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1
+// MSVC variant: maps onto the __cpuidex intrinsic with the sub-leaf fixed to 0.
+#include <intrin.h>
+#define GetCPUInfo(info, type) __cpuidex(info, type, 0) // set ecx=0
+#elif defined(WEBP_MSC_SSE2)
+// Fallback when WEBP_MSC_SSE2 is defined: maps onto __cpuid, which takes no
+// sub-leaf argument.
+#define GetCPUInfo __cpuid
+#endif
+
+// xgetbv(): returns the 64-bit value produced by the xgetbv instruction with
+// ecx = 0 (edx in the high 32 bits, eax in the low 32 bits). When no variant
+// below applies, xgetbv() expands to the constant 0U.
+//
+// NaCl has no support for xgetbv or the raw opcode.
+#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
+// GCC-style inline asm variant.
+static WEBP_INLINE uint64_t xgetbv(void) {
+    const uint32_t ecx = 0;
+    uint32_t eax, edx;
+    // Use the raw opcode for xgetbv for compatibility with older toolchains.
+    __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n" : "=a"(eax), "=d"(edx) : "c"(ecx));
+    return ((uint64_t)edx << 32) | eax;
+}
+#elif (defined(_M_X64) || defined(_M_IX86)) && defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1
+// MSVC variant: maps onto the _xgetbv intrinsic.
+#include <immintrin.h>
+#define xgetbv() _xgetbv(0)
+#elif defined(_MSC_VER) && defined(_M_IX86)
+// Older 32-bit MSVC variant: emits the raw opcode from an __asm block and
+// copies eax/edx into locals.
+static WEBP_INLINE uint64_t xgetbv(void) {
+    uint32_t eax_, edx_;
+    __asm {
+    xor ecx, ecx // ecx = 0
+            // Use the raw opcode for xgetbv for compatibility with older toolchains.
+    __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
+    mov eax_, eax
+    mov edx_, edx
+    }
+    return ((uint64_t)edx_ << 32) | eax_;
+}
+#else
+#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains.
+#endif
+
+#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
+// Returns non-zero when cpuid (and, for AVX/AVX2, xgetbv) reports support for
+// 'feature' on the running CPU; returns 0 for unsupported or unknown features.
+static int x86CPUInfo(CPUFeature feature) {
+    int regs[4]; // cpuid results as filled in by GetCPUInfo()
+    int max_leaf;
+
+    // Leaf 0 reports the highest feature value cpuid supports.
+    GetCPUInfo(regs, 0);
+    max_leaf = regs[0];
+    if (max_leaf < 1) return 0;
+
+    GetCPUInfo(regs, 1);
+    switch (feature) {
+        case kSSE2:
+            return (regs[3] & 0x04000000) != 0;
+        case kSSE3:
+            return (regs[2] & 0x00000001) != 0;
+        case kSSE4_1:
+            return (regs[2] & 0x00080000) != 0;
+        case kAVX:
+            // bits 27 (OSXSAVE) & 28 (256-bit AVX)
+            if ((regs[2] & 0x18000000) == 0x18000000) {
+                // XMM state and YMM state enabled by the OS.
+                return (xgetbv() & 0x6) == 0x6;
+            }
+            break;
+        case kAVX2:
+            // AVX2 needs working AVX plus the dedicated bit in leaf 7.
+            if (x86CPUInfo(kAVX) && max_leaf >= 7) {
+                GetCPUInfo(regs, 7);
+                return (regs[1] & 0x00000020) == 0x00000020;
+            }
+            break;
+        default:
+            break;
+    }
+    return 0;
+}
+VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
+#elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test.
+// Returns 1 only for kNEON on an ARM-family CPU whose feature set includes
+// NEON, as reported by the Android cpu-features API; returns 0 otherwise.
+static int AndroidCPUInfo(CPUFeature feature) {
+    const AndroidCpuFamily family = android_getCpuFamily();
+    const uint64_t features = android_getCpuFeatures();
+    if (feature != kNEON) {
+        return 0;
+    }
+    if (family != ANDROID_CPU_FAMILY_ARM) {
+        return 0;
+    }
+    return (features & ANDROID_CPU_ARM_FEATURE_NEON) != 0;
+}
+VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
+#elif defined(WEBP_USE_NEON)
+// Dummy detection function that reports every feature as available. It exists
+// so that NEON can be turned off at runtime by setting VP8GetCPUInfo = NULL.
+static int armCPUInfo(CPUFeature feature) {
+    (void)feature;
+    return 1;
+}
+VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
+#elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2)
+// Reports kMIPS32 and kMIPSdspR2 as available (1) and everything else as
+// unavailable (0); no runtime probing is performed.
+static int mipsCPUInfo(CPUFeature feature) {
+    return (feature == kMIPS32) || (feature == kMIPSdspR2);
+}
+VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
+#else
+VP8CPUInfo VP8GetCPUInfo = NULL;
+#endif
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/dec.c b/codec/L2/demos/webpEnc/host/src/dsp/dec.c
new file mode 100644
index 0000000000..2d210e7b63
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/dec.c
@@ -0,0 +1,754 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Speed-critical decoding functions, default plain-C implementations.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+#include "../dec/vp8i.h"
+
+//------------------------------------------------------------------------------
+
+// Clamps 'v' to the [0, 255] range.
+static WEBP_INLINE uint8_t clip_8b(int v) {
+    if ((v & ~0xff) == 0) {
+        return v; // already fits in 8 bits
+    }
+    return (v < 0) ? 0 : 255;
+}
+
+//------------------------------------------------------------------------------
+// Transforms (Paragraph 14.4)
+
+// STORE(x, y, v): adds (v) >> 3 to the pixel at column x, row y of 'dst' and
+// writes it back clipped to 8 bits. Requires a 'dst' pointer in scope. Note
+// that x and y are not parenthesized in the expansion, so only simple
+// expressions should be passed for them.
+#define STORE(x, y, v) dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3))
+
+// STORE2(y, dc, d, c): updates the four pixels of row y with the values
+// dc + d, dc + c, dc - c and dc - d (in column order), evaluating dc once.
+#define STORE2(y, dc, d, c)    \
+    do {                       \
+        const int DC = (dc);   \
+        STORE(0, y, DC + (d)); \
+        STORE(1, y, DC + (c)); \
+        STORE(2, y, DC - (c)); \
+        STORE(3, y, DC - (d)); \
+    } while (0)
+
+// Fixed-point multipliers used by the transforms below:
+// MUL1(a) = a + ((a * 20091) >> 16), MUL2(a) = (a * 35468) >> 16.
+#define MUL1(a) ((((a)*20091) >> 16) + (a))
+#define MUL2(a) (((a)*35468) >> 16)
+
+// Applies the two-pass 4x4 transform to the 16 coefficients at 'in' and adds
+// the result to the 4x4 pixel block at 'dst' (rows are BPS bytes apart).
+static void TransformOne(const int16_t* in, uint8_t* dst) {
+    int C[4 * 4];
+    int i;
+    for (i = 0; i < 4; ++i) { // vertical pass
+        const int16_t* const src = in + i;
+        int* const tmp = C + 4 * i;
+        const int a = src[0] + src[8];              // [-4096, 4094]
+        const int b = src[0] - src[8];              // [-4095, 4095]
+        const int c = MUL2(src[4]) - MUL1(src[12]); // [-3783, 3783]
+        const int d = MUL1(src[4]) + MUL2(src[12]); // [-3785, 3781]
+        tmp[0] = a + d;                             // [-7881, 7875]
+        tmp[1] = b + c;                             // [-7878, 7878]
+        tmp[2] = b - c;                             // [-7878, 7878]
+        tmp[3] = a - d;                             // [-7877, 7879]
+    }
+    // Each pass is expanding the dynamic range by ~3.85 (upper bound).
+    // The exact value is (2. + (20091 + 35468) / 65536).
+    // After the second pass, maximum interval is [-3794, 3794], assuming
+    // an input in [-2048, 2047] interval. We then need to add a dst value
+    // in the [0, 255] range.
+    // In the worst case scenario, the input to clip_8b() can be as large as
+    // [-60713, 60968].
+    for (i = 0; i < 4; ++i) { // horizontal pass
+        const int* const tmp = C + i;
+        const int dc = tmp[0] + 4; // +4: rounding for the >> 3 done in STORE()
+        const int a = dc + tmp[8];
+        const int b = dc - tmp[8];
+        const int c = MUL2(tmp[4]) - MUL1(tmp[12]);
+        const int d = MUL1(tmp[4]) + MUL2(tmp[12]);
+        STORE(0, 0, a + d);
+        STORE(1, 0, b + c);
+        STORE(2, 0, b - c);
+        STORE(3, 0, a - d);
+        dst += BPS; // STORE() always targets row 0, so advance one row
+    }
+}
+
+// Simplified transform for the case where only in[0], in[1] and in[4] are
+// non-zero. The result is added to the 4x4 pixel block at 'dst'.
+static void TransformAC3(const int16_t* in, uint8_t* dst) {
+    const int base = in[0] + 4; // +4: rounding for the >> 3 done in STORE()
+    // Per-column terms (from in[1]), identical on every row.
+    const int col_c = MUL2(in[1]);
+    const int col_d = MUL1(in[1]);
+    // Per-row terms (from in[4]).
+    const int row_c = MUL2(in[4]);
+    const int row_d = MUL1(in[4]);
+    STORE2(0, base + row_d, col_d, col_c);
+    STORE2(1, base + row_c, col_d, col_c);
+    STORE2(2, base - row_c, col_d, col_c);
+    STORE2(3, base - row_d, col_d, col_c);
+}
+#undef MUL1
+#undef MUL2
+#undef STORE2
+
+// Transforms the block at 'in' into 'dst' and, when 'do_two' is non-zero,
+// also the next block (in + 16) into the pixels 4 columns to the right.
+static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
+    const int num_blocks = do_two ? 2 : 1;
+    int k;
+    for (k = 0; k < num_blocks; ++k) {
+        TransformOne(in + 16 * k, dst + 4 * k);
+    }
+}
+
+// Calls the VP8Transform hook twice with its last argument set to 1: once for
+// the coefficients at 'in' targeting 'dst', and once for the coefficients at
+// in + 2 * 16 targeting the pixels four rows below.
+static void TransformUV(const int16_t* in, uint8_t* dst) {
+    VP8Transform(in + 0 * 16, dst, 1);
+    VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
+}
+
+// DC-only transform: adds the same value, (in[0] + 4) >> 3, to each of the
+// 16 pixels of the 4x4 block at 'dst', clipping every result to 8 bits.
+static void TransformDC(const int16_t* in, uint8_t* dst) {
+    const int dc = in[0] + 4;
+    int x, y;
+    // Every pixel is updated independently, so the visiting order is free.
+    for (x = 0; x < 4; ++x) {
+        for (y = 0; y < 4; ++y) {
+            STORE(x, y, dc);
+        }
+    }
+}
+
+// For each of the four consecutive coefficient blocks at 'in' whose first
+// coefficient is non-zero, calls the VP8TransformDC hook on the matching 4x4
+// quadrant of the 8x8 area at 'dst' (left/right, then the row of blocks below).
+static void TransformDCUV(const int16_t* in, uint8_t* dst) {
+    int k;
+    for (k = 0; k < 4; ++k) {
+        const int16_t* const block = in + k * 16;
+        if (block[0] != 0) {
+            VP8TransformDC(block, dst + (k & 1) * 4 + (k >> 1) * 4 * BPS);
+        }
+    }
+}
+
+#undef STORE
+
+//------------------------------------------------------------------------------
+// Paragraph 14.3
+
+// Applies the 4x4 transform of paragraph 14.3 to the 16 values at 'in'. The
+// 16 results are written 16 entries apart, to out[0], out[16], ..., out[240].
+static void TransformWHT(const int16_t* in, int16_t* out) {
+    int tmp[16];
+    int i;
+    // First pass: combine the four rows, one column at a time.
+    for (i = 0; i < 4; ++i) {
+        const int16_t* const col = in + i;
+        const int sum_outer = col[0] + col[12];
+        const int sum_inner = col[4] + col[8];
+        const int diff_inner = col[4] - col[8];
+        const int diff_outer = col[0] - col[12];
+        tmp[0 + i] = sum_outer + sum_inner;
+        tmp[4 + i] = diff_outer + diff_inner;
+        tmp[8 + i] = sum_outer - sum_inner;
+        tmp[12 + i] = diff_outer - diff_inner;
+    }
+    // Second pass: combine the four entries of each intermediate row.
+    for (i = 0; i < 4; ++i) {
+        const int* const row = tmp + 4 * i;
+        int16_t* const dst = out + 64 * i;
+        const int dc = row[0] + 3; // w/ rounder
+        const int a0 = dc + row[3];
+        const int a1 = row[1] + row[2];
+        const int a2 = row[1] - row[2];
+        const int a3 = dc - row[3];
+        dst[0] = (a0 + a1) >> 3;
+        dst[16] = (a3 + a2) >> 3;
+        dst[32] = (a0 - a1) >> 3;
+        dst[48] = (a3 - a2) >> 3;
+    }
+}
+
+// Hook for the transform above; assigned in VP8DspInit().
+void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
+
+//------------------------------------------------------------------------------
+// Intra predictions
+
+// Pixel at column x, row y of the block at 'dst' (rows are BPS bytes apart).
+#define DST(x, y) dst[(x) + (y)*BPS]
+
+// Fills the size x size block at 'dst': each pixel is looked up in the
+// VP8kclip1 table at index top[x] + left - top_left, where top[] is the row
+// above the block, left is the pixel left of the current row and top_left is
+// top[-1].
+static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
+    const uint8_t* const top = dst - BPS;
+    const uint8_t* const clip0 = VP8kclip1 - top[-1];
+    int x, y;
+    for (y = 0; y < size; ++y, dst += BPS) {
+        const uint8_t* const clip = clip0 + dst[-1];
+        for (x = 0; x < size; ++x) {
+            dst[x] = clip[top[x]];
+        }
+    }
+}
+// Fixed-size wrappers around TrueMotion() with the VP8PredFunc signature.
+static void TM4(uint8_t* dst) { // 4x4 block
+    TrueMotion(dst, 4);
+}
+static void TM8uv(uint8_t* dst) { // 8x8 block
+    TrueMotion(dst, 8);
+}
+static void TM16(uint8_t* dst) { // 16x16 block
+    TrueMotion(dst, 16);
+}
+
+//------------------------------------------------------------------------------
+// 16x16
+
+// Vertical prediction: copies the 16 pixels of the row above the block into
+// each of the block's 16 rows.
+static void VE16(uint8_t* dst) {
+    const uint8_t* const top = dst - BPS;
+    uint8_t* row = dst;
+    int remaining;
+    for (remaining = 16; remaining > 0; --remaining) {
+        memcpy(row, top, 16);
+        row += BPS;
+    }
+}
+
+// Horizontal prediction: fills each of the 16 rows with the pixel located
+// immediately to the left of that row.
+static void HE16(uint8_t* dst) {
+    int j;
+    for (j = 0; j < 16; ++j) {
+        uint8_t* const row = dst + j * BPS;
+        memset(row, row[-1], 16);
+    }
+}
+
+// Fills the 16x16 block at 'dst' with the byte value 'v'.
+static WEBP_INLINE void Put16(int v, uint8_t* dst) {
+    uint8_t* row = dst;
+    int remaining;
+    for (remaining = 16; remaining > 0; --remaining) {
+        memset(row, v, 16);
+        row += BPS;
+    }
+}
+
+// DC prediction: fills the 16x16 block with the rounded average of the 16
+// pixels above it and the 16 pixels to its left.
+static void DC16(uint8_t* dst) {
+    int sum_left = 0;
+    int sum_top = 0;
+    int k;
+    for (k = 0; k < 16; ++k) {
+        sum_left += dst[-1 + k * BPS];
+        sum_top += dst[k - BPS];
+    }
+    // 32 samples: +16 rounds, >> 5 divides.
+    Put16((sum_left + sum_top + 16) >> 5, dst);
+}
+
+// DC prediction with top samples not available: fills the 16x16 block with
+// the rounded average of the 16 pixels to its left.
+static void DC16NoTop(uint8_t* dst) {
+    int sum_left = 0;
+    int row;
+    for (row = 0; row < 16; ++row) {
+        sum_left += dst[-1 + row * BPS];
+    }
+    // 16 samples: +8 rounds, >> 4 divides.
+    Put16((sum_left + 8) >> 4, dst);
+}
+
+// DC prediction with left samples not available: fills the 16x16 block with
+// the rounded average of the 16 pixels above it.
+static void DC16NoLeft(uint8_t* dst) {
+    const uint8_t* const top = dst - BPS;
+    int sum_top = 0;
+    int col;
+    for (col = 0; col < 16; ++col) {
+        sum_top += top[col];
+    }
+    // 16 samples: +8 rounds, >> 4 divides.
+    Put16((sum_top + 8) >> 4, dst);
+}
+
+// With neither top nor left samples available, the 16x16 block is filled with
+// the constant 0x80.
+static void DC16NoTopLeft(uint8_t* dst) { // DC with no top and left samples
+    Put16(0x80, dst);
+}
+
+// Table of 16x16 luma predictors; entries 0..6 are assigned in VP8DspInit().
+VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
+
+//------------------------------------------------------------------------------
+// 4x4
+
+// AVG3: rounded 3-tap average with weights (1, 2, 1) / 4.
+// AVG2: rounded average of two values.
+#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
+#define AVG2(a, b) (((a) + (b) + 1) >> 1)
+
+// Vertical prediction: every row of the 4x4 block receives the (1,2,1)
+// smoothed version of the row above (which uses top[-1] through top[4]).
+static void VE4(uint8_t* dst) {
+    const uint8_t* const top = dst - BPS;
+    uint8_t vals[4];
+    int i;
+    for (i = 0; i < 4; ++i) {
+        vals[i] = AVG3(top[i - 1], top[i], top[i + 1]);
+    }
+    for (i = 0; i < 4; ++i) {
+        memcpy(dst + i * BPS, vals, sizeof(vals));
+    }
+}
+
+// Horizontal prediction: each row of the 4x4 block is filled with the (1,2,1)
+// smoothed value of the left-edge pixels around that row. The top-left pixel
+// serves as the sample above the first row, and the last left pixel is
+// repeated below the last row.
+static void HE4(uint8_t* dst) {
+    int left[6];
+    int k;
+    // left[0] is the top-left pixel, left[1..4] are the left pixels of rows 0..3.
+    for (k = 0; k < 5; ++k) {
+        left[k] = dst[-1 + (k - 1) * BPS];
+    }
+    left[5] = left[4];
+    for (k = 0; k < 4; ++k) {
+        // Multiplying by 0x01010101 replicates the byte into all four pixels.
+        WebPUint32ToMem(dst + k * BPS, 0x01010101U * AVG3(left[k], left[k + 1], left[k + 2]));
+    }
+}
+
+// DC prediction: fills the 4x4 block with the rounded average of the 4 pixels
+// above it and the 4 pixels to its left.
+static void DC4(uint8_t* dst) {
+    uint32_t sum = 4; // rounding term for the division by 8
+    int i;
+    for (i = 0; i < 4; ++i) {
+        sum += dst[i - BPS];
+        sum += dst[-1 + i * BPS];
+    }
+    sum >>= 3;
+    for (i = 0; i < 4; ++i) {
+        memset(dst + i * BPS, sum, 4);
+    }
+}
+
+// Down-right prediction for a 4x4 block. Every diagonal running from top-left
+// to bottom-right receives one (1,2,1) filtered value taken from the edge
+// L,K,J,I (left column, bottom to top), X (top-left corner), A,B,C,D (top row).
+static void RD4(uint8_t* dst) { // Down-right
+    // I..L: left neighbours of rows 0..3.
+    const int I = dst[-1 + 0 * BPS];
+    const int J = dst[-1 + 1 * BPS];
+    const int K = dst[-1 + 2 * BPS];
+    const int L = dst[-1 + 3 * BPS];
+    // X: top-left corner; A..D: neighbours above columns 0..3.
+    const int X = dst[-1 - BPS];
+    const int A = dst[0 - BPS];
+    const int B = dst[1 - BPS];
+    const int C = dst[2 - BPS];
+    const int D = dst[3 - BPS];
+    // From the bottom-left pixel up to the top-right one, one diagonal per line.
+    DST(0, 3) = AVG3(J, K, L);
+    DST(1, 3) = DST(0, 2) = AVG3(I, J, K);
+    DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J);
+    DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I);
+    DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X);
+    DST(3, 1) = DST(2, 0) = AVG3(C, B, A);
+    DST(3, 0) = AVG3(D, C, B);
+}
+
+// Down-left prediction for a 4x4 block. Every anti-diagonal (x + y constant)
+// receives one (1,2,1) filtered value taken from the eight pixels A..H of the
+// row above (columns 0..7); the last one repeats H.
+static void LD4(uint8_t* dst) { // Down-Left
+    const int A = dst[0 - BPS];
+    const int B = dst[1 - BPS];
+    const int C = dst[2 - BPS];
+    const int D = dst[3 - BPS];
+    const int E = dst[4 - BPS];
+    const int F = dst[5 - BPS];
+    const int G = dst[6 - BPS];
+    const int H = dst[7 - BPS];
+    // From the top-left pixel down to the bottom-right one.
+    DST(0, 0) = AVG3(A, B, C);
+    DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
+    DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
+    DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
+    DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
+    DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
+    DST(3, 3) = AVG3(G, H, H);
+}
+
+// Vertical-right prediction for a 4x4 block, built from the left pixels I,J,K
+// (rows 0..2), the top-left corner X and the top pixels A..D (columns 0..3).
+static void VR4(uint8_t* dst) { // Vertical-Right
+    const int I = dst[-1 + 0 * BPS];
+    const int J = dst[-1 + 1 * BPS];
+    const int K = dst[-1 + 2 * BPS];
+    const int X = dst[-1 - BPS];
+    const int A = dst[0 - BPS];
+    const int B = dst[1 - BPS];
+    const int C = dst[2 - BPS];
+    const int D = dst[3 - BPS];
+    // 2-tap averages of the top edge: row 0, reused in row 2 one column over.
+    DST(0, 0) = DST(1, 2) = AVG2(X, A);
+    DST(1, 0) = DST(2, 2) = AVG2(A, B);
+    DST(2, 0) = DST(3, 2) = AVG2(B, C);
+    DST(3, 0) = AVG2(C, D);
+
+    // 3-tap values: the remaining column-0 pixels, row 1, and the rest of row 3.
+    DST(0, 3) = AVG3(K, J, I);
+    DST(0, 2) = AVG3(J, I, X);
+    DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
+    DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
+    DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
+    DST(3, 1) = AVG3(B, C, D);
+}
+
+// Vertical-left prediction for a 4x4 block, built from the eight pixels A..H
+// of the row above (columns 0..7).
+static void VL4(uint8_t* dst) { // Vertical-Left
+    const int A = dst[0 - BPS];
+    const int B = dst[1 - BPS];
+    const int C = dst[2 - BPS];
+    const int D = dst[3 - BPS];
+    const int E = dst[4 - BPS];
+    const int F = dst[5 - BPS];
+    const int G = dst[6 - BPS];
+    const int H = dst[7 - BPS];
+    // 2-tap averages: row 0, reused in row 2 one column to the left.
+    DST(0, 0) = AVG2(A, B);
+    DST(1, 0) = DST(0, 2) = AVG2(B, C);
+    DST(2, 0) = DST(1, 2) = AVG2(C, D);
+    DST(3, 0) = DST(2, 2) = AVG2(D, E);
+
+    // 3-tap values: row 1, reused in row 3; (3,2) and (3,3) use E..H.
+    DST(0, 1) = AVG3(A, B, C);
+    DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
+    DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
+    DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
+    DST(3, 2) = AVG3(E, F, G);
+    DST(3, 3) = AVG3(F, G, H);
+}
+
+// Horizontal-up prediction for a 4x4 block, built only from the four left
+// pixels I..L (rows 0..3). Positions that would need samples below L are set
+// to L itself.
+static void HU4(uint8_t* dst) { // Horizontal-Up
+    const int I = dst[-1 + 0 * BPS];
+    const int J = dst[-1 + 1 * BPS];
+    const int K = dst[-1 + 2 * BPS];
+    const int L = dst[-1 + 3 * BPS];
+    // Even columns take 2-tap averages, odd columns 3-tap ones.
+    DST(0, 0) = AVG2(I, J);
+    DST(2, 0) = DST(0, 1) = AVG2(J, K);
+    DST(2, 1) = DST(0, 2) = AVG2(K, L);
+    DST(1, 0) = AVG3(I, J, K);
+    DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
+    DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
+    // Right half of row 2 and all of row 3 are plain copies of L.
+    DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
+}
+
+// Horizontal-down prediction for a 4x4 block, built from the left pixels I..L
+// (rows 0..3), the top-left corner X and the top pixels A..C (columns 0..2).
+static void HD4(uint8_t* dst) { // Horizontal-Down
+    const int I = dst[-1 + 0 * BPS];
+    const int J = dst[-1 + 1 * BPS];
+    const int K = dst[-1 + 2 * BPS];
+    const int L = dst[-1 + 3 * BPS];
+    const int X = dst[-1 - BPS];
+    const int A = dst[0 - BPS];
+    const int B = dst[1 - BPS];
+    const int C = dst[2 - BPS];
+
+    // 2-tap averages of the left edge: column 0, reused in column 2 one row down.
+    DST(0, 0) = DST(2, 1) = AVG2(I, X);
+    DST(0, 1) = DST(2, 2) = AVG2(J, I);
+    DST(0, 2) = DST(2, 3) = AVG2(K, J);
+    DST(0, 3) = AVG2(L, K);
+
+    // 3-tap values: rest of row 0, column 1, and the rest of column 3.
+    DST(3, 0) = AVG3(A, B, C);
+    DST(2, 0) = AVG3(X, A, B);
+    DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
+    DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
+    DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
+    DST(1, 3) = AVG3(L, K, J);
+}
+
+// The pixel-access and averaging helpers are only needed by the 4x4
+// predictors above.
+#undef DST
+#undef AVG3
+#undef AVG2
+
+// Table of 4x4 luma predictors; entries 0..9 are assigned in VP8DspInit().
+VP8PredFunc VP8PredLuma4[NUM_BMODES];
+
+//------------------------------------------------------------------------------
+// Chroma
+
+// Vertical prediction: copies the 8 pixels of the row above the block into
+// each of the block's 8 rows.
+static void VE8uv(uint8_t* dst) {
+    const uint8_t* const top = dst - BPS;
+    uint8_t* row = dst;
+    int remaining;
+    for (remaining = 8; remaining > 0; --remaining) {
+        memcpy(row, top, 8);
+        row += BPS;
+    }
+}
+
+// Horizontal prediction: fills each of the 8 rows with the pixel located
+// immediately to the left of that row.
+static void HE8uv(uint8_t* dst) {
+    int j;
+    for (j = 0; j < 8; ++j) {
+        uint8_t* const row = dst + j * BPS;
+        memset(row, row[-1], 8);
+    }
+}
+
+// Helper for the chroma-DC predictions: fills the 8x8 block at 'dst' with
+// the byte 'value'.
+static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
+    uint8_t* row = dst;
+    int remaining;
+    for (remaining = 8; remaining > 0; --remaining) {
+        memset(row, value, 8);
+        row += BPS;
+    }
+}
+
+// DC prediction: fills the 8x8 block with the rounded average of the 8
+// pixels above it and the 8 pixels to its left.
+static void DC8uv(uint8_t* dst) {
+    int sum_top = 0;
+    int sum_left = 0;
+    int k;
+    for (k = 0; k < 8; ++k) {
+        sum_top += dst[k - BPS];
+        sum_left += dst[-1 + k * BPS];
+    }
+    // 16 samples: +8 rounds, >> 4 divides.
+    Put8x8uv((sum_top + sum_left + 8) >> 4, dst);
+}
+
+// DC prediction with no left samples: fills the 8x8 block with the rounded
+// average of the 8 pixels above it.
+static void DC8uvNoLeft(uint8_t* dst) {
+    const uint8_t* const top = dst - BPS;
+    int sum_top = 0;
+    int col;
+    for (col = 0; col < 8; ++col) {
+        sum_top += top[col];
+    }
+    // 8 samples: +4 rounds, >> 3 divides.
+    Put8x8uv((sum_top + 4) >> 3, dst);
+}
+
+// DC prediction with no top samples: fills the 8x8 block with the rounded
+// average of the 8 pixels to its left.
+static void DC8uvNoTop(uint8_t* dst) {
+    int sum_left = 0;
+    int row;
+    for (row = 0; row < 8; ++row) {
+        sum_left += dst[-1 + row * BPS];
+    }
+    // 8 samples: +4 rounds, >> 3 divides.
+    Put8x8uv((sum_left + 4) >> 3, dst);
+}
+
+// With neither top nor left samples available, the 8x8 block is filled with
+// the constant 0x80.
+static void DC8uvNoTopLeft(uint8_t* dst) { // DC with nothing
+    Put8x8uv(0x80, dst);
+}
+
+// Table of 8x8 chroma predictors; entries 0..6 are assigned in VP8DspInit().
+VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
+
+//------------------------------------------------------------------------------
+// Edge filtering functions
+
+// 4 pixels in, 2 pixels out: reads the two samples on each side of the edge
+// at 'p' (spaced 'step' apart) and adjusts only the two nearest ones.
+static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
+    const int p1 = p[-2 * step];
+    const int p0 = p[-step];
+    const int q0 = p[0];
+    const int q1 = p[step];
+    const int delta = 3 * (q0 - p0) + VP8ksclip1[p1 - q1]; // in [-893,892]
+    const int sub_q = VP8ksclip2[(delta + 4) >> 3];        // in [-16,15]
+    const int add_p = VP8ksclip2[(delta + 3) >> 3];
+    p[-step] = VP8kclip1[p0 + add_p];
+    p[0] = VP8kclip1[q0 - sub_q];
+}
+
+// 4 pixels in, 4 pixels out: adjusts the two samples on each side of the
+// edge at 'p' (spaced 'step' apart).
+static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
+    const int p1 = p[-2 * step];
+    const int p0 = p[-step];
+    const int q0 = p[0];
+    const int q1 = p[step];
+    const int delta = 3 * (q0 - p0);
+    const int sub_q0 = VP8ksclip2[(delta + 4) >> 3];
+    const int add_p0 = VP8ksclip2[(delta + 3) >> 3];
+    // The outer samples move by half (rounded) of the inner correction.
+    const int outer = (sub_q0 + 1) >> 1;
+    p[-2 * step] = VP8kclip1[p1 + outer];
+    p[-step] = VP8kclip1[p0 + add_p0];
+    p[0] = VP8kclip1[q0 - sub_q0];
+    p[step] = VP8kclip1[q1 - outer];
+}
+
+// 6 pixels in, 6 pixels out: adjusts the three samples on each side of the
+// edge at 'p' (spaced 'step' apart) with weights 27, 18 and 9 (out of 128).
+static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
+    const int p2 = p[-3 * step];
+    const int p1 = p[-2 * step];
+    const int p0 = p[-step];
+    const int q0 = p[0];
+    const int q1 = p[step];
+    const int q2 = p[2 * step];
+    const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
+    // a is in [-128,127], w_near in [-27,27], w_mid in [-18,18] and
+    // w_far in [-9,9]
+    const int w_near = (27 * a + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7
+    const int w_mid = (18 * a + 63) >> 7;  // eq. to ((2 * a + 7) * 9) >> 7
+    const int w_far = (9 * a + 63) >> 7;   // eq. to ((1 * a + 7) * 9) >> 7
+    p[-3 * step] = VP8kclip1[p2 + w_far];
+    p[-2 * step] = VP8kclip1[p1 + w_mid];
+    p[-step] = VP8kclip1[p0 + w_near];
+    p[0] = VP8kclip1[q0 - w_near];
+    p[step] = VP8kclip1[q1 - w_mid];
+    p[2 * step] = VP8kclip1[q2 - w_far];
+}
+
+// Returns 1 when, on either side of the edge at 'p', the VP8kabs0[] value for
+// the difference between the two nearest samples exceeds 'thresh'; else 0.
+static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
+    const int p1 = p[-2 * step];
+    const int p0 = p[-step];
+    const int q0 = p[0];
+    const int q1 = p[step];
+    if (VP8kabs0[p1 - p0] > thresh) {
+        return 1;
+    }
+    return VP8kabs0[q1 - q0] > thresh;
+}
+
+// Returns 1 when 4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1] does not exceed
+// 't' for the four samples around the edge at 'p'; returns 0 otherwise.
+static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
+    const int p1 = p[-2 * step];
+    const int p0 = p[-step];
+    const int q0 = p[0];
+    const int q1 = p[step];
+    const int activity = 4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1];
+    return activity <= t;
+}
+
+// Returns 1 when the edge at 'p' passes both tests: the across-edge measure
+// (as in needs_filter()) is at most 't', and every VP8kabs0[] value for
+// neighbouring samples on each side of the edge is at most 'it'.
+static WEBP_INLINE int needs_filter2(const uint8_t* p, int step, int t, int it) {
+    const int p3 = p[-4 * step];
+    const int p2 = p[-3 * step];
+    const int p1 = p[-2 * step];
+    const int p0 = p[-step];
+    const int q0 = p[0];
+    const int q1 = p[step];
+    const int q2 = p[2 * step];
+    const int q3 = p[3 * step];
+    if ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) > t) return 0;
+    if (VP8kabs0[p3 - p2] > it) return 0;
+    if (VP8kabs0[p2 - p1] > it) return 0;
+    if (VP8kabs0[p1 - p0] > it) return 0;
+    if (VP8kabs0[q3 - q2] > it) return 0;
+    if (VP8kabs0[q2 - q1] > it) return 0;
+    if (VP8kabs0[q1 - q0] > it) return 0;
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+// Simple In-loop filtering (Paragraph 15.2)
+
+// Simple filter across the horizontal edge at 'p': each of the 16 columns is
+// tested with needs_filter() and, if it passes, filtered with do_filter2()
+// using 'stride' as the sample spacing.
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
+    const int thresh2 = 2 * thresh + 1;
+    uint8_t* const end = p + 16;
+    for (; p < end; ++p) {
+        if (!needs_filter(p, stride, thresh2)) continue;
+        do_filter2(p, stride);
+    }
+}
+
+// Simple filter across the vertical edge at 'p': each of the 16 rows is
+// tested with needs_filter() and, if it passes, filtered with do_filter2()
+// using a sample spacing of 1.
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+    const int thresh2 = 2 * thresh + 1;
+    int row;
+    for (row = 0; row < 16; ++row, p += stride) {
+        if (!needs_filter(p, 1, thresh2)) continue;
+        do_filter2(p, 1);
+    }
+}
+
+// Applies SimpleVFilter16() to the three edges located 4, 8 and 12 rows
+// below 'p'.
+static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
+    int edge;
+    for (edge = 1; edge <= 3; ++edge) {
+        SimpleVFilter16(p + 4 * edge * stride, stride, thresh);
+    }
+}
+
+// Applies SimpleHFilter16() to the three edges located 4, 8 and 12 columns
+// to the right of 'p'.
+static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
+    int edge;
+    for (edge = 1; edge <= 3; ++edge) {
+        SimpleHFilter16(p + 4 * edge, stride, thresh);
+    }
+}
+
+//------------------------------------------------------------------------------
+// Complex In-loop filtering (Paragraph 15.3)
+
+// Walks 'size' positions along an edge (advancing by 'vstride') and, for each
+// position accepted by needs_filter2(), applies do_filter2() when hev()
+// reports high variance and do_filter6() otherwise. 'hstride' is the spacing
+// between the samples across the edge.
+static WEBP_INLINE void FilterLoop26(
+    uint8_t* p, int hstride, int vstride, int size, int thresh, int ithresh, int hev_thresh) {
+    const int thresh2 = 2 * thresh + 1;
+    int i;
+    for (i = 0; i < size; ++i, p += vstride) {
+        if (!needs_filter2(p, hstride, thresh2, ithresh)) continue;
+        if (hev(p, hstride, hev_thresh)) {
+            do_filter2(p, hstride);
+        } else {
+            do_filter6(p, hstride);
+        }
+    }
+}
+
+// Walks 'size' positions along an edge (advancing by 'vstride') and, for each
+// position accepted by needs_filter2(), applies do_filter2() when hev()
+// reports high variance and do_filter4() otherwise. 'hstride' is the spacing
+// between the samples across the edge.
+static WEBP_INLINE void FilterLoop24(
+    uint8_t* p, int hstride, int vstride, int size, int thresh, int ithresh, int hev_thresh) {
+    const int thresh2 = 2 * thresh + 1;
+    int i;
+    for (i = 0; i < size; ++i, p += vstride) {
+        if (!needs_filter2(p, hstride, thresh2, ithresh)) continue;
+        if (hev(p, hstride, hev_thresh)) {
+            do_filter2(p, hstride);
+        } else {
+            do_filter4(p, hstride);
+        }
+    }
+}
+
+// on macroblock edges
+// Filters 16 columns across the horizontal edge at 'p' (samples are 'stride'
+// apart, positions 1 apart) using FilterLoop26().
+static void VFilter16(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {
+    FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
+}
+
+// Filters 16 rows across the vertical edge at 'p' (samples are 1 apart,
+// positions 'stride' apart) using FilterLoop26().
+static void HFilter16(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {
+    FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
+}
+
+// on three inner edges
+// Runs FilterLoop24() over 16 columns on the edges located 4, 8 and 12 rows
+// below 'p'.
+static void VFilter16i(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {
+    int edge;
+    for (edge = 1; edge <= 3; ++edge) {
+        FilterLoop24(p + 4 * edge * stride, stride, 1, 16, thresh, ithresh, hev_thresh);
+    }
+}
+
+// Runs FilterLoop24() over 16 rows on the edges located 4, 8 and 12 columns
+// to the right of 'p'.
+static void HFilter16i(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {
+    int edge;
+    for (edge = 1; edge <= 3; ++edge) {
+        FilterLoop24(p + 4 * edge, 1, stride, 16, thresh, ithresh, hev_thresh);
+    }
+}
+
+// 8-pixels wide variant, for chroma filtering
+// Applies FilterLoop26() over 8 columns of the horizontal edge at 'u', then
+// does the same for 'v'.
+static void VFilter8(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {
+    FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
+    FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
+}
+
+// Applies FilterLoop26() over 8 rows of the vertical edge at 'u', then does
+// the same for 'v'.
+static void HFilter8(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {
+    FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
+    FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
+}
+
+// Applies FilterLoop24() over 8 columns of the edge located 4 rows below 'u',
+// then does the same for 'v'.
+static void VFilter8i(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {
+    FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
+    FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
+}
+
+// Applies FilterLoop24() over 8 rows of the edge located 4 columns to the
+// right of 'u', then does the same for 'v'.
+static void HFilter8i(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {
+    FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
+    FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
+}
+
+//------------------------------------------------------------------------------
+
+// Function-pointer hooks for the decoder's transform routines. They are
+// assigned in VP8DspInit(), which may then replace them through the
+// CPU-specific VP8DspInit*() functions declared below.
+VP8DecIdct2 VP8Transform;
+VP8DecIdct VP8TransformAC3;
+VP8DecIdct VP8TransformUV;
+VP8DecIdct VP8TransformDC;
+VP8DecIdct VP8TransformDCUV;
+
+// Hooks for the in-loop filters (complex luma/chroma variants, then the
+// simple ones); the 'i' suffix marks the inner-edge versions.
+VP8LumaFilterFunc VP8VFilter16;
+VP8LumaFilterFunc VP8HFilter16;
+VP8ChromaFilterFunc VP8VFilter8;
+VP8ChromaFilterFunc VP8HFilter8;
+VP8LumaFilterFunc VP8VFilter16i;
+VP8LumaFilterFunc VP8HFilter16i;
+VP8ChromaFilterFunc VP8VFilter8i;
+VP8ChromaFilterFunc VP8HFilter8i;
+VP8SimpleFilterFunc VP8SimpleVFilter16;
+VP8SimpleFilterFunc VP8SimpleHFilter16;
+VP8SimpleFilterFunc VP8SimpleVFilter16i;
+VP8SimpleFilterFunc VP8SimpleHFilter16i;
+
+// CPU-specific initializers, defined in other files and called from
+// VP8DspInit() when the matching WEBP_USE_* macro is defined and the CPU
+// reports the feature.
+extern void VP8DspInitSSE2(void);
+extern void VP8DspInitSSE41(void);
+extern void VP8DspInitNEON(void);
+extern void VP8DspInitMIPS32(void);
+extern void VP8DspInitMIPSdspR2(void);
+
+// Remembers the VP8GetCPUInfo value seen by the last VP8DspInit() call. It
+// starts out pointing at itself, a value that VP8GetCPUInfo is not expected
+// to take, so the first call always performs the initialization.
+static volatile VP8CPUInfo dec_last_cpuinfo_used = (VP8CPUInfo)&dec_last_cpuinfo_used;
+
+// Installs the decoder DSP function pointers: first the plain-C versions
+// defined in this file, then any CPU-specific versions that are compiled in
+// and reported as supported by VP8GetCPUInfo. The function returns at once
+// if VP8GetCPUInfo has not changed since the previous call.
+WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
+    // Nothing to do if initialization already ran with this VP8GetCPUInfo.
+    if (dec_last_cpuinfo_used == VP8GetCPUInfo) return;
+
+    VP8InitClipTables();
+
+    // Plain-C defaults: transforms.
+    VP8TransformWHT = TransformWHT;
+    VP8Transform = TransformTwo;
+    VP8TransformUV = TransformUV;
+    VP8TransformDC = TransformDC;
+    VP8TransformDCUV = TransformDCUV;
+    VP8TransformAC3 = TransformAC3;
+
+    // Plain-C defaults: in-loop filters.
+    VP8VFilter16 = VFilter16;
+    VP8HFilter16 = HFilter16;
+    VP8VFilter8 = VFilter8;
+    VP8HFilter8 = HFilter8;
+    VP8VFilter16i = VFilter16i;
+    VP8HFilter16i = HFilter16i;
+    VP8VFilter8i = VFilter8i;
+    VP8HFilter8i = HFilter8i;
+    VP8SimpleVFilter16 = SimpleVFilter16;
+    VP8SimpleHFilter16 = SimpleHFilter16;
+    VP8SimpleVFilter16i = SimpleVFilter16i;
+    VP8SimpleHFilter16i = SimpleHFilter16i;
+
+    // Plain-C defaults: 4x4 luma predictors.
+    VP8PredLuma4[0] = DC4;
+    VP8PredLuma4[1] = TM4;
+    VP8PredLuma4[2] = VE4;
+    VP8PredLuma4[3] = HE4;
+    VP8PredLuma4[4] = RD4;
+    VP8PredLuma4[5] = VR4;
+    VP8PredLuma4[6] = LD4;
+    VP8PredLuma4[7] = VL4;
+    VP8PredLuma4[8] = HD4;
+    VP8PredLuma4[9] = HU4;
+
+    // Plain-C defaults: 16x16 luma predictors; entries 4..6 are the DC
+    // variants for missing top and/or left samples.
+    VP8PredLuma16[0] = DC16;
+    VP8PredLuma16[1] = TM16;
+    VP8PredLuma16[2] = VE16;
+    VP8PredLuma16[3] = HE16;
+    VP8PredLuma16[4] = DC16NoTop;
+    VP8PredLuma16[5] = DC16NoLeft;
+    VP8PredLuma16[6] = DC16NoTopLeft;
+
+    // Plain-C defaults: 8x8 chroma predictors, same layout as the 16x16 table.
+    VP8PredChroma8[0] = DC8uv;
+    VP8PredChroma8[1] = TM8uv;
+    VP8PredChroma8[2] = VE8uv;
+    VP8PredChroma8[3] = HE8uv;
+    VP8PredChroma8[4] = DC8uvNoTop;
+    VP8PredChroma8[5] = DC8uvNoLeft;
+    VP8PredChroma8[6] = DC8uvNoTopLeft;
+
+    // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+    if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+        if (VP8GetCPUInfo(kSSE2)) {
+            VP8DspInitSSE2();
+            // SSE4.1 is only considered once SSE2 is available, and runs after
+            // the SSE2 initializer.
+#if defined(WEBP_USE_SSE41)
+            if (VP8GetCPUInfo(kSSE4_1)) {
+                VP8DspInitSSE41();
+            }
+#endif
+        }
+#endif
+#if defined(WEBP_USE_NEON)
+        if (VP8GetCPUInfo(kNEON)) {
+            VP8DspInitNEON();
+        }
+#endif
+#if defined(WEBP_USE_MIPS32)
+        if (VP8GetCPUInfo(kMIPS32)) {
+            VP8DspInitMIPS32();
+        }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+        if (VP8GetCPUInfo(kMIPSdspR2)) {
+            VP8DspInitMIPSdspR2();
+        }
+#endif
+    }
+    // Record which detection function this initialization was done for.
+    dec_last_cpuinfo_used = VP8GetCPUInfo;
+}
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/dec_clip_tables.c b/codec/L2/demos/webpEnc/host/src/dsp/dec_clip_tables.c
new file mode 100644
index 0000000000..6f4a2cabf0
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/dec_clip_tables.c
@@ -0,0 +1,254 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Clipping tables for filtering
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#define USE_STATIC_TABLES // undefine to have run-time table initialization
+
+#ifdef USE_STATIC_TABLES
+
+static const uint8_t abs0[255 + 255 + 1] = { // abs0[255 + i] is |i| for i in [-255, 255]; same layout as the run-time fill in VP8InitClipTables()
+    0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0, 0xef, 0xee, 0xed,
+    0xec, 0xeb, 0xea, 0xe9, 0xe8, 0xe7, 0xe6, 0xe5, 0xe4, 0xe3, 0xe2, 0xe1, 0xe0, 0xdf, 0xde, 0xdd, 0xdc, 0xdb, 0xda,
+    0xd9, 0xd8, 0xd7, 0xd6, 0xd5, 0xd4, 0xd3, 0xd2, 0xd1, 0xd0, 0xcf, 0xce, 0xcd, 0xcc, 0xcb, 0xca, 0xc9, 0xc8, 0xc7,
+    0xc6, 0xc5, 0xc4, 0xc3, 0xc2, 0xc1, 0xc0, 0xbf, 0xbe, 0xbd, 0xbc, 0xbb, 0xba, 0xb9, 0xb8, 0xb7, 0xb6, 0xb5, 0xb4,
+    0xb3, 0xb2, 0xb1, 0xb0, 0xaf, 0xae, 0xad, 0xac, 0xab, 0xaa, 0xa9, 0xa8, 0xa7, 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1,
+    0xa0, 0x9f, 0x9e, 0x9d, 0x9c, 0x9b, 0x9a, 0x99, 0x98, 0x97, 0x96, 0x95, 0x94, 0x93, 0x92, 0x91, 0x90, 0x8f, 0x8e,
+    0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, 0x7f, 0x7e, 0x7d, 0x7c, 0x7b,
+    0x7a, 0x79, 0x78, 0x77, 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x70, 0x6f, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x68,
+    0x67, 0x66, 0x65, 0x64, 0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5d, 0x5c, 0x5b, 0x5a, 0x59, 0x58, 0x57, 0x56, 0x55,
+    0x54, 0x53, 0x52, 0x51, 0x50, 0x4f, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42,
+    0x41, 0x40, 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30, 0x2f,
+    0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1c,
+    0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09,
+    0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
+    0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
+    0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
+    0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43,
+    0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
+    0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+    0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c,
+    0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2,
+    0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5,
+    0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
+    0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
+    0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee,
+    0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff};
+
+static const int8_t sclip1[1020 + 1020 + 1] = { // sclip1[1020 + i]: i clamped to [-128, 127], per the run-time fill in VP8InitClipTables(); NOTE(review): literals 0x80..0xff presumably rely on wrapping to negative int8_t
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93,
+    0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6,
+    0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9,
+    0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc,
+    0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2,
+    0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+    0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
+    0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e,
+    0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51,
+    0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64,
+    0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+    0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f};
+
+static const int8_t sclip2[112 + 112 + 1] = { // sclip2[112 + i]: i clamped to [-16, 15], per the run-time fill in VP8InitClipTables(); NOTE(review): 0xf0..0xff presumably wrap to -16..-1
+    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+    0xf0, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x00, 0x01,
+    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};
+
+static const uint8_t clip1[255 + 511 + 1] = { // clip1[255 + i]: i clamped to [0, 255]; the run-time fill in VP8InitClipTables() covers i in [-255, 510]
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
+    0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
+    0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
+    0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43,
+    0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
+    0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+    0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c,
+    0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2,
+    0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5,
+    0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
+    0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
+    0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee,
+    0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+
+#else
+
+// Run-time variant (USE_STATIC_TABLES undefined): storage that VP8InitClipTables() fills on first call.
+static uint8_t abs0[255 + 255 + 1]; // |i| for i in [-255, 255]
+static int8_t sclip1[1020 + 1020 + 1]; // i in [-1020, 1020] clamped to [-128, 127]
+static int8_t sclip2[112 + 112 + 1]; // i in [-112, 112] clamped to [-16, 15]
+static uint8_t clip1[255 + 511 + 1]; // i in [-255, 510] clamped to [0, 255]
+
+// 'volatile' is intended to keep the store to this flag from being reordered, so it is set to true _last_.
+// NOTE(review): volatile alone is not a cross-thread ordering guarantee; confirm how callers serialize the first call.
+static volatile int tables_ok = 0;
+
+#endif
+
+const int8_t* const VP8ksclip1 = &sclip1[1020]; // points at the i == 0 entry, so signed indices in [-1020, 1020] stay inside sclip1
+const int8_t* const VP8ksclip2 = &sclip2[112]; // same biasing; signed indices in [-112, 112] stay inside sclip2
+const uint8_t* const VP8kclip1 = &clip1[255]; // array spans indices [-255, 511]; the run-time fill writes [-255, 510]
+const uint8_t* const VP8kabs0 = &abs0[255]; // signed indices in [-255, 255] stay inside abs0
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8InitClipTables(void) {
+    // Compiles to an empty body when USE_STATIC_TABLES is defined; otherwise fills the four tables once.
+#if !defined(USE_STATIC_TABLES)
+    int v; // signed value an entry stands for; it is stored at index (v + bias)
+    if (tables_ok) return;
+    for (v = -255; v <= 255; ++v) { // abs0: absolute value
+        abs0[v + 255] = (v >= 0) ? v : -v;
+    }
+    for (v = -1020; v <= 1020; ++v) { // sclip1: clamp to [-128, 127]
+        sclip1[v + 1020] = (v > 127) ? 127 : (v < -128) ? -128 : v;
+    }
+    for (v = -112; v <= 112; ++v) { // sclip2: clamp to [-16, 15]
+        sclip2[v + 112] = (v > 15) ? 15 : (v < -16) ? -16 : v;
+    }
+    for (v = -255; v <= 255 + 255; ++v) { // clip1: clamp to [0, 255]
+        clip1[v + 255] = (v > 255) ? 255 : (v < 0) ? 0 : v;
+    }
+    tables_ok = 1; // raised only after all four loops have run
+#endif // USE_STATIC_TABLES
+}
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/dec_mips32.c b/codec/L2/demos/webpEnc/host/src/dsp/dec_mips32.c
new file mode 100644
index 0000000000..1c719b6500
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/dec_mips32.c
@@ -0,0 +1,575 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of dsp functions
+//
+// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
+//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS32)
+
+#include "./mips_macro.h"
+
+static const int kC1 = 20091 + (1 << 16); // presumably bound to %[kC1] in TransformOne's inline asm, where each `mul` product is followed by `sra ..., 16`
+static const int kC2 = 35468; // presumably bound to %[kC2] in the same asm; used the same way (mul, then sra by 16)
+
+static WEBP_INLINE int abs_mips32(int x) {
+    const int mask = x >> 31; // 0 for x >= 0; presumably all ones for x < 0 (relies on an arithmetic shift)
+    return (mask ^ x) - mask; // branch-free: x is returned as-is when mask is 0, negated when mask is -1
+}
+
+// Reads the 4-pixel window p[-2*step] .. p[step] and rewrites only p[-step] and p[0].
+static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
+    const int lo1 = p[-2 * step], lo0 = p[-step], hi0 = p[0], hi1 = p[step];
+    const int delta = VP8ksclip1[lo1 - hi1] + 3 * (hi0 - lo0);
+    const int sub_hi = VP8ksclip2[(delta + 4) >> 3]; // taken away from p[0]
+    const int add_lo = VP8ksclip2[(delta + 3) >> 3]; // added to p[-step]
+    p[-step] = VP8kclip1[lo0 + add_lo];
+    p[0] = VP8kclip1[hi0 - sub_hi];
+}
+
+// Reads the 4-pixel window p[-2*step] .. p[step] and rewrites all four of those pixels.
+static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
+    const int lo1 = p[-2 * step], lo0 = p[-step], hi0 = p[0], hi1 = p[step];
+    const int delta = 3 * (hi0 - lo0);
+    const int sub_hi = VP8ksclip2[(delta + 4) >> 3]; // taken away from p[0]
+    const int add_lo = VP8ksclip2[(delta + 3) >> 3]; // added to p[-step]
+    const int outer = (sub_hi + 1) >> 1;             // half-strength term for the two outer pixels
+    p[-2 * step] = VP8kclip1[lo1 + outer];
+    p[-step] = VP8kclip1[lo0 + add_lo];
+    p[0] = VP8kclip1[hi0 - sub_hi];
+    p[step] = VP8kclip1[hi1 - outer];
+}
+
+// Reads the 6-pixel window p[-3*step] .. p[2*step] and rewrites all six of those pixels.
+static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
+    const int lo2 = p[-3 * step], lo1 = p[-2 * step], lo0 = p[-step];
+    const int hi0 = p[0], hi1 = p[step], hi2 = p[2 * step];
+    const int w = VP8ksclip1[VP8ksclip1[lo1 - hi1] + 3 * (hi0 - lo0)];
+    // w is in [-128,127], so strong is in [-27,27], medium in [-18,18] and weak in [-9,9]
+    const int strong = (63 + 27 * w) >> 7; // eq. to ((3 * w + 7) * 9) >> 7
+    const int medium = (63 + 18 * w) >> 7; // eq. to ((2 * w + 7) * 9) >> 7
+    const int weak = (63 + 9 * w) >> 7;    // eq. to ((1 * w + 7) * 9) >> 7
+    p[-3 * step] = VP8kclip1[lo2 + weak];
+    p[-2 * step] = VP8kclip1[lo1 + medium];
+    p[-step] = VP8kclip1[lo0 + strong];
+    p[0] = VP8kclip1[hi0 - strong];
+    p[step] = VP8kclip1[hi1 - medium];
+    p[2 * step] = VP8kclip1[hi2 - weak];
+}
+
+static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
+    const int d_lo = abs_mips32(p[-2 * step] - p[-step]), d_hi = abs_mips32(p[step] - p[0]);
+    return !(d_lo <= thresh && d_hi <= thresh); // 1 when either neighbour difference exceeds thresh
+}
+
+static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
+    const int inner = abs_mips32(p[-step] - p[0]), outer = abs_mips32(p[-2 * step] - p[step]);
+    return !(4 * inner + outer > t); // 1 when 4 * |p[-step] - p[0]| + |p[-2*step] - p[step]| is at most t
+}
+
+static WEBP_INLINE int needs_filter2(const uint8_t* p, int step, int t, int it) {
+    const int lo3 = p[-4 * step], lo2 = p[-3 * step], lo1 = p[-2 * step], lo0 = p[-step];
+    const int hi0 = p[0], hi1 = p[step], hi2 = p[2 * step], hi3 = p[3 * step];
+    // First gate: the weighted difference across p[-step] / p[0] must not exceed t.
+    const int edge_ok = (4 * abs_mips32(lo0 - hi0) + abs_mips32(lo1 - hi1)) <= t;
+    // Second gate: every adjacent pair within each 4-pixel side must differ by at most `it`.
+    return edge_ok &&
+           abs_mips32(lo3 - lo2) <= it && abs_mips32(lo2 - lo1) <= it && abs_mips32(lo1 - lo0) <= it &&
+           abs_mips32(hi3 - hi2) <= it && abs_mips32(hi2 - hi1) <= it && abs_mips32(hi1 - hi0) <= it;
+}
+
+// Visits `size` positions, advancing p by vstride each time. A position accepted by needs_filter2()
+// is handed to do_filter2() when hev() reports true and to do_filter6() otherwise.
+static WEBP_INLINE void FilterLoop26(
+    uint8_t* p, int hstride, int vstride, int size, int thresh, int ithresh, int hev_thresh) {
+    const int limit = 2 * thresh + 1;
+    for (; size > 0; --size, p += vstride) {
+        if (!needs_filter2(p, hstride, limit, ithresh)) continue; // rejected: pixels stay as they are
+        if (hev(p, hstride, hev_thresh)) {
+            do_filter2(p, hstride);
+        } else {
+            do_filter6(p, hstride);
+        }
+    }
+}
+
+// Visits `size` positions, advancing p by vstride each time. A position accepted by needs_filter2()
+// is handed to do_filter2() when hev() reports true and to do_filter4() otherwise.
+static WEBP_INLINE void FilterLoop24(
+    uint8_t* p, int hstride, int vstride, int size, int thresh, int ithresh, int hev_thresh) {
+    const int limit = 2 * thresh + 1;
+    for (; size > 0; --size, p += vstride) {
+        if (!needs_filter2(p, hstride, limit, ithresh)) continue; // rejected: pixels stay as they are
+        if (hev(p, hstride, hev_thresh)) {
+            do_filter2(p, hstride);
+        } else {
+            do_filter4(p, hstride);
+        }
+    }
+}
+
+// on macroblock edges
+static void VFilter16(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {
+    FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh); // 16 adjacent bytes starting at p; filter taps are `stride` bytes apart
+}
+
+static void HFilter16(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {
+    FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh); // 16 positions `stride` bytes apart starting at p; filter taps are 1 byte apart
+}
+
+// 8-pixels wide variant, for chroma filtering
+static void VFilter8(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {
+    FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh); // u: 8 adjacent bytes, filter taps `stride` bytes apart
+    FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh); // v: identical pass with the same thresholds
+}
+
+static void HFilter8(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {
+    FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh); // u: 8 positions `stride` bytes apart, filter taps 1 byte apart
+    FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh); // v: identical pass with the same thresholds
+}
+
+static void VFilter8i(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {
+    FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh); // u: starts 4 * stride bytes past u; 8 adjacent bytes, taps `stride` apart
+    FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh); // v: identical pass at the same offset
+}
+
+static void HFilter8i(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {
+    FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh); // u: starts 4 bytes past u; 8 positions `stride` bytes apart, taps 1 byte apart
+    FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh); // v: identical pass at the same offset
+}
+
+// on three inner edges
+static void VFilter16i(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {
+    int passes_left = 3; // three FilterLoop24 passes, each starting 4 * stride bytes after the previous one
+    do {
+        p += 4 * stride;
+        FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
+    } while (--passes_left > 0);
+}
+
+static void HFilter16i(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {
+    int passes_left = 3; // three FilterLoop24 passes, each starting 4 bytes after the previous one
+    do {
+        p += 4;
+        FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
+    } while (--passes_left > 0);
+}
+
+//------------------------------------------------------------------------------
+// Simple In-loop filtering (Paragraph 15.2)
+
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
+    const int limit = 2 * thresh + 1; // needs_filter() is given 2 * thresh + 1 rather than thresh itself
+    uint8_t* const end = p + 16;      // one past the 16 adjacent bytes handled here
+    for (; p != end; ++p) {
+        if (needs_filter(p, stride, limit)) {
+            do_filter2(p, stride);
+        }
+    }
+}
+
+// Runs the needs_filter()/do_filter2() pair with step 1 at p, p + stride, ..., p + 15 * stride.
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+    const int limit = 2 * thresh + 1;
+    int row;
+    for (row = 0; row < 16; ++row) {
+        uint8_t* const px = p + row * stride;
+        if (needs_filter(px, 1, limit)) do_filter2(px, 1);
+    }
+}
+
+static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
+    int pass; // three SimpleVFilter16 passes, each starting 4 * stride bytes after the previous one
+    for (pass = 0; pass < 3; ++pass) {
+        p += 4 * stride;
+        SimpleVFilter16(p, stride, thresh);
+    }
+}
+
+static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
+    int pass; // three SimpleHFilter16 passes, each starting 4 bytes after the previous one
+    for (pass = 0; pass < 3; ++pass) {
+        p += 4;
+        SimpleHFilter16(p, stride, thresh);
+    }
+}
+
+static void TransformOne(const int16_t* in, uint8_t* dst) {
+    int temp0, temp1, temp2, temp3, temp4;
+    int temp5, temp6, temp7, temp8, temp9;
+    int temp10, temp11, temp12, temp13, temp14;
+    int temp15, temp16, temp17, temp18;
+    int16_t* p_in = (int16_t*)in;  // non-const alias of in, used as the asm input operand
+    // Two-pass 4x4 transform of the 16 coefficients at `in`; each result is added to the 4x4 byte
+    // block at `dst` (rows BPS bytes apart) and clamped to [0, 255] before being stored back.
+    // Both passes are unrolled and merged so that no temporary buffer is needed and stalls are
+    // reduced; the multiply-and-shift of the MUL macro is written out inline as mul + sra 16.
+    __asm__ volatile(
+    "lh       %[temp0],  0(%[in])                      \n\t"  // first group: in[0], in[8], in[4], in[12]
+    "lh       %[temp8],  16(%[in])                     \n\t"
+    "lh       %[temp4],  8(%[in])                      \n\t"
+    "lh       %[temp12], 24(%[in])                     \n\t"
+    "addu     %[temp16], %[temp0],  %[temp8]           \n\t"  // a = in[0] + in[8]
+    "subu     %[temp0],  %[temp0],  %[temp8]           \n\t"  // b = in[0] - in[8]
+    "mul      %[temp8],  %[temp4],  %[kC2]             \n\t"
+    "mul      %[temp17], %[temp12], %[kC1]             \n\t"
+    "mul      %[temp4],  %[temp4],  %[kC1]             \n\t"
+    "mul      %[temp12], %[temp12], %[kC2]             \n\t"
+    "lh       %[temp1],  2(%[in])                      \n\t"  // second group: in[1], in[5], in[9], in[13]
+    "lh       %[temp5],  10(%[in])                     \n\t"
+    "lh       %[temp9],  18(%[in])                     \n\t"
+    "lh       %[temp13], 26(%[in])                     \n\t"
+    "sra      %[temp8],  %[temp8],  16                 \n\t"  // every product is scaled back with >> 16
+    "sra      %[temp17], %[temp17], 16                 \n\t"
+    "sra      %[temp4],  %[temp4],  16                 \n\t"
+    "sra      %[temp12], %[temp12], 16                 \n\t"
+    "lh       %[temp2],  4(%[in])                      \n\t"  // third group: in[2], in[6], in[10], in[14]
+    "lh       %[temp6],  12(%[in])                     \n\t"
+    "lh       %[temp10], 20(%[in])                     \n\t"
+    "lh       %[temp14], 28(%[in])                     \n\t"
+    "subu     %[temp17], %[temp8],  %[temp17]          \n\t"  // c = ((in[4] * kC2) >> 16) - ((in[12] * kC1) >> 16)
+    "addu     %[temp4],  %[temp4],  %[temp12]          \n\t"  // d = ((in[4] * kC1) >> 16) + ((in[12] * kC2) >> 16)
+    "addu     %[temp8],  %[temp16], %[temp4]           \n\t"  // a + d
+    "subu     %[temp4],  %[temp16], %[temp4]           \n\t"  // a - d
+    "addu     %[temp16], %[temp1],  %[temp9]           \n\t"
+    "subu     %[temp1],  %[temp1],  %[temp9]           \n\t"
+    "lh       %[temp3],  6(%[in])                      \n\t"  // fourth group: in[3], in[7], in[11], in[15]
+    "lh       %[temp7],  14(%[in])                     \n\t"
+    "lh       %[temp11], 22(%[in])                     \n\t"
+    "lh       %[temp15], 30(%[in])                     \n\t"
+    "addu     %[temp12], %[temp0],  %[temp17]          \n\t"  // b + c
+    "subu     %[temp0],  %[temp0],  %[temp17]          \n\t"  // b - c
+    "mul      %[temp9],  %[temp5],  %[kC2]             \n\t"
+    "mul      %[temp17], %[temp13], %[kC1]             \n\t"
+    "mul      %[temp5],  %[temp5],  %[kC1]             \n\t"
+    "mul      %[temp13], %[temp13], %[kC2]             \n\t"
+    "sra      %[temp9],  %[temp9],  16                 \n\t"
+    "sra      %[temp17], %[temp17], 16                 \n\t"
+    "subu     %[temp17], %[temp9],  %[temp17]          \n\t"
+    "sra      %[temp5],  %[temp5],  16                 \n\t"
+    "sra      %[temp13], %[temp13], 16                 \n\t"
+    "addu     %[temp5],  %[temp5],  %[temp13]          \n\t"
+    "addu     %[temp13], %[temp1],  %[temp17]          \n\t"
+    "subu     %[temp1],  %[temp1],  %[temp17]          \n\t"
+    "mul      %[temp17], %[temp14], %[kC1]             \n\t"
+    "mul      %[temp14], %[temp14], %[kC2]             \n\t"
+    "addu     %[temp9],  %[temp16], %[temp5]           \n\t"
+    "subu     %[temp5],  %[temp16], %[temp5]           \n\t"
+    "addu     %[temp16], %[temp2],  %[temp10]          \n\t"
+    "subu     %[temp2],  %[temp2],  %[temp10]          \n\t"
+    "mul      %[temp10], %[temp6],  %[kC2]             \n\t"
+    "mul      %[temp6],  %[temp6],  %[kC1]             \n\t"
+    "sra      %[temp17], %[temp17], 16                 \n\t"
+    "sra      %[temp14], %[temp14], 16                 \n\t"
+    "sra      %[temp10], %[temp10], 16                 \n\t"
+    "sra      %[temp6],  %[temp6],  16                 \n\t"
+    "subu     %[temp17], %[temp10], %[temp17]          \n\t"
+    "addu     %[temp6],  %[temp6],  %[temp14]          \n\t"
+    "addu     %[temp10], %[temp16], %[temp6]           \n\t"
+    "subu     %[temp6],  %[temp16], %[temp6]           \n\t"
+    "addu     %[temp14], %[temp2],  %[temp17]          \n\t"
+    "subu     %[temp2],  %[temp2],  %[temp17]          \n\t"
+    "mul      %[temp17], %[temp15], %[kC1]             \n\t"
+    "mul      %[temp15], %[temp15], %[kC2]             \n\t"
+    "addu     %[temp16], %[temp3],  %[temp11]          \n\t"
+    "subu     %[temp3],  %[temp3],  %[temp11]          \n\t"
+    "mul      %[temp11], %[temp7],  %[kC2]             \n\t"
+    "mul      %[temp7],  %[temp7],  %[kC1]             \n\t"
+    "addiu    %[temp8],  %[temp8],  4                  \n\t"  // add 4 to the first group's four results (bias ahead of the final >> 3)
+    "addiu    %[temp12], %[temp12], 4                  \n\t"
+    "addiu    %[temp0],  %[temp0],  4                  \n\t"
+    "addiu    %[temp4],  %[temp4],  4                  \n\t"
+    "sra      %[temp17], %[temp17], 16                 \n\t"
+    "sra      %[temp15], %[temp15], 16                 \n\t"
+    "sra      %[temp11], %[temp11], 16                 \n\t"
+    "sra      %[temp7],  %[temp7],  16                 \n\t"
+    "subu     %[temp17], %[temp11], %[temp17]          \n\t"
+    "addu     %[temp7],  %[temp7],  %[temp15]          \n\t"
+    "addu     %[temp15], %[temp3],  %[temp17]          \n\t"
+    "subu     %[temp3],  %[temp3],  %[temp17]          \n\t"
+    "addu     %[temp11], %[temp16], %[temp7]           \n\t"
+    "subu     %[temp7],  %[temp16], %[temp7]           \n\t"
+    "addu     %[temp16], %[temp8],  %[temp10]          \n\t"  // second pass, on the a + d results of the four groups (temp8..temp11)
+    "subu     %[temp8],  %[temp8],  %[temp10]          \n\t"
+    "mul      %[temp10], %[temp9],  %[kC2]             \n\t"
+    "mul      %[temp17], %[temp11], %[kC1]             \n\t"
+    "mul      %[temp9],  %[temp9],  %[kC1]             \n\t"
+    "mul      %[temp11], %[temp11], %[kC2]             \n\t"
+    "sra      %[temp10], %[temp10], 16                 \n\t"
+    "sra      %[temp17], %[temp17], 16                 \n\t"
+    "sra      %[temp9],  %[temp9],  16                 \n\t"
+    "sra      %[temp11], %[temp11], 16                 \n\t"
+    "subu     %[temp17], %[temp10], %[temp17]          \n\t"
+    "addu     %[temp11], %[temp9],  %[temp11]          \n\t"
+    "addu     %[temp10], %[temp12], %[temp14]          \n\t"  // same combination on the b + c results (temp12..temp15)
+    "subu     %[temp12], %[temp12], %[temp14]          \n\t"
+    "mul      %[temp14], %[temp13], %[kC2]             \n\t"
+    "mul      %[temp9],  %[temp15], %[kC1]             \n\t"
+    "mul      %[temp13], %[temp13], %[kC1]             \n\t"
+    "mul      %[temp15], %[temp15], %[kC2]             \n\t"
+    "sra      %[temp14], %[temp14], 16                 \n\t"
+    "sra      %[temp9],  %[temp9],  16                 \n\t"
+    "sra      %[temp13], %[temp13], 16                 \n\t"
+    "sra      %[temp15], %[temp15], 16                 \n\t"
+    "subu     %[temp9],  %[temp14], %[temp9]           \n\t"
+    "addu     %[temp15], %[temp13], %[temp15]          \n\t"
+    "addu     %[temp14], %[temp0],  %[temp2]           \n\t"  // same combination on the b - c results (temp0..temp3)
+    "subu     %[temp0],  %[temp0],  %[temp2]           \n\t"
+    "mul      %[temp2],  %[temp1],  %[kC2]             \n\t"
+    "mul      %[temp13], %[temp3],  %[kC1]             \n\t"
+    "mul      %[temp1],  %[temp1],  %[kC1]             \n\t"
+    "mul      %[temp3],  %[temp3],  %[kC2]             \n\t"
+    "sra      %[temp2],  %[temp2],  16                 \n\t"
+    "sra      %[temp13], %[temp13], 16                 \n\t"
+    "sra      %[temp1],  %[temp1],  16                 \n\t"
+    "sra      %[temp3],  %[temp3],  16                 \n\t"
+    "subu     %[temp13], %[temp2],  %[temp13]          \n\t"
+    "addu     %[temp3],  %[temp1],  %[temp3]           \n\t"
+    "addu     %[temp2],  %[temp4],  %[temp6]           \n\t"  // same combination on the a - d results (temp4..temp7)
+    "subu     %[temp4],  %[temp4],  %[temp6]           \n\t"
+    "mul      %[temp6],  %[temp5],  %[kC2]             \n\t"
+    "mul      %[temp1],  %[temp7],  %[kC1]             \n\t"
+    "mul      %[temp5],  %[temp5],  %[kC1]             \n\t"
+    "mul      %[temp7],  %[temp7],  %[kC2]             \n\t"
+    "sra      %[temp6],  %[temp6],  16                 \n\t"
+    "sra      %[temp1],  %[temp1],  16                 \n\t"
+    "sra      %[temp5],  %[temp5],  16                 \n\t"
+    "sra      %[temp7],  %[temp7],  16                 \n\t"
+    "subu     %[temp1],  %[temp6],  %[temp1]           \n\t"
+    "addu     %[temp7],  %[temp5],  %[temp7]           \n\t"
+    "addu     %[temp5],  %[temp16], %[temp11]          \n\t"
+    "subu     %[temp16], %[temp16], %[temp11]          \n\t"
+    "addu     %[temp11], %[temp8],  %[temp17]          \n\t"
+    "subu     %[temp8],  %[temp8],  %[temp17]          \n\t"
+    "sra      %[temp5],  %[temp5],  3                  \n\t"  // each of the 16 outputs is shifted right by 3
+    "sra      %[temp16], %[temp16], 3                  \n\t"
+    "sra      %[temp11], %[temp11], 3                  \n\t"
+    "sra      %[temp8],  %[temp8],  3                  \n\t"
+    "addu     %[temp17], %[temp10], %[temp15]          \n\t"
+    "subu     %[temp10], %[temp10], %[temp15]          \n\t"
+    "addu     %[temp15], %[temp12], %[temp9]           \n\t"
+    "subu     %[temp12], %[temp12], %[temp9]           \n\t"
+    "sra      %[temp17], %[temp17], 3                  \n\t"
+    "sra      %[temp10], %[temp10], 3                  \n\t"
+    "sra      %[temp15], %[temp15], 3                  \n\t"
+    "sra      %[temp12], %[temp12], 3                  \n\t"
+    "addu     %[temp9],  %[temp14], %[temp3]           \n\t"
+    "subu     %[temp14], %[temp14], %[temp3]           \n\t"
+    "addu     %[temp3],  %[temp0],  %[temp13]          \n\t"
+    "subu     %[temp0],  %[temp0],  %[temp13]          \n\t"
+    "sra      %[temp9],  %[temp9],  3                  \n\t"
+    "sra      %[temp14], %[temp14], 3                  \n\t"
+    "sra      %[temp3],  %[temp3],  3                  \n\t"
+    "sra      %[temp0],  %[temp0],  3                  \n\t"
+    "addu     %[temp13], %[temp2],  %[temp7]           \n\t"
+    "subu     %[temp2],  %[temp2],  %[temp7]           \n\t"
+    "addu     %[temp7],  %[temp4],  %[temp1]           \n\t"
+    "subu     %[temp4],  %[temp4],  %[temp1]           \n\t"
+    "sra      %[temp13], %[temp13], 3                  \n\t"
+    "sra      %[temp2],  %[temp2],  3                  \n\t"
+    "sra      %[temp7],  %[temp7],  3                  \n\t"
+    "sra      %[temp4],  %[temp4],  3                  \n\t"
+    "addiu    %[temp6],  $zero,     255                \n\t"  // temp6 = 255, the upper clamp bound used below
+    "lbu      %[temp1],  0+0*" XSTR(BPS) "(%[dst])     \n\t"  // row 0, column 0: destination byte ...
+    "addu     %[temp1],  %[temp1],  %[temp5]           \n\t"  // ... plus the transform output
+    "sra      %[temp5],  %[temp1],  8                  \n\t"  // non-zero iff the sum lies outside [0, 255]
+    "sra      %[temp18], %[temp1],  31                 \n\t"  // sign mask: 0 for a non-negative sum, -1 otherwise
+    "beqz     %[temp5],  1f                            \n\t"  // in range: store the sum unchanged
+    "xor      %[temp1],  %[temp1],  %[temp1]           \n\t"  // out of range: start from 0 ...
+    "movz     %[temp1],  %[temp6],  %[temp18]          \n\t"  // ... and use 255 instead when the sum was not negative
+  "1:                                                  \n\t"
+    "lbu      %[temp18], 1+0*" XSTR(BPS) "(%[dst])     \n\t"  // row 0, column 1 (same add-and-clamp sequence)
+    "sb       %[temp1],  0+0*" XSTR(BPS) "(%[dst])     \n\t"
+    "addu     %[temp18], %[temp18], %[temp11]          \n\t"
+    "sra      %[temp11], %[temp18], 8                  \n\t"
+    "sra      %[temp1],  %[temp18], 31                 \n\t"
+    "beqz     %[temp11], 2f                            \n\t"
+    "xor      %[temp18], %[temp18], %[temp18]          \n\t"
+    "movz     %[temp18], %[temp6],  %[temp1]           \n\t"
+  "2:                                                  \n\t"
+    "lbu      %[temp1],  2+0*" XSTR(BPS) "(%[dst])     \n\t"  // row 0, column 2
+    "sb       %[temp18], 1+0*" XSTR(BPS) "(%[dst])     \n\t"
+    "addu     %[temp1],  %[temp1],  %[temp8]           \n\t"
+    "sra      %[temp8],  %[temp1],  8                  \n\t"
+    "sra      %[temp18], %[temp1],  31                 \n\t"
+    "beqz     %[temp8],  3f                            \n\t"
+    "xor      %[temp1],  %[temp1],  %[temp1]           \n\t"
+    "movz     %[temp1],  %[temp6],  %[temp18]          \n\t"
+  "3:                                                  \n\t"
+    "lbu      %[temp18], 3+0*" XSTR(BPS) "(%[dst])     \n\t"  // row 0, column 3
+    "sb       %[temp1],  2+0*" XSTR(BPS) "(%[dst])     \n\t"
+    "addu     %[temp18], %[temp18], %[temp16]          \n\t"
+    "sra      %[temp16], %[temp18], 8                  \n\t"
+    "sra      %[temp1],  %[temp18], 31                 \n\t"
+    "beqz     %[temp16], 4f                            \n\t"
+    "xor      %[temp18], %[temp18], %[temp18]          \n\t"
+    "movz     %[temp18], %[temp6],  %[temp1]           \n\t"
+  "4:                                                  \n\t"
+    "sb       %[temp18], 3+0*" XSTR(BPS) "(%[dst])     \n\t"
+    "lbu      %[temp5],  0+1*" XSTR(BPS) "(%[dst])     \n\t"  // row 1 (offset 1 * BPS): four columns loaded, summed and clamped together
+    "lbu      %[temp8],  1+1*" XSTR(BPS) "(%[dst])     \n\t"
+    "lbu      %[temp11], 2+1*" XSTR(BPS) "(%[dst])     \n\t"
+    "lbu      %[temp16], 3+1*" XSTR(BPS) "(%[dst])     \n\t"
+    "addu     %[temp5],  %[temp5],  %[temp17]          \n\t"
+    "addu     %[temp8],  %[temp8],  %[temp15]          \n\t"
+    "addu     %[temp11], %[temp11], %[temp12]          \n\t"
+    "addu     %[temp16], %[temp16], %[temp10]          \n\t"
+    "sra      %[temp18], %[temp5],  8                  \n\t"
+    "sra      %[temp1],  %[temp5],  31                 \n\t"
+    "beqz     %[temp18], 5f                            \n\t"
+    "xor      %[temp5],  %[temp5],  %[temp5]           \n\t"
+    "movz     %[temp5],  %[temp6],  %[temp1]           \n\t"
+  "5:                                                  \n\t"
+    "sra      %[temp18], %[temp8],  8                  \n\t"
+    "sra      %[temp1],  %[temp8],  31                 \n\t"
+    "beqz     %[temp18], 6f                            \n\t"
+    "xor      %[temp8],  %[temp8],  %[temp8]           \n\t"
+    "movz     %[temp8],  %[temp6],  %[temp1]           \n\t"
+  "6:                                                  \n\t"
+    "sra      %[temp18], %[temp11], 8                  \n\t"
+    "sra      %[temp1],  %[temp11], 31                 \n\t"
+    "sra      %[temp17], %[temp16], 8                  \n\t"
+    "sra      %[temp15], %[temp16], 31                 \n\t"
+    "beqz     %[temp18], 7f                            \n\t"
+    "xor      %[temp11], %[temp11], %[temp11]          \n\t"
+    "movz     %[temp11], %[temp6],  %[temp1]           \n\t"
+  "7:                                                  \n\t"
+    "beqz     %[temp17], 8f                            \n\t"
+    "xor      %[temp16], %[temp16], %[temp16]          \n\t"
+    "movz     %[temp16], %[temp6],  %[temp15]          \n\t"
+  "8:                                                  \n\t"
+    "sb       %[temp5],  0+1*" XSTR(BPS) "(%[dst])     \n\t"
+    "sb       %[temp8],  1+1*" XSTR(BPS) "(%[dst])     \n\t"
+    "sb       %[temp11], 2+1*" XSTR(BPS) "(%[dst])     \n\t"
+    "sb       %[temp16], 3+1*" XSTR(BPS) "(%[dst])     \n\t"
+    "lbu      %[temp5],  0+2*" XSTR(BPS) "(%[dst])     \n\t"  // row 2 (offset 2 * BPS)
+    "lbu      %[temp8],  1+2*" XSTR(BPS) "(%[dst])     \n\t"
+    "lbu      %[temp11], 2+2*" XSTR(BPS) "(%[dst])     \n\t"
+    "lbu      %[temp16], 3+2*" XSTR(BPS) "(%[dst])     \n\t"
+    "addu     %[temp5],  %[temp5],  %[temp9]           \n\t"
+    "addu     %[temp8],  %[temp8],  %[temp3]           \n\t"
+    "addu     %[temp11], %[temp11], %[temp0]           \n\t"
+    "addu     %[temp16], %[temp16], %[temp14]          \n\t"
+    "sra      %[temp18], %[temp5],  8                  \n\t"
+    "sra      %[temp1],  %[temp5],  31                 \n\t"
+    "sra      %[temp17], %[temp8],  8                  \n\t"
+    "sra      %[temp15], %[temp8],  31                 \n\t"
+    "sra      %[temp12], %[temp11], 8                  \n\t"
+    "sra      %[temp10], %[temp11], 31                 \n\t"
+    "sra      %[temp9],  %[temp16], 8                  \n\t"
+    "sra      %[temp3],  %[temp16], 31                 \n\t"
+    "beqz     %[temp18], 9f                            \n\t"
+    "xor      %[temp5],  %[temp5],  %[temp5]           \n\t"
+    "movz     %[temp5],  %[temp6],  %[temp1]           \n\t"
+  "9:                                                  \n\t"
+    "beqz     %[temp17], 10f                           \n\t"
+    "xor      %[temp8],  %[temp8],  %[temp8]           \n\t"
+    "movz     %[temp8],  %[temp6],  %[temp15]          \n\t"
+  "10:                                                 \n\t"
+    "beqz     %[temp12], 11f                           \n\t"
+    "xor      %[temp11], %[temp11], %[temp11]          \n\t"
+    "movz     %[temp11], %[temp6],  %[temp10]          \n\t"
+  "11:                                                 \n\t"
+    "beqz     %[temp9],  12f                           \n\t"
+    "xor      %[temp16], %[temp16], %[temp16]          \n\t"
+    "movz     %[temp16], %[temp6],  %[temp3]           \n\t"
+  "12:                                                 \n\t"
+    "sb       %[temp5],  0+2*" XSTR(BPS) "(%[dst])     \n\t"
+    "sb       %[temp8],  1+2*" XSTR(BPS) "(%[dst])     \n\t"
+    "sb       %[temp11], 2+2*" XSTR(BPS) "(%[dst])     \n\t"
+    "sb       %[temp16], 3+2*" XSTR(BPS) "(%[dst])     \n\t"
+    "lbu      %[temp5],  0+3*" XSTR(BPS) "(%[dst])     \n\t"  // row 3 (offset 3 * BPS)
+    "lbu      %[temp8],  1+3*" XSTR(BPS) "(%[dst])     \n\t"
+    "lbu      %[temp11], 2+3*" XSTR(BPS) "(%[dst])     \n\t"
+    "lbu      %[temp16], 3+3*" XSTR(BPS) "(%[dst])     \n\t"
+    "addu     %[temp5],  %[temp5],  %[temp13]          \n\t"
+    "addu     %[temp8],  %[temp8],  %[temp7]           \n\t"
+    "addu     %[temp11], %[temp11], %[temp4]           \n\t"
+    "addu     %[temp16], %[temp16], %[temp2]           \n\t"
+    "sra      %[temp18], %[temp5],  8                  \n\t"
+    "sra      %[temp1],  %[temp5],  31                 \n\t"
+    "sra      %[temp17], %[temp8],  8                  \n\t"
+    "sra      %[temp15], %[temp8],  31                 \n\t"
+    "sra      %[temp12], %[temp11], 8                  \n\t"
+    "sra      %[temp10], %[temp11], 31                 \n\t"
+    "sra      %[temp9],  %[temp16], 8                  \n\t"
+    "sra      %[temp3],  %[temp16], 31                 \n\t"
+    "beqz     %[temp18], 13f                           \n\t"
+    "xor      %[temp5],  %[temp5],  %[temp5]           \n\t"
+    "movz     %[temp5],  %[temp6],  %[temp1]           \n\t"
+  "13:                                                 \n\t"
+    "beqz     %[temp17], 14f                           \n\t"
+    "xor      %[temp8],  %[temp8],  %[temp8]           \n\t"
+    "movz     %[temp8],  %[temp6],  %[temp15]          \n\t"
+  "14:                                                 \n\t"
+    "beqz     %[temp12], 15f                           \n\t"
+    "xor      %[temp11], %[temp11], %[temp11]          \n\t"
+    "movz     %[temp11], %[temp6],  %[temp10]          \n\t"
+  "15:                                                 \n\t"
+    "beqz     %[temp9],  16f                           \n\t"
+    "xor      %[temp16], %[temp16], %[temp16]          \n\t"
+    "movz     %[temp16], %[temp6],  %[temp3]           \n\t"
+  "16:                                                 \n\t"
+    "sb       %[temp5],  0+3*" XSTR(BPS) "(%[dst])     \n\t"
+    "sb       %[temp8],  1+3*" XSTR(BPS) "(%[dst])     \n\t"
+    "sb       %[temp11], 2+3*" XSTR(BPS) "(%[dst])     \n\t"
+    "sb       %[temp16], 3+3*" XSTR(BPS) "(%[dst])     \n\t"
+
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),  // outputs: all 19 scratch registers, early-clobber
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
+      [temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
+      [temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
+      [temp18]"=&r"(temp18)
+    : [in]"r"(p_in), [kC1]"r"(kC1), [kC2]"r"(kC2), [dst]"r"(dst)  // inputs: both pointers and the two multipliers
+    : "memory", "hi", "lo"  // dst is written through its pointer; hi/lo are listed for the mul instructions
+  );
+}
+
+static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
+    // The block at (in, dst) is always transformed; a non-zero do_two adds the one 16 coefficients / 4 bytes on.
+    TransformOne(in, dst);
+    if (!do_two) return;
+    TransformOne(in + 16, dst + 4);
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8DspInitMIPS32(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitMIPS32(void) {
+    VP8InitClipTables();  // runs before any of the pointers below is installed
+    // Transform entry point.
+    VP8Transform = TransformTwo;
+    // Simple in-loop filters.
+    VP8SimpleVFilter16 = SimpleVFilter16;
+    VP8SimpleVFilter16i = SimpleVFilter16i;
+    VP8SimpleHFilter16 = SimpleHFilter16;
+    VP8SimpleHFilter16i = SimpleHFilter16i;
+    // Remaining loop filters, vertical variants first.
+    VP8VFilter16 = VFilter16;
+    VP8VFilter16i = VFilter16i;
+    VP8VFilter8 = VFilter8;
+    VP8VFilter8i = VFilter8i;
+    VP8HFilter16 = HFilter16;
+    VP8HFilter16i = HFilter16i;
+    VP8HFilter8 = HFilter8;
+    VP8HFilter8i = HFilter8i;
+}
+
+#else // !WEBP_USE_MIPS32
+
+WEBP_DSP_INIT_STUB(VP8DspInitMIPS32)
+
+#endif // WEBP_USE_MIPS32
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/dec_mips_dsp_r2.c b/codec/L2/demos/webpEnc/host/src/dsp/dec_mips_dsp_r2.c
new file mode 100644
index 0000000000..bcabd89a9e
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/dec_mips_dsp_r2.c
@@ -0,0 +1,981 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of dsp functions
+//
+// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
+//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+#include "./mips_macro.h"
+
+static const int kC1 = 20091 + (1 << 16);  // 85627, i.e. about 1.30656 once MUL's >> 16 is applied
+static const int kC2 = 35468;  // about 0.54120 once MUL's >> 16 is applied
+
+#define MUL(a, b) (((a) * (b)) >> 16)  // integer product followed by a 16-bit right shift
+
+static void TransformDC(const int16_t* in, uint8_t* dst) {
+    int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10;
+    // Only in[0] is read: (in[0] + 4) >> 3 is replicated into both halfwords of temp5 and used as every addend.
+    __asm__ volatile (
+    LOAD_WITH_OFFSET_X4(temp1, temp2, temp3, temp4, dst,  // NOTE(review): macro from mips_macro.h (not visible here);
+                        0, 0, 0, 0,                       // presumably loads one word from each of the four dst rows,
+                        0, 1, 2, 3,                       // BPS bytes apart -- confirm against the macro definition
+                        BPS)
+    "lh               %[temp5],  0(%[in])               \n\t"  // temp5 = in[0]
+    "addiu            %[temp5],  %[temp5],  4           \n\t"  // + 4
+    "ins              %[temp5],  %[temp5],  16, 16      \n\t"  // copy the low halfword into the high halfword
+    "shra.ph          %[temp5],  %[temp5],  3           \n\t"  // arithmetic >> 3 on each halfword
+    CONVERT_2_BYTES_TO_HALF(temp6, temp7, temp8, temp9, temp10, temp1, temp2,  // presumably widens the loaded bytes to halfwords
+                            temp3, temp1, temp2, temp3, temp4)
+    STORE_SAT_SUM_X2(temp6, temp7, temp8, temp9, temp10, temp1, temp2, temp3,  // presumably adds temp5 to every widened pixel
+                     temp5, temp5, temp5, temp5, temp5, temp5, temp5, temp5,   // and stores the saturated sums back to dst
+                     dst, 0, 1, 2, 3, BPS)                                     // -- confirm in mips_macro.h
+
+    OUTPUT_EARLY_CLOBBER_REGS_10()
+    : [in]"r"(in), [dst]"r"(dst)  // inputs: coefficient pointer and destination pointer
+    : "memory"  // dst is accessed through its pointer inside the asm
+  );
+}
+
+static void TransformAC3(const int16_t* in, uint8_t* dst) {
+    // Only in[0], in[1] and in[4] are read; their scaled values are prepared in C and combined in the asm.
+    const int a = in[0] + 4;
+    int c4 = MUL(in[4], kC2);
+    const int d4 = MUL(in[4], kC1);
+    const int c1 = MUL(in[1], kC2);
+    const int d1 = MUL(in[1], kC1);
+    int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
+    int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
+
+    __asm__ volatile (
+    "ins              %[c4],      %[d4],     16,       16    \n\t"  // high halfword of c4 <- low halfword of d4
+    "replv.ph         %[temp1],   %[a]                       \n\t"  // a replicated into both halfwords
+    "replv.ph         %[temp4],   %[d1]                      \n\t"  // d1 replicated into both halfwords
+    ADD_SUB_HALVES(temp2, temp3, temp1, c4)
+    "replv.ph         %[temp5],   %[c1]                      \n\t"  // c1 replicated into both halfwords
+    SHIFT_R_SUM_X2(temp1, temp6, temp7, temp8, temp2, temp9, temp10, temp4,
+                   temp2, temp2, temp3, temp3, temp4, temp5, temp4, temp5)
+    LOAD_WITH_OFFSET_X4(temp3, temp5, temp11, temp12, dst,
+                        0, 0, 0, 0, 0, 1, 2, 3, BPS)
+    CONVERT_2_BYTES_TO_HALF(temp13, temp14, temp3, temp15, temp5, temp16,
+                            temp11, temp17, temp3, temp5, temp11, temp12)
+    PACK_2_HALVES_TO_WORD(temp12, temp18, temp7, temp6, temp1, temp8, temp2,
+                          temp4, temp7, temp6, temp10, temp9)
+    STORE_SAT_SUM_X2(temp13, temp14, temp3, temp15, temp5, temp16, temp11, temp17, temp12, temp18,
+                     temp1, temp8, temp2, temp4, temp7, temp6, dst, 0, 1, 2, 3, BPS)
+
+    OUTPUT_EARLY_CLOBBER_REGS_18(),
+      [c4]"+&r"(c4)
+    : [dst]"r"(dst), [a]"r"(a), [d1]"r"(d1), [d4]"r"(d4), [c1]"r"(c1)
+    : "memory");
+}
+
+static void TransformOne(const int16_t* in, uint8_t* dst) {
+    int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
+    int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
+    // NOTE(review): built from mips_macro.h macros (not visible here); macro descriptions below are inferred from names.
+    __asm__ volatile (
+    "ulw              %[temp1],   0(%[in])                 \n\t"  // unaligned word load: in[0] and in[1] together
+    "ulw              %[temp2],   16(%[in])                \n\t"  // in[8] and in[9]
+    LOAD_IN_X2(temp5, temp6, 24, 26)  // presumably loads from byte offsets 24 and 26 of in -- confirm
+    ADD_SUB_HALVES(temp3, temp4, temp1, temp2)  // presumably packed-halfword sum and difference -- confirm
+    LOAD_IN_X2(temp1, temp2, 8, 10)
+    MUL_SHIFT_SUM(temp7, temp8, temp9, temp10, temp11, temp12, temp13, temp14,
+                  temp10, temp8, temp9, temp7, temp1, temp2, temp5, temp6,
+                  temp13, temp11, temp14, temp12)
+    INSERT_HALF_X2(temp8, temp7, temp10, temp9)
+    "ulw              %[temp17],  4(%[in])                 \n\t"  // in[2] and in[3]
+    "ulw              %[temp18],  20(%[in])                \n\t"  // in[10] and in[11]
+    ADD_SUB_HALVES(temp1, temp2, temp3, temp8)
+    ADD_SUB_HALVES(temp5, temp6, temp4, temp7)
+    ADD_SUB_HALVES(temp7, temp8, temp17, temp18)
+    LOAD_IN_X2(temp17, temp18, 12, 14)
+    LOAD_IN_X2(temp9, temp10, 28, 30)
+    MUL_SHIFT_SUM(temp11, temp12, temp13, temp14, temp15, temp16, temp4, temp17,
+                  temp12, temp14, temp11, temp13, temp17, temp18, temp9, temp10,
+                  temp15, temp4, temp16, temp17)
+    INSERT_HALF_X2(temp11, temp12, temp13, temp14)
+    ADD_SUB_HALVES(temp17, temp8, temp8, temp11)
+    ADD_SUB_HALVES(temp3, temp4, temp7, temp12)
+
+    // horizontal
+    SRA_16(temp9, temp10, temp11, temp12, temp1, temp2, temp5, temp6)
+    INSERT_HALF_X2(temp1, temp6, temp5, temp2)
+    SRA_16(temp13, temp14, temp15, temp16, temp3, temp4, temp17, temp8)
+    "repl.ph          %[temp2],   0x4                      \n\t"  // both halfwords of temp2 = 4
+    INSERT_HALF_X2(temp3, temp8, temp17, temp4)
+    "addq.ph          %[temp1],   %[temp1],  %[temp2]      \n\t"  // add 4 to each halfword of temp1
+    "addq.ph          %[temp6],   %[temp6],  %[temp2]      \n\t"  // add 4 to each halfword of temp6
+    ADD_SUB_HALVES(temp2, temp4, temp1, temp3)
+    ADD_SUB_HALVES(temp5, temp7, temp6, temp8)
+    MUL_SHIFT_SUM(temp1, temp3, temp6, temp8, temp9, temp13, temp17, temp18,
+                  temp3, temp13, temp1, temp9, temp9, temp13, temp11, temp15,
+                  temp6, temp17, temp8, temp18)
+    MUL_SHIFT_SUM(temp6, temp8, temp18, temp17, temp11, temp15, temp12, temp16,
+                  temp8, temp15, temp6, temp11, temp12, temp16, temp10, temp14,
+                  temp18, temp12, temp17, temp16)
+    INSERT_HALF_X2(temp1, temp3, temp9, temp13)
+    INSERT_HALF_X2(temp6, temp8, temp11, temp15)
+    SHIFT_R_SUM_X2(temp9, temp10, temp11, temp12, temp13, temp14, temp15,
+                   temp16, temp2, temp4, temp5, temp7, temp3, temp1, temp8,
+                   temp6)
+    PACK_2_HALVES_TO_WORD(temp1, temp2, temp3, temp4, temp9, temp12, temp13,
+                          temp16, temp11, temp10, temp15, temp14)
+    LOAD_WITH_OFFSET_X4(temp10, temp11, temp14, temp15, dst,  // presumably loads the four dst rows, BPS bytes apart -- confirm
+                        0, 0, 0, 0,
+                        0, 1, 2, 3,
+                        BPS)
+    CONVERT_2_BYTES_TO_HALF(temp5, temp6, temp7, temp8, temp17, temp18, temp10,
+                            temp11, temp10, temp11, temp14, temp15)
+    STORE_SAT_SUM_X2(temp5, temp6, temp7, temp8, temp17, temp18, temp10, temp11,  // presumably stores saturated pixel + result sums -- confirm
+                     temp9, temp12, temp1, temp2, temp13, temp16, temp3, temp4,
+                     dst, 0, 1, 2, 3, BPS)
+
+    OUTPUT_EARLY_CLOBBER_REGS_18()
+    : [dst]"r"(dst), [in]"r"(in), [kC1]"r"(kC1), [kC2]"r"(kC2)  // inputs: both pointers and the two multipliers
+    : "memory", "hi", "lo"  // memory is accessed through the pointers; hi/lo are declared clobbered
+  );
+}
+
+static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
+    // The block at (in, dst) is always transformed; a non-zero do_two adds the one 16 coefficients / 4 bytes on.
+    TransformOne(in, dst);
+    if (!do_two) return;
+    TransformOne(in + 16, dst + 4);
+}
+
+static WEBP_INLINE void FilterLoop26(
+    uint8_t* p, int hstride, int vstride, int size, int thresh, int ithresh, int hev_thresh) {
+    const int thresh2 = 2 * thresh + 1;
+    int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
+    int temp10, temp11, temp12, temp13, temp14, temp15;
+
+    __asm__ volatile(
+        ".set      push                                      \n\t"
+        ".set      noreorder                                 \n\t"
+        "1:                                                    \n\t"
+        "negu      %[temp1],  %[hstride]                     \n\t"
+        "addiu     %[size],   %[size],        -1             \n\t"
+        "sll       %[temp2],  %[hstride],     1              \n\t"
+        "sll       %[temp3],  %[temp1],       1              \n\t"
+        "addu      %[temp4],  %[temp2],       %[hstride]     \n\t"
+        "addu      %[temp5],  %[temp3],       %[temp1]       \n\t"
+        "lbu       %[temp7],  0(%[p])                        \n\t"
+        "sll       %[temp6],  %[temp3],       1              \n\t"
+        "lbux      %[temp8],  %[temp5](%[p])                 \n\t"
+        "lbux      %[temp9],  %[temp3](%[p])                 \n\t"
+        "lbux      %[temp10], %[temp1](%[p])                 \n\t"
+        "lbux      %[temp11], %[temp6](%[p])                 \n\t"
+        "lbux      %[temp12], %[hstride](%[p])               \n\t"
+        "lbux      %[temp13], %[temp2](%[p])                 \n\t"
+        "lbux      %[temp14], %[temp4](%[p])                 \n\t"
+        "subu      %[temp1],  %[temp10],      %[temp7]       \n\t"
+        "subu      %[temp2],  %[temp9],       %[temp12]      \n\t"
+        "absq_s.w  %[temp3],  %[temp1]                       \n\t"
+        "absq_s.w  %[temp4],  %[temp2]                       \n\t"
+        "negu      %[temp1],  %[temp1]                       \n\t"
+        "sll       %[temp3],  %[temp3],       2              \n\t"
+        "addu      %[temp15], %[temp3],       %[temp4]       \n\t"
+        "subu      %[temp3],  %[temp15],      %[thresh2]     \n\t"
+        "sll       %[temp6],  %[temp1],       1              \n\t"
+        "bgtz      %[temp3],  3f                             \n\t"
+        " subu     %[temp4],  %[temp11],      %[temp8]       \n\t"
+        "absq_s.w  %[temp4],  %[temp4]                       \n\t"
+        "shll_s.w  %[temp2],  %[temp2],       24             \n\t"
+        "subu      %[temp4],  %[temp4],       %[ithresh]     \n\t"
+        "bgtz      %[temp4],  3f                             \n\t"
+        " subu     %[temp3],  %[temp8],       %[temp9]       \n\t"
+        "absq_s.w  %[temp3],  %[temp3]                       \n\t"
+        "subu      %[temp3],  %[temp3],       %[ithresh]     \n\t"
+        "bgtz      %[temp3],  3f                             \n\t"
+        " subu     %[temp5],  %[temp9],       %[temp10]      \n\t"
+        "absq_s.w  %[temp3],  %[temp5]                       \n\t"
+        "absq_s.w  %[temp5],  %[temp5]                       \n\t"
+        "subu      %[temp3],  %[temp3],       %[ithresh]     \n\t"
+        "bgtz      %[temp3],  3f                             \n\t"
+        " subu     %[temp3],  %[temp14],      %[temp13]      \n\t"
+        "absq_s.w  %[temp3],  %[temp3]                       \n\t"
+        "slt       %[temp5],  %[hev_thresh],  %[temp5]       \n\t"
+        "subu      %[temp3],  %[temp3],       %[ithresh]     \n\t"
+        "bgtz      %[temp3],  3f                             \n\t"
+        " subu     %[temp3],  %[temp13],      %[temp12]      \n\t"
+        "absq_s.w  %[temp3],  %[temp3]                       \n\t"
+        "sra       %[temp4],  %[temp2],       24             \n\t"
+        "subu      %[temp3],  %[temp3],       %[ithresh]     \n\t"
+        "bgtz      %[temp3],  3f                             \n\t"
+        " subu     %[temp15], %[temp12],      %[temp7]       \n\t"
+        "absq_s.w  %[temp3],  %[temp15]                      \n\t"
+        "absq_s.w  %[temp15], %[temp15]                      \n\t"
+        "subu      %[temp3],  %[temp3],       %[ithresh]     \n\t"
+        "bgtz      %[temp3],  3f                             \n\t"
+        " slt      %[temp15], %[hev_thresh],  %[temp15]      \n\t"
+        "addu      %[temp3],  %[temp6],       %[temp1]       \n\t"
+        "or        %[temp2],  %[temp5],       %[temp15]      \n\t"
+        "addu      %[temp5],  %[temp4],       %[temp3]       \n\t"
+        "beqz      %[temp2],  4f                             \n\t"
+        " shra_r.w %[temp1],  %[temp5],       3              \n\t"
+        "addiu     %[temp2],  %[temp5],       3              \n\t"
+        "sra       %[temp2],  %[temp2],       3              \n\t"
+        "shll_s.w  %[temp1],  %[temp1],       27             \n\t"
+        "shll_s.w  %[temp2],  %[temp2],       27             \n\t"
+        "subu      %[temp3],  %[p],           %[hstride]     \n\t"
+        "sra       %[temp1],  %[temp1],       27             \n\t"
+        "sra       %[temp2],  %[temp2],       27             \n\t"
+        "subu      %[temp1],  %[temp7],       %[temp1]       \n\t"
+        "addu      %[temp2],  %[temp10],      %[temp2]       \n\t"
+        "lbux      %[temp2],  %[temp2](%[VP8kclip1])         \n\t"
+        "lbux      %[temp1],  %[temp1](%[VP8kclip1])         \n\t"
+        "sb        %[temp2],  0(%[temp3])                    \n\t"
+        "j         3f                                        \n\t"
+        " sb       %[temp1],  0(%[p])                        \n\t"
+        "4:                                                    \n\t"
+        "shll_s.w  %[temp5],  %[temp5],       24             \n\t"
+        "subu      %[temp14], %[p],           %[hstride]     \n\t"
+        "subu      %[temp11], %[temp14],      %[hstride]     \n\t"
+        "sra       %[temp6],  %[temp5],       24             \n\t"
+        "sll       %[temp1],  %[temp6],       3              \n\t"
+        "subu      %[temp15], %[temp11],      %[hstride]     \n\t"
+        "addu      %[temp2],  %[temp6],       %[temp1]       \n\t"
+        "sll       %[temp3],  %[temp2],       1              \n\t"
+        "addu      %[temp4],  %[temp3],       %[temp2]       \n\t"
+        "addiu     %[temp2],  %[temp2],       63             \n\t"
+        "addiu     %[temp3],  %[temp3],       63             \n\t"
+        "addiu     %[temp4],  %[temp4],       63             \n\t"
+        "sra       %[temp2],  %[temp2],       7              \n\t"
+        "sra       %[temp3],  %[temp3],       7              \n\t"
+        "sra       %[temp4],  %[temp4],       7              \n\t"
+        "addu      %[temp1],  %[temp8],       %[temp2]       \n\t"
+        "addu      %[temp5],  %[temp9],       %[temp3]       \n\t"
+        "addu      %[temp6],  %[temp10],      %[temp4]       \n\t"
+        "subu      %[temp8],  %[temp7],       %[temp4]       \n\t"
+        "subu      %[temp7],  %[temp12],      %[temp3]       \n\t"
+        "addu      %[temp10], %[p],           %[hstride]     \n\t"
+        "subu      %[temp9],  %[temp13],      %[temp2]       \n\t"
+        "addu      %[temp12], %[temp10],      %[hstride]     \n\t"
+        "lbux      %[temp2],  %[temp1](%[VP8kclip1])         \n\t"
+        "lbux      %[temp3],  %[temp5](%[VP8kclip1])         \n\t"
+        "lbux      %[temp4],  %[temp6](%[VP8kclip1])         \n\t"
+        "lbux      %[temp5],  %[temp8](%[VP8kclip1])         \n\t"
+        "lbux      %[temp6],  %[temp7](%[VP8kclip1])         \n\t"
+        "lbux      %[temp8],  %[temp9](%[VP8kclip1])         \n\t"
+        "sb        %[temp2],  0(%[temp15])                   \n\t"
+        "sb        %[temp3],  0(%[temp11])                   \n\t"
+        "sb        %[temp4],  0(%[temp14])                   \n\t"
+        "sb        %[temp5],  0(%[p])                        \n\t"
+        "sb        %[temp6],  0(%[temp10])                   \n\t"
+        "sb        %[temp8],  0(%[temp12])                   \n\t"
+        "3:                                                    \n\t"
+        "bgtz      %[size],   1b                             \n\t"
+        " addu     %[p],      %[p],           %[vstride]     \n\t"
+        ".set      pop                                       \n\t"
+        : [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
+          [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [temp8] "=&r"(temp8), [temp9] "=&r"(temp9),
+          [temp10] "=&r"(temp10), [temp11] "=&r"(temp11), [temp12] "=&r"(temp12), [temp13] "=&r"(temp13),
+          [temp14] "=&r"(temp14), [temp15] "=&r"(temp15), [size] "+&r"(size), [p] "+&r"(p)
+        : [hstride] "r"(hstride), [thresh2] "r"(thresh2), [ithresh] "r"(ithresh), [vstride] "r"(vstride),
+          [hev_thresh] "r"(hev_thresh), [VP8kclip1] "r"(VP8kclip1)
+        : "memory");
+}
+
+static WEBP_INLINE void FilterLoop24(  // edge filter: rewrites up to two samples on each side of the edge at p, for `size` positions
+    uint8_t* p, int hstride, int vstride, int size, int thresh, int ithresh, int hev_thresh) {  // hstride: step across the edge; vstride: step to the next position
+    int p0, q0, p1, q1, p2, q2, p3, q3;  // samples: pN = p[-(N+1)*hstride], qN = p[N*hstride]; p3/q3 are reused as scratch later
+    int step1, step2, temp1, temp2, temp3, temp4;  // scratch registers for the asm below
+    uint8_t* pTemp0;  // p - hstride, later p - 2*hstride
+    uint8_t* pTemp1;  // p + hstride
+    const int thresh2 = 2 * thresh + 1;  // limit for 4*|p0-q0| + |p1-q1|
+
+    __asm__ volatile(
+        ".set      push                                   \n\t"
+        ".set      noreorder                              \n\t"  // delay slots are filled by hand: they are the lines starting with a space
+        "blez      %[size],    3f                         \n\t"  // blez (not bltz): the loop is bottom-tested, so size == 0 must be skipped here
+        " nop                                             \n\t"
+        "2:                                                 \n\t"  // ---- one position per iteration ----
+        "negu      %[step1],   %[hstride]                 \n\t"  // step1 = -hstride
+        "lbu       %[q0],      0(%[p])                    \n\t"  // q0 = p[0]
+        "lbux      %[p0],      %[step1](%[p])             \n\t"  // p0 = p[-hstride]
+        "subu      %[step1],   %[step1],      %[hstride]  \n\t"  // step1 = -2*hstride
+        "lbux      %[q1],      %[hstride](%[p])           \n\t"  // q1 = p[hstride]
+        "subu      %[temp1],   %[p0],         %[q0]       \n\t"  // temp1 = p0 - q0
+        "lbux      %[p1],      %[step1](%[p])             \n\t"  // p1 = p[-2*hstride]
+        "addu      %[step2],   %[hstride],    %[hstride]  \n\t"  // step2 = 2*hstride
+        "absq_s.w  %[temp2],   %[temp1]                   \n\t"  // temp2 = |p0 - q0|
+        "subu      %[temp3],   %[p1],         %[q1]       \n\t"  // temp3 = p1 - q1 (kept for the branch at label 1)
+        "absq_s.w  %[temp4],   %[temp3]                   \n\t"  // temp4 = |p1 - q1|
+        "sll       %[temp2],   %[temp2],      2           \n\t"  // temp2 = 4*|p0 - q0|
+        "addu      %[temp2],   %[temp2],      %[temp4]    \n\t"  // temp2 = 4*|p0 - q0| + |p1 - q1|
+        "subu      %[temp4],   %[temp2],      %[thresh2]  \n\t"
+        "subu      %[step1],   %[step1],      %[hstride]  \n\t"  // step1 = -3*hstride
+        "bgtz      %[temp4],   0f                         \n\t"  // leave the position untouched when the sum exceeds thresh2
+        " lbux     %[p2],      %[step1](%[p])             \n\t"  // p2 = p[-3*hstride]
+        "subu      %[step1],   %[step1],      %[hstride]  \n\t"  // step1 = -4*hstride
+        "lbux      %[q2],      %[step2](%[p])             \n\t"  // q2 = p[2*hstride]
+        "lbux      %[p3],      %[step1](%[p])             \n\t"  // p3 = p[-4*hstride]
+        "subu      %[temp4],   %[p2],         %[p1]       \n\t"
+        "addu      %[step2],   %[step2],      %[hstride]  \n\t"  // step2 = 3*hstride
+        "subu      %[temp2],   %[p3],         %[p2]       \n\t"
+        "absq_s.w  %[temp4],   %[temp4]                   \n\t"  // temp4 = |p2 - p1|
+        "absq_s.w  %[temp2],   %[temp2]                   \n\t"  // temp2 = |p3 - p2|
+        "lbux      %[q3],      %[step2](%[p])             \n\t"  // q3 = p[3*hstride]
+        "subu      %[temp4],   %[temp4],      %[ithresh]  \n\t"
+        "negu      %[temp1],   %[temp1]                   \n\t"  // temp1 = q0 - p0
+        "bgtz      %[temp4],   0f                         \n\t"  // skip when |p2 - p1| > ithresh
+        " subu     %[temp2],   %[temp2],      %[ithresh]  \n\t"
+        "subu      %[p3],      %[p1],         %[p0]       \n\t"  // p3 register reused from here on
+        "bgtz      %[temp2],   0f                         \n\t"  // skip when |p3 - p2| > ithresh
+        " absq_s.w %[p3],      %[p3]                      \n\t"  // p3 = |p1 - p0|
+        "subu      %[temp4],   %[q3],         %[q2]       \n\t"
+        "subu      %[pTemp0],  %[p],          %[hstride]  \n\t"  // pTemp0 = address of the p0 sample
+        "absq_s.w  %[temp4],   %[temp4]                   \n\t"  // temp4 = |q3 - q2|
+        "subu      %[temp2],   %[p3],         %[ithresh]  \n\t"
+        "sll       %[step1],   %[temp1],      1           \n\t"  // step1 = 2*(q0 - p0)
+        "bgtz      %[temp2],   0f                         \n\t"  // skip when |p1 - p0| > ithresh
+        " subu     %[temp4],   %[temp4],      %[ithresh]  \n\t"
+        "subu      %[temp2],   %[q2],         %[q1]       \n\t"
+        "bgtz      %[temp4],   0f                         \n\t"  // skip when |q3 - q2| > ithresh
+        " absq_s.w %[temp2],   %[temp2]                   \n\t"  // temp2 = |q2 - q1|
+        "subu      %[q3],      %[q1],         %[q0]       \n\t"  // q3 register reused from here on
+        "absq_s.w  %[q3],      %[q3]                      \n\t"  // q3 = |q1 - q0|
+        "subu      %[temp2],   %[temp2],      %[ithresh]  \n\t"
+        "addu      %[temp1],   %[temp1],      %[step1]    \n\t"  // temp1 = 3*(q0 - p0)
+        "bgtz      %[temp2],   0f                         \n\t"  // skip when |q2 - q1| > ithresh
+        " subu     %[temp4],   %[q3],         %[ithresh]  \n\t"
+        "slt       %[p3],      %[hev_thresh], %[p3]       \n\t"  // p3 = (|p1 - p0| > hev_thresh)
+        "bgtz      %[temp4],   0f                         \n\t"  // skip when |q1 - q0| > ithresh
+        " slt      %[q3],      %[hev_thresh], %[q3]       \n\t"  // q3 = (|q1 - q0| > hev_thresh)
+        "or        %[q3],      %[q3],         %[p3]       \n\t"  // q3 = either difference exceeds hev_thresh
+        "bgtz      %[q3],      1f                         \n\t"  // flag set: take the two-sample update at label 1
+        " shra_r.w %[temp2],   %[temp1],      3           \n\t"  // temp2 = (3*(q0 - p0) + 4) >> 3 (rounding shift)
+        "addiu     %[temp1],   %[temp1],      3           \n\t"
+        "sra       %[temp1],   %[temp1],      3           \n\t"  // temp1 = (3*(q0 - p0) + 3) >> 3
+        "shll_s.w  %[temp2],   %[temp2],      27          \n\t"  // saturating <<27 followed by >>27 clamps to [-16, 15]
+        "shll_s.w  %[temp1],   %[temp1],      27          \n\t"
+        "addu      %[pTemp1],  %[p],          %[hstride]  \n\t"  // pTemp1 = address of the q1 sample
+        "sra       %[temp2],   %[temp2],      27          \n\t"
+        "sra       %[temp1],   %[temp1],      27          \n\t"
+        "addiu     %[step1],   %[temp2],      1           \n\t"
+        "sra       %[step1],   %[step1],      1           \n\t"  // step1 = (temp2 + 1) >> 1, the correction for p1/q1
+        "addu      %[p0],      %[p0],         %[temp1]    \n\t"
+        "addu      %[p1],      %[p1],         %[step1]    \n\t"
+        "subu      %[q0],      %[q0],         %[temp2]    \n\t"
+        "subu      %[q1],      %[q1],         %[step1]    \n\t"
+        "lbux      %[temp2],   %[p0](%[VP8kclip1])        \n\t"  // byte lookup at VP8kclip1 + value; table is defined elsewhere, presumably clamps to 0..255
+        "lbux      %[temp3],   %[q0](%[VP8kclip1])        \n\t"
+        "lbux      %[temp4],   %[q1](%[VP8kclip1])        \n\t"
+        "sb        %[temp2],   0(%[pTemp0])               \n\t"  // p[-hstride] = new p0
+        "lbux      %[temp1],   %[p1](%[VP8kclip1])        \n\t"
+        "subu      %[pTemp0],  %[pTemp0],    %[hstride]   \n\t"  // pTemp0 = p - 2*hstride
+        "sb        %[temp3],   0(%[p])                    \n\t"  // p[0] = new q0
+        "sb        %[temp4],   0(%[pTemp1])               \n\t"  // p[hstride] = new q1
+        "j         0f                                     \n\t"
+        " sb       %[temp1],   0(%[pTemp0])               \n\t"  // p[-2*hstride] = new p1
+        "1:                                                 \n\t"  // ---- hev flag set: only p0 and q0 are rewritten ----
+        "shll_s.w  %[temp3],   %[temp3],      24          \n\t"  // saturating <<24 followed by >>24 clamps p1 - q1 to [-128, 127]
+        "sra       %[temp3],   %[temp3],      24          \n\t"
+        "addu      %[temp1],   %[temp1],      %[temp3]    \n\t"  // temp1 = 3*(q0 - p0) + clamped (p1 - q1)
+        "shra_r.w  %[temp2],   %[temp1],      3           \n\t"  // temp2 = (temp1 + 4) >> 3
+        "addiu     %[temp1],   %[temp1],      3           \n\t"
+        "shll_s.w  %[temp2],   %[temp2],      27          \n\t"
+        "sra       %[temp1],   %[temp1],      3           \n\t"  // temp1 = (temp1 + 3) >> 3
+        "shll_s.w  %[temp1],   %[temp1],      27          \n\t"
+        "sra       %[temp2],   %[temp2],      27          \n\t"  // both corrections clamped to [-16, 15]
+        "sra       %[temp1],   %[temp1],      27          \n\t"
+        "addu      %[p0],      %[p0],         %[temp1]    \n\t"
+        "subu      %[q0],      %[q0],         %[temp2]    \n\t"
+        "lbux      %[temp1],   %[p0](%[VP8kclip1])        \n\t"
+        "lbux      %[temp2],   %[q0](%[VP8kclip1])        \n\t"
+        "sb        %[temp2],   0(%[p])                    \n\t"  // p[0] = new q0
+        "sb        %[temp1],   0(%[pTemp0])               \n\t"  // p[-hstride] = new p0
+        "0:                                                 \n\t"  // ---- next position ----
+        "subu      %[size],    %[size],       1           \n\t"
+        "bgtz      %[size],    2b                         \n\t"  // repeat while positions remain
+        " addu     %[p],       %[p],          %[vstride]  \n\t"  // delay slot: advance p even on the final pass
+        "3:                                                 \n\t"
+        ".set      pop                                    \n\t"
+        : [p0] "=&r"(p0), [q0] "=&r"(q0), [p1] "=&r"(p1), [q1] "=&r"(q1), [p2] "=&r"(p2), [q2] "=&r"(q2),  // early-clobber scratch outputs
+          [p3] "=&r"(p3), [q3] "=&r"(q3), [step2] "=&r"(step2), [step1] "=&r"(step1), [temp1] "=&r"(temp1),
+          [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [pTemp0] "=&r"(pTemp0),
+          [pTemp1] "=&r"(pTemp1), [p] "+&r"(p), [size] "+&r"(size)  // p and size are read and modified
+        : [vstride] "r"(vstride), [ithresh] "r"(ithresh), [hev_thresh] "r"(hev_thresh), [hstride] "r"(hstride),
+          [VP8kclip1] "r"(VP8kclip1), [thresh2] "r"(thresh2)
+        : "memory");  // the asm stores through p, so the compiler must not cache memory across it
+}
+
+// on macroblock edges
+static void VFilter16(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {  // 16-position filter with samples `stride` bytes apart
+    FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);  // NOTE(review): FilterLoop26's signature is outside this chunk; args presumably (p, hstride, vstride, size, ...) like FilterLoop24
+}
+
+static void HFilter16(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {  // same as VFilter16 with the two step arguments swapped
+    FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);  // presumably hstride = 1, vstride = stride, 16 positions (signature not visible here)
+}
+
+// 8-pixels wide variant, for chroma filtering
+static void VFilter8(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {  // applies the same 8-position filter to both planes
+    FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);  // first plane (u)
+    FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);  // second plane (v), identical parameters
+}
+
+static void HFilter8(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {  // VFilter8 with the two step arguments swapped
+    FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);  // first plane (u)
+    FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);  // second plane (v), identical parameters
+}
+
+// on three inner edges
+static void VFilter16i(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {
+    int edges_left = 3;  // three inner edges, each 4 rows below the previous one
+    do {
+        p += 4 * stride;  // step down to the next inner edge before filtering it
+        FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
+    } while (--edges_left > 0);
+}
+
+static void HFilter16i(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {
+    int edge;
+    for (edge = 1; edge <= 3; ++edge) {
+        uint8_t* const column = p + 4 * edge;  // inner edges at byte offsets 4, 8 and 12
+        FilterLoop24(column, 1, stride, 16, thresh, ithresh, hev_thresh);
+    }
+}
+
+static void VFilter8i(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {  // single inner edge, 4 rows down, on both planes
+    FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);  // hstride = stride, vstride = 1, 8 positions
+    FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);  // same edge on the second plane
+}
+
+static void HFilter8i(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {  // single inner edge, 4 bytes in, on both planes
+    FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);  // hstride = 1, vstride = stride, 8 positions
+    FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);  // same edge on the second plane
+}
+
+#undef MUL
+
+//------------------------------------------------------------------------------
+// Simple In-loop filtering (Paragraph 15.2)
+
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {  // filters 16 consecutive bytes; each reads p[-2*stride..stride] and may rewrite p[-stride] and p[0]
+    int i;  // loop counter, loaded with 16 inside the asm
+    const int thresh2 = 2 * thresh + 1;  // limit for 4*|p[-stride]-p[0]| + |p[-2*stride]-p[stride]|
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;  // scratch registers
+    uint8_t* p1 = p - stride;  // store address for the sample just before the edge
+    __asm__ volatile(
+        ".set      push                                      \n\t"
+        ".set      noreorder                                 \n\t"  // delay slots are filled by hand: the lines starting with a space
+        "li        %[i],        16                           \n\t"
+        "0:                                                    \n\t"  // ---- one byte position per iteration ----
+        "negu      %[temp4],    %[stride]                    \n\t"  // temp4 = -stride
+        "sll       %[temp5],    %[temp4],       1            \n\t"  // temp5 = -2*stride
+        "lbu       %[temp2],    0(%[p])                      \n\t"  // temp2 = p[0]
+        "lbux      %[temp3],    %[stride](%[p])              \n\t"  // temp3 = p[stride]
+        "lbux      %[temp1],    %[temp4](%[p])               \n\t"  // temp1 = p[-stride]
+        "lbux      %[temp0],    %[temp5](%[p])               \n\t"  // temp0 = p[-2*stride]
+        "subu      %[temp7],    %[temp1],       %[temp2]     \n\t"  // temp7 = p[-stride] - p[0]
+        "subu      %[temp6],    %[temp0],       %[temp3]     \n\t"  // temp6 = p[-2*stride] - p[stride]
+        "absq_s.w  %[temp4],    %[temp7]                     \n\t"
+        "absq_s.w  %[temp5],    %[temp6]                     \n\t"
+        "sll       %[temp4],    %[temp4],       2            \n\t"  // temp4 = 4*|temp7|
+        "subu      %[temp5],    %[temp5],       %[thresh2]   \n\t"
+        "addu      %[temp5],    %[temp4],       %[temp5]     \n\t"  // temp5 = 4*|temp7| + |temp6| - thresh2
+        "negu      %[temp8],    %[temp7]                     \n\t"  // temp8 = p[0] - p[-stride]
+        "bgtz      %[temp5],    1f                           \n\t"  // over the limit: leave this position untouched
+        " addiu    %[i],        %[i],           -1           \n\t"  // delay slot: the counter is decremented on both paths
+        "sll       %[temp4],    %[temp8],       1            \n\t"
+        "shll_s.w  %[temp5],    %[temp6],       24           \n\t"  // saturating <<24 then >>24 clamps temp6 to [-128, 127]
+        "addu      %[temp3],    %[temp4],       %[temp8]     \n\t"  // temp3 = 3*(p[0] - p[-stride])
+        "sra       %[temp5],    %[temp5],       24           \n\t"
+        "addu      %[temp3],    %[temp3],       %[temp5]     \n\t"  // temp3 += clamped (p[-2*stride] - p[stride])
+        "addiu     %[temp7],    %[temp3],       3            \n\t"
+        "sra       %[temp7],    %[temp7],       3            \n\t"  // temp7 = (temp3 + 3) >> 3
+        "shra_r.w  %[temp8],    %[temp3],       3            \n\t"  // temp8 = (temp3 + 4) >> 3 (rounding shift)
+        "shll_s.w  %[temp0],    %[temp7],       27           \n\t"  // saturating <<27 then >>27 clamps to [-16, 15]
+        "shll_s.w  %[temp4],    %[temp8],       27           \n\t"
+        "sra       %[temp0],    %[temp0],       27           \n\t"
+        "sra       %[temp4],    %[temp4],       27           \n\t"
+        "addu      %[temp7],    %[temp1],       %[temp0]     \n\t"  // p[-stride] + correction
+        "subu      %[temp2],    %[temp2],       %[temp4]     \n\t"  // p[0] - correction
+        "lbux      %[temp3],    %[temp7](%[VP8kclip1])       \n\t"  // byte lookup at VP8kclip1 + value; table defined elsewhere, presumably clamps to 0..255
+        "lbux      %[temp4],    %[temp2](%[VP8kclip1])       \n\t"
+        "sb        %[temp3],    0(%[p1])                     \n\t"  // write back p[-stride]
+        "sb        %[temp4],    0(%[p])                      \n\t"  // write back p[0]
+        "1:                                                    \n\t"
+        "addiu     %[p1],       %[p1],          1            \n\t"  // advance both pointers by one byte
+        "bgtz      %[i],        0b                           \n\t"
+        " addiu    %[p],        %[p],           1            \n\t"
+        " .set     pop                                       \n\t"
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+          [temp5] "=&r"(temp5), [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [temp8] "=&r"(temp8), [p] "+&r"(p),
+          [i] "=&r"(i), [p1] "+&r"(p1)  // p and p1 are read and modified; i is write-only
+        : [stride] "r"(stride), [VP8kclip1] "r"(VP8kclip1), [thresh2] "r"(thresh2)
+        : "memory");  // the asm stores through p and p1
+}
+
+// Inline-asm fragment: four "lbu" byte loads, TEMP0 = SRC[A + A1 * BPS],
+// TEMP1 = SRC[B + B1 * BPS], TEMP2 = SRC[C + C1 * BPS] and
+// TEMP3 = SRC[D + D1 * BPS]. TEMPn and SRC are asm operand names; the
+// offsets are pasted into the instruction text, so they must be constants.
+#define LOAD_4_BYTES(TEMP0, TEMP1, TEMP2, TEMP3, A, A1, B, B1, C, C1, D, D1, SRC)                                      \
+    "lbu      %[" #TEMP0 "],   " #A "+" #A1 "*" XSTR(BPS) "(%[" #SRC                                                   \
+                                                          "]) \n\t"                                                    \
+                                                          "lbu      %[" #TEMP1 "],   " #B "+" #B1                      \
+                                                          "*" XSTR(BPS) "(%[" #SRC                                     \
+                                                                        "]) \n\t"                                      \
+                                                                        "lbu      %[" #TEMP2 "],   " #C "+" #C1        \
+                                                                        "*" XSTR(BPS) "(%[" #SRC                       \
+                                                                                      "]) \n\t"                        \
+                                                                                      "lbu      %[" #TEMP3 "],   " #D  \
+                                                                                      "+" #D1 "*" XSTR(BPS) "(%[" #SRC \
+                                                                                                            "]) \n\t"
+
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {  // filters 16 positions `stride` bytes apart; each reads p[-2..1] and may rewrite p[-1] and p[0]
+    int i;  // loop counter, loaded with 16 inside the asm
+    const int thresh2 = 2 * thresh + 1;  // limit for 4*|p[-1]-p[0]| + |p[-2]-p[1]|
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;  // scratch registers
+    __asm__ volatile (
+    ".set      push                                     \n\t"
+    ".set      noreorder                                \n\t"  // delay slots are filled by hand: the lines starting with a space
+    "li        %[i],       16                           \n\t"
+  "0:                                                   \n\t"  // ---- one position per iteration ----
+    LOAD_4_BYTES(temp0, temp1, temp2, temp3, -2, 0, -1, 0, 0, 0, 1, 0, p)  // temp0..temp3 = p[-2], p[-1], p[0], p[1]
+    "subu      %[temp7],    %[temp1],       %[temp2]    \n\t"  // temp7 = p[-1] - p[0]
+    "subu      %[temp6],    %[temp0],       %[temp3]    \n\t"  // temp6 = p[-2] - p[1]
+    "absq_s.w  %[temp4],    %[temp7]                    \n\t"
+    "absq_s.w  %[temp5],    %[temp6]                    \n\t"
+    "sll       %[temp4],    %[temp4],       2           \n\t"  // temp4 = 4*|temp7|
+    "addu      %[temp5],    %[temp4],       %[temp5]    \n\t"
+    "subu      %[temp5],    %[temp5],       %[thresh2]  \n\t"  // temp5 = 4*|temp7| + |temp6| - thresh2
+    "negu      %[temp8],    %[temp7]                    \n\t"  // temp8 = p[0] - p[-1]
+    "bgtz      %[temp5],    1f                          \n\t"  // over the limit: leave this position untouched
+    " addiu    %[i],        %[i],           -1          \n\t"  // delay slot: the counter is decremented on both paths
+    "sll       %[temp4],    %[temp8],       1           \n\t"
+    "shll_s.w  %[temp5],    %[temp6],       24          \n\t"  // saturating <<24 then >>24 clamps temp6 to [-128, 127]
+    "addu      %[temp3],    %[temp4],       %[temp8]    \n\t"  // temp3 = 3*(p[0] - p[-1])
+    "sra       %[temp5],    %[temp5],       24          \n\t"
+    "addu      %[temp3],    %[temp3],       %[temp5]    \n\t"  // temp3 += clamped (p[-2] - p[1])
+    "addiu     %[temp7],    %[temp3],       3           \n\t"
+    "sra       %[temp7],    %[temp7],       3           \n\t"  // temp7 = (temp3 + 3) >> 3
+    "shra_r.w  %[temp8],    %[temp3],       3           \n\t"  // temp8 = (temp3 + 4) >> 3 (rounding shift)
+    "shll_s.w  %[temp0],    %[temp7],       27          \n\t"  // saturating <<27 then >>27 clamps to [-16, 15]
+    "shll_s.w  %[temp4],    %[temp8],       27          \n\t"
+    "sra       %[temp0],    %[temp0],       27          \n\t"
+    "sra       %[temp4],    %[temp4],       27          \n\t"
+    "addu      %[temp7],    %[temp1],       %[temp0]    \n\t"  // p[-1] + correction
+    "subu      %[temp2],    %[temp2],       %[temp4]    \n\t"  // p[0] - correction
+    "lbux      %[temp3],    %[temp7](%[VP8kclip1])      \n\t"  // byte lookup at VP8kclip1 + value; table defined elsewhere, presumably clamps to 0..255
+    "lbux      %[temp4],    %[temp2](%[VP8kclip1])      \n\t"
+    "sb        %[temp3],    -1(%[p])                    \n\t"  // write back p[-1]
+    "sb        %[temp4],    0(%[p])                     \n\t"  // write back p[0]
+  "1:                                                   \n\t"
+    "bgtz      %[i],        0b                          \n\t"
+    " addu     %[p],        %[p],           %[stride]   \n\t"  // delay slot: move to the next position, `stride` bytes on
+    ".set      pop                                      \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [p]"+&r"(p), [i]"=&r"(i)
+    : [stride]"r"(stride), [VP8kclip1]"r"(VP8kclip1), [thresh2]"r"(thresh2)
+    : "memory"
+  );
+}
+
+static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
+    int remaining = 3;  // three inner edges, each 4 rows below the previous one
+    while (remaining-- > 0) {
+        p += 4 * stride;  // step down to the next inner edge before filtering it
+        SimpleVFilter16(p, stride, thresh);
+    }
+}
+
+static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
+    int edge;
+    for (edge = 1; edge <= 3; ++edge) {
+        uint8_t* const column = p + 4 * edge;  // inner edges at byte offsets 4, 8 and 12
+        SimpleHFilter16(column, stride, thresh);
+    }
+}
+
+// Inline-asm fragment: two "usw" (unaligned 32-bit) stores, writing TEMP0 to
+// DST[A * BPS] and TEMP1 to DST[B + C * BPS]; offsets must be constants.
+#define STORE_8_BYTES(TEMP0, TEMP1, A, B, C, DST)                                                             \
+    "usw    %[" #TEMP0 "],   " #A "*" XSTR(BPS) "(%[" #DST                                                    \
+                                                "])         \n\t"                                             \
+                                                "usw    %[" #TEMP1 "],   " #B "+" #C "*" XSTR(BPS) "(%[" #DST \
+                                                                                                   "])  \n\t"
+
+static void VE4(uint8_t* dst) { // vertical: smooths the row above dst and copies the 4 resulting bytes into rows 0..3
+    const uint8_t* top = dst - BPS;  // row immediately above the block
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6;  // scratch registers
+    __asm__ volatile(
+        "ulw             %[temp0],   -1(%[top])              \n\t"  // 4 bytes: top[-1..2]
+        "ulh             %[temp1],   3(%[top])               \n\t"  // 2 bytes: top[3..4]
+        "preceu.ph.qbr   %[temp2],   %[temp0]                \n\t"  // widen the two low bytes of temp0 to 16-bit lanes
+        "preceu.ph.qbl   %[temp3],   %[temp0]                \n\t"  // widen the two high bytes of temp0
+        "preceu.ph.qbr   %[temp4],   %[temp1]                \n\t"  // widen the two bytes loaded by ulh
+        "packrl.ph       %[temp5],   %[temp3],    %[temp2]   \n\t"  // lanes straddling temp2/temp3 (the middle taps)
+        "packrl.ph       %[temp6],   %[temp4],    %[temp3]   \n\t"  // lanes straddling temp3/temp4
+        "shll.ph         %[temp5],   %[temp5],    1          \n\t"  // middle tap weighted by 2
+        "shll.ph         %[temp6],   %[temp6],    1          \n\t"
+        "addq.ph         %[temp2],   %[temp5],    %[temp2]   \n\t"
+        "addq.ph         %[temp6],   %[temp6],    %[temp4]   \n\t"
+        "addq.ph         %[temp2],   %[temp2],    %[temp3]   \n\t"  // per lane: a + 2*b + c
+        "addq.ph         %[temp6],   %[temp6],    %[temp3]   \n\t"
+        "shra_r.ph       %[temp2],   %[temp2],    2          \n\t"  // rounding shift: (a + 2*b + c + 2) >> 2
+        "shra_r.ph       %[temp6],   %[temp6],    2          \n\t"
+        "precr.qb.ph     %[temp4],   %[temp6],    %[temp2]   \n\t" STORE_8_BYTES(temp4, temp4, 0, 0, 1, dst)  // pack the 4 results into bytes; write rows 0 and 1
+            STORE_8_BYTES(temp4, temp4, 2, 0, 3, dst)  // write the same 4 bytes to rows 2 and 3
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+          [temp5] "=&r"(temp5), [temp6] "=&r"(temp6)
+        : [top] "r"(top), [dst] "r"(dst)
+        : "memory");  // the asm stores through dst
+}
+
+static void DC4(uint8_t* dst) { // DC: fills rows 0..3 (4 bytes each) with the rounded mean of the 4 bytes above and the 4 bytes to the left
+    int temp0, temp1, temp2, temp3, temp4;  // scratch registers
+    __asm__ volatile (
+    "ulw          %[temp0],   -1*" XSTR(BPS) "(%[dst]) \n\t"  // 4 bytes of the row above: dst[-BPS .. -BPS+3]
+    LOAD_4_BYTES(temp1, temp2, temp3, temp4, -1, 0, -1, 1, -1, 2, -1, 3, dst)  // left column: dst[-1 + k*BPS], k = 0..3
+    "ins          %[temp1],   %[temp2],    8,     8    \n\t"  // gather the four left bytes into temp1
+    "ins          %[temp1],   %[temp3],    16,    8    \n\t"
+    "ins          %[temp1],   %[temp4],    24,    8    \n\t"
+    "raddu.w.qb   %[temp0],   %[temp0]                 \n\t"  // sum of the 4 bytes above
+    "raddu.w.qb   %[temp1],   %[temp1]                 \n\t"  // sum of the 4 bytes to the left
+    "addu         %[temp0],   %[temp0],    %[temp1]    \n\t"
+    "shra_r.w     %[temp0],   %[temp0],    3           \n\t"  // rounding shift: (sum + 4) >> 3
+    "replv.qb     %[temp0],   %[temp0]                 \n\t"  // replicate the mean into all 4 byte lanes
+    STORE_8_BYTES(temp0, temp0, 0, 0, 1, dst)  // rows 0 and 1
+    STORE_8_BYTES(temp0, temp0, 2, 0, 3, dst)  // rows 2 and 3
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4)
+    : [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+static void RD4(uint8_t* dst) { // Down-right: fills rows 0..3 (4 bytes each) from the left column, the corner and the row above
+    int temp0, temp1, temp2, temp3, temp4;  // scratch registers
+    int temp5, temp6, temp7, temp8;
+    __asm__ volatile (
+    LOAD_4_BYTES(temp0, temp1, temp2, temp3, -1, 0, -1, 1, -1, 2, -1, 3, dst)  // left column: dst[-1 + k*BPS], k = 0..3
+    "ulw            %[temp7],   -1-" XSTR(BPS) "(%[dst])       \n\t"  // 4 bytes starting at dst[-1 - BPS] (corner and row above)
+    "ins            %[temp1],   %[temp0], 16, 16               \n\t"  // pair neighbouring left samples into two 16-bit lanes
+    "preceu.ph.qbr  %[temp5],   %[temp7]                       \n\t"  // widen the two low bytes of temp7
+    "ins            %[temp2],   %[temp1], 16, 16               \n\t"
+    "preceu.ph.qbl  %[temp4],   %[temp7]                       \n\t"  // widen the two high bytes of temp7
+    "ins            %[temp3],   %[temp2], 16, 16               \n\t"
+    "shll.ph        %[temp2],   %[temp2], 1                    \n\t"  // middle tap weighted by 2
+    "addq.ph        %[temp3],   %[temp3], %[temp1]             \n\t"
+    "packrl.ph      %[temp6],   %[temp5], %[temp1]             \n\t"
+    "addq.ph        %[temp3],   %[temp3], %[temp2]             \n\t"  // per lane: a + 2*b + c
+    "addq.ph        %[temp1],   %[temp1], %[temp5]             \n\t"
+    "shll.ph        %[temp6],   %[temp6], 1                    \n\t"
+    "addq.ph        %[temp1],   %[temp1], %[temp6]             \n\t"
+    "packrl.ph      %[temp0],   %[temp4], %[temp5]             \n\t"
+    "addq.ph        %[temp8],   %[temp5], %[temp4]             \n\t"
+    "shra_r.ph      %[temp3],   %[temp3], 2                    \n\t"  // rounding shift: (sum + 2) >> 2 per lane
+    "shll.ph        %[temp0],   %[temp0], 1                    \n\t"
+    "shra_r.ph      %[temp1],   %[temp1], 2                    \n\t"
+    "addq.ph        %[temp8],   %[temp0], %[temp8]             \n\t"
+    "lbu            %[temp5],   3-" XSTR(BPS) "(%[dst])        \n\t"  // temp5 = dst[3 - BPS]
+    "precrq.ph.w    %[temp7],   %[temp7], %[temp7]             \n\t"  // duplicate the upper halfword of temp7 into both halves
+    "shra_r.ph      %[temp8],   %[temp8], 2                    \n\t"
+    "ins            %[temp7],   %[temp5], 0,  8                \n\t"  // replace the lowest byte with dst[3 - BPS]
+    "precr.qb.ph    %[temp2],   %[temp1], %[temp3]             \n\t"  // pack filtered lanes back into bytes
+    "raddu.w.qb     %[temp4],   %[temp7]                       \n\t"  // byte sum of temp7 (one sample counted twice)
+    "precr.qb.ph    %[temp6],   %[temp8], %[temp1]             \n\t"
+    "shra_r.w       %[temp4],   %[temp4], 2                    \n\t"  // (sum + 2) >> 2
+    STORE_8_BYTES(temp2, temp6, 3, 0, 1, dst)  // row 3 = temp2, row 1 = temp6
+    "prepend        %[temp2],   %[temp8], 8                    \n\t"  // shift temp2 right by 8 bits, inserting the low byte of temp8 at the top
+    "prepend        %[temp6],   %[temp4], 8                    \n\t"  // likewise for temp6 with the low byte of temp4
+    STORE_8_BYTES(temp2, temp6, 2, 0, 0, dst)  // row 2 = temp2, row 0 = temp6
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
+    : [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+// Inline-asm fragment: two "ulw" (unaligned 32-bit) loads, reading TEMP0 from
+// SRC[A * BPS] and TEMP1 from SRC[B + C * BPS]; offsets must be constants.
+#define LOAD_8_BYTES(TEMP0, TEMP1, A, B, C, SRC)                                                              \
+    "ulw    %[" #TEMP0 "],   " #A "*" XSTR(BPS) "(%[" #SRC                                                    \
+                                                "])         \n\t"                                             \
+                                                "ulw    %[" #TEMP1 "],   " #B "+" #C "*" XSTR(BPS) "(%[" #SRC \
+                                                                                                   "])  \n\t"
+
+static void LD4(uint8_t* dst) { // Down-Left: fills rows 0..3 (4 bytes each) from the 8 bytes of the row above
+    int temp0, temp1, temp2, temp3, temp4;  // scratch registers
+    int temp5, temp6, temp7, temp8, temp9;
+    __asm__ volatile (
+    LOAD_8_BYTES(temp0, temp1, -1, 4, -1, dst)  // temp0 = 4 bytes at dst[-BPS], temp1 = 4 bytes at dst[4 - BPS]
+    "preceu.ph.qbl   %[temp2],    %[temp0]                     \n\t"  // widen bytes to 16-bit lanes (high pair of temp0)
+    "preceu.ph.qbr   %[temp3],    %[temp0]                     \n\t"  // low pair of temp0
+    "preceu.ph.qbr   %[temp4],    %[temp1]                     \n\t"  // low pair of temp1
+    "preceu.ph.qbl   %[temp5],    %[temp1]                     \n\t"  // high pair of temp1
+    "packrl.ph       %[temp6],    %[temp2],    %[temp3]        \n\t"  // lanes straddling two registers (the middle taps)
+    "packrl.ph       %[temp7],    %[temp4],    %[temp2]        \n\t"
+    "packrl.ph       %[temp8],    %[temp5],    %[temp4]        \n\t"
+    "shll.ph         %[temp6],    %[temp6],    1               \n\t"  // middle tap weighted by 2
+    "addq.ph         %[temp9],    %[temp2],    %[temp6]        \n\t"
+    "shll.ph         %[temp7],    %[temp7],    1               \n\t"
+    "addq.ph         %[temp9],    %[temp9],    %[temp3]        \n\t"  // per lane: a + 2*b + c
+    "shll.ph         %[temp8],    %[temp8],    1               \n\t"
+    "shra_r.ph       %[temp9],    %[temp9],    2               \n\t"  // rounding shift: (sum + 2) >> 2 per lane
+    "addq.ph         %[temp3],    %[temp4],    %[temp7]        \n\t"
+    "addq.ph         %[temp0],    %[temp5],    %[temp8]        \n\t"
+    "addq.ph         %[temp3],    %[temp3],    %[temp2]        \n\t"
+    "addq.ph         %[temp0],    %[temp0],    %[temp4]        \n\t"
+    "shra_r.ph       %[temp3],    %[temp3],    2               \n\t"
+    "shra_r.ph       %[temp0],    %[temp0],    2               \n\t"
+    "srl             %[temp1],    %[temp1],    24              \n\t"  // isolate the top byte of temp1
+    "sll             %[temp1],    %[temp1],    1               \n\t"  // ...and weight it by 2
+    "raddu.w.qb      %[temp5],    %[temp5]                     \n\t"  // sum of the two samples held in temp5
+    "precr.qb.ph     %[temp9],    %[temp3],    %[temp9]        \n\t"  // pack filtered lanes back into bytes
+    "precr.qb.ph     %[temp3],    %[temp0],    %[temp3]        \n\t"
+    "addu            %[temp1],    %[temp1],    %[temp5]        \n\t"
+    "shra_r.w        %[temp1],    %[temp1],    2               \n\t"  // (sum + 2) >> 2 for the final sample
+    STORE_8_BYTES(temp9, temp3, 0, 0, 2, dst)  // row 0 = temp9, row 2 = temp3
+    "prepend         %[temp9],    %[temp0],    8               \n\t"  // shift temp9 right by 8 bits, inserting the low byte of temp0 at the top
+    "prepend         %[temp3],    %[temp1],    8               \n\t"  // likewise for temp3 with the low byte of temp1
+    STORE_8_BYTES(temp9, temp3, 1, 0, 3, dst)  // row 1 = temp9, row 3 = temp3
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9)
+    : [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+//------------------------------------------------------------------------------
+// Chroma
+
+static void DC8uv(uint8_t* dst) { // DC from 16 samples: 8 bytes at dst - BPS plus 8 values fetched by LOAD_4_BYTES
+    int temp0, temp1, temp2, temp3, temp4;
+    int temp5, temp6, temp7, temp8, temp9;
+    __asm__ volatile (
+    LOAD_8_BYTES(temp0, temp1, -1, 4, -1, dst)  // temp0 = 4 bytes at dst - BPS, temp1 = 4 bytes at dst + 4 - BPS
+    LOAD_4_BYTES(temp2, temp3, temp4, temp5, -1, 0, -1, 1, -1, 2, -1, 3, dst)  // NOTE(review): macro not visible here; presumably dst[-1 + k * BPS], k = 0..3
+    LOAD_4_BYTES(temp6, temp7, temp8, temp9, -1, 4, -1, 5, -1, 6, -1, 7, dst)  // presumably the same for k = 4..7 -- TODO confirm
+    "raddu.w.qb   %[temp0],   %[temp0]                   \n\t"  // sum the four bytes of temp0
+    "raddu.w.qb   %[temp1],   %[temp1]                   \n\t"  // sum the four bytes of temp1
+    "addu         %[temp2],   %[temp2],    %[temp3]      \n\t"
+    "addu         %[temp4],   %[temp4],    %[temp5]      \n\t"
+    "addu         %[temp6],   %[temp6],    %[temp7]      \n\t"
+    "addu         %[temp8],   %[temp8],    %[temp9]      \n\t"
+    "addu         %[temp0],   %[temp0],    %[temp1]      \n\t"
+    "addu         %[temp2],   %[temp2],    %[temp4]      \n\t"
+    "addu         %[temp6],   %[temp6],    %[temp8]      \n\t"
+    "addu         %[temp0],   %[temp0],    %[temp2]      \n\t"
+    "addu         %[temp0],   %[temp0],    %[temp6]      \n\t"  // temp0 = total of all 16 samples
+    "shra_r.w     %[temp0],   %[temp0],    4             \n\t"  // rounded mean: (total + 8) >> 4
+    "replv.qb     %[temp0],   %[temp0]                   \n\t"  // replicate the low byte into all four byte lanes
+    STORE_8_BYTES(temp0, temp0, 0, 4, 0, dst)  // NOTE(review): STORE_8_BYTES is defined outside this view; presumably 8 bytes of row 0
+    STORE_8_BYTES(temp0, temp0, 1, 4, 1, dst)
+    STORE_8_BYTES(temp0, temp0, 2, 4, 2, dst)
+    STORE_8_BYTES(temp0, temp0, 3, 4, 3, dst)
+    STORE_8_BYTES(temp0, temp0, 4, 4, 4, dst)
+    STORE_8_BYTES(temp0, temp0, 5, 4, 5, dst)
+    STORE_8_BYTES(temp0, temp0, 6, 4, 6, dst)
+    STORE_8_BYTES(temp0, temp0, 7, 4, 7, dst)
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9)
+    : [dst]"r"(dst)
+    : "memory"  // the asm reads and writes memory through dst
+  );
+}
+
+static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples: uses only the 8 bytes at dst - BPS
+    int temp0, temp1;
+    __asm__ volatile (
+    LOAD_8_BYTES(temp0, temp1, -1, 4, -1, dst)  // temp0 = 4 bytes at dst - BPS, temp1 = 4 bytes at dst + 4 - BPS
+    "raddu.w.qb   %[temp0],   %[temp0]                   \n\t"  // sum the four bytes of temp0
+    "raddu.w.qb   %[temp1],   %[temp1]                   \n\t"  // sum the four bytes of temp1
+    "addu         %[temp0],   %[temp0],    %[temp1]      \n\t"  // temp0 = total of the 8 samples
+    "shra_r.w     %[temp0],   %[temp0],    3             \n\t"  // rounded mean: (total + 4) >> 3
+    "replv.qb     %[temp0],   %[temp0]                   \n\t"  // replicate the low byte into all four byte lanes
+    STORE_8_BYTES(temp0, temp0, 0, 4, 0, dst)  // NOTE(review): STORE_8_BYTES is defined outside this view; presumably 8 bytes of row 0
+    STORE_8_BYTES(temp0, temp0, 1, 4, 1, dst)
+    STORE_8_BYTES(temp0, temp0, 2, 4, 2, dst)
+    STORE_8_BYTES(temp0, temp0, 3, 4, 3, dst)
+    STORE_8_BYTES(temp0, temp0, 4, 4, 4, dst)
+    STORE_8_BYTES(temp0, temp0, 5, 4, 5, dst)
+    STORE_8_BYTES(temp0, temp0, 6, 4, 6, dst)
+    STORE_8_BYTES(temp0, temp0, 7, 4, 7, dst)
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
+    : [dst]"r"(dst)
+    : "memory"  // the asm reads and writes memory through dst
+  );
+}
+
+static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples: uses only the 8 values fetched by LOAD_4_BYTES
+    int temp0, temp1, temp2, temp3, temp4;
+    int temp5, temp6, temp7, temp8;
+    __asm__ volatile (
+    LOAD_4_BYTES(temp2, temp3, temp4, temp5, -1, 0, -1, 1, -1, 2, -1, 3, dst)  // NOTE(review): macro not visible here; presumably dst[-1 + k * BPS], k = 0..3
+    LOAD_4_BYTES(temp6, temp7, temp8, temp1, -1, 4, -1, 5, -1, 6, -1, 7, dst)  // presumably the same for k = 4..7 (temp1 holds the 8th value) -- TODO confirm
+    "addu         %[temp2],   %[temp2],    %[temp3]      \n\t"
+    "addu         %[temp4],   %[temp4],    %[temp5]      \n\t"
+    "addu         %[temp6],   %[temp6],    %[temp7]      \n\t"
+    "addu         %[temp8],   %[temp8],    %[temp1]      \n\t"
+    "addu         %[temp2],   %[temp2],    %[temp4]      \n\t"
+    "addu         %[temp6],   %[temp6],    %[temp8]      \n\t"
+    "addu         %[temp0],   %[temp6],    %[temp2]      \n\t"  // temp0 = total of the 8 loaded values
+    "shra_r.w     %[temp0],   %[temp0],    3             \n\t"  // rounded mean: (total + 4) >> 3
+    "replv.qb     %[temp0],   %[temp0]                   \n\t"  // replicate the low byte into all four byte lanes
+    STORE_8_BYTES(temp0, temp0, 0, 4, 0, dst)  // NOTE(review): STORE_8_BYTES is defined outside this view; presumably 8 bytes of row 0
+    STORE_8_BYTES(temp0, temp0, 1, 4, 1, dst)
+    STORE_8_BYTES(temp0, temp0, 2, 4, 2, dst)
+    STORE_8_BYTES(temp0, temp0, 3, 4, 3, dst)
+    STORE_8_BYTES(temp0, temp0, 4, 4, 4, dst)
+    STORE_8_BYTES(temp0, temp0, 5, 4, 5, dst)
+    STORE_8_BYTES(temp0, temp0, 6, 4, 6, dst)
+    STORE_8_BYTES(temp0, temp0, 7, 4, 7, dst)
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
+    : [dst]"r"(dst)
+    : "memory"  // the asm reads and writes memory through dst
+  );
+}
+
+#undef LOAD_8_BYTES
+#undef STORE_8_BYTES
+#undef LOAD_4_BYTES
+
+// Arithmetic core used by CLIP_8B_TO_DST. Expects packed bytes in temp0 (and in temp1 when
+// SIZE == 8): widens them to halfwords, adds dst_1 to each, then `shll_s.ph ..., 7` followed by
+// `precrqu_s.qb.ph` re-packs to bytes with saturation; results end up in temp0 (and temp1).
+// The `.if SIZE == 8` sections are assembler-time conditionals enabling the temp1/temp3 half.
+#define CLIPPING(SIZE)                                                   \
+    "preceu.ph.qbl   %[temp2],   %[temp0]                  \n\t"         \
+    "preceu.ph.qbr   %[temp0],   %[temp0]                  \n\t"         \
+    ".if " #SIZE " == 8                                      \n\t"       \
+    "preceu.ph.qbl   %[temp3],   %[temp1]                  \n\t"         \
+    "preceu.ph.qbr   %[temp1],   %[temp1]                  \n\t"         \
+    ".endif                                                  \n\t"       \
+    "addu.ph         %[temp2],   %[temp2],   %[dst_1]      \n\t"         \
+    "addu.ph         %[temp0],   %[temp0],   %[dst_1]      \n\t"         \
+    ".if " #SIZE " == 8                                      \n\t"       \
+    "addu.ph         %[temp3],   %[temp3],   %[dst_1]      \n\t"         \
+    "addu.ph         %[temp1],   %[temp1],   %[dst_1]      \n\t"         \
+    ".endif                                                  \n\t"       \
+    "shll_s.ph       %[temp2],   %[temp2],   7             \n\t"         \
+    "shll_s.ph       %[temp0],   %[temp0],   7             \n\t"         \
+    ".if " #SIZE " == 8                                      \n\t"       \
+    "shll_s.ph       %[temp3],   %[temp3],   7             \n\t"         \
+    "shll_s.ph       %[temp1],   %[temp1],   7             \n\t"         \
+    ".endif                                                  \n\t"       \
+    "precrqu_s.qb.ph %[temp0],   %[temp2],   %[temp0]      \n\t"         \
+    ".if " #SIZE " == 8                                      \n\t"       \
+    "precrqu_s.qb.ph %[temp1],   %[temp3],   %[temp1]      \n\t"         \
+    ".endif                                                  \n\t"
+
+#define CLIP_8B_TO_DST(DST, TOP, SIZE) /* one row of SIZE (4, 8 or 16) pixels; `top_1` must be in scope */ \
+    do {                                                                             \
+        int dst_1 = ((int)(DST)[-1] << 16) + (DST)[-1]; /* DST[-1] in both halfwords */ \
+        int temp0, temp1, temp2, temp3;                                              \
+        __asm__ volatile (                                                           \
+  ".if " #SIZE " < 8                                     \n\t" /* 4-pixel row */ \
+    "ulw             %[temp0],   0(%[top])               \n\t"                 \
+    "subu.ph         %[dst_1],   %[dst_1],    %[top_1]   \n\t" /* dst_1 -= top_1, per halfword */ \
+    CLIPPING(4)                                                                \
+    "usw             %[temp0],   0(%[dst])               \n\t"                 \
+  ".else                                                 \n\t" /* 8 pixels, plus 8 more when SIZE == 16 */ \
+    "ulw             %[temp0],   0(%[top])               \n\t"                 \
+    "ulw             %[temp1],   4(%[top])               \n\t"                 \
+    "subu.ph         %[dst_1],   %[dst_1],    %[top_1]   \n\t"                 \
+    CLIPPING(8)                                                                \
+    "usw             %[temp0],   0(%[dst])               \n\t"                 \
+    "usw             %[temp1],   4(%[dst])               \n\t"                 \
+  ".if " #SIZE " == 16                                   \n\t"                 \
+    "ulw             %[temp0],   8(%[top])               \n\t"                 \
+    "ulw             %[temp1],   12(%[top])              \n\t"                 \
+    CLIPPING(8)                                                                \
+    "usw             %[temp0],   8(%[dst])               \n\t"                 \
+    "usw             %[temp1],   12(%[dst])              \n\t"                 \
+  ".endif                                                \n\t"                 \
+  ".endif                                                \n\t"                 \
+    : [dst_1]"+&r"(dst_1), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),           \
+      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3)                                 \
+    : [top_1]"r"(top_1), [top]"r"((TOP)), [dst]"r"((DST))                      \
+    : "memory"                                                                 \
+  ); \
+    } while (0)
+
+// Applies CLIP_8B_TO_DST to SIZE consecutive rows. `top` (the row at DST - BPS) and `top_1`
+// are computed once, before the loop; DST itself is advanced by BPS after every row.
+#define CLIP_TO_DST(DST, SIZE)                                \
+    do {                                                      \
+        const uint8_t* top = (DST) - BPS;                     \
+        const int top_1 = ((int)top[-1] << 16) + top[-1];     \
+        int row;                                              \
+        for (row = 0; row < (SIZE); ++row, (DST) += BPS)      \
+            CLIP_8B_TO_DST((DST), top, (SIZE));               \
+    } while (0)
+
+#define TRUE_MOTION(DST, SIZE) \
+    static void TrueMotion##SIZE(uint8_t*(DST)) { CLIP_TO_DST((DST), (SIZE)); }
+// Each use below defines `static void TrueMotion<SIZE>(uint8_t* dst)`: TrueMotion4, TrueMotion8, TrueMotion16.
+TRUE_MOTION(dst, 4)
+TRUE_MOTION(dst, 8)
+TRUE_MOTION(dst, 16)
+
+#undef TRUE_MOTION
+#undef CLIP_TO_DST
+#undef CLIP_8B_TO_DST
+#undef CLIPPING
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8DspInitMIPSdspR2(void);
+
+// Points the VP8 hooks at this file's routines; slots not assigned here are left untouched.
+WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitMIPSdspR2(void) {
+    // Prediction tables (only some indices are overridden).
+    VP8PredLuma16[1] = TrueMotion16;
+    VP8PredChroma8[0] = DC8uv;
+    VP8PredChroma8[1] = TrueMotion8;
+    VP8PredChroma8[4] = DC8uvNoTop;
+    VP8PredChroma8[5] = DC8uvNoLeft;
+    VP8PredLuma4[0] = DC4;
+    VP8PredLuma4[1] = TrueMotion4;
+    VP8PredLuma4[2] = VE4;
+    VP8PredLuma4[4] = RD4;
+    VP8PredLuma4[6] = LD4;
+    // Filter hooks: regular ones first, then the Simple* variants.
+    VP8VFilter16 = VFilter16;
+    VP8VFilter16i = VFilter16i;
+    VP8HFilter16 = HFilter16;
+    VP8HFilter16i = HFilter16i;
+    VP8VFilter8 = VFilter8;
+    VP8VFilter8i = VFilter8i;
+    VP8HFilter8 = HFilter8;
+    VP8HFilter8i = HFilter8i;
+    VP8SimpleVFilter16 = SimpleVFilter16;
+    VP8SimpleVFilter16i = SimpleVFilter16i;
+    VP8SimpleHFilter16 = SimpleHFilter16;
+    VP8SimpleHFilter16i = SimpleHFilter16i;
+    // Transform hooks.
+    VP8Transform = TransformTwo;
+    VP8TransformAC3 = TransformAC3;
+    VP8TransformDC = TransformDC;
+}
+
+#else // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(VP8DspInitMIPSdspR2)
+
+#endif // WEBP_USE_MIPS_DSP_R2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/dec_neon.c b/codec/L2/demos/webpEnc/host/src/dsp/dec_neon.c
new file mode 100644
index 0000000000..4672975a7f
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/dec_neon.c
@@ -0,0 +1,1697 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// ARM NEON version of dsp functions and loop filtering.
+//
+// Authors: Somnath Banerjee (somnath@google.com)
+//          Johann Koenig (johannkoenig@google.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include "./neon.h"
+#include "../dec/vp8i.h"
+
+//------------------------------------------------------------------------------
+// NxM Loading functions
+
+// Load/Store vertical edge
+//
+// LOAD8x4 expands to eight `vld4.8` single-lane loads: the k-th one (k = 0..7)
+// targets lane k of the four registers c1..c4. Even k addresses through b1 and
+// odd k through b2, each time followed by the `stride` operand. All arguments
+// are stringified verbatim into the assembly text, so they must already be
+// valid operands (register names / address expressions) at the call site.
+// NOTE(review): call sites are outside this view; operand syntax not verified.
+#define LOAD8x4(c1, c2, c3, c4, b1, b2, stride)                                  \
+    "vld4.8 {" #c1 "[0]," #c2 "[0]," #c3 "[0]," #c4 "[0]}," #b1 "," #stride "\n" \
+    "vld4.8 {" #c1 "[1]," #c2 "[1]," #c3 "[1]," #c4 "[1]}," #b2 "," #stride "\n" \
+    "vld4.8 {" #c1 "[2]," #c2 "[2]," #c3 "[2]," #c4 "[2]}," #b1 "," #stride "\n" \
+    "vld4.8 {" #c1 "[3]," #c2 "[3]," #c3 "[3]," #c4 "[3]}," #b2 "," #stride "\n" \
+    "vld4.8 {" #c1 "[4]," #c2 "[4]," #c3 "[4]," #c4 "[4]}," #b1 "," #stride "\n" \
+    "vld4.8 {" #c1 "[5]," #c2 "[5]," #c3 "[5]," #c4 "[5]}," #b2 "," #stride "\n" \
+    "vld4.8 {" #c1 "[6]," #c2 "[6]," #c3 "[6]," #c4 "[6]}," #b1 "," #stride "\n" \
+    "vld4.8 {" #c1 "[7]," #c2 "[7]," #c3 "[7]," #c4 "[7]}," #b2 "," #stride "\n"
+
+// STORE8x2 expands to eight `vst2.8` single-lane stores. The k-th store
+// (k = 0..7) writes lane k of c1 and lane k of c2 -- two bytes in total --
+// through the address operand p, followed by the `stride` operand.
+// There is a single address operand, shared by all eight stores.
+// Every argument is stringified verbatim into the assembly text and must
+// therefore already be a valid operand at the call site.
+// NOTE(review): call sites are outside this view; operand syntax not verified.
+#define STORE8x2(c1, c2, p, stride)                           \
+    "vst2.8   {" #c1 "[0], " #c2 "[0]}," #p "," #stride " \n" \
+    "vst2.8   {" #c1 "[1], " #c2 "[1]}," #p "," #stride " \n" \
+    "vst2.8   {" #c1 "[2], " #c2 "[2]}," #p "," #stride " \n" \
+    "vst2.8   {" #c1 "[3], " #c2 "[3]}," #p "," #stride " \n" \
+    "vst2.8   {" #c1 "[4], " #c2 "[4]}," #p "," #stride " \n" \
+    "vst2.8   {" #c1 "[5], " #c2 "[5]}," #p "," #stride " \n" \
+    "vst2.8   {" #c1 "[6], " #c2 "[6]}," #p "," #stride " \n" \
+    "vst2.8   {" #c1 "[7], " #c2 "[7]}," #p "," #stride " \n"
+
+#if !defined(WORK_AROUND_GCC)
+
+// This intrinsics version makes gcc-4.6.3 crash during Load4x??() compilation
+// (register alloc, probably). The variants somewhat mitigate the problem, but
+// not quite. HFilter16i() remains problematic.
+static WEBP_INLINE uint8x8x4_t Load4x8(const uint8_t* const src, int stride) {  // 8 rows x 4 bytes -> 4 vectors of 8 lanes
+    const uint8x8_t zeros = vdup_n_u8(0);
+    uint8x8x4_t cols;
+    INIT_VECTOR4(cols, zeros, zeros, zeros, zeros);  // every lane defined before the per-lane loads (macro comes from neon.h)
+    cols = vld4_lane_u8(src + 0 * stride, cols, 0);  // row r: its 4 bytes go to lane r of cols.val[0..3]
+    cols = vld4_lane_u8(src + 1 * stride, cols, 1);
+    cols = vld4_lane_u8(src + 2 * stride, cols, 2);
+    cols = vld4_lane_u8(src + 3 * stride, cols, 3);
+    cols = vld4_lane_u8(src + 4 * stride, cols, 4);
+    cols = vld4_lane_u8(src + 5 * stride, cols, 5);
+    cols = vld4_lane_u8(src + 6 * stride, cols, 6);
+    cols = vld4_lane_u8(src + 7 * stride, cols, 7);  // the lane index must be a compile-time constant, hence no loop
+    return cols;
+}
+
+// Reads 16 rows of 4 bytes starting at src - 2 and returns them transposed:
+// *p1, *p0, *q0, *q1 receive bytes 0, 1, 2, 3 of every row, with row r in lane r.
+static WEBP_INLINE void Load4x16(const uint8_t* const src, int stride,
+                                 uint8x16_t* const p1, uint8x16_t* const p0,
+                                 uint8x16_t* const q0, uint8x16_t* const q1) {
+    // rows0to7  = p1[0..7]|p0[0..7]|q0[0..7]|q1[0..7]
+    // rows8to15 = p1[8..15]|p0[8..15]|q0[8..15]|q1[8..15]
+    const uint8_t* const first = src - 2;
+    const uint8x8x4_t rows0to7 = Load4x8(first + 0 * stride, stride);
+    const uint8x8x4_t rows8to15 = Load4x8(first + 8 * stride, stride);
+    *p1 = vcombine_u8(rows0to7.val[0], rows8to15.val[0]);
+    *p0 = vcombine_u8(rows0to7.val[1], rows8to15.val[1]);
+    *q0 = vcombine_u8(rows0to7.val[2], rows8to15.val[2]);
+    *q1 = vcombine_u8(rows0to7.val[3], rows8to15.val[3]);
+}
+
+#else // WORK_AROUND_GCC
+
+// Loads 4 bytes at `src` into 32-bit lane LANE of VEC, then moves `src` one row down.
+// Relies on `src` and `stride` being in scope where it is expanded.
+#define LOADQ_LANE_32b(VEC, LANE)                                    \
+    do {                                                             \
+        (VEC) = vld1q_lane_u32((const uint32_t*)src, (VEC), (LANE)); \
+        src += stride;                                               \
+    } while (0)
+
+// Work-around variant: same outputs as the Load4x8-based Load4x16, built from 32-bit lane loads.
+static WEBP_INLINE void Load4x16(const uint8_t* src, int stride,
+                                 uint8x16_t* const p1, uint8x16_t* const p0,
+                                 uint8x16_t* const q0, uint8x16_t* const q1) {
+    const uint32x4_t zeros = vdupq_n_u32(0);
+    uint32x4x4_t quad;
+    INIT_VECTOR4(quad, zeros, zeros, zeros, zeros);
+    src -= 2;
+    LOADQ_LANE_32b(quad.val[0], 0);
+    LOADQ_LANE_32b(quad.val[1], 0);
+    LOADQ_LANE_32b(quad.val[2], 0);
+    LOADQ_LANE_32b(quad.val[3], 0);
+    LOADQ_LANE_32b(quad.val[0], 1);
+    LOADQ_LANE_32b(quad.val[1], 1);
+    LOADQ_LANE_32b(quad.val[2], 1);
+    LOADQ_LANE_32b(quad.val[3], 1);
+    LOADQ_LANE_32b(quad.val[0], 2);
+    LOADQ_LANE_32b(quad.val[1], 2);
+    LOADQ_LANE_32b(quad.val[2], 2);
+    LOADQ_LANE_32b(quad.val[3], 2);
+    LOADQ_LANE_32b(quad.val[0], 3);
+    LOADQ_LANE_32b(quad.val[1], 3);
+    LOADQ_LANE_32b(quad.val[2], 3);
+    LOADQ_LANE_32b(quad.val[3], 3);
+    // Transpose four 4x4 parts (row r was loaded into quad.val[r % 4], 32-bit lane r / 4):
+    {
+        const uint8x16x2_t pair01 = vtrnq_u8(vreinterpretq_u8_u32(quad.val[0]), vreinterpretq_u8_u32(quad.val[1]));
+        const uint8x16x2_t pair23 = vtrnq_u8(vreinterpretq_u8_u32(quad.val[2]), vreinterpretq_u8_u32(quad.val[3]));
+        const uint16x8x2_t even = vtrnq_u16(vreinterpretq_u16_u8(pair01.val[0]), vreinterpretq_u16_u8(pair23.val[0]));
+        const uint16x8x2_t odd = vtrnq_u16(vreinterpretq_u16_u8(pair01.val[1]), vreinterpretq_u16_u8(pair23.val[1]));
+        *p1 = vreinterpretq_u8_u16(even.val[0]);
+        *p0 = vreinterpretq_u8_u16(odd.val[0]);
+        *q0 = vreinterpretq_u8_u16(even.val[1]);
+        *q1 = vreinterpretq_u8_u16(odd.val[1]);
+    }
+}
+#undef LOADQ_LANE_32b
+
+#endif // !WORK_AROUND_GCC
+
+// Reads 16 rows of 8 bytes spanning src - 4 .. src + 3 through two Load4x16 calls
+// (Load4x16 itself starts 2 bytes before the pointer it is given):
+// p3..p0 come from the 4 bytes before `src`, q0..q3 from the 4 bytes starting at `src`.
+static WEBP_INLINE void Load8x16(const uint8_t* const src, int stride,
+                                 uint8x16_t* const p3, uint8x16_t* const p2,
+                                 uint8x16_t* const p1, uint8x16_t* const p0,
+                                 uint8x16_t* const q0, uint8x16_t* const q1,
+                                 uint8x16_t* const q2, uint8x16_t* const q3) {
+    const uint8_t* const p_side = src - 2;
+    const uint8_t* const q_side = src + 2;
+    Load4x16(p_side, stride, p3, p2, p1, p0);
+    Load4x16(q_side, stride, q0, q1, q2, q3);
+}
+
+// Loads four consecutive 16-byte rows: *p1 and *p0 are the two rows above `src`,
+// *q0 is the row at `src` and *q1 the row below it.
+static WEBP_INLINE void Load16x4(const uint8_t* const src, int stride,
+                                 uint8x16_t* const p1, uint8x16_t* const p0,
+                                 uint8x16_t* const q0, uint8x16_t* const q1) {
+    const uint8_t* const above = src - 2 * stride;
+    *p1 = vld1q_u8(above);
+    *p0 = vld1q_u8(above + stride);
+    *q0 = vld1q_u8(src);
+    *q1 = vld1q_u8(src + stride);
+}
+
+// Loads eight consecutive 16-byte rows, from src - 4 * stride to src + 3 * stride,
+// through two Load16x4 calls (Load16x4 starts 2 rows above the pointer it is given):
+// p3..p0 are the four rows above `src`, q0..q3 are `src` and the three rows below it.
+static WEBP_INLINE void Load16x8(const uint8_t* const src, int stride,
+                                 uint8x16_t* const p3, uint8x16_t* const p2,
+                                 uint8x16_t* const p1, uint8x16_t* const p0,
+                                 uint8x16_t* const q0, uint8x16_t* const q1,
+                                 uint8x16_t* const q2, uint8x16_t* const q3) {
+    const uint8_t* const p_rows = src - 2 * stride;
+    const uint8_t* const q_rows = src + 2 * stride;
+    Load16x4(p_rows, stride, p3, p2, p1, p0);
+    Load16x4(q_rows, stride, q0, q1, q2, q3);
+}
+
+static WEBP_INLINE void Load8x8x2(const uint8_t* const u,
+                                  const uint8_t* const v,
+                                  int stride,
+                                  uint8x16_t* const p3,
+                                  uint8x16_t* const p2,
+                                  uint8x16_t* const p1,
+                                  uint8x16_t* const p0,
+                                  uint8x16_t* const q0,
+                                  uint8x16_t* const q1,
+                                  uint8x16_t* const q2,
+                                  uint8x16_t* const q3) {
+    // We pack the 8x8 u-samples in the lower half of the uint8x16_t destination
+    // and the v-samples on the higher half.
+    *p3 = vcombine_u8(vld1_u8(u - 4 * stride), vld1_u8(v - 4 * stride));
+    *p2 = vcombine_u8(vld1_u8(u - 3 * stride), vld1_u8(v - 3 * stride));
+    *p1 = vcombine_u8(vld1_u8(u - 2 * stride), vld1_u8(v - 2 * stride));
+    *p0 = vcombine_u8(vld1_u8(u - 1 * stride), vld1_u8(v - 1 * stride));
+    *q0 = vcombine_u8(vld1_u8(u + 0 * stride), vld1_u8(v + 0 * stride));
+    *q1 = vcombine_u8(vld1_u8(u + 1 * stride), vld1_u8(v + 1 * stride));
+    *q2 = vcombine_u8(vld1_u8(u + 2 * stride), vld1_u8(v + 2 * stride));
+    *q3 = vcombine_u8(vld1_u8(u + 3 * stride), vld1_u8(v + 3 * stride));
+}
+
+#if !defined(WORK_AROUND_GCC)
+
+#define LOAD_UV_8(ROW) vcombine_u8(vld1_u8(u - 4 + (ROW)*stride), vld1_u8(v - 4 + (ROW)*stride))  // 8 u bytes | 8 v bytes of one row; needs u, v, stride in scope
+// Loads rows 0..7 of 8 bytes starting at u - 4 and v - 4, then transposes both 8x8 blocks so each output is one column.
+static WEBP_INLINE void Load8x8x2T(const uint8_t* const u,
+                                   const uint8_t* const v,
+                                   int stride,
+                                   uint8x16_t* const p3,
+                                   uint8x16_t* const p2,
+                                   uint8x16_t* const p1,
+                                   uint8x16_t* const p0,
+                                   uint8x16_t* const q0,
+                                   uint8x16_t* const q1,
+                                   uint8x16_t* const q2,
+                                   uint8x16_t* const q3) {
+    // We pack the 8x8 u-samples in the lower half of the uint8x16_t destination
+    // and the v-samples on the higher half.
+    const uint8x16_t row0 = LOAD_UV_8(0);
+    const uint8x16_t row1 = LOAD_UV_8(1);
+    const uint8x16_t row2 = LOAD_UV_8(2);
+    const uint8x16_t row3 = LOAD_UV_8(3);
+    const uint8x16_t row4 = LOAD_UV_8(4);
+    const uint8x16_t row5 = LOAD_UV_8(5);
+    const uint8x16_t row6 = LOAD_UV_8(6);
+    const uint8x16_t row7 = LOAD_UV_8(7);
+    // Perform two side-by-side 8x8 transposes
+    // u00 u01 u02 u03 u04 u05 u06 u07 | v00 v01 v02 v03 v04 v05 v06 v07
+    // u10 u11 u12 u13 u14 u15 u16 u17 | v10 v11 v12 ...
+    // u20 u21 u22 u23 u24 u25 u26 u27 | v20 v21 ...
+    // u30 u31 u32 u33 u34 u35 u36 u37 | ...
+    // u40 u41 u42 u43 u44 u45 u46 u47 | ...
+    // u50 u51 u52 u53 u54 u55 u56 u57 | ...
+    // u60 u61 u62 u63 u64 u65 u66 u67 | v60 ...
+    // u70 u71 u72 u73 u74 u75 u76 u77 | v70 v71 v72 ...
+    const uint8x16x2_t row01 = vtrnq_u8(row0, row1); // u00 u10 u02 u12 ...
+                                                     // u01 u11 u03 u13 ...
+    const uint8x16x2_t row23 = vtrnq_u8(row2, row3); // u20 u30 u22 u32 ...
+                                                     // u21 u31 u23 u33 ...
+    const uint8x16x2_t row45 = vtrnq_u8(row4, row5); // ...
+    const uint8x16x2_t row67 = vtrnq_u8(row6, row7); // ...
+    const uint16x8x2_t row02 = vtrnq_u16(vreinterpretq_u16_u8(row01.val[0]), vreinterpretq_u16_u8(row23.val[0]));
+    const uint16x8x2_t row13 = vtrnq_u16(vreinterpretq_u16_u8(row01.val[1]), vreinterpretq_u16_u8(row23.val[1]));
+    const uint16x8x2_t row46 = vtrnq_u16(vreinterpretq_u16_u8(row45.val[0]), vreinterpretq_u16_u8(row67.val[0]));
+    const uint16x8x2_t row57 = vtrnq_u16(vreinterpretq_u16_u8(row45.val[1]), vreinterpretq_u16_u8(row67.val[1]));
+    const uint32x4x2_t row04 = vtrnq_u32(vreinterpretq_u32_u16(row02.val[0]), vreinterpretq_u32_u16(row46.val[0]));
+    const uint32x4x2_t row26 = vtrnq_u32(vreinterpretq_u32_u16(row02.val[1]), vreinterpretq_u32_u16(row46.val[1]));
+    const uint32x4x2_t row15 = vtrnq_u32(vreinterpretq_u32_u16(row13.val[0]), vreinterpretq_u32_u16(row57.val[0]));
+    const uint32x4x2_t row37 = vtrnq_u32(vreinterpretq_u32_u16(row13.val[1]), vreinterpretq_u32_u16(row57.val[1]));
+    *p3 = vreinterpretq_u8_u32(row04.val[0]); // column 0: u00 u10 .. u70 | v00 v10 .. v70
+    *p2 = vreinterpretq_u8_u32(row15.val[0]); // column 1
+    *p1 = vreinterpretq_u8_u32(row26.val[0]); // column 2
+    *p0 = vreinterpretq_u8_u32(row37.val[0]); // column 3
+    *q0 = vreinterpretq_u8_u32(row04.val[1]); // column 4
+    *q1 = vreinterpretq_u8_u32(row15.val[1]); // column 5
+    *q2 = vreinterpretq_u8_u32(row26.val[1]); // column 6
+    *q3 = vreinterpretq_u8_u32(row37.val[1]); // column 7
+}
+#undef LOAD_UV_8
+
+#endif // !WORK_AROUND_GCC
+
+static WEBP_INLINE void Store2x8(const uint8x8x2_t v, uint8_t* const dst, int stride) {
+    vst2_lane_u8(dst + 0 * stride, v, 0);
+    vst2_lane_u8(dst + 1 * stride, v, 1);
+    vst2_lane_u8(dst + 2 * stride, v, 2);
+    vst2_lane_u8(dst + 3 * stride, v, 3);
+    vst2_lane_u8(dst + 4 * stride, v, 4);
+    vst2_lane_u8(dst + 5 * stride, v, 5);
+    vst2_lane_u8(dst + 6 * stride, v, 6);
+    vst2_lane_u8(dst + 7 * stride, v, 7);
+}
+
+// Writes 16 rows of two bytes each: row i receives (p0[i], q0[i]) at
+// 'dst - 1 + i * stride', i.e. p0 lands one byte left of 'dst' and q0 on it.
+static WEBP_INLINE void Store2x16(const uint8x16_t p0, const uint8x16_t q0, uint8_t* const dst, int stride) {
+    uint8_t* const first_row = dst - 1;
+    uint8x8x2_t rows_0_7, rows_8_15;
+    // Low halves feed rows 0..7, high halves feed rows 8..15.
+    rows_0_7.val[0] = vget_low_u8(p0);
+    rows_0_7.val[1] = vget_low_u8(q0);
+    rows_8_15.val[0] = vget_high_u8(p0);
+    rows_8_15.val[1] = vget_high_u8(q0);
+    Store2x8(rows_0_7, first_row + 0 * stride, stride);
+    Store2x8(rows_8_15, first_row + 8 * stride, stride);
+}
+
+#if !defined(WORK_AROUND_GCC)
+static WEBP_INLINE void Store4x8(const uint8x8x4_t v, uint8_t* const dst, int stride) {
+    vst4_lane_u8(dst + 0 * stride, v, 0);
+    vst4_lane_u8(dst + 1 * stride, v, 1);
+    vst4_lane_u8(dst + 2 * stride, v, 2);
+    vst4_lane_u8(dst + 3 * stride, v, 3);
+    vst4_lane_u8(dst + 4 * stride, v, 4);
+    vst4_lane_u8(dst + 5 * stride, v, 5);
+    vst4_lane_u8(dst + 6 * stride, v, 6);
+    vst4_lane_u8(dst + 7 * stride, v, 7);
+}
+
+// Writes 16 rows of four bytes each: row i receives
+// (p1[i], p0[i], q0[i], q1[i]) starting two bytes left of 'dst'.
+static WEBP_INLINE void Store4x16(const uint8x16_t p1,
+                                  const uint8x16_t p0,
+                                  const uint8x16_t q0,
+                                  const uint8x16_t q1,
+                                  uint8_t* const dst,
+                                  int stride) {
+    uint8_t* const first_row = dst - 2;
+    uint8x8x4_t rows_0_7, rows_8_15;
+    // Low halves feed rows 0..7, high halves feed rows 8..15.
+    INIT_VECTOR4(rows_0_7, vget_low_u8(p1), vget_low_u8(p0), vget_low_u8(q0), vget_low_u8(q1));
+    INIT_VECTOR4(rows_8_15, vget_high_u8(p1), vget_high_u8(p0), vget_high_u8(q0), vget_high_u8(q1));
+    Store4x8(rows_0_7, first_row + 0 * stride, stride);
+    Store4x8(rows_8_15, first_row + 8 * stride, stride);
+}
+#endif // !WORK_AROUND_GCC
+
+// Stores 'p0' to the 16 bytes of the row just above 'dst' and 'q0' to the
+// 16 bytes at 'dst'.
+static WEBP_INLINE void Store16x2(const uint8x16_t p0, const uint8x16_t q0, uint8_t* const dst, int stride) {
+    uint8_t* const row_above = dst - stride;
+    vst1q_u8(row_above, p0);
+    vst1q_u8(dst, q0);
+}
+
+// Stores four 16-byte rows around 'dst': p1 at row -2, p0 at row -1,
+// q0 at row 0 and q1 at row +1.
+static WEBP_INLINE void Store16x4(const uint8x16_t p1,
+                                  const uint8x16_t p0,
+                                  const uint8x16_t q0,
+                                  const uint8x16_t q1,
+                                  uint8_t* const dst,
+                                  int stride) {
+    uint8_t* const upper_pair = dst - stride;
+    uint8_t* const lower_pair = dst + stride;
+    Store16x2(p1, p0, upper_pair, stride);
+    Store16x2(q0, q1, lower_pair, stride);
+}
+
+// Stores two 8-byte rows into each chroma plane. 'p0' and 'q0' carry the
+// u-samples in their low half and the v-samples in their high half; p0 goes to
+// the row above the given pointers and q0 to the row at them.
+static WEBP_INLINE void Store8x2x2(
+    const uint8x16_t p0, const uint8x16_t q0, uint8_t* const u, uint8_t* const v, int stride) {
+    uint8_t* const u_above = u - stride;
+    uint8_t* const v_above = v - stride;
+    vst1_u8(u_above, vget_low_u8(p0));
+    vst1_u8(u, vget_low_u8(q0));
+    vst1_u8(v_above, vget_high_u8(p0));
+    vst1_u8(v, vget_high_u8(q0));
+}
+
+// Stores four 8-byte rows into each chroma plane (rows -2..+1 around 'u' and
+// 'v'). Each input vector packs u-samples in its low half and v-samples in its
+// high half.
+static WEBP_INLINE void Store8x4x2(const uint8x16_t p1,
+                                   const uint8x16_t p0,
+                                   const uint8x16_t q0,
+                                   const uint8x16_t q1,
+                                   uint8_t* const u,
+                                   uint8_t* const v,
+                                   int stride) {
+    // Rows -2 and -1 first, then rows 0 and +1.
+    uint8_t* const u_upper = u - stride;
+    uint8_t* const v_upper = v - stride;
+    uint8_t* const u_lower = u + stride;
+    uint8_t* const v_lower = v + stride;
+    Store8x2x2(p1, p0, u_upper, v_upper, stride);
+    Store8x2x2(q0, q1, u_lower, v_lower, stride);
+}
+
+#if !defined(WORK_AROUND_GCC)
+
+// Writes the six bytes of row ROW: lane ROW of VAL0 to (DST) - 3 .. (DST) - 1
+// and lane ROW of VAL1 to (DST) + 0 .. (DST) + 2, on the row located
+// ROW * stride bytes after DST. Expects 'stride' in scope.
+#define STORE6_ROW(DST, VAL0, VAL1, ROW)                 \
+    do {                                                 \
+        uint8_t* const row_ptr = (DST) + (ROW) * stride; \
+        vst3_lane_u8(row_ptr - 3, (VAL0), (ROW));        \
+        vst3_lane_u8(row_ptr + 0, (VAL1), (ROW));        \
+    } while (0)
+
+// Stores, for each of the 8 rows of both chroma planes, the six samples
+// p2 p1 p0 q0 q1 q2 at horizontal offsets -3..+2 around 'u' (low halves of the
+// inputs) and around 'v' (high halves of the inputs).
+static WEBP_INLINE void Store6x8x2(const uint8x16_t p2,
+                                   const uint8x16_t p1,
+                                   const uint8x16_t p0,
+                                   const uint8x16_t q0,
+                                   const uint8x16_t q1,
+                                   const uint8x16_t q2,
+                                   uint8_t* u,
+                                   uint8_t* v,
+                                   int stride) {
+    uint8x8x3_t u_left, u_right, v_left, v_right;
+    INIT_VECTOR3(u_left, vget_low_u8(p2), vget_low_u8(p1), vget_low_u8(p0));
+    INIT_VECTOR3(u_right, vget_low_u8(q0), vget_low_u8(q1), vget_low_u8(q2));
+    INIT_VECTOR3(v_left, vget_high_u8(p2), vget_high_u8(p1), vget_high_u8(p0));
+    INIT_VECTOR3(v_right, vget_high_u8(q0), vget_high_u8(q1), vget_high_u8(q2));
+    // u plane, rows 0..7.
+    STORE6_ROW(u, u_left, u_right, 0);
+    STORE6_ROW(u, u_left, u_right, 1);
+    STORE6_ROW(u, u_left, u_right, 2);
+    STORE6_ROW(u, u_left, u_right, 3);
+    STORE6_ROW(u, u_left, u_right, 4);
+    STORE6_ROW(u, u_left, u_right, 5);
+    STORE6_ROW(u, u_left, u_right, 6);
+    STORE6_ROW(u, u_left, u_right, 7);
+    // v plane, rows 0..7.
+    STORE6_ROW(v, v_left, v_right, 0);
+    STORE6_ROW(v, v_left, v_right, 1);
+    STORE6_ROW(v, v_left, v_right, 2);
+    STORE6_ROW(v, v_left, v_right, 3);
+    STORE6_ROW(v, v_left, v_right, 4);
+    STORE6_ROW(v, v_left, v_right, 5);
+    STORE6_ROW(v, v_left, v_right, 6);
+    STORE6_ROW(v, v_left, v_right, 7);
+}
+#undef STORE6_ROW
+
+static WEBP_INLINE void Store4x8x2(const uint8x16_t p1,
+                                   const uint8x16_t p0,
+                                   const uint8x16_t q0,
+                                   const uint8x16_t q1,
+                                   uint8_t* const u,
+                                   uint8_t* const v,
+                                   int stride) {
+    uint8x8x4_t u0, v0;
+    INIT_VECTOR4(u0, vget_low_u8(p1), vget_low_u8(p0), vget_low_u8(q0), vget_low_u8(q1));
+    INIT_VECTOR4(v0, vget_high_u8(p1), vget_high_u8(p0), vget_high_u8(q0), vget_high_u8(q1));
+    vst4_lane_u8(u - 2 + 0 * stride, u0, 0);
+    vst4_lane_u8(u - 2 + 1 * stride, u0, 1);
+    vst4_lane_u8(u - 2 + 2 * stride, u0, 2);
+    vst4_lane_u8(u - 2 + 3 * stride, u0, 3);
+    vst4_lane_u8(u - 2 + 4 * stride, u0, 4);
+    vst4_lane_u8(u - 2 + 5 * stride, u0, 5);
+    vst4_lane_u8(u - 2 + 6 * stride, u0, 6);
+    vst4_lane_u8(u - 2 + 7 * stride, u0, 7);
+    vst4_lane_u8(v - 2 + 0 * stride, v0, 0);
+    vst4_lane_u8(v - 2 + 1 * stride, v0, 1);
+    vst4_lane_u8(v - 2 + 2 * stride, v0, 2);
+    vst4_lane_u8(v - 2 + 3 * stride, v0, 3);
+    vst4_lane_u8(v - 2 + 4 * stride, v0, 4);
+    vst4_lane_u8(v - 2 + 5 * stride, v0, 5);
+    vst4_lane_u8(v - 2 + 6 * stride, v0, 6);
+    vst4_lane_u8(v - 2 + 7 * stride, v0, 7);
+}
+
+#endif // !WORK_AROUND_GCC
+
+// Widens the eight unsigned bytes of 'v' to 16 bits (zero extension) and
+// returns them viewed as signed 16-bit lanes.
+static WEBP_INLINE int16x8_t ConvertU8ToS16(uint8x8_t v) {
+    const uint16x8_t widened = vmovl_u8(v);
+    return vreinterpretq_s16_u16(widened);
+}
+
+// Narrows 'dst01' and 'dst23' to unsigned 8 bits with saturation and writes
+// the resulting 4x4 block to 'dst': 'dst01' supplies rows 0 and 1, 'dst23'
+// rows 2 and 3, with rows spaced BPS bytes apart.
+static WEBP_INLINE void SaturateAndStore4x4(uint8_t* const dst, const int16x8_t dst01, const int16x8_t dst23) {
+    // Each 32-bit lane of the narrowed vectors is one 4-pixel row.
+    const uint32x2_t rows01 = vreinterpret_u32_u8(vqmovun_s16(dst01));
+    const uint32x2_t rows23 = vreinterpret_u32_u8(vqmovun_s16(dst23));
+
+    vst1_lane_u32((uint32_t*)(dst + 0 * BPS), rows01, 0);
+    vst1_lane_u32((uint32_t*)(dst + 1 * BPS), rows01, 1);
+    vst1_lane_u32((uint32_t*)(dst + 2 * BPS), rows23, 0);
+    vst1_lane_u32((uint32_t*)(dst + 3 * BPS), rows23, 1);
+}
+
+// Adds the residual in 'row01' / 'row23' (rows 0-1 and rows 2-3, 16 bits per
+// sample) to the 4x4 block of pixels at 'dst' (rows BPS bytes apart). The
+// residual is shifted right by 3 with rounding before the addition, and the
+// sum is saturated back to unsigned 8 bits.
+static WEBP_INLINE void Add4x4(const int16x8_t row01, const int16x8_t row23, uint8_t* const dst) {
+    uint32x2_t pixels01 = vdup_n_u32(0);
+    uint32x2_t pixels23 = vdup_n_u32(0);
+
+    // Gather the four existing 4-pixel rows, one 32-bit lane per row.
+    pixels01 = vld1_lane_u32((uint32_t*)(dst + 0 * BPS), pixels01, 0);
+    pixels01 = vld1_lane_u32((uint32_t*)(dst + 1 * BPS), pixels01, 1);
+    pixels23 = vld1_lane_u32((uint32_t*)(dst + 2 * BPS), pixels23, 0);
+    pixels23 = vld1_lane_u32((uint32_t*)(dst + 3 * BPS), pixels23, 1);
+
+    {
+        // Widen the pixels to 16 bits so the signed residual can be added.
+        const int16x8_t wide01 = ConvertU8ToS16(vreinterpret_u8_u32(pixels01));
+        const int16x8_t wide23 = ConvertU8ToS16(vreinterpret_u8_u32(pixels23));
+
+        // pixel + rounding_shift_right(residual, 3), in a single instruction.
+        const int16x8_t sum01 = vrsraq_n_s16(wide01, row01, 3);
+        const int16x8_t sum23 = vrsraq_n_s16(wide23, row23, 3);
+
+        SaturateAndStore4x4(dst, sum01, sum23);
+    }
+}
+
+//-----------------------------------------------------------------------------
+// Simple In-loop filtering (Paragraph 15.2)
+
+// Returns a per-lane mask that is all-ones where
+//   2 * abs(p0 - q0) + abs(p1 - q1) / 2 <= thresh
+// (both the doubling and the sum saturate at 255) and zero elsewhere.
+// Only the low 8 bits of 'thresh' are used.
+static uint8x16_t NeedsFilter(
+    const uint8x16_t p1, const uint8x16_t p0, const uint8x16_t q0, const uint8x16_t q1, int thresh) {
+    const uint8x16_t diff_p0_q0 = vabdq_u8(p0, q0);
+    const uint8x16_t diff_p1_q1 = vabdq_u8(p1, q1);
+    const uint8x16_t twice_p0_q0 = vqaddq_u8(diff_p0_q0, diff_p0_q0);
+    const uint8x16_t half_p1_q1 = vshrq_n_u8(diff_p1_q1, 1);
+    const uint8x16_t edge_strength = vqaddq_u8(twice_p0_q0, half_p1_q1);
+    return vcgeq_u8(vdupq_n_u8((uint8_t)thresh), edge_strength);
+}
+
+// Toggles the top bit of every byte of 'v' and returns the result viewed as
+// signed bytes (maps the unsigned range 0..255 onto -128..127).
+static int8x16_t FlipSign(const uint8x16_t v) {
+    const uint8x16_t toggled = veorq_u8(v, vdupq_n_u8(0x80));
+    return vreinterpretq_s8_u8(toggled);
+}
+
+// Inverse of FlipSign(): toggles the top bit of every byte of 'v' and returns
+// the result viewed as unsigned bytes (maps -128..127 back onto 0..255).
+static uint8x16_t FlipSignBack(const int8x16_t v) {
+    // The XOR is done in the unsigned domain: 0x80 (128) does not fit in an
+    // int8_t, so building the constant with vdupq_n_s8() would rely on an
+    // out-of-range int -> int8_t conversion. The resulting bits are identical.
+    const uint8x16_t sign_bit = vdupq_n_u8(0x80);
+    return veorq_u8(vreinterpretq_u8_s8(v), sign_bit);
+}
+
+// Computes (p1 - q1) + 3 * (q0 - p0) on signed bytes, saturating after the
+// subtractions and after each of the three additions.
+static int8x16_t GetBaseDelta(const int8x16_t p1, const int8x16_t p0, const int8x16_t q0, const int8x16_t q1) {
+    const int8x16_t step = vqsubq_s8(q0, p0); // (q0 - p0)
+    int8x16_t acc = vqsubq_s8(p1, q1);        // (p1 - q1)
+    acc = vqaddq_s8(acc, step);               // (p1 - q1) + 1 * (q0 - p0)
+    acc = vqaddq_s8(step, acc);               // (p1 - q1) + 2 * (q0 - p0)
+    acc = vqaddq_s8(step, acc);               // (p1 - q1) + 3 * (q0 - p0)
+    return acc;
+}
+
+// Computes 3 * (q0 - p0) on signed bytes, saturating after the subtraction
+// and after each addition.
+static int8x16_t GetBaseDelta0(const int8x16_t p0, const int8x16_t q0) {
+    const int8x16_t step = vqsubq_s8(q0, p0);     // (q0 - p0)
+    const int8x16_t twice = vqaddq_s8(step, step); // 2 * (q0 - p0)
+    return vqaddq_s8(step, twice);                 // 3 * (q0 - p0)
+}
+
+//------------------------------------------------------------------------------
+
+// Two-pixel filter step on sign-flipped samples:
+//   *op0 = sat(p0s + (sat(delta + 3) >> 3))
+//   *oq0 = sat(q0s - (sat(delta + 4) >> 3))
+// The outputs stay in the signed domain (no FlipSignBack is applied).
+static void ApplyFilter2NoFlip(
+    const int8x16_t p0s, const int8x16_t q0s, const int8x16_t delta, int8x16_t* const op0, int8x16_t* const oq0) {
+    const int8x16_t p_step = vshrq_n_s8(vqaddq_s8(delta, vdupq_n_s8(0x03)), 3);
+    const int8x16_t q_step = vshrq_n_s8(vqaddq_s8(delta, vdupq_n_s8(0x04)), 3);
+    *op0 = vqaddq_s8(p0s, p_step);
+    *oq0 = vqsubq_s8(q0s, q_step);
+}
+
+#if defined(WEBP_USE_INTRINSICS)
+
+// Two-pixel filter step on sign-flipped samples, with the results converted
+// back to unsigned bytes:
+//   *op0 = FlipSignBack(sat(p0s + (sat(delta + 3) >> 3)))
+//   *oq0 = FlipSignBack(sat(q0s - (sat(delta + 4) >> 3)))
+static void ApplyFilter2(
+    const int8x16_t p0s, const int8x16_t q0s, const int8x16_t delta, uint8x16_t* const op0, uint8x16_t* const oq0) {
+    const int8x16_t p_step = vshrq_n_s8(vqaddq_s8(delta, vdupq_n_s8(0x03)), 3);
+    const int8x16_t q_step = vshrq_n_s8(vqaddq_s8(delta, vdupq_n_s8(0x04)), 3);
+    const int8x16_t filtered_p0 = vqaddq_s8(p0s, p_step);
+    const int8x16_t filtered_q0 = vqsubq_s8(q0s, q_step);
+    *op0 = FlipSignBack(filtered_p0);
+    *oq0 = FlipSignBack(filtered_q0);
+}
+
+// Simple loop filter on unsigned samples: converts the four inputs to the
+// signed domain, computes the base delta, zeroes it in lanes where 'mask' is
+// clear, and writes the adjusted p0 / q0 (as unsigned bytes) to 'op0' / 'oq0'.
+static void DoFilter2(const uint8x16_t p1,
+                      const uint8x16_t p0,
+                      const uint8x16_t q0,
+                      const uint8x16_t q1,
+                      const uint8x16_t mask,
+                      uint8x16_t* const op0,
+                      uint8x16_t* const oq0) {
+    const int8x16_t sp1 = FlipSign(p1);
+    const int8x16_t sp0 = FlipSign(p0);
+    const int8x16_t sq0 = FlipSign(q0);
+    const int8x16_t sq1 = FlipSign(q1);
+    // Lanes with a zero mask get a zero delta.
+    const int8x16_t masked_delta = vandq_s8(GetBaseDelta(sp1, sp0, sq0, sq1), vreinterpretq_s8_u8(mask));
+    ApplyFilter2(sp0, sq0, masked_delta, op0, oq0);
+}
+
+// Simple in-loop filter across a horizontal edge, 16 pixels wide
+// (intrinsics build). The four rows are fetched with Load16x4() and the two
+// filtered rows written back with Store16x2(), i.e. to the row above 'p' and
+// the row at 'p'.
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
+    uint8x16_t p1, p0, q0, q1;
+    uint8x16_t filtered_p0, filtered_q0;
+    Load16x4(p, stride, &p1, &p0, &q0, &q1);
+    DoFilter2(p1, p0, q0, q1, NeedsFilter(p1, p0, q0, q1, thresh), &filtered_p0, &filtered_q0);
+    Store16x2(filtered_p0, filtered_q0, p, stride);
+}
+
+// Simple in-loop filter across a vertical edge, 16 rows tall (intrinsics
+// build). The four columns are fetched with Load4x16() and the two filtered
+// columns written back with Store2x16(), i.e. to the byte left of 'p' and the
+// byte at 'p' on each of the 16 rows.
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+    uint8x16_t p1, p0, q0, q1;
+    uint8x16_t filtered_p0, filtered_q0;
+    Load4x16(p, stride, &p1, &p0, &q0, &q1);
+    DoFilter2(p1, p0, q0, q1, NeedsFilter(p1, p0, q0, q1, thresh), &filtered_p0, &filtered_q0);
+    Store2x16(filtered_p0, filtered_q0, p, stride);
+}
+
+#else
+
+// Clobber-list fragment for the inline-assembly filters in this section:
+// names the NEON quad registers q0-q3 and q8-q15.
+#define QRegs "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+
+// Emits assembly text for two instructions: a ^= s and b ^= s.
+// 'a', 'b' and 's' are register names; they are pasted into the instruction
+// strings via stringification.
+#define FLIP_SIGN_BIT2(a, b, s)  \
+    "veor     " #a "," #a "," #s \
+    "               \n"          \
+    "veor     " #b "," #b "," #s "               \n"
+
+// Four-register form of FLIP_SIGN_BIT2: XORs each of a, b, c and d with s.
+#define FLIP_SIGN_BIT4(a, b, c, d, s) \
+    FLIP_SIGN_BIT2(a, b, s)           \
+    FLIP_SIGN_BIT2(c, d, s)
+
+// Emits assembly text that sets each byte lane of 'mask' to all-ones when
+//   2 * abs(p0 - q0) + abs(p1 - q1) / 2 <= thresh
+// and to zero otherwise. 'thresh' is an operand that gets broadcast to every
+// byte lane. Uses q14 and q15 as scratch registers.
+#define NEEDS_FILTER(p1, p0, q0, q1, thresh, mask)                                        \
+    "vabd.u8    q15," #p0 "," #q0                                                         \
+    "         \n" /* abs(p0 - q0) */                                                      \
+    "vabd.u8    q14," #p1 "," #q1                                                         \
+    "         \n"                               /* abs(p1 - q1) */                        \
+    "vqadd.u8   q15, q15, q15               \n" /* abs(p0 - q0) * 2 */                    \
+    "vshr.u8    q14, q14, #1                \n" /* abs(p1 - q1) / 2 */                    \
+    "vqadd.u8   q15, q15, q14     \n"           /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 */ \
+    "vdup.8     q14, " #thresh                                                            \
+    "            \n"                                                                      \
+    "vcge.u8   " #mask ", q14, q15          \n" /* mask = (thresh >= sum) */
+
+// Emits assembly text that leaves (p1 - q1) + 3 * (q0 - p0) in register 'o',
+// using saturating signed byte arithmetic at every step. Uses q15 as a scratch
+// register holding (q0 - p0).
+#define GET_BASE_DELTA(p1, p0, q0, q1, o)                                       \
+    "vqsub.s8   q15," #q0 "," #p0                                               \
+    "         \n" /* (q0 - p0) */                                               \
+    "vqsub.s8  " #o "," #p1 "," #q1                                             \
+    "       \n" /* (p1 - q1) */                                                 \
+    "vqadd.s8  " #o "," #o                                                      \
+    ", q15           \n" /* (p1 - q1) + 1 * (q0 - p0) */                        \
+    "vqadd.s8  " #o "," #o                                                      \
+    ", q15           \n"                        /* (p1 - q1) + 2 * (q0 - p0) */ \
+    "vqadd.s8  " #o "," #o ", q15           \n" /* (p1 - q1) + 3 * (q0 - p0) */
+
+// Emits assembly text for the two-pixel filter step on signed bytes:
+//   p0 = sat(p0 + (sat(fl + 3) >> 3))
+//   q0 = sat(q0 - (sat(fl + 4) >> 3))
+// Uses q15 as a scratch register.
+#define DO_SIMPLE_FILTER(p0, q0, fl)                                       \
+    "vmov.i8    q15, #0x03                  \n"                            \
+    "vqadd.s8   q15, q15, " #fl                                            \
+    "           \n"                             /* filter1 = filter + 3 */ \
+    "vshr.s8    q15, q15, #3                \n" /* filter1 >> 3 */         \
+    "vqadd.s8  " #p0 "," #p0                                               \
+    ", q15         \n" /* p0 += filter1 */                                 \
+                                                                           \
+    "vmov.i8    q15, #0x04                  \n"                            \
+    "vqadd.s8   q15, q15, " #fl                                            \
+    "           \n"                             /* filter2 = filter + 4 */ \
+    "vshr.s8    q15, q15, #3                \n" /* filter2 >> 3 */         \
+    "vqsub.s8  " #q0 "," #q0 ", q15         \n" /* q0 -= filter2 */
+
+// Applies filter on 2 pixels (p0 and q0)
+// Emits the complete simple-filter sequence as assembly text. Register use:
+// q9 holds the filter mask and then the masked delta, q10 the 0x80 sign-bit
+// constant, q11 the unmasked delta; q14/q15 are scratch for the sub-macros.
+// All four inputs are sign-flipped, but only p0 and q0 are flipped back, so
+// the p1 and q1 registers are left in the signed domain on exit.
+#define DO_FILTER2(p1, p0, q0, q1, thresh)                                        \
+    NEEDS_FILTER(p1, p0, q0, q1, thresh, q9)        /* filter mask in q9 */       \
+    "vmov.i8    q10, #0x80                  \n"     /* sign bit */                \
+        FLIP_SIGN_BIT4(p1, p0, q0, q1, q10)         /* convert to signed value */ \
+        GET_BASE_DELTA(p1, p0, q0, q1, q11)         /* get filter level  */       \
+        "vand       q9, q9, q11                 \n" /* apply filter mask */       \
+        DO_SIMPLE_FILTER(p0, q0, q9)                /* apply filter */            \
+        FLIP_SIGN_BIT2(p0, q0, q10)
+
+// Simple in-loop filter across a horizontal edge, 16 pixels wide
+// (inline-assembly build). Reads the rows at p - 2 * stride, p - stride, p and
+// p + stride as p1, p0, q0 and q1, runs DO_FILTER2 on them, and writes the
+// filtered p0 / q0 back to the rows at p - stride and p.
+//   p:      address of the first pixel below the edge (row q0).
+//   stride: distance in bytes between consecutive rows.
+//   thresh: filter threshold; it is broadcast to every byte lane.
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
+    // clang-format off
+    __asm__ volatile(
+        "sub        %[p], %[p], %[stride], lsl #1  \n" // p -= 2 * stride
+
+        "vld1.u8    {q1}, [%[p]], %[stride]        \n" // p1
+        "vld1.u8    {q2}, [%[p]], %[stride]        \n" // p0
+        "vld1.u8    {q3}, [%[p]], %[stride]        \n" // q0
+        "vld1.u8    {q12}, [%[p]]                  \n" // q1
+
+        // The operand reference must be spelled %[thresh] with no inner
+        // whitespace: the macro stringifies its argument, and "% [thresh]"
+        // is not a valid operand reference in the asm template.
+        DO_FILTER2(q1, q2, q3, q12, %[thresh])
+
+        // The three post-incremented loads left p at (original p) + stride.
+        "sub        %[p], %[p], %[stride], lsl #1  \n" // p -= 2 * stride
+
+        "vst1.u8    {q2}, [%[p]], %[stride]        \n" // store op0
+        "vst1.u8    {q3}, [%[p]]                   \n" // store oq0
+        : [p] "+r"(p)
+        : [stride] "r"(stride), [thresh] "r"(thresh)
+        : "memory", QRegs);
+    // clang-format on
+}
+
+// Simple in-loop filter across a vertical edge (inline-assembly build).
+// Pixels are fetched starting two bytes left of 'p' and the filtered p0 / q0
+// pair is written back starting one byte left of 'p'.
+// r4, r5 and r6 are used as scratch address/stride registers and are declared
+// in the clobber list together with the NEON registers in QRegs.
+// NOTE(review): LOAD8x4 and STORE8x2 are macros defined outside this section;
+// presumably each handles 8 rows per invocation (16 rows in total) -- confirm
+// against their definitions.
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+    __asm__ volatile (
+    "sub        r4, %[p], #2                   \n"  // base1 = p - 2
+    "lsl        r6, %[stride], #1              \n"  // r6 = 2 * stride
+    "add        r5, r4, %[stride]              \n"  // base2 = base1 + stride
+
+    LOAD8x4(d2, d3, d4, d5, [r4], [r5], r6)
+    LOAD8x4(d24, d25, d26, d27, [r4], [r5], r6)
+    // Regroup the loaded d-registers so the four q-registers handed to
+    // DO_FILTER2 below hold p1, p0, q0 and q1 respectively.
+    "vswp       d3, d24                        \n"  // p1:q1 p0:q3
+    "vswp       d5, d26                        \n"  // q0:q2 q1:q4
+    "vswp       q2, q12                        \n"  // p1:q1 p0:q2 q0:q3 q1:q4
+
+    DO_FILTER2(q1, q2, q12, q13, %[thresh])
+
+    "sub        %[p], %[p], #1                 \n"  // p - 1
+
+    // Pair the matching halves of filtered p0 (q2) and q0 (q12) for the stores.
+    "vswp        d5, d24                       \n"
+    STORE8x2(d4, d5, [%[p]], %[stride])
+    STORE8x2(d24, d25, [%[p]], %[stride])
+
+    : [p] "+r"(p)
+    : [stride] "r"(stride), [thresh] "r"(thresh)
+    : "memory", "r4", "r5", "r6", QRegs
+  );
+}
+
+#endif // WEBP_USE_INTRINSICS
+
+// Applies SimpleVFilter16() to the three horizontal edges located 4, 8 and 12
+// rows below 'p'.
+static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
+    uint8_t* edge = p;
+    int edges_left = 3;
+    while (edges_left-- > 0) {
+        edge += 4 * stride;
+        SimpleVFilter16(edge, stride, thresh);
+    }
+}
+
+// Applies SimpleHFilter16() to the three vertical edges located 4, 8 and 12
+// bytes to the right of 'p'.
+static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
+    uint8_t* edge = p;
+    int edges_left = 3;
+    while (edges_left-- > 0) {
+        edge += 4;
+        SimpleHFilter16(edge, stride, thresh);
+    }
+}
+
+//------------------------------------------------------------------------------
+// Complex In-loop filtering (Paragraph 15.3)
+
+// Returns a per-lane mask that is all-ones where abs(p1 - p0) or abs(q1 - q0)
+// is strictly greater than 'hev_thresh', and zero elsewhere.
+// Only the low 8 bits of 'hev_thresh' are used.
+static uint8x16_t NeedsHev(
+    const uint8x16_t p1, const uint8x16_t p0, const uint8x16_t q0, const uint8x16_t q1, int hev_thresh) {
+    const uint8x16_t limit = vdupq_n_u8((uint8_t)hev_thresh);
+    const uint8x16_t p_side_exceeds = vcgtq_u8(vabdq_u8(p1, p0), limit);
+    const uint8x16_t q_side_exceeds = vcgtq_u8(vabdq_u8(q1, q0), limit);
+    return vorrq_u8(p_side_exceeds, q_side_exceeds);
+}
+
+// Returns a per-lane mask that is all-ones where both conditions hold:
+//  - every absolute difference between neighbouring samples on each side of
+//    the edge (p3-p2, p2-p1, p1-p0, q3-q2, q2-q1, q1-q0) is <= ithresh, and
+//  - NeedsFilter(p1, p0, q0, q1, thresh) is set.
+// Only the low 8 bits of 'ithresh' are used.
+static uint8x16_t NeedsFilter2(const uint8x16_t p3,
+                               const uint8x16_t p2,
+                               const uint8x16_t p1,
+                               const uint8x16_t p0,
+                               const uint8x16_t q0,
+                               const uint8x16_t q1,
+                               const uint8x16_t q2,
+                               const uint8x16_t q3,
+                               int ithresh,
+                               int thresh) {
+    // Largest neighbour difference, reduced pairwise.
+    const uint8x16_t max_a = vmaxq_u8(vabdq_u8(p3, p2), vabdq_u8(p2, p1));
+    const uint8x16_t max_b = vmaxq_u8(vabdq_u8(p1, p0), vabdq_u8(q3, q2));
+    const uint8x16_t max_c = vmaxq_u8(vabdq_u8(q2, q1), vabdq_u8(q1, q0));
+    const uint8x16_t largest = vmaxq_u8(vmaxq_u8(max_a, max_b), max_c);
+    const uint8x16_t interior_ok = vcgeq_u8(vdupq_n_u8((uint8_t)ithresh), largest);
+    const uint8x16_t edge_ok = NeedsFilter(p1, p0, q0, q1, thresh);
+    return vandq_u8(edge_ok, interior_ok);
+}
+
+//  4-points filter
+
+// Four-pixel filter step. Inputs are sign-flipped samples, outputs are
+// converted back to unsigned bytes:
+//   a1 = sat(delta0 + 4) >> 3,  a2 = sat(delta0 + 3) >> 3,  a3 = (a1 + 1) >> 1
+//   *op0 = clip(p0 + a2),  *oq0 = clip(q0 - a1)
+//   *op1 = clip(p1 + a3),  *oq1 = clip(q1 - a3)
+static void ApplyFilter4(const int8x16_t p1,
+                         const int8x16_t p0,
+                         const int8x16_t q0,
+                         const int8x16_t q1,
+                         const int8x16_t delta0,
+                         uint8x16_t* const op1,
+                         uint8x16_t* const op0,
+                         uint8x16_t* const oq0,
+                         uint8x16_t* const oq1) {
+    const int8x16_t q0_step = vshrq_n_s8(vqaddq_s8(delta0, vdupq_n_s8(0x04)), 3); // a1
+    const int8x16_t p0_step = vshrq_n_s8(vqaddq_s8(delta0, vdupq_n_s8(0x03)), 3); // a2
+    const int8x16_t outer_step = vrshrq_n_s8(q0_step, 1);                         // a3
+    *op0 = FlipSignBack(vqaddq_s8(p0, p0_step));
+    *oq0 = FlipSignBack(vqsubq_s8(q0, q0_step));
+    *op1 = FlipSignBack(vqaddq_s8(p1, outer_step));
+    *oq1 = FlipSignBack(vqsubq_s8(q1, outer_step));
+}
+
+// Inner-edge filter on four samples per lane. Lanes selected by 'mask' are
+// processed in two passes:
+//  1. lanes that are also set in 'hev_mask' get the simple two-pixel filter
+//     (p0 / q0 only);
+//  2. the remaining masked lanes get the four-pixel filter of ApplyFilter4(),
+//     computed from the p0 / q0 values produced by pass 1.
+// Results are returned as unsigned bytes through op1, op0, oq0 and oq1.
+static void DoFilter4(const uint8x16_t p1,
+                      const uint8x16_t p0,
+                      const uint8x16_t q0,
+                      const uint8x16_t q1,
+                      const uint8x16_t mask,
+                      const uint8x16_t hev_mask,
+                      uint8x16_t* const op1,
+                      uint8x16_t* const op0,
+                      uint8x16_t* const oq0,
+                      uint8x16_t* const oq1) {
+    const int8x16_t sp1 = FlipSign(p1);
+    const int8x16_t sq1 = FlipSign(q1);
+    const uint8x16_t hev_lanes = vandq_u8(mask, hev_mask);
+    // (mask & hev_mask) ^ mask == mask & ~hev_mask
+    const uint8x16_t smooth_lanes = veorq_u8(hev_lanes, mask);
+    int8x16_t sp0 = FlipSign(p0);
+    int8x16_t sq0 = FlipSign(q0);
+
+    // Pass 1: simple filter, restricted to the high-edge-variance lanes.
+    {
+        const int8x16_t base = GetBaseDelta(sp1, sp0, sq0, sq1);
+        const int8x16_t hev_delta = vandq_s8(base, vreinterpretq_s8_u8(hev_lanes));
+        ApplyFilter2NoFlip(sp0, sq0, hev_delta, &sp0, &sq0);
+    }
+
+    // Pass 2: four-pixel filter, restricted to the remaining masked lanes.
+    {
+        const int8x16_t base0 = GetBaseDelta0(sp0, sq0);
+        const int8x16_t smooth_delta = vandq_s8(base0, vreinterpretq_s8_u8(smooth_lanes));
+        ApplyFilter4(sp1, sp0, sq0, sq1, smooth_delta, op1, op0, oq0, oq1);
+    }
+}
+
+//  6-points filter
+
+static void ApplyFilter6(const int8x16_t p2,
+                         const int8x16_t p1,
+                         const int8x16_t p0,
+                         const int8x16_t q0,
+                         const int8x16_t q1,
+                         const int8x16_t q2,
+                         const int8x16_t delta,
+                         uint8x16_t* const op2,
+                         uint8x16_t* const op1,
+                         uint8x16_t* const op0,
+                         uint8x16_t* const oq0,
+                         uint8x16_t* const oq1,
+                         uint8x16_t* const oq2) {
+    const int16x8_t kCst63 = vdupq_n_s16(63);
+    const int8x8_t kCst27 = vdup_n_s8(27);
+    const int8x8_t kCst18 = vdup_n_s8(18);
+    const int8x8_t kCst9 = vdup_n_s8(9);
+    const int8x8_t delta_lo = vget_low_s8(delta);
+    const int8x8_t delta_hi = vget_high_s8(delta);
+    const int16x8_t s1_lo = vmlal_s8(kCst63, kCst27, delta_lo); // 63 + 27 * a
+    const int16x8_t s1_hi = vmlal_s8(kCst63, kCst27, delta_hi); // 63 + 27 * a
+    const int16x8_t s2_lo = vmlal_s8(kCst63, kCst18, delta_lo); // 63 + 18 * a
+    const int16x8_t s2_hi = vmlal_s8(kCst63, kCst18, delta_hi); // 63 + 18 * a
+    const int16x8_t s3_lo = vmlal_s8(kCst63, kCst9, delta_lo);  // 63 + 9 * a
+    const int16x8_t s3_hi = vmlal_s8(kCst63, kCst9, delta_hi);  // 63 + 9 * a
+    const int8x8_t a1_lo = vqshrn_n_s16(s1_lo, 7);
+    const int8x8_t a1_hi = vqshrn_n_s16(s1_hi, 7);
+    const int8x8_t a2_lo = vqshrn_n_s16(s2_lo, 7);
+    const int8x8_t a2_hi = vqshrn_n_s16(s2_hi, 7);
+    const int8x8_t a3_lo = vqshrn_n_s16(s3_lo, 7);
+    const int8x8_t a3_hi = vqshrn_n_s16(s3_hi, 7);
+    const int8x16_t a1 = vcombine_s8(a1_lo, a1_hi);
+    const int8x16_t a2 = vcombine_s8(a2_lo, a2_hi);
+    const int8x16_t a3 = vcombine_s8(a3_lo, a3_hi);
+
+    *op0 = FlipSignBack(vqaddq_s8(p0, a1)); // clip(p0 + a1)
+    *oq0 = FlipSignBack(vqsubq_s8(q0, a1)); // clip(q0 - q1)
+    *oq1 = FlipSignBack(vqsubq_s8(q1, a2)); // clip(q1 - a2)
+    *op1 = FlipSignBack(vqaddq_s8(p1, a2)); // clip(p1 + a2)
+    *oq2 = FlipSignBack(vqsubq_s8(q2, a3)); // clip(q2 - a3)
+    *op2 = FlipSignBack(vqaddq_s8(p2, a3)); // clip(p2 + a3)
+}
+
+// Macroblock-edge filter on six samples per lane. Lanes selected by 'mask'
+// are processed in two passes, both driven by the same base delta computed
+// from the unfiltered samples:
+//  1. lanes that are also set in 'hev_mask' get the simple two-pixel filter
+//     (p0 / q0 only);
+//  2. the remaining masked lanes get the six-pixel filter of ApplyFilter6(),
+//     applied on top of the p0 / q0 values produced by pass 1.
+// Results are returned as unsigned bytes through op2..oq2.
+static void DoFilter6(const uint8x16_t p2,
+                      const uint8x16_t p1,
+                      const uint8x16_t p0,
+                      const uint8x16_t q0,
+                      const uint8x16_t q1,
+                      const uint8x16_t q2,
+                      const uint8x16_t mask,
+                      const uint8x16_t hev_mask,
+                      uint8x16_t* const op2,
+                      uint8x16_t* const op1,
+                      uint8x16_t* const op0,
+                      uint8x16_t* const oq0,
+                      uint8x16_t* const oq1,
+                      uint8x16_t* const oq2) {
+    const int8x16_t sp2 = FlipSign(p2);
+    const int8x16_t sp1 = FlipSign(p1);
+    const int8x16_t sq1 = FlipSign(q1);
+    const int8x16_t sq2 = FlipSign(q2);
+    int8x16_t sp0 = FlipSign(p0);
+    int8x16_t sq0 = FlipSign(q0);
+    const uint8x16_t hev_lanes = vandq_u8(mask, hev_mask);
+    // (mask & hev_mask) ^ mask == mask & ~hev_mask
+    const uint8x16_t smooth_lanes = veorq_u8(hev_lanes, mask);
+    const int8x16_t base = GetBaseDelta(sp1, sp0, sq0, sq1);
+    const int8x16_t hev_delta = vandq_s8(base, vreinterpretq_s8_u8(hev_lanes));
+    const int8x16_t smooth_delta = vandq_s8(base, vreinterpretq_s8_u8(smooth_lanes));
+
+    // Pass 1: simple filter, restricted to the high-edge-variance lanes.
+    ApplyFilter2NoFlip(sp0, sq0, hev_delta, &sp0, &sq0);
+
+    // Pass 2: six-pixel filter, restricted to the remaining masked lanes.
+    ApplyFilter6(sp2, sp1, sp0, sq0, sq1, sq2, smooth_delta, op2, op1, op0, oq0, oq1, oq2);
+}
+
+// on macroblock edges
+
+// Complex loop filter across a horizontal macroblock edge, 16 pixels wide.
+// Eight rows are fetched with Load16x8(); the six filtered rows are written
+// back to rows -3..+2 relative to 'p'.
+static void VFilter16(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {
+    uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
+    uint8x16_t new_p2, new_p1, new_p0, new_q0, new_q1, new_q2;
+    uint8x16_t filter_mask, hev_mask;
+
+    Load16x8(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+    filter_mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
+    hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    DoFilter6(p2, p1, p0, q0, q1, q2, filter_mask, hev_mask, &new_p2, &new_p1, &new_p0, &new_q0, &new_q1, &new_q2);
+
+    // Each Store16x2() call writes the row above its destination and the
+    // destination row itself.
+    Store16x2(new_p2, new_p1, p - 2 * stride, stride);
+    Store16x2(new_p0, new_q0, p + 0 * stride, stride);
+    Store16x2(new_q1, new_q2, p + 2 * stride, stride);
+}
+
+// Complex loop filter across a vertical macroblock edge, 16 rows tall.
+// Eight columns are fetched with Load8x16(); the six filtered columns are
+// written back to byte offsets -3..+2 relative to 'p' on every row.
+static void HFilter16(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {
+    uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
+    uint8x16_t new_p2, new_p1, new_p0, new_q0, new_q1, new_q2;
+    uint8x16_t filter_mask, hev_mask;
+
+    Load8x16(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+    filter_mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
+    hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    DoFilter6(p2, p1, p0, q0, q1, q2, filter_mask, hev_mask, &new_p2, &new_p1, &new_p0, &new_q0, &new_q1, &new_q2);
+
+    // Each Store2x16() call writes the column left of its destination and the
+    // destination column itself.
+    Store2x16(new_p2, new_p1, p - 2, stride);
+    Store2x16(new_p0, new_q0, p + 0, stride);
+    Store2x16(new_q1, new_q2, p + 2, stride);
+}
+
+// on three inner edges
+// Complex loop filter on the three inner horizontal edges located 4, 8 and 12
+// rows below 'p'. Four rows are loaded per edge; the four rows loaded for one
+// edge's "q" side are carried over as the next edge's "p" side instead of
+// being read again.
+static void VFilter16i(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {
+    uint8x16_t p3, p2, p1, p0;
+    int edges_left;
+    Load16x4(p + 2 * stride, stride, &p3, &p2, &p1, &p0);
+    for (edges_left = 3; edges_left > 0; --edges_left) {
+        uint8x16_t q0, q1, q2, q3;
+        uint8x16_t filter_mask, hev_mask;
+        p += 4 * stride;
+        Load16x4(p + 2 * stride, stride, &q0, &q1, &q2, &q3);
+        filter_mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
+        hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+        // The filtered q0 / q1 are deliberately written into p3 / p2: besides
+        // being stored below, they serve as p3 / p2 of the next edge.
+        DoFilter4(p1, p0, q0, q1, filter_mask, hev_mask, &p1, &p0, &p3, &p2);
+        Store16x4(p1, p0, p3, p2, p, stride);
+        // The unfiltered q2 / q3 become p1 / p0 of the next edge.
+        p1 = q2;
+        p0 = q3;
+    }
+}
+
+#if !defined(WORK_AROUND_GCC)
+// Complex loop filter on the three inner vertical edges located 4, 8 and 12
+// bytes to the right of 'p'. Four columns are loaded per edge; the four
+// columns loaded for one edge's "q" side are carried over as the next edge's
+// "p" side instead of being read again.
+static void HFilter16i(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {
+    uint8x16_t p3, p2, p1, p0;
+    int edges_left;
+    Load4x16(p + 2, stride, &p3, &p2, &p1, &p0);
+    for (edges_left = 3; edges_left > 0; --edges_left) {
+        uint8x16_t q0, q1, q2, q3;
+        uint8x16_t filter_mask, hev_mask;
+        p += 4;
+        Load4x16(p + 2, stride, &q0, &q1, &q2, &q3);
+        filter_mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
+        hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+        // The filtered q0 / q1 are deliberately written into p3 / p2: besides
+        // being stored below, they serve as p3 / p2 of the next edge.
+        DoFilter4(p1, p0, q0, q1, filter_mask, hev_mask, &p1, &p0, &p3, &p2);
+        Store4x16(p1, p0, p3, p2, p, stride);
+        // The unfiltered q2 / q3 become p1 / p0 of the next edge.
+        p1 = q2;
+        p0 = q3;
+    }
+}
+#endif // !WORK_AROUND_GCC
+
+// 8-pixel-wide variant for chroma: the u and v planes go through the same load/filter/store calls.
+static void VFilter8(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {
+    uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
+    uint8x16_t new_p2, new_p1, new_p0, new_q0, new_q1, new_q2;
+    uint8x16_t edge_mask, hev;
+    const int two_rows = 2 * stride;
+    Load8x8x2(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+    edge_mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
+    hev = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    DoFilter6(p2, p1, p0, q0, q1, q2, edge_mask, hev, &new_p2, &new_p1, &new_p0, &new_q0, &new_q1, &new_q2);
+    Store8x2x2(new_p2, new_p1, u - two_rows, v - two_rows, stride);
+    Store8x2x2(new_p0, new_q0, u, v, stride);
+    Store8x2x2(new_q1, new_q2, u + two_rows, v + two_rows, stride);
+}
+static void VFilter8i(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {
+    uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
+    uint8x16_t new_p1, new_p0, new_q0, new_q1;
+    uint8x16_t edge_mask, hev;
+    const int offset = 4 * stride; // the filtered edge sits 4 * stride past the incoming pointers
+    u += offset;
+    v += offset;
+    Load8x8x2(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+    edge_mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
+    hev = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    DoFilter4(p1, p0, q0, q1, edge_mask, hev, &new_p1, &new_p0, &new_q0, &new_q1);
+    Store8x4x2(new_p1, new_p0, new_q0, new_q1, u, v, stride);
+}
+
+#if !defined(WORK_AROUND_GCC)
+static void HFilter8(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {
+    uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
+    uint8x16_t new_p2, new_p1, new_p0, new_q0, new_q1, new_q2;
+    uint8x16_t edge_mask, hev;
+    // Chroma counterpart of HFilter16: u and v are loaded together, filtered with DoFilter6, stored together.
+    Load8x8x2T(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+    edge_mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
+    hev = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    DoFilter6(p2, p1, p0, q0, q1, q2, edge_mask, hev, &new_p2, &new_p1, &new_p0, &new_q0, &new_q1, &new_q2);
+    Store6x8x2(new_p2, new_p1, new_p0, new_q0, new_q1, new_q2, u, v, stride);
+}
+
+static void HFilter8i(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {
+    uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
+    uint8x16_t new_p1, new_p0, new_q0, new_q1;
+    uint8x16_t edge_mask, hev;
+    // The filtered edge sits 4 bytes past the incoming u/v pointers; only p1..q1 are rewritten.
+    u += 4;
+    v += 4;
+    Load8x8x2T(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+    edge_mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
+    hev = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    DoFilter4(p1, p0, q0, q1, edge_mask, hev, &new_p1, &new_p0, &new_q0, &new_q1);
+    Store4x8x2(new_p1, new_p0, new_q0, new_q1, u, v, stride);
+}
+#endif // !WORK_AROUND_GCC
+
+//-----------------------------------------------------------------------------
+// Inverse transforms (Paragraph 14.4)
+
+// Technically these are unsigned but vqdmulh is only available in signed.
+// vqdmulh returns high half (effectively >> 16) but also doubles the value,
+// changing the >> 16 to >> 15 and requiring an additional >> 1.
+// We use this to our advantage with kC2. The canonical value is 35468.
+// However, the high bit is set so treating it as signed will give incorrect
+// results. We avoid this by down shifting by 1 here to clear the highest bit.
+// Combined with the doubling effect of vqdmulh we get >> 16.
+// This can not be applied to kC1 because the lowest bit is set. Down shifting
+// the constant would reduce precision.
+
+// libwebp uses a trick to avoid an extra addition that libvpx does.
+// Instead of:
+//   temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
+// libwebp adds 1 << 16 to cospi8sqrt2minus1 (kC1). However, that makes kC1 hit
+// the same vqdmulh sign issue that is worked around for kC2 by down shifting.
+
+static const int16_t kC1 = 20091; // kept without the (1 << 16) term so it fits int16_t; users add the input back
+static const int16_t kC2 = 17734; // 35468 >> 1, i.e. half of the canonical kC2. See comment above.
+
+#if defined(WEBP_USE_INTRINSICS)
+static WEBP_INLINE void Transpose8x2(const int16x8_t in0, const int16x8_t in1, int16x8x2_t* const out) {
+    // a0 a1 a2 a3 | b0 b1 b2 b3   => a0 b0 c0 d0 | a1 b1 c1 d1   (in0 => out->val[0])
+    // c0 c1 c2 c3 | d0 d1 d2 d3      a2 b2 c2 d2 | a3 b3 c3 d3   (in1 => out->val[1])
+    const int16x8x2_t tmp0 = vzipq_s16(in0, in1); // a0 c0 a1 c1 a2 c2 ...
+                                                  // b0 d0 b1 d1 b2 d2 ...
+    *out = vzipq_s16(tmp0.val[0], tmp0.val[1]); // zipping the two halves again completes the 4x4 transpose
+}
+
+static WEBP_INLINE void TransformPass(int16x8x2_t* const rows) { // one in-place pass; TransformOne runs it twice
+    // {rows} = in0 | in4
+    //          in8 | in12
+    // B1 = in4 | in12
+    const int16x8_t B1 = vcombine_s16(vget_high_s16(rows->val[0]), vget_high_s16(rows->val[1]));
+    // C0 = kC1 * in4 | kC1 * in12   (computed as B1 + (vqdmulh(B1, kC1) >> 1), see the kC1/kC2 notes)
+    // C1 = kC2 * in4 | kC2 * in12   (kC2 is pre-halved, so vqdmulh alone gives the >> 16 product)
+    const int16x8_t C0 = vsraq_n_s16(B1, vqdmulhq_n_s16(B1, kC1), 1);
+    const int16x8_t C1 = vqdmulhq_n_s16(B1, kC2);
+    const int16x4_t a = vqadd_s16(vget_low_s16(rows->val[0]), vget_low_s16(rows->val[1])); // in0 + in8
+    const int16x4_t b = vqsub_s16(vget_low_s16(rows->val[0]), vget_low_s16(rows->val[1])); // in0 - in8
+    // c = kC2 * in4 - kC1 * in12
+    // d = kC1 * in4 + kC2 * in12
+    const int16x4_t c = vqsub_s16(vget_low_s16(C1), vget_high_s16(C0));
+    const int16x4_t d = vqadd_s16(vget_low_s16(C0), vget_high_s16(C1));
+    const int16x8_t D0 = vcombine_s16(a, b);    // D0 = a | b
+    const int16x8_t D1 = vcombine_s16(d, c);    // D1 = d | c
+    const int16x8_t E0 = vqaddq_s16(D0, D1);    // a+d | b+c
+    const int16x8_t E_tmp = vqsubq_s16(D0, D1); // a-d | b-c
+    const int16x8_t E1 = vcombine_s16(vget_high_s16(E_tmp), vget_low_s16(E_tmp)); // halves swapped: b-c | a-d
+    Transpose8x2(E0, E1, rows); // the transposed result overwrites *rows
+}
+
+static void TransformOne(const int16_t* in, uint8_t* dst) { // intrinsics flavour: two passes, then Add4x4 into dst
+    int16x8x2_t rows;
+    int pass;
+    INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
+    for (pass = 0; pass < 2; ++pass) TransformPass(&rows);
+    Add4x4(rows.val[0], rows.val[1], dst);
+}
+
+#else
+
+static void TransformOne(const int16_t* in, uint8_t* dst) { // inline-asm flavour: in[0..15] added into 4 rows of dst
+    const int kBPS = BPS;
+    // kC1, kC2. Padded because vld1.16 loads 8 bytes
+    const int16_t constants[4] = {kC1, kC2, 0, 0};
+    /* Adapted from libvpx: vp8/common/arm/neon/shortidct4x4llm_neon.asm */
+    __asm__ volatile(
+        "vld1.16         {q1, q2}, [%[in]]           \n"
+        "vld1.16         {d0}, [%[constants]]        \n"
+
+        /* d2: in[0]
+         * d3: in[8]
+         * d4: in[4]
+         * d5: in[12]
+         */
+        "vswp            d3, d4                      \n"
+
+        /* q8 = {in[4], in[12]} * kC1 * 2 >> 16
+         * q9 = {in[4], in[12]} * kC2 >> 16
+         */
+        "vqdmulh.s16     q8, q2, d0[0]               \n"
+        "vqdmulh.s16     q9, q2, d0[1]               \n"
+
+        /* d22 = a = in[0] + in[8]
+         * d23 = b = in[0] - in[8]
+         */
+        "vqadd.s16       d22, d2, d3                 \n"
+        "vqsub.s16       d23, d2, d3                 \n"
+
+        /* The multiplication should be x * kC1 >> 16
+         * However, with vqdmulh we get x * kC1 * 2 >> 16
+         * (multiply, double, return high half)
+         * We avoided this in kC2 by pre-shifting the constant.
+         * q8 = in[4]/[12] * kC1 >> 16
+         */
+        "vshr.s16        q8, q8, #1                  \n"
+
+        /* Add {in[4], in[12]} back after the multiplication. This is handled by
+         * adding 1 << 16 to kC1 in the libwebp C code.
+         */
+        "vqadd.s16       q8, q2, q8                  \n"
+
+        /* d20 = c = in[4]*kC2 - in[12]*kC1
+         * d21 = d = in[4]*kC1 + in[12]*kC2
+         */
+        "vqsub.s16       d20, d18, d17               \n"
+        "vqadd.s16       d21, d19, d16               \n"
+
+        /* d2 = tmp[0] = a + d
+         * d3 = tmp[1] = b + c
+         * d4 = tmp[2] = b - c
+         * d5 = tmp[3] = a - d
+         */
+        "vqadd.s16       d2, d22, d21                \n"
+        "vqadd.s16       d3, d23, d20                \n"
+        "vqsub.s16       d4, d23, d20                \n"
+        "vqsub.s16       d5, d22, d21                \n"
+
+        "vzip.16         q1, q2                      \n"
+        "vzip.16         q1, q2                      \n"
+
+        "vswp            d3, d4                      \n"
+
+        /* q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
+         * q9 = {tmp[4], tmp[12]} * kC2 >> 16
+         */
+        "vqdmulh.s16     q8, q2, d0[0]               \n"
+        "vqdmulh.s16     q9, q2, d0[1]               \n"
+
+        /* d22 = a = tmp[0] + tmp[8]
+         * d23 = b = tmp[0] - tmp[8]
+         */
+        "vqadd.s16       d22, d2, d3                 \n"
+        "vqsub.s16       d23, d2, d3                 \n"
+
+        /* See long winded explanations prior */
+        "vshr.s16        q8, q8, #1                  \n"
+        "vqadd.s16       q8, q2, q8                  \n"
+
+        /* d20 = c = tmp[4]*kC2 - tmp[12]*kC1
+         * d21 = d = tmp[4]*kC1 + tmp[12]*kC2
+         */
+        "vqsub.s16       d20, d18, d17               \n"
+        "vqadd.s16       d21, d19, d16               \n"
+
+        /* d2 = tmp[0] = a + d
+         * d3 = tmp[1] = b + c
+         * d4 = tmp[2] = b - c
+         * d5 = tmp[3] = a - d
+         */
+        "vqadd.s16       d2, d22, d21                \n"
+        "vqadd.s16       d3, d23, d20                \n"
+        "vqsub.s16       d4, d23, d20                \n"
+        "vqsub.s16       d5, d22, d21                \n"
+
+        "vld1.32         d6[0], [%[dst]], %[kBPS]    \n"
+        "vld1.32         d6[1], [%[dst]], %[kBPS]    \n"
+        "vld1.32         d7[0], [%[dst]], %[kBPS]    \n"
+        "vld1.32         d7[1], [%[dst]], %[kBPS]    \n"
+
+        "sub         %[dst], %[dst], %[kBPS], lsl #2 \n"
+
+        /* (val) + 4 >> 3 */
+        "vrshr.s16       d2, d2, #3                  \n"
+        "vrshr.s16       d3, d3, #3                  \n"
+        "vrshr.s16       d4, d4, #3                  \n"
+        "vrshr.s16       d5, d5, #3                  \n"
+
+        "vzip.16         q1, q2                      \n"
+        "vzip.16         q1, q2                      \n"
+
+        /* Must accumulate before saturating */
+        "vmovl.u8        q8, d6                      \n"
+        "vmovl.u8        q9, d7                      \n"
+
+        "vqadd.s16       q1, q1, q8                  \n"
+        "vqadd.s16       q2, q2, q9                  \n"
+
+        "vqmovun.s16     d0, q1                      \n"
+        "vqmovun.s16     d1, q2                      \n"
+
+        "vst1.32         d0[0], [%[dst]], %[kBPS]    \n"
+        "vst1.32         d0[1], [%[dst]], %[kBPS]    \n"
+        "vst1.32         d1[0], [%[dst]], %[kBPS]    \n"
+        "vst1.32         d1[1], [%[dst]]             \n"
+
+        : [in] "+r"(in), [dst] "+r"(dst)                       /* modified registers */
+        : [kBPS] "r"(kBPS), [constants] "r"(constants)         /* constants */
+        : "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11" /* clobbered; q3 is d6/d7 (dst rows) */
+        );
+}
+
+#endif // WEBP_USE_INTRINSICS
+
+static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
+    // Always transforms the block at (in, dst); when do_two is set, also the one at (in + 16, dst + 4).
+    const int count = do_two ? 2 : 1;
+    int i;
+    for (i = 0; i < count; ++i) TransformOne(in + 16 * i, dst + 4 * i);
+}
+
+static void TransformDC(const int16_t* in, uint8_t* dst) { // only in[0] is read
+    const int16x8_t dc_splat = vdupq_n_s16(in[0]); // in[0] replicated in all eight lanes
+    Add4x4(dc_splat, dc_splat, dst);
+}
+
+//------------------------------------------------------------------------------
+
+#define STORE_WHT(dst, col, rows)                \
+    do { /* writes 4 values, 16 apart */         \
+        *dst = vgetq_lane_s32(rows.val[0], col); \
+        (dst) += 16;                             \
+        *dst = vgetq_lane_s32(rows.val[1], col); \
+        (dst) += 16;                             \
+        *dst = vgetq_lane_s32(rows.val[2], col); \
+        (dst) += 16;                             \
+        *dst = vgetq_lane_s32(rows.val[3], col); \
+        (dst) += 16;                             \
+    } while (0)
+
+static void TransformWHT(const int16_t* in, int16_t* out) { // reads in[0..15], writes out[0], out[16], ..., out[240]
+    int32x4x4_t tmp;
+
+    {
+        // Load the source.
+        const int16x4_t in00_03 = vld1_s16(in + 0);
+        const int16x4_t in04_07 = vld1_s16(in + 4);
+        const int16x4_t in08_11 = vld1_s16(in + 8);
+        const int16x4_t in12_15 = vld1_s16(in + 12);
+        const int32x4_t a0 = vaddl_s16(in00_03, in12_15); // in[0..3] + in[12..15]
+        const int32x4_t a1 = vaddl_s16(in04_07, in08_11); // in[4..7] + in[8..11]
+        const int32x4_t a2 = vsubl_s16(in04_07, in08_11); // in[4..7] - in[8..11]
+        const int32x4_t a3 = vsubl_s16(in00_03, in12_15); // in[0..3] - in[12..15]
+        tmp.val[0] = vaddq_s32(a0, a1);
+        tmp.val[1] = vaddq_s32(a3, a2);
+        tmp.val[2] = vsubq_s32(a0, a1);
+        tmp.val[3] = vsubq_s32(a3, a2);
+        // Arrange the temporary results column-wise.
+        tmp = Transpose4x4(tmp);
+    }
+
+    {
+        const int32x4_t kCst3 = vdupq_n_s32(3);
+        const int32x4_t dc = vaddq_s32(tmp.val[0], kCst3); // add rounder
+        const int32x4_t a0 = vaddq_s32(dc, tmp.val[3]);
+        const int32x4_t a1 = vaddq_s32(tmp.val[1], tmp.val[2]);
+        const int32x4_t a2 = vsubq_s32(tmp.val[1], tmp.val[2]);
+        const int32x4_t a3 = vsubq_s32(dc, tmp.val[3]);
+
+        tmp.val[0] = vaddq_s32(a0, a1); // same add/sub pattern as the first stage
+        tmp.val[1] = vaddq_s32(a3, a2);
+        tmp.val[2] = vsubq_s32(a0, a1);
+        tmp.val[3] = vsubq_s32(a3, a2);
+
+        // right shift the results by 3 (the +3 rounder was already folded into dc above).
+        tmp.val[0] = vshrq_n_s32(tmp.val[0], 3);
+        tmp.val[1] = vshrq_n_s32(tmp.val[1], 3);
+        tmp.val[2] = vshrq_n_s32(tmp.val[2], 3);
+        tmp.val[3] = vshrq_n_s32(tmp.val[3], 3);
+
+        STORE_WHT(out, 0, tmp); // each call stores lane 'col' of the 4 rows and advances out by 4 * 16
+        STORE_WHT(out, 1, tmp);
+        STORE_WHT(out, 2, tmp);
+        STORE_WHT(out, 3, tmp);
+    }
+}
+
+#undef STORE_WHT
+
+//------------------------------------------------------------------------------
+
+#define MUL(a, b) (((a) * (b)) >> 16) // product scaled down by 2^16
+static void TransformAC3(const int16_t* in, uint8_t* dst) { // reads only in[0], in[1] and in[4]
+    static const int kC1_full = 20091 + (1 << 16);
+    static const int kC2_full = 35468;
+    const int16x4_t A = vld1_dup_s16(in); // in[0] in all four lanes
+    const int16x4_t c4 = vdup_n_s16(MUL(in[4], kC2_full));
+    const int16x4_t d4 = vdup_n_s16(MUL(in[4], kC1_full));
+    const int c1 = MUL(in[1], kC2_full);
+    const int d1 = MUL(in[1], kC1_full);
+    const uint64_t cd = (uint64_t)(d1 & 0xffff) << 0 | (uint64_t)(c1 & 0xffff) << 16 | (uint64_t)(-c1 & 0xffff) << 32 |
+                        (uint64_t)(-d1 & 0xffff) << 48; // 16-bit lanes, low to high: d1, c1, -c1, -d1
+    const int16x4_t CD = vcreate_s16(cd);
+    const int16x4_t B = vqadd_s16(A, CD); // in[0] + {d1, c1, -c1, -d1}
+    const int16x8_t m0_m1 = vcombine_s16(vqadd_s16(B, d4), vqadd_s16(B, c4)); // B + d4 | B + c4
+    const int16x8_t m2_m3 = vcombine_s16(vqsub_s16(B, c4), vqsub_s16(B, d4)); // B - c4 | B - d4
+    Add4x4(m0_m1, m2_m3, dst);
+}
+#undef MUL
+
+//------------------------------------------------------------------------------
+// 4x4
+
+static void DC4(uint8_t* dst) { // DC: fill the 4x4 block with the rounded mean of its 4 top and 4 left bytes
+    // Only lane 0 of the sums below is consumed; the remaining lanes are don't-care.
+    const uint16x8_t left0 = vmovl_u8(vld1_u8(dst + 0 * BPS - 1));
+    const uint16x8_t left1 = vmovl_u8(vld1_u8(dst + 1 * BPS - 1));
+    const uint16x8_t left2 = vmovl_u8(vld1_u8(dst + 2 * BPS - 1));
+    const uint16x8_t left3 = vmovl_u8(vld1_u8(dst + 3 * BPS - 1));
+    const uint16x8_t left_sum = vaddq_u16(vaddq_u16(left0, left1), vaddq_u16(left2, left3));
+    const uint8x8_t top = vld1_u8(dst - BPS);
+    const uint16x4_t top_pairs = vpaddl_u8(top);                  // t0+t1, t2+t3, ...
+    const uint16x4_t top_quads = vpadd_u16(top_pairs, top_pairs); // lane 0: t0+t1+t2+t3
+    const uint16x8_t total = vaddq_u16(left_sum, vcombine_u16(top_quads, top_quads));
+    const uint8x8_t mean = vrshrn_n_u16(total, 3); // (sum + 4) >> 3
+    const uint32x2_t fill = vreinterpret_u32_u8(vdup_lane_u8(mean, 0));
+    // One 4-byte store per row.
+    vst1_lane_u32((uint32_t*)(dst + 0 * BPS), fill, 0);
+    vst1_lane_u32((uint32_t*)(dst + 1 * BPS), fill, 0);
+    vst1_lane_u32((uint32_t*)(dst + 2 * BPS), fill, 0);
+    vst1_lane_u32((uint32_t*)(dst + 3 * BPS), fill, 0);
+}
+
+// TrueMotion (4x4 + 8x8): each output is saturate(L[r] + A[c] - A[-1]); 'size' is 4 or 8 for the callers below.
+static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
+    const uint8x8_t TL = vld1_dup_u8(dst - BPS - 1);            // top-left pixel 'A[-1]'
+    const uint8x8_t T = vld1_u8(dst - BPS);                     // top row 'A[0..7]' (only A[0..3] is stored if size == 4)
+    const int16x8_t d = vreinterpretq_s16_u16(vsubl_u8(T, TL)); // A[c] - A[-1]
+    int y;
+    for (y = 0; y < size; y += 4) { // four rows per iteration
+        // left edge
+        const int16x8_t L0 = ConvertU8ToS16(vld1_dup_u8(dst + 0 * BPS - 1));
+        const int16x8_t L1 = ConvertU8ToS16(vld1_dup_u8(dst + 1 * BPS - 1));
+        const int16x8_t L2 = ConvertU8ToS16(vld1_dup_u8(dst + 2 * BPS - 1));
+        const int16x8_t L3 = ConvertU8ToS16(vld1_dup_u8(dst + 3 * BPS - 1));
+        const int16x8_t r0 = vaddq_s16(L0, d); // L[r] + A[c] - A[-1]
+        const int16x8_t r1 = vaddq_s16(L1, d);
+        const int16x8_t r2 = vaddq_s16(L2, d);
+        const int16x8_t r3 = vaddq_s16(L3, d);
+        // Saturate and store the result.
+        const uint32x2_t r0_u32 = vreinterpret_u32_u8(vqmovun_s16(r0));
+        const uint32x2_t r1_u32 = vreinterpret_u32_u8(vqmovun_s16(r1));
+        const uint32x2_t r2_u32 = vreinterpret_u32_u8(vqmovun_s16(r2));
+        const uint32x2_t r3_u32 = vreinterpret_u32_u8(vqmovun_s16(r3));
+        if (size == 4) { // 4 bytes per row: store lane 0 only
+            vst1_lane_u32((uint32_t*)(dst + 0 * BPS), r0_u32, 0);
+            vst1_lane_u32((uint32_t*)(dst + 1 * BPS), r1_u32, 0);
+            vst1_lane_u32((uint32_t*)(dst + 2 * BPS), r2_u32, 0);
+            vst1_lane_u32((uint32_t*)(dst + 3 * BPS), r3_u32, 0);
+        } else { // 8 bytes per row: store both lanes
+            vst1_u32((uint32_t*)(dst + 0 * BPS), r0_u32);
+            vst1_u32((uint32_t*)(dst + 1 * BPS), r1_u32);
+            vst1_u32((uint32_t*)(dst + 2 * BPS), r2_u32);
+            vst1_u32((uint32_t*)(dst + 3 * BPS), r3_u32);
+        }
+        dst += 4 * BPS;
+    }
+}
+
+static void TM4(uint8_t* dst) { // TrueMotion with size 4
+    TrueMotion(dst, 4);
+}
+
+static void VE4(uint8_t* dst) { // vertical: one smoothed copy of the top row is written to all 4 rows
+    // NB: avoid vld1_u64 here as an alignment hint may be added -> SIGBUS.
+    const uint64x1_t A0 = vreinterpret_u64_u8(vld1_u8(dst - BPS - 1)); // top row, starting one byte left of dst
+    const uint64x1_t A1 = vshr_n_u64(A0, 8);  // same bytes shifted down by one position
+    const uint64x1_t A2 = vshr_n_u64(A0, 16); // ... and by two positions
+    const uint8x8_t ABCDEFGH = vreinterpret_u8_u64(A0);
+    const uint8x8_t BCDEFGH0 = vreinterpret_u8_u64(A1);
+    const uint8x8_t CDEFGH00 = vreinterpret_u8_u64(A2);
+    const uint8x8_t b = vhadd_u8(ABCDEFGH, CDEFGH00); // truncating average of the two outer taps
+    const uint8x8_t avg = vrhadd_u8(b, BCDEFGH0);     // rounding average with the centre tap
+    int i;
+    for (i = 0; i < 4; ++i) { // only the low 4 bytes of avg are stored
+        vst1_lane_u32((uint32_t*)(dst + i * BPS), vreinterpret_u32_u8(avg), 0);
+    }
+}
+
+static void RD4(uint8_t* dst) { // Down-right
+    const uint8x8_t XABCD_u8 = vld1_u8(dst - BPS - 1); // top-left byte X followed by the top row
+    const uint64x1_t XABCD = vreinterpret_u64_u8(XABCD_u8);
+    const uint64x1_t ____XABC = vshl_n_u64(XABCD, 32); // X A B C moved to the upper four bytes
+    const uint32_t I = dst[-1 + 0 * BPS];              // left column, rows 0..3
+    const uint32_t J = dst[-1 + 1 * BPS];
+    const uint32_t K = dst[-1 + 2 * BPS];
+    const uint32_t L = dst[-1 + 3 * BPS];
+    const uint64x1_t LKJI____ = vcreate_u64(L | (K << 8) | (J << 16) | (I << 24)); // L in the lowest byte
+    const uint64x1_t LKJIXABC = vorr_u64(LKJI____, ____XABC);
+    const uint8x8_t KJIXABC_ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 8));
+    const uint8x8_t JIXABC__ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 16));
+    const uint8_t D = vget_lane_u8(XABCD_u8, 4);
+    const uint8x8_t JIXABCD_ = vset_lane_u8(D, JIXABC__, 6); // D was shifted out above; put it back in lane 6
+    const uint8x8_t LKJIXABC_u8 = vreinterpret_u8_u64(LKJIXABC);
+    const uint8x8_t avg1 = vhadd_u8(JIXABCD_, LKJIXABC_u8); // truncating average of the two outer taps
+    const uint8x8_t avg2 = vrhadd_u8(avg1, KJIXABC_);       // rounding average with the centre tap
+    const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2);
+    const uint32x2_t r3 = vreinterpret_u32_u8(avg2); // the last row starts at byte 0 of avg2
+    const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8));
+    const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16));
+    const uint32x2_t r0 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24)); // the first row starts at byte 3
+    vst1_lane_u32((uint32_t*)(dst + 0 * BPS), r0, 0);
+    vst1_lane_u32((uint32_t*)(dst + 1 * BPS), r1, 0);
+    vst1_lane_u32((uint32_t*)(dst + 2 * BPS), r2, 0);
+    vst1_lane_u32((uint32_t*)(dst + 3 * BPS), r3, 0);
+}
+
+static void LD4(uint8_t* dst) { // Down-left
+    // Note using the same shift trick as VE4() is slower here.
+    const uint8x8_t ABCDEFGH = vld1_u8(dst - BPS + 0); // three overlapping 8-byte loads of the row above
+    const uint8x8_t BCDEFGH0 = vld1_u8(dst - BPS + 1);
+    const uint8x8_t CDEFGH00 = vld1_u8(dst - BPS + 2);
+    const uint8x8_t CDEFGHH0 = vset_lane_u8(dst[-BPS + 7], CDEFGH00, 6); // lane 6 is replaced by H (byte 7 of the row)
+    const uint8x8_t avg1 = vhadd_u8(ABCDEFGH, CDEFGHH0); // truncating average of the two outer taps
+    const uint8x8_t avg2 = vrhadd_u8(avg1, BCDEFGH0);    // rounding average with the centre tap
+    const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2);
+    const uint32x2_t r0 = vreinterpret_u32_u8(avg2); // row n starts at byte n of avg2
+    const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8));
+    const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16));
+    const uint32x2_t r3 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24));
+    vst1_lane_u32((uint32_t*)(dst + 0 * BPS), r0, 0);
+    vst1_lane_u32((uint32_t*)(dst + 1 * BPS), r1, 0);
+    vst1_lane_u32((uint32_t*)(dst + 2 * BPS), r2, 0);
+    vst1_lane_u32((uint32_t*)(dst + 3 * BPS), r3, 0);
+}
+
+//------------------------------------------------------------------------------
+// Chroma
+
+static void VE8uv(uint8_t* dst) { // vertical
+    // Copy the 8 bytes found one BPS before dst into each of the block's 8 rows.
+    const uint8x8_t top = vld1_u8(dst - BPS);
+    uint8_t* row = dst;
+    uint8_t* const end = dst + 8 * BPS;
+    for (; row != end; row += BPS) vst1_u8(row, top);
+}
+
+static void HE8uv(uint8_t* dst) { // horizontal
+    // Each of the 8 rows is filled with the byte sitting just before its first pixel.
+    int row;
+    for (row = 0; row < 8; ++row) {
+        uint8_t* const line = dst + row * BPS;
+        vst1_u8(line, vld1_dup_u8(line - 1));
+    }
+}
+
+static WEBP_INLINE void DC8(uint8_t* dst, int do_top, int do_left) { // 8x8 DC fill; flags pick the edges averaged
+    uint16x8_t sum_top;  // only set (and only read) when do_top is non-zero
+    uint16x8_t sum_left; // only set (and only read) when do_left is non-zero
+    uint8x8_t dc0;
+
+    if (do_top) {
+        const uint8x8_t A = vld1_u8(dst - BPS); // top row
+        const uint16x4_t p0 = vpaddl_u8(A);     // cascading summation of the top
+        const uint16x4_t p1 = vpadd_u16(p0, p0);
+        const uint16x4_t p2 = vpadd_u16(p1, p1); // every lane now holds the sum of the 8 top bytes
+        sum_top = vcombine_u16(p2, p2);
+    }
+
+    if (do_left) {
+        const uint16x8_t L0 = vmovl_u8(vld1_u8(dst + 0 * BPS - 1)); // lane 0 is the left neighbour of row 0
+        const uint16x8_t L1 = vmovl_u8(vld1_u8(dst + 1 * BPS - 1));
+        const uint16x8_t L2 = vmovl_u8(vld1_u8(dst + 2 * BPS - 1));
+        const uint16x8_t L3 = vmovl_u8(vld1_u8(dst + 3 * BPS - 1));
+        const uint16x8_t L4 = vmovl_u8(vld1_u8(dst + 4 * BPS - 1));
+        const uint16x8_t L5 = vmovl_u8(vld1_u8(dst + 5 * BPS - 1));
+        const uint16x8_t L6 = vmovl_u8(vld1_u8(dst + 6 * BPS - 1));
+        const uint16x8_t L7 = vmovl_u8(vld1_u8(dst + 7 * BPS - 1));
+        const uint16x8_t s0 = vaddq_u16(L0, L1);
+        const uint16x8_t s1 = vaddq_u16(L2, L3);
+        const uint16x8_t s2 = vaddq_u16(L4, L5);
+        const uint16x8_t s3 = vaddq_u16(L6, L7);
+        const uint16x8_t s01 = vaddq_u16(s0, s1);
+        const uint16x8_t s23 = vaddq_u16(s2, s3);
+        sum_left = vaddq_u16(s01, s23); // lane 0: sum of the 8 left bytes (other lanes are not used)
+    }
+
+    if (do_top && do_left) {
+        const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
+        dc0 = vrshrn_n_u16(sum, 4); // 16 samples: (sum + 8) >> 4
+    } else if (do_top) {
+        dc0 = vrshrn_n_u16(sum_top, 3); // 8 samples: (sum + 4) >> 3
+    } else if (do_left) {
+        dc0 = vrshrn_n_u16(sum_left, 3); // 8 samples: (sum + 4) >> 3
+    } else {
+        dc0 = vdup_n_u8(0x80); // no edge requested: constant 0x80
+    }
+
+    {
+        const uint8x8_t dc = vdup_lane_u8(dc0, 0); // broadcast lane 0, the only lane guaranteed meaningful
+        int i;
+        for (i = 0; i < 8; ++i) {
+            vst1_u32((uint32_t*)(dst + i * BPS), vreinterpret_u32_u8(dc));
+        }
+    }
+}
+
+static void DC8uv(uint8_t* dst) { // DC8 using both the top row and the left column
+    DC8(dst, 1, 1);
+}
+static void DC8uvNoTop(uint8_t* dst) { // DC8 using the left column only
+    DC8(dst, 0, 1);
+}
+static void DC8uvNoLeft(uint8_t* dst) { // DC8 using the top row only
+    DC8(dst, 1, 0);
+}
+static void DC8uvNoTopLeft(uint8_t* dst) { // DC8 with neither edge: constant 0x80 fill
+    DC8(dst, 0, 0);
+}
+
+static void TM8uv(uint8_t* dst) { // TrueMotion with size 8
+    TrueMotion(dst, 8);
+}
+
+//------------------------------------------------------------------------------
+// 16x16
+
+static void VE16(uint8_t* dst) { // vertical
+    // Copy the 16 bytes found one BPS before dst into each of the block's 16 rows.
+    const uint8x16_t top = vld1q_u8(dst - BPS);
+    uint8_t* row = dst;
+    uint8_t* const end = dst + 16 * BPS;
+    for (; row != end; row += BPS) vst1q_u8(row, top);
+}
+
+static void HE16(uint8_t* dst) { // horizontal
+    // Each of the 16 rows is filled with the byte sitting just before its first pixel.
+    int row;
+    for (row = 0; row < 16; ++row) {
+        uint8_t* const line = dst + row * BPS;
+        vst1q_u8(line, vld1q_dup_u8(line - 1));
+    }
+}
+
+static WEBP_INLINE void DC16(uint8_t* dst, int do_top, int do_left) { // 16x16 DC fill; flags pick the edges averaged
+    uint16x8_t sum_top;  // only set (and only read) when do_top is non-zero
+    uint16x8_t sum_left; // only set (and only read) when do_left is non-zero
+    uint8x8_t dc0;
+
+    if (do_top) {
+        const uint8x16_t A = vld1q_u8(dst - BPS); // top row
+        const uint16x8_t p0 = vpaddlq_u8(A);      // cascading summation of the top
+        const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0));
+        const uint16x4_t p2 = vpadd_u16(p1, p1);
+        const uint16x4_t p3 = vpadd_u16(p2, p2); // every lane now holds the sum of the 16 top bytes
+        sum_top = vcombine_u16(p3, p3);
+    }
+
+    if (do_left) {
+        int i;
+        sum_left = vdupq_n_u16(0);
+        for (i = 0; i < 16; i += 8) { // two iterations of eight rows each
+            const uint16x8_t L0 = vmovl_u8(vld1_u8(dst + (i + 0) * BPS - 1)); // lane 0 is the row's left neighbour
+            const uint16x8_t L1 = vmovl_u8(vld1_u8(dst + (i + 1) * BPS - 1));
+            const uint16x8_t L2 = vmovl_u8(vld1_u8(dst + (i + 2) * BPS - 1));
+            const uint16x8_t L3 = vmovl_u8(vld1_u8(dst + (i + 3) * BPS - 1));
+            const uint16x8_t L4 = vmovl_u8(vld1_u8(dst + (i + 4) * BPS - 1));
+            const uint16x8_t L5 = vmovl_u8(vld1_u8(dst + (i + 5) * BPS - 1));
+            const uint16x8_t L6 = vmovl_u8(vld1_u8(dst + (i + 6) * BPS - 1));
+            const uint16x8_t L7 = vmovl_u8(vld1_u8(dst + (i + 7) * BPS - 1));
+            const uint16x8_t s0 = vaddq_u16(L0, L1);
+            const uint16x8_t s1 = vaddq_u16(L2, L3);
+            const uint16x8_t s2 = vaddq_u16(L4, L5);
+            const uint16x8_t s3 = vaddq_u16(L6, L7);
+            const uint16x8_t s01 = vaddq_u16(s0, s1);
+            const uint16x8_t s23 = vaddq_u16(s2, s3);
+            const uint16x8_t sum = vaddq_u16(s01, s23);
+            sum_left = vaddq_u16(sum_left, sum); // lane 0 accumulates the left-column sum
+        }
+    }
+
+    if (do_top && do_left) {
+        const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
+        dc0 = vrshrn_n_u16(sum, 5); // 32 samples: (sum + 16) >> 5
+    } else if (do_top) {
+        dc0 = vrshrn_n_u16(sum_top, 4); // 16 samples: (sum + 8) >> 4
+    } else if (do_left) {
+        dc0 = vrshrn_n_u16(sum_left, 4); // 16 samples: (sum + 8) >> 4
+    } else {
+        dc0 = vdup_n_u8(0x80); // no edge requested: constant 0x80
+    }
+
+    {
+        const uint8x16_t dc = vdupq_lane_u8(dc0, 0); // broadcast lane 0, the only lane guaranteed meaningful
+        int i;
+        for (i = 0; i < 16; ++i) {
+            vst1q_u8(dst + i * BPS, dc);
+        }
+    }
+}
+
+static void DC16TopLeft(uint8_t* dst) { // DC16 using both the top row and the left column
+    DC16(dst, 1, 1);
+}
+static void DC16NoTop(uint8_t* dst) { // DC16 using the left column only
+    DC16(dst, 0, 1);
+}
+static void DC16NoLeft(uint8_t* dst) { // DC16 using the top row only
+    DC16(dst, 1, 0);
+}
+static void DC16NoTopLeft(uint8_t* dst) { // DC16 with neither edge: constant 0x80 fill
+    DC16(dst, 0, 0);
+}
+
+static void TM16(uint8_t* dst) { // 16x16 counterpart of TrueMotion(): saturate(L[r] + A[c] - A[-1])
+    const uint8x8_t TL = vld1_dup_u8(dst - BPS - 1); // top-left pixel 'A[-1]'
+    const uint8x16_t T = vld1q_u8(dst - BPS);        // top row 'A[0..15]'
+    // A[c] - A[-1], widened to 16 bits and split into the low and high 8 columns
+    const int16x8_t d_lo = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(T), TL));
+    const int16x8_t d_hi = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(T), TL));
+    int y;
+    for (y = 0; y < 16; y += 4) { // four rows per iteration
+        // left edge
+        const int16x8_t L0 = ConvertU8ToS16(vld1_dup_u8(dst + 0 * BPS - 1));
+        const int16x8_t L1 = ConvertU8ToS16(vld1_dup_u8(dst + 1 * BPS - 1));
+        const int16x8_t L2 = ConvertU8ToS16(vld1_dup_u8(dst + 2 * BPS - 1));
+        const int16x8_t L3 = ConvertU8ToS16(vld1_dup_u8(dst + 3 * BPS - 1));
+        const int16x8_t r0_lo = vaddq_s16(L0, d_lo); // L[r] + A[c] - A[-1]
+        const int16x8_t r1_lo = vaddq_s16(L1, d_lo);
+        const int16x8_t r2_lo = vaddq_s16(L2, d_lo);
+        const int16x8_t r3_lo = vaddq_s16(L3, d_lo);
+        const int16x8_t r0_hi = vaddq_s16(L0, d_hi);
+        const int16x8_t r1_hi = vaddq_s16(L1, d_hi);
+        const int16x8_t r2_hi = vaddq_s16(L2, d_hi);
+        const int16x8_t r3_hi = vaddq_s16(L3, d_hi);
+        // Saturate and store the result.
+        const uint8x16_t row0 = vcombine_u8(vqmovun_s16(r0_lo), vqmovun_s16(r0_hi));
+        const uint8x16_t row1 = vcombine_u8(vqmovun_s16(r1_lo), vqmovun_s16(r1_hi));
+        const uint8x16_t row2 = vcombine_u8(vqmovun_s16(r2_lo), vqmovun_s16(r2_hi));
+        const uint8x16_t row3 = vcombine_u8(vqmovun_s16(r3_lo), vqmovun_s16(r3_hi));
+        vst1q_u8(dst + 0 * BPS, row0);
+        vst1q_u8(dst + 1 * BPS, row1);
+        vst1q_u8(dst + 2 * BPS, row2);
+        vst1q_u8(dst + 3 * BPS, row3);
+        dst += 4 * BPS;
+    }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8DspInitNEON(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitNEON(void) { // points the VP8* function pointers at the NEON versions
+    VP8Transform = TransformTwo;
+    VP8TransformAC3 = TransformAC3;
+    VP8TransformDC = TransformDC;
+    VP8TransformWHT = TransformWHT;
+
+    VP8VFilter16 = VFilter16;
+    VP8VFilter16i = VFilter16i;
+    VP8HFilter16 = HFilter16;
+#if !defined(WORK_AROUND_GCC)
+    VP8HFilter16i = HFilter16i; // not compiled under WORK_AROUND_GCC, so the pointer is left untouched there
+#endif
+    VP8VFilter8 = VFilter8;
+    VP8VFilter8i = VFilter8i;
+#if !defined(WORK_AROUND_GCC)
+    VP8HFilter8 = HFilter8; // likewise only available without WORK_AROUND_GCC
+    VP8HFilter8i = HFilter8i;
+#endif
+    VP8SimpleVFilter16 = SimpleVFilter16;
+    VP8SimpleHFilter16 = SimpleHFilter16;
+    VP8SimpleVFilter16i = SimpleVFilter16i;
+    VP8SimpleHFilter16i = SimpleHFilter16i;
+
+    VP8PredLuma4[0] = DC4; // only entries 0, 1, 2, 4 and 6 are assigned; the others are left untouched here
+    VP8PredLuma4[1] = TM4;
+    VP8PredLuma4[2] = VE4;
+    VP8PredLuma4[4] = RD4;
+    VP8PredLuma4[6] = LD4;
+
+    VP8PredLuma16[0] = DC16TopLeft;
+    VP8PredLuma16[1] = TM16;
+    VP8PredLuma16[2] = VE16;
+    VP8PredLuma16[3] = HE16;
+    VP8PredLuma16[4] = DC16NoTop;
+    VP8PredLuma16[5] = DC16NoLeft;
+    VP8PredLuma16[6] = DC16NoTopLeft;
+
+    VP8PredChroma8[0] = DC8uv; // same slot layout as VP8PredLuma16 above
+    VP8PredChroma8[1] = TM8uv;
+    VP8PredChroma8[2] = VE8uv;
+    VP8PredChroma8[3] = HE8uv;
+    VP8PredChroma8[4] = DC8uvNoTop;
+    VP8PredChroma8[5] = DC8uvNoLeft;
+    VP8PredChroma8[6] = DC8uvNoTopLeft;
+}
+
+#else // !WEBP_USE_NEON
+
+WEBP_DSP_INIT_STUB(VP8DspInitNEON)
+
+#endif // WEBP_USE_NEON
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/dec_sse2.c b/codec/L2/demos/webpEnc/host/src/dsp/dec_sse2.c
new file mode 100644
index 0000000000..885a9e9ea1
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/dec_sse2.c
@@ -0,0 +1,1294 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 version of some decoding functions (idct, loop filtering).
+//
+// Author: somnath@google.com (Somnath Banerjee)
+//         cduvivier@google.com (Christian Duvivier)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+
+// The 3-coeff sparse transform in SSE2 is not really faster than the plain-C
+// one it seems => disable it by default. Uncomment the following to enable:
+// #define USE_TRANSFORM_AC3
+
+#include <emmintrin.h>
+#include "../dec/vp8i.h"
+
+//------------------------------------------------------------------------------
+// Transforms (Paragraph 14.4)
+
+static void Transform(const int16_t* in, uint8_t* dst, int do_two) { // Inverse-transforms one 4x4 coefficient block (two side-by-side blocks if do_two != 0) and adds the result into dst.
+    // This implementation makes use of 16-bit fixed point versions of two
+    // multiply constants:
+    //    K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
+    //    K2 = sqrt(2) * sin (pi/8) ~= 35468 / 2^16
+    //
+    // To be able to use signed 16-bit integers, we use the following trick to
+    // have constants within range:
+    // - Associated constants are obtained by subtracting the 16-bit fixed point
+    //   version of one:
+    //      k = K - (1 << 16)  =>  K = k + (1 << 16)
+    //      K1 = 85627  =>  k1 =  20091
+    //      K2 = 35468  =>  k2 = -30068
+    // - The multiplication of a variable by a constant become the sum of the
+    //   variable and the multiplication of that variable by the associated
+    //   constant:
+    //      (x * K) >> 16 = (x * (k + (1 << 16))) >> 16 = ((x * k ) >> 16) + x
+    const __m128i k1 = _mm_set1_epi16(20091);
+    const __m128i k2 = _mm_set1_epi16(-30068);
+    __m128i T0, T1, T2, T3;
+
+    // Load and concatenate the transform coefficients (we'll do two transforms
+    // in parallel). In the case of only one transform, the second half of the
+    // vectors is zero-filled by _mm_loadl_epi64 and its results are never stored.
+    __m128i in0, in1, in2, in3;
+    {
+        in0 = _mm_loadl_epi64((const __m128i*)&in[0]);
+        in1 = _mm_loadl_epi64((const __m128i*)&in[4]);
+        in2 = _mm_loadl_epi64((const __m128i*)&in[8]);
+        in3 = _mm_loadl_epi64((const __m128i*)&in[12]);
+        // a00 a10 a20 a30   x x x x
+        // a01 a11 a21 a31   x x x x
+        // a02 a12 a22 a32   x x x x
+        // a03 a13 a23 a33   x x x x
+        if (do_two) {
+            const __m128i inB0 = _mm_loadl_epi64((const __m128i*)&in[16]); // second block: coefficients in[16..31]
+            const __m128i inB1 = _mm_loadl_epi64((const __m128i*)&in[20]);
+            const __m128i inB2 = _mm_loadl_epi64((const __m128i*)&in[24]);
+            const __m128i inB3 = _mm_loadl_epi64((const __m128i*)&in[28]);
+            in0 = _mm_unpacklo_epi64(in0, inB0);
+            in1 = _mm_unpacklo_epi64(in1, inB1);
+            in2 = _mm_unpacklo_epi64(in2, inB2);
+            in3 = _mm_unpacklo_epi64(in3, inB3);
+            // a00 a10 a20 a30   b00 b10 b20 b30
+            // a01 a11 a21 a31   b01 b11 b21 b31
+            // a02 a12 a22 a32   b02 b12 b22 b32
+            // a03 a13 a23 a33   b03 b13 b23 b33
+        }
+    }
+
+    // Vertical pass and subsequent transpose.
+    {
+        // First pass, c and d calculations are longer because of the "trick"
+        // multiplications.
+        const __m128i a = _mm_add_epi16(in0, in2);
+        const __m128i b = _mm_sub_epi16(in0, in2);
+        // c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3
+        const __m128i c1 = _mm_mulhi_epi16(in1, k2);
+        const __m128i c2 = _mm_mulhi_epi16(in3, k1);
+        const __m128i c3 = _mm_sub_epi16(in1, in3);
+        const __m128i c4 = _mm_sub_epi16(c1, c2);
+        const __m128i c = _mm_add_epi16(c3, c4);
+        // d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3
+        const __m128i d1 = _mm_mulhi_epi16(in1, k1);
+        const __m128i d2 = _mm_mulhi_epi16(in3, k2);
+        const __m128i d3 = _mm_add_epi16(in1, in3);
+        const __m128i d4 = _mm_add_epi16(d1, d2);
+        const __m128i d = _mm_add_epi16(d3, d4);
+
+        // Second pass.
+        const __m128i tmp0 = _mm_add_epi16(a, d);
+        const __m128i tmp1 = _mm_add_epi16(b, c);
+        const __m128i tmp2 = _mm_sub_epi16(b, c);
+        const __m128i tmp3 = _mm_sub_epi16(a, d);
+
+        // Transpose the two 4x4.
+        // a00 a01 a02 a03   b00 b01 b02 b03
+        // a10 a11 a12 a13   b10 b11 b12 b13
+        // a20 a21 a22 a23   b20 b21 b22 b23
+        // a30 a31 a32 a33   b30 b31 b32 b33
+        const __m128i transpose0_0 = _mm_unpacklo_epi16(tmp0, tmp1);
+        const __m128i transpose0_1 = _mm_unpacklo_epi16(tmp2, tmp3);
+        const __m128i transpose0_2 = _mm_unpackhi_epi16(tmp0, tmp1);
+        const __m128i transpose0_3 = _mm_unpackhi_epi16(tmp2, tmp3);
+        // a00 a10 a01 a11   a02 a12 a03 a13
+        // a20 a30 a21 a31   a22 a32 a23 a33
+        // b00 b10 b01 b11   b02 b12 b03 b13
+        // b20 b30 b21 b31   b22 b32 b23 b33
+        const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+        const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
+        const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+        const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
+        // a00 a10 a20 a30 a01 a11 a21 a31
+        // b00 b10 b20 b30 b01 b11 b21 b31
+        // a02 a12 a22 a32 a03 a13 a23 a33
+        // b02 b12 b22 b32 b03 b13 b23 b33
+        T0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
+        T1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
+        T2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
+        T3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
+        // a00 a10 a20 a30   b00 b10 b20 b30
+        // a01 a11 a21 a31   b01 b11 b21 b31
+        // a02 a12 a22 a32   b02 b12 b22 b32
+        // a03 a13 a23 a33   b03 b13 b23 b33
+    }
+
+    // Horizontal pass and subsequent transpose.
+    {
+        // First pass, c and d calculations are longer because of the "trick"
+        // multiplications.
+        const __m128i four = _mm_set1_epi16(4); // rounding bias for the final >> 3
+        const __m128i dc = _mm_add_epi16(T0, four);
+        const __m128i a = _mm_add_epi16(dc, T2);
+        const __m128i b = _mm_sub_epi16(dc, T2);
+        // c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
+        const __m128i c1 = _mm_mulhi_epi16(T1, k2);
+        const __m128i c2 = _mm_mulhi_epi16(T3, k1);
+        const __m128i c3 = _mm_sub_epi16(T1, T3);
+        const __m128i c4 = _mm_sub_epi16(c1, c2);
+        const __m128i c = _mm_add_epi16(c3, c4);
+        // d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3
+        const __m128i d1 = _mm_mulhi_epi16(T1, k1);
+        const __m128i d2 = _mm_mulhi_epi16(T3, k2);
+        const __m128i d3 = _mm_add_epi16(T1, T3);
+        const __m128i d4 = _mm_add_epi16(d1, d2);
+        const __m128i d = _mm_add_epi16(d3, d4);
+
+        // Second pass.
+        const __m128i tmp0 = _mm_add_epi16(a, d);
+        const __m128i tmp1 = _mm_add_epi16(b, c);
+        const __m128i tmp2 = _mm_sub_epi16(b, c);
+        const __m128i tmp3 = _mm_sub_epi16(a, d);
+        const __m128i shifted0 = _mm_srai_epi16(tmp0, 3);
+        const __m128i shifted1 = _mm_srai_epi16(tmp1, 3);
+        const __m128i shifted2 = _mm_srai_epi16(tmp2, 3);
+        const __m128i shifted3 = _mm_srai_epi16(tmp3, 3);
+
+        // Transpose the two 4x4.
+        // a00 a01 a02 a03   b00 b01 b02 b03
+        // a10 a11 a12 a13   b10 b11 b12 b13
+        // a20 a21 a22 a23   b20 b21 b22 b23
+        // a30 a31 a32 a33   b30 b31 b32 b33
+        const __m128i transpose0_0 = _mm_unpacklo_epi16(shifted0, shifted1);
+        const __m128i transpose0_1 = _mm_unpacklo_epi16(shifted2, shifted3);
+        const __m128i transpose0_2 = _mm_unpackhi_epi16(shifted0, shifted1);
+        const __m128i transpose0_3 = _mm_unpackhi_epi16(shifted2, shifted3);
+        // a00 a10 a01 a11   a02 a12 a03 a13
+        // a20 a30 a21 a31   a22 a32 a23 a33
+        // b00 b10 b01 b11   b02 b12 b03 b13
+        // b20 b30 b21 b31   b22 b32 b23 b33
+        const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+        const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
+        const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+        const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
+        // a00 a10 a20 a30 a01 a11 a21 a31
+        // b00 b10 b20 b30 b01 b11 b21 b31
+        // a02 a12 a22 a32 a03 a13 a23 a33
+        // b02 b12 b22 b32 b03 b13 b23 b33
+        T0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
+        T1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
+        T2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
+        T3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
+        // a00 a10 a20 a30   b00 b10 b20 b30
+        // a01 a11 a21 a31   b01 b11 b21 b31
+        // a02 a12 a22 a32   b02 b12 b22 b32
+        // a03 a13 a23 a33   b03 b13 b23 b33
+    }
+
+    // Add inverse transform to 'dst' and store.
+    {
+        const __m128i zero = _mm_setzero_si128();
+        // Load the reference(s).
+        __m128i dst0, dst1, dst2, dst3;
+        if (do_two) {
+            // Load eight bytes/pixels per line.
+            dst0 = _mm_loadl_epi64((__m128i*)(dst + 0 * BPS));
+            dst1 = _mm_loadl_epi64((__m128i*)(dst + 1 * BPS));
+            dst2 = _mm_loadl_epi64((__m128i*)(dst + 2 * BPS));
+            dst3 = _mm_loadl_epi64((__m128i*)(dst + 3 * BPS));
+        } else {
+            // Load four bytes/pixels per line.
+            dst0 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 0 * BPS));
+            dst1 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 1 * BPS));
+            dst2 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 2 * BPS));
+            dst3 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 3 * BPS));
+        }
+        // Convert to 16b.
+        dst0 = _mm_unpacklo_epi8(dst0, zero);
+        dst1 = _mm_unpacklo_epi8(dst1, zero);
+        dst2 = _mm_unpacklo_epi8(dst2, zero);
+        dst3 = _mm_unpacklo_epi8(dst3, zero);
+        // Add the inverse transform(s).
+        dst0 = _mm_add_epi16(dst0, T0);
+        dst1 = _mm_add_epi16(dst1, T1);
+        dst2 = _mm_add_epi16(dst2, T2);
+        dst3 = _mm_add_epi16(dst3, T3);
+        // Unsigned saturate to 8b.
+        dst0 = _mm_packus_epi16(dst0, dst0);
+        dst1 = _mm_packus_epi16(dst1, dst1);
+        dst2 = _mm_packus_epi16(dst2, dst2);
+        dst3 = _mm_packus_epi16(dst3, dst3);
+        // Store the results.
+        if (do_two) {
+            // Store eight bytes/pixels per line.
+            _mm_storel_epi64((__m128i*)(dst + 0 * BPS), dst0);
+            _mm_storel_epi64((__m128i*)(dst + 1 * BPS), dst1);
+            _mm_storel_epi64((__m128i*)(dst + 2 * BPS), dst2);
+            _mm_storel_epi64((__m128i*)(dst + 3 * BPS), dst3);
+        } else {
+            // Store four bytes/pixels per line.
+            WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0));
+            WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1));
+            WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2));
+            WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3));
+        }
+    }
+}
+
+#if defined(USE_TRANSFORM_AC3) // off by default: the #define near the top of this file is commented out
+#define MUL(a, b) (((a) * (b)) >> 16)
+static void TransformAC3(const int16_t* in, uint8_t* dst) { // Inverse transform that reads only in[0], in[1] and in[4]; the result is added into the 4x4 block at dst.
+    static const int kC1 = 20091 + (1 << 16); // full-range K1 (= 85627), usable here because MUL works on int
+    static const int kC2 = 35468;             // K2
+    const __m128i A = _mm_set1_epi16(in[0] + 4); // in[0] plus the rounding bias for the >> 3 below
+    const __m128i c4 = _mm_set1_epi16(MUL(in[4], kC2));
+    const __m128i d4 = _mm_set1_epi16(MUL(in[4], kC1));
+    const int c1 = MUL(in[1], kC2);
+    const int d1 = MUL(in[1], kC1);
+    const __m128i CD = _mm_set_epi16(0, 0, 0, 0, -d1, -c1, c1, d1); // lanes 0..3 = d1, c1, -c1, -d1
+    const __m128i B = _mm_adds_epi16(A, CD);
+    const __m128i m0 = _mm_adds_epi16(B, d4); // m0..m3 are added to rows 0..3 of dst below
+    const __m128i m1 = _mm_adds_epi16(B, c4);
+    const __m128i m2 = _mm_subs_epi16(B, c4);
+    const __m128i m3 = _mm_subs_epi16(B, d4);
+    const __m128i zero = _mm_setzero_si128();
+    // Load the source pixels.
+    __m128i dst0 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 0 * BPS));
+    __m128i dst1 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 1 * BPS));
+    __m128i dst2 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 2 * BPS));
+    __m128i dst3 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 3 * BPS));
+    // Convert to 16b.
+    dst0 = _mm_unpacklo_epi8(dst0, zero);
+    dst1 = _mm_unpacklo_epi8(dst1, zero);
+    dst2 = _mm_unpacklo_epi8(dst2, zero);
+    dst3 = _mm_unpacklo_epi8(dst3, zero);
+    // Add the inverse transform.
+    dst0 = _mm_adds_epi16(dst0, _mm_srai_epi16(m0, 3));
+    dst1 = _mm_adds_epi16(dst1, _mm_srai_epi16(m1, 3));
+    dst2 = _mm_adds_epi16(dst2, _mm_srai_epi16(m2, 3));
+    dst3 = _mm_adds_epi16(dst3, _mm_srai_epi16(m3, 3));
+    // Unsigned saturate to 8b.
+    dst0 = _mm_packus_epi16(dst0, dst0);
+    dst1 = _mm_packus_epi16(dst1, dst1);
+    dst2 = _mm_packus_epi16(dst2, dst2);
+    dst3 = _mm_packus_epi16(dst3, dst3);
+    // Store the results.
+    WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0));
+    WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1));
+    WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2));
+    WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3));
+}
+#undef MUL
+#endif // USE_TRANSFORM_AC3
+
+//------------------------------------------------------------------------------
+// Loop Filter (Paragraph 15)
+
+// Compute the per-byte unsigned abs(p - q) = subs(p - q) OR subs(q - p): one of the two saturating differences is always 0. Each argument is expanded twice.
+#define MM_ABS(p, q) _mm_or_si128(_mm_subs_epu8((q), (p)), _mm_subs_epu8((p), (q)))
+
+// Arithmetic right shift of each signed byte of "x" by 3 bits (the sign bit is preserved). "x" is updated in place.
+static WEBP_INLINE void SignedShift8b(__m128i* const x) {
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i lo_0 = _mm_unpacklo_epi8(zero, *x); // each byte of *x becomes the high byte of a 16-bit lane (x << 8)
+    const __m128i hi_0 = _mm_unpackhi_epi8(zero, *x);
+    const __m128i lo_1 = _mm_srai_epi16(lo_0, 3 + 8); // 8 undoes the << 8, 3 is the actual shift; srai sign-extends
+    const __m128i hi_1 = _mm_srai_epi16(hi_0, 3 + 8);
+    *x = _mm_packs_epi16(lo_1, hi_1); // back to 16 signed bytes
+}
+
+#define FLIP_SIGN_BIT2(a, b)            \
+    {                                   \
+        a = _mm_xor_si128(a, sign_bit); \
+        b = _mm_xor_si128(b, sign_bit); \
+    } // XORs 'a' and 'b' in place with 'sign_bit', which must be declared in the expanding scope (callers here use _mm_set1_epi8(0x80)).
+
+#define FLIP_SIGN_BIT4(a, b, c, d) \
+    {                              \
+        FLIP_SIGN_BIT2(a, b);      \
+        FLIP_SIGN_BIT2(c, d);      \
+    } // Same as FLIP_SIGN_BIT2, applied to four values.
+
+// Sets each byte of *not_hev to 0xFF where both abs(p1 - p0) and abs(q1 - q0) are <= hev_thresh, else 0x00. Input/output is uint8_t.
+static WEBP_INLINE void GetNotHEV(const __m128i* const p1,
+                                  const __m128i* const p0,
+                                  const __m128i* const q0,
+                                  const __m128i* const q1,
+                                  int hev_thresh,
+                                  __m128i* const not_hev) {
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i t_1 = MM_ABS(*p1, *p0);
+    const __m128i t_2 = MM_ABS(*q1, *q0);
+
+    const __m128i h = _mm_set1_epi8(hev_thresh);
+    const __m128i t_max = _mm_max_epu8(t_1, t_2);
+
+    const __m128i t_max_h = _mm_subs_epu8(t_max, h); // saturates to 0 exactly where t_max <= h
+    *not_hev = _mm_cmpeq_epi8(t_max_h, zero); // 0xFF where t_1 <= hev_thresh && t_2 <= hev_thresh
+}
+
+// Computes *delta = p1 - q1 + 3 * (q0 - p0) per byte, saturating (signed) at every step. Input pixels are int8_t.
+static WEBP_INLINE void GetBaseDelta(const __m128i* const p1,
+                                     const __m128i* const p0,
+                                     const __m128i* const q0,
+                                     const __m128i* const q1,
+                                     __m128i* const delta) {
+    // beware of addition order, for saturation!
+    const __m128i p1_q1 = _mm_subs_epi8(*p1, *q1);  // p1 - q1
+    const __m128i q0_p0 = _mm_subs_epi8(*q0, *p0);  // q0 - p0
+    const __m128i s1 = _mm_adds_epi8(p1_q1, q0_p0); // p1 - q1 + 1 * (q0 - p0)
+    const __m128i s2 = _mm_adds_epi8(q0_p0, s1);    // p1 - q1 + 2 * (q0 - p0)
+    const __m128i s3 = _mm_adds_epi8(q0_p0, s2);    // p1 - q1 + 3 * (q0 - p0)
+    *delta = s3;
+}
+
+// Applies filter value 'fl' in place: q0 -= (fl + 4) >> 3 and p0 += (fl + 3) >> 3, with signed saturation. Input and output are int8_t.
+static WEBP_INLINE void DoSimpleFilter(__m128i* const p0, __m128i* const q0, const __m128i* const fl) {
+    const __m128i k3 = _mm_set1_epi8(3);
+    const __m128i k4 = _mm_set1_epi8(4);
+    __m128i v3 = _mm_adds_epi8(*fl, k3); // fl + 3 (saturating)
+    __m128i v4 = _mm_adds_epi8(*fl, k4); // fl + 4 (saturating)
+
+    SignedShift8b(&v4);           // v4 >> 3
+    SignedShift8b(&v3);           // v3 >> 3
+    *q0 = _mm_subs_epi8(*q0, v4); // q0 -= v4
+    *p0 = _mm_adds_epi8(*p0, v3); // p0 += v3
+}
+
+// Updates values of 2 pixels at MB edge during complex filtering.
+// Update operations:
+// q = q - delta and p = p + delta; where delta = [(a_hi >> 7), (a_lo >> 7)]
+// Pixels 'pi' and 'qi' are int8_t on input, uint8_t on output (sign flip).
+static WEBP_INLINE void Update2Pixels(__m128i* const pi,
+                                      __m128i* const qi,
+                                      const __m128i* const a0_lo,
+                                      const __m128i* const a0_hi) {
+    const __m128i a1_lo = _mm_srai_epi16(*a0_lo, 7); // 16-bit lanes for the low 8 pixels
+    const __m128i a1_hi = _mm_srai_epi16(*a0_hi, 7); // 16-bit lanes for the high 8 pixels
+    const __m128i delta = _mm_packs_epi16(a1_lo, a1_hi); // narrow to 16 signed bytes with saturation
+    const __m128i sign_bit = _mm_set1_epi8(0x80); // used by FLIP_SIGN_BIT2 below
+    *pi = _mm_adds_epi8(*pi, delta);
+    *qi = _mm_subs_epi8(*qi, delta);
+    FLIP_SIGN_BIT2(*pi, *qi);
+}
+
+// Sets each byte of *mask to 0xFF where 2 * abs(p0 - q0) + abs(p1 - q1) / 2 <= thresh (unsigned, saturating), else 0x00. Input pixels are uint8_t.
+static WEBP_INLINE void NeedsFilter(const __m128i* const p1,
+                                    const __m128i* const p0,
+                                    const __m128i* const q0,
+                                    const __m128i* const q1,
+                                    int thresh,
+                                    __m128i* const mask) {
+    const __m128i m_thresh = _mm_set1_epi8(thresh);
+    const __m128i t1 = MM_ABS(*p1, *q1); // abs(p1 - q1)
+    const __m128i kFE = _mm_set1_epi8(0xFE);
+    const __m128i t2 = _mm_and_si128(t1, kFE); // set lsb of each byte to zero so the 16-bit shift below cannot leak a bit into the neighbouring byte
+    const __m128i t3 = _mm_srli_epi16(t2, 1);  // abs(p1 - q1) / 2
+
+    const __m128i t4 = MM_ABS(*p0, *q0);      // abs(p0 - q0)
+    const __m128i t5 = _mm_adds_epu8(t4, t4); // abs(p0 - q0) * 2
+    const __m128i t6 = _mm_adds_epu8(t5, t3); // abs(p0-q0)*2 + abs(p1-q1)/2
+
+    const __m128i t7 = _mm_subs_epu8(t6, m_thresh); // mask <= m_thresh
+    *mask = _mm_cmpeq_epi8(t7, _mm_setzero_si128()); // 0xFF where t6 <= thresh
+}
+
+//------------------------------------------------------------------------------
+// Edge filtering functions
+
+// Applies filter on 2 pixels (p0 and q0). All inputs are uint8_t; *p0 and *q0 are updated in place, *p1 and *q1 are only read.
+static WEBP_INLINE void DoFilter2(
+    __m128i* const p1, __m128i* const p0, __m128i* const q0, __m128i* const q1, int thresh) {
+    __m128i a, mask;
+    const __m128i sign_bit = _mm_set1_epi8(0x80);
+    // convert p1/q1 to int8_t (for GetBaseDelta)
+    const __m128i p1s = _mm_xor_si128(*p1, sign_bit);
+    const __m128i q1s = _mm_xor_si128(*q1, sign_bit);
+
+    NeedsFilter(p1, p0, q0, q1, thresh, &mask); // must run before the sign flip: it expects uint8_t pixels
+
+    FLIP_SIGN_BIT2(*p0, *q0); // to int8_t
+    GetBaseDelta(&p1s, p0, q0, &q1s, &a);
+    a = _mm_and_si128(a, mask); // mask filter values we don't care about
+    DoSimpleFilter(p0, q0, &a);
+    FLIP_SIGN_BIT2(*p0, *q0); // back to uint8_t
+}
+
+// Applies filter on 4 pixels (p1, p0, q0 and q1). Pixels are uint8_t on input and on output; all four are updated in place.
+static WEBP_INLINE void DoFilter4(__m128i* const p1,
+                                  __m128i* const p0,
+                                  __m128i* const q0,
+                                  __m128i* const q1,
+                                  const __m128i* const mask,
+                                  int hev_thresh) {
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i sign_bit = _mm_set1_epi8(0x80);
+    const __m128i k64 = _mm_set1_epi8(64);
+    const __m128i k3 = _mm_set1_epi8(3);
+    const __m128i k4 = _mm_set1_epi8(4);
+    __m128i not_hev;
+    __m128i t1, t2, t3;
+
+    // compute hev mask
+    GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);
+
+    // convert to signed values
+    FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
+
+    t1 = _mm_subs_epi8(*p1, *q1);       // p1 - q1
+    t1 = _mm_andnot_si128(not_hev, t1); // hev(p1 - q1)
+    t2 = _mm_subs_epi8(*q0, *p0);       // q0 - p0
+    t1 = _mm_adds_epi8(t1, t2);         // hev(p1 - q1) + 1 * (q0 - p0)
+    t1 = _mm_adds_epi8(t1, t2);         // hev(p1 - q1) + 2 * (q0 - p0)
+    t1 = _mm_adds_epi8(t1, t2);         // hev(p1 - q1) + 3 * (q0 - p0)
+    t1 = _mm_and_si128(t1, *mask);      // mask filter values we don't care about
+
+    t2 = _mm_adds_epi8(t1, k3);   // 3 * (q0 - p0) + hev(p1 - q1) + 3
+    t3 = _mm_adds_epi8(t1, k4);   // 3 * (q0 - p0) + hev(p1 - q1) + 4
+    SignedShift8b(&t2);           // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
+    SignedShift8b(&t3);           // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
+    *p0 = _mm_adds_epi8(*p0, t2); // p0 += t2
+    *q0 = _mm_subs_epi8(*q0, t3); // q0 -= t3
+    FLIP_SIGN_BIT2(*p0, *q0); // p0/q0 back to uint8_t
+
+    // this is equivalent to signed (a + 1) >> 1 calculation
+    t2 = _mm_add_epi8(t3, sign_bit); // bias into unsigned range
+    t3 = _mm_avg_epu8(t2, zero);     // (t2 + 1) >> 1, unsigned
+    t3 = _mm_sub_epi8(t3, k64);      // remove the halved bias (128 / 2)
+
+    t3 = _mm_and_si128(not_hev, t3); // if !hev
+    *q1 = _mm_subs_epi8(*q1, t3);    // q1 -= t3
+    *p1 = _mm_adds_epi8(*p1, t3);    // p1 += t3
+    FLIP_SIGN_BIT2(*p1, *q1); // p1/q1 back to uint8_t
+}
+
+// Applies filter on 6 pixels (p2, p1, p0, q0, q1 and q2). Pixels are uint8_t on input and on output; all six are updated in place.
+static WEBP_INLINE void DoFilter6(__m128i* const p2,
+                                  __m128i* const p1,
+                                  __m128i* const p0,
+                                  __m128i* const q0,
+                                  __m128i* const q1,
+                                  __m128i* const q2,
+                                  const __m128i* const mask,
+                                  int hev_thresh) {
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i sign_bit = _mm_set1_epi8(0x80);
+    __m128i a, not_hev;
+
+    // compute hev mask
+    GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);
+
+    FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1); // to int8_t
+    FLIP_SIGN_BIT2(*p2, *q2);
+    GetBaseDelta(p1, p0, q0, q1, &a);
+
+    { // do simple filter on pixels with hev
+        const __m128i m = _mm_andnot_si128(not_hev, *mask);
+        const __m128i f = _mm_and_si128(a, m);
+        DoSimpleFilter(p0, q0, &f);
+    }
+
+    { // do strong filter on pixels with not hev
+        const __m128i k9 = _mm_set1_epi16(0x0900); // 9 << 8: with f in the high byte (f << 8), mulhi yields ((f << 8) * (9 << 8)) >> 16 = 9 * f
+        const __m128i k63 = _mm_set1_epi16(63);
+
+        const __m128i m = _mm_and_si128(not_hev, *mask);
+        const __m128i f = _mm_and_si128(a, m); // f is 0 in unselected lanes, so (0 + 63) >> 7 == 0 and Update2Pixels only flips their sign back
+
+        const __m128i f_lo = _mm_unpacklo_epi8(zero, f);
+        const __m128i f_hi = _mm_unpackhi_epi8(zero, f);
+
+        const __m128i f9_lo = _mm_mulhi_epi16(f_lo, k9); // Filter (lo) * 9
+        const __m128i f9_hi = _mm_mulhi_epi16(f_hi, k9); // Filter (hi) * 9
+
+        const __m128i a2_lo = _mm_add_epi16(f9_lo, k63); // Filter * 9 + 63
+        const __m128i a2_hi = _mm_add_epi16(f9_hi, k63); // Filter * 9 + 63
+
+        const __m128i a1_lo = _mm_add_epi16(a2_lo, f9_lo); // Filter * 18 + 63
+        const __m128i a1_hi = _mm_add_epi16(a2_hi, f9_hi); // Filter * 18 + 63
+
+        const __m128i a0_lo = _mm_add_epi16(a1_lo, f9_lo); // Filter * 27 + 63
+        const __m128i a0_hi = _mm_add_epi16(a1_hi, f9_hi); // Filter * 27 + 63
+
+        Update2Pixels(p2, q2, &a2_lo, &a2_hi); // each call also converts its pair back to uint8_t
+        Update2Pixels(p1, q1, &a1_lo, &a1_hi);
+        Update2Pixels(p0, q0, &a0_lo, &a0_hi);
+    }
+}
+
+// Reads 4 bytes from each of the 8 rows starting at 'b' and transposes them: *p receives columns 0 and 1, *q columns 2 and 3 (8 bytes per column).
+static WEBP_INLINE void Load8x4(const uint8_t* const b, int stride, __m128i* const p, __m128i* const q) {
+    // A0 = 63 62 61 60 23 22 21 20 43 42 41 40 03 02 01 00
+    // A1 = 73 72 71 70 33 32 31 30 53 52 51 50 13 12 11 10
+    const __m128i A0 = _mm_set_epi32(WebPMemToUint32(&b[6 * stride]), WebPMemToUint32(&b[2 * stride]),
+                                     WebPMemToUint32(&b[4 * stride]), WebPMemToUint32(&b[0 * stride]));
+    const __m128i A1 = _mm_set_epi32(WebPMemToUint32(&b[7 * stride]), WebPMemToUint32(&b[3 * stride]),
+                                     WebPMemToUint32(&b[5 * stride]), WebPMemToUint32(&b[1 * stride]));
+
+    // B0 = 53 43 52 42 51 41 50 40 13 03 12 02 11 01 10 00
+    // B1 = 73 63 72 62 71 61 70 60 33 23 32 22 31 21 30 20
+    const __m128i B0 = _mm_unpacklo_epi8(A0, A1);
+    const __m128i B1 = _mm_unpackhi_epi8(A0, A1);
+
+    // C0 = 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
+    // C1 = 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40
+    const __m128i C0 = _mm_unpacklo_epi16(B0, B1);
+    const __m128i C1 = _mm_unpackhi_epi16(B0, B1);
+
+    // *p = 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
+    // *q = 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+    *p = _mm_unpacklo_epi32(C0, C1);
+    *q = _mm_unpackhi_epi32(C0, C1);
+}
+
+static WEBP_INLINE void Load16x4(const uint8_t* const r0,
+                                 const uint8_t* const r8,
+                                 int stride,
+                                 __m128i* const p1,
+                                 __m128i* const p0,
+                                 __m128i* const q0,
+                                 __m128i* const q1) { // Loads a 16-row x 4-byte area (8 rows at r0, 8 at r8) transposed: each output holds one 16-byte column.
+    // Assume the pixels around the edge (|) are numbered as follows
+    //                00 01 | 02 03
+    //                10 11 | 12 13
+    //                 ...  |  ...
+    //                e0 e1 | e2 e3
+    //                f0 f1 | f2 f3
+    //
+    // r0 is pointing to the 0th row (00)
+    // r8 is pointing to the 8th row (80)
+
+    // Load
+    // p1 = 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
+    // q0 = 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+    // p0 = f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
+    // q1 = f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
+    Load8x4(r0, stride, p1, q0);
+    Load8x4(r8, stride, p0, q1);
+
+    {
+        // p1 = f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+        // p0 = f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
+        // q0 = f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+        // q1 = f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+        const __m128i t1 = *p1; // saved because *p1 and *q0 are overwritten before their second use
+        const __m128i t2 = *q0;
+        *p1 = _mm_unpacklo_epi64(t1, *p0);
+        *p0 = _mm_unpackhi_epi64(t1, *p0);
+        *q0 = _mm_unpacklo_epi64(t2, *q1);
+        *q1 = _mm_unpackhi_epi64(t2, *q1);
+    }
+}
+
+static WEBP_INLINE void Store4x4(__m128i* const x, uint8_t* dst, int stride) {
+    int row; // Writes the four 32-bit lanes of *x, lowest lane first, to four consecutive rows of dst; *x is consumed.
+    for (row = 0; row < 4; ++row) {
+        WebPUint32ToMem(dst + row * stride, _mm_cvtsi128_si32(*x));
+        *x = _mm_srli_si128(*x, 4); // bring the next 32-bit lane down to lane 0
+    }
+}
+
+// Transpose back and store: inverse of Load16x4, writing 4 bytes to each of 8 rows at r0 and 8 rows at r8.
+static WEBP_INLINE void Store16x4(const __m128i* const p1,
+                                  const __m128i* const p0,
+                                  const __m128i* const q0,
+                                  const __m128i* const q1,
+                                  uint8_t* r0,
+                                  uint8_t* r8,
+                                  int stride) {
+    __m128i t1, p1_s, p0_s, q0_s, q1_s;
+
+    // p0 = 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
+    // p1 = f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
+    t1 = *p0;
+    p0_s = _mm_unpacklo_epi8(*p1, t1);
+    p1_s = _mm_unpackhi_epi8(*p1, t1);
+
+    // q0 = 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
+    // q1 = f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
+    t1 = *q0;
+    q0_s = _mm_unpacklo_epi8(t1, *q1);
+    q1_s = _mm_unpackhi_epi8(t1, *q1);
+
+    // p0 = 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
+    // q0 = 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
+    t1 = p0_s;
+    p0_s = _mm_unpacklo_epi16(t1, q0_s);
+    q0_s = _mm_unpackhi_epi16(t1, q0_s);
+
+    // p1 = b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
+    // q1 = f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
+    t1 = p1_s;
+    p1_s = _mm_unpacklo_epi16(t1, q1_s);
+    q1_s = _mm_unpackhi_epi16(t1, q1_s);
+
+    Store4x4(&p0_s, r0, stride); // rows 0..3
+    r0 += 4 * stride;
+    Store4x4(&q0_s, r0, stride); // rows 4..7
+
+    Store4x4(&p1_s, r8, stride); // rows 8..b
+    r8 += 4 * stride;
+    Store4x4(&q1_s, r8, stride); // rows c..f
+}
+
+//------------------------------------------------------------------------------
+// Simple In-loop filtering (Paragraph 15.2)
+
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) { // Simple filter over 16 bytes across the edge between rows p[-stride] and p[0].
+    // Load
+    __m128i p1 = _mm_loadu_si128((__m128i*)&p[-2 * stride]);
+    __m128i p0 = _mm_loadu_si128((__m128i*)&p[-stride]);
+    __m128i q0 = _mm_loadu_si128((__m128i*)&p[0]);
+    __m128i q1 = _mm_loadu_si128((__m128i*)&p[stride]);
+
+    DoFilter2(&p1, &p0, &q0, &q1, thresh);
+
+    // Store
+    _mm_storeu_si128((__m128i*)&p[-stride], p0); // only the p0 and q0 rows are written back
+    _mm_storeu_si128((__m128i*)&p[0], q0);
+}
+
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) { // Simple filter over 16 rows across the edge between columns p[-1] and p[0].
+    __m128i p1, p0, q0, q1;
+
+    p -= 2; // beginning of p1
+
+    Load16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1); // transposed load: one 16-byte column per register
+    DoFilter2(&p1, &p0, &q0, &q1, thresh);
+    Store16x4(&p1, &p0, &q0, &q1, p, p + 8 * stride, stride); // writes all four columns back (p1 and q1 unchanged)
+}
+
+static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
+    // Runs SimpleVFilter16 on the three edges located 4, 8 and 12 rows below p.
+    int row;
+    for (row = 4; row <= 12; row += 4) {
+        SimpleVFilter16(p + row * stride, stride, thresh);
+    }
+}
+
+static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
+    // Runs SimpleHFilter16 on the three edges located 4, 8 and 12 bytes to the right of p.
+    int col;
+    for (col = 4; col <= 12; col += 4) {
+        SimpleHFilter16(p + col, stride, thresh);
+    }
+}
+
+//------------------------------------------------------------------------------
+// Complex In-loop filtering (Paragraph 15.3)
+
+#define MAX_DIFF1(p3, p2, p1, p0, m)         \
+    do {                                     \
+        m = MM_ABS(p1, p0);                  \
+        m = _mm_max_epu8(m, MM_ABS(p3, p2)); \
+        m = _mm_max_epu8(m, MM_ABS(p2, p1)); \
+    } while (0) // Overwrites m with the per-byte max of abs(p1 - p0), abs(p3 - p2) and abs(p2 - p1).
+
+#define MAX_DIFF2(p3, p2, p1, p0, m)         \
+    do {                                     \
+        m = _mm_max_epu8(m, MM_ABS(p1, p0)); \
+        m = _mm_max_epu8(m, MM_ABS(p3, p2)); \
+        m = _mm_max_epu8(m, MM_ABS(p2, p1)); \
+    } while (0) // Same three differences as MAX_DIFF1, but folded into the value already held in m.
+
+#define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4)            \
+    {                                                       \
+        e1 = _mm_loadu_si128((__m128i*)&(p)[0 * (stride)]); \
+        e2 = _mm_loadu_si128((__m128i*)&(p)[1 * (stride)]); \
+        e3 = _mm_loadu_si128((__m128i*)&(p)[2 * (stride)]); \
+        e4 = _mm_loadu_si128((__m128i*)&(p)[3 * (stride)]); \
+    } // Loads 16 unaligned bytes from each of 4 consecutive rows at p into e1..e4; 'stride' is parenthesized so expression arguments keep their precedence.
+
+#define LOADUV_H_EDGE(p, u, v, stride)                               \
+    do {                                                             \
+        const __m128i U = _mm_loadl_epi64((__m128i*)&(u)[(stride)]); \
+        const __m128i V = _mm_loadl_epi64((__m128i*)&(v)[(stride)]); \
+        p = _mm_unpacklo_epi64(U, V);                                \
+    } while (0) // p = 8 bytes read at &u[stride] (low half) followed by 8 bytes read at &v[stride] (high half).
+
+#define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) \
+    {                                                 \
+        LOADUV_H_EDGE(e1, u, v, 0 * (stride));        \
+        LOADUV_H_EDGE(e2, u, v, 1 * (stride));        \
+        LOADUV_H_EDGE(e3, u, v, 2 * (stride));        \
+        LOADUV_H_EDGE(e4, u, v, 3 * (stride));        \
+    } // Applies LOADUV_H_EDGE to 4 consecutive rows of u/v, filling e1..e4; 'stride' is parenthesized so expression arguments keep their precedence.
+
+#define STOREUV(p, u, v, stride)                       \
+    {                                                  \
+        _mm_storel_epi64((__m128i*)&(u)[(stride)], p); \
+        p = _mm_srli_si128(p, 8);                      \
+        _mm_storel_epi64((__m128i*)&(v)[(stride)], p); \
+    } // Stores the low 8 bytes of p at &u[stride] and the high 8 bytes at &v[stride]; p is shifted (modified) in the process. 'u' and 'v' are parenthesized so pointer-expression arguments index correctly.
+
+static WEBP_INLINE void ComplexMask(const __m128i* const p1,
+                                    const __m128i* const p0,
+                                    const __m128i* const q0,
+                                    const __m128i* const q1,
+                                    int thresh,
+                                    int ithresh,
+                                    __m128i* const mask) { // In: *mask holds per-byte difference maxima (callers build it with MAX_DIFF1/2). Out: 0xFF where *mask <= ithresh and NeedsFilter() passes.
+    const __m128i it = _mm_set1_epi8(ithresh);
+    const __m128i diff = _mm_subs_epu8(*mask, it); // saturates to 0 exactly where *mask <= ithresh
+    const __m128i thresh_mask = _mm_cmpeq_epi8(diff, _mm_setzero_si128());
+    __m128i filter_mask;
+    NeedsFilter(p1, p0, q0, q1, thresh, &filter_mask);
+    *mask = _mm_and_si128(thresh_mask, filter_mask);
+}
+
+// on macroblock edges: 16-byte-wide filter across the row boundary just above p
+static void VFilter16(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {
+    __m128i p3, p2, p1, p0; // the four rows before p
+    __m128i q0, q1, q2, q3; // the four rows starting at p
+    __m128i mask;
+
+    // Fetch both sides of the edge.
+    LOAD_H_EDGES4(p - 4 * stride, stride, p3, p2, p1, p0);
+    LOAD_H_EDGES4(p, stride, q0, q1, q2, q3);
+
+    // Running maximum of the MM_ABS terms, first side then second side.
+    MAX_DIFF1(p3, p2, p1, p0, mask);
+    MAX_DIFF2(q3, q2, q1, q0, mask);
+
+    ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+    DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+
+    // Write back the six rows handed to DoFilter6; p3 and q3 are never stored.
+    _mm_storeu_si128((__m128i*)(p - 3 * stride), p2);
+    _mm_storeu_si128((__m128i*)(p - 2 * stride), p1);
+    _mm_storeu_si128((__m128i*)(p - 1 * stride), p0);
+    _mm_storeu_si128((__m128i*)(p + 0 * stride), q0);
+    _mm_storeu_si128((__m128i*)(p + 1 * stride), q1);
+    _mm_storeu_si128((__m128i*)(p + 2 * stride), q2);
+}
+
+static void HFilter16(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {
+    uint8_t* const before = p - 4;                 // 4 bytes ahead of p on each row
+    uint8_t* const before_lo = before + 8 * stride; // same position, 8 rows further down
+    uint8_t* const after_lo = p + 8 * stride;
+    __m128i p3, p2, p1, p0, q0, q1, q2, q3;
+    __m128i mask;
+
+    Load16x4(before, before_lo, stride, &p3, &p2, &p1, &p0);
+    Load16x4(p, after_lo, stride, &q0, &q1, &q2, &q3);
+    MAX_DIFF1(p3, p2, p1, p0, mask);
+    MAX_DIFF2(q3, q2, q1, q0, mask);
+
+    ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+    DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+
+    Store16x4(&p3, &p2, &p1, &p0, before, before_lo, stride);
+    Store16x4(&q0, &q1, &q2, &q3, p, after_lo, stride);
+}
+
+// on three inner edges
+static void VFilter16i(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {
+    __m128i p3, p2, p1, p0; // carried over from one edge to the next
+    int edge;
+
+    LOAD_H_EDGES4(p, stride, p3, p2, p1, p0); // prologue: first four rows
+
+    for (edge = 0; edge < 3; ++edge) {
+        uint8_t* const out = p + 2 * stride; // first of the four rows written back
+        __m128i mask, next_p1, next_p0;
+
+        MAX_DIFF1(p3, p2, p1, p0, mask); // rows before the edge
+        p += 4 * stride;                 // p now points at the edge row
+        LOAD_H_EDGES4(p, stride, p3, p2, next_p1, next_p0);
+        MAX_DIFF2(p3, p2, next_p1, next_p0, mask); // rows from the edge onwards
+
+        // p3/p2 now hold the two rows at p and p + stride (q0/q1 of this edge) and
+        // stay live for the next span; next_p1/next_p0 will become its p1/p0.
+        ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
+        DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);
+
+        // The four filtered rows go back to out .. out + 3 * stride.
+        _mm_storeu_si128((__m128i*)(out + 0 * stride), p1);
+        _mm_storeu_si128((__m128i*)(out + 1 * stride), p0);
+        _mm_storeu_si128((__m128i*)(out + 2 * stride), p3);
+        _mm_storeu_si128((__m128i*)(out + 3 * stride), p2);
+
+        // hand the two rows that were only loaded over to the next edge
+        p1 = next_p1;
+        p0 = next_p0;
+    }
+}
+
+static void HFilter16i(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) {
+    __m128i p3, p2, p1, p0; // carried over from one edge to the next
+    int edge;
+
+    Load16x4(p, p + 8 * stride, stride, &p3, &p2, &p1, &p0); // prologue: first group of four
+
+    for (edge = 0; edge < 3; ++edge) {
+        uint8_t* const out = p + 2; // where the four filtered vectors are stored (start of p1)
+        __m128i mask, next_p1, next_p0;
+
+        MAX_DIFF1(p3, p2, p1, p0, mask); // group before the edge
+
+        p += 4; // start of q0, which is also the start of the next span
+        Load16x4(p, p + 8 * stride, stride, &p3, &p2, &next_p1, &next_p0);
+        MAX_DIFF2(p3, p2, next_p1, next_p0, mask); // group from the edge onwards
+
+        ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
+        DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);
+
+        Store16x4(&p1, &p0, &p3, &p2, out, out + 8 * stride, stride);
+
+        // hand the two vectors that were only loaded over to the next edge
+        p1 = next_p1;
+        p0 = next_p0;
+    }
+}
+
+// 8-pixels wide variant, for chroma filtering
+static void VFilter8(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {
+    __m128i p3, p2, p1, p0, q0, q1, q2, q3; // each: 8 bytes of u (low half), 8 bytes of v
+    __m128i mask;
+
+    // Four rows before u/v, then four rows starting at u/v.
+    LOADUV_H_EDGES4(u - 4 * stride, v - 4 * stride, stride, p3, p2, p1, p0);
+    LOADUV_H_EDGES4(u, v, stride, q0, q1, q2, q3);
+
+    // Running maximum of the MM_ABS terms over both sides.
+    MAX_DIFF1(p3, p2, p1, p0, mask);
+    MAX_DIFF2(q3, q2, q1, q0, mask);
+
+    ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+    DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+
+    // Split each result back into its u and v rows (STOREUV alters its first argument).
+    STOREUV(p2, u, v, -3 * stride);
+    STOREUV(p1, u, v, -2 * stride);
+    STOREUV(p0, u, v, -1 * stride);
+    STOREUV(q0, u, v, 0 * stride);
+    STOREUV(q1, u, v, 1 * stride);
+    STOREUV(q2, u, v, 2 * stride);
+}
+
+static void HFilter8(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {
+    uint8_t* const u_before = u - 4; // 4 bytes ahead of the edge in each plane
+    uint8_t* const v_before = v - 4;
+    __m128i p3, p2, p1, p0, q0, q1, q2, q3;
+    __m128i mask;
+
+    Load16x4(u_before, v_before, stride, &p3, &p2, &p1, &p0);
+    Load16x4(u, v, stride, &q0, &q1, &q2, &q3);
+    MAX_DIFF1(p3, p2, p1, p0, mask);
+    MAX_DIFF2(q3, q2, q1, q0, mask);
+
+    ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+    DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+
+    // All eight vectors are stored, although only six were handed to DoFilter6.
+    Store16x4(&p3, &p2, &p1, &p0, u_before, v_before, stride);
+    Store16x4(&q0, &q1, &q2, &q3, u, v, stride);
+}
+
+static void VFilter8i(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {
+    uint8_t* const u_edge = u + 4 * stride; // the filtered edge lies four rows below u/v
+    uint8_t* const v_edge = v + 4 * stride;
+    __m128i p3, p2, p1, p0;
+    __m128i q0, q1, q2, q3;
+    __m128i mask;
+
+    // Rows before the edge.
+    LOADUV_H_EDGES4(u, v, stride, p3, p2, p1, p0);
+    MAX_DIFF1(p3, p2, p1, p0, mask);
+
+    // Rows from the edge onwards.
+    LOADUV_H_EDGES4(u_edge, v_edge, stride, q0, q1, q2, q3);
+    MAX_DIFF2(q3, q2, q1, q0, mask);
+
+    ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+    DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
+
+    // Only the two rows on either side of the edge are rewritten.
+    STOREUV(p1, u_edge, v_edge, -2 * stride);
+    STOREUV(p0, u_edge, v_edge, -1 * stride);
+    STOREUV(q0, u_edge, v_edge, 0 * stride);
+    STOREUV(q1, u_edge, v_edge, 1 * stride);
+}
+
+static void HFilter8i(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) {
+    __m128i p3, p2, p1, p0, q0, q1, q2, q3;
+    __m128i mask;
+
+    // First group of four is read at u/v, the second one four bytes further
+    // along each row (beginning of q0).
+    Load16x4(u, v, stride, &p3, &p2, &p1, &p0);
+    Load16x4(u + 4, v + 4, stride, &q0, &q1, &q2, &q3);
+
+    MAX_DIFF1(p3, p2, p1, p0, mask);
+    MAX_DIFF2(q3, q2, q1, q0, mask);
+
+    ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+    DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
+
+    // p1, p0, q0, q1 straddle the edge, so the store starts at offset +2 (beginning of p1).
+    Store16x4(&p1, &p0, &q0, &q1, u + 2, v + 2, stride);
+}
+
+//------------------------------------------------------------------------------
+// 4x4 predictions
+
+#define DST(x, y) dst[(x) + (y)*BPS] // byte at column x, row y of the block at dst (rows are BPS bytes apart)
+#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) // rounded 1-2-1 weighted average of three samples
+
+// We use the following 8b-arithmetic tricks:
+//     (a + 2 * b + c + 2) >> 2 = (AC + b + 1) >> 1
+//   where: AC = (a + c) >> 1 = [(a + c + 1) >> 1] - [(a^c) & 1]
+// and:
+//     (a + 2 * b + c + 2) >> 2 = (AB + BC + 1) >> 1 - (ab|bc)&lsb
+//   where: AB = (a + b + 1) >> 1,   BC = (b + c + 1) >> 1
+//   and ab = a ^ b, bc = b ^ c, lsb = (AB^BC)&1
+
+static void VE4(uint8_t* dst) { // vertical: every row receives the smoothed row above
+    const __m128i one = _mm_set1_epi8(1);
+    const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS - 1)); // 8 bytes of the row above, from column -1
+    const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1); // same bytes, one position further right
+    const __m128i CDEFGH00 = _mm_srli_si128(ABCDEFGH, 2); // same bytes, two positions further right
+    const __m128i a = _mm_avg_epu8(ABCDEFGH, CDEFGH00); // (x + z + 1) >> 1 of the two outer samples
+    const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGH00), one); // 1 where x + z is odd ('a' rounded up)
+    const __m128i b = _mm_subs_epu8(a, lsb); // (x + z) >> 1
+    const __m128i avg = _mm_avg_epu8(b, BCDEFGH0); // (x + 2 * y + z + 2) >> 2, y being the middle sample
+    const uint32_t vals = _mm_cvtsi128_si32(avg); // low four result bytes
+    int i;
+    for (i = 0; i < 4; ++i) {
+        WebPUint32ToMem(dst + i * BPS, vals); // the same 32-bit value goes to each of the 4 rows
+    }
+}
+
+static void LD4(uint8_t* dst) { // Down-Left
+    const __m128i one = _mm_set1_epi8(1);
+    const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS)); // 8 bytes of the row above, from column 0
+    const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
+    const __m128i CDEFGH00 = _mm_srli_si128(ABCDEFGH, 2);
+    const __m128i CDEFGHH0 = _mm_insert_epi16(CDEFGH00, dst[-BPS + 7], 3); // 16-bit lane 3 := H, i.e. H is repeated in byte 6
+    const __m128i avg1 = _mm_avg_epu8(ABCDEFGH, CDEFGHH0); // (x + z + 1) >> 1 of the two outer samples
+    const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one); // 1 where avg1 rounded up
+    const __m128i avg2 = _mm_subs_epu8(avg1, lsb); // (x + z) >> 1
+    const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0); // (x + 2 * y + z + 2) >> 2 for each byte triple
+    WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(abcdefg)); // row 0: result bytes 0..3
+    WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1))); // row 1: bytes 1..4
+    WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2))); // row 2: bytes 2..5
+    WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3))); // row 3: bytes 3..6
+}
+
+static void VR4(uint8_t* dst) { // Vertical-Right
+    const __m128i one = _mm_set1_epi8(1);
+    const int I = dst[-1 + 0 * BPS]; // left neighbours of rows 0, 1, 2
+    const int J = dst[-1 + 1 * BPS];
+    const int K = dst[-1 + 2 * BPS];
+    const int X = dst[-1 - BPS]; // top-left corner sample
+    const __m128i XABCD = _mm_loadl_epi64((__m128i*)(dst - BPS - 1)); // row above, starting at the corner X
+    const __m128i ABCD0 = _mm_srli_si128(XABCD, 1);
+    const __m128i abcd = _mm_avg_epu8(XABCD, ABCD0); // (p + q + 1) >> 1 of horizontally adjacent top samples
+    const __m128i _XABCD = _mm_slli_si128(XABCD, 1); // shifted up one byte: byte 0 is zero
+    const __m128i IXABCD = _mm_insert_epi16(_XABCD, I | (X << 8), 0); // byte 0 := I, byte 1 := X
+    const __m128i avg1 = _mm_avg_epu8(IXABCD, ABCD0); // (x + z + 1) >> 1 of the two outer samples
+    const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one); // 1 where avg1 rounded up
+    const __m128i avg2 = _mm_subs_epu8(avg1, lsb); // (x + z) >> 1
+    const __m128i efgh = _mm_avg_epu8(avg2, XABCD); // 3-tap averages of (I,X,A), (X,A,B), ...
+    WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(abcd));
+    WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(efgh));
+    WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1))); // row 0 moved right by one; byte 0 is zero here
+    WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1))); // row 1 moved right by one; byte 0 is zero here
+
+    // these two are hard to implement in SSE2, so we keep the C-version
+    // (they overwrite the zero bytes left in column 0 of rows 2 and 3):
+    DST(0, 2) = AVG3(J, I, X);
+    DST(0, 3) = AVG3(K, J, I);
+}
+
+static void VL4(uint8_t* dst) { // Vertical-Left
+    const __m128i one = _mm_set1_epi8(1);
+    const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS)); // 8 bytes of the row above, from column 0
+    const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
+    const __m128i CDEFGH__ = _mm_srli_si128(ABCDEFGH, 2);
+    const __m128i avg1 = _mm_avg_epu8(ABCDEFGH, BCDEFGH_); // (a + b + 1) >> 1
+    const __m128i avg2 = _mm_avg_epu8(CDEFGH__, BCDEFGH_); // (b + c + 1) >> 1
+    const __m128i avg3 = _mm_avg_epu8(avg1, avg2);
+    const __m128i lsb1 = _mm_and_si128(_mm_xor_si128(avg1, avg2), one); // 1 where avg3 rounded up
+    const __m128i ab = _mm_xor_si128(ABCDEFGH, BCDEFGH_);
+    const __m128i bc = _mm_xor_si128(CDEFGH__, BCDEFGH_);
+    const __m128i abbc = _mm_or_si128(ab, bc);
+    const __m128i lsb2 = _mm_and_si128(abbc, lsb1); // correction term (ab|bc)&lsb of the second identity above
+    const __m128i avg4 = _mm_subs_epu8(avg3, lsb2); // (a + 2 * b + c + 2) >> 2
+    const uint32_t extra_out = _mm_cvtsi128_si32(_mm_srli_si128(avg4, 4)); // bytes 4..7 of avg4
+    WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(avg1)); // row 0: 2-tap averages, bytes 0..3
+    WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(avg4)); // row 1: 3-tap averages, bytes 0..3
+    WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1))); // row 2: 2-tap averages, bytes 1..4
+    WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1))); // row 3: 3-tap averages, bytes 1..4
+
+    // these two are hard to get and irregular: they replace the last pixel of rows 2 and 3
+    DST(3, 2) = (extra_out >> 0) & 0xff; // byte 4 of avg4
+    DST(3, 3) = (extra_out >> 8) & 0xff; // byte 5 of avg4
+}
+
+static void RD4(uint8_t* dst) { // Down-right
+    const __m128i one = _mm_set1_epi8(1);
+    const __m128i XABCD = _mm_loadl_epi64((__m128i*)(dst - BPS - 1)); // row above, starting at the top-left corner X
+    const __m128i ____XABCD = _mm_slli_si128(XABCD, 4); // moved up 4 bytes: bytes 0..3 are zero
+    const uint32_t I = dst[-1 + 0 * BPS]; // left neighbours of rows 0..3
+    const uint32_t J = dst[-1 + 1 * BPS];
+    const uint32_t K = dst[-1 + 2 * BPS];
+    const uint32_t L = dst[-1 + 3 * BPS];
+    const __m128i LKJI_____ = _mm_cvtsi32_si128(L | (K << 8) | (J << 16) | (I << 24)); // bytes 0..3 = L, K, J, I
+    const __m128i LKJIXABCD = _mm_or_si128(LKJI_____, ____XABCD); // left column (bottom-up), corner, then top row
+    const __m128i KJIXABCD_ = _mm_srli_si128(LKJIXABCD, 1);
+    const __m128i JIXABCD__ = _mm_srli_si128(LKJIXABCD, 2);
+    const __m128i avg1 = _mm_avg_epu8(JIXABCD__, LKJIXABCD); // (x + z + 1) >> 1 of the two outer samples
+    const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one); // 1 where avg1 rounded up
+    const __m128i avg2 = _mm_subs_epu8(avg1, lsb); // (x + z) >> 1
+    const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_); // (x + 2 * y + z + 2) >> 2 for each byte triple
+    WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(abcdefg)); // bottom row: result bytes 0..3
+    WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1))); // each row above starts one byte later
+    WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
+    WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
+}
+
+#undef DST
+#undef AVG3
+
+//------------------------------------------------------------------------------
+// Luma 16x16
+
+static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
+    // Every output byte is above[x] + (left-of-row - above[-1]), saturated to 0..255 by the pack.
+    const __m128i zero = _mm_setzero_si128();
+    const uint8_t* const above = dst - BPS;
+    int rows_left;
+    if (size != 4 && size != 8) { // any other size is handled as 16 rows of 16 bytes
+        const __m128i top16 = _mm_loadu_si128((const __m128i*)above);
+        const __m128i top_lo = _mm_unpacklo_epi8(top16, zero); // bytes 0..7 widened to 16 bits
+        const __m128i top_hi = _mm_unpackhi_epi8(top16, zero); // bytes 8..15 widened to 16 bits
+        for (rows_left = 16; rows_left > 0; --rows_left) {
+            const __m128i delta = _mm_set1_epi16(dst[-1] - above[-1]);
+            const __m128i lo = _mm_add_epi16(delta, top_lo);
+            const __m128i hi = _mm_add_epi16(delta, top_hi);
+            _mm_storeu_si128((__m128i*)dst, _mm_packus_epi16(lo, hi));
+            dst += BPS;
+        }
+    } else if (size == 8) {
+        const __m128i top8 = _mm_loadl_epi64((const __m128i*)above);
+        const __m128i top_wide = _mm_unpacklo_epi8(top8, zero);
+        for (rows_left = 8; rows_left > 0; --rows_left) {
+            const __m128i delta = _mm_set1_epi16(dst[-1] - above[-1]);
+            const __m128i sum = _mm_add_epi16(delta, top_wide);
+            _mm_storel_epi64((__m128i*)dst, _mm_packus_epi16(sum, zero));
+            dst += BPS;
+        }
+    } else {
+        const __m128i top4 = _mm_cvtsi32_si128(WebPMemToUint32(above));
+        const __m128i top_wide = _mm_unpacklo_epi8(top4, zero);
+        for (rows_left = 4; rows_left > 0; --rows_left) {
+            const __m128i delta = _mm_set1_epi16(dst[-1] - above[-1]);
+            const __m128i sum = _mm_add_epi16(delta, top_wide);
+            WebPUint32ToMem(dst, _mm_cvtsi128_si32(_mm_packus_epi16(sum, zero)));
+            dst += BPS;
+        }
+    }
+}
+
+static void TM4(uint8_t* dst) {
+    TrueMotion(dst, 4); // 4-wide variant; installed as VP8PredLuma4[1] below
+}
+static void TM8uv(uint8_t* dst) {
+    TrueMotion(dst, 8); // 8-wide variant; installed as VP8PredChroma8[1] below
+}
+static void TM16(uint8_t* dst) {
+    TrueMotion(dst, 16); // 16-wide variant; installed as VP8PredLuma16[1] below
+}
+
+static void VE16(uint8_t* dst) { // vertical: the 16 bytes above are copied into all 16 rows
+    const __m128i above = _mm_loadu_si128((const __m128i*)(dst - BPS));
+    int row;
+    for (row = 15; row >= 0; --row) {
+        _mm_storeu_si128((__m128i*)(dst + row * BPS), above);
+    }
+}
+
+static void HE16(uint8_t* dst) { // horizontal
+    int row;
+    for (row = 0; row < 16; ++row) {
+        uint8_t* const out = dst + row * BPS;
+        const __m128i left = _mm_set1_epi8(out[-1]); // byte just left of this row, in all 16 lanes
+        _mm_storeu_si128((__m128i*)out, left);
+    }
+}
+
+static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
+    const __m128i fill = _mm_set1_epi8(v); // v replicated into 16 bytes
+    int row = 16;
+    while (row-- > 0) { // rows 15 down to 0, BPS bytes apart
+        _mm_storeu_si128((__m128i*)(dst + row * BPS), fill);
+    }
+}
+
+static void DC16(uint8_t* dst) { // DC
+    // Row above: _mm_sad_epu8 against zero gives one partial sum per 8-byte
+    // half; the shuffle brings the upper half down so both can be added.
+    const __m128i above = _mm_loadu_si128((const __m128i*)(dst - BPS));
+    const __m128i halves = _mm_sad_epu8(above, _mm_setzero_si128());
+    const __m128i top_sum = _mm_add_epi16(halves, _mm_shuffle_epi32(halves, 2));
+    int acc = 16; // rounding term for the >> 5 below
+    int j;
+
+    for (j = 0; j < 16; ++j) {
+        acc += dst[j * BPS - 1]; // column left of the block
+    }
+    acc += _mm_cvtsi128_si32(top_sum);
+
+    Put16(acc >> 5, dst);
+}
+
+static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
+    int left_sum = 0;
+    int row = 16;
+    while (row-- > 0) {
+        left_sum += dst[row * BPS - 1]; // column left of the block
+    }
+    Put16((left_sum + 8) >> 4, dst);
+}
+
+static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
+    const __m128i above = _mm_loadu_si128((const __m128i*)(dst - BPS));
+    // one partial sum per 8-byte half of the row above ...
+    const __m128i halves = _mm_sad_epu8(above, _mm_setzero_si128());
+    // ... added together: sad8x2[0:1] + sad8x2[8:9]
+    const __m128i total = _mm_add_epi16(halves, _mm_shuffle_epi32(halves, 2));
+    const int rounded = 8 + _mm_cvtsi128_si32(total);
+    Put16(rounded >> 4, dst);
+}
+
+static void DC16NoTopLeft(uint8_t* dst) { // DC with no top and left samples
+    Put16(0x80, dst); // no neighbour is read: the block is filled with the constant 0x80
+}
+
+//------------------------------------------------------------------------------
+// Chroma
+
+static void VE8uv(uint8_t* dst) { // vertical
+    const __m128i above = _mm_loadl_epi64((const __m128i*)(dst - BPS));
+    int row = 8;
+    while (row-- > 0) { // the 8 bytes above are copied into each of the 8 rows
+        _mm_storel_epi64((__m128i*)(dst + row * BPS), above);
+    }
+}
+
+static void HE8uv(uint8_t* dst) { // horizontal
+    int row;
+    for (row = 0; row < 8; ++row) {
+        uint8_t* const out = dst + row * BPS;
+        // broadcast the byte left of this row; only the low 8 lanes are stored
+        _mm_storel_epi64((__m128i*)out, _mm_set1_epi8(out[-1]));
+    }
+}
+
+// helper for chroma-DC predictions: writes the byte v over 8 rows of 8 bytes
+static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
+    const __m128i fill = _mm_set1_epi8(v);
+    int row;
+    for (row = 7; row >= 0; --row) {
+        _mm_storel_epi64((__m128i*)(dst + row * BPS), fill);
+    }
+}
+
+static void DC8uv(uint8_t* dst) { // DC
+    const __m128i above = _mm_loadl_epi64((const __m128i*)(dst - BPS));
+    // Against zero, _mm_sad_epu8 yields the plain sum of the 8 loaded bytes.
+    const __m128i top_sum = _mm_sad_epu8(above, _mm_setzero_si128());
+    int acc = 8; // rounding term for the >> 4 below
+    int row = 8;
+
+    while (row-- > 0) {
+        acc += dst[row * BPS - 1]; // column left of the block
+    }
+    acc += _mm_cvtsi128_si32(top_sum);
+
+    Put8x8uv(acc >> 4, dst);
+}
+
+static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
+    const __m128i above = _mm_loadl_epi64((const __m128i*)(dst - BPS));
+    // sum of the 8 bytes above (absolute differences against zero)
+    const int top_sum = _mm_cvtsi128_si32(_mm_sad_epu8(above, _mm_setzero_si128()));
+    const int rounded = top_sum + 4;
+    Put8x8uv(rounded >> 3, dst);
+}
+
+static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
+    int left_sum = 0;
+    int row = 8;
+    while (row-- > 0) {
+        left_sum += dst[row * BPS - 1]; // column left of the block
+    }
+    Put8x8uv((left_sum + 4) >> 3, dst);
+}
+
+static void DC8uvNoTopLeft(uint8_t* dst) { // DC with nothing
+    Put8x8uv(0x80, dst); // no neighbour is read: the block is filled with the constant 0x80
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8DspInitSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE2(void) {
+#if defined(USE_TRANSFORM_AC3)
+    VP8TransformAC3 = TransformAC3;
+#endif
+    VP8Transform = Transform;
+    // 4x4 luma predictors: slots 0 and 3 are not assigned by this function
+    VP8PredLuma4[7] = VL4;
+    VP8PredLuma4[6] = LD4;
+    VP8PredLuma4[5] = VR4;
+    VP8PredLuma4[4] = RD4;
+    VP8PredLuma4[2] = VE4;
+    VP8PredLuma4[1] = TM4;
+    // 16x16 luma predictors
+    VP8PredLuma16[0] = DC16;
+    VP8PredLuma16[4] = DC16NoTop;
+    VP8PredLuma16[5] = DC16NoLeft;
+    VP8PredLuma16[6] = DC16NoTopLeft;
+    VP8PredLuma16[1] = TM16;
+    VP8PredLuma16[2] = VE16;
+    VP8PredLuma16[3] = HE16;
+    // 8x8 chroma predictors (same slot order as the 16x16 table)
+    VP8PredChroma8[0] = DC8uv;
+    VP8PredChroma8[4] = DC8uvNoTop;
+    VP8PredChroma8[5] = DC8uvNoLeft;
+    VP8PredChroma8[6] = DC8uvNoTopLeft;
+    VP8PredChroma8[1] = TM8uv;
+    VP8PredChroma8[2] = VE8uv;
+    VP8PredChroma8[3] = HE8uv;
+    // simple loop filter
+    VP8SimpleVFilter16 = SimpleVFilter16;
+    VP8SimpleVFilter16i = SimpleVFilter16i;
+    VP8SimpleHFilter16 = SimpleHFilter16;
+    VP8SimpleHFilter16i = SimpleHFilter16i;
+    // regular loop filter: luma/chroma, outer-edge and inner-edge ("i") variants
+    VP8VFilter16 = VFilter16;
+    VP8VFilter16i = VFilter16i;
+    VP8HFilter16 = HFilter16;
+    VP8HFilter16i = HFilter16i;
+    VP8VFilter8 = VFilter8;
+    VP8VFilter8i = VFilter8i;
+    VP8HFilter8 = HFilter8;
+    VP8HFilter8i = HFilter8i;
+}
+
+#else // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(VP8DspInitSSE2)
+
+#endif // WEBP_USE_SSE2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/dec_sse41.c b/codec/L2/demos/webpEnc/host/src/dsp/dec_sse41.c
new file mode 100644
index 0000000000..01c068e287
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/dec_sse41.c
@@ -0,0 +1,45 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE4 version of some decoding functions.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE41)
+
+#include <smmintrin.h>
+#include "../dec/vp8i.h"
+
+static void HE16(uint8_t* dst) { // horizontal
+    const __m128i pick_byte3 = _mm_set1_epi8(3); // shuffle control: every output lane takes source byte 3
+    int row;
+    for (row = 0; row < 16; ++row) {
+        uint8_t* const out = dst + row * BPS;
+        // 4 bytes ending just left of the row; byte 3 is presumably out[-1] (see WebPMemToUint32)
+        const __m128i left4 = _mm_cvtsi32_si128(WebPMemToUint32(out - 4));
+        _mm_storeu_si128((__m128i*)out, _mm_shuffle_epi8(left4, pick_byte3));
+    }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8DspInitSSE41(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE41(void) {
+    VP8PredLuma16[3] = HE16; // only slot 3 is replaced, with the shuffle-based HE16 defined above
+}
+
+#else // !WEBP_USE_SSE41
+
+WEBP_DSP_INIT_STUB(VP8DspInitSSE41)
+
+#endif // WEBP_USE_SSE41
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/dsp.h b/codec/L2/demos/webpEnc/host/src/dsp/dsp.h
new file mode 100644
index 0000000000..2e86f28938
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/dsp.h
@@ -0,0 +1,456 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   Speed-critical functions.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_DSP_DSP_H_
+#define WEBP_DSP_DSP_H_
+
+#include "../webp/types.h"
+#include "../utils/utils.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BPS 32 // this is the common stride for enc/dec
+
+//------------------------------------------------------------------------------
+// CPU detection
+
+#if defined(__GNUC__)
+#define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__) // major in bits 8 and up, minor in the low byte
+#define LOCAL_GCC_PREREQ(maj, min) (LOCAL_GCC_VERSION >= (((maj) << 8) | (min))) // true for version maj.min or newer
+#else
+#define LOCAL_GCC_VERSION 0
+#define LOCAL_GCC_PREREQ(maj, min) 0 // __GNUC__ is not defined: never satisfied
+#endif
+
+#ifndef __has_builtin
+#define __has_builtin(x) 0 // fallback so that '#if __has_builtin(...)' still parses where it is missing
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER > 1310 && (defined(_M_X64) || defined(_M_IX86))
+#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(_M_X64) || defined(_M_IX86))
+#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets
+#endif
+
+// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
+// files without intrinsics, allowing the corresponding Init() to be called.
+// Files containing intrinsics will need to be built targeting the instruction
+// set so should succeed on one of the earlier tests.
+#if defined(__SSE2__) || defined(WEBP_MSC_SSE2) || defined(WEBP_HAVE_SSE2)
+// #define WEBP_USE_SSE2   <- disabled: this block no longer defines WEBP_USE_SSE2. NOTE(review): SSE4.1 below stays enabled - confirm intended
+#endif
+
+#if defined(__SSE4_1__) || defined(WEBP_MSC_SSE41) || defined(WEBP_HAVE_SSE41)
+#define WEBP_USE_SSE41 // compiles the body of dec_sse41.c instead of its init stub
+#endif
+
+#if defined(__AVX2__) || defined(WEBP_HAVE_AVX2)
+#define WEBP_USE_AVX2
+#endif
+
+#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__)
+#define WEBP_ANDROID_NEON // Android targets that might support NEON
+#endif
+
+// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
+// inline assembly would need to be modified for use with Native Client.
+#if (defined(__ARM_NEON__) || defined(WEBP_ANDROID_NEON) || defined(__aarch64__)) && !defined(__native_client__)
+#define WEBP_USE_NEON // NEON, Android ARMv7-A or AArch64 build, Native Client excluded
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM)
+#define WEBP_USE_NEON
+#define WEBP_USE_INTRINSICS // set together with NEON for Visual C++ (_MSC_VER >= 1700) ARM builds
+#endif
+
+#if defined(__mips__) && !defined(__mips64) && defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
+#define WEBP_USE_MIPS32 // 32-bit MIPS with ISA revision 1..5
+#if (__mips_isa_rev >= 2)
+#define WEBP_USE_MIPS32_R2 // additionally requires ISA revision >= 2
+#if defined(__mips_dspr2) || (__mips_dsp_rev >= 2)
+#define WEBP_USE_MIPS_DSP_R2 // additionally requires DSP revision 2
+#endif
+#endif
+#endif
+
+// This macro prevents thread_sanitizer from reporting known concurrent writes.
+#define WEBP_TSAN_IGNORE_FUNCTION // expands to nothing unless redefined just below
+#if defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+#undef WEBP_TSAN_IGNORE_FUNCTION
+#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread)) // only when thread sanitizer is active
+#endif
+#endif
+
+typedef enum { kSSE2, kSSE3, kSSE4_1, kAVX, kAVX2, kNEON, kMIPS32, kMIPSdspR2 } CPUFeature; // features that can be queried
+// returns true if the CPU supports the feature.
+typedef int (*VP8CPUInfo)(CPUFeature feature);
+WEBP_EXTERN(VP8CPUInfo) VP8GetCPUInfo; // presumably declares a global of type VP8CPUInfo - see WEBP_EXTERN
+
+//------------------------------------------------------------------------------
+// Init stub generator
+
+// Expands to a declaration plus an empty definition of func(): a module whose SIMD
+// code is compiled out still exposes its init symbol, avoiding a compiler warning.
+#define WEBP_DSP_INIT_STUB(func) \
+    extern void func(void);      \
+    WEBP_TSAN_IGNORE_FUNCTION void func(void) {}
+
+//------------------------------------------------------------------------------
+// Encoding
+
+// Transforms
+// VP8Idct: Does one or two inverse transforms. If do_two is set, the transforms
+//          will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
+typedef void (*VP8Idct)(const uint8_t* ref, const int16_t* in, uint8_t* dst, int do_two);
+typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
+typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
+extern VP8Idct VP8ITransform;
+extern VP8Fdct VP8FTransform;
+extern VP8Fdct VP8FTransform2; // performs two transforms at a time
+extern VP8WHT VP8FTransformWHT;
+// Predictions
+// *dst is the destination block. *top and *left can be NULL.
+typedef void (*VP8IntraPreds)(uint8_t* dst, const uint8_t* left, const uint8_t* top);
+typedef void (*VP8Intra4Preds)(uint8_t* dst, const uint8_t* top);
+extern VP8Intra4Preds VP8EncPredLuma4;
+extern VP8IntraPreds VP8EncPredLuma16;
+extern VP8IntraPreds VP8EncPredChroma8;
+
+typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref);
+extern VP8Metric VP8SSE16x16, VP8SSE16x8, VP8SSE8x8, VP8SSE4x4;
+typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref, const uint16_t* const weights);
+extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;
+
+typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
+extern VP8BlockCopy VP8Copy4x4;
+extern VP8BlockCopy VP8Copy16x8;
+// Quantization
+struct VP8Matrix; // forward declaration
+typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16], const struct VP8Matrix* const mtx);
+// Same as VP8QuantizeBlock, but quantizes two consecutive blocks.
+typedef int (*VP8Quantize2Blocks)(int16_t in[32], int16_t out[32], const struct VP8Matrix* const mtx);
+
+extern VP8QuantizeBlock VP8EncQuantizeBlock;
+extern VP8Quantize2Blocks VP8EncQuantize2Blocks;
+
+// specific to 2nd transform:
+typedef int (*VP8QuantizeBlockWHT)(int16_t in[16], int16_t out[16], const struct VP8Matrix* const mtx);
+extern VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
+
+extern const int VP8DspScan[16 + 4 + 4];
+
+// Collect histogram for susceptibility calculation.
+#define MAX_COEFF_THRESH 31 // size of histogram used by CollectHistogram.
+typedef struct {
+    // We only need to store max_value and last_non_zero, not the distribution.
+    int max_value;
+    int last_non_zero;
+} VP8Histogram;
+typedef void (*VP8CHisto)(
+    const uint8_t* ref, const uint8_t* pred, int start_block, int end_block, VP8Histogram* const histo);
+extern VP8CHisto VP8CollectHistogram;
+// General-purpose util function to help VP8CollectHistogram().
+void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1], VP8Histogram* const histo);
+
+// must be called before using any of the above
+void VP8EncDspInit(void);
+
+//------------------------------------------------------------------------------
+// cost functions (encoding)
+
+extern const uint16_t VP8EntropyCost[256]; // 8bit fixed-point log(p)
+// approximate cost per level:
+extern const uint16_t VP8LevelFixedCosts[2047 /*MAX_LEVEL*/ + 1];
+extern const uint8_t VP8EncBands[16 + 1];
+
+struct VP8Residual;
+typedef void (*VP8SetResidualCoeffsFunc)(const int16_t* const coeffs, struct VP8Residual* const res);
+extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
+
+// Cost calculation function.
+typedef int (*VP8GetResidualCostFunc)(int ctx0, const struct VP8Residual* const res);
+extern VP8GetResidualCostFunc VP8GetResidualCost;
+
+// must be called before anything using the above
+void VP8EncDspCostInit(void);
+
+//------------------------------------------------------------------------------
+// Decoding
+
+typedef void (*VP8DecIdct)(const int16_t* coeffs, uint8_t* dst);
+// when doing two transforms, coeffs is actually int16_t[2][16].
+typedef void (*VP8DecIdct2)(const int16_t* coeffs, uint8_t* dst, int do_two);
+extern VP8DecIdct2 VP8Transform;
+extern VP8DecIdct VP8TransformAC3;
+extern VP8DecIdct VP8TransformUV;
+extern VP8DecIdct VP8TransformDC;
+extern VP8DecIdct VP8TransformDCUV;
+extern VP8WHT VP8TransformWHT;
+
+// *dst is the destination block, with stride BPS. Boundary samples are
+// assumed accessible when needed (predictors read the row above and the column to the left of dst).
+typedef void (*VP8PredFunc)(uint8_t* dst);
+extern VP8PredFunc VP8PredLuma16[/* NUM_B_DC_MODES */]; // VP8DspInitSSE2 fills 0..6: DC, TM, VE, HE, DC-no-top, DC-no-left, DC-no-top-left
+extern VP8PredFunc VP8PredChroma8[/* NUM_B_DC_MODES */]; // VP8DspInitSSE2 fills 0..6 in the same order as VP8PredLuma16
+extern VP8PredFunc VP8PredLuma4[/* NUM_BMODES */]; // VP8DspInitSSE2 fills 1 (TM), 2 (VE), 4 (RD), 5 (VR), 6 (LD), 7 (VL)
+
+// clipping tables (for filtering)
+extern const int8_t* const VP8ksclip1; // clips [-1020, 1020] to [-128, 127]
+extern const int8_t* const VP8ksclip2; // clips [-112, 112] to [-16, 15]
+extern const uint8_t* const VP8kclip1; // clips [-255,511] to [0,255]
+extern const uint8_t* const VP8kabs0;  // abs(x) for x in [-255,255]
+// must be called first
+void VP8InitClipTables(void);
+
+// simple filter (only for luma)
+typedef void (*VP8SimpleFilterFunc)(uint8_t* p, int stride, int thresh);
+extern VP8SimpleFilterFunc VP8SimpleVFilter16;
+extern VP8SimpleFilterFunc VP8SimpleHFilter16;
+extern VP8SimpleFilterFunc VP8SimpleVFilter16i; // filter 3 inner edges
+extern VP8SimpleFilterFunc VP8SimpleHFilter16i;
+
+// regular filter (on both macroblock edges and inner edges)
+typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride, int thresh, int ithresh, int hev_t);
+typedef void (*VP8ChromaFilterFunc)(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_t);
+// on outer edge
+extern VP8LumaFilterFunc VP8VFilter16;
+extern VP8LumaFilterFunc VP8HFilter16;
+extern VP8ChromaFilterFunc VP8VFilter8;
+extern VP8ChromaFilterFunc VP8HFilter8;
+
+// on inner edge
+extern VP8LumaFilterFunc VP8VFilter16i; // filtering 3 inner edges altogether
+extern VP8LumaFilterFunc VP8HFilter16i;
+extern VP8ChromaFilterFunc VP8VFilter8i; // filtering u and v altogether
+extern VP8ChromaFilterFunc VP8HFilter8i;
+
+// must be called before anything using the above
+void VP8DspInit(void);
+
+//------------------------------------------------------------------------------
+// WebP I/O
+
+#define FANCY_UPSAMPLING // undefined to remove fancy upsampling support
+
+// Convert a pair of y/u/v lines together to the output rgb/a colorspace.
+// bottom_y can be NULL if only one line of output is needed (at top/bottom).
+typedef void (*WebPUpsampleLinePairFunc)(const uint8_t* top_y,
+                                         const uint8_t* bottom_y,
+                                         const uint8_t* top_u,
+                                         const uint8_t* top_v,
+                                         const uint8_t* cur_u,
+                                         const uint8_t* cur_v,
+                                         uint8_t* top_dst,
+                                         uint8_t* bottom_dst,
+                                         int len);
+
+#ifdef FANCY_UPSAMPLING
+
+// Fancy upsampling functions to convert YUV to RGB(A) modes
+extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
+
+#endif // FANCY_UPSAMPLING
+
+// Per-row point-sampling methods.
+typedef void (*WebPSamplerRowFunc)(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int len);
+// Generic function to apply 'WebPSamplerRowFunc' to the whole plane:
+void WebPSamplerProcessPlane(const uint8_t* y,
+                             int y_stride,
+                             const uint8_t* u,
+                             const uint8_t* v,
+                             int uv_stride,
+                             uint8_t* dst,
+                             int dst_stride,
+                             int width,
+                             int height,
+                             WebPSamplerRowFunc func);
+
+// Sampling functions to convert rows of YUV to RGB(A)
+extern WebPSamplerRowFunc WebPSamplers[/* MODE_LAST */];
+
+// General function for converting two lines of ARGB or RGBA.
+// 'alpha_is_last' should be true if 0xff000000 is stored in memory
+// as 0x00, 0x00, 0x00, 0xff (little endian).
+WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last);
+
+// YUV444->RGB converters
+typedef void (*WebPYUV444Converter)(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int len);
+
+extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
+
+// Must be called before using the WebPUpsamplers[] (and for premultiplied
+// colorspaces like rgbA, rgbA4444, etc)
+void WebPInitUpsamplers(void);
+// Must be called before using WebPSamplers[]
+void WebPInitSamplers(void);
+// Must be called before using WebPYUV444Converters[]
+void WebPInitYUV444Converters(void);
+
+//------------------------------------------------------------------------------
+// ARGB -> YUV converters
+
+// Convert ARGB samples to luma Y.
+extern void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
+// Convert ARGB samples to U/V with downsampling. do_store should be '1' for
+// even lines and '0' for odd ones. 'src_width' is the original width, not
+// the U/V one.
+extern void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v, int src_width, int do_store);
+
+// Convert a row of accumulated (four-values) of rgba32 toward U/V
+extern void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb, uint8_t* u, uint8_t* v, int width);
+
+// Convert RGB or BGR to Y
+extern void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
+extern void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
+
+// used for plain-C fallback.
+extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v, int src_width, int do_store);
+extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb, uint8_t* u, uint8_t* v, int width);
+
+// Must be called before using the above.
+void WebPInitConvertARGBToYUV(void);
+
+//------------------------------------------------------------------------------
+// Rescaler
+
+struct WebPRescaler;
+
+// Import a row of data and save its contribution in the rescaler.
+// 'channel' denotes the channel number to be imported. 'Expand' corresponds to
+// the wrk->x_expand case. Otherwise, 'Shrink' is to be used.
+typedef void (*WebPRescalerImportRowFunc)(struct WebPRescaler* const wrk, const uint8_t* src);
+
+extern WebPRescalerImportRowFunc WebPRescalerImportRowExpand;
+extern WebPRescalerImportRowFunc WebPRescalerImportRowShrink;
+
+// Export one row (starting at x_out position) from rescaler.
+// 'Expand' corresponds to the wrk->y_expand case.
+// Otherwise 'Shrink' is to be used
+typedef void (*WebPRescalerExportRowFunc)(struct WebPRescaler* const wrk);
+extern WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
+extern WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
+
+// Plain-C implementation, as fall-back.
+extern void WebPRescalerImportRowExpandC(struct WebPRescaler* const wrk, const uint8_t* src);
+extern void WebPRescalerImportRowShrinkC(struct WebPRescaler* const wrk, const uint8_t* src);
+extern void WebPRescalerExportRowExpandC(struct WebPRescaler* const wrk);
+extern void WebPRescalerExportRowShrinkC(struct WebPRescaler* const wrk);
+
+// Main entry calls:
+extern void WebPRescalerImportRow(struct WebPRescaler* const wrk, const uint8_t* src);
+// Export one row (starting at x_out position) from rescaler.
+extern void WebPRescalerExportRow(struct WebPRescaler* const wrk);
+
+// Must be called first before using the above.
+void WebPRescalerDspInit(void);
+
+//------------------------------------------------------------------------------
+// Utilities for processing transparent channel.
+
+// Apply alpha pre-multiply on an rgba, bgra or argb plane of size w * h.
+// alpha_first should be 0 for argb, 1 for rgba or bgra (where alpha is last).
+extern void (*WebPApplyAlphaMultiply)(uint8_t* rgba, int alpha_first, int w, int h, int stride);
+
+// Same, but specifically for RGBA4444 format
+extern void (*WebPApplyAlphaMultiply4444)(uint8_t* rgba4444, int w, int h, int stride);
+
+// Dispatch the values from alpha[] plane to the ARGB destination 'dst'.
+// Returns true if alpha[] plane has non-trivial values different from 0xff.
+extern int (*WebPDispatchAlpha)(
+    const uint8_t* alpha, int alpha_stride, int width, int height, uint8_t* dst, int dst_stride);
+
+// Transfer packed 8b alpha[] values to green channel in dst[], zero'ing the
+// A/R/B values. 'dst_stride' is the stride for dst[] in uint32_t units.
+extern void (*WebPDispatchAlphaToGreen)(
+    const uint8_t* alpha, int alpha_stride, int width, int height, uint32_t* dst, int dst_stride);
+
+// Extract the alpha values from 32b values in argb[] and pack them into alpha[]
+// (this is the opposite of WebPDispatchAlpha).
+// Returns true if there's only trivial 0xff alpha values.
+extern int (*WebPExtractAlpha)(
+    const uint8_t* argb, int argb_stride, int width, int height, uint8_t* alpha, int alpha_stride);
+
+// Pre-Multiply operation transforms x into x * A / 255  (where x=Y,R,G or B).
+// Un-Multiply operation transforms x into x * 255 / A.
+
+// Pre-Multiply or Un-Multiply (if 'inverse' is true) argb values in a row.
+extern void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);
+
+// Same as WebPMultARGBRow(), but for several rows.
+void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows, int inverse);
+
+// Same for a row of single values, with side alpha values.
+extern void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha, int width, int inverse);
+
+// Same as WebPMultRow(), but for several 'num_rows' rows.
+void WebPMultRows(
+    uint8_t* ptr, int stride, const uint8_t* alpha, int alpha_stride, int width, int num_rows, int inverse);
+
+// Plain-C versions, used as fallback by some implementations.
+void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha, int width, int inverse);
+void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse);
+
+// To be called first before using the above.
+void WebPInitAlphaProcessing(void);
+
+// ARGB packing function: a/r/g/b input is rgba or bgra order.
+extern void (*VP8PackARGB)(
+    const uint8_t* a, const uint8_t* r, const uint8_t* g, const uint8_t* b, int len, uint32_t* out);
+
+// RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
+extern void (*VP8PackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b, int len, int step, uint32_t* out);
+
+// To be called first before using the above.
+void VP8EncDspARGBInit(void);
+
+//------------------------------------------------------------------------------
+// Filter functions
+
+typedef enum { // Filter types.
+    WEBP_FILTER_NONE = 0,
+    WEBP_FILTER_HORIZONTAL,
+    WEBP_FILTER_VERTICAL,
+    WEBP_FILTER_GRADIENT,
+    WEBP_FILTER_LAST = WEBP_FILTER_GRADIENT + 1, // end marker
+    WEBP_FILTER_BEST,                            // meta-types
+    WEBP_FILTER_FAST
+} WEBP_FILTER_TYPE;
+
+typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height, int stride, uint8_t* out);
+typedef void (*WebPUnfilterFunc)(int width, int height, int stride, int row, int num_rows, uint8_t* data);
+
+// Filter the given data using the given predictor.
+// 'in' corresponds to a 2-dimensional pixel array of size (stride * height)
+// in raster order.
+// 'stride' is number of bytes per scan line (with possible padding).
+// 'out' should be pre-allocated.
+extern WebPFilterFunc WebPFilters[WEBP_FILTER_LAST];
+
+// In-place reconstruct the original data from the given filtered data.
+// The reconstruction will be done for 'num_rows' rows starting from 'row'
+// (assuming rows up to 'row - 1' are already reconstructed).
+extern WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST];
+
+// To be called first before using the above.
+void VP8FiltersInit(void);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_DSP_DSP_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/enc.c b/codec/L2/demos/webpEnc/host/src/dsp/enc.c
new file mode 100644
index 0000000000..c0615dc07b
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/enc.c
@@ -0,0 +1,771 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Speed-critical encoding functions.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h> // for abs()
+
+#include "./dsp.h"
+#include "../enc/vp8enci.h"
+
+static WEBP_INLINE uint8_t clip_8b(int v) { // saturate v to the [0, 255] range
+    return ((v & ~0xff) == 0) ? v : ((v < 0) ? 0 : 255);
+}
+
+static WEBP_INLINE int clip_max(int v, int max) { // smaller of v and max
+    return (v <= max) ? v : max;
+}
+
+//------------------------------------------------------------------------------
+// Compute susceptibility based on DCT-coeff histograms:
+// the higher, the "easier" the macroblock is to compress.
+
+const int VP8DspScan[16 + 4 + 4] = {
+    // Luma: (x + y * BPS) offsets of the sixteen 4x4 blocks, in raster order
+    0 + 0 * BPS, 4 + 0 * BPS,  8 + 0 * BPS, 12 + 0 * BPS, 0 + 4 * BPS,  4 + 4 * BPS,  8 + 4 * BPS,  12 + 4 * BPS,
+    0 + 8 * BPS, 4 + 8 * BPS,  8 + 8 * BPS, 12 + 8 * BPS, 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
+    // Chroma: four 4x4 blocks per plane; the V offsets start 8 columns right of U
+    0 + 0 * BPS, 4 + 0 * BPS,  0 + 4 * BPS, 4 + 4 * BPS, // U
+    8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V
+};
+
+// Summarizes a coefficient histogram: largest bin count and index of the last used bin.
+void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1], VP8Histogram* const histo) {
+    int peak = 0;     // highest population seen in any bin
+    int last_bin = 1; // index of the last non-empty bin (stays 1 if every bin is empty)
+    int bin;
+    for (bin = 0; bin <= MAX_COEFF_THRESH; ++bin) {
+        const int count = distribution[bin];
+        if (count <= 0) continue;
+        last_bin = bin;
+        if (peak < count) peak = count;
+    }
+    histo->max_value = peak;
+    histo->last_non_zero = last_bin;
+}
+
+static void CollectHistogram(
+    const uint8_t* ref, const uint8_t* pred, int start_block, int end_block, VP8Histogram* const histo) {
+    int bins[MAX_COEFF_THRESH + 1] = {0}; // population count per clipped |coeff| / 8
+    int blk;
+    for (blk = start_block; blk < end_block; ++blk) {
+        const int offset = VP8DspScan[blk]; // position of this 4x4 block
+        int16_t coeffs[16];
+        int n;
+        // Forward transform of the block, 'ref' against 'pred'.
+        VP8FTransform(ref + offset, pred + offset, coeffs);
+
+        // Bucket every coefficient magnitude, saturating at the last bin.
+        for (n = 0; n < 16; ++n) {
+            const int level = abs(coeffs[n]) / 8; // TODO(skal): add rounding?
+            bins[clip_max(level, MAX_COEFF_THRESH)] += 1;
+        }
+    }
+    VP8SetHistogramData(bins, histo);
+}
+
+//------------------------------------------------------------------------------
+// run-time tables (766 bytes)
+
+static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255]
+
+// We declare this variable 'volatile' to prevent instruction reordering
+// and make sure it's set to true _last_ (so as to be thread-safe)
+static volatile int tables_ok = 0;
+
+static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
+    int idx;
+    if (tables_ok) return; // already populated by an earlier call
+    // clip1[255 + v] holds v saturated to [0, 255], for v in [-255, 510].
+    for (idx = 0; idx <= 255 + 510; ++idx) {
+        clip1[idx] = clip_8b(idx - 255);
+    }
+    tables_ok = 1; // set last, once the whole table is filled in
+}
+
+//------------------------------------------------------------------------------
+// Transforms (Paragraph 14.4)
+// STORE(x, y, v): dst pixel at column x, row y = clip_8b(ref pixel + (v >> 3)).
+#define STORE(x, y, v) dst[(x) + (y)*BPS] = clip_8b(ref[(x) + (y)*BPS] + ((v) >> 3))
+// kC1 / kC2 are multipliers scaled by 1 << 16; MUL() shifts the product back down by 16.
+static const int kC1 = 20091 + (1 << 16);
+static const int kC2 = 35468;
+#define MUL(a, b) (((a) * (b)) >> 16)
+
+static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in, uint8_t* dst) {
+    int C[4 * 4], *tmp; // C: intermediate 4x4 result, written transposed by the first pass
+    int i;
+    tmp = C;
+    for (i = 0; i < 4; ++i) { // vertical pass: column i of 'in' -> row i of C
+        const int a = in[0] + in[8];
+        const int b = in[0] - in[8];
+        const int c = MUL(in[4], kC2) - MUL(in[12], kC1);
+        const int d = MUL(in[4], kC1) + MUL(in[12], kC2);
+        tmp[0] = a + d;
+        tmp[1] = b + c;
+        tmp[2] = b - c;
+        tmp[3] = a - d;
+        tmp += 4; // next row of C
+        in++;     // next column of 'in'
+    }
+
+    tmp = C;
+    for (i = 0; i < 4; ++i) { // horizontal pass: column i of C -> row i of dst
+        const int dc = tmp[0] + 4; // +4 rounds the '>> 3' applied inside STORE
+        const int a = dc + tmp[8];
+        const int b = dc - tmp[8];
+        const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1);
+        const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2);
+        STORE(0, i, a + d); // dst = clip_8b(ref + residual), see STORE
+        STORE(1, i, b + c);
+        STORE(2, i, b - c);
+        STORE(3, i, a - d);
+        tmp++;
+    }
+}
+
+static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst, int do_two) {
+    // One 4x4 block always; a second one (4 pixels right, next 16 coeffs) when 'do_two' is set.
+    const int num_blocks = do_two ? 2 : 1;
+    int n;
+    for (n = 0; n < num_blocks; ++n) ITransformOne(ref + 4 * n, in + 16 * n, dst + 4 * n);
+}
+
+static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+    int i;
+    int tmp[16]; // result of the first pass, one row of four values per input row
+    for (i = 0; i < 4; ++i, src += BPS, ref += BPS) { // first pass: one row of (src - ref) per iteration
+        const int d0 = src[0] - ref[0]; // 9bit dynamic range ([-255,255])
+        const int d1 = src[1] - ref[1];
+        const int d2 = src[2] - ref[2];
+        const int d3 = src[3] - ref[3];
+        const int a0 = (d0 + d3); // 10b, [-510,510]
+        const int a1 = (d1 + d2);
+        const int a2 = (d1 - d2);
+        const int a3 = (d0 - d3);
+        tmp[0 + i * 4] = (a0 + a1) * 8;                       // 14b, [-8160,8160]
+        tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9; // [-7536,7542]
+        tmp[2 + i * 4] = (a0 - a1) * 8;
+        tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 937) >> 9;
+    }
+    for (i = 0; i < 4; ++i) { // second pass: one column of tmp[] per iteration
+        const int a0 = (tmp[0 + i] + tmp[12 + i]); // 15b
+        const int a1 = (tmp[4 + i] + tmp[8 + i]);
+        const int a2 = (tmp[4 + i] - tmp[8 + i]);
+        const int a3 = (tmp[0 + i] - tmp[12 + i]);
+        out[0 + i] = (a0 + a1 + 7) >> 4; // 12b
+        out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0);
+        out[8 + i] = (a0 - a1 + 7) >> 4;
+        out[12 + i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
+    }
+}
+
+static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+    int n; // two horizontally adjacent 4x4 blocks, 16 output coefficients each
+    for (n = 0; n < 2; ++n) VP8FTransform(src + 4 * n, ref + 4 * n, out + 16 * n);
+}
+
+static void FTransformWHT(const int16_t* in, int16_t* out) {
+    // input is 12b signed
+    int32_t tmp[16];
+    int i;
+    // First pass: reads one value every 16 entries of 'in', four per iteration
+    // (presumably the first coefficient of each 16-coefficient block -- confirm against caller).
+    for (i = 0; i < 4; ++i, in += 64) {
+        const int a0 = (in[0 * 16] + in[2 * 16]); // 13b
+        const int a1 = (in[1 * 16] + in[3 * 16]);
+        const int a2 = (in[1 * 16] - in[3 * 16]);
+        const int a3 = (in[0 * 16] - in[2 * 16]);
+        tmp[0 + i * 4] = a0 + a1; // 14b
+        tmp[1 + i * 4] = a3 + a2;
+        tmp[2 + i * 4] = a3 - a2;
+        tmp[3 + i * 4] = a0 - a1;
+    }
+    for (i = 0; i < 4; ++i) { // second pass: one column of tmp[] per iteration
+        const int a0 = (tmp[0 + i] + tmp[8 + i]); // 15b
+        const int a1 = (tmp[4 + i] + tmp[12 + i]);
+        const int a2 = (tmp[4 + i] - tmp[12 + i]);
+        const int a3 = (tmp[0 + i] - tmp[8 + i]);
+        const int b0 = a0 + a1; // 16b
+        const int b1 = a3 + a2;
+        const int b2 = a3 - a2;
+        const int b3 = a0 - a1;
+        out[0 + i] = b0 >> 1; // 15b
+        out[4 + i] = b1 >> 1;
+        out[8 + i] = b2 >> 1;
+        out[12 + i] = b3 >> 1;
+    }
+}
+
+#undef MUL
+#undef STORE
+
+//------------------------------------------------------------------------------
+// Intra predictions
+
+static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
+    int rows_left; // paints a size x size square, one row of stride BPS at a time
+    for (rows_left = size; rows_left > 0; --rows_left, dst += BPS) {
+        memset(dst, value, size);
+    }
+}
+
+static WEBP_INLINE void VerticalPred(uint8_t* dst, const uint8_t* top, int size) {
+    int row;
+    if (top == NULL) {
+        Fill(dst, 127, size); // no row above: use the constant 127
+        return;
+    }
+    for (row = 0; row < size; ++row) memcpy(dst + row * BPS, top, size); // replicate 'top' downwards
+}
+
+static WEBP_INLINE void HorizontalPred(uint8_t* dst, const uint8_t* left, int size) {
+    int row;
+    if (left == NULL) {
+        Fill(dst, 129, size); // no left column: use the constant 129
+        return;
+    }
+    for (row = 0; row < size; ++row, dst += BPS) { // flood each row with its left sample
+        memset(dst, left[row], size);
+    }
+}
+
+static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left, const uint8_t* top, int size) {
+    const uint8_t* base; // clip1[] re-centred so that base[v] == clip_8b(v - left[-1])
+    int x, y;
+    if (left == NULL) {
+        // true motion without left samples (hence: with default 129 value)
+        // is equivalent to VE prediction where you just copy the top samples.
+        // Note that if top samples are not available, the default value is
+        // then 129, and not 127 as in the VerticalPred case.
+        if (top == NULL) {
+            Fill(dst, 129, size);
+        } else {
+            VerticalPred(dst, top, size);
+        }
+        return;
+    }
+    if (top == NULL) {
+        HorizontalPred(dst, left, size);
+        return;
+    }
+    // Both borders present: dst[x] = clip(top[x] + left[y] - left[-1]) via table lookup.
+    base = clip1 + 255 - left[-1];
+    for (y = 0; y < size; ++y, dst += BPS) {
+        const uint8_t* const row_clip = base + left[y];
+        for (x = 0; x < size; ++x) {
+            dst[x] = row_clip[top[x]];
+        }
+    }
+}
+
+static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left, const uint8_t* top, int size, int round, int shift) {
+    int sum = 0; // running total of the available border samples
+    int n;
+    if (top == NULL && left == NULL) {
+        Fill(dst, 0x80, size); // no top, no left, nothing.
+        return;
+    }
+    if (top != NULL) {
+        for (n = 0; n < size; ++n) sum += top[n];
+    }
+    if (left != NULL) {
+        for (n = 0; n < size; ++n) sum += left[n];
+    }
+    if (top == NULL || left == NULL) {
+        // Only one border is available: count it twice so that the same
+        // round/shift pair still applies.
+        sum += sum;
+    }
+    Fill(dst, (sum + round) >> shift, size);
+}
+
+//------------------------------------------------------------------------------
+// Chroma 8x8 prediction (paragraph 12.2)
+
+static void IntraChromaPreds(uint8_t* dst, const uint8_t* left, const uint8_t* top) {
+    uint8_t* const v_dst = dst + 8; // V output starts 8 pixels to the right of U
+    const uint8_t* const v_top = (top != NULL) ? top + 8 : NULL;
+    const uint8_t* const v_left = (left != NULL) ? left + 16 : NULL;
+    // U block
+    DCMode(C8DC8 + dst, left, top, 8, 8, 4);
+    VerticalPred(C8VE8 + dst, top, 8);
+    HorizontalPred(C8HE8 + dst, left, 8);
+    TrueMotion(C8TM8 + dst, left, top, 8);
+    // V block
+    DCMode(C8DC8 + v_dst, v_left, v_top, 8, 8, 4);
+    VerticalPred(C8VE8 + v_dst, v_top, 8);
+    HorizontalPred(C8HE8 + v_dst, v_left, 8);
+    TrueMotion(C8TM8 + v_dst, v_left, v_top, 8);
+}
+
+//------------------------------------------------------------------------------
+// luma 16x16 prediction (paragraph 12.3)
+
+static void Intra16Preds(uint8_t* dst, const uint8_t* left, const uint8_t* top) {
+    DCMode(I16DC16 + dst, left, top, 16, 16, 5); // size 16, rounding term 16, shift 5
+    VerticalPred(I16VE16 + dst, top, 16);        // each mode is written at its own offset in 'dst'
+    HorizontalPred(I16HE16 + dst, left, 16);
+    TrueMotion(I16TM16 + dst, left, top, 16);
+}
+
+//------------------------------------------------------------------------------
+// luma 4x4 prediction
+
+#define DST(x, y) dst[(x) + (y)*BPS] // pixel at column x, row y of the block at 'dst'
+#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) // rounded (1, 2, 1) / 4 weighted average
+#define AVG2(a, b) (((a) + (b) + 1) >> 1) // rounded average of two samples
+
+static void VE4(uint8_t* dst, const uint8_t* top) { // vertical
+    uint8_t smoothed[4]; // 3-tap filtered copy of the row above
+    int n;
+    for (n = 0; n < 4; ++n) smoothed[n] = AVG3(top[n - 1], top[n], top[n + 1]);
+    for (n = 0; n < 4; ++n) {
+        memcpy(dst + n * BPS, smoothed, 4);
+    }
+}
+
+static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
+    // Border samples in the order they are filtered: top[-1] first, then
+    // top[-2] .. top[-5], with the last one repeated so the final row has
+    // three taps too.
+    const int edge[6] = {top[-1], top[-2], top[-3], top[-4], top[-5], top[-5]};
+    int row;
+    for (row = 0; row < 4; ++row) {
+        const uint32_t pixel = AVG3(edge[row], edge[row + 1], edge[row + 2]);
+        WebPUint32ToMem(dst + row * BPS, 0x01010101U * pixel); // same byte in all four pixels
+    }
+}
+
+static void DC4(uint8_t* dst, const uint8_t* top) {
+    uint32_t total = 4; // rounding term for the final '>> 3' (eight samples are summed)
+    int n;
+    for (n = 0; n < 4; ++n) total += top[n] + top[n - 5];
+    Fill(dst, total >> 3, 4);
+}
+
+static void RD4(uint8_t* dst, const uint8_t* top) { // same value along each down-right diagonal
+    const int X = top[-1]; // top-left corner
+    const int I = top[-2]; // left samples: I, J, K, L
+    const int J = top[-3];
+    const int K = top[-4];
+    const int L = top[-5];
+    const int A = top[0]; // top samples: A, B, C, D
+    const int B = top[1];
+    const int C = top[2];
+    const int D = top[3];
+    DST(0, 3) = AVG3(J, K, L);
+    DST(0, 2) = DST(1, 3) = AVG3(I, J, K);
+    DST(0, 1) = DST(1, 2) = DST(2, 3) = AVG3(X, I, J);
+    DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I);
+    DST(1, 0) = DST(2, 1) = DST(3, 2) = AVG3(B, A, X);
+    DST(2, 0) = DST(3, 1) = AVG3(C, B, A);
+    DST(3, 0) = AVG3(D, C, B);
+}
+
+static void LD4(uint8_t* dst, const uint8_t* top) { // same value along each down-left diagonal
+    const int A = top[0]; // top samples: A, B, C, D
+    const int B = top[1];
+    const int C = top[2];
+    const int D = top[3];
+    const int E = top[4]; // top-right samples: E, F, G, H
+    const int F = top[5];
+    const int G = top[6];
+    const int H = top[7];
+    DST(0, 0) = AVG3(A, B, C);
+    DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
+    DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
+    DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
+    DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
+    DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
+    DST(3, 3) = AVG3(G, H, H); // H is repeated: no sample beyond top[7] is read
+}
+
+static void VR4(uint8_t* dst, const uint8_t* top) {
+    const int X = top[-1]; // top-left corner
+    const int I = top[-2]; // left samples: I, J, K
+    const int J = top[-3];
+    const int K = top[-4];
+    const int A = top[0]; // top samples: A, B, C, D
+    const int B = top[1];
+    const int C = top[2];
+    const int D = top[3];
+    DST(0, 0) = DST(1, 2) = AVG2(X, A); // rows 0 and 2: two-sample averages
+    DST(1, 0) = DST(2, 2) = AVG2(A, B);
+    DST(2, 0) = DST(3, 2) = AVG2(B, C);
+    DST(3, 0) = AVG2(C, D);
+
+    DST(0, 3) = AVG3(K, J, I); // remaining pixels: three-sample averages
+    DST(0, 2) = AVG3(J, I, X);
+    DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
+    DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
+    DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
+    DST(3, 1) = AVG3(B, C, D);
+}
+
+static void VL4(uint8_t* dst, const uint8_t* top) {
+    const int A = top[0]; // top samples: A, B, C, D
+    const int B = top[1];
+    const int C = top[2];
+    const int D = top[3];
+    const int E = top[4]; // top-right samples: E, F, G, H
+    const int F = top[5];
+    const int G = top[6];
+    const int H = top[7];
+    DST(0, 0) = AVG2(A, B); // two-sample averages
+    DST(1, 0) = DST(0, 2) = AVG2(B, C);
+    DST(2, 0) = DST(1, 2) = AVG2(C, D);
+    DST(3, 0) = DST(2, 2) = AVG2(D, E);
+
+    DST(0, 1) = AVG3(A, B, C); // three-sample averages
+    DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
+    DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
+    DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
+    DST(3, 2) = AVG3(E, F, G);
+    DST(3, 3) = AVG3(F, G, H);
+}
+
+static void HU4(uint8_t* dst, const uint8_t* top) { // uses the left samples only
+    const int I = top[-2]; // left samples: I, J, K, L
+    const int J = top[-3];
+    const int K = top[-4];
+    const int L = top[-5];
+    DST(0, 0) = AVG2(I, J);
+    DST(2, 0) = DST(0, 1) = AVG2(J, K);
+    DST(2, 1) = DST(0, 2) = AVG2(K, L);
+    DST(1, 0) = AVG3(I, J, K);
+    DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
+    DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
+    DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; // the rest is plain L
+}
+
+static void HD4(uint8_t* dst, const uint8_t* top) {
+    const int X = top[-1]; // top-left corner
+    const int I = top[-2]; // left samples: I, J, K, L
+    const int J = top[-3];
+    const int K = top[-4];
+    const int L = top[-5];
+    const int A = top[0]; // top samples: A, B, C
+    const int B = top[1];
+    const int C = top[2];
+
+    DST(0, 0) = DST(2, 1) = AVG2(I, X); // two-sample averages
+    DST(0, 1) = DST(2, 2) = AVG2(J, I);
+    DST(0, 2) = DST(2, 3) = AVG2(K, J);
+    DST(0, 3) = AVG2(L, K);
+
+    DST(3, 0) = AVG3(A, B, C); // three-sample averages
+    DST(2, 0) = AVG3(X, A, B);
+    DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
+    DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
+    DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
+    DST(1, 3) = AVG3(L, K, J);
+}
+
+static void TM4(uint8_t* dst, const uint8_t* top) {
+    // dst[col] = clip(top[col] + top[-2 - row] - top[-1]), resolved through clip1[].
+    const uint8_t* const base = clip1 + 255 - top[-1];
+    int row, col;
+    for (row = 0; row < 4; ++row, dst += BPS) {
+        const uint8_t* const row_clip = base + top[-2 - row];
+        for (col = 0; col < 4; ++col) {
+            dst[col] = row_clip[top[col]];
+        }
+    }
+}
+
+#undef DST
+#undef AVG3
+#undef AVG2
+
+// Computes every 4x4 luma prediction, each at its own I4xxx offset in 'dst'.
+// Left samples are top[-5 .. -2], top_left is top[-1], top are at top[0..3], top right at top[4..7].
+static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
+    DC4(I4DC4 + dst, top);
+    TM4(I4TM4 + dst, top);
+    VE4(I4VE4 + dst, top);
+    HE4(I4HE4 + dst, top);
+    RD4(I4RD4 + dst, top);
+    VR4(I4VR4 + dst, top);
+    LD4(I4LD4 + dst, top);
+    VL4(I4VL4 + dst, top);
+    HD4(I4HD4 + dst, top);
+    HU4(I4HU4 + dst, top);
+}
+
+//------------------------------------------------------------------------------
+// Metric
+
+static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b, int w, int h) {
+    // Sum of squared differences over a w x h window; both inputs use stride BPS.
+    int sse = 0;
+    int row;
+    int col;
+    for (row = 0; row < h; ++row, a += BPS, b += BPS) {
+        for (col = 0; col < w; ++col) {
+            const int delta = (int)a[col] - (int)b[col];
+            sse += delta * delta;
+        }
+    }
+    return sse;
+}
+
+static int SSE16x16(const uint8_t* a, const uint8_t* b) { // 16 wide, 16 high
+    return GetSSE(a, b, 16, 16);
+}
+static int SSE16x8(const uint8_t* a, const uint8_t* b) { // 16 wide, 8 high
+    return GetSSE(a, b, 16, 8);
+}
+static int SSE8x8(const uint8_t* a, const uint8_t* b) { // 8 wide, 8 high
+    return GetSSE(a, b, 8, 8);
+}
+static int SSE4x4(const uint8_t* a, const uint8_t* b) { // 4 wide, 4 high
+    return GetSSE(a, b, 4, 4);
+}
+
+//------------------------------------------------------------------------------
+// Texture distortion
+//
+// We try to match the spectral content (weighted) between source and
+// reconstructed samples.
+
+// Hadamard transform
+// Returns the weighted sum of the absolute value of transformed coefficients.
+static int TTransform(const uint8_t* in, const uint16_t* w) {
+    int sum = 0; // weighted total of the absolute transformed values
+    int tmp[16]; // output of the horizontal pass, one row of four values per input row
+    int i;
+    // horizontal pass (rows of 'in' are BPS bytes apart)
+    for (i = 0; i < 4; ++i, in += BPS) {
+        const int a0 = in[0] + in[2];
+        const int a1 = in[1] + in[3];
+        const int a2 = in[1] - in[3];
+        const int a3 = in[0] - in[2];
+        tmp[0 + i * 4] = a0 + a1;
+        tmp[1 + i * 4] = a3 + a2;
+        tmp[2 + i * 4] = a3 - a2;
+        tmp[3 + i * 4] = a0 - a1;
+    }
+    // vertical pass ('w' advances with the column, so w[0], w[4], w[8], w[12] are that column's weights)
+    for (i = 0; i < 4; ++i, ++w) {
+        const int a0 = tmp[0 + i] + tmp[8 + i];
+        const int a1 = tmp[4 + i] + tmp[12 + i];
+        const int a2 = tmp[4 + i] - tmp[12 + i];
+        const int a3 = tmp[0 + i] - tmp[8 + i];
+        const int b0 = a0 + a1;
+        const int b1 = a3 + a2;
+        const int b2 = a3 - a2;
+        const int b3 = a0 - a1;
+
+        sum += w[0] * abs(b0);
+        sum += w[4] * abs(b1);
+        sum += w[8] * abs(b2);
+        sum += w[12] * abs(b3);
+    }
+    return sum;
+}
+
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b, const uint16_t* const w) {
+    const int energy_a = TTransform(a, w); // weighted transform sum of each block
+    const int energy_b = TTransform(b, w);
+    return abs(energy_b - energy_a) >> 5;
+}
+
+static int Disto16x16(const uint8_t* const a, const uint8_t* const b, const uint16_t* const w) {
+    int total = 0;
+    int n;
+    // Visit the sixteen 4x4 sub-blocks in raster order and add up their distortions.
+    for (n = 0; n < 16; ++n) {
+        const int offset = (n & 3) * 4 + (n >> 2) * 4 * BPS;
+        total += Disto4x4(a + offset, b + offset, w);
+    }
+    return total;
+}
+
+//------------------------------------------------------------------------------
+// Quantization
+//
+
+static const uint8_t kZigzag[16] = {0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15}; // scan pos -> in[] index
+
+// Simple quantization: in[] is replaced by level * Q, out[] gets the levels in kZigzag order.
+static int QuantizeBlock(int16_t in[16], int16_t out[16], const VP8Matrix* const mtx) {
+    int last = -1; // scan position of the last non-zero level, -1 if none
+    int n;
+    for (n = 0; n < 16; ++n) {
+        const int j = kZigzag[n]; // index into in[] and the mtx tables for scan position n
+        const int sign = (in[j] < 0);
+        const uint32_t coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j]; // magnitude plus sharpening term
+        if (coeff > mtx->zthresh_[j]) {
+            const uint32_t Q = mtx->q_[j];
+            const uint32_t iQ = mtx->iq_[j];
+            const uint32_t B = mtx->bias_[j];
+            int level = QUANTDIV(coeff, iQ, B);
+            if (level > MAX_LEVEL) level = MAX_LEVEL;
+            if (sign) level = -level; // restore the sign removed above
+            in[j] = level * Q;
+            out[n] = level;
+            if (level) last = n;
+        } else { // at or below the zeroing threshold: drop the coefficient
+            out[n] = 0;
+            in[j] = 0;
+        }
+    }
+    return (last >= 0); // 1 if at least one level is non-zero
+}
+
+static int Quantize2Blocks(int16_t in[32], int16_t out[32], const VP8Matrix* const mtx) {
+    // Bit 0 / bit 1 of the result carry the return value for the first / second block.
+    const int nz0 = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx);
+    const int nz1 = VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx);
+    return (nz0 << 0) | (nz1 << 1);
+}
+
+static int QuantizeBlockWHT(int16_t in[16], int16_t out[16], const VP8Matrix* const mtx) {
+    int n, last = -1; // last: scan position of the last non-zero level, -1 if none
+    for (n = 0; n < 16; ++n) {
+        const int j = kZigzag[n]; // index into in[] and the mtx tables for scan position n
+        const int sign = (in[j] < 0);
+        const uint32_t coeff = sign ? -in[j] : in[j]; // magnitude only: no sharpening term here
+        assert(mtx->sharpen_[j] == 0);
+        if (coeff > mtx->zthresh_[j]) {
+            const uint32_t Q = mtx->q_[j];
+            const uint32_t iQ = mtx->iq_[j];
+            const uint32_t B = mtx->bias_[j];
+            int level = QUANTDIV(coeff, iQ, B);
+            if (level > MAX_LEVEL) level = MAX_LEVEL;
+            if (sign) level = -level; // restore the sign removed above
+            in[j] = level * Q;
+            out[n] = level;
+            if (level) last = n;
+        } else { // at or below the zeroing threshold: drop the coefficient
+            out[n] = 0;
+            in[j] = 0;
+        }
+    }
+    return (last >= 0); // 1 if at least one level is non-zero
+}
+
+//------------------------------------------------------------------------------
+// Block copy
+
+static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
+    int rows_left = h; // w bytes are copied per row; both buffers use stride BPS
+    while (rows_left-- > 0) {
+        memcpy(dst, src, w);
+        src += BPS;
+        dst += BPS;
+    }
+}
+
+static void Copy4x4(const uint8_t* src, uint8_t* dst) { // 4 wide, 4 high
+    Copy(src, dst, 4, 4);
+}
+
+static void Copy16x8(const uint8_t* src, uint8_t* dst) { // 16 wide, 8 high
+    Copy(src, dst, 16, 8);
+}
+
+//------------------------------------------------------------------------------
+// Initialization
+
+// Speed-critical function pointers. They carry no initializer here: VP8EncDspInit()
+// assigns the default C implementations and must run before any of them is called.
+VP8CHisto VP8CollectHistogram;
+VP8Idct VP8ITransform;
+VP8Fdct VP8FTransform;
+VP8Fdct VP8FTransform2;
+VP8WHT VP8FTransformWHT;
+VP8Intra4Preds VP8EncPredLuma4;
+VP8IntraPreds VP8EncPredLuma16;
+VP8IntraPreds VP8EncPredChroma8;
+VP8Metric VP8SSE16x16;
+VP8Metric VP8SSE8x8;
+VP8Metric VP8SSE16x8;
+VP8Metric VP8SSE4x4;
+VP8WMetric VP8TDisto4x4;
+VP8WMetric VP8TDisto16x16;
+VP8QuantizeBlock VP8EncQuantizeBlock;
+VP8Quantize2Blocks VP8EncQuantize2Blocks;
+VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
+VP8BlockCopy VP8Copy4x4;
+VP8BlockCopy VP8Copy16x8;
+
+extern void VP8EncDspInitSSE2(void); // per-architecture initializers, called from VP8EncDspInit()
+extern void VP8EncDspInitSSE41(void);
+extern void VP8EncDspInitAVX2(void);
+extern void VP8EncDspInitNEON(void);
+extern void VP8EncDspInitMIPS32(void);
+extern void VP8EncDspInitMIPSdspR2(void);
+// Value of VP8GetCPUInfo at the last completed init; starts as this variable's own address.
+static volatile VP8CPUInfo enc_last_cpuinfo_used = (VP8CPUInfo)&enc_last_cpuinfo_used;
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
+    if (enc_last_cpuinfo_used == VP8GetCPUInfo) return; // already initialized for this VP8GetCPUInfo
+
+    VP8DspInit(); // common inverse transforms
+    InitTables(); // fills clip1[], used by the TrueMotion predictors
+
+    // default C implementations
+    VP8CollectHistogram = CollectHistogram;
+    VP8ITransform = ITransform;
+    VP8FTransform = FTransform;
+    VP8FTransform2 = FTransform2;
+    VP8FTransformWHT = FTransformWHT;
+    VP8EncPredLuma4 = Intra4Preds;
+    VP8EncPredLuma16 = Intra16Preds;
+    VP8EncPredChroma8 = IntraChromaPreds;
+    VP8SSE16x16 = SSE16x16;
+    VP8SSE8x8 = SSE8x8;
+    VP8SSE16x8 = SSE16x8;
+    VP8SSE4x4 = SSE4x4;
+    VP8TDisto4x4 = Disto4x4;
+    VP8TDisto16x16 = Disto16x16;
+    VP8EncQuantizeBlock = QuantizeBlock;
+    VP8EncQuantize2Blocks = Quantize2Blocks;
+    VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
+    VP8Copy4x4 = Copy4x4;
+    VP8Copy16x8 = Copy16x8;
+
+    // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+    if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+        if (VP8GetCPUInfo(kSSE2)) {
+            VP8EncDspInitSSE2();
+#if defined(WEBP_USE_SSE41)
+            if (VP8GetCPUInfo(kSSE4_1)) { // only tried when SSE2 is available; runs after the SSE2 init
+                VP8EncDspInitSSE41();
+            }
+#endif
+        }
+#endif
+#if defined(WEBP_USE_AVX2)
+        if (VP8GetCPUInfo(kAVX2)) {
+            VP8EncDspInitAVX2();
+        }
+#endif
+#if defined(WEBP_USE_NEON)
+        if (VP8GetCPUInfo(kNEON)) {
+            VP8EncDspInitNEON();
+        }
+#endif
+#if defined(WEBP_USE_MIPS32)
+        if (VP8GetCPUInfo(kMIPS32)) {
+            VP8EncDspInitMIPS32();
+        }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+        if (VP8GetCPUInfo(kMIPSdspR2)) {
+            VP8EncDspInitMIPSdspR2();
+        }
+#endif
+    }
+    enc_last_cpuinfo_used = VP8GetCPUInfo; // remembered so the next call can return early
+}
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/enc_avx2.c b/codec/L2/demos/webpEnc/host/src/dsp/enc_avx2.c
new file mode 100644
index 0000000000..b6b3251cac
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/enc_avx2.c
@@ -0,0 +1,21 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// AVX2 version of speed-critical encoding functions.
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_AVX2)
+// Empty for now: this file does not implement any AVX2 encoder routine.
+#endif // WEBP_USE_AVX2
+
+//------------------------------------------------------------------------------
+// Entry point
+// NOTE(review): the stub macro comes in via ./dsp.h; presumably it defines an empty init -- confirm.
+WEBP_DSP_INIT_STUB(VP8EncDspInitAVX2)
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/enc_mips32.c b/codec/L2/demos/webpEnc/host/src/dsp/enc_mips32.c
new file mode 100644
index 0000000000..6fcb89ea4a
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/enc_mips32.c
@@ -0,0 +1,835 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of speed-critical encoding functions.
+//
+// Author(s): Djordje Pesut    (djordje.pesut@imgtec.com)
+//            Jovan Zelincevic (jovan.zelincevic@imgtec.com)
+//            Slobodan Prijic  (slobodan.prijic@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS32)
+
+#include "./mips_macro.h"
+#include "../enc/vp8enci.h"
+#include "../enc/cost.h"
+
+static const int kC1 = 20091 + (1 << 16); // = 85627; used as (x * kC1) >> 16, i.e. a factor of about 1.3066
+static const int kC2 = 35468; // used as (x * kC2) >> 16, i.e. a factor of about 0.5412
+
+// One vertical pass of ITransformOne, with the MUL macro inlined: loads the
+// four int16 values at byte offsets A..D from the pointer held in %[temp20],
+// scales the C and D inputs by kC1/kC2 (product >> 16) and leaves the four
+// butterfly results in TEMP0..TEMP3 (temp0..temp15 hold tmp[0]..tmp[15]).
+// TEMP4 is scratch for the A + B sum; temp16..temp19 are clobbered as well.
+// NOTE(review): the four "sra" lines end in "\n\n" instead of "\n\t" -- looks cosmetic; confirm.
+#define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3) \
+    "lh      %[temp16],      " #A                                    \
+    "(%[temp20])                 \n\t"                               \
+    "lh      %[temp18],      " #B                                    \
+    "(%[temp20])                 \n\t"                               \
+    "lh      %[temp17],      " #C                                    \
+    "(%[temp20])                 \n\t"                               \
+    "lh      %[temp19],      " #D                                    \
+    "(%[temp20])                 \n\t"                               \
+    "addu    %[" #TEMP4                                              \
+    "],    %[temp16],      %[temp18]       \n\t"                     \
+    "subu    %[temp16],      %[temp16],      %[temp18]         \n\t" \
+    "mul     %[" #TEMP0                                              \
+    "],    %[temp17],      %[kC2]          \n\t"                     \
+    "mul     %[temp18],      %[temp19],      %[kC1]            \n\t" \
+    "mul     %[temp17],      %[temp17],      %[kC1]            \n\t" \
+    "mul     %[temp19],      %[temp19],      %[kC2]            \n\t" \
+    "sra     %[" #TEMP0 "],    %[" #TEMP0                            \
+    "],    16            \n\n"                                       \
+    "sra     %[temp18],      %[temp18],      16                \n\n" \
+    "sra     %[temp17],      %[temp17],      16                \n\n" \
+    "sra     %[temp19],      %[temp19],      16                \n\n" \
+    "subu    %[" #TEMP2 "],    %[" #TEMP0                            \
+    "],    %[temp18]     \n\t"                                       \
+    "addu    %[" #TEMP3                                              \
+    "],    %[temp17],      %[temp19]       \n\t"                     \
+    "addu    %[" #TEMP0 "],    %[" #TEMP4 "],    %[" #TEMP3          \
+    "] \n\t"                                                         \
+    "addu    %[" #TEMP1 "],    %[temp16],      %[" #TEMP2            \
+    "]   \n\t"                                                       \
+    "subu    %[" #TEMP2 "],    %[temp16],      %[" #TEMP2            \
+    "]   \n\t"                                                       \
+    "subu    %[" #TEMP3 "],    %[" #TEMP4 "],    %[" #TEMP3 "] \n\t"
+
+// One horizontal pass of ITransformOne; the MUL and STORE helpers are expanded
+// in place and clip_8b(a) is spelled as: a = max(a, 0); a = min(a, 255).
+// A is the row number: bytes are read from ref and written to dst at byte
+// offsets (0..3) + BPS * A.  TEMP0, TEMP4, TEMP8 and TEMP12 name the registers
+// holding that row's four tmp[] inputs (temp0..temp15 hold tmp[0]..tmp[15]);
+// temp16..temp20 are scratch.  ref and dst are fetched from args[0] and args[2].
+#define HORIZONTAL_PASS(A, TEMP0, TEMP4, TEMP8, TEMP12) \
+    "addiu   %[" #TEMP0 "],    %[" #TEMP0 \
+    "],    4               \n\t" \
+    "addu    %[temp16],      %[" #TEMP0 "],    %[" #TEMP8 \
+    "]     \n\t" \
+    "subu    %[temp17],      %[" #TEMP0 "],    %[" #TEMP8 \
+    "]     \n\t" \
+    "mul     %[" #TEMP0 "],    %[" #TEMP4 \
+    "],    %[kC2]          \n\t" \
+    "mul     %[" #TEMP8 "],    %[" #TEMP12 \
+    "],   %[kC1]          \n\t" \
+    "mul     %[" #TEMP4 "],    %[" #TEMP4 \
+    "],    %[kC1]          \n\t" \
+    "mul     %[" #TEMP12 "],   %[" #TEMP12 \
+    "],   %[kC2]          \n\t" \
+    "sra     %[" #TEMP0 "],    %[" #TEMP0 \
+    "],    16              \n\t" \
+    "sra     %[" #TEMP8 "],    %[" #TEMP8 \
+    "],    16              \n\t" \
+    "sra     %[" #TEMP4 "],    %[" #TEMP4 \
+    "],    16              \n\t" \
+    "sra     %[" #TEMP12 "],   %[" #TEMP12 \
+    "],   16              \n\t" \
+    "subu    %[temp18],      %[" #TEMP0 "],    %[" #TEMP8 \
+    "]     \n\t" \
+    "addu    %[temp19],      %[" #TEMP4 "],    %[" #TEMP12 \
+    "]    \n\t" \
+    "addu    %[" #TEMP0 \
+    "],    %[temp16],      %[temp19]         \n\t" \
+    "addu    %[" #TEMP4 \
+    "],    %[temp17],      %[temp18]         \n\t" \
+    "subu    %[" #TEMP8 \
+    "],    %[temp17],      %[temp18]         \n\t" \
+    "subu    %[" #TEMP12 \
+    "],   %[temp16],      %[temp19]         \n\t" \
+    "lw      %[temp20],      0(%[args])                          \n\t" \
+    "sra     %[" #TEMP0 "],    %[" #TEMP0 \
+    "],    3               \n\t" \
+    "sra     %[" #TEMP4 "],    %[" #TEMP4 \
+    "],    3               \n\t" \
+    "sra     %[" #TEMP8 "],    %[" #TEMP8 \
+    "],    3               \n\t" \
+    "sra     %[" #TEMP12 "],   %[" #TEMP12 \
+    "],   3               \n\t" \
+    "lbu     %[temp16],      0+" \
+    XSTR(BPS) "*" #A \
+    "(%[temp20])   \n\t" \
+    "lbu     %[temp17],      1+" \
+    XSTR(BPS) "*" #A \
+    "(%[temp20])   \n\t" \
+    "lbu     %[temp18],      2+" \
+    XSTR(BPS) "*" #A \
+    "(%[temp20])   \n\t" \
+    "lbu     %[temp19],      3+" \
+    XSTR(BPS) "*" #A \
+    "(%[temp20])   \n\t" \
+    "addu    %[" #TEMP0 "],    %[temp16],      %[" #TEMP0 \
+    "]     \n\t" \
+    "addu    %[" #TEMP4 "],    %[temp17],      %[" #TEMP4 \
+    "]     \n\t" \
+    "addu    %[" #TEMP8 "],    %[temp18],      %[" #TEMP8 \
+    "]     \n\t" \
+    "addu    %[" #TEMP12 "],   %[temp19],      %[" #TEMP12 \
+    "]    \n\t" \
+    "slt     %[temp16],      %[" #TEMP0 \
+    "],    $zero             \n\t" \
+    "slt     %[temp17],      %[" #TEMP4 \
+    "],    $zero             \n\t" \
+    "slt     %[temp18],      %[" #TEMP8 \
+    "],    $zero             \n\t" \
+    "slt     %[temp19],      %[" #TEMP12 \
+    "],   $zero             \n\t" \
+    "movn    %[" #TEMP0 \
+    "],    $zero,          %[temp16]         \n\t" \
+    "movn    %[" #TEMP4 \
+    "],    $zero,          %[temp17]         \n\t" \
+    "movn    %[" #TEMP8 \
+    "],    $zero,          %[temp18]         \n\t" \
+    "movn    %[" #TEMP12 \
+    "],   $zero,          %[temp19]         \n\t" \
+    "addiu   %[temp20],      $zero,          255                 \n\t" \
+    "slt     %[temp16],      %[" #TEMP0 \
+    "],    %[temp20]         \n\t" \
+    "slt     %[temp17],      %[" #TEMP4 \
+    "],    %[temp20]         \n\t" \
+    "slt     %[temp18],      %[" #TEMP8 \
+    "],    %[temp20]         \n\t" \
+    "slt     %[temp19],      %[" #TEMP12 \
+    "],   %[temp20]         \n\t" \
+    "movz    %[" #TEMP0 \
+    "],    %[temp20],      %[temp16]         \n\t" \
+    "movz    %[" #TEMP4 \
+    "],    %[temp20],      %[temp17]         \n\t" \
+    "lw      %[temp16],      8(%[args])                          \n\t" \
+    "movz    %[" #TEMP8 \
+    "],    %[temp20],      %[temp18]         \n\t" \
+    "movz    %[" #TEMP12 \
+    "],   %[temp20],      %[temp19]         \n\t" \
+    "sb      %[" #TEMP0 \
+    "],    0+" XSTR(BPS) "*" #A \
+    "(%[temp16]) \n\t" \
+    "sb      %[" #TEMP4 \
+    "],    1+" XSTR(BPS) "*" #A \
+    "(%[temp16]) \n\t" \
+    "sb      %[" #TEMP8 \
+    "],    2+" XSTR(BPS) "*" #A \
+    "(%[temp16]) \n\t" \
+    "sb      %[" #TEMP12 \
+    "],   3+" XSTR(BPS) "*" #A \
+    "(%[temp16]) \n\t"
+
+// Does one 4x4 inverse transform: dst = clip_8b(ref + inverse-transform(in)).
+static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in, uint8_t* dst) {
+    // The asm fetches ref/in/dst from this table at byte offsets 0/4/8 (4-byte pointers).
+    const int* args[3] = {(const int*)ref, (const int*)in, (const int*)dst};
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10;
+    int temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18, temp19, temp20;
+
+    __asm__ volatile(
+        "lw      %[temp20],      4(%[args])                      \n\t"
+        VERTICAL_PASS(0, 16, 8, 24, temp4, temp0, temp1, temp2, temp3)
+        VERTICAL_PASS(2, 18, 10, 26, temp8, temp4, temp5, temp6, temp7)
+        VERTICAL_PASS(4, 20, 12, 28, temp12, temp8, temp9, temp10, temp11)
+        VERTICAL_PASS(6, 22, 14, 30, temp20, temp12, temp13, temp14, temp15)
+        HORIZONTAL_PASS(0, temp0, temp4, temp8, temp12)
+        HORIZONTAL_PASS(1, temp1, temp5, temp9, temp13)
+        HORIZONTAL_PASS(2, temp2, temp6, temp10, temp14)
+        HORIZONTAL_PASS(3, temp3, temp7, temp11, temp15)
+
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+          [temp5] "=&r"(temp5), [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [temp8] "=&r"(temp8),
+          [temp9] "=&r"(temp9), [temp10] "=&r"(temp10), [temp11] "=&r"(temp11), [temp12] "=&r"(temp12),
+          [temp13] "=&r"(temp13), [temp14] "=&r"(temp14), [temp15] "=&r"(temp15), [temp16] "=&r"(temp16),
+          [temp17] "=&r"(temp17), [temp18] "=&r"(temp18), [temp19] "=&r"(temp19), [temp20] "=&r"(temp20)
+        : [args] "r"(args), [kC1] "r"(kC1), [kC2] "r"(kC2)
+        : "memory", "hi", "lo");
+}
+
+// Inverse-transforms one block; when do_two != 0 also the next one (ref + 4, in + 16, dst + 4).
+static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst, int do_two) {
+    ITransformOne(ref, in, dst);
+    if (!do_two) return;
+    ITransformOne(ref + 4, in + 16, dst + 4);
+}
+
+#undef VERTICAL_PASS
+#undef HORIZONTAL_PASS
+
+// One unrolled iteration of the QuantizeBlock loop, with QUANTDIV expanded in
+// place: coeff = |in[j]| + sharpen[j]; if coeff exceeds zthresh[j] then
+// level = min((coeff * iq[j] + bias[j]) >> 17, max_level) with the sign put back,
+// otherwise level = 0.  Stores level * q[j] back to in[j] and level to out[n].
+// J = kZigzag[n] * 2, K = kZigzag[n] * 4, N = n * 2 (byte offsets: J/N for 16-bit, K for 32-bit data)
+#define QUANTIZE_ONE(J, K, N)                                                 \
+    "lh           %[temp0],       " #J                                        \
+    "(%[ppin])                     \n\t"                                      \
+    "lhu          %[temp1],       " #J                                        \
+    "(%[ppsharpen])                \n\t"                                      \
+    "lw           %[temp2],       " #K                                        \
+    "(%[ppzthresh])                \n\t"                                      \
+    "sra          %[sign],        %[temp0],           15              \n\t"   \
+    "xor          %[coeff],       %[temp0],           %[sign]         \n\t"   \
+    "subu         %[coeff],       %[coeff],           %[sign]         \n\t"   \
+    "addu         %[coeff],       %[coeff],           %[temp1]        \n\t"   \
+    "slt          %[temp4],       %[temp2],           %[coeff]        \n\t"   \
+    "addiu        %[temp5],       $zero,              0               \n\t"   \
+    "addiu        %[level],       $zero,              0               \n\t"   \
+    "beqz         %[temp4],       2f                                  \n\t"   \
+    "lhu          %[temp1],       " #J                                        \
+    "(%[ppiq])                     \n\t"                                      \
+    "lw           %[temp2],       " #K                                        \
+    "(%[ppbias])                   \n\t"                                      \
+    "lhu          %[temp3],       " #J                                        \
+    "(%[ppq])                      \n\t"                                      \
+    "mul          %[level],       %[coeff],           %[temp1]        \n\t"   \
+    "addu         %[level],       %[level],           %[temp2]        \n\t"   \
+    "sra          %[level],       %[level],           17              \n\t"   \
+    "slt          %[temp4],       %[max_level],       %[level]        \n\t"   \
+    "movn         %[level],       %[max_level],       %[temp4]        \n\t"   \
+    "xor          %[level],       %[level],           %[sign]         \n\t"   \
+    "subu         %[level],       %[level],           %[sign]         \n\t"   \
+    "mul          %[temp5],       %[level],           %[temp3]        \n\t"   \
+    "2:                                                                 \n\t" \
+    "sh           %[temp5],       " #J                                        \
+    "(%[ppin])                     \n\t"                                      \
+    "sh           %[level],       " #N "(%[pout])                     \n\t"
+
+// Quantizes the 16 coefficients of in[]: levels go to out[] in zigzag scan order and each
+// in[] entry is overwritten with level * q_.  Returns 1 if any level is non-zero, else 0.
+static int QuantizeBlock(int16_t in[16], int16_t out[16], const VP8Matrix* const mtx) {
+    int temp0, temp1, temp2, temp3, temp4, temp5;
+    int sign, coeff, level, n = 16;
+    const int max_level = MAX_LEVEL;
+
+    // Fully unrolled: one QUANTIZE_ONE(J, K, N) expansion per scan position.
+    __asm__ volatile(
+        QUANTIZE_ONE(0, 0, 0) QUANTIZE_ONE(2, 4, 2)
+        QUANTIZE_ONE(8, 16, 4) QUANTIZE_ONE(16, 32, 6)
+        QUANTIZE_ONE(10, 20, 8) QUANTIZE_ONE(4, 8, 10)
+        QUANTIZE_ONE(6, 12, 12) QUANTIZE_ONE(12, 24, 14)
+        QUANTIZE_ONE(18, 36, 16) QUANTIZE_ONE(24, 48, 18)
+        QUANTIZE_ONE(26, 52, 20) QUANTIZE_ONE(20, 40, 22)
+        QUANTIZE_ONE(14, 28, 24) QUANTIZE_ONE(22, 44, 26)
+        QUANTIZE_ONE(28, 56, 28) QUANTIZE_ONE(30, 60, 30)
+
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+          [temp5] "=&r"(temp5), [sign] "=&r"(sign), [coeff] "=&r"(coeff), [level] "=&r"(level)
+        : [pout] "r"(out), [ppin] "r"(in), [ppiq] "r"(&mtx->iq_[0]), [max_level] "r"(max_level),
+          [ppbias] "r"(&mtx->bias_[0]), [ppzthresh] "r"(&mtx->zthresh_[0]),
+          [ppsharpen] "r"(&mtx->sharpen_[0]), [ppq] "r"(&mtx->q_[0])
+        : "memory", "hi", "lo");
+
+    // Non-zero detection is kept out of the macro; scanning from the end allows an early exit.
+    while (n-- > 0) {
+        if (out[n] != 0) return 1;
+    }
+    return 0;
+}
+
+// Quantizes two consecutive 16-coefficient blocks; bit k of the result is QuantizeBlock's return for block k.
+static int Quantize2Blocks(int16_t in[32], int16_t out[32], const VP8Matrix* const mtx) {
+    const int first_nz = QuantizeBlock(in, out, mtx);
+    const int second_nz = QuantizeBlock(in + 16, out + 16, mtx);
+    return first_nz | (second_nz << 1);
+}
+
+#undef QUANTIZE_ONE
+
+// One horizontal pass of Disto4x4: the two TTransform calls (one for a, one
+// for b) are merged, so each expansion handles the same row of both buffers.
+// A      - row index; the four bytes at (0..3) + BPS * A are read from a and from b
+// E..H   - byte offsets in tmp where the four results computed from a are stored
+// E1..H1 - byte offsets in tmp where the four results computed from b are stored
+#define HORIZONTAL_PASS(A, E, F, G, H, E1, F1, G1, H1) \
+    "lbu    %[temp0],  0+" \
+    XSTR(BPS) "*" #A \
+    "(%[a])  \n\t" \
+    "lbu    %[temp1],  1+" \
+    XSTR(BPS) "*" #A \
+    "(%[a])  \n\t" \
+    "lbu    %[temp2],  2+" \
+    XSTR(BPS) "*" #A \
+    "(%[a])  \n\t" \
+    "lbu    %[temp3],  3+" \
+    XSTR(BPS) "*" #A \
+    "(%[a])  \n\t" \
+    "lbu    %[temp4],  0+" \
+    XSTR(BPS) "*" #A \
+    "(%[b])  \n\t" \
+    "lbu    %[temp5],  1+" \
+    XSTR(BPS) "*" #A \
+    "(%[b])  \n\t" \
+    "lbu    %[temp6],  2+" \
+    XSTR(BPS) "*" #A \
+    "(%[b])  \n\t" \
+    "lbu    %[temp7],  3+" \
+    XSTR(BPS) "*" #A \
+    "(%[b])  \n\t" \
+    "addu   %[temp8],  %[temp0],    %[temp2] " \
+    "        \n\t" \
+    "subu   %[temp0],  %[temp0],    %[temp2] " \
+    "        \n\t" \
+    "addu   %[temp2],  %[temp1],    %[temp3] " \
+    "        \n\t" \
+    "subu   %[temp1],  %[temp1],    %[temp3] " \
+    "        \n\t" \
+    "addu   %[temp3],  %[temp4],    %[temp6] " \
+    "        \n\t" \
+    "subu   %[temp4],  %[temp4],    %[temp6] " \
+    "        \n\t" \
+    "addu   %[temp6],  %[temp5],    %[temp7] " \
+    "        \n\t" \
+    "subu   %[temp5],  %[temp5],    %[temp7] " \
+    "        \n\t" \
+    "addu   %[temp7],  %[temp8],    %[temp2] " \
+    "        \n\t" \
+    "subu   %[temp2],  %[temp8],    %[temp2] " \
+    "        \n\t" \
+    "addu   %[temp8],  %[temp0],    %[temp1] " \
+    "        \n\t" \
+    "subu   %[temp0],  %[temp0],    %[temp1] " \
+    "        \n\t" \
+    "addu   %[temp1],  %[temp3],    %[temp6] " \
+    "        \n\t" \
+    "subu   %[temp3],  %[temp3],    %[temp6] " \
+    "        \n\t" \
+    "addu   %[temp6],  %[temp4],    %[temp5] " \
+    "        \n\t" \
+    "subu   %[temp4],  %[temp4],    %[temp5] " \
+    "        \n\t" \
+    "sw     %[temp7],  " #E \
+    "(%[tmp])                \n\t" \
+    "sw     %[temp2],  " #H \
+    "(%[tmp])                \n\t" \
+    "sw     %[temp8],  " #F \
+    "(%[tmp])                \n\t" \
+    "sw     %[temp0],  " #G \
+    "(%[tmp])                \n\t" \
+    "sw     %[temp1],  " #E1 \
+    "(%[tmp])               \n\t" \
+    "sw     %[temp3],  " #H1 \
+    "(%[tmp])               \n\t" \
+    "sw     %[temp6],  " #F1 \
+    "(%[tmp])               \n\t" \
+    "sw     %[temp4],  " #G1 \
+    "(%[tmp])               \n\t"
+
+// Macro for one vertical pass in Disto4x4 (TTransform).
+// The two TTransform calls are merged into a single pass because the
+// mips32r1 instruction set provides only one accumulator (hi/lo).
+//   The terms of the second call are accumulated first (madd) and the
+//   terms of the first call are subtracted afterwards (msub):
+//   const int sum1 = TTransform(a, w);
+//   const int sum2 = TTransform(b, w);
+//   return abs(sum2 - sum1) >> 5;
+//   so the accumulator ends up holding (sum2 - sum1).
+// A..D   - offsets in bytes to load the first results from the tmp buffer
+// A1..D1 - offsets in bytes to load the second results from the tmp buffer
+// E..H   - offsets in bytes of the 16-bit values loaded (lhu) from the w buffer
+#define VERTICAL_PASS(A, B, C, D, A1, B1, C1, D1, E, F, G, H) \
+    "lw     %[temp0],  " #A1                                  \
+    "(%[tmp])         \n\t"                                   \
+    "lw     %[temp1],  " #C1                                  \
+    "(%[tmp])         \n\t"                                   \
+    "lw     %[temp2],  " #B1                                  \
+    "(%[tmp])         \n\t"                                   \
+    "lw     %[temp3],  " #D1                                  \
+    "(%[tmp])         \n\t"                                   \
+    "addu   %[temp8],  %[temp0],    %[temp1]   \n\t"          \
+    "subu   %[temp0],  %[temp0],    %[temp1]   \n\t"          \
+    "addu   %[temp1],  %[temp2],    %[temp3]   \n\t"          \
+    "subu   %[temp2],  %[temp2],    %[temp3]   \n\t"          \
+    "addu   %[temp3],  %[temp8],    %[temp1]   \n\t"          \
+    "subu   %[temp8],  %[temp8],    %[temp1]   \n\t"          \
+    "addu   %[temp1],  %[temp0],    %[temp2]   \n\t"          \
+    "subu   %[temp0],  %[temp0],    %[temp2]   \n\t"          \
+    "sra    %[temp4],  %[temp3],    31         \n\t"          \
+    "sra    %[temp5],  %[temp1],    31         \n\t"          \
+    "sra    %[temp6],  %[temp0],    31         \n\t"          \
+    "sra    %[temp7],  %[temp8],    31         \n\t"          \
+    "xor    %[temp3],  %[temp3],    %[temp4]   \n\t"          \
+    "xor    %[temp1],  %[temp1],    %[temp5]   \n\t"          \
+    "xor    %[temp0],  %[temp0],    %[temp6]   \n\t"          \
+    "xor    %[temp8],  %[temp8],    %[temp7]   \n\t"          \
+    "subu   %[temp3],  %[temp3],    %[temp4]   \n\t"          \
+    "subu   %[temp1],  %[temp1],    %[temp5]   \n\t"          \
+    "subu   %[temp0],  %[temp0],    %[temp6]   \n\t"          \
+    "subu   %[temp8],  %[temp8],    %[temp7]   \n\t"          \
+    "lhu    %[temp4],  " #E                                   \
+    "(%[w])            \n\t"                                  \
+    "lhu    %[temp5],  " #F                                   \
+    "(%[w])            \n\t"                                  \
+    "lhu    %[temp6],  " #G                                   \
+    "(%[w])            \n\t"                                  \
+    "lhu    %[temp7],  " #H                                   \
+    "(%[w])            \n\t"                                  \
+    "madd   %[temp4],  %[temp3]                \n\t"          \
+    "madd   %[temp5],  %[temp1]                \n\t"          \
+    "madd   %[temp6],  %[temp0]                \n\t"          \
+    "madd   %[temp7],  %[temp8]                \n\t"          \
+    "lw     %[temp0],  " #A                                   \
+    "(%[tmp])          \n\t"                                  \
+    "lw     %[temp1],  " #C                                   \
+    "(%[tmp])          \n\t"                                  \
+    "lw     %[temp2],  " #B                                   \
+    "(%[tmp])          \n\t"                                  \
+    "lw     %[temp3],  " #D                                   \
+    "(%[tmp])          \n\t"                                  \
+    "addu   %[temp8],  %[temp0],    %[temp1]   \n\t"          \
+    "subu   %[temp0],  %[temp0],    %[temp1]   \n\t"          \
+    "addu   %[temp1],  %[temp2],    %[temp3]   \n\t"          \
+    "subu   %[temp2],  %[temp2],    %[temp3]   \n\t"          \
+    "addu   %[temp3],  %[temp8],    %[temp1]   \n\t"          \
+    "subu   %[temp1],  %[temp8],    %[temp1]   \n\t"          \
+    "addu   %[temp8],  %[temp0],    %[temp2]   \n\t"          \
+    "subu   %[temp0],  %[temp0],    %[temp2]   \n\t"          \
+    "sra    %[temp2],  %[temp3],    31         \n\t"          \
+    "xor    %[temp3],  %[temp3],    %[temp2]   \n\t"          \
+    "subu   %[temp3],  %[temp3],    %[temp2]   \n\t"          \
+    "msub   %[temp4],  %[temp3]                \n\t"          \
+    "sra    %[temp2],  %[temp8],    31         \n\t"          \
+    "sra    %[temp3],  %[temp0],    31         \n\t"          \
+    "sra    %[temp4],  %[temp1],    31         \n\t"          \
+    "xor    %[temp8],  %[temp8],    %[temp2]   \n\t"          \
+    "xor    %[temp0],  %[temp0],    %[temp3]   \n\t"          \
+    "xor    %[temp1],  %[temp1],    %[temp4]   \n\t"          \
+    "subu   %[temp8],  %[temp8],    %[temp2]   \n\t"          \
+    "subu   %[temp0],  %[temp0],    %[temp3]   \n\t"          \
+    "subu   %[temp1],  %[temp1],    %[temp4]   \n\t"          \
+    "msub   %[temp5],  %[temp8]                \n\t"          \
+    "msub   %[temp6],  %[temp0]                \n\t"          \
+    "msub   %[temp7],  %[temp1]                \n\t"
+
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b, const uint16_t* const w) {
+    int tmp[32];  // scratch buffer: horizontal passes store (sw) here, vertical passes load (lw) from here
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
+    // Four horizontal passes fill tmp, hi/lo is cleared, four vertical passes accumulate, result is abs(lo) >> 5.
+    __asm__ volatile(
+    HORIZONTAL_PASS(0,   0,  4,  8, 12,    64,  68,  72,  76)
+    HORIZONTAL_PASS(1,  16, 20, 24, 28,    80,  84,  88,  92)
+    HORIZONTAL_PASS(2,  32, 36, 40, 44,    96, 100, 104, 108)
+    HORIZONTAL_PASS(3,  48, 52, 56, 60,   112, 116, 120, 124)
+    "mthi   $zero                             \n\t"  // clear the accumulator before the madd/msub sequence
+    "mtlo   $zero                             \n\t"
+    VERTICAL_PASS( 0, 16, 32, 48,     64, 80,  96, 112,   0,  8, 16, 24)
+    VERTICAL_PASS( 4, 20, 36, 52,     68, 84, 100, 116,   2, 10, 18, 26)
+    VERTICAL_PASS( 8, 24, 40, 56,     72, 88, 104, 120,   4, 12, 20, 28)
+    VERTICAL_PASS(12, 28, 44, 60,     76, 92, 108, 124,   6, 14, 22, 30)
+    "mflo   %[temp0]                          \n\t"  // temp0 = low word of the accumulator
+    "sra    %[temp1],  %[temp0],  31          \n\t"  // temp1 = sign mask of temp0
+    "xor    %[temp0],  %[temp0],  %[temp1]    \n\t"  // these two steps compute
+    "subu   %[temp0],  %[temp0],  %[temp1]    \n\t"  // temp0 = abs(temp0)
+    "sra    %[temp0],  %[temp0],  5           \n\t"  // temp0 >>= 5
+
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
+    : [a]"r"(a), [b]"r"(b), [w]"r"(w), [tmp]"r"(tmp)
+    : "memory", "hi", "lo"
+  );
+
+    return temp0;
+}
+
+#undef VERTICAL_PASS
+#undef HORIZONTAL_PASS
+
+static int Disto16x16(const uint8_t* const a, const uint8_t* const b, const uint16_t* const w) {
+    int total = 0;  // sum of the Disto4x4 results over the 4x4 grid of 4x4 sub-blocks
+    int row_off, col;
+    for (row_off = 0; row_off < 16 * BPS; row_off += 4 * BPS) {
+        const uint8_t* const row_a = a + row_off;
+        const uint8_t* const row_b = b + row_off;
+        for (col = 0; col < 16; col += 4) total += Disto4x4(row_a + col, row_b + col, w);
+    }
+    return total;
+}
+
+// Macro for one horizontal pass in FTransform.
+// temp0..temp15 hold tmp[0]..tmp[15].
+// A - row index: bytes are loaded at offsets k + BPS * A (k = 0..3) from the pointers in args[0] and args[1]
+// TEMP0..TEMP3 - registers for the corresponding tmp elements
+#define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3)                                                                 \
+    "lw     %[" #TEMP1                                                                                                 \
+    "],  0(%[args])                           \n\t"                                                                    \
+    "lw     %[" #TEMP2                                                                                                 \
+    "],  4(%[args])                           \n\t"                                                                    \
+    "lbu    %[temp16],    0+" XSTR(                                                                                    \
+        BPS) "*" #A "(%[" #TEMP1                                                                                       \
+             "])  \n\t"                                                                                                \
+             "lbu    %[temp17],    0+" XSTR(                                                                           \
+                 BPS) "*" #A "(%[" #TEMP2                                                                              \
+                      "])  \n\t"                                                                                       \
+                      "lbu    %[temp18],    1+" XSTR(                                                                  \
+                          BPS) "*" #A "(%[" #TEMP1                                                                     \
+                               "])  \n\t"                                                                              \
+                               "lbu    %[temp19],    1+" XSTR(                                                         \
+                                   BPS) "*" #A "(%[" #TEMP2                                                            \
+                                        "])  \n\t"                                                                     \
+                                        "subu   %[temp20],    %[temp16],    %[temp17]                \n\t"             \
+                                        "lbu    %[temp16],    2+" XSTR(                                                \
+                                            BPS) "*" #A "(%[" #TEMP1                                                   \
+                                                 "])  \n\t"                                                            \
+                                                 "lbu    %[temp17],    2+" XSTR(                                       \
+                                                     BPS) "*" #A "(%[" #TEMP2                                          \
+                                                          "])  \n\t"                                                   \
+                                                          "subu   %[" #TEMP0                                           \
+                                                          "],  %[temp18],    %[temp19]              \n\t"              \
+                                                          "lbu    %[temp18],    3+" XSTR(                              \
+                                                              BPS) "*" #A "(%[" #TEMP1                                 \
+                                                                   "])  \n\t"                                          \
+                                                                   "lbu    %[temp19],    3+" XSTR(                     \
+                                                                       BPS) "*" #A "(%[" #TEMP2                        \
+                                                                            "])  \n\t"                                 \
+                                                                            "subu   %[" #TEMP1                         \
+                                                                            "],  %[temp16],    %[temp17]             " \
+                                                                            " \n\t"                                    \
+                                                                            "subu   %[" #TEMP2                         \
+                                                                            "],  %[temp18],    %[temp19]             " \
+                                                                            " \n\t"                                    \
+                                                                            "addu   %[" #TEMP3                         \
+                                                                            "],  %[temp20],    %[" #TEMP2              \
+                                                                            "]          \n\t"                          \
+                                                                            "subu   %[" #TEMP2                         \
+                                                                            "],  %[temp20],    %[" #TEMP2              \
+                                                                            "]          \n\t"                          \
+                                                                            "addu   %[temp20],    %[" #TEMP0           \
+                                                                            "],  %[" #TEMP1                            \
+                                                                            "]          \n\t"                          \
+                                                                            "subu   %[" #TEMP0 "],  %[" #TEMP0         \
+                                                                            "],  %[" #TEMP1                            \
+                                                                            "]        \n\t"                            \
+                                                                            "mul    %[temp16],    %[" #TEMP2           \
+                                                                            "],  %[c5352]               \n\t"          \
+                                                                            "mul    %[temp17],    %[" #TEMP2           \
+                                                                            "],  %[c2217]               \n\t"          \
+                                                                            "mul    %[temp18],    %[" #TEMP0           \
+                                                                            "],  %[c5352]               \n\t"          \
+                                                                            "mul    %[temp19],    %[" #TEMP0           \
+                                                                            "],  %[c2217]               \n\t"          \
+                                                                            "addu   %[" #TEMP1 "],  %[" #TEMP3         \
+                                                                            "],  %[temp20]            \n\t"            \
+                                                                            "subu   %[temp20],    %[" #TEMP3           \
+                                                                            "],  %[temp20]              \n\t"          \
+                                                                            "sll    %[" #TEMP0 "],  %[" #TEMP1         \
+                                                                            "],  3                    \n\t"            \
+                                                                            "sll    %[" #TEMP2                         \
+                                                                            "],  %[temp20],    3                     " \
+                                                                            " \n\t"                                    \
+                                                                            "addiu  %[temp16],    %[temp16],    1812 " \
+                                                                            "                    \n\t"                 \
+                                                                            "addiu  %[temp17],    %[temp17],    937  " \
+                                                                            "                    \n\t"                 \
+                                                                            "addu   %[temp16],    %[temp16],    "      \
+                                                                            "%[temp19]                \n\t"            \
+                                                                            "subu   %[temp17],    %[temp17],    "      \
+                                                                            "%[temp18]                \n\t"            \
+                                                                            "sra    %[" #TEMP1                         \
+                                                                            "],  %[temp16],    9                     " \
+                                                                            " \n\t"                                    \
+                                                                            "sra    %[" #TEMP3                         \
+                                                                            "],  %[temp17],    9                     " \
+                                                                            " \n\t"
+
+// Macro for one vertical pass in FTransform.
+// temp0..temp15 hold tmp[0]..tmp[15].
+// A..D - offsets in bytes of the 16-bit stores (sh) relative to the pointer held in temp20
+// TEMP0, TEMP4, TEMP8 and TEMP12 - registers for the corresponding tmp elements
+#define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12)   \
+    "addu   %[temp16],    %[" #TEMP0 "],  %[" #TEMP12            \
+    "]   \n\t"                                                   \
+    "subu   %[temp19],    %[" #TEMP0 "],  %[" #TEMP12            \
+    "]   \n\t"                                                   \
+    "addu   %[temp17],    %[" #TEMP4 "],  %[" #TEMP8             \
+    "]    \n\t"                                                  \
+    "subu   %[temp18],    %[" #TEMP4 "],  %[" #TEMP8             \
+    "]    \n\t"                                                  \
+    "mul    %[" #TEMP8                                           \
+    "],  %[temp19],    %[c2217]         \n\t"                    \
+    "mul    %[" #TEMP12                                          \
+    "], %[temp18],    %[c2217]         \n\t"                     \
+    "mul    %[" #TEMP4                                           \
+    "],  %[temp19],    %[c5352]         \n\t"                    \
+    "mul    %[temp18],    %[temp18],    %[c5352]           \n\t" \
+    "addiu  %[temp16],    %[temp16],    7                  \n\t" \
+    "addu   %[" #TEMP0                                           \
+    "],  %[temp16],    %[temp17]        \n\t"                    \
+    "sra    %[" #TEMP0 "],  %[" #TEMP0                           \
+    "],  4              \n\t"                                    \
+    "addu   %[" #TEMP12 "], %[" #TEMP12 "], %[" #TEMP4           \
+    "]  \n\t"                                                    \
+    "subu   %[" #TEMP4                                           \
+    "],  %[temp16],    %[temp17]        \n\t"                    \
+    "sra    %[" #TEMP4 "],  %[" #TEMP4                           \
+    "],  4              \n\t"                                    \
+    "addiu  %[" #TEMP8 "],  %[" #TEMP8                           \
+    "],  30000          \n\t"                                    \
+    "addiu  %[" #TEMP12 "], %[" #TEMP12                          \
+    "], 12000          \n\t"                                     \
+    "addiu  %[" #TEMP8 "],  %[" #TEMP8                           \
+    "],  21000          \n\t"                                    \
+    "subu   %[" #TEMP8 "],  %[" #TEMP8                           \
+    "],  %[temp18]      \n\t"                                    \
+    "sra    %[" #TEMP12 "], %[" #TEMP12                          \
+    "], 16             \n\t"                                     \
+    "sra    %[" #TEMP8 "],  %[" #TEMP8                           \
+    "],  16             \n\t"                                    \
+    "addiu  %[temp16],    %[" #TEMP12                            \
+    "], 1                \n\t"                                   \
+    "movn   %[" #TEMP12                                          \
+    "], %[temp16],    %[temp19]        \n\t"                     \
+    "sh     %[" #TEMP0 "],  " #A                                 \
+    "(%[temp20])              \n\t"                              \
+    "sh     %[" #TEMP4 "],  " #C                                 \
+    "(%[temp20])              \n\t"                              \
+    "sh     %[" #TEMP8 "],  " #D                                 \
+    "(%[temp20])              \n\t"                              \
+    "sh     %[" #TEMP12 "], " #B "(%[temp20])              \n\t"
+
+static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
+    int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
+    int temp17, temp18, temp19, temp20;
+    const int c2217 = 2217;
+    const int c5352 = 5352;
+    const int* const args[3] = {(const int*)src, (const int*)ref, (const int*)out};  // asm reads these at 0, 4, 8
+    // Four horizontal passes (rows 0..3) fill temp0..temp15; temp20 is then loaded from args + 8 for the stores.
+    __asm__ volatile(
+        HORIZONTAL_PASS(0, temp0, temp1, temp2, temp3) HORIZONTAL_PASS(1, temp4, temp5, temp6, temp7)
+            HORIZONTAL_PASS(2, temp8, temp9, temp10, temp11) HORIZONTAL_PASS(
+                3, temp12, temp13, temp14,
+                temp15) "lw   %[temp20],    8(%[args])                     \n\t" VERTICAL_PASS(0, 8, 16, 24, temp0,
+                                                                                               temp4, temp8, temp12)
+                VERTICAL_PASS(2, 10, 18, 26, temp1, temp5, temp9, temp13)
+                    VERTICAL_PASS(4, 12, 20, 28, temp2, temp6, temp10, temp14)
+                        VERTICAL_PASS(6, 14, 22, 30, temp3, temp7, temp11, temp15)
+
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+          [temp5] "=&r"(temp5), [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [temp8] "=&r"(temp8), [temp9] "=&r"(temp9),
+          [temp10] "=&r"(temp10), [temp11] "=&r"(temp11), [temp12] "=&r"(temp12), [temp13] "=&r"(temp13),
+          [temp14] "=&r"(temp14), [temp15] "=&r"(temp15), [temp16] "=&r"(temp16), [temp17] "=&r"(temp17),
+          [temp18] "=&r"(temp18), [temp19] "=&r"(temp19), [temp20] "=&r"(temp20)
+        : [args] "r"(args), [c2217] "r"(c2217), [c5352] "r"(c5352)
+        : "memory", "hi", "lo");
+}
+
+#undef VERTICAL_PASS
+#undef HORIZONTAL_PASS
+
+#if !defined(WORK_AROUND_GCC)
+// Loads the bytes at offsets A..D from a and b and adds the four squared differences to hi/lo (madd).
+#define GET_SSE_INNER(A, B, C, D)                            \
+    "lbu     %[temp0],    " #A                               \
+    "(%[a])                 \n\t"                            \
+    "lbu     %[temp1],    " #A                               \
+    "(%[b])                 \n\t"                            \
+    "lbu     %[temp2],    " #B                               \
+    "(%[a])                 \n\t"                            \
+    "lbu     %[temp3],    " #B                               \
+    "(%[b])                 \n\t"                            \
+    "lbu     %[temp4],    " #C                               \
+    "(%[a])                 \n\t"                            \
+    "lbu     %[temp5],    " #C                               \
+    "(%[b])                 \n\t"                            \
+    "lbu     %[temp6],    " #D                               \
+    "(%[a])                 \n\t"                            \
+    "lbu     %[temp7],    " #D                               \
+    "(%[b])                 \n\t"                            \
+    "subu    %[temp0],    %[temp0],     %[temp1]       \n\t" \
+    "subu    %[temp2],    %[temp2],     %[temp3]       \n\t" \
+    "subu    %[temp4],    %[temp4],     %[temp5]       \n\t" \
+    "subu    %[temp6],    %[temp6],     %[temp7]       \n\t" \
+    "madd    %[temp0],    %[temp0]                     \n\t" \
+    "madd    %[temp2],    %[temp2]                     \n\t" \
+    "madd    %[temp4],    %[temp4]                     \n\t" \
+    "madd    %[temp6],    %[temp6]                     \n\t"
+// Expands to four GET_SSE_INNER blocks covering offsets A..A+3, B..B+3, C..C+3 and D..D+3 (16 bytes in total).
+#define GET_SSE(A, B, C, D)               \
+    GET_SSE_INNER(A, A + 1, A + 2, A + 3) \
+    GET_SSE_INNER(B, B + 1, B + 2, B + 3) \
+    GET_SSE_INNER(C, C + 1, C + 2, C + 3) \
+    GET_SSE_INNER(D, D + 1, D + 2, D + 3)
+
+static int SSE16x16(const uint8_t* a, const uint8_t* b) {
+    int count;  // receives the low word of the hi/lo accumulator (mflo)
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    // Clears hi/lo (0 * 0), then accumulates squared differences at offsets r * BPS + 0..15 for r = 0..15.
+    __asm__ volatile(
+        "mult   $zero,    $zero                            \n\t"
+
+        GET_SSE(0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS) GET_SSE(
+            1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS) GET_SSE(2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)
+            GET_SSE(3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)
+                GET_SSE(4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)
+                    GET_SSE(5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)
+                        GET_SSE(6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)
+                            GET_SSE(7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)
+                                GET_SSE(8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS)
+                                    GET_SSE(9 * BPS, 4 + 9 * BPS, 8 + 9 * BPS, 12 + 9 * BPS)
+                                        GET_SSE(10 * BPS, 4 + 10 * BPS, 8 + 10 * BPS, 12 + 10 * BPS)
+                                            GET_SSE(11 * BPS, 4 + 11 * BPS, 8 + 11 * BPS, 12 + 11 * BPS)
+                                                GET_SSE(12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS) GET_SSE(
+                                                    13 * BPS, 4 + 13 * BPS, 8 + 13 * BPS, 12 + 13 * BPS)
+                                                    GET_SSE(14 * BPS, 4 + 14 * BPS, 8 + 14 * BPS, 12 + 14 * BPS)
+                                                        GET_SSE(15 * BPS, 4 + 15 * BPS, 8 + 15 * BPS, 12 + 15 * BPS)
+
+                                                            "mflo    %[count]                                   \n\t"
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+          [temp5] "=&r"(temp5), [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [count] "=&r"(count)
+        : [a] "r"(a), [b] "r"(b)
+        : "memory", "hi", "lo");
+    return count;
+}
+
+static int SSE16x8(const uint8_t* a, const uint8_t* b) {
+    int count;  // receives the low word of the hi/lo accumulator (mflo)
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    // Clears hi/lo (0 * 0), then accumulates squared differences at offsets r * BPS + 0..15 for r = 0..7.
+    __asm__ volatile("mult   $zero,    $zero                            \n\t"
+
+                     GET_SSE(0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)
+                         GET_SSE(1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)
+                             GET_SSE(2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)
+                                 GET_SSE(3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)
+                                     GET_SSE(4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)
+                                         GET_SSE(5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)
+                                             GET_SSE(6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)
+                                                 GET_SSE(7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)
+
+                                                     "mflo    %[count]                                   \n\t"
+                     : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
+                       [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [temp6] "=&r"(temp6), [temp7] "=&r"(temp7),
+                       [count] "=&r"(count)
+                     : [a] "r"(a), [b] "r"(b)
+                     : "memory", "hi", "lo");
+    return count;
+}
+
+static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+    int count;  // receives the low word of the hi/lo accumulator (mflo)
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    // Clears hi/lo (0 * 0); each GET_SSE covers offsets r * BPS + 0..7 for two values of r, r = 0..7 overall.
+    __asm__ volatile(
+        "mult   $zero,    $zero                            \n\t"
+
+        GET_SSE(0 * BPS, 4 + 0 * BPS, 1 * BPS, 4 + 1 * BPS) GET_SSE(2 * BPS, 4 + 2 * BPS, 3 * BPS, 4 + 3 * BPS)
+            GET_SSE(4 * BPS, 4 + 4 * BPS, 5 * BPS, 4 + 5 * BPS) GET_SSE(6 * BPS, 4 + 6 * BPS, 7 * BPS, 4 + 7 * BPS)
+
+                "mflo    %[count]                                   \n\t"
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+          [temp5] "=&r"(temp5), [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [count] "=&r"(count)
+        : [a] "r"(a), [b] "r"(b)
+        : "memory", "hi", "lo");
+    return count;
+}
+
+static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+    int count;  // receives the low word of the hi/lo accumulator (mflo)
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    // Clears hi/lo (0 * 0), then accumulates squared differences at offsets r * BPS + 0..3 for r = 0..3.
+    __asm__ volatile("mult   $zero,    $zero                            \n\t"
+
+                     GET_SSE(0 * BPS, 1 * BPS, 2 * BPS, 3 * BPS)
+
+                         "mflo    %[count]                                   \n\t"
+                     : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
+                       [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [temp6] "=&r"(temp6), [temp7] "=&r"(temp7),
+                       [count] "=&r"(count)
+                     : [a] "r"(a), [b] "r"(b)
+                     : "memory", "hi", "lo");
+    return count;
+}
+
+#undef GET_SSE
+#undef GET_SSE_INNER
+
+#endif // !WORK_AROUND_GCC
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspInitMIPS32(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPS32(void) {
+#if !defined(WORK_AROUND_GCC)
+    VP8SSE4x4 = SSE4x4;  // the SSE kernels are only compiled when WORK_AROUND_GCC is not defined
+    VP8SSE8x8 = SSE8x8;
+    VP8SSE16x8 = SSE16x8;
+    VP8SSE16x16 = SSE16x16;
+#endif
+    VP8TDisto16x16 = Disto16x16;  // distortion
+    VP8TDisto4x4 = Disto4x4;
+    VP8EncQuantize2Blocks = Quantize2Blocks;  // quantization
+    VP8EncQuantizeBlock = QuantizeBlock;
+    VP8FTransform = FTransform;  // transforms
+    VP8ITransform = ITransform;
+}
+
+#else // !WEBP_USE_MIPS32
+
+WEBP_DSP_INIT_STUB(VP8EncDspInitMIPS32)
+
+#endif // WEBP_USE_MIPS32
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/enc_mips_dsp_r2.c b/codec/L2/demos/webpEnc/host/src/dsp/enc_mips_dsp_r2.c
new file mode 100644
index 0000000000..7b3144304a
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/enc_mips_dsp_r2.c
@@ -0,0 +1,1566 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of speed-critical encoding functions.
+//
+// Author(s): Darko Laus (darko.laus@imgtec.com)
+//            Mirko Raus (mirko.raus@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+#include "./mips_macro.h"
+#include "../enc/cost.h"
+#include "../enc/vp8enci.h"
+
+static const int kC1 = 20091 + (1 << 16);  // = 85627; passed as operand [kC1] to the ITransformOne asm
+static const int kC2 = 35468;  // passed as operand [kC2] to the ITransformOne asm
+
+// O - output; packed-halfword addq.ph/subq.ph emitted in order: O0 = I0 + I1, O1 = I0 - I1, ..., O6 = I6 + I7, O7 = I6 - I7
+// I - input (macro doesn't change it); an O may name the same register as an I only if no later instruction reads that I
+#define ADD_SUB_HALVES_X4(O0, O1, O2, O3, O4, O5, O6, O7, I0, I1, I2, I3, I4, I5, I6, I7) \
+    "addq.ph          %[" #O0 "],   %[" #I0 "],  %[" #I1                                  \
+    "]     \n\t"                                                                          \
+    "subq.ph          %[" #O1 "],   %[" #I0 "],  %[" #I1                                  \
+    "]     \n\t"                                                                          \
+    "addq.ph          %[" #O2 "],   %[" #I2 "],  %[" #I3                                  \
+    "]     \n\t"                                                                          \
+    "subq.ph          %[" #O3 "],   %[" #I2 "],  %[" #I3                                  \
+    "]     \n\t"                                                                          \
+    "addq.ph          %[" #O4 "],   %[" #I4 "],  %[" #I5                                  \
+    "]     \n\t"                                                                          \
+    "subq.ph          %[" #O5 "],   %[" #I4 "],  %[" #I5                                  \
+    "]     \n\t"                                                                          \
+    "addq.ph          %[" #O6 "],   %[" #I6 "],  %[" #I7                                  \
+    "]     \n\t"                                                                          \
+    "subq.ph          %[" #O7 "],   %[" #I6 "],  %[" #I7 "]     \n\t"
+
+// IO - input/output: each of the eight registers is overwritten with absq_s.ph (saturating per-halfword absolute value) of itself
+#define ABS_X8(IO0, IO1, IO2, IO3, IO4, IO5, IO6, IO7) \
+    "absq_s.ph        %[" #IO0 "],   %[" #IO0          \
+    "]                \n\t"                            \
+    "absq_s.ph        %[" #IO1 "],   %[" #IO1          \
+    "]                \n\t"                            \
+    "absq_s.ph        %[" #IO2 "],   %[" #IO2          \
+    "]                \n\t"                            \
+    "absq_s.ph        %[" #IO3 "],   %[" #IO3          \
+    "]                \n\t"                            \
+    "absq_s.ph        %[" #IO4 "],   %[" #IO4          \
+    "]                \n\t"                            \
+    "absq_s.ph        %[" #IO5 "],   %[" #IO5          \
+    "]                \n\t"                            \
+    "absq_s.ph        %[" #IO6 "],   %[" #IO6          \
+    "]                \n\t"                            \
+    "absq_s.ph        %[" #IO7 "],   %[" #IO7 "]                \n\t"
+
+// dpa.w.ph $ac0 temp0 ,temp1
+//  $ac += temp0[31..16] * temp1[31..16] + temp0[15..0] * temp1[15..0]
+// dpax.w.ph $ac0 temp0 ,temp1
+//  $ac += temp0[31..16] * temp1[15..0] + temp0[15..0] * temp1[31..16]
+// O - output: low word of $ac0 (mflo) after $ac0 is zeroed and the eight dpa/dpax steps below are accumulated
+// I - input (macro doesn't change it); pairs: (I2,I0) (I5,I6)x (I8,I9) (I11,I4)x (I12,I7) (I13,I1)x (I14,I3) (I15,I10)x, x = dpax
+#define MUL_HALF(O0, I0, I1, I2, I3, I4, I5, I6, I7, I8, I9, I10, I11, I12, I13, I14, I15) \
+    "mult            $ac0,      $zero,     $zero              \n\t"                        \
+    "dpa.w.ph        $ac0,      %[" #I2 "],  %[" #I0                                       \
+    "]       \n\t"                                                                         \
+    "dpax.w.ph       $ac0,      %[" #I5 "],  %[" #I6                                       \
+    "]       \n\t"                                                                         \
+    "dpa.w.ph        $ac0,      %[" #I8 "],  %[" #I9                                       \
+    "]       \n\t"                                                                         \
+    "dpax.w.ph       $ac0,      %[" #I11 "], %[" #I4                                       \
+    "]       \n\t"                                                                         \
+    "dpa.w.ph        $ac0,      %[" #I12 "], %[" #I7                                       \
+    "]       \n\t"                                                                         \
+    "dpax.w.ph       $ac0,      %[" #I13 "], %[" #I1                                       \
+    "]       \n\t"                                                                         \
+    "dpa.w.ph        $ac0,      %[" #I14 "], %[" #I3                                       \
+    "]       \n\t"                                                                         \
+    "dpax.w.ph       $ac0,      %[" #I15 "], %[" #I10                                      \
+    "]      \n\t"                                                                          \
+    "mflo            %[" #O0 "],  $ac0                        \n\t"
+// asm output-operand list: OUTPUT_EARLY_CLOBBER_REGS_10() (not defined in this section) plus early-clobber temp11..temp17
+#define OUTPUT_EARLY_CLOBBER_REGS_17()                                                                \
+    OUTPUT_EARLY_CLOBBER_REGS_10()                                                                    \
+    , [temp11] "=&r"(temp11), [temp12] "=&r"(temp12), [temp13] "=&r"(temp13), [temp14] "=&r"(temp14), \
+        [temp15] "=&r"(temp15), [temp16] "=&r"(temp16), [temp17] "=&r"(temp17)
+
+// macro for one horizontal pass in FTransform: loads one 4-byte row of src and of ref, forms src - ref, then transforms it
+// temp0..temp15 hold tmp[0]..tmp[15]; temp16..temp19 are scratch; requires asm operands args, c2217 and c5352
+// A - row index: the words are loaded at byte offset BPS * A from the pointers stored in args[0] and args[1]
+// TEMP0..TEMP3 - registers for corresponding tmp elements
+#define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3)                                                              \
+    "lw              %[" #TEMP0                                                                                     \
+    "],   0(%[args])                          \n\t"                                                                 \
+    "lw              %[" #TEMP1                                                                                     \
+    "],   4(%[args])                          \n\t"                                                                 \
+    "lw              %[" #TEMP2                                                                                     \
+    "],   " XSTR(BPS) "*" #A "(%[" #TEMP0                                                                           \
+                      "]) \n\t"                                                                                     \
+                      "lw              %[" #TEMP3                                                                   \
+                      "],   " XSTR(BPS) "*" #A "(%[" #TEMP1                                                         \
+                                        "]) \n\t"                                                                   \
+                                        "preceu.ph.qbl   %[" #TEMP0 "],   %[" #TEMP2                                \
+                                        "]                       \n\t"                                              \
+                                        "preceu.ph.qbl   %[" #TEMP1 "],   %[" #TEMP3                                \
+                                        "]                       \n\t"                                              \
+                                        "preceu.ph.qbr   %[" #TEMP2 "],   %[" #TEMP2                                \
+                                        "]                       \n\t"                                              \
+                                        "preceu.ph.qbr   %[" #TEMP3 "],   %[" #TEMP3                                \
+                                        "]                       \n\t"                                              \
+                                        "subq.ph         %[" #TEMP0 "],   %[" #TEMP0 "],   %[" #TEMP1               \
+                                        "]      \n\t"                                                               \
+                                        "subq.ph         %[" #TEMP2 "],   %[" #TEMP2 "],   %[" #TEMP3               \
+                                        "]      \n\t"                                                               \
+                                        "rotr            %[" #TEMP0 "],   %[" #TEMP0                                \
+                                        "],   16                 \n\t"                                              \
+                                        "addq.ph         %[" #TEMP1 "],   %[" #TEMP2 "],   %[" #TEMP0               \
+                                        "]      \n\t"                                                               \
+                                        "subq.ph         %[" #TEMP3 "],   %[" #TEMP2 "],   %[" #TEMP0               \
+                                        "]      \n\t"                                                               \
+                                        "seh             %[" #TEMP0 "],   %[" #TEMP1                                \
+                                        "]                       \n\t"                                              \
+                                        "sra             %[temp16],     %[" #TEMP1                                  \
+                                        "],   16                   \n\t"                                            \
+                                        "seh             %[temp19],     %[" #TEMP3                                  \
+                                        "]                         \n\t"                                            \
+                                        "sra             %[" #TEMP3 "],   %[" #TEMP3                                \
+                                        "],   16                 \n\t"                                              \
+                                        "subu            %[" #TEMP2 "],   %[" #TEMP0                                \
+                                        "],   %[temp16]          \n\t"                                              \
+                                        "addu            %[" #TEMP0 "],   %[" #TEMP0                                \
+                                        "],   %[temp16]          \n\t"                                              \
+                                        "mul             %[temp17],     %[temp19],     %[c2217]               \n\t" \
+                                        "mul             %[temp18],     %[" #TEMP3                                  \
+                                        "],   %[c5352]             \n\t"                                            \
+                                        "mul             %[" #TEMP1                                                 \
+                                        "],   %[temp19],     %[c5352]             \n\t"                             \
+                                        "mul             %[temp16],     %[" #TEMP3                                  \
+                                        "],   %[c2217]             \n\t"                                            \
+                                        "sll             %[" #TEMP2 "],   %[" #TEMP2                                \
+                                        "],   3                  \n\t"                                              \
+                                        "sll             %[" #TEMP0 "],   %[" #TEMP0                                \
+                                        "],   3                  \n\t"                                              \
+                                        "subu            %[" #TEMP3                                                 \
+                                        "],   %[temp17],     %[temp18]            \n\t"                             \
+                                        "addu            %[" #TEMP1 "],   %[temp16],     %[" #TEMP1                 \
+                                        "]        \n\t"                                                             \
+                                        "addiu           %[" #TEMP3 "],   %[" #TEMP3                                \
+                                        "],   937                \n\t"                                              \
+                                        "addiu           %[" #TEMP1 "],   %[" #TEMP1                                \
+                                        "],   1812               \n\t"                                              \
+                                        "sra             %[" #TEMP3 "],   %[" #TEMP3                                \
+                                        "],   9                  \n\t"                                              \
+                                        "sra             %[" #TEMP1 "],   %[" #TEMP1 "],   9                  \n\t"
+
+// macro for one vertical pass in FTransform (a bias of 51000 is added as 30000 + 21000: addiu takes a signed 16-bit immediate)
+// temp0..temp15 hold tmp[0]..tmp[15]; temp16..temp19 are scratch; temp20 is the store base (FTransform loads out into it)
+// A..D - offsets in bytes to store to out buffer: TEMP0 -> A, TEMP12 -> B, TEMP4 -> C, TEMP8 -> D
+// TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements (overwritten with the stored results)
+#define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12)              \
+    "addu            %[temp16],     %[" #TEMP0 "],   %[" #TEMP12            \
+    "]   \n\t"                                                              \
+    "subu            %[temp19],     %[" #TEMP0 "],   %[" #TEMP12            \
+    "]   \n\t"                                                              \
+    "addu            %[temp17],     %[" #TEMP4 "],   %[" #TEMP8             \
+    "]    \n\t"                                                             \
+    "subu            %[temp18],     %[" #TEMP4 "],   %[" #TEMP8             \
+    "]    \n\t"                                                             \
+    "mul             %[" #TEMP8                                             \
+    "],   %[temp19],     %[c2217]         \n\t"                             \
+    "mul             %[" #TEMP12                                            \
+    "],  %[temp18],     %[c2217]         \n\t"                              \
+    "mul             %[" #TEMP4                                             \
+    "],   %[temp19],     %[c5352]         \n\t"                             \
+    "mul             %[temp18],     %[temp18],     %[c5352]           \n\t" \
+    "addiu           %[temp16],     %[temp16],     7                  \n\t" \
+    "addu            %[" #TEMP0                                             \
+    "],   %[temp16],     %[temp17]        \n\t"                             \
+    "sra             %[" #TEMP0 "],   %[" #TEMP0                            \
+    "],   4              \n\t"                                              \
+    "addu            %[" #TEMP12 "],  %[" #TEMP12 "],  %[" #TEMP4           \
+    "]  \n\t"                                                               \
+    "subu            %[" #TEMP4                                             \
+    "],   %[temp16],     %[temp17]        \n\t"                             \
+    "sra             %[" #TEMP4 "],   %[" #TEMP4                            \
+    "],   4              \n\t"                                              \
+    "addiu           %[" #TEMP8 "],   %[" #TEMP8                            \
+    "],   30000          \n\t"                                              \
+    "addiu           %[" #TEMP12 "],  %[" #TEMP12                           \
+    "],  12000          \n\t"                                               \
+    "addiu           %[" #TEMP8 "],   %[" #TEMP8                            \
+    "],   21000          \n\t"                                              \
+    "subu            %[" #TEMP8 "],   %[" #TEMP8                            \
+    "],   %[temp18]      \n\t"                                              \
+    "sra             %[" #TEMP12 "],  %[" #TEMP12                           \
+    "],  16             \n\t"                                               \
+    "sra             %[" #TEMP8 "],   %[" #TEMP8                            \
+    "],   16             \n\t"                                              \
+    "addiu           %[temp16],     %[" #TEMP12                             \
+    "],  1                \n\t"                                             \
+    "movn            %[" #TEMP12                                            \
+    "],  %[temp16],     %[temp19]        \n\t"                              \
+    "sh              %[" #TEMP0 "],   " #A                                  \
+    "(%[temp20])               \n\t"                                        \
+    "sh              %[" #TEMP4 "],   " #C                                  \
+    "(%[temp20])               \n\t"                                        \
+    "sh              %[" #TEMP8 "],   " #D                                  \
+    "(%[temp20])               \n\t"                                        \
+    "sh              %[" #TEMP12 "],  " #B "(%[temp20])               \n\t"
+
+static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {  // transforms the 4x4 block src - ref into out[0..15]
+    const int c2217 = 2217;  // multipliers, handed to the asm as register operands [c2217] / [c5352]
+    const int c5352 = 5352;
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
+    int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
+    int temp17, temp18, temp19, temp20;  // temp16..temp19: scratch for the pass macros; temp20: out pointer
+    const int* const args[3] = {(const int*)src, (const int*)ref, (const int*)out};  // the asm reloads these pointers from memory
+    // Four HORIZONTAL_PASS expansions (rows 0..3) fill temp0..temp15; four VERTICAL_PASS expansions (one per column) store to out.
+    __asm__ volatile(
+        HORIZONTAL_PASS(0, temp0, temp1, temp2, temp3) HORIZONTAL_PASS(1, temp4, temp5, temp6, temp7)
+            HORIZONTAL_PASS(2, temp8, temp9, temp10, temp11) HORIZONTAL_PASS(
+                3, temp12, temp13, temp14,
+                temp15) "lw            %[temp20],     8(%[args])                  \n\t" VERTICAL_PASS(0, 8, 16, 24,
+                                                                                                      temp0, temp4,
+                                                                                                      temp8, temp12)
+                VERTICAL_PASS(2, 10, 18, 26, temp1, temp5, temp9, temp13)
+                    VERTICAL_PASS(4, 12, 20, 28, temp2, temp6, temp10, temp14)
+                        VERTICAL_PASS(6, 14, 22, 30, temp3, temp7, temp11, temp15) OUTPUT_EARLY_CLOBBER_REGS_18(),
+        [temp0] "=&r"(temp0), [temp19] "=&r"(temp19), [temp20] "=&r"(temp20)  // outputs beyond OUTPUT_EARLY_CLOBBER_REGS_18() (presumably temp1..temp18 -- confirm)
+        : [args] "r"(args), [c2217] "r"(c2217), [c5352] "r"(c5352)
+        : "memory", "hi", "lo");
+}
+
+#undef VERTICAL_PASS
+#undef HORIZONTAL_PASS
+
+static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in, uint8_t* dst) {  // presumably the inverse transform of one 4x4 block: reads in and ref, writes dst
+    int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
+    int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
+    // Helper macros used below (LOAD_IN_X2, ADD_SUB_HALVES, MUL_SHIFT_SUM, ...) are not defined in this section; presumably mips_macro.h.
+    __asm__ volatile (
+    "ulw              %[temp1],   0(%[in])                 \n\t"
+    "ulw              %[temp2],   16(%[in])                \n\t"
+    LOAD_IN_X2(temp5, temp6, 24, 26)
+    ADD_SUB_HALVES(temp3, temp4, temp1, temp2)
+    LOAD_IN_X2(temp1, temp2, 8, 10)
+    MUL_SHIFT_SUM(temp7, temp8, temp9, temp10, temp11, temp12, temp13, temp14,
+                  temp10, temp8, temp9, temp7, temp1, temp2, temp5, temp6,
+                  temp13, temp11, temp14, temp12)
+    INSERT_HALF_X2(temp8, temp7, temp10, temp9)
+    "ulw              %[temp17],  4(%[in])                 \n\t"
+    "ulw              %[temp18],  20(%[in])                \n\t"
+    ADD_SUB_HALVES(temp1, temp2, temp3, temp8)
+    ADD_SUB_HALVES(temp5, temp6, temp4, temp7)
+    ADD_SUB_HALVES(temp7, temp8, temp17, temp18)
+    LOAD_IN_X2(temp17, temp18, 12, 14)
+    LOAD_IN_X2(temp9, temp10, 28, 30)
+    MUL_SHIFT_SUM(temp11, temp12, temp13, temp14, temp15, temp16, temp4, temp17,
+                  temp12, temp14, temp11, temp13, temp17, temp18, temp9, temp10,
+                  temp15, temp4, temp16, temp17)
+    INSERT_HALF_X2(temp11, temp12, temp13, temp14)
+    ADD_SUB_HALVES(temp17, temp8, temp8, temp11)
+    ADD_SUB_HALVES(temp3, temp4, temp7, temp12)
+
+    // horizontal
+    SRA_16(temp9, temp10, temp11, temp12, temp1, temp2, temp5, temp6)
+    INSERT_HALF_X2(temp1, temp6, temp5, temp2)
+    SRA_16(temp13, temp14, temp15, temp16, temp3, temp4, temp17, temp8)
+    "repl.ph          %[temp2],   0x4                      \n\t"
+    INSERT_HALF_X2(temp3, temp8, temp17, temp4)
+    "addq.ph          %[temp1],   %[temp1],  %[temp2]      \n\t"
+    "addq.ph          %[temp6],   %[temp6],  %[temp2]      \n\t"
+    ADD_SUB_HALVES(temp2, temp4, temp1, temp3)
+    ADD_SUB_HALVES(temp5, temp7, temp6, temp8)
+    MUL_SHIFT_SUM(temp1, temp3, temp6, temp8, temp9, temp13, temp17, temp18,
+                  temp3, temp13, temp1, temp9, temp9, temp13, temp11, temp15,
+                  temp6, temp17, temp8, temp18)
+    MUL_SHIFT_SUM(temp6, temp8, temp18, temp17, temp11, temp15, temp12, temp16,
+                  temp8, temp15, temp6, temp11, temp12, temp16, temp10, temp14,
+                  temp18, temp12, temp17, temp16)
+    INSERT_HALF_X2(temp1, temp3, temp9, temp13)
+    INSERT_HALF_X2(temp6, temp8, temp11, temp15)
+    SHIFT_R_SUM_X2(temp9, temp10, temp11, temp12, temp13, temp14, temp15,
+                   temp16, temp2, temp4, temp5, temp7, temp3, temp1, temp8,
+                   temp6)
+    PACK_2_HALVES_TO_WORD(temp1, temp2, temp3, temp4, temp9, temp12, temp13,
+                          temp16, temp11, temp10, temp15, temp14)
+    LOAD_WITH_OFFSET_X4(temp10, temp11, temp14, temp15, ref,
+                        0, 0, 0, 0,
+                        0, 1, 2, 3,
+                        BPS)
+    CONVERT_2_BYTES_TO_HALF(temp5, temp6, temp7, temp8, temp17, temp18, temp10,
+                            temp11, temp10, temp11, temp14, temp15)
+    STORE_SAT_SUM_X2(temp5, temp6, temp7, temp8, temp17, temp18, temp10, temp11,
+                     temp9, temp12, temp1, temp2, temp13, temp16, temp3, temp4,
+                     dst, 0, 1, 2, 3, BPS)
+
+    OUTPUT_EARLY_CLOBBER_REGS_18()
+    : [dst]"r"(dst), [in]"r"(in), [kC1]"r"(kC1), [kC2]"r"(kC2), [ref]"r"(ref)
+    : "memory", "hi", "lo"
+  );
+}
+
+static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst, int do_two) {
+    // Processes one 4x4 block, or two when do_two is non-zero; the second sits 4 bytes on in ref/dst and 16 coefficients on in `in`.
+    const int num_blocks = do_two ? 2 : 1;
+    int block;
+    for (block = 0; block < num_blocks; ++block) ITransformOne(ref + 4 * block, in + 16 * block, dst + 4 * block);
+}
+
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b, const uint16_t* const w) {  // w-weighted distortion between 4x4 blocks a and b
+    int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
+    int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17;
+    // The same macro sequence runs twice: on a (MUL_HALF result -> temp17), then on b (MUL_HALF result -> temp3); w is loaded before each MUL_HALF.
+    __asm__ volatile(
+        LOAD_WITH_OFFSET_X4(temp1, temp2, temp3, temp4, a, 0, 0, 0, 0, 0, 1, 2, 3, BPS) CONVERT_2_BYTES_TO_HALF(
+            temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp1, temp2, temp3,
+            temp4) ADD_SUB_HALVES_X4(temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp5, temp6, temp7, temp8,
+                                     temp9, temp10, temp11, temp12)
+            PACK_2_HALVES_TO_WORD(temp9, temp10, temp11, temp12, temp1, temp3, temp5, temp7, temp2, temp4, temp6,
+                                  temp8) ADD_SUB_HALVES_X4(temp2, temp4, temp6, temp8, temp9, temp1, temp3, temp10,
+                                                           temp1, temp9, temp3, temp10, temp5, temp11, temp7, temp12)
+                ADD_SUB_HALVES_X4(temp5, temp11, temp7, temp2, temp9, temp3, temp6, temp12, temp2, temp9, temp6, temp3,
+                                  temp4, temp1, temp8, temp10)
+                    ADD_SUB_HALVES_X4(temp1, temp4, temp10, temp8, temp7, temp11, temp5, temp2, temp5, temp7, temp11,
+                                      temp2, temp9, temp6, temp3,
+                                      temp12) ABS_X8(temp1, temp4, temp10, temp8, temp7, temp11, temp5, temp2)
+                        LOAD_WITH_OFFSET_X4(temp3, temp6, temp9, temp12, w, 0, 4, 8, 12, 0, 0, 0, 0, 0)
+                            LOAD_WITH_OFFSET_X4(temp13, temp14, temp15, temp16, w, 0, 4, 8, 12, 1, 1, 1, 1, 16)
+                                MUL_HALF(temp17, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10,
+                                         temp11, temp12, temp13, temp14, temp15,
+                                         temp16) LOAD_WITH_OFFSET_X4(temp1, temp2, temp3, temp4, b,
+                                                                     0, 0, 0, 0, 0, 1, 2, 3, BPS)
+                                    CONVERT_2_BYTES_TO_HALF(temp5, temp6, temp7, temp8,
+                                                            temp9, temp10, temp11, temp12, temp1, temp2, temp3,
+                                                            temp4) ADD_SUB_HALVES_X4(temp1, temp2, temp3, temp4, temp5,
+                                                                                     temp6, temp7, temp8, temp5, temp6,
+                                                                                     temp7, temp8, temp9,
+                                                                                     temp10, temp11, temp12)
+                                        PACK_2_HALVES_TO_WORD(temp9, temp10, temp11, temp12, temp1,
+                                                              temp3, temp5, temp7, temp2, temp4, temp6, temp8)
+                                            ADD_SUB_HALVES_X4(temp2, temp4, temp6, temp8, temp9, temp1, temp3, temp10,
+                                                              temp1, temp9, temp3, temp10, temp5, temp11, temp7, temp12)
+                                                ADD_SUB_HALVES_X4(temp5, temp11, temp7, temp2, temp9, temp3,
+                                                                  temp6, temp12, temp2, temp9, temp6, temp3, temp4,
+                                                                  temp1, temp8, temp10)
+                                                    ADD_SUB_HALVES_X4(temp1, temp4, temp10, temp8, temp7, temp11, temp5,
+                                                                      temp2, temp5, temp7, temp11, temp2, temp9, temp6,
+                                                                      temp3, temp12)
+                                                        ABS_X8(temp1, temp4, temp10, temp8, temp7, temp11, temp5, temp2)
+                                                            LOAD_WITH_OFFSET_X4(temp3, temp6, temp9, temp12, w, 0, 4, 8,
+                                                                                12, 0, 0, 0, 0, 0)
+                                                                LOAD_WITH_OFFSET_X4(temp13, temp14, temp15, temp16, w,
+                                                                                    0, 4, 8, 12, 1, 1, 1, 1, 16)
+                                                                    MUL_HALF(temp3, temp1, temp2, temp3, temp4, temp5,
+                                                                             temp6, temp7, temp8, temp9, temp10, temp11,
+                                                                             temp12, temp13, temp14, temp15, temp16)
+                                                                        OUTPUT_EARLY_CLOBBER_REGS_17()
+        : [a] "r"(a), [b] "r"(b), [w] "r"(w)
+        : "memory", "hi", "lo");
+    return abs(temp3 - temp17) >> 5;  // absolute difference of the two accumulated sums, shifted right by 5
+}
+
+static int Disto16x16(const uint8_t* const a, const uint8_t* const b, const uint16_t* const w) {
+    // Sums Disto4x4 over the sixteen 4x4 sub-blocks, visited row by row (sub-block index k = 4 * row + column).
+    int total = 0;
+    int k;
+    for (k = 0; k < 16; ++k) {
+        const int offset = (k >> 2) * 4 * BPS + (k & 3) * 4;
+        total += Disto4x4(a + offset, b + offset, w);
+    }
+    return total;
+}
+
+//------------------------------------------------------------------------------
+// Intra predictions
+// Emits unaligned word stores (usw) of %[value] over the first 8 bytes of row J of dst (row stride BPS), or 16 bytes when SIZE == 16.
+#define FILL_PART(J, SIZE) \
+    "usw        %[value],  0+" #J "*" XSTR(BPS) "(%[dst])  \n\t"      \
+    "usw        %[value],  4+" #J "*" XSTR(BPS) "(%[dst])  \n\t"      \
+  ".if " #SIZE " == 16                                     \n\t"      \
+    "usw        %[value],  8+" #J "*" XSTR(BPS) "(%[dst])  \n\t"      \
+    "usw        %[value], 12+" #J "*" XSTR(BPS) "(%[dst])  \n\t"      \
+  ".endif                                                  \n\t"
+// Replicates the low byte of VALUE into all four byte lanes (replv.qb) and stores it over rows 0..7 of DST, plus rows 8..15 when SIZE == 16.
+#define FILL_8_OR_16(DST, VALUE, SIZE)                                                                               \
+    do {                                                                                                             \
+        int value = (VALUE);                                                                                         \
+        __asm__ volatile(                                                                                            \
+            "replv.qb   %[value],  %[value]                      \n\t" FILL_PART(0, SIZE) FILL_PART(1, SIZE)         \
+                FILL_PART(2, SIZE) FILL_PART(3, SIZE) FILL_PART(4, SIZE) FILL_PART(5, SIZE) FILL_PART(6, SIZE)       \
+                    FILL_PART(7, SIZE) ".if " #SIZE " == 16                                   \n\t" FILL_PART(8, 16) \
+                        FILL_PART(9, 16) FILL_PART(10, 16) FILL_PART(11, 16) FILL_PART(12, 16) FILL_PART(13, 16)     \
+                            FILL_PART(14, 16)                                                                        \
+                                FILL_PART(15, 16) ".endif                                                \n\t"       \
+            : [value] "+&r"(value)                                                                                   \
+            : [dst] "r"((DST))                                                                                       \
+            : "memory");                                                                                             \
+    } while (0)
+// Defines VerticalPred<SIZE>(dst, top): copies the SIZE bytes at top into each of SIZE rows of dst (stride BPS); fills with 127 if top is NULL.
+#define VERTICAL_PRED(DST, TOP, SIZE)                                                \
+    static WEBP_INLINE void VerticalPred##SIZE(uint8_t*(DST), const uint8_t*(TOP)) { \
+        int j;                                                                       \
+        if ((TOP)) {                                                                 \
+            for (j = 0; j < (SIZE); ++j) memcpy((DST) + j * BPS, (TOP), (SIZE));     \
+        } else {                                                                     \
+            FILL_8_OR_16((DST), 127, (SIZE));                                        \
+        }                                                                            \
+    }
+
+VERTICAL_PRED(dst, top, 8)  // defines VerticalPred8
+VERTICAL_PRED(dst, top, 16)  // defines VerticalPred16
+
+#undef VERTICAL_PRED
+// Defines HorizontalPred<SIZE>(dst, left): sets the SIZE bytes of row j of dst (stride BPS) to left[j]; fills with 129 if left is NULL.
+#define HORIZONTAL_PRED(DST, LEFT, SIZE)                                                \
+    static WEBP_INLINE void HorizontalPred##SIZE(uint8_t*(DST), const uint8_t*(LEFT)) { \
+        if (LEFT) {                                                                     \
+            int j;                                                                      \
+            for (j = 0; j < (SIZE); ++j) {                                              \
+                memset((DST) + j * BPS, (LEFT)[j], (SIZE));                             \
+            }                                                                           \
+        } else {                                                                        \
+            FILL_8_OR_16((DST), 129, (SIZE));                                           \
+        }                                                                               \
+    }
+
+HORIZONTAL_PRED(dst, left, 8)  // defines HorizontalPred8
+HORIZONTAL_PRED(dst, left, 16)  // defines HorizontalPred16
+
+#undef HORIZONTAL_PRED
+
+#define CLIPPING() /* asm text fragment; uses operands temp0..temp3 and leftY_1 of the enclosing asm statement */ \
+    "preceu.ph.qbl   %[temp2],   %[temp0]                  \n\t" /* widen the 4 bytes of temp0 to halfwords: */ \
+    "preceu.ph.qbr   %[temp0],   %[temp0]                  \n\t" /* left pair -> temp2, right pair -> temp0  */ \
+    "preceu.ph.qbl   %[temp3],   %[temp1]                  \n\t" /* same for temp1: left pair -> temp3,      */ \
+    "preceu.ph.qbr   %[temp1],   %[temp1]                  \n\t" /* right pair stays in temp1                */ \
+    "addu.ph         %[temp2],   %[temp2],   %[leftY_1]    \n\t" /* add the two halfwords of leftY_1 to      */ \
+    "addu.ph         %[temp0],   %[temp0],   %[leftY_1]    \n\t" /* every widened pair                       */ \
+    "addu.ph         %[temp3],   %[temp3],   %[leftY_1]    \n\t" \
+    "addu.ph         %[temp1],   %[temp1],   %[leftY_1]    \n\t" \
+    "shll_s.ph       %[temp2],   %[temp2],   7             \n\t" /* saturating left shift by 7; together     */ \
+    "shll_s.ph       %[temp0],   %[temp0],   7             \n\t" /* with the saturating repack below this    */ \
+    "shll_s.ph       %[temp3],   %[temp3],   7             \n\t" /* clamps each sum to the range [0, 255]    */ \
+    "shll_s.ph       %[temp1],   %[temp1],   7             \n\t" \
+    "precrqu_s.qb.ph %[temp0],   %[temp2],   %[temp0]      \n\t" /* repack 4 halfwords -> 4 bytes in temp0   */ \
+    "precrqu_s.qb.ph %[temp1],   %[temp3],   %[temp1]      \n\t" /* repack 4 halfwords -> 4 bytes in temp1   */
+
+/* Emits one output row of 8 (or, when SIZE is 16, 16) bytes at DST:        */
+/* each byte is the matching TOP byte plus (LEFT[y] - LEFT[-1]), clamped by */
+/* CLIPPING(). Relies on 'y' and 'left_1' being declared by the enclosing   */
+/* scope (CLIP_TO_DST provides both). SIZE is stringified into an           */
+/* assembler-level ".if", so the second 8-byte half is only assembled for   */
+/* the 16-wide instantiation.                                               */
+#define CLIP_8B_TO_DST(DST, LEFT, TOP, SIZE)                                   \
+    do {                                                                       \
+        int temp0, temp1, temp2, temp3;                                        \
+        int leftY_1 = ((int)(LEFT)[y] << 16) + (LEFT)[y];                      \
+        __asm__ volatile (                                                     \
+            "replv.ph        %[leftY_1], %[leftY_1]              \n\t"         \
+            "ulw             %[temp0],   0(%[top])               \n\t"         \
+            "ulw             %[temp1],   4(%[top])               \n\t"         \
+            "subu.ph         %[leftY_1], %[leftY_1], %[left_1]   \n\t"         \
+            CLIPPING()                                                         \
+            "usw             %[temp0],   0(%[dst])               \n\t"         \
+            "usw             %[temp1],   4(%[dst])               \n\t"         \
+            ".if " #SIZE " == 16                                   \n\t"       \
+            "ulw             %[temp0],   8(%[top])               \n\t"         \
+            "ulw             %[temp1],   12(%[top])              \n\t"         \
+            CLIPPING()                                                         \
+            "usw             %[temp0],   8(%[dst])               \n\t"         \
+            "usw             %[temp1],   12(%[dst])              \n\t"         \
+            ".endif                                                \n\t"       \
+            : [leftY_1]"+&r"(leftY_1), [temp0]"=&r"(temp0),                    \
+              [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),                        \
+              [temp3]"=&r"(temp3)                                              \
+            : [left_1]"r"(left_1), [top]"r"((TOP)), [dst]"r"((DST))            \
+            : "memory"                                                         \
+        );                                                                     \
+    } while (0)
+
+/* Runs CLIP_8B_TO_DST once per row for SIZE rows, advancing DST by BPS     */
+/* after each row. The names 'y' (row index) and 'left_1' (LEFT[-1] placed  */
+/* in both 16-bit halves) are read by CLIP_8B_TO_DST and must keep these    */
+/* exact spellings.                                                         */
+#define CLIP_TO_DST(DST, LEFT, TOP, SIZE)                        \
+    do {                                                         \
+        const int left_1 = ((int)(LEFT)[-1] << 16) + (LEFT)[-1]; \
+        int y = 0;                                               \
+        while (y < (SIZE)) {                                     \
+            CLIP_8B_TO_DST((DST), (LEFT), (TOP), (SIZE));        \
+            (DST) += BPS;                                        \
+            ++y;                                                 \
+        }                                                        \
+    } while (0)
+
+/* Generates TrueMotion<SIZE>(), dispatching on which borders exist:        */
+/*   left and top  -> CLIP_TO_DST (clamped top + left - corner per pixel)   */
+/*   left only     -> HorizontalPred<SIZE>                                  */
+/*   top only      -> VerticalPred<SIZE>                                    */
+/*   neither       -> constant fill with 129                                */
+#define TRUE_MOTION(DST, LEFT, TOP, SIZE)                                                                \
+    static WEBP_INLINE void TrueMotion##SIZE(uint8_t*(DST), const uint8_t*(LEFT), const uint8_t*(TOP)) { \
+        if ((LEFT) == NULL) {                                                                            \
+            /* Without left samples (default value 129) true motion reduces to */                        \
+            /* copying the top samples, i.e. the vertical predictor. If the    */                        \
+            /* top samples are missing too, the fill value is 129 here, unlike */                        \
+            /* the 127 used by VerticalPred itself.                            */                        \
+            if ((TOP) == NULL) {                                                                         \
+                FILL_8_OR_16((DST), 129, (SIZE));                                                        \
+            } else {                                                                                     \
+                VerticalPred##SIZE((DST), (TOP));                                                        \
+            }                                                                                            \
+        } else if ((TOP) == NULL) {                                                                      \
+            HorizontalPred##SIZE((DST), (LEFT));                                                         \
+        } else {                                                                                         \
+            CLIP_TO_DST((DST), (LEFT), (TOP), (SIZE));                                                   \
+        }                                                                                                \
+    }
+
+TRUE_MOTION(dst, left, top, 8)
+TRUE_MOTION(dst, left, top, 16)
+
+#undef TRUE_MOTION
+#undef CLIP_TO_DST
+#undef CLIP_8B_TO_DST
+#undef CLIPPING
+
+static WEBP_INLINE void DCMode16(uint8_t* dst, const uint8_t* left, const uint8_t* top) { /* 16-wide DC predictor.
+       Computes one value DC and hands it to FILL_8_OR_16(dst, DC, 16):
+         top and left present : (sum(top) + sum(left) + 16) >> 5
+         only one present     : (2 * sum(present) + 16) >> 5
+         neither present      : 0x80
+       "present" means the pointer is non-null (tested with beqz). */
+    int DC, DC1;
+    int temp0, temp1, temp2, temp3;
+
+    __asm__ volatile(
+    "beqz        %[top],   2f                  \n\t" /* no top row -> label 2 */
+    LOAD_WITH_OFFSET_X4(temp0, temp1, temp2, temp3, top,
+                        0, 4, 8, 12,
+                        0, 0, 0, 0,
+                        0) /* macro defined elsewhere; presumably loads 4 words from top at offsets 0/4/8/12 - confirm */
+    "raddu.w.qb  %[temp0], %[temp0]            \n\t" /* sum the 4 bytes of each word */
+    "raddu.w.qb  %[temp1], %[temp1]            \n\t"
+    "raddu.w.qb  %[temp2], %[temp2]            \n\t"
+    "raddu.w.qb  %[temp3], %[temp3]            \n\t"
+    "addu        %[temp0], %[temp0], %[temp1]  \n\t"
+    "addu        %[temp2], %[temp2], %[temp3]  \n\t"
+    "addu        %[DC],    %[temp0], %[temp2]  \n\t" /* DC = top sum */
+    "move        %[DC1],   %[DC]               \n\t" /* default DC1 = top sum, so DC + DC1 doubles it if left is absent */
+    "beqz        %[left],  1f                  \n\t"
+    LOAD_WITH_OFFSET_X4(temp0, temp1, temp2, temp3, left,
+                        0, 4, 8, 12,
+                        0, 0, 0, 0,
+                        0)
+    "raddu.w.qb  %[temp0], %[temp0]            \n\t"
+    "raddu.w.qb  %[temp1], %[temp1]            \n\t"
+    "raddu.w.qb  %[temp2], %[temp2]            \n\t"
+    "raddu.w.qb  %[temp3], %[temp3]            \n\t"
+    "addu        %[temp0], %[temp0], %[temp1]  \n\t"
+    "addu        %[temp2], %[temp2], %[temp3]  \n\t"
+    "addu        %[DC1],   %[temp0], %[temp2]  \n\t" /* DC1 = left sum */
+  "1:                                          \n\t"
+    "addu        %[DC],   %[DC],     %[DC1]    \n\t"
+    "j           3f                            \n\t"
+  "2:                                          \n\t" /* top absent */
+    "beqz        %[left],  4f                  \n\t" /* left absent as well -> label 4 */
+    LOAD_WITH_OFFSET_X4(temp0, temp1, temp2, temp3, left,
+                        0, 4, 8, 12,
+                        0, 0, 0, 0,
+                        0)
+    "raddu.w.qb  %[temp0], %[temp0]            \n\t"
+    "raddu.w.qb  %[temp1], %[temp1]            \n\t"
+    "raddu.w.qb  %[temp2], %[temp2]            \n\t"
+    "raddu.w.qb  %[temp3], %[temp3]            \n\t"
+    "addu        %[temp0], %[temp0], %[temp1]  \n\t"
+    "addu        %[temp2], %[temp2], %[temp3]  \n\t"
+    "addu        %[DC],    %[temp0], %[temp2]  \n\t"
+    "addu        %[DC],    %[DC],    %[DC]     \n\t" /* only left available: double its sum */
+  "3:                                          \n\t"
+    "shra_r.w    %[DC],    %[DC],    5         \n\t" /* rounding right shift: (DC + 16) >> 5 */
+    "j           5f                            \n\t"
+  "4:                                          \n\t"
+    "li          %[DC],    0x80                \n\t" /* neither border available */
+  "5:                                          \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [DC]"=&r"(DC),
+      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [DC1]"=&r"(DC1)
+    : [left]"r"(left), [top]"r"(top)
+    : "memory"
+  );
+
+    FILL_8_OR_16(dst, DC, 16);
+}
+
+static WEBP_INLINE void DCMode8(uint8_t* dst, const uint8_t* left, const uint8_t* top) { /* 8-wide DC predictor.
+       Computes one value DC and hands it to FILL_8_OR_16(dst, DC, 8):
+         top and left present : (sum(top[0..7]) + sum(left[0..7]) + 8) >> 4
+         only one present     : (2 * sum(present) + 8) >> 4
+         neither present      : 0x80
+       "present" means the pointer is non-null (tested with beqz). */
+    int DC, DC1;
+    int temp0, temp1, temp2, temp3;
+
+    __asm__ volatile(
+        "beqz        %[top],   2f                  \n\t" /* no top row -> label 2 */
+        "ulw         %[temp0], 0(%[top])           \n\t" /* unaligned loads of top[0..3], top[4..7] */
+        "ulw         %[temp1], 4(%[top])           \n\t"
+        "raddu.w.qb  %[temp0], %[temp0]            \n\t" /* sum the 4 bytes of each word */
+        "raddu.w.qb  %[temp1], %[temp1]            \n\t"
+        "addu        %[DC],    %[temp0], %[temp1]  \n\t" /* DC = top sum */
+        "move        %[DC1],   %[DC]               \n\t" /* default DC1 = top sum, so DC + DC1 doubles it if left is absent */
+        "beqz        %[left],  1f                  \n\t"
+        "ulw         %[temp2], 0(%[left])          \n\t"
+        "ulw         %[temp3], 4(%[left])          \n\t"
+        "raddu.w.qb  %[temp2], %[temp2]            \n\t"
+        "raddu.w.qb  %[temp3], %[temp3]            \n\t"
+        "addu        %[DC1],   %[temp2], %[temp3]  \n\t" /* DC1 = left sum */
+        "1:                                          \n\t"
+        "addu        %[DC],    %[DC],    %[DC1]    \n\t"
+        "j           3f                            \n\t"
+        "2:                                          \n\t" /* top absent */
+        "beqz        %[left],  4f                  \n\t" /* left absent as well -> label 4 */
+        "ulw         %[temp2], 0(%[left])          \n\t"
+        "ulw         %[temp3], 4(%[left])          \n\t"
+        "raddu.w.qb  %[temp2], %[temp2]            \n\t"
+        "raddu.w.qb  %[temp3], %[temp3]            \n\t"
+        "addu        %[DC],    %[temp2], %[temp3]  \n\t"
+        "addu        %[DC],    %[DC],    %[DC]     \n\t" /* only left available: double its sum */
+        "3:                                          \n\t"
+        "shra_r.w    %[DC], %[DC], 4               \n\t" /* rounding right shift: (DC + 8) >> 4 */
+        "j           5f                            \n\t"
+        "4:                                          \n\t"
+        "li          %[DC], 0x80                   \n\t" /* neither border available */
+        "5:                                          \n\t"
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [DC] "=&r"(DC), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
+          [DC1] "=&r"(DC1)
+        : [left] "r"(left), [top] "r"(top)
+        : "memory");
+
+    FILL_8_OR_16(dst, DC, 8);
+}
+
+static void DC4(uint8_t* dst, const uint8_t* top) { /* 4x4 DC predictor: sets all 16 bytes (4 rows, stride BPS)
+       to (sum of top[0..3] and top[-5..-2], plus 4) >> 3.
+       NOTE(review): top[-5..-2] are presumably the left-border samples in the caller's buffer layout - confirm. */
+    int temp0, temp1;
+    __asm__ volatile(
+    "ulw          %[temp0],   0(%[top])               \n\t" /* 4 bytes at top[0..3] */
+    "ulw          %[temp1],   -5(%[top])              \n\t" /* 4 bytes at top[-5..-2] */
+    "raddu.w.qb   %[temp0],   %[temp0]                \n\t" /* sum the 4 bytes of each word */
+    "raddu.w.qb   %[temp1],   %[temp1]                \n\t"
+    "addu         %[temp0],   %[temp0],    %[temp1]   \n\t"
+    "addiu        %[temp0],   %[temp0],    4          \n\t" /* rounding term */
+    "srl          %[temp0],   %[temp0],    3          \n\t" /* divide by 8 samples */
+    "replv.qb     %[temp0],   %[temp0]                \n\t" /* replicate the byte into all 4 lanes */
+    "usw          %[temp0],   0*" XSTR(BPS) "(%[dst]) \n\t" /* same word written to rows 0..3 */
+    "usw          %[temp0],   1*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw          %[temp0],   2*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw          %[temp0],   3*" XSTR(BPS) "(%[dst]) \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+static void TM4(uint8_t* dst, const uint8_t* top) { /* 4x4 predictor: each output byte is
+       clamp(top[x] + (b - top[-1]), 0, 255), where b is one of the four bytes at top[-5..-2], one per row.
+       NOTE(review): the row -> byte mapping depends on byte order; on a little-endian target row y appears
+       to use top[-2 - y] - confirm. The function name suggests the "TrueMotion" mode. */
+    int a10, a32, temp0, temp1, temp2, temp3, temp4, temp5;
+    const int c35 = 0xff00ff; /* mask keeping the low byte of each halfword */
+    __asm__ volatile (
+    "lbu              %[temp1],  0(%[top])                     \n\t" /* top[0] */
+    "lbu              %[a10],    1(%[top])                     \n\t" /* top[1] */
+    "lbu              %[temp2],  2(%[top])                     \n\t" /* top[2] */
+    "lbu              %[a32],    3(%[top])                     \n\t" /* top[3] */
+    "ulw              %[temp0],  -5(%[top])                    \n\t" /* 4 bytes at top[-5..-2] */
+    "lbu              %[temp4],  -1(%[top])                    \n\t" /* top[-1] */
+    "append           %[a10],    %[temp1],   16                \n\t" /* a10 = halfword pair top[1] : top[0] */
+    "append           %[a32],    %[temp2],   16                \n\t" /* a32 = halfword pair top[3] : top[2] */
+    "replv.ph         %[temp4],  %[temp4]                      \n\t" /* top[-1] in both halfwords */
+    "shrl.ph          %[temp1],  %[temp0],   8                 \n\t" /* high byte of each halfword of the loaded word */
+    "and              %[temp0],  %[temp0],   %[c35]            \n\t" /* low byte of each halfword of the loaded word */
+    "subu.ph          %[temp1],  %[temp1],   %[temp4]          \n\t" /* per-row delta = byte - top[-1] */
+    "subu.ph          %[temp0],  %[temp0],   %[temp4]          \n\t"
+    "srl              %[temp2],  %[temp1],   16                \n\t" /* isolate the upper-halfword deltas */
+    "srl              %[temp3],  %[temp0],   16                \n\t"
+    "replv.ph         %[temp2],  %[temp2]                      \n\t" /* broadcast each of the 4 deltas to both halfwords */
+    "replv.ph         %[temp3],  %[temp3]                      \n\t"
+    "replv.ph         %[temp4],  %[temp1]                      \n\t"
+    "replv.ph         %[temp5],  %[temp0]                      \n\t"
+    "addu.ph          %[temp0],  %[temp3],   %[a10]            \n\t" /* delta + top pairs for two rows */
+    "addu.ph          %[temp1],  %[temp3],   %[a32]            \n\t"
+    "addu.ph          %[temp3],  %[temp2],   %[a10]            \n\t"
+    "addu.ph          %[temp2],  %[temp2],   %[a32]            \n\t"
+    "shll_s.ph        %[temp0],  %[temp0],   7                 \n\t" /* saturating <<7 followed by the saturating */
+    "shll_s.ph        %[temp1],  %[temp1],   7                 \n\t" /* repack below clamps each sum to [0, 255] */
+    "shll_s.ph        %[temp3],  %[temp3],   7                 \n\t"
+    "shll_s.ph        %[temp2],  %[temp2],   7                 \n\t"
+    "precrqu_s.qb.ph  %[temp0],  %[temp1],   %[temp0]          \n\t" /* 4 halfwords -> one 4-byte row */
+    "precrqu_s.qb.ph  %[temp1],  %[temp2],   %[temp3]          \n\t"
+    "addu.ph          %[temp2],  %[temp5],   %[a10]            \n\t" /* same sequence for the remaining two rows */
+    "addu.ph          %[temp3],  %[temp5],   %[a32]            \n\t"
+    "addu.ph          %[temp5],  %[temp4],   %[a10]            \n\t"
+    "addu.ph          %[temp4],  %[temp4],   %[a32]            \n\t"
+    "shll_s.ph        %[temp2],  %[temp2],   7                 \n\t"
+    "shll_s.ph        %[temp3],  %[temp3],   7                 \n\t"
+    "shll_s.ph        %[temp4],  %[temp4],   7                 \n\t"
+    "shll_s.ph        %[temp5],  %[temp5],   7                 \n\t"
+    "precrqu_s.qb.ph  %[temp2],  %[temp3],   %[temp2]          \n\t"
+    "precrqu_s.qb.ph  %[temp3],  %[temp4],   %[temp5]          \n\t"
+    "usw              %[temp1],  0*" XSTR(BPS) "(%[dst])       \n\t" /* rows 0..3, stride BPS */
+    "usw              %[temp0],  1*" XSTR(BPS) "(%[dst])       \n\t"
+    "usw              %[temp3],  2*" XSTR(BPS) "(%[dst])       \n\t"
+    "usw              %[temp2],  3*" XSTR(BPS) "(%[dst])       \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [a10]"=&r"(a10), [a32]"=&r"(a32)
+    : [c35]"r"(c35), [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+static void VE4(uint8_t* dst, const uint8_t* top) { /* 4x4 predictor: reads top[-1..4], applies the 3-tap filter
+       (a + 2*b + c + 2) >> 2 across neighbouring samples to get 4 bytes, and writes that same 4-byte word
+       to all four rows (stride BPS). The name suggests the "vertical" mode - confirm. */
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
+    __asm__ volatile(
+    "ulw             %[temp0],   -1(%[top])              \n\t" /* 4 bytes at top[-1..2] */
+    "ulh             %[temp1],   3(%[top])               \n\t" /* 2 bytes at top[3..4] */
+    "preceu.ph.qbr   %[temp2],   %[temp0]                \n\t" /* widen bytes to halfwords */
+    "preceu.ph.qbl   %[temp3],   %[temp0]                \n\t"
+    "preceu.ph.qbr   %[temp4],   %[temp1]                \n\t"
+    "packrl.ph       %[temp5],   %[temp3],    %[temp2]   \n\t" /* build the pairs of centre taps */
+    "packrl.ph       %[temp6],   %[temp4],    %[temp3]   \n\t"
+    "shll.ph         %[temp5],   %[temp5],    1          \n\t" /* 2 * centre tap */
+    "shll.ph         %[temp6],   %[temp6],    1          \n\t"
+    "addq.ph         %[temp2],   %[temp5],    %[temp2]   \n\t" /* add both outer taps */
+    "addq.ph         %[temp6],   %[temp6],    %[temp4]   \n\t"
+    "addq.ph         %[temp2],   %[temp2],    %[temp3]   \n\t"
+    "addq.ph         %[temp6],   %[temp6],    %[temp3]   \n\t"
+    "shra_r.ph       %[temp2],   %[temp2],    2          \n\t" /* rounding shift: (sum + 2) >> 2 */
+    "shra_r.ph       %[temp6],   %[temp6],    2          \n\t"
+    "precr.qb.ph     %[temp4],   %[temp6],    %[temp2]   \n\t" /* pack 4 halfwords into 4 bytes */
+    "usw             %[temp4],   0*" XSTR(BPS) "(%[dst]) \n\t" /* identical word for rows 0..3 */
+    "usw             %[temp4],   1*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw             %[temp4],   2*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw             %[temp4],   3*" XSTR(BPS) "(%[dst]) \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+static void HE4(uint8_t* dst, const uint8_t* top) { /* 4x4 predictor: reads top[-5..-1], produces four values with
+       the 3-tap filter (a + 2*b + c + 2) >> 2, and fills each of the four rows (stride BPS) with one of those
+       values replicated across its 4 bytes. The name suggests the "horizontal" mode - confirm. */
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
+    __asm__ volatile(
+    "ulw             %[temp0],   -4(%[top])              \n\t" /* 4 bytes at top[-4..-1] */
+    "lbu             %[temp1],   -5(%[top])              \n\t" /* top[-5] */
+    "preceu.ph.qbr   %[temp2],   %[temp0]                \n\t" /* widen bytes to halfwords */
+    "preceu.ph.qbl   %[temp3],   %[temp0]                \n\t"
+    "replv.ph        %[temp4],   %[temp1]                \n\t" /* top[-5] in both halfwords */
+    "packrl.ph       %[temp5],   %[temp3],    %[temp2]   \n\t" /* build the pairs of centre taps */
+    "packrl.ph       %[temp6],   %[temp2],    %[temp4]   \n\t"
+    "shll.ph         %[temp5],   %[temp5],    1          \n\t" /* 2 * centre tap */
+    "shll.ph         %[temp6],   %[temp6],    1          \n\t"
+    "addq.ph         %[temp3],   %[temp3],    %[temp5]   \n\t" /* add both outer taps */
+    "addq.ph         %[temp3],   %[temp3],    %[temp2]   \n\t"
+    "addq.ph         %[temp2],   %[temp2],    %[temp6]   \n\t"
+    "addq.ph         %[temp2],   %[temp2],    %[temp4]   \n\t"
+    "shra_r.ph       %[temp3],   %[temp3],    2          \n\t" /* rounding shift: (sum + 2) >> 2 */
+    "shra_r.ph       %[temp2],   %[temp2],    2          \n\t"
+    "replv.qb        %[temp0],   %[temp3]                \n\t" /* replicate the low-halfword results across 4 bytes */
+    "replv.qb        %[temp1],   %[temp2]                \n\t"
+    "srl             %[temp3],   %[temp3],    16         \n\t" /* move the high-halfword results down ... */
+    "srl             %[temp2],   %[temp2],    16         \n\t"
+    "replv.qb        %[temp3],   %[temp3]                \n\t" /* ... and replicate them too */
+    "replv.qb        %[temp2],   %[temp2]                \n\t"
+    "usw             %[temp3],   0*" XSTR(BPS) "(%[dst]) \n\t" /* one constant word per row */
+    "usw             %[temp0],   1*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw             %[temp2],   2*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw             %[temp1],   3*" XSTR(BPS) "(%[dst]) \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+static void RD4(uint8_t* dst, const uint8_t* top) { /* 4x4 predictor: reads top[-5..3], smooths neighbouring
+       samples with the 3-tap filter (a + 2*b + c + 2) >> 2, and writes four rows (stride BPS) where rows 2
+       and 0 are derived from rows 3 and 1 by shifting in one new byte each ("prepend ..., 8").
+       The name suggests the "down-right" diagonal mode - confirm. */
+    int temp0, temp1, temp2, temp3, temp4, temp5;
+    int temp6, temp7, temp8, temp9, temp10, temp11;
+    __asm__ volatile(
+    "ulw             %[temp0],    -5(%[top])               \n\t" /* 4 bytes at top[-5..-2] */
+    "ulw             %[temp1],    -1(%[top])               \n\t" /* 4 bytes at top[-1..2] */
+    "preceu.ph.qbl   %[temp2],    %[temp0]                 \n\t" /* widen all 8 bytes to halfwords */
+    "preceu.ph.qbr   %[temp3],    %[temp0]                 \n\t"
+    "preceu.ph.qbr   %[temp4],    %[temp1]                 \n\t"
+    "preceu.ph.qbl   %[temp5],    %[temp1]                 \n\t"
+    "packrl.ph       %[temp6],    %[temp2],    %[temp3]    \n\t" /* build the pairs of centre taps */
+    "packrl.ph       %[temp7],    %[temp4],    %[temp2]    \n\t"
+    "packrl.ph       %[temp8],    %[temp5],    %[temp4]    \n\t"
+    "shll.ph         %[temp6],    %[temp6],    1           \n\t" /* 2 * centre tap */
+    "addq.ph         %[temp9],    %[temp2],    %[temp6]    \n\t"
+    "shll.ph         %[temp7],    %[temp7],    1           \n\t"
+    "addq.ph         %[temp9],    %[temp9],    %[temp3]    \n\t"
+    "shll.ph         %[temp8],    %[temp8],    1           \n\t"
+    "shra_r.ph       %[temp9],    %[temp9],    2           \n\t" /* rounding shift: (sum + 2) >> 2 */
+    "addq.ph         %[temp10],   %[temp4],    %[temp7]    \n\t"
+    "addq.ph         %[temp11],   %[temp5],    %[temp8]    \n\t"
+    "addq.ph         %[temp10],   %[temp10],   %[temp2]    \n\t"
+    "addq.ph         %[temp11],   %[temp11],   %[temp4]    \n\t"
+    "shra_r.ph       %[temp10],   %[temp10],   2           \n\t"
+    "shra_r.ph       %[temp11],   %[temp11],   2           \n\t"
+    "lbu             %[temp0],    3(%[top])                \n\t" /* last filtered value, computed in scalar form: */
+    "lbu             %[temp1],    2(%[top])                \n\t" /* (top[3] + 2*top[2] + top[1] + 2) >> 2 */
+    "lbu             %[temp2],    1(%[top])                \n\t"
+    "sll             %[temp1],    %[temp1],    1           \n\t"
+    "addu            %[temp0],    %[temp0],    %[temp1]    \n\t"
+    "addu            %[temp0],    %[temp0],    %[temp2]    \n\t"
+    "precr.qb.ph     %[temp9],    %[temp10],   %[temp9]    \n\t" /* pack halfwords into a 4-byte row */
+    "shra_r.w        %[temp0],    %[temp0],    2           \n\t"
+    "precr.qb.ph     %[temp10],   %[temp11],   %[temp10]   \n\t"
+    "usw             %[temp9],    3*" XSTR(BPS) "(%[dst])  \n\t" /* row 3 */
+    "usw             %[temp10],   1*" XSTR(BPS) "(%[dst])  \n\t" /* row 1 */
+    "prepend         %[temp9],    %[temp11],   8           \n\t" /* shift each row by one byte, inserting a new one */
+    "prepend         %[temp10],   %[temp0],    8           \n\t"
+    "usw             %[temp9],    2*" XSTR(BPS) "(%[dst])  \n\t" /* row 2 */
+    "usw             %[temp10],   0*" XSTR(BPS) "(%[dst])  \n\t" /* row 0 */
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+static void VR4(uint8_t* dst, const uint8_t* top) { /* 4x4 predictor: reads top[-4..3] and combines two kinds of
+       values - rounded 2-sample averages (addqh_r.ph) and 3-tap (a + 2*b + c + 2) >> 2 results - into rows
+       0 and 1; rows 2 and 3 are then derived from them by shifting in one extra byte each ("append ..., 8").
+       The name suggests the "vertical-right" mode - confirm. */
+    int temp0, temp1, temp2, temp3, temp4;
+    int temp5, temp6, temp7, temp8, temp9;
+    __asm__ volatile (
+    "ulw              %[temp0],   -4(%[top])              \n\t" /* 4 bytes at top[-4..-1] */
+    "ulw              %[temp1],   0(%[top])               \n\t" /* 4 bytes at top[0..3] */
+    "preceu.ph.qbl    %[temp2],   %[temp0]                \n\t" /* widen bytes to halfwords */
+    "preceu.ph.qbr    %[temp0],   %[temp0]                \n\t"
+    "preceu.ph.qbla   %[temp3],   %[temp1]                \n\t" /* alternating-byte variants for the top word */
+    "preceu.ph.qbra   %[temp1],   %[temp1]                \n\t"
+    "packrl.ph        %[temp7],   %[temp3],    %[temp2]   \n\t"
+    "addqh_r.ph       %[temp4],   %[temp1],    %[temp3]   \n\t" /* rounded average of two samples */
+    "move             %[temp6],   %[temp1]                \n\t"
+    "append           %[temp1],   %[temp2],    16         \n\t"
+    "shll.ph          %[temp9],   %[temp6],    1          \n\t" /* 2 * centre tap */
+    "addqh_r.ph       %[temp5],   %[temp7],    %[temp6]   \n\t"
+    "shll.ph          %[temp8],   %[temp7],    1          \n\t"
+    "addu.ph          %[temp3],   %[temp7],    %[temp3]   \n\t" /* sums of the outer taps */
+    "addu.ph          %[temp1],   %[temp1],    %[temp6]   \n\t"
+    "packrl.ph        %[temp7],   %[temp2],    %[temp0]   \n\t"
+    "addu.ph          %[temp6],   %[temp0],    %[temp2]   \n\t"
+    "addu.ph          %[temp3],   %[temp3],    %[temp9]   \n\t"
+    "addu.ph          %[temp1],   %[temp1],    %[temp8]   \n\t"
+    "shll.ph          %[temp7],   %[temp7],    1          \n\t"
+    "shra_r.ph        %[temp3],   %[temp3],    2          \n\t" /* rounding shift: (sum + 2) >> 2 */
+    "shra_r.ph        %[temp1],   %[temp1],    2          \n\t"
+    "addu.ph          %[temp6],   %[temp6],    %[temp7]   \n\t"
+    "shra_r.ph        %[temp6],   %[temp6],    2          \n\t"
+    "precrq.ph.w      %[temp8],   %[temp4],    %[temp5]   \n\t" /* interleave halfwords into row order */
+    "append           %[temp4],   %[temp5],    16         \n\t"
+    "precrq.ph.w      %[temp2],   %[temp3],    %[temp1]   \n\t"
+    "append           %[temp3],   %[temp1],    16         \n\t"
+    "precr.qb.ph      %[temp8],   %[temp8],    %[temp4]   \n\t" /* pack halfwords into 4-byte rows */
+    "precr.qb.ph      %[temp3],   %[temp2],    %[temp3]   \n\t"
+    "usw              %[temp8],   0*" XSTR(BPS) "(%[dst]) \n\t" /* row 0 */
+    "usw              %[temp3],   1*" XSTR(BPS) "(%[dst]) \n\t" /* row 1 */
+    "append           %[temp3],   %[temp6],    8          \n\t" /* shift row 1 by a byte, inserting a new value */
+    "srl              %[temp6],   %[temp6],    16         \n\t"
+    "append           %[temp8],   %[temp6],    8          \n\t" /* likewise for row 0 */
+    "usw              %[temp3],   3*" XSTR(BPS) "(%[dst]) \n\t" /* row 3 */
+    "usw              %[temp8],   2*" XSTR(BPS) "(%[dst]) \n\t" /* row 2 */
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+static void LD4(uint8_t* dst, const uint8_t* top) { /* 4x4 predictor: reads the 8 bytes top[0..7], smooths
+       neighbouring samples with the 3-tap filter (a + 2*b + c + 2) >> 2, and writes four rows (stride BPS)
+       where rows 1 and 3 are derived from rows 0 and 2 by shifting in one new byte each ("prepend ..., 8").
+       The name suggests the "down-left" diagonal mode - confirm. */
+    int temp0, temp1, temp2, temp3, temp4, temp5;
+    int temp6, temp7, temp8, temp9, temp10, temp11;
+    __asm__ volatile(
+    "ulw             %[temp0],    0(%[top])               \n\t" /* 4 bytes at top[0..3] */
+    "ulw             %[temp1],    4(%[top])               \n\t" /* 4 bytes at top[4..7] */
+    "preceu.ph.qbl   %[temp2],    %[temp0]                \n\t" /* widen all 8 bytes to halfwords */
+    "preceu.ph.qbr   %[temp3],    %[temp0]                \n\t"
+    "preceu.ph.qbr   %[temp4],    %[temp1]                \n\t"
+    "preceu.ph.qbl   %[temp5],    %[temp1]                \n\t"
+    "packrl.ph       %[temp6],    %[temp2],    %[temp3]   \n\t" /* build the pairs of centre taps */
+    "packrl.ph       %[temp7],    %[temp4],    %[temp2]   \n\t"
+    "packrl.ph       %[temp8],    %[temp5],    %[temp4]   \n\t"
+    "shll.ph         %[temp6],    %[temp6],    1          \n\t" /* 2 * centre tap */
+    "addq.ph         %[temp9],    %[temp2],    %[temp6]   \n\t"
+    "shll.ph         %[temp7],    %[temp7],    1          \n\t"
+    "addq.ph         %[temp9],    %[temp9],    %[temp3]   \n\t"
+    "shll.ph         %[temp8],    %[temp8],    1          \n\t"
+    "shra_r.ph       %[temp9],    %[temp9],    2          \n\t" /* rounding shift: (sum + 2) >> 2 */
+    "addq.ph         %[temp10],   %[temp4],    %[temp7]   \n\t"
+    "addq.ph         %[temp11],   %[temp5],    %[temp8]   \n\t"
+    "addq.ph         %[temp10],   %[temp10],   %[temp2]   \n\t"
+    "addq.ph         %[temp11],   %[temp11],   %[temp4]   \n\t"
+    "shra_r.ph       %[temp10],   %[temp10],   2          \n\t"
+    "shra_r.ph       %[temp11],   %[temp11],   2          \n\t"
+    "srl             %[temp1],    %[temp1],    24         \n\t" /* final value, scalar form: top byte of the 2nd word, */
+    "sll             %[temp1],    %[temp1],    1          \n\t" /* doubled, ... */
+    "raddu.w.qb      %[temp5],    %[temp5]                \n\t" /* ... plus the sum of that word's two upper bytes */
+    "precr.qb.ph     %[temp9],    %[temp10],   %[temp9]   \n\t" /* pack halfwords into a 4-byte row */
+    "precr.qb.ph     %[temp10],   %[temp11],   %[temp10]  \n\t"
+    "addu            %[temp1],    %[temp1],    %[temp5]   \n\t"
+    "shra_r.w        %[temp1],    %[temp1],    2          \n\t" /* (sum + 2) >> 2 */
+    "usw             %[temp9],    0*" XSTR(BPS) "(%[dst]) \n\t" /* row 0 */
+    "usw             %[temp10],   2*" XSTR(BPS) "(%[dst]) \n\t" /* row 2 */
+    "prepend         %[temp9],    %[temp11],   8          \n\t" /* shift each row by one byte, inserting a new one */
+    "prepend         %[temp10],   %[temp1],    8          \n\t"
+    "usw             %[temp9],    1*" XSTR(BPS) "(%[dst]) \n\t" /* row 1 */
+    "usw             %[temp10],   3*" XSTR(BPS) "(%[dst]) \n\t" /* row 3 */
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+// Intra 4x4 predictor: reads the eight bytes top[0..7] and writes four 4-byte
+// rows to dst, one row every BPS bytes.
+// NOTE(review): presumably the "vertical-left" mode, judging by the VL name.
+// Two kinds of filtered values are produced on 16-bit lanes: rounded 2-sample
+// averages (addqh_r.ph) and 3-tap sums that are rounded and shifted right by 2
+// (addu.ph / shll.ph by 1 / shra_r.ph by 2).
+static void VL4(uint8_t* dst, const uint8_t* top) {
+    int temp0, temp1, temp2, temp3, temp4;
+    int temp5, temp6, temp7, temp8, temp9;
+    __asm__ volatile (
+    // Unaligned loads of top[0..3] and top[4..7].
+    "ulw              %[temp0],   0(%[top])               \n\t"
+    "ulw              %[temp1],   4(%[top])               \n\t"
+    // Zero-extend bytes into 16-bit lanes (alternating bytes for the first
+    // word, upper/lower byte pairs for the second).
+    "preceu.ph.qbla   %[temp2],   %[temp0]                \n\t"
+    "preceu.ph.qbra   %[temp0],   %[temp0]                \n\t"
+    "preceu.ph.qbl    %[temp3],   %[temp1]                \n\t"
+    "preceu.ph.qbr    %[temp1],   %[temp1]                \n\t"
+    "addqh_r.ph       %[temp4],   %[temp0],    %[temp2]   \n\t"
+    "packrl.ph        %[temp7],   %[temp1],    %[temp0]   \n\t"
+    "precrq.ph.w      %[temp6],   %[temp1],    %[temp2]   \n\t"
+    "shll.ph          %[temp9],   %[temp2],    1          \n\t"
+    "addqh_r.ph       %[temp5],   %[temp7],    %[temp2]   \n\t"
+    "shll.ph          %[temp8],   %[temp7],    1          \n\t"
+    "addu.ph          %[temp2],   %[temp2],    %[temp6]   \n\t"
+    "addu.ph          %[temp0],   %[temp0],    %[temp7]   \n\t"
+    "packrl.ph        %[temp7],   %[temp3],    %[temp1]   \n\t"
+    "addu.ph          %[temp6],   %[temp1],    %[temp3]   \n\t"
+    "addu.ph          %[temp2],   %[temp2],    %[temp8]   \n\t"
+    "addu.ph          %[temp0],   %[temp0],    %[temp9]   \n\t"
+    "shll.ph          %[temp7],   %[temp7],    1          \n\t"
+    "shra_r.ph        %[temp2],   %[temp2],    2          \n\t"
+    "shra_r.ph        %[temp0],   %[temp0],    2          \n\t"
+    "addu.ph          %[temp6],   %[temp6],    %[temp7]   \n\t"
+    "shra_r.ph        %[temp6],   %[temp6],    2          \n\t"
+    // Regroup the filtered lanes and narrow them to packed bytes: temp8
+    // becomes row 0 (from the averages), temp3 becomes row 1 (3-tap values).
+    "precrq.ph.w      %[temp8],   %[temp5],    %[temp4]   \n\t"
+    "append           %[temp5],   %[temp4],    16         \n\t"
+    "precrq.ph.w      %[temp3],   %[temp2],    %[temp0]   \n\t"
+    "append           %[temp2],   %[temp0],    16         \n\t"
+    "precr.qb.ph      %[temp8],   %[temp8],    %[temp5]   \n\t"
+    "precr.qb.ph      %[temp3],   %[temp3],    %[temp2]   \n\t"
+    // Rows 2 and 3 reuse rows 0 and 1 shifted right by one byte, with a byte
+    // taken from temp6 inserted at the top (prepend).
+    "usw              %[temp8],   0*" XSTR(BPS) "(%[dst]) \n\t"
+    "prepend          %[temp8],   %[temp6],    8          \n\t"
+    "usw              %[temp3],   1*" XSTR(BPS) "(%[dst]) \n\t"
+    "srl              %[temp6],   %[temp6],    16         \n\t"
+    "prepend          %[temp3],   %[temp6],    8          \n\t"
+    "usw              %[temp8],   2*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw              %[temp3],   3*" XSTR(BPS) "(%[dst]) \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+// Intra 4x4 predictor: reads the eight bytes top[-5..2] (the samples stored
+// before 'top' plus the first three top samples) and writes four 4-byte rows
+// to dst, one row every BPS bytes.
+// NOTE(review): presumably the "horizontal-down" mode, judging by the HD name.
+// The arithmetic matches VL4 up to the final regrouping: rounded 2-sample
+// averages (addqh_r.ph) and 3-tap sums rounded and shifted right by 2.
+static void HD4(uint8_t* dst, const uint8_t* top) {
+    int temp0, temp1, temp2, temp3, temp4;
+    int temp5, temp6, temp7, temp8, temp9;
+    __asm__ volatile (
+    // Unaligned loads of top[-5..-2] and top[-1..2].
+    "ulw              %[temp0],   -5(%[top])              \n\t"
+    "ulw              %[temp1],   -1(%[top])              \n\t"
+    // Zero-extend bytes into 16-bit lanes.
+    "preceu.ph.qbla   %[temp2],   %[temp0]                \n\t"
+    "preceu.ph.qbra   %[temp0],   %[temp0]                \n\t"
+    "preceu.ph.qbl    %[temp3],   %[temp1]                \n\t"
+    "preceu.ph.qbr    %[temp1],   %[temp1]                \n\t"
+    "addqh_r.ph       %[temp4],   %[temp0],    %[temp2]   \n\t"
+    "packrl.ph        %[temp7],   %[temp1],    %[temp0]   \n\t"
+    "precrq.ph.w      %[temp6],   %[temp1],    %[temp2]   \n\t"
+    "shll.ph          %[temp9],   %[temp2],    1          \n\t"
+    "addqh_r.ph       %[temp5],   %[temp7],    %[temp2]   \n\t"
+    "shll.ph          %[temp8],   %[temp7],    1          \n\t"
+    "addu.ph          %[temp2],   %[temp2],    %[temp6]   \n\t"
+    "addu.ph          %[temp0],   %[temp0],    %[temp7]   \n\t"
+    "packrl.ph        %[temp7],   %[temp3],    %[temp1]   \n\t"
+    "addu.ph          %[temp6],   %[temp1],    %[temp3]   \n\t"
+    "addu.ph          %[temp2],   %[temp2],    %[temp8]   \n\t"
+    "addu.ph          %[temp0],   %[temp0],    %[temp9]   \n\t"
+    "shll.ph          %[temp7],   %[temp7],    1          \n\t"
+    "shra_r.ph        %[temp2],   %[temp2],    2          \n\t"
+    "shra_r.ph        %[temp0],   %[temp0],    2          \n\t"
+    "addu.ph          %[temp6],   %[temp6],    %[temp7]   \n\t"
+    "shra_r.ph        %[temp6],   %[temp6],    2          \n\t"
+    // Interleave the averaged and 3-tap lanes and narrow to bytes; rows 0 and
+    // 1 are built from the upper halfwords, rows 2 and 3 from the lower ones.
+    "precrq.ph.w      %[temp1],   %[temp2],    %[temp5]   \n\t"
+    "precrq.ph.w      %[temp3],   %[temp0],    %[temp4]   \n\t"
+    "precr.qb.ph      %[temp7],   %[temp6],    %[temp1]   \n\t"
+    "precr.qb.ph      %[temp6],   %[temp1],    %[temp3]   \n\t"
+    "usw              %[temp7],   0*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw              %[temp6],   1*" XSTR(BPS) "(%[dst]) \n\t"
+    "append           %[temp2],   %[temp5],    16         \n\t"
+    "append           %[temp0],   %[temp4],    16         \n\t"
+    "precr.qb.ph      %[temp5],   %[temp3],    %[temp2]   \n\t"
+    "precr.qb.ph      %[temp4],   %[temp2],    %[temp0]   \n\t"
+    "usw              %[temp5],   2*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw              %[temp4],   3*" XSTR(BPS) "(%[dst]) \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+// Intra 4x4 predictor: reads only the four bytes top[-5..-2] and writes four
+// 4-byte rows to dst, one row every BPS bytes.
+// NOTE(review): presumably the "horizontal-up" mode, judging by the HU name.
+// Row 3 is a single byte replicated four times; the other rows mix rounded
+// 2-sample averages (addqh_r.ph) with 3-tap sums rounded and shifted by 2.
+static void HU4(uint8_t* dst, const uint8_t* top) {
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    __asm__ volatile (
+    // Unaligned load of top[-5..-2], then zero-extend to 16-bit lanes.
+    "ulw             %[temp0],   -5(%[top])              \n\t"
+    "preceu.ph.qbl   %[temp1],   %[temp0]                \n\t"
+    "preceu.ph.qbr   %[temp2],   %[temp0]                \n\t"
+    "packrl.ph       %[temp3],   %[temp1],    %[temp2]   \n\t"
+    // Replicate the low-order byte of the loaded word into all four bytes
+    // (that byte is top[-5] on a little-endian target); stored as row 3.
+    "replv.qb        %[temp7],   %[temp2]                \n\t"
+    "addqh_r.ph      %[temp4],   %[temp1],    %[temp3]   \n\t"
+    "addqh_r.ph      %[temp5],   %[temp3],    %[temp2]   \n\t"
+    "shll.ph         %[temp6],   %[temp3],    1          \n\t"
+    "addu.ph         %[temp3],   %[temp2],    %[temp3]   \n\t"
+    "addu.ph         %[temp6],   %[temp1],    %[temp6]   \n\t"
+    "shll.ph         %[temp0],   %[temp2],    1          \n\t"
+    "addu.ph         %[temp6],   %[temp6],    %[temp2]   \n\t"
+    "addu.ph         %[temp0],   %[temp3],    %[temp0]   \n\t"
+    "shra_r.ph       %[temp6],   %[temp6],    2          \n\t"
+    "shra_r.ph       %[temp0],   %[temp0],    2          \n\t"
+    // Regroup the filtered lanes and narrow them to bytes; the rows are stored
+    // in the order 0, 3, 2, 1.
+    "packrl.ph       %[temp3],   %[temp6],    %[temp5]   \n\t"
+    "precrq.ph.w     %[temp2],   %[temp6],    %[temp4]   \n\t"
+    "append          %[temp0],   %[temp5],    16         \n\t"
+    "precr.qb.ph     %[temp3],   %[temp3],    %[temp2]   \n\t"
+    "usw             %[temp3],   0*" XSTR(BPS) "(%[dst]) \n\t"
+    "precr.qb.ph     %[temp1],   %[temp7],    %[temp0]   \n\t"
+    "usw             %[temp7],   3*" XSTR(BPS) "(%[dst]) \n\t"
+    "packrl.ph       %[temp2],   %[temp1],    %[temp3]   \n\t"
+    "usw             %[temp1],   2*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw             %[temp2],   1*" XSTR(BPS) "(%[dst]) \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+//------------------------------------------------------------------------------
+// Chroma 8x8 prediction (paragraph 12.2)
+
+// Produces the four 8x8 chroma prediction candidates (DC, vertical, horizontal
+// and TrueMotion) for the U block and then for the V block.
+//   dst  - output area; each mode is written at its own C8xx8 offset
+//   left - left border samples, may be null
+//   top  - top border samples, may be null
+static void IntraChromaPreds(uint8_t* dst, const uint8_t* left, const uint8_t* top) {
+    // The V block lives 8 bytes further into 'dst' and 'top' and 16 bytes
+    // further into 'left'; a missing (null) border stays null.
+    uint8_t* const dst_v = dst + 8;
+    const uint8_t* const top_v = top ? top + 8 : top;
+    const uint8_t* const left_v = left ? left + 16 : left;
+
+    // U block
+    DCMode8(dst + C8DC8, left, top);
+    VerticalPred8(dst + C8VE8, top);
+    HorizontalPred8(dst + C8HE8, left);
+    TrueMotion8(dst + C8TM8, left, top);
+
+    // V block
+    DCMode8(dst_v + C8DC8, left_v, top_v);
+    VerticalPred8(dst_v + C8VE8, top_v);
+    HorizontalPred8(dst_v + C8HE8, left_v);
+    TrueMotion8(dst_v + C8TM8, left_v, top_v);
+}
+
+//------------------------------------------------------------------------------
+// luma 16x16 prediction (paragraph 12.3)
+
+// Produces the four 16x16 luma prediction candidates. Each mode writes to its
+// own region of 'dst', selected by the matching I16xx16 offset.
+static void Intra16Preds(uint8_t* dst, const uint8_t* left, const uint8_t* top) {
+    uint8_t* const dc_out = dst + I16DC16;
+    uint8_t* const ve_out = dst + I16VE16;
+    uint8_t* const he_out = dst + I16HE16;
+    uint8_t* const tm_out = dst + I16TM16;
+
+    DCMode16(dc_out, left, top);
+    VerticalPred16(ve_out, top);
+    HorizontalPred16(he_out, left);
+    TrueMotion16(tm_out, left, top);
+}
+
+// Produces all ten 4x4 luma prediction candidates, each written at its own
+// I4xx4 offset inside 'dst'.
+// Layout of the context around 'top':
+//   top[-5 .. -2] : left samples
+//   top[-1]       : top-left sample
+//   top[0 .. 3]   : top samples
+//   top[4 .. 7]   : top-right samples
+static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
+    DC4(dst + I4DC4, top);
+    TM4(dst + I4TM4, top);
+    VE4(dst + I4VE4, top);
+    HE4(dst + I4HE4, top);
+    RD4(dst + I4RD4, top);
+    VR4(dst + I4VR4, top);
+    LD4(dst + I4LD4, top);
+    VL4(dst + I4VL4, top);
+    HD4(dst + I4HD4, top);
+    HU4(dst + I4HU4, top);
+}
+
+//------------------------------------------------------------------------------
+// Metric
+
+#if !defined(WORK_AROUND_GCC)
+
+// Accumulates the squared differences of four byte pairs into $ac0.
+// A - byte offset applied to both %[a] and %[b]; it is stringified into the
+//     load instructions.
+// The words are read with 'lw', the bytes are zero-extended to 16-bit lanes,
+// subtracted lane-wise, and each difference vector is multiplied by itself and
+// added to the accumulator (dpa.w.ph). Uses temp0..temp3 as scratch.
+// NOTE(review): 'lw' is an aligned load, so a + A and b + A are presumably
+// expected to be 4-byte aligned - confirm against callers.
+#define GET_SSE_INNER(A)                                               \
+    "lw               %[temp0],    " #A                                \
+    "(%[a])                  \n\t"                                     \
+    "lw               %[temp1],    " #A                                \
+    "(%[b])                  \n\t"                                     \
+    "preceu.ph.qbr    %[temp2],    %[temp0]                      \n\t" \
+    "preceu.ph.qbl    %[temp0],    %[temp0]                      \n\t" \
+    "preceu.ph.qbr    %[temp3],    %[temp1]                      \n\t" \
+    "preceu.ph.qbl    %[temp1],    %[temp1]                      \n\t" \
+    "subq.ph          %[temp2],    %[temp2],    %[temp3]         \n\t" \
+    "subq.ph          %[temp0],    %[temp0],    %[temp1]         \n\t" \
+    "dpa.w.ph         $ac0,        %[temp2],    %[temp2]         \n\t" \
+    "dpa.w.ph         $ac0,        %[temp0],    %[temp0]         \n\t"
+
+// Applies GET_SSE_INNER to four offsets A, B, C and D.
+// The extra macro level matters: the arguments are not stringified here, so
+// any macro inside them (callers pass expressions containing BPS) is expanded
+// before GET_SSE_INNER turns the argument into assembly text.
+#define GET_SSE(A, B, C, D) \
+    GET_SSE_INNER(A)        \
+    GET_SSE_INNER(B)        \
+    GET_SSE_INNER(C)        \
+    GET_SSE_INNER(D)
+
+// Returns the sum of squared byte differences between 'a' and 'b' over 16 rows
+// of 16 bytes, rows being BPS bytes apart. The sum is accumulated in $ac0 and
+// its low 32 bits are returned.
+static int SSE16x16(const uint8_t* a, const uint8_t* b) {
+    int temp0, temp1, temp2, temp3;
+    int count;
+    __asm__ volatile(
+        // Clear the hi/lo accumulator.
+        "mult   $zero,    $zero                            \n\t"
+        // One GET_SSE per row: four 4-byte groups at columns 0, 4, 8 and 12.
+        GET_SSE(0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)
+        GET_SSE(1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)
+        GET_SSE(2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)
+        GET_SSE(3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)
+        GET_SSE(4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)
+        GET_SSE(5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)
+        GET_SSE(6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)
+        GET_SSE(7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)
+        GET_SSE(8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS)
+        GET_SSE(9 * BPS, 4 + 9 * BPS, 8 + 9 * BPS, 12 + 9 * BPS)
+        GET_SSE(10 * BPS, 4 + 10 * BPS, 8 + 10 * BPS, 12 + 10 * BPS)
+        GET_SSE(11 * BPS, 4 + 11 * BPS, 8 + 11 * BPS, 12 + 11 * BPS)
+        GET_SSE(12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS)
+        GET_SSE(13 * BPS, 4 + 13 * BPS, 8 + 13 * BPS, 12 + 13 * BPS)
+        GET_SSE(14 * BPS, 4 + 14 * BPS, 8 + 14 * BPS, 12 + 14 * BPS)
+        GET_SSE(15 * BPS, 4 + 15 * BPS, 8 + 15 * BPS, 12 + 15 * BPS)
+        // Fetch the low word of the accumulated sum.
+        "mflo   %[count]                                   \n\t"
+        : [temp0] "=&r"(temp0),
+          [temp1] "=&r"(temp1),
+          [temp2] "=&r"(temp2),
+          [temp3] "=&r"(temp3),
+          [count] "=&r"(count)
+        : [a] "r"(a),
+          [b] "r"(b)
+        : "memory", "hi", "lo");
+    return count;
+}
+
+// Returns the sum of squared byte differences between 'a' and 'b' over 8 rows
+// of 16 bytes, rows being BPS bytes apart. The sum is accumulated in $ac0 and
+// its low 32 bits are returned.
+static int SSE16x8(const uint8_t* a, const uint8_t* b) {
+    int temp0, temp1, temp2, temp3;
+    int count;
+    __asm__ volatile(
+        // Clear the hi/lo accumulator.
+        "mult   $zero,    $zero                            \n\t"
+        // One GET_SSE per row: four 4-byte groups at columns 0, 4, 8 and 12.
+        GET_SSE(0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)
+        GET_SSE(1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)
+        GET_SSE(2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)
+        GET_SSE(3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)
+        GET_SSE(4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)
+        GET_SSE(5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)
+        GET_SSE(6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)
+        GET_SSE(7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)
+        // Fetch the low word of the accumulated sum.
+        "mflo   %[count]                                   \n\t"
+        : [temp0] "=&r"(temp0),
+          [temp1] "=&r"(temp1),
+          [temp2] "=&r"(temp2),
+          [temp3] "=&r"(temp3),
+          [count] "=&r"(count)
+        : [a] "r"(a),
+          [b] "r"(b)
+        : "memory", "hi", "lo");
+    return count;
+}
+
+// Returns the sum of squared byte differences between 'a' and 'b' over 8 rows
+// of 8 bytes, rows being BPS bytes apart. The sum is accumulated in $ac0 and
+// its low 32 bits are returned.
+static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+    int temp0, temp1, temp2, temp3;
+    int count;
+    __asm__ volatile(
+        // Clear the hi/lo accumulator.
+        "mult   $zero,    $zero                            \n\t"
+        // Each GET_SSE covers two rows: 4-byte groups at columns 0 and 4.
+        GET_SSE(0 * BPS, 4 + 0 * BPS, 1 * BPS, 4 + 1 * BPS)
+        GET_SSE(2 * BPS, 4 + 2 * BPS, 3 * BPS, 4 + 3 * BPS)
+        GET_SSE(4 * BPS, 4 + 4 * BPS, 5 * BPS, 4 + 5 * BPS)
+        GET_SSE(6 * BPS, 4 + 6 * BPS, 7 * BPS, 4 + 7 * BPS)
+        // Fetch the low word of the accumulated sum.
+        "mflo   %[count]                                   \n\t"
+        : [temp0] "=&r"(temp0),
+          [temp1] "=&r"(temp1),
+          [temp2] "=&r"(temp2),
+          [temp3] "=&r"(temp3),
+          [count] "=&r"(count)
+        : [a] "r"(a),
+          [b] "r"(b)
+        : "memory", "hi", "lo");
+    return count;
+}
+
+// Returns the sum of squared byte differences between 'a' and 'b' over 4 rows
+// of 4 bytes, rows being BPS bytes apart. The sum is accumulated in $ac0 and
+// its low 32 bits are returned.
+static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+    int temp0, temp1, temp2, temp3;
+    int count;
+    __asm__ volatile(
+        // Clear the hi/lo accumulator.
+        "mult   $zero,    $zero                            \n\t"
+        // A single GET_SSE handles the four rows, one 4-byte group each.
+        GET_SSE(0 * BPS, 1 * BPS, 2 * BPS, 3 * BPS)
+        // Fetch the low word of the accumulated sum.
+        "mflo   %[count]                                   \n\t"
+        : [temp0] "=&r"(temp0),
+          [temp1] "=&r"(temp1),
+          [temp2] "=&r"(temp2),
+          [temp3] "=&r"(temp3),
+          [count] "=&r"(count)
+        : [a] "r"(a),
+          [b] "r"(b)
+        : "memory", "hi", "lo");
+    return count;
+}
+
+#undef GET_SSE
+#undef GET_SSE_INNER
+
+#endif // !WORK_AROUND_GCC
+
+#undef FILL_8_OR_16
+#undef FILL_PART
+#undef OUTPUT_EARLY_CLOBBER_REGS_17
+#undef MUL_HALF
+#undef ABS_X8
+#undef ADD_SUB_HALVES_X4
+
+//------------------------------------------------------------------------------
+// Quantization
+//
+
+// Macro for one pass through the for loop in QuantizeBlock, handling two
+// adjacent input coefficients at a time (packed as two 16-bit lanes).
+// The QUANTDIV macro is inlined.
+// J  - offset in bytes (kZigzag[n] * 2); applied to the 16-bit arrays
+//      ppin, ppsharpen, ppiq and ppq
+// K  - offset in bytes (kZigzag[n] * 4); applied to the 32-bit arrays
+//      ppzthresh and ppbias
+// N  - offset in bytes into pout for the level of the first coefficient
+// N1 - offset in bytes into pout for the level of the second coefficient
+//      (N and N1 are not always adjacent; QuantizeBlock passes e.g. 4 and 8)
+// Flow: coeff = |in| + sharpen and sign = per-lane sign mask of 'in'.
+// cmp.lt.ph / pick.ph build a per-lane flag for zthresh < coeff, and the code
+// branches on it:
+//   0: neither lane passes - both outputs and both inputs are zeroed
+//   1: both lanes pass     - both coefficients are quantized on packed lanes
+//   2: only the upper lane passes (second coefficient); the first is zeroed
+//   fall-through: only the lower lane passes (first coefficient); the second
+//                 is zeroed
+//   3: common exit
+// In the non-zero paths the level is (coeff * iq + bias) >> 17, clamped to
+// max_level, given the sign of the input, OR-ed into 'ret' and stored to pout;
+// level * q is written back to ppin.
+// Numeric local labels (0..3 with f suffixes) keep the macro safe to expand
+// several times inside one asm statement.
+#define QUANTIZE_ONE(J, K, N, N1)                                      \
+    "ulw         %[temp1],     " #J                                    \
+    "(%[ppin])                 \n\t"                                   \
+    "ulw         %[temp2],     " #J                                    \
+    "(%[ppsharpen])            \n\t"                                   \
+    "lhu         %[temp3],     " #K                                    \
+    "(%[ppzthresh])            \n\t"                                   \
+    "lhu         %[temp6],     " #K                                    \
+    "+4(%[ppzthresh])          \n\t"                                   \
+    "absq_s.ph   %[temp4],     %[temp1]                        \n\t"   \
+    "ins         %[temp3],     %[temp6],         16,       16  \n\t"   \
+    "addu.ph     %[coeff],     %[temp4],         %[temp2]      \n\t"   \
+    "shra.ph     %[sign],      %[temp1],         15            \n\t"   \
+    "li          %[level],     0x10001                         \n\t"   \
+    "cmp.lt.ph   %[temp3],     %[coeff]                        \n\t"   \
+    "lhu         %[temp1],     " #J                                    \
+    "(%[ppiq])                 \n\t"                                   \
+    "pick.ph     %[temp5],     %[level],         $0            \n\t"   \
+    "lw          %[temp2],     " #K                                    \
+    "(%[ppbias])               \n\t"                                   \
+    "beqz        %[temp5],     0f                              \n\t"   \
+    "lhu         %[temp3],     " #J                                    \
+    "(%[ppq])                  \n\t"                                   \
+    "beq         %[temp5],     %[level],         1f            \n\t"   \
+    "andi        %[temp5],     %[temp5],         0x1           \n\t"   \
+    "andi        %[temp4],     %[coeff],         0xffff        \n\t"   \
+    "beqz        %[temp5],     2f                              \n\t"   \
+    "mul         %[level],     %[temp4],         %[temp1]      \n\t"   \
+    "sh          $0,           " #J                                    \
+    "+2(%[ppin])               \n\t"                                   \
+    "sh          $0,           " #N1                                   \
+    "(%[pout])                \n\t"                                    \
+    "addu        %[level],     %[level],         %[temp2]      \n\t"   \
+    "sra         %[level],     %[level],         17            \n\t"   \
+    "slt         %[temp4],     %[max_level],     %[level]      \n\t"   \
+    "movn        %[level],     %[max_level],     %[temp4]      \n\t"   \
+    "andi        %[temp6],     %[sign],          0xffff        \n\t"   \
+    "xor         %[level],     %[level],         %[temp6]      \n\t"   \
+    "subu        %[level],     %[level],         %[temp6]      \n\t"   \
+    "mul         %[temp5],     %[level],         %[temp3]      \n\t"   \
+    "or          %[ret],       %[ret],           %[level]      \n\t"   \
+    "sh          %[level],     " #N                                    \
+    "(%[pout])                 \n\t"                                   \
+    "sh          %[temp5],     " #J                                    \
+    "(%[ppin])                 \n\t"                                   \
+    "j           3f                                            \n\t"   \
+    "2:                                                          \n\t" \
+    "lhu         %[temp1],     " #J                                    \
+    "+2(%[ppiq])               \n\t"                                   \
+    "srl         %[temp5],     %[coeff],         16            \n\t"   \
+    "mul         %[level],     %[temp5],         %[temp1]      \n\t"   \
+    "lw          %[temp2],     " #K                                    \
+    "+4(%[ppbias])             \n\t"                                   \
+    "lhu         %[temp3],     " #J                                    \
+    "+2(%[ppq])                \n\t"                                   \
+    "addu        %[level],     %[level],         %[temp2]      \n\t"   \
+    "sra         %[level],     %[level],         17            \n\t"   \
+    "srl         %[temp6],     %[sign],          16            \n\t"   \
+    "slt         %[temp4],     %[max_level],     %[level]      \n\t"   \
+    "movn        %[level],     %[max_level],     %[temp4]      \n\t"   \
+    "xor         %[level],     %[level],         %[temp6]      \n\t"   \
+    "subu        %[level],     %[level],         %[temp6]      \n\t"   \
+    "mul         %[temp5],     %[level],         %[temp3]      \n\t"   \
+    "sh          $0,           " #J                                    \
+    "(%[ppin])                 \n\t"                                   \
+    "sh          $0,           " #N                                    \
+    "(%[pout])                 \n\t"                                   \
+    "or          %[ret],       %[ret],           %[level]      \n\t"   \
+    "sh          %[temp5],     " #J                                    \
+    "+2(%[ppin])               \n\t"                                   \
+    "sh          %[level],     " #N1                                   \
+    "(%[pout])                \n\t"                                    \
+    "j           3f                                            \n\t"   \
+    "1:                                                          \n\t" \
+    "lhu         %[temp1],     " #J                                    \
+    "(%[ppiq])                 \n\t"                                   \
+    "lw          %[temp2],     " #K                                    \
+    "(%[ppbias])               \n\t"                                   \
+    "ulw         %[temp3],     " #J                                    \
+    "(%[ppq])                  \n\t"                                   \
+    "andi        %[temp5],     %[coeff],         0xffff        \n\t"   \
+    "srl         %[temp0],     %[coeff],         16            \n\t"   \
+    "lhu         %[temp6],     " #J                                    \
+    "+2(%[ppiq])               \n\t"                                   \
+    "lw          %[coeff],     " #K                                    \
+    "+4(%[ppbias])             \n\t"                                   \
+    "mul         %[level],     %[temp5],         %[temp1]      \n\t"   \
+    "mul         %[temp4],     %[temp0],         %[temp6]      \n\t"   \
+    "addu        %[level],     %[level],         %[temp2]      \n\t"   \
+    "addu        %[temp4],     %[temp4],         %[coeff]      \n\t"   \
+    "precrq.ph.w %[level],     %[temp4],         %[level]      \n\t"   \
+    "shra.ph     %[level],     %[level],         1             \n\t"   \
+    "cmp.lt.ph   %[max_level1],%[level]                        \n\t"   \
+    "pick.ph     %[level],     %[max_level],     %[level]      \n\t"   \
+    "xor         %[level],     %[level],         %[sign]       \n\t"   \
+    "subu.ph     %[level],     %[level],         %[sign]       \n\t"   \
+    "mul.ph      %[temp3],     %[level],         %[temp3]      \n\t"   \
+    "or          %[ret],       %[ret],           %[level]      \n\t"   \
+    "sh          %[level],     " #N                                    \
+    "(%[pout])                 \n\t"                                   \
+    "srl         %[level],     %[level],         16            \n\t"   \
+    "sh          %[level],     " #N1                                   \
+    "(%[pout])                \n\t"                                    \
+    "usw         %[temp3],     " #J                                    \
+    "(%[ppin])                 \n\t"                                   \
+    "j           3f                                            \n\t"   \
+    "0:                                                          \n\t" \
+    "sh          $0,           " #N                                    \
+    "(%[pout])                 \n\t"                                   \
+    "sh          $0,           " #N1                                   \
+    "(%[pout])                \n\t"                                    \
+    "usw         $0,           " #J                                    \
+    "(%[ppin])                 \n\t"                                   \
+    "3:                                                          \n\t"
+
+// Quantizes the 16 coefficients of 'in' using the tables in 'mtx'.
+// The quantized levels are stored to 'out' and 'in' is overwritten in place
+// (dequantized value, or zero for coefficients that quantize to zero).
+// Returns 1 if at least one stored level is non-zero, 0 otherwise.
+static int QuantizeBlock(int16_t in[16], int16_t out[16], const VP8Matrix* const mtx) {
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
+    int sign, coeff, level;
+    int ret = 0;  // OR of every level produced by the asm below
+
+    // Clamp value, both as a scalar and replicated into two 16-bit lanes.
+    const int max_level = MAX_LEVEL;
+    const int max_level1 = (max_level << 16) | max_level;
+
+    // Base pointers handed to the assembly as register operands.
+    int16_t* const ppin = in;
+    int16_t* const pout = out;
+    const uint16_t* const ppsharpen = mtx->sharpen_;
+    const uint32_t* const ppzthresh = mtx->zthresh_;
+    const uint16_t* const ppq = mtx->q_;
+    const uint16_t* const ppiq = mtx->iq_;
+    const uint32_t* const ppbias = mtx->bias_;
+
+    __asm__ volatile(
+        // Eight passes, two coefficients each.
+        QUANTIZE_ONE(0, 0, 0, 2)
+        QUANTIZE_ONE(4, 8, 10, 12)
+        QUANTIZE_ONE(8, 16, 4, 8)
+        QUANTIZE_ONE(12, 24, 14, 24)
+        QUANTIZE_ONE(16, 32, 6, 16)
+        QUANTIZE_ONE(20, 40, 22, 26)
+        QUANTIZE_ONE(24, 48, 18, 20)
+        QUANTIZE_ONE(28, 56, 28, 30)
+
+        : [temp0] "=&r"(temp0),
+          [temp1] "=&r"(temp1),
+          [temp2] "=&r"(temp2),
+          [temp3] "=&r"(temp3),
+          [temp4] "=&r"(temp4),
+          [temp5] "=&r"(temp5),
+          [sign] "=&r"(sign),
+          [coeff] "=&r"(coeff),
+          [level] "=&r"(level),
+          [temp6] "=&r"(temp6),
+          [ret] "+&r"(ret)
+        : [ppin] "r"(ppin),
+          [pout] "r"(pout),
+          [max_level1] "r"(max_level1),
+          [ppiq] "r"(ppiq),
+          [max_level] "r"(max_level),
+          [ppbias] "r"(ppbias),
+          [ppzthresh] "r"(ppzthresh),
+          [ppsharpen] "r"(ppsharpen),
+          [ppq] "r"(ppq)
+        : "memory", "hi", "lo");
+
+    return (ret != 0);
+}
+
+// Quantizes two consecutive 16-coefficient blocks with the same matrix.
+// Bit 0 of the result is the non-zero flag of the first block, bit 1 the flag
+// of the second block.
+static int Quantize2Blocks(int16_t in[32], int16_t out[32], const VP8Matrix* const mtx) {
+    const int first_nz = QuantizeBlock(in, out, mtx);
+    const int second_nz = QuantizeBlock(in + 16, out + 16, mtx);
+    return first_nz | (second_nz << 1);
+}
+
+#undef QUANTIZE_ONE
+
+// Macro for one horizontal pass in FTransformWHT.
+// temp0..temp7 hold tmp[0]..tmp[15] (two 16-bit values per register).
+// A, B, C, D - offsets in bytes of the four int16 values loaded from 'in'
+// TEMP0, TEMP1 - registers receiving the corresponding tmp elements
+// The four values are loaded with 'lh', packed in pairs into 16-bit lanes
+// (ins), and combined with two rounds of lane-wise add/subtract; temp8 and
+// temp9 are scratch. The final 'rotr' by 16 swaps the two halfwords of TEMP1.
+#define HORIZONTAL_PASS_WHT(A, B, C, D, TEMP0, TEMP1)               \
+    "lh              %[" #TEMP0 "],  " #A                           \
+    "(%[in])            \n\t"                                       \
+    "lh              %[" #TEMP1 "],  " #B                           \
+    "(%[in])            \n\t"                                       \
+    "lh              %[temp8],     " #C                             \
+    "(%[in])              \n\t"                                     \
+    "lh              %[temp9],     " #D                             \
+    "(%[in])              \n\t"                                     \
+    "ins             %[" #TEMP1 "],  %[" #TEMP0                     \
+    "],  16,  16  \n\t"                                             \
+    "ins             %[temp9],     %[temp8],     16,  16      \n\t" \
+    "subq.ph         %[temp8],     %[" #TEMP1                       \
+    "],  %[temp9]   \n\t"                                           \
+    "addq.ph         %[temp9],     %[" #TEMP1                       \
+    "],  %[temp9]   \n\t"                                           \
+    "precrq.ph.w     %[" #TEMP0                                     \
+    "],  %[temp8],     %[temp9]   \n\t"                             \
+    "append          %[temp8],     %[temp9],     16           \n\t" \
+    "subq.ph         %[" #TEMP1 "],  %[" #TEMP0                     \
+    "],  %[temp8] \n\t"                                             \
+    "addq.ph         %[" #TEMP0 "],  %[" #TEMP0                     \
+    "],  %[temp8] \n\t"                                             \
+    "rotr            %[" #TEMP1 "],  %[" #TEMP1 "],  16       \n\t"
+
+// Macro for one vertical pass in FTransformWHT.
+// temp0..temp7 hold tmp[0]..tmp[15] (two 16-bit values per register).
+// A, B, C, D - offsets in bytes of the four words stored to the 'out' buffer
+// TEMP0, TEMP2, TEMP4 and TEMP6 - registers holding the corresponding tmp
+// elements; they are overwritten with the results before being stored.
+// The first add/subtract round is full precision (addq.ph / subq.ph); the
+// second uses the halving forms (addqh.ph / subqh.ph), so each result is
+// shifted right by one. temp8 and temp9 are scratch. Stores use 'usw'.
+#define VERTICAL_PASS_WHT(A, B, C, D, TEMP0, TEMP2, TEMP4, TEMP6) \
+    "addq.ph         %[temp8],     %[" #TEMP0 "],  %[" #TEMP4     \
+    "]    \n\t"                                                   \
+    "addq.ph         %[temp9],     %[" #TEMP2 "],  %[" #TEMP6     \
+    "]    \n\t"                                                   \
+    "subq.ph         %[" #TEMP2 "],  %[" #TEMP2 "],  %[" #TEMP6   \
+    "]  \n\t"                                                     \
+    "subq.ph         %[" #TEMP6 "],  %[" #TEMP0 "],  %[" #TEMP4   \
+    "]  \n\t"                                                     \
+    "addqh.ph        %[" #TEMP0                                   \
+    "],  %[temp8],     %[temp9]         \n\t"                     \
+    "subqh.ph        %[" #TEMP4 "],  %[" #TEMP6 "],  %[" #TEMP2   \
+    "]  \n\t"                                                     \
+    "addqh.ph        %[" #TEMP2 "],  %[" #TEMP2 "],  %[" #TEMP6   \
+    "]  \n\t"                                                     \
+    "subqh.ph        %[" #TEMP6                                   \
+    "],  %[temp8],     %[temp9]         \n\t"                     \
+    "usw             %[" #TEMP0 "],  " #A                         \
+    "(%[out])                 \n\t"                               \
+    "usw             %[" #TEMP2 "],  " #B                         \
+    "(%[out])                 \n\t"                               \
+    "usw             %[" #TEMP4 "],  " #C                         \
+    "(%[out])                 \n\t"                               \
+    "usw             %[" #TEMP6 "],  " #D "(%[out])                 \n\t"
+
+// Forward Walsh-Hadamard transform.
+// Reads 16 int16 values from 'in', one every 32 bytes (byte offsets 0..480),
+// and writes 16 int16 results to 'out' as eight packed words.
+static void FTransformWHT(const int16_t* in, int16_t* out) {
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    int temp8, temp9;  // scratch registers shared by both macros
+
+    __asm__ volatile(
+        // Horizontal passes: four inputs each, results kept in a register pair.
+        HORIZONTAL_PASS_WHT(0, 32, 64, 96, temp0, temp1)
+        HORIZONTAL_PASS_WHT(128, 160, 192, 224, temp2, temp3)
+        HORIZONTAL_PASS_WHT(256, 288, 320, 352, temp4, temp5)
+        HORIZONTAL_PASS_WHT(384, 416, 448, 480, temp6, temp7)
+        // Vertical passes: combine the even registers, then the odd ones.
+        VERTICAL_PASS_WHT(0, 8, 16, 24, temp0, temp2, temp4, temp6)
+        VERTICAL_PASS_WHT(4, 12, 20, 28, temp1, temp3, temp5, temp7)
+        : [temp0] "=&r"(temp0),
+          [temp1] "=&r"(temp1),
+          [temp2] "=&r"(temp2),
+          [temp3] "=&r"(temp3),
+          [temp4] "=&r"(temp4),
+          [temp5] "=&r"(temp5),
+          [temp6] "=&r"(temp6),
+          [temp7] "=&r"(temp7),
+          [temp8] "=&r"(temp8),
+          [temp9] "=&r"(temp9)
+        : [in] "r"(in),
+          [out] "r"(out)
+        : "memory");
+}
+
+#undef VERTICAL_PASS_WHT
+#undef HORIZONTAL_PASS_WHT
+
+// Histogram-binning asm fragment, 8 int16 coefficients per expansion: loads four unaligned words at byte offsets
+// A, B, C, D from %[out], takes |x| >> 3 per halfword, clamps with a saturating shift pair (shll_s.ph / shrl.ph by 10),
+// scales by 4 (shll.ph 2) into a byte offset and increments the 32-bit counter at %[dist] + offset (lw/addiu/sw).
+#define CONVERT_COEFFS_TO_BIN(A, B, C, D)                                                                                                                                \
+    "ulw        %[temp0],  " #A                                                                                                                                          \
+    "(%[out])                \n\t"                                                                                                                                       \
+    "ulw        %[temp1],  " #B                                                                                                                                          \
+    "(%[out])                \n\t"                                                                                                                                       \
+    "ulw        %[temp2],  " #C                                                                                                                                          \
+    "(%[out])                \n\t"                                                                                                                                       \
+    "ulw        %[temp3],  " #D                                                                                                                                          \
+    "(%[out])                \n\t"                                                                                                                                       \
+    "absq_s.ph  %[temp0],  %[temp0]                      \n\t"                                                                                                           \
+    "absq_s.ph  %[temp1],  %[temp1]                      \n\t"                                                                                                           \
+    "absq_s.ph  %[temp2],  %[temp2]                      \n\t"                                                                                                           \
+    "absq_s.ph  %[temp3],  %[temp3]                      \n\t" /* TODO(skal): add rounding ? shra_r.ph : shra.ph */ /*             for following 4 instructions       */ \
+    "shra.ph    %[temp0],  %[temp0],    3                \n\t"                                                                                                           \
+    "shra.ph    %[temp1],  %[temp1],    3                \n\t"                                                                                                           \
+    "shra.ph    %[temp2],  %[temp2],    3                \n\t"                                                                                                           \
+    "shra.ph    %[temp3],  %[temp3],    3                \n\t"                                                                                                           \
+    "shll_s.ph  %[temp0],  %[temp0],    10               \n\t"                                                                                                           \
+    "shll_s.ph  %[temp1],  %[temp1],    10               \n\t"                                                                                                           \
+    "shll_s.ph  %[temp2],  %[temp2],    10               \n\t"                                                                                                           \
+    "shll_s.ph  %[temp3],  %[temp3],    10               \n\t"                                                                                                           \
+    "shrl.ph    %[temp0],  %[temp0],    10               \n\t"                                                                                                           \
+    "shrl.ph    %[temp1],  %[temp1],    10               \n\t"                                                                                                           \
+    "shrl.ph    %[temp2],  %[temp2],    10               \n\t"                                                                                                           \
+    "shrl.ph    %[temp3],  %[temp3],    10               \n\t"                                                                                                           \
+    "shll.ph    %[temp0],  %[temp0],    2                \n\t"                                                                                                           \
+    "shll.ph    %[temp1],  %[temp1],    2                \n\t"                                                                                                           \
+    "shll.ph    %[temp2],  %[temp2],    2                \n\t"                                                                                                           \
+    "shll.ph    %[temp3],  %[temp3],    2                \n\t"                                                                                                           \
+    "ext        %[temp4],  %[temp0],    0,       16      \n\t"                                                                                                           \
+    "ext        %[temp0],  %[temp0],    16,      16      \n\t"                                                                                                           \
+    "addu       %[temp4],  %[temp4],    %[dist]          \n\t"                                                                                                           \
+    "addu       %[temp0],  %[temp0],    %[dist]          \n\t"                                                                                                           \
+    "ext        %[temp5],  %[temp1],    0,       16      \n\t"                                                                                                           \
+    "lw         %[temp8],  0(%[temp4])                   \n\t"                                                                                                           \
+    "ext        %[temp1],  %[temp1],    16,      16      \n\t"                                                                                                           \
+    "addu       %[temp5],  %[temp5],    %[dist]          \n\t"                                                                                                           \
+    "addiu      %[temp8],  %[temp8],    1                \n\t"                                                                                                           \
+    "sw         %[temp8],  0(%[temp4])                   \n\t"                                                                                                           \
+    "lw         %[temp8],  0(%[temp0])                   \n\t"                                                                                                           \
+    "addu       %[temp1],  %[temp1],    %[dist]          \n\t"                                                                                                           \
+    "ext        %[temp6],  %[temp2],    0,       16      \n\t"                                                                                                           \
+    "addiu      %[temp8],  %[temp8],    1                \n\t"                                                                                                           \
+    "sw         %[temp8],  0(%[temp0])                   \n\t"                                                                                                           \
+    "lw         %[temp8],  0(%[temp5])                   \n\t"                                                                                                           \
+    "ext        %[temp2],  %[temp2],    16,      16      \n\t"                                                                                                           \
+    "addu       %[temp6],  %[temp6],    %[dist]          \n\t"                                                                                                           \
+    "addiu      %[temp8],  %[temp8],    1                \n\t"                                                                                                           \
+    "sw         %[temp8],  0(%[temp5])                   \n\t"                                                                                                           \
+    "lw         %[temp8],  0(%[temp1])                   \n\t"                                                                                                           \
+    "addu       %[temp2],  %[temp2],    %[dist]          \n\t"                                                                                                           \
+    "ext        %[temp7],  %[temp3],    0,       16      \n\t"                                                                                                           \
+    "addiu      %[temp8],  %[temp8],    1                \n\t"                                                                                                           \
+    "sw         %[temp8],  0(%[temp1])                   \n\t"                                                                                                           \
+    "lw         %[temp8],  0(%[temp6])                   \n\t"                                                                                                           \
+    "ext        %[temp3],  %[temp3],    16,      16      \n\t"                                                                                                           \
+    "addu       %[temp7],  %[temp7],    %[dist]          \n\t"                                                                                                           \
+    "addiu      %[temp8],  %[temp8],    1                \n\t"                                                                                                           \
+    "sw         %[temp8],  0(%[temp6])                   \n\t"                                                                                                           \
+    "lw         %[temp8],  0(%[temp2])                   \n\t"                                                                                                           \
+    "addu       %[temp3],  %[temp3],    %[dist]          \n\t"                                                                                                           \
+    "addiu      %[temp8],  %[temp8],    1                \n\t"                                                                                                           \
+    "sw         %[temp8],  0(%[temp2])                   \n\t"                                                                                                           \
+    "lw         %[temp8],  0(%[temp7])                   \n\t"                                                                                                           \
+    "addiu      %[temp8],  %[temp8],    1                \n\t"                                                                                                           \
+    "sw         %[temp8],  0(%[temp7])                   \n\t"                                                                                                           \
+    "lw         %[temp8],  0(%[temp3])                   \n\t"                                                                                                           \
+    "addiu      %[temp8],  %[temp8],    1                \n\t"                                                                                                           \
+    "sw         %[temp8],  0(%[temp3])                   \n\t"
+// Bins the forward-transform coefficients of blocks [start_block, end_block) and passes the counts to VP8SetHistogramData.
+static void CollectHistogram(
+    const uint8_t* ref, const uint8_t* pred, int start_block, int end_block, VP8Histogram* const histo) {
+    int j;
+    int distribution[MAX_COEFF_THRESH + 1] = {0}; // one counter per bin; incremented in place by the asm below
+    // The clamp is hard-coded inside CONVERT_COEFFS_TO_BIN (shifts by 10); assumes MAX_COEFF_THRESH >= 31 — TODO confirm.
+    for (j = start_block; j < end_block; ++j) {
+        int16_t out[16]; // coefficients of block j, written by VP8FTransform
+        int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
+
+        VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
+
+        // Convert coefficients to bin: byte offsets 0..12 cover out[0..7], offsets 16..28 cover out[8..15].
+        __asm__ volatile(CONVERT_COEFFS_TO_BIN(0, 4, 8, 12) CONVERT_COEFFS_TO_BIN(16, 20, 24, 28)
+                         : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
+                           [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [temp6] "=&r"(temp6), [temp7] "=&r"(temp7),
+                           [temp8] "=&r"(temp8)
+                         : [dist] "r"(distribution), [out] "r"(out) // only operands the macro text actually references
+                         : "memory");                               // counters are updated through %[dist]
+    }
+    VP8SetHistogramData(distribution, histo);
+}
+
+#undef CONVERT_COEFFS_TO_BIN
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspInitMIPSdspR2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPSdspR2(void) { // points the encoder DSP hooks at the routines in this file
+    VP8FTransform = FTransform; // transforms
+    VP8FTransformWHT = FTransformWHT;
+    VP8ITransform = ITransform;
+    VP8CollectHistogram = CollectHistogram; // histogram
+    VP8EncQuantizeBlock = QuantizeBlock; // quantization
+    VP8EncQuantize2Blocks = Quantize2Blocks;
+    VP8EncPredLuma4 = Intra4Preds; // intra predictors
+    VP8EncPredLuma16 = Intra16Preds;
+    VP8EncPredChroma8 = IntraChromaPreds;
+    VP8TDisto4x4 = Disto4x4; // distortion metrics
+    VP8TDisto16x16 = Disto16x16;
+#if !defined(WORK_AROUND_GCC)
+    VP8SSE4x4 = SSE4x4; // SSE hooks are only installed when WORK_AROUND_GCC is not defined
+    VP8SSE8x8 = SSE8x8;
+    VP8SSE16x8 = SSE16x8;
+    VP8SSE16x16 = SSE16x16;
+#endif
+}
+
+#else // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(VP8EncDspInitMIPSdspR2)
+
+#endif // WEBP_USE_MIPS_DSP_R2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/enc_neon.c b/codec/L2/demos/webpEnc/host/src/dsp/enc_neon.c
new file mode 100644
index 0000000000..fd62debe35
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/enc_neon.c
@@ -0,0 +1,875 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// ARM NEON version of speed-critical encoding functions.
+//
+// adapted from libvpx (http://www.webmproject.org/code/)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include <assert.h>
+
+#include "./neon.h"
+#include "../enc/vp8enci.h"
+
+//------------------------------------------------------------------------------
+// Transforms (Paragraph 14.4)
+
+// Inverse transform.
+// This code is pretty much the same as TransformOne in the dec_neon.c, except
+// for subtraction to *ref. See the comments there for algorithmic explanations.
+
+static const int16_t kC1 = 20091; // applied as x + ((2 * x * kC1 >> 16) >> 1) in the code below
+static const int16_t kC2 = 17734; // half of the intended multiplier (2 * 17734 = 35468): vqdmulh doubles the product.
+
+// The intrinsics version below works but is *slower* than the inline-asm
+// version (with gcc-4.6), so it is only compiled when WEBP_USE_INTRINSICS
+// is defined.
+// With gcc-4.8, it is slightly faster than the inline assembly.
+#if defined(WEBP_USE_INTRINSICS)
+
+static WEBP_INLINE int16x8_t ConvertU8ToS16(uint32x2_t v) { // 'v' is treated as eight packed unsigned bytes
+    const uint16x8_t widened = vmovl_u8(vreinterpret_u8_u32(v)); // zero-extend every byte to 16 bits
+    return vreinterpretq_s16_u16(widened);                       // same bits, lanes relabelled as signed
+}
+
+// Narrows 'dst01' and 'dst23' to unsigned 8 bits with saturation and writes the
+// four resulting 4-byte rows to rows 0..3 of 'dst' (row pitch BPS).
+static WEBP_INLINE void SaturateAndStore4x4(uint8_t* const dst, const int16x8_t dst01, const int16x8_t dst23) {
+    // Saturate to 8 bits, then view each 8-byte result as two 32-bit rows.
+    const uint32x2_t rows01 = vreinterpret_u32_u8(vqmovun_s16(dst01));
+    const uint32x2_t rows23 = vreinterpret_u32_u8(vqmovun_s16(dst23));
+
+    // Lane 0 holds the first row of each pair, lane 1 the second.
+    vst1_lane_u32((uint32_t*)(dst + 0 * BPS), rows01, 0);
+    vst1_lane_u32((uint32_t*)(dst + 1 * BPS), rows01, 1);
+    vst1_lane_u32((uint32_t*)(dst + 2 * BPS), rows23, 0);
+    vst1_lane_u32((uint32_t*)(dst + 3 * BPS), rows23, 1);
+}
+
+static WEBP_INLINE void Add4x4(const int16x8_t row01,
+                               const int16x8_t row23,
+                               const uint8_t* const ref,
+                               uint8_t* const dst) {
+    // Gather the four 4-byte reference rows, two rows per 64-bit register.
+    uint32x2_t ref01 = vdup_n_u32(0);
+    uint32x2_t ref23 = vdup_n_u32(0);
+    ref01 = vld1_lane_u32((uint32_t*)(ref + 0 * BPS), ref01, 0);
+    ref01 = vld1_lane_u32((uint32_t*)(ref + 1 * BPS), ref01, 1);
+    ref23 = vld1_lane_u32((uint32_t*)(ref + 2 * BPS), ref23, 0);
+    ref23 = vld1_lane_u32((uint32_t*)(ref + 3 * BPS), ref23, 1);
+
+    {
+        // Widen the reference pixels to 16 bits so the residual can be added.
+        const int16x8_t ref01_s16 = ConvertU8ToS16(ref01);
+        const int16x8_t ref23_s16 = ConvertU8ToS16(ref23);
+
+        // Rounding shift right by 3 of the transform output, accumulated
+        // onto the reference pixels (vrsra).
+        const int16x8_t sum01 = vrsraq_n_s16(ref01_s16, row01, 3);
+        const int16x8_t sum23 = vrsraq_n_s16(ref23_s16, row23, 3);
+        // Saturate to 8 bits and write the four rows to 'dst'.
+        SaturateAndStore4x4(dst, sum01, sum23);
+    }
+}
+
+static WEBP_INLINE void Transpose8x2(const int16x8_t in0, const int16x8_t in1, int16x8x2_t* const out) {
+    // Input:  in0 = a0 a1 a2 a3 | b0 b1 b2 b3     Output: val[0] = a0 b0 c0 d0 | a1 b1 c1 d1
+    //         in1 = c0 c1 c2 c3 | d0 d1 d2 d3             val[1] = a2 b2 c2 d2 | a3 b3 c3 d3
+    // First zip interleaves in0 with in1: {a0 c0 a1 c1 a2 c2 a3 c3}, {b0 d0 b1 d1 b2 d2 b3 d3}.
+    const int16x8x2_t ac_bd = vzipq_s16(in0, in1);
+    // Second zip interleaves those two vectors, producing the output layout shown above.
+    *out = vzipq_s16(ac_bd.val[0], ac_bd.val[1]);
+}
+
+static WEBP_INLINE void TransformPass(int16x8x2_t* const rows) {
+    // rows->val[0] = in0 | in4 and rows->val[1] = in8 | in12 (low half | high half).
+    const int16x4_t in0 = vget_low_s16(rows->val[0]);
+    const int16x4_t in8 = vget_low_s16(rows->val[1]);
+    const int16x8_t in4_in12 = vcombine_s16(vget_high_s16(rows->val[0]), vget_high_s16(rows->val[1]));
+    // vqdmulh gives (2 * x * k) >> 16. For kC1 that product is halved and accumulated onto x;
+    // kC2 is stored pre-halved, so its product is used as is.
+    const int16x8_t mul_c1 = vsraq_n_s16(in4_in12, vqdmulhq_n_s16(in4_in12, kC1), 1);
+    const int16x8_t mul_c2 = vqdmulhq_n_s16(in4_in12, kC2);
+    const int16x4_t a = vqadd_s16(in0, in8); // in0 + in8
+    const int16x4_t b = vqsub_s16(in0, in8); // in0 - in8
+    // c = kC2 * in4 - kC1 * in12
+    // d = kC1 * in4 + kC2 * in12
+    const int16x4_t c = vqsub_s16(vget_low_s16(mul_c2), vget_high_s16(mul_c1));
+    const int16x4_t d = vqadd_s16(vget_low_s16(mul_c1), vget_high_s16(mul_c2));
+    const int16x8_t a_b = vcombine_s16(a, b);
+    const int16x8_t d_c = vcombine_s16(d, c);
+    const int16x8_t sum = vqaddq_s16(a_b, d_c);  // a+d | b+c
+    const int16x8_t diff = vqsubq_s16(a_b, d_c); // a-d | b-c
+    const int16x8_t diff_swapped = vcombine_s16(vget_high_s16(diff), vget_low_s16(diff)); // b-c | a-d
+    Transpose8x2(sum, diff_swapped, rows);
+}
+
+static void ITransformOne(const uint8_t* ref, const int16_t* in, uint8_t* dst) {
+    int16x8x2_t coeffs; // the 16 coefficients in[0..15], loaded as two 8-lane vectors
+    INIT_VECTOR2(coeffs, vld1q_s16(in + 0), vld1q_s16(in + 8));
+    TransformPass(&coeffs); // first pass; each pass ends with a transpose
+    TransformPass(&coeffs); // second pass, on the transposed data
+    Add4x4(coeffs.val[0], coeffs.val[1], ref, dst); // descale, add to 'ref', saturate and store into 'dst'
+}
+
+#else
+// Inline-asm inverse transform of one 4x4 block: dst = saturate_u8(ref + ((transform(in) + 4) >> 3)).
+static void ITransformOne(const uint8_t* ref, const int16_t* in, uint8_t* dst) {
+    const int kBPS = BPS;                     // row pitch, used as post-increment for ref/dst
+    const int16_t kC1C2[] = {kC1, kC2, 0, 0}; // loaded into d0: d0[0] = kC1, d0[1] = kC2
+
+    __asm__ volatile(
+        "vld1.16         {q1, q2}, [%[in]]           \n"
+        "vld1.16         {d0}, [%[kC1C2]]            \n"
+
+        // d2: in[0]
+        // d3: in[8]
+        // d4: in[4]
+        // d5: in[12]
+        "vswp            d3, d4                      \n"
+
+        // q8 = {in[4], in[12]} * kC1 * 2 >> 16
+        // q9 = {in[4], in[12]} * kC2 >> 16
+        "vqdmulh.s16     q8, q2, d0[0]               \n"
+        "vqdmulh.s16     q9, q2, d0[1]               \n"
+
+        // d22 = a = in[0] + in[8]
+        // d23 = b = in[0] - in[8]
+        "vqadd.s16       d22, d2, d3                 \n"
+        "vqsub.s16       d23, d2, d3                 \n"
+
+        //  q8 = in[4]/[12] * kC1 >> 16
+        "vshr.s16        q8, q8, #1                  \n"
+
+        // Add {in[4], in[12]} back after the multiplication.
+        "vqadd.s16       q8, q2, q8                  \n"
+
+        // d20 = c = in[4]*kC2 - in[12]*kC1
+        // d21 = d = in[4]*kC1 + in[12]*kC2
+        "vqsub.s16       d20, d18, d17               \n"
+        "vqadd.s16       d21, d19, d16               \n"
+
+        // d2 = tmp[0] = a + d
+        // d3 = tmp[1] = b + c
+        // d4 = tmp[2] = b - c
+        // d5 = tmp[3] = a - d
+        "vqadd.s16       d2, d22, d21                \n"
+        "vqadd.s16       d3, d23, d20                \n"
+        "vqsub.s16       d4, d23, d20                \n"
+        "vqsub.s16       d5, d22, d21                \n"
+
+        "vzip.16         q1, q2                      \n"
+        "vzip.16         q1, q2                      \n"
+
+        "vswp            d3, d4                      \n"
+
+        // q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
+        // q9 = {tmp[4], tmp[12]} * kC2 >> 16
+        "vqdmulh.s16     q8, q2, d0[0]               \n"
+        "vqdmulh.s16     q9, q2, d0[1]               \n"
+
+        // d22 = a = tmp[0] + tmp[8]
+        // d23 = b = tmp[0] - tmp[8]
+        "vqadd.s16       d22, d2, d3                 \n"
+        "vqsub.s16       d23, d2, d3                 \n"
+
+        "vshr.s16        q8, q8, #1                  \n"
+        "vqadd.s16       q8, q2, q8                  \n"
+
+        // d20 = c = tmp[4]*kC2 - tmp[12]*kC1
+        // d21 = d = tmp[4]*kC1 + tmp[12]*kC2
+        "vqsub.s16       d20, d18, d17               \n"
+        "vqadd.s16       d21, d19, d16               \n"
+
+        // d2 = tmp[0] = a + d
+        // d3 = tmp[1] = b + c
+        // d4 = tmp[2] = b - c
+        // d5 = tmp[3] = a - d
+        "vqadd.s16       d2, d22, d21                \n"
+        "vqadd.s16       d3, d23, d20                \n"
+        "vqsub.s16       d4, d23, d20                \n"
+        "vqsub.s16       d5, d22, d21                \n"
+
+        "vld1.32         d6[0], [%[ref]], %[kBPS]    \n"
+        "vld1.32         d6[1], [%[ref]], %[kBPS]    \n"
+        "vld1.32         d7[0], [%[ref]], %[kBPS]    \n"
+        "vld1.32         d7[1], [%[ref]], %[kBPS]    \n"
+
+        "sub         %[ref], %[ref], %[kBPS], lsl #2 \n"
+
+        // (val + 4) >> 3
+        "vrshr.s16       d2, d2, #3                  \n"
+        "vrshr.s16       d3, d3, #3                  \n"
+        "vrshr.s16       d4, d4, #3                  \n"
+        "vrshr.s16       d5, d5, #3                  \n"
+
+        "vzip.16         q1, q2                      \n"
+        "vzip.16         q1, q2                      \n"
+
+        // Must accumulate before saturating
+        "vmovl.u8        q8, d6                      \n"
+        "vmovl.u8        q9, d7                      \n"
+
+        "vqadd.s16       q1, q1, q8                  \n"
+        "vqadd.s16       q2, q2, q9                  \n"
+
+        "vqmovun.s16     d0, q1                      \n"
+        "vqmovun.s16     d1, q2                      \n"
+
+        "vst1.32         d0[0], [%[dst]], %[kBPS]    \n"
+        "vst1.32         d0[1], [%[dst]], %[kBPS]    \n"
+        "vst1.32         d1[0], [%[dst]], %[kBPS]    \n"
+        "vst1.32         d1[1], [%[dst]]             \n"
+
+        : [in] "+r"(in), [dst] "+r"(dst), [ref] "+r"(ref) // modified registers: ref/dst are post-incremented above
+        : [kBPS] "r"(kBPS), [kC1C2] "r"(kC1C2)            // constants
+        : "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11" // clobbered; q3 = d6/d7 (reference rows)
+        );
+}
+
+#endif // WEBP_USE_INTRINSICS
+
+static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst, int do_two) {
+    // Block 0 is always processed; block 1 (ref/dst + 4 bytes, in + 16 coefficients) only when 'do_two' is non-zero.
+    const int num_blocks = do_two ? 2 : 1;
+    int i;
+    for (i = 0; i < num_blocks; ++i) ITransformOne(ref + 4 * i, in + 16 * i, dst + 4 * i);
+}
+
+// Gathers four 4-byte rows (row pitch BPS) starting at 'src' into one uint8x16_t; row i lands in 32-bit lane i.
+static uint8x16_t Load4x4(const uint8_t* src) {
+    const uint32x4_t none = vdupq_n_u32(0);
+    const uint32x4_t r0 = vld1q_lane_u32((const uint32_t*)(src + 0 * BPS), none, 0);
+    const uint32x4_t r01 = vld1q_lane_u32((const uint32_t*)(src + 1 * BPS), r0, 1);
+    const uint32x4_t r012 = vld1q_lane_u32((const uint32_t*)(src + 2 * BPS), r01, 2);
+    const uint32x4_t r0123 = vld1q_lane_u32((const uint32_t*)(src + 3 * BPS), r012, 3);
+    return vreinterpretq_u8_u32(r0123);
+}
+
+// Forward transform.
+
+#if defined(WEBP_USE_INTRINSICS)
+
+static WEBP_INLINE void Transpose4x4_S16(const int16x4_t A,
+                                         const int16x4_t B,
+                                         const int16x4_t C,
+                                         const int16x4_t D,
+                                         int16x8_t* const out01,
+                                         int16x8_t* const out32) {
+    // 16-bit transposes inside the row pairs (A,B) and (C,D): ab.val[0] = A0 B0 A2 B2, ab.val[1] = A1 B1 A3 B3.
+    const int16x4x2_t ab = vtrn_s16(A, B);
+    const int16x4x2_t cd = vtrn_s16(C, D);
+    // 32-bit transposes across the pairs finish the 4x4 transpose: cols02 = columns 0 and 2, cols13 = columns 1 and 3.
+    const int32x2x2_t cols02 = vtrn_s32(vreinterpret_s32_s16(ab.val[0]), vreinterpret_s32_s16(cd.val[0]));
+    const int32x2x2_t cols13 = vtrn_s32(vreinterpret_s32_s16(ab.val[1]), vreinterpret_s32_s16(cd.val[1]));
+    *out01 = vreinterpretq_s16_s64(vcombine_s64(vreinterpret_s64_s32(cols02.val[0]), vreinterpret_s64_s32(cols13.val[0])));
+    *out32 = vreinterpretq_s16_s64(vcombine_s64(vreinterpret_s64_s32(cols13.val[1]), vreinterpret_s64_s32(cols02.val[1])));
+}
+
+static WEBP_INLINE int16x8_t DiffU8ToS16(const uint8x8_t a, const uint8x8_t b) { // per-lane a - b, widened to 16 bits
+    return vreinterpretq_s16_u16(vsubl_u8(a, b)); // widening subtract wraps modulo 2^16; read as signed it equals a - b
+}
+
+static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) { // writes 16 coefficients of the 4x4 (src - ref) block to 'out'
+    int16x8_t d0d1, d3d2; // working 4x4 int16 variables
+    { // load both 4x4 blocks, subtract, and transpose the difference
+        const uint8x16_t S0 = Load4x4(src);
+        const uint8x16_t R0 = Load4x4(ref);
+        const int16x8_t D0D1 = DiffU8ToS16(vget_low_u8(S0), vget_low_u8(R0));
+        const int16x8_t D2D3 = DiffU8ToS16(vget_high_u8(S0), vget_high_u8(R0));
+        const int16x4_t D0 = vget_low_s16(D0D1);
+        const int16x4_t D1 = vget_high_s16(D0D1);
+        const int16x4_t D2 = vget_low_s16(D2D3);
+        const int16x4_t D3 = vget_high_s16(D2D3);
+        Transpose4x4_S16(D0, D1, D2, D3, &d0d1, &d3d2);
+    }
+    { // 1st pass
+        const int32x4_t kCst937 = vdupq_n_s32(937);   // bias added before the >> 9 of tmp3
+        const int32x4_t kCst1812 = vdupq_n_s32(1812); // bias added before the >> 9 of tmp1
+        const int16x8_t a0a1 = vaddq_s16(d0d1, d3d2); // d0+d3 | d1+d2   (=a0|a1)
+        const int16x8_t a3a2 = vsubq_s16(d0d1, d3d2); // d0-d3 | d1-d2   (=a3|a2)
+        const int16x8_t a0a1_2 = vshlq_n_s16(a0a1, 3); // (a0 | a1) << 3
+        const int16x4_t tmp0 = vadd_s16(vget_low_s16(a0a1_2), vget_high_s16(a0a1_2)); // (a0 + a1) << 3
+        const int16x4_t tmp2 = vsub_s16(vget_low_s16(a0a1_2), vget_high_s16(a0a1_2)); // (a0 - a1) << 3
+        const int32x4_t a3_2217 = vmull_n_s16(vget_low_s16(a3a2), 2217);
+        const int32x4_t a2_2217 = vmull_n_s16(vget_high_s16(a3a2), 2217);
+        const int32x4_t a2_p_a3 = vmlal_n_s16(a2_2217, vget_low_s16(a3a2), 5352);  // a2*2217 + a3*5352
+        const int32x4_t a3_m_a2 = vmlsl_n_s16(a3_2217, vget_high_s16(a3a2), 5352); // a3*2217 - a2*5352
+        const int16x4_t tmp1 = vshrn_n_s32(vaddq_s32(a2_p_a3, kCst1812), 9);
+        const int16x4_t tmp3 = vshrn_n_s32(vaddq_s32(a3_m_a2, kCst937), 9);
+        Transpose4x4_S16(tmp0, tmp1, tmp2, tmp3, &d0d1, &d3d2);
+    }
+    { // 2nd pass
+        // vaddhn keeps the high 16 bits of the sum, so the extra (1 << 16) adds 1 to tmp1; a3_eq_0 is -1 where a3 == 0, giving +(a3 != 0).
+        const int32x4_t kCst12000 = vdupq_n_s32(12000 + (1 << 16));
+        const int32x4_t kCst51000 = vdupq_n_s32(51000);
+        const int16x8_t a0a1 = vaddq_s16(d0d1, d3d2); // d0+d3 | d1+d2   (=a0|a1)
+        const int16x8_t a3a2 = vsubq_s16(d0d1, d3d2); // d0-d3 | d1-d2   (=a3|a2)
+        const int16x4_t a0_k7 = vadd_s16(vget_low_s16(a0a1), vdup_n_s16(7));        // a0 + 7
+        const int16x4_t out0 = vshr_n_s16(vadd_s16(a0_k7, vget_high_s16(a0a1)), 4); // (a0 + a1 + 7) >> 4
+        const int16x4_t out2 = vshr_n_s16(vsub_s16(a0_k7, vget_high_s16(a0a1)), 4); // (a0 - a1 + 7) >> 4
+        const int32x4_t a3_2217 = vmull_n_s16(vget_low_s16(a3a2), 2217);
+        const int32x4_t a2_2217 = vmull_n_s16(vget_high_s16(a3a2), 2217);
+        const int32x4_t a2_p_a3 = vmlal_n_s16(a2_2217, vget_low_s16(a3a2), 5352);  // a2*2217 + a3*5352
+        const int32x4_t a3_m_a2 = vmlsl_n_s16(a3_2217, vget_high_s16(a3a2), 5352); // a3*2217 - a2*5352
+        const int16x4_t tmp1 = vaddhn_s32(a2_p_a3, kCst12000); // high half of (a2*2217 + a3*5352 + 12000 + 65536)
+        const int16x4_t out3 = vaddhn_s32(a3_m_a2, kCst51000); // high half of (a3*2217 - a2*5352 + 51000)
+        const int16x4_t a3_eq_0 = vreinterpret_s16_u16(vceq_s16(vget_low_s16(a3a2), vdup_n_s16(0)));
+        const int16x4_t out1 = vadd_s16(tmp1, a3_eq_0);
+        vst1_s16(out + 0, out0);
+        vst1_s16(out + 4, out1);
+        vst1_s16(out + 8, out2);
+        vst1_s16(out + 12, out3);
+    }
+}
+
+#else
+
+// adapted from vp8/encoder/arm/neon/shortfdct_neon.asm
+static const int16_t kCoeff16[] = {5352, 5352, 5352, 5352, 2217, 2217, 2217, 2217}; // loaded as q8: d16 = 5352, d17 = 2217
+static const int32_t kCoeff32[] = {1812,  1812,  1812,  1812,  937,   937,   937,   937,    // first-pass biases (q9, q10)
+                                   12000, 12000, 12000, 12000, 51000, 51000, 51000, 51000}; // second-pass biases (q11, q12)
+// Inline-asm forward transform: writes the 16 coefficients of the 4x4 (src - ref) block to 'out'.
+static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+    const int kBPS = BPS;
+    const uint8_t* src_ptr = src; // local copies: the asm post-increments these pointers
+    const uint8_t* ref_ptr = ref;
+    const int16_t* coeff16 = kCoeff16;
+    const int32_t* coeff32 = kCoeff32;
+
+    __asm__ volatile(
+        // load src into q4, q5 in high half
+        // (each vld1.8 {dN} reads 8 bytes of a row; only the first 4 are kept by the vtrn.32 below)
+        "vld1.8 {d8},  [%[src_ptr]], %[kBPS]      \n"
+        "vld1.8 {d10}, [%[src_ptr]], %[kBPS]      \n"
+        "vld1.8 {d9},  [%[src_ptr]], %[kBPS]      \n"
+        "vld1.8 {d11}, [%[src_ptr]]               \n"
+
+        // load ref into q6, q7 in high half
+        "vld1.8 {d12}, [%[ref_ptr]], %[kBPS]      \n"
+        "vld1.8 {d14}, [%[ref_ptr]], %[kBPS]      \n"
+        "vld1.8 {d13}, [%[ref_ptr]], %[kBPS]      \n"
+        "vld1.8 {d15}, [%[ref_ptr]]               \n"
+
+        // Pack the first 4 bytes of every row into q4 (src) and q6 (ref)
+        "vtrn.32     q4, q5                       \n"
+        "vtrn.32     q6, q7                       \n"
+
+        // d[0-3] = src - ref
+        "vsubl.u8    q0, d8, d12                  \n"
+        "vsubl.u8    q1, d9, d13                  \n"
+
+        // load coeff16 into q8(d16=5352, d17=2217)
+        "vld1.16     {q8}, [%[coeff16]]           \n"
+
+        // load first half of coeff32 into q9 = 1812, q10 = 937 (post-increments coeff32)
+        "vld1.32     {q9, q10}, [%[coeff32]]!     \n"
+
+        // load second half of coeff32 into q11=12000, q12=51000
+        "vld1.32     {q11,q12}, [%[coeff32]]      \n"
+
+        // part 1
+        // Transpose. Register dN is the same as dN in C
+        "vtrn.32         d0, d2                   \n"
+        "vtrn.32         d1, d3                   \n"
+        "vtrn.16         d0, d1                   \n"
+        "vtrn.16         d2, d3                   \n"
+
+        "vadd.s16        d4, d0, d3               \n" // a0 = d0 + d3
+        "vadd.s16        d5, d1, d2               \n" // a1 = d1 + d2
+        "vsub.s16        d6, d1, d2               \n" // a2 = d1 - d2
+        "vsub.s16        d7, d0, d3               \n" // a3 = d0 - d3
+
+        "vadd.s16        d0, d4, d5               \n" // a0 + a1
+        "vshl.s16        d0, d0, #3               \n" // temp[0+i*4] = (a0+a1) << 3
+        "vsub.s16        d2, d4, d5               \n" // a0 - a1
+        "vshl.s16        d2, d2, #3               \n" // temp[2+i*4] = (a0-a1) << 3
+
+        "vmlal.s16       q9, d7, d16              \n" // a3*5352 + 1812
+        "vmlal.s16       q10, d7, d17             \n" // a3*2217 + 937
+        "vmlal.s16       q9, d6, d17              \n" // a2*2217 + a3*5352 + 1812
+        "vmlsl.s16       q10, d6, d16             \n" // a3*2217 + 937 - a2*5352
+
+        // temp[1+i*4] = (a2*2217 + a3*5352 + 1812) >> 9
+        // temp[3+i*4] = (a3*2217 + 937 - a2*5352) >> 9
+        "vshrn.s32       d1, q9, #9               \n"
+        "vshrn.s32       d3, q10, #9              \n"
+
+        // part 2
+        // transpose d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12]
+        "vtrn.32         d0, d2                   \n"
+        "vtrn.32         d1, d3                   \n"
+        "vtrn.16         d0, d1                   \n"
+        "vtrn.16         d2, d3                   \n"
+
+        "vmov.s16        d26, #7                  \n"
+
+        "vadd.s16        d4, d0, d3               \n" // a1 = ip[0] + ip[12]
+        "vadd.s16        d5, d1, d2               \n" // b1 = ip[4] + ip[8]
+        "vsub.s16        d6, d1, d2               \n" // c1 = ip[4] - ip[8]
+        "vadd.s16        d4, d4, d26              \n" // a1 + 7
+        "vsub.s16        d7, d0, d3               \n" // d1 = ip[0] - ip[12]
+
+        "vadd.s16        d0, d4, d5               \n" // op[0] = a1 + b1 + 7
+        "vsub.s16        d2, d4, d5               \n" // op[8] = a1 - b1 + 7
+
+        "vmlal.s16       q11, d7, d16             \n" // d1*5352 + 12000
+        "vmlal.s16       q12, d7, d17             \n" // d1*2217 + 51000
+
+        "vceq.s16        d4, d7, #0               \n"
+
+        "vshr.s16        d0, d0, #4               \n"
+        "vshr.s16        d2, d2, #4               \n"
+
+        "vmlal.s16       q11, d6, d17             \n" // c1*2217 + d1*5352 + 12000
+        "vmlsl.s16       q12, d6, d16             \n" // d1*2217 - c1*5352 + 51000
+
+        "vmvn            d4, d4                   \n" // !(d1 == 0)
+        // op[4] = (c1*2217 + d1*5352 + 12000)>>16
+        "vshrn.s32       d1, q11, #16             \n"
+        // op[4] += (d1!=0)   (d4 is all-ones, i.e. -1, where d1 != 0, so subtracting it adds 1)
+        "vsub.s16        d1, d1, d4               \n"
+        // op[12]= (d1*2217 - c1*5352 + 51000)>>16
+        "vshrn.s32       d3, q12, #16             \n"
+
+        // set result to out array
+        "vst1.16         {q0, q1}, [%[out]]   \n"
+        : [src_ptr] "+r"(src_ptr), [ref_ptr] "+r"(ref_ptr),
+          [coeff32] "+r"(coeff32) // modified registers
+        : [kBPS] "r"(kBPS), [coeff16] "r"(coeff16),
+          [out] "r"(out)                                                                                   // constants
+        : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13" // clobbered
+        );
+}
+
+#endif
+
+#define LOAD_LANE_16b(VALUE, LANE)                     \
+    do {                                               \
+        (VALUE) = vld1_lane_s16(src, (VALUE), (LANE)); \
+        src += stride;                                 \
+    } while (0)
+// Forward 4x4 WHT made of add/subtract butterflies: reads 16 samples spaced 'stride' apart, writes out[0..15].
+static void FTransformWHT(const int16_t* src, int16_t* out) {
+    const int stride = 16; // distance, in int16_t elements, between two consecutive loads
+    const int16x4_t zero = vdup_n_s16(0);
+    int32x4x4_t tmp0;
+    int16x4x4_t in;
+    INIT_VECTOR4(in, zero, zero, zero, zero);
+    LOAD_LANE_16b(in.val[0], 0); // the k-th load (k = 0..15) puts src[k * 16] in lane k / 4 of in.val[k % 4]
+    LOAD_LANE_16b(in.val[1], 0);
+    LOAD_LANE_16b(in.val[2], 0);
+    LOAD_LANE_16b(in.val[3], 0);
+    LOAD_LANE_16b(in.val[0], 1);
+    LOAD_LANE_16b(in.val[1], 1);
+    LOAD_LANE_16b(in.val[2], 1);
+    LOAD_LANE_16b(in.val[3], 1);
+    LOAD_LANE_16b(in.val[0], 2);
+    LOAD_LANE_16b(in.val[1], 2);
+    LOAD_LANE_16b(in.val[2], 2);
+    LOAD_LANE_16b(in.val[3], 2);
+    LOAD_LANE_16b(in.val[0], 3);
+    LOAD_LANE_16b(in.val[1], 3);
+    LOAD_LANE_16b(in.val[2], 3);
+    LOAD_LANE_16b(in.val[3], 3);
+    // First pass: butterflies on the loaded vectors, with the results widened to 32 bits.
+    {
+        // a0 = in[0 * 16] + in[2 * 16]
+        // a1 = in[1 * 16] + in[3 * 16]
+        // a2 = in[1 * 16] - in[3 * 16]
+        // a3 = in[0 * 16] - in[2 * 16]
+        const int32x4_t a0 = vaddl_s16(in.val[0], in.val[2]);
+        const int32x4_t a1 = vaddl_s16(in.val[1], in.val[3]);
+        const int32x4_t a2 = vsubl_s16(in.val[1], in.val[3]);
+        const int32x4_t a3 = vsubl_s16(in.val[0], in.val[2]);
+        tmp0.val[0] = vaddq_s32(a0, a1);
+        tmp0.val[1] = vaddq_s32(a3, a2);
+        tmp0.val[2] = vsubq_s32(a3, a2);
+        tmp0.val[3] = vsubq_s32(a0, a1);
+    }
+    {
+        const int32x4x4_t tmp1 = Transpose4x4(tmp0);
+        // a0 = tmp[0 + i] + tmp[ 8 + i]
+        // a1 = tmp[4 + i] + tmp[12 + i]
+        // a2 = tmp[4 + i] - tmp[12 + i]
+        // a3 = tmp[0 + i] - tmp[ 8 + i]
+        const int32x4_t a0 = vaddq_s32(tmp1.val[0], tmp1.val[2]);
+        const int32x4_t a1 = vaddq_s32(tmp1.val[1], tmp1.val[3]);
+        const int32x4_t a2 = vsubq_s32(tmp1.val[1], tmp1.val[3]);
+        const int32x4_t a3 = vsubq_s32(tmp1.val[0], tmp1.val[2]);
+        const int32x4_t b0 = vhaddq_s32(a0, a1); // (a0 + a1) >> 1
+        const int32x4_t b1 = vhaddq_s32(a3, a2); // (a3 + a2) >> 1
+        const int32x4_t b2 = vhsubq_s32(a3, a2); // (a3 - a2) >> 1
+        const int32x4_t b3 = vhsubq_s32(a0, a1); // (a0 - a1) >> 1
+        const int16x4_t out0 = vmovn_s32(b0);
+        const int16x4_t out1 = vmovn_s32(b1);
+        const int16x4_t out2 = vmovn_s32(b2);
+        const int16x4_t out3 = vmovn_s32(b3);
+        // The results were narrowed to 16 bits above; store them as four groups of four outputs.
+        vst1_s16(out + 0, out0);
+        vst1_s16(out + 4, out1);
+        vst1_s16(out + 8, out2);
+        vst1_s16(out + 12, out3);
+    }
+}
+#undef LOAD_LANE_16b
+
+//------------------------------------------------------------------------------
+// Texture distortion
+//
+// We try to match the spectral content (weighted) between source and
+// reconstructed samples.
+
+// a 0123, b 0123
+// a 4567, b 4567
+// a 89ab, b 89ab
+// a cdef, b cdef
+//
+// transpose
+//
+// a 048c, b 048c
+// a 159d, b 159d
+// a 26ae, b 26ae
+// a 37bf, b 37bf
+//
+static WEBP_INLINE uint8x8x4_t DistoTranspose4x4U8(uint8x8x4_t d4_in) {
+    const uint8x8x2_t d2_tmp0 = vtrn_u8(d4_in.val[0], d4_in.val[1]); // 2x2 byte transposes between rows 0 and 1
+    const uint8x8x2_t d2_tmp1 = vtrn_u8(d4_in.val[2], d4_in.val[3]); // 2x2 byte transposes between rows 2 and 3
+    const uint16x4x2_t d2_tmp2 = vtrn_u16(vreinterpret_u16_u8(d2_tmp0.val[0]), vreinterpret_u16_u8(d2_tmp1.val[0]));
+    const uint16x4x2_t d2_tmp3 = vtrn_u16(vreinterpret_u16_u8(d2_tmp0.val[1]), vreinterpret_u16_u8(d2_tmp1.val[1]));
+    // d2_tmp2 yields output rows 0 and 2, d2_tmp3 yields output rows 1 and 3.
+    d4_in.val[0] = vreinterpret_u8_u16(d2_tmp2.val[0]);
+    d4_in.val[2] = vreinterpret_u8_u16(d2_tmp2.val[1]);
+    d4_in.val[1] = vreinterpret_u8_u16(d2_tmp3.val[0]);
+    d4_in.val[3] = vreinterpret_u8_u16(d2_tmp3.val[1]);
+    return d4_in;
+}
+
+static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16(int16x8x4_t q4_in) {
+    const int16x8x2_t q2_tmp0 = vtrnq_s16(q4_in.val[0], q4_in.val[1]); // 2x2 transposes of 16-bit lanes, rows 0/1
+    const int16x8x2_t q2_tmp1 = vtrnq_s16(q4_in.val[2], q4_in.val[3]); // 2x2 transposes of 16-bit lanes, rows 2/3
+    const int32x4x2_t q2_tmp2 = vtrnq_s32(vreinterpretq_s32_s16(q2_tmp0.val[0]), vreinterpretq_s32_s16(q2_tmp1.val[0]));
+    const int32x4x2_t q2_tmp3 = vtrnq_s32(vreinterpretq_s32_s16(q2_tmp0.val[1]), vreinterpretq_s32_s16(q2_tmp1.val[1]));
+    q4_in.val[0] = vreinterpretq_s16_s32(q2_tmp2.val[0]); // q2_tmp2 -> output rows 0 and 2
+    q4_in.val[2] = vreinterpretq_s16_s32(q2_tmp2.val[1]);
+    q4_in.val[1] = vreinterpretq_s16_s32(q2_tmp3.val[0]); // q2_tmp3 -> output rows 1 and 3
+    q4_in.val[3] = vreinterpretq_s16_s32(q2_tmp3.val[1]);
+    return q4_in;
+}
+
+static WEBP_INLINE int16x8x4_t DistoHorizontalPass(const uint8x8x4_t d4_in) {
+    // {a0, a1} = {in[0] + in[2], in[1] + in[3]}, widened from 8 to 16 bits
+    // {a3, a2} = {in[0] - in[2], in[1] - in[3]}, the u16 wrap-around being reinterpreted as a signed value
+    const int16x8_t q_a0 = vreinterpretq_s16_u16(vaddl_u8(d4_in.val[0], d4_in.val[2]));
+    const int16x8_t q_a1 = vreinterpretq_s16_u16(vaddl_u8(d4_in.val[1], d4_in.val[3]));
+    const int16x8_t q_a3 = vreinterpretq_s16_u16(vsubl_u8(d4_in.val[0], d4_in.val[2]));
+    const int16x8_t q_a2 = vreinterpretq_s16_u16(vsubl_u8(d4_in.val[1], d4_in.val[3]));
+    int16x8x4_t q4_out;
+    // tmp[0] = a0 + a1
+    // tmp[1] = a3 + a2
+    // tmp[2] = a3 - a2
+    // tmp[3] = a0 - a1
+    INIT_VECTOR4(q4_out, vaddq_s16(q_a0, q_a1), vaddq_s16(q_a3, q_a2), vsubq_s16(q_a3, q_a2), vsubq_s16(q_a0, q_a1));
+    return q4_out;
+}
+
+static WEBP_INLINE int16x8x4_t DistoVerticalPass(int16x8x4_t q4_in) {
+    const int16x8_t q_a0 = vaddq_s16(q4_in.val[0], q4_in.val[2]);
+    const int16x8_t q_a1 = vaddq_s16(q4_in.val[1], q4_in.val[3]);
+    const int16x8_t q_a2 = vsubq_s16(q4_in.val[1], q4_in.val[3]);
+    const int16x8_t q_a3 = vsubq_s16(q4_in.val[0], q4_in.val[2]);
+    // Second butterfly stage, returning absolute values: vabdq gives |x - y| directly, the sums go through vabsq.
+    q4_in.val[0] = vaddq_s16(q_a0, q_a1);
+    q4_in.val[1] = vaddq_s16(q_a3, q_a2);
+    q4_in.val[2] = vabdq_s16(q_a3, q_a2);
+    q4_in.val[3] = vabdq_s16(q_a0, q_a1);
+    q4_in.val[0] = vabsq_s16(q4_in.val[0]);
+    q4_in.val[1] = vabsq_s16(q4_in.val[1]);
+    return q4_in;
+}
+
+static WEBP_INLINE int16x4x4_t DistoLoadW(const uint16_t* w) {
+    const uint16x8_t q_w07 = vld1q_u16(&w[0]); // weights w[0..7]
+    const uint16x8_t q_w8f = vld1q_u16(&w[8]); // weights w[8..15]
+    int16x4x4_t d4_w;                          // four vectors of four weights, reinterpreted as signed 16-bit
+    INIT_VECTOR4(d4_w, vget_low_s16(vreinterpretq_s16_u16(q_w07)), vget_high_s16(vreinterpretq_s16_u16(q_w07)),
+                 vget_low_s16(vreinterpretq_s16_u16(q_w8f)), vget_high_s16(vreinterpretq_s16_u16(q_w8f)));
+    return d4_w;
+}
+
+static WEBP_INLINE int32x2_t DistoSum(const int16x8x4_t q4_in, const int16x4x4_t d4_w) {
+    int32x2_t d_sum;
+    // sum += w[ 0] * abs(b0);
+    // sum += w[ 4] * abs(b1);
+    // sum += w[ 8] * abs(b2);
+    // sum += w[12] * abs(b3);
+    int32x4_t q_sum0 = vmull_s16(d4_w.val[0], vget_low_s16(q4_in.val[0]));
+    int32x4_t q_sum1 = vmull_s16(d4_w.val[1], vget_low_s16(q4_in.val[1]));
+    int32x4_t q_sum2 = vmull_s16(d4_w.val[2], vget_low_s16(q4_in.val[2]));
+    int32x4_t q_sum3 = vmull_s16(d4_w.val[3], vget_low_s16(q4_in.val[3]));
+    q_sum0 = vmlsl_s16(q_sum0, d4_w.val[0], vget_high_s16(q4_in.val[0]));
+    q_sum1 = vmlsl_s16(q_sum1, d4_w.val[1], vget_high_s16(q4_in.val[1]));
+    q_sum2 = vmlsl_s16(q_sum2, d4_w.val[2], vget_high_s16(q4_in.val[2]));
+    q_sum3 = vmlsl_s16(q_sum3, d4_w.val[3], vget_high_s16(q4_in.val[3]));
+    // Each q_sumN now holds w * (low-half value - high-half value); reduce the 16 lanes to a single total.
+    q_sum0 = vaddq_s32(q_sum0, q_sum1);
+    q_sum2 = vaddq_s32(q_sum2, q_sum3);
+    q_sum2 = vaddq_s32(q_sum0, q_sum2);
+    d_sum = vpadd_s32(vget_low_s32(q_sum2), vget_high_s32(q_sum2));
+    d_sum = vpadd_s32(d_sum, d_sum); // both lanes now hold the full total
+    return d_sum;
+}
+
+#define LOAD_LANE_32b(src, VALUE, LANE) (VALUE) = vld1_lane_u32((const uint32_t*)(src), (VALUE), (LANE))
+
+// Hadamard transform
+// Returns abs(sum_a - sum_b) >> 5, where sum_x = sum of w[i] * abs(i-th transformed coefficient of block x).
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b, const uint16_t* const w) {
+    uint32x2_t d_in_ab_0123 = vdup_n_u32(0);
+    uint32x2_t d_in_ab_4567 = vdup_n_u32(0);
+    uint32x2_t d_in_ab_89ab = vdup_n_u32(0);
+    uint32x2_t d_in_ab_cdef = vdup_n_u32(0);
+    uint8x8x4_t d4_in;
+
+    // load data a, b: lane 0 of each register gets a 4-byte row of 'a', lane 1 the matching row of 'b'
+    LOAD_LANE_32b(a + 0 * BPS, d_in_ab_0123, 0);
+    LOAD_LANE_32b(a + 1 * BPS, d_in_ab_4567, 0);
+    LOAD_LANE_32b(a + 2 * BPS, d_in_ab_89ab, 0);
+    LOAD_LANE_32b(a + 3 * BPS, d_in_ab_cdef, 0);
+    LOAD_LANE_32b(b + 0 * BPS, d_in_ab_0123, 1);
+    LOAD_LANE_32b(b + 1 * BPS, d_in_ab_4567, 1);
+    LOAD_LANE_32b(b + 2 * BPS, d_in_ab_89ab, 1);
+    LOAD_LANE_32b(b + 3 * BPS, d_in_ab_cdef, 1);
+    INIT_VECTOR4(d4_in, vreinterpret_u8_u32(d_in_ab_0123), vreinterpret_u8_u32(d_in_ab_4567),
+                 vreinterpret_u8_u32(d_in_ab_89ab), vreinterpret_u8_u32(d_in_ab_cdef));
+
+    {
+        // horizontal pass
+        const uint8x8x4_t d4_t = DistoTranspose4x4U8(d4_in);
+        const int16x8x4_t q4_h = DistoHorizontalPass(d4_t);
+        const int16x4x4_t d4_w = DistoLoadW(w);
+        // vertical pass
+        const int16x8x4_t q4_t = DistoTranspose4x4S16(q4_h);
+        const int16x8x4_t q4_v = DistoVerticalPass(q4_t);
+        int32x2_t d_sum = DistoSum(q4_v, d4_w);
+
+        // abs(sum2 - sum1) >> 5
+        d_sum = vabs_s32(d_sum);
+        d_sum = vshr_n_s32(d_sum, 5);
+        return vget_lane_s32(d_sum, 0);
+    }
+}
+#undef LOAD_LANE_32b
+
+static int Disto16x16(const uint8_t* const a, const uint8_t* const b, const uint16_t* const w) {
+    // Adds up Disto4x4() over the sixteen 4x4 sub-blocks, scanned row by row; rows are BPS bytes apart.
+    int total = 0;
+    int row, col;
+    for (row = 0; row < 4; ++row) {
+        const int offset = 4 * row * BPS;
+        for (col = 0; col < 4; ++col) total += Disto4x4(a + 4 * col + offset, b + 4 * col + offset, w);
+    }
+    return total;
+}
+
+//------------------------------------------------------------------------------
+
+static void CollectHistogram(
+    const uint8_t* ref, const uint8_t* pred, int start_block, int end_block, VP8Histogram* const histo) {
+    const uint16x8_t max_coeff_thresh = vdupq_n_u16(MAX_COEFF_THRESH);
+    int j;
+    int distribution[MAX_COEFF_THRESH + 1] = {0}; // one counter per bin, bins being 0..MAX_COEFF_THRESH
+    for (j = start_block; j < end_block; ++j) {
+        int16_t out[16];
+        FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out); // 16 coefficients for block j
+        {
+            int k;
+            const int16x8_t a0 = vld1q_s16(out + 0);
+            const int16x8_t b0 = vld1q_s16(out + 8);
+            const uint16x8_t a1 = vreinterpretq_u16_s16(vabsq_s16(a0));
+            const uint16x8_t b1 = vreinterpretq_u16_s16(vabsq_s16(b0));
+            const uint16x8_t a2 = vshrq_n_u16(a1, 3); // abs(coeff) >> 3
+            const uint16x8_t b2 = vshrq_n_u16(b1, 3);
+            const uint16x8_t a3 = vminq_u16(a2, max_coeff_thresh); // clip to MAX_COEFF_THRESH
+            const uint16x8_t b3 = vminq_u16(b2, max_coeff_thresh);
+            vst1q_s16(out + 0, vreinterpretq_s16_u16(a3)); // 'out' is reused to hold the bin indices
+            vst1q_s16(out + 8, vreinterpretq_s16_u16(b3));
+            // Convert coefficients to bin.
+            for (k = 0; k < 16; ++k) {
+                ++distribution[out[k]];
+            }
+        }
+    }
+    VP8SetHistogramData(distribution, histo);
+}
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE void AccumulateSSE16(const uint8_t* const a, const uint8_t* const b, uint32x4_t* const sum) {
+    // Adds the 16 squared byte differences between a[0..15] and b[0..15] into the four u32 lanes of *sum.
+    const uint8x16_t abs_diff = vabdq_u8(vld1q_u8(a), vld1q_u8(b));
+    const uint16x8_t prod_lo = vmull_u8(vget_low_u8(abs_diff), vget_low_u8(abs_diff));
+    const uint16x8_t prod_hi = vmull_u8(vget_high_u8(abs_diff), vget_high_u8(abs_diff));
+    // 2 * 255 * 255 does not fit in 16 bits, so two products are never added in a u16 lane: each half is
+    // pair-wise added and widened to u32 on its own.
+    *sum = vpadalq_u16(vpadalq_u16(*sum, prod_lo), prod_hi);
+}
+
+// Adds up the four 32-bit lanes of 'sum' using 64-bit arithmetic and returns the total converted to int.
+static int SumToInt(uint32x4_t sum) {
+    const uint64x2_t pairs = vpaddlq_u32(sum);
+    const uint64x1_t total = vadd_u64(vget_low_u64(pairs), vget_high_u64(pairs));
+    return (int)vget_lane_u64(total, 0);
+}
+
+static int SSE16x16(const uint8_t* a, const uint8_t* b) {
+    // Sum of squared differences over 16 rows of 16 pixels.
+    uint32x4_t acc = vdupq_n_u32(0);
+    int offset;
+    // 'offset' walks the start of each row; consecutive rows are BPS bytes apart.
+    for (offset = 0; offset < 16 * BPS; offset += BPS) AccumulateSSE16(a + offset, b + offset, &acc);
+    return SumToInt(acc);
+}
+
+static int SSE16x8(const uint8_t* a, const uint8_t* b) {
+    // Sum of squared differences over 8 rows of 16 pixels.
+    uint32x4_t acc = vdupq_n_u32(0);
+    int offset;
+    // 'offset' walks the start of each row; consecutive rows are BPS bytes apart.
+    for (offset = 0; offset < 8 * BPS; offset += BPS) AccumulateSSE16(a + offset, b + offset, &acc);
+    return SumToInt(acc);
+}
+
+static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+    // Sum of squared differences over 8 rows of 8 pixels, consecutive rows being BPS bytes apart.
+    uint32x4_t acc = vdupq_n_u32(0);
+    int offset;
+    for (offset = 0; offset < 8 * BPS; offset += BPS) {
+        // One 8x8-bit product per u16 lane; the pair-wise accumulate then widens the squares to 32 bits.
+        const uint8x8_t delta = vabd_u8(vld1_u8(a + offset), vld1_u8(b + offset));
+        const uint16x8_t squares = vmull_u8(delta, delta);
+        acc = vpadalq_u16(acc, squares);
+    }
+    return SumToInt(acc);
+}
+
+static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+    // Sum of squared differences over a 4x4 block; Load4x4() (defined elsewhere) returns a block as 16 bytes.
+    const uint8x16_t abs_diff = vabdq_u8(Load4x4(a), Load4x4(b));
+    const uint16x8_t prod_lo = vmull_u8(vget_low_u8(abs_diff), vget_low_u8(abs_diff));
+    const uint16x8_t prod_hi = vmull_u8(vget_high_u8(abs_diff), vget_high_u8(abs_diff));
+    // Widen each half to u32 before adding them: two 255 * 255 products would wrap around in a u16 lane.
+    return SumToInt(vpadalq_u16(vpaddlq_u16(prod_lo), prod_hi));
+}
+
+//------------------------------------------------------------------------------
+
+// Compilation with gcc-4.6.x is problematic for now.
+#if !defined(WORK_AROUND_GCC)
+
+static int16x8_t Quantize(int16_t* const in, const VP8Matrix* const mtx, int offset) {
+    const uint16x8_t sharp = vld1q_u16(&mtx->sharpen_[offset]);
+    const uint16x8_t q = vld1q_u16(&mtx->q_[offset]);
+    const uint16x8_t iq = vld1q_u16(&mtx->iq_[offset]);
+    const uint32x4_t bias0 = vld1q_u32(&mtx->bias_[offset + 0]);
+    const uint32x4_t bias1 = vld1q_u32(&mtx->bias_[offset + 4]);
+    // Quantizes in[offset .. offset + 7]: returns the signed levels and overwrites 'in' with level * q.
+    const int16x8_t a = vld1q_s16(in + offset);               // in
+    const uint16x8_t b = vreinterpretq_u16_s16(vabsq_s16(a)); // coeff = abs(in)
+    const int16x8_t sign = vshrq_n_s16(a, 15);                // sign mask: -1 where in < 0, else 0
+    const uint16x8_t c = vaddq_u16(b, sharp);                 // + sharpen
+    const uint32x4_t m0 = vmull_u16(vget_low_u16(c), vget_low_u16(iq));
+    const uint32x4_t m1 = vmull_u16(vget_high_u16(c), vget_high_u16(iq));
+    const uint32x4_t m2 = vhaddq_u32(m0, bias0);
+    const uint32x4_t m3 = vhaddq_u32(m1, bias1);                                  // (coeff * iQ + bias) >> 1
+    const uint16x8_t c0 = vcombine_u16(vshrn_n_u32(m2, 16), vshrn_n_u32(m3, 16)); // QFIX=17 = 16+1
+    const uint16x8_t c1 = vminq_u16(c0, vdupq_n_u16(MAX_LEVEL)); // clamp to MAX_LEVEL
+    const int16x8_t c2 = veorq_s16(vreinterpretq_s16_u16(c1), sign);
+    const int16x8_t c3 = vsubq_s16(c2, sign); // restore sign
+    const int16x8_t c4 = vmulq_s16(c3, vreinterpretq_s16_u16(q)); // level * q
+    vst1q_s16(in + offset, c4);
+    assert(QFIX == 17); // this function can't work as is if QFIX != 16+1
+    return c3;
+}
+
+static const uint8_t kShuffles[4][8] = {{0, 1, 2, 3, 8, 9, 16, 17}, // byte indices: 2k and 2k+1 pick 16-bit value k
+                                        {10, 11, 4, 5, 6, 7, 12, 13},
+                                        {18, 19, 24, 25, 26, 27, 20, 21},
+                                        {14, 15, 22, 23, 28, 29, 30, 31}};
+
+static int QuantizeBlock(int16_t in[16], int16_t out[16], const VP8Matrix* const mtx) { // returns 1 if any level != 0
+    const int16x8_t out0 = Quantize(in, mtx, 0); // levels 0..7 (Quantize also rewrites 'in' in place)
+    const int16x8_t out1 = Quantize(in, mtx, 8); // levels 8..15
+    uint8x8x4_t shuffles;                        // the 16 levels reordered through kShuffles, four per register
+// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
+// non-standard versions there.
+#if defined(__APPLE__) && defined(__aarch64__) && defined(__apple_build_version__) && \
+    (__apple_build_version__ < 6020037)
+    uint8x16x2_t all_out;
+    INIT_VECTOR2(all_out, vreinterpretq_u8_s16(out0), vreinterpretq_u8_s16(out1));
+    INIT_VECTOR4(shuffles, vtbl2q_u8(all_out, vld1_u8(kShuffles[0])), vtbl2q_u8(all_out, vld1_u8(kShuffles[1])),
+                 vtbl2q_u8(all_out, vld1_u8(kShuffles[2])), vtbl2q_u8(all_out, vld1_u8(kShuffles[3])));
+#else
+    uint8x8x4_t all_out;
+    INIT_VECTOR4(all_out, vreinterpret_u8_s16(vget_low_s16(out0)), vreinterpret_u8_s16(vget_high_s16(out0)),
+                 vreinterpret_u8_s16(vget_low_s16(out1)), vreinterpret_u8_s16(vget_high_s16(out1)));
+    INIT_VECTOR4(shuffles, vtbl4_u8(all_out, vld1_u8(kShuffles[0])), vtbl4_u8(all_out, vld1_u8(kShuffles[1])),
+                 vtbl4_u8(all_out, vld1_u8(kShuffles[2])), vtbl4_u8(all_out, vld1_u8(kShuffles[3])));
+#endif
+    // Zigzag reordering
+    vst1_u8((uint8_t*)(out + 0), shuffles.val[0]);
+    vst1_u8((uint8_t*)(out + 4), shuffles.val[1]);
+    vst1_u8((uint8_t*)(out + 8), shuffles.val[2]);
+    vst1_u8((uint8_t*)(out + 12), shuffles.val[3]);
+    // Zero test on the registers: reading 'out' back through a uint64_t* breaks strict aliasing and alignment.
+    {
+        const uint8x8_t lo = vorr_u8(shuffles.val[0], shuffles.val[1]);
+        const uint8x8_t hi = vorr_u8(shuffles.val[2], shuffles.val[3]);
+        return vget_lane_u64(vreinterpret_u64_u8(vorr_u8(lo, hi)), 0) != 0;
+    }
+}
+
+static int Quantize2Blocks(int16_t in[32], int16_t out[32], const VP8Matrix* const mtx) {
+    // Quantizes two consecutive 16-coefficient blocks in order; bit i of the result is QuantizeBlock()'s
+    // return value for block i.
+    const int first_nz = QuantizeBlock(in, out, mtx);
+    return first_nz | (QuantizeBlock(in + 16, out + 16, mtx) << 1);
+}
+
+#endif // !WORK_AROUND_GCC
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspInitNEON(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitNEON(void) {
+    // Point the encoder's DSP hooks at the NEON implementations from this file: transforms first.
+    VP8FTransformWHT = FTransformWHT;
+    VP8FTransform = FTransform;
+    VP8ITransform = ITransform;
+    // Then the distortion, histogram and sum-of-squared-error functions.
+    VP8SSE4x4 = SSE4x4;
+    VP8SSE8x8 = SSE8x8;
+    VP8SSE16x8 = SSE16x8;
+    VP8SSE16x16 = SSE16x16;
+    VP8CollectHistogram = CollectHistogram;
+    VP8TDisto16x16 = Disto16x16;
+    VP8TDisto4x4 = Disto4x4;
+#if !defined(WORK_AROUND_GCC)
+    VP8EncQuantize2Blocks = Quantize2Blocks;
+    VP8EncQuantizeBlock = QuantizeBlock;
+#endif
+}
+
+#else // !WEBP_USE_NEON
+
+WEBP_DSP_INIT_STUB(VP8EncDspInitNEON)
+
+#endif // WEBP_USE_NEON
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/enc_sse2.c b/codec/L2/demos/webpEnc/host/src/dsp/enc_sse2.c
new file mode 100644
index 0000000000..1c7c53b1db
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/enc_sse2.c
@@ -0,0 +1,1439 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 version of speed-critical encoding functions.
+//
+// Author: Christian Duvivier (cduvivier@google.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+#include <stdlib.h> // for abs()
+#include <emmintrin.h>
+
+#include "../enc/cost.h"
+#include "../enc/vp8enci.h"
+
+//------------------------------------------------------------------------------
+// Quite useful function for debugging. Left here (disabled) for convenience.
+
+#if 0
+#include <stdio.h>
+static void PrintReg(const __m128i r, const char* const name, int size) { // size: lane width in bits
+  int n;
+  union {
+    __m128i r;
+    uint8_t i8[16];
+    uint16_t i16[8];
+    uint32_t i32[4];
+    uint64_t i64[2];
+  } tmp;
+  tmp.r = r; // view the register through the lane width selected by 'size'
+  fprintf(stderr, "%s\t: ", name);
+  if (size == 8) {
+    for (n = 0; n < 16; ++n) fprintf(stderr, "%.2x ", tmp.i8[n]);
+  } else if (size == 16) {
+    for (n = 0; n < 8; ++n) fprintf(stderr, "%.4x ", tmp.i16[n]);
+  } else if (size == 32) {
+    for (n = 0; n < 4; ++n) fprintf(stderr, "%.8x ", tmp.i32[n]);
+  } else { // any other size prints 64-bit lanes; 'll' + cast because 'long' may be only 32 bits wide
+    for (n = 0; n < 2; ++n) fprintf(stderr, "%.16llx ", (unsigned long long)tmp.i64[n]);
+  }
+  fprintf(stderr, "\n");
+}
+#endif
+
+//------------------------------------------------------------------------------
+// Transforms (Paragraph 14.4)
+
+// Does one (do_two == 0) or two inverse transforms, adds the result to 'ref' and stores it, saturated, in 'dst'.
+static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst, int do_two) {
+    // This implementation makes use of 16-bit fixed point versions of two
+    // multiply constants:
+    //    K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
+    //    K2 = sqrt(2) * sin (pi/8) ~= 35468 / 2^16
+    //
+    // To be able to use signed 16-bit integers, we use the following trick to
+    // have constants within range:
+    // - Associated constants are obtained by subtracting the 16-bit fixed point
+    //   version of one:
+    //      k = K - (1 << 16)  =>  K = k + (1 << 16)
+    //      K1 = 85627  =>  k1 =  20091
+    //      K2 = 35468  =>  k2 = -30068
+    // - The multiplication of a variable by a constant become the sum of the
+    //   variable and the multiplication of that variable by the associated
+    //   constant:
+    //      (x * K) >> 16 = (x * (k + (1 << 16))) >> 16 = ((x * k ) >> 16) + x
+    const __m128i k1 = _mm_set1_epi16(20091);
+    const __m128i k2 = _mm_set1_epi16(-30068);
+    __m128i T0, T1, T2, T3;
+
+    // Load and concatenate the transform coefficients (we'll do two inverse
+    // transforms in parallel). In the case of only one inverse transform, the
+    // second half of the vectors holds zeros (from _mm_loadl_epi64) whose
+    // results we'll never use nor store.
+    __m128i in0, in1, in2, in3;
+    {
+        in0 = _mm_loadl_epi64((const __m128i*)&in[0]);
+        in1 = _mm_loadl_epi64((const __m128i*)&in[4]);
+        in2 = _mm_loadl_epi64((const __m128i*)&in[8]);
+        in3 = _mm_loadl_epi64((const __m128i*)&in[12]);
+        // a00 a10 a20 a30   x x x x
+        // a01 a11 a21 a31   x x x x
+        // a02 a12 a22 a32   x x x x
+        // a03 a13 a23 a33   x x x x
+        if (do_two) {
+            const __m128i inB0 = _mm_loadl_epi64((const __m128i*)&in[16]);
+            const __m128i inB1 = _mm_loadl_epi64((const __m128i*)&in[20]);
+            const __m128i inB2 = _mm_loadl_epi64((const __m128i*)&in[24]);
+            const __m128i inB3 = _mm_loadl_epi64((const __m128i*)&in[28]);
+            in0 = _mm_unpacklo_epi64(in0, inB0);
+            in1 = _mm_unpacklo_epi64(in1, inB1);
+            in2 = _mm_unpacklo_epi64(in2, inB2);
+            in3 = _mm_unpacklo_epi64(in3, inB3);
+            // a00 a10 a20 a30   b00 b10 b20 b30
+            // a01 a11 a21 a31   b01 b11 b21 b31
+            // a02 a12 a22 a32   b02 b12 b22 b32
+            // a03 a13 a23 a33   b03 b13 b23 b33
+        }
+    }
+
+    // Vertical pass and subsequent transpose.
+    {
+        // First pass, c and d calculations are longer because of the "trick"
+        // multiplications.
+        const __m128i a = _mm_add_epi16(in0, in2);
+        const __m128i b = _mm_sub_epi16(in0, in2);
+        // c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3
+        const __m128i c1 = _mm_mulhi_epi16(in1, k2);
+        const __m128i c2 = _mm_mulhi_epi16(in3, k1);
+        const __m128i c3 = _mm_sub_epi16(in1, in3);
+        const __m128i c4 = _mm_sub_epi16(c1, c2);
+        const __m128i c = _mm_add_epi16(c3, c4);
+        // d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3
+        const __m128i d1 = _mm_mulhi_epi16(in1, k1);
+        const __m128i d2 = _mm_mulhi_epi16(in3, k2);
+        const __m128i d3 = _mm_add_epi16(in1, in3);
+        const __m128i d4 = _mm_add_epi16(d1, d2);
+        const __m128i d = _mm_add_epi16(d3, d4);
+
+        // Second pass.
+        const __m128i tmp0 = _mm_add_epi16(a, d);
+        const __m128i tmp1 = _mm_add_epi16(b, c);
+        const __m128i tmp2 = _mm_sub_epi16(b, c);
+        const __m128i tmp3 = _mm_sub_epi16(a, d);
+
+        // Transpose the two 4x4.
+        // a00 a01 a02 a03   b00 b01 b02 b03
+        // a10 a11 a12 a13   b10 b11 b12 b13
+        // a20 a21 a22 a23   b20 b21 b22 b23
+        // a30 a31 a32 a33   b30 b31 b32 b33
+        const __m128i transpose0_0 = _mm_unpacklo_epi16(tmp0, tmp1);
+        const __m128i transpose0_1 = _mm_unpacklo_epi16(tmp2, tmp3);
+        const __m128i transpose0_2 = _mm_unpackhi_epi16(tmp0, tmp1);
+        const __m128i transpose0_3 = _mm_unpackhi_epi16(tmp2, tmp3);
+        // a00 a10 a01 a11   a02 a12 a03 a13
+        // a20 a30 a21 a31   a22 a32 a23 a33
+        // b00 b10 b01 b11   b02 b12 b03 b13
+        // b20 b30 b21 b31   b22 b32 b23 b33
+        const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+        const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
+        const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+        const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
+        // a00 a10 a20 a30 a01 a11 a21 a31
+        // b00 b10 b20 b30 b01 b11 b21 b31
+        // a02 a12 a22 a32 a03 a13 a23 a33
+        // b02 b12 b22 b32 b03 b13 b23 b33
+        T0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
+        T1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
+        T2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
+        T3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
+        // a00 a10 a20 a30   b00 b10 b20 b30
+        // a01 a11 a21 a31   b01 b11 b21 b31
+        // a02 a12 a22 a32   b02 b12 b22 b32
+        // a03 a13 a23 a33   b03 b13 b23 b33
+    }
+
+    // Horizontal pass and subsequent transpose.
+    {
+        // First pass, c and d calculations are longer because of the "trick"
+        // multiplications.
+        const __m128i four = _mm_set1_epi16(4);
+        const __m128i dc = _mm_add_epi16(T0, four);
+        const __m128i a = _mm_add_epi16(dc, T2);
+        const __m128i b = _mm_sub_epi16(dc, T2);
+        // c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
+        const __m128i c1 = _mm_mulhi_epi16(T1, k2);
+        const __m128i c2 = _mm_mulhi_epi16(T3, k1);
+        const __m128i c3 = _mm_sub_epi16(T1, T3);
+        const __m128i c4 = _mm_sub_epi16(c1, c2);
+        const __m128i c = _mm_add_epi16(c3, c4);
+        // d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3
+        const __m128i d1 = _mm_mulhi_epi16(T1, k1);
+        const __m128i d2 = _mm_mulhi_epi16(T3, k2);
+        const __m128i d3 = _mm_add_epi16(T1, T3);
+        const __m128i d4 = _mm_add_epi16(d1, d2);
+        const __m128i d = _mm_add_epi16(d3, d4);
+
+        // Second pass.
+        const __m128i tmp0 = _mm_add_epi16(a, d);
+        const __m128i tmp1 = _mm_add_epi16(b, c);
+        const __m128i tmp2 = _mm_sub_epi16(b, c);
+        const __m128i tmp3 = _mm_sub_epi16(a, d);
+        const __m128i shifted0 = _mm_srai_epi16(tmp0, 3);
+        const __m128i shifted1 = _mm_srai_epi16(tmp1, 3);
+        const __m128i shifted2 = _mm_srai_epi16(tmp2, 3);
+        const __m128i shifted3 = _mm_srai_epi16(tmp3, 3);
+
+        // Transpose the two 4x4.
+        // a00 a01 a02 a03   b00 b01 b02 b03
+        // a10 a11 a12 a13   b10 b11 b12 b13
+        // a20 a21 a22 a23   b20 b21 b22 b23
+        // a30 a31 a32 a33   b30 b31 b32 b33
+        const __m128i transpose0_0 = _mm_unpacklo_epi16(shifted0, shifted1);
+        const __m128i transpose0_1 = _mm_unpacklo_epi16(shifted2, shifted3);
+        const __m128i transpose0_2 = _mm_unpackhi_epi16(shifted0, shifted1);
+        const __m128i transpose0_3 = _mm_unpackhi_epi16(shifted2, shifted3);
+        // a00 a10 a01 a11   a02 a12 a03 a13
+        // a20 a30 a21 a31   a22 a32 a23 a33
+        // b00 b10 b01 b11   b02 b12 b03 b13
+        // b20 b30 b21 b31   b22 b32 b23 b33
+        const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+        const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
+        const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+        const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
+        // a00 a10 a20 a30 a01 a11 a21 a31
+        // b00 b10 b20 b30 b01 b11 b21 b31
+        // a02 a12 a22 a32 a03 a13 a23 a33
+        // b02 b12 b22 b32 b03 b13 b23 b33
+        T0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
+        T1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
+        T2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
+        T3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
+        // a00 a10 a20 a30   b00 b10 b20 b30
+        // a01 a11 a21 a31   b01 b11 b21 b31
+        // a02 a12 a22 a32   b02 b12 b22 b32
+        // a03 a13 a23 a33   b03 b13 b23 b33
+    }
+
+    // Add inverse transform to 'ref' and store.
+    {
+        const __m128i zero = _mm_setzero_si128();
+        // Load the reference(s).
+        __m128i ref0, ref1, ref2, ref3;
+        if (do_two) {
+            // Load eight bytes/pixels per line.
+            ref0 = _mm_loadl_epi64((const __m128i*)&ref[0 * BPS]);
+            ref1 = _mm_loadl_epi64((const __m128i*)&ref[1 * BPS]);
+            ref2 = _mm_loadl_epi64((const __m128i*)&ref[2 * BPS]);
+            ref3 = _mm_loadl_epi64((const __m128i*)&ref[3 * BPS]);
+        } else {
+            // Load four bytes/pixels per line.
+            ref0 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[0 * BPS]));
+            ref1 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[1 * BPS]));
+            ref2 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[2 * BPS]));
+            ref3 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[3 * BPS]));
+        }
+        // Convert to 16b.
+        ref0 = _mm_unpacklo_epi8(ref0, zero);
+        ref1 = _mm_unpacklo_epi8(ref1, zero);
+        ref2 = _mm_unpacklo_epi8(ref2, zero);
+        ref3 = _mm_unpacklo_epi8(ref3, zero);
+        // Add the inverse transform(s).
+        ref0 = _mm_add_epi16(ref0, T0);
+        ref1 = _mm_add_epi16(ref1, T1);
+        ref2 = _mm_add_epi16(ref2, T2);
+        ref3 = _mm_add_epi16(ref3, T3);
+        // Unsigned saturate to 8b.
+        ref0 = _mm_packus_epi16(ref0, ref0);
+        ref1 = _mm_packus_epi16(ref1, ref1);
+        ref2 = _mm_packus_epi16(ref2, ref2);
+        ref3 = _mm_packus_epi16(ref3, ref3);
+        // Store the results.
+        if (do_two) {
+            // Store eight bytes/pixels per line.
+            _mm_storel_epi64((__m128i*)&dst[0 * BPS], ref0);
+            _mm_storel_epi64((__m128i*)&dst[1 * BPS], ref1);
+            _mm_storel_epi64((__m128i*)&dst[2 * BPS], ref2);
+            _mm_storel_epi64((__m128i*)&dst[3 * BPS], ref3);
+        } else {
+            // Store four bytes/pixels per line.
+            WebPUint32ToMem(&dst[0 * BPS], _mm_cvtsi128_si32(ref0));
+            WebPUint32ToMem(&dst[1 * BPS], _mm_cvtsi128_si32(ref1));
+            WebPUint32ToMem(&dst[2 * BPS], _mm_cvtsi128_si32(ref2));
+            WebPUint32ToMem(&dst[3 * BPS], _mm_cvtsi128_si32(ref3));
+        }
+    }
+}
+
+// First pass of the forward 4x4 transform, run on the four rows of a block of
+// 16-bit residuals packed into *in01 (rows 0 and 1) and *in23 (rows 2 and 3)
+// in the pair-interleaved order shown below.
+// With a0 = d0 + d3, a1 = d1 + d2, a2 = d1 - d2 and a3 = d0 - d3 for a row
+// d0..d3, each row becomes:
+//   tmp0 = (a0 + a1) * 8
+//   tmp1 = (a3 * 5352 + a2 * 2217 + 1812) >> 9
+//   tmp2 = (a0 - a1) * 8
+//   tmp3 = (a3 * 2217 - a2 * 5352 + 937) >> 9
+// The results are saturated to signed 16 bits. *out01 receives the transformed
+// row 0 (low 64 bits) and row 1 (high 64 bits); *out32 receives row 3 (low
+// 64 bits) and row 2 (high 64 bits).
+static void FTransformPass1(const __m128i* const in01,
+                            const __m128i* const in23,
+                            __m128i* const out01,
+                            __m128i* const out32) {
+    // Rounding terms added before the arithmetic shift by 9.
+    const __m128i k937 = _mm_set1_epi32(937);
+    const __m128i k1812 = _mm_set1_epi32(1812);
+
+    // Multiplier pairs for _mm_madd_epi16. _mm_set_epi16 lists lanes from the
+    // highest to the lowest, so the last argument is lane 0.
+    const __m128i k88p = _mm_set_epi16(8, 8, 8, 8, 8, 8, 8, 8);
+    const __m128i k88m = _mm_set_epi16(-8, 8, -8, 8, -8, 8, -8, 8);
+    const __m128i k5352_2217p = _mm_set_epi16(2217, 5352, 2217, 5352, 2217, 5352, 2217, 5352);
+    const __m128i k5352_2217m = _mm_set_epi16(-5352, 2217, -5352, 2217, -5352, 2217, -5352, 2217);
+
+    // *in01 = 00 01 10 11 02 03 12 13
+    // *in23 = 20 21 30 31 22 23 32 33
+    const __m128i shuf01_p = _mm_shufflehi_epi16(*in01, _MM_SHUFFLE(2, 3, 0, 1));
+    const __m128i shuf23_p = _mm_shufflehi_epi16(*in23, _MM_SHUFFLE(2, 3, 0, 1));
+    // 00 01 10 11 03 02 13 12
+    // 20 21 30 31 23 22 33 32
+    const __m128i s01 = _mm_unpacklo_epi64(shuf01_p, shuf23_p);
+    const __m128i s32 = _mm_unpackhi_epi64(shuf01_p, shuf23_p);
+    // 00 01 10 11 20 21 30 31
+    // 03 02 13 12 23 22 33 32
+    const __m128i a01 = _mm_add_epi16(s01, s32);
+    const __m128i a32 = _mm_sub_epi16(s01, s32);
+    // [d0 + d3 | d1 + d2 | ...] = [a0 a1 | a0' a1' | ... ]
+    // [d0 - d3 | d1 - d2 | ...] = [a3 a2 | a3' a2' | ... ]
+
+    // Each madd produces one 32-bit result per row (four rows per register).
+    const __m128i tmp0 = _mm_madd_epi16(a01, k88p); // [ (a0 + a1) << 3, ... ]
+    const __m128i tmp2 = _mm_madd_epi16(a01, k88m); // [ (a0 - a1) << 3, ... ]
+    const __m128i tmp1_1 = _mm_madd_epi16(a32, k5352_2217p);
+    const __m128i tmp3_1 = _mm_madd_epi16(a32, k5352_2217m);
+    const __m128i tmp1_2 = _mm_add_epi32(tmp1_1, k1812);
+    const __m128i tmp3_2 = _mm_add_epi32(tmp3_1, k937);
+    const __m128i tmp1 = _mm_srai_epi32(tmp1_2, 9);
+    const __m128i tmp3 = _mm_srai_epi32(tmp3_2, 9);
+    // Narrow back to 16 bits with signed saturation, then regroup the values
+    // so that each row's four outputs end up contiguous.
+    const __m128i s03 = _mm_packs_epi32(tmp0, tmp2);
+    const __m128i s12 = _mm_packs_epi32(tmp1, tmp3);
+    const __m128i s_lo = _mm_unpacklo_epi16(s03, s12); // 0 1 0 1 0 1...
+    const __m128i s_hi = _mm_unpackhi_epi16(s03, s12); // 2 3 2 3 2 3
+    const __m128i v23 = _mm_unpackhi_epi32(s_lo, s_hi);
+    *out01 = _mm_unpacklo_epi32(s_lo, s_hi);
+    // Swap the two 64-bit halves so that row 3 comes first.
+    *out32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2)); // 3 2 3 2 3 2..
+}
+
+// Second pass of the forward 4x4 transform. *v01 holds intermediate rows 0
+// (low 64 bits) and 1 (high 64 bits); *v32 holds rows 3 (low) and 2 (high).
+// The 16 resulting coefficients are written to out[0..15] with two unaligned
+// 128-bit stores: out[0..3] = d0, out[4..7] = f1 + (a3 != 0), out[8..11] = d2
+// and out[12..15] = f3, using the names from the comments below.
+static void FTransformPass2(const __m128i* const v01, const __m128i* const v32, int16_t* out) {
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i seven = _mm_set1_epi16(7);
+    // Multiplier pairs for _mm_madd_epi16 (the last argument is lane 0).
+    const __m128i k5352_2217 = _mm_set_epi16(5352, 2217, 5352, 2217, 5352, 2217, 5352, 2217);
+    const __m128i k2217_5352 = _mm_set_epi16(2217, -5352, 2217, -5352, 2217, -5352, 2217, -5352);
+    // Rounding term for f1, with an extra 1 << 16 that becomes '+ 1' after the
+    // shift by 16 (see the g1 computation below).
+    const __m128i k12000_plus_one = _mm_set1_epi32(12000 + (1 << 16));
+    const __m128i k51000 = _mm_set1_epi32(51000);
+
+    // Same operations are done on the (0,3) and (1,2) pairs.
+    // a0 = v0 + v3
+    // a1 = v1 + v2
+    // a3 = v0 - v3
+    // a2 = v1 - v2
+    const __m128i a01 = _mm_add_epi16(*v01, *v32);
+    const __m128i a32 = _mm_sub_epi16(*v01, *v32);
+    // Broadcast the high halves (a1 and a2) into both 64-bit halves.
+    const __m128i a11 = _mm_unpackhi_epi64(a01, a01);
+    const __m128i a22 = _mm_unpackhi_epi64(a32, a32);
+    const __m128i a01_plus_7 = _mm_add_epi16(a01, seven);
+
+    // d0 = (a0 + a1 + 7) >> 4;
+    // d2 = (a0 - a1 + 7) >> 4;
+    // Only the low four lanes of c0/c2/d0/d2 are used for the output.
+    const __m128i c0 = _mm_add_epi16(a01_plus_7, a11);
+    const __m128i c2 = _mm_sub_epi16(a01_plus_7, a11);
+    const __m128i d0 = _mm_srai_epi16(c0, 4);
+    const __m128i d2 = _mm_srai_epi16(c2, 4);
+
+    // f1 = ((b3 * 5352 + b2 * 2217 + 12000) >> 16)
+    // f3 = ((b3 * 2217 - b2 * 5352 + 51000) >> 16)
+    // b23 interleaves a2 (even lanes) with a3 (odd lanes) for the madd.
+    const __m128i b23 = _mm_unpacklo_epi16(a22, a32);
+    const __m128i c1 = _mm_madd_epi16(b23, k5352_2217);
+    const __m128i c3 = _mm_madd_epi16(b23, k2217_5352);
+    const __m128i d1 = _mm_add_epi32(c1, k12000_plus_one);
+    const __m128i d3 = _mm_add_epi32(c3, k51000);
+    const __m128i e1 = _mm_srai_epi32(d1, 16);
+    const __m128i e3 = _mm_srai_epi32(d3, 16);
+    const __m128i f1 = _mm_packs_epi32(e1, e1);
+    const __m128i f3 = _mm_packs_epi32(e3, e3);
+    // f1 = f1 + (a3 != 0);
+    // The compare will return (0xffff, 0) for (==0, !=0). To turn that into the
+    // desired (0, 1), we add one earlier through k12000_plus_one.
+    // -> f1 = f1 + 1 - (a3 == 0)
+    const __m128i g1 = _mm_add_epi16(f1, _mm_cmpeq_epi16(a32, zero));
+
+    // Assemble the two output vectors from the low halves only.
+    const __m128i d0_g1 = _mm_unpacklo_epi64(d0, g1);
+    const __m128i d2_f3 = _mm_unpacklo_epi64(d2, f3);
+    _mm_storeu_si128((__m128i*)&out[0], d0_g1);
+    _mm_storeu_si128((__m128i*)&out[8], d2_f3);
+}
+
+// Forward 4x4 transform of the residual (src - ref) of a single block.
+// Each of the four rows is fetched with a 64-bit load, so eight bytes are read
+// per row from both 'src' and 'ref' even though only the first four pixels
+// contribute to the result; rows are BPS bytes apart. The 16 coefficients are
+// written to out[0..15].
+static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+    const __m128i zero = _mm_setzero_si128();
+    __m128i diff[4];
+    __m128i rows01, rows23;
+    __m128i v01, v32;
+    int y;
+
+    // Widen both inputs to 16 bits and subtract. Lanes 0..3 of diff[y] hold
+    // the residual of row y; lanes 4..7 hold the residual of the next four
+    // pixels and are dropped by the 32-bit unpack below.
+    for (y = 0; y < 4; ++y) {
+        const __m128i src_row = _mm_loadl_epi64((const __m128i*)&src[y * BPS]);
+        const __m128i ref_row = _mm_loadl_epi64((const __m128i*)&ref[y * BPS]);
+        diff[y] = _mm_sub_epi16(_mm_unpacklo_epi8(src_row, zero), _mm_unpacklo_epi8(ref_row, zero));
+    }
+
+    // Interleave pairs of rows, 32 bits at a time:
+    //   rows01 = 00 01 10 11 02 03 12 13
+    //   rows23 = 20 21 30 31 22 23 32 33
+    rows01 = _mm_unpacklo_epi32(diff[0], diff[1]);
+    rows23 = _mm_unpacklo_epi32(diff[2], diff[3]);
+
+    // Two passes; the second one stores the coefficients.
+    FTransformPass1(&rows01, &rows23, &v01, &v32);
+    FTransformPass2(&v01, &v32, out);
+}
+
+// Forward 4x4 transform of two horizontally adjacent blocks at once. Pixels
+// 0..3 of each 8-pixel row form the first block and pixels 4..7 the second;
+// rows are BPS bytes apart. The first block's 16 coefficients go to
+// out[0..15], the second block's to out[16..31].
+static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+    const __m128i zero = _mm_setzero_si128();
+    __m128i diff[4];
+    __m128i left01, left23, right01, right23;
+    __m128i v01, v32;
+    int y;
+
+    // Widen to 16 bits and subtract: diff[y] = y0 y1 y2 y3  y0' y1' y2' y3'
+    // (primed entries belong to the second block).
+    for (y = 0; y < 4; ++y) {
+        const __m128i src_row = _mm_loadl_epi64((const __m128i*)&src[y * BPS]);
+        const __m128i ref_row = _mm_loadl_epi64((const __m128i*)&ref[y * BPS]);
+        diff[y] = _mm_sub_epi16(_mm_unpacklo_epi8(src_row, zero), _mm_unpacklo_epi8(ref_row, zero));
+    }
+
+    // The low 64 bits of each row feed the first block, the high 64 bits the
+    // second one. Rows are interleaved in pairs, 32 bits at a time.
+    left01 = _mm_unpacklo_epi32(diff[0], diff[1]);
+    left23 = _mm_unpacklo_epi32(diff[2], diff[3]);
+    right01 = _mm_unpackhi_epi32(diff[0], diff[1]);
+    right23 = _mm_unpackhi_epi32(diff[2], diff[3]);
+
+    // First block.
+    FTransformPass1(&left01, &left23, &v01, &v32);
+    FTransformPass2(&v01, &v32, out + 0);
+
+    // Second block.
+    FTransformPass1(&right01, &right23, &v01, &v32);
+    FTransformPass2(&v01, &v32, out + 16);
+}
+
+// Combines the leading coefficient of four consecutive 16-coefficient blocks,
+// A0 = in[0], A1 = in[16], A2 = in[32] and A3 = in[48], into four 32-bit
+// values stored in *out:
+//   a0 = A0 + A2, a1 = A1 + A3, a2 = A1 - A3, a3 = A0 - A2
+//   *out = [ a0 + a1, a3 + a2, a3 - a2, a0 - a1 ]
+// The a* terms are formed with saturating 16-bit arithmetic. Each load fetches
+// four int16_t values (in[k * 16 .. k * 16 + 3]); only the first one affects
+// the result because the other lanes are multiplied by zero.
+static void FTransformWHTRow(const int16_t* const in, __m128i* const out) {
+    // _mm_set_epi16 lists lanes from the highest to the lowest: only lanes
+    // 0..3 carry a non-zero multiplier.
+    const __m128i kMult1 = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
+    const __m128i kMult2 = _mm_set_epi16(0, 0, 0, 0, -1, 1, -1, 1);
+    // 'in' is read-only: keep the const qualifier in the load casts.
+    const __m128i src0 = _mm_loadl_epi64((const __m128i*)&in[0 * 16]);
+    const __m128i src1 = _mm_loadl_epi64((const __m128i*)&in[1 * 16]);
+    const __m128i src2 = _mm_loadl_epi64((const __m128i*)&in[2 * 16]);
+    const __m128i src3 = _mm_loadl_epi64((const __m128i*)&in[3 * 16]);
+    const __m128i A01 = _mm_unpacklo_epi16(src0, src1); // A0 A1 | ...
+    const __m128i A23 = _mm_unpacklo_epi16(src2, src3); // A2 A3 | ...
+    const __m128i B0 = _mm_adds_epi16(A01, A23);        // a0 | a1 | ...
+    const __m128i B1 = _mm_subs_epi16(A01, A23);        // a3 | a2 | ...
+    const __m128i C0 = _mm_unpacklo_epi32(B0, B1);      // a0 | a1 | a3 | a2
+    const __m128i C1 = _mm_unpacklo_epi32(B1, B0);      // a3 | a2 | a0 | a1
+    // madd sums adjacent 16-bit products into 32-bit lanes.
+    const __m128i D0 = _mm_madd_epi16(C0, kMult1);      // out0, out1
+    const __m128i D1 = _mm_madd_epi16(C1, kMult2);      // out2, out3
+    *out = _mm_unpacklo_epi64(D0, D1);
+}
+
+// Walsh-Hadamard style transform built from four FTransformWHTRow() results,
+// taken at in + 0, in + 64, in + 128 and in + 192. The rows are combined with
+// a second butterfly in 32-bit precision, halved with an arithmetic shift,
+// saturated to 16 bits and written to out[0..15].
+static void FTransformWHT(const int16_t* in, int16_t* out) {
+    __m128i row[4];
+    int i;
+
+    for (i = 0; i < 4; ++i) {
+        FTransformWHTRow(in + i * 64, &row[i]);
+    }
+
+    {
+        const __m128i sum02 = _mm_add_epi32(row[0], row[2]);  // a0
+        const __m128i sum13 = _mm_add_epi32(row[1], row[3]);  // a1
+        const __m128i dif13 = _mm_sub_epi32(row[1], row[3]);  // a2
+        const __m128i dif02 = _mm_sub_epi32(row[0], row[2]);  // a3
+        const __m128i q0 = _mm_srai_epi32(_mm_add_epi32(sum02, sum13), 1);
+        const __m128i q1 = _mm_srai_epi32(_mm_add_epi32(dif02, dif13), 1);
+        const __m128i q2 = _mm_srai_epi32(_mm_sub_epi32(dif02, dif13), 1);
+        const __m128i q3 = _mm_srai_epi32(_mm_sub_epi32(sum02, sum13), 1);
+        // out[0..7] = q0, q1 and out[8..15] = q2, q3 (unaligned stores).
+        _mm_storeu_si128((__m128i*)&out[0], _mm_packs_epi32(q0, q1));
+        _mm_storeu_si128((__m128i*)&out[8], _mm_packs_epi32(q2, q3));
+    }
+}
+
+//------------------------------------------------------------------------------
+// Compute susceptibility based on DCT-coeff histograms:
+// the higher, the "easier" the macroblock is to compress.
+
+// Builds a histogram of quantized transform coefficients for the blocks
+// start_block .. end_block - 1. For each block, the residual between 'ref' and
+// 'pred' at offset VP8DspScan[block] is transformed with FTransform(); each
+// coefficient is mapped to bin = min(abs(coeff) >> 3, MAX_COEFF_THRESH) and
+// the bin's counter is incremented. The counts are then handed to
+// VP8SetHistogramData() together with 'histo'.
+static void CollectHistogram(
+    const uint8_t* ref, const uint8_t* pred, int start_block, int end_block, VP8Histogram* const histo) {
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i bin_cap = _mm_set1_epi16(MAX_COEFF_THRESH);
+    int distribution[MAX_COEFF_THRESH + 1] = {0};
+    int block;
+
+    for (block = start_block; block < end_block; ++block) {
+        int16_t coeffs[16];
+        __m128i lo, hi;
+        int k;
+
+        FTransform(ref + VP8DspScan[block], pred + VP8DspScan[block], coeffs);
+
+        // Turn the coefficients into bin indices, in place.
+        lo = _mm_loadu_si128((__m128i*)&coeffs[0]);
+        hi = _mm_loadu_si128((__m128i*)&coeffs[8]);
+        // abs(v) = max(v, 0 - v), in 16 bits.
+        lo = _mm_max_epi16(lo, _mm_sub_epi16(zero, lo));
+        hi = _mm_max_epi16(hi, _mm_sub_epi16(zero, hi));
+        // bin = min(abs(v) >> 3, MAX_COEFF_THRESH)
+        lo = _mm_min_epi16(_mm_srai_epi16(lo, 3), bin_cap);
+        hi = _mm_min_epi16(_mm_srai_epi16(hi, 3), bin_cap);
+        _mm_storeu_si128((__m128i*)&coeffs[0], lo);
+        _mm_storeu_si128((__m128i*)&coeffs[8], hi);
+
+        // Count one hit per coefficient.
+        for (k = 0; k < 16; ++k) {
+            distribution[coeffs[k]] += 1;
+        }
+    }
+    VP8SetHistogramData(distribution, histo);
+}
+
+//------------------------------------------------------------------------------
+// Intra predictions
+
+// Helper for chroma-DC predictions: fills an 8x8 block at 'dst' (rows BPS
+// bytes apart) with the byte 'v', one 8-byte store per row.
+static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
+    const __m128i splat = _mm_set1_epi8(v);
+    int y;
+    for (y = 0; y != 8; ++y) {
+        uint8_t* const row = dst + y * BPS;
+        _mm_storel_epi64((__m128i*)row, splat);
+    }
+}
+
+// Fills a 16x16 block at 'dst' (rows BPS bytes apart) with the byte 'v'.
+// Uses aligned 16-byte stores, so every destination row must be 16-byte
+// aligned.
+static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
+    const __m128i splat = _mm_set1_epi8(v);
+    int y;
+    for (y = 0; y != 16; ++y) {
+        uint8_t* const row = dst + y * BPS;
+        _mm_store_si128((__m128i*)row, splat);
+    }
+}
+
+// Fills a square block at 'dst' with 'value'. 'size' selects the block width:
+// 4 uses memset() on four rows, 8 uses Put8x8uv(), and any other value takes
+// the 16x16 path through Put16().
+static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
+    switch (size) {
+        case 4: {
+            int y;
+            for (y = 0; y < 4; ++y) {
+                memset(dst + y * BPS, value, 4);
+            }
+            break;
+        }
+        case 8:
+            Put8x8uv(value, dst);
+            break;
+        default:
+            Put16(value, dst);
+            break;
+    }
+}
+
+// Vertical prediction for an 8x8 block: copies the eight bytes at 'top' into
+// each of the eight rows of 'dst' (rows BPS bytes apart).
+static WEBP_INLINE void VE8uv(uint8_t* dst, const uint8_t* top) {
+    const __m128i top_row = _mm_loadl_epi64((const __m128i*)top);
+    int y;
+    for (y = 0; y != 8; ++y) {
+        uint8_t* const row = dst + y * BPS;
+        _mm_storel_epi64((__m128i*)row, top_row);
+    }
+}
+
+// Vertical prediction for a 16x16 block: copies the sixteen bytes at 'top'
+// into each of the sixteen rows of 'dst'. Both the load and the stores are
+// the aligned variants, so 'top' and every destination row must be 16-byte
+// aligned.
+static WEBP_INLINE void VE16(uint8_t* dst, const uint8_t* top) {
+    const __m128i top_row = _mm_load_si128((const __m128i*)top);
+    int y;
+    for (y = 0; y != 16; ++y) {
+        uint8_t* const row = dst + y * BPS;
+        _mm_store_si128((__m128i*)row, top_row);
+    }
+}
+
+// Vertical prediction dispatcher. Without top samples (top == NULL) the block
+// is filled with the constant 127; otherwise size 8 uses VE8uv() and any
+// other size uses VE16().
+static WEBP_INLINE void VerticalPred(uint8_t* dst, const uint8_t* top, int size) {
+    if (top == NULL) {
+        Fill(dst, 127, size);
+        return;
+    }
+    if (size == 8) {
+        VE8uv(dst, top);
+        return;
+    }
+    VE16(dst, top);
+}
+
+// Horizontal prediction for an 8x8 block: row y of 'dst' is filled with the
+// byte left[y] (rows are BPS bytes apart).
+static WEBP_INLINE void HE8uv(uint8_t* dst, const uint8_t* left) {
+    int y;
+    for (y = 0; y < 8; ++y) {
+        uint8_t* const row = dst + y * BPS;
+        _mm_storel_epi64((__m128i*)row, _mm_set1_epi8(left[y]));
+    }
+}
+
+// Horizontal prediction for a 16x16 block: row y of 'dst' is filled with the
+// byte left[y]. Uses aligned 16-byte stores, so every destination row must be
+// 16-byte aligned.
+static WEBP_INLINE void HE16(uint8_t* dst, const uint8_t* left) {
+    int y;
+    for (y = 0; y < 16; ++y) {
+        uint8_t* const row = dst + y * BPS;
+        _mm_store_si128((__m128i*)row, _mm_set1_epi8(left[y]));
+    }
+}
+
+// Horizontal prediction dispatcher. Without left samples (left == NULL) the
+// block is filled with the constant 129; otherwise size 8 uses HE8uv() and
+// any other size uses HE16().
+static WEBP_INLINE void HorizontalPred(uint8_t* dst, const uint8_t* left, int size) {
+    if (left == NULL) {
+        Fill(dst, 129, size);
+        return;
+    }
+    if (size == 8) {
+        HE8uv(dst, left);
+        return;
+    }
+    HE16(dst, left);
+}
+
+// TrueMotion prediction with both neighbours available: every output pixel is
+// top[x] + left[y] - left[-1], clamped to [0, 255] by the unsigned saturating
+// pack. 'size' == 8 handles an 8x8 block; any other value takes the 16x16
+// path, which uses an aligned load of 'top' and aligned stores to 'dst'.
+static WEBP_INLINE void TM(uint8_t* dst, const uint8_t* left, const uint8_t* top, int size) {
+    const __m128i zero = _mm_setzero_si128();
+    int y;
+    if (size != 8) {
+        const __m128i top_row = _mm_load_si128((const __m128i*)top);
+        // Top samples widened to 16 bits: columns 0..7 and 8..15.
+        const __m128i top_lo = _mm_unpacklo_epi8(top_row, zero);
+        const __m128i top_hi = _mm_unpackhi_epi8(top_row, zero);
+        for (y = 0; y < 16; ++y) {
+            const int delta = left[y] - left[-1];
+            const __m128i offset = _mm_set1_epi16(delta);
+            const __m128i row_lo = _mm_add_epi16(offset, top_lo);
+            const __m128i row_hi = _mm_add_epi16(offset, top_hi);
+            _mm_store_si128((__m128i*)(dst + y * BPS), _mm_packus_epi16(row_lo, row_hi));
+        }
+    } else {
+        const __m128i top_row = _mm_loadl_epi64((const __m128i*)top);
+        const __m128i top_16b = _mm_unpacklo_epi8(top_row, zero);
+        for (y = 0; y < 8; ++y) {
+            const int delta = left[y] - left[-1];
+            const __m128i offset = _mm_set1_epi16(delta);
+            const __m128i row = _mm_add_epi16(offset, top_16b);
+            _mm_storel_epi64((__m128i*)(dst + y * BPS), _mm_packus_epi16(row, zero));
+        }
+    }
+}
+
+// TrueMotion prediction dispatcher, selecting a fallback when neighbours are
+// missing:
+//   left and top present -> TM()
+//   left only            -> HorizontalPred()
+//   top only             -> VerticalPred() (true motion with the default left
+//                           value degenerates to copying the top samples)
+//   neither              -> constant 129 (not 127 as in VerticalPred())
+static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left, const uint8_t* top, int size) {
+    if (left != NULL && top != NULL) {
+        TM(dst, left, top, size);
+    } else if (left != NULL) {
+        HorizontalPred(dst, left, size);
+    } else if (top != NULL) {
+        VerticalPred(dst, top, size);
+    } else {
+        Fill(dst, 129, size);
+    }
+}
+
+// DC prediction for an 8x8 block with both neighbours: fills the block with
+// (sum of 8 top bytes + sum of 8 left bytes + 8) >> 4. The byte sums are
+// obtained with a sum-of-absolute-differences against zero.
+static WEBP_INLINE void DC8uv(uint8_t* dst, const uint8_t* left, const uint8_t* top) {
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i top_sad = _mm_sad_epu8(_mm_loadl_epi64((const __m128i*)top), zero);
+    const __m128i left_sad = _mm_sad_epu8(_mm_loadl_epi64((const __m128i*)left), zero);
+    const int total = _mm_cvtsi128_si32(top_sad) + _mm_cvtsi128_si32(left_sad);
+    Put8x8uv((total + 8) >> 4, dst);
+}
+
+// DC prediction for an 8x8 block from a single edge: fills the block with
+// (sum of the 8 bytes at 'top' + 4) >> 3.
+static WEBP_INLINE void DC8uvNoLeft(uint8_t* dst, const uint8_t* top) {
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i edge_sad = _mm_sad_epu8(_mm_loadl_epi64((const __m128i*)top), zero);
+    const int total = _mm_cvtsi128_si32(edge_sad);
+    Put8x8uv((total + 4) >> 3, dst);
+}
+
+// DC prediction for an 8x8 block from the left edge only. The left samples
+// are stored contiguously, so the single-edge summation of DC8uvNoLeft() is
+// reused as is.
+static WEBP_INLINE void DC8uvNoTop(uint8_t* dst, const uint8_t* left) {
+    const uint8_t* const edge = left;
+    DC8uvNoLeft(dst, edge);
+}
+
+// DC prediction for an 8x8 block with no neighbours at all: fills the block
+// with the constant 0x80.
+static WEBP_INLINE void DC8uvNoTopLeft(uint8_t* dst) {
+    const uint8_t default_dc = 0x80;
+    Put8x8uv(default_dc, dst);
+}
+
+// DC prediction dispatcher for 8x8 blocks: picks the variant matching which of
+// the 'left' and 'top' neighbours are available (non-NULL).
+static WEBP_INLINE void DC8uvMode(uint8_t* dst, const uint8_t* left, const uint8_t* top) {
+    if (top == NULL && left == NULL) {
+        // no top, no left, nothing.
+        DC8uvNoTopLeft(dst);
+    } else if (top == NULL) {
+        // left but no top
+        DC8uvNoTop(dst, left);
+    } else if (left == NULL) {
+        // top, but no left
+        DC8uvNoLeft(dst, top);
+    } else {
+        // top and left present
+        DC8uv(dst, left, top);
+    }
+}
+
+// DC prediction for a 16x16 block with both neighbours: fills the block with
+// (sum of 16 top bytes + sum of 16 left bytes + 16) >> 5. Both edges are read
+// with aligned 16-byte loads.
+static WEBP_INLINE void DC16(uint8_t* dst, const uint8_t* left, const uint8_t* top) {
+    const __m128i zero = _mm_setzero_si128();
+    // _mm_sad_epu8 yields one partial sum per 8-byte half; the shuffle brings
+    // the upper partial sum down to lane 0 so the two can be added.
+    const __m128i top_sad = _mm_sad_epu8(_mm_load_si128((const __m128i*)top), zero);
+    const __m128i top_sum = _mm_add_epi16(top_sad, _mm_shuffle_epi32(top_sad, 2));
+    const __m128i left_sad = _mm_sad_epu8(_mm_load_si128((const __m128i*)left), zero);
+    const __m128i left_sum = _mm_add_epi16(left_sad, _mm_shuffle_epi32(left_sad, 2));
+    const int total = _mm_cvtsi128_si32(top_sum) + _mm_cvtsi128_si32(left_sum);
+    Put16((total + 16) >> 5, dst);
+}
+
+// DC prediction for a 16x16 block from a single edge: fills the block with
+// (sum of the 16 bytes at 'top' + 8) >> 4. The edge is read with an aligned
+// 16-byte load.
+static WEBP_INLINE void DC16NoLeft(uint8_t* dst, const uint8_t* top) {
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i edge_sad = _mm_sad_epu8(_mm_load_si128((const __m128i*)top), zero);
+    // Add the partial sums of the two 8-byte halves.
+    const __m128i edge_sum = _mm_add_epi16(edge_sad, _mm_shuffle_epi32(edge_sad, 2));
+    const int total = _mm_cvtsi128_si32(edge_sum);
+    Put16((total + 8) >> 4, dst);
+}
+
+// DC prediction for a 16x16 block from the left edge only. The left samples
+// are stored contiguously, so the single-edge summation of DC16NoLeft() is
+// reused as is.
+static WEBP_INLINE void DC16NoTop(uint8_t* dst, const uint8_t* left) {
+    const uint8_t* const edge = left;
+    DC16NoLeft(dst, edge);
+}
+
+// DC prediction for a 16x16 block with no neighbours at all: fills the block
+// with the constant 0x80.
+static WEBP_INLINE void DC16NoTopLeft(uint8_t* dst) {
+    const uint8_t default_dc = 0x80;
+    Put16(default_dc, dst);
+}
+
+// DC prediction dispatcher for 16x16 blocks: picks the variant matching which
+// of the 'left' and 'top' neighbours are available (non-NULL).
+static WEBP_INLINE void DC16Mode(uint8_t* dst, const uint8_t* left, const uint8_t* top) {
+    if (top == NULL && left == NULL) {
+        // no top, no left, nothing.
+        DC16NoTopLeft(dst);
+    } else if (top == NULL) {
+        // left but no top
+        DC16NoTop(dst, left);
+    } else if (left == NULL) {
+        // top, but no left
+        DC16NoLeft(dst, top);
+    } else {
+        // top and left present
+        DC16(dst, left, top);
+    }
+}
+
+//------------------------------------------------------------------------------
+// 4x4 predictions
+
+// Pixel at column x, row y of the block starting at 'dst' (rows are BPS bytes
+// apart). Requires a 'dst' pointer in scope.
+#define DST(x, y) dst[(x) + (y)*BPS]
+// Rounded three-tap average with weights 1, 2, 1.
+#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
+// Rounded average of two values.
+#define AVG2(a, b) (((a) + (b) + 1) >> 1)
+
+// We use the following 8b-arithmetic tricks:
+//     (a + 2 * b + c + 2) >> 2 = (AC + b + 1) >> 1
+//   where: AC = (a + c) >> 1 = [(a + c + 1) >> 1] - [(a^c) & 1]
+// and:
+//     (a + 2 * b + c + 2) >> 2 = (AB + BC + 1) >> 1 - (ab|bc)&lsb
+//   where: AB = (a + b + 1) >> 1,   BC = (b + c + 1) >> 1
+//   and ab = a ^ b, bc = b ^ c, lsb = (AB^BC)&1
+
+// Vertical 4x4 prediction. Each output column x holds
+// AVG3(top[x - 1], top[x], top[x + 1]), i.e. (a + 2 * b + c + 2) >> 2, and the
+// resulting four bytes are replicated into the four rows of 'dst'.
+// Eight bytes are loaded starting at top[-1]; only the first four averaged
+// results are stored.
+static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) { // vertical
+    const __m128i one = _mm_set1_epi8(1);
+    // 'top' is read-only: keep the const qualifier in the load cast.
+    const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)(top - 1));
+    const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
+    const __m128i CDEFGH00 = _mm_srli_si128(ABCDEFGH, 2);
+    // (a + c) >> 1 is obtained as the rounded average minus (a ^ c) & 1 ...
+    const __m128i a = _mm_avg_epu8(ABCDEFGH, CDEFGH00);
+    const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGH00), one);
+    const __m128i b = _mm_subs_epu8(a, lsb);
+    // ... and averaging that with b (rounding up) gives AVG3(a, b, c).
+    const __m128i avg = _mm_avg_epu8(b, BCDEFGH0);
+    const uint32_t vals = _mm_cvtsi128_si32(avg);
+    int i;
+    for (i = 0; i < 4; ++i) {
+        WebPUint32ToMem(dst + i * BPS, vals);
+    }
+}
+
+// Horizontal 4x4 prediction. With X = top[-1] and the left samples
+// I, J, K, L = top[-2], top[-3], top[-4], top[-5], each row is filled with a
+// single smoothed value: AVG3(X, I, J), AVG3(I, J, K), AVG3(J, K, L) and
+// AVG3(K, L, L) for rows 0 to 3.
+static WEBP_INLINE void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
+    const int X = top[-1];
+    const int I = top[-2];
+    const int J = top[-3];
+    const int K = top[-4];
+    const int L = top[-5];
+    const int row_value[4] = {AVG3(X, I, J), AVG3(I, J, K), AVG3(J, K, L), AVG3(K, L, L)};
+    int y;
+    for (y = 0; y < 4; ++y) {
+        // Multiplying by 0x01010101 replicates the byte into all four pixels.
+        WebPUint32ToMem(dst + y * BPS, 0x01010101U * row_value[y]);
+    }
+}
+
+// DC 4x4 prediction: fills the block with
+// (top[0..3] + top[-5..-2] + 4) >> 3, i.e. the rounded mean of the four top
+// and the four left samples.
+static WEBP_INLINE void DC4(uint8_t* dst, const uint8_t* top) {
+    uint32_t total = 4;  // rounding term
+    int i = 0;
+    while (i < 4) {
+        total += top[i] + top[-5 + i];
+        ++i;
+    }
+    Fill(dst, total >> 3, 4);
+}
+
+// Down-left 4x4 prediction from the eight samples top[0..7] (named A..H).
+// A vector of AVG3() results over consecutive triples is built once, with the
+// last sample H repeated at the end, and row y of the output is that vector
+// shifted by y bytes.
+static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) { // Down-Left
+    const __m128i one = _mm_set1_epi8(1);
+    const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
+    const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
+    const __m128i CDEFGH00 = _mm_srli_si128(ABCDEFGH, 2);
+    // Write H into byte 6 (16-bit lane 3, high byte zero) so that the last
+    // triple becomes (G, H, H).
+    const __m128i CDEFGHH0 = _mm_insert_epi16(CDEFGH00, top[7], 3);
+    // (a + c) >> 1 computed as the rounded average minus (a ^ c) & 1.
+    const __m128i avg1 = _mm_avg_epu8(ABCDEFGH, CDEFGHH0);
+    const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one);
+    const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
+    // Averaging with the middle sample yields (a + 2 * b + c + 2) >> 2.
+    const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0);
+    // Each row starts one byte further into the averaged vector.
+    WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(abcdefg));
+    WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
+    WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
+    WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
+}
+
+// Vertical-right 4x4 prediction. Uses the top-left sample X = top[-1], the
+// top samples starting at top[0] (A, B, C, D, ...) and the left samples
+// I, J, K = top[-2], top[-3], top[-4].
+//   row 0 = AVG2 of consecutive pairs of (X, A, B, C, D)
+//   row 1 = AVG3 of consecutive triples of (I, X, A, B, C, D)
+//   row 2 = row 0 shifted right by one pixel, first pixel AVG3(J, I, X)
+//   row 3 = row 1 shifted right by one pixel, first pixel AVG3(K, J, I)
+static WEBP_INLINE void VR4(uint8_t* dst, const uint8_t* top) { // Vertical-Right
+    const __m128i one = _mm_set1_epi8(1);
+    const int I = top[-2];
+    const int J = top[-3];
+    const int K = top[-4];
+    const int X = top[-1];
+    const __m128i XABCD = _mm_loadl_epi64((const __m128i*)(top - 1));
+    const __m128i ABCD0 = _mm_srli_si128(XABCD, 1);
+    const __m128i abcd = _mm_avg_epu8(XABCD, ABCD0);
+    // XABCD moved up by one byte, leaving byte 0 free for I. (The name avoids
+    // a leading underscore followed by an uppercase letter, which is a
+    // reserved identifier in C.)
+    const __m128i zXABCD = _mm_slli_si128(XABCD, 1);
+    const __m128i IXABCD = _mm_insert_epi16(zXABCD, I | (X << 8), 0);
+    // (a + c) >> 1 computed as the rounded average minus (a ^ c) & 1, then
+    // averaged with the middle sample to get (a + 2 * b + c + 2) >> 2.
+    const __m128i avg1 = _mm_avg_epu8(IXABCD, ABCD0);
+    const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one);
+    const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
+    const __m128i efgh = _mm_avg_epu8(avg2, XABCD);
+    WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(abcd));
+    WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(efgh));
+    // The byte shift leaves a zero in the first pixel of rows 2 and 3; it is
+    // overwritten just below.
+    WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
+    WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));
+
+    // these two are hard to implement in SSE2, so we keep the C-version:
+    DST(0, 2) = AVG3(J, I, X);
+    DST(0, 3) = AVG3(K, J, I);
+}
+
+// Vertical-left 4x4 prediction from the eight samples top[0..7] (A..H).
+//   row 0 = AVG2 of consecutive pairs, starting at (A, B)
+//   row 1 = AVG3 of consecutive triples, starting at (A, B, C)
+//   row 2 = row 0 advanced by one sample
+//   row 3 = row 1 advanced by one sample
+// except that the last pixel of rows 2 and 3 is taken from bytes 4 and 5 of
+// the AVG3 vector respectively.
+static WEBP_INLINE void VL4(uint8_t* dst, const uint8_t* top) { // Vertical-Left
+    const __m128i one = _mm_set1_epi8(1);
+    const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
+    const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
+    const __m128i CDEFGH__ = _mm_srli_si128(ABCDEFGH, 2);
+    // AVG3(a, b, c) is derived from the two rounded pair averages
+    // AB = avg1 and BC = avg2: (AB + BC + 1) >> 1, minus a one-bit correction
+    // (ab | bc) & (AB ^ BC) & 1 that undoes the double rounding.
+    const __m128i avg1 = _mm_avg_epu8(ABCDEFGH, BCDEFGH_);
+    const __m128i avg2 = _mm_avg_epu8(CDEFGH__, BCDEFGH_);
+    const __m128i avg3 = _mm_avg_epu8(avg1, avg2);
+    const __m128i lsb1 = _mm_and_si128(_mm_xor_si128(avg1, avg2), one);
+    const __m128i ab = _mm_xor_si128(ABCDEFGH, BCDEFGH_);
+    const __m128i bc = _mm_xor_si128(CDEFGH__, BCDEFGH_);
+    const __m128i abbc = _mm_or_si128(ab, bc);
+    const __m128i lsb2 = _mm_and_si128(abbc, lsb1);
+    const __m128i avg4 = _mm_subs_epu8(avg3, lsb2);
+    // Bytes 4..7 of the AVG3 vector, kept for the two irregular pixels.
+    const uint32_t extra_out = _mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
+    WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(avg1));
+    WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(avg4));
+    WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
+    WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));
+
+    // these two are hard to get and irregular
+    DST(3, 2) = (extra_out >> 0) & 0xff;
+    DST(3, 3) = (extra_out >> 8) & 0xff;
+}
+
+// Down-right 4x4 prediction. Works on the nine contiguous samples
+// top[-5..3], named L K J I X A B C D (left samples, top-left, top samples).
+// A vector of AVG3() results over consecutive triples is built once; row 3
+// starts at its first byte and each row above starts one byte further in.
+static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) { // Down-right
+    const __m128i one = _mm_set1_epi8(1);
+    // The 64-bit load covers top[-5..2]; D = top[3] is inserted as byte 8
+    // (16-bit lane 4).
+    const __m128i LKJIXABC = _mm_loadl_epi64((const __m128i*)(top - 5));
+    const __m128i LKJIXABCD = _mm_insert_epi16(LKJIXABC, top[3], 4);
+    const __m128i KJIXABCD_ = _mm_srli_si128(LKJIXABCD, 1);
+    const __m128i JIXABCD__ = _mm_srli_si128(LKJIXABCD, 2);
+    // (a + c) >> 1 computed as the rounded average minus (a ^ c) & 1, then
+    // averaged with the middle sample to get (a + 2 * b + c + 2) >> 2.
+    const __m128i avg1 = _mm_avg_epu8(JIXABCD__, LKJIXABCD);
+    const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one);
+    const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
+    const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_);
+    // Rows are written bottom-up, each one a byte further into the vector.
+    WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(abcdefg));
+    WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
+    WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
+    WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
+}
+
+// Horizontal-up 4x4 prediction, computed from the left samples only:
+// I, J, K, L = top[-2], top[-3], top[-4], top[-5]. Written out row by row;
+// each row is the previous one advanced by two entries of the sequence
+// AVG2(I,J), AVG3(I,J,K), AVG2(J,K), AVG3(J,K,L), AVG2(K,L), AVG3(K,L,L),
+// L, L, L, ...
+static WEBP_INLINE void HU4(uint8_t* dst, const uint8_t* top) {
+    const int I = top[-2];
+    const int J = top[-3];
+    const int K = top[-4];
+    const int L = top[-5];
+    const int ij = AVG2(I, J);
+    const int jk = AVG2(J, K);
+    const int kl = AVG2(K, L);
+    const int ijk = AVG3(I, J, K);
+    const int jkl = AVG3(J, K, L);
+    const int kll = AVG3(K, L, L);
+
+    // Row 0
+    DST(0, 0) = ij;
+    DST(1, 0) = ijk;
+    DST(2, 0) = jk;
+    DST(3, 0) = jkl;
+    // Row 1
+    DST(0, 1) = jk;
+    DST(1, 1) = jkl;
+    DST(2, 1) = kl;
+    DST(3, 1) = kll;
+    // Row 2
+    DST(0, 2) = kl;
+    DST(1, 2) = kll;
+    DST(2, 2) = L;
+    DST(3, 2) = L;
+    // Row 3
+    DST(0, 3) = L;
+    DST(1, 3) = L;
+    DST(2, 3) = L;
+    DST(3, 3) = L;
+}
+
+// Horizontal-down 4x4 prediction. Inputs are the top-left sample X = top[-1],
+// the left samples I, J, K, L = top[-2], top[-3], top[-4], top[-5] and the
+// top samples A, B, C = top[0], top[1], top[2]. Written out row by row; each
+// row repeats the first two pixels of the row above in its last two columns.
+static WEBP_INLINE void HD4(uint8_t* dst, const uint8_t* top) {
+    const int X = top[-1];
+    const int I = top[-2];
+    const int J = top[-3];
+    const int K = top[-4];
+    const int L = top[-5];
+    const int A = top[0];
+    const int B = top[1];
+    const int C = top[2];
+    const int ix = AVG2(I, X);
+    const int ji = AVG2(J, I);
+    const int kj = AVG2(K, J);
+    const int ixa = AVG3(I, X, A);
+    const int jix = AVG3(J, I, X);
+    const int kji = AVG3(K, J, I);
+
+    // Row 0
+    DST(0, 0) = ix;
+    DST(1, 0) = ixa;
+    DST(2, 0) = AVG3(X, A, B);
+    DST(3, 0) = AVG3(A, B, C);
+    // Row 1
+    DST(0, 1) = ji;
+    DST(1, 1) = jix;
+    DST(2, 1) = ix;
+    DST(3, 1) = ixa;
+    // Row 2
+    DST(0, 2) = kj;
+    DST(1, 2) = kji;
+    DST(2, 2) = ji;
+    DST(3, 2) = jix;
+    // Row 3
+    DST(0, 3) = AVG2(L, K);
+    DST(1, 3) = AVG3(L, K, J);
+    DST(2, 3) = kj;
+    DST(3, 3) = kji;
+}
+
+// TrueMotion 4x4 prediction: pixel (x, y) is top[x] + top[-2 - y] - top[-1],
+// clamped to [0, 255] by the unsigned saturating pack. top[-2 - y] is the
+// left sample of row y and top[-1] the top-left sample.
+static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
+    const __m128i zero = _mm_setzero_si128();
+    // The four top samples, widened to 16 bits.
+    const __m128i top_16b = _mm_unpacklo_epi8(_mm_cvtsi32_si128(WebPMemToUint32(top)), zero);
+    int y = 0;
+    while (y < 4) {
+        const int delta = top[-2 - y] - top[-1];
+        const __m128i row = _mm_add_epi16(_mm_set1_epi16(delta), top_16b);
+        WebPUint32ToMem(dst + y * BPS, _mm_cvtsi128_si32(_mm_packus_epi16(row, zero)));
+        ++y;
+    }
+}
+
+#undef DST
+#undef AVG3
+#undef AVG2
+
+//------------------------------------------------------------------------------
+// luma 4x4 prediction
+
+// Computes all ten 4x4 luma predictions, each one written at its own I4xxx
+// offset from 'dst'.
+// Sample layout around 'top': the left samples are top[-5 .. -2], the
+// top-left sample is top[-1], the top samples are top[0..3] and the top-right
+// samples are top[4..7].
+static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
+    DC4(dst + I4DC4, top);
+    TM4(dst + I4TM4, top);
+    VE4(dst + I4VE4, top);
+    HE4(dst + I4HE4, top);
+    RD4(dst + I4RD4, top);
+    VR4(dst + I4VR4, top);
+    LD4(dst + I4LD4, top);
+    VL4(dst + I4VL4, top);
+    HD4(dst + I4HD4, top);
+    HU4(dst + I4HU4, top);
+}
+
+//------------------------------------------------------------------------------
+// Chroma 8x8 prediction (paragraph 12.2)
+
+// Computes the four 8x8 chroma predictions (DC, vertical, horizontal,
+// TrueMotion) for the U block and then for the V block. For the V block the
+// output moves 8 bytes to the right, the top samples advance by 8 and the
+// left samples by 16. 'left' and/or 'top' may be NULL when the corresponding
+// neighbour is unavailable.
+static void IntraChromaPreds(uint8_t* dst, const uint8_t* left, const uint8_t* top) {
+    int plane;
+    // plane 0 is U, plane 1 is V.
+    for (plane = 0; plane < 2; ++plane) {
+        uint8_t* const out = dst + 8 * plane;
+        const uint8_t* const plane_top = (top != NULL) ? top + 8 * plane : NULL;
+        const uint8_t* const plane_left = (left != NULL) ? left + 16 * plane : NULL;
+        DC8uvMode(out + C8DC8, plane_left, plane_top);
+        VerticalPred(out + C8VE8, plane_top, 8);
+        HorizontalPred(out + C8HE8, plane_left, 8);
+        TrueMotion(out + C8TM8, plane_left, plane_top, 8);
+    }
+}
+
+//------------------------------------------------------------------------------
+// luma 16x16 prediction (paragraph 12.3)
+
+// Computes the four 16x16 luma predictions (DC, vertical, horizontal,
+// TrueMotion), each written at its own I16xxx offset from 'dst'. 'left'
+// and/or 'top' may be NULL when the corresponding neighbour is unavailable.
+static void Intra16Preds(uint8_t* dst, const uint8_t* left, const uint8_t* top) {
+    uint8_t* const dc_out = dst + I16DC16;
+    uint8_t* const ve_out = dst + I16VE16;
+    uint8_t* const he_out = dst + I16HE16;
+    uint8_t* const tm_out = dst + I16TM16;
+    DC16Mode(dc_out, left, top);
+    VerticalPred(ve_out, top, 16);
+    HorizontalPred(he_out, left, 16);
+    TrueMotion(tm_out, left, top, 16);
+}
+
+//------------------------------------------------------------------------------
+// Metric
+
+// Computes the squared differences of the sixteen byte pairs in 'a' and 'b'
+// and stores them in *sum as four 32-bit partial sums. Note that *sum is
+// overwritten, not added to; the caller does the accumulation.
+static WEBP_INLINE void SubtractAndAccumulate(const __m128i a, const __m128i b, __m128i* const sum) {
+    const __m128i zero = _mm_setzero_si128();
+    // |a - b| per byte: one of the two saturating differences is always zero.
+    const __m128i abs_diff = _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a));
+    // Zero-extend to 16 bits, then square and add adjacent pairs.
+    const __m128i lo_16b = _mm_unpacklo_epi8(abs_diff, zero);
+    const __m128i hi_16b = _mm_unpackhi_epi8(abs_diff, zero);
+    const __m128i lo_sq = _mm_madd_epi16(lo_16b, lo_16b);
+    const __m128i hi_sq = _mm_madd_epi16(hi_16b, hi_16b);
+    *sum = _mm_add_epi32(lo_sq, hi_sq);
+}
+
+// Sum of squared differences between two 16-pixel-wide areas 'a' and 'b'
+// spanning 2 * num_pairs rows (rows are BPS bytes apart). Rows are processed
+// two at a time with unaligned loads.
+static WEBP_INLINE int SSE_16xN(const uint8_t* a, const uint8_t* b, int num_pairs) {
+    __m128i total = _mm_setzero_si128();
+    int32_t lanes[4];
+
+    while (num_pairs-- > 0) {
+        const __m128i a_row0 = _mm_loadu_si128((const __m128i*)&a[BPS * 0]);
+        const __m128i b_row0 = _mm_loadu_si128((const __m128i*)&b[BPS * 0]);
+        const __m128i a_row1 = _mm_loadu_si128((const __m128i*)&a[BPS * 1]);
+        const __m128i b_row1 = _mm_loadu_si128((const __m128i*)&b[BPS * 1]);
+        __m128i sq0, sq1;
+        SubtractAndAccumulate(a_row0, b_row0, &sq0);
+        SubtractAndAccumulate(a_row1, b_row1, &sq1);
+        total = _mm_add_epi32(total, _mm_add_epi32(sq0, sq1));
+        a += 2 * BPS;
+        b += 2 * BPS;
+    }
+    // Horizontal sum of the four 32-bit partial sums.
+    _mm_storeu_si128((__m128i*)lanes, total);
+    return (lanes[3] + lanes[2] + lanes[1] + lanes[0]);
+}
+
+static int SSE16x16(const uint8_t* a, const uint8_t* b) {
+    return SSE_16xN(a, b, 16 / 2); // 16 rows, processed two at a time
+}
+
+static int SSE16x8(const uint8_t* a, const uint8_t* b) {
+    return SSE_16xN(a, b, 8 / 2); // 8 rows, processed two at a time
+}
+
+#define LOAD_8x16b(ptr) _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(ptr)), zero)
+
+static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+    const __m128i zero = _mm_setzero_si128(); // also consumed by LOAD_8x16b
+    __m128i acc = zero;
+    int32_t lanes[4];
+    int pair;
+    for (pair = 0; pair < 4; ++pair) {
+        // two rows of 8 pixels from each block, zero-extended to 16b lanes
+        const __m128i row_a0 = LOAD_8x16b(a + BPS * 0);
+        const __m128i row_a1 = LOAD_8x16b(a + BPS * 1);
+        const __m128i row_b0 = LOAD_8x16b(b + BPS * 0);
+        const __m128i row_b1 = LOAD_8x16b(b + BPS * 1);
+        // per-lane differences
+        const __m128i diff0 = _mm_subs_epi16(row_a0, row_b0);
+        const __m128i diff1 = _mm_subs_epi16(row_a1, row_b1);
+        // square every lane and add adjacent pairs into 32b lanes
+        const __m128i sq0 = _mm_madd_epi16(diff0, diff0);
+        const __m128i sq1 = _mm_madd_epi16(diff1, diff1);
+        // fold both rows into the running total
+        acc = _mm_add_epi32(acc, _mm_add_epi32(sq0, sq1));
+        a += 2 * BPS;
+        b += 2 * BPS;
+    }
+    _mm_storeu_si128((__m128i*)lanes, acc);
+    return (lanes[3] + lanes[2] + lanes[1] + lanes[0]);
+}
+#undef LOAD_8x16b
+
+static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+    const __m128i kZero = _mm_setzero_si128();
+    int32_t lanes[4];
+    // Every load fetches 8 bytes although only the first 4 pixels of a row are
+    // used; per the original note, the a/b buffers are over-allocated for this.
+    const __m128i row_a0 = _mm_loadl_epi64((const __m128i*)(a + BPS * 0));
+    const __m128i row_a1 = _mm_loadl_epi64((const __m128i*)(a + BPS * 1));
+    const __m128i row_a2 = _mm_loadl_epi64((const __m128i*)(a + BPS * 2));
+    const __m128i row_a3 = _mm_loadl_epi64((const __m128i*)(a + BPS * 3));
+    const __m128i row_b0 = _mm_loadl_epi64((const __m128i*)(b + BPS * 0));
+    const __m128i row_b1 = _mm_loadl_epi64((const __m128i*)(b + BPS * 1));
+    const __m128i row_b2 = _mm_loadl_epi64((const __m128i*)(b + BPS * 2));
+    const __m128i row_b3 = _mm_loadl_epi64((const __m128i*)(b + BPS * 3));
+    // Put rows 0|1 and rows 2|3 side by side (low 32 bits of each row).
+    const __m128i a_top = _mm_unpacklo_epi32(row_a0, row_a1);
+    const __m128i a_bot = _mm_unpacklo_epi32(row_a2, row_a3);
+    const __m128i b_top = _mm_unpacklo_epi32(row_b0, row_b1);
+    const __m128i b_bot = _mm_unpacklo_epi32(row_b2, row_b3);
+    // Zero-extend the eight pixels of each row pair to 16b lanes.
+    const __m128i a_top16 = _mm_unpacklo_epi8(a_top, kZero);
+    const __m128i a_bot16 = _mm_unpacklo_epi8(a_bot, kZero);
+    const __m128i b_top16 = _mm_unpacklo_epi8(b_top, kZero);
+    const __m128i b_bot16 = _mm_unpacklo_epi8(b_bot, kZero);
+    // Difference, then square + pairwise add into 32b lanes.
+    const __m128i diff_top = _mm_subs_epi16(a_top16, b_top16);
+    const __m128i diff_bot = _mm_subs_epi16(a_bot16, b_bot16);
+    const __m128i sq_top = _mm_madd_epi16(diff_top, diff_top);
+    const __m128i sq_bot = _mm_madd_epi16(diff_bot, diff_bot);
+    const __m128i total = _mm_add_epi32(sq_top, sq_bot);
+
+    // Horizontal sum of the four partial results.
+    _mm_storeu_si128((__m128i*)lanes, total);
+    return (lanes[3] + lanes[2] + lanes[1] + lanes[0]);
+}
+
+//------------------------------------------------------------------------------
+// Texture distortion
+//
+// We try to match the spectral content (weighted) between source and
+// reconstructed samples.
+
+// Hadamard transform of two 4x4 blocks (inA and inB, row stride BPS), computed in parallel.
+// Returns the difference between the weighted sums (weights w[0..15]) of the
+// absolute values of the transformed coefficients: sum(w * |T(inA)|) - sum(w * |T(inB)|).
+static int TTransform(const uint8_t* inA, const uint8_t* inB, const uint16_t* const w) {
+    int32_t sum[4]; // the four 32b partial differences, reduced on return
+    __m128i tmp_0, tmp_1, tmp_2, tmp_3;
+    const __m128i zero = _mm_setzero_si128();
+
+    // Load, combine and transpose inputs.
+    {
+        const __m128i inA_0 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 0]); // 8-byte load, 4 pixels used
+        const __m128i inA_1 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 1]);
+        const __m128i inA_2 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 2]);
+        const __m128i inA_3 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 3]);
+        const __m128i inB_0 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 0]);
+        const __m128i inB_1 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 1]);
+        const __m128i inB_2 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 2]);
+        const __m128i inB_3 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 3]);
+
+        // Combine inA and inB (we'll do two transforms in parallel).
+        const __m128i inAB_0 = _mm_unpacklo_epi8(inA_0, inB_0);
+        const __m128i inAB_1 = _mm_unpacklo_epi8(inA_1, inB_1);
+        const __m128i inAB_2 = _mm_unpacklo_epi8(inA_2, inB_2);
+        const __m128i inAB_3 = _mm_unpacklo_epi8(inA_3, inB_3);
+        // a00 b00 a01 b01 a02 b02 a03 b03   0 0 0 0 0 0 0 0
+        // a10 b10 a11 b11 a12 b12 a13 b13   0 0 0 0 0 0 0 0
+        // a20 b20 a21 b21 a22 b22 a23 b23   0 0 0 0 0 0 0 0
+        // a30 b30 a31 b31 a32 b32 a33 b33   0 0 0 0 0 0 0 0
+
+        // Transpose the two 4x4, discarding the upper 8 bytes (shown as 0 above; they really hold pixels 4..7 of each row).
+        const __m128i transpose0_0 = _mm_unpacklo_epi8(inAB_0, inAB_2);
+        const __m128i transpose0_1 = _mm_unpacklo_epi8(inAB_1, inAB_3);
+        // a00 a20  b00 b20  a01 a21  b01 b21  a02 a22  b02 b22  a03 a23  b03 b23
+        // a10 a30  b10 b30  a11 a31  b11 b31  a12 a32  b12 b32  a13 a33  b13 b33
+        const __m128i transpose1_0 = _mm_unpacklo_epi8(transpose0_0, transpose0_1);
+        const __m128i transpose1_1 = _mm_unpackhi_epi8(transpose0_0, transpose0_1);
+        // a00 a10 a20 a30  b00 b10 b20 b30  a01 a11 a21 a31  b01 b11 b21 b31
+        // a02 a12 a22 a32  b02 b12 b22 b32  a03 a13 a23 a33  b03 b13 b23 b33
+
+        // Convert to 16b.
+        tmp_0 = _mm_unpacklo_epi8(transpose1_0, zero);
+        tmp_1 = _mm_unpackhi_epi8(transpose1_0, zero);
+        tmp_2 = _mm_unpacklo_epi8(transpose1_1, zero);
+        tmp_3 = _mm_unpackhi_epi8(transpose1_1, zero);
+        // a00 a10 a20 a30   b00 b10 b20 b30
+        // a01 a11 a21 a31   b01 b11 b21 b31
+        // a02 a12 a22 a32   b02 b12 b22 b32
+        // a03 a13 a23 a33   b03 b13 b23 b33
+    }
+
+    // Horizontal pass and subsequent transpose.
+    {
+        // Calculate a and b (two 4x4 at once).
+        const __m128i a0 = _mm_add_epi16(tmp_0, tmp_2);
+        const __m128i a1 = _mm_add_epi16(tmp_1, tmp_3);
+        const __m128i a2 = _mm_sub_epi16(tmp_1, tmp_3);
+        const __m128i a3 = _mm_sub_epi16(tmp_0, tmp_2);
+        const __m128i b0 = _mm_add_epi16(a0, a1);
+        const __m128i b1 = _mm_add_epi16(a3, a2);
+        const __m128i b2 = _mm_sub_epi16(a3, a2);
+        const __m128i b3 = _mm_sub_epi16(a0, a1);
+        // a00 a01 a02 a03   b00 b01 b02 b03
+        // a10 a11 a12 a13   b10 b11 b12 b13
+        // a20 a21 a22 a23   b20 b21 b22 b23
+        // a30 a31 a32 a33   b30 b31 b32 b33
+
+        // Transpose the two 4x4.
+        const __m128i transpose0_0 = _mm_unpacklo_epi16(b0, b1);
+        const __m128i transpose0_1 = _mm_unpacklo_epi16(b2, b3);
+        const __m128i transpose0_2 = _mm_unpackhi_epi16(b0, b1);
+        const __m128i transpose0_3 = _mm_unpackhi_epi16(b2, b3);
+        // a00 a10 a01 a11   a02 a12 a03 a13
+        // a20 a30 a21 a31   a22 a32 a23 a33
+        // b00 b10 b01 b11   b02 b12 b03 b13
+        // b20 b30 b21 b31   b22 b32 b23 b33
+        const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+        const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
+        const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+        const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
+        // a00 a10 a20 a30 a01 a11 a21 a31
+        // b00 b10 b20 b30 b01 b11 b21 b31
+        // a02 a12 a22 a32 a03 a13 a23 a33
+        // b02 b12 b22 b32 b03 b13 b23 b33
+        tmp_0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
+        tmp_1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
+        tmp_2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
+        tmp_3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
+        // a00 a10 a20 a30   b00 b10 b20 b30
+        // a01 a11 a21 a31   b01 b11 b21 b31
+        // a02 a12 a22 a32   b02 b12 b22 b32
+        // a03 a13 a23 a33   b03 b13 b23 b33
+    }
+
+    // Vertical pass and difference of weighted sums.
+    {
+        // Load all inputs.
+        const __m128i w_0 = _mm_loadu_si128((const __m128i*)&w[0]);
+        const __m128i w_8 = _mm_loadu_si128((const __m128i*)&w[8]);
+
+        // Calculate a and b (two 4x4 at once).
+        const __m128i a0 = _mm_add_epi16(tmp_0, tmp_2);
+        const __m128i a1 = _mm_add_epi16(tmp_1, tmp_3);
+        const __m128i a2 = _mm_sub_epi16(tmp_1, tmp_3);
+        const __m128i a3 = _mm_sub_epi16(tmp_0, tmp_2);
+        const __m128i b0 = _mm_add_epi16(a0, a1);
+        const __m128i b1 = _mm_add_epi16(a3, a2);
+        const __m128i b2 = _mm_sub_epi16(a3, a2);
+        const __m128i b3 = _mm_sub_epi16(a0, a1);
+
+        // Separate the transforms of inA and inB.
+        __m128i A_b0 = _mm_unpacklo_epi64(b0, b1); // low halves hold the inA coefficients
+        __m128i A_b2 = _mm_unpacklo_epi64(b2, b3);
+        __m128i B_b0 = _mm_unpackhi_epi64(b0, b1); // high halves hold the inB coefficients
+        __m128i B_b2 = _mm_unpackhi_epi64(b2, b3);
+
+        {
+            const __m128i d0 = _mm_sub_epi16(zero, A_b0);
+            const __m128i d1 = _mm_sub_epi16(zero, A_b2);
+            const __m128i d2 = _mm_sub_epi16(zero, B_b0);
+            const __m128i d3 = _mm_sub_epi16(zero, B_b2);
+            A_b0 = _mm_max_epi16(A_b0, d0); // abs(v), 16b
+            A_b2 = _mm_max_epi16(A_b2, d1);
+            B_b0 = _mm_max_epi16(B_b0, d2);
+            B_b2 = _mm_max_epi16(B_b2, d3);
+        }
+
+        // weighted sums
+        A_b0 = _mm_madd_epi16(A_b0, w_0); // NOTE(review): signed multiply, so weights presumably < 0x8000 - confirm
+        A_b2 = _mm_madd_epi16(A_b2, w_8);
+        B_b0 = _mm_madd_epi16(B_b0, w_0);
+        B_b2 = _mm_madd_epi16(B_b2, w_8);
+        A_b0 = _mm_add_epi32(A_b0, A_b2);
+        B_b0 = _mm_add_epi32(B_b0, B_b2);
+
+        // difference of weighted sums
+        A_b0 = _mm_sub_epi32(A_b0, B_b0);
+        _mm_storeu_si128((__m128i*)&sum[0], A_b0);
+    }
+    return sum[0] + sum[1] + sum[2] + sum[3];
+}
+
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b, const uint16_t* const w) {
+    // magnitude of the weighted-transform difference, scaled down by 2^5
+    return abs(TTransform(a, b, w)) >> 5;
+}
+
+static int Disto16x16(const uint8_t* const a, const uint8_t* const b, const uint16_t* const w) {
+    int total = 0; // running sum of the sixteen 4x4 distortions
+    int blk;
+    for (blk = 0; blk < 16; ++blk) {
+        // 4x4 sub-blocks in raster order: column (blk & 3), row (blk >> 2)
+        const int offset = 4 * (blk & 3) + 4 * BPS * (blk >> 2);
+        total += Disto4x4(a + offset, b + offset, w);
+    }
+    return total;
+}
+
+//------------------------------------------------------------------------------
+// Quantization
+// DoQuantizeBlock: quantizes in[] into zigzag-ordered out[], writes out * Q back to in[], returns 1 iff some out[] != 0.
+
+static WEBP_INLINE int DoQuantizeBlock(int16_t in[16],
+                                       int16_t out[16],
+                                       const uint16_t* const sharpen,
+                                       const VP8Matrix* const mtx) {
+    const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
+    const __m128i zero = _mm_setzero_si128();
+    __m128i coeff0, coeff8;
+    __m128i out0, out8;
+    __m128i packed_out;
+
+    // Load all inputs.
+    __m128i in0 = _mm_loadu_si128((__m128i*)&in[0]);
+    __m128i in8 = _mm_loadu_si128((__m128i*)&in[8]);
+    const __m128i iq0 = _mm_loadu_si128((const __m128i*)&mtx->iq_[0]);
+    const __m128i iq8 = _mm_loadu_si128((const __m128i*)&mtx->iq_[8]);
+    const __m128i q0 = _mm_loadu_si128((const __m128i*)&mtx->q_[0]);
+    const __m128i q8 = _mm_loadu_si128((const __m128i*)&mtx->q_[8]);
+
+    // extract sign(in)  (0x0000 if positive, 0xffff if negative)
+    const __m128i sign0 = _mm_cmpgt_epi16(zero, in0);
+    const __m128i sign8 = _mm_cmpgt_epi16(zero, in8);
+
+    // coeff = abs(in) = (in ^ sign) - sign
+    coeff0 = _mm_xor_si128(in0, sign0);
+    coeff8 = _mm_xor_si128(in8, sign8);
+    coeff0 = _mm_sub_epi16(coeff0, sign0);
+    coeff8 = _mm_sub_epi16(coeff8, sign8);
+
+    // coeff = abs(in) + sharpen (skipped when the caller passes sharpen == NULL)
+    if (sharpen != NULL) {
+        const __m128i sharpen0 = _mm_loadu_si128((const __m128i*)&sharpen[0]);
+        const __m128i sharpen8 = _mm_loadu_si128((const __m128i*)&sharpen[8]);
+        coeff0 = _mm_add_epi16(coeff0, sharpen0);
+        coeff8 = _mm_add_epi16(coeff8, sharpen8);
+    }
+
+    // out = (coeff * iQ + B) >> QFIX
+    {
+        // doing calculations with 32b precision (QFIX=17)
+        // out = (coeff * iQ)
+        const __m128i coeff_iQ0H = _mm_mulhi_epu16(coeff0, iq0); // high 16 bits of the unsigned product
+        const __m128i coeff_iQ0L = _mm_mullo_epi16(coeff0, iq0); // low 16 bits of the product
+        const __m128i coeff_iQ8H = _mm_mulhi_epu16(coeff8, iq8);
+        const __m128i coeff_iQ8L = _mm_mullo_epi16(coeff8, iq8);
+        __m128i out_00 = _mm_unpacklo_epi16(coeff_iQ0L, coeff_iQ0H); // interleave halves into 32b products
+        __m128i out_04 = _mm_unpackhi_epi16(coeff_iQ0L, coeff_iQ0H);
+        __m128i out_08 = _mm_unpacklo_epi16(coeff_iQ8L, coeff_iQ8H);
+        __m128i out_12 = _mm_unpackhi_epi16(coeff_iQ8L, coeff_iQ8H);
+        // out = (coeff * iQ + B)
+        const __m128i bias_00 = _mm_loadu_si128((const __m128i*)&mtx->bias_[0]);
+        const __m128i bias_04 = _mm_loadu_si128((const __m128i*)&mtx->bias_[4]);
+        const __m128i bias_08 = _mm_loadu_si128((const __m128i*)&mtx->bias_[8]);
+        const __m128i bias_12 = _mm_loadu_si128((const __m128i*)&mtx->bias_[12]);
+        out_00 = _mm_add_epi32(out_00, bias_00);
+        out_04 = _mm_add_epi32(out_04, bias_04);
+        out_08 = _mm_add_epi32(out_08, bias_08);
+        out_12 = _mm_add_epi32(out_12, bias_12);
+        // out = QUANTDIV(coeff, iQ, B, QFIX)
+        out_00 = _mm_srai_epi32(out_00, QFIX);
+        out_04 = _mm_srai_epi32(out_04, QFIX);
+        out_08 = _mm_srai_epi32(out_08, QFIX);
+        out_12 = _mm_srai_epi32(out_12, QFIX);
+
+        // pack result as 16b (signed saturation)
+        out0 = _mm_packs_epi32(out_00, out_04);
+        out8 = _mm_packs_epi32(out_08, out_12);
+
+        // if (coeff > 2047) coeff = 2047
+        out0 = _mm_min_epi16(out0, max_coeff_2047);
+        out8 = _mm_min_epi16(out8, max_coeff_2047);
+    }
+
+    // get sign back (if (sign[j]) out_n = -out_n)
+    out0 = _mm_xor_si128(out0, sign0);
+    out8 = _mm_xor_si128(out8, sign8);
+    out0 = _mm_sub_epi16(out0, sign0);
+    out8 = _mm_sub_epi16(out8, sign8);
+
+    // in = out * Q (the dequantized values overwrite the caller's in[])
+    in0 = _mm_mullo_epi16(out0, q0);
+    in8 = _mm_mullo_epi16(out8, q8);
+
+    _mm_storeu_si128((__m128i*)&in[0], in0);
+    _mm_storeu_si128((__m128i*)&in[8], in8);
+
+    // zigzag the output before storing it.
+    //
+    // The zigzag pattern can almost be reproduced with a small sequence of
+    // shuffles. After it, we only need to swap the 7th (ending up in third
+    // position instead of twelfth) and 8th values.
+    {
+        __m128i outZ0, outZ8;
+        outZ0 = _mm_shufflehi_epi16(out0, _MM_SHUFFLE(2, 1, 3, 0));
+        outZ0 = _mm_shuffle_epi32(outZ0, _MM_SHUFFLE(3, 1, 2, 0));
+        outZ0 = _mm_shufflehi_epi16(outZ0, _MM_SHUFFLE(3, 1, 0, 2));
+        outZ8 = _mm_shufflelo_epi16(out8, _MM_SHUFFLE(3, 0, 2, 1));
+        outZ8 = _mm_shuffle_epi32(outZ8, _MM_SHUFFLE(3, 1, 2, 0));
+        outZ8 = _mm_shufflelo_epi16(outZ8, _MM_SHUFFLE(1, 3, 2, 0));
+        _mm_storeu_si128((__m128i*)&out[0], outZ0);
+        _mm_storeu_si128((__m128i*)&out[8], outZ8);
+        packed_out = _mm_packs_epi16(outZ0, outZ8); // taken before the swap below; a swap cannot change zero-ness
+    }
+    {
+        const int16_t outZ_12 = out[12]; // scalar fix-up: exchange out[3] and out[12]
+        const int16_t outZ_3 = out[3];
+        out[3] = outZ_12;
+        out[12] = outZ_3;
+    }
+
+    // detect if all 'out' values are zeroes or not
+    return (_mm_movemask_epi8(_mm_cmpeq_epi8(packed_out, zero)) != 0xffff);
+}
+
+static int QuantizeBlock(int16_t in[16], int16_t out[16], const VP8Matrix* const mtx) {
+    return DoQuantizeBlock(in, out, mtx->sharpen_, mtx); // quantize using the matrix's own sharpen_ table
+}
+
+static int QuantizeBlockWHT(int16_t in[16], int16_t out[16], const VP8Matrix* const mtx) {
+    return DoQuantizeBlock(in, out, NULL, mtx); // NULL sharpen: the sharpening add is skipped
+}
+
+static int Quantize2Blocks(int16_t in[32], int16_t out[32], const VP8Matrix* const mtx) {
+    const uint16_t* const sharpen = &mtx->sharpen_[0];
+    // result bit 0: first block has a non-zero output; bit 1: second block
+    const int nz_first = DoQuantizeBlock(in, out, sharpen, mtx);
+    const int nz_second = DoQuantizeBlock(in + 16, out + 16, sharpen, mtx);
+    return nz_first | (nz_second << 1);
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspInitSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitSSE2(void) {
+    VP8CollectHistogram = CollectHistogram; // histogram collection
+    VP8ITransform = ITransform;             // transforms
+    VP8FTransform = FTransform;
+    VP8FTransform2 = FTransform2;
+    VP8FTransformWHT = FTransformWHT;
+    VP8EncPredLuma16 = Intra16Preds;        // intra predictors
+    VP8EncPredChroma8 = IntraChromaPreds;
+    VP8EncPredLuma4 = Intra4Preds;
+    VP8EncQuantizeBlock = QuantizeBlock;    // quantization
+    VP8EncQuantize2Blocks = Quantize2Blocks;
+    VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
+    VP8SSE16x16 = SSE16x16;                 // sum of squared errors
+    VP8SSE16x8 = SSE16x8;
+    VP8SSE8x8 = SSE8x8;
+    VP8SSE4x4 = SSE4x4;
+    VP8TDisto4x4 = Disto4x4;                // texture distortion
+    VP8TDisto16x16 = Disto16x16;
+}
+
+#else // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(VP8EncDspInitSSE2)
+
+#endif // WEBP_USE_SSE2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/enc_sse41.c b/codec/L2/demos/webpEnc/host/src/dsp/enc_sse41.c
new file mode 100644
index 0000000000..1f83978a46
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/enc_sse41.c
@@ -0,0 +1,366 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE4.1 version of some encoding functions.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE41)
+#include <smmintrin.h>
+#include <stdlib.h> // for abs()
+
+#include "../enc/vp8enci.h"
+
+//------------------------------------------------------------------------------
+// Compute susceptibility based on DCT-coeff histograms.
+
+static void CollectHistogram(
+    const uint8_t* ref, const uint8_t* pred, int start_block, int end_block, VP8Histogram* const histo) {
+    // Builds the distribution of clamped coefficient bins for blocks
+    // [start_block, end_block) and hands it to VP8SetHistogramData().
+    const __m128i kMaxBin = _mm_set1_epi16(MAX_COEFF_THRESH);
+    int counts[MAX_COEFF_THRESH + 1] = {0};
+    int block;
+    for (block = start_block; block < end_block; ++block) {
+        int16_t coeffs[16];
+        int idx;
+
+        // Transform the block found at offset VP8DspScan[block] in ref/pred.
+        VP8FTransform(ref + VP8DspScan[block], pred + VP8DspScan[block], coeffs);
+
+        // In place, replace every coefficient c by its bin:
+        //   bin = min(abs(c) >> 3, MAX_COEFF_THRESH)
+        {
+            const __m128i c_lo = _mm_loadu_si128((__m128i*)&coeffs[0]);
+            const __m128i c_hi = _mm_loadu_si128((__m128i*)&coeffs[8]);
+            const __m128i abs_lo = _mm_abs_epi16(c_lo);
+            const __m128i abs_hi = _mm_abs_epi16(c_hi);
+            const __m128i mag_lo = _mm_srai_epi16(abs_lo, 3);
+            const __m128i mag_hi = _mm_srai_epi16(abs_hi, 3);
+            _mm_storeu_si128((__m128i*)&coeffs[0], _mm_min_epi16(mag_lo, kMaxBin));
+            _mm_storeu_si128((__m128i*)&coeffs[8], _mm_min_epi16(mag_hi, kMaxBin));
+        }
+
+        // Tally the sixteen bins.
+        idx = 0;
+        while (idx < 16) {
+            ++counts[coeffs[idx]];
+            ++idx;
+        }
+    }
+    VP8SetHistogramData(counts, histo);
+}
+
+//------------------------------------------------------------------------------
+// Texture distortion
+//
+// We try to match the spectral content (weighted) between source and
+// reconstructed samples.
+
+// Hadamard transform of two 4x4 blocks (inA and inB, row stride BPS), computed in parallel.
+// Returns the difference between the weighted sums (weights w[0..15]) of the
+// absolute values of the transformed coefficients: sum(w * |T(inA)|) - sum(w * |T(inB)|).
+static int TTransform(const uint8_t* inA, const uint8_t* inB, const uint16_t* const w) {
+    __m128i tmp_0, tmp_1, tmp_2, tmp_3;
+
+    // Load, combine and transpose inputs.
+    {
+        const __m128i inA_0 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 0]); // 8-byte load, 4 pixels used
+        const __m128i inA_1 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 1]);
+        const __m128i inA_2 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 2]);
+        const __m128i inA_3 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 3]);
+        const __m128i inB_0 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 0]);
+        const __m128i inB_1 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 1]);
+        const __m128i inB_2 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 2]);
+        const __m128i inB_3 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 3]);
+
+        // Combine inA and inB (we'll do two transforms in parallel).
+        const __m128i inAB_0 = _mm_unpacklo_epi8(inA_0, inB_0);
+        const __m128i inAB_1 = _mm_unpacklo_epi8(inA_1, inB_1);
+        const __m128i inAB_2 = _mm_unpacklo_epi8(inA_2, inB_2);
+        const __m128i inAB_3 = _mm_unpacklo_epi8(inA_3, inB_3);
+        // a00 b00 a01 b01 a02 b02 a03 b03   0 0 0 0 0 0 0 0
+        // a10 b10 a11 b11 a12 b12 a13 b13   0 0 0 0 0 0 0 0
+        // a20 b20 a21 b21 a22 b22 a23 b23   0 0 0 0 0 0 0 0
+        // a30 b30 a31 b31 a32 b32 a33 b33   0 0 0 0 0 0 0 0
+
+        // Transpose the two 4x4, discarding the upper 8 bytes (shown as 0 above; they really hold pixels 4..7 of each row).
+        const __m128i transpose0_0 = _mm_unpacklo_epi8(inAB_0, inAB_2);
+        const __m128i transpose0_1 = _mm_unpacklo_epi8(inAB_1, inAB_3);
+        // a00 a20  b00 b20  a01 a21  b01 b21  a02 a22  b02 b22  a03 a23  b03 b23
+        // a10 a30  b10 b30  a11 a31  b11 b31  a12 a32  b12 b32  a13 a33  b13 b33
+        const __m128i transpose1_0 = _mm_unpacklo_epi8(transpose0_0, transpose0_1);
+        const __m128i transpose1_1 = _mm_unpackhi_epi8(transpose0_0, transpose0_1);
+        // a00 a10 a20 a30  b00 b10 b20 b30  a01 a11 a21 a31  b01 b11 b21 b31
+        // a02 a12 a22 a32  b02 b12 b22 b32  a03 a13 a23 a33  b03 b13 b23 b33
+
+        // Convert to 16b (zero-extension of the low 8 bytes; the shift brings the high 8 bytes down first).
+        tmp_0 = _mm_cvtepu8_epi16(transpose1_0);
+        tmp_1 = _mm_cvtepu8_epi16(_mm_srli_si128(transpose1_0, 8));
+        tmp_2 = _mm_cvtepu8_epi16(transpose1_1);
+        tmp_3 = _mm_cvtepu8_epi16(_mm_srli_si128(transpose1_1, 8));
+        // a00 a10 a20 a30   b00 b10 b20 b30
+        // a01 a11 a21 a31   b01 b11 b21 b31
+        // a02 a12 a22 a32   b02 b12 b22 b32
+        // a03 a13 a23 a33   b03 b13 b23 b33
+    }
+
+    // Horizontal pass and subsequent transpose.
+    {
+        // Calculate a and b (two 4x4 at once).
+        const __m128i a0 = _mm_add_epi16(tmp_0, tmp_2);
+        const __m128i a1 = _mm_add_epi16(tmp_1, tmp_3);
+        const __m128i a2 = _mm_sub_epi16(tmp_1, tmp_3);
+        const __m128i a3 = _mm_sub_epi16(tmp_0, tmp_2);
+        const __m128i b0 = _mm_add_epi16(a0, a1);
+        const __m128i b1 = _mm_add_epi16(a3, a2);
+        const __m128i b2 = _mm_sub_epi16(a3, a2);
+        const __m128i b3 = _mm_sub_epi16(a0, a1);
+        // a00 a01 a02 a03   b00 b01 b02 b03
+        // a10 a11 a12 a13   b10 b11 b12 b13
+        // a20 a21 a22 a23   b20 b21 b22 b23
+        // a30 a31 a32 a33   b30 b31 b32 b33
+
+        // Transpose the two 4x4.
+        const __m128i transpose0_0 = _mm_unpacklo_epi16(b0, b1);
+        const __m128i transpose0_1 = _mm_unpacklo_epi16(b2, b3);
+        const __m128i transpose0_2 = _mm_unpackhi_epi16(b0, b1);
+        const __m128i transpose0_3 = _mm_unpackhi_epi16(b2, b3);
+        // a00 a10 a01 a11   a02 a12 a03 a13
+        // a20 a30 a21 a31   a22 a32 a23 a33
+        // b00 b10 b01 b11   b02 b12 b03 b13
+        // b20 b30 b21 b31   b22 b32 b23 b33
+        const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+        const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
+        const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+        const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
+        // a00 a10 a20 a30 a01 a11 a21 a31
+        // b00 b10 b20 b30 b01 b11 b21 b31
+        // a02 a12 a22 a32 a03 a13 a23 a33
+        // b02 b12 b22 b32 b03 b13 b23 b33
+        tmp_0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
+        tmp_1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
+        tmp_2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
+        tmp_3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
+        // a00 a10 a20 a30   b00 b10 b20 b30
+        // a01 a11 a21 a31   b01 b11 b21 b31
+        // a02 a12 a22 a32   b02 b12 b22 b32
+        // a03 a13 a23 a33   b03 b13 b23 b33
+    }
+
+    // Vertical pass and difference of weighted sums.
+    {
+        // Load all inputs.
+        const __m128i w_0 = _mm_loadu_si128((const __m128i*)&w[0]);
+        const __m128i w_8 = _mm_loadu_si128((const __m128i*)&w[8]);
+
+        // Calculate a and b (two 4x4 at once).
+        const __m128i a0 = _mm_add_epi16(tmp_0, tmp_2);
+        const __m128i a1 = _mm_add_epi16(tmp_1, tmp_3);
+        const __m128i a2 = _mm_sub_epi16(tmp_1, tmp_3);
+        const __m128i a3 = _mm_sub_epi16(tmp_0, tmp_2);
+        const __m128i b0 = _mm_add_epi16(a0, a1);
+        const __m128i b1 = _mm_add_epi16(a3, a2);
+        const __m128i b2 = _mm_sub_epi16(a3, a2);
+        const __m128i b3 = _mm_sub_epi16(a0, a1);
+
+        // Separate the transforms of inA and inB.
+        __m128i A_b0 = _mm_unpacklo_epi64(b0, b1); // low halves hold the inA coefficients
+        __m128i A_b2 = _mm_unpacklo_epi64(b2, b3);
+        __m128i B_b0 = _mm_unpackhi_epi64(b0, b1); // high halves hold the inB coefficients
+        __m128i B_b2 = _mm_unpackhi_epi64(b2, b3);
+
+        A_b0 = _mm_abs_epi16(A_b0);
+        A_b2 = _mm_abs_epi16(A_b2);
+        B_b0 = _mm_abs_epi16(B_b0);
+        B_b2 = _mm_abs_epi16(B_b2);
+
+        // weighted sums
+        A_b0 = _mm_madd_epi16(A_b0, w_0); // NOTE(review): signed multiply, so weights presumably < 0x8000 - confirm
+        A_b2 = _mm_madd_epi16(A_b2, w_8);
+        B_b0 = _mm_madd_epi16(B_b0, w_0);
+        B_b2 = _mm_madd_epi16(B_b2, w_8);
+        A_b0 = _mm_add_epi32(A_b0, A_b2);
+        B_b0 = _mm_add_epi32(B_b0, B_b2);
+
+        // difference of weighted sums (A_b2 and B_b0/B_b2 are reused as scratch from here on)
+        A_b2 = _mm_sub_epi32(A_b0, B_b0);
+        // cascading summation of the differences
+        B_b0 = _mm_hadd_epi32(A_b2, A_b2);
+        B_b2 = _mm_hadd_epi32(B_b0, B_b0);
+        return _mm_cvtsi128_si32(B_b2);
+    }
+}
+
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b, const uint16_t* const w) {
+    // magnitude of the weighted-transform difference, scaled down by 2^5
+    return abs(TTransform(a, b, w)) >> 5;
+}
+
+static int Disto16x16(const uint8_t* const a, const uint8_t* const b, const uint16_t* const w) {
+    int total = 0; // running sum of the sixteen 4x4 distortions
+    int blk;
+    for (blk = 0; blk < 16; ++blk) {
+        // 4x4 sub-blocks in raster order: column (blk & 3), row (blk >> 2)
+        const int offset = 4 * (blk & 3) + 4 * BPS * (blk >> 2);
+        total += Disto4x4(a + offset, b + offset, w);
+    }
+    return total;
+}
+
+//------------------------------------------------------------------------------
+// Quantization
+//
+
+// Generates a pshufb constant for shuffling 16b words: output word k takes input word A..H; -1 yields a zero word.
+#define PSHUFB_CST(A, B, C, D, E, F, G, H)                                                                  \
+    _mm_set_epi8(2 * (H) + 1, 2 * (H) + 0, 2 * (G) + 1, 2 * (G) + 0, 2 * (F) + 1, 2 * (F) + 0, 2 * (E) + 1, \
+                 2 * (E) + 0, 2 * (D) + 1, 2 * (D) + 0, 2 * (C) + 1, 2 * (C) + 0, 2 * (B) + 1, 2 * (B) + 0, \
+                 2 * (A) + 1, 2 * (A) + 0)
+
+static WEBP_INLINE int DoQuantizeBlock(int16_t in[16],
+                                       int16_t out[16],
+                                       const uint16_t* const sharpen,
+                                       const VP8Matrix* const mtx) {
+    const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
+    const __m128i zero = _mm_setzero_si128();
+    __m128i out0, out8;
+    __m128i packed_out;
+
+    // Load all inputs.
+    __m128i in0 = _mm_loadu_si128((__m128i*)&in[0]);
+    __m128i in8 = _mm_loadu_si128((__m128i*)&in[8]);
+    const __m128i iq0 = _mm_loadu_si128((const __m128i*)&mtx->iq_[0]);
+    const __m128i iq8 = _mm_loadu_si128((const __m128i*)&mtx->iq_[8]);
+    const __m128i q0 = _mm_loadu_si128((const __m128i*)&mtx->q_[0]);
+    const __m128i q8 = _mm_loadu_si128((const __m128i*)&mtx->q_[8]);
+
+    // coeff = abs(in)
+    __m128i coeff0 = _mm_abs_epi16(in0);
+    __m128i coeff8 = _mm_abs_epi16(in8);
+
+    // coeff = abs(in) + sharpen (skipped when the caller passes sharpen == NULL)
+    if (sharpen != NULL) {
+        const __m128i sharpen0 = _mm_loadu_si128((const __m128i*)&sharpen[0]);
+        const __m128i sharpen8 = _mm_loadu_si128((const __m128i*)&sharpen[8]);
+        coeff0 = _mm_add_epi16(coeff0, sharpen0);
+        coeff8 = _mm_add_epi16(coeff8, sharpen8);
+    }
+
+    // out = (coeff * iQ + B) >> QFIX
+    {
+        // doing calculations with 32b precision (QFIX=17)
+        // out = (coeff * iQ)
+        const __m128i coeff_iQ0H = _mm_mulhi_epu16(coeff0, iq0); // high 16 bits of the unsigned product
+        const __m128i coeff_iQ0L = _mm_mullo_epi16(coeff0, iq0); // low 16 bits of the product
+        const __m128i coeff_iQ8H = _mm_mulhi_epu16(coeff8, iq8);
+        const __m128i coeff_iQ8L = _mm_mullo_epi16(coeff8, iq8);
+        __m128i out_00 = _mm_unpacklo_epi16(coeff_iQ0L, coeff_iQ0H); // interleave halves into 32b products
+        __m128i out_04 = _mm_unpackhi_epi16(coeff_iQ0L, coeff_iQ0H);
+        __m128i out_08 = _mm_unpacklo_epi16(coeff_iQ8L, coeff_iQ8H);
+        __m128i out_12 = _mm_unpackhi_epi16(coeff_iQ8L, coeff_iQ8H);
+        // out = (coeff * iQ + B)
+        const __m128i bias_00 = _mm_loadu_si128((const __m128i*)&mtx->bias_[0]);
+        const __m128i bias_04 = _mm_loadu_si128((const __m128i*)&mtx->bias_[4]);
+        const __m128i bias_08 = _mm_loadu_si128((const __m128i*)&mtx->bias_[8]);
+        const __m128i bias_12 = _mm_loadu_si128((const __m128i*)&mtx->bias_[12]);
+        out_00 = _mm_add_epi32(out_00, bias_00);
+        out_04 = _mm_add_epi32(out_04, bias_04);
+        out_08 = _mm_add_epi32(out_08, bias_08);
+        out_12 = _mm_add_epi32(out_12, bias_12);
+        // out = QUANTDIV(coeff, iQ, B, QFIX)
+        out_00 = _mm_srai_epi32(out_00, QFIX);
+        out_04 = _mm_srai_epi32(out_04, QFIX);
+        out_08 = _mm_srai_epi32(out_08, QFIX);
+        out_12 = _mm_srai_epi32(out_12, QFIX);
+
+        // pack result as 16b (signed saturation)
+        out0 = _mm_packs_epi32(out_00, out_04);
+        out8 = _mm_packs_epi32(out_08, out_12);
+
+        // if (coeff > 2047) coeff = 2047
+        out0 = _mm_min_epi16(out0, max_coeff_2047);
+        out8 = _mm_min_epi16(out8, max_coeff_2047);
+    }
+
+    // put sign back. NOTE(review): _mm_sign_epi16 also zeroes lanes where in == 0 - presumably harmless; confirm
+    out0 = _mm_sign_epi16(out0, in0);
+    out8 = _mm_sign_epi16(out8, in8);
+
+    // in = out * Q (the dequantized values overwrite the caller's in[])
+    in0 = _mm_mullo_epi16(out0, q0);
+    in8 = _mm_mullo_epi16(out8, q8);
+
+    _mm_storeu_si128((__m128i*)&in[0], in0);
+    _mm_storeu_si128((__m128i*)&in[8], in8);
+
+    // zigzag the output before storing it. The re-ordering is:
+    //    0 1 2 3 4 5 6 7 | 8  9 10 11 12 13 14 15
+    // -> 0 1 4[8]5 2 3 6 | 9 12 13 10 [7]11 14 15
+    // There are only two misplaced entries ([8] and [7]) that are crossing the
+    // reg's boundaries.
+    // We use pshufb instead of pshuflo/pshufhi.
+    {
+        const __m128i kCst_lo = PSHUFB_CST(0, 1, 4, -1, 5, 2, 3, 6);
+        const __m128i kCst_7 = PSHUFB_CST(-1, -1, -1, -1, 7, -1, -1, -1);
+        const __m128i tmp_lo = _mm_shuffle_epi8(out0, kCst_lo);
+        const __m128i tmp_7 = _mm_shuffle_epi8(out0, kCst_7); // extract #7
+        const __m128i kCst_hi = PSHUFB_CST(1, 4, 5, 2, -1, 3, 6, 7);
+        const __m128i kCst_8 = PSHUFB_CST(-1, -1, -1, 0, -1, -1, -1, -1);
+        const __m128i tmp_hi = _mm_shuffle_epi8(out8, kCst_hi);
+        const __m128i tmp_8 = _mm_shuffle_epi8(out8, kCst_8); // extract #8
+        const __m128i out_z0 = _mm_or_si128(tmp_lo, tmp_8); // the -1 slots are zero, so OR drops #8 into place
+        const __m128i out_z8 = _mm_or_si128(tmp_hi, tmp_7);
+        _mm_storeu_si128((__m128i*)&out[0], out_z0);
+        _mm_storeu_si128((__m128i*)&out[8], out_z8);
+        packed_out = _mm_packs_epi16(out_z0, out_z8);
+    }
+
+    // detect if all 'out' values are zeroes or not
+    return (_mm_movemask_epi8(_mm_cmpeq_epi8(packed_out, zero)) != 0xffff);
+}
+
+#undef PSHUFB_CST
+
+static int QuantizeBlock(int16_t in[16], int16_t out[16], const VP8Matrix* const mtx) {
+    return DoQuantizeBlock(in, out, mtx->sharpen_, mtx); // quantize using the matrix's own sharpen_ table
+}
+
+static int QuantizeBlockWHT(int16_t in[16], int16_t out[16], const VP8Matrix* const mtx) {
+    return DoQuantizeBlock(in, out, NULL, mtx); // NULL sharpen: the sharpening add is skipped
+}
+
+static int Quantize2Blocks(int16_t in[32], int16_t out[32], const VP8Matrix* const mtx) {
+    const uint16_t* const sharpen = &mtx->sharpen_[0];
+    // result bit 0: first block has a non-zero output; bit 1: second block
+    const int nz_first = DoQuantizeBlock(in, out, sharpen, mtx);
+    const int nz_second = DoQuantizeBlock(in + 16, out + 16, sharpen, mtx);
+    return nz_first | (nz_second << 1);
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspInitSSE41(void);
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitSSE41(void) {
+    VP8TDisto4x4 = Disto4x4;                // texture distortion
+    VP8TDisto16x16 = Disto16x16;
+    VP8EncQuantizeBlock = QuantizeBlock;    // quantization
+    VP8EncQuantize2Blocks = Quantize2Blocks;
+    VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
+    VP8CollectHistogram = CollectHistogram; // histogram collection
+}
+
+#else // !WEBP_USE_SSE41
+
+WEBP_DSP_INIT_STUB(VP8EncDspInitSSE41)
+
+#endif // WEBP_USE_SSE41
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/filters.c b/codec/L2/demos/webpEnc/host/src/dsp/filters.c
new file mode 100644
index 0000000000..6cfb72d86f
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/filters.c
@@ -0,0 +1,222 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Spatial prediction using various filters
+//
+// Author: Urvang (urvang@google.com)
+
+#include "./dsp.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+//------------------------------------------------------------------------------
+// Helpful macro: assert()-based argument checks; also reads the caller's width, height, stride, row, num_rows.
+
+#define SANITY_CHECK(in, out)                                     \
+    assert(in != NULL);                                           \
+    assert(out != NULL);                                          \
+    assert(width > 0);                                            \
+    assert(height > 0);                                           \
+    assert(stride >= width);                                      \
+    assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
+    (void)height; // Silence unused warning.
+
+static WEBP_INLINE void PredictLine(const uint8_t* src, const uint8_t* pred, uint8_t* dst, int length, int inverse) {
+    // Residual (inverse == 0: src - pred) or reconstruction (inverse != 0: src + pred), one byte at a time.
+    // Bytes are visited in increasing order, so 'pred' may point at 'dst' bytes written by earlier iterations.
+    int pos;
+    for (pos = 0; pos < length; ++pos) {
+        dst[pos] = (uint8_t)(inverse ? src[pos] + pred[pos] : src[pos] - pred[pos]);
+    }
+}
+
+//------------------------------------------------------------------------------
+// Horizontal filter.
+
+static WEBP_INLINE void DoHorizontalFilter(
+    const uint8_t* in, int width, int height, int stride, int row, int num_rows, int inverse, uint8_t* out) {
+    const uint8_t* preds;
+    const size_t start_offset = (size_t)row * stride; // widen first: 'row * stride' alone is multiplied in int
+    const int last_row = row + num_rows;              // processes rows [row, last_row)
+    SANITY_CHECK(in, out);
+    in += start_offset;
+    out += start_offset;
+    preds = inverse ? out : in; // unfiltering (inverse != 0) predicts from the output written so far
+
+    if (row == 0) {
+        // Leftmost pixel is the same as input for topmost scanline.
+        out[0] = in[0];
+        PredictLine(in + 1, preds, out + 1, width - 1, inverse); // each pixel predicted from its left neighbour
+        row = 1;
+        preds += stride;
+        in += stride;
+        out += stride;
+    }
+
+    // Filter line-by-line.
+    while (row < last_row) {
+        // Leftmost pixel is predicted from above.
+        PredictLine(in, preds - stride, out, 1, inverse);
+        PredictLine(in + 1, preds, out + 1, width - 1, inverse); // remaining pixels: predicted from the left
+        ++row;
+        preds += stride;
+        in += stride;
+        out += stride;
+    }
+}
+
+//------------------------------------------------------------------------------
+// Vertical filter.
+
+static WEBP_INLINE void DoVerticalFilter(
+    const uint8_t* in, int width, int height, int stride, int row, int num_rows, int inverse, uint8_t* out) {
+    const uint8_t* preds;
+    const size_t start_offset = (size_t)row * stride; // widen first: 'row * stride' alone is multiplied in int
+    const int last_row = row + num_rows;              // processes rows [row, last_row)
+    SANITY_CHECK(in, out);
+    in += start_offset;
+    out += start_offset;
+    preds = inverse ? out : in; // unfiltering (inverse != 0) predicts from the output written so far
+
+    if (row == 0) {
+        // Very first top-left pixel is copied.
+        out[0] = in[0];
+        // Rest of top scan-line is left-predicted.
+        PredictLine(in + 1, preds, out + 1, width - 1, inverse);
+        row = 1;
+        in += stride; // 'preds' deliberately stays on row 0: it is the predictor row for row 1
+        out += stride;
+    } else {
+        // We are starting from in-between. Make sure 'preds' points to prev row.
+        preds -= stride;
+    }
+
+    // Filter line-by-line.
+    while (row < last_row) {
+        PredictLine(in, preds, out, width, inverse); // every pixel predicted from the one directly above
+        ++row;
+        preds += stride;
+        in += stride;
+        out += stride;
+    }
+}
+
+//------------------------------------------------------------------------------
+// Gradient filter.
+
+static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
+    const int gradient = (int)a + (int)b - (int)c; // a + b - c, computed without wrap-around
+    return (gradient < 0) ? 0 : (gradient > 255) ? 255 : gradient; // saturate to [0, 255]
+}
+
+static WEBP_INLINE void DoGradientFilter(
+    const uint8_t* in, int width, int height, int stride, int row, int num_rows, int inverse, uint8_t* out) {
+    const uint8_t* preds;
+    const size_t start_offset = (size_t)row * stride; // widen first: 'row * stride' alone is multiplied in int
+    const int last_row = row + num_rows;              // processes rows [row, last_row)
+    SANITY_CHECK(in, out);
+    in += start_offset;
+    out += start_offset;
+    preds = inverse ? out : in; // unfiltering (inverse != 0) predicts from the output written so far
+
+    // left prediction for top scan-line
+    if (row == 0) {
+        out[0] = in[0]; // top-left pixel is copied as-is
+        PredictLine(in + 1, preds, out + 1, width - 1, inverse);
+        row = 1;
+        preds += stride;
+        in += stride;
+        out += stride;
+    }
+
+    // Filter line-by-line.
+    while (row < last_row) {
+        int w;
+        // leftmost pixel: predict from above.
+        PredictLine(in, preds - stride, out, 1, inverse);
+        for (w = 1; w < width; ++w) {
+            const int pred = GradientPredictor(preds[w - 1], preds[w - stride], preds[w - stride - 1]);
+            out[w] = in[w] + (inverse ? pred : -pred); // predictor inputs: left, above, above-left
+        }
+        ++row;
+        preds += stride;
+        in += stride;
+        out += stride;
+    }
+}
+
+#undef SANITY_CHECK
+
+//------------------------------------------------------------------------------
+
+static void HorizontalFilter(const uint8_t* data, int width, int height, int stride, uint8_t* filtered_data) {
+    DoHorizontalFilter(data, width, height, stride, 0, height, 0, filtered_data); // all rows, inverse = 0
+}
+
+static void VerticalFilter(const uint8_t* data, int width, int height, int stride, uint8_t* filtered_data) {
+    DoVerticalFilter(data, width, height, stride, 0, height, 0, filtered_data); // all rows, inverse = 0
+}
+
+static void GradientFilter(const uint8_t* data, int width, int height, int stride, uint8_t* filtered_data) {
+    DoGradientFilter(data, width, height, stride, 0, height, 0, filtered_data); // all rows, inverse = 0
+}
+
+//------------------------------------------------------------------------------
+
+static void VerticalUnfilter(int width, int height, int stride, int row, int num_rows, uint8_t* data) {
+    DoVerticalFilter(data, width, height, stride, row, num_rows, 1, data); // inverse = 1, in place on 'data'
+}
+
+static void HorizontalUnfilter(int width, int height, int stride, int row, int num_rows, uint8_t* data) {
+    DoHorizontalFilter(data, width, height, stride, row, num_rows, 1, data); // inverse = 1, in place on 'data'
+}
+
+static void GradientUnfilter(int width, int height, int stride, int row, int num_rows, uint8_t* data) {
+    DoGradientFilter(data, width, height, stride, row, num_rows, 1, data); // inverse = 1, in place on 'data'
+}
+
+//------------------------------------------------------------------------------
+// Init function
+
+WebPFilterFunc WebPFilters[WEBP_FILTER_LAST];
+WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST];
+
+extern void VP8FiltersInitMIPSdspR2(void);
+extern void VP8FiltersInitSSE2(void);
+
+static volatile VP8CPUInfo filters_last_cpuinfo_used = (VP8CPUInfo)&filters_last_cpuinfo_used;
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
+    if (filters_last_cpuinfo_used == VP8GetCPUInfo) return; // tables already built for this VP8GetCPUInfo value
+
+    WebPUnfilters[WEBP_FILTER_NONE] = NULL; // install this file's C routines first; overrides happen below
+    WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
+    WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
+    WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
+
+    WebPFilters[WEBP_FILTER_NONE] = NULL;
+    WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
+    WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
+    WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
+
+    if (VP8GetCPUInfo != NULL) { // platform-specific init is attempted only when a CPU-info callback is set
+#if defined(WEBP_USE_SSE2)
+        if (VP8GetCPUInfo(kSSE2)) {
+            VP8FiltersInitSSE2();
+        }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+        if (VP8GetCPUInfo(kMIPSdspR2)) {
+            VP8FiltersInitMIPSdspR2();
+        }
+#endif
+    }
+    filters_last_cpuinfo_used = VP8GetCPUInfo; // recorded last so the early return above fires on later calls
+}
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/filters_mips_dsp_r2.c b/codec/L2/demos/webpEnc/host/src/dsp/filters_mips_dsp_r2.c
new file mode 100644
index 0000000000..d3befd487a
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/filters_mips_dsp_r2.c
@@ -0,0 +1,395 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Spatial prediction using various filters
+//
+// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
+//            Djordje Pesut (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+#include "../dsp/dsp.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+//------------------------------------------------------------------------------
+// Helpful macro: assert()-based argument checks; also reads the caller's width, height, stride, row, num_rows.
+
+#define SANITY_CHECK(in, out)                                     \
+    assert(in != NULL);                                           \
+    assert(out != NULL);                                          \
+    assert(width > 0);                                            \
+    assert(height > 0);                                           \
+    assert(stride >= width);                                      \
+    assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
+    (void)height; // Silence unused warning.
+
+// Left prediction over LENGTH bytes; the predictor of each byte is the byte just before it (SRC[-1] must be readable).
+// INVERSE=1: unfilter in place, src[i] += src[i - 1]; stores go through SRC, so DST must alias SRC.
+// INVERSE=0: filter, dst[i] = src[i] - src[i - 1], written through DST.
+// Label 1 handles four bytes per pass, label 2 the remaining LENGTH & 3 bytes; INVERSE must be a literal 0 or 1.
+#define DO_PREDICT_LINE(SRC, DST, LENGTH, INVERSE)                                                                   \
+    do {                                                                                                             \
+        const uint8_t* psrc = (uint8_t*)(SRC);                                                                       \
+        uint8_t* pdst = (uint8_t*)(DST);                                                                             \
+        const int ilength = (int)(LENGTH);                                                                           \
+        int temp0, temp1, temp2, temp3, temp4, temp5, temp6;                                                         \
+        __asm__ volatile(                                                                                            \
+            ".set      push                                   \n\t"                                                  \
+            ".set      noreorder                              \n\t"                                                  \
+            "srl       %[temp0],    %[length],    0x2         \n\t"                                                  \
+            "beqz      %[temp0],    4f                        \n\t"                                                  \
+            " andi     %[temp6],    %[length],    0x3         \n\t"                                                  \
+            ".if " #INVERSE                                                                                          \
+            "                                   \n\t"                                                                \
+            "lbu       %[temp1],    -1(%[src])                \n\t"                                                  \
+            "1:                                                 \n\t"                                                \
+            "lbu       %[temp2],    0(%[src])                 \n\t"                                                  \
+            "lbu       %[temp3],    1(%[src])                 \n\t"                                                  \
+            "lbu       %[temp4],    2(%[src])                 \n\t"                                                  \
+            "lbu       %[temp5],    3(%[src])                 \n\t"                                                  \
+            "addiu     %[src],      %[src],       4           \n\t"                                                  \
+            "addiu     %[temp0],    %[temp0],     -1          \n\t"                                                  \
+            "addu      %[temp2],    %[temp2],     %[temp1]    \n\t"                                                  \
+            "addu      %[temp3],    %[temp3],     %[temp2]    \n\t"                                                  \
+            "addu      %[temp4],    %[temp4],     %[temp3]    \n\t"                                                  \
+            "addu      %[temp1],    %[temp5],     %[temp4]    \n\t"                                                  \
+            "sb        %[temp2],    -4(%[src])                \n\t"                                                  \
+            "sb        %[temp3],    -3(%[src])                \n\t"                                                  \
+            "sb        %[temp4],    -2(%[src])                \n\t"                                                  \
+            "bnez      %[temp0],    1b                        \n\t"                                                  \
+            " sb       %[temp1],    -1(%[src])                \n\t"                                                  \
+            ".else                                              \n\t"                                                \
+            "1:                                                 \n\t"                                                \
+            "ulw       %[temp1],    -1(%[src])                \n\t"                                                  \
+            "ulw       %[temp2],    0(%[src])                 \n\t"                                                  \
+            "addiu     %[src],      %[src],       4           \n\t"                                                  \
+            "addiu     %[temp0],    %[temp0],     -1          \n\t"                                                  \
+            "subu.qb   %[temp3],    %[temp2],     %[temp1]    \n\t"                                                  \
+            "usw       %[temp3],    0(%[dst])                 \n\t"                                                  \
+            "bnez      %[temp0],    1b                        \n\t"                                                  \
+            " addiu    %[dst],      %[dst],       4           \n\t"                                                  \
+            ".endif                                             \n\t"                                                \
+            "4:                                                 \n\t"                                                \
+            "beqz      %[temp6],    3f                        \n\t"                                                  \
+            " nop                                             \n\t"                                                  \
+            "2:                                                 \n\t"                                                \
+            "lbu       %[temp1],    -1(%[src])                \n\t"                                                  \
+            "lbu       %[temp2],    0(%[src])                 \n\t"                                                  \
+            "addiu     %[src],      %[src],       1           \n\t"                                                  \
+            ".if " #INVERSE                                                                                          \
+            "                                   \n\t"                                                                \
+            "addu      %[temp3],    %[temp1],     %[temp2]    \n\t"                                                  \
+            "sb        %[temp3],    -1(%[src])                \n\t"                                                  \
+            ".else                                              \n\t"                                                \
+            "subu      %[temp3],    %[temp2],     %[temp1]    \n\t" /* src[i] - src[i-1], same as the 4-byte loop */ \
+            "sb        %[temp3],    0(%[dst])                 \n\t"                                                  \
+            ".endif                                             \n\t"                                                \
+            "addiu     %[temp6],    %[temp6],     -1          \n\t"                                                  \
+            "bnez      %[temp6],    2b                        \n\t"                                                  \
+            " addiu    %[dst],      %[dst],       1           \n\t"                                                  \
+            "3:                                                 \n\t"                                                \
+            ".set      pop                                    \n\t"                                                  \
+            : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),                \
+              [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [temp6] "=&r"(temp6), [dst] "+&r"(pdst), [src] "+&r"(psrc) \
+            : [length] "r"(ilength)                                                                                  \
+            : "memory");                                                                                             \
+    } while (0)
+
+static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst, int length, int inverse) {
+    if (inverse) { // branch here because the macro stringizes INVERSE into an assembler '.if': it must be a literal
+        DO_PREDICT_LINE(src, dst, length, 1);
+    } else {
+        DO_PREDICT_LINE(src, dst, length, 0);
+    }
+}
+
+#define DO_PREDICT_LINE_VERTICAL(SRC, PRED, DST, LENGTH, INVERSE)                                     \
+    do { /* DST[i] = SRC[i] + PRED[i] if INVERSE, else SRC[i] - PRED[i]; INVERSE must be a literal */ \
+        const uint8_t* psrc = (uint8_t*)(SRC);                                                        \
+        const uint8_t* ppred = (uint8_t*)(PRED);                                                      \
+        uint8_t* pdst = (uint8_t*)(DST);                                                              \
+        const int ilength = (int)(LENGTH);                                                            \
+        int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;                                   \
+        __asm__ volatile(                                                                             \
+            ".set      push                                   \n\t"                                   \
+            ".set      noreorder                              \n\t"                                   \
+            "srl       %[temp0],    %[length],    0x3         \n\t" /* temp0 = LENGTH / 8 passes */   \
+            "beqz      %[temp0],    4f                        \n\t"                                   \
+            " andi     %[temp7],    %[length],    0x7         \n\t" /* temp7 = leftover bytes */      \
+            "1:                                                 \n\t"                                 \
+            "ulw       %[temp1],    0(%[src])                 \n\t"                                   \
+            "ulw       %[temp2],    0(%[pred])                \n\t"                                   \
+            "ulw       %[temp3],    4(%[src])                 \n\t"                                   \
+            "ulw       %[temp4],    4(%[pred])                \n\t"                                   \
+            "addiu     %[src],      %[src],       8           \n\t"                                   \
+            ".if " #INVERSE                                                                           \
+            "                                   \n\t"                                                 \
+            "addu.qb   %[temp5],    %[temp1],     %[temp2]    \n\t"                                   \
+            "addu.qb   %[temp6],    %[temp3],     %[temp4]    \n\t"                                   \
+            ".else                                              \n\t"                                 \
+            "subu.qb   %[temp5],    %[temp1],     %[temp2]    \n\t"                                   \
+            "subu.qb   %[temp6],    %[temp3],     %[temp4]    \n\t"                                   \
+            ".endif                                             \n\t"                                 \
+            "addiu     %[pred],     %[pred],      8           \n\t"                                   \
+            "usw       %[temp5],    0(%[dst])                 \n\t"                                   \
+            "usw       %[temp6],    4(%[dst])                 \n\t"                                   \
+            "addiu     %[temp0],    %[temp0],     -1          \n\t"                                   \
+            "bnez      %[temp0],    1b                        \n\t"                                   \
+            " addiu    %[dst],      %[dst],       8           \n\t"                                   \
+            "4:                                                 \n\t"                                 \
+            "beqz      %[temp7],    3f                        \n\t"                                   \
+            " nop                                             \n\t"                                   \
+            "2:                                                 \n\t"                                 \
+            "lbu       %[temp1],    0(%[src])                 \n\t"                                   \
+            "lbu       %[temp2],    0(%[pred])                \n\t"                                   \
+            "addiu     %[src],      %[src],       1           \n\t"                                   \
+            "addiu     %[pred],     %[pred],      1           \n\t"                                   \
+            ".if " #INVERSE                                                                           \
+            "                                   \n\t"                                                 \
+            "addu      %[temp3],    %[temp1],     %[temp2]    \n\t"                                   \
+            ".else                                              \n\t"                                 \
+            "subu      %[temp3],    %[temp1],     %[temp2]    \n\t"                                   \
+            ".endif                                             \n\t"                                 \
+            "sb        %[temp3],    0(%[dst])                 \n\t"                                   \
+            "addiu     %[temp7],    %[temp7],     -1          \n\t"                                   \
+            "bnez      %[temp7],    2b                        \n\t"                                   \
+            " addiu    %[dst],      %[dst],       1           \n\t"                                   \
+            "3:                                                 \n\t"                                 \
+            ".set      pop                                    \n\t"                                   \
+            : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), \
+              [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [temp6] "=&r"(temp6), [temp7] "=&r"(temp7), \
+              [pred] "+&r"(ppred), [dst] "+&r"(pdst), [src] "+&r"(psrc)                               \
+            : [length] "r"(ilength)                                                                   \
+            : "memory");                                                                              \
+    } while (0)
+
+#define PREDICT_LINE_ONE_PASS(SRC, PRED, DST, INVERSE)                         \
+    do { /* one byte: *DST = *SRC + *PRED if INVERSE, else *SRC - *PRED */     \
+        int temp1, temp2, temp3;                                               \
+        __asm__ volatile(                                                      \
+            "lbu       %[temp1],   0(%[src])               \n\t"               \
+            "lbu       %[temp2],   0(%[pred])              \n\t"               \
+            ".if " #INVERSE                                                    \
+            "                                \n\t"                             \
+            "addu      %[temp3],   %[temp1],   %[temp2]    \n\t"               \
+            ".else                                           \n\t"             \
+            "subu      %[temp3],   %[temp1],   %[temp2]    \n\t"               \
+            ".endif                                          \n\t"             \
+            "sb        %[temp3],   0(%[dst])               \n\t"               \
+            : [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3) \
+            : [pred] "r"((PRED)), [dst] "r"((DST)), [src] "r"((SRC))           \
+            : "memory");                                                       \
+    } while (0)
+
+//------------------------------------------------------------------------------
+// Horizontal filter.
+
+#define FILTER_LINE_BY_LINE(INVERSE)                                 \
+    do { /* uses the expanding function's row, last_row, in, out, preds, stride, width */ \
+        while (row < last_row) {                                     \
+            PREDICT_LINE_ONE_PASS(in, preds - stride, out, INVERSE); /* leftmost pixel: from the row above */ \
+            DO_PREDICT_LINE(in + 1, out + 1, width - 1, INVERSE);    /* remaining pixels: from the left */ \
+            ++row;                                                   \
+            preds += stride;                                         \
+            in += stride;                                            \
+            out += stride;                                           \
+        }                                                            \
+    } while (0)
+
+static WEBP_INLINE void DoHorizontalFilter(
+    const uint8_t* in, int width, int height, int stride, int row, int num_rows, int inverse, uint8_t* out) {
+    const uint8_t* preds;
+    const size_t start_offset = (size_t)row * stride; // widen first: 'row * stride' alone is multiplied in int
+    const int last_row = row + num_rows;              // processes rows [row, last_row)
+    SANITY_CHECK(in, out);
+    in += start_offset;
+    out += start_offset;
+    preds = inverse ? out : in; // unfiltering (inverse != 0) predicts from the output written so far
+
+    if (row == 0) {
+        // Leftmost pixel is the same as input for topmost scanline.
+        out[0] = in[0];
+        PredictLine(in + 1, out + 1, width - 1, inverse); // predictor is the byte to the left of each source byte
+        row = 1;
+        preds += stride;
+        in += stride;
+        out += stride;
+    }
+
+    // Filter line-by-line.
+    if (inverse) { // the macro needs a literal INVERSE, hence one expansion per direction
+        FILTER_LINE_BY_LINE(1);
+    } else {
+        FILTER_LINE_BY_LINE(0);
+    }
+}
+
+#undef FILTER_LINE_BY_LINE
+
+static void HorizontalFilter(const uint8_t* data, int width, int height, int stride, uint8_t* filtered_data) {
+    DoHorizontalFilter(data, width, height, stride, 0, height, 0, filtered_data); // all rows, inverse = 0
+}
+
+static void HorizontalUnfilter(int width, int height, int stride, int row, int num_rows, uint8_t* data) {
+    DoHorizontalFilter(data, width, height, stride, row, num_rows, 1, data); // inverse = 1, in place on 'data'
+}
+
+//------------------------------------------------------------------------------
+// Vertical filter.
+
+#define FILTER_LINE_BY_LINE(INVERSE)                                  \
+    do { /* uses the expanding function's row, last_row, in, out, preds, stride, width */ \
+        while (row < last_row) {                                      \
+            DO_PREDICT_LINE_VERTICAL(in, preds, out, width, INVERSE); /* whole row predicted from 'preds' */ \
+            ++row;                                                    \
+            preds += stride;                                          \
+            in += stride;                                             \
+            out += stride;                                            \
+        }                                                             \
+    } while (0)
+
+static WEBP_INLINE void DoVerticalFilter(
+    const uint8_t* in, int width, int height, int stride, int row, int num_rows, int inverse, uint8_t* out) {
+    const uint8_t* preds;
+    const size_t start_offset = (size_t)row * stride; // widen first: 'row * stride' alone is multiplied in int
+    const int last_row = row + num_rows;              // processes rows [row, last_row)
+    SANITY_CHECK(in, out);
+    in += start_offset;
+    out += start_offset;
+    preds = inverse ? out : in; // unfiltering (inverse != 0) predicts from the output written so far
+
+    if (row == 0) {
+        // Very first top-left pixel is copied.
+        out[0] = in[0];
+        // Rest of top scan-line is left-predicted.
+        PredictLine(in + 1, out + 1, width - 1, inverse);
+        row = 1;
+        in += stride; // 'preds' deliberately stays on row 0: it is the predictor row for row 1
+        out += stride;
+    } else {
+        // We are starting from in-between. Make sure 'preds' points to prev row.
+        preds -= stride;
+    }
+
+    // Filter line-by-line.
+    if (inverse) { // the macro needs a literal INVERSE, hence one expansion per direction
+        FILTER_LINE_BY_LINE(1);
+    } else {
+        FILTER_LINE_BY_LINE(0);
+    }
+}
+
+#undef FILTER_LINE_BY_LINE
+#undef DO_PREDICT_LINE_VERTICAL
+
+static void VerticalFilter(const uint8_t* data, int width, int height, int stride, uint8_t* filtered_data) {
+    DoVerticalFilter(data, width, height, stride, 0, height, 0, filtered_data); // all rows, inverse = 0
+}
+
+static void VerticalUnfilter(int width, int height, int stride, int row, int num_rows, uint8_t* data) {
+    DoVerticalFilter(data, width, height, stride, row, num_rows, 1, data); // inverse = 1, in place on 'data'
+}
+
+//------------------------------------------------------------------------------
+// Gradient filter.
+
+static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
+    int temp0; // NOTE(review): presumably a + b - c clipped to 8 bits, like the C version — confirm in the DSP manual
+    __asm__ volatile(
+        "addu             %[temp0],   %[a],       %[b]        \n\t"
+        "subu             %[temp0],   %[temp0],   %[c]        \n\t"
+        "shll_s.w         %[temp0],   %[temp0],   23          \n\t"
+        "precrqu_s.qb.ph  %[temp0],   %[temp0],   $zero       \n\t"
+        "srl              %[temp0],   %[temp0],   24          \n\t"
+        : [temp0] "=&r"(temp0)
+        : [a] "r"(a), [b] "r"(b), [c] "r"(c));
+    return temp0;
+}
+
+#define FILTER_LINE_BY_LINE(INVERSE, PREDS, OPERATION)                                                      \
+    do { /* PREDS: row pointer the predictor reads (advanced by this loop); OPERATION: + or - applied to pred */ \
+        while (row < last_row) {                                                                            \
+            int w;                                                                                          \
+            PREDICT_LINE_ONE_PASS(in, PREDS - stride, out, INVERSE); /* leftmost pixel: from the row above */ \
+            for (w = 1; w < width; ++w) {                                                                   \
+                const int pred = GradientPredictor(PREDS[w - 1], PREDS[w - stride], PREDS[w - stride - 1]); \
+                out[w] = in[w] OPERATION pred;                                                              \
+            }                                                                                               \
+            ++row;                                                                                          \
+            in += stride;                                                                                   \
+            out += stride;                                                                                  \
+        }                                                                                                   \
+    } while (0)
+
+static WEBP_INLINE void DoGradientFilter(
+    const uint8_t* in, int width, int height, int stride, int row, int num_rows, int inverse, uint8_t* out) {
+    const uint8_t* preds;
+    const size_t start_offset = (size_t)row * stride; // widen first: 'row * stride' alone is multiplied in int
+    const int last_row = row + num_rows;              // processes rows [row, last_row)
+    SANITY_CHECK(in, out);
+    in += start_offset;
+    out += start_offset;
+    preds = inverse ? out : in; // not read below: the row loop is handed 'out' or 'in' directly
+
+    // left prediction for top scan-line
+    if (row == 0) {
+        out[0] = in[0]; // top-left pixel is copied as-is
+        PredictLine(in + 1, out + 1, width - 1, inverse);
+        row = 1;
+        preds += stride;
+        in += stride;
+        out += stride;
+    }
+
+    // Filter line-by-line.
+    if (inverse) { // unfilter: predict from reconstructed output and add the prediction back
+        FILTER_LINE_BY_LINE(1, out, +);
+    } else { // filter: predict from the source and subtract the prediction
+        FILTER_LINE_BY_LINE(0, in, -);
+    }
+}
+
+#undef FILTER_LINE_BY_LINE
+
+static void GradientFilter(const uint8_t* data, int width, int height, int stride, uint8_t* filtered_data) {
+    DoGradientFilter(data, width, height, stride, 0, height, 0, filtered_data); // all rows, inverse = 0
+}
+
+static void GradientUnfilter(int width, int height, int stride, int row, int num_rows, uint8_t* data) {
+    DoGradientFilter(data, width, height, stride, row, num_rows, 1, data); // inverse = 1, in place on 'data'
+}
+
+#undef PREDICT_LINE_ONE_PASS
+#undef DO_PREDICT_LINE
+#undef SANITY_CHECK
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8FiltersInitMIPSdspR2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitMIPSdspR2(void) { // WEBP_FILTER_NONE entries are left untouched
+    WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
+    WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
+    WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
+
+    WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
+    WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
+    WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
+}
+
+#else // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(VP8FiltersInitMIPSdspR2)
+
+#endif // WEBP_USE_MIPS_DSP_R2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/filters_sse2.c b/codec/L2/demos/webpEnc/host/src/dsp/filters_sse2.c
new file mode 100644
index 0000000000..e06d807ba4
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/filters_sse2.c
@@ -0,0 +1,331 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 variant of alpha filters
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+
+#include <assert.h>
+#include <emmintrin.h>
+#include <stdlib.h>
+#include <string.h>
+
+//------------------------------------------------------------------------------
+// Helpful macro.
+
+#define SANITY_CHECK(in, out)                                     \
+    assert(in != NULL);                                           \
+    assert(out != NULL);                                          \
+    assert(width > 0);                                            \
+    assert(height > 0);                                           \
+    assert(stride >= width);                                      \
+    assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
+    (void)height; // Silence unused warning.
+
+// Predicts a row from an arbitrary predictor row:
+//   dst[k] = src[k] + pred[k]  when 'inverse' is non-zero,
+//   dst[k] = src[k] - pred[k]  otherwise,
+// for k in [0, length), with 8-bit wrap-around. Blocks of 32 bytes are handled
+// with SSE2, the remainder with scalar code.
+static void PredictLineTop(const uint8_t* src, const uint8_t* pred, uint8_t* dst, int length, int inverse) {
+    const int simd_end = length & ~31; // largest multiple of 32 that is <= length
+    int pos = 0;
+    assert(length >= 0);
+    if (!inverse) {
+        while (pos < simd_end) {
+            const __m128i src_lo = _mm_loadu_si128((const __m128i*)(src + pos));
+            const __m128i src_hi = _mm_loadu_si128((const __m128i*)(src + pos + 16));
+            const __m128i pred_lo = _mm_loadu_si128((const __m128i*)(pred + pos));
+            const __m128i pred_hi = _mm_loadu_si128((const __m128i*)(pred + pos + 16));
+            _mm_storeu_si128((__m128i*)(dst + pos), _mm_sub_epi8(src_lo, pred_lo));
+            _mm_storeu_si128((__m128i*)(dst + pos + 16), _mm_sub_epi8(src_hi, pred_hi));
+            pos += 32;
+        }
+        // Scalar tail.
+        while (pos < length) {
+            dst[pos] = src[pos] - pred[pos];
+            ++pos;
+        }
+    } else {
+        while (pos < simd_end) {
+            const __m128i src_lo = _mm_loadu_si128((const __m128i*)(src + pos));
+            const __m128i src_hi = _mm_loadu_si128((const __m128i*)(src + pos + 16));
+            const __m128i pred_lo = _mm_loadu_si128((const __m128i*)(pred + pos));
+            const __m128i pred_hi = _mm_loadu_si128((const __m128i*)(pred + pos + 16));
+            _mm_storeu_si128((__m128i*)(dst + pos), _mm_add_epi8(src_lo, pred_lo));
+            _mm_storeu_si128((__m128i*)(dst + pos + 16), _mm_add_epi8(src_hi, pred_hi));
+            pos += 32;
+        }
+        // Scalar tail.
+        while (pos < length) {
+            dst[pos] = src[pos] + pred[pos];
+            ++pos;
+        }
+    }
+}
+
+// Left-based prediction of one row segment (the predictor of each byte is its
+// left neighbour, i.e. preds == dst - 1 when inverse, preds == src - 1
+// otherwise):
+//   inverse: dst[k] = src[k] + dst[k - 1]
+//   forward: dst[k] = src[k] - src[k - 1]
+// The byte just before the segment (dst[-1] resp. src[-1]) is read, so it must
+// be valid. Does nothing when 'length' <= 0.
+static void PredictLineLeft(const uint8_t* src, uint8_t* dst, int length, int inverse) {
+    int pos = 0;
+    if (length <= 0) return;
+
+    if (!inverse) {
+        // Forward direction: all inputs are known up front, 32 bytes per step.
+        const int simd_end = length & ~31;
+        for (; pos < simd_end; pos += 32) {
+            const __m128i cur_lo = _mm_loadu_si128((const __m128i*)(src + pos + 0));
+            const __m128i left_lo = _mm_loadu_si128((const __m128i*)(src + pos + 0 - 1));
+            const __m128i cur_hi = _mm_loadu_si128((const __m128i*)(src + pos + 16));
+            const __m128i left_hi = _mm_loadu_si128((const __m128i*)(src + pos + 16 - 1));
+            const __m128i diff_lo = _mm_sub_epi8(cur_lo, left_lo);
+            const __m128i diff_hi = _mm_sub_epi8(cur_hi, left_hi);
+            _mm_storeu_si128((__m128i*)(dst + pos + 0), diff_lo);
+            _mm_storeu_si128((__m128i*)(dst + pos + 16), diff_hi);
+        }
+        for (; pos < length; ++pos) dst[pos] = src[pos] - src[pos - 1];
+        return;
+    }
+
+    {
+        // Inverse direction: a running (prefix) sum, 8 bytes per step. The
+        // shift-and-add ladder (1, 2, then 4 bytes) turns the 8 loaded bytes
+        // into their cumulative sums; 'carry' holds the previous output byte.
+        const int simd_end = length & ~7;
+        __m128i carry = _mm_set_epi32(0, 0, 0, dst[-1]);
+        for (; pos < simd_end; pos += 8) {
+            const __m128i loaded = _mm_loadl_epi64((const __m128i*)(src + pos));
+            const __m128i step0 = _mm_add_epi8(loaded, carry);
+            const __m128i step1 = _mm_add_epi8(step0, _mm_slli_si128(step0, 1));
+            const __m128i step2 = _mm_add_epi8(step1, _mm_slli_si128(step1, 2));
+            const __m128i sums = _mm_add_epi8(step2, _mm_slli_si128(step2, 4));
+            _mm_storel_epi64((__m128i*)(dst + pos), sums);
+            carry = _mm_srli_epi64(sums, 56); // byte 7 becomes the next left sample
+        }
+        for (; pos < length; ++pos) dst[pos] = src[pos] + dst[pos - 1];
+    }
+}
+
+// Plain-C row predictor with 8-bit wrap-around, for k in [0, length):
+//   dst[k] = src[k] + pred[k]  when 'inverse' is non-zero,
+//   dst[k] = src[k] - pred[k]  otherwise.
+static void PredictLineC(const uint8_t* src, const uint8_t* pred, uint8_t* dst, int length, int inverse) {
+    int k = 0;
+    if (!inverse) {
+        while (k < length) {
+            dst[k] = src[k] - pred[k];
+            ++k;
+        }
+    } else {
+        while (k < length) {
+            dst[k] = src[k] + pred[k];
+            ++k;
+        }
+    }
+}
+
+//------------------------------------------------------------------------------
+// Horizontal filter.
+
+// Horizontal (left) filter / unfilter of rows [row, row + num_rows).
+// 'inverse' == 0 filters 'in' into 'out'; non-zero reconstructs 'out' from
+// 'in'. Within a row every pixel is predicted from its left neighbour; the
+// leftmost pixel is copied for row 0 and predicted from the pixel above for
+// every other row.
+static WEBP_INLINE void DoHorizontalFilter(
+    const uint8_t* in, int width, int height, int stride, int row, int num_rows, int inverse, uint8_t* out) {
+    const size_t start_offset = row * stride;
+    const int last_row = row + num_rows;
+    const uint8_t* preds;
+    SANITY_CHECK(in, out);
+    in += start_offset;
+    out += start_offset;
+    // Predictors come from the buffer holding unfiltered pixels.
+    preds = inverse ? out : in;
+
+    if (row == 0) {
+        // Topmost scanline: leftmost pixel is stored unchanged.
+        out[0] = in[0];
+        PredictLineLeft(in + 1, out + 1, width - 1, inverse);
+        preds += stride;
+        in += stride;
+        out += stride;
+        row = 1;
+    }
+
+    // Remaining scanlines.
+    for (; row < last_row; ++row, preds += stride, in += stride, out += stride) {
+        // Leftmost pixel: predicted from the row above.
+        PredictLineC(in, preds - stride, out, 1, inverse);
+        PredictLineLeft(in + 1, out + 1, width - 1, inverse);
+    }
+}
+
+//------------------------------------------------------------------------------
+// Vertical filter.
+
+// Vertical (top) filter / unfilter of rows [row, row + num_rows).
+// 'inverse' == 0 filters 'in' into 'out'; non-zero reconstructs 'out' from
+// 'in'. Row 0 is left-predicted (its first pixel is copied); every other row
+// is predicted pixel-by-pixel from the row above.
+static WEBP_INLINE void DoVerticalFilter(
+    const uint8_t* in, int width, int height, int stride, int row, int num_rows, int inverse, uint8_t* out) {
+    const size_t start_offset = row * stride;
+    const int last_row = row + num_rows;
+    const uint8_t* preds;
+    SANITY_CHECK(in, out);
+    in += start_offset;
+    out += start_offset;
+    preds = inverse ? out : in;
+
+    if (row != 0) {
+        // Starting mid-image: step 'preds' back to the previous row.
+        preds -= stride;
+    } else {
+        // Top-left pixel is copied, the rest of the top row is left-predicted.
+        // 'preds' stays on row 0, which is the row above the next one.
+        out[0] = in[0];
+        PredictLineLeft(in + 1, out + 1, width - 1, inverse);
+        in += stride;
+        out += stride;
+        row = 1;
+    }
+
+    // Remaining scanlines.
+    for (; row < last_row; ++row, preds += stride, in += stride, out += stride) {
+        PredictLineTop(in, preds, out, width, inverse);
+    }
+}
+
+//------------------------------------------------------------------------------
+// Gradient filter.
+
+// Gradient predictor a + b - c, clamped to the 8-bit range [0, 255].
+static WEBP_INLINE int GradientPredictorC(uint8_t a, uint8_t b, uint8_t c) {
+    const int gradient = a + b - c;
+    if (gradient < 0) return 0;
+    if (gradient > 255) return 255;
+    return gradient;
+}
+
+// Forward gradient filter of one row segment, for k in [0, length):
+//   out[k] = row[k] - clip8(row[k - 1] + top[k] - top[k - 1])
+// row[-1] and top[-1] are read, so both must be valid. Eight pixels are
+// processed per SSE2 iteration; the remainder uses GradientPredictorC().
+static void GradientPredictDirect(const uint8_t* const row, const uint8_t* const top, uint8_t* const out, int length) {
+    const __m128i zero = _mm_setzero_si128();
+    const int simd_end = length & ~7;
+    int k = 0;
+    while (k < simd_end) {
+        const __m128i left8 = _mm_loadl_epi64((const __m128i*)&row[k - 1]);
+        const __m128i top8 = _mm_loadl_epi64((const __m128i*)&top[k]);
+        const __m128i topleft8 = _mm_loadl_epi64((const __m128i*)&top[k - 1]);
+        const __m128i cur8 = _mm_loadl_epi64((const __m128i*)&row[k]);
+        // Widen the three neighbours to 16 bits so the sum cannot wrap.
+        const __m128i left16 = _mm_unpacklo_epi8(left8, zero);
+        const __m128i top16 = _mm_unpacklo_epi8(top8, zero);
+        const __m128i topleft16 = _mm_unpacklo_epi8(topleft8, zero);
+        const __m128i grad16 = _mm_sub_epi16(_mm_add_epi16(left16, top16), topleft16);
+        const __m128i pred8 = _mm_packus_epi16(grad16, zero); // saturate to [0, 255]
+        _mm_storel_epi64((__m128i*)(out + k), _mm_sub_epi8(cur8, pred8));
+        k += 8;
+    }
+    while (k < length) {
+        out[k] = row[k] - GradientPredictorC(row[k - 1], top[k], top[k - 1]);
+        ++k;
+    }
+}
+
+// Inverse gradient filter of one row segment, for k in [0, length):
+//   row[k] = in[k] + clip8(row[k - 1] + top[k] - top[k - 1])
+// Every output depends on the previously reconstructed pixel row[k - 1], so
+// the eight pixels of a vector block are resolved one 16-bit lane at a time.
+// row[-1] and top[-1] are read, so both must be valid. Does nothing when
+// 'length' <= 0.
+static void GradientPredictInverse(const uint8_t* const in, const uint8_t* const top, uint8_t* const row, int length) {
+    if (length > 0) {
+        int i;
+        const int max_pos = length & ~7; // number of pixels handled by the vector loop
+        const __m128i zero = _mm_setzero_si128();
+        __m128i A = _mm_set_epi32(0, 0, 0, row[-1]); // left sample, in 16-bit lane 0
+        for (i = 0; i < max_pos; i += 8) {
+            const __m128i tmp0 = _mm_loadl_epi64((const __m128i*)&top[i]);
+            const __m128i tmp1 = _mm_loadl_epi64((const __m128i*)&top[i - 1]);
+            const __m128i B = _mm_unpacklo_epi8(tmp0, zero); // top, widened to 16 bits
+            const __m128i C = _mm_unpacklo_epi8(tmp1, zero); // top-left, widened to 16 bits
+            const __m128i tmp2 = _mm_loadl_epi64((const __m128i*)&in[i]);
+            const __m128i D = _mm_unpacklo_epi8(tmp2, zero); // base input
+            const __m128i E = _mm_sub_epi16(B, C);           // unclipped gradient basis B - C
+            __m128i out = zero;                              // accumulator for output
+            __m128i mask_hi = _mm_set_epi32(0, 0, 0, 0xff);  // 0x00ff in the active lane, 0 elsewhere
+            int k = 8;
+            // One pass per pixel: only the lane selected by 'mask_hi' yields a
+            // meaningful value; all other lanes are forced to zero below.
+            while (1) {
+                const __m128i tmp3 = _mm_add_epi16(A, E); // delta = A + B - C
+                const __m128i tmp4 = _mm_min_epi16(tmp3, mask_hi); // upper clamp: 255 in the active lane
+                const __m128i tmp5 = _mm_max_epi16(tmp4, zero); // clipped delta
+                const __m128i tmp6 = _mm_add_epi16(tmp5, D);    // add to in[] values
+                A = _mm_and_si128(tmp6, mask_hi);               // keep the low 8 bits of the active lane
+                out = _mm_or_si128(out, A);                     // accumulate output
+                if (--k == 0) break;
+                A = _mm_slli_si128(A, 2);             // move the new left sample to the next lane
+                mask_hi = _mm_slli_si128(mask_hi, 2); // move the mask to the next lane
+            }
+            A = _mm_srli_si128(A, 14); // last lane back to lane 0: left sample for next iteration
+            _mm_storel_epi64((__m128i*)&row[i], _mm_packus_epi16(out, zero));
+        }
+        // Scalar tail for the remaining (length & 7) pixels.
+        for (; i < length; ++i) {
+            row[i] = in[i] + GradientPredictorC(row[i - 1], top[i], top[i - 1]);
+        }
+    }
+}
+
+// Gradient filter / unfilter of rows [row, row + num_rows).
+// 'inverse' == 0 filters 'in' into 'out'; non-zero reconstructs 'out' from
+// 'in'. Row 0 is left-predicted (its first pixel is copied). For every other
+// row the leftmost pixel is predicted from the pixel above and the remaining
+// pixels use the clipped gradient left + top - top-left.
+static WEBP_INLINE void DoGradientFilter(
+    const uint8_t* in, int width, int height, int stride, int row, int num_rows, int inverse, uint8_t* out) {
+    const size_t start_offset = row * stride;
+    const int last_row = row + num_rows;
+    SANITY_CHECK(in, out);
+    in += start_offset;
+    out += start_offset;
+
+    if (row == 0) {
+        // Top scanline: plain left prediction.
+        out[0] = in[0];
+        PredictLineLeft(in + 1, out + 1, width - 1, inverse);
+        in += stride;
+        out += stride;
+        row = 1;
+    }
+
+    // Remaining scanlines.
+    for (; row < last_row; ++row, in += stride, out += stride) {
+        if (!inverse) {
+            PredictLineC(in, in - stride, out, 1, inverse);
+            GradientPredictDirect(in + 1, in + 1 - stride, out + 1, width - 1);
+        } else {
+            PredictLineC(in, out - stride, out, 1, inverse); // predict from above
+            GradientPredictInverse(in + 1, out + 1 - stride, out + 1, width - 1);
+        }
+    }
+}
+
+#undef SANITY_CHECK
+
+//------------------------------------------------------------------------------
+
+// Forward horizontal filter of the whole plane (rows [0, height)):
+// 'data' -> 'filtered_data'.
+static void HorizontalFilter(const uint8_t* data, int width, int height, int stride, uint8_t* filtered_data) {
+    const int first_row = 0;
+    const int inverse = 0;
+    DoHorizontalFilter(data, width, height, stride, first_row, height, inverse, filtered_data);
+}
+
+// Forward vertical filter of the whole plane (rows [0, height)):
+// 'data' -> 'filtered_data'.
+static void VerticalFilter(const uint8_t* data, int width, int height, int stride, uint8_t* filtered_data) {
+    const int first_row = 0;
+    const int inverse = 0;
+    DoVerticalFilter(data, width, height, stride, first_row, height, inverse, filtered_data);
+}
+
+// Forward gradient filter of the whole plane (rows [0, height)):
+// 'data' -> 'filtered_data'.
+static void GradientFilter(const uint8_t* data, int width, int height, int stride, uint8_t* filtered_data) {
+    const int first_row = 0;
+    const int inverse = 0;
+    DoGradientFilter(data, width, height, stride, first_row, height, inverse, filtered_data);
+}
+
+//------------------------------------------------------------------------------
+
+// In-place vertical unfilter of rows [row, row + num_rows) of 'data'.
+static void VerticalUnfilter(int width, int height, int stride, int row, int num_rows, uint8_t* data) {
+    const int inverse = 1;
+    DoVerticalFilter(data, width, height, stride, row, num_rows, inverse, data);
+}
+
+// In-place horizontal unfilter of rows [row, row + num_rows) of 'data'.
+static void HorizontalUnfilter(int width, int height, int stride, int row, int num_rows, uint8_t* data) {
+    const int inverse = 1;
+    DoHorizontalFilter(data, width, height, stride, row, num_rows, inverse, data);
+}
+
+// In-place gradient unfilter of rows [row, row + num_rows) of 'data'.
+static void GradientUnfilter(int width, int height, int stride, int row, int num_rows, uint8_t* data) {
+    const int inverse = 1;
+    DoGradientFilter(data, width, height, stride, row, num_rows, inverse, data);
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8FiltersInitSSE2(void);
+
+// Installs the SSE2 routines of this file into the horizontal, vertical and
+// gradient slots of the WebPFilters[] and WebPUnfilters[] tables.
+WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitSSE2(void) {
+    // Filter entries.
+    WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
+    WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
+    WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
+
+    // Unfilter entries.
+    WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
+    WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
+    WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
+}
+
+#else // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(VP8FiltersInitSSE2)
+
+#endif // WEBP_USE_SSE2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/lossless.c b/codec/L2/demos/webpEnc/host/src/dsp/lossless.c
new file mode 100644
index 0000000000..130280c203
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/lossless.c
@@ -0,0 +1,991 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Image transforms and color space conversion methods for lossless decoder.
+//
+// Authors: Vikas Arora (vikaas.arora@gmail.com)
+//          Jyrki Alakuijala (jyrki@google.com)
+//          Urvang Joshi (urvang@google.com)
+
+#include "./dsp.h"
+
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include "../dec/vp8li.h"
+#include "../utils/endian_inl.h"
+#include "../utils/profiling.h"
+#include "../utils/thread.h"
+#include "./lossless.h"
+
+#define MAX_DIFF_COST (1e30f)
+
+//------------------------------------------------------------------------------
+// Image transforms.
+
+// Adds pixel 'b' to '*a' in place, one 8-bit channel at a time; each channel
+// sum wraps modulo 256 and never carries into its neighbour.
+static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
+    const uint32_t kHiMask = 0xff00ff00u; // bytes 3 and 1 (alpha and green)
+    const uint32_t kLoMask = 0x00ff00ffu; // bytes 2 and 0 (red and blue)
+    const uint32_t hi_sum = (*a & kHiMask) + (b & kHiMask);
+    const uint32_t lo_sum = (*a & kLoMask) + (b & kLoMask);
+    *a = (hi_sum & kHiMask) | (lo_sum & kLoMask);
+}
+
+// Per-byte floor average of two packed pixels, computed without unpacking:
+// (a0 & a1) holds the common bits, half of the differing bits is added on top.
+static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
+    const uint32_t common = a0 & a1;
+    const uint32_t half_diff = ((a0 ^ a1) & 0xfefefefeu) >> 1;
+    return half_diff + common;
+}
+
+// Two-stage average: a0 and a2 are averaged first, the result is then
+// averaged with a1.
+static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
+    const uint32_t outer = Average2(a0, a2);
+    return Average2(outer, a1);
+}
+
+// Average of four pixels, built as the average of the two pair averages
+// (a0, a1) and (a2, a3).
+static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, uint32_t a2, uint32_t a3) {
+    const uint32_t first_pair = Average2(a0, a1);
+    const uint32_t second_pair = Average2(a2, a3);
+    return Average2(first_pair, second_pair);
+}
+
+// Clamps a value that was computed as a (possibly negative) int and converted
+// to uint32_t: values below 256 pass through; otherwise ~a >> 24 yields 0 for
+// a wrapped negative (top byte 0xff) and 255 for a positive overflow (top
+// byte 0x00).
+static WEBP_INLINE uint32_t Clip255(uint32_t a) {
+    return (a < 256) ? a : (~a >> 24);
+}
+
+// Single-channel a + b - c, clamped to [0, 255] by Clip255().
+static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
+    const int unclamped = a + b - c;
+    return Clip255(unclamped);
+}
+
+// Applies AddSubtractComponentFull() (c0 + c1 - c2, clamped) to each of the
+// four 8-bit channels of the packed pixels and repacks the result.
+static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, uint32_t c2) {
+    uint32_t packed = 0;
+    int shift;
+    // Walk the channels from the top byte (shift 24) down to the bottom one.
+    for (shift = 24; shift >= 0; shift -= 8) {
+        const int channel =
+            AddSubtractComponentFull((c0 >> shift) & 0xff, (c1 >> shift) & 0xff, (c2 >> shift) & 0xff);
+        packed |= (uint32_t)channel << shift;
+    }
+    return packed;
+}
+
+// Single-channel a + (a - b) / 2, clamped to [0, 255] by Clip255().
+static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
+    const int half_step = (a - b) / 2;
+    return Clip255(a + half_step);
+}
+
+// Averages c0 and c1, then applies AddSubtractComponentHalf(avg, c2) to each
+// of the four 8-bit channels and repacks the result.
+static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, uint32_t c2) {
+    const uint32_t ave = Average2(c0, c1);
+    uint32_t packed = 0;
+    int shift;
+    // Walk the channels from the top byte (shift 24) down to the bottom one.
+    for (shift = 24; shift >= 0; shift -= 8) {
+        const int channel = AddSubtractComponentHalf((ave >> shift) & 0xff, (c2 >> shift) & 0xff);
+        packed |= (uint32_t)channel << shift;
+    }
+    return packed;
+}
+
+// gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined.
+#if defined(__arm__) && LOCAL_GCC_VERSION == 0x409
+#define LOCAL_INLINE __attribute__((noinline))
+#else
+#define LOCAL_INLINE WEBP_INLINE
+#endif
+
+// Returns |b - c| - |a - c| for one channel.
+static LOCAL_INLINE int Sub3(int a, int b, int c) {
+    const int dist_b = abs(b - c);
+    const int dist_a = abs(a - c);
+    return dist_b - dist_a;
+}
+
+#undef LOCAL_INLINE
+
+// Returns 'a' when the Sub3() scores summed over the four channels are <= 0,
+// and 'b' otherwise.
+static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
+    int pa_minus_pb = 0;
+    int shift;
+    // Accumulate the per-channel score from the top byte down to the bottom one.
+    for (shift = 24; shift >= 0; shift -= 8) {
+        pa_minus_pb += Sub3((a >> shift) & 0xff, (b >> shift) & 0xff, (c >> shift) & 0xff);
+    }
+    if (pa_minus_pb <= 0) return a;
+    return b;
+}
+
+//------------------------------------------------------------------------------
+// Predictors
+
+// Mode 0: constant ARGB_BLACK; both arguments are ignored.
+static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
+    const uint32_t pred = ARGB_BLACK;
+    (void)left;
+    (void)top;
+    return pred;
+}
+// Mode 1: the left pixel.
+static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
+    const uint32_t pred = left;
+    (void)top;
+    return pred;
+}
+// Mode 2: top[0].
+static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
+    const uint32_t pred = top[0];
+    (void)left;
+    return pred;
+}
+// Mode 3: top[1].
+static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
+    const uint32_t pred = top[1];
+    (void)left;
+    return pred;
+}
+// Mode 4: top[-1].
+static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
+    const uint32_t pred = top[-1];
+    (void)left;
+    return pred;
+}
+// Mode 5: Average3() of left, top[0] and top[1].
+static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
+    return Average3(left, top[0], top[1]);
+}
+// Mode 6: Average2() of left and top[-1].
+static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
+    return Average2(left, top[-1]);
+}
+// Mode 7: Average2() of left and top[0].
+static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
+    return Average2(left, top[0]);
+}
+// Mode 8: Average2() of top[-1] and top[0]; 'left' is ignored.
+static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
+    (void)left;
+    return Average2(top[-1], top[0]);
+}
+// Mode 9: Average2() of top[0] and top[1]; 'left' is ignored.
+static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
+    (void)left;
+    return Average2(top[0], top[1]);
+}
+// Mode 10: Average4() of left, top[-1], top[0] and top[1].
+static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
+    return Average4(left, top[-1], top[0], top[1]);
+}
+// Mode 11: Select() between top[0] and left, using top[-1] as the reference.
+static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
+    return Select(top[0], left, top[-1]);
+}
+// Mode 12: ClampedAddSubtractFull(left, top[0], top[-1]).
+static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
+    return ClampedAddSubtractFull(left, top[0], top[-1]);
+}
+// Mode 13: ClampedAddSubtractHalf(left, top[0], top[-1]).
+static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
+    return ClampedAddSubtractHalf(left, top[0], top[-1]);
+}
+
+//------------------------------------------------------------------------------
+
+// Inverse prediction (single-threaded): adds the predicted value back onto
+// every residual pixel of rows [y_start, y_end), in place.
+//   transform: supplies the row width (xsize_), the tile size exponent (bits_)
+//              and the per-tile predictor modes (data_).
+//   data:      first pixel of row 'y_start'; rows are 'width' pixels apart.
+//              Rows other than row 0 read the previous row at 'data - width',
+//              so that row must already be reconstructed.
+// NOTE(review): data[-1] is read for the first pixel of every row (Predictor0
+// and Predictor2 ignore the value) -- presumably the caller's buffer has
+// readable storage before 'data'; confirm.
+static void PredictorInverseTransform(const VP8LTransform* const transform, int y_start, int y_end, uint32_t* data) {
+    const int width = transform->xsize_;
+    if (y_start == 0) { // First Row follows the L (mode=1) mode.
+        int x;
+        // The very first pixel has no neighbours: mode 0 (constant) is used.
+        const uint32_t pred0 = Predictor0(data[-1], NULL);
+        AddPixelsEq(data, pred0);
+        for (x = 1; x < width; ++x) {
+            const uint32_t pred1 = Predictor1(data[x - 1], NULL);
+            AddPixelsEq(data + x, pred1);
+        }
+        data += width;
+        ++y_start;
+    }
+
+    {
+        int y = y_start;
+        const int tile_width = 1 << transform->bits_;
+        const int mask = tile_width - 1;
+        // Width rounded down to a whole number of tiles.
+        const int safe_width = width & ~mask;
+        const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
+        // Predictor modes of the tile row that contains row 'y'.
+        const uint32_t* pred_mode_base = transform->data_ + (y >> transform->bits_) * tiles_per_row;
+
+        while (y < y_end) {
+            const uint32_t pred2 = Predictor2(data[-1], data - width);
+            const uint32_t* pred_mode_src = pred_mode_base;
+            VP8LPredictorFunc pred_func;
+            int x = 1;
+            // 't' counts pixels within the current tile; it starts at 1 because
+            // pixel 0 of the row is handled separately just below.
+            int t = 1;
+            // First pixel follows the T (mode=2) mode.
+            AddPixelsEq(data, pred2);
+            // .. the rest:
+            while (x < safe_width) {
+                // The predictor index is stored in bits 8..11 of the mode word.
+                pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
+                for (; t < tile_width; ++t, ++x) {
+                    const uint32_t pred = pred_func(data[x - 1], data + x - width);
+                    AddPixelsEq(data + x, pred);
+                }
+                t = 0;
+            }
+            // Trailing partial tile, when 'width' is not a multiple of tile_width.
+            if (x < width) {
+                pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
+                for (; x < width; ++x) {
+                    const uint32_t pred = pred_func(data[x - 1], data + x - width);
+                    AddPixelsEq(data + x, pred);
+                }
+            }
+            data += width;
+            ++y;
+            if ((y & mask) == 0) { // Use the same mask, since tiles are squares.
+                pred_mode_base += tiles_per_row;
+            }
+        }
+    }
+}
+
+// Per-worker job description for the multi-threaded inverse predictor.
+typedef struct InversePredWorkerData {
+    VP8LTransform* transform; // transform being inverted
+    uint32_t* data;           // pixel pointer the worker starts from
+
+    int start;                 // first row index handled by this worker
+    int stop;                  // end row index (exclusive)
+    int pred_mode_base_stride; // NOTE(review): not referenced in the code visible here -- confirm use
+    int y_only;                // NOTE(review): not referenced in the code visible here -- confirm use
+
+    int tile_width;           // pixels per tile side
+    int mask;                 // tile_width - 1 as set by the caller
+    int safe_width;           // row width rounded down to whole tiles
+    int tiles_per_row;        // number of predictor-mode entries per tile row
+    uint32_t* pred_mode_base; // base of the predictor-mode table used by the worker
+} IPWorkerData;
+
+// Inverse prediction row synchronization: lets the worker of a row wait until
+// the row above has progressed far enough.
+typedef struct InversePredSyncData {
+#ifdef WEBP_USE_THREAD
+    pthread_mutex_t* mutex_; // array with one mutex per row
+    pthread_cond_t* cond_;   // array with one condition variable per row
+#endif
+    // Allocate memory to store the index in each row.
+    // cur_col[r] is the column most recently published for row r.
+    int* cur_col;
+    // The optimal sync_range for different resolution and platform should be
+    // determined by testing. Currently, it is chosen to be a power-of-2 number.
+    int sync_range;
+    int rows; // number of entries in mutex_, cond_ and cur_col
+
+    // Row-based parallel Inverse Predictor data
+    IPWorkerData* ipdata; // array with one entry per worker
+    int num_workers;
+} IpSync;
+
+// struct InversePred
+// {
+//   WebPWorker *workers;
+//   int num_workers;
+// };
+#ifdef WEBP_USE_THREAD
+// Acquires 'mutex': tries pthread_mutex_trylock() up to kMaxTryLocks times
+// and only then falls back to a blocking pthread_mutex_lock().
+static WEBP_INLINE void mutex_lock(pthread_mutex_t* const mutex) {
+    const int kMaxTryLocks = 4000;
+    int attempt = 0;
+
+    while (attempt < kMaxTryLocks) {
+        if (pthread_mutex_trylock(mutex) == 0) return; // acquired while spinning
+        ++attempt;
+    }
+    pthread_mutex_lock(mutex);
+}
+#endif // WEBP_USE_THREAD
+
+// Blocks until row r - 1 has published a column at least 'sync_range' ahead
+// of column 'c'. The check is only made for rows r > 1 and for columns that
+// are a multiple of 'sync_range'. A no-op without WEBP_USE_THREAD.
+static WEBP_INLINE void sync_read(IpSync* const ip_sync, int r, int c) {
+#ifdef WEBP_USE_THREAD
+    const int nsync = ip_sync->sync_range;
+    const int at_sync_column = ((c & (nsync - 1)) == 0);
+
+    if (r > 1 && at_sync_column) {
+        const int above = r - 1;
+        pthread_mutex_t* const mutex = &ip_sync->mutex_[above];
+        mutex_lock(mutex);
+        // Wait for the row above to get far enough ahead.
+        while (ip_sync->cur_col[above] - nsync < c) {
+            pthread_cond_wait(&ip_sync->cond_[above], mutex);
+        }
+        pthread_mutex_unlock(mutex);
+    }
+#else
+    (void)c;
+    (void)r;
+    (void)ip_sync;
+#endif // WEBP_USE_THREAD
+}
+
+// Publishes the progress of row 'r' after column 'c' (of 'cols') is done and
+// wakes a waiter on that row. Progress is published only at columns that are
+// a multiple of 'sync_range', and once more at the end of the row, where the
+// value cols + sync_range marks the row as complete. A no-op without
+// WEBP_USE_THREAD.
+static WEBP_INLINE void sync_write(IpSync* const ip_sync, int r, int c, const int cols) {
+#ifdef WEBP_USE_THREAD
+    const int nsync = ip_sync->sync_range;
+    const int row_finished = (c >= cols - 1);
+    const int published = row_finished ? (cols + nsync) : c;
+
+    // Inside the row, stay silent between two sync columns.
+    if (!row_finished && (c % nsync) != 0) return;
+
+    mutex_lock(&ip_sync->mutex_[r]);
+    ip_sync->cur_col[r] = published;
+    pthread_cond_signal(&ip_sync->cond_[r]);
+    pthread_mutex_unlock(&ip_sync->mutex_[r]);
+#else
+    (void)cols;
+    (void)c;
+    (void)r;
+    (void)ip_sync;
+#endif // WEBP_USE_THREAD
+}
+
+// Worker body of the multi-threaded inverse predictor: processes rows
+// y_start, y_start + num_workers, y_start + 2 * num_workers, ... below y_end.
+//   data:    first pixel of row 'y_start'; advanced by num_workers rows per
+//            iteration.
+//   ip_data: precomputed tile geometry and predictor-mode table base.
+//   ip_sync: row synchronization state; sync_read() is called before each
+//            pixel and sync_write() after it.
+// NOTE(review): the predictor-mode offset below is derived from the absolute
+// row index 'y', so ip_data->pred_mode_base must not already include a row
+// offset -- confirm against the caller.
+static WEBP_INLINE void ThreadInversePredictorRows(uint32_t* data,
+                                                   const VP8LTransform* const transform,
+                                                   int y_start,
+                                                   int y_end,
+                                                   IPWorkerData* const ip_data,
+                                                   IpSync* const ip_sync) {
+    const int width = transform->xsize_;
+
+    int y = y_start;
+    const int tile_width = ip_data->tile_width;
+    const int mask = ip_data->mask; // NOTE(review): not used in this function
+    const int safe_width = ip_data->safe_width;
+    const int tiles_per_row = ip_data->tiles_per_row;
+    const uint32_t* pred_mode_base = ip_data->pred_mode_base;
+
+    for (y = y_start; y < y_end; y += ip_sync->num_workers) {
+        // Wait for the row above before touching the first pixel.
+        sync_read(ip_sync, y, 0);
+        const uint32_t pred2 = Predictor2(data[-1], data - width);
+        // First pixel follows the T (mode=2) mode.
+        AddPixelsEq(data, pred2);
+        sync_write(ip_sync, y, 0, width);
+
+        // Predictor modes of the tile row that contains row 'y'.
+        const uint32_t* pred_mode_src = pred_mode_base + (y >> transform->bits_) * tiles_per_row;
+        VP8LPredictorFunc pred_func;
+        int x = 1;
+        // 't' counts pixels within the current tile; pixel 0 was handled above.
+        int t = 1;
+        // .. the rest:
+        while (x < safe_width) {
+            // The predictor index is stored in bits 8..11 of the mode word.
+            pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
+            for (; t < tile_width; ++t, ++x) {
+                sync_read(ip_sync, y, x);
+                const uint32_t pred = pred_func(data[x - 1], data + x - width);
+                AddPixelsEq(data + x, pred);
+                sync_write(ip_sync, y, x, width);
+            }
+            t = 0;
+        }
+        // Trailing partial tile, when 'width' is not a multiple of tile_width.
+        if (x < width) {
+            pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
+            for (; x < width; ++x) {
+                sync_read(ip_sync, y, x);
+                const uint32_t pred = pred_func(data[x - 1], data + x - width);
+                AddPixelsEq(data + x, pred);
+                sync_write(ip_sync, y, x, width);
+            }
+        }
+        // Skip the rows that belong to the other workers.
+        data += width * ip_sync->num_workers;
+    }
+}
+
+// Worker hook of the row-based multi-threaded inverse predictor: runs
+// ThreadInversePredictorRows() on the job described by 'ip_data'.
+// Always returns 1.
+static int InversePredictorWorker(IpSync* const ip_sync, IPWorkerData* const ip_data) {
+    uint32_t* const first_row = ip_data->data;
+    const int row_begin = ip_data->start;
+    const int row_end = ip_data->stop;
+    ThreadInversePredictorRows(first_row, ip_data->transform, row_begin, row_end, ip_data, ip_sync);
+    return 1;
+}
+
+// Returns the synchronization granularity (nsync), in columns, used between
+// consecutive rows. The value is fixed at 128 for every width; the former
+// width-based tiers (1 / 2 / 4 / 8) sat after the return statement and could
+// never execute, so they have been removed. The result must stay a power of
+// two because sync_read() tests columns with 'c & (nsync - 1)'.
+static int get_sync_range(int width) {
+    (void)width; // kept for interface compatibility
+    return 128;
+}
+
+// Allocate memory for ip row synchronization.
+//   rows:        number of per-row mutexes, condition variables and cur_col
+//                entries to create.
+//   width:       row width, used to choose the sync range.
+//   num_workers: number of IPWorkerData entries to create.
+// The function returns nothing; every allocation failure is reported on
+// stderr and leaves the corresponding pointer NULL. The scalar fields are
+// written before any allocation so that they are valid on every return path.
+static void InversePredictorAlloc(IpSync* ip_sync, int rows, int width, int num_workers) {
+    ip_sync->rows = rows;
+    ip_sync->num_workers = num_workers;
+    // Set up nsync.
+    ip_sync->sync_range = get_sync_range(width);
+
+#ifdef WEBP_USE_THREAD
+    {
+        int i;
+        ip_sync->mutex_ = (pthread_mutex_t*)WebPSafeMalloc((uint64_t)rows, sizeof(*ip_sync->mutex_));
+        if (ip_sync->mutex_ == NULL) {
+            fprintf(stderr, "mutex_ creation failed\n");
+            return;
+        }
+        for (i = 0; i < rows; ++i) {
+            pthread_mutex_init(&ip_sync->mutex_[i], NULL);
+        }
+
+        ip_sync->cond_ = (pthread_cond_t*)WebPSafeMalloc((uint64_t)rows, sizeof(*ip_sync->cond_));
+        if (ip_sync->cond_ == NULL) {
+            fprintf(stderr, "cond_ creation failed\n");
+            return;
+        }
+        for (i = 0; i < rows; ++i) {
+            pthread_cond_init(&ip_sync->cond_[i], NULL);
+        }
+    }
+#endif // WEBP_USE_THREAD
+
+    ip_sync->ipdata = (IPWorkerData*)WebPSafeMalloc((uint64_t)num_workers, sizeof(*ip_sync->ipdata));
+    if (ip_sync->ipdata == NULL) {
+        fprintf(stderr, "ipdata creation failed\n");
+    }
+
+    ip_sync->cur_col = (int*)WebPSafeMalloc((uint64_t)rows, sizeof(*ip_sync->cur_col));
+    if (ip_sync->cur_col == NULL) {
+        fprintf(stderr, "cur_col creation failed\n");
+    }
+}
+
+// Deallocate ip synchronization related mutex and data.
+// Destroys and frees the per-row mutexes and condition variables (when built
+// with WEBP_USE_THREAD), frees the worker data and the column array, then
+// zeroes the whole structure so that no dangling pointer survives and a later
+// call on the same structure is harmless. Does nothing when 'ip_sync' is NULL.
+static void InversePredictorDealloc(IpSync* ip_sync) {
+    if (ip_sync == NULL) return;
+
+#ifdef WEBP_USE_THREAD
+    {
+        int i;
+        if (ip_sync->mutex_ != NULL) {
+            for (i = 0; i < ip_sync->rows; ++i) {
+                pthread_mutex_destroy(&ip_sync->mutex_[i]);
+            }
+            WebPSafeFree(ip_sync->mutex_);
+        }
+        if (ip_sync->cond_ != NULL) {
+            for (i = 0; i < ip_sync->rows; ++i) {
+                pthread_cond_destroy(&ip_sync->cond_[i]);
+            }
+            WebPSafeFree(ip_sync->cond_);
+        }
+    }
+#endif // WEBP_USE_THREAD
+    WebPSafeFree(ip_sync->ipdata);
+    WebPSafeFree(ip_sync->cur_col);
+    // Clear the structure itself. The previous code passed '&ip_sync' with
+    // sizeof(ip_sync), which only zeroed the local pointer variable and left
+    // every freed pointer in the structure dangling.
+    memset(ip_sync, 0, sizeof(*ip_sync));
+}
+
+// Inverse prediction, multi-threaded variant: decodes rows [y_start, y_end) of
+// 'data' in place. Row 0 (when y_start == 0) is decoded inline; the other rows
+// are handed to 'num_workers' workers running InversePredictorWorker, worker i
+// starting at row (y_start + i). The last worker runs on the calling thread.
+// On a setup failure a message is printed to stderr and the function returns
+// without transforming the rows; every exit path ends the initialized workers
+// and frees the 'workers' array.
+static void PredictorInverseTransformMt(
+    const VP8LTransform* const transform, int y_start, int y_end, uint32_t* data, int num_workers) {
+    const WebPWorkerInterface* const winterface = WebPGetWorkerInterface();
+    const int width = transform->xsize_;
+    const int rows = y_end - y_start; // includes row 0 when y_start == 0
+    int num_inited = 0;               // workers[0 .. num_inited - 1] went through Init()
+    int ok = 1;                       // cleared as soon as a setup step fails
+    int i;
+    IpSync ip_row_sync;
+    IpSync* const ip_sync = &ip_row_sync;
+    WebPWorker* workers;
+    memset(ip_sync, 0, sizeof(*ip_sync));
+    workers = (WebPWorker*)WebPSafeMalloc(num_workers, sizeof(*workers));
+    if (workers == NULL) {
+        fprintf(stderr, "Workers creation failed\n");
+        return;
+    }
+
+    // Only the first (num_workers - 1) workers need a thread of their own.
+    for (i = 0; ok && i < num_workers; ++i) {
+        WebPWorker* const worker = &workers[i];
+        winterface->Init(worker);
+        ++num_inited;
+        if (i < num_workers - 1 && !winterface->Reset(worker)) {
+            fprintf(stderr, "Decoder threads creation failed\n");
+            ok = 0;
+        }
+    }
+
+    if (ok) {
+        if (!ip_sync->sync_range || rows != ip_sync->rows || num_workers > ip_sync->num_workers) {
+            InversePredictorDealloc(ip_sync);
+            InversePredictorAlloc(ip_sync, rows, width, num_workers);
+        }
+        // The buffers dereferenced below must exist before any worker starts.
+        if (ip_sync->ipdata == NULL || ip_sync->cur_col == NULL) {
+            fprintf(stderr, "Inverse predictor buffers allocation failed\n");
+            ok = 0;
+        }
+    }
+
+    if (ok) {
+        const int tile_width = 1 << transform->bits_;
+        const int mask = tile_width - 1;
+        const int safe_width = width & ~mask;
+        const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
+        const uint32_t* pred_mode_base;
+        // Initialize cur_col to -1 for all rows.
+        memset(ip_sync->cur_col, -1, sizeof(*ip_sync->cur_col) * rows);
+        if (y_start == 0) { // First Row follows the L (mode=1) mode.
+            int x;
+            const uint32_t pred0 = Predictor0(data[-1], NULL);
+            AddPixelsEq(data, pred0);
+            for (x = 1; x < width; ++x) {
+                const uint32_t pred1 = Predictor1(data[x - 1], NULL);
+                AddPixelsEq(data + x, pred1);
+            }
+            data += width;
+            ++y_start;
+        }
+        pred_mode_base = transform->data_ + (y_start >> transform->bits_) * tiles_per_row;
+        for (i = 0; i < num_workers; ++i) {
+            WebPWorker* const worker = &workers[i];
+            IPWorkerData* const ip_data = &ip_sync->ipdata[i];
+            worker->hook = (WebPWorkerHook)InversePredictorWorker;
+            worker->data1 = ip_sync;
+            worker->data2 = ip_data;
+            ip_data->start = y_start + i;
+            ip_data->stop = y_end;
+            ip_data->data = data + i * width;
+            ip_data->transform = transform;
+            ip_data->tile_width = tile_width;
+            ip_data->mask = mask;
+            ip_data->safe_width = safe_width;
+            ip_data->tiles_per_row = tiles_per_row;
+            ip_data->pred_mode_base = pred_mode_base;
+            if (i == num_workers - 1) {
+                winterface->Execute(worker); // runs synchronously on this thread
+            } else {
+                winterface->Launch(worker);
+            }
+        }
+        // Wait till all rows are finished
+        for (i = 0; i < num_workers; ++i) {
+            winterface->Sync(&workers[i]);
+        }
+    }
+    // Cleanup shared by the success and failure paths.
+    for (i = 0; i < num_inited; ++i) {
+        winterface->End(&workers[i]);
+    }
+    WebPSafeFree(workers);
+    if (ip_sync->num_workers > 0) {
+        InversePredictorDealloc(ip_sync);
+    }
+}
+
+// Inverse of the 'subtract green' transform: for each of the 'num_pixels' ARGB
+// values in 'data', adds the green byte to the red and blue bytes (mod 256).
+void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels) {
+    StopProfilingWatch stop_watch;
+    int idx = 0;
+    StartProfiling(&stop_watch);
+
+    while (idx < num_pixels) {
+        const uint32_t pixel = data[idx];
+        const uint32_t g = (pixel >> 8) & 0xff;
+        // Red and blue are updated together; the final mask discards any carry
+        // out of either byte so the alpha and green bytes stay untouched.
+        const uint32_t rb = ((pixel & 0x00ff00ffu) + ((g << 16) | g)) & 0x00ff00ffu;
+        data[idx++] = (pixel & 0xff00ff00u) | rb;
+    }
+    StopProfiling(&stop_watch, &timeVP8LAddGreenToBlueAndRed, &countVP8LAddGreenToBlueAndRed);
+}
+
+static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred, int8_t color) {
+    return ((uint32_t)((int)color_pred * (int)color)) >> 5; // logical shift of the signed product's bit pattern
+}
+
+static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code, VP8LMultipliers* const m) {
+    m->red_to_blue_ = (uint8_t)(color_code >> 16);  // bits 16..23
+    m->green_to_blue_ = (uint8_t)(color_code >> 8); // bits 8..15
+    m->green_to_red_ = (uint8_t)color_code;         // bits 0..7
+}
+
+// In-place inverse cross-color transform on 'num_pixels' ARGB values: red is
+// corrected from green, then blue from green and from the corrected red.
+void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data, int num_pixels) {
+    int n;
+    for (n = 0; n < num_pixels; ++n) {
+        const uint32_t pixel = data[n];
+        const uint32_t g = pixel >> 8; // ColorTransformDelta() takes int8_t, so only the low byte is used
+        uint32_t r = pixel >> 16;
+        uint32_t b = pixel;
+        r = (r + ColorTransformDelta(m->green_to_red_, g)) & 0xff;
+        b += ColorTransformDelta(m->green_to_blue_, g);
+        b += ColorTransformDelta(m->red_to_blue_, r);
+        // Alpha and green are kept as they are; only red and blue are rewritten.
+        data[n] = (pixel & 0xff00ff00u) | (r << 16) | (b & 0xff);
+    }
+}
+
+// Color space inverse transform.
+// Walks rows [y_start, y_end) of 'data' and applies VP8LTransformColorInverse
+// tile by tile, reading one multiplier code from transform->data_ per tile.
+static void ColorSpaceInverseTransform(const VP8LTransform* const transform, int y_start, int y_end, uint32_t* data) {
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+    const int bits = transform->bits_;
+    const int row_width = transform->xsize_;
+    const int tile_size = 1 << bits;
+    const int tile_mask = tile_size - 1;
+    const int full_tiles_width = row_width & ~tile_mask; // part of a row covered by complete tiles
+    const int tail_width = row_width - full_tiles_width; // pixels in the trailing partial tile
+    const int codes_per_row = VP8LSubSampleSize(row_width, bits);
+    const uint32_t* row_codes = transform->data_ + (y_start >> bits) * codes_per_row;
+    int row = y_start;
+    while (row < y_end) {
+        const uint32_t* code = row_codes;
+        const uint32_t* const full_tiles_end = data + full_tiles_width;
+        VP8LMultipliers mult = {0, 0, 0};
+        for (; data < full_tiles_end; data += tile_size) {
+            ColorCodeToMultipliers(*code++, &mult);
+            VP8LTransformColorInverse(&mult, data, tile_size);
+        }
+        if (tail_width > 0) { // Left-overs of the row.
+            ColorCodeToMultipliers(*code++, &mult);
+            VP8LTransformColorInverse(&mult, data, tail_width);
+            data += tail_width;
+        }
+        // Advance to the next row of codes whenever a tile boundary is crossed.
+        if ((++row & tile_mask) == 0) row_codes += codes_per_row;
+    }
+
+    StopProfiling(&stop_watch, &timeColorSpaceInverseTransform, &countColorSpaceInverseTransform);
+}
+
+// Separate out pixels packed together using pixel-bundling: FUNC_NAME unpacks the indices and looks them up in
+// the color map, F_NAME is the one-index-per-pixel lookup. Instantiated for ARGB (uint32_t) and alpha (uint8_t) data.
+#define COLOR_INDEX_INVERSE(FUNC_NAME, F_NAME, STATIC_DECL, TYPE, BIT_SUFFIX, GET_INDEX, GET_VALUE)           \
+    static void F_NAME(const TYPE* src, const uint32_t* const color_map, TYPE* dst, int y_start, int y_end,   \
+                       int width) {                                                                           \
+        int y;                                                                                                \
+        for (y = y_start; y < y_end; ++y) {                                                                   \
+            int x;                                                                                            \
+            for (x = 0; x < width; ++x) {                                                                     \
+                *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                                             \
+            }                                                                                                 \
+        }                                                                                                     \
+    }                                                                                                         \
+    STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform, int y_start, int y_end, const TYPE* src, \
+                               TYPE* dst) {                                                                   \
+        int y;                                                                                                \
+        const int bits_per_pixel = 8 >> transform->bits_;                                                     \
+        const int width = transform->xsize_;                                                                  \
+        const uint32_t* const color_map = transform->data_;                                                   \
+        if (bits_per_pixel < 8) {                                                                             \
+            const int pixels_per_byte = 1 << transform->bits_;                                                \
+            const int count_mask = pixels_per_byte - 1;                                                       \
+            const uint32_t bit_mask = (1 << bits_per_pixel) - 1;                                              \
+            for (y = y_start; y < y_end; ++y) {                                                               \
+                uint32_t packed_pixels = 0;                                                                   \
+                int x;                                                                                        \
+                for (x = 0; x < width; ++x) {                                                                 \
+                    /* We need to load fresh 'packed_pixels' once every                */                     \
+                    /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */                     \
+                    /* is a power of 2, so can just use a mask for that, instead of    */                     \
+                    /* decrementing a counter.                                         */                     \
+                    if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++);                             \
+                    *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]);                                  \
+                    packed_pixels >>= bits_per_pixel;                                                         \
+                }                                                                                             \
+            }                                                                                                 \
+        } else {                                                                                              \
+            VP8LMapColor##BIT_SUFFIX(src, color_map, dst, y_start, y_end, width);                             \
+        }                                                                                                     \
+    }
+
+COLOR_INDEX_INVERSE(ColorIndexInverseTransform, MapARGB, static, uint32_t, 32b, VP8GetARGBIndex, VP8GetARGBValue)
+COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha, , uint8_t, 8b, VP8GetAlphaIndex, VP8GetAlphaValue)
+
+#undef COLOR_INDEX_INVERSE
+
+// Applies the inverse of 'transform' to rows [row_start, row_end) of 'out'; 'in' is only read by the
+// color-indexing case. A non-zero 'thread_number' is passed on as the predictor's worker count.
+void VP8LInverseTransform(const VP8LTransform* const transform, int row_start, int row_end,
+                          const uint32_t* const in, uint32_t* const out, int thread_number) {
+    const int width = transform->xsize_;
+    assert(row_start < row_end);
+    assert(row_end <= transform->ysize_);
+    switch (transform->type_) {
+        case SUBTRACT_GREEN:
+            VP8LAddGreenToBlueAndRed(out, (row_end - row_start) * width);
+            break;
+        // Braces: a declaration may not directly follow a 'case' label in C before C23.
+        case PREDICTOR_TRANSFORM: {
+            StopProfilingWatch stop_watch;
+            StopProfilingWatch stop_watch1;
+            StartProfiling(&stop_watch);
+            if (thread_number != 0) {
+                PredictorInverseTransformMt(transform, row_start, row_end, out, thread_number);
+            } else {
+                PredictorInverseTransform(transform, row_start, row_end, out);
+            }
+            StopProfiling(&stop_watch, &timePredictorInverseTransform, &countPredictorInverseTransform);
+            StartProfiling(&stop_watch1);
+            if (row_end != transform->ysize_) {
+                // The last predicted row in this iteration will be the top-pred row
+                // for the first row in next iteration.
+                memcpy(out - width, out + (row_end - row_start - 1) * width, width * sizeof(*out));
+            }
+            StopProfiling(&stop_watch1, &timeProcessRowsCopy2, &countProcessRowsCopy2);
+            break;
+        }
+        case CROSS_COLOR_TRANSFORM:
+            ColorSpaceInverseTransform(transform, row_start, row_end, out);
+            break;
+        case COLOR_INDEXING_TRANSFORM: {
+            StopProfilingWatch stop_watch2;
+            StartProfiling(&stop_watch2);
+            if (in == out && transform->bits_ > 0) {
+                // Move packed pixels to the end of unpacked region, so that unpacking
+                // can occur seamlessly.
+                // Also, note that this is the only transform that applies on
+                // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
+                // transforms work on effective width of xsize_.
+                const int out_stride = (row_end - row_start) * width;
+                const int in_stride = (row_end - row_start) * VP8LSubSampleSize(transform->xsize_, transform->bits_);
+                uint32_t* const src = out + out_stride - in_stride;
+                memmove(src, out, in_stride * sizeof(*src));
+                ColorIndexInverseTransform(transform, row_start, row_end, src, out);
+            } else {
+                ColorIndexInverseTransform(transform, row_start, row_end, in, out);
+            }
+            StopProfiling(&stop_watch2, &timeColorIndexInverseTransform, &countColorIndexInverseTransform);
+            break;
+        }
+    }
+}
+
+//------------------------------------------------------------------------------
+// Color space conversion.
+
+static int is_big_endian(void) {
+    // Store 1 in a 16-bit word and look at which byte ends up first in memory.
+    static const uint16_t probe = 1;
+    const uint8_t* const first_byte = (const uint8_t*)&probe;
+    // When the low byte (1) comes first the host is little-endian.
+    return (*first_byte == 1) ? 0 : 1;
+}
+
+void VP8LConvertBGRAToRGB_C(const uint32_t* src, int num_pixels, uint8_t* dst) {
+    int remaining; // 3 output bytes per pixel: R, G, B (the top byte is dropped)
+    for (remaining = num_pixels; remaining > 0; --remaining, dst += 3) {
+        const uint32_t px = *src++;
+        dst[0] = (uint8_t)(px >> 16);
+        dst[1] = (uint8_t)(px >> 8);
+        dst[2] = (uint8_t)px;
+    }
+}
+
+void VP8LConvertBGRAToRGBA_C(const uint32_t* src, int num_pixels, uint8_t* dst) {
+    int remaining; // 4 output bytes per pixel: R, G, B, A
+    for (remaining = num_pixels; remaining > 0; --remaining, dst += 4) {
+        const uint32_t px = *src++;
+        dst[0] = (uint8_t)(px >> 16);
+        dst[1] = (uint8_t)(px >> 8);
+        dst[2] = (uint8_t)px;
+        dst[3] = (uint8_t)(px >> 24);
+    }
+}
+
+void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src, int num_pixels, uint8_t* dst) {
+    int remaining; // 2 output bytes per pixel, keeping the top 4 bits of each channel
+    for (remaining = num_pixels; remaining > 0; --remaining, dst += 2) {
+        const uint32_t px = *src++;
+        const uint8_t red_green = ((px >> 16) & 0xf0) | ((px >> 12) & 0xf);
+        const uint8_t blue_alpha = ((px >> 0) & 0xf0) | ((px >> 28) & 0xf);
+#ifdef WEBP_SWAP_16BIT_CSP
+        dst[0] = blue_alpha;
+        dst[1] = red_green;
+#else
+        dst[0] = red_green;
+        dst[1] = blue_alpha;
+#endif
+    }
+}
+
+void VP8LConvertBGRAToRGB565_C(const uint32_t* src, int num_pixels, uint8_t* dst) {
+    int remaining; // 2 output bytes per pixel: 5 bits of red, 6 of green, 5 of blue
+    for (remaining = num_pixels; remaining > 0; --remaining, dst += 2) {
+        const uint32_t px = *src++;
+        const uint8_t red_green = ((px >> 16) & 0xf8) | ((px >> 13) & 0x7);
+        const uint8_t green_blue = ((px >> 5) & 0xe0) | ((px >> 3) & 0x1f);
+#ifdef WEBP_SWAP_16BIT_CSP
+        dst[0] = green_blue;
+        dst[1] = red_green;
+#else
+        dst[0] = red_green;
+        dst[1] = green_blue;
+#endif
+    }
+}
+
+void VP8LConvertBGRAToBGR_C(const uint32_t* src, int num_pixels, uint8_t* dst) {
+    int remaining; // 3 output bytes per pixel: B, G, R (the top byte is dropped)
+    for (remaining = num_pixels; remaining > 0; --remaining, dst += 3) {
+        const uint32_t px = *src++;
+        dst[0] = (uint8_t)px;
+        dst[1] = (uint8_t)(px >> 8);
+        dst[2] = (uint8_t)(px >> 16);
+    }
+}
+
+static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst, int swap_on_big_endian) {
+    if (is_big_endian() == swap_on_big_endian) { // per-pixel byte reordering happens only in this case
+        const uint32_t* const src_end = src + num_pixels;
+        while (src < src_end) {
+            const uint32_t argb = *src++;
+            // Bytes go out most-significant first, or least-significant first when WORDS_BIGENDIAN is defined.
+#if !defined(WORDS_BIGENDIAN)
+#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
+            WebPUint32ToMem(dst, BSwap32(argb)); // presumably equivalent to the four stores below - TODO confirm
+#else // WEBP_REFERENCE_IMPLEMENTATION
+            dst[0] = (argb >> 24) & 0xff;
+            dst[1] = (argb >> 16) & 0xff;
+            dst[2] = (argb >> 8) & 0xff;
+            dst[3] = (argb >> 0) & 0xff;
+#endif
+#else // WORDS_BIGENDIAN
+            dst[0] = (argb >> 0) & 0xff;
+            dst[1] = (argb >> 8) & 0xff;
+            dst[2] = (argb >> 16) & 0xff;
+            dst[3] = (argb >> 24) & 0xff;
+#endif
+            dst += sizeof(argb);
+        }
+    } else {
+        memcpy(dst, src, num_pixels * sizeof(*src)); // no byte reordering: raw copy of num_pixels 32-bit values
+    }
+}
+
+// Converts 'num_pixels' BGRA pixels from 'in_data' into 'rgba' using the layout
+// selected by 'out_colorspace'. MODE_rgbA, MODE_bgrA, MODE_Argb and
+// MODE_rgbA_4444 use the same conversion as their upper-case counterpart and
+// then run WebPApplyAlphaMultiply() / WebPApplyAlphaMultiply4444() on the
+// converted output.
+void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels, WEBP_CSP_MODE out_colorspace,
+                         uint8_t* const rgba) {
+    switch (out_colorspace) {
+        // Outputs without an alpha-multiply variant.
+        case MODE_RGB:
+            VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
+            break;
+        case MODE_BGR:
+            VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
+            break;
+        case MODE_RGB_565:
+            VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
+            break;
+
+        // Paired outputs: shared conversion, optional alpha multiply afterwards.
+        case MODE_RGBA:
+        case MODE_rgbA:
+            VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
+            if (out_colorspace == MODE_rgbA) WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
+            break;
+        case MODE_BGRA:
+        case MODE_bgrA:
+            CopyOrSwap(in_data, num_pixels, rgba, 1);
+            if (out_colorspace == MODE_bgrA) WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
+            break;
+        case MODE_ARGB:
+        case MODE_Argb:
+            CopyOrSwap(in_data, num_pixels, rgba, 0);
+            // Second argument is 1 here (0 for the RGBA / BGRA orders above).
+            if (out_colorspace == MODE_Argb) WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
+            break;
+        case MODE_RGBA_4444:
+        case MODE_rgbA_4444:
+            VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
+            if (out_colorspace == MODE_rgbA_4444) WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
+            break;
+
+        default:
+            assert(0); // Code flow should not reach here.
+    }
+}
+
+//------------------------------------------------------------------------------
+
+VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed; // dispatch pointers below are assigned by VP8LDspInit()
+VP8LPredictorFunc VP8LPredictors[16];
+
+VP8LTransformColorFunc VP8LTransformColorInverse;
+
+VP8LConvertFunc VP8LConvertBGRAToRGB;
+VP8LConvertFunc VP8LConvertBGRAToRGBA;
+VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
+VP8LConvertFunc VP8LConvertBGRAToRGB565;
+VP8LConvertFunc VP8LConvertBGRAToBGR;
+
+VP8LMapARGBFunc VP8LMapColor32b;
+VP8LMapAlphaFunc VP8LMapColor8b;
+// Architecture-specific initializers, called from VP8LDspInit() under the matching WEBP_USE_* guard.
+extern void VP8LDspInitSSE2(void);
+extern void VP8LDspInitNEON(void);
+extern void VP8LDspInitMIPSdspR2(void);
+// Value of VP8GetCPUInfo seen by the last VP8LDspInit() run; starts out as this variable's own address.
+static volatile VP8CPUInfo lossless_last_cpuinfo_used = (VP8CPUInfo)&lossless_last_cpuinfo_used;
+
+// Installs the plain-C implementations into the lossless dispatch pointers,
+// then lets the SSE2 / NEON / MIPS dspR2 initializers override them when the
+// build enables them and VP8GetCPUInfo() reports the feature. The work is
+// skipped when VP8GetCPUInfo is unchanged since the previous call.
+WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
+    // Predictors in table order; entries 14 and 15 are padding security
+    // sentinels that reuse Predictor0.
+    static const VP8LPredictorFunc kPredictorsC[16] = {
+        Predictor0,  Predictor1,  Predictor2,  Predictor3,
+        Predictor4,  Predictor5,  Predictor6,  Predictor7,
+        Predictor8,  Predictor9,  Predictor10, Predictor11,
+        Predictor12, Predictor13, Predictor0,  Predictor0,
+    };
+    int mode;
+
+    if (lossless_last_cpuinfo_used == VP8GetCPUInfo) return;
+
+    for (mode = 0; mode < 16; ++mode) {
+        VP8LPredictors[mode] = kPredictorsC[mode];
+    }
+
+    VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
+    VP8LTransformColorInverse = VP8LTransformColorInverse_C;
+
+    VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
+    VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
+    VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
+    VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
+    VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
+
+    VP8LMapColor32b = MapARGB;
+    VP8LMapColor8b = MapAlpha;
+
+    // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+    if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+        if (VP8GetCPUInfo(kSSE2)) {
+            VP8LDspInitSSE2();
+        }
+#endif
+#if defined(WEBP_USE_NEON)
+        if (VP8GetCPUInfo(kNEON)) {
+            VP8LDspInitNEON();
+        }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+        if (VP8GetCPUInfo(kMIPSdspR2)) {
+            VP8LDspInitMIPSdspR2();
+        }
+#endif
+    }
+    lossless_last_cpuinfo_used = VP8GetCPUInfo;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/lossless.h b/codec/L2/demos/webpEnc/host/src/dsp/lossless.h
new file mode 100644
index 0000000000..ced7d6d483
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/lossless.h
@@ -0,0 +1,335 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Image transforms and color space conversion methods for lossless decoder.
+//
+// Authors: Vikas Arora (vikaas.arora@gmail.com)
+//          Jyrki Alakuijala (jyrki@google.com)
+
+#ifndef WEBP_DSP_LOSSLESS_H_
+#define WEBP_DSP_LOSSLESS_H_
+
+#include "../webp/types.h"
+#include "../webp/decode.h"
+
+#include "../enc/histogram.h"
+#include "../utils/utils.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+#include "../enc/delta_palettization.h"
+#endif // WEBP_EXPERIMENTAL_FEATURES
+
+//------------------------------------------------------------------------------
+// Decoding
+
+typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top);
+extern VP8LPredictorFunc VP8LPredictors[16];
+
+typedef void (*VP8LProcessBlueAndRedFunc)(uint32_t* argb_data, int num_pixels);
+extern VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
+
+typedef struct {
+    // Note: the members are uint8_t, so that any negative values are
+    // automatically converted to "mod 256" values.
+    uint8_t green_to_red_;  // multiplier applied to green when correcting red
+    uint8_t green_to_blue_; // multiplier applied to green when correcting blue
+    uint8_t red_to_blue_;   // multiplier applied to red when correcting blue
+} VP8LMultipliers;
+typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m, uint32_t* argb_data, int num_pixels);
+extern VP8LTransformColorFunc VP8LTransformColorInverse;
+
+struct VP8LTransform; // Defined in dec/vp8li.h.
+
+// Performs inverse transform of data given transform information, start and end
+// rows. Transform will be applied to rows [row_start, row_end[.
+// The *in and *out pointers refer to source and destination data respectively
+// corresponding to the intermediate row (row_start).
+void VP8LInverseTransform(const struct VP8LTransform* const transform,
+                          int row_start,
+                          int row_end,
+                          const uint32_t* const in,
+                          uint32_t* const out,
+                          int num_workers);
+
+// Color space conversion.
+typedef void (*VP8LConvertFunc)(const uint32_t* src, int num_pixels, uint8_t* dst);
+extern VP8LConvertFunc VP8LConvertBGRAToRGB;
+extern VP8LConvertFunc VP8LConvertBGRAToRGBA;
+extern VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
+extern VP8LConvertFunc VP8LConvertBGRAToRGB565;
+extern VP8LConvertFunc VP8LConvertBGRAToBGR;
+
+// Converts from BGRA to other color spaces.
+void VP8LConvertFromBGRA(const uint32_t* const in_data,
+                         int num_pixels,
+                         WEBP_CSP_MODE out_colorspace,
+                         uint8_t* const rgba);
+
+// color mapping related functions.
+static WEBP_INLINE uint32_t VP8GetARGBIndex(uint32_t idx) {
+    return (idx & 0xff00u) >> 8; // the index is carried in bits 8..15
+}
+
+static WEBP_INLINE uint8_t VP8GetAlphaIndex(uint8_t idx) {
+    return idx; // alpha-only data: the byte itself is the index
+}
+
+static WEBP_INLINE uint32_t VP8GetARGBValue(uint32_t val) {
+    return val; // ARGB output keeps the whole color-map entry
+}
+
+static WEBP_INLINE uint8_t VP8GetAlphaValue(uint32_t val) {
+    return (uint8_t)(val >> 8); // alpha output keeps bits 8..15 of the entry
+}
+
+typedef void (*VP8LMapARGBFunc)(
+    const uint32_t* src, const uint32_t* const color_map, uint32_t* dst, int y_start, int y_end, int width);
+typedef void (*VP8LMapAlphaFunc)(
+    const uint8_t* src, const uint32_t* const color_map, uint8_t* dst, int y_start, int y_end, int width);
+
+extern VP8LMapARGBFunc VP8LMapColor32b;
+extern VP8LMapAlphaFunc VP8LMapColor8b;
+
+// Similar to the static method ColorIndexInverseTransform() that is part of
+// lossless.c, but used only for alpha decoding. It takes uint8_t (rather than
+// uint32_t) arguments for 'src' and 'dst'.
+void VP8LColorIndexInverseTransformAlpha(
+    const struct VP8LTransform* const transform, int y_start, int y_end, const uint8_t* src, uint8_t* dst);
+
+// Expose some C-only fallback functions
+void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data, int num_pixels);
+
+void VP8LConvertBGRAToRGB_C(const uint32_t* src, int num_pixels, uint8_t* dst);
+void VP8LConvertBGRAToRGBA_C(const uint32_t* src, int num_pixels, uint8_t* dst);
+void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src, int num_pixels, uint8_t* dst);
+void VP8LConvertBGRAToRGB565_C(const uint32_t* src, int num_pixels, uint8_t* dst);
+void VP8LConvertBGRAToBGR_C(const uint32_t* src, int num_pixels, uint8_t* dst);
+void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels);
+
+// Must be called before calling any of the above methods.
+void VP8LDspInit(void);
+
+//------------------------------------------------------------------------------
+// Encoding
+
+extern VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
+extern VP8LTransformColorFunc VP8LTransformColor;
+typedef void (*VP8LCollectColorBlueTransformsFunc)(
+    const uint32_t* argb, int stride, int tile_width, int tile_height, int green_to_blue, int red_to_blue, int histo[]);
+extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
+
+typedef void (*VP8LCollectColorRedTransformsFunc)(
+    const uint32_t* argb, int stride, int tile_width, int tile_height, int green_to_red, int histo[]);
+extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
+
+// Expose some C-only fallback functions
+void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data, int num_pixels);
+void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels);
+void VP8LCollectColorRedTransforms_C(
+    const uint32_t* argb, int stride, int tile_width, int tile_height, int green_to_red, int histo[]);
+void VP8LCollectColorBlueTransforms_C(
+    const uint32_t* argb, int stride, int tile_width, int tile_height, int green_to_blue, int red_to_blue, int histo[]);
+
+//------------------------------------------------------------------------------
+// Image transforms.
+
+void VP8LResidualImage(int width,
+                       int height,
+                       int bits,
+                       int low_effort,
+                       uint32_t* const argb,
+                       uint32_t* const argb_scratch,
+                       uint32_t* const image,
+                       int exact,
+                       int use_ocl);
+
+void VP8LColorSpaceTransform(int width, int height, int bits, int quality, uint32_t* const argb, uint32_t* image);
+
+//------------------------------------------------------------------------------
+// Misc methods.
+
+// Computes sampled size of 'size' when sampling using 'sampling bits': ceil(size / 2^sampling_bits).
+static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size, uint32_t sampling_bits) {
+    return (size + ((1 << sampling_bits) - 1)) >> sampling_bits;
+}
+
+// -----------------------------------------------------------------------------
+// Faster logarithm for integers. Small values use a look-up table.
+
+// The threshold till approximate version of log_2 can be used.
+// Practically, we can get rid of the call to log() as the two values match to
+// very high degree (the ratio of these two is 0.99999x).
+// Keeping a high threshold for now.
+#define APPROX_LOG_WITH_CORRECTION_MAX 65536
+#define APPROX_LOG_MAX 4096
+#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
+#define LOG_LOOKUP_IDX_MAX 256
+extern const float kLog2Table[LOG_LOOKUP_IDX_MAX];
+extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX];
+typedef float (*VP8LFastLog2SlowFunc)(uint32_t v);
+
+extern VP8LFastLog2SlowFunc VP8LFastLog2Slow;
+extern VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
+
+static WEBP_INLINE float VP8LFastLog2(uint32_t v) {
+    return (v >= LOG_LOOKUP_IDX_MAX) ? VP8LFastLog2Slow(v) : kLog2Table[v]; // table covers v < LOG_LOOKUP_IDX_MAX
+}
+// Fast calculation of v * log2(v) for integer input.
+static WEBP_INLINE float VP8LFastSLog2(uint32_t v) {
+    return (v >= LOG_LOOKUP_IDX_MAX) ? VP8LFastSLog2Slow(v) : kSLog2Table[v];
+}
+
+// -----------------------------------------------------------------------------
+// Huffman-cost related functions.
+
+typedef double (*VP8LCostFunc)(const uint32_t* population, int length);
+typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y, int length);
+typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256], const int Y[256]);
+
+extern VP8LCostFunc VP8LExtraCost;
+extern VP8LCostCombinedFunc VP8LExtraCostCombined;
+extern VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
+
+typedef struct {       // small struct to hold counters
+    int counts[2];     // index: 0=zero streak, 1=non-zero streak
+    int streaks[2][2]; // [zero/non-zero][streak<3 / streak>=3]
+} VP8LStreaks;
+
+typedef VP8LStreaks (*VP8LCostCombinedCountFunc)(const uint32_t* X, const uint32_t* Y, int length);
+
+extern VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;
+
+typedef struct {           // small struct to hold bit entropy results
+    double entropy;        // entropy
+    uint32_t sum;          // sum of the population
+    int nonzeros;          // number of non-zero elements in the population
+    uint32_t max_val;      // maximum value in the population
+    uint32_t nonzero_code; // index of the last non-zero in the population
+} VP8LBitEntropy;
+
+void VP8LBitEntropyInit(VP8LBitEntropy* const entropy);
+
+// Get the combined symbol bit entropy and Huffman cost stats for the
+// distributions 'X' and 'Y'. Those results can then be refined according to
+// codec specific heuristics.
+void VP8LGetCombinedEntropyUnrefined(const uint32_t* const X,
+                                     const uint32_t* const Y,
+                                     int length,
+                                     VP8LBitEntropy* const bit_entropy,
+                                     VP8LStreaks* const stats);
+// Get the entropy for the distribution 'X'.
+void VP8LGetEntropyUnrefined(const uint32_t* const X,
+                             int length,
+                             VP8LBitEntropy* const bit_entropy,
+                             VP8LStreaks* const stats);
+
+void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n, VP8LBitEntropy* const entropy);
+
+typedef void (*GetEntropyUnrefinedHelperFunc)(uint32_t val,
+                                              int i,
+                                              uint32_t* const val_prev,
+                                              int* const i_prev,
+                                              VP8LBitEntropy* const bit_entropy,
+                                              VP8LStreaks* const stats);
+// Internal function used by VP8LGet*EntropyUnrefined.
+extern GetEntropyUnrefinedHelperFunc VP8LGetEntropyUnrefinedHelper;
+
+typedef void (*VP8LHistogramAddFunc)(const VP8LHistogram* const a,
+                                     const VP8LHistogram* const b,
+                                     VP8LHistogram* const out);
+extern VP8LHistogramAddFunc VP8LHistogramAdd;
+
+// -----------------------------------------------------------------------------
+// PrefixEncode()
+
+static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) {
+    const int log_floor = BitsLog2Floor(n);
+    // (n & ~(n - 1)) isolates the lowest set bit; it equals n only for zero or
+    // a power of two, in which case the floor value is returned unchanged.
+    const int is_pow2_or_zero = (n == (n & ~(n - 1)));
+    return is_pow2_or_zero ? log_floor : log_floor + 1;
+}
+
+// Splitting of distance and length codes into prefixes and
+// extra bits. The prefixes are encoded with an entropy code
+// while the extra bits are stored just as normal bits.
+static WEBP_INLINE void VP8LPrefixEncodeBitsNoLUT(int distance, int* const code, int* const extra_bits) {
+    const int d = distance - 1;
+    const int top_bit = BitsLog2Floor(d);
+    *extra_bits = top_bit - 1;
+    *code = 2 * top_bit + ((d >> (top_bit - 1)) & 1);
+}
+
+// Computes the prefix code and extra-bit count of (distance - 1), plus the value held by those extra bits.
+static WEBP_INLINE void VP8LPrefixEncodeNoLUT(int distance, int* const code, int* const extra_bits,
+                                              int* const extra_bits_value) {
+    const int d = distance - 1;
+    const int top_bit = BitsLog2Floor(d);
+    const int num_extra = top_bit - 1;
+    *extra_bits = num_extra;
+    *extra_bits_value = d & ((1 << num_extra) - 1);
+    *code = 2 * top_bit + ((d >> num_extra) & 1);
+}
+
+#define PREFIX_LOOKUP_IDX_MAX 512
+typedef struct {
+    int8_t code_;       // copied to *code by VP8LPrefixEncodeBits() / VP8LPrefixEncode()
+    int8_t extra_bits_; // copied to *extra_bits by the same functions
+} VP8LPrefixCode;
+
+// These tables are derived using VP8LPrefixEncodeNoLUT.
+extern const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX];
+extern const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX];
+static WEBP_INLINE void VP8LPrefixEncodeBits(int distance, int* const code, int* const extra_bits) {
+    // Distances at or above PREFIX_LOOKUP_IDX_MAX are outside the lookup table: compute them directly.
+    if (distance >= PREFIX_LOOKUP_IDX_MAX) {
+        VP8LPrefixEncodeBitsNoLUT(distance, code, extra_bits);
+        return;
+    }
+    *code = kPrefixEncodeCode[distance].code_;
+    *extra_bits = kPrefixEncodeCode[distance].extra_bits_;
+}
+
+static WEBP_INLINE void VP8LPrefixEncode(int distance,
+                                         int* const code,
+                                         int* const extra_bits,
+                                         int* const extra_bits_value) {
+    // Distances at or above PREFIX_LOOKUP_IDX_MAX are outside the lookup tables: compute them directly.
+    if (distance >= PREFIX_LOOKUP_IDX_MAX) {
+        VP8LPrefixEncodeNoLUT(distance, code, extra_bits, extra_bits_value);
+        return;
+    }
+    *code = kPrefixEncodeCode[distance].code_;
+    *extra_bits = kPrefixEncodeCode[distance].extra_bits_;
+    *extra_bits_value = kPrefixEncodeExtraBitsValue[distance];
+}
+
+// Per-component difference (a - b) of two packed 32-bit pixels, each 8-bit component taken mod 256.
+static WEBP_INLINE uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
+    const uint32_t ag = ((a & 0xff00ff00u) - (b & 0xff00ff00u) + 0x00ff00ffu) & 0xff00ff00u;
+    const uint32_t rb = ((a & 0x00ff00ffu) - (b & 0x00ff00ffu) + 0xff00ff00u) & 0x00ff00ffu;
+    return ag | rb;
+}
+
+void VP8LBundleColorMap(const uint8_t* const row, int width, int xbits, uint32_t* const dst);
+
+// Must be called before calling any of the above methods.
+void VP8LEncDspInit(void);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // WEBP_DSP_LOSSLESS_H_
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/lossless_enc.c b/codec/L2/demos/webpEnc/host/src/dsp/lossless_enc.c
new file mode 100644
index 0000000000..210af0ac87
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/lossless_enc.c
@@ -0,0 +1,1389 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Image transform methods for lossless encoder.
+//
+// Authors: Vikas Arora (vikaas.arora@gmail.com)
+//          Jyrki Alakuijala (jyrki@google.com)
+//          Urvang Joshi (urvang@google.com)
+
+#include "./dsp.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "../dec/vp8li.h"
+#include "../utils/endian_inl.h"
+#include "./lossless.h"
+#include "./yuv.h"
+#include "../utils/profiling.h"
+#include "../../host/create_kernel.h"
+
+#define MAX_DIFF_COST (1e30f)
+#define GET_RESIDULE_RECONT 0
+#define SAVE_RESIDULE 0
+#define GRP_X 256
+#define GRP_Y 16
+
+static const int kPredLowEffort = 11;
+static const uint32_t kMaskAlpha = 0xff000000;
+
+// Lookup table: kLog2Table[v] holds log2(v) for small integers v (entry 0 is 0).
+const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
+    0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f, 1.5849625007211560f, 2.0000000000000000f,
+    2.3219280948873621f, 2.5849625007211560f, 2.8073549220576041f, 3.0000000000000000f, 3.1699250014423121f,
+    3.3219280948873621f, 3.4594316186372973f, 3.5849625007211560f, 3.7004397181410921f, 3.8073549220576041f,
+    3.9068905956085187f, 4.0000000000000000f, 4.0874628412503390f, 4.1699250014423121f, 4.2479275134435852f,
+    4.3219280948873626f, 4.3923174227787606f, 4.4594316186372973f, 4.5235619560570130f, 4.5849625007211560f,
+    4.6438561897747243f, 4.7004397181410917f, 4.7548875021634682f, 4.8073549220576037f, 4.8579809951275718f,
+    4.9068905956085187f, 4.9541963103868749f, 5.0000000000000000f, 5.0443941193584533f, 5.0874628412503390f,
+    5.1292830169449663f, 5.1699250014423121f, 5.2094533656289501f, 5.2479275134435852f, 5.2854022188622487f,
+    5.3219280948873626f, 5.3575520046180837f, 5.3923174227787606f, 5.4262647547020979f, 5.4594316186372973f,
+    5.4918530963296747f, 5.5235619560570130f, 5.5545888516776376f, 5.5849625007211560f, 5.6147098441152083f,
+    5.6438561897747243f, 5.6724253419714951f, 5.7004397181410917f, 5.7279204545631987f, 5.7548875021634682f,
+    5.7813597135246599f, 5.8073549220576037f, 5.8328900141647412f, 5.8579809951275718f, 5.8826430493618415f,
+    5.9068905956085187f, 5.9307373375628866f, 5.9541963103868749f, 5.9772799234999167f, 6.0000000000000000f,
+    6.0223678130284543f, 6.0443941193584533f, 6.0660891904577720f, 6.0874628412503390f, 6.1085244567781691f,
+    6.1292830169449663f, 6.1497471195046822f, 6.1699250014423121f, 6.1898245588800175f, 6.2094533656289501f,
+    6.2288186904958804f, 6.2479275134435852f, 6.2667865406949010f, 6.2854022188622487f, 6.3037807481771030f,
+    6.3219280948873626f, 6.3398500028846243f, 6.3575520046180837f, 6.3750394313469245f, 6.3923174227787606f,
+    6.4093909361377017f, 6.4262647547020979f, 6.4429434958487279f, 6.4594316186372973f, 6.4757334309663976f,
+    6.4918530963296747f, 6.5077946401986963f, 6.5235619560570130f, 6.5391588111080309f, 6.5545888516776376f,
+    6.5698556083309478f, 6.5849625007211560f, 6.5999128421871278f, 6.6147098441152083f, 6.6293566200796094f,
+    6.6438561897747243f, 6.6582114827517946f, 6.6724253419714951f, 6.6865005271832185f, 6.7004397181410917f,
+    6.7142455176661224f, 6.7279204545631987f, 6.7414669864011464f, 6.7548875021634682f, 6.7681843247769259f,
+    6.7813597135246599f, 6.7944158663501061f, 6.8073549220576037f, 6.8201789624151878f, 6.8328900141647412f,
+    6.8454900509443747f, 6.8579809951275718f, 6.8703647195834047f, 6.8826430493618415f, 6.8948177633079437f,
+    6.9068905956085187f, 6.9188632372745946f, 6.9307373375628866f, 6.9425145053392398f, 6.9541963103868749f,
+    6.9657842846620869f, 6.9772799234999167f, 6.9886846867721654f, 7.0000000000000000f, 7.0112272554232539f,
+    7.0223678130284543f, 7.0334230015374501f, 7.0443941193584533f, 7.0552824355011898f, 7.0660891904577720f,
+    7.0768155970508308f, 7.0874628412503390f, 7.0980320829605263f, 7.1085244567781691f, 7.1189410727235076f,
+    7.1292830169449663f, 7.1395513523987936f, 7.1497471195046822f, 7.1598713367783890f, 7.1699250014423121f,
+    7.1799090900149344f, 7.1898245588800175f, 7.1996723448363644f, 7.2094533656289501f, 7.2191685204621611f,
+    7.2288186904958804f, 7.2384047393250785f, 7.2479275134435852f, 7.2573878426926521f, 7.2667865406949010f,
+    7.2761244052742375f, 7.2854022188622487f, 7.2946207488916270f, 7.3037807481771030f, 7.3128829552843557f,
+    7.3219280948873626f, 7.3309168781146167f, 7.3398500028846243f, 7.3487281542310771f, 7.3575520046180837f,
+    7.3663222142458160f, 7.3750394313469245f, 7.3837042924740519f, 7.3923174227787606f, 7.4008794362821843f,
+    7.4093909361377017f, 7.4178525148858982f, 7.4262647547020979f, 7.4346282276367245f, 7.4429434958487279f,
+    7.4512111118323289f, 7.4594316186372973f, 7.4676055500829976f, 7.4757334309663976f, 7.4838157772642563f,
+    7.4918530963296747f, 7.4998458870832056f, 7.5077946401986963f, 7.5156998382840427f, 7.5235619560570130f,
+    7.5313814605163118f, 7.5391588111080309f, 7.5468944598876364f, 7.5545888516776376f, 7.5622424242210728f,
+    7.5698556083309478f, 7.5774288280357486f, 7.5849625007211560f, 7.5924570372680806f, 7.5999128421871278f,
+    7.6073303137496104f, 7.6147098441152083f, 7.6220518194563764f, 7.6293566200796094f, 7.6366246205436487f,
+    7.6438561897747243f, 7.6510516911789281f, 7.6582114827517946f, 7.6653359171851764f, 7.6724253419714951f,
+    7.6794800995054464f, 7.6865005271832185f, 7.6934869574993252f, 7.7004397181410917f, 7.7073591320808825f,
+    7.7142455176661224f, 7.7210991887071855f, 7.7279204545631987f, 7.7347096202258383f, 7.7414669864011464f,
+    7.7481928495894605f, 7.7548875021634682f, 7.7615512324444795f, 7.7681843247769259f, 7.7747870596011736f,
+    7.7813597135246599f, 7.7879025593914317f, 7.7944158663501061f, 7.8008998999203047f, 7.8073549220576037f,
+    7.8137811912170374f, 7.8201789624151878f, 7.8265484872909150f, 7.8328900141647412f, 7.8392037880969436f,
+    7.8454900509443747f, 7.8517490414160571f, 7.8579809951275718f, 7.8641861446542797f, 7.8703647195834047f,
+    7.8765169465649993f, 7.8826430493618415f, 7.8887432488982591f, 7.8948177633079437f, 7.9008668079807486f,
+    7.9068905956085187f, 7.9128893362299619f, 7.9188632372745946f, 7.9248125036057812f, 7.9307373375628866f,
+    7.9366379390025709f, 7.9425145053392398f, 7.9483672315846778f, 7.9541963103868749f, 7.9600019320680805f,
+    7.9657842846620869f, 7.9715435539507719f, 7.9772799234999167f, 7.9829935746943103f, 7.9886846867721654f,
+    7.9943534368588577f};
+
+const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
+    0.00000000f,    0.00000000f,    2.00000000f,    4.75488750f,    8.00000000f,    11.60964047f,   15.50977500f,
+    19.65148445f,   24.00000000f,   28.52932501f,   33.21928095f,   38.05374781f,   43.01955001f,   48.10571634f,
+    53.30296891f,   58.60335893f,   64.00000000f,   69.48686830f,   75.05865003f,   80.71062276f,   86.43856190f,
+    92.23866588f,   98.10749561f,   104.04192499f,  110.03910002f,  116.09640474f,  122.21143267f,  128.38196256f,
+    134.60593782f,  140.88144886f,  147.20671787f,  153.58008562f,  160.00000000f,  166.46500594f,  172.97373660f,
+    179.52490559f,  186.11730005f,  192.74977453f,  199.42124551f,  206.13068654f,  212.87712380f,  219.65963219f,
+    226.47733176f,  233.32938445f,  240.21499122f,  247.13338933f,  254.08384998f,  261.06567603f,  268.07820003f,
+    275.12078236f,  282.19280949f,  289.29369244f,  296.42286534f,  303.57978409f,  310.76392512f,  317.97478424f,
+    325.21187564f,  332.47473081f,  339.76289772f,  347.07593991f,  354.41343574f,  361.77497759f,  369.16017124f,
+    376.56863518f,  384.00000000f,  391.45390785f,  398.93001188f,  406.42797576f,  413.94747321f,  421.48818752f,
+    429.04981119f,  436.63204548f,  444.23460010f,  451.85719280f,  459.49954906f,  467.16140179f,  474.84249102f,
+    482.54256363f,  490.26137307f,  497.99867911f,  505.75424759f,  513.52785023f,  521.31926438f,  529.12827280f,
+    536.95466351f,  544.79822957f,  552.65876890f,  560.53608414f,  568.42998244f,  576.34027536f,  584.26677867f,
+    592.20931226f,  600.16769996f,  608.14176943f,  616.13135206f,  624.13628279f,  632.15640007f,  640.19154569f,
+    648.24156472f,  656.30630539f,  664.38561898f,  672.47935976f,  680.58738488f,  688.70955430f,  696.84573069f,
+    704.99577935f,  713.15956818f,  721.33696754f,  729.52785023f,  737.73209140f,  745.94956849f,  754.18016116f,
+    762.42375127f,  770.68022275f,  778.94946161f,  787.23135586f,  795.52579543f,  803.83267219f,  812.15187982f,
+    820.48331383f,  828.82687147f,  837.18245171f,  845.54995518f,  853.92928416f,  862.32034249f,  870.72303558f,
+    879.13727036f,  887.56295522f,  896.00000000f,  904.44831595f,  912.90781569f,  921.37841320f,  929.86002376f,
+    938.35256392f,  946.85595152f,  955.37010560f,  963.89494641f,  972.43039537f,  980.97637504f,  989.53280911f,
+    998.09962237f,  1006.67674069f, 1015.26409097f, 1023.86160116f, 1032.46920021f, 1041.08681805f, 1049.71438560f,
+    1058.35183469f, 1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f, 1101.68498204f, 1110.38033993f,
+    1119.08512727f, 1127.79928282f, 1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f, 1171.50849518f,
+    1180.27761738f, 1189.05570047f, 1197.84268914f, 1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
+    1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f, 1277.31753781f, 1286.19068338f, 1295.07216828f,
+    1303.96194457f, 1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f, 1348.53355734f, 1357.47210556f,
+    1366.41862452f, 1375.37307041f, 1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f, 1420.26270412f,
+    1429.26381818f, 1438.27256558f, 1447.28890615f, 1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
+    1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f, 1528.77123795f, 1537.86138993f, 1546.95871952f,
+    1556.06319119f, 1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f, 1601.69146137f, 1610.83805860f,
+    1619.99155871f, 1629.15192882f, 1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f, 1675.05570047f,
+    1684.25661744f, 1693.46418280f, 1702.67836605f, 1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
+    1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f, 1785.89892323f, 1795.17766747f, 1804.46271172f,
+    1813.75402857f, 1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f, 1860.30375965f, 1869.63214999f,
+    1878.96662767f, 1888.30716711f, 1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f, 1935.09991037f,
+    1944.47629506f, 1953.85856831f, 1963.24670620f, 1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
+    2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f};
+
+const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX] = {
+    {0, 0},  {0, 0},  {1, 0},  {2, 0},  {3, 0},  {4, 1},  {4, 1},  {5, 1},  {5, 1},  {6, 2},  {6, 2},  {6, 2},  {6, 2},
+    {7, 2},  {7, 2},  {7, 2},  {7, 2},  {8, 3},  {8, 3},  {8, 3},  {8, 3},  {8, 3},  {8, 3},  {8, 3},  {8, 3},  {9, 3},
+    {9, 3},  {9, 3},  {9, 3},  {9, 3},  {9, 3},  {9, 3},  {9, 3},  {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
+    {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {11, 4}, {11, 4}, {11, 4},
+    {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
+    {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
+    {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
+    {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
+    {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
+    {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {14, 6},
+    {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+    {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+    {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+    {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+    {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {15, 6}, {15, 6},
+    {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+    {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+    {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+    {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+    {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {16, 7}, {16, 7}, {16, 7},
+    {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+    {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+    {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+    {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+    {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+    {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+    {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+    {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+    {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+    {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+    {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+    {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+    {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+    {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+    {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+    {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+    {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+    {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+    {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+    {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+};
+
+const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
+    0,   0,   0,   0,   0,   0,   1,   0,   1,   0,   1,   2,   3,   0,   1,   2,   3,   0,   1,   2,   3,   4,   5,
+    6,   7,   0,   1,   2,   3,   4,   5,   6,   7,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,  12,
+    13,  14,  15,  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  0,   1,   2,   3,
+    4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
+    27,  28,  29,  30,  31,  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  16,  17,
+    18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  0,   1,   2,   3,   4,   5,   6,   7,   8,
+    9,   10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
+    32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,
+    55,  56,  57,  58,  59,  60,  61,  62,  63,  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,  12,  13,
+    14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,
+    37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
+    60,  61,  62,  63,  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  16,  17,  18,
+    19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,
+    42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
+    65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,
+    88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
+    111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0,   1,   2,   3,   4,   5,
+    6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,
+    29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
+    52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
+    75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,
+    98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
+    121, 122, 123, 124, 125, 126};
+
+static float FastSLog2Slow(uint32_t v) {
+    // Slow path of the v * log2(v) estimate, for v at or above LOG_LOOKUP_IDX_MAX.
+    assert(v >= LOG_LOOKUP_IDX_MAX);
+    if (v >= APPROX_LOG_WITH_CORRECTION_MAX) {
+        // Large inputs: evaluate with the math-library logarithm.
+        return (float)(LOG_2_RECIPROCAL * v * log((double)v));
+    } else {
+        const uint32_t full = v; // input before any shifting
+        const float full_f = (float)v;
+        uint32_t pow2 = 1; // ends as 2^shifts
+        int shifts = 0;
+        int fixup;
+        // Halve v until it indexes kLog2Table, counting the halvings.
+        do {
+            ++shifts;
+            v >>= 1;
+            pow2 <<= 1;
+        } while (v >= LOG_LOOKUP_IDX_MAX);
+        // log2(full) ~ kLog2Table[v] + shifts. The bits shifted out are
+        // accounted for with log(1 + d) ~ d for small d, scaled by
+        // LOG_2_RECIPROCAL ~ 23/16 (hence the multiply by 23 and shift by 4).
+        fixup = (23 * (full & (pow2 - 1))) >> 4;
+        return full_f * (kLog2Table[v] + shifts) + fixup;
+    }
+}
+
+static float FastLog2Slow(uint32_t v) {
+    // Slow path of the log2(v) estimate, for v at or above LOG_LOOKUP_IDX_MAX.
+    assert(v >= LOG_LOOKUP_IDX_MAX);
+    if (v >= APPROX_LOG_WITH_CORRECTION_MAX) {
+        return (float)(LOG_2_RECIPROCAL * log((double)v));
+    } else {
+        const uint32_t full = v; // input before any shifting
+        uint32_t pow2 = 1;       // ends as 2^shifts
+        int shifts = 0;
+        double result;
+        do {
+            ++shifts;
+            v >>= 1;
+            pow2 <<= 1;
+        } while (v >= LOG_LOOKUP_IDX_MAX);
+        result = kLog2Table[v] + shifts;
+        // The correction needs a division, so it is applied only from APPROX_LOG_MAX up.
+        if (full >= APPROX_LOG_MAX) {
+            const int fixup = (23 * (full & (pow2 - 1))) >> 4;
+            result += (double)fixup / full;
+        }
+        return (float)result;
+    }
+}
+
+// Returns the smaller of a and b (a when they are equal).
+static WEBP_INLINE int GetMin(int a, int b) {
+    return (b < a) ? b : a;
+}
+
+//------------------------------------------------------------------------------
+// Methods to calculate Entropy (Shannon).
+
+static float PredictionCostSpatial(const int counts[256], int weight_0, double exp_val) {
+    // Weighted sum of the counts nearest zero: counts[0] * weight_0, plus
+    // counts[k] + counts[256 - k] for k = 1..15 with a weight that starts at
+    // exp_val and is multiplied by 0.6 per step. The sum is scaled by -0.1.
+    double weight = exp_val;
+    double total = weight_0 * counts[0];
+    int k;
+    for (k = 1; k < (256 >> 4); ++k, weight *= 0.6) total += weight * (counts[k] + counts[256 - k]);
+    return (float)(-0.1 * total);
+}
+
+
+// Compute the combined Shannon entropy for the distributions {X} and {X+Y}.
+static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
+    double entropy = 0.;
+    int total_x = 0, total_xy = 0;
+    int k;
+    for (k = 0; k < 256; ++k) {
+        const int x = X[k];
+        const int xy = x + Y[k]; // equals Y[k] whenever x is zero
+        // Bins that are empty in both histograms contribute nothing.
+        if (x == 0 && xy == 0) continue;
+        if (x != 0) {
+            total_x += x;
+            entropy -= VP8LFastSLog2(x);
+        }
+        total_xy += xy;
+        entropy -= VP8LFastSLog2(xy);
+    }
+    // Add back the VP8LFastSLog2 terms of the two totals.
+    entropy += VP8LFastSLog2(total_x) + VP8LFastSLog2(total_xy);
+    return (float)entropy;
+}
+
+static float PredictionCostSpatialHistogram(const int accumulated[4][256], const int tile[4][256]) {
+    // Per channel: spatial cost of the tile plus its combined entropy with 'accumulated'.
+    double cost = 0;
+    int ch;
+    for (ch = 0; ch < 4; ++ch) {
+        cost += PredictionCostSpatial(tile[ch], 1, 0.94);
+        cost += VP8LCombinedShannonEntropy(tile[ch], accumulated[ch]);
+    }
+    return (float)cost;
+}
+
+void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) { // resets every field
+    entropy->nonzero_code = VP8L_NON_TRIVIAL_SYM;
+    entropy->max_val = 0;
+    entropy->nonzeros = 0;
+    entropy->sum = 0;
+    entropy->entropy = 0.;
+}
+
+void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n, VP8LBitEntropy* const entropy) {
+    int idx;
+
+    // Start from a cleared accumulator, then fold in every non-zero count.
+    VP8LBitEntropyInit(entropy);
+
+    for (idx = 0; idx < n; ++idx) {
+        const uint32_t count = array[idx];
+        if (count == 0) continue;
+        entropy->sum += count;
+        entropy->nonzero_code = idx; // ends up as the last non-zero index
+        ++entropy->nonzeros;
+        entropy->entropy -= VP8LFastSLog2(count);
+        if (entropy->max_val < count) entropy->max_val = count;
+    }
+    // Finish with the VP8LFastSLog2 term of the total count.
+    entropy->entropy += VP8LFastSLog2(entropy->sum);
+}
+
+static WEBP_INLINE void GetEntropyUnrefinedHelper(uint32_t val,
+                                                  int i,
+                                                  uint32_t* const val_prev,
+                                                  int* const i_prev,
+                                                  VP8LBitEntropy* const bit_entropy,
+                                                  VP8LStreaks* const stats) {
+    // Accounts for the run of *val_prev spanning indices [*i_prev, i).
+    const uint32_t prev = *val_prev;
+    const int run = i - *i_prev;
+    const int nonzero = (prev != 0);
+    const int long_run = (run > 3);
+    // Bit-entropy statistics only count runs of a non-zero value.
+    if (nonzero) {
+        bit_entropy->sum += prev * run;
+        bit_entropy->nonzeros += run;
+        bit_entropy->nonzero_code = *i_prev;
+        bit_entropy->entropy -= VP8LFastSLog2(prev) * run;
+        if (bit_entropy->max_val < prev) bit_entropy->max_val = prev;
+    }
+
+    // Huffman-cost statistics: bucketed by zero/non-zero value and run > 3.
+    stats->counts[nonzero] += long_run;
+    stats->streaks[nonzero][long_run] += run;
+    *val_prev = val; // the new value starts its own run at index i
+    *i_prev = i;
+}
+
+void VP8LGetEntropyUnrefined(const uint32_t* const X,
+                             int length,
+                             VP8LBitEntropy* const bit_entropy,
+                             VP8LStreaks* const stats) {
+    uint32_t run_value = X[0]; // value of the run currently being scanned
+    int run_start = 0;         // index where that run began
+    int pos = 1;
+    memset(stats, 0, sizeof(*stats));
+    VP8LBitEntropyInit(bit_entropy);
+    // Report each run of equal values as soon as a different value appears.
+    // NOTE(review): confirm where VP8LGetEntropyUnrefinedHelper is defined.
+    while (pos < length) {
+        if (X[pos] != run_value) {
+            VP8LGetEntropyUnrefinedHelper(X[pos], pos, &run_value, &run_start, bit_entropy, stats);
+        }
+        ++pos;
+    }
+    // Flush the last run, which ends at index pos.
+    VP8LGetEntropyUnrefinedHelper(0, pos, &run_value, &run_start, bit_entropy, stats);
+    bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
+}
+
+void VP8LGetCombinedEntropyUnrefined(const uint32_t* const X,
+                                     const uint32_t* const Y,
+                                     int length,
+                                     VP8LBitEntropy* const bit_entropy,
+                                     VP8LStreaks* const stats) {
+    uint32_t run_value = X[0] + Y[0]; // value of the run currently being scanned
+    int run_start = 0;                // index where that run began
+    int pos = 1;
+    memset(stats, 0, sizeof(*stats));
+    VP8LBitEntropyInit(bit_entropy);
+    // Same run scan as for one array, applied to the element-wise sum X + Y.
+    // NOTE(review): confirm where VP8LGetEntropyUnrefinedHelper is defined.
+    while (pos < length) {
+        const uint32_t xy = X[pos] + Y[pos];
+        if (xy != run_value) {
+            VP8LGetEntropyUnrefinedHelper(xy, pos, &run_value, &run_start, bit_entropy, stats);
+        }
+        ++pos;
+    }
+    VP8LGetEntropyUnrefinedHelper(0, pos, &run_value, &run_start, bit_entropy, stats);
+    bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
+}
+
+static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {
+    histo_argb[3][argb & 0xff] += 1;         // bits 7..0
+    histo_argb[2][(argb >> 8) & 0xff] += 1;  // bits 15..8
+    histo_argb[1][(argb >> 16) & 0xff] += 1; // bits 23..16
+    histo_argb[0][argb >> 24] += 1;          // bits 31..24
+}
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE uint32_t
+Predict(VP8LPredictorFunc pred_func, int x, int y, const uint32_t* current_row, const uint32_t* upper_row) {
+    // Returns the predicted value for pixel (x, y). Pixels in the first row or
+    // first column have fixed predictors; pred_func is consulted only when both
+    // a left neighbour and an upper row are available.
+    if (x != 0 && y != 0) return pred_func(current_row[x - 1], upper_row + x);
+    if (y != 0) return upper_row[0];       // first column: pixel above (Top)
+    if (x != 0) return current_row[x - 1]; // first row: pixel to the left (Left)
+    return ARGB_BLACK;                     // top-left pixel
+}
+
+// Picks the cheapest of the 14 predictor modes for one tile, adds that mode's
+// residual histogram to 'accumulated' and returns the mode index.
+static int GetBestPredictorForTile(int width,
+                                   int height,
+                                   int tile_x,
+                                   int tile_y,
+                                   int bits,
+                                   int accumulated[4][256],
+                                   const uint32_t* const argb_scratch,
+                                   int exact) {
+    const int kNumPredModes = 14;
+    const int tile_size = 1 << bits;
+    const int first_col = tile_x << bits;
+    const int first_row = tile_y << bits;
+    // Tiles on the right/bottom border are clipped to the image.
+    const int cols = GetMin(tile_size, width - first_col);
+    const int rows = GetMin(tile_size, height - first_row);
+    // Two scratch histograms, swapped by pointer so the best one is kept without copying.
+    int histo_a[4][256];
+    int histo_b[4][256];
+    int(*trial)[256] = histo_a;
+    int(*winner)[256] = histo_b;
+    float lowest_cost = MAX_DIFF_COST;
+    int chosen = 0;
+    int mode, ch, bin;
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+    for (mode = 0; mode < kNumPredModes; ++mode) {
+        const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
+        // The first 'width' entries of argb_scratch act as the row above the tile's first row.
+        const uint32_t* above = argb_scratch;
+        float cost;
+        int dy;
+        memset(trial, 0, sizeof(histo_a));
+        for (dy = 0; dy < rows; ++dy, above += width) {
+            const uint32_t* const here = above + width;
+            const int row = first_row + dy;
+            int dx;
+            for (dx = 0; dx < cols; ++dx) {
+                const int col = first_col + dx;
+                const uint32_t pixel = here[col];
+                uint32_t residual = VP8LSubPixels(pixel, Predict(pred_func, col, row, here, above));
+                if (!exact && (pixel & kMaskAlpha) == 0) {
+                    residual &= kMaskAlpha; // See CopyTileWithPrediction.
+                }
+#if SAVE_RESIDULE
+                fprintf(stderr, "(%dx%d):0x%x ", col, row, residual);
+#endif
+                UpdateHisto(trial, residual);
+            }
+        }
+#if SAVE_RESIDULE
+        fprintf(stderr, "\n");
+#endif
+        cost = PredictionCostSpatialHistogram((const int(*)[256])accumulated, (const int(*)[256])trial);
+        if (cost < lowest_cost) {
+            int(*const spare)[256] = winner;
+            winner = trial;
+            trial = spare;
+            lowest_cost = cost;
+            chosen = mode;
+        }
+    }
+
+    for (ch = 0; ch < 4; ch++) {
+        for (bin = 0; bin < 256; bin++) {
+            accumulated[ch][bin] += winner[ch][bin];
+        }
+    }
+    StopProfiling(&stop_watch, &timeBestPredict, &countBestPredict);
+
+    return chosen;
+}
+
+// Selects the best of the 14 predictor modes for one tile from precomputed
+// residuals and adds the winning histogram to 'accumulated'. Returns the mode.
+// 'residual' is indexed as group_width x group_height pixel groups in row-major
+// group order, each pixel holding kNumPredModes consecutive values.
+static int GetBestPredictorForTile2(int width,
+                                    int height,
+                                    int group_width,
+                                    int group_height,
+                                    int bits,
+                                    int tile_x,
+                                    int tile_y,
+                                    uint32_t* residual,
+                                    int accumulated[4][256]) {
+    const int kNumPredModes = 14;
+    float best_diff = MAX_DIFF_COST;
+    int best_mode = 0;
+    int mode, i, j, x, y;
+    float cur_diff;
+    const int col_start = tile_x << bits;
+    const int row_start = tile_y << bits;
+    const int tile_size = 1 << bits;
+    const int max_y = GetMin(tile_size, height - row_start);
+    const int max_x = GetMin(tile_size, width - col_start);
+    // Loop invariants: groups per image row (width rounded up) and the number
+    // of residual entries per group. size_t keeps large images from overflowing.
+    const size_t groups_per_row = (size_t)((width + group_width - 1) / group_width);
+    const size_t group_stride = (size_t)group_width * group_height * kNumPredModes;
+    int histo_stack_1[4][256];
+    int histo_stack_2[4][256];
+    // Need pointers to be able to swap arrays.
+    int(*histo_argb)[256] = histo_stack_1;
+    int(*best_histo)[256] = histo_stack_2;
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+    for (mode = 0; mode < kNumPredModes; ++mode) {
+        memset(histo_argb, 0, sizeof(histo_stack_1));
+        // Build the residual histogram of this tile for the current mode.
+        for (y = 0; y < max_y; ++y) {
+            const int global_y = row_start + y;
+            const size_t group_row = (size_t)(global_y / group_height) * groups_per_row;
+            const size_t local_row = (size_t)(global_y % group_height) * group_width;
+            for (x = 0; x < max_x; ++x) {
+                const int global_x = col_start + x;
+                const size_t group = group_row + (size_t)(global_x / group_width);
+                const size_t local = local_row + (size_t)(global_x % group_width);
+                const size_t offset = group * group_stride + local * kNumPredModes + mode;
+#if SAVE_RESIDULE
+                fprintf(stderr, "(%dx%d):0x%x ", global_x, global_y, residual[offset]);
+#endif
+                UpdateHisto(histo_argb, residual[offset]);
+            }
+        }
+#if SAVE_RESIDULE
+        fprintf(stderr, "\n");
+#endif
+        // Keep the histogram of the cheapest mode seen so far.
+        cur_diff = PredictionCostSpatialHistogram((const int(*)[256])accumulated, histo_argb);
+        if (cur_diff < best_diff) {
+            int(*tmp)[256] = histo_argb;
+            histo_argb = best_histo;
+            best_histo = tmp;
+            best_diff = cur_diff;
+            best_mode = mode;
+        }
+    }
+
+    // update accumulate histo
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < 256; j++) {
+            accumulated[i][j] += best_histo[i][j];
+        }
+    }
+
+    StopProfiling(&stop_watch, &timeGetBestPredictorForTile2, &countGetBestPredictorForTile2);
+
+    return best_mode;
+}
+
+// Rewrites argb in place as prediction residuals, one row at a time.
+//   modes        : per-tile predictor index, read from bits 8..15 of each entry
+//                  (not read when low_effort is set: kPredLowEffort is used).
+//   argb_scratch : working storage for two rows of (width + 1) pixels each.
+//   exact        : when zero (and not low_effort), RGB of alpha-0 pixels is not kept.
+static void CopyImageWithPrediction(int width, int height, int bits, uint32_t* const modes,
+                                    uint32_t* const argb_scratch, uint32_t* const argb,
+                                    int low_effort, int exact) {
+    const int tiles_per_row = VP8LSubSampleSize(width, bits);
+    const int tile_mask = (1 << bits) - 1;
+    // Each scratch row carries one extra pixel so that the top-right neighbour
+    // of the last column can be the leftmost pixel of the following row.
+    uint32_t* cur = argb_scratch;
+    uint32_t* above = argb_scratch + width + 1;
+    VP8LPredictorFunc predictor = low_effort ? VP8LPredictors[kPredLowEffort] : NULL;
+    int y;
+
+    for (y = 0; y < height; ++y) {
+        uint32_t* const out = argb + y * width;
+        uint32_t* const spare = above;
+        int x;
+        // The row handled last becomes `above`; its old buffer is refilled.
+        above = cur;
+        cur = spare;
+        memcpy(cur, out, sizeof(*cur) * width);
+        cur[width] = (y + 1 < height) ? argb[(y + 1) * width] : ARGB_BLACK;
+
+        for (x = 0; x < width; ++x) {
+            uint32_t predict, residual;
+            if (low_effort) {
+                // One fixed predictor, no transparency clean-up.
+                out[x] = VP8LSubPixels(cur[x], Predict(predictor, x, y, cur, above));
+                continue;
+            }
+            if ((x & tile_mask) == 0) {
+                // First column of a tile: fetch that tile's predictor.
+                const int mode = (modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff;
+                predictor = VP8LPredictors[mode];
+            }
+            predict = Predict(predictor, x, y, cur, above);
+            residual = VP8LSubPixels(cur[x], predict);
+            if (!exact && (cur[x] & kMaskAlpha) == 0) {
+                // Alpha is 0: the RGB residual is forced to 0 (the alpha
+                // residual is kept) and the source pixel takes the predicted
+                // RGB so that the next predictions use the right value.
+                residual &= kMaskAlpha;
+                cur[x] = predict & ~kMaskAlpha;
+                // Keep the extra top-right slot of the row above in sync.
+                if (x == 0 && y != 0) above[width] = cur[x];
+            }
+            out[x] = residual;
+        }
+    }
+}
+
+#if GET_RESIDULE_RECONT
+// Host-only variant of the residual-image step.
+// For every pixel it stores the residual of each of the kNumPredModes
+// predictors in a scratch buffer organised per group of
+// group_width x group_height pixels, lets GetBestPredictorForTile2() pick a
+// predictor per tile into image[], then rewrites argb in place through
+// CopyImageWithPrediction().
+// If the scratch buffer cannot be allocated, the failure is logged to stderr
+// and the function returns with image[] and argb left untouched.
+static WEBP_INLINE void VP8LResidualImageRec(int width, int height, int bits, int low_effort,
+                                             uint32_t* const argb, uint32_t* const argb_scratch,
+                                             uint32_t* const image, int exact) {
+    int histo[4][256];
+    const int kNumPredModes = 14;
+    const int tiles_per_row = VP8LSubSampleSize(width, bits);
+    const int tiles_per_col = VP8LSubSampleSize(height, bits);
+    const int group_width = GRP_X;
+    const int group_height = GRP_Y;
+    // Number of groups needed to cover the image in each direction.
+    const int groups_per_row = (width + group_width - 1) / group_width;
+    const int groups_per_col = (height + group_height - 1) / group_height;
+    // Entries per group: one residual per pixel and per predictor.
+    const size_t group_stride = (size_t)group_width * group_height * kNumPredModes;
+    // Computed in size_t so that large images cannot overflow an int.
+    const size_t residual_size = (size_t)groups_per_row * groups_per_col * group_stride * sizeof(uint32_t);
+    uint32_t* residual = NULL;
+    int tile_y;
+    int group_x, group_y, local_x, local_y;
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    if (low_effort) {
+        int i;
+        for (i = 0; i < tiles_per_row * tiles_per_col; ++i) {
+            image[i] = ARGB_BLACK | (kPredLowEffort << 8);
+        }
+        goto Ok;
+    }
+
+    residual = malloc(residual_size);
+    if (NULL == residual) {
+        fprintf(stderr, "%s %d malloc\n", __func__, __LINE__);
+        goto Error;
+    }
+
+    for (group_y = 0; group_y < groups_per_col; group_y++) {
+        for (group_x = 0; group_x < groups_per_row; group_x++) {
+            uint32_t* const group_base = residual + ((size_t)group_y * groups_per_row + group_x) * group_stride;
+            for (local_y = 0; local_y < group_height; local_y++) {
+                const int row = group_y * group_height + local_y;
+                // Rows from `height` on are padding of the last group row.
+                // The test must be >=: row == height already lies outside argb.
+                if (row >= height) {
+                    break;
+                }
+                for (local_x = 0; local_x < group_width; local_x++) {
+                    const int col = group_x * group_width + local_x;
+                    const uint32_t* const current_row = argb + width * row;
+                    const uint32_t* const upper_row = (row == 0) ? NULL : (argb + width * (row - 1));
+                    // Slot of this pixel's kNumPredModes residuals inside its group.
+                    uint32_t* const p_residule =
+                        group_base + ((size_t)local_y * group_width + local_x) * kNumPredModes;
+                    int mode;
+                    // Likewise, columns from `width` on are padding.
+                    if (col >= width) {
+                        break;
+                    }
+
+                    for (mode = 0; mode < kNumPredModes; ++mode) {
+                        const uint32_t predict = Predict(VP8LPredictors[mode], col, row, current_row, upper_row);
+                        uint32_t res = VP8LSubPixels(current_row[col], predict);
+                        if (!exact && (current_row[col] & kMaskAlpha) == 0) {
+                            res &= kMaskAlpha; // alpha is 0: keep the alpha residual only
+                        }
+                        p_residule[mode] = res;
+                    }
+                }
+            }
+        }
+    }
+
+#if SAVE_RESIDULE
+    {
+        // Debug dump; skipped when the file cannot be opened.
+        FILE* fp = fopen("residual.bin", "wb");
+        if (fp != NULL) {
+            fwrite(residual, 1, residual_size, fp);
+            fclose(fp);
+        }
+    }
+#endif
+
+    memset(histo, 0, sizeof(histo));
+    for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
+        int tile_x;
+        for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
+            const int pred = GetBestPredictorForTile2(width, height, group_width, group_height, bits, tile_x, tile_y,
+                                                      residual, histo);
+            image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8);
+        }
+    }
+
+Ok:
+
+    CopyImageWithPrediction(width, height, bits, image, argb_scratch, argb, low_effort, exact);
+Error:
+    free(residual);
+    StopProfiling(&stop_watch, &timeVP8LResidualImage, &countVP8LResidualImage);
+    return;
+}
+#endif
+
+// OpenCL variant of the residual-image step: uploads argb, runs the
+// residualimage kernel, reads the per-predictor residuals back and lets
+// GetBestPredictorForTile2() choose a predictor per tile into image[], then
+// rewrites argb in place through CopyImageWithPrediction().
+// Any OpenCL or allocation failure is logged to stderr and skips that rewrite.
+// The device buffers and the kernel are released on every exit path.
+static WEBP_INLINE void VP8LResidualImageOcl(int width, int height, int bits, int low_effort,
+                                             uint32_t* const argb, uint32_t* const argb_scratch,
+                                             uint32_t* const image, int exact) {
+    int histo[4][256];
+    const int tiles_per_row = VP8LSubSampleSize(width, bits);
+    const int tiles_per_col = VP8LSubSampleSize(height, bits);
+    const int frame_size = width * height * sizeof(uint32_t);
+    const int group_width = GRX_SIZE_RESIDUAL;
+    const int group_height = GRY_SIZE_RESIDUAL;
+    const int residual_size = residualpara.residual_size;
+
+    size_t globalSize[] = {((width + group_width - 1) / group_width) * group_width,
+                           ((height + group_height - 1) / group_height) * group_height};
+    size_t localSize[] = {group_width, group_height};
+    uint32_t* residual = NULL;
+    cl_int status;
+    int tile_y;
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    if (low_effort) {
+        int i;
+        for (i = 0; i < tiles_per_row * tiles_per_col; ++i) {
+            image[i] = ARGB_BLACK | (kPredLowEffort << 8);
+        }
+        goto Ok;
+    }
+
+    residual = malloc(residual_size);
+    if (NULL == residual) {
+        fprintf(stderr, "%s %d malloc\n", __func__, __LINE__);
+        goto Error;
+    }
+
+    status = clEnqueueWriteBuffer(hardware.mQueue, residualpara.buffer_argb, CL_TRUE, (width + 1) * sizeof(uint32_t),
+                                  frame_size, argb, 0, NULL, NULL);
+    if (CL_SUCCESS != status) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(status));
+        goto Error;
+    }
+
+    status = clSetKernelArg(residualimage.mKernel, 4, sizeof(int), &exact);
+    if (status != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(status));
+        goto Error;
+    }
+
+    status = clEnqueueNDRangeKernel(hardware.mQueue, residualimage.mKernel, 2, 0, globalSize, localSize, 0, NULL, NULL);
+    if (status != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(status));
+        goto Error;
+    }
+
+    status = clFinish(hardware.mQueue);
+    if (status != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(status));
+        goto Error;
+    }
+
+    status = clEnqueueReadBuffer(hardware.mQueue, residualpara.buffer_residual, CL_TRUE, 0, residual_size, residual, 0,
+                                 NULL, NULL);
+    if (CL_SUCCESS != status) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(status));
+        goto Error;
+    }
+#if SAVE_RESIDULE
+    {
+        FILE* fp = fopen("residual.bin", "wb");
+        if (fp != NULL) {  // debug dump; skipped when the file cannot be opened
+            fwrite(residual, 1, residual_size, fp);
+            fclose(fp);
+        }
+    }
+#endif
+
+    memset(histo, 0, sizeof(histo));
+    for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
+        int tile_x;
+        for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
+            const int pred = GetBestPredictorForTile2(width, height, group_width, group_height, bits, tile_x, tile_y,
+                                                      residual, histo);
+            image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8);
+        }
+    }
+
+Ok:
+
+    CopyImageWithPrediction(width, height, bits, image, argb_scratch, argb, low_effort, exact);
+Error:
+    // The host copy of the residuals was leaked before; free(NULL) is a no-op.
+    free(residual);
+    clReleaseMemObject(residualpara.buffer_argb);
+    clReleaseMemObject(residualpara.buffer_residual);
+    releaseKernel(residualimage);
+
+    StopProfiling(&stop_watch, &timeVP8LResidualImage, &countVP8LResidualImage);
+    return;
+}
+
+void VP8LResidualImage(int width,
+                       int height,
+                       int bits,
+                       int low_effort,
+                       uint32_t* const argb,
+                       uint32_t* const argb_scratch,
+                       uint32_t* const image,
+                       int exact,
+                       int use_ocl) {
+// Fills image[] with one predictor per tile (stored in bits 8..15) and then
+// turns argb into residuals via CopyImageWithPrediction() or the Rec variant.
+// use_ocl is currently ignored; the disabled dispatch was:
+//   if (use_ocl) { VP8LResidualImageOcl(<same arguments minus use_ocl>); return; }
+#if GET_RESIDULE_RECONT
+    VP8LResidualImageRec(width, height, bits, low_effort, argb, argb_scratch, image, exact);
+#else
+    const int max_tile_size = 1 << bits;
+    const int tiles_per_row = VP8LSubSampleSize(width, bits);
+    const int tiles_per_col = VP8LSubSampleSize(height, bits);
+    uint32_t* const upper_row = argb_scratch;  // receives the last row of the previous tile row
+    uint32_t* const current_tile_rows = argb_scratch + width;  // receives the rows of the current tile row
+    int tile_y;
+    int histo[4][256];  // accumulated across all tiles, passed to every predictor search
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+    if (low_effort) {
+        int i;
+        for (i = 0; i < tiles_per_row * tiles_per_col; ++i) {
+            image[i] = ARGB_BLACK | (kPredLowEffort << 8);  // same predictor for every tile
+        }
+    } else {
+        memset(histo, 0, sizeof(histo));
+        for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
+            const int tile_y_offset = tile_y * max_tile_size;
+            const int this_tile_height = (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
+            int tile_x;
+            if (tile_y > 0) {  // save the bottom row of the previous tile row before it is overwritten
+                memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width, width * sizeof(*upper_row));
+            }
+            memcpy(current_tile_rows, &argb[tile_y_offset * width],
+                   this_tile_height * width * sizeof(*current_tile_rows));
+            for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
+                const int pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits, (int(*)[256])histo,
+                                                         argb_scratch, exact);
+                image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8);
+            }
+        }
+    }
+
+    CopyImageWithPrediction(width, height, bits, image, argb_scratch, argb, low_effort, exact);
+    StopProfiling(&stop_watch, &timeVP8LResidualImage, &countVP8LResidualImage);
+#endif
+}
+
+// In place, for each pixel: red -= green and blue -= green (mod 256).
+void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {
+    int n;
+    for (n = 0; n < num_pixels; ++n) {
+        const uint32_t pix = argb_data[n];
+        const uint32_t g = (pix >> 8) & 0xff;
+        const uint32_t r = ((pix >> 16) - g) & 0xff;
+        argb_data[n] = (pix & 0xff00ff00u) | (r << 16) | ((pix - g) & 0xff);
+    }
+}
+
+// Resets all three multipliers of *m to zero.
+static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) {
+    m->red_to_blue_ = 0;
+    m->green_to_blue_ = m->green_to_red_ = 0;
+}
+
+static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred, int8_t color) {
+    return ((uint32_t)(color_pred * color)) >> 5;  // signed product, then unsigned shift
+}
+
+static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code, VP8LMultipliers* const m) {
+    m->red_to_blue_ = (color_code >> 16) & 0xff;   // bits 16..23
+    m->green_to_blue_ = (color_code >> 8) & 0xff;  // bits 8..15
+    m->green_to_red_ = color_code & 0xff;          // bits 0..7
+}
+
+static WEBP_INLINE uint32_t MultipliersToColorCode(const VP8LMultipliers* const m) {
+    return m->green_to_red_ | ((uint32_t)m->green_to_blue_ << 8) | ((uint32_t)m->red_to_blue_ << 16) | 0xff000000u;
+}
+
+// Applies the cross-color multipliers *m to num_pixels ARGB values, in place.
+void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data, int num_pixels) {
+    int n;
+    for (n = 0; n < num_pixels; ++n) {
+        const uint32_t pix = data[n];
+        // ColorTransformDelta() takes int8_t arguments, so the shifted values
+        // below are narrowed when they are passed in.
+        const uint32_t g = pix >> 8;
+        const uint32_t r = pix >> 16;
+        const uint32_t out_r = (r - ColorTransformDelta(m->green_to_red_, g)) & 0xff;
+        uint32_t out_b = pix - ColorTransformDelta(m->green_to_blue_, g);
+        out_b -= ColorTransformDelta(m->red_to_blue_, r);
+        out_b &= 0xff;
+        data[n] = (pix & 0xff00ff00u) | (out_r << 16) | out_b;
+    }
+}
+
+// Returns the red channel of argb after subtracting the green-derived delta,
+// reduced to 8 bits.
+static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red, uint32_t argb) {
+    const uint32_t delta = ColorTransformDelta(green_to_red, argb >> 8);
+    return (((argb >> 16) - delta) & 0xff);
+}
+
+// Returns the blue channel of argb after subtracting the green-derived and
+// red-derived deltas, reduced to 8 bits.
+static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue, uint8_t red_to_blue, uint32_t argb) {
+    const uint32_t from_green = ColorTransformDelta(green_to_blue, argb >> 8);
+    const uint32_t from_red = ColorTransformDelta(red_to_blue, argb >> 16);
+    // All arithmetic is modulo 256, so narrowing once at the end is enough.
+    return (uint8_t)(argb - from_green - from_red);
+}
+
+static float PredictionCostCrossColor(const int accumulated[256], const int counts[256]) {
+    // Two terms: the entropy of `counts` combined with `accumulated` (favors
+    // low entropy locally and globally) and a spatial term (favors small values).
+    static const double kSpatialExp = 2.4;
+    return VP8LCombinedShannonEntropy(counts, accumulated) + PredictionCostSpatial(counts, 3, kSpatialExp);
+}
+
+void VP8LCollectColorRedTransforms_C(
+    const uint32_t* argb, int stride, int tile_width, int tile_height, int green_to_red, int histo[]) {
+    int row, col;
+    // Walks the tile row by row; consecutive rows are `stride` pixels apart.
+    for (row = 0; row < tile_height; ++row, argb += stride) {
+        for (col = 0; col < tile_width; ++col) {
+            ++histo[TransformColorRed(green_to_red, argb[col])];
+        }
+    }
+}
+
+// Scores green_to_red as the green->red multiplier of one tile (callers keep
+// the lowest score): the cross-color cost of the tile's transformed red
+// histogram against accumulated_red_histo, lowered by 3 for each of these
+// that holds:
+//   - the multiplier, narrowed to uint8_t, equals prev_x's,
+//   - the multiplier, narrowed to uint8_t, equals prev_y's,
+//   - the multiplier is zero.
+static float GetPredictionCostCrossColorRed(const uint32_t* argb, int stride, int tile_width, int tile_height,
+                                            VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
+                                            const int accumulated_red_histo[256]) {
+    // Narrowed copy used for the two neighbour comparisons.
+    const uint8_t as_byte = (uint8_t)green_to_red;
+    int red_histo[256] = {0};
+    float cost;
+
+    VP8LCollectColorRedTransforms(argb, stride, tile_width, tile_height, green_to_red, red_histo);
+    cost = PredictionCostCrossColor(accumulated_red_histo, red_histo);
+
+    // Each bonus is its own float subtraction, applied in this order.
+    if (as_byte == prev_x.green_to_red_) cost -= 3;
+    if (as_byte == prev_y.green_to_red_) cost -= 3;
+    if (green_to_red == 0) cost -= 3;
+
+    return cost;
+}
+
+// Finds the green->red multiplier with the lowest
+// GetPredictionCostCrossColorRed() score and stores it in
+// best_tx->green_to_red_. The search starts at 0; each round tries the
+// current best minus and then plus a step that starts at 32 and halves
+// every round.
+static void GetBestGreenToRed(const uint32_t* argb, int stride, int tile_width, int tile_height,
+                              VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
+                              const int accumulated_red_histo[256], VP8LMultipliers* const best_tx) {
+    // Number of rounds: 4..6 for quality in [0, 100].
+    const int rounds = 4 + ((7 * quality) >> 8);
+    int best = 0;
+    int round, offset;
+    float best_cost = GetPredictionCostCrossColorRed(argb, stride, tile_width, tile_height, prev_x, prev_y, best,
+                                                     accumulated_red_histo);
+    for (round = 0; round < rounds; ++round) {
+        // ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to
+        // one in color computation.
+        const int step = 32 >> round;
+        for (offset = -step; offset <= step; offset += 2 * step) {
+            // `best` may already have moved on the negative try, in which
+            // case the positive try is taken from the updated value.
+            const int candidate = best + offset;
+            const float cost = GetPredictionCostCrossColorRed(argb, stride, tile_width, tile_height, prev_x, prev_y,
+                                                              candidate, accumulated_red_histo);
+            if (!(cost < best_cost)) continue;
+            best_cost = cost;
+            best = candidate;
+        }
+    }
+
+    best_tx->green_to_red_ = best;
+}
+
+// Adds the tile's transformed blue values to histo: one increment per pixel
+// at index TransformColorBlue(green_to_blue, red_to_blue, pixel).
+// That index is a uint8_t, so histo needs room for 256 counters.
+void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride, int tile_width, int tile_height,
+                                      int green_to_blue, int red_to_blue, int histo[]) {
+    int row, col;
+
+    // Consecutive tile rows are `stride` pixels apart in argb.
+    for (row = 0; row < tile_height; ++row, argb += stride) {
+        for (col = 0; col < tile_width; ++col) {
+            const uint8_t bucket = TransformColorBlue(green_to_blue, red_to_blue, argb[col]);
+            ++histo[bucket];
+        }
+    }
+}
+
+// Scores the (green_to_blue, red_to_blue) pair for one tile (callers keep the
+// lowest score): the cross-color cost of the tile's transformed blue
+// histogram against accumulated_blue_histo, lowered by 3 for each of the
+// following that holds, separately for each of the two multipliers:
+//   - narrowed to uint8_t, it equals the matching multiplier of prev_x,
+//   - narrowed to uint8_t, it equals the matching multiplier of prev_y,
+//   - it is zero.
+static float GetPredictionCostCrossColorBlue(const uint32_t* argb,
+                                             int stride,
+                                             int tile_width,
+                                             int tile_height,
+                                             VP8LMultipliers prev_x,
+                                             VP8LMultipliers prev_y,
+                                             int green_to_blue,
+                                             int red_to_blue,
+                                             const int accumulated_blue_histo[256]) {
+    // Narrowed copies used for the neighbour comparisons.
+    const uint8_t g2b_byte = (uint8_t)green_to_blue;
+    const uint8_t r2b_byte = (uint8_t)red_to_blue;
+    int blue_histo[256] = {0};
+    float cost;
+
+    VP8LCollectColorBlueTransforms(argb, stride, tile_width, tile_height, green_to_blue, red_to_blue, blue_histo);
+    cost = PredictionCostCrossColor(accumulated_blue_histo, blue_histo);
+
+    // Each bonus is a separate float subtraction, applied in this order.
+    if (g2b_byte == prev_x.green_to_blue_) cost -= 3;
+    if (g2b_byte == prev_y.green_to_blue_) cost -= 3;
+    if (r2b_byte == prev_x.red_to_blue_) cost -= 3;
+    if (r2b_byte == prev_y.red_to_blue_) cost -= 3;
+    if (green_to_blue == 0) cost -= 3;
+    if (red_to_blue == 0) cost -= 3;
+
+    return cost;
+}
+
+#define kGreenRedToBlueNumAxis 8
+#define kGreenRedToBlueMaxIters 7
+// Searches the (green->blue, red->blue) multiplier pair with the lowest
+// GetPredictionCostCrossColorBlue() score and stores it in best_tx.
+// Starting from (0, 0), every round tries the eight neighbours of the current
+// best at distance `step` (four axis-aligned, then four diagonal), moving to
+// a neighbour as soon as it scores lower.
+static void GetBestGreenRedToBlue(const uint32_t* argb, int stride, int tile_width, int tile_height,
+                                  VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
+                                  const int accumulated_blue_histo[256], VP8LMultipliers* const best_tx) {
+    // Direction of each try as {green_to_blue, red_to_blue} unit steps.
+    const int8_t offset[kGreenRedToBlueNumAxis][2] = {{0, -1},  {0, 1},  {-1, 0}, {1, 0},
+                                                      {-1, -1}, {-1, 1}, {1, -1}, {1, 1}};
+    // Step length used in each round.
+    const int8_t delta_lut[kGreenRedToBlueMaxIters] = {16, 16, 8, 4, 2, 2, 2};
+    // quality above 50: all rounds; below 25: a single round; otherwise four.
+    const int rounds = (quality > 50) ? kGreenRedToBlueMaxIters : (quality < 25) ? 1 : 4;
+    int best_g2b = 0;
+    int best_r2b = 0;
+    int round;
+    // Score of the origin.
+    float best_cost = GetPredictionCostCrossColorBlue(argb, stride, tile_width, tile_height, prev_x, prev_y,
+                                                      best_g2b, best_r2b, accumulated_blue_histo);
+
+    for (round = 0; round < rounds; ++round) {
+        const int step = delta_lut[round];
+        int dir;
+        for (dir = 0; dir < kGreenRedToBlueNumAxis; ++dir) {
+            const int g2b = best_g2b + offset[dir][0] * step;
+            const int r2b = best_r2b + offset[dir][1] * step;
+            const float cost = GetPredictionCostCrossColorBlue(argb, stride, tile_width, tile_height, prev_x, prev_y,
+                                                               g2b, r2b, accumulated_blue_histo);
+            if (cost < best_cost) {
+                best_cost = cost;
+                best_g2b = g2b;
+                best_r2b = r2b;
+            }
+            // Meant to keep only axis aligned diffs for lower quality.
+            // NOTE(review): quality < 25 gives rounds == 1, so `round` never
+            // reaches 4 and this early exit cannot trigger as written.
+            if (quality < 25 && round == 4) break;
+        }
+        // Still at the origin with the smallest step: further rounds would
+        // not help.
+        if (step == 2 && best_g2b == 0 && best_r2b == 0) break;
+    }
+    best_tx->green_to_blue_ = best_g2b;
+    best_tx->red_to_blue_ = best_r2b;
+}
+#undef kGreenRedToBlueMaxIters
+#undef kGreenRedToBlueNumAxis
+
+// Picks the cross-color multipliers for the tile at tile coordinates
+// (tile_x, tile_y): green->red first, then the green->blue / red->blue pair.
+// The tile spans up to (1 << bits) pixels per side, clipped to the
+// xsize x ysize image. prev_x / prev_y are the multipliers of neighbouring
+// tiles and are handed unchanged to both searches. The time spent here is
+// recorded in timeBestColor / countBestColor.
+static VP8LMultipliers GetBestColorTransformForTile(int tile_x, int tile_y, int bits,
+                                                    VP8LMultipliers prev_x, VP8LMultipliers prev_y,
+                                                    int quality, int xsize, int ysize,
+                                                    const int accumulated_red_histo[256],
+                                                    const int accumulated_blue_histo[256], const uint32_t* const argb) {
+    const int tile_size = 1 << bits;
+    const int x0 = tile_x * tile_size;
+    const int y0 = tile_y * tile_size;
+    // Edge tiles can be narrower / shorter than tile_size.
+    const int tile_width = GetMin(x0 + tile_size, xsize) - x0;
+    const int tile_height = GetMin(y0 + tile_size, ysize) - y0;
+    const uint32_t* const tile_argb = argb + y0 * xsize + x0;
+    VP8LMultipliers best_tx;
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+    MultipliersClear(&best_tx);
+
+    // Both searches write their result into best_tx.
+    GetBestGreenToRed(tile_argb, xsize, tile_width, tile_height, prev_x, prev_y, quality, accumulated_red_histo,
+                      &best_tx);
+    GetBestGreenRedToBlue(tile_argb, xsize, tile_width, tile_height, prev_x, prev_y, quality, accumulated_blue_histo,
+                          &best_tx);
+    StopProfiling(&stop_watch, &timeBestColor, &countBestColor);
+    return best_tx;
+}
+
+// Applies color_transform in place to the tile whose top-left pixel is
+// (tile_x, tile_y), clipped to the xsize x ysize image.
+static void CopyTileWithColorTransform(
+    int xsize, int ysize, int tile_x, int tile_y, int max_tile_size, VP8LMultipliers color_transform, uint32_t* argb) {
+    const int cols = GetMin(max_tile_size, xsize - tile_x);
+    const int rows = GetMin(max_tile_size, ysize - tile_y);
+    uint32_t* line = argb + tile_y * xsize + tile_x;
+    int done;
+    for (done = 0; done < rows; ++done, line += xsize) VP8LTransformColor(&color_transform, line, cols);
+}
+
+// For each tile of up to (1 << bits) pixels per side, in raster order: picks
+// the cross-color multipliers, stores their color code in image[], applies
+// them to argb in place, then adds the tile to the running red/blue histograms.
+void VP8LColorSpaceTransform(int width, int height, int bits, int quality, uint32_t* const argb, uint32_t* image) {
+    const int tile_size = 1 << bits;
+    const int tiles_wide = VP8LSubSampleSize(width, bits);
+    const int tiles_high = VP8LSubSampleSize(height, bits);
+    int red_totals[256] = {0};
+    int blue_totals[256] = {0};
+    VP8LMultipliers last, above;
+    int tx, ty;
+    MultipliersClear(&above);
+    MultipliersClear(&last);
+    for (ty = 0; ty < tiles_high; ++ty) {
+        const int y0 = ty * tile_size;
+        const int y1 = GetMin(y0 + tile_size, height);
+        for (tx = 0; tx < tiles_wide; ++tx) {
+            const int x0 = tx * tile_size;
+            const int x1 = GetMin(x0 + tile_size, width);
+            const int slot = ty * tiles_wide + tx;
+            int y;
+            if (ty != 0) ColorCodeToMultipliers(image[slot - tiles_wide], &above);
+            // `last` is the previously processed tile; it is not reset at the start of a tile row.
+            last = GetBestColorTransformForTile(tx, ty, bits, last, above, quality, width, height, red_totals,
+                                                blue_totals, argb);
+            image[slot] = MultipliersToColorCode(&last);
+            CopyTileWithColorTransform(width, height, x0, y0, tile_size, last, argb);
+
+            // Gather accumulated histogram data.
+            for (y = y0; y < y1; ++y) {
+                const int row_end = y * width + x1;
+                int ix;
+                for (ix = y * width + x0; ix < row_end; ++ix) {
+                    const uint32_t pix = argb[ix];
+                    // Repeated pixels are handled by backward references, so they are not counted.
+                    if (ix >= 2 && pix == argb[ix - 2] && pix == argb[ix - 1]) continue;
+                    if (ix >= width + 2 && argb[ix - 2] == argb[ix - width - 2] &&
+                        argb[ix - 1] == argb[ix - width - 1] && pix == argb[ix - width])
+                        continue;
+                    ++red_totals[(pix >> 16) & 0xff];
+                    ++blue_totals[(pix >> 0) & 0xff];
+                }
+            }
+        }
+    }
+}
+
+//------------------------------------------------------------------------------
+// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
+void VP8LBundleColorMap(const uint8_t* const row, int width, int xbits, uint32_t* const dst) {
+    int col;
+    if (xbits <= 0) {
+        // One input value per output pixel, placed in bits 8..15.
+        for (col = 0; col < width; ++col) dst[col] = 0xff000000 | (row[col] << 8);
+    } else {
+        const int bits_per_index = 1 << (3 - xbits);
+        const int sub_mask = (1 << xbits) - 1;
+        uint32_t packed = 0xff000000;
+        for (col = 0; col < width; ++col) {
+            const int slot = col & sub_mask;
+            // A new output pixel starts whenever the sub-position wraps to 0.
+            if (slot == 0) packed = 0xff000000;
+            packed |= row[col] << (8 + bits_per_index * slot);
+            dst[col >> xbits] = packed;
+        }
+    }
+}
+
+//------------------------------------------------------------------------------
+
+static double ExtraCost(const uint32_t* population, int length) {
+    double total = 0.;
+    int k;  // entries 0..3 of population contribute nothing
+    for (k = 4; k < length; ++k) total += ((k - 2) >> 1) * population[k];
+    return total;
+}
+
+static double ExtraCostCombined(const uint32_t* X, const uint32_t* Y, int length) {
+    double total = 0.;
+    int k;  // entries 0..3 of X and Y contribute nothing
+    for (k = 4; k < length; ++k) {
+        const int both = X[k] + Y[k];
+        total += ((k - 2) >> 1) * both;
+    }
+    return total;
+}
+
+//------------------------------------------------------------------------------
+
+// Stores the element-wise sum of histograms a and b into out, for the
+// literal, distance, red, blue and alpha tables. Only the first
+// VP8LHistogramNumCodes(a->palette_code_bits_) literal entries are summed.
+// `out` may be the same object as `b`; a is then added to it in place.
+static void HistogramAdd(const VP8LHistogram* const a, const VP8LHistogram* const b, VP8LHistogram* const out) {
+    const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
+    int k;
+    assert(a->palette_code_bits_ == b->palette_code_bits_);
+
+    if (b == out) {
+        // Accumulate a into out.
+        for (k = 0; k < literal_size; ++k) out->literal_[k] += a->literal_[k];
+        for (k = 0; k < NUM_DISTANCE_CODES; ++k) out->distance_[k] += a->distance_[k];
+        for (k = 0; k < NUM_LITERAL_CODES; ++k) {
+            out->red_[k] += a->red_[k];
+            out->blue_[k] += a->blue_[k];
+            out->alpha_[k] += a->alpha_[k];
+        }
+        return;
+    }
+
+    // out is a separate destination (or is a itself): write a + b.
+    for (k = 0; k < literal_size; ++k) out->literal_[k] = a->literal_[k] + b->literal_[k];
+    for (k = 0; k < NUM_DISTANCE_CODES; ++k) out->distance_[k] = a->distance_[k] + b->distance_[k];
+    for (k = 0; k < NUM_LITERAL_CODES; ++k) {
+        out->red_[k] = a->red_[k] + b->red_[k];
+        out->blue_[k] = a->blue_[k] + b->blue_[k];
+        out->alpha_[k] = a->alpha_[k] + b->alpha_[k];
+    }
+}
+
+//------------------------------------------------------------------------------
+// Function pointers for the encoder-side lossless routines; VP8LEncDspInit() assigns every one of them.
+VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
+
+VP8LTransformColorFunc VP8LTransformColor;
+
+VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
+VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
+
+VP8LFastLog2SlowFunc VP8LFastLog2Slow;
+VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
+
+VP8LCostFunc VP8LExtraCost;
+VP8LCostCombinedFunc VP8LExtraCostCombined;
+VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
+
+GetEntropyUnrefinedHelperFunc VP8LGetEntropyUnrefinedHelper;
+
+VP8LHistogramAddFunc VP8LHistogramAdd;
+// Per-architecture initializers; VP8LEncDspInit() calls each one only inside its WEBP_USE_* guard.
+extern void VP8LEncDspInitSSE2(void);
+extern void VP8LEncDspInitSSE41(void);
+extern void VP8LEncDspInitNEON(void);
+extern void VP8LEncDspInitMIPS32(void);
+extern void VP8LEncDspInitMIPSdspR2(void);
+// VP8GetCPUInfo value recorded by the last VP8LEncDspInit() run; it starts out holding its own address.
+static volatile VP8CPUInfo lossless_enc_last_cpuinfo_used = (VP8CPUInfo)&lossless_enc_last_cpuinfo_used;
+
+// Installs the encoder-side lossless function pointers: first the C
+// implementations from this file, then whichever SIMD overrides are compiled
+// in and reported available by VP8GetCPUInfo(). Returns at once when
+// VP8GetCPUInfo still has the value recorded by the previous completed call,
+// so repeated calls do no work.
+WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) {
+    // Already set up for this CPU-info callback.
+    if (lossless_enc_last_cpuinfo_used == VP8GetCPUInfo) return;
+
+    VP8LDspInit();
+
+    // C defaults, part 1: histogram, entropy, cost and log helpers.
+    // (The assignments are independent of each other.)
+    VP8LHistogramAdd = HistogramAdd;
+    VP8LGetEntropyUnrefinedHelper = GetEntropyUnrefinedHelper;
+    VP8LCombinedShannonEntropy = CombinedShannonEntropy;
+    VP8LExtraCostCombined = ExtraCostCombined;
+    VP8LExtraCost = ExtraCost;
+    VP8LFastSLog2Slow = FastSLog2Slow;
+    VP8LFastLog2Slow = FastLog2Slow;
+
+    // C defaults, part 2: color transform routines.
+    VP8LCollectColorRedTransforms = VP8LCollectColorRedTransforms_C;
+    VP8LCollectColorBlueTransforms = VP8LCollectColorBlueTransforms_C;
+    VP8LTransformColor = VP8LTransformColor_C;
+    VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C;
+
+    // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+    if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+        if (VP8GetCPUInfo(kSSE2)) {
+            VP8LEncDspInitSSE2();
+#if defined(WEBP_USE_SSE41)
+            // SSE4.1 is only probed once SSE2 has been confirmed.
+            if (VP8GetCPUInfo(kSSE4_1)) VP8LEncDspInitSSE41();
+#endif
+        }
+#endif
+#if defined(WEBP_USE_NEON)
+        if (VP8GetCPUInfo(kNEON)) VP8LEncDspInitNEON();
+#endif
+#if defined(WEBP_USE_MIPS32)
+        if (VP8GetCPUInfo(kMIPS32)) VP8LEncDspInitMIPS32();
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+        if (VP8GetCPUInfo(kMIPSdspR2)) VP8LEncDspInitMIPSdspR2();
+#endif
+    }
+
+    // Record which CPU-info callback this set-up corresponds to, so that the
+    // check at the top can skip the next call.
+    lossless_enc_last_cpuinfo_used = VP8GetCPUInfo;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/lossless_enc_mips32.c b/codec/L2/demos/webpEnc/host/src/dsp/lossless_enc_mips32.c
new file mode 100644
index 0000000000..421685683c
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/lossless_enc_mips32.c
@@ -0,0 +1,389 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of lossless functions
+//
+// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
+//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
+
+#include "./dsp.h"
+#include "./lossless.h"
+
+#if defined(WEBP_USE_MIPS32)
+
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+// MIPS32 slow path for the v * log2(v) estimate; callers must pass
+// v >= LOG_LOOKUP_IDX_MAX (asserted).
+// For v < APPROX_LOG_WITH_CORRECTION_MAX the result is built from a
+// kLog2Table lookup on the top bits of v plus a linear correction for the
+// discarded low bits; larger values fall back to log() from <math.h>.
+static float FastSLog2Slow(uint32_t v) {
+    assert(v >= LOG_LOOKUP_IDX_MAX);
+    if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
+        uint32_t log_cnt, y, correction;
+        const int c24 = 24;
+        const float v_f = (float)v;
+        uint32_t temp;
+
+        // Xf = 256 = 2^8
+        // log_cnt is index of leading one in upper 24 bits
+        // The assembly computes, in order:
+        //   log_cnt = 24 - clz(v);   y = 1 << log_cnt;   temp = v >> log_cnt;
+        // 'temp' is the only output without an early-clobber marker; it is
+        // written by the last instruction, after every input has been read.
+        __asm__ volatile(
+            "clz      %[log_cnt], %[v]                      \n\t"
+            "addiu    %[y],       $zero,        1           \n\t"
+            "subu     %[log_cnt], %[c24],       %[log_cnt]  \n\t"
+            "sllv     %[y],       %[y],         %[log_cnt]  \n\t"
+            "srlv     %[temp],    %[v],         %[log_cnt]  \n\t"
+            : [log_cnt] "=&r"(log_cnt), [y] "=&r"(y), [temp] "=r"(temp)
+            : [c24] "r"(c24), [v] "r"(v));
+
+        // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
+        // Xf = floor(Xf) * (1 + (v % y) / v)
+        // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
+        // The correction factor: log(1 + d) ~ d; for very small d values, so
+        // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
+        // LOG_2_RECIPROCAL ~ 23/16
+
+        // (v % y) = (v % 2^log_cnt) = v & (2^log_cnt - 1)
+        // The correction is added after the multiplication by v, so the
+        // division by v from the formula above cancels out here.
+        correction = (23 * (v & (y - 1))) >> 4;
+        return v_f * (kLog2Table[temp] + log_cnt) + correction;
+    } else {
+        // Out of the approximated range: use log() directly.
+        return (float)(LOG_2_RECIPROCAL * v * log((double)v));
+    }
+}
+
+// MIPS32 slow path for the log2(v) estimate; callers must pass
+// v >= LOG_LOOKUP_IDX_MAX (asserted).
+// For v < APPROX_LOG_WITH_CORRECTION_MAX the result is kLog2Table[top bits]
+// plus the shift count, with an extra correction term only once
+// v >= APPROX_LOG_MAX; larger values fall back to log() from <math.h>.
+static float FastLog2Slow(uint32_t v) {
+    assert(v >= LOG_LOOKUP_IDX_MAX);
+    if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
+        uint32_t log_cnt, y;
+        const int c24 = 24;
+        double log_2;
+        uint32_t temp;
+
+        // The assembly computes, in order:
+        //   log_cnt = 24 - clz(v);   y = 1 << log_cnt;   temp = v >> log_cnt;
+        __asm__ volatile(
+            "clz      %[log_cnt], %[v]                      \n\t"
+            "addiu    %[y],       $zero,        1           \n\t"
+            "subu     %[log_cnt], %[c24],       %[log_cnt]  \n\t"
+            "sllv     %[y],       %[y],         %[log_cnt]  \n\t"
+            "srlv     %[temp],    %[v],         %[log_cnt]  \n\t"
+            : [log_cnt] "=&r"(log_cnt), [y] "=&r"(y), [temp] "=r"(temp)
+            : [c24] "r"(c24), [v] "r"(v));
+
+        // Table value for the retained top bits plus the number of bits
+        // shifted out.
+        log_2 = kLog2Table[temp] + log_cnt;
+        if (v >= APPROX_LOG_MAX) {
+            // Since the division is still expensive, add this correction factor only
+            // for large values of 'v'.
+
+            // 23/16 stands in for LOG_2_RECIPROCAL; (v & (y - 1)) is v % y.
+            const uint32_t correction = (23 * (v & (y - 1))) >> 4;
+            log_2 += (double)correction / v;
+        }
+        return (float)log_2;
+    } else {
+        // Out of the approximated range: use log() directly.
+        return (float)(LOG_2_RECIPROCAL * log((double)v));
+    }
+}
+
+// Weighted sum of 'population' entries from index 4 up to 'length': the pair
+// (population[4 + 2k], population[5 + 2k]) is weighted by k + 1.
+//
+// 'length' must be even and >= 4: the loop advances two entries at a time and
+// only stops when the cursor is exactly equal to &population[length].
+//
+// C version of this function:
+//   int i = 0;
+//   int64_t cost = 0;
+//   const uint32_t* pop = &population[4];
+//   const uint32_t* LoopEnd = &population[length];
+//   while (pop != LoopEnd) {
+//     ++i;
+//     cost += i * *pop;
+//     cost += i * *(pop + 1);
+//     pop += 2;
+//   }
+//   return (double)cost;
+static double ExtraCost(const uint32_t* const population, int length) {
+    int i, temp0, temp1;
+    const uint32_t* pop = &population[4];
+    const uint32_t* const LoopEnd = &population[length];
+
+    // "mult $zero, $zero" clears the HI/LO accumulator, the madd instructions
+    // accumulate the 64-bit total there, and mfhi/mflo copy it out:
+    // temp0 receives the upper 32 bits, temp1 the lower 32 bits.
+    __asm__ volatile(
+        "mult   $zero,    $zero                  \n\t"
+        "xor    %[i],     %[i],       %[i]       \n\t"
+        "beq    %[pop],   %[LoopEnd], 2f         \n\t"
+        "1:                                        \n\t"
+        "lw     %[temp0], 0(%[pop])              \n\t"
+        "lw     %[temp1], 4(%[pop])              \n\t"
+        "addiu  %[i],     %[i],       1          \n\t"
+        "addiu  %[pop],   %[pop],     8          \n\t"
+        "madd   %[i],     %[temp0]               \n\t"
+        "madd   %[i],     %[temp1]               \n\t"
+        "bne    %[pop],   %[LoopEnd], 1b         \n\t"
+        "2:                                        \n\t"
+        "mfhi   %[temp0]                         \n\t"
+        "mflo   %[temp1]                         \n\t"
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [i] "=&r"(i), [pop] "+r"(pop)
+        : [LoopEnd] "r"(LoopEnd)
+        : "memory", "hi", "lo");
+
+    // The low word must be zero-extended before it is merged: temp1 is a
+    // signed int, so OR-ing it directly would sign-extend a value with bit 31
+    // set and overwrite the upper 32 bits with ones.
+    return (double)(((int64_t)temp0 << 32) | (uint32_t)temp1);
+}
+
+// Same weighted sum as ExtraCost(), computed on the element-wise sum of the
+// two arrays 'X' and 'Y': the pair (X[4 + 2k] + Y[4 + 2k],
+// X[5 + 2k] + Y[5 + 2k]) is weighted by k + 1.
+//
+// 'length' must be even and >= 4: the loop advances two entries at a time and
+// only stops when the X cursor is exactly equal to &X[length].
+//
+// C version of this function:
+//   int i = 0;
+//   int64_t cost = 0;
+//   const uint32_t* pX = &X[4];
+//   const uint32_t* pY = &Y[4];
+//   const uint32_t* LoopEnd = &X[length];
+//   while (pX != LoopEnd) {
+//     const uint32_t xy0 = *pX + *pY;
+//     const uint32_t xy1 = *(pX + 1) + *(pY + 1);
+//     ++i;
+//     cost += i * xy0;
+//     cost += i * xy1;
+//     pX += 2;
+//     pY += 2;
+//   }
+//   return (double)cost;
+static double ExtraCostCombined(const uint32_t* const X, const uint32_t* const Y, int length) {
+    int i, temp0, temp1, temp2, temp3;
+    const uint32_t* pX = &X[4];
+    const uint32_t* pY = &Y[4];
+    const uint32_t* const LoopEnd = &X[length];
+
+    // "mult $zero, $zero" clears the HI/LO accumulator, the madd instructions
+    // accumulate the 64-bit total there, and mfhi/mflo copy it out:
+    // temp0 receives the upper 32 bits, temp1 the lower 32 bits.
+    __asm__ volatile(
+        "mult   $zero,    $zero                  \n\t"
+        "xor    %[i],     %[i],       %[i]       \n\t"
+        "beq    %[pX],    %[LoopEnd], 2f         \n\t"
+        "1:                                        \n\t"
+        "lw     %[temp0], 0(%[pX])               \n\t"
+        "lw     %[temp1], 0(%[pY])               \n\t"
+        "lw     %[temp2], 4(%[pX])               \n\t"
+        "lw     %[temp3], 4(%[pY])               \n\t"
+        "addiu  %[i],     %[i],       1          \n\t"
+        "addu   %[temp0], %[temp0],   %[temp1]   \n\t"
+        "addu   %[temp2], %[temp2],   %[temp3]   \n\t"
+        "addiu  %[pX],    %[pX],      8          \n\t"
+        "addiu  %[pY],    %[pY],      8          \n\t"
+        "madd   %[i],     %[temp0]               \n\t"
+        "madd   %[i],     %[temp2]               \n\t"
+        "bne    %[pX],    %[LoopEnd], 1b         \n\t"
+        "2:                                        \n\t"
+        "mfhi   %[temp0]                         \n\t"
+        "mflo   %[temp1]                         \n\t"
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [i] "=&r"(i),
+          [pX] "+r"(pX), [pY] "+r"(pY)
+        : [LoopEnd] "r"(LoopEnd)
+        : "memory", "hi", "lo");
+
+    // The low word must be zero-extended before it is merged: temp1 is a
+    // signed int, so OR-ing it directly would sign-extend a value with bit 31
+    // set and overwrite the upper 32 bits with ones.
+    return (double)(((int64_t)temp0 << 32) | (uint32_t)temp1);
+}
+
+// Inline-assembly statement that records one finished streak.
+// It expects these locals at the expansion site:
+//   temp0            in: 0 or 1, selects the row/slot to update (clobbered)
+//   temp1..temp3     scratch
+//   streak           in: length of the streak
+//   pstreaks, pcnts  in: base addresses of the int arrays to update
+// With 4-byte ints the byte offsets used (8 * temp0 and 4 * temp0) make it
+// equivalent to:
+//   if (streak > 3) {
+//     pstreaks[2 * temp0 + 1] += streak;
+//     pcnts[temp0] += 1;
+//   } else {
+//     pstreaks[2 * temp0 + 0] += streak;
+//   }
+// The expansion already ends with ';', so it is used without one.
+#define HUFFMAN_COST_PASS                                                                       \
+    __asm__ volatile(                                                                           \
+        "sll   %[temp1],  %[temp0],    3           \n\t"                                        \
+        "addiu %[temp3],  %[streak],   -3          \n\t"                                        \
+        "addu  %[temp2],  %[pstreaks], %[temp1]    \n\t"                                        \
+        "blez  %[temp3],  1f                       \n\t"                                        \
+        "srl   %[temp1],  %[temp1],    1           \n\t"                                        \
+        "addu  %[temp3],  %[pcnts],    %[temp1]    \n\t"                                        \
+        "lw    %[temp0],  4(%[temp2])              \n\t"                                        \
+        "lw    %[temp1],  0(%[temp3])              \n\t"                                        \
+        "addu  %[temp0],  %[temp0],    %[streak]   \n\t"                                        \
+        "addiu %[temp1],  %[temp1],    1           \n\t"                                        \
+        "sw    %[temp0],  4(%[temp2])              \n\t"                                        \
+        "sw    %[temp1],  0(%[temp3])              \n\t"                                        \
+        "b     2f                                  \n\t"                                        \
+        "1:                                          \n\t"                                      \
+        "lw    %[temp0],  0(%[temp2])              \n\t"                                        \
+        "addu  %[temp0],  %[temp0],    %[streak]   \n\t"                                        \
+        "sw    %[temp0],  0(%[temp2])              \n\t"                                        \
+        "2:                                          \n\t"                                      \
+        : [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp0] "+r"(temp0) \
+        : [pstreaks] "r"(pstreaks), [pcnts] "r"(pcnts), [streak] "r"(streak)                    \
+        : "memory");
+
+// Accounts for the run that just ended and starts tracking the next one.
+// The finished run has value *val_prev and spans indices [*i_prev, i); its
+// length is folded into 'bit_entropy' (only when the value is non-zero) and
+// into the streak/count tables of 'stats'. On return *val_prev and *i_prev
+// hold 'val' and 'i', i.e. the start of the new run.
+static WEBP_INLINE void GetEntropyUnrefinedHelper(uint32_t val,
+                                                  int i,
+                                                  uint32_t* const val_prev,
+                                                  int* const i_prev,
+                                                  VP8LBitEntropy* const bit_entropy,
+                                                  VP8LStreaks* const stats) {
+    // pstreaks, pcnts, streak and temp0..temp3 are referenced by name from
+    // the HUFFMAN_COST_PASS macro below; do not rename them.
+    int* const pstreaks = &stats->streaks[0][0];
+    int* const pcnts = &stats->counts[0];
+    int temp0, temp1, temp2, temp3;
+    const int streak = i - *i_prev;
+
+    // Gather info for the bit entropy.
+    if (*val_prev != 0) {
+        bit_entropy->sum += (*val_prev) * streak;
+        bit_entropy->nonzeros += streak;
+        bit_entropy->nonzero_code = *i_prev;
+        bit_entropy->entropy -= VP8LFastSLog2(*val_prev) * streak;
+        if (bit_entropy->max_val < *val_prev) {
+            bit_entropy->max_val = *val_prev;
+        }
+    }
+
+    // Gather info for the Huffman cost.
+    // temp0 selects the zero (0) or non-zero (1) slot updated by the macro.
+    temp0 = (*val_prev != 0);
+    HUFFMAN_COST_PASS
+
+    // Begin the next run.
+    *val_prev = val;
+    *i_prev = i;
+}
+
+// The macros below are pasted together to form one inline-assembly statement:
+//   ASM_START  ADD_TO_OUT(...)  ASM_END_0 | ASM_END_1
+// ASM_START opens the statement, saves the assembler options (restored by the
+// ".set pop" at the end of ADD_TO_OUT), enables "at" and "macro", and places
+// the loop label "1:".
+#define ASM_START                                 \
+  __asm__ volatile(                                     \
+    ".set   push                            \n\t"       \
+    ".set   at                              \n\t"       \
+    ".set   macro                           \n\t"       \
+  "1:                                       \n\t"
+
+// P2 = P0 + P1
+// A..D - offsets
+// E - temp variable to tell macro
+//     if pointer should be incremented
+// literal_ and successive histograms could be unaligned
+// so we must use ulw and usw
+//
+// One loop iteration adds four 32-bit words (byte offsets A..D): the words
+// at P0 are loaded into temp0..temp3, the words at P1 into temp4..temp7, the
+// sums are stored through P2, and P0 and P2 advance by 16 bytes. P1 is
+// advanced separately only when E == 1; callers pass E == 0 when P1 and P2
+// name the same pointer, so that it is not advanced twice. The loop repeats
+// until P0 equals LoopEnd.
+#define ADD_TO_OUT(A, B, C, D, E, P0, P1, P2)       \
+    "ulw    %[temp0], " #A "(%[" #P0                \
+    "])    \n\t"                                    \
+    "ulw    %[temp1], " #B "(%[" #P0                \
+    "])    \n\t"                                    \
+    "ulw    %[temp2], " #C "(%[" #P0                \
+    "])    \n\t"                                    \
+    "ulw    %[temp3], " #D "(%[" #P0                \
+    "])    \n\t"                                    \
+    "ulw    %[temp4], " #A "(%[" #P1                \
+    "])    \n\t"                                    \
+    "ulw    %[temp5], " #B "(%[" #P1                \
+    "])    \n\t"                                    \
+    "ulw    %[temp6], " #C "(%[" #P1                \
+    "])    \n\t"                                    \
+    "ulw    %[temp7], " #D "(%[" #P1                \
+    "])    \n\t"                                    \
+    "addu   %[temp4], %[temp4],   %[temp0]  \n\t"   \
+    "addu   %[temp5], %[temp5],   %[temp1]  \n\t"   \
+    "addu   %[temp6], %[temp6],   %[temp2]  \n\t"   \
+    "addu   %[temp7], %[temp7],   %[temp3]  \n\t"   \
+    "addiu  %[" #P0 "],  %[" #P0                    \
+    "],  16    \n\t"                                \
+    ".if " #E                                       \
+    " == 1                          \n\t"           \
+    "addiu  %[" #P1 "],  %[" #P1                    \
+    "],  16    \n\t"                                \
+    ".endif                                   \n\t" \
+    "usw    %[temp4], " #A "(%[" #P2                \
+    "])    \n\t"                                    \
+    "usw    %[temp5], " #B "(%[" #P2                \
+    "])    \n\t"                                    \
+    "usw    %[temp6], " #C "(%[" #P2                \
+    "])    \n\t"                                    \
+    "usw    %[temp7], " #D "(%[" #P2                \
+    "])    \n\t"                                    \
+    "addiu  %[" #P2 "], %[" #P2                     \
+    "],   16    \n\t"                               \
+    "bne    %[" #P0                                 \
+    "], %[LoopEnd], 1b      \n\t"                   \
+    ".set   pop                             \n\t"
+
+// Output operands shared by both variants: the eight scratch registers plus
+// the 'pa' and 'pout' cursors, which the loop advances.
+#define ASM_END_COMMON_0                        \
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),         \
+      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),         \
+      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),         \
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),         \
+      [pa]"+r"(pa), [pout]"+r"(pout)
+
+// Input operand, clobber list and the closing of the asm statement.
+#define ASM_END_COMMON_1    \
+    : [LoopEnd]"r"(LoopEnd)                             \
+    : "memory", "at"                                    \
+  );
+
+// Operand list for the three-pointer form (adds the 'pb' cursor).
+#define ASM_END_0    \
+    ASM_END_COMMON_0 \
+    , [pb] "+r"(pb)ASM_END_COMMON_1
+
+// Operand list for the in-place form (only 'pa' and 'pout').
+#define ASM_END_1    \
+    ASM_END_COMMON_0 \
+    ASM_END_COMMON_1
+
+// OUT[k] = A[k] + B[k] for k in [0, SIZE + EXTRA_SIZE).
+// The first SIZE words go through the assembly loop (SIZE must be a multiple
+// of 4, asserted); the loop runs at least once, so SIZE must not be 0. The
+// EXTRA_SIZE trailing words are added in C, using the cursors as left by the
+// loop. Requires uint32_t locals temp0..temp7 at the expansion site.
+#define ADD_VECTOR(A, B, OUT, SIZE, EXTRA_SIZE)                 \
+    do {                                                        \
+        const uint32_t* pa = (const uint32_t*)(A);              \
+        const uint32_t* pb = (const uint32_t*)(B);              \
+        uint32_t* pout = (uint32_t*)(OUT);                      \
+        const uint32_t* const LoopEnd = pa + (SIZE);            \
+        assert((SIZE) % 4 == 0);                                \
+        ASM_START                                               \
+        ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout)                \
+        ASM_END_0                                               \
+        if ((EXTRA_SIZE) > 0) {                                 \
+            const int last = (EXTRA_SIZE);                      \
+            int i;                                              \
+            for (i = 0; i < last; ++i) pout[i] = pa[i] + pb[i]; \
+        }                                                       \
+    } while (0)
+
+// In-place variant: OUT[k] += A[k] for k in [0, SIZE + EXTRA_SIZE).
+// Same requirements as ADD_VECTOR; 'pout' serves as both second source and
+// destination, hence E == 0 in the ADD_TO_OUT invocation.
+#define ADD_VECTOR_EQ(A, OUT, SIZE, EXTRA_SIZE)          \
+    do {                                                 \
+        const uint32_t* pa = (const uint32_t*)(A);       \
+        uint32_t* pout = (uint32_t*)(OUT);               \
+        const uint32_t* const LoopEnd = pa + (SIZE);     \
+        assert((SIZE) % 4 == 0);                         \
+        ASM_START                                        \
+        ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout)       \
+        ASM_END_1                                        \
+        if ((EXTRA_SIZE) > 0) {                          \
+            const int last = (EXTRA_SIZE);               \
+            int i;                                       \
+            for (i = 0; i < last; ++i) pout[i] += pa[i]; \
+        }                                                \
+    } while (0)
+
+// Stores the element-wise sum of histograms 'a' and 'b' into 'out', covering
+// the literal_, distance_, red_, blue_ and alpha_ arrays. 'out' may be the same
+// object as 'b', in which case 'a' is accumulated into it in place. Both
+// inputs must have the same palette_code_bits_ (asserted).
+static void HistogramAdd(const VP8LHistogram* const a, const VP8LHistogram* const b, VP8LHistogram* const out) {
+    // Scratch registers referenced by name from the ADD_VECTOR* macros.
+    uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    // Number of literal_ entries beyond the fixed literal + length codes;
+    // these are summed by the plain C tail loop of the macros.
+    const int extra_cache_size = VP8LHistogramNumCodes(a->palette_code_bits_) - (NUM_LITERAL_CODES + NUM_LENGTH_CODES);
+    assert(a->palette_code_bits_ == b->palette_code_bits_);
+
+    if (b != out) {
+        // Distinct destination: out = a + b.
+        ADD_VECTOR(a->literal_, b->literal_, out->literal_, NUM_LITERAL_CODES + NUM_LENGTH_CODES, extra_cache_size);
+        ADD_VECTOR(a->distance_, b->distance_, out->distance_, NUM_DISTANCE_CODES, 0);
+        ADD_VECTOR(a->red_, b->red_, out->red_, NUM_LITERAL_CODES, 0);
+        ADD_VECTOR(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES, 0);
+        ADD_VECTOR(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES, 0);
+    } else {
+        // 'out' is 'b': out += a.
+        ADD_VECTOR_EQ(a->literal_, out->literal_, NUM_LITERAL_CODES + NUM_LENGTH_CODES, extra_cache_size);
+        ADD_VECTOR_EQ(a->distance_, out->distance_, NUM_DISTANCE_CODES, 0);
+        ADD_VECTOR_EQ(a->red_, out->red_, NUM_LITERAL_CODES, 0);
+        ADD_VECTOR_EQ(a->blue_, out->blue_, NUM_LITERAL_CODES, 0);
+        ADD_VECTOR_EQ(a->alpha_, out->alpha_, NUM_LITERAL_CODES, 0);
+    }
+}
+
+#undef ADD_VECTOR_EQ
+#undef ADD_VECTOR
+#undef ASM_END_1
+#undef ASM_END_0
+#undef ASM_END_COMMON_1
+#undef ASM_END_COMMON_0
+#undef ADD_TO_OUT
+#undef ASM_START
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LEncDspInitMIPS32(void);
+
+// Redirects the lossless-encoder hooks implemented in this file to their
+// MIPS32 versions.
+WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPS32(void) {
+    // Histogram and entropy helpers.
+    VP8LHistogramAdd = HistogramAdd;
+    VP8LGetEntropyUnrefinedHelper = GetEntropyUnrefinedHelper;
+
+    // Extra-bits cost estimators.
+    VP8LExtraCostCombined = ExtraCostCombined;
+    VP8LExtraCost = ExtraCost;
+
+    // Logarithm slow paths.
+    VP8LFastLog2Slow = FastLog2Slow;
+    VP8LFastSLog2Slow = FastSLog2Slow;
+}
+
+#else // !WEBP_USE_MIPS32
+
+WEBP_DSP_INIT_STUB(VP8LEncDspInitMIPS32)
+
+#endif // WEBP_USE_MIPS32
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/lossless_enc_mips_dsp_r2.c b/codec/L2/demos/webpEnc/host/src/dsp/lossless_enc_mips_dsp_r2.c
new file mode 100644
index 0000000000..84b00d6b71
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/lossless_enc_mips_dsp_r2.c
@@ -0,0 +1,262 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Image transform methods for lossless encoder.
+//
+// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
+//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+#include "./lossless.h"
+
+// Subtracts, in place, the green byte of every pixel from its red and blue
+// bytes (per-byte, wrapping); the alpha and green bytes are left unchanged.
+// Per pixel the assembly extracts bits 8..15 (green), replicates that value
+// into both 16-bit halves with replv.ph and subtracts it byte-wise with
+// subu.qb. The first loop handles four pixels per iteration, the second loop
+// the remaining (num_pixels & 3) pixels one at a time.
+// The code runs under ".set noreorder": the instruction written right after
+// each branch (indented by one space) sits in the branch delay slot.
+static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
+    uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    // End of the 4-pixel unrolled part, then end of the whole buffer.
+    uint32_t* const p_loop1_end = argb_data + (num_pixels & ~3);
+    uint32_t* const p_loop2_end = p_loop1_end + (num_pixels & 3);
+    __asm__ volatile(
+        ".set       push                                          \n\t"
+        ".set       noreorder                                     \n\t"
+        "beq        %[argb_data],    %[p_loop1_end],     3f       \n\t"
+        " nop                                                     \n\t"
+        "0:                                                         \n\t"
+        "lw         %[temp0],        0(%[argb_data])              \n\t"
+        "lw         %[temp1],        4(%[argb_data])              \n\t"
+        "lw         %[temp2],        8(%[argb_data])              \n\t"
+        "lw         %[temp3],        12(%[argb_data])             \n\t"
+        "ext        %[temp4],        %[temp0],           8,    8  \n\t"
+        "ext        %[temp5],        %[temp1],           8,    8  \n\t"
+        "ext        %[temp6],        %[temp2],           8,    8  \n\t"
+        "ext        %[temp7],        %[temp3],           8,    8  \n\t"
+        "addiu      %[argb_data],    %[argb_data],       16       \n\t"
+        "replv.ph   %[temp4],        %[temp4]                     \n\t"
+        "replv.ph   %[temp5],        %[temp5]                     \n\t"
+        "replv.ph   %[temp6],        %[temp6]                     \n\t"
+        "replv.ph   %[temp7],        %[temp7]                     \n\t"
+        "subu.qb    %[temp0],        %[temp0],           %[temp4] \n\t"
+        "subu.qb    %[temp1],        %[temp1],           %[temp5] \n\t"
+        "subu.qb    %[temp2],        %[temp2],           %[temp6] \n\t"
+        "subu.qb    %[temp3],        %[temp3],           %[temp7] \n\t"
+        "sw         %[temp0],        -16(%[argb_data])            \n\t"
+        "sw         %[temp1],        -12(%[argb_data])            \n\t"
+        "sw         %[temp2],        -8(%[argb_data])             \n\t"
+        "bne        %[argb_data],    %[p_loop1_end],     0b       \n\t"
+        " sw        %[temp3],        -4(%[argb_data])             \n\t"
+        "3:                                                         \n\t"
+        "beq        %[argb_data],    %[p_loop2_end],     2f       \n\t"
+        " nop                                                     \n\t"
+        "1:                                                         \n\t"
+        "lw         %[temp0],        0(%[argb_data])              \n\t"
+        "addiu      %[argb_data],    %[argb_data],       4        \n\t"
+        "ext        %[temp4],        %[temp0],           8,    8  \n\t"
+        "replv.ph   %[temp4],        %[temp4]                     \n\t"
+        "subu.qb    %[temp0],        %[temp0],           %[temp4] \n\t"
+        "bne        %[argb_data],    %[p_loop2_end],     1b       \n\t"
+        " sw        %[temp0],        -4(%[argb_data])             \n\t"
+        "2:                                                         \n\t"
+        ".set       pop                                           \n\t"
+        : [argb_data] "+&r"(argb_data), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+          [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [temp6] "=&r"(temp6), [temp7] "=&r"(temp7)
+        : [p_loop1_end] "r"(p_loop1_end), [p_loop2_end] "r"(p_loop2_end)
+        : "memory");
+}
+
+// Multiplies the signed predictor by the signed colour value, reinterprets
+// the int product as unsigned and shifts it right by 5 bits. Because the
+// shift is done on the unsigned value, no sign bits are shifted in.
+static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred, int8_t color) {
+    const int product = (int)color_pred * (int)color;
+    const uint32_t product_bits = (uint32_t)product;
+    return product_bits >> 5;
+}
+
+// Applies the colour transform described by 'm' in place to 'num_pixels' ARGB
+// values starting at 'data'. The assembly loop processes two pixels per
+// iteration over the first (num_pixels & ~1) pixels; a trailing odd pixel is
+// handled by the C code after the loop.
+// The assembly runs under ".set noreorder": the instruction written right
+// after each branch (indented by one space) sits in the branch delay slot.
+static void TransformColor(const VP8LMultipliers* const m, uint32_t* data, int num_pixels) {
+    int temp0, temp1, temp2, temp3, temp4, temp5;
+    uint32_t argb, argb1, new_red, new_red1;
+    const uint32_t G_to_R = m->green_to_red_;
+    const uint32_t G_to_B = m->green_to_blue_;
+    const uint32_t R_to_B = m->red_to_blue_;
+    uint32_t* const p_loop_end = data + (num_pixels & ~1);
+    // Before the loop, each multiplier is replicated into both 16-bit halves
+    // of a register and sign-extended from 8 bits (shll.ph 8 / shra.ph 8).
+    // 'data' is an in/out operand and points past the last pixel pair when
+    // the statement finishes.
+    __asm__ volatile(
+        ".set            push                                    \n\t"
+        ".set            noreorder                               \n\t"
+        "beq             %[data],      %[p_loop_end],  1f        \n\t"
+        " nop                                                    \n\t"
+        "replv.ph        %[temp0],     %[G_to_R]                 \n\t"
+        "replv.ph        %[temp1],     %[G_to_B]                 \n\t"
+        "replv.ph        %[temp2],     %[R_to_B]                 \n\t"
+        "shll.ph         %[temp0],     %[temp0],       8         \n\t"
+        "shll.ph         %[temp1],     %[temp1],       8         \n\t"
+        "shll.ph         %[temp2],     %[temp2],       8         \n\t"
+        "shra.ph         %[temp0],     %[temp0],       8         \n\t"
+        "shra.ph         %[temp1],     %[temp1],       8         \n\t"
+        "shra.ph         %[temp2],     %[temp2],       8         \n\t"
+        "0:                                                        \n\t"
+        "lw              %[argb],      0(%[data])                \n\t"
+        "lw              %[argb1],     4(%[data])                \n\t"
+        "lhu             %[new_red],   2(%[data])                \n\t"
+        "lhu             %[new_red1],  6(%[data])                \n\t"
+        "precrq.qb.ph    %[temp3],     %[argb],        %[argb1]  \n\t"
+        "precr.qb.ph     %[temp4],     %[argb],        %[argb1]  \n\t"
+        "preceu.ph.qbra  %[temp3],     %[temp3]                  \n\t"
+        "preceu.ph.qbla  %[temp4],     %[temp4]                  \n\t"
+        "shll.ph         %[temp3],     %[temp3],       8         \n\t"
+        "shll.ph         %[temp4],     %[temp4],       8         \n\t"
+        "shra.ph         %[temp3],     %[temp3],       8         \n\t"
+        "shra.ph         %[temp4],     %[temp4],       8         \n\t"
+        "mul.ph          %[temp5],     %[temp3],       %[temp0]  \n\t"
+        "mul.ph          %[temp3],     %[temp3],       %[temp1]  \n\t"
+        "mul.ph          %[temp4],     %[temp4],       %[temp2]  \n\t"
+        "addiu           %[data],      %[data],        8         \n\t"
+        "ins             %[new_red1],  %[new_red],     16,   16  \n\t"
+        "ins             %[argb1],     %[argb],        16,   16  \n\t"
+        "shra.ph         %[temp5],     %[temp5],       5         \n\t"
+        "shra.ph         %[temp3],     %[temp3],       5         \n\t"
+        "shra.ph         %[temp4],     %[temp4],       5         \n\t"
+        "subu.ph         %[new_red1],  %[new_red1],    %[temp5]  \n\t"
+        "subu.ph         %[argb1],     %[argb1],       %[temp3]  \n\t"
+        "preceu.ph.qbra  %[temp5],     %[new_red1]               \n\t"
+        "subu.ph         %[argb1],     %[argb1],       %[temp4]  \n\t"
+        "preceu.ph.qbra  %[temp3],     %[argb1]                  \n\t"
+        "sb              %[temp5],     -2(%[data])               \n\t"
+        "sb              %[temp3],     -4(%[data])               \n\t"
+        "sra             %[temp5],     %[temp5],       16        \n\t"
+        "sra             %[temp3],     %[temp3],       16        \n\t"
+        "sb              %[temp5],     -6(%[data])               \n\t"
+        "bne             %[data],      %[p_loop_end],  0b        \n\t"
+        " sb             %[temp3],     -8(%[data])               \n\t"
+        "1:                                                        \n\t"
+        ".set            pop                                     \n\t"
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+          [temp5] "=&r"(temp5), [new_red1] "=&r"(new_red1), [new_red] "=&r"(new_red), [argb] "=&r"(argb),
+          [argb1] "=&r"(argb1), [data] "+&r"(data)
+        : [G_to_R] "r"(G_to_R), [R_to_B] "r"(R_to_B), [G_to_B] "r"(G_to_B), [p_loop_end] "r"(p_loop_end)
+        : "memory", "hi", "lo");
+
+    // Odd pixel count: transform the last pixel in plain C. 'data' already
+    // points at it because the assembly advanced the pointer.
+    if (num_pixels & 1) {
+        const uint32_t argb_ = data[0];
+        const uint32_t green = argb_ >> 8;
+        const uint32_t red = argb_ >> 16;
+        uint32_t new_blue = argb_;
+        // new_red = red - delta(green_to_red, green), kept to 8 bits.
+        new_red = red;
+        new_red -= ColorTransformDelta(m->green_to_red_, green);
+        new_red &= 0xff;
+        // new_blue = blue - delta(green_to_blue, green)
+        //                 - delta(red_to_blue, red), kept to 8 bits.
+        new_blue -= ColorTransformDelta(m->green_to_blue_, green);
+        new_blue -= ColorTransformDelta(m->red_to_blue_, red);
+        new_blue &= 0xff;
+        // Alpha and green bytes are preserved; red and blue are replaced.
+        data[0] = (argb_ & 0xff00ff00u) | (new_red << 16) | (new_blue);
+    }
+}
+
+// Returns the low byte of 'argb' (blue) minus the two transform deltas: one
+// derived from the green byte via 'green_to_blue', one from the red byte via
+// 'red_to_blue'. All arithmetic wraps modulo 256.
+static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue, uint8_t red_to_blue, uint32_t argb) {
+    // The shifted words are narrowed to int8_t by the callee's parameters.
+    const uint32_t green_delta = ColorTransformDelta(green_to_blue, argb >> 8);
+    const uint32_t red_delta = ColorTransformDelta(red_to_blue, argb >> 16);
+    return (uint8_t)(argb - green_delta - red_delta);
+}
+
+// Builds a histogram of transformed values over a tile: for each of the
+// 'tile_height' rows (rows are 'stride' pixels apart) it walks 'tile_width'
+// pixels of 'argb' and increments histo[value] once per pixel. Pixels are
+// processed in pairs by the assembly; a trailing odd pixel in a row goes
+// through TransformColorBlue(). 'histo' is indexed with 8-bit values, so it
+// must have room for 256 entries.
+static void CollectColorBlueTransforms(const uint32_t* argb,
+                                       int stride,
+                                       int tile_width,
+                                       int tile_height,
+                                       int green_to_blue,
+                                       int red_to_blue,
+                                       int histo[]) {
+    // Each multiplier is packed into both 16-bit halves so that one mul.ph
+    // handles the two pixels of a pair.
+    const int rtb = (red_to_blue << 16) | (red_to_blue & 0xffff);
+    const int gtb = (green_to_blue << 16) | (green_to_blue & 0xffff);
+    // Keeps the low byte of each 16-bit half.
+    const uint32_t mask = 0xff00ffu;
+    while (tile_height-- > 0) {
+        int x;
+        const uint32_t* p_argb = argb;
+        argb += stride;
+        for (x = 0; x < (tile_width >> 1); ++x) {
+            int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
+            // Loads two pixels, advances p_argb by 8 bytes and leaves the two
+            // results in byte 0 and byte 2 of temp2.
+            __asm__ volatile(
+                "lw           %[temp0],  0(%[p_argb])             \n\t"
+                "lw           %[temp1],  4(%[p_argb])             \n\t"
+                "precr.qb.ph  %[temp2],  %[temp0],  %[temp1]      \n\t"
+                "ins          %[temp1],  %[temp0],  16,    16     \n\t"
+                "shra.ph      %[temp2],  %[temp2],  8             \n\t"
+                "shra.ph      %[temp3],  %[temp1],  8             \n\t"
+                "mul.ph       %[temp5],  %[temp2],  %[rtb]        \n\t"
+                "mul.ph       %[temp6],  %[temp3],  %[gtb]        \n\t"
+                "and          %[temp4],  %[temp1],  %[mask]       \n\t"
+                "addiu        %[p_argb], %[p_argb], 8             \n\t"
+                "shra.ph      %[temp5],  %[temp5],  5             \n\t"
+                "shra.ph      %[temp6],  %[temp6],  5             \n\t"
+                "subu.qb      %[temp2],  %[temp4],  %[temp5]      \n\t"
+                "subu.qb      %[temp2],  %[temp2],  %[temp6]      \n\t"
+                : [p_argb] "+&r"(p_argb), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+                  [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [temp6] "=&r"(temp6)
+                : [rtb] "r"(rtb), [gtb] "r"(gtb), [mask] "r"(mask)
+                : "memory", "hi", "lo");
+            // One histogram hit per pixel of the pair.
+            ++histo[(uint8_t)(temp2 >> 16)];
+            ++histo[(uint8_t)temp2];
+        }
+        // Odd width: p_argb now points at the last pixel of the row.
+        if (tile_width & 1) {
+            ++histo[TransformColorBlue(green_to_blue, red_to_blue, *p_argb)];
+        }
+    }
+}
+
+// Returns bits 16..23 of 'argb' (red) minus the transform delta derived from
+// the green byte via 'green_to_red', reduced to 8 bits.
+static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red, uint32_t argb) {
+    // The shifted word is narrowed to int8_t by the callee's parameter.
+    const uint32_t green_delta = ColorTransformDelta(green_to_red, argb >> 8);
+    const uint32_t shifted_red = argb >> 16;
+    return (uint8_t)((shifted_red - green_delta) & 0xff);
+}
+
+// Builds a histogram of transformed values over a tile: for each of the
+// 'tile_height' rows (rows are 'stride' pixels apart) it walks 'tile_width'
+// pixels of 'argb' and increments histo[value] once per pixel. Pixels are
+// processed in pairs by the assembly; a trailing odd pixel in a row goes
+// through TransformColorRed(). 'histo' is indexed with 8-bit values, so it
+// must have room for 256 entries.
+static void CollectColorRedTransforms(
+    const uint32_t* argb, int stride, int tile_width, int tile_height, int green_to_red, int histo[]) {
+    // The multiplier is packed into both 16-bit halves so that one mul.ph
+    // handles the two pixels of a pair.
+    const int gtr = (green_to_red << 16) | (green_to_red & 0xffff);
+    while (tile_height-- > 0) {
+        int x;
+        const uint32_t* p_argb = argb;
+        argb += stride;
+        for (x = 0; x < (tile_width >> 1); ++x) {
+            int temp0, temp1, temp2, temp3, temp4;
+            // Loads two pixels, advances p_argb by 8 bytes and leaves the two
+            // results in byte 0 and byte 2 of temp2.
+            __asm__ volatile(
+                "lw           %[temp0],  0(%[p_argb])             \n\t"
+                "lw           %[temp1],  4(%[p_argb])             \n\t"
+                "precrq.ph.w  %[temp4],  %[temp0],  %[temp1]      \n\t"
+                "ins          %[temp1],  %[temp0],  16,    16     \n\t"
+                "shra.ph      %[temp3],  %[temp1],  8             \n\t"
+                "mul.ph       %[temp2],  %[temp3],  %[gtr]        \n\t"
+                "addiu        %[p_argb], %[p_argb], 8             \n\t"
+                "shra.ph      %[temp2],  %[temp2],  5             \n\t"
+                "subu.qb      %[temp2],  %[temp4],  %[temp2]      \n\t"
+                : [p_argb] "+&r"(p_argb), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
+                  [temp3] "=&r"(temp3), [temp4] "=&r"(temp4)
+                : [gtr] "r"(gtr)
+                : "memory", "hi", "lo");
+            // One histogram hit per pixel of the pair.
+            ++histo[(uint8_t)(temp2 >> 16)];
+            ++histo[(uint8_t)temp2];
+        }
+        // Odd width: p_argb now points at the last pixel of the row.
+        if (tile_width & 1) {
+            ++histo[TransformColorRed(green_to_red, *p_argb)];
+        }
+    }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LEncDspInitMIPSdspR2(void);
+
+// Redirects the lossless-encoder colour-transform hooks to the MIPS DSP r2
+// implementations defined in this file.
+WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPSdspR2(void) {
+    // Histogram collectors used when evaluating candidate transforms.
+    VP8LCollectColorRedTransforms = CollectColorRedTransforms;
+    VP8LCollectColorBlueTransforms = CollectColorBlueTransforms;
+
+    // In-place pixel transforms.
+    VP8LTransformColor = TransformColor;
+    VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
+}
+
+#else // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(VP8LEncDspInitMIPSdspR2)
+
+#endif // WEBP_USE_MIPS_DSP_R2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/lossless_enc_neon.c b/codec/L2/demos/webpEnc/host/src/dsp/lossless_enc_neon.c
new file mode 100644
index 0000000000..caba8de724
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/lossless_enc_neon.c
@@ -0,0 +1,127 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// NEON variant of methods for lossless encoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include <arm_neon.h>
+
+#include "./lossless.h"
+#include "./neon.h"
+
+//------------------------------------------------------------------------------
+// Subtract-Green Transform
+
+// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
+// non-standard versions there.
+#if defined(__APPLE__) && defined(__aarch64__) && defined(__apple_build_version__) && \
+    (__apple_build_version__ < 6020037)
+#define USE_VTBLQ
+#endif
+
+#ifdef USE_VTBLQ
+// Per pixel, bytes 0 and 2 select byte 1 of that pixel; 255 = byte will be zeroed
+static const uint8_t kGreenShuffle[16] = {1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255};
+
+static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb, const uint8x16_t shuffle) { // 16-byte-table variant
+    return vcombine_u8(vtbl1q_u8(argb, vget_low_u8(shuffle)), vtbl1q_u8(argb, vget_high_u8(shuffle))); // low | high
+}
+#else  // !USE_VTBLQ
+// Same selection for one 8-byte half (two pixels); 255 = byte will be zeroed
+static const uint8_t kGreenShuffle[8] = {1, 255, 1, 255, 5, 255, 5, 255};
+
+static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb, const uint8x8_t shuffle) { // per-half variant
+    return vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle), vtbl1_u8(vget_high_u8(argb), shuffle)); // low | high
+}
+#endif // USE_VTBLQ
+
+static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {  // in place, four pixels at a time
+#ifdef USE_VTBLQ
+    const uint8x16_t green_lut = vld1q_u8(kGreenShuffle);
+#else
+    const uint8x8_t green_lut = vld1_u8(kGreenShuffle);
+#endif
+    const uint32_t* const simd_end = argb_data + (num_pixels & ~3);  // end of the last full group of four
+    while (argb_data < simd_end) {
+        uint8_t* const bytes = (uint8_t*)argb_data;
+        const uint8x16_t pixels = vld1q_u8(bytes);
+        vst1q_u8(bytes, vsubq_u8(pixels, DoGreenShuffle(pixels, green_lut)));
+        argb_data += 4;
+    }
+    VP8LSubtractGreenFromBlueAndRed_C(argb_data, num_pixels & 3);  // plain-C code finishes the leftovers
+}
+
+//------------------------------------------------------------------------------
+// Color Transform
+
+static void TransformColor(const VP8LMultipliers* const m, uint32_t* argb_data, int num_pixels) { // in-place transform
+// Sign-extended multiplying constants, pre-shifted by 6 (used with the doubling multiply vqdmulhq_s16 below).
+#define CST(X) (((int16_t)(m->X << 8)) >> 6)
+    const int16_t rb[8] = {CST(green_to_blue_), CST(green_to_red_), CST(green_to_blue_), CST(green_to_red_),
+                           CST(green_to_blue_), CST(green_to_red_), CST(green_to_blue_), CST(green_to_red_)};
+    const int16x8_t mults_rb = vld1q_s16(rb); // per pixel: green_to_blue_ in the low lane, green_to_red_ in the high
+    const int16_t b2[8] = {
+        0, CST(red_to_blue_), 0, CST(red_to_blue_), 0, CST(red_to_blue_), 0, CST(red_to_blue_),
+    };
+    const int16x8_t mults_b2 = vld1q_s16(b2); // per pixel: 0 in the low lane, red_to_blue_ in the high lane
+#undef CST
+#ifdef USE_VTBLQ
+    static const uint8_t kg0g0[16] = {255, 1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13};
+    const uint8x16_t shuffle = vld1q_u8(kg0g0); // table for four pixels
+#else
+    static const uint8_t k0g0g[8] = {255, 1, 255, 1, 255, 5, 255, 5};
+    const uint8x8_t shuffle = vld1_u8(k0g0g); // table for two pixels, applied to each half
+#endif
+    const uint32x4_t mask_rb = vdupq_n_u32(0x00ff00ffu); // red-blue masks
+    int i;
+    for (i = 0; i + 4 <= num_pixels; i += 4) { // four pixels per iteration
+        const uint8x16_t in = vld1q_u8((uint8_t*)(argb_data + i));
+        // bytes in memory order: 0 g 0 g (byte 1 of each pixel copied to bytes 1 and 3)
+        const uint8x16_t greens = DoGreenShuffle(in, shuffle);
+        // x dr  x db1   (contribution of green to red and to blue)
+        const int16x8_t A = vqdmulhq_s16(vreinterpretq_s16_u8(greens), mults_rb);
+        // r 0   b   0   (low byte of every 16-bit lane moved to its high byte)
+        const int16x8_t B = vshlq_n_s16(vreinterpretq_s16_u8(in), 8);
+        // x db2 0   0   (contribution of red to blue, still in the red lane)
+        const int16x8_t C = vqdmulhq_s16(B, mults_b2);
+        // 0 0   x db2   (moved down into the blue lane)
+        const uint32x4_t D = vshrq_n_u32(vreinterpretq_u32_s16(C), 16);
+        // x dr  x  db   (byte-wise sum of both contributions)
+        const int8x16_t E = vaddq_s8(vreinterpretq_s8_u32(D), vreinterpretq_s8_s16(A));
+        // 0 dr  0  db   (only the red and blue bytes are kept)
+        const uint32x4_t F = vandq_u32(vreinterpretq_u32_s8(E), mask_rb);
+        const int8x16_t out = vsubq_s8(vreinterpretq_s8_u8(in), vreinterpretq_s8_u32(F)); // byte-wise in - deltas
+        vst1q_s8((int8_t*)(argb_data + i), out);
+    }
+    // fallthrough and finish off with plain-C (the num_pixels - i pixels not covered by the loop)
+    VP8LTransformColor_C(m, argb_data + i, num_pixels - i);
+}
+
+#undef USE_VTBLQ
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LEncDspInitNEON(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitNEON(void) {  // installs the two NEON routines of this file
+    VP8LTransformColor = &TransformColor;
+    VP8LSubtractGreenFromBlueAndRed = &SubtractGreenFromBlueAndRed;
+}
+
+#else // !WEBP_USE_NEON
+
+WEBP_DSP_INIT_STUB(VP8LEncDspInitNEON)
+
+#endif // WEBP_USE_NEON
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/lossless_enc_sse2.c b/codec/L2/demos/webpEnc/host/src/dsp/lossless_enc_sse2.c
new file mode 100644
index 0000000000..c8a09d18c5
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/lossless_enc_sse2.c
@@ -0,0 +1,335 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 variant of methods for lossless encoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+#include <assert.h>
+#include <emmintrin.h>
+#include "./lossless.h"
+
+// Sign-extended multiplying constant, pre-shifted by 5; X is parenthesized so expression arguments expand safely.
+#define CST_5b(X) (((int16_t)((uint16_t)(X) << 8)) >> 5)
+
+//------------------------------------------------------------------------------
+// Subtract-Green Transform
+
+static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
+    int done = 0;  // pixels already handled by the 4-wide loop
+    while (done + 4 <= num_pixels) {
+        __m128i* const quad = (__m128i*)(argb_data + done);
+        const __m128i argb = _mm_loadu_si128(quad);
+        const __m128i a_g = _mm_srli_epi16(argb, 8);  // 0 a 0 g
+        const __m128i g_low = _mm_shufflelo_epi16(a_g, _MM_SHUFFLE(2, 2, 0, 0));  // low two pixels: 0 g 0 g
+        const __m128i g_all = _mm_shufflehi_epi16(g_low, _MM_SHUFFLE(2, 2, 0, 0));  // all four pixels: 0 g 0 g
+        _mm_storeu_si128(quad, _mm_sub_epi8(argb, g_all));
+        done += 4;
+    }
+    VP8LSubtractGreenFromBlueAndRed_C(argb_data + done, num_pixels - done);  // plain-C for the remainder
+}
+
+//------------------------------------------------------------------------------
+// Color Transform
+
+static void TransformColor(const VP8LMultipliers* const m, uint32_t* argb_data, int num_pixels) {
+    const int16_t g2r = CST_5b(m->green_to_red_);
+    const int16_t g2b = CST_5b(m->green_to_blue_);
+    const int16_t r2b = CST_5b(m->red_to_blue_);
+    const __m128i green_mults = _mm_set_epi16(g2r, g2b, g2r, g2b, g2r, g2b, g2r, g2b);  // per pixel: high g2r, low g2b
+    const __m128i red_mults = _mm_set_epi16(r2b, 0, r2b, 0, r2b, 0, r2b, 0);            // per pixel: high r2b, low 0
+    const __m128i keep_ag = _mm_set1_epi32(0xff00ff00);  // selects the alpha and green bytes
+    const __m128i keep_rb = _mm_set1_epi32(0x00ff00ff);  // selects the red and blue bytes
+    int pos = 0;
+    // Four pixels are transformed in place per iteration.
+    while (pos + 4 <= num_pixels) {
+        __m128i* const quad = (__m128i*)(argb_data + pos);
+        const __m128i argb = _mm_loadu_si128(quad);
+        const __m128i a0g0 = _mm_and_si128(argb, keep_ag);                              // a 0 g 0
+        const __m128i g0g0_low = _mm_shufflelo_epi16(a0g0, _MM_SHUFFLE(2, 2, 0, 0));
+        const __m128i g0g0 = _mm_shufflehi_epi16(g0g0_low, _MM_SHUFFLE(2, 2, 0, 0));    // g 0 g 0
+        const __m128i from_green = _mm_mulhi_epi16(g0g0, green_mults);                  // x dr x db1
+        const __m128i r0b0 = _mm_slli_epi16(argb, 8);                                   // r 0 b 0
+        const __m128i from_red_high = _mm_mulhi_epi16(r0b0, red_mults);                 // x db2 0 0
+        const __m128i from_red = _mm_srli_epi32(from_red_high, 16);                     // 0 0 x db2
+        const __m128i deltas = _mm_and_si128(_mm_add_epi8(from_red, from_green), keep_rb);  // 0 dr 0 db
+        _mm_storeu_si128(quad, _mm_sub_epi8(argb, deltas));
+        pos += 4;
+    }
+    VP8LTransformColor_C(m, argb_data + pos, num_pixels - pos);  // plain-C for whatever is left
+}
+
+//------------------------------------------------------------------------------
+#define SPAN 8
+static void CollectColorBlueTransforms(const uint32_t* argb, // first pixel of the tile
+                                       int stride, // offset between consecutive rows, in uint32_t pixels
+                                       int tile_width, // pixels processed per row
+                                       int tile_height, // rows processed
+                                       int green_to_blue, // multiplier for the green contribution
+                                       int red_to_blue, // multiplier for the red contribution
+                                       int histo[]) { // histo[v] is incremented for each transformed blue value v
+    const __m128i mults_r =
+        _mm_set_epi16(CST_5b(red_to_blue), 0, CST_5b(red_to_blue), 0, CST_5b(red_to_blue), 0, CST_5b(red_to_blue), 0);
+    const __m128i mults_g = _mm_set_epi16(0, CST_5b(green_to_blue), 0, CST_5b(green_to_blue), 0, CST_5b(green_to_blue),
+                                          0, CST_5b(green_to_blue)); // red constant sits in the high lane, green in the low
+    const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask
+    const __m128i mask_b = _mm_set1_epi32(0x0000ff); // blue mask
+    int y;
+    for (y = 0; y < tile_height; ++y) {
+        const uint32_t* const src = argb + y * stride; // start of row y
+        int i, x;
+        for (x = 0; x + SPAN <= tile_width; x += SPAN) { // SPAN pixels per iteration, as two 4-pixel vectors
+            uint16_t values[SPAN]; // transformed blue values of this group
+            const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x + 0]);
+            const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]);
+            const __m128i A0 = _mm_slli_epi16(in0, 8); // r 0  | b 0
+            const __m128i A1 = _mm_slli_epi16(in1, 8);
+            const __m128i B0 = _mm_and_si128(in0, mask_g); // 0 0  | g 0
+            const __m128i B1 = _mm_and_si128(in1, mask_g);
+            const __m128i C0 = _mm_mulhi_epi16(A0, mults_r); // x db | 0 0   (red contribution)
+            const __m128i C1 = _mm_mulhi_epi16(A1, mults_r);
+            const __m128i D0 = _mm_mulhi_epi16(B0, mults_g); // 0 0  | x db  (green contribution)
+            const __m128i D1 = _mm_mulhi_epi16(B1, mults_g);
+            const __m128i E0 = _mm_sub_epi8(in0, D0); // x x  | x b'
+            const __m128i E1 = _mm_sub_epi8(in1, D1);
+            const __m128i F0 = _mm_srli_epi32(C0, 16); // 0 0  | x db
+            const __m128i F1 = _mm_srli_epi32(C1, 16);
+            const __m128i G0 = _mm_sub_epi8(E0, F0); // x x  | x b'  (both contributions removed from blue)
+            const __m128i G1 = _mm_sub_epi8(E1, F1);
+            const __m128i H0 = _mm_and_si128(G0, mask_b); // 0 0  | 0 b
+            const __m128i H1 = _mm_and_si128(G1, mask_b);
+            const __m128i I = _mm_packs_epi32(H0, H1); // 0 b' | 0 b'  (eight 16-bit values, each below 256)
+            _mm_storeu_si128((__m128i*)values, I);
+            for (i = 0; i < SPAN; ++i) ++histo[values[i]];
+        }
+    }
+    { // columns not covered by the SPAN-wide loop are handed, for all rows, to the plain-C version
+        const int left_over = tile_width & (SPAN - 1);
+        if (left_over > 0) {
+            VP8LCollectColorBlueTransforms_C(argb + tile_width - left_over, stride, left_over, tile_height,
+                                             green_to_blue, red_to_blue, histo);
+        }
+    }
+}
+
+static void CollectColorRedTransforms( // histo[v] is incremented for each transformed red value v of the tile
+    const uint32_t* argb, int stride, int tile_width, int tile_height, int green_to_red, int histo[]) {
+    const __m128i mults_g = _mm_set_epi16(0, CST_5b(green_to_red), 0, CST_5b(green_to_red), 0, CST_5b(green_to_red), 0,
+                                          CST_5b(green_to_red)); // constant in the low 16-bit lane of every pixel
+    const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask
+    const __m128i mask = _mm_set1_epi32(0xff); // keeps the lowest byte of every pixel
+
+    int y;
+    for (y = 0; y < tile_height; ++y) {
+        const uint32_t* const src = argb + y * stride; // start of row y; stride counts uint32_t pixels
+        int i, x;
+        for (x = 0; x + SPAN <= tile_width; x += SPAN) { // SPAN pixels per iteration, as two 4-pixel vectors
+            uint16_t values[SPAN]; // transformed red values of this group
+            const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x + 0]);
+            const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]);
+            const __m128i A0 = _mm_and_si128(in0, mask_g); // 0 0  | g 0
+            const __m128i A1 = _mm_and_si128(in1, mask_g);
+            const __m128i B0 = _mm_srli_epi32(in0, 16); // 0 0  | x r
+            const __m128i B1 = _mm_srli_epi32(in1, 16);
+            const __m128i C0 = _mm_mulhi_epi16(A0, mults_g); // 0 0  | x dr
+            const __m128i C1 = _mm_mulhi_epi16(A1, mults_g);
+            const __m128i E0 = _mm_sub_epi8(B0, C0); // x x  | x r'
+            const __m128i E1 = _mm_sub_epi8(B1, C1);
+            const __m128i F0 = _mm_and_si128(E0, mask); // 0 0  | 0 r'
+            const __m128i F1 = _mm_and_si128(E1, mask);
+            const __m128i I = _mm_packs_epi32(F0, F1); // eight 16-bit values, each below 256
+            _mm_storeu_si128((__m128i*)values, I);
+            for (i = 0; i < SPAN; ++i) ++histo[values[i]];
+        }
+    }
+    { // columns not covered by the SPAN-wide loop are handed, for all rows, to the plain-C version
+        const int left_over = tile_width & (SPAN - 1);
+        if (left_over > 0) {
+            VP8LCollectColorRedTransforms_C(argb + tile_width - left_over, stride, left_over, tile_height, green_to_red,
+                                            histo);
+        }
+    }
+}
+#undef SPAN
+
+//------------------------------------------------------------------------------
+
+#define LINE_SIZE 16 // 8 or 16
+// Element-wise sum: out[k] = a[k] + b[k] for k in [0, size).
+// 'size' has to be a multiple of LINE_SIZE (asserted).
+static void AddVector(const uint32_t* a, const uint32_t* b, uint32_t* out, int size) {
+    int base;
+    assert(size % LINE_SIZE == 0);
+    for (base = 0; base < size; base += LINE_SIZE) {
+        const __m128i* const va = (const __m128i*)(a + base);
+        const __m128i* const vb = (const __m128i*)(b + base);
+        __m128i* const vo = (__m128i*)(out + base);
+        // Every sum of the chunk is formed before any of it is written back.
+        const __m128i s0 = _mm_add_epi32(_mm_loadu_si128(va + 0), _mm_loadu_si128(vb + 0));
+        const __m128i s1 = _mm_add_epi32(_mm_loadu_si128(va + 1), _mm_loadu_si128(vb + 1));
+#if (LINE_SIZE == 16)
+        const __m128i s2 = _mm_add_epi32(_mm_loadu_si128(va + 2), _mm_loadu_si128(vb + 2));
+        const __m128i s3 = _mm_add_epi32(_mm_loadu_si128(va + 3), _mm_loadu_si128(vb + 3));
+#endif
+        _mm_storeu_si128(vo + 0, s0);
+        _mm_storeu_si128(vo + 1, s1);
+#if (LINE_SIZE == 16)
+        _mm_storeu_si128(vo + 2, s2);
+        _mm_storeu_si128(vo + 3, s3);
+#endif
+    }
+}
+
+// In-place accumulation: out[k] += a[k] for k in [0, size).
+// 'size' has to be a multiple of LINE_SIZE (asserted).
+static void AddVectorEq(const uint32_t* a, uint32_t* out, int size) {
+    int base;
+    assert(size % LINE_SIZE == 0);
+    for (base = 0; base < size; base += LINE_SIZE) {
+        // Vector views of the current chunk: four uint32_t values per __m128i.
+        const __m128i* const va = (const __m128i*)(a + base);
+        __m128i* const vo = (__m128i*)(out + base);
+        // Every sum of the chunk is formed before any of it is written back.
+        const __m128i s0 = _mm_add_epi32(_mm_loadu_si128(va + 0), _mm_loadu_si128(vo + 0));
+        const __m128i s1 = _mm_add_epi32(_mm_loadu_si128(va + 1), _mm_loadu_si128(vo + 1));
+#if (LINE_SIZE == 16)
+        const __m128i s2 = _mm_add_epi32(_mm_loadu_si128(va + 2), _mm_loadu_si128(vo + 2));
+        const __m128i s3 = _mm_add_epi32(_mm_loadu_si128(va + 3), _mm_loadu_si128(vo + 3));
+#endif
+        _mm_storeu_si128(vo + 0, s0);
+        _mm_storeu_si128(vo + 1, s1);
+#if (LINE_SIZE == 16)
+        _mm_storeu_si128(vo + 2, s2);
+        _mm_storeu_si128(vo + 3, s3);
+#endif
+    }
+}
+#undef LINE_SIZE
+
+// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
+// that's ok since the histogram values are less than 1<<28 (max picture size).
+static void HistogramAdd(const VP8LHistogram* const a, const VP8LHistogram* const b, VP8LHistogram* const out) {
+    const int num_literals = VP8LHistogramNumCodes(a->palette_code_bits_);
+    int k = NUM_LITERAL_CODES;  // the vector helpers below cover [0, NUM_LITERAL_CODES)
+    assert(a->palette_code_bits_ == b->palette_code_bits_);
+    if (b == out) {  // 'out' is 'b' itself: accumulate 'a' into it
+        AddVectorEq(a->literal_, out->literal_, NUM_LITERAL_CODES);
+        AddVectorEq(a->red_, out->red_, NUM_LITERAL_CODES);
+        AddVectorEq(a->blue_, out->blue_, NUM_LITERAL_CODES);
+        AddVectorEq(a->alpha_, out->alpha_, NUM_LITERAL_CODES);
+    } else {
+        AddVector(a->literal_, b->literal_, out->literal_, NUM_LITERAL_CODES);
+        AddVector(a->red_, b->red_, out->red_, NUM_LITERAL_CODES);
+        AddVector(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES);
+        AddVector(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES);
+    }
+    for (; k < num_literals; ++k) {  // literal codes past NUM_LITERAL_CODES, one at a time
+        out->literal_[k] = a->literal_[k] + b->literal_[k];
+    }
+    for (k = 0; k < NUM_DISTANCE_CODES; ++k) {
+        out->distance_[k] = a->distance_[k] + b->distance_[k];
+    }
+}
+
+//------------------------------------------------------------------------------
+// Entropy
+
+// Subtracts VP8LFastSLog2(x_or_y[i + j]) from 'retval' when that entry is non-zero.
+// Used in loop unrolling; relies on 'i' and 'retval' being in scope where it expands.
+#define ANALYZE_X_OR_Y(x_or_y, j)                                       \
+    do {                                                                \
+        if (x_or_y[i + j] != 0) retval -= VP8LFastSLog2(x_or_y[i + j]); \
+    } while (0)
+
+// Same for the X + Y sum held in tmp[j]; X[i + j] is only examined when that sum is non-zero.
+// Used in loop unrolling; relies on 'tmp', 'X', 'i' and 'retval' being in scope.
+#define ANALYZE_XY(j)                        \
+    do {                                     \
+        if (tmp[j] != 0) {                   \
+            retval -= VP8LFastSLog2(tmp[j]); \
+            ANALYZE_X_OR_Y(X, j);            \
+        }                                    \
+    } while (0)
+
+static float CombinedShannonEntropy(const int X[256], const int Y[256]) { // X and Y hold 256 counts each
+    int i;
+    double retval = 0.; // accumulated in double, returned as float
+    int sumX, sumXY;
+    int32_t tmp[4]; // scratch copy of one 4-lane vector
+    __m128i zero = _mm_setzero_si128();
+    // Sums up X + Y, 4 ints at a time (and will merge it at the end for sumXY).
+    __m128i sumXY_128 = zero;
+    __m128i sumX_128 = zero;
+
+    for (i = 0; i < 256; i += 4) { // four entries of X and Y per iteration
+        const __m128i x = _mm_loadu_si128((const __m128i*)(X + i));
+        const __m128i y = _mm_loadu_si128((const __m128i*)(Y + i));
+
+        // Check if any X is non-zero: this actually provides a speedup as X is
+        // usually sparse.
+        if (_mm_movemask_epi8(_mm_cmpeq_epi32(x, zero)) != 0xFFFF) { // 0xFFFF would mean all four lanes are zero
+            const __m128i xy_128 = _mm_add_epi32(x, y);
+            sumXY_128 = _mm_add_epi32(sumXY_128, xy_128);
+
+            sumX_128 = _mm_add_epi32(sumX_128, x);
+
+            // Analyze the different X + Y.
+            _mm_storeu_si128((__m128i*)tmp, xy_128);
+
+            ANALYZE_XY(0);
+            ANALYZE_XY(1);
+            ANALYZE_XY(2);
+            ANALYZE_XY(3);
+        } else {
+            // X is fully 0, so only deal with Y.
+            sumXY_128 = _mm_add_epi32(sumXY_128, y);
+
+            ANALYZE_X_OR_Y(Y, 0);
+            ANALYZE_X_OR_Y(Y, 1);
+            ANALYZE_X_OR_Y(Y, 2);
+            ANALYZE_X_OR_Y(Y, 3);
+        }
+    }
+
+    // Sum up sumX_128 to get sumX.
+    _mm_storeu_si128((__m128i*)tmp, sumX_128);
+    sumX = tmp[3] + tmp[2] + tmp[1] + tmp[0];
+
+    // Sum up sumXY_128 to get sumXY.
+    _mm_storeu_si128((__m128i*)tmp, sumXY_128);
+    sumXY = tmp[3] + tmp[2] + tmp[1] + tmp[0];
+
+    retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); // totals are added, per-entry terms were subtracted above
+    return (float)retval;
+}
+#undef ANALYZE_X_OR_Y
+#undef ANALYZE_XY
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LEncDspInitSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) {  // installs the SSE2 routines of this file
+    VP8LCombinedShannonEntropy = &CombinedShannonEntropy;
+    VP8LHistogramAdd = &HistogramAdd;
+    VP8LCollectColorRedTransforms = &CollectColorRedTransforms;
+    VP8LCollectColorBlueTransforms = &CollectColorBlueTransforms;
+    VP8LTransformColor = &TransformColor;
+    VP8LSubtractGreenFromBlueAndRed = &SubtractGreenFromBlueAndRed;
+}
+
+#else // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(VP8LEncDspInitSSE2)
+
+#endif // WEBP_USE_SSE2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/lossless_enc_sse41.c b/codec/L2/demos/webpEnc/host/src/dsp/lossless_enc_sse41.c
new file mode 100644
index 0000000000..2689a19603
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/lossless_enc_sse41.c
@@ -0,0 +1,50 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE4.1 variant of methods for lossless encoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE41)
+#include <assert.h>
+#include <smmintrin.h>
+#include "./lossless.h"
+
+//------------------------------------------------------------------------------
+// Subtract-Green Transform
+
+static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
+    // Byte selector: byte 1 of every pixel goes to bytes 0 and 2 of that pixel; the -1 entries produce zero.
+    const __m128i green_sel = _mm_set_epi8(-1, 13, -1, 13, -1, 9, -1, 9, -1, 5, -1, 5, -1, 1, -1, 1);
+    int done = 0;
+    while (done + 4 <= num_pixels) {
+        __m128i* const quad = (__m128i*)(argb_data + done);
+        const __m128i argb = _mm_loadu_si128(quad);
+        _mm_storeu_si128(quad, _mm_sub_epi8(argb, _mm_shuffle_epi8(argb, green_sel)));
+        done += 4;
+    }
+    VP8LSubtractGreenFromBlueAndRed_C(argb_data + done, num_pixels - done);  // plain-C handles the tail
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LEncDspInitSSE41(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE41(void) {  // installs the SSE4.1 routine of this file
+    VP8LSubtractGreenFromBlueAndRed = &SubtractGreenFromBlueAndRed;
+}
+
+#else // !WEBP_USE_SSE41
+
+WEBP_DSP_INIT_STUB(VP8LEncDspInitSSE41)
+
+#endif // WEBP_USE_SSE41
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/lossless_mips_dsp_r2.c b/codec/L2/demos/webpEnc/host/src/dsp/lossless_mips_dsp_r2.c
new file mode 100644
index 0000000000..78bce3dd9c
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/lossless_mips_dsp_r2.c
@@ -0,0 +1,650 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Image transforms and color space conversion methods for lossless decoder.
+//
+// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
+//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+#include "./lossless.h"
+
+#define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) /* defines FUNC_NAME: color_map lookup by index */  \
+    static void FUNC_NAME(const TYPE* src, const uint32_t* const color_map, TYPE* dst, int y_start, int y_end,     \
+                          int width) {                                                                             \
+        int y;                                                                                                     \
+        for (y = y_start; y < y_end; ++y) { /* one pass per row in [y_start, y_end) */                             \
+            int x;                                                                                                 \
+            for (x = 0; x < (width >> 2); ++x) { /* four entries per iteration */                                  \
+                int tmp1, tmp2, tmp3, tmp4;                                                                        \
+                __asm__ volatile(".ifc        " #TYPE /* .ifc: assembled only for TYPE uint8_t */                  \
+                                 ",  uint8_t                  \n\t"                                                \
+                                 "lbu       %[tmp1],  0(%[src])                  \n\t"                             \
+                                 "lbu       %[tmp2],  1(%[src])                  \n\t"                             \
+                                 "lbu       %[tmp3],  2(%[src])                  \n\t"                             \
+                                 "lbu       %[tmp4],  3(%[src])                  \n\t"                             \
+                                 "addiu     %[src],   %[src],      4             \n\t"                             \
+                                 ".endif                                           \n\t"                           \
+                                 ".ifc        " #TYPE /* .ifc: assembled only for TYPE uint32_t */                 \
+                                 ",  uint32_t                 \n\t"                                                \
+                                 "lw        %[tmp1],  0(%[src])                  \n\t"                             \
+                                 "lw        %[tmp2],  4(%[src])                  \n\t"                             \
+                                 "lw        %[tmp3],  8(%[src])                  \n\t"                             \
+                                 "lw        %[tmp4],  12(%[src])                 \n\t"                             \
+                                 "ext       %[tmp1],  %[tmp1],     8,        8   \n\t"                             \
+                                 "ext       %[tmp2],  %[tmp2],     8,        8   \n\t"                             \
+                                 "ext       %[tmp3],  %[tmp3],     8,        8   \n\t"                             \
+                                 "ext       %[tmp4],  %[tmp4],     8,        8   \n\t"                             \
+                                 "addiu     %[src],   %[src],      16            \n\t"                             \
+                                 ".endif                                           \n\t"                           \
+                                 "sll       %[tmp1],  %[tmp1],     2             \n\t" /* index -> byte offset */  \
+                                 "sll       %[tmp2],  %[tmp2],     2             \n\t"                             \
+                                 "sll       %[tmp3],  %[tmp3],     2             \n\t"                             \
+                                 "sll       %[tmp4],  %[tmp4],     2             \n\t"                             \
+                                 "lwx       %[tmp1],  %[tmp1](%[color_map])      \n\t" /* load color_map entry */  \
+                                 "lwx       %[tmp2],  %[tmp2](%[color_map])      \n\t"                             \
+                                 "lwx       %[tmp3],  %[tmp3](%[color_map])      \n\t"                             \
+                                 "lwx       %[tmp4],  %[tmp4](%[color_map])      \n\t"                             \
+                                 ".ifc        " #TYPE /* .ifc: assembled only for TYPE uint8_t */                  \
+                                 ",  uint8_t                  \n\t"                                                \
+                                 "ext       %[tmp1],  %[tmp1],     8,        8   \n\t"                             \
+                                 "ext       %[tmp2],  %[tmp2],     8,        8   \n\t"                             \
+                                 "ext       %[tmp3],  %[tmp3],     8,        8   \n\t"                             \
+                                 "ext       %[tmp4],  %[tmp4],     8,        8   \n\t"                             \
+                                 "sb        %[tmp1],  0(%[dst])                  \n\t"                             \
+                                 "sb        %[tmp2],  1(%[dst])                  \n\t"                             \
+                                 "sb        %[tmp3],  2(%[dst])                  \n\t"                             \
+                                 "sb        %[tmp4],  3(%[dst])                  \n\t"                             \
+                                 "addiu     %[dst],   %[dst],      4             \n\t"                             \
+                                 ".endif                                           \n\t"                           \
+                                 ".ifc        " #TYPE /* .ifc: assembled only for TYPE uint32_t */                 \
+                                 ",  uint32_t                 \n\t"                                                \
+                                 "sw        %[tmp1],  0(%[dst])                  \n\t"                             \
+                                 "sw        %[tmp2],  4(%[dst])                  \n\t"                             \
+                                 "sw        %[tmp3],  8(%[dst])                  \n\t"                             \
+                                 "sw        %[tmp4],  12(%[dst])                 \n\t"                             \
+                                 "addiu     %[dst],   %[dst],      16            \n\t"                             \
+                                 ".endif                                           \n\t"                           \
+                                 : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3), [tmp4] "=&r"(tmp4), \
+                                   [src] "+&r"(src), [dst] "+r"(dst)                                               \
+                                 : [color_map] "r"(color_map)                                                      \
+                                 : "memory");                                                                      \
+            }                                                                                                      \
+            for (x = 0; x < (width & 3); ++x) { /* remaining (width & 3) entries, plain C */                       \
+                *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                                                  \
+            }                                                                                                      \
+        }                                                                                                          \
+    }
+
+MAP_COLOR_FUNCS(MapARGB, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)
+MAP_COLOR_FUNCS(MapAlpha, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)
+
+#undef MAP_COLOR_FUNCS
+
+static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, uint32_t c2) {  // per byte: clamp(c0 + c1 - c2)
+    int temp0, temp1, temp2, temp3, temp4, temp5;
+    __asm__ volatile(
+        "preceu.ph.qbr   %[temp1],   %[c0]                 \n\t"  // c0: two low bytes widened to halfwords
+        "preceu.ph.qbl   %[temp2],   %[c0]                 \n\t"  // c0: two high bytes widened to halfwords
+        "preceu.ph.qbr   %[temp3],   %[c1]                 \n\t"  // same split for c1
+        "preceu.ph.qbl   %[temp4],   %[c1]                 \n\t"
+        "preceu.ph.qbr   %[temp5],   %[c2]                 \n\t"  // same split for c2
+        "preceu.ph.qbl   %[temp0],   %[c2]                 \n\t"
+        "subq.ph         %[temp3],   %[temp3],   %[temp5]  \n\t"  // c1 - c2 (low pair)
+        "subq.ph         %[temp4],   %[temp4],   %[temp0]  \n\t"  // c1 - c2 (high pair)
+        "addq.ph         %[temp1],   %[temp1],   %[temp3]  \n\t"  // c0 + c1 - c2 (low pair)
+        "addq.ph         %[temp2],   %[temp2],   %[temp4]  \n\t"  // c0 + c1 - c2 (high pair)
+        "shll_s.ph       %[temp1],   %[temp1],   7         \n\t"  // saturating shift: first half of the clamp
+        "shll_s.ph       %[temp2],   %[temp2],   7         \n\t"
+        "precrqu_s.qb.ph %[temp2],   %[temp2],   %[temp1]  \n\t"  // saturate to bytes and repack into one word
+        : [temp0] "=r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+          [temp5] "=&r"(temp5)
+        : [c0] "r"(c0), [c1] "r"(c1), [c2] "r"(c2)
+        : "memory");
+    return temp2;  // word written by the final repack instruction
+}
+
+static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, uint32_t c2) {  // per byte: clamp(avg + (avg - c2) / 2), avg = (c0 + c1) >> 1
+    int temp0, temp1, temp2, temp3, temp4, temp5;
+    __asm__ volatile(
+        "adduh.qb         %[temp5],   %[c0],      %[c1]       \n\t"  // avg = per-byte (c0 + c1) >> 1
+        "preceu.ph.qbr    %[temp3],   %[c2]                   \n\t"  // c2: two low bytes widened to halfwords
+        "preceu.ph.qbr    %[temp1],   %[temp5]                \n\t"  // avg: two low bytes widened
+        "preceu.ph.qbl    %[temp2],   %[temp5]                \n\t"  // avg: two high bytes widened
+        "preceu.ph.qbl    %[temp4],   %[c2]                   \n\t"  // c2: two high bytes widened
+        "subq.ph          %[temp3],   %[temp1],   %[temp3]    \n\t"  // avg - c2 (low pair)
+        "subq.ph          %[temp4],   %[temp2],   %[temp4]    \n\t"  // avg - c2 (high pair)
+        "shrl.ph          %[temp5],   %[temp3],   15          \n\t"  // sign bit of each difference (1 when negative)
+        "shrl.ph          %[temp0],   %[temp4],   15          \n\t"
+        "addq.ph          %[temp3],   %[temp3],   %[temp5]    \n\t"  // add it so the halving below rounds toward zero
+        "addq.ph          %[temp4],   %[temp0],   %[temp4]    \n\t"
+        "shra.ph          %[temp3],   %[temp3],   1           \n\t"  // (avg - c2) / 2
+        "shra.ph          %[temp4],   %[temp4],   1           \n\t"
+        "addq.ph          %[temp1],   %[temp1],   %[temp3]    \n\t"  // avg + (avg - c2) / 2 (low pair)
+        "addq.ph          %[temp2],   %[temp2],   %[temp4]    \n\t"  // avg + (avg - c2) / 2 (high pair)
+        "shll_s.ph        %[temp1],   %[temp1],   7           \n\t"  // saturating shift: first half of the clamp
+        "shll_s.ph        %[temp2],   %[temp2],   7           \n\t"
+        "precrqu_s.qb.ph  %[temp1],   %[temp2],   %[temp1]    \n\t"  // saturate to bytes and repack into one word
+        : [temp0] "=r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp4] "=r"(temp4),
+          [temp5] "=&r"(temp5)
+        : [c0] "r"(c0), [c1] "r"(c1), [c2] "r"(c2)
+        : "memory");
+    return temp1;  // word written by the final repack instruction
+}
+
+static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {  // returns b if sum|b-c| > sum|a-c| over the 4 bytes, else a
+    int temp0, temp1, temp2, temp3, temp4, temp5;
+    __asm__ volatile(
+        "cmpgdu.lt.qb %[temp1], %[c],     %[b]             \n\t"  // per-byte c < b; result steers the picks below
+        "pick.qb      %[temp1], %[b],     %[c]             \n\t"  // per-byte max(b, c)
+        "pick.qb      %[temp2], %[c],     %[b]             \n\t"  // per-byte min(b, c)
+        "cmpgdu.lt.qb %[temp4], %[c],     %[a]             \n\t"  // per-byte c < a
+        "pick.qb      %[temp4], %[a],     %[c]             \n\t"  // per-byte max(a, c)
+        "pick.qb      %[temp5], %[c],     %[a]             \n\t"  // per-byte min(a, c)
+        "subu.qb      %[temp3], %[temp1], %[temp2]         \n\t"  // |b - c| per byte
+        "subu.qb      %[temp0], %[temp4], %[temp5]         \n\t"  // |a - c| per byte
+        "raddu.w.qb   %[temp3], %[temp3]                   \n\t"  // sum of the four |b - c| bytes
+        "raddu.w.qb   %[temp0], %[temp0]                   \n\t"  // sum of the four |a - c| bytes
+        "subu         %[temp3], %[temp3], %[temp0]         \n\t"  // difference of the two sums
+        "slti         %[temp0], %[temp3], 0x1              \n\t"  // temp0 = 1 when the difference is <= 0
+        "movz         %[a],     %[b],     %[temp0]         \n\t"  // a = b only when temp0 == 0
+        : [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
+          [temp0] "=&r"(temp0), [a] "+&r"(a)
+        : [b] "r"(b), [c] "r"(c));
+    return a;  // possibly overwritten with b by the movz above
+}
+
+static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {  // per-byte (a0 + a1) >> 1 on the four packed bytes
+    __asm__ volatile("adduh.qb    %[a0], %[a0], %[a1]       \n\t" : [a0] "+r"(a0) : [a1] "r"(a1));  // a0 is updated in place
+    return a0;
+}
+
+static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {  // two-step average of three packed pixels
+    return Average2(Average2(a0, a2), a1);  // a0 and a2 are averaged first, so a1 carries half the weight
+}
+
+static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, uint32_t a2, uint32_t a3) {  // pairwise average of four packed pixels
+    return Average2(Average2(a0, a1), Average2(a2, a3));  // average the two pairs, then average the results
+}
+
+static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {  // blend of left, top[0] and top[1]
+    return Average2(Average2(left, top[1]), top[0]);  // left and top[1] first, then halved again with top[0]
+}
+
+static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {  // average of left and top[-1]
+    return Average2(left, top[-1]);  // reads the element just before top[0]
+}
+
+static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {  // average of left and top[0]
+    return Average2(left, top[0]);
+}
+
+static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {  // average of top[-1] and top[0]
+    (void)left;  // unused here; the cast marks that explicitly
+    return Average2(top[-1], top[0]);
+}
+
+static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {  // average of top[0] and top[1]
+    (void)left;  // unused here; the cast marks that explicitly
+    return Average2(top[0], top[1]);
+}
+
+static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {  // blend of left, top[-1], top[0], top[1]
+    return Average2(Average2(left, top[-1]), Average2(top[0], top[1]));  // average each pair, then the two results
+}
+
+static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {  // picks top[0] or left via Select()
+    return Select(top[0], left, top[-1]);  // top[-1] is the reference both candidates are compared against
+}
+
+static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {  // per byte: clamp(left + top[0] - top[-1])
+    return ClampedAddSubtractFull(left, top[0], top[-1]);
+}
+
+static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {  // half-strength variant: see ClampedAddSubtractHalf()
+    return ClampedAddSubtractHalf(left, top[0], top[-1]);  // left and top[0] are averaged, top[-1] is subtracted
+}
+
+// Add green to blue and red channels (i.e. perform the inverse transform of
+// 'subtract green').
+static void AddGreenToBlueAndRed(uint32_t* data, int num_pixels) {  // in place: bytes 0 and 2 of each pixel += bits 8..15
+    uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    uint32_t* const p_loop1_end = data + (num_pixels & ~3);  // end of the four-pixels-per-iteration loop
+    uint32_t* const p_loop2_end = data + num_pixels;  // end of the one-pixel-per-iteration loop
+    __asm__ volatile(
+        ".set       push                                          \n\t"
+        ".set       noreorder                                     \n\t"  // branch delay slots are filled by hand below
+        "beq        %[data],         %[p_loop1_end],     3f       \n\t"  // fewer than four pixels: skip loop 0
+        " nop                                                     \n\t"
+        "0:                                                         \n\t"  // loop 0: four pixels per iteration
+        "lw         %[temp0],        0(%[data])                   \n\t"
+        "lw         %[temp1],        4(%[data])                   \n\t"
+        "lw         %[temp2],        8(%[data])                   \n\t"
+        "lw         %[temp3],        12(%[data])                  \n\t"
+        "ext        %[temp4],        %[temp0],           8,    8  \n\t"  // bits 8..15 (the green byte, per the function name)
+        "ext        %[temp5],        %[temp1],           8,    8  \n\t"
+        "ext        %[temp6],        %[temp2],           8,    8  \n\t"
+        "ext        %[temp7],        %[temp3],           8,    8  \n\t"
+        "addiu      %[data],         %[data],            16       \n\t"  // advance early; stores below use negative offsets
+        "replv.ph   %[temp4],        %[temp4]                     \n\t"  // copy that byte into both halfwords (bytes 0 and 2)
+        "replv.ph   %[temp5],        %[temp5]                     \n\t"
+        "replv.ph   %[temp6],        %[temp6]                     \n\t"
+        "replv.ph   %[temp7],        %[temp7]                     \n\t"
+        "addu.qb    %[temp0],        %[temp0],           %[temp4] \n\t"  // per-byte add; bytes 1 and 3 get +0
+        "addu.qb    %[temp1],        %[temp1],           %[temp5] \n\t"
+        "addu.qb    %[temp2],        %[temp2],           %[temp6] \n\t"
+        "addu.qb    %[temp3],        %[temp3],           %[temp7] \n\t"
+        "sw         %[temp0],        -16(%[data])                 \n\t"
+        "sw         %[temp1],        -12(%[data])                 \n\t"
+        "sw         %[temp2],        -8(%[data])                  \n\t"
+        "bne        %[data],         %[p_loop1_end],     0b       \n\t"
+        " sw        %[temp3],        -4(%[data])                  \n\t"  // delay slot: fourth store
+        "3:                                                         \n\t"
+        "beq        %[data],         %[p_loop2_end],     2f       \n\t"  // nothing left over: done
+        " nop                                                     \n\t"
+        "1:                                                         \n\t"  // loop 1: one pixel per iteration
+        "lw         %[temp0],        0(%[data])                   \n\t"
+        "addiu      %[data],         %[data],            4        \n\t"
+        "ext        %[temp4],        %[temp0],           8,    8  \n\t"
+        "replv.ph   %[temp4],        %[temp4]                     \n\t"
+        "addu.qb    %[temp0],        %[temp0],           %[temp4] \n\t"
+        "bne        %[data],         %[p_loop2_end],     1b       \n\t"
+        " sw        %[temp0],        -4(%[data])                  \n\t"  // delay slot: store the pixel
+        "2:                                                         \n\t"
+        ".set       pop                                           \n\t"
+        : [data] "+&r"(data), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
+          [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [temp6] "=&r"(temp6), [temp7] "=&r"(temp7)
+        : [p_loop1_end] "r"(p_loop1_end), [p_loop2_end] "r"(p_loop2_end)
+        : "memory");
+}
+
+static void TransformColorInverse(const VP8LMultipliers* const m, uint32_t* data, int num_pixels) {  // in place, two pixels per asm iteration
+    int temp0, temp1, temp2, temp3, temp4, temp5;
+    uint32_t argb, argb1, new_red;
+    const uint32_t G_to_R = m->green_to_red_;
+    const uint32_t G_to_B = m->green_to_blue_;
+    const uint32_t R_to_B = m->red_to_blue_;
+    uint32_t* const p_loop_end = data + (num_pixels & ~1);  // an odd trailing pixel is handled after the asm
+    __asm__ volatile(
+        ".set            push                                    \n\t"
+        ".set            noreorder                               \n\t"  // branch delay slots are filled by hand below
+        "beq             %[data],      %[p_loop_end],  1f        \n\t"  // fewer than two pixels: skip the loop
+        " nop                                                    \n\t"
+        "replv.ph        %[temp0],     %[G_to_R]                 \n\t"  // copy each multiplier into both halfwords
+        "replv.ph        %[temp1],     %[G_to_B]                 \n\t"
+        "replv.ph        %[temp2],     %[R_to_B]                 \n\t"
+        "shll.ph         %[temp0],     %[temp0],       8         \n\t"  // shift up then arithmetic shift down:
+        "shll.ph         %[temp1],     %[temp1],       8         \n\t"  // sign-extends the low 8 bits of each halfword
+        "shll.ph         %[temp2],     %[temp2],       8         \n\t"
+        "shra.ph         %[temp0],     %[temp0],       8         \n\t"
+        "shra.ph         %[temp1],     %[temp1],       8         \n\t"
+        "shra.ph         %[temp2],     %[temp2],       8         \n\t"
+        "0:                                                        \n\t"  // loop: pixels data[0] (argb) and data[1] (argb1)
+        "lw              %[argb],      0(%[data])                \n\t"
+        "lw              %[argb1],     4(%[data])                \n\t"
+        "addiu           %[data],      %[data],        8         \n\t"  // advance early; stores below use negative offsets
+        "precrq.qb.ph    %[temp3],     %[argb],        %[argb1]  \n\t"  // gather the high byte of every halfword
+        "preceu.ph.qbra  %[temp3],     %[temp3]                  \n\t"  // keep bits 8..15 of each pixel (green), one per halfword
+        "shll.ph         %[temp3],     %[temp3],       8         \n\t"  // sign-extend both green values
+        "shra.ph         %[temp3],     %[temp3],       8         \n\t"
+        "mul.ph          %[temp5],     %[temp3],       %[temp0]  \n\t"  // green * G_to_R
+        "mul.ph          %[temp3],     %[temp3],       %[temp1]  \n\t"  // green * G_to_B
+        "precrq.ph.w     %[new_red],   %[argb],        %[argb1]  \n\t"  // upper halfwords of both pixels
+        "ins             %[argb1],     %[argb],        16,   16  \n\t"  // argb1 now holds the lower halfwords of both pixels
+        "shra.ph         %[temp5],     %[temp5],       5         \n\t"  // products >> 5
+        "shra.ph         %[temp3],     %[temp3],       5         \n\t"
+        "addu.ph         %[new_red],   %[new_red],     %[temp5]  \n\t"  // add the G_to_R term to the upper halfwords
+        "addu.ph         %[argb1],     %[argb1],       %[temp3]  \n\t"  // add the G_to_B term to the lower halfwords
+        "preceu.ph.qbra  %[temp5],     %[new_red]                \n\t"  // low byte of each halfword: the two new red values
+        "shll.ph         %[temp4],     %[temp5],       8         \n\t"  // sign-extend new red for the R_to_B product
+        "shra.ph         %[temp4],     %[temp4],       8         \n\t"
+        "mul.ph          %[temp4],     %[temp4],       %[temp2]  \n\t"  // new_red * R_to_B
+        "sb              %[temp5],     -2(%[data])               \n\t"  // byte 2 of the second pixel
+        "sra             %[temp5],     %[temp5],       16        \n\t"
+        "shra.ph         %[temp4],     %[temp4],       5         \n\t"
+        "addu.ph         %[argb1],     %[argb1],       %[temp4]  \n\t"  // add the R_to_B term to the lower halfwords
+        "preceu.ph.qbra  %[temp3],     %[argb1]                  \n\t"  // low byte of each halfword: the two new blue values
+        "sb              %[temp5],     -6(%[data])               \n\t"  // byte 2 of the first pixel
+        "sb              %[temp3],     -4(%[data])               \n\t"  // byte 0 of the second pixel
+        "sra             %[temp3],     %[temp3],       16        \n\t"
+        "bne             %[data],      %[p_loop_end],  0b        \n\t"
+        " sb             %[temp3],     -8(%[data])               \n\t"  // delay slot: byte 0 of the first pixel
+        "1:                                                        \n\t"
+        ".set            pop                                     \n\t"
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+          [temp5] "=&r"(temp5), [new_red] "=&r"(new_red), [argb] "=&r"(argb), [argb1] "=&r"(argb1), [data] "+&r"(data)
+        : [G_to_R] "r"(G_to_R), [R_to_B] "r"(R_to_B), [G_to_B] "r"(G_to_B), [p_loop_end] "r"(p_loop_end)
+        : "memory", "hi", "lo");
+
+    // Fall-back to C-version for left-overs.
+    if (num_pixels & 1) VP8LTransformColorInverse_C(m, data, 1);  // data was advanced by the asm, so it points at the last pixel
+}
+
+static void ConvertBGRAToRGB(const uint32_t* src, int num_pixels, uint8_t* dst) {  // writes 3 bytes to dst per 32-bit src pixel
+    int temp0, temp1, temp2, temp3;
+    const uint32_t* const p_loop1_end = src + (num_pixels & ~3);  // end of the four-pixels-per-iteration loop
+    const uint32_t* const p_loop2_end = src + num_pixels;  // end of the one-pixel-per-iteration loop
+    __asm__ volatile(
+        ".set       push                                       \n\t"
+        ".set       noreorder                                  \n\t"  // branch delay slots are filled by hand below
+        "beq        %[src],      %[p_loop1_end],    3f         \n\t"  // fewer than four pixels: skip loop 0
+        " nop                                                  \n\t"
+        "0:                                                      \n\t"  // loop 0: 4 pixels (16 bytes) in, 12 bytes out
+        "lw         %[temp3],    12(%[src])                    \n\t"
+        "lw         %[temp2],    8(%[src])                     \n\t"
+        "lw         %[temp1],    4(%[src])                     \n\t"
+        "lw         %[temp0],    0(%[src])                     \n\t"
+        "ins        %[temp3],    %[temp2],          24,   8    \n\t"  // the ins/sll/rotr/balign/wsbh mix packs the four
+        "sll        %[temp2],    %[temp2],          8          \n\t"  // pixels into three output words
+        "rotr       %[temp3],    %[temp3],          16         \n\t"
+        "ins        %[temp2],    %[temp1],          0,    16   \n\t"
+        "sll        %[temp1],    %[temp1],          8          \n\t"
+        "wsbh       %[temp3],    %[temp3]                      \n\t"
+        "balign     %[temp0],    %[temp1],          1          \n\t"
+        "wsbh       %[temp2],    %[temp2]                      \n\t"
+        "wsbh       %[temp0],    %[temp0]                      \n\t"
+        "usw        %[temp3],    8(%[dst])                     \n\t"  // usw: word store to a possibly unaligned dst
+        "rotr       %[temp0],    %[temp0],          16         \n\t"
+        "usw        %[temp2],    4(%[dst])                     \n\t"
+        "addiu      %[src],      %[src],            16         \n\t"
+        "usw        %[temp0],    0(%[dst])                     \n\t"
+        "bne        %[src],      %[p_loop1_end],    0b         \n\t"
+        " addiu     %[dst],      %[dst],            12         \n\t"  // delay slot: advance dst by 12 bytes
+        "3:                                                      \n\t"
+        "beq        %[src],      %[p_loop2_end],    2f         \n\t"  // nothing left over: done
+        " nop                                                  \n\t"
+        "1:                                                      \n\t"  // loop 1: one pixel in, 3 bytes out
+        "lw         %[temp0],    0(%[src])                     \n\t"
+        "addiu      %[src],      %[src],            4          \n\t"
+        "wsbh       %[temp1],    %[temp0]                      \n\t"  // swap the bytes inside each halfword
+        "addiu      %[dst],      %[dst],            3          \n\t"  // advance early; stores below use negative offsets
+        "ush        %[temp1],    -2(%[dst])                    \n\t"  // output bytes 1 and 2
+        "sra        %[temp0],    %[temp0],          16         \n\t"  // bring bits 16..23 down to the low byte
+        "bne        %[src],      %[p_loop2_end],    1b         \n\t"
+        " sb        %[temp0],    -3(%[dst])                    \n\t"  // delay slot: output byte 0
+        "2:                                                      \n\t"
+        ".set       pop                                        \n\t"
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dst] "+&r"(dst),
+          [src] "+&r"(src)
+        : [p_loop1_end] "r"(p_loop1_end), [p_loop2_end] "r"(p_loop2_end)
+        : "memory");
+}
+
+static void ConvertBGRAToRGBA(const uint32_t* src, int num_pixels, uint8_t* dst) {  // writes 4 reordered bytes to dst per 32-bit src pixel
+    int temp0, temp1, temp2, temp3;
+    const uint32_t* const p_loop1_end = src + (num_pixels & ~3);  // end of the four-pixels-per-iteration loop
+    const uint32_t* const p_loop2_end = src + num_pixels;  // end of the one-pixel-per-iteration loop
+    __asm__ volatile(
+        ".set       push                                       \n\t"
+        ".set       noreorder                                  \n\t"  // branch delay slots are filled by hand below
+        "beq        %[src],      %[p_loop1_end],    3f         \n\t"  // fewer than four pixels: skip loop 0
+        " nop                                                  \n\t"
+        "0:                                                      \n\t"  // loop 0: four pixels per iteration
+        "lw         %[temp0],    0(%[src])                     \n\t"
+        "lw         %[temp1],    4(%[src])                     \n\t"
+        "lw         %[temp2],    8(%[src])                     \n\t"
+        "lw         %[temp3],    12(%[src])                    \n\t"
+        "wsbh       %[temp0],    %[temp0]                      \n\t"  // swap the bytes inside each halfword
+        "wsbh       %[temp1],    %[temp1]                      \n\t"
+        "wsbh       %[temp2],    %[temp2]                      \n\t"
+        "wsbh       %[temp3],    %[temp3]                      \n\t"
+        "addiu      %[src],      %[src],            16         \n\t"
+        "balign     %[temp0],    %[temp0],          1          \n\t"  // same register for both operands: a byte-wise rotation
+        "balign     %[temp1],    %[temp1],          1          \n\t"
+        "balign     %[temp2],    %[temp2],          1          \n\t"
+        "balign     %[temp3],    %[temp3],          1          \n\t"
+        "usw        %[temp0],    0(%[dst])                     \n\t"  // usw: word store to a possibly unaligned dst
+        "usw        %[temp1],    4(%[dst])                     \n\t"
+        "usw        %[temp2],    8(%[dst])                     \n\t"
+        "usw        %[temp3],    12(%[dst])                    \n\t"
+        "bne        %[src],      %[p_loop1_end],    0b         \n\t"
+        " addiu     %[dst],      %[dst],            16         \n\t"  // delay slot: advance dst by 16 bytes
+        "3:                                                      \n\t"
+        "beq        %[src],      %[p_loop2_end],    2f         \n\t"  // nothing left over: done
+        " nop                                                  \n\t"
+        "1:                                                      \n\t"  // loop 1: one pixel per iteration
+        "lw         %[temp0],    0(%[src])                     \n\t"
+        "wsbh       %[temp0],    %[temp0]                      \n\t"
+        "addiu      %[src],      %[src],            4          \n\t"
+        "balign     %[temp0],    %[temp0],          1          \n\t"
+        "usw        %[temp0],    0(%[dst])                     \n\t"
+        "bne        %[src],      %[p_loop2_end],    1b         \n\t"
+        " addiu     %[dst],      %[dst],            4          \n\t"  // delay slot: advance dst by 4 bytes
+        "2:                                                      \n\t"
+        ".set       pop                                        \n\t"
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dst] "+&r"(dst),
+          [src] "+&r"(src)
+        : [p_loop1_end] "r"(p_loop1_end), [p_loop2_end] "r"(p_loop2_end)
+        : "memory");
+}
+
+static void ConvertBGRAToRGBA4444(const uint32_t* src, int num_pixels, uint8_t* dst) {  // writes 2 bytes to dst per 32-bit src pixel
+    int temp0, temp1, temp2, temp3, temp4, temp5;
+    const uint32_t* const p_loop1_end = src + (num_pixels & ~3);  // end of the four-pixels-per-iteration loop
+    const uint32_t* const p_loop2_end = src + num_pixels;  // end of the one-pixel-per-iteration loop
+    __asm__ volatile(
+        ".set           push                                       \n\t"
+        ".set           noreorder                                  \n\t"  // branch delay slots are filled by hand below
+        "beq            %[src],      %[p_loop1_end],    3f         \n\t"  // fewer than four pixels: skip loop 0
+        " nop                                                      \n\t"
+        "0:                                                          \n\t"  // loop 0: 4 pixels in, 8 bytes out
+        "lw             %[temp0],    0(%[src])                     \n\t"
+        "lw             %[temp1],    4(%[src])                     \n\t"
+        "lw             %[temp2],    8(%[src])                     \n\t"
+        "lw             %[temp3],    12(%[src])                    \n\t"
+        "ext            %[temp4],    %[temp0],          28,   4    \n\t"  // top nibble of byte 3
+        "ext            %[temp5],    %[temp0],          12,   4    \n\t"  // top nibble of byte 1
+        "ins            %[temp0],    %[temp4],          0,    4    \n\t"  // byte 0 = own top nibble | byte 3's top nibble
+        "ext            %[temp4],    %[temp1],          28,   4    \n\t"  // same steps, interleaved, for pixels 1..3
+        "ins            %[temp0],    %[temp5],          16,   4    \n\t"  // byte 2 = own top nibble | byte 1's top nibble
+        "ext            %[temp5],    %[temp1],          12,   4    \n\t"
+        "ins            %[temp1],    %[temp4],          0,    4    \n\t"
+        "ext            %[temp4],    %[temp2],          28,   4    \n\t"
+        "ins            %[temp1],    %[temp5],          16,   4    \n\t"
+        "ext            %[temp5],    %[temp2],          12,   4    \n\t"
+        "ins            %[temp2],    %[temp4],          0,    4    \n\t"
+        "ext            %[temp4],    %[temp3],          28,   4    \n\t"
+        "ins            %[temp2],    %[temp5],          16,   4    \n\t"
+        "ext            %[temp5],    %[temp3],          12,   4    \n\t"
+        "ins            %[temp3],    %[temp4],          0,    4    \n\t"
+        "precr.qb.ph    %[temp1],    %[temp1],          %[temp0]   \n\t"  // keep bytes 2 and 0 of pixels 1 and 0: one word
+        "ins            %[temp3],    %[temp5],          16,   4    \n\t"
+        "addiu          %[src],      %[src],            16         \n\t"
+        "precr.qb.ph    %[temp3],    %[temp3],          %[temp2]   \n\t"  // same packing for pixels 3 and 2
+#ifdef WEBP_SWAP_16BIT_CSP
+        "usw            %[temp1],    0(%[dst])                     \n\t"  // this configuration stores without the wsbh swap
+        "usw            %[temp3],    4(%[dst])                     \n\t"
+#else
+        "wsbh           %[temp1],    %[temp1]                      \n\t"  // swap the two bytes of each 16-bit result first
+        "wsbh           %[temp3],    %[temp3]                      \n\t"
+        "usw            %[temp1],    0(%[dst])                     \n\t"
+        "usw            %[temp3],    4(%[dst])                     \n\t"
+#endif
+        "bne            %[src],      %[p_loop1_end],    0b         \n\t"
+        " addiu         %[dst],      %[dst],            8          \n\t"  // delay slot: advance dst by 8 bytes
+        "3:                                                          \n\t"
+        "beq            %[src],      %[p_loop2_end],    2f         \n\t"  // nothing left over: done
+        " nop                                                      \n\t"
+        "1:                                                          \n\t"  // loop 1: one pixel in, 2 bytes out
+        "lw             %[temp0],    0(%[src])                     \n\t"
+        "ext            %[temp4],    %[temp0],          28,   4    \n\t"
+        "ext            %[temp5],    %[temp0],          12,   4    \n\t"
+        "ins            %[temp0],    %[temp4],          0,    4    \n\t"
+        "ins            %[temp0],    %[temp5],          16,   4    \n\t"
+        "addiu          %[src],      %[src],            4          \n\t"
+        "precr.qb.ph    %[temp0],    %[temp0],          %[temp0]   \n\t"  // bytes 2 and 0 packed into the low halfword
+#ifdef WEBP_SWAP_16BIT_CSP
+        "ush            %[temp0],    0(%[dst])                     \n\t"  // ush: halfword store to a possibly unaligned dst
+#else
+        "wsbh           %[temp0],    %[temp0]                      \n\t"
+        "ush            %[temp0],    0(%[dst])                     \n\t"
+#endif
+        "bne            %[src],      %[p_loop2_end],    1b         \n\t"
+        " addiu         %[dst],      %[dst],            2          \n\t"  // delay slot: advance dst by 2 bytes
+        "2:                                                          \n\t"
+        ".set           pop                                        \n\t"
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+          [temp5] "=&r"(temp5), [dst] "+&r"(dst), [src] "+&r"(src)
+        : [p_loop1_end] "r"(p_loop1_end), [p_loop2_end] "r"(p_loop2_end)
+        : "memory");
+}
+
+static void ConvertBGRAToRGB565(const uint32_t* src, int num_pixels, uint8_t* dst) { // MIPS DSP r2: packs each 32-bit src pixel into one 16-bit dst value
+    int temp0, temp1, temp2, temp3, temp4, temp5; // scratch registers for the asm below
+    const uint32_t* const p_loop1_end = src + (num_pixels & ~3); // end of the part handled 4 pixels at a time
+    const uint32_t* const p_loop2_end = src + num_pixels; // end of the input
+    __asm__ volatile(
+        ".set           push                                       \n\t"
+        ".set           noreorder                                  \n\t" // branch delay slots are filled by hand below
+        "beq            %[src],      %[p_loop1_end],    3f         \n\t" // fewer than 4 pixels: go straight to the tail loop
+        " nop                                                      \n\t"
+        "0:                                                          \n\t" // main loop: 4 pixels in, 8 bytes out
+        "lw             %[temp0],    0(%[src])                     \n\t"
+        "lw             %[temp1],    4(%[src])                     \n\t"
+        "lw             %[temp2],    8(%[src])                     \n\t"
+        "lw             %[temp3],    12(%[src])                    \n\t"
+        "ext            %[temp4],    %[temp0],          8,    16   \n\t" // pixel 0: temp4 = bits 23..8
+        "ext            %[temp5],    %[temp0],          5,    11   \n\t" // temp5 = bits 15..5
+        "ext            %[temp0],    %[temp0],          3,    5    \n\t" // temp0 = bits 7..3
+        "ins            %[temp4],    %[temp5],          0,    11   \n\t" // temp4[10:0] = temp5
+        "ext            %[temp5],    %[temp1],          5,    11   \n\t" // pixel 1 starts here: same steps, interleaved
+        "ins            %[temp4],    %[temp0],          0,    5    \n\t" // temp4[15:0] = pixel 0 bits 23..19 | 15..10 | 7..3
+        "ext            %[temp0],    %[temp1],          8,    16   \n\t"
+        "ext            %[temp1],    %[temp1],          3,    5    \n\t"
+        "ins            %[temp0],    %[temp5],          0,    11   \n\t"
+        "ext            %[temp5],    %[temp2],          5,    11   \n\t" // pixel 2 starts here
+        "ins            %[temp0],    %[temp1],          0,    5    \n\t" // temp0 = packed pixel 1
+        "ext            %[temp1],    %[temp2],          8,    16   \n\t"
+        "ext            %[temp2],    %[temp2],          3,    5    \n\t"
+        "ins            %[temp1],    %[temp5],          0,    11   \n\t"
+        "ext            %[temp5],    %[temp3],          5,    11   \n\t" // pixel 3 starts here
+        "ins            %[temp1],    %[temp2],          0,    5    \n\t" // temp1 = packed pixel 2
+        "ext            %[temp2],    %[temp3],          8,    16   \n\t"
+        "ext            %[temp3],    %[temp3],          3,    5    \n\t"
+        "ins            %[temp2],    %[temp5],          0,    11   \n\t"
+        "append         %[temp0],    %[temp4],          16         \n\t" // temp0 = (packed pixel 1 << 16) | packed pixel 0
+        "ins            %[temp2],    %[temp3],          0,    5    \n\t" // temp2 = packed pixel 3
+        "addiu          %[src],      %[src],            16         \n\t"
+        "append         %[temp2],    %[temp1],          16         \n\t" // temp2 = (packed pixel 3 << 16) | packed pixel 2
+#ifdef WEBP_SWAP_16BIT_CSP
+        "usw            %[temp0],    0(%[dst])                     \n\t"
+        "usw            %[temp2],    4(%[dst])                     \n\t"
+#else
+        "wsbh           %[temp0],    %[temp0]                      \n\t" // default build: swap the two bytes inside each 16-bit half
+        "wsbh           %[temp2],    %[temp2]                      \n\t"
+        "usw            %[temp0],    0(%[dst])                     \n\t" // unaligned 32-bit stores
+        "usw            %[temp2],    4(%[dst])                     \n\t"
+#endif
+        "bne            %[src],      %[p_loop1_end],    0b         \n\t"
+        " addiu         %[dst],      %[dst],            8          \n\t" // delay slot: dst += 8 on every iteration
+        "3:                                                          \n\t" // tail: remaining pixels, one at a time
+        "beq            %[src],      %[p_loop2_end],    2f         \n\t" // nothing left: done
+        " nop                                                      \n\t"
+        "1:                                                          \n\t"
+        "lw             %[temp0],    0(%[src])                     \n\t"
+        "ext            %[temp4],    %[temp0],          8,    16   \n\t" // same bit-field packing as the main loop
+        "ext            %[temp5],    %[temp0],          5,    11   \n\t"
+        "ext            %[temp0],    %[temp0],          3,    5    \n\t"
+        "ins            %[temp4],    %[temp5],          0,    11   \n\t"
+        "addiu          %[src],      %[src],            4          \n\t"
+        "ins            %[temp4],    %[temp0],          0,    5    \n\t"
+#ifdef WEBP_SWAP_16BIT_CSP
+        "ush            %[temp4],    0(%[dst])                     \n\t"
+#else
+        "wsbh           %[temp4],    %[temp4]                      \n\t"
+        "ush            %[temp4],    0(%[dst])                     \n\t" // unaligned 16-bit store
+#endif
+        "bne            %[src],      %[p_loop2_end],    1b         \n\t"
+        " addiu         %[dst],      %[dst],            2          \n\t" // delay slot: dst += 2 on every iteration
+        "2:                                                          \n\t"
+        ".set           pop                                        \n\t"
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+          [temp5] "=&r"(temp5), [dst] "+&r"(dst), [src] "+&r"(src) // temps are early-clobber outputs; dst/src are read and advanced
+        : [p_loop1_end] "r"(p_loop1_end), [p_loop2_end] "r"(p_loop2_end)
+        : "memory"); // the asm stores through dst
+}
+
+static void ConvertBGRAToBGR(const uint32_t* src, int num_pixels, uint8_t* dst) { // MIPS DSP r2: keeps the low 3 bytes of each 32-bit pixel
+    int temp0, temp1, temp2, temp3; // scratch registers for the asm below
+    const uint32_t* const p_loop1_end = src + (num_pixels & ~3); // end of the part handled 4 pixels at a time
+    const uint32_t* const p_loop2_end = src + num_pixels; // end of the input
+    __asm__ volatile(
+        ".set       push                                         \n\t"
+        ".set       noreorder                                    \n\t" // branch delay slots are filled by hand below
+        "beq        %[src],      %[p_loop1_end],    3f           \n\t" // fewer than 4 pixels: go straight to the tail loop
+        " nop                                                    \n\t"
+        "0:                                                        \n\t" // main loop: 4 pixels (16 bytes) in, 12 bytes out
+        "lw         %[temp0],    0(%[src])                       \n\t"
+        "lw         %[temp1],    4(%[src])                       \n\t"
+        "lw         %[temp2],    8(%[src])                       \n\t"
+        "lw         %[temp3],    12(%[src])                      \n\t"
+        "ins        %[temp0],    %[temp1],          24,    8     \n\t" // temp0[31:24] = low byte of pixel 1 (overwrites pixel 0's top byte)
+        "sra        %[temp1],    %[temp1],          8            \n\t" // drop pixel 1's low byte, already placed in temp0
+        "ins        %[temp1],    %[temp2],          16,    16    \n\t" // temp1[31:16] = low halfword of pixel 2
+        "sll        %[temp2],    %[temp2],          8            \n\t" // move pixel 2's byte 2 up to bits 31..24
+        "balign     %[temp3],    %[temp2],          1            \n\t" // temp3 = (temp3 << 8) | (temp2 >> 24)
+        "addiu      %[src],      %[src],            16           \n\t"
+        "usw        %[temp0],    0(%[dst])                       \n\t" // three unaligned 32-bit stores = 12 output bytes
+        "usw        %[temp1],    4(%[dst])                       \n\t"
+        "usw        %[temp3],    8(%[dst])                       \n\t"
+        "bne        %[src],      %[p_loop1_end],    0b           \n\t"
+        " addiu     %[dst],      %[dst],            12           \n\t" // delay slot: dst += 12 on every iteration
+        "3:                                                        \n\t" // tail: remaining pixels, one at a time
+        "beq        %[src],      %[p_loop2_end],    2f           \n\t" // nothing left: done
+        " nop                                                    \n\t"
+        "1:                                                        \n\t"
+        "lw         %[temp0],    0(%[src])                       \n\t"
+        "addiu      %[src],      %[src],            4            \n\t"
+        "addiu      %[dst],      %[dst],            3            \n\t" // dst is advanced first; the stores below use negative offsets
+        "ush        %[temp0],    -3(%[dst])                      \n\t" // low two bytes of the pixel
+        "sra        %[temp0],    %[temp0],          16           \n\t" // bring byte 2 down to the low byte
+        "bne        %[src],      %[p_loop2_end],    1b           \n\t"
+        " sb        %[temp0],    -1(%[dst])                      \n\t" // delay slot: third byte of the pixel
+        "2:                                                        \n\t"
+        ".set       pop                                          \n\t"
+        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dst] "+&r"(dst),
+          [src] "+&r"(src) // temps are early-clobber outputs; dst/src are read and advanced
+        : [p_loop1_end] "r"(p_loop1_end), [p_loop2_end] "r"(p_loop2_end)
+        : "memory"); // the asm stores through dst
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LDspInitMIPSdspR2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) {
+    VP8LConvertBGRAToBGR = ConvertBGRAToBGR; // colorspace converters
+    VP8LConvertBGRAToRGB = ConvertBGRAToRGB;
+    VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
+    VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444;
+    VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565;
+    VP8LTransformColorInverse = TransformColorInverse; // inverse transforms
+    VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
+    VP8LMapColor8b = MapAlpha; // VP8LMapColor* hooks
+    VP8LMapColor32b = MapARGB;
+    VP8LPredictors[13] = Predictor13; // only slots 5..13 are replaced; other entries are left untouched
+    VP8LPredictors[12] = Predictor12;
+    VP8LPredictors[11] = Predictor11;
+    VP8LPredictors[10] = Predictor10;
+    VP8LPredictors[9] = Predictor9;
+    VP8LPredictors[8] = Predictor8;
+    VP8LPredictors[7] = Predictor7;
+    VP8LPredictors[6] = Predictor6;
+    VP8LPredictors[5] = Predictor5;
+}
+
+#else // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)
+
+#endif // WEBP_USE_MIPS_DSP_R2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/lossless_neon.c b/codec/L2/demos/webpEnc/host/src/dsp/lossless_neon.c
new file mode 100644
index 0000000000..e5349ee125
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/lossless_neon.c
@@ -0,0 +1,235 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// NEON variant of methods for lossless decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include <arm_neon.h>
+
+#include "./lossless.h"
+#include "./neon.h"
+
+//------------------------------------------------------------------------------
+// Colorspace conversion functions
+
+#if !defined(WORK_AROUND_GCC)
+// gcc 4.6.0 (NDK-r9) had some trouble with this code, so it is only enabled
+// for gcc 4.8.x and later.
+static void ConvertBGRAToRGBA(const uint32_t* src, int num_pixels, uint8_t* dst) {
+    // 16 pixels per iteration; the num_pixels % 16 remainder goes to plain C.
+    const uint32_t* const end = src + (num_pixels & ~15);
+    for (; src < end; src += 16) {
+        uint8x16x4_t pixel = vld4q_u8((const uint8_t*)src); // load only reads: keep const
+        const uint8x16_t tmp = pixel.val[0]; // swap B and R (VSWP has no intrinsic)
+        pixel.val[0] = pixel.val[2];
+        pixel.val[2] = tmp;
+        vst4q_u8(dst, pixel);
+        dst += 64;
+    }
+    VP8LConvertBGRAToRGBA_C(src, num_pixels & 15, dst); // left-overs
+}
+
+static void ConvertBGRAToBGR(const uint32_t* src, int num_pixels, uint8_t* dst) {
+    const uint32_t* const end = src + (num_pixels & ~15); // 16 pixels per pass
+    for (; src < end; src += 16, dst += 48) {
+        // De-interleave the 4 byte lanes (const-preserving cast), store lanes 0..2.
+        const uint8x16x4_t pixel = vld4q_u8((const uint8_t*)src);
+        const uint8x16x3_t tmp = {{pixel.val[0], pixel.val[1], pixel.val[2]}};
+        vst3q_u8(dst, tmp);
+    }
+    VP8LConvertBGRAToBGR_C(src, num_pixels & 15, dst); // left-overs
+}
+
+static void ConvertBGRAToRGB(const uint32_t* src, int num_pixels, uint8_t* dst) {
+    const uint32_t* const end = src + (num_pixels & ~15); // 16 pixels per pass
+    for (; src < end; src += 16, dst += 48) {
+        // De-interleave the 4 byte lanes (const-preserving cast), store lanes 2,1,0.
+        const uint8x16x4_t pixel = vld4q_u8((const uint8_t*)src);
+        const uint8x16x3_t tmp = {{pixel.val[2], pixel.val[1], pixel.val[0]}};
+        vst3q_u8(dst, tmp);
+    }
+    VP8LConvertBGRAToRGB_C(src, num_pixels & 15, dst); // left-overs
+}
+
+#else // WORK_AROUND_GCC
+
+// gcc-4.6.0 fallback
+
+static const uint8_t kRGBAShuffle[8] = {2, 1, 0, 3, 6, 5, 4, 7};
+
+static void ConvertBGRAToRGBA(const uint32_t* src, int num_pixels, uint8_t* dst) {
+    const uint32_t* const end = src + (num_pixels & ~1); // 2 pixels per pass
+    const uint8x8_t shuffle = vld1_u8(kRGBAShuffle);
+    for (; src < end; src += 2, dst += 8) {
+        // Const-preserving cast, matching the BGR/RGB fallbacks in this file.
+        const uint8x8_t pixels = vld1_u8((const uint8_t*)src);
+        vst1_u8(dst, vtbl1_u8(pixels, shuffle)); // bytes 2,1,0,3 of each pixel
+    }
+    VP8LConvertBGRAToRGBA_C(src, num_pixels & 1, dst); // left-overs
+}
+
+static const uint8_t kBGRShuffle[3][8] = {
+    {0, 1, 2, 4, 5, 6, 8, 9}, {10, 12, 13, 14, 16, 17, 18, 20}, {21, 22, 24, 25, 26, 28, 29, 30}};
+
+static void ConvertBGRAToBGR(const uint32_t* src, int num_pixels, uint8_t* dst) {
+    // 8 pixels (32 bytes) in, 24 bytes out per pass; kBGRShuffle picks bytes 0, 1, 2 of each pixel.
+    const uint8x8_t idx_lo = vld1_u8(kBGRShuffle[0]);
+    const uint8x8_t idx_mid = vld1_u8(kBGRShuffle[1]);
+    const uint8x8_t idx_hi = vld1_u8(kBGRShuffle[2]);
+    const uint32_t* const stop = src + (num_pixels & ~7);
+    for (; src < stop; src += 8, dst += 8 * 3) {
+        uint8x8x4_t quad;
+        INIT_VECTOR4(quad, vld1_u8((const uint8_t*)(src + 0)), vld1_u8((const uint8_t*)(src + 2)),
+                     vld1_u8((const uint8_t*)(src + 4)), vld1_u8((const uint8_t*)(src + 6)));
+        vst1_u8(dst + 0, vtbl4_u8(quad, idx_lo));
+        vst1_u8(dst + 8, vtbl4_u8(quad, idx_mid));
+        vst1_u8(dst + 16, vtbl4_u8(quad, idx_hi));
+    }
+    VP8LConvertBGRAToBGR_C(src, num_pixels & 7, dst); // left-overs
+}
+
+static const uint8_t kRGBShuffle[3][8] = {
+    {2, 1, 0, 6, 5, 4, 10, 9}, {8, 14, 13, 12, 18, 17, 16, 22}, {21, 20, 26, 25, 24, 30, 29, 28}};
+
+static void ConvertBGRAToRGB(const uint32_t* src, int num_pixels, uint8_t* dst) {
+    // 8 pixels (32 bytes) in, 24 bytes out per pass; kRGBShuffle picks bytes 2, 1, 0 of each pixel.
+    const uint8x8_t idx_lo = vld1_u8(kRGBShuffle[0]);
+    const uint8x8_t idx_mid = vld1_u8(kRGBShuffle[1]);
+    const uint8x8_t idx_hi = vld1_u8(kRGBShuffle[2]);
+    const uint32_t* const stop = src + (num_pixels & ~7);
+    for (; src < stop; src += 8, dst += 8 * 3) {
+        uint8x8x4_t quad;
+        INIT_VECTOR4(quad, vld1_u8((const uint8_t*)(src + 0)), vld1_u8((const uint8_t*)(src + 2)),
+                     vld1_u8((const uint8_t*)(src + 4)), vld1_u8((const uint8_t*)(src + 6)));
+        vst1_u8(dst + 0, vtbl4_u8(quad, idx_lo));
+        vst1_u8(dst + 8, vtbl4_u8(quad, idx_mid));
+        vst1_u8(dst + 16, vtbl4_u8(quad, idx_hi));
+    }
+    VP8LConvertBGRAToRGB_C(src, num_pixels & 7, dst); // left-overs
+}
+
+#endif // !WORK_AROUND_GCC
+
+//------------------------------------------------------------------------------
+// Subtract-Green Transform
+
+// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
+// non-standard versions there.
+#if defined(__APPLE__) && defined(__aarch64__) && defined(__apple_build_version__) && \
+    (__apple_build_version__ < 6020037)
+#define USE_VTBLQ
+#endif
+
+#ifdef USE_VTBLQ
+// 255 = byte will be zeroed
+static const uint8_t kGreenShuffle[16] = {1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255};
+
+static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb, const uint8x16_t shuffle) { // USE_VTBLQ variant: 16-entry index vector
+    return vcombine_u8(vtbl1q_u8(argb, vget_low_u8(shuffle)), vtbl1q_u8(argb, vget_high_u8(shuffle))); // each half of shuffle indexes the whole 16-byte argb
+}
+#else  // !USE_VTBLQ
+// 255 = byte will be zeroed
+static const uint8_t kGreenShuffle[8] = {1, 255, 1, 255, 5, 255, 5, 255};
+
+static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb, const uint8x8_t shuffle) { // default variant: one 8-entry index vector
+    return vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle), vtbl1_u8(vget_high_u8(argb), shuffle)); // same indices applied to the low and the high 8 bytes of argb
+}
+#endif // USE_VTBLQ
+
+static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
+#ifdef USE_VTBLQ
+    const uint8x16_t shuffle = vld1q_u8(kGreenShuffle);
+#else
+    const uint8x8_t shuffle = vld1_u8(kGreenShuffle);
+#endif
+    const int vec_pixels = num_pixels & ~3; // whole groups of 4 pixels
+    const uint32_t* const tail = argb_data + vec_pixels;
+    for (; argb_data < tail; argb_data += 4) {
+        uint8_t* const bytes = (uint8_t*)argb_data;
+        const uint8x16_t px = vld1q_u8(bytes);
+        vst1q_u8(bytes, vaddq_u8(px, DoGreenShuffle(px, shuffle))); // byte-wise add of the shuffled greens
+    }
+    VP8LAddGreenToBlueAndRed_C(argb_data, num_pixels & 3); // remaining pixels in plain C
+}
+
+//------------------------------------------------------------------------------
+// Color Transform
+
+static void TransformColorInverse(const VP8LMultipliers* const m, uint32_t* argb_data, int num_pixels) { // NEON path: 4 pixels per iteration, remainder in C
+// sign-extended multiplying constants, pre-shifted by 6.
+#define CST(X) (((int16_t)(m->X << 8)) >> 6)
+    const int16_t rb[8] = {CST(green_to_blue_), CST(green_to_red_), CST(green_to_blue_), CST(green_to_red_),
+                           CST(green_to_blue_), CST(green_to_red_), CST(green_to_blue_), CST(green_to_red_)};
+    const int16x8_t mults_rb = vld1q_s16(rb); // per pixel (two 16-bit lanes): {green_to_blue, green_to_red}
+    const int16_t b2[8] = {
+        0, CST(red_to_blue_), 0, CST(red_to_blue_), 0, CST(red_to_blue_), 0, CST(red_to_blue_),
+    };
+    const int16x8_t mults_b2 = vld1q_s16(b2); // per pixel (two 16-bit lanes): {0, red_to_blue}
+#undef CST
+#ifdef USE_VTBLQ
+    static const uint8_t kg0g0[16] = {255, 1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13};
+    const uint8x16_t shuffle = vld1q_u8(kg0g0);
+#else
+    static const uint8_t k0g0g[8] = {255, 1, 255, 1, 255, 5, 255, 5}; // index 255 selects zero, 1 and 5 select the green bytes
+    const uint8x8_t shuffle = vld1_u8(k0g0g);
+#endif
+    const uint32x4_t mask_ag = vdupq_n_u32(0xff00ff00u); // keeps the alpha and green bytes of each pixel
+    int i;
+    for (i = 0; i + 4 <= num_pixels; i += 4) { // whole groups of 4 pixels
+        const uint8x16_t in = vld1q_u8((uint8_t*)(argb_data + i));
+        const uint32x4_t a0g0 = vandq_u32(vreinterpretq_u32_u8(in), mask_ag); // alpha/green pass through unchanged
+        // 0 g 0 g
+        const uint8x16_t greens = DoGreenShuffle(in, shuffle);
+        // x dr  x db1
+        const int16x8_t A = vqdmulhq_s16(vreinterpretq_s16_u8(greens), mults_rb); // high half of the doubled product
+        // x r'  x   b'
+        const int8x16_t B = vaddq_s8(vreinterpretq_s8_u8(in), vreinterpretq_s8_s16(A));
+        // r' 0   b' 0
+        const int16x8_t C = vshlq_n_s16(vreinterpretq_s16_s8(B), 8);
+        // x db2  0  0
+        const int16x8_t D = vqdmulhq_s16(C, mults_b2);
+        // 0  x db2  0
+        const uint32x4_t E = vshrq_n_u32(vreinterpretq_u32_s16(D), 8);
+        // r' x  b'' 0
+        const int8x16_t F = vaddq_s8(vreinterpretq_s8_u32(E), vreinterpretq_s8_s16(C));
+        // 0  r'  0  b''
+        const uint16x8_t G = vshrq_n_u16(vreinterpretq_u16_s8(F), 8);
+        const uint32x4_t out = vorrq_u32(vreinterpretq_u32_u16(G), a0g0); // merge new red/blue with the saved alpha/green
+        vst1q_u32(argb_data + i, out); // in-place update
+    }
+    // Fall-back to C-version for left-overs.
+    VP8LTransformColorInverse_C(m, argb_data + i, num_pixels - i);
+}
+
+#undef USE_VTBLQ
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LDspInitNEON(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitNEON(void) {
+    // Point the VP8L* hooks at the NEON implementations defined in this file.
+    VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
+    VP8LTransformColorInverse = TransformColorInverse;
+    VP8LConvertBGRAToRGB = ConvertBGRAToRGB;
+    VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
+    VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
+}
+
+#else // !WEBP_USE_NEON
+
+WEBP_DSP_INIT_STUB(VP8LDspInitNEON)
+
+#endif // WEBP_USE_NEON
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/lossless_sse2.c b/codec/L2/demos/webpEnc/host/src/dsp/lossless_sse2.c
new file mode 100644
index 0000000000..720f877501
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/lossless_sse2.c
@@ -0,0 +1,361 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 variant of methods for lossless decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+#include <assert.h>
+#include <emmintrin.h>
+#include "./lossless.h"
+
+//------------------------------------------------------------------------------
+// Predictor Transform
+
+static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, uint32_t c2) {
+    // Per byte channel: c0 + c1 - c2, clamped to [0, 255].
+    const __m128i kZero = _mm_setzero_si128();
+    // Widen every byte to 16 bits so the sum and difference cannot wrap.
+    const __m128i wide0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), kZero);
+    const __m128i wide1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), kZero);
+    const __m128i wide2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), kZero);
+    const __m128i delta = _mm_sub_epi16(_mm_add_epi16(wide0, wide1), wide2);
+    // The unsigned-saturating pack performs the clamp.
+    return _mm_cvtsi128_si32(_mm_packus_epi16(delta, delta));
+}
+
+static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, uint32_t c2) {
+    // Per byte channel: avg = (c0 + c1) >> 1, result = clamp(avg + (avg - c2) / 2)
+    // where the division truncates toward zero.
+    const __m128i kZero = _mm_setzero_si128();
+    const __m128i w0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), kZero);
+    const __m128i w1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), kZero);
+    const __m128i w2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), kZero);
+    const __m128i avg = _mm_srli_epi16(_mm_add_epi16(w1, w0), 1);
+    // The compare yields -1 in lanes where avg - c2 is negative; subtracting it
+    // adds 1 there, so the arithmetic shift below rounds toward zero.
+    const __m128i is_neg = _mm_cmpgt_epi16(w2, avg);
+    const __m128i diff = _mm_sub_epi16(_mm_sub_epi16(avg, w2), is_neg);
+    const __m128i half = _mm_srai_epi16(diff, 1);
+    const __m128i sum = _mm_add_epi16(avg, half);
+    return _mm_cvtsi128_si32(_mm_packus_epi16(sum, sum));
+}
+
+static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
+    // Returns a when sum|b - c| - sum|a - c| (summed over the four byte
+    // channels) is <= 0, and b otherwise.
+    const __m128i kZero = _mm_setzero_si128();
+    const __m128i va = _mm_cvtsi32_si128(a);
+    const __m128i vb = _mm_cvtsi32_si128(b);
+    const __m128i vc = _mm_cvtsi32_si128(c);
+    // |x - y| on unsigned bytes: one of the two saturating differences is
+    // zero, so OR-ing them gives the absolute difference.
+    const __m128i abs_ac = _mm_or_si128(_mm_subs_epu8(va, vc), _mm_subs_epu8(vc, va));
+    const __m128i abs_bc = _mm_or_si128(_mm_subs_epu8(vb, vc), _mm_subs_epu8(vc, vb));
+    // Widen to 16 bits and take |b - c| - |a - c| per channel.
+    const __m128i delta =
+        _mm_sub_epi16(_mm_unpacklo_epi8(abs_bc, kZero), _mm_unpacklo_epi8(abs_ac, kZero));
+    int16_t lanes[8];
+    int total = 0;
+    int k;
+    _mm_storeu_si128((__m128i*)lanes, delta);
+    // Only the low four lanes carry channel data.
+    for (k = 0; k < 4; ++k) total += lanes[k];
+    return (total <= 0) ? a : b;
+}
+
+static WEBP_INLINE __m128i Average2_128i(uint32_t a0, uint32_t a1) {
+    // Returns (a0 + a1) >> 1 per byte channel, left widened in 16-bit lanes so
+    // callers can keep accumulating before packing back to bytes.
+    const __m128i kZero = _mm_setzero_si128();
+    const __m128i w0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a0), kZero);
+    const __m128i w1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), kZero);
+    return _mm_srli_epi16(_mm_add_epi16(w1, w0), 1);
+}
+
+static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
+    // Per-channel (a0 + a1) >> 1, packed back to one byte per channel.
+    const __m128i wide = Average2_128i(a0, a1);
+    // Saturating pack; only the low 32 bits are returned.
+    return _mm_cvtsi128_si32(_mm_packus_epi16(wide, wide));
+}
+
+static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
+    // Per byte channel: (((a0 + a2) >> 1) + a1) >> 1.
+    const __m128i kZero = _mm_setzero_si128();
+    const __m128i outer = Average2_128i(a0, a2);
+    const __m128i mid = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), kZero);
+    const __m128i wide = _mm_srli_epi16(_mm_add_epi16(outer, mid), 1);
+    // Pack the 16-bit lanes back to bytes and return the low 32 bits.
+    const __m128i packed = _mm_packus_epi16(wide, wide);
+    return _mm_cvtsi128_si32(packed);
+}
+
+static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, uint32_t a2, uint32_t a3) {
+    // Per byte channel: (((a0 + a1) >> 1) + ((a2 + a3) >> 1)) >> 1.
+    const __m128i first = Average2_128i(a0, a1);
+    const __m128i second = Average2_128i(a2, a3);
+    const __m128i wide = _mm_srli_epi16(_mm_add_epi16(second, first), 1);
+    // Pack the 16-bit lanes back to bytes and return the low 32 bits.
+    const __m128i packed = _mm_packus_epi16(wide, wide);
+    return _mm_cvtsi128_si32(packed);
+}
+
+static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
+    // Average3 of left, top[0] and top[1].
+    return Average3(left, top[0], top[1]);
+}
+static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
+    // Average2 of left and top[-1].
+    return Average2(left, top[-1]);
+}
+static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
+    // Average2 of left and top[0].
+    return Average2(left, top[0]);
+}
+static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
+    // Average2 of top[-1] and top[0]; left is not used by this predictor.
+    (void)left;
+    return Average2(top[-1], top[0]);
+}
+static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
+    // Average2 of top[0] and top[1]; left is not used by this predictor.
+    (void)left;
+    return Average2(top[0], top[1]);
+}
+static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
+    // Average4 of left, top[-1], top[0] and top[1].
+    return Average4(left, top[-1], top[0], top[1]);
+}
+static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
+    // Select() picks top[0] or left, using top[-1] as the reference pixel.
+    return Select(top[0], left, top[-1]);
+}
+static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
+    // Per channel: clamp(left + top[0] - top[-1]).
+    return ClampedAddSubtractFull(left, top[0], top[-1]);
+}
+static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
+    // ClampedAddSubtractHalf of left, top[0] and top[-1].
+    return ClampedAddSubtractHalf(left, top[0], top[-1]);
+}
+
+//------------------------------------------------------------------------------
+// Subtract-Green Transform
+
+static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
+    int done; // pixels already handled by the vector loop
+    for (done = 0; done + 4 <= num_pixels; done += 4) {
+        __m128i* const quad = (__m128i*)(argb_data + done);
+        const __m128i argb = _mm_loadu_si128(quad);
+        // Per pixel: 0 a 0 g, then green duplicated over alpha: 0 g 0 g.
+        const __m128i ag = _mm_srli_epi16(argb, 8);
+        const __m128i gg_lo = _mm_shufflelo_epi16(ag, _MM_SHUFFLE(2, 2, 0, 0));
+        const __m128i gg = _mm_shufflehi_epi16(gg_lo, _MM_SHUFFLE(2, 2, 0, 0));
+        _mm_storeu_si128(quad, _mm_add_epi8(argb, gg));
+    }
+    VP8LAddGreenToBlueAndRed_C(argb_data + done, num_pixels - done); // left-overs in plain C
+}
+
+//------------------------------------------------------------------------------
+// Color Transform
+
+static void TransformColorInverse(const VP8LMultipliers* const m, uint32_t* argb_data, int num_pixels) { // SSE2 path: 4 pixels per iteration, remainder in C
+// sign-extended multiplying constants, pre-shifted by 5.
+#define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend
+    const __m128i mults_rb =
+        _mm_set_epi16(CST(green_to_red_), CST(green_to_blue_), CST(green_to_red_), CST(green_to_blue_),
+                      CST(green_to_red_), CST(green_to_blue_), CST(green_to_red_), CST(green_to_blue_)); // per pixel, low lane first: green_to_blue, green_to_red
+    const __m128i mults_b2 =
+        _mm_set_epi16(CST(red_to_blue_), 0, CST(red_to_blue_), 0, CST(red_to_blue_), 0, CST(red_to_blue_), 0); // per pixel, low lane first: 0, red_to_blue
+#undef CST
+    const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks
+    int i;
+    for (i = 0; i + 4 <= num_pixels; i += 4) { // whole groups of 4 pixels
+        const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); // argb
+        const __m128i A = _mm_and_si128(in, mask_ag);                // a   0   g   0
+        const __m128i B = _mm_shufflelo_epi16(A, _MM_SHUFFLE(2, 2, 0, 0)); // copy each pixel's green lane over its alpha lane (low 2 pixels)
+        const __m128i C = _mm_shufflehi_epi16(B, _MM_SHUFFLE(2, 2, 0, 0)); // g0g0
+        const __m128i D = _mm_mulhi_epi16(C, mults_rb);                    // x dr  x db1
+        const __m128i E = _mm_add_epi8(in, D);                             // x r'  x   b'
+        const __m128i F = _mm_slli_epi16(E, 8);                            // r' 0   b' 0
+        const __m128i G = _mm_mulhi_epi16(F, mults_b2);                    // x db2  0  0
+        const __m128i H = _mm_srli_epi32(G, 8);                            // 0  x db2  0
+        const __m128i I = _mm_add_epi8(H, F);                              // r' x  b'' 0
+        const __m128i J = _mm_srli_epi16(I, 8);                            // 0  r'  0  b''
+        const __m128i out = _mm_or_si128(J, A); // merge new red/blue with the untouched alpha/green
+        _mm_storeu_si128((__m128i*)&argb_data[i], out); // in-place update
+    }
+    // Fall-back to C-version for left-overs.
+    VP8LTransformColorInverse_C(m, argb_data + i, num_pixels - i);
+}
+
+//------------------------------------------------------------------------------
+// Color-space conversion functions
+
+static void ConvertBGRAToRGBA(const uint32_t* src, int num_pixels, uint8_t* dst) { // SSE2 path: 8 pixels per iteration, remainder in C
+    const __m128i* in = (const __m128i*)src; // only accessed with unaligned loads
+    __m128i* out = (__m128i*)dst; // only accessed with unaligned stores
+    while (num_pixels >= 8) {
+        const __m128i bgra0 = _mm_loadu_si128(in++);         // bgra0|bgra1|bgra2|bgra3
+        const __m128i bgra4 = _mm_loadu_si128(in++);         // bgra4|bgra5|bgra6|bgra7
+        const __m128i v0l = _mm_unpacklo_epi8(bgra0, bgra4); // b0b4g0g4r0r4a0a4...
+        const __m128i v0h = _mm_unpackhi_epi8(bgra0, bgra4); // b2b6g2g6r2r6a2a6...
+        const __m128i v1l = _mm_unpacklo_epi8(v0l, v0h);     // b0b2b4b6g0g2g4g6...
+        const __m128i v1h = _mm_unpackhi_epi8(v0l, v0h);     // b1b3b5b7g1g3g5g7...
+        const __m128i v2l = _mm_unpacklo_epi8(v1l, v1h);     // b0...b7 | g0...g7
+        const __m128i v2h = _mm_unpackhi_epi8(v1l, v1h);     // r0...r7 | a0...a7
+        const __m128i ga0 = _mm_unpackhi_epi64(v2l, v2h);    // g0...g7 | a0...a7
+        const __m128i rb0 = _mm_unpacklo_epi64(v2h, v2l);    // r0...r7 | b0...b7
+        const __m128i rg0 = _mm_unpacklo_epi8(rb0, ga0);     // r0g0r1g1 ... r6g6r7g7
+        const __m128i ba0 = _mm_unpackhi_epi8(rb0, ga0);     // b0a0b1a1 ... b6a6b7a7
+        const __m128i rgba0 = _mm_unpacklo_epi16(rg0, ba0);  // rgba0|rgba1...
+        const __m128i rgba4 = _mm_unpackhi_epi16(rg0, ba0);  // rgba4|rgba5...
+        _mm_storeu_si128(out++, rgba0); // pixels 0..3
+        _mm_storeu_si128(out++, rgba4); // pixels 4..7
+        num_pixels -= 8;
+    }
+    // left-overs
+    VP8LConvertBGRAToRGBA_C((const uint32_t*)in, num_pixels, (uint8_t*)out); // in/out already point past the vectorized part
+}
+
+static void ConvertBGRAToRGBA4444(const uint32_t* src, int num_pixels, uint8_t* dst) { // SSE2 path: 8 pixels in, 16 bytes out per iteration, remainder in C
+    const __m128i mask_0x0f = _mm_set1_epi8(0x0f); // low nibble of every byte
+    const __m128i mask_0xf0 = _mm_set1_epi8(0xf0); // high nibble of every byte
+    const __m128i* in = (const __m128i*)src;
+    __m128i* out = (__m128i*)dst;
+    while (num_pixels >= 8) {
+        const __m128i bgra0 = _mm_loadu_si128(in++);         // bgra0|bgra1|bgra2|bgra3
+        const __m128i bgra4 = _mm_loadu_si128(in++);         // bgra4|bgra5|bgra6|bgra7
+        const __m128i v0l = _mm_unpacklo_epi8(bgra0, bgra4); // b0b4g0g4r0r4a0a4...
+        const __m128i v0h = _mm_unpackhi_epi8(bgra0, bgra4); // b2b6g2g6r2r6a2a6...
+        const __m128i v1l = _mm_unpacklo_epi8(v0l, v0h);     // b0b2b4b6g0g2g4g6...
+        const __m128i v1h = _mm_unpackhi_epi8(v0l, v0h);     // b1b3b5b7g1g3g5g7...
+        const __m128i v2l = _mm_unpacklo_epi8(v1l, v1h);     // b0...b7 | g0...g7
+        const __m128i v2h = _mm_unpackhi_epi8(v1l, v1h);     // r0...r7 | a0...a7
+        const __m128i ga0 = _mm_unpackhi_epi64(v2l, v2h);    // g0...g7 | a0...a7
+        const __m128i rb0 = _mm_unpacklo_epi64(v2h, v2l);    // r0...r7 | b0...b7
+        const __m128i ga1 = _mm_srli_epi16(ga0, 4);          // per byte: low nibble = old high nibble; high nibble = stray bits
+        const __m128i rb1 = _mm_and_si128(rb0, mask_0xf0);   // r0-|r1-|...|b6-|b7- (high nibbles kept)
+        const __m128i ga2 = _mm_and_si128(ga1, mask_0x0f);   // -g0|-g1|...|-a6|-a7 (stray bits cleared)
+        const __m128i rgba0 = _mm_or_si128(ga2, rb1);        // rg0..rg7 | ba0..ba7
+        const __m128i rgba1 = _mm_srli_si128(rgba0, 8);      // ba0..ba7 | 0
+#ifdef WEBP_SWAP_16BIT_CSP
+        const __m128i rgba = _mm_unpacklo_epi8(rgba1, rgba0); // barg0...barg7
+#else
+        const __m128i rgba = _mm_unpacklo_epi8(rgba0, rgba1); // rgba0...rgba7
+#endif
+        _mm_storeu_si128(out++, rgba); // 8 output pixels, 2 bytes each
+        num_pixels -= 8;
+    }
+    // left-overs
+    VP8LConvertBGRAToRGBA4444_C((const uint32_t*)in, num_pixels, (uint8_t*)out); // in/out already point past the vectorized part
+}
+
+static void ConvertBGRAToRGB565(const uint32_t* src, int num_pixels, uint8_t* dst) { // 8 pixels per iteration: 32 bytes read, 16 bytes written
+    const __m128i mask_0xe0 = _mm_set1_epi8(0xe0); // top 3 bits of every byte
+    const __m128i mask_0xf8 = _mm_set1_epi8(0xf8); // top 5 bits of every byte
+    const __m128i mask_0x07 = _mm_set1_epi8(0x07); // low 3 bits of every byte
+    const __m128i* in = (const __m128i*)src;
+    __m128i* out = (__m128i*)dst;
+    while (num_pixels >= 8) {
+        const __m128i bgra0 = _mm_loadu_si128(in++);         // bgra0|bgra1|bgra2|bgra3
+        const __m128i bgra4 = _mm_loadu_si128(in++);         // bgra4|bgra5|bgra6|bgra7
+        const __m128i v0l = _mm_unpacklo_epi8(bgra0, bgra4); // b0b4g0g4r0r4a0a4...
+        const __m128i v0h = _mm_unpackhi_epi8(bgra0, bgra4); // b2b6g2g6r2r6a2a6...
+        const __m128i v1l = _mm_unpacklo_epi8(v0l, v0h);     // b0b2b4b6g0g2g4g6...
+        const __m128i v1h = _mm_unpackhi_epi8(v0l, v0h);     // b1b3b5b7g1g3g5g7...
+        const __m128i v2l = _mm_unpacklo_epi8(v1l, v1h);     // b0...b7 | g0...g7
+        const __m128i v2h = _mm_unpackhi_epi8(v1l, v1h);     // r0...r7 | a0...a7
+        const __m128i ga0 = _mm_unpackhi_epi64(v2l, v2h);    // g0...g7 | a0...a7
+        const __m128i rb0 = _mm_unpacklo_epi64(v2h, v2l);    // r0...r7 | b0...b7
+        const __m128i rb1 = _mm_and_si128(rb0, mask_0xf8);   // -r0..-r7|-b0..-b7
+        const __m128i g_lo1 = _mm_srli_epi16(ga0, 5); // 16-bit shift: stray bits from the neighbouring byte are masked next
+        const __m128i g_lo2 = _mm_and_si128(g_lo1, mask_0x07); // g0-...g7-|xx (3b)
+        const __m128i g_hi1 = _mm_slli_epi16(ga0, 3); // 16-bit shift: stray bits from the neighbouring byte are masked next
+        const __m128i g_hi2 = _mm_and_si128(g_hi1, mask_0xe0); // -g0...-g7|xx (3b)
+        const __m128i b0 = _mm_srli_si128(rb1, 8);             // -b0...-b7|0
+        const __m128i rg1 = _mm_or_si128(rb1, g_lo2);          // gr0...gr7|xx
+        const __m128i b1 = _mm_srli_epi16(b0, 3); // no stray bits: the low 3 bits of every byte were already cleared by mask_0xf8
+        const __m128i gb1 = _mm_or_si128(b1, g_hi2); // bg0...bg7|xx
+#ifdef WEBP_SWAP_16BIT_CSP
+        const __m128i rgba = _mm_unpacklo_epi8(gb1, rg1); // bytes in memory per pixel: gb1 byte, then rg1 byte
+#else
+        const __m128i rgba = _mm_unpacklo_epi8(rg1, gb1);     // bytes in memory per pixel: rg1 byte, then gb1 byte
+#endif
+        _mm_storeu_si128(out++, rgba);
+        num_pixels -= 8;
+    }
+    // left-overs (fewer than 8 pixels) are passed to VP8LConvertBGRAToRGB565_C
+    VP8LConvertBGRAToRGB565_C((const uint32_t*)in, num_pixels, (uint8_t*)out);
+}
+
+static void ConvertBGRAToBGR(const uint32_t* src, int num_pixels, uint8_t* dst) { // 8 pixels per iteration: 32 bytes read, dst advanced by 24 bytes
+    const __m128i mask_l = _mm_set_epi32(0, 0x00ffffff, 0, 0x00ffffff); // keeps the low 3 bytes of 32-bit lanes 0 and 2
+    const __m128i mask_h = _mm_set_epi32(0x00ffffff, 0, 0x00ffffff, 0); // keeps the low 3 bytes of 32-bit lanes 1 and 3
+    const __m128i* in = (const __m128i*)src;
+    const uint8_t* const end = dst + num_pixels * 3;
+    // the last storel_epi64 below writes 8 bytes starting at offset 18, so 26 bytes of room (at least 9 pixels left) are required
+    while (dst + 26 <= end) {
+        const __m128i bgra0 = _mm_loadu_si128(in++);      // bgra0|bgra1|bgra2|bgra3
+        const __m128i bgra4 = _mm_loadu_si128(in++);      // bgra4|bgra5|bgra6|bgra7
+        const __m128i a0l = _mm_and_si128(bgra0, mask_l); // bgr0|0|bgr0|0
+        const __m128i a4l = _mm_and_si128(bgra4, mask_l); // bgr0|0|bgr0|0
+        const __m128i a0h = _mm_and_si128(bgra0, mask_h); // 0|bgr0|0|bgr0
+        const __m128i a4h = _mm_and_si128(bgra4, mask_h); // 0|bgr0|0|bgr0
+        const __m128i b0h = _mm_srli_epi64(a0h, 8);       // 000b|gr00|000b|gr00
+        const __m128i b4h = _mm_srli_epi64(a4h, 8);       // 000b|gr00|000b|gr00
+        const __m128i c0 = _mm_or_si128(a0l, b0h);        // bgrbgr00|bgrbgr00
+        const __m128i c4 = _mm_or_si128(a4l, b4h);        // bgrbgr00|bgrbgr00
+        const __m128i c2 = _mm_srli_si128(c0, 8); // upper 8 bytes of c0 moved to the low half
+        const __m128i c6 = _mm_srli_si128(c4, 8); // upper 8 bytes of c4 moved to the low half
+        _mm_storel_epi64((__m128i*)(dst + 0), c0); // each store writes 8 bytes; the next store overwrites the 2 padding bytes
+        _mm_storel_epi64((__m128i*)(dst + 6), c2);
+        _mm_storel_epi64((__m128i*)(dst + 12), c4);
+        _mm_storel_epi64((__m128i*)(dst + 18), c6);
+        dst += 24;
+        num_pixels -= 8;
+    }
+    // left-overs (whatever the loop above did not consume) are passed to VP8LConvertBGRAToBGR_C
+    VP8LConvertBGRAToBGR_C((const uint32_t*)in, num_pixels, dst);
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LDspInitSSE2(void); // declaration of the entry point defined just below
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitSSE2(void) { // installs this file's SSE2 routines into the VP8L* hooks
+    VP8LPredictors[5] = Predictor5; // only slots 5..13 are assigned here; the other slots are left untouched
+    VP8LPredictors[6] = Predictor6;
+    VP8LPredictors[7] = Predictor7;
+    VP8LPredictors[8] = Predictor8;
+    VP8LPredictors[9] = Predictor9;
+    VP8LPredictors[10] = Predictor10;
+    VP8LPredictors[11] = Predictor11;
+    VP8LPredictors[12] = Predictor12;
+    VP8LPredictors[13] = Predictor13;
+
+    VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
+    VP8LTransformColorInverse = TransformColorInverse;
+
+    VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA; // color-space converters defined earlier in this file
+    VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444;
+    VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565;
+    VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
+}
+
+#else // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(VP8LDspInitSSE2)
+
+#endif // WEBP_USE_SSE2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/mips_macro.h b/codec/L2/demos/webpEnc/host/src/dsp/mips_macro.h
new file mode 100644
index 0000000000..717b4279b2
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/mips_macro.h
@@ -0,0 +1,274 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS common macros
+
+#ifndef WEBP_DSP_MIPS_MACRO_H_
+#define WEBP_DSP_MIPS_MACRO_H_
+
+#if defined(__GNUC__) && defined(__ANDROID__) && LOCAL_GCC_VERSION == 0x409 // NOTE(review): LOCAL_GCC_VERSION is not defined in this header; presumably provided by a header included earlier - confirm
+#define WORK_AROUND_GCC // flag macro, set only when the condition above holds
+#endif
+
+#define STR(s) #s // stringifies the argument exactly as written (no macro expansion)
+#define XSTR(s) STR(s) // macro-expands s first, then stringifies the result
+
+// O0[31..16 | 15..0] = I0[31..16 | 15..0] + I1[31..16 | 15..0]
+// O1[31..16 | 15..0] = I0[31..16 | 15..0] - I1[31..16 | 15..0]
+// O - output (each argument is the name of an asm operand, pasted into %[...])
+// I - input (macro doesn't change it)
+#define ADD_SUB_HALVES(O0, O1, I0, I1)                   \
+    "addq.ph          %[" #O0 "],   %[" #I0 "],  %[" #I1 \
+    "]           \n\t"                                   \
+    "subq.ph          %[" #O1 "],   %[" #I0 "],  %[" #I1 "]           \n\t"
+
+// O - output: asm operand name receiving the halfword loaded with lh
+// I - input (macro doesn't change it)
+// I[0/1] - offset in bytes, relative to the asm operand named "in" (hard-coded in the macro)
+#define LOAD_IN_X2(O0, O1, I0, I1)        \
+    "lh               %[" #O0 "],   " #I0 \
+    "(%[in])                  \n\t"       \
+    "lh               %[" #O1 "],   " #I1 "(%[in])                  \n\t"
+
+// I0 - location: name of the asm operand used as base address for all four ulw loads
+// I1..I9 - offsets in bytes: O(k) is loaded from I(k+1) + I9 * I(k+5), k = 0..3; only I9 is macro-expanded (XSTR)
+#define LOAD_WITH_OFFSET_X4(O0, O1, O2, O3, I0, I1, I2, I3, I4, I5, I6, I7, I8, I9)                  \
+    "ulw    %[" #O0 "],    " #I1 "+" XSTR(I9) "*" #I5 "(%[" #I0                                      \
+                                              "])       \n\t"                                        \
+                                              "ulw    %[" #O1 "],    " #I2                           \
+                                              "+" XSTR(I9) "*" #I6 "(%[" #I0                         \
+                                                           "])       \n\t"                           \
+                                                           "ulw    %[" #O2 "],    " #I3              \
+                                                           "+" XSTR(I9) "*" #I7 "(%[" #I0            \
+                                                                        "])       \n\t"              \
+                                                                        "ulw    %[" #O3 "],    " #I4 \
+                                                                        "+" XSTR(I9) "*" #I8 "(%[" #I0 "])       \n\t"
+
+// O - output: O(2k) = (I(k) * kC2) >> 16 and O(2k+1) = (I(k) * kC1) >> 16 for k = 0..3 (mul, then sra)
+// IO - input/output: IO0 += I4, IO1 += I5, IO2 -= I6, IO3 -= I7
+// I - input (macro doesn't change it); the enclosing asm must also provide operands named kC1 and kC2
+#define MUL_SHIFT_SUM(O0, O1, O2, O3, O4, O5, O6, O7, IO0, IO1, IO2, IO3, I0, I1, I2, I3, I4, I5, I6, I7) \
+    "mul              %[" #O0 "],   %[" #I0                                                               \
+    "],   %[kC2]        \n\t"                                                                             \
+    "mul              %[" #O1 "],   %[" #I0                                                               \
+    "],   %[kC1]        \n\t"                                                                             \
+    "mul              %[" #O2 "],   %[" #I1                                                               \
+    "],   %[kC2]        \n\t"                                                                             \
+    "mul              %[" #O3 "],   %[" #I1                                                               \
+    "],   %[kC1]        \n\t"                                                                             \
+    "mul              %[" #O4 "],   %[" #I2                                                               \
+    "],   %[kC2]        \n\t"                                                                             \
+    "mul              %[" #O5 "],   %[" #I2                                                               \
+    "],   %[kC1]        \n\t"                                                                             \
+    "mul              %[" #O6 "],   %[" #I3                                                               \
+    "],   %[kC2]        \n\t"                                                                             \
+    "mul              %[" #O7 "],   %[" #I3                                                               \
+    "],   %[kC1]        \n\t"                                                                             \
+    "sra              %[" #O0 "],   %[" #O0                                                               \
+    "],   16            \n\t"                                                                             \
+    "sra              %[" #O1 "],   %[" #O1                                                               \
+    "],   16            \n\t"                                                                             \
+    "sra              %[" #O2 "],   %[" #O2                                                               \
+    "],   16            \n\t"                                                                             \
+    "sra              %[" #O3 "],   %[" #O3                                                               \
+    "],   16            \n\t"                                                                             \
+    "sra              %[" #O4 "],   %[" #O4                                                               \
+    "],   16            \n\t"                                                                             \
+    "sra              %[" #O5 "],   %[" #O5                                                               \
+    "],   16            \n\t"                                                                             \
+    "sra              %[" #O6 "],   %[" #O6                                                               \
+    "],   16            \n\t"                                                                             \
+    "sra              %[" #O7 "],   %[" #O7                                                               \
+    "],   16            \n\t"                                                                             \
+    "addu             %[" #IO0 "],  %[" #IO0 "],  %[" #I4                                                 \
+    "]    \n\t"                                                                                           \
+    "addu             %[" #IO1 "],  %[" #IO1 "],  %[" #I5                                                 \
+    "]    \n\t"                                                                                           \
+    "subu             %[" #IO2 "],  %[" #IO2 "],  %[" #I6                                                 \
+    "]    \n\t"                                                                                           \
+    "subu             %[" #IO3 "],  %[" #IO3 "],  %[" #I7 "]    \n\t"
+
+// O - output: "ins O, I, 16, 16" targets bits 31..16 of O; presumably O[15..0] is preserved (see MIPS32 INS) - TODO confirm
+// I - input (macro doesn't change it)
+#define INSERT_HALF_X2(O0, O1, I0, I1)      \
+    "ins              %[" #O0 "],   %[" #I0 \
+    "], 16,    16           \n\t"           \
+    "ins              %[" #O1 "],   %[" #I1 "], 16,    16           \n\t"
+
+// O - output: O(k) = I(k) shifted right by 16 with the sra instruction, k = 0..3
+// I - input (macro doesn't change it)
+#define SRA_16(O0, O1, O2, O3, I0, I1, I2, I3) \
+    "sra              %[" #O0 "],  %[" #I0     \
+    "],  16                  \n\t"             \
+    "sra              %[" #O1 "],  %[" #I1     \
+    "],  16                  \n\t"             \
+    "sra              %[" #O2 "],  %[" #I2     \
+    "],  16                  \n\t"             \
+    "sra              %[" #O3 "],  %[" #I3 "],  16                  \n\t"
+
+// temp0[31..16 | 15..0] = temp8[31..16 | 15..0] + temp12[31..16 | 15..0]
+// temp1[31..16 | 15..0] = temp8[31..16 | 15..0] - temp12[31..16 | 15..0]
+// temp0[31..16 | 15..0] = temp0[31..16 >> 3 | 15..0 >> 3]
+// temp1[31..16 | 15..0] = temp1[31..16 >> 3 | 15..0 >> 3]
+// O - output: O(2k) = (I(k) + I(k+4)) >> 3 and O(2k+1) = (I(k) - I(k+4)) >> 3 per halfword, k = 0..3
+// I - input (macro doesn't change it)
+#define SHIFT_R_SUM_X2(O0, O1, O2, O3, O4, O5, O6, O7, I0, I1, I2, I3, I4, I5, I6, I7) \
+    "addq.ph          %[" #O0 "],   %[" #I0 "],   %[" #I4                              \
+    "]    \n\t"                                                                        \
+    "subq.ph          %[" #O1 "],   %[" #I0 "],   %[" #I4                              \
+    "]    \n\t"                                                                        \
+    "addq.ph          %[" #O2 "],   %[" #I1 "],   %[" #I5                              \
+    "]    \n\t"                                                                        \
+    "subq.ph          %[" #O3 "],   %[" #I1 "],   %[" #I5                              \
+    "]    \n\t"                                                                        \
+    "addq.ph          %[" #O4 "],   %[" #I2 "],   %[" #I6                              \
+    "]    \n\t"                                                                        \
+    "subq.ph          %[" #O5 "],   %[" #I2 "],   %[" #I6                              \
+    "]    \n\t"                                                                        \
+    "addq.ph          %[" #O6 "],   %[" #I3 "],   %[" #I7                              \
+    "]    \n\t"                                                                        \
+    "subq.ph          %[" #O7 "],   %[" #I3 "],   %[" #I7                              \
+    "]    \n\t"                                                                        \
+    "shra.ph          %[" #O0 "],   %[" #O0                                            \
+    "],   3             \n\t"                                                          \
+    "shra.ph          %[" #O1 "],   %[" #O1                                            \
+    "],   3             \n\t"                                                          \
+    "shra.ph          %[" #O2 "],   %[" #O2                                            \
+    "],   3             \n\t"                                                          \
+    "shra.ph          %[" #O3 "],   %[" #O3                                            \
+    "],   3             \n\t"                                                          \
+    "shra.ph          %[" #O4 "],   %[" #O4                                            \
+    "],   3             \n\t"                                                          \
+    "shra.ph          %[" #O5 "],   %[" #O5                                            \
+    "],   3             \n\t"                                                          \
+    "shra.ph          %[" #O6 "],   %[" #O6                                            \
+    "],   3             \n\t"                                                          \
+    "shra.ph          %[" #O7 "],   %[" #O7 "],   3             \n\t"
+
+// precrq.ph.w temp0, temp8, temp2
+//   temp0 = temp8[31..16] | temp2[31..16]
+// ins temp2, temp8, 16, 16
+//   temp2 = temp8[31..16] | temp2[15..0]  (NOTE(review): MIPS32 INS inserts the low 16 bits, i.e. temp8[15..0] - confirm)
+// O - output: O(k) = precrq.ph.w of I(k) and IO(k), k = 0..3
+// IO - input/output: read by precrq.ph.w first, then updated by "ins IO(k), I(k), 16, 16"
+// I - input (macro doesn't change it)
+#define PACK_2_HALVES_TO_WORD(O0, O1, O2, O3, IO0, IO1, IO2, IO3, I0, I1, I2, I3) \
+    "precrq.ph.w      %[" #O0 "],    %[" #I0 "],  %[" #IO0                        \
+    "]       \n\t"                                                                \
+    "precrq.ph.w      %[" #O1 "],    %[" #I1 "],  %[" #IO1                        \
+    "]       \n\t"                                                                \
+    "ins              %[" #IO0 "],   %[" #I0                                      \
+    "],  16,    16         \n\t"                                                  \
+    "ins              %[" #IO1 "],   %[" #I1                                      \
+    "],  16,    16         \n\t"                                                  \
+    "precrq.ph.w      %[" #O2 "],    %[" #I2 "],  %[" #IO2                        \
+    "]       \n\t"                                                                \
+    "precrq.ph.w      %[" #O3 "],    %[" #I3 "],  %[" #IO3                        \
+    "]       \n\t"                                                                \
+    "ins              %[" #IO2 "],   %[" #I2                                      \
+    "],  16,    16         \n\t"                                                  \
+    "ins              %[" #IO3 "],   %[" #I3 "],  16,    16         \n\t"
+
+// preceu.ph.qbr temp0, temp8
+//   temp0 = 0 | 0 | temp8[23..16] | temp8[7..0]  (NOTE(review): the DSP ASE manual describes qbr as expanding bytes [15..8] and [7..0] - confirm)
+// preceu.ph.qbl temp1, temp8
+//   temp1 = temp8[23..16] | temp8[7..0] | 0 | 0  (NOTE(review): the DSP ASE manual describes qbl as expanding bytes [31..24] and [23..16] - confirm)
+// O - output: O(2k) = preceu.ph.qbr of I(k), O(2k+1) = preceu.ph.qbl of I(k), k = 0..3
+// I - input (macro doesn't change it)
+#define CONVERT_2_BYTES_TO_HALF(O0, O1, O2, O3, O4, O5, O6, O7, I0, I1, I2, I3) \
+    "preceu.ph.qbr    %[" #O0 "],   %[" #I0                                     \
+    "]                      \n\t"                                               \
+    "preceu.ph.qbl    %[" #O1 "],   %[" #I0                                     \
+    "]                      \n\t"                                               \
+    "preceu.ph.qbr    %[" #O2 "],   %[" #I1                                     \
+    "]                      \n\t"                                               \
+    "preceu.ph.qbl    %[" #O3 "],   %[" #I1                                     \
+    "]                      \n\t"                                               \
+    "preceu.ph.qbr    %[" #O4 "],   %[" #I2                                     \
+    "]                      \n\t"                                               \
+    "preceu.ph.qbl    %[" #O5 "],   %[" #I2                                     \
+    "]                      \n\t"                                               \
+    "preceu.ph.qbr    %[" #O6 "],   %[" #I3                                     \
+    "]                      \n\t"                                               \
+    "preceu.ph.qbl    %[" #O7 "],   %[" #I3 "]                      \n\t"
+
+// temp0[31..16 | 15..0] = temp0[31..16 | 15..0] + temp8[31..16 | 15..0]
+// temp0[31..16 | 15..0] = temp0[31..16 <<(s) 7 | 15..0 <<(s) 7]
+// temp1..temp7 same as temp0
+// precrqu_s.qb.ph temp0, temp1, temp0:
+//   temp0 = temp1[31..24] | temp1[15..8] | temp0[31..24] | temp0[15..8]
+// store temp0 to dst (only IO0, IO2, IO4 and IO6 are stored, each after being packed with its odd neighbour)
+// IO - input/output: IO(k) += I(k) for k = 0..7, then shifted left by 7 with shll_s.ph
+// I - input (macro doesn't change it); I8 - base address operand, usw offsets are I13 * I9 .. I13 * I12 (only I13 is XSTR-expanded)
+#define STORE_SAT_SUM_X2(IO0, IO1, IO2, IO3, IO4, IO5, IO6, IO7, I0, I1, I2, I3, I4, I5, I6, I7, I8, I9, I10, I11,    \
+                         I12, I13)                                                                                    \
+    "addq.ph          %[" #IO0 "],  %[" #IO0 "],  %[" #I0                                                             \
+    "]          \n\t"                                                                                                 \
+    "addq.ph          %[" #IO1 "],  %[" #IO1 "],  %[" #I1                                                             \
+    "]          \n\t"                                                                                                 \
+    "addq.ph          %[" #IO2 "],  %[" #IO2 "],  %[" #I2                                                             \
+    "]          \n\t"                                                                                                 \
+    "addq.ph          %[" #IO3 "],  %[" #IO3 "],  %[" #I3                                                             \
+    "]          \n\t"                                                                                                 \
+    "addq.ph          %[" #IO4 "],  %[" #IO4 "],  %[" #I4                                                             \
+    "]          \n\t"                                                                                                 \
+    "addq.ph          %[" #IO5 "],  %[" #IO5 "],  %[" #I5                                                             \
+    "]          \n\t"                                                                                                 \
+    "addq.ph          %[" #IO6 "],  %[" #IO6 "],  %[" #I6                                                             \
+    "]          \n\t"                                                                                                 \
+    "addq.ph          %[" #IO7 "],  %[" #IO7 "],  %[" #I7                                                             \
+    "]          \n\t"                                                                                                 \
+    "shll_s.ph        %[" #IO0 "],  %[" #IO0                                                                          \
+    "],  7                   \n\t"                                                                                    \
+    "shll_s.ph        %[" #IO1 "],  %[" #IO1                                                                          \
+    "],  7                   \n\t"                                                                                    \
+    "shll_s.ph        %[" #IO2 "],  %[" #IO2                                                                          \
+    "],  7                   \n\t"                                                                                    \
+    "shll_s.ph        %[" #IO3 "],  %[" #IO3                                                                          \
+    "],  7                   \n\t"                                                                                    \
+    "shll_s.ph        %[" #IO4 "],  %[" #IO4                                                                          \
+    "],  7                   \n\t"                                                                                    \
+    "shll_s.ph        %[" #IO5 "],  %[" #IO5                                                                          \
+    "],  7                   \n\t"                                                                                    \
+    "shll_s.ph        %[" #IO6 "],  %[" #IO6                                                                          \
+    "],  7                   \n\t"                                                                                    \
+    "shll_s.ph        %[" #IO7 "],  %[" #IO7                                                                          \
+    "],  7                   \n\t"                                                                                    \
+    "precrqu_s.qb.ph  %[" #IO0 "],  %[" #IO1 "],  %[" #IO0                                                            \
+    "]         \n\t"                                                                                                  \
+    "precrqu_s.qb.ph  %[" #IO2 "],  %[" #IO3 "],  %[" #IO2                                                            \
+    "]         \n\t"                                                                                                  \
+    "precrqu_s.qb.ph  %[" #IO4 "],  %[" #IO5 "],  %[" #IO4                                                            \
+    "]         \n\t"                                                                                                  \
+    "precrqu_s.qb.ph  %[" #IO6 "],  %[" #IO7 "],  %[" #IO6                                                            \
+    "]         \n\t"                                                                                                  \
+    "usw              %[" #IO0 "],  " XSTR(I13) "*" #I9 "(%[" #I8                                                     \
+                                                "])   \n\t"                                                           \
+                                                "usw              %[" #IO2                                            \
+                                                "],  " XSTR(I13) "*" #I10 "(%[" #I8                                   \
+                                                                 "])  \n\t"                                           \
+                                                                 "usw              %[" #IO4                           \
+                                                                 "],  " XSTR(I13) "*" #I11 "(%[" #I8                  \
+                                                                                  "])  \n\t"                          \
+                                                                                  "usw              %[" #IO6          \
+                                                                                  "],  " XSTR(I13) "*" #I12 "(%[" #I8 \
+                                                                                                   "])  \n\t"
+
+#define OUTPUT_EARLY_CLOBBER_REGS_10()                             \
+  : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),             \
+    [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),             \
+    [temp7]"=&r"(temp7), [temp8]"=&r"(temp8), [temp9]"=&r"(temp9),             \
+    [temp10]"=&r"(temp10) // expands to the ':' output list of an asm statement: temp1..temp10 as "=&r" operands (no temp0)
+
+#define OUTPUT_EARLY_CLOBBER_REGS_18()                                                                \
+    OUTPUT_EARLY_CLOBBER_REGS_10()                                                                    \
+    , [temp11] "=&r"(temp11), [temp12] "=&r"(temp12), [temp13] "=&r"(temp13), [temp14] "=&r"(temp14), \
+        [temp15] "=&r"(temp15), [temp16] "=&r"(temp16), [temp17] "=&r"(temp17), [temp18] "=&r"(temp18) // extends the 10-register list with temp11..temp18
+
+#endif // WEBP_DSP_MIPS_MACRO_H_
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/neon.h b/codec/L2/demos/webpEnc/host/src/dsp/neon.h
new file mode 100644
index 0000000000..0fb63d2509
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/neon.h
@@ -0,0 +1,83 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  NEON common code.
+
+#ifndef WEBP_DSP_NEON_H_
+#define WEBP_DSP_NEON_H_
+
+#include <arm_neon.h>
+
+#include "./dsp.h"
+
+// Right now, some intrinsic functions seem slower, so we disable them
+// everywhere except aarch64, where the inline assembly is incompatible.
+#if defined(__aarch64__)
+#define WEBP_USE_INTRINSICS // use intrinsics when possible
+#endif
+
+#define INIT_VECTOR2(v, a, b) \
+    do {                      \
+        v.val[0] = a;         \
+        v.val[1] = b;         \
+    } while (0) // assigns a, b to v.val[0..1]; the do/while(0) wrapper makes the macro a single statement
+
+#define INIT_VECTOR3(v, a, b, c) \
+    do {                         \
+        v.val[0] = a;            \
+        v.val[1] = b;            \
+        v.val[2] = c;            \
+    } while (0) // assigns a, b, c to v.val[0..2]; the do/while(0) wrapper makes the macro a single statement
+
+#define INIT_VECTOR4(v, a, b, c, d) \
+    do {                            \
+        v.val[0] = a;               \
+        v.val[1] = b;               \
+        v.val[2] = c;               \
+        v.val[3] = d;               \
+    } while (0) // assigns a, b, c, d to v.val[0..3]; the do/while(0) wrapper makes the macro a single statement
+
+// If using intrinsics, this flag avoids some functions that make gcc-4.6.3
+// crash ("internal compiler error: in immed_double_const, at emit-rtl.").
+// (probably similar to gcc.gnu.org/bugzilla/show_bug.cgi?id=48183)
+#if !(LOCAL_GCC_PREREQ(4, 8) || defined(__aarch64__)) // LOCAL_GCC_PREREQ is not defined in this header; presumably it comes from ./dsp.h - confirm
+#define WORK_AROUND_GCC // defined unless LOCAL_GCC_PREREQ(4, 8) holds or the target is aarch64
+#endif
+
+static WEBP_INLINE int32x4x4_t Transpose4x4(const int32x4x4_t rows) { // returns the 4x4 transpose: out.val[i] collects lane i of every input row
+    uint64x2x2_t row01, row23;
+
+    row01.val[0] = vreinterpretq_u64_s32(rows.val[0]); // view each row as two 64-bit halves (lanes 0-1 | lanes 2-3)
+    row01.val[1] = vreinterpretq_u64_s32(rows.val[1]);
+    row23.val[0] = vreinterpretq_u64_s32(rows.val[2]);
+    row23.val[1] = vreinterpretq_u64_s32(rows.val[3]);
+    // Transpose 64-bit values (there's no vswp equivalent)
+    {
+        const uint64x1_t row0h = vget_high_u64(row01.val[0]); // saved before row01.val[0] is overwritten below
+        const uint64x1_t row2l = vget_low_u64(row23.val[0]);
+        const uint64x1_t row1h = vget_high_u64(row01.val[1]); // saved before row01.val[1] is overwritten below
+        const uint64x1_t row3l = vget_low_u64(row23.val[1]);
+        row01.val[0] = vcombine_u64(vget_low_u64(row01.val[0]), row2l); // row0 low half | row2 low half
+        row23.val[0] = vcombine_u64(row0h, vget_high_u64(row23.val[0])); // row0 high half | row2 high half
+        row01.val[1] = vcombine_u64(vget_low_u64(row01.val[1]), row3l); // row1 low half | row3 low half
+        row23.val[1] = vcombine_u64(row1h, vget_high_u64(row23.val[1])); // row1 high half | row3 high half
+    }
+    {
+        const int32x4x2_t out01 = vtrnq_s32(vreinterpretq_s32_u64(row01.val[0]), vreinterpretq_s32_u64(row01.val[1])); // 32-bit transpose of the low halves
+        const int32x4x2_t out23 = vtrnq_s32(vreinterpretq_s32_u64(row23.val[0]), vreinterpretq_s32_u64(row23.val[1])); // 32-bit transpose of the high halves
+        int32x4x4_t out;
+        out.val[0] = out01.val[0]; // lane 0 of rows 0..3
+        out.val[1] = out01.val[1]; // lane 1 of rows 0..3
+        out.val[2] = out23.val[0]; // lane 2 of rows 0..3
+        out.val[3] = out23.val[1]; // lane 3 of rows 0..3
+        return out;
+    }
+}
+
+#endif // WEBP_DSP_NEON_H_
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/rescaler.c b/codec/L2/demos/webpEnc/host/src/dsp/rescaler.c
new file mode 100644
index 0000000000..465a0a40e9
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/rescaler.c
@@ -0,0 +1,236 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Rescaling functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+
+#include "./dsp.h"
+#include "../utils/rescaler.h"
+
+//------------------------------------------------------------------------------
+// Implementations of critical functions ImportRow / ExportRow
+
+#define ROUNDER (WEBP_RESCALER_ONE >> 1)  // half of WEBP_RESCALER_ONE; the rounding term MULT_FIX adds before shifting
+#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)  // 64-bit product x*y, plus ROUNDER, shifted right by WEBP_RESCALER_RFIX bits
+
+//------------------------------------------------------------------------------
+// Row import
+
+void WebPRescalerImportRowExpandC(WebPRescaler* const wrk, const uint8_t* src) {
+    // Horizontal expansion of one source row into wrk->frow, one channel at a
+    // time, by simple bilinear interpolation between two neighbouring samples.
+    const int x_stride = wrk->num_channels;
+    const int out_end = wrk->dst_width * wrk->num_channels;
+    int c;
+    assert(!WebPRescalerInputDone(wrk));
+    assert(wrk->x_expand);
+    for (c = 0; c < x_stride; ++c) {
+        int x_in = c + x_stride;  // index of the current right-hand sample
+        int out_pos = c;
+        int accum = wrk->x_add;
+        int left = src[c];
+        int right = (wrk->src_width > 1) ? src[x_in] : left;
+        for (;;) {
+            wrk->frow[out_pos] = right * wrk->x_add + (left - right) * accum;
+            out_pos += x_stride;
+            if (out_pos >= out_end) break;
+            accum -= wrk->x_sub;
+            if (accum >= 0) continue;
+            // accum went negative: slide the (left, right) pair one sample on.
+            left = right;
+            x_in += x_stride;
+            assert(x_in < wrk->src_width * x_stride);
+            right = src[x_in];
+            accum += wrk->x_add;
+        }
+        assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0);
+    }
+}
+
+void WebPRescalerImportRowShrinkC(WebPRescaler* const wrk, const uint8_t* src) {
+    // Horizontal shrink of one source row into wrk->frow: per channel, every
+    // output sample is built from a running sum of consecutive source samples.
+    const int x_stride = wrk->num_channels;
+    const int out_end = wrk->dst_width * wrk->num_channels;
+    int c;
+    assert(!WebPRescalerInputDone(wrk));
+    assert(!wrk->x_expand);
+    for (c = 0; c < x_stride; ++c) {
+        int x_in = c;
+        int out_pos;
+        uint32_t sum = 0;
+        int accum = 0;
+        for (out_pos = c; out_pos < out_end; out_pos += x_stride) {
+            uint32_t base = 0;
+            rescaler_t frac;
+            // Consume source samples until the accumulator is no longer positive.
+            for (accum += wrk->x_add; accum > 0; x_in += x_stride) {
+                accum -= wrk->x_sub;
+                assert(x_in < wrk->src_width * x_stride);
+                base = src[x_in];
+                sum += base;
+            }
+            // Emit next horizontal pixel: the part of the last sample that
+            // overshoots (frac) is removed here and carried to the next pixel.
+            frac = base * (-accum);
+            wrk->frow[out_pos] = sum * wrk->x_sub - frac;
+            sum = (int)MULT_FIX(frac, wrk->fx_scale);  // fresh fractional start
+        }
+        assert(accum == 0);
+    }
+}
+
+//------------------------------------------------------------------------------
+// Row export
+
+void WebPRescalerExportRowExpandC(WebPRescaler* const wrk) {
+    // Writes one 8-bit row to wrk->dst from frow, blended with irow unless y_accum is 0.
+    int i;
+    uint8_t* const dst = wrk->dst;
+    const rescaler_t* const frow = wrk->frow;
+    const int count = wrk->dst_width * wrk->num_channels;
+    assert(!WebPRescalerOutputDone(wrk));
+    assert(wrk->y_accum <= 0);
+    assert(wrk->y_expand);
+    assert(wrk->y_sub != 0);
+    if (wrk->y_accum != 0) {
+        const uint32_t w_irow = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
+        const uint32_t w_frow = (uint32_t)(WEBP_RESCALER_ONE - w_irow);
+        for (i = 0; i < count; ++i) {
+            const uint64_t blend = (uint64_t)w_frow * frow[i] + (uint64_t)w_irow * wrk->irow[i];
+            const uint32_t scaled = (uint32_t)((blend + ROUNDER) >> WEBP_RESCALER_RFIX);
+            const int v = (int)MULT_FIX(scaled, wrk->fy_scale);
+            assert(v >= 0 && v <= 255);
+            dst[i] = v;
+        }
+    } else {
+        for (i = 0; i < count; ++i) {
+            const uint32_t sample = frow[i];
+            const int v = (int)MULT_FIX(sample, wrk->fy_scale);
+            assert(v >= 0 && v <= 255);
+            dst[i] = v;
+        }
+    }
+}
+
+void WebPRescalerExportRowShrinkC(WebPRescaler* const wrk) {
+    // Emits one 8-bit row to wrk->dst from the irow sums and re-seeds irow for the next row.
+    int i;
+    uint8_t* const dst = wrk->dst;
+    rescaler_t* const irow = wrk->irow;
+    const int count = wrk->dst_width * wrk->num_channels;
+    const uint32_t yscale = wrk->fy_scale * (-wrk->y_accum);
+    assert(!WebPRescalerOutputDone(wrk));
+    assert(wrk->y_accum <= 0);
+    assert(!wrk->y_expand);
+    if (yscale == 0) {
+        for (i = 0; i < count; ++i) {
+            const int v = (int)MULT_FIX(irow[i], wrk->fxy_scale);
+            assert(v >= 0 && v <= 255);
+            dst[i] = v;
+            irow[i] = 0;
+        }
+    } else {
+        for (i = 0; i < count; ++i) {
+            const uint32_t frac = (uint32_t)MULT_FIX(wrk->frow[i], yscale);
+            const int v = (int)MULT_FIX(irow[i] - frac, wrk->fxy_scale);
+            assert(v >= 0 && v <= 255);
+            dst[i] = v;
+            irow[i] = frac; // new fractional start
+        }
+    }
+}
+
+#undef MULT_FIX
+#undef ROUNDER
+
+//------------------------------------------------------------------------------
+// Main entry calls
+
+void WebPRescalerImportRow(WebPRescaler* const wrk, const uint8_t* src) {
+    // Hands the row to the horizontal expand or shrink importer, whichever
+    // wrk->x_expand selects, via the function pointers set by WebPRescalerDspInit().
+    WebPRescalerImportRowFunc import_row;
+    assert(!WebPRescalerInputDone(wrk));
+    import_row = wrk->x_expand ? WebPRescalerImportRowExpand : WebPRescalerImportRowShrink;
+    import_row(wrk, src);
+}
+
+void WebPRescalerExportRow(WebPRescaler* const wrk) {
+    int i;
+    if (wrk->y_accum > 0) return;  // nothing is exported while y_accum is positive
+    assert(!WebPRescalerOutputDone(wrk));
+    if (wrk->y_expand) {
+        WebPRescalerExportRowExpand(wrk);
+    } else if (wrk->fxy_scale) {
+        WebPRescalerExportRowShrink(wrk);
+    } else { // very special case for src = dst = 1x1
+        assert(wrk->src_width == 1 && wrk->dst_width <= 2);
+        assert(wrk->src_height == 1 && wrk->dst_height == 1);
+        for (i = 0; i < wrk->num_channels * wrk->dst_width; ++i) {
+            wrk->dst[i] = wrk->irow[i];
+            wrk->irow[i] = 0;
+        }
+    }
+    // One row was written: advance the vertical accumulator and output cursor.
+    wrk->y_accum += wrk->y_add;
+    wrk->dst += wrk->dst_stride;
+    ++wrk->dst_y;
+}
+
+//------------------------------------------------------------------------------
+
+WebPRescalerImportRowFunc WebPRescalerImportRowExpand;  // called by WebPRescalerImportRow() when wrk->x_expand is non-zero
+WebPRescalerImportRowFunc WebPRescalerImportRowShrink;  // called by WebPRescalerImportRow() when wrk->x_expand is zero
+
+WebPRescalerExportRowFunc WebPRescalerExportRowExpand;  // called by WebPRescalerExportRow() when wrk->y_expand is non-zero
+WebPRescalerExportRowFunc WebPRescalerExportRowShrink;  // called by WebPRescalerExportRow() when !y_expand and fxy_scale != 0
+
+extern void WebPRescalerDspInitSSE2(void);  // the four initialisers below are defined in other translation units
+extern void WebPRescalerDspInitMIPS32(void);
+extern void WebPRescalerDspInitMIPSdspR2(void);
+extern void WebPRescalerDspInitNEON(void);
+
+static volatile VP8CPUInfo rescaler_last_cpuinfo_used = (VP8CPUInfo)&rescaler_last_cpuinfo_used;  // VP8GetCPUInfo value seen by the last WebPRescalerDspInit(); starts as its own address, a sentinel meant never to equal a real hook
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInit(void) {  // installs the rescaler row functions: C versions first, then CPU-specific overrides
+    if (rescaler_last_cpuinfo_used == VP8GetCPUInfo) return;  // already set up for the current VP8GetCPUInfo hook
+
+    WebPRescalerImportRowExpand = WebPRescalerImportRowExpandC;  // portable C defaults
+    WebPRescalerImportRowShrink = WebPRescalerImportRowShrinkC;
+    WebPRescalerExportRowExpand = WebPRescalerExportRowExpandC;
+    WebPRescalerExportRowShrink = WebPRescalerExportRowShrinkC;
+
+    if (VP8GetCPUInfo != NULL) {  // without a CPU-info hook the C defaults stay in place
+#if defined(WEBP_USE_SSE2)
+        if (VP8GetCPUInfo(kSSE2)) {
+            WebPRescalerDspInitSSE2();
+        }
+#endif
+#if defined(WEBP_USE_NEON)
+        if (VP8GetCPUInfo(kNEON)) {
+            WebPRescalerDspInitNEON();
+        }
+#endif
+#if defined(WEBP_USE_MIPS32)
+        if (VP8GetCPUInfo(kMIPS32)) {
+            WebPRescalerDspInitMIPS32();
+        }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+        if (VP8GetCPUInfo(kMIPSdspR2)) {  // runs last, so anything it installs overrides the MIPS32 choices
+            WebPRescalerDspInitMIPSdspR2();
+        }
+#endif
+    }
+    rescaler_last_cpuinfo_used = VP8GetCPUInfo;  // remember the hook so later calls return early
+}
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/rescaler_mips32.c b/codec/L2/demos/webpEnc/host/src/dsp/rescaler_mips32.c
new file mode 100644
index 0000000000..94d942949c
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/rescaler_mips32.c
@@ -0,0 +1,276 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of rescaling functions
+//
+// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS32)
+
+#include <assert.h>
+#include "../utils/rescaler.h"
+
+//------------------------------------------------------------------------------
+// Row import
+
+static void ImportRowShrink(WebPRescaler* const wrk, const uint8_t* src) {
+    // MIPS32 assembly version of the horizontal "shrink" row import: for each
+    // channel, sums source samples into every output sample of wrk->frow.
+    const int x_stride = wrk->num_channels;
+    const int x_out_max = wrk->dst_width * wrk->num_channels;
+    const int fx_scale = wrk->fx_scale;
+    const int x_add = wrk->x_add;
+    const int x_sub = wrk->x_sub;
+    const int x_stride1 = x_stride << 2;  // byte step added to frow after each "sw" store
+    int channel;
+    assert(!wrk->x_expand);
+    assert(!WebPRescalerInputDone(wrk));
+
+    for (channel = 0; channel < x_stride; ++channel) {
+        const uint8_t* src1 = src + channel;
+        rescaler_t* frow = wrk->frow + channel;
+        int temp1, temp2, temp3, base, frac, sum, accum, accum1;
+        int loop_c = x_out_max - channel;  // counted down by x_stride inside the asm; loop runs while > 0
+
+        __asm__ volatile(
+            "li     %[temp1],   0x8000                    \n\t"  // temp1 * temp2 = 0x80000000, the rounding term
+            "li     %[temp2],   0x10000                   \n\t"
+            "li     %[sum],     0                         \n\t"
+            "li     %[accum],   0                         \n\t"
+            "1:                                             \n\t"  // ---- one output sample per pass ----
+            "addu   %[accum],   %[accum],   %[x_add]      \n\t"
+            "li     %[base],    0                         \n\t"
+            "blez   %[accum],   3f                        \n\t"
+            "2:                                             \n\t"  // ---- consume source samples while accum > 0 ----
+            "lbu    %[base],    0(%[src1])                \n\t"
+            "subu   %[accum],   %[accum],   %[x_sub]      \n\t"
+            "addu   %[src1],    %[src1],    %[x_stride]   \n\t"
+            "addu   %[sum],     %[sum],     %[base]       \n\t"
+            "bgtz   %[accum],   2b                        \n\t"
+            "3:                                             \n\t"  // ---- emit the output sample ----
+            "negu   %[accum1],  %[accum]                  \n\t"
+            "mul    %[frac],    %[base],    %[accum1]     \n\t"  // frac = base * (-accum)
+            "mul    %[temp3],   %[sum],     %[x_sub]      \n\t"
+            "subu   %[loop_c],  %[loop_c],  %[x_stride]   \n\t"  // writes loop_c, hence the "+r" constraint below
+            "mult   %[temp1],   %[temp2]                  \n\t"  // HI:LO = 0x80000000
+            "maddu  %[frac],    %[fx_scale]               \n\t"  // HI:LO += frac * fx_scale
+            "mfhi   %[sum]                                \n\t"  // next sum = upper 32 bits of HI:LO
+            "subu   %[temp3],   %[temp3],   %[frac]       \n\t"
+            "sw     %[temp3],   0(%[frow])                \n\t"  // *frow = sum * x_sub - frac
+            "addu   %[frow],    %[frow],    %[x_stride1]  \n\t"
+            "bgtz   %[loop_c],  1b                        \n\t"
+            : [accum] "=&r"(accum), [src1] "+r"(src1), [temp3] "=&r"(temp3), [sum] "=&r"(sum), [base] "=&r"(base),
+              [frac] "=&r"(frac), [frow] "+r"(frow), [accum1] "=&r"(accum1), [temp2] "=&r"(temp2), [temp1] "=&r"(temp1),
+              [loop_c] "+r"(loop_c)  // read-write: GCC forbids modifying an input-only operand
+            : [x_stride] "r"(x_stride), [fx_scale] "r"(fx_scale), [x_sub] "r"(x_sub), [x_add] "r"(x_add), [x_stride1] "r"(x_stride1)
+            : "memory", "hi", "lo");
+        assert(accum == 0);
+    }
+}
+
+static void ImportRowExpand(WebPRescaler* const wrk, const uint8_t* src) {  // MIPS32 asm version of the horizontal "expand" row import into wrk->frow
+    const int x_stride = wrk->num_channels;
+    const int x_out_max = wrk->dst_width * wrk->num_channels;
+    const int x_add = wrk->x_add;
+    const int x_sub = wrk->x_sub;
+    const int src_width = wrk->src_width;
+    const int x_stride1 = x_stride << 2;  // byte step added to frow after each "sw" store
+    int channel;
+    assert(wrk->x_expand);
+    assert(!WebPRescalerInputDone(wrk));
+
+    for (channel = 0; channel < x_stride; ++channel) {
+        const uint8_t* src1 = src + channel;
+        rescaler_t* frow = wrk->frow + channel;
+        int temp1, temp2, temp3, temp4;  // in the asm: temp1 = right sample, temp2 = left sample, temp3/temp4 = scratch
+        int frac;  // listed as an asm output but never written by the asm below
+        int accum;
+        int x_out = channel;
+
+        __asm__ volatile(
+            "addiu  %[temp3],   %[src_width], -1            \n\t"
+            "lbu    %[temp2],   0(%[src1])                  \n\t"  // left = first sample of this channel
+            "addu   %[src1],    %[src1],      %[x_stride]   \n\t"
+            "bgtz   %[temp3],   0f                          \n\t"  // src_width > 1: go load a real right sample
+            "addiu  %[temp1],   %[temp2],     0             \n\t"  // otherwise right = left
+            "b      3f                                      \n\t"
+            "0:                                               \n\t"
+            "lbu    %[temp1],   0(%[src1])                  \n\t"  // right = second sample
+            "3:                                               \n\t"
+            "addiu  %[accum],   %[x_add],     0             \n\t"  // accum = x_add
+            "1:                                               \n\t"  // ---- one output sample per pass ----
+            "subu   %[temp3],   %[temp2],     %[temp1]      \n\t"
+            "mul    %[temp3],   %[temp3],     %[accum]      \n\t"  // (left - right) * accum
+            "mul    %[temp4],   %[temp1],     %[x_add]      \n\t"  // right * x_add
+            "addu   %[temp3],   %[temp4],     %[temp3]      \n\t"
+            "sw     %[temp3],   0(%[frow])                  \n\t"  // *frow = right * x_add + (left - right) * accum
+            "addu   %[frow],    %[frow],      %[x_stride1]  \n\t"
+            "addu   %[x_out],   %[x_out],     %[x_stride]   \n\t"
+            "subu   %[temp3],   %[x_out],     %[x_out_max]  \n\t"
+            "bgez   %[temp3],   2f                          \n\t"  // x_out >= x_out_max: channel finished
+            "subu   %[accum],   %[accum],     %[x_sub]      \n\t"
+            "bgez   %[accum],   4f                          \n\t"  // accum still >= 0: keep the current sample pair
+            "addiu  %[temp2],   %[temp1],     0             \n\t"  // left = right
+            "addu   %[src1],    %[src1],      %[x_stride]   \n\t"
+            "lbu    %[temp1],   0(%[src1])                  \n\t"  // right = next source sample
+            "addu   %[accum],   %[accum],     %[x_add]      \n\t"
+            "4:                                               \n\t"
+            "b      1b                                      \n\t"
+            "2:                                               \n\t"
+            : [src1] "+r"(src1), [accum] "=&r"(accum), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
+              [temp4] "=&r"(temp4), [x_out] "+r"(x_out), [frac] "=&r"(frac), [frow] "+r"(frow)
+            : [x_stride] "r"(x_stride), [x_add] "r"(x_add), [x_sub] "r"(x_sub), [x_stride1] "r"(x_stride1),
+              [src_width] "r"(src_width), [x_out_max] "r"(x_out_max)
+            : "memory", "hi", "lo");
+        assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0);
+    }
+}
+
+//------------------------------------------------------------------------------
+// Row export
+
+static void ExportRowExpand(WebPRescaler* const wrk) {  // MIPS32 asm version of the vertical "expand" row export: frow (and irow) -> 8-bit wrk->dst
+    uint8_t* dst = wrk->dst;
+    rescaler_t* irow = wrk->irow;
+    const int x_out_max = wrk->dst_width * wrk->num_channels;
+    const rescaler_t* frow = wrk->frow;
+    int temp0, temp1, temp3, temp4, temp5, loop_end;
+    const int temp2 = (int)wrk->fy_scale;
+    const int temp6 = x_out_max << 2;  // row length in bytes (4 per frow entry, matching the "lw"/"addiu 4" below)
+    assert(!WebPRescalerOutputDone(wrk));
+    assert(wrk->y_accum <= 0);
+    assert(wrk->y_expand);
+    assert(wrk->y_sub != 0);
+    if (wrk->y_accum == 0) {
+        __asm__ volatile(
+            "li       %[temp3],    0x10000                    \n\t"  // temp3 * temp4 = 0x80000000, the rounding term
+            "li       %[temp4],    0x8000                     \n\t"
+            "addu     %[loop_end], %[frow],     %[temp6]      \n\t"  // loop_end = one past the last frow entry
+            "1:                                                 \n\t"
+            "lw       %[temp0],    0(%[frow])                 \n\t"
+            "addiu    %[dst],      %[dst],      1             \n\t"
+            "addiu    %[frow],     %[frow],     4             \n\t"
+            "mult     %[temp3],    %[temp4]                   \n\t"  // HI:LO = 0x80000000
+            "maddu    %[temp0],    %[temp2]                   \n\t"  // HI:LO += frow value * fy_scale
+            "mfhi     %[temp5]                                \n\t"  // result = upper 32 bits of HI:LO
+            "sb       %[temp5],    -1(%[dst])                 \n\t"  // dst was already advanced, hence the -1 offset
+            "bne      %[frow],     %[loop_end], 1b            \n\t"  // body runs before the test, so at least one entry is processed
+            : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+              [temp5] "=&r"(temp5), [frow] "+r"(frow), [dst] "+r"(dst), [loop_end] "=&r"(loop_end)
+            : [temp2] "r"(temp2), [temp6] "r"(temp6)
+            : "memory", "hi", "lo");
+    } else {
+        const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);  // multiplier applied to irow entries
+        const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);  // multiplier applied to frow entries
+        __asm__ volatile(
+            "li       %[temp3],    0x10000                    \n\t"
+            "li       %[temp4],    0x8000                     \n\t"
+            "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
+            "1:                                                 \n\t"
+            "lw       %[temp0],    0(%[frow])                 \n\t"
+            "lw       %[temp1],    0(%[irow])                 \n\t"
+            "addiu    %[dst],      %[dst],      1             \n\t"
+            "mult     %[temp3],    %[temp4]                   \n\t"  // HI:LO = 0x80000000
+            "maddu    %[A],        %[temp0]                   \n\t"  // HI:LO += A * frow value
+            "maddu    %[B],        %[temp1]                   \n\t"  // HI:LO += B * irow value
+            "addiu    %[frow],     %[frow],     4             \n\t"
+            "addiu    %[irow],     %[irow],     4             \n\t"
+            "mfhi     %[temp5]                                \n\t"  // blended value = upper 32 bits
+            "mult     %[temp3],    %[temp4]                   \n\t"  // re-seed HI:LO with the rounding term
+            "maddu    %[temp5],    %[temp2]                   \n\t"  // HI:LO += blended value * fy_scale
+            "mfhi     %[temp5]                                \n\t"
+            "sb       %[temp5],    -1(%[dst])                 \n\t"
+            "bne      %[frow],     %[loop_end], 1b            \n\t"
+            : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+              [temp5] "=&r"(temp5), [frow] "+r"(frow), [irow] "+r"(irow), [dst] "+r"(dst), [loop_end] "=&r"(loop_end)
+            : [temp2] "r"(temp2), [temp6] "r"(temp6), [A] "r"(A), [B] "r"(B)
+            : "memory", "hi", "lo");
+    }
+}
+
+static void ExportRowShrink(WebPRescaler* const wrk) {  // MIPS32 asm version of the vertical "shrink" row export: irow -> 8-bit wrk->dst, irow re-seeded
+    const int x_out_max = wrk->dst_width * wrk->num_channels;
+    uint8_t* dst = wrk->dst;
+    rescaler_t* irow = wrk->irow;
+    const rescaler_t* frow = wrk->frow;
+    const int yscale = wrk->fy_scale * (-wrk->y_accum);
+    int temp0, temp1, temp3, temp4, temp5, loop_end;
+    const int temp2 = (int)wrk->fxy_scale;
+    const int temp6 = x_out_max << 2;  // row length in bytes (4 per entry, matching the "lw"/"addiu 4" below)
+
+    assert(!WebPRescalerOutputDone(wrk));
+    assert(wrk->y_accum <= 0);
+    assert(!wrk->y_expand);
+    assert(wrk->fxy_scale != 0);
+    if (yscale) {
+        __asm__ volatile(
+            "li       %[temp3],    0x10000                    \n\t"  // temp3 * temp4 = 0x80000000, the rounding term
+            "li       %[temp4],    0x8000                     \n\t"
+            "addu     %[loop_end], %[frow],     %[temp6]      \n\t"  // loop_end = one past the last frow entry
+            "1:                                                 \n\t"
+            "lw       %[temp0],    0(%[frow])                 \n\t"
+            "mult     %[temp3],    %[temp4]                   \n\t"  // HI:LO = 0x80000000
+            "addiu    %[frow],     %[frow],     4             \n\t"
+            "maddu    %[temp0],    %[yscale]                  \n\t"  // HI:LO += frow value * yscale
+            "mfhi     %[temp1]                                \n\t"  // temp1 = frac (upper 32 bits)
+            "lw       %[temp0],    0(%[irow])                 \n\t"
+            "addiu    %[dst],      %[dst],      1             \n\t"
+            "addiu    %[irow],     %[irow],     4             \n\t"
+            "subu     %[temp0],    %[temp0],    %[temp1]      \n\t"  // irow value - frac
+            "mult     %[temp3],    %[temp4]                   \n\t"  // re-seed HI:LO with the rounding term
+            "maddu    %[temp0],    %[temp2]                   \n\t"  // HI:LO += (irow value - frac) * fxy_scale
+            "mfhi     %[temp5]                                \n\t"
+            "sw       %[temp1],    -4(%[irow])                \n\t"  // entry just read becomes frac (irow already advanced)
+            "sb       %[temp5],    -1(%[dst])                 \n\t"
+            "bne      %[frow],     %[loop_end], 1b            \n\t"
+            : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+              [temp5] "=&r"(temp5), [frow] "+r"(frow), [irow] "+r"(irow), [dst] "+r"(dst), [loop_end] "=&r"(loop_end)
+            : [temp2] "r"(temp2), [yscale] "r"(yscale), [temp6] "r"(temp6)
+            : "memory", "hi", "lo");
+    } else {
+        __asm__ volatile(
+            "li       %[temp3],    0x10000                    \n\t"
+            "li       %[temp4],    0x8000                     \n\t"
+            "addu     %[loop_end], %[irow],     %[temp6]      \n\t"  // this loop walks irow only; frow is not read
+            "1:                                                 \n\t"
+            "lw       %[temp0],    0(%[irow])                 \n\t"
+            "addiu    %[dst],      %[dst],      1             \n\t"
+            "addiu    %[irow],     %[irow],     4             \n\t"
+            "mult     %[temp3],    %[temp4]                   \n\t"  // HI:LO = 0x80000000
+            "maddu    %[temp0],    %[temp2]                   \n\t"  // HI:LO += irow value * fxy_scale
+            "mfhi     %[temp5]                                \n\t"
+            "sw       $zero,       -4(%[irow])                \n\t"  // clear the irow entry just consumed
+            "sb       %[temp5],    -1(%[dst])                 \n\t"
+            "bne      %[irow],     %[loop_end], 1b            \n\t"
+            : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+              [temp5] "=&r"(temp5), [irow] "+r"(irow), [dst] "+r"(dst), [loop_end] "=&r"(loop_end)
+            : [temp2] "r"(temp2), [temp6] "r"(temp6)
+            : "memory", "hi", "lo");
+    }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPRescalerDspInitMIPS32(void);
+// Points the four rescaler row hooks at the MIPS32 assembly routines above.
+WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPS32(void) {
+    WebPRescalerImportRowShrink = ImportRowShrink;  // horizontal (import) pair
+    WebPRescalerImportRowExpand = ImportRowExpand;
+    WebPRescalerExportRowShrink = ExportRowShrink;  // vertical (export) pair
+    WebPRescalerExportRowExpand = ExportRowExpand;
+}
+
+#else // !WEBP_USE_MIPS32
+
+WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPS32)
+
+#endif // WEBP_USE_MIPS32
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/rescaler_mips_dsp_r2.c b/codec/L2/demos/webpEnc/host/src/dsp/rescaler_mips_dsp_r2.c
new file mode 100644
index 0000000000..c3e18a1759
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/rescaler_mips_dsp_r2.c
@@ -0,0 +1,303 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of rescaling functions
+//
+// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+#include <assert.h>
+#include "../utils/rescaler.h"
+
+#define ROUNDER (WEBP_RESCALER_ONE >> 1)
+#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
+
+//------------------------------------------------------------------------------
+// Row export
+
+static void ExportRowShrink(WebPRescaler* const wrk) { // writes one row to wrk->dst in the !y_expand case
+    int i;
+    const int x_out_max = wrk->dst_width * wrk->num_channels;
+    uint8_t* dst = wrk->dst;
+    rescaler_t* irow = wrk->irow;
+    const rescaler_t* frow = wrk->frow;
+    const uint32_t yscale = wrk->fy_scale * (-wrk->y_accum); // unsigned: MULT_FIX() widens it, must not sign-extend
+    int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
+    const int temp7 = (int)wrk->fxy_scale;
+    const int temp6 = (x_out_max & ~0x3) << 2; // byte length of the part the asm handles 4 words at a time
+    assert(!WebPRescalerOutputDone(wrk));
+    assert(wrk->y_accum <= 0);
+    assert(!wrk->y_expand);
+    assert(wrk->fxy_scale != 0);
+    if (yscale) { // same per-sample math as the scalar tail loop of this branch
+        if (x_out_max >= 4) {
+            int temp8, temp9, temp10, temp11;
+            __asm__ volatile(
+                "li       %[temp3],    0x10000                    \n\t"
+                "li       %[temp4],    0x8000                     \n\t"
+                "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
+                "1:                                                 \n\t"
+                "lw       %[temp0],    0(%[frow])                 \n\t"
+                "lw       %[temp1],    4(%[frow])                 \n\t"
+                "lw       %[temp2],    8(%[frow])                 \n\t"
+                "lw       %[temp5],    12(%[frow])                \n\t"
+                "mult     $ac0,        %[temp3],    %[temp4]      \n\t" // acc = 0x10000 * 0x8000 = 2^31 (rounding term)
+                "maddu    $ac0,        %[temp0],    %[yscale]     \n\t" // acc += frow[0] * yscale (unsigned)
+                "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
+                "maddu    $ac1,        %[temp1],    %[yscale]     \n\t"
+                "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
+                "maddu    $ac2,        %[temp2],    %[yscale]     \n\t"
+                "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
+                "maddu    $ac3,        %[temp5],    %[yscale]     \n\t"
+                "addiu    %[frow],     %[frow],     16            \n\t"
+                "mfhi     %[temp0],    $ac0                       \n\t" // frac = upper 32 bits of the accumulator
+                "mfhi     %[temp1],    $ac1                       \n\t"
+                "mfhi     %[temp2],    $ac2                       \n\t"
+                "mfhi     %[temp5],    $ac3                       \n\t"
+                "lw       %[temp8],    0(%[irow])                 \n\t"
+                "lw       %[temp9],    4(%[irow])                 \n\t"
+                "lw       %[temp10],   8(%[irow])                 \n\t"
+                "lw       %[temp11],   12(%[irow])                \n\t"
+                "addiu    %[dst],      %[dst],      4             \n\t"
+                "addiu    %[irow],     %[irow],     16            \n\t"
+                "subu     %[temp8],    %[temp8],    %[temp0]      \n\t" // irow[0] - frac
+                "subu     %[temp9],    %[temp9],    %[temp1]      \n\t"
+                "subu     %[temp10],   %[temp10],   %[temp2]      \n\t"
+                "subu     %[temp11],   %[temp11],   %[temp5]      \n\t"
+                "mult     $ac0,        %[temp3],    %[temp4]      \n\t"
+                "maddu    $ac0,        %[temp8],    %[temp7]      \n\t" // (irow[0] - frac) * fxy_scale + 2^31
+                "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
+                "maddu    $ac1,        %[temp9],    %[temp7]      \n\t"
+                "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
+                "maddu    $ac2,        %[temp10],   %[temp7]      \n\t"
+                "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
+                "maddu    $ac3,        %[temp11],   %[temp7]      \n\t"
+                "mfhi     %[temp8],    $ac0                       \n\t"
+                "mfhi     %[temp9],    $ac1                       \n\t"
+                "mfhi     %[temp10],   $ac2                       \n\t"
+                "mfhi     %[temp11],   $ac3                       \n\t"
+                "sw       %[temp0],    -16(%[irow])               \n\t" // irow[] = frac (new fractional start)
+                "sw       %[temp1],    -12(%[irow])               \n\t"
+                "sw       %[temp2],    -8(%[irow])                \n\t"
+                "sw       %[temp5],    -4(%[irow])                \n\t"
+                "sb       %[temp8],    -4(%[dst])                 \n\t" // dst[] = low byte of the scaled value
+                "sb       %[temp9],    -3(%[dst])                 \n\t"
+                "sb       %[temp10],   -2(%[dst])                 \n\t"
+                "sb       %[temp11],   -1(%[dst])                 \n\t"
+                "bne      %[frow],     %[loop_end], 1b            \n\t"
+                : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+                  [temp5] "=&r"(temp5), [frow] "+r"(frow), [irow] "+r"(irow), [dst] "+r"(dst),
+                  [loop_end] "=&r"(loop_end), [temp8] "=&r"(temp8), [temp9] "=&r"(temp9), [temp10] "=&r"(temp10),
+                  [temp11] "=&r"(temp11), [temp2] "=&r"(temp2)
+                : [temp7] "r"(temp7), [yscale] "r"(yscale), [temp6] "r"(temp6)
+                : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo");
+        }
+        for (i = 0; i < (x_out_max & 0x3); ++i) { // scalar tail: the 0..3 samples the asm loop did not cover
+            const uint32_t frac = (uint32_t)MULT_FIX(*frow++, yscale);
+            const int v = (int)MULT_FIX(*irow - frac, wrk->fxy_scale);
+            assert(v >= 0 && v <= 255);
+            *dst++ = v;
+            *irow++ = frac; // new fractional start
+        }
+    } else { // yscale == 0: scale irow by fxy_scale and reset it to zero
+        if (x_out_max >= 4) {
+            __asm__ volatile(
+                "li       %[temp3],    0x10000                    \n\t"
+                "li       %[temp4],    0x8000                     \n\t"
+                "addu     %[loop_end], %[irow],     %[temp6]      \n\t"
+                "1:                                                 \n\t"
+                "lw       %[temp0],    0(%[irow])                 \n\t"
+                "lw       %[temp1],    4(%[irow])                 \n\t"
+                "lw       %[temp2],    8(%[irow])                 \n\t"
+                "lw       %[temp5],    12(%[irow])                \n\t"
+                "addiu    %[dst],      %[dst],      4             \n\t"
+                "addiu    %[irow],     %[irow],     16            \n\t"
+                "mult     $ac0,        %[temp3],    %[temp4]      \n\t" // acc = 2^31 (rounding term)
+                "maddu    $ac0,        %[temp0],    %[temp7]      \n\t" // acc += irow[0] * fxy_scale
+                "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
+                "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
+                "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
+                "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
+                "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
+                "maddu    $ac3,        %[temp5],    %[temp7]      \n\t"
+                "mfhi     %[temp0],    $ac0                       \n\t"
+                "mfhi     %[temp1],    $ac1                       \n\t"
+                "mfhi     %[temp2],    $ac2                       \n\t"
+                "mfhi     %[temp5],    $ac3                       \n\t"
+                "sw       $zero,       -16(%[irow])               \n\t" // irow[] = 0
+                "sw       $zero,       -12(%[irow])               \n\t"
+                "sw       $zero,       -8(%[irow])                \n\t"
+                "sw       $zero,       -4(%[irow])                \n\t"
+                "sb       %[temp0],    -4(%[dst])                 \n\t"
+                "sb       %[temp1],    -3(%[dst])                 \n\t"
+                "sb       %[temp2],    -2(%[dst])                 \n\t"
+                "sb       %[temp5],    -1(%[dst])                 \n\t"
+                "bne      %[irow],     %[loop_end], 1b            \n\t"
+                : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+                  [temp5] "=&r"(temp5), [irow] "+r"(irow), [dst] "+r"(dst), [loop_end] "=&r"(loop_end),
+                  [temp2] "=&r"(temp2)
+                : [temp7] "r"(temp7), [temp6] "r"(temp6)
+                : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo");
+        }
+        for (i = 0; i < (x_out_max & 0x3); ++i) { // scalar tail for the remaining 0..3 samples
+            const int v = (int)MULT_FIX(*irow, wrk->fxy_scale);
+            assert(v >= 0 && v <= 255);
+            *dst++ = v;
+            *irow++ = 0;
+        }
+    }
+}
+
+static void ExportRowExpand(WebPRescaler* const wrk) { // writes one row to wrk->dst in the y_expand case
+    int i;
+    uint8_t* dst = wrk->dst;
+    rescaler_t* irow = wrk->irow;
+    const int x_out_max = wrk->dst_width * wrk->num_channels;
+    const rescaler_t* frow = wrk->frow;
+    int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
+    const int temp6 = (x_out_max & ~0x3) << 2; // byte length of the part the asm handles 4 words at a time
+    const int temp7 = (int)wrk->fy_scale;
+    assert(!WebPRescalerOutputDone(wrk));
+    assert(wrk->y_accum <= 0);
+    assert(wrk->y_expand);
+    assert(wrk->y_sub != 0);
+    if (wrk->y_accum == 0) { // no blending: dst = MULT_FIX(frow, fy_scale), as in the scalar tail below
+        if (x_out_max >= 4) {
+            __asm__ volatile(
+                "li       %[temp4],    0x10000                    \n\t"
+                "li       %[temp5],    0x8000                     \n\t"
+                "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
+                "1:                                                 \n\t"
+                "lw       %[temp0],    0(%[frow])                 \n\t"
+                "lw       %[temp1],    4(%[frow])                 \n\t"
+                "lw       %[temp2],    8(%[frow])                 \n\t"
+                "lw       %[temp3],    12(%[frow])                \n\t"
+                "addiu    %[dst],      %[dst],      4             \n\t"
+                "addiu    %[frow],     %[frow],     16            \n\t"
+                "mult     $ac0,        %[temp4],    %[temp5]      \n\t" // acc = 0x10000 * 0x8000 = 2^31 (rounding term)
+                "maddu    $ac0,        %[temp0],    %[temp7]      \n\t" // acc += frow[0] * fy_scale
+                "mult     $ac1,        %[temp4],    %[temp5]      \n\t"
+                "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
+                "mult     $ac2,        %[temp4],    %[temp5]      \n\t"
+                "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
+                "mult     $ac3,        %[temp4],    %[temp5]      \n\t"
+                "maddu    $ac3,        %[temp3],    %[temp7]      \n\t"
+                "mfhi     %[temp0],    $ac0                       \n\t" // keep the upper 32 bits
+                "mfhi     %[temp1],    $ac1                       \n\t"
+                "mfhi     %[temp2],    $ac2                       \n\t"
+                "mfhi     %[temp3],    $ac3                       \n\t"
+                "sb       %[temp0],    -4(%[dst])                 \n\t"
+                "sb       %[temp1],    -3(%[dst])                 \n\t"
+                "sb       %[temp2],    -2(%[dst])                 \n\t"
+                "sb       %[temp3],    -1(%[dst])                 \n\t"
+                "bne      %[frow],     %[loop_end], 1b            \n\t"
+                : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+                  [temp5] "=&r"(temp5), [frow] "+r"(frow), [dst] "+r"(dst), [loop_end] "=&r"(loop_end),
+                  [temp2] "=&r"(temp2)
+                : [temp7] "r"(temp7), [temp6] "r"(temp6)
+                : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo");
+        }
+        for (i = 0; i < (x_out_max & 0x3); ++i) { // scalar tail for the remaining 0..3 samples
+            const uint32_t J = *frow++;
+            const int v = (int)MULT_FIX(J, wrk->fy_scale);
+            assert(v >= 0 && v <= 255);
+            *dst++ = v;
+        }
+    } else { // blend frow and irow with weights A and B, then scale by fy_scale
+        const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
+        const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
+        if (x_out_max >= 4) {
+            int temp8, temp9, temp10, temp11;
+            __asm__ volatile(
+                "li       %[temp8],    0x10000                    \n\t"
+                "li       %[temp9],    0x8000                     \n\t"
+                "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
+                "1:                                                 \n\t"
+                "lw       %[temp0],    0(%[frow])                 \n\t"
+                "lw       %[temp1],    4(%[frow])                 \n\t"
+                "lw       %[temp2],    8(%[frow])                 \n\t"
+                "lw       %[temp3],    12(%[frow])                \n\t"
+                "lw       %[temp4],    0(%[irow])                 \n\t"
+                "lw       %[temp5],    4(%[irow])                 \n\t"
+                "lw       %[temp10],   8(%[irow])                 \n\t"
+                "lw       %[temp11],   12(%[irow])                \n\t"
+                "addiu    %[dst],      %[dst],      4             \n\t"
+                "mult     $ac0,        %[temp8],    %[temp9]      \n\t" // acc = 2^31 (rounding term)
+                "maddu    $ac0,        %[A],        %[temp0]      \n\t" // acc += A * frow[0]
+                "maddu    $ac0,        %[B],        %[temp4]      \n\t" // acc += B * irow[0]
+                "mult     $ac1,        %[temp8],    %[temp9]      \n\t"
+                "maddu    $ac1,        %[A],        %[temp1]      \n\t"
+                "maddu    $ac1,        %[B],        %[temp5]      \n\t"
+                "mult     $ac2,        %[temp8],    %[temp9]      \n\t"
+                "maddu    $ac2,        %[A],        %[temp2]      \n\t"
+                "maddu    $ac2,        %[B],        %[temp10]     \n\t"
+                "mult     $ac3,        %[temp8],    %[temp9]      \n\t"
+                "maddu    $ac3,        %[A],        %[temp3]      \n\t"
+                "maddu    $ac3,        %[B],        %[temp11]     \n\t"
+                "addiu    %[frow],     %[frow],     16            \n\t"
+                "addiu    %[irow],     %[irow],     16            \n\t"
+                "mfhi     %[temp0],    $ac0                       \n\t" // J = upper 32 bits of the blend
+                "mfhi     %[temp1],    $ac1                       \n\t"
+                "mfhi     %[temp2],    $ac2                       \n\t"
+                "mfhi     %[temp3],    $ac3                       \n\t"
+                "mult     $ac0,        %[temp8],    %[temp9]      \n\t"
+                "maddu    $ac0,        %[temp0],    %[temp7]      \n\t" // second stage: J * fy_scale + 2^31
+                "mult     $ac1,        %[temp8],    %[temp9]      \n\t"
+                "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
+                "mult     $ac2,        %[temp8],    %[temp9]      \n\t"
+                "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
+                "mult     $ac3,        %[temp8],    %[temp9]      \n\t"
+                "maddu    $ac3,        %[temp3],    %[temp7]      \n\t"
+                "mfhi     %[temp0],    $ac0                       \n\t"
+                "mfhi     %[temp1],    $ac1                       \n\t"
+                "mfhi     %[temp2],    $ac2                       \n\t"
+                "mfhi     %[temp3],    $ac3                       \n\t"
+                "sb       %[temp0],    -4(%[dst])                 \n\t"
+                "sb       %[temp1],    -3(%[dst])                 \n\t"
+                "sb       %[temp2],    -2(%[dst])                 \n\t"
+                "sb       %[temp3],    -1(%[dst])                 \n\t"
+                "bne      %[frow],     %[loop_end], 1b            \n\t"
+                : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4),
+                  [temp5] "=&r"(temp5), [frow] "+r"(frow), [irow] "+r"(irow), [dst] "+r"(dst),
+                  [loop_end] "=&r"(loop_end), [temp8] "=&r"(temp8), [temp9] "=&r"(temp9), [temp10] "=&r"(temp10),
+                  [temp11] "=&r"(temp11), [temp2] "=&r"(temp2)
+                : [temp7] "r"(temp7), [temp6] "r"(temp6), [A] "r"(A), [B] "r"(B)
+                : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo");
+        }
+        for (i = 0; i < (x_out_max & 0x3); ++i) { // scalar tail for the remaining 0..3 samples
+            const uint64_t I = (uint64_t)A * *frow++ + (uint64_t)B * *irow++;
+            const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
+            const int v = (int)MULT_FIX(J, wrk->fy_scale);
+            assert(v >= 0 && v <= 255);
+            *dst++ = v;
+        }
+    }
+}
+
+#undef MULT_FIX
+#undef ROUNDER
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPRescalerDspInitMIPSdspR2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPSdspR2(void) { // installs only the two export routines
+    WebPRescalerExportRowExpand = ExportRowExpand;
+    WebPRescalerExportRowShrink = ExportRowShrink;
+}
+
+#else // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPSdspR2)
+
+#endif // WEBP_USE_MIPS_DSP_R2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/rescaler_neon.c b/codec/L2/demos/webpEnc/host/src/dsp/rescaler_neon.c
new file mode 100644
index 0000000000..14e56f750b
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/rescaler_neon.c
@@ -0,0 +1,180 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// NEON version of rescaling functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include <arm_neon.h>
+#include <assert.h>
+#include "./neon.h"
+#include "../utils/rescaler.h"
+
+#define ROUNDER (WEBP_RESCALER_ONE >> 1)
+#define MULT_FIX_C(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
+
+#define LOAD_32x4(SRC, DST) const uint32x4_t DST = vld1q_u32((SRC))
+#define LOAD_32x8(SRC, DST0, DST1) \
+    LOAD_32x4(SRC + 0, DST0);      \
+    LOAD_32x4(SRC + 4, DST1)
+
+#define STORE_32x8(SRC0, SRC1, DST) \
+    do {                            \
+        vst1q_u32((DST) + 0, SRC0); \
+        vst1q_u32((DST) + 4, SRC1); \
+    } while (0) /* no ';' here: the call site supplies it, so the macro acts as one statement */
+
+#if (WEBP_RESCALER_RFIX == 32)
+#define MAKE_HALF_CST(C) vdupq_n_s32((int32_t)((C) >> 1))
+#define MULT_FIX(A, B) /* note: B is actually scale>>1. See MAKE_HALF_CST */ \
+    vreinterpretq_u32_s32(vqrdmulhq_s32(vreinterpretq_s32_u32((A)), (B)))
+#else
+#error "MULT_FIX/WEBP_RESCALER_RFIX need some more work"
+#endif
+
+static uint32x4_t Interpolate(const rescaler_t* const frow, const rescaler_t* const irow, uint32_t A, uint32_t B) {
+    LOAD_32x4(frow, A0); // 4-lane form of the scalar tail's (A * frow + B * irow + ROUNDER) >> WEBP_RESCALER_RFIX
+    LOAD_32x4(irow, B0);
+    const uint64x2_t C0 = vmull_n_u32(vget_low_u32(A0), A);  // frow lanes 0-1 times A, widened to 64 bits
+    const uint64x2_t C1 = vmull_n_u32(vget_high_u32(A0), A); // frow lanes 2-3 times A
+    const uint64x2_t D0 = vmlal_n_u32(C0, vget_low_u32(B0), B);  // + irow lanes 0-1 times B
+    const uint64x2_t D1 = vmlal_n_u32(C1, vget_high_u32(B0), B); // + irow lanes 2-3 times B
+    const uint32x4_t E = vcombine_u32(vrshrn_n_u64(D0, WEBP_RESCALER_RFIX), vrshrn_n_u64(D1, WEBP_RESCALER_RFIX));
+    return E; // rounded, shifted right by WEBP_RESCALER_RFIX and narrowed back to 32-bit lanes
+}
+
+static void RescalerExportRowExpand(WebPRescaler* const wrk) { // NEON row export for the y_expand case
+    int x_out;
+    uint8_t* const dst = wrk->dst;
+    rescaler_t* const irow = wrk->irow;
+    const int x_out_max = wrk->dst_width * wrk->num_channels;
+    const int max_span = x_out_max & ~7; // x_out_max rounded down to a multiple of 8: the vectorized part
+    const rescaler_t* const frow = wrk->frow;
+    const uint32_t fy_scale = wrk->fy_scale;
+    const int32x4_t fy_scale_half = MAKE_HALF_CST(fy_scale); // MULT_FIX() expects the scale halved
+    assert(!WebPRescalerOutputDone(wrk));
+    assert(wrk->y_accum <= 0);
+    assert(wrk->y_expand);
+    assert(wrk->y_sub != 0);
+    if (wrk->y_accum == 0) { // no blending with irow: scale frow only
+        for (x_out = 0; x_out < max_span; x_out += 8) { // 8 samples per iteration
+            LOAD_32x4(frow + x_out + 0, A0);
+            LOAD_32x4(frow + x_out + 4, A1);
+            const uint32x4_t B0 = MULT_FIX(A0, fy_scale_half);
+            const uint32x4_t B1 = MULT_FIX(A1, fy_scale_half);
+            const uint16x4_t C0 = vmovn_u32(B0); // narrow 32 -> 16 bits
+            const uint16x4_t C1 = vmovn_u32(B1);
+            const uint8x8_t D = vmovn_u16(vcombine_u16(C0, C1)); // narrow 16 -> 8 bits
+            vst1_u8(dst + x_out, D);
+        }
+        for (; x_out < x_out_max; ++x_out) { // scalar tail for the last 0..7 samples
+            const uint32_t J = frow[x_out];
+            const int v = (int)MULT_FIX_C(J, fy_scale);
+            assert(v >= 0 && v <= 255);
+            dst[x_out] = v;
+        }
+    } else { // blend frow and irow with weights A and B before scaling
+        const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
+        const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
+        for (x_out = 0; x_out < max_span; x_out += 8) { // 8 samples per iteration
+            const uint32x4_t C0 = Interpolate(frow + x_out + 0, irow + x_out + 0, A, B);
+            const uint32x4_t C1 = Interpolate(frow + x_out + 4, irow + x_out + 4, A, B);
+            const uint32x4_t D0 = MULT_FIX(C0, fy_scale_half);
+            const uint32x4_t D1 = MULT_FIX(C1, fy_scale_half);
+            const uint16x4_t E0 = vmovn_u32(D0);
+            const uint16x4_t E1 = vmovn_u32(D1);
+            const uint8x8_t F = vmovn_u16(vcombine_u16(E0, E1));
+            vst1_u8(dst + x_out, F);
+        }
+        for (; x_out < x_out_max; ++x_out) { // scalar tail for the last 0..7 samples
+            const uint64_t I = (uint64_t)A * frow[x_out] + (uint64_t)B * irow[x_out];
+            const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
+            const int v = (int)MULT_FIX_C(J, fy_scale);
+            assert(v >= 0 && v <= 255);
+            dst[x_out] = v;
+        }
+    }
+}
+
+static void RescalerExportRowShrink(WebPRescaler* const wrk) { // NEON row export for the !y_expand case
+    int x_out;
+    uint8_t* const dst = wrk->dst;
+    rescaler_t* const irow = wrk->irow;
+    const int x_out_max = wrk->dst_width * wrk->num_channels;
+    const int max_span = x_out_max & ~7; // x_out_max rounded down to a multiple of 8: the vectorized part
+    const rescaler_t* const frow = wrk->frow;
+    const uint32_t yscale = wrk->fy_scale * (-wrk->y_accum);
+    const uint32_t fxy_scale = wrk->fxy_scale;
+    const uint32x4_t zero = vdupq_n_u32(0);
+    const int32x4_t yscale_half = MAKE_HALF_CST(yscale); // MULT_FIX() expects the scale halved
+    const int32x4_t fxy_scale_half = MAKE_HALF_CST(fxy_scale);
+    assert(!WebPRescalerOutputDone(wrk));
+    assert(wrk->y_accum <= 0);
+    assert(!wrk->y_expand);
+    if (yscale) { // part of frow is carried over into irow for the next output row
+        for (x_out = 0; x_out < max_span; x_out += 8) { // 8 samples per iteration
+            LOAD_32x8(frow + x_out, in0, in1);
+            LOAD_32x8(irow + x_out, in2, in3);
+            const uint32x4_t A0 = MULT_FIX(in0, yscale_half); // frac
+            const uint32x4_t A1 = MULT_FIX(in1, yscale_half);
+            const uint32x4_t B0 = vqsubq_u32(in2, A0); // irow - frac (saturating form of the subtraction)
+            const uint32x4_t B1 = vqsubq_u32(in3, A1);
+            const uint32x4_t C0 = MULT_FIX(B0, fxy_scale_half);
+            const uint32x4_t C1 = MULT_FIX(B1, fxy_scale_half);
+            const uint16x4_t D0 = vmovn_u32(C0); // narrow 32 -> 16 bits
+            const uint16x4_t D1 = vmovn_u32(C1);
+            const uint8x8_t E = vmovn_u16(vcombine_u16(D0, D1)); // narrow 16 -> 8 bits
+            vst1_u8(dst + x_out, E);
+            STORE_32x8(A0, A1, irow + x_out); // irow = frac
+        }
+        for (; x_out < x_out_max; ++x_out) { // scalar tail for the last 0..7 samples
+            const uint32_t frac = (uint32_t)MULT_FIX_C(frow[x_out], yscale);
+            const int v = (int)MULT_FIX_C(irow[x_out] - frac, wrk->fxy_scale);
+            assert(v >= 0 && v <= 255);
+            dst[x_out] = v;
+            irow[x_out] = frac; // new fractional start
+        }
+    } else { // yscale == 0: scale irow by fxy_scale and clear it
+        for (x_out = 0; x_out < max_span; x_out += 8) { // 8 samples per iteration
+            LOAD_32x8(irow + x_out, in0, in1);
+            const uint32x4_t A0 = MULT_FIX(in0, fxy_scale_half);
+            const uint32x4_t A1 = MULT_FIX(in1, fxy_scale_half);
+            const uint16x4_t B0 = vmovn_u32(A0);
+            const uint16x4_t B1 = vmovn_u32(A1);
+            const uint8x8_t C = vmovn_u16(vcombine_u16(B0, B1));
+            vst1_u8(dst + x_out, C);
+            STORE_32x8(zero, zero, irow + x_out); // irow = 0
+        }
+        for (; x_out < x_out_max; ++x_out) { // scalar tail for the last 0..7 samples
+            const int v = (int)MULT_FIX_C(irow[x_out], fxy_scale);
+            assert(v >= 0 && v <= 255);
+            dst[x_out] = v;
+            irow[x_out] = 0;
+        }
+    }
+}
+
+//------------------------------------------------------------------------------
+
+extern void WebPRescalerDspInitNEON(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitNEON(void) { // installs only the two export routines
+    WebPRescalerExportRowExpand = RescalerExportRowExpand;
+    WebPRescalerExportRowShrink = RescalerExportRowShrink;
+}
+
+#else // !WEBP_USE_NEON
+
+WEBP_DSP_INIT_STUB(WebPRescalerDspInitNEON)
+
+#endif // WEBP_USE_NEON
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/rescaler_sse2.c b/codec/L2/demos/webpEnc/host/src/dsp/rescaler_sse2.c
new file mode 100644
index 0000000000..57af3e764c
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/rescaler_sse2.c
@@ -0,0 +1,369 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 Rescaling functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+#include <emmintrin.h>
+
+#include <assert.h>
+#include "../utils/rescaler.h"
+
+//------------------------------------------------------------------------------
+// Implementations of critical functions ImportRow / ExportRow
+
+#define ROUNDER (WEBP_RESCALER_ONE >> 1)
+#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
+
+// input: 8 bytes ABCDEFGH -> output: A0E0B0F0C0G0D0H0 (byte i paired with byte i+4, each widened to 16 bits)
+static void LoadTwoPixels(const uint8_t* const src, __m128i* out) {
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i A = _mm_loadl_epi64((const __m128i*)(src)); // ABCDEFGH
+    const __m128i B = _mm_unpacklo_epi8(A, zero);             // A0B0C0D0E0F0G0H0
+    const __m128i C = _mm_srli_si128(B, 8);                   // E0F0G0H0
+    *out = _mm_unpacklo_epi16(B, C);                          // A0E0B0F0C0G0D0H0
+}
+
+// input: 8 bytes ABCDEFGH -> output: A0B0C0D0E0F0G0H0 (NOTE(review): "Height" is presumably a typo for "Eight")
+static void LoadHeightPixels(const uint8_t* const src, __m128i* out) {
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i A = _mm_loadl_epi64((const __m128i*)(src)); // ABCDEFGH
+    *out = _mm_unpacklo_epi8(A, zero);                        // each byte widened to 16 bits
+}
+
+static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk, const uint8_t* src) { // fills wrk->frow from src
+    rescaler_t* frow = wrk->frow;
+    const rescaler_t* const frow_end = frow + wrk->dst_width * wrk->num_channels;
+    const int x_add = wrk->x_add;
+    int accum = x_add;
+    __m128i cur_pixels;
+
+    assert(!WebPRescalerInputDone(wrk));
+    assert(wrk->x_expand);
+    if (wrk->num_channels == 4) { // 4-channel path: one 4-lane store per output pixel
+        if (wrk->src_width < 2) { // LoadTwoPixels() reads 8 bytes, so defer to the C version
+            WebPRescalerImportRowExpandC(wrk, src);
+            return;
+        }
+        LoadTwoPixels(src, &cur_pixels);
+        src += 4;
+        while (1) {
+            const __m128i mult = _mm_set1_epi32(((x_add - accum) << 16) | accum); // 16-bit weight pair per lane
+            const __m128i out = _mm_madd_epi16(cur_pixels, mult);
+            _mm_storeu_si128((__m128i*)frow, out);
+            frow += 4;
+            if (frow >= frow_end) break;
+            accum -= wrk->x_sub;
+            if (accum < 0) { // move on to the next pair of source pixels
+                LoadTwoPixels(src, &cur_pixels);
+                src += 4;
+                accum += x_add;
+            }
+        }
+    } else { // other channel counts: one output sample per iteration
+        int left;
+        const uint8_t* const src_limit = src + ((wrk->src_width >= 8) ? wrk->src_width - 8 : 0); // never before src
+        if (wrk->src_width < 8) { // LoadHeightPixels() reads 8 bytes, so defer to the C version
+            WebPRescalerImportRowExpandC(wrk, src);
+            return;
+        }
+        LoadHeightPixels(src, &cur_pixels);
+        src += 7;
+        left = 7; // 8 loaded samples give 7 adjacent pairs
+        while (1) {
+            const __m128i mult = _mm_cvtsi32_si128(((x_add - accum) << 16) | accum); // weights in the low lane only
+            const __m128i out = _mm_madd_epi16(cur_pixels, mult);
+            assert(sizeof(*frow) == sizeof(uint32_t));
+            WebPUint32ToMem((uint8_t*)frow, _mm_cvtsi128_si32(out));
+            frow += 1;
+            if (frow >= frow_end) break;
+            accum -= wrk->x_sub;
+            if (accum < 0) {
+                if (--left) { // shift the next pair into the low lane
+                    cur_pixels = _mm_srli_si128(cur_pixels, 2);
+                } else if (src <= src_limit) { // a full 8-byte reload still fits in the row
+                    LoadHeightPixels(src, &cur_pixels);
+                    src += 7;
+                    left = 7;
+                } else { // tail
+                    cur_pixels = _mm_srli_si128(cur_pixels, 2);
+                    cur_pixels = _mm_insert_epi16(cur_pixels, src[1], 1);
+                    src += 1;
+                    left = 1;
+                }
+                accum += x_add;
+            }
+        }
+    }
+    assert(accum == 0);
+}
+
+static void RescalerImportRowShrinkSSE2(WebPRescaler* const wrk, const uint8_t* src) { // fills wrk->frow from src
+    const int x_sub = wrk->x_sub;
+    int accum = 0;
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i mult0 = _mm_set1_epi16(x_sub);
+    const __m128i mult1 = _mm_set1_epi32(wrk->fx_scale);
+    const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
+    __m128i sum = zero;
+    rescaler_t* frow = wrk->frow;
+    const rescaler_t* const frow_end = wrk->frow + 4 * wrk->dst_width;
+
+    if (wrk->num_channels != 4 || wrk->x_add > (x_sub << 7)) { // only 4 channels and ratios within the 1/128 limit
+        WebPRescalerImportRowShrinkC(wrk, src);
+        return;
+    }
+    assert(!WebPRescalerInputDone(wrk));
+    assert(!wrk->x_expand);
+
+    for (; frow < frow_end; frow += 4) { // one 4-channel output pixel per iteration
+        __m128i base = zero;
+        accum += wrk->x_add;
+        while (accum > 0) { // accumulate source pixels until this output pixel is covered
+            const __m128i A = _mm_cvtsi32_si128(WebPMemToUint32(src));
+            src += 4;
+            base = _mm_unpacklo_epi8(A, zero);
+            // To avoid overflow, we need: base * x_add / x_sub < 32768
+            // => x_add < x_sub << 7. That's a 1/128 reduction ratio limit.
+            sum = _mm_add_epi16(sum, base);
+            accum -= x_sub;
+        }
+        { // Emit next horizontal pixel.
+            const __m128i mult = _mm_set1_epi16(-accum);
+            const __m128i frac0 = _mm_mullo_epi16(base, mult); // 16b x 16b -> 32b
+            const __m128i frac1 = _mm_mulhi_epu16(base, mult);
+            const __m128i frac = _mm_unpacklo_epi16(frac0, frac1); // frac is 32b
+            const __m128i A0 = _mm_mullo_epi16(sum, mult0);
+            const __m128i A1 = _mm_mulhi_epu16(sum, mult0);
+            const __m128i B0 = _mm_unpacklo_epi16(A0, A1);    // sum * x_sub
+            const __m128i frow_out = _mm_sub_epi32(B0, frac); // sum * x_sub - frac
+            const __m128i D0 = _mm_srli_epi64(frac, 32);
+            const __m128i D1 = _mm_mul_epu32(frac, mult1); // 32b x 16b -> 64b
+            const __m128i D2 = _mm_mul_epu32(D0, mult1);
+            const __m128i E1 = _mm_add_epi64(D1, rounder);
+            const __m128i E2 = _mm_add_epi64(D2, rounder);
+            const __m128i F1 = _mm_shuffle_epi32(E1, 1 | (3 << 2));
+            const __m128i F2 = _mm_shuffle_epi32(E2, 1 | (3 << 2));
+            const __m128i G = _mm_unpacklo_epi32(F1, F2);
+            sum = _mm_packs_epi32(G, zero); // the scaled frac seeds the sum for the next output pixel
+            _mm_storeu_si128((__m128i*)frow, frow_out);
+        }
+    }
+    assert(accum == 0);
+}
+
+//------------------------------------------------------------------------------
+// Row export
+
+// Loads the 128b vectors at src + 0 and src + 4, also builds copies with each 64b lane shifted right by 32, optionally multiplies by *mult, and stores into [out0 ... out3].
+static WEBP_INLINE void LoadDispatchAndMult(const rescaler_t* const src,
+                                            const __m128i* const mult, // may be NULL: the outputs are then left unmultiplied
+                                            __m128i* const out0,       // derived from the vector at src + 0
+                                            __m128i* const out1,       // derived from the vector at src + 4
+                                            __m128i* const out2,       // src + 0 vector, each 64b lane shifted right by 32
+                                            __m128i* const out3) {     // src + 4 vector, each 64b lane shifted right by 32
+    const __m128i A0 = _mm_loadu_si128((const __m128i*)(src + 0)); // unaligned 128b loads
+    const __m128i A1 = _mm_loadu_si128((const __m128i*)(src + 4));
+    const __m128i A2 = _mm_srli_epi64(A0, 32); // upper 32b of each 64b lane moved down to the lower 32b
+    const __m128i A3 = _mm_srli_epi64(A1, 32);
+    if (mult != NULL) {
+        *out0 = _mm_mul_epu32(A0, *mult); // low 32b x low 32b of each 64b lane -> 64b product
+        *out1 = _mm_mul_epu32(A1, *mult);
+        *out2 = _mm_mul_epu32(A2, *mult);
+        *out3 = _mm_mul_epu32(A3, *mult);
+    } else {
+        *out0 = A0; // raw load: the upper 32b of each lane are not cleared here
+        *out1 = A1;
+        *out2 = A2;
+        *out3 = A3;
+    }
+}
+
+static WEBP_INLINE void ProcessRow(const __m128i* const A0, // Multiplies A0..A3 by *mult, rounds, shifts, and stores 8 bytes at dst.
+                                   const __m128i* const A1,
+                                   const __m128i* const A2, // A2/A3 results are merged into the upper 32b of each 64b lane
+                                   const __m128i* const A3,
+                                   const __m128i* const mult, // multiplier, read from the low 32b of each 64b lane
+                                   uint8_t* const dst) {      // receives 8 bytes (single _mm_storel_epi64)
+    const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
+    const __m128i mask = _mm_set_epi32(0xffffffffu, 0, 0xffffffffu, 0); // keeps the upper 32b of each 64b lane
+    const __m128i B0 = _mm_mul_epu32(*A0, *mult); // 32b x 32b -> 64b per lane
+    const __m128i B1 = _mm_mul_epu32(*A1, *mult);
+    const __m128i B2 = _mm_mul_epu32(*A2, *mult);
+    const __m128i B3 = _mm_mul_epu32(*A3, *mult);
+    const __m128i C0 = _mm_add_epi64(B0, rounder); // add the rounding constant before shifting
+    const __m128i C1 = _mm_add_epi64(B1, rounder);
+    const __m128i C2 = _mm_add_epi64(B2, rounder);
+    const __m128i C3 = _mm_add_epi64(B3, rounder);
+    const __m128i D0 = _mm_srli_epi64(C0, WEBP_RESCALER_RFIX); // result ends up in the low part of each lane
+    const __m128i D1 = _mm_srli_epi64(C1, WEBP_RESCALER_RFIX);
+#if (WEBP_RESCALER_RFIX < 32) // fixed: test the constant the shifts use (an unknown name in #if evaluates to 0)
+    const __m128i D2 = _mm_and_si128(_mm_slli_epi64(C2, 32 - WEBP_RESCALER_RFIX), mask);
+    const __m128i D3 = _mm_and_si128(_mm_slli_epi64(C3, 32 - WEBP_RESCALER_RFIX), mask);
+#else
+    const __m128i D2 = _mm_and_si128(C2, mask); // result is taken directly from the upper 32b of the 64b sum
+    const __m128i D3 = _mm_and_si128(C3, mask);
+#endif
+    const __m128i E0 = _mm_or_si128(D0, D2); // merge: low 32b from D0, upper 32b from D2
+    const __m128i E1 = _mm_or_si128(D1, D3);
+    const __m128i F = _mm_packs_epi32(E0, E1); // 2 x (4 x 32b) -> 8 x 16b, signed saturation
+    const __m128i G = _mm_packus_epi16(F, F);  // 16b -> 8b, unsigned saturation
+    _mm_storel_epi64((__m128i*)dst, G);        // write the low 64b (8 bytes)
+}
+
+static void RescalerExportRowExpandSSE2(WebPRescaler* const wrk) { // Writes one output row to wrk->dst (asserts wrk->y_expand).
+    int x_out;
+    uint8_t* const dst = wrk->dst;
+    rescaler_t* const irow = wrk->irow;
+    const int x_out_max = wrk->dst_width * wrk->num_channels; // number of samples in the output row
+    const rescaler_t* const frow = wrk->frow;
+    const __m128i mult = _mm_set_epi32(0, wrk->fy_scale, 0, wrk->fy_scale); // fy_scale in the low 32b of each 64b lane
+
+    assert(!WebPRescalerOutputDone(wrk));
+    assert(wrk->y_accum <= 0 && wrk->y_sub + wrk->y_accum >= 0);
+    assert(wrk->y_expand);
+    if (wrk->y_accum == 0) { // no blending: output is frow scaled by fy_scale
+        for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) { // vector path, 8 samples per iteration
+            __m128i A0, A1, A2, A3;
+            LoadDispatchAndMult(frow + x_out, NULL, &A0, &A1, &A2, &A3);
+            ProcessRow(&A0, &A1, &A2, &A3, &mult, dst + x_out);
+        }
+        for (; x_out < x_out_max; ++x_out) { // scalar tail for the remaining (< 8) samples
+            const uint32_t J = frow[x_out];
+            const int v = (int)MULT_FIX(J, wrk->fy_scale);
+            assert(v >= 0 && v <= 255);
+            dst[x_out] = v;
+        }
+    } else { // blend frow and irow before scaling by fy_scale
+        const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub); // weight applied to irow
+        const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);             // weight applied to frow
+        const __m128i mA = _mm_set_epi32(0, A, 0, A);
+        const __m128i mB = _mm_set_epi32(0, B, 0, B);
+        const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
+        for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) { // vector path, 8 samples per iteration
+            __m128i A0, A1, A2, A3, B0, B1, B2, B3;
+            LoadDispatchAndMult(frow + x_out, &mA, &A0, &A1, &A2, &A3); // A * frow[]
+            LoadDispatchAndMult(irow + x_out, &mB, &B0, &B1, &B2, &B3); // B * irow[]
+            {
+                const __m128i C0 = _mm_add_epi64(A0, B0); // A * frow + B * irow (64b)
+                const __m128i C1 = _mm_add_epi64(A1, B1);
+                const __m128i C2 = _mm_add_epi64(A2, B2);
+                const __m128i C3 = _mm_add_epi64(A3, B3);
+                const __m128i D0 = _mm_add_epi64(C0, rounder);
+                const __m128i D1 = _mm_add_epi64(C1, rounder);
+                const __m128i D2 = _mm_add_epi64(C2, rounder);
+                const __m128i D3 = _mm_add_epi64(C3, rounder);
+                const __m128i E0 = _mm_srli_epi64(D0, WEBP_RESCALER_RFIX); // same as J in the scalar tail below
+                const __m128i E1 = _mm_srli_epi64(D1, WEBP_RESCALER_RFIX);
+                const __m128i E2 = _mm_srli_epi64(D2, WEBP_RESCALER_RFIX);
+                const __m128i E3 = _mm_srli_epi64(D3, WEBP_RESCALER_RFIX);
+                ProcessRow(&E0, &E1, &E2, &E3, &mult, dst + x_out);
+            }
+        }
+        for (; x_out < x_out_max; ++x_out) { // scalar tail for the remaining (< 8) samples
+            const uint64_t I = (uint64_t)A * frow[x_out] + (uint64_t)B * irow[x_out];
+            const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
+            const int v = (int)MULT_FIX(J, wrk->fy_scale);
+            assert(v >= 0 && v <= 255);
+            dst[x_out] = v;
+        }
+    }
+}
+
+static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) { // Writes one output row to wrk->dst and resets/updates wrk->irow.
+    int x_out;
+    uint8_t* const dst = wrk->dst;
+    rescaler_t* const irow = wrk->irow;
+    const int x_out_max = wrk->dst_width * wrk->num_channels; // number of samples in the output row
+    const rescaler_t* const frow = wrk->frow;
+    const uint32_t yscale = wrk->fy_scale * (-wrk->y_accum); // zero when y_accum == 0 (no fractional carry-over)
+    assert(!WebPRescalerOutputDone(wrk));
+    assert(wrk->y_accum <= 0);
+    assert(!wrk->y_expand);
+    if (yscale) { // part of frow (frac) is held back from this row and becomes the new irow
+        const int scale_xy = wrk->fxy_scale;
+        const __m128i mult_xy = _mm_set_epi32(0, scale_xy, 0, scale_xy);
+        const __m128i mult_y = _mm_set_epi32(0, yscale, 0, yscale);
+        const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
+        for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) { // vector path, 8 samples per iteration
+            __m128i A0, A1, A2, A3, B0, B1, B2, B3;
+            LoadDispatchAndMult(irow + x_out, NULL, &A0, &A1, &A2, &A3);    // irow[], unmultiplied
+            LoadDispatchAndMult(frow + x_out, &mult_y, &B0, &B1, &B2, &B3); // frow[] * yscale
+            {
+                const __m128i C0 = _mm_add_epi64(B0, rounder);
+                const __m128i C1 = _mm_add_epi64(B1, rounder);
+                const __m128i C2 = _mm_add_epi64(B2, rounder);
+                const __m128i C3 = _mm_add_epi64(B3, rounder);
+                const __m128i D0 = _mm_srli_epi64(C0, WEBP_RESCALER_RFIX); // = frac
+                const __m128i D1 = _mm_srli_epi64(C1, WEBP_RESCALER_RFIX);
+                const __m128i D2 = _mm_srli_epi64(C2, WEBP_RESCALER_RFIX);
+                const __m128i D3 = _mm_srli_epi64(C3, WEBP_RESCALER_RFIX);
+                const __m128i E0 = _mm_sub_epi64(A0, D0); // irow[x] - frac
+                const __m128i E1 = _mm_sub_epi64(A1, D1);
+                const __m128i E2 = _mm_sub_epi64(A2, D2);
+                const __m128i E3 = _mm_sub_epi64(A3, D3);
+                const __m128i F2 = _mm_slli_epi64(D2, 32); // move frac back to the upper 32b of each lane
+                const __m128i F3 = _mm_slli_epi64(D3, 32);
+                const __m128i G0 = _mm_or_si128(D0, F2); // re-merge the two halves split by LoadDispatchAndMult
+                const __m128i G1 = _mm_or_si128(D1, F3);
+                _mm_storeu_si128((__m128i*)(irow + x_out + 0), G0); // irow[] = frac (new fractional start)
+                _mm_storeu_si128((__m128i*)(irow + x_out + 4), G1);
+                ProcessRow(&E0, &E1, &E2, &E3, &mult_xy, dst + x_out);
+            }
+        }
+        for (; x_out < x_out_max; ++x_out) { // scalar tail for the remaining (< 8) samples
+            const uint32_t frac = (int)MULT_FIX(frow[x_out], yscale);
+            const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
+            assert(v >= 0 && v <= 255);
+            dst[x_out] = v;
+            irow[x_out] = frac; // new fractional start
+        }
+    } else { // nothing held back: emit irow scaled by fxy_scale and clear it
+        const uint32_t scale = wrk->fxy_scale;
+        const __m128i mult = _mm_set_epi32(0, scale, 0, scale);
+        const __m128i zero = _mm_setzero_si128();
+        for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) { // vector path, 8 samples per iteration
+            __m128i A0, A1, A2, A3;
+            LoadDispatchAndMult(irow + x_out, NULL, &A0, &A1, &A2, &A3);
+            _mm_storeu_si128((__m128i*)(irow + x_out + 0), zero); // irow[] is cleared after being loaded above
+            _mm_storeu_si128((__m128i*)(irow + x_out + 4), zero);
+            ProcessRow(&A0, &A1, &A2, &A3, &mult, dst + x_out);
+        }
+        for (; x_out < x_out_max; ++x_out) { // scalar tail for the remaining (< 8) samples
+            const int v = (int)MULT_FIX(irow[x_out], scale);
+            assert(v >= 0 && v <= 255);
+            dst[x_out] = v;
+            irow[x_out] = 0;
+        }
+    }
+}
+
+#undef MULT_FIX
+#undef ROUNDER
+
+//------------------------------------------------------------------------------
+
+extern void WebPRescalerDspInitSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitSSE2(void) { // Points the four rescaler row hooks at the SSE2 routines in this file.
+    WebPRescalerImportRowExpand = RescalerImportRowExpandSSE2;
+    WebPRescalerImportRowShrink = RescalerImportRowShrinkSSE2;
+    WebPRescalerExportRowExpand = RescalerExportRowExpandSSE2;
+    WebPRescalerExportRowShrink = RescalerExportRowShrinkSSE2;
+}
+
+#else // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(WebPRescalerDspInitSSE2)
+
+#endif // WEBP_USE_SSE2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/upsampling.c b/codec/L2/demos/webpEnc/host/src/dsp/upsampling.c
new file mode 100644
index 0000000000..886aa67405
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/upsampling.c
@@ -0,0 +1,247 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// YUV to RGB upsampling functions.
+//
+// Author: somnath@google.com (Somnath Banerjee)
+
+#include "./dsp.h"
+#include "./yuv.h"
+
+#include <assert.h>
+
+//------------------------------------------------------------------------------
+// Fancy upsampler
+
+#ifdef FANCY_UPSAMPLING
+
+// Fancy upsampling functions to convert YUV to RGB
+WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST];
+
+// Given samples laid out in a square as:
+//  [a b]
+//  [c d]
+// we interpolate u/v as:
+//  ([9*a + 3*b + 3*c +   d    3*a + 9*b +   c + 3*d] + [8 8]) / 16
+//  ([3*a +   b + 9*c + 3*d      a + 3*b + 3*c + 9*d] + [8 8]) / 16
+
+// We process u and v together stashed into 32bit (16bit each).
+#define LOAD_UV(u, v) ((u) | ((v) << 16))
+
+#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) /* Defines FUNC_NAME: upsample u/v, convert each pixel via FUNC */     \
+    static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, const uint8_t* top_u, const uint8_t* top_v, \
+                          const uint8_t* cur_u, const uint8_t* cur_v, uint8_t* top_dst, uint8_t* bottom_dst,         \
+                          int len) {                                                                                 \
+        int x;                                                                                                       \
+        const int last_pixel_pair = (len - 1) >> 1;                                                                  \
+        uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]); /* top-left sample */                                          \
+        uint32_t l_uv = LOAD_UV(cur_u[0], cur_v[0]);  /* left-sample */                                              \
+        assert(top_y != NULL);                                                                                       \
+        {                                                                                                            \
+            const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2;                                              \
+            FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst);                                                        \
+        }                                                                                                            \
+        if (bottom_y != NULL) {                                                                                      \
+            const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2;                                              \
+            FUNC(bottom_y[0], uv0 & 0xff, (uv0 >> 16), bottom_dst);                                                  \
+        }                                                                                                            \
+        for (x = 1; x <= last_pixel_pair; ++x) {                                                                     \
+            const uint32_t t_uv = LOAD_UV(top_u[x], top_v[x]); /* top sample */                                      \
+            const uint32_t uv = LOAD_UV(cur_u[x], cur_v[x]);   /* current sample */                                  \
+            /* precompute invariant values associated with first and second diagonals */                             \
+            const uint32_t avg = tl_uv + t_uv + l_uv + uv + 0x00080008u;                                             \
+            const uint32_t diag_12 = (avg + 2 * (t_uv + l_uv)) >> 3;                                                 \
+            const uint32_t diag_03 = (avg + 2 * (tl_uv + uv)) >> 3;                                                  \
+            {                                                                                                        \
+                const uint32_t uv0 = (diag_12 + tl_uv) >> 1;                                                         \
+                const uint32_t uv1 = (diag_03 + t_uv) >> 1;                                                          \
+                FUNC(top_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16), top_dst + (2 * x - 1) * XSTEP);                      \
+                FUNC(top_y[2 * x - 0], uv1 & 0xff, (uv1 >> 16), top_dst + (2 * x - 0) * XSTEP);                      \
+            }                                                                                                        \
+            if (bottom_y != NULL) {                                                                                  \
+                const uint32_t uv0 = (diag_03 + l_uv) >> 1;                                                          \
+                const uint32_t uv1 = (diag_12 + uv) >> 1;                                                            \
+                FUNC(bottom_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16), bottom_dst + (2 * x - 1) * XSTEP);                \
+                FUNC(bottom_y[2 * x + 0], uv1 & 0xff, (uv1 >> 16), bottom_dst + (2 * x + 0) * XSTEP);                \
+            }                                                                                                        \
+            tl_uv = t_uv;                                                                                            \
+            l_uv = uv;                                                                                               \
+        }                                                                                                            \
+        if (!(len & 1)) { /* even len: pixel len - 1 is not covered by the loop */                                   \
+            {                                                                                                        \
+                const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2;                                          \
+                FUNC(top_y[len - 1], uv0 & 0xff, (uv0 >> 16), top_dst + (len - 1) * XSTEP);                          \
+            }                                                                                                        \
+            if (bottom_y != NULL) {                                                                                  \
+                const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2;                                          \
+                FUNC(bottom_y[len - 1], uv0 & 0xff, (uv0 >> 16), bottom_dst + (len - 1) * XSTEP);                    \
+            }                                                                                                        \
+        }                                                                                                            \
+    }
+
+// All variants implemented.
+UPSAMPLE_FUNC(UpsampleRgbLinePair, VP8YuvToRgb, 3)
+UPSAMPLE_FUNC(UpsampleBgrLinePair, VP8YuvToBgr, 3)
+UPSAMPLE_FUNC(UpsampleRgbaLinePair, VP8YuvToRgba, 4)
+UPSAMPLE_FUNC(UpsampleBgraLinePair, VP8YuvToBgra, 4)
+UPSAMPLE_FUNC(UpsampleArgbLinePair, VP8YuvToArgb, 4)
+UPSAMPLE_FUNC(UpsampleRgba4444LinePair, VP8YuvToRgba4444, 2)
+UPSAMPLE_FUNC(UpsampleRgb565LinePair, VP8YuvToRgb565, 2)
+
+#undef LOAD_UV
+#undef UPSAMPLE_FUNC
+
+#endif // FANCY_UPSAMPLING
+
+//------------------------------------------------------------------------------
+
+#if !defined(FANCY_UPSAMPLING)
+#define DUAL_SAMPLE_FUNC(FUNC_NAME, FUNC) /* defines FUNC_NAME: each u/v sample is reused for 2 pixels */            \
+    static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y, const uint8_t* top_u, const uint8_t* top_v,    \
+                          const uint8_t* bot_u, const uint8_t* bot_v, uint8_t* top_dst, uint8_t* bot_dst, int len) { \
+        const int half_len = len >> 1;                                                                               \
+        int x;                                                                                                       \
+        assert(top_dst != NULL);                                                                                     \
+        {                                                                                                            \
+            for (x = 0; x < half_len; ++x) {                                                                         \
+                FUNC(top_y[2 * x + 0], top_u[x], top_v[x], top_dst + 8 * x + 0);                                     \
+                FUNC(top_y[2 * x + 1], top_u[x], top_v[x], top_dst + 8 * x + 4);                                     \
+            }                                                                                                        \
+            if (len & 1) FUNC(top_y[2 * x + 0], top_u[x], top_v[x], top_dst + 8 * x);                                \
+        }                                                                                                            \
+        if (bot_dst != NULL) {                                                                                       \
+            for (x = 0; x < half_len; ++x) {                                                                         \
+                FUNC(bot_y[2 * x + 0], bot_u[x], bot_v[x], bot_dst + 8 * x + 0);                                     \
+                FUNC(bot_y[2 * x + 1], bot_u[x], bot_v[x], bot_dst + 8 * x + 4);                                     \
+            }                                                                                                        \
+            if (len & 1) FUNC(bot_y[2 * x + 0], bot_u[x], bot_v[x], bot_dst + 8 * x);                                \
+        }                                                                                                            \
+    }
+
+DUAL_SAMPLE_FUNC(DualLineSamplerBGRA, VP8YuvToBgra)
+DUAL_SAMPLE_FUNC(DualLineSamplerARGB, VP8YuvToArgb)
+#undef DUAL_SAMPLE_FUNC
+
+#endif // !FANCY_UPSAMPLING
+
+WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last) { // Returns the BGRA (alpha_is_last != 0) or ARGB line-pair converter.
+    WebPInitUpsamplers(); // run the upsampler init before WebPUpsamplers[] is read below
+    VP8YUVInit();
+#ifdef FANCY_UPSAMPLING
+    return WebPUpsamplers[alpha_is_last ? MODE_BGRA : MODE_ARGB];
+#else
+    return (alpha_is_last ? DualLineSamplerBGRA : DualLineSamplerARGB); // non-fancy build: samplers defined in this file
+#endif
+}
+
+//------------------------------------------------------------------------------
+// YUV444 converter
+
+#define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP) /* defines FUNC_NAME: per-pixel FUNC over len samples */    \
+    extern void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int len); \
+    void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int len) {       \
+        int i;                                                                                          \
+        for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]);                              \
+    }
+
+YUV444_FUNC(WebPYuv444ToRgbC, VP8YuvToRgb, 3)
+YUV444_FUNC(WebPYuv444ToBgrC, VP8YuvToBgr, 3)
+YUV444_FUNC(WebPYuv444ToRgbaC, VP8YuvToRgba, 4)
+YUV444_FUNC(WebPYuv444ToBgraC, VP8YuvToBgra, 4)
+YUV444_FUNC(WebPYuv444ToArgbC, VP8YuvToArgb, 4)
+YUV444_FUNC(WebPYuv444ToRgba4444C, VP8YuvToRgba4444, 2)
+YUV444_FUNC(WebPYuv444ToRgb565C, VP8YuvToRgb565, 2)
+
+#undef YUV444_FUNC
+
+WebPYUV444Converter WebPYUV444Converters[MODE_LAST];
+
+extern void WebPInitYUV444ConvertersMIPSdspR2(void);
+extern void WebPInitYUV444ConvertersSSE2(void);
+
+static volatile VP8CPUInfo upsampling_last_cpuinfo_used1 = (VP8CPUInfo)&upsampling_last_cpuinfo_used1;
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444Converters(void) { // Fills WebPYUV444Converters[] with C versions, then lets CPU-specific inits run.
+    if (upsampling_last_cpuinfo_used1 == VP8GetCPUInfo) return; // already initialized for this VP8GetCPUInfo value
+
+    WebPYUV444Converters[MODE_RGB] = WebPYuv444ToRgbC;
+    WebPYUV444Converters[MODE_RGBA] = WebPYuv444ToRgbaC;
+    WebPYUV444Converters[MODE_BGR] = WebPYuv444ToBgrC;
+    WebPYUV444Converters[MODE_BGRA] = WebPYuv444ToBgraC;
+    WebPYUV444Converters[MODE_ARGB] = WebPYuv444ToArgbC;
+    WebPYUV444Converters[MODE_RGBA_4444] = WebPYuv444ToRgba4444C;
+    WebPYUV444Converters[MODE_RGB_565] = WebPYuv444ToRgb565C;
+    WebPYUV444Converters[MODE_rgbA] = WebPYuv444ToRgbaC; // the lower-case modes reuse the converters of their upper-case counterparts
+    WebPYUV444Converters[MODE_bgrA] = WebPYuv444ToBgraC;
+    WebPYUV444Converters[MODE_Argb] = WebPYuv444ToArgbC;
+    WebPYUV444Converters[MODE_rgbA_4444] = WebPYuv444ToRgba4444C;
+
+    if (VP8GetCPUInfo != NULL) { // CPU-specific inits run only when a CPU-info callback is set
+#if defined(WEBP_USE_SSE2)
+        if (VP8GetCPUInfo(kSSE2)) {
+            WebPInitYUV444ConvertersSSE2();
+        }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+        if (VP8GetCPUInfo(kMIPSdspR2)) {
+            WebPInitYUV444ConvertersMIPSdspR2();
+        }
+#endif
+    }
+    upsampling_last_cpuinfo_used1 = VP8GetCPUInfo; // remember the callback so the next call returns early
+}
+
+//------------------------------------------------------------------------------
+// Main calls
+
+extern void WebPInitUpsamplersSSE2(void);
+extern void WebPInitUpsamplersNEON(void);
+extern void WebPInitUpsamplersMIPSdspR2(void);
+
+static volatile VP8CPUInfo upsampling_last_cpuinfo_used2 = (VP8CPUInfo)&upsampling_last_cpuinfo_used2;
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) { // Fills WebPUpsamplers[] (FANCY_UPSAMPLING builds only) and records the CPU-info callback.
+    if (upsampling_last_cpuinfo_used2 == VP8GetCPUInfo) return; // already initialized for this VP8GetCPUInfo value
+
+#ifdef FANCY_UPSAMPLING
+    WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair;
+    WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair;
+    WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair;
+    WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair;
+    WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair;
+    WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
+    WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair;
+    WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair; // the lower-case modes reuse the upsamplers of their upper-case counterparts
+    WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair;
+    WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair;
+    WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair;
+
+    // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+    if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+        if (VP8GetCPUInfo(kSSE2)) {
+            WebPInitUpsamplersSSE2();
+        }
+#endif
+#if defined(WEBP_USE_NEON)
+        if (VP8GetCPUInfo(kNEON)) {
+            WebPInitUpsamplersNEON();
+        }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+        if (VP8GetCPUInfo(kMIPSdspR2)) {
+            WebPInitUpsamplersMIPSdspR2();
+        }
+#endif
+    }
+#endif // FANCY_UPSAMPLING
+    upsampling_last_cpuinfo_used2 = VP8GetCPUInfo; // recorded even when FANCY_UPSAMPLING is off
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/upsampling_mips_dsp_r2.c b/codec/L2/demos/webpEnc/host/src/dsp/upsampling_mips_dsp_r2.c
new file mode 100644
index 0000000000..a217e9bd5d
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/upsampling_mips_dsp_r2.c
@@ -0,0 +1,283 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// YUV to RGB upsampling functions.
+//
+// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
+//            Djordje Pesut  (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+// Code is disabled for now, in favor of the plain-C version
+// TODO(djordje.pesut): adapt the code to reflect the C-version.
+#if 0 // defined(WEBP_USE_MIPS_DSP_R2)
+
+#include <assert.h>
+#include "./yuv.h"
+
+#if !defined(WEBP_YUV_USE_TABLE)
+
+#define YUV_TO_RGB(Y, U, V, R, G, B) /* overwrites R, G, B using Y, U, V */                   \
+    do {                                                                                      \
+        const int t1 = kYScale * Y;                                                           \
+        const int t2 = kVToG * V;                                                             \
+        R = kVToR * V;                                                                        \
+        G = kUToG * U;                                                                        \
+        B = kUToB * U;                                                                        \
+        R = t1 + R;                                                                           \
+        G = t1 - G;                                                                           \
+        B = t1 + B;                                                                           \
+        R = R + kRCst;                                                                        \
+        G = G - t2 + kGCst;                                                                   \
+        B = B + kBCst;                                                                        \
+        __asm__ volatile("shll_s.w         %[" #R "],      %[" #R                             \
+                         "],        9          \n\t"                                          \
+                         "shll_s.w         %[" #G "],      %[" #G                             \
+                         "],        9          \n\t"                                          \
+                         "shll_s.w         %[" #B "],      %[" #B                             \
+                         "],        9          \n\t"                                          \
+                         "precrqu_s.qb.ph  %[" #R "],      %[" #R                             \
+                         "],        $zero      \n\t"                                          \
+                         "precrqu_s.qb.ph  %[" #G "],      %[" #G                             \
+                         "],        $zero      \n\t"                                          \
+                         "precrqu_s.qb.ph  %[" #B "],      %[" #B                             \
+                         "],        $zero      \n\t"                                          \
+                         "srl              %[" #R "],      %[" #R                             \
+                         "],        24         \n\t"                                          \
+                         "srl              %[" #G "],      %[" #G                             \
+                         "],        24         \n\t"                                          \
+                         "srl              %[" #B "],      %[" #B "],        24         \n\t" \
+                         : [R] "+r"(R), [G] "+r"(G), [B] "+r"(B)                              \
+                         :);                                                                  \
+    } while (0)
+
+static WEBP_INLINE void YuvToRgb(int y, int u, int v, uint8_t* const rgb) {  // writes 3 bytes: r, g, b
+  int r, g, b;
+  YUV_TO_RGB(y, u, v, r, g, b);
+  rgb[0] = r;
+  rgb[1] = g;
+  rgb[2] = b;
+}
+static WEBP_INLINE void YuvToBgr(int y, int u, int v, uint8_t* const bgr) {  // writes 3 bytes: b, g, r
+  int r, g, b;
+  YUV_TO_RGB(y, u, v, r, g, b);
+  bgr[0] = b;
+  bgr[1] = g;
+  bgr[2] = r;
+}
+static WEBP_INLINE void YuvToRgb565(int y, int u, int v, uint8_t* const rgb) {  // writes 2 bytes (5-6-5 packing)
+  int r, g, b;
+  YUV_TO_RGB(y, u, v, r, g, b);
+  {
+    const int rg = (r & 0xf8) | (g >> 5);         // top 5 bits of r, then bits 7..5 of g
+    const int gb = ((g << 3) & 0xe0) | (b >> 3);  // bits 4..2 of g, then top 5 bits of b
+#ifdef WEBP_SWAP_16BIT_CSP
+    rgb[0] = gb;  // swapped byte order
+    rgb[1] = rg;
+#else
+    rgb[0] = rg;
+    rgb[1] = gb;
+#endif
+  }
+}
+static WEBP_INLINE void YuvToRgba4444(int y, int u, int v,  // writes 2 bytes (4-4-4-4 packing)
+                                      uint8_t* const argb) {
+  int r, g, b;
+  YUV_TO_RGB(y, u, v, r, g, b);
+  {
+    const int rg = (r & 0xf0) | (g >> 4);  // top 4 bits of r, then top 4 bits of g
+    const int ba = (b & 0xf0) | 0x0f;     // overwrite the lower 4 bits
+#ifdef WEBP_SWAP_16BIT_CSP
+    argb[0] = ba;  // swapped byte order
+    argb[1] = rg;
+#else
+    argb[0] = rg;
+    argb[1] = ba;
+#endif
+   }
+}
+#endif // WEBP_YUV_USE_TABLE
+
+//-----------------------------------------------------------------------------
+// Alpha handling variants
+
+static WEBP_INLINE void YuvToArgb(uint8_t y, uint8_t u, uint8_t v,  // writes 4 bytes: 0xff, r, g, b
+                                  uint8_t* const argb) {
+  int r, g, b;
+  YUV_TO_RGB(y, u, v, r, g, b);  // NOTE(review): macro is only defined in the !WEBP_YUV_USE_TABLE branch above
+  argb[0] = 0xff;
+  argb[1] = r;
+  argb[2] = g;
+  argb[3] = b;
+}
+static WEBP_INLINE void YuvToBgra(uint8_t y, uint8_t u, uint8_t v,  // writes 4 bytes: b, g, r, 0xff
+                                  uint8_t* const bgra) {
+  int r, g, b;
+  YUV_TO_RGB(y, u, v, r, g, b);  // NOTE(review): macro is only defined in the !WEBP_YUV_USE_TABLE branch above
+  bgra[0] = b;
+  bgra[1] = g;
+  bgra[2] = r;
+  bgra[3] = 0xff;
+}
+static WEBP_INLINE void YuvToRgba(uint8_t y, uint8_t u, uint8_t v,  // writes 4 bytes: r, g, b, 0xff
+                                  uint8_t* const rgba) {
+  int r, g, b;
+  YUV_TO_RGB(y, u, v, r, g, b);  // NOTE(review): macro is only defined in the !WEBP_YUV_USE_TABLE branch above
+  rgba[0] = r;
+  rgba[1] = g;
+  rgba[2] = b;
+  rgba[3] = 0xff;
+}
+
+//------------------------------------------------------------------------------
+// Fancy upsampler
+
+#ifdef FANCY_UPSAMPLING
+
+// Given samples laid out in a square as:
+//  [a b]
+//  [c d]
+// we interpolate u/v as:
+//  ([9*a + 3*b + 3*c +   d    3*a + 9*b +   c + 3*d] + [8 8]) / 16
+//  ([3*a +   b + 9*c + 3*d      a + 3*b + 3*c + 9*d] + [8 8]) / 16
+
+// We process u and v together stashed into 32bit (16bit each).
+#define LOAD_UV(u, v) ((u) | ((v) << 16))
+// Generates FUNC_NAME(): interpolates u/v for one pair of luma rows (bottom_y may be NULL) and converts 'len' pixels per row with FUNC, XSTEP dst bytes per pixel.
+#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP)                                                                        \
+    static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, const uint8_t* top_u, const uint8_t* top_v, \
+                          const uint8_t* cur_u, const uint8_t* cur_v, uint8_t* top_dst, uint8_t* bottom_dst,         \
+                          int len) {                                                                                 \
+        int x;                                                                                                       \
+        const int last_pixel_pair = (len - 1) >> 1; /* loop below covers pixels 1 .. 2 * last_pixel_pair */          \
+        uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]); /* top-left sample */                                          \
+        uint32_t l_uv = LOAD_UV(cur_u[0], cur_v[0]);  /* left-sample */                                              \
+        assert(top_y != NULL);                                                                                       \
+        {                                                                                                            \
+            const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2;                                              \
+            FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst);                                                        \
+        }                                                                                                            \
+        if (bottom_y != NULL) { /* bottom row is optional */                                                         \
+            const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2;                                              \
+            FUNC(bottom_y[0], uv0 & 0xff, (uv0 >> 16), bottom_dst);                                                  \
+        }                                                                                                            \
+        for (x = 1; x <= last_pixel_pair; ++x) {                                                                     \
+            const uint32_t t_uv = LOAD_UV(top_u[x], top_v[x]); /* top sample */                                      \
+            const uint32_t uv = LOAD_UV(cur_u[x], cur_v[x]);   /* sample */                                            \
+            /* precompute invariant values associated with first and second diagonals */                             \
+            const uint32_t avg = tl_uv + t_uv + l_uv + uv + 0x00080008u;                                             \
+            const uint32_t diag_12 = (avg + 2 * (t_uv + l_uv)) >> 3;                                                 \
+            const uint32_t diag_03 = (avg + 2 * (tl_uv + uv)) >> 3;                                                  \
+            {                                                                                                        \
+                const uint32_t uv0 = (diag_12 + tl_uv) >> 1;                                                         \
+                const uint32_t uv1 = (diag_03 + t_uv) >> 1;                                                          \
+                FUNC(top_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16), top_dst + (2 * x - 1) * XSTEP);                      \
+                FUNC(top_y[2 * x - 0], uv1 & 0xff, (uv1 >> 16), top_dst + (2 * x - 0) * XSTEP);                      \
+            }                                                                                                        \
+            if (bottom_y != NULL) {                                                                                  \
+                const uint32_t uv0 = (diag_03 + l_uv) >> 1;                                                          \
+                const uint32_t uv1 = (diag_12 + uv) >> 1;                                                            \
+                FUNC(bottom_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16), bottom_dst + (2 * x - 1) * XSTEP);                \
+                FUNC(bottom_y[2 * x + 0], uv1 & 0xff, (uv1 >> 16), bottom_dst + (2 * x + 0) * XSTEP);                \
+            }                                                                                                        \
+            tl_uv = t_uv;                                                                                            \
+            l_uv = uv;                                                                                               \
+        }                                                                                                            \
+        if (!(len & 1)) { /* even len: pixel len - 1 is not covered by the loop above */                             \
+            {                                                                                                        \
+                const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2;                                          \
+                FUNC(top_y[len - 1], uv0 & 0xff, (uv0 >> 16), top_dst + (len - 1) * XSTEP);                          \
+            }                                                                                                        \
+            if (bottom_y != NULL) {                                                                                  \
+                const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2;                                          \
+                FUNC(bottom_y[len - 1], uv0 & 0xff, (uv0 >> 16), bottom_dst + (len - 1) * XSTEP);                    \
+            }                                                                                                        \
+        }                                                                                                            \
+    }
+
+// One line-pair upsampler per output format; the last argument is the dst step in bytes per pixel.
+UPSAMPLE_FUNC(UpsampleRgb565LinePair, YuvToRgb565, 2)
+UPSAMPLE_FUNC(UpsampleRgba4444LinePair, YuvToRgba4444, 2)
+UPSAMPLE_FUNC(UpsampleRgbLinePair, YuvToRgb, 3)
+UPSAMPLE_FUNC(UpsampleBgrLinePair, YuvToBgr, 3)
+UPSAMPLE_FUNC(UpsampleArgbLinePair, YuvToArgb, 4)
+UPSAMPLE_FUNC(UpsampleBgraLinePair, YuvToBgra, 4)
+UPSAMPLE_FUNC(UpsampleRgbaLinePair, YuvToRgba, 4)
+
+#undef UPSAMPLE_FUNC
+#undef LOAD_UV
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPInitUpsamplersMIPSdspR2(void);
+// Fills WebPUpsamplers[]; the MODE_rgbA/bgrA/Argb/rgbA_4444 slots reuse the RGBA/BGRA/ARGB/RGBA_4444 functions.
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersMIPSdspR2(void) {
+  WebPUpsamplers[MODE_RGB]       = UpsampleRgbLinePair;
+  WebPUpsamplers[MODE_BGR]       = UpsampleBgrLinePair;
+  WebPUpsamplers[MODE_RGB_565]   = UpsampleRgb565LinePair;
+  WebPUpsamplers[MODE_RGBA]      = UpsampleRgbaLinePair;
+  WebPUpsamplers[MODE_rgbA]      = UpsampleRgbaLinePair;
+  WebPUpsamplers[MODE_BGRA]      = UpsampleBgraLinePair;
+  WebPUpsamplers[MODE_bgrA]      = UpsampleBgraLinePair;
+  WebPUpsamplers[MODE_ARGB]      = UpsampleArgbLinePair;
+  WebPUpsamplers[MODE_Argb]      = UpsampleArgbLinePair;
+  WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
+  WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair;
+}
+
+#endif // FANCY_UPSAMPLING
+
+//------------------------------------------------------------------------------
+// YUV444 converter
+// Generates FUNC_NAME(): calls FUNC on each of 'len' (y, u, v) samples, advancing dst by XSTEP bytes per sample.
+#define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP)                                                              \
+    static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int len) { \
+        int pos;                                                                                         \
+        for (pos = 0; pos < len; ++pos, dst += (XSTEP)) FUNC(y[pos], u[pos], v[pos], dst);               \
+    }
+
+YUV444_FUNC(Yuv444ToRgb565, YuvToRgb565, 2)  // third argument: dst bytes per pixel
+YUV444_FUNC(Yuv444ToRgba4444, YuvToRgba4444, 2)
+YUV444_FUNC(Yuv444ToRgb, YuvToRgb, 3)
+YUV444_FUNC(Yuv444ToBgr, YuvToBgr, 3)
+YUV444_FUNC(Yuv444ToArgb, YuvToArgb, 4)
+YUV444_FUNC(Yuv444ToBgra, YuvToBgra, 4)
+YUV444_FUNC(Yuv444ToRgba, YuvToRgba, 4)
+
+#undef YUV444_FUNC
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPInitYUV444ConvertersMIPSdspR2(void);
+// Fills WebPYUV444Converters[]; the MODE_rgbA/bgrA/Argb/rgbA_4444 slots reuse the RGBA/BGRA/ARGB/RGBA_4444 functions.
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444ConvertersMIPSdspR2(void) {
+  WebPYUV444Converters[MODE_RGB]       = Yuv444ToRgb;
+  WebPYUV444Converters[MODE_BGR]       = Yuv444ToBgr;
+  WebPYUV444Converters[MODE_RGB_565]   = Yuv444ToRgb565;
+  WebPYUV444Converters[MODE_RGBA]      = Yuv444ToRgba;
+  WebPYUV444Converters[MODE_rgbA]      = Yuv444ToRgba;
+  WebPYUV444Converters[MODE_BGRA]      = Yuv444ToBgra;
+  WebPYUV444Converters[MODE_bgrA]      = Yuv444ToBgra;
+  WebPYUV444Converters[MODE_ARGB]      = Yuv444ToArgb;
+  WebPYUV444Converters[MODE_Argb]      = Yuv444ToArgb;
+  WebPYUV444Converters[MODE_RGBA_4444] = Yuv444ToRgba4444;
+  WebPYUV444Converters[MODE_rgbA_4444] = Yuv444ToRgba4444;
+}
+
+#else // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(WebPInitYUV444ConvertersMIPSdspR2)
+
+#endif // WEBP_USE_MIPS_DSP_R2
+// Emit the stub only when the real WebPInitUpsamplersMIPSdspR2() above is not compiled; an unconditional stub would define it twice.
+#if !(defined(FANCY_UPSAMPLING) && defined(WEBP_USE_MIPS_DSP_R2))
+WEBP_DSP_INIT_STUB(WebPInitUpsamplersMIPSdspR2)
+#endif
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/upsampling_neon.c b/codec/L2/demos/webpEnc/host/src/dsp/upsampling_neon.c
new file mode 100644
index 0000000000..f3f52951d2
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/upsampling_neon.c
@@ -0,0 +1,307 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// NEON version of YUV to RGB upsampling functions.
+//
+// Author: mans@mansr.com (Mans Rullgard)
+// Based on SSE code by: somnath@google.com (Somnath Banerjee)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include <assert.h>
+#include <arm_neon.h>
+#include <string.h>
+#include "./neon.h"
+#include "./yuv.h"
+
+#ifdef FANCY_UPSAMPLING
+
+//-----------------------------------------------------------------------------
+// U/V upsampling
+
+// Reads 9 bytes from each of rows r1 and r2 (two overlapping 8-byte loads per row); writes 16 bytes for r1 at 'out' and 16 for r2 at 'out + 32'.
+#define UPSAMPLE_16PIXELS(r1, r2, out)                  \
+    {                                                   \
+        uint8x8_t a = vld1_u8(r1);                      \
+        uint8x8_t b = vld1_u8(r1 + 1);                  \
+        uint8x8_t c = vld1_u8(r2);                      \
+        uint8x8_t d = vld1_u8(r2 + 1);                  \
+                                                        \
+        uint16x8_t al = vshll_n_u8(a, 1);               \
+        uint16x8_t bl = vshll_n_u8(b, 1);               \
+        uint16x8_t cl = vshll_n_u8(c, 1);               \
+        uint16x8_t dl = vshll_n_u8(d, 1);               \
+                                                        \
+        uint8x8_t diag1, diag2;                         \
+        uint16x8_t sl;                                  \
+                                                        \
+        /* a + b + c + d */                             \
+        sl = vaddl_u8(a, b);                            \
+        sl = vaddw_u8(sl, c);                           \
+        sl = vaddw_u8(sl, d);                           \
+                                                        \
+        al = vaddq_u16(sl, al); /* 3a +  b +  c +  d */ \
+        bl = vaddq_u16(sl, bl); /*  a + 3b +  c +  d */ \
+                                                        \
+        al = vaddq_u16(al, dl); /* 3a +  b +  c + 3d */ \
+        bl = vaddq_u16(bl, cl); /*  a + 3b + 3c +  d */ \
+                                                        \
+        diag2 = vshrn_n_u16(al, 3);                     \
+        diag1 = vshrn_n_u16(bl, 3);                     \
+                                                        \
+        a = vrhadd_u8(a, diag1); /* rounded average */  \
+        b = vrhadd_u8(b, diag2);                        \
+        c = vrhadd_u8(c, diag2);                        \
+        d = vrhadd_u8(d, diag1);                        \
+                                                        \
+        {                                               \
+            uint8x8x2_t a_b, c_d;                       \
+            INIT_VECTOR2(a_b, a, b);                    \
+            INIT_VECTOR2(c_d, c, d);                    \
+            vst2_u8(out, a_b); /* interleave a, b */    \
+            vst2_u8(out + 32, c_d);                     \
+        }                                               \
+    }
+
+// Out-of-line wrapper around UPSAMPLE_16PIXELS, used where speed is not critical in order to reduce code size.
+static void Upsample16Pixels(const uint8_t* r1, const uint8_t* r2, uint8_t* out) {
+    UPSAMPLE_16PIXELS(r1, r2, out);
+}
+
+#define UPSAMPLE_LAST_BLOCK(tb, bb, num_pixels, out)                               \
+    {                                                                              \
+        uint8_t top_pad[9], bot_pad[9];                                            \
+        /* copy the valid samples, then repeat the last one up to 9 bytes */       \
+        memcpy(top_pad, (tb), (num_pixels));                                       \
+        memset(top_pad + (num_pixels), top_pad[(num_pixels)-1], 9 - (num_pixels)); \
+        memcpy(bot_pad, (bb), (num_pixels));                                       \
+        memset(bot_pad + (num_pixels), bot_pad[(num_pixels)-1], 9 - (num_pixels)); \
+        Upsample16Pixels(top_pad, bot_pad, out);                                   \
+    }
+
+//-----------------------------------------------------------------------------
+// YUV->RGB conversion
+
+// CONVERT8 multipliers by lane: [0] Y, [1] V for red, [2] U for green, [3] V for green. note: we represent the 33050 large constant as 32768 + 282
+static const int16_t kCoeffs1[4] = {19077, 26149, 6419, 13320};
+
+#define v255 vdup_n_u8(255)  // constant vector used as the fourth channel by STORE_Rgba / STORE_Bgra / STORE_Argb
+#define v_0x0f vdup_n_u8(15)  // constant vector OR-ed into the blue byte by STORE_Rgba4444
+
+#define STORE_Rgb(out, r, g, b)                     \
+    do {                                            \
+        uint8x8x3_t px3; /* channel order R, G, B */ \
+        INIT_VECTOR3(px3, r, g, b);                 \
+        vst3_u8(out, px3);                          \
+    } while (0)
+
+#define STORE_Bgr(out, r, g, b)                     \
+    do {                                            \
+        uint8x8x3_t px3; /* channel order B, G, R */ \
+        INIT_VECTOR3(px3, b, g, r);                 \
+        vst3_u8(out, px3);                          \
+    } while (0)
+
+#define STORE_Rgba(out, r, g, b)                          \
+    do {                                                  \
+        uint8x8x4_t px4; /* channel order R, G, B, v255 */ \
+        INIT_VECTOR4(px4, r, g, b, v255);                 \
+        vst4_u8(out, px4);                                \
+    } while (0)
+
+#define STORE_Bgra(out, r, g, b)                          \
+    do {                                                  \
+        uint8x8x4_t px4; /* channel order B, G, R, v255 */ \
+        INIT_VECTOR4(px4, b, g, r, v255);                 \
+        vst4_u8(out, px4);                                \
+    } while (0)
+
+#define STORE_Argb(out, r, g, b)                          \
+    do {                                                  \
+        uint8x8x4_t px4; /* channel order v255, R, G, B */ \
+        INIT_VECTOR4(px4, v255, r, g, b);                 \
+        vst4_u8(out, px4);                                \
+    } while (0)
+
+#if defined(WEBP_SWAP_16BIT_CSP)
+#define ZIP_U8(lo, hi) vzip_u8((hi), (lo))  // swapped: 'hi' bytes come first
+#else
+#define ZIP_U8(lo, hi) vzip_u8((lo), (hi))
+#endif
+
+#define STORE_Rgba4444(out, r, g, b)                                            \
+    do {                                                                        \
+        const uint8x8_t blue_alpha = vorr_u8(b, v_0x0f); /* low nibble = 0xf */ \
+        const uint8x8_t green_lo = vshr_n_u8(g, 4);                             \
+        const uint8x8_t red_hi = vshl_n_u8(vshr_n_u8(r, 4), 4); /* 4bits */     \
+        const uint8x8_t red_green = vorr_u8(red_hi, green_lo);                  \
+        const uint8x8x2_t zipped = ZIP_U8(red_green, blue_alpha);               \
+        vst1q_u8(out, vcombine_u8(zipped.val[0], zipped.val[1]));               \
+    } while (0)
+
+#define STORE_Rgb565(out, r, g, b)                                                   \
+    do {                                                                             \
+        const uint8x8_t blue5 = vshr_n_u8(b, 3);                   /* 5bits */       \
+        const uint8x8_t green_lo3 = vshl_n_u8(vshr_n_u8(g, 2), 5); /* lower 3bits */ \
+        const uint8x8_t green_hi3 = vshr_n_u8(g, 5);               /* upper 3bits */ \
+        const uint8x8_t red5 = vshl_n_u8(vshr_n_u8(r, 3), 3);      /* 5bits */       \
+        const uint8x8_t green_blue = vorr_u8(green_lo3, blue5);                      \
+        const uint8x8_t red_green = vorr_u8(red5, green_hi3);                        \
+        const uint8x8x2_t zipped = ZIP_U8(red_green, green_blue);                    \
+        vst1q_u8(out, vcombine_u8(zipped.val[0], zipped.val[1]));                    \
+    } while (0)
+// Converts N pixels, 8 per step: y from src_y at cur_x, u from src_uv[i], v from src_uv[i + 16]; stores via STORE_<FMT>. Uses coeff1 and the *_Rounder vectors of the enclosing scope.
+#define CONVERT8(FMT, XSTEP, N, src_y, src_uv, out, cur_x)                \
+    do {                                                                  \
+        int i;                                                            \
+        for (i = 0; i < N; i += 8) { /* 8 pixels per iteration */         \
+            const int off = ((cur_x) + i) * XSTEP; /* dst byte offset */  \
+            const uint8x8_t y = vld1_u8((src_y) + (cur_x) + i);           \
+            const uint8x8_t u = vld1_u8((src_uv) + i + 0);                \
+            const uint8x8_t v = vld1_u8((src_uv) + i + 16);               \
+            const int16x8_t Y0 = vreinterpretq_s16_u16(vshll_n_u8(y, 7)); \
+            const int16x8_t U0 = vreinterpretq_s16_u16(vshll_n_u8(u, 7)); \
+            const int16x8_t V0 = vreinterpretq_s16_u16(vshll_n_u8(v, 7)); \
+            const int16x8_t Y1 = vqdmulhq_lane_s16(Y0, coeff1, 0);        \
+            const int16x8_t R0 = vqdmulhq_lane_s16(V0, coeff1, 1);        \
+            const int16x8_t G0 = vqdmulhq_lane_s16(U0, coeff1, 2);        \
+            const int16x8_t G1 = vqdmulhq_lane_s16(V0, coeff1, 3);        \
+            const int16x8_t B0 = vqdmulhq_n_s16(U0, 282);                 \
+            const int16x8_t R1 = vqaddq_s16(Y1, R_Rounder);               \
+            const int16x8_t G2 = vqaddq_s16(Y1, G_Rounder);               \
+            const int16x8_t B1 = vqaddq_s16(Y1, B_Rounder);               \
+            const int16x8_t R2 = vqaddq_s16(R0, R1);                      \
+            const int16x8_t G3 = vqaddq_s16(G0, G1);                      \
+            const int16x8_t B2 = vqaddq_s16(B0, B1);                      \
+            const int16x8_t G4 = vqsubq_s16(G2, G3);                      \
+            const int16x8_t B3 = vqaddq_s16(B2, U0);                      \
+            const uint8x8_t R = vqshrun_n_s16(R2, YUV_FIX2);              \
+            const uint8x8_t G = vqshrun_n_s16(G4, YUV_FIX2);              \
+            const uint8x8_t B = vqshrun_n_s16(B3, YUV_FIX2);              \
+            STORE_##FMT(out + off, R, G, B); /* format-specific store */  \
+        }                                                                 \
+    } while (0)
+
+#define CONVERT1(FUNC, XSTEP, N, src_y, src_uv, rgb, cur_x)    \
+    {                                                          \
+        int k; /* scalar path: one pixel per iteration */      \
+        for (k = 0; k < N; ++k) {                              \
+            const int px = (cur_x) + k;                        \
+            const int y_val = src_y[px];                       \
+            const int u_val = (src_uv)[k];                     \
+            const int v_val = (src_uv)[k + 16];                \
+            FUNC(y_val, u_val, v_val, rgb + px * XSTEP);       \
+        }                                                      \
+    }
+
+#define CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, uv, top_dst, bottom_dst, cur_x, len) \
+    {                                                                                   \
+        CONVERT8(FMT, XSTEP, len, top_y, uv, top_dst, cur_x);                           \
+        if (bottom_y != NULL) { /* bottom row is optional; its u/v start at uv + 32 */  \
+            CONVERT8(FMT, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x);          \
+        }                                                                               \
+    }
+
+#define CONVERT2RGB_1(FUNC, XSTEP, top_y, bottom_y, uv, top_dst, bottom_dst, cur_x, len) \
+    {                                                                                    \
+        CONVERT1(FUNC, XSTEP, len, top_y, uv, top_dst, cur_x);                           \
+        if (bottom_y != NULL) { /* bottom row is optional; its u/v start at uv + 32 */   \
+            CONVERT1(FUNC, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x);          \
+        }                                                                                \
+    }
+// Generates FUNC_NAME(): first pixel via VP8YuvTo<FMT>, then 16-pixel blocks (UPSAMPLE_16PIXELS + CONVERT8), then a per-pixel tail; bottom_y may be NULL.
+#define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP)                                                                    \
+    static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, const uint8_t* top_u, const uint8_t* top_v, \
+                          const uint8_t* cur_u, const uint8_t* cur_v, uint8_t* top_dst, uint8_t* bottom_dst,         \
+                          int len) {                                                                                 \
+        int block;                                                                                                   \
+        /* 16 byte aligned array to cache reconstructed u and v */                                                   \
+        uint8_t uv_buf[2 * 32 + 15];                                                                                 \
+        uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15);                                            \
+        const int uv_len = (len + 1) >> 1; /* number of u/v samples per row */                                       \
+        /* 9 pixels must be read-able for each block */                                                              \
+        const int num_blocks = (uv_len - 1) >> 3;                                                                    \
+        const int leftover = uv_len - num_blocks * 8;                                                                \
+        const int last_pos = 1 + 16 * num_blocks; /* first pixel left for the scalar tail */                         \
+                                                                                                                     \
+        const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1;                                                         \
+        const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1;                                                         \
+                                                                                                                     \
+        const int16x4_t coeff1 = vld1_s16(kCoeffs1);                                                                 \
+        const int16x8_t R_Rounder = vdupq_n_s16(-14234);                                                             \
+        const int16x8_t G_Rounder = vdupq_n_s16(8708);                                                               \
+        const int16x8_t B_Rounder = vdupq_n_s16(-17685);                                                             \
+                                                                                                                     \
+        /* Treat the first pixel in regular way */                                                                   \
+        assert(top_y != NULL);                                                                                       \
+        {                                                                                                            \
+            const int u0 = (top_u[0] + u_diag) >> 1;                                                                 \
+            const int v0 = (top_v[0] + v_diag) >> 1;                                                                 \
+            VP8YuvTo##FMT(top_y[0], u0, v0, top_dst);                                                                \
+        }                                                                                                            \
+        if (bottom_y != NULL) {                                                                                      \
+            const int u0 = (cur_u[0] + u_diag) >> 1;                                                                 \
+            const int v0 = (cur_v[0] + v_diag) >> 1;                                                                 \
+            VP8YuvTo##FMT(bottom_y[0], u0, v0, bottom_dst);                                                          \
+        }                                                                                                            \
+                                                                                                                     \
+        for (block = 0; block < num_blocks; ++block) {                                                               \
+            UPSAMPLE_16PIXELS(top_u, cur_u, r_uv);                                                                   \
+            UPSAMPLE_16PIXELS(top_v, cur_v, r_uv + 16);                                                              \
+            CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, r_uv, top_dst, bottom_dst, 16 * block + 1, 16);               \
+            top_u += 8;                                                                                              \
+            cur_u += 8;                                                                                              \
+            top_v += 8;                                                                                              \
+            cur_v += 8;                                                                                              \
+        }                                                                                                            \
+                                                                                                                     \
+        UPSAMPLE_LAST_BLOCK(top_u, cur_u, leftover, r_uv); /* tail: pad rows to 9 samples, then upsample */          \
+        UPSAMPLE_LAST_BLOCK(top_v, cur_v, leftover, r_uv + 16);                                                      \
+        CONVERT2RGB_1(VP8YuvTo##FMT, XSTEP, top_y, bottom_y, r_uv, top_dst, bottom_dst, last_pos, len - last_pos);   \
+    }
+
+// One NEON line-pair upsampler per output format; the last argument is the dst step in bytes per pixel.
+NEON_UPSAMPLE_FUNC(UpsampleRgb565LinePair, Rgb565, 2)
+NEON_UPSAMPLE_FUNC(UpsampleRgba4444LinePair, Rgba4444, 2)
+NEON_UPSAMPLE_FUNC(UpsampleRgbLinePair, Rgb, 3)
+NEON_UPSAMPLE_FUNC(UpsampleBgrLinePair, Bgr, 3)
+NEON_UPSAMPLE_FUNC(UpsampleArgbLinePair, Argb, 4)
+NEON_UPSAMPLE_FUNC(UpsampleBgraLinePair, Bgra, 4)
+NEON_UPSAMPLE_FUNC(UpsampleRgbaLinePair, Rgba, 4)
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
+
+extern void WebPInitUpsamplersNEON(void);
+// Fills WebPUpsamplers[]; the MODE_rgbA/bgrA/Argb/rgbA_4444 slots reuse the RGBA/BGRA/ARGB/RGBA_4444 functions.
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersNEON(void) {
+    WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair;
+    WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair;
+    WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair;
+    WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair;
+    WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair;
+    WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair;
+    WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair;
+    WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair;
+    WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair;
+    WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
+    WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair;
+}
+
+#endif // FANCY_UPSAMPLING
+
+#endif // WEBP_USE_NEON
+// Stub used only when the NEON WebPInitUpsamplersNEON() above is not compiled (FANCY_UPSAMPLING or WEBP_USE_NEON undefined).
+#if !(defined(FANCY_UPSAMPLING) && defined(WEBP_USE_NEON))
+WEBP_DSP_INIT_STUB(WebPInitUpsamplersNEON)
+#endif
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/upsampling_sse2.c b/codec/L2/demos/webpEnc/host/src/dsp/upsampling_sse2.c
new file mode 100644
index 0000000000..c8b4a60487
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/upsampling_sse2.c
@@ -0,0 +1,245 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 version of YUV to RGB upsampling functions.
+//
+// Author: somnath@google.com (Somnath Banerjee)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+
+#include <assert.h>
+#include <emmintrin.h>
+#include <string.h>
+#include "./yuv.h"
+
+#ifdef FANCY_UPSAMPLING
+
+// We compute (9*a + 3*b + 3*c + d + 8) / 16 as follows
+// u = (9*a + 3*b + 3*c + d + 8) / 16
+//   = (a + (a + 3*b + 3*c + d) / 8 + 1) / 2
+//   = (a + m + 1) / 2
+// where m = (a + 3*b + 3*c + d) / 8
+//         = ((a + b + c + d) / 2 + b + c) / 4
+//
+// Let's say k = (a + b + c + d) / 4.
+// We can compute k as
+// k = (s + t + 1) / 2 - (((a^d) | (b^c) | (s^t)) & 1)
+// where s = (a + d + 1) / 2 and t = (b + c + 1) / 2
+//
+// Then m can be written as
+// m = (k + t + 1) / 2 - ((((b^c) & (s^t)) | (k^t)) & 1)
+
+// Computes out = (k + in + 1) / 2 - (((ij & (s^t)) | (k^in)) & 1); reads k, st and one from the scope where it is expanded.
+#define GET_M(ij, in, out)                                                                     \
+    do {                                                                                       \
+        const __m128i tmp0 = _mm_avg_epu8(k, (in));    /* (k + in + 1) / 2 */                  \
+        const __m128i tmp1 = _mm_and_si128((ij), st);  /* (ij) & (s^t) */                      \
+        const __m128i tmp2 = _mm_xor_si128(k, (in));   /* (k^in) */                            \
+        const __m128i tmp3 = _mm_or_si128(tmp1, tmp2); /* ((ij) & (s^t)) | (k^in) */           \
+        const __m128i tmp4 = _mm_and_si128(tmp3, one); /* & 1 -> lsb_correction */             \
+        (out) = _mm_sub_epi8(tmp0, tmp4);              /* (k + in + 1) / 2 - lsb_correction */ \
+    } while (0)
+
+// pack and store two alternating pixel rows
+#define PACK_AND_STORE(a, b, da, db, out)                                           \
+    do {                                                                            \
+        const __m128i t_a = _mm_avg_epu8(a, da); /* (9a + 3b + 3c +  d + 8) / 16 */ \
+        const __m128i t_b = _mm_avg_epu8(b, db); /* (3a + 9b +  c + 3d + 8) / 16 */ \
+        const __m128i t_1 = _mm_unpacklo_epi8(t_a, t_b);                            \
+        const __m128i t_2 = _mm_unpackhi_epi8(t_a, t_b);                            \
+        _mm_store_si128(((__m128i*)(out)) + 0, t_1);                                \
+        _mm_store_si128(((__m128i*)(out)) + 1, t_2);                                \
+    } while (0)
+
+// Loads 17 pixels each from rows r1 and r2; stores 32 upsampled values for the top row at out[0..31] and 32 for the bottom row at out[64..95].
+#define UPSAMPLE_32PIXELS(r1, r2, out)                                             \
+    {                                                                              \
+        const __m128i one = _mm_set1_epi8(1);                                      \
+        const __m128i a = _mm_loadu_si128((const __m128i*)&(r1)[0]);               \
+        const __m128i b = _mm_loadu_si128((const __m128i*)&(r1)[1]);               \
+        const __m128i c = _mm_loadu_si128((const __m128i*)&(r2)[0]);               \
+        const __m128i d = _mm_loadu_si128((const __m128i*)&(r2)[1]);               \
+                                                                                   \
+        const __m128i s = _mm_avg_epu8(a, d);   /* s = (a + d + 1) / 2 */          \
+        const __m128i t = _mm_avg_epu8(b, c);   /* t = (b + c + 1) / 2 */          \
+        const __m128i st = _mm_xor_si128(s, t); /* st = s^t */                     \
+                                                                                   \
+        const __m128i ad = _mm_xor_si128(a, d); /* ad = a^d */                     \
+        const __m128i bc = _mm_xor_si128(b, c); /* bc = b^c */                     \
+                                                                                   \
+        const __m128i t1 = _mm_or_si128(ad, bc);   /* (a^d) | (b^c) */             \
+        const __m128i t2 = _mm_or_si128(t1, st);   /* (a^d) | (b^c) | (s^t) */     \
+        const __m128i t3 = _mm_and_si128(t2, one); /* ((a^d) | (b^c) | (s^t)) & 1 */ \
+        const __m128i t4 = _mm_avg_epu8(s, t);                                     \
+        const __m128i k = _mm_sub_epi8(t4, t3); /* k = (a + b + c + d) / 4 */      \
+        __m128i diag1, diag2;                                                      \
+                                                                                   \
+        GET_M(bc, t, diag1); /* diag1 = (a + 3b + 3c + d) / 8 */                   \
+        GET_M(ad, s, diag2); /* diag2 = (3a + b + c + 3d) / 8 */                   \
+                                                                                   \
+        /* pack the alternate pixels */                                            \
+        PACK_AND_STORE(a, b, diag1, diag2, out + 0);      /* store top */          \
+        PACK_AND_STORE(c, d, diag2, diag1, out + 2 * 32); /* store bottom */       \
+    }
+
+// Turn the macro into a function for reducing code-size when non-critical
+static void Upsample32Pixels(const uint8_t r1[], const uint8_t r2[], uint8_t* const out) {
+    UPSAMPLE_32PIXELS(r1, r2, out);
+}
+
+#define UPSAMPLE_LAST_BLOCK(tb, bb, num_pixels, out)                             \
+    {                                                                            \
+        uint8_t r1[17], r2[17];                                                  \
+        memcpy(r1, (tb), (num_pixels));                                          \
+        memcpy(r2, (bb), (num_pixels));                                          \
+        /* replicate last byte */                                                \
+        memset(r1 + (num_pixels), r1[(num_pixels)-1], 17 - (num_pixels));        \
+        memset(r2 + (num_pixels), r2[(num_pixels)-1], 17 - (num_pixels));        \
+        /* using the shared function instead of the macro saves ~3k code size */ \
+        Upsample32Pixels(r1, r2, out);                                           \
+    }
+
+#define CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, top_dst, bottom_dst, cur_x, num_pixels)                  \
+    {                                                                                                      \
+        int n;                                                                                             \
+        for (n = 0; n < (num_pixels); ++n) {                                                               \
+            FUNC(top_y[(cur_x) + n], r_u[n], r_v[n], top_dst + ((cur_x) + n) * XSTEP);                     \
+        }                                                                                                  \
+        if (bottom_y != NULL) {                                                                            \
+            for (n = 0; n < (num_pixels); ++n) {                                                           \
+                FUNC(bottom_y[(cur_x) + n], r_u[64 + n], r_v[64 + n], bottom_dst + ((cur_x) + n) * XSTEP); \
+            }                                                                                              \
+        }                                                                                                  \
+    }
+
+#define CONVERT2RGB_32(FUNC, XSTEP, top_y, bottom_y, top_dst, bottom_dst, cur_x)          \
+    do {                                                                                  \
+        FUNC##32(top_y + (cur_x), r_u, r_v, top_dst + (cur_x)*XSTEP);                     \
+        if (bottom_y != NULL) {                                                           \
+            FUNC##32(bottom_y + (cur_x), r_u + 64, r_v + 64, bottom_dst + (cur_x)*XSTEP); \
+        }                                                                                 \
+    } while (0)
+
+#define SSE2_UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP)                                                                   \
+    static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, const uint8_t* top_u, const uint8_t* top_v, \
+                          const uint8_t* cur_u, const uint8_t* cur_v, uint8_t* top_dst, uint8_t* bottom_dst,         \
+                          int len) {                                                                                 \
+        int uv_pos, pos;                                                                                             \
+        /* 16byte-aligned array to cache reconstructed u and v */                                                    \
+        uint8_t uv_buf[4 * 32 + 15];                                                                                 \
+        uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15);                                             \
+        uint8_t* const r_v = r_u + 32;                                                                               \
+                                                                                                                     \
+        assert(top_y != NULL);                                                                                       \
+        { /* Treat the first pixel in regular way */                                                                 \
+            const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1;                                                     \
+            const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1;                                                     \
+            const int u0_t = (top_u[0] + u_diag) >> 1;                                                               \
+            const int v0_t = (top_v[0] + v_diag) >> 1;                                                               \
+            FUNC(top_y[0], u0_t, v0_t, top_dst);                                                                     \
+            if (bottom_y != NULL) {                                                                                  \
+                const int u0_b = (cur_u[0] + u_diag) >> 1;                                                           \
+                const int v0_b = (cur_v[0] + v_diag) >> 1;                                                           \
+                FUNC(bottom_y[0], u0_b, v0_b, bottom_dst);                                                           \
+            }                                                                                                        \
+        }                                                                                                            \
+        /* For UPSAMPLE_32PIXELS, 17 u/v values must be read-able for each block */                                  \
+        for (pos = 1, uv_pos = 0; pos + 32 + 1 <= len; pos += 32, uv_pos += 16) {                                    \
+            UPSAMPLE_32PIXELS(top_u + uv_pos, cur_u + uv_pos, r_u);                                                  \
+            UPSAMPLE_32PIXELS(top_v + uv_pos, cur_v + uv_pos, r_v);                                                  \
+            CONVERT2RGB_32(FUNC, XSTEP, top_y, bottom_y, top_dst, bottom_dst, pos);                                  \
+        }                                                                                                            \
+        if (len > 1) {                                                                                               \
+            const int left_over = ((len + 1) >> 1) - (pos >> 1);                                                     \
+            assert(left_over > 0);                                                                                   \
+            UPSAMPLE_LAST_BLOCK(top_u + uv_pos, cur_u + uv_pos, left_over, r_u);                                     \
+            UPSAMPLE_LAST_BLOCK(top_v + uv_pos, cur_v + uv_pos, left_over, r_v);                                     \
+            CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, top_dst, bottom_dst, pos, len - pos);                          \
+        }                                                                                                            \
+    }
+
+// SSE2 variants of the fancy upsampler.
+SSE2_UPSAMPLE_FUNC(UpsampleRgbLinePair, VP8YuvToRgb, 3)
+SSE2_UPSAMPLE_FUNC(UpsampleBgrLinePair, VP8YuvToBgr, 3)
+SSE2_UPSAMPLE_FUNC(UpsampleRgbaLinePair, VP8YuvToRgba, 4)
+SSE2_UPSAMPLE_FUNC(UpsampleBgraLinePair, VP8YuvToBgra, 4)
+SSE2_UPSAMPLE_FUNC(UpsampleArgbLinePair, VP8YuvToArgb, 4)
+SSE2_UPSAMPLE_FUNC(UpsampleRgba4444LinePair, VP8YuvToRgba4444, 2)
+SSE2_UPSAMPLE_FUNC(UpsampleRgb565LinePair, VP8YuvToRgb565, 2)
+
+#undef GET_M
+#undef PACK_AND_STORE
+#undef UPSAMPLE_32PIXELS
+#undef UPSAMPLE_LAST_BLOCK
+#undef CONVERT2RGB
+#undef CONVERT2RGB_32
+#undef SSE2_UPSAMPLE_FUNC
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
+
+extern void WebPInitUpsamplersSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersSSE2(void) {
+    WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair;
+    WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair;
+    WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair;
+    WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair;
+    WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair;
+    WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair;
+    WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair;
+    WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair;
+    WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair;
+    WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
+    WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair;
+}
+
+#endif // FANCY_UPSAMPLING
+
+//------------------------------------------------------------------------------
+
+extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
+extern void WebPInitYUV444ConvertersSSE2(void);
+
+#define YUV444_FUNC(FUNC_NAME, CALL, XSTEP)                                                                      \
+    extern void WebP##FUNC_NAME##C(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int len); \
+    static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int len) {         \
+        int i;                                                                                                   \
+        const int max_len = len & ~31;                                                                           \
+        for (i = 0; i < max_len; i += 32) CALL(y + i, u + i, v + i, dst + i * XSTEP);                            \
+        if (i < len) { /* C-fallback */                                                                          \
+            WebP##FUNC_NAME##C(y + i, u + i, v + i, dst + i * XSTEP, len - i);                                   \
+        }                                                                                                        \
+    }
+
+YUV444_FUNC(Yuv444ToRgba, VP8YuvToRgba32, 4);
+YUV444_FUNC(Yuv444ToBgra, VP8YuvToBgra32, 4);
+YUV444_FUNC(Yuv444ToRgb, VP8YuvToRgb32, 3);
+YUV444_FUNC(Yuv444ToBgr, VP8YuvToBgr32, 3);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444ConvertersSSE2(void) {
+    WebPYUV444Converters[MODE_RGBA] = Yuv444ToRgba;
+    WebPYUV444Converters[MODE_BGRA] = Yuv444ToBgra;
+    WebPYUV444Converters[MODE_RGB] = Yuv444ToRgb;
+    WebPYUV444Converters[MODE_BGR] = Yuv444ToBgr;
+}
+
+#else
+
+WEBP_DSP_INIT_STUB(WebPInitYUV444ConvertersSSE2)
+
+#endif // WEBP_USE_SSE2
+
+#if !(defined(FANCY_UPSAMPLING) && defined(WEBP_USE_SSE2))
+WEBP_DSP_INIT_STUB(WebPInitUpsamplersSSE2)
+#endif
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/yuv.c b/codec/L2/demos/webpEnc/host/src/dsp/yuv.c
new file mode 100644
index 0000000000..2c63bae599
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/yuv.c
@@ -0,0 +1,277 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// YUV->RGB conversion functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./yuv.h"
+
+#if defined(WEBP_YUV_USE_TABLE)
+
+static int done = 0;
+
+static WEBP_INLINE uint8_t clip(int v, int max_value) {
+    return v < 0 ? 0 : v > max_value ? max_value : v;
+}
+
+int16_t VP8kVToR[256], VP8kUToB[256];
+int32_t VP8kVToG[256], VP8kUToG[256];
+uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN];
+uint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN];
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {
+    int i;
+    if (done) {
+        return;
+    }
+#ifndef USE_YUVj
+    for (i = 0; i < 256; ++i) {
+        VP8kVToR[i] = (89858 * (i - 128) + YUV_HALF) >> YUV_FIX;
+        VP8kUToG[i] = -22014 * (i - 128) + YUV_HALF;
+        VP8kVToG[i] = -45773 * (i - 128);
+        VP8kUToB[i] = (113618 * (i - 128) + YUV_HALF) >> YUV_FIX;
+    }
+    for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
+        const int k = ((i - 16) * 76283 + YUV_HALF) >> YUV_FIX;
+        VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
+        VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
+    }
+#else
+    for (i = 0; i < 256; ++i) {
+        VP8kVToR[i] = (91881 * (i - 128) + YUV_HALF) >> YUV_FIX;
+        VP8kUToG[i] = -22554 * (i - 128) + YUV_HALF;
+        VP8kVToG[i] = -46802 * (i - 128);
+        VP8kUToB[i] = (116130 * (i - 128) + YUV_HALF) >> YUV_FIX;
+    }
+    for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
+        const int k = i;
+        VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
+        VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
+    }
+#endif
+
+    done = 1;
+}
+
+#else
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {}
+
+#endif // WEBP_YUV_USE_TABLE
+
+//-----------------------------------------------------------------------------
+// Plain-C version
+
+#define ROW_FUNC(FUNC_NAME, FUNC, XSTEP)                                                                 \
+    static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int len) { \
+        const uint8_t* const end = dst + (len & ~1) * XSTEP;                                             \
+        while (dst != end) {                                                                             \
+            FUNC(y[0], u[0], v[0], dst);                                                                 \
+            FUNC(y[1], u[0], v[0], dst + XSTEP);                                                         \
+            y += 2;                                                                                      \
+            ++u;                                                                                         \
+            ++v;                                                                                         \
+            dst += 2 * XSTEP;                                                                            \
+        }                                                                                                \
+        if (len & 1) {                                                                                   \
+            FUNC(y[0], u[0], v[0], dst);                                                                 \
+        }                                                                                                \
+    }
+
+// All variants implemented.
+ROW_FUNC(YuvToRgbRow, VP8YuvToRgb, 3)
+ROW_FUNC(YuvToBgrRow, VP8YuvToBgr, 3)
+ROW_FUNC(YuvToRgbaRow, VP8YuvToRgba, 4)
+ROW_FUNC(YuvToBgraRow, VP8YuvToBgra, 4)
+ROW_FUNC(YuvToArgbRow, VP8YuvToArgb, 4)
+ROW_FUNC(YuvToRgba4444Row, VP8YuvToRgba4444, 2)
+ROW_FUNC(YuvToRgb565Row, VP8YuvToRgb565, 2)
+
+#undef ROW_FUNC
+
+// Calls 'func' once per row for 'height' rows: y and dst advance every row, u and v advance once per two rows.
+void WebPSamplerProcessPlane(const uint8_t* y,
+                             int y_stride, // added to y after every row
+                             const uint8_t* u,
+                             const uint8_t* v,
+                             int uv_stride, // added to u and v after every odd-indexed row
+                             uint8_t* dst,
+                             int dst_stride, // added to dst after every row
+                             int width, // forwarded to func as its 'len' argument
+                             int height, // number of rows processed
+                             WebPSamplerRowFunc func) {
+    int j;
+    for (j = 0; j < height; ++j) {
+        func(y, u, v, dst, width);
+        y += y_stride;
+        if (j & 1) { // rows 2k and 2k+1 share the same u/v row
+            u += uv_stride;
+            v += uv_stride;
+        }
+        dst += dst_stride;
+    }
+}
+
+//-----------------------------------------------------------------------------
+// Main call
+
+WebPSamplerRowFunc WebPSamplers[MODE_LAST];
+
+extern void WebPInitSamplersSSE2(void);
+extern void WebPInitSamplersMIPS32(void);
+extern void WebPInitSamplersMIPSdspR2(void);
+
+static volatile VP8CPUInfo yuv_last_cpuinfo_used = (VP8CPUInfo)&yuv_last_cpuinfo_used;
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplers(void) { // fills WebPSamplers[] with plain-C row functions, then lets CPU-specific inits override them
+    if (yuv_last_cpuinfo_used == VP8GetCPUInfo) return; // table was already built for this VP8GetCPUInfo value
+
+    WebPSamplers[MODE_RGB] = YuvToRgbRow;
+    WebPSamplers[MODE_RGBA] = YuvToRgbaRow;
+    WebPSamplers[MODE_BGR] = YuvToBgrRow;
+    WebPSamplers[MODE_BGRA] = YuvToBgraRow;
+    WebPSamplers[MODE_ARGB] = YuvToArgbRow;
+    WebPSamplers[MODE_RGBA_4444] = YuvToRgba4444Row;
+    WebPSamplers[MODE_RGB_565] = YuvToRgb565Row;
+    WebPSamplers[MODE_rgbA] = YuvToRgbaRow; // the four modes below reuse the row functions of MODE_RGBA/BGRA/ARGB/RGBA_4444
+    WebPSamplers[MODE_bgrA] = YuvToBgraRow;
+    WebPSamplers[MODE_Argb] = YuvToArgbRow;
+    WebPSamplers[MODE_rgbA_4444] = YuvToRgba4444Row;
+
+    // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+    if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+        if (VP8GetCPUInfo(kSSE2)) {
+            WebPInitSamplersSSE2();
+        }
+#endif // WEBP_USE_SSE2
+#if defined(WEBP_USE_MIPS32)
+        if (VP8GetCPUInfo(kMIPS32)) {
+            WebPInitSamplersMIPS32();
+        }
+#endif // WEBP_USE_MIPS32
+#if defined(WEBP_USE_MIPS_DSP_R2)
+        if (VP8GetCPUInfo(kMIPSdspR2)) {
+            WebPInitSamplersMIPSdspR2();
+        }
+#endif // WEBP_USE_MIPS_DSP_R2
+    }
+    yuv_last_cpuinfo_used = VP8GetCPUInfo; // remember which VP8GetCPUInfo value this table was built for
+}
+
+//-----------------------------------------------------------------------------
+// ARGB -> YUV converters
+
+static void ConvertARGBToY(const uint32_t* argb, uint8_t* y, int width) { // writes one luma byte per packed 32-bit pixel
+    int i;
+    for (i = 0; i < width; ++i) {
+        const uint32_t p = argb[i];
+        y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >> 0) & 0xff, YUV_HALF); // r = bits 16-23, g = bits 8-15, b = bits 0-7; bits 24-31 unused
+    }
+}
+
+void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v, int src_width, int do_store) { // one u/v sample per horizontal pixel pair
+    // No rounding. Last pixel is dealt with separately.
+    const int uv_width = src_width >> 1;
+    int i;
+    for (i = 0; i < uv_width; ++i) {
+        const uint32_t v0 = argb[2 * i + 0];
+        const uint32_t v1 = argb[2 * i + 1];
+        // VP8RGBToU/V expects four accumulated pixels. Hence we need to
+        // scale r/g/b value by a factor 2. We just shift v0/v1 one bit less.
+        const int r = ((v0 >> 15) & 0x1fe) + ((v1 >> 15) & 0x1fe);
+        const int g = ((v0 >> 7) & 0x1fe) + ((v1 >> 7) & 0x1fe);
+        const int b = ((v0 << 1) & 0x1fe) + ((v1 << 1) & 0x1fe);
+        const int tmp_u = VP8RGBToU(r, g, b, YUV_HALF << 2);
+        const int tmp_v = VP8RGBToV(r, g, b, YUV_HALF << 2);
+        if (do_store) { // overwrite the output with this row's value
+            u[i] = tmp_u;
+            v[i] = tmp_v;
+        } else {
+            // Approximated average-of-four. But it's an acceptable diff.
+            u[i] = (u[i] + tmp_u + 1) >> 1;
+            v[i] = (v[i] + tmp_v + 1) >> 1;
+        }
+    }
+    if (src_width & 1) { // last pixel
+        const uint32_t v0 = argb[2 * i + 0];
+        const int r = (v0 >> 14) & 0x3fc; // single pixel scaled by 4 (0x3fc == 0xff << 2) to stand in for four accumulated pixels
+        const int g = (v0 >> 6) & 0x3fc;
+        const int b = (v0 << 2) & 0x3fc;
+        const int tmp_u = VP8RGBToU(r, g, b, YUV_HALF << 2);
+        const int tmp_v = VP8RGBToV(r, g, b, YUV_HALF << 2);
+        if (do_store) {
+            u[i] = tmp_u;
+            v[i] = tmp_v;
+        } else { // rounded average with the value already stored in u/v
+            u[i] = (u[i] + tmp_u + 1) >> 1;
+            v[i] = (v[i] + tmp_v + 1) >> 1;
+        }
+    }
+}
+
+//-----------------------------------------------------------------------------
+
+static void ConvertRGB24ToY(const uint8_t* rgb, uint8_t* y, int width) {
+    int i;
+    for (i = 0; i < width; ++i, rgb += 3) {
+        y[i] = VP8RGBToY(rgb[0], rgb[1], rgb[2], YUV_HALF);
+    }
+}
+
+static void ConvertBGR24ToY(const uint8_t* bgr, uint8_t* y, int width) { // writes one luma byte per 3-byte input pixel
+    int i;
+    for (i = 0; i < width; ++i, bgr += 3) {
+        y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF); // indices reversed because VP8RGBToY takes (r, g, b)
+    }
+}
+
+void WebPConvertRGBA32ToUV_C(const uint16_t* rgb, uint8_t* u, uint8_t* v, int width) { // NOTE(review): 16-bit inputs plus YUV_HALF << 2 rounding suggest pre-summed 4-pixel values -- confirm against caller
+    int i;
+    for (i = 0; i < width; i += 1, rgb += 4) { // four uint16_t per sample; rgb[3] is not read
+        const int r = rgb[0], g = rgb[1], b = rgb[2];
+        u[i] = VP8RGBToU(r, g, b, YUV_HALF << 2);
+        v[i] = VP8RGBToV(r, g, b, YUV_HALF << 2);
+    }
+}
+
+//-----------------------------------------------------------------------------
+
+void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
+void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
+void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb, uint8_t* u, uint8_t* v, int width);
+
+void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
+void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v, int src_width, int do_store);
+
+static volatile VP8CPUInfo rgba_to_yuv_last_cpuinfo_used = (VP8CPUInfo)&rgba_to_yuv_last_cpuinfo_used;
+
+extern void WebPInitConvertARGBToYUVSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUV(void) { // points the WebPConvert* function pointers at the C versions, then lets SSE2 override them
+    if (rgba_to_yuv_last_cpuinfo_used == VP8GetCPUInfo) return; // pointers were already set for this VP8GetCPUInfo value
+
+    WebPConvertARGBToY = ConvertARGBToY;
+    WebPConvertARGBToUV = WebPConvertARGBToUV_C;
+
+    WebPConvertRGB24ToY = ConvertRGB24ToY;
+    WebPConvertBGR24ToY = ConvertBGR24ToY;
+
+    WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C;
+
+    if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+        if (VP8GetCPUInfo(kSSE2)) {
+            WebPInitConvertARGBToYUVSSE2();
+        }
+#endif // WEBP_USE_SSE2
+    }
+    rgba_to_yuv_last_cpuinfo_used = VP8GetCPUInfo; // remember which VP8GetCPUInfo value the pointers were set for
+}
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/yuv.h b/codec/L2/demos/webpEnc/host/src/dsp/yuv.h
new file mode 100644
index 0000000000..9f13880d34
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/yuv.h
@@ -0,0 +1,224 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// inline YUV<->RGB conversion function
+//
+// The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
+// More information at: http://en.wikipedia.org/wiki/YCbCr
+// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
+// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
+// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
+// We use 16bit fixed point operations for RGB->YUV conversion (YUV_FIX).
+//
+// For the Y'CbCr to RGB conversion, the BT.601 specification reads:
+//   R = 1.164 * (Y-16) + 1.596 * (V-128)
+//   G = 1.164 * (Y-16) - 0.813 * (V-128) - 0.391 * (U-128)
+//   B = 1.164 * (Y-16)                   + 2.018 * (U-128)
+// where Y is in the [16,235] range, and U/V in the [16,240] range.
+//
+// The fixed-point implementation used here is:
+//  R = (19077 . y             + 26149 . v - 14234) >> 6
+//  G = (19077 . y -  6419 . u - 13320 . v +  8708) >> 6
+//  B = (19077 . y + 33050 . u             - 17685) >> 6
+// where the '.' operator is the mulhi_epu16 variant:
+//   a . b = ((a << 8) * b) >> 16
+// that preserves 8 bits of fractional precision before final descaling.
+
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_DSP_YUV_H_
+#define WEBP_DSP_YUV_H_
+
+#include "./dsp.h"
+#include "../dec/decode_vp8.h"
+
+#if defined(WEBP_EXPERIMENTAL_FEATURES)
+// Do NOT activate this feature for real compression. This is only experimental!
+// This flag is for comparison purpose against JPEG's "YUVj" natural colorspace.
+// This colorspace is close to Rec.601's Y'CbCr model with the notable
+// difference of allowing larger range for luma/chroma.
+// See http://en.wikipedia.org/wiki/YCbCr#JPEG_conversion paragraph, and its
+// difference with http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
+// #define USE_YUVj
+#endif
+
+//------------------------------------------------------------------------------
+// YUV -> RGB conversion
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+    YUV_FIX = 16, // fixed-point precision for RGB->YUV
+    YUV_HALF = 1 << (YUV_FIX - 1),
+    YUV_MASK = (256 << YUV_FIX) - 1,
+    YUV_RANGE_MIN = -227,      // min value of r/g/b output
+    YUV_RANGE_MAX = 256 + 226, // max value of r/g/b output
+
+    YUV_FIX2 = 6, // fixed-point precision for YUV->RGB
+    YUV_HALF2 = 1 << YUV_FIX2 >> 1,
+    YUV_MASK2 = (256 << YUV_FIX2) - 1
+};
+
+//------------------------------------------------------------------------------
+// slower on x86 by ~7-8%, but bit-exact with the SSE2/NEON version
+
+static WEBP_INLINE int MultHi(int v, int coeff) { // _mm_mulhi_epu16 emulation: ((v << 8) * coeff) >> 16
+    return (v * coeff) >> 8;
+}
+
+static WEBP_INLINE int VP8Clip8(int v) { // converts a value with YUV_FIX2 fractional bits to an integer saturated to [0, 255]
+    return ((v & ~YUV_MASK2) == 0) ? (v >> YUV_FIX2) : (v < 0) ? 0 : 255; // in range: drop the fractional bits; otherwise clamp by sign
+}
+
+static WEBP_INLINE int VP8YUVToR(int y, int v) {
+    return VP8Clip8(MultHi(y, 19077) + MultHi(v, 26149) - 14234);
+}
+
+static WEBP_INLINE int VP8YUVToG(int y, int u, int v) {
+    return VP8Clip8(MultHi(y, 19077) - MultHi(u, 6419) - MultHi(v, 13320) + 8708);
+}
+
+static WEBP_INLINE int VP8YUVToB(int y, int u) {
+    return VP8Clip8(MultHi(y, 19077) + MultHi(u, 33050) - 17685);
+}
+
+static WEBP_INLINE void VP8YuvToRgb(int y, int u, int v, uint8_t* const rgb) {
+    rgb[0] = VP8YUVToR(y, v);
+    rgb[1] = VP8YUVToG(y, u, v);
+    rgb[2] = VP8YUVToB(y, u);
+}
+
+static WEBP_INLINE void VP8YuvToBgr(int y, int u, int v, uint8_t* const bgr) {
+    bgr[0] = VP8YUVToB(y, u);
+    bgr[1] = VP8YUVToG(y, u, v);
+    bgr[2] = VP8YUVToR(y, v);
+}
+
+static WEBP_INLINE void VP8YuvToRgb565(int y, int u, int v, uint8_t* const rgb) { // writes two bytes to rgb[0..1]
+    const int r = VP8YUVToR(y, v);    // 5 usable bits
+    const int g = VP8YUVToG(y, u, v); // 6 usable bits
+    const int b = VP8YUVToB(y, u);    // 5 usable bits
+    const int rg = (r & 0xf8) | (g >> 5); // top 5 bits of r, then top 3 bits of g
+    const int gb = ((g << 3) & 0xe0) | (b >> 3); // next 3 bits of g, then top 5 bits of b
+#ifdef WEBP_SWAP_16BIT_CSP
+    rgb[0] = gb;
+    rgb[1] = rg;
+#else
+    rgb[0] = rg;
+    rgb[1] = gb;
+#endif
+}
+
+static WEBP_INLINE void VP8YuvToRgba4444(int y, int u, int v, uint8_t* const argb) { // writes two bytes to argb[0..1]
+    const int r = VP8YUVToR(y, v);    // 4 usable bits
+    const int g = VP8YUVToG(y, u, v); // 4 usable bits
+    const int b = VP8YUVToB(y, u);    // 4 usable bits
+    const int rg = (r & 0xf0) | (g >> 4); // top 4 bits of r in the high nibble, top 4 bits of g in the low nibble
+    const int ba = (b & 0xf0) | 0x0f; // overwrite the lower 4 bits
+#ifdef WEBP_SWAP_16BIT_CSP
+    argb[0] = ba;
+    argb[1] = rg;
+#else
+    argb[0] = rg;
+    argb[1] = ba;
+#endif
+}
+
+//-----------------------------------------------------------------------------
+// Alpha handling variants
+
+static WEBP_INLINE void VP8YuvToArgb(uint8_t y, uint8_t u, uint8_t v, uint8_t* const argb) {
+    argb[0] = 0xff;
+    VP8YuvToRgb(y, u, v, argb + 1);
+}
+
+static WEBP_INLINE void VP8YuvToBgra(uint8_t y, uint8_t u, uint8_t v, uint8_t* const bgra) {
+    VP8YuvToBgr(y, u, v, bgra);
+    bgra[3] = 0xff;
+}
+
+static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v, uint8_t* const rgba) {
+    VP8YuvToRgb(y, u, v, rgba);
+    rgba[3] = 0xff;
+}
+
+// Must be called before everything, to initialize the tables.
+void VP8YUVInit(void);
+
+//-----------------------------------------------------------------------------
+// SSE2 extra functions (mostly for upsampling_sse2.c)
+
+#if defined(WEBP_USE_SSE2)
+
+// Process 32 pixels and store the result (16b, 24b or 32b per pixel) in *dst.
+void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst);
+void VP8YuvToRgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst);
+void VP8YuvToBgra32(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst);
+void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst);
+void VP8YuvToArgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst);
+void VP8YuvToRgba444432(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst);
+void VP8YuvToRgb56532(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst);
+
+#endif // WEBP_USE_SSE2
+
+//------------------------------------------------------------------------------
+// RGB -> YUV conversion
+
+// Stub functions that can be called with various rounding values:
+static WEBP_INLINE int VP8ClipUV(int uv, int rounding) { // descales a chroma accumulator and saturates it to [0, 255]
+    uv = (uv + rounding + (128 << (YUV_FIX + 2))) >> (YUV_FIX + 2); // add rounding and the 128 offset, then drop YUV_FIX + 2 bits
+    return ((uv & ~0xff) == 0) ? uv : (uv < 0) ? 0 : 255; // already in [0, 255]: keep; otherwise clamp by sign
+}
+
+#ifndef USE_YUVj
+
+static WEBP_INLINE int VP8RGBToY(int r, int g, int b, int rounding) {
+    const int luma = 16839 * r + 33059 * g + 6420 * b;
+    return (luma + rounding + (16 << YUV_FIX)) >> YUV_FIX; // no need to clip
+}
+
+static WEBP_INLINE int VP8RGBToU(int r, int g, int b, int rounding) {
+    const int u = -9719 * r - 19081 * g + 28800 * b;
+    return VP8ClipUV(u, rounding);
+}
+
+static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) {
+    const int v = +28800 * r - 24116 * g - 4684 * b;
+    return VP8ClipUV(v, rounding);
+}
+
+#else
+
+// This JPEG-YUV colorspace, only for comparison!
+// These are also 16bit precision coefficients from Rec.601, but with full
+// [0..255] output range.
+static WEBP_INLINE int VP8RGBToY(int r, int g, int b, int rounding) {
+    const int luma = 19595 * r + 38470 * g + 7471 * b;
+    return (luma + rounding) >> YUV_FIX; // no need to clip
+}
+
+static WEBP_INLINE int VP8RGBToU(int r, int g, int b, int rounding) {
+    const int u = -11058 * r - 21710 * g + 32768 * b;
+    return VP8ClipUV(u, rounding);
+}
+
+static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) {
+    const int v = 32768 * r - 27439 * g - 5329 * b;
+    return VP8ClipUV(v, rounding);
+}
+
+#endif // USE_YUVj
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_DSP_YUV_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/yuv_mips32.c b/codec/L2/demos/webpEnc/host/src/dsp/yuv_mips32.c
new file mode 100644
index 0000000000..72bba67bc2
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/yuv_mips32.c
@@ -0,0 +1,102 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of YUV to RGB upsampling functions.
+//
+// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
+//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
+
+#include "./dsp.h"
+
+// Code is disabled for now, in favor of the plain-C version
+#if 0 // defined(WEBP_USE_MIPS32)
+
+#include "./yuv.h"
+
+//------------------------------------------------------------------------------
+// simple point-sampling
+
+#define ROW_FUNC(FUNC_NAME, XSTEP, R, G, B, A) /* XSTEP: dst bytes per pixel; R, G, B, A: byte offsets */ \
+    static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int len) { \
+        int i, r, g, b;                                                                                  \
+        int temp0, temp1, temp2, temp3, temp4;                                                           \
+        for (i = 0; i < (len >> 1); i++) { /* two pixels per pass, sharing one u/v sample */             \
+            temp1 = kVToR * v[0];                                                                        \
+            temp3 = kVToG * v[0];                                                                        \
+            temp2 = kUToG * u[0];                                                                        \
+            temp4 = kUToB * u[0];                                                                        \
+            temp0 = kYScale * y[0];                                                                      \
+            temp1 += kRCst;                                                                              \
+            temp3 -= kGCst;                                                                              \
+            temp2 += temp3;                                                                              \
+            temp4 += kBCst;                                                                              \
+            r = VP8Clip8(temp0 + temp1);                                                                 \
+            g = VP8Clip8(temp0 - temp2);                                                                 \
+            b = VP8Clip8(temp0 + temp4);                                                                 \
+            temp0 = kYScale * y[1]; /* luma of the second pixel; the chroma terms are reused */          \
+            dst[R] = r;                                                                                  \
+            dst[G] = g;                                                                                  \
+            dst[B] = b;                                                                                  \
+            if (A) dst[A] = 0xff; /* alpha byte is written only when A is non-zero */                    \
+            r = VP8Clip8(temp0 + temp1);                                                                 \
+            g = VP8Clip8(temp0 - temp2);                                                                 \
+            b = VP8Clip8(temp0 + temp4);                                                                 \
+            dst[R + XSTEP] = r;                                                                          \
+            dst[G + XSTEP] = g;                                                                          \
+            dst[B + XSTEP] = b;                                                                          \
+            if (A) dst[A + XSTEP] = 0xff;                                                                \
+            y += 2;                                                                                      \
+            ++u;                                                                                         \
+            ++v;                                                                                         \
+            dst += 2 * XSTEP;                                                                            \
+        }                                                                                                \
+        if (len & 1) { /* odd length: convert the one remaining pixel */                                 \
+            temp1 = kVToR * v[0];                                                                        \
+            temp3 = kVToG * v[0];                                                                        \
+            temp2 = kUToG * u[0];                                                                        \
+            temp4 = kUToB * u[0];                                                                        \
+            temp0 = kYScale * y[0];                                                                      \
+            temp1 += kRCst;                                                                              \
+            temp3 -= kGCst;                                                                              \
+            temp2 += temp3;                                                                              \
+            temp4 += kBCst;                                                                              \
+            r = VP8Clip8(temp0 + temp1);                                                                 \
+            g = VP8Clip8(temp0 - temp2);                                                                 \
+            b = VP8Clip8(temp0 + temp4);                                                                 \
+            dst[R] = r;                                                                                  \
+            dst[G] = g;                                                                                  \
+            dst[B] = b;                                                                                  \
+            if (A) dst[A] = 0xff;                                                                        \
+        }                                                                                                \
+    }
+
+ROW_FUNC(YuvToRgbRow,      3, 0, 1, 2, 0) // 3 bytes/pixel, R-G-B byte order, no alpha store (A == 0)
+ROW_FUNC(YuvToRgbaRow,     4, 0, 1, 2, 3) // 4 bytes/pixel, R-G-B byte order, 0xff alpha at offset 3
+ROW_FUNC(YuvToBgrRow,      3, 2, 1, 0, 0) // 3 bytes/pixel, B-G-R byte order, no alpha store (A == 0)
+ROW_FUNC(YuvToBgraRow,     4, 2, 1, 0, 3) // 4 bytes/pixel, B-G-R byte order, 0xff alpha at offset 3
+
+#undef ROW_FUNC
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPInitSamplersMIPS32(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplersMIPS32(void) {
+  WebPSamplers[MODE_BGRA] = YuvToBgraRow;  // 4 bytes/pixel samplers
+  WebPSamplers[MODE_RGBA] = YuvToRgbaRow;
+  WebPSamplers[MODE_BGR]  = YuvToBgrRow;   // 3 bytes/pixel samplers
+  WebPSamplers[MODE_RGB]  = YuvToRgbRow;
+}
+
+#else // !WEBP_USE_MIPS32
+
+WEBP_DSP_INIT_STUB(WebPInitSamplersMIPS32)
+
+#endif // WEBP_USE_MIPS32
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/yuv_mips_dsp_r2.c b/codec/L2/demos/webpEnc/host/src/dsp/yuv_mips_dsp_r2.c
new file mode 100644
index 0000000000..9bbcc8f38d
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/yuv_mips_dsp_r2.c
@@ -0,0 +1,129 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS DSPr2 version of YUV to RGB upsampling functions.
+//
+// Author(s):  Branimir Vasic (branimir.vasic@imgtec.com)
+//             Djordje Pesut  (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+// Code is disabled for now, in favor of the plain-C version
+#if 0 // defined(WEBP_USE_MIPS_DSP_R2)
+
+#include "./yuv.h"
+
+//------------------------------------------------------------------------------
+// simple point-sampling
+
+#define ROW_FUNC_PART_1() /* asm text: load v[0], u[0], y[0] and scale them with the t_con_* operands */ \
+    "lbu              %[temp3],   0(%[v])                         \n\t" \
+    "lbu              %[temp4],   0(%[u])                         \n\t" \
+    "lbu              %[temp0],   0(%[y])                         \n\t" \
+    "mul              %[temp1],   %[t_con_1],     %[temp3]        \n\t" /* temp1 = t_con_1 * v */ \
+    "mul              %[temp3],   %[t_con_2],     %[temp3]        \n\t" /* temp3 = t_con_2 * v */ \
+    "mul              %[temp2],   %[t_con_3],     %[temp4]        \n\t" /* temp2 = t_con_3 * u */ \
+    "mul              %[temp4],   %[t_con_4],     %[temp4]        \n\t" /* temp4 = t_con_4 * u */ \
+    "mul              %[temp0],   %[t_con_5],     %[temp0]        \n\t" /* temp0 = t_con_5 * y */ \
+    "addu             %[temp1],   %[temp1],       %[t_con_6]      \n\t" \
+    "subu             %[temp3],   %[temp3],       %[t_con_7]      \n\t" \
+    "addu             %[temp2],   %[temp2],       %[temp3]        \n\t" \
+    "addu             %[temp4],   %[temp4],       %[t_con_8]      \n\t"
+
+#define ROW_FUNC_PART_2(R, G, B, K) /* asm text: form one pixel, store bytes at dst+R, dst+G, dst+B */ \
+    "addu             %[temp5],   %[temp0],       %[temp1]        \n\t"   \
+    "subu             %[temp6],   %[temp0],       %[temp2]        \n\t"   \
+    "addu             %[temp7],   %[temp0],       %[temp4]        \n\t"   \
+    ".if " #K /* when K is non-zero, also fetch y[1] for the pixel that follows */ \
+    "                                                     \n\t"           \
+    "lbu              %[temp0],   1(%[y])                         \n\t"   \
+    ".endif                                                         \n\t" \
+    "shll_s.w         %[temp5],   %[temp5],       9               \n\t"   \
+    "shll_s.w         %[temp6],   %[temp6],       9               \n\t"   \
+    ".if " #K /* when K is non-zero, scale the freshly loaded luma by t_con_5 */ \
+    "                                                     \n\t"           \
+    "mul              %[temp0],   %[t_con_5],     %[temp0]        \n\t"   \
+    ".endif                                                         \n\t" \
+    "shll_s.w         %[temp7],   %[temp7],       9               \n\t"   \
+    "precrqu_s.qb.ph  %[temp5],   %[temp5],       $zero           \n\t"   \
+    "precrqu_s.qb.ph  %[temp6],   %[temp6],       $zero           \n\t"   \
+    "precrqu_s.qb.ph  %[temp7],   %[temp7],       $zero           \n\t"   \
+    "srl              %[temp5],   %[temp5],       24              \n\t"   \
+    "srl              %[temp6],   %[temp6],       24              \n\t"   \
+    "srl              %[temp7],   %[temp7],       24              \n\t"   \
+    "sb               %[temp5],   " #R /* NOTE(review): shll_s.w/precrqu_s.qb.ph/srl presumably clamp to 8 bits - confirm */ \
+    "(%[dst])                  \n\t"                                      \
+    "sb               %[temp6],   " #G                                    \
+    "(%[dst])                  \n\t"                                      \
+    "sb               %[temp7],   " #B "(%[dst])                  \n\t"
+
+#define ASM_CLOBBER_LIST() /* despite the name: outputs, then inputs, then clobbers */ \
+  : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),             \
+    [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),             \
+    [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)                                   \
+  : [t_con_1]"r"(t_con_1), [t_con_2]"r"(t_con_2), [t_con_3]"r"(t_con_3),       \
+    [t_con_4]"r"(t_con_4), [t_con_5]"r"(t_con_5), [t_con_6]"r"(t_con_6),       \
+    [u]"r"(u), [v]"r"(v), [y]"r"(y), [dst]"r"(dst),                            \
+    [t_con_7]"r"(t_con_7), [t_con_8]"r"(t_con_8)                               \
+  : "memory", "hi", "lo"
+
+#define ROW_FUNC(FUNC_NAME, XSTEP, R, G, B, A) /* XSTEP: dst bytes per pixel; R, G, B, A: byte offsets */ \
+    static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int len) {  \
+        int i;                                                                                            \
+        uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;                                  \
+        const int t_con_1 = kVToR; /* the conversion constants are passed to the asm as operands */       \
+        const int t_con_2 = kVToG;                                                                        \
+        const int t_con_3 = kUToG;                                                                        \
+        const int t_con_4 = kUToB;                                                                        \
+        const int t_con_5 = kYScale;                                                                      \
+        const int t_con_6 = kRCst;                                                                        \
+        const int t_con_7 = kGCst;                                                                        \
+        const int t_con_8 = kBCst;                                                                        \
+        for (i = 0; i < (len >> 1); i++) { /* two pixels per pass, sharing one u/v sample */              \
+            __asm__ volatile(ROW_FUNC_PART_1() ROW_FUNC_PART_2(R, G, B, 1)                                \
+                                 ROW_FUNC_PART_2(R + XSTEP, G + XSTEP, B + XSTEP, 0) ASM_CLOBBER_LIST()); \
+            if (A) dst[A] = dst[A + XSTEP] = 0xff; /* alpha is written only when A is non-zero */         \
+            y += 2;                                                                                       \
+            ++u;                                                                                          \
+            ++v;                                                                                          \
+            dst += 2 * XSTEP;                                                                             \
+        }                                                                                                 \
+        if (len & 1) { /* odd length: convert the one remaining pixel */                                  \
+            __asm__ volatile(ROW_FUNC_PART_1() ROW_FUNC_PART_2(R, G, B, 0) ASM_CLOBBER_LIST());           \
+            if (A) dst[A] = 0xff;                                                                         \
+        }                                                                                                 \
+    }
+
+ROW_FUNC(YuvToRgbRow,      3, 0, 1, 2, 0) // 3 bytes/pixel, R-G-B byte order, no alpha store (A == 0)
+ROW_FUNC(YuvToRgbaRow,     4, 0, 1, 2, 3) // 4 bytes/pixel, R-G-B byte order, 0xff alpha at offset 3
+ROW_FUNC(YuvToBgrRow,      3, 2, 1, 0, 0) // 3 bytes/pixel, B-G-R byte order, no alpha store (A == 0)
+ROW_FUNC(YuvToBgraRow,     4, 2, 1, 0, 3) // 4 bytes/pixel, B-G-R byte order, 0xff alpha at offset 3
+
+#undef ROW_FUNC
+#undef ASM_CLOBBER_LIST
+#undef ROW_FUNC_PART_2
+#undef ROW_FUNC_PART_1
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPInitSamplersMIPSdspR2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplersMIPSdspR2(void) {
+  WebPSamplers[MODE_BGRA] = YuvToBgraRow;  // 4 bytes/pixel samplers
+  WebPSamplers[MODE_RGBA] = YuvToRgbaRow;
+  WebPSamplers[MODE_BGR]  = YuvToBgrRow;   // 3 bytes/pixel samplers
+  WebPSamplers[MODE_RGB]  = YuvToRgbRow;
+}
+
+#else // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(WebPInitSamplersMIPSdspR2)
+
+#endif // WEBP_USE_MIPS_DSP_R2
diff --git a/codec/L2/demos/webpEnc/host/src/dsp/yuv_sse2.c b/codec/L2/demos/webpEnc/host/src/dsp/yuv_sse2.c
new file mode 100644
index 0000000000..aa8786df2f
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/dsp/yuv_sse2.c
@@ -0,0 +1,752 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// YUV->RGB conversion functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./yuv.h"
+
+#if defined(WEBP_USE_SSE2)
+
+#include <emmintrin.h>
+
+//-----------------------------------------------------------------------------
+// Convert spans of 32 pixels to various RGB formats for the fancy upsampler.
+
+// These constants are 14b fixed-point version of ITU-R BT.601 constants.
+// R = (19077 * y             + 26149 * v - 14234) >> 6
+// G = (19077 * y -  6419 * u - 13320 * v +  8708) >> 6
+// B = (19077 * y + 33050 * u             - 17685) >> 6
+static void ConvertYUV444ToRGB(const __m128i* const Y0,
+                               const __m128i* const U0,
+                               const __m128i* const V0,
+                               __m128i* const R,
+                               __m128i* const G,
+                               __m128i* const B) {
+    const __m128i k19077 = _mm_set1_epi16(19077);
+    const __m128i k26149 = _mm_set1_epi16(26149);
+    const __m128i k14234 = _mm_set1_epi16(14234);
+    const __m128i k33050 = _mm_set1_epi16((short)33050); // > 32767: explicit narrowing, unsigned use only
+    const __m128i k17685 = _mm_set1_epi16(17685);
+    const __m128i k6419 = _mm_set1_epi16(6419);
+    const __m128i k13320 = _mm_set1_epi16(13320);
+    const __m128i k8708 = _mm_set1_epi16(8708);
+    // Eight 16b lanes are processed at once; _mm_mulhi_epu16 keeps the upper 16 bits of each product.
+    const __m128i Y1 = _mm_mulhi_epu16(*Y0, k19077);
+    // R = Y1 + 26149 * v - 14234
+    const __m128i R0 = _mm_mulhi_epu16(*V0, k26149);
+    const __m128i R1 = _mm_sub_epi16(Y1, k14234);
+    const __m128i R2 = _mm_add_epi16(R1, R0);
+    // G = Y1 + 8708 - (6419 * u + 13320 * v)
+    const __m128i G0 = _mm_mulhi_epu16(*U0, k6419);
+    const __m128i G1 = _mm_mulhi_epu16(*V0, k13320);
+    const __m128i G2 = _mm_add_epi16(Y1, k8708);
+    const __m128i G3 = _mm_add_epi16(G0, G1);
+    const __m128i G4 = _mm_sub_epi16(G2, G3);
+
+    // be careful with the saturated *unsigned* arithmetic here!
+    const __m128i B0 = _mm_mulhi_epu16(*U0, k33050);
+    const __m128i B1 = _mm_adds_epu16(B0, Y1);
+    const __m128i B2 = _mm_subs_epu16(B1, k17685);
+
+    // use logical shift for B2, which can be larger than 32767
+    *R = _mm_srai_epi16(R2, 6); // range: [-14234, 30815]
+    *G = _mm_srai_epi16(G4, 6); // range: [-10953, 27710]
+    *B = _mm_srli_epi16(B2, 6); // range: [0, 34238]
+}
+
+// Load the bytes into the *upper* part of 16b words. That's "<< 8", basically.
+static WEBP_INLINE __m128i Load_HI_16(const uint8_t* src) {
+    const __m128i bytes = _mm_loadl_epi64((const __m128i*)src); // 8 source bytes in the low 64 bits
+    return _mm_unpacklo_epi8(_mm_setzero_si128(), bytes);       // zero low byte, sample in the high byte
+}
+
+// Load and replicate the U/V samples
+static WEBP_INLINE __m128i Load_UV_HI_8(const uint8_t* src) {
+    // Build the 32b word byte-wise: '*(const uint32_t*)src' was an unaligned, aliasing-unsafe load.
+    const uint32_t uv4 = src[0] | (src[1] << 8) | (src[2] << 16) | ((uint32_t)src[3] << 24);
+    const __m128i hi = _mm_unpacklo_epi8(_mm_setzero_si128(), _mm_cvtsi32_si128((int)uv4));
+    return _mm_unpacklo_epi16(hi, hi); // replicate samples: each of the 4 bytes fills two 16b lanes
+}
+
+// Convert 8 samples of YUV444 to R/G/B (one sample per 16b lane)
+static void YUV444ToRGB(const uint8_t* const y,
+                        const uint8_t* const u,
+                        const uint8_t* const v,
+                        __m128i* const R,
+                        __m128i* const G,
+                        __m128i* const B) {
+    const __m128i luma = Load_HI_16(y);                   // 8 Y samples, one per 16b lane
+    const __m128i cb = Load_HI_16(u), cr = Load_HI_16(v); // 8 U and 8 V samples: one per luma sample
+    ConvertYUV444ToRGB(&luma, &cb, &cr, R, G, B);
+}
+
+// Convert 8 samples of YUV420 to R/G/B (8 luma samples, 4 replicated U/V samples)
+static void YUV420ToRGB(const uint8_t* const y,
+                        const uint8_t* const u,
+                        const uint8_t* const v,
+                        __m128i* const R,
+                        __m128i* const G,
+                        __m128i* const B) {
+    const __m128i luma = Load_HI_16(y);                       // 8 Y samples, one per 16b lane
+    const __m128i cb = Load_UV_HI_8(u), cr = Load_UV_HI_8(v); // 4 U and 4 V samples, each duplicated
+    ConvertYUV444ToRGB(&luma, &cb, &cr, R, G, B);
+}
+
+// Pack R/G/B/A results into 32b output.
+static WEBP_INLINE void PackAndStore4(const __m128i* const R,
+                                      const __m128i* const G,
+                                      const __m128i* const B,
+                                      const __m128i* const A,
+                                      uint8_t* const dst) {
+    // The argument names only label byte positions 0..3 of each output pixel;
+    // callers permute them to obtain other channel orders.
+    const __m128i ch02 = _mm_packus_epi16(*R, *B);        // saturate to 8b: 8 x byte0, then 8 x byte2
+    const __m128i ch13 = _mm_packus_epi16(*G, *A);        // saturate to 8b: 8 x byte1, then 8 x byte3
+    const __m128i pair01 = _mm_unpacklo_epi8(ch02, ch13); // byte0/byte1 pairs
+    const __m128i pair23 = _mm_unpackhi_epi8(ch02, ch13); // byte2/byte3 pairs
+    _mm_storeu_si128((__m128i*)(dst + 0), _mm_unpacklo_epi16(pair01, pair23));  // pixels 0..3
+    _mm_storeu_si128((__m128i*)(dst + 16), _mm_unpackhi_epi16(pair01, pair23)); // pixels 4..7
+}
+
+// Pack R/G/B/A results into 16b output.
+static WEBP_INLINE void PackAndStore4444(const __m128i* const R,
+                                         const __m128i* const G,
+                                         const __m128i* const B,
+                                         const __m128i* const A,
+                                         uint8_t* const dst) {
+#if !defined(WEBP_SWAP_16BIT_CSP)
+    const __m128i rg0 = _mm_packus_epi16(*R, *G);
+    const __m128i ba0 = _mm_packus_epi16(*B, *A);
+#else
+    const __m128i rg0 = _mm_packus_epi16(*B, *A); // swapped variant: the two output bytes trade places
+    const __m128i ba0 = _mm_packus_epi16(*R, *G);
+#endif
+    const __m128i mask_0xf0 = _mm_set1_epi8((char)0xf0); // explicit narrowing: 0xf0 exceeds a signed char
+    const __m128i rb1 = _mm_unpacklo_epi8(rg0, ba0); // rbrbrbrbrb...
+    const __m128i ga1 = _mm_unpackhi_epi8(rg0, ba0); // gagagagaga...
+    const __m128i rb2 = _mm_and_si128(rb1, mask_0xf0);                    // keep the top 4 bits of r and b
+    const __m128i ga2 = _mm_srli_epi16(_mm_and_si128(ga1, mask_0xf0), 4); // top 4 bits of g and a, moved low
+    const __m128i rgba4444 = _mm_or_si128(rb2, ga2);
+    _mm_storeu_si128((__m128i*)dst, rgba4444); // 8 pixels x 2 bytes
+}
+
+// Pack R/G/B results into 16b output.
+static WEBP_INLINE void PackAndStore565(const __m128i* const R,
+                                        const __m128i* const G,
+                                        const __m128i* const B,
+                                        uint8_t* const dst) {
+    const __m128i r0 = _mm_packus_epi16(*R, *R); // saturate each channel to 8b
+    const __m128i g0 = _mm_packus_epi16(*G, *G);
+    const __m128i b0 = _mm_packus_epi16(*B, *B);
+    const __m128i r1 = _mm_and_si128(r0, _mm_set1_epi8((char)0xf8)); // explicit narrowing: 0xf8 > CHAR_MAX
+    const __m128i b1 = _mm_and_si128(_mm_srli_epi16(b0, 3), _mm_set1_epi8(0x1f));
+    const __m128i g1 = _mm_srli_epi16(_mm_and_si128(g0, _mm_set1_epi8((char)0xe0)), 5); // top 3 bits of g
+    const __m128i g2 = _mm_slli_epi16(_mm_and_si128(g0, _mm_set1_epi8(0x1c)), 3);       // next 3 bits of g
+    const __m128i rg = _mm_or_si128(r1, g1);
+    const __m128i gb = _mm_or_si128(g2, b1);
+#if !defined(WEBP_SWAP_16BIT_CSP)
+    const __m128i rgb565 = _mm_unpacklo_epi8(rg, gb);
+#else
+    const __m128i rgb565 = _mm_unpacklo_epi8(gb, rg); // swapped variant: the two output bytes trade places
+#endif
+    _mm_storeu_si128((__m128i*)dst, rgb565); // 8 pixels x 2 bytes
+}
+
+// Function used several times in PlanarTo24b.
+// It de-interleaves the 'in' buffer: the even-indexed bytes are gathered into
+// the first half of 'out' and the odd-indexed bytes into the second half.
+static WEBP_INLINE void PlanarTo24bHelper(const __m128i* const in /*in[6]*/, __m128i* const out /*out[6]*/) {
+    const __m128i low_byte = _mm_set1_epi16(0x00ff);
+    int k;
+    // 'in' and 'out' must be distinct buffers; each pass combines one pair of input registers.
+    for (k = 0; k < 3; ++k) {
+        const __m128i a = in[2 * k + 0], b = in[2 * k + 1];
+        // Low byte of every 16b lane (the even-indexed bytes) goes to out[0..2].
+        out[k + 0] = _mm_packus_epi16(_mm_and_si128(a, low_byte), _mm_and_si128(b, low_byte));
+        // High byte of every 16b lane (the odd-indexed bytes) goes to out[3..5].
+        out[k + 3] = _mm_packus_epi16(_mm_srli_epi16(a, 8), _mm_srli_epi16(b, 8));
+    }
+}
+
+// Pack the planar buffers
+// rrrr... rrrr... gggg... gggg... bbbb... bbbb....
+// triplet by triplet in the output buffer rgb as rgbrgbrgbrgb ...
+static WEBP_INLINE void PlanarTo24b(__m128i* const in /*in[6]*/, uint8_t* rgb) {
+    // 'in' holds six registers of sixteen bytes each. To keep the illustration
+    // short, picture six registers of only four bytes.
+    // Every pass of PlanarTo24bHelper() picks every other byte and regroups the
+    // registers like this:
+    // Input:
+    //   r0r1r2r3 | r4r5r6r7 | g0g1g2g3 | g4g5g6g7 | b0b1b2b3 | b4b5b6b7
+    // After one pass the first three registers hold the even-indexed bytes and
+    // the last three hold the odd-indexed ones:
+    //   r0r2r4r6 | g0g2g4g6 | b0b2b4b6 | r1r3r5r7 | g1g3g5g7 | b1b3b5b7
+    // Two further passes of the same permutation give:
+    //   r0r4g0g4 | b0b4r1r5 | g1g5b1b5 | r2r6g2g6 | b2b6r3r7 | g3g7b3b7
+    //   r0g0b0r1 | g1b1r2g2 | b2r3g3b3 | r4g4b4r5 | g5b5r6g6 | b6r7g7b7
+    // Note that 'in' is used as scratch space and is overwritten.
+    __m128i scratch[6];
+    int k;
+    PlanarTo24bHelper(in, scratch);
+    PlanarTo24bHelper(scratch, in);
+    PlanarTo24bHelper(in, scratch);
+    // Sixteen bytes per register need two more passes than the 4-byte example.
+    PlanarTo24bHelper(scratch, in);
+    PlanarTo24bHelper(in, scratch);
+
+    // Write the six interleaved registers back to back: 6 x 16 = 96 bytes.
+    for (k = 0; k < 6; ++k) {
+        _mm_storeu_si128((__m128i*)(rgb + 16 * k), scratch[k]);
+    }
+}
+#undef MK_UINT32
+
+void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst) {
+    const __m128i opaque = _mm_set1_epi16(255); // constant alpha value for every pixel
+    int ofs;
+    for (ofs = 0; ofs < 32; ofs += 8) { // 4 batches of 8 pixels
+        __m128i R, G, B;
+        YUV444ToRGB(y + ofs, u + ofs, v + ofs, &R, &G, &B);
+        PackAndStore4(&R, &G, &B, &opaque, dst + 4 * ofs); // 4 output bytes per pixel
+    }
+}
+
+void VP8YuvToBgra32(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst) {
+    const __m128i opaque = _mm_set1_epi16(255); // constant alpha value for every pixel
+    int ofs;
+    for (ofs = 0; ofs < 32; ofs += 8) { // 4 batches of 8 pixels
+        __m128i R, G, B;
+        YUV444ToRGB(y + ofs, u + ofs, v + ofs, &R, &G, &B);
+        PackAndStore4(&B, &G, &R, &opaque, dst + 4 * ofs); // byte order B, G, R, A
+    }
+}
+
+void VP8YuvToArgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst) {
+    const __m128i opaque = _mm_set1_epi16(255); // constant alpha value for every pixel
+    int ofs;
+    for (ofs = 0; ofs < 32; ofs += 8) { // 4 batches of 8 pixels
+        __m128i R, G, B;
+        YUV444ToRGB(y + ofs, u + ofs, v + ofs, &R, &G, &B);
+        PackAndStore4(&opaque, &R, &G, &B, dst + 4 * ofs); // byte order A, R, G, B
+    }
+}
+
+void VP8YuvToRgba444432(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst) {
+    const __m128i opaque = _mm_set1_epi16(255); // constant alpha value for every pixel
+    int ofs;
+    for (ofs = 0; ofs < 32; ofs += 8) { // 4 batches of 8 pixels
+        __m128i R, G, B;
+        YUV444ToRGB(y + ofs, u + ofs, v + ofs, &R, &G, &B);
+        PackAndStore4444(&R, &G, &B, &opaque, dst + 2 * ofs); // 2 output bytes per pixel
+    }
+}
+
+void VP8YuvToRgb56532(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst) {
+    int ofs;
+    for (ofs = 0; ofs < 32; ofs += 8) { // 4 batches of 8 pixels
+        __m128i R, G, B;
+        YUV444ToRGB(y + ofs, u + ofs, v + ofs, &R, &G, &B);
+        PackAndStore565(&R, &G, &B, dst + 2 * ofs); // 2 output bytes per pixel
+    }
+}
+
+void VP8YuvToRgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst) {
+    __m128i r[4], g[4], b[4]; // four batches of 8 pixels, one 16b lane per channel value
+    __m128i planes[6];
+    int k;
+
+    for (k = 0; k < 4; ++k) {
+        YUV444ToRGB(y + 8 * k, u + 8 * k, v + 8 * k, &r[k], &g[k], &b[k]);
+    }
+
+    // Narrow to 8b with unsigned saturation, planar layout: 32 x R, 32 x G, 32 x B.
+    planes[0] = _mm_packus_epi16(r[0], r[1]);
+    planes[1] = _mm_packus_epi16(r[2], r[3]);
+    planes[2] = _mm_packus_epi16(g[0], g[1]);
+    planes[3] = _mm_packus_epi16(g[2], g[3]);
+    planes[4] = _mm_packus_epi16(b[0], b[1]);
+    planes[5] = _mm_packus_epi16(b[2], b[3]);
+
+    // Interleave the planes into R,G,B byte triplets and write them to 'dst'.
+    PlanarTo24b(planes, dst);
+}
+
+void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst) {
+    __m128i r[4], g[4], b[4]; // four batches of 8 pixels, one 16b lane per channel value
+    __m128i planes[6];
+    int k;
+
+    for (k = 0; k < 4; ++k) {
+        YUV444ToRGB(y + 8 * k, u + 8 * k, v + 8 * k, &r[k], &g[k], &b[k]);
+    }
+
+    // Narrow to 8b with unsigned saturation, planar layout: 32 x B, 32 x G, 32 x R.
+    planes[0] = _mm_packus_epi16(b[0], b[1]);
+    planes[1] = _mm_packus_epi16(b[2], b[3]);
+    planes[2] = _mm_packus_epi16(g[0], g[1]);
+    planes[3] = _mm_packus_epi16(g[2], g[3]);
+    planes[4] = _mm_packus_epi16(r[0], r[1]);
+    planes[5] = _mm_packus_epi16(r[2], r[3]);
+
+    // Interleave the planes into B,G,R byte triplets and write them to 'dst'.
+    PlanarTo24b(planes, dst);
+}
+
+//-----------------------------------------------------------------------------
+// Arbitrary-length row conversion functions
+
+static void YuvToRgbaRow(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int len) {
+    const __m128i opaque = _mm_set1_epi16(255); // constant alpha value for every pixel
+    int x = 0;
+    while (x + 8 <= len) { // SIMD path: 8 pixels (8 Y, 4 U, 4 V) per iteration
+        __m128i R, G, B;
+        YUV420ToRGB(y, u, v, &R, &G, &B);
+        PackAndStore4(&R, &G, &B, &opaque, dst);
+        x += 8;
+        y += 8;
+        dst += 32;
+        u += 4;
+        v += 4;
+    }
+    for (; x < len; ++x, ++y, dst += 4) { // scalar tail, one pixel at a time
+        VP8YuvToRgba(y[0], u[0], v[0], dst);
+        u += (x & 1); // chroma advances after every second pixel
+        v += (x & 1);
+    }
+}
+
+static void YuvToBgraRow(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int len) {
+    const __m128i opaque = _mm_set1_epi16(255); // constant alpha value for every pixel
+    int x = 0;
+    while (x + 8 <= len) { // SIMD path: 8 pixels (8 Y, 4 U, 4 V) per iteration
+        __m128i R, G, B;
+        YUV420ToRGB(y, u, v, &R, &G, &B);
+        PackAndStore4(&B, &G, &R, &opaque, dst);
+        x += 8;
+        y += 8;
+        dst += 32;
+        u += 4;
+        v += 4;
+    }
+    for (; x < len; ++x, ++y, dst += 4) { // scalar tail, one pixel at a time
+        VP8YuvToBgra(y[0], u[0], v[0], dst);
+        u += (x & 1); // chroma advances after every second pixel
+        v += (x & 1);
+    }
+}
+
+static void YuvToArgbRow(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int len) {
+    const __m128i opaque = _mm_set1_epi16(255); // constant alpha value for every pixel
+    int x = 0;
+    while (x + 8 <= len) { // SIMD path: 8 pixels (8 Y, 4 U, 4 V) per iteration
+        __m128i R, G, B;
+        YUV420ToRGB(y, u, v, &R, &G, &B);
+        PackAndStore4(&opaque, &R, &G, &B, dst);
+        x += 8;
+        y += 8;
+        dst += 32;
+        u += 4;
+        v += 4;
+    }
+    for (; x < len; ++x, ++y, dst += 4) { // scalar tail, one pixel at a time
+        VP8YuvToArgb(y[0], u[0], v[0], dst);
+        u += (x & 1); // chroma advances after every second pixel
+        v += (x & 1);
+    }
+}
+
+static void YuvToRgbRow(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int len) {
+    int x = 0;
+    while (x + 32 <= len) { // SIMD path: 32 pixels -> 96 output bytes per iteration
+        __m128i r[4], g[4], b[4];
+        __m128i planes[6];
+        int k;
+        for (k = 0; k < 4; ++k) { // each batch: 8 luma samples and 4 shared chroma samples
+            YUV420ToRGB(y + 8 * k, u + 4 * k, v + 4 * k, &r[k], &g[k], &b[k]);
+        }
+
+        // Narrow to 8b with unsigned saturation, planar layout: 32 x R, 32 x G, 32 x B.
+        planes[0] = _mm_packus_epi16(r[0], r[1]);
+        planes[1] = _mm_packus_epi16(r[2], r[3]);
+        planes[2] = _mm_packus_epi16(g[0], g[1]);
+        planes[3] = _mm_packus_epi16(g[2], g[3]);
+        planes[4] = _mm_packus_epi16(b[0], b[1]);
+        planes[5] = _mm_packus_epi16(b[2], b[3]);
+
+        // Interleave the planes into R,G,B byte triplets and write them to 'dst'.
+        PlanarTo24b(planes, dst);
+
+        x += 32;
+        y += 32;
+        u += 16;
+        v += 16;
+        dst += 32 * 3;
+    }
+    for (; x < len; ++x, ++y, dst += 3) { // scalar tail, one pixel at a time
+        const int odd = x & 1;            // chroma advances after every second pixel
+        VP8YuvToRgb(y[0], u[0], v[0], dst);
+        u += odd;
+        v += odd;
+    }
+}
+
+static void YuvToBgrRow(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int len) {
+    int x = 0;
+    while (x + 32 <= len) { // SIMD path: 32 pixels -> 96 output bytes per iteration
+        __m128i r[4], g[4], b[4];
+        __m128i planes[6];
+        int k;
+        for (k = 0; k < 4; ++k) { // each batch: 8 luma samples and 4 shared chroma samples
+            YUV420ToRGB(y + 8 * k, u + 4 * k, v + 4 * k, &r[k], &g[k], &b[k]);
+        }
+
+        // Narrow to 8b with unsigned saturation, planar layout: 32 x B, 32 x G, 32 x R.
+        planes[0] = _mm_packus_epi16(b[0], b[1]);
+        planes[1] = _mm_packus_epi16(b[2], b[3]);
+        planes[2] = _mm_packus_epi16(g[0], g[1]);
+        planes[3] = _mm_packus_epi16(g[2], g[3]);
+        planes[4] = _mm_packus_epi16(r[0], r[1]);
+        planes[5] = _mm_packus_epi16(r[2], r[3]);
+
+        // Interleave the planes into B,G,R byte triplets and write them to 'dst'.
+        PlanarTo24b(planes, dst);
+
+        x += 32;
+        y += 32;
+        u += 16;
+        v += 16;
+        dst += 32 * 3;
+    }
+    for (; x < len; ++x, ++y, dst += 3) { // scalar tail, one pixel at a time
+        const int odd = x & 1;            // chroma advances after every second pixel
+        VP8YuvToBgr(y[0], u[0], v[0], dst);
+        u += odd;
+        v += odd;
+    }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPInitSamplersSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplersSSE2(void) {
+    WebPSamplers[MODE_RGB] = YuvToRgbRow; // SSE2 row samplers, one table slot per output mode
+    WebPSamplers[MODE_BGR] = YuvToBgrRow;
+    WebPSamplers[MODE_RGBA] = YuvToRgbaRow;
+    WebPSamplers[MODE_BGRA] = YuvToBgraRow;
+    WebPSamplers[MODE_ARGB] = YuvToArgbRow;
+}
+
+//------------------------------------------------------------------------------
+// RGB24/32 -> YUV converters
+
+// Load eight 16b-words from *src.
+#define LOAD_16(src) _mm_loadu_si128((const __m128i*)(src))
+// Store eight 16b-words into *dst
+#define STORE_16(V, dst) _mm_storeu_si128((__m128i*)(dst), (V))
+
+// Function that inserts a value of the second half of the in buffer in between
+// every two char of the first half.
+static WEBP_INLINE void RGB24PackedToPlanarHelper(const __m128i* const in /*in[6]*/, __m128i* const out /*out[6]*/) {
+    // Register k of the first half is byte-interleaved with register k + 3 of the second half.
+    int k;
+    for (k = 0; k < 3; ++k) {
+        out[2 * k + 0] = _mm_unpacklo_epi8(in[k], in[k + 3]);
+        out[2 * k + 1] = _mm_unpackhi_epi8(in[k], in[k + 3]);
+    }
+}
+
+// Unpack the 8b input rgbrgbrgbrgb ... as contiguous registers:
+// rrrr... rrrr... gggg... gggg... bbbb... bbbb....
+// Similar to PlanarTo24bHelper(), but in reverse order.
+static WEBP_INLINE void RGB24PackedToPlanar(const uint8_t* const rgb, __m128i* const out /*out[6]*/) {
+    __m128i tmp[6];
+    int k;
+    // Load 6 x 16 = 96 interleaved bytes (32 three-byte pixels) with unaligned loads.
+    for (k = 0; k < 6; ++k) {
+        tmp[k] = _mm_loadu_si128((const __m128i*)(rgb + 16 * k));
+    }
+    // Five interleaving passes, ping-ponging between 'tmp' and 'out', so that
+    // the last pass leaves the result in 'out'.
+    RGB24PackedToPlanarHelper(tmp, out);
+    RGB24PackedToPlanarHelper(out, tmp);
+    RGB24PackedToPlanarHelper(tmp, out);
+    RGB24PackedToPlanarHelper(out, tmp);
+    RGB24PackedToPlanarHelper(tmp, out);
+}
+
+// Convert 8 packed ARGB pixels to r[], g[], b[] (eight 16b lanes each); alpha is not returned.
+static WEBP_INLINE void RGB32PackedToPlanar(const uint32_t* const argb, // in: 8 pixels, read with unaligned loads
+                                            __m128i* const r,           // out: red, zero-extended to 16b
+                                            __m128i* const g,           // out: green, zero-extended to 16b
+                                            __m128i* const b) {         // out: blue, zero-extended to 16b
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i in0 = LOAD_16(argb + 0); // argb3 | argb2 | argb1 | argb0
+    const __m128i in1 = LOAD_16(argb + 4); // argb7 | argb6 | argb5 | argb4
+    // column-wise transpose: three rounds of byte interleaving gather each channel's 8 bytes together
+    const __m128i A0 = _mm_unpacklo_epi8(in0, in1);
+    const __m128i A1 = _mm_unpackhi_epi8(in0, in1);
+    const __m128i B0 = _mm_unpacklo_epi8(A0, A1);
+    const __m128i B1 = _mm_unpackhi_epi8(A0, A1);
+    // C0 = g7 g6 ... g1 g0 | b7 b6 ... b1 b0
+    // C1 = a7 a6 ... a1 a0 | r7 r6 ... r1 r0
+    const __m128i C0 = _mm_unpacklo_epi8(B0, B1);
+    const __m128i C1 = _mm_unpackhi_epi8(B0, B1);
+    // store 16b: zero-extend the wanted 8-byte half of C0/C1 (the alpha half of C1 is left unused)
+    *r = _mm_unpacklo_epi8(C1, zero);
+    *g = _mm_unpackhi_epi8(C0, zero);
+    *b = _mm_unpacklo_epi8(C0, zero);
+}
+
+// This macro computes (RG * MULT_RG + GB * MULT_GB + ROUNDER) >> DESCALE_FIX
+// on 32b lanes and packs the lo/hi results into OUT as saturated 16b values.
+// It's a macro and not a function because srai_epi32 needs an immediate shift.
+#define TRANSFORM(RG_LO, RG_HI, GB_LO, GB_HI, MULT_RG, MULT_GB, ROUNDER, DESCALE_FIX, OUT) \
+    do {                                                                                   \
+        const __m128i V0_lo = _mm_madd_epi16(RG_LO, MULT_RG);                              \
+        const __m128i V0_hi = _mm_madd_epi16(RG_HI, MULT_RG);                              \
+        const __m128i V1_lo = _mm_madd_epi16(GB_LO, MULT_GB);                              \
+        const __m128i V1_hi = _mm_madd_epi16(GB_HI, MULT_GB);                              \
+        const __m128i V2_lo = _mm_add_epi32(V0_lo, V1_lo);                                 \
+        const __m128i V2_hi = _mm_add_epi32(V0_hi, V1_hi);                                 \
+        const __m128i V3_lo = _mm_add_epi32(V2_lo, ROUNDER);                               \
+        const __m128i V3_hi = _mm_add_epi32(V2_hi, ROUNDER);                               \
+        const __m128i V5_lo = _mm_srai_epi32(V3_lo, DESCALE_FIX);                          \
+        const __m128i V5_hi = _mm_srai_epi32(V3_hi, DESCALE_FIX);                          \
+        (OUT) = _mm_packs_epi32(V5_lo, V5_hi);                                             \
+    } while (0)
+
+#define MK_CST_16(A, B) _mm_set_epi16((B), (A), (B), (A), (B), (A), (B), (A))
+static WEBP_INLINE void ConvertRGBToY(const __m128i* const R, // in: eight 16b red values
+                                      const __m128i* const G, // in: eight 16b green values
+                                      const __m128i* const B, // in: eight 16b blue values
+                                      __m128i* const Y) {     // out: eight 16b luma values
+    const __m128i kRG_y = MK_CST_16(16839, 33059 - 16384); // R weight | first part of the G weight
+    const __m128i kGB_y = MK_CST_16(16384, 6420); // rest of the G weight (33059 total, too big for one int16) | B weight
+    const __m128i kHALF_Y = _mm_set1_epi32((16 << YUV_FIX) + YUV_HALF); // +16 offset and YUV_HALF, added before the shift
+
+    const __m128i RG_lo = _mm_unpacklo_epi16(*R, *G); // interleave as r0 g0 r1 g1 ... so that madd pairs R with G
+    const __m128i RG_hi = _mm_unpackhi_epi16(*R, *G);
+    const __m128i GB_lo = _mm_unpacklo_epi16(*G, *B); // likewise g0 b0 g1 b1 ...
+    const __m128i GB_hi = _mm_unpackhi_epi16(*G, *B);
+    TRANSFORM(RG_lo, RG_hi, GB_lo, GB_hi, kRG_y, kGB_y, kHALF_Y, YUV_FIX, *Y); // (16839 R + 33059 G + 6420 B + kHALF_Y) >> YUV_FIX
+}
+
+static WEBP_INLINE void ConvertRGBToUV( // Converts eight 16b R/G/B values into eight 16b U and V values.
+    const __m128i* const R, const __m128i* const G, const __m128i* const B, __m128i* const U, __m128i* const V) {
+    const __m128i kRG_u = MK_CST_16(-9719, -19081); // U: R and G weights
+    const __m128i kGB_u = MK_CST_16(0, 28800); // U: B weight only (G is already weighted through kRG_u)
+    const __m128i kRG_v = MK_CST_16(28800, 0); // V: R weight only (G is weighted through kGB_v)
+    const __m128i kGB_v = MK_CST_16(-24116, -4684); // V: G and B weights
+    const __m128i kHALF_UV = _mm_set1_epi32(((128 << YUV_FIX) + YUV_HALF) << 2); // +128 offset and rounder, scaled by 4
+
+    const __m128i RG_lo = _mm_unpacklo_epi16(*R, *G); // interleave as r0 g0 r1 g1 ... so that madd pairs R with G
+    const __m128i RG_hi = _mm_unpackhi_epi16(*R, *G);
+    const __m128i GB_lo = _mm_unpacklo_epi16(*G, *B); // likewise g0 b0 g1 b1 ...
+    const __m128i GB_hi = _mm_unpackhi_epi16(*G, *B);
+    TRANSFORM(RG_lo, RG_hi, GB_lo, GB_hi, kRG_u, kGB_u, kHALF_UV, YUV_FIX + 2, *U); // 2 extra descale bits: inputs presumably carry a 4x scale (e.g. HorizontalAddPack() output) - TODO confirm
+    TRANSFORM(RG_lo, RG_hi, GB_lo, GB_hi, kRG_v, kGB_v, kHALF_UV, YUV_FIX + 2, *V);
+}
+
+#undef MK_CST_16
+#undef TRANSFORM
+
+static void ConvertRGB24ToY(const uint8_t* rgb, uint8_t* y, int width) {
+    // Converts 'width' packed 3-byte RGB pixels to 8b luma, 32 pixels per SIMD step.
+    const __m128i zero = _mm_setzero_si128();
+    const int simd_width = width & ~31; // 'width' rounded down to a multiple of 32
+    int x = 0;
+    while (x < simd_width) {
+        __m128i planes[6]; // [0..1] hold R, [2..3] hold G, [4..5] hold B
+        int half;
+
+        RGB24PackedToPlanar(rgb, planes);
+
+        for (half = 0; half < 2; ++half, x += 16) {
+            __m128i r, g, b, y_lo, y_hi;
+            // Widen the low 8 pixels of this half to 16b and convert.
+            r = _mm_unpacklo_epi8(planes[0 + half], zero);
+            g = _mm_unpacklo_epi8(planes[2 + half], zero);
+            b = _mm_unpacklo_epi8(planes[4 + half], zero);
+            ConvertRGBToY(&r, &g, &b, &y_lo);
+            // Same for the high 8 pixels.
+            r = _mm_unpackhi_epi8(planes[0 + half], zero);
+            g = _mm_unpackhi_epi8(planes[2 + half], zero);
+            b = _mm_unpackhi_epi8(planes[4 + half], zero);
+            ConvertRGBToY(&r, &g, &b, &y_hi);
+            // Cast to 8-bit and store.
+            STORE_16(_mm_packus_epi16(y_lo, y_hi), y + x);
+        }
+        rgb += 3 * 16 * 2;
+    }
+    // left-over pixels, one at a time
+    for (; x < width; ++x, rgb += 3) {
+        y[x] = VP8RGBToY(rgb[0], rgb[1], rgb[2], YUV_HALF);
+    }
+}
+
+static void ConvertBGR24ToY(const uint8_t* bgr, uint8_t* y, int width) {
+    // Converts 'width' packed 3-byte BGR pixels to 8b luma, 32 pixels per SIMD step.
+    const __m128i zero = _mm_setzero_si128();
+    const int simd_width = width & ~31; // 'width' rounded down to a multiple of 32
+    int x = 0;
+    while (x < simd_width) {
+        __m128i planes[6]; // [0..1] hold B, [2..3] hold G, [4..5] hold R
+        int half;
+
+        RGB24PackedToPlanar(bgr, planes);
+
+        for (half = 0; half < 2; ++half, x += 16) {
+            __m128i r, g, b, y_lo, y_hi;
+            // Widen the low 8 pixels of this half to 16b and convert.
+            b = _mm_unpacklo_epi8(planes[0 + half], zero);
+            g = _mm_unpacklo_epi8(planes[2 + half], zero);
+            r = _mm_unpacklo_epi8(planes[4 + half], zero);
+            ConvertRGBToY(&r, &g, &b, &y_lo);
+            // Same for the high 8 pixels.
+            b = _mm_unpackhi_epi8(planes[0 + half], zero);
+            g = _mm_unpackhi_epi8(planes[2 + half], zero);
+            r = _mm_unpackhi_epi8(planes[4 + half], zero);
+            ConvertRGBToY(&r, &g, &b, &y_hi);
+            // Cast to 8-bit and store.
+            STORE_16(_mm_packus_epi16(y_lo, y_hi), y + x);
+        }
+        bgr += 3 * 16 * 2;
+    }
+    // left-over pixels, one at a time
+    for (; x < width; ++x, bgr += 3) {
+        y[x] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF);
+    }
+}
+
+static void ConvertARGBToY(const uint32_t* argb, uint8_t* y, int width) {
+    const int simd_width = width & ~15; // 16 pixels per SIMD step
+    int x;
+    for (x = 0; x < simd_width; x += 16) {
+        __m128i r, g, b, y_lo, y_hi;
+        RGB32PackedToPlanar(argb + x + 0, &r, &g, &b);
+        ConvertRGBToY(&r, &g, &b, &y_lo);
+        RGB32PackedToPlanar(argb + x + 8, &r, &g, &b);
+        ConvertRGBToY(&r, &g, &b, &y_hi);
+        STORE_16(_mm_packus_epi16(y_lo, y_hi), y + x);
+    }
+    for (; x < width; ++x) { // left-over: R, G, B sit in bits 23..16, 15..8 and 7..0
+        const uint32_t px = argb[x];
+        y[x] = VP8RGBToY((px >> 16) & 0xff, (px >> 8) & 0xff, (px >> 0) & 0xff, YUV_HALF);
+    }
+}
+
+// Horizontal add (doubled) of two 16b values, result is 16b.
+// in: A | B | C | D | ... -> out: 2*(A+B) | 2*(C+D) | ...
+static void HorizontalAddPack(const __m128i* const A, const __m128i* const B, __m128i* const out) {
+    // madd against an all-2 vector gives 2*x0 + 2*x1 for each adjacent 16b pair, as one 32b lane.
+    const __m128i twos = _mm_set1_epi16(2);
+    const __m128i sums_a = _mm_madd_epi16(*A, twos);
+    *out = _mm_packs_epi32(sums_a, _mm_madd_epi16(*B, twos));
+}
+
+static void ConvertARGBToUV(const uint32_t* argb, uint8_t* u, uint8_t* v, int src_width, int do_store) {
+    // Each SIMD step turns 32 source pixels into 16 U and 16 V samples.
+    const int simd_width = src_width & ~31;
+    int x;
+    for (x = 0; x < simd_width; x += 32, u += 16, v += 16) {
+        __m128i r0, g0, b0, r1, g1, b1, U[2], V[2];
+        __m128i u8, v8;
+        int k;
+
+        for (k = 0; k < 2; ++k) {
+            const uint32_t* const src = argb + x + 16 * k;
+            RGB32PackedToPlanar(src + 0, &r0, &g0, &b0);
+            RGB32PackedToPlanar(src + 8, &r1, &g1, &b1);
+            // Sum horizontally adjacent pixels (doubled, see HorizontalAddPack()).
+            HorizontalAddPack(&r0, &r1, &r0);
+            HorizontalAddPack(&g0, &g1, &g0);
+            HorizontalAddPack(&b0, &b1, &b0);
+            ConvertRGBToUV(&r0, &g0, &b0, &U[k], &V[k]);
+        }
+
+        u8 = _mm_packus_epi16(U[0], U[1]);
+        v8 = _mm_packus_epi16(V[0], V[1]);
+        if (!do_store) {
+            // Not a plain store: average with the values already present in u[] / v[].
+            u8 = _mm_avg_epu8(u8, LOAD_16(u));
+            v8 = _mm_avg_epu8(v8, LOAD_16(v));
+        }
+        STORE_16(u8, u);
+        STORE_16(v8, v);
+    }
+    if (x < src_width) { // left-over
+        WebPConvertARGBToUV_C(argb + x, u, v, src_width - x, do_store);
+    }
+}
+
+// Convert 8 packed RGBX pixels (four 16b values each, 32 values in total) to r[], g[], b[]
+static WEBP_INLINE void RGBA32PackedToPlanar_16b(const uint16_t* const rgbx,
+                                                 __m128i* const r,
+                                                 __m128i* const g,
+                                                 __m128i* const b) {
+    const __m128i in0 = LOAD_16(rgbx + 0);  // r0 | g0 | b0 |x| r1 | g1 | b1 |x
+    const __m128i in1 = LOAD_16(rgbx + 8);  // r2 | g2 | b2 |x| r3 | g3 | b3 |x
+    const __m128i in2 = LOAD_16(rgbx + 16); // r4 | ...
+    const __m128i in3 = LOAD_16(rgbx + 24); // r6 | ...
+    // column-wise transpose
+    const __m128i A0 = _mm_unpacklo_epi16(in0, in1);
+    const __m128i A1 = _mm_unpackhi_epi16(in0, in1);
+    const __m128i A2 = _mm_unpacklo_epi16(in2, in3);
+    const __m128i A3 = _mm_unpackhi_epi16(in2, in3);
+    const __m128i B0 = _mm_unpacklo_epi16(A0, A1); // r0 r1 r2 r3 | g0 g1 ..
+    const __m128i B1 = _mm_unpackhi_epi16(A0, A1); // b0 b1 b2 b3 | x x x x
+    const __m128i B2 = _mm_unpacklo_epi16(A2, A3); // r4 r5 r6 r7 | g4 g5 ..
+    const __m128i B3 = _mm_unpackhi_epi16(A2, A3); // b4 b5 b6 b7 | x x x x
+    *r = _mm_unpacklo_epi64(B0, B2); // r0 .. r7
+    *g = _mm_unpackhi_epi64(B0, B2); // g0 .. g7
+    *b = _mm_unpacklo_epi64(B1, B3); // b0 .. b7 (the 'x' halves of B1/B3 are dropped)
+}
+
+static void ConvertRGBA32ToUV(const uint16_t* rgb, uint8_t* u, uint8_t* v, int width) {
+    // 16 U/V samples per SIMD step; each sample consumes four 16b input values.
+    const int simd_width = width & ~15;
+    int x;
+    for (x = 0; x < simd_width; x += 16) {
+        __m128i r, g, b, u_lo, v_lo, u_hi, v_hi;
+        RGBA32PackedToPlanar_16b(rgb, &r, &g, &b);
+        ConvertRGBToUV(&r, &g, &b, &u_lo, &v_lo);
+        RGBA32PackedToPlanar_16b(rgb + 32, &r, &g, &b);
+        ConvertRGBToUV(&r, &g, &b, &u_hi, &v_hi);
+        STORE_16(_mm_packus_epi16(u_lo, u_hi), u);
+        STORE_16(_mm_packus_epi16(v_lo, v_hi), v);
+        u += 16, v += 16;
+        rgb += 2 * 32;
+    }
+    if (simd_width < width) { // left-over
+        WebPConvertRGBA32ToUV_C(rgb, u, v, width - simd_width);
+    }
+}
+
+//------------------------------------------------------------------------------
+
+extern void WebPInitConvertARGBToYUVSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVSSE2(void) {
+    // Luma converters.
+    WebPConvertARGBToY = ConvertARGBToY;
+    WebPConvertRGB24ToY = ConvertRGB24ToY;
+    WebPConvertBGR24ToY = ConvertBGR24ToY;
+    // Chroma converters.
+    WebPConvertARGBToUV = ConvertARGBToUV;
+    WebPConvertRGBA32ToUV = ConvertRGBA32ToUV;
+}
+
+#else // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(WebPInitSamplersSSE2)
+WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVSSE2)
+
+#endif // WEBP_USE_SSE2
diff --git a/codec/L2/demos/webpEnc/host/src/enc/Makefile.am b/codec/L2/demos/webpEnc/host/src/enc/Makefile.am
new file mode 100644
index 0000000000..9d388e62ff
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/Makefile.am
@@ -0,0 +1,41 @@
+noinst_LTLIBRARIES = libwebpencode.la
+
+libwebpencode_la_SOURCES = \
+    alpha.c \
+    analysis.c \
+    backward_references.c \
+    backward_references.h \
+    config.c \
+    cost.c \
+    cost.h \
+    delta_palettization.c \
+    delta_palettization.h \
+    filter.c \
+    frame.c \
+    histogram.c \
+    histogram.h \
+    iterator.c \
+    near_lossless.c \
+    picture.c \
+    picture_csp.c \
+    picture_psnr.c \
+    picture_rescale.c \
+    picture_tools.c \
+    quant.c \
+    syntax.c \
+    token.c \
+    tree.c \
+    vp8enci.h \
+    vp8l.c \
+    vp8li.h \
+    webpenc.c
+
+## Headers below are installed into $(libwebpencodeincludedir).
+libwebpencodeinclude_HEADERS = \
+    ../webp/encode.h \
+    ../webp/types.h
+noinst_HEADERS = ../webp/format_constants.h
+
+libwebpencode_la_LDFLAGS = -lm
+libwebpencode_la_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
+libwebpencodeincludedir = $(includedir)/webp
diff --git a/codec/L2/demos/webpEnc/host/src/enc/alpha.c b/codec/L2/demos/webpEnc/host/src/enc/alpha.c
new file mode 100644
index 0000000000..94a35cb6a9
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/alpha.c
@@ -0,0 +1,434 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Alpha-plane compression.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "./vp8enci.h"
+#include "../dsp/dsp.h"
+#include "../utils/filters.h"
+#include "../utils/quant_levels.h"
+#include "../utils/utils.h"
+#include "../webp/format_constants.h"
+
+// -----------------------------------------------------------------------------
+// Encodes the given alpha data via specified compression method 'method'.
+// The pre-processing (quantization) is performed if 'quality' is less than 100.
+// For such cases, the encoding is lossy. The valid range is [0, 100] for
+// 'quality' and [0, 1] for 'method':
+//   'method = 0' - No compression;
+//   'method = 1' - Use lossless coder on the alpha plane only
+// 'filter' values [0, 4] correspond to prediction modes none, horizontal,
+// vertical & gradient filters. The prediction mode 4 will try all the
+// prediction modes 0 to 3 and pick the best one.
+// 'effort_level': specifies how much effort must be spent to try and reduce
+//  the compressed output size. In range 0 (quick) to 6 (slow).
+//
+// 'output' corresponds to the buffer containing compressed alpha data.
+//          This buffer is allocated by this method and caller should call
+//          WebPSafeFree(*output) when done.
+// 'output_size' corresponds to size of this compressed alpha buffer.
+//
+// Returns 1 on successfully encoding the alpha and
+//         0 if either:
+//           invalid quality or method, or
+//           memory allocation for the compressed data fails.
+
+#include "../enc/vp8li.h"
+
+static int EncodeLossless(const uint8_t* const data,
+                          int width,
+                          int height,
+                          int effort_level, // in [0..6] range
+                          VP8LBitWriter* const bw,
+                          WebPAuxStats* const stats) {
+    WebPConfig config;
+    WebPPicture picture;
+    int encoded;
+
+    // Wrap the alpha plane into a temporary ARGB picture.
+    WebPPictureInit(&picture);
+    picture.width = width;
+    picture.height = height;
+    picture.use_argb = 1;
+    picture.stats = stats;
+    if (!WebPPictureAlloc(&picture)) return 0;
+
+    // Transfer the alpha values to the green channel.
+    WebPDispatchAlphaToGreen(data, width, picture.width, picture.height, picture.argb, picture.argb_stride);
+
+    WebPConfigInit(&config);
+    config.lossless = 1;
+    // Enable exact, or it would alter RGB values of transparent alpha, which is
+    // normally OK but not here since we are not encoding the input image but an
+    // internal encoding-related image containing necessary exact information in
+    // RGB channels.
+    config.exact = 1;
+    config.method = effort_level; // impact is very small
+    // Set a low default quality for encoding alpha. Ensure that Alpha quality at
+    // lower methods (3 and below) is less than the threshold for triggering
+    // costly 'BackwardReferencesTraceBackwards'.
+    config.quality = 8.f * effort_level;
+    assert(config.quality >= 0 && config.quality <= 100.f);
+
+    encoded = (VP8LEncodeStream(&config, &picture, bw) == VP8_ENC_OK);
+    WebPPictureFree(&picture);
+    if (encoded && !bw->error_) return 1;
+
+    // Failure: release whatever the bit-writer has accumulated.
+    VP8LBitWriterWipeOut(bw);
+    return 0;
+}
+
+// -----------------------------------------------------------------------------
+
+// Small struct to hold the result of a filter mode compression attempt.
+typedef struct {
+    size_t score;       // size of 'bw' as reported by VP8BitWriterSize(); the smallest trial wins
+    VP8BitWriter bw;    // receives the alpha header byte followed by the payload
+    WebPAuxStats stats; // handed to EncodeLossless() as the picture's stats
+} FilterTrial;
+
+// This function always returns an initialized 'bw' object, even upon error.
+static int EncodeAlphaInternal(const uint8_t* const data,
+                               int width,
+                               int height,
+                               int method,
+                               int filter,
+                               int reduce_levels,
+                               int effort_level, // in [0..6] range
+                               uint8_t* const tmp_alpha,
+                               FilterTrial* result) {
+    int ok = 0;
+    const uint8_t* alpha_src;
+    WebPFilterFunc filter_func;
+    uint8_t header;
+    const size_t data_size = width * height;
+    const uint8_t* output = NULL;
+    size_t output_size = 0;
+    VP8LBitWriter tmp_bw;
+
+    assert((uint64_t)data_size == (uint64_t)width * height); // as per spec
+    assert(filter >= 0 && filter < WEBP_FILTER_LAST);
+    assert(method >= ALPHA_NO_COMPRESSION);
+    assert(method <= ALPHA_LOSSLESS_COMPRESSION);
+    assert(sizeof(header) == ALPHA_HEADER_LEN);
+    // TODO(skal): have a common function and #define's to validate alpha params.
+
+    filter_func = WebPFilters[filter];
+    if (filter_func != NULL) {
+        filter_func(data, width, height, width, tmp_alpha);
+        alpha_src = tmp_alpha;
+    } else {
+        alpha_src = data;
+    }
+
+    if (method != ALPHA_NO_COMPRESSION) {
+        ok = VP8LBitWriterInit(&tmp_bw, data_size >> 3);
+        ok = ok && EncodeLossless(alpha_src, width, height, effort_level, &tmp_bw, &result->stats);
+        if (ok) {
+            output = VP8LBitWriterFinish(&tmp_bw);
+            output_size = VP8LBitWriterNumBytes(&tmp_bw);
+            if (output_size > data_size) {
+                // compressed size is larger than source! Revert to uncompressed mode.
+                method = ALPHA_NO_COMPRESSION;
+                VP8LBitWriterWipeOut(&tmp_bw);
+            }
+        } else {
+            VP8LBitWriterWipeOut(&tmp_bw);
+            return 0;
+        }
+    }
+
+    if (method == ALPHA_NO_COMPRESSION) {
+        output = alpha_src;
+        output_size = data_size;
+        ok = 1;
+    }
+
+    // Emit final result.
+    header = method | (filter << 2);
+    if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4;
+
+    VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size);
+    ok = ok && VP8BitWriterAppend(&result->bw, &header, ALPHA_HEADER_LEN);
+    ok = ok && VP8BitWriterAppend(&result->bw, output, output_size);
+
+    if (method != ALPHA_NO_COMPRESSION) {
+        VP8LBitWriterWipeOut(&tmp_bw);
+    }
+    ok = ok && !result->bw.error_;
+    result->score = VP8BitWriterSize(&result->bw);
+    return ok;
+}
+
+// -----------------------------------------------------------------------------
+
+static int GetNumColors(const uint8_t* data, int width, int height, int stride) {
+    // Counts how many distinct byte values occur in the width x height plane.
+    uint8_t seen[256] = {0};
+    int count = 0;
+    int x, y, value;
+
+    for (y = 0; y < height; ++y) {
+        const uint8_t* const row = data + y * stride;
+        for (x = 0; x < width; ++x) {
+            seen[row[x]] = 1;
+        }
+    }
+    for (value = 0; value < 256; ++value) {
+        count += (seen[value] > 0);
+    }
+    return count;
+}
+
+#define FILTER_TRY_NONE (1 << WEBP_FILTER_NONE)
+#define FILTER_TRY_ALL ((1 << WEBP_FILTER_LAST) - 1)
+
+// Given the input 'filter' option, return an OR'd bit-set of filters to try.
+static uint32_t GetFilterMap(const uint8_t* alpha, int width, int height, int filter, int effort_level) {
+    const int kMinColorsForFilterNone = 16;
+    const int kMaxColorsForFilterNone = 192;
+    uint32_t candidates;
+    int num_colors;
+
+    if (filter == WEBP_FILTER_NONE) return FILTER_TRY_NONE;
+    if (filter != WEBP_FILTER_FAST) return FILTER_TRY_ALL; // WEBP_FILTER_BEST -> try all
+
+    // WEBP_FILTER_FAST: quick estimate of the best candidate.
+    num_colors = GetNumColors(alpha, width, height, width);
+    if (num_colors <= kMinColorsForFilterNone) {
+        // For low number of colors, NONE yields better compression.
+        candidates = 1 << WEBP_FILTER_NONE;
+    } else {
+        candidates = 1 << WebPEstimateBestFilter(alpha, width, height, width);
+    }
+    // For large number of colors (or an effort level above 3), try FILTER_NONE
+    // in addition to the best filter as well.
+    if (effort_level > 3 || num_colors > kMaxColorsForFilterNone) {
+        candidates |= FILTER_TRY_NONE;
+    }
+    return candidates;
+}
+
+static void InitFilterTrial(FilterTrial* const score) {
+    score->score = ~(size_t)0; // worst possible score: all bits set at full size_t width, not only the low 32
+    VP8BitWriterInit(&score->bw, 0); // start from an initialized writer with no expected size
+}
+
+static int ApplyFiltersAndEncode(const uint8_t* alpha,
+                                 int width,
+                                 int height,
+                                 size_t data_size,
+                                 int method,
+                                 int filter,
+                                 int reduce_levels,
+                                 int effort_level,
+                                 uint8_t** const output,
+                                 size_t* const output_size,
+                                 WebPAuxStats* const stats) {
+    int ok = 1;
+    FilterTrial best;
+    uint32_t pending = GetFilterMap(alpha, width, height, filter, effort_level);
+    InitFilterTrial(&best);
+
+    if (pending == FILTER_TRY_NONE) {
+        // Single unfiltered candidate: encode straight into 'best', no scratch buffer is passed.
+        ok = EncodeAlphaInternal(alpha, width, height, method, WEBP_FILTER_NONE, reduce_levels, effort_level, NULL,
+                                 &best);
+    } else {
+        uint8_t* const scratch = (uint8_t*)WebPSafeMalloc(1ULL, data_size);
+        if (scratch == NULL) return 0;
+        // Bit 'filter' of the map tells whether that filter gets a trial; the smallest output is kept.
+        for (filter = WEBP_FILTER_NONE; ok && pending; ++filter, pending >>= 1) {
+            FilterTrial trial;
+            if (!(pending & 1)) continue;
+            ok = EncodeAlphaInternal(alpha, width, height, method, filter, reduce_levels, effort_level, scratch,
+                                     &trial);
+            if (ok && trial.score < best.score) {
+                VP8BitWriterWipeOut(&best.bw);
+                best = trial;
+            } else {
+                VP8BitWriterWipeOut(&trial.bw);
+            }
+        }
+        WebPSafeFree(scratch);
+    }
+    if (!ok) {
+        VP8BitWriterWipeOut(&best.bw);
+        return 0;
+    }
+    if (stats != NULL) {
+        stats->lossless_features = best.stats.lossless_features;
+        stats->histogram_bits = best.stats.histogram_bits;
+        stats->transform_bits = best.stats.transform_bits;
+        stats->cache_bits = best.stats.cache_bits;
+        stats->palette_size = best.stats.palette_size;
+        stats->lossless_size = best.stats.lossless_size;
+        stats->lossless_hdr_size = best.stats.lossless_hdr_size;
+        stats->lossless_data_size = best.stats.lossless_data_size;
+    }
+    *output_size = VP8BitWriterSize(&best.bw);
+    *output = VP8BitWriterBuf(&best.bw);
+    return 1;
+}
+
+static int EncodeAlpha(VP8Encoder* const enc,
+                       int quality,      // [0..100]; levels are quantized (lossy) when below 100
+                       int method,       // ALPHA_NO_COMPRESSION or ALPHA_LOSSLESS_COMPRESSION
+                       int filter,       // WEBP_FILTER_NONE .. WEBP_FILTER_FAST
+                       int effort_level, // handed down to filter selection and lossless coding
+                       uint8_t** const output,      // out: encoded alpha buffer, written on success only
+                       size_t* const output_size) { // out: its size in bytes, written on success only
+    const WebPPicture* const pic = enc->pic_;
+    const int width = pic->width;
+    const int height = pic->height;
+
+    uint8_t* quant_alpha = NULL;
+    const size_t data_size = width * height;
+    uint64_t sse = 0;
+    int ok = 1;
+    const int reduce_levels = (quality < 100);
+
+    // quick sanity checks
+    assert((uint64_t)data_size == (uint64_t)width * height); // as per spec
+    assert(enc != NULL && pic != NULL && pic->a != NULL);
+    assert(output != NULL && output_size != NULL);
+    assert(width > 0 && height > 0);
+    assert(pic->a_stride >= width);
+    assert(filter >= WEBP_FILTER_NONE && filter <= WEBP_FILTER_FAST);
+
+    if (quality < 0 || quality > 100) {
+        return 0;
+    }
+    if (method < ALPHA_NO_COMPRESSION || method > ALPHA_LOSSLESS_COMPRESSION) {
+        return 0;
+    }
+
+    if (method == ALPHA_NO_COMPRESSION) {
+        // Don't filter, as filtering will make no impact on compressed size.
+        filter = WEBP_FILTER_NONE;
+    }
+
+    quant_alpha = (uint8_t*)WebPSafeMalloc(1ULL, data_size);
+    if (quant_alpha == NULL) {
+        return 0;
+    }
+
+    // Extract alpha data (width x height) from raw_data (stride x height).
+    WebPCopyPlane(pic->a, pic->a_stride, quant_alpha, width, width, height);
+
+    if (reduce_levels) { // No Quantization required for 'quality = 100'.
+        // 16 alpha levels gives quite a low MSE w.r.t original alpha plane hence
+        // mapped to moderate quality 70. Hence Quality:[0, 70] -> Levels:[2, 16]
+        // and Quality:]70, 100] -> Levels:]16, 256].
+        const int alpha_levels = (quality <= 70) ? (2 + quality / 5) : (16 + (quality - 70) * 8);
+        ok = QuantizeLevels(quant_alpha, width, height, alpha_levels, &sse);
+    }
+
+    if (ok) {
+        VP8FiltersInit();
+        ok = ApplyFiltersAndEncode(quant_alpha, width, height, data_size, method, filter, reduce_levels, effort_level,
+                                   output, output_size, pic->stats);
+        if (pic->stats != NULL) { // need stats?
+            // '*output_size' is only set on success: don't add a stale value to the stats.
+            if (ok) pic->stats->coded_size += (int)(*output_size);
+            enc->sse_[3] = sse;
+        }
+    }
+
+    WebPSafeFree(quant_alpha);
+    return ok;
+}
+
+//------------------------------------------------------------------------------
+// Main calls
+
+static int CompressAlphaJob(VP8Encoder* const enc, void* dummy) {
+    const WebPConfig* const cfg = enc->config_;
+    const int mode = cfg->alpha_filtering; // 0: no filter, 1: fast, anything else: best
+    const WEBP_FILTER_TYPE filter =
+        (mode == 0) ? WEBP_FILTER_NONE : (mode == 1) ? WEBP_FILTER_FAST : WEBP_FILTER_BEST;
+    uint8_t* buf = NULL;
+    size_t size = 0;
+    (void)dummy;
+
+    // The config's 'method' doubles as the effort level, maps to [0..6].
+    if (!EncodeAlpha(enc, cfg->alpha_quality, cfg->alpha_compression, filter, cfg->method, &buf, &size)) {
+        return 0;
+    }
+    if (size != (uint32_t)size) { // Sanity check.
+        WebPSafeFree(buf);
+        return 0;
+    }
+    enc->alpha_data_size_ = (uint32_t)size;
+    enc->alpha_data_ = buf;
+    return 1;
+}
+
+void VP8EncInitAlpha(VP8Encoder* const enc) {
+    WebPWorker* const worker = &enc->alpha_worker_;
+    WebPInitAlphaProcessing();
+    enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_);
+    enc->alpha_data_ = NULL;
+    enc->alpha_data_size_ = 0;
+    if (enc->thread_level_ <= 0) return; // no worker needed without threading
+    // Threaded mode: prepare the worker with CompressAlphaJob as its hook and 'enc' as its data.
+    WebPGetWorkerInterface()->Init(worker);
+    worker->data1 = enc;
+    worker->data2 = NULL;
+    worker->hook = (WebPWorkerHook)CompressAlphaJob;
+}
+
+int VP8EncStartAlpha(VP8Encoder* const enc) {
+    WebPWorker* const worker = &enc->alpha_worker_;
+    // Nothing to do when the encoder has no alpha flagged.
+    if (!enc->has_alpha_) return 1;
+
+    if (enc->thread_level_ <= 0) {
+        return CompressAlphaJob(enc, NULL); // just do the job right away
+    }
+    // Makes sure worker is good to go.
+    if (!WebPGetWorkerInterface()->Reset(worker)) {
+        return 0;
+    }
+    // The job now runs through the worker; VP8EncFinishAlpha() syncs with it.
+    WebPGetWorkerInterface()->Launch(worker);
+    return 1;
+}
+
+int VP8EncFinishAlpha(VP8Encoder* const enc) {
+    // Sync with the worker when one was used for the alpha job.
+    if (enc->has_alpha_ && enc->thread_level_ > 0) {
+        WebPWorker* const worker = &enc->alpha_worker_;
+        if (!WebPGetWorkerInterface()->Sync(worker)) return 0; // error
+    }
+    // Report progress, 20 units further than the current percentage.
+    return WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
+}
+
+int VP8EncDeleteAlpha(VP8Encoder* const enc) {
+    int synced = 1; // stays 1 when no worker is in use
+    if (enc->thread_level_ > 0) {
+        WebPWorker* const alpha_worker = &enc->alpha_worker_;
+        synced = WebPGetWorkerInterface()->Sync(alpha_worker); // finish anything left in flight
+        // The worker is ended in all cases, even if the sync failed.
+        WebPGetWorkerInterface()->End(alpha_worker);
+    }
+    // Release the compressed alpha data and reset the alpha-related state.
+    WebPSafeFree(enc->alpha_data_);
+    enc->has_alpha_ = 0;
+    enc->alpha_data_size_ = 0;
+    enc->alpha_data_ = NULL;
+    return synced;
+}
diff --git a/codec/L2/demos/webpEnc/host/src/enc/analysis.c b/codec/L2/demos/webpEnc/host/src/enc/analysis.c
new file mode 100644
index 0000000000..d54f976e8f
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/analysis.c
@@ -0,0 +1,784 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Macroblock analysis
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#ifdef WEBP_USE_THREAD
+#include <sys/sysinfo.h>
+#endif
+#include "./vp8enci.h"
+#include "./cost.h"
+#include "../utils/utils.h"
+#include "../utils/profiling.h"
+#include "../../host/create_kernel.h"
+
+#define MAX_ITERS_K_MEANS 6
+
+//------------------------------------------------------------------------------
+// Smooth the segment map by replacing the segment of each isolated block with
+// the majority segment of its neighbours.
+
+// For every interior macroblock (the one-block border is left untouched), looks
+// at the segment ids of its 8 neighbours; if one id occurs at least 5 times the
+// block takes that id, otherwise it keeps its own. Results are staged in a
+// temporary map so that all decisions are based on the unsmoothed input.
+// Silently does nothing if the temporary map cannot be allocated.
+static void SmoothSegmentMap(VP8Encoder* const enc) {
+    const int mb_cols = enc->mb_w_;
+    const int mb_rows = enc->mb_h_;
+    const int votes_needed = 5; // majority among the 8 neighbours
+    uint8_t* const smoothed = (uint8_t*)WebPSafeMalloc(mb_cols * mb_rows, sizeof(*smoothed));
+    int row, col;
+    assert((uint64_t)(mb_cols * mb_rows) == (uint64_t)mb_cols * mb_rows); // no overflow, as per spec
+
+    if (smoothed == NULL) return;
+
+    // Pass 1: compute the smoothed segment id of every interior block.
+    for (row = 1; row < mb_rows - 1; ++row) {
+        for (col = 1; col < mb_cols - 1; ++col) {
+            const int pos = col + mb_cols * row;
+            const VP8MBInfo* const center = &enc->mb_info_[pos];
+            int votes[NUM_MB_SEGMENTS] = {0};
+            int winner = center->segment_;
+            int dx, dy, seg;
+            // Tally the segment ids of the 8 surrounding blocks.
+            for (dy = -1; dy <= 1; ++dy) {
+                for (dx = -1; dx <= 1; ++dx) {
+                    if (dx == 0 && dy == 0) continue; // skip the block itself
+                    votes[center[dy * mb_cols + dx].segment_]++;
+                }
+            }
+            for (seg = 0; seg < NUM_MB_SEGMENTS; ++seg) {
+                if (votes[seg] >= votes_needed) {
+                    winner = seg;
+                    break;
+                }
+            }
+            smoothed[pos] = winner;
+        }
+    }
+
+    // Pass 2: write the staged ids back into the macroblock info.
+    for (row = 1; row < mb_rows - 1; ++row) {
+        for (col = 1; col < mb_cols - 1; ++col) {
+            const int pos = col + mb_cols * row;
+            enc->mb_info_[pos].segment_ = smoothed[pos];
+        }
+    }
+    WebPSafeFree(smoothed);
+}
+
+//------------------------------------------------------------------------------
+// set segment susceptibility alpha_ / beta_
+
+// Clamps 'v' to the range [m, M]; the lower bound is checked first.
+static WEBP_INLINE int clip(int v, int m, int M) {
+    if (v < m) return m;
+    if (v > M) return M;
+    return v;
+}
+
+// Derives the per-segment alpha_ / beta_ susceptibilities from the segment
+// centers.
+//   centers: one center per segment (only the first num_segments_ are used).
+//   mid:     reference value; must lie within [min(centers), max(centers)].
+// alpha_ is the center's offset from 'mid' and beta_ its offset from the
+// smallest center, both scaled by 255 / (max - min) and clipped to
+// [-127, 127] and [0, 255] respectively.
+static void SetSegmentAlphas(VP8Encoder* const enc, const int centers[NUM_MB_SEGMENTS], int mid) {
+    const int num_segments = enc->segment_hdr_.num_segments_;
+    int lowest = centers[0];
+    int highest = centers[0];
+    int span;
+    int s;
+
+    // Find the extreme centers (entry 0 is already accounted for).
+    for (s = 1; s < num_segments; ++s) {
+        if (centers[s] < lowest) lowest = centers[s];
+        if (centers[s] > highest) highest = centers[s];
+    }
+    if (highest == lowest) highest = lowest + 1; // keep the divisor non-zero
+    assert(mid <= highest && mid >= lowest);
+
+    span = highest - lowest;
+    for (s = 0; s < num_segments; ++s) {
+        const int from_mid = centers[s] - mid;
+        const int from_lowest = centers[s] - lowest;
+        enc->dqm_[s].alpha_ = clip(255 * from_mid / span, -127, 127);
+        enc->dqm_[s].beta_ = clip(255 * from_lowest / span, 0, 255);
+    }
+}
+
+//------------------------------------------------------------------------------
+// Compute susceptibility based on DCT-coeff histograms:
+// the higher, the "easier" the macroblock is to compress.
+
+#define MAX_ALPHA 255               // 8b of precision for susceptibilities.
+#define ALPHA_SCALE (2 * MAX_ALPHA) // scaling factor for alpha.
+#define DEFAULT_ALPHA (-1)
+#define IS_BETTER_ALPHA(alpha, best_alpha) ((alpha) > (best_alpha))
+
+// Inverts 'alpha' (MAX_ALPHA - alpha) and clips the result to [0, MAX_ALPHA].
+static int FinalAlphaValue(int alpha) {
+    const int inverted = MAX_ALPHA - alpha;
+    return clip(inverted, 0, MAX_ALPHA);
+}
+
+// Turns a histogram summary into a raw alpha value:
+// ALPHA_SCALE * last_non_zero / max_value, or 0 when max_value <= 1.
+// The result is not clipped here; callers later clip it to [0..MAX_ALPHA],
+// which clamps the outer values (mostly noise) and keeps the precision for the
+// small, useful ones.
+static int GetAlpha(const VP8Histogram* const histo) {
+    const int peak = histo->max_value;
+    if (peak <= 1) return 0;
+    return ALPHA_SCALE * histo->last_non_zero / peak;
+}
+
+// Resets a histogram summary to its starting state
+// (max_value = 0, last_non_zero = 1).
+static void InitHistogram(VP8Histogram* const histo) {
+    histo->last_non_zero = 1;
+    histo->max_value = 0;
+}
+
+// Folds 'in' into 'out' by keeping, field by field, the larger of the two
+// values.
+static void MergeHistograms(const VP8Histogram* const in, VP8Histogram* const out) {
+    if (out->last_non_zero < in->last_non_zero) out->last_non_zero = in->last_non_zero;
+    if (out->max_value < in->max_value) out->max_value = in->max_value;
+}
+
+//------------------------------------------------------------------------------
+// Simplified k-Means, to assign Nb segments based on alpha-histogram
+
+// Clusters the macroblock susceptibilities into segments with a simplified
+// k-means and stores the result in 'enc'.
+//   alphas: histogram of the per-macroblock alpha values (MAX_ALPHA + 1 bins).
+// On return every macroblock has its segment_ set (and its alpha_ replaced by
+// the center of that segment), the segment map is optionally smoothed
+// (config 'preprocessing' bit 0, only with more than one segment) and the
+// per-segment alpha_/beta_ are filled in by SetSegmentAlphas().
+static void AssignSegments(VP8Encoder* const enc, const int alphas[MAX_ALPHA + 1]) {
+    // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an
+    // explicit check is needed to avoid spurious warning about 'n + 1' exceeding
+    // array bounds of 'centers' with some compilers (noticed with gcc-4.9).
+    const int nb =
+        (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS) ? enc->segment_hdr_.num_segments_ : NUM_MB_SEGMENTS;
+    int centers[NUM_MB_SEGMENTS];
+    int weighted_average = 0;
+    int map[MAX_ALPHA + 1];
+    int a, n, k;
+    int min_a = 0, max_a = MAX_ALPHA, range_a;
+    // 'int' type is ok for histo, and won't overflow
+    int accum[NUM_MB_SEGMENTS], dist_accum[NUM_MB_SEGMENTS];
+
+    assert(nb >= 1);
+    assert(nb <= NUM_MB_SEGMENTS);
+
+    // Only bins with a non-zero count get a map[] entry below. Start from a
+    // fully defined map so that a macroblock whose alpha_ is not reflected in
+    // 'alphas' falls into segment 0 instead of indexing centers[] with an
+    // uninitialized value.
+    memset(map, 0, sizeof(map));
+
+    // bracket the input
+    for (n = 0; n <= MAX_ALPHA && alphas[n] == 0; ++n) {
+    }
+    min_a = n;
+    for (n = MAX_ALPHA; n > min_a && alphas[n] == 0; --n) {
+    }
+    max_a = n;
+    range_a = max_a - min_a;
+
+    // Spread initial centers evenly
+    for (k = 0, n = 1; k < nb; ++k, n += 2) {
+        assert(n < 2 * nb);
+        centers[k] = min_a + (n * range_a) / (2 * nb);
+    }
+
+    for (k = 0; k < MAX_ITERS_K_MEANS; ++k) { // few iters are enough
+        int total_weight;
+        int displaced;
+        // Reset stats
+        for (n = 0; n < nb; ++n) {
+            accum[n] = 0;
+            dist_accum[n] = 0;
+        }
+        // Assign nearest center for each 'a'
+        n = 0; // track the nearest center for current 'a'
+        for (a = min_a; a <= max_a; ++a) {
+            if (alphas[a]) {
+                while (n + 1 < nb && abs(a - centers[n + 1]) < abs(a - centers[n])) {
+                    n++;
+                }
+                map[a] = n;
+                // accumulate contribution into best centroid
+                dist_accum[n] += a * alphas[a];
+                accum[n] += alphas[a];
+            }
+        }
+        // All points are classified. Move the centroids to the
+        // center of their respective cloud.
+        displaced = 0;
+        weighted_average = 0;
+        total_weight = 0;
+        for (n = 0; n < nb; ++n) {
+            if (accum[n]) {
+                const int new_center = (dist_accum[n] + accum[n] / 2) / accum[n];
+                displaced += abs(centers[n] - new_center);
+                centers[n] = new_center;
+                weighted_average += new_center * accum[n];
+                total_weight += accum[n];
+            }
+        }
+        if (total_weight > 0) {
+            weighted_average = (weighted_average + total_weight / 2) / total_weight;
+        } else {
+            // Empty histogram: nothing was classified and all centers still hold
+            // the same initial value. Use it as the average rather than dividing
+            // by zero; this also keeps 'mid' within [min, max] of the centers.
+            weighted_average = centers[0];
+        }
+        if (displaced < 5) break; // no need to keep on looping...
+    }
+
+    // Map each original value to the closest centroid
+    for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
+        VP8MBInfo* const mb = &enc->mb_info_[n];
+        const int alpha = mb->alpha_;
+        mb->segment_ = map[alpha];
+        mb->alpha_ = centers[map[alpha]]; // for the record.
+    }
+
+    if (nb > 1) {
+        const int smooth = (enc->config_->preprocessing & 1);
+        if (smooth) SmoothSegmentMap(enc);
+    }
+
+    SetSegmentAlphas(enc, centers, weighted_average); // pick some alphas.
+}
+
+//------------------------------------------------------------------------------
+// Macroblock analysis: collect histogram for each mode, deduce the maximal
+// susceptibility and set best modes for this macroblock.
+// Segment assignment is done later.
+
+// Number of modes to inspect for alpha_ evaluation. We don't need to test all
+// the possible modes during the analysis phase: we would risk falling into a
+// local optimum, or being subject to boundary effects.
+#define MAX_INTRA16_MODE 2
+#define MAX_INTRA4_MODE 2
+#define MAX_UV_MODE 2
+
+// Tries the first MAX_INTRA16_MODE intra16 prediction modes on the current
+// macroblock, selects the one with the best alpha (per IS_BETTER_ALPHA) via
+// VP8SetIntra16Mode() and returns that alpha.
+static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
+    int winner = 0;
+    int winner_alpha = DEFAULT_ALPHA;
+    int candidate;
+
+    VP8MakeLuma16Preds(it);
+    for (candidate = 0; candidate < MAX_INTRA16_MODE; ++candidate) {
+        VP8Histogram histo;
+        int candidate_alpha;
+
+        InitHistogram(&histo);
+        VP8CollectHistogram(it->yuv_in_ + Y_OFF_ENC, it->yuv_p_ + VP8I16ModeOffsets[candidate], 0, 16, &histo);
+        candidate_alpha = GetAlpha(&histo);
+        if (!IS_BETTER_ALPHA(candidate_alpha, winner_alpha)) continue;
+        winner_alpha = candidate_alpha;
+        winner = candidate;
+    }
+    VP8SetIntra16Mode(it, winner);
+    return winner_alpha;
+}
+
+// Evaluates intra4 prediction for the current macroblock.
+// For each 4x4 sub-block the first MAX_INTRA4_MODE modes are tried and the
+// histogram of the best one is merged into a macroblock-wide histogram. If the
+// alpha of that merged histogram beats 'best_alpha', the chosen intra4 modes
+// are installed with VP8SetIntra4Mode().
+// Returns the better of 'best_alpha' and the intra4 alpha.
+static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it, int best_alpha) {
+    uint8_t chosen_modes[16];
+    VP8Histogram merged;
+    // Index of the histogram slot the next candidate will overwrite; the other
+    // slot holds the best histogram found so far.
+    int scratch_slot = 0;
+    int merged_alpha;
+
+    InitHistogram(&merged);
+    VP8IteratorStartI4(it);
+    do {
+        VP8Histogram pair[2];
+        const uint8_t* const block_src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
+        int block_best_alpha = DEFAULT_ALPHA;
+        int candidate;
+
+        VP8MakeIntra4Preds(it);
+        for (candidate = 0; candidate < MAX_INTRA4_MODE; ++candidate) {
+            VP8Histogram* const histo = &pair[scratch_slot];
+            int candidate_alpha;
+
+            InitHistogram(histo);
+            VP8CollectHistogram(block_src, it->yuv_p_ + VP8I4ModeOffsets[candidate], 0, 1, histo);
+            candidate_alpha = GetAlpha(histo);
+            if (IS_BETTER_ALPHA(candidate_alpha, block_best_alpha)) {
+                block_best_alpha = candidate_alpha;
+                chosen_modes[it->i4_] = candidate;
+                scratch_slot ^= 1; // protect this histogram, reuse the other slot
+            }
+        }
+        // Fold the best histogram of this sub-block into the total.
+        MergeHistograms(&pair[scratch_slot ^ 1], &merged);
+        // Note: the original samples are reused as predictors.
+    } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF_ENC));
+
+    merged_alpha = GetAlpha(&merged);
+    if (!IS_BETTER_ALPHA(merged_alpha, best_alpha)) return best_alpha;
+    VP8SetIntra4Mode(it, chosen_modes);
+    return merged_alpha;
+}
+
+// Tries the first MAX_UV_MODE chroma prediction modes on the current
+// macroblock, selects the one with the best alpha (per IS_BETTER_ALPHA) via
+// VP8SetIntraUVMode() and returns that alpha.
+static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
+    int winner = 0;
+    int winner_alpha = DEFAULT_ALPHA;
+    int candidate;
+
+    VP8MakeChroma8Preds(it);
+    for (candidate = 0; candidate < MAX_UV_MODE; ++candidate) {
+        VP8Histogram histo;
+        int candidate_alpha;
+
+        InitHistogram(&histo);
+        VP8CollectHistogram(it->yuv_in_ + U_OFF_ENC, it->yuv_p_ + VP8UVModeOffsets[candidate], 16, 16 + 4 + 4, &histo);
+        candidate_alpha = GetAlpha(&histo);
+        if (!IS_BETTER_ALPHA(candidate_alpha, winner_alpha)) continue;
+        winner_alpha = candidate_alpha;
+        winner = candidate;
+    }
+    VP8SetIntraUVMode(it, winner);
+    return winner_alpha;
+}
+
+// Analyzes the macroblock the iterator currently points at.
+//   alphas:   histogram of final alpha values; the bin of this macroblock is
+//             incremented.
+//   alpha:    accumulator, increased by this macroblock's final (mixed) alpha.
+//   uv_alpha: accumulator, increased by this macroblock's chroma alpha.
+// The macroblock is first reset to Intra16 / mode 0, not skipped, segment 0;
+// the prediction modes are then set by the MBAnalyzeBest*Mode() helpers.
+static void MBAnalyze(VP8EncIterator* const it, int alphas[MAX_ALPHA + 1], int* const alpha, int* const uv_alpha) {
+    const VP8Encoder* const enc = it->enc_;
+    int luma_alpha;
+    int chroma_alpha;
+    int mixed_alpha;
+
+    // Defaults: Intra16 with DC_PRED, not skipped, default segment.
+    VP8SetIntra16Mode(it, 0);
+    VP8SetSkip(it, 0);
+    VP8SetSegment(it, 0);
+
+    luma_alpha = MBAnalyzeBestIntra16Mode(it);
+    if (enc->method_ >= 5) {
+        // Quick intra4-vs-intra16 decision. Usually neither good nor final, but
+        // it helps seeding the stats about level bit-cost.
+        // TODO(skal): improve criterion.
+        luma_alpha = MBAnalyzeBestIntra4Mode(it, luma_alpha);
+    }
+    chroma_alpha = MBAnalyzeBestUVMode(it);
+
+    // Final susceptibility: 3/4 luma + 1/4 chroma (rounded), then inverted and
+    // clipped by FinalAlphaValue().
+    mixed_alpha = FinalAlphaValue((3 * luma_alpha + chroma_alpha + 2) >> 2);
+    alphas[mixed_alpha]++;
+    it->mb_->alpha_ = mixed_alpha; // kept for the later segment remapping
+
+    // Accumulate for later complexity analysis.
+    *alpha += mixed_alpha; // mixed susceptibility (not just luma)
+    *uv_alpha += chroma_alpha;
+}
+
+// Puts one macroblock info record into its default state.
+static void DefaultMBInfo(VP8MBInfo* const mb) {
+    mb->alpha_ = 0;
+    mb->segment_ = 0; // default segment
+    mb->skip_ = 0;    // not skipped
+    mb->uv_mode_ = 0;
+    mb->type_ = 1; // I16x16
+}
+
+//------------------------------------------------------------------------------
+// Main analysis loop:
+// Collect all susceptibilities for each macroblock and record their
+// distribution in alphas[]. Segments are assigned a posteriori, based on
+// this histogram.
+// We also pick an intra16 prediction mode, which shouldn't be considered
+// final except for fast-encode settings. We can also pick some intra4 modes
+// and decide intra4/intra16, but that's almost always a bad choice at
+// this stage.
+
+// Used when no segment analysis is performed: resets every macroblock to its
+// default info, zeroes the susceptibilities of segment 0 and of the encoder,
+// and reports 20 more percent of progress.
+static void ResetAllMBInfo(VP8Encoder* const enc) {
+    const int total_mb = enc->mb_w_ * enc->mb_h_;
+    int i;
+
+    for (i = 0; i < total_mb; ++i) {
+        DefaultMBInfo(&enc->mb_info_[i]);
+    }
+
+    // alpha_ / uv_alpha_ cannot be computed on this path: use default values.
+    enc->uv_alpha_ = 0;
+    enc->alpha_ = 0;
+    // Default susceptibilities of the single segment.
+    enc->dqm_[0].beta_ = 0;
+    enc->dqm_[0].alpha_ = 0;
+
+    WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
+}
+
+// Per-job state of the segment analysis: the worker that runs the job, the
+// iterator it walks and the results it accumulates (see InitSegmentJob(),
+// DoSegmentsJob() and MergeJobs()).
+typedef struct {
+    WebPWorker worker;         // worker object; its hook and data are set in InitSegmentJob()
+    int alphas[MAX_ALPHA + 1]; // histogram of the final per-macroblock alpha values
+    int alpha, uv_alpha;       // running sums of the mixed and chroma alphas
+    VP8EncIterator it;         // iterator over the macroblock rows given to this job
+    int delta_progress;        // progress increment passed to VP8IteratorProgress()
+} SegmentJob;
+
+// Worker hook: analyzes every macroblock left in 'it' and accumulates the
+// results into 'job'.
+// Returns the last value returned by VP8IteratorProgress() (1 if the iterator
+// was already done); the loop stops early as soon as that value is 0.
+static int DoSegmentsJob(SegmentJob* const job, VP8EncIterator* const it) {
+    uint8_t buffer[32 + WEBP_ALIGN_CST];
+    uint8_t* const scratch = (uint8_t*)WEBP_ALIGN(buffer);
+    int ok = 1;
+
+    if (VP8IteratorIsDone(it)) return ok;
+
+    for (;;) {
+        // Let's pretend we have perfect lossless reconstruction.
+        VP8IteratorImport(it, scratch);
+        MBAnalyze(it, job->alphas, &job->alpha, &job->uv_alpha);
+        ok = VP8IteratorProgress(it, job->delta_progress);
+        if (!ok) break;
+        if (!VP8IteratorNext(it)) break;
+    }
+    return ok;
+}
+
+// Adds the results of 'src' (alpha histogram and both alpha sums) to 'dst'.
+static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) {
+    int bin = 0;
+    dst->uv_alpha += src->uv_alpha;
+    dst->alpha += src->alpha;
+    while (bin <= MAX_ALPHA) {
+        dst->alphas[bin] += src->alphas[bin];
+        ++bin;
+    }
+}
+
+// Prepares 'job' to analyze the macroblock rows [start_row, end_row) of 'enc':
+// clears its accumulators, sets up its worker to call DoSegmentsJob(job,
+// &job->it) and positions its iterator on 'start_row' with a countdown of
+// (end_row - start_row) * mb_w_ macroblocks.
+static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job, int start_row, int end_row) {
+    const int num_mb = (end_row - start_row) * enc->mb_w_;
+
+    // Result accumulators start from zero.
+    job->uv_alpha = 0;
+    job->alpha = 0;
+    memset(job->alphas, 0, sizeof(job->alphas));
+
+    // Only one job may report progress, since the user's hook is not expected
+    // to be multi-thread safe: that is the job starting at row 0.
+    if (start_row == 0) {
+        job->delta_progress = 20;
+    } else {
+        job->delta_progress = 0;
+    }
+
+    // Worker set-up.
+    WebPGetWorkerInterface()->Init(&job->worker);
+    job->worker.hook = (WebPWorkerHook)DoSegmentsJob;
+    job->worker.data1 = job;
+    job->worker.data2 = &job->it;
+
+    // Iterator set-up.
+    VP8IteratorInit(enc, &job->it);
+    VP8IteratorSetRow(&job->it, start_row);
+    VP8IteratorSetCountDown(&job->it, num_mb);
+}
+
+#define TOTAL_NUM (8)
+#define MT_NUM (TOTAL_NUM - 1)
+// Main entry point of the host-side analysis pass.
+//
+// Segment analysis is currently hard-coded off ('do_segments' is 0), so every
+// call takes the ResetAllMBInfo() path. The segment path is kept in working
+// order: it splits the macroblock rows between the calling thread and up to
+// MT_NUM worker threads, merges their results and calls AssignSegments().
+//
+// Returns 1 on success, 0 if a worker could not be reset or a job failed.
+// The elapsed time is recorded through StartProfiling()/StopProfiling().
+int VP8EncAnalyze(VP8Encoder* const& enc) {
+    int ok = 1;
+    // Hard-coded off (ALGORITHEM_CHANGE_RYANW). The original condition was:
+    //   enc->config_->emulate_jpeg_size ||   // We need the complexity evaluation.
+    //   (enc->segment_hdr_.num_segments_ > 1) ||
+    //   (enc->method_ == 0);  // for method 0, we need preds_[] to be filled.
+    const int do_segments = 0;
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+    if (do_segments) {
+#ifdef WEBP_USE_THREAD
+        // One side job per spare processor, capped at MT_NUM. get_nprocs() is
+        // only declared when WEBP_USE_THREAD is defined, hence the guard.
+        const int num_procs = get_nprocs();
+        const int wanted_mt = (num_procs > TOTAL_NUM) ? MT_NUM : (num_procs - 1);
+        // Never negative, so that 'mt_num + 1' below is a valid divisor.
+        const int mt_num = (wanted_mt > 0) ? wanted_mt : 0;
+#else
+        const int mt_num = 0;
+#endif
+        const int last_row = enc->mb_h_;
+        // Rows are shared evenly between the main job and the 'mt_num' side
+        // jobs; the 'more_row' leftover rows are handed out one by one.
+        const int split_row = last_row / (mt_num + 1);
+        int more_row = last_row % (mt_num + 1);
+        const int total_mb = last_row * enc->mb_w_;
+#ifdef WEBP_USE_THREAD
+        const int kMinSplitRow = 2; // minimal rows needed for mt to be worth it
+        // 'mt_num >= 1' is required: with no side job, side_job[mt_num - 1]
+        // below would index before the start of the array.
+        const int do_mt = (enc->thread_level_ > 0) && (mt_num >= 1) && (split_row >= kMinSplitRow);
+#else
+        const int do_mt = 0;
+#endif
+        const WebPWorkerInterface* const worker_interface = WebPGetWorkerInterface();
+        SegmentJob main_job;
+        if (do_mt) {
+            SegmentJob side_job[MT_NUM];
+            int i;
+            int last_end_row;
+            if (more_row > 0) {
+                last_end_row = split_row + 1;
+                more_row -= 1;
+            } else {
+                last_end_row = split_row;
+            }
+            InitSegmentJob(enc, &main_job, 0, last_end_row);
+            for (i = 0; i < mt_num - 1; i++) {
+                if (more_row > 0) {
+                    InitSegmentJob(enc, &side_job[i], last_end_row, last_end_row + split_row + 1);
+                    more_row -= 1;
+                    last_end_row = last_end_row + split_row + 1;
+                } else {
+                    InitSegmentJob(enc, &side_job[i], last_end_row, last_end_row + split_row);
+                    last_end_row = last_end_row + split_row;
+                }
+            }
+            // The last side job takes whatever rows remain.
+            InitSegmentJob(enc, &side_job[mt_num - 1], last_end_row, last_row);
+            // we don't need to call Reset() on main_job.worker, since we're calling
+            // WebPWorkerExecute() on it
+            for (i = 0; i < mt_num; i++) {
+                ok &= worker_interface->Reset(&(side_job[i].worker));
+            }
+            // launch the side jobs, run the main job on this thread, then wait
+            if (ok) {
+                for (i = 0; i < mt_num; i++) {
+                    worker_interface->Launch(&(side_job[i].worker));
+                }
+                worker_interface->Execute(&main_job.worker);
+                for (i = 0; i < mt_num; i++) {
+                    ok &= worker_interface->Sync(&(side_job[i].worker));
+                }
+                ok &= worker_interface->Sync(&main_job.worker);
+            }
+            // The side workers are ended even if something failed above.
+            for (i = 0; i < mt_num; i++) {
+                worker_interface->End(&(side_job[i].worker));
+            }
+            if (ok) {
+                for (i = 0; i < mt_num; i++) {
+                    MergeJobs(&side_job[i], &main_job); // merge results together
+                }
+            }
+        } else {
+            // Even for single-thread case, we use the generic Worker tools.
+            InitSegmentJob(enc, &main_job, 0, last_row);
+            worker_interface->Execute(&main_job.worker);
+            ok &= worker_interface->Sync(&main_job.worker);
+        }
+        worker_interface->End(&main_job.worker);
+        if (ok) {
+            enc->alpha_ = main_job.alpha / total_mb;
+            enc->uv_alpha_ = main_job.uv_alpha / total_mb;
+            AssignSegments(enc, main_job.alphas);
+        }
+    } else { // Use only one default segment.
+        ResetAllMBInfo(enc);
+    }
+    StopProfiling(&stop_watch, &timeEncAnalyze, &countEncAnalyze);
+    return ok;
+}
+
+// OpenCL variant of the macroblock analysis pass.
+//
+// When segment statistics are needed (see 'do_segments'), the Y/U/V planes of
+// enc->pic_ are written to the device buffers held in 'analyzepara', the
+// 'analyze' kernel is run, and its results (per-row alpha sums, per-row alpha
+// histograms, per-macroblock info and preds) are read back, merged and handed
+// to AssignSegments(). Otherwise every macroblock is reset to defaults with
+// ResetAllMBInfo().
+//
+// Returns 1 on success, 0 as soon as any OpenCL call fails (the failing call
+// is logged to stderr with function name and line number).
+// In all cases the kernel and the device buffers listed under 'Err' are
+// released before returning.
+//
+// NOTE(review): 'analyzepara', 'analyze' and 'hardware' are not declared in
+// this function; presumably they are globals from create_kernel.h -- confirm.
+int VP8EncAnalyzeOcl(VP8Encoder* const enc) {
+    int ok = 1;
+    cl_int err;
+    const int do_segments = enc->config_->emulate_jpeg_size || // We need the complexity evaluation.
+                            (enc->segment_hdr_.num_segments_ > 1) ||
+                            (enc->method_ == 0); // for method 0, we need preds_[] to be filled.
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+    if (do_segments) {
+        int i, j;
+        int alphas_last[MAX_ALPHA + 1]; // histogram summed over all macroblock rows
+        int index;
+        int width = enc->pic_->width;
+        int height = enc->pic_->height;
+        const int mb_w = enc->mb_w_;
+        const int mb_h = enc->mb_h_;
+        const int preds_w = 4 * mb_w + 1;
+        const int preds_h = 4 * mb_h + 1;
+        const int y_stride = enc->pic_->y_stride;
+        const int uv_stride = enc->pic_->uv_stride;
+        const int uv_height = (height + 1) >> 1;
+        const int uv_width = (width + 1) >> 1;
+        const int total_mb = mb_w * mb_h;
+        // Plane heights rounded up to a whole number of macroblock rows.
+        const int expand_yheight = RoundUp(height, 16);
+        const int expand_uvheight = RoundUp(uv_height, 8);
+
+        // Host-side staging arrays. These are variable-length arrays on the
+        // stack whose sizes grow with the picture dimensions.
+        // NOTE(review): large pictures may need these on the heap -- confirm
+        // the maximum picture size this path is expected to handle.
+        uint8_t mb_info[3 * mb_w * mb_h]; // 3 bytes per macroblock: type, uv mode, alpha
+
+        int output_alpha[mb_h];           // one alpha sum per macroblock row
+        int output_uvalpha[mb_h];         // one uv-alpha sum per macroblock row
+        int alphas[mb_h][MAX_ALPHA + 1];  // one alpha histogram per macroblock row
+
+        // Zero the accumulators before they are uploaded (256 == MAX_ALPHA + 1).
+        for (i = 0; i < mb_h; i++) {
+            output_alpha[i] = 0;
+            output_uvalpha[i] = 0;
+            for (j = 0; j < 256; j++) {
+                alphas[i][j] = 0;
+            }
+        }
+
+        int total_alpha = 0;
+        int total_uvalpha = 0;
+
+        int mb_size;
+        int preds_size;
+        int nz_size;
+        int y_size;
+        int u_size;
+        int v_size;
+
+        // Padding rows for the luma plane: each one is a copy of 'width' bytes
+        // taken at offset width * (height - 1) of the Y plane.
+        // NOTE(review): that offset uses 'width', not 'y_stride'; it is the last
+        // picture row only when y_stride == width -- confirm. The array has zero
+        // length when no padding is needed.
+        const int expand_y_size = (expand_yheight - height) * width;
+        uint8_t expand_y[expand_y_size];
+        if (expand_yheight > height) {
+            for (i = 0; i < expand_yheight - height; i++) {
+                memcpy(expand_y + i * width, enc->pic_->y + width * (height - 1), width);
+            }
+        }
+
+        // Same padding scheme for the two chroma planes.
+        // NOTE(review): the source offset uses 'uv_width', not 'uv_stride' --
+        // confirm, as for the luma plane.
+        const int expand_uv_size = (expand_uvheight - uv_height) * uv_width;
+        uint8_t expand_u[expand_uv_size];
+        uint8_t expand_v[expand_uv_size];
+        if (expand_uvheight > uv_height) {
+            for (i = 0; i < expand_uvheight - uv_height; i++) {
+                memcpy(expand_u + i * uv_width, enc->pic_->u + uv_width * (uv_height - 1), uv_width);
+                memcpy(expand_v + i * uv_width, enc->pic_->v + uv_width * (uv_height - 1), uv_width);
+            }
+        }
+
+        // Transfer sizes, in bytes. 'nz_size' is computed but not used in this
+        // function.
+        mb_size = mb_w * mb_h * sizeof(uint8_t);
+        preds_size = preds_w * preds_h * sizeof(uint8_t);
+        nz_size = (mb_w + 1) * sizeof(uint32_t) + WEBP_ALIGN_CST;
+        y_size = (uint64_t)y_stride * height;
+        u_size = (uint64_t)uv_stride * uv_height;
+        v_size = (uint64_t)uv_stride * uv_height;
+
+        analyzepara.method = enc->method_;
+        memset(alphas_last, 0, (MAX_ALPHA + 1) * sizeof(int));
+
+        // size_t globalSize[2] = {mb_w, mb_h};
+        // size_t localSize[2] = {mb_w, 1};
+        // 2-D launch: ANALYZE_GRX_SIZE x mb_h work-items in groups of
+        // ANALYZE_GRX_SIZE x 1.
+        size_t globalSize[2] = {ANALYZE_GRX_SIZE, mb_h};
+        size_t localSize[2] = {ANALYZE_GRX_SIZE, 1};
+
+        // Only kernel argument 9 (the encoding method) is set here.
+        // NOTE(review): the other arguments are presumably bound when the kernel
+        // is created -- confirm.
+        err = clSetKernelArg(analyze.mKernel, 9, sizeof(cl_int), &(analyzepara.method));
+        if (err != CL_SUCCESS) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            ok = 0;
+            goto Err;
+        }
+
+        // Upload the Y plane (blocking write), followed by its padding rows,
+        // which are written right after the plane at byte offset 'y_size'.
+        err = clEnqueueWriteBuffer(hardware.mQueue, analyzepara.y, CL_TRUE, 0, y_size, enc->pic_->y, 0, NULL, NULL);
+        if (CL_SUCCESS != err) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            ok = 0;
+            goto Err;
+        }
+
+        if (expand_yheight > height) {
+            err = clEnqueueWriteBuffer(hardware.mQueue, analyzepara.y, CL_TRUE, y_size, expand_y_size, expand_y, 0,
+                                       NULL, NULL);
+            if (CL_SUCCESS != err) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                ok = 0;
+                goto Err;
+            }
+        }
+
+        // Upload the U plane and its padding rows.
+        err = clEnqueueWriteBuffer(hardware.mQueue, analyzepara.u, CL_TRUE, 0, u_size, enc->pic_->u, 0, NULL, NULL);
+        if (CL_SUCCESS != err) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            ok = 0;
+            goto Err;
+        }
+
+        if (expand_uvheight > uv_height) {
+            err = clEnqueueWriteBuffer(hardware.mQueue, analyzepara.u, CL_TRUE, u_size, expand_uv_size, expand_u, 0,
+                                       NULL, NULL);
+            if (CL_SUCCESS != err) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                ok = 0;
+                goto Err;
+            }
+        }
+
+        // Upload the V plane and its padding rows.
+        err = clEnqueueWriteBuffer(hardware.mQueue, analyzepara.v, CL_TRUE, 0, v_size, enc->pic_->v, 0, NULL, NULL);
+        if (CL_SUCCESS != err) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            ok = 0;
+            goto Err;
+        }
+
+        if (expand_uvheight > uv_height) {
+            err = clEnqueueWriteBuffer(hardware.mQueue, analyzepara.v, CL_TRUE, v_size, expand_uv_size, expand_v, 0,
+                                       NULL, NULL);
+            if (CL_SUCCESS != err) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                ok = 0;
+                goto Err;
+            }
+        }
+
+        // Upload the zeroed accumulators (alpha sums, uv-alpha sums, histograms).
+        err = clEnqueueWriteBuffer(hardware.mQueue, analyzepara.output_alpha, CL_TRUE, 0, mb_h * sizeof(int),
+                                   &output_alpha, 0, NULL, NULL);
+        if (CL_SUCCESS != err) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            ok = 0;
+            goto Err;
+        }
+
+        err = clEnqueueWriteBuffer(hardware.mQueue, analyzepara.output_uvalpha, CL_TRUE, 0, mb_h * sizeof(int),
+                                   &output_uvalpha, 0, NULL, NULL);
+        if (CL_SUCCESS != err) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            ok = 0;
+            goto Err;
+        }
+
+        err = clEnqueueWriteBuffer(hardware.mQueue, analyzepara.alphas, CL_TRUE, 0,
+                                   mb_h * (MAX_ALPHA + 1) * sizeof(int), alphas, 0, NULL, NULL);
+        if (CL_SUCCESS != err) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            ok = 0;
+            goto Err;
+        }
+
+        // Run the analysis kernel and wait for it to complete.
+        err = clEnqueueNDRangeKernel(hardware.mQueue, analyze.mKernel, 2, 0, globalSize, localSize, 0, NULL, NULL);
+        if (err != CL_SUCCESS) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            ok = 0;
+            goto Err;
+        }
+
+        // The two messages below are written to stderr on every call.
+        fprintf(stderr, "start clFinish\n");
+        err = clFinish(hardware.mQueue);
+        if (err != CL_SUCCESS) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            ok = 0;
+            goto Err;
+        }
+        fprintf(stderr, "stop clFinish\n");
+
+        // Read the results back (blocking reads).
+        err = clEnqueueReadBuffer(hardware.mQueue, analyzepara.output_alpha, CL_TRUE, 0, mb_h * sizeof(int),
+                                  &output_alpha, 0, NULL, NULL);
+        if (CL_SUCCESS != err) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            ok = 0;
+            goto Err;
+        }
+
+        err = clEnqueueReadBuffer(hardware.mQueue, analyzepara.output_uvalpha, CL_TRUE, 0, mb_h * sizeof(int),
+                                  &output_uvalpha, 0, NULL, NULL);
+        if (CL_SUCCESS != err) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            ok = 0;
+            goto Err;
+        }
+
+        err =
+            clEnqueueReadBuffer(hardware.mQueue, analyzepara.mb_info, CL_TRUE, 0, 3 * mb_size, mb_info, 0, NULL, NULL);
+        if (CL_SUCCESS != err) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            ok = 0;
+            goto Err;
+        }
+
+        // The preds are read straight into the encoder's own buffer.
+        err =
+            clEnqueueReadBuffer(hardware.mQueue, analyzepara.preds, CL_TRUE, 0, preds_size, enc->preds_, 0, NULL, NULL);
+        if (CL_SUCCESS != err) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            ok = 0;
+            goto Err;
+        }
+
+        err = clEnqueueReadBuffer(hardware.mQueue, analyzepara.alphas, CL_TRUE, 0, mb_h * (MAX_ALPHA + 1) * sizeof(int),
+                                  alphas, 0, NULL, NULL);
+        if (CL_SUCCESS != err) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            ok = 0;
+            goto Err;
+        }
+
+        // Unpack the 3-byte records into the encoder's macroblock info
+        // ('mb_size' equals the number of macroblocks). Only type_, uv_mode_ and
+        // alpha_ are set here; skip_ is left as it was and segment_ is assigned
+        // by AssignSegments() below.
+        for (index = 0; index < mb_size; index++) {
+            enc->mb_info_[index].type_ = mb_info[3 * index + 0];
+            enc->mb_info_[index].uv_mode_ = mb_info[3 * index + 1];
+            enc->mb_info_[index].alpha_ = mb_info[3 * index + 2];
+        }
+
+        // Reduce the per-row results into picture-wide totals.
+        for (i = 0; i < mb_h; i++) {
+            total_alpha += output_alpha[i];
+            total_uvalpha += output_uvalpha[i];
+            for (j = 0; j < 256; j++) {
+                alphas_last[j] += alphas[i][j];
+            }
+        }
+
+        enc->alpha_ = total_alpha / total_mb;
+        enc->uv_alpha_ = total_uvalpha / total_mb;
+
+        AssignSegments(enc, alphas_last);
+    } else { // Use only one default segment.
+        ResetAllMBInfo(enc);
+    }
+
+// Reached on success as well as on failure (there is no earlier return).
+// NOTE(review): analyzepara.alphas is used above but is not released here --
+// confirm it is released elsewhere.
+Err:
+    releaseKernel(analyze);
+    clReleaseMemObject(analyzepara.mb_info);
+    clReleaseMemObject(analyzepara.preds);
+    clReleaseMemObject(analyzepara.y);
+    clReleaseMemObject(analyzepara.u);
+    clReleaseMemObject(analyzepara.v);
+    // clReleaseMemObject(analyzepara.output_data);
+    clReleaseMemObject(analyzepara.output_alpha);
+    clReleaseMemObject(analyzepara.output_uvalpha);
+    StopProfiling(&stop_watch, &timeEncAnalyzeOcl, &countEncAnalyzeOcl);
+    return ok;
+}
diff --git a/codec/L2/demos/webpEnc/host/src/enc/backward_references.c b/codec/L2/demos/webpEnc/host/src/enc/backward_references.c
new file mode 100644
index 0000000000..e9e828d93d
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/backward_references.c
@@ -0,0 +1,1081 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+
+#include <assert.h>
+#include <math.h>
+
+#include "./backward_references.h"
+#include "./histogram.h"
+#include "../dsp/lossless.h"
+#include "../dsp/dsp.h"
+#include "../utils/color_cache.h"
+#include "../utils/utils.h"
+#include "../utils/profiling.h"
+
+#define VALUES_IN_BYTE 256
+
+#define MIN_BLOCK_SIZE 256 // minimum block size for backward references
+
+#define MAX_ENTROPY (1e30f)
+
+// 1M window (4M bytes) minus 120 special codes for short distances.
+#define WINDOW_SIZE ((1 << 20) - 120)
+
+// Bounds for the match length.
+#define MIN_LENGTH 2
+#define MAX_LENGTH 4096
+
+// -----------------------------------------------------------------------------
+
+// Lookup table used by DistanceToPlaneCode() to convert a small 2-D pixel
+// offset into a short distance code. It is laid out as 8 rows of 16 entries and
+// indexed with 'yoffset * 16 + 8 - xoffset' (or, for offsets that wrap onto the
+// following row, '(yoffset + 1) * 16 + 8 + (xsize - xoffset)').
+// NOTE(review): the 255 entries appear to be placeholder slots that are not
+// meant to be looked up -- confirm.
+static const uint8_t plane_to_code_lut[128] = {
+    96,  73,  55,  39,  23,  13,  5,  1,   255, 255, 255, 255, 255, 255, 255, 255, 101, 78, 58, 42, 26,  16,
+    8,   2,   0,   3,   9,   17,  27, 43,  59,  79,  102, 86,  62,  46,  32,  20,  10,  6,  4,  7,  11,  21,
+    33,  47,  63,  87,  105, 90,  70, 52,  37,  28,  18,  14,  12,  15,  19,  29,  38,  53, 71, 91, 110, 99,
+    82,  66,  48,  35,  30,  24,  22, 25,  31,  36,  49,  67,  83,  100, 115, 108, 94,  76, 64, 50, 44,  40,
+    34,  41,  45,  51,  65,  77,  95, 109, 118, 113, 103, 92,  80,  68,  60,  56,  54,  57, 61, 69, 81,  93,
+    104, 114, 119, 116, 111, 106, 97, 88,  84,  74,  72,  75,  85,  89,  98,  107, 112, 117};
+
+// Converts the linear backward distance 'dist' into a distance code for an
+// image that is 'xsize' pixels wide. Offsets falling in the small neighbourhood
+// covered by plane_to_code_lut yield 'table value + 1'; any other distance is
+// returned as 'dist + 120'.
+static int DistanceToPlaneCode(int xsize, int dist) {
+    const int rows_up = dist / xsize;
+    const int cols_left = dist % xsize;
+    int lut_index;
+
+    if (cols_left <= 8 && rows_up < 8) {
+        // Target pixel is at most 8 columns to the left, fewer than 8 rows up.
+        lut_index = rows_up * 16 + 8 - cols_left;
+    } else if (cols_left > xsize - 8 && rows_up < 7) {
+        // Target pixel sits to the right of the current column on the next row up.
+        lut_index = (rows_up + 1) * 16 + 8 + (xsize - cols_left);
+    } else {
+        return dist + 120;
+    }
+    return plane_to_code_lut[lut_index] + 1;
+}
+
+// Compares array1 and array2 and returns the index of the first element at
+// which they differ, looking at indices below 'max_limit' only; 'max_limit' is
+// returned when no difference is found in the scanned range.
+// Shortcut: 0 is returned immediately when the arrays differ at index
+// 'best_len_match', i.e. when this candidate cannot beat the current best
+// length. With WEBP_USE_SSE2 defined, 0 is also returned when the arrays differ
+// anywhere before 'best_len_match' and the scan resumes right after it;
+// otherwise the scan starts at index 0, so the result may be a value that is
+// not larger than 'best_len_match'.
+// The caller must guarantee that index 'best_len_match' is readable in both
+// arrays.
+static WEBP_INLINE int FindMatchLength(const uint32_t* const array1,
+                                       const uint32_t* const array2,
+                                       int best_len_match,
+                                       int max_limit) {
+    int match_len;
+
+    // Before 'expensive' linear match, check if the two arrays match at the
+    // current best length index.
+    if (array1[best_len_match] != array2[best_len_match]) return 0;
+
+#if defined(WEBP_USE_SSE2)
+    // Check if anything is different up to best_len_match excluded.
+    // memcmp seems to be slower on ARM so it is disabled for now.
+    if (memcmp(array1, array2, best_len_match * sizeof(*array1))) return 0;
+    match_len = best_len_match + 1;
+#else
+    match_len = 0;
+#endif
+
+    // Linear scan for the first mismatch (or max_limit).
+    while (match_len < max_limit && array1[match_len] == array2[match_len]) {
+        ++match_len;
+    }
+    return match_len;
+}
+
+// -----------------------------------------------------------------------------
+//  VP8LBackwardRefs
+
+// One node of the singly linked list of blocks holding the PixOrCopy tokens of
+// a VP8LBackwardRefs. BackwardRefsNewBlock() allocates the node and the storage
+// that 'start_' points to as a single chunk.
+struct PixOrCopyBlock {
+    PixOrCopyBlock* next_; // next block (or NULL)
+    PixOrCopy* start_;     // data start
+    int size_;             // currently used size
+};
+
+// Empties 'refs' without releasing memory: the blocks currently in use are
+// moved, in one step, to the front of the free list so that they can be
+// recycled by BackwardRefsNewBlock().
+static void ClearBackwardRefs(VP8LBackwardRefs* const refs) {
+    assert(refs != NULL);
+    if (refs->tail_ != NULL) {
+        // Chain the existing free list behind the last used block ...
+        *refs->tail_ = refs->free_blocks_;
+    }
+    // ... and make the (now longer) used list the new free list.
+    refs->free_blocks_ = refs->refs_;
+    refs->refs_ = NULL;
+    refs->last_block_ = NULL;
+    refs->tail_ = &refs->refs_;
+}
+
+// Releases every block owned by 'refs' (used and free alike). The used blocks
+// are first moved to the free list, which is then walked and freed.
+void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) {
+    PixOrCopyBlock* block;
+    assert(refs != NULL);
+    ClearBackwardRefs(refs);
+    block = refs->free_blocks_;
+    while (block != NULL) {
+        PixOrCopyBlock* const following = block->next_;
+        WebPSafeFree(block);
+        block = following;
+    }
+    refs->free_blocks_ = NULL;
+}
+
+// Zero-initializes 'refs' as an empty list whose blocks will hold 'block_size'
+// entries each; values below MIN_BLOCK_SIZE are raised to MIN_BLOCK_SIZE.
+void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) {
+    assert(refs != NULL);
+    if (block_size < MIN_BLOCK_SIZE) {
+        block_size = MIN_BLOCK_SIZE;
+    }
+    memset(refs, 0, sizeof(*refs));
+    refs->block_size_ = block_size;
+    refs->tail_ = &refs->refs_;
+}
+
+// Returns a cursor positioned on the first token of the first block of 'refs'.
+// For an empty list all cursor pointers are NULL.
+VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs) {
+    VP8LRefsCursor c;
+    PixOrCopyBlock* const first = refs->refs_;
+    c.cur_block_ = first;
+    if (first == NULL) {
+        c.cur_pos = NULL;
+        c.last_pos_ = NULL;
+        return c;
+    }
+    c.cur_pos = first->start_;
+    c.last_pos_ = first->start_ + first->size_;
+    return c;
+}
+
+// Moves cursor 'c' to the beginning of the block following the current one.
+// When there is no such block, every cursor pointer becomes NULL.
+void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c) {
+    PixOrCopyBlock* const next_block = c->cur_block_->next_;
+    if (next_block != NULL) {
+        c->cur_pos = next_block->start_;
+        c->last_pos_ = next_block->start_ + next_block->size_;
+    } else {
+        c->cur_pos = NULL;
+        c->last_pos_ = NULL;
+    }
+    c->cur_block_ = next_block;
+}
+
+// Appends an empty block to the list of 'refs' and returns it. A block is taken
+// from the free list when one is available, otherwise it is allocated.
+// On allocation failure refs->error_ is set and NULL is returned.
+static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) {
+    PixOrCopyBlock* b = refs->free_blocks_;
+    if (b != NULL) {
+        // Recycle the head of the free list.
+        refs->free_blocks_ = b->next_;
+    } else {
+        // Header and payload are allocated as one chunk.
+        const size_t total_size = sizeof(*b) + refs->block_size_ * sizeof(*b->start_);
+        b = (PixOrCopyBlock*)WebPSafeMalloc(1ULL, total_size);
+        if (b == NULL) {
+            refs->error_ |= 1;
+            return NULL;
+        }
+        // The payload starts right after the header (not always aligned).
+        b->start_ = (PixOrCopy*)((uint8_t*)b + sizeof(*b));
+    }
+    // Link the block at the end of the used list.
+    *refs->tail_ = b;
+    refs->tail_ = &b->next_;
+    refs->last_block_ = b;
+    b->next_ = NULL;
+    b->size_ = 0;
+    return b;
+}
+
+// Appends token 'v' to 'refs', starting a new block when there is none yet or
+// the last one is full. If no block can be obtained the token is dropped
+// (BackwardRefsNewBlock() has set refs->error_ in that case).
+static WEBP_INLINE void BackwardRefsCursorAdd(VP8LBackwardRefs* const refs, const PixOrCopy v) {
+    PixOrCopyBlock* block = refs->last_block_;
+    const int need_new_block = (block == NULL) || (block->size_ == refs->block_size_);
+    if (need_new_block) {
+        block = BackwardRefsNewBlock(refs);
+        if (block == NULL) return;
+    }
+    block->start_[block->size_] = v;
+    block->size_++;
+}
+
+// Replaces the content of 'dst' with a block-by-block copy of 'src'. Both must
+// use the same block size. Returns 1 on success, 0 if a block could not be
+// obtained (dst->error_ is set in that case).
+int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src, VP8LBackwardRefs* const dst) {
+    // The source head is read before 'dst' is cleared.
+    const PixOrCopyBlock* src_block = src->refs_;
+    ClearBackwardRefs(dst);
+    assert(src->block_size_ == dst->block_size_);
+    for (; src_block != NULL; src_block = src_block->next_) {
+        PixOrCopyBlock* const copy = BackwardRefsNewBlock(dst);
+        if (copy == NULL) {
+            return 0;
+        }
+        memcpy(copy->start_, src_block->start_, src_block->size_ * sizeof(*src_block->start_));
+        copy->size_ = src_block->size_;
+    }
+    return 1;
+}
+
+// -----------------------------------------------------------------------------
+// Hash chains
+
+// Marks the hash chain as empty: every entry of the first-index table and of
+// the chain array is set to -1 (all bytes 0xff).
+static void HashChainReset(VP8LHashChain* const p) {
+    assert(p != NULL);
+    memset(p->hash_to_first_index_, 0xff, HASH_SIZE * sizeof(p->hash_to_first_index_[0]));
+    memset(p->chain_, 0xff, p->size_ * sizeof(p->chain_[0]));
+}
+
+// Allocates the chain array of 'p' for 'size' positions and resets the hash
+// chain to the empty state. 'p' must not already own a chain.
+// Returns 1 on success, 0 on allocation failure.
+int VP8LHashChainInit(VP8LHashChain* const p, int size) {
+    assert(p->size_ == 0);
+    assert(p->chain_ == NULL);
+    assert(size > 0);
+    p->chain_ = (int*)WebPSafeMalloc(size, sizeof(*p->chain_));
+    if (p->chain_ != NULL) {
+        p->size_ = size;
+        HashChainReset(p);
+        return 1;
+    }
+    return 0;
+}
+
+// Frees the chain array of 'p' and leaves it in the "no chain, size 0" state.
+void VP8LHashChainClear(VP8LHashChain* const p) {
+    assert(p != NULL);
+    WebPSafeFree(p->chain_);
+    p->chain_ = NULL;
+    p->size_ = 0;
+}
+
+// -----------------------------------------------------------------------------
+
+#define HASH_MULTIPLIER_HI (0xc6a4a793U)
+#define HASH_MULTIPLIER_LO (0x5bd1e996U)
+
+// Hashes the pixel pair argb[0], argb[1] down to HASH_BITS bits: each pixel is
+// multiplied by its own constant, the products are added modulo 2^32 and the
+// top HASH_BITS bits of the sum are kept.
+static WEBP_INLINE uint32_t GetPixPairHash64(const uint32_t* const argb) {
+    const uint32_t hi_part = argb[1] * HASH_MULTIPLIER_HI;
+    const uint32_t lo_part = argb[0] * HASH_MULTIPLIER_LO;
+    const uint32_t mixed = hi_part + lo_part;
+    return mixed >> (32 - HASH_BITS);
+}
+
+// Registers position 'pos' under the hash of the pixel pair argb[0], argb[1]:
+// the previous head of that hash bucket becomes the chain successor of 'pos',
+// and 'pos' becomes the new head.
+static void HashChainInsert(VP8LHashChain* const p, const uint32_t* const argb, int pos) {
+    const uint32_t bucket = GetPixPairHash64(argb);
+    const int previous_head = p->hash_to_first_index_[bucket];
+    p->chain_[pos] = previous_head;
+    p->hash_to_first_index_[bucket] = pos;
+}
+
+// Returns the maximum number of hash chain lookups to perform for the given
+// compression quality: a base of 6 (low effort) or 8, plus quality^2 / 128.
+// For quality in [0, 100] the result lies in [6, 86].
+static int GetMaxItersForQuality(int quality, int low_effort) {
+    const int base_iters = low_effort ? 6 : 8;
+    const int quality_bonus = (quality * quality) / 128;
+    return base_iters + quality_bonus;
+}
+
+// Returns the backward search window (in pixels) for the given quality:
+// WINDOW_SIZE above quality 75, otherwise 256, 64 or 16 image rows depending on
+// the quality tier, never exceeding WINDOW_SIZE.
+static int GetWindowSizeForHashChain(int quality, int xsize) {
+    int window;
+    if (quality > 75) {
+        window = WINDOW_SIZE;
+    } else if (quality > 50) {
+        window = xsize << 8;
+    } else if (quality > 25) {
+        window = xsize << 6;
+    } else {
+        window = xsize << 4;
+    }
+    assert(xsize > 0);
+    if (window > WINDOW_SIZE) {
+        window = WINDOW_SIZE;
+    }
+    return window;
+}
+
+// Caps a candidate copy length 'len' at MAX_LENGTH.
+static WEBP_INLINE int MaxFindCopyLength(int len) {
+    if (len < MAX_LENGTH) {
+        return len;
+    }
+    return MAX_LENGTH;
+}
+
+// Walks the hash chain of the pixel pair at 'base_position' and stops at the
+// first earlier position (not below the window start) whose first 'len' pixels
+// equal those at 'base_position'. The distance between 'base_position' and the
+// position where the walk stopped is stored in *distance_ptr, whether or not a
+// match was found.
+static void HashChainFindOffset(const VP8LHashChain* const p,
+                                int base_position,
+                                const uint32_t* const argb,
+                                int len,
+                                int window_size,
+                                int* const distance_ptr) {
+    const uint32_t* const argb_start = argb + base_position;
+    const int min_pos = (base_position > window_size) ? base_position - window_size : 0;
+    int pos;
+    assert(len <= MAX_LENGTH);
+    pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)];
+    while (pos >= min_pos) {
+        if (FindMatchLength(argb + pos, argb_start, len - 1, len) == len) {
+            break;
+        }
+        pos = p->chain_[pos];
+    }
+    *distance_ptr = base_position - pos;
+}
+
+// Searches the hash chain of the pixel pair at 'base_position' for the longest
+// earlier match of at most 'max_len' pixels. At most 'iter_max' chain entries
+// inside the window are examined, and the search stops early once a match of
+// min(256, max_len) pixels or more is found.
+// The best length and its distance are stored in *length_ptr / *distance_ptr
+// (both 0 when nothing matched). Returns non-zero when the best length is at
+// least MIN_LENGTH.
+static int HashChainFindCopy(const VP8LHashChain* const p,
+                             int base_position,
+                             const uint32_t* const argb,
+                             int max_len,
+                             int window_size,
+                             int iter_max,
+                             int* const distance_ptr,
+                             int* const length_ptr) {
+    const uint32_t* const argb_start = argb + base_position;
+    const int min_pos = (base_position > window_size) ? base_position - window_size : 0;
+    const int good_enough_len = (max_len < 256) ? max_len : 256;
+    int lookups_left = iter_max;
+    int best_length = 0;
+    int best_distance = 0;
+    int pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)];
+
+    while (pos >= min_pos && lookups_left-- > 0) {
+        const int curr_length = FindMatchLength(argb + pos, argb_start, best_length, max_len);
+        if (curr_length > best_length) {
+            best_length = curr_length;
+            best_distance = base_position - pos;
+            if (curr_length >= good_enough_len) {
+                break;
+            }
+        }
+        pos = p->chain_[pos];
+    }
+    *distance_ptr = best_distance;
+    *length_ptr = best_length;
+    return (best_length >= MIN_LENGTH);
+}
+
+// Appends one pixel to 'refs'. Without a color cache the pixel is stored as a
+// literal. With a color cache it is stored as a cache index when the cache
+// slot for the pixel already holds that value; otherwise it is stored as a
+// literal and the slot is updated.
+static WEBP_INLINE void AddSingleLiteral(uint32_t pixel,
+                                         int use_color_cache,
+                                         VP8LColorCache* const hashers,
+                                         VP8LBackwardRefs* const refs) {
+    PixOrCopy v;
+    if (!use_color_cache) {
+        v = PixOrCopyCreateLiteral(pixel);
+    } else {
+        const uint32_t key = VP8LColorCacheGetIndex(hashers, pixel);
+        const int cache_hit = (VP8LColorCacheLookup(hashers, key) == pixel);
+        if (cache_hit) {
+            v = PixOrCopyCreateCacheIdx(key);
+        } else {
+            v = PixOrCopyCreateLiteral(pixel);
+            VP8LColorCacheSet(hashers, key, pixel);
+        }
+    }
+    BackwardRefsCursorAdd(refs, v);
+}
+
+// Builds backward references for 'argb' (xsize * ysize pixels) into 'refs'
+// using only two kinds of copies: a repetition of the previous pixel
+// (distance 1) or a repetition of the pixel one row above (distance xsize).
+// Copies shorter than 4 pixels are not emitted; those pixels become literals
+// (or color-cache indices when cache_bits > 0).
+// Returns 1 on success, 0 if the color cache could not be initialized or if
+// 'refs' recorded an error.
+// The profiling stop watch is stopped on every exit path, including the
+// color-cache failure path, as BackwardReferencesLz77() does.
+static int BackwardReferencesRle(
+    int xsize, int ysize, const uint32_t* const argb, int cache_bits, VP8LBackwardRefs* const refs) {
+    const int pix_count = xsize * ysize;
+    int i, k;
+    int ok = 0;
+    int cc_init = 0;
+    const int use_color_cache = (cache_bits > 0);
+    VP8LColorCache hashers;
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    if (use_color_cache) {
+        cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+        if (!cc_init) goto Error;
+    }
+    ClearBackwardRefs(refs);
+    // Add first pixel as literal.
+    AddSingleLiteral(argb[0], use_color_cache, &hashers, refs);
+    i = 1;
+    while (i < pix_count) {
+        const int max_len = MaxFindCopyLength(pix_count - i);
+        const int kMinLength = 4;
+        // Run length against the previous pixel and against the previous row
+        // (the latter is only available from the second row on).
+        const int rle_len = FindMatchLength(argb + i, argb + i - 1, 0, max_len);
+        const int prev_row_len = (i < xsize) ? 0 : FindMatchLength(argb + i, argb + i - xsize, 0, max_len);
+        if (rle_len >= prev_row_len && rle_len >= kMinLength) {
+            BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, rle_len));
+            // We don't need to update the color cache here since it is always the
+            // same pixel being copied, and that does not change the color cache
+            // state.
+            i += rle_len;
+        } else if (prev_row_len >= kMinLength) {
+            BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(xsize, prev_row_len));
+            if (use_color_cache) {
+                for (k = 0; k < prev_row_len; ++k) {
+                    VP8LColorCacheInsert(&hashers, argb[i + k]);
+                }
+            }
+            i += prev_row_len;
+        } else {
+            AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
+            i++;
+        }
+    }
+    ok = !refs->error_;
+Error:
+    if (cc_init) VP8LColorCacheClear(&hashers);
+    StopProfiling(&stop_watch, &timeBackwardRefRle, &countBackwardRefRle);
+    return ok;
+}
+
+// Builds backward references for 'argb' (xsize * ysize pixels) into 'refs'
+// with a greedy hash-chain search: at each position the best match returned by
+// HashChainFindCopy() is emitted as a copy when it is long enough, otherwise
+// the pixel is emitted as a literal (or color-cache index when cache_bits > 0).
+// 'quality' and 'low_effort' determine the number of chain lookups and the
+// search window. Returns 1 on success, 0 if the color cache could not be
+// initialized or if 'refs' recorded an error.
+static int BackwardReferencesLz77(int xsize,
+                                  int ysize,
+                                  const uint32_t* const argb,
+                                  int cache_bits,
+                                  int quality,
+                                  int low_effort,
+                                  VP8LHashChain* const hash_chain,
+                                  VP8LBackwardRefs* const refs) {
+    int i;
+    int ok = 0;
+    int cc_init = 0;
+    const int use_color_cache = (cache_bits > 0);
+    const int pix_count = xsize * ysize;
+    VP8LColorCache hashers;
+    int iter_max = GetMaxItersForQuality(quality, low_effort);
+    const int window_size = GetWindowSizeForHashChain(quality, xsize);
+    // Budget of literals: once it is used up, the literal branch below emits
+    // two pixels per loop iteration instead of one.
+    int min_matches = 32;
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    if (use_color_cache) {
+        cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+        if (!cc_init) goto Error;
+    }
+    ClearBackwardRefs(refs);
+    HashChainReset(hash_chain);
+    // The last two pixels are handled by the literal loop after this one.
+    for (i = 0; i < pix_count - 2;) {
+        // Alternative#1: Code the pixels starting at 'i' using backward reference.
+        int offset = 0;
+        int len = 0;
+        const int max_len = MaxFindCopyLength(pix_count - i);
+        HashChainFindCopy(hash_chain, i, argb, max_len, window_size, iter_max, &offset, &len);
+        // A match of exactly MIN_LENGTH is only accepted for distances up to 512.
+        if (len > MIN_LENGTH || (len == MIN_LENGTH && offset <= 512)) {
+            int offset2 = 0;
+            int len2 = 0;
+            int k;
+            // A copy was found: reset the literal budget to a smaller value.
+            min_matches = 8;
+            HashChainInsert(hash_chain, &argb[i], i);
+            if ((len < (max_len >> 2)) && !low_effort) {
+                // Evaluate Alternative#2: Insert the pixel at 'i' as literal, and code
+                // the pixels starting at 'i + 1' using backward reference.
+                HashChainFindCopy(hash_chain, i + 1, argb, max_len - 1, window_size, iter_max, &offset2, &len2);
+                if (len2 > len + 1) {
+                    AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
+                    i++; // Backward reference to be done for next pixel.
+                    len = len2;
+                    offset = offset2;
+                }
+            }
+            BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
+            if (use_color_cache) {
+                for (k = 0; k < len; ++k) {
+                    VP8LColorCacheInsert(&hashers, argb[i + k]);
+                }
+            }
+            // Add to the hash_chain (but cannot add the last pixel).
+            // Nothing is added for distances below 3 or equal to xsize. Otherwise
+            // every second position is added, starting at k = 2, except for the
+            // final 8 positions of the copy, which are all added.
+            if (offset >= 3 && offset != xsize) {
+                const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i;
+                for (k = 2; k < last - 8; k += 2) {
+                    HashChainInsert(hash_chain, &argb[i + k], i + k);
+                }
+                for (; k < last; ++k) {
+                    HashChainInsert(hash_chain, &argb[i + k], i + k);
+                }
+            }
+            i += len;
+        } else {
+            AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
+            HashChainInsert(hash_chain, &argb[i], i);
+            ++i;
+            --min_matches;
+            // Literal budget exhausted: also emit the next pixel as a literal
+            // without searching for a match at that position.
+            if (min_matches <= 0) {
+                AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
+                HashChainInsert(hash_chain, &argb[i], i);
+                ++i;
+            }
+        }
+    }
+    while (i < pix_count) {
+        // Handle the last pixel(s).
+        AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
+        ++i;
+    }
+
+    ok = !refs->error_;
+Error:
+    if (cc_init) VP8LColorCacheClear(&hashers);
+    StopProfiling(&stop_watch, &timeBackwardRefLz77, &countBackwardRefLz77);
+    return ok;
+}
+
+// -----------------------------------------------------------------------------
+
+// Per-symbol cost estimates (in bits) filled by CostModelBuild() from a
+// histogram of backward references.
+typedef struct {
+    double alpha_[VALUES_IN_BYTE];         // indexed with bits 24..31 of a pixel
+    double red_[VALUES_IN_BYTE];           // indexed with bits 16..23 of a pixel
+    double blue_[VALUES_IN_BYTE];          // indexed with bits 0..7 of a pixel
+    double distance_[NUM_DISTANCE_CODES];  // indexed with a distance prefix code
+    // Indexed with bits 8..15 of a pixel, then (from offset VALUES_IN_BYTE) with
+    // length prefix codes, then color-cache indices. The storage is provided by
+    // whoever allocates the CostModel.
+    double* literal_;
+} CostModel;
+
+static int BackwardReferencesTraceBackwards(int xsize,
+                                            int ysize,
+                                            const uint32_t* const argb,
+                                            int quality,
+                                            int cache_bits,
+                                            VP8LHashChain* const hash_chain,
+                                            VP8LBackwardRefs* const refs);
+
+// Turns the symbol counts 'population_counts[0..num_symbols)' into per-symbol
+// cost estimates: output[i] = log2(total count) - log2(count of symbol i), both
+// computed with VP8LFastLog2(). When at most one symbol has a non-zero count,
+// every output entry is set to 0.
+static void ConvertPopulationCountTableToBitEstimates(int num_symbols,
+                                                      const uint32_t population_counts[],
+                                                      double output[]) {
+    uint32_t total = 0;
+    int used_symbols = 0;
+    int s;
+    for (s = 0; s < num_symbols; ++s) {
+        const uint32_t count = population_counts[s];
+        total += count;
+        if (count > 0) ++used_symbols;
+    }
+    if (used_symbols > 1) {
+        const double log_total = VP8LFastLog2(total);
+        for (s = 0; s < num_symbols; ++s) {
+            output[s] = log_total - VP8LFastLog2(population_counts[s]);
+        }
+        return;
+    }
+    memset(output, 0, num_symbols * sizeof(*output));
+}
+
+// Fills the cost tables of 'm' from a histogram of the symbols stored in
+// 'refs'. Returns 1 on success, 0 if the histogram could not be allocated.
+static int CostModelBuild(CostModel* const m, int cache_bits, VP8LBackwardRefs* const refs) {
+    int ok = 0;
+    VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits);
+    if (histo != NULL) {
+        VP8LHistogramCreate(histo, refs, cache_bits);
+
+        ConvertPopulationCountTableToBitEstimates(VP8LHistogramNumCodes(histo->palette_code_bits_),
+                                                  histo->literal_, m->literal_);
+        ConvertPopulationCountTableToBitEstimates(VALUES_IN_BYTE, histo->red_, m->red_);
+        ConvertPopulationCountTableToBitEstimates(VALUES_IN_BYTE, histo->blue_, m->blue_);
+        ConvertPopulationCountTableToBitEstimates(VALUES_IN_BYTE, histo->alpha_, m->alpha_);
+        ConvertPopulationCountTableToBitEstimates(NUM_DISTANCE_CODES, histo->distance_, m->distance_);
+        ok = 1;
+    }
+    // Called on both paths, as in the failure case 'histo' is NULL.
+    VP8LFreeHistogram(histo);
+    return ok;
+}
+
+// Estimated cost of coding pixel 'v' as a literal: the sum of the costs of its
+// four 8-bit channels (bits 8..15 are looked up in the literal_ table).
+static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) {
+    double cost = m->alpha_[v >> 24];
+    cost += m->red_[(v >> 16) & 0xff];
+    cost += m->literal_[(v >> 8) & 0xff];
+    cost += m->blue_[v & 0xff];
+    return cost;
+}
+
+// Estimated cost of coding color-cache index 'idx'. Cache symbols are stored in
+// the literal_ table after the VALUES_IN_BYTE literal entries and the
+// NUM_LENGTH_CODES length entries.
+static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) {
+    const int first_cache_slot = VALUES_IN_BYTE + NUM_LENGTH_CODES;
+    const int slot = (int)(first_cache_slot + idx);
+    return m->literal_[slot];
+}
+
+// Estimated cost of coding copy length 'length': the cost of its prefix code
+// (stored in the literal_ table after the VALUES_IN_BYTE literal entries) plus
+// the number of extra bits reported by VP8LPrefixEncodeBits().
+static WEBP_INLINE double GetLengthCost(const CostModel* const m, uint32_t length) {
+    int code, extra_bits;
+    double prefix_cost;
+    VP8LPrefixEncodeBits(length, &code, &extra_bits);
+    prefix_cost = m->literal_[VALUES_IN_BYTE + code];
+    return prefix_cost + extra_bits;
+}
+
+// Estimated cost of coding distance code 'distance': the cost of its prefix
+// code plus the number of extra bits reported by VP8LPrefixEncodeBits().
+static WEBP_INLINE double GetDistanceCost(const CostModel* const m, uint32_t distance) {
+    int code, extra_bits;
+    double prefix_cost;
+    VP8LPrefixEncodeBits(distance, &code, &extra_bits);
+    prefix_cost = m->distance_[code];
+    return prefix_cost + extra_bits;
+}
+
+// Evaluates the cost of reaching position 'idx' by coding pixel argb[0] on its
+// own, on top of 'prev_cost': a color-cache hit costs 0.68 times the cache
+// symbol cost, anything else 0.82 times the literal cost (the pixel is then
+// inserted in the cache when one is used). If this beats cost[idx], cost[idx]
+// is updated and dist_array[idx] is set to 1.
+// Unless 'is_last' is set, the pixel is also inserted in the hash chain.
+static void AddSingleLiteralWithCostModel(const uint32_t* const argb,
+                                          VP8LHashChain* const hash_chain,
+                                          VP8LColorCache* const hashers,
+                                          const CostModel* const cost_model,
+                                          int idx,
+                                          int is_last,
+                                          int use_color_cache,
+                                          double prev_cost,
+                                          float* const cost,
+                                          uint16_t* const dist_array) {
+    const uint32_t color = argb[0];
+    const double kCacheHitWeight = 0.68;
+    const double kLiteralWeight = 0.82;
+    double candidate_cost = prev_cost;
+
+    if (!is_last) {
+        HashChainInsert(hash_chain, argb, idx);
+    }
+    if (use_color_cache && VP8LColorCacheContains(hashers, color)) {
+        const int ix = VP8LColorCacheGetIndex(hashers, color);
+        candidate_cost += GetCacheCost(cost_model, ix) * kCacheHitWeight;
+    } else {
+        if (use_color_cache) {
+            VP8LColorCacheInsert(hashers, color);
+        }
+        candidate_cost += GetLiteralCost(cost_model, color) * kLiteralWeight;
+    }
+    if (candidate_cost < cost[idx]) {
+        cost[idx] = (float)candidate_cost;
+        dist_array[idx] = 1; // a single pixel is consumed by this step.
+    }
+}
+
+// Forward pass of the cost-based search. Using bit-cost estimates built from
+// the symbols currently stored in 'refs', it records for each pixel position,
+// in 'dist_array', the length of the cheapest step found that ends at that
+// position (1 for a single pixel, otherwise the copy length).
+// 'dist_array' must have room for xsize * ysize entries; entries that were
+// never reached are left untouched.
+// Returns 1 on success, 0 on allocation failure or if 'refs' has its error flag
+// set.
+static int BackwardReferencesHashChainDistanceOnly(int xsize,
+                                                   int ysize,
+                                                   const uint32_t* const argb,
+                                                   int quality,
+                                                   int cache_bits,
+                                                   VP8LHashChain* const hash_chain,
+                                                   VP8LBackwardRefs* const refs,
+                                                   uint16_t* const dist_array) {
+    int i;
+    int ok = 0;
+    int cc_init = 0;
+    const int pix_count = xsize * ysize;
+    const int use_color_cache = (cache_bits > 0);
+    // cost[i]: cheapest cost found so far to code all pixels up to and including i.
+    float* const cost = (float*)WebPSafeMalloc(pix_count, sizeof(*cost));
+    // The literal_ table of the cost model is allocated in the same chunk, right
+    // after the CostModel structure.
+    const size_t literal_array_size =
+        sizeof(double) * (NUM_LITERAL_CODES + NUM_LENGTH_CODES + ((cache_bits > 0) ? (1 << cache_bits) : 0));
+    const size_t cost_model_size = sizeof(CostModel) + literal_array_size;
+    CostModel* const cost_model = (CostModel*)WebPSafeMalloc(1ULL, cost_model_size);
+    VP8LColorCache hashers;
+    const int skip_length = 32 + quality;
+    const int skip_min_distance_code = 2;
+    int iter_max = GetMaxItersForQuality(quality, 0);
+    const int window_size = GetWindowSizeForHashChain(quality, xsize);
+
+    if (cost == NULL || cost_model == NULL) goto Error;
+
+    cost_model->literal_ = (double*)(cost_model + 1);
+    if (use_color_cache) {
+        cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+        if (!cc_init) goto Error;
+    }
+
+    if (!CostModelBuild(cost_model, cache_bits, refs)) {
+        goto Error;
+    }
+
+    // Start with a huge cost everywhere so that any real candidate wins.
+    for (i = 0; i < pix_count; ++i) cost[i] = 1e38f;
+
+    // We loop one pixel at a time, but store all currently best points to
+    // non-processed locations from this point.
+    dist_array[0] = 0;
+    HashChainReset(hash_chain);
+    // Add first pixel as literal.
+    AddSingleLiteralWithCostModel(argb + 0, hash_chain, &hashers, cost_model, 0, 0, use_color_cache, 0.0, cost,
+                                  dist_array);
+    for (i = 1; i < pix_count - 1; ++i) {
+        int offset = 0;
+        int len = 0;
+        double prev_cost = cost[i - 1];
+        const int max_len = MaxFindCopyLength(pix_count - i);
+        HashChainFindCopy(hash_chain, i, argb, max_len, window_size, iter_max, &offset, &len);
+        if (len >= MIN_LENGTH) {
+            const int code = DistanceToPlaneCode(xsize, offset);
+            const double distance_cost = prev_cost + GetDistanceCost(cost_model, code);
+            int k;
+            // Propose, for every end position i + k covered by the match, a copy
+            // of k + 1 pixels starting at i.
+            for (k = 1; k < len; ++k) {
+                const double cost_val = distance_cost + GetLengthCost(cost_model, k);
+                if (cost[i + k] > cost_val) {
+                    cost[i + k] = (float)cost_val;
+                    dist_array[i + k] = k + 1;
+                }
+            }
+            // This if is for speedup only. It roughly doubles the speed, and
+            // makes compression worse by .1 %.
+            if (len >= skip_length && code <= skip_min_distance_code) {
+                // Long copy for short distances, let's skip the middle
+                // lookups for better copies.
+                // 1) insert the hashes.
+                if (use_color_cache) {
+                    for (k = 0; k < len; ++k) {
+                        VP8LColorCacheInsert(&hashers, argb[i + k]);
+                    }
+                }
+                // 2) Add to the hash_chain (but cannot add the last pixel)
+                {
+                    const int last = (len + i < pix_count - 1) ? len + i : pix_count - 1;
+                    for (k = i; k < last; ++k) {
+                        HashChainInsert(hash_chain, &argb[k], k);
+                    }
+                }
+                // 3) jump.
+                i += len - 1; // for loop does ++i, thus -1 here.
+                goto next_symbol;
+            }
+            if (len != MIN_LENGTH) {
+                // Also propose the shortest possible copy (MIN_LENGTH pixels),
+                // using the offset of the first MIN_LENGTH-long match in the chain.
+                int code_min_length;
+                double cost_total;
+                HashChainFindOffset(hash_chain, i, argb, MIN_LENGTH, window_size, &offset);
+                code_min_length = DistanceToPlaneCode(xsize, offset);
+                cost_total = prev_cost + GetDistanceCost(cost_model, code_min_length) + GetLengthCost(cost_model, 1);
+                if (cost[i + 1] > cost_total) {
+                    cost[i + 1] = (float)cost_total;
+                    dist_array[i + 1] = 2;
+                }
+            }
+        }
+        // Always propose coding pixel i on its own as well.
+        AddSingleLiteralWithCostModel(argb + i, hash_chain, &hashers, cost_model, i, 0, use_color_cache, prev_cost,
+                                      cost, dist_array);
+    next_symbol:;
+    }
+    // Handle the last pixel.
+    // (Skipped when the jump above moved 'i' past pix_count - 1.)
+    if (i == (pix_count - 1)) {
+        AddSingleLiteralWithCostModel(argb + i, hash_chain, &hashers, cost_model, i, 1, use_color_cache,
+                                      cost[pix_count - 2], cost, dist_array);
+    }
+    ok = !refs->error_;
+Error:
+    if (cc_init) VP8LColorCacheClear(&hashers);
+    WebPSafeFree(cost_model);
+    WebPSafeFree(cost);
+    return ok;
+}
+
+// Walks 'dist_array' backwards from its last entry, each entry giving the
+// number of positions to step back, and packs the visited step lengths (in
+// forward order) at the end of 'dist_array'. A pointer to the packed part is
+// stored in *chosen_path and its length in *chosen_path_size. Example:
+// dist_array = [1x2xx3x2] => packed [1x2x1232], chosen_path = [1232]
+// Every entry on the path must be at least 1.
+// Indices are used rather than pointers so that no pointer to before the start
+// of 'dist_array' is ever formed when the walk ends.
+static void TraceBackwards(uint16_t* const dist_array,
+                           int dist_array_size,
+                           uint16_t** const chosen_path,
+                           int* const chosen_path_size) {
+    int write_pos = dist_array_size;    // the packed path grows downwards from the end
+    int read_pos = dist_array_size - 1; // position whose step is examined next
+    while (read_pos >= 0) {
+        const int step = dist_array[read_pos];
+        // A zero step would never terminate and would write out of bounds.
+        assert(step > 0);
+        --write_pos;
+        dist_array[write_pos] = (uint16_t)step;
+        read_pos -= step;
+    }
+    *chosen_path = dist_array + write_pos;
+    *chosen_path_size = dist_array_size - write_pos;
+}
+
+// Rebuilds 'refs' by replaying 'chosen_path', a sequence of 'chosen_path_size'
+// step lengths: a step of 1 emits the current pixel as a literal (or as a
+// color-cache index when cache_bits > 0 and the cache contains it), any other
+// value emits a copy of that many pixels whose distance is looked up again in
+// the hash chain.
+// Returns 1 on success, 0 if the color cache could not be initialized or if
+// 'refs' recorded an error.
+static int BackwardReferencesHashChainFollowChosenPath(int xsize,
+                                                       int ysize,
+                                                       const uint32_t* const argb,
+                                                       int quality,
+                                                       int cache_bits,
+                                                       const uint16_t* const chosen_path,
+                                                       int chosen_path_size,
+                                                       VP8LHashChain* const hash_chain,
+                                                       VP8LBackwardRefs* const refs) {
+    const int pix_count = xsize * ysize;
+    const int use_color_cache = (cache_bits > 0);
+    int ix;    // index in chosen_path
+    int i = 0; // current pixel position
+    int ok = 0;
+    int cc_init = 0;
+    const int window_size = GetWindowSizeForHashChain(quality, xsize);
+    VP8LColorCache hashers;
+
+    if (use_color_cache) {
+        cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+        if (!cc_init) goto Error;
+    }
+
+    ClearBackwardRefs(refs);
+    HashChainReset(hash_chain);
+    for (ix = 0; ix < chosen_path_size; ++ix) {
+        int offset = 0;
+        const int len = chosen_path[ix];
+        if (len != 1) {
+            int k;
+            // Copy step: find the distance of a match of 'len' pixels.
+            HashChainFindOffset(hash_chain, i, argb, len, window_size, &offset);
+            BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
+            if (use_color_cache) {
+                for (k = 0; k < len; ++k) {
+                    VP8LColorCacheInsert(&hashers, argb[i + k]);
+                }
+            }
+            {
+                // Add the copied pixels to the hash chain, except the very last
+                // pixel of the image.
+                const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i;
+                for (k = 0; k < last; ++k) {
+                    HashChainInsert(hash_chain, &argb[i + k], i + k);
+                }
+            }
+            i += len;
+        } else {
+            PixOrCopy v;
+            if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) {
+                // push pixel as a color cache index
+                const int idx = VP8LColorCacheGetIndex(&hashers, argb[i]);
+                v = PixOrCopyCreateCacheIdx(idx);
+            } else {
+                if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
+                v = PixOrCopyCreateLiteral(argb[i]);
+            }
+            BackwardRefsCursorAdd(refs, v);
+            // The last pixel of the image is not added to the hash chain.
+            if (i + 1 < pix_count) {
+                HashChainInsert(hash_chain, &argb[i], i);
+            }
+            ++i;
+        }
+    }
+    ok = !refs->error_;
+Error:
+    if (cc_init) VP8LColorCacheClear(&hashers);
+    return ok;
+}
+
+// Cost-based refinement of 'refs': computes the cheapest step ending at each
+// pixel (BackwardReferencesHashChainDistanceOnly), extracts the resulting path
+// (TraceBackwards) and rebuilds 'refs' by following that path.
+// Returns 1 on success, 0 if an allocation or any of the passes failed.
+static int BackwardReferencesTraceBackwards(int xsize,
+                                            int ysize,
+                                            const uint32_t* const argb,
+                                            int quality,
+                                            int cache_bits,
+                                            VP8LHashChain* const hash_chain,
+                                            VP8LBackwardRefs* const refs) {
+    const int dist_array_size = xsize * ysize;
+    uint16_t* const dist_array = (uint16_t*)WebPSafeMalloc(dist_array_size, sizeof(*dist_array));
+    uint16_t* chosen_path = NULL;
+    int chosen_path_size = 0;
+    int ok = 0;
+
+    if (dist_array != NULL &&
+        BackwardReferencesHashChainDistanceOnly(xsize, ysize, argb, quality, cache_bits, hash_chain, refs,
+                                                dist_array)) {
+        TraceBackwards(dist_array, dist_array_size, &chosen_path, &chosen_path_size);
+        if (BackwardReferencesHashChainFollowChosenPath(xsize, ysize, argb, quality, cache_bits, chosen_path,
+                                                        chosen_path_size, hash_chain, refs)) {
+            ok = 1;
+        }
+    }
+    // 'chosen_path' points into 'dist_array', so a single free is enough.
+    WebPSafeFree(dist_array);
+    return ok;
+}
+
+// Converts, in place, every copy entry's distance via DistanceToPlaneCode().
+static void BackwardReferences2DLocality(int xsize, const VP8LBackwardRefs* const refs) {
+    VP8LRefsCursor it;
+    for (it = VP8LRefsCursorInit(refs); VP8LRefsCursorOk(&it); VP8LRefsCursorNext(&it)) {
+        PixOrCopy* const entry = it.cur_pos;
+        if (PixOrCopyIsCopy(entry)) {
+            const int plane_code = DistanceToPlaneCode(xsize, (int)entry->argb_or_distance);
+            entry->argb_or_distance = plane_code;
+        }
+    }
+}
+
+// Returns the estimated bit cost of 'refs' for 'cache_bits' (MAX_ENTROPY on error).
+static double ComputeCacheEntropy(const uint32_t* argb, const VP8LBackwardRefs* const refs, int cache_bits) {
+    const int use_color_cache = (cache_bits > 0);
+    int cc_init = 0; // non-zero once 'hashers' is initialized and must be cleared
+    double entropy = MAX_ENTROPY; // returned unchanged if an allocation/init below fails
+    const double kSmallPenaltyForLargeCache = 4.0;
+    VP8LColorCache hashers;
+    VP8LRefsCursor c = VP8LRefsCursorInit(refs);
+    VP8LHistogram* histo = VP8LAllocateHistogram(cache_bits);
+    if (histo == NULL) goto Error;
+
+    if (use_color_cache) {
+        cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+        if (!cc_init) goto Error;
+    }
+    if (!use_color_cache) { // no cache: count the references exactly as stored
+        while (VP8LRefsCursorOk(&c)) {
+            VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos);
+            VP8LRefsCursorNext(&c);
+        }
+    } else { // replay a cache of 'cache_bits' bits while walking 'refs' and 'argb' together
+        while (VP8LRefsCursorOk(&c)) {
+            const PixOrCopy* const v = c.cur_pos;
+            if (PixOrCopyIsLiteral(v)) {
+                const uint32_t pix = *argb++; // a literal consumes one source pixel
+                const uint32_t key = VP8LColorCacheGetIndex(&hashers, pix);
+                if (VP8LColorCacheLookup(&hashers, key) == pix) { // hit: counted past the literal and length codes
+                    ++histo->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key];
+                } else { // miss: store the pixel, then count its four 8-bit channels
+                    VP8LColorCacheSet(&hashers, key, pix);
+                    ++histo->blue_[pix & 0xff];
+                    ++histo->literal_[(pix >> 8) & 0xff];
+                    ++histo->red_[(pix >> 16) & 0xff];
+                    ++histo->alpha_[pix >> 24];
+                }
+            } else {
+                int len = PixOrCopyLength(v);
+                int code, extra_bits;
+                VP8LPrefixEncodeBits(len, &code, &extra_bits);
+                ++histo->literal_[NUM_LITERAL_CODES + code]; // length codes follow the literal codes
+                VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits);
+                ++histo->distance_[code];
+                do { // the 'len' source pixels covered by this entry all enter the cache
+                    VP8LColorCacheInsert(&hashers, *argb++);
+                } while (--len != 0);
+            }
+            VP8LRefsCursorNext(&c);
+        }
+    }
+    entropy = VP8LHistogramEstimateBits(histo) + kSmallPenaltyForLargeCache * cache_bits;
+Error: // also reached on success; releases whatever was acquired above
+    if (cc_init) VP8LColorCacheClear(&hashers);
+    VP8LFreeHistogram(histo);
+    return entropy;
+}
+
+// Evaluates the optimal cache bits for the local color cache. On input,
+// *best_cache_bits is the maximum to use (0 disables the cache; it is also
+// disabled for quality <= 25); on output it holds the selected value.
+// *lz77_computed is set to 1 only when 'refs' was filled by the LZ77 pass here.
+// Returns 0 if that LZ77 pass fails, 1 otherwise.
+static int CalculateBestCacheSize(const uint32_t* const argb,
+                                  int xsize,
+                                  int ysize,
+                                  int quality,
+                                  VP8LHashChain* const hash_chain,
+                                  VP8LBackwardRefs* const refs,
+                                  int* const lz77_computed,
+                                  int* const best_cache_bits) {
+    int eval_low = 1; // non-zero when entropy_low must be (re)computed
+    int eval_high = 1; // non-zero when entropy_high must be (re)computed
+    double entropy_low = MAX_ENTROPY;
+    double entropy_high = MAX_ENTROPY;
+    const double cost_mul = 5e-4; // relative cost added per cache bit
+    int cache_bits_low = 0;
+    int cache_bits_high = (quality <= 25) ? 0 : *best_cache_bits;
+
+    assert(cache_bits_high <= MAX_COLOR_CACHE_BITS);
+
+    *lz77_computed = 0;
+    if (cache_bits_high == 0) {
+        *best_cache_bits = 0;
+        // Local color cache is disabled.
+        return 1;
+    }
+    if (!BackwardReferencesLz77(xsize, ysize, argb, cache_bits_low, quality, 0, hash_chain, refs)) {
+        return 0; // *lz77_computed is still 0 here
+    }
+    // Do a binary search to find the optimal entropy for cache_bits.
+    while (eval_low || eval_high) {
+        if (eval_low) {
+            entropy_low = ComputeCacheEntropy(argb, refs, cache_bits_low);
+            entropy_low += entropy_low * cache_bits_low * cost_mul;
+            eval_low = 0;
+        }
+        if (eval_high) {
+            entropy_high = ComputeCacheEntropy(argb, refs, cache_bits_high);
+            entropy_high += entropy_high * cache_bits_high * cost_mul;
+            eval_high = 0;
+        }
+        if (entropy_high < entropy_low) { // high end is cheaper: raise the low bound to the midpoint
+            const int prev_cache_bits_low = cache_bits_low;
+            *best_cache_bits = cache_bits_high;
+            cache_bits_low = (cache_bits_low + cache_bits_high) / 2;
+            if (cache_bits_low != prev_cache_bits_low) eval_low = 1; // re-evaluate only if the bound moved
+        } else { // low end is at least as cheap: lower the high bound to the midpoint
+            *best_cache_bits = cache_bits_low;
+            cache_bits_high = (cache_bits_low + cache_bits_high) / 2;
+            if (cache_bits_high != cache_bits_low) eval_high = 1; // stop once both bounds coincide
+        }
+    }
+    *lz77_computed = 1;
+    return 1;
+}
+
+// Rewrites 'refs' in place for a local color cache of 'cache_bits' bits:
+// a literal found in the cache becomes a cache-index entry.
+// Returns 0 if the color cache cannot be initialized, 1 otherwise.
+static int BackwardRefsWithLocalCache(const uint32_t* const argb, int cache_bits, VP8LBackwardRefs* const refs) {
+    VP8LColorCache cache;
+    VP8LRefsCursor cursor = VP8LRefsCursorInit(refs);
+    int consumed = 0; // number of source pixels accounted for so far
+    if (!VP8LColorCacheInit(&cache, cache_bits)) return 0;
+
+    for (; VP8LRefsCursorOk(&cursor); VP8LRefsCursorNext(&cursor)) {
+        PixOrCopy* const entry = cursor.cur_pos;
+        if (!PixOrCopyIsLiteral(entry)) {
+            // 'refs' was built without a local cache, so anything that is
+            // not a literal must be a copy; feed its pixels to the cache.
+            int remaining = entry->len;
+            assert(PixOrCopyIsCopy(entry));
+            while (remaining-- > 0) VP8LColorCacheInsert(&cache, argb[consumed++]);
+            continue;
+        }
+        // A literal accounts for exactly one source pixel.
+        ++consumed;
+        if (VP8LColorCacheContains(&cache, entry->argb_or_distance)) {
+            const int slot = VP8LColorCacheGetIndex(&cache, entry->argb_or_distance);
+            *entry = PixOrCopyCreateCacheIdx(slot);
+        } else {
+            VP8LColorCacheInsert(&cache, entry->argb_or_distance);
+        }
+    }
+    VP8LColorCacheClear(&cache);
+    return 1;
+}
+
+static VP8LBackwardRefs* GetBackwardReferencesLowEffort(int width,
+                                                        int height,
+                                                        const uint32_t* const argb,
+                                                        int quality,
+                                                        int* const cache_bits,
+                                                        VP8LHashChain* const hash_chain,
+                                                        VP8LBackwardRefs refs_array[2]) {
+    VP8LBackwardRefs* const out = refs_array; // i.e. &refs_array[0]; slot 1 is not used here
+    *cache_bits = 0; // this path always reports a disabled color cache
+    if (BackwardReferencesLz77(width, height, argb, 0, quality, 1 /* Low effort. */, hash_chain, out)) {
+        BackwardReferences2DLocality(width, out);
+        return out;
+    }
+    return NULL;
+}
+
+static VP8LBackwardRefs* GetBackwardReferences(int width, // returns the selected refs, or NULL on failure
+                                               int height,
+                                               const uint32_t* const argb,
+                                               int quality,
+                                               int* const cache_bits, // in: maximum cache bits; out: selected value
+                                               VP8LHashChain* const hash_chain,
+                                               VP8LBackwardRefs refs_array[2]) { // [0]: LZ77, [1]: RLE, later trace
+    int lz77_is_useful;
+    int lz77_computed;
+    double bit_cost_lz77, bit_cost_rle;
+    VP8LBackwardRefs* best = NULL; // stays NULL on every early exit to Error
+    VP8LBackwardRefs* refs_lz77 = &refs_array[0];
+    VP8LBackwardRefs* refs_rle = &refs_array[1];
+    VP8LHistogram* histo = NULL;
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    if (!CalculateBestCacheSize(argb, width, height, quality, hash_chain, refs_lz77, &lz77_computed, cache_bits)) {
+        goto Error;
+    }
+
+    if (lz77_computed) {
+        // Transform refs_lz77 for the optimized cache_bits.
+        if (*cache_bits > 0) {
+            if (!BackwardRefsWithLocalCache(argb, *cache_bits, refs_lz77)) {
+                goto Error;
+            }
+        }
+    } else { // refs_lz77 has not been filled yet: build it directly with *cache_bits
+        if (!BackwardReferencesLz77(width, height, argb, *cache_bits, quality, 0 /* Low effort. */, hash_chain,
+                                    refs_lz77)) {
+            goto Error;
+        }
+    }
+
+    if (!BackwardReferencesRle(width, height, argb, *cache_bits, refs_rle)) {
+        goto Error;
+    }
+
+    histo = VP8LAllocateHistogram(*cache_bits);
+    if (histo == NULL) goto Error;
+
+    { // 'histo' is reused for each candidate in turn
+        // Evaluate LZ77 coding.
+        VP8LHistogramCreate(histo, refs_lz77, *cache_bits);
+        bit_cost_lz77 = VP8LHistogramEstimateBits(histo);
+        // Evaluate RLE coding.
+        VP8LHistogramCreate(histo, refs_rle, *cache_bits);
+        bit_cost_rle = VP8LHistogramEstimateBits(histo);
+        // Decide if LZ77 is useful.
+        lz77_is_useful = (bit_cost_lz77 < bit_cost_rle);
+    }
+
+    // Choose appropriate backward reference.
+    if (lz77_is_useful) {
+        // TraceBackwards is costly. Don't execute it at lower quality.
+        const int try_lz77_trace_backwards = (quality >= 25);
+        best = refs_lz77; // default guess: lz77 is better
+        if (try_lz77_trace_backwards) {
+            VP8LBackwardRefs* const refs_trace = refs_rle; // RLE lost the comparison, so its storage is reused
+            if (!VP8LBackwardRefsCopy(refs_lz77, refs_trace)) {
+                best = NULL;
+                goto Error;
+            }
+            if (BackwardReferencesTraceBackwards(width, height, argb, quality, *cache_bits, hash_chain, refs_trace)) {
+                double bit_cost_trace; // when the call above fails, 'best' simply stays refs_lz77
+                // Evaluate the trace-backwards coding.
+                VP8LHistogramCreate(histo, refs_trace, *cache_bits);
+                bit_cost_trace = VP8LHistogramEstimateBits(histo);
+                if (bit_cost_trace < bit_cost_lz77) {
+                    best = refs_trace;
+                }
+            }
+        }
+    } else {
+        best = refs_rle;
+    }
+
+    BackwardReferences2DLocality(width, best);
+
+Error: // also reached on success
+    VP8LFreeHistogram(histo);
+    StopProfiling(&stop_watch, &timeGetBackRef, &countGetBackRef);
+    return best;
+}
+
+VP8LBackwardRefs* VP8LGetBackwardReferences(int width,
+                                            int height,
+                                            const uint32_t* const argb,
+                                            int quality,
+                                            int low_effort,
+                                            int* const cache_bits,
+                                            VP8LHashChain* const hash_chain,
+                                            VP8LBackwardRefs refs_array[2]) {
+    // Dispatch on the requested effort; both helpers take the same arguments.
+    if (!low_effort) {
+        return GetBackwardReferences(width, height, argb, quality, cache_bits, hash_chain, refs_array);
+    }
+    return GetBackwardReferencesLowEffort(width, height, argb, quality, cache_bits, hash_chain, refs_array);
+}
diff --git a/codec/L2/demos/webpEnc/host/src/enc/backward_references.h b/codec/L2/demos/webpEnc/host/src/enc/backward_references.h
new file mode 100644
index 0000000000..9c36ca2268
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/backward_references.h
@@ -0,0 +1,198 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+
+#ifndef WEBP_ENC_BACKWARD_REFERENCES_H_
+#define WEBP_ENC_BACKWARD_REFERENCES_H_
+
+#include <assert.h>
+#include <stdlib.h>
+#include "../webp/types.h"
+#include "../webp/format_constants.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// The maximum allowed limit is 11.
+#define MAX_COLOR_CACHE_BITS 10
+
+// -----------------------------------------------------------------------------
+// PixOrCopy
+
+enum Mode { kLiteral, kCacheIdx, kCopy, kNone }; // values stored in the 'mode' field of PixOrCopy
+
+typedef struct {
+    // mode as uint8_t to make the memory layout to be exactly 8 bytes.
+    uint8_t mode;
+    uint16_t len; // 1 for literals and cache indexes, the copy length for copies
+    uint32_t argb_or_distance; // ARGB value, cache index or copy distance, depending on 'mode'
+} PixOrCopy;
+
+static WEBP_INLINE PixOrCopy PixOrCopyCreateCopy(uint32_t distance, uint16_t len) {
+    PixOrCopy copy; // filled field by field, then returned by value
+    copy.len = len;
+    copy.argb_or_distance = distance;
+    copy.mode = kCopy;
+    return copy;
+}
+
+static WEBP_INLINE PixOrCopy PixOrCopyCreateCacheIdx(int idx) {
+    PixOrCopy entry;
+    assert(idx >= 0);
+    assert(idx < (1 << MAX_COLOR_CACHE_BITS));
+    entry.len = 1; // a cache-index entry always has length 1
+    entry.argb_or_distance = idx;
+    entry.mode = kCacheIdx;
+    return entry;
+}
+
+static WEBP_INLINE PixOrCopy PixOrCopyCreateLiteral(uint32_t argb) {
+    PixOrCopy literal;
+    literal.len = 1; // a literal entry always has length 1
+    literal.argb_or_distance = argb;
+    literal.mode = kLiteral;
+    return literal;
+}
+
+static WEBP_INLINE int PixOrCopyIsLiteral(const PixOrCopy* const p) {
+    return (p->mode == kLiteral); // 1 if 'p' holds a literal pixel, 0 otherwise
+}
+
+static WEBP_INLINE int PixOrCopyIsCacheIdx(const PixOrCopy* const p) {
+    return (p->mode == kCacheIdx); // 1 if 'p' holds a color-cache index, 0 otherwise
+}
+
+static WEBP_INLINE int PixOrCopyIsCopy(const PixOrCopy* const p) {
+    return (p->mode == kCopy); // 1 if 'p' holds a backward copy, 0 otherwise
+}
+
+static WEBP_INLINE uint32_t PixOrCopyLiteral(const PixOrCopy* const p, int component) {
+    assert(p->mode == kLiteral);
+    return (p->argb_or_distance >> (component * 8)) & 0xff; // byte number 'component', 0 being the lowest
+}
+
+static WEBP_INLINE uint32_t PixOrCopyLength(const PixOrCopy* const p) {
+    return p->len; // no mode check: 'len' is set by all three creators
+}
+
+static WEBP_INLINE uint32_t PixOrCopyArgb(const PixOrCopy* const p) {
+    assert(p->mode == kLiteral);
+    return p->argb_or_distance; // the full 32-bit value stored by PixOrCopyCreateLiteral()
+}
+
+static WEBP_INLINE uint32_t PixOrCopyCacheIdx(const PixOrCopy* const p) {
+    assert(p->mode == kCacheIdx);
+    assert(p->argb_or_distance < (1U << MAX_COLOR_CACHE_BITS));
+    return p->argb_or_distance; // the index stored by PixOrCopyCreateCacheIdx()
+}
+
+static WEBP_INLINE uint32_t PixOrCopyDistance(const PixOrCopy* const p) {
+    assert(p->mode == kCopy);
+    return p->argb_or_distance; // the distance stored by PixOrCopyCreateCopy()
+}
+
+// -----------------------------------------------------------------------------
+// VP8LHashChain
+
+#define HASH_BITS 18
+#define HASH_SIZE (1 << HASH_BITS) // number of entries in hash_to_first_index_
+
+typedef struct VP8LHashChain VP8LHashChain;
+struct VP8LHashChain {
+    // Stores the most recently added position with the given hash value.
+    int32_t hash_to_first_index_[HASH_SIZE];
+    // chain_[pos] stores the previous position with the same hash value
+    // for every pixel in the image.
+    int32_t* chain_;
+    // This is the maximum size of the hash_chain that can be constructed.
+    // Typically this is the pixel count (width x height) for a given image.
+    int size_;
+};
+
+// Must be called first, to set size.
+int VP8LHashChainInit(VP8LHashChain* const p, int size); // NOTE(review): return convention not shown here
+void VP8LHashChainClear(VP8LHashChain* const p); // release memory
+
+// -----------------------------------------------------------------------------
+// VP8LBackwardRefs (block-based backward-references storage)
+
+// Maximum number of reference blocks the image will be segmented into.
+#define MAX_REFS_BLOCK_PER_IMAGE 16
+
+typedef struct PixOrCopyBlock PixOrCopyBlock; // forward declaration
+typedef struct VP8LBackwardRefs VP8LBackwardRefs;
+
+// Container for the chain of PixOrCopyBlock blocks.
+struct VP8LBackwardRefs {
+    int block_size_;              // common block-size
+    int error_;                   // set to true if some memory error occurred
+    PixOrCopyBlock* refs_;        // list of currently used blocks
+    PixOrCopyBlock** tail_;       // for list recycling
+    PixOrCopyBlock* free_blocks_; // free-list
+    PixOrCopyBlock* last_block_;  // used for adding new refs (internal)
+};
+
+// Initialize the object. 'block_size' is the common block size to store
+// references (typically, width * height / MAX_REFS_BLOCK_PER_IMAGE).
+void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size);
+// Release memory for backward references.
+void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs);
+// Copies the 'src' backward refs to the 'dst'. Returns 0 in case of error.
+int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src, VP8LBackwardRefs* const dst);
+
+// Cursor for iterating over the content of a VP8LBackwardRefs object.
+typedef struct {
+    // public:
+    PixOrCopy* cur_pos; // current position; NULL once the cursor is no longer valid
+    // private:
+    PixOrCopyBlock* cur_block_; // current block in the refs list
+    const PixOrCopy* last_pos_; // sentinel for switching to next block
+} VP8LRefsCursor;
+
+// Returns a cursor positioned at the beginning of the references list.
+VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs);
+// Returns true if cursor is pointing at a valid position.
+static WEBP_INLINE int VP8LRefsCursorOk(const VP8LRefsCursor* const c) {
+    return (c->cur_pos != NULL);
+}
+// Move to next block of references. Internal, not to be called directly.
+void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c);
+// Move to next position, or NULL. Should not be called if !VP8LRefsCursorOk().
+static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) {
+    assert(c != NULL);
+    assert(VP8LRefsCursorOk(c));
+    if (++c->cur_pos == c->last_pos_) VP8LRefsCursorNextBlock(c); // sentinel reached: switch blocks
+}
+
+// -----------------------------------------------------------------------------
+// Main entry points
+
+// Evaluates best possible backward references for specified quality.
+// On input, *cache_bits is the maximum cache bits to use (0 disables the
+// local color cache); on output it holds the value actually selected.
+// A non-zero 'low_effort' selects an LZ77-only search with *cache_bits = 0.
+// Returns a pointer to the better of the two backward refs, viz. refs[0] or
+// refs[1], or NULL on error.
+VP8LBackwardRefs* VP8LGetBackwardReferences(int width,
+                                            int height,
+                                            const uint32_t* const argb,
+                                            int quality,
+                                            int low_effort,
+                                            int* const cache_bits,
+                                            VP8LHashChain* const hash_chain,
+                                            VP8LBackwardRefs refs[2]);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // WEBP_ENC_BACKWARD_REFERENCES_H_
diff --git a/codec/L2/demos/webpEnc/host/src/enc/config.c b/codec/L2/demos/webpEnc/host/src/enc/config.c
new file mode 100644
index 0000000000..134a10bf3d
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/config.c
@@ -0,0 +1,145 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Coding tools configuration
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "../webp/encode.h"
+
+//------------------------------------------------------------------------------
+// WebPConfig
+//------------------------------------------------------------------------------
+
+int WebPConfigInitInternal(WebPConfig* config, WebPPreset preset, float quality, int version) {
+    // Fails (returns 0) on a caller/library ABI mismatch or a NULL 'config';
+    // otherwise fills every field and returns WebPValidateConfig()'s verdict.
+    if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_ENCODER_ABI_VERSION) || config == NULL) {
+        return 0;
+    }
+
+    // Targets, method and partitioning.
+    config->quality = quality;
+    config->target_size = 0;
+    config->target_PSNR = 0.;
+    config->method = 4;
+    config->pass = 1;
+    config->segments = 4;
+    config->partitions = 0;
+    config->partition_limit = 0;
+    // SNS, filtering and preprocessing.
+    config->sns_strength = 50;
+    config->filter_strength = 60; // mid-filtering
+    config->filter_sharpness = 0;
+    config->filter_type = 1; // default: strong (so U/V is filtered too)
+    config->autofilter = 0;
+    config->preprocessing = 0;
+    // Alpha plane.
+    config->alpha_compression = 1;
+    config->alpha_filtering = 1;
+    config->alpha_quality = 100;
+    // Lossless coding.
+    config->lossless = 0;
+    config->near_lossless = 100;
+    config->exact = 0;
+    // Everything else.
+    config->show_compressed = 0;
+    config->use_ocl = 0;
+    config->image_hint = WEBP_HINT_DEFAULT;
+    config->emulate_jpeg_size = 0;
+    config->thread_level = 0;
+    config->low_memory = 0;
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+    config->delta_palettization = 0;
+#endif // WEBP_EXPERIMENTAL_FEATURES
+
+    // TODO(skal): tune.
+    // Preset-specific overrides on top of the defaults above.
+    if (preset == WEBP_PRESET_PICTURE) {
+        config->sns_strength = 80;
+        config->filter_sharpness = 4;
+        config->filter_strength = 35;
+        config->preprocessing &= ~2; // no dithering
+    } else if (preset == WEBP_PRESET_PHOTO) {
+        config->sns_strength = 80;
+        config->filter_sharpness = 3;
+        config->filter_strength = 30;
+        config->preprocessing |= 2;
+    } else if (preset == WEBP_PRESET_DRAWING) {
+        config->sns_strength = 25;
+        config->filter_sharpness = 6;
+        config->filter_strength = 10;
+    } else if (preset == WEBP_PRESET_ICON || preset == WEBP_PRESET_TEXT) {
+        // ICON and TEXT share the three settings below;
+        // TEXT additionally lowers 'segments'.
+        config->sns_strength = 0;
+        config->filter_strength = 0; // disable filtering to retain sharpness
+        config->preprocessing &= ~2; // no dithering
+        if (preset == WEBP_PRESET_TEXT) {
+            config->segments = 2;
+        }
+    }
+    // WEBP_PRESET_DEFAULT, like any unlisted value, keeps the defaults as they are.
+
+    return WebPValidateConfig(config);
+}
+
+int WebPValidateConfig(const WebPConfig* config) {
+    if (config == NULL) return 0;
+    if (config->quality < 0 || config->quality > 100) return 0;
+    if (config->target_size < 0 || config->target_PSNR < 0) return 0;
+    if (config->method < 0 || config->method > 6) return 0;
+    if (config->segments < 1 || config->segments > 4) return 0;
+    if (config->sns_strength < 0 || config->sns_strength > 100) return 0;
+    if (config->filter_strength < 0 || config->filter_strength > 100) return 0;
+    if (config->filter_sharpness < 0 || config->filter_sharpness > 7) return 0;
+    if (config->pass < 1 || config->pass > 10) return 0;
+    if (config->preprocessing < 0 || config->preprocessing > 7) return 0;
+    if (config->partitions < 0 || config->partitions > 3) return 0;
+    if (config->partition_limit < 0 || config->partition_limit > 100) return 0;
+    if (config->near_lossless < 0 || config->near_lossless > 100) return 0;
+    if (config->image_hint >= WEBP_HINT_LAST) return 0;
+    // Alpha plane settings.
+    if (config->alpha_compression < 0 || config->alpha_filtering < 0) return 0;
+    if (config->alpha_quality < 0 || config->alpha_quality > 100) return 0;
+    // Flags restricted to 0 or 1.
+    if (config->filter_type < 0 || config->filter_type > 1) return 0;
+    if (config->autofilter < 0 || config->autofilter > 1) return 0;
+    if (config->show_compressed < 0 || config->show_compressed > 1) return 0;
+    if (config->lossless < 0 || config->lossless > 1) return 0;
+    if (config->emulate_jpeg_size < 0 || config->emulate_jpeg_size > 1) return 0;
+    if (config->thread_level < 0 || config->thread_level > 1) return 0;
+    if (config->low_memory < 0 || config->low_memory > 1) return 0;
+    if (config->exact < 0 || config->exact > 1) return 0;
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+    if (config->delta_palettization < 0 || config->delta_palettization > 1) return 0;
+#endif // WEBP_EXPERIMENTAL_FEATURES
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+
+#define MAX_LEVEL 9 // highest accepted 'level'; the table below has MAX_LEVEL + 1 rows
+
+// Mapping between -z level and -m / -q parameter settings.
+static const struct {
+    uint8_t method_; // copied to config->method by WebPConfigLosslessPreset()
+    uint8_t quality_; // copied to config->quality by WebPConfigLosslessPreset()
+} kLosslessPresets[MAX_LEVEL + 1] = {{0, 0},  {1, 20}, {2, 25}, {3, 30}, {3, 50},
+                                     {4, 50}, {4, 75}, {4, 90}, {5, 90}, {6, 100}};
+
+int WebPConfigLosslessPreset(WebPConfig* config, int level) {
+    if (config == NULL || !(0 <= level && level <= MAX_LEVEL)) return 0; // nothing is modified on failure
+    config->quality = kLosslessPresets[level].quality_;
+    config->method = kLosslessPresets[level].method_;
+    config->lossless = 1;
+    return 1;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/enc/cost.c b/codec/L2/demos/webpEnc/host/src/enc/cost.c
new file mode 100644
index 0000000000..bbd9248507
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/cost.c
@@ -0,0 +1,349 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Cost tables for level and modes
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./cost.h"
+
+//------------------------------------------------------------------------------
+// Level cost tables
+
+// For each given level, the following table gives the pattern of contexts to
+// use for coding it (in [][0]) as well as the bit value to use for each
+// context (in [][1]).
+const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2] = {
+    {0x001, 0x000}, {0x007, 0x001}, {0x00f, 0x005}, {0x00f, 0x00d}, {0x033, 0x003}, {0x033, 0x003}, {0x033, 0x023},
+    {0x033, 0x023}, {0x033, 0x023}, {0x033, 0x023}, {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x013},
+    {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093},
+    {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093},
+    {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x153, 0x053},
+    {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+    {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+    {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+    {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+    {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x153}};
+
+// Sums the bit costs described by the VP8LevelCodes[] entry of 'level' (>= 1):
+// bit k of the pattern, when set, charges bit k of the value against probas[k + 2].
+static int VariableLevelCost(int level, const uint8_t probas[NUM_PROBAS]) {
+    const uint16_t* const code = VP8LevelCodes[level - 1];
+    int ctx_mask = code[0]; // which contexts are used
+    int bit_vals = code[1]; // bit value coded in each of them
+    int total = 0;
+    int idx;
+    for (idx = 2; ctx_mask != 0; ++idx, ctx_mask >>= 1, bit_vals >>= 1) {
+        if ((ctx_mask & 1) == 0) continue;
+        total += VP8BitCost(bit_vals & 1, probas[idx]);
+    }
+    return total;
+}
+
+//------------------------------------------------------------------------------
+// Pre-calculate the level costs once and for all
+
+void VP8CalculateLevelCosts(VP8EncProba* const proba) {
+    int ctype, band, ctx;
+
+    if (!proba->dirty_) return; // flag is clear: the tables are not recomputed.
+
+    for (ctype = 0; ctype < NUM_TYPES; ++ctype) {
+        int n;
+        for (band = 0; band < NUM_BANDS; ++band) {
+            for (ctx = 0; ctx < NUM_CTX; ++ctx) {
+                const uint8_t* const p = proba->coeffs_[ctype][band][ctx];
+                uint16_t* const table = proba->level_cost_[ctype][band][ctx];
+                const int cost0 = (ctx > 0) ? VP8BitCost(1, p[0]) : 0; // p[0] is only charged when ctx > 0
+                const int cost_base = VP8BitCost(1, p[1]) + cost0;
+                int v;
+                table[0] = VP8BitCost(0, p[1]) + cost0; // level 0: bit 0 on p[1]
+                for (v = 1; v <= MAX_VARIABLE_LEVEL; ++v) {
+                    table[v] = cost_base + VariableLevelCost(v, p); // level >= 1: bit 1 on p[1] plus the variable part
+                }
+                // Starting at level 67 and up, the variable part of the cost is
+                // actually constant.
+            }
+        }
+        for (n = 0; n < 16; ++n) { // replicate bands: one entry per position n, via VP8EncBands[]. No sentinel needed.
+            for (ctx = 0; ctx < NUM_CTX; ++ctx) {
+                proba->remapped_costs_[ctype][n][ctx] = proba->level_cost_[ctype][VP8EncBands[n]][ctx];
+            }
+        }
+    }
+    proba->dirty_ = 0; // next call returns early until the flag is set again
+}
+
+//------------------------------------------------------------------------------
+// Mode cost tables.
+
+// These are the fixed probabilities (in the coding trees) turned into bit-cost
+// by calling VP8BitCost().
+const uint16_t VP8FixedCostsUV[4] = {302, 984, 439, 642};
+// note: these values include the fixed VP8BitCost(1, 145) mode selection cost.
+const uint16_t VP8FixedCostsI16[4] = {663, 919, 872, 919};
+const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES] = {
+    {{40, 1151, 1723, 1874, 2103, 2019, 1628, 1777, 2226, 2137},
+     {192, 469, 1296, 1308, 1849, 1794, 1781, 1703, 1713, 1522},
+     {142, 910, 762, 1684, 1849, 1576, 1460, 1305, 1801, 1657},
+     {559, 641, 1370, 421, 1182, 1569, 1612, 1725, 863, 1007},
+     {299, 1059, 1256, 1108, 636, 1068, 1581, 1883, 869, 1142},
+     {277, 1111, 707, 1362, 1089, 672, 1603, 1541, 1545, 1291},
+     {214, 781, 1609, 1303, 1632, 2229, 726, 1560, 1713, 918},
+     {152, 1037, 1046, 1759, 1983, 2174, 1358, 742, 1740, 1390},
+     {512, 1046, 1420, 753, 752, 1297, 1486, 1613, 460, 1207},
+     {424, 827, 1362, 719, 1462, 1202, 1199, 1476, 1199, 538}},
+    {{240, 402, 1134, 1491, 1659, 1505, 1517, 1555, 1979, 2099},
+     {467, 242, 960, 1232, 1714, 1620, 1834, 1570, 1676, 1391},
+     {500, 455, 463, 1507, 1699, 1282, 1564, 982, 2114, 2114},
+     {672, 643, 1372, 331, 1589, 1667, 1453, 1938, 996, 876},
+     {458, 783, 1037, 911, 738, 968, 1165, 1518, 859, 1033},
+     {504, 815, 504, 1139, 1219, 719, 1506, 1085, 1268, 1268},
+     {333, 630, 1445, 1239, 1883, 3672, 799, 1548, 1865, 598},
+     {399, 644, 746, 1342, 1856, 1350, 1493, 613, 1855, 1015},
+     {622, 749, 1205, 608, 1066, 1408, 1290, 1406, 546, 971},
+     {500, 753, 1041, 668, 1230, 1617, 1297, 1425, 1383, 523}},
+    {{394, 553, 523, 1502, 1536, 981, 1608, 1142, 1666, 2181},
+     {655, 430, 375, 1411, 1861, 1220, 1677, 1135, 1978, 1553},
+     {690, 640, 245, 1954, 2070, 1194, 1528, 982, 1972, 2232},
+     {559, 834, 741, 867, 1131, 980, 1225, 852, 1092, 784},
+     {690, 875, 516, 959, 673, 894, 1056, 1190, 1528, 1126},
+     {740, 951, 384, 1277, 1177, 492, 1579, 1155, 1846, 1513},
+     {323, 775, 1062, 1776, 3062, 1274, 813, 1188, 1372, 655},
+     {488, 971, 484, 1767, 1515, 1775, 1115, 503, 1539, 1461},
+     {740, 1006, 998, 709, 851, 1230, 1337, 788, 741, 721},
+     {522, 1073, 573, 1045, 1346, 887, 1046, 1146, 1203, 697}},
+    {{105, 864, 1442, 1009, 1934, 1840, 1519, 1920, 1673, 1579},
+     {534, 305, 1193, 683, 1388, 2164, 1802, 1894, 1264, 1170},
+     {305, 518, 877, 1108, 1426, 3215, 1425, 1064, 1320, 1242},
+     {683, 732, 1927, 257, 1493, 2048, 1858, 1552, 1055, 947},
+     {394, 814, 1024, 660, 959, 1556, 1282, 1289, 893, 1047},
+     {528, 615, 996, 940, 1201, 635, 1094, 2515, 803, 1358},
+     {347, 614, 1609, 1187, 3133, 1345, 1007, 1339, 1017, 667},
+     {218, 740, 878, 1605, 3650, 3650, 1345, 758, 1357, 1617},
+     {672, 750, 1541, 558, 1257, 1599, 1870, 2135, 402, 1087},
+     {592, 684, 1161, 430, 1092, 1497, 1475, 1489, 1095, 822}},
+    {{228, 1056, 1059, 1368, 752, 982, 1512, 1518, 987, 1782},
+     {494, 514, 818, 942, 965, 892, 1610, 1356, 1048, 1363},
+     {512, 648, 591, 1042, 761, 991, 1196, 1454, 1309, 1463},
+     {683, 749, 1043, 676, 841, 1396, 1133, 1138, 654, 939},
+     {622, 1101, 1126, 994, 361, 1077, 1203, 1318, 877, 1219},
+     {631, 1068, 857, 1650, 651, 477, 1650, 1419, 828, 1170},
+     {555, 727, 1068, 1335, 3127, 1339, 820, 1331, 1077, 429},
+     {504, 879, 624, 1398, 889, 889, 1392, 808, 891, 1406},
+     {683, 1602, 1289, 977, 578, 983, 1280, 1708, 406, 1122},
+     {399, 865, 1433, 1070, 1072, 764, 968, 1477, 1223, 678}},
+    {{333, 760, 935, 1638, 1010, 529, 1646, 1410, 1472, 2219},
+     {512, 494, 750, 1160, 1215, 610, 1870, 1868, 1628, 1169},
+     {572, 646, 492, 1934, 1208, 603, 1580, 1099, 1398, 1995},
+     {786, 789, 942, 581, 1018, 951, 1599, 1207, 731, 768},
+     {690, 1015, 672, 1078, 582, 504, 1693, 1438, 1108, 2897},
+     {768, 1267, 571, 2005, 1243, 244, 2881, 1380, 1786, 1453},
+     {452, 899, 1293, 903, 1311, 3100, 465, 1311, 1319, 813},
+     {394, 927, 942, 1103, 1358, 1104, 946, 593, 1363, 1109},
+     {559, 1005, 1007, 1016, 658, 1173, 1021, 1164, 623, 1028},
+     {564, 796, 632, 1005, 1014, 863, 2316, 1268, 938, 764}},
+    {{266, 606, 1098, 1228, 1497, 1243, 948, 1030, 1734, 1461},
+     {366, 585, 901, 1060, 1407, 1247, 876, 1134, 1620, 1054},
+     {452, 565, 542, 1729, 1479, 1479, 1016, 886, 2938, 1150},
+     {555, 1088, 1533, 950, 1354, 895, 834, 1019, 1021, 496},
+     {704, 815, 1193, 971, 973, 640, 1217, 2214, 832, 578},
+     {672, 1245, 579, 871, 875, 774, 872, 1273, 1027, 949},
+     {296, 1134, 2050, 1784, 1636, 3425, 442, 1550, 2076, 722},
+     {342, 982, 1259, 1846, 1848, 1848, 622, 568, 1847, 1052},
+     {555, 1064, 1304, 828, 746, 1343, 1075, 1329, 1078, 494},
+     {288, 1167, 1285, 1174, 1639, 1639, 833, 2254, 1304, 509}},
+    {{342, 719, 767, 1866, 1757, 1270, 1246, 550, 1746, 2151},
+     {483, 653, 694, 1509, 1459, 1410, 1218, 507, 1914, 1266},
+     {488, 757, 447, 2979, 1813, 1268, 1654, 539, 1849, 2109},
+     {522, 1097, 1085, 851, 1365, 1111, 851, 901, 961, 605},
+     {709, 716, 841, 728, 736, 945, 941, 862, 2845, 1057},
+     {512, 1323, 500, 1336, 1083, 681, 1342, 717, 1604, 1350},
+     {452, 1155, 1372, 1900, 1501, 3290, 311, 944, 1919, 922},
+     {403, 1520, 977, 2132, 1733, 3522, 1076, 276, 3335, 1547},
+     {559, 1374, 1101, 615, 673, 2462, 974, 795, 984, 984},
+     {547, 1122, 1062, 812, 1410, 951, 1140, 622, 1268, 651}},
+    {{165, 982, 1235, 938, 1334, 1366, 1659, 1578, 964, 1612},
+     {592, 422, 925, 847, 1139, 1112, 1387, 2036, 861, 1041},
+     {403, 837, 732, 770, 941, 1658, 1250, 809, 1407, 1407},
+     {896, 874, 1071, 381, 1568, 1722, 1437, 2192, 480, 1035},
+     {640, 1098, 1012, 1032, 684, 1382, 1581, 2106, 416, 865},
+     {559, 1005, 819, 914, 710, 770, 1418, 920, 838, 1435},
+     {415, 1258, 1245, 870, 1278, 3067, 770, 1021, 1287, 522},
+     {406, 990, 601, 1009, 1265, 1265, 1267, 759, 1017, 1277},
+     {968, 1182, 1329, 788, 1032, 1292, 1705, 1714, 203, 1403},
+     {732, 877, 1279, 471, 901, 1161, 1545, 1294, 755, 755}},
+    {{111, 931, 1378, 1185, 1933, 1648, 1148, 1714, 1873, 1307},
+     {406, 414, 1030, 1023, 1910, 1404, 1313, 1647, 1509, 793},
+     {342, 640, 575, 1088, 1241, 1349, 1161, 1350, 1756, 1502},
+     {559, 766, 1185, 357, 1682, 1428, 1329, 1897, 1219, 802},
+     {473, 909, 1164, 771, 719, 2508, 1427, 1432, 722, 782},
+     {342, 892, 785, 1145, 1150, 794, 1296, 1550, 973, 1057},
+     {208, 1036, 1326, 1343, 1606, 3395, 815, 1455, 1618, 712},
+     {228, 928, 890, 1046, 3499, 1711, 994, 829, 1720, 1318},
+     {768, 724, 1058, 636, 991, 1075, 1319, 1324, 616, 825},
+     {305, 1167, 1358, 899, 1587, 1587, 987, 1988, 1332, 501}}};
+
+//------------------------------------------------------------------------------
+// helper functions for residuals struct VP8Residual.
+
+void VP8InitResidual(int first, int coeff_type, VP8Encoder* const enc, VP8Residual* const res) {
+    res->first = first;
+    res->coeff_type = coeff_type;
+    res->costs = enc->proba_.remapped_costs_[coeff_type]; // cost tables for this coefficient type
+    res->stats = enc->proba_.stats_[coeff_type];          // statistics for this coefficient type
+    res->prob = enc->proba_.coeffs_[coeff_type];          // probabilities for this coefficient type
+}
+
+void VP8InitResidual_smp(int first, int coeff_type, VP8Encoder* const enc, VP8Residual* const res) {
+    res->coeff_type = coeff_type;
+    res->prob = enc->proba_.coeffs_[coeff_type];
+    res->stats = enc->proba_.stats_[coeff_type];
+    // NOTE(review): unlike VP8InitResidual(), res->costs is not assigned here; confirm no caller reads it.
+    res->first = first;
+}
+
+//------------------------------------------------------------------------------
+// Mode costs
+
+// Evaluates VP8GetResidualCost() for the 16 'levels' of the luma sub-block
+// selected by it->i4_, using coefficient type 3.
+int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]) {
+    VP8Encoder* const enc = it->enc_;
+    const int col = it->i4_ & 3;  // low two bits of the sub-block index
+    const int row = it->i4_ >> 2; // remaining bits of the sub-block index
+    const int nz_ctx = it->top_nz_[col] + it->left_nz_[row];
+    VP8Residual residual;
+
+    VP8InitResidual(0, 3, enc, &residual);
+    VP8SetResidualCoeffs(levels, &residual);
+    return VP8GetResidualCost(nz_ctx, &residual);
+}
+
+int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) {
+    VP8Residual res;
+    VP8Encoder* const enc = it->enc_;
+    int x, y;
+    int R = 0; // accumulated cost: one DC residual plus 16 AC residuals
+
+    VP8IteratorNzToBytes(it); // re-import the non-zero context
+
+    // DC: coefficient type 1, context read from slot 8 of top_nz_ / left_nz_
+    VP8InitResidual(0, 1, enc, &res);
+    VP8SetResidualCoeffs(rd->y_dc_levels, &res);
+    R += VP8GetResidualCost(it->top_nz_[8] + it->left_nz_[8], &res);
+
+    // AC: coefficient type 0, starting at coefficient 1, one residual per (x, y)
+    VP8InitResidual(1, 0, enc, &res);
+    for (y = 0; y < 4; ++y) {
+        for (x = 0; x < 4; ++x) {
+            const int ctx = it->top_nz_[x] + it->left_nz_[y];
+            VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+            R += VP8GetResidualCost(ctx, &res);
+            it->top_nz_[x] = it->left_nz_[y] = (res.last >= 0); // context for later blocks: 1 iff res.last >= 0
+        }
+    }
+    return R;
+}
+
+int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) {
+    VP8Residual res;
+    VP8Encoder* const enc = it->enc_;
+    int ch, x, y;
+    int R = 0; // accumulated cost of the eight residuals
+
+    VP8IteratorNzToBytes(it); // re-import the non-zero context
+
+    VP8InitResidual(0, 2, enc, &res); // coefficient type 2, starting at coefficient 0
+    for (ch = 0; ch <= 2; ch += 2) { // two passes: uv_levels[0..3] (ch = 0), then uv_levels[4..7] (ch = 2)
+        for (y = 0; y < 2; ++y) {
+            for (x = 0; x < 2; ++x) {
+                const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; // slots 4-5, then 6-7
+                VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+                R += VP8GetResidualCost(ctx, &res);
+                it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = (res.last >= 0); // 1 iff res.last >= 0
+            }
+        }
+    }
+    return R;
+}
+
+//------------------------------------------------------------------------------
+// Recording of token probabilities.
+
+// Record proba context used
+static int Record(int bit, proba_t* const stats) {
+    // '*stats' packs two counters: the total number of events (upper 16 bits)
+    // and the sum of the recorded 'bit' values (lower 16 bits).
+    proba_t packed = *stats;
+    // An overflow of the total is inbound: divide both counters by 2 first.
+    if (packed >= 0xffff0000u) packed = ((packed + 1u) >> 1) & 0x7fff7fffu;
+    packed += 0x00010000u + bit;
+    *stats = packed;
+    return bit;
+}
+
+// We keep the table-free variant around for reference, in case.
+#define USE_LEVEL_CODE_TABLE
+
+// Simulate block coding, but only record statistics.
+// Note: no need to record the fixed probas.
+int VP8RecordCoeffs(int ctx, const VP8Residual* const res) {
+    int n = res->first;
+    // should be stats[VP8EncBands[n]], but it's equivalent for n=0 or 1
+    proba_t* s = res->stats[n][ctx];
+    if (res->last < 0) { // nothing to code: record a single 0 on slot 0 and return 0
+        Record(0, s + 0);
+        return 0;
+    }
+    while (n <= res->last) {
+        int v;
+        Record(1, s + 0); // order of record doesn't matter
+        while ((v = res->coeffs[n++]) == 0) { // skip zero coefficients; n ends one past the non-zero one
+            Record(0, s + 1);
+            s = res->stats[VP8EncBands[n]][0]; // after a zero: context 0 of the next position's band
+        }
+        Record(1, s + 1);
+        if (!Record(2u < (unsigned int)(v + 1), s + 2)) { // v = -1 or 1
+            s = res->stats[VP8EncBands[n]][1]; // after a +/-1: context 1
+        } else {
+            v = abs(v);
+#if !defined(USE_LEVEL_CODE_TABLE)
+            if (!Record(v > 4, s + 3)) {
+                if (Record(v != 2, s + 4)) Record(v == 4, s + 5);
+            } else if (!Record(v > 10, s + 6)) {
+                Record(v > 6, s + 7);
+            } else if (!Record((v >= 3 + (8 << 2)), s + 8)) {
+                Record((v >= 3 + (8 << 1)), s + 9);
+            } else {
+                Record((v >= 3 + (8 << 3)), s + 10);
+            }
+#else
+            if (v > MAX_VARIABLE_LEVEL) { // VP8LevelCodes[] has MAX_VARIABLE_LEVEL entries: cap the index
+                v = MAX_VARIABLE_LEVEL;
+            }
+
+            {
+                const int bits = VP8LevelCodes[v - 1][1];
+                int pattern = VP8LevelCodes[v - 1][0];
+                int i;
+                for (i = 0; (pattern >>= 1) != 0; ++i) { // pattern bit 0 is dropped first; bit i + 1 maps to s[3 + i]
+                    const int mask = 2 << i;
+                    if (pattern & 1) Record(!!(bits & mask), s + 3 + i);
+                }
+            }
+#endif
+            s = res->stats[VP8EncBands[n]][2]; // after a level larger than 1: context 2
+        }
+    }
+    if (n < 16) Record(0, s + 0); // stopped before position 16: record a final 0 on slot 0
+    return 1;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/enc/cost.h b/codec/L2/demos/webpEnc/host/src/enc/cost.h
new file mode 100644
index 0000000000..50d5970177
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/cost.h
@@ -0,0 +1,66 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Cost tables for level and modes.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_ENC_COST_H_
+#define WEBP_ENC_COST_H_
+
+#include <assert.h>
+#include <stdlib.h>
+#include "./vp8enci.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// On-the-fly info about the current set of residuals. Handy to avoid
+// passing zillions of params.
+typedef struct VP8Residual VP8Residual;
+struct VP8Residual {
+    int first; // index of the first coefficient to process (set by VP8InitResidual)
+    int last;  // index of the last coefficient to process; negative when there is none
+    const int16_t* coeffs; // coefficient levels, read at indices first..last
+
+    int coeff_type;     // index used to pick the three tables below
+    ProbaArray* prob;   // enc->proba_.coeffs_[coeff_type]
+    StatsArray* stats;  // enc->proba_.stats_[coeff_type]
+    CostArrayPtr costs; // enc->proba_.remapped_costs_[coeff_type]
+};
+
+void VP8InitResidual(int first, int coeff_type, VP8Encoder* const enc, VP8Residual* const res);
+
+int VP8RecordCoeffs(int ctx, const VP8Residual* const res);
+
+// Cost of coding one event with probability 'proba'.
+static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) {
+    return VP8EntropyCost[bit ? 255 - proba : proba]; // bit 0 uses proba itself, a non-zero bit uses 255 - proba
+}
+
+// Level cost calculations
+extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2];
+void VP8CalculateLevelCosts(VP8EncProba* const proba);
+static WEBP_INLINE int VP8LevelCost(const uint16_t* const table, int level) {
+    return table[(level < MAX_VARIABLE_LEVEL) ? level : MAX_VARIABLE_LEVEL] + VP8LevelFixedCosts[level]; // capped index
+}
+
+// Mode costs
+extern const uint16_t VP8FixedCostsUV[4];
+extern const uint16_t VP8FixedCostsI16[4];
+extern const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES];
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_ENC_COST_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/enc/delta_palettization.c b/codec/L2/demos/webpEnc/host/src/enc/delta_palettization.c
new file mode 100644
index 0000000000..82079c3933
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/delta_palettization.c
@@ -0,0 +1,266 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Mislav Bradac (mislavm@google.com)
+//
+
+#include "./delta_palettization.h"
+
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+#include "../webp/types.h"
+#include "../dsp/lossless.h"
+
+#define MK_COL(r, g, b) (((r) << 16) + ((g) << 8) + (b))
+
+// Format allows palette up to 256 entries, but more palette entries produce
+// bigger entropy. In the future it will probably be useful to add more entries
+// that are far from the origin of the palette or choose remaining entries
+// dynamically.
+#define DELTA_PALETTE_SIZE 226
+
+// Palette used for delta_palettization. Entries are roughly sorted by distance
+// of their signed equivalents from the origin.
+static const uint32_t kDeltaPalette[DELTA_PALETTE_SIZE] = {
+    MK_COL(0u, 0u, 0u),       MK_COL(255u, 255u, 255u), MK_COL(1u, 1u, 1u),       MK_COL(254u, 254u, 254u),
+    MK_COL(2u, 2u, 2u),       MK_COL(4u, 4u, 4u),       MK_COL(252u, 252u, 252u), MK_COL(250u, 0u, 0u),
+    MK_COL(0u, 250u, 0u),     MK_COL(0u, 0u, 250u),     MK_COL(6u, 0u, 0u),       MK_COL(0u, 6u, 0u),
+    MK_COL(0u, 0u, 6u),       MK_COL(0u, 0u, 248u),     MK_COL(0u, 0u, 8u),       MK_COL(0u, 248u, 0u),
+    MK_COL(0u, 248u, 248u),   MK_COL(0u, 248u, 8u),     MK_COL(0u, 8u, 0u),       MK_COL(0u, 8u, 248u),
+    MK_COL(0u, 8u, 8u),       MK_COL(8u, 8u, 8u),       MK_COL(248u, 0u, 0u),     MK_COL(248u, 0u, 248u),
+    MK_COL(248u, 0u, 8u),     MK_COL(248u, 248u, 0u),   MK_COL(248u, 8u, 0u),     MK_COL(8u, 0u, 0u),
+    MK_COL(8u, 0u, 248u),     MK_COL(8u, 0u, 8u),       MK_COL(8u, 248u, 0u),     MK_COL(8u, 8u, 0u),
+    MK_COL(23u, 23u, 23u),    MK_COL(13u, 13u, 13u),    MK_COL(232u, 232u, 232u), MK_COL(244u, 244u, 244u),
+    MK_COL(245u, 245u, 250u), MK_COL(50u, 50u, 50u),    MK_COL(204u, 204u, 204u), MK_COL(236u, 236u, 236u),
+    MK_COL(16u, 16u, 16u),    MK_COL(240u, 16u, 16u),   MK_COL(16u, 240u, 16u),   MK_COL(240u, 240u, 16u),
+    MK_COL(16u, 16u, 240u),   MK_COL(240u, 16u, 240u),  MK_COL(16u, 240u, 240u),  MK_COL(240u, 240u, 240u),
+    MK_COL(0u, 0u, 232u),     MK_COL(0u, 232u, 0u),     MK_COL(232u, 0u, 0u),     MK_COL(0u, 0u, 24u),
+    MK_COL(0u, 24u, 0u),      MK_COL(24u, 0u, 0u),      MK_COL(32u, 32u, 32u),    MK_COL(224u, 32u, 32u),
+    MK_COL(32u, 224u, 32u),   MK_COL(224u, 224u, 32u),  MK_COL(32u, 32u, 224u),   MK_COL(224u, 32u, 224u),
+    MK_COL(32u, 224u, 224u),  MK_COL(224u, 224u, 224u), MK_COL(0u, 0u, 176u),     MK_COL(0u, 0u, 80u),
+    MK_COL(0u, 176u, 0u),     MK_COL(0u, 176u, 176u),   MK_COL(0u, 176u, 80u),    MK_COL(0u, 80u, 0u),
+    MK_COL(0u, 80u, 176u),    MK_COL(0u, 80u, 80u),     MK_COL(176u, 0u, 0u),     MK_COL(176u, 0u, 176u),
+    MK_COL(176u, 0u, 80u),    MK_COL(176u, 176u, 0u),   MK_COL(176u, 80u, 0u),    MK_COL(80u, 0u, 0u),
+    MK_COL(80u, 0u, 176u),    MK_COL(80u, 0u, 80u),     MK_COL(80u, 176u, 0u),    MK_COL(80u, 80u, 0u),
+    MK_COL(0u, 0u, 152u),     MK_COL(0u, 0u, 104u),     MK_COL(0u, 152u, 0u),     MK_COL(0u, 152u, 152u),
+    MK_COL(0u, 152u, 104u),   MK_COL(0u, 104u, 0u),     MK_COL(0u, 104u, 152u),   MK_COL(0u, 104u, 104u),
+    MK_COL(152u, 0u, 0u),     MK_COL(152u, 0u, 152u),   MK_COL(152u, 0u, 104u),   MK_COL(152u, 152u, 0u),
+    MK_COL(152u, 104u, 0u),   MK_COL(104u, 0u, 0u),     MK_COL(104u, 0u, 152u),   MK_COL(104u, 0u, 104u),
+    MK_COL(104u, 152u, 0u),   MK_COL(104u, 104u, 0u),   MK_COL(216u, 216u, 216u), MK_COL(216u, 216u, 40u),
+    MK_COL(216u, 216u, 176u), MK_COL(216u, 216u, 80u),  MK_COL(216u, 40u, 216u),  MK_COL(216u, 40u, 40u),
+    MK_COL(216u, 40u, 176u),  MK_COL(216u, 40u, 80u),   MK_COL(216u, 176u, 216u), MK_COL(216u, 176u, 40u),
+    MK_COL(216u, 176u, 176u), MK_COL(216u, 176u, 80u),  MK_COL(216u, 80u, 216u),  MK_COL(216u, 80u, 40u),
+    MK_COL(216u, 80u, 176u),  MK_COL(216u, 80u, 80u),   MK_COL(40u, 216u, 216u),  MK_COL(40u, 216u, 40u),
+    MK_COL(40u, 216u, 176u),  MK_COL(40u, 216u, 80u),   MK_COL(40u, 40u, 216u),   MK_COL(40u, 40u, 40u),
+    MK_COL(40u, 40u, 176u),   MK_COL(40u, 40u, 80u),    MK_COL(40u, 176u, 216u),  MK_COL(40u, 176u, 40u),
+    MK_COL(40u, 176u, 176u),  MK_COL(40u, 176u, 80u),   MK_COL(40u, 80u, 216u),   MK_COL(40u, 80u, 40u),
+    MK_COL(40u, 80u, 176u),   MK_COL(40u, 80u, 80u),    MK_COL(80u, 216u, 216u),  MK_COL(80u, 216u, 40u),
+    MK_COL(80u, 216u, 176u),  MK_COL(80u, 216u, 80u),   MK_COL(80u, 40u, 216u),   MK_COL(80u, 40u, 40u),
+    MK_COL(80u, 40u, 176u),   MK_COL(80u, 40u, 80u),    MK_COL(80u, 176u, 216u),  MK_COL(80u, 176u, 40u),
+    MK_COL(80u, 176u, 176u),  MK_COL(80u, 176u, 80u),   MK_COL(80u, 80u, 216u),   MK_COL(80u, 80u, 40u),
+    MK_COL(80u, 80u, 176u),   MK_COL(80u, 80u, 80u),    MK_COL(0u, 0u, 192u),     MK_COL(0u, 0u, 64u),
+    MK_COL(0u, 0u, 128u),     MK_COL(0u, 192u, 0u),     MK_COL(0u, 192u, 192u),   MK_COL(0u, 192u, 64u),
+    MK_COL(0u, 192u, 128u),   MK_COL(0u, 64u, 0u),      MK_COL(0u, 64u, 192u),    MK_COL(0u, 64u, 64u),
+    MK_COL(0u, 64u, 128u),    MK_COL(0u, 128u, 0u),     MK_COL(0u, 128u, 192u),   MK_COL(0u, 128u, 64u),
+    MK_COL(0u, 128u, 128u),   MK_COL(176u, 216u, 216u), MK_COL(176u, 216u, 40u),  MK_COL(176u, 216u, 176u),
+    MK_COL(176u, 216u, 80u),  MK_COL(176u, 40u, 216u),  MK_COL(176u, 40u, 40u),   MK_COL(176u, 40u, 176u),
+    MK_COL(176u, 40u, 80u),   MK_COL(176u, 176u, 216u), MK_COL(176u, 176u, 40u),  MK_COL(176u, 176u, 176u),
+    MK_COL(176u, 176u, 80u),  MK_COL(176u, 80u, 216u),  MK_COL(176u, 80u, 40u),   MK_COL(176u, 80u, 176u),
+    MK_COL(176u, 80u, 80u),   MK_COL(192u, 0u, 0u),     MK_COL(192u, 0u, 192u),   MK_COL(192u, 0u, 64u),
+    MK_COL(192u, 0u, 128u),   MK_COL(192u, 192u, 0u),   MK_COL(192u, 192u, 192u), MK_COL(192u, 192u, 64u),
+    MK_COL(192u, 192u, 128u), MK_COL(192u, 64u, 0u),    MK_COL(192u, 64u, 192u),  MK_COL(192u, 64u, 64u),
+    MK_COL(192u, 64u, 128u),  MK_COL(192u, 128u, 0u),   MK_COL(192u, 128u, 192u), MK_COL(192u, 128u, 64u),
+    MK_COL(192u, 128u, 128u), MK_COL(64u, 0u, 0u),      MK_COL(64u, 0u, 192u),    MK_COL(64u, 0u, 64u),
+    MK_COL(64u, 0u, 128u),    MK_COL(64u, 192u, 0u),    MK_COL(64u, 192u, 192u),  MK_COL(64u, 192u, 64u),
+    MK_COL(64u, 192u, 128u),  MK_COL(64u, 64u, 0u),     MK_COL(64u, 64u, 192u),   MK_COL(64u, 64u, 64u),
+    MK_COL(64u, 64u, 128u),   MK_COL(64u, 128u, 0u),    MK_COL(64u, 128u, 192u),  MK_COL(64u, 128u, 64u),
+    MK_COL(64u, 128u, 128u),  MK_COL(128u, 0u, 0u),     MK_COL(128u, 0u, 192u),   MK_COL(128u, 0u, 64u),
+    MK_COL(128u, 0u, 128u),   MK_COL(128u, 192u, 0u),   MK_COL(128u, 192u, 192u), MK_COL(128u, 192u, 64u),
+    MK_COL(128u, 192u, 128u), MK_COL(128u, 64u, 0u),    MK_COL(128u, 64u, 192u),  MK_COL(128u, 64u, 64u),
+    MK_COL(128u, 64u, 128u),  MK_COL(128u, 128u, 0u),   MK_COL(128u, 128u, 192u), MK_COL(128u, 128u, 64u),
+    MK_COL(128u, 128u, 128u),
+};
+
+#undef MK_COL
+
+//------------------------------------------------------------------------------
+// TODO(skal): move the functions to dsp/lossless.c when the correct
+// granularity is found. For now, we'll just copy-paste some useful bits
+// here instead.
+
+// In-place sum of each component with mod 256.
+static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
+    const uint32_t rb = ((*a & 0x00ff00ffu) + (b & 0x00ff00ffu)) & 0x00ff00ffu; // red/blue lanes, carries dropped
+    const uint32_t ag = ((*a & 0xff00ff00u) + (b & 0xff00ff00u)) & 0xff00ff00u; // alpha/green lanes, carries dropped
+    *a = ag | rb;
+}
+
+// Clamps a component to [0, 255]. 'a' is a signed value passed as uint32_t:
+// values below 256 are returned as-is, small negative values (top byte 0xff)
+// give 0, and positive overflows with a zero top byte give 255.
+static WEBP_INLINE uint32_t Clip255(uint32_t a) {
+    const uint32_t in_range = (a <= 255u);
+    const uint32_t saturated = (~a) >> 24; // top byte of the complement
+    return in_range ? a : saturated;
+}
+
+// Delta palettization functions.
+static WEBP_INLINE int Square(int x) {
+    return x * x; // plain int multiply, no overflow check
+}
+
+static WEBP_INLINE uint32_t Intensity(uint32_t a) {
+    return 30 * ((a >> 16) & 0xff) + 59 * ((a >> 8) & 0xff) + 11 * ((a >> 0) & 0xff); // weights sum to 100
+}
+
+static uint32_t CalcDist(uint32_t predicted_value, uint32_t actual_value, uint32_t palette_entry) {
+    int i;
+    uint32_t distance = 0;
+    AddPixelsEq(&predicted_value, palette_entry); // local copy becomes prediction + palette entry (mod 256 per byte)
+    for (i = 0; i < 32; i += 8) { // all four 8-bit components
+        const int32_t av = (actual_value >> i) & 0xff;
+        const int32_t pv = (predicted_value >> i) & 0xff;
+        distance += Square(pv - av);
+    }
+    // We sum square of intensity difference with factor 10, but because Intensity
+    // returns 100 times real intensity we need to multiply differences of colors
+    // by 1000.
+    distance *= 1000u;
+    distance += Square(Intensity(predicted_value) - Intensity(actual_value)); // unsigned difference converted to int
+    return distance;
+}
+
+static uint32_t Predict(int x, int y, uint32_t* image) {
+    uint32_t avg = 0;
+    int shift;
+    if (x == 0 && y == 0) return ARGB_BLACK; // no neighbour available
+    if (x == 0) return image[x];             // only the entry at x (top) is available
+    if (y == 0) return image[x - 1];         // only the entry at x - 1 (left) is available
+    for (shift = 24; shift >= 0; shift -= 8) { // per-component average of top and left, rounded down
+        const uint32_t t = (image[x] >> shift) & 0xff, l = (image[x - 1] >> shift) & 0xff;
+        avg += ((t + l) / 2) << shift;
+    }
+    return avg;
+}
+
+static WEBP_INLINE int AddSubtractComponentFullWithCoefficient(int a, int b, int c) {
+    return Clip255(a + ((b - c) >> 2)); // a plus (b - c) shifted right by 2, then passed through Clip255()
+}
+
+static WEBP_INLINE uint32_t ClampedAddSubtractFullWithCoefficient(uint32_t c0, uint32_t c1, uint32_t c2) {
+    const int a = AddSubtractComponentFullWithCoefficient(c0 >> 24, c1 >> 24, c2 >> 24); // bits 24-31
+    const int r = AddSubtractComponentFullWithCoefficient((c0 >> 16) & 0xff, (c1 >> 16) & 0xff, (c2 >> 16) & 0xff);
+    const int g = AddSubtractComponentFullWithCoefficient((c0 >> 8) & 0xff, (c1 >> 8) & 0xff, (c2 >> 8) & 0xff);
+    const int b = AddSubtractComponentFullWithCoefficient(c0 & 0xff, c1 & 0xff, c2 & 0xff); // bits 0-7
+    return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b; // repack; the cast keeps the << 24 unsigned
+}
+
+//------------------------------------------------------------------------------
+
+// Find palette entry with minimum error from difference of actual pixel value
+// and predicted pixel value, and return its index. Propagating the error to
+// the left and top pixels of the src array and writing predicted_value +
+// palette_entry to new_image is done separately, by ApplyBestPaletteEntry().
+static int FindBestPaletteEntry(uint32_t src, uint32_t predicted_value, const uint32_t palette[], int palette_size) {
+    // Linear scan over the palette for the entry with the smallest CalcDist().
+    uint32_t min_dist = CalcDist(predicted_value, src, palette[0]); // entry 0 is the initial candidate
+    int best_idx = 0;
+    int k;
+    for (k = 1; k < palette_size; ++k) {
+        const uint32_t d = CalcDist(predicted_value, src, palette[k]);
+        if (d >= min_dist) continue; // not strictly better: ties keep the earliest entry
+        min_dist = d;
+        best_idx = k;
+    }
+    return best_idx;
+}
+
+static void ApplyBestPaletteEntry(
+    int x, int y, uint32_t new_value, uint32_t palette_value, uint32_t* src, int src_stride, uint32_t* new_image) {
+    AddPixelsEq(&new_value, palette_value); // new_value becomes prediction + palette entry
+    if (x > 0) { // adjust the left source pixel using new_value and src[x]
+        src[x - 1] = ClampedAddSubtractFullWithCoefficient(src[x - 1], new_value, src[x]);
+    }
+    if (y > 0) { // same adjustment for the source pixel src_stride entries earlier
+        src[x - src_stride] = ClampedAddSubtractFullWithCoefficient(src[x - src_stride], new_value, src[x]);
+    }
+    new_image[x] = new_value; // stored where Predict() reads its neighbours
+}
+
+//------------------------------------------------------------------------------
+// Main entry point
+
+static WebPEncodingError ApplyDeltaPalette(uint32_t* src, // source pixels; modified in place
+                                           uint32_t* dst, // receives one palette value per pixel
+                                           uint32_t src_stride, // row pitch of src, in uint32_t elements
+                                           uint32_t dst_stride, // row pitch of dst, in uint32_t elements
+                                           const uint32_t* palette,
+                                           int palette_size,
+                                           int width,
+                                           int height,
+                                           int num_passes) {
+    int x, y;
+    WebPEncodingError err = VP8_ENC_OK;
+    uint32_t* new_image = (uint32_t*)WebPSafeMalloc(width, sizeof(*new_image)); // one row, reused for every y
+    uint8_t* const tmp_row = (uint8_t*)WebPSafeMalloc(width, sizeof(*tmp_row)); // chosen index per pixel (8 bits)
+    if (new_image == NULL || tmp_row == NULL) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+    }
+
+    while (num_passes--) { // every pass restarts at the first row of src / dst
+        uint32_t* cur_src = src;
+        uint32_t* cur_dst = dst;
+        for (y = 0; y < height; ++y) {
+            for (x = 0; x < width; ++x) {
+                const uint32_t predicted_value = Predict(x, y, new_image);
+                tmp_row[x] = FindBestPaletteEntry(cur_src[x], predicted_value, palette, palette_size);
+                ApplyBestPaletteEntry(x, y, predicted_value, palette[tmp_row[x]], cur_src, src_stride, new_image);
+            }
+            for (x = 0; x < width; ++x) {
+                cur_dst[x] = palette[tmp_row[x]]; // dst stores the palette value, not its index
+            }
+            cur_src += src_stride;
+            cur_dst += dst_stride;
+        }
+    }
+Error:
+    WebPSafeFree(new_image); // reached on success as well as on allocation failure
+    WebPSafeFree(tmp_row);
+    return err;
+}
+
+// replaces enc->argb_ by a palettizable approximation of it,
+// and generates optimal enc->palette_[]
+WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc) {
+    const WebPPicture* const pic = enc->pic_;
+    uint32_t* src = pic->argb; // passed as the in-place-modified source of ApplyDeltaPalette()
+    uint32_t* dst = enc->argb_;
+    const int width = pic->width;
+    const int height = pic->height;
+
+    WebPEncodingError err = VP8_ENC_OK;
+    memcpy(enc->palette_, kDeltaPalette, sizeof(kDeltaPalette)); // start from the built-in table
+    enc->palette_[DELTA_PALETTE_SIZE - 1] = src[0] - 0xff000000u; // last entry: first source pixel minus 0xff000000
+    enc->palette_size_ = DELTA_PALETTE_SIZE;
+    err = ApplyDeltaPalette(src, dst, pic->argb_stride, enc->current_width_, enc->palette_, enc->palette_size_, width,
+                            height, 2); // last argument: number of passes
+    if (err != VP8_ENC_OK) goto Error; // the label follows immediately: no extra cleanup is skipped
+
+Error:
+    return err;
+}
+
+#else // !WEBP_EXPERIMENTAL_FEATURES
+
+WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc) {
+    (void)enc; // unused in builds without WEBP_EXPERIMENTAL_FEATURES
+    return VP8_ENC_ERROR_INVALID_CONFIGURATION; // stub: always reports an error
+}
+
+#endif // WEBP_EXPERIMENTAL_FEATURES
diff --git a/codec/L2/demos/webpEnc/host/src/enc/delta_palettization.h b/codec/L2/demos/webpEnc/host/src/enc/delta_palettization.h
new file mode 100644
index 0000000000..f91b5e8ff8
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/delta_palettization.h
@@ -0,0 +1,25 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Mislav Bradac (mislavm@google.com)
+//
+
+#ifndef WEBP_ENC_DELTA_PALETTIZATION_H_
+#define WEBP_ENC_DELTA_PALETTIZATION_H_
+
+#include "../webp/encode.h"
+#include "../enc/vp8li.h"
+
+// Replaces enc->argb_[] input by a palettizable approximation of it,
+// and generates optimal enc->palette_[].
+// This function can revert enc->use_palette_ / enc->use_predict_ flag
+// if delta-palettization is not producing expected saving.
+WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc);
+
+#endif // WEBP_ENC_DELTA_PALETTIZATION_H_
diff --git a/codec/L2/demos/webpEnc/host/src/enc/filter.c b/codec/L2/demos/webpEnc/host/src/enc/filter.c
new file mode 100644
index 0000000000..ed28cc8f61
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/filter.c
@@ -0,0 +1,336 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Selecting filter level
+//
+// Author: somnath@google.com (Somnath Banerjee)
+
+#include <assert.h>
+#include "./vp8enci.h"
+#include "../dsp/dsp.h"
+#include "../utils/profiling.h"
+
+// This table gives, for a given sharpness, the filtering strength to be
+// used (at least) in order to filter a given edge step delta.
+// This is constructed by brute force inspection: for all delta, we iterate
+// over all possible filtering strength / thresh until needs_filter() returns
+// true.
+#define MAX_DELTA_SIZE 64
+static const uint8_t kLevelsFromDelta[8][MAX_DELTA_SIZE] = {
+    {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
+     22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
+     44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63},
+    {0,  1,  2,  3,  5,  6,  7,  8,  9,  11, 12, 13, 14, 15, 17, 18, 20, 21, 23, 24, 26, 27,
+     29, 30, 32, 33, 35, 36, 38, 39, 41, 42, 44, 45, 47, 48, 50, 51, 53, 54, 56, 57, 59, 60,
+     62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63},
+    {0,  1,  2,  3,  5,  6,  7,  8,  9,  11, 12, 13, 14, 16, 17, 19, 20, 22, 23, 25, 26, 28,
+     29, 31, 32, 34, 35, 37, 38, 40, 41, 43, 44, 46, 47, 49, 50, 52, 53, 55, 56, 58, 59, 61,
+     62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63},
+    {0,  1,  2,  3,  5,  6,  7,  8,  9,  11, 12, 13, 15, 16, 18, 19, 21, 22, 24, 25, 27, 28,
+     30, 31, 33, 34, 36, 37, 39, 40, 42, 43, 45, 46, 48, 49, 51, 52, 54, 55, 57, 58, 60, 61,
+     63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63},
+    {0,  1,  2,  3,  5,  6,  7,  8,  9,  11, 12, 14, 15, 17, 18, 20, 21, 23, 24, 26, 27, 29,
+     30, 32, 33, 35, 36, 38, 39, 41, 42, 44, 45, 47, 48, 50, 51, 53, 54, 56, 57, 59, 60, 62,
+     63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63},
+    {0,  1,  2,  4,  5,  7,  8,  9,  11, 12, 13, 15, 16, 17, 19, 20, 22, 23, 25, 26, 28, 29,
+     31, 32, 34, 35, 37, 38, 40, 41, 43, 44, 46, 47, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62,
+     63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63},
+    {0,  1,  2,  4,  5,  7,  8,  9,  11, 12, 13, 15, 16, 18, 19, 21, 22, 24, 25, 27, 28, 30,
+     31, 33, 34, 36, 37, 39, 40, 42, 43, 45, 46, 48, 49, 51, 52, 54, 55, 57, 58, 60, 61, 63,
+     63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63},
+    {0,  1,  2,  4,  5,  7,  8,  9,  11, 12, 14, 15, 17, 18, 20, 21, 23, 24, 26, 27, 29, 30,
+     32, 33, 35, 36, 38, 39, 41, 42, 44, 45, 47, 48, 50, 51, 53, 54, 56, 57, 59, 60, 62, 63,
+     63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63}};
+// Returns kLevelsFromDelta[sharpness][delta], with 'delta' clamped to [0, MAX_DELTA_SIZE - 1].
+int VP8FilterStrengthFromDelta(int sharpness, int delta) {
+    const int pos = (delta < 0) ? 0 : (delta < MAX_DELTA_SIZE) ? delta : MAX_DELTA_SIZE - 1; // negative must not read before the row
+    assert(sharpness >= 0 && sharpness <= 7); // the table has 8 rows, one per sharpness value
+    return kLevelsFromDelta[sharpness][pos];
+}
+
+//------------------------------------------------------------------------------
+// Paragraph 15.4: compute the inner-edge filtering strength
+
+static int GetILevel(int sharpness, int level) {
+    // Inner-edge limit derived from the filter level. With a positive
+    // sharpness, the level is first shifted right (by 2 when sharpness > 4,
+    // else by 1) and then capped at 9 - sharpness. The result is at least 1.
+    int ilevel = level;
+
+    if (sharpness > 0) {
+        const int shift = (sharpness > 4) ? 2 : 1;
+        const int cap = 9 - sharpness;
+        ilevel >>= shift;
+        if (ilevel > cap) ilevel = cap;
+    }
+    return (ilevel < 1) ? 1 : ilevel;
+}
+
+static void DoFilter(const VP8EncIterator* const it, int level) {
+    // Runs the loop filter at strength 'level' on a scratch copy of the
+    // reconstructed block: it->yuv_out_ is copied to it->yuv_out2_ first.
+    const VP8Encoder* const encoder = it->enc_;
+    const int inner_level = GetILevel(encoder->config_->filter_sharpness, level);
+    const int edge_limit = 2 * level + inner_level;
+    uint8_t* const luma = it->yuv_out2_ + Y_OFF_ENC;
+    uint8_t* const chroma_u = it->yuv_out2_ + U_OFF_ENC;
+    uint8_t* const chroma_v = it->yuv_out2_ + V_OFF_ENC;
+
+    memcpy(luma, it->yuv_out_, YUV_SIZE_ENC * sizeof(uint8_t));
+
+    if (encoder->filter_hdr_.simple_ != 1) { // complex filter: luma and both chroma planes
+        const int hev_threshold = (level < 15) ? 0 : (level < 40) ? 1 : 2;
+        VP8HFilter16i(luma, BPS, edge_limit, inner_level, hev_threshold);
+        VP8HFilter8i(chroma_u, chroma_v, BPS, edge_limit, inner_level, hev_threshold);
+        VP8VFilter16i(luma, BPS, edge_limit, inner_level, hev_threshold);
+        VP8VFilter8i(chroma_u, chroma_v, BPS, edge_limit, inner_level, hev_threshold);
+    } else { // simple filter: luma only
+        VP8SimpleHFilter16i(luma, BPS, edge_limit);
+        VP8SimpleVFilter16i(luma, BPS, edge_limit);
+    }
+}
+
+//------------------------------------------------------------------------------
+// SSIM metric
+
+enum { KERNEL = 3 }; // window half-size: VP8SSIMAccumulate() sums samples within +/-KERNEL of the center
+static const double kMinValue = 1.e-10; // minimal threshold
+
+void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst) { // dst += src, field by field
+    dst->w += src->w; // number of accumulated samples
+    dst->xm += src->xm; // sum of first-plane samples
+    dst->ym += src->ym; // sum of second-plane samples
+    dst->xxm += src->xxm; // sum of squared first-plane samples
+    dst->xym += src->xym; // sum of cross products
+    dst->yym += src->yym; // sum of squared second-plane samples
+}
+
+static void VP8SSIMAccumulate(const uint8_t* src1, int stride1,
+                              const uint8_t* src2, int stride2,
+                              int xo, int yo, int W, int H,
+                              DistoStats* const stats) {
+    // Adds to 'stats' the raw sums (sample count, s1, s2, s1*s1, s1*s2, s2*s2)
+    // over the window of samples at most KERNEL away from (xo, yo), clipped
+    // to the W x H plane. 'src1' and 'src2' are read with their own strides.
+    const int top = (yo - KERNEL < 0) ? 0 : yo - KERNEL;
+    const int bottom = (yo + KERNEL > H - 1) ? H - 1 : yo + KERNEL;
+    const int left = (xo - KERNEL < 0) ? 0 : xo - KERNEL;
+    const int right = (xo + KERNEL > W - 1) ? W - 1 : xo + KERNEL;
+    int row;
+
+    for (row = top; row <= bottom; ++row) {
+        const uint8_t* const line1 = src1 + row * stride1;
+        const uint8_t* const line2 = src2 + row * stride2;
+        int col;
+        for (col = left; col <= right; ++col) {
+            const int a = line1[col];
+            const int b = line2[col];
+            stats->w += 1;
+            stats->xm += a;
+            stats->ym += b;
+            stats->xxm += a * a;
+            stats->xym += a * b;
+            stats->yym += b * b;
+        }
+    }
+}
+
+double VP8SSIMGet(const DistoStats* const stats) {
+    // Score computed from the accumulated sums. Every term is left scaled by
+    // w * w, which is why the constants C1 and C2 are multiplied by w2.
+    const double sum_x_sq = stats->xm * stats->xm;
+    const double sum_y_sq = stats->ym * stats->ym;
+    const double sum_x_sum_y = stats->xm * stats->ym;
+    const double w2 = stats->w * stats->w;
+    const double C1 = 6.5025 * w2;
+    const double C2 = 58.5225 * w2;
+    const double raw_sxx = stats->xxm * stats->w - sum_x_sq;
+    const double raw_syy = stats->yym * stats->w - sum_y_sq;
+    const double sxy = stats->xym * stats->w - sum_x_sum_y;
+    // small errors are possible, due to rounding. Clamp to zero.
+    const double sxx = (raw_sxx < 0.) ? 0. : raw_sxx;
+    const double syy = (raw_syy < 0.) ? 0. : raw_syy;
+    const double numerator = (2 * sum_x_sum_y + C1) * (2 * sxy + C2);
+    const double denominator = (sum_x_sq + sum_y_sq + C1) * (sxx + syy + C2);
+    if (denominator != 0.) return numerator / denominator;
+    return kMinValue; // zero denominator: fall back to the minimal value
+}
+
+double VP8SSIMGetSquaredError(const DistoStats* const s) {
+    // Squared error from the accumulated sums; never returns less than kMinValue.
+    double sse = kMinValue;
+    if (s->w > 0.) {
+        const double scaled_xx = s->xxm * s->w - s->xm * s->xm;
+        const double scaled_yy = s->yym * s->w - s->ym * s->ym;
+        const double scaled_xy = s->xym * s->w - s->xm * s->ym;
+        sse = (1. / (s->w * s->w)) * (scaled_xx + scaled_yy - 2. * scaled_xy);
+    }
+    return (sse > kMinValue) ? sse : kMinValue;
+}
+
+void VP8SSIMAccumulatePlane(
+    const uint8_t* src1, int stride1, const uint8_t* src2, int stride2, int W, int H, DistoStats* const stats) {
+    // Visits every position of the W x H plane in raster order and adds the
+    // window statistics centered on it to 'stats'.
+    int row, col;
+    for (row = 0; row < H; ++row) {
+        for (col = 0; col < W; ++col) VP8SSIMAccumulate(src1, stride1, src2, stride2, col, row, W, H, stats);
+    }
+}
+
+static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) {
+    DistoStats stats = {.0, .0, .0, .0, .0, .0};
+    int cx, cy;
+    // Luma: window centers 3..12 in both directions of the 16x16 block.
+    for (cx = 3; cx < 13; cx++) {
+        for (cy = 3; cy < 13; cy++) {
+            VP8SSIMAccumulate(yuv1 + Y_OFF_ENC, BPS, yuv2 + Y_OFF_ENC, BPS, cx, cy, 16, 16, &stats);
+        }
+    }
+    // Chroma: window centers 1..6 of each 8x8 plane, U then V at every center.
+    for (cx = 1; cx < 7; cx++) {
+        for (cy = 1; cy < 7; cy++) {
+            VP8SSIMAccumulate(yuv1 + U_OFF_ENC, BPS, yuv2 + U_OFF_ENC, BPS, cx, cy, 8, 8, &stats);
+            VP8SSIMAccumulate(yuv1 + V_OFF_ENC, BPS, yuv2 + V_OFF_ENC, BPS, cx, cy, 8, 8, &stats);
+        }
+    }
+    return VP8SSIMGet(&stats);
+}
+
+//------------------------------------------------------------------------------
+// Exposed APIs: Encoder should call the following 3 functions to adjust
+// loop filter strength
+
+void VP8InitFilter(VP8EncIterator* const it) {
+    // Clears every [segment][level] entry of the statistics table that
+    // it->lf_stats_ points to. Does nothing when no table is attached.
+    int segment, level;
+    if (it->lf_stats_ == NULL) return;
+    for (segment = 0; segment < NUM_MB_SEGMENTS; segment++) {
+        for (level = 0; level < MAX_LF_LEVELS; level++) {
+            (*it->lf_stats_)[segment][level] = 0;
+        }
+    }
+}
+
+void VP8StoreFilterStats(VP8EncIterator* const it) {
+    int d;
+    VP8Encoder* const enc = it->enc_;
+    const int s = it->mb_->segment_;
+    const int level0 = enc->dqm_[s].fstrength_;
+
+    // explore +/-quant range of values around level0
+    const int delta_min = -enc->dqm_[s].quant_;
+    const int delta_max = enc->dqm_[s].quant_;
+    const int step_size = (delta_max - delta_min >= 4) ? 4 : 1;
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch); // NOTE(review): both early returns below skip StopProfiling() - confirm this is intended
+
+    // Nothing to record when no statistics table is attached to the iterator.
+    if (it->lf_stats_ == NULL) return;
+
+    // NOTE: Currently we are applying filter only across the sub-block edges.
+    // There are two reasons for that.
+    // 1. Applying filter on macro block edges will change the pixels in
+    // the left and top macro blocks. That will be hard to restore
+    // 2. Macro Blocks on the bottom and right are not yet compressed. So we
+    // cannot apply filter on the right and bottom macro block edges.
+    // Macroblocks with type_ == 1 that are also flagged skip_ contribute no statistics.
+    if (it->mb_->type_ == 1 && it->mb_->skip_) return;
+
+    // Always try filter level zero
+    (*it->lf_stats_)[s][0] += GetMBSSIM(it->yuv_in_, it->yuv_out_);
+
+    for (d = delta_min; d <= delta_max; d += step_size) {
+        const int level = level0 + d;
+        if (level <= 0 || level >= MAX_LF_LEVELS) {
+            // Level 0 was scored above; the other rejected values fall outside [1, MAX_LF_LEVELS - 1].
+            continue;
+        }
+        // Filter a copy of the block into it->yuv_out2_, then score it against the source block.
+        DoFilter(it, level);
+        (*it->lf_stats_)[s][level] += GetMBSSIM(it->yuv_in_, it->yuv_out2_);
+    }
+    StopProfiling(&stop_watch, &timeStoreFilterSts, &countStoreFilterSts);
+}
+
+void VP8AdjustFilterStrength(VP8EncIterator* const it) {
+    // Two modes: with collected statistics (it->lf_stats_), each segment takes
+    // the level with the best score; otherwise, when the configured
+    // filter_strength is positive, strengths are raised from an edge estimate.
+    VP8Encoder* const enc = it->enc_;
+    int seg;
+    if (it->lf_stats_ != NULL) {
+        for (seg = 0; seg < NUM_MB_SEGMENTS; seg++) {
+            // Improvement over filter level 0 should be at least 1e-5 (relatively)
+            double top_score = 1.00001 * (*it->lf_stats_)[seg][0];
+            int chosen = 0;
+            int lvl;
+            for (lvl = 1; lvl < MAX_LF_LEVELS; lvl++) {
+                const double score = (*it->lf_stats_)[seg][lvl];
+                if (score > top_score) {
+                    top_score = score;
+                    chosen = lvl;
+                }
+            }
+            enc->dqm_[seg].fstrength_ = chosen;
+        }
+        return;
+    }
+    // No statistics: only act when filtering is enabled in the configuration.
+    if (enc->config_->filter_strength > 0) {
+        int strongest = 0;
+        for (seg = 0; seg < NUM_MB_SEGMENTS; seg++) {
+            VP8SegmentInfo* const info = &enc->dqm_[seg];
+            // this '>> 3' accounts for some inverse WHT scaling
+            const int edge_delta = (info->max_edge_ * info->y2_.q_[1]) >> 3;
+            const int wanted = VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, edge_delta);
+            if (info->fstrength_ < wanted) info->fstrength_ = wanted;
+            if (strongest < info->fstrength_) strongest = info->fstrength_;
+        }
+        enc->filter_hdr_.level_ = strongest;
+    }
+}
+
+void VP8AdjustFilterStrengthOcl(VP8Encoder* const enc) {
+    int seg;
+    if (enc->lf_stats_ == NULL) {
+        // No statistics: raise the per-segment strengths from the edge
+        // estimate, but only when filtering is enabled in the configuration.
+        int highest = 0;
+        if (!(enc->config_->filter_strength > 0)) return;
+        for (seg = 0; seg < NUM_MB_SEGMENTS; ++seg) {
+            VP8SegmentInfo* const segment = &enc->dqm_[seg];
+            // this '>> 3' accounts for some inverse WHT scaling
+            const int step = (segment->max_edge_ * segment->y2_.q_[1]) >> 3;
+            const int floor_level = VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, step);
+            if (segment->fstrength_ < floor_level) segment->fstrength_ = floor_level;
+            if (segment->fstrength_ > highest) highest = segment->fstrength_;
+        }
+        enc->filter_hdr_.level_ = highest;
+        return;
+    }
+    // Statistics available: per segment, keep the level with the highest score.
+    for (seg = 0; seg < NUM_MB_SEGMENTS; ++seg) {
+        // Improvement over filter level 0 should be at least 1e-5 (relatively)
+        double winning_score = 1.00001 * (*enc->lf_stats_)[seg][0];
+        int winner = 0;
+        int candidate;
+        for (candidate = 1; candidate < MAX_LF_LEVELS; ++candidate) {
+            const double score = (*enc->lf_stats_)[seg][candidate];
+            if (score > winning_score) {
+                winning_score = score;
+                winner = candidate;
+            }
+        }
+        enc->dqm_[seg].fstrength_ = winner;
+    }
+}
+
+// -----------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/enc/frame.c b/codec/L2/demos/webpEnc/host/src/enc/frame.c
new file mode 100644
index 0000000000..875b6ad2f1
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/frame.c
@@ -0,0 +1,1301 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   frame coding and analysis
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <hls_stream.h>
+#include <ap_int.h>
+#include "vp8_hls_syn.h"
+
+#include <string.h>
+#include <math.h>
+
+#include "./cost.h"
+#include "./vp8enci.h"
+#include "../dsp/dsp.h"
+#include "../webp/format_constants.h" // RIFF constants
+#include "../utils/profiling.h"
+#include "../../host/create_kernel.h"
+
+#define SEGMENT_VISU 0
+#define DEBUG_SEARCH 0 // useful to track search convergence
+
+//------------------------------------------------------------------------------
+// multi-pass convergence
+
+#define HEADER_SIZE_ESTIMATE (RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE + VP8_FRAME_HEADER_SIZE)
+#define DQ_LIMIT 0.4 // convergence is considered reached if dq < DQ_LIMIT
+// we allow 2k of extra head-room in PARTITION0 limit.
+#define PARTITION0_SIZE_LIMIT ((VP8_MAX_PARTITION0_SIZE - 2048ULL) << 11)
+
+typedef struct { // struct for organizing convergence in either size or PSNR
+    int is_first; // set to 1 by InitPassStats(), cleared by the first ComputeNextQ()
+    float dq; // last quality step, kept within [-30, 30] by ComputeNextQ()
+    float q, last_q; // current and previous quality, q is kept within [0, 100]
+    double value, last_value; // PSNR or size
+    double target; // value to reach: target size when do_size_search, else target PSNR
+    int do_size_search; // non-zero when a target size is configured
+} PassStats;
+
+static int InitPassStats(const VP8Encoder* const enc, PassStats* const s) {
+    // Resets 's' for the first pass; returns 1 for a size search (non-zero target_size), else 0.
+    const float psnr_goal = enc->config_->target_PSNR;
+    const uint64_t size_goal = (uint64_t)enc->config_->target_size;
+    s->do_size_search = (size_goal != 0);
+    s->is_first = 1;
+    s->dq = 10.f;
+    s->last_q = enc->config_->quality;
+    s->q = s->last_q;
+    s->value = s->last_value = 0.;
+    s->target = s->do_size_search ? (double)size_goal : (psnr_goal > 0.) ? psnr_goal : 40.; // default, just in case
+    return s->do_size_search;
+}
+
+static float Clamp(float v, float min, float max) { // saturates 'v' to the range [min, max]
+    return (v < min) ? min : (v > max) ? max : v; // the lower bound is tested first
+}
+
+static float ComputeNextQ(PassStats* const s) {
+    // Picks the next quality to try. First call: move by the initial step
+    // toward the target. Later calls: interpolate linearly through the last
+    // two (q, value) points; no move when the value did not change.
+    float step = 0.f;
+    if (s->is_first) {
+        s->is_first = 0;
+        step = (s->value > s->target) ? -s->dq : s->dq;
+    } else if (s->value != s->last_value) {
+        const double slope = (s->target - s->value) / (s->last_value - s->value);
+        step = (float)(slope * (s->last_q - s->q));
+    }
+    s->dq = Clamp(step, -30.f, 30.f); // Limit variable to avoid large swings.
+    s->last_value = s->value;
+    s->last_q = s->q;
+    s->q = Clamp(s->q + s->dq, 0.f, 100.f);
+    return s->q;
+}
+
+//------------------------------------------------------------------------------
+// Tables for level coding
+
+const uint8_t VP8Cat3[] = {173, 148, 140}; // one value per extra bit (3) passed to VP8PutBit() by PutCoeffs()
+const uint8_t VP8Cat4[] = {176, 155, 140, 135}; // same, 4 extra bits
+const uint8_t VP8Cat5[] = {180, 157, 141, 134, 130}; // same, 5 extra bits
+const uint8_t VP8Cat6[] = {254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129}; // same, 11 extra bits
+
+//------------------------------------------------------------------------------
+// Reset the statistics about: number of skips, token proba, level cost,...
+
+static void ResetStats(VP8Encoder* const enc) {
+    VP8EncProba* const proba = &enc->proba_;
+    VP8CalculateLevelCosts(proba); // presumably refreshes the level cost tables from 'proba' - confirm against cost.h
+    proba->nb_skip_ = 0; // restart the skip counter
+}
+
+//------------------------------------------------------------------------------
+// Skip decision probability
+
+#define SKIP_PROBA_THRESHOLD 250 // value below which using skip_proba is OK.
+
+static int CalcSkipProba(uint64_t nb, uint64_t total) {
+    return (total == 0) ? 255 : (int)((total - nb) * 255 / total); // 255 when nothing was counted
+}
+
+// Returns the bit-cost for coding the skip probability.
+static int FinalizeSkipProba(VP8Encoder* const enc) {
+    VP8EncProba* const proba = &enc->proba_;
+    const int total_mbs = enc->mb_w_ * enc->mb_h_;
+    const int skipped = proba->nb_skip_;
+    int cost = 256; // 'use_skip_proba' bit
+
+    proba->skip_proba_ = CalcSkipProba(skipped, total_mbs);
+    proba->use_skip_proba_ = (proba->skip_proba_ < SKIP_PROBA_THRESHOLD);
+    if (!proba->use_skip_proba_) return cost;
+    // One skip flag per macroblock, coded with the probability chosen above.
+    cost += skipped * VP8BitCost(1, proba->skip_proba_);
+    cost += (total_mbs - skipped) * VP8BitCost(0, proba->skip_proba_);
+    cost += 8 * 256; // cost of signaling the skip_proba_ itself.
+    return cost;
+}
+
+// Collect statistics and deduce probabilities for next coding pass.
+// Return the total bit-cost for coding the probability updates.
+static int CalcTokenProba(int nb, int total) {
+    assert(nb <= total);
+    return (nb == 0) ? 255 : 255 - nb * 255 / total; // 255 when no event was counted
+}
+
+// Cost of coding 'nb' 1's and 'total-nb' 0's using 'proba' probability.
+static int BranchCost(int nb, int total, int proba) {
+    return (total - nb) * VP8BitCost(0, proba) + nb * VP8BitCost(1, proba); // cost of the 0's plus cost of the 1's
+}
+
+static void ResetTokenStats(VP8Encoder* const enc) {
+    // Zeroes the token statistics held in the encoder's probability state.
+    memset(enc->proba_.stats_, 0, sizeof(enc->proba_.stats_));
+}
+
+static int FinalizeTokenProbas(VP8EncProba* const proba) { // fills proba->coeffs_ and returns the accumulated signaling cost
+    int has_changed = 0;
+    int size = 0;
+    int t, b, c, p;
+    for (t = 0; t < NUM_TYPES; ++t) {
+        for (b = 0; b < NUM_BANDS; ++b) {
+            for (c = 0; c < NUM_CTX; ++c) {
+                for (p = 0; p < NUM_PROBAS; ++p) {
+                    const proba_t stats = proba->stats_[t][b][c][p]; // two counters packed in one word
+                    const int nb = (stats >> 0) & 0xffff; // low 16 bits
+                    const int total = (stats >> 16) & 0xffff; // high 16 bits
+                    const int update_proba = VP8CoeffsUpdateProba[t][b][c][p];
+                    const int old_p = VP8CoeffsProba0[t][b][c][p]; // value from the default table
+                    const int new_p = CalcTokenProba(nb, total); // value deduced from the counters
+                    const int old_cost = BranchCost(nb, total, old_p) + VP8BitCost(0, update_proba);
+                    const int new_cost = BranchCost(nb, total, new_p) + VP8BitCost(1, update_proba) + 8 * 256; // 8 * 256: presumably the cost of sending the new value - confirm
+                    const int use_new_p = (old_cost > new_cost); // switch only when strictly cheaper
+                    size += VP8BitCost(use_new_p, update_proba);
+                    if (use_new_p) { // only use proba that seem meaningful enough.
+                        proba->coeffs_[t][b][c][p] = new_p;
+                        has_changed |= (new_p != old_p);
+                        size += 8 * 256;
+                    } else {
+                        proba->coeffs_[t][b][c][p] = old_p;
+                    }
+                }
+            }
+        }
+    }
+    proba->dirty_ = has_changed; // non-zero when at least one entry differs from its default
+    return size;
+}
+
+//------------------------------------------------------------------------------
+// Finalize Segment probability based on the coding tree
+
+static int GetProba(int a, int b) {
+    const int sum = a + b;
+    if (sum == 0) return 255; // that's the default probability.
+    return (255 * a + sum / 2) / sum; // rounded proba
+}
+
+static void SetSegmentProbas(VP8Encoder* const enc) {
+    // Builds the histogram of macroblocks per segment, then the three branch
+    // probabilities of the segment tree and the cost of coding the map.
+    const int num_mbs = enc->mb_w_ * enc->mb_h_;
+    int count[NUM_MB_SEGMENTS] = {0};
+    int i;
+
+    for (i = 0; i < num_mbs; ++i) count[enc->mb_info_[i].segment_]++;
+    if (enc->pic_->stats != NULL) {
+        for (i = 0; i < NUM_MB_SEGMENTS; ++i) enc->pic_->stats->segment_size[i] = count[i];
+    }
+    if (!(enc->segment_hdr_.num_segments_ > 1)) { // at most one segment: no map to code
+        enc->segment_hdr_.update_map_ = 0;
+        enc->segment_hdr_.size_ = 0;
+        return;
+    }
+    {
+        uint8_t* const tree = enc->proba_.segments_;
+        tree[0] = GetProba(count[0] + count[1], count[2] + count[3]); // segments {0,1} vs {2,3}
+        tree[1] = GetProba(count[0], count[1]); // segment 0 vs 1
+        tree[2] = GetProba(count[2], count[3]); // segment 2 vs 3
+
+        enc->segment_hdr_.update_map_ = (tree[0] != 255) || (tree[1] != 255) || (tree[2] != 255);
+        enc->segment_hdr_.size_ = count[0] * (VP8BitCost(0, tree[0]) + VP8BitCost(0, tree[1])) +
+                                  count[1] * (VP8BitCost(0, tree[0]) + VP8BitCost(1, tree[1])) +
+                                  count[2] * (VP8BitCost(1, tree[0]) + VP8BitCost(0, tree[2])) +
+                                  count[3] * (VP8BitCost(1, tree[0]) + VP8BitCost(1, tree[2]));
+    }
+}
+
+//------------------------------------------------------------------------------
+// Coefficient coding
+
+static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) { // returns 0 when the first bit written is 0, else 1
+    int n = res->first;
+    // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
+    const uint8_t* p = res->prob[n][ctx];
+    if (!VP8PutBit(bw, res->last >= 0, p[0])) { // NOTE(review): relies on VP8PutBit() returning the bit it was given - confirm
+        return 0;
+    }
+
+    while (n < 16) {
+        const int c = res->coeffs[n++];
+        const int sign = c < 0;
+        int v = sign ? -c : c; // magnitude; the sign is written separately below
+        if (!VP8PutBit(bw, v != 0, p[1])) {
+            p = res->prob[VP8EncBands[n]][0]; // next coefficient uses context 0 (after a zero)
+            continue;
+        }
+        if (!VP8PutBit(bw, v > 1, p[2])) {
+            p = res->prob[VP8EncBands[n]][1]; // next coefficient uses context 1 (after a one)
+        } else {
+            if (!VP8PutBit(bw, v > 4, p[3])) { // v in 2..4
+                if (VP8PutBit(bw, v != 2, p[4])) VP8PutBit(bw, v == 4, p[5]);
+            } else if (!VP8PutBit(bw, v > 10, p[6])) { // v in 5..10
+                if (!VP8PutBit(bw, v > 6, p[7])) {
+                    VP8PutBit(bw, v == 6, 159);
+                } else {
+                    VP8PutBit(bw, v >= 9, 165);
+                    VP8PutBit(bw, !(v & 1), 145);
+                }
+            } else { // v >= 11: category selector followed by extra bits
+                int mask;
+                const uint8_t* tab;
+                if (v < 3 + (8 << 1)) { // VP8Cat3  (3b)
+                    VP8PutBit(bw, 0, p[8]);
+                    VP8PutBit(bw, 0, p[9]);
+                    v -= 3 + (8 << 0); // offset within 11..18
+                    mask = 1 << 2;
+                    tab = VP8Cat3;
+                } else if (v < 3 + (8 << 2)) { // VP8Cat4  (4b)
+                    VP8PutBit(bw, 0, p[8]);
+                    VP8PutBit(bw, 1, p[9]);
+                    v -= 3 + (8 << 1); // offset within 19..34
+                    mask = 1 << 3;
+                    tab = VP8Cat4;
+                } else if (v < 3 + (8 << 3)) { // VP8Cat5  (5b)
+                    VP8PutBit(bw, 1, p[8]);
+                    VP8PutBit(bw, 0, p[10]);
+                    v -= 3 + (8 << 2); // offset within 35..66
+                    mask = 1 << 4;
+                    tab = VP8Cat5;
+                } else { // VP8Cat6 (11b)
+                    VP8PutBit(bw, 1, p[8]);
+                    VP8PutBit(bw, 1, p[10]);
+                    v -= 3 + (8 << 3); // offset from 67
+                    mask = 1 << 10;
+                    tab = VP8Cat6;
+                }
+                while (mask) { // extra bits, most significant first, one table entry per bit
+                    VP8PutBit(bw, !!(v & mask), *tab++);
+                    mask >>= 1;
+                }
+            }
+            p = res->prob[VP8EncBands[n]][2]; // next coefficient uses context 2 (after a value > 1)
+        }
+        VP8PutBitUniform(bw, sign);
+        if (n == 16 || !VP8PutBit(bw, n <= res->last, p[0])) { // no end marker is written after the 16th coefficient
+            return 1; // EOB
+        }
+    }
+    return 1;
+}
+
+static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it, const VP8ModeScore* const rd) { // writes the levels of 'rd' to 'bw'
+    int x, y, ch;
+    VP8Residual res;
+    uint64_t pos1, pos2, pos3; // writer positions before luma, after luma, after chroma
+    const int i16 = (it->mb_->type_ == 1);
+    const int segment = it->mb_->segment_;
+    VP8Encoder* const enc = it->enc_;
+
+    VP8IteratorNzToBytes(it);
+
+    pos1 = VP8BitWriterPos(bw);
+    if (i16) {
+        VP8InitResidual(0, 1, enc, &res); // arguments are presumably (first coeff, coeff type) - confirm
+        VP8SetResidualCoeffs(rd->y_dc_levels, &res);
+        it->top_nz_[8] = it->left_nz_[8] = PutCoeffs(bw, it->top_nz_[8] + it->left_nz_[8], &res); // slot 8 holds the DC context
+        VP8InitResidual(1, 0, enc, &res); // luma blocks then start at coefficient 1
+    } else {
+        VP8InitResidual(0, 3, enc, &res);
+    }
+
+    // luma-AC
+    for (y = 0; y < 4; ++y) {
+        for (x = 0; x < 4; ++x) {
+            const int ctx = it->top_nz_[x] + it->left_nz_[y]; // context from the top and left flags
+            VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+            it->top_nz_[x] = it->left_nz_[y] = PutCoeffs(bw, ctx, &res); // the result becomes the flag for the next blocks
+        }
+    }
+    pos2 = VP8BitWriterPos(bw);
+
+    // U/V
+    VP8InitResidual(0, 2, enc, &res);
+    for (ch = 0; ch <= 2; ch += 2) { // ch = 0, then ch = 2: one pass per chroma plane
+        for (y = 0; y < 2; ++y) {
+            for (x = 0; x < 2; ++x) {
+                const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+                VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+                it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = PutCoeffs(bw, ctx, &res);
+            }
+        }
+    }
+    pos3 = VP8BitWriterPos(bw);
+    it->luma_bits_ = pos2 - pos1;
+    it->uv_bits_ = pos3 - pos2;
+    it->bit_count_[segment][i16] += it->luma_bits_; // luma is counted per macroblock type (index 0 or 1)
+    it->bit_count_[segment][2] += it->uv_bits_; // chroma is counted at index 2
+    VP8IteratorBytesToNz(it);
+}
+
+// Same as CodeResiduals, but doesn't actually write anything.
+// Instead, it just records the event distribution.
+static void RecordResiduals(VP8EncIterator* const it, const VP8ModeScore* const rd) { // same traversal as CodeResiduals(), through VP8RecordCoeffs()
+    int x, y, ch;
+    VP8Residual res;
+    VP8Encoder* const enc = it->enc_;
+
+    VP8IteratorNzToBytes(it);
+
+    if (it->mb_->type_ == 1) { // i16x16
+        VP8InitResidual(0, 1, enc, &res);
+        VP8SetResidualCoeffs(rd->y_dc_levels, &res);
+        it->top_nz_[8] = it->left_nz_[8] = VP8RecordCoeffs(it->top_nz_[8] + it->left_nz_[8], &res); // slot 8 holds the DC context
+        VP8InitResidual(1, 0, enc, &res); // luma blocks then start at coefficient 1
+    } else {
+        VP8InitResidual(0, 3, enc, &res);
+    }
+
+    // luma-AC
+    for (y = 0; y < 4; ++y) {
+        for (x = 0; x < 4; ++x) {
+            const int ctx = it->top_nz_[x] + it->left_nz_[y]; // context from the top and left flags
+            VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+            it->top_nz_[x] = it->left_nz_[y] = VP8RecordCoeffs(ctx, &res);
+        }
+    }
+
+    // U/V
+    VP8InitResidual(0, 2, enc, &res);
+    for (ch = 0; ch <= 2; ch += 2) { // ch = 0, then ch = 2: one pass per chroma plane
+        for (y = 0; y < 2; ++y) {
+            for (x = 0; x < 2; ++x) {
+                const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+                VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+                it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = VP8RecordCoeffs(ctx, &res);
+            }
+        }
+    }
+
+    VP8IteratorBytesToNz(it);
+}
+
+//------------------------------------------------------------------------------
+// Token buffer
+
+#if !defined(DISABLE_TOKEN_BUFFER)
+
+static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd, VP8TBuffer* const tokens) { // returns 0 once tokens->error_ is set
+    int x, y, ch;
+    VP8Residual res;
+    VP8Encoder* const enc = it->enc_;
+
+    VP8IteratorNzToBytes(it);
+    if (it->mb_->type_ == 1) { // i16x16
+        const int ctx = it->top_nz_[8] + it->left_nz_[8]; // slot 8 holds the DC context
+        VP8InitResidual(0, 1, enc, &res);
+        VP8SetResidualCoeffs(rd->y_dc_levels, &res);
+        it->top_nz_[8] = it->left_nz_[8] = VP8RecordCoeffTokens(ctx, 1, res.first, res.last, res.coeffs, tokens);
+        VP8RecordCoeffs(ctx, &res); // statistics are recorded as well; the return value is not used here
+        VP8InitResidual(1, 0, enc, &res); // luma blocks then start at coefficient 1
+    } else {
+        VP8InitResidual(0, 3, enc, &res);
+    }
+
+    // luma-AC
+    for (y = 0; y < 4; ++y) {
+        for (x = 0; x < 4; ++x) {
+            const int ctx = it->top_nz_[x] + it->left_nz_[y]; // context from the top and left flags
+            VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+            it->top_nz_[x] = it->left_nz_[y] =
+                VP8RecordCoeffTokens(ctx, res.coeff_type, res.first, res.last, res.coeffs, tokens);
+            VP8RecordCoeffs(ctx, &res);
+        }
+    }
+
+    // U/V
+    VP8InitResidual(0, 2, enc, &res);
+    for (ch = 0; ch <= 2; ch += 2) { // ch = 0, then ch = 2: one pass per chroma plane
+        for (y = 0; y < 2; ++y) {
+            for (x = 0; x < 2; ++x) {
+                const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+                VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+                it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
+                    VP8RecordCoeffTokens(ctx, 2, res.first, res.last, res.coeffs, tokens);
+                VP8RecordCoeffs(ctx, &res);
+            }
+        }
+    }
+    VP8IteratorBytesToNz(it);
+    return !tokens->error_;
+}
+
+#endif // !DISABLE_TOKEN_BUFFER
+
+//------------------------------------------------------------------------------
+// ExtraInfo map / Debug function
+
+#if SEGMENT_VISU
+static void SetBlock(uint8_t* p, int value, int size) {
+    // Fills 'size' rows of 'size' bytes with 'value', rows being BPS bytes apart.
+    int row;
+    for (row = 0; row < size; ++row, p += BPS) {
+        memset(p, value, size);
+    }
+}
+#endif
+
+static void ResetSSE(VP8Encoder* const enc) {
+    enc->sse_[0] = 0;
+    enc->sse_[1] = 0;
+    enc->sse_[2] = 0;
+    // Note: enc->sse_[3] is managed by alpha.c
+    enc->sse_count_ = 0;
+}
+
+static void StoreSSE(const VP8EncIterator* const it) {
+    VP8Encoder* const enc = it->enc_;
+    const uint8_t* const in = it->yuv_in_;
+    const uint8_t* const out = it->yuv_out_;
+    // Note: not totally accurate at boundary. And doesn't include in-loop filter.
+    enc->sse_[0] += VP8SSE16x16(in + Y_OFF_ENC, out + Y_OFF_ENC);
+    enc->sse_[1] += VP8SSE8x8(in + U_OFF_ENC, out + U_OFF_ENC);
+    enc->sse_[2] += VP8SSE8x8(in + V_OFF_ENC, out + V_OFF_ENC);
+    enc->sse_count_ += 16 * 16;
+}
+
+// Stores optional per-macroblock side information for the current macroblock.
+//  - When pic->stats is set: accumulates SSE and the block-type / skip counters.
+//  - When pic->extra_info is set: writes one byte for this macroblock, at index
+//    x_ + y_ * mb_w_, whose meaning is selected by pic->extra_info_type.
+//  - When SEGMENT_VISU is enabled: overwrites the reconstructed samples with
+//    flat blocks derived from the segment and prediction modes.
+static void StoreSideInfo(const VP8EncIterator* const it) {
+    VP8Encoder* const enc = it->enc_;
+    const VP8MBInfo* const mb = it->mb_;
+    WebPPicture* const pic = enc->pic_;
+
+    if (pic->stats != NULL) {
+        StoreSSE(it);
+        // block_count_[0] / [1] count macroblocks with type_ 0 / 1, [2] counts skipped ones.
+        enc->block_count_[0] += (mb->type_ == 0);
+        enc->block_count_[1] += (mb->type_ == 1);
+        enc->block_count_[2] += (mb->skip_ != 0);
+    }
+
+    if (pic->extra_info != NULL) {
+        uint8_t* const info = &pic->extra_info[it->x_ + it->y_ * enc->mb_w_];
+        switch (pic->extra_info_type) {
+            case 1: // macroblock type
+                *info = mb->type_;
+                break;
+            case 2: // segment id
+                *info = mb->segment_;
+                break;
+            case 3: // quantizer of the macroblock's segment
+                *info = enc->dqm_[mb->segment_].quant_;
+                break;
+            case 4: // first prediction mode for type_ == 1 macroblocks, 0xff otherwise
+                *info = (mb->type_ == 1) ? it->preds_[0] : 0xff;
+                break;
+            case 5: // chroma prediction mode
+                *info = mb->uv_mode_;
+                break;
+            case 6: { // luma + chroma bits rounded up to bytes, saturated at 255
+                const int b = (int)((it->luma_bits_ + it->uv_bits_ + 7) >> 3);
+                *info = (b > 255) ? 255 : b;
+                break;
+            }
+            case 7: // macroblock alpha_ value
+                *info = mb->alpha_;
+                break;
+            default: // unknown selector: store 0
+                *info = 0;
+                break;
+        }
+    }
+#if SEGMENT_VISU // visualize segments and prediction modes
+    SetBlock(it->yuv_out_ + Y_OFF_ENC, mb->segment_ * 64, 16);
+    SetBlock(it->yuv_out_ + U_OFF_ENC, it->preds_[0] * 64, 8);
+    SetBlock(it->yuv_out_ + V_OFF_ENC, mb->uv_mode_ * 64, 8);
+#endif
+}
+
+static double GetPSNR(uint64_t mse, uint64_t size) {
+    return (mse > 0 && size > 0) ? 10. * log10(255. * 255. * size / mse) : 99;
+}
+
+//------------------------------------------------------------------------------
+//  StatLoop(): only collect statistics (number of skips, token usage, ...).
+//  This is used for deciding optimal probabilities. It also modifies the
+//  quantizer value if some target (size, PSNR) was specified.
+
+// Prepares the encoder for a pass at quality 'q': the quality is clamped to
+// [0, 100], segment parameters and segment probabilities are recomputed, and
+// the statistics and SSE accumulators are reset.
+static void SetLoopParams(VP8Encoder* const enc, float q) {
+    // Make sure the quality parameter is inside valid bounds
+    const float quality = Clamp(q, 0.f, 100.f);
+
+    // setup segment quantizations and filters
+    VP8SetSegmentParams(enc, quality);
+    // compute segment probabilities
+    SetSegmentProbas(enc);
+
+    ResetStats(enc);
+    ResetSSE(enc);
+}
+
+// Runs one statistics-collection pass at quality s->q over at most 'nb_mbs'
+// macroblocks (the loop also stops when the iterator is exhausted).
+//   rd_opt        - level passed to VP8Decimate().
+//   percent_delta - when non-zero, passed to VP8IteratorProgress() after each
+//                   macroblock; a zero return from it aborts the pass.
+//   s             - s->value receives either the estimated size (when
+//                   s->do_size_search is set) or the PSNR of the pass.
+// Returns the accumulated info.H plus enc->segment_hdr_.size_, or 0 when the
+// pass was aborted through VP8IteratorProgress().
+static uint64_t OneStatPass(
+    VP8Encoder* const enc, VP8RDLevel rd_opt, int nb_mbs, int percent_delta, PassStats* const s) {
+    VP8EncIterator it;
+    uint64_t size = 0;
+    uint64_t size_p0 = 0;
+    uint64_t distortion = 0;
+    // Computed before the loop below decrements nb_mbs; 384 is the per-macroblock factor used here.
+    const uint64_t pixel_count = nb_mbs * 384;
+
+    VP8IteratorInit(enc, &it);
+    SetLoopParams(enc, s->q);
+    do {
+        VP8ModeScore info;
+        VP8IteratorImport(&it, NULL);
+        if (VP8Decimate(&it, &info, rd_opt)) {
+            // Just record the number of skips and act like skip_proba is not used.
+            enc->proba_.nb_skip_++;
+        }
+        RecordResiduals(&it, &info);
+        size += info.R + info.H;
+        size_p0 += info.H;
+        distortion += info.D;
+        if (percent_delta && !VP8IteratorProgress(&it, percent_delta)) return 0;
+        VP8IteratorSaveBoundary(&it);
+    } while (VP8IteratorNext(&it) && --nb_mbs > 0);
+
+    size_p0 += enc->segment_hdr_.size_;
+    if (s->do_size_search) {
+        size += FinalizeSkipProba(enc);
+        size += FinalizeTokenProbas(&enc->proba_);
+        // Rounded shift by 11, then add the fixed header estimate.
+        size = ((size + size_p0 + 1024) >> 11) + HEADER_SIZE_ESTIMATE;
+        s->value = (double)size;
+    } else {
+        s->value = GetPSNR(distortion, pixel_count);
+    }
+    return size_p0;
+}
+
+// Runs up to enc->config_->pass statistics passes (see OneStatPass()), adjusting
+// the quality between passes when enc->do_search_ is set, then finalizes the
+// probabilities and level costs.
+// Returns 0 if a pass was aborted (OneStatPass() returned 0); otherwise returns
+// the result of WebPReportProgress().
+static int StatLoop(VP8Encoder* const enc) {
+    const int method = enc->method_;
+    const int do_search = enc->do_search_;
+    const int fast_probe = ((method == 0 || method == 3) && !do_search);
+    int num_pass_left = enc->config_->pass;
+    const int task_percent = 20;
+    // Rounded share of task_percent per pass.
+    // NOTE(review): divides by config_->pass; presumably validated to be >= 1 elsewhere - confirm.
+    const int percent_per_pass = (task_percent + num_pass_left / 2) / num_pass_left;
+    const int final_percent = enc->percent_ + task_percent;
+    const VP8RDLevel rd_opt = (method >= 3 || do_search) ? RD_OPT_BASIC : RD_OPT_NONE;
+    int nb_mbs = enc->mb_w_ * enc->mb_h_;
+    PassStats stats;
+
+    InitPassStats(enc, &stats);
+    ResetTokenStats(enc);
+
+    // Fast mode: quick analysis pass over few mbs. Better than nothing.
+    if (fast_probe) {
+        if (method == 3) { // we need more stats for method 3 to be reliable.
+            nb_mbs = (nb_mbs > 200) ? nb_mbs >> 1 : 100;
+        } else {
+            nb_mbs = (nb_mbs > 200) ? nb_mbs >> 2 : 50;
+        }
+    }
+
+    while (num_pass_left-- > 0) {
+        // Evaluated before the pass runs, i.e. with the dq left by the previous iteration.
+        const int is_last_pass =
+            (fabs(stats.dq) <= DQ_LIMIT) || (num_pass_left == 0) || (enc->max_i4_header_bits_ == 0);
+        const uint64_t size_p0 = OneStatPass(enc, rd_opt, nb_mbs, percent_per_pass, &stats);
+        if (size_p0 == 0) return 0;
+#if (DEBUG_SEARCH > 0)
+        printf("#%d value:%.1lf -> %.1lf   q:%.2f -> %.2f\n", num_pass_left, stats.last_value, stats.value,
+               stats.last_q, stats.q);
+#endif
+        // Retry only while the header-bit limit can still be tightened.
+        if (enc->max_i4_header_bits_ > 0 && size_p0 > PARTITION0_SIZE_LIMIT) {
+            ++num_pass_left;
+            enc->max_i4_header_bits_ >>= 1; // strengthen header bit limitation...
+            continue;                       // ...and start over
+        }
+        if (is_last_pass) {
+            break;
+        }
+        // If no target size: just do several pass without changing 'q'
+        if (do_search) {
+            ComputeNextQ(&stats);
+            if (fabs(stats.dq) <= DQ_LIMIT) break;
+        }
+    }
+    if (!do_search || !stats.do_size_search) {
+        // Need to finalize probas now, since it wasn't done during the search.
+        FinalizeSkipProba(enc);
+        FinalizeTokenProbas(&enc->proba_);
+    }
+    VP8CalculateLevelCosts(&enc->proba_); // finalize costs
+    return WebPReportProgress(enc->pic_, final_percent, &enc->percent_);
+}
+
+//------------------------------------------------------------------------------
+// Main loops
+//
+
+// Initial bit-writer size estimate in bytes per macroblock, indexed by
+// enc->base_quant_ >> 4.
+static const int kAverageBytesPerMB[8] = {50, 24, 16, 9, 7, 5, 3, 2};
+
+// Initializes one bit-writer per partition with an estimated initial size.
+// On failure, releases every bit-writer and flags an out-of-memory error on
+// the picture. Returns non-zero on success, 0 on failure.
+static int PreLoopInitialize(VP8Encoder* const enc) {
+    const int per_mb_bytes = kAverageBytesPerMB[enc->base_quant_ >> 4];
+    const int per_part_bytes = enc->mb_w_ * enc->mb_h_ * per_mb_bytes / enc->num_parts_;
+    int status = 1;
+    int part = 0;
+
+    // Initialize the bit-writers, stopping at the first failure.
+    while (status && part < enc->num_parts_) {
+        status = VP8BitWriterInit(&enc->parts_[part], per_part_bytes);
+        ++part;
+    }
+    if (status) {
+        return status;
+    }
+
+    // malloc error occurred
+    VP8EncFreeBitWriters(enc);
+    WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    return status;
+}
+
+// Finishes every partition's bit-writer and, on success, stores the residual
+// byte counters (when statistics are requested) and adjusts the filter
+// strength. On failure the bit-writers are released.
+// 'ok' is the status so far; the returned value is the updated status.
+static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
+    VP8Encoder* const enc = it->enc_;
+
+    // Finalize the partitions, check for extra errors.
+    if (ok) {
+        int part;
+        for (part = 0; part < enc->num_parts_; ++part) {
+            VP8BitWriterFinish(&enc->parts_[part]);
+            ok &= !enc->parts_[part].error_;
+        }
+    }
+
+    if (!ok) {
+        // Something bad happened -> need to do some memory cleanup.
+        VP8EncFreeBitWriters(enc);
+        return ok;
+    }
+
+    // All good. Finish up: finalize byte counters...
+    if (enc->pic_->stats != NULL) {
+        int kind, seg;
+        for (kind = 0; kind <= 2; ++kind) {
+            for (seg = 0; seg < NUM_MB_SEGMENTS; ++seg) {
+                // bits -> bytes, rounded up
+                enc->residual_bytes_[kind][seg] = (int)((it->bit_count_[seg][kind] + 7) >> 3);
+            }
+        }
+    }
+    // ...and store filter stats.
+    VP8AdjustFilterStrength(it);
+    return ok;
+}
+
+//------------------------------------------------------------------------------
+//  VP8EncLoop(): does the final bitstream coding.
+
+// Resets the non-zero predictors of the current macroblock after it has been
+// coded as skipped.
+static void ResetAfterSkip(VP8EncIterator* const it) {
+    if (it->mb_->type_ != 1) {
+        // preserve the dc_nz bit (bit 24), clear everything else
+        *it->nz_ &= (1 << 24);
+        return;
+    }
+    // reset all predictors
+    *it->nz_ = 0;
+    it->left_nz_[8] = 0;
+}
+
+// Final bitstream coding loop: initializes the bit-writers, runs the
+// statistics loop, then codes every macroblock through the iterator's
+// bit-writer.
+// Returns 0 if PreLoopInitialize() fails; otherwise returns the status
+// produced by PostLoopFinalize().
+int VP8EncLoop(VP8Encoder* const enc) {
+    VP8EncIterator it;
+    int ok = PreLoopInitialize(enc);
+    if (!ok) return 0;
+
+    // NOTE(review): the return value of StatLoop() is not checked here.
+    StatLoop(enc); // stats-collection loop
+
+    VP8IteratorInit(enc, &it);
+    VP8InitFilter(&it);
+    do {
+        VP8ModeScore info;
+        const int dont_use_skip = !enc->proba_.use_skip_proba_;
+        const VP8RDLevel rd_opt = enc->rd_opt_level_;
+
+        VP8IteratorImport(&it, NULL);
+        // Warning! order is important: first call VP8Decimate() and
+        // *then* decide how to code the skip decision if there's one.
+        if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) {
+            CodeResiduals(it.bw_, &it, &info);
+        } else { // reset predictors after a skip
+            ResetAfterSkip(&it);
+        }
+        StoreSideInfo(&it);
+        VP8StoreFilterStats(&it);
+        VP8IteratorExport(&it);
+        // A zero return from VP8IteratorProgress() ends the loop.
+        ok = VP8IteratorProgress(&it, 20);
+        VP8IteratorSaveBoundary(&it);
+    } while (ok && VP8IteratorNext(&it));
+
+    return PostLoopFinalize(&it, ok);
+}
+
+//------------------------------------------------------------------------------
+// Single pass using Token Buffer.
+
+#if !defined(DISABLE_TOKEN_BUFFER)
+
+#define MIN_COUNT 96 // minimum number of macroblocks before updating stats
+
+// Token-buffer based coding loop.
+// Runs up to enc->config_->pass passes. Each pass records the tokens of every
+// macroblock into enc->tokens_, refreshing the probabilities and cost tables
+// every 'max_count' macroblocks. When enc->do_search_ is set, the quality is
+// adjusted between passes. After the last pass the tokens are emitted into
+// partition 0.
+// Returns 0 if PreLoopInitialize() fails; otherwise returns the status
+// produced by PostLoopFinalize().
+int VP8EncTokenLoop(VP8Encoder* const enc) {
+    // Roughly refresh the proba eight times per pass
+    int max_count = (enc->mb_w_ * enc->mb_h_) >> 3;
+    int num_pass_left = enc->config_->pass;
+    const int do_search = enc->do_search_;
+    VP8EncIterator it;
+    VP8EncProba* const proba = &enc->proba_;
+    const VP8RDLevel rd_opt = enc->rd_opt_level_;
+    const uint64_t pixel_count = enc->mb_w_ * enc->mb_h_ * 384;
+    PassStats stats;
+    int ok;
+
+    InitPassStats(enc, &stats);
+    ok = PreLoopInitialize(enc);
+    if (!ok) return 0;
+
+    if (max_count < MIN_COUNT) max_count = MIN_COUNT;
+
+    assert(enc->num_parts_ == 1);
+    assert(enc->use_tokens_);
+    assert(proba->use_skip_proba_ == 0);
+    assert(rd_opt >= RD_OPT_BASIC); // otherwise, token-buffer won't be useful
+    assert(num_pass_left > 0);
+
+    while (ok && num_pass_left-- > 0) {
+        // Evaluated before the pass runs, i.e. with the dq left by the previous iteration.
+        const int is_last_pass =
+            (fabs(stats.dq) <= DQ_LIMIT) || (num_pass_left == 0) || (enc->max_i4_header_bits_ == 0);
+        uint64_t size_p0 = 0;
+        uint64_t distortion = 0;
+        int cnt = max_count;
+        VP8IteratorInit(enc, &it);
+        SetLoopParams(enc, stats.q);
+        if (is_last_pass) {
+            ResetTokenStats(enc);
+            VP8InitFilter(&it); // don't collect stats until last pass (too costly)
+        }
+        VP8TBufferClear(&enc->tokens_);
+        do {
+            VP8ModeScore info;
+            VP8IteratorImport(&it, NULL);
+            if (--cnt < 0) {
+                FinalizeTokenProbas(proba);
+                VP8CalculateLevelCosts(proba); // refresh cost tables for rd-opt
+                cnt = max_count;
+            }
+            VP8Decimate(&it, &info, rd_opt);
+            ok = RecordTokens(&it, &info, &enc->tokens_);
+            if (!ok) {
+                WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+                break;
+            }
+            size_p0 += info.H;
+            distortion += info.D;
+            if (is_last_pass) {
+                StoreSideInfo(&it);
+                VP8StoreFilterStats(&it);
+                VP8IteratorExport(&it);
+                ok = VP8IteratorProgress(&it, 20);
+            }
+            VP8IteratorSaveBoundary(&it);
+        } while (ok && VP8IteratorNext(&it));
+        if (!ok) break;
+
+        size_p0 += enc->segment_hdr_.size_;
+        if (stats.do_size_search) {
+            uint64_t size = FinalizeTokenProbas(&enc->proba_);
+            size += VP8EstimateTokenSize(&enc->tokens_, (const uint8_t*)proba->coeffs_);
+            size = (size + size_p0 + 1024) >> 11; // -> size in bytes
+            size += HEADER_SIZE_ESTIMATE;
+            stats.value = (double)size;
+        } else { // compute and store PSNR
+            stats.value = GetPSNR(distortion, pixel_count);
+        }
+
+#if (DEBUG_SEARCH > 0)
+        printf("#%2d metric:%.1lf -> %.1lf   last_q=%.2lf q=%.2lf dq=%.2lf\n", num_pass_left, stats.last_value,
+               stats.value, stats.last_q, stats.q, stats.dq);
+#endif
+        // Retry only while the header-bit limit can still be tightened (same
+        // guard as StatLoop()). Without the '> 0' check, a partition 0 that stays
+        // above the limit once max_i4_header_bits_ has reached 0 would make
+        // this loop restore num_pass_left and repeat the same pass forever.
+        if (enc->max_i4_header_bits_ > 0 && size_p0 > PARTITION0_SIZE_LIMIT) {
+            ++num_pass_left;
+            enc->max_i4_header_bits_ >>= 1; // strengthen header bit limitation...
+            continue;                       // ...and start over
+        }
+        if (is_last_pass) {
+            break; // done
+        }
+        if (do_search) {
+            ComputeNextQ(&stats); // Adjust q
+        }
+    }
+    if (ok) {
+        if (!stats.do_size_search) {
+            // Not done during the passes in this mode, so finalize probas now.
+            FinalizeTokenProbas(&enc->proba_);
+        }
+        ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0, (const uint8_t*)proba->coeffs_, 1);
+    }
+    ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
+    return PostLoopFinalize(&it, ok);
+}
+
+#else
+
+// Stub used when the token buffer is compiled out (DISABLE_TOKEN_BUFFER):
+// ignores 'enc' and always returns 0.
+int VP8EncTokenLoop(VP8Encoder* const enc) {
+    (void)enc;
+    return 0; // we shouldn't be here.
+}
+
+#endif // DISABLE_TOKEN_BUFFER
+
+//------------------------------------------------------------------------------
+
+// Fills 'des' with the picture geometry and the quantization parameters of
+// segment 0 (enc->dqm_[0]) taken from 'enc'.
+// des->id_pic and des->cnt_line_mb are not written by this function.
+void Set_AllPicInfo(AllPicInfo* des, VP8Encoder* const enc) {
+    const WebPPicture* const pic = enc->pic_;
+    const VP8SegmentInfo* const dqm = &enc->dqm_[0];
+
+    // Picture geometry.
+    des->y_stride = pic->y_stride;
+    des->uv_stride = pic->uv_stride;
+    des->width = pic->width;
+    des->height = pic->height;
+    des->mb_w = enc->mb_w_;
+    des->mb_h = enc->mb_h_;
+
+    // Lambda values of segment 0.
+    des->seg_lambda_p16 = dqm->lambda_i16_;
+    des->seg_lambda_p44 = dqm->lambda_i4_;
+    des->seg_tlambda = dqm->tlambda_;
+    des->seg_lambda_uv = dqm->lambda_uv_;
+    des->seg_tlambda_m = dqm->lambda_mode_;
+
+    // For each matrix, index 0 and index 1 of q_/iq_/bias_ are copied into the
+    // '_0' and '_n' fields respectively.
+    des->seg_y1_q_0 = dqm->y1_.q_[0]; // quantizer steps
+    des->seg_y1_q_n = dqm->y1_.q_[1];
+    des->seg_y1_iq_0 = dqm->y1_.iq_[0]; // reciprocals fixed point.
+    des->seg_y1_iq_n = dqm->y1_.iq_[1];
+    des->seg_y1_bias_0 = dqm->y1_.bias_[0]; // rounding bias
+    des->seg_y1_bias_n = dqm->y1_.bias_[1];
+
+    des->seg_y2_q_0 = dqm->y2_.q_[0]; // quantizer steps
+    des->seg_y2_q_n = dqm->y2_.q_[1];
+    des->seg_y2_iq_0 = dqm->y2_.iq_[0]; // reciprocals fixed point.
+    des->seg_y2_iq_n = dqm->y2_.iq_[1];
+    des->seg_y2_bias_0 = dqm->y2_.bias_[0]; // rounding bias
+    des->seg_y2_bias_n = dqm->y2_.bias_[1];
+
+    des->seg_uv_q_0 = dqm->uv_.q_[0]; // quantizer steps
+    des->seg_uv_q_n = dqm->uv_.q_[1];
+    des->seg_uv_iq_0 = dqm->uv_.iq_[0]; // reciprocals fixed point.
+    des->seg_uv_iq_n = dqm->uv_.iq_[1];
+    des->seg_uv_bias_0 = dqm->uv_.bias_[0]; // rounding bias
+    des->seg_uv_bias_n = dqm->uv_.bias_[1];
+
+    // Sharpening tables: 16 entries each for y1 and uv.
+    for (int i = 0; i < 16; i++) {
+        des->seg_y1_sharpen[i] = dqm->y1_.sharpen_[i];
+        des->seg_uv_sharpen[i] = dqm->uv_.sharpen_[i];
+    }
+}
+
+// Prepares 'enc' for the asynchronous token loop and fills 'picinfo' from it:
+// resets the probabilities to defaults, initializes the bit-writers, applies
+// the loop parameters for the initial quality, clears token statistics and the
+// token buffer, then calls Set_AllPicInfo().
+// Returns 1 on success, 0 when PreLoopInitialize() fails.
+int VP8EncTokenLoopAsyncPicInfoSet(VP8Encoder*& enc, AllPicInfo& picinfo) {
+    VP8DefaultProbas(enc);
+
+    PassStats pass_stats;
+    InitPassStats(enc, &pass_stats);
+
+    if (!PreLoopInitialize(enc)) {
+        fprintf(stderr, "PreLoopInitialize error\n");
+        return 0;
+    }
+
+    SetLoopParams(enc, pass_stats.q);
+    ResetTokenStats(enc);
+    VP8TBufferClear(&enc->tokens_);
+
+    // Not used below; kept so the object is still constructed as before.
+    ap_NoneZero ap_nz;
+
+    // Set picture information
+    Set_AllPicInfo(&picinfo, enc);
+    return 1;
+}
+
+// Stages 'Numbatch' pictures into the host buffers of encloopparaAsync[buf]
+// and enqueues the migrations of the input, y, u and v buffers, plus a
+// content-undefined migration of the output buffer.
+//   Numbatch       - number of pictures in the batch.
+//   enc            - array of Numbatch encoders providing the source planes.
+//   picinfo        - array of Numbatch picture descriptors; id_pic is set here.
+//   buf            - index into encloopparaAsync.
+//   num_wait_event / wait_event - wait list passed to the four input migrations.
+//   event          - event[0..3] receive the events of the input, y, u and v
+//                    migrations, in that order.
+// Every step is attempted even after a failure. Returns 1 if all enqueue calls
+// succeeded, 0 if any of them reported an error.
+int VP8EncTokenLoopAsyncHost2Device(const int Numbatch,
+                                    VP8Encoder** enc,
+                                    AllPicInfo* picinfo,
+                                    const int buf,
+                                    cl_uint num_wait_event,
+                                    cl_event* wait_event,
+                                    std::array<cl_event, 4>& event) {
+    StopProfilingWatch watch;
+    double watch_time;
+    int watch_count;
+
+    int err;
+    int ok = 1; // cleared by any failing enqueue below
+    uint64_t y_size[Numbatch];
+    uint64_t uv_size[Numbatch];
+
+    for (int i = 0; i < Numbatch; i++) {
+        const int y_width = enc[i]->pic_->width;
+        const int y_height = enc[i]->pic_->height;
+        // Chroma planes are half-size, rounded up.
+        const int uv_width = (y_width + 1) >> 1;
+        const int uv_height = (y_height + 1) >> 1;
+
+        // plane sizes in bytes
+        y_size[i] = y_width * y_height * sizeof(uint8_t);
+        uv_size[i] = uv_width * uv_height * sizeof(uint8_t);
+    }
+
+    StartProfiling(&watch);
+
+    // input: one AllPicInfo record per picture
+    uint64_t offset_info = 0;
+    for (int i = 0; i < Numbatch; i++) {
+        picinfo[i].id_pic = Numbatch - i - 1;
+        memcpy(encloopparaAsync[buf].inputcpu + offset_info, &picinfo[i], sizeof(AllPicInfo));
+        offset_info += Get_Busoffset_info_32bits() * sizeof(uint32_t);
+    }
+
+    err = clEnqueueMigrateMemObjects(hardware.mQueue, 1, &(encloopparaAsync[buf].input), 0, num_wait_event, wait_event,
+                                     &event[0]);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+
+    // y plane
+    uint64_t offset_y = 0;
+    for (int i = 0; i < Numbatch; i++) {
+        memcpy(encloopparaAsync[buf].ycpu + offset_y, enc[i]->pic_->y, y_size[i]);
+        offset_y += Get_Busoffset_ysrc(y_size[i]) * sizeof(uint32_t);
+    }
+
+    err = clEnqueueMigrateMemObjects(hardware.mQueue, 1, &(encloopparaAsync[buf].y), 0, num_wait_event, wait_event,
+                                     &event[1]);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+
+    // u plane
+    uint64_t offset_u = 0;
+    for (int i = 0; i < Numbatch; i++) {
+        memcpy(encloopparaAsync[buf].ucpu + offset_u, enc[i]->pic_->u, uv_size[i]);
+        offset_u += Get_Busoffset_uvsrc(uv_size[i]) * sizeof(uint32_t);
+    }
+
+    err = clEnqueueMigrateMemObjects(hardware.mQueue, 1, &(encloopparaAsync[buf].u), 0, num_wait_event, wait_event,
+                                     &event[2]);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+
+    // v plane
+    uint64_t offset_v = 0;
+    for (int i = 0; i < Numbatch; i++) {
+        memcpy(encloopparaAsync[buf].vcpu + offset_v, enc[i]->pic_->v, uv_size[i]);
+        offset_v += Get_Busoffset_uvsrc(uv_size[i]) * sizeof(uint32_t);
+    }
+
+    err = clEnqueueMigrateMemObjects(hardware.mQueue, 1, &(encloopparaAsync[buf].v), 0, num_wait_event, wait_event,
+                                     &event[3]);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+
+    // output buffer: migrated without preserving its contents, no wait list, no event
+    err = clEnqueueMigrateMemObjects(hardware.mQueue, 1, &(encloopparaAsync[buf].output),
+                                     CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED, 0, NULL, NULL);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+
+    watch_time = 0.0;
+    StopProfiling(&watch, &watch_time, &watch_count);
+    fprintf(stderr, "INFO: Host2Device finished. Computation time is %f (ms) \n", watch_time);
+
+    return ok;
+}
+
+//------------------------------------------------------------------------------
+// Enqueues the migration back to the host of the four result buffers of
+// encloopparaAsync[buf]: output_prob, output_ret, output_pred and output_bw.
+//   buf            - index into encloopparaAsync.
+//   num_wait_event / wait_event - wait list passed to every migration.
+//   event          - event[0..3] receive the events of the four migrations,
+//                    in the order listed above.
+// NOTE(review): this function always returns 0, and 'ok' is written on failure
+// but never read, so callers cannot detect an enqueue error from the return
+// value - confirm what callers expect before changing it.
+int VP8EncTokenLoopAsyncDevice2Host(const int buf,
+                                    cl_uint num_wait_event,
+                                    cl_event* wait_event,
+                                    std::array<cl_event, 4>& event) {
+    StopProfilingWatch watch;
+    double watch_time;
+    int watch_count;
+
+    int ok;
+    cl_int err;
+
+    //=============================================================//
+    StartProfiling(&watch);
+
+    err = clEnqueueMigrateMemObjects(hardware.mQueue, 1, &(encloopparaAsync[buf].output_prob),
+                                     CL_MIGRATE_MEM_OBJECT_HOST, num_wait_event, wait_event, &event[0]);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+    // fprintf(stderr, "INFO: COPY .output_prob to Host.\n");
+
+    // ret==========================================================//
+    err = clEnqueueMigrateMemObjects(hardware.mQueue, 1, &(encloopparaAsync[buf].output_ret),
+                                     CL_MIGRATE_MEM_OBJECT_HOST, num_wait_event, wait_event, &event[1]);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+    // fprintf(stderr, "INFO: COPY .output_ret to Host.\n");
+
+    // pred=========================================================//
+    err = clEnqueueMigrateMemObjects(hardware.mQueue, 1, &(encloopparaAsync[buf].output_pred),
+                                     CL_MIGRATE_MEM_OBJECT_HOST, num_wait_event, wait_event, &event[2]);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+    // fprintf(stderr, "INFO: COPY .output_pred to Host.\n");
+
+    // bw=============================================================//
+    err = clEnqueueMigrateMemObjects(hardware.mQueue, 1, &(encloopparaAsync[buf].output_bw), CL_MIGRATE_MEM_OBJECT_HOST,
+                                     num_wait_event, wait_event, &event[3]);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+    // fprintf(stderr, "INFO: COPY .output_bw to Host.\n");
+
+    // The time reported below covers only the enqueue calls made above.
+    watch_time = 0.0;
+    StopProfiling(&watch, &watch_time, &watch_count);
+    fprintf(stderr, "INFO: Device2Host finished. Computation time is %f (ms) \n", watch_time);
+
+    /* err = clFinish(hardware.mQueue); */
+    /* if (err != CL_SUCCESS) { */
+    /*   fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err)); */
+    /*   ok = 0; */
+    /* } */
+    return 0;
+};
+
+// Binds the buffers of encloopparaAsync[buf] to the prediction kernel
+// encloop.mKernelPred[buf] and enqueues it.
+// Kernel arguments, in order: input, y, u, v, output, output_prob.
+//   buf            - index of the kernel instance and buffer set.
+//   num_wait_event / wait_event - wait list for the kernel launch.
+//   event          - event[0] receives the launch event.
+// The launch is attempted even if binding an argument failed. Returns 1 when
+// every OpenCL call succeeded, 0 if any of them reported an error.
+int VP8EncTokenLoopAsyncPredKernel(const int buf,
+                                   cl_uint num_wait_event,
+                                   cl_event* wait_event,
+                                   std::array<cl_event, 1>& event) {
+    int ok = 1; // cleared by any failing OpenCL call below
+    cl_int err;
+
+    StopProfilingWatch watch;
+    double watch_time;
+    int watch_count;
+
+    StartProfiling(&watch);
+
+    // Set args
+    int arg = 0;
+    err = clSetKernelArg(encloop.mKernelPred[buf], arg++, sizeof(cl_mem), &(encloopparaAsync[buf].input));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+
+    err = clSetKernelArg(encloop.mKernelPred[buf], arg++, sizeof(cl_mem), &(encloopparaAsync[buf].y));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+
+    err = clSetKernelArg(encloop.mKernelPred[buf], arg++, sizeof(cl_mem), &(encloopparaAsync[buf].u));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+
+    err = clSetKernelArg(encloop.mKernelPred[buf], arg++, sizeof(cl_mem), &(encloopparaAsync[buf].v));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+
+    err = clSetKernelArg(encloop.mKernelPred[buf], arg++, sizeof(cl_mem), &(encloopparaAsync[buf].output));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+
+    err = clSetKernelArg(encloop.mKernelPred[buf], arg++, sizeof(cl_mem), &(encloopparaAsync[buf].output_prob));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s :output_prob \n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+
+    // launch kernel: a single work-item
+    size_t globalSize[] = {1, 1, 1};
+    size_t localSize[] = {1, 1, 1};
+
+    err = clEnqueueNDRangeKernel(hardware.mQueue, encloop.mKernelPred[buf], 1, NULL, globalSize, localSize,
+                                 num_wait_event, wait_event, &event[0]);
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+
+    // The time reported below covers only argument setup and the enqueue call.
+    watch_time = 0.0;
+    StopProfiling(&watch, &watch_time, &watch_count);
+    fprintf(stderr, "INFO: PredKernel Finished. Computation time is %f (ms) \n", watch_time);
+
+    return ok;
+}
+
+// Binds the buffers of encloopparaAsync[buf] to the kernel
+// encloop.mKernelAC[buf] and enqueues it.
+// Kernel arguments, in order: output, output_prob, output_bw, output_ret,
+// output_pred.
+//   buf            - index of the kernel instance and buffer set.
+//   num_wait_event / wait_event - wait list for the kernel launch.
+//   event          - event[0] receives the launch event.
+// The launch is attempted even if binding an argument failed. Returns 1 when
+// every OpenCL call succeeded, 0 if any of them reported an error.
+int VP8EncTokenLoopAsyncACKernel(const int buf,
+                                 cl_uint num_wait_event,
+                                 cl_event* wait_event,
+                                 std::array<cl_event, 1>& event) {
+    int ok = 1; // cleared by any failing OpenCL call below
+    cl_int err;
+
+    StopProfilingWatch watch;
+    double watch_time;
+    int watch_count;
+
+    StartProfiling(&watch);
+
+    int arg = 0;
+
+    // 1)
+    err = clSetKernelArg(encloop.mKernelAC[buf], arg++, sizeof(cl_mem), &(encloopparaAsync[buf].output));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s :output (level) \n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+    // 2)
+    err = clSetKernelArg(encloop.mKernelAC[buf], arg++, sizeof(cl_mem), &(encloopparaAsync[buf].output_prob));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s :output_prob \n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+    // 3)
+    err = clSetKernelArg(encloop.mKernelAC[buf], arg++, sizeof(cl_mem), &(encloopparaAsync[buf].output_bw));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s :output_bw \n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+    // 4)
+    err = clSetKernelArg(encloop.mKernelAC[buf], arg++, sizeof(cl_mem), &(encloopparaAsync[buf].output_ret));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s :output_ret \n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+    // 5)
+    err = clSetKernelArg(encloop.mKernelAC[buf], arg++, sizeof(cl_mem), &(encloopparaAsync[buf].output_pred));
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s :output_pred \n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+
+    // launch kernel: a single work-item
+    size_t globalSize[] = {1, 1, 1};
+    size_t localSize[] = {1, 1, 1};
+
+    err = clEnqueueNDRangeKernel(hardware.mQueue, encloop.mKernelAC[buf], 1, NULL, globalSize, localSize,
+                                 num_wait_event, wait_event, &event[0]);
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+
+    // The time reported below covers only argument setup and the enqueue call.
+    watch_time = 0.0;
+    StopProfiling(&watch, &watch_time, &watch_count);
+    fprintf(stderr, "INFO: ACKernel Finished. Computation time is %f (ms) \n", watch_time);
+
+    return ok;
+}
+
+// Restores the bit-writer 'bw_des' from kernel output.
+// Seven consecutive 32-bit words are read from 'pout_prob', starting
+// SIZE32_AC_STATE words before 32-bit word index 2048 / 4, and copied into the
+// writer's state fields. The writer's data buffer is pointed at 'pout_bw'
+// (no copy is made).
+void VP8BitWriterCpy_hls(VP8BitWriter* const bw_des, uint8_t* const pout_prob, uint8_t* const pout_bw) {
+    const uint32_t* const state = (uint32_t*)pout_prob + 2048 / 4 - SIZE32_AC_STATE;
+
+    bw_des->range_ = state[0]; // - 1;
+    bw_des->value_ = state[1];
+    bw_des->nb_bits_ = state[2];
+    bw_des->pos_ = state[3];
+    bw_des->run_ = state[4];
+    bw_des->max_pos_ = state[5];
+    bw_des->error_ = state[6];
+
+    bw_des->buf_ = (uint8_t*)(pout_bw);
+}
+
+// Imports the kernel results for one picture back into 'enc'.
+//   pout_prob - first 32 * 33 bytes are copied into enc->proba_.coeffs_; it is
+//               also handed to VP8BitWriterCpy_hls().
+//   pout_bw   - becomes the buffer of partition 0's bit-writer.
+//   pout_ret  - one byte per macroblock: bits 0-3 uv_mode_, bit 4 type_,
+//               bit 5 skip_.
+//   pout_pred - eight bytes per macroblock, two 4-bit prediction modes per
+//               byte (low nibble first), filling a 4x4 cell of enc->preds_.
+//   it        - re-initialized on 'enc'; do_trellis_ is forced to 0.
+// Always returns 1.
+int VP8EncTokenLoopAsyncAfterAC(VP8Encoder* enc,
+                                uint8_t* pout_prob,
+                                uint8_t* pout_bw,
+                                uint8_t* pout_ret,
+                                uint8_t* pout_pred,
+                                VP8EncIterator& it) {
+    VP8IteratorInit(enc, &it);
+    it.do_trellis_ = 0;
+
+    memcpy((uint8_t*)enc->proba_.coeffs_, (uint8_t*)pout_prob, 32 * 33);
+    enc->proba_.dirty_ = 1;
+    VP8BitWriterCpy_hls(enc->parts_ + 0, pout_prob, pout_bw);
+
+    const uint8_t* ret_cursor = pout_ret;
+    const uint8_t* pred_cursor = pout_pred;
+
+    for (int mb_row = 0; mb_row < enc->mb_h_; mb_row++) {
+        // contextual macroblock infos for this row
+        VP8MBInfo* mb = enc->mb_info_ + mb_row * enc->mb_w_;
+        // prediction modes: each macroblock row covers 4 rows of preds_
+        uint8_t* modes = enc->preds_ + mb_row * 4 * enc->preds_w_;
+
+        for (int mb_col = 0; mb_col < enc->mb_w_; mb_col++) {
+            const uint8_t packed = *ret_cursor++;
+            mb->uv_mode_ = packed & 15;
+            mb->type_ = (packed & 16) >> 4;
+            mb->skip_ = (packed & 32) >> 5;
+
+            for (int row = 0; row < 4; row++) {
+                uint8_t* const line = modes + enc->preds_w_ * row;
+                for (int col = 0; col < 4; col += 2) {
+                    const uint8_t nibbles = *pred_cursor++;
+                    line[col] = nibbles & 15;
+                    line[col + 1] = (nibbles >> 4) & 15;
+                }
+            }
+
+            modes += 4;
+            mb += 1;
+        }
+    }
+    return 1;
+}
diff --git a/codec/L2/demos/webpEnc/host/src/enc/frame.c_bck_3 b/codec/L2/demos/webpEnc/host/src/enc/frame.c_bck_3
new file mode 100644
index 0000000000..25e62bc43a
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/frame.c_bck_3
@@ -0,0 +1,2380 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   frame coding and analysis
+//
+// Author: Skal (pascal.massimino@gmail.com)
+#include <hls_stream.h>
+#include <ap_int.h>
+#include "../../src_syn/vp8_hls_syn.h"
+
+#include <string.h>
+#include <math.h>
+#include <stdio.h>
+
+#include "./cost.h"
+#include "./vp8enci.h"
+#include "../dsp/dsp.h"
+#include "../webp/format_constants.h"  // RIFF constants
+#include "../utils/profiling.h"
+#include "../../examples/create_kernel.h"
+
+//#include "../../examples/my_syn.h"
+
+#define SEGMENT_VISU 0    // non-zero: StoreSideInfo() overwrites yuv_out_ with segment / prediction-mode values
+#define DEBUG_SEARCH 0    // useful to track search convergence (> 0 prints per-pass value and q in StatLoop)
+#define DEBUG_PROBAS 0    // non-zero: debug_probas() actually prints; otherwise its body is empty
+
+#include <stdio.h>
+
+// Debug helper: when DEBUG_PROBAS is non-zero, prints the scalar fields of
+// 'probas', then every coeffs_[t][b][c][p] probability and every
+// stats_[t][b][c][p] counter (one text line per [t][b][c] triple).
+// Compiles to an empty function when DEBUG_PROBAS is 0.
+void debug_probas(const VP8EncProba* const probas) {
+#if DEBUG_PROBAS
+  int t, b, c, p;
+  printf("simple variable\n");
+  printf("%d %d %d %d %d %d %d\n",
+         probas->segments_[0], probas->segments_[1], probas->segments_[2],
+         probas->skip_proba_, probas->dirty_,
+         probas->use_skip_proba_, probas->nb_skip_);
+  printf("coeffs_===========================\n");
+  // Coefficient probabilities, promoted to int for "%d".
+  for (t = 0; t < NUM_TYPES; ++t) {
+    for (b = 0; b < NUM_BANDS; ++b) {
+      for (c = 0; c < NUM_CTX; ++c) {
+        for (p = 0; p < NUM_PROBAS; ++p) {
+          printf("coeffs_[%d][%d][%d][%d]:%d ",
+                 t, b, c, p, probas->coeffs_[t][b][c][p]);
+        }
+        printf("\n");
+      }
+    }
+  }
+  printf("stats===============================\n");
+  for (t = 0; t < NUM_TYPES; ++t) {
+    for (b = 0; b < NUM_BANDS; ++b) {
+      for (c = 0; c < NUM_CTX; ++c) {
+        for (p = 0; p < NUM_PROBAS; ++p) {
+          // Printed as unsigned: a counter packs two 16-bit fields (see
+          // FinalizeTokenProbas) and may exceed INT_MAX, which "%d" would
+          // show as a negative number.
+          const unsigned int stats = (unsigned int)probas->stats_[t][b][c][p];
+          printf("Stats[%d][%d][%d][%d]:%u ", t, b, c, p, stats);
+        }
+        printf("\n");
+      }
+    }
+  }
+#endif
+}
+
+// Prints every token-statistics counter of 'proba', one per line, in the
+// form "Stats[t][b][c][p]=<value>".
+// The counter is printed as unsigned: each entry packs two 16-bit fields
+// (see FinalizeTokenProbas), so it can exceed INT_MAX, which a "%d"
+// conversion would show as a negative number.
+void debug_log_stats_p(VP8EncProba *proba) {
+  int t, b, c, p;
+  for (t = 0; t < NUM_TYPES; ++t) {
+    for (b = 0; b < NUM_BANDS; ++b) {
+      for (c = 0; c < NUM_CTX; ++c) {
+        for (p = 0; p < NUM_PROBAS; ++p) {
+          const unsigned int stats = (unsigned int)proba->stats_[t][b][c][p];
+          printf("Stats[%d][%d][%d][%d]=%u\n",
+                 t, b, c, p, stats);
+        }
+      }
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// multi-pass convergence
+
+#define HEADER_SIZE_ESTIMATE (RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE +	\
+                              VP8_FRAME_HEADER_SIZE)
+#define DQ_LIMIT 0.4  // convergence is considered reached once fabs(dq) <= DQ_LIMIT
+// we allow 2k of extra head-room in PARTITION0 limit. StatLoop compares it with the unshifted size_p0, hence '<< 11'.
+#define PARTITION0_SIZE_LIMIT ((VP8_MAX_PARTITION0_SIZE - 2048ULL) << 11)
+
+typedef struct {  // struct for organizing convergence in either size or PSNR
+  int is_first;               // 1 after InitPassStats(), cleared by the first ComputeNextQ()
+  float dq;                   // last (clamped) step applied to q; starts at 10
+  float q, last_q;            // current and previous quality factor
+  double value, last_value;   // PSNR or size, for the current and the previous pass
+  double target;              // goal for 'value': target size, else target PSNR, else 40
+  int do_size_search;         // non-zero when config_->target_size is non-zero
+} PassStats;
+
+// Seeds 's' for the quality search; returns non-zero when a target size is requested.
+static int InitPassStats(const VP8Encoder* const enc, PassStats* const s) {
+  const float psnr_goal = enc->config_->target_PSNR;
+  const uint64_t size_goal = (uint64_t)enc->config_->target_size;
+  const int want_size = (size_goal != 0);
+  const double fallback = (psnr_goal > 0.) ? psnr_goal : 40.;   // default, just in case
+  s->is_first = 1;
+  s->dq = 10.f;
+  s->last_q = enc->config_->quality;
+  s->q = s->last_q;
+  s->value = s->last_value = 0.;
+  s->target = want_size ? (double)size_goal : fallback;   // a size goal wins over a PSNR goal
+  s->do_size_search = want_size;
+  return want_size;
+}
+
+static float Clamp(float v, float min, float max) {  // saturates v into [min, max]
+  return !(v < min) ? (!(v > max) ? v : max) : min;  // lower bound is tested first, as before
+}
+
+// Chooses the next quality factor: a fixed +/- s->dq step on the first call,
+// afterwards a linear interpolation through the last two (q, value) points.
+static float ComputeNextQ(PassStats* const s) {
+  float step = 0.f;  // stays 0 if 'value' did not change: we're done?!
+  if (s->is_first) {
+    step = (s->value > s->target) ? -s->dq : s->dq;
+    s->is_first = 0;
+  } else if (s->value != s->last_value) {
+    const double slope = (s->target - s->value) / (s->last_value - s->value);
+    step = (float)(slope * (s->last_q - s->q));
+  }
+  s->dq = Clamp(step, -30.f, 30.f);  // Limit variable to avoid large swings.
+  s->last_value = s->value;
+  s->last_q = s->q;
+  // The new q is additionally kept inside [0, 100].
+  s->q = Clamp(s->q + s->dq, 0.f, 100.f);
+  return s->q;
+}
+
+//------------------------------------------------------------------------------
+// Tables for level coding
+
+const uint8_t VP8Cat3[] = { 173, 148, 140 };  // per-bit probabilities for the 3 extra bits PutCoeffs writes, highest bit first
+const uint8_t VP8Cat4[] = { 176, 155, 140, 135 };  // same, for 4 extra bits
+const uint8_t VP8Cat5[] = { 180, 157, 141, 134, 130 };  // same, for 5 extra bits
+const uint8_t VP8Cat6[] =  // same, for 11 extra bits
+  { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129 };
+
+//------------------------------------------------------------------------------
+// Reset the statistics about: number of skips, token proba, level cost,...
+
+// Calls VP8CalculateLevelCosts() on the encoder's probabilities, then
+// zeroes the skip counter nb_skip_.
+static void ResetStats(VP8Encoder* const enc) {
+  VP8CalculateLevelCosts(&enc->proba_);
+  enc->proba_.nb_skip_ = 0;
+}
+
+//------------------------------------------------------------------------------
+// Skip decision probability
+
+#define SKIP_PROBA_THRESHOLD 250  // value below which using skip_proba is OK.
+// Returns (total - nb) * 255 / total (integer division), or 255 when total is 0.
+static int CalcSkipProba(uint64_t nb, uint64_t total) {
+  return (total == 0) ? 255 : (int)((total - nb) * 255 / total);
+}
+
+// Updates skip_proba_ and use_skip_proba_ from the skip counter nb_skip_ and
+// returns the cost of coding the skip information; a plain bit counts as 256.
+static int FinalizeSkipProba(VP8Encoder* const enc) {
+  VP8EncProba* const pr = &enc->proba_;
+  const int skipped = pr->nb_skip_;
+  const int mb_total = enc->mb_w_ * enc->mb_h_;
+  int cost = 256;   // 'use_skip_proba' bit
+  pr->skip_proba_ = CalcSkipProba(skipped, mb_total);
+  pr->use_skip_proba_ = (pr->skip_proba_ < SKIP_PROBA_THRESHOLD);
+  if (!pr->use_skip_proba_) return cost;
+  cost += skipped * VP8BitCost(1, pr->skip_proba_)
+    + (mb_total - skipped) * VP8BitCost(0, pr->skip_proba_);
+  cost += 8 * 256;   // cost of signaling the skip_proba_ itself.
+  return cost;
+}
+
+
+// Returns 255 when nb is 0, otherwise 255 - nb * 255 / total (integer division).
+static int CalcTokenProba(int nb, int total) {
+  assert(nb <= total);
+  if (nb == 0) return 255;
+  return 255 - nb * 255 / total;
+}
+
+static int BranchCost(int nb, int total, int proba) {  // cost of 'nb' 1's plus 'total-nb' 0's at 'proba'
+  const int cost_of_ones = nb * VP8BitCost(1, proba);
+  return cost_of_ones + (total - nb) * VP8BitCost(0, proba);
+}
+
+// Zeroes the whole stats_ table of the encoder's probabilities.
+static void ResetTokenStats(VP8Encoder* const enc) {
+  memset(enc->proba_.stats_, 0, sizeof(enc->proba_.stats_));
+}
+
+static int FinalizeTokenProbas(VP8EncProba* const proba) {  // picks coeffs_ from stats_; returns the summed update cost
+  int has_changed = 0;
+  int size = 0;
+  int t, b, c, p;
+  for (t = 0; t < NUM_TYPES; ++t) {
+    for (b = 0; b < NUM_BANDS; ++b) {
+      for (c = 0; c < NUM_CTX; ++c) {
+        for (p = 0; p < NUM_PROBAS; ++p) {
+          const proba_t stats = proba->stats_[t][b][c][p];
+          // Each counter packs two 16-bit fields: the low half is 'nb',
+          // the high half is 'total' (nb events out of total).
+          const int nb = (stats >> 0) & 0xffff;
+          const int total = (stats >> 16) & 0xffff;
+          const int update_proba = VP8CoeffsUpdateProba[t][b][c][p];
+          const int old_p = VP8CoeffsProba0[t][b][c][p];
+          const int new_p = CalcTokenProba(nb, total);
+          const int old_cost = BranchCost(nb, total, old_p)
+			+ VP8BitCost(0, update_proba);
+          const int new_cost = BranchCost(nb, total, new_p)
+			+ VP8BitCost(1, update_proba)
+			+ 8 * 256;
+          const int use_new_p = (old_cost > new_cost);
+          // Keeping old_p costs a '0' update flag; switching costs a '1'
+          // flag plus 8 * 256 for the new value. The cheaper option wins.
+          size += VP8BitCost(use_new_p, update_proba);
+          if (use_new_p) {  // only use proba that seem meaningful enough.
+            proba->coeffs_[t][b][c][p] = new_p;
+            has_changed |= (new_p != old_p);
+            // Account for the replacement value itself (the same
+            // 8 * 256 term that is part of new_cost).
+            size += 8 * 256;
+          } else {
+            proba->coeffs_[t][b][c][p] = old_p;
+          }
+        }
+      }
+    }
+  }
+  // dirty_ is non-zero only if at least one entry now differs from VP8CoeffsProba0.
+  proba->dirty_ = has_changed;
+  return size;
+}
+
+//------------------------------------------------------------------------------
+// Finalize Segment probability based on the coding tree
+
+static int GetProba(int a, int b) {  // 255 * a / (a + b), rounded to nearest
+  const int sum = a + b;
+  if (sum == 0) return 255;     // that's the default probability.
+  return (255 * a + sum / 2) / sum;  // rounded proba
+}
+
+// Counts the macroblocks of each segment, mirrors the counts into
+// pic_->stats when present, and, if several segments are in use, fills
+// proba_.segments_[0..2] plus the segment header's update_map_ / size_.
+static void SetSegmentProbas(VP8Encoder* const enc) {
+  int count[NUM_MB_SEGMENTS] = { 0 };   // macroblocks per segment id
+  int i;
+  for (i = 0; i < enc->mb_w_ * enc->mb_h_; ++i) {
+    count[enc->mb_info_[i].segment_]++;
+  }
+  if (enc->pic_->stats != NULL) {
+    for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
+      enc->pic_->stats->segment_size[i] = count[i];
+    }
+  }
+  if (enc->segment_hdr_.num_segments_ <= 1) {   // single segment: nothing to signal
+    enc->segment_hdr_.update_map_ = 0;
+    enc->segment_hdr_.size_ = 0;
+    return;
+  }
+  // tree[0] separates segments {0,1} from {2,3}; tree[1] and tree[2] split each pair.
+  uint8_t* const tree = enc->proba_.segments_;
+  tree[0] = GetProba(count[0] + count[1], count[2] + count[3]);
+  tree[1] = GetProba(count[0], count[1]);
+  tree[2] = GetProba(count[2], count[3]);
+  enc->segment_hdr_.update_map_ = !(tree[0] == 255 && tree[1] == 255 && tree[2] == 255);
+  enc->segment_hdr_.size_ =
+    count[0] * (VP8BitCost(0, tree[0]) + VP8BitCost(0, tree[1])) +
+    count[1] * (VP8BitCost(0, tree[0]) + VP8BitCost(1, tree[1])) +
+    count[2] * (VP8BitCost(1, tree[0]) + VP8BitCost(0, tree[2])) +
+    count[3] * (VP8BitCost(1, tree[0]) + VP8BitCost(1, tree[2]));
+}
+
+//------------------------------------------------------------------------------
+// Coefficient coding
+
+static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) {
+  // Writes the coefficients of 'res' to 'bw', starting at res->first with context 'ctx'.
+  int n = res->first;
+  // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
+  const uint8_t* p = res->prob[n][ctx];
+  if (!VP8PutBit(bw, res->last >= 0, p[0])) {
+    return 0;   // VP8PutBit() gave 0 for the 'res->last >= 0' bit (presumably it returns the bit written)
+  }
+  // Every path below returns 1. One iteration per coefficient.
+  while (n < 16) {
+    const int c = res->coeffs[n++];
+    const int sign = c < 0;
+    int v = sign ? -c : c;
+    if (!VP8PutBit(bw, v != 0, p[1])) {
+      p = res->prob[VP8EncBands[n]][0];   // n was already advanced: band of the next coefficient, context 0
+      continue;
+    }
+    if (!VP8PutBit(bw, v > 1, p[2])) {
+      p = res->prob[VP8EncBands[n]][1];   // next coefficient uses context 1
+    } else {
+      if (!VP8PutBit(bw, v > 4, p[3])) {
+        if (VP8PutBit(bw, v != 2, p[4]))
+          VP8PutBit(bw, v == 4, p[5]);
+      } else if (!VP8PutBit(bw, v > 10, p[6])) {
+        if (!VP8PutBit(bw, v > 6, p[7])) {
+          VP8PutBit(bw, v == 6, 159);   // fixed probability constant
+        } else {
+          VP8PutBit(bw, v >= 9, 165);   // fixed probability constants
+          VP8PutBit(bw, !(v & 1), 145);
+        }
+      } else {
+        int mask;            // highest extra bit to write for the chosen category
+        const uint8_t* tab;  // per-bit probabilities for those extra bits
+        if (v < 3 + (8 << 1)) {          // VP8Cat3  (3b)
+          VP8PutBit(bw, 0, p[8]);
+          VP8PutBit(bw, 0, p[9]);
+          v -= 3 + (8 << 0);
+          mask = 1 << 2;
+          tab = VP8Cat3;
+        } else if (v < 3 + (8 << 2)) {   // VP8Cat4  (4b)
+          VP8PutBit(bw, 0, p[8]);
+          VP8PutBit(bw, 1, p[9]);
+          v -= 3 + (8 << 1);
+          mask = 1 << 3;
+          tab = VP8Cat4;
+        } else if (v < 3 + (8 << 3)) {   // VP8Cat5  (5b)
+          VP8PutBit(bw, 1, p[8]);
+          VP8PutBit(bw, 0, p[10]);
+          v -= 3 + (8 << 2);
+          mask = 1 << 4;
+          tab = VP8Cat5;
+        } else {                         // VP8Cat6 (11b)
+          VP8PutBit(bw, 1, p[8]);
+          VP8PutBit(bw, 1, p[10]);
+          v -= 3 + (8 << 3);
+          mask = 1 << 10;
+          tab = VP8Cat6;
+        }
+        while (mask) {   // remaining offset, highest bit first, one table entry per bit
+          VP8PutBit(bw, !!(v & mask), *tab++);
+          mask >>= 1;
+        }
+      }
+      p = res->prob[VP8EncBands[n]][2];   // next coefficient uses context 2
+    }
+    VP8PutBitUniform(bw, sign);
+    if (n == 16 || !VP8PutBit(bw, n <= res->last, p[0])) {
+      return 1;   // EOB
+    }
+  }
+  return 1;
+}
+
+static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it,
+                          const VP8ModeScore* const rd) {  // writes one macroblock's levels to 'bw'
+  int x, y, ch;
+  VP8Residual res;
+  uint64_t pos1, pos2, pos3;   // writer positions before luma, after luma, after chroma
+  const int i16 = (it->mb_->type_ == 1);
+  const int segment = it->mb_->segment_;
+  VP8Encoder* const enc = it->enc_;
+  StopProfilingWatch stop_watch;
+  StartProfiling(&stop_watch);
+  // top_nz_ / left_nz_ are used below as the coding contexts and updated with PutCoeffs() results.
+  VP8IteratorNzToBytes(it);
+
+  pos1 = VP8BitWriterPos(bw);
+  if (i16) {
+    VP8InitResidual(0, 1, enc, &res);
+    VP8SetResidualCoeffs(rd->y_dc_levels, &res);
+    it->top_nz_[8] = it->left_nz_[8] =
+      PutCoeffs(bw, it->top_nz_[8] + it->left_nz_[8], &res);
+    VP8InitResidual(1, 0, enc, &res);   // luma blocks below start at index 1 in this mode
+  } else {
+    VP8InitResidual(0, 3, enc, &res);
+  }
+
+  // luma-AC
+  for (y = 0; y < 4; ++y) {
+    for (x = 0; x < 4; ++x) {
+      const int ctx = it->top_nz_[x] + it->left_nz_[y];
+      VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+      it->top_nz_[x] = it->left_nz_[y] = PutCoeffs(bw, ctx, &res);
+    }
+  }
+  pos2 = VP8BitWriterPos(bw);
+
+  // U/V
+  VP8InitResidual(0, 2, enc, &res);
+  for (ch = 0; ch <= 2; ch += 2) {   // ch = 0, then ch = 2: the two chroma planes
+    for (y = 0; y < 2; ++y) {
+      for (x = 0; x < 2; ++x) {
+        const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+        VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+        it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
+		  PutCoeffs(bw, ctx, &res);
+      }
+    }
+  }
+  pos3 = VP8BitWriterPos(bw);
+  it->luma_bits_ = pos2 - pos1;
+  it->uv_bits_ = pos3 - pos2;
+  it->bit_count_[segment][i16] += it->luma_bits_;   // column 0: type_ != 1, column 1: type_ == 1
+  it->bit_count_[segment][2] += it->uv_bits_;       // column 2: chroma
+  VP8IteratorBytesToNz(it);
+  StopProfiling(&stop_watch, &timeCodeResiduals, &countCodeResiduals);
+}
+
+// Same as CodeResiduals, but doesn't actually write anything.
+// Instead, it just records the event distribution (through VP8RecordCoeffs).
+static void RecordResiduals(VP8EncIterator* const it,
+                            const VP8ModeScore* const rd) {
+  int x, y, ch;
+  VP8Residual res;
+  VP8Encoder* const enc = it->enc_;
+  // top_nz_ / left_nz_ are used below as contexts and updated with VP8RecordCoeffs() results.
+  VP8IteratorNzToBytes(it);
+
+  if (it->mb_->type_ == 1) {   // i16x16
+    VP8InitResidual(0, 1, enc, &res);
+    VP8SetResidualCoeffs(rd->y_dc_levels, &res);
+    it->top_nz_[8] = it->left_nz_[8] =
+      VP8RecordCoeffs(it->top_nz_[8] + it->left_nz_[8], &res);
+    VP8InitResidual(1, 0, enc, &res);   // luma blocks below start at index 1 in this mode
+  } else {
+    VP8InitResidual(0, 3, enc, &res);
+  }
+
+  // luma-AC
+  for (y = 0; y < 4; ++y) {
+    for (x = 0; x < 4; ++x) {
+      const int ctx = it->top_nz_[x] + it->left_nz_[y];
+      VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+      it->top_nz_[x] = it->left_nz_[y] = VP8RecordCoeffs(ctx, &res);
+    }
+  }
+
+  // U/V
+  VP8InitResidual(0, 2, enc, &res);
+  for (ch = 0; ch <= 2; ch += 2) {   // ch = 0, then ch = 2: the two chroma planes
+    for (y = 0; y < 2; ++y) {
+      for (x = 0; x < 2; ++x) {
+        const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+        VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+        it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
+		  VP8RecordCoeffs(ctx, &res);
+      }
+    }
+  }
+
+  VP8IteratorBytesToNz(it);
+}
+
+#if !defined(DISABLE_TOKEN_BUFFER)
+#if TOKEN_RECONSTRUCT
+static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
+                        VP8TBufferKernel* const tokens) {
+#else
+  static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
+						  VP8TBuffer* const tokens) {
+#endif
+	int x, y, ch;
+	VP8Residual res;
+	VP8Encoder* const enc = it->enc_;
+	// Appends one macroblock's coefficients to 'tokens' and records their statistics.
+	VP8IteratorNzToBytes(it);
+	if (it->mb_->type_ == 1) {   // i16x16
+	  const int ctx = it->top_nz_[8] + it->left_nz_[8];
+	  VP8InitResidual(0, 1, enc, &res);
+	  VP8SetResidualCoeffs(rd->y_dc_levels, &res);
+	  it->top_nz_[8] = it->left_nz_[8] =
+        VP8RecordCoeffTokens(ctx, 1,
+                             res.first, res.last, res.coeffs, tokens);
+
+	  // 'ctx' was computed before top_nz_[8] / left_nz_[8] were overwritten
+	  // above, so the statistics call below sees the same context as the
+	  // token call.
+
+	  VP8RecordCoeffs(ctx, &res);
+	  VP8InitResidual(1, 0, enc, &res);
+	} else {
+	  VP8InitResidual(0, 3, enc, &res);
+	}
+
+	// luma-AC
+	for (y = 0; y < 4; ++y) {
+	  for (x = 0; x < 4; ++x) {
+		const int ctx = it->top_nz_[x] + it->left_nz_[y];
+		VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+		it->top_nz_[x] = it->left_nz_[y] =
+          VP8RecordCoeffTokens(ctx, res.coeff_type,
+                               res.first, res.last, res.coeffs, tokens);
+		VP8RecordCoeffs(ctx, &res);
+	  }
+	}
+
+	// U/V
+	VP8InitResidual(0, 2, enc, &res);
+	for (ch = 0; ch <= 2; ch += 2) {
+	  for (y = 0; y < 2; ++y) {
+		for (x = 0; x < 2; ++x) {
+		  const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+		  VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+		  it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
+            VP8RecordCoeffTokens(ctx, 2,
+                                 res.first, res.last, res.coeffs, tokens);
+		  VP8RecordCoeffs(ctx, &res);
+		}
+	  }
+	}
+	VP8IteratorBytesToNz(it);
+	return !tokens->error_;   // 1 when the token buffer reports no error, 0 otherwise
+  }
+
+#endif    // !DISABLE_TOKEN_BUFFER
+
+  //------------------------------------------------------------------------------
+  // ExtraInfo map / Debug function
+
+#if SEGMENT_VISU
+  // Fills 'size' rows of 'size' bytes with 'value'; consecutive rows are BPS bytes apart.
+  static void SetBlock(uint8_t* p, int value, int size) {
+    int rows_left;
+    for (rows_left = size; rows_left > 0; --rows_left, p += BPS) {
+      memset(p, value, size);
+    }
+  }
+#endif
+
+  // Zeroes enc->sse_[0..2] and the sample counter sse_count_.
+  static void ResetSSE(VP8Encoder* const enc) {
+    int plane;
+    // Note: enc->sse_[3] is managed by alpha.c
+    for (plane = 0; plane < 3; ++plane) enc->sse_[plane] = 0;
+    enc->sse_count_ = 0;
+  }
+
+  // Adds this macroblock's Y, U and V squared errors (yuv_in_ vs. yuv_out_)
+  // to enc->sse_[0..2] and counts 16 * 16 more samples in sse_count_.
+  // Note: not totally accurate at boundary. And doesn't include in-loop filter.
+  static void StoreSSE(const VP8EncIterator* const it) {
+    VP8Encoder* const enc = it->enc_;
+    enc->sse_[0] += VP8SSE16x16(it->yuv_in_ + Y_OFF_ENC, it->yuv_out_ + Y_OFF_ENC);
+    enc->sse_[1] += VP8SSE8x8(it->yuv_in_ + U_OFF_ENC, it->yuv_out_ + U_OFF_ENC);
+    enc->sse_[2] += VP8SSE8x8(it->yuv_in_ + V_OFF_ENC, it->yuv_out_ + V_OFF_ENC);
+    enc->sse_count_ += 16 * 16;
+  }
+
+  // Optional per-macroblock reporting: SSE / block counters when pic->stats is set, one extra_info byte when requested.
+  static void StoreSideInfo(const VP8EncIterator* const it) {
+    VP8Encoder* const enc = it->enc_;
+    WebPPicture* const pic = enc->pic_;
+    const VP8MBInfo* const mb = it->mb_;
+    if (pic->stats != NULL) {
+      StoreSSE(it);
+      if (mb->type_ == 0) ++enc->block_count_[0];
+      if (mb->type_ == 1) ++enc->block_count_[1];
+      if (mb->skip_ != 0) ++enc->block_count_[2];
+    }
+    if (pic->extra_info != NULL) {
+      int value = 0;   // also the result for an unlisted extra_info_type
+      switch (pic->extra_info_type) {
+        case 1: value = mb->type_; break;
+        case 2: value = mb->segment_; break;
+        case 3: value = enc->dqm_[mb->segment_].quant_; break;
+        case 4: value = (mb->type_ == 1) ? it->preds_[0] : 0xff; break;
+        case 5: value = mb->uv_mode_; break;
+        case 6: {   // luma + chroma bits, rounded up to bytes and capped at 255
+          const int bytes = (int)((it->luma_bits_ + it->uv_bits_ + 7) >> 3);
+          value = (bytes > 255) ? 255 : bytes; break;
+        }
+        case 7: value = mb->alpha_; break;
+        default: break;
+      }
+      pic->extra_info[it->x_ + it->y_ * enc->mb_w_] = (uint8_t)value;
+    }
+#if SEGMENT_VISU  // visualize segments and prediction modes
+    SetBlock(it->yuv_out_ + Y_OFF_ENC, mb->segment_ * 64, 16);
+    SetBlock(it->yuv_out_ + U_OFF_ENC, it->preds_[0] * 64, 8);
+    SetBlock(it->yuv_out_ + V_OFF_ENC, mb->uv_mode_ * 64, 8);
+#endif
+  }
+
+  static double GetPSNR(uint64_t mse, uint64_t size) {  // 10 * log10(255^2 * size / mse); 99 if either input is 0
+    return (mse == 0 || size == 0) ? 99 : 10. * log10(255. * 255. * size / mse);
+  }
+
+  //------------------------------------------------------------------------------
+  //  StatLoop(): only collect statistics (number of skips, token usage, ...).
+  //  This is used for deciding optimal probabilities. It also modifies the
+  //  quantizer value if some target (size, PSNR) was specified.
+
+  // Per-pass setup: clamps 'q' to [0, 100], hands it to VP8SetSegmentParams(),
+  // recomputes the segment probabilities, then clears the running statistics
+  // (ResetStats) and the SSE accumulators (ResetSSE).
+  static void SetLoopParams(VP8Encoder* const enc, float q) {
+    const float quality = Clamp(q, 0.f, 100.f);
+    VP8SetSegmentParams(enc, quality);   // setup segment quantizations and filters
+    SetSegmentProbas(enc);               // compute segment probabilities
+    ResetStats(enc);
+    ResetSSE(enc);
+  }
+
+  static uint64_t OneStatPass(VP8Encoder* const enc, VP8RDLevel rd_opt,
+							  int nb_mbs, int percent_delta,
+							  PassStats* const s) {  // one statistics pass at quality s->q; sets s->value
+	VP8EncIterator it;
+	uint64_t size = 0;         // sum of info.R + info.H over the visited macroblocks
+	uint64_t size_p0 = 0;      // sum of info.H, plus the segment header size added after the loop
+	uint64_t distortion = 0;   // sum of info.D
+	const uint64_t pixel_count = nb_mbs * 384;   // taken before nb_mbs is decremented by the loop
+
+	VP8IteratorInit(enc, &it);
+	SetLoopParams(enc, s->q);
+	do {
+	  VP8ModeScore info;
+	  VP8IteratorImport(&it, NULL);
+	  if (VP8Decimate(&it, &info, rd_opt)) {
+		// Just record the number of skips and act like skip_proba is not used.
+		enc->proba_.nb_skip_++;
+	  }
+	  RecordResiduals(&it, &info);
+	  size += info.R + info.H;
+	  size_p0 += info.H;
+	  distortion += info.D;
+	  if (percent_delta && !VP8IteratorProgress(&it, percent_delta))
+		return 0;   // progress call failed: StatLoop treats a 0 result as "stop"
+	  VP8IteratorSaveBoundary(&it);
+	} while (VP8IteratorNext(&it) && --nb_mbs > 0);   // stops at the end of the picture or after nb_mbs blocks
+
+	size_p0 += enc->segment_hdr_.size_;
+	if (s->do_size_search) {
+	  size += FinalizeSkipProba(enc);
+	  size += FinalizeTokenProbas(&enc->proba_);
+	  size = ((size + size_p0 + 1024) >> 11) + HEADER_SIZE_ESTIMATE;   // rounded division by 2048, plus headers
+	  s->value = (double)size;
+	} else {
+	  s->value = GetPSNR(distortion, pixel_count);
+	}
+	return size_p0;
+  }
+
+  static int StatLoop(VP8Encoder* const enc) {  // runs up to config_->pass statistics passes, then finalizes probas and costs
+	const int method = enc->method_;
+	const int do_search = enc->do_search_;
+	const int fast_probe = ((method == 0 || method == 3) && !do_search);
+	int num_pass_left = enc->config_->pass;
+	const int task_percent = 20;
+	const int percent_per_pass =   // NOTE(review): divides by config_->pass; assumes it is >= 1 -- confirm it is validated upstream
+      (task_percent + num_pass_left / 2) / num_pass_left;
+	const int final_percent = enc->percent_ + task_percent;
+	const VP8RDLevel rd_opt =
+      (method >= 3 || do_search) ? RD_OPT_BASIC : RD_OPT_NONE;
+	int nb_mbs = enc->mb_w_ * enc->mb_h_;
+	PassStats stats;
+	StopProfilingWatch stop_watch;
+	StartProfiling(&stop_watch);
+
+	InitPassStats(enc, &stats);
+	ResetTokenStats(enc);
+
+	// Fast mode: quick analysis pass over few mbs. Better than nothing.
+	if (fast_probe) {
+	  if (method == 3) {  // we need more stats for method 3 to be reliable.
+		nb_mbs = (nb_mbs > 200) ? nb_mbs >> 1 : 100;
+	  } else {
+		nb_mbs = (nb_mbs > 200) ? nb_mbs >> 2 : 50;
+	  }
+	}
+
+	while (num_pass_left-- > 0) {
+	  const int is_last_pass = (fabs(stats.dq) <= DQ_LIMIT) ||
+		(num_pass_left == 0) ||
+		(enc->max_i4_header_bits_ == 0);
+	  const uint64_t size_p0 =
+        OneStatPass(enc, rd_opt, nb_mbs, percent_per_pass, &stats);
+	  if (size_p0 == 0) return 0;   // returns before StopProfiling() is reached
+#if (DEBUG_SEARCH > 0)
+	  printf("#%d value:%.1lf -> %.1lf   q:%.2f -> %.2f\n",
+			 num_pass_left, stats.last_value, stats.value, stats.last_q, stats.q);
+#endif
+	  if (enc->max_i4_header_bits_ > 0 && size_p0 > PARTITION0_SIZE_LIMIT) {
+		++num_pass_left;                 // this pass does not count
+		enc->max_i4_header_bits_ >>= 1;  // strengthen header bit limitation...
+		continue;                        // ...and start over
+	  }
+	  if (is_last_pass) {
+		break;
+	  }
+	  // If no target size: just do several pass without changing 'q'
+	  if (do_search) {
+		ComputeNextQ(&stats);
+		if (fabs(stats.dq) <= DQ_LIMIT) break;
+	  }
+	}
+	if (!do_search || !stats.do_size_search) {
+	  // Need to finalize probas now, since it wasn't done during the search.
+	  FinalizeSkipProba(enc);
+	  FinalizeTokenProbas(&enc->proba_);
+	}
+	VP8CalculateLevelCosts(&enc->proba_);  // finalize costs
+	StopProfiling(&stop_watch, &timeStatLoop, &countStatLoop);
+	return WebPReportProgress(enc->pic_, final_percent, &enc->percent_);
+  }
+
+  //------------------------------------------------------------------------------
+  // Main loops
+  //
+
+  static const int kAverageBytesPerMB[8] = { 50, 24, 16, 9, 7, 5, 3, 2 };  // indexed by base_quant_ >> 4 in PreLoopInitialize
+
+  // Sizes and initialises one bit-writer per partition. On failure it frees
+  // them, flags VP8_ENC_ERROR_OUT_OF_MEMORY on the picture and returns 0.
+  static int PreLoopInitialize(VP8Encoder* const enc) {
+    const int per_mb = kAverageBytesPerMB[enc->base_quant_ >> 4];
+    const int per_part = enc->mb_w_ * enc->mb_h_ * per_mb / enc->num_parts_;
+    int ok = 1, part = 0;
+    while (ok && part < enc->num_parts_) {   // Initialize the bit-writers
+      ok = VP8BitWriterInit(enc->parts_ + part, per_part);
+      ++part;
+    }
+    if (!ok) {
+      VP8EncFreeBitWriters(enc);  // malloc error occurred
+      WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    }
+    return ok;
+  }
+
+  // Finishes every partition's bit-writer when 'ok' is set. On success it
+  // publishes the per-segment residual byte counts (only if pic_->stats is
+  // set) and calls VP8AdjustFilterStrength(); on failure it frees the
+  // bit-writers. Returns the final value of 'ok'.
+  static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
+    VP8Encoder* const enc = it->enc_;
+    int part, kind, seg;
+
+    if (ok) {      // Finalize the partitions, check for extra errors.
+      for (part = 0; part < enc->num_parts_; ++part) {
+        VP8BitWriterFinish(enc->parts_ + part);
+        ok &= !enc->parts_[part].error_;
+      }
+    }
+    if (!ok) {
+      // Something bad happened -> need to do some memory cleanup.
+      VP8EncFreeBitWriters(enc);
+      return ok;
+    }
+    if (enc->pic_->stats != NULL) {  // finalize byte counters...
+      for (kind = 0; kind <= 2; ++kind) {
+        for (seg = 0; seg < NUM_MB_SEGMENTS; ++seg) {
+          enc->residual_bytes_[kind][seg] =
+            (int)((it->bit_count_[seg][kind] + 7) >> 3);
+        }
+      }
+    }
+    VP8AdjustFilterStrength(it);     // ...and store filter stats.
+    return ok;
+  }
+
+  // Same finalization steps as PostLoopFinalize(), but driven by an explicit
+  // bit_count[segment][type] table and VP8AdjustFilterStrengthOcl(enc).
+  static int PostLoopFinalizeOcl(VP8Encoder* const enc, uint64_t bit_count[4][3], int ok) {
+    int part, kind, seg;
+    if (ok) {      // Finalize the partitions, check for extra errors.
+      for (part = 0; part < enc->num_parts_; ++part) {
+        VP8BitWriterFinish(enc->parts_ + part);
+        ok &= !enc->parts_[part].error_;
+      }
+    }
+    if (!ok) {
+      // Something bad happened -> need to do some memory cleanup.
+      VP8EncFreeBitWriters(enc);
+      return ok;
+    }
+    if (enc->pic_->stats != NULL) {  // finalize byte counters...
+      for (kind = 0; kind <= 2; ++kind) {
+        for (seg = 0; seg < NUM_MB_SEGMENTS; ++seg) {
+          enc->residual_bytes_[kind][seg] = (int)((bit_count[seg][kind] + 7) >> 3);
+        }
+      }
+    }
+    VP8AdjustFilterStrengthOcl(enc);     // ...and store filter stats.
+    return ok;
+  }
+
+  //------------------------------------------------------------------------------
+  //  VP8EncLoop(): does the final bitstream coding.
+
+  static void ResetAfterSkip(VP8EncIterator* const it) {  // fixes up the nz state after a skipped macroblock
+    if (it->mb_->type_ != 1) {
+      *it->nz_ &= (1 << 24);  // preserve the dc_nz bit
+      return;
+    }
+    *it->nz_ = 0;  // reset all predictors
+    it->left_nz_[8] = 0;
+  }
+
+  int VP8EncLoop(VP8Encoder* const enc) {
+	// Final coding loop: statistics pass(es) first, then every macroblock is decimated and coded.
+	VP8EncIterator it;
+	int ok = PreLoopInitialize(enc);
+	StopProfilingWatch stop_watch;
+	StartProfiling(&stop_watch);
+
+	if (!ok) return 0;   // bit-writer allocation failed (already reported by PreLoopInitialize)
+
+	StatLoop(enc);  // stats-collection loop (its return value is not checked here)
+	StatLoopFlag = 0;
+
+	VP8IteratorInit(enc, &it);
+	VP8InitFilter(&it);
+	do {
+	  VP8ModeScore info;
+	  const int dont_use_skip = !enc->proba_.use_skip_proba_;
+	  const VP8RDLevel rd_opt = enc->rd_opt_level_;
+
+	  VP8IteratorImport(&it, NULL);
+	  // Warning! order is important: first call VP8Decimate() and
+	  // *then* decide how to code the skip decision if there's one.
+	  if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) {
+		CodeResiduals(it.bw_, &it, &info);
+	  } else {   // reset predictors after a skip
+		ResetAfterSkip(&it);
+	  }
+	  StoreSideInfo(&it);
+	  VP8StoreFilterStats(&it);
+	  VP8IteratorExport(&it);
+	  ok = VP8IteratorProgress(&it, 20);
+	  VP8IteratorSaveBoundary(&it);
+	} while (ok && VP8IteratorNext(&it));
+	// 'ok' is 0 here only if a progress call failed; PostLoopFinalize() then frees the bit-writers.
+	StopProfiling(&stop_watch, &timeEncLoop, &countEncLoop);
+	return PostLoopFinalize(&it, ok);
+  }
+
+  int VP8EncLoopOcl(VP8Encoder* const enc) {  // stub: does nothing with 'enc'
+	return 0;   // always 0
+  }
+
+#if !defined(DISABLE_TOKEN_BUFFER)
+
+#define MIN_COUNT 96  // minimum number of macroblocks before updating stats
+ 
+  int VP8EncTokenLoop(VP8Encoder* const enc) {
+ 
+	printf(" In EncTokenLoop in frame.c \n");
+
+	// Roughly refresh the proba eight times per pass
+	int max_count = (enc->mb_w_ * enc->mb_h_) >> 3;
+	int num_pass_left = enc->config_->pass;
+	const int do_search = 0;//enc->do_search_;
+  
+	VP8EncIterator it;
+	VP8EncProba* const proba = &enc->proba_;
+	const VP8RDLevel rd_opt = enc->rd_opt_level_;
+	const uint64_t pixel_count = enc->mb_w_ * enc->mb_h_ * 384;
+  
+	PassStats stats;
+	int ok;
+	StopProfilingWatch stop_watch;
+	StartProfiling(&stop_watch);
+
+	InitPassStats(enc, &stats);
+	ok = PreLoopInitialize(enc);
+	if (!ok) return 0;
+
+	if (max_count < MIN_COUNT) max_count = MIN_COUNT;
+
+	assert(enc->num_parts_ == 1);
+	assert(enc->use_tokens_);
+	assert(proba->use_skip_proba_ == 0);
+	assert(rd_opt >= RD_OPT_BASIC);   // otherwise, token-buffer won't be useful
+	assert(num_pass_left > 0);
+
+	while (ok && num_pass_left-- > 0) {
+
+	  const int is_last_pass = (fabs(stats.dq) <= DQ_LIMIT) ||
+		(num_pass_left == 0) ||
+		(enc->max_i4_header_bits_ == 0);
+	  uint64_t size_p0 = 0;
+	  uint64_t distortion = 0;
+	  int cnt = max_count;
+
+	  VP8IteratorInit(enc, &it);
+	  SetLoopParams(enc, stats.q);
+
+	  if (is_last_pass) {
+		ResetTokenStats(enc);
+		VP8InitFilter(&it);  // don't collect stats until last pass (too costly)
+	  }
+
+	  VP8TBufferClear(&enc->tokens_);
+
+#ifdef USE_C_KERNEL
+	  
+	  int i, j, index;
+
+	  cl_int err;
+	  EncloopInputData input_data;
+	  EncloopSegmentData segment_data;
+	  EncLoopOutputData output_data;
+	  VP8TBufferKernel output_tokens;
+
+	  VP8EncMatrix matrix_y1[NUM_MB_SEGMENTS];
+	  VP8EncMatrix matrix_y2[NUM_MB_SEGMENTS];
+	  VP8EncMatrix matrix_uv[NUM_MB_SEGMENTS];
+
+	  const int xsize = enc->pic_->width;
+	  const int ysize = enc->pic_->height;
+
+	  const int mb_w = (xsize + 15) >> 4; // nb of blocks in x 
+	  const int mb_h = (ysize + 15) >> 4; // nb of blocks in y
+
+	  const int preds_w = 4 * mb_w + 1; // prediction size in x
+	  const int preds_h = 4 * mb_h + 1; // prediction size in y
+
+	  const int y_width = xsize;
+	  const int y_height = ysize;
+
+	  const int uv_width = (xsize + 1) >> 1;
+	  const int uv_height = (ysize + 1) >> 1;
+
+	  const int y_stride = y_width;
+	  const int uv_stride = uv_width;
+
+	  const int expand_yheight = RoundUp(ysize, 16);
+	  const int expand_uvheight = RoundUp(uv_height, 8);
+
+	  uint64_t y_size = 0;
+	  uint64_t uv_size = 0;
+
+	  int mb_size = 0;
+	  int preds_size = 0;
+	  int nz_size = 0;
+	  int top_data_size = 0;
+	  int lf_stats_size = 0;
+	  int quant_matrix_size = 0;
+	  int coeffs_size = 0;
+	  int stats_size = 0;
+	  int level_cost_size = 0;
+	  int bw_buf_size = 0;
+	  int sse_size = 0;
+	  int block_count_size = 0;
+	  int extra_info_size = 0;
+	  int max_edge_size = 0;
+	  int bit_count_size = 0;
+	  int expand_y_size = 0;
+	  int expand_uv_size = 0;
+	  int input_size = 0;
+
+	  // bits size
+	  y_size = y_width * y_height * sizeof(uint8_t);
+	  uv_size = uv_width * uv_height * sizeof(uint8_t);
+	  mb_size = mb_w * mb_h * sizeof(uint8_t);
+	  preds_size = preds_w * preds_h * sizeof(uint8_t) + preds_w + 1;
+	  nz_size = (mb_w + 1 + 1) * sizeof(uint32_t) /*+ WEBP_ALIGN_CST*/;
+	  top_data_size = mb_w * 16 * sizeof(uint8_t);
+	  lf_stats_size = NUM_MB_SEGMENTS * MAX_LF_LEVELS * sizeof(double);
+	  quant_matrix_size = sizeof(VP8EncMatrix);
+	  coeffs_size = NUM_TYPES * NUM_BANDS * NUM_CTX * NUM_PROBAS * sizeof(uint8_t);
+	  stats_size = NUM_TYPES * NUM_BANDS * NUM_CTX * NUM_PROBAS * sizeof(uint32_t);
+	  level_cost_size = NUM_TYPES * NUM_BANDS * NUM_CTX * (MAX_VARIABLE_LEVEL + 1) * sizeof(uint16_t);
+	  bw_buf_size = 408000 * sizeof(uint8_t);
+	  sse_size = 4 * sizeof(uint64_t);
+	  block_count_size = 3 * sizeof(int);
+	  extra_info_size = mb_w * mb_h * sizeof(uint8_t);
+	  max_edge_size = NUM_MB_SEGMENTS * sizeof(int);
+	  bit_count_size = 4 * 3 * sizeof(uint64_t);
+	  input_size = sizeof(EncloopInputData);
+	  int output_size = MAX_NUM_MB_W * MAX_NUM_MB_H * 1024 * sizeof(uint16_t); // ??   
+
+	  int output_tokens_size = sizeof(uint16_t) * PAGE_COUNT * TOKENS_COUNT_PER_PAGE;
+
+	  input_data.width = xsize;
+	  input_data.height = ysize;
+	  input_data.filter_sharpness = enc->config_->filter_sharpness;
+	  input_data.show_compressed = enc->config_->show_compressed;
+	  input_data.extra_info_type = enc->pic_->extra_info_type;
+	  input_data.stats_add = enc->pic_->stats;
+	  input_data.simple = enc->filter_hdr_.simple_;
+	  input_data.num_parts = enc->num_parts_;
+	  input_data.max_i4_header_bits = enc->max_i4_header_bits_;
+
+	  if (enc->lf_stats_ == NULL) {
+		input_data.lf_stats_status = 0;
+	  } else {
+		input_data.lf_stats_status = 1;
+	  }
+
+	  input_data.use_skip_proba = !enc->proba_.use_skip_proba_;
+	  input_data.method = enc->method_;
+	  input_data.rd_opt = (int)enc->rd_opt_level_;
+
+	  for (i = 0; i < NUM_MB_SEGMENTS; i++) {
+
+		segment_data.quant[i] = enc->dqm_[i].quant_;
+		segment_data.fstrength[i] = enc->dqm_[i].fstrength_;
+		segment_data.max_edge[i] = enc->dqm_[i].max_edge_;
+		segment_data.min_disto[i] = enc->dqm_[i].min_disto_;
+		segment_data.lambda_i16[i] = enc->dqm_[i].lambda_i16_;
+		segment_data.lambda_i4[i] = enc->dqm_[i].lambda_i4_;
+		segment_data.lambda_uv[i] = enc->dqm_[i].lambda_uv_;
+		segment_data.lambda_mode[i] = enc->dqm_[i].lambda_mode_;
+		segment_data.tlambda[i] = enc->dqm_[i].tlambda_;
+		segment_data.lambda_trellis_i16[i] = enc->dqm_[i].lambda_trellis_i16_;
+		segment_data.lambda_trellis_i4[i] = enc->dqm_[i].lambda_trellis_i4_;
+		segment_data.lambda_trellis_uv[i] = enc->dqm_[i].lambda_trellis_uv_;
+	  }
+
+	  expand_y_size = (expand_yheight - ysize) * xsize;
+	  uint8_t expand_y[expand_y_size];
+	  if (expand_yheight > ysize) {
+		for (i = 0; i < expand_yheight - ysize; i++) {
+		  memcpy(expand_y + i * xsize, enc->pic_->y + xsize * (ysize - 1), xsize);
+		}
+	  }
+
+	  // copy expanded block
+	  expand_uv_size = (expand_uvheight - uv_height) * uv_width;
+	  uint8_t expand_u[expand_uv_size];
+	  uint8_t expand_v[expand_uv_size];
+	  if (expand_uvheight > uv_height) {
+		for (i = 0; i < expand_uvheight - uv_height; i++) {
+		  memcpy(expand_u + i * uv_width, enc->pic_->u + uv_width * (uv_height - 1), uv_width);
+		  memcpy(expand_v + i * uv_width, enc->pic_->v + uv_width * (uv_height - 1), uv_width);
+		}
+	  }
+
+	  uint8_t mb_info[5 * mb_w * mb_h];
+	  for (index = 0; index < mb_size; index++) {
+		mb_info[5 * index + 0] = enc->mb_info_[index].type_;
+		mb_info[5 * index + 1] = enc->mb_info_[index].uv_mode_;
+		mb_info[5 * index + 2] = enc->mb_info_[index].skip_;
+		mb_info[5 * index + 3] = enc->mb_info_[index].segment_;
+		mb_info[5 * index + 4] = enc->mb_info_[index].alpha_;
+	  }
+
+	  for (i = 0; i < NUM_MB_SEGMENTS; i++) {
+		VP8Matrix* matrix = &(enc->dqm_[i].y1_);
+		for (j = 0; j < 16; j++) {
+		  matrix_y1[i].q_[j] = matrix->q_[j];
+		  matrix_y1[i].iq_[j] = matrix->iq_[j];
+		  matrix_y1[i].bias_[j] = matrix->bias_[j];
+		  matrix_y1[i].zthresh_[j] = matrix->zthresh_[j];
+		  matrix_y1[i].sharpen_[j] = matrix->sharpen_[j];
+		}
+	  }
+
+	  for (i = 0; i < NUM_MB_SEGMENTS; i++) {
+		VP8Matrix* matrix = &(enc->dqm_[i].y2_);
+		for (j = 0; j < 16; j++) {
+		  matrix_y2[i].q_[j] = matrix->q_[j];
+		  matrix_y2[i].iq_[j] = matrix->iq_[j];
+		  matrix_y2[i].bias_[j] = matrix->bias_[j];
+		  matrix_y2[i].zthresh_[j] = matrix->zthresh_[j];
+		  matrix_y2[i].sharpen_[j] = matrix->sharpen_[j];
+		}
+	  }
+
+	  for (i = 0; i < NUM_MB_SEGMENTS; i++) {
+		VP8Matrix* matrix = &(enc->dqm_[i].uv_);
+		for (j = 0; j < 16; j++) {
+		  matrix_uv[i].q_[j] = matrix->q_[j];
+		  matrix_uv[i].iq_[j] = matrix->iq_[j];
+		  matrix_uv[i].bias_[j] = matrix->bias_[j];
+		  matrix_uv[i].zthresh_[j] = matrix->zthresh_[j];
+		  matrix_uv[i].sharpen_[j] = matrix->sharpen_[j];
+		}
+	  }
+
+	  output_data.range = enc->parts_[0].range_;
+	  output_data.value = enc->parts_[0].value_;
+	  output_data.run = enc->parts_[0].run_;
+	  output_data.nb_bits = enc->parts_[0].nb_bits_;
+	  output_data.pos = enc->parts_[0].pos_;
+	  output_data.max_pos = enc->parts_[0].max_pos_;
+	  output_data.error = enc->parts_[0].error_;
+
+	  uint8_t y_top[mb_w * 16];
+	  uint8_t uv_top[mb_w * 16];
+
+	  memset(y_top, 127, top_data_size);
+	  memset(uv_top, 127, top_data_size);
+
+	  int max_edge_data[NUM_MB_SEGMENTS];
+	  for (i = 0; i < NUM_MB_SEGMENTS; i++) {
+		max_edge_data[i] = enc->dqm_[i].max_edge_;
+	  }
+	  uint64_t bit_count[4][3];
+
+	  size_t globalSize[] = {1, 1, 1};
+	  size_t localSize[] = {1, 1, 1};
+
+
+	  // copy buffer
+
+	  err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.input, CL_TRUE, 0, sizeof(EncloopInputData), &input_data, 0, NULL, NULL);
+	  if(CL_SUCCESS != err) {
+		fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+		ok = 0;
+		goto Err;
+	  }
+
+	  err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.y, CL_TRUE, 0, y_size, enc->pic_->y, 0, NULL, NULL);
+	  if(CL_SUCCESS != err) {
+		fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+		ok = 0;
+		goto Err;
+	  }
+	  /*
+		if (expand_yheight > y_height) {
+		err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.y, CL_TRUE, y_size, expand_y_size, expand_y, 0, NULL, NULL);
+		if(CL_SUCCESS != err) {
+		fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+		ok = 0;
+		goto Err;
+		}
+		}
+	  */
+	  err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.u, CL_TRUE, 0, uv_size, enc->pic_->u, 0, NULL, NULL);
+	  if(CL_SUCCESS != err) {
+		fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+		ok = 0;
+		goto Err;
+	  }
+	  /*
+		if (expand_uvheight > uv_height) {
+		err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.u, CL_TRUE, uv_size, expand_uv_size, expand_u, 0, NULL, NULL);
+		if(CL_SUCCESS != err) {
+		fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+		ok = 0;
+		goto Err;
+		}
+		}
+	  */
+	  err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.v, CL_TRUE, 0, uv_size, enc->pic_->v, 0, NULL, NULL);
+	  if(CL_SUCCESS != err) {
+		fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+		ok = 0;
+		goto Err;
+	  }
+	  /*
+		if (expand_uvheight > uv_height) {
+		err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.v, CL_TRUE, uv_size, expand_uv_size, expand_v, 0, NULL, NULL);
+		if(CL_SUCCESS != err) {
+		fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+		ok = 0;
+		goto Err;
+		}
+		}
+	  */
+	  /* err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.output_tokens, CL_TRUE, 0, output_tokens_size, output_tokens.tokens_, 0, NULL, NULL); */
+	  /* if(CL_SUCCESS != err) { */
+	  /* 	fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err)); */
+	  /* 	ok = 0; */
+	  /* 	goto Err; */
+	  /* } */
+
+
+	  // *********************************** run kernel ********************************
+
+	  err = clEnqueueNDRangeKernel(hardware.mQueue, encloop.mKernel, 1, 0,
+								   globalSize, localSize, 0, NULL, NULL);
+	  if (err != CL_SUCCESS) {
+		fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+		ok = 0;
+		goto Err;
+	  }
+
+	  output_size = MAX_NUM_MB_W * MAX_NUM_MB_H * 1024 * sizeof(uint16_t); // ??   
+
+	  // *************************************************************************
+
+
+	  // read buffer from device
+
+	  fprintf(stderr, "start enctokenloop clFinish\n");
+	  err = clFinish(hardware.mQueue);
+	  if (err != CL_SUCCESS) {
+		fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+		ok = 0;
+		goto Err;
+	  }
+	  fprintf(stderr, "stop enctokenloop clFinish\n");
+
+	  err = clEnqueueReadBuffer(hardware.mQueue, enclooppara.y, CL_TRUE, 0, y_size, enc->pic_->y, 0, NULL, NULL);
+	  if(CL_SUCCESS != err) {
+		fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+		ok = 0;
+		goto Err;
+	  }
+
+	  err = clEnqueueReadBuffer(hardware.mQueue, enclooppara.u, CL_TRUE, 0, uv_size, enc->pic_->u, 0, NULL, NULL);
+	  if(CL_SUCCESS != err) {
+	  	fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+	  	ok = 0;
+	  	goto Err;
+	  }
+
+	  err = clEnqueueReadBuffer(hardware.mQueue, enclooppara.v, CL_TRUE, 0, uv_size, enc->pic_->v, 0, NULL, NULL);
+	  if(CL_SUCCESS != err) {
+	  	fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+	  	ok = 0;
+	  	goto Err;
+	  }
+
+	  /* err = clEnqueueReadBuffer(hardware.mQueue, enclooppara.output, CL_TRUE, 0, output_size, output_tokens.tokens_, 0, NULL, NULL); */
+	  /* if(CL_SUCCESS != err) { */
+	  /* 	fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err)); */
+	  /* 	ok = 0; */
+	  /* 	goto Err; */
+	  /* } */
+
+	  /* output_tokens.error_ = output_data.error_; */
+	  /* enc->tokens_.left_ = output_data.left_; */
+	  /* enc->tokens_.page_size_ = output_data.page_size_; */
+	  /* enc->tokens_.error_ = output_data.error_; */
+	  /* ReadTokenFromKernel(&enc->tokens_, &output_tokens); */
+
+#else
+	  int index = 0;
+
+#if TOKEN_RECONSTRUCT
+	  VP8TBufferKernel tokens_;
+	  VP8TBufferKernelInit(&tokens_, enc->tokens_.page_size_);
+#endif
+
+	  printf("TOKEN_RECONSTRUCT :%d\n", TOKEN_RECONSTRUCT);
+
+	  do {
+		index++;
+		VP8ModeScore info;
+		VP8IteratorImport(&it, NULL);
+		if (--cnt < 0) {
+		  // printf("index:%d cnt:%d\n", index, cnt);
+		  // debug_log_stats_p(proba);
+		  FinalizeTokenProbas(proba);
+		  VP8CalculateLevelCosts(proba);  // refresh cost tables for rd-opt
+		  cnt = max_count;
+		}
+
+		VP8Decimate(&it, &info, rd_opt);
+#if TOKEN_RECONSTRUCT
+		ok = RecordTokens(&it, &info, &tokens_);
+#else
+		ok = RecordTokens(&it, &info, &enc->tokens_);
+#endif
+		// if (!ok) {
+		//   WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+		//   break;
+		// }
+		// size_p0 += info.H;
+		// distortion += info.D;
+		// if (is_last_pass) {
+		//   StoreSideInfo(&it);
+		//   VP8StoreFilterStats(&it);
+		//   VP8IteratorExport(&it);
+		//   ok = VP8IteratorProgress(&it, 20);
+		// }
+		VP8IteratorSaveBoundary(&it);
+	  } while (/*0*/ok && VP8IteratorNext(&it));// TODO
+
+	  debug_tokens(&enc->tokens_, &it);
+#endif
+
+#if TOKEN_RECONSTRUCT
+	  ReadTokenFromKernel(&enc->tokens_, &tokens_);
+#endif
+	  if (!ok) break;
+	  debug_probas(&enc->proba_);
+
+#if 1
+	  size_p0 += enc->segment_hdr_.size_;
+	  // printf("stats.do_size_search:%d\n", stats.do_size_search);
+	  if (stats.do_size_search) {
+		uint64_t size = FinalizeTokenProbas(&enc->proba_);
+		size += VP8EstimateTokenSize(&enc->tokens_,
+									 (const uint8_t*)proba->coeffs_);
+		size = (size + size_p0 + 1024) >> 11;  // -> size in bytes
+		size += HEADER_SIZE_ESTIMATE;
+		stats.value = (double)size;
+	  } else {  // compute and store PSNR
+		stats.value = GetPSNR(distortion, pixel_count);
+	  }
+
+#if (DEBUG_SEARCH > 0)
+	  printf("#%2d metric:%.1lf -> %.1lf   last_q=%.2lf q=%.2lf dq=%.2lf\n",
+			 num_pass_left, stats.last_value, stats.value,
+			 stats.last_q, stats.q, stats.dq);
+#endif
+	  // printf("size_p0:%d PARTITION0_SIZE_LIMIT:%d\n", size_p0, PARTITION0_SIZE_LIMIT);
+	  if (size_p0 > PARTITION0_SIZE_LIMIT) {
+		++num_pass_left;
+		enc->max_i4_header_bits_ >>= 1;  // strengthen header bit limitation...
+		continue;                        // ...and start over
+	  }
+	  if (is_last_pass) {
+		break;   // done
+	  }
+	  if (do_search) {
+		ComputeNextQ(&stats);  // Adjust q
+	  }
+#endif
+	}
+#if 1
+	if (ok) {
+	  if (!stats.do_size_search) {
+		FinalizeTokenProbas(&enc->proba_);
+	  }
+	  ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0,
+						 (const uint8_t*)proba->coeffs_, 1);
+	}
+	ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
+	StopProfiling(&stop_watch, &timeEncTokenLoop, &countEncTokenLoop);
+#endif
+	return PostLoopFinalize(&it, ok);
+	// return 1;
+
+#ifdef USE_C_KERNEL
+
+  Err:
+	releaseKernel(encloop);
+	clReleaseMemObject(enclooppara.input);
+	clReleaseMemObject(enclooppara.y);
+	clReleaseMemObject(enclooppara.u);
+	clReleaseMemObject(enclooppara.v);
+	/* clReleaseMemObject(enclooppara.mb_info); */
+	/* clReleaseMemObject(enclooppara.preds); */
+	/* clReleaseMemObject(enclooppara.nz); */
+	/* clReleaseMemObject(enclooppara.y_top); */
+	/* clReleaseMemObject(enclooppara.uv_top); */
+	/* clReleaseMemObject(enclooppara.quant_matrix); */
+	/* clReleaseMemObject(enclooppara.coeffs); */
+	/* clReleaseMemObject(enclooppara.stats); */
+	/* clReleaseMemObject(enclooppara.level_cost); */
+	/* clReleaseMemObject(enclooppara.bw_buf); */
+	/* clReleaseMemObject(enclooppara.sse); */
+	/* clReleaseMemObject(enclooppara.block_count); */
+	/* clReleaseMemObject(enclooppara.extra_info); */
+	/* clReleaseMemObject(enclooppara.max_edge); */
+	/* clReleaseMemObject(enclooppara.bit_count); */
+	/* clReleaseMemObject(enclooppara.sse_count); */
+	/* clReleaseMemObject(enclooppara.output_data); */
+	/* clReleaseMemObject(enclooppara.output_tokens); */
+	clReleaseMemObject(enclooppara.output);
+
+	return ok;
+#endif
+
+  }
+  //-------------------------------------------------------------------------------------//
+
+
+  // Records the coefficient tokens of one macroblock whose quantized levels are
+  // delivered through HLS streams (one 16-coefficient word per 4x4 block).
+  //
+  //   enc           encoder handed to VP8InitResidual().
+  //   ap_nz         non-zero context: load_top9()/load_left9() supply the incoming
+  //                 flags; left_nz[8], nz_current and store_nz() receive the
+  //                 updated ones.
+  //   x_, y_        coordinates forwarded to the ap_nz accessors.
+  //   type_         1 selects the i16x16 path (separate DC block followed by
+  //                 type-0 luma blocks); any other value codes luma as type 3.
+  //   str_level_dc  exactly one word is consumed, even when type_ != 1.
+  //   str_level_y   sixteen words are consumed, rows first.
+  //   str_level_uv  eight words are consumed (ch = 0 group, then ch = 2 group).
+  //   tokens        token buffer the coefficients are recorded into.
+  //
+  // Returns non-zero when `tokens` reports no error after recording.
+  static int RecordTokens_nrd2(
+                               VP8Encoder* const enc,
+                               ap_NoneZero* ap_nz,
+                               int x_, int y_,int type_,
+                               hls::stream< ap_int<WD_LEVEL*16> >* str_level_dc,
+                               hls::stream< ap_int<WD_LEVEL*16> >* str_level_y,
+                               hls::stream< ap_int<WD_LEVEL*16> >* str_level_uv,
+                               VP8TBuffer* const tokens) {
+    // Bit position of top_nz_[0..8] inside the packed nz word.
+    static const int kTopShift[9] = { 12, 13, 14, 15, 18, 19, 22, 23, 24 };
+    VP8Residual res;
+    int top_nz_[9];
+    int left_nz_[9];
+    uint32_t nz = 0;
+
+    // Expand the two packed 9-bit context words into one int per flag.
+    ap_uint<9> ap_top_nz = ap_nz->load_top9(x_, y_);
+    ap_uint<9> ap_left_nz = ap_nz->load_left9(x_);
+    for (int bit = 0; bit < 9; ++bit) {
+      top_nz_[bit] = ap_top_nz[bit];
+      left_nz_[bit] = ap_left_nz[bit];
+    }
+
+    // The DC word is always popped so the stream stays aligned with the
+    // macroblock sequence; it is only decoded on the i16x16 path.
+    ap_int<WD_LEVEL*16> tmp16 = str_level_dc->read();
+    if (type_ != 1) {
+      VP8InitResidual(0, 3, enc, &res);
+    } else {   // i16x16
+      short int y_dc_levels[16];
+      const int ctx = top_nz_[8] + left_nz_[8];
+      VP8InitResidual(0, 1, enc, &res);
+      CPY16(y_dc_levels, tmp16, WD_LEVEL);
+      VP8SetResidualCoeffs(y_dc_levels, &res);
+      const int dc_nz =
+          VP8RecordCoeffTokens(ctx, 1, res.first, res.last, res.coeffs, tokens);
+      left_nz_[8] = dc_nz;
+      top_nz_[8] = dc_nz;
+      VP8RecordCoeffs(ctx, &res);
+      VP8InitResidual(1, 0, enc, &res);
+    }
+
+    // luma-AC: sixteen blocks, column index in the low two bits of `blk`.
+    for (int blk = 0; blk < 16; ++blk) {
+      const int col = blk & 3;
+      const int row = blk >> 2;
+      const int ctx = top_nz_[col] + left_nz_[row];
+      short int y_ac_levels[16];
+      ap_int<WD_LEVEL*16> tmp = str_level_y->read();
+      CPY16(y_ac_levels,tmp,WD_LEVEL);
+      VP8SetResidualCoeffs(y_ac_levels, &res);
+      const int blk_nz =
+          VP8RecordCoeffTokens(ctx, res.coeff_type,
+                               res.first, res.last, res.coeffs, tokens);
+      left_nz_[row] = blk_nz;
+      top_nz_[col] = blk_nz;
+      VP8RecordCoeffs(ctx, &res);
+    }
+
+    // U/V: two groups (ch = 0, then ch = 2) of 2x2 blocks.
+    VP8InitResidual(0, 2, enc, &res);
+    for (int blk = 0; blk < 8; ++blk) {
+      const int ch = (blk >> 2) << 1;
+      const int top_idx = 4 + ch + (blk & 1);
+      const int left_idx = 4 + ch + ((blk >> 1) & 1);
+      const int ctx = top_nz_[top_idx] + left_nz_[left_idx];
+      short int uv_levels[16];
+      ap_int<WD_LEVEL*16> tmp = str_level_uv->read();
+      CPY16(uv_levels,tmp,WD_LEVEL);
+      VP8SetResidualCoeffs(uv_levels, &res);
+      const int blk_nz =
+          VP8RecordCoeffTokens(ctx, 2, res.first, res.last, res.coeffs, tokens);
+      left_nz_[left_idx] = blk_nz;
+      top_nz_[top_idx] = blk_nz;
+      VP8RecordCoeffs(ctx, &res);
+    }
+
+    // Re-pack the flags into the single-word layout kept in ap_nz.
+    for (int bit = 0; bit < 9; ++bit) {
+      nz |= (top_nz_[bit] << kTopShift[bit]);  // bit 24 propagates the _top_ flag
+    }
+    nz |= (left_nz_[0] << 3) | (left_nz_[1] << 7) | (left_nz_[2] << 11);
+    nz |= (left_nz_[4] << 17) | (left_nz_[6] << 21);
+
+    ap_nz->left_nz[8] = left_nz_[8];
+    ap_nz->nz_current = nz;
+    ap_nz->store_nz(x_);
+    return !tokens->error_;
+  }
+
+
+  // Records the coefficient tokens of one macroblock whose quantized levels are
+  // already laid out in flat arrays of 16 coefficients per 4x4 block.
+  //
+  //   enc          encoder handed to VP8InitResidual().
+  //   ap_nz        non-zero context: load_top9()/load_left9() supply the incoming
+  //                flags; left_nz[8], nz_current and store_nz() receive the
+  //                updated ones.
+  //   x_, y_       coordinates forwarded to the ap_nz accessors.
+  //   type_        1 selects the i16x16 path (separate DC block followed by
+  //                type-0 luma blocks); any other value codes luma as type 3.
+  //   y_dc_levels  16 values, read only when type_ == 1.
+  //   y_ac_levels  16 consecutive groups of 16 values, rows first.
+  //   uv_levels    8 consecutive groups of 16 values (ch = 0 group, then ch = 2).
+  //   tokens       token buffer the coefficients are recorded into.
+  //
+  // Returns non-zero when `tokens` reports no error after recording.
+  static int RecordTokens_nrd2(
+                               VP8Encoder* const enc,
+                               ap_NoneZero* ap_nz,
+                               int x_, int y_,int type_,
+                               short int* y_dc_levels,
+                               short int* y_ac_levels,
+                               short int* uv_levels,
+                               VP8TBuffer* const tokens) {
+    // Bit position of top_nz_[0..8] inside the packed nz word.
+    static const int kTopShift[9] = { 12, 13, 14, 15, 18, 19, 22, 23, 24 };
+    VP8Residual res;
+    int top_nz_[9];
+    int left_nz_[9];
+    uint32_t nz = 0;
+
+    // Expand the two packed 9-bit context words into one int per flag.
+    ap_uint<9> ap_top_nz = ap_nz->load_top9(x_, y_);
+    ap_uint<9> ap_left_nz = ap_nz->load_left9(x_);
+    for (int bit = 0; bit < 9; ++bit) {
+      top_nz_[bit] = ap_top_nz[bit];
+      left_nz_[bit] = ap_left_nz[bit];
+    }
+
+    if (type_ != 1) {
+      VP8InitResidual(0, 3, enc, &res);
+    } else {   // i16x16
+      const int ctx = top_nz_[8] + left_nz_[8];
+      VP8InitResidual(0, 1, enc, &res);
+      VP8SetResidualCoeffs(y_dc_levels, &res);
+      const int dc_nz =
+          VP8RecordCoeffTokens(ctx, 1, res.first, res.last, res.coeffs, tokens);
+      left_nz_[8] = dc_nz;
+      top_nz_[8] = dc_nz;
+      VP8RecordCoeffs(ctx, &res);
+      VP8InitResidual(1, 0, enc, &res);
+    }
+
+    // luma-AC: block `blk` starts 16 * blk values into y_ac_levels.
+    for (int blk = 0; blk < 16; ++blk) {
+      const int col = blk & 3;
+      const int row = blk >> 2;
+      const int ctx = top_nz_[col] + left_nz_[row];
+      VP8SetResidualCoeffs(y_ac_levels + 16 * blk, &res);
+      const int blk_nz =
+          VP8RecordCoeffTokens(ctx, res.coeff_type,
+                               res.first, res.last, res.coeffs, tokens);
+      left_nz_[row] = blk_nz;
+      top_nz_[col] = blk_nz;
+      VP8RecordCoeffs(ctx, &res);
+    }
+
+    // U/V: two groups (ch = 0, then ch = 2) of 2x2 blocks, 16 values each.
+    VP8InitResidual(0, 2, enc, &res);
+    for (int blk = 0; blk < 8; ++blk) {
+      const int ch = (blk >> 2) << 1;
+      const int top_idx = 4 + ch + (blk & 1);
+      const int left_idx = 4 + ch + ((blk >> 1) & 1);
+      const int ctx = top_nz_[top_idx] + left_nz_[left_idx];
+      VP8SetResidualCoeffs(uv_levels + 16 * blk, &res);
+      const int blk_nz =
+          VP8RecordCoeffTokens(ctx, 2, res.first, res.last, res.coeffs, tokens);
+      left_nz_[left_idx] = blk_nz;
+      top_nz_[top_idx] = blk_nz;
+      VP8RecordCoeffs(ctx, &res);
+    }
+
+    // Re-pack the flags into the single-word layout kept in ap_nz.
+    for (int bit = 0; bit < 9; ++bit) {
+      nz |= (top_nz_[bit] << kTopShift[bit]);  // bit 24 propagates the _top_ flag
+    }
+    nz |= (left_nz_[0] << 3) | (left_nz_[1] << 7) | (left_nz_[2] << 11);
+    nz |= (left_nz_[4] << 17) | (left_nz_[6] << 21);
+
+    ap_nz->left_nz[8] = left_nz_[8];
+    ap_nz->nz_current = nz;
+    ap_nz->store_nz(x_);
+    return !tokens->error_;
+  }
+
+
+  // Fills `des` with the picture geometry and the segment-0 quantization
+  // parameters taken from `enc`.
+  //
+  //   des  destination record; every field referenced below is written.
+  //   enc  encoder whose pic_, mb_w_/mb_h_ and dqm_[0] are read.
+  //
+  // Only the first segment (enc->dqm_[0]) is exported. For each matrix the
+  // "_0" field takes array index 0 and the "_n" field takes index 1.
+  void Set_AllPicInfo( AllPicInfo* des, VP8Encoder* const enc)
+  {
+    const WebPPicture* const pic = enc->pic_;
+    VP8SegmentInfo* dqm = &enc->dqm_[0];
+
+    // These two fields used to be bare expression statements ("des->id_pic;"),
+    // which left them indeterminate even though the whole record is later
+    // copied byte-wise. Give them a defined starting value.
+    // NOTE(review): 0 is assumed to be the expected initial value - confirm
+    // against the kernel side.
+    des->id_pic         = 0;
+    des->cnt_line_mb    = 0;
+
+    des->y_stride       = pic->y_stride;
+    des->uv_stride      = pic->uv_stride;
+    des->width          = pic->width;
+    des->height         = pic->height;
+    des->mb_w           = enc->mb_w_;
+    des->mb_h           = enc->mb_h_;
+
+    des->seg_lambda_p16 = dqm->lambda_i16_;
+    des->seg_lambda_p44 = dqm->lambda_i4_;
+    des->seg_tlambda    = dqm->tlambda_;
+    des->seg_lambda_uv  = dqm->lambda_uv_;
+    des->seg_tlambda_m  = dqm->lambda_mode_;
+
+    des->seg_y1_q_0      = dqm->y1_.q_[0];     // quantizer steps
+    des->seg_y1_q_n      = dqm->y1_.q_[1];
+    des->seg_y1_iq_0     = dqm->y1_.iq_[0];    // reciprocals fixed point.
+    des->seg_y1_iq_n     = dqm->y1_.iq_[1];
+    des->seg_y1_bias_0   = dqm->y1_.bias_[0];  // rounding bias
+    des->seg_y1_bias_n   = dqm->y1_.bias_[1];
+
+    des->seg_y2_q_0      = dqm->y2_.q_[0];     // quantizer steps
+    des->seg_y2_q_n      = dqm->y2_.q_[1];
+    des->seg_y2_iq_0     = dqm->y2_.iq_[0];    // reciprocals fixed point.
+    des->seg_y2_iq_n     = dqm->y2_.iq_[1];
+    des->seg_y2_bias_0   = dqm->y2_.bias_[0];  // rounding bias
+    des->seg_y2_bias_n   = dqm->y2_.bias_[1];
+
+    des->seg_uv_q_0      = dqm->uv_.q_[0];     // quantizer steps
+    des->seg_uv_q_n      = dqm->uv_.q_[1];
+    des->seg_uv_iq_0     = dqm->uv_.iq_[0];    // reciprocals fixed point.
+    des->seg_uv_iq_n     = dqm->uv_.iq_[1];
+    des->seg_uv_bias_0   = dqm->uv_.bias_[0];  // rounding bias
+    des->seg_uv_bias_n   = dqm->uv_.bias_[1];
+
+    // Sharpening tables are exported for y1 and uv only.
+    for (int i = 0; i < 16; i++) {
+      des->seg_y1_sharpen[i] = dqm->y1_.sharpen_[i];
+      des->seg_uv_sharpen[i] = dqm->uv_.sharpen_[i];
+    }
+  }
+  // Writes `size` bytes starting at `data` to the file `path`, truncating it.
+  // Only used for the debug snapshots taken by VP8EncTokenLoop_ryanw(): a file
+  // that cannot be opened or fully written is reported on stderr and skipped,
+  // so an unwritable working directory no longer crashes the encoder.
+  static void DumpBuffer_ryanw(const char* path, const void* data, size_t size) {
+    FILE* const fp = fopen(path, "wb");
+    if (fp == NULL) {
+      fprintf(stderr, "%s: cannot open %s for writing\n", __func__, path);
+      return;
+    }
+    if (fwrite(data, 1, size, fp) != size) {
+      fprintf(stderr, "%s: short write to %s\n", __func__, path);
+    }
+    fclose(fp);
+  }
+
+  // Token loop that gets the per-macroblock levels from kernel_IntraPredLoop2()
+  // called directly on host buffers, then records and emits the tokens.
+  //
+  //   enc  encoder to run; its picture planes are copied into temporary
+  //        buffers before the kernel call.
+  //
+  // Side effects: writes fp_ysrc.dat, fp_usrc.dat, fp_vsrc.dat, fp_p_info.dat
+  // and fp_level.dat into the current directory as debug snapshots.
+  //
+  // Returns 0 when PreLoopInitialize() fails, otherwise the result of
+  // PostLoopFinalize(); allocation failures are routed through
+  // PostLoopFinalize(&it, 0).
+  int VP8EncTokenLoop_ryanw(VP8Encoder* const enc) {
+
+    const WebPPicture* const pic = enc->pic_;
+    VP8EncIterator it;
+    PassStats stats;
+    int ok;
+    InitPassStats(enc, &stats);
+    ok = PreLoopInitialize(enc);
+    if (!ok) return 0;
+    VP8IteratorInit(enc, &it);
+    SetLoopParams(enc, stats.q);
+    ResetTokenStats(enc);
+    VP8TBufferClear(&enc->tokens_);
+
+    it.do_trellis_ = 0;//(rd_opt RD_OPT_TRELLIS_ALL);
+    /*************************************************************/
+    /* Preparing data for FPGA                                   */
+    /*************************************************************/
+
+    AllPicInfo picinfo;//Picture information
+    Set_AllPicInfo( &picinfo, enc);//Set picture information
+    const size_t size_info = sizeof(AllPicInfo);
+
+    // Kernel-side view of the picture information: a fixed block of 128 ints.
+    int p_info[128];
+    if (size_info > sizeof(p_info)) {
+      fprintf(stderr, "%s %d AllPicInfo does not fit in p_info\n", __func__, __LINE__);
+      return PostLoopFinalize(&it, 0);
+    }
+    memset(p_info, 0, sizeof(p_info));  // keep the unused tail deterministic
+    memcpy( (void*)p_info, (void*)(&picinfo), size_info);
+
+    const int num_mb = picinfo.mb_w * picinfo.mb_h;
+    if (num_mb <= 0) {
+      fprintf(stderr, "%s %d invalid macroblock count\n", __func__, __LINE__);
+      return PostLoopFinalize(&it, 0);
+    }
+
+    // Bytes actually copied from the picture planes (stride based).
+    const size_t y_bytes  = picinfo.y_stride  *   picinfo.height;
+    const size_t uv_bytes = picinfo.uv_stride * ((picinfo.height+1)>>1);
+    // Macroblock-padded plane sizes. The buffers are sized to whichever is
+    // larger, so a stride wider than the padded width cannot overflow them.
+    const size_t y_cap       = (size_t)num_mb * 16 * 16;
+    const size_t uv_cap      = (size_t)num_mb *  4 * 16;
+    const size_t level_count = (size_t)num_mb * 512;
+
+    /*--Following should be the interface of kernel --------------------*/
+    // calloc keeps the padding that the memcpy below does not fill, and any
+    // level words the kernel leaves untouched, at zero.
+    uint8_t* ysrc = (uint8_t*)calloc(y_bytes  > y_cap  ? y_bytes  : y_cap,  sizeof(uint8_t));
+    uint8_t* usrc = (uint8_t*)calloc(uv_bytes > uv_cap ? uv_bytes : uv_cap, sizeof(uint8_t));
+    uint8_t* vsrc = (uint8_t*)calloc(uv_bytes > uv_cap ? uv_bytes : uv_cap, sizeof(uint8_t));
+    //for pout_level, we plan to put all data of one MB into 1K Byte space and send it to DDR.
+    //Thus no need to prepare a buffer for coefficients and other data which is 2 times bigger than input buffer
+    int16_t* pout_level = (int16_t*)calloc(level_count, sizeof(int16_t));
+    if (ysrc == NULL || usrc == NULL || vsrc == NULL || pout_level == NULL) {
+      fprintf(stderr, "%s %d out of memory\n", __func__, __LINE__);
+      free(ysrc);
+      free(usrc);
+      free(vsrc);
+      free(pout_level);
+      return PostLoopFinalize(&it, 0);
+    }
+
+    //for testing copy picture data from host to FPGA, should be replaced by formal code
+    memcpy( (void*)ysrc,   (void*)(pic->y), y_bytes);
+    memcpy( (void*)usrc,   (void*)(pic->u), uv_bytes);
+    memcpy( (void*)vsrc,   (void*)(pic->v), uv_bytes);
+
+    // Debug snapshots of the kernel inputs.
+    DumpBuffer_ryanw("fp_ysrc.dat",   ysrc,   y_bytes);
+    DumpBuffer_ryanw("fp_usrc.dat",   usrc,   uv_bytes);
+    DumpBuffer_ryanw("fp_vsrc.dat",   vsrc,   uv_bytes);
+    DumpBuffer_ryanw("fp_p_info.dat", p_info, size_info);
+
+    hls::stream< ap_uint<WD_PIX*16> >  str_out;
+
+    kernel_IntraPredLoop2(
+                          p_info,
+                          ysrc,
+                          usrc,
+                          vsrc,
+#ifdef _KEEP_PSNR_
+                          &str_out,
+#endif
+                          pout_level//(int32_t*)pout_level
+                          );
+
+    // Debug snapshot of the kernel output (the mode string used to be "wa",
+    // which is not a valid fopen mode).
+    DumpBuffer_ryanw("fp_level.dat", pout_level, level_count * sizeof(int16_t));
+
+    // Each macroblock owns 512 int16 words of pout_level. Within a record this
+    // loop reads the levels from offsets 0, 16 and 16*17, the 4x4 prediction
+    // modes from 400..415 and the packed uv_mode/type/skip word from 416.
+    int16_t* pt=pout_level;
+    ap_NoneZero ap_nz;
+    do {
+      ap_uint<LG2_MAX_NUM_MB_W> x_mb = it.x_;
+      ap_uint<LG2_MAX_NUM_MB_W> y_mb = it.y_;
+#ifdef _KEEP_PSNR_
+      VP8IteratorImport( &it, NULL);
+#endif
+      ap_uint<6> ret     = (ap_uint<6>)pt[416];
+      it.mb_->uv_mode_   = ret(3,0);//it_m.ap_uv_mode_c;
+      it.mb_->type_      = ret(4,4);
+      it.mb_->skip_      = ret(5,5);//(it_r.ap_nz == 0);
+      for(int y=0; y<4 ; y++){
+        for(int x=0; x<4 ; x++){
+          it.preds_[x + it.enc_->preds_w_*y ] = pt[400+y*4+x];//SB_GET(mode_b,y,x,WD_MODE);
+        }
+      }
+
+      ok = RecordTokens_nrd2(enc, &ap_nz, x_mb, y_mb, it.mb_->type_, pt, pt+16, pt+16*17, &enc->tokens_);
+      pt+=512;
+
+#ifdef _KEEP_PSNR_
+      int VP8ScanUV[4 + 4] = {
+        0 + 0 * BPS,   4 + 0 * BPS, 0 + 4 * BPS,  4 + 4 * BPS,    // U
+        8 + 0 * BPS,  12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS     // V
+      };
+      for(int n=0;n<16;n++){
+        ap_uint<WD_PIX*16> tmp = str_out.read();
+        set_vect_to(tmp,it.yuv_out_ + VP8Scan[n],32);
+      }
+      for(int n = 0; n < 8; n += 1){
+        ap_uint<WD_PIX*16> tmp = str_out.read();
+        set_vect_to(tmp,it.yuv_out_ + U_OFF_ENC+ VP8ScanUV[n],32);
+      }
+      StoreSideInfo(&it);//just for PSNR calculation, can be skipped
+#endif
+
+    } while (ok && VP8IteratorNext(&it));
+
+    // Only emit when recording succeeded; otherwise VP8EmitTokens() would
+    // overwrite the failure stored in `ok`.
+    if (ok) {
+      FinalizeTokenProbas(&enc->proba_);//This is about AC
+      ok = VP8EmitTokens(&enc->tokens_, enc->parts_+0,(const uint8_t*)enc->proba_.coeffs_, 1);
+    }
+
+    free(ysrc);
+    free(usrc);
+    free(vsrc);
+    free(pout_level);
+    return PostLoopFinalize(&it, ok);
+  }
+
+#else
+
+  // Placeholder compiled on the #else branch (see the DISABLE_TOKEN_BUFFER note
+  // on the closing #endif). It ignores `enc` and always returns 0.
+  int VP8EncTokenLoop(VP8Encoder* const enc) {
+    const int not_available = 0;   // we shouldn't be here.
+    (void)enc;
+    return not_available;
+  }
+
+#endif    // DISABLE_TOKEN_BUFFER
+
+  // Token loop that runs the intra-prediction kernel through OpenCL: uploads
+  // the picture information and the Y/U/V planes, launches the kernel, reads
+  // back the Y plane and the per-macroblock level records, then records and
+  // emits the tokens on the host.
+  //
+  //   enc  encoder to run. enc->pic_->y is overwritten with the Y buffer read
+  //        back from the device.
+  //
+  // Returns non-zero on success, and 0 on any OpenCL, allocation or token
+  // error. With USE_C_KERNEL defined, the kernel and the device buffers in
+  // `enclooppara` are released before returning, on success and on failure.
+  //
+  // NOTE(review): the plane uploads transfer width * height bytes starting at
+  // pic->y/u/v, which presumably assumes stride == width - confirm.
+  int VP8EncTokenLoop_ryanw_k(VP8Encoder* const enc) {
+
+    printf(" *** EncTokenLoop() start \n");
+
+    // The level buffer holds one 512-word record per macroblock, for at most
+    // MAX_NUM_MB_W * MAX_NUM_MB_H macroblocks. Reject larger pictures before
+    // any state is touched instead of walking past the end of the buffer.
+    const size_t output_size = MAX_NUM_MB_W * MAX_NUM_MB_H * 512 * sizeof(uint16_t); // ??
+    const size_t mb_capacity = output_size / (512 * sizeof(uint16_t));
+    if ((size_t)enc->mb_w_ * (size_t)enc->mb_h_ > mb_capacity) {
+      fprintf(stderr, "%s %d picture exceeds the kernel macroblock capacity\n",
+              __func__, __LINE__);
+      return 0;
+    }
+
+    VP8EncIterator it;
+    PassStats stats;
+    int ok;
+    InitPassStats(enc, &stats);
+    ok = PreLoopInitialize(enc);
+    if (!ok) return 0;
+    VP8IteratorInit(enc, &it);
+    SetLoopParams(enc, stats.q);
+    ResetTokenStats(enc);
+    VP8TBufferClear(&enc->tokens_);
+
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    const int xsize = enc->pic_->width;
+    const int ysize = enc->pic_->height;
+    const int uv_width = (xsize + 1) >> 1;
+    const int uv_height = (ysize + 1) >> 1;
+
+    // Transfer sizes in bytes.
+    const size_t y_size = (size_t)xsize * (size_t)ysize * sizeof(uint8_t);
+    const size_t uv_size = (size_t)uv_width * (size_t)uv_height * sizeof(uint8_t);
+
+    it.do_trellis_ = 0;//(rd_opt RD_OPT_TRELLIS_ALL);
+    AllPicInfo picinfo;//Picture information
+    ap_NoneZero ap_nz;
+
+    Set_AllPicInfo( &picinfo, enc);//Set picture information
+
+    // Every local is declared before the first `goto Err`, so no jump crosses
+    // an initialization.
+    size_t globalSize[] = {1, 1, 1};
+    size_t localSize[] = {1, 1, 1};
+    cl_int err;
+    int16_t* const pout_level = (int16_t*)malloc(output_size);
+    int16_t* pt = pout_level;
+
+    if (pout_level == NULL) {
+      fprintf(stderr, "%s %d out of memory\n", __func__, __LINE__);
+      ok = 0;
+      goto Err;
+    }
+
+    printf("    -- COPY .input to Buffer \n");
+
+    err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.input, CL_TRUE, 0, sizeof(AllPicInfo), &picinfo, 0, NULL, NULL);
+    if(CL_SUCCESS != err) {
+      fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+      ok = 0;
+      goto Err;
+    }
+
+    printf("    -- COPY .y to Buffer \n");
+
+    err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.y, CL_TRUE, 0, y_size, enc->pic_->y, 0, NULL, NULL);
+    if(CL_SUCCESS != err) {
+      fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+      ok = 0;
+      goto Err;
+    }
+
+    printf("    -- COPY .u to Buffer \n");
+
+    err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.u, CL_TRUE, 0, uv_size, enc->pic_->u, 0, NULL, NULL);
+    if(CL_SUCCESS != err) {
+      fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+      ok = 0;
+      goto Err;
+    }
+
+    err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.v, CL_TRUE, 0, uv_size, enc->pic_->v, 0, NULL, NULL);
+    if(CL_SUCCESS != err) {
+      fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+      ok = 0;
+      goto Err;
+    }
+
+    printf("    -- COPY .v to Buffer \n");
+
+    // launch kernel
+    err = clEnqueueNDRangeKernel(hardware.mQueue, encloop.mKernel, 1, 0,
+                                 globalSize, localSize, 0, NULL, NULL);
+    if (err != CL_SUCCESS) {
+      fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+      ok = 0;
+      goto Err;
+    }
+
+    err = clFinish(hardware.mQueue);
+    if (err != CL_SUCCESS) {
+      fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+      ok = 0;
+      goto Err;
+    }
+
+    err = clEnqueueReadBuffer(hardware.mQueue, enclooppara.y, CL_TRUE, 0, y_size, enc->pic_->y, 0, NULL, NULL);
+    if(CL_SUCCESS != err) {
+      fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+      ok = 0;
+      goto Err;
+    }
+
+    err = clEnqueueReadBuffer(hardware.mQueue, enclooppara.output, CL_TRUE, 0, output_size, pout_level, 0, NULL, NULL);
+    if(CL_SUCCESS != err) {
+      fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+      ok = 0;
+      goto Err;
+    }
+
+    // Each macroblock owns 512 int16 words of pout_level. Within a record this
+    // loop reads the levels from offsets 0, 16 and 16*17, the 4x4 prediction
+    // modes from 400..415 and the packed uv_mode/type/skip word from 416.
+    do {
+      int x_mb = it.x_;
+      int y_mb = it.y_;
+
+      ap_uint<6> ret     = (ap_uint<6>)pt[416];
+      it.mb_->uv_mode_   = ret(3,0);//it_m.ap_uv_mode_c;
+      it.mb_->type_      = ret(4,4);
+      it.mb_->skip_      = ret(5,5);//(it_r.ap_nz == 0);
+      for(int y=0; y<4 ; y++){
+        for(int x=0; x<4 ; x++){
+          it.preds_[x + it.enc_->preds_w_*y ] = pt[400+y*4+x];//SB_GET(mode_b,y,x,WD_MODE);
+        }
+      }
+
+      ok = RecordTokens_nrd2(enc, &ap_nz, x_mb, y_mb, it.mb_->type_, pt, pt+16, pt+16*17, &enc->tokens_);
+      pt+=512;
+
+    } while (ok && VP8IteratorNext(&it));
+
+    // Only emit when recording succeeded; otherwise VP8EmitTokens() would
+    // overwrite the failure stored in `ok`.
+    if (ok) {
+      FinalizeTokenProbas(&enc->proba_);//This is about AC
+      ok = VP8EmitTokens(&enc->tokens_, enc->parts_+0,(const uint8_t*)enc->proba_.coeffs_, 1);
+    }
+    // Keep the finalization status: it is the value the caller must see.
+    ok = PostLoopFinalize(&it, ok);
+
+    // Shared exit for success and failure. The label and the return are
+    // unconditional so the function compiles and returns a value whether or
+    // not USE_C_KERNEL is defined.
+  Err:
+#ifdef USE_C_KERNEL
+    releaseKernel(encloop);
+    clReleaseMemObject(enclooppara.input);
+    clReleaseMemObject(enclooppara.y);
+    clReleaseMemObject(enclooppara.u);
+    clReleaseMemObject(enclooppara.v);
+    clReleaseMemObject(enclooppara.output);
+#endif
+    free(pout_level);  // free(NULL) is a no-op on the allocation-failure path
+
+    return ok;
+  }
+  // Token loop driven by the OpenCL "encloop" kernel: uploads the picture info
+  // and the Y/U/V planes, runs the kernel, reads back 512 int16 values per
+  // macroblock and records/emits them as tokens into enc->parts_[0].
+  // Returns non-zero on success, 0 on failure. Past PreLoopInitialize() every exit releases encloop/enclooppara.
+  int VP8EncTokenLoop_ryanw_k2(VP8Encoder* const enc) {
+	printf(" In VP8EncTokenLoop_ryanw_k in frame.c \n");
+
+	// Roughly refresh the proba eight times per pass
+	/*	int max_count = (enc->mb_w_ * enc->mb_h_) >> 3;
+		int num_pass_left = enc->config_->pass;
+		const int do_search = 0;//enc->do_search_;
+
+		VP8EncIterator it;
+		VP8EncProba* const proba = &enc->proba_;
+		const VP8RDLevel rd_opt = enc->rd_opt_level_;
+		const uint64_t pixel_count = enc->mb_w_ * enc->mb_h_ * 384;
+
+		PassStats stats;
+		int ok;
+		StopProfilingWatch stop_watch;
+		StartProfiling(&stop_watch);
+
+		InitPassStats(enc, &stats);
+		ok = PreLoopInitialize(enc);
+		if (!ok) return 0;
+
+		if (max_count < MIN_COUNT) max_count = MIN_COUNT;
+
+		assert(enc->num_parts_ == 1);
+		assert(enc->use_tokens_);
+		assert(proba->use_skip_proba_ == 0);
+		assert(rd_opt >= RD_OPT_BASIC);   // otherwise, token-buffer won't be useful
+		assert(num_pass_left > 0);
+
+		while (ok && num_pass_left-- > 0) {
+
+		const int is_last_pass = (fabs(stats.dq) <= DQ_LIMIT) ||
+		(num_pass_left == 0) ||
+		(enc->max_i4_header_bits_ == 0);
+		uint64_t size_p0 = 0;
+		uint64_t distortion = 0;
+		int cnt = max_count;
+
+		VP8IteratorInit(enc, &it);
+		SetLoopParams(enc, stats.q);
+
+		if (is_last_pass) {
+		ResetTokenStats(enc);
+		VP8InitFilter(&it);  // don't collect stats until last pass (too costly)
+		}
+
+		VP8TBufferClear(&enc->tokens_);*/
+
+	int i, j, index;
+
+	cl_int err;
+	EncloopInputData input_data;
+	// EncloopSegmentData segment_data;
+	// EncLoopOutputData output_data;
+	// VP8TBufferKernel output_tokens;
+
+	// VP8EncMatrix matrix_y1[NUM_MB_SEGMENTS];
+	// VP8EncMatrix matrix_y2[NUM_MB_SEGMENTS];
+	// VP8EncMatrix matrix_uv[NUM_MB_SEGMENTS];
+
+	const int xsize = enc->pic_->width;
+	const int ysize = enc->pic_->height;
+
+	const int mb_w = (xsize + 15) >> 4; // nb of blocks in x
+	const int mb_h = (ysize + 15) >> 4; // nb of blocks in y
+	const int num_mb = mb_w * mb_h;
+
+	const int preds_w = 4 * mb_w + 1; // prediction size in x
+	const int preds_h = 4 * mb_h + 1; // prediction size in y
+
+	const int y_width = xsize;
+	const int y_height = ysize;
+
+	const int uv_width = (xsize + 1) >> 1;
+	const int uv_height = (ysize + 1) >> 1;
+
+	const int y_stride = y_width;
+	const int uv_stride = uv_width;
+
+	const int expand_yheight = RoundUp(ysize, 16);
+	const int expand_uvheight = RoundUp(uv_height, 8);
+
+	uint64_t y_size = 0;
+	uint64_t uv_size = 0;
+
+	int mb_size = 0;
+	int preds_size = 0;
+	int nz_size = 0;
+	int top_data_size = 0;
+	int lf_stats_size = 0;
+	int quant_matrix_size = 0;
+	int coeffs_size = 0;
+	int stats_size = 0;
+	int level_cost_size = 0;
+	int bw_buf_size = 0;
+	int sse_size = 0;
+	int block_count_size = 0;
+	int extra_info_size = 0;
+	int max_edge_size = 0;
+	int bit_count_size = 0;
+	int expand_y_size = 0;
+	int expand_uv_size = 0;
+	int input_size = 0;
+
+	// bits size
+	y_size = y_width * y_height * sizeof(uint8_t);
+	uv_size = uv_width * uv_height * sizeof(uint8_t);
+	//mb_size = mb_w * mb_h * sizeof(uint8_t);
+	// preds_size = preds_w * preds_h * sizeof(uint8_t) + preds_w + 1;
+	//nz_size = (mb_w + 1 + 1) * sizeof(uint32_t) /*+ WEBP_ALIGN_CST*/;
+	/*	  top_data_size = mb_w * 16 * sizeof(uint8_t);
+		  lf_stats_size = NUM_MB_SEGMENTS * MAX_LF_LEVELS * sizeof(double);
+		  quant_matrix_size = sizeof(VP8EncMatrix);
+		  coeffs_size = NUM_TYPES * NUM_BANDS * NUM_CTX * NUM_PROBAS * sizeof(uint8_t);
+		  stats_size = NUM_TYPES * NUM_BANDS * NUM_CTX * NUM_PROBAS * sizeof(uint32_t);
+		  level_cost_size = NUM_TYPES * NUM_BANDS * NUM_CTX * (MAX_VARIABLE_LEVEL + 1) * sizeof(uint16_t);
+		  bw_buf_size = 408000 * sizeof(uint8_t);
+		  sse_size = 4 * sizeof(uint64_t);
+		  block_count_size = 3 * sizeof(int);
+		  extra_info_size = mb_w * mb_h * sizeof(uint8_t);
+		  max_edge_size = NUM_MB_SEGMENTS * sizeof(int);
+		  bit_count_size = 4 * 3 * sizeof(uint64_t);
+		  input_size = sizeof(EncloopInputData);*/
+	int output_size = MAX_NUM_MB_W * MAX_NUM_MB_H * 1024 * sizeof(uint16_t); // ??
+
+	int output_tokens_size = sizeof(uint16_t) * PAGE_COUNT * TOKENS_COUNT_PER_PAGE;
+
+	/*  input_data.width = xsize;
+		input_data.height = ysize;
+		input_data.filter_sharpness = enc->config_->filter_sharpness;
+		input_data.show_compressed = enc->config_->show_compressed;
+		input_data.extra_info_type = enc->pic_->extra_info_type;
+		input_data.stats_add = enc->pic_->stats;
+		input_data.simple = enc->filter_hdr_.simple_;
+		input_data.num_parts = enc->num_parts_;
+		input_data.max_i4_header_bits = enc->max_i4_header_bits_;
+
+		if (enc->lf_stats_ == NULL) {
+		input_data.lf_stats_status = 0;
+		} else {
+		input_data.lf_stats_status = 1;
+		}
+
+		input_data.use_skip_proba = !enc->proba_.use_skip_proba_;
+		input_data.method = enc->method_;
+		input_data.rd_opt = (int)enc->rd_opt_level_;
+
+		for (i = 0; i < NUM_MB_SEGMENTS; i++) {
+
+		segment_data.quant[i] = enc->dqm_[i].quant_;
+		segment_data.fstrength[i] = enc->dqm_[i].fstrength_;
+		segment_data.max_edge[i] = enc->dqm_[i].max_edge_;
+		segment_data.min_disto[i] = enc->dqm_[i].min_disto_;
+		segment_data.lambda_i16[i] = enc->dqm_[i].lambda_i16_;
+		segment_data.lambda_i4[i] = enc->dqm_[i].lambda_i4_;
+		segment_data.lambda_uv[i] = enc->dqm_[i].lambda_uv_;
+		segment_data.lambda_mode[i] = enc->dqm_[i].lambda_mode_;
+		segment_data.tlambda[i] = enc->dqm_[i].tlambda_;
+		segment_data.lambda_trellis_i16[i] = enc->dqm_[i].lambda_trellis_i16_;
+		segment_data.lambda_trellis_i4[i] = enc->dqm_[i].lambda_trellis_i4_;
+		segment_data.lambda_trellis_uv[i] = enc->dqm_[i].lambda_trellis_uv_;
+		}
+
+		expand_y_size = (expand_yheight - ysize) * xsize;
+		uint8_t expand_y[expand_y_size];
+		if (expand_yheight > ysize) {
+		for (i = 0; i < expand_yheight - ysize; i++) {
+		memcpy(expand_y + i * xsize, enc->pic_->y + xsize * (ysize - 1), xsize);
+		}
+		}
+
+		// copy expanded block
+		expand_uv_size = (expand_uvheight - uv_height) * uv_width;
+		uint8_t expand_u[expand_uv_size];
+		uint8_t expand_v[expand_uv_size];
+		if (expand_uvheight > uv_height) {
+		for (i = 0; i < expand_uvheight - uv_height; i++) {
+		memcpy(expand_u + i * uv_width, enc->pic_->u + uv_width * (uv_height - 1), uv_width);
+		memcpy(expand_v + i * uv_width, enc->pic_->v + uv_width * (uv_height - 1), uv_width);
+		}
+		}
+
+		uint8_t mb_info[5 * mb_w * mb_h];
+		for (index = 0; index < mb_size; index++) {
+		mb_info[5 * index + 0] = enc->mb_info_[index].type_;
+		mb_info[5 * index + 1] = enc->mb_info_[index].uv_mode_;
+		mb_info[5 * index + 2] = enc->mb_info_[index].skip_;
+		mb_info[5 * index + 3] = enc->mb_info_[index].segment_;
+		mb_info[5 * index + 4] = enc->mb_info_[index].alpha_;
+		}
+
+		for (i = 0; i < NUM_MB_SEGMENTS; i++) {
+		VP8Matrix* matrix = &(enc->dqm_[i].y1_);
+		for (j = 0; j < 16; j++) {
+		matrix_y1[i].q_[j] = matrix->q_[j];
+		matrix_y1[i].iq_[j] = matrix->iq_[j];
+		matrix_y1[i].bias_[j] = matrix->bias_[j];
+		matrix_y1[i].zthresh_[j] = matrix->zthresh_[j];
+		matrix_y1[i].sharpen_[j] = matrix->sharpen_[j];
+		}
+		}
+
+		for (i = 0; i < NUM_MB_SEGMENTS; i++) {
+		VP8Matrix* matrix = &(enc->dqm_[i].y2_);
+		for (j = 0; j < 16; j++) {
+		matrix_y2[i].q_[j] = matrix->q_[j];
+		matrix_y2[i].iq_[j] = matrix->iq_[j];
+		matrix_y2[i].bias_[j] = matrix->bias_[j];
+		matrix_y2[i].zthresh_[j] = matrix->zthresh_[j];
+		matrix_y2[i].sharpen_[j] = matrix->sharpen_[j];
+		}
+		}
+
+		for (i = 0; i < NUM_MB_SEGMENTS; i++) {
+		VP8Matrix* matrix = &(enc->dqm_[i].uv_);
+		for (j = 0; j < 16; j++) {
+		matrix_uv[i].q_[j] = matrix->q_[j];
+		matrix_uv[i].iq_[j] = matrix->iq_[j];
+		matrix_uv[i].bias_[j] = matrix->bias_[j];
+		matrix_uv[i].zthresh_[j] = matrix->zthresh_[j];
+		matrix_uv[i].sharpen_[j] = matrix->sharpen_[j];
+		}
+		}
+
+		output_data.range = enc->parts_[0].range_;
+		output_data.value = enc->parts_[0].value_;
+		output_data.run = enc->parts_[0].run_;
+		output_data.nb_bits = enc->parts_[0].nb_bits_;
+		output_data.pos = enc->parts_[0].pos_;
+		output_data.max_pos = enc->parts_[0].max_pos_;
+		output_data.error = enc->parts_[0].error_;
+
+		uint8_t y_top[mb_w * 16];
+		uint8_t uv_top[mb_w * 16];
+
+		memset(y_top, 127, top_data_size);
+		memset(uv_top, 127, top_data_size);
+
+		int max_edge_data[NUM_MB_SEGMENTS];
+		for (i = 0; i < NUM_MB_SEGMENTS; i++) {
+		max_edge_data[i] = enc->dqm_[i].max_edge_;
+		}
+		uint64_t bit_count[4][3];*/
+
+	size_t globalSize[] = {1, 1, 1};
+	size_t localSize[] = {1, 1, 1};
+
+    VP8EncIterator it;
+    PassStats stats;
+    int ok;
+    InitPassStats(enc, &stats);
+    ok = PreLoopInitialize(enc);
+    if (!ok) return 0;
+    VP8IteratorInit(enc, &it);
+    SetLoopParams(enc, stats.q);
+    ResetTokenStats(enc);
+    VP8TBufferClear(&enc->tokens_);
+
+    it.do_trellis_ = 0;//(rd_opt RD_OPT_TRELLIS_ALL);
+    AllPicInfo picinfo;//Picture information
+    ap_NoneZero ap_nz;
+
+	Set_AllPicInfo( &picinfo, enc);//Set picture information
+	int size_info = sizeof(AllPicInfo);
+    /*--Following should be the interface of kernel -----------------------------------------------------------------*/
+	//int p_info[64];
+	uint8_t* ysrc;
+	uint8_t* usrc;
+	uint8_t* vsrc;
+	int16_t* pout_level;
+	uint8_t* pout_out;
+	uint8_t* pout_pred;
+	uint8_t* pout_ret;
+	pout_level = (int16_t*)malloc( num_mb * 512 * sizeof(int16_t)); // 512 int16 per macroblock, consumed by the loop below
+	int16_t* pt=pout_level;
+	if(pout_level==NULL){
+	  fprintf(stderr, "pout_level==NULL\n");
+	  ok = 0; // an allocation failure must not be reported as success
+	  goto Err;
+	}
+	// copy buffer
+	// err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.input, CL_TRUE, 0, sizeof(EncloopInputData), &input_data, 0, NULL, NULL);
+	err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.input, CL_TRUE, 0, sizeof(AllPicInfo), &picinfo, 0, NULL, NULL);
+	if(CL_SUCCESS != err) {
+	  fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+	  ok = 0;
+	  goto Err;
+	}
+
+	err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.y, CL_TRUE, 0, y_size, enc->pic_->y, 0, NULL, NULL);
+	if(CL_SUCCESS != err) {
+	  fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+	  ok = 0;
+	  goto Err;
+	}
+	/*
+	  if (expand_yheight > y_height) {
+	  err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.y, CL_TRUE, y_size, expand_y_size, expand_y, 0, NULL, NULL);
+	  if(CL_SUCCESS != err) {
+	  fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+	  ok = 0;
+	  goto Err;
+	  }
+	  }*/
+
+	err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.u, CL_TRUE, 0, uv_size, enc->pic_->u, 0, NULL, NULL);
+	if(CL_SUCCESS != err) {
+	  fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+	  ok = 0;
+	  goto Err;
+	}
+	/*
+	  if (expand_uvheight > uv_height) {
+	  err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.u, CL_TRUE, uv_size, expand_uv_size, expand_u, 0, NULL, NULL);
+	  if(CL_SUCCESS != err) {
+	  fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+	  ok = 0;
+	  goto Err;
+	  }
+	  }*/
+
+	err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.v, CL_TRUE, 0, uv_size, enc->pic_->v, 0, NULL, NULL);
+	if(CL_SUCCESS != err) {
+	  fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+	  ok = 0;
+	  goto Err;
+	}
+	/*
+	  if (expand_uvheight > uv_height) {
+	  err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.v, CL_TRUE, uv_size, expand_uv_size, expand_v, 0, NULL, NULL);
+	  if(CL_SUCCESS != err) {
+	  fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+	  ok = 0;
+	  goto Err;
+	  }
+	  }
+	*/
+	/* err = clEnqueueWriteBuffer(hardware.mQueue, enclooppara.output_tokens, CL_TRUE, 0, output_tokens_size, output_tokens.tokens_, 0, NULL, NULL); */
+	/* if(CL_SUCCESS != err) { */
+	/* 	fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err)); */
+	/* 	ok = 0; */
+	/* 	goto Err; */
+	/* } */
+	// *********************************** run kernel ********************************
+
+	err = clEnqueueNDRangeKernel(hardware.mQueue, encloop.mKernel, 1, 0,
+								 globalSize, localSize, 0, NULL, NULL);
+	if (err != CL_SUCCESS) {
+	  fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+	  ok = 0;
+	  goto Err;
+	}
+
+	output_size = num_mb * 512 * sizeof(int16_t); // read back only what pout_level can hold; the MAX_NUM_MB-based size overran it
+
+	// *************************************************************************
+
+	// read buffer from device
+
+	fprintf(stderr, "start enctokenloop clFinish\n");
+	err = clFinish(hardware.mQueue);
+	if (err != CL_SUCCESS) {
+	  fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+	  ok = 0;
+	  goto Err;
+	}
+	fprintf(stderr, "stop enctokenloop clFinish\n");
+	/*
+	  err = clEnqueueReadBuffer(hardware.mQueue, enclooppara.y, CL_TRUE, 0, y_size, enc->pic_->y, 0, NULL, NULL);
+	  if(CL_SUCCESS != err) {
+	  fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+	  ok = 0;
+	  goto Err;
+	  }
+
+	  err = clEnqueueReadBuffer(hardware.mQueue, enclooppara.u, CL_TRUE, 0, uv_size, enc->pic_->u, 0, NULL, NULL);
+	  if(CL_SUCCESS != err) {
+	  fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+	  ok = 0;
+	  goto Err;
+	  }
+
+	  err = clEnqueueReadBuffer(hardware.mQueue, enclooppara.v, CL_TRUE, 0, uv_size, enc->pic_->v, 0, NULL, NULL);
+	  if(CL_SUCCESS != err) {
+	  fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+	  ok = 0;
+	  goto Err;
+	  }
+	*/
+	// err = clEnqueueReadBuffer(hardware.mQueue, enclooppara.output, CL_TRUE, 0, output_size, output_tokens.tokens_, 0, NULL, NULL);
+	err = clEnqueueReadBuffer(hardware.mQueue, enclooppara.output, CL_TRUE, 0, output_size, pout_level, 0, NULL, NULL);
+	if(CL_SUCCESS != err) {
+	  fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+	  ok = 0;
+	  goto Err;
+	}
+
+	/* output_tokens.error_ = output_data.error_; */
+	/* enc->tokens_.left_ = output_data.left_; */
+	/* enc->tokens_.page_size_ = output_data.page_size_; */
+	/* enc->tokens_.error_ = output_data.error_; */
+	/* ReadTokenFromKernel(&enc->tokens_, &output_tokens); */
+	do {
+	  ap_uint<LG2_MAX_NUM_MB_W> x_mb = it.x_;
+	  ap_uint<LG2_MAX_NUM_MB_W> y_mb = it.y_;
+
+	  ap_uint<6> ret     = (ap_uint<6>)pt[416];
+	  it.mb_->uv_mode_   = ret(3,0);//it_m.ap_uv_mode_c;
+	  it.mb_->type_      = ret(4,4);
+	  it.mb_->skip_      = ret(5,5);//(it_r.ap_nz == 0);
+	  for(int y=0; y<4 ; y++){
+		for(int x=0; x<4 ; x++){
+		  it.preds_[x + it.enc_->preds_w_*y ] = pt[400+y*4+x];//SB_GET(mode_b,y,x,WD_MODE);
+		}
+	  }
+
+	  ok = RecordTokens_nrd2(enc, &ap_nz, x_mb, y_mb, it.mb_->type_, pt, pt+16, pt+16*17, &enc->tokens_);
+	  pt+=512;
+
+	} while (ok && VP8IteratorNext(&it));
+
+	FinalizeTokenProbas(&enc->proba_);//This is about AC
+	ok = VP8EmitTokens(&enc->tokens_, enc->parts_+0,(const uint8_t*)enc->proba_.coeffs_, 1);
+	PostLoopFinalize(&it, ok);//This functions
+
+  Err:
+	releaseKernel(encloop);
+	clReleaseMemObject(enclooppara.input);
+	clReleaseMemObject(enclooppara.y);
+	clReleaseMemObject(enclooppara.u);
+	clReleaseMemObject(enclooppara.v);
+	/* clReleaseMemObject(enclooppara.mb_info); */
+	/* clReleaseMemObject(enclooppara.preds); */
+	/* clReleaseMemObject(enclooppara.nz); */
+	/* clReleaseMemObject(enclooppara.y_top); */
+	/* clReleaseMemObject(enclooppara.uv_top); */
+	/* clReleaseMemObject(enclooppara.quant_matrix); */
+	/* clReleaseMemObject(enclooppara.coeffs); */
+	/* clReleaseMemObject(enclooppara.stats); */
+	/* clReleaseMemObject(enclooppara.level_cost); */
+	/* clReleaseMemObject(enclooppara.bw_buf); */
+	/* clReleaseMemObject(enclooppara.sse); */
+	/* clReleaseMemObject(enclooppara.block_count); */
+	/* clReleaseMemObject(enclooppara.extra_info); */
+	/* clReleaseMemObject(enclooppara.max_edge); */
+	/* clReleaseMemObject(enclooppara.bit_count); */
+	/* clReleaseMemObject(enclooppara.sse_count); */
+	/* clReleaseMemObject(enclooppara.output_data); */
+	/* clReleaseMemObject(enclooppara.output_tokens); */
+	clReleaseMemObject(enclooppara.output);
+	free(pout_level); // host staging buffer was leaked before; free(NULL) is a no-op on the malloc-failure path
+
+	return ok;
+  }
+
+  //------------------------------------------------------------------------------
+
diff --git a/codec/L2/demos/webpEnc/host/src/enc/histogram.c b/codec/L2/demos/webpEnc/host/src/enc/histogram.c
new file mode 100644
index 0000000000..9ad154fa6c
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/histogram.c
@@ -0,0 +1,1069 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+#ifdef HAVE_CONFIG_H
+#include "../webp/config.h"
+#endif
+
+#include <stdio.h>
+#include <math.h>
+
+#include "./backward_references.h"
+#include "./histogram.h"
+#include "../dsp/lossless.h"
+#include "../utils/utils.h"
+#include "../utils/profiling.h"
+#include "../utils/thread.h"
+
+// Debug trace: prints the printf-style arguments to stderr, then the call site. To silence it use: #define msg(x...)
+#define msg(x...)                                                                                        \
+    do {                                                                                                 \
+        fprintf(stderr, x);                                                                              \
+        fprintf(stderr, "---------File (%s), Func (%s), Line (%d)\n", __FILE__, __FUNCTION__, __LINE__); \
+    } while (0)
+
+#define MAX_COST 1.e38
+
+// Number of partitions for the three dominant (literal, red and blue) symbol
+// costs.
+#define NUM_PARTITIONS 4
+// The size of the bin-hash corresponding to the three dominant costs.
+#define BIN_SIZE (NUM_PARTITIONS * NUM_PARTITIONS * NUM_PARTITIONS)
+// Maximum number of histograms allowed in greedy combining algorithm.
+#define MAX_HISTO_GREEDY 100
+
+// Zero-fills VP8LGetHistogramSize() bytes at 'p', keeping its literal_ pointer and cache bits.
+static void HistogramClear(VP8LHistogram* const p) {
+    const int saved_bits = p->palette_code_bits_;
+    uint32_t* const saved_literal = p->literal_;
+    memset(p, 0, VP8LGetHistogramSize(saved_bits));
+    p->literal_ = saved_literal;
+    p->palette_code_bits_ = saved_bits;
+}
+
+// Exchanges the histogram pointers stored at *A and *B.
+static void HistogramSwap(VP8LHistogram** const A, VP8LHistogram** const B) {
+    VP8LHistogram* const held = *B;
+    *B = *A;
+    *A = held;
+}
+
+// Copies 'src' over 'dst' (both must use the same cache bits) while keeping dst's own literal_.
+static void HistogramCopy(const VP8LHistogram* const src, VP8LHistogram* const dst) {
+    uint32_t* const kept_literal = dst->literal_;
+    const int num_bytes = VP8LGetHistogramSize(dst->palette_code_bits_);
+    assert(src->palette_code_bits_ == dst->palette_code_bits_);
+    memcpy(dst, src, num_bytes);
+    dst->literal_ = kept_literal;
+}
+
+// Byte size of a VP8LHistogram followed by one int-sized slot per literal code.
+int VP8LGetHistogramSize(int cache_bits) {
+    const size_t bytes = sizeof(VP8LHistogram) + sizeof(int) * VP8LHistogramNumCodes(cache_bits);
+    assert(bytes <= (size_t)0x7fffffff);
+    return (int)bytes;
+}
+
+void VP8LFreeHistogram(VP8LHistogram* const histo) {  // Releases 'histo' by forwarding it to WebPSafeFree().
+    WebPSafeFree(histo);
+}
+
+void VP8LFreeHistogramSet(VP8LHistogramSet* const histo) {  // Releases the set by forwarding it to WebPSafeFree().
+    WebPSafeFree(histo);
+}
+
+// Adds every token reachable through the cursor over 'refs' to 'histo'.
+void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs, VP8LHistogram* const histo) {
+    VP8LRefsCursor cursor;
+    for (cursor = VP8LRefsCursorInit(refs); VP8LRefsCursorOk(&cursor); VP8LRefsCursorNext(&cursor)) {
+        VP8LHistogramAddSinglePixOrCopy(histo, cursor.cur_pos);
+    }
+}
+
+// Clears 'p' and refills it from 'refs'; a negative palette_code_bits keeps p's current setting.
+void VP8LHistogramCreate(VP8LHistogram* const p, const VP8LBackwardRefs* const refs, int palette_code_bits) {
+    const int keep_current_bits = (palette_code_bits < 0);
+    if (!keep_current_bits) p->palette_code_bits_ = palette_code_bits;
+    HistogramClear(p);
+    VP8LHistogramStoreRefs(refs, p);
+}
+
+void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits) {  // Sets p's cache bits, then clears it.
+    p->palette_code_bits_ = palette_code_bits;  // set first: HistogramClear() sizes its memset from this field
+    HistogramClear(p);
+}
+
+// Allocates one histogram whose literal table sits right behind the struct; returns NULL on failure.
+VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {
+    const int num_bytes = VP8LGetHistogramSize(cache_bits);
+    uint8_t* const block = (uint8_t*)WebPSafeMalloc(num_bytes, sizeof(*block));
+    VP8LHistogram* const result = (VP8LHistogram*)block;
+    if (result != NULL) {
+        result->literal_ = (uint32_t*)(block + sizeof(VP8LHistogram));  // not necessarily aligned
+        VP8LHistogramInit(result, cache_bits);
+    }
+    return result;
+}
+
+// Carves a set header, 'size' pointers and 'size' aligned histograms out of one WebPSafeMalloc() block (NULL on failure).
+VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
+    int idx;
+    VP8LHistogramSet* result;
+    uint8_t* cursor;
+    const int bytes_per_histo = VP8LGetHistogramSize(cache_bits);
+    const size_t bytes_needed = sizeof(*result) + size * (sizeof(*result->histograms) + bytes_per_histo + WEBP_ALIGN_CST);
+    uint8_t* const block = (uint8_t*)WebPSafeMalloc(bytes_needed, sizeof(*block));
+    if (block == NULL) return NULL;
+
+    result = (VP8LHistogramSet*)block;
+    result->max_size = size;
+    result->size = size;
+    result->histograms = (VP8LHistogram**)(block + sizeof(*result));
+    cursor = block + sizeof(*result) + size * sizeof(*result->histograms);
+    for (idx = 0; idx < size; ++idx) {
+        cursor = (uint8_t*)WEBP_ALIGN(cursor);
+        result->histograms[idx] = (VP8LHistogram*)cursor;
+        result->histograms[idx]->literal_ = (uint32_t*)(cursor + sizeof(VP8LHistogram));  // not necessarily aligned
+        VP8LHistogramInit(result->histograms[idx], cache_bits);
+        cursor += bytes_per_histo;
+    }
+    return result;
+}
+
+// -----------------------------------------------------------------------------
+
+// Counts one PixOrCopy token 'v' in the tables of 'histo' it belongs to (literal, cache index or copy).
+void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, const PixOrCopy* const v) {
+    int prefix_code, unused_extra_bits;
+    if (PixOrCopyIsLiteral(v)) {
+        ++histo->blue_[PixOrCopyLiteral(v, 0)];
+        ++histo->literal_[PixOrCopyLiteral(v, 1)];
+        ++histo->red_[PixOrCopyLiteral(v, 2)];
+        ++histo->alpha_[PixOrCopyLiteral(v, 3)];
+    } else if (!PixOrCopyIsCacheIdx(v)) {
+        VP8LPrefixEncodeBits(PixOrCopyLength(v), &prefix_code, &unused_extra_bits);
+        ++histo->literal_[NUM_LITERAL_CODES + prefix_code];
+        VP8LPrefixEncodeBits(PixOrCopyDistance(v), &prefix_code, &unused_extra_bits);
+        ++histo->distance_[prefix_code];
+    } else {
+        ++histo->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v)];
+    }
+}
+
+// -----------------------------------------------------------------------------
+// Entropy-related functions.
+
+// Refines the raw estimate entropy->entropy by blending it with the bound
+// (2 * sum - max_val) and returning whichever of the blend and the raw value
+// is larger. The blend weight 'mix' depends on the number of non-zero symbols.
+static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) {
+    double mix;
+    double min_limit;
+
+    // At most one non-zero symbol: the cost is reported as 0.
+    if (entropy->nonzeros <= 1) return 0;
+
+    // Two symbols, they will be 0 and 1 in a Huffman code.
+    // Mix in a little entropy to favor good clustering when distributions
+    // of these are combined.
+    if (entropy->nonzeros == 2) {
+        return 0.99 * entropy->sum + 0.01 * entropy->entropy;
+    }
+
+    // Whatever the entropy says, Huffman coding cannot beat min_limit. Some
+    // entropy is still mixed into min_limit because, per the original author,
+    // it gives much better (~0.5 %) compression, perhaps through clustering.
+    switch (entropy->nonzeros) {
+        case 3: mix = 0.95; break;
+        case 4: mix = 0.7; break;
+        default: mix = 0.627; break;  // five or more non-zero symbols
+    }
+
+    min_limit = 2 * entropy->sum - entropy->max_val;
+    min_limit = mix * min_limit + (1.0 - mix) * entropy->entropy;
+    if (entropy->entropy < min_limit) return min_limit;
+    return entropy->entropy;
+}
+
+// Refined entropy of array[0..n); a non-NULL 'trivial_symbol' receives the lone non-zero symbol, if any.
+double VP8LBitsEntropy(const uint32_t* const array, int n, uint32_t* const trivial_symbol) {
+    VP8LBitEntropy raw;
+    VP8LBitsEntropyUnrefined(array, n, &raw);
+    if (trivial_symbol != NULL) {
+        *trivial_symbol = (raw.nonzeros != 1) ? VP8L_NON_TRIVIAL_SYM : raw.nonzero_code;
+    }
+    return BitsEntropyRefine(&raw);
+}
+
+// Starting cost of a Huffman table: CODE_LENGTH_CODES * 3, lowered by a small
+// bias because the Huffman code lengths are typically not stored in full length.
+static double InitialHuffmanCost(void) {
+    const double bias = 9.1;
+    const int base_size = CODE_LENGTH_CODES * 3;
+    return base_size - bias;
+}
+
+// Finalizes the Huffman cost from the streak statistics and length type (<3 or >=3); terms are summed in a fixed order.
+static double FinalHuffmanCost(const VP8LStreaks* const stats) {
+    const double base = InitialHuffmanCost();
+    const double counts0_term = stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1];
+    const double counts1_term = stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1];
+    const double streak00_term = 1.796875 * stats->streaks[0][0];
+    const double streak10_term = 3.28125 * stats->streaks[1][0];
+    return (((base + counts0_term) + counts1_term) + streak00_term) + streak10_term;
+}
+
+// Cost estimate for the distribution 'population' ('length' entries): the
+// refined entropy plus the Huffman cost. A non-NULL 'trivial_sym' receives the
+// only non-zero symbol, or VP8L_NON_TRIVIAL_SYM when there is not exactly one.
+static double PopulationCost(const uint32_t* const population, int length, uint32_t* const trivial_sym) {
+    VP8LStreaks streaks;
+    VP8LBitEntropy raw;
+    VP8LGetEntropyUnrefined(population, length, &raw, &streaks);
+    if (trivial_sym != NULL) {
+        *trivial_sym = (raw.nonzeros != 1) ? VP8L_NON_TRIVIAL_SYM : raw.nonzero_code;
+    }
+    return BitsEntropyRefine(&raw) + FinalHuffmanCost(&streaks);
+}
+
+// Same estimate as PopulationCost(), computed by VP8LGetCombinedEntropyUnrefined() over X and Y together.
+static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X, const uint32_t* const Y, int length) {
+    VP8LStreaks streaks;
+    VP8LBitEntropy raw;
+    VP8LGetCombinedEntropyUnrefined(X, Y, length, &raw, &streaks);
+    return BitsEntropyRefine(&raw) + FinalHuffmanCost(&streaks);
+}
+
+// Estimates the Entropy + Huffman + other block overhead size cost (terms added in a fixed left-to-right order).
+double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
+    const double lit = PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_), NULL);
+    const double rgb = lit + PopulationCost(p->red_, NUM_LITERAL_CODES, NULL) + PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL);
+    const double all = rgb + PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL) + PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL);
+    const double with_length_extra = all + VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES);
+    return with_length_extra + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
+}
+
+// -----------------------------------------------------------------------------
+// Various histogram combine/cost-eval functions
+
+// Adds to *cost the combined cost of 'a' and 'b', one table group at a time.
+// Returns 0 as soon as the running total exceeds 'cost_threshold' (*cost then
+// holds a partial sum), or 1 when all groups were added without exceeding it.
+static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, const VP8LHistogram* const b,
+                                       double cost_threshold, double* cost) {
+    double running = *cost;
+    int within_threshold = 0;
+    assert(a->palette_code_bits_ == b->palette_code_bits_);
+    running += GetCombinedEntropy(a->literal_, b->literal_, VP8LHistogramNumCodes(a->palette_code_bits_));
+    running += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES, b->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES);
+    do {
+        if (running > cost_threshold) break;
+        running += GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES);
+        if (running > cost_threshold) break;
+        running += GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES);
+        if (running > cost_threshold) break;
+        running += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES);
+        if (running > cost_threshold) break;
+        running += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES);
+        running += VP8LExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES);
+        within_threshold = !(running > cost_threshold);
+    } while (0);
+    *cost = running;
+    return within_threshold;
+}
+
+// Evaluates merging 'a' and 'b'. The value returned is
+//   Score = C(a+b) - C(a) - C(b), where C(a) + C(b) is known and fixed.
+// 'cost_threshold' is the previous score, so the partial cost only has to be
+// compared against 'cost_threshold + C(a) + C(b)' to possibly bail out early.
+// 'out' receives a + b (with bit_cost_, palette_code_bits_ and trivial_symbol_
+// set) only without a bail-out; otherwise the score rests on a partial cost.
+static double HistogramAddEval(const VP8LHistogram* const a,
+                               const VP8LHistogram* const b,
+                               VP8LHistogram* const out,
+                               double cost_threshold) {
+    const double separate_cost = a->bit_cost_ + b->bit_cost_;
+    const double limit = cost_threshold + separate_cost;
+    double merged_cost = 0;
+
+    if (!GetCombinedHistogramEntropy(a, b, limit, &merged_cost)) {
+        return merged_cost - separate_cost;
+    }
+    VP8LHistogramAdd(a, b, out);
+    out->bit_cost_ = merged_cost;
+    out->palette_code_bits_ = a->palette_code_bits_;
+    out->trivial_symbol_ = (a->trivial_symbol_ != b->trivial_symbol_) ? VP8L_NON_TRIVIAL_SYM : a->trivial_symbol_;
+    return merged_cost - separate_cost;
+}
+
+// Variant of HistogramAddEval() that stores no merged histogram. The value
+// returned is C(a+b) - C(a); C(b) is left out since it is constant over all
+// the evaluations. The sum may be partial if 'cost_threshold' was exceeded.
+static double HistogramAddThresh(const VP8LHistogram* const a, const VP8LHistogram* const b, double cost_threshold) {
+    double delta = -a->bit_cost_;
+    (void)GetCombinedHistogramEntropy(a, b, cost_threshold, &delta);
+    return delta;
+}
+
+// -----------------------------------------------------------------------------
+
+// The structure to keep track of cost range for the three dominant entropy
+// symbols.
+// TODO(skal): Evaluate if float can be used here instead of double for
+// representing the entropy costs.
+typedef struct {
+    double literal_max_;  // largest literal_cost_ folded in by UpdateDominantCostRange()
+    double literal_min_;  // smallest literal_cost_ folded in
+    double red_max_;      // largest red_cost_ folded in
+    double red_min_;      // smallest red_cost_ folded in
+    double blue_max_;     // largest blue_cost_ folded in
+    double blue_min_;     // smallest blue_cost_ folded in
+} DominantCostRange;
+
+// Puts every tracked range in its initial state: each maximum at 0 and each
+// minimum at MAX_COST.
+static void DominantCostRangeInit(DominantCostRange* const c) {
+    const double initial_max = 0.;
+    const double initial_min = MAX_COST;
+    c->literal_min_ = c->red_min_ = c->blue_min_ = initial_min;
+    c->literal_max_ = c->red_max_ = c->blue_max_ = initial_max;
+}
+
+static void UpdateDominantCostRange(const VP8LHistogram* const h, DominantCostRange* const c) {  // Widens c's ranges to cover h's costs.
+    if (c->literal_max_ < h->literal_cost_) c->literal_max_ = h->literal_cost_;
+    if (c->literal_min_ > h->literal_cost_) c->literal_min_ = h->literal_cost_;
+    if (c->red_max_ < h->red_cost_) c->red_max_ = h->red_cost_;
+    if (c->red_min_ > h->red_cost_) c->red_min_ = h->red_cost_;
+    if (c->blue_max_ < h->blue_cost_) c->blue_max_ = h->blue_cost_;
+    if (c->blue_min_ > h->blue_cost_) c->blue_min_ = h->blue_cost_;
+}
+
+// Recomputes h's cached literal_cost_, red_cost_, blue_cost_, bit_cost_ and trivial_symbol_ from its counts.
+static void UpdateHistogramCost(VP8LHistogram* const h) {
+    uint32_t a_sym, r_sym, b_sym;
+    const int literal_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
+    const double alpha = PopulationCost(h->alpha_, NUM_LITERAL_CODES, &a_sym);
+    const double distance =
+        PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL) + VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
+    h->literal_cost_ =
+        PopulationCost(h->literal_, literal_codes, NULL) + VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES);
+    h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES, &r_sym);
+    h->blue_cost_ = PopulationCost(h->blue_, NUM_LITERAL_CODES, &b_sym);
+    h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ + alpha + distance;
+    h->trivial_symbol_ = VP8L_NON_TRIVIAL_SYM;
+    if ((a_sym | r_sym | b_sym) != VP8L_NON_TRIVIAL_SYM) {
+        h->trivial_symbol_ = ((uint32_t)a_sym << 24) | (r_sym << 16) | (b_sym << 0);
+    }
+}
+
+// Maps 'val' to a partition index from its relative position between 'min' and 'max'.
+static int GetBinIdForEntropy(double min, double max, double val) {
+    const double scaled_offset = NUM_PARTITIONS * (val - min);
+    return (int)(scaled_offset / (max - min + 1e-6));
+}
+
+static int GetHistoBinIndexLowEffort(const VP8LHistogram* const h, const DominantCostRange* const c) {  // Bin id from the literal cost only.
+    const int bin_id = GetBinIdForEntropy(c->literal_min_, c->literal_max_, h->literal_cost_);
+    assert(bin_id < NUM_PARTITIONS);  // a single partition index, so it must stay below NUM_PARTITIONS
+    return bin_id;
+}
+
+// Packs the blue, red and literal partition indices of 'h' into one bin id (blue is the lowest digit).
+static int GetHistoBinIndex(const VP8LHistogram* const h, const DominantCostRange* const c) {
+    const int red_bin = GetBinIdForEntropy(c->red_min_, c->red_max_, h->red_cost_);
+    const int literal_bin = GetBinIdForEntropy(c->literal_min_, c->literal_max_, h->literal_cost_);
+    const int bin_id = GetBinIdForEntropy(c->blue_min_, c->blue_max_, h->blue_cost_) + NUM_PARTITIONS * (red_bin + NUM_PARTITIONS * literal_bin);
+    assert(bin_id < BIN_SIZE);
+    return bin_id;
+}
+
+// Walks 'backward_refs' and adds every token to the histogram of the tile
+// that contains the token's starting pixel. Tiles are (1 << histo_bits)
+// pixels wide and high; 'xsize' is the image width in pixels.
+static void HistogramBuild(int xsize,
+                           int histo_bits,
+                           const VP8LBackwardRefs* const backward_refs,
+                           VP8LHistogramSet* const image_histo) {
+    VP8LHistogram** const histograms = image_histo->histograms;
+    const int tiles_per_row = VP8LSubSampleSize(xsize, histo_bits);
+    int col = 0;
+    int row = 0;
+    VP8LRefsCursor cursor = VP8LRefsCursorInit(backward_refs);
+    assert(histo_bits > 0);
+    for (; VP8LRefsCursorOk(&cursor); VP8LRefsCursorNext(&cursor)) {
+        const PixOrCopy* const token = cursor.cur_pos;
+        const int tile = (row >> histo_bits) * tiles_per_row + (col >> histo_bits);
+        VP8LHistogramAddSinglePixOrCopy(histograms[tile], token);
+        // Advance by the token length, wrapping to the following row(s).
+        col += PixOrCopyLength(token);
+        while (col >= xsize) {
+            col -= xsize;
+            ++row;
+        }
+    }
+}
+
+// For each of the orig_histo->size source histograms: refresh its cached
+// costs, then copy it to the slot with the same index in 'image_histo'.
+static void HistogramCopyAndAnalyze(VP8LHistogramSet* const orig_histo, VP8LHistogramSet* const image_histo) {
+    VP8LHistogram** const src = orig_histo->histograms;
+    VP8LHistogram** const dst = image_histo->histograms;
+    const int count = orig_histo->size;
+    int idx = 0;
+    while (idx < count) {
+        // The costs are updated on the source first, so the copy carries them.
+        UpdateHistogramCost(src[idx]);
+        HistogramCopy(src[idx], dst[idx]);
+        ++idx;
+    }
+}
+
+// Assigns every histogram of 'image_histo' to an entropy bin and records the
+// assignment in 'bin_map'. Each bin owns (size + 1) consecutive int16_t
+// entries: entry 0 counts the histograms in the bin, entries 1..count hold
+// their indices in insertion order. The bin index comes from the literal cost
+// only when 'low_effort' is set, else from the literal, red and blue costs.
+static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo, int16_t* const bin_map, int low_effort) {
+    VP8LHistogram** const histograms = image_histo->histograms;
+    const int num_histos = image_histo->size;
+    const int bin_depth = num_histos + 1;
+    DominantCostRange cost_range;
+    int idx;
+
+    // First pass: gather the min/max of the dominant costs.
+    DominantCostRangeInit(&cost_range);
+    for (idx = 0; idx < num_histos; ++idx) {
+        UpdateDominantCostRange(histograms[idx], &cost_range);
+    }
+
+    // Second pass: append each histogram index to its bin.
+    for (idx = 0; idx < num_histos; ++idx) {
+        const VP8LHistogram* const histo = histograms[idx];
+        const int raw_bin =
+            low_effort ? GetHistoBinIndexLowEffort(histo, &cost_range) : GetHistoBinIndex(histo, &cost_range);
+        const int16_t bin_id = (int16_t)raw_bin;
+        const int bin_offset = bin_id * bin_depth;
+        int16_t* const bin = bin_map + bin_offset;
+        // bin[0] is the per-bin counter: bump it and use the new value as the
+        // position of the entry being added.
+        const int fill = ++bin[0];
+        assert(bin_offset + fill < bin_depth * BIN_SIZE);
+        bin[fill] = idx;
+    }
+}
+
+// Packs the live histograms (non-NULL pointer and bit_cost_ != 0) at the
+// front of the array, preserving their order, NULLs the slots they were moved
+// from and shrinks image_histo->size to the number of live histograms.
+static void HistogramCompactBins(VP8LHistogramSet* const image_histo) {
+    VP8LHistogram** const histograms = image_histo->histograms;
+    int read_pos;
+    int write_pos = 0;
+
+    for (read_pos = 0; read_pos < image_histo->size; ++read_pos) {
+        VP8LHistogram* const current = histograms[read_pos];
+        if (current == NULL || current->bit_cost_ == 0.) {
+            continue; // unused entry: leave it where it is
+        }
+        if (write_pos < read_pos) {
+            histograms[write_pos] = current;
+            histograms[read_pos] = NULL;
+        }
+        ++write_pos;
+    }
+    image_histo->size = write_pos;
+}
+
+// Merges, bin by bin, the histograms listed in 'bin_map' into the first
+// histogram of their bin, then compacts 'image_histo'.
+//  - low_effort: every histogram of a bin is added to the first one, whose
+//    cost is recomputed once the bin is done.
+//  - otherwise: a histogram is merged only when HistogramAddEval() returns
+//    less than -bit_cost * combine_cost_factor.
+// A merged-away histogram gets bit_cost_ = 0. 'cur_combo' is scratch storage
+// that may be swapped with an entry of the set, so the caller must continue
+// with the returned pointer.
+static VP8LHistogram* HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,
+                                                 VP8LHistogram* cur_combo,
+                                                 int16_t* const bin_map,
+                                                 int bin_depth,
+                                                 int num_bins,
+                                                 double combine_cost_factor,
+                                                 int low_effort) {
+    VP8LHistogram** const histograms = image_histo->histograms;
+    int bin;
+
+    for (bin = 0; bin < num_bins; ++bin) {
+        const int16_t* const entries = bin_map + bin * bin_depth;
+        const int count = entries[0];
+        const int keep = entries[1];
+        int failures = 0;
+        int pos;
+        for (pos = 2; pos <= count; ++pos) {
+            const int cand = entries[pos];
+            double cand_cost;
+            double thresh;
+            double cost_diff;
+            int try_combine;
+            if (low_effort) {
+                // Merge unconditionally, whatever the cost of the result.
+                VP8LHistogramAdd(histograms[keep], histograms[cand], histograms[keep]);
+                histograms[cand]->bit_cost_ = 0.;
+                continue;
+            }
+            cand_cost = histograms[cand]->bit_cost_;
+            if (!(cand_cost > 0.)) {
+                continue;
+            }
+            thresh = -cand_cost * combine_cost_factor;
+            cost_diff = HistogramAddEval(histograms[keep], histograms[cand], cur_combo, thresh);
+            if (!(cost_diff < thresh)) {
+                continue;
+            }
+            // Merge only if the combination is trivial or both candidates are
+            // already non-trivial. After 32 refusals within the bin, merge
+            // anyway to avoid increasing the header size.
+            try_combine = (cur_combo->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM) ||
+                          ((histograms[keep]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM) &&
+                           (histograms[cand]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM));
+            if (try_combine || (failures >= 32)) {
+                HistogramSwap(&cur_combo, &histograms[keep]);
+                histograms[cand]->bit_cost_ = 0.;
+            } else {
+                ++failures;
+            }
+        }
+        if (low_effort) {
+            // One cost update per bin, after all its merges.
+            UpdateHistogramCost(histograms[keep]);
+        }
+    }
+    HistogramCompactBins(image_histo);
+    return cur_combo;
+}
+
+// Advances '*seed' by multiplying it by 16807 (modulo 2^32) and returns the
+// new value. A product of zero is replaced by 1, so the state never stays 0.
+static uint32_t MyRand(uint32_t* seed) {
+    const uint32_t product = *seed * 16807U;
+    *seed = (product != 0) ? product : 1;
+    return *seed;
+}
+
+// -----------------------------------------------------------------------------
+// Histogram pairs priority queue
+
+// Pair of histograms. Negative idx1 value means that pair is out-of-date.
+typedef struct {
+    int idx1;          // smaller index of the pair (PreparePair() orders idx1 <= idx2)
+    int idx2;          // larger index of the pair
+    double cost_diff;  // value returned by HistogramAddEval() for the pair; only negative values are queued
+    double cost_combo; // bit_cost_ of the combined histogram produced by that evaluation
+} HistogramPair;
+
+typedef struct {
+    HistogramPair* queue; // max_size + 1 entries; queue[0] holds the smallest cost_diff, queue[size] is scratch
+    int size;             // number of valid pairs currently stored
+    int max_size;         // capacity, set to max_index^2 by HistoQueueInit()
+} HistoQueue;
+
+// Prepares an empty queue able to hold max_index^2 pairs.
+// That bound is safe for HistogramCombineGreedy even if every candidate is
+// accepted by UpdateQueueFront: the initial double loop inserts at most
+// max_index*(max_index-1)/2 pairs, and the re-insertions of the merge loop
+// (max_index-1, then max_index-2, ...) add up to the same amount.
+// Returns 1 on success, 0 if the allocation failed.
+static int HistoQueueInit(HistoQueue* const histo_queue, const int max_index) {
+    const int capacity = max_index * max_index;
+    histo_queue->size = 0;
+    histo_queue->max_size = capacity;
+    // One extra slot: the element at index "size" is used as temporary data,
+    // and "size" can be as large as the capacity.
+    histo_queue->queue = WebPSafeMalloc(capacity + 1, sizeof(*histo_queue->queue));
+    return (histo_queue->queue != NULL) ? 1 : 0;
+}
+
+// Releases the pair storage owned by 'histo_queue'. The other fields are
+// left untouched.
+static void HistoQueueClear(HistoQueue* const histo_queue) {
+    HistogramPair* storage;
+    assert(histo_queue != NULL);
+    storage = histo_queue->queue;
+    WebPSafeFree(storage);
+}
+
+// Exchanges the contents of the two pairs (a no-op when p1 == p2).
+static void SwapHistogramPairs(HistogramPair* p1, HistogramPair* p2) {
+    HistogramPair saved;
+    saved = *p2;
+    *p2 = *p1;
+    *p1 = saved;
+}
+
+// The candidate pair is expected in the scratch slot queue[size]. It is
+// dropped when its cost_diff is >= 0. Otherwise it is accepted: it is first
+// swapped with the front when its cost_diff is smaller than the front's, so
+// queue[0] keeps the smallest cost_diff, and then 'size' is incremented.
+static void UpdateQueueFront(HistoQueue* const histo_queue) {
+    HistogramPair* const front = histo_queue->queue;
+    HistogramPair* const candidate = front + histo_queue->size;
+
+    if (candidate->cost_diff >= 0) {
+        return;
+    }
+    if (candidate->cost_diff < front->cost_diff) {
+        SwapHistogramPairs(front, candidate);
+    }
+    ++histo_queue->size;
+
+    // The capacity cannot be exceeded. The allocation holds one element more
+    // than max_size, so queue[max_size] is still a valid scratch slot.
+    assert(histo_queue->size <= histo_queue->max_size);
+}
+
+// -----------------------------------------------------------------------------
+
+// Fills 'pair' for the histograms at idx1 and idx2, stored with the smaller
+// index first. cost_diff is the value returned by HistogramAddEval() with a
+// threshold of 0; cost_combo is the bit_cost_ left in the scratch histogram
+// 'histos' by that call.
+static void PreparePair(
+    VP8LHistogram** histograms, int idx1, int idx2, HistogramPair* const pair, VP8LHistogram* const histos) {
+    const int low = (idx1 > idx2) ? idx2 : idx1;
+    const int high = (idx1 > idx2) ? idx1 : idx2;
+    pair->idx1 = low;
+    pair->idx2 = high;
+    pair->cost_diff = HistogramAddEval(histograms[low], histograms[high], histos, 0);
+    pair->cost_combo = histos->bit_cost_;
+}
+
+// Greedy clustering: repeatedly merges the pair at the front of the queue,
+// i.e. the one with the smallest (most negative) cost_diff, until no queued
+// pair is left or a single histogram remains. 'histos' is scratch storage
+// for the pair evaluations. On success the surviving histograms are moved to
+// the front of the set, image_histo->size is updated and 1 is returned.
+// Returns 0 if a memory allocation failed.
+static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo, VP8LHistogram* const histos) {
+    VP8LHistogram** const histograms = image_histo->histograms;
+    int remaining = image_histo->size;
+    // Indexes of the histograms that are still alive.
+    int* const clusters = WebPSafeMalloc(remaining, sizeof(*clusters));
+    // Priority queue of candidate pairs.
+    HistoQueue histo_queue;
+    int ok = 0;
+    int i, j;
+
+    if (HistoQueueInit(&histo_queue, remaining) && clusters != NULL) {
+        // Seed the cluster list and evaluate every pair once.
+        for (i = 0; i < remaining; ++i) {
+            clusters[i] = i;
+            for (j = i + 1; j < remaining; ++j) {
+                PreparePair(histograms, i, j, &histo_queue.queue[histo_queue.size], histos);
+                UpdateQueueFront(&histo_queue);
+            }
+        }
+
+        while (remaining > 1 && histo_queue.size > 0) {
+            HistogramPair* const queue = histo_queue.queue;
+            const int idx1 = queue[0].idx1;
+            const int idx2 = queue[0].idx2;
+            int kept = 0;
+
+            // Fold idx2 into idx1 and give idx1 the cost computed for the pair.
+            VP8LHistogramAdd(histograms[idx2], histograms[idx1], histograms[idx1]);
+            histograms[idx1]->bit_cost_ = queue[0].cost_combo;
+
+            // Drop idx2 from the cluster list by shifting the tail down.
+            for (i = 0; i + 1 < remaining; ++i) {
+                if (clusters[i] >= idx2) {
+                    clusters[i] = clusters[i + 1];
+                }
+            }
+            --remaining;
+
+            // Compact the queue in place, discarding every pair that involves
+            // idx1 or idx2 (this removes the front pair too) while keeping
+            // the smallest remaining cost_diff at index 0.
+            for (i = 0; i < histo_queue.size; ++i) {
+                HistogramPair* const p = queue + i;
+                const int stale = (p->idx1 == idx1 || p->idx2 == idx1 || p->idx1 == idx2 || p->idx2 == idx2);
+                if (!stale) {
+                    if (p->cost_diff < queue[0].cost_diff) {
+                        SwapHistogramPairs(queue, p);
+                    }
+                    SwapHistogramPairs(queue + kept, p);
+                    ++kept;
+                }
+            }
+            histo_queue.size = kept;
+
+            // Queue the pairs formed by the merged histogram and each survivor.
+            for (i = 0; i < remaining; ++i) {
+                if (clusters[i] != idx1) {
+                    PreparePair(histograms, idx1, clusters[i], &histo_queue.queue[histo_queue.size], histos);
+                    UpdateQueueFront(&histo_queue);
+                }
+            }
+        }
+
+        // Bring the surviving histograms to the beginning of the array.
+        for (i = 0; i < remaining; ++i) {
+            if (i != clusters[i]) {
+                HistogramSwap(&histograms[i], &histograms[clusters[i]]);
+            }
+        }
+
+        image_histo->size = remaining;
+        ok = 1;
+    }
+
+    WebPSafeFree(clusters);
+    HistoQueueClear(&histo_queue);
+    return ok;
+}
+
+// Randomized clustering. Each outer iteration evaluates a number of pseudo-
+// random pairs and merges the one with the most negative HistogramAddEval()
+// result, if there is one. It stops after size * iter_mult iterations, when
+// no more than 'min_cluster_size' histograms are left, or after half that
+// many consecutive iterations without a merge.
+// 'tmp_histo' and 'best_combo' are scratch histograms that get swapped with
+// entries of the set, so the caller must continue with the returned pointer.
+static VP8LHistogram* HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
+                                                 VP8LHistogram* tmp_histo,
+                                                 VP8LHistogram* best_combo,
+                                                 int quality,
+                                                 int min_cluster_size) {
+    VP8LHistogram** const histograms = image_histo->histograms;
+    const int initial_size = image_histo->size;
+    const int iter_mult = 2 + ((quality < 25) ? 0 : (quality - 25) / 8);
+    const int max_iters = initial_size * iter_mult;
+    const int max_idle_iters = max_iters / 2;
+    const int pair_budget = initial_size / 2;
+    const int size_floor = min_cluster_size + 1;
+    int remaining = initial_size;
+    int idle_iters = 0;
+    uint32_t seed = 0;
+    int iter;
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    for (iter = 0; iter < max_iters && remaining >= size_floor; ++iter) {
+        const int num_tries = (pair_budget < remaining) ? pair_budget : remaining;
+        double best_cost_diff = 0.;
+        int best_idx1 = -1, best_idx2 = 1;
+        int attempt;
+        seed += iter;
+        for (attempt = 0; attempt < num_tries; ++attempt) {
+            // Pick two histograms: idx1 at random, idx2 at a small fixed or
+            // a random offset from it.
+            const uint32_t idx1 = MyRand(&seed) % remaining;
+            const uint32_t tmp = (attempt & 7) + 1;
+            const uint32_t diff = (tmp < 3) ? tmp : MyRand(&seed) % (remaining - 1);
+            const uint32_t idx2 = (idx1 + diff + 1) % remaining;
+            if (idx1 != idx2) {
+                // Cost variation obtained by combining the two.
+                const double curr_cost_diff =
+                    HistogramAddEval(histograms[idx1], histograms[idx2], tmp_histo, best_cost_diff);
+                if (curr_cost_diff < best_cost_diff) { // better than the best so far
+                    HistogramSwap(&best_combo, &tmp_histo);
+                    best_cost_diff = curr_cost_diff;
+                    best_idx1 = idx1;
+                    best_idx2 = idx2;
+                }
+            }
+        }
+
+        if (best_idx1 >= 0) {
+            HistogramSwap(&best_combo, &histograms[best_idx1]);
+            // Move the last live histogram into the slot freed by best_idx2.
+            --remaining;
+            if (best_idx2 != remaining) {
+                HistogramSwap(&histograms[remaining], &histograms[best_idx2]);
+                histograms[remaining] = NULL;
+            }
+            idle_iters = 0;
+        }
+        if (++idle_iters >= max_idle_iters) {
+            break;
+        }
+    }
+    image_histo->size = remaining;
+    StopProfiling(&stop_watch, &timeHistogramCombineStochastic, &countHistogramCombineStochastic);
+    return best_combo;
+}
+
+typedef struct HistogramAddThreshWorkerData {
+    VP8LHistogramSet* orig_histo;  // input histograms to be mapped
+    VP8LHistogramSet* image_histo; // candidate histograms the inputs are compared against
+    uint16_t* symbols;             // output: symbols[i] = index of the candidate chosen for input i
+
+    int start;        // first input index handled by this worker (inclusive)
+    int stop;         // end of the input range handled by this worker (exclusive)
+    int num_workers;  // number of entries in the array, as stored by HistogramAddThreshAlloc()
+    int worker_index; // position of this entry in the array, set by HistogramAddThreshMt()
+} HistoWorkerData;
+
+// Allocates an array of 'num_workers' HistoWorkerData entries.
+// Every entry is zero-initialized and has its num_workers field set, so no
+// entry is left with indeterminate content.
+// The incoming value of 'worker_data' is ignored; the parameter is kept for
+// compatibility with existing callers.
+// Returns NULL if 'num_workers' is not positive or if the allocation fails.
+// Release the array with HistogramAddThreshDealloc().
+static HistoWorkerData* HistogramAddThreshAlloc(HistoWorkerData* worker_data, int num_workers) {
+    int i;
+    if (num_workers <= 0) {
+        return NULL;
+    }
+    worker_data = (HistoWorkerData*)WebPSafeMalloc((uint64_t)num_workers, sizeof(*worker_data));
+    if (worker_data == NULL) {
+        return NULL;
+    }
+
+    memset(worker_data, 0, (size_t)num_workers * sizeof(*worker_data));
+    for (i = 0; i < num_workers; ++i) {
+        worker_data[i].num_workers = num_workers;
+    }
+
+    return worker_data;
+}
+
+// Releases an array obtained from HistogramAddThreshAlloc(); NULL is accepted.
+// 'worker_data' is received by value, so the caller's pointer cannot be
+// reset from here: the caller must not use it after this call.
+static void HistogramAddThreshDealloc(HistoWorkerData* worker_data) {
+    if (worker_data != NULL) {
+        WebPSafeFree(worker_data);
+    }
+}
+
+// Processes the input histograms with index in [start, stop). For each input
+// i, the candidates of 'image_histo' are tried in order, with the best value
+// so far passed to HistogramAddThresh() as threshold. symbols[i] receives the
+// index of the candidate that returned the lowest value (the earliest one
+// wins on ties).
+static void ThreadHistogramAddThresh(HistoWorkerData* const worker_data) {
+    VP8LHistogram** const inputs = worker_data->orig_histo->histograms;
+    VP8LHistogram** const candidates = worker_data->image_histo->histograms;
+    uint16_t* const out_symbols = worker_data->symbols;
+    const int num_candidates = worker_data->image_histo->size;
+    const int last = worker_data->stop;
+    int in_idx = worker_data->start;
+
+    while (in_idx < last) {
+        VP8LHistogram* const input = inputs[in_idx];
+        // Candidate 0 is the initial best, evaluated against MAX_COST.
+        double lowest = HistogramAddThresh(candidates[0], input, MAX_COST);
+        int winner = 0;
+        int cand;
+        for (cand = 1; cand < num_candidates; ++cand) {
+            const double value = HistogramAddThresh(candidates[cand], input, lowest);
+            if (value < lowest) {
+                lowest = value;
+                winner = cand;
+            }
+        }
+        out_symbols[in_idx] = winner;
+        ++in_idx;
+    }
+}
+
+// Worker hook: runs ThreadHistogramAddThresh() on the HistoWorkerData passed
+// as 'worker_data'. HistogramAddThreshMt() stores the same pointer in both
+// data1 and data2 of the worker, so '_worker_data' is not used.
+// The parameters are void* so that the function has the generic hook type it
+// is cast to, rather than being called through an incompatible function
+// pointer type. Always returns 1.
+static int HistogramAddThreshWorker(void* const worker_data, void* const _worker_data) {
+    (void)_worker_data;
+    ThreadHistogramAddThresh((HistoWorkerData*)worker_data);
+
+    return 1;
+}
+
+// Fills symbols[i] for every histogram of 'orig_histo' (see
+// ThreadHistogramAddThresh) by splitting the index range [0, size) into
+// 'number_thread' contiguous slices, one per worker. The last slice also
+// takes the remainder of the division and is run through Execute() instead
+// of Launch().
+// All resources (worker array, worker data, started workers) are released on
+// every path. If the parallel setup fails, a message is printed and the whole
+// range is processed serially, so 'symbols' is always completely written
+// when the function returns.
+static void HistogramAddThreshMt(const VP8LHistogramSet* const orig_histo,
+                                 const VP8LHistogramSet* const image_histo,
+                                 uint16_t* const symbols,
+                                 int number_thread) {
+    const int orig_histo_size = orig_histo->size;
+    const WebPWorkerInterface* const winterface = WebPGetWorkerInterface();
+    const int num_workers = (number_thread > 0) ? number_thread : 1;
+    WebPWorker* workers = NULL;
+    HistoWorkerData* worker_data = NULL;
+    int num_initialized = 0; // workers on which Init() was called
+    int done = 0;            // set once the parallel run has completed
+    int range;
+    int i;
+
+    workers = (WebPWorker*)WebPSafeMalloc(num_workers, sizeof(*workers));
+    if (workers == NULL) {
+        fprintf(stderr, "Workers creation failed\n");
+        goto End;
+    }
+
+    for (i = 0; i < num_workers; ++i) {
+        WebPWorker* const worker = &workers[i];
+
+        winterface->Init(worker);
+        ++num_initialized;
+        // The last worker is run with Execute() below and is therefore not
+        // Reset() here.
+        if (i < num_workers - 1 && !winterface->Reset(worker)) {
+            fprintf(stderr, "encoder threads for HistogramAddThreshMt creation failed\n");
+            goto End;
+        }
+    }
+
+    worker_data = HistogramAddThreshAlloc(NULL, num_workers);
+    if (NULL == worker_data) {
+        fprintf(stderr, "memory creation failed\n");
+        goto End;
+    }
+
+    range = orig_histo_size / num_workers;
+    for (i = 0; i < num_workers; ++i) {
+        WebPWorker* const worker = &workers[i];
+        HistoWorkerData* const slice = &worker_data[i];
+
+        worker->hook = (WebPWorkerHook)HistogramAddThreshWorker;
+        worker->data1 = slice;
+        worker->data2 = slice;
+
+        // Slice [start, stop); the last one extends to the end of the input.
+        slice->start = range * i;
+        slice->stop = (i != num_workers - 1) ? range * (i + 1) : orig_histo_size;
+        slice->num_workers = num_workers;
+        slice->worker_index = i;
+        // The worker only reads the two sets; the casts drop the const
+        // qualifier to match the field types.
+        slice->orig_histo = (VP8LHistogramSet*)orig_histo;
+        slice->image_histo = (VP8LHistogramSet*)image_histo;
+        slice->symbols = symbols;
+
+        if (i == num_workers - 1) {
+            winterface->Execute(worker);
+        } else {
+            winterface->Launch(worker);
+        }
+    }
+
+    // Wait till all workers are finished.
+    for (i = 0; i < num_workers; ++i) {
+        winterface->Sync(&workers[i]);
+    }
+    done = 1;
+
+End:
+    for (i = 0; i < num_initialized; ++i) {
+        winterface->End(&workers[i]);
+    }
+    HistogramAddThreshDealloc(worker_data);
+    WebPSafeFree(workers);
+
+    if (!done) {
+        // Serial fallback over the whole input range.
+        HistoWorkerData serial;
+        serial.orig_histo = (VP8LHistogramSet*)orig_histo;
+        serial.image_histo = (VP8LHistogramSet*)image_histo;
+        serial.symbols = symbols;
+        serial.start = 0;
+        serial.stop = orig_histo_size;
+        serial.num_workers = 1;
+        serial.worker_index = 0;
+        ThreadHistogramAddThresh(&serial);
+    }
+}
+
+// -----------------------------------------------------------------------------
+// Histogram refinement
+
+// Maps each histogram of 'orig_histo' (the 'in' set) to a histogram of
+// 'image_histo' (the 'out' set), writes the chosen indices to 'symbols', then
+// rebuilds every 'out' histogram as the sum of the 'in' histograms mapped to
+// it. The search runs through HistogramAddThreshMt() when WEBP_USE_THREAD is
+// defined and number_thread > 0, serially otherwise.
+// Note: out[]->bit_cost_ is assumed to be up-to-date on entry.
+static void HistogramRemap(const VP8LHistogramSet* const orig_histo,
+                           const VP8LHistogramSet* const image_histo,
+                           uint16_t* const symbols,
+                           int number_thread) {
+    VP8LHistogram** const in_histos = orig_histo->histograms;
+    VP8LHistogram** const out_histos = image_histo->histograms;
+    const int num_in = orig_histo->size;
+    const int num_out = image_histo->size;
+#ifdef WEBP_USE_THREAD
+    const int threaded = (number_thread > 0);
+#else
+    const int threaded = 0;
+#endif
+    int i;
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    if (num_out <= 1) {
+        // A single 'out' histogram: everything maps to it.
+        assert(num_out == 1);
+        for (i = 0; i < num_in; ++i) {
+            symbols[i] = 0;
+        }
+    } else if (threaded) {
+        HistogramAddThreshMt(orig_histo, image_histo, symbols, number_thread);
+    } else {
+        for (i = 0; i < num_in; ++i) {
+            double lowest = HistogramAddThresh(out_histos[0], in_histos[i], MAX_COST);
+            int winner = 0;
+            int k;
+            for (k = 1; k < num_out; ++k) {
+                const double value = HistogramAddThresh(out_histos[k], in_histos[i], lowest);
+                if (value < lowest) {
+                    lowest = value;
+                    winner = k;
+                }
+            }
+            symbols[i] = winner;
+        }
+    }
+
+    // Rebuild each 'out' histogram from the 'in' histograms assigned to it.
+    for (i = 0; i < num_out; ++i) {
+        HistogramClear(out_histos[i]);
+    }
+    for (i = 0; i < num_in; ++i) {
+        const int target = symbols[i];
+        VP8LHistogramAdd(in_histos[i], out_histos[target], out_histos[target]);
+    }
+    StopProfiling(&stop_watch, &timeHistogramRemap, &countHistogramRemap);
+}
+
+// Returns the factor used by the entropy-bin merge threshold. It starts at
+// 0.16 and, for quality < 90 only, is halved once for each condition that
+// holds: histo_size > 256, histo_size > 512, histo_size > 1024 and
+// quality <= 50.
+static double GetCombineCostFactor(int histo_size, int quality) {
+    double factor = 0.16;
+    int halvings = 0;
+    if (quality < 90) {
+        halvings += (histo_size > 256);
+        halvings += (histo_size > 512);
+        halvings += (histo_size > 1024);
+        halvings += (quality <= 50);
+    }
+    while (halvings-- > 0) {
+        factor /= 2.;
+    }
+    return factor;
+}
+
+// Builds the histogram image: one histogram per (1 << histo_bits)-sized tile
+// is collected from 'refs', the histograms are clustered into 'image_histo',
+// and histogram_symbols[] receives, for each tile, the index of the cluster
+// it was mapped to.
+//   quality / low_effort select the clustering heuristics;
+//   tmp_histos must provide at least two scratch histograms (slots 0 and 1);
+//   number_thread is forwarded to HistogramRemap().
+// Returns 1 on success, 0 on memory allocation failure.
+int VP8LGetHistoImageSymbols(int xsize,
+                             int ysize,
+                             const VP8LBackwardRefs* const refs,
+                             int quality,
+                             int low_effort,
+                             int histo_bits,
+                             int cache_bits,
+                             VP8LHistogramSet* const image_histo,
+                             VP8LHistogramSet* const tmp_histos,
+                             uint16_t* const histogram_symbols,
+                             int number_thread) {
+    int ok = 0;
+    const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1;
+    const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1;
+    const int image_histo_raw_size = histo_xsize * histo_ysize;
+    const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE;
+
+    // The bin_map for every bin follows following semantics:
+    // bin_map[n][0] = num_histo; // The number of histograms in that bin.
+    // bin_map[n][1] = index of first histogram in that bin;
+    // bin_map[n][num_histo] = index of last histogram in that bin;
+    // bin_map[n][num_histo + 1] ... bin_map[n][bin_depth - 1] = unused indices.
+    const int bin_depth = image_histo_raw_size + 1;
+    int16_t* bin_map = NULL;
+    VP8LHistogramSet* const orig_histo = VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits);
+    VP8LHistogram* cur_combo;
+    int entropy_combine;
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    // The allocation result must be checked before orig_histo is
+    // dereferenced below.
+    if (orig_histo == NULL) goto Error;
+
+    // Don't attempt linear bin-partition heuristic for:
+    // histograms of small sizes, as bin_map will be very sparse and;
+    // Maximum quality (q==100), to preserve the compression gains at that level.
+    entropy_combine = (orig_histo->size > entropy_combine_num_bins * 2) && (quality < 100);
+    if (entropy_combine) {
+        const int bin_map_size = bin_depth * entropy_combine_num_bins;
+        bin_map = (int16_t*)WebPSafeCalloc(bin_map_size, sizeof(*bin_map));
+        if (bin_map == NULL) goto Error;
+    }
+
+    // Construct the histograms from backward references.
+    HistogramBuild(xsize, histo_bits, refs, orig_histo);
+    // Copies the histograms and computes its bit_cost.
+    HistogramCopyAndAnalyze(orig_histo, image_histo);
+
+    cur_combo = tmp_histos->histograms[1]; // pick up working slot
+    if (entropy_combine) {
+        const double combine_cost_factor = GetCombineCostFactor(image_histo_raw_size, quality);
+        HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort);
+        // Collapse histograms with similar entropy.
+        cur_combo = HistogramCombineEntropyBin(image_histo, cur_combo, bin_map, bin_depth, entropy_combine_num_bins,
+                                               combine_cost_factor, low_effort);
+    }
+
+    // Don't combine the histograms using stochastic and greedy heuristics for
+    // low-effort compression mode.
+    if (!low_effort || !entropy_combine) {
+        const float x = quality / 100.f;
+        // cubic ramp between 1 and MAX_HISTO_GREEDY:
+        const int threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1));
+        cur_combo =
+            HistogramCombineStochastic(image_histo, tmp_histos->histograms[0], cur_combo, quality, threshold_size);
+        if ((image_histo->size <= threshold_size) && !HistogramCombineGreedy(image_histo, cur_combo)) {
+            goto Error;
+        }
+    }
+
+    // TODO(vikasa): Optimize HistogramRemap for low-effort compression mode also.
+    // Find the optimal map from original histograms to the final ones.
+    HistogramRemap(orig_histo, image_histo, histogram_symbols, number_thread);
+
+    ok = 1;
+
+Error:
+    WebPSafeFree(bin_map);
+    VP8LFreeHistogramSet(orig_histo);
+    StopProfiling(&stop_watch, &timeGetHistoImg, &countGetHistoImg);
+    return ok;
+}
diff --git a/codec/L2/demos/webpEnc/host/src/enc/histogram.h b/codec/L2/demos/webpEnc/host/src/enc/histogram.h
new file mode 100644
index 0000000000..4a712b1eb1
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/histogram.h
@@ -0,0 +1,121 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+// Models the histograms of literal and distance codes.
+
+#ifndef WEBP_ENC_HISTOGRAM_H_
+#define WEBP_ENC_HISTOGRAM_H_
+
+#include <string.h>
+
+#include "./backward_references.h"
+#include "../webp/format_constants.h"
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Not a trivial literal symbol.
+#define VP8L_NON_TRIVIAL_SYM (0xffffffff)
+
+// A simple container for histograms of data.
+typedef struct {
+    // literal_ contains green literal, palette-code and
+    // copy-length-prefix histogram
+    uint32_t* literal_; // Pointer to the allocated buffer for literal.
+    uint32_t red_[NUM_LITERAL_CODES];   // red literal histogram
+    uint32_t blue_[NUM_LITERAL_CODES];  // blue literal histogram
+    uint32_t alpha_[NUM_LITERAL_CODES]; // alpha literal histogram
+    // Backward reference prefix-code histogram.
+    uint32_t distance_[NUM_DISTANCE_CODES];
+    int palette_code_bits_; // VP8LHistogramNumCodes() of this value gives the number of literal_ codes
+    uint32_t trivial_symbol_; // alpha << 24 | red << 16 | blue of the single symbols when Red, Blue &
+                              // Alpha are single valued, else VP8L_NON_TRIVIAL_SYM.
+    double bit_cost_;         // cached value of bit cost.
+    double literal_cost_;     // Cached values of dominant entropy costs:
+    double red_cost_;         // literal, red & blue.
+    double blue_cost_;
+} VP8LHistogram;
+
+// Collection of histograms with fixed capacity, allocated as one
+// big memory chunk. Can be destroyed by calling WebPSafeFree(); see also VP8LFreeHistogramSet().
+typedef struct {
+    int size;     // number of slots currently in use
+    int max_size; // maximum capacity
+    VP8LHistogram** histograms; // array of histogram pointers; entries [0, size) are the ones in use
+} VP8LHistogramSet;
+
+// Create the histogram.
+//
+// The input data is the PixOrCopy data, which models the literals, stop
+// codes and backward references (both distances and lengths).  Also: if
+// palette_code_bits is >= 0, initialize the histogram with this value.
+void VP8LHistogramCreate(VP8LHistogram* const p, const VP8LBackwardRefs* const refs, int palette_code_bits);
+
+// Return the size of the histogram for a given palette_code_bits.
+int VP8LGetHistogramSize(int palette_code_bits);
+
+// Set the palette_code_bits and reset the stats.
+void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits);
+
+// Collect all the references into a histogram (without reset)
+void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs, VP8LHistogram* const histo);
+
+// Free the memory allocated for the histogram.
+void VP8LFreeHistogram(VP8LHistogram* const histo);
+
+// Free the memory allocated for the histogram set.
+void VP8LFreeHistogramSet(VP8LHistogramSet* const histo);
+
+// Allocate an array of pointers to histograms, allocated and initialized
+// using 'cache_bits'. Return NULL in case of memory error.
+VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits);
+
+// Allocate and initialize histogram object with specified 'cache_bits'.
+// Returns NULL in case of memory error.
+// Special case of VP8LAllocateHistogramSet, with size equals 1.
+VP8LHistogram* VP8LAllocateHistogram(int cache_bits);
+
+// Accumulate a token 'v' into a histogram.
+void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, const PixOrCopy* const v);
+
+// Number of codes of the literal_ histogram: the literal and length codes,
+// plus (1 << palette_code_bits) extra codes when palette_code_bits > 0.
+static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) {
+    const int extra_codes = (palette_code_bits > 0) ? (1 << palette_code_bits) : 0;
+    return NUM_LITERAL_CODES + NUM_LENGTH_CODES + extra_codes;
+}
+
+// Builds the histogram image.
+int VP8LGetHistoImageSymbols(int xsize,
+                             int ysize,
+                             const VP8LBackwardRefs* const refs,
+                             int quality,
+                             int low_effort,
+                             int histogram_bits,
+                             int cache_bits,
+                             VP8LHistogramSet* const image_in,
+                             VP8LHistogramSet* const tmp_histos,
+                             uint16_t* const histogram_symbols,
+                             int number_thread);
+
+// Returns the entropy for the symbols in the input array.
+// Also sets trivial_symbol to the code value, if the array has only one code
+// value. Otherwise, set it to VP8L_NON_TRIVIAL_SYM.
+double VP8LBitsEntropy(const uint32_t* const array, int n, uint32_t* const trivial_symbol);
+
+// Estimate how many bits the combined entropy of literals and distance
+// approximately maps to.
+double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // WEBP_ENC_HISTOGRAM_H_
diff --git a/codec/L2/demos/webpEnc/host/src/enc/iterator.c b/codec/L2/demos/webpEnc/host/src/enc/iterator.c
new file mode 100644
index 0000000000..b7a18a6bf3
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/iterator.c
@@ -0,0 +1,445 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// VP8Iterator: block iterator
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <string.h>
+
+#include "./vp8enci.h"
+
+//------------------------------------------------------------------------------
+// VP8Iterator
+//------------------------------------------------------------------------------
+
+static void InitLeft(VP8EncIterator* const it) { // Reset the left-border samples of 'it' to their defaults.
+    it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] = (it->y_ > 0) ? 129 : 127; // corner: 127 on row 0
+    memset(it->y_left_, 129, 16);
+    memset(it->u_left_, 129, 8);
+    memset(it->v_left_, 129, 8);
+    it->left_nz_[8] = 0; // clear slot 8 of the left non-zero context
+}
+
+static void InitTop(VP8EncIterator* const it) { // Reset the encoder-wide top-border samples and non-zero row.
+    const VP8Encoder* const enc = it->enc_;
+    const size_t top_size = enc->mb_w_ * 16;
+    memset(enc->y_top_, 127, 2 * top_size); // 32 bytes per macroblock; presumably spans uv_top_ too - confirm
+    memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_));
+}
+
+void VP8IteratorSetRow(VP8EncIterator* const it, int y) { // Point 'it' at the first macroblock of row 'y'.
+    VP8Encoder* const enc = it->enc_;
+    it->x_ = 0;
+    it->y_ = y;
+    it->bw_ = &enc->parts_[y & (enc->num_parts_ - 1)]; // assumes num_parts_ is a power of two - TODO confirm
+    it->preds_ = enc->preds_ + y * 4 * enc->preds_w_; // four rows of prediction modes per macroblock row
+    it->nz_ = enc->nz_;
+    it->mb_ = enc->mb_info_ + y * enc->mb_w_;
+    it->y_top_ = enc->y_top_;
+    it->uv_top_ = enc->uv_top_;
+    InitLeft(it); // a new row starts with default left-border samples
+}
+
+void VP8IteratorReset(VP8EncIterator* const it) { // Rewind 'it' to macroblock (0,0) and clear its per-pass state.
+    VP8Encoder* const enc = it->enc_;
+    VP8IteratorSetRow(it, 0);
+    VP8IteratorSetCountDown(it, enc->mb_w_ * enc->mb_h_); // default: visit every macroblock
+    InitTop(it);
+    InitLeft(it); // note: VP8IteratorSetRow() above has already called InitLeft()
+    memset(it->bit_count_, 0, sizeof(it->bit_count_));
+    it->do_trellis_ = 0;
+}
+
+void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down) { // Set the remaining and the initial count.
+    it->count_down_ = it->count_down0_ = count_down;
+}
+
+int VP8IteratorIsDone(const VP8EncIterator* const it) { // Returns 1 once the count-down has reached zero or below.
+    return !(it->count_down_ > 0);
+}
+
+void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) { // Bind 'it' to 'enc' and reset it.
+    it->enc_ = enc;
+    it->y_stride_ = enc->pic_->y_stride;
+    it->uv_stride_ = enc->pic_->uv_stride;
+    it->yuv_in_ = (uint8_t*)WEBP_ALIGN(it->yuv_mem_); // four consecutive YUV_SIZE_ENC areas follow
+    it->yuv_out_ = it->yuv_in_ + YUV_SIZE_ENC;
+    it->yuv_out2_ = it->yuv_out_ + YUV_SIZE_ENC;
+    it->yuv_p_ = it->yuv_out2_ + YUV_SIZE_ENC;
+    it->lf_stats_ = enc->lf_stats_;
+    it->percent0_ = enc->percent_; // progress value at the time the iterator was created
+    it->y_left_ = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem_ + 1); // '+ 1' presumably leaves room for index -1
+    it->u_left_ = it->y_left_ + 16 + 16;
+    it->v_left_ = it->u_left_ + 16;
+    VP8IteratorReset(it);
+}
+
+int VP8IteratorProgress(const VP8EncIterator* const it, int delta) {
+    VP8Encoder* const enc = it->enc_;
+    int percent = it->percent0_;
+    if (delta == 0 || enc->pic_->progress_hook == NULL) return 1; // nothing to report: tell the caller to go on
+    if (it->count_down0_ > 0) { // add the share of 'delta' matching the macroblocks already visited
+        percent += delta * (it->count_down0_ - it->count_down_) / it->count_down0_;
+    }
+    return WebPReportProgress(enc->pic_, percent, &enc->percent_);
+}
+
+//------------------------------------------------------------------------------
+// Import the source samples into the cache. Takes care of replicating
+// boundary pixels if necessary.
+
+static WEBP_INLINE int MinSize(int a, int b) { // Smaller of 'a' and 'b'.
+    return (b <= a) ? b : a;
+}
+
+static void ImportBlock(const uint8_t* src, int src_stride, uint8_t* dst, int w, int h, int size) {
+    int i; // copies a w x h block into a size x size block whose rows are BPS bytes apart
+    for (i = 0; i < h; ++i) {
+        memcpy(dst, src, w);
+        if (w < size) {
+            memset(dst + w, dst[w - 1], size - w); // pad the row with its last sample (reads dst[w - 1])
+        }
+        dst += BPS;
+        src += src_stride;
+    }
+    for (i = h; i < size; ++i) { // fill the missing rows by repeating the row just above
+        memcpy(dst, dst - BPS, size);
+        dst += BPS;
+    }
+}
+
+static void ImportLine(const uint8_t* src, int src_stride, uint8_t* dst, int len, int total_len) {
+    int k = 0; // next position to write in 'dst'
+    for (; k < len; ++k) dst[k] = src[k * src_stride]; // gather 'len' samples spaced 'src_stride' apart
+    while (k < total_len) dst[k++] = dst[len - 1];     // pad up to 'total_len' with the last gathered sample
+}
+
+void VP8IteratorImport(VP8EncIterator* const it, uint8_t* tmp_32) { // Load the current source macroblock.
+    const VP8Encoder* const enc = it->enc_;
+    const int x = it->x_, y = it->y_;
+    const WebPPicture* const pic = enc->pic_;
+    const uint8_t* const ysrc = pic->y + (y * pic->y_stride + x) * 16;
+    const uint8_t* const usrc = pic->u + (y * pic->uv_stride + x) * 8;
+    const uint8_t* const vsrc = pic->v + (y * pic->uv_stride + x) * 8;
+    const int w = MinSize(pic->width - x * 16, 16); // valid luma columns in this macroblock
+    const int h = MinSize(pic->height - y * 16, 16); // valid luma rows in this macroblock
+    const int uv_w = (w + 1) >> 1; // chroma is half size, rounded up
+    const int uv_h = (h + 1) >> 1;
+
+    ImportBlock(ysrc, pic->y_stride, it->yuv_in_ + Y_OFF_ENC, w, h, 16);
+    ImportBlock(usrc, pic->uv_stride, it->yuv_in_ + U_OFF_ENC, uv_w, uv_h, 8);
+    ImportBlock(vsrc, pic->uv_stride, it->yuv_in_ + V_OFF_ENC, uv_w, uv_h, 8);
+
+    if (tmp_32 == 0) return; // no scratch buffer: the borders are left as they are
+
+    // Import source (uncompressed) samples into boundary.
+    if (x == 0) {
+        InitLeft(it);
+    } else {
+        if (y == 0) {
+            it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] = 127;
+        } else {
+            it->y_left_[-1] = ysrc[-1 - pic->y_stride]; // sample up and to the left of the block
+            it->u_left_[-1] = usrc[-1 - pic->uv_stride];
+            it->v_left_[-1] = vsrc[-1 - pic->uv_stride];
+        }
+        ImportLine(ysrc - 1, pic->y_stride, it->y_left_, h, 16); // column just left of the block
+        ImportLine(usrc - 1, pic->uv_stride, it->u_left_, uv_h, 8);
+        ImportLine(vsrc - 1, pic->uv_stride, it->v_left_, uv_h, 8);
+    }
+
+    it->y_top_ = tmp_32 + 0; // the top borders now point into the caller's buffer: 16 Y, then 8 U, then 8 V
+    it->uv_top_ = tmp_32 + 16;
+    if (y == 0) {
+        memset(tmp_32, 127, 32 * sizeof(*tmp_32));
+    } else {
+        ImportLine(ysrc - pic->y_stride, 1, tmp_32, w, 16); // row just above the block
+        ImportLine(usrc - pic->uv_stride, 1, tmp_32 + 16, uv_w, 8);
+        ImportLine(vsrc - pic->uv_stride, 1, tmp_32 + 16 + 8, uv_w, 8);
+    }
+}
+
+//------------------------------------------------------------------------------
+// Copy back the compressed samples into user space if requested.
+
+static void ExportBlock(const uint8_t* src, uint8_t* dst, int dst_stride, int w, int h) {
+    for (; h > 0; --h) { // copy 'h' rows of 'w' bytes; source rows are BPS bytes apart
+        memcpy(dst, src, w);
+        src += BPS;
+        dst += dst_stride;
+    }
+}
+
+void VP8IteratorExport(const VP8EncIterator* const it) { // Copy yuv_out_ back into the picture, if requested.
+    const VP8Encoder* const enc = it->enc_;
+    if (enc->config_->show_compressed) {
+        const int x = it->x_, y = it->y_;
+        const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC;
+        const uint8_t* const usrc = it->yuv_out_ + U_OFF_ENC;
+        const uint8_t* const vsrc = it->yuv_out_ + V_OFF_ENC;
+        const WebPPicture* const pic = enc->pic_;
+        uint8_t* const ydst = pic->y + (y * pic->y_stride + x) * 16;
+        uint8_t* const udst = pic->u + (y * pic->uv_stride + x) * 8;
+        uint8_t* const vdst = pic->v + (y * pic->uv_stride + x) * 8;
+        int w = (pic->width - x * 16);
+        int h = (pic->height - y * 16);
+
+        if (w > 16) w = 16; // clip to the macroblock size; smaller values occur on the right/bottom edge
+        if (h > 16) h = 16;
+
+        // Luma plane
+        ExportBlock(ysrc, ydst, pic->y_stride, w, h);
+
+        { // U/V planes (half size, rounded up)
+            const int uv_w = (w + 1) >> 1;
+            const int uv_h = (h + 1) >> 1;
+            ExportBlock(usrc, udst, pic->uv_stride, uv_w, uv_h);
+            ExportBlock(vsrc, vdst, pic->uv_stride, uv_w, uv_h);
+        }
+    }
+}
+
+//------------------------------------------------------------------------------
+// Non-zero contexts setup/teardown
+
+// Nz bits:
+//  0  1  2  3  Y
+//  4  5  6  7
+//  8  9 10 11
+// 12 13 14 15
+// 16 17        U
+// 18 19
+// 20 21        V
+// 22 23
+// 24           DC-intra16
+
+// Convert packed context to byte array
+#define BIT(nz, n) (!!((nz) & (1 << (n))))
+
+void VP8IteratorNzToBytes(VP8EncIterator* const it) { // Unpack nz_[0] (top) and nz_[-1] (left) into 0/1 arrays.
+    const int tnz = it->nz_[0], lnz = it->nz_[-1];
+    int* const top_nz = it->top_nz_;
+    int* const left_nz = it->left_nz_;
+
+    // Top-Y (bits 12..15: bottom row of the Y grid in the table above)
+    top_nz[0] = BIT(tnz, 12);
+    top_nz[1] = BIT(tnz, 13);
+    top_nz[2] = BIT(tnz, 14);
+    top_nz[3] = BIT(tnz, 15);
+    // Top-U
+    top_nz[4] = BIT(tnz, 18);
+    top_nz[5] = BIT(tnz, 19);
+    // Top-V
+    top_nz[6] = BIT(tnz, 22);
+    top_nz[7] = BIT(tnz, 23);
+    // DC
+    top_nz[8] = BIT(tnz, 24);
+
+    // left-Y (bits 3, 7, 11, 15: right column of the Y grid in the table above)
+    left_nz[0] = BIT(lnz, 3);
+    left_nz[1] = BIT(lnz, 7);
+    left_nz[2] = BIT(lnz, 11);
+    left_nz[3] = BIT(lnz, 15);
+    // left-U
+    left_nz[4] = BIT(lnz, 17);
+    left_nz[5] = BIT(lnz, 19);
+    // left-V
+    left_nz[6] = BIT(lnz, 21);
+    left_nz[7] = BIT(lnz, 23);
+    // left-DC is special, iterated separately
+}
+
+void VP8IteratorBytesToNz(VP8EncIterator* const it) { // Pack top_nz_/left_nz_ into one word stored in *it->nz_.
+    uint32_t nz = 0;
+    const int* const top_nz = it->top_nz_;
+    const int* const left_nz = it->left_nz_;
+    // top
+    nz |= (top_nz[0] << 12) | (top_nz[1] << 13);
+    nz |= (top_nz[2] << 14) | (top_nz[3] << 15);
+    nz |= (top_nz[4] << 18) | (top_nz[5] << 19);
+    nz |= (top_nz[6] << 22) | (top_nz[7] << 23);
+    nz |= (top_nz[8] << 24); // we propagate the _top_ bit, esp. for intra4
+    // left (only entries 0, 1, 2, 4 and 6 are written; bits 15, 19 and 23 were already set from 'top')
+    nz |= (left_nz[0] << 3) | (left_nz[1] << 7);
+    nz |= (left_nz[2] << 11);
+    nz |= (left_nz[4] << 17) | (left_nz[6] << 21);
+
+    *it->nz_ = nz;
+}
+
+#undef BIT
+
+//------------------------------------------------------------------------------
+// Advance to the next position, doing the bookkeeping.
+
+void VP8IteratorSaveBoundary(VP8EncIterator* const it) { // Keep the edges of yuv_out_ as the next left/top borders.
+    VP8Encoder* const enc = it->enc_;
+    const int x = it->x_, y = it->y_;
+    const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC;
+    const uint8_t* const uvsrc = it->yuv_out_ + U_OFF_ENC;
+    if (x < enc->mb_w_ - 1) { // left (skipped for the last macroblock of a row)
+        int i;
+        for (i = 0; i < 16; ++i) {
+            it->y_left_[i] = ysrc[15 + i * BPS]; // rightmost luma column
+        }
+        for (i = 0; i < 8; ++i) {
+            it->u_left_[i] = uvsrc[7 + i * BPS];
+            it->v_left_[i] = uvsrc[15 + i * BPS]; // V is read 8 columns to the right of U, same rows
+        }
+        // top-left (before 'top'!)
+        it->y_left_[-1] = it->y_top_[15];
+        it->u_left_[-1] = it->uv_top_[0 + 7];
+        it->v_left_[-1] = it->uv_top_[8 + 7];
+    }
+    if (y < enc->mb_h_ - 1) { // top (skipped for the last macroblock row)
+        memcpy(it->y_top_, ysrc + 15 * BPS, 16);
+        memcpy(it->uv_top_, uvsrc + 7 * BPS, 8 + 8);
+    }
+}
+
+int VP8IteratorNext(VP8EncIterator* const it) {
+    // Step every per-macroblock cursor one macroblock to the right.
+    ++it->x_;
+    ++it->mb_;
+    ++it->nz_;
+    it->preds_ += 4;
+    it->y_top_ += 16;
+    it->uv_top_ += 16;
+    if (it->x_ == it->enc_->mb_w_) VP8IteratorSetRow(it, it->y_ + 1); // end of row: restart on the next one
+    it->count_down_ -= 1;
+    return (it->count_down_ > 0); // non-zero while macroblocks remain
+}
+
+//------------------------------------------------------------------------------
+// Helper function to set mode properties
+
+void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode) {
+    // Write 'mode' into the 4x4 grid of prediction modes of this macroblock and set its type to 1.
+    uint8_t* row = it->preds_;
+    int n;
+    for (n = 4; n > 0; --n, row += it->enc_->preds_w_) {
+        memset(row, mode, 4);
+    }
+    it->mb_->type_ = 1;
+}
+
+void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes) {
+    uint8_t* row = it->preds_;
+    int r;
+    // Copy the 16 modes, four per row, into the prediction-mode grid and set the macroblock type to 0.
+    for (r = 0; r < 4; ++r) {
+        memcpy(row, modes + 4 * r, 4 * sizeof(*modes));
+        row += it->enc_->preds_w_;
+    }
+    it->mb_->type_ = 0;
+}
+
+void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode) { // Store 'mode' as the current macroblock's uv_mode_.
+    it->mb_->uv_mode_ = mode;
+}
+
+void VP8SetSkip(const VP8EncIterator* const it, int skip) { // Store 'skip' as the current macroblock's skip_ flag.
+    it->mb_->skip_ = skip;
+}
+
+void VP8SetSegment(const VP8EncIterator* const it, int segment) { // Store 'segment' as the current macroblock's segment_.
+    it->mb_->segment_ = segment;
+}
+
+//------------------------------------------------------------------------------
+// Intra4x4 sub-blocks iteration
+//
+//  We store and update the boundary samples into an array of 37 pixels. They
+//  are updated as we iterate and reconstruct each intra4x4 block in turn.
+//  The position of the samples has the following snake pattern:
+//
+// 16|17 18 19 20|21 22 23 24|25 26 27 28|29 30 31 32|33 34 35 36  <- Top-right
+// --+-----------+-----------+-----------+-----------+
+// 15|         19|         23|         27|         31|
+// 14|         18|         22|         26|         30|
+// 13|         17|         21|         25|         29|
+// 12|13 14 15 16|17 18 19 20|21 22 23 24|25 26 27 28|
+// --+-----------+-----------+-----------+-----------+
+// 11|         15|         19|         23|         27|
+// 10|         14|         18|         22|         26|
+//  9|         13|         17|         21|         25|
+//  8| 9 10 11 12|13 14 15 16|17 18 19 20|21 22 23 24|
+// --+-----------+-----------+-----------+-----------+
+//  7|         11|         15|         19|         23|
+//  6|         10|         14|         18|         22|
+//  5|          9|         13|         17|         21|
+//  4| 5  6  7  8| 9 10 11 12|13 14 15 16|17 18 19 20|
+// --+-----------+-----------+-----------+-----------+
+//  3|          7|         11|         15|         19|
+//  2|          6|         10|         14|         18|
+//  1|          5|          9|         13|         17|
+//  0| 1  2  3  4| 5  6  7  8| 9 10 11 12|13 14 15 16|
+// --+-----------+-----------+-----------+-----------+
+
+// Array to record the position of the top sample to pass to the prediction
+// functions in dsp.c.
+static const uint8_t VP8TopLeftI4[16] = {17, 21, 25, 29, 13, 17, 21, 25, 9, 13, 17, 21, 5, 9, 13, 17};
+
+void VP8IteratorStartI4(VP8EncIterator* const it) { // Prepare i4_boundary_ for the first 4x4 sub-block.
+    const VP8Encoder* const enc = it->enc_;
+    int i;
+
+    it->i4_ = 0; // first 4x4 sub-block
+    it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[0];
+
+    // Import the boundary samples
+    for (i = 0; i < 17; ++i) { // left, stored bottom-up; i == 16 reads y_left_[-1] (the corner sample)
+        it->i4_boundary_[i] = it->y_left_[15 - i];
+    }
+    for (i = 0; i < 16; ++i) { // top
+        it->i4_boundary_[17 + i] = it->y_top_[i];
+    }
+    // top-right samples have a special case on the far right of the picture
+    if (it->x_ < enc->mb_w_ - 1) {
+        for (i = 16; i < 16 + 4; ++i) { // first four top samples of the macroblock to the right
+            it->i4_boundary_[17 + i] = it->y_top_[i];
+        }
+    } else { // else, replicate the last valid pixel four times
+        for (i = 16; i < 16 + 4; ++i) {
+            it->i4_boundary_[17 + i] = it->i4_boundary_[17 + 15];
+        }
+    }
+    VP8IteratorNzToBytes(it); // import the non-zero context
+}
+
+int VP8IteratorRotateI4(VP8EncIterator* const it, const uint8_t* const yuv_out) { // Advance to the next 4x4 block.
+    const uint8_t* const blk = yuv_out + VP8Scan[it->i4_];
+    uint8_t* const top = it->i4_top_;
+    int i;
+
+    // Update the cache with 7 fresh samples
+    for (i = 0; i <= 3; ++i) {
+        top[-4 + i] = blk[i + 3 * BPS]; // store future top samples
+    }
+    if ((it->i4_ & 3) != 3) {      // if not on the right sub-blocks #3, #7, #11, #15
+        for (i = 0; i <= 2; ++i) { // store future left samples
+            top[i] = blk[3 + (2 - i) * BPS];
+        }
+    } else { // else replicate the top-right samples, as the spec says.
+        for (i = 0; i <= 3; ++i) {
+            top[i] = top[i + 4];
+        }
+    }
+    // move pointers to next sub-block
+    ++it->i4_;
+    if (it->i4_ == 16) { // we're done: returns 0 after the last sub-block
+        return 0;
+    }
+
+    it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[it->i4_];
+    return 1;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/enc/kernel/oclErrorCodes.cpp b/codec/L2/demos/webpEnc/host/src/enc/kernel/oclErrorCodes.cpp
new file mode 100644
index 0000000000..db48073a00
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/kernel/oclErrorCodes.cpp
@@ -0,0 +1,86 @@
+
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <map>
+#include <string>
+
+#include <CL/cl.h>
+
+#define TO_STRING(x) #x
+
+static const std::pair<cl_int, std::string> map_pairs[] = { // OpenCL status code -> name of its CL_* constant
+    std::make_pair(CL_SUCCESS, TO_STRING(CL_SUCCESS)),
+    std::make_pair(CL_DEVICE_NOT_FOUND, TO_STRING(CL_DEVICE_NOT_FOUND)),
+    std::make_pair(CL_DEVICE_NOT_AVAILABLE, TO_STRING(CL_DEVICE_NOT_AVAILABLE)),
+    std::make_pair(CL_COMPILER_NOT_AVAILABLE, TO_STRING(CL_COMPILER_NOT_AVAILABLE)),
+    std::make_pair(CL_MEM_OBJECT_ALLOCATION_FAILURE, TO_STRING(CL_MEM_OBJECT_ALLOCATION_FAILURE)),
+    std::make_pair(CL_OUT_OF_RESOURCES, TO_STRING(CL_OUT_OF_RESOURCES)),
+    std::make_pair(CL_OUT_OF_HOST_MEMORY, TO_STRING(CL_OUT_OF_HOST_MEMORY)),
+    std::make_pair(CL_PROFILING_INFO_NOT_AVAILABLE, TO_STRING(CL_PROFILING_INFO_NOT_AVAILABLE)),
+    std::make_pair(CL_MEM_COPY_OVERLAP, TO_STRING(CL_MEM_COPY_OVERLAP)),
+    std::make_pair(CL_IMAGE_FORMAT_MISMATCH, TO_STRING(CL_IMAGE_FORMAT_MISMATCH)),
+    std::make_pair(CL_IMAGE_FORMAT_NOT_SUPPORTED, TO_STRING(CL_IMAGE_FORMAT_NOT_SUPPORTED)),
+    std::make_pair(CL_BUILD_PROGRAM_FAILURE, TO_STRING(CL_BUILD_PROGRAM_FAILURE)),
+    std::make_pair(CL_MAP_FAILURE, TO_STRING(CL_MAP_FAILURE)),
+    std::make_pair(CL_MISALIGNED_SUB_BUFFER_OFFSET, TO_STRING(CL_MISALIGNED_SUB_BUFFER_OFFSET)),
+    std::make_pair(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, TO_STRING(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)),
+    std::make_pair(CL_INVALID_VALUE, TO_STRING(CL_INVALID_VALUE)),
+    std::make_pair(CL_INVALID_DEVICE_TYPE, TO_STRING(CL_INVALID_DEVICE_TYPE)),
+    std::make_pair(CL_INVALID_PLATFORM, TO_STRING(CL_INVALID_PLATFORM)),
+    std::make_pair(CL_INVALID_DEVICE, TO_STRING(CL_INVALID_DEVICE)),
+    std::make_pair(CL_INVALID_CONTEXT, TO_STRING(CL_INVALID_CONTEXT)),
+    std::make_pair(CL_INVALID_QUEUE_PROPERTIES, TO_STRING(CL_INVALID_QUEUE_PROPERTIES)),
+    std::make_pair(CL_INVALID_COMMAND_QUEUE, TO_STRING(CL_INVALID_COMMAND_QUEUE)),
+    std::make_pair(CL_INVALID_HOST_PTR, TO_STRING(CL_INVALID_HOST_PTR)),
+    std::make_pair(CL_INVALID_MEM_OBJECT, TO_STRING(CL_INVALID_MEM_OBJECT)),
+    std::make_pair(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, TO_STRING(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)),
+    std::make_pair(CL_INVALID_IMAGE_SIZE, TO_STRING(CL_INVALID_IMAGE_SIZE)),
+    std::make_pair(CL_INVALID_SAMPLER, TO_STRING(CL_INVALID_SAMPLER)),
+    std::make_pair(CL_INVALID_BINARY, TO_STRING(CL_INVALID_BINARY)),
+    std::make_pair(CL_INVALID_BUILD_OPTIONS, TO_STRING(CL_INVALID_BUILD_OPTIONS)),
+    std::make_pair(CL_INVALID_PROGRAM, TO_STRING(CL_INVALID_PROGRAM)),
+    std::make_pair(CL_INVALID_PROGRAM_EXECUTABLE, TO_STRING(CL_INVALID_PROGRAM_EXECUTABLE)),
+    std::make_pair(CL_INVALID_KERNEL_NAME, TO_STRING(CL_INVALID_KERNEL_NAME)),
+    std::make_pair(CL_INVALID_KERNEL_DEFINITION, TO_STRING(CL_INVALID_KERNEL_DEFINITION)),
+    std::make_pair(CL_INVALID_KERNEL, TO_STRING(CL_INVALID_KERNEL)),
+    std::make_pair(CL_INVALID_ARG_INDEX, TO_STRING(CL_INVALID_ARG_INDEX)),
+    std::make_pair(CL_INVALID_ARG_VALUE, TO_STRING(CL_INVALID_ARG_VALUE)),
+    std::make_pair(CL_INVALID_ARG_SIZE, TO_STRING(CL_INVALID_ARG_SIZE)),
+    std::make_pair(CL_INVALID_KERNEL_ARGS, TO_STRING(CL_INVALID_KERNEL_ARGS)),
+    std::make_pair(CL_INVALID_WORK_DIMENSION, TO_STRING(CL_INVALID_WORK_DIMENSION)),
+    std::make_pair(CL_INVALID_WORK_GROUP_SIZE, TO_STRING(CL_INVALID_WORK_GROUP_SIZE)),
+    std::make_pair(CL_INVALID_WORK_ITEM_SIZE, TO_STRING(CL_INVALID_WORK_ITEM_SIZE)),
+    std::make_pair(CL_INVALID_GLOBAL_OFFSET, TO_STRING(CL_INVALID_GLOBAL_OFFSET)),
+    std::make_pair(CL_INVALID_EVENT_WAIT_LIST, TO_STRING(CL_INVALID_EVENT_WAIT_LIST)),
+    std::make_pair(CL_INVALID_EVENT, TO_STRING(CL_INVALID_EVENT)),
+    std::make_pair(CL_INVALID_OPERATION, TO_STRING(CL_INVALID_OPERATION)),
+    std::make_pair(CL_INVALID_GL_OBJECT, TO_STRING(CL_INVALID_GL_OBJECT)),
+    std::make_pair(CL_INVALID_BUFFER_SIZE, TO_STRING(CL_INVALID_BUFFER_SIZE)),
+    std::make_pair(CL_INVALID_MIP_LEVEL, TO_STRING(CL_INVALID_MIP_LEVEL)),
+    std::make_pair(CL_INVALID_GLOBAL_WORK_SIZE, TO_STRING(CL_INVALID_GLOBAL_WORK_SIZE)),
+    std::make_pair(CL_INVALID_PROPERTY, TO_STRING(CL_INVALID_PROPERTY))};
+
+static const std::map<cl_int, std::string> oclErrorCodes(map_pairs,
+                                                         map_pairs + sizeof(map_pairs) / sizeof(map_pairs[0])); // built from every map_pairs entry
+
+extern "C" const char* oclErrorCode(cl_int code) {
+    // Returns the name of the CL_* constant for 'code', or "UNKNOWN ERROR" when it is not in the table.
+    // The returned pointer refers to a string owned by the static oclErrorCodes map.
+    typedef std::map<cl_int, std::string>::const_iterator Entry;
+    const Entry hit = oclErrorCodes.find(code);
+    return (hit != oclErrorCodes.end()) ? hit->second.c_str() : "UNKNOWN ERROR";
+}
diff --git a/codec/L2/demos/webpEnc/host/src/enc/kernel/oclHelper.cpp b/codec/L2/demos/webpEnc/host/src/enc/kernel/oclHelper.cpp
new file mode 100644
index 0000000000..6788c53fd1
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/kernel/oclHelper.cpp
@@ -0,0 +1,325 @@
+
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "oclHelper.h"
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <vector>
+#include <stdio.h>
+
+static int loadFile2Memory(const char* filename, char** result) {
+    // Reads 'filename' into a NUL-terminated new[] buffer stored in *result (the caller delete[]s it).
+    // Returns the byte count, -1 if the file cannot be opened, -2 if it cannot be sized or read.
+    // *result is written only on success, so no buffer is ever leaked on an error path.
+    std::ifstream stream(filename, std::ifstream::binary);
+    if (!stream) return -1;
+    stream.seekg(0, stream.end);
+    const std::streamoff length = stream.tellg(); // -1 when the position cannot be determined
+    stream.seekg(0, stream.beg);
+    if (!stream || length < 0 || static_cast<int>(length) != length) return -2; // unsized, or too big for int
+    const int size = static_cast<int>(length);
+    char* const buffer = new char[static_cast<size_t>(size) + 1];
+    stream.read(buffer, size);
+    if (!stream) {
+        delete[] buffer; // failed or short read: release the buffer instead of handing it out
+        return -2;
+    }
+    buffer[size] = 0;
+    *result = buffer;
+    return size;
+}
+
+static void getDeviceVersion(oclHardware& hardware) {
+    // Parses the CL_DEVICE_VERSION string ("OpenCL <major>.<minor> ...") of hardware.mDevice and
+    // stores the two numbers in hardware.mMajorVersion / hardware.mMinorVersion. If the query
+    // fails, the error name is printed and 'hardware' is left unchanged.
+    char versionString[512];
+    size_t size = 0;
+    cl_int err = clGetDeviceInfo(hardware.mDevice, CL_DEVICE_VERSION, 511, versionString, &size);
+    if (err != CL_SUCCESS) {
+        std::cout << oclErrorCode(err) << "\n";
+        return;
+    }
+    unsigned major = 0;
+    unsigned minor = 0;
+    unsigned state = 0; // 0: before the first space, 1: major digits, 2: minor digits, 3+: finished
+    for (size_t i = 0; i < size; i++) {
+        const char c = versionString[i];
+        if (c == ' ' || c == '.') {
+            state++;
+            continue;
+        }
+        if (state == 0) {
+            continue;
+        }
+        // Only decimal digits are accumulated. 'size' also counts the terminating NUL, so that
+        // character (or any other non-digit) ends the scan instead of being folded into the
+        // number as (c - '0').
+        if (state > 2 || c < '0' || c > '9') {
+            break;
+        }
+        if (state == 1) {
+            major = major * 10 + (c - '0');
+        } else {
+            minor = minor * 10 + (c - '0');
+        }
+    }
+    hardware.mMajorVersion = major;
+    hardware.mMinorVersion = minor;
+}
+
+static void getDeviceInfo(oclHardware& hardware) { // Queries several device limits; prints the error and stops on failure.
+    cl_ulong localMemSize;
+    cl_ulong allocMemSize;
+    cl_ulong globalMemSize;
+    size_t groupSize;
+    cl_uint unitsSize;
+    cl_uint maxDimension;
+    size_t itemsize[] = {0, 0, 0};
+    size_t size = 0;
+    cl_int err =
+        clGetDeviceInfo(hardware.mDevice, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(localMemSize), &localMemSize, &size);
+    if (err != CL_SUCCESS) {
+        std::cout << oclErrorCode(err) << "\n";
+        return;
+    }
+    err = clGetDeviceInfo(hardware.mDevice, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(groupSize), &groupSize, &size);
+    if (err != CL_SUCCESS) {
+        std::cout << oclErrorCode(err) << "\n";
+        return;
+    }
+    err = clGetDeviceInfo(hardware.mDevice, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(unitsSize), &unitsSize, &size);
+    if (err != CL_SUCCESS) {
+        std::cout << oclErrorCode(err) << "\n";
+        return;
+    }
+    err = clGetDeviceInfo(hardware.mDevice, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(maxDimension), &maxDimension,
+                          &size);
+    if (err != CL_SUCCESS) {
+        std::cout << oclErrorCode(err) << "\n";
+        return;
+    }
+    err = clGetDeviceInfo(hardware.mDevice, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(itemsize), itemsize, &size);
+    if (err != CL_SUCCESS) {
+        std::cout << oclErrorCode(err) << "\n";
+        return;
+    }
+    err = clGetDeviceInfo(hardware.mDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(allocMemSize), &allocMemSize, &size);
+    if (err != CL_SUCCESS) {
+        std::cout << oclErrorCode(err) << "\n";
+        return;
+    }
+    err = clGetDeviceInfo(hardware.mDevice, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(globalMemSize), &globalMemSize, &size);
+    if (err != CL_SUCCESS) {
+        std::cout << oclErrorCode(err) << "\n";
+        return;
+    }
+    // The queried values are not stored or returned; the disabled prints below were their only use.
+    // fprintf(stderr, "Max compute units size: %d\n", unitsSize);
+    // fprintf(stderr, "Max work group size: %d\n", groupSize);
+    // fprintf(stderr, "Local memory size: %d\n", localMemSize);
+    // fprintf(stderr, "Global memory size: %d\n", globalMemSize);
+    // fprintf(stderr, "Max alloc memory size: %d\n", allocMemSize);
+    // fprintf(stderr, "Max item size: %d %d %d\n", itemsize[0], itemsize[1], itemsize[2]);
+}
+
+static int compileProgram(const oclHardware& hardware, oclSoftware& software) { // Returns 0 on success, -1 on failure.
+    cl_int err = clBuildProgram(software.mProgram, 1, &hardware.mDevice, software.mCompileOptions, 0, 0);
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        size_t size = 0; // first query: length of the build log
+        err = clGetProgramBuildInfo(software.mProgram, hardware.mDevice, CL_PROGRAM_BUILD_LOG, 0, 0, &size);
+        if (err != CL_SUCCESS) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            return -1;
+        }
+
+        std::vector<char> log(size + 1); // zero-filled, so the log stays NUL-terminated
+        err = clGetProgramBuildInfo(software.mProgram, hardware.mDevice, CL_PROGRAM_BUILD_LOG, size, &log[0], 0);
+        if (err != CL_SUCCESS) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            return -1;
+        }
+
+        std::cout << &log[0] << "\n"; // print the build log, then report the build failure
+        return -1;
+    }
+
+    return 0;
+}
+
+oclHardware getOclHardware(cl_device_type type, char* target_device) {
+    // Finds a supported Alveo U200 device on the "Xilinx" platform and returns it together with a new
+    // context and out-of-order command queue; returns an all-zero oclHardware if that is not possible.
+    // NOTE: 'target_device' is only echoed in the "not found" message; matching uses the fixed names below.
+    xf::common::utils_sw::Logger logger(std::cerr);
+    cl_int err;
+    oclHardware hardware = {0, 0, 0, 0, 0, 0};
+    cl_platform_id platforms[16] = {0};
+    cl_device_id devices[16];
+    cl_device_id device_id = 0;
+    char platformName[256];
+    char deviceName[256];
+    const cl_uint maxPlatforms = sizeof(platforms) / sizeof(platforms[0]);
+    const cl_uint maxDevices = sizeof(devices) / sizeof(devices[0]);
+    cl_uint platformCount = 0;
+    err = clGetPlatformIDs(maxPlatforms, platforms, &platformCount);
+    if (err != CL_SUCCESS) {
+        std::cout << oclErrorCode(err) << "\n";
+        return hardware;
+    }
+    fprintf(stderr, "INFO: Number of Platforms: %d\n", platformCount);
+    // The reported count is the number available, which can exceed what was written to platforms[].
+    if (platformCount > maxPlatforms) platformCount = maxPlatforms;
+
+    for (cl_uint i = 0; i < platformCount; i++) {
+        err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 256, platformName, 0);
+        if (err != CL_SUCCESS) {
+            std::cout << oclErrorCode(err) << "\n";
+            return hardware;
+        }
+        if (strcmp(platformName, "Xilinx") != 0) {
+            // skip non-Xilinx platform
+            continue;
+        }
+        fprintf(stderr, "INFO: Selected Platform: %s\n", platformName);
+
+        // iterate all devices to find the target device
+        cl_uint deviceCount = 0;
+        err = clGetDeviceIDs(platforms[i], type, 0, NULL, &deviceCount);
+        if ((err != CL_SUCCESS) || (deviceCount == 0)) {
+            fprintf(stderr, "ERROR: clGetDeviceIDs count error: %s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            continue;
+        }
+
+        fprintf(stderr, "INFO: Number of devices for platform %d: %d\n", i, deviceCount);
+        if (deviceCount > maxDevices) deviceCount = maxDevices; // devices[] cannot hold more entries
+        err = clGetDeviceIDs(platforms[i], type, deviceCount, devices, NULL);
+        if (err != CL_SUCCESS) {
+            fprintf(stderr, "ERROR: clGetDeviceIDs device error: %s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            continue;
+        }
+        cl_uint idev;
+        for (idev = 0; idev < deviceCount; idev++) {
+            err = clGetDeviceInfo(devices[idev], CL_DEVICE_NAME, 256, deviceName, 0);
+            if (err != CL_SUCCESS) {
+                fprintf(stderr, "ERROR: clGetDeviceInfo: %s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                return hardware;
+            }
+
+            fprintf(stderr, "INFO: target_device found:   %s\n", deviceName);
+
+            if (strcmp(deviceName, "xilinx_u200_gen3x16_xdma_base_1") == 0 ||
+                strcmp(deviceName, "xilinx_u200_gen3x16_xdma_base_2") == 0 ||
+                strcmp(deviceName, "xilinx_u200_gen3x16_xdma_1_202110_1") == 0 ||
+                strcmp(deviceName, "xilinx_u200_gen3x16_xdma_2_202110_1") == 0) {
+                device_id = devices[idev];
+                fprintf(stderr, "INFO: target_device chosen:  %s\n", deviceName);
+                break;
+            }
+        }
+
+        if (idev == deviceCount) {
+            fprintf(stderr, "ERROR: target device %s not found \n: %s %d %s", target_device, __func__, __LINE__,
+                    oclErrorCode(err));
+            return hardware;
+        }
+
+        cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err);
+        logger.logCreateContext(err);
+        if (err != CL_SUCCESS) {
+            continue;
+        }
+
+        cl_command_queue queue = clCreateCommandQueue(context, device_id, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+        logger.logCreateCommandQueue(err);
+        if (err != CL_SUCCESS) {
+            std::cout << oclErrorCode(err) << "\n";
+            clReleaseContext(context); // the context is not handed to the caller on this path
+            return hardware;
+        }
+
+        hardware.mPlatform = platforms[i];
+        hardware.mContext = context;
+        hardware.mDevice = device_id;
+        hardware.mQueue = queue;
+        getDeviceVersion(hardware);
+        fprintf(stderr, "INFO: OpenCL Version: %d.%d\n", hardware.mMajorVersion, hardware.mMinorVersion);
+        return hardware;
+    }
+    return hardware;
+}
+
+extern "C" int getOclSoftware(oclSoftware& soft, const oclHardware& hardware) { // Load and build soft.mFileName.
+    xf::common::utils_sw::Logger logger(std::cerr);
+    cl_int err;
+
+    cl_device_type deviceType = CL_DEVICE_TYPE_DEFAULT;
+    err = clGetDeviceInfo(hardware.mDevice, CL_DEVICE_TYPE, sizeof(deviceType), &deviceType, 0);
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        return -1; // device type could not be queried
+    }
+
+    unsigned char* kernelCode = 0;
+    fprintf(stderr, "INFO: Loading %s\n", soft.mFileName);
+
+    int size = loadFile2Memory(soft.mFileName, (char**)&kernelCode);
+    if (size < 0) {
+        fprintf(stderr, "Failed to load kernel\n");
+        return -2; // file could not be loaded
+    }
+
+    fprintf(stderr, "INFO: Loading %s Finished\n", soft.mFileName);
+
+    if (deviceType == CL_DEVICE_TYPE_ACCELERATOR) { // accelerators take the file as a program binary
+        size_t n = size;
+        soft.mProgram = clCreateProgramWithBinary(hardware.mContext, 1, &hardware.mDevice, &n,
+                                                  (const unsigned char**)&kernelCode, 0, &err);
+        logger.logCreateProgram(err);
+    } else { // any other device type takes the file as program source text
+        soft.mProgram = clCreateProgramWithSource(hardware.mContext, 1, (const char**)&kernelCode, 0, &err);
+        logger.logCreateProgram(err);
+    }
+    if (!soft.mProgram || (err != CL_SUCCESS)) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        delete[] kernelCode;
+        return -3; // program object could not be created
+    }
+
+    err = compileProgram(hardware, soft); // 0 on success, -1 when the build fails
+    delete[] kernelCode;
+    return err;
+}
+
+extern "C" void releaseSoftware(oclSoftware& software) { // Release the program object held by 'software'.
+    clReleaseProgram(software.mProgram);
+}
+
+extern "C" void releaseKernel(oclKernelInfo& kernelinfo) { // Releases mKernel only; the other kernel members are untouched.
+    clReleaseKernel(kernelinfo.mKernel);
+}
+
+extern "C" void releaseHardware(oclHardware& hardware) { // Release the queue, the context and (1.2+) the device.
+    clReleaseCommandQueue(hardware.mQueue);
+    clReleaseContext(hardware.mContext);
+    // clReleaseDevice exists from OpenCL 1.2 on. Compare (major, minor) as a pair so that 2.0, 2.1
+    // and 3.0 qualify too; testing the minor number on its own rejects every x.0 and x.1 release.
+    if ((hardware.mMajorVersion > 1) || ((hardware.mMajorVersion == 1) && (hardware.mMinorVersion >= 2))) {
+        clReleaseDevice(hardware.mDevice);
+    }
+}
diff --git a/codec/L2/demos/webpEnc/host/src/enc/kernel/oclHelper.h b/codec/L2/demos/webpEnc/host/src/enc/kernel/oclHelper.h
new file mode 100644
index 0000000000..46d9ae137a
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/kernel/oclHelper.h
@@ -0,0 +1,62 @@
+
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _OCL_HELP_H_
+#define _OCL_HELP_H_
+
+#include <CL/cl.h>
+#include "vp8_AsyncConfig.h"
+#include "xf_utils_sw/logger.hpp"
+
+// OpenCL platform/context/device/queue handles used by the host code, plus
+// the OpenCL version numbers recorded for the device.
+struct oclHardware {
+    cl_platform_id mPlatform;
+    cl_context mContext;
+    cl_device_id mDevice;
+    cl_command_queue mQueue;
+    short mMajorVersion; // consulted by releaseHardware() before releasing mDevice
+    short mMinorVersion; // consulted by releaseHardware() before releasing mDevice
+};
+
+// Program object together with the inputs used to create it.
+struct oclSoftware {
+    cl_program mProgram;
+    char mCompileOptions[1024]; // presumably the options used when building mProgram -- TODO confirm
+    char mFileName[1024];       // path of the kernel file that getOclSoftware() loads into memory
+};
+
+// Kernel handles and their names. Only mKernel is released by releaseKernel();
+// how the remaining handles are managed is not visible from this header.
+struct oclKernelInfo {
+    cl_kernel mKernel;
+    char mKernelName[128];
+    cl_kernel mKernel2;
+    char mKernelName2[128];
+
+    // NOTE(review): the trailing comments suggest arrays of
+    // NasyncDepth*Ninstances kernels allocated elsewhere -- confirm.
+    cl_kernel* mKernelPred; //[NasyncDepth*Ninstances];
+    cl_kernel* mKernelAC;   //[NasyncDepth*Ninstances];
+};
+
+// NOTE(review): implementation not visible here; presumably selects a device of
+// the given type matching 'target_device' and fills an oclHardware -- confirm.
+extern "C" oclHardware getOclHardware(cl_device_type type, char* target_device);
+
+// Loads the file named by software.mFileName, creates software.mProgram from it
+// and compiles it. Returns a negative value on load/creation failure, otherwise
+// the result of the compile step.
+extern "C" int getOclSoftware(oclSoftware& software, const oclHardware& hardware);
+
+// Releases software.mProgram.
+extern "C" void releaseSoftware(oclSoftware& software);
+
+// Releases kernelinfo.mKernel (only that member).
+extern "C" void releaseKernel(oclKernelInfo& kernelinfo);
+
+// Releases the command queue and context, and the device when the recorded
+// OpenCL version allows it.
+extern "C" void releaseHardware(oclHardware& hardware);
+
+// Returns a printable string for an OpenCL status code (it is passed to "%s"
+// in the error messages of the host code).
+extern "C" const char* oclErrorCode(cl_int code);
+
+#endif
diff --git a/codec/L2/demos/webpEnc/host/src/enc/near_lossless.c b/codec/L2/demos/webpEnc/host/src/enc/near_lossless.c
new file mode 100644
index 0000000000..049938cae9
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/near_lossless.c
@@ -0,0 +1,321 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Near-lossless image preprocessing adjusts pixel values to help
+// compressibility with a guarantee of maximum deviation between original and
+// resulting pixel values.
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+// Converted to C by Aleksander Kramarz (akramarz@google.com)
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "../dsp/lossless.h"
+#include "../utils/utils.h"
+#include "../utils/profiling.h"
+#include "./vp8enci.h"
+#include "../../host/create_kernel.h"
+// #include "./kernel/oclHelper.h"
+
+#define MIN_DIM_FOR_NEAR_LOSSLESS 64
+#define MAX_LIMIT_BITS 5
+
+// Quantizes 'initial' by clearing its low 'bits' bits and refilling them with
+// its top bits, storing the result in '*val'. '*distance' receives twice the
+// absolute difference between 'a' and that quantized value.
+static void GetValAndDistance(int a, int initial, int bits, int* const val, int* const distance) {
+    const int low_bits = (1 << bits) - 1;
+    const int quantized = (initial & ~low_bits) | (initial >> (8 - bits));
+    *val = quantized;
+    *distance = 2 * abs(a - quantized);
+}
+
+// Saturates 'val' to the 8-bit range [0, 255].
+static int Clamp8b(int val) {
+    if (val < 0) {
+        return 0;
+    }
+    if (val > 0xff) {
+        return 0xff;
+    }
+    return val;
+}
+
+// Tries the three starting points a - (1 << bits), a and a + (1 << bits)
+// (each clamped to [0, 255]), quantizes them and returns the quantized value
+// that lies closest to 'a'. Returns 'a' itself if no candidate has a distance
+// below 256.
+static int FindClosestDiscretized(int a, int bits) {
+    const int step = 1 << bits;
+    int nearest = a;
+    int nearest_distance = 256;
+    int offset;
+
+    for (offset = -1; offset <= 1; ++offset) {
+        int quantized, dist;
+        const int start = Clamp8b(a + offset * step);
+        GetValAndDistance(a, start, bits, &quantized, &dist);
+        // Penalize the off-center candidates by one so that, on a tie, the
+        // centered one wins; this keeps the overall intensity more constant.
+        if (offset != 0) {
+            dist += 1;
+        }
+        if (dist < nearest_distance) {
+            nearest_distance = dist;
+            nearest = quantized;
+        }
+    }
+    return nearest;
+}
+
+// Applies FindClosestDiscretized to each of the four 8-bit channels of pixel
+// 'a' and reassembles the results into a 32-bit pixel.
+static uint32_t ClosestDiscretizedArgb(uint32_t a, int bits) {
+    // Convert each channel to uint32_t *before* shifting: shifting an int of
+    // 128 or more left by 24 overflows a signed int, which is undefined.
+    const uint32_t c3 = (uint32_t)FindClosestDiscretized(a >> 24, bits);
+    const uint32_t c2 = (uint32_t)FindClosestDiscretized((a >> 16) & 0xff, bits);
+    const uint32_t c1 = (uint32_t)FindClosestDiscretized((a >> 8) & 0xff, bits);
+    const uint32_t c0 = (uint32_t)FindClosestDiscretized(a & 0xff, bits);
+    return (c3 << 24) | (c2 << 16) | (c1 << 8) | c0;
+}
+
+// Returns 1 when, for every 8-bit channel, the values of pixels 'a' and 'b'
+// differ by strictly less than 'limit'; returns 0 otherwise.
+static int IsNear(uint32_t a, uint32_t b, int limit) {
+    int shift;
+    for (shift = 0; shift < 32; shift += 8) {
+        const int chan_a = (int)((a >> shift) & 0xff);
+        const int chan_b = (int)((b >> shift) & 0xff);
+        const int diff = chan_a - chan_b;
+        if (!(diff < limit && diff > -limit)) {
+            return 0;
+        }
+    }
+    return 1;
+}
+
+// Returns 1 when the pixel at column 'ix' of 'curr_row' is near (see IsNear)
+// its left, right, upper and lower neighbors, in that order of testing;
+// returns 0 as soon as one of them is not.
+static int IsSmooth(
+    const uint32_t* const prev_row, const uint32_t* const curr_row, const uint32_t* const next_row, int ix, int limit) {
+    if (!IsNear(curr_row[ix], curr_row[ix - 1], limit)) {
+        return 0;
+    }
+    if (!IsNear(curr_row[ix], curr_row[ix + 1], limit)) {
+        return 0;
+    }
+    if (!IsNear(curr_row[ix], prev_row[ix], limit)) {
+        return 0;
+    }
+    return IsNear(curr_row[ix], next_row[ix], limit) ? 1 : 0;
+}
+
+// Adjusts pixel values of image with given maximum error.
+//
+// For every interior pixel (the first/last row and column are left untouched)
+// that is not "smooth" with respect to its 4 neighbors, the pixel in 'argb' is
+// replaced by its discretized value. Neighbor tests are done on unmodified
+// copies of the rows held in 'copy_buffer', which is used as three rows of
+// 'xsize' pixels each.
+static void NearLossless(int xsize, int ysize, uint32_t* argb, int limit_bits, uint32_t* copy_buffer) {
+    int x, y;
+    const int limit = 1 << limit_bits;
+    uint32_t* prev_row = copy_buffer;
+    uint32_t* curr_row = prev_row + xsize;
+    uint32_t* next_row = curr_row + xsize;
+    // Seed prev_row and curr_row with image rows 0 and 1.
+    // NOTE(review): this reads two full rows from 'argb', so callers appear to
+    // need ysize >= 2 -- confirm at the call sites.
+    memcpy(copy_buffer, argb, xsize * 2 * sizeof(argb[0]));
+
+    for (y = 1; y < ysize - 1; ++y) {
+        uint32_t* const curr_argb_row = argb + y * xsize;
+        uint32_t* const next_argb_row = curr_argb_row + xsize;
+        // Snapshot the next row before any pixel of it can be modified.
+        memcpy(next_row, next_argb_row, xsize * sizeof(argb[0]));
+        for (x = 1; x < xsize - 1; ++x) {
+            if (!IsSmooth(prev_row, curr_row, next_row, x, limit)) {
+                curr_argb_row[x] = ClosestDiscretizedArgb(curr_row[x], limit_bits);
+            }
+        }
+        {
+            // Three-way swap.
+            uint32_t* const temp = prev_row;
+            prev_row = curr_row;
+            curr_row = next_row;
+            next_row = temp;
+        }
+    }
+}
+
+// Maps a quality setting to the number of low bits that may be altered.
+static int QualityToLimitBits(int quality) {
+    // quality mapping (integer division by 20):
+    //   0..19 -> 5
+    //  20..39 -> 4
+    //  40..59 -> 3
+    //  60..79 -> 2
+    //  80..99 -> 1
+    //  100    -> 0
+    const int steps = quality / 20;
+    return MAX_LIMIT_BITS - steps;
+}
+
+// Applies near-lossless preprocessing in place to the 'xsize' x 'ysize' image
+// 'argb', running one NearLossless() pass per bit from the limit derived from
+// 'quality' down to 1.
+//
+// Returns 0 if the scratch buffer cannot be allocated, 1 otherwise (including
+// when the image is left untouched because it is too small). The elapsed time
+// is recorded through StopProfiling() on every exit path.
+int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality) {
+    int i;
+    uint32_t* const copy_buffer = (uint32_t*)WebPSafeMalloc(xsize * 3, sizeof(*copy_buffer));
+    const int limit_bits = QualityToLimitBits(quality);
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+    assert(argb != NULL);
+    assert(limit_bits >= 0);
+    assert(limit_bits <= MAX_LIMIT_BITS);
+    if (copy_buffer == NULL) {
+        StopProfiling(&stop_watch, &timeVP8ApplyNearLossless, &countVP8ApplyNearLossless);
+        return 0;
+    }
+    // For small icon images, don't attempt to apply near-lossless compression.
+    // Images with fewer than 3 rows have no interior row to process, and
+    // NearLossless() unconditionally copies the first two rows of 'argb', so
+    // they must be skipped as well to avoid reading past the image.
+    if ((xsize < MIN_DIM_FOR_NEAR_LOSSLESS && ysize < MIN_DIM_FOR_NEAR_LOSSLESS) || ysize < 3) {
+        WebPSafeFree(copy_buffer);
+        StopProfiling(&stop_watch, &timeVP8ApplyNearLossless, &countVP8ApplyNearLossless);
+        return 1;
+    }
+
+    // 'i > 0' rather than 'i != 0': identical for the asserted range
+    // [0, MAX_LIMIT_BITS], but terminates if asserts are compiled out and the
+    // derived limit happens to be negative.
+    for (i = limit_bits; i > 0; --i) {
+        NearLossless(xsize, ysize, argb, i, copy_buffer);
+    }
+
+    WebPSafeFree(copy_buffer);
+    StopProfiling(&stop_watch, &timeVP8ApplyNearLossless, &countVP8ApplyNearLossless);
+    return 1;
+}
+
+// OpenCL variant of VP8ApplyNearLossless(): uploads 'argb' to two device
+// buffers, runs the near-lossless kernel once per bit from the limit derived
+// from 'quality' down to 1 (alternating the two buffers as input and output),
+// then reads the final result back into 'argb'.
+//
+// Returns 1 on success (or when the image is too small to be processed) and -1
+// when any OpenCL call fails. 'hardware', 'nearlossless' and 'nearpara' are not
+// declared in this function; presumably they are globals set up by the
+// kernel-creation code -- TODO confirm.
+int VP8ApplyNearLosslessOcl(int xsize, int ysize, uint32_t* argb, int quality) {
+    const int limit_bits = QualityToLimitBits(quality);
+    int status = 1;
+    int arg = 1; // not referenced anywhere else in this function
+
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+    assert(argb != NULL);
+    assert(limit_bits >= 0);
+    assert(limit_bits <= MAX_LIMIT_BITS);
+
+    // For small icon images, don't attempt to apply near-lossless compression.
+    // NOTE(review): this early return bypasses the releases done at 'Err' below
+    // -- confirm that this is intended.
+    if (xsize < MIN_DIM_FOR_NEAR_LOSSLESS && ysize < MIN_DIM_FOR_NEAR_LOSSLESS) {
+        StopProfiling(&stop_watch, &timeVP8ApplyNearLosslessOcl, &countVP8ApplyNearLosslessOcl);
+        return 1;
+    }
+
+    const int argb_size = xsize * ysize * sizeof(uint32_t);
+    int device_width; // only assigned in the USE_VECTOR configuration; never read here
+    int global_width;
+    int global_height;
+
+    size_t globalSize[2];
+    size_t localSize[2];
+
+    // Round the processed area up to a multiple of the work-group size for the
+    // configuration selected at compile time.
+#ifdef HANDLE_MULTI_PIXELS_PER_ITEM
+    global_width = RoundUp(xsize - PADDING_SIZE, GRX_SIZE * PIXELS_PER_ITEM);
+    global_height = RoundUp(ysize - PADDING_SIZE, GRY_SIZE);
+#elif defined USE_VECTOR
+    if (xsize > IMAGE_4K) {
+        device_width = RoundUp(xsize, VECTOR_GRX_SIZE_4K * VECTOR_LENGTH);
+        global_width = RoundUp(xsize - VECTOR_WIDTH_PADDING, VECTOR_GRX_SIZE_4K * VECTOR_LENGTH);
+        global_height = RoundUp(ysize - PADDING_SIZE, VECTOR_GRY_SIZE_4K);
+    } else {
+        device_width = RoundUp(xsize, VECTOR_GRX_SIZE * VECTOR_LENGTH);
+        global_width = RoundUp(xsize - VECTOR_WIDTH_PADDING, VECTOR_GRX_SIZE * VECTOR_LENGTH);
+        global_height = RoundUp(ysize - PADDING_SIZE, VECTOR_GRY_SIZE);
+    }
+#else
+    global_width = RoundUp(xsize - PADDING_SIZE, GRX_SIZE);
+    global_height = RoundUp(ysize - PADDING_SIZE, GRY_SIZE);
+#endif
+
+    // Derive the global/local NDRange sizes from the rounded dimensions.
+#ifdef HANDLE_MULTI_PIXELS_PER_ITEM
+    globalSize[0] = global_width / PIXELS_PER_ITEM;
+    globalSize[1] = global_height;
+    localSize[0] = GRX_SIZE;
+    localSize[1] = GRY_SIZE;
+#elif defined USE_VECTOR
+    if (xsize > IMAGE_4K) {
+        globalSize[0] = global_width / VECTOR_LENGTH;
+        globalSize[1] = global_height;
+        localSize[0] = VECTOR_GRX_SIZE_4K;
+        localSize[1] = VECTOR_GRY_SIZE_4K;
+    } else {
+        globalSize[0] = global_width / VECTOR_LENGTH;
+        globalSize[1] = global_height;
+        localSize[0] = VECTOR_GRX_SIZE;
+        localSize[1] = VECTOR_GRY_SIZE;
+    }
+#else
+    globalSize[0] = global_width;
+    globalSize[1] = global_height;
+    localSize[0] = GRX_SIZE;
+    localSize[1] = GRY_SIZE;
+#endif
+
+    cl_int err;
+
+    // Both device buffers start out holding the host pixels (blocking writes).
+    err = clEnqueueWriteBuffer(hardware.mQueue, nearpara.input_argb, CL_TRUE, 0, argb_size, argb, 0, NULL, NULL);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    err = clEnqueueWriteBuffer(hardware.mQueue, nearpara.output_argb, CL_TRUE, 0, argb_size, argb, 0, NULL, NULL);
+    if (CL_SUCCESS != err) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        status = -1;
+        goto Err;
+    }
+
+    // One kernel launch per bit count, from limit_bits down to 1.
+    for (int k = limit_bits; k != 0; --k) {
+        nearpara.limitbits = k;
+        err = clSetKernelArg(nearlossless.mKernel, 7, sizeof(int), &(nearpara.limitbits));
+        if (err != CL_SUCCESS) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            status = -1;
+            goto Err;
+        }
+
+        // Ping-pong the buffers: passes with the same parity as the first one
+        // use input_argb as kernel argument 0 and output_argb as argument 1;
+        // the other passes swap them.
+        if (k % 2 == limit_bits % 2) {
+            err = clSetKernelArg(nearlossless.mKernel, 0, sizeof(cl_mem), &(nearpara.input_argb));
+            if (err != CL_SUCCESS) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                status = -1;
+                goto Err;
+            }
+
+            err = clSetKernelArg(nearlossless.mKernel, 1, sizeof(cl_mem), &(nearpara.output_argb));
+            if (err != CL_SUCCESS) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                status = -1;
+                goto Err;
+            }
+        } else {
+            err = clSetKernelArg(nearlossless.mKernel, 0, sizeof(cl_mem), &(nearpara.output_argb));
+            if (err != CL_SUCCESS) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                status = -1;
+                goto Err;
+            }
+
+            err = clSetKernelArg(nearlossless.mKernel, 1, sizeof(cl_mem), &(nearpara.input_argb));
+            if (err != CL_SUCCESS) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                status = -1;
+                goto Err;
+            }
+        }
+
+        err = clEnqueueNDRangeKernel(hardware.mQueue, nearlossless.mKernel, 2, 0, globalSize, localSize, 0, NULL, NULL);
+        if (err != CL_SUCCESS) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            status = -1;
+            goto Err;
+        }
+
+        // Wait for this pass to complete before configuring the next one.
+        err = clFinish(hardware.mQueue);
+        if (err != CL_SUCCESS) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            status = -1;
+            goto Err;
+        }
+    }
+
+    // The last pass used output_argb as argument 1 when limit_bits is odd and
+    // input_argb when it is even; read back from that buffer.
+    if (limit_bits % 2 == 1) {
+        err = clEnqueueReadBuffer(hardware.mQueue, nearpara.output_argb, CL_TRUE, 0, argb_size, argb, 0, NULL, NULL);
+        if (CL_SUCCESS != err) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            status = -1;
+            goto Err;
+        }
+    } else {
+        err = clEnqueueReadBuffer(hardware.mQueue, nearpara.input_argb, CL_TRUE, 0, argb_size, argb, 0, NULL, NULL);
+        if (CL_SUCCESS != err) {
+            fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            status = -1;
+            goto Err;
+        }
+    }
+
+    // Reached on success as well as on failure: the kernel and both buffers are
+    // released in either case.
+Err:
+    releaseKernel(nearlossless);
+    clReleaseMemObject(nearpara.input_argb);
+    clReleaseMemObject(nearpara.output_argb);
+
+    StopProfiling(&stop_watch, &timeVP8ApplyNearLosslessOcl, &countVP8ApplyNearLosslessOcl);
+    return status;
+}
diff --git a/codec/L2/demos/webpEnc/host/src/enc/picture.c b/codec/L2/demos/webpEnc/host/src/enc/picture.c
new file mode 100644
index 0000000000..29875e59e3
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/picture.c
@@ -0,0 +1,291 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebPPicture class basis
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "./vp8enci.h"
+#include "../dsp/dsp.h"
+#include "../utils/utils.h"
+
+//------------------------------------------------------------------------------
+// WebPPicture
+//------------------------------------------------------------------------------
+
+// Writer callback that discards its input and always reports success (1).
+static int DummyWriter(const uint8_t* data, size_t data_size, const WebPPicture* const picture) {
+    // Reference every parameter so that none triggers an 'unused' diagnostic.
+    (void)picture;
+    (void)data_size;
+    (void)data;
+    return 1;
+}
+
+// Returns 0 when 'version' is ABI-incompatible with WEBP_ENCODER_ABI_VERSION.
+// Otherwise returns 1 after, for a non-NULL 'picture', zero-filling it,
+// installing DummyWriter as its writer and calling
+// WebPEncodingSetError(picture, VP8_ENC_OK).
+int WebPPictureInitInternal(WebPPicture* picture, int version) {
+    if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_ENCODER_ABI_VERSION)) {
+        return 0; // caller/system version mismatch!
+    }
+    if (picture == NULL) {
+        return 1;
+    }
+    memset(picture, 0, sizeof(*picture));
+    picture->writer = DummyWriter;
+    WebPEncodingSetError(picture, VP8_ENC_OK);
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+
+// Clears the ARGB buffer fields of 'picture' without freeing anything.
+static void WebPPictureResetBufferARGB(WebPPicture* const picture) {
+    picture->argb_stride = 0;
+    picture->argb = NULL;
+    picture->memory_argb_ = NULL;
+}
+
+// Clears the Y/U/V/A plane pointers, their strides and the backing-memory
+// pointer of 'picture' without freeing anything.
+static void WebPPictureResetBufferYUVA(WebPPicture* const picture) {
+    picture->memory_ = NULL;
+
+    picture->a = NULL;
+    picture->v = NULL;
+    picture->u = NULL;
+    picture->y = NULL;
+
+    picture->uv_stride = 0;
+    picture->y_stride = 0;
+    picture->a_stride = 0;
+}
+
+// Clears both the ARGB and the YUVA buffer fields of 'picture' (ARGB first).
+// Nothing is freed here.
+void WebPPictureResetBuffers(WebPPicture* const picture) {
+    WebPPicture* const target = picture;
+    WebPPictureResetBufferARGB(target);
+    WebPPictureResetBufferYUVA(target);
+}
+
+// Replaces the ARGB buffer of 'picture' by a freshly allocated one of
+// 'width' x 'height' pixels, with argb_stride set to 'width'.
+// The previous ARGB buffer is always freed first. Returns 1 on success;
+// otherwise returns the value of WebPEncodingSetError() for a bad dimension
+// or an out-of-memory condition.
+int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) {
+    void* buffer;
+    uint64_t num_pixels;
+
+    assert(picture != NULL);
+
+    // Drop the previous buffer before validating the new dimensions.
+    WebPSafeFree(picture->memory_argb_);
+    WebPPictureResetBufferARGB(picture);
+
+    if (width <= 0 || height <= 0) {
+        return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
+    }
+
+    // allocate a new buffer.
+    num_pixels = (uint64_t)width * height;
+    buffer = WebPSafeMalloc(num_pixels, sizeof(*picture->argb));
+    if (buffer == NULL) {
+        return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    }
+
+    // TODO(skal): align plane to cache line?
+    picture->argb_stride = width;
+    picture->argb = (uint32_t*)buffer;
+    picture->memory_argb_ = buffer;
+    return 1;
+}
+
+// Replaces the YUVA buffer of 'picture' by one freshly allocated block that
+// holds, in this order, the Y plane, the U plane, the V plane and (when the
+// colorspace has the alpha bit set) the A plane. U and V are half the size of
+// Y in each dimension, rounded up. The previous YUVA buffer is always freed
+// first. Returns 1 on success, otherwise the value of WebPEncodingSetError().
+int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) {
+    // NOTE(review): 'picture' is dereferenced by these initializers before the
+    // assert(picture != NULL) below gets a chance to run.
+    const WebPEncCSP uv_csp = picture->colorspace & WEBP_CSP_UV_MASK;
+    const int has_alpha = picture->colorspace & WEBP_CSP_ALPHA_BIT;
+    const int y_stride = width;
+    const int uv_width = (width + 1) >> 1;
+    const int uv_height = (height + 1) >> 1;
+    const int uv_stride = uv_width;
+    int a_width, a_stride;
+    uint64_t y_size, uv_size, a_size, total_size;
+    uint8_t* mem;
+
+    assert(picture != NULL);
+
+    WebPSafeFree(picture->memory_);
+    WebPPictureResetBufferYUVA(picture);
+
+    // Only the 4:2:0 chroma layout is accepted.
+    if (uv_csp != WEBP_YUV420) {
+        return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
+    }
+
+    // alpha
+    a_width = has_alpha ? width : 0;
+    a_stride = a_width;
+    // Plane sizes are computed in 64 bits.
+    y_size = (uint64_t)y_stride * height;
+    uv_size = (uint64_t)uv_stride * uv_height;
+    a_size = (uint64_t)a_stride * height;
+
+    total_size = y_size + a_size + 2 * uv_size;
+
+    // Security and validation checks
+    if (width <= 0 || height <= 0 ||     // luma/alpha param error
+        uv_width < 0 || uv_height < 0) { // u/v param error
+        return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
+    }
+    // allocate a new buffer.
+    mem = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*mem));
+    if (mem == NULL) {
+        return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    }
+
+    // From now on, we're in the clear, we can no longer fail...
+    picture->memory_ = (void*)mem;
+    picture->y_stride = y_stride;
+    picture->uv_stride = uv_stride;
+    picture->a_stride = a_stride;
+
+    // TODO(skal): we could align the y/u/v planes and adjust stride.
+    // Carve the planes out of the single block, advancing 'mem' past each one.
+    picture->y = mem;
+    mem += y_size;
+
+    picture->u = mem;
+    mem += uv_size;
+    picture->v = mem;
+    mem += uv_size;
+
+    if (a_size > 0) {
+        picture->a = mem;
+        mem += a_size;
+    }
+    (void)mem; // makes the static analyzer happy
+    return 1;
+}
+
+// Frees the buffers currently held by 'picture' and allocates new ones for
+// picture->width x picture->height, in ARGB or YUVA form depending on
+// picture->use_argb. Returns the allocator's result, or 1 for a NULL picture.
+int WebPPictureAlloc(WebPPicture* picture) {
+    int width, height;
+
+    if (picture == NULL) {
+        return 1;
+    }
+    width = picture->width;
+    height = picture->height;
+
+    WebPPictureFree(picture); // erase previous buffer
+
+    return picture->use_argb ? WebPPictureAllocARGB(picture, width, height)
+                             : WebPPictureAllocYUVA(picture, width, height);
+}
+
+// Frees the YUVA and ARGB backing memory of 'picture' and clears the
+// corresponding fields. Does nothing for a NULL picture.
+void WebPPictureFree(WebPPicture* picture) {
+    if (picture == NULL) {
+        return;
+    }
+    WebPSafeFree(picture->memory_);
+    WebPSafeFree(picture->memory_argb_);
+    WebPPictureResetBuffers(picture);
+}
+
+//------------------------------------------------------------------------------
+// WebPMemoryWriter: Write-to-memory
+
+// Puts 'writer' in the empty state: no buffer, zero size, zero capacity.
+// 'writer' must not be NULL (it is dereferenced unconditionally).
+void WebPMemoryWriterInit(WebPMemoryWriter* writer) {
+    writer->max_size = 0;
+    writer->size = 0;
+    writer->mem = NULL;
+}
+
+// Writer callback that appends 'data_size' bytes from 'data' to the
+// WebPMemoryWriter found in picture->custom_ptr, growing its buffer as needed.
+// Returns 1 on success (also when no writer is attached) and 0 when the
+// buffer cannot be grown.
+int WebPMemoryWrite(const uint8_t* data, size_t data_size, const WebPPicture* picture) {
+    WebPMemoryWriter* const writer = (WebPMemoryWriter*)picture->custom_ptr;
+    uint64_t required;
+
+    if (writer == NULL) {
+        return 1;
+    }
+
+    required = (uint64_t)writer->size + data_size;
+    if (required > writer->max_size) {
+        // Double the capacity, but never go below the required size nor
+        // below 8192 bytes.
+        uint64_t capacity = 2ULL * writer->max_size;
+        uint8_t* grown;
+        if (capacity < required) {
+            capacity = required;
+        }
+        if (capacity < 8192ULL) {
+            capacity = 8192ULL;
+        }
+        grown = (uint8_t*)WebPSafeMalloc(capacity, 1);
+        if (grown == NULL) {
+            return 0;
+        }
+        // Carry the existing payload over before releasing the old buffer.
+        if (writer->size > 0) {
+            memcpy(grown, writer->mem, writer->size);
+        }
+        WebPSafeFree(writer->mem);
+        writer->mem = grown;
+        // down-cast is ok, thanks to WebPSafeMalloc
+        writer->max_size = (size_t)capacity;
+    }
+
+    if (data_size > 0) {
+        memcpy(writer->mem + writer->size, data, data_size);
+        writer->size += data_size;
+    }
+    return 1;
+}
+
+// Frees the buffer owned by 'writer' and returns it to the empty state.
+// Does nothing for a NULL writer.
+void WebPMemoryWriterClear(WebPMemoryWriter* writer) {
+    if (writer == NULL) {
+        return;
+    }
+    WebPSafeFree(writer->mem);
+    writer->mem = NULL;
+    writer->max_size = 0;
+    writer->size = 0;
+}
+
+//------------------------------------------------------------------------------
+// Simplest high-level calls:
+
+typedef int (*Importer)(WebPPicture* const, const uint8_t* const, int);
+
+// Shared implementation behind the WebPEncode*() convenience wrappers.
+//
+// NOTE: the import + WebPEncode() step is disabled in this tree (see the
+// commented-out line below), so no bitstream is ever produced: the function
+// always sets '*output' to NULL and returns 0. 'ok' is explicitly initialized
+// to "failed" so that this outcome does not depend on reading an
+// uninitialized variable.
+//
+// Returns the size in bytes of the encoded data stored in '*output', or 0 on
+// failure.
+static size_t Encode(const uint8_t* rgba,
+                     int width,
+                     int height,
+                     int stride,
+                     Importer import,
+                     float quality_factor,
+                     int lossless,
+                     uint8_t** output) {
+    WebPPicture pic;
+    WebPConfig config;
+    WebPMemoryWriter wrt;
+    int ok = 0; // stays 0 for as long as the encode step below is disabled
+
+    // Only consumed by the disabled encode step.
+    (void)rgba;
+    (void)stride;
+    (void)import;
+
+    if (!WebPConfigPreset(&config, WEBP_PRESET_DEFAULT, quality_factor) || !WebPPictureInit(&pic)) {
+        return 0; // shouldn't happen, except if system installation is broken
+    }
+
+    config.lossless = !!lossless;
+    pic.use_argb = !!lossless;
+    pic.width = width;
+    pic.height = height;
+    pic.writer = WebPMemoryWrite;
+    pic.custom_ptr = &wrt;
+    WebPMemoryWriterInit(&wrt);
+
+    // Disabled in this tree:
+    // ok = import(&pic, rgba, stride) && WebPEncode(&config, &pic);
+    WebPPictureFree(&pic);
+    if (!ok) {
+        WebPMemoryWriterClear(&wrt);
+        *output = NULL;
+        return 0;
+    }
+    *output = wrt.mem;
+    return wrt.size;
+}
+
+// Generates a public wrapper NAME() that forwards to Encode() with the given
+// importer and with the 'lossless' argument set to 0.
+#define ENCODE_FUNC(NAME, IMPORTER)                                                 \
+    size_t NAME(const uint8_t* in, int w, int h, int bps, float q, uint8_t** out) { \
+        return Encode(in, w, h, bps, IMPORTER, q, 0, out);                          \
+    }
+
+ENCODE_FUNC(WebPEncodeRGB, WebPPictureImportRGB)
+ENCODE_FUNC(WebPEncodeBGR, WebPPictureImportBGR)
+ENCODE_FUNC(WebPEncodeRGBA, WebPPictureImportRGBA)
+ENCODE_FUNC(WebPEncodeBGRA, WebPPictureImportBGRA)
+
+#undef ENCODE_FUNC
+
+// Quality factor passed to Encode() by the lossless wrappers below.
+#define LOSSLESS_DEFAULT_QUALITY 70.
+// Generates a public wrapper NAME() that forwards to Encode() with the given
+// importer, LOSSLESS_DEFAULT_QUALITY and the 'lossless' argument set to 1.
+#define LOSSLESS_ENCODE_FUNC(NAME, IMPORTER)                                      \
+    size_t NAME(const uint8_t* in, int w, int h, int bps, uint8_t** out) {        \
+        return Encode(in, w, h, bps, IMPORTER, LOSSLESS_DEFAULT_QUALITY, 1, out); \
+    }
+
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGB, WebPPictureImportRGB)
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGR, WebPPictureImportBGR)
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGBA, WebPPictureImportRGBA)
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGRA, WebPPictureImportBGRA)
+
+#undef LOSSLESS_ENCODE_FUNC
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/enc/picture_csp.c b/codec/L2/demos/webpEnc/host/src/enc/picture_csp.c
new file mode 100644
index 0000000000..d8e0f8c5b7
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/picture_csp.c
@@ -0,0 +1,1052 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebPPicture utils for colorspace conversion
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "./vp8enci.h"
+#include "../utils/random.h"
+#include "../utils/utils.h"
+#include "../dsp/yuv.h"
+
+// Uncomment to disable gamma-compression during RGB->U/V averaging
+#define USE_GAMMA_COMPRESSION
+
+// If defined, use table to compute x / alpha.
+#define USE_INVERSE_ALPHA_TABLE
+
+// Byte-order probe: 'argb' is initialized with 0xff in its most significant
+// byte, so bytes[3] holds 0xff exactly when the most significant byte of a
+// uint32_t is stored last in memory.
+static const union {
+    uint32_t argb;
+    uint8_t bytes[4];
+} test_endian = {0xff000000u};
+#define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff)
+
+//------------------------------------------------------------------------------
+// Detection of non-trivial transparency
+
+// Scans 'height' rows of 'width' alpha samples, where consecutive samples are
+// 'x_step' bytes apart and consecutive rows 'y_step' bytes apart.
+// Returns 1 as soon as a sample differs from 0xff, 0 if none does or if
+// 'alpha' is NULL.
+static int CheckNonOpaque(const uint8_t* alpha, int width, int height, int x_step, int y_step) {
+    int row;
+    if (alpha == NULL) {
+        return 0;
+    }
+    for (row = 0; row < height; ++row) {
+        const int row_extent = width * x_step;
+        int offset;
+        for (offset = 0; offset < row_extent; offset += x_step) {
+            if (alpha[offset] != 0xff) {
+                return 1; // TODO(skal): check 4/8 bytes at a time.
+            }
+        }
+        alpha += y_step;
+    }
+    return 0;
+}
+
+// Returns 1 when 'picture' contains at least one alpha value other than 0xff,
+// 0 otherwise (including for a NULL picture or a missing ARGB buffer).
+// In YUVA mode the separate alpha plane is scanned; in ARGB mode the top byte
+// of every pixel is tested.
+int WebPPictureHasTransparency(const WebPPicture* picture) {
+    const uint32_t* row;
+    int x, y;
+
+    if (picture == NULL) {
+        return 0;
+    }
+    if (!picture->use_argb) {
+        return CheckNonOpaque(picture->a, picture->width, picture->height, 1, picture->a_stride);
+    }
+
+    row = picture->argb;
+    if (row == NULL) {
+        return 0;
+    }
+    for (y = 0; y < picture->height; ++y) {
+        for (x = 0; x < picture->width; ++x) {
+            // Any pixel below 0xff000000 has an alpha byte other than 0xff.
+            if (row[x] < 0xff000000u) {
+                return 1;
+            }
+        }
+        row += picture->argb_stride;
+    }
+    return 0;
+}
+
+//------------------------------------------------------------------------------
+// Code for gamma correction
+
+#if defined(USE_GAMMA_COMPRESSION)
+
+// gamma-compensates loss of resolution during chroma subsampling
+#define kGamma 0.80  // for now we use a different gamma value than kGammaF
+#define kGammaFix 12 // fixed-point precision for linear values
+#define kGammaScale ((1 << kGammaFix) - 1) // largest fixed-point linear value (4095)
+#define kGammaTabFix 7 // fixed-point fractional bits precision
+#define kGammaTabScale (1 << kGammaTabFix)      // one table step, in linear units (128)
+#define kGammaTabRounder (kGammaTabScale >> 1)  // half a table step, used for rounding
+#define kGammaTabSize (1 << (kGammaFix - kGammaTabFix)) // number of table intervals (32)
+
+// Lookup tables filled by InitGammaTables(). The linear-to-gamma table has one
+// extra entry so that entry 'tab_pos + 1' can be read for interpolation.
+static int kLinearToGammaTab[kGammaTabSize + 1];
+static uint16_t kGammaToLinearTab[256];
+// Set to 1 once both tables have been filled.
+static volatile int kGammaTablesOk = 0;
+
+// Fills kGammaToLinearTab[] and kLinearToGammaTab[] on the first call; later
+// calls return immediately once kGammaTablesOk has been set.
+static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) {
+    const double tab_scale = (double)(1 << kGammaTabFix) / kGammaScale;
+    const double inv_255 = 1. / 255.;
+    int i;
+
+    if (kGammaTablesOk) {
+        return;
+    }
+    for (i = 0; i <= 255; ++i) {
+        kGammaToLinearTab[i] = (uint16_t)(pow(inv_255 * i, kGamma) * kGammaScale + .5);
+    }
+    for (i = 0; i <= kGammaTabSize; ++i) {
+        kLinearToGammaTab[i] = (int)(255. * pow(tab_scale * i, 1. / kGamma) + .5);
+    }
+    kGammaTablesOk = 1;
+}
+
+// Looks up the fixed-point linear value for the 8-bit gamma-space value 'v'.
+static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) {
+    const uint16_t linear = kGammaToLinearTab[v];
+    return linear;
+}
+
+// Linearly interpolates kLinearToGammaTab[] at position 'v', whose upper bits
+// (above kGammaTabFix + 2) select the table interval and whose lower bits give
+// the position within it. The result is scaled by (kGammaTabScale << 2).
+static WEBP_INLINE int Interpolate(int v) {
+    const int tab_pos = v >> (kGammaTabFix + 2);   // integer part
+    const int x = v & ((kGammaTabScale << 2) - 1); // fractional part
+    int v0, v1;
+    // Validate the index before the table is read, so that a bad position
+    // trips the assert instead of first reading outside the table.
+    assert(tab_pos >= 0);
+    assert(tab_pos + 1 < kGammaTabSize + 1);
+    v0 = kLinearToGammaTab[tab_pos];
+    v1 = kLinearToGammaTab[tab_pos + 1];
+    return v1 * x + v0 * ((kGammaTabScale << 2) - x); // interpolate
+}
+
+// Convert a linear value 'v' to YUV_FIX+2 fixed-point precision
+// U/V value, suitable for RGBToU/V calls.
+// 'base_value' is first shifted left by 'shift', then interpolated through the
+// linear-to-gamma table and finally descaled with rounding.
+static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
+    const uint32_t uplifted = base_value << shift;
+    const int interpolated = Interpolate(uplifted);
+    return (interpolated + kGammaTabRounder) >> kGammaTabFix;
+}
+
+#else
+
+// Variants used when USE_GAMMA_COMPRESSION is not defined: there are no tables
+// to initialize and the conversions do no gamma mapping.
+static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) {}
+// Returns 'v' unchanged.
+static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) {
+    return v;
+}
+// Returns 'base_value' shifted left by 'shift'.
+static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
+    return (int)(base_value << shift);
+}
+
+#endif // USE_GAMMA_COMPRESSION
+
+//------------------------------------------------------------------------------
+// RGB -> YUV conversion
+
+// Converts r/g/b to Y via VP8RGBToY, using YUV_HALF as rounding term when no
+// random generator is supplied and VP8RandomBits(rg, YUV_FIX) otherwise.
+static int RGBToY(int r, int g, int b, VP8Random* const rg) {
+    if (rg != NULL) {
+        return VP8RGBToY(r, g, b, VP8RandomBits(rg, YUV_FIX));
+    }
+    return VP8RGBToY(r, g, b, YUV_HALF);
+}
+
+// Converts r/g/b to U via VP8RGBToU, using (YUV_HALF << 2) as rounding term
+// when no random generator is supplied and VP8RandomBits(rg, YUV_FIX + 2)
+// otherwise.
+static int RGBToU(int r, int g, int b, VP8Random* const rg) {
+    if (rg != NULL) {
+        return VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
+    }
+    return VP8RGBToU(r, g, b, YUV_HALF << 2);
+}
+
+// Converts r/g/b to V via VP8RGBToV, using (YUV_HALF << 2) as rounding term
+// when no random generator is supplied and VP8RandomBits(rg, YUV_FIX + 2)
+// otherwise.
+static int RGBToV(int r, int g, int b, VP8Random* const rg) {
+    if (rg != NULL) {
+        return VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
+    }
+    return VP8RGBToV(r, g, b, YUV_HALF << 2);
+}
+
+//------------------------------------------------------------------------------
+// Smart RGB->YUV conversion
+
+static const int kNumIterations = 6;
+static const int kMinDimensionIterativeConversion = 4;
+
+// We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some
+// banding sometimes. Better use extra precision.
+#define SFIX 2              // fixed-point precision of RGB and Y/W
+typedef int16_t fixed_t;    // signed type with extra SFIX precision for UV
+typedef uint16_t fixed_y_t; // unsigned type with extra SFIX precision for W
+
+#define SHALF (1 << SFIX >> 1)               // half a unit at SFIX precision (2)
+#define MAX_Y_T ((256 << SFIX) - 1)          // largest fixed_y_t value (1023)
+#define SROUNDER (1 << (YUV_FIX + SFIX - 1)) // half of (1 << (YUV_FIX + SFIX))
+
+#if defined(USE_GAMMA_COMPRESSION)
+
+// float variant of gamma-correction
+// We use tables of different size and precision, along with a 'real-world'
+// Gamma value close to ~2.
+#define kGammaF 2.2
+static float kGammaToLinearTabF[MAX_Y_T + 1]; // size scales with SFIX (through MAX_Y_T)
+// Two extra entries: one for the last table point, one duplicated guard entry
+// written by InitGammaTablesF().
+static float kLinearToGammaTabF[kGammaTabSize + 2];
+// Set to 1 once both float tables have been filled.
+static volatile int kGammaTablesFOk = 0;
+
+// Fills kGammaToLinearTabF[] and kLinearToGammaTabF[] on the first call; later
+// calls return immediately once kGammaTablesFOk has been set.
+static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) {
+    const double inv_max_y = 1. / MAX_Y_T;
+    const double inv_tab_size = 1. / kGammaTabSize;
+    int i;
+
+    if (kGammaTablesFOk) {
+        return;
+    }
+    for (i = 0; i <= MAX_Y_T; ++i) {
+        kGammaToLinearTabF[i] = (float)pow(inv_max_y * i, kGammaF);
+    }
+    for (i = 0; i <= kGammaTabSize; ++i) {
+        kLinearToGammaTabF[i] = (float)(MAX_Y_T * pow(inv_tab_size * i, 1. / kGammaF));
+    }
+    // to prevent small rounding errors to cause read-overflow:
+    kLinearToGammaTabF[kGammaTabSize + 1] = kLinearToGammaTabF[kGammaTabSize];
+    kGammaTablesFOk = 1;
+}
+
+// Looks up the linear value for the gamma-space value 'v' in
+// kGammaToLinearTabF[]. 'v' is used as a table index without any range check.
+static WEBP_INLINE float GammaToLinearF(int v) {
+    const float linear = kGammaToLinearTabF[v];
+    return linear;
+}
+
+// Converts the linear value 'value' to gamma space by linear interpolation
+// between two adjacent entries of kLinearToGammaTabF[], rounding to nearest.
+// The table index is derived from 'value' without any range check.
+static WEBP_INLINE int LinearToGammaF(float value) {
+    const float scaled = value * kGammaTabSize;
+    const int index = (int)scaled;
+    const float frac = scaled - (float)index; // fractional part
+    const float lo = kLinearToGammaTabF[index + 0];
+    const float hi = kLinearToGammaTabF[index + 1];
+    const float blended = hi * frac + lo * (1.f - frac); // interpolate
+    return (int)(blended + .5);
+}
+
+#else
+
+// Variants used when USE_GAMMA_COMPRESSION is not defined: there are no tables
+// to initialize and the conversions are plain scalings by MAX_Y_T.
+static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) {}
+// Returns 'v' divided by MAX_Y_T.
+static WEBP_INLINE float GammaToLinearF(int v) {
+    const float norm = 1.f / MAX_Y_T;
+    return norm * v;
+}
+// Returns 'value' multiplied by MAX_Y_T, rounded to nearest.
+static WEBP_INLINE int LinearToGammaF(float value) {
+    return (int)(MAX_Y_T * value + .5);
+}
+
+#endif // USE_GAMMA_COMPRESSION
+
+//------------------------------------------------------------------------------
+
+// Clamps 'v' to the [0, 255] range.
+static uint8_t clip_8b(fixed_t v) {
+    if ((v & ~0xff) == 0) {
+        return (uint8_t)v; // already fits in 8 bits
+    }
+    return (v < 0) ? 0u : 255u;
+}
+
+// Clamps 'y' to the [0, MAX_Y_T] range.
+static fixed_y_t clip_y(int y) {
+    if ((y & ~MAX_Y_T) == 0) {
+        return (fixed_y_t)y; // no bit set outside of the valid range
+    }
+    return (y < 0) ? 0 : MAX_Y_T;
+}
+
+//------------------------------------------------------------------------------
+
+// Integer luma: weighted sum of r/g/b in YUV_FIX fixed-point, rounded with
+// YUV_HALF and shifted back down.
+static int RGBToGray(int r, int g, int b) {
+    const int weighted_r = 19595 * r;
+    const int weighted_g = 38470 * g;
+    const int weighted_b = 7471 * b;
+    return (weighted_r + weighted_g + weighted_b + YUV_HALF) >> YUV_FIX;
+}
+
+// Float luma: 0.299 / 0.587 / 0.114 weighted sum of r, g and b.
+static float RGBToGrayF(float r, float g, float b) {
+    const float luma = 0.299f * r + 0.587f * g + 0.114f * b;
+    return luma;
+}
+
+// Averages four gamma-domain samples in linear space and converts the mean
+// back to the gamma domain.
+static int ScaleDown(int a, int b, int c, int d) {
+    const float lin_a = GammaToLinearF(a);
+    const float lin_b = GammaToLinearF(b);
+    const float lin_c = GammaToLinearF(c);
+    const float lin_d = GammaToLinearF(d);
+    const int averaged = LinearToGammaF(0.25f * (lin_a + lin_b + lin_c + lin_d));
+    return averaged;
+}
+
+// For each of the 'len' R/G/B triplets in 'src', computes the luma in linear
+// space and stores its gamma-domain value in 'dst' (one output per triplet).
+static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int len) {
+    int n;
+    for (n = 0; n < len; ++n) {
+        const fixed_y_t* const px = src + 3 * n;
+        const float lin_r = GammaToLinearF(px[0]);
+        const float lin_g = GammaToLinearF(px[1]);
+        const float lin_b = GammaToLinearF(px[2]);
+        const float lin_y = RGBToGrayF(lin_r, lin_g, lin_b);
+        dst[n] = (fixed_y_t)LinearToGammaF(lin_y);
+    }
+}
+
+// Processes 'len' 2x2 blocks taken from two rows of R/G/B triplets.
+// For each block:
+//  - the four samples of each channel are averaged in linear space (ScaleDown),
+//  - the gray level W of that average is computed,
+//  - (r - W, g - W, b - W) is written to 'dst',
+//  - if 'tmp' is not NULL, clip_y(W) is written twice to 'tmp'.
+// Returns the accumulated absolute difference between the gray level of the
+// plain (gamma-domain) average and W.
+static int UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2, fixed_t* dst, fixed_y_t* tmp, int len) {
+    int total_diff = 0;
+    int n;
+    for (n = 0; n < len; ++n, src1 += 6, src2 += 6, dst += 3) {
+        int lin_avg[3]; // per-channel average computed in linear space
+        int raw_avg[3]; // per-channel rounded average of the raw samples
+        int c, gray;
+        for (c = 0; c < 3; ++c) {
+            lin_avg[c] = ScaleDown(src1[c], src1[c + 3], src2[c], src2[c + 3]);
+            raw_avg[c] = (src1[c] + src1[c + 3] + src2[c] + src2[c + 3] + 2) >> 2;
+        }
+        gray = RGBToGray(lin_avg[0], lin_avg[1], lin_avg[2]);
+        for (c = 0; c < 3; ++c) {
+            dst[c] = (fixed_t)(lin_avg[c] - gray);
+        }
+        if (tmp != NULL) {
+            tmp[2 * n + 0] = tmp[2 * n + 1] = clip_y(gray);
+        }
+        total_diff += abs(RGBToGray(raw_avg[0], raw_avg[1], raw_avg[2]) - gray);
+    }
+    return total_diff;
+}
+
+//------------------------------------------------------------------------------
+
+// 9-3-3-1 weighted blend of A[0], its horizontal neighbour, B[0] and B's
+// horizontal neighbour. The neighbour is 3 entries to the right when
+// 'rightwise' is non-zero, 3 entries to the left otherwise.
+static WEBP_INLINE int Filter(const fixed_t* const A, const fixed_t* const B, int rightwise) {
+    const int side = rightwise ? +3 : -3;
+    const int sum = (A[0] * 9 + A[side] * 3 + B[0] * 3 + B[side]);
+    return (sum + 8) >> 4;
+}
+
+// 3:1 weighted blend of A and B, with rounding.
+static WEBP_INLINE int Filter2(int A, int B) {
+    const int weighted = A * 3 + B;
+    return (weighted + 2) >> 2;
+}
+
+//------------------------------------------------------------------------------
+
+// Promotes an 8b sample to the SFIX-extended precision: shifts it up by SFIX
+// bits and sets the low bits to SHALF.
+static WEBP_INLINE fixed_y_t UpLift(uint8_t a) { // 8bit -> SFIX
+    const int shifted = (fixed_y_t)a << SFIX;
+    return shifted | SHALF;
+}
+
+// Converts one row of 8b R/G/B samples ('step' bytes apart) into interleaved
+// fixed_y_t triplets in 'dst'. When 'pic_width' is odd, the last triplet is
+// duplicated, so 'dst' receives one extra triplet.
+static void ImportOneRow(const uint8_t* const r_ptr,
+                         const uint8_t* const g_ptr,
+                         const uint8_t* const b_ptr,
+                         int step,
+                         int pic_width,
+                         fixed_y_t* const dst) {
+    fixed_y_t* out = dst;
+    int x;
+    for (x = 0; x < pic_width; ++x, out += 3) {
+        const int src_off = x * step;
+        out[0] = UpLift(r_ptr[src_off]);
+        out[1] = UpLift(g_ptr[src_off]);
+        out[2] = UpLift(b_ptr[src_off]);
+    }
+    if (pic_width & 1) { // odd width: replicate the rightmost pixel
+        const fixed_y_t* const last_px = dst + 3 * (pic_width - 1);
+        memcpy(dst + 3 * pic_width, last_px, 3 * sizeof(*dst));
+    }
+}
+
+// Rebuilds two rows of 'w' R/G/B triplets ('out1' and 'out2') from two rows of
+// luma ('best_y' and 'best_y + w') and three rows of chroma differences.
+// 'out1' blends 'cur_uv' with 'prev_uv', 'out2' blends 'cur_uv' with 'next_uv'.
+// The first and last columns only use the vertical blend (Filter2); the
+// columns in between use the 2-D blend (Filter).
+static void InterpolateTwoRows(const fixed_y_t* const best_y,
+                               const fixed_t* const prev_uv,
+                               const fixed_t* const cur_uv,
+                               const fixed_t* const next_uv,
+                               int w,
+                               fixed_y_t* const out1,
+                               fixed_y_t* const out2) {
+    int x, c;
+    { // leftmost column
+        const int luma_top = best_y[0];
+        const int luma_bot = best_y[w];
+        for (c = 0; c < 3; ++c) {
+            out1[c] = clip_y(Filter2(cur_uv[c], prev_uv[c]) + luma_top);
+            out2[c] = clip_y(Filter2(cur_uv[c], next_uv[c]) + luma_bot);
+        }
+    }
+    x = 1;
+    while (x < w - 1) {
+        const int luma_top = best_y[x + 0];
+        const int luma_bot = best_y[x + w];
+        const int rightwise = x & 1; // odd columns look at the right neighbour
+        const int base = 3 * (x >> 1);
+        fixed_y_t* const dst_top = out1 + 3 * x;
+        fixed_y_t* const dst_bot = out2 + 3 * x;
+        for (c = 0; c < 3; ++c) {
+            const int up = Filter(cur_uv + base + c, prev_uv + base + c, rightwise);
+            const int down = Filter(cur_uv + base + c, next_uv + base + c, rightwise);
+            dst_top[c] = clip_y(up + luma_top);
+            dst_bot[c] = clip_y(down + luma_bot);
+        }
+        ++x;
+    }
+    { // rightmost column: 'x' is w - 1 here (for w >= 2)
+        const int luma_top = best_y[x + 0];
+        const int luma_bot = best_y[x + w];
+        const int base = 3 * (x >> 1);
+        for (c = 0; c < 3; ++c) {
+            out1[3 * x + c] = clip_y(Filter2(cur_uv[base + c], prev_uv[base + c]) + luma_top);
+            out2[3 * x + c] = clip_y(Filter2(cur_uv[base + c], next_uv[base + c]) + luma_bot);
+        }
+    }
+}
+
+// Converts SFIX-precision r/g/b to an 8b Y sample (offset 16, clamped).
+static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) {
+    const int shift = YUV_FIX + SFIX;
+    const int weighted = 16839 * r + 33059 * g + 6420 * b + SROUNDER;
+    return clip_8b(16 + (weighted >> shift));
+}
+
+// Converts SFIX-precision r/g/b to an 8b U sample (offset 128, clamped).
+static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) {
+    const int shift = YUV_FIX + SFIX;
+    const int weighted = -9719 * r - 19081 * g + 28800 * b + SROUNDER;
+    return clip_8b(128 + (weighted >> shift));
+}
+
+// Converts SFIX-precision r/g/b to an 8b V sample (offset 128, clamped).
+static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) {
+    const int shift = YUV_FIX + SFIX;
+    const int weighted = +28800 * r - 24116 * g - 4684 * b + SROUNDER;
+    return clip_8b(128 + (weighted >> shift));
+}
+
+// Writes the final Y, U and V planes of 'picture' from the luma plane
+// 'best_y' (even-padded width) and the chroma-difference plane 'best_uv'
+// (one R/G/B triplet per 2x2 block). Always returns 1.
+static int ConvertWRGBToYUV(const fixed_y_t* const best_y, const fixed_t* const best_uv, WebPPicture* const picture) {
+    int x, y;
+    // dimensions rounded up to the next even value
+    const int padded_w = (picture->width + 1) & ~1;
+    const int padded_h = (picture->height + 1) & ~1;
+    const int uv_w = padded_w >> 1;
+    const int uv_h = padded_h >> 1;
+
+    // Luma: each pixel adds its own W to the triplet of its 2x2 block.
+    for (y = 0; y < picture->height; ++y) {
+        for (x = 0; x < picture->width; ++x) {
+            const fixed_t* const rgb = best_uv + 3 * ((x >> 1) + (y >> 1) * uv_w);
+            const int dst_off = x + y * picture->y_stride;
+            const int W = best_y[x + y * padded_w];
+            const int r = rgb[0] + W;
+            const int g = rgb[1] + W;
+            const int b = rgb[2] + W;
+            picture->y[dst_off] = ConvertRGBToY(r, g, b);
+        }
+    }
+
+    // Chroma: one U/V pair per stored triplet.
+    for (y = 0; y < uv_h; ++y) {
+        uint8_t* const u_row = picture->u + y * picture->uv_stride;
+        uint8_t* const v_row = picture->v + y * picture->uv_stride;
+        const fixed_t* rgb = best_uv + 3 * (y * uv_w);
+        for (x = 0; x < uv_w; ++x, rgb += 3) {
+            const int r = rgb[0];
+            const int g = rgb[1];
+            const int b = rgb[2];
+            u_row[x] = ConvertRGBToU(r, g, b);
+            v_row[x] = ConvertRGBToV(r, g, b);
+        }
+    }
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+// Main function
+
+// Allocates a W x H array of T. The element count is computed in 64 bits so
+// the product cannot overflow 'int' before it reaches WebPSafeMalloc().
+#define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((uint64_t)(W) * (H), sizeof(T)))
+
+// Iterative RGB -> YUV420 conversion.
+// The source is first imported into a W (luma) + R/G/B-difference
+// representation; the loop then repeatedly reconstructs the full-resolution
+// rows from the current estimate, compares them with the target and applies
+// the difference as a correction. The result is written to the Y/U/V planes of
+// 'picture' by ConvertWRGBToYUV().
+//   r_ptr, g_ptr, b_ptr: pointers to the first R, G and B sample
+//   step:                distance between two consecutive samples of a channel
+//   rgb_stride:          distance between two consecutive rows
+// Returns the result of ConvertWRGBToYUV() on success. If an allocation fails,
+// VP8_ENC_ERROR_OUT_OF_MEMORY is recorded through WebPEncodingSetError() and
+// its return value is returned.
+static int PreprocessARGB(const uint8_t* const r_ptr,
+                          const uint8_t* const g_ptr,
+                          const uint8_t* const b_ptr,
+                          int step,
+                          int rgb_stride,
+                          WebPPicture* const picture) {
+    // we expand the right/bottom border if needed
+    const int w = (picture->width + 1) & ~1;
+    const int h = (picture->height + 1) & ~1;
+    const int uv_w = w >> 1;
+    const int uv_h = h >> 1;
+    int i, j, iter;
+
+    // TODO(skal): allocate one big memory chunk. But for now, it's easier
+    // for valgrind debugging to have several chunks.
+    fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch
+    fixed_y_t* const best_y = SAFE_ALLOC(w, h, fixed_y_t);
+    fixed_y_t* const target_y = SAFE_ALLOC(w, h, fixed_y_t);
+    fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
+    fixed_t* const best_uv = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
+    fixed_t* const target_uv = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
+    fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
+    int ok;
+    // The error sums are kept in 64 bits: one term is added per 2x2 block, and
+    // both the sum and the '100 * delta' product below can exceed INT_MAX on
+    // large pictures (signed overflow is undefined behaviour).
+    uint64_t diff_sum = 0;
+    const uint64_t first_diff_threshold = (uint64_t)(2.5 * w * h);
+    const uint64_t min_improvement = 5; // stop if improvement is below this %
+    const uint64_t min_first_improvement = 80;
+
+    if (best_y == NULL || best_uv == NULL || target_y == NULL || target_uv == NULL || best_rgb_y == NULL ||
+        best_rgb_uv == NULL || tmp_buffer == NULL) {
+        ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+        goto End;
+    }
+    assert(picture->width >= kMinDimensionIterativeConversion);
+    assert(picture->height >= kMinDimensionIterativeConversion);
+
+    // Import RGB samples to W/RGB representation.
+    for (j = 0; j < picture->height; j += 2) {
+        const int is_last_row = (j == picture->height - 1);
+        fixed_y_t* const src1 = tmp_buffer;
+        fixed_y_t* const src2 = tmp_buffer + 3 * w;
+        const int off1 = j * rgb_stride;
+        const int off2 = off1 + rgb_stride;
+        const int uv_off = (j >> 1) * 3 * uv_w;
+        fixed_y_t* const dst_y = best_y + j * w;
+
+        // prepare two rows of input
+        ImportOneRow(r_ptr + off1, g_ptr + off1, b_ptr + off1, step, picture->width, src1);
+        if (!is_last_row) {
+            ImportOneRow(r_ptr + off2, g_ptr + off2, b_ptr + off2, step, picture->width, src2);
+        } else {
+            // odd height: the missing bottom row is a copy of the last one
+            memcpy(src2, src1, 3 * w * sizeof(*src2));
+        }
+        UpdateW(src1, target_y + (j + 0) * w, w);
+        UpdateW(src2, target_y + (j + 1) * w, w);
+        diff_sum += UpdateChroma(src1, src2, target_uv + uv_off, dst_y, uv_w);
+        // the initial estimate is the target chroma, with the same luma on both rows
+        memcpy(best_uv + uv_off, target_uv + uv_off, 3 * uv_w * sizeof(*best_uv));
+        memcpy(dst_y + w, dst_y, w * sizeof(*dst_y));
+    }
+
+    // Iterate and resolve clipping conflicts.
+    for (iter = 0; iter < kNumIterations; ++iter) {
+        int k;
+        const fixed_t* cur_uv = best_uv;
+        const fixed_t* prev_uv = best_uv;
+        const uint64_t old_diff_sum = diff_sum;
+        diff_sum = 0;
+        for (j = 0; j < h; j += 2) {
+            fixed_y_t* const src1 = tmp_buffer;
+            fixed_y_t* const src2 = tmp_buffer + 3 * w;
+            {
+                // on the last pair of rows, 'next' is the current chroma row
+                const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
+                InterpolateTwoRows(best_y + j * w, prev_uv, cur_uv, next_uv, w, src1, src2);
+                prev_uv = cur_uv;
+                cur_uv = next_uv;
+            }
+
+            UpdateW(src1, best_rgb_y + 0 * w, w);
+            UpdateW(src2, best_rgb_y + 1 * w, w);
+            diff_sum += UpdateChroma(src1, src2, best_rgb_uv, NULL, uv_w);
+
+            // update two rows of Y and one row of RGB
+            for (i = 0; i < 2 * w; ++i) {
+                const int off = i + j * w;
+                const int diff_y = target_y[off] - best_rgb_y[i];
+                const int new_y = (int)best_y[off] + diff_y;
+                best_y[off] = clip_y(new_y);
+            }
+            for (i = 0; i < uv_w; ++i) {
+                const int off = 3 * (i + (j >> 1) * uv_w);
+                int W;
+                for (k = 0; k <= 2; ++k) {
+                    const int diff_uv = (int)target_uv[off + k] - best_rgb_uv[3 * i + k];
+                    best_uv[off + k] += diff_uv;
+                }
+                // remove the gray component of the corrected triplet
+                W = RGBToGray(best_uv[off + 0], best_uv[off + 1], best_uv[off + 2]);
+                for (k = 0; k <= 2; ++k) {
+                    best_uv[off + k] -= W;
+                }
+            }
+        }
+        // test exit condition
+        if (diff_sum > 0) {
+            const uint64_t delta = (diff_sum > old_diff_sum) ? (diff_sum - old_diff_sum) : (old_diff_sum - diff_sum);
+            const uint64_t improvement = 100 * delta / diff_sum;
+            // Check if first iteration gave good result already, without a large
+            // jump of improvement (otherwise it means we need to try few extra
+            // iterations, just to be sure).
+            if (iter == 0 && diff_sum < first_diff_threshold && improvement < min_first_improvement) {
+                break;
+            }
+            // then, check if improvement is stalling.
+            if (improvement < min_improvement) {
+                break;
+            }
+        } else {
+            break;
+        }
+    }
+
+    // final reconstruction
+    ok = ConvertWRGBToYUV(best_y, best_uv, picture);
+
+End:
+    WebPSafeFree(best_y);
+    WebPSafeFree(best_uv);
+    WebPSafeFree(target_y);
+    WebPSafeFree(target_uv);
+    WebPSafeFree(best_rgb_y);
+    WebPSafeFree(best_rgb_uv);
+    WebPSafeFree(tmp_buffer);
+    return ok;
+}
+#undef SAFE_ALLOC
+
+//------------------------------------------------------------------------------
+// "Fast" regular RGB->YUV
+
+// The macros below read a variable named 'rgb_stride' from the scope in which
+// they are expanded; it is the offset from a sample to the one below it.
+
+// Gamma-aware sum of a 2x2 block: the samples at ptr[0], ptr[step] and the two
+// directly below them are converted with GammaToLinear(), summed, and passed
+// to LinearToGamma() with 0 as second argument.
+#define SUM4(ptr, step)                                                                                       \
+    LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[(step)]) + GammaToLinear((ptr)[rgb_stride]) + \
+                      GammaToLinear((ptr)[rgb_stride + (step)]),                                              \
+                  0)
+
+// Same as SUM4 for a single column (2x1 block): only two samples are summed,
+// and LinearToGamma() receives 1 as second argument.
+#define SUM2(ptr) LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1)
+
+// Plain (not gamma-corrected) sum of a sample and the one below it.
+#define SUM2ALPHA(ptr) ((ptr)[0] + (ptr)[rgb_stride])
+// Plain sum over a 2x2 block; the second column is hard-coded 4 bytes further.
+#define SUM4ALPHA(ptr) (SUM2ALPHA(ptr) + SUM2ALPHA((ptr) + 4))
+
+#if defined(USE_INVERSE_ALPHA_TABLE)
+
+static const int kAlphaFix = 19;
+// The following table holds (1 << kAlphaFix) / a. With it, the formula
+// (v * kInvAlpha[a]) >> kAlphaFix is equal to v / a in most (99.6%) cases.
+// Note that this table and constant are adjusted very tightly to fit 32b
+// arithmetic. In particular, they rely on the fact that the operands of
+// 'v / a' are derived as v = (a0*p0 + a1*p1 + a2*p2 + a3*p3) and
+// a = a0 + a1 + a2 + a3, with ai in [0..255] and pi in [0..1<<kGammaFix).
+// The constraint to avoid overflow is: kGammaFix + kAlphaFix <= 31.
+static const uint32_t kInvAlpha[4 * 0xff + 1] = {
+    0, /* alpha = 0 */
+    524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536, 58254, 52428, 47662, 43690, 40329, 37449, 34952, 32768,
+    30840,  29127,  27594,  26214,  24966,  23831, 22795, 21845, 20971, 20164, 19418, 18724, 18078, 17476, 16912, 16384,
+    15887,  15420,  14979,  14563,  14169,  13797, 13443, 13107, 12787, 12483, 12192, 11915, 11650, 11397, 11155, 10922,
+    10699,  10485,  10280,  10082,  9892,   9709,  9532,  9362,  9198,  9039,  8886,  8738,  8594,  8456,  8322,  8192,
+    8065,   7943,   7825,   7710,   7598,   7489,  7384,  7281,  7182,  7084,  6990,  6898,  6808,  6721,  6636,  6553,
+    6472,   6393,   6316,   6241,   6168,   6096,  6026,  5957,  5890,  5825,  5761,  5698,  5637,  5577,  5518,  5461,
+    5405,   5349,   5295,   5242,   5190,   5140,  5090,  5041,  4993,  4946,  4899,  4854,  4809,  4766,  4723,  4681,
+    4639,   4599,   4559,   4519,   4481,   4443,  4405,  4369,  4332,  4297,  4262,  4228,  4194,  4161,  4128,  4096,
+    4064,   4032,   4002,   3971,   3942,   3912,  3883,  3855,  3826,  3799,  3771,  3744,  3718,  3692,  3666,  3640,
+    3615,   3591,   3566,   3542,   3518,   3495,  3472,  3449,  3426,  3404,  3382,  3360,  3339,  3318,  3297,  3276,
+    3256,   3236,   3216,   3196,   3177,   3158,  3139,  3120,  3102,  3084,  3066,  3048,  3030,  3013,  2995,  2978,
+    2962,   2945,   2928,   2912,   2896,   2880,  2864,  2849,  2833,  2818,  2803,  2788,  2774,  2759,  2744,  2730,
+    2716,   2702,   2688,   2674,   2661,   2647,  2634,  2621,  2608,  2595,  2582,  2570,  2557,  2545,  2532,  2520,
+    2508,   2496,   2484,   2473,   2461,   2449,  2438,  2427,  2416,  2404,  2394,  2383,  2372,  2361,  2351,  2340,
+    2330,   2319,   2309,   2299,   2289,   2279,  2269,  2259,  2250,  2240,  2231,  2221,  2212,  2202,  2193,  2184,
+    2175,   2166,   2157,   2148,   2139,   2131,  2122,  2114,  2105,  2097,  2088,  2080,  2072,  2064,  2056,  2048,
+    2040,   2032,   2024,   2016,   2008,   2001,  1993,  1985,  1978,  1971,  1963,  1956,  1949,  1941,  1934,  1927,
+    1920,   1913,   1906,   1899,   1892,   1885,  1879,  1872,  1865,  1859,  1852,  1846,  1839,  1833,  1826,  1820,
+    1814,   1807,   1801,   1795,   1789,   1783,  1777,  1771,  1765,  1759,  1753,  1747,  1741,  1736,  1730,  1724,
+    1718,   1713,   1707,   1702,   1696,   1691,  1685,  1680,  1675,  1669,  1664,  1659,  1653,  1648,  1643,  1638,
+    1633,   1628,   1623,   1618,   1613,   1608,  1603,  1598,  1593,  1588,  1583,  1579,  1574,  1569,  1565,  1560,
+    1555,   1551,   1546,   1542,   1537,   1533,  1528,  1524,  1519,  1515,  1510,  1506,  1502,  1497,  1493,  1489,
+    1485,   1481,   1476,   1472,   1468,   1464,  1460,  1456,  1452,  1448,  1444,  1440,  1436,  1432,  1428,  1424,
+    1420,   1416,   1413,   1409,   1405,   1401,  1398,  1394,  1390,  1387,  1383,  1379,  1376,  1372,  1368,  1365,
+    1361,   1358,   1354,   1351,   1347,   1344,  1340,  1337,  1334,  1330,  1327,  1323,  1320,  1317,  1314,  1310,
+    1307,   1304,   1300,   1297,   1294,   1291,  1288,  1285,  1281,  1278,  1275,  1272,  1269,  1266,  1263,  1260,
+    1257,   1254,   1251,   1248,   1245,   1242,  1239,  1236,  1233,  1230,  1227,  1224,  1222,  1219,  1216,  1213,
+    1210,   1208,   1205,   1202,   1199,   1197,  1194,  1191,  1188,  1186,  1183,  1180,  1178,  1175,  1172,  1170,
+    1167,   1165,   1162,   1159,   1157,   1154,  1152,  1149,  1147,  1144,  1142,  1139,  1137,  1134,  1132,  1129,
+    1127,   1125,   1122,   1120,   1117,   1115,  1113,  1110,  1108,  1106,  1103,  1101,  1099,  1096,  1094,  1092,
+    1089,   1087,   1085,   1083,   1081,   1078,  1076,  1074,  1072,  1069,  1067,  1065,  1063,  1061,  1059,  1057,
+    1054,   1052,   1050,   1048,   1046,   1044,  1042,  1040,  1038,  1036,  1034,  1032,  1030,  1028,  1026,  1024,
+    1022,   1020,   1018,   1016,   1014,   1012,  1010,  1008,  1006,  1004,  1002,  1000,  998,   996,   994,   992,
+    991,    989,    987,    985,    983,    981,   979,   978,   976,   974,   972,   970,   969,   967,   965,   963,
+    961,    960,    958,    956,    954,    953,   951,   949,   948,   946,   944,   942,   941,   939,   937,   936,
+    934,    932,    931,    929,    927,    926,   924,   923,   921,   919,   918,   916,   914,   913,   911,   910,
+    908,    907,    905,    903,    902,    900,   899,   897,   896,   894,   893,   891,   890,   888,   887,   885,
+    884,    882,    881,    879,    878,    876,   875,   873,   872,   870,   869,   868,   866,   865,   863,   862,
+    860,    859,    858,    856,    855,    853,   852,   851,   849,   848,   846,   845,   844,   842,   841,   840,
+    838,    837,    836,    834,    833,    832,   830,   829,   828,   826,   825,   824,   823,   821,   820,   819,
+    817,    816,    815,    814,    812,    811,   810,   809,   807,   806,   805,   804,   802,   801,   800,   799,
+    798,    796,    795,    794,    793,    791,   790,   789,   788,   787,   786,   784,   783,   782,   781,   780,
+    779,    777,    776,    775,    774,    773,   772,   771,   769,   768,   767,   766,   765,   764,   763,   762,
+    760,    759,    758,    757,    756,    755,   754,   753,   752,   751,   750,   748,   747,   746,   745,   744,
+    743,    742,    741,    740,    739,    738,   737,   736,   735,   734,   733,   732,   731,   730,   729,   728,
+    727,    726,    725,    724,    723,    722,   721,   720,   719,   718,   717,   716,   715,   714,   713,   712,
+    711,    710,    709,    708,    707,    706,   705,   704,   703,   702,   701,   700,   699,   699,   698,   697,
+    696,    695,    694,    693,    692,    691,   690,   689,   688,   688,   687,   686,   685,   684,   683,   682,
+    681,    680,    680,    679,    678,    677,   676,   675,   674,   673,   673,   672,   671,   670,   669,   668,
+    667,    667,    666,    665,    664,    663,   662,   661,   661,   660,   659,   658,   657,   657,   656,   655,
+    654,    653,    652,    652,    651,    650,   649,   648,   648,   647,   646,   645,   644,   644,   643,   642,
+    641,    640,    640,    639,    638,    637,   637,   636,   635,   634,   633,   633,   632,   631,   630,   630,
+    629,    628,    627,    627,    626,    625,   624,   624,   623,   622,   621,   621,   620,   619,   618,   618,
+    617,    616,    616,    615,    614,    613,   613,   612,   611,   611,   610,   609,   608,   608,   607,   606,
+    606,    605,    604,    604,    603,    602,   601,   601,   600,   599,   599,   598,   597,   597,   596,   595,
+    595,    594,    593,    593,    592,    591,   591,   590,   589,   589,   588,   587,   587,   586,   585,   585,
+    584,    583,    583,    582,    581,    581,   580,   579,   579,   578,   578,   577,   576,   576,   575,   574,
+    574,    573,    572,    572,    571,    571,   570,   569,   569,   568,   568,   567,   566,   566,   565,   564,
+    564,    563,    563,    562,    561,    561,   560,   560,   559,   558,   558,   557,   557,   556,   555,   555,
+    554,    554,    553,    553,    552,    551,   551,   550,   550,   549,   548,   548,   547,   547,   546,   546,
+    545,    544,    544,    543,    543,    542,   542,   541,   541,   540,   539,   539,   538,   538,   537,   537,
+    536,    536,    535,    534,    534,    533,   533,   532,   532,   531,   531,   530,   530,   529,   529,   528,
+    527,    527,    526,    526,    525,    525,   524,   524,   523,   523,   522,   522,   521,   521,   520,   520,
+    519,    519,    518,    518,    517,    517,   516,   516,   515,   515,   514,   514};
+
+// Note that LinearToGamma() expects the values to be premultiplied by 4,
+// so we incorporate this factor 4 inside the DIVIDE_BY_ALPHA macro directly.
+#define DIVIDE_BY_ALPHA(sum, a) (((sum)*kInvAlpha[(a)]) >> (kAlphaFix - 2))
+
+#else
+
+#define DIVIDE_BY_ALPHA(sum, a) (4 * (sum) / (a))
+
+#endif // USE_INVERSE_ALPHA_TABLE
+
+// Alpha-weighted, gamma-aware average of the four samples of a 2x2 block.
+// Each sample of 'src' is converted with GammaToLinear() and weighted by the
+// matching entry of 'a_ptr'; the weighted sum is divided by 'total_a' (through
+// DIVIDE_BY_ALPHA) and converted back with LinearToGamma().
+// 'total_a' must be in [1, 4 * 255].
+static WEBP_INLINE int LinearToGammaWeighted(
+    const uint8_t* src, const uint8_t* a_ptr, uint32_t total_a, int step, int rgb_stride) {
+    const int below = rgb_stride;             // sample under the top-left one
+    const int below_next = rgb_stride + step; // sample under the top-right one
+    const uint32_t weighted_sum = a_ptr[0] * GammaToLinear(src[0]) + a_ptr[step] * GammaToLinear(src[step]) +
+                                  a_ptr[below] * GammaToLinear(src[below]) +
+                                  a_ptr[below_next] * GammaToLinear(src[below_next]);
+    assert(total_a > 0 && total_a <= 4 * 0xff);
+#if defined(USE_INVERSE_ALPHA_TABLE)
+    // the table-based division must fit in 32 bits
+    assert((uint64_t)weighted_sum * kInvAlpha[total_a] < ((uint64_t)1 << 32));
+#endif
+    return LinearToGamma(DIVIDE_BY_ALPHA(weighted_sum, total_a), 0);
+}
+
+// Converts one row of 'width' R/G/B samples ('step' bytes apart) to luma,
+// writing one byte per pixel to 'dst_y'. 'rg' is forwarded to RGBToY().
+static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr,
+                                      const uint8_t* const g_ptr,
+                                      const uint8_t* const b_ptr,
+                                      int step,
+                                      uint8_t* const dst_y,
+                                      int width,
+                                      VP8Random* const rg) {
+    int x;
+    int src_off = 0;
+    for (x = 0; x < width; ++x) {
+        dst_y[x] = RGBToY(r_ptr[src_off], g_ptr[src_off], b_ptr[src_off], rg);
+        src_off += step;
+    }
+}
+
+// Reduces two rows of 4-byte pixels to one R/G/B/A quadruple per 2x2 block,
+// written to 'dst' (4 entries per block). When the summed alpha of a block is
+// 0 or 4 * 255, the plain gamma-aware sum is used; otherwise each channel is
+// weighted by alpha. An odd 'width' produces one extra quadruple from the last
+// column, with its alpha sum doubled.
+static WEBP_INLINE void AccumulateRGBA(const uint8_t* const r_ptr,
+                                       const uint8_t* const g_ptr,
+                                       const uint8_t* const b_ptr,
+                                       const uint8_t* const a_ptr,
+                                       int rgb_stride,
+                                       uint16_t* dst,
+                                       int width) {
+    int blocks_left;
+    int off = 0; // byte offset of the current block within the row
+    // we loop over 2x2 blocks and produce one R/G/B/A value for each.
+    for (blocks_left = width >> 1; blocks_left > 0; --blocks_left, off += 2 * 4, dst += 4) {
+        const uint32_t a = SUM4ALPHA(a_ptr + off);
+        const int uniform_alpha = (a == 4 * 0xff || a == 0);
+        int r, g, b;
+        if (!uniform_alpha) {
+            r = LinearToGammaWeighted(r_ptr + off, a_ptr + off, a, 4, rgb_stride);
+            g = LinearToGammaWeighted(g_ptr + off, a_ptr + off, a, 4, rgb_stride);
+            b = LinearToGammaWeighted(b_ptr + off, a_ptr + off, a, 4, rgb_stride);
+        } else {
+            r = SUM4(r_ptr + off, 4);
+            g = SUM4(g_ptr + off, 4);
+            b = SUM4(b_ptr + off, 4);
+        }
+        dst[0] = r;
+        dst[1] = g;
+        dst[2] = b;
+        dst[3] = a;
+    }
+    if (width & 1) { // leftover column
+        const uint32_t a = 2u * SUM2ALPHA(a_ptr + off);
+        const int uniform_alpha = (a == 4 * 0xff || a == 0);
+        int r, g, b;
+        if (!uniform_alpha) {
+            r = LinearToGammaWeighted(r_ptr + off, a_ptr + off, a, 0, rgb_stride);
+            g = LinearToGammaWeighted(g_ptr + off, a_ptr + off, a, 0, rgb_stride);
+            b = LinearToGammaWeighted(b_ptr + off, a_ptr + off, a, 0, rgb_stride);
+        } else {
+            r = SUM2(r_ptr + off);
+            g = SUM2(g_ptr + off);
+            b = SUM2(b_ptr + off);
+        }
+        dst[0] = r;
+        dst[1] = g;
+        dst[2] = b;
+        dst[3] = a;
+    }
+}
+
+// Reduces two rows of R/G/B samples ('step' bytes apart) to one gamma-aware
+// R/G/B sum per 2x2 block. Each block occupies 4 entries of 'dst'; only the
+// first three are written. An odd 'width' produces one extra triplet from the
+// last column.
+static WEBP_INLINE void AccumulateRGB(const uint8_t* const r_ptr,
+                                      const uint8_t* const g_ptr,
+                                      const uint8_t* const b_ptr,
+                                      int step,
+                                      int rgb_stride,
+                                      uint16_t* dst,
+                                      int width) {
+    int blocks_left;
+    int off = 0; // byte offset of the current block within the row
+    for (blocks_left = width >> 1; blocks_left > 0; --blocks_left) {
+        dst[0] = SUM4(r_ptr + off, step);
+        dst[1] = SUM4(g_ptr + off, step);
+        dst[2] = SUM4(b_ptr + off, step);
+        off += 2 * step;
+        dst += 4;
+    }
+    if (width & 1) { // leftover column
+        dst[0] = SUM2(r_ptr + off);
+        dst[1] = SUM2(g_ptr + off);
+        dst[2] = SUM2(b_ptr + off);
+    }
+}
+
+// Converts 'width' accumulated quadruples (4 entries each, R/G/B in the first
+// three) to one U and one V sample each. 'rg' is forwarded to RGBToU() and
+// RGBToV(), called in that order for every sample.
+static WEBP_INLINE void ConvertRowsToUV(
+    const uint16_t* rgb, uint8_t* const dst_u, uint8_t* const dst_v, int width, VP8Random* const rg) {
+    int x;
+    for (x = 0; x < width; ++x) {
+        const uint16_t* const px = rgb + 4 * x;
+        const int r = px[0];
+        const int g = px[1];
+        const int b = px[2];
+        dst_u[x] = RGBToU(r, g, b, rg);
+        dst_v[x] = RGBToV(r, g, b, rg);
+    }
+}
+
+// Converts an interleaved RGB(A) source to the YUV(A) planes of 'picture'.
+//   r_ptr, g_ptr, b_ptr, a_ptr: pointers to the first sample of each channel
+//   step:        bytes per pixel
+//   rgb_stride:  bytes per scanline
+//   dithering:   when > 0, a VP8Random generator is initialized with it and
+//                the scalar (non-dsp) conversion functions are used
+//   use_iterative_conversion: when non-zero and the picture is large enough,
+//                PreprocessARGB() is used instead of the fast conversion
+// The Y/U/V(/A) planes are allocated through WebPPictureAllocYUVA().
+// Returns 1 on success and 0 on failure. If the temporary row buffer cannot be
+// allocated, VP8_ENC_ERROR_OUT_OF_MEMORY is recorded on 'picture' so that the
+// caller can tell why the conversion failed.
+static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
+                              const uint8_t* const g_ptr,
+                              const uint8_t* const b_ptr,
+                              const uint8_t* const a_ptr,
+                              int step,       // bytes per pixel
+                              int rgb_stride, // bytes per scanline
+                              float dithering,
+                              int use_iterative_conversion,
+                              WebPPicture* const picture) {
+    int y;
+    const int width = picture->width;
+    const int height = picture->height;
+    const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride);
+    const int is_rgb = (r_ptr < b_ptr); // otherwise it's bgr
+
+    picture->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420;
+    picture->use_argb = 0;
+
+    // disable smart conversion if source is too small (overkill).
+    if (width < kMinDimensionIterativeConversion || height < kMinDimensionIterativeConversion) {
+        use_iterative_conversion = 0;
+    }
+
+    if (!WebPPictureAllocYUVA(picture, width, height)) {
+        return 0;
+    }
+    if (has_alpha) {
+        WebPInitAlphaProcessing();
+        assert(step == 4);
+#if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE)
+        assert(kAlphaFix + kGammaFix <= 31);
+#endif
+    }
+
+    if (use_iterative_conversion) {
+        InitGammaTablesF();
+        if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) {
+            return 0;
+        }
+        if (has_alpha) {
+            WebPExtractAlpha(a_ptr, rgb_stride, width, height, picture->a, picture->a_stride);
+        }
+    } else {
+        const int uv_width = (width + 1) >> 1;
+        int use_dsp = (step == 3); // use special function in this case
+        // temporary storage for accumulated R/G/B values during conversion to U/V
+        uint16_t* const tmp_rgb = (uint16_t*)WebPSafeMalloc(4 * uv_width, sizeof(*tmp_rgb));
+        uint8_t* dst_y = picture->y;
+        uint8_t* dst_u = picture->u;
+        uint8_t* dst_v = picture->v;
+        uint8_t* dst_a = picture->a;
+
+        VP8Random base_rg;
+        VP8Random* rg = NULL;
+        if (dithering > 0.) {
+            VP8InitRandom(&base_rg, dithering);
+            rg = &base_rg;
+            use_dsp = 0; // can't use dsp in this case
+        }
+        WebPInitConvertARGBToYUV();
+        InitGammaTables();
+
+        if (tmp_rgb == NULL) {
+            // malloc error: record it on the picture instead of failing silently
+            return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+        }
+
+        // Downsample Y/U/V planes, two rows at a time
+        for (y = 0; y < (height >> 1); ++y) {
+            int rows_have_alpha = has_alpha;
+            const int off1 = (2 * y + 0) * rgb_stride;
+            const int off2 = (2 * y + 1) * rgb_stride;
+            if (use_dsp) {
+                if (is_rgb) {
+                    WebPConvertRGB24ToY(r_ptr + off1, dst_y, width);
+                    WebPConvertRGB24ToY(r_ptr + off2, dst_y + picture->y_stride, width);
+                } else {
+                    WebPConvertBGR24ToY(b_ptr + off1, dst_y, width);
+                    WebPConvertBGR24ToY(b_ptr + off2, dst_y + picture->y_stride, width);
+                }
+            } else {
+                ConvertRowToY(r_ptr + off1, g_ptr + off1, b_ptr + off1, step, dst_y, width, rg);
+                ConvertRowToY(r_ptr + off2, g_ptr + off2, b_ptr + off2, step, dst_y + picture->y_stride, width, rg);
+            }
+            dst_y += 2 * picture->y_stride;
+            if (has_alpha) {
+                // the alpha-weighted path is only taken when the extraction
+                // call returns 0 for these two rows
+                rows_have_alpha &= !WebPExtractAlpha(a_ptr + off1, rgb_stride, width, 2, dst_a, picture->a_stride);
+                dst_a += 2 * picture->a_stride;
+            }
+            // Collect averaged R/G/B(/A)
+            if (!rows_have_alpha) {
+                AccumulateRGB(r_ptr + off1, g_ptr + off1, b_ptr + off1, step, rgb_stride, tmp_rgb, width);
+            } else {
+                AccumulateRGBA(r_ptr + off1, g_ptr + off1, b_ptr + off1, a_ptr + off1, rgb_stride, tmp_rgb, width);
+            }
+            // Convert to U/V
+            if (rg == NULL) {
+                WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
+            } else {
+                ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
+            }
+            dst_u += picture->uv_stride;
+            dst_v += picture->uv_stride;
+        }
+        if (height & 1) { // extra last row
+            const int off = 2 * y * rgb_stride;
+            int row_has_alpha = has_alpha;
+            if (use_dsp) {
+                if (is_rgb) {
+                    WebPConvertRGB24ToY(r_ptr + off, dst_y, width);
+                } else {
+                    WebPConvertBGR24ToY(b_ptr + off, dst_y, width);
+                }
+            } else {
+                ConvertRowToY(r_ptr + off, g_ptr + off, b_ptr + off, step, dst_y, width, rg);
+            }
+            if (row_has_alpha) {
+                row_has_alpha &= !WebPExtractAlpha(a_ptr + off, 0, width, 1, dst_a, 0);
+            }
+            // Collect averaged R/G/B(/A)
+            // A stride of 0 makes the "row below" alias the last row itself.
+            if (!row_has_alpha) {
+                // Collect averaged R/G/B
+                AccumulateRGB(r_ptr + off, g_ptr + off, b_ptr + off, step, /* rgb_stride = */ 0, tmp_rgb, width);
+            } else {
+                AccumulateRGBA(r_ptr + off, g_ptr + off, b_ptr + off, a_ptr + off,
+                               /* rgb_stride = */ 0, tmp_rgb, width);
+            }
+            if (rg == NULL) {
+                WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
+            } else {
+                ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
+            }
+        }
+        WebPSafeFree(tmp_rgb);
+    }
+    return 1;
+}
+
+#undef SUM4
+#undef SUM2
+#undef SUM4ALPHA
+#undef SUM2ALPHA
+
+//------------------------------------------------------------------------------
+// call for ARGB->YUVA conversion
+
+// Converts picture->argb to YUV(A) through ImportYUVAFromRGBA(). Returns 0 for a
+// NULL picture; a NULL argb buffer or a non-WEBP_YUV420 colorspace is an error.
+static int PictureARGBToYUVA(WebPPicture* picture,
+                             WebPEncCSP colorspace,
+                             float dithering,
+                             int use_iterative_conversion) {
+    const uint8_t* base;
+    if (picture == NULL) return 0;
+    if (picture->argb == NULL) {
+        return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
+    }
+    if ((colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
+        return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
+    }
+    // The position of each channel byte inside a pixel depends on ALPHA_IS_LAST.
+    base = (const uint8_t*)picture->argb;
+    picture->colorspace = WEBP_YUV420;
+    return ImportYUVAFromRGBA(base + (ALPHA_IS_LAST ? 2 : 1), base + (ALPHA_IS_LAST ? 1 : 2),
+                              base + (ALPHA_IS_LAST ? 0 : 3), base + (ALPHA_IS_LAST ? 3 : 0), 4,
+                              4 * picture->argb_stride, dithering, use_iterative_conversion, picture);
+}
+
+int WebPPictureARGBToYUVADithered(WebPPicture* picture, WebPEncCSP colorspace, float dithering) {
+    return PictureARGBToYUVA(picture, colorspace, dithering, 0); // 0: use_iterative_conversion off
+}
+
+int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) {
+    return PictureARGBToYUVA(picture, colorspace, 0.f, 0); // dithering 0.f, use_iterative_conversion off
+}
+
+int WebPPictureSmartARGBToYUVA(WebPPicture* picture) {
+    return PictureARGBToYUVA(picture, WEBP_YUV420, 0.f, 1); // dithering 0.f, use_iterative_conversion on
+}
+
+//------------------------------------------------------------------------------
+// call for YUVA -> ARGB conversion
+
+int WebPPictureYUVAToARGB(WebPPicture* picture) { // returns 1 on success; 0 or the WebPEncodingSetError() result otherwise
+    if (picture == NULL) return 0;
+    if (picture->y == NULL || picture->u == NULL || picture->v == NULL) {
+        return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
+    }
+    if ((picture->colorspace & WEBP_CSP_ALPHA_BIT) && picture->a == NULL) { // alpha flagged but plane missing
+        return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
+    }
+    if ((picture->colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) { // only WEBP_YUV420 is accepted here
+        return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
+    }
+    // Allocate a new argb buffer (discarding the previous one).
+    if (!WebPPictureAllocARGB(picture, picture->width, picture->height)) return 0;
+    picture->use_argb = 1;
+
+    // Convert: row 0 alone, then pairs of luma rows, then a trailing row if any.
+    {
+        int y;
+        const int width = picture->width;
+        const int height = picture->height;
+        const int argb_stride = 4 * picture->argb_stride; // in bytes; picture->argb_stride counts uint32_t samples
+        uint8_t* dst = (uint8_t*)picture->argb;
+        const uint8_t *cur_u = picture->u, *cur_v = picture->v, *cur_y = picture->y;
+        WebPUpsampleLinePairFunc upsample = WebPGetLinePairConverter(ALPHA_IS_LAST);
+
+        // First row, with replicated top samples.
+        upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width);
+        cur_y += picture->y_stride;
+        dst += argb_stride;
+        // Center rows.
+        for (y = 1; y + 1 < height; y += 2) { // two luma rows per iteration
+            const uint8_t* const top_u = cur_u; // chroma row above the pair
+            const uint8_t* const top_v = cur_v;
+            cur_u += picture->uv_stride; // chroma row below the pair
+            cur_v += picture->uv_stride;
+            upsample(cur_y, cur_y + picture->y_stride, top_u, top_v, cur_u, cur_v, dst, dst + argb_stride, width);
+            cur_y += 2 * picture->y_stride;
+            dst += 2 * argb_stride;
+        }
+        // Last row (if needed), with replicated bottom samples.
+        if (height > 1 && !(height & 1)) { // even height: one luma row is left after the pairs
+            upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width);
+        }
+        // Insert alpha values if needed, in replacement for the default 0xff ones.
+        if (picture->colorspace & WEBP_CSP_ALPHA_BIT) {
+            for (y = 0; y < height; ++y) {
+                uint32_t* const argb_dst = picture->argb + y * picture->argb_stride;
+                const uint8_t* const src = picture->a + y * picture->a_stride;
+                int x;
+                for (x = 0; x < width; ++x) {
+                    argb_dst[x] = (argb_dst[x] & 0x00ffffffu) | ((uint32_t)src[x] << 24); // alpha goes in bits 24..31
+                }
+            }
+        }
+    }
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+// automatic import / conversion
+
+static int Import( // returns 1 on success, 0 on allocation failure (or ImportYUVAFromRGBA()'s result for YUV targets)
+    WebPPicture* const picture, const uint8_t* const rgb, int rgb_stride, int step, int swap_rb, int import_alpha) {
+    int y;
+    const uint8_t* const r_ptr = rgb + (swap_rb ? 2 : 0); // swap_rb exchanges the red and blue byte positions
+    const uint8_t* const g_ptr = rgb + 1;
+    const uint8_t* const b_ptr = rgb + (swap_rb ? 0 : 2);
+    const uint8_t* const a_ptr = import_alpha ? rgb + 3 : NULL; // alpha, when imported, is the 4th byte of a sample
+    const int width = picture->width;
+    const int height = picture->height;
+
+    if (!picture->use_argb) { // YUV(A) target: delegate the whole conversion
+        return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride, 0.f /* no dithering */, 0, picture);
+    }
+    if (!WebPPictureAlloc(picture)) return 0;
+
+    VP8EncDspARGBInit();
+
+    if (import_alpha) {
+        assert(step == 4); // alpha import requires 4 bytes per input sample
+        for (y = 0; y < height; ++y) {
+            uint32_t* const dst = &picture->argb[y * picture->argb_stride];
+            const int offset = y * rgb_stride; // byte offset of input row 'y'
+            VP8PackARGB(a_ptr + offset, r_ptr + offset, g_ptr + offset, b_ptr + offset, width, dst);
+        }
+    } else {
+        assert(step >= 3); // at least R, G and B bytes per input sample
+        for (y = 0; y < height; ++y) {
+            uint32_t* const dst = &picture->argb[y * picture->argb_stride];
+            const int offset = y * rgb_stride; // byte offset of input row 'y'
+            VP8PackRGB(r_ptr + offset, g_ptr + offset, b_ptr + offset, width, step, dst);
+        }
+    }
+    return 1;
+}
+
+// Public API
+
+int WebPPictureImportRGB(WebPPicture* picture, const uint8_t* rgb, int rgb_stride) {
+    return (picture != NULL) ? Import(picture, rgb, rgb_stride, 3, 0, 0) : 0; // step=3, swap_rb=0, import_alpha=0
+}
+
+int WebPPictureImportBGR(WebPPicture* picture, const uint8_t* rgb, int rgb_stride) {
+    return (picture != NULL) ? Import(picture, rgb, rgb_stride, 3, 1, 0) : 0; // step=3, swap_rb=1, import_alpha=0
+}
+
+int WebPPictureImportRGBA(WebPPicture* picture, const uint8_t* rgba, int rgba_stride) {
+    return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 0, 1) : 0; // step=4, swap_rb=0, import_alpha=1
+}
+
+int WebPPictureImportBGRA(WebPPicture* picture, const uint8_t* rgba, int rgba_stride) {
+    return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 1, 1) : 0; // step=4, swap_rb=1, import_alpha=1
+}
+
+int WebPPictureImportRGBX(WebPPicture* picture, const uint8_t* rgba, int rgba_stride) {
+    return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 0, 0) : 0; // step=4, swap_rb=0, 4th byte ignored
+}
+
+int WebPPictureImportBGRX(WebPPicture* picture, const uint8_t* rgba, int rgba_stride) {
+    return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 1, 0) : 0; // step=4, swap_rb=1, 4th byte ignored
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/enc/picture_psnr.c b/codec/L2/demos/webpEnc/host/src/enc/picture_psnr.c
new file mode 100644
index 0000000000..e676191c43
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/picture_psnr.c
@@ -0,0 +1,155 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebPPicture tools for measuring distortion
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <math.h>
+#include <stdlib.h>
+
+#include "./vp8enci.h"
+#include "../utils/utils.h"
+
+//------------------------------------------------------------------------------
+// local-min distortion
+//
+// For every pixel in the *reference* picture, we search for the local best
+// match in the compressed image. This is not a symmetrical measure.
+
+#define RADIUS 2 // search radius. Shouldn't be too large.
+
+static void AccumulateLSIM(
+    const uint8_t* src, int src_stride, const uint8_t* ref, int ref_stride, int w, int h, DistoStats* stats) {
+    int x, y;
+    double total_sse = 0.;
+    for (y = 0; y < h; ++y) {
+        const int y_0 = (y - RADIUS < 0) ? 0 : y - RADIUS;
+        const int y_1 = (y + RADIUS + 1 >= h) ? h : y + RADIUS + 1;
+        for (x = 0; x < w; ++x) {
+            const int x_0 = (x - RADIUS < 0) ? 0 : x - RADIUS;
+            const int x_1 = (x + RADIUS + 1 >= w) ? w : x + RADIUS + 1;
+            double best_sse = 255. * 255.;
+            const double value = (double)ref[y * ref_stride + x];
+            int i, j;
+            for (j = y_0; j < y_1; ++j) {
+                const uint8_t* const s = src + j * src_stride;
+                for (i = x_0; i < x_1; ++i) {
+                    const double diff = s[i] - value;
+                    const double sse = diff * diff;
+                    if (sse < best_sse) best_sse = sse;
+                }
+            }
+            total_sse += best_sse;
+        }
+    }
+    stats->w = w * h;
+    stats->xm = 0;
+    stats->ym = 0;
+    stats->xxm = total_sse;
+    stats->yym = 0;
+    stats->xxm = 0;
+}
+#undef RADIUS
+
+//------------------------------------------------------------------------------
+// Distortion
+
+// Max value returned in case of exact similarity.
+static const double kMinDistortion_dB = 99.;
+static float GetPSNR(const double v) {
+    return (float)((v > 0.) ? -4.3429448 * log(v / (255 * 255.)) : kMinDistortion_dB);
+}
+
+int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, int type, float result[5]) { // 1 on success, 0 on bad input
+    DistoStats stats[5]; // [0..3]: one entry per plane/channel, [4]: accumulation of the four others
+    int w, h;
+
+    memset(stats, 0, sizeof(stats));
+
+    if (src == NULL || ref == NULL || src->width != ref->width || src->height != ref->height ||
+        src->use_argb != ref->use_argb || result == NULL) {
+        return 0;
+    }
+    w = src->width;
+    h = src->height;
+
+    if (src->use_argb == 1) {
+        if (src->argb == NULL || ref->argb == NULL) {
+            return 0;
+        } else {
+            int i, j, c;
+            uint8_t *tmp1, *tmp2;
+            uint8_t* const tmp_plane = (uint8_t*)WebPSafeMalloc(2ULL * w * h, sizeof(*tmp_plane)); // two w*h byte planes
+            if (tmp_plane == NULL) return 0;
+            tmp1 = tmp_plane;
+            tmp2 = tmp_plane + w * h;
+            for (c = 0; c < 4; ++c) { // one pass per 8-bit channel of the 32-bit samples
+                for (j = 0; j < h; ++j) {
+                    for (i = 0; i < w; ++i) {
+                        tmp1[j * w + i] = src->argb[i + j * src->argb_stride] >> (c * 8);
+                        tmp2[j * w + i] = ref->argb[i + j * ref->argb_stride] >> (c * 8);
+                    }
+                }
+                if (type >= 2) {
+                    AccumulateLSIM(tmp1, w, tmp2, w, w, h, &stats[c]);
+                } else {
+                    VP8SSIMAccumulatePlane(tmp1, w, tmp2, w, w, h, &stats[c]);
+                }
+            }
+            WebPSafeFree(tmp_plane); // pairs with the WebPSafeMalloc() above
+        }
+    } else {
+        int has_alpha, uv_w, uv_h;
+        if (src->y == NULL || ref->y == NULL || src->u == NULL || ref->u == NULL || src->v == NULL || ref->v == NULL) {
+            return 0;
+        }
+        has_alpha = !!(src->colorspace & WEBP_CSP_ALPHA_BIT);
+        if (has_alpha != !!(ref->colorspace & WEBP_CSP_ALPHA_BIT) ||
+            (has_alpha && (src->a == NULL || ref->a == NULL))) {
+            return 0;
+        }
+
+        uv_w = (src->width + 1) >> 1; // chroma planes are half-size, rounded up
+        uv_h = (src->height + 1) >> 1;
+        if (type >= 2) {
+            AccumulateLSIM(src->y, src->y_stride, ref->y, ref->y_stride, w, h, &stats[0]);
+            AccumulateLSIM(src->u, src->uv_stride, ref->u, ref->uv_stride, uv_w, uv_h, &stats[1]);
+            AccumulateLSIM(src->v, src->uv_stride, ref->v, ref->uv_stride, uv_w, uv_h, &stats[2]);
+            if (has_alpha) {
+                AccumulateLSIM(src->a, src->a_stride, ref->a, ref->a_stride, w, h, &stats[3]);
+            }
+        } else {
+            VP8SSIMAccumulatePlane(src->y, src->y_stride, ref->y, ref->y_stride, w, h, &stats[0]);
+            VP8SSIMAccumulatePlane(src->u, src->uv_stride, ref->u, ref->uv_stride, uv_w, uv_h, &stats[1]);
+            VP8SSIMAccumulatePlane(src->v, src->uv_stride, ref->v, ref->uv_stride, uv_w, uv_h, &stats[2]);
+            if (has_alpha) {
+                VP8SSIMAccumulatePlane(src->a, src->a_stride, ref->a, ref->a_stride, w, h, &stats[3]);
+            }
+        }
+    }
+    // Final stat calculations.
+    {
+        int c;
+        for (c = 0; c <= 4; ++c) { // c == 4 is the accumulated entry, complete once c reaches it
+            if (type == 1) {
+                const double v = VP8SSIMGet(&stats[c]);
+                result[c] = (float)((v < 1.) ? -10.0 * log10(1. - v) : kMinDistortion_dB);
+            } else {
+                const double v = VP8SSIMGetSquaredError(&stats[c]);
+                result[c] = GetPSNR(v);
+            }
+            // Accumulate forward
+            if (c < 4) VP8SSIMAddStats(&stats[c], &stats[4]);
+        }
+    }
+    return 1;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/enc/picture_rescale.c b/codec/L2/demos/webpEnc/host/src/enc/picture_rescale.c
new file mode 100644
index 0000000000..7787ed44cb
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/picture_rescale.c
@@ -0,0 +1,236 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebPPicture tools: copy, crop, rescaling and view.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "./vp8enci.h"
+#include "../utils/rescaler.h"
+#include "../utils/utils.h"
+
+#define HALVE(x) (((x) + 1) >> 1)
+
+// Copy the 'specs' (writer, *opaque, width, height...) of 'src' into 'dst' by
+// plain struct assignment, then mark 'dst' as not owning any memory.
+static void PictureGrabSpecs(const WebPPicture* const src, WebPPicture* const dst) {
+    assert(src != NULL && dst != NULL);
+    *dst = *src; // copies every field, buffer pointers included
+    WebPPictureResetBuffers(dst); // so 'dst' does not keep src's buffers
+}
+
+//------------------------------------------------------------------------------
+
+// For non-ARGB pictures, round 'left' and 'top' down to even values.
+static void SnapTopLeftPosition(const WebPPicture* const pic, int* const left, int* const top) {
+    if (pic->use_argb) return;
+    // Clearing bit 0 aligns the corner with a chroma sample position.
+    *left &= ~1;
+    *top &= ~1;
+}
+
+// Snaps the top-left corner (see SnapTopLeftPosition()) and returns 1 only if
+// the width x height rectangle anchored there lies entirely inside 'pic'.
+static int AdjustAndCheckRectangle(
+    const WebPPicture* const pic, int* const left, int* const top, int width, int height) {
+    SnapTopLeftPosition(pic, left, top);
+    // Negative origins and empty rectangles are rejected before the bound checks.
+    if (*left < 0 || *top < 0 || width <= 0 || height <= 0) return 0;
+    if (*left + width > pic->width || *top + height > pic->height) return 0;
+    return 1;
+}
+
+int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) { // returns 1 on success, 0 on failure
+    if (src == NULL || dst == NULL) return 0;
+    if (src == dst) return 1; // copying onto itself: nothing to do
+    PictureGrabSpecs(src, dst);
+    if (!WebPPictureAlloc(dst)) return 0;
+    if (src->use_argb) {
+        WebPCopyPlane((const uint8_t*)src->argb, 4 * src->argb_stride, (uint8_t*)dst->argb, 4 * dst->argb_stride,
+                      4 * dst->width, dst->height);
+    } else {
+        const int uv_width = HALVE(dst->width);
+        const int uv_height = HALVE(dst->height);
+        WebPCopyPlane(src->y, src->y_stride, dst->y, dst->y_stride, dst->width, dst->height);
+        WebPCopyPlane(src->u, src->uv_stride, dst->u, dst->uv_stride, uv_width, uv_height);
+        WebPCopyPlane(src->v, src->uv_stride, dst->v, dst->uv_stride, uv_width, uv_height);
+        if (dst->a != NULL) {
+            WebPCopyPlane(src->a, src->a_stride, dst->a, dst->a_stride, dst->width, dst->height);
+        }
+    }
+    return 1;
+}
+
+int WebPPictureIsView(const WebPPicture* picture) {
+    // A picture is reported as a view when the memory pointer matching its mode
+    // (memory_argb_ or memory_) is NULL. A NULL picture is not a view.
+    if (picture == NULL) return 0;
+    return picture->use_argb ? (picture->memory_argb_ == NULL)
+                             : (picture->memory_ == NULL);
+}
+
+int WebPPictureView(const WebPPicture* src, int left, int top, int width, int height, WebPPicture* dst) {
+    if (src == NULL || dst == NULL) return 0;
+
+    // verify rectangle position (left/top may be snapped to even values first).
+    if (!AdjustAndCheckRectangle(src, &left, &top, width, height)) return 0;
+
+    if (src != dst) { // beware of aliasing! We don't want to leak 'memory_'.
+        PictureGrabSpecs(src, dst);
+    }
+    dst->width = width;
+    dst->height = height;
+    if (!src->use_argb) {
+        dst->y = src->y + top * src->y_stride + left; // points into src's plane; no samples are copied
+        dst->u = src->u + (top >> 1) * src->uv_stride + (left >> 1); // chroma offsets are halved
+        dst->v = src->v + (top >> 1) * src->uv_stride + (left >> 1);
+        dst->y_stride = src->y_stride;
+        dst->uv_stride = src->uv_stride;
+        if (src->a != NULL) {
+            dst->a = src->a + top * src->a_stride + left;
+            dst->a_stride = src->a_stride;
+        }
+    } else {
+        dst->argb = src->argb + top * src->argb_stride + left; // points into src's buffer; no samples are copied
+        dst->argb_stride = src->argb_stride;
+    }
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+// Picture cropping
+
+int WebPPictureCrop(WebPPicture* pic, int left, int top, int width, int height) { // returns 1 on success, 0 otherwise
+    WebPPicture tmp; // receives the cropped copy, then replaces '*pic'
+
+    if (pic == NULL) return 0;
+    if (!AdjustAndCheckRectangle(pic, &left, &top, width, height)) return 0;
+
+    PictureGrabSpecs(pic, &tmp);
+    tmp.width = width;
+    tmp.height = height;
+    if (!WebPPictureAlloc(&tmp)) return 0; // on failure 'pic' is left as it was
+
+    if (!pic->use_argb) {
+        const int y_offset = top * pic->y_stride + left;
+        const int uv_offset = (top / 2) * pic->uv_stride + left / 2; // chroma offsets are halved
+        WebPCopyPlane(pic->y + y_offset, pic->y_stride, tmp.y, tmp.y_stride, width, height);
+        WebPCopyPlane(pic->u + uv_offset, pic->uv_stride, tmp.u, tmp.uv_stride, HALVE(width), HALVE(height));
+        WebPCopyPlane(pic->v + uv_offset, pic->uv_stride, tmp.v, tmp.uv_stride, HALVE(width), HALVE(height));
+
+        if (tmp.a != NULL) {
+            const int a_offset = top * pic->a_stride + left;
+            WebPCopyPlane(pic->a + a_offset, pic->a_stride, tmp.a, tmp.a_stride, width, height);
+        }
+    } else {
+        const uint8_t* const src = (const uint8_t*)(pic->argb + top * pic->argb_stride + left);
+        WebPCopyPlane(src, pic->argb_stride * 4, (uint8_t*)tmp.argb, tmp.argb_stride * 4, width * 4, height); // strides and width given in bytes
+    }
+    WebPPictureFree(pic); // release the original before taking over tmp's contents
+    *pic = tmp;
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+// Simple picture rescaler
+
+static void RescalePlane(const uint8_t* src,
+                         int src_width,
+                         int src_height,
+                         int src_stride,
+                         uint8_t* dst,
+                         int dst_width,
+                         int dst_height,
+                         int dst_stride,
+                         rescaler_t* const work,
+                         int num_channels) {
+    int rows_done = 0; // source rows handed to the rescaler so far
+    WebPRescaler state;
+    WebPRescalerInit(&state, src_width, src_height, dst, dst_width, dst_height, dst_stride, num_channels, work);
+    while (rows_done < src_height) { // import some rows, then export, until the source is exhausted
+        rows_done += WebPRescalerImport(&state, src_height - rows_done, src + rows_done * src_stride, src_stride);
+        WebPRescalerExport(&state);
+    }
+}
+
+static void AlphaMultiplyARGB(WebPPicture* const pic, int inverse) { // 'inverse' is forwarded to WebPMultARGBRows()
+    assert(pic->argb != NULL);
+    WebPMultARGBRows((uint8_t*)pic->argb, pic->argb_stride * sizeof(*pic->argb), pic->width, pic->height, inverse); // stride passed in bytes
+}
+
+// Runs WebPMultRows() on the luma plane with the alpha plane; no-op without alpha.
+static void AlphaMultiplyY(WebPPicture* const pic, int inverse) {
+    if (pic->a == NULL) return;
+    WebPMultRows(pic->y, pic->y_stride, pic->a, pic->a_stride, pic->width, pic->height, inverse);
+}
+
+int WebPPictureRescale(WebPPicture* pic, int width, int height) { // returns 1 on success, 0 otherwise
+    WebPPicture tmp; // receives the rescaled picture, then replaces '*pic'
+    int prev_width, prev_height;
+    rescaler_t* work; // scratch buffer handed to the rescaler
+
+    if (pic == NULL) return 0;
+    prev_width = pic->width;
+    prev_height = pic->height;
+    if (!WebPRescalerGetScaledDimensions(prev_width, prev_height, &width, &height)) { // may adjust width/height
+        return 0;
+    }
+
+    PictureGrabSpecs(pic, &tmp);
+    tmp.width = width;
+    tmp.height = height;
+    if (!WebPPictureAlloc(&tmp)) return 0;
+
+    if (!pic->use_argb) {
+        work = (rescaler_t*)WebPSafeMalloc(2ULL * width, sizeof(*work));
+        if (work == NULL) {
+            WebPPictureFree(&tmp); // do not leak the freshly allocated target
+            return 0;
+        }
+        // If present, we need to rescale alpha first (for AlphaMultiplyY).
+        if (pic->a != NULL) {
+            WebPInitAlphaProcessing();
+            RescalePlane(pic->a, prev_width, prev_height, pic->a_stride, tmp.a, width, height, tmp.a_stride, work, 1);
+        }
+
+        // We take transparency into account on the luma plane only. That's not
+        // totally exact blending, but still is a good approximation.
+        AlphaMultiplyY(pic, 0); // applied to the source picture, which is freed below
+        RescalePlane(pic->y, prev_width, prev_height, pic->y_stride, tmp.y, width, height, tmp.y_stride, work, 1);
+        AlphaMultiplyY(&tmp, 1); // inverse pass, on the rescaled picture
+
+        RescalePlane(pic->u, HALVE(prev_width), HALVE(prev_height), pic->uv_stride, tmp.u, HALVE(width), HALVE(height),
+                     tmp.uv_stride, work, 1);
+        RescalePlane(pic->v, HALVE(prev_width), HALVE(prev_height), pic->uv_stride, tmp.v, HALVE(width), HALVE(height),
+                     tmp.uv_stride, work, 1);
+    } else {
+        work = (rescaler_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work)); // four channels per sample
+        if (work == NULL) {
+            WebPPictureFree(&tmp); // do not leak the freshly allocated target
+            return 0;
+        }
+        // In order to correctly interpolate colors, we need to apply the alpha
+        // weighting first (black-matting), scale the RGB values, and remove
+        // the premultiplication afterward (while preserving the alpha channel).
+        WebPInitAlphaProcessing();
+        AlphaMultiplyARGB(pic, 0);
+        RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height, pic->argb_stride * 4, (uint8_t*)tmp.argb,
+                     width, height, tmp.argb_stride * 4, work, 4);
+        AlphaMultiplyARGB(&tmp, 1);
+    }
+    WebPPictureFree(pic); // release the original before taking over tmp's contents
+    WebPSafeFree(work);
+    *pic = tmp;
+    return 1;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/enc/picture_tools.c b/codec/L2/demos/webpEnc/host/src/enc/picture_tools.c
new file mode 100644
index 0000000000..4189b900e6
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/picture_tools.c
@@ -0,0 +1,221 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebPPicture tools: alpha handling, etc.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+
+#include "./vp8enci.h"
+#include "../dsp/yuv.h"
+
+static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) {
+    return (((r << 16) | (g << 8) | b) | 0xff000000u); // alpha byte forced to 0xff
+}
+
+//------------------------------------------------------------------------------
+// Helper: clean up fully transparent area to help compressibility.
+
+#define SIZE 8
+#define SIZE2 (SIZE / 2)
+static int is_transparent_area(const uint8_t* ptr, int stride, int size) {
+    // Returns 1 when all size x size bytes of the block at 'ptr' are zero,
+    // 0 as soon as a non-zero byte is met. Rows are 'stride' bytes apart.
+    int col;
+    int rows_left;
+    for (rows_left = size; rows_left > 0; --rows_left, ptr += stride) {
+        for (col = 0; col < size; ++col) {
+            if (ptr[col] != 0) return 0;
+        }
+    }
+    return 1;
+}
+
+static int is_transparent_argb_area(const uint32_t* ptr, int stride, int size) {
+    // Returns 1 when the top byte (bits 24..31) of every sample in the
+    // size x size block at 'ptr' is zero. Rows are 'stride' samples apart.
+    int col;
+    int rows_left;
+    for (rows_left = size; rows_left > 0; --rows_left, ptr += stride) {
+        for (col = 0; col < size; ++col) {
+            if ((ptr[col] >> 24) != 0) return 0;
+        }
+    }
+    return 1;
+}
+
+static void flatten(uint8_t* ptr, int v, int stride, int size) {
+    // Fill the size x size block at 'ptr' with byte 'v'; rows are 'stride' apart.
+    int rows_left;
+    for (rows_left = size; rows_left > 0; --rows_left, ptr += stride) {
+        memset(ptr, v, size);
+    }
+}
+
+static void flatten_argb(uint32_t* ptr, uint32_t v, int stride, int size) {
+    // Set every sample of the size x size block at 'ptr' to 'v'; 'stride' counts samples.
+    int col, rows_left;
+    for (rows_left = size; rows_left > 0; --rows_left, ptr += stride) {
+        for (col = 0; col < size; ++col) ptr[col] = v;
+    }
+}
+
+void WebPCleanupTransparentArea(WebPPicture* pic) { // flattens fully transparent SIZE x SIZE blocks, in place
+    int x, y, w, h;
+    if (pic == NULL) return;
+    w = pic->width / SIZE; // picture size counted in whole blocks
+    h = pic->height / SIZE;
+
+    // note: we ignore the left-overs on right/bottom
+    if (pic->use_argb) {
+        uint32_t argb_value = 0;
+        for (y = 0; y < h; ++y) {
+            int need_reset = 1; // 1: the next transparent block starts a new run and supplies the fill value
+            for (x = 0; x < w; ++x) {
+                const int off = (y * pic->argb_stride + x) * SIZE;
+                if (is_transparent_argb_area(pic->argb + off, pic->argb_stride, SIZE)) {
+                    if (need_reset) {
+                        argb_value = pic->argb[off]; // top-left sample of the run's first block
+                        need_reset = 0;
+                    }
+                    flatten_argb(pic->argb + off, argb_value, pic->argb_stride, SIZE);
+                } else {
+                    need_reset = 1; // a non-transparent block ends the current run
+                }
+            }
+        }
+    } else {
+        const uint8_t* const a_ptr = pic->a;
+        int values[3] = {0}; // fill values for Y, U and V
+        if (a_ptr == NULL) return; // nothing to do
+        for (y = 0; y < h; ++y) {
+            int need_reset = 1; // 1: the next transparent block starts a new run and supplies the fill values
+            for (x = 0; x < w; ++x) {
+                const int off_a = (y * pic->a_stride + x) * SIZE;
+                const int off_y = (y * pic->y_stride + x) * SIZE;
+                const int off_uv = (y * pic->uv_stride + x) * SIZE2; // chroma blocks are SIZE2 x SIZE2
+                if (is_transparent_area(a_ptr + off_a, pic->a_stride, SIZE)) {
+                    if (need_reset) {
+                        values[0] = pic->y[off_y];
+                        values[1] = pic->u[off_uv];
+                        values[2] = pic->v[off_uv];
+                        need_reset = 0;
+                    }
+                    flatten(pic->y + off_y, values[0], pic->y_stride, SIZE);
+                    flatten(pic->u + off_uv, values[1], pic->uv_stride, SIZE2);
+                    flatten(pic->v + off_uv, values[2], pic->uv_stride, SIZE2);
+                } else {
+                    need_reset = 1; // a non-transparent block ends the current run
+                }
+            }
+        }
+    }
+}
+
+#undef SIZE
+#undef SIZE2
+
+void WebPCleanupTransparentAreaLossless(WebPPicture* const pic) {
+    // Fully transparent pixels (alpha byte == 0) have their color channels
+    // cleared too, i.e. the whole 32-bit sample is set to 0.
+    uint32_t* row;
+    int col, rows_left;
+    assert(pic != NULL && pic->use_argb);
+    row = pic->argb;
+    for (rows_left = pic->height; rows_left > 0; --rows_left) {
+        const int num_cols = pic->width;
+        for (col = 0; col < num_cols; ++col) {
+            if ((row[col] >> 24) == 0) {
+                row[col] = 0;
+            }
+        }
+        row += pic->argb_stride;
+    }
+}
+
+//------------------------------------------------------------------------------
+// Blend color and remove transparency info
+
+#define BLEND(V0, V1, ALPHA) ((((V0) * (255 - (ALPHA)) + (V1) * (ALPHA)) * 0x101) >> 16)
+#define BLEND_10BIT(V0, V1, ALPHA) ((((V0) * (1020 - (ALPHA)) + (V1) * (ALPHA)) * 0x101) >> 18)
+
+void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) { // background_rgb is read as 0xRRGGBB
+    const int red = (background_rgb >> 16) & 0xff;
+    const int green = (background_rgb >> 8) & 0xff;
+    const int blue = (background_rgb >> 0) & 0xff;
+    int x, y;
+    if (pic == NULL) return;
+    if (!pic->use_argb) {
+        const int uv_width = (pic->width >> 1); // omit last pixel during u/v loop
+        const int Y0 = VP8RGBToY(red, green, blue, YUV_HALF); // background color converted to Y/U/V
+        // VP8RGBToU/V expects the u/v values summed over four pixels
+        const int U0 = VP8RGBToU(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF);
+        const int V0 = VP8RGBToV(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF);
+        const int has_alpha = pic->colorspace & WEBP_CSP_ALPHA_BIT;
+        if (!has_alpha || pic->a == NULL) return; // nothing to do
+        for (y = 0; y < pic->height; ++y) {
+            // Luma blending
+            uint8_t* const y_ptr = pic->y + y * pic->y_stride;
+            uint8_t* const a_ptr = pic->a + y * pic->a_stride;
+            for (x = 0; x < pic->width; ++x) {
+                const int alpha = a_ptr[x];
+                if (alpha < 0xff) { // samples with alpha 0xff are left untouched
+                    y_ptr[x] = BLEND(Y0, y_ptr[x], a_ptr[x]);
+                }
+            }
+            // Chroma blending every even line
+            if ((y & 1) == 0) {
+                uint8_t* const u = pic->u + (y >> 1) * pic->uv_stride;
+                uint8_t* const v = pic->v + (y >> 1) * pic->uv_stride;
+                uint8_t* const a_ptr2 = (y + 1 == pic->height) ? a_ptr : a_ptr + pic->a_stride; // last row: reuse the same alpha row
+                for (x = 0; x < uv_width; ++x) {
+                    // Average four alpha values into a single blending weight.
+                    // TODO(skal): might lead to visible contouring. Can we do better?
+                    const int alpha = a_ptr[2 * x + 0] + a_ptr[2 * x + 1] + a_ptr2[2 * x + 0] + a_ptr2[2 * x + 1];
+                    u[x] = BLEND_10BIT(U0, u[x], alpha);
+                    v[x] = BLEND_10BIT(V0, v[x], alpha);
+                }
+                if (pic->width & 1) { // rightmost pixel
+                    const int alpha = 2 * (a_ptr[2 * x + 0] + a_ptr2[2 * x + 0]); // only one column left: count it twice
+                    u[x] = BLEND_10BIT(U0, u[x], alpha);
+                    v[x] = BLEND_10BIT(V0, v[x], alpha);
+                }
+            }
+            memset(a_ptr, 0xff, pic->width); // this alpha row is now fully opaque
+        }
+    } else {
+        uint32_t* argb = pic->argb;
+        const uint32_t background = MakeARGB32(red, green, blue);
+        for (y = 0; y < pic->height; ++y) {
+            for (x = 0; x < pic->width; ++x) {
+                const int alpha = (argb[x] >> 24) & 0xff;
+                if (alpha != 0xff) { // samples with alpha 0xff are left untouched
+                    if (alpha > 0) {
+                        int r = (argb[x] >> 16) & 0xff;
+                        int g = (argb[x] >> 8) & 0xff;
+                        int b = (argb[x] >> 0) & 0xff;
+                        r = BLEND(red, r, alpha);
+                        g = BLEND(green, g, alpha);
+                        b = BLEND(blue, b, alpha);
+                        argb[x] = MakeARGB32(r, g, b); // alpha byte becomes 0xff
+                    } else {
+                        argb[x] = background; // alpha 0: the sample is replaced by the background
+                    }
+                }
+            }
+            argb += pic->argb_stride;
+        }
+    }
+}
+
+#undef BLEND
+#undef BLEND_10BIT
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/enc/quant.c b/codec/L2/demos/webpEnc/host/src/enc/quant.c
new file mode 100644
index 0000000000..3b8f133ee6
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/quant.c
@@ -0,0 +1,1449 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   Quantization
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h> // for abs()
+
+#include "./vp8enci.h"
+#include "./cost.h"
+#include "../utils/profiling.h"
+
+#define DO_TRELLIS_I4 1  // trellis quantization for intra4x4 blocks (still gated by it->do_trellis_)
+#define DO_TRELLIS_I16 1 // not a huge gain, but ok at low bitrate.
+#define DO_TRELLIS_UV 0  // disable trellis for UV. Risky. Not worth.
+#define USE_TDISTO 1     // non-zero: kWeightTrellis[] uses per-frequency weights instead of a flat 16
+
+#define MID_ALPHA 64  // neutral value for susceptibility
+#define MIN_ALPHA 30  // lowest usable value for susceptibility
+#define MAX_ALPHA 100 // highest meaningful value for susceptibility
+
+#define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP
+                      // power-law modulation. Must be strictly less than 1.
+
+#define I4_PENALTY 14000 // Rate-penalty for quick i4/i16 decision
+
+// number of non-zero coeffs below which we consider the block very flat
+// (and apply a penalty to complex predictions)
+#define FLATNESS_LIMIT_I16 10 // I16 mode
+#define FLATNESS_LIMIT_I4 3   // I4 mode
+#define FLATNESS_LIMIT_UV 2   // UV mode
+#define FLATNESS_PENALTY 140  // roughly ~1bit per block
+
+#define MULT_8B(a, b) (((a) * (b) + 128) >> 8) // product scaled down by 2^8, with a +128 rounding term
+
+#define RD_DISTO_MULT 256 // distortion multiplier (equivalent of lambda)
+
+// #define DEBUG_BLOCK
+
+//------------------------------------------------------------------------------
+
+#if 1 // defined(DEBUG_BLOCK)
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define DEBUG_BLOCK 0
+#define DEBUG_PickBestIntra4 0
+static void PrintBlockInfo(const VP8EncIterator* const it, const VP8ModeScore* const rd) { // debug dump to stdout
+#if DEBUG_BLOCK // the whole body is compiled out while DEBUG_BLOCK is 0
+    int i, j;
+    const int is_i16 = (it->mb_->type_ == 1);
+    const uint8_t* const y_in = it->yuv_in_ + Y_OFF_ENC;
+    const uint8_t* const y_out = it->yuv_out_ + Y_OFF_ENC;
+    const uint8_t* const uv_in = it->yuv_in_ + U_OFF_ENC;
+    const uint8_t* const uv_out = it->yuv_out_ + U_OFF_ENC;
+    printf("SOURCE / OUTPUT / ABS DELTA\n");
+    for (j = 0; j < 16; ++j) { // one text row per 16-sample row: input, output, then |input - output|
+        for (i = 0; i < 16; ++i) printf("%3d ", y_in[i + j * BPS]);
+        printf("     ");
+        for (i = 0; i < 16; ++i) printf("%3d ", y_out[i + j * BPS]);
+        printf("     ");
+        for (i = 0; i < 16; ++i) {
+            printf("%1d ", abs(y_in[i + j * BPS] - y_out[i + j * BPS]));
+        }
+        printf("\n");
+    }
+    printf("\n"); // newline before the U/V block
+    for (j = 0; j < 8; ++j) { // 8 rows of the area at U_OFF_ENC, printed as columns 0..7 then 8..15
+        for (i = 0; i < 8; ++i) printf("%3d ", uv_in[i + j * BPS]);
+        printf(" ");
+        for (i = 8; i < 16; ++i) printf("%3d ", uv_in[i + j * BPS]);
+        printf("    ");
+        for (i = 0; i < 8; ++i) printf("%3d ", uv_out[i + j * BPS]);
+        printf(" ");
+        for (i = 8; i < 16; ++i) printf("%3d ", uv_out[i + j * BPS]);
+        printf("   ");
+        for (i = 0; i < 8; ++i) {
+            printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS]));
+        }
+        printf(" ");
+        for (i = 8; i < 16; ++i) {
+            printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS]));
+        }
+        printf("\n");
+    }
+    printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n", (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz,
+           (int)rd->score);
+    if (is_i16) { // intra16: a single mode plus the 16 y_dc_levels
+        printf("Mode: %d\n", rd->mode_i16);
+        printf("y_dc_levels:");
+        for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]);
+        printf("\n");
+    } else { // otherwise: one mode per sub-block
+        printf("Modes[16]: ");
+        for (i = 0; i < 16; ++i) printf("%d ", rd->modes_i4[i]);
+        printf("\n");
+    }
+    printf("y_ac_levels:\n");
+    for (j = 0; j < 16; ++j) {
+        for (i = is_i16 ? 1 : 0; i < 16; ++i) { // entry 0 is not printed in the intra16 case
+            printf("%4d ", rd->y_ac_levels[j][i]);
+        }
+        printf("\n");
+    }
+    printf("\n");
+    printf("uv_levels (mode=%d):\n", rd->mode_uv);
+    for (j = 0; j < 8; ++j) {
+        for (i = 0; i < 16; ++i) {
+            printf("%4d ", rd->uv_levels[j][i]);
+        }
+        printf("\n");
+    }
+    printf("\n");
+// printf("yuv_p\n");
+// for (i = 0; i < PRED_SIZE_ENC; i++) {
+//     printf("%4d ", it->yuv_p_[i]);
+//     if ((i % 16 == 0)) {
+//       printf("%d\n",i);
+//     }
+// }
+// printf("\n");
+#endif
+}
+
+#endif // DEBUG_BLOCK
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE int clip(int v, int m, int M) { // clamps v to [m, M]; the lower bound is tested first
+    return (v >= m) ? ((v <= M) ? v : M) : m;
+}
+
+static const uint8_t kZigzag[16] = {0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15}; // scan position -> coeff index
+// DC quantizer steps, looked up with a clipped quantizer index (see SetupMatrices()).
+static const uint8_t kDcTable[128] = {
+    4,   5,   6,   7,   8,   9,   10,  10,  11,  12,  13,  14,  15,  16,  17,  17,  18,  19,  20,  20,  21,  21,
+    22,  22,  23,  23,  24,  25,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,
+    40,  41,  42,  43,  44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,
+    61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  76,  77,  78,  79,  80,  81,
+    82,  83,  84,  85,  86,  87,  88,  89,  91,  93,  95,  96,  98,  100, 101, 102, 104, 106, 108, 110, 112, 114,
+    116, 118, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157};
+// AC quantizer steps used for the y1_ and uv_ matrices, same indexing.
+static const uint16_t kAcTable[128] = {
+    4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
+    26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
+    48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,  78,  80,
+    82,  84,  86,  88,  90,  92,  94,  96,  98,  100, 102, 104, 106, 108, 110, 112, 114, 116, 119, 122, 125, 128,
+    131, 134, 137, 140, 143, 146, 149, 152, 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201,
+    205, 209, 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284};
+// AC quantizer steps used for the y2_ matrix, same indexing.
+static const uint16_t kAcTable2[128] = {
+    8,   8,   9,   10,  12,  13,  15,  17,  18,  20,  21,  23,  24,  26,  27,  29,  31,  32,  34,  35,  37,  38,
+    40,  41,  43,  44,  46,  48,  49,  51,  52,  54,  55,  57,  58,  60,  62,  63,  65,  66,  68,  69,  71,  72,
+    74,  75,  77,  79,  80,  82,  83,  85,  86,  88,  89,  93,  96,  99,  102, 105, 108, 111, 114, 117, 120, 124,
+    127, 130, 133, 136, 139, 142, 145, 148, 151, 155, 158, 161, 164, 167, 170, 173, 176, 179, 184, 189, 193, 198,
+    203, 207, 212, 217, 221, 226, 230, 235, 240, 244, 249, 254, 258, 263, 268, 274, 280, 286, 292, 299, 305, 311,
+    317, 323, 330, 336, 342, 348, 354, 362, 370, 379, 385, 393, 401, 409, 416, 424, 432, 440};
+// Rounding biases; the first index is the 'type' argument of ExpandMatrix() (0: y1_, 1: y2_, 2: uv_).
+static const uint8_t kBiasMatrices[3][2] = { // [luma-ac,luma-dc,chroma][dc,ac]
+    {96, 110},
+    {96, 108},
+    {110, 115}};
+
+// Sharpening by (slightly) raising the hi-frequency coeffs.
+// Hack-ish but helpful for mid-bitrate range. Use with care.
+#define SHARPEN_BITS 11 // number of descaling bits for sharpening bias
+static const uint8_t kFreqSharpening[16] = {0, 30, 60, 90, 30, 60, 90, 90, 60, 90, 90, 90, 90, 90, 90, 90};
+
+//------------------------------------------------------------------------------
+// Initialize quantization parameters in VP8Matrix
+
+// Fills the derived fields of 'm' from q_[0] and q_[1]; returns the rounded mean of q_[0..15].
+static int ExpandMatrix(VP8Matrix* const m, int type) {
+    int k;
+    int q_total = 0;
+    // Entries 0 (DC) and 1 (AC) are computed from the caller-supplied q_[] steps.
+    for (k = 0; k <= 1; ++k) {
+        const int bias = kBiasMatrices[type][k != 0];
+        m->iq_[k] = (1 << QFIX) / m->q_[k];
+        m->bias_[k] = BIAS(bias);
+        // zthresh_ is the exact value such that QUANTDIV(coeff, iQ, B) is:
+        //   * zero if coeff <= zthresh
+        //   * non-zero if coeff > zthresh
+        m->zthresh_[k] = ((1 << QFIX) - 1 - m->bias_[k]) / m->iq_[k];
+    }
+    // Every remaining AC position replicates entry 1.
+    for (k = 15; k >= 2; --k) {
+        m->zthresh_[k] = m->zthresh_[1];
+        m->bias_[k] = m->bias_[1];
+        m->iq_[k] = m->iq_[1];
+        m->q_[k] = m->q_[1];
+    }
+    for (k = 0; k < 16; ++k) {
+        // we only use sharpening for AC luma coeffs (type 0)
+        m->sharpen_[k] = (type == 0) ? ((kFreqSharpening[k] * m->q_[k]) >> SHARPEN_BITS) : 0;
+        // accumulate the steps for the average returned below
+        q_total += m->q_[k];
+    }
+    return (q_total + 8) >> 4;
+}
+
+static void SetupMatrices(VP8Encoder* enc) {
+    const int num_segments = enc->segment_hdr_.num_segments_;
+    // tlambda_ is only non-zero for methods 4 and above.
+    const int tlambda_scale = (enc->method_ < 4) ? 0 : enc->config_->sns_strength;
+    int s;
+    for (s = 0; s < num_segments; ++s) {
+        VP8SegmentInfo* const seg = &enc->dqm_[s];
+        const int q = seg->quant_;
+        int q4, q16, quv;
+        // DC (index 0) then AC (index 1) steps of the three matrices.
+        seg->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)];
+        seg->y2_.q_[0] = kDcTable[clip(q + enc->dq_y2_dc_, 0, 127)] * 2;
+        seg->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)];
+        seg->y1_.q_[1] = kAcTable[clip(q, 0, 127)];
+        seg->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)];
+        seg->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)];
+        // Expand to the full 16 entries; each call returns the average step.
+        q4 = ExpandMatrix(&seg->y1_, 0);
+        q16 = ExpandMatrix(&seg->y2_, 1);
+        quv = ExpandMatrix(&seg->uv_, 2);
+
+        // Rate-distortion multipliers derived from the average steps.
+        seg->lambda_i4_ = (3 * q4 * q4) >> 7;
+        seg->lambda_i16_ = (3 * q16 * q16);
+        seg->lambda_uv_ = (3 * quv * quv) >> 6;
+        seg->lambda_mode_ = (1 * q4 * q4) >> 7;
+        seg->lambda_trellis_i4_ = (7 * q4 * q4) >> 3;
+        seg->lambda_trellis_i16_ = (q16 * q16) >> 2;
+        seg->lambda_trellis_uv_ = (quv * quv) << 1;
+        seg->tlambda_ = (tlambda_scale * q4) >> 5;
+        seg->max_edge_ = 0;
+        seg->min_disto_ = 10 * seg->y1_.q_[0]; // quantization-aware min disto
+    }
+}
+
+//------------------------------------------------------------------------------
+// Initialize filtering parameters
+
+// Very small filter-strength values have close to no visual effect. So we can
+// save a little decoding-CPU by turning filtering off for these.
+#define FSTRENGTH_CUTOFF 2
+
+static void SetupFilterStrength(VP8Encoder* const enc) {
+    // level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering.
+    const int level0 = 5 * enc->config_->filter_strength;
+    int s;
+    for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
+        VP8SegmentInfo* const seg = &enc->dqm_[s];
+        // The strength is driven by a quarter of the AC quantizer step.
+        const int qstep = kAcTable[clip(seg->quant_, 0, 127)] >> 2;
+        const int base_strength = VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, qstep);
+        // A larger 'beta_' shrinks the strength. Values under the cutoff become 0, the cap is 63.
+        const int f = base_strength * level0 / (256 + seg->beta_);
+        seg->fstrength_ = (f >= FSTRENGTH_CUTOFF) ? ((f <= 63) ? f : 63) : 0;
+    }
+    // We record the initial strength (mainly for the case of 1-segment only).
+    enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0);
+    enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;
+    enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness;
+}
+
+//------------------------------------------------------------------------------
+
+// Note: if you change the values below, remember that the max range
+// allowed by the syntax for DQ_UV is [-16,16].
+#define MAX_DQ_UV (6)
+#define MIN_DQ_UV (-4)
+
+// We want to emulate jpeg-like behaviour where the expected "good" quality
+// is around q=75. Internally, our "good" middle is around c=50. So we
+// map accordingly using linear piece-wise function
+static double QualityToCompression(double c) {
+    double remapped;
+    if (c < 0.75) {
+        remapped = c * (2. / 3.);
+    } else {
+        remapped = 2. * c - 1.;
+    }
+    // File size scales roughly as pow(quantizer, 3.) (the exponent is really between 2.8 and 3.2),
+    // so compressibility is scaled by the inverse power-law: quant ~= compression ^ 1/3.
+    return pow(remapped, 1 / 3.);
+}
+
+static double QualityToJPEGCompression(double c, double alpha) {
+    // The complexity 'alpha' and quality setting 'c' are mapped to a compression exponent
+    // empirically matched to libjpeg6b, so that output sizes are roughly similar to JPEG's.
+    const double amin = 0.30, amax = 0.85;
+    const double exp_min = 0.4, exp_max = 0.9;
+    const double slope = (exp_min - exp_max) / (amax - amin);
+    double expn;
+    if (alpha > amax) {
+        expn = exp_min;
+    } else if (alpha < amin) {
+        expn = exp_max;
+    } else { // linear interpolation inside [amin, amax]
+        expn = exp_max + slope * (alpha - amin);
+    }
+    return pow(c, expn);
+}
+
+static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1, const VP8SegmentInfo* const S2) {
+    return !(S1->quant_ != S2->quant_ || S1->fstrength_ != S2->fstrength_); // same quantizer and filter strength
+}
+
+static void SimplifySegments(VP8Encoder* const enc) {
+    const int num_segments = enc->segment_hdr_.num_segments_;
+    int map[NUM_MB_SEGMENTS] = {0, 1, 2, 3}; // old segment id -> compacted id
+    int num_final_segments = 1;              // segment 0 is always kept
+    int cur, kept;
+    // Compact dqm_[] in place: equivalent segments are merged, the others move down to the next free slot.
+    for (cur = 1; cur < num_segments; ++cur) {
+        const VP8SegmentInfo* const candidate = &enc->dqm_[cur];
+        // look for an equivalent segment among the ones already kept
+        for (kept = 0; kept < num_final_segments; ++kept) {
+            if (SegmentsAreEquivalent(candidate, &enc->dqm_[kept])) break;
+        }
+        map[cur] = kept;
+        if (kept < num_final_segments) continue; // merged with 'kept'
+        // No match: 'kept' now equals num_final_segments, the next free slot.
+        if (num_final_segments != cur) {
+            enc->dqm_[num_final_segments] = enc->dqm_[cur];
+        }
+        ++num_final_segments;
+    }
+    if (num_final_segments >= num_segments) return; // nothing was merged
+    { // Remap
+        const int num_mbs = enc->mb_w_ * enc->mb_h_;
+        int i;
+        for (i = num_mbs - 1; i >= 0; --i) {
+            enc->mb_info_[i].segment_ = map[enc->mb_info_[i].segment_];
+        }
+        enc->segment_hdr_.num_segments_ = num_final_segments;
+        // Replicate the trailing segment infos (it's mostly cosmetics)
+        for (i = num_final_segments; i < num_segments; ++i) {
+            enc->dqm_[i] = enc->dqm_[num_final_segments - 1];
+        }
+    }
+}
+
+void VP8SetSegmentParams(VP8Encoder* const enc, float quality) { // 'quality' is divided by 100 below
+    int i;
+    int dq_uv_ac, dq_uv_dc;
+    const int num_segments = enc->segment_hdr_.num_segments_;
+    const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.; // weight of alpha_ in the exponent
+    const double Q = quality / 100.;
+    const double c_base =
+        enc->config_->emulate_jpeg_size ? QualityToJPEGCompression(Q, enc->alpha_ / 255.) : QualityToCompression(Q);
+    for (i = 0; i < num_segments; ++i) {
+        // We modulate the base coefficient to accommodate for the quantization
+        // susceptibility and allow denser segments to be quantized more.
+        const double expn = 1. - amp * enc->dqm_[i].alpha_;
+        const double c = pow(c_base, expn);
+        const int q = (int)(127. * (1. - c)); // a larger c gives a smaller quantizer index
+        assert(expn > 0.);
+        enc->dqm_[i].quant_ = clip(q, 0, 127);
+    }
+
+    // purely indicative in the bitstream (except for the 1-segment case)
+    enc->base_quant_ = enc->dqm_[0].quant_;
+
+    // fill-in values for the unused segments (required by the syntax)
+    for (i = num_segments; i < NUM_MB_SEGMENTS; ++i) {
+        enc->dqm_[i].quant_ = enc->base_quant_;
+    }
+
+    // uv_alpha_ is normally spread around ~60. The useful range is
+    // typically ~30 (quite bad) to ~100 (ok to decimate UV more).
+    // We map it to the safe maximal range of MAX/MIN_DQ_UV for dq_uv.
+    dq_uv_ac = (enc->uv_alpha_ - MID_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV) / (MAX_ALPHA - MIN_ALPHA);
+    // we rescale by the user-defined strength of adaptation
+    dq_uv_ac = dq_uv_ac * enc->config_->sns_strength / 100;
+    // and make it safe.
+    dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV);
+    // We also boost the dc-uv-quant a little, based on sns-strength, since
+    // U/V channels are quite more reactive to high quants (flat DC-blocks
+    // tend to appear, and are unpleasant).
+    dq_uv_dc = -4 * enc->config_->sns_strength / 100;
+    dq_uv_dc = clip(dq_uv_dc, -15, 15); // 4bit-signed max allowed
+
+    enc->dq_y1_dc_ = 0; // TODO(skal): dq-lum
+    enc->dq_y2_dc_ = 0;
+    enc->dq_y2_ac_ = 0;
+    enc->dq_uv_dc_ = dq_uv_dc;
+    enc->dq_uv_ac_ = dq_uv_ac;
+
+    SetupFilterStrength(enc); // initialize segments' filtering, eventually
+    // fstrength_ must be set before this point: SimplifySegments() compares quant_ and fstrength_.
+    if (num_segments > 1) SimplifySegments(enc);
+    // SimplifySegments() may have lowered segment_hdr_.num_segments_, which SetupMatrices() re-reads.
+    SetupMatrices(enc); // finalize quantization matrices
+}
+
+//------------------------------------------------------------------------------
+// Form the predictions in cache
+
+// Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index
+const int VP8I16ModeOffsets[4] = {I16DC16, I16TM16, I16VE16, I16HE16}; // added to it->yuv_p_ in ReconstructIntra16()
+const int VP8UVModeOffsets[4] = {C8DC8, C8TM8, C8VE8, C8HE8};          // added to it->yuv_p_ in ReconstructUV()
+
+// Must be indexed using {B_DC_PRED -> B_HU_PRED} as index
+const int VP8I4ModeOffsets[NUM_BMODES] = {I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4};
+
+void VP8MakeLuma16Preds(const VP8EncIterator* const it) {
+    const uint8_t* const top_row = (it->y_ != 0) ? it->y_top_ : NULL;   // NULL when it->y_ is 0
+    const uint8_t* const left_col = (it->x_ != 0) ? it->y_left_ : NULL; // NULL when it->x_ is 0
+    VP8EncPredLuma16(it->yuv_p_, left_col, top_row);
+}
+
+void VP8MakeChroma8Preds(const VP8EncIterator* const it) {
+    const uint8_t* const top_row = (it->y_ != 0) ? it->uv_top_ : NULL;  // NULL when it->y_ is 0
+    const uint8_t* const left_col = (it->x_ != 0) ? it->u_left_ : NULL; // NULL when it->x_ is 0
+    VP8EncPredChroma8(it->yuv_p_, left_col, top_row);
+}
+
+static const uint8_t VP8TopLeftI4ttt[16] = {17, 21, 25, 29, 13, 17, 21, 25, 9, 13, 17, 21, 5, 9, 13, 17};
+
+void VP8MakeIntra4Preds(const VP8EncIterator* const it) { // thin wrapper around VP8EncPredLuma4()
+    VP8EncPredLuma4(it->yuv_p_, it->i4_top_); // presumably fills yuv_p_ from the i4_top_ samples - TODO confirm
+}
+
+//------------------------------------------------------------------------------
+// Quantize
+
+// Layout:
+// +----+----+
+// |YYYY|UUVV| 0
+// |YYYY|UUVV| 4
+// |YYYY|....| 8
+// |YYYY|....| 12
+// +----+----+
+
+const int VP8Scan[16] = {
+    // Luma: offset (x + y * BPS) of each of the sixteen 4x4 blocks, in raster order
+    0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, 0 + 4 * BPS,  4 + 4 * BPS,  8 + 4 * BPS,  12 + 4 * BPS,
+    0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
+};
+
+static const int VP8ScanUV[4 + 4] = { // same kind of offsets, relative to the pointer at U_OFF_ENC
+    0 + 0 * BPS, 4 + 0 * BPS,  0 + 4 * BPS, 4 + 4 * BPS, // U
+    8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V
+};
+
+//------------------------------------------------------------------------------
+// Distortion measurement
+
+static const uint16_t kWeightY[16] = {38, 32, 20, 9, 32, 28, 17, 7, 20, 17, 10, 4, 9, 7, 4, 2};
+// Per-coefficient weights applied to the squared-error delta in TrellisQuantizeBlock().
+static const uint16_t kWeightTrellis[16] = {
+#if USE_TDISTO == 0
+    16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16
+#else
+    30, 27, 19, 11, 27, 24, 17, 10, 19,
+    17, 12, 8,  11, 10, 8,  6
+#endif
+};
+
+// Init/Copy the common fields in score.
+static void InitScore(VP8ModeScore* const rd) {
+    rd->score = MAX_COST; // the score starts at MAX_COST, every other common field at zero
+    rd->nz = 0;
+    rd->H = 0;
+    rd->R = 0;
+    rd->SD = 0;
+    rd->D = 0;
+}
+
+static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
+    dst->score = src->score;
+    dst->nz = src->nz; // note that nz is not accumulated, but just copied.
+    dst->H = src->H;
+    dst->R = src->R;
+    dst->SD = src->SD;
+    dst->D = src->D;
+}
+
+static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
+    dst->score += src->score;
+    dst->nz |= src->nz; // here, new nz bits are accumulated.
+    dst->H += src->H;
+    dst->R += src->R;
+    dst->SD += src->SD;
+    dst->D += src->D;
+}
+
+//------------------------------------------------------------------------------
+// Performs trellis-optimized quantization.
+
+// Trellis node
+typedef struct {
+    int8_t prev;   // best previous node (a delta in [-MIN_DELTA, MAX_DELTA])
+    int8_t sign;   // sign of coeff_i (1 when the original coefficient is negative)
+    int16_t level; // level (stored as a non-negative magnitude)
+} Node;
+
+// Score state
+typedef struct {
+    score_t score;         // partial RD score
+    const uint16_t* costs; // shortcut to cost tables
+} ScoreState;
+
+// If a coefficient was quantized to a value Q (using a neutral bias),
+// we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA]
+// We don't test negative values though.
+#define MIN_DELTA 0 // how much lower level to try
+#define MAX_DELTA 1 // how much higher
+#define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA) // number of candidate levels per coefficient
+#define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA]) // relies on a local array named 'nodes'
+#define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA]) // relies on a local array named 'score_states'
+
+static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) { // score = weighted distortion + lambda * rate
+    rd->score = RD_DISTO_MULT * (rd->D + rd->SD) + lambda * (rd->R + rd->H);
+}
+
+static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, score_t distortion) { // same weighting as SetRDScore
+    return RD_DISTO_MULT * distortion + lambda * rate;
+}
+
+static int TrellisQuantizeBlock(const VP8Encoder* const enc,
+                                int16_t in[16],  // in/out: coefficients, overwritten with the dequantized values
+                                int16_t out[16], // out: signed levels, indexed by scan position
+                                int ctx0,        // context used for the first coded coefficient
+                                int coeff_type,  // 0 means that position 0 is skipped ('first' is 1)
+                                const VP8Matrix* const mtx,
+                                int lambda) { // rate multiplier of the RD score
+    const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
+    CostArrayPtr const costs = (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
+    const int first = (coeff_type == 0) ? 1 : 0;
+    Node nodes[16][NUM_NODES];
+    ScoreState score_states[2][NUM_NODES];
+    ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA);
+    ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA);
+    int best_path[3] = {-1, -1, -1}; // store best-last/best-level/best-previous
+    score_t best_score;
+    int n, m, p, last;
+    // Step 1: find the last position worth inspecting and seed the source score states.
+    {
+        score_t cost;
+        const int thresh = mtx->q_[1] * mtx->q_[1] / 4;
+        const int last_proba = probas[VP8EncBands[first]][ctx0][0];
+
+        // compute the position of the last interesting coefficient
+        last = first - 1;
+        for (n = 15; n >= first; --n) {
+            const int j = kZigzag[n];
+            const int err = in[j] * in[j];
+            if (err > thresh) {
+                last = n;
+                break;
+            }
+        }
+        // we don't need to go inspect up to n = 16 coeffs. We can just go up
+        // to last + 1 (inclusive) without losing much.
+        if (last < 15) ++last;
+
+        // compute 'skip' score. This is the max score one can do.
+        cost = VP8BitCost(0, last_proba);
+        best_score = RDScoreTrellis(lambda, cost, 0);
+
+        // initialize source node.
+        for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
+            const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
+            ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);
+            ss_cur[m].costs = costs[first][ctx0];
+        }
+    }
+
+    // traverse trellis.
+    for (n = first; n <= last; ++n) {
+        const int j = kZigzag[n];
+        const uint32_t Q = mtx->q_[j];
+        const uint32_t iQ = mtx->iq_[j];
+        const uint32_t B = BIAS(0x00); // neutral bias
+        // note: it's important to take sign of the _original_ coeff,
+        // so we don't have to consider level < 0 afterward.
+        const int sign = (in[j] < 0);
+        const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
+        int level0 = QUANTDIV(coeff0, iQ, B);
+        if (level0 > MAX_LEVEL) level0 = MAX_LEVEL;
+
+        { // Swap current and previous score states
+            ScoreState* const tmp = ss_cur;
+            ss_cur = ss_prev;
+            ss_prev = tmp;
+        }
+
+        // test all alternate level values around level0.
+        for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
+            Node* const cur = &NODE(n, m);
+            int level = level0 + m;
+            const int ctx = (level > 2) ? 2 : level;
+            const int band = VP8EncBands[n + 1];
+            score_t base_score, last_pos_score;
+            score_t best_cur_score = MAX_COST;
+            int best_prev = 0; // default, in case
+
+            ss_cur[m].score = MAX_COST;
+            ss_cur[m].costs = costs[n + 1][ctx];
+            if (level > MAX_LEVEL || level < 0) { // node is dead?
+                continue;
+            }
+
+            // Compute extra rate cost if last coeff's position is < 15
+            {
+                const score_t last_pos_cost = (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0;
+                last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0);
+            }
+
+            {
+                // Compute delta_error = how much coding this level will
+                // subtract to max_error as distortion.
+                // Here, distortion = sum of (|coeff_i| - level_i * Q_i)^2
+                const int new_error = coeff0 - level * Q;
+                const int delta_error = kWeightTrellis[j] * (new_error * new_error - coeff0 * coeff0);
+                base_score = RDScoreTrellis(lambda, 0, delta_error);
+            }
+
+            // Inspect all possible non-dead predecessors. Retain only the best one.
+            for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
+                // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically
+                // eliminated since their score can't be better than the current best.
+                const score_t cost = VP8LevelCost(ss_prev[p].costs, level);
+                // Examine node assuming it's a non-terminal one.
+                const score_t score = base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
+                if (score < best_cur_score) {
+                    best_cur_score = score;
+                    best_prev = p;
+                }
+            }
+            // Store best finding in current node.
+            cur->sign = sign;
+            cur->level = level;
+            cur->prev = best_prev;
+            ss_cur[m].score = best_cur_score;
+
+            // Now, record best terminal node (and thus best entry in the graph).
+            if (level != 0) {
+                const score_t score = best_cur_score + last_pos_score;
+                if (score < best_score) {
+                    best_score = score;
+                    best_path[0] = n;         // best eob position
+                    best_path[1] = m;         // best node index
+                    best_path[2] = best_prev; // best predecessor
+                }
+            }
+        }
+    }
+
+    // Fresh start
+    memset(in + first, 0, (16 - first) * sizeof(*in));
+    memset(out + first, 0, (16 - first) * sizeof(*out));
+    if (best_path[0] == -1) {
+        return 0; // skip!
+    }
+
+    {
+        // Unwind the best path.
+        // Note: best-prev on terminal node is not necessarily equal to the
+        // best_prev for non-terminal. So we patch best_path[2] in.
+        int nz = 0;
+        int best_node = best_path[1];
+        n = best_path[0];
+        NODE(n, best_node).prev = best_path[2]; // force best-prev for terminal
+
+        for (; n >= first; --n) {
+            const Node* const node = &NODE(n, best_node);
+            const int j = kZigzag[n];
+            out[n] = node->sign ? -node->level : node->level;
+            nz |= node->level;
+            in[j] = out[n] * mtx->q_[j]; // dequantized value, written back at coefficient index j
+            best_node = node->prev;
+        }
+        return (nz != 0); // 1 when at least one retained level is non-zero
+    }
+}
+
+#undef NODE
+
+//------------------------------------------------------------------------------
+// Performs: difference, transform, quantize, back-transform, add
+// all at once. Output is the reconstructed block in *yuv_out, and the
+// quantized levels in *levels.
+
+static int ReconstructIntra16(VP8EncIterator* const it, VP8ModeScore* const rd, uint8_t* const yuv_out, int mode) {
+    const VP8Encoder* const enc = it->enc_;
+    const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
+    const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
+    const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+    int nz = 0;
+    int n;
+    int16_t tmp[16][16], dc_tmp[16];
+    // Forward transform; the loop steps by 2, so each call presumably handles blocks n and n + 1 - TODO confirm.
+    for (n = 0; n < 16; n += 2) {
+        VP8FTransform2(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
+    }
+    VP8FTransformWHT(tmp[0], dc_tmp);
+    nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24; // result goes to bit 24
+
+    if (DO_TRELLIS_I16 && it->do_trellis_) {
+        int x, y;
+        VP8IteratorNzToBytes(it);
+        for (y = 0, n = 0; y < 4; ++y) {
+            for (x = 0; x < 4; ++x, ++n) {
+                const int ctx = it->top_nz_[x] + it->left_nz_[y];
+                const int non_zero =
+                    TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0, &dqm->y1_, dqm->lambda_trellis_i16_);
+                it->top_nz_[x] = it->left_nz_[y] = non_zero;
+                rd->y_ac_levels[n][0] = 0;
+                nz |= non_zero << n; // bit n flags block n
+            }
+        }
+    } else {
+        for (n = 0; n < 16; n += 2) {
+            // Zero-out the first coeff, so that: a) nz is correct below, and
+            // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
+            tmp[n][0] = tmp[n + 1][0] = 0;
+            nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
+            assert(rd->y_ac_levels[n + 0][0] == 0);
+            assert(rd->y_ac_levels[n + 1][0] == 0);
+        }
+    }
+
+    // Transform back
+    VP8TransformWHT(dc_tmp, tmp[0]);
+    for (n = 0; n < 16; n += 2) {
+        VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1);
+    }
+    // nz: bits 0..15 come from the sixteen blocks above, bit 24 from the WHT levels.
+    return nz;
+}
+
+static int ReconstructIntra4(
+    VP8EncIterator* const it, int16_t levels[16], const uint8_t* const src, uint8_t* const yuv_out, int mode) {
+    const VP8Encoder* const enc = it->enc_;
+    const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+    const uint8_t* const pred = it->yuv_p_ + VP8I4ModeOffsets[mode];
+    int16_t coeffs[16];
+    int has_nz;
+    // difference, transform, quantize, back-transform, add: all against the 'mode' prediction
+    VP8FTransform(src, pred, coeffs);
+    if (!(DO_TRELLIS_I4 && it->do_trellis_)) {
+        has_nz = VP8EncQuantizeBlock(coeffs, levels, &dqm->y1_);
+    } else {
+        // context: top (column i4_ & 3) plus left (row i4_ >> 2) non-zero flags
+        const int ctx = it->top_nz_[it->i4_ & 3] + it->left_nz_[it->i4_ >> 2];
+        has_nz = TrellisQuantizeBlock(enc, coeffs, levels, ctx, 3, &dqm->y1_, dqm->lambda_trellis_i4_);
+    }
+    VP8ITransform(pred, coeffs, yuv_out, 0);
+    return has_nz;
+}
+
+static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, uint8_t* const yuv_out, int mode) {
+    const VP8Encoder* const enc = it->enc_;
+    const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
+    const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
+    const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+    int nz = 0;
+    int n;
+    int16_t tmp[8][16]; // one 16-coefficient block per VP8ScanUV[] entry
+
+    for (n = 0; n < 8; n += 2) {
+        VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
+    }
+    if (DO_TRELLIS_UV && it->do_trellis_) { // never taken while DO_TRELLIS_UV is 0
+        int ch, x, y;
+        for (ch = 0, n = 0; ch <= 2; ch += 2) {
+            for (y = 0; y < 2; ++y) {
+                for (x = 0; x < 2; ++x, ++n) {
+                    const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+                    const int non_zero =
+                        TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2, &dqm->uv_, dqm->lambda_trellis_uv_);
+                    it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
+                    nz |= non_zero << n;
+                }
+            }
+        }
+    } else {
+        for (n = 0; n < 8; n += 2) {
+            nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
+        }
+    }
+
+    for (n = 0; n < 8; n += 2) {
+        VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1);
+    }
+    return (nz << 16); // the per-block flags gathered in bits 0..7 are returned in bits 16..23
+}
+
+//------------------------------------------------------------------------------
+// RD-opt decision. Reconstruct each mode, evaluate distortion and bit-cost.
+// Pick the mode with the lowest RD-cost = Rate + lambda * Distortion.
+
+static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {
+    // Raise dqm->max_edge_ to the largest magnitude found among DCs[1], DCs[4] and DCs[5], which
+    // serve as a measure of the delta between sub-4x4 blocks. No return value; only *dqm is updated.
+    const int v0 = abs(DCs[1]);
+    const int v1 = abs(DCs[4]);
+    const int v2 = abs(DCs[5]);
+    int max_v = (v1 > v0) ? v1 : v0; // true maximum of v0/v1 (the former expression selected the minimum)
+    max_v = (v2 > max_v) ? v2 : max_v;
+    if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;
+}
+
+static void SwapModeScore(VP8ModeScore** a, VP8ModeScore** b) { // exchange the pointers stored in *a and *b
+    VP8ModeScore* const former_b = *b;
+    *b = *a;
+    *a = former_b;
+}
+
+static void SwapPtr(uint8_t** a, uint8_t** b) { // exchange the byte pointers stored in *a and *b
+    uint8_t* const former_b = *b;
+    *b = *a;
+    *a = former_b;
+}
+
+static void SwapOut(VP8EncIterator* const it) { // exchange the iterator's yuv_out_ and yuv_out2_ pointers
+    SwapPtr(&it->yuv_out2_, &it->yuv_out_);
+}
+
+static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) { // 1 if "flat", 0 otherwise
+    score_t ac_count = 0; // non-zero AC levels seen so far, accumulated over all blocks
+    int blk, k;
+    for (blk = 0; blk < num_blocks; ++blk) { // TODO(skal): refine positional scoring?
+        const int16_t* const block = levels + 16 * blk; // each block holds 16 consecutive levels
+        for (k = 1; k < 16; ++k) { // index 0 (DC) is skipped, only AC matters
+            if (block[k] != 0) ++ac_count;
+            if (ac_count > thresh) return 0; // more than 'thresh' non-zero AC levels: not flat
+        }
+    }
+    return 1; // at most 'thresh' non-zero AC levels in total
+}
+
+static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) { // RD search over the intra16 modes
+    const int kNumBlocks = 16;
+    VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
+    const int lambda = dqm->lambda_i16_;
+    const int tlambda = dqm->tlambda_;
+    const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
+    VP8ModeScore rd_tmp;
+    VP8ModeScore* rd_cur = &rd_tmp; // candidate currently being evaluated
+    VP8ModeScore* rd_best = rd; // best candidate so far; trades places with rd_cur on improvement
+    int mode;
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    rd->mode_i16 = -1;
+
+    for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
+        uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC; // scratch buffer (re-read: SwapOut() may exchange it)
+        rd_cur->mode_i16 = mode;
+
+        // Reconstruct the luma with this mode into the scratch buffer
+        rd_cur->nz = ReconstructIntra16(it, rd_cur, tmp_dst, mode);
+
+        // Measure RD-score: D/SD are distortion terms, H the fixed mode cost, R the coefficient cost
+        rd_cur->D = VP8SSE16x16(src, tmp_dst);
+        rd_cur->SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0;
+        rd_cur->H = VP8FixedCostsI16[mode];
+        rd_cur->R = VP8GetCostLuma16(it, rd_cur);
+        if (mode > 0 && IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {
+            // penalty to avoid flat area to be mispredicted by complex mode
+            rd_cur->R += FLATNESS_PENALTY * kNumBlocks;
+        }
+
+        // Score with lambda_i16_. A better candidate (or mode 0) trades places with the best: pointers and buffers.
+        SetRDScore(lambda, rd_cur);
+#if 1
+        if (mode == 0 || rd_cur->score < rd_best->score) {
+            SwapModeScore(&rd_cur, &rd_best);
+            SwapOut(it);
+        }
+#endif
+    }
+    // After an odd number of swaps the best result sits in the local rd_tmp: copy it to the caller's rd.
+    if (rd_best != rd) {
+        memcpy(rd, rd_best, sizeof(*rd));
+    }
+    SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision.
+    VP8SetIntra16Mode(it, rd->mode_i16);
+
+    // we have a blocky macroblock (only DCs are non-zero) with fairly high
+    // distortion, record max delta so we can later adjust the minimal filtering
+    // strength needed to smooth these blocks out.
+    if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) {
+        StoreMaxDelta(dqm, rd->y_dc_levels);
+    }
+    StopProfiling(&stop_watch, &timeBestIntra16, &countBestIntra16);
+}
+
+//------------------------------------------------------------------------------
+
+// Returns the VP8FixedCostsI4 row selected by the modes of the top and left neighbours of block it->i4_.
+static const uint16_t* GetCostModeI4(VP8EncIterator* const it, const uint8_t modes[16]) {
+    const int stride = it->enc_->preds_w_;
+    const int col = (it->i4_ & 3), row = it->i4_ >> 2;
+    const int top = (row != 0) ? modes[it->i4_ - 4] : it->preds_[col - stride]; // first row: read it->preds_
+    const int left = (col != 0) ? modes[it->i4_ - 1] : it->preds_[row * stride - 1]; // first column: likewise
+    return VP8FixedCostsI4[top][left];
+}
+
+static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { // returns 1 if intra4 beats rd->score
+    const VP8Encoder* const enc = it->enc_;
+    const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+    const int lambda = dqm->lambda_i4_;
+    const int tlambda = dqm->tlambda_;
+    const uint8_t* const src0 = it->yuv_in_ + Y_OFF_ENC;
+    uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF_ENC;
+    int total_header_bits = 0;
+    VP8ModeScore rd_best;
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    if (enc->max_i4_header_bits_ == 0) { // a zero header-bit budget disables the intra4 search
+        StopProfiling(&stop_watch, &timeBestIntra4, &countBestIntra4);
+        return 0;
+    }
+
+    InitScore(&rd_best);
+    rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145)
+    SetRDScore(dqm->lambda_mode_, &rd_best);
+#if DEBUG_PickBestIntra4
+    if ((it->x_ >= 118 && it->y_ == 0) || (it->x_ == 0 && it->y_ == 1)) {
+        printf("line:%d [%d][%d]it->i4_:%d\n", __LINE__, it->y_, it->x_, it->i4_);
+        printf("it_var->i4_:%d i4_boundary_p\n", it->i4_);
+        int yuv_p_index = 0;
+        for (yuv_p_index = 0; yuv_p_index < 37; yuv_p_index++) {
+            printf("[%d]:[%d] ", yuv_p_index, it->i4_boundary_[yuv_p_index]);
+            if (yuv_p_index % 30 == 0) {
+                printf("\n");
+            }
+        }
+        printf("\n-------------------------------\n");
+    }
+#endif
+    VP8IteratorStartI4(it);
+#if DEBUG_PickBestIntra4
+    if ((it->x_ >= 118 && it->y_ == 0) || (it->x_ == 0 && it->y_ == 1)) {
+        printf("line:%d [%d][%d]it->i4_:%d\n", __LINE__, it->y_, it->x_, it->i4_);
+        printf("it_var->i4_:%d i4_boundary_p\n", it->i4_);
+        int yuv_p_index = 0;
+        for (yuv_p_index = 0; yuv_p_index < 37; yuv_p_index++) {
+            printf("[%d]:[%d] ", yuv_p_index, it->i4_boundary_[yuv_p_index]);
+            if (yuv_p_index % 30 == 0) {
+                printf("\n");
+            }
+        }
+        printf("\n-------------------------------\n");
+    }
+#endif
+    do { // one iteration per 4x4 sub-block, until VP8IteratorRotateI4() returns 0
+        const int kNumBlocks = 1;
+        VP8ModeScore rd_i4;
+        int mode;
+        int best_mode = -1;
+        const uint8_t* const src = src0 + VP8Scan[it->i4_];
+        const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
+        uint8_t* best_block = best_blocks + VP8Scan[it->i4_];
+        uint8_t* tmp_dst = it->yuv_p_ + I4TMP; // scratch buffer.
+
+        InitScore(&rd_i4);
+
+#if DEBUG_PickBestIntra4
+        if ((it->i4_ == 15 && it->x_ >= 118 && it->y_ == 0) || (it->i4_ <= 1 && it->x_ == 0 && it->y_ == 1)) {
+            printf("line:%d [%d][%d]it->i4_:%d\n", __LINE__, it->y_, it->x_, it->i4_);
+            int yuv_p_index = 0;
+            for (yuv_p_index = 0; yuv_p_index < PRED_SIZE_ENC; yuv_p_index++) {
+                printf("[%d]:[%d] ", yuv_p_index, it->yuv_p_[yuv_p_index]);
+                if (yuv_p_index % 30 == 0) {
+                    printf("\n");
+                }
+            }
+            printf("\n-------------------------------modes_i4\n");
+            for (yuv_p_index = 0; yuv_p_index < 16; yuv_p_index++) {
+                printf("[%d]:[%d] ", yuv_p_index, rd->modes_i4[yuv_p_index]);
+                if (yuv_p_index % 30 == 0) {
+                    printf("\n");
+                }
+            }
+            printf("\n-------------------------------mode_costs\n");
+            for (yuv_p_index = 0; yuv_p_index < NUM_BMODES; yuv_p_index++) { // one cost per mode, as in the mode loop
+                printf("[%d]:[%d] ", yuv_p_index, mode_costs[yuv_p_index]);
+                if (yuv_p_index % 30 == 0) {
+                    printf("\n");
+                }
+            }
+            printf("\n-------------------------------\n");
+        }
+#endif
+        VP8MakeIntra4Preds(it);
+        for (mode = 0; mode < NUM_BMODES; ++mode) {
+#if DEBUG_PickBestIntra4
+            if ((it->i4_ == 15 && it->x_ >= 118 && it->y_ == 0 && mode == NUM_BMODES - 1) ||
+                (it->i4_ <= 1 && it->x_ == 0 && it->y_ == 1 && mode == 0)) {
+                printf("line:%d [%d][%d]it->i4_:%d\n", __LINE__, it->y_, it->x_, it->i4_);
+                printf("mode:%d\n", mode);
+                int yuv_p_index = 0;
+                for (yuv_p_index = 0; yuv_p_index < PRED_SIZE_ENC; yuv_p_index++) {
+                    printf("[%d]:[%d] ", yuv_p_index, it->yuv_p_[yuv_p_index]);
+                    if (yuv_p_index % 30 == 0) {
+                        printf("\n");
+                    }
+                }
+                printf("\n-------------------------------\n");
+            }
+#endif
+            VP8ModeScore rd_tmp;
+            int16_t tmp_levels[16];
+
+#if DEBUG_PickBestIntra4
+            if (it->x_ == 0 && it->y_ == 1 && it->i4_ <= 1 && mode <= 1) {
+                printf("line: %d\n", __LINE__);
+                printf("it->i4_:%d i4_boundary_p\n", it->i4_);
+                int yuv_p_index = 0;
+                printf("\nyuv_out2_-------------------------------\n");
+                for (yuv_p_index = 0; yuv_p_index < YUV_SIZE_ENC; yuv_p_index++) {
+                    printf("[%d]:[%d] ", yuv_p_index, it->yuv_out2_[yuv_p_index]);
+                    if (yuv_p_index % 30 == 0) {
+                        printf("\n");
+                    }
+                }
+                printf("\nyuv_out_-------------------------------\n");
+                for (yuv_p_index = 0; yuv_p_index < YUV_SIZE_ENC; yuv_p_index++) {
+                    printf("[%d]:[%d] ", yuv_p_index, it->yuv_out_[yuv_p_index]);
+                    if (yuv_p_index % 30 == 0) {
+                        printf("\n");
+                    }
+                }
+                printf("\n-------------------------------\n");
+            }
+#endif
+            // Reconstruct this sub-block with the candidate mode; the result is shifted to bit i4_ of nz
+            rd_tmp.nz = ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_;
+
+            // Compute RD-score
+            rd_tmp.D = VP8SSE4x4(src, tmp_dst);
+            rd_tmp.SD = tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY)) : 0;
+            rd_tmp.H = mode_costs[mode];
+
+#if DEBUG_PickBestIntra4
+            if (it->x_ == 0 && it->y_ == 1 && it->i4_ <= 1 && mode <= 1) {
+                printf("line: %d\n", __LINE__);
+                printf("it->i4_:%d i4_boundary_p\n", it->i4_);
+                int yuv_p_index = 0;
+                printf("\nyuv_out2_-------------------------------\n");
+                for (yuv_p_index = 0; yuv_p_index < YUV_SIZE_ENC; yuv_p_index++) {
+                    printf("[%d]:[%d] ", yuv_p_index, it->yuv_out2_[yuv_p_index]);
+                    if (yuv_p_index % 30 == 0) {
+                        printf("\n");
+                    }
+                }
+                printf("\nyuv_out_-------------------------------\n");
+                for (yuv_p_index = 0; yuv_p_index < YUV_SIZE_ENC; yuv_p_index++) {
+                    printf("[%d]:[%d] ", yuv_p_index, it->yuv_out_[yuv_p_index]);
+                    if (yuv_p_index % 30 == 0) {
+                        printf("\n");
+                    }
+                }
+                printf("\n-------------------------------\n");
+            }
+#endif
+            // Add flatness penalty
+            if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {
+                rd_tmp.R = FLATNESS_PENALTY * kNumBlocks;
+            } else {
+                rd_tmp.R = 0;
+            }
+
+            // early-out check: score before the coefficient cost is added; skip if already no better than the best
+            SetRDScore(lambda, &rd_tmp);
+            if (best_mode >= 0 && rd_tmp.score >= rd_i4.score) continue;
+
+            // finish computing score
+            rd_tmp.R += VP8GetCostLuma4(it, tmp_levels);
+            SetRDScore(lambda, &rd_tmp);
+
+            if (best_mode < 0 || rd_tmp.score < rd_i4.score) {
+                CopyScore(&rd_i4, &rd_tmp);
+
+#if DEBUG_PickBestIntra4
+                if (it->x_ == 0 && it->y_ == 1) {
+                    printf("[i4_:%d]best_mode:%d mode:%d rd_i4.H:%lld\n", it->i4_, best_mode, mode, (long long)rd_i4.H);
+                }
+#endif
+                best_mode = mode;
+                // Swapping makes best_block point at the samples just produced; the other buffer becomes scratch.
+                SwapPtr(&tmp_dst, &best_block);
+                memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(rd_best.y_ac_levels[it->i4_]));
+            }
+        }
+        SetRDScore(dqm->lambda_mode_, &rd_i4);
+
+#if DEBUG_PickBestIntra4
+        if (it->x_ == 0 && it->y_ == 1) {
+            printf("[i4_:%d]best_mode:%d rd_i4.H:%lld rd_best.H:%lld\n", it->i4_, best_mode, (long long)rd_i4.H, (long long)rd_best.H);
+        }
+#endif
+        AddScore(&rd_best, &rd_i4);
+
+#if DEBUG_PickBestIntra4
+        if (it->x_ == 0 && it->y_ == 1) {
+            printf("[i4_:%d]best_mode:%d rd_i4.H:%lld rd_best.H:%lld\n", it->i4_, best_mode, (long long)rd_i4.H, (long long)rd_best.H);
+        }
+#endif
+        if (rd_best.score >= rd->score) { // accumulated intra4 score no longer beats the caller's score: give up
+            StopProfiling(&stop_watch, &timeBestIntra4, &countBestIntra4);
+#if DEBUG_PickBestIntra4
+            if ((it->x_ == 119 && it->y_ == 0)) {
+                printf("line:%d [%d][%d]it->i4_:%d\n", __LINE__, it->y_, it->x_, it->i4_);
+                int yuv_p_index = 0;
+                for (yuv_p_index = 0; yuv_p_index < PRED_SIZE_ENC; yuv_p_index++) {
+                    printf("[%d]:[%d] ", yuv_p_index, it->yuv_p_[yuv_p_index]);
+                    if (yuv_p_index % 30 == 0) {
+                        printf("\n");
+                    }
+                }
+                printf("\n-------------------------------modes_i4\n");
+                for (yuv_p_index = 0; yuv_p_index < 16; yuv_p_index++) {
+                    printf("[%d]:[%d] ", yuv_p_index, rd->modes_i4[yuv_p_index]);
+                    if (yuv_p_index % 30 == 0) {
+                        printf("\n");
+                    }
+                }
+                printf("\n-------------------------------mode_costs\n");
+                for (yuv_p_index = 0; yuv_p_index < NUM_BMODES; yuv_p_index++) { // one cost per mode, as in the mode loop
+                    printf("[%d]:[%d] ", yuv_p_index, mode_costs[yuv_p_index]);
+                    if (yuv_p_index % 30 == 0) {
+                        printf("\n");
+                    }
+                }
+                printf("\n-------------------------------\n");
+            }
+#endif
+            return 0;
+        }
+        total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode];
+        if (total_header_bits > enc->max_i4_header_bits_) { // header-bit budget exceeded: give up on intra4
+            StopProfiling(&stop_watch, &timeBestIntra4, &countBestIntra4);
+#if DEBUG_PickBestIntra4
+            if ((it->x_ == 119 && it->y_ == 0)) {
+                printf("line:%d [%d][%d]it->i4_:%d\n", __LINE__, it->y_, it->x_, it->i4_);
+                int yuv_p_index = 0;
+                for (yuv_p_index = 0; yuv_p_index < PRED_SIZE_ENC; yuv_p_index++) {
+                    printf("[%d]:[%d] ", yuv_p_index, it->yuv_p_[yuv_p_index]);
+                    if (yuv_p_index % 30 == 0) {
+                        printf("\n");
+                    }
+                }
+                printf("\n-------------------------------modes_i4\n");
+                for (yuv_p_index = 0; yuv_p_index < 16; yuv_p_index++) {
+                    printf("[%d]:[%d] ", yuv_p_index, rd->modes_i4[yuv_p_index]);
+                    if (yuv_p_index % 30 == 0) {
+                        printf("\n");
+                    }
+                }
+                printf("\n-------------------------------mode_costs\n");
+                for (yuv_p_index = 0; yuv_p_index < NUM_BMODES; yuv_p_index++) { // one cost per mode, as in the mode loop
+                    printf("[%d]:[%d] ", yuv_p_index, mode_costs[yuv_p_index]);
+                    if (yuv_p_index % 30 == 0) {
+                        printf("\n");
+                    }
+                }
+                printf("\n-------------------------------\n");
+            }
+#endif
+            return 0;
+        }
+
+        // Copy selected samples if not in the right place already.
+        // (After an odd number of SwapPtr() calls above, best_block points into the yuv_p_ scratch
+        // area instead of its home slot inside best_blocks.)
+        if (best_block != best_blocks + VP8Scan[it->i4_]) {
+            VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);
+        }
+
+        rd->modes_i4[it->i4_] = best_mode;
+        it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);
+    } while (VP8IteratorRotateI4(it, best_blocks));
+
+#if 1
+    // finalize state: publish score, modes and levels in rd; SwapOut() makes yuv_out2_ (best_blocks) the output
+    CopyScore(rd, &rd_best);
+    VP8SetIntra4Mode(it, rd->modes_i4);
+    SwapOut(it);
+    memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels));
+#endif
+    StopProfiling(&stop_watch, &timeBestIntra4, &countBestIntra4);
+    return 1; // select intra4x4 over intra16x16
+}
+
+//------------------------------------------------------------------------------
+
+static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { // choose the chroma mode by RD score
+    const int kChromaBlocks = 8;
+    const VP8SegmentInfo* const seg = &it->enc_->dqm_[it->mb_->segment_];
+    const int uv_lambda = seg->lambda_uv_;
+    const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
+    uint8_t* const out_home = it->yuv_out_ + U_OFF_ENC; // where the winning samples must end up
+    uint8_t* best_out = out_home;
+    uint8_t* scratch = it->yuv_out2_ + U_OFF_ENC;
+    VP8ModeScore best;
+    int mode;
+    StopProfilingWatch watch;
+    StartProfiling(&watch);
+
+    InitScore(&best);
+    rd->mode_uv = -1;
+    for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
+        VP8ModeScore cand;
+
+        // Rebuild the chroma with this mode in the scratch buffer.
+        cand.nz = ReconstructUV(it, &cand, scratch, mode);
+
+        // Rate and distortion terms of the candidate.
+        cand.D = VP8SSE16x8(src, scratch);
+        cand.SD = 0; // not calling TDisto here: it tends to flatten areas.
+        cand.H = VP8FixedCostsUV[mode];
+        cand.R = VP8GetCostUV(it, &cand);
+        if (mode > 0 && IsFlat(cand.uv_levels[0], kChromaBlocks, FLATNESS_LIMIT_UV)) {
+            cand.R += FLATNESS_PENALTY * kChromaBlocks;
+        }
+
+        SetRDScore(uv_lambda, &cand);
+        if (mode != 0 && cand.score >= best.score) continue; // not an improvement
+        // New best: keep its score, mode and levels; the buffer just written now holds the best samples.
+        CopyScore(&best, &cand);
+        rd->mode_uv = mode;
+        memcpy(rd->uv_levels, cand.uv_levels, sizeof(rd->uv_levels));
+        SwapPtr(&best_out, &scratch);
+    }
+    VP8SetIntraUVMode(it, rd->mode_uv);
+    AddScore(rd, &best);
+    if (best_out != out_home) { // copy 16x8 block if needed
+        VP8Copy16x8(best_out, out_home);
+    }
+    StopProfiling(&watch, &timeBestUV, &countBestUV);
+}
+
+//------------------------------------------------------------------------------
+// Final reconstruction and quantization.
+
+static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { // rebuild with the stored modes
+    const VP8Encoder* const encoder = it->enc_;
+    const int use_i4 = (it->mb_->type_ != 1); // type_ == 1 selects the single 16x16 luma reconstruction
+    int luma_nz = 0;
+
+    if (use_i4) {
+        VP8IteratorStartI4(it);
+        do { // one 4x4 sub-block per iteration, using the mode stored for it in it->preds_
+            const int mode = it->preds_[(it->i4_ >> 2) * encoder->preds_w_ + (it->i4_ & 3)];
+            uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_];
+            const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
+            VP8MakeIntra4Preds(it);
+            luma_nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_], src, dst, mode) << it->i4_;
+        } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC));
+    } else {
+        luma_nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
+    }
+
+    // Chroma is always rebuilt; rd->nz combines the luma and chroma results.
+    rd->nz = luma_nz | ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
+}
+
+// Refine intra16/intra4 sub-modes (and optionally the UV mode) based on distortion only (not rate); fills rd.
+static void RefineUsingDistortion(VP8EncIterator* const it,
+                                  int try_both_modes, // non-zero: evaluate both intra16 and intra4
+                                  int refine_uv_mode, // non-zero: also re-select the chroma mode
+                                  VP8ModeScore* const rd) {
+    score_t best_score = MAX_COST;
+    score_t score_i4 = (score_t)I4_PENALTY;
+    int16_t tmp_levels[16][16];
+    uint8_t modes_i4[16];
+    int nz = 0;
+    int mode;
+    int is_i16 = try_both_modes || (it->mb_->type_ == 1);
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    if (is_i16) { // First, evaluate Intra16 distortion
+        int best_mode = -1;
+        const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
+        for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
+            const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
+            const score_t score = VP8SSE16x16(src, ref); // distortion of the raw prediction, no quantization
+            if (score < best_score) {
+                best_mode = mode;
+                best_score = score;
+            }
+        }
+        VP8SetIntra16Mode(it, best_mode);
+        // we'll reconstruct later, if i16 mode actually gets selected
+    }
+
+    // Next, evaluate Intra4
+    if (try_both_modes || !is_i16) {
+        // We don't evaluate the rate here, but just account for it through a
+        // constant penalty (i4 mode usually needs more bits compared to i16).
+        is_i16 = 0;
+        VP8IteratorStartI4(it);
+        do {
+            int best_i4_mode = -1;
+            score_t best_i4_score = MAX_COST;
+            const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
+
+            VP8MakeIntra4Preds(it);
+            for (mode = 0; mode < NUM_BMODES; ++mode) {
+                const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
+                const score_t score = VP8SSE4x4(src, ref);
+                if (score < best_i4_score) {
+                    best_i4_mode = mode;
+                    best_i4_score = score;
+                }
+            }
+            modes_i4[it->i4_] = best_i4_mode;
+            score_i4 += best_i4_score; // running total, seeded with I4_PENALTY
+            if (score_i4 >= best_score) {
+                // Intra4 won't be better than Intra16. Bail out and pick Intra16.
+                is_i16 = 1;
+                break;
+            } else { // reconstruct partial block inside yuv_out2_ buffer
+                uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_];
+                nz |= ReconstructIntra4(it, tmp_levels[it->i4_], src, tmp_dst, best_i4_mode) << it->i4_;
+            }
+        } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC));
+    }
+
+    // Final reconstruction, depending on which mode is selected.
+    if (!is_i16) { // intra4 kept: its samples are already in yuv_out2_, so swap the buffers
+        VP8SetIntra4Mode(it, modes_i4);
+        memcpy(rd->y_ac_levels, tmp_levels, sizeof(tmp_levels));
+        SwapOut(it);
+        best_score = score_i4;
+    } else { // intra16: nz is overwritten, discarding bits from a partial intra4 pass
+        nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
+    }
+
+    // ... and UV!
+    if (refine_uv_mode) {
+        int best_mode = -1;
+        score_t best_uv_score = MAX_COST;
+        const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
+        for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
+            const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
+            const score_t score = VP8SSE16x8(src, ref);
+            if (score < best_uv_score) {
+                best_mode = mode;
+                best_uv_score = score;
+            }
+        }
+        VP8SetIntraUVMode(it, best_mode);
+    }
+    nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
+
+    rd->nz = nz;
+    rd->score = best_score; // luma distortion score only; the UV score is not added
+    // Timing is accumulated in one of two counter sets, selected by the global StatLoopFlag.
+    if (StatLoopFlag) {
+        StopProfiling(&stop_watch, &timeRefineUsingDist_2, &countRefineUsingDist_2);
+    } else {
+        StopProfiling(&stop_watch, &timeRefineUsingDist, &countRefineUsingDist);
+    }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, VP8RDLevel rd_opt) {
+    // Chooses the prediction modes of the current macroblock and quantizes it, filling *rd.
+#if DEBUG_BLOCK
+    printf("[%d][%d]=====================================\n", it->y_, it->x_);
+#endif
+    // rd_opt > RD_OPT_NONE runs the RD-based PickBest*() searches; otherwise RefineUsingDistortion() is used.
+    // Returns 1 when rd->nz ends up zero (the macroblock is also flagged through VP8SetSkip), else 0.
+
+    int is_skipped;
+    const int method = it->enc_->method_;
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+
+    InitScore(rd);
+
+    // Prediction setup.
+    // We can perform predictions for Luma16x16 and Chroma8x8 already.
+    // Luma4x4 predictions need to be done as-we-go.
+    VP8MakeLuma16Preds(it);
+
+    // if ((it->x_ >= 118 && it->y_ == 0) || (it->x_ == 0 && it->y_ == 1)) {
+    //   printf("line:%d[%d][%d]=====================================\n", __LINE__, it->y_, it->x_);
+    //   PrintBlockInfo(it, rd);
+    // }
+    VP8MakeChroma8Preds(it);
+
+    if (rd_opt > RD_OPT_NONE) {
+        StopProfilingWatch best_intra_watch; // times only this branch; distinct name avoids shadowing stop_watch
+        StartProfiling(&best_intra_watch);
+        it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);
+
+        // memset(it->yuv_out2_, 0, sizeof(uint8_t) * YUV_SIZE_ENC);
+        // memset(it->yuv_out_, 0, sizeof(uint8_t) * YUV_SIZE_ENC);
+
+        // if ((it->x_ >= 118 && it->y_ == 0) || (it->x_ == 0 && it->y_ == 1)) {
+        //   printf("line:%d[%d][%d]=====================================\n", __LINE__, it->y_, it->x_);
+        //   PrintBlockInfo(it, rd);
+        // }
+        PickBestIntra16(it, rd);
+        // if ((it->x_ >= 118 && it->y_ == 0) || (it->x_ == 0 && it->y_ == 1)) {
+        //   printf("line:%d[%d][%d]=====================================\n", __LINE__, it->y_, it->x_);
+        //   PrintBlockInfo(it, rd);
+        // }
+        if (method >= 2) {
+            PickBestIntra4(it, rd);
+        }
+        PickBestUV(it, rd);
+        if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now
+            it->do_trellis_ = 1;
+            SimpleQuantize(it, rd);
+        }
+        StopProfiling(&best_intra_watch, &timeVP8Decimate_BestIntra, &countVP8Decimate_BestIntra);
+    } else {
+        // At this point we have heuristically decided intra16 / intra4.
+        // For method >= 2, pick the best intra4/intra16 based on SSE (~tad slower).
+        // For method <= 1, we don't re-examine the decision but just go ahead with
+        // quantization/reconstruction.
+        RefineUsingDistortion(it, (method >= 2), (method >= 1), rd);
+    }
+    is_skipped = (rd->nz == 0);
+    VP8SetSkip(it, is_skipped);
+
+    if (StatLoopFlag) {
+        StopProfiling(&stop_watch, &timeVP8Decimate_2, &countVP8Decimate_2);
+    } else {
+        StopProfiling(&stop_watch, &timeVP8Decimate, &countVP8Decimate);
+    }
+    // NOTE(review): PrintBlockInfo() runs for every macroblock (the position filter that used to wrap it
+    // is disabled) -- confirm it is inert outside debug builds.
+    PrintBlockInfo(it, rd);
+
+    return is_skipped;
+}
diff --git a/codec/L2/demos/webpEnc/host/src/enc/syntax.c b/codec/L2/demos/webpEnc/host/src/enc/syntax.c
new file mode 100644
index 0000000000..955005a1f5
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/syntax.c
@@ -0,0 +1,358 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Header syntax writing
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+
+#include "../utils/utils.h"
+#include "../webp/format_constants.h" // RIFF constants
+#include "../webp/mux_types.h"        // ALPHA_FLAG
+#include "./vp8enci.h"
+
+//------------------------------------------------------------------------------
+// Helper functions
+
+static int IsVP8XNeeded(const VP8Encoder* const enc) {
+    // Alpha is presently the only feature that requires the VP8X chunk (this may grow later).
+    return (enc->has_alpha_ != 0) ? 1 : 0;
+}
+
+static int PutPaddingByte(const WebPPicture* const pic) {
+    const uint8_t zero = 0; // the single zero byte used to pad odd-sized data
+    return pic->writer(&zero, 1, pic) ? 1 : 0;
+}
+
+//------------------------------------------------------------------------------
+// Writers for header's various pieces (in order of appearance)
+
+static WebPEncodingError PutRIFFHeader(const VP8Encoder* const enc, size_t riff_size) {
+    // Layout: "RIFF" tag, 32-bit size (filled in by PutLE32 below), "WEBP" tag.
+    uint8_t header[RIFF_HEADER_SIZE] = {'R', 'I', 'F', 'F', 0, 0, 0, 0, 'W', 'E', 'B', 'P'};
+    const WebPPicture* const picture = enc->pic_;
+    const uint32_t size32 = (uint32_t)riff_size;
+    assert(riff_size == size32); // the size field is only 32 bits wide
+    PutLE32(header + TAG_SIZE, size32);
+    // A zero return from the writer callback is reported as a write failure.
+    return picture->writer(header, sizeof(header), picture) ? VP8_ENC_OK : VP8_ENC_ERROR_BAD_WRITE;
+}
+
+static WebPEncodingError PutVP8XHeader(const VP8Encoder* const enc) {
+    // Chunk = "VP8X" tag + 32-bit payload size + payload
+    // (32-bit flags, then width-1 and height-1 written as 24-bit fields).
+    const WebPPicture* const picture = enc->pic_;
+    const uint32_t feature_flags = enc->has_alpha_ ? ALPHA_FLAG : 0;
+    uint8_t chunk[CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE] = {'V', 'P', '8', 'X'};
+    uint8_t* const payload = chunk + CHUNK_HEADER_SIZE;
+
+    assert(IsVP8XNeeded(enc));
+    assert(picture->width >= 1 && picture->height >= 1);
+    assert(picture->width <= MAX_CANVAS_SIZE && picture->height <= MAX_CANVAS_SIZE);
+
+    PutLE32(chunk + TAG_SIZE, VP8X_CHUNK_SIZE);
+    PutLE32(payload, feature_flags);
+    PutLE24(payload + 4, picture->width - 1);
+    PutLE24(payload + 7, picture->height - 1);
+
+    if (picture->writer(chunk, sizeof(chunk), picture)) {
+        return VP8_ENC_OK;
+    }
+    return VP8_ENC_ERROR_BAD_WRITE;
+}
+
+static WebPEncodingError PutAlphaChunk(const VP8Encoder* const enc) {
+    // Emits the "ALPH" chunk: header with the payload size, the payload itself,
+    // and one padding byte when the payload size is odd.
+    const WebPPicture* const picture = enc->pic_;
+    const int needs_pad = (enc->alpha_data_size_ & 1) != 0;
+    uint8_t header[CHUNK_HEADER_SIZE] = {'A', 'L', 'P', 'H'};
+    int written;
+
+    assert(enc->has_alpha_);
+
+    // Chunk header (tag + size).
+    PutLE32(header + TAG_SIZE, enc->alpha_data_size_);
+    written = picture->writer(header, sizeof(header), picture);
+
+    // Chunk payload; skipped if the header could not be written.
+    written = written && picture->writer(enc->alpha_data_, enc->alpha_data_size_, picture);
+
+    // Padding; only attempted after a successful payload write.
+    if (written && needs_pad) {
+        written = PutPaddingByte(picture);
+    }
+    return written ? VP8_ENC_OK : VP8_ENC_ERROR_BAD_WRITE;
+}
+
+static WebPEncodingError PutVP8Header(const WebPPicture* const pic, size_t vp8_size) {
+    // "VP8 " chunk header: tag (note the trailing space) followed by a 32-bit size.
+    uint8_t header[CHUNK_HEADER_SIZE] = {'V', 'P', '8', ' '};
+    const uint32_t size32 = (uint32_t)vp8_size;
+    assert(vp8_size == size32); // must fit the 32-bit size field
+    PutLE32(header + TAG_SIZE, size32);
+    // The writer callback signals failure by returning zero.
+    return pic->writer(header, sizeof(header), pic) ? VP8_ENC_OK : VP8_ENC_ERROR_BAD_WRITE;
+}
+
+static WebPEncodingError PutVP8FrameHeader(const WebPPicture* const pic, int profile, size_t size0) {
+    // Serializes the VP8 frame header: a 3-byte frame tag, the 3-byte signature,
+    // then width and height as two bytes each (low byte first).
+    uint8_t hdr[VP8_FRAME_HEADER_SIZE];
+    uint32_t tag;
+    int i;
+
+    if (size0 >= VP8_MAX_PARTITION0_SIZE) { // partition #0 is too big to fit
+        return VP8_ENC_ERROR_PARTITION0_OVERFLOW;
+    }
+
+    // Paragraph 9.1: bit 0 = 0 (keyframe), bits 1-3 = profile, bit 4 = 1 (visible),
+    // bits 5 and up = length of partition #0 (19 bits).
+    tag = ((uint32_t)size0 << 5) | (1 << 4) | (profile << 1);
+    for (i = 0; i < 3; ++i) {
+        hdr[i] = (tag >> (8 * i)) & 0xff;
+    }
+    // signature, most significant byte first
+    for (i = 0; i < 3; ++i) {
+        hdr[3 + i] = (VP8_SIGNATURE >> (8 * (2 - i))) & 0xff;
+    }
+    // dimensions
+    hdr[6] = pic->width & 0xff;
+    hdr[7] = pic->width >> 8;
+    hdr[8] = pic->height & 0xff;
+    hdr[9] = pic->height >> 8;
+    if (pic->writer(hdr, sizeof(hdr), pic)) {
+        return VP8_ENC_OK;
+    }
+    return VP8_ENC_ERROR_BAD_WRITE;
+}
+
+// WebP Headers.
+static int PutWebPHeaders(const VP8Encoder* const enc, size_t size0, size_t vp8_size, size_t riff_size) {
+    // Writes every container/bitstream header in file order and stops at the
+    // first failure. Returns 1 on success; on failure returns whatever
+    // WebPEncodingSetError() yields after recording the error on the picture.
+    WebPPicture* const picture = enc->pic_;
+    WebPEncodingError status;
+
+    // RIFF header.
+    status = PutRIFFHeader(enc, riff_size);
+
+    // VP8X, only when required.
+    if (status == VP8_ENC_OK && IsVP8XNeeded(enc)) {
+        status = PutVP8XHeader(enc);
+    }
+
+    // Alpha.
+    if (status == VP8_ENC_OK && enc->has_alpha_) {
+        status = PutAlphaChunk(enc);
+    }
+
+    // VP8 header.
+    if (status == VP8_ENC_OK) {
+        status = PutVP8Header(picture, vp8_size);
+    }
+
+    // VP8 frame header.
+    if (status == VP8_ENC_OK) {
+        status = PutVP8FrameHeader(picture, enc->profile_, size0);
+    }
+
+    if (status != VP8_ENC_OK) {
+        return WebPEncodingSetError(picture, status);
+    }
+    return 1;
+}
+
+// Segmentation header: an on/off bit, then (if on) the per-segment quant/filter values and the map probabilities.
+static void PutSegmentHeader(VP8BitWriter* const bw, const VP8Encoder* const enc) {
+    const VP8EncSegmentHeader* const hdr = &enc->segment_hdr_;
+    const VP8EncProba* const proba = &enc->proba_;
+    if (VP8PutBitUniform(bw, (hdr->num_segments_ > 1))) {
+        // The quant and filter strength values are always flagged as updated.
+        const int update_data = 1;
+        int s;
+        VP8PutBitUniform(bw, hdr->update_map_);
+        if (VP8PutBitUniform(bw, update_data)) {
+            // Absolute values are always used, never relative ones.
+            VP8PutBitUniform(bw, 1); // (segment_feature_mode = 1. Paragraph 9.3.)
+            for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
+                VP8PutSignedBits(bw, enc->dqm_[s].quant_, 7);
+            }
+            for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
+                VP8PutSignedBits(bw, enc->dqm_[s].fstrength_, 6);
+            }
+        }
+        if (hdr->update_map_) {
+            for (s = 0; s < 3; ++s) { // three map probabilities, proba->segments_[0..2]
+                if (VP8PutBitUniform(bw, (proba->segments_[s] != 255u))) { // 255 is signalled by the flag alone
+                    VP8PutBits(bw, proba->segments_[s], 8);
+                }
+            }
+        }
+    }
+}
+
+// Filtering parameters header: simple flag, level, sharpness, then the optional i4x4 loop-filter delta.
+static void PutFilterHeader(VP8BitWriter* const bw, const VP8EncFilterHeader* const hdr) {
+    const int i4x4_delta = hdr->i4x4_lf_delta_;
+    const int has_delta = (i4x4_delta != 0);
+    VP8PutBitUniform(bw, hdr->simple_);
+    VP8PutBits(bw, hdr->level_, 6);
+    VP8PutBits(bw, hdr->sharpness_, 3);
+    if (!VP8PutBitUniform(bw, has_delta)) {
+        return; // no delta in use: nothing more to write
+    }
+    // '0' is the default value for i4x4_lf_delta_ at frame #0, so an update is
+    // needed (and signalled) exactly when the delta is non-zero.
+    if (VP8PutBitUniform(bw, has_delta)) {
+        VP8PutBits(bw, 0, 4);                // ref_lf_delta is unused => four 0 bits
+        VP8PutSignedBits(bw, i4x4_delta, 6); // mode_lf_delta for i4x4
+        VP8PutBits(bw, 0, 3);                // all others unused
+    }
+}
+
+// Nominal quantization parameters: the 7-bit base quantizer followed by five signed 4-bit deltas.
+static void PutQuant(VP8BitWriter* const bw, const VP8Encoder* const enc) {
+    VP8PutBits(bw, enc->base_quant_, 7);
+    VP8PutSignedBits(bw, enc->dq_y1_dc_, 4); // the deltas are written in this fixed order
+    VP8PutSignedBits(bw, enc->dq_y2_dc_, 4);
+    VP8PutSignedBits(bw, enc->dq_y2_ac_, 4);
+    VP8PutSignedBits(bw, enc->dq_uv_dc_, 4);
+    VP8PutSignedBits(bw, enc->dq_uv_ac_, 4);
+}
+
+// Partition sizes
+static int EmitPartitionsSize(const VP8Encoder* const enc, WebPPicture* const pic) {
+    // Every partition except the last gets a 3-byte size entry, low byte first.
+    uint8_t sizes[3 * (MAX_NUM_PARTITIONS - 1)];
+    uint8_t* dst = sizes;
+    int idx;
+    for (idx = 0; idx < enc->num_parts_ - 1; ++idx) {
+        const size_t bytes = VP8BitWriterSize(enc->parts_ + idx);
+        if (bytes >= VP8_MAX_PARTITION_SIZE) return WebPEncodingSetError(pic, VP8_ENC_ERROR_PARTITION_OVERFLOW);
+        *dst++ = (bytes >> 0) & 0xff;
+        *dst++ = (bytes >> 8) & 0xff;
+        *dst++ = (bytes >> 16) & 0xff;
+    }
+    return (idx > 0) ? pic->writer(sizes, 3 * idx, pic) : 1; // no entry to write: report success
+}
+
+//------------------------------------------------------------------------------
+
+static int GeneratePartition0(VP8Encoder* const enc) { // builds partition #0 in enc->bw_; returns 1 on success
+    VP8BitWriter* const bw = &enc->bw_;
+    const int mb_size = enc->mb_w_ * enc->mb_h_; // macroblock count, used to size the writer
+    uint64_t pos1, pos2, pos3;                   // writer positions, only used for the statistics below
+
+    pos1 = VP8BitWriterPos(bw); // NOTE(review): sampled before bw is initialized just below -- confirm intent
+    if (!VP8BitWriterInit(bw, mb_size * 7 / 8)) { // ~7 bits per macroblock
+        return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    }
+    VP8PutBitUniform(bw, 0); // colorspace
+    VP8PutBitUniform(bw, 0); // clamp type
+
+    PutSegmentHeader(bw, enc);
+    PutFilterHeader(bw, &enc->filter_hdr_);
+    VP8PutBits(bw, enc->num_parts_ == 8 ? 3 : enc->num_parts_ == 4 ? 2 : enc->num_parts_ == 2 ? 1 : 0, 2);
+    PutQuant(bw, enc);
+    VP8PutBitUniform(bw, 0); // no proba update
+    VP8WriteProbas(bw, &enc->proba_);
+    pos2 = VP8BitWriterPos(bw); // end of the header fields, start of the intra modes
+    VP8CodeIntraModes(enc);
+    VP8BitWriterFinish(bw);
+
+    pos3 = VP8BitWriterPos(bw);
+
+    if (enc->pic_->stats) { // statistics are optional
+        enc->pic_->stats->header_bytes[0] = (int)((pos2 - pos1 + 7) >> 3); // rounded up; assumes positions are in bits
+        enc->pic_->stats->header_bytes[1] = (int)((pos3 - pos2 + 7) >> 3);
+        enc->pic_->stats->alpha_data_size = (int)enc->alpha_data_size_;
+    }
+    if (bw->error_) { // any writer error seen so far is reported as out-of-memory
+        return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    }
+    return 1;
+}
+
+void VP8EncFreeBitWriters(VP8Encoder* const enc) { // wipes enc->bw_ and the enc->num_parts_ partition writers
+    int idx = 0;
+    VP8BitWriterWipeOut(&enc->bw_);
+    while (idx < enc->num_parts_) {
+        VP8BitWriterWipeOut(&enc->parts_[idx++]);
+    }
+}
+
+int VP8EncWrite(VP8Encoder* const enc) { // emits headers, partition #0 and token partitions via pic->writer
+    WebPPicture* const pic = enc->pic_;
+    VP8BitWriter* const bw = &enc->bw_;
+    const int task_percent = 19; // share of the overall progress attributed to this function
+    const int percent_per_part = task_percent / enc->num_parts_;
+    const int final_percent = enc->percent_ + task_percent;
+    int ok = 0;
+    size_t vp8_size, pad, riff_size;
+    int p;
+
+    // Partition #0 with header and partition sizes
+    ok = GeneratePartition0(enc);
+    if (!ok) return 0; // NOTE(review): the bit writers are not wiped on this path -- presumably left to the caller
+
+    // Compute VP8 size: frame header + partition #0 + 3 bytes per partition-size entry + token partitions.
+    vp8_size = VP8_FRAME_HEADER_SIZE + VP8BitWriterSize(bw) + 3 * (enc->num_parts_ - 1);
+    for (p = 0; p < enc->num_parts_; ++p) {
+        vp8_size += VP8BitWriterSize(enc->parts_ + p);
+    }
+    pad = vp8_size & 1; // 1 when the size is odd: one padding byte is appended at the end
+    vp8_size += pad;
+
+    // Compute RIFF size
+    // At the minimum it is: "WEBPVP8 nnnn" + VP8 data size.
+    riff_size = TAG_SIZE + CHUNK_HEADER_SIZE + vp8_size;
+    if (IsVP8XNeeded(enc)) { // Add size for: VP8X header + data.
+        riff_size += CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE;
+    }
+    if (enc->has_alpha_) { // Add size for: ALPH header + data.
+        const uint32_t padded_alpha_size = enc->alpha_data_size_ + (enc->alpha_data_size_ & 1);
+        riff_size += CHUNK_HEADER_SIZE + padded_alpha_size;
+    }
+    // Sanity check: the total must stay within what the 32-bit RIFF size field can hold.
+    if (riff_size > 0xfffffffeU) {
+        return WebPEncodingSetError(pic, VP8_ENC_ERROR_FILE_TOO_BIG);
+    }
+
+    // Emit headers and partition #0, then the table of partition sizes.
+    {
+        const uint8_t* const part0 = VP8BitWriterBuf(bw);
+        const size_t size0 = VP8BitWriterSize(bw);
+        ok = ok && PutWebPHeaders(enc, size0, vp8_size, riff_size) && pic->writer(part0, size0, pic) &&
+             EmitPartitionsSize(enc, pic);
+        VP8BitWriterWipeOut(bw); // will free the internal buffer.
+    }
+
+    // Token partitions: each writer is wiped even after a failure; only the writes are skipped then.
+    for (p = 0; p < enc->num_parts_; ++p) {
+        const uint8_t* const buf = VP8BitWriterBuf(enc->parts_ + p);
+        const size_t size = VP8BitWriterSize(enc->parts_ + p);
+        if (size) ok = ok && pic->writer(buf, size, pic);
+        VP8BitWriterWipeOut(enc->parts_ + p); // will free the internal buffer.
+        ok = ok && WebPReportProgress(pic, enc->percent_ + percent_per_part, &enc->percent_);
+    }
+
+    // Padding byte
+    if (ok && pad) {
+        ok = PutPaddingByte(pic);
+    }
+
+    enc->coded_size_ = (int)(CHUNK_HEADER_SIZE + riff_size); // recorded whether or not the writes succeeded
+    ok = ok && WebPReportProgress(pic, final_percent, &enc->percent_);
+    return ok;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/enc/token.c b/codec/L2/demos/webpEnc/host/src/enc/token.c
new file mode 100644
index 0000000000..3dd88a33d4
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/token.c
@@ -0,0 +1,600 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Paginated token buffer
+//
+//  A 'token' is a bit value associated with a probability, either fixed
+// or a later-to-be-determined after statistics have been collected.
+// For dynamic probability, we just record the slot id (idx) for the probability
+// value in the final probability array (uint8_t* probas in VP8EmitTokens).
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "./cost.h"
+#include "./vp8enci.h"
+#include "../utils/utils.h"
+#include "../utils/profiling.h"
+
+#if !defined(DISABLE_TOKEN_BUFFER)
+
+// we use pages to reduce the number of memcpy()
+#define MIN_PAGE_SIZE 8192 // minimum number of token per page
+#define FIXED_PROBA_BIT (1u << 14)
+
+typedef uint16_t token_t; // bit #15: bit value
+                          // bit #14: flags for constant proba or idx
+                          // bits #0..13: slot or constant proba
+struct VP8Tokens {
+    VP8Tokens* next_; // pointer to next page
+};
+// Token data is located in memory just after the next_ field.
+// This macro is used to return their address and hide the trick.
+#define TOKEN_DATA(p) ((const token_t*)&(p)[1])
+
+//------------------------------------------------------------------------------
+// Token buffer
+#define DEBUG_TOKENS 0
+void debug_tokens(VP8TBuffer* const b, VP8EncIterator* const it_var) { // debug dump; empty while DEBUG_TOKENS is 0
+#if DEBUG_TOKENS
+    const VP8Tokens* p = b->pages_;
+    // Header line: the position held by the iterator, printed as [y_][x_].
+    printf("\n[%d][%d]\n", it_var->y_, it_var->x_);
+    int page_index = 0;
+    while (p != NULL) {
+        const VP8Tokens* const next = p->next_;
+        // All page_size_ slots of every page are printed, whether used or not.
+        int n = b->page_size_;
+        const token_t* const tokens = TOKEN_DATA(p);
+        int i = 0;
+        printf("page:%d \n", page_index++);
+        for (i = 0; i < n; i++) {
+            printf("%4d ", tokens[i]);
+            if (i % 30 == 0) { // line break after slots 0, 30, 60, ...
+                printf("\n");
+            }
+        }
+        p = next;
+        printf("\n");
+    }
+    printf("\n");
+#endif
+}
+// Allocates one token page, appends it to the page list of 'b' and fills it
+// with the tokens of page number 'tokens_index' of the kernel token array.
+//   b             - destination buffer; last_page_ must designate the list tail
+//   tokens_kernel - source token array together with its bookkeeping fields
+//   tokens_index  - zero-based index of the page to copy
+// On success the bookkeeping fields left_, error_ and page_size_ of 'b' are
+// taken over from 'tokens_kernel' and 1 is returned.
+// On allocation failure 0 is returned and b->error_ is set, as TBufferNewPage()
+// does, so that code checking or asserting on error_ notices the failure.
+static int TBufferNewPageFromKernel(VP8TBuffer* const b, VP8TBufferKernel* const tokens_kernel, int tokens_index) {
+    // NOTE(review): the copy below assumes that PAGE_SIZE covers the VP8Tokens
+    // header plus TOKENS_COUNT_PER_PAGE * TOKENS_SIZE bytes -- confirm against
+    // the definitions of these macros.
+    VP8Tokens* const page = (VP8Tokens*)WebPSafeMalloc(1ULL, PAGE_SIZE);
+
+    if (page == NULL) {
+        printf("Failed to allocate pages[%d]\n", tokens_index);
+        b->error_ = 1;
+        return 0;
+    }
+
+    // Link the new page at the tail of the list.
+    page->next_ = NULL;
+    *b->last_page_ = page;
+    b->last_page_ = &page->next_;
+
+    // Token storage starts right after the 'next_' field (see TOKEN_DATA).
+    b->tokens_ = (token_t*)TOKEN_DATA(page);
+    memcpy(b->tokens_, tokens_kernel->tokens_ + tokens_index * TOKENS_COUNT_PER_PAGE,
+           TOKENS_COUNT_PER_PAGE * TOKENS_SIZE);
+
+    b->left_ = tokens_kernel->left_;
+    b->error_ = tokens_kernel->error_;
+    b->page_size_ = tokens_kernel->page_size_;
+    return 1;
+}
+
+// Rebuilds the paginated token list of 'b' from the tokens_kernel->page_count_
+// pages held by 'tokens_kernel', in order.
+// Stops at the first page that cannot be allocated: b->error_ is set at that
+// point, and carrying on would both overwrite the flag on the next successful
+// page and leave a list with a missing page in the middle.
+// The function returns nothing; callers detect a failure through b->error_.
+void ReadTokenFromKernel(VP8TBuffer* const b, VP8TBufferKernel* const tokens_kernel) {
+    int tokens_index;
+    for (tokens_index = 0; tokens_index < tokens_kernel->page_count_; tokens_index++) {
+        if (!TBufferNewPageFromKernel(b, tokens_kernel, tokens_index)) {
+            printf("ReadTokenFromKernel: Failed to allocate pages[%d]\n", tokens_index);
+            break;
+        }
+    }
+}
+
+#if TOKEN_RECONSTRUCT
+void VP8TBufferInit(VP8TBuffer* const b, int page_size) {
+    b->tokens_ = NULL;
+    b->pages_ = NULL;
+    b->last_page_ = &b->pages_;
+    b->left_ = 0;
+    b->page_size_ = (page_size < MIN_PAGE_SIZE) ? MIN_PAGE_SIZE : page_size;
+    b->error_ = 0;
+}
+
+void VP8TBufferKernelInit(VP8TBufferKernel* const b, int page_size) {
+    // Resets a VP8TBufferKernel: zeroes all PAGE_COUNT * TOKENS_COUNT_PER_PAGE
+    // token slots, rewinds the page counters and clears the error flag.
+    // A 'page_size' below MIN_PAGE_SIZE is raised to MIN_PAGE_SIZE.
+    memset(b->tokens_, 0, PAGE_COUNT * TOKENS_COUNT_PER_PAGE * sizeof(token_t));
+    b->cur_page_ = 0;
+    b->page_count_ = 0;
+    b->left_ = 0;
+    b->page_size_ = (page_size < MIN_PAGE_SIZE) ? MIN_PAGE_SIZE : page_size;
+    b->error_ = 0;
+}
+
+void VP8TBufferClear(VP8TBuffer* const b) {
+    // Frees every page of 'b', then re-initializes it with its current page size; NULL is ignored.
+    VP8Tokens* page;
+    VP8Tokens* following;
+    if (b == NULL) return;
+    for (page = b->pages_; page != NULL; page = following) {
+        following = page->next_; // read the link before the page is freed
+        WebPSafeFree(page);
+    }
+    VP8TBufferInit(b, b->page_size_);
+}
+
+// NOTE(review): this function and ReadTokenFromKernel() below repeat the
+// definitions that precede the '#if TOKEN_RECONSTRUCT' block. When that macro
+// is non-zero both copies end up in the same translation unit, which C rejects
+// as a redefinition -- one of the two copies should be removed.
+//
+// Allocates a page of PAGE_SIZE bytes, links it at the tail of the page list
+// of 'b' and copies TOKENS_COUNT_PER_PAGE * TOKENS_SIZE bytes into it, read
+// from tokens_kernel->tokens_ + tokens_index * TOKENS_COUNT_PER_PAGE.
+// Returns 1 on success, 0 if the allocation fails (b->error_ is left as is).
+static int TBufferNewPageFromKernel(VP8TBuffer* const b, VP8TBufferKernel* const tokens_kernel, int tokens_index) {
+    VP8Tokens* page = NULL;
+    const size_t size = PAGE_SIZE;
+
+    static int i = 0; // call counter; only ever incremented, never read
+    i++;
+    page = (VP8Tokens*)WebPSafeMalloc(1ULL, size);
+
+    if (page == NULL) {
+        printf("Failed to allocate pages[%d]\n", tokens_index);
+        return 0;
+    }
+
+    page->next_ = NULL;
+
+    // Append to the list: patch the current tail link, then advance it.
+    *b->last_page_ = page;
+    b->last_page_ = &page->next_;
+    // Token storage follows the 'next_' field of the page (see TOKEN_DATA).
+    b->tokens_ = (token_t*)TOKEN_DATA(page);
+
+    // NOTE(review): assumes PAGE_SIZE leaves room for the copied tokens after
+    // the page header -- confirm against the macro definitions.
+    memcpy(b->tokens_, tokens_kernel->tokens_ + tokens_index * TOKENS_COUNT_PER_PAGE,
+           TOKENS_COUNT_PER_PAGE * TOKENS_SIZE);
+    // Bookkeeping comes from the kernel buffer (left_ is not reset to page_size_).
+    b->left_ = tokens_kernel->left_;
+    b->error_ = tokens_kernel->error_;
+    b->page_size_ = tokens_kernel->page_size_;
+    return 1;
+}
+
+// Creates one page in 'b' for each of the tokens_kernel->page_count_ pages.
+// A failed page is reported on stdout and the loop carries on with the next.
+// The function returns nothing, so that failure is not reported to the caller.
+void ReadTokenFromKernel(VP8TBuffer* const b, VP8TBufferKernel* const tokens_kernel) {
+    int tokens_index = 0;
+    for (tokens_index = 0; tokens_index < tokens_kernel->page_count_; tokens_index++) {
+        if (0 == TBufferNewPageFromKernel(b, tokens_kernel, tokens_index)) {
+            printf("ReadTokenFromKernel: Failed to allocate pages[%d]\n", tokens_index);
+        }
+    }
+}
+
+static int TBufferNewPage(VP8TBufferKernel* const b) {
+    // Moves on to the next page of the token array; returns 1 on success, 0 (with error_ set) otherwise.
+    if (!b->error_) {
+        if (0 != b->page_count_) { // the very first page is page 0, so cur_page_ is not advanced for it
+            b->cur_page_++;
+        }
+        // NOTE(review): the trace below is printed for every new page -- confirm it is meant to stay.
+        b->page_count_++;
+        printf("%s %s %d b->cur_page_:%d b->page_count_:%d\n", __FILE__, __FUNCTION__, __LINE__, b->cur_page_,
+               b->page_count_);
+    }
+    if (b->page_count_ >= PAGE_COUNT) {
+        b->error_ = 1;
+        return 0;
+    }
+    // NOTE(review): page_count_ == PAGE_COUNT is rejected, so page PAGE_COUNT - 1 is never filled -- intended?
+    b->left_ = b->page_size_;
+    // All page_size_ slots of the new current page are free again.
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+
+#define TOKEN_ID(t, b, ctx) (NUM_PROBAS * ((ctx) + NUM_CTX * ((b) + NUM_BANDS * (t))))
+
+static WEBP_INLINE uint32_t AddToken(VP8TBufferKernel* const b, uint32_t bit, uint32_t proba_idx) {
+    assert(proba_idx < FIXED_PROBA_BIT); // the slot id must leave bit #14 clear
+    assert(bit <= 1);
+    if (b->left_ > 0 || TBufferNewPage(b)) { // if no page is available the token is dropped
+        const int slot = --b->left_;         // slots are handed out from the top of the page downwards
+        b->tokens_[b->cur_page_ * TOKENS_COUNT_PER_PAGE + slot] = (bit << 15) | proba_idx;
+    }
+    return bit; // returned in every case, so callers can branch on the value just recorded
+}
+
+static WEBP_INLINE void AddConstantToken(VP8TBufferKernel* const b, uint32_t bit, uint32_t proba) {
+    assert(proba < 256); // the constant probability is stored in the low bits of the token
+    assert(bit <= 1);
+    if (b->left_ > 0 || TBufferNewPage(b)) { // if no page is available the token is dropped
+        const int slot = --b->left_;
+        b->tokens_[b->cur_page_ * TOKENS_COUNT_PER_PAGE + slot] = (bit << 15) | FIXED_PROBA_BIT | proba;
+    }
+}
+
+int VP8RecordCoeffTokens(const int ctx, // records the tokens coding coeffs[first..15] into 'tokens'
+                         const int coeff_type,
+                         int first,
+                         int last,
+                         const int16_t* const coeffs,
+                         VP8TBufferKernel* const tokens) {
+    int n = first; // index of the coefficient examined next
+    uint32_t base_id = TOKEN_ID(coeff_type, n, ctx);
+    if (!AddToken(tokens, last >= 0, base_id + 0)) { // last < 0: a single 0 bit is recorded and 0 is returned
+        return 0;
+    }
+
+    while (n < 16) {
+        const int c = coeffs[n++];
+        const int sign = c < 0;
+        const uint32_t v = sign ? -c : c; // magnitude of the coefficient
+        if (!AddToken(tokens, v != 0, base_id + 1)) {
+            base_id = TOKEN_ID(coeff_type, VP8EncBands[n], 0); // ctx=0
+            continue;
+        }
+        if (!AddToken(tokens, v > 1, base_id + 2)) {
+            base_id = TOKEN_ID(coeff_type, VP8EncBands[n], 1); // ctx=1
+        } else {
+            if (!AddToken(tokens, v > 4, base_id + 3)) {
+                if (AddToken(tokens, v != 2, base_id + 4)) AddToken(tokens, v == 4, base_id + 5);
+            } else if (!AddToken(tokens, v > 10, base_id + 6)) {
+                if (!AddToken(tokens, v > 6, base_id + 7)) {
+                    AddConstantToken(tokens, v == 6, 159);
+                } else {
+                    AddConstantToken(tokens, v >= 9, 165);
+                    AddConstantToken(tokens, !(v & 1), 145);
+                }
+            } else { // v > 10: a category is selected, then the residue is coded with fixed probabilities
+                int mask;
+                const uint8_t* tab;
+                uint32_t residue = v - 3;
+                if (residue < (8 << 1)) { // VP8Cat3  (3b)
+                    AddToken(tokens, 0, base_id + 8);
+                    AddToken(tokens, 0, base_id + 9);
+                    residue -= (8 << 0);
+                    mask = 1 << 2;
+                    tab = VP8Cat3;
+                } else if (residue < (8 << 2)) { // VP8Cat4  (4b)
+                    AddToken(tokens, 0, base_id + 8);
+                    AddToken(tokens, 1, base_id + 9);
+                    residue -= (8 << 1);
+                    mask = 1 << 3;
+                    tab = VP8Cat4;
+                } else if (residue < (8 << 3)) { // VP8Cat5  (5b)
+                    AddToken(tokens, 1, base_id + 8);
+                    AddToken(tokens, 0, base_id + 10);
+                    residue -= (8 << 2);
+                    mask = 1 << 4;
+                    tab = VP8Cat5;
+                } else { // VP8Cat6 (11b)
+                    AddToken(tokens, 1, base_id + 8);
+                    AddToken(tokens, 1, base_id + 10);
+                    residue -= (8 << 3);
+                    mask = 1 << 10;
+                    tab = VP8Cat6;
+                }
+                while (mask) { // residue bits, most significant first, one table entry per bit
+                    AddConstantToken(tokens, !!(residue & mask), *tab++);
+                    mask >>= 1;
+                }
+            }
+            base_id = TOKEN_ID(coeff_type, VP8EncBands[n], 2); // ctx=2
+        }
+        AddConstantToken(tokens, sign, 128); // sign bit, with the fixed probability 128
+        if (n == 16 || !AddToken(tokens, n <= last, base_id + 0)) {
+            return 1; // EOB
+        }
+    }
+    return 1;
+}
+
+#else
+
+//------------------------------------------------------------------------------
+
+void VP8TBufferInit(VP8TBuffer* const b, int page_size) {
+    b->tokens_ = NULL;
+    b->pages_ = NULL;
+    b->last_page_ = &b->pages_;
+    b->left_ = 0;
+    b->page_size_ = (page_size < MIN_PAGE_SIZE) ? MIN_PAGE_SIZE : page_size;
+    b->error_ = 0;
+}
+
+void VP8TBufferClear(VP8TBuffer* const b) {
+    // Frees every page of 'b', then re-initializes it with its current page size; NULL is ignored.
+    VP8Tokens* page;
+    VP8Tokens* following;
+    if (b == NULL) return;
+    for (page = b->pages_; page != NULL; page = following) {
+        following = page->next_; // read the link before the page is freed
+        WebPSafeFree(page);
+    }
+    VP8TBufferInit(b, b->page_size_);
+}
+
+#include <stdio.h>
+// Appends a fresh, empty token page to the page list of 'b'.
+//
+// A page is a single allocation laid out as
+//   [ VP8Tokens header (the next_ link) ][ page_size_ token_t slots ]
+// and TOKEN_DATA(page) gives the address of the first slot.
+//
+// On success the page becomes the tail of the list, b->tokens_ points at its
+// slots and b->left_ is reset to page_size_ (every slot free); AddToken() and
+// AddConstantToken() then hand slots out from the top downwards.
+//
+// Returns 1 on success. Returns 0 and sets b->error_ when the buffer is already
+// in error or when the allocation fails. The flag is sticky: once it is set,
+// later calls fail without attempting another allocation.
+static int TBufferNewPage(VP8TBuffer* const b) {
+    VP8Tokens* page = NULL;
+
+    // Never allocate once an error has been recorded: 'page' then stays NULL
+    // and the failure path below is taken.
+    if (!b->error_) {
+        // Header and slots are obtained in one block.
+        const size_t size = sizeof(*page) + b->page_size_ * sizeof(token_t);
+        page = (VP8Tokens*)WebPSafeMalloc(1ULL, size);
+    }
+    if (page == NULL) {
+        b->error_ = 1;
+        return 0;
+    }
+
+    // Link the page at the tail of the list.
+    page->next_ = NULL;
+    *b->last_page_ = page;
+    b->last_page_ = &page->next_;
+
+    // Make it the current page.
+    b->left_ = b->page_size_;
+    b->tokens_ = (token_t*)TOKEN_DATA(page);
+
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+
+#define TOKEN_ID(t, b, ctx) (NUM_PROBAS * ((ctx) + NUM_CTX * ((b) + NUM_BANDS * (t))))
+
+// Records 'bit' together with the slot id 'proba_idx' of the probability that
+// will code it (bit #15 = bit value, bits #0..13 = slot id, bit #14 clear).
+// A new page is allocated when the current one is full; if that fails the token
+// is silently dropped (b->error_ is set by TBufferNewPage()).
+// Returns 'bit' in every case, which lets callers branch on the value just recorded.
+static WEBP_INLINE uint32_t AddToken(VP8TBuffer* const b, uint32_t bit, uint32_t proba_idx) {
+    assert(proba_idx < FIXED_PROBA_BIT);
+    assert(bit <= 1);
+    if (b->left_ > 0 || TBufferNewPage(b)) {
+        const int slot = --b->left_;
+        b->tokens_[slot] = (bit << 15) | proba_idx;
+    }
+    return bit;
+}
+
+static WEBP_INLINE void AddConstantToken(VP8TBuffer* const b, uint32_t bit, uint32_t proba) {
+    assert(proba < 256); // the constant probability is stored in the low bits of the token
+    assert(bit <= 1);
+    if (b->left_ > 0 || TBufferNewPage(b)) { // if no page can be obtained the token is dropped
+        const int slot = --b->left_;         // slots are handed out from the top of the page downwards
+        b->tokens_[slot] = (bit << 15) | FIXED_PROBA_BIT | proba; // bit #14 marks the proba as constant
+    }
+}
+
+int VP8RecordCoeffTokens( // records the tokens coding coeffs[first..15] into 'tokens'
+    const int ctx, const int coeff_type, int first, int last, const int16_t* const coeffs, VP8TBuffer* const tokens) {
+    int n = first; // index of the coefficient examined next
+    uint32_t base_id = TOKEN_ID(coeff_type, n, ctx);
+    if (!AddToken(tokens, last >= 0, base_id + 0)) { // last < 0: a single 0 bit is recorded and 0 is returned
+        return 0;
+    }
+
+    while (n < 16) {
+        const int c = coeffs[n++];
+        const int sign = c < 0;
+        const uint32_t v = sign ? -c : c; // magnitude of the coefficient
+        if (!AddToken(tokens, v != 0, base_id + 1)) {
+            base_id = TOKEN_ID(coeff_type, VP8EncBands[n], 0); // ctx=0
+            continue;
+        }
+        if (!AddToken(tokens, v > 1, base_id + 2)) {
+            base_id = TOKEN_ID(coeff_type, VP8EncBands[n], 1); // ctx=1
+        } else {
+            if (!AddToken(tokens, v > 4, base_id + 3)) {
+                if (AddToken(tokens, v != 2, base_id + 4)) AddToken(tokens, v == 4, base_id + 5);
+            } else if (!AddToken(tokens, v > 10, base_id + 6)) {
+                if (!AddToken(tokens, v > 6, base_id + 7)) {
+                    AddConstantToken(tokens, v == 6, 159);
+                } else {
+                    AddConstantToken(tokens, v >= 9, 165);
+                    AddConstantToken(tokens, !(v & 1), 145);
+                }
+            } else { // v > 10: a category is selected, then the residue is coded with fixed probabilities
+                int mask;
+                const uint8_t* tab;
+                uint32_t residue = v - 3;
+                if (residue < (8 << 1)) { // VP8Cat3  (3b)
+                    AddToken(tokens, 0, base_id + 8);
+                    AddToken(tokens, 0, base_id + 9);
+                    residue -= (8 << 0);
+                    mask = 1 << 2;
+                    tab = VP8Cat3;
+                } else if (residue < (8 << 2)) { // VP8Cat4  (4b)
+                    AddToken(tokens, 0, base_id + 8);
+                    AddToken(tokens, 1, base_id + 9);
+                    residue -= (8 << 1);
+                    mask = 1 << 3;
+                    tab = VP8Cat4;
+                } else if (residue < (8 << 3)) { // VP8Cat5  (5b)
+                    AddToken(tokens, 1, base_id + 8);
+                    AddToken(tokens, 0, base_id + 10);
+                    residue -= (8 << 2);
+                    mask = 1 << 4;
+                    tab = VP8Cat5;
+                } else { // VP8Cat6 (11b)
+                    AddToken(tokens, 1, base_id + 8);
+                    AddToken(tokens, 1, base_id + 10);
+                    residue -= (8 << 3);
+                    mask = 1 << 10;
+                    tab = VP8Cat6;
+                }
+                while (mask) { // residue bits, most significant first, one table entry per bit
+                    AddConstantToken(tokens, !!(residue & mask), *tab++);
+                    mask >>= 1;
+                }
+            }
+            base_id = TOKEN_ID(coeff_type, VP8EncBands[n], 2); // ctx=2
+        }
+        AddConstantToken(tokens, sign, 128); // sign bit, with the fixed probability 128
+        if (n == 16 || !AddToken(tokens, n <= last, base_id + 0)) {
+            return 1; // EOB
+        }
+    }
+    return 1;
+}
+
+#undef TOKEN_ID
+#endif
+//------------------------------------------------------------------------------
+// This function works, but isn't currently used. Saved for later.
+
+#if 0
+
+static void Record(int bit, proba_t* const stats) {
+  proba_t p = *stats;
+  if (p >= 0xffff0000u) {               // an overflow is inbound.
+    p = ((p + 1u) >> 1) & 0x7fff7fffu;  // -> divide the stats by 2.
+  }
+  // record bit count (lower 16 bits) and increment total count (upper 16 bits).
+  p += 0x00010000u + bit;
+  *stats = p;
+}
+
+void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats) {
+  const VP8Tokens* p = b->pages_;
+  while (p != NULL) {
+    const int N = (p->next_ == NULL) ? b->left_ : 0;
+    int n = MAX_NUM_TOKEN;
+    const token_t* const tokens = TOKEN_DATA(p);
+    while (n-- > N) {
+      const token_t token = tokens[n];
+      if (!(token & FIXED_PROBA_BIT)) {
+        Record((token >> 15) & 1, stats + (token & 0x3fffu));
+      }
+    }
+    p = p->next_;
+  }
+}
+
+#endif // 0
+
+//------------------------------------------------------------------------------
+// Final coding pass, with known probabilities: writes every recorded token to 'bw'. With 'final_pass' set the
+// pages are freed as they are consumed and 'b' is left empty, with no pointer into freed memory. Returns 1.
+int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw, const uint8_t* const probas, int final_pass) {
+    const VP8Tokens* p = b->pages_;
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch);
+    assert(!b->error_);
+    while (p != NULL) {
+        const VP8Tokens* const next = p->next_;
+        const int N = (next == NULL) ? b->left_ : 0; // only the last page can be partially filled
+        int n = b->page_size_;
+        const token_t* const tokens = TOKEN_DATA(p);
+        while (n-- > N) { // slots were filled from the top down, so they are replayed in that order
+            const token_t token = tokens[n];
+            const int bit = (token >> 15) & 1;
+            VP8PutBit(bw, bit, (token & FIXED_PROBA_BIT) ? (token & 0xffu) : probas[token & 0x3fffu]);
+        }
+        if (final_pass) WebPSafeFree((void*)p);
+        p = next;
+    }
+    if (final_pass) {
+        b->pages_ = NULL;
+        b->last_page_ = &b->pages_; // was pointing into the last, now freed, page
+        b->tokens_ = NULL;          // ditto
+        b->left_ = 0;               // nothing can be stored through the stale page any more
+    }
+    StopProfiling(&stop_watch, &timeVP8EmitTokens, &countVP8EmitTokens);
+    return 1;
+}
+
+// Size estimation: accumulates VP8BitCost() over every recorded token, using the
+// same traversal as VP8EmitTokens() but without writing or freeing anything.
+// 'probas' is indexed by the slot id stored in non-constant tokens.
+// Returns the accumulated cost.
+size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas) {
+    const VP8Tokens* page;
+    size_t total = 0;
+    assert(!b->error_);
+    for (page = b->pages_; page != NULL; page = page->next_) {
+        const token_t* const data = TOKEN_DATA(page);
+        // Tokens are stored from the top of a page downwards; only the last
+        // page can be partially filled, its lowest used slot being b->left_.
+        const int lowest = (page->next_ != NULL) ? 0 : b->left_;
+        int slot;
+        for (slot = b->page_size_ - 1; slot >= lowest; --slot) {
+            const token_t tok = data[slot];
+            const int bit = tok & (1 << 15); // zero or non-zero, passed on as is
+            const int proba = (tok & FIXED_PROBA_BIT) ? (tok & 0xffu) : probas[tok & 0x3fffu];
+            total += VP8BitCost(bit, proba);
+        }
+    }
+    return total;
+}
+
+//------------------------------------------------------------------------------
+
+#else // DISABLE_TOKEN_BUFFER
+void VP8TBufferInit(VP8TBuffer* const b, int page_size) { // no-op stub: the token buffer is compiled out
+    (void)b;
+    (void)page_size; // accepted only so that the signature matches the enabled variant and its callers
+}
+void VP8TBufferClear(VP8TBuffer* const b) {
+    (void)b;
+}
+
+#endif // !DISABLE_TOKEN_BUFFER
diff --git a/codec/L2/demos/webpEnc/host/src/enc/tree.c b/codec/L2/demos/webpEnc/host/src/enc/tree.c
new file mode 100644
index 0000000000..19d9ef225b
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/tree.c
@@ -0,0 +1,432 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Coding of token probabilities, intra modes and segments.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./vp8enci.h"
+
+//------------------------------------------------------------------------------
+// Default probabilities
+
+// Default coefficient probability table (VP8 specification, paragraph 13.5).
+const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
+    {{{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}},
+     {{253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128},
+      {189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128},
+      {106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128}},
+     {
+         {1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128},
+         {181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128},
+         {78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128},
+     },
+     {
+         {1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128},
+         {184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128},
+         {77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128},
+     },
+     {{1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128},
+      {170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128},
+      {37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128}},
+     {{1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128},
+      {207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128},
+      {102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128}},
+     {{1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128},
+      {177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128},
+      {80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128}},
+     {{1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      {246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      {255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}}},
+    {{{198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62},
+      {131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1},
+      {68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128}},
+     {{1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128},
+      {184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128},
+      {81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128}},
+     {{1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128},
+      {99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128},
+      {23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128}},
+     {{1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128},
+      {109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128},
+      {44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128}},
+     {{1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128},
+      {94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128},
+      {22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128}},
+     {{1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128},
+      {124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128},
+      {35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128}},
+     {{1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128},
+      {121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128},
+      {45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128}},
+     {{1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128},
+      {203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128},
+      {137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128}}},
+    {{{253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128},
+      {175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128},
+      {73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128}},
+     {{1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128},
+      {239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128},
+      {155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128}},
+     {{1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128},
+      {201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128},
+      {69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128}},
+     {{1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128},
+      {223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128},
+      {141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128}},
+     {{1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128},
+      {190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128},
+      {149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128}},
+     {{1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      {247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      {240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128}},
+     {{1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128},
+      {213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128},
+      {55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128}},
+     {{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}}},
+    {{{202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255},
+      {126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128},
+      {61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128}},
+     {{1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128},
+      {166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128},
+      {39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128}},
+     {{1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128},
+      {124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128},
+      {24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128}},
+     {{1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128},
+      {149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128},
+      {28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128}},
+     {{1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128},
+      {123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128},
+      {20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128}},
+     {{1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128},
+      {168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128},
+      {47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128}},
+     {{1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128},
+      {141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128},
+      {42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128}},
+     {{1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      {244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      {238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128}}}};
+
+void VP8DefaultProbas(VP8Encoder* const enc) { // resets enc->proba_ to the built-in defaults
+    VP8EncProba* const dst = &enc->proba_;
+    // Note: the level_costs_ matching VP8CoeffsProba0 are not hard-coded (that's ~11k of
+    // static data). Raising dirty_ instead leaves them to a later VP8CalculateLevelCosts().
+    dst->dirty_ = 1;
+    memcpy(dst->coeffs_, VP8CoeffsProba0, sizeof(VP8CoeffsProba0));
+    memset(dst->segments_, 255u, sizeof(dst->segments_));
+    dst->use_skip_proba_ = 0;
+}
+
+// Paragraph 11.5: intra-4x4 mode probabilities, indexed [top mode][left mode][proba]. 10*10*9 = 900 bytes.
+static const uint8_t kBModesProba[NUM_BMODES][NUM_BMODES][NUM_BMODES - 1] = {
+    {{231, 120, 48, 89, 115, 113, 120, 152, 112},
+     {152, 179, 64, 126, 170, 118, 46, 70, 95},
+     {175, 69, 143, 80, 85, 82, 72, 155, 103},
+     {56, 58, 10, 171, 218, 189, 17, 13, 152},
+     {114, 26, 17, 163, 44, 195, 21, 10, 173},
+     {121, 24, 80, 195, 26, 62, 44, 64, 85},
+     {144, 71, 10, 38, 171, 213, 144, 34, 26},
+     {170, 46, 55, 19, 136, 160, 33, 206, 71},
+     {63, 20, 8, 114, 114, 208, 12, 9, 226},
+     {81, 40, 11, 96, 182, 84, 29, 16, 36}},
+    {{134, 183, 89, 137, 98, 101, 106, 165, 148},
+     {72, 187, 100, 130, 157, 111, 32, 75, 80},
+     {66, 102, 167, 99, 74, 62, 40, 234, 128},
+     {41, 53, 9, 178, 241, 141, 26, 8, 107},
+     {74, 43, 26, 146, 73, 166, 49, 23, 157},
+     {65, 38, 105, 160, 51, 52, 31, 115, 128},
+     {104, 79, 12, 27, 217, 255, 87, 17, 7},
+     {87, 68, 71, 44, 114, 51, 15, 186, 23},
+     {47, 41, 14, 110, 182, 183, 21, 17, 194},
+     {66, 45, 25, 102, 197, 189, 23, 18, 22}},
+    {{88, 88, 147, 150, 42, 46, 45, 196, 205},
+     {43, 97, 183, 117, 85, 38, 35, 179, 61},
+     {39, 53, 200, 87, 26, 21, 43, 232, 171},
+     {56, 34, 51, 104, 114, 102, 29, 93, 77},
+     {39, 28, 85, 171, 58, 165, 90, 98, 64},
+     {34, 22, 116, 206, 23, 34, 43, 166, 73},
+     {107, 54, 32, 26, 51, 1, 81, 43, 31},
+     {68, 25, 106, 22, 64, 171, 36, 225, 114},
+     {34, 19, 21, 102, 132, 188, 16, 76, 124},
+     {62, 18, 78, 95, 85, 57, 50, 48, 51}},
+    {{193, 101, 35, 159, 215, 111, 89, 46, 111},
+     {60, 148, 31, 172, 219, 228, 21, 18, 111},
+     {112, 113, 77, 85, 179, 255, 38, 120, 114},
+     {40, 42, 1, 196, 245, 209, 10, 25, 109},
+     {88, 43, 29, 140, 166, 213, 37, 43, 154},
+     {61, 63, 30, 155, 67, 45, 68, 1, 209},
+     {100, 80, 8, 43, 154, 1, 51, 26, 71},
+     {142, 78, 78, 16, 255, 128, 34, 197, 171},
+     {41, 40, 5, 102, 211, 183, 4, 1, 221},
+     {51, 50, 17, 168, 209, 192, 23, 25, 82}},
+    {{138, 31, 36, 171, 27, 166, 38, 44, 229},
+     {67, 87, 58, 169, 82, 115, 26, 59, 179},
+     {63, 59, 90, 180, 59, 166, 93, 73, 154},
+     {40, 40, 21, 116, 143, 209, 34, 39, 175},
+     {47, 15, 16, 183, 34, 223, 49, 45, 183},
+     {46, 17, 33, 183, 6, 98, 15, 32, 183},
+     {57, 46, 22, 24, 128, 1, 54, 17, 37},
+     {65, 32, 73, 115, 28, 128, 23, 128, 205},
+     {40, 3, 9, 115, 51, 192, 18, 6, 223},
+     {87, 37, 9, 115, 59, 77, 64, 21, 47}},
+    {{104, 55, 44, 218, 9, 54, 53, 130, 226},
+     {64, 90, 70, 205, 40, 41, 23, 26, 57},
+     {54, 57, 112, 184, 5, 41, 38, 166, 213},
+     {30, 34, 26, 133, 152, 116, 10, 32, 134},
+     {39, 19, 53, 221, 26, 114, 32, 73, 255},
+     {31, 9, 65, 234, 2, 15, 1, 118, 73},
+     {75, 32, 12, 51, 192, 255, 160, 43, 51},
+     {88, 31, 35, 67, 102, 85, 55, 186, 85},
+     {56, 21, 23, 111, 59, 205, 45, 37, 192},
+     {55, 38, 70, 124, 73, 102, 1, 34, 98}},
+    {{125, 98, 42, 88, 104, 85, 117, 175, 82},
+     {95, 84, 53, 89, 128, 100, 113, 101, 45},
+     {75, 79, 123, 47, 51, 128, 81, 171, 1},
+     {57, 17, 5, 71, 102, 57, 53, 41, 49},
+     {38, 33, 13, 121, 57, 73, 26, 1, 85},
+     {41, 10, 67, 138, 77, 110, 90, 47, 114},
+     {115, 21, 2, 10, 102, 255, 166, 23, 6},
+     {101, 29, 16, 10, 85, 128, 101, 196, 26},
+     {57, 18, 10, 102, 102, 213, 34, 20, 43},
+     {117, 20, 15, 36, 163, 128, 68, 1, 26}},
+    {{102, 61, 71, 37, 34, 53, 31, 243, 192},
+     {69, 60, 71, 38, 73, 119, 28, 222, 37},
+     {68, 45, 128, 34, 1, 47, 11, 245, 171},
+     {62, 17, 19, 70, 146, 85, 55, 62, 70},
+     {37, 43, 37, 154, 100, 163, 85, 160, 1},
+     {63, 9, 92, 136, 28, 64, 32, 201, 85},
+     {75, 15, 9, 9, 64, 255, 184, 119, 16},
+     {86, 6, 28, 5, 64, 255, 25, 248, 1},
+     {56, 8, 17, 132, 137, 255, 55, 116, 128},
+     {58, 15, 20, 82, 135, 57, 26, 121, 40}},
+    {{164, 50, 31, 137, 154, 133, 25, 35, 218},
+     {51, 103, 44, 131, 131, 123, 31, 6, 158},
+     {86, 40, 64, 135, 148, 224, 45, 183, 128},
+     {22, 26, 17, 131, 240, 154, 14, 1, 209},
+     {45, 16, 21, 91, 64, 222, 7, 1, 197},
+     {56, 21, 39, 155, 60, 138, 23, 102, 213},
+     {83, 12, 13, 54, 192, 255, 68, 47, 28},
+     {85, 26, 85, 85, 128, 128, 32, 146, 171},
+     {18, 11, 7, 63, 144, 171, 4, 4, 246},
+     {35, 27, 10, 146, 174, 171, 12, 26, 128}},
+    {{190, 80, 35, 99, 180, 80, 126, 54, 45},
+     {85, 126, 47, 87, 176, 51, 41, 20, 32},
+     {101, 75, 128, 139, 118, 146, 116, 128, 85},
+     {56, 41, 15, 176, 236, 85, 37, 9, 62},
+     {71, 30, 17, 119, 118, 255, 17, 18, 138},
+     {101, 38, 60, 138, 55, 70, 43, 26, 142},
+     {146, 36, 19, 30, 171, 255, 97, 27, 20},
+     {138, 45, 61, 62, 219, 1, 81, 188, 64},
+     {32, 41, 20, 117, 151, 142, 20, 21, 163},
+     {112, 19, 12, 61, 195, 128, 48, 4, 24}}};
+
+// Writes intra-4x4 prediction 'mode' to 'bw' as a sequence of binary decisions, each coded
+// with one entry of prob[0..8]; nothing more is written once a decision bit comes back 0
+// (except after prob[3], which only selects a branch). Returns 'mode' unchanged.
+static int PutI4Mode(VP8BitWriter* const bw, int mode, const uint8_t* const prob) {
+    // The first three decisions single out B_DC_PRED, B_TM_PRED and B_VE_PRED in turn.
+    if (!VP8PutBit(bw, mode != B_DC_PRED, prob[0])) return mode;
+    if (!VP8PutBit(bw, mode != B_TM_PRED, prob[1])) return mode;
+    if (!VP8PutBit(bw, mode != B_VE_PRED, prob[2])) return mode;
+    if (VP8PutBit(bw, mode >= B_LD_PRED, prob[3])) {
+        // mode >= B_LD_PRED: test B_LD_PRED, then B_VL_PRED, then B_HD_PRED.
+        if (VP8PutBit(bw, mode != B_LD_PRED, prob[6]) &&
+            VP8PutBit(bw, mode != B_VL_PRED, prob[7])) {
+            VP8PutBit(bw, mode != B_HD_PRED, prob[8]);
+        }
+    } else if (VP8PutBit(bw, mode != B_HE_PRED, prob[4])) {
+        // mode < B_LD_PRED and not B_HE_PRED: one more bit tells B_RD_PRED from the rest.
+        VP8PutBit(bw, mode != B_RD_PRED, prob[5]);
+    }
+    return mode;
+}
+
+// Codes an intra-16x16 'mode' with two bits: a group bit (proba 156), then one bit inside the group.
+static void PutI16Mode(VP8BitWriter* const bw, int mode) {
+    const int in_tm_h_group = VP8PutBit(bw, (mode == TM_PRED) || (mode == H_PRED), 156);
+    // {TM, HE} group: bit set for TM_PRED, proba 128. {VE, DC} group: bit set for V_PRED, proba 163.
+    if (in_tm_h_group) VP8PutBit(bw, mode == TM_PRED, 128);
+    else VP8PutBit(bw, mode == V_PRED, 163);
+}
+
+// Codes 'uv_mode' as up to three "is it not X?" bits, tested in the order DC_PRED, V_PRED, H_PRED.
+static void PutUVMode(VP8BitWriter* const bw, int uv_mode) {
+    if (!VP8PutBit(bw, uv_mode != DC_PRED, 142)) return; // DC_PRED: one bit is enough
+    if (!VP8PutBit(bw, uv_mode != V_PRED, 114)) return;  // V_PRED: two bits
+    // Third bit: 0 for H_PRED, 1 otherwise (else: TM_PRED).
+    VP8PutBit(bw, uv_mode != H_PRED, 183);
+}
+
+static void PutSegment(VP8BitWriter* const bw, int s, const uint8_t* p) {
+    const int hi = VP8PutBit(bw, s >= 2, p[0]); // first bit: (s >= 2), coded with p[0]
+    VP8PutBit(bw, s & 1, hi ? p[2] : p[1]);     // second bit: (s & 1), with p[2] if the first bit was set, else p[1]
+}
+
+// Writes, for each macroblock visited by the iterator: segment id, skip flag and intra modes.
+void VP8CodeIntraModes(VP8Encoder* const enc) {
+    VP8BitWriter* const bw = &enc->bw_;
+    VP8EncIterator it;
+    VP8IteratorInit(enc, &it);
+    // The body runs once per macroblock, until VP8IteratorNext() returns 0.
+    do {
+        const VP8MBInfo* const info = it.mb_;
+        const uint8_t* const modes = it.preds_;
+        // Segment id and skip flag are only written when the matching feature is enabled.
+        if (enc->segment_hdr_.update_map_) PutSegment(bw, info->segment_, enc->proba_.segments_);
+        if (enc->proba_.use_skip_proba_) VP8PutBit(bw, info->skip_, enc->proba_.skip_proba_);
+        if (!VP8PutBit(bw, (info->type_ != 0), 145)) { // i4x4: a 4x4 grid of sub-block modes
+            const int stride = enc->preds_w_;
+            int row, col;
+            for (row = 0; row < 4; ++row) {
+                const uint8_t* const cur = modes + row * stride;
+                const uint8_t* const above = cur - stride;
+                // Each mode is coded with the probabilities selected by the mode above it
+                // and the mode to its left; cur[-1] is the entry just left of this row.
+                int left = cur[-1];
+                for (col = 0; col < 4; ++col) {
+                    left = PutI4Mode(bw, cur[col], kBModesProba[above[col]][left]);
+                }
+            }
+        } else { // i16x16: a single mode, read from modes[0]
+            PutI16Mode(bw, modes[0]);
+        }
+        // The chroma mode is written for both macroblock types.
+        PutUVMode(bw, info->uv_mode_);
+    } while (VP8IteratorNext(&it));
+}
+
+//------------------------------------------------------------------------------
+// Coefficient probability updates (paragraph 13)
+
+const uint8_t VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = { // 4, 8, 4(3), 8(11)}=1024 *
+    {{{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255},
+      {249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255},
+      {234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255},
+      {250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255},
+      {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+       255}}}, /////////////////////////////////////////////////////////////////////
+    {{{217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255},
+      {234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255}},
+     {{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+      {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+       255}}}, /////////////////////////////////////////////////////////////
+    {{{186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255},
+      {234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255},
+      {251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255}},
+     {{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255}},
+     {{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+       255}}}, /////////////////////////////////////////////////////////////
+    {{{248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255},
+      {248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+      {246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+      {252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255}},
+     {{255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255},
+      {248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+      {253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+      {252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+      {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}}};
+
+// Writes the coefficient probabilities, then the skip probability (when enabled), to 'bw'.
+void VP8WriteProbas(VP8BitWriter* const bw, const VP8EncProba* const probas) {
+    int type, band, ctx, i;
+    for (type = 0; type < NUM_TYPES; ++type) {
+        for (band = 0; band < NUM_BANDS; ++band) {
+            for (ctx = 0; ctx < NUM_CTX; ++ctx) {
+                for (i = 0; i < NUM_PROBAS; ++i) {
+                    // One flag per entry; the 8-bit value follows only if it differs from the default.
+                    const uint8_t cur = probas->coeffs_[type][band][ctx][i];
+                    const int changed = (cur != VP8CoeffsProba0[type][band][ctx][i]);
+                    const int sent = VP8PutBit(bw, changed, VP8CoeffsUpdateProba[type][band][ctx][i]);
+                    if (sent) VP8PutBits(bw, cur, 8);
+                }
+            }
+        }
+    }
+    // Skip probability: a uniformly-coded flag, then the 8-bit value when the flag is set.
+    if (VP8PutBitUniform(bw, probas->use_skip_proba_)) VP8PutBits(bw, probas->skip_proba_, 8);
+}
diff --git a/codec/L2/demos/webpEnc/host/src/enc/vp8enci.h b/codec/L2/demos/webpEnc/host/src/enc/vp8enci.h
new file mode 100644
index 0000000000..e95eea9781
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/vp8enci.h
@@ -0,0 +1,614 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   WebP encoder: internal header.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_ENC_VP8ENCI_H_
+#define WEBP_ENC_VP8ENCI_H_
+
+#include <string.h> // for memcpy()
+#include "../dec/common.h"
+#include "../dsp/dsp.h"
+#include "../utils/bit_writer.h"
+#include "../utils/thread.h"
+#include "../utils/utils.h"
+#include "../webp/encode.h"
+
+#include "vp8_hls_syn.h"
+
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+#include "./vp8li.h"
+#endif // WEBP_EXPERIMENTAL_FEATURES
+
+#include "CL/cl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Various defines and enums
+
+// version numbers
+#define ENC_MAJ_VERSION 0
+#define ENC_MIN_VERSION 5
+#define ENC_REV_VERSION 0
+
+enum { // encoder-wide limits
+    MAX_LF_LEVELS = 64,      // Number of loop filter levels (levels are 0..63); second dimension of LFStats
+    MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost; CostArray rows hold 67 + 1 entries
+    MAX_LEVEL = 2047         // max level (note: max codable is 2047 + 67)
+};
+
+typedef enum {             // Rate-distortion optimization (rd-opt) levels
+    RD_OPT_NONE = 0,       // no rate-distortion optimization
+    RD_OPT_BASIC = 1,      // basic scoring (no trellis quantization)
+    RD_OPT_TRELLIS = 2,    // perform trellis-quant on the final decision only
+    RD_OPT_TRELLIS_ALL = 3 // trellis-quant for every scoring (much slower)
+} VP8RDLevel;
+
+// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
+// The original or reconstructed samples can be accessed using VP8Scan[].
+// The predicted blocks can be accessed using offsets to yuv_p_ and
+// the arrays VP8*ModeOffsets[].
+// * YUV Samples area (yuv_in_/yuv_out_/yuv_out2_)
+//   (see VP8Scan[] for accessing the blocks, along with
+//   Y_OFF_ENC/U_OFF_ENC/V_OFF_ENC):
+//             +----+----+
+//  Y_OFF_ENC  |YYYY|UUVV|
+//  U_OFF_ENC  |YYYY|UUVV|
+//  V_OFF_ENC  |YYYY|....| <- 25% wasted U/V area
+//             |YYYY|....|
+//             +----+----+
+// * Prediction area ('yuv_p_', size = PRED_SIZE_ENC)
+//   Intra16 predictions (16x16 block each, two per row):
+//         |I16DC16|I16TM16|
+//         |I16VE16|I16HE16|
+//   Chroma U/V predictions (16x8 block each, two per row):
+//         |C8DC8|C8TM8|
+//         |C8VE8|C8HE8|
+//   Intra 4x4 predictions (4x4 block each)
+//         |I4DC4 I4TM4 I4VE4 I4HE4|I4RD4 I4VR4 I4LD4 I4VL4|
+//         |I4HD4 I4HU4 I4TMP .....|.......................| <- ~31% wasted
+#define YUV_SIZE_ENC (BPS * 16)
+#define PRED_SIZE_ENC (32 * BPS + 16 * BPS + 8 * BPS) // I16+Chroma+I4 preds
+#define Y_OFF_ENC (0)
+#define U_OFF_ENC (16)
+#define V_OFF_ENC (16 + 8)
+
+extern const int VP8Scan[16];         // in quant.c
+extern const int VP8UVModeOffsets[4]; // in analyze.c
+extern const int VP8I16ModeOffsets[4];
+extern const int VP8I4ModeOffsets[NUM_BMODES];
+
+// Layout of prediction blocks
+// intra 16x16
+#define I16DC16 (0 * 16 * BPS)
+#define I16TM16 (I16DC16 + 16)
+#define I16VE16 (1 * 16 * BPS)
+#define I16HE16 (I16VE16 + 16)
+// chroma 8x8, two U/V blocks side by side (hence: 16x8 each)
+#define C8DC8 (2 * 16 * BPS)
+#define C8TM8 (C8DC8 + 1 * 16)
+#define C8VE8 (2 * 16 * BPS + 8 * BPS)
+#define C8HE8 (C8VE8 + 1 * 16)
+// intra 4x4
+#define I4DC4 (3 * 16 * BPS + 0)
+#define I4TM4 (I4DC4 + 4)
+#define I4VE4 (I4DC4 + 8)
+#define I4HE4 (I4DC4 + 12)
+#define I4RD4 (I4DC4 + 16)
+#define I4VR4 (I4DC4 + 20)
+#define I4LD4 (I4DC4 + 24)
+#define I4VL4 (I4DC4 + 28)
+#define I4HD4 (3 * 16 * BPS + 4 * BPS)
+#define I4HU4 (I4HD4 + 4)
+#define I4TMP (I4HD4 + 8)
+
+typedef int64_t score_t; // type used for scores, rate, distortion
+// Note that MAX_COST is not the maximum allowed by sizeof(score_t),
+// in order to allow overflowing computations.
+#define MAX_COST ((score_t)0x7fffffffffffffLL)
+
+#define QFIX 17
+#define BIAS(b) ((b) << (QFIX - 8))
+// Fixed-point divide: (n * iQ + B) >> QFIX. Fun fact: this shift is the _only_ place where bits are discarded.
+static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) {
+    const uint32_t scaled = n * iQ + B; // uint32_t arithmetic, exactly as in the one-line form
+    return (int)(scaled >> QFIX);
+}
+
+// Uncomment the following to remove token-buffer code:
+// #define DISABLE_TOKEN_BUFFER
+
+//------------------------------------------------------------------------------
+// Headers
+/*
+enum { MB_FEATURE_TREE_PROBS = 3,
+       NUM_MB_SEGMENTS = 4,
+       NUM_REF_LF_DELTAS = 4,
+       NUM_MODE_LF_DELTAS = 4,    // I4x4, ZERO, *, SPLIT
+       MAX_NUM_PARTITIONS = 8,
+       // Probabilities
+       NUM_TYPES = 4,   // 0: i16-AC,  1: i16-DC,  2:chroma-AC,  3:i4-AC
+       NUM_BANDS = 8,
+       NUM_CTX = 3,
+       NUM_PROBAS = 11
+     };*/
+typedef uint32_t proba_t;                        // 16b + 16b
+typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS]; // 3x11 bytes: the [NUM_CTX][NUM_PROBAS] slice of VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS]
+typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS]; // same shape as ProbaArray, one proba_t per entry
+typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1]; // per context: one 16-bit cost for each level 0..MAX_VARIABLE_LEVEL
+typedef const uint16_t* (*CostArrayPtr)[NUM_CTX]; // for easy casting
+typedef const uint16_t* CostArrayMap[16][NUM_CTX]; // 16 x NUM_CTX table of pointers to 16-bit costs
+typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats, dimensioned [segment][filter level]
+
+typedef struct VP8Encoder VP8Encoder;
+
+// Segment features: number of segments in use and whether the segment map is transmitted.
+typedef struct {
+    int num_segments_; // Actual number of segments. 1 segment only = segmentation unused.
+    int update_map_;   // whether to update the segment map or not.
+                       // Must be 0 if there's only 1 segment.
+    int size_;         // bit-cost for transmitting the segment map
+} VP8EncSegmentHeader;
+
+// Struct collecting all frame-persistent probabilities.
+typedef struct {
+    uint8_t segments_[3];                        // probabilities for segment tree
+    uint8_t skip_proba_;                         // final probability of being skipped.
+    ProbaArray coeffs_[NUM_TYPES][NUM_BANDS];    // 1056 bytes = 1byte *3*11*4*8
+    StatsArray stats_[NUM_TYPES][NUM_BANDS];     // 4224 bytes = 4byte *3*11*4*8
+    CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 13056 bytes= 2byte *3*68*4*8
+    CostArrayMap remapped_costs_[NUM_TYPES];     // 16*3*4 = 192 pointers (1536 bytes with 8-byte pointers)
+    int dirty_;                                  // if true, need to call VP8CalculateLevelCosts()
+    int use_skip_proba_;                         // Note: we always use skip_proba for now.
+    int nb_skip_;                                // number of skipped blocks
+} VP8EncProba;
+
+// Filter parameters. Not actually used in the code (we don't perform
+// the in-loop filtering), but filled from the user's config.
+typedef struct {
+    int simple_;        // filtering type: 0=complex, 1=simple
+    int level_;         // base filter level [0..63]
+    int sharpness_;     // filter sharpness [0..7]
+    int i4x4_lf_delta_; // delta filter level for i4x4 relative to i16x16
+} VP8EncFilterHeader;
+
+//------------------------------------------------------------------------------
+// Information about the macroblocks.
+
+typedef struct { // per-macroblock coding decisions, packed in bit-fields
+    // block type
+    unsigned int type_ : 2;    // 0=i4x4, 1=i16x16
+    unsigned int uv_mode_ : 2; // chroma prediction mode (2 bits)
+    unsigned int skip_ : 1;    // skip flag (1 bit)
+    unsigned int segment_ : 2; // segment id (2 bits, so 0..3)
+    uint8_t alpha_;            // quantization-susceptibility
+} VP8MBInfo;
+
+typedef struct VP8Matrix { // one quantization matrix: five parallel arrays of 16 entries
+    uint16_t q_[16];       // quantizer steps
+    uint16_t iq_[16];      // reciprocals of the quantizer steps, fixed point.
+    uint32_t bias_[16];    // rounding bias
+    uint32_t zthresh_[16]; // value below which a coefficient is zeroed
+    uint16_t sharpen_[16]; // frequency boosters for slight sharpening
+} VP8Matrix;
+
+typedef struct { // quantization and filtering parameters attached to one segment
+    VP8Matrix y1_, y2_, uv_; // quantization matrices (see VP8Matrix)
+    int alpha_;              // quant-susceptibility, range [-127,127]. Zero is neutral.
+                             // Lower values indicate a lower risk of blurriness.
+    int beta_;               // filter-susceptibility, range [0,255].
+    int quant_;              // final segment quantizer.
+    int fstrength_;          // final in-loop filtering strength
+    int max_edge_;           // max edge delta (for filtering strength)
+    int min_disto_;          // minimum distortion required to trigger filtering record
+    // reactivities (the lambda_* values; how they are used is not visible in this header)
+    int lambda_i16_, lambda_i4_, lambda_uv_;
+    int lambda_mode_, lambda_trellis_, tlambda_;
+    int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_;
+} VP8SegmentInfo;
+
+// Handy transient struct to accumulate score and info during RD-optimization
+// and mode evaluation.
+typedef struct {
+    score_t D, SD;                // Distortion, spectral distortion
+    score_t H, R, score;          // header bits, rate, score.
+    int16_t y_dc_levels[16];      // Quantized levels for luma-DC (16 values),
+    int16_t y_ac_levels[16][16];  // luma-AC (16 x 16 values),
+    int16_t uv_levels[4 + 4][16]; // and chroma ((4 + 4) x 16 values).
+    int mode_i16;                 // mode number for intra16 prediction
+    uint8_t modes_i4[16];         // mode numbers for intra4 predictions
+    int mode_uv;                  // mode number of chroma prediction
+    uint32_t nz;                  // non-zero blocks
+} VP8ModeScore;
+
+// Iterator structure to iterate through macroblocks, pointing to the
+// right neighbouring data (samples, predictions, contexts, ...)
+typedef struct {
+    int x_, y_;                // current macroblock
+    int y_stride_, uv_stride_; // respective strides
+    uint8_t* yuv_in_;          // input samples
+    uint8_t* yuv_out_;         // output samples
+    uint8_t* yuv_out2_;        // secondary buffer swapped with yuv_out_.
+    uint8_t* yuv_p_;           // scratch buffer for prediction
+    VP8Encoder* enc_;          // back-pointer
+    VP8MBInfo* mb_;            // current macroblock
+    VP8BitWriter* bw_;         // current bit-writer
+    uint8_t* preds_;           // intra mode predictors (4x4 blocks)
+    uint32_t* nz_;             // non-zero pattern
+    uint8_t i4_boundary_[37];  // 32+5 boundary samples needed by intra4x4
+    uint8_t* i4_top_;          // pointer to the current top boundary sample
+    int i4_;                   // current intra4x4 mode being tested
+    int top_nz_[9];            // top-non-zero context.
+    int left_nz_[9];           // left-non-zero. left_nz[8] is independent.
+    uint64_t bit_count_[4][3]; // bit counters for coded levels.
+    uint64_t luma_bits_;       // macroblock bit-cost for luma
+    uint64_t uv_bits_;         // macroblock bit-cost for chroma
+    LFStats* lf_stats_;        // filter stats (borrowed from enc_)
+    int do_trellis_;           // if true, perform extra level optimisation
+    int count_down_;           // number of mb still to be processed
+    int count_down0_;          // starting counter value (for progress)
+    int percent0_;             // saved initial progress percent
+
+    uint8_t* y_left_; // left luma samples (addressable from index -1 to 15).
+    uint8_t* u_left_; // left u samples (addressable from index -1 to 7)
+    uint8_t* v_left_; // left v samples (addressable from index -1 to 7)
+
+    uint8_t* y_top_;  // top luma samples at position 'x_'
+    uint8_t* uv_top_; // top u/v samples at position 'x_', packed as 16 bytes
+
+    // memory for storing y/u/v_left_
+    uint8_t yuv_left_mem_[17 + 16 + 16 + 8 + WEBP_ALIGN_CST];
+    // memory for yuv_*
+    uint8_t yuv_mem_[3 * YUV_SIZE_ENC + PRED_SIZE_ENC + WEBP_ALIGN_CST];
+} VP8EncIterator;
+
+// in iterator.c
+// must be called first
+void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it);
+// restart a scan
+void VP8IteratorReset(VP8EncIterator* const it);
+// reset iterator position to row 'y'
+void VP8IteratorSetRow(VP8EncIterator* const it, int y);
+// set count down (=number of iterations to go)
+void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down);
+// return true if iteration is finished
+int VP8IteratorIsDone(const VP8EncIterator* const it);
+// Import uncompressed samples from source.
+// If tmp_32 is not NULL, import boundary samples too.
+// tmp_32 is a 32-bytes scratch buffer that must be aligned in memory.
+void VP8IteratorImport(VP8EncIterator* const it, uint8_t* tmp_32);
+// export decimated samples
+void VP8IteratorExport(const VP8EncIterator* const it);
+// go to next macroblock. Returns false if not finished.
+int VP8IteratorNext(VP8EncIterator* const it);
+// save the yuv_out_ boundary values to top_/left_ arrays for next iterations.
+void VP8IteratorSaveBoundary(VP8EncIterator* const it);
+// Report progression based on macroblock rows. Return 0 for user-abort request.
+int VP8IteratorProgress(const VP8EncIterator* const it, int final_delta_percent);
+// Intra4x4 iterations
+void VP8IteratorStartI4(VP8EncIterator* const it);
+// returns true if not done.
+int VP8IteratorRotateI4(VP8EncIterator* const it, const uint8_t* const yuv_out);
+
+// Non-zero context setup/teardown
+void VP8IteratorNzToBytes(VP8EncIterator* const it);
+void VP8IteratorBytesToNz(VP8EncIterator* const it);
+
+// Helper functions to set mode properties
+void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode);
+void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes);
+void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode);
+void VP8SetSkip(const VP8EncIterator* const it, int skip);
+void VP8SetSegment(const VP8EncIterator* const it, int segment);
+
+//------------------------------------------------------------------------------
+// Paginated token buffer
+
+typedef struct VP8Tokens VP8Tokens; // struct details in token.c
+
+#define TOKEN_RECONSTRUCT 0
+#define PAGE_COUNT 30
+#define TOKENS_COUNT_PER_PAGE 155040 // 615600 //155040 //mb_w * mb_h * 4 * scale
+#define TOKENS_SIZE 2                // sizeof(token_t)
+#define PAGE_POINTER_SIZE 8          // sizeof(*page)
+#define PAGE_SIZE ((TOKENS_COUNT_PER_PAGE * TOKENS_SIZE) + PAGE_POINTER_SIZE)
+typedef struct {
+    // Token buffer with inline storage; unlike VP8TBuffer there is no page list
+    // (the former VP8Tokens* pages_ / VP8Tokens** last_page_ members are disabled).
+    uint16_t tokens_[MAX_NUM_MB_W * MAX_NUM_MB_H * 1024 * sizeof(uint16_t)];
+    // NOTE(review): the element count above is also multiplied by sizeof(uint16_t) -- confirm intended.
+    int cur_page_;
+    int page_count_;
+    int left_;      // how many free tokens left before the page is full
+    int page_size_; // number of tokens per page
+    int error_;     // true in case of malloc error
+} VP8TBufferKernel;
+
+typedef struct {
+#if !defined(DISABLE_TOKEN_BUFFER)
+    VP8Tokens* pages_;      // first page
+    VP8Tokens** last_page_; // last page
+    uint16_t* tokens_;      // set to (*last_page_)->tokens_
+    int left_;              // how many free tokens left before the page is full
+    int page_size_;         // number of tokens per page
+#endif
+    int error_; // true in case of malloc error
+} VP8TBuffer;
+
+void debug_tokens(VP8TBuffer* const b, VP8EncIterator* const it_var);
+// initialize an empty buffer
+void VP8TBufferInit(VP8TBuffer* const b, int page_size);
+void VP8TBufferClear(VP8TBuffer* const b); // de-allocate pages memory
+
+void VP8TBufferKernelInit(VP8TBufferKernel* const b, int page_size);
+
+#if !defined(DISABLE_TOKEN_BUFFER)
+
+// Finalizes bitstream when probabilities are known.
+// Deletes the allocated token memory if final_pass is true.
+int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw, const uint8_t* const probas, int final_pass);
+
+#if TOKEN_RECONSTRUCT
+// record the coding of coefficients without knowing the probabilities yet
+int VP8RecordCoeffTokens(const int ctx,
+                         const int coeff_type,
+                         int first,
+                         int last,
+                         const int16_t* const coeffs,
+                         VP8TBufferKernel* const tokens);
+#else
+int VP8RecordCoeffTokens(
+    const int ctx, const int coeff_type, int first, int last, const int16_t* const coeffs, VP8TBuffer* const tokens);
+
+#endif
+void ReadTokenFromKernel(VP8TBuffer* const tokens, VP8TBufferKernel* const tokens_kernel);
+
+// Estimate the final coded size given a set of 'probas'.
+size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas);
+
+// unused for now
+void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats);
+
+#endif // !DISABLE_TOKEN_BUFFER
+
+//------------------------------------------------------------------------------
+// VP8Encoder
+
+struct VP8Encoder { // encoder state: config, bit-writers, quantizers, statistics, working memory
+    const WebPConfig* config_; // user configuration and parameters
+    WebPPicture* pic_;         // input / output picture
+
+    // headers
+    VP8EncFilterHeader filter_hdr_;   // filtering information
+    VP8EncSegmentHeader segment_hdr_; // segment information
+
+    int profile_; // VP8's profile, deduced from Config.
+
+    // dimension, in macroblock units.
+    int mb_w_, mb_h_;
+    int preds_w_; // stride of the *preds_ prediction plane (=4*mb_w + 1)
+
+    // number of partitions (1, 2, 4 or 8 = MAX_NUM_PARTITIONS)
+    int num_parts_;
+
+    // per-partition boolean encoders (bit-writers).
+    VP8BitWriter bw_;                        // part0
+    VP8BitWriter parts_[MAX_NUM_PARTITIONS]; // token partitions
+    VP8TBuffer tokens_;                      // token buffer
+
+    int percent_; // for progress
+
+    // transparency blob
+    int has_alpha_;
+    uint8_t* alpha_data_; // non-NULL if transparency is present
+    uint32_t alpha_data_size_; // size of alpha_data_ (presumably in bytes -- confirm)
+    WebPWorker alpha_worker_;
+
+    // quantization info (one set of DC/AC dequant factor per segment)
+    VP8SegmentInfo dqm_[NUM_MB_SEGMENTS];
+    int base_quant_; // nominal quantizer value. Only used
+                     // for relative coding of segments' quant.
+    int alpha_;      // global susceptibility (<=> complexity)
+    int uv_alpha_;   // U/V quantization susceptibility
+    // global offset of quantizers, shared by all segments
+    int dq_y1_dc_;
+    int dq_y2_dc_, dq_y2_ac_;
+    int dq_uv_dc_, dq_uv_ac_;
+
+    // probabilities and statistics
+    VP8EncProba proba_;
+    uint64_t sse_[4];    // sum of Y/U/V/A squared errors for all macroblocks
+    uint64_t sse_count_; // pixel count for the sse_[] stats
+    int coded_size_;
+    int residual_bytes_[3][4];
+    int block_count_[3];
+
+    // quality/speed settings
+    int method_;              // 0=fastest, 6=best/slowest.
+    VP8RDLevel rd_opt_level_; // Deduced from method_.
+    int max_i4_header_bits_;  // partition #0 safeness factor
+    int thread_level_;        // derived from config->thread_level
+    int do_search_;           // derived from config->target_XXX
+    int use_tokens_;          // if true, use token buffer
+
+    // Memory
+    VP8MBInfo* mb_info_; // contextual macroblock info (mb_w_ + 1)
+    uint8_t* preds_;     // prediction modes: (4*mb_w+1) * (4*mb_h+1)
+    uint32_t* nz_;       // non-zero bit context: mb_w+1
+    uint8_t* y_top_;     // top luma samples.
+    uint8_t* uv_top_;    // top u/v samples.
+                         // U and V are packed into 16 bytes (8 U + 8 V)
+    LFStats* lf_stats_;  // autofilter stats (if NULL, autofilter is off)
+};
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+// in tree.c
+extern const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
+extern const uint8_t VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
+// Reset the token probabilities to their initial (default) values
+void VP8DefaultProbas(VP8Encoder* const enc);
+// Write the token probabilities
+void VP8WriteProbas(VP8BitWriter* const bw, const VP8EncProba* const probas);
+// Writes the partition #0 modes (that is: all intra modes)
+void VP8CodeIntraModes(VP8Encoder* const enc);
+
+// in syntax.c
+// Generates the final bitstream by coding the partition0 and headers,
+// and appending an assembly of all the pre-coded token partitions.
+// Return true if everything is ok.
+int VP8EncWrite(VP8Encoder* const enc);
+// Release memory allocated for bit-writing in VP8EncLoop & seq.
+void VP8EncFreeBitWriters(VP8Encoder* const enc);
+
+// in frame.c
+extern const uint8_t VP8Cat3[];
+extern const uint8_t VP8Cat4[];
+extern const uint8_t VP8Cat5[];
+extern const uint8_t VP8Cat6[];
+
+// Form all the four Intra16x16 predictions in the yuv_p_ cache
+void VP8MakeLuma16Preds(const VP8EncIterator* const it);
+// Form all the four Chroma8x8 predictions in the yuv_p_ cache
+void VP8MakeChroma8Preds(const VP8EncIterator* const it);
+// Form all the ten Intra4x4 predictions in the yuv_p_ cache
+// for the 4x4 block it->i4_
+void VP8MakeIntra4Preds(const VP8EncIterator* const it);
+// Rate calculation
+int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);
+int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
+int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);
+// Main coding calls
+int VP8EncLoop(VP8Encoder* const enc);
+int VP8EncLoopOcl(VP8Encoder* const enc);
+int VP8EncTokenLoop(VP8Encoder* const enc);
+
+int PostLoopFinalize(VP8EncIterator* const, int);
+
+int VP8EncTokenLoopAsyncPicInfoSet(VP8Encoder*&, AllPicInfo&);
+
+int VP8EncTokenLoopAsyncHost2Device(
+    const int, VP8Encoder**, AllPicInfo*, const int, cl_uint, cl_event*, std::array<cl_event, 4>&);
+
+int PostAnalysis(VP8Encoder* const&, VP8EncIterator&);
+
+int VP8EncTokenLoopAsyncPredKernel(const int buf,
+                                   cl_uint num_wait_event,
+                                   cl_event* wait_event,
+                                   std::array<cl_event, 1>& event);
+
+int VP8EncTokenLoopAsyncACKernel(const int buf,
+                                 cl_uint num_wait_event,
+                                 cl_event* wait_event,
+                                 std::array<cl_event, 1>& event);
+
+int VP8EncTokenLoopAsyncDevice2Host(const int buf,
+                                    cl_uint num_wait_event,
+                                    cl_event* wait_event,
+                                    std::array<cl_event, 4>& event);
+
+int VP8EncTokenLoopAsyncAfterAC(VP8Encoder* enc,
+                                uint8_t* pout_prob,
+                                uint8_t* pout_bw,   // = malloc(SIZE32_MEM_BW*4);
+                                uint8_t* pout_ret,  // = malloc(SIZE32_MEM_RET*4);
+                                uint8_t* pout_pred, // = malloc(SIZE32_MEM_PRED*4);
+                                VP8EncIterator& it);
+
+// in webpenc.c
+// Assign an error code to a picture. Return false for convenience.
+int WebPEncodingSetError(const WebPPicture* const pic, WebPEncodingError error);
+int WebPReportProgress(const WebPPicture* const pic, int percent, int* const percent_store);
+
+// in analysis.c
+// Main analysis loop. Decides the segmentations and complexity.
+// Assigns a first guess for Intra16 and uvmode_ prediction modes.
+int VP8EncAnalyze(VP8Encoder* const& enc);
+// Main analysis using OpenCL
+int VP8EncAnalyzeOcl(VP8Encoder* const enc);
+
+// in quant.c
+// Sets up segment's quantization values, base_quant_ and filter strengths.
+void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
+// Pick best modes and fills the levels. Returns true if skipped.
+int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, VP8RDLevel rd_opt);
+
+// in alpha.c
+void VP8EncInitAlpha(VP8Encoder* const enc);  // initialize alpha compression
+int VP8EncStartAlpha(VP8Encoder* const enc);  // start alpha coding process
+int VP8EncFinishAlpha(VP8Encoder* const enc); // finalize compressed data
+int VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data
+
+// in filter.c
+
+// SSIM utils
+typedef struct { double w, xm, ym, xxm, xym, yym; } DistoStats;
+void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst);
+void VP8SSIMAccumulatePlane(
+    const uint8_t* src1, int stride1, const uint8_t* src2, int stride2, int W, int H, DistoStats* const stats);
+double VP8SSIMGet(const DistoStats* const stats);
+double VP8SSIMGetSquaredError(const DistoStats* const stats);
+
+// autofilter
+void VP8InitFilter(VP8EncIterator* const it);
+void VP8StoreFilterStats(VP8EncIterator* const it);
+void VP8AdjustFilterStrength(VP8EncIterator* const it);
+void VP8AdjustFilterStrengthOcl(VP8Encoder* const enc);
+
+// returns the approximate filtering strength needed to smooth an edge
+// step of 'delta', given a sharpness parameter 'sharpness'.
+int VP8FilterStrengthFromDelta(int sharpness, int delta);
+
+// misc utils for picture_*.c:
+
+// Remove reference to the ARGB/YUVA buffer (doesn't free anything).
+void WebPPictureResetBuffers(WebPPicture* const picture);
+
+// Allocates ARGB buffer of given dimension (previous one is always free'd).
+// Preserves the YUV(A) buffer. Returns false in case of error (invalid param,
+// out-of-memory).
+int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height);
+
+// Allocates YUVA buffer of given dimension (previous one is always free'd).
+// Uses picture->csp to determine whether an alpha buffer is needed.
+// Preserves the ARGB buffer.
+// Returns false in case of error (invalid param, out-of-memory).
+int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height);
+
+// Clean-up the RGB samples under fully transparent area, to help lossless
+// compressibility (no guarantee, though). Assumes that pic->use_argb is true.
+void WebPCleanupTransparentAreaLossless(WebPPicture* const pic);
+
+// in near_lossless.c
+// Near lossless preprocessing in RGB color-space.
+int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality);
+// Near lossless preprocessing in RGB color-space ('Ocl': presumably the OpenCL path -- confirm).
+int VP8ApplyNearLosslessOcl(int xsize, int ysize, uint32_t* argb, int quality);
+// Near lossless adjustment for predictors.
+void VP8ApplyNearLosslessPredict(int xsize,
+                                 int ysize,
+                                 int pred_bits,
+                                 const uint32_t* argb_orig,
+                                 uint32_t* argb,
+                                 uint32_t* argb_scratch,
+                                 const uint32_t* const transform_data,
+                                 int quality,
+                                 int subtract_green);
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_ENC_VP8ENCI_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/enc/vp8enci_ryanw.h b/codec/L2/demos/webpEnc/host/src/enc/vp8enci_ryanw.h
new file mode 100644
index 0000000000..8dac45704b
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/vp8enci_ryanw.h
@@ -0,0 +1,569 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   WebP encoder: internal header.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_ENC_VP8ENCI_H_
+#define WEBP_ENC_VP8ENCI_H_
+
+#include <string.h> // for memcpy()
+#include "../dec/common.h"
+#include "../dsp/dsp.h"
+#include "../utils/bit_writer.h"
+#include "../utils/thread.h"
+#include "../utils/utils.h"
+#include "../webp/encode.h"
+
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+#include "./vp8li.h"
+#endif // WEBP_EXPERIMENTAL_FEATURES
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Various defines and enums
+
+// version numbers
+#define ENC_MAJ_VERSION 0
+#define ENC_MIN_VERSION 5
+#define ENC_REV_VERSION 0
+
+enum {
+    MAX_LF_LEVELS = 64,      // Maximum loop filter level
+    MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost
+    MAX_LEVEL = 2047         // max level (note: max codable is 2047 + 67)
+};
+
+typedef enum {             // Rate-distortion optimization levels
+    RD_OPT_NONE = 0,       // no rd-opt
+    RD_OPT_BASIC = 1,      // basic scoring (no trellis)
+    RD_OPT_TRELLIS = 2,    // perform trellis-quant on the final decision only
+    RD_OPT_TRELLIS_ALL = 3 // trellis-quant for every scoring (much slower)
+} VP8RDLevel;
+
+// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
+// The original or reconstructed samples can be accessed using VP8Scan[].
+// The predicted blocks can be accessed using offsets to yuv_p_ and
+// the arrays VP8*ModeOffsets[].
+// * YUV Samples area (yuv_in_/yuv_out_/yuv_out2_)
+//   (see VP8Scan[] for accessing the blocks, along with
+//   Y_OFF_ENC/U_OFF_ENC/V_OFF_ENC):
+//             +----+----+
+//  Y_OFF_ENC  |YYYY|UUVV|
+//  U_OFF_ENC  |YYYY|UUVV|
+//  V_OFF_ENC  |YYYY|....| <- 25% wasted U/V area
+//             |YYYY|....|
+//             +----+----+
+// * Prediction area ('yuv_p_', size = PRED_SIZE_ENC)
+//   Intra16 predictions (16x16 block each, two per row):
+//         |I16DC16|I16TM16|
+//         |I16VE16|I16HE16|
+//   Chroma U/V predictions (16x8 block each, two per row):
+//         |C8DC8|C8TM8|
+//         |C8VE8|C8HE8|
+//   Intra 4x4 predictions (4x4 block each)
+//         |I4DC4 I4TM4 I4VE4 I4HE4|I4RD4 I4VR4 I4LD4 I4VL4|
+//         |I4HD4 I4HU4 I4TMP .....|.......................| <- ~31% wasted
+#define YUV_SIZE_ENC (BPS * 16)
+#define PRED_SIZE_ENC (32 * BPS + 16 * BPS + 8 * BPS) // I16+Chroma+I4 preds
+#define Y_OFF_ENC (0)
+#define U_OFF_ENC (16)
+#define V_OFF_ENC (16 + 8)
+
+extern const int VP8Scan[16];         // in quant.c
+extern const int VP8UVModeOffsets[4]; // in analyze.c
+extern const int VP8I16ModeOffsets[4];
+extern const int VP8I4ModeOffsets[NUM_BMODES];
+
+// Layout of prediction blocks
+// intra 16x16
+#define I16DC16 (0 * 16 * BPS)
+#define I16TM16 (I16DC16 + 16)
+#define I16VE16 (1 * 16 * BPS)
+#define I16HE16 (I16VE16 + 16)
+// chroma 8x8, two U/V blocks side by side (hence: 16x8 each)
+#define C8DC8 (2 * 16 * BPS)
+#define C8TM8 (C8DC8 + 1 * 16)
+#define C8VE8 (2 * 16 * BPS + 8 * BPS)
+#define C8HE8 (C8VE8 + 1 * 16)
+// intra 4x4
+#define I4DC4 (3 * 16 * BPS + 0)
+#define I4TM4 (I4DC4 + 4)
+#define I4VE4 (I4DC4 + 8)
+#define I4HE4 (I4DC4 + 12)
+#define I4RD4 (I4DC4 + 16)
+#define I4VR4 (I4DC4 + 20)
+#define I4LD4 (I4DC4 + 24)
+#define I4VL4 (I4DC4 + 28)
+#define I4HD4 (3 * 16 * BPS + 4 * BPS)
+#define I4HU4 (I4HD4 + 4)
+#define I4TMP (I4HD4 + 8)
+
+typedef int64_t score_t; // type used for scores, rate, distortion
+// Note that MAX_COST is not the maximum allowed by sizeof(score_t),
+// in order to allow overflowing computations.
+#define MAX_COST ((score_t)0x7fffffffffffffLL)
+
+#define QFIX 17
+#define BIAS(b) ((b) << (QFIX - 8))
+// Fixed-point quantization: returns (n * iQ + B) >> QFIX. Fun fact: this is the
+// _only_ line where we're actually being lossy and discarding bits.
+static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) {
+    return (int)((n * iQ + B) >> QFIX); // uint32_t arithmetic; the low QFIX bits are dropped
+}
+
+// Uncomment the following to remove token-buffer code:
+// #define DISABLE_TOKEN_BUFFER
+
+//------------------------------------------------------------------------------
+// Headers
+
+typedef uint32_t proba_t; // 16b + 16b
+typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
+typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS];
+typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
+typedef const uint16_t* (*CostArrayPtr)[NUM_CTX]; // for easy casting
+typedef const uint16_t* CostArrayMap[16][NUM_CTX];
+typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats
+
+typedef struct VP8Encoder VP8Encoder;
+
+// segment features
+typedef struct {
+    int num_segments_; // Actual number of segments. 1 segment only = unused.
+    int update_map_;   // whether to update the segment map or not.
+                       // must be 0 if there's only 1 segment.
+    int size_;         // bit-cost for transmitting the segment map
+} VP8EncSegmentHeader;
+
+// Struct collecting all frame-persistent probabilities.
+typedef struct {
+    uint8_t segments_[3];                        // probabilities for segment tree
+    uint8_t skip_proba_;                         // final probability of being skipped.
+    ProbaArray coeffs_[NUM_TYPES][NUM_BANDS];    // 1056 bytes
+    StatsArray stats_[NUM_TYPES][NUM_BANDS];     // 4224 bytes
+    CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 13056 bytes
+    CostArrayMap remapped_costs_[NUM_TYPES];     // 1536 bytes
+    int dirty_;                                  // if true, need to call VP8CalculateLevelCosts()
+    int use_skip_proba_;                         // Note: we always use skip_proba for now.
+    int nb_skip_;                                // number of skipped blocks
+} VP8EncProba;
+
+// Filter parameters. Not actually used in the code (we don't perform
+// the in-loop filtering), but filled from user's config
+typedef struct {
+    int simple_;        // filtering type: 0=complex, 1=simple
+    int level_;         // base filter level [0..63]
+    int sharpness_;     // [0..7]
+    int i4x4_lf_delta_; // delta filter level for i4x4 relative to i16x16
+} VP8EncFilterHeader;
+
+//------------------------------------------------------------------------------
+// Information about the macroblocks.
+
+typedef struct {
+    // block type
+    unsigned int type_ : 2; // 0=i4x4, 1=i16x16
+    unsigned int uv_mode_ : 2;
+    unsigned int skip_ : 1;
+    unsigned int segment_ : 2;
+    uint8_t alpha_; // quantization-susceptibility
+} VP8MBInfo;
+
+typedef struct VP8Matrix {
+    uint16_t q_[16];       // quantizer steps
+    uint16_t iq_[16];      // reciprocals, fixed point.
+    uint32_t bias_[16];    // rounding bias
+    uint32_t zthresh_[16]; // value below which a coefficient is zeroed
+    uint16_t sharpen_[16]; // frequency boosters for slight sharpening
+} VP8Matrix;
+
+typedef struct {
+    VP8Matrix y1_, y2_, uv_; // quantization matrices
+    int alpha_;              // quant-susceptibility, range [-127,127]. Zero is neutral.
+                             // Lower values indicate a lower risk of blurriness.
+    int beta_;               // filter-susceptibility, range [0,255].
+    int quant_;              // final segment quantizer.
+    int fstrength_;          // final in-loop filtering strength
+    int max_edge_;           // max edge delta (for filtering strength)
+    int min_disto_;          // minimum distortion required to trigger filtering record
+    // reactivities
+    int lambda_i16_, lambda_i4_, lambda_uv_;
+    int lambda_mode_, lambda_trellis_, tlambda_;
+    int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_;
+} VP8SegmentInfo;
+
+// Handy transient struct to accumulate score and info during RD-optimization
+// and mode evaluation.
+typedef struct {
+    score_t D, SD;           // Distortion, spectral distortion
+    score_t H, R, score;     // header bits, rate, score.
+    int16_t y_dc_levels[16]; // Quantized levels for luma-DC, luma-AC, chroma.
+    int16_t y_ac_levels[16][16];
+    int16_t uv_levels[4 + 4][16];
+    int mode_i16;         // mode number for intra16 prediction
+    uint8_t modes_i4[16]; // mode numbers for intra4 predictions
+    int mode_uv;          // mode number of chroma prediction
+    uint32_t nz;          // non-zero blocks
+} VP8ModeScore;
+
+// Iterator structure to iterate through macroblocks, pointing to the
+// right neighbouring data (samples, predictions, contexts, ...)
+typedef struct {
+    int x_, y_;                // current macroblock
+    int y_stride_, uv_stride_; // respective strides
+    uint8_t* yuv_in_;          // input samples
+    uint8_t* yuv_out_;         // output samples
+    uint8_t* yuv_out2_;        // secondary buffer swapped with yuv_out_.
+    uint8_t* yuv_p_;           // scratch buffer for prediction
+    VP8Encoder* enc_;          // back-pointer
+    VP8MBInfo* mb_;            // current macroblock
+    VP8BitWriter* bw_;         // current bit-writer
+    uint8_t* preds_;           // intra mode predictors (4x4 blocks)
+    uint32_t* nz_;             // non-zero pattern
+    uint8_t i4_boundary_[37];  // 32+5 boundary samples needed by intra4x4
+    uint8_t* i4_top_;          // pointer to the current top boundary sample
+    int i4_;                   // current intra4x4 mode being tested
+    int top_nz_[9];            // top-non-zero context.
+    int left_nz_[9];           // left-non-zero. left_nz[8] is independent.
+    uint64_t bit_count_[4][3]; // bit counters for coded levels.
+    uint64_t luma_bits_;       // macroblock bit-cost for luma
+    uint64_t uv_bits_;         // macroblock bit-cost for chroma
+    LFStats* lf_stats_;        // filter stats (borrowed from enc_)
+    int do_trellis_;           // if true, perform extra level optimisation
+    int count_down_;           // number of mb still to be processed
+    int count_down0_;          // starting counter value (for progress)
+    int percent0_;             // saved initial progress percent
+
+    uint8_t* y_left_; // left luma samples (addressable from index -1 to 15).
+    uint8_t* u_left_; // left u samples (addressable from index -1 to 7)
+    uint8_t* v_left_; // left v samples (addressable from index -1 to 7)
+
+    uint8_t* y_top_;  // top luma samples at position 'x_'
+    uint8_t* uv_top_; // top u/v samples at position 'x_', packed as 16 bytes
+
+    // memory for storing y/u/v_left_
+    uint8_t yuv_left_mem_[17 + 16 + 16 + 8 + WEBP_ALIGN_CST];
+    // memory for yuv_*
+    uint8_t yuv_mem_[3 * YUV_SIZE_ENC + PRED_SIZE_ENC + WEBP_ALIGN_CST];
+} VP8EncIterator;
+
+// in iterator.c
+// must be called first
+void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it);
+// restart a scan
+void VP8IteratorReset(VP8EncIterator* const it);
+// reset iterator position to row 'y'
+void VP8IteratorSetRow(VP8EncIterator* const it, int y);
+// set count down (=number of iterations to go)
+void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down);
+// return true if iteration is finished
+int VP8IteratorIsDone(const VP8EncIterator* const it);
+// Import uncompressed samples from source.
+// If tmp_32 is not NULL, import boundary samples too.
+// tmp_32 is a 32-bytes scratch buffer that must be aligned in memory.
+void VP8IteratorImport(VP8EncIterator* const it, uint8_t* tmp_32);
+// export decimated samples
+void VP8IteratorExport(const VP8EncIterator* const it);
+// go to next macroblock. Returns false if not finished.
+int VP8IteratorNext(VP8EncIterator* const it);
+// save the yuv_out_ boundary values to top_/left_ arrays for next iterations.
+void VP8IteratorSaveBoundary(VP8EncIterator* const it);
+// Report progression based on macroblock rows. Return 0 for user-abort request.
+int VP8IteratorProgress(const VP8EncIterator* const it, int final_delta_percent);
+// Intra4x4 iterations
+void VP8IteratorStartI4(VP8EncIterator* const it);
+// returns true if not done.
+int VP8IteratorRotateI4(VP8EncIterator* const it, const uint8_t* const yuv_out);
+
+// Non-zero context setup/teardown
+void VP8IteratorNzToBytes(VP8EncIterator* const it);
+void VP8IteratorBytesToNz(VP8EncIterator* const it);
+
+// Helper functions to set mode properties
+void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode);
+void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes);
+void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode);
+void VP8SetSkip(const VP8EncIterator* const it, int skip);
+void VP8SetSegment(const VP8EncIterator* const it, int segment);
+
+//------------------------------------------------------------------------------
+// Paginated token buffer
+
+typedef struct VP8Tokens VP8Tokens; // struct details in token.c
+
+#define TOKEN_RECONSTRUCT 0
+#define PAGE_COUNT 30
+#define TOKENS_COUNT_PER_PAGE 155040 // 615600 //155040 //mb_w * mb_h * 4 * scale
+#define TOKENS_SIZE 2                // sizeof(token_t)
+#define PAGE_POINTER_SIZE 8          // sizeof(*page)
+#define PAGE_SIZE ((TOKENS_COUNT_PER_PAGE * TOKENS_SIZE) + PAGE_POINTER_SIZE)
+typedef struct {
+    // Token buffer with inline storage; unlike VP8TBuffer there is no page list
+    // (the former VP8Tokens* pages_ / VP8Tokens** last_page_ members are disabled).
+    uint16_t tokens_[PAGE_COUNT * TOKENS_COUNT_PER_PAGE]; // room for PAGE_COUNT pages of tokens
+    int cur_page_;
+    int page_count_;
+    int left_;      // how many free tokens left before the page is full
+    int page_size_; // number of tokens per page
+    int error_;     // true in case of malloc error
+} VP8TBufferKernel;
+
+typedef struct {
+#if !defined(DISABLE_TOKEN_BUFFER)
+    VP8Tokens* pages_;      // first page
+    VP8Tokens** last_page_; // last page
+    uint16_t* tokens_;      // set to (*last_page_)->tokens_
+    int left_;              // how many free tokens left before the page is full
+    int page_size_;         // number of tokens per page
+#endif
+    int error_; // true in case of malloc error
+} VP8TBuffer;
+
+void debug_tokens(VP8TBuffer* const b, VP8EncIterator* const it_var);
+// initialize an empty buffer
+void VP8TBufferInit(VP8TBuffer* const b, int page_size);
+void VP8TBufferClear(VP8TBuffer* const b); // de-allocate pages memory
+
+void VP8TBufferKernelInit(VP8TBufferKernel* const b, int page_size);
+
+#if !defined(DISABLE_TOKEN_BUFFER)
+
+// Finalizes bitstream when probabilities are known.
+// Deletes the allocated token memory if final_pass is true.
+int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw, const uint8_t* const probas, int final_pass);
+
+#if TOKEN_RECONSTRUCT
+// record the coding of coefficients without knowing the probabilities yet
+int VP8RecordCoeffTokens(const int ctx,
+                         const int coeff_type,
+                         int first,
+                         int last,
+                         const int16_t* const coeffs,
+                         VP8TBufferKernel* const tokens);
+#else
+int VP8RecordCoeffTokens(
+    const int ctx, const int coeff_type, int first, int last, const int16_t* const coeffs, VP8TBuffer* const tokens);
+
+#endif
+void ReadTokenFromKernel(VP8TBuffer* const tokens, VP8TBufferKernel* const tokens_kernel);
+
+// Estimate the final coded size given a set of 'probas'.
+size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas);
+
+// unused for now
+void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats);
+
+#endif // !DISABLE_TOKEN_BUFFER
+
+//------------------------------------------------------------------------------
+// VP8Encoder
+
+struct VP8Encoder { // encoder-wide state; fields are grouped by purpose below
+    const WebPConfig* config_; // user configuration and parameters
+    WebPPicture* pic_;         // input / output picture
+
+    // headers
+    VP8EncFilterHeader filter_hdr_;   // filtering information
+    VP8EncSegmentHeader segment_hdr_; // segment information
+
+    int profile_; // VP8's profile, deduced from Config.
+
+    // dimension, in macroblock units.
+    int mb_w_, mb_h_;
+    int preds_w_; // stride of the *preds_ prediction plane (=4*mb_w + 1)
+
+    // number of partitions (1, 2, 4 or 8 = MAX_NUM_PARTITIONS)
+    int num_parts_;
+
+    // per-partition bit writers.
+    VP8BitWriter bw_;                        // part0
+    VP8BitWriter parts_[MAX_NUM_PARTITIONS]; // token partitions
+    VP8TBuffer tokens_;                      // token buffer
+
+    int percent_; // for progress
+
+    // transparency blob
+    int has_alpha_;
+    uint8_t* alpha_data_; // non-NULL if transparency is present
+    uint32_t alpha_data_size_;
+    WebPWorker alpha_worker_;
+
+    // quantization info (one set of DC/AC dequant factor per segment)
+    VP8SegmentInfo dqm_[NUM_MB_SEGMENTS];
+    int base_quant_; // nominal quantizer value. Only used
+                     // for relative coding of segments' quant.
+    int alpha_;      // global susceptibility (<=> complexity)
+    int uv_alpha_;   // U/V quantization susceptibility
+    // global offset of quantizers, shared by all segments
+    int dq_y1_dc_;
+    int dq_y2_dc_, dq_y2_ac_;
+    int dq_uv_dc_, dq_uv_ac_;
+
+    // probabilities and statistics
+    VP8EncProba proba_;
+    uint64_t sse_[4];    // sum of Y/U/V/A squared errors for all macroblocks
+    uint64_t sse_count_; // pixel count for the sse_[] stats
+    int coded_size_;
+    int residual_bytes_[3][4];
+    int block_count_[3];
+
+    // quality/speed settings
+    int method_;              // 0=fastest, 6=best/slowest.
+    VP8RDLevel rd_opt_level_; // Deduced from method_.
+    int max_i4_header_bits_;  // partition #0 safeness factor
+    int thread_level_;        // derived from config->thread_level
+    int do_search_;           // derived from config->target_XXX
+    int use_tokens_;          // if true, use token buffer
+
+    // Memory
+    VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1)
+    uint8_t* preds_;     // predictions modes: (4*mb_w+1) * (4*mb_h+1)
+    uint32_t* nz_;       // non-zero bit context: mb_w+1
+    uint8_t* y_top_;     // top luma samples.
+    uint8_t* uv_top_;    // top u/v samples.
+                         // U and V are packed into 16 bytes (8 U + 8 V)
+    LFStats* lf_stats_;  // autofilter stats (if NULL, autofilter is off)
+};
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+// in tree.c
+extern const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
+extern const uint8_t VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
+// Reset the token probabilities to their initial (default) values
+void VP8DefaultProbas(VP8Encoder* const enc);
+// Write the token probabilities
+void VP8WriteProbas(VP8BitWriter* const bw, const VP8EncProba* const probas);
+// Writes the partition #0 modes (that is: all intra modes)
+void VP8CodeIntraModes(VP8Encoder* const enc);
+
+// in syntax.c
+// Generates the final bitstream by coding the partition0 and headers,
+// and appending an assembly of all the pre-coded token partitions.
+// Return true if everything is ok.
+int VP8EncWrite(VP8Encoder* const enc);
+// Release memory allocated for bit-writing in VP8EncLoop & seq.
+void VP8EncFreeBitWriters(VP8Encoder* const enc);
+
+// in frame.c
+extern const uint8_t VP8Cat3[];
+extern const uint8_t VP8Cat4[];
+extern const uint8_t VP8Cat5[];
+extern const uint8_t VP8Cat6[];
+
+// Form all the four Intra16x16 predictions in the yuv_p_ cache
+void VP8MakeLuma16Preds(const VP8EncIterator* const it);
+// Form all the four Chroma8x8 predictions in the yuv_p_ cache
+void VP8MakeChroma8Preds(const VP8EncIterator* const it);
+// Form all the ten Intra4x4 predictions in the yuv_p_ cache
+// for the 4x4 block it->i4_
+void VP8MakeIntra4Preds(const VP8EncIterator* const it);
+// Rate calculation
+int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);
+int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
+int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);
+// Main coding calls
+int VP8EncLoop(VP8Encoder* const enc);
+int VP8EncLoopOcl(VP8Encoder* const enc);
+int VP8EncTokenLoop(VP8Encoder* const enc);
+int VP8EncTokenLoop_ryanw_k(VP8Encoder* const& enc);
+int VP8EncTokenLoop_ryanw(VP8Encoder* const enc);
+
+// in webpenc.c
+// Assign an error code to a picture. Return false for convenience.
+int WebPEncodingSetError(const WebPPicture* const pic, WebPEncodingError error);
+int WebPReportProgress(const WebPPicture* const pic, int percent, int* const percent_store);
+
+// in analysis.c
+// Main analysis loop. Decides the segmentations and complexity.
+// Assigns a first guess for Intra16 and uvmode_ prediction modes.
+int VP8EncAnalyze(VP8Encoder* const enc);
+// Main analyze using opencl
+int VP8EncAnalyzeOcl(VP8Encoder* const enc);
+
+// in quant.c
+// Sets up segment's quantization values, base_quant_ and filter strengths.
+void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
+// Pick best modes and fills the levels. Returns true if skipped.
+int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, VP8RDLevel rd_opt);
+
+// in alpha.c
+void VP8EncInitAlpha(VP8Encoder* const enc);  // initialize alpha compression
+int VP8EncStartAlpha(VP8Encoder* const enc);  // start alpha coding process
+int VP8EncFinishAlpha(VP8Encoder* const enc); // finalize compressed data
+int VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data
+
+// in filter.c
+
+// SSIM utils
+typedef struct { double w, xm, ym, xxm, xym, yym; } DistoStats; // accumulators used by the VP8SSIM* helpers; field meanings presumably defined in filter.c -- TODO confirm
+void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst);
+void VP8SSIMAccumulatePlane(
+    const uint8_t* src1, int stride1, const uint8_t* src2, int stride2, int W, int H, DistoStats* const stats);
+double VP8SSIMGet(const DistoStats* const stats);
+double VP8SSIMGetSquaredError(const DistoStats* const stats);
+
+// autofilter
+void VP8InitFilter(VP8EncIterator* const it);
+void VP8StoreFilterStats(VP8EncIterator* const it);
+void VP8AdjustFilterStrength(VP8EncIterator* const it);
+void VP8AdjustFilterStrengthOcl(VP8Encoder* const enc);
+
+// returns the approximate filtering strength needed to smooth a edge
+// step of 'delta', given a sharpness parameter 'sharpness'.
+int VP8FilterStrengthFromDelta(int sharpness, int delta);
+
+// misc utils for picture_*.c:
+
+// Remove reference to the ARGB/YUVA buffer (doesn't free anything).
+void WebPPictureResetBuffers(WebPPicture* const picture);
+
+// Allocates ARGB buffer of given dimension (previous one is always free'd).
+// Preserves the YUV(A) buffer. Returns false in case of error (invalid param,
+// out-of-memory).
+int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height);
+
+// Allocates YUVA buffer of given dimension (previous one is always free'd).
+// Uses picture->csp to determine whether an alpha buffer is needed.
+// Preserves the ARGB buffer.
+// Returns false in case of error (invalid param, out-of-memory).
+int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height);
+
+// Clean-up the RGB samples under fully transparent area, to help lossless
+// compressibility (no guarantee, though). Assumes that pic->use_argb is true.
+void WebPCleanupTransparentAreaLossless(WebPPicture* const pic);
+
+// in near_lossless.c
+// Near lossless preprocessing in RGB color-space.
+int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality);
+// Near lossless preprocessing in RGB color-space.
+int VP8ApplyNearLosslessOcl(int xsize, int ysize, uint32_t* argb, int quality);
+// Near lossless adjustment for predictors.
+void VP8ApplyNearLosslessPredict(int xsize,
+                                 int ysize,
+                                 int pred_bits,
+                                 const uint32_t* argb_orig,
+                                 uint32_t* argb,
+                                 uint32_t* argb_scratch,
+                                 const uint32_t* const transform_data,
+                                 int quality,
+                                 int subtract_green);
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_ENC_VP8ENCI_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/enc/vp8l.c b/codec/L2/demos/webpEnc/host/src/enc/vp8l.c
new file mode 100644
index 0000000000..345c5825c0
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/vp8l.c
@@ -0,0 +1,1552 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// main entry for the lossless encoder.
+//
+// Author: Vikas Arora (vikaas.arora@gmail.com)
+//
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "./backward_references.h"
+#include "./histogram.h"
+#include "./vp8enci.h"
+#include "./vp8li.h"
+#include "../dsp/lossless.h"
+#include "../utils/bit_writer.h"
+#include "../utils/huffman_encode.h"
+#include "../utils/utils.h"
+#include "../webp/format_constants.h"
+#include "../utils/profiling.h"
+
+#include "./delta_palettization.h"
+
+#define PALETTE_KEY_RIGHT_SHIFT 22 // Key for 1K buffer.
+// Maximum number of histogram images (sub-blocks).
+#define MAX_HUFF_IMAGE_SIZE 2600
+
+// Palette reordering for smaller sum of deltas (and for smaller storage).
+
+static int PaletteCompareColorsForQsort(const void* p1, const void* p2) {
+    // qsort() comparator over packed colors; the entries are asserted to be distinct.
+    const uint32_t lhs = WebPMemToUint32(p1), rhs = WebPMemToUint32(p2);
+    assert(lhs != rhs);
+    return (lhs >= rhs) ? 1 : -1;
+}
+
+static WEBP_INLINE uint32_t PaletteComponentDistance(uint32_t v) {
+    return (v > 128) ? (256 - v) : v; // values above 128 are measured from 256 instead
+}
+
+// Computes a value that is related to the entropy created by the
+// palette entry diff.
+//
+// Note that the last & 0xff is a no-operation in the next statement, but
+// removed by most compilers and is here only for regularity of the code.
+static WEBP_INLINE uint32_t PaletteColorDistance(uint32_t col1, uint32_t col2) {
+    const uint32_t delta = VP8LSubPixels(col1, col2);
+    const int kMoreWeightForRGBThanForAlpha = 9;
+    // Sum the three low byte lanes (bits 0..23) first ...
+    uint32_t weighted = PaletteComponentDistance(delta & 0xff);
+    weighted += PaletteComponentDistance((delta >> 8) & 0xff);
+    weighted += PaletteComponentDistance((delta >> 16) & 0xff);
+    // ... scale that sum, then add the top byte lane (bits 24..31) unscaled.
+    weighted *= kMoreWeightForRGBThanForAlpha;
+    return weighted + PaletteComponentDistance((delta >> 24) & 0xff);
+}
+
+static WEBP_INLINE void SwapColor(uint32_t* const col1, uint32_t* const col2) {
+    const uint32_t saved = *col2; // exchange the two entries through a temporary
+    *col2 = *col1;
+    *col1 = saved;
+}
+
+static void GreedyMinimizeDeltas(uint32_t palette[], int num_colors) {
+    // Selection-style reordering: slot 'dst' receives the not-yet-placed color closest to the one placed
+    // just before it (starting from 0), which keeps the deltas of the delta-coded palette small.
+    uint32_t previous = 0x00000000;
+    int dst;
+    for (dst = 0; dst < num_colors; ++dst) {
+        uint32_t lowest = ~0U;
+        int pick = dst, cand = dst;
+        while (cand < num_colors) {
+            const uint32_t dist = PaletteColorDistance(palette[cand], previous);
+            if (dist < lowest) {
+                lowest = dist;
+                pick = cand;
+            }
+            ++cand;
+        }
+        SwapColor(&palette[pick], &palette[dst]);
+        previous = palette[dst];
+    }
+}
+
+// The palette has been sorted by alpha. This function checks if the other
+// components of the palette have a monotonic development with regards to
+// position in the palette. If all have monotonic development, there is
+// no benefit to re-organize them greedily. A monotonic development
+// would be spotted in green-only situations (like lossy alpha) or gray-scale
+// images.
+static int PaletteHasNonMonotonousDeltas(uint32_t palette[], int num_colors) {
+    // Bit pairs (1,2), (8,16) and (64,128) record, for the red, green and blue
+    // byte lanes, whether a step below 0x80 (low bit) or a step of 0x80 and
+    // above (high bit) was ever seen between consecutive palette entries.
+    static const int kShift[3] = {16, 8, 0};
+    static const uint8_t kLowBit[3] = {1, 8, 64};
+    uint32_t previous = 0x000000;
+    uint8_t seen = 0x00;
+    int idx, c;
+    for (idx = 0; idx < num_colors; ++idx) {
+        const uint32_t delta = VP8LSubPixels(palette[idx], previous);
+        for (c = 0; c < 3; ++c) {
+            const uint8_t step = (delta >> kShift[c]) & 0xff;
+            if (step == 0x00) continue; // an unchanged lane says nothing about direction
+            seen |= (step < 0x80) ? kLowBit[c] : (uint8_t)(kLowBit[c] << 1);
+        }
+        previous = palette[idx];
+    }
+    // Non-zero when both bits of some pair are set, i.e. a lane moved both ways.
+    return (seen & (seen << 1)) != 0;
+}
+// (end of PaletteHasNonMonotonousDeltas)
+
+// -----------------------------------------------------------------------------
+// Palette
+
+// If number of colors in the image is less than or equal to MAX_PALETTE_SIZE,
+// creates a palette and returns true, else returns false.
+static int AnalyzeAndCreatePalette(const WebPPicture* const pic,
+                                   int low_effort,
+                                   uint32_t palette[MAX_PALETTE_SIZE],
+                                   int* const palette_size) {
+    int i, x, y, key;
+    int num_colors = 0;
+    uint8_t in_use[MAX_PALETTE_SIZE * 4] = {0}; // hash table: slot occupancy flags
+    uint32_t colors[MAX_PALETTE_SIZE * 4]; // hash table: color stored in each occupied slot
+    static const uint32_t kHashMul = 0x1e35a7bd;
+    const uint32_t* argb = pic->argb;
+    const int width = pic->width;
+    const int height = pic->height;
+    uint32_t last_pix = ~argb[0]; // so we're sure that last_pix != argb[0]
+
+    for (y = 0; y < height; ++y) {
+        for (x = 0; x < width; ++x) {
+            if (argb[x] == last_pix) { // same color as the previously handled pixel: already recorded
+                continue;
+            }
+            last_pix = argb[x];
+            key = (kHashMul * last_pix) >> PALETTE_KEY_RIGHT_SHIFT; // keeps the top bits of the 32-bit product
+            while (1) {
+                if (!in_use[key]) {
+                    colors[key] = last_pix;
+                    in_use[key] = 1;
+                    ++num_colors;
+                    if (num_colors > MAX_PALETTE_SIZE) {
+                        return 0; // too many distinct colors; *palette_size is left unwritten
+                    }
+                    break;
+                } else if (colors[key] == last_pix) {
+                    // The color is already in the table: nothing to add.
+                    break;
+                } else {
+                    // A different color occupies this slot: probe the next
+                    // slot, wrapping around at the end of the table.
+                    ++key;
+                    key &= (MAX_PALETTE_SIZE * 4 - 1); // key mask for 1K buffer.
+                }
+            }
+        }
+        argb += pic->argb_stride;
+    }
+
+    // TODO(skal): could we reuse in_use[] to speed up EncodePalette()?
+    num_colors = 0; // recount while compacting the occupied slots into palette[]
+    for (i = 0; i < (int)(sizeof(in_use) / sizeof(in_use[0])); ++i) {
+        if (in_use[i]) {
+            palette[num_colors] = colors[i];
+            ++num_colors;
+        }
+    }
+    *palette_size = num_colors;
+    qsort(palette, num_colors, sizeof(*palette), PaletteCompareColorsForQsort);
+    if (!low_effort && PaletteHasNonMonotonousDeltas(palette, num_colors)) { // greedy reorder skipped in low-effort mode
+        GreedyMinimizeDeltas(palette, num_colors);
+    }
+    return 1;
+}
+
+// These five modes are evaluated and their respective entropy is computed.
+typedef enum {
+    kDirect = 0,          // neither predictor nor subtract-green nor palette
+    kSpatial = 1,         // selects use_predict_
+    kSubGreen = 2,        // selects use_subtract_green_
+    kSpatialSubGreen = 3, // selects both use_predict_ and use_subtract_green_
+    kPalette = 4,         // selects use_palette_
+    kNumEntropyIx = 5     // number of modes; sizes the entropy[] array
+} EntropyIx;
+
+typedef enum {
+    kHistoAlpha = 0, // each index selects one 256-entry slice of the histogram set: &histo[ix * 256]
+    kHistoAlphaPred,
+    kHistoGreen,
+    kHistoGreenPred,
+    kHistoRed,
+    kHistoRedPred,
+    kHistoBlue,
+    kHistoBluePred,
+    kHistoRedSubGreen,
+    kHistoRedPredSubGreen,
+    kHistoBlueSubGreen,
+    kHistoBluePredSubGreen,
+    kHistoPalette, // filled from a hash of the pixel value in AnalyzeEntropy()
+    kHistoTotal // Must be last.
+} HistoIx;
+
+static void AddSingleSubGreen(uint32_t p, uint32_t* r, uint32_t* b) {
+    const uint32_t g = (p >> 8) & 0xff; // green byte, subtracted modulo 256 below
+    r[(((p >> 16) & 0xff) - g) & 0xff] += 1;
+    b[((p & 0xff) - g) & 0xff] += 1;
+}
+
+static void AddSingle(uint32_t p, uint32_t* a, uint32_t* r, uint32_t* g, uint32_t* b) {
+    a[(p >> 24) & 0xff] += 1; // one count per byte lane, top byte first
+    r[(p >> 16) & 0xff] += 1;
+    g[(p >> 8) & 0xff] += 1;
+    b[p & 0xff] += 1;
+}
+
+static int AnalyzeEntropy(const uint32_t* argb,
+                          int width,
+                          int height,
+                          int argb_stride,
+                          int use_palette,
+                          EntropyIx* const min_entropy_ix,
+                          int* const red_and_blue_always_zero) {
+    // Estimates the entropy of each candidate mode (kPalette only when 'use_palette' is set) and stores
+    // the cheapest one in *min_entropy_ix. *red_and_blue_always_zero is set to 1 when the red and blue
+    // histograms of that mode are empty above bin 0. Returns 0 if the allocation fails, 1 otherwise.
+    // Allocate histogram set with cache_bits = 0.
+    uint32_t* const histo = (uint32_t*)WebPSafeCalloc(kHistoTotal, sizeof(*histo) * 256);
+    if (histo != NULL) {
+        int i, x, y;
+        const uint32_t* prev_row = argb;
+        const uint32_t* curr_row = argb + argb_stride;
+        for (y = 1; y < height; ++y) {
+            uint32_t prev_pix = curr_row[0];
+            for (x = 1; x < width; ++x) {
+                const uint32_t pix = curr_row[x];
+                const uint32_t pix_diff = VP8LSubPixels(pix, prev_pix);
+                if ((pix_diff == 0) || (pix == prev_row[x])) continue; // equals reference pixel or pixel above
+                prev_pix = pix;
+                AddSingle(pix, &histo[kHistoAlpha * 256], &histo[kHistoRed * 256], &histo[kHistoGreen * 256],
+                          &histo[kHistoBlue * 256]);
+                AddSingle(pix_diff, &histo[kHistoAlphaPred * 256], &histo[kHistoRedPred * 256],
+                          &histo[kHistoGreenPred * 256], &histo[kHistoBluePred * 256]);
+                AddSingleSubGreen(pix, &histo[kHistoRedSubGreen * 256], &histo[kHistoBlueSubGreen * 256]);
+                AddSingleSubGreen(pix_diff, &histo[kHistoRedPredSubGreen * 256], &histo[kHistoBluePredSubGreen * 256]);
+                {
+                    // Approximate the palette by the entropy of the multiplicative hash.
+                    const int hash = ((pix + (pix >> 19)) * 0x39c5fba7) >> 24;
+                    ++histo[kHistoPalette * 256 + (hash & 0xff)];
+                }
+            }
+            prev_row = curr_row;
+            curr_row += argb_stride;
+        }
+        {
+            double entropy_comp[kHistoTotal];
+            double entropy[kNumEntropyIx];
+            int mode; // plain int loop counter; cast back to EntropyIx when stored
+            EntropyIx last_mode_to_analyze = use_palette ? kPalette : kSpatialSubGreen;
+            int j;
+            // Let's add one zero to the predicted histograms. The zeros are removed
+            // too efficiently by the pix_diff == 0 comparison, at least one of the
+            // zeros is likely to exist.
+            ++histo[kHistoRedPredSubGreen * 256];
+            ++histo[kHistoBluePredSubGreen * 256];
+            ++histo[kHistoRedPred * 256];
+            ++histo[kHistoGreenPred * 256];
+            ++histo[kHistoBluePred * 256];
+            ++histo[kHistoAlphaPred * 256];
+            for (j = 0; j < kHistoTotal; ++j) {
+                entropy_comp[j] = VP8LBitsEntropy(&histo[j * 256], 256, NULL);
+            }
+            entropy[kDirect] = entropy_comp[kHistoAlpha] + entropy_comp[kHistoRed] + entropy_comp[kHistoGreen] +
+                               entropy_comp[kHistoBlue];
+            entropy[kSpatial] = entropy_comp[kHistoAlphaPred] + entropy_comp[kHistoRedPred] +
+                                entropy_comp[kHistoGreenPred] + entropy_comp[kHistoBluePred];
+            entropy[kSubGreen] = entropy_comp[kHistoAlpha] + entropy_comp[kHistoRedSubGreen] +
+                                 entropy_comp[kHistoGreen] + entropy_comp[kHistoBlueSubGreen];
+            entropy[kSpatialSubGreen] = entropy_comp[kHistoAlphaPred] + entropy_comp[kHistoRedPredSubGreen] +
+                                        entropy_comp[kHistoGreenPred] + entropy_comp[kHistoBluePredSubGreen];
+            // Palette mode seems more efficient in a breakeven case. Bias with 1.0.
+            entropy[kPalette] = entropy_comp[kHistoPalette] - 1.0;
+
+            *min_entropy_ix = kDirect;
+            // Strict '>' keeps the first minimum, so ties favour the lower-numbered mode.
+            for (mode = kDirect + 1; mode <= (int)last_mode_to_analyze; ++mode) {
+                if (entropy[*min_entropy_ix] > entropy[mode]) {
+                    *min_entropy_ix = (EntropyIx)mode;
+                }
+            }
+
+            *red_and_blue_always_zero = 1;
+            // Let's check if the histogram of the chosen entropy mode has
+            // non-zero red and blue values. If all are zero, we can later skip
+            // the cross color optimization.
+            {
+                static const uint8_t kHistoPairs[5][2] = {{kHistoRed, kHistoBlue},
+                                                          {kHistoRedPred, kHistoBluePred},
+                                                          {kHistoRedSubGreen, kHistoBlueSubGreen},
+                                                          {kHistoRedPredSubGreen, kHistoBluePredSubGreen},
+                                                          {kHistoRed, kHistoBlue}};
+                const uint32_t* const red_histo = &histo[256 * kHistoPairs[*min_entropy_ix][0]];
+                const uint32_t* const blue_histo = &histo[256 * kHistoPairs[*min_entropy_ix][1]];
+                for (i = 1; i < 256; ++i) {
+                    if ((red_histo[i] | blue_histo[i]) != 0) {
+                        *red_and_blue_always_zero = 0;
+                        break;
+                    }
+                }
+            }
+        }
+        WebPSafeFree(histo); // release with the allocator family that produced it (WebPSafeCalloc)
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+static int GetHistoBits(int method, int use_palette, int width, int height) {
+    // Start from a method-dependent tile size (method range: 0 to 6), then coarsen
+    // it until the sub-sampled image holds at most MAX_HUFF_IMAGE_SIZE entries.
+    int bits = (use_palette ? 9 : 7) - method;
+    while (VP8LSubSampleSize(width, bits) * VP8LSubSampleSize(height, bits) > MAX_HUFF_IMAGE_SIZE) {
+        ++bits;
+    }
+    // Clamp the result to [MIN_HUFFMAN_BITS, MAX_HUFFMAN_BITS].
+    if (bits < MIN_HUFFMAN_BITS) return MIN_HUFFMAN_BITS;
+    return (bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : bits;
+}
+
+static int GetTransformBits(int method, int histo_bits) {
+    const int cap = (method > 4) ? 4 : (method == 4) ? 5 : 6; // method below 4 -> 6, 4 -> 5, above 4 -> 4
+    return (cap < histo_bits) ? cap : histo_bits;
+}
+
+static int AnalyzeAndInit(VP8LEncoder* const enc) { // picks the transforms to use and sets up hash chain / refs; returns 0 on failure
+    const WebPPicture* const pic = enc->pic_;
+    const int width = pic->width;
+    const int height = pic->height;
+    const int pix_cnt = width * height;
+    const WebPConfig* const config = enc->config_;
+    const int method = config->method;
+    const int low_effort = (config->method == 0);
+    // we round the block size up, so we're guaranteed to have
+    // at max MAX_REFS_BLOCK_PER_IMAGE blocks used:
+    int refs_block_size = (pix_cnt - 1) / MAX_REFS_BLOCK_PER_IMAGE + 1;
+    StopProfilingWatch stop_watch;
+    StartProfiling(&stop_watch); // NOTE(review): no matching stop call is visible in this function
+    assert(pic != NULL && pic->argb != NULL); // NOTE(review): pic has already been dereferenced above
+
+    enc->use_cross_color_ = 0;
+    enc->use_predict_ = 0;
+    enc->use_subtract_green_ = 0;
+    enc->use_palette_ = AnalyzeAndCreatePalette(pic, low_effort, enc->palette_, &enc->palette_size_);
+
+    // TODO(jyrki): replace the decision to be based on an actual estimate
+    // of entropy, or even spatial variance of entropy.
+    enc->histo_bits_ = GetHistoBits(method, enc->use_palette_, pic->width, pic->height);
+    enc->transform_bits_ = GetTransformBits(method, enc->histo_bits_);
+
+    if (low_effort) {
+        // AnalyzeEntropy is somewhat slow, so it is skipped here: predictor and
+        // subtract-green are simply enabled whenever no palette was found.
+        enc->use_predict_ = !enc->use_palette_;
+        enc->use_subtract_green_ = !enc->use_palette_;
+        enc->use_cross_color_ = 0;
+    } else {
+        int red_and_blue_always_zero;
+        EntropyIx min_entropy_ix;
+        if (!AnalyzeEntropy(pic->argb, width, height, pic->argb_stride, enc->use_palette_, &min_entropy_ix,
+                            &red_and_blue_always_zero)) {
+            return 0;
+        }
+        enc->use_palette_ = (min_entropy_ix == kPalette); // may turn the palette off again
+        enc->use_subtract_green_ = (min_entropy_ix == kSubGreen) || (min_entropy_ix == kSpatialSubGreen);
+        enc->use_predict_ = (min_entropy_ix == kSpatial) || (min_entropy_ix == kSpatialSubGreen);
+        enc->use_cross_color_ = red_and_blue_always_zero ? 0 : enc->use_predict_;
+    }
+
+    if (!VP8LHashChainInit(&enc->hash_chain_, pix_cnt)) return 0;
+
+    // palette-friendly input typically uses less literals
+    //  -> reduce block size a bit
+    if (enc->use_palette_) refs_block_size /= 2;
+    VP8LBackwardRefsInit(&enc->refs_[0], refs_block_size);
+    VP8LBackwardRefsInit(&enc->refs_[1], refs_block_size);
+
+    return 1;
+}
+
+// Returns false in case of memory error.
+static int GetHuffBitLengthsAndCodes(const VP8LHistogramSet* const histogram_image,
+                                     HuffmanTreeCode* const huffman_codes) {
+    int i, k;
+    int ok = 0;
+    uint64_t total_length_size = 0;
+    uint8_t* mem_buf = NULL;
+    const int histogram_image_size = histogram_image->size;
+    int max_num_symbols = 0;
+    uint8_t* buf_rle = NULL;
+    HuffmanTree* huff_tree = NULL;
+
+    // Iterate over all histograms and get the aggregate number of codes used.
+    for (i = 0; i < histogram_image_size; ++i) {
+        const VP8LHistogram* const histo = histogram_image->histograms[i];
+        HuffmanTreeCode* const codes = &huffman_codes[5 * i]; // five codes per histogram
+        for (k = 0; k < 5; ++k) {
+            const int num_symbols =
+                (k == 0) ? VP8LHistogramNumCodes(histo->palette_code_bits_) : (k == 4) ? NUM_DISTANCE_CODES : 256;
+            codes[k].num_symbols = num_symbols;
+            total_length_size += num_symbols;
+        }
+    }
+
+    // Allocate and Set Huffman codes.
+    {
+        uint16_t* codes;
+        uint8_t* lengths;
+        mem_buf = (uint8_t*)WebPSafeCalloc(total_length_size, sizeof(*lengths) + sizeof(*codes)); // one shared block
+        if (mem_buf == NULL) goto End;
+
+        codes = (uint16_t*)mem_buf; // all uint16_t codes come first ...
+        lengths = (uint8_t*)&codes[total_length_size]; // ... followed by all uint8_t lengths
+        for (i = 0; i < 5 * histogram_image_size; ++i) {
+            const int bit_length = huffman_codes[i].num_symbols; // a symbol count, despite the name
+            huffman_codes[i].codes = codes;
+            huffman_codes[i].code_lengths = lengths;
+            codes += bit_length;
+            lengths += bit_length;
+            if (max_num_symbols < bit_length) {
+                max_num_symbols = bit_length;
+            }
+        }
+    }
+
+    buf_rle = (uint8_t*)WebPSafeMalloc(1ULL, max_num_symbols); // scratch buffers sized for the largest code
+    huff_tree = (HuffmanTree*)WebPSafeMalloc(3ULL * max_num_symbols, sizeof(*huff_tree));
+    if (buf_rle == NULL || huff_tree == NULL) goto End;
+
+    // Create Huffman trees.
+    for (i = 0; i < histogram_image_size; ++i) {
+        HuffmanTreeCode* const codes = &huffman_codes[5 * i];
+        VP8LHistogram* const histo = histogram_image->histograms[i];
+        VP8LCreateHuffmanTree(histo->literal_, 15, buf_rle, huff_tree, codes + 0);
+        VP8LCreateHuffmanTree(histo->red_, 15, buf_rle, huff_tree, codes + 1);
+        VP8LCreateHuffmanTree(histo->blue_, 15, buf_rle, huff_tree, codes + 2);
+        VP8LCreateHuffmanTree(histo->alpha_, 15, buf_rle, huff_tree, codes + 3);
+        VP8LCreateHuffmanTree(histo->distance_, 15, buf_rle, huff_tree, codes + 4);
+    }
+    ok = 1; // mem_buf stays allocated: huffman_codes[] entries point into it
+End:
+    WebPSafeFree(huff_tree);
+    WebPSafeFree(buf_rle);
+    if (!ok) {
+        WebPSafeFree(mem_buf);
+        memset(huffman_codes, 0, 5 * histogram_image_size * sizeof(*huffman_codes)); // leave no pointer into freed memory
+    }
+    return ok;
+}
+
+static void StoreHuffmanTreeOfHuffmanTreeToBitMask(VP8LBitWriter* const bw, const uint8_t* code_length_bitdepth) {
+    // Order in which the bit depths are written. Tuned from the RFC 1951
+    // sequence, but weighted towards low symbol counts and spiky histograms.
+    static const uint8_t kStorageOrder[CODE_LENGTH_CODES] = {17, 18, 0, 1,  2,  3,  4,  5,  16, 6,
+                                                             7,  8,  9, 10, 11, 12, 13, 14, 15};
+    int count = CODE_LENGTH_CODES;
+    int pos = 0;
+    // Trailing zero depths (in storage order) are not written, but at least
+    // four entries always are.
+    while (count > 4 && code_length_bitdepth[kStorageOrder[count - 1]] == 0) {
+        --count;
+    }
+    // 4-bit field holding (count - 4), followed by 'count' 3-bit depths.
+    VP8LPutBits(bw, count - 4, 4);
+    while (pos < count) {
+        VP8LPutBits(bw, code_length_bitdepth[kStorageOrder[pos]], 3);
+        ++pos;
+    }
+}
+
+static void ClearHuffmanTreeIfOnlyOneSymbol(HuffmanTreeCode* const huffman_code) {
+    // A code with at most one non-zero length is wiped entirely; a code with
+    // two or more non-zero lengths is left untouched.
+    const int n = huffman_code->num_symbols;
+    int used = 0, s;
+    for (s = 0; s < n; ++s) {
+        if (huffman_code->code_lengths[s] == 0) continue;
+        if (++used > 1) return;
+    }
+    for (s = 0; s < n; ++s) {
+        huffman_code->code_lengths[s] = 0;
+        huffman_code->codes[s] = 0;
+    }
+}
+
+static void StoreHuffmanTreeToBitMask(VP8LBitWriter* const bw,
+                                      const HuffmanTreeToken* const tokens,
+                                      const int num_tokens,
+                                      const HuffmanTreeCode* const huffman_code) {
+    // Symbols 16, 17 and 18 are followed by 2, 3 and 7 extra bits respectively; all others by none.
+    int t;
+    for (t = 0; t < num_tokens; ++t) {
+        const int sym = tokens[t].code;
+        int n_extra = 0;
+        VP8LPutBits(bw, huffman_code->codes[sym], huffman_code->code_lengths[sym]);
+        if (sym == 16) {
+            n_extra = 2;
+        } else if (sym == 17) {
+            n_extra = 3;
+        } else if (sym == 18) {
+            n_extra = 7;
+        }
+        if (n_extra != 0) {
+            VP8LPutBits(bw, tokens[t].extra_bits, n_extra);
+        }
+    }
+}
+
+// 'huff_tree' and 'tokens' are pre-allocated buffers.
+static void StoreFullHuffmanCode(VP8LBitWriter* const bw,
+                                 HuffmanTree* const huff_tree,
+                                 HuffmanTreeToken* const tokens,
+                                 const HuffmanTreeCode* const tree) {
+    uint8_t code_length_bitdepth[CODE_LENGTH_CODES] = {0};
+    uint16_t code_length_bitdepth_symbols[CODE_LENGTH_CODES] = {0};
+    const int max_tokens = tree->num_symbols;
+    int num_tokens;
+    HuffmanTreeCode huffman_code;
+    huffman_code.num_symbols = CODE_LENGTH_CODES;
+    huffman_code.code_lengths = code_length_bitdepth;
+    huffman_code.codes = code_length_bitdepth_symbols;
+
+    VP8LPutBits(bw, 0, 1);
+    num_tokens = VP8LCreateCompressedHuffmanTree(tree, tokens, max_tokens);
+    {
+        uint32_t histogram[CODE_LENGTH_CODES] = {0};
+        uint8_t buf_rle[CODE_LENGTH_CODES] = {0};
+        int i;
+        for (i = 0; i < num_tokens; ++i) {
+            ++histogram[tokens[i].code];
+        }
+
+        VP8LCreateHuffmanTree(histogram, 7, buf_rle, huff_tree, &huffman_code);
+    }
+
+    StoreHuffmanTreeOfHuffmanTreeToBitMask(bw, code_length_bitdepth);
+    ClearHuffmanTreeIfOnlyOneSymbol(&huffman_code);
+    {
+        int trailing_zero_bits = 0;
+        int trimmed_length = num_tokens;
+        int write_trimmed_length;
+        int length;
+        int i = num_tokens;
+        while (i-- > 0) {
+            const int ix = tokens[i].code;
+            if (ix == 0 || ix == 17 || ix == 18) {
+                --trimmed_length; // discount trailing zeros
+                trailing_zero_bits += code_length_bitdepth[ix];
+                if (ix == 17) {
+                    trailing_zero_bits += 3;
+                } else if (ix == 18) {
+                    trailing_zero_bits += 7;
+                }
+            } else {
+                break;
+            }
+        }
+        write_trimmed_length = (trimmed_length > 1 && trailing_zero_bits > 12);
+        length = write_trimmed_length ? trimmed_length : num_tokens;
+        VP8LPutBits(bw, write_trimmed_length, 1);
+        if (write_trimmed_length) {
+            const int nbits = VP8LBitsLog2Ceiling(trimmed_length - 1);
+            const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
+            VP8LPutBits(bw, nbitpairs - 1, 3);
+            assert(trimmed_length >= 2);
+            VP8LPutBits(bw, trimmed_length - 2, nbitpairs * 2);
+        }
+        StoreHuffmanTreeToBitMask(bw, tokens, length, &huffman_code);
+    }
+}
+
+// Writes 'huffman_code' to 'bw', choosing the most compact representation:
+//  - no used symbol: a fixed 4-bit minimal tree,
+//  - one or two used symbols whose indices are below 256: the "small tree" form,
+//  - anything else: the full form written by StoreFullHuffmanCode().
+// 'huff_tree' and 'tokens' are pre-allocated buffers; they are only forwarded
+// to StoreFullHuffmanCode().
+static void StoreHuffmanCode(VP8LBitWriter* const bw,
+                             HuffmanTree* const huff_tree,
+                             HuffmanTreeToken* const tokens,
+                             const HuffmanTreeCode* const huffman_code) {
+    const int kMaxBits = 8;
+    const int kMaxSymbol = 1 << kMaxBits;
+    int first_used[2] = {0, 0};
+    int num_used = 0;
+    int s;
+
+    // Collect the first two used symbols; stop counting once a third is seen.
+    for (s = 0; s < huffman_code->num_symbols; ++s) {
+        if (huffman_code->code_lengths[s] == 0) continue;
+        if (num_used < 2) first_used[num_used] = s;
+        if (++num_used == 3) break;
+    }
+
+    if (num_used == 0) { // emit minimal tree for empty cases
+        // bits: small tree marker: 1, count-1: 0, large 8-bit code: 0, code: 0
+        VP8LPutBits(bw, 0x01, 4);
+        return;
+    }
+
+    if (num_used > 2 || first_used[0] >= kMaxSymbol || first_used[1] >= kMaxSymbol) {
+        StoreFullHuffmanCode(bw, huff_tree, tokens, huffman_code);
+        return;
+    }
+
+    VP8LPutBits(bw, 1, 1); // Small tree marker to encode 1 or 2 symbols.
+    VP8LPutBits(bw, num_used - 1, 1);
+    {
+        // The first symbol is stored on 1 bit when it is 0 or 1, on 8 bits
+        // otherwise; a flag bit tells which width follows.
+        const int is_8bit = (first_used[0] > 1);
+        VP8LPutBits(bw, is_8bit, 1);
+        VP8LPutBits(bw, first_used[0], is_8bit ? 8 : 1);
+    }
+    if (num_used == 2) {
+        VP8LPutBits(bw, first_used[1], 8);
+    }
+}
+
+// Writes the code stored for symbol 'code_index' in 'code' to 'bw', using that
+// symbol's code length as the number of bits.
+static WEBP_INLINE void WriteHuffmanCode(VP8LBitWriter* const bw, const HuffmanTreeCode* const code, int code_index) {
+    const int n_code_bits = code->code_lengths[code_index];
+    const int code_value = code->codes[code_index];
+    VP8LPutBits(bw, code_value, n_code_bits);
+}
+
+// Writes the code of symbol 'code_index' and 'n_bits' extra bits with a single
+// VP8LPutBits() call: the code occupies the low bits and 'bits' is placed
+// directly above it.
+static WEBP_INLINE void WriteHuffmanCodeWithExtraBits(
+    VP8LBitWriter* const bw, const HuffmanTreeCode* const code, int code_index, int bits, int n_bits) {
+    const int n_code_bits = code->code_lengths[code_index];
+    const int packed = (bits << n_code_bits) | code->codes[code_index];
+    VP8LPutBits(bw, packed, n_code_bits + n_bits);
+}
+
+// Writes every entry of 'refs' (literals, color-cache indices and backward
+// copies) to 'bw'. Each group of five consecutive entries in 'huffman_codes'
+// forms one code set; the set in use is looked up in 'histogram_symbols' each
+// time the current position enters a different tile of side (1 << histo_bits).
+// With histo_bits == 0 the set selected by histogram_symbols[0] is used for the
+// whole image.
+// Returns VP8_ENC_ERROR_OUT_OF_MEMORY when the bit writer's error flag is set
+// after writing, VP8_ENC_OK otherwise.
+static WebPEncodingError StoreImageToBitMask(VP8LBitWriter* const bw,
+                                             int width,
+                                             int histo_bits,
+                                             VP8LBackwardRefs* const refs,
+                                             const uint16_t* histogram_symbols,
+                                             const HuffmanTreeCode* const huffman_codes) {
+    // Channel order in which the four components of a literal are written;
+    // the k-th written component uses the k-th code of the current set.
+    static const int kChannelOrder[] = {1, 2, 0, 3};
+    const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1;
+    const int tile_mask = (histo_bits == 0) ? 0 : -(1 << histo_bits);
+    // (x, y) is the image position of the entry being written.
+    int x = 0;
+    int y = 0;
+    // Origin of the tile for which 'codes' is currently valid.
+    int cur_tile_x = 0;
+    int cur_tile_y = 0;
+    const HuffmanTreeCode* codes = huffman_codes + 5 * histogram_symbols[0];
+    VP8LRefsCursor c = VP8LRefsCursorInit(refs);
+
+    for (; VP8LRefsCursorOk(&c); VP8LRefsCursorNext(&c)) {
+        const PixOrCopy* const v = c.cur_pos;
+        const int tile_x = x & tile_mask;
+        const int tile_y = y & tile_mask;
+        if (tile_x != cur_tile_x || tile_y != cur_tile_y) {
+            const int tile_index = (y >> histo_bits) * histo_xsize + (x >> histo_bits);
+            cur_tile_x = tile_x;
+            cur_tile_y = tile_y;
+            codes = huffman_codes + 5 * histogram_symbols[tile_index];
+        }
+
+        if (PixOrCopyIsLiteral(v)) {
+            int k;
+            for (k = 0; k < 4; ++k) {
+                WriteHuffmanCode(bw, codes + k, PixOrCopyLiteral(v, kChannelOrder[k]));
+            }
+        } else if (PixOrCopyIsCacheIdx(v)) {
+            // Cache indices live after the literals and length codes in the
+            // first code of the set.
+            WriteHuffmanCode(bw, codes, 256 + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v));
+        } else {
+            int len_code, len_n_bits, len_bits;
+            int dist_code, dist_n_bits, dist_bits;
+            const int distance = PixOrCopyDistance(v);
+
+            VP8LPrefixEncode(v->len, &len_code, &len_n_bits, &len_bits);
+            WriteHuffmanCodeWithExtraBits(bw, codes, 256 + len_code, len_bits, len_n_bits);
+
+            // Don't write the distance with the extra bits code since
+            // the distance can be up to 18 bits of extra bits, and the prefix
+            // 15 bits, totaling to 33, and our PutBits only supports up to 32 bits.
+            // TODO(jyrki): optimize this further.
+            VP8LPrefixEncode(distance, &dist_code, &dist_n_bits, &dist_bits);
+            WriteHuffmanCode(bw, codes + 4, dist_code);
+            VP8LPutBits(bw, dist_bits, dist_n_bits);
+        }
+
+        // Advance the position by the number of pixels this entry covers,
+        // wrapping onto following rows as needed.
+        x += PixOrCopyLength(v);
+        while (x >= width) {
+            x -= width;
+            ++y;
+        }
+    }
+
+    if (bw->error_) {
+        return VP8_ENC_ERROR_OUT_OF_MEMORY;
+    }
+    return VP8_ENC_OK;
+}
+
+// Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31
+//
+// Encodes the 'width' x 'height' image 'argb' into 'bw' using a single set of
+// five Huffman codes and no color cache:
+//   1. computes backward references (cache_bits stays 0 on input),
+//   2. builds one histogram from them and derives the five codes,
+//   3. writes a single 0 bit, then the five codes, then the pixel data.
+// 'hash_chain' and 'refs_array' are forwarded to VP8LGetBackwardReferences().
+// Returns VP8_ENC_OK, or VP8_ENC_ERROR_OUT_OF_MEMORY when an allocation or a
+// helper fails or when StoreImageToBitMask() reports a bit-writer error.
+static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw,
+                                              const uint32_t* const argb,
+                                              VP8LHashChain* const hash_chain,
+                                              VP8LBackwardRefs refs_array[2],
+                                              int width,
+                                              int height,
+                                              int quality) {
+    int i;
+    int max_tokens = 0;
+    WebPEncodingError err = VP8_ENC_OK;
+    VP8LBackwardRefs* refs;
+    // Everything released under 'Error:' starts out NULL so that the cleanup
+    // code can run from any early exit.
+    HuffmanTreeToken* tokens = NULL;
+    HuffmanTreeCode huffman_codes[5] = {{0, NULL, NULL}};
+    const uint16_t histogram_symbols[1] = {0}; // only one tree, one symbol
+    int cache_bits = 0;
+    VP8LHistogramSet* histogram_image = NULL;
+    HuffmanTree* const huff_tree = (HuffmanTree*)WebPSafeMalloc(3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree));
+    if (huff_tree == NULL) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+    }
+
+    // Calculate backward references from ARGB image.
+    refs = VP8LGetBackwardReferences(width, height, argb, quality, 0, &cache_bits, hash_chain, refs_array);
+    if (refs == NULL) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+    }
+    histogram_image = VP8LAllocateHistogramSet(1, cache_bits);
+    if (histogram_image == NULL) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+    }
+
+    // Build histogram image and symbols from backward references.
+    VP8LHistogramStoreRefs(refs, histogram_image->histograms[0]);
+
+    // Create Huffman bit lengths and codes for each histogram image.
+    assert(histogram_image->size == 1);
+    if (!GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+    }
+
+    // No color cache, no Huffman image.
+    VP8LPutBits(bw, 0, 1);
+
+    // Find maximum number of symbols for the huffman tree-set.
+    for (i = 0; i < 5; ++i) {
+        HuffmanTreeCode* const codes = &huffman_codes[i];
+        if (max_tokens < codes->num_symbols) {
+            max_tokens = codes->num_symbols;
+        }
+    }
+
+    // One token buffer, sized for the largest code, is reused for all five.
+    tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens));
+    if (tokens == NULL) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+    }
+
+    // Store Huffman codes.
+    for (i = 0; i < 5; ++i) {
+        HuffmanTreeCode* const codes = &huffman_codes[i];
+        StoreHuffmanCode(bw, huff_tree, tokens, codes);
+        // Done after storing: a code with at most one used symbol is zeroed
+        // before the pixel data is written with it.
+        ClearHuffmanTreeIfOnlyOneSymbol(codes);
+    }
+
+    // Store actual literals.
+    err = StoreImageToBitMask(bw, width, 0, refs, histogram_symbols, huffman_codes);
+
+Error:
+    WebPSafeFree(tokens);
+    WebPSafeFree(huff_tree);
+    VP8LFreeHistogramSet(histogram_image);
+    // Only the first entry's 'codes' pointer is freed.
+    // NOTE(review): presumably GetHuffBitLengthsAndCodes() makes one allocation
+    // shared by all five entries - confirm against its definition.
+    WebPSafeFree(huffman_codes[0].codes);
+    return err;
+}
+
+// Encodes the 'width' x 'height' image 'argb' into 'bw' with an optional color
+// cache and an optional Huffman (histogram) image:
+//   1. computes backward references and keeps a private copy of the best ones,
+//   2. clusters them into histograms over tiles of side (1 << histogram_bits),
+//   3. derives five Huffman codes per histogram,
+//   4. writes the color-cache parameters, the histogram image (only if more
+//      than one histogram is used), all Huffman codes, then the pixel data.
+//
+// Parameters:
+//   cache_bits         - in/out; set to MAX_COLOR_CACHE_BITS here, then passed
+//                        by pointer to VP8LGetBackwardReferences(). The value
+//                        found afterwards is what gets written to the stream.
+//   histogram_bits     - must lie in [MIN_HUFFMAN_BITS, MAX_HUFFMAN_BITS].
+//   init_byte_position - writer position used as origin for the two sizes below.
+//   hdr_size           - out; bytes written from 'init_byte_position' up to
+//                        the start of the pixel data. Only written once all
+//                        Huffman codes have been stored.
+//   data_size          - out; bytes written after the header. Written together
+//                        with 'hdr_size', regardless of the pixel-data result.
+//   number_thread      - forwarded to VP8LGetHistoImageSymbols().
+//
+// Returns VP8_ENC_OK, or VP8_ENC_ERROR_OUT_OF_MEMORY on any allocation or
+// helper failure (including errors returned by EncodeImageNoHuffman() and
+// StoreImageToBitMask()).
+static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw,
+                                             const uint32_t* const argb,
+                                             VP8LHashChain* const hash_chain,
+                                             VP8LBackwardRefs refs_array[2],
+                                             int width,
+                                             int height,
+                                             int quality,
+                                             int low_effort,
+                                             int* cache_bits,
+                                             int histogram_bits,
+                                             size_t init_byte_position,
+                                             int* const hdr_size,
+                                             int* const data_size,
+                                             int number_thread) {
+    WebPEncodingError err = VP8_ENC_OK;
+    // Number of tiles of side (1 << histogram_bits) covering the image.
+    const uint32_t histogram_image_xysize =
+        VP8LSubSampleSize(width, histogram_bits) * VP8LSubSampleSize(height, histogram_bits);
+    // Every resource released under 'Error:' starts out NULL/empty so that the
+    // cleanup code is valid from any early exit.
+    VP8LHistogramSet* histogram_image = NULL;
+    VP8LHistogramSet* tmp_histos = NULL;
+    int histogram_image_size = 0;
+    size_t bit_array_size = 0;
+    HuffmanTree* huff_tree = NULL;
+    HuffmanTreeToken* tokens = NULL;
+    HuffmanTreeCode* huffman_codes = NULL;
+    VP8LBackwardRefs refs;
+    VP8LBackwardRefs* best_refs;
+    StopProfilingWatch stop_watch;
+    // One histogram index per tile.
+    uint16_t* const histogram_symbols = (uint16_t*)WebPSafeMalloc(histogram_image_xysize, sizeof(*histogram_symbols));
+    assert(histogram_bits >= MIN_HUFFMAN_BITS);
+    assert(histogram_bits <= MAX_HUFFMAN_BITS);
+    assert(hdr_size != NULL);
+    assert(data_size != NULL);
+
+    // 'refs' is initialized before the first possible 'goto Error' because the
+    // cleanup code calls VP8LBackwardRefsClear(&refs) unconditionally.
+    VP8LBackwardRefsInit(&refs, refs_array[0].block_size_);
+    if (histogram_symbols == NULL) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+    }
+
+    *cache_bits = MAX_COLOR_CACHE_BITS;
+    // 'best_refs' is the reference to the best backward refs and points to one
+    // of refs_array[0] or refs_array[1].
+    // Calculate backward references from ARGB image.
+    best_refs = VP8LGetBackwardReferences(width, height, argb, quality, low_effort, cache_bits, hash_chain, refs_array);
+    // A private copy is taken: 'refs_array' is handed again to
+    // EncodeImageNoHuffman() below, before 'refs' is consumed.
+    if (best_refs == NULL || !VP8LBackwardRefsCopy(best_refs, &refs)) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+    }
+    histogram_image = VP8LAllocateHistogramSet(histogram_image_xysize, *cache_bits);
+    tmp_histos = VP8LAllocateHistogramSet(2, *cache_bits);
+    if (histogram_image == NULL || tmp_histos == NULL) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+    }
+
+    // Build histogram image and symbols from backward references.
+    if (!VP8LGetHistoImageSymbols(width, height, &refs, quality, low_effort, histogram_bits, *cache_bits,
+                                  histogram_image, tmp_histos, histogram_symbols, number_thread)) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+    }
+    // Create Huffman bit lengths and codes for each histogram image.
+    histogram_image_size = histogram_image->size;
+    // Five codes per histogram.
+    bit_array_size = 5 * histogram_image_size;
+    huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size, sizeof(*huffman_codes));
+    // Note: some histogram_image entries may point to tmp_histos[], so the latter
+    // need to outlive the following call to GetHuffBitLengthsAndCodes().
+    if (huffman_codes == NULL || !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+    }
+    // Free combined histograms.
+    VP8LFreeHistogramSet(histogram_image);
+    // Reset to NULL so the cleanup code does not free it a second time.
+    histogram_image = NULL;
+
+    // Free scratch histograms.
+    VP8LFreeHistogramSet(tmp_histos);
+    tmp_histos = NULL;
+
+    // Color Cache parameters.
+    if (*cache_bits > 0) {
+        VP8LPutBits(bw, 1, 1);
+        VP8LPutBits(bw, *cache_bits, 4);
+    } else {
+        VP8LPutBits(bw, 0, 1);
+    }
+
+    // Huffman image + meta huffman.
+    {
+        const int write_histogram_image = (histogram_image_size > 1);
+        VP8LPutBits(bw, write_histogram_image, 1);
+        if (write_histogram_image) {
+            uint32_t* const histogram_argb = (uint32_t*)WebPSafeMalloc(histogram_image_xysize, sizeof(*histogram_argb));
+            int max_index = 0;
+            uint32_t i;
+            if (histogram_argb == NULL) {
+                err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+                goto Error;
+            }
+            // Turn the per-tile histogram indices into pixels (index shifted
+            // left by 8 bits) and track the largest index in use.
+            for (i = 0; i < histogram_image_xysize; ++i) {
+                const int symbol_index = histogram_symbols[i] & 0xffff;
+                histogram_argb[i] = (symbol_index << 8);
+                if (symbol_index >= max_index) {
+                    max_index = symbol_index + 1;
+                }
+            }
+            // From here on only the histograms actually referenced are counted.
+            histogram_image_size = max_index;
+
+            VP8LPutBits(bw, histogram_bits - 2, 3);
+            StartProfiling(&stop_watch);
+            err = EncodeImageNoHuffman(bw, histogram_argb, hash_chain, refs_array,
+                                       VP8LSubSampleSize(width, histogram_bits),
+                                       VP8LSubSampleSize(height, histogram_bits), quality);
+            StopProfiling(&stop_watch, &timeEncode, &countEncode);
+            // Freed here rather than under 'Error:' because the pointer is
+            // local to this scope.
+            WebPSafeFree(histogram_argb);
+            if (err != VP8_ENC_OK) goto Error;
+        }
+    }
+
+    // Store Huffman codes.
+    {
+        int i;
+        int max_tokens = 0;
+        huff_tree = (HuffmanTree*)WebPSafeMalloc(3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree));
+        if (huff_tree == NULL) {
+            err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+            goto Error;
+        }
+        // Find maximum number of symbols for the huffman tree-set.
+        for (i = 0; i < 5 * histogram_image_size; ++i) {
+            HuffmanTreeCode* const codes = &huffman_codes[i];
+            if (max_tokens < codes->num_symbols) {
+                max_tokens = codes->num_symbols;
+            }
+        }
+        // One token buffer, sized for the largest code, is reused for all codes.
+        tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens));
+        if (tokens == NULL) {
+            err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+            goto Error;
+        }
+        for (i = 0; i < 5 * histogram_image_size; ++i) {
+            HuffmanTreeCode* const codes = &huffman_codes[i];
+            StoreHuffmanCode(bw, huff_tree, tokens, codes);
+            // Done after storing: a code with at most one used symbol is
+            // zeroed before the pixel data is written with it.
+            ClearHuffmanTreeIfOnlyOneSymbol(codes);
+        }
+    }
+
+    *hdr_size = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position);
+    // Store actual literals.
+    err = StoreImageToBitMask(bw, width, histogram_bits, &refs, histogram_symbols, huffman_codes);
+    *data_size = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position - *hdr_size);
+
+Error:
+    WebPSafeFree(tokens);
+    WebPSafeFree(huff_tree);
+    VP8LFreeHistogramSet(histogram_image);
+    VP8LFreeHistogramSet(tmp_histos);
+    VP8LBackwardRefsClear(&refs);
+    if (huffman_codes != NULL) {
+        // Only the first entry's 'codes' pointer is freed.
+        // NOTE(review): presumably GetHuffBitLengthsAndCodes() makes one
+        // allocation shared by all entries - confirm against its definition.
+        WebPSafeFree(huffman_codes->codes);
+        WebPSafeFree(huffman_codes);
+    }
+    WebPSafeFree(histogram_symbols);
+    return err;
+}
+
+// -----------------------------------------------------------------------------
+// Transforms
+
+// Announces a subtract-green transform in 'bw' (presence bit followed by the
+// 2-bit transform type) and applies it in place to the first width * height
+// pixels of enc->argb_. The elapsed time is accumulated in the
+// timeApplySubtractGreen / countApplySubtractGreen counters.
+static void ApplySubtractGreen(VP8LEncoder* const enc, int width, int height, VP8LBitWriter* const bw) {
+    const int num_pixels = width * height;
+    StopProfilingWatch timer;
+
+    StartProfiling(&timer);
+    VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
+    VP8LPutBits(bw, SUBTRACT_GREEN, 2);
+    VP8LSubtractGreenFromBlueAndRed(enc->argb_, num_pixels);
+    StopProfiling(&timer, &timeApplySubtractGreen, &countApplySubtractGreen);
+}
+
+// Runs the predictor transform over enc->argb_ (VP8LResidualImage() fills
+// enc->transform_data_), writes the transform header to 'bw' and then encodes
+// the sub-sampled transform image with EncodeImageNoHuffman().
+// Only the residual computation and the header write are profiled; the nested
+// image encoding happens after the stop watch is stopped.
+// Returns the status of EncodeImageNoHuffman().
+static WebPEncodingError ApplyPredictFilter(
+    const VP8LEncoder* const enc, int width, int height, int quality, int low_effort, VP8LBitWriter* const bw) {
+    const int pred_bits = enc->transform_bits_;
+    // Dimensions of the transform image: one entry per tile.
+    const int tiles_wide = VP8LSubSampleSize(width, pred_bits);
+    const int tiles_high = VP8LSubSampleSize(height, pred_bits);
+    StopProfilingWatch timer;
+
+    StartProfiling(&timer);
+    VP8LResidualImage(width, height, pred_bits, low_effort, enc->argb_, enc->argb_scratch_, enc->transform_data_,
+                      enc->config_->exact, enc->config_->use_ocl);
+    VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
+    VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2);
+    assert(pred_bits >= 2);
+    VP8LPutBits(bw, pred_bits - 2, 3);
+    StopProfiling(&timer, &timeEncPredict, &countEncPredict);
+
+    return EncodeImageNoHuffman(bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_,
+                                (VP8LBackwardRefs*)enc->refs_, // cast const away
+                                tiles_wide, tiles_high, quality);
+}
+
+// Runs the cross-color transform over enc->argb_ (VP8LColorSpaceTransform()
+// fills enc->transform_data_), writes the transform header to 'bw' and then
+// encodes the sub-sampled transform image with EncodeImageNoHuffman().
+// Only the transform and the header write are profiled.
+// Returns the status of EncodeImageNoHuffman().
+static WebPEncodingError ApplyCrossColorFilter(
+    const VP8LEncoder* const enc, int width, int height, int quality, VP8LBitWriter* const bw) {
+    const int ccolor_transform_bits = enc->transform_bits_;
+    // Dimensions of the transform image: one entry per tile.
+    const int tiles_wide = VP8LSubSampleSize(width, ccolor_transform_bits);
+    const int tiles_high = VP8LSubSampleSize(height, ccolor_transform_bits);
+    StopProfilingWatch timer;
+
+    StartProfiling(&timer);
+    VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality, enc->argb_, enc->transform_data_);
+    VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
+    VP8LPutBits(bw, CROSS_COLOR_TRANSFORM, 2);
+    assert(ccolor_transform_bits >= 2);
+    VP8LPutBits(bw, ccolor_transform_bits - 2, 3);
+    StopProfiling(&timer, &timeEncColorFilt, &countEncColorFilt);
+
+    return EncodeImageNoHuffman(bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_,
+                                (VP8LBackwardRefs*)enc->refs_, // cast const away
+                                tiles_wide, tiles_high, quality);
+}
+
+// -----------------------------------------------------------------------------
+
+// Builds the fixed-size file prefix ("RIFF" <size> "WEBP" "VP8L" <size>
+// followed by the VP8L magic byte), patches in the two little-endian sizes and
+// hands it to the picture's writer callback.
+// Returns VP8_ENC_ERROR_BAD_WRITE if the callback reports failure.
+static WebPEncodingError WriteRiffHeader(const WebPPicture* const pic, size_t riff_size, size_t vp8l_size) {
+    uint8_t header[RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE + VP8L_SIGNATURE_SIZE] = {
+        'R', 'I', 'F', 'F', 0, 0, 0, 0, 'W', 'E', 'B', 'P', 'V', 'P', '8', 'L', 0, 0, 0, 0, VP8L_MAGIC_BYTE,
+    };
+    // Each size field directly follows its four-character tag.
+    uint8_t* const riff_size_field = header + TAG_SIZE;
+    uint8_t* const vp8l_size_field = header + RIFF_HEADER_SIZE + TAG_SIZE;
+
+    PutLE32(riff_size_field, (uint32_t)riff_size);
+    PutLE32(vp8l_size_field, (uint32_t)vp8l_size);
+    return pic->writer(header, sizeof(header), pic) ? VP8_ENC_OK : VP8_ENC_ERROR_BAD_WRITE;
+}
+
+// Writes the picture dimensions to 'bw', each stored as (dimension - 1) on
+// VP8L_IMAGE_SIZE_BITS bits, width first.
+// Returns 1 on success, 0 if the bit writer is in error state.
+static int WriteImageSize(const WebPPicture* const pic, VP8LBitWriter* const bw) {
+    const int height = pic->height - 1;
+    const int width = pic->width - 1;
+    assert(width < WEBP_MAX_DIMENSION && height < WEBP_MAX_DIMENSION);
+
+    VP8LPutBits(bw, width, VP8L_IMAGE_SIZE_BITS);
+    VP8LPutBits(bw, height, VP8L_IMAGE_SIZE_BITS);
+    if (bw->error_) {
+        return 0;
+    }
+    return 1;
+}
+
+// Writes the 1-bit alpha flag followed by the format version
+// (VP8L_VERSION on VP8L_VERSION_BITS bits) to 'bw'.
+// Returns 1 on success, 0 if the bit writer is in error state.
+static int WriteRealAlphaAndVersion(VP8LBitWriter* const bw, int has_alpha) {
+    VP8LPutBits(bw, has_alpha, 1);
+    VP8LPutBits(bw, VP8L_VERSION, VP8L_VERSION_BITS);
+    if (bw->error_) {
+        return 0;
+    }
+    return 1;
+}
+
+// Finalizes 'bw' and sends the complete file through the picture's writer
+// callback: RIFF/VP8L header, the encoded payload, and one zero padding byte
+// when the VP8L chunk size is odd.
+// On success '*coded_size' receives the total number of bytes emitted; it is
+// left untouched on failure.
+// Returns VP8_ENC_ERROR_BAD_WRITE as soon as one write fails.
+static WebPEncodingError WriteImage(const WebPPicture* const pic, VP8LBitWriter* const bw, size_t* const coded_size) {
+    // The writer is finished first; the byte count is queried afterwards.
+    const uint8_t* const payload = VP8LBitWriterFinish(bw);
+    const size_t payload_size = VP8LBitWriterNumBytes(bw);
+    const size_t vp8l_size = VP8L_SIGNATURE_SIZE + payload_size;
+    const size_t pad = vp8l_size & 1;
+    const size_t riff_size = TAG_SIZE + CHUNK_HEADER_SIZE + vp8l_size + pad;
+    const WebPEncodingError header_status = WriteRiffHeader(pic, riff_size, vp8l_size);
+
+    if (header_status != VP8_ENC_OK) {
+        return header_status;
+    }
+    if (!pic->writer(payload, payload_size, pic)) {
+        return VP8_ENC_ERROR_BAD_WRITE;
+    }
+    if (pad) {
+        const uint8_t pad_byte[1] = {0};
+        if (!pic->writer(pad_byte, 1, pic)) {
+            return VP8_ENC_ERROR_BAD_WRITE;
+        }
+    }
+
+    *coded_size = CHUNK_HEADER_SIZE + riff_size;
+    return VP8_ENC_OK;
+}
+
+// -----------------------------------------------------------------------------
+
+// Allocates the memory for argb (W x H) buffer, 2 rows of context for
+// prediction and transform data.
+// Flags influencing the memory allocated:
+//  enc->transform_bits_
+//  enc->use_predict_, enc->use_cross_color_
+//
+// A single block is allocated and carved into three aligned regions:
+// enc->argb_, enc->argb_scratch_ and enc->transform_data_. Nothing is done
+// (and enc->current_width_ is left as is) when enc->argb_ is already set.
+// All sizes are computed in 64-bit arithmetic so that a large width/height
+// product cannot overflow 'int' before WebPSafeMalloc() gets to validate it.
+// Returns VP8_ENC_ERROR_OUT_OF_MEMORY if the allocation fails.
+static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc, int width, int height) {
+    if (enc->argb_ == NULL) {
+        const int tile_size = 1 << enc->transform_bits_;
+        const uint64_t image_size = (uint64_t)width * height;
+        // Ensure enough size for tiles, as well as for two scanlines and two
+        // extra pixels for CopyImageWithPrediction.
+        const uint64_t argb_scratch_size = enc->use_predict_ ? (uint64_t)tile_size * width + width + 2 : 0;
+        // One entry per tile, only needed by the predictor and cross-color
+        // transforms.
+        const uint64_t transform_data_size =
+            (enc->use_predict_ || enc->use_cross_color_)
+                ? (uint64_t)VP8LSubSampleSize(width, enc->transform_bits_) *
+                      VP8LSubSampleSize(height, enc->transform_bits_)
+                : 0;
+        // WEBP_ALIGN_CST of slack is reserved before each of the two regions
+        // that get re-aligned below.
+        const uint64_t total_size =
+            image_size + WEBP_ALIGN_CST + argb_scratch_size + WEBP_ALIGN_CST + transform_data_size;
+        uint32_t* mem = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*mem));
+        if (mem == NULL) {
+            return VP8_ENC_ERROR_OUT_OF_MEMORY;
+        }
+        enc->argb_ = mem;
+        mem = (uint32_t*)WEBP_ALIGN(mem + image_size);
+        enc->argb_scratch_ = mem;
+        mem = (uint32_t*)WEBP_ALIGN(mem + argb_scratch_size);
+        enc->transform_data_ = mem;
+        enc->current_width_ = width;
+    }
+    return VP8_ENC_OK;
+}
+
+// Releases the block that starts at enc->argb_ and resets that pointer to
+// NULL. enc->argb_scratch_ and enc->transform_data_ are not reset here.
+static void ClearTransformBuffer(VP8LEncoder* const enc) {
+    uint32_t* const buffer = enc->argb_;
+    enc->argb_ = NULL;
+    WebPSafeFree(buffer);
+}
+
+// Makes sure the transform buffer exists, then copies the input picture into
+// enc->argb_ row by row, dropping the source stride so that rows become
+// contiguous ('width' pixels apart).
+// Returns the allocation error, if any, otherwise VP8_ENC_OK.
+static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) {
+    const WebPPicture* const picture = enc->pic_;
+    const int width = picture->width;
+    const int height = picture->height;
+    const size_t row_bytes = width * sizeof(*enc->argb_);
+    const WebPEncodingError alloc_status = AllocateTransformBuffer(enc, width, height);
+    int row;
+
+    if (alloc_status != VP8_ENC_OK) {
+        return alloc_status;
+    }
+    for (row = 0; row < height; ++row) {
+        uint32_t* const dst_row = enc->argb_ + row * width;
+        const uint32_t* const src_row = picture->argb + row * picture->argb_stride;
+        memcpy(dst_row, src_row, row_bytes);
+    }
+    assert(enc->current_width_ == width);
+    return VP8_ENC_OK;
+}
+
+// -----------------------------------------------------------------------------
+
+// Converts one row of 'width' ARGB pixels from 'src' into palette indices
+// written to 'dst'.
+// '*last_pix' / '*last_idx' form a one-entry cache carried from call to call:
+// a pixel equal to the cached one reuses the cached index without searching.
+// Otherwise the palette is scanned linearly and the first matching entry
+// becomes the new cache content. A pixel that is absent from the palette
+// leaves the cache unchanged and is given the cached index.
+static void MapToPalette(const uint32_t palette[],
+                         int num_colors,
+                         uint32_t* const last_pix,
+                         int* const last_idx,
+                         const uint32_t* src,
+                         uint8_t* dst,
+                         int width) {
+    uint32_t cached_pix = *last_pix;
+    int cached_idx = *last_idx;
+    int col;
+    for (col = 0; col < width; ++col) {
+        const uint32_t pixel = src[col];
+        if (pixel != cached_pix) {
+            int entry = 0;
+            while (entry < num_colors && palette[entry] != pixel) {
+                ++entry;
+            }
+            if (entry < num_colors) { // found: refresh the cache
+                cached_idx = entry;
+                cached_pix = pixel;
+            }
+        }
+        dst[col] = cached_idx;
+    }
+    *last_pix = cached_pix;
+    *last_idx = cached_idx;
+}
+
+// Remap argb values in src[] to packed palettes entries in dst[], one row at
+// a time, using an internally allocated temporary row of 'width' bytes.
+// We assume that all src[] values have a corresponding entry in the palette.
+// Note: src[] can be the same as dst[]
+// 'src_stride' and 'dst_stride' are the distances between rows, in pixels.
+// Returns VP8_ENC_ERROR_OUT_OF_MEMORY if the temporary row cannot be
+// allocated, VP8_ENC_OK otherwise.
+static WebPEncodingError ApplyPalette(const uint32_t* src,
+                                      uint32_t src_stride,
+                                      uint32_t* dst,
+                                      uint32_t dst_stride,
+                                      const uint32_t* palette,
+                                      int palette_size,
+                                      int width,
+                                      int height,
+                                      int xbits) {
+    // TODO(skal): this tmp buffer is not needed if VP8LBundleColorMap() can be
+    // made to work in-place.
+    uint8_t* const index_row = (uint8_t*)WebPSafeMalloc(width, sizeof(*index_row));
+    int green_only = 1;
+    int i, row;
+
+    if (index_row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
+
+    // The direct lookup table below is only usable when every palette entry
+    // has nothing but its bits 8..15 set.
+    for (i = 0; green_only && i < palette_size; ++i) {
+        green_only = ((palette[i] & 0xffff00ffu) == 0);
+    }
+
+    if (!green_only) {
+        // Use 1 pixel cache for ARGB pixels.
+        uint32_t last_pix = palette[0];
+        int last_idx = 0;
+        for (row = 0; row < height; ++row) {
+            MapToPalette(palette, palette_size, &last_pix, &last_idx, src, index_row, width);
+            VP8LBundleColorMap(index_row, width, xbits, dst);
+            src += src_stride;
+            dst += dst_stride;
+        }
+    } else {
+        // Table from the value in bits 8..15 of a pixel to its palette index.
+        uint8_t inv_palette[MAX_PALETTE_SIZE] = {0};
+        for (i = 0; i < palette_size; ++i) {
+            inv_palette[(palette[i] >> 8) & 0xff] = i;
+        }
+        for (row = 0; row < height; ++row) {
+            int col;
+            for (col = 0; col < width; ++col) {
+                index_row[col] = inv_palette[(src[col] >> 8) & 0xff];
+            }
+            VP8LBundleColorMap(index_row, width, xbits, dst);
+            src += src_stride;
+            dst += dst_stride;
+        }
+    }
+
+    WebPSafeFree(index_row);
+    return VP8_ENC_OK;
+}
+
+// Replaces every input pixel by its palette index, packing several indices
+// per output pixel when the palette is small, and stores the result in
+// enc->argb_.
+// The source is enc->argb_ itself when 'in_place' is non-zero, the input
+// picture otherwise.
+// Note: Expects "enc->palette_" to be set properly.
+static WebPEncodingError MapImageFromPalette(VP8LEncoder* const enc, int in_place) {
+    const WebPPicture* const pic = enc->pic_;
+    const int width = pic->width;
+    const int height = pic->height;
+    const uint32_t* const palette = enc->palette_;
+    const int palette_size = enc->palette_size_;
+    // Source pointer and stride are captured before the transform buffer is
+    // (possibly) allocated below.
+    const uint32_t* const src = in_place ? enc->argb_ : pic->argb;
+    const int src_stride = in_place ? enc->current_width_ : pic->argb_stride;
+    WebPEncodingError err;
+    int xbits;
+
+    // 2^xbits indices are bundled per output pixel.
+    if (palette_size <= 2) {
+        xbits = 3;
+    } else if (palette_size <= 4) {
+        xbits = 2;
+    } else if (palette_size <= 16) {
+        xbits = 1;
+    } else {
+        xbits = 0;
+    }
+
+    err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height);
+    if (err != VP8_ENC_OK) {
+        return err;
+    }
+
+    // This is done line by line.
+    return ApplyPalette(src, src_stride, enc->argb_, enc->current_width_, palette, palette_size, width, height,
+                        xbits);
+}
+
+// Save palette_[] to bitstream.
+// Writes the color-indexing transform header and the palette size, then
+// encodes the palette as a (palette_size x 1) image in which every entry
+// except the first is stored as the VP8LSubPixels() difference with its
+// predecessor.
+// Returns the status of EncodeImageNoHuffman().
+static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, VP8LEncoder* const enc) {
+    const uint32_t* const palette = enc->palette_;
+    const int palette_size = enc->palette_size_;
+    uint32_t delta_palette[MAX_PALETTE_SIZE];
+    int i;
+
+    VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
+    VP8LPutBits(bw, COLOR_INDEXING_TRANSFORM, 2);
+    assert(palette_size >= 1 && palette_size <= MAX_PALETTE_SIZE);
+    VP8LPutBits(bw, palette_size - 1, 8);
+
+    // Source and destination are distinct arrays, so the entries can be
+    // produced front to back.
+    delta_palette[0] = palette[0];
+    for (i = 1; i < palette_size; ++i) {
+        delta_palette[i] = VP8LSubPixels(palette[i], palette[i - 1]);
+    }
+    return EncodeImageNoHuffman(bw, delta_palette, &enc->hash_chain_, enc->refs_, palette_size, 1, 20 /* quality */);
+}
+
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+
+// Writes a predictor transform in which every tile (tiles of side
+// 1 << pred_bits, with pred_bits fixed to 5) uses the same predictor mode, 7.
+// The per-tile predictor image is built in a temporary buffer, encoded with
+// EncodeImageNoHuffman() and released before returning.
+// Returns VP8_ENC_ERROR_OUT_OF_MEMORY if the temporary buffer cannot be
+// allocated, otherwise the status of EncodeImageNoHuffman().
+static WebPEncodingError EncodeDeltaPalettePredictorImage(VP8LBitWriter* const bw,
+                                                          VP8LEncoder* const enc,
+                                                          int quality) {
+    const WebPPicture* const pic = enc->pic_;
+    const int pred_bits = 5;
+    const int pred = 7; // default is Predictor7 (Top/Left Average)
+    const int transform_width = VP8LSubSampleSize(pic->width, pred_bits);
+    const int transform_height = VP8LSubSampleSize(pic->height, pred_bits);
+    const int tiles_per_row = VP8LSubSampleSize(pic->width, pred_bits);
+    const int tiles_per_col = VP8LSubSampleSize(pic->height, pred_bits);
+    // Same value for every tile: full alpha, predictor mode in bits 8..15.
+    const uint32_t tile_value = 0xff000000u | (pred << 8);
+    uint32_t* const predictors = (uint32_t*)WebPSafeMalloc(tiles_per_col * tiles_per_row, sizeof(*predictors));
+    WebPEncodingError err;
+    int row, col;
+
+    if (predictors == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
+
+    for (row = 0; row < tiles_per_col; ++row) {
+        uint32_t* const tile_row = predictors + row * tiles_per_row;
+        for (col = 0; col < tiles_per_row; ++col) {
+            tile_row[col] = tile_value;
+        }
+    }
+
+    VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
+    VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2);
+    VP8LPutBits(bw, pred_bits - 2, 3);
+    err = EncodeImageNoHuffman(bw, predictors, &enc->hash_chain_,
+                               (VP8LBackwardRefs*)enc->refs_, // cast const away
+                               transform_width, transform_height, quality);
+    WebPSafeFree(predictors);
+    return err;
+}
+
+#endif // WEBP_EXPERIMENTAL_FEATURES
+
+// -----------------------------------------------------------------------------
+// VP8LEncoder
+
+// Allocates a zero-initialized encoder bound to 'config' and 'picture', and
+// runs VP8LEncDspInit().
+// Returns NULL on allocation failure, after recording
+// VP8_ENC_ERROR_OUT_OF_MEMORY on 'picture' through WebPEncodingSetError().
+static VP8LEncoder* VP8LEncoderNew(const WebPConfig* const config, const WebPPicture* const picture) {
+    VP8LEncoder* const enc = (VP8LEncoder*)WebPSafeCalloc(1ULL, sizeof(*enc));
+    if (enc != NULL) {
+        enc->config_ = config;
+        enc->pic_ = picture;
+        VP8LEncDspInit();
+        return enc;
+    }
+    WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    return NULL;
+}
+
+// Releases everything owned by 'enc' (hash chain, both backward-reference
+// sets, the transform buffer) and then the encoder itself.
+// Passing NULL is a no-op.
+static void VP8LEncoderDelete(VP8LEncoder* enc) {
+    if (enc == NULL) {
+        return;
+    }
+    VP8LHashChainClear(&enc->hash_chain_);
+    VP8LBackwardRefsClear(&enc->refs_[0]);
+    VP8LBackwardRefsClear(&enc->refs_[1]);
+    ClearTransformBuffer(enc);
+    WebPSafeFree(enc);
+}
+
+// -----------------------------------------------------------------------------
+// Main call
+// Encodes 'picture' as a lossless stream into 'bw'; returns VP8_ENC_OK or an error code.
+WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
+                                   const WebPPicture* const picture,
+                                   VP8LBitWriter* const bw) {
+    WebPEncodingError err = VP8_ENC_OK;
+    const int quality = (int)config->quality;
+    const int low_effort = (config->method == 0);
+    const int width = picture->width;
+    const int height = picture->height;
+    const int number_thread = config->thread_number;
+    VP8LEncoder* const enc = VP8LEncoderNew(config, picture); // released at 'Error:' on every path
+    const size_t byte_position = VP8LBitWriterNumBytes(bw);   // bytes already in 'bw' on entry
+    int use_near_lossless = 0;
+    int hdr_size = 0;
+    int data_size = 0;
+    int use_delta_palettization = 0; // only ever set under WEBP_EXPERIMENTAL_FEATURES
+    StopProfilingWatch stop_watch;
+
+    if (enc == NULL) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+    }
+
+    // ---------------------------------------------------------------------------
+    // Analyze image (entropy, num_palettes etc)
+
+    if (!AnalyzeAndInit(enc)) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+    }
+
+    StartProfiling(&stop_watch); // stopped right after the last transform bit is written
+    // Apply near-lossless preprocessing (skipped for palette images or near_lossless >= 100).
+    use_near_lossless = !enc->use_palette_ && (config->near_lossless < 100);
+    // if (config->use_ocl) {
+    //   if (use_near_lossless) {
+    //     int result;
+    //     result = VP8ApplyNearLosslessOcl(width, height, picture->argb,
+    //                                      config->near_lossless);
+    //     if (result == 0) {
+    //       err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    //       goto Error;
+    //     }
+    //     else if (result == -1) {
+    //       err = VP8_ENC_ERROR_OCL_FAILED;
+    //       goto Error;
+    //     }
+    //   }
+    // } else {
+    if (use_near_lossless) {
+        if (!VP8ApplyNearLossless(width, height, picture->argb, config->near_lossless)) {
+            err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+            goto Error;
+        }
+    }
+// }
+
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+    if (config->delta_palettization) {
+        enc->use_predict_ = 1;
+        enc->use_cross_color_ = 0;
+        enc->use_subtract_green_ = 0;
+        enc->use_palette_ = 1;
+        err = MakeInputImageCopy(enc);
+        if (err != VP8_ENC_OK) goto Error;
+        err = WebPSearchOptimalDeltaPalette(enc);
+        if (err != VP8_ENC_OK) goto Error;
+        if (enc->use_palette_) { // re-tested after the delta-palette search above
+            err = AllocateTransformBuffer(enc, width, height);
+            if (err != VP8_ENC_OK) goto Error;
+            err = EncodeDeltaPalettePredictorImage(bw, enc, quality);
+            if (err != VP8_ENC_OK) goto Error;
+            use_delta_palettization = 1;
+        }
+    }
+#endif // WEBP_EXPERIMENTAL_FEATURES
+
+    // Encode palette
+    if (enc->use_palette_) {
+        err = EncodePalette(bw, enc);
+        if (err != VP8_ENC_OK) goto Error;
+        err = MapImageFromPalette(enc, use_delta_palettization);
+        if (err != VP8_ENC_OK) goto Error;
+    }
+    if (!use_delta_palettization) {
+        // In case image is not packed.
+        if (enc->argb_ == NULL) {
+            err = MakeInputImageCopy(enc);
+            if (err != VP8_ENC_OK) goto Error;
+        }
+
+        // -------------------------------------------------------------------------
+        // Apply transforms and write transform data.
+        // Order used here: subtract-green, then predictor, then cross-color.
+        if (enc->use_subtract_green_) {
+            ApplySubtractGreen(enc, enc->current_width_, height, bw);
+        }
+
+        if (enc->use_predict_) {
+            err = ApplyPredictFilter(enc, enc->current_width_, height, quality, low_effort, bw);
+            if (err != VP8_ENC_OK) goto Error;
+        }
+
+        if (enc->use_cross_color_) {
+            err = ApplyCrossColorFilter(enc, enc->current_width_, height, quality, bw);
+            if (err != VP8_ENC_OK) goto Error;
+        }
+    }
+
+    VP8LPutBits(bw, !TRANSFORM_PRESENT, 1); // No more transforms.
+    StopProfiling(&stop_watch, &timeApplyTransforms, &countApplyTransforms);
+
+    // ---------------------------------------------------------------------------
+    // Encode and write the transformed image.
+    err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_, enc->current_width_, height, quality,
+                              low_effort, &enc->cache_bits_, enc->histo_bits_, byte_position, &hdr_size, &data_size,
+                              number_thread);
+    if (err != VP8_ENC_OK) goto Error;
+
+    if (picture->stats != NULL) {
+        WebPAuxStats* const stats = picture->stats;
+        stats->lossless_features = 0; // bit flags: 1 = predict, 2 = cross-color, 4 = subtract-green, 8 = palette
+        if (enc->use_predict_) stats->lossless_features |= 1;
+        if (enc->use_cross_color_) stats->lossless_features |= 2;
+        if (enc->use_subtract_green_) stats->lossless_features |= 4;
+        if (enc->use_palette_) stats->lossless_features |= 8;
+        stats->histogram_bits = enc->histo_bits_;
+        stats->transform_bits = enc->transform_bits_;
+        stats->cache_bits = enc->cache_bits_;
+        stats->palette_size = enc->palette_size_;
+        stats->lossless_size = (int)(VP8LBitWriterNumBytes(bw) - byte_position); // bytes added by this call
+        stats->lossless_hdr_size = hdr_size;
+        stats->lossless_data_size = data_size;
+    }
+
+Error: // common exit for success and failure; VP8LEncoderDelete() accepts NULL
+    VP8LEncoderDelete(enc);
+    return err;
+}
+
+int VP8LEncodeImage(const WebPConfig* const config, const WebPPicture* const picture) {
+    int width, height;
+    int has_alpha;
+    size_t coded_size;
+    int percent = 0; // last progress value handed to WebPReportProgress()
+    int initial_size;
+    WebPEncodingError err = VP8_ENC_OK;
+    VP8LBitWriter bw;
+    // Returns 1 on success; otherwise 0, with the error stored on 'picture' unless it is NULL.
+    if (picture == NULL) return 0;
+
+    if (config == NULL || picture->argb == NULL) {
+        err = VP8_ENC_ERROR_NULL_PARAMETER;
+        WebPEncodingSetError(picture, err);
+        return 0;
+    }
+
+    width = picture->width;
+    height = picture->height;
+    // Initialize BitWriter with size corresponding to 16 bpp to photo images and
+    // 8 bpp for graphical images.
+    initial_size = (config->image_hint == WEBP_HINT_GRAPH) ? width * height : width * height * 2;
+    if (!VP8LBitWriterInit(&bw, initial_size)) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+    }
+
+    if (!WebPReportProgress(picture, 1, &percent)) {
+    UserAbort: // also the target of the later 'goto UserAbort' progress checks
+        err = VP8_ENC_ERROR_USER_ABORT;
+        goto Error;
+    }
+    // Reset stats (for pure lossless coding)
+    if (picture->stats != NULL) {
+        WebPAuxStats* const stats = picture->stats;
+        memset(stats, 0, sizeof(*stats));
+        stats->PSNR[0] = 99.f;
+        stats->PSNR[1] = 99.f;
+        stats->PSNR[2] = 99.f;
+        stats->PSNR[3] = 99.f;
+        stats->PSNR[4] = 99.f;
+    }
+
+    // Write image size.
+    if (!WriteImageSize(picture, &bw)) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+    }
+
+    has_alpha = WebPPictureHasTransparency(picture);
+    // Write the non-trivial Alpha flag and lossless version.
+    if (!WriteRealAlphaAndVersion(&bw, has_alpha)) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+    }
+
+    if (!WebPReportProgress(picture, 5, &percent)) goto UserAbort;
+
+    // Encode main image stream.
+    err = VP8LEncodeStream(config, picture, &bw);
+    if (err != VP8_ENC_OK) goto Error;
+
+    // TODO(skal): have a fine-grained progress report in VP8LEncodeStream().
+    if (!WebPReportProgress(picture, 90, &percent)) goto UserAbort;
+
+    // Finish the RIFF chunk.
+    err = WriteImage(picture, &bw, &coded_size);
+    if (err != VP8_ENC_OK) goto Error;
+
+    if (!WebPReportProgress(picture, 100, &percent)) goto UserAbort;
+
+    // Save size.
+    if (picture->stats != NULL) {
+        picture->stats->coded_size += (int)coded_size;
+        picture->stats->lossless_size = (int)coded_size; // replaces the value set by VP8LEncodeStream()
+    }
+
+    if (picture->extra_info != NULL) {
+        const int mb_w = (width + 15) >> 4;  // width in 16-pixel units, rounded up
+        const int mb_h = (height + 15) >> 4; // height in 16-pixel units, rounded up
+        memset(picture->extra_info, 0, mb_w * mb_h * sizeof(*picture->extra_info));
+    }
+
+Error: // common exit for success and failure
+    if (bw.error_) err = VP8_ENC_ERROR_OUT_OF_MEMORY; // a bit-writer error overrides any other status
+    VP8LBitWriterWipeOut(&bw);
+    if (err != VP8_ENC_OK) {
+        WebPEncodingSetError(picture, err);
+        return 0;
+    }
+    return 1;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/enc/vp8li.h b/codec/L2/demos/webpEnc/host/src/enc/vp8li.h
new file mode 100644
index 0000000000..f1499b0937
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/vp8li.h
@@ -0,0 +1,76 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Lossless encoder: internal header.
+//
+// Author: Vikas Arora (vikaas.arora@gmail.com)
+
+#ifndef WEBP_ENC_VP8LI_H_
+#define WEBP_ENC_VP8LI_H_
+
+#include "./backward_references.h"
+#include "./histogram.h"
+#include "../utils/bit_writer.h"
+#include "../webp/encode.h"
+#include "../webp/format_constants.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct { // State of the lossless encoder; created and deleted inside VP8LEncodeStream().
+    const WebPConfig* config_; // user configuration and parameters
+    const WebPPicture* pic_;   // input picture.
+
+    uint32_t* argb_;           // Transformed argb image data.
+    uint32_t* argb_scratch_;   // Scratch memory for argb rows
+                               // (used for prediction).
+    uint32_t* transform_data_; // Scratch memory for transform data.
+    int current_width_;        // Corresponds to packed image width.
+
+    // Encoding parameters derived from quality parameter.
+    int histo_bits_;     // Reported to the caller as stats->histogram_bits.
+    int transform_bits_; // Reported to the caller as stats->transform_bits.
+    int cache_bits_;     // If equal to 0, don't use color cache.
+
+    // Encoding parameters derived from image characteristics.
+    int use_cross_color_;    // Non-zero: VP8LEncodeStream() applies the cross-color filter.
+    int use_subtract_green_; // Non-zero: VP8LEncodeStream() applies subtract-green.
+    int use_predict_;        // Non-zero: VP8LEncodeStream() applies the predict filter.
+    int use_palette_;        // Non-zero: VP8LEncodeStream() encodes a palette.
+    int palette_size_;       // Reported to the caller as stats->palette_size.
+    uint32_t palette_[MAX_PALETTE_SIZE]; // presumably the first palette_size_ entries are in use -- TODO confirm
+
+    // Some 'scratch' (potentially large) objects.
+    struct VP8LBackwardRefs refs_[2]; // Backward Refs array corresponding to
+                                      // LZ77 & RLE coding.
+    VP8LHashChain hash_chain_;        // HashChain data for constructing
+                                      // backward references.
+} VP8LEncoder;
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+// Encodes the picture.
+// Returns 0 if config or picture is NULL or picture doesn't have valid argb
+// input.
+int VP8LEncodeImage(const WebPConfig* const config, const WebPPicture* const picture);
+
+// Encodes the main image stream using the supplied bit writer.
+WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
+                                   const WebPPicture* const picture,
+                                   VP8LBitWriter* const bw);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_ENC_VP8LI_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/enc/webpenc.c b/codec/L2/demos/webpEnc/host/src/enc/webpenc.c
new file mode 100644
index 0000000000..f6c8ec2e90
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/enc/webpenc.c
@@ -0,0 +1,624 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebP encoder: main entry point
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "./cost.h"
+#include "./vp8enci.h"
+#include "./vp8li.h"
+#include "../utils/utils.h"
+#include "../utils/profiling.h"
+
+#include "kernel/oclHelper.h"
+#include "../../host/create_kernel.h"
+
+// #define PRINT_MEMORY_INFO
+
+// #define _WEBP_BLOCKRUN_
+
+#include "vp8_AsyncConfig.h"
+
+#ifdef PRINT_MEMORY_INFO
+#include <stdio.h>
+#endif
+
+void writevec2file(char* fpname, uint8_t* vec, int size) {
+    // Debug helper: dumps vec[0..size) to 'fpname' as space-separated decimals plus a blank line.
+    FILE* fp = fopen(fpname, "w");
+    if (fp == NULL) { perror(fpname); return; } // fopen() can fail (bad path, permissions): report, don't crash
+    for (int i = 0; i < size; i++) fprintf(fp, "%d ", vec[i]);
+    fprintf(fp, "\n\n");
+    fclose(fp);
+}
+
+//------------------------------------------------------------------------------
+
+int WebPGetEncoderVersion(void) { // packed as (major << 16) | (minor << 8) | revision
+    return ENC_REV_VERSION | (ENC_MIN_VERSION << 8) | (ENC_MAJ_VERSION << 16);
+}
+
+//------------------------------------------------------------------------------
+// VP8Encoder
+//------------------------------------------------------------------------------
+
+static void ResetSegmentHeader(VP8Encoder* const enc) {
+    // Reloads the segment count from the config; update_map_ is set only for more than one segment.
+    enc->segment_hdr_.num_segments_ = enc->config_->segments;
+    enc->segment_hdr_.update_map_ = (enc->segment_hdr_.num_segments_ > 1);
+    enc->segment_hdr_.size_ = 0;
+}
+
+static void ResetFilterHeader(VP8Encoder* const enc) {
+    // Filter header defaults: simple_ = 1; level_, sharpness_ and i4x4_lf_delta_ = 0.
+    enc->filter_hdr_.simple_ = 1;
+    enc->filter_hdr_.level_ = 0;
+    enc->filter_hdr_.sharpness_ = 0;
+    enc->filter_hdr_.i4x4_lf_delta_ = 0;
+}
+
+static void ResetBoundaryPredictions(VP8Encoder* const enc) {
+    // Seeds the border around preds_ (the row above it and the column to its
+    // left) with B_DC_PRED. Strictly speaking only intra4 needs this.
+    const int stride = enc->preds_w_;
+    uint8_t* const above = enc->preds_ - stride;
+    int k;
+    for (k = -1; k < 4 * enc->mb_w_; ++k) { // starts at -1 so the top-left corner is covered too
+        above[k] = B_DC_PRED;
+    }
+    for (k = 0; k < 4 * enc->mb_h_; ++k) {
+        enc->preds_[k * stride - 1] = B_DC_PRED;
+    }
+    enc->nz_[-1] = 0; // constant
+}
+
+// Mapping from config->method_ to coding tools used.
+//-------------------+---+---+---+---+---+---+---+
+//   Method          | 0 | 1 | 2 | 3 |(4)| 5 | 6 |
+//-------------------+---+---+---+---+---+---+---+
+// fast probe        | x |   |   | x |   |   |   |
+//-------------------+---+---+---+---+---+---+---+
+// dynamic proba     | ~ | x | x | x | x | x | x |
+//-------------------+---+---+---+---+---+---+---+
+// fast mode analysis|   |   |   |   | x | x | x |
+//-------------------+---+---+---+---+---+---+---+
+// basic rd-opt      |   |   |   | x | x | x | x |
+//-------------------+---+---+---+---+---+---+---+
+// disto-refine i4/16| x | x | x |   |   |   |   |
+//-------------------+---+---+---+---+---+---+---+
+// disto-refine uv   |   | x | x |   |   |   |   |
+//-------------------+---+---+---+---+---+---+---+
+// rd-opt i4/16      |   |   | ~ | x | x | x | x |
+//-------------------+---+---+---+---+---+---+---+
+// token buffer (opt)|   |   |   | x | x | x | x |
+//-------------------+---+---+---+---+---+---+---+
+// Trellis           |   |   |   |   |   | x |Ful|
+//-------------------+---+---+---+---+---+---+---+
+// full-SNS          |   |   |   |   | x | x | x |
+//-------------------+---+---+---+---+---+---+---+
+
+static void MapConfigToTools(VP8Encoder* const enc) {
+    // Translates the user configuration into the encoder's internal tool switches.
+    const WebPConfig* const cfg = enc->config_;
+    const int partition_room = 100 - cfg->partition_limit;
+    enc->method_ = cfg->method;
+    if (cfg->method >= 6) {
+        enc->rd_opt_level_ = RD_OPT_TRELLIS_ALL;
+    } else if (cfg->method >= 5) {
+        enc->rd_opt_level_ = RD_OPT_TRELLIS;
+    } else {
+        enc->rd_opt_level_ = (cfg->method >= 3) ? RD_OPT_BASIC : RD_OPT_NONE;
+    }
+    // Upper bound of 16 bits per 4x4 block, modulated with a quadratic curve.
+    enc->max_i4_header_bits_ = 256 * 16 * 16 * (partition_room * partition_room) / (100 * 100);
+    enc->thread_level_ = cfg->thread_level;
+    enc->do_search_ = (cfg->target_size > 0 || cfg->target_PSNR > 0);
+#if !defined(DISABLE_TOKEN_BUFFER)
+    if (!cfg->low_memory) enc->use_tokens_ = (enc->rd_opt_level_ >= RD_OPT_BASIC); // need rd stats
+#endif
+    // The token buffer doesn't work with multi-partition.
+    if (!cfg->low_memory && enc->use_tokens_) enc->num_parts_ = 1;
+}
+
+// Memory scaling with dimensions:
+//  memory (bytes) ~= 2.25 * w + 0.0625 * w * h
+//
+// Typical memory footprint (614x440 picture)
+//              encoder: 22111
+//                 info: 4368
+//                preds: 17741
+//          top samples: 1263
+//             non-zero: 175
+//             lf-stats: 0
+//                total: 45658
+// Transient object sizes:
+//       VP8EncIterator: 3360
+//         VP8ModeScore: 872
+//       VP8SegmentInfo: 732
+//          VP8EncProba: 18352
+//              LFStats: 2048
+// Picture size (yuv): 419328
+
+static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, WebPPicture* const picture) {
+    // Builds the encoder and all its side arrays inside one allocation; NULL (error set on 'picture') on failure.
+    VP8Encoder* enc = NULL; // assigned from 'mem' below; the sizeof(*enc...) operands are never evaluated
+    const int use_filter = (config->filter_strength > 0) || (config->autofilter > 0);
+    const int mb_w = (picture->width + 15) >> 4;
+    const int mb_h = (picture->height + 15) >> 4;
+    const int preds_w = 4 * mb_w + 1;
+    const int preds_h = 4 * mb_h + 1;
+    const size_t preds_size = preds_w * preds_h * sizeof(*enc->preds_);
+    const int top_stride = mb_w * 16;
+    const size_t nz_size = (mb_w + 1) * sizeof(*enc->nz_) + WEBP_ALIGN_CST;
+    const size_t info_size = mb_w * mb_h * sizeof(*enc->mb_info_);
+    const size_t samples_size = 2 * top_stride * sizeof(*enc->y_top_) + WEBP_ALIGN_CST; // top-luma/u/v, aligned
+    const size_t lf_stats_size = config->autofilter ? sizeof(*enc->lf_stats_) + WEBP_ALIGN_CST : 0;
+    uint8_t* mem;
+    const uint64_t size = (uint64_t)sizeof(*enc) // main struct
+                          + WEBP_ALIGN_CST       // cache alignment
+                          + info_size            // modes info
+                          + preds_size           // prediction modes
+                          + samples_size         // top/left samples
+                          + nz_size              // coeff context bits
+                          + lf_stats_size;       // autofilter stats
+
+#ifdef PRINT_MEMORY_INFO
+    printf("===================================\n");
+    printf(
+        "Memory used:\n"
+        "             encoder: %ld\n"
+        "                info: %ld\n"
+        "               preds: %ld\n"
+        "         top samples: %ld\n"
+        "            non-zero: %ld\n"
+        "            lf-stats: %ld\n"
+        "               total: %ld\n",
+        sizeof(*enc) + WEBP_ALIGN_CST, info_size, preds_size, samples_size, nz_size, lf_stats_size, size);
+    printf(
+        "Transient object sizes:\n"
+        "      VP8EncIterator: %ld\n"
+        "        VP8ModeScore: %ld\n"
+        "      VP8SegmentInfo: %ld\n"
+        "         VP8EncProba: %ld\n"
+        "             LFStats: %ld\n",
+        sizeof(VP8EncIterator), sizeof(VP8ModeScore), sizeof(VP8SegmentInfo), sizeof(VP8EncProba), sizeof(LFStats));
+    printf("Picture size (yuv): %ld\n", mb_w * mb_h * 384 * sizeof(uint8_t));
+    printf("===================================\n");
+#endif
+    mem = (uint8_t*)WebPSafeMalloc(size, sizeof(*mem));
+    if (mem == NULL) {
+        WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+        return NULL;
+    }
+    enc = (VP8Encoder*)mem; // the struct sits at the start of the block, so WebPSafeFree(enc) releases everything
+    mem = (uint8_t*)WEBP_ALIGN(mem + sizeof(*enc));
+    memset(enc, 0, sizeof(*enc));
+    enc->num_parts_ = 1 << config->partitions;
+    enc->mb_w_ = mb_w;
+    enc->mb_h_ = mb_h;
+    enc->preds_w_ = preds_w;
+    enc->mb_info_ = (VP8MBInfo*)mem;
+    mem += info_size;
+    enc->preds_ = ((uint8_t*)mem) + 1 + enc->preds_w_; // skips one border row and one border column
+    mem += preds_size;
+    enc->nz_ = 1 + (uint32_t*)WEBP_ALIGN(mem); // leaves slot [-1] addressable
+    mem += nz_size;
+    enc->lf_stats_ = lf_stats_size ? (LFStats*)WEBP_ALIGN(mem) : NULL;
+    mem += lf_stats_size;
+
+    // top samples (all 16-aligned)
+    mem = (uint8_t*)WEBP_ALIGN(mem);
+    enc->y_top_ = (uint8_t*)mem;
+    enc->uv_top_ = enc->y_top_ + top_stride;
+    mem += 2 * top_stride;
+    assert(mem <= (uint8_t*)enc + size);
+
+    enc->config_ = config;
+    enc->profile_ = use_filter ? ((config->filter_type == 1) ? 0 : 1) : 2;
+    enc->pic_ = picture;
+    enc->percent_ = 0;
+
+    MapConfigToTools(enc);
+    VP8EncDspInit();
+    // VP8DefaultProbas(enc);
+    ResetSegmentHeader(enc);
+    ResetFilterHeader(enc);
+    ResetBoundaryPredictions(enc);
+    VP8EncDspCostInit();
+    VP8EncInitAlpha(enc);
+
+#if !defined(DISABLE_TOKEN_BUFFER)
+    // lower quality means smaller output -> we modulate a little the page
+    // size based on quality. This is just a crude 1rst-order prediction.
+    {
+        const float scale = 1.f + config->quality * 5.f / 100.f; // in [1,6]
+        VP8TBufferInit(&enc->tokens_, (int)(mb_w * mb_h * 4 * scale));
+    }
+#endif
+    return enc;
+}
+
+static int DeleteVP8Encoder(VP8Encoder* enc) {
+    // Tears down the alpha state and the token buffer, then frees the encoder.
+    // Returns VP8EncDeleteAlpha()'s result, or 1 when 'enc' is NULL.
+    if (enc == NULL) return 1;
+    const int alpha_ok = VP8EncDeleteAlpha(enc);
+    VP8TBufferClear(&enc->tokens_);
+    WebPSafeFree(enc);
+    return alpha_ok;
+}
+
+//------------------------------------------------------------------------------
+
+static double GetPSNR(uint64_t err, uint64_t size) { // PSNR in dB for 8-bit samples; 99 when err or size is 0
+    return (err == 0 || size == 0) ? 99. : 10. * log10(255. * 255. * size / err);
+}
+
+static void FinalizePSNR(const VP8Encoder* const enc) {
+    // Fills stats->PSNR[0..4] from enc->sse_[]; pic_->stats is dereferenced without a NULL check.
+    WebPAuxStats* const out = enc->pic_->stats;
+    const uint64_t count = enc->sse_count_;
+    out->PSNR[0] = (float)GetPSNR(enc->sse_[0], count);
+    out->PSNR[1] = (float)GetPSNR(enc->sse_[1], count / 4);
+    out->PSNR[2] = (float)GetPSNR(enc->sse_[2], count / 4);
+    out->PSNR[3] = (float)GetPSNR(enc->sse_[0] + enc->sse_[1] + enc->sse_[2], count * 3 / 2);
+    out->PSNR[4] = (float)GetPSNR(enc->sse_[3], count);
+}
+
+static void StoreStats(VP8Encoder* const enc) {
+    // Publishes per-segment data, byte counts, PSNR and block counts into the
+    // picture's stats (skipped when it has none), then reports 100% progress.
+    WebPAuxStats* const out = enc->pic_->stats;
+    if (out != NULL) {
+        int seg, k;
+        for (seg = 0; seg < NUM_MB_SEGMENTS; ++seg) {
+            out->segment_level[seg] = enc->dqm_[seg].fstrength_;
+            out->segment_quant[seg] = enc->dqm_[seg].quant_;
+            for (k = 0; k < 3; ++k) {
+                out->residual_bytes[k][seg] = enc->residual_bytes_[k][seg];
+            }
+        }
+        FinalizePSNR(enc);
+        out->coded_size = enc->coded_size_;
+        for (k = 0; k < 3; ++k) out->block_count[k] = enc->block_count_[k];
+    }
+    WebPReportProgress(enc->pic_, 100, &enc->percent_); // done!
+}
+
+int WebPEncodingSetError(const WebPPicture* const pic, WebPEncodingError error) {
+    assert((int)error >= VP8_ENC_OK);
+    assert((int)error < VP8_ENC_ERROR_LAST);
+    ((WebPPicture*)pic)->error_code = error; // constness is deliberately cast away to record the error
+    return 0; // always 0, so callers can write 'return WebPEncodingSetError(...)'
+}
+
+int WebPReportProgress(const WebPPicture* const pic, int percent, int* const percent_store) {
+    // Records a changed 'percent' and forwards it to the picture's progress hook, if any.
+    // Returns 0 (with USER_ABORT set on 'pic') when the hook returns 0, and 1 otherwise.
+    if (percent_store == NULL || percent == *percent_store) return 1; // nothing new to report
+    *percent_store = percent;
+    if (pic->progress_hook == NULL) return 1;       // ok
+    if (pic->progress_hook(percent, pic)) return 1; // ok
+    // user abort requested
+    WebPEncodingSetError(pic, VP8_ENC_ERROR_USER_ABORT);
+    return 0;
+}
+//------------------------------------------------------------------------------
+
+VP8Encoder* PreAnalysis(const WebPConfig*& config, WebPPicture* pic) {
+    // Prepares 'pic' (YUVA conversion when needed), creates its encoder and runs the analysis pass.
+    int ok; // NOTE(review): the analysis / alpha-start status is recorded but not acted upon
+    if (!config->exact) {
+        WebPCleanupTransparentArea(pic);
+    }
+
+    if (pic->use_argb || pic->y == NULL || pic->u == NULL || pic->v == NULL) {
+        // Make sure we have YUVA samples.
+        if (config->preprocessing & 4) {
+            if (!WebPPictureSmartARGBToYUVA(pic)) {
+                return NULL;
+            }
+        } else {
+            float dithering = 0.f;
+            if (config->preprocessing & 2) {
+                const float x = config->quality / 100.f;
+                const float x2 = x * x;
+                // slowly decreasing from max dithering at low quality (q->0)
+                // to 0.5 dithering amplitude at high quality (q->100)
+                dithering = 1.0f + (0.5f - 1.0f) * x2 * x2;
+            }
+            if (!WebPPictureARGBToYUVADithered(pic, WEBP_YUV420, dithering)) {
+                return NULL;
+            }
+        }
+    }
+
+    VP8Encoder* enc = InitVP8Encoder(config, pic);
+    if (enc == NULL) return NULL; // allocation failed; pic->error_code is already set
+
+    // Note: each of the tasks below account for 20% in the progress report.
+    // if (config->use_ocl) {
+    // ok = VP8EncAnalyzeOcl(enc);
+    // } else {
+
+    ok = VP8EncAnalyze(enc);
+    // }
+
+    /* // ***************************************************** */
+
+    /* // Analysis is done, proceed to actual coding. */
+    ok = VP8EncStartAlpha(enc); // possibly done in parallel
+
+    return enc; // NULL is returned on every failure path above
+}
+
+int PostAnalysis(VP8Encoder* const& enc, VP8EncIterator& it) {
+    // Calls PostLoopFinalize() on 'it' with a success flag of 1, then
+    // VP8EncFinishAlpha() on 'enc'. The return value is always 0.
+    int loop_ok = 1;
+    PostLoopFinalize(&it, loop_ok);
+    VP8EncFinishAlpha(enc);
+    return 0;
+}
+
+int WebPEncodeAsync(const int Numpic, const int Numbatch, const WebPConfig* config, WebPPicture** pic) {
+    int ok = 0;
+    cl_int err;
+    StopProfilingWatch watch, watchloop;
+    double watch_time, watchloop_time;
+    int watch_count, watchloop_count;
+
+    const uint64_t output_size_prob = SIZE8_MEM_PROB * Numbatch;
+    const uint64_t output_size_bw = SIZE8_MEM_BW;
+    const uint64_t output_size_ret = SIZE8_MEM_RET;
+    const uint64_t output_size_pred = SIZE8_MEM_PRED;
+
+    if (pic == NULL) return 0;
+
+    for (int i = 0; i < Numpic; i++) {
+        WebPPicture* pic_i = (*pic) + i;
+
+        WebPEncodingSetError(pic_i, VP8_ENC_OK); // all ok so far
+
+        if (config == NULL) // bad params
+            return WebPEncodingSetError(pic_i, VP8_ENC_ERROR_NULL_PARAMETER);
+        if (!WebPValidateConfig(config)) return WebPEncodingSetError(pic_i, VP8_ENC_ERROR_INVALID_CONFIGURATION);
+        if (pic_i->width <= 0 || pic_i->height <= 0) return WebPEncodingSetError(pic_i, VP8_ENC_ERROR_BAD_DIMENSION);
+        if (pic_i->width > WEBP_MAX_DIMENSION || pic_i->height > WEBP_MAX_DIMENSION)
+            return WebPEncodingSetError(pic_i, VP8_ENC_ERROR_BAD_DIMENSION);
+        if (pic_i->stats != NULL) memset(pic_i->stats, 0, sizeof(pic_i->stats));
+    };
+
+    // enc
+    VP8Encoder* enc[Numpic];
+    AllPicInfo picinfo[Numpic];
+
+    int Nloop = int((Numpic + Numbatch - 1) / Numbatch);
+    fprintf(stderr, "INFO: Nloop = %d \n", Nloop);
+
+    uint8_t* output_prob[Numpic];
+    uint8_t* pout_bw[Numpic];
+    uint8_t* pout_ret[Numpic];
+    uint8_t* pout_pred[Numpic];
+
+    for (int i = 0; i < Nloop; i++) {
+        output_prob[i] = malloc(SIZE8_MEM_PROB * Numbatch);
+        pout_bw[i] = malloc(SIZE32_MEM_BW * 4);
+        pout_ret[i] = malloc(SIZE32_MEM_RET * 4);
+        pout_pred[i] = malloc(SIZE32_MEM_PRED * 4);
+    }
+
+    // Pre-analysis
+    for (int i = 0; i < Numpic; i++) {
+        enc[i] = PreAnalysis(config, (*pic) + i);
+    }
+
+    // picinfo
+    for (int i = 0; i < Numpic; i++) {
+        VP8EncTokenLoopAsyncPicInfoSet(enc[i], picinfo[i]);
+    }
+
+    // events
+    std::array<cl_event, 4> event_host2dev[NasyncDepth * Ninstances];
+    std::array<cl_event, 1> event_kernelpred[NasyncDepth * Ninstances];
+    std::array<cl_event, 1> event_kernelac[NasyncDepth * Ninstances];
+    std::array<cl_event, 4> event_dev2host[NasyncDepth * Ninstances];
+
+    int NumpicRest = Numpic;
+
+    fprintf(stderr, "INFO: VP8EncTokenLoopAsync starts ... \n\n");
+
+    StartProfiling(&watch);
+    StartProfiling(&watchloop);
+
+    for (int i = 0; i < Nloop; i++) {
+        int nb = NumpicRest > Numbatch ? Numbatch : NumpicRest; // number of pictures traited in this iteration
+        NumpicRest -= Numbatch;                                 // number of pictures to be traited in future iterations
+        // std::cout << "Npic: " << nb << "  " << NumpicRest << "  " << Nloop << std::endl;
+
+        int bufinst = i % Ninstances;
+        int bufevent = i % (NasyncDepth * Ninstances);
+        int buf = bufinst * NasyncDepth + int(bufevent / Ninstances);
+
+        fprintf(stderr, "\n*** Picture: %d - %d,  Buffer: %d, Instance: %d, Event: %d *** \n", (i)*Numbatch + 1,
+                (i)*Numbatch + nb, buf, bufinst, bufevent);
+
+        if (i >= (NasyncDepth * Ninstances)) {
+            int bufp = i - NasyncDepth * Ninstances; // previous picture (finished)
+
+            // Wait for events
+            err = clWaitForEvents(event_dev2host[bufevent].size(), event_dev2host[bufevent].data());
+            if (err != CL_SUCCESS) {
+                fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+            };
+
+            memcpy(output_prob[bufp], encloopparaAsync[buf].probcpu, output_size_prob);
+            memcpy(pout_bw[bufp], encloopparaAsync[buf].bwcpu, output_size_bw);
+            memcpy(pout_ret[bufp], encloopparaAsync[buf].retcpu, output_size_ret);
+            memcpy(pout_pred[bufp], encloopparaAsync[buf].predcpu, output_size_pred);
+
+            // Release previous used events
+            for (int k = 0; k < event_host2dev[bufevent].size(); k++) {
+                err = clReleaseEvent(event_host2dev[bufevent][k]);
+                if (err != CL_SUCCESS) {
+                    fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                };
+            }
+            for (int k = 0; k < event_kernelpred[bufevent].size(); k++) {
+                err = clReleaseEvent(event_kernelpred[bufevent][k]);
+                if (err != CL_SUCCESS) {
+                    fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                };
+            }
+            for (int k = 0; k < event_kernelac[bufevent].size(); k++) {
+                err = clReleaseEvent(event_kernelac[bufevent][k]);
+                if (err != CL_SUCCESS) {
+                    fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                };
+            }
+            for (int k = 0; k < event_dev2host[bufevent].size(); k++) {
+                err = clReleaseEvent(event_dev2host[bufevent][k]);
+                if (err != CL_SUCCESS) {
+                    fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+                };
+            };
+        };
+
+        std::cout << "HtoD webpen.c" << std::endl;
+
+        // Host to Device
+        VP8EncTokenLoopAsyncHost2Device(nb, &enc[i * Numbatch], &picinfo[i * Numbatch], buf, 0, NULL,
+                                        event_host2dev[bufevent]);
+
+        // Pred kernel
+        VP8EncTokenLoopAsyncPredKernel(buf, event_host2dev[bufevent].size(), event_host2dev[bufevent].data(),
+                                       event_kernelpred[bufevent]);
+
+        // AC kernel
+        VP8EncTokenLoopAsyncACKernel(buf, event_kernelpred[bufevent].size(), event_kernelpred[bufevent].data(),
+                                     event_kernelac[bufevent]);
+
+        // Device to Host
+        VP8EncTokenLoopAsyncDevice2Host(buf, event_kernelac[bufevent].size(), event_kernelac[bufevent].data(),
+                                        event_dev2host[bufevent]);
+    }
+
+    err = clFinish(hardware.mQueue);
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err));
+        ok = 0;
+    }
+
+    /* /\* // Wait for events *\/ */
+    /* /\* err = clWaitForEvents(event_dev2host[0].size(), event_dev2host[0].data()); *\/ */
+    /* /\* if(err != CL_SUCCESS){ *\/ */
+    /* /\*   fprintf(stderr, "%s %d %s\n", __func__, __LINE__, oclErrorCode(err)); *\/ */
+    /* /\* }; *\/ */
+
+    watchloop_time = 0.0;
+    StopProfiling(&watchloop, &watchloop_time, &watchloop_count);
+    fprintf(stderr, "\nINFO: Loop of Pictures Finished. Computation time is %f (ms) \n", watchloop_time);
+
+    for (int i = 0; i < Nloop; i++) {
+        int bufinst = i % Ninstances;
+        int bufevent = i % (NasyncDepth * Ninstances);
+        int buf = bufinst * NasyncDepth + int(bufevent / Ninstances);
+
+        memcpy(output_prob[i], encloopparaAsync[buf].probcpu, output_size_prob);
+        memcpy(pout_bw[i], encloopparaAsync[buf].bwcpu, output_size_bw);
+        memcpy(pout_ret[i], encloopparaAsync[buf].retcpu, output_size_ret);
+        memcpy(pout_pred[i], encloopparaAsync[buf].predcpu, output_size_pred);
+    }
+
+    /* /\* uint32_t* ps = (uint32_t*)output_prob[0]; *\/ */
+    /* /\* for(int i=0;i<512;i++) *\/ */
+    /* /\*   fprintf(stderr, "(%d,%x)\n", i, ps[i]); *\/ */
+
+    /* /\* int rstpic = (Numpic-NasyncDepth*Ninstances)>=0 ? Numpic-NasyncDepth*Ninstances: 0; *\/ */
+    /* /\* for(int i=rstpic;i<Numpic;i++){ *\/ */
+
+    /* /\*   int bufinst = i % Ninstances; *\/ */
+    /* /\*   int bufevent = i % (NasyncDepth*Ninstances); *\/ */
+    /* /\*   int buf = bufinst * NasyncDepth + int(bufevent/Ninstances); *\/ */
+    /* /\*   memcpy(output_prob[i], encloopparaAsync[buf].probcpu, output_size_prob); *\/ */
+    /* /\*   memcpy(pout_bw[i], encloopparaAsync[buf].bwcpu, output_size_bw); *\/ */
+    /* /\*   memcpy(pout_ret[i], encloopparaAsync[buf].retcpu, output_size_ret); *\/ */
+    /* /\*   memcpy(pout_pred[i], encloopparaAsync[buf].predcpu, output_size_pred); *\/ */
+    /* /\* } *\/ */
+
+    // After AC and Post Analysis on CPU
+    for (int i = 0; i < Nloop; i++) {
+        uint64_t offset_prob = 0;
+        uint64_t offset_pred = 0;
+        uint64_t offset_bw = 0;
+        uint64_t offset_ret = 0;
+
+        for (int j = 0; j < Numbatch; j++) {
+            int p = i * Numbatch + j;
+
+            if (p < Numpic) {
+                VP8EncIterator it;
+
+                // After AC
+                VP8EncTokenLoopAsyncAfterAC(enc[p], output_prob[i] + offset_prob, pout_bw[i] + offset_bw,
+                                            pout_ret[i] + offset_ret, pout_pred[i] + offset_pred, it);
+
+                // offset
+                int num_mb = picinfo[p].mb_w * picinfo[p].mb_h;
+                offset_prob += 2048;
+                offset_bw += Get_Busoffset_pout_bw(num_mb) * sizeof(uint32_t);
+                offset_ret += Get_Busoffset_pout_ret(num_mb) * sizeof(uint32_t);
+                offset_pred += Get_Busoffset_pout_pred(num_mb) * sizeof(uint32_t);
+
+                // Post Analysis
+                PostAnalysis(enc[p], it);
+            }
+        }
+    };
+
+    watch_time = 0.0;
+    StopProfiling(&watch, &watch_time, &watch_count);
+    fprintf(stderr, "\nINFO: VP8EncTokenLoopAsync Finished. Computation time is %f (ms) \n", watch_time);
+
+    for (int i = 0; i < Numpic; i++) {
+        FILE* out = fopen((*pic)[i].custom_ptr_name, "wb");
+        if (out == NULL) {
+            fprintf(stderr, "Error! Cannot open output file '%s'\n", (*pic)[i].custom_ptr_name);
+        }
+        (*pic)[i].custom_ptr = (void*)out;
+        enc[i]->pic_->custom_ptr = (void*)out;
+
+        ok = VP8EncWrite(enc[i]);
+
+        StoreStats(enc[i]);
+        if (!ok) {
+            VP8EncFreeBitWriters(enc[i]);
+        }
+        ok = DeleteVP8Encoder(enc[i]); // must always be called, even if !ok
+
+        fclose(out);
+    };
+
+    // free
+    for (int i = 0; i < Nloop; i++) {
+        free(output_prob[i]);
+        free(pout_bw[i]);
+        free(pout_ret[i]);
+        free(pout_pred[i]);
+    }
+
+    return ok;
+}
diff --git a/codec/L2/demos/webpEnc/host/src/extras/Makefile.am b/codec/L2/demos/webpEnc/host/src/extras/Makefile.am
new file mode 100644
index 0000000000..6598676e21
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/extras/Makefile.am
@@ -0,0 +1,13 @@
+lib_LTLIBRARIES = libwebpextras.la
+
+# Sources compiled into the library.
+libwebpextras_la_SOURCES = extras.c
+
+# Public headers, installed under $(libwebpextrasincludedir).
+libwebpextrasincludedir = $(includedir)/webp
+libwebpextrasinclude_HEADERS = ../webp/extras.h ../webp/types.h
+
+# Linked against the core library one directory up; libtool version 0:0:0.
+libwebpextras_la_LIBADD = ../libwebp.la
+libwebpextras_la_LDFLAGS = -no-undefined -version-info 0:0:0
+pkgconfig_DATA = libwebpextras.pc
diff --git a/codec/L2/demos/webpEnc/host/src/extras/extras.c b/codec/L2/demos/webpEnc/host/src/extras/extras.c
new file mode 100644
index 0000000000..ca32fbcd86
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/extras/extras.c
@@ -0,0 +1,111 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  Additional WebP utilities.
+//
+
+#include "../webp/extras.h"
+
+#include <string.h>
+
+#define XTRA_MAJ_VERSION 0
+#define XTRA_MIN_VERSION 0
+#define XTRA_REV_VERSION 0
+
+//------------------------------------------------------------------------------
+
+int WebPGetExtrasVersion(void) {  // packed as (major << 16) | (minor << 8) | revision
+  return XTRA_REV_VERSION | (XTRA_MIN_VERSION << 8) | (XTRA_MAJ_VERSION << 16);
+}
+
+//------------------------------------------------------------------------------
+
+int WebPImportGray(const uint8_t* gray_data, WebPPicture* pic) {
+  // Imports a tightly packed 8-bit gray buffer as YUV420 with chroma set to 128.
+  int row;
+  if (gray_data == NULL || pic == NULL) return 0;
+  pic->colorspace = WEBP_YUV420;
+  if (!WebPPictureAlloc(pic)) return 0;
+  for (row = 0; row < pic->height; ++row, gray_data += pic->width) {
+    const int luma_width = pic->width;
+    const int chroma_width = (luma_width + 1) >> 1;
+    memcpy(pic->y + row * pic->y_stride, gray_data, luma_width);
+    if (!(row & 1)) {  // one chroma row is written per pair of luma rows
+      memset(pic->u + (row >> 1) * pic->uv_stride, 128, chroma_width);
+      memset(pic->v + (row >> 1) * pic->uv_stride, 128, chroma_width);
+    }
+  }
+  return 1;
+}
+
+int WebPImportRGB565(const uint8_t* rgb565, WebPPicture* pic) {
+  // Expands packed 16-bit RGB565 pixels into opaque 32-bit ARGB in 'pic'.
+  // Returns 0 on NULL input or allocation failure, 1 on success.
+  int x, y;
+  if (pic == NULL || rgb565 == NULL) return 0;
+  pic->colorspace = WEBP_YUV420;
+  pic->use_argb = 1;
+  if (!WebPPictureAlloc(pic)) return 0;
+  for (y = 0; y < pic->height; ++y, rgb565 += 2 * pic->width) {
+    uint32_t* const dst = pic->argb + y * pic->argb_stride;
+    for (x = 0; x < pic->width; ++x) {
+#ifdef WEBP_SWAP_16BIT_CSP
+      const uint32_t rg = rgb565[2 * x + 1];
+      const uint32_t gb = rgb565[2 * x + 0];
+#else
+      const uint32_t rg = rgb565[2 * x + 0];  // rrrrrggg
+      const uint32_t gb = rgb565[2 * x + 1];  // gggbbbbb
+#endif
+      uint32_t r = rg & 0xf8;
+      uint32_t g = ((rg << 5) | (gb >> 3)) & 0xfc;
+      uint32_t b = (gb << 3) & 0xf8;  // low 5 bits of 'gb', moved to the top of the byte
+      // Replicate the top bits into the vacated low bits to span 0..255.
+      r |= r >> 5;
+      g |= g >> 6;
+      b |= b >> 5;
+      dst[x] = 0xff000000u | (r << 16) | (g << 8) | b;  // 565 has no alpha: opaque
+    }
+  }
+  return 1;
+}
+
+int WebPImportRGB4444(const uint8_t* rgb4444, WebPPicture* pic) {
+  // Expands packed 16-bit RGBA4444 pixels into 32-bit ARGB in 'pic'.
+  // Returns 0 on NULL input or allocation failure, 1 on success.
+  int x, y;
+  if (pic == NULL || rgb4444 == NULL) return 0;
+  pic->colorspace = WEBP_YUV420;
+  pic->use_argb = 1;
+  if (!WebPPictureAlloc(pic)) return 0;
+  for (y = 0; y < pic->height; ++y, rgb4444 += 2 * pic->width) {
+    uint32_t* const dst = pic->argb + y * pic->argb_stride;
+    for (x = 0; x < pic->width; ++x) {
+#ifdef WEBP_SWAP_16BIT_CSP
+      const uint32_t rg = rgb4444[2 * x + 1];
+      const uint32_t ba = rgb4444[2 * x + 0];
+#else
+      const uint32_t rg = rgb4444[2 * x + 0];  // rrrrgggg
+      const uint32_t ba = rgb4444[2 * x + 1];  // bbbbaaaa
+#endif
+      uint32_t r = rg & 0xf0;
+      uint32_t g = (rg << 4) & 0xf0;  // mask: the shift leaves the red nibble above
+      uint32_t b = ba & 0xf0;
+      uint32_t a = (ba << 4) & 0xf0;  // mask: the shift leaves the blue nibble above
+      // Replicate each nibble into the low half of its byte to span 0..255.
+      r |= r >> 4;
+      g |= g >> 4;
+      b |= b >> 4;
+      a |= a >> 4;
+      dst[x] = (a << 24) | (r << 16) | (g << 8) | b;
+    }
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/mux/Makefile.am b/codec/L2/demos/webpEnc/host/src/mux/Makefile.am
new file mode 100644
index 0000000000..a25bf2860b
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/mux/Makefile.am
@@ -0,0 +1,18 @@
+lib_LTLIBRARIES = libwebpmux.la
+
+# Sources (including the private header muxi.h) that make up the library.
+libwebpmux_la_SOURCES = anim_encode.c \
+                        muxedit.c \
+                        muxi.h \
+                        muxinternal.c \
+                        muxread.c
+
+# Public headers, installed under $(libwebpmuxincludedir).
+libwebpmuxincludedir = $(includedir)/webp
+libwebpmuxinclude_HEADERS = ../webp/mux.h \
+                            ../webp/mux_types.h \
+                            ../webp/types.h
+
+libwebpmux_la_LIBADD = ../libwebp.la
+libwebpmux_la_LDFLAGS = -no-undefined -version-info 2:0:0
+pkgconfig_DATA = libwebpmux.pc
diff --git a/codec/L2/demos/webpEnc/host/src/mux/anim_encode.c b/codec/L2/demos/webpEnc/host/src/mux/anim_encode.c
new file mode 100644
index 0000000000..60d888fa76
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/mux/anim_encode.c
@@ -0,0 +1,1323 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  AnimEncoder implementation.
+//
+
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+
+#include "../utils/utils.h"
+#include "../webp/decode.h"
+#include "../webp/encode.h"
+#include "../webp/format_constants.h"
+#include "../webp/mux.h"
+
+#if defined(_MSC_VER) && _MSC_VER < 1900
+#define snprintf _snprintf
+#endif
+
+#define ERROR_STR_MAX_LENGTH 100
+
+//------------------------------------------------------------------------------
+// Internal structs.
+
+// Frame rectangle: left/top offset plus width and height.
+typedef struct { int x_offset_, y_offset_, width_, height_; } FrameRect;
+
+// Holds the two encoded candidates for one animation frame (sub-frame and
+// key-frame); 'is_key_frame_' records which of the two ends up being chosen.
+typedef struct {
+    WebPMuxFrameInfo sub_frame_; // Candidate encoded as a frame rectangle.
+    WebPMuxFrameInfo key_frame_; // Candidate encoded as a key-frame.
+    int is_key_frame_;           // True if 'key_frame_' has been chosen.
+} EncodedFrame;
+
+struct WebPAnimEncoder {
+    const int canvas_width_;               // Canvas width (const: set once at creation).
+    const int canvas_height_;              // Canvas height (const: set once at creation).
+    const WebPAnimEncoderOptions options_; // Global encoding options (sanitized copy or defaults).
+
+    FrameRect prev_rect_;     // Previous WebP frame rectangle.
+    WebPConfig last_config_;  // Cached in case a re-encode is needed.
+    WebPConfig last_config2_; // 2nd cached config; only valid if
+                              // 'options_.allow_mixed' is true.
+
+    WebPPicture* curr_canvas_; // Only pointer; we don't own memory.
+
+    // Canvas buffers.
+    WebPPicture curr_canvas_copy_;  // Possibly modified current canvas.
+    int curr_canvas_copy_modified_; // True if pixels in 'curr_canvas_copy_'
+                                    // differ from those in 'curr_canvas_'.
+
+    WebPPicture prev_canvas_;          // Previous canvas.
+    WebPPicture prev_canvas_disposed_; // Previous canvas disposed to background.
+
+    // Encoded data.
+    EncodedFrame* encoded_frames_; // Array of 'size_' encoded frames.
+    size_t size_;                  // Number of allocated frames (at least 2).
+    size_t start_;                 // Frame start index.
+    size_t count_;                 // Number of valid frames.
+    size_t flush_count_;           // If >0, 'flush_count_' frames starting from
+                                   // 'start_' are ready to be added to mux.
+
+    // key-frame related.
+    int64_t best_delta_;        // min(canvas size - frame size) over the frames.
+                                // Can be negative in certain cases due to
+                                // transparent pixels in a frame.
+    int keyframe_;              // Index of selected key-frame relative to 'start_',
+    int count_since_key_frame_; // Frames seen since the last key-frame.
+
+    int first_timestamp_;          // Timestamp of the first frame.
+    int prev_timestamp_;           // Timestamp of the last added frame.
+    int prev_candidate_undecided_; // True if it's not yet decided if previous
+                                   // frame would be a sub-frame or a key-frame.
+
+    // Misc.
+    int is_first_frame_; // True if first frame is yet to be added/being added.
+    int got_null_frame_; // True if WebPAnimEncoderAdd() has already been called
+                         // with a NULL frame.
+
+    size_t in_frame_count_;  // Number of input frames processed so far.
+    size_t out_frame_count_; // Number of frames added to mux so far. This may be
+                             // different from 'in_frame_count_' due to merging.
+
+    WebPMux* mux_;                         // Muxer to assemble the WebP bitstream.
+    char error_str_[ERROR_STR_MAX_LENGTH]; // Error string. Empty if no error.
+};
+
+// -----------------------------------------------------------------------------
+// Life of WebPAnimEncoder object.
+
+#define DELTA_INFINITY (1ULL << 32)
+#define KEYFRAME_NONE (-1)
+
+// Empties the frame-cache bookkeeping and forgets the key-frame candidate.
+static void ResetCounters(WebPAnimEncoder* const enc) {
+    // Cache window: no start offset, no cached frames, nothing to flush.
+    enc->start_ = enc->count_ = enc->flush_count_ = 0;
+    // The key-frame search starts over.
+    enc->keyframe_ = KEYFRAME_NONE;
+    enc->best_delta_ = DELTA_INFINITY;
+}
+
+static void DisableKeyframes(WebPAnimEncoderOptions* const enc_options) {
+    enc_options->kmin = INT_MAX - 1; // Kept one below kmax.
+    enc_options->kmax = INT_MAX;
+}
+
+#define MAX_CACHED_FRAMES 30
+
+static void SanitizeEncoderOptions(WebPAnimEncoderOptions* const enc_options) {
+    int print_warning = enc_options->verbose; // Warn on stderr only in verbose mode.
+    // 'minimize_size' overrides whatever kmin/kmax the caller supplied.
+    if (enc_options->minimize_size) {
+        DisableKeyframes(enc_options);
+    }
+    // A non-positive kmin also selects the DisableKeyframes() values, silently.
+    if (enc_options->kmin <= 0) {
+        DisableKeyframes(enc_options);
+        print_warning = 0;
+    }
+    if (enc_options->kmax <= 0) { // All frames will be key-frames.
+        enc_options->kmin = 0;
+        enc_options->kmax = 0;
+        return;
+    }
+    // From here on kmax > 0; enforce kmin < kmax.
+    if (enc_options->kmin >= enc_options->kmax) {
+        enc_options->kmin = enc_options->kmax - 1;
+        if (print_warning) {
+            fprintf(stderr, "WARNING: Setting kmin = %d, so that kmin < kmax.\n", enc_options->kmin);
+        }
+    } else {
+        const int kmin_limit = enc_options->kmax / 2 + 1;
+        if (enc_options->kmin < kmin_limit && kmin_limit < enc_options->kmax) {
+            // This ensures that enc.keyframe + kmin >= kmax is always true. So, we
+            // can flush all the frames in the 'count_since_key_frame == kmax' case.
+            enc_options->kmin = kmin_limit;
+            if (print_warning) {
+                fprintf(stderr, "WARNING: Setting kmin = %d, so that kmin >= kmax / 2 + 1.\n", enc_options->kmin);
+            }
+        }
+    }
+    // Limit the max number of frames that are allocated (kmax - kmin is capped).
+    if (enc_options->kmax - enc_options->kmin > MAX_CACHED_FRAMES) {
+        enc_options->kmin = enc_options->kmax - MAX_CACHED_FRAMES;
+        if (print_warning) {
+            fprintf(stderr, "WARNING: Setting kmin = %d, so that kmax - kmin <= %d.\n", enc_options->kmin,
+                    MAX_CACHED_FRAMES);
+        }
+    }
+    assert(enc_options->kmin < enc_options->kmax);
+}
+
+#undef MAX_CACHED_FRAMES
+
+static void DefaultEncoderOptions(WebPAnimEncoderOptions* const enc_options) {
+    DisableKeyframes(enc_options); // Default kmin/kmax.
+    enc_options->allow_mixed = 0;
+    enc_options->minimize_size = 0;
+    enc_options->anim_params.bgcolor = 0xffffffff; // White.
+    enc_options->anim_params.loop_count = 0;
+}
+
+int WebPAnimEncoderOptionsInitInternal(WebPAnimEncoderOptions* enc_options, int abi_version) {
+    // Fills 'enc_options' with defaults; returns 0 on NULL options or ABI mismatch.
+    if (enc_options != NULL && !WEBP_ABI_IS_INCOMPATIBLE(abi_version, WEBP_MUX_ABI_VERSION)) {
+        DefaultEncoderOptions(enc_options);
+        return 1;
+    }
+    return 0;
+}
+
+#define TRANSPARENT_COLOR 0x00ffffff
+
+static void ClearRectangle(WebPPicture* const picture, int left, int top, int width, int height) {
+    // Overwrites the width x height area at (left, top) with TRANSPARENT_COLOR.
+    const int right = left + width;
+    const int bottom = top + height;
+    int x, y;
+    for (y = top; y < bottom; ++y) {
+        uint32_t* const row = picture->argb + y * picture->argb_stride;
+        for (x = left; x < right; ++x) row[x] = TRANSPARENT_COLOR;
+    }
+}
+
+static void WebPUtilClearPic(WebPPicture* const picture, const FrameRect* const rect) {
+    if (rect == NULL) { // No rectangle given: clear the whole picture.
+        ClearRectangle(picture, 0, 0, picture->width, picture->height);
+        return;
+    }
+    ClearRectangle(picture, rect->x_offset_, rect->y_offset_, rect->width_, rect->height_);
+}
+
+static void MarkNoError(WebPAnimEncoder* const enc) {
+    *enc->error_str_ = '\0'; // An empty string means "no error".
+}
+
+static void MarkError(WebPAnimEncoder* const enc, const char* str) {
+    // Stores "<str>." as the error string, bounded by ERROR_STR_MAX_LENGTH.
+    const int written = snprintf(enc->error_str_, ERROR_STR_MAX_LENGTH, "%s.", str);
+    if (written < 0) assert(0); // FIX ME!
+}
+
+static void MarkError2(WebPAnimEncoder* const enc, const char* str, int error_code) {
+    // Stores "<str>: <error_code>." as the error string, bounded by ERROR_STR_MAX_LENGTH.
+    const int written = snprintf(enc->error_str_, ERROR_STR_MAX_LENGTH, "%s: %d.", str, error_code);
+    if (written < 0) assert(0); // FIX ME!
+}
+
+WebPAnimEncoder* WebPAnimEncoderNewInternal(int width,
+                                            int height,
+                                            const WebPAnimEncoderOptions* enc_options,
+                                            int abi_version) {
+    WebPAnimEncoder* enc;
+    // Reject ABI-incompatible callers and empty or oversized canvases up front.
+    if (WEBP_ABI_IS_INCOMPATIBLE(abi_version, WEBP_MUX_ABI_VERSION)) {
+        return NULL;
+    }
+    if (width <= 0 || height <= 0 || (width * (uint64_t)height) >= MAX_IMAGE_AREA) {
+        return NULL;
+    }
+
+    enc = (WebPAnimEncoder*)WebPSafeCalloc(1, sizeof(*enc));
+    if (enc == NULL) return NULL;
+    // sanity inits, so we can call WebPAnimEncoderDelete() on any later failure:
+    enc->encoded_frames_ = NULL;
+    enc->mux_ = NULL;
+    MarkNoError(enc);
+
+    // Dimensions and options. The fields are const, so they are written through casts.
+    *(int*)&enc->canvas_width_ = width;
+    *(int*)&enc->canvas_height_ = height;
+    if (enc_options != NULL) { // Copy the caller's options, then sanitize the copy.
+        *(WebPAnimEncoderOptions*)&enc->options_ = *enc_options;
+        SanitizeEncoderOptions((WebPAnimEncoderOptions*)&enc->options_);
+    } else {
+        DefaultEncoderOptions((WebPAnimEncoderOptions*)&enc->options_);
+    }
+
+    // Canvas buffers.
+    if (!WebPPictureInit(&enc->curr_canvas_copy_) || !WebPPictureInit(&enc->prev_canvas_) ||
+        !WebPPictureInit(&enc->prev_canvas_disposed_)) {
+        goto Err;
+    }
+    enc->curr_canvas_copy_.width = width;
+    enc->curr_canvas_copy_.height = height;
+    enc->curr_canvas_copy_.use_argb = 1;
+    if (!WebPPictureAlloc(&enc->curr_canvas_copy_) || !WebPPictureCopy(&enc->curr_canvas_copy_, &enc->prev_canvas_) ||
+        !WebPPictureCopy(&enc->curr_canvas_copy_, &enc->prev_canvas_disposed_)) {
+        goto Err;
+    }
+    WebPUtilClearPic(&enc->prev_canvas_, NULL); // Whole previous canvas starts transparent.
+    enc->curr_canvas_copy_modified_ = 1;
+
+    // Encoded frames.
+    ResetCounters(enc);
+    // Note: one extra storage is for the previous frame.
+    enc->size_ = enc->options_.kmax - enc->options_.kmin + 1;
+    // We need space for at least 2 frames. But when kmin, kmax are both zero,
+    // enc->size_ will be 1. So we handle that special case below.
+    if (enc->size_ < 2) enc->size_ = 2;
+    enc->encoded_frames_ = (EncodedFrame*)WebPSafeCalloc(enc->size_, sizeof(*enc->encoded_frames_));
+    if (enc->encoded_frames_ == NULL) goto Err;
+
+    enc->mux_ = WebPMuxNew();
+    if (enc->mux_ == NULL) goto Err;
+    // Remaining state starts at "nothing added yet".
+    enc->count_since_key_frame_ = 0;
+    enc->first_timestamp_ = 0;
+    enc->prev_timestamp_ = 0;
+    enc->prev_candidate_undecided_ = 0;
+    enc->is_first_frame_ = 1;
+    enc->got_null_frame_ = 0;
+
+    return enc; // All OK.
+
+Err: // Shared failure path: release whatever was set up so far.
+    WebPAnimEncoderDelete(enc);
+    return NULL;
+}
+
+// Clears both candidate bitstreams of 'encoded_frame' and zeroes the struct.
+static void FrameRelease(EncodedFrame* const encoded_frame) {
+    if (encoded_frame == NULL) return; // Nothing to release.
+    WebPDataClear(&encoded_frame->sub_frame_.bitstream);
+    WebPDataClear(&encoded_frame->key_frame_.bitstream);
+    // Leave the slot in the same all-zero state a fresh calloc would give.
+    memset(encoded_frame, 0, sizeof(*encoded_frame));
+}
+
+void WebPAnimEncoderDelete(WebPAnimEncoder* enc) {
+    size_t idx;
+    if (enc == NULL) return; // Deleting NULL is a no-op.
+
+    WebPPictureFree(&enc->curr_canvas_copy_);
+    WebPPictureFree(&enc->prev_canvas_);
+    WebPPictureFree(&enc->prev_canvas_disposed_);
+
+    // Release every cached frame, then the array that held them.
+    if (enc->encoded_frames_ != NULL) {
+        for (idx = 0; idx < enc->size_; ++idx) FrameRelease(&enc->encoded_frames_[idx]);
+        WebPSafeFree(enc->encoded_frames_);
+    }
+    WebPMuxDelete(enc->mux_);
+    WebPSafeFree(enc);
+}
+
+// -----------------------------------------------------------------------------
+// Frame addition.
+
+// Returns the cached frame 'position' slots after the cache start index.
+static EncodedFrame* GetFrame(const WebPAnimEncoder* const enc, size_t position) {
+    assert(enc->size_ > enc->start_ + position);
+    return enc->encoded_frames_ + (enc->start_ + position);
+}
+
+// Returns 1 when the 'length' pixels read from 'src' and 'dst' (advancing by
+// 'src_step' / 'dst_step' elements per pixel) all match, 0 otherwise.
+static WEBP_INLINE int ComparePixels(const uint32_t* src, int src_step, const uint32_t* dst, int dst_step, int length) {
+    int remaining;
+    assert(length > 0);
+    // Walk both sequences in lockstep, each with its own stride.
+    for (remaining = length; remaining > 0; --remaining) {
+        if (*src != *dst) return 0; // The first mismatch settles it.
+        src += src_step;
+        dst += dst_step;
+    }
+    return 1;
+}
+
+static int IsEmptyRect(const FrameRect* const rect) {
+    return !(rect->width_ != 0 && rect->height_ != 0); // Empty if either dimension is zero.
+}
+
+// Shrinks 'rect' (a valid initial guess) to the area where 'src' and 'dst' differ; all-zero if none.
+static void MinimizeChangeRectangle(const WebPPicture* const src, const WebPPicture* const dst, FrameRect* const rect) {
+    int i, j;
+    // Sanity checks.
+    assert(src->width == dst->width && src->height == dst->height);
+    assert(rect->x_offset_ + rect->width_ <= dst->width);
+    assert(rect->y_offset_ + rect->height_ <= dst->height);
+
+    // Left boundary: drop leading columns that are identical in both pictures.
+    for (i = rect->x_offset_; i < rect->x_offset_ + rect->width_; ++i) {
+        const uint32_t* const src_argb = &src->argb[rect->y_offset_ * src->argb_stride + i];
+        const uint32_t* const dst_argb = &dst->argb[rect->y_offset_ * dst->argb_stride + i];
+        if (ComparePixels(src_argb, src->argb_stride, dst_argb, dst->argb_stride, rect->height_)) {
+            --rect->width_; // Redundant column.
+            ++rect->x_offset_;
+        } else {
+            break;
+        }
+    }
+    if (rect->width_ == 0) goto NoChange;
+
+    // Right boundary: drop trailing identical columns (offset stays put).
+    for (i = rect->x_offset_ + rect->width_ - 1; i >= rect->x_offset_; --i) {
+        const uint32_t* const src_argb = &src->argb[rect->y_offset_ * src->argb_stride + i];
+        const uint32_t* const dst_argb = &dst->argb[rect->y_offset_ * dst->argb_stride + i];
+        if (ComparePixels(src_argb, src->argb_stride, dst_argb, dst->argb_stride, rect->height_)) {
+            --rect->width_; // Redundant column.
+        } else {
+            break;
+        }
+    }
+    if (rect->width_ == 0) goto NoChange;
+
+    // Top boundary: drop leading identical rows, comparing only the remaining columns.
+    for (j = rect->y_offset_; j < rect->y_offset_ + rect->height_; ++j) {
+        const uint32_t* const src_argb = &src->argb[j * src->argb_stride + rect->x_offset_];
+        const uint32_t* const dst_argb = &dst->argb[j * dst->argb_stride + rect->x_offset_];
+        if (ComparePixels(src_argb, 1, dst_argb, 1, rect->width_)) {
+            --rect->height_; // Redundant row.
+            ++rect->y_offset_;
+        } else {
+            break;
+        }
+    }
+    if (rect->height_ == 0) goto NoChange;
+
+    // Bottom boundary: drop trailing identical rows (offset stays put).
+    for (j = rect->y_offset_ + rect->height_ - 1; j >= rect->y_offset_; --j) {
+        const uint32_t* const src_argb = &src->argb[j * src->argb_stride + rect->x_offset_];
+        const uint32_t* const dst_argb = &dst->argb[j * dst->argb_stride + rect->x_offset_];
+        if (ComparePixels(src_argb, 1, dst_argb, 1, rect->width_)) {
+            --rect->height_; // Redundant row.
+        } else {
+            break;
+        }
+    }
+    if (rect->height_ == 0) goto NoChange;
+
+    if (IsEmptyRect(rect)) {
+    NoChange: // Also reached by the gotos above: normalize to an all-zero rectangle.
+        rect->x_offset_ = 0;
+        rect->y_offset_ = 0;
+        rect->width_ = 0;
+        rect->height_ = 0;
+    }
+}
+
+// Rounds odd offsets down to even, growing the rectangle by the same amount.
+static WEBP_INLINE void SnapToEvenOffsets(FrameRect* const rect) {
+    rect->height_ += (rect->y_offset_ & 1);
+    rect->y_offset_ -= (rect->y_offset_ & 1);
+    rect->width_ += (rect->x_offset_ & 1);
+    rect->x_offset_ -= (rect->x_offset_ & 1);
+}
+
+// Chooses the rectangle of 'curr_canvas' to encode, starting from the full
+// canvas, and makes 'sub_frame' a view of it. Returns 1 right away for an
+// allowed empty rectangle, otherwise the result of WebPPictureView().
+static int GetSubRect(const WebPPicture* const prev_canvas,
+                      const WebPPicture* const curr_canvas,
+                      int is_key_frame,
+                      int is_first_frame,
+                      int empty_rect_allowed,
+                      FrameRect* const rect,
+                      WebPPicture* const sub_frame) {
+    const int shrink_to_changes = is_first_frame || !is_key_frame;
+
+    rect->x_offset_ = 0;
+    rect->y_offset_ = 0;
+    rect->width_ = curr_canvas->width;
+    rect->height_ = curr_canvas->height;
+    // Non-key frames (and the very first frame, whose 'prev_canvas' starts out
+    // fully transparent) only need the area that differs from 'prev_canvas'.
+    if (shrink_to_changes) MinimizeChangeRectangle(prev_canvas, curr_canvas, rect);
+
+    if (IsEmptyRect(rect)) {
+        if (empty_rect_allowed) return 1; // No need to get 'sub_frame'.
+        // Otherwise force a 1x1 rectangle at the origin.
+        assert(rect->x_offset_ == 0);
+        assert(rect->y_offset_ == 0);
+        rect->width_ = 1;
+        rect->height_ = 1;
+    }
+
+    // Snapping to even offsets may grow the rectangle by one pixel per axis.
+    SnapToEvenOffsets(rect);
+    return WebPPictureView(curr_canvas, rect->x_offset_, rect->y_offset_, rect->width_, rect->height_, sub_frame);
+}
+
+static void DisposeFrameRectangle(int dispose_method, const FrameRect* const rect, WebPPicture* const curr_canvas) {
+    // Only the WEBP_MUX_DISPOSE_BACKGROUND method touches the canvas.
+    const int clear_area = (dispose_method == WEBP_MUX_DISPOSE_BACKGROUND);
+    assert(rect != NULL);
+    if (clear_area) WebPUtilClearPic(curr_canvas, rect);
+}
+
+static uint32_t RectArea(const FrameRect* const rect) {
+    return (uint32_t)rect->width_ * (uint32_t)rect->height_; // Unsigned product.
+}
+
+static int IsBlendingPossible(const WebPPicture* const src, const WebPPicture* const dst, const FrameRect* const rect) {
+    // Blending works unless some pixel of 'rect' is non-opaque in 'dst' and
+    // differs from 'src': blending cannot produce that 'dst' value.
+    const int x_end = rect->x_offset_ + rect->width_;
+    const int y_end = rect->y_offset_ + rect->height_;
+    int x, y;
+    assert(src->width == dst->width && src->height == dst->height);
+    assert(x_end <= dst->width);
+    assert(y_end <= dst->height);
+    for (y = rect->y_offset_; y < y_end; ++y) {
+        const uint32_t* const src_row = src->argb + y * src->argb_stride;
+        const uint32_t* const dst_row = dst->argb + y * dst->argb_stride;
+        for (x = rect->x_offset_; x < x_end; ++x) {
+            const int dst_opaque = (dst_row[x] >> 24) == 0xff;
+            if (!dst_opaque && src_row[x] != dst_row[x]) return 0;
+        }
+    }
+    return 1;
+}
+
+#define MIN_COLORS_LOSSY 31     // Don't try lossy below this threshold.
+#define MAX_COLORS_LOSSLESS 194 // Don't try lossless above this threshold.
+#define MAX_COLOR_COUNT 256     // Power of 2 greater than MAX_COLORS_LOSSLESS.
+#define HASH_SIZE (MAX_COLOR_COUNT * 4)
+#define HASH_RIGHT_SHIFT 22 // 32 - log2(HASH_SIZE).
+
+// TODO(urvang): Also used in enc/vp8l.c. Move to utils.
+// Counts distinct ARGB values in 'pic' with an open-addressing hash table; returns
+// MAX_COLOR_COUNT as soon as that many are seen, otherwise the exact count.
+static int GetColorCount(const WebPPicture* const pic) {
+    int x, y;
+    int num_colors = 0;
+    uint8_t in_use[HASH_SIZE] = {0}; // Occupancy flag per hash slot.
+    uint32_t colors[HASH_SIZE];      // Color stored in each occupied slot.
+    static const uint32_t kHashMul = 0x1e35a7bd;
+    const uint32_t* argb = pic->argb;
+    const int width = pic->width;
+    const int height = pic->height;
+    uint32_t last_pix = ~argb[0]; // so we're sure that last_pix != argb[0]
+
+    for (y = 0; y < height; ++y) {
+        for (x = 0; x < width; ++x) {
+            int key;
+            if (argb[x] == last_pix) { // Same as the previous pixel: already counted.
+                continue;
+            }
+            last_pix = argb[x];
+            key = (kHashMul * last_pix) >> HASH_RIGHT_SHIFT; // Top bits of the product pick the slot.
+            while (1) {
+                if (!in_use[key]) {
+                    colors[key] = last_pix;
+                    in_use[key] = 1;
+                    ++num_colors;
+                    if (num_colors >= MAX_COLOR_COUNT) {
+                        return MAX_COLOR_COUNT; // Exact count not needed.
+                    }
+                    break;
+                } else if (colors[key] == last_pix) {
+                    break; // The color is already there.
+                } else {
+                    // Some other color sits here, so do linear conflict resolution.
+                    ++key;
+                    key &= (HASH_SIZE - 1); // Key mask (wraps around the table).
+                }
+            }
+        }
+        argb += pic->argb_stride; // Advance to the next row.
+    }
+    return num_colors;
+}
+
+#undef MAX_COLOR_COUNT
+#undef HASH_SIZE
+#undef HASH_RIGHT_SHIFT
+
+// Within 'rect', makes transparent every 'dst' pixel that is identical to the
+// pixel at the same position in 'src'.
+static void IncreaseTransparency(const WebPPicture* const src, const FrameRect* const rect, WebPPicture* const dst) {
+    int x, y;
+    assert(src != NULL && dst != NULL && rect != NULL);
+    assert(src->width == dst->width && src->height == dst->height);
+
+    for (y = rect->y_offset_; y < rect->y_offset_ + rect->height_; ++y) {
+        // Both cursors start at the rectangle's left edge of row 'y'.
+        const uint32_t* src_px = src->argb + y * src->argb_stride + rect->x_offset_;
+        uint32_t* dst_px = dst->argb + y * dst->argb_stride + rect->x_offset_;
+        for (x = 0; x < rect->width_; ++x, ++src_px, ++dst_px) {
+            if (*src_px == *dst_px) *dst_px = TRANSPARENT_COLOR;
+        }
+    }
+}
+
+#undef TRANSPARENT_COLOR
+
+// Within 'rect', replaces each 8x8 block of 'dst' whose pixels all equal 'src' and
+// are opaque there by a fully transparent block of the block's average color.
+static void FlattenSimilarBlocks(const WebPPicture* const src, const FrameRect* const rect, WebPPicture* const dst) {
+    int i, j;
+    const int block_size = 8;
+    const int y_start = (rect->y_offset_ + block_size) & ~(block_size - 1); // First multiple of 8 above the top edge.
+    const int y_end = (rect->y_offset_ + rect->height_) & ~(block_size - 1); // Bottom edge rounded down to 8.
+    const int x_start = (rect->x_offset_ + block_size) & ~(block_size - 1); // First multiple of 8 past the left edge.
+    const int x_end = (rect->x_offset_ + rect->width_) & ~(block_size - 1); // Right edge rounded down to 8.
+    assert(src != NULL && dst != NULL && rect != NULL);
+    assert(src->width == dst->width && src->height == dst->height);
+    assert((block_size & (block_size - 1)) == 0); // must be a power of 2
+    // Iterate over each block and count similar pixels.
+    for (j = y_start; j < y_end; j += block_size) {
+        for (i = x_start; i < x_end; i += block_size) {
+            int cnt = 0;
+            int avg_r = 0, avg_g = 0, avg_b = 0; // Channel sums; divided by 'cnt' below.
+            int x, y;
+            const uint32_t* const psrc = src->argb + j * src->argb_stride + i;
+            uint32_t* const pdst = dst->argb + j * dst->argb_stride + i;
+            for (y = 0; y < block_size; ++y) {
+                for (x = 0; x < block_size; ++x) {
+                    const uint32_t src_pixel = psrc[x + y * src->argb_stride];
+                    const int alpha = src_pixel >> 24;
+                    if (alpha == 0xff && src_pixel == pdst[x + y * dst->argb_stride]) {
+                        ++cnt;
+                        avg_r += (src_pixel >> 16) & 0xff;
+                        avg_g += (src_pixel >> 8) & 0xff;
+                        avg_b += (src_pixel >> 0) & 0xff;
+                    }
+                }
+            }
+            // If we have a fully similar block, we replace it with an
+            // average transparent block. This compresses better in lossy mode.
+            if (cnt == block_size * block_size) {
+                const uint32_t color =
+                    (0x00 << 24) | ((avg_r / cnt) << 16) | ((avg_g / cnt) << 8) | ((avg_b / cnt) << 0);
+                for (y = 0; y < block_size; ++y) {
+                    for (x = 0; x < block_size; ++x) {
+                        pdst[x + y * dst->argb_stride] = color;
+                    }
+                }
+            }
+        }
+    }
+}
+
+static int EncodeFrame(const WebPConfig* const config, WebPPicture* const pic, WebPMemoryWriter* const memory) {
+    pic->use_argb = 1; // Flag 'pic' as carrying ARGB samples.
+    pic->writer = WebPMemoryWrite; // Writer function for the encoder output ...
+    pic->custom_ptr = memory; // ... and the memory writer handed to it.
+    /* NOTE: the 'if (!WebPEncode(config, pic)) return 0;' call is disabled here, */
+    /*   so 'config' is unused and this function never returns 0. */
+    /* NOTE(review): presumably encoding happens elsewhere in this demo - confirm. */
+    return 1;
+}
+
+// Struct representing a candidate encoded frame including its metadata.
+typedef struct {
+    WebPMemoryWriter mem_; // Memory writer handed to EncodeFrame() for this candidate.
+    WebPMuxFrameInfo info_; // Frame metadata: id, offsets, dispose/blend method, duration.
+    FrameRect rect_; // Copy of the frame rectangle the candidate was built for.
+    int evaluate_; // True if this candidate should be evaluated.
+} Candidate;
+
+// Builds one candidate: resets it, fills its frame metadata from 'rect' and
+// 'use_blending', then encodes 'sub_frame' with 'config' into its own buffer.
+static WebPEncodingError EncodeCandidate(WebPPicture* const sub_frame,
+                                         const FrameRect* const rect,
+                                         const WebPConfig* const config,
+                                         int use_blending,
+                                         Candidate* const candidate) {
+    WebPMuxFrameInfo* info;
+    assert(candidate != NULL);
+    memset(candidate, 0, sizeof(*candidate));
+
+    // Frame placement and metadata.
+    info = &candidate->info_;
+    candidate->rect_ = *rect;
+    info->id = WEBP_CHUNK_ANMF;
+    info->x_offset = rect->x_offset_;
+    info->y_offset = rect->y_offset_;
+    info->dispose_method = WEBP_MUX_DISPOSE_NONE; // Set later.
+    info->blend_method = WEBP_MUX_NO_BLEND;
+    if (use_blending) info->blend_method = WEBP_MUX_BLEND;
+    info->duration = 0; // Set in next call to WebPAnimEncoderAdd().
+
+    // Encode into a fresh memory writer; on failure, release the writer and
+    // report the error code recorded in the picture.
+    WebPMemoryWriterInit(&candidate->mem_);
+    if (!EncodeFrame(config, sub_frame, &candidate->mem_)) {
+        const WebPEncodingError error_code = sub_frame->error_code;
+        WebPMemoryWriterClear(&candidate->mem_);
+        return error_code;
+    }
+
+    candidate->evaluate_ = 1;
+    return VP8_ENC_OK;
+}
+
+static void CopyCurrentCanvas(WebPAnimEncoder* const enc) {
+    // The working copy is refreshed only when it has been marked as modified.
+    if (!enc->curr_canvas_copy_modified_) return;
+    WebPCopyPixels(enc->curr_canvas_, &enc->curr_canvas_copy_);
+    enc->curr_canvas_copy_modified_ = 0;
+}
+
+enum { LL_DISP_NONE = 0, LL_DISP_BG, LOSSY_DISP_NONE, LOSSY_DISP_BG, CANDIDATE_COUNT }; // Candidate slots: lossless/lossy x dispose-none/dispose-background.
+
+// Fills the lossless and/or lossy slot of 'candidates' that matches 'dispose_method'
+// by encoding the pre-filled 'sub_frame' at 'rect'. Returns VP8_ENC_OK or the first error.
+static WebPEncodingError GenerateCandidates(WebPAnimEncoder* const enc,
+                                            Candidate candidates[CANDIDATE_COUNT],
+                                            WebPMuxAnimDispose dispose_method,
+                                            int is_lossless, // Only consulted when mixed mode is off.
+                                            int is_key_frame, // Key frames never use blending.
+                                            const FrameRect* const rect,
+                                            WebPPicture* sub_frame,
+                                            const WebPConfig* const config_ll,
+                                            const WebPConfig* const config_lossy) {
+    WebPEncodingError error_code = VP8_ENC_OK;
+    const int is_dispose_none = (dispose_method == WEBP_MUX_DISPOSE_NONE);
+    Candidate* const candidate_ll = is_dispose_none ? &candidates[LL_DISP_NONE] : &candidates[LL_DISP_BG];
+    Candidate* const candidate_lossy = is_dispose_none ? &candidates[LOSSY_DISP_NONE] : &candidates[LOSSY_DISP_BG];
+    WebPPicture* const curr_canvas = &enc->curr_canvas_copy_;
+    const WebPPicture* const prev_canvas = is_dispose_none ? &enc->prev_canvas_ : &enc->prev_canvas_disposed_;
+    const int use_blending = !is_key_frame && IsBlendingPossible(prev_canvas, curr_canvas, rect);
+
+    // Pick candidates to be tried.
+    if (!enc->options_.allow_mixed) {
+        candidate_ll->evaluate_ = is_lossless;
+        candidate_lossy->evaluate_ = !is_lossless;
+    } else { // Use a heuristic for trying lossless and/or lossy compression.
+        const int num_colors = GetColorCount(sub_frame);
+        candidate_ll->evaluate_ = (num_colors < MAX_COLORS_LOSSLESS);
+        candidate_lossy->evaluate_ = (num_colors >= MIN_COLORS_LOSSY);
+    }
+
+    // Generate candidates.
+    // NOTE(review): 'sub_frame' presumably views the canvas copy, so the edits below change what is encoded - confirm in GetSubRect().
+    if (candidate_ll->evaluate_) {
+        CopyCurrentCanvas(enc); // Start from unmodified pixels.
+        if (use_blending) {
+            IncreaseTransparency(prev_canvas, rect, curr_canvas);
+            enc->curr_canvas_copy_modified_ = 1; // Forces a fresh copy before the next use.
+        }
+        error_code = EncodeCandidate(sub_frame, rect, config_ll, use_blending, candidate_ll);
+        if (error_code != VP8_ENC_OK) return error_code;
+    }
+    if (candidate_lossy->evaluate_) {
+        CopyCurrentCanvas(enc); // Undo any lossless-specific edits made above.
+        if (use_blending) {
+            FlattenSimilarBlocks(prev_canvas, rect, curr_canvas);
+            enc->curr_canvas_copy_modified_ = 1;
+        }
+        error_code = EncodeCandidate(sub_frame, rect, config_lossy, use_blending, candidate_lossy);
+        if (error_code != VP8_ENC_OK) return error_code;
+    }
+    return error_code;
+}
+
+#undef MIN_COLORS_LOSSY
+#undef MAX_COLORS_LOSSLESS
+
+static void GetEncodedData(const WebPMemoryWriter* const memory, WebPData* const encoded_data) {
+    encoded_data->size = memory->size;
+    encoded_data->bytes = memory->mem; // Pointer copy only: the buffer itself is not duplicated.
+}
+
+// Stores 'dispose_method' as the dispose method of the frame preceding the current one.
+static void SetPreviousDisposeMethod(WebPAnimEncoder* const enc, WebPMuxAnimDispose dispose_method) {
+    const size_t prev_index = enc->count_ - 2;
+    EncodedFrame* const prev = GetFrame(enc, prev_index);
+    assert(enc->count_ >= 2); // As current and previous frames are in enc.
+    if (!enc->prev_candidate_undecided_) {
+        // The previous frame's variant is settled: update only that variant.
+        WebPMuxFrameInfo* const chosen = prev->is_key_frame_ ? &prev->key_frame_ : &prev->sub_frame_;
+        chosen->dispose_method = dispose_method;
+        return;
+    }
+    // Still undecided between sub-frame and key-frame: update both variants.
+    assert(dispose_method == WEBP_MUX_DISPOSE_NONE);
+    prev->sub_frame_.dispose_method = dispose_method;
+    prev->key_frame_.dispose_method = dispose_method;
+}
+
+static int IncreasePreviousDuration(WebPAnimEncoder* const enc, int duration) { // Returns 1 on success, 0 if WebPDataCopy() fails.
+    const size_t position = enc->count_ - 1;
+    EncodedFrame* const prev_enc_frame = GetFrame(enc, position);
+    int new_duration;
+    // Adds 'duration' to the last cached frame, or appends a 1x1 filler frame when the sum would reach MAX_DURATION.
+    assert(enc->count_ >= 1);
+    assert(prev_enc_frame->sub_frame_.duration == prev_enc_frame->key_frame_.duration);
+    assert(prev_enc_frame->sub_frame_.duration == (prev_enc_frame->sub_frame_.duration & (MAX_DURATION - 1)));
+    assert(duration == (duration & (MAX_DURATION - 1)));
+
+    new_duration = prev_enc_frame->sub_frame_.duration + duration;
+    if (new_duration >= MAX_DURATION) { // Special case.
+        // Separate out previous frame from earlier merged frames to avoid overflow.
+        // We add a 1x1 transparent frame for the previous frame, with blending on.
+        const FrameRect rect = {0, 0, 1, 1};
+        const uint8_t lossless_1x1_bytes[] = {0x52, 0x49, 0x46, 0x46, 0x14, 0x00, 0x00, 0x00, 0x57, 0x45,
+                                              0x42, 0x50, 0x56, 0x50, 0x38, 0x4c, 0x08, 0x00, 0x00, 0x00,
+                                              0x2f, 0x00, 0x00, 0x00, 0x10, 0x88, 0x88, 0x08};
+        const WebPData lossless_1x1 = {lossless_1x1_bytes, sizeof(lossless_1x1_bytes)};
+        const uint8_t lossy_1x1_bytes[] = {
+            0x52, 0x49, 0x46, 0x46, 0x40, 0x00, 0x00, 0x00, 0x57, 0x45, 0x42, 0x50, 0x56, 0x50, 0x38, 0x58, 0x0a, 0x00,
+            0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x41, 0x4c, 0x50, 0x48, 0x02, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x56, 0x50, 0x38, 0x20, 0x18, 0x00, 0x00, 0x00, 0x30, 0x01, 0x00, 0x9d, 0x01, 0x2a,
+            0x01, 0x00, 0x01, 0x00, 0x02, 0x00, 0x34, 0x25, 0xa4, 0x00, 0x03, 0x70, 0x00, 0xfe, 0xfb, 0xfd, 0x50, 0x00};
+        const WebPData lossy_1x1 = {lossy_1x1_bytes, sizeof(lossy_1x1_bytes)};
+        const int can_use_lossless = (enc->last_config_.lossless || enc->options_.allow_mixed);
+        EncodedFrame* const curr_enc_frame = GetFrame(enc, enc->count_); // Slot just past the last cached frame.
+        curr_enc_frame->is_key_frame_ = 0;
+        curr_enc_frame->sub_frame_.id = WEBP_CHUNK_ANMF;
+        curr_enc_frame->sub_frame_.x_offset = 0;
+        curr_enc_frame->sub_frame_.y_offset = 0;
+        curr_enc_frame->sub_frame_.dispose_method = WEBP_MUX_DISPOSE_NONE;
+        curr_enc_frame->sub_frame_.blend_method = WEBP_MUX_BLEND;
+        curr_enc_frame->sub_frame_.duration = duration; // The filler frame carries the extra time.
+        if (!WebPDataCopy(can_use_lossless ? &lossless_1x1 : &lossy_1x1, &curr_enc_frame->sub_frame_.bitstream)) {
+            return 0;
+        }
+        ++enc->count_; // The filler is accounted for like a regular sub-frame.
+        ++enc->count_since_key_frame_;
+        enc->flush_count_ = enc->count_ - 1;
+        enc->prev_candidate_undecided_ = 0;
+        enc->prev_rect_ = rect;
+    } else { // Regular case.
+        // Increase duration of the previous frame by 'duration'.
+        prev_enc_frame->sub_frame_.duration = new_duration;
+        prev_enc_frame->key_frame_.duration = new_duration;
+    }
+    return 1;
+}
+
+// Selects, among the candidates flagged for evaluation, the one whose encoded
+// output is smallest, stores it in 'encoded_frame' and releases all the others.
+// TODO(later): Perhaps a rough SSIM/PSNR produced by the encoder should
+// also be a criterion, in addition to sizes.
+static void PickBestCandidate(WebPAnimEncoder* const enc,
+                              Candidate* const candidates,
+                              int is_key_frame,
+                              EncodedFrame* const encoded_frame) {
+    int idx;
+    int winner = -1;
+    size_t smallest = ~0;
+    // Pass 1: locate the smallest evaluated candidate.
+    for (idx = 0; idx < CANDIDATE_COUNT; ++idx) {
+        if (!candidates[idx].evaluate_) continue;
+        // Strict '<' keeps the earliest candidate on ties.
+        if (candidates[idx].mem_.size < smallest) {
+            winner = idx;
+            smallest = candidates[idx].mem_.size;
+        }
+    }
+    assert(winner != -1);
+    // Pass 2: publish the winner, release every other evaluated candidate.
+    for (idx = 0; idx < CANDIDATE_COUNT; ++idx) {
+        Candidate* const cand = &candidates[idx];
+        WebPMuxFrameInfo* dst;
+        if (!cand->evaluate_) continue;
+        if (idx != winner) { // Loser: free its buffer and retire it.
+            WebPMemoryWriterClear(&cand->mem_);
+            cand->evaluate_ = 0;
+            continue;
+        }
+        dst = is_key_frame ? &encoded_frame->key_frame_ : &encoded_frame->sub_frame_;
+        *dst = cand->info_;
+        GetEncodedData(&cand->mem_, &dst->bitstream);
+        if (!is_key_frame) {
+            // Note: Previous dispose method only matters for non-keyframes.
+            // Also, we don't want to modify previous dispose method that was
+            // selected when a non key-frame was assumed.
+            const int is_none = (winner == LL_DISP_NONE || winner == LOSSY_DISP_NONE);
+            SetPreviousDisposeMethod(enc, is_none ? WEBP_MUX_DISPOSE_NONE : WEBP_MUX_DISPOSE_BACKGROUND);
+        }
+        enc->prev_rect_ = cand->rect_; // save for next frame.
+    }
+}
+
+// Depending on the configuration, tries different compressions (lossy/lossless),
+// dispose methods and blending methods to encode the current frame, and stores
+// the smallest result in 'encoded_frame'. '*frame_skipped' is set to 1 when the
+// change rectangle is empty and nothing is encoded. Returns a WebPEncodingError.
+static WebPEncodingError SetFrame(WebPAnimEncoder* const enc,
+                                  const WebPConfig* const config,
+                                  int is_key_frame,
+                                  EncodedFrame* const encoded_frame,
+                                  int* const frame_skipped) {
+    int i;
+    WebPEncodingError error_code = VP8_ENC_OK;
+    const WebPPicture* const curr_canvas = &enc->curr_canvas_copy_;
+    const WebPPicture* const prev_canvas = &enc->prev_canvas_;
+    Candidate candidates[CANDIDATE_COUNT];
+    const int is_lossless = config->lossless;
+    const int is_first_frame = enc->is_first_frame_;
+
+    int try_dispose_none = 1; // Default.
+    FrameRect rect_none;
+    WebPPicture sub_frame_none;
+    // First frame cannot be skipped as there is no 'previous frame' to merge it
+    // to. So, empty rectangle is not allowed for the first frame.
+    const int empty_rect_allowed_none = !is_first_frame;
+
+    // If current frame is a key-frame, dispose method of previous frame doesn't
+    // matter, so we don't try dispose to background.
+    // Also, if key-frame insertion is on, and previous frame could be picked as
+    // either a sub-frame or a key-frame, then we can't be sure about what frame
+    // rectangle would be disposed. In that case too, we don't try dispose to
+    // background.
+    const int dispose_bg_possible = !is_key_frame && !enc->prev_candidate_undecided_;
+    int try_dispose_bg = 0; // Default.
+    FrameRect rect_bg;
+    WebPPicture sub_frame_bg;
+
+    WebPConfig config_ll = *config; // Same settings as 'config', forced to lossless.
+    WebPConfig config_lossy = *config; // Same settings as 'config', forced to lossy.
+    config_ll.lossless = 1;
+    config_lossy.lossless = 0;
+    enc->last_config_ = *config;
+    enc->last_config2_ = config->lossless ? config_lossy : config_ll; // The opposite mode of 'config'.
+    *frame_skipped = 0;
+
+    if (!WebPPictureInit(&sub_frame_none) || !WebPPictureInit(&sub_frame_bg)) {
+        return VP8_ENC_ERROR_INVALID_CONFIGURATION;
+    }
+
+    for (i = 0; i < CANDIDATE_COUNT; ++i) {
+        candidates[i].evaluate_ = 0; // Lets the cleanup at 'Err' skip slots never filled.
+    }
+
+    // Change-rectangle assuming previous frame was DISPOSE_NONE.
+    GetSubRect(prev_canvas, curr_canvas, is_key_frame, is_first_frame, empty_rect_allowed_none, &rect_none,
+               &sub_frame_none);
+
+    if (IsEmptyRect(&rect_none)) {
+        // Don't encode the frame at all. Instead, the duration of the previous
+        // frame will be increased later.
+        assert(empty_rect_allowed_none);
+        *frame_skipped = 1;
+        goto End;
+    }
+
+    if (dispose_bg_possible) {
+        // Change-rectangle assuming previous frame was DISPOSE_BACKGROUND.
+        WebPPicture* const prev_canvas_disposed = &enc->prev_canvas_disposed_;
+        WebPCopyPixels(prev_canvas, prev_canvas_disposed);
+        DisposeFrameRectangle(WEBP_MUX_DISPOSE_BACKGROUND, &enc->prev_rect_, prev_canvas_disposed);
+        // Even if there is exact pixel match between 'disposed previous canvas' and
+        // 'current canvas', we can't skip current frame, as there may not be exact
+        // pixel match between 'previous canvas' and 'current canvas'. So, we don't
+        // allow empty rectangle in this case.
+        GetSubRect(prev_canvas_disposed, curr_canvas, is_key_frame, is_first_frame, 0 /* empty_rect_allowed */,
+                   &rect_bg, &sub_frame_bg);
+        assert(!IsEmptyRect(&rect_bg));
+
+        if (enc->options_.minimize_size) { // Try both dispose methods.
+            try_dispose_bg = 1;
+            try_dispose_none = 1;
+        } else if (RectArea(&rect_bg) < RectArea(&rect_none)) {
+            try_dispose_bg = 1; // Pick DISPOSE_BACKGROUND.
+            try_dispose_none = 0;
+        }
+    }
+
+    if (try_dispose_none) {
+        error_code = GenerateCandidates(enc, candidates, WEBP_MUX_DISPOSE_NONE, is_lossless, is_key_frame, &rect_none,
+                                        &sub_frame_none, &config_ll, &config_lossy);
+        if (error_code != VP8_ENC_OK) goto Err;
+    }
+
+    if (try_dispose_bg) {
+        assert(!enc->is_first_frame_);
+        assert(dispose_bg_possible);
+        error_code = GenerateCandidates(enc, candidates, WEBP_MUX_DISPOSE_BACKGROUND, is_lossless, is_key_frame,
+                                        &rect_bg, &sub_frame_bg, &config_ll, &config_lossy);
+        if (error_code != VP8_ENC_OK) goto Err;
+    }
+
+    PickBestCandidate(enc, candidates, is_key_frame, encoded_frame);
+
+    goto End; // Success: skip the candidate cleanup below.
+
+Err:
+    for (i = 0; i < CANDIDATE_COUNT; ++i) {
+        if (candidates[i].evaluate_) {
+            WebPMemoryWriterClear(&candidates[i].mem_);
+        }
+    }
+
+End:
+    WebPPictureFree(&sub_frame_none); // Both pictures are released on every path reaching here.
+    WebPPictureFree(&sub_frame_bg);
+    return error_code;
+}
+
+// Size of the key-frame bitstream minus the size of the sub-frame bitstream,
+// i.e. the extra bytes paid for storing the given frame as a key frame.
+static int64_t KeyFramePenalty(const EncodedFrame* const encoded_frame) {
+    const int64_t key_frame_size = (int64_t)encoded_frame->key_frame_.bitstream.size;
+    return (key_frame_size - encoded_frame->sub_frame_.bitstream.size); }
+
+static int CacheFrame(WebPAnimEncoder* const enc, const WebPConfig* const config) { // Returns 1 on success (incl. skipped frame), 0 on error.
+    int ok = 0;
+    int frame_skipped = 0;
+    WebPEncodingError error_code = VP8_ENC_OK;
+    const size_t position = enc->count_;
+    EncodedFrame* const encoded_frame = GetFrame(enc, position);
+    // Encodes the current canvas into the next cache slot as a sub-frame and/or key-frame candidate.
+    ++enc->count_; // Reserved up front; undone at 'End' on failure or skip.
+
+    if (enc->is_first_frame_) { // Add this as a key-frame.
+        error_code = SetFrame(enc, config, 1, encoded_frame, &frame_skipped);
+        if (error_code != VP8_ENC_OK) goto End;
+        assert(frame_skipped == 0); // First frame can't be skipped, even if empty.
+        assert(position == 0 && enc->count_ == 1);
+        encoded_frame->is_key_frame_ = 1;
+        enc->flush_count_ = 0;
+        enc->count_since_key_frame_ = 0;
+        enc->prev_candidate_undecided_ = 0;
+    } else {
+        ++enc->count_since_key_frame_;
+        if (enc->count_since_key_frame_ <= enc->options_.kmin) {
+            // Add this as a frame rectangle.
+            error_code = SetFrame(enc, config, 0, encoded_frame, &frame_skipped);
+            if (error_code != VP8_ENC_OK) goto End;
+            if (frame_skipped) goto Skip;
+            encoded_frame->is_key_frame_ = 0;
+            enc->flush_count_ = enc->count_ - 1;
+            enc->prev_candidate_undecided_ = 0;
+        } else {
+            int64_t curr_delta;
+
+            // Add this as a frame rectangle to enc.
+            error_code = SetFrame(enc, config, 0, encoded_frame, &frame_skipped);
+            if (error_code != VP8_ENC_OK) goto End;
+            if (frame_skipped) goto Skip;
+
+            // Add this as a key-frame to enc, too.
+            error_code = SetFrame(enc, config, 1, encoded_frame, &frame_skipped);
+            if (error_code != VP8_ENC_OK) goto End;
+            assert(frame_skipped == 0); // Key-frame cannot be an empty rectangle.
+
+            // Analyze size difference of the two variants.
+            curr_delta = KeyFramePenalty(encoded_frame);
+            if (curr_delta <= enc->best_delta_) { // Pick this as the key-frame.
+                if (enc->keyframe_ != KEYFRAME_NONE) {
+                    EncodedFrame* const old_keyframe = GetFrame(enc, enc->keyframe_);
+                    assert(old_keyframe->is_key_frame_);
+                    old_keyframe->is_key_frame_ = 0; // Demote the previously chosen key-frame.
+                }
+                encoded_frame->is_key_frame_ = 1;
+                enc->keyframe_ = (int)position;
+                enc->best_delta_ = curr_delta;
+                enc->flush_count_ = enc->count_ - 1; // We can flush previous frames.
+            } else {
+                encoded_frame->is_key_frame_ = 0;
+            }
+            // Note: We need '>=' below because when kmin and kmax are both zero,
+            // count_since_key_frame will always be > kmax.
+            if (enc->count_since_key_frame_ >= enc->options_.kmax) {
+                enc->flush_count_ = enc->count_ - 1;
+                enc->count_since_key_frame_ = 0;
+                enc->keyframe_ = KEYFRAME_NONE;
+                enc->best_delta_ = DELTA_INFINITY;
+            }
+            enc->prev_candidate_undecided_ = 1; // Both variants of this frame are kept for now.
+        }
+    }
+
+    // Save the current canvas as the previous canvas for the next call.
+    WebPCopyPixels(enc->curr_canvas_, &enc->prev_canvas_);
+    enc->is_first_frame_ = 0;
+
+Skip:
+    ok = 1; // A skipped frame still counts as a successfully added input frame.
+    ++enc->in_frame_count_;
+
+End:
+    if (!ok || frame_skipped) {
+        FrameRelease(encoded_frame);
+        // We reset some counters, as the frame addition failed/was skipped.
+        --enc->count_;
+        if (!enc->is_first_frame_) --enc->count_since_key_frame_;
+        if (!ok) {
+            MarkError2(enc, "ERROR adding frame. WebPEncodingError", error_code);
+        }
+    }
+    enc->curr_canvas_->error_code = error_code; // report error_code
+    assert(ok || error_code != VP8_ENC_OK);
+    return ok;
+}
+
+static int FlushFrames(WebPAnimEncoder* const enc) { // Pushes 'flush_count_' cached frames to the mux; 1 on success, 0 on mux error.
+    while (enc->flush_count_ > 0) {
+        WebPMuxError err;
+        EncodedFrame* const curr = GetFrame(enc, 0); // Always the oldest cached frame.
+        const WebPMuxFrameInfo* const info = curr->is_key_frame_ ? &curr->key_frame_ : &curr->sub_frame_;
+        assert(enc->mux_ != NULL);
+        err = WebPMuxPushFrame(enc->mux_, info, 1);
+        if (err != WEBP_MUX_OK) {
+            MarkError2(enc, "ERROR adding frame. WebPMuxError", err);
+            return 0;
+        }
+        if (enc->options_.verbose) {
+            fprintf(stderr, "INFO: Added frame. offset:%d,%d dispose:%d blend:%d\n", info->x_offset, info->y_offset,
+                    info->dispose_method, info->blend_method);
+        }
+        ++enc->out_frame_count_;
+        FrameRelease(curr);
+        ++enc->start_; // Advance past the slot that was just flushed.
+        --enc->flush_count_;
+        --enc->count_;
+        if (enc->keyframe_ != KEYFRAME_NONE) --enc->keyframe_; // Keep the key-frame position in step with the shifted start.
+    }
+
+    if (enc->count_ == 1 && enc->start_ != 0) {
+        // Move enc->start to index 0.
+        const int enc_start_tmp = (int)enc->start_;
+        EncodedFrame temp = enc->encoded_frames_[0];
+        enc->encoded_frames_[0] = enc->encoded_frames_[enc_start_tmp];
+        enc->encoded_frames_[enc_start_tmp] = temp;
+        FrameRelease(&enc->encoded_frames_[enc_start_tmp]); // Release what used to sit in slot 0.
+        enc->start_ = 0;
+    }
+    return 1;
+}
+
+#undef DELTA_INFINITY
+#undef KEYFRAME_NONE
+
+int WebPAnimEncoderAdd(WebPAnimEncoder* enc, WebPPicture* frame, int timestamp, const WebPConfig* encoder_config) {
+    WebPConfig config;
+    // Adds 'frame' at 'timestamp' to the animation; a NULL 'frame' marks the end. Returns 1 on success, 0 on error.
+    if (enc == NULL) {
+        return 0;
+    }
+    MarkNoError(enc);
+
+    if (!enc->is_first_frame_) {
+        // Make sure timestamps are non-decreasing (integer wrap-around is OK).
+        const uint32_t prev_frame_duration = (uint32_t)timestamp - enc->prev_timestamp_;
+        if (prev_frame_duration >= MAX_DURATION) { // Also catches a decreasing timestamp (unsigned wrap).
+            if (frame != NULL) {
+                frame->error_code = VP8_ENC_ERROR_INVALID_CONFIGURATION;
+            }
+            MarkError(enc, "ERROR adding frame: timestamps must be non-decreasing");
+            return 0;
+        }
+        if (!IncreasePreviousDuration(enc, (int)prev_frame_duration)) {
+            return 0;
+        }
+    } else {
+        enc->first_timestamp_ = timestamp;
+    }
+
+    if (frame == NULL) { // Special: last call.
+        enc->got_null_frame_ = 1;
+        enc->prev_timestamp_ = timestamp;
+        return 1;
+    }
+
+    if (frame->width != enc->canvas_width_ || frame->height != enc->canvas_height_) {
+        frame->error_code = VP8_ENC_ERROR_INVALID_CONFIGURATION;
+        MarkError(enc, "ERROR adding frame: Invalid frame dimensions");
+        return 0;
+    }
+
+    if (!frame->use_argb) { // Convert frame from YUV(A) to ARGB.
+        if (enc->options_.verbose) {
+            fprintf(stderr,
+                    "WARNING: Converting frame from YUV(A) to ARGB format; "
+                    "this incurs a small loss.\n");
+        }
+        if (!WebPPictureYUVAToARGB(frame)) {
+            MarkError(enc, "ERROR converting frame from YUV(A) to ARGB");
+            return 0;
+        }
+    }
+
+    if (encoder_config != NULL) {
+        config = *encoder_config;
+    } else {
+        WebPConfigInit(&config);
+        config.lossless = 1; // Without a caller-supplied config, encode losslessly.
+    }
+    assert(enc->curr_canvas_ == NULL);
+    enc->curr_canvas_ = frame; // Store reference.
+    assert(enc->curr_canvas_copy_modified_ == 1);
+    CopyCurrentCanvas(enc);
+
+    if (!CacheFrame(enc, &config)) {
+        return 0;
+    }
+
+    if (!FlushFrames(enc)) {
+        return 0;
+    }
+    enc->curr_canvas_ = NULL; // Drop the reference to the caller's picture.
+    enc->curr_canvas_copy_modified_ = 1; // Forces a fresh copy on the next call.
+    enc->prev_timestamp_ = timestamp;
+    return 1;
+}
+
+// -----------------------------------------------------------------------------
+// Bitstream assembly.
+
+static int DecodeFrameOntoCanvas(const WebPMuxFrameInfo* const frame, WebPPicture* const canvas) {
+    const WebPData* const bitstream = &frame->bitstream;
+    WebPPicture view;
+    WebPDecoderConfig config;
+    WebPInitDecoderConfig(&config);
+    WebPUtilClearPic(canvas, NULL);
+    // The frame dimensions are read from the bitstream features.
+    if (WebPGetFeatures(bitstream->bytes, bitstream->size, &config.input) != VP8_STATUS_OK) {
+        return 0;
+    }
+    // 'view' is the window of 'canvas' placed at the frame's offsets.
+    if (!WebPPictureView(canvas, frame->x_offset, frame->y_offset, config.input.width, config.input.height,
+                         &view)) {
+        return 0;
+    }
+    // Have the decoder write BGRA samples directly into that window.
+    config.output.is_external_memory = 1;
+    config.output.colorspace = MODE_BGRA;
+    config.output.u.RGBA.rgba = (uint8_t*)view.argb;
+    config.output.u.RGBA.stride = view.argb_stride * 4;
+    config.output.u.RGBA.size = config.output.u.RGBA.stride * view.height;
+
+    return (WebPDecode(bitstream->bytes, bitstream->size, &config) == VP8_STATUS_OK);
+}
+
+static int FrameToFullCanvas(WebPAnimEncoder* const enc,
+                             const WebPMuxFrameInfo* const frame,
+                             WebPData* const full_image) { // Out: encoded full canvas. Returns 1 on success, 0 on failure.
+    WebPPicture* const canvas_buf = &enc->curr_canvas_copy_;
+    WebPMemoryWriter mem1, mem2;
+    WebPMemoryWriterInit(&mem1);
+    WebPMemoryWriterInit(&mem2);
+    // Render 'frame' onto the scratch canvas, then encode the whole canvas with the last-used config.
+    if (!DecodeFrameOntoCanvas(frame, canvas_buf)) goto Err;
+    if (!EncodeFrame(&enc->last_config_, canvas_buf, &mem1)) goto Err;
+    GetEncodedData(&mem1, full_image);
+    // Mixed mode: also try the opposite lossless/lossy setting (last_config2_) and keep the smaller output.
+    if (enc->options_.allow_mixed) {
+        if (!EncodeFrame(&enc->last_config2_, canvas_buf, &mem2)) goto Err;
+        if (mem2.size < mem1.size) {
+            GetEncodedData(&mem2, full_image);
+            WebPMemoryWriterClear(&mem1);
+        } else {
+            WebPMemoryWriterClear(&mem2);
+        }
+    }
+    return 1; // 'full_image' now refers to the surviving writer's buffer.
+Err:
+    WebPMemoryWriterClear(&mem1);
+    WebPMemoryWriterClear(&mem2);
+    WebPDataInit(full_image); // It may still point at mem1's released buffer; don't hand that to the caller.
+    return 0;
+}
+
+// Convert a single-frame animation to a non-animated image if appropriate.
+// TODO(urvang): Can we pick one of the two heuristically (based on frame
+// rectangle and/or presence of alpha)?
+static WebPMuxError OptimizeSingleFrame(WebPAnimEncoder* const enc, WebPData* const webp_data) { // In/out: 'webp_data' is replaced only by a smaller result.
+    WebPMuxError err = WEBP_MUX_OK;
+    int canvas_width, canvas_height;
+    WebPMuxFrameInfo frame;
+    WebPData full_image;
+    WebPData webp_data2;
+    WebPMux* const mux = WebPMuxCreate(webp_data, 0);
+    if (mux == NULL) return WEBP_MUX_BAD_DATA;
+    assert(enc->out_frame_count_ == 1);
+    WebPDataInit(&frame.bitstream); // Initialized up front so the cleanup at 'End' is always valid.
+    WebPDataInit(&full_image);
+    WebPDataInit(&webp_data2);
+
+    err = WebPMuxGetFrame(mux, 1, &frame);
+    if (err != WEBP_MUX_OK) goto End;
+    if (frame.id != WEBP_CHUNK_ANMF) goto End; // Non-animation: nothing to do.
+    err = WebPMuxGetCanvasSize(mux, &canvas_width, &canvas_height); // Values are not used below; only the status is checked.
+    if (err != WEBP_MUX_OK) goto End;
+    if (!FrameToFullCanvas(enc, &frame, &full_image)) {
+        err = WEBP_MUX_BAD_DATA;
+        goto End;
+    }
+    err = WebPMuxSetImage(mux, &full_image, 1);
+    if (err != WEBP_MUX_OK) goto End;
+    err = WebPMuxAssemble(mux, &webp_data2);
+    if (err != WEBP_MUX_OK) goto End;
+
+    if (webp_data2.size < webp_data->size) { // Pick 'webp_data2' if smaller.
+        WebPDataClear(webp_data);
+        *webp_data = webp_data2;
+        WebPDataInit(&webp_data2); // Ownership moved to 'webp_data'; keep 'End' from clearing it.
+    }
+
+End:
+    WebPDataClear(&frame.bitstream);
+    WebPDataClear(&full_image);
+    WebPMuxDelete(mux);
+    WebPDataClear(&webp_data2);
+    return err;
+}
+
+int WebPAnimEncoderAssemble(WebPAnimEncoder* enc, WebPData* webp_data) {
+    WebPMux* mux;
+    WebPMuxError err;
+    // Flushes all cached frames and assembles the final bitstream into 'webp_data'. Returns 1 on success, 0 on error.
+    if (enc == NULL) {
+        return 0;
+    }
+    MarkNoError(enc);
+
+    if (webp_data == NULL) {
+        MarkError(enc, "ERROR assembling: NULL input");
+        return 0;
+    }
+
+    if (enc->in_frame_count_ == 0) {
+        MarkError(enc, "ERROR: No frames to assemble");
+        return 0;
+    }
+
+    if (!enc->got_null_frame_ && enc->in_frame_count_ > 1 && enc->count_ > 0) {
+        // set duration of the last frame to be avg of durations of previous frames.
+        const double delta_time = enc->prev_timestamp_ - enc->first_timestamp_;
+        const int average_duration = (int)(delta_time / (enc->in_frame_count_ - 1));
+        if (!IncreasePreviousDuration(enc, average_duration)) {
+            return 0;
+        }
+    }
+
+    // Flush any remaining frames.
+    enc->flush_count_ = enc->count_;
+    if (!FlushFrames(enc)) {
+        return 0;
+    }
+
+    // Set definitive canvas size.
+    mux = enc->mux_;
+    err = WebPMuxSetCanvasSize(mux, enc->canvas_width_, enc->canvas_height_);
+    if (err != WEBP_MUX_OK) goto Err;
+
+    err = WebPMuxSetAnimationParams(mux, &enc->options_.anim_params);
+    if (err != WEBP_MUX_OK) goto Err;
+
+    // Assemble into a WebP bitstream.
+    err = WebPMuxAssemble(mux, webp_data);
+    if (err != WEBP_MUX_OK) goto Err;
+
+    if (enc->out_frame_count_ == 1) { // Single output frame: try the non-animated form as well.
+        err = OptimizeSingleFrame(enc, webp_data);
+        if (err != WEBP_MUX_OK) goto Err;
+    }
+    return 1;
+
+Err:
+    MarkError2(enc, "ERROR assembling WebP", err);
+    return 0;
+}
+
+const char* WebPAnimEncoderGetError(WebPAnimEncoder* enc) {
+    // A NULL encoder has no error string to report.
+    return (enc != NULL) ? enc->error_str_ : NULL;
+}
+
+// -----------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/mux/libwebpmux.pc.in b/codec/L2/demos/webpEnc/host/src/mux/libwebpmux.pc.in
new file mode 100644
index 0000000000..8f87dfe5d0
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/mux/libwebpmux.pc.in
@@ -0,0 +1,11 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libwebpmux
+Description: Library for manipulating the WebP graphics format container
+Version: @PACKAGE_VERSION@
+Requires: libwebp >= 0.2.0
+Cflags: -I${includedir}
+Libs: -L${libdir} -lwebpmux
diff --git a/codec/L2/demos/webpEnc/host/src/mux/muxedit.c b/codec/L2/demos/webpEnc/host/src/mux/muxedit.c
new file mode 100644
index 0000000000..61a8995004
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/mux/muxedit.c
@@ -0,0 +1,680 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Set and delete APIs for mux.
+//
+// Authors: Urvang (urvang@google.com)
+//          Vikas (vikasa@google.com)
+
+#include <assert.h>
+#include "./muxi.h"
+#include "../utils/utils.h"
+
+//------------------------------------------------------------------------------
+// Life of a mux object.
+
+// Puts 'mux' (which must be non-NULL) into its empty state: every field is
+// zeroed.
+static void MuxInit(WebPMux* const mux) {
+    assert(mux != NULL);
+    memset(mux, 0, sizeof(*mux));
+    // Already zero after the memset above; repeated only to make the initial
+    // canvas size explicit.
+    mux->canvas_height_ = 0;
+    mux->canvas_width_ = 0;
+}
+
+// Allocates and initializes an empty mux object.
+// Returns NULL when 'version' is ABI-incompatible with WEBP_MUX_ABI_VERSION
+// or when the allocation fails.
+WebPMux* WebPNewInternal(int version) {
+    WebPMux* mux;
+    if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_MUX_ABI_VERSION)) return NULL;
+    mux = (WebPMux*)WebPSafeMalloc(1ULL, sizeof(WebPMux));
+    if (mux == NULL) return NULL;
+    MuxInit(mux);
+    return mux;
+}
+
+// Deletes every image reachable from '*wpi_list' and leaves the list empty
+// ('*wpi_list' ends up NULL).
+static void DeleteAllImages(WebPMuxImage** const wpi_list) {
+    WebPMuxImage* cur = *wpi_list;
+    while (cur != NULL) {
+        cur = MuxImageDelete(cur); // Returns the next image, or NULL.
+    }
+    *wpi_list = NULL;
+}
+
+// Deletes the image list and every chunk list held by 'mux'.
+// The WebPMux structure itself is not freed here (see WebPMuxDelete()).
+static void MuxRelease(WebPMux* const mux) {
+    assert(mux != NULL);
+    // Each call only receives its own list, so the order is not significant.
+    ChunkListDelete(&mux->unknown_);
+    ChunkListDelete(&mux->xmp_);
+    ChunkListDelete(&mux->exif_);
+    ChunkListDelete(&mux->anim_);
+    ChunkListDelete(&mux->iccp_);
+    ChunkListDelete(&mux->vp8x_);
+    DeleteAllImages(&mux->images_);
+}
+
+// Releases the contents of 'mux' and then frees the object itself.
+// A NULL 'mux' is accepted and ignored.
+void WebPMuxDelete(WebPMux* mux) {
+    if (mux == NULL) return;
+    MuxRelease(mux);
+    WebPSafeFree(mux);
+}
+
+//------------------------------------------------------------------------------
+// Helper method(s).
+
+// Handy MACRO, makes MuxSet() very symmetric to MuxGet().
+#define SWITCH_ID_LIST(INDEX, LIST)                          \
+    if (idx == (INDEX)) {                                    \
+        err = ChunkAssignData(&chunk, data, copy_data, tag); \
+        if (err == WEBP_MUX_OK) {                            \
+            err = ChunkSetNth(&chunk, (LIST), nth);          \
+        }                                                    \
+        return err;                                          \
+    }
+
+static WebPMuxError MuxSet(WebPMux* const mux, uint32_t tag, uint32_t nth, const WebPData* const data, int copy_data) {
+    WebPChunk chunk;
+    WebPMuxError err = WEBP_MUX_NOT_FOUND;
+    const CHUNK_INDEX idx = ChunkGetIndexFromTag(tag);
+    assert(mux != NULL);
+    assert(!IsWPI(kChunks[idx].id));
+
+    ChunkInit(&chunk);
+    SWITCH_ID_LIST(IDX_VP8X, &mux->vp8x_);
+    SWITCH_ID_LIST(IDX_ICCP, &mux->iccp_);
+    SWITCH_ID_LIST(IDX_ANIM, &mux->anim_);
+    SWITCH_ID_LIST(IDX_EXIF, &mux->exif_);
+    SWITCH_ID_LIST(IDX_XMP, &mux->xmp_);
+    SWITCH_ID_LIST(IDX_UNKNOWN, &mux->unknown_);
+    return err;
+}
+#undef SWITCH_ID_LIST
+
+// Builds the payload of an ANMF (is_frame != 0) or FRGM chunk.
+// The halved x/y offsets are always written; for a frame, 'width' - 1,
+// 'height' - 1, the duration and a flags byte (bit 1: no-blend, bit 0:
+// dispose-to-background) follow.
+// On success 'frame_frgm' receives the newly allocated buffer and its size.
+// Returns WEBP_MUX_MEMORY_ERROR if the buffer cannot be allocated.
+static WebPMuxError CreateFrameFragmentData(
+    int width, int height, const WebPMuxFrameInfo* const info, int is_frame, WebPData* const frame_frgm) {
+    const size_t payload_size = kChunks[is_frame ? IDX_ANMF : IDX_FRGM].size;
+    uint8_t* payload;
+
+    assert(width > 0 && height > 0 && info->duration >= 0);
+    assert(info->dispose_method == (info->dispose_method & 1));
+    // Note: assertion on upper bounds is done in PutLE24().
+
+    payload = (uint8_t*)WebPSafeMalloc(1ULL, payload_size);
+    if (payload == NULL) return WEBP_MUX_MEMORY_ERROR;
+
+    // Offsets are stored in units of two pixels.
+    PutLE24(payload + 0, info->x_offset / 2);
+    PutLE24(payload + 3, info->y_offset / 2);
+
+    if (is_frame) {
+        int flags = 0;
+        if (info->blend_method == WEBP_MUX_NO_BLEND) flags |= 2;
+        if (info->dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) flags |= 1;
+        PutLE24(payload + 6, width - 1);
+        PutLE24(payload + 9, height - 1);
+        PutLE24(payload + 12, info->duration);
+        payload[15] = flags;
+    }
+
+    frame_frgm->size = payload_size;
+    frame_frgm->bytes = payload;
+    return WEBP_MUX_OK;
+}
+
+// Splits 'bitstream' into its image payload ('image') and optional alpha
+// payload ('alpha').
+// - If the data does not start with a "RIFF" tag it is treated as raw
+//   VP8/VP8L data and returned unchanged in 'image'.
+// - Otherwise it is parsed with WebPMuxCreate() and the payloads of the first
+//   image's chunks are returned.
+// 'alpha' stays empty when there is no alpha chunk. 'is_lossless' receives the
+// result of VP8LCheckSignature() on the image payload.
+// Returns WEBP_MUX_BAD_DATA if the RIFF data cannot be parsed.
+static WebPMuxError GetImageData(const WebPData* const bitstream,
+                                 WebPData* const image,
+                                 WebPData* const alpha,
+                                 int* const is_lossless) {
+    int is_riff;
+    WebPDataInit(alpha); // Default: no alpha.
+    is_riff = (bitstream->size >= TAG_SIZE) && !memcmp(bitstream->bytes, "RIFF", TAG_SIZE);
+    if (is_riff) {
+        // WebP file data: extract the image (and alpha) payload from it.
+        // NOTE(review): the mux is created with copy_data = 0, so the payloads
+        // presumably still point into 'bitstream' after the mux is deleted --
+        // confirm against WebPMuxCreate().
+        const WebPMuxImage* first;
+        WebPMux* const parsed = WebPMuxCreate(bitstream, 0);
+        if (parsed == NULL) return WEBP_MUX_BAD_DATA;
+        first = parsed->images_;
+        assert(first != NULL && first->img_ != NULL);
+        *image = first->img_->data_;
+        if (first->alpha_ != NULL) {
+            *alpha = first->alpha_->data_;
+        }
+        WebPMuxDelete(parsed);
+    } else {
+        // Not a WebP file: hand the input back as-is.
+        *image = *bitstream;
+    }
+    *is_lossless = VP8LCheckSignature(image->bytes, image->size);
+    return WEBP_MUX_OK;
+}
+
+// Removes every chunk whose tag equals 'tag' from '*chunk_list'.
+// Returns WEBP_MUX_OK if at least one chunk was removed, WEBP_MUX_NOT_FOUND
+// otherwise.
+static WebPMuxError DeleteChunks(WebPChunk** chunk_list, uint32_t tag) {
+    WebPMuxError status = WEBP_MUX_NOT_FOUND;
+    WebPChunk** link = chunk_list; // Points at the pointer to the current chunk.
+    assert(chunk_list);
+    while (*link != NULL) {
+        if ((*link)->tag_ != tag) {
+            link = &(*link)->next_;
+            continue;
+        }
+        // Unlink the match; ChunkDelete() hands back its successor.
+        *link = ChunkDelete(*link);
+        status = WEBP_MUX_OK;
+    }
+    return status;
+}
+
+// Deletes all chunks carrying 'tag' from the mux list that the tag maps to.
+// Image-related tags are rejected with WEBP_MUX_INVALID_ARGUMENT; otherwise
+// the result of DeleteChunks() is returned.
+static WebPMuxError MuxDeleteAllNamedData(WebPMux* const mux, uint32_t tag) {
+    const WebPChunkId chunk_id = ChunkGetIdFromTag(tag);
+    assert(mux != NULL);
+    return IsWPI(chunk_id) ? WEBP_MUX_INVALID_ARGUMENT
+                           : DeleteChunks(MuxGetChunkListFromId(mux, chunk_id), tag);
+}
+
+//------------------------------------------------------------------------------
+// Set API(s).
+
+// Replaces any chunk(s) named 'fourcc' in 'mux' by a single chunk holding
+// 'chunk_data'.
+// Returns WEBP_MUX_INVALID_ARGUMENT for NULL arguments, a NULL payload, a
+// payload larger than MAX_CHUNK_PAYLOAD, or an image-related fourcc;
+// otherwise the status of the insertion.
+WebPMuxError WebPMuxSetChunk(WebPMux* mux, const char fourcc[4], const WebPData* chunk_data, int copy_data) {
+    WebPMuxError status;
+    uint32_t tag;
+
+    if (mux == NULL || fourcc == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+    if (chunk_data == NULL || chunk_data->bytes == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+    if (chunk_data->size > MAX_CHUNK_PAYLOAD) return WEBP_MUX_INVALID_ARGUMENT;
+
+    tag = ChunkGetTagFromFourCC(fourcc);
+
+    // Drop existing chunk(s) with the same 'fourcc'; finding none is fine.
+    status = MuxDeleteAllNamedData(mux, tag);
+    if (!(status == WEBP_MUX_OK || status == WEBP_MUX_NOT_FOUND)) return status;
+
+    // Add the given chunk.
+    return MuxSet(mux, tag, 1, chunk_data, copy_data);
+}
+
+// Builds a chunk tagged 'tag' from 'data' and sets it as the 1st chunk of
+// 'chunk_list'. If either step fails the temporary chunk is released and the
+// error is returned.
+static WebPMuxError AddDataToChunkList(const WebPData* const data,
+                                       int copy_data,
+                                       uint32_t tag,
+                                       WebPChunk** chunk_list) {
+    WebPChunk tmp;
+    WebPMuxError status;
+    ChunkInit(&tmp);
+    status = ChunkAssignData(&tmp, data, copy_data, tag);
+    if (status == WEBP_MUX_OK) {
+        status = ChunkSetNth(&tmp, chunk_list, 1);
+    }
+    if (status != WEBP_MUX_OK) {
+        ChunkRelease(&tmp);
+    }
+    return status;
+}
+
+// Extracts the image and (if present) alpha payloads from 'bitstream', stores
+// them in wpi->img_ and wpi->alpha_, then finalizes 'wpi'.
+// The image chunk is tagged VP8L when the payload is lossless, VP8 otherwise.
+// Returns WEBP_MUX_INVALID_ARGUMENT if MuxImageFinalize() fails; errors from
+// the extraction/insertion steps are propagated as-is.
+static WebPMuxError SetAlphaAndImageChunks(const WebPData* const bitstream, int copy_data, WebPMuxImage* const wpi) {
+    WebPData image, alpha;
+    int is_lossless = 0;
+    WebPMuxError err = GetImageData(bitstream, &image, &alpha, &is_lossless);
+    if (err != WEBP_MUX_OK) return err;
+
+    if (alpha.bytes != NULL) {
+        err = AddDataToChunkList(&alpha, copy_data, kChunks[IDX_ALPHA].tag, &wpi->alpha_);
+        if (err != WEBP_MUX_OK) return err;
+    }
+
+    err = AddDataToChunkList(&image, copy_data, kChunks[is_lossless ? IDX_VP8L : IDX_VP8].tag, &wpi->img_);
+    if (err != WEBP_MUX_OK) return err;
+
+    if (!MuxImageFinalize(wpi)) return WEBP_MUX_INVALID_ARGUMENT;
+    return WEBP_MUX_OK;
+}
+
+// Sets 'bitstream' as the single image of 'mux', discarding any images that
+// were present before.
+// Returns WEBP_MUX_INVALID_ARGUMENT for NULL arguments, a NULL payload or a
+// payload larger than MAX_CHUNK_PAYLOAD; other errors come from the helpers.
+WebPMuxError WebPMuxSetImage(WebPMux* mux, const WebPData* bitstream, int copy_data) {
+    WebPMuxImage new_image;
+    WebPMuxError status;
+
+    // Sanity checks.
+    if (mux == NULL || bitstream == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+    if (bitstream->bytes == NULL || bitstream->size > MAX_CHUNK_PAYLOAD) {
+        return WEBP_MUX_INVALID_ARGUMENT;
+    }
+
+    // Only one 'simple image' can live in a mux, so remove what is there.
+    if (mux->images_ != NULL) {
+        DeleteAllImages(&mux->images_);
+    }
+
+    MuxImageInit(&new_image);
+    status = SetAlphaAndImageChunks(bitstream, copy_data, &new_image);
+    if (status == WEBP_MUX_OK) {
+        // Add this WebPMuxImage to mux.
+        status = MuxImagePush(&new_image, &mux->images_);
+    }
+    if (status != WEBP_MUX_OK) {
+        // Something bad happened: drop the partially built image.
+        MuxImageRelease(&new_image);
+    }
+    return status;
+}
+
+// Appends the frame described by 'frame' to the image list of 'mux'.
+// Returns WEBP_MUX_INVALID_ARGUMENT when:
+//  - 'mux' or 'frame' is NULL,
+//  - frame->id is not WEBP_CHUNK_ANMF (WEBP_CHUNK_FRGM is rejected too),
+//  - the bitstream is missing or larger than MAX_CHUNK_PAYLOAD,
+//  - the mux already holds images of a different kind,
+//  - the offsets, duration or dispose method are out of range.
+// Errors from the helper functions are propagated unchanged.
+WebPMuxError WebPMuxPushFrame(WebPMux* mux, const WebPMuxFrameInfo* frame, int copy_data) {
+    WebPMuxImage wpi;
+    WebPMuxError err;
+    int is_frame;
+    const WebPData* bitstream;
+
+    // Sanity checks.
+    if (mux == NULL || frame == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+
+    // Only form the member address once 'frame' is known to be non-NULL.
+    bitstream = &frame->bitstream;
+
+    is_frame = (frame->id == WEBP_CHUNK_ANMF);
+    if (!(is_frame || (frame->id == WEBP_CHUNK_FRGM))) {
+        return WEBP_MUX_INVALID_ARGUMENT;
+    }
+    if (frame->id == WEBP_CHUNK_FRGM) { // Dead experiment.
+        return WEBP_MUX_INVALID_ARGUMENT;
+    }
+
+    if (bitstream->bytes == NULL || bitstream->size > MAX_CHUNK_PAYLOAD) {
+        return WEBP_MUX_INVALID_ARGUMENT;
+    }
+
+    if (mux->images_ != NULL) {
+        // An image without header chunk counts as a plain WEBP_CHUNK_IMAGE.
+        const WebPMuxImage* const image = mux->images_;
+        const uint32_t image_id = (image->header_ != NULL) ? ChunkGetIdFromTag(image->header_->tag_) : WEBP_CHUNK_IMAGE;
+        if (image_id != frame->id) {
+            return WEBP_MUX_INVALID_ARGUMENT; // Conflicting frame types.
+        }
+    }
+
+    MuxImageInit(&wpi);
+    err = SetAlphaAndImageChunks(bitstream, copy_data, &wpi);
+    if (err != WEBP_MUX_OK) goto Err;
+    assert(wpi.img_ != NULL); // As SetAlphaAndImageChunks() was successful.
+
+    {
+        WebPData frame_frgm;
+        const uint32_t tag = kChunks[is_frame ? IDX_ANMF : IDX_FRGM].tag;
+        WebPMuxFrameInfo tmp = *frame;
+        tmp.x_offset &= ~1; // Snap offsets to even.
+        tmp.y_offset &= ~1;
+        if (!is_frame) { // Reset unused values.
+            tmp.duration = 1;
+            tmp.dispose_method = WEBP_MUX_DISPOSE_NONE;
+            tmp.blend_method = WEBP_MUX_BLEND;
+        }
+        if (tmp.x_offset < 0 || tmp.x_offset >= MAX_POSITION_OFFSET || tmp.y_offset < 0 ||
+            tmp.y_offset >= MAX_POSITION_OFFSET || (tmp.duration < 0 || tmp.duration >= MAX_DURATION) ||
+            tmp.dispose_method != (tmp.dispose_method & 1)) {
+            err = WEBP_MUX_INVALID_ARGUMENT;
+            goto Err;
+        }
+        err = CreateFrameFragmentData(wpi.width_, wpi.height_, &tmp, is_frame, &frame_frgm);
+        if (err != WEBP_MUX_OK) goto Err;
+        // Add frame/fragment chunk (with copy_data = 1).
+        err = AddDataToChunkList(&frame_frgm, 1, tag, &wpi.header_);
+        // The chunk list holds its own copy, so the temporary buffer can go
+        // regardless of whether the insertion succeeded.
+        WebPDataClear(&frame_frgm);
+        if (err != WEBP_MUX_OK) goto Err;
+    }
+
+    // Add this WebPMuxImage to mux.
+    err = MuxImagePush(&wpi, &mux->images_);
+    if (err != WEBP_MUX_OK) goto Err;
+
+    // All is well.
+    return WEBP_MUX_OK;
+
+Err: // Something bad happened.
+    MuxImageRelease(&wpi);
+    return err;
+}
+
+// Stores the animation parameters of 'params' in 'mux' as a single ANIM chunk,
+// replacing any ANIM chunk already present.
+// Returns WEBP_MUX_INVALID_ARGUMENT for NULL arguments or when loop_count is
+// outside [0, MAX_LOOP_COUNT).
+WebPMuxError WebPMuxSetAnimationParams(WebPMux* mux, const WebPMuxAnimParams* params) {
+    uint8_t payload[ANIM_CHUNK_SIZE];
+    const WebPData anim = {payload, ANIM_CHUNK_SIZE};
+    WebPMuxError status;
+
+    if (mux == NULL || params == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+    if (!(params->loop_count >= 0 && params->loop_count < MAX_LOOP_COUNT)) {
+        return WEBP_MUX_INVALID_ARGUMENT;
+    }
+
+    // Delete any existing ANIM chunk(s); having none is not an error.
+    status = MuxDeleteAllNamedData(mux, kChunks[IDX_ANIM].tag);
+    if (!(status == WEBP_MUX_OK || status == WEBP_MUX_NOT_FOUND)) return status;
+
+    // Payload layout: 32-bit background color, then 16-bit loop count.
+    PutLE32(payload, params->bgcolor);
+    PutLE16(payload + 4, params->loop_count);
+    return MuxSet(mux, kChunks[IDX_ANIM].tag, 1, &anim, 1);
+}
+
+// Records an explicit canvas size for 'mux' and invalidates any VP8X chunk
+// that was already assembled.
+// 'width' and 'height' must both be zero or both be positive, must not exceed
+// MAX_CANVAS_SIZE, and their product must be below MAX_IMAGE_AREA.
+// Returns WEBP_MUX_INVALID_ARGUMENT when 'mux' is NULL or the size is invalid.
+WebPMuxError WebPMuxSetCanvasSize(WebPMux* mux, int width, int height) {
+    WebPMuxError err;
+    if (mux == NULL) {
+        return WEBP_MUX_INVALID_ARGUMENT;
+    }
+    if (width < 0 || height < 0 || width > MAX_CANVAS_SIZE || height > MAX_CANVAS_SIZE) {
+        return WEBP_MUX_INVALID_ARGUMENT;
+    }
+    if (width * (uint64_t)height >= MAX_IMAGE_AREA) {
+        return WEBP_MUX_INVALID_ARGUMENT;
+    }
+    // Exactly one of width / height is zero -> invalid.
+    // This is tested without multiplying: an 'int' product could exceed
+    // INT_MAX even though the 64-bit area check above passed.
+    if ((width == 0) != (height == 0)) {
+        return WEBP_MUX_INVALID_ARGUMENT;
+    }
+    // If we already assembled a VP8X chunk, invalidate it.
+    err = MuxDeleteAllNamedData(mux, kChunks[IDX_VP8X].tag);
+    if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
+
+    mux->canvas_width_ = width;
+    mux->canvas_height_ = height;
+    return WEBP_MUX_OK;
+}
+
+//------------------------------------------------------------------------------
+// Delete API(s).
+
+// Deletes all chunks named 'fourcc' from 'mux'.
+// Returns WEBP_MUX_INVALID_ARGUMENT if either argument is NULL; otherwise the
+// result of MuxDeleteAllNamedData().
+WebPMuxError WebPMuxDeleteChunk(WebPMux* mux, const char fourcc[4]) {
+    uint32_t tag;
+    if (mux == NULL || fourcc == NULL) {
+        return WEBP_MUX_INVALID_ARGUMENT;
+    }
+    tag = ChunkGetTagFromFourCC(fourcc);
+    return MuxDeleteAllNamedData(mux, tag);
+}
+
+// Deletes the 'nth' image of 'mux'.
+// Returns WEBP_MUX_INVALID_ARGUMENT if 'mux' is NULL; otherwise the result of
+// MuxImageDeleteNth().
+WebPMuxError WebPMuxDeleteFrame(WebPMux* mux, uint32_t nth) {
+    return (mux == NULL) ? WEBP_MUX_INVALID_ARGUMENT : MuxImageDeleteNth(&mux->images_, nth);
+}
+
+//------------------------------------------------------------------------------
+// Assembly of the WebP RIFF file.
+
+// Decodes the offsets (and, for ANMF chunks only, the duration) stored in an
+// ANMF/FRGM chunk. '*duration' is left untouched for FRGM chunks.
+// Returns WEBP_MUX_INVALID_ARGUMENT if the payload size does not match the
+// size expected for the chunk type.
+static WebPMuxError GetFrameFragmentInfo(const WebPChunk* const frame_frgm_chunk,
+                                         int* const x_offset,
+                                         int* const y_offset,
+                                         int* const duration) {
+    uint32_t tag;
+    int is_frame;
+    const WebPData* data;
+    size_t expected_data_size;
+
+    // Validate the pointer before it is dereferenced.
+    assert(frame_frgm_chunk != NULL);
+    tag = frame_frgm_chunk->tag_;
+    assert(tag == kChunks[IDX_ANMF].tag || tag == kChunks[IDX_FRGM].tag);
+
+    is_frame = (tag == kChunks[IDX_ANMF].tag);
+    data = &frame_frgm_chunk->data_;
+    expected_data_size = is_frame ? ANMF_CHUNK_SIZE : FRGM_CHUNK_SIZE;
+    if (data->size != expected_data_size) return WEBP_MUX_INVALID_ARGUMENT;
+
+    // Offsets are stored halved (see CreateFrameFragmentData()).
+    *x_offset = 2 * GetLE24(data->bytes + 0);
+    *y_offset = 2 * GetLE24(data->bytes + 3);
+    if (is_frame) *duration = GetLE24(data->bytes + 12);
+    return WEBP_MUX_OK;
+}
+
+// Retrieves the offsets and duration from the ANMF/FRGM header chunk of 'wpi',
+// plus the image dimensions recorded in 'wpi'.
+// 'width' and 'height' may be NULL if the caller does not need them.
+// 'wpi' and its header chunk must be non-NULL (asserted).
+static WebPMuxError GetImageInfo(const WebPMuxImage* const wpi,
+                                 int* const x_offset,
+                                 int* const y_offset,
+                                 int* const duration,
+                                 int* const width,
+                                 int* const height) {
+    const WebPChunk* frame_frgm_chunk;
+    WebPMuxError err;
+
+    // Validate 'wpi' before reading its header chunk.
+    assert(wpi != NULL);
+    frame_frgm_chunk = wpi->header_;
+    assert(frame_frgm_chunk != NULL);
+
+    // Get offsets and duration from ANMF/FRGM chunk.
+    err = GetFrameFragmentInfo(frame_frgm_chunk, x_offset, y_offset, duration);
+    if (err != WEBP_MUX_OK) return err;
+
+    // Width and height are the values cached in 'wpi'.
+    if (width != NULL) *width = wpi->width_;
+    if (height != NULL) *height = wpi->height_;
+    return WEBP_MUX_OK;
+}
+
+// Returns the tightest dimension for the canvas considering the image list.
+// - Single image: the canvas is the image itself.
+// - Several images: the canvas is the bounding box of all frames/fragments
+//   (offset + size). When FRAGMENTS_FLAG is set in 'flags', the summed image
+//   area must also equal the canvas area, otherwise the dimensions are reset
+//   to 0 and WEBP_MUX_INVALID_ARGUMENT is returned.
+// 'mux' must hold at least one image (asserted).
+static WebPMuxError GetAdjustedCanvasSize(const WebPMux* const mux,
+                                          uint32_t flags,
+                                          int* const width,
+                                          int* const height) {
+    WebPMuxImage* wpi = NULL;
+    assert(mux != NULL);
+    assert(width != NULL && height != NULL);
+
+    wpi = mux->images_;
+    assert(wpi != NULL);
+    assert(wpi->img_ != NULL);
+
+    if (wpi->next_ != NULL) {
+        int max_x = 0;
+        int max_y = 0;
+        int64_t image_area = 0;
+        // if we have a chain of wpi's, header_ is necessarily set
+        assert(wpi->header_ != NULL);
+        // Aggregate the bounding box for animation frames & fragmented images.
+        for (; wpi != NULL; wpi = wpi->next_) {
+            int x_offset = 0, y_offset = 0, duration = 0, w = 0, h = 0;
+            const WebPMuxError err = GetImageInfo(wpi, &x_offset, &y_offset, &duration, &w, &h);
+            const int max_x_pos = x_offset + w;
+            const int max_y_pos = y_offset + h;
+            if (err != WEBP_MUX_OK) return err;
+            assert(x_offset < MAX_POSITION_OFFSET);
+            assert(y_offset < MAX_POSITION_OFFSET);
+
+            if (max_x_pos > max_x) max_x = max_x_pos;
+            if (max_y_pos > max_y) max_y = max_y_pos;
+            // Widen before multiplying so the product cannot overflow 'int'.
+            image_area += (int64_t)w * h;
+        }
+        *width = max_x;
+        *height = max_y;
+        // Crude check to validate that there are no image overlaps/holes for
+        // fragmented images. Check that the aggregated image area for individual
+        // fragments exactly matches the image area of the constructed canvas.
+        // However, the area-match is necessary but not sufficient condition.
+        // The canvas area is computed in 64 bits: offsets can push max_x and
+        // max_y high enough for an 'int' product to overflow.
+        if ((flags & FRAGMENTS_FLAG) && (image_area != ((int64_t)max_x * max_y))) {
+            *width = 0;
+            *height = 0;
+            return WEBP_MUX_INVALID_ARGUMENT;
+        }
+    } else {
+        // For a single image, canvas dimensions are same as image dimensions.
+        *width = wpi->width_;
+        *height = wpi->height_;
+    }
+    return WEBP_MUX_OK;
+}
+
+// (Re)creates the VP8X chunk of 'mux' from its current contents.
+// Any existing VP8X chunk is removed first. The feature flags are derived from
+// the presence of ICCP/EXIF/XMP chunks, the header chunk type of the first
+// image (fragments vs. animation) and the presence of alpha. If no flag ends
+// up set, the mux describes a simple image and no VP8X chunk is added.
+// Returns WEBP_MUX_INVALID_ARGUMENT if there is no usable first image, if the
+// computed canvas is empty or larger than MAX_CANVAS_SIZE, or if it does not
+// fit inside an explicitly set canvas size.
+//
+// VP8X format:
+// Total Size : 10,
+// Flags  : 4 bytes,
+// Width  : 3 bytes,
+// Height : 3 bytes.
+static WebPMuxError CreateVP8XChunk(WebPMux* const mux) {
+    WebPMuxError err = WEBP_MUX_OK;
+    uint32_t flags = 0;
+    int width = 0;
+    int height = 0;
+    uint8_t data[VP8X_CHUNK_SIZE];
+    const WebPData vp8x = {data, VP8X_CHUNK_SIZE};
+    const WebPMuxImage* images = NULL;
+
+    assert(mux != NULL);
+    images = mux->images_; // First image.
+    if (images == NULL || images->img_ == NULL || images->img_->data_.bytes == NULL) {
+        return WEBP_MUX_INVALID_ARGUMENT;
+    }
+
+    // If VP8X chunk(s) is(are) already present, remove them (and later add new
+    // VP8X chunk with updated flags).
+    err = MuxDeleteAllNamedData(mux, kChunks[IDX_VP8X].tag);
+    if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
+
+    // Set flags.
+    if (mux->iccp_ != NULL && mux->iccp_->data_.bytes != NULL) {
+        flags |= ICCP_FLAG;
+    }
+    if (mux->exif_ != NULL && mux->exif_->data_.bytes != NULL) {
+        flags |= EXIF_FLAG;
+    }
+    if (mux->xmp_ != NULL && mux->xmp_->data_.bytes != NULL) {
+        flags |= XMP_FLAG;
+    }
+    // Only the first image's header chunk is inspected to tell fragments from
+    // animation.
+    if (images->header_ != NULL) {
+        if (images->header_->tag_ == kChunks[IDX_FRGM].tag) {
+            // This is a fragmented image.
+            flags |= FRAGMENTS_FLAG;
+        } else if (images->header_->tag_ == kChunks[IDX_ANMF].tag) {
+            // This is an image with animation.
+            flags |= ANIMATION_FLAG;
+        }
+    }
+    if (MuxImageCount(images, WEBP_CHUNK_ALPHA) > 0) {
+        flags |= ALPHA_FLAG; // Some images have an alpha channel.
+    }
+
+    err = GetAdjustedCanvasSize(mux, flags, &width, &height);
+    if (err != WEBP_MUX_OK) return err;
+
+    if (width <= 0 || height <= 0) {
+        return WEBP_MUX_INVALID_ARGUMENT;
+    }
+    if (width > MAX_CANVAS_SIZE || height > MAX_CANVAS_SIZE) {
+        return WEBP_MUX_INVALID_ARGUMENT;
+    }
+
+    // A non-zero canvas size stored in the mux overrides the computed one, but
+    // must be large enough to contain it.
+    if (mux->canvas_width_ != 0 || mux->canvas_height_ != 0) {
+        if (width > mux->canvas_width_ || height > mux->canvas_height_) {
+            return WEBP_MUX_INVALID_ARGUMENT;
+        }
+        width = mux->canvas_width_;
+        height = mux->canvas_height_;
+    }
+
+    if (flags == 0) {
+        // For Simple Image, VP8X chunk should not be added.
+        return WEBP_MUX_OK;
+    }
+
+    if (MuxHasAlpha(images)) {
+        // This means some frames explicitly/implicitly contain alpha.
+        // Note: This 'flags' update must NOT be done for a lossless image
+        // without a VP8X chunk!
+        flags |= ALPHA_FLAG;
+    }
+
+    PutLE32(data + 0, flags);      // VP8X chunk flags.
+    PutLE24(data + 4, width - 1);  // canvas width.
+    PutLE24(data + 7, height - 1); // canvas height.
+
+    return MuxSet(mux, kChunks[IDX_VP8X].tag, 1, &vp8x, 1);
+}
+
+// Cleans up 'mux' by removing any unnecessary chunks:
+//  - the ANMF/FRGM header of a lone frame/fragment, when no canvas size is set
+//    or when the frame has exactly the canvas dimensions;
+//  - the ANIM chunk(s), when no animation frame remains.
+// Returns the first error reported by the chunk counting/deletion helpers, or
+// WEBP_MUX_OK.
+static WebPMuxError MuxCleanup(WebPMux* const mux) {
+    int num_frames;
+    int num_fragments;
+    int num_anim_chunks;
+
+    // If we have an image with a single fragment or frame, and its rectangle
+    // covers the whole canvas, convert it to a non-animated non-fragmented image
+    // (to avoid writing FRGM/ANMF chunk unnecessarily).
+    WebPMuxError err = WebPMuxNumChunks(mux, kChunks[IDX_ANMF].id, &num_frames);
+    if (err != WEBP_MUX_OK) return err;
+    err = WebPMuxNumChunks(mux, kChunks[IDX_FRGM].id, &num_fragments);
+    if (err != WEBP_MUX_OK) return err;
+    if (num_frames == 1 || num_fragments == 1) {
+        WebPMuxImage* frame_frag;
+        err = MuxImageGetNth((const WebPMuxImage**)&mux->images_, 1, &frame_frag);
+        assert(err == WEBP_MUX_OK); // We know that one frame/fragment does exist.
+        assert(frame_frag != NULL);
+        if (frame_frag->header_ != NULL &&
+            ((mux->canvas_width_ == 0 && mux->canvas_height_ == 0) ||
+             (frame_frag->width_ == mux->canvas_width_ && frame_frag->height_ == mux->canvas_height_))) {
+            assert(frame_frag->header_->tag_ == kChunks[IDX_ANMF].tag ||
+                   frame_frag->header_->tag_ == kChunks[IDX_FRGM].tag);
+            ChunkDelete(frame_frag->header_); // Removes ANMF/FRGM chunk.
+            frame_frag->header_ = NULL;
+            // The image is now a plain one: update the counts so that the ANIM
+            // chunk gets removed below as well.
+            num_frames = 0;
+            num_fragments = 0;
+        }
+    }
+    // Remove ANIM chunk if this is a non-animated image.
+    err = WebPMuxNumChunks(mux, kChunks[IDX_ANIM].id, &num_anim_chunks);
+    if (err != WEBP_MUX_OK) return err;
+    if (num_anim_chunks >= 1 && num_frames == 0) {
+        err = MuxDeleteAllNamedData(mux, kChunks[IDX_ANIM].tag);
+        if (err != WEBP_MUX_OK) return err;
+    }
+    return WEBP_MUX_OK;
+}
+
+// Sums MuxImageDiskSize() over every image of 'wpi_list' (0 for an empty
+// list).
+static size_t ImageListDiskSize(const WebPMuxImage* wpi_list) {
+    const WebPMuxImage* wpi;
+    size_t total = 0;
+    for (wpi = wpi_list; wpi != NULL; wpi = wpi->next_) {
+        total += MuxImageDiskSize(wpi);
+    }
+    return total;
+}
+
+// Emits every image of 'wpi_list', in list order, starting at 'dst'.
+// Returns the write position after the last emitted image.
+static uint8_t* ImageListEmit(const WebPMuxImage* wpi_list, uint8_t* dst) {
+    const WebPMuxImage* wpi;
+    uint8_t* out = dst;
+    for (wpi = wpi_list; wpi != NULL; wpi = wpi->next_) {
+        out = MuxImageEmit(wpi, out);
+    }
+    return out;
+}
+
+// Serializes 'mux' into a newly allocated WebP RIFF buffer.
+// 'assembled_data' is zeroed first; on success it receives the buffer and its
+// size, on failure it is left with NULL bytes and size 0.
+// Returns WEBP_MUX_INVALID_ARGUMENT for NULL arguments, WEBP_MUX_MEMORY_ERROR
+// if the buffer cannot be allocated, or the error reported while finalizing
+// or validating the mux.
+WebPMuxError WebPMuxAssemble(WebPMux* mux, WebPData* assembled_data) {
+    WebPMuxError err;
+    size_t total_size;
+    uint8_t* buffer;
+    uint8_t* cursor;
+
+    if (assembled_data == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+    // Clean up returned data, in case something goes wrong.
+    memset(assembled_data, 0, sizeof(*assembled_data));
+    if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+
+    // Finalize mux: drop superfluous chunks, then rebuild the VP8X chunk.
+    err = MuxCleanup(mux);
+    if (err == WEBP_MUX_OK) err = CreateVP8XChunk(mux);
+    if (err != WEBP_MUX_OK) return err;
+
+    // Compute the total size: RIFF header plus every chunk/image list.
+    total_size = RIFF_HEADER_SIZE;
+    total_size += ChunkListDiskSize(mux->vp8x_);
+    total_size += ChunkListDiskSize(mux->iccp_);
+    total_size += ChunkListDiskSize(mux->anim_);
+    total_size += ImageListDiskSize(mux->images_);
+    total_size += ChunkListDiskSize(mux->exif_);
+    total_size += ChunkListDiskSize(mux->xmp_);
+    total_size += ChunkListDiskSize(mux->unknown_);
+
+    buffer = (uint8_t*)WebPSafeMalloc(1ULL, total_size);
+    if (buffer == NULL) return WEBP_MUX_MEMORY_ERROR;
+
+    // Emit header & chunks; this order defines the layout of the output.
+    cursor = MuxEmitRiffHeader(buffer, total_size);
+    cursor = ChunkListEmit(mux->vp8x_, cursor);
+    cursor = ChunkListEmit(mux->iccp_, cursor);
+    cursor = ChunkListEmit(mux->anim_, cursor);
+    cursor = ImageListEmit(mux->images_, cursor);
+    cursor = ChunkListEmit(mux->exif_, cursor);
+    cursor = ChunkListEmit(mux->xmp_, cursor);
+    cursor = ChunkListEmit(mux->unknown_, cursor);
+    assert(cursor == buffer + total_size);
+
+    // Validate mux; hand the buffer over only if the mux is valid.
+    err = MuxValidate(mux);
+    if (err == WEBP_MUX_OK) {
+        assembled_data->bytes = buffer;
+        assembled_data->size = total_size;
+    } else {
+        WebPSafeFree(buffer);
+        assembled_data->bytes = NULL;
+        assembled_data->size = 0;
+    }
+    return err;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/mux/muxi.h b/codec/L2/demos/webpEnc/host/src/mux/muxi.h
new file mode 100644
index 0000000000..57963f78d5
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/mux/muxi.h
@@ -0,0 +1,231 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Internal header for mux library.
+//
+// Author: Urvang (urvang@google.com)
+
+#ifndef WEBP_MUX_MUXI_H_
+#define WEBP_MUX_MUXI_H_
+
+#include <stdlib.h>
+#include "../dec/vp8i.h"
+#include "../dec/vp8li.h"
+#include "../webp/mux.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Defines and constants.
+
+#define MUX_MAJ_VERSION 0
+#define MUX_MIN_VERSION 3
+#define MUX_REV_VERSION 0
+
+// Chunk object: one node of a singly-linked chunk list.
+typedef struct WebPChunk WebPChunk;
+struct WebPChunk {
+    uint32_t tag_; // Chunk tag (compared against kChunks[].tag).
+    int owner_; // True if *data_ memory is owned internally.
+                // VP8X, ANIM, and other internally created chunks
+                // like ANMF/FRGM are always owned.
+    WebPData data_;   // Chunk payload.
+    WebPChunk* next_; // Next chunk in the list, or NULL.
+};
+
+// MuxImage object. Stores a full WebP image (including ANMF/FRGM chunk, ALPH
+// chunk and VP8/VP8L chunk). Images are chained through 'next_'.
+typedef struct WebPMuxImage WebPMuxImage;
+struct WebPMuxImage {
+    WebPChunk* header_;  // Corresponds to WEBP_CHUNK_ANMF/WEBP_CHUNK_FRGM.
+    WebPChunk* alpha_;   // Corresponds to WEBP_CHUNK_ALPHA.
+    WebPChunk* img_;     // Corresponds to WEBP_CHUNK_IMAGE.
+    WebPChunk* unknown_; // Corresponds to WEBP_CHUNK_UNKNOWN.
+    int width_;      // Image width (updated by MuxImageFinalize()).
+    int height_;     // Image height (updated by MuxImageFinalize()).
+    int has_alpha_;  // Through ALPH chunk or as part of VP8L.
+    int is_partial_; // True if only some of the chunks are filled.
+    WebPMuxImage* next_; // Next image in the list, or NULL.
+};
+
+// Main mux object. Stores data chunks.
+// Each WebPChunk* member is the head of a chunk list (NULL when empty).
+struct WebPMux {
+    WebPMuxImage* images_; // List of images (frames/fragments or one image).
+    WebPChunk* iccp_;      // ICCP chunk list.
+    WebPChunk* exif_;      // EXIF chunk list.
+    WebPChunk* xmp_;       // XMP chunk list.
+    WebPChunk* anim_;      // ANIM chunk list.
+    WebPChunk* vp8x_;      // VP8X chunk list.
+
+    WebPChunk* unknown_;   // Chunks whose tag is not one of the known ones.
+    int canvas_width_;     // Explicit canvas width; 0 together with
+    int canvas_height_;    // canvas_height_ == 0 means "not set".
+};
+
+// CHUNK_INDEX enum: used for indexing within 'kChunks' (defined below) only.
+// Note: the reason for having two enums ('WebPChunkId' and 'CHUNK_INDEX') is to
+// allow two different chunks to have the same id (e.g. WebPChunkId
+// 'WEBP_CHUNK_IMAGE' can correspond to CHUNK_INDEX 'IDX_VP8' or 'IDX_VP8L').
+// The enumerator order must match the order of the 'kChunks' entries.
+typedef enum {
+    IDX_VP8X = 0,
+    IDX_ICCP,
+    IDX_ANIM,
+    IDX_ANMF,
+    IDX_FRGM,
+    IDX_ALPHA,
+    IDX_VP8,
+    IDX_VP8L,
+    IDX_EXIF,
+    IDX_XMP,
+    IDX_UNKNOWN,
+
+    IDX_NIL,
+    IDX_LAST_CHUNK // Number of entries in 'kChunks'; not a chunk itself.
+} CHUNK_INDEX;
+
+#define NIL_TAG 0x00000000u // To signal void chunk.
+
+// Static description of one chunk type (one entry of 'kChunks').
+typedef struct {
+    uint32_t tag;   // Chunk tag.
+    WebPChunkId id; // Public chunk id; several tags may share one id.
+    uint32_t size;  // Payload size for chunk types that have a fixed one.
+} ChunkInfo;
+
+extern const ChunkInfo kChunks[IDX_LAST_CHUNK];
+
+//------------------------------------------------------------------------------
+// Chunk object management.
+
+// Initialize.
+void ChunkInit(WebPChunk* const chunk);
+
+// Get chunk index from chunk tag. Returns IDX_UNKNOWN if not found.
+CHUNK_INDEX ChunkGetIndexFromTag(uint32_t tag);
+
+// Get chunk id from chunk tag. Returns WEBP_CHUNK_UNKNOWN if not found.
+WebPChunkId ChunkGetIdFromTag(uint32_t tag);
+
+// Convert a fourcc string to a tag.
+uint32_t ChunkGetTagFromFourCC(const char fourcc[4]);
+
+// Get chunk index from fourcc. Returns IDX_UNKNOWN if given fourcc is unknown.
+CHUNK_INDEX ChunkGetIndexFromFourCC(const char fourcc[4]);
+
+// Search for nth chunk with given 'tag' in the chunk list.
+// nth = 0 means "last of the list".
+WebPChunk* ChunkSearchList(WebPChunk* first, uint32_t nth, uint32_t tag);
+
+// Fill the chunk with the given data.
+WebPMuxError ChunkAssignData(WebPChunk* chunk, const WebPData* const data, int copy_data, uint32_t tag);
+
+// Sets 'chunk' at nth position in the 'chunk_list'.
+// nth = 0 has the special meaning "last of the list".
+// On success ownership is transferred from 'chunk' to the 'chunk_list'.
+WebPMuxError ChunkSetNth(WebPChunk* chunk, WebPChunk** chunk_list, uint32_t nth);
+
+// Releases chunk and returns chunk->next_.
+WebPChunk* ChunkRelease(WebPChunk* const chunk);
+
+// Deletes given chunk & returns chunk->next_.
+WebPChunk* ChunkDelete(WebPChunk* const chunk);
+
+// Deletes all chunks in the given chunk list.
+void ChunkListDelete(WebPChunk** const chunk_list);
+
+// Returns size of the chunk including chunk header and padding byte (if any):
+// the payload size is rounded up to the next even value and the chunk header
+// size is added.
+static WEBP_INLINE size_t SizeWithPadding(size_t chunk_size) {
+    // The mask is built in size_t: '~1U' is only 'unsigned int' wide and would
+    // clear the upper bits of the size where size_t is wider.
+    return CHUNK_HEADER_SIZE + ((chunk_size + 1) & ~(size_t)1);
+}
+
+// Size of a chunk including header and padding.
+// The payload must not exceed MAX_CHUNK_PAYLOAD (asserted). A payload of
+// exactly MAX_CHUNK_PAYLOAD is valid: the set APIs only reject sizes strictly
+// greater than that limit.
+static WEBP_INLINE size_t ChunkDiskSize(const WebPChunk* chunk) {
+    const size_t data_size = chunk->data_.size;
+    assert(data_size <= MAX_CHUNK_PAYLOAD);
+    return SizeWithPadding(data_size);
+}
+
+// Total size of a list of chunks.
+size_t ChunkListDiskSize(const WebPChunk* chunk_list);
+
+// Write out the given list of chunks into 'dst'.
+uint8_t* ChunkListEmit(const WebPChunk* chunk_list, uint8_t* dst);
+
+//------------------------------------------------------------------------------
+// MuxImage object management.
+
+// Initialize.
+void MuxImageInit(WebPMuxImage* const wpi);
+
+// Releases image 'wpi' and returns wpi->next.
+WebPMuxImage* MuxImageRelease(WebPMuxImage* const wpi);
+
+// Delete image 'wpi' and return the next image in the list or NULL.
+// 'wpi' can be NULL.
+WebPMuxImage* MuxImageDelete(WebPMuxImage* const wpi);
+
+// Count number of images matching the given tag id in the 'wpi_list'.
+// If id == WEBP_CHUNK_NIL, all images will be matched.
+int MuxImageCount(const WebPMuxImage* wpi_list, WebPChunkId id);
+
+// Update width/height/has_alpha info from chunks within wpi.
+// Also remove ALPH chunk if not needed.
+int MuxImageFinalize(WebPMuxImage* const wpi);
+
+// Returns 1 if 'id' designates a chunk stored inside a WebPMuxImage (ANMF,
+// FRGM, ALPHA or IMAGE), 0 otherwise.
+static WEBP_INLINE int IsWPI(WebPChunkId id) {
+    return id == WEBP_CHUNK_ANMF || id == WEBP_CHUNK_FRGM || id == WEBP_CHUNK_ALPHA || id == WEBP_CHUNK_IMAGE;
+}
+
+// Pushes 'wpi' at the end of 'wpi_list'.
+WebPMuxError MuxImagePush(const WebPMuxImage* wpi, WebPMuxImage** wpi_list);
+
+// Delete nth image in the image list.
+WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth);
+
+// Get nth image in the image list.
+WebPMuxError MuxImageGetNth(const WebPMuxImage** wpi_list, uint32_t nth, WebPMuxImage** wpi);
+
+// Total size of the given image.
+size_t MuxImageDiskSize(const WebPMuxImage* const wpi);
+
+// Write out the given image into 'dst'.
+uint8_t* MuxImageEmit(const WebPMuxImage* const wpi, uint8_t* dst);
+
+//------------------------------------------------------------------------------
+// Helper methods for mux.
+
+// Checks if the given image list contains at least one image with alpha.
+int MuxHasAlpha(const WebPMuxImage* images);
+
+// Write out RIFF header into 'data', given total data size 'size'.
+uint8_t* MuxEmitRiffHeader(uint8_t* const data, size_t size);
+
+// Returns the list where chunk with given ID is to be inserted in mux.
+WebPChunk** MuxGetChunkListFromId(const WebPMux* mux, WebPChunkId id);
+
+// Validates the given mux object.
+WebPMuxError MuxValidate(const WebPMux* const mux);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_MUX_MUXI_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/mux/muxinternal.c b/codec/L2/demos/webpEnc/host/src/mux/muxinternal.c
new file mode 100644
index 0000000000..b830d78191
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/mux/muxinternal.c
@@ -0,0 +1,545 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Internal objects and utils for mux.
+//
+// Authors: Urvang (urvang@google.com)
+//          Vikas (vikasa@google.com)
+
+#include <assert.h>
+#include "./muxi.h"
+#include "../utils/utils.h"
+
+// Size placeholder for chunks whose payload length is not fixed.
+#define UNDEFINED_CHUNK_SIZE (-1)
+
+// Table of known chunks: {fourcc tag, chunk id, fixed payload size}.
+// Code in this file indexes the table with IDX_* constants, so the entry order
+// presumably has to match the CHUNK_INDEX enum (declared elsewhere) -- confirm
+// before reordering.
+// Both 'VP8 ' and 'VP8L' map to WEBP_CHUNK_IMAGE.
+// The tag lookups in this file stop at the first NIL_TAG entry, i.e. at the
+// WEBP_CHUNK_UNKNOWN row; the final WEBP_CHUNK_NIL row closes the table.
+const ChunkInfo kChunks[] = {{MKFOURCC('V', 'P', '8', 'X'), WEBP_CHUNK_VP8X, VP8X_CHUNK_SIZE},
+                             {MKFOURCC('I', 'C', 'C', 'P'), WEBP_CHUNK_ICCP, UNDEFINED_CHUNK_SIZE},
+                             {MKFOURCC('A', 'N', 'I', 'M'), WEBP_CHUNK_ANIM, ANIM_CHUNK_SIZE},
+                             {MKFOURCC('A', 'N', 'M', 'F'), WEBP_CHUNK_ANMF, ANMF_CHUNK_SIZE},
+                             {MKFOURCC('F', 'R', 'G', 'M'), WEBP_CHUNK_FRGM, FRGM_CHUNK_SIZE},
+                             {MKFOURCC('A', 'L', 'P', 'H'), WEBP_CHUNK_ALPHA, UNDEFINED_CHUNK_SIZE},
+                             {MKFOURCC('V', 'P', '8', ' '), WEBP_CHUNK_IMAGE, UNDEFINED_CHUNK_SIZE},
+                             {MKFOURCC('V', 'P', '8', 'L'), WEBP_CHUNK_IMAGE, UNDEFINED_CHUNK_SIZE},
+                             {MKFOURCC('E', 'X', 'I', 'F'), WEBP_CHUNK_EXIF, UNDEFINED_CHUNK_SIZE},
+                             {MKFOURCC('X', 'M', 'P', ' '), WEBP_CHUNK_XMP, UNDEFINED_CHUNK_SIZE},
+                             {NIL_TAG, WEBP_CHUNK_UNKNOWN, UNDEFINED_CHUNK_SIZE},
+
+                             {NIL_TAG, WEBP_CHUNK_NIL, UNDEFINED_CHUNK_SIZE}};
+
+//------------------------------------------------------------------------------
+
+// Returns the mux library version packed into one integer: major version in
+// bits 16 and up, minor version in bits 8..15, revision in the low bits.
+int WebPGetMuxVersion(void) {
+    const int major_part = MUX_MAJ_VERSION << 16;
+    const int minor_part = MUX_MIN_VERSION << 8;
+    return major_part | minor_part | MUX_REV_VERSION;
+}
+
+//------------------------------------------------------------------------------
+// Life of a chunk object.
+
+// Resets 'chunk' to the empty state: every field zeroed, tag set to NIL_TAG.
+// Nothing is freed here.
+void ChunkInit(WebPChunk* const chunk) {
+    assert(chunk != NULL);
+    memset(chunk, 0, sizeof(WebPChunk));
+    chunk->tag_ = NIL_TAG;
+}
+
+// Clears the payload of 'chunk' if the chunk owns it, then re-initializes the
+// chunk in place (the chunk object itself is not freed).
+// Returns the chunk's former successor, or NULL when 'chunk' is NULL.
+WebPChunk* ChunkRelease(WebPChunk* const chunk) {
+    WebPChunk* successor = NULL;
+    if (chunk != NULL) {
+        if (chunk->owner_) WebPDataClear(&chunk->data_);
+        successor = chunk->next_;  // Saved before ChunkInit() wipes it.
+        ChunkInit(chunk);
+    }
+    return successor;
+}
+
+//------------------------------------------------------------------------------
+// Chunk misc methods.
+
+// Maps a fourcc 'tag' to its position in kChunks[].
+// Returns IDX_UNKNOWN when the tag is not listed before the first NIL_TAG row.
+CHUNK_INDEX ChunkGetIndexFromTag(uint32_t tag) {
+    int idx = 0;
+    while (kChunks[idx].tag != NIL_TAG) {
+        if (kChunks[idx].tag == tag) return (CHUNK_INDEX)idx;
+        ++idx;
+    }
+    return IDX_UNKNOWN;
+}
+
+// Maps a fourcc 'tag' to the chunk id recorded for it in kChunks[].
+// Returns WEBP_CHUNK_UNKNOWN when the tag is not listed before the first
+// NIL_TAG row.
+WebPChunkId ChunkGetIdFromTag(uint32_t tag) {
+    const ChunkInfo* entry;
+    for (entry = kChunks; entry->tag != NIL_TAG; ++entry) {
+        if (entry->tag == tag) return entry->id;
+    }
+    return WEBP_CHUNK_UNKNOWN;
+}
+
+// Packs the four characters of 'fourcc' into a 32-bit tag via MKFOURCC.
+uint32_t ChunkGetTagFromFourCC(const char fourcc[4]) {
+    const uint32_t packed = MKFOURCC(fourcc[0], fourcc[1], fourcc[2], fourcc[3]);
+    return packed;
+}
+
+// Converts 'fourcc' to a tag and returns that tag's index in kChunks[]
+// (IDX_UNKNOWN if it is not a listed tag).
+CHUNK_INDEX ChunkGetIndexFromFourCC(const char fourcc[4]) {
+    return ChunkGetIndexFromTag(ChunkGetTagFromFourCC(fourcc));
+}
+
+//------------------------------------------------------------------------------
+// Chunk search methods.
+
+// Starting at 'chunk' (inclusive), returns the first chunk in the list whose
+// tag equals 'tag', or NULL if the list ends before one is found.
+static WebPChunk* ChunkSearchNextInList(WebPChunk* chunk, uint32_t tag) {
+    for (; chunk != NULL; chunk = chunk->next_) {
+        if (chunk->tag_ == tag) break;
+    }
+    return chunk;
+}
+
+// Returns the nth chunk (1-based) carrying 'tag' in the list starting at
+// 'first', or NULL if the list holds fewer than 'nth' such chunks.
+// nth == 0 is special: it returns the last chunk with that tag (NULL only if
+// there is none at all).
+WebPChunk* ChunkSearchList(WebPChunk* first, uint32_t nth, uint32_t tag) {
+    uint32_t iter = nth;
+    first = ChunkSearchNextInList(first, tag);
+    if (first == NULL) return NULL;
+
+    // For nth == 0 the pre-decrement wraps 'iter' around, so in practice the
+    // loop only stops when no further matching chunk exists, leaving 'first'
+    // on the last match.
+    while (--iter != 0) {
+        WebPChunk* next_chunk = ChunkSearchNextInList(first->next_, tag);
+        if (next_chunk == NULL) break;
+        first = next_chunk;
+    }
+    // iter > 0 here means the list ran out before the nth match was reached;
+    // that is only an error when a specific position (nth > 0) was requested.
+    return ((nth > 0) && (iter > 0)) ? NULL : first;
+}
+
+// Locates the link through which a chunk can be inserted at 1-based position
+// 'nth' of 'chunk_list'. '*location' receives the address of the 'next_'
+// pointer (or of the list head) that currently leads to position nth.
+// Returns 1 if the position exists, or if the walk ended at the tail and
+// either nth == 0 or nth is one past the current length; 0 otherwise.
+static int ChunkSearchListToSet(WebPChunk** chunk_list, uint32_t nth, WebPChunk*** const location) {
+    uint32_t visited = 0;
+    WebPChunk** slot = chunk_list;
+    assert(chunk_list != NULL);
+
+    for (*location = slot; *slot != NULL; *location = slot) {
+        if (++visited == nth) return 1;  // Found.
+        slot = &(*slot)->next_;
+    }
+
+    // Reaching the tail is fine when the caller wants to append.
+    return (nth == 0) || (visited == nth - 1);
+}
+
+//------------------------------------------------------------------------------
+// Chunk writer methods.
+
+// Releases whatever 'chunk' currently holds, then gives it 'tag' and, when
+// 'data' is non-NULL, the payload 'data'.
+// The payload is duplicated (and owned by the chunk) if 'copy_data' is
+// non-zero or the tag is VP8X/ANIM; otherwise the chunk merely references it.
+// Returns WEBP_MUX_MEMORY_ERROR if the duplication fails (the chunk is then
+// left in its released state), WEBP_MUX_OK otherwise.
+WebPMuxError ChunkAssignData(WebPChunk* chunk, const WebPData* const data, int copy_data, uint32_t tag) {
+    // Internally allocated chunks always get their own copy of the data.
+    const int is_internal = (tag == kChunks[IDX_VP8X].tag) || (tag == kChunks[IDX_ANIM].tag);
+    const int must_copy = is_internal ? 1 : copy_data;
+
+    ChunkRelease(chunk);
+
+    if (data != NULL && must_copy) {
+        if (!WebPDataCopy(data, &chunk->data_)) return WEBP_MUX_MEMORY_ERROR;
+        chunk->owner_ = 1;  // Chunk now owns its payload.
+    } else if (data != NULL) {
+        chunk->data_ = *data;  // Borrow the caller's bytes.
+    }
+    chunk->tag_ = tag;
+    return WEBP_MUX_OK;
+}
+
+// Inserts a heap-allocated copy of '*chunk' at 1-based position 'nth' of
+// 'chunk_list' (nth == 0 appends). Ownership of the payload moves to the new
+// list node: 'chunk->owner_' is cleared on success.
+// Returns WEBP_MUX_NOT_FOUND if the position is not reachable and
+// WEBP_MUX_MEMORY_ERROR if the node cannot be allocated.
+WebPMuxError ChunkSetNth(WebPChunk* chunk, WebPChunk** chunk_list, uint32_t nth) {
+    WebPChunk** slot = chunk_list;
+    WebPChunk* node;
+
+    if (!ChunkSearchListToSet(chunk_list, nth, &slot)) return WEBP_MUX_NOT_FOUND;
+
+    node = (WebPChunk*)WebPSafeMalloc(1ULL, sizeof(*node));
+    if (node == NULL) return WEBP_MUX_MEMORY_ERROR;
+
+    *node = *chunk;
+    chunk->owner_ = 0;  // The list node owns the data from now on.
+    node->next_ = *slot;
+    *slot = node;
+    return WEBP_MUX_OK;
+}
+
+//------------------------------------------------------------------------------
+// Chunk deletion method(s).
+
+// Releases the contents of 'chunk' and frees the chunk object itself.
+// Returns the chunk that followed it in its list.
+WebPChunk* ChunkDelete(WebPChunk* const chunk) {
+    WebPChunk* const successor = ChunkRelease(chunk);
+    WebPSafeFree(chunk);
+    return successor;
+}
+
+// Deletes every chunk of the list and leaves '*chunk_list' set to NULL.
+void ChunkListDelete(WebPChunk** const chunk_list) {
+    // Each step frees the head and re-points the list at its successor.
+    for (; *chunk_list != NULL; *chunk_list = ChunkDelete(*chunk_list)) {
+    }
+}
+
+//------------------------------------------------------------------------------
+// Chunk serialization methods.
+
+// Serializes 'chunk' into 'dst': tag, 32-bit little-endian payload size,
+// payload bytes, and one zero padding byte when the payload size is odd.
+// Returns 'dst' advanced by the chunk's on-disk size.
+static uint8_t* ChunkEmit(const WebPChunk* const chunk, uint8_t* dst) {
+    size_t chunk_size;
+    // Validate the pointer before it is dereferenced.
+    assert(chunk);
+    assert(chunk->tag_ != NIL_TAG);
+    chunk_size = chunk->data_.size;
+    // The size field is 32 bits wide; check before writing a truncated value.
+    assert(chunk_size == (uint32_t)chunk_size);
+    PutLE32(dst + 0, chunk->tag_);
+    PutLE32(dst + TAG_SIZE, (uint32_t)chunk_size);
+    // A chunk assigned without data has a NULL payload pointer; memcpy() must
+    // not be handed a NULL source, even for a zero length.
+    if (chunk_size > 0) {
+        memcpy(dst + CHUNK_HEADER_SIZE, chunk->data_.bytes, chunk_size);
+    }
+    if (chunk_size & 1) dst[CHUNK_HEADER_SIZE + chunk_size] = 0; // Add padding.
+    return dst + ChunkDiskSize(chunk);
+}
+
+// Serializes every chunk of 'chunk_list', in list order, into 'dst'.
+// Returns the position just past the last byte written.
+uint8_t* ChunkListEmit(const WebPChunk* chunk_list, uint8_t* dst) {
+    const WebPChunk* cur;
+    for (cur = chunk_list; cur != NULL; cur = cur->next_) {
+        dst = ChunkEmit(cur, dst);
+    }
+    return dst;
+}
+
+// Returns the sum of the on-disk sizes of all chunks in 'chunk_list'
+// (0 for an empty list).
+size_t ChunkListDiskSize(const WebPChunk* chunk_list) {
+    size_t total = 0;
+    const WebPChunk* cur;
+    for (cur = chunk_list; cur != NULL; cur = cur->next_) {
+        total += ChunkDiskSize(cur);
+    }
+    return total;
+}
+
+//------------------------------------------------------------------------------
+// Life of a MuxImage object.
+
+// Zeroes every field of 'wpi'. Nothing is freed here.
+void MuxImageInit(WebPMuxImage* const wpi) {
+    assert(wpi != NULL);
+    memset(wpi, 0, sizeof(WebPMuxImage));
+}
+
+// Deletes the header, alpha and image chunks of 'wpi' plus its list of
+// unknown chunks, then re-initializes 'wpi' in place (the image object itself
+// is not freed).
+// Returns the image's former successor, or NULL when 'wpi' is NULL.
+WebPMuxImage* MuxImageRelease(WebPMuxImage* const wpi) {
+    WebPMuxImage* successor = NULL;
+    if (wpi != NULL) {
+        ChunkDelete(wpi->header_);
+        ChunkDelete(wpi->alpha_);
+        ChunkDelete(wpi->img_);
+        ChunkListDelete(&wpi->unknown_);
+
+        successor = wpi->next_;  // Saved before MuxImageInit() wipes it.
+        MuxImageInit(wpi);
+    }
+    return successor;
+}
+
+//------------------------------------------------------------------------------
+// MuxImage search methods.
+
+// Returns the address of the chunk slot inside 'wpi' that corresponds to
+// 'id': the header slot for ANMF/FRGM, the alpha slot for ALPH, the image
+// slot for the image payload. Returns NULL for any other id.
+static WebPChunk** GetChunkListFromId(const WebPMuxImage* const wpi, WebPChunkId id) {
+    assert(wpi != NULL);
+    if (id == WEBP_CHUNK_ANMF || id == WEBP_CHUNK_FRGM) {
+        return (WebPChunk**)&wpi->header_;
+    }
+    if (id == WEBP_CHUNK_ALPHA) {
+        return (WebPChunk**)&wpi->alpha_;
+    }
+    if (id == WEBP_CHUNK_IMAGE) {
+        return (WebPChunk**)&wpi->img_;
+    }
+    return NULL;
+}
+
+// Counts the images in 'wpi_list'.
+// With id == WEBP_CHUNK_NIL every image is counted. Otherwise an image is
+// counted when the slot selected by 'id' (header, alpha or image) holds a
+// chunk whose tag maps back to 'id'.
+// An 'id' that is not an image-related chunk id matches nothing, so the
+// result is 0.
+int MuxImageCount(const WebPMuxImage* wpi_list, WebPChunkId id) {
+    int count = 0;
+    const WebPMuxImage* current;
+    for (current = wpi_list; current != NULL; current = current->next_) {
+        if (id == WEBP_CHUNK_NIL) {
+            ++count; // Special case: count all images.
+        } else {
+            // GetChunkListFromId() returns NULL for ids that have no slot in
+            // an image; guard against dereferencing it.
+            WebPChunk** const slot = GetChunkListFromId(current, id);
+            const WebPChunk* const wpi_chunk = (slot != NULL) ? *slot : NULL;
+            if (wpi_chunk != NULL) {
+                const WebPChunkId wpi_chunk_id = ChunkGetIdFromTag(wpi_chunk->tag_);
+                if (wpi_chunk_id == id) ++count; // Count images with a matching 'id'.
+            }
+        }
+    }
+    return count;
+}
+
+// Finds the link that leads to the image at 1-based position 'nth' of
+// 'wpi_list'; nth == 0 stands for the last image. '*location' receives the
+// address of that link (the list head or a 'next_' field).
+// Returns 1 if the image exists, 0 otherwise.
+static int SearchImageToGetOrDelete(WebPMuxImage** wpi_list, uint32_t nth, WebPMuxImage*** const location) {
+    uint32_t seen = 0;
+    uint32_t target = nth;
+    WebPMuxImage** slot = wpi_list;
+    assert(wpi_list);
+    *location = slot;
+
+    if (target == 0) {
+        // Translate "last image" into its actual position.
+        target = MuxImageCount(*slot, WEBP_CHUNK_NIL);
+        if (target == 0) return 0; // Not found.
+    }
+
+    for (; *slot != NULL; slot = &(*slot)->next_, *location = slot) {
+        if (++seen == target) return 1; // Found.
+    }
+    return 0; // Not found.
+}
+
+//------------------------------------------------------------------------------
+// MuxImage writer methods.
+
+// Appends a newly allocated, shallow (struct) copy of '*wpi' to the end of
+// 'wpi_list'. The copy's 'next_' is set to NULL.
+// Returns WEBP_MUX_MEMORY_ERROR, leaving the list untouched, if the node
+// cannot be allocated.
+WebPMuxError MuxImagePush(const WebPMuxImage* wpi, WebPMuxImage** wpi_list) {
+    WebPMuxImage** tail = wpi_list;
+    WebPMuxImage* node;
+
+    // Advance to the link that terminates the list.
+    while (*tail != NULL) tail = &(*tail)->next_;
+
+    node = (WebPMuxImage*)WebPSafeMalloc(1ULL, sizeof(*node));
+    if (node == NULL) return WEBP_MUX_MEMORY_ERROR;
+    *node = *wpi;
+    node->next_ = NULL;
+
+    *tail = node;
+    return WEBP_MUX_OK;
+}
+
+//------------------------------------------------------------------------------
+// MuxImage deletion methods.
+
+// Releases the components of 'wpi' and frees the image object itself.
+// Returns the image that followed it; releasing a NULL 'wpi' is a no-op that
+// yields NULL.
+WebPMuxImage* MuxImageDelete(WebPMuxImage* const wpi) {
+    WebPMuxImage* const successor = MuxImageRelease(wpi);
+    WebPSafeFree(wpi);
+    return successor;
+}
+
+// Deletes the image at 1-based position 'nth' (nth == 0: the last image) and
+// relinks the list around it.
+// Returns WEBP_MUX_NOT_FOUND if there is no such image.
+WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth) {
+    WebPMuxImage** slot = wpi_list;
+    assert(wpi_list);
+    if (SearchImageToGetOrDelete(wpi_list, nth, &slot)) {
+        *slot = MuxImageDelete(*slot);
+        return WEBP_MUX_OK;
+    }
+    return WEBP_MUX_NOT_FOUND;
+}
+
+//------------------------------------------------------------------------------
+// MuxImage reader methods.
+
+// Stores in '*wpi' a pointer to the image at 1-based position 'nth' of the
+// list (nth == 0: the last image). No copy is made.
+// Returns WEBP_MUX_NOT_FOUND if there is no such image.
+WebPMuxError MuxImageGetNth(const WebPMuxImage** wpi_list, uint32_t nth, WebPMuxImage** wpi) {
+    WebPMuxImage** slot = (WebPMuxImage**)wpi_list;
+    assert(wpi_list);
+    assert(wpi);
+    if (SearchImageToGetOrDelete(slot, nth, &slot)) {
+        *wpi = *slot;
+        return WEBP_MUX_OK;
+    }
+    return WEBP_MUX_NOT_FOUND;
+}
+
+//------------------------------------------------------------------------------
+// MuxImage serialization methods.
+
+// Returns the number of bytes 'wpi' occupies when serialized: the on-disk
+// sizes of its header, alpha and image chunks plus its unknown-chunk list,
+// counting only the parts that are present.
+size_t MuxImageDiskSize(const WebPMuxImage* const wpi) {
+    const size_t header_bytes = (wpi->header_ != NULL) ? ChunkDiskSize(wpi->header_) : 0;
+    const size_t alpha_bytes = (wpi->alpha_ != NULL) ? ChunkDiskSize(wpi->alpha_) : 0;
+    const size_t image_bytes = (wpi->img_ != NULL) ? ChunkDiskSize(wpi->img_) : 0;
+    const size_t unknown_bytes = (wpi->unknown_ != NULL) ? ChunkListDiskSize(wpi->unknown_) : 0;
+    return header_bytes + alpha_bytes + image_bytes + unknown_bytes;
+}
+
+// Emits an ANMF/FRGM 'header' chunk into 'dst'. Unlike a regular chunk, the
+// size field written is 'total_size' minus the chunk header, because this
+// chunk encapsulates the image chunks that follow it.
+// Returns 'dst' advanced by the on-disk size of the header chunk alone.
+static uint8_t* ChunkEmitSpecial(const WebPChunk* const header, size_t total_size, uint8_t* dst) {
+    const size_t payload_len = header->data_.size;
+    const size_t enclosed_len = total_size - CHUNK_HEADER_SIZE;
+    uint8_t* const payload = dst + CHUNK_HEADER_SIZE;
+    assert(header->tag_ == kChunks[IDX_ANMF].tag || header->tag_ == kChunks[IDX_FRGM].tag);
+    PutLE32(dst + 0, header->tag_);
+    PutLE32(dst + TAG_SIZE, (uint32_t)enclosed_len);
+    assert(payload_len == (uint32_t)payload_len);
+    memcpy(payload, header->data_.bytes, payload_len);
+    if (payload_len & 1) payload[payload_len] = 0; // Add padding.
+    return dst + ChunkDiskSize(header);
+}
+
+// Serializes 'wpi' into 'dst' and returns the position just past the last
+// byte written. Chunks are emitted strictly in this order, each only if
+// present:
+//   1. ANMF/FRGM header chunk (its size field covers the whole image),
+//   2. ALPH chunk,
+//   3. VP8/VP8L chunk,
+//   4. the image's unknown chunks.
+uint8_t* MuxImageEmit(const WebPMuxImage* const wpi, uint8_t* dst) {
+    uint8_t* out = dst;
+    assert(wpi);
+    if (wpi->header_ != NULL) {
+        const size_t whole_image = MuxImageDiskSize(wpi);
+        out = ChunkEmitSpecial(wpi->header_, whole_image, out);
+    }
+    if (wpi->alpha_ != NULL) {
+        out = ChunkEmit(wpi->alpha_, out);
+    }
+    if (wpi->img_ != NULL) {
+        out = ChunkEmit(wpi->img_, out);
+    }
+    if (wpi->unknown_ != NULL) {
+        out = ChunkListEmit(wpi->unknown_, out);
+    }
+    return out;
+}
+
+//------------------------------------------------------------------------------
+// Helper methods for mux.
+
+// Returns 1 if any image in the list has its 'has_alpha_' field set,
+// 0 otherwise (including for an empty list).
+int MuxHasAlpha(const WebPMuxImage* images) {
+    const WebPMuxImage* cur;
+    for (cur = images; cur != NULL; cur = cur->next_) {
+        if (cur->has_alpha_) return 1;
+    }
+    return 0;
+}
+
+// Writes the RIFF container header into 'data': the 'RIFF' tag, the 32-bit
+// value 'size' minus CHUNK_HEADER_SIZE, then the 'WEBP' tag.
+// Returns 'data' advanced by RIFF_HEADER_SIZE.
+uint8_t* MuxEmitRiffHeader(uint8_t* const data, size_t size) {
+    uint8_t* const size_field = data + TAG_SIZE;
+    uint8_t* const form_field = size_field + CHUNK_SIZE_BYTES;
+    PutLE32(data + 0, MKFOURCC('R', 'I', 'F', 'F'));
+    PutLE32(size_field, (uint32_t)size - CHUNK_HEADER_SIZE);
+    assert(size == (uint32_t)size);
+    PutLE32(form_field, MKFOURCC('W', 'E', 'B', 'P'));
+    return data + RIFF_HEADER_SIZE;
+}
+
+// Returns the address of the chunk list of 'mux' into which a chunk with the
+// given 'id' goes: the VP8X, ICCP, ANIM, EXIF or XMP list for those ids, and
+// the 'unknown' list for every other id.
+WebPChunk** MuxGetChunkListFromId(const WebPMux* mux, WebPChunkId id) {
+    assert(mux != NULL);
+    if (id == WEBP_CHUNK_VP8X) return (WebPChunk**)&mux->vp8x_;
+    if (id == WEBP_CHUNK_ICCP) return (WebPChunk**)&mux->iccp_;
+    if (id == WEBP_CHUNK_ANIM) return (WebPChunk**)&mux->anim_;
+    if (id == WEBP_CHUNK_EXIF) return (WebPChunk**)&mux->exif_;
+    if (id == WEBP_CHUNK_XMP) return (WebPChunk**)&mux->xmp_;
+    return (WebPChunk**)&mux->unknown_;
+}
+
+// Returns 1 when a feature flag and the number of matching items disagree:
+// the flag is set but there are no items, or there are items but the flag is
+// clear. Returns 0 when they agree.
+static int IsNotCompatible(int feature, int num_items) {
+    const int flag_set = (feature != 0);
+    const int has_items = (num_items > 0);
+    return flag_set != has_items;
+}
+
+#define NO_FLAG 0
+
+// Checks the basic constraints for the chunk type at index 'idx':
+//  - the chunk count can be retrieved (it is stored in '*num'),
+//  - the count does not exceed 'max' (pass -1 to skip this check),
+//  - the count agrees with 'feature' in 'vp8x_flags' (pass NO_FLAG to skip).
+// Returns the retrieval error if counting fails, WEBP_MUX_INVALID_ARGUMENT if
+// a constraint is violated, WEBP_MUX_OK otherwise.
+static WebPMuxError ValidateChunk(
+    const WebPMux* const mux, CHUNK_INDEX idx, WebPFeatureFlags feature, uint32_t vp8x_flags, int max, int* num) {
+    const WebPMuxError status = WebPMuxNumChunks(mux, kChunks[idx].id, num);
+    if (status == WEBP_MUX_OK) {
+        const int too_many = (max > -1) && (*num > max);
+        const int flag_mismatch = (feature != NO_FLAG) && IsNotCompatible(vp8x_flags & feature, *num);
+        if (too_many || flag_mismatch) return WEBP_MUX_INVALID_ARGUMENT;
+    }
+    return status;
+}
+
+// Checks that 'mux' is internally consistent: it has at least one image, its
+// chunk counts respect the per-type limits, and the feature flags reported by
+// WebPMuxGetFeatures() agree with the chunks actually present.
+// Returns WEBP_MUX_OK if every check passes, WEBP_MUX_INVALID_ARGUMENT when
+// 'mux' is NULL or a constraint is violated, or the error propagated from a
+// failing query.
+WebPMuxError MuxValidate(const WebPMux* const mux) {
+    int num_iccp;
+    int num_exif;
+    int num_xmp;
+    int num_anim;
+    int num_frames;
+    int num_fragments;
+    int num_vp8x;
+    int num_images;
+    int num_alpha;
+    uint32_t flags;
+    WebPMuxError err;
+
+    // Verify mux is not NULL.
+    if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+
+    // Verify mux has at least one image.
+    if (mux->images_ == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+
+    err = WebPMuxGetFeatures(mux, &flags);
+    if (err != WEBP_MUX_OK) return err;
+
+    // At most one color profile chunk.
+    err = ValidateChunk(mux, IDX_ICCP, ICCP_FLAG, flags, 1, &num_iccp);
+    if (err != WEBP_MUX_OK) return err;
+
+    // At most one EXIF metadata.
+    err = ValidateChunk(mux, IDX_EXIF, EXIF_FLAG, flags, 1, &num_exif);
+    if (err != WEBP_MUX_OK) return err;
+
+    // At most one XMP metadata.
+    err = ValidateChunk(mux, IDX_XMP, XMP_FLAG, flags, 1, &num_xmp);
+    if (err != WEBP_MUX_OK) return err;
+
+    // Animation: ANIMATION_FLAG, ANIM chunk and ANMF chunk(s) are consistent.
+    // At most one ANIM chunk.
+    err = ValidateChunk(mux, IDX_ANIM, NO_FLAG, flags, 1, &num_anim);
+    if (err != WEBP_MUX_OK) return err;
+    err = ValidateChunk(mux, IDX_ANMF, NO_FLAG, flags, -1, &num_frames);
+    if (err != WEBP_MUX_OK) return err;
+
+    {
+        // The flag check is done by hand here (NO_FLAG was passed above)
+        // because the flag must agree with both the ANIM and the ANMF counts.
+        const int has_animation = !!(flags & ANIMATION_FLAG);
+        if (has_animation && (num_anim == 0 || num_frames == 0)) {
+            return WEBP_MUX_INVALID_ARGUMENT;
+        }
+        if (!has_animation && (num_anim == 1 || num_frames > 0)) {
+            return WEBP_MUX_INVALID_ARGUMENT;
+        }
+    }
+
+    // Fragmentation: FRAGMENTS_FLAG and FRGM chunk(s) are consistent.
+    err = ValidateChunk(mux, IDX_FRGM, FRAGMENTS_FLAG, flags, -1, &num_fragments);
+    if (err != WEBP_MUX_OK) return err;
+
+    // Verify either VP8X chunk is present OR there is only one elem in
+    // mux->images_.
+    err = ValidateChunk(mux, IDX_VP8X, NO_FLAG, flags, 1, &num_vp8x);
+    if (err != WEBP_MUX_OK) return err;
+    err = ValidateChunk(mux, IDX_VP8, NO_FLAG, flags, -1, &num_images);
+    if (err != WEBP_MUX_OK) return err;
+    if (num_vp8x == 0 && num_images != 1) return WEBP_MUX_INVALID_ARGUMENT;
+
+    // ALPHA_FLAG & alpha chunk(s) are consistent.
+    if (MuxHasAlpha(mux->images_)) {
+        if (num_vp8x > 0) {
+            // VP8X chunk is present, so it should contain ALPHA_FLAG.
+            if (!(flags & ALPHA_FLAG)) return WEBP_MUX_INVALID_ARGUMENT;
+        } else {
+            // VP8X chunk is not present, so ALPH chunks should NOT be present either.
+            err = WebPMuxNumChunks(mux, WEBP_CHUNK_ALPHA, &num_alpha);
+            if (err != WEBP_MUX_OK) return err;
+            if (num_alpha > 0) return WEBP_MUX_INVALID_ARGUMENT;
+        }
+    } else { // Mux doesn't need alpha. So, ALPHA_FLAG should NOT be present.
+        if (flags & ALPHA_FLAG) return WEBP_MUX_INVALID_ARGUMENT;
+    }
+
+    // num_fragments & num_images are consistent.
+    if (num_fragments > 0 && num_images != num_fragments) {
+        return WEBP_MUX_INVALID_ARGUMENT;
+    }
+
+    return WEBP_MUX_OK;
+}
+
+#undef NO_FLAG
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/mux/muxread.c b/codec/L2/demos/webpEnc/host/src/mux/muxread.c
new file mode 100644
index 0000000000..7edf193fa5
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/mux/muxread.c
@@ -0,0 +1,520 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Read APIs for mux.
+//
+// Authors: Urvang (urvang@google.com)
+//          Vikas (vikasa@google.com)
+
+#include <assert.h>
+#include "./muxi.h"
+#include "../utils/utils.h"
+
+//------------------------------------------------------------------------------
+// Helper method(s).
+
+// Handy MACRO.
+// Expands inside MuxGet() and relies on its locals 'idx', 'nth' and 'data'.
+// When 'idx' equals INDEX, it looks up the nth chunk with INDEX's tag in LIST
+// and returns from the enclosing function: WEBP_MUX_OK with '*data' set to the
+// chunk's payload descriptor, or WEBP_MUX_NOT_FOUND.
+#define SWITCH_ID_LIST(INDEX, LIST)                                                        \
+    if (idx == (INDEX)) {                                                                  \
+        const WebPChunk* const chunk = ChunkSearchList((LIST), nth, kChunks[(INDEX)].tag); \
+        if (chunk) {                                                                       \
+            *data = chunk->data_;                                                          \
+            return WEBP_MUX_OK;                                                            \
+        } else {                                                                           \
+            return WEBP_MUX_NOT_FOUND;                                                     \
+        }                                                                                  \
+    }
+
+// Fetches the payload of the nth chunk of type 'idx' from 'mux' into '*data'.
+// Only non-image chunk types are supported (asserted). '*data' is reset first
+// and references the chunk's bytes on success; nothing is copied.
+// Returns WEBP_MUX_OK on success, WEBP_MUX_NOT_FOUND otherwise.
+static WebPMuxError MuxGet(const WebPMux* const mux, CHUNK_INDEX idx, uint32_t nth, WebPData* const data) {
+    assert(mux != NULL);
+    assert(!IsWPI(kChunks[idx].id));
+    WebPDataInit(data);
+
+    // Each line below returns from this function if 'idx' matches.
+    SWITCH_ID_LIST(IDX_VP8X, mux->vp8x_);
+    SWITCH_ID_LIST(IDX_ICCP, mux->iccp_);
+    SWITCH_ID_LIST(IDX_ANIM, mux->anim_);
+    SWITCH_ID_LIST(IDX_EXIF, mux->exif_);
+    SWITCH_ID_LIST(IDX_XMP, mux->xmp_);
+    SWITCH_ID_LIST(IDX_UNKNOWN, mux->unknown_);
+    return WEBP_MUX_NOT_FOUND;
+}
+#undef SWITCH_ID_LIST
+
+// Fill the chunk with the given data (includes chunk header bytes), after some
+// verifications.
+// 'data' points at a serialized chunk (tag, 32-bit size, payload), 'data_size'
+// is the number of bytes available from 'data', and 'riff_size' is the upper
+// bound imposed by the enclosing container.
+// Returns WEBP_MUX_NOT_ENOUGH_DATA if the header or the payload is truncated,
+// WEBP_MUX_BAD_DATA if the declared size is invalid, or the result of
+// ChunkAssignData() otherwise.
+static WebPMuxError ChunkVerifyAndAssign(
+    WebPChunk* chunk, const uint8_t* data, size_t data_size, size_t riff_size, int copy_data) {
+    uint32_t chunk_size;
+    WebPData chunk_data;
+
+    // Sanity checks.
+    if (data_size < CHUNK_HEADER_SIZE) return WEBP_MUX_NOT_ENOUGH_DATA;
+    chunk_size = GetLE32(data + TAG_SIZE);
+    // Reject oversized payload declarations before doing any size arithmetic.
+    // NOTE(review): SizeWithPadding() is defined outside this file; if its
+    // header+padding computation wraps for sizes near UINT32_MAX, the bounds
+    // checks below would otherwise be bypassed -- confirm against muxi.h.
+    if (chunk_size > MAX_CHUNK_PAYLOAD) return WEBP_MUX_BAD_DATA;
+
+    {
+        const size_t chunk_disk_size = SizeWithPadding(chunk_size);
+        if (chunk_disk_size > riff_size) return WEBP_MUX_BAD_DATA;
+        if (chunk_disk_size > data_size) return WEBP_MUX_NOT_ENOUGH_DATA;
+    }
+
+    // Data assignment.
+    chunk_data.bytes = data + CHUNK_HEADER_SIZE;
+    chunk_data.size = chunk_size;
+    return ChunkAssignData(chunk, &chunk_data, copy_data, GetLE32(data + 0));
+}
+
+// Completes 'wpi' once its image chunk is set: parses the VP8/VP8L bitstream
+// header and, on success, stores the width, height and alpha presence in
+// 'wpi'. An ALPH chunk accompanying a VP8L image is deleted, since it is
+// ignored for lossless images.
+// Returns non-zero on success, 0 if the bitstream header cannot be parsed (in
+// which case 'wpi' is left unchanged).
+int MuxImageFinalize(WebPMuxImage* const wpi) {
+    const WebPChunk* img;
+    const WebPData* image;
+    int is_lossless;
+    int w, h;
+    int vp8l_has_alpha = 0;
+    int ok;
+
+    // Validate the pointers before anything is read through them.
+    assert(wpi != NULL);
+    img = wpi->img_;
+    assert(img != NULL);
+
+    image = &img->data_;
+    is_lossless = (img->tag_ == kChunks[IDX_VP8L].tag);
+    ok = is_lossless ? VP8LGetInfo(image->bytes, image->size, &w, &h, &vp8l_has_alpha)
+                     : VP8GetInfo(image->bytes, image->size, image->size, &w, &h);
+    if (ok) {
+        // Ignore ALPH chunk accompanying VP8L.
+        if (is_lossless && (wpi->alpha_ != NULL)) {
+            ChunkDelete(wpi->alpha_);
+            wpi->alpha_ = NULL;
+        }
+        wpi->width_ = w;
+        wpi->height_ = h;
+        wpi->has_alpha_ = vp8l_has_alpha || (wpi->alpha_ != NULL);
+    }
+    return ok;
+}
+
+// Parses the payload of an ANMF/FRGM 'chunk' into 'wpi': the fixed-size frame
+// header first, then the sub-chunks that follow it (optional ALPH, exactly one
+// VP8/VP8L image, then any unknown chunks).
+// 'copy_data' is forwarded to the chunk-assignment helpers.
+// Returns 1 on success and 0 on any malformed input or allocation failure.
+// On failure 'wpi' may be partially filled; releasing it is left to the
+// caller.
+static int MuxImageParse(const WebPChunk* const chunk, int copy_data, WebPMuxImage* const wpi) {
+    const uint8_t* bytes = chunk->data_.bytes;
+    size_t size = chunk->data_.size;
+    const uint8_t* const last = bytes + size;
+    WebPChunk subchunk;
+    size_t subchunk_size;
+    ChunkInit(&subchunk);
+
+    assert(chunk->tag_ == kChunks[IDX_ANMF].tag || chunk->tag_ == kChunks[IDX_FRGM].tag);
+    assert(!wpi->is_partial_);
+
+    // ANMF/FRGM.
+    {
+        const size_t hdr_size = (chunk->tag_ == kChunks[IDX_ANMF].tag) ? ANMF_CHUNK_SIZE : FRGM_CHUNK_SIZE;
+        const WebPData temp = {bytes, hdr_size};
+        // Each of ANMF and FRGM chunk contain a header at the beginning. So, its
+        // size should at least be 'hdr_size'.
+        if (size < hdr_size) goto Fail;
+        // Copying the header can fail on allocation; do not carry on with an
+        // empty sub-chunk in that case.
+        if (ChunkAssignData(&subchunk, &temp, copy_data, chunk->tag_) != WEBP_MUX_OK) goto Fail;
+    }
+    // On failure 'subchunk' still owns its data and is released at 'Fail'.
+    if (ChunkSetNth(&subchunk, &wpi->header_, 1) != WEBP_MUX_OK) goto Fail;
+    wpi->is_partial_ = 1; // Waiting for ALPH and/or VP8/VP8L chunks.
+
+    // Rest of the chunks.
+    subchunk_size = ChunkDiskSize(&subchunk) - CHUNK_HEADER_SIZE;
+    bytes += subchunk_size;
+    size -= subchunk_size;
+
+    while (bytes != last) {
+        ChunkInit(&subchunk);
+        if (ChunkVerifyAndAssign(&subchunk, bytes, size, size, copy_data) != WEBP_MUX_OK) {
+            goto Fail;
+        }
+        switch (ChunkGetIdFromTag(subchunk.tag_)) {
+            case WEBP_CHUNK_ALPHA:
+                if (wpi->alpha_ != NULL) goto Fail; // Consecutive ALPH chunks.
+                if (ChunkSetNth(&subchunk, &wpi->alpha_, 1) != WEBP_MUX_OK) goto Fail;
+                wpi->is_partial_ = 1; // Waiting for a VP8 chunk.
+                break;
+            case WEBP_CHUNK_IMAGE:
+                // Only one image chunk is allowed per frame. A second one would
+                // be prepended to 'img_', and MuxImageRelease() deletes only the
+                // head of that slot, so the first chunk would leak.
+                if (wpi->img_ != NULL) goto Fail;
+                if (ChunkSetNth(&subchunk, &wpi->img_, 1) != WEBP_MUX_OK) goto Fail;
+                if (!MuxImageFinalize(wpi)) goto Fail;
+                wpi->is_partial_ = 0; // wpi is completely filled.
+                break;
+            case WEBP_CHUNK_UNKNOWN:
+                if (wpi->is_partial_) goto Fail; // Encountered an unknown chunk
+                                                 // before some image chunks.
+                if (ChunkSetNth(&subchunk, &wpi->unknown_, 0) != WEBP_MUX_OK) goto Fail;
+                break;
+            default:
+                goto Fail;
+                break;
+        }
+        subchunk_size = ChunkDiskSize(&subchunk);
+        bytes += subchunk_size;
+        size -= subchunk_size;
+    }
+    if (wpi->is_partial_) goto Fail;
+    return 1;
+
+Fail:
+    // Frees the sub-chunk's data only if it still owns it (i.e. it was not
+    // handed over to 'wpi' by ChunkSetNth()).
+    ChunkRelease(&subchunk);
+    return 0;
+}
+
+//------------------------------------------------------------------------------
+// Create a mux object from WebP-RIFF data.
+
+// Builds a mux object from the WebP-RIFF data in 'bitstream'.
+// 'copy_data' is forwarded to the chunk-assignment helpers (non-zero: chunk
+// payloads are duplicated; zero: chunks may reference the caller's bytes).
+// 'version' is checked against WEBP_MUX_ABI_VERSION.
+// Returns the new mux, or NULL on ABI mismatch, NULL/short/malformed input,
+// allocation failure, or if the assembled mux does not validate.
+WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data, int version) {
+    size_t riff_size;
+    uint32_t tag;
+    const uint8_t* end;
+    WebPMux* mux = NULL;
+    WebPMuxImage* wpi = NULL;
+    const uint8_t* data;
+    size_t size;
+    WebPChunk chunk;
+    ChunkInit(&chunk);
+
+    // Sanity checks.
+    if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_MUX_ABI_VERSION)) {
+        return NULL; // version mismatch
+    }
+    if (bitstream == NULL) return NULL;
+
+    data = bitstream->bytes;
+    size = bitstream->size;
+
+    if (data == NULL) return NULL;
+    if (size < RIFF_HEADER_SIZE) return NULL;
+    if (GetLE32(data + 0) != MKFOURCC('R', 'I', 'F', 'F') ||
+        GetLE32(data + CHUNK_HEADER_SIZE) != MKFOURCC('W', 'E', 'B', 'P')) {
+        return NULL;
+    }
+
+    mux = WebPMuxNew();
+    if (mux == NULL) return NULL;
+
+    if (size < RIFF_HEADER_SIZE + TAG_SIZE) goto Err;
+
+    tag = GetLE32(data + RIFF_HEADER_SIZE);
+    if (tag != kChunks[IDX_VP8].tag && tag != kChunks[IDX_VP8L].tag && tag != kChunks[IDX_VP8X].tag) {
+        goto Err; // First chunk should be VP8, VP8L or VP8X.
+    }
+
+    riff_size = SizeWithPadding(GetLE32(data + TAG_SIZE));
+    if (riff_size > MAX_CHUNK_PAYLOAD || riff_size > size) {
+        goto Err;
+    } else {
+        if (riff_size < size) { // Redundant data after last chunk.
+            size = riff_size;   // To make sure we don't read any data beyond mux_size.
+        }
+    }
+
+    end = data + size;
+    data += RIFF_HEADER_SIZE;
+    size -= RIFF_HEADER_SIZE;
+
+    // Scratch image that accumulates the chunks of the image being read.
+    wpi = (WebPMuxImage*)WebPSafeMalloc(1ULL, sizeof(*wpi));
+    if (wpi == NULL) goto Err;
+    MuxImageInit(wpi);
+
+    // Loop over chunks.
+    while (data != end) {
+        size_t data_size;
+        WebPChunkId id;
+        WebPChunk** chunk_list;
+        if (ChunkVerifyAndAssign(&chunk, data, size, riff_size, copy_data) != WEBP_MUX_OK) {
+            goto Err;
+        }
+        data_size = ChunkDiskSize(&chunk);
+        id = ChunkGetIdFromTag(chunk.tag_);
+        switch (id) {
+            case WEBP_CHUNK_ALPHA:
+                if (wpi->alpha_ != NULL) goto Err; // Consecutive ALPH chunks.
+                if (ChunkSetNth(&chunk, &wpi->alpha_, 1) != WEBP_MUX_OK) goto Err;
+                wpi->is_partial_ = 1; // Waiting for a VP8 chunk.
+                break;
+            case WEBP_CHUNK_IMAGE:
+                if (ChunkSetNth(&chunk, &wpi->img_, 1) != WEBP_MUX_OK) goto Err;
+                if (!MuxImageFinalize(wpi)) goto Err;
+                wpi->is_partial_ = 0; // wpi is completely filled.
+            PushImage:
+                // Add this to mux->images_ list.
+                if (MuxImagePush(wpi, &mux->images_) != WEBP_MUX_OK) goto Err;
+                MuxImageInit(wpi); // Reset for reading next image.
+                break;
+            case WEBP_CHUNK_ANMF:
+                if (wpi->is_partial_) goto Err; // Previous wpi is still incomplete.
+                if (!MuxImageParse(&chunk, copy_data, wpi)) goto Err;
+                ChunkRelease(&chunk);
+                goto PushImage;
+                break;
+            default:                                         // A non-image chunk.
+                if (wpi->is_partial_) goto Err;              // Encountered a non-image chunk before
+                                                             // getting all chunks of an image.
+                chunk_list = MuxGetChunkListFromId(mux, id); // List to add this chunk.
+                if (ChunkSetNth(&chunk, chunk_list, 0) != WEBP_MUX_OK) goto Err;
+                if (id == WEBP_CHUNK_VP8X) { // grab global specs
+                    // The canvas dimensions are read at byte offsets 12..17
+                    // from the chunk start; a shorter VP8X chunk would make
+                    // those reads run past the chunk (and possibly past the
+                    // input buffer).
+                    if (data_size < CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE) goto Err;
+                    mux->canvas_width_ = GetLE24(data + 12) + 1;
+                    mux->canvas_height_ = GetLE24(data + 15) + 1;
+                }
+                break;
+        }
+        data += data_size;
+        size -= data_size;
+        ChunkInit(&chunk);
+    }
+
+    // Validate mux if complete.
+    if (MuxValidate(mux) != WEBP_MUX_OK) goto Err;
+
+    MuxImageDelete(wpi);
+    return mux; // All OK;
+
+Err: // Something bad happened.
+    ChunkRelease(&chunk);
+    MuxImageDelete(wpi);
+    WebPMuxDelete(mux);
+    return NULL;
+}
+
+//------------------------------------------------------------------------------
+// Get API(s).
+
+// Checks that 'mux' holds exactly one plain image (no frames, no fragments).
+// Returns WEBP_MUX_NOT_FOUND when there is no image at all, WEBP_MUX_OK for
+// the single-image case, and WEBP_MUX_INVALID_ARGUMENT for anything else
+// (frames/fragments present, or an invalid mux).
+static WebPMuxError ValidateForSingleImage(const WebPMux* const mux) {
+    const int image_count = MuxImageCount(mux->images_, WEBP_CHUNK_IMAGE);
+    const int frame_count = MuxImageCount(mux->images_, WEBP_CHUNK_ANMF);
+    const int fragment_count = MuxImageCount(mux->images_, WEBP_CHUNK_FRGM);
+
+    // No images in mux.
+    if (image_count == 0) return WEBP_MUX_NOT_FOUND;
+
+    // Valid case (single image).
+    if (image_count == 1 && frame_count == 0 && fragment_count == 0) {
+        return WEBP_MUX_OK;
+    }
+
+    // Frame/Fragment case OR an invalid mux.
+    return WEBP_MUX_INVALID_ARGUMENT;
+}
+
+// Retrieves the canvas width, height and feature flags of 'mux'.
+// If a VP8X chunk exists, all three come from its payload. Otherwise the
+// user-forced canvas size stored in 'mux' is used, falling back to the
+// dimensions of the first image when no size was forced and the mux holds a
+// single image; ALPHA_FLAG is set if the first image has alpha.
+// Any of 'width', 'height', 'flags' may be NULL to skip that output.
+// Returns WEBP_MUX_BAD_DATA if the VP8X payload is too short or the canvas
+// area reaches MAX_IMAGE_AREA, WEBP_MUX_OK otherwise.
+static WebPMuxError MuxGetCanvasInfo(const WebPMux* const mux, int* width, int* height, uint32_t* flags) {
+    int canvas_w, canvas_h;
+    uint32_t feature_bits = 0;
+    WebPData vp8x;
+    assert(mux != NULL);
+
+    if (MuxGet(mux, IDX_VP8X, 1, &vp8x) != WEBP_MUX_OK) {
+        // No VP8X chunk: derive the information from the mux and its images.
+        const WebPMuxImage* const first = mux->images_;
+        // Grab user-forced canvas size as default.
+        canvas_w = mux->canvas_width_;
+        canvas_h = mux->canvas_height_;
+        if (canvas_w == 0 && canvas_h == 0 && ValidateForSingleImage(mux) == WEBP_MUX_OK) {
+            // single image and not forced canvas size => use dimension of first frame
+            assert(first != NULL);
+            canvas_w = first->width_;
+            canvas_h = first->height_;
+        }
+        if (first != NULL && first->has_alpha_) feature_bits |= ALPHA_FLAG;
+    } else {
+        // VP8X chunk present: read flags and canvas size from its payload.
+        if (vp8x.size < VP8X_CHUNK_SIZE) return WEBP_MUX_BAD_DATA;
+        feature_bits = GetLE32(vp8x.bytes + 0);
+        canvas_w = GetLE24(vp8x.bytes + 4) + 1;
+        canvas_h = GetLE24(vp8x.bytes + 7) + 1;
+    }
+    if (canvas_w * (uint64_t)canvas_h >= MAX_IMAGE_AREA) return WEBP_MUX_BAD_DATA;
+
+    if (width != NULL) *width = canvas_w;
+    if (height != NULL) *height = canvas_h;
+    if (flags != NULL) *flags = feature_bits;
+    return WEBP_MUX_OK;
+}
+
+// Public getter for the canvas dimensions of 'mux'; all three pointers are required.
+WebPMuxError WebPMuxGetCanvasSize(const WebPMux* mux, int* width, int* height) {
+    const int args_ok = (mux != NULL) && (width != NULL) && (height != NULL);
+    // The feature flags are not requested here, hence the NULL 'flags' output.
+    return args_ok ? MuxGetCanvasInfo(mux, width, height, NULL) : WEBP_MUX_INVALID_ARGUMENT;
+}
+
+WebPMuxError WebPMuxGetFeatures(const WebPMux* mux, uint32_t* flags) {
+    // Only the feature flags are wanted, so the width/height outputs are passed as NULL.
+    return (mux == NULL || flags == NULL) ? WEBP_MUX_INVALID_ARGUMENT : MuxGetCanvasInfo(mux, NULL, NULL, flags);
+}
+
+static uint8_t* EmitVP8XChunk(uint8_t* const dst, int width, int height, uint32_t flags) {
+    uint8_t* const payload = dst + CHUNK_HEADER_SIZE; // VP8X payload follows the tag + size header
+    assert(width >= 1 && height >= 1);
+    assert(width <= MAX_CANVAS_SIZE && height <= MAX_CANVAS_SIZE);
+    assert(width * (uint64_t)height < MAX_IMAGE_AREA);
+    PutLE32(dst, MKFOURCC('V', 'P', '8', 'X')); // chunk tag
+    PutLE32(dst + TAG_SIZE, VP8X_CHUNK_SIZE); // payload size
+    PutLE32(payload + 0, flags); // feature flags
+    PutLE24(payload + 4, width - 1); // canvas width minus one
+    PutLE24(payload + 7, height - 1); // canvas height minus one
+    return payload + VP8X_CHUNK_SIZE; // first byte past the emitted chunk
+}
+
+// Assembles a single-image WebP bitstream from 'wpi' into a newly allocated buffer returned via 'bitstream'.
+static WebPMuxError SynthesizeBitstream(const WebPMuxImage* const wpi, WebPData* const bitstream) {
+    uint8_t* dst;
+
+    // Compute the total size, then allocate. VP8X and ALPH are only accounted for when 'wpi' has an alpha chunk.
+    const int need_vp8x = (wpi->alpha_ != NULL);
+    const size_t vp8x_size = need_vp8x ? CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE : 0;
+    const size_t alpha_size = need_vp8x ? ChunkDiskSize(wpi->alpha_) : 0;
+    // Note: No need to output ANMF/FRGM chunk for a single image.
+    const size_t size = RIFF_HEADER_SIZE + vp8x_size + alpha_size + ChunkDiskSize(wpi->img_);
+    uint8_t* const data = (uint8_t*)WebPSafeMalloc(1ULL, size);
+    if (data == NULL) return WEBP_MUX_MEMORY_ERROR;
+
+    // Main RIFF header; 'dst' then tracks the write position as each part is emitted.
+    dst = MuxEmitRiffHeader(data, size);
+
+    if (need_vp8x) {
+        dst = EmitVP8XChunk(dst, wpi->width_, wpi->height_, ALPHA_FLAG); // VP8X.
+        dst = ChunkListEmit(wpi->alpha_, dst);                           // ALPH.
+    }
+
+    // Image chunk(s); afterwards the write position must match the precomputed size exactly.
+    dst = ChunkListEmit(wpi->img_, dst);
+    assert(dst == data + size);
+
+    // Output: hand the buffer over. NOTE(review): presumably the caller must release it - confirm.
+    bitstream->bytes = data;
+    bitstream->size = size;
+    return WEBP_MUX_OK;
+}
+
+WebPMuxError WebPMuxGetChunk(const WebPMux* mux, const char fourcc[4], WebPData* chunk_data) {
+    CHUNK_INDEX chunk_idx;
+    const WebPChunk* found;
+    if (mux == NULL || fourcc == NULL || chunk_data == NULL) {
+        return WEBP_MUX_INVALID_ARGUMENT;
+    }
+    chunk_idx = ChunkGetIndexFromFourCC(fourcc);
+    // Image chunks cannot be fetched through this generic accessor.
+    if (IsWPI(kChunks[chunk_idx].id)) return WEBP_MUX_INVALID_ARGUMENT;
+    // Known, non-image chunk type: delegate to MuxGet() (nth = 1).
+    if (chunk_idx != IDX_UNKNOWN) return MuxGet(mux, chunk_idx, 1, chunk_data);
+    // Unknown chunk type: look the tag up in the list of unknown chunks.
+    found = ChunkSearchList(mux->unknown_, 1, ChunkGetTagFromFourCC(fourcc));
+    if (found == NULL) return WEBP_MUX_NOT_FOUND;
+    *chunk_data = found->data_;
+    return WEBP_MUX_OK;
+}
+
+static WebPMuxError MuxGetImageInternal(const WebPMuxImage* const wpi, WebPMuxFrameInfo* const info) {
+    // Frame-placement and animation fields do not come from 'wpi'; they get fixed defaults.
+    info->blend_method = WEBP_MUX_BLEND;
+    info->dispose_method = WEBP_MUX_DISPOSE_NONE;
+    info->duration = 1;
+    info->y_offset = 0;
+    info->x_offset = 0;
+    // The id is derived from the image chunk's tag; the bitstream is synthesized from 'wpi'.
+    info->id = ChunkGetIdFromTag(wpi->img_->tag_);
+    return SynthesizeBitstream(wpi, &info->bitstream);
+}
+
+static WebPMuxError MuxGetFrameFragmentInternal(const WebPMuxImage* const wpi, WebPMuxFrameInfo* const frame) {
+    const int is_frame = (wpi->header_->tag_ == kChunks[IDX_ANMF].tag);
+    const CHUNK_INDEX idx = is_frame ? IDX_ANMF : IDX_FRGM;
+    const WebPData* frame_frgm_data;
+    if (!is_frame) return WEBP_MUX_INVALID_ARGUMENT; // only ANMF headers get past this point
+    assert(wpi->header_ != NULL); // Already checked by WebPMuxGetFrame(). NOTE(review): header_ is dereferenced above.
+    // Get frame/fragment chunk.
+    frame_frgm_data = &wpi->header_->data_;
+    if (frame_frgm_data->size < kChunks[idx].size) return WEBP_MUX_BAD_DATA;
+    // Extract info: offsets are read as 24-bit values and doubled.
+    frame->x_offset = 2 * GetLE24(frame_frgm_data->bytes + 0);
+    frame->y_offset = 2 * GetLE24(frame_frgm_data->bytes + 3);
+    if (is_frame) {
+        const uint8_t bits = frame_frgm_data->bytes[15]; // bit 0 selects dispose method, bit 1 selects blend method
+        frame->duration = GetLE24(frame_frgm_data->bytes + 12);
+        frame->dispose_method = (bits & 1) ? WEBP_MUX_DISPOSE_BACKGROUND : WEBP_MUX_DISPOSE_NONE;
+        frame->blend_method = (bits & 2) ? WEBP_MUX_NO_BLEND : WEBP_MUX_BLEND;
+    } else { // Defaults for unused values. NOTE(review): unreachable because of the early return above.
+        frame->duration = 1;
+        frame->dispose_method = WEBP_MUX_DISPOSE_NONE;
+        frame->blend_method = WEBP_MUX_BLEND;
+    }
+    frame->id = ChunkGetIdFromTag(wpi->header_->tag_);
+    return SynthesizeBitstream(wpi, &frame->bitstream);
+}
+
+// Fills 'frame' with the description and bitstream of the nth image of 'mux'.
+WebPMuxError WebPMuxGetFrame(const WebPMux* mux, uint32_t nth, WebPMuxFrameInfo* frame) {
+    WebPMuxImage* image;
+    WebPMuxError status;
+
+    // Both the container and the output structure are mandatory.
+    if (mux == NULL || frame == NULL) {
+        return WEBP_MUX_INVALID_ARGUMENT;
+    }
+
+    // Locate the nth image of the mux; lookup errors are returned unchanged.
+    status = MuxImageGetNth((const WebPMuxImage**)&mux->images_, nth, &image);
+    if (status != WEBP_MUX_OK) return status;
+
+    // An image without a header chunk is handled as a plain image.
+    if (image->header_ == NULL) return MuxGetImageInternal(image, frame);
+
+    // Otherwise the header chunk describes a frame/fragment.
+    return MuxGetFrameFragmentInternal(image, frame);
+}
+
+WebPMuxError WebPMuxGetAnimationParams(const WebPMux* mux, WebPMuxAnimParams* params) {
+    WebPData anim;
+    WebPMuxError err;
+    // Reads the background color and loop count from the ANIM chunk of 'mux' into 'params'.
+    if (mux == NULL || params == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+    // kChunks[] is indexed by CHUNK_INDEX (IDX_*), not by WebPChunkId, hence IDX_ANIM for the size check.
+    err = MuxGet(mux, IDX_ANIM, 1, &anim);
+    if (err != WEBP_MUX_OK) return err; // lookup errors are returned unchanged
+    if (anim.size < kChunks[IDX_ANIM].size) return WEBP_MUX_BAD_DATA;
+    params->bgcolor = GetLE32(anim.bytes); // 32-bit little-endian value at offset 0
+    params->loop_count = GetLE16(anim.bytes + 4); // 16-bit little-endian value at offset 4
+
+    return WEBP_MUX_OK;
+}
+
+// Maps a chunk id to its position in kChunks[]; yields IDX_NIL when no entry matches.
+static CHUNK_INDEX ChunkGetIndexFromId(WebPChunkId id) {
+    int pos = 0;
+    // kChunks[] is scanned up to (not including) its WEBP_CHUNK_NIL sentinel entry.
+    while (kChunks[pos].id != WEBP_CHUNK_NIL && kChunks[pos].id != id) ++pos;
+    if (kChunks[pos].id == WEBP_CHUNK_NIL) return IDX_NIL;
+    return (CHUNK_INDEX)pos;
+}
+
+// Returns how many chunks of 'chunk_list' carry the tag 'tag'.
+// Passing NIL_TAG acts as a wildcard: every chunk of the list is counted.
+static int CountChunks(const WebPChunk* const chunk_list, uint32_t tag) {
+    const int match_all = (tag == NIL_TAG);
+    const WebPChunk* node = chunk_list;
+    int total = 0;
+    while (node != NULL) {
+        if (match_all || node->tag_ == tag) ++total;
+        node = node->next_;
+    }
+    return total;
+}
+
+// Stores in '*num_elements' the number of chunks with id 'id' found in 'mux'.
+WebPMuxError WebPMuxNumChunks(const WebPMux* mux, WebPChunkId id, int* num_elements) {
+    if (mux == NULL || num_elements == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+    // Image-type ids are counted in the image list ...
+    if (IsWPI(id)) {
+        *num_elements = MuxImageCount(mux->images_, id);
+        return WEBP_MUX_OK;
+    }
+    { // ... every other id is counted, by tag, in the chunk list selected for it.
+        WebPChunk* const* const list = MuxGetChunkListFromId(mux, id);
+        const uint32_t wanted_tag = kChunks[ChunkGetIndexFromId(id)].tag;
+        *num_elements = CountChunks(*list, wanted_tag);
+    }
+    return WEBP_MUX_OK;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/utils/Makefile.am b/codec/L2/demos/webpEnc/host/src/utils/Makefile.am
new file mode 100644
index 0000000000..534cc836e3
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/Makefile.am
@@ -0,0 +1,44 @@
+noinst_LTLIBRARIES = libwebputils.la
+# Also build the decoder-only utility library when the BUILD_LIBWEBPDECODER conditional is true.
+if BUILD_LIBWEBPDECODER
+  noinst_LTLIBRARIES += libwebputilsdecode.la
+endif
+
+common_HEADERS = ../webp/types.h
+commondir = $(includedir)/webp
+# Sources shared by libwebputils.la and libwebputilsdecode.la.
+COMMON_SOURCES =
+COMMON_SOURCES += bit_reader.c
+COMMON_SOURCES += bit_reader.h
+COMMON_SOURCES += bit_reader_inl.h
+COMMON_SOURCES += color_cache.c
+COMMON_SOURCES += color_cache.h
+COMMON_SOURCES += endian_inl.h
+COMMON_SOURCES += filters.c
+COMMON_SOURCES += filters.h
+COMMON_SOURCES += huffman.c
+COMMON_SOURCES += huffman.h
+COMMON_SOURCES += quant_levels_dec.c
+COMMON_SOURCES += quant_levels_dec.h
+COMMON_SOURCES += rescaler.c
+COMMON_SOURCES += rescaler.h
+COMMON_SOURCES += random.c
+COMMON_SOURCES += random.h
+COMMON_SOURCES += thread.c
+COMMON_SOURCES += thread.h
+COMMON_SOURCES += utils.c
+COMMON_SOURCES += utils.h
+# Sources that are only part of libwebputils.la (not of the decoder-only library).
+ENC_SOURCES =
+ENC_SOURCES += bit_writer.c
+ENC_SOURCES += bit_writer.h
+ENC_SOURCES += huffman_encode.c
+ENC_SOURCES += huffman_encode.h
+ENC_SOURCES += quant_levels.c
+ENC_SOURCES += quant_levels.h
+
+libwebputils_la_SOURCES = $(COMMON_SOURCES) $(ENC_SOURCES)
+
+if BUILD_LIBWEBPDECODER
+  libwebputilsdecode_la_SOURCES = $(COMMON_SOURCES)
+endif
diff --git a/codec/L2/demos/webpEnc/host/src/utils/bit_reader.c b/codec/L2/demos/webpEnc/host/src/utils/bit_reader.c
new file mode 100644
index 0000000000..c90f870b23
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/bit_reader.c
@@ -0,0 +1,194 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Boolean decoder non-inlined methods
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifdef HAVE_CONFIG_H
+#include "../webp/config.h"
+#endif
+
+#include "./bit_reader_inl.h"
+
+//------------------------------------------------------------------------------
+// VP8BitReader
+
+void VP8BitReaderSetBuffer(VP8BitReader* const br, const uint8_t* const start, size_t size) {
+    br->buf_ = start; // next byte to read
+    br->buf_end_ = start + size; // one past the last byte
+    br->buf_max_ = (size >= sizeof(lbit_t)) ? start + size - sizeof(lbit_t) + 1 : start; // limit for lbit_t-wide reads
+}
+
+void VP8InitBitReader(VP8BitReader* const br, const uint8_t* const start, size_t size) {
+    assert(br != NULL);
+    assert(start != NULL);
+    assert(size < (1u << 31)); // limit ensured by format and upstream checks
+    br->range_ = 255 - 1; // range_ is kept as "range minus 1"
+    br->value_ = 0;
+    br->bits_ = -8; // to load the very first 8bits
+    br->eof_ = 0;
+    VP8BitReaderSetBuffer(br, start, size);
+    VP8LoadNewBytes(br); // prime value_ with the first bytes of the buffer
+}
+
+void VP8RemapBitReader(VP8BitReader* const br, ptrdiff_t offset) {
+    // A reader without a buffer has nothing to displace.
+    if (br->buf_ == NULL) return;
+    br->buf_max_ = br->buf_max_ + offset;
+    br->buf_end_ = br->buf_end_ + offset;
+    br->buf_ = br->buf_ + offset;
+}
+
+const uint8_t kVP8Log2Range[128] = {7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+                                    3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+                                    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0};
+
+// range = ((range + 1) << kVP8Log2Range[range]) - 1   ('range' being the stored "range minus 1" value)
+const uint8_t kVP8NewRange[128] = {
+    127, 127, 191, 127, 159, 191, 223, 127, 143, 159, 175, 191, 207, 223, 239, 127, 135, 143, 151, 159, 167, 175,
+    183, 191, 199, 207, 215, 223, 231, 239, 247, 127, 131, 135, 139, 143, 147, 151, 155, 159, 163, 167, 171, 175,
+    179, 183, 187, 191, 195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 127, 129, 131,
+    133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171, 173, 175,
+    177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 203, 205, 207, 209, 211, 213, 215, 217, 219,
+    221, 223, 225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 127};
+
+void VP8LoadFinalBytes(VP8BitReader* const br) {
+    assert(br != NULL && br->buf_ != NULL);
+    // Only read 8bits at a time
+    if (br->buf_ < br->buf_end_) {
+        br->bits_ += 8;
+        br->value_ = (bit_t)(*br->buf_++) | (br->value_ << 8); // append the next byte as the new low 8 bits
+    } else if (!br->eof_) { // first call past the end: append 8 zero bits and remember EOF
+        br->value_ <<= 8;
+        br->bits_ += 8;
+        br->eof_ = 1;
+    } else {
+        br->bits_ = 0; // This is to avoid undefined behaviour with shifts.
+    }
+}
+
+//------------------------------------------------------------------------------
+// Higher-level calls
+
+uint32_t VP8GetValue(VP8BitReader* const br, int bits) {
+    uint32_t result = 0;
+    int shift;
+    // Bits are decoded most-significant first, each with probability 0x80.
+    for (shift = bits - 1; shift >= 0; --shift) result |= VP8GetBit(br, 0x80) << shift;
+    return result;
+}
+
+int32_t VP8GetSignedValue(VP8BitReader* const br, int bits) {
+    const int magnitude = VP8GetValue(br, bits); // the magnitude is read before the sign bit
+    return VP8Get(br) ? -magnitude : magnitude;
+}
+
+//------------------------------------------------------------------------------
+// VP8LBitReader
+
+#define VP8L_LOG8_WBITS 4 // Number of bytes needed to store VP8L_WBITS bits.
+
+#if !defined(WEBP_FORCE_ALIGNED) && (defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \
+                                     defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64))
+#define VP8L_USE_UNALIGNED_LOAD
+#endif
+
+static const uint32_t kBitMask[VP8L_MAX_NUM_BIT_READ + 1] = {
+    0,        0x000001, 0x000003, 0x000007, 0x00000f, 0x00001f, 0x00003f, 0x00007f, 0x0000ff,
+    0x0001ff, 0x0003ff, 0x0007ff, 0x000fff, 0x001fff, 0x003fff, 0x007fff, 0x00ffff, 0x01ffff,
+    0x03ffff, 0x07ffff, 0x0fffff, 0x1fffff, 0x3fffff, 0x7fffff, 0xffffff};
+
+void VP8LInitBitReader(VP8LBitReader* const br, const uint8_t* const start, size_t length) {
+    size_t i;
+    vp8l_val_t value = 0;
+    assert(br != NULL);
+    assert(start != NULL);
+    assert(length < 0xfffffff8u); // can't happen with a RIFF chunk.
+
+    br->len_ = length;
+    br->val_ = 0;
+    br->bit_pos_ = 0;
+    br->eos_ = 0;
+
+    if (length > sizeof(br->val_)) { // preload at most sizeof(val_) bytes
+        length = sizeof(br->val_);
+    }
+    for (i = 0; i < length; ++i) {
+        value |= (vp8l_val_t)start[i] << (8 * i); // byte i goes to bits [8*i, 8*i + 7]
+    }
+    br->val_ = value;
+    br->pos_ = length; // bytes consumed by the preload
+    br->buf_ = start;
+}
+
+void VP8LBitReaderSetBuffer(VP8LBitReader* const br, const uint8_t* const buf, size_t len) {
+    assert(br != NULL);
+    assert(buf != NULL);
+    assert(len < 0xfffffff8u); // can't happen with a RIFF chunk.
+    br->buf_ = buf; // pos_, val_ and bit_pos_ are left untouched
+    br->len_ = len;
+    // pos_ > len_ should be considered a param error.
+    br->eos_ = (br->pos_ > br->len_) || VP8LIsEndOfStream(br);
+}
+
+static void VP8LSetEndOfStream(VP8LBitReader* const br) {
+    br->eos_ = 1; // flag that reading went past the end of the stream
+    br->bit_pos_ = 0; // To avoid undefined behaviour with shifts.
+}
+
+// If not at EOS, reload up to VP8L_LBITS byte-by-byte
+static void ShiftBytes(VP8LBitReader* const br) {
+    while (br->bit_pos_ >= 8 && br->pos_ < br->len_) { // one whole consumed byte and input left
+        br->val_ >>= 8; // drop the consumed low byte
+        br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (VP8L_LBITS - 8); // new byte enters at the top
+        ++br->pos_;
+        br->bit_pos_ -= 8;
+    }
+    if (VP8LIsEndOfStream(br)) {
+        VP8LSetEndOfStream(br);
+    }
+}
+
+void VP8LDoFillBitWindow(VP8LBitReader* const br) {
+    assert(br->bit_pos_ >= VP8L_WBITS); // callers only get here once VP8L_WBITS bits were consumed
+// TODO(jzern): given the fixed read size it may be possible to force
+//              alignment in this block.
+#if defined(VP8L_USE_UNALIGNED_LOAD)
+    if (br->pos_ + sizeof(br->val_) < br->len_) { // fast path only while more than sizeof(val_) bytes remain
+        br->val_ >>= VP8L_WBITS;
+        br->bit_pos_ -= VP8L_WBITS;
+        // The expression below needs a little-endian arch to work correctly.
+        // This gives a large speedup for decoding speed.
+        br->val_ |= (vp8l_val_t)WebPMemToUint32(br->buf_ + br->pos_) << (VP8L_LBITS - VP8L_WBITS);
+        br->pos_ += VP8L_LOG8_WBITS; // advance by the bytes just loaded
+        return;
+    }
+#endif
+    ShiftBytes(br); // Slow path.
+}
+
+uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) {
+    uint32_t result;
+    assert(n_bits >= 0);
+    // Already at end-of-stream, or more than VP8L_MAX_NUM_BIT_READ bits requested: flag EOS and return 0.
+    if (br->eos_ || n_bits > VP8L_MAX_NUM_BIT_READ) {
+        VP8LSetEndOfStream(br);
+        return 0;
+    }
+    // Extract the low 'n_bits' bits of the prefetched window, then consume them.
+    result = VP8LPrefetchBits(br) & kBitMask[n_bits];
+    br->bit_pos_ += n_bits;
+    ShiftBytes(br);
+    return result;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/utils/bit_reader.h b/codec/L2/demos/webpEnc/host/src/utils/bit_reader.h
new file mode 100644
index 0000000000..317427dffd
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/bit_reader.h
@@ -0,0 +1,169 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Boolean decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+//         Vikas Arora (vikaas.arora@gmail.com)
+
+#ifndef WEBP_UTILS_BIT_READER_H_
+#define WEBP_UTILS_BIT_READER_H_
+
+#include <assert.h>
+#ifdef _MSC_VER
+#include <stdlib.h> // _byteswap_ulong
+#endif
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// The Boolean decoder needs to maintain infinite precision on the value_ field.
+// However, since range_ is only 8bit, we only need an active window of 8 bits
+// for value_. Left bits (MSB) gets zeroed and shifted away when value_ falls
+// below 128, range_ is updated, and fresh bits read from the bitstream are
+// brought in as LSB. To avoid reading the fresh bits one by one (slow), we
+// cache WEBP_BITS of them ahead. The total of (WEBP_BITS + 8) bits must fit into a
+// natural register (with type bit_t). To fetch WEBP_BITS bits from bitstream we
+// use a type lbit_t.
+//
+// WEBP_BITS can be any multiple of 8 from 8 to 56 (inclusive).
+// Pick values that fit natural register size.
+
+#if defined(__i386__) || defined(_M_IX86) // x86 32bit
+#define WEBP_BITS 24
+#elif defined(__x86_64__) || defined(_M_X64) // x86 64bit
+#define WEBP_BITS 56
+#elif defined(__arm__) || defined(_M_ARM) // ARM
+#define WEBP_BITS 24
+#elif defined(__aarch64__) // ARM 64bit
+#define WEBP_BITS 56
+#elif defined(__mips__) // MIPS
+#define WEBP_BITS 24
+#else // reasonable default
+#define WEBP_BITS 24
+#endif
+
+//------------------------------------------------------------------------------
+// Derived types and constants:
+//   bit_t = natural register type for storing 'value_' (which is WEBP_BITS+8 bits)
+//   range_t = register for 'range_' (which is 8bits only)
+
+#if (WEBP_BITS > 24)
+typedef uint64_t bit_t;
+#else
+typedef uint32_t bit_t;
+#endif
+
+typedef uint32_t range_t;
+
+//------------------------------------------------------------------------------
+// Bitreader
+
+typedef struct VP8BitReader VP8BitReader;
+struct VP8BitReader {
+    // boolean decoder  (keep the field ordering as is!)
+    bit_t value_;   // current value
+    range_t range_; // current range minus 1. In [127, 254] interval.
+    int bits_;      // number of valid bits left
+    // read buffer
+    const uint8_t* buf_;     // next byte to be read
+    const uint8_t* buf_end_; // end of read buffer
+    const uint8_t* buf_max_; // max packed-read position on buffer
+    int eof_;                // true if input is exhausted
+};
+
+// Initialize the bit reader and the boolean decoder.
+void VP8InitBitReader(VP8BitReader* const br, const uint8_t* const start, size_t size);
+// Sets the working read buffer.
+void VP8BitReaderSetBuffer(VP8BitReader* const br, const uint8_t* const start, size_t size);
+
+// Update internal pointers to displace the byte buffer by the
+// relative offset 'offset'.
+void VP8RemapBitReader(VP8BitReader* const br, ptrdiff_t offset);
+
+// return the next value made of 'num_bits' bits
+uint32_t VP8GetValue(VP8BitReader* const br, int num_bits);
+static WEBP_INLINE uint32_t VP8Get(VP8BitReader* const br) {
+    return VP8GetValue(br, 1);
+}
+
+// return the next value with sign-extension.
+int32_t VP8GetSignedValue(VP8BitReader* const br, int num_bits);
+
+// bit_reader_inl.h will implement the following methods:
+//   static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob)
+//   static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v)
+// and should be included by the .c files that actually need them.
+// This is to avoid recompiling the whole library whenever this file is touched,
+// and also allowing platform-specific ad-hoc hacks.
+
+// -----------------------------------------------------------------------------
+// Bitreader for lossless format
+
+// maximum number of bits (inclusive) the bit-reader can handle:
+#define VP8L_MAX_NUM_BIT_READ 24
+
+#define VP8L_LBITS 64 // Number of bits prefetched (= bit-size of vp8l_val_t).
+#define VP8L_WBITS 32 // Minimum number of bits ready after VP8LFillBitWindow.
+
+typedef uint64_t vp8l_val_t; // right now, this bit-reader can only use 64bit.
+
+typedef struct {
+    vp8l_val_t val_;     // pre-fetched bits
+    const uint8_t* buf_; // input byte buffer
+    size_t len_;         // buffer length
+    size_t pos_;         // byte position in buf_
+    int bit_pos_;        // current bit-reading position in val_
+    int eos_;            // true if a bit was read past the end of buffer
+} VP8LBitReader;
+
+void VP8LInitBitReader(VP8LBitReader* const br, const uint8_t* const start, size_t length);
+
+//  Sets a new data buffer.
+void VP8LBitReaderSetBuffer(VP8LBitReader* const br, const uint8_t* const buffer, size_t length);
+
+// Reads the specified number of bits from read buffer.
+// Flags an error in case end_of_stream or n_bits is more than the allowed limit
+// of VP8L_MAX_NUM_BIT_READ (inclusive).
+// Flags eos_ if this read attempt is going to cross the read buffer.
+uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits);
+
+// Return the prefetched bits (low 32 bits of val_ past the current bit position), without consuming them.
+static WEBP_INLINE uint32_t VP8LPrefetchBits(VP8LBitReader* const br) {
+    return (uint32_t)(br->val_ >> (br->bit_pos_ & (VP8L_LBITS - 1))); // the mask keeps the shift below VP8L_LBITS
+}
+
+// Returns true if there was an attempt at reading bit past the end of
+// the buffer. Doesn't set br->eos_ flag.
+static WEBP_INLINE int VP8LIsEndOfStream(const VP8LBitReader* const br) {
+    assert(br->pos_ <= br->len_);
+    return br->eos_ || ((br->pos_ == br->len_) && (br->bit_pos_ > VP8L_LBITS)); // input used up and val_ over-read
+}
+
+// For jumping over a number of bits in the bit stream when accessed with
+// VP8LPrefetchBits and VP8LFillBitWindow.
+static WEBP_INLINE void VP8LSetBitPos(VP8LBitReader* const br, int val) {
+    br->bit_pos_ = val; // 'val' is the new absolute bit position inside val_
+    br->eos_ = VP8LIsEndOfStream(br); // re-evaluate end-of-stream for the new position
+}
+
+// Advances the read buffer by 4 bytes to make room for reading next 32 bits.
+// Speed critical, but infrequent part of the code can be non-inlined.
+extern void VP8LDoFillBitWindow(VP8LBitReader* const br);
+static WEBP_INLINE void VP8LFillBitWindow(VP8LBitReader* const br) {
+    if (br->bit_pos_ >= VP8L_WBITS) VP8LDoFillBitWindow(br); // refill only once VP8L_WBITS bits were consumed
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_UTILS_BIT_READER_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/utils/bit_reader_inl.h b/codec/L2/demos/webpEnc/host/src/utils/bit_reader_inl.h
new file mode 100644
index 0000000000..be37d19f2d
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/bit_reader_inl.h
@@ -0,0 +1,171 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Specific inlined methods for boolean decoder [VP8GetBit() ...]
+// This file should be included by the .c sources that actually need to call
+// these methods.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_BIT_READER_INL_H_
+#define WEBP_UTILS_BIT_READER_INL_H_
+
+#ifdef HAVE_CONFIG_H
+#include "../webp/config.h"
+#endif
+
+#ifdef WEBP_FORCE_ALIGNED
+#include <string.h> // memcpy
+#endif
+
+#include "../dsp/dsp.h"
+#include "./bit_reader.h"
+#include "./endian_inl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Derived type lbit_t = natural type for memory I/O
+
+#if (WEBP_BITS > 32)
+typedef uint64_t lbit_t;
+#elif (WEBP_BITS > 16)
+typedef uint32_t lbit_t;
+#elif (WEBP_BITS > 8)
+typedef uint16_t lbit_t;
+#else
+typedef uint8_t lbit_t;
+#endif
+
+extern const uint8_t kVP8Log2Range[128];
+extern const uint8_t kVP8NewRange[128];
+
+// special case for the tail byte-reading
+void VP8LoadFinalBytes(VP8BitReader* const br);
+
+//------------------------------------------------------------------------------
+// Inlined critical functions
+
+// Refills br->value_ with WEBP_BITS new bits, or defers to VP8LoadFinalBytes() near the end of the buffer.
+static WEBP_INLINE void VP8LoadNewBytes(VP8BitReader* const br) {
+    assert(br != NULL && br->buf_ != NULL);
+    // Read 'WEBP_BITS' bits at a time if possible.
+    if (br->buf_ < br->buf_max_) {
+        // convert memory type to register type (with some zero'ing!)
+        bit_t bits;
+#if defined(WEBP_FORCE_ALIGNED)
+        lbit_t in_bits;
+        memcpy(&in_bits, br->buf_, sizeof(in_bits));
+#elif defined(WEBP_USE_MIPS32)
+        // This is needed because of un-aligned read.
+        lbit_t in_bits;
+        lbit_t* p_buf_ = (lbit_t*)br->buf_;
+        __asm__ volatile(
+            ".set   push                             \n\t"
+            ".set   at                               \n\t"
+            ".set   macro                            \n\t"
+            "ulw    %[in_bits], 0(%[p_buf_])         \n\t"
+            ".set   pop                              \n\t"
+            : [in_bits] "=r"(in_bits)
+            : [p_buf_] "r"(p_buf_)
+            : "memory", "at");
+#else
+        const lbit_t in_bits = *(const lbit_t*)br->buf_; // NOTE(review): direct load through a cast pointer
+#endif
+        br->buf_ += WEBP_BITS >> 3; // advance by WEBP_BITS / 8 bytes, which may be fewer than sizeof(lbit_t)
+#if !defined(WORDS_BIGENDIAN)
+#if (WEBP_BITS > 32)
+        bits = BSwap64(in_bits);
+        bits >>= 64 - WEBP_BITS;
+#elif (WEBP_BITS >= 24)
+        bits = BSwap32(in_bits);
+        bits >>= (32 - WEBP_BITS);
+#elif (WEBP_BITS == 16)
+        bits = BSwap16(in_bits);
+#else  // WEBP_BITS == 8
+        bits = (bit_t)in_bits;
+#endif // WEBP_BITS > 32
+#else  // WORDS_BIGENDIAN
+        bits = (bit_t)in_bits;
+        if (WEBP_BITS != 8 * sizeof(bit_t)) bits >>= (8 * sizeof(bit_t) - WEBP_BITS);
+#endif
+        br->value_ = bits | (br->value_ << WEBP_BITS); // new bits enter below the existing ones
+        br->bits_ += WEBP_BITS;
+    } else {
+        VP8LoadFinalBytes(br); // no need to be inlined
+    }
+}
+
+// Read a bit with proba 'prob'. Speed-critical function!
+static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) {
+    // Don't move this declaration! It makes a big speed difference to store
+    // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't
+    // alter br->range_ value.
+    range_t range = br->range_;
+    if (br->bits_ < 0) { // refill before decoding when bits_ has gone negative
+        VP8LoadNewBytes(br);
+    }
+    {
+        const int pos = br->bits_;
+        const range_t split = (range * prob) >> 8; // split point derived from 'prob' (scaled by 1/256)
+        const range_t value = (range_t)(br->value_ >> pos);
+#if defined(__arm__) || defined(_M_ARM) // ARM-specific
+        const int bit = ((int)(split - value) >> 31) & 1; // 1 when value > split, computed without a branch
+        if (value > split) {
+            range -= split + 1;
+            br->value_ -= (bit_t)(split + 1) << pos;
+        } else {
+            range = split;
+        }
+#else // faster version on x86
+        int bit; // Don't use 'const int bit = (value > split);", it's slower.
+        if (value > split) {
+            range -= split + 1;
+            br->value_ -= (bit_t)(split + 1) << pos;
+            bit = 1;
+        } else {
+            range = split;
+            bit = 0;
+        }
+#endif
+        if (range <= (range_t)0x7e) { // renormalize through the lookup tables when range drops to 0x7e or below
+            const int shift = kVP8Log2Range[range];
+            range = kVP8NewRange[range];
+            br->bits_ -= shift;
+        }
+        br->range_ = range;
+        return bit;
+    }
+}
+
+// simplified version of VP8GetBit() for prob=0x80 (note shift is always 1 here)
+static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) {
+    if (br->bits_ < 0) {
+        VP8LoadNewBytes(br);
+    }
+    {
+        const int pos = br->bits_;
+        const range_t split = br->range_ >> 1;
+        const range_t value = (range_t)(br->value_ >> pos);
+        const int32_t mask = (int32_t)(split - value) >> 31; // -1 or 0
+        br->bits_ -= 1;
+        br->range_ += mask;
+        br->range_ |= 1;
+        br->value_ -= (bit_t)((split + 1) & mask) << pos; // subtract (split + 1) only when mask is -1
+        return (v ^ mask) - mask; // yields -v when mask is -1, v when mask is 0
+    }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // WEBP_UTILS_BIT_READER_INL_H_
diff --git a/codec/L2/demos/webpEnc/host/src/utils/bit_writer.c b/codec/L2/demos/webpEnc/host/src/utils/bit_writer.c
new file mode 100644
index 0000000000..a6dd62a370
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/bit_writer.c
@@ -0,0 +1,311 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Bit writing and boolean coder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+//         Vikas Arora (vikaas.arora@gmail.com)
+
+#include <assert.h>
+#include <string.h> // for memcpy()
+#include <stdlib.h>
+
+#include "./bit_writer.h"
+#include "./endian_inl.h"
+#include "./utils.h"
+
+#include <iostream>
+
+//------------------------------------------------------------------------------
+// VP8BitWriter
+
+// Ensures the buffer can hold pos_ + extra_size bytes, growing it if needed.
+// Returns 1 on success; on overflow or allocation failure sets error_ and returns 0.
+static int BitWriterResize(VP8BitWriter* const bw, size_t extra_size) {
+    const uint64_t wanted_64b = (uint64_t)bw->pos_ + extra_size;
+    const size_t wanted = (size_t)wanted_64b;
+    size_t capacity = 2 * bw->max_pos_; // doubling may wrap around; the clamps below catch that
+    uint8_t* grown;
+    if (wanted_64b != wanted) { // the request does not fit in size_t
+        bw->error_ = 1;
+        return 0;
+    }
+    if (wanted <= bw->max_pos_) return 1; // the current buffer is already big enough
+    if (capacity < wanted) capacity = wanted;
+    if (capacity < 1024) capacity = 1024; // never allocate less than 1024 bytes
+    grown = (uint8_t*)WebPSafeMalloc(1ULL, capacity);
+    if (grown == NULL) {
+        bw->error_ = 1;
+        return 0;
+    }
+    if (bw->pos_ > 0) { // carry over the bytes written so far
+        assert(bw->buf_ != NULL);
+        memcpy(grown, bw->buf_, bw->pos_);
+    }
+    WebPSafeFree(bw->buf_);
+    bw->buf_ = grown;
+    bw->max_pos_ = capacity;
+    return 1;
+}
+
+static void Flush(VP8BitWriter* const bw) { // emits one completed byte of value_, resolving delayed carries
+    const int s = 8 + bw->nb_bits_;
+    const int32_t bits = bw->value_ >> s; // bits above position s: the byte to emit, plus a carry in bit 8
+    assert(bw->nb_bits_ >= 0);
+    bw->value_ -= bits << s; // keep only the low s bits in the accumulator
+    bw->nb_bits_ -= 8;
+    if ((bits & 0xff) != 0xff) {
+        size_t pos = bw->pos_;
+        if (!BitWriterResize(bw, bw->run_ + 1)) { // room for the delayed 0xff run plus this byte
+            return; // BitWriterResize() has already set bw->error_
+        }
+        if (bits & 0x100) { // overflow -> propagate carry over pending 0xff's
+            if (pos > 0) bw->buf_[pos - 1]++;
+        }
+        if (bw->run_ > 0) {
+            const int value = (bits & 0x100) ? 0x00 : 0xff; // a carry turns each delayed 0xff into 0x00
+            for (; bw->run_ > 0; --bw->run_) bw->buf_[pos++] = value;
+        }
+        bw->buf_[pos++] = bits; // only the low 8 bits are stored (buf_ holds uint8_t)
+        bw->pos_ = pos;
+    } else {
+        bw->run_++; // delay writing of bytes 0xff, pending eventual carry.
+    }
+}
+
+//------------------------------------------------------------------------------
+// renormalization
+
+static const uint8_t kNorm[128] = { // kNorm[i] = 7 - floor(log2(i + 1)): shift bringing range_ == i back to >= 127
+    7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0};
+
+// kNewRange[i] = ((i + 1) << kNorm[i]) - 1, i.e. the value of range_ after the renormalization shift
+static const uint8_t kNewRange[128] = {
+    127, 127, 191, 127, 159, 191, 223, 127, 143, 159, 175, 191, 207, 223, 239, 127, 135, 143, 151, 159, 167, 175,
+    183, 191, 199, 207, 215, 223, 231, 239, 247, 127, 131, 135, 139, 143, 147, 151, 155, 159, 163, 167, 171, 175,
+    179, 183, 187, 191, 195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 127, 129, 131,
+    133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171, 173, 175,
+    177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 203, 205, 207, 209, 211, 213, 215, 217, 219,
+    221, 223, 225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 127};
+
+int VP8PutBit(VP8BitWriter* const& bw, int bit, int prob) { // codes 'bit'; returns it. NOTE(review): bw is a C++ ref
+    const int split = (bw->range_ * prob) >> 8; // prob/256 of the range is the part kept when bit == 0
+    if (bit) {
+        bw->value_ += split + 1; // bit == 1: move past the lower part and keep the remainder of the range
+        bw->range_ -= split + 1;
+    } else {
+        bw->range_ = split;
+    }
+    if (bw->range_ < 127) { // emit 'shift' bits out and renormalize
+        const int shift = kNorm[bw->range_];
+        bw->range_ = kNewRange[bw->range_];
+        bw->value_ <<= shift;
+        bw->nb_bits_ += shift;
+        if (bw->nb_bits_ > 0) Flush(bw); // a byte is ready once nb_bits_ turns positive
+    }
+    return bit;
+}
+
+int VP8PutBitUniform(VP8BitWriter* const bw, int bit) { // codes 'bit' with an even split of the range; returns it
+    const int split = bw->range_ >> 1;
+    if (bit) {
+        bw->value_ += split + 1;
+        bw->range_ -= split + 1;
+    } else {
+        bw->range_ = split;
+    }
+    if (bw->range_ < 127) { // renormalize; the shift is fixed to 1 here (assumes range_ >= 127 on entry)
+        bw->range_ = kNewRange[bw->range_];
+        bw->value_ <<= 1;
+        bw->nb_bits_ += 1;
+        if (bw->nb_bits_ > 0) Flush(bw); // a byte is ready once nb_bits_ turns positive
+    }
+    return bit;
+}
+
+void VP8PutBits(VP8BitWriter* const bw, uint32_t value, int nb_bits) { // writes nb_bits of 'value', MSB first
+    int i;
+    assert(nb_bits > 0 && nb_bits < 32);
+    for (i = nb_bits - 1; i >= 0; --i) VP8PutBitUniform(bw, value & (1u << i));
+}
+
+// Writes a "value is non-zero" flag; when it is set, follows with the magnitude
+// shifted left by one and the sign in the lowest bit (1 = negative), on nb_bits + 1 bits.
+void VP8PutSignedBits(VP8BitWriter* const bw, int value, int nb_bits) {
+    const int is_negative = (value < 0);
+    if (VP8PutBitUniform(bw, value != 0)) {
+        VP8PutBits(bw, is_negative ? (((-value) << 1) | 1) : (value << 1), nb_bits + 1);
+    }
+}
+
+//------------------------------------------------------------------------------
+
+int VP8BitWriterInit(VP8BitWriter* const bw, size_t expected_size) { // returns 0 only if the allocation fails
+    bw->buf_ = NULL; // no storage yet; BitWriterResize() below allocates it on demand
+    bw->pos_ = 0;
+    bw->max_pos_ = 0;
+    bw->error_ = 0;
+    bw->run_ = 0;
+    bw->value_ = 0;
+    bw->range_ = 255 - 1; // range_ stores range - 1
+    bw->nb_bits_ = -8;
+    return (expected_size == 0) || BitWriterResize(bw, expected_size);
+}
+
+uint8_t* VP8BitWriterFinish(VP8BitWriter* const bw) {
+    // Push out what is still held in the accumulator by coding 9 - nb_bits_
+    // zero bits through the regular uniform path.
+    VP8PutBits(bw, 0, 9 - bw->nb_bits_);
+    // NOTE(review): a failed resize only sets bw->error_; the pointer returned
+    // below is then incomplete, so callers should check error_ -- confirm they do.
+
+    bw->nb_bits_ = 0; // pad with zeroes
+    Flush(bw);
+    return bw->buf_;
+}
+
+int VP8BitWriterAppend(VP8BitWriter* const bw, const uint8_t* data, size_t size) {
+    assert(data != NULL);
+    // Raw bytes are only accepted when no bits are pending (nb_bits_ == -8) and the buffer can grow.
+    if (bw->nb_bits_ != -8 || !BitWriterResize(bw, size)) return 0;
+    memcpy(bw->buf_ + bw->pos_, data, size);
+    bw->pos_ += size;
+    return 1; // success: 'size' bytes were copied
+}
+
+void VP8BitWriterWipeOut(VP8BitWriter* const bw) { // zeroes *bw; a NULL bw is ignored
+    if (bw != NULL) {
+        // NOTE(review): WebPSafeFree(bw->buf_) is disabled here, so buf_ is NOT released -- confirm who frees it.
+        memset(bw, 0, sizeof(*bw));
+    }
+}
+
+//------------------------------------------------------------------------------
+// VP8LBitWriter
+
+// Minimum number of bytes by which the memory buffer is guaranteed to grow
+// when extra space is needed.
+#define MIN_EXTRA_SIZE (32768ULL)
+
+// Makes sure extra_size more bytes fit at cur_, moving to a larger allocation if needed.
+// Returns 1 on success; returns 0 and sets error_ on overflow or allocation failure.
+static int VP8LBitWriterResize(VP8LBitWriter* const bw, size_t extra_size) {
+    const size_t capacity = bw->end_ - bw->buf_;
+    const size_t used = bw->cur_ - bw->buf_;
+    const uint64_t wanted_64b = (uint64_t)used + extra_size;
+    const size_t wanted = (size_t)wanted_64b;
+    size_t grown_size = (3 * capacity) >> 1; // grow by 1.5x by default
+    uint8_t* grown;
+    if (wanted != wanted_64b) { // the total does not fit in size_t
+        bw->error_ = 1;
+        return 0;
+    }
+    if (capacity > 0 && wanted <= capacity) return 1; // already large enough
+    if (grown_size < wanted) grown_size = wanted;
+    // Round up to the next multiple of 1 KiB (an exact multiple still gains 1 KiB).
+    grown_size = ((grown_size >> 10) + 1) << 10;
+    grown = (uint8_t*)WebPSafeMalloc(1ULL, grown_size);
+    if (grown == NULL) {
+        bw->error_ = 1;
+        return 0;
+    }
+    if (used > 0) {
+        memcpy(grown, bw->buf_, used);
+    }
+    WebPSafeFree(bw->buf_);
+    bw->buf_ = grown;
+    bw->cur_ = grown + used;
+    bw->end_ = grown + grown_size;
+    return 1;
+}
+
+int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size) { // returns 0 if the allocation fails
+    memset(bw, 0, sizeof(*bw)); // start from an all-zero writer: no buffer, no pending bits, no error
+    return VP8LBitWriterResize(bw, expected_size);
+}
+
+// Frees the buffer and resets every field to zero. A NULL writer is ignored.
+void VP8LBitWriterWipeOut(VP8LBitWriter* const bw) {
+    if (bw == NULL) return;
+    WebPSafeFree(bw->buf_);
+    memset(bw, 0, sizeof(*bw));
+}
+
+void VP8LPutBitsFlushBits(VP8LBitWriter* const bw) { // writes the low VP8L_WRITER_BITS of bits_ to the buffer
+    // Grow the buffer first when the next word would not fit.
+    if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) {
+        const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE;
+        if (extra_size != (size_t)extra_size || !VP8LBitWriterResize(bw, (size_t)extra_size)) {
+            bw->cur_ = bw->buf_; // on failure, rewind the write position and flag the error
+            bw->error_ = 1;
+            return;
+        }
+    }
+    *(vp8l_wtype_t*)bw->cur_ = (vp8l_wtype_t)WSWAP((vp8l_wtype_t)bw->bits_); // WSWAP: host to little-endian
+    bw->cur_ += VP8L_WRITER_BYTES;
+    bw->bits_ >>= VP8L_WRITER_BITS; // drop the bits that were just written
+    bw->used_ -= VP8L_WRITER_BITS;
+}
+
+void VP8LPutBitsInternal(VP8LBitWriter* const bw, uint32_t bits, int n_bits) { // queues the n_bits low bits of 'bits'
+    assert(n_bits <= 32);
+    // This path is written for the 16-bit writing type only:
+    assert(sizeof(vp8l_wtype_t) == 2);
+    if (n_bits > 0) {
+        vp8l_atype_t lbits = bw->bits_; // local copies; written back at the end
+        int used = bw->used_;
+// Special case of overflow handling for 32bit accumulator (2-steps flush).
+#if VP8L_WRITER_BITS == 16
+        if (used + n_bits >= VP8L_WRITER_MAX_BITS) {
+            // Fill up all the VP8L_WRITER_MAX_BITS so it can be flushed out below.
+            const int shift = VP8L_WRITER_MAX_BITS - used;
+            lbits |= (vp8l_atype_t)bits << used;
+            used = VP8L_WRITER_MAX_BITS;
+            n_bits -= shift; // the remaining high part of 'bits' is queued after the flush
+            bits >>= shift;
+            assert(n_bits <= VP8L_WRITER_MAX_BITS);
+        }
+#endif
+        // Write out complete words while at least VP8L_WRITER_BITS bits are queued.
+        while (used >= VP8L_WRITER_BITS) {
+            if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) { // next word would not fit: grow the buffer
+                const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE;
+                if (extra_size != (size_t)extra_size || !VP8LBitWriterResize(bw, (size_t)extra_size)) {
+                    bw->cur_ = bw->buf_; // on failure, rewind the write position and flag the error
+                    bw->error_ = 1;
+                    return;
+                }
+            }
+            *(vp8l_wtype_t*)bw->cur_ = (vp8l_wtype_t)WSWAP((vp8l_wtype_t)lbits);
+            bw->cur_ += VP8L_WRITER_BYTES;
+            lbits >>= VP8L_WRITER_BITS;
+            used -= VP8L_WRITER_BITS;
+        }
+        bw->bits_ = lbits | ((vp8l_atype_t)bits << used); // append the (remaining) new bits above the queued ones
+        bw->used_ = used + n_bits;
+    }
+}
+
+uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw) { // returns buf_; bw->error_ tells whether it is complete
+    // flush leftover bits
+    if (VP8LBitWriterResize(bw, (bw->used_ + 7) >> 3)) { // room for the leftover bits, rounded up to whole bytes
+        while (bw->used_ > 0) {
+            *bw->cur_++ = (uint8_t)bw->bits_; // emit the accumulator one byte at a time, lowest byte first
+            bw->bits_ >>= 8;
+            bw->used_ -= 8;
+        }
+        bw->used_ = 0; // the loop leaves used_ negative when it was not a multiple of 8
+    }
+    return bw->buf_;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/utils/bit_writer.h b/codec/L2/demos/webpEnc/host/src/utils/bit_writer.h
new file mode 100644
index 0000000000..ca48dd2ce7
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/bit_writer.h
@@ -0,0 +1,143 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Bit writing and boolean coder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_BIT_WRITER_H_
+#define WEBP_UTILS_BIT_WRITER_H_
+
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Bit-writing
+
+typedef struct VP8BitWriter VP8BitWriter;
+struct VP8BitWriter {
+    int32_t range_; // range-1
+    int32_t value_; // coder accumulator; completed bytes are extracted from its upper bits
+    int run_;      // number of delayed 0xff bytes not yet written to buf_ (they may still receive a carry)
+    int nb_bits_;  // number of pending bits (initialized to -8)
+    uint8_t* buf_; // internal buffer. Re-allocated regularly. Not owned.
+    size_t pos_;     // number of bytes written to buf_ so far
+    size_t max_pos_; // allocated size of buf_, in bytes
+    int error_; // true in case of error
+};
+
+// Initialize the object. Allocates some initial memory based on expected_size.
+int VP8BitWriterInit(VP8BitWriter* const bw, size_t expected_size);
+// Finalize the bitstream coding. Returns a pointer to the internal buffer.
+uint8_t* VP8BitWriterFinish(VP8BitWriter* const bw);
+// Release any pending memory and zeroes the object. Not a mandatory call.
+// Only useful in case of error, when the internal buffer hasn't been grabbed!
+void VP8BitWriterWipeOut(VP8BitWriter* const bw);
+
+int VP8PutBit(VP8BitWriter* const& bw, int bit, int prob);
+int VP8PutBitUniform(VP8BitWriter* const bw, int bit);
+void VP8PutBits(VP8BitWriter* const bw, uint32_t value, int nb_bits);
+void VP8PutSignedBits(VP8BitWriter* const bw, int value, int nb_bits);
+
+// Appends some bytes to the internal buffer. Data is copied.
+int VP8BitWriterAppend(VP8BitWriter* const bw, const uint8_t* data, size_t size);
+
+// Returns the approximate write position in bits: written bytes, delayed 0xff bytes and pending bits.
+static WEBP_INLINE uint64_t VP8BitWriterPos(const VP8BitWriter* const bw) {
+    return (uint64_t)(bw->pos_ + bw->run_) * 8 + 8 + bw->nb_bits_; // + 8 offsets nb_bits_, which starts at -8
+}
+
+// Returns a pointer to the internal buffer (NULL if nothing has been allocated yet).
+static WEBP_INLINE uint8_t* VP8BitWriterBuf(const VP8BitWriter* const bw) {
+    return bw->buf_;
+}
+// Returns the number of bytes written to the internal buffer so far (pos_), not its allocated capacity.
+static WEBP_INLINE size_t VP8BitWriterSize(const VP8BitWriter* const bw) {
+    return bw->pos_;
+}
+
+//------------------------------------------------------------------------------
+// VP8LBitWriter
+
+#if defined(__x86_64__) || defined(_M_X64) // 64bit
+typedef uint64_t vp8l_atype_t;             // accumulator type
+typedef uint32_t vp8l_wtype_t;             // writing type
+#define WSWAP HToLE32 // converts a word to little-endian order before it is stored
+#define VP8L_WRITER_BYTES 4     // sizeof(vp8l_wtype_t)
+#define VP8L_WRITER_BITS 32     // 8 * sizeof(vp8l_wtype_t)
+#define VP8L_WRITER_MAX_BITS 64 // 8 * sizeof(vp8l_atype_t)
+#else // every other target: 32-bit accumulator, 16-bit writes
+typedef uint32_t vp8l_atype_t; // accumulator type
+typedef uint16_t vp8l_wtype_t; // writing type
+#define WSWAP HToLE16
+#define VP8L_WRITER_BYTES 2 // sizeof(vp8l_wtype_t)
+#define VP8L_WRITER_BITS 16 // 8 * sizeof(vp8l_wtype_t)
+#define VP8L_WRITER_MAX_BITS 32 // 8 * sizeof(vp8l_atype_t)
+#endif
+
+typedef struct {
+    vp8l_atype_t bits_; // bit accumulator; new bits are OR-ed in above the 'used_' low bits
+    int used_;          // number of bits used in accumulator
+    uint8_t* buf_;      // start of buffer
+    uint8_t* cur_;      // current write position
+    uint8_t* end_;      // end of buffer (buf_ + allocated size)
+
+    // After all bits are written (VP8LBitWriterFinish()), the caller must observe
+    // the state of error_. A value of 1 indicates that a memory allocation
+    // failure has happened during bit writing. A value of 0 indicates successful
+    // writing of bits.
+    int error_;
+} VP8LBitWriter;
+
+static WEBP_INLINE size_t VP8LBitWriterNumBytes(VP8LBitWriter* const bw) { // bytes written + bytes still queued
+    return (bw->cur_ - bw->buf_) + ((bw->used_ + 7) >> 3); // queued bits are rounded up to whole bytes
+}
+
+// Returns false in case of memory allocation error.
+int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size);
+// Finalize the bitstream coding. Returns a pointer to the internal buffer.
+uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw);
+// Release any pending memory and zeroes the object.
+void VP8LBitWriterWipeOut(VP8LBitWriter* const bw);
+
+// Internal function for VP8LPutBits flushing 32 bits from the written state.
+void VP8LPutBitsFlushBits(VP8LBitWriter* const bw);
+
+// PutBits internal function used in the 16 bit vp8l_wtype_t case.
+void VP8LPutBitsInternal(VP8LBitWriter* const bw, uint32_t bits, int n_bits);
+
+// This function writes bits into bytes in increasing addresses (little endian),
+// and within a byte least-significant-bit first.
+// This function can write up to 32 bits in one go, but VP8LBitReader can only
+// read 24 bits max (VP8L_MAX_NUM_BIT_READ).
+// VP8LBitWriter's error_ flag is set in case of  memory allocation error.
+static WEBP_INLINE void VP8LPutBits(VP8LBitWriter* const bw, uint32_t bits, int n_bits) {
+    if (sizeof(vp8l_wtype_t) != 4) {
+        // 16-bit writing type: the out-of-line helper handles the two-step flush.
+        VP8LPutBitsInternal(bw, bits, n_bits);
+        return;
+    }
+    if (n_bits <= 0) return; // nothing to write
+    // 32-bit writing type: once 32 bits or more are queued, flush one word so the
+    // 64-bit accumulator has room for up to 32 new bits.
+    if (bw->used_ >= 32) VP8LPutBitsFlushBits(bw);
+    bw->bits_ |= (vp8l_atype_t)bits << bw->used_;
+    bw->used_ += n_bits;
+}
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_UTILS_BIT_WRITER_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/utils/color_cache.c b/codec/L2/demos/webpEnc/host/src/utils/color_cache.c
new file mode 100644
index 0000000000..27851f8440
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/color_cache.c
@@ -0,0 +1,46 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Color Cache for WebP Lossless
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include "./color_cache.h"
+#include "../utils/utils.h"
+
+//------------------------------------------------------------------------------
+// VP8LColorCache.
+
+int VP8LColorCacheInit(VP8LColorCache* const cc, int hash_bits) { // 1 on success, 0 if the allocation fails
+    const int hash_size = 1 << hash_bits; // number of entries in the table
+    assert(cc != NULL);
+    assert(hash_bits > 0);
+    cc->colors_ = (uint32_t*)WebPSafeCalloc((uint64_t)hash_size, sizeof(*cc->colors_));
+    if (cc->colors_ == NULL) return 0; // hash_shift_ and hash_bits_ are left untouched on failure
+    cc->hash_shift_ = 32 - hash_bits; // a key is the top hash_bits bits of a 32-bit hash
+    cc->hash_bits_ = hash_bits;
+    return 1;
+}
+
+// Releases the color table and resets the pointer. A NULL cache is ignored.
+void VP8LColorCacheClear(VP8LColorCache* const cc) {
+    if (cc == NULL) return;
+    WebPSafeFree(cc->colors_);
+    cc->colors_ = NULL;
+}
+
+void VP8LColorCacheCopy(const VP8LColorCache* const src, VP8LColorCache* const dst) { // copies all color entries
+    assert(src != NULL);
+    assert(dst != NULL);
+    assert(src->hash_bits_ == dst->hash_bits_); // both tables must have the same number of entries
+    memcpy(dst->colors_, src->colors_, ((size_t)1u << dst->hash_bits_) * sizeof(*dst->colors_));
+}
diff --git a/codec/L2/demos/webpEnc/host/src/utils/color_cache.h b/codec/L2/demos/webpEnc/host/src/utils/color_cache.h
new file mode 100644
index 0000000000..180910cfc3
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/color_cache.h
@@ -0,0 +1,74 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Color Cache for WebP Lossless
+//
+// Authors: Jyrki Alakuijala (jyrki@google.com)
+//          Urvang Joshi (urvang@google.com)
+
+#ifndef WEBP_UTILS_COLOR_CACHE_H_
+#define WEBP_UTILS_COLOR_CACHE_H_
+
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Main color cache struct.
+typedef struct {
+    uint32_t* colors_; // color entries
+    int hash_shift_;   // Hash shift: 32 - hash_bits_.
+    int hash_bits_;    // number of key bits; the table holds 1 << hash_bits_ entries
+} VP8LColorCache;
+
+static const uint32_t kHashMul = 0x1e35a7bd; // multiplier of the hash: key = (kHashMul * argb) >> hash_shift_
+
+static WEBP_INLINE uint32_t VP8LColorCacheLookup(const VP8LColorCache* const cc, uint32_t key) {
+    assert((key >> cc->hash_bits_) == 0u); // key must fit on hash_bits_ bits
+    return cc->colors_[key]; // the color currently stored in that slot
+}
+
+static WEBP_INLINE void VP8LColorCacheSet(const VP8LColorCache* const cc, uint32_t key, uint32_t argb) {
+    assert((key >> cc->hash_bits_) == 0u); // key must fit on hash_bits_ bits
+    cc->colors_[key] = argb; // overwrites the slot directly, without hashing argb
+}
+
+static WEBP_INLINE void VP8LColorCacheInsert(const VP8LColorCache* const cc, uint32_t argb) {
+    // The slot is selected by the upper bits of the 32-bit product kHashMul * argb.
+    cc->colors_[(kHashMul * argb) >> cc->hash_shift_] = argb;
+}
+
+static WEBP_INLINE int VP8LColorCacheGetIndex(const VP8LColorCache* const cc, uint32_t argb) {
+    return (kHashMul * argb) >> cc->hash_shift_; // the key of argb, same hash as VP8LColorCacheInsert()
+}
+
+static WEBP_INLINE int VP8LColorCacheContains(const VP8LColorCache* const cc, uint32_t argb) {
+    // Non-zero when the slot that argb hashes to currently holds exactly argb.
+    return (cc->colors_[(kHashMul * argb) >> cc->hash_shift_] == argb);
+}
+
+//------------------------------------------------------------------------------
+
+// Initializes the color cache with 'hash_bits' bits for the keys.
+// Returns false in case of memory error.
+int VP8LColorCacheInit(VP8LColorCache* const color_cache, int hash_bits);
+
+void VP8LColorCacheCopy(const VP8LColorCache* const src, VP8LColorCache* const dst);
+
+// Delete the memory associated to color cache.
+void VP8LColorCacheClear(VP8LColorCache* const color_cache);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // WEBP_UTILS_COLOR_CACHE_H_
diff --git a/codec/L2/demos/webpEnc/host/src/utils/endian_inl.h b/codec/L2/demos/webpEnc/host/src/utils/endian_inl.h
new file mode 100644
index 0000000000..76f83327a3
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/endian_inl.h
@@ -0,0 +1,98 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Endian related functions.
+
+#ifndef WEBP_UTILS_ENDIAN_INL_H_
+#define WEBP_UTILS_ENDIAN_INL_H_
+
+#ifdef HAVE_CONFIG_H
+#include "../webp/config.h"
+#endif
+
+#include "../dsp/dsp.h"
+#include "../webp/types.h"
+
+// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
+#if !defined(WORDS_BIGENDIAN) && (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
+                                  (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
+#define WORDS_BIGENDIAN
+#endif
+
+#if defined(WORDS_BIGENDIAN) // host-to-little-endian conversions
+#define HToLE32 BSwap32 // big-endian host: bytes must be swapped
+#define HToLE16 BSwap16
+#else
+#define HToLE32(x) (x) // little-endian host: values are already in the right order
+#define HToLE16(x) (x)
+#endif
+
+#if !defined(HAVE_CONFIG_H) // no config.h: detect the byte-swap builtins from the compiler itself
+#if LOCAL_GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16) // LOCAL_GCC_PREREQ: presumably from dsp.h
+#define HAVE_BUILTIN_BSWAP16
+#endif
+#if LOCAL_GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap32)
+#define HAVE_BUILTIN_BSWAP32
+#endif
+#if LOCAL_GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap64)
+#define HAVE_BUILTIN_BSWAP64
+#endif
+#endif // !HAVE_CONFIG_H
+
+static WEBP_INLINE uint16_t BSwap16(uint16_t x) { // returns x with its two bytes exchanged
+#if defined(HAVE_BUILTIN_BSWAP16)
+    return __builtin_bswap16(x);
+#elif defined(_MSC_VER)
+    return _byteswap_ushort(x);
+#else
+    // gcc will recognize a 'rorw $8, ...' here:
+    return (x >> 8) | ((x & 0xff) << 8); // portable fallback
+#endif // HAVE_BUILTIN_BSWAP16
+}
+
+static WEBP_INLINE uint32_t BSwap32(uint32_t x) { // returns x with its four bytes in reverse order
+#if defined(WEBP_USE_MIPS32_R2)
+    uint32_t ret;
+    __asm__ volatile(
+        "wsbh   %[ret], %[x]          \n\t"
+        "rotr   %[ret], %[ret],  16   \n\t"
+        : [ret] "=r"(ret)
+        : [x] "r"(x));
+    return ret;
+#elif defined(HAVE_BUILTIN_BSWAP32)
+    return __builtin_bswap32(x);
+#elif defined(__i386__) || defined(__x86_64__)
+    uint32_t swapped_bytes;
+    __asm__ volatile("bswap %0" : "=r"(swapped_bytes) : "0"(x));
+    return swapped_bytes;
+#elif defined(_MSC_VER)
+    return (uint32_t)_byteswap_ulong(x);
+#else
+    return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24); // portable fallback
+#endif // HAVE_BUILTIN_BSWAP32
+}
+
+static WEBP_INLINE uint64_t BSwap64(uint64_t x) { // returns x with its eight bytes in reverse order
+#if defined(HAVE_BUILTIN_BSWAP64)
+    return __builtin_bswap64(x);
+#elif defined(__x86_64__)
+    uint64_t swapped_bytes;
+    __asm__ volatile("bswapq %0" : "=r"(swapped_bytes) : "0"(x));
+    return swapped_bytes;
+#elif defined(_MSC_VER)
+    return (uint64_t)_byteswap_uint64(x);
+#else  // generic code for swapping 64-bit values (suggested by bdb@)
+    x = ((x & 0xffffffff00000000ull) >> 32) | ((x & 0x00000000ffffffffull) << 32); // swap 32-bit halves
+    x = ((x & 0xffff0000ffff0000ull) >> 16) | ((x & 0x0000ffff0000ffffull) << 16); // swap 16-bit pairs
+    x = ((x & 0xff00ff00ff00ff00ull) >> 8) | ((x & 0x00ff00ff00ff00ffull) << 8);   // swap adjacent bytes
+    return x;
+#endif // HAVE_BUILTIN_BSWAP64
+}
+
+#endif // WEBP_UTILS_ENDIAN_INL_H_
diff --git a/codec/L2/demos/webpEnc/host/src/utils/filters.c b/codec/L2/demos/webpEnc/host/src/utils/filters.c
new file mode 100644
index 0000000000..c0aa7ece51
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/filters.c
@@ -0,0 +1,74 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// filter estimation
+//
+// Author: Urvang (urvang@google.com)
+
+#include "./filters.h"
+#include <stdlib.h>
+#include <string.h>
+
+// -----------------------------------------------------------------------------
+// Quick estimate of a potentially interesting filter mode to try.
+
+#define SMAX 16
+#define SDIFF(a, b) (abs((a) - (b)) >> 4) // Scoring diff, in [0..SMAX)
+
+static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) { // a + b - c, clamped to [0, 255]
+    const int gradient = (int)a + (int)b - (int)c;
+    return (gradient < 0) ? 0 : (gradient > 255) ? 255 : gradient;
+}
+
+// Picks the filter whose residuals look cheapest on a subsampled grid: each
+// filter is scored by the sum of the distinct residual magnitudes it produced.
+// Rows are 'stride' bytes apart, so the pixel above p[i] is p[i - stride].
+WEBP_FILTER_TYPE WebPEstimateBestFilter(const uint8_t* data, int width, int height, int stride) {
+    int i, j;
+    int bins[WEBP_FILTER_LAST][SMAX];
+    memset(bins, 0, sizeof(bins));
+    // Sampling every other pixel (rows and columns starting at 2) is enough for an estimate.
+    for (j = 2; j < height - 1; j += 2) {
+        const uint8_t* const p = data + j * stride;
+        int mean = p[0];
+        for (i = 2; i < width - 1; i += 2) {
+            const int diff0 = SDIFF(p[i], mean);
+            const int diff1 = SDIFF(p[i], p[i - 1]);
+            const int diff2 = SDIFF(p[i], p[i - stride]);
+            const int grad_pred = GradientPredictor(p[i - 1], p[i - stride], p[i - stride - 1]);
+            const int diff3 = SDIFF(p[i], grad_pred);
+            bins[WEBP_FILTER_NONE][diff0] = 1;       // residual against the running mean of the row
+            bins[WEBP_FILTER_HORIZONTAL][diff1] = 1; // residual against the left neighbour
+            bins[WEBP_FILTER_VERTICAL][diff2] = 1;   // residual against the pixel above
+            bins[WEBP_FILTER_GRADIENT][diff3] = 1;   // residual against the gradient prediction
+            mean = (3 * mean + p[i] + 2) >> 2;
+        }
+    }
+    {
+        int filter;
+        WEBP_FILTER_TYPE best_filter = WEBP_FILTER_NONE;
+        int best_score = 0x7fffffff;
+        for (filter = WEBP_FILTER_NONE; filter < WEBP_FILTER_LAST; ++filter) {
+            int score = 0;
+            for (i = 0; i < SMAX; ++i) {
+                if (bins[filter][i] > 0) score += i;
+            }
+            if (score < best_score) { // strict '<' keeps the earliest filter on ties
+                best_score = score;
+                best_filter = (WEBP_FILTER_TYPE)filter;
+            }
+        }
+        return best_filter;
+    }
+}
+
+#undef SMAX
+#undef SDIFF
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/utils/filters.h b/codec/L2/demos/webpEnc/host/src/utils/filters.h
new file mode 100644
index 0000000000..309e69e388
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/filters.h
@@ -0,0 +1,31 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Spatial prediction using various filters
+//
+// Author: Urvang (urvang@google.com)
+
+#ifndef WEBP_UTILS_FILTERS_H_
+#define WEBP_UTILS_FILTERS_H_
+
+#include "../webp/types.h"
+#include "../dsp/dsp.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Fast estimate of a potentially good filter.
+WEBP_FILTER_TYPE WebPEstimateBestFilter(const uint8_t* data, int width, int height, int stride);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_UTILS_FILTERS_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/utils/huffman.c b/codec/L2/demos/webpEnc/host/src/utils/huffman.c
new file mode 100644
index 0000000000..cc60ee3d99
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/huffman.c
@@ -0,0 +1,202 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Utilities for building and looking up Huffman trees.
+//
+// Author: Urvang Joshi (urvang@google.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include "./huffman.h"
+#include "../utils/utils.h"
+#include "../webp/format_constants.h"
+
+// Huffman data read via DecodeImageStream is represented in two (red and green)
+// bytes.
+#define MAX_HTREE_GROUPS 0x10000
+
+HTreeGroup* VP8LHtreeGroupsNew(int num_htree_groups) {
+    HTreeGroup* const groups = (HTreeGroup*)WebPSafeMalloc(num_htree_groups, sizeof(HTreeGroup));
+    // The group-count limit is only asserted once the allocation has succeeded.
+    if (groups != NULL) {
+        assert(num_htree_groups <= MAX_HTREE_GROUPS);
+    }
+    return groups;
+}
+
+void VP8LHtreeGroupsFree(HTreeGroup* const htree_groups) {
+    // Nothing to release for a NULL group array.
+    if (htree_groups == NULL) return;
+    WebPSafeFree(htree_groups);
+}
+
+// Advances 'key' to the next code of length 'len' in bit-reversed order, i.e.
+// returns reverse(reverse(key, len) + 1, len) over the len low bits of key.
+static WEBP_INLINE uint32_t GetNextKey(uint32_t key, int len) {
+    uint32_t bit;
+    // Walk down from bit (len - 1) past every bit that is set in key.
+    for (bit = 1 << (len - 1); (key & bit) != 0; bit >>= 1) {
+    }
+    return (key & (bit - 1)) + bit;
+}
+
+// Stores code in table[0], table[step], ..., table[end - step]; table[end]
+// itself is not written. Assumes that end is an integer multiple of step.
+static WEBP_INLINE void ReplicateValue(HuffmanCode* table, int step, int end, HuffmanCode code) {
+    int pos = end;
+    assert(end % step == 0);
+    do {
+        table[pos -= step] = code;
+    } while (pos > 0);
+}
+
+// Computes the index width (in bits) of the next 2nd level table. 'count' is
+// the histogram of bit lengths for the remaining symbols and 'len' is the code
+// length of the next symbol to be processed.
+static WEBP_INLINE int NextTableBitSize(const int* const count, int len, int root_bits) {
+    int bits = len;
+    int room = 1 << (bits - root_bits);
+    // Grow by one bit until the codes of the current length exhaust the room.
+    for (; bits < MAX_ALLOWED_CODE_LENGTH; ++bits, room <<= 1) {
+        room -= count[bits];
+        if (room <= 0) break;
+    }
+    return bits - root_bits;
+}
+
+// Builds the lookup table(s) for 'code_lengths' into root_table. Returns the total
+// table size, or 0 on error (invalid code lengths or allocation failure).
+int VP8LBuildHuffmanTable(HuffmanCode* const root_table, int root_bits,
+                          const int code_lengths[], int code_lengths_size) {
+    HuffmanCode* table = root_table; // next available space in table
+    int total_size = 1 << root_bits; // total size root table + 2nd level table
+    int* sorted = NULL;              // symbols sorted by code length
+    int len;                         // current code length
+    int symbol;                      // symbol index in original or sorted table
+    // number of codes of each length:
+    int count[MAX_ALLOWED_CODE_LENGTH + 1] = {0};
+    // offsets in sorted table for each length:
+    int offset[MAX_ALLOWED_CODE_LENGTH + 1];
+
+    assert(code_lengths_size != 0);
+    assert(code_lengths != NULL);
+    assert(root_table != NULL);
+    assert(root_bits > 0);
+
+    // Build histogram of code lengths; a length outside [0, MAX_ALLOWED_CODE_LENGTH] would index count[] out of bounds.
+    for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+        if (code_lengths[symbol] < 0 || code_lengths[symbol] > MAX_ALLOWED_CODE_LENGTH) {
+            return 0;
+        }
+        ++count[code_lengths[symbol]];
+    }
+
+    // Error, all code lengths are zeros.
+    if (count[0] == code_lengths_size) {
+        return 0;
+    }
+
+    // Generate offsets into sorted symbol table by code length.
+    offset[1] = 0;
+    for (len = 1; len < MAX_ALLOWED_CODE_LENGTH; ++len) {
+        if (count[len] > (1 << len)) {
+            return 0;
+        }
+        offset[len + 1] = offset[len] + count[len];
+    }
+
+    sorted = (int*)WebPSafeMalloc(code_lengths_size, sizeof(*sorted));
+    if (sorted == NULL) {
+        return 0;
+    }
+
+    // Sort symbols by length, by symbol order within each length.
+    for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+        const int symbol_code_length = code_lengths[symbol];
+        if (code_lengths[symbol] > 0) {
+            sorted[offset[symbol_code_length]++] = symbol;
+        }
+    }
+
+    // Special case code with only one value (offset[] now holds end positions, so the last entry is the symbol total).
+    if (offset[MAX_ALLOWED_CODE_LENGTH] == 1) {
+        HuffmanCode code;
+        code.bits = 0;
+        code.value = (uint16_t)sorted[0];
+        ReplicateValue(table, 1, total_size, code);
+        WebPSafeFree(sorted);
+        return total_size;
+    }
+
+    {
+        int step;                         // step size to replicate values in current table
+        uint32_t low = -1;                // low bits for current root entry
+        uint32_t mask = total_size - 1;   // mask for low bits
+        uint32_t key = 0;                 // reversed prefix code
+        int num_nodes = 1;                // number of Huffman tree nodes
+        int num_open = 1;                 // number of open branches in current tree level
+        int table_bits = root_bits;       // key length of current table
+        int table_size = 1 << table_bits; // size of current table
+        symbol = 0;
+        // Fill in root table.
+        for (len = 1, step = 2; len <= root_bits; ++len, step <<= 1) {
+            num_open <<= 1;
+            num_nodes += num_open;
+            num_open -= count[len];
+            if (num_open < 0) {
+                WebPSafeFree(sorted);
+                return 0;
+            }
+            for (; count[len] > 0; --count[len]) {
+                HuffmanCode code;
+                code.bits = (uint8_t)len;
+                code.value = (uint16_t)sorted[symbol++];
+                ReplicateValue(&table[key], step, table_size, code);
+                key = GetNextKey(key, len);
+            }
+        }
+
+        // Fill in 2nd level tables and add pointers to root table.
+        for (len = root_bits + 1, step = 2; len <= MAX_ALLOWED_CODE_LENGTH; ++len, step <<= 1) {
+            num_open <<= 1;
+            num_nodes += num_open;
+            num_open -= count[len];
+            if (num_open < 0) {
+                WebPSafeFree(sorted);
+                return 0;
+            }
+            for (; count[len] > 0; --count[len]) {
+                HuffmanCode code;
+                if ((key & mask) != low) {
+                    table += table_size;
+                    table_bits = NextTableBitSize(count, len, root_bits);
+                    table_size = 1 << table_bits;
+                    total_size += table_size;
+                    low = key & mask;
+                    root_table[low].bits = (uint8_t)(table_bits + root_bits);
+                    root_table[low].value = (uint16_t)((table - root_table) - low);
+                }
+                code.bits = (uint8_t)(len - root_bits);
+                code.value = (uint16_t)sorted[symbol++];
+                ReplicateValue(&table[key >> root_bits], step, table_size, code);
+                key = GetNextKey(key, len);
+            }
+        }
+
+        // Check if tree is full.
+        if (num_nodes != 2 * offset[MAX_ALLOWED_CODE_LENGTH] - 1) {
+            WebPSafeFree(sorted);
+            return 0;
+        }
+    }
+
+    WebPSafeFree(sorted);
+    return total_size;
+}
diff --git a/codec/L2/demos/webpEnc/host/src/utils/huffman.h b/codec/L2/demos/webpEnc/host/src/utils/huffman.h
new file mode 100644
index 0000000000..4e5684da05
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/huffman.h
@@ -0,0 +1,89 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Utilities for building and looking up Huffman trees.
+//
+// Author: Urvang Joshi (urvang@google.com)
+
+#ifndef WEBP_UTILS_HUFFMAN_H_
+#define WEBP_UTILS_HUFFMAN_H_
+
+#include <assert.h>
+#include "../webp/format_constants.h"
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define HUFFMAN_TABLE_BITS 8
+#define HUFFMAN_TABLE_MASK ((1 << HUFFMAN_TABLE_BITS) - 1)
+
+#define LENGTHS_TABLE_BITS 7
+#define LENGTHS_TABLE_MASK ((1 << LENGTHS_TABLE_BITS) - 1)
+
+// Huffman lookup table entry
+typedef struct {
+    uint8_t bits;   // number of bits used for this symbol
+    uint16_t value; // symbol value or table offset
+} HuffmanCode;
+
+// long version for holding 32b values
+typedef struct {
+    int bits;       // number of bits used for this symbol,
+                    // or an impossible value if not a literal code.
+    uint32_t value; // 32b packed ARGB value if literal,
+                    // or non-literal symbol otherwise
+} HuffmanCode32;
+
+#define HUFFMAN_PACKED_BITS 6
+#define HUFFMAN_PACKED_TABLE_SIZE (1u << HUFFMAN_PACKED_BITS)
+
+// Huffman table group.
+// Includes special handling for the following cases:
+//  - is_trivial_literal: one common literal base for RED/BLUE/ALPHA (not GREEN)
+//  - is_trivial_code: only 1 code (no bit is read from bitstream)
+//  - use_packed_table: few enough literal symbols, so all the bit codes
+//    can fit into a small look-up table packed_table[]
+// The common literal base, if applicable, is stored in 'literal_arb'.
+typedef struct HTreeGroup HTreeGroup;
+struct HTreeGroup {
+    HuffmanCode* htrees[HUFFMAN_CODES_PER_META_CODE];
+    int is_trivial_literal; // True, if huffman trees for Red, Blue & Alpha
+                            // Symbols are trivial (have a single code).
+    uint32_t literal_arb;   // If is_trivial_literal is true, this is the
+                            // ARGB value of the pixel, with Green channel
+                            // being set to zero.
+    int is_trivial_code;    // true if is_trivial_literal with only one code
+    int use_packed_table;   // use packed table below for short literal code
+    // table mapping input bits to packed values, or escape case to literal code
+    HuffmanCode32 packed_table[HUFFMAN_PACKED_TABLE_SIZE];
+};
+
+// Creates the instance of HTreeGroup with specified number of tree-groups.
+HTreeGroup* VP8LHtreeGroupsNew(int num_htree_groups);
+
+// Releases the memory allocated for HTreeGroup.
+void VP8LHtreeGroupsFree(HTreeGroup* const htree_groups);
+
+// Builds Huffman lookup table assuming code lengths are in symbol order.
+// 'code_lengths' holds 'code_lengths_size' entries, one code length per symbol,
+// and is only read. NOTE(review): root_table is written without a size check.
+// Returns built table size or 0 in case of error (invalid tree or
+// memory error).
+int VP8LBuildHuffmanTable(HuffmanCode* const root_table,
+                          int root_bits,
+                          const int code_lengths[],
+                          int code_lengths_size);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // WEBP_UTILS_HUFFMAN_H_
diff --git a/codec/L2/demos/webpEnc/host/src/utils/huffman_encode.c b/codec/L2/demos/webpEnc/host/src/utils/huffman_encode.c
new file mode 100644
index 0000000000..bfc52ae70a
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/huffman_encode.c
@@ -0,0 +1,407 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+// Entropy encoding (Huffman) for webp lossless.
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include "./huffman_encode.h"
+#include "../utils/utils.h"
+#include "../webp/format_constants.h"
+
+// -----------------------------------------------------------------------------
+// Util function to optimize the symbol map for RLE coding
+
+// Heuristic for stride collapsing: true when a and b differ by less than 4.
+static int ValuesShouldBeCollapsedToStrideAverage(int a, int b) {
+    return abs(a - b) <= 3;
+}
+
+// Changes the population counts in 'counts' (in place) so that the subsequent
+// Huffman tree compression, especially its RLE part, gives smaller output.
+static void OptimizeHuffmanForRle(int length, uint8_t* const good_for_rle, uint32_t* const counts) {
+    // 1) Trim trailing zero counts from 'length'; return early if all counts are zero.
+    int i;
+    for (; length >= 0; --length) {
+        if (length == 0) {
+            return; // All zeros.
+        }
+        if (counts[length - 1] != 0) {
+            // Now counts[0..length - 1] does not have trailing zeros.
+            break;
+        }
+    }
+    // 2) Let's mark all population counts that already can be encoded
+    // with an rle code.
+    {
+        // Let's not spoil any of the existing good rle codes.
+        // Mark any run of at least 5 zero counts as good_for_rle.
+        // Mark any run of at least 7 equal non-zero counts as good_for_rle.
+        uint32_t symbol = counts[0];
+        int stride = 0;
+        for (i = 0; i < length + 1; ++i) {
+            if (i == length || counts[i] != symbol) {
+                if ((symbol == 0 && stride >= 5) || (symbol != 0 && stride >= 7)) {
+                    int k;
+                    for (k = 0; k < stride; ++k) {
+                        good_for_rle[i - k - 1] = 1;
+                    }
+                }
+                stride = 1;
+                if (i != length) {
+                    symbol = counts[i];
+                }
+            } else {
+                ++stride;
+            }
+        }
+    }
+    // 3) Let's replace those population counts that lead to more rle codes.
+    {
+        uint32_t stride = 0;
+        uint32_t limit = counts[0];
+        uint32_t sum = 0;
+        for (i = 0; i < length + 1; ++i) {
+            if (i == length || good_for_rle[i] || (i != 0 && good_for_rle[i - 1]) ||
+                !ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) {
+                if (stride >= 4 || (stride >= 3 && sum == 0)) {
+                    uint32_t k;
+                    // The stride must end, collapse what we have, if we have enough (4).
+                    uint32_t count = (sum + stride / 2) / stride;
+                    if (count < 1) {
+                        count = 1;
+                    }
+                    if (sum == 0) {
+                        // Don't make an all zeros stride to be upgraded to ones.
+                        count = 0;
+                    }
+                    for (k = 0; k < stride; ++k) {
+                        // We don't want to change value at counts[i],
+                        // that is already belonging to the next stride. Thus - 1.
+                        counts[i - k - 1] = count;
+                    }
+                }
+                stride = 0;
+                sum = 0;
+                if (i < length - 3) {
+                    // All interesting strides have a count of at least 4,
+                    // at least when non-zeros.
+                    limit = (counts[i] + counts[i + 1] + counts[i + 2] + counts[i + 3] + 2) / 4;
+                } else if (i < length) {
+                    limit = counts[i];
+                } else {
+                    limit = 0;
+                }
+            }
+            ++stride;
+            if (i != length) {
+                sum += counts[i];
+                if (stride >= 4) {
+                    limit = (sum + stride / 2) / stride;
+                }
+            }
+        }
+    }
+}
+
+// A comparer function for two Huffman trees: sorts first by 'total_count_'
+// (larger comes first), and then by 'value_' (smaller comes first).
+// Used as the qsort() comparator in GenerateOptimalTree().
+static int CompareHuffmanTrees(const void* ptr1, const void* ptr2) {
+    const HuffmanTree* const lhs = (const HuffmanTree*)ptr1;
+    const HuffmanTree* const rhs = (const HuffmanTree*)ptr2;
+    if (lhs->total_count_ != rhs->total_count_) {
+        return (lhs->total_count_ > rhs->total_count_) ? -1 : 1;
+    }
+    // Equal counts: the symbol value breaks the tie, so two distinct entries
+    // never compare equal (asserted below).
+    assert(lhs->value_ != rhs->value_);
+    return (lhs->value_ < rhs->value_) ? -1 : 1;
+}
+
+static void SetBitDepths(const HuffmanTree* const tree,
+                         const HuffmanTree* const pool,
+                         uint8_t* const bit_depths,
+                         int level) {
+    if (tree->pool_index_left_ < 0) { // leaf: record the depth of its symbol
+        bit_depths[tree->value_] = level;
+        return;
+    }
+    SetBitDepths(&pool[tree->pool_index_left_], pool, bit_depths, level + 1);
+    SetBitDepths(&pool[tree->pool_index_right_], pool, bit_depths, level + 1);
+}
+
+// Create an optimal Huffman tree.
+//
+// (histogram,histogram_size): population counts.
+// tree_depth_limit: maximum bit depth (inclusive) of the codes.
+// bit_depths[]: output, how many bits are used for each symbol.
+// tree: scratch storage, holding the sorted work list followed by the node pool.
+// Returns nothing; when every count is zero, bit_depths[] is left untouched.
+//
+// The catch here is that the tree cannot be arbitrarily deep.
+//
+// count_min is the value that is to be faked as the minimum count
+// and this minimum value is doubled until the tree matches the
+// maximum length requirement.
+//
+// This algorithm is not of excellent performance for very long data blocks,
+// especially when population counts are longer than 2**tree_depth_limit, but
+// we are not planning to use this with extremely long blocks.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+static void GenerateOptimalTree(const uint32_t* const histogram,
+                                int histogram_size,
+                                HuffmanTree* tree,
+                                int tree_depth_limit,
+                                uint8_t* const bit_depths) {
+    uint32_t count_min;
+    HuffmanTree* tree_pool;
+    int tree_size_orig = 0;
+    int i;
+
+    for (i = 0; i < histogram_size; ++i) {
+        if (histogram[i] != 0) {
+            ++tree_size_orig;
+        }
+    }
+
+    if (tree_size_orig == 0) { // pretty optimal already!
+        return;
+    }
+
+    tree_pool = tree + tree_size_orig;
+
+    // For block sizes with less than 64k symbols we never need to do a
+    // second iteration of this loop.
+    // If we actually start running inside this loop a lot, we would perhaps
+    // be better off with the Katajainen algorithm.
+    assert(tree_size_orig <= (1 << (tree_depth_limit - 1)));
+    for (count_min = 1;; count_min *= 2) {
+        int tree_size = tree_size_orig;
+        // We need to pack the Huffman tree in tree_depth_limit bits.
+        // So, we try by faking histogram entries to be at least 'count_min'.
+        int idx = 0;
+        int j;
+        for (j = 0; j < histogram_size; ++j) {
+            if (histogram[j] != 0) {
+                const uint32_t count = (histogram[j] < count_min) ? count_min : histogram[j];
+                tree[idx].total_count_ = count;
+                tree[idx].value_ = j;
+                tree[idx].pool_index_left_ = -1;
+                tree[idx].pool_index_right_ = -1;
+                ++idx;
+            }
+        }
+
+        // Build the Huffman tree.
+        qsort(tree, tree_size, sizeof(*tree), CompareHuffmanTrees);
+
+        if (tree_size > 1) { // Normal case.
+            int tree_pool_size = 0;
+            while (tree_size > 1) { // Finish when we have only one root.
+                uint32_t count;
+                tree_pool[tree_pool_size++] = tree[tree_size - 1];
+                tree_pool[tree_pool_size++] = tree[tree_size - 2];
+                count = tree_pool[tree_pool_size - 1].total_count_ + tree_pool[tree_pool_size - 2].total_count_;
+                tree_size -= 2;
+                {
+                    // Search for the insertion point.
+                    int k;
+                    for (k = 0; k < tree_size; ++k) {
+                        if (tree[k].total_count_ <= count) {
+                            break;
+                        }
+                    }
+                    memmove(tree + (k + 1), tree + k, (tree_size - k) * sizeof(*tree));
+                    tree[k].total_count_ = count;
+                    tree[k].value_ = -1;
+
+                    tree[k].pool_index_left_ = tree_pool_size - 1;
+                    tree[k].pool_index_right_ = tree_pool_size - 2;
+                    tree_size = tree_size + 1;
+                }
+            }
+            SetBitDepths(&tree[0], tree_pool, bit_depths, 0);
+        } else if (tree_size == 1) { // Trivial case: only one element.
+            bit_depths[tree[0].value_] = 1;
+        }
+
+        {
+            // Test if this Huffman tree satisfies our 'tree_depth_limit' criteria.
+            int max_depth = bit_depths[0];
+            for (j = 1; j < histogram_size; ++j) {
+                if (max_depth < bit_depths[j]) {
+                    max_depth = bit_depths[j];
+                }
+            }
+            if (max_depth <= tree_depth_limit) {
+                break;
+            }
+        }
+    }
+}
+
+// -----------------------------------------------------------------------------
+// Coding of the Huffman tree values
+
+static HuffmanTreeToken* CodeRepeatedValues(int repetitions, HuffmanTreeToken* tokens, int value, int prev_value) {
+    assert(value <= MAX_ALLOWED_CODE_LENGTH);
+    // A value that differs from the previous one is first written out once.
+    if (value != prev_value) {
+        tokens->code = value;
+        tokens->extra_bits = 0;
+        ++tokens;
+        --repetitions;
+    }
+    // Escape code 16 with extra_bits 3 accounts for 6 repetitions at a time.
+    while (repetitions >= 7) {
+        tokens->code = 16;
+        tokens->extra_bits = 3;
+        ++tokens;
+        repetitions -= 6;
+    }
+    if (repetitions >= 3) {
+        // 3..6 left: a single code-16 token with extra_bits = count - 3.
+        tokens->code = 16;
+        tokens->extra_bits = repetitions - 3;
+        ++tokens;
+    } else {
+        // Fewer than 3 left (possibly none): write each one out directly.
+        for (; repetitions > 0; --repetitions) {
+            tokens->code = value;
+            tokens->extra_bits = 0;
+            ++tokens;
+        }
+    }
+    return tokens;
+}
+
+static HuffmanTreeToken* CodeRepeatedZeros(int repetitions, HuffmanTreeToken* tokens) {
+    // Writes tokens for a run of 'repetitions' zeros and returns the new end.
+    // Longest form first: code 18 with extra_bits 0x7f stands for 138 zeros.
+    while (repetitions >= 139) {
+        tokens->code = 18;
+        tokens->extra_bits = 0x7f;
+        ++tokens;
+        repetitions -= 138;
+    }
+    if (repetitions >= 11) {
+        // 11..138 zeros: code 18, extra_bits = count - 11.
+        tokens->code = 18;
+        tokens->extra_bits = repetitions - 11;
+        ++tokens;
+    } else if (repetitions >= 3) {
+        // 3..10 zeros: code 17, extra_bits = count - 3.
+        tokens->code = 17;
+        tokens->extra_bits = repetitions - 3;
+        ++tokens;
+    } else {
+        // 0..2 zeros: plain 0-value tokens.
+        for (; repetitions > 0; --repetitions) {
+            tokens->code = 0;
+            tokens->extra_bits = 0;
+            ++tokens;
+        }
+    }
+    return tokens;
+}
+
+int VP8LCreateCompressedHuffmanTree(const HuffmanTreeCode* const tree, HuffmanTreeToken* tokens, int max_tokens) {
+    HuffmanTreeToken* const first = tokens;
+    HuffmanTreeToken* const limit = tokens + max_tokens;
+    const int num_lengths = tree->num_symbols;
+    int last_nonzero = 8; // 8 is the initial value for rle.
+    int start = 0;
+    int stop;
+    assert(tokens != NULL);
+    // Each iteration consumes one maximal run [start, stop) of equal code
+    // lengths and appends the tokens that encode it.
+    for (; start < num_lengths; start = stop) {
+        const int value = tree->code_lengths[start];
+        stop = start + 1;
+        while (stop < num_lengths && tree->code_lengths[stop] == value) ++stop;
+        if (value != 0) {
+            tokens = CodeRepeatedValues(stop - start, tokens, value, last_nonzero);
+            last_nonzero = value;
+        } else {
+            tokens = CodeRepeatedZeros(stop - start, tokens);
+        }
+        assert(tokens <= limit);
+    }
+    (void)limit; // suppress 'unused variable' warning
+    return (int)(tokens - first);
+}
+
+// -----------------------------------------------------------------------------
+
+// Pre-reversed 4-bit values.
+static const uint8_t kReversedBits[16] = {0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
+                                          0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf};
+
+static uint32_t ReverseBits(int num_bits, uint32_t bits) {
+    uint32_t reversed = 0;
+    int shift = MAX_ALLOWED_CODE_LENGTH + 1;
+    int done;
+    // Mirror nibble by nibble into the top of a (MAX_ALLOWED_CODE_LENGTH + 1)-bit window.
+    for (done = 0; done < num_bits; done += 4, bits >>= 4) {
+        shift -= 4;
+        reversed |= kReversedBits[bits & 0xf] << shift;
+    }
+    return reversed >> (MAX_ALLOWED_CODE_LENGTH + 1 - num_bits); // drop the unused low bits
+}
+
+// Derives the actual code bits of every symbol from the tree's bit depths.
+static void ConvertBitDepthsToSymbols(HuffmanTreeCode* const tree) {
+    // A bit depth of 0 means that the symbol does not exist.
+    uint32_t next_code[MAX_ALLOWED_CODE_LENGTH + 1];
+    int depth_count[MAX_ALLOWED_CODE_LENGTH + 1] = {0};
+    uint32_t code = 0;
+    int num_symbols;
+    int depth;
+    int sym;
+
+    assert(tree != NULL);
+    num_symbols = tree->num_symbols;
+    // Histogram of the bit depths.
+    for (sym = 0; sym < num_symbols; ++sym) {
+        assert(tree->code_lengths[sym] <= MAX_ALLOWED_CODE_LENGTH);
+        ++depth_count[tree->code_lengths[sym]];
+    }
+    depth_count[0] = 0; // ignore unused symbol
+    next_code[0] = 0;
+    // First code of each depth: (running code + count of the previous depth) << 1.
+    for (depth = 1; depth <= MAX_ALLOWED_CODE_LENGTH; ++depth) {
+        code = (code + depth_count[depth - 1]) << 1;
+        next_code[depth] = code;
+    }
+    for (sym = 0; sym < num_symbols; ++sym) {
+        const int depth_of_sym = tree->code_lengths[sym];
+        tree->codes[sym] = ReverseBits(depth_of_sym, next_code[depth_of_sym]++);
+    }
+}
+
+// -----------------------------------------------------------------------------
+// Main entry point
+
+void VP8LCreateHuffmanTree(uint32_t* const histogram,
+                           int tree_depth_limit,
+                           uint8_t* const buf_rle,
+                           HuffmanTree* const huff_tree,
+                           HuffmanTreeCode* const huff_code) {
+    const int symbol_count = huff_code->num_symbols;
+    memset(buf_rle, 0, symbol_count * sizeof(buf_rle[0])); // clear the rle marks
+    OptimizeHuffmanForRle(symbol_count, buf_rle, histogram); // may rewrite histogram
+    GenerateOptimalTree(histogram, symbol_count, huff_tree, tree_depth_limit, huff_code->code_lengths);
+    // Turn the resulting bit lengths into the actual bit codes.
+    ConvertBitDepthsToSymbols(huff_code);
+}
diff --git a/codec/L2/demos/webpEnc/host/src/utils/huffman_encode.h b/codec/L2/demos/webpEnc/host/src/utils/huffman_encode.h
new file mode 100644
index 0000000000..2a93231b7e
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/huffman_encode.h
@@ -0,0 +1,61 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+// Entropy encoding (Huffman) for webp lossless
+
+#ifndef WEBP_UTILS_HUFFMAN_ENCODE_H_
+#define WEBP_UTILS_HUFFMAN_ENCODE_H_
+
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Struct for holding the tree header in coded form.
+typedef struct {
+    uint8_t code;       // value (0..15) or escape code (16,17,18)
+    uint8_t extra_bits; // extra bits for escape codes
+} HuffmanTreeToken;
+
+// Struct to represent the tree codes (depth and bits array).
+typedef struct {
+    int num_symbols;       // Number of symbols.
+    uint8_t* code_lengths; // Code lengths of the symbols.
+    uint16_t* codes;       // Symbol Codes.
+} HuffmanTreeCode;
+
+// Struct to represent the Huffman tree.
+typedef struct {
+    uint32_t total_count_; // Symbol frequency.
+    int value_;            // Symbol value.
+    int pool_index_left_;  // Index for the left sub-tree.
+    int pool_index_right_; // Index for the right sub-tree.
+} HuffmanTree;
+
+// Turn the Huffman tree into a token sequence.
+// Returns the number of tokens used.
+int VP8LCreateCompressedHuffmanTree(const HuffmanTreeCode* const tree, HuffmanTreeToken* tokens, int max_tokens);
+
+// Creates an optimized tree: fills tree->code_lengths and tree->codes for the
+// tree->num_symbols symbols. 'buf_rle' and 'huff_tree' are pre-allocated scratch
+// buffers; 'histogram' may be modified. Tokenizing is a separate step (see above).
+void VP8LCreateHuffmanTree(uint32_t* const histogram,
+                           int tree_depth_limit,
+                           uint8_t* const buf_rle,
+                           HuffmanTree* const huff_tree,
+                           HuffmanTreeCode* const tree);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // WEBP_UTILS_HUFFMAN_ENCODE_H_
diff --git a/codec/L2/demos/webpEnc/host/src/utils/profiling.c b/codec/L2/demos/webpEnc/host/src/utils/profiling.c
new file mode 100644
index 0000000000..d28d7b7c55
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/profiling.c
@@ -0,0 +1,666 @@
+#include <stdio.h>
+#include "profiling.h"
+
+// Decode profiling: one (time*, count*) pair of globals per instrumented decode stage, all starting at zero.
+
+// lossy (times are printed with an "ms" suffix by DisplayDecodeProfilingResult)
+double timeWebPDecode = 0;
+int countWebPDecode = 0;
+double timeGetCoeffs = 0;
+int countGetCoeffs = 0;
+double timeDecodeInto = 0;
+int countDecodeInto = 0;
+double timeVP8ProcessRow = 0;
+int countVP8ProcessRow = 0;
+double timeFinishRow = 0;
+int countFinishRow = 0;
+double timeParseFrame = 0;
+int countParseFrame = 0;
+double timeVP8DecodeMB = 0;
+int countVP8DecodeMB = 0;
+double timeVP8ParseIntraModeRow = 0;
+int countVP8ParseIntraModeRow = 0;
+double timeReconstructRow = 0;
+int countReconstructRow = 0;
+double timeParseResiduals = 0;
+int countParseResiduals = 0;
+double timeVP8TransformWHT = 0;
+int countVP8TransformWHT = 0;
+double timeParseResidualsIf1 = 0;
+int countParseResidualsIf1 = 0;
+double timeParseResidualsLoop1 = 0;
+int countParseResidualsLoop1 = 0;
+double timeParseResidualsLoop2 = 0;
+int countParseResidualsLoop2 = 0;
+
+// lossless (the VP8LDecodeImage pair is reset but its report line is commented out in DisplayDecodeProfilingResult)
+double timeVP8LDecodeImage = 0;
+int countVP8LDecodeImage = 0;
+double timeDecodeImageData = 0;
+int countDecodeImageData = 0;
+double timeProcessRows = 0;
+int countProcessRows = 0;
+double timeApplyInverseTransforms = 0;
+int countApplyInverseTransforms = 0;
+double timeProcessRowsCopy1 = 0;
+int countProcessRowsCopy1 = 0;
+double timeProcessRowsCopy2 = 0;
+int countProcessRowsCopy2 = 0;
+double timeVP8LAddGreenToBlueAndRed = 0;
+int countVP8LAddGreenToBlueAndRed = 0;
+double timeColorSpaceInverseTransform = 0;
+int countColorSpaceInverseTransform = 0;
+double timePredictorInverseTransform = 0;
+int countPredictorInverseTransform = 0;
+double timeColorIndexInverseTransform = 0;
+int countColorIndexInverseTransform = 0;
+
+// Puts every decode profiling accumulator (elapsed times and call counts) back to zero.
+void ResetDecodeProfilingData() {
+    // lossy: accumulated times
+    timeWebPDecode = timeGetCoeffs = timeDecodeInto = timeVP8ProcessRow = timeFinishRow = 0;
+    timeParseFrame = timeVP8DecodeMB = timeVP8ParseIntraModeRow = timeReconstructRow = 0;
+    timeParseResiduals = timeVP8TransformWHT = 0;
+    timeParseResidualsIf1 = timeParseResidualsLoop1 = timeParseResidualsLoop2 = 0;
+
+    // lossy: call counts
+    countWebPDecode = countGetCoeffs = countDecodeInto = countVP8ProcessRow = countFinishRow = 0;
+    countParseFrame = countVP8DecodeMB = countVP8ParseIntraModeRow = countReconstructRow = 0;
+    countParseResiduals = countVP8TransformWHT = 0;
+    countParseResidualsIf1 = countParseResidualsLoop1 = countParseResidualsLoop2 = 0;
+
+    // lossless: accumulated times
+    timeVP8LDecodeImage = timeDecodeImageData = timeProcessRows = timeApplyInverseTransforms = 0;
+    timeProcessRowsCopy1 = timeProcessRowsCopy2 = timeVP8LAddGreenToBlueAndRed = 0;
+    timeColorSpaceInverseTransform = timePredictorInverseTransform = timeColorIndexInverseTransform = 0;
+
+    // lossless: call counts
+    countVP8LDecodeImage = countDecodeImageData = countProcessRows = countApplyInverseTransforms = 0;
+    countProcessRowsCopy1 = countProcessRowsCopy2 = countVP8LAddGreenToBlueAndRed = 0;
+    countColorSpaceInverseTransform = countPredictorInverseTransform = countColorIndexInverseTransform = 0;
+}
+
+// Writes one decode-stage report line to stderr, or nothing when 'count' is zero.
+// 'label' is the stage name including its leading indentation. The percentage is the share of
+// timeWebPDecode; it is reported as 0 when that total is zero so the division is never by zero.
+static void PrintDecodeStage(const char* label, double total_time, int count) {
+    double share = 0;
+    if (0 == count) {
+        return;
+    }
+    if (0 != timeWebPDecode) {
+        share = total_time * 100 / timeWebPDecode;
+    }
+    // "%%" prints a literal percent sign; a bare '%' in front of "\n" is not a valid conversion.
+    fprintf(stderr, "%s Total Time: %.6f ms, count:%d, average time: %.6f ms, percentage:%.6f%%\n", label, total_time,
+            count, total_time / count, share);
+}
+
+// Display profiling result
+// Prints the decode counters to stderr as an indented list; stages whose count is zero are skipped.
+void DisplayDecodeProfilingResult() {
+    // The top-level line has no percentage column: it is the total the other lines are compared to.
+    if (0 != countWebPDecode) {
+        fprintf(stderr, "WebPDecode Total Time: %.6f ms, count:%d, average time: %.6f ms\n", timeWebPDecode,
+                countWebPDecode, timeWebPDecode / countWebPDecode);
+    }
+    PrintDecodeStage("  DecodeInto", timeDecodeInto, countDecodeInto);
+
+    // lossy start
+    PrintDecodeStage("    ParseFrame", timeParseFrame, countParseFrame);
+    PrintDecodeStage("      VP8ParseIntraModeRow", timeVP8ParseIntraModeRow, countVP8ParseIntraModeRow);
+    PrintDecodeStage("      VP8DecodeMB", timeVP8DecodeMB, countVP8DecodeMB);
+    PrintDecodeStage("        ParseResiduals", timeParseResiduals, countParseResiduals);
+    PrintDecodeStage("          VP8TransformWHT", timeVP8TransformWHT, countVP8TransformWHT);
+    PrintDecodeStage("          GetCoeffs", timeGetCoeffs, countGetCoeffs);
+    PrintDecodeStage("          ParseResidualsIf1", timeParseResidualsIf1, countParseResidualsIf1);
+    PrintDecodeStage("          ParseResidualsLoop1", timeParseResidualsLoop1, countParseResidualsLoop1);
+    PrintDecodeStage("          ParseResidualsLoop2", timeParseResidualsLoop2, countParseResidualsLoop2);
+    PrintDecodeStage("      VP8ProcessRow", timeVP8ProcessRow, countVP8ProcessRow);
+    PrintDecodeStage("        ReconstructRow", timeReconstructRow, countReconstructRow);
+    PrintDecodeStage("        FinishRow", timeFinishRow, countFinishRow);
+    // lossy end
+
+    // lossless start
+    // The "  VP8LDecodeImage" line is not printed: its report was already commented out.
+    PrintDecodeStage("    DecodeImageData", timeDecodeImageData, countDecodeImageData);
+    PrintDecodeStage("      ProcessRows", timeProcessRows, countProcessRows);
+    PrintDecodeStage("        ApplyInverseTransforms", timeApplyInverseTransforms, countApplyInverseTransforms);
+    PrintDecodeStage("          ProcessRowsCopy1", timeProcessRowsCopy1, countProcessRowsCopy1);
+    PrintDecodeStage("          VP8LAddGreenToBlueAndRed", timeVP8LAddGreenToBlueAndRed,
+                     countVP8LAddGreenToBlueAndRed);
+    PrintDecodeStage("          ColorSpaceInverseTransform", timeColorSpaceInverseTransform,
+                     countColorSpaceInverseTransform);
+    PrintDecodeStage("          PredictorInverseTransform", timePredictorInverseTransform,
+                     countPredictorInverseTransform);
+    PrintDecodeStage("          ProcessRowsCopy2", timeProcessRowsCopy2, countProcessRowsCopy2);
+    PrintDecodeStage("        ColorIndexInverseTransform", timeColorIndexInverseTransform,
+                     countColorIndexInverseTransform);
+    // lossless end
+}
+
+// Encode profiling: one (time*, count*) pair of globals per instrumented encode stage, plus StatLoopFlag.
+double timeWebPEncode = 0;
+int countWebPEncode = 0;
+
+// lossless
+double timeApplyTransforms = 0;
+int countApplyTransforms = 0;
+double timeEncode = 0;
+int countEncode = 0;
+double timeVP8ApplyNearLossless = 0;
+int countVP8ApplyNearLossless = 0;
+double timeVP8ApplyNearLosslessOcl = 0;
+int countVP8ApplyNearLosslessOcl = 0;
+double timeApplySubtractGreen = 0;
+int countApplySubtractGreen = 0;
+double timeEncPredict = 0;
+int countEncPredict = 0;
+double timeVP8LResidualImage = 0;
+int countVP8LResidualImage = 0;
+double timeGetPredModesResiduleForTile = 0;
+int countGetPredModesResiduleForTile = 0;
+double timeGetBestPredictorForTile2 = 0;
+int countGetBestPredictorForTile2 = 0;
+double timeBestPredict = 0;
+int countBestPredict = 0;
+double timeEncColorFilt = 0;
+int countEncColorFilt = 0;
+double timeBestColor = 0;
+int countBestColor = 0;
+double timeGetBackRef = 0;
+int countGetBackRef = 0;
+double timeBackwardRefLz77 = 0;
+int countBackwardRefLz77 = 0;
+double timeBackwardRefRle = 0;
+int countBackwardRefRle = 0;
+double timeGetHistoImg = 0;
+int countGetHistoImg = 0;
+double timeHistogramCombineStochastic = 0;
+int countHistogramCombineStochastic = 0;
+double timeHistogramCombineStochastic_forloop = 0;
+int countHistogramCombineStochastic_forloop = 0;
+double timeHistogramAddEval = 0;
+int countHistogramAddEval = 0;
+double timeHistogramRemap = 0;
+int countHistogramRemap = 0;
+
+// lossy
+double timeEncAnalyze = 0;
+int countEncAnalyze = 0;
+double timeEncAnalyzeOcl = 0;
+int countEncAnalyzeOcl = 0;
+double timeEncTokenLoop = 0;
+int countEncTokenLoop = 0;
+double timeEncLoop = 0;
+int countEncLoop = 0;
+double timeEncLoopOcl = 0;
+int countEncLoopOcl = 0;
+int StatLoopFlag = 1; // The only global here that starts at 1; ResetEncodeProfilingData() also restores it to 1.
+double timeStatLoop = 0;
+int countStatLoop = 0;
+double timeVP8Decimate = 0;
+int countVP8Decimate = 0;
+double timeVP8EmitTokens = 0;
+int countVP8EmitTokens = 0;
+double timeCodeResiduals = 0;
+int countCodeResiduals = 0;
+double timeVP8Decimate_2 = 0;
+int countVP8Decimate_2 = 0;
+double timeStoreFilterSts = 0;
+int countStoreFilterSts = 0;
+double timeVP8Decimate_BestIntra = 0;
+int countVP8Decimate_BestIntra = 0;
+double timeBestIntra16 = 0;
+int countBestIntra16 = 0;
+double timeBestIntra4 = 0;
+int countBestIntra4 = 0;
+double timeBestUV = 0;
+int countBestUV = 0;
+double timeRefineUsingDist = 0;
+int countRefineUsingDist = 0;
+double timeRefineUsingDist_2 = 0;
+int countRefineUsingDist_2 = 0;
+
+// Puts every encode profiling accumulator back to zero and restores StatLoopFlag to 1.
+void ResetEncodeProfilingData() {
+    // whole-encode totals
+    timeWebPEncode = 0;
+    countWebPEncode = 0;
+
+    // lossless: accumulated times
+    timeApplyTransforms = timeEncode = timeVP8ApplyNearLossless = timeVP8ApplyNearLosslessOcl = 0;
+    timeApplySubtractGreen = timeEncPredict = timeVP8LResidualImage = 0;
+    timeGetPredModesResiduleForTile = timeGetBestPredictorForTile2 = timeBestPredict = 0;
+    timeEncColorFilt = timeBestColor = timeGetBackRef = timeBackwardRefLz77 = timeBackwardRefRle = 0;
+    timeGetHistoImg = timeHistogramCombineStochastic = timeHistogramCombineStochastic_forloop = 0;
+    timeHistogramAddEval = timeHistogramRemap = 0;
+
+    // lossless: call counts
+    countApplyTransforms = countEncode = countVP8ApplyNearLossless = countVP8ApplyNearLosslessOcl = 0;
+    countApplySubtractGreen = countEncPredict = countVP8LResidualImage = 0;
+    countGetPredModesResiduleForTile = countGetBestPredictorForTile2 = countBestPredict = 0;
+    countEncColorFilt = countBestColor = countGetBackRef = countBackwardRefLz77 = countBackwardRefRle = 0;
+    countGetHistoImg = countHistogramCombineStochastic = countHistogramCombineStochastic_forloop = 0;
+    countHistogramAddEval = countHistogramRemap = 0;
+
+    // lossy: the flag is the one value that resets to 1 instead of 0
+    StatLoopFlag = 1;
+
+    // lossy: accumulated times
+    timeEncAnalyze = timeEncAnalyzeOcl = timeEncTokenLoop = timeEncLoop = timeEncLoopOcl = 0;
+    timeStatLoop = timeVP8Decimate = timeVP8EmitTokens = timeCodeResiduals = 0;
+    timeVP8Decimate_2 = timeStoreFilterSts = timeVP8Decimate_BestIntra = 0;
+    timeBestIntra16 = timeBestIntra4 = timeBestUV = 0;
+    timeRefineUsingDist = timeRefineUsingDist_2 = 0;
+
+    // lossy: call counts
+    countEncAnalyze = countEncAnalyzeOcl = countEncTokenLoop = countEncLoop = countEncLoopOcl = 0;
+    countStatLoop = countVP8Decimate = countVP8EmitTokens = countCodeResiduals = 0;
+    countVP8Decimate_2 = countStoreFilterSts = countVP8Decimate_BestIntra = 0;
+    countBestIntra16 = countBestIntra4 = countBestUV = 0;
+    countRefineUsingDist = countRefineUsingDist_2 = 0;
+}
+
+// Writes one encode-stage report line to stderr, or nothing when 'count' is zero.
+// 'label' is the stage name including its leading indentation and its trailing tab/space padding,
+// so it is printed directly in front of "Total Time". The percentage is the share of timeWebPEncode;
+// it is reported as 0 when that total is zero so the division is never by zero.
+static void PrintEncodeStage(const char* label, double total_time, int count) {
+    double share = 0;
+    if (0 == count) {
+        return;
+    }
+    if (0 != timeWebPEncode) {
+        share = total_time * 100 / timeWebPEncode;
+    }
+    // "%%" prints a literal percent sign; a bare '%' in front of "\n" is not a valid conversion.
+    fprintf(stderr, "%sTotal Time: %.6f ms, count:%6d, average time: %.6f ms, percentage:%.6f%%\n", label, total_time,
+            count, total_time / count, share);
+}
+
+// Display profiling result
+// Prints the encode counters to stderr as an indented list; stages whose count is zero are skipped.
+void DisplayEncodeProfilingResult() {
+    // The top-level line has no percentage column: it is the total the other lines are compared to.
+    if (0 != countWebPEncode) {
+        fprintf(stderr, "WebPEncode Total Time: %.6f ms, count:%d, average time: %.6f ms\n", timeWebPEncode,
+                countWebPEncode, timeWebPEncode / countWebPEncode);
+    }
+
+    // lossless section; the banner is tied to the ApplyTransforms counter
+    if (0 != countApplyTransforms) {
+        fprintf(stderr, "lossless encode.......\n");
+    }
+    PrintEncodeStage("  ApplyTransforms\t\t", timeApplyTransforms, countApplyTransforms);
+    // Both near-lossless lines are gated on their call count (the divisor of the average), not on the time.
+    PrintEncodeStage("    VP8ApplyNearLossless\t", timeVP8ApplyNearLossless, countVP8ApplyNearLossless);
+    PrintEncodeStage("    VP8ApplyNearLosslessOcl\t", timeVP8ApplyNearLosslessOcl, countVP8ApplyNearLosslessOcl);
+    PrintEncodeStage("    ApplySubtractGreen\t\t", timeApplySubtractGreen, countApplySubtractGreen);
+    PrintEncodeStage("    ApplyPredictFilter\t\t", timeEncPredict, countEncPredict);
+    PrintEncodeStage("      VP8LResidualImage\t\t", timeVP8LResidualImage, countVP8LResidualImage);
+    PrintEncodeStage("        GetBestPredictorForTile\t", timeBestPredict, countBestPredict);
+    PrintEncodeStage("        GetPredModesResiduleForTile ", timeGetPredModesResiduleForTile,
+                     countGetPredModesResiduleForTile);
+    PrintEncodeStage("        GetBestPredictorForTile2\t", timeGetBestPredictorForTile2,
+                     countGetBestPredictorForTile2);
+    PrintEncodeStage("    ApplyCrossColorFilter\t", timeEncColorFilt, countEncColorFilt);
+    // This label has no separator before "Total Time"; kept as is so the emitted text does not change.
+    PrintEncodeStage("      GetBestColorTransformForTile", timeBestColor, countBestColor);
+    PrintEncodeStage("  GetBackwardReferences\t\t", timeGetBackRef, countGetBackRef);
+    PrintEncodeStage("    BackwardReferencesLz77\t", timeBackwardRefLz77, countBackwardRefLz77);
+    PrintEncodeStage("    BackwardReferencesRle\t", timeBackwardRefRle, countBackwardRefRle);
+    PrintEncodeStage("  VP8LGetHistoImageSymbols\t", timeGetHistoImg, countGetHistoImg);
+    PrintEncodeStage("    HistogramCombineStochastic\t", timeHistogramCombineStochastic,
+                     countHistogramCombineStochastic);
+    PrintEncodeStage("    HistogramCombineStochastic_forloop\t", timeHistogramCombineStochastic_forloop,
+                     countHistogramCombineStochastic_forloop);
+    PrintEncodeStage("      HistogramAddEval\t\t", timeHistogramAddEval, countHistogramAddEval);
+    PrintEncodeStage("    HistogramRemap\t\t", timeHistogramRemap, countHistogramRemap);
+    PrintEncodeStage("  Encode\t\t\t", timeEncode, countEncode);
+
+    // lossy section; the banner is printed once per analyze variant that ran, as before
+    if (0 != countEncAnalyze) {
+        fprintf(stderr, "lossy encode.......\n");
+    }
+    PrintEncodeStage("  VP8EncAnalyze\t\t\t", timeEncAnalyze, countEncAnalyze);
+    if (0 != countEncAnalyzeOcl) {
+        fprintf(stderr, "lossy encode.......\n");
+    }
+    PrintEncodeStage("  VP8EncAnalyzeOcl\t\t", timeEncAnalyzeOcl, countEncAnalyzeOcl);
+    PrintEncodeStage("  VP8EncTokenLoop\t\t", timeEncTokenLoop, countEncTokenLoop);
+    PrintEncodeStage("  VP8EncLoop\t\t\t", timeEncLoop, countEncLoop);
+    PrintEncodeStage("  VP8EncLoopOcl\t\t\t", timeEncLoopOcl, countEncLoopOcl);
+    PrintEncodeStage("    StatLoop\t\t\t", timeStatLoop, countStatLoop);
+    // The "_2" decimate line is indented one level deeper when VP8EncLoop was also counted.
+    PrintEncodeStage((0 != countEncLoop) ? "      VP8Decimate\t\t" : "    VP8Decimate\t\t\t", timeVP8Decimate_2,
+                     countVP8Decimate_2);
+    // Gated on the "_2" count, which is the divisor used for this line's average.
+    PrintEncodeStage("        RefineUsingDistortion\t", timeRefineUsingDist_2, countRefineUsingDist_2);
+    PrintEncodeStage("    VP8StoreFilterStats\t\t\t", timeStoreFilterSts, countStoreFilterSts);
+    PrintEncodeStage("    CodeResiduals\t\t", timeCodeResiduals, countCodeResiduals);
+    PrintEncodeStage("    VP8Decimate\t\t\t", timeVP8Decimate, countVP8Decimate);
+    PrintEncodeStage("      RefineUsingDistortion\t", timeRefineUsingDist, countRefineUsingDist);
+    PrintEncodeStage("      PickBestIntra16\t\t", timeBestIntra16, countBestIntra16);
+    PrintEncodeStage("      PickBestIntra4\t\t", timeBestIntra4, countBestIntra4);
+    PrintEncodeStage("      PickBestUV\t\t", timeBestUV, countBestUV);
+    PrintEncodeStage("    VP8EmitTokens\t\t", timeVP8EmitTokens, countVP8EmitTokens);
+}
diff --git a/codec/L2/demos/webpEnc/host/src/utils/profiling.h b/codec/L2/demos/webpEnc/host/src/utils/profiling.h
new file mode 100644
index 0000000000..d5aa882e1d
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/profiling.h
@@ -0,0 +1,195 @@
+#ifndef WEBP_PROFILING_H_
+#define WEBP_PROFILING_H_
+
+#include "webp/types.h"
+
+// Decode profiling
+// lossy
+extern double timeWebPDecode;
+extern int countWebPDecode;
+extern double timeGetCoeffs;
+extern int countGetCoeffs;
+extern double timeDecodeInto;
+extern int countDecodeInto;
+extern double timeVP8ProcessRow;
+extern int countVP8ProcessRow;
+extern double timeFinishRow;
+extern int countFinishRow;
+extern double timeParseFrame;
+extern int countParseFrame;
+extern double timeVP8DecodeMB;
+extern int countVP8DecodeMB;
+extern double timeVP8ParseIntraModeRow;
+extern int countVP8ParseIntraModeRow;
+extern double timeReconstructRow;
+extern int countReconstructRow;
+extern double timeParseResiduals;
+extern int countParseResiduals;
+extern double timeVP8TransformWHT;
+extern int countVP8TransformWHT;
+extern double timeParseResidualsIf1;
+extern int countParseResidualsIf1;
+extern double timeParseResidualsLoop1;
+extern int countParseResidualsLoop1;
+extern double timeParseResidualsLoop2;
+extern int countParseResidualsLoop2;
+
+// lossless
+extern double timeVP8LDecodeImage;
+extern int countVP8LDecodeImage;
+extern double timeDecodeImageData;
+extern int countDecodeImageData;
+extern double timeProcessRows;
+extern int countProcessRows;
+extern double timeApplyInverseTransforms;
+extern int countApplyInverseTransforms;
+extern double timeProcessRowsCopy1;
+extern int countProcessRowsCopy1;
+extern double timeProcessRowsCopy2;
+extern int countProcessRowsCopy2;
+extern double timeVP8LAddGreenToBlueAndRed;
+extern int countVP8LAddGreenToBlueAndRed;
+extern double timeColorSpaceInverseTransform;
+extern int countColorSpaceInverseTransform;
+extern double timePredictorInverseTransform;
+extern int countPredictorInverseTransform;
+extern double timeColorIndexInverseTransform;
+extern int countColorIndexInverseTransform;
+
+// Encode profiling
+// lossless
+extern double timeApplyTransforms;
+extern int countApplyTransforms;
+extern double timeEncode;
+extern int countEncode;
+extern double timeVP8ApplyNearLossless;
+extern int countVP8ApplyNearLossless;
+extern double timeVP8ApplyNearLosslessOcl;
+extern int countVP8ApplyNearLosslessOcl;
+extern double timeApplySubtractGreen;
+extern int countApplySubtractGreen;
+extern double timeWebPEncode;
+extern int countWebPEncode;
+extern double timeEncPredict;
+extern int countEncPredict;
+extern double timeVP8LResidualImage;
+extern int countVP8LResidualImage;
+extern double timeGetPredModesResiduleForTile;
+extern int countGetPredModesResiduleForTile;
+extern double timeGetBestPredictorForTile2;
+extern int countGetBestPredictorForTile2;
+extern double timeBestPredict;
+extern int countBestPredict;
+extern double timeEncColorFilt;
+extern int countEncColorFilt;
+extern double timeBestColor;
+extern int countBestColor;
+extern double timeGetBackRef;
+extern int countGetBackRef;
+extern double timeBackwardRefLz77;
+extern int countBackwardRefLz77;
+extern double timeBackwardRefRle;
+extern int countBackwardRefRle;
+extern double timeGetHistoImg;
+extern int countGetHistoImg;
+extern double timeHistogramCombineStochastic;
+extern int countHistogramCombineStochastic;
+extern double timeHistogramCombineStochastic_forloop;
+extern int countHistogramCombineStochastic_forloop;
+extern double timeHistogramAddEval;
+extern int countHistogramAddEval;
+extern double timeHistogramRemap;
+extern int countHistogramRemap;
+
+// lossy
+extern double timeEncAnalyze;
+extern int countEncAnalyze;
+extern double timeEncAnalyzeOcl;
+extern int countEncAnalyzeOcl;
+extern double timeEncTokenLoop;
+extern int countEncTokenLoop;
+extern double timeEncLoop;
+extern int countEncLoop;
+extern double timeEncLoopOcl;
+extern int countEncLoopOcl;
+extern int StatLoopFlag;
+extern double timeStatLoop;
+extern int countStatLoop;
+extern double timeVP8Decimate;
+extern int countVP8Decimate;
+extern double timeVP8EmitTokens;
+extern int countVP8EmitTokens;
+extern double timeCodeResiduals;
+extern int countCodeResiduals;
+extern double timeVP8Decimate_2;
+extern int countVP8Decimate_2;
+extern double timeStoreFilterSts;
+extern int countStoreFilterSts;
+extern double timeVP8Decimate_BestIntra;
+extern int countVP8Decimate_BestIntra;
+extern double timeBestIntra16;
+extern int countBestIntra16;
+extern double timeBestIntra4;
+extern int countBestIntra4;
+extern double timeBestUV;
+extern int countBestUV;
+extern double timeRefineUsingDist;
+extern int countRefineUsingDist;
+extern double timeRefineUsingDist_2;
+extern int countRefineUsingDist_2;
+// Timer back-end selection: each branch provides the StopProfilingWatch type
+// together with StartProfiling() and StopProfiling().
+#if defined _WIN32 && !defined __GNUC__
+#include <windows.h>
+
+typedef LARGE_INTEGER StopProfilingWatch;
+
+// Stores the current performance-counter reading in 'watch'.
+static WEBP_INLINE void StartProfiling(StopProfilingWatch* watch) {
+    QueryPerformanceCounter(watch);
+}
+
+// Re-arms 'watch' with the current counter reading and returns the time, in
+// seconds, elapsed since the previous reading. Returns 0.0 when the counter
+// or its frequency cannot be queried.
+// NOTE(review): this variant takes no accumulator arguments and reports
+// seconds, whereas the gettimeofday() variant below reports milliseconds and
+// updates '*total_time' / '*count' -- confirm which contract callers rely on.
+static WEBP_INLINE double StopProfiling(StopProfilingWatch* watch) {
+    const LARGE_INTEGER old_value = *watch;
+    LARGE_INTEGER freq;
+    if (!QueryPerformanceCounter(watch)) return 0.0;
+    if (!QueryPerformanceFrequency(&freq)) return 0.0;
+    if (freq.QuadPart == 0) return 0.0;
+    return (watch->QuadPart - old_value.QuadPart) / (double)freq.QuadPart;
+}
+
+#else               /* !_WIN32 */
+#include <string.h>
+#include <sys/time.h>
+
+typedef struct timeval StopProfilingWatch;
+
+// Stores the current wall-clock time in 'watch'.
+static WEBP_INLINE void StartProfiling(StopProfilingWatch* watch) {
+    gettimeofday(watch, NULL);
+}
+
+// Re-arms 'watch' with the current time and returns the time, in
+// milliseconds, elapsed since the previous reading. The same duration is
+// added to '*total_time' and '*count' is incremented by one.
+static WEBP_INLINE double StopProfiling(StopProfilingWatch* watch, double* total_time, int* count) {
+    const struct timeval old_value = *watch; // previous reading
+    double delta_sec, delta_usec, cur_time;
+
+    gettimeofday(watch, NULL);
+    delta_sec = (double)watch->tv_sec - old_value.tv_sec;
+    delta_usec = (double)watch->tv_usec - old_value.tv_usec;
+    cur_time = (delta_sec + delta_usec / 1000000.0) * 1000; // seconds -> ms
+    *total_time += cur_time;
+    (*count)++;
+
+    return cur_time;
+}
+
+#endif /* _WIN32 */
+
+// Report / reset helpers for the counters declared above. They are declared
+// outside the platform switch so that every build sees the prototypes.
+void DisplayDecodeProfilingResult();
+void DisplayEncodeProfilingResult();
+void ResetDecodeProfilingData();
+void ResetEncodeProfilingData();
+
+#endif /* WEBP_PROFILING_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/utils/quant_levels.c b/codec/L2/demos/webpEnc/host/src/utils/quant_levels.c
new file mode 100644
index 0000000000..d048fc5b43
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/quant_levels.c
@@ -0,0 +1,137 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Quantize levels for specified number of quantization-levels ([2, 256]).
+// Min and max values are preserved (usual 0 and 255 for alpha plane).
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+
+#include "./quant_levels.h"
+
+#define NUM_SYMBOLS 256
+
+#define MAX_ITER 6           // Maximum number of convergence steps.
+#define ERROR_THRESHOLD 1e-4 // MSE stopping criterion.
+
+// -----------------------------------------------------------------------------
+// Quantize levels.
+
+// Quantizes the 'width' x 'height' plane 'data' in place so that it uses at
+// most 'num_levels' distinct values (valid range: [2, 256]). The smallest and
+// largest input values are always kept as representatives; the ones in
+// between are refined with up to MAX_ITER k-means iterations.
+// If 'sse' is not NULL it receives the sum of squared errors of the last
+// iteration (0 when the input already used few enough levels).
+// Returns 0 on invalid arguments (NULL data, non-positive dimensions,
+// 'num_levels' out of range) and 1 otherwise.
+int QuantizeLevels(uint8_t* const data, int width, int height, int num_levels, uint64_t* const sse) {
+    int freq[NUM_SYMBOLS] = {0};
+    int q_level[NUM_SYMBOLS] = {0};
+    double inv_q_level[NUM_SYMBOLS] = {0};
+    int min_s = 255, max_s = 0;
+    size_t data_size;
+    int i, num_levels_in, iter;
+    double last_err = 1.e38, err = 0.;
+    double err_threshold;
+
+    if (data == NULL) {
+        return 0;
+    }
+
+    if (width <= 0 || height <= 0) {
+        return 0;
+    }
+
+    if (num_levels < 2 || num_levels > 256) {
+        return 0;
+    }
+
+    // The plane size is only computed once the dimensions are known to be
+    // positive, and in size_t so the product cannot overflow 'int'.
+    data_size = (size_t)height * (size_t)width;
+    err_threshold = ERROR_THRESHOLD * data_size;
+
+    // Build the histogram, the value range and the number of used levels.
+    {
+        size_t n;
+        num_levels_in = 0;
+        for (n = 0; n < data_size; ++n) {
+            num_levels_in += (freq[data[n]] == 0);
+            if (min_s > data[n]) min_s = data[n];
+            if (max_s < data[n]) max_s = data[n];
+            ++freq[data[n]];
+        }
+    }
+
+    if (num_levels_in <= num_levels) goto End; // nothing to do!
+
+    // Start with uniformly spread centroids.
+    for (i = 0; i < num_levels; ++i) {
+        inv_q_level[i] = min_s + (double)(max_s - min_s) * i / (num_levels - 1);
+    }
+
+    // Fixed values. Won't be changed.
+    q_level[min_s] = 0;
+    q_level[max_s] = num_levels - 1;
+    assert(inv_q_level[0] == min_s);
+    assert(inv_q_level[num_levels - 1] == max_s);
+
+    // k-Means iterations.
+    for (iter = 0; iter < MAX_ITER; ++iter) {
+        double q_sum[NUM_SYMBOLS] = {0};
+        double q_count[NUM_SYMBOLS] = {0};
+        int s, slot = 0;
+
+        // Assign classes to representatives.
+        for (s = min_s; s <= max_s; ++s) {
+            // Keep track of the nearest neighbour 'slot'
+            while (slot < num_levels - 1 && 2 * s > inv_q_level[slot] + inv_q_level[slot + 1]) {
+                ++slot;
+            }
+            if (freq[s] > 0) {
+                // Accumulate in double: 's * freq[s]' can exceed INT_MAX on
+                // planes with more than ~8M samples of one value.
+                q_sum[slot] += (double)s * freq[s];
+                q_count[slot] += freq[s];
+            }
+            q_level[s] = slot;
+        }
+
+        // Assign new representatives to classes (first and last stay fixed).
+        if (num_levels > 2) {
+            for (slot = 1; slot < num_levels - 1; ++slot) {
+                const double count = q_count[slot];
+                if (count > 0.) {
+                    inv_q_level[slot] = q_sum[slot] / count;
+                }
+            }
+        }
+
+        // Compute convergence error.
+        err = 0.;
+        for (s = min_s; s <= max_s; ++s) {
+            const double error = s - inv_q_level[q_level[s]];
+            err += freq[s] * error * error;
+        }
+
+        // Check for convergence: we stop as soon as the error is no
+        // longer improving.
+        if (last_err - err < err_threshold) break;
+        last_err = err;
+    }
+
+    // Remap the alpha plane to quantized values.
+    {
+        // double->int rounding operation can be costly, so we do it
+        // once for all before remapping. We also perform the data[] -> slot
+        // mapping, while at it (avoid one indirection in the final loop).
+        // Only entries in [min_s, max_s] are filled; no other index is read.
+        uint8_t map[NUM_SYMBOLS];
+        int s;
+        size_t n;
+        for (s = min_s; s <= max_s; ++s) {
+            const int slot = q_level[s];
+            map[s] = (uint8_t)(inv_q_level[slot] + .5);
+        }
+        // Final pass.
+        for (n = 0; n < data_size; ++n) {
+            data[n] = map[data[n]];
+        }
+    }
+End:
+    // Store sum of squared error if needed.
+    if (sse != NULL) *sse = (uint64_t)err;
+
+    return 1;
+}
diff --git a/codec/L2/demos/webpEnc/host/src/utils/quant_levels.h b/codec/L2/demos/webpEnc/host/src/utils/quant_levels.h
new file mode 100644
index 0000000000..026acefd53
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/quant_levels.h
@@ -0,0 +1,35 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Alpha plane quantization utility
+//
+// Author:  Vikas Arora (vikasa@google.com)
+
+#ifndef WEBP_UTILS_QUANT_LEVELS_H_
+#define WEBP_UTILS_QUANT_LEVELS_H_
+
+#include <stdlib.h>
+
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Replace, in place, the input 'data' of size 'width'x'height' with at most
+// 'num_levels' quantized values. If not NULL, 'sse' will contain the sum of
+// squared error.
+// Valid range for 'num_levels' is [2, 256].
+// Returns false (0) in case of error (data is NULL, or parameters are
+// invalid) and true (1) otherwise.
+int QuantizeLevels(uint8_t* const data, int width, int height, int num_levels, uint64_t* const sse);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_UTILS_QUANT_LEVELS_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/utils/quant_levels_dec.c b/codec/L2/demos/webpEnc/host/src/utils/quant_levels_dec.c
new file mode 100644
index 0000000000..428744fb6e
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/quant_levels_dec.c
@@ -0,0 +1,271 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Implement gradient smoothing: we replace a current alpha value by its
+// surrounding average if it's close enough (that is: the change will be less
+// than the minimum distance between two quantized level).
+// We use sliding window for computing the 2d moving average.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./quant_levels_dec.h"
+
+#include <string.h> // for memset
+
+#include "./utils.h"
+
+// #define USE_DITHERING   // uncomment to enable ordered dithering (not vital)
+
+// Fixed-point configuration shared by the smoothing filter below.
+#define FIX 16                           // fix-point precision for averaging
+#define LFIX 2                           // extra precision for look-up table
+#define LUT_SIZE ((1 << (8 + LFIX)) - 1) // largest |index| of the look-up table (indexed in [-LUT_SIZE, LUT_SIZE])
+
+#if defined(USE_DITHERING)
+
+#define DFIX 4  // extra precision for ordered dithering
+#define DSIZE 4 // dithering size (must be a power of two)
+// Ordered-dithering matrix, indexed as [row % DSIZE][x % DSIZE].
+// cf. http://en.wikipedia.org/wiki/Ordered_dithering
+static const uint8_t kOrderedDither[DSIZE][DSIZE] = {{0, 8, 2, 10}, // coefficients are in DFIX fixed-point precision
+                                                     {12, 4, 14, 6},
+                                                     {3, 11, 1, 9},
+                                                     {15, 7, 13, 5}};
+
+#else
+#define DFIX 0 // no dithering: no extra precision bits
+#endif
+
+// State of one smoothing pass over an alpha plane. Filled in by InitParams()
+// and released with CleanupParams().
+typedef struct {
+    int width_, height_; // dimension
+    int row_;            // current input row being processed (starts at -radius_)
+    uint8_t* src_;       // input pointer
+    uint8_t* dst_;       // output pointer
+
+    int radius_; // filter radius (=delay)
+    int scale_;  // normalization factor, in FIX bits precision
+
+    void* mem_; // all memory (single allocation backing the buffers below)
+
+    // various scratch buffers
+    uint16_t* start_;   // first row of the rolling buffer of cumulative sums
+    uint16_t* cur_;     // row of the rolling buffer being replaced
+    uint16_t* end_;     // end of the rolling buffer; also the row VFilter() writes its output to
+    uint16_t* top_;     // most recently written row of the rolling buffer
+    uint16_t* average_; // one row of averaged values produced by HFilter()
+
+    // input levels distribution
+    int num_levels_;     // number of quantized levels
+    int min_, max_;      // min and max level values
+    int min_level_dist_; // smallest distance between two consecutive levels
+
+    int16_t* correction_; // size = 1 + 2*LUT_SIZE  -> ~4k memory; points at the middle entry
+} SmoothParams;
+
+//------------------------------------------------------------------------------
+
+// Bits that must all be clear for a value to fit in 8 + DFIX bits.
+#define CLIP_MASK (int)(~0U << (8 + DFIX))
+// Drops the DFIX extra precision bits of 'v' and saturates the result to
+// [0, 255].
+static WEBP_INLINE uint8_t clip_8b(int v) {
+    if ((v & CLIP_MASK) == 0) {
+        return (uint8_t)(v >> DFIX); // already in range
+    }
+    if (v < 0) {
+        return 0u;
+    }
+    return 255u;
+}
+
+// Vertical accumulation: folds the current source row into the rolling
+// buffer of cumulative sums and writes the windowed column sums to p->end_.
+static void VFilter(SmoothParams* const p) {
+    const int width = p->width_;
+    const uint8_t* const row_in = p->src_;
+    const uint16_t* const prev = p->top_;
+    uint16_t* const oldest = p->cur_;
+    uint16_t* const window = p->end_;
+    uint16_t running = 0; // all arithmetic is modulo 16bit
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint16_t total;
+        running += row_in[i];
+        total = prev[i] + running;
+        window[i] = total - oldest[i]; // vertical sum of 'r' pixels.
+        oldest[i] = total;
+    }
+
+    // Rotate the rolling buffer: the row just written becomes 'top', and the
+    // next slot (wrapping around at the end) becomes the one to replace.
+    p->top_ = oldest;
+    p->cur_ = oldest + width;
+    if (p->cur_ == p->end_) {
+        p->cur_ = p->start_;
+    }
+
+    // Edges are replicated: the source pointer only advances while the
+    // current row lies inside [0, height - 2].
+    if (p->row_ >= 0 && p->row_ < p->height_ - 1) {
+        p->src_ += p->width_;
+    }
+}
+
+// horizontal accumulation. Reads the row at p->end_ (written by VFilter(),
+// cumulative along x) and writes the scaled window sums to p->average_.
+// We use mirror replication of missing pixels, as it's
+// a little easier to implement (surprisingly).
+// NOTE(review): the index arithmetic below appears to require
+// w >= 2 * r + 1 -- confirm that callers guarantee it.
+static void HFilter(SmoothParams* const p) {
+    const uint16_t* const in = p->end_;
+    uint16_t* const out = p->average_;
+    const uint32_t scale = p->scale_;
+    const int w = p->width_;
+    const int r = p->radius_;
+
+    int x;
+    for (x = 0; x <= r; ++x) { // left mirroring
+        const uint16_t delta = in[x + r - 1] + in[r - x];
+        out[x] = (delta * scale) >> FIX;
+    }
+    for (; x < w - r; ++x) { // bulk middle run
+        // difference of two cumulative sums = sum over the window
+        const uint16_t delta = in[x + r] - in[x - r - 1];
+        out[x] = (delta * scale) >> FIX;
+    }
+    for (; x < w; ++x) { // right mirroring
+        const uint16_t delta = 2 * in[w - 1] - in[2 * w - 2 - r - x] - in[x - r - 1];
+        out[x] = (delta * scale) >> FIX;
+    }
+}
+
+// Emits one filtered output row: every sample strictly between the plane's
+// min and max levels is corrected from the averaged row, then the output
+// pointer moves on to the next row.
+static void ApplyFilter(SmoothParams* const p) {
+    uint8_t* const row = p->dst_;
+    const int16_t* const lut = p->correction_;
+    const uint16_t* const avg = p->average_;
+    const int width = p->width_;
+#if defined(USE_DITHERING)
+    const uint8_t* const dither = kOrderedDither[p->row_ % DSIZE];
+#endif
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        const int level = row[i];
+        int corrected;
+        // The extreme levels are left untouched.
+        if (level >= p->max_ || level <= p->min_) continue;
+        corrected = (level << DFIX) + lut[avg[i] - (level << LFIX)];
+#if defined(USE_DITHERING)
+        corrected += dither[i % DSIZE];
+#endif
+        row[i] = clip_8b(corrected);
+    }
+    p->dst_ = row + width; // advance output pointer
+}
+
+//------------------------------------------------------------------------------
+// Initialize correction table
+
+// Fills the correction table 'lut' for indices in [-LUT_SIZE, LUT_SIZE]
+// ('lut' must point at the middle entry of the table).
+// With threshold1 = min_dist << LFIX and threshold2 = 3/4 * threshold1:
+//   f(x) = x for x <= threshold2
+//   f(x) = 0 for x >= threshold1
+// with a linear ramp in between, and f(-x) = -f(x).
+static void InitCorrectionLUT(int16_t* const lut, int min_dist) {
+    const int threshold1 = min_dist << LFIX;
+    const int threshold2 = (3 * threshold1) >> 2;
+    const int ramp_top = threshold2 << DFIX;
+    const int ramp_len = threshold1 - threshold2;
+    int idx;
+
+    lut[0] = 0;
+    for (idx = 1; idx <= LUT_SIZE; ++idx) {
+        int value;
+        if (idx <= threshold2) {
+            value = idx << DFIX; // identity part
+        } else if (idx < threshold1) {
+            value = ramp_top * (threshold1 - idx) / ramp_len; // linear fall-off
+        } else {
+            value = 0; // too far from the average: no correction
+        }
+        value >>= LFIX;
+        lut[+idx] = +value;
+        lut[-idx] = -value;
+    }
+}
+
+// Scans the 'size' samples of 'data' and records in 'p' the min and max
+// levels, the smallest distance between two consecutive used levels, and
+// adds the number of distinct levels to p->num_levels_.
+static void CountLevels(const uint8_t* const data, int size, SmoothParams* const p) {
+    uint8_t seen[256] = {0};
+    int lowest = 255;
+    int highest = 0;
+    int previous = -1;
+    int n, level;
+
+    for (n = 0; n < size; ++n) {
+        const int v = data[n];
+        if (v < lowest) lowest = v;
+        if (v > highest) highest = v;
+        seen[v] = 1;
+    }
+    p->min_ = lowest;
+    p->max_ = highest;
+
+    // Compute the minimum distance between two consecutive used levels.
+    p->min_level_dist_ = highest - lowest;
+    for (level = 0; level < 256; ++level) {
+        if (!seen[level]) continue;
+        ++p->num_levels_;
+        if (previous >= 0 && level - previous < p->min_level_dist_) {
+            p->min_level_dist_ = level - previous;
+        }
+        previous = level;
+    }
+}
+
+// Initialize all params.
+// Allocates one block of memory, carves it into the rolling buffer, the
+// average row and the correction table, then analyzes 'data' to set up the
+// correction table. Returns 0 if the allocation fails, 1 otherwise.
+// NOTE(review): 'p' is presumably zeroed by the caller (CountLevels() only
+// increments num_levels_) -- confirm.
+static int InitParams(uint8_t* const data, int width, int height, int radius, SmoothParams* const p) {
+    const int R = 2 * radius + 1; // total size of the kernel
+
+    // R rows for the rolling buffer plus one row used as VFilter() output.
+    const size_t size_scratch_m = (R + 1) * width * sizeof(*p->start_);
+    const size_t size_m = width * sizeof(*p->average_);
+    const size_t size_lut = (1 + 2 * LUT_SIZE) * sizeof(*p->correction_);
+    const size_t total_size = size_scratch_m + size_m + size_lut;
+    uint8_t* mem = (uint8_t*)WebPSafeMalloc(1U, total_size);
+
+    if (mem == NULL) return 0;
+    p->mem_ = (void*)mem;
+
+    p->start_ = (uint16_t*)mem;
+    p->cur_ = p->start_;
+    p->end_ = p->start_ + R * width;
+    p->top_ = p->end_ - width; // last row of the rolling buffer
+    memset(p->top_, 0, width * sizeof(*p->top_)); // only this row is cleared
+    mem += size_scratch_m;
+
+    p->average_ = (uint16_t*)mem;
+    mem += size_m;
+
+    p->width_ = width;
+    p->height_ = height;
+    p->src_ = data; // filtering is done in place: src and dst start together
+    p->dst_ = data;
+    p->radius_ = radius;
+    p->scale_ = (1 << (FIX + LFIX)) / (R * R); // normalization constant
+    p->row_ = -radius;
+
+    // analyze the input distribution so we can best-fit the threshold
+    CountLevels(data, width * height, p);
+
+    // correction table
+    // (offset by LUT_SIZE so that indices in [-LUT_SIZE, LUT_SIZE] are valid)
+    p->correction_ = ((int16_t*)mem) + LUT_SIZE;
+    InitCorrectionLUT(p->correction_, p->min_level_dist_);
+
+    return 1;
+}
+
+// Releases the single allocation made by InitParams(); every buffer pointer
+// in 'p' points into it.
+static void CleanupParams(SmoothParams* const p) {
+    WebPSafeFree(p->mem_);
+}
+
+// Smooths, in place, the 'width' x 'height' plane 'data'. 'strength' must be
+// in [0, 100] and selects the filter radius (4 * strength / 100); a radius of
+// 0 leaves the plane untouched. Planes using two levels or fewer are not
+// filtered either.
+// Returns 0 on invalid parameters or allocation failure, 1 otherwise.
+int WebPDequantizeLevels(uint8_t* const data, int width, int height, int strength) {
+    int radius = 4 * strength / 100;
+    if (strength < 0 || strength > 100) return 0;
+    if (data == NULL || width <= 0 || height <= 0) return 0; // bad params
+
+    // Limit the filter size to not exceed the image dimensions: HFilter()
+    // indexes its row buffers assuming width >= 2 * radius + 1, so a larger
+    // radius would read and write outside of them.
+    if (2 * radius + 1 > width) radius = (width - 1) >> 1;
+    if (2 * radius + 1 > height) radius = (height - 1) >> 1;
+
+    if (radius > 0) {
+        SmoothParams p;
+        memset(&p, 0, sizeof(p));
+        if (!InitParams(data, width, height, radius, &p)) return 0;
+        if (p.num_levels_ > 2) {
+            for (; p.row_ < p.height_; ++p.row_) {
+                VFilter(&p); // accumulate average of input
+                // Need to wait few rows in order to prime the filter,
+                // before emitting some output.
+                if (p.row_ >= p.radius_) {
+                    HFilter(&p);
+                    ApplyFilter(&p);
+                }
+            }
+        }
+        CleanupParams(&p);
+    }
+    return 1;
+}
diff --git a/codec/L2/demos/webpEnc/host/src/utils/quant_levels_dec.h b/codec/L2/demos/webpEnc/host/src/utils/quant_levels_dec.h
new file mode 100644
index 0000000000..54e12d2c1c
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/quant_levels_dec.h
@@ -0,0 +1,34 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Alpha plane de-quantization utility
+//
+// Author:  Vikas Arora (vikasa@google.com)
+
+#ifndef WEBP_UTILS_QUANT_LEVELS_DEC_H_
+#define WEBP_UTILS_QUANT_LEVELS_DEC_H_
+
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Apply post-processing, in place, to input 'data' of size 'width'x'height'
+// assuming that the source was quantized to a reduced number of levels.
+// Strength is in [0..100] and controls the amount of smoothing applied (it
+// sets the filter radius).
+// Returns false (0) in case of error (data is NULL, invalid parameters,
+// malloc failure, ...) and true (1) otherwise.
+int WebPDequantizeLevels(uint8_t* const data, int width, int height, int strength);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_UTILS_QUANT_LEVELS_DEC_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/utils/random.c b/codec/L2/demos/webpEnc/host/src/utils/random.c
new file mode 100644
index 0000000000..f7bc76c509
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/random.c
@@ -0,0 +1,38 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Pseudo-random utilities
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <string.h>
+#include "./random.h"
+
+//------------------------------------------------------------------------------
+
+// Initial state copied into VP8Random::tab_ by VP8InitRandom().
+// 31b-range values
+static const uint32_t kRandomTable[VP8_RANDOM_TABLE_SIZE] = {
+    0x0de15230, 0x03b31886, 0x775faccb, 0x1c88626a, 0x68385c55, 0x14b3b828, 0x4a85fef8, 0x49ddb84b,
+    0x64fcf397, 0x5c550289, 0x4a290000, 0x0d7ec1da, 0x5940b7ab, 0x5492577d, 0x4e19ca72, 0x38d38c69,
+    0x0c01ee65, 0x32a1755f, 0x5437f652, 0x5abb2c32, 0x0faa57b1, 0x73f533e7, 0x685feeda, 0x7563cce2,
+    0x6e990e83, 0x4730a7ed, 0x4fc0d9c6, 0x496b153c, 0x4f1403fa, 0x541afb0c, 0x73990b32, 0x26d7cb1c,
+    0x6fcc3706, 0x2cbb77d8, 0x75762f2a, 0x6425ccdd, 0x24b35461, 0x0a7d8715, 0x220414a8, 0x141ebf67,
+    0x56b41583, 0x73e502e3, 0x44cab16f, 0x28264d42, 0x73baaefb, 0x0a50ebed, 0x1d6ab6fb, 0x0d3ad40b,
+    0x35db3b68, 0x2b081e83, 0x77ce6b95, 0x5181e5f0, 0x78853bbc, 0x009f9494, 0x27e5ed3c};
+
+// Resets 'rg' to its initial table and indices, and sets its amplitude from
+// 'dithering': values below 0 give 0, values above 1 give full amplitude
+// (1 << VP8_RANDOM_DITHER_FIX), anything in between is scaled linearly.
+void VP8InitRandom(VP8Random* const rg, float dithering) {
+    const uint32_t full_amp = 1 << VP8_RANDOM_DITHER_FIX;
+
+    memcpy(rg->tab_, kRandomTable, sizeof(rg->tab_));
+    rg->index1_ = 0;
+    rg->index2_ = 31;
+
+    if (dithering < 0.0) {
+        rg->amp_ = 0;
+    } else if (dithering > 1.0) {
+        rg->amp_ = full_amp;
+    } else {
+        rg->amp_ = (uint32_t)((1 << VP8_RANDOM_DITHER_FIX) * dithering);
+    }
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/utils/random.h b/codec/L2/demos/webpEnc/host/src/utils/random.h
new file mode 100644
index 0000000000..7383ccd285
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/random.h
@@ -0,0 +1,62 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Pseudo-random utilities
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_RANDOM_H_
+#define WEBP_UTILS_RANDOM_H_
+
+#include <assert.h>
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define VP8_RANDOM_DITHER_FIX 8 // fixed-point precision for dithering
+#define VP8_RANDOM_TABLE_SIZE 55 // number of entries in the generator state
+
+// State of the pseudo-random generator.
+typedef struct {
+    int index1_, index2_;                 // the two positions in tab_ that get subtracted
+    uint32_t tab_[VP8_RANDOM_TABLE_SIZE]; // generator state
+    int amp_;                             // amplitude, in VP8_RANDOM_DITHER_FIX fixed-point precision
+} VP8Random;
+
+// Initializes random generator with an amplitude 'dithering' in range [0..1].
+// Values outside that range are clamped to it.
+void VP8InitRandom(VP8Random* const rg, float dithering);
+
+// Returns a centered pseudo-random number with 'num_bits' amplitude.
+// (uses D.Knuth's Difference-based random generator).
+// 'amp' is in VP8_RANDOM_DITHER_FIX fixed-point precision.
+// Advances the state of 'rg' by one step.
+static WEBP_INLINE int VP8RandomBits2(VP8Random* const rg, int num_bits, int amp) {
+    int diff;
+    assert(num_bits + VP8_RANDOM_DITHER_FIX <= 31);
+    // New value = difference of two table entries, kept in 31 bits.
+    diff = rg->tab_[rg->index1_] - rg->tab_[rg->index2_];
+    if (diff < 0) diff += (1u << 31);
+    rg->tab_[rg->index1_] = diff;
+    // Both indices advance by one and wrap around at the end of the table.
+    if (++rg->index1_ == VP8_RANDOM_TABLE_SIZE) rg->index1_ = 0;
+    if (++rg->index2_ == VP8_RANDOM_TABLE_SIZE) rg->index2_ = 0;
+    // sign-extend, 0-center
+    diff = (int)((uint32_t)diff << 1) >> (32 - num_bits);
+    diff = (diff * amp) >> VP8_RANDOM_DITHER_FIX; // restrict range
+    diff += 1 << (num_bits - 1);                  // shift back to 0.5-center
+    return diff;
+}
+
+// Same as VP8RandomBits2(), using the amplitude stored in 'rg' by
+// VP8InitRandom().
+static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) {
+    return VP8RandomBits2(rg, num_bits, rg->amp_);
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_UTILS_RANDOM_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/utils/rescaler.c b/codec/L2/demos/webpEnc/host/src/utils/rescaler.c
new file mode 100644
index 0000000000..145318c157
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/rescaler.c
@@ -0,0 +1,143 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Rescaling functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include "../dsp/dsp.h"
+#include "./rescaler.h"
+
+//------------------------------------------------------------------------------
+
+// Initializes the rescaler 'wrk' for converting a 'src_width' x 'src_height'
+// source into the 'dst_width' x 'dst_height' destination 'dst' (row pitch
+// 'dst_stride', 'num_channels' interleaved channels).
+// 'work' is the scratch area: its first 2 * dst_width * num_channels entries
+// are cleared and used as the 'irow' and 'frow' row buffers.
+void WebPRescalerInit(WebPRescaler* const wrk,
+                      int src_width,
+                      int src_height,
+                      uint8_t* const dst,
+                      int dst_width,
+                      int dst_height,
+                      int dst_stride,
+                      int num_channels,
+                      rescaler_t* const work) {
+    const int x_add = src_width, x_sub = dst_width;
+    const int y_add = src_height, y_sub = dst_height;
+    // Number of entries in one work row, computed in size_t so that the
+    // buffer offsets and the memset() size below cannot overflow 'int'.
+    const size_t row_len = (size_t)num_channels * dst_width;
+    wrk->x_expand = (src_width < dst_width);
+    wrk->y_expand = (src_height < dst_height);
+    wrk->src_width = src_width;
+    wrk->src_height = src_height;
+    wrk->dst_width = dst_width;
+    wrk->dst_height = dst_height;
+    wrk->src_y = 0;
+    wrk->dst_y = 0;
+    wrk->dst = dst;
+    wrk->dst_stride = dst_stride;
+    wrk->num_channels = num_channels;
+
+    // for 'x_expand', we use bilinear interpolation
+    wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add;
+    wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub;
+    if (!wrk->x_expand) { // fx_scale is not used otherwise
+        wrk->fx_scale = WEBP_RESCALER_FRAC(1, wrk->x_sub);
+    }
+    // vertical scaling parameters
+    wrk->y_add = wrk->y_expand ? y_add - 1 : y_add;
+    wrk->y_sub = wrk->y_expand ? y_sub - 1 : y_sub;
+    wrk->y_accum = wrk->y_expand ? wrk->y_sub : wrk->y_add;
+    if (!wrk->y_expand) {
+        // this is WEBP_RESCALER_FRAC(dst_height, x_add * y_add) without the cast.
+        // The denominator is formed in 64 bits: 'x_add * y_add' evaluated in
+        // 'int' overflows for large source pictures.
+        const uint64_t num = (uint64_t)dst_height * WEBP_RESCALER_ONE;
+        const uint64_t den = (uint64_t)wrk->x_add * wrk->y_add;
+        const uint64_t ratio = num / den;
+        if (ratio != (uint32_t)ratio) {
+            // We can't represent the ratio with the current fixed-point precision.
+            // => We special-case fxy_scale = 0, in WebPRescalerExportRow().
+            wrk->fxy_scale = 0;
+        } else {
+            wrk->fxy_scale = (uint32_t)ratio;
+        }
+        wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->y_sub);
+    } else {
+        wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->x_add);
+        // wrk->fxy_scale is unused here.
+    }
+    wrk->irow = work;
+    wrk->frow = work + row_len;
+    memset(work, 0, 2 * row_len * sizeof(*work));
+
+    WebPRescalerDspInit();
+}
+
+// Completes a pair of requested dimensions. On input, '*scaled_width' and
+// '*scaled_height' hold the requested size, where 0 means "derive this one
+// from the other, keeping the source aspect ratio" (rounded to nearest).
+// On success the completed dimensions are written back and 1 is returned.
+// Returns 0, leaving the outputs untouched, if the resulting dimensions are
+// not positive, do not fit in an 'int', or cannot be derived because the
+// relevant source dimension is 0.
+int WebPRescalerGetScaledDimensions(int src_width, int src_height, int* const scaled_width, int* const scaled_height) {
+    assert(scaled_width != NULL);
+    assert(scaled_height != NULL);
+    {
+        // 64-bit intermediates: 'src_width * height' overflows 'int' for
+        // large pictures.
+        int64_t width = *scaled_width;
+        int64_t height = *scaled_height;
+
+        // if width is unspecified, scale original proportionally to height ratio.
+        if (width == 0) {
+            if (src_height == 0) return 0; // would divide by zero
+            width = ((int64_t)src_width * height + src_height / 2) / src_height;
+        }
+        // if height is unspecified, scale original proportionally to width ratio.
+        if (height == 0) {
+            if (src_width == 0) return 0; // would divide by zero
+            height = ((int64_t)src_height * width + src_width / 2) / src_width;
+        }
+        // Check if the overall dimensions still make sense.
+        if (width <= 0 || height <= 0) {
+            return 0;
+        }
+        // Reject results that cannot be stored in the 'int' outputs.
+        if (width != (int)width || height != (int)height) {
+            return 0;
+        }
+
+        *scaled_width = (int)width;
+        *scaled_height = (int)height;
+        return 1;
+    }
+}
+
+//------------------------------------------------------------------------------
+// all-in-one calls
+
+// Returns ceil(y_accum / y_sub) for the rescaler 'wrk', capped at
+// 'max_num_lines'.
+int WebPRescaleNeededLines(const WebPRescaler* const wrk, int max_num_lines) {
+    const int pending = (wrk->y_accum + wrk->y_sub - 1) / wrk->y_sub;
+    if (pending > max_num_lines) {
+        return max_num_lines;
+    }
+    return pending;
+}
+
+// Feeds up to 'num_lines' source rows (starting at 'src', 'src_stride' bytes
+// apart) into the rescaler, stopping early as soon as an output row is
+// pending. Returns the number of rows actually imported.
+int WebPRescalerImport(WebPRescaler* const wrk, int num_lines, const uint8_t* src, int src_stride) {
+    int imported;
+    for (imported = 0; imported < num_lines; ++imported) {
+        if (WebPRescalerHasPendingOutput(wrk)) break;
+
+        if (wrk->y_expand) {
+            // When expanding vertically the two row buffers trade places
+            // before the new row is imported.
+            rescaler_t* const swapped = wrk->irow;
+            wrk->irow = wrk->frow;
+            wrk->frow = swapped;
+        }
+        WebPRescalerImportRow(wrk, src);
+        if (!wrk->y_expand) {
+            // When shrinking, the new row's contribution is accumulated.
+            const int row_len = wrk->num_channels * wrk->dst_width;
+            int k;
+            for (k = 0; k < row_len; ++k) {
+                wrk->irow[k] += wrk->frow[k];
+            }
+        }
+        wrk->y_accum -= wrk->y_sub;
+        wrk->src_y += 1;
+        src += src_stride;
+    }
+    return imported;
+}
+
+// Exports rows for as long as the rescaler reports pending output and
+// returns how many rows were exported.
+int WebPRescalerExport(WebPRescaler* const rescaler) {
+    int exported;
+    for (exported = 0; WebPRescalerHasPendingOutput(rescaler); ++exported) {
+        WebPRescalerExportRow(rescaler);
+    }
+    return exported;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/utils/rescaler.h b/codec/L2/demos/webpEnc/host/src/utils/rescaler.h
new file mode 100644
index 0000000000..8b127175d8
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/rescaler.h
@@ -0,0 +1,96 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Rescaling functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_RESCALER_H_
+#define WEBP_UTILS_RESCALER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "../webp/types.h"
+
+#define WEBP_RESCALER_RFIX 32 // fixed-point precision for multiplies
+#define WEBP_RESCALER_ONE (1ull << WEBP_RESCALER_RFIX)
+#define WEBP_RESCALER_FRAC(x, y) ((uint32_t)(((uint64_t)(x) << WEBP_RESCALER_RFIX) / (y)))
+
+// Structure used for on-the-fly rescaling
+typedef uint32_t rescaler_t; // type for side-buffer
+typedef struct WebPRescaler WebPRescaler;
+struct WebPRescaler {
+    int x_expand;              // true if we're expanding in the x direction
+    int y_expand;              // true if we're expanding in the y direction
+    int num_channels;          // bytes to jump between pixels
+    uint32_t fx_scale;         // fixed-point scaling factors
+    uint32_t fy_scale;         // (another fixed-point scaling factor)
+    uint32_t fxy_scale;        // (another fixed-point scaling factor)
+    int y_accum;               // vertical accumulator; an output row is pending once it is <= 0
+    int y_add, y_sub;          // vertical increments (y_sub is subtracted from y_accum per imported row)
+    int x_add, x_sub;          // horizontal increments
+    int src_width, src_height; // source dimensions
+    int dst_width, dst_height; // destination dimensions
+    int src_y, dst_y;          // row counters for input and output
+    uint8_t* dst;              // presumably the 'dst' buffer given to WebPRescalerInit() -- TODO confirm
+    int dst_stride;            // presumably the 'dst_stride' given to WebPRescalerInit() -- TODO confirm
+    rescaler_t *irow, *frow; // work buffers (rows of rescaler_t); swapped before each import when y_expand is set
+};
+
+// Initialize a rescaler given scratch area 'work' and dimensions of src & dst.
+void WebPRescalerInit(WebPRescaler* const rescaler,
+                      int src_width,
+                      int src_height,
+                      uint8_t* const dst,
+                      int dst_width,
+                      int dst_height,
+                      int dst_stride,
+                      int num_channels,
+                      rescaler_t* const work);
+
+// If either 'scaled_width' or 'scaled_height' (but not both) is 0 the value
+// will be calculated preserving the aspect ratio, otherwise the values are
+// left unmodified. Returns true on success, false if either value is 0 after
+// performing the scaling calculation.
+int WebPRescalerGetScaledDimensions(int src_width, int src_height, int* const scaled_width, int* const scaled_height);
+
+// Returns the number of input lines needed next to produce one output line,
+// considering that the maximum available input lines are 'max_num_lines'.
+int WebPRescaleNeededLines(const WebPRescaler* const rescaler, int max_num_lines);
+
+// Import multiple rows over all channels, until at least one row is ready to
+// be exported. Returns the actual number of lines that were imported.
+int WebPRescalerImport(WebPRescaler* const rescaler, int num_rows, const uint8_t* src, int src_stride);
+
+// Export as many rows as possible. Returns the number of rows written.
+int WebPRescalerExport(WebPRescaler* const rescaler);
+
+// Return true once src_y has reached src_height, i.e. no source row is left to import.
+static WEBP_INLINE int WebPRescalerInputDone(const WebPRescaler* const rescaler) {
+    return !(rescaler->src_y < rescaler->src_height);
+}
+// Return true once dst_y has reached dst_height, i.e. no destination row is left to export.
+static WEBP_INLINE int WebPRescalerOutputDone(const WebPRescaler* const rescaler) {
+    return !(rescaler->dst_y < rescaler->dst_height);
+}
+
+// Return true if output rows are still owed and the vertical accumulator has run out (<= 0).
+static WEBP_INLINE int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) {
+    return (rescaler->y_accum <= 0) && !WebPRescalerOutputDone(rescaler);
+}
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_UTILS_RESCALER_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/utils/thread.c b/codec/L2/demos/webpEnc/host/src/utils/thread.c
new file mode 100644
index 0000000000..6fb3f8d29c
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/thread.c
@@ -0,0 +1,340 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Multi-threaded worker
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <string.h> // for memset()
+#include "./thread.h"
+#include "./utils.h"
+
+#ifdef WEBP_USE_THREAD
+
+#if defined(_WIN32)
+
+#include <windows.h>
+typedef HANDLE pthread_t;
+typedef CRITICAL_SECTION pthread_mutex_t;
+
+#if _WIN32_WINNT >= 0x0600 // Windows Vista / Server 2008 or greater
+#define USE_WINDOWS_CONDITION_VARIABLE
+typedef CONDITION_VARIABLE pthread_cond_t;
+#else
+typedef struct {
+    HANDLE waiting_sem_;  // released by pthread_cond_wait() to announce a waiter; polled by pthread_cond_signal()
+    HANDLE received_sem_; // released by the waiter once it has consumed signal_event_
+    HANDLE signal_event_; // set by pthread_cond_signal() to wake the waiter
+} pthread_cond_t;
+#endif // _WIN32_WINNT >= 0x600
+
+#ifndef WINAPI_FAMILY_PARTITION
+#define WINAPI_PARTITION_DESKTOP 1
+#define WINAPI_FAMILY_PARTITION(x) x
+#endif
+
+#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+#define USE_CREATE_THREAD
+#endif
+
+#else // !_WIN32
+
+#include <pthread.h>
+
+#endif // _WIN32
+
+struct WebPWorkerImpl {
+    pthread_mutex_t mutex_;    // held while ThreadLoop() and ChangeState() read or write worker->status_
+    pthread_cond_t condition_; // signalled in both directions: new work for the thread, completion for the caller
+    pthread_t thread_;         // created in Reset(), joined in End()
+};
+
+#if defined(_WIN32)
+
+//------------------------------------------------------------------------------
+// simplistic pthread emulation layer
+
+#include <process.h>
+
+// _beginthreadex requires __stdcall
+#define THREADFN unsigned int __stdcall
+#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val)
+
+#if _WIN32_WINNT >= 0x0501 // Windows XP or greater
+#define WaitForSingleObject(obj, timeout) WaitForSingleObjectEx(obj, timeout, FALSE /*bAlertable*/)
+#endif
+
+static int pthread_create(pthread_t* const thread, const void* attr, unsigned int(__stdcall* start)(void*), void* arg) {
+    (void)attr; // thread attributes are not supported by this emulation
+#ifdef USE_CREATE_THREAD
+    *thread = CreateThread(NULL,          /* lpThreadAttributes */
+                           0,             /* dwStackSize */
+                           start, arg, 0, /* lpStartAddress, lpParameter, dwCreationFlags */
+                           NULL);         /* lpThreadId */
+#else
+    *thread = (pthread_t)_beginthreadex(NULL,          /* void *security */
+                                        0,             /* unsigned stack_size */
+                                        start, arg, 0, /* start_address, arglist, unsigned initflag */
+                                        NULL);         /* unsigned *thrdaddr */
+#endif
+    if (*thread == NULL) return 1; // creation failed: non-zero like pthread_create()
+    SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL); // result is not checked
+    return 0;
+}
+
+static int pthread_join(pthread_t thread, void** value_ptr) {
+    (void)value_ptr; // the thread's exit value is not retrieved
+    return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 || CloseHandle(thread) == 0); // 0 only if both the wait and the close succeed
+}
+
+// Mutex
+static int pthread_mutex_init(pthread_mutex_t* const mutex, void* mutexattr) {
+    (void)mutexattr; // mutex attributes are not supported by this emulation
+#if _WIN32_WINNT >= 0x0600 // Windows Vista / Server 2008 or greater
+    InitializeCriticalSectionEx(mutex, 0 /*dwSpinCount*/, 0 /*Flags*/); // result is not checked
+#else
+    InitializeCriticalSection(mutex);
+#endif
+    return 0; // always reports success
+}
+
+static int pthread_mutex_lock(pthread_mutex_t* const mutex) {
+    EnterCriticalSection(mutex); // pthread_mutex_t is a CRITICAL_SECTION in this emulation
+    return 0;                    // always reports success
+}
+
+static int pthread_mutex_unlock(pthread_mutex_t* const mutex) {
+    LeaveCriticalSection(mutex); // counterpart of EnterCriticalSection() in pthread_mutex_lock()
+    return 0;                    // always reports success
+}
+
+static int pthread_mutex_destroy(pthread_mutex_t* const mutex) {
+    DeleteCriticalSection(mutex); // releases the critical section set up by pthread_mutex_init()
+    return 0;                     // always reports success
+}
+
+// Condition
+static int pthread_cond_destroy(pthread_cond_t* const condition) {
+    int ok = 1; // cleared as soon as one CloseHandle() call fails
+#ifdef USE_WINDOWS_CONDITION_VARIABLE
+    (void)condition; // nothing is released here for a native CONDITION_VARIABLE
+#else
+    ok &= (CloseHandle(condition->waiting_sem_) != 0);
+    ok &= (CloseHandle(condition->received_sem_) != 0);
+    ok &= (CloseHandle(condition->signal_event_) != 0);
+#endif
+    return !ok; // 0 on success, 1 if any handle could not be closed
+}
+
+static int pthread_cond_init(pthread_cond_t* const condition, void* cond_attr) {
+    (void)cond_attr; // condition attributes are not supported by this emulation
+#ifdef USE_WINDOWS_CONDITION_VARIABLE
+    InitializeConditionVariable(condition);
+#else
+    condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL);      // initial count 0, maximum count 1
+    condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL);     // initial count 0, maximum count 1
+    condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL); // auto-reset event, initially non-signaled
+    if (condition->waiting_sem_ == NULL || condition->received_sem_ == NULL || condition->signal_event_ == NULL) {
+        pthread_cond_destroy(condition); // closes all three handles, including any that are still NULL
+        return 1;
+    }
+#endif
+    return 0; // 0 on success, 1 if any of the three objects could not be created
+}
+
+static int pthread_cond_signal(pthread_cond_t* const condition) {
+    int ok = 1; // stays 1 when no thread is waiting: signalling is then a no-op
+#ifdef USE_WINDOWS_CONDITION_VARIABLE
+    WakeConditionVariable(condition);
+#else
+    if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
+        // a thread is waiting in pthread_cond_wait: allow it to be notified
+        ok = (SetEvent(condition->signal_event_) != 0);
+        // wait until the event is consumed so the signaler cannot consume the event via its
+        // own pthread_cond_wait; WAIT_OBJECT_0 is the success result, so test for equality.
+        ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) == WAIT_OBJECT_0);
+    }
+#endif
+    return !ok; // pthread convention: 0 on success, non-zero on failure
+}
+
+static int pthread_cond_wait(pthread_cond_t* const condition, pthread_mutex_t* const mutex) {
+    int ok; // non-zero while every step has succeeded
+#ifdef USE_WINDOWS_CONDITION_VARIABLE
+    ok = SleepConditionVariableCS(condition, mutex, INFINITE);
+#else
+    // announce that a consumer is available so the signal isn't dropped in
+    // pthread_cond_signal
+    if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL)) return 1; // mutex is still held on this early exit
+    // now unlock the mutex so pthread_cond_signal may be issued
+    pthread_mutex_unlock(mutex);
+    ok = (WaitForSingleObject(condition->signal_event_, INFINITE) == WAIT_OBJECT_0);
+    ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL); // acknowledge: pthread_cond_signal() waits on this
+    pthread_mutex_lock(mutex);                                 // re-acquire before returning to the caller
+#endif
+    return !ok; // 0 on success, non-zero on failure
+}
+
+#else // !_WIN32
+#define THREADFN void*
+#define THREAD_RETURN(val) val
+#endif // _WIN32
+
+//------------------------------------------------------------------------------
+
+static void Execute(WebPWorker* const worker); // Forward declaration.
+
+static THREADFN ThreadLoop(void* ptr) { // thread entry point; 'ptr' is the WebPWorker passed by Reset()
+    WebPWorker* const worker = (WebPWorker*)ptr;
+    int done = 0; // set once status_ is seen as NOT_OK
+    while (!done) {
+        pthread_mutex_lock(&worker->impl_->mutex_);
+        while (worker->status_ == OK) { // wait in idling mode
+            pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
+        }
+        if (worker->status_ == WORK) {
+            Execute(worker);      // the hook runs with mutex_ still held
+            worker->status_ = OK; // back to idle; ChangeState() waits for this value
+        } else if (worker->status_ == NOT_OK) { // finish the worker
+            done = 1;
+        }
+        // signal to the main thread that we're done (for Sync())
+        pthread_cond_signal(&worker->impl_->condition_);
+        pthread_mutex_unlock(&worker->impl_->mutex_);
+    }
+    return THREAD_RETURN(NULL); // Thread is finished
+}
+
+// main thread state control: waits for the worker to be idle (OK), then optionally hands it 'new_status'
+static void ChangeState(WebPWorker* const worker, WebPWorkerStatus new_status) {
+    // No-op when attempting to change state on a thread that didn't come up.
+    // Checking status_ without acquiring the lock first would result in a data
+    // race.
+    if (worker->impl_ == NULL) return;
+
+    pthread_mutex_lock(&worker->impl_->mutex_);
+    if (worker->status_ >= OK) { // nothing to do for a worker that is already NOT_OK
+        // wait for the worker to finish
+        while (worker->status_ != OK) {
+            pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
+        }
+        // assign new status and release the working thread if needed
+        if (new_status != OK) { // requesting OK is a pure wait, which is how Sync() uses it
+            worker->status_ = new_status;
+            pthread_cond_signal(&worker->impl_->condition_);
+        }
+    }
+    pthread_mutex_unlock(&worker->impl_->mutex_);
+}
+
+#endif // WEBP_USE_THREAD
+
+//------------------------------------------------------------------------------
+
+static void Init(WebPWorker* const worker) {
+    memset(worker, 0, sizeof(WebPWorker)); // zeroes impl_, hook, data1, data2 and had_error
+    worker->status_ = NOT_OK;              // no thread yet; Reset() must be called before use
+}
+
+static int Sync(WebPWorker* const worker) {
+#ifdef WEBP_USE_THREAD
+    ChangeState(worker, OK); // blocks until the worker thread is idle again
+#endif
+    assert(worker->status_ <= OK); // i.e. not WORK any more
+    return !worker->had_error;     // true when no hook call has reported an error since Reset()
+}
+
+static int Reset(WebPWorker* const worker) {
+    int ok = 1;
+    worker->had_error = 0; // every Reset() clears the error latch
+    if (worker->status_ < OK) { // NOT_OK: the worker has to be brought up
+#ifdef WEBP_USE_THREAD
+        worker->impl_ = (WebPWorkerImpl*)WebPSafeCalloc(1, sizeof(*worker->impl_));
+        if (worker->impl_ == NULL) {
+            return 0;
+        }
+        if (pthread_mutex_init(&worker->impl_->mutex_, NULL)) {
+            goto Error;
+        }
+        if (pthread_cond_init(&worker->impl_->condition_, NULL)) {
+            pthread_mutex_destroy(&worker->impl_->mutex_);
+            goto Error;
+        }
+        pthread_mutex_lock(&worker->impl_->mutex_); // ThreadLoop() locks mutex_ before it reads status_
+        ok = !pthread_create(&worker->impl_->thread_, NULL, ThreadLoop, worker);
+        if (ok) worker->status_ = OK;
+        pthread_mutex_unlock(&worker->impl_->mutex_);
+        if (!ok) {
+            pthread_mutex_destroy(&worker->impl_->mutex_);
+            pthread_cond_destroy(&worker->impl_->condition_);
+        Error: // shared cleanup, also reached by the gotos above
+            WebPSafeFree(worker->impl_);
+            worker->impl_ = NULL;
+            return 0;
+        }
+#else
+        worker->status_ = OK; // no thread to create in the single-threaded build
+#endif
+    } else if (worker->status_ > OK) { // WORK: wait for the job in flight
+        ok = Sync(worker);
+    }
+    assert(!ok || (worker->status_ == OK));
+    return ok;
+}
+
+static void Execute(WebPWorker* const worker) {
+    // Runs the hook synchronously in the calling thread.
+    if (worker->hook == NULL) return; // no job installed: nothing to do
+    // A false (zero) result from the hook is latched into had_error; it is never cleared here.
+    if (!worker->hook(worker->data1, worker->data2)) worker->had_error |= 1;
+}
+
+static void Launch(WebPWorker* const worker) {
+#ifdef WEBP_USE_THREAD
+    ChangeState(worker, WORK); // waits for idle, then wakes the thread, which calls Execute()
+#else
+    Execute(worker); // single-threaded build: run the hook right here
+#endif
+}
+
+static void End(WebPWorker* const worker) {
+#ifdef WEBP_USE_THREAD
+    if (worker->impl_ != NULL) { // NULL when the thread never came up or End() already ran
+        ChangeState(worker, NOT_OK); // waits for idle, then tells ThreadLoop() to exit
+        pthread_join(worker->impl_->thread_, NULL);
+        pthread_mutex_destroy(&worker->impl_->mutex_);
+        pthread_cond_destroy(&worker->impl_->condition_);
+        WebPSafeFree(worker->impl_);
+        worker->impl_ = NULL;
+    }
+#else
+    worker->status_ = NOT_OK;
+    assert(worker->impl_ == NULL); // impl_ is only allocated in the threaded build
+#endif
+    assert(worker->status_ == NOT_OK);
+}
+
+//------------------------------------------------------------------------------
+
+static WebPWorkerInterface g_worker_interface = {Init, Reset, Sync, Launch, Execute, End};
+
+int WebPSetWorkerInterface(const WebPWorkerInterface* const winterface) {
+    // Reject a missing table or one with any unset entry point.
+    if (winterface == NULL) return 0;
+    if (winterface->Init == NULL || winterface->Reset == NULL || winterface->Sync == NULL) return 0;
+    if (winterface->Launch == NULL || winterface->Execute == NULL || winterface->End == NULL) return 0;
+    // The table is copied by value, so the caller's struct is not referenced afterwards.
+    g_worker_interface = *winterface;
+    return 1;
+}
+
+const WebPWorkerInterface* WebPGetWorkerInterface(void) {
+    return &g_worker_interface; // address of the file-level table; its contents change on WebPSetWorkerInterface()
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/utils/thread.h b/codec/L2/demos/webpEnc/host/src/utils/thread.h
new file mode 100644
index 0000000000..e3c12dc48e
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/thread.h
@@ -0,0 +1,92 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Multi-threaded worker
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_THREAD_H_
+#define WEBP_UTILS_THREAD_H_
+
+#ifdef HAVE_CONFIG_H
+#include "../webp/config.h"
+#endif
+
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// State of the worker thread object. The code compares these by order (NOT_OK < OK < WORK).
+typedef enum {
+    NOT_OK = 0, // object is unusable
+    OK,         // ready to work
+    WORK        // busy finishing the current task
+} WebPWorkerStatus;
+
+// Function to be called by the worker thread. Takes two opaque pointers as
+// arguments (data1 and data2), and should return false in case of error.
+typedef int (*WebPWorkerHook)(void*, void*);
+
+// Platform-dependent implementation details for the worker.
+typedef struct WebPWorkerImpl WebPWorkerImpl;
+
+// Synchronization object used to launch job in the worker thread
+typedef struct {
+    WebPWorkerImpl* impl_;    // platform-dependent part; opaque outside the implementation file
+    WebPWorkerStatus status_; // current state of the worker
+    WebPWorkerHook hook; // hook to call
+    void* data1;         // first argument passed to 'hook'
+    void* data2;         // second argument passed to 'hook'
+    int had_error;       // return value of the last call to 'hook'
+} WebPWorker;
+
+// The interface for all thread-worker related functions. All these functions
+// must be implemented: WebPSetWorkerInterface() rejects a table with a NULL entry.
+typedef struct {
+    // Must be called first, before any other method.
+    void (*Init)(WebPWorker* const worker);
+    // Must be called to initialize the object and spawn the thread. Re-entrant.
+    // Will potentially launch the thread. Returns false in case of error.
+    int (*Reset)(WebPWorker* const worker);
+    // Makes sure the previous work is finished. Returns true if worker->had_error
+    // was not set and no error condition was triggered by the working thread.
+    int (*Sync)(WebPWorker* const worker);
+    // Triggers the thread to call hook() with data1 and data2 arguments. These
+    // hook/data1/data2 values can be changed at any time before calling this
+    // function, but must not be changed afterward until the next call to Sync().
+    void (*Launch)(WebPWorker* const worker);
+    // This function is similar to Launch() except that it calls the
+    // hook directly instead of using a thread. Convenient to bypass the thread
+    // mechanism while still using the WebPWorker structs. Sync() must
+    // still be called afterward (for error reporting).
+    void (*Execute)(WebPWorker* const worker);
+    // Kill the thread and terminate the object. To use the object again, one
+    // must call Reset() again.
+    void (*End)(WebPWorker* const worker);
+} WebPWorkerInterface;
+
+// Install a new set of threading functions, overriding the defaults. This
+// should be done before any workers are started, i.e., before any encoding or
+// decoding takes place. The contents of the interface struct are copied, it
+// is safe to free the corresponding memory after this call. This function is
+// not thread-safe. Return false in case of invalid pointer or methods.
+WEBP_EXTERN(int) WebPSetWorkerInterface(const WebPWorkerInterface* const winterface);
+
+// Retrieve the currently set thread worker interface.
+WEBP_EXTERN(const WebPWorkerInterface*) WebPGetWorkerInterface(void);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_UTILS_THREAD_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/utils/utils.c b/codec/L2/demos/webpEnc/host/src/utils/utils.c
new file mode 100644
index 0000000000..5aafa28e92
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/utils.c
@@ -0,0 +1,242 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Misc. common utility functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+#include <string.h> // for memcpy()
+#include "../webp/decode.h"
+#include "../webp/encode.h"
+#include "./utils.h"
+
+// If PRINT_MEM_INFO is defined, extra info (like total memory used, number of
+// alloc/free etc) is printed. For debugging/tuning purpose only (it's slow,
+// and not multi-thread safe!).
+// An interesting alternative is valgrind's 'massif' tool:
+//    http://valgrind.org/docs/manual/ms-manual.html
+// Here is an example command line:
+/*    valgrind --tool=massif --massif-out-file=massif.out \
+               --stacks=yes --alloc-fn=WebPSafeAlloc --alloc-fn=WebPSafeCalloc
+      ms_print massif.out
+*/
+// In addition:
+// * if PRINT_MEM_TRAFFIC is defined, all the details of the malloc/free cycles
+//   are printed.
+// * if MALLOC_FAIL_AT is defined, the global environment variable
+//   $MALLOC_FAIL_AT is used to simulate a memory error when calloc or malloc
+//   is called for the nth time. Example usage:
+//   export MALLOC_FAIL_AT=50 && ./examples/cwebp input.png
+// * if MALLOC_LIMIT is defined, the global environment variable $MALLOC_LIMIT
+//   sets the maximum amount of memory (in bytes) made available to libwebp.
+//   This can be used to emulate environment with very limited memory.
+//   Example: export MALLOC_LIMIT=64000000 && ./examples/dwebp picture.webp
+
+// #define PRINT_MEM_INFO
+// #define PRINT_MEM_TRAFFIC
+// #define MALLOC_FAIL_AT
+// #define MALLOC_LIMIT
+
+//------------------------------------------------------------------------------
+// Checked memory allocation
+
+#if defined(PRINT_MEM_INFO)
+
+#include <stdio.h>
+
+static int num_malloc_calls = 0;
+static int num_calloc_calls = 0;
+static int num_free_calls = 0;
+static int countdown_to_fail = 0; // 0 = off
+
+typedef struct MemBlock MemBlock;
+struct MemBlock {
+    void* ptr_;      // address of the tracked allocation
+    size_t size_;    // size in bytes recorded for it by AddMem()
+    MemBlock* next_; // next record in the singly linked list headed by all_blocks
+};
+
+static MemBlock* all_blocks = NULL;
+static size_t total_mem = 0;
+static size_t total_mem_allocated = 0;
+static size_t high_water_mark = 0;
+static size_t mem_limit = 0;
+
+static int exit_registered = 0;
+
+static void PrintMemInfo(void) {
+    MemBlock* node; // cursor over the records still present in all_blocks
+    fprintf(stderr, "\nMEMORY INFO:\n");
+    fprintf(stderr, "num calls to: malloc = %4d\n", num_malloc_calls);
+    fprintf(stderr, "              calloc = %4d\n", num_calloc_calls);
+    fprintf(stderr, "              free   = %4d\n", num_free_calls);
+    fprintf(stderr, "total_mem: %u\n", (uint32_t)total_mem);
+    fprintf(stderr, "total_mem allocated: %u\n", (uint32_t)total_mem_allocated);
+    fprintf(stderr, "high-water mark: %u\n", (uint32_t)high_water_mark);
+    // Release the bookkeeping records themselves; the blocks they describe are not freed.
+    for (node = all_blocks; node != NULL; node = all_blocks) {
+        all_blocks = node->next_;
+        free(node);
+    }
+}
+
+static void Increment(int* const v) { // bumps *v; the first call also performs the one-time setup below
+    if (!exit_registered) {
+#if defined(MALLOC_FAIL_AT)
+        {
+            const char* const malloc_fail_at_str = getenv("MALLOC_FAIL_AT");
+            if (malloc_fail_at_str != NULL) {
+                countdown_to_fail = atoi(malloc_fail_at_str); // no validation of the environment value
+            }
+        }
+#endif
+#if defined(MALLOC_LIMIT)
+        {
+            const char* const malloc_limit_str = getenv("MALLOC_LIMIT");
+            if (malloc_limit_str != NULL) {
+                mem_limit = atoi(malloc_limit_str); // atoi() yields an int, so the limit is bounded by INT_MAX
+            }
+        }
+#endif
+        (void)countdown_to_fail; // silence unused-variable warnings when the options above are off
+        (void)mem_limit;
+        atexit(PrintMemInfo); // the report is printed at normal process exit
+        exit_registered = 1;
+    }
+    ++*v;
+}
+
+static void AddMem(void* ptr, size_t size) { // records a new allocation; a NULL 'ptr' is ignored
+    if (ptr != NULL) {
+        MemBlock* const b = (MemBlock*)malloc(sizeof(*b));
+        if (b == NULL) abort(); // the tracker itself ran out of memory
+        b->next_ = all_blocks;  // push the record at the head of the list
+        all_blocks = b;
+        b->ptr_ = ptr;
+        b->size_ = size;
+        total_mem += size;           // bytes currently outstanding
+        total_mem_allocated += size; // running total, never decreased
+#if defined(PRINT_MEM_TRAFFIC)
+#if defined(MALLOC_FAIL_AT)
+        fprintf(stderr, "fail-count: %5d [mem=%u]\n", num_malloc_calls + num_calloc_calls, (uint32_t)total_mem);
+#else
+        fprintf(stderr, "Mem: %u (+%u)\n", (uint32_t)total_mem, (uint32_t)size);
+#endif
+#endif
+        if (total_mem > high_water_mark) high_water_mark = total_mem;
+    }
+}
+
+static void SubMem(void* ptr) { // drops the record for 'ptr'; aborts if 'ptr' was never recorded
+    if (ptr != NULL) {
+        MemBlock** b = &all_blocks; // address of the link that points at the candidate record
+        // Inefficient search, but that's just for debugging.
+        while (*b != NULL && (*b)->ptr_ != ptr) b = &(*b)->next_;
+        if (*b == NULL) {
+            fprintf(stderr, "Invalid pointer free! (%p)\n", ptr);
+            abort();
+        }
+        {
+            MemBlock* const block = *b;
+            *b = block->next_; // unlink the record from the list
+            total_mem -= block->size_;
+#if defined(PRINT_MEM_TRAFFIC)
+            fprintf(stderr, "Mem: %u (-%u)\n", (uint32_t)total_mem, (uint32_t)block->size_);
+#endif
+            free(block); // frees the record only; the caller frees 'ptr'
+        }
+    }
+}
+
+#else
+#define Increment(v) \
+    do {             \
+    } while (0)
+#define AddMem(p, s) \
+    do {             \
+    } while (0)
+#define SubMem(p) \
+    do {          \
+    } while (0)
+#endif
+
+// Returns 0 if nmemb * size overflows or exceeds the allowed maximum (or on a simulated failure), 1 otherwise.
+static int CheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) {
+    const uint64_t total_size = nmemb * size; // may have wrapped; only trusted after the checks below
+    if (nmemb == 0) return 1;
+    if ((uint64_t)size > WEBP_MAX_ALLOCABLE_MEMORY / nmemb) return 0; // division form avoids the overflow
+    if (total_size != (size_t)total_size) return 0;                   // must also fit in size_t
+#if defined(PRINT_MEM_INFO) && defined(MALLOC_FAIL_AT)
+    if (countdown_to_fail > 0 && --countdown_to_fail == 0) {
+        return 0; // fake fail!
+    }
+#endif
+#if defined(PRINT_MEM_INFO) && defined(MALLOC_LIMIT) // mem_limit and total_mem exist only with PRINT_MEM_INFO
+    if (mem_limit > 0 && total_mem + total_size >= mem_limit) {
+        return 0; // fake fail!
+    }
+#endif
+
+    return 1;
+}
+
+void* WebPSafeMalloc(uint64_t nmemb, size_t size) { // malloc(nmemb * size) with a size check; NULL on failure
+    void* ptr;
+    Increment(&num_malloc_calls); // expands to nothing unless PRINT_MEM_INFO is defined
+    if (!CheckSizeArgumentsOverflow(nmemb, size)) return NULL;
+    assert(nmemb * size > 0); // NOTE(review): nmemb == 0 passes the check above and trips this assert
+    ptr = malloc((size_t)(nmemb * size));
+    AddMem(ptr, (size_t)(nmemb * size)); // expands to nothing unless PRINT_MEM_INFO is defined
+    return ptr;
+}
+
+void* WebPSafeCalloc(uint64_t nmemb, size_t size) { // calloc(nmemb, size) with a size check; NULL on failure
+    void* ptr;
+    Increment(&num_calloc_calls); // expands to nothing unless PRINT_MEM_INFO is defined
+    if (!CheckSizeArgumentsOverflow(nmemb, size)) return NULL;
+    assert(nmemb * size > 0); // NOTE(review): nmemb == 0 passes the check above and trips this assert
+    ptr = calloc((size_t)nmemb, size);
+    AddMem(ptr, (size_t)(nmemb * size)); // expands to nothing unless PRINT_MEM_INFO is defined
+    return ptr;
+}
+
+void WebPSafeFree(void* const ptr) {
+    // Freeing NULL is a no-op and is not counted in the debug statistics.
+    if (ptr == NULL) return;
+    Increment(&num_free_calls);
+    SubMem(ptr);
+    free(ptr);
+}
+
+// Public API function. Calls free() directly, so the PRINT_MEM_INFO bookkeeping is not updated here.
+void WebPFree(void* ptr) {
+    free(ptr);
+}
+
+//------------------------------------------------------------------------------
+
+void WebPCopyPlane(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height) {
+    assert(src != NULL && dst != NULL);
+    assert(src_stride >= width && dst_stride >= width);
+    // Row-by-row copy: 'width' bytes per row, each pointer advanced by its own stride.
+    // A non-positive 'height' copies nothing.
+    for (; height > 0; --height, src += src_stride, dst += dst_stride) {
+        memcpy(dst, src, width);
+    }
+}
+
+void WebPCopyPixels(const WebPPicture* const src, WebPPicture* const dst) { // copies the ARGB plane of 'src' into 'dst'
+    assert(src != NULL && dst != NULL);
+    assert(src->width == dst->width && src->height == dst->height); // dimensions must already match
+    assert(src->use_argb && dst->use_argb);                         // both pictures must be in ARGB mode
+    WebPCopyPlane((uint8_t*)src->argb, 4 * src->argb_stride, (uint8_t*)dst->argb, 4 * dst->argb_stride, 4 * src->width,
+                  src->height); // the factor 4 is presumably bytes per ARGB pixel -- TODO confirm against WebPPicture
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/src/utils/utils.h b/codec/L2/demos/webpEnc/host/src/utils/utils.h
new file mode 100644
index 0000000000..6fb22e672d
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/utils/utils.h
@@ -0,0 +1,162 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Misc. common utility functions
+//
+// Authors: Skal (pascal.massimino@gmail.com)
+//          Urvang (urvang@google.com)
+
+#ifndef WEBP_UTILS_UTILS_H_
+#define WEBP_UTILS_UTILS_H_
+
+#ifdef HAVE_CONFIG_H
+#include "../webp/config.h"
+#endif
+
+#include <assert.h>
+
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Memory allocation
+
+// This is the maximum memory amount that libwebp will ever try to allocate.
+#define WEBP_MAX_ALLOCABLE_MEMORY (1ULL << 40)
+
+// size-checking safe malloc/calloc: verify that the requested size is not too
+// large, or return NULL. You don't need to call these for constructs like
+// malloc(sizeof(foo)), but only if there's picture-dependent size involved
+// somewhere (like: malloc(num_pixels * sizeof(*something))). That's why this
+// safe malloc() borrows the signature from calloc(), pointing at the dangerous
+// underlying multiply involved.
+WEBP_EXTERN(void*) WebPSafeMalloc(uint64_t nmemb, size_t size);
+// Note that WebPSafeCalloc() expects the second argument type to be 'size_t'
+// in order to favor the "calloc(num_foo, sizeof(foo))" pattern.
+WEBP_EXTERN(void*) WebPSafeCalloc(uint64_t nmemb, size_t size);
+
+// Companion deallocation function to the above allocations.
+WEBP_EXTERN(void) WebPSafeFree(void* const ptr);
+
+//------------------------------------------------------------------------------
+// Alignment
+
+#define WEBP_ALIGN_CST 31
+#define WEBP_ALIGN(PTR) ((uintptr_t)((PTR) + WEBP_ALIGN_CST) & ~WEBP_ALIGN_CST)
+
+#if defined(WEBP_FORCE_ALIGNED)
+#include <string.h>
+// memcpy() is the safe way of moving potentially unaligned 32b memory. Reads 4 bytes at 'ptr' in host byte order.
+static WEBP_INLINE uint32_t WebPMemToUint32(const uint8_t* const ptr) {
+    uint32_t A;
+    memcpy(&A, ptr, sizeof(A)); // no (const int*) cast: converting a misaligned pointer to int* is itself undefined
+    return A;
+}
+static WEBP_INLINE void WebPUint32ToMem(uint8_t* const ptr, uint32_t val) {
+    memcpy(ptr, &val, sizeof(val)); // stores the 4 bytes of 'val' at 'ptr' in host byte order
+}
+#else
+static WEBP_INLINE uint32_t WebPMemToUint32(const uint8_t* const ptr) {
+    return *(const uint32_t*)ptr; // direct 32b load; used when WEBP_FORCE_ALIGNED is not defined
+}
+static WEBP_INLINE void WebPUint32ToMem(uint8_t* const ptr, uint32_t val) {
+    *(uint32_t*)ptr = val; // direct 32b store; used when WEBP_FORCE_ALIGNED is not defined
+}
+#endif
+
+//------------------------------------------------------------------------------
+// Reading/writing data.
+
+// Read 16, 24 or 32 bits stored in little-endian order.
+static WEBP_INLINE int GetLE16(const uint8_t* const data) {
+    return (int)(data[0] | (data[1] << 8)); // data[0] is the low byte, data[1] the high byte
+}
+
+static WEBP_INLINE int GetLE24(const uint8_t* const data) {
+    return (data[2] << 16) | GetLE16(data); // the third byte supplies bits 16..23
+}
+
+static WEBP_INLINE uint32_t GetLE32(const uint8_t* const data) {
+    return ((uint32_t)GetLE16(data + 2) << 16) | (uint32_t)GetLE16(data); // high half | low half
+}
+
+// Store 16, 24 or 32 bits in little-endian order.
+static WEBP_INLINE void PutLE16(uint8_t* const data, int val) {
+    assert(val < (1 << 16));
+    data[1] = (uint8_t)((val >> 8) & 0xff); // high byte
+    data[0] = (uint8_t)(val & 0xff);        // low byte
+}
+
+static WEBP_INLINE void PutLE24(uint8_t* const data, int val) {
+    assert(val < (1 << 24));
+    data[2] = (uint8_t)((val >> 16) & 0xff); // top byte
+    PutLE16(data, val & 0xffff);             // low 16 bits
+}
+
+static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) {
+    PutLE16(data + 2, (int)(val >> 16));   // upper half goes to bytes 2..3
+    PutLE16(data, (int)(val & 0xffff));    // lower half goes to bytes 0..1
+}
+
+// Returns (int)floor(log2(n)). n must be > 0.
+// use GNU builtins where available.
+#if defined(__GNUC__) && ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
+static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
+    return 31 - __builtin_clz(n); // n must be non-zero: __builtin_clz(0) is undefined
+}
+#elif defined(_MSC_VER) && _MSC_VER > 1310 && (defined(_M_X64) || defined(_M_IX86))
+#include <intrin.h>
+#pragma intrinsic(_BitScanReverse)
+
+static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
+    unsigned long first_set_bit; // receives the index of the highest set bit of 'n'
+    _BitScanReverse(&first_set_bit, n); // result is not checked; n must be > 0 per the contract above
+    return first_set_bit;
+}
+#else
+static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
+    // Portable fallback: binary search for the highest set bit.
+    // Each pass checks whether anything remains above the low 'step' bits;
+    // if so, those low bits are dropped and 'step' is added to the result.
+    int result = 0;
+    int step;
+
+    for (step = 16; step > 0; step >>= 1) {
+        if ((n >> step) != 0) {
+            n >>= step;
+            result += step;
+        }
+    }
+    return result;
+}
+#endif
+
+//------------------------------------------------------------------------------
+// Pixel copying.
+
+struct WebPPicture;
+
+// Copy width x height pixels from 'src' to 'dst' honoring the strides.
+WEBP_EXTERN(void)
+WebPCopyPlane(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height);
+
+// Copy ARGB pixels from 'src' to 'dst' honoring strides. 'src' and 'dst' are
+// assumed to be already allocated and using ARGB data.
+WEBP_EXTERN(void) WebPCopyPixels(const struct WebPPicture* const src, struct WebPPicture* const dst);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_UTILS_UTILS_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/webp/decode.h b/codec/L2/demos/webpEnc/host/src/webp/decode.h
new file mode 100644
index 0000000000..e770c40941
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/webp/decode.h
@@ -0,0 +1,512 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  Main decoding functions for WebP images.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_WEBP_DECODE_H_
+#define WEBP_WEBP_DECODE_H_
+
+#include "./types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define WEBP_DECODER_ABI_VERSION 0x0208 // MAJOR(8b) + MINOR(8b)
+
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum VP8StatusCode VP8StatusCode;
+// typedef enum WEBP_CSP_MODE WEBP_CSP_MODE;
+typedef struct WebPRGBABuffer WebPRGBABuffer;
+typedef struct WebPYUVABuffer WebPYUVABuffer;
+typedef struct WebPDecBuffer WebPDecBuffer;
+typedef struct WebPIDecoder WebPIDecoder;
+typedef struct WebPBitstreamFeatures WebPBitstreamFeatures;
+typedef struct WebPDecoderOptions WebPDecoderOptions;
+typedef struct WebPDecoderConfig WebPDecoderConfig;
+
+// Return the decoder's version number, packed in hexadecimal using 8bits for
+// each of major/minor/revision. E.g: v2.5.7 is 0x020507.
+WEBP_EXTERN(int) WebPGetDecoderVersion(void);
+
+// Retrieve basic header information: width, height.
+// This function will also validate the header and return 0 in
+// case of formatting error.
+// Pointers 'width' and 'height' can be passed NULL if deemed irrelevant.
+WEBP_EXTERN(int) WebPGetInfo(const uint8_t* data, size_t data_size, int* width, int* height);
+
+// Decodes WebP images pointed to by 'data' and returns RGBA samples, along
+// with the dimensions in *width and *height. The ordering of samples in
+// memory is R, G, B, A, R, G, B, A... in scan order (endian-independent).
+// The returned pointer should be deleted calling WebPFree().
+// Returns NULL in case of error.
+WEBP_EXTERN(uint8_t*) WebPDecodeRGBA(const uint8_t* data, size_t data_size, int* width, int* height);
+
+// Same as WebPDecodeRGBA, but returning A, R, G, B, A, R, G, B... ordered data.
+WEBP_EXTERN(uint8_t*) WebPDecodeARGB(const uint8_t* data, size_t data_size, int* width, int* height);
+
+// Same as WebPDecodeRGBA, but returning B, G, R, A, B, G, R, A... ordered data.
+WEBP_EXTERN(uint8_t*) WebPDecodeBGRA(const uint8_t* data, size_t data_size, int* width, int* height);
+
+// Same as WebPDecodeRGBA, but returning R, G, B, R, G, B... ordered data.
+// If the bitstream contains transparency, it is ignored.
+WEBP_EXTERN(uint8_t*) WebPDecodeRGB(const uint8_t* data, size_t data_size, int* width, int* height);
+
+// Same as WebPDecodeRGB, but returning B, G, R, B, G, R... ordered data.
+WEBP_EXTERN(uint8_t*) WebPDecodeBGR(const uint8_t* data, size_t data_size, int* width, int* height);
+
+// Decode WebP images pointed to by 'data' to Y'UV format(*). The pointer
+// returned is the Y samples buffer. Upon return, *u and *v will point to
+// the U and V chroma data. These U and V buffers need NOT be passed to
+// WebPFree(), unlike the returned Y luma one. The dimension of the U and V
+// planes are both (*width + 1) / 2 and (*height + 1) / 2.
+// Upon return, the Y buffer has a stride returned as '*stride', while U and V
+// have a common stride returned as '*uv_stride'.
+// Return NULL in case of error.
+// (*) Also named Y'CbCr. See: http://en.wikipedia.org/wiki/YCbCr
+WEBP_EXTERN(uint8_t*)
+WebPDecodeYUV(const uint8_t* data,
+              size_t data_size,
+              int* width,
+              int* height,
+              uint8_t** u,
+              uint8_t** v,
+              int* stride,
+              int* uv_stride);
+
+// Releases memory returned by the WebPDecode*() functions above.
+WEBP_EXTERN(void) WebPFree(void* ptr);
+
+// These five functions are variants of the above ones, that decode the image
+// directly into a pre-allocated buffer 'output_buffer'. The maximum storage
+// available in this buffer is indicated by 'output_buffer_size'. If this
+// storage is not sufficient (or an error occurred), NULL is returned.
+// Otherwise, output_buffer is returned, for convenience.
+// The parameter 'output_stride' specifies the distance (in bytes)
+// between scanlines. Hence, output_buffer_size is expected to be at least
+// output_stride x picture-height.
+WEBP_EXTERN(uint8_t*)
+WebPDecodeRGBAInto(
+    const uint8_t* data, size_t data_size, uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+WEBP_EXTERN(uint8_t*)
+WebPDecodeARGBInto(
+    const uint8_t* data, size_t data_size, uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+WEBP_EXTERN(uint8_t*)
+WebPDecodeBGRAInto(
+    const uint8_t* data, size_t data_size, uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+
+// RGB and BGR variants. Here too the transparency information, if present,
+// will be dropped and ignored.
+WEBP_EXTERN(uint8_t*)
+WebPDecodeRGBInto(
+    const uint8_t* data, size_t data_size, uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+WEBP_EXTERN(uint8_t*)
+WebPDecodeBGRInto(
+    const uint8_t* data, size_t data_size, uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+
+// WebPDecodeYUVInto() is a variant of WebPDecodeYUV() that operates directly
+// into pre-allocated luma/chroma plane buffers. This function requires the
+// strides to be passed: one for the luma plane and one for each of the
+// chroma ones. The size of each plane buffer is passed as 'luma_size',
+// 'u_size' and 'v_size' respectively.
+// Pointer to the luma plane ('*luma') is returned or NULL if an error occurred
+// during decoding (or because some buffers were found to be too small).
+WEBP_EXTERN(uint8_t*)
+WebPDecodeYUVInto(const uint8_t* data,
+                  size_t data_size,
+                  uint8_t* luma,
+                  size_t luma_size,
+                  int luma_stride,
+                  uint8_t* u,
+                  size_t u_size,
+                  int u_stride,
+                  uint8_t* v,
+                  size_t v_size,
+                  int v_stride);
+
+//------------------------------------------------------------------------------
+// Output colorspaces and buffer
+
+// Colorspaces
+// Note: the naming describes the byte-ordering of packed samples in memory.
+// For instance, MODE_BGRA relates to samples ordered as B,G,R,A,B,G,R,A,...
+// Non-capital names (e.g.: MODE_Argb) relate to pre-multiplied RGB channels.
+// RGBA-4444 and RGB-565 colorspaces are represented by the following byte-order:
+// RGBA-4444: [r3 r2 r1 r0 g3 g2 g1 g0], [b3 b2 b1 b0 a3 a2 a1 a0], ...
+// RGB-565: [r4 r3 r2 r1 r0 g5 g4 g3], [g2 g1 g0 b4 b3 b2 b1 b0], ...
+// In the case WEBP_SWAP_16BITS_CSP is defined, the bytes are swapped for
+// these two modes:
+// RGBA-4444: [b3 b2 b1 b0 a3 a2 a1 a0], [r3 r2 r1 r0 g3 g2 g1 g0], ...
+// RGB-565: [g2 g1 g0 b4 b3 b2 b1 b0], [r4 r3 r2 r1 r0 g5 g4 g3], ...
+
+typedef enum WEBP_CSP_MODE {
+    MODE_RGB = 0,       // packed modes are named after their byte order in memory
+    MODE_RGBA = 1,
+    MODE_BGR = 2,
+    MODE_BGRA = 3,
+    MODE_ARGB = 4,
+    MODE_RGBA_4444 = 5, // two bytes per pixel; byte layout is given in the notes before this enum
+    MODE_RGB_565 = 6,   // two bytes per pixel; byte layout is given in the notes before this enum
+    // RGB-premultiplied transparent modes (alpha value is preserved)
+    MODE_rgbA = 7,
+    MODE_bgrA = 8,
+    MODE_Argb = 9,
+    MODE_rgbA_4444 = 10,
+    // YUV modes must come after RGB ones: WebPIsRGBMode() tests 'mode < MODE_YUV'.
+    MODE_YUV = 11,
+    MODE_YUVA = 12, // yuv 4:2:0
+    MODE_LAST = 13  // one past the highest mode value (presumably a sentinel/count)
+} WEBP_CSP_MODE;
+
+// Some useful helper functions:
+static WEBP_INLINE int WebPIsPremultipliedMode(WEBP_CSP_MODE mode) { // 1 for the four premultiplied modes
+    return !(mode != MODE_rgbA && mode != MODE_bgrA && mode != MODE_Argb && mode != MODE_rgbA_4444);
+}
+
+static WEBP_INLINE int WebPIsAlphaMode(WEBP_CSP_MODE mode) { // 1 for the modes tested below, else 0
+    const int plain_rgba = (mode == MODE_RGBA || mode == MODE_BGRA || mode == MODE_ARGB || mode == MODE_RGBA_4444);
+    return (plain_rgba || mode == MODE_YUVA || WebPIsPremultipliedMode(mode));
+}
+
+static WEBP_INLINE int WebPIsRGBMode(WEBP_CSP_MODE mode) {
+    return (MODE_YUV > mode); // relies on every non-YUV mode being numbered below MODE_YUV
+}
+
+//------------------------------------------------------------------------------
+// WebPDecBuffer: Generic structure for describing the output sample buffer.
+
+struct WebPRGBABuffer { // view of the output as a single packed RGBA plane
+    uint8_t* rgba;      // pointer to the first RGBA sample
+    int stride;         // stride in bytes from one scanline to the next.
+    size_t size;        // total size of the buffer pointed to by 'rgba'.
+};
+
+struct WebPYUVABuffer {     // view of the output as separate Y, U, V and A planes
+    uint8_t *y, *u, *v, *a; // pointers to the luma, chroma U/V and alpha samples
+    int y_stride;           // luma stride
+    int u_stride, v_stride; // chroma strides (one per chroma plane)
+    int a_stride;           // alpha stride
+    size_t y_size;          // luma plane size
+    size_t u_size, v_size;  // chroma plane sizes (one per chroma plane)
+    size_t a_size;          // alpha plane size
+};
+
+// Output buffer
+struct WebPDecBuffer {
+    WEBP_CSP_MODE colorspace; // Colorspace.
+    int width, height;        // Dimensions.
+    int is_external_memory;   // If true, the 'private_memory' pointer is not used.
+    union {
+        WebPRGBABuffer RGBA;
+        WebPYUVABuffer YUVA;
+    } u;             // Buffer parameters, accessed as 'u.RGBA' or 'u.YUVA'.
+    uint32_t pad[4]; // padding for later use
+
+    uint8_t* private_memory; // Internally allocated memory (only when
+                             // is_external_memory is false). Should not be used
+                             // externally, but accessed via the buffer union 'u'.
+};
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(int) WebPInitDecBufferInternal(WebPDecBuffer*, int);
+
+// Initializes 'buffer' as empty by forwarding to WebPInitDecBufferInternal() together with
+// WEBP_DECODER_ABI_VERSION. Must be called before any other use. Returns false on version mismatch.
+static WEBP_INLINE int WebPInitDecBuffer(WebPDecBuffer* buffer) {
+    return WebPInitDecBufferInternal(buffer, WEBP_DECODER_ABI_VERSION);
+}
+
+// Free any memory associated with the buffer. Must always be called last.
+// Note: doesn't free the 'buffer' structure itself.
+WEBP_EXTERN(void) WebPFreeDecBuffer(WebPDecBuffer* buffer);
+
+//------------------------------------------------------------------------------
+// Enumeration of the status codes
+
+typedef enum VP8StatusCode {
+    VP8_STATUS_OK = 0, // success; the remaining codes take the values 1..7 in order
+    VP8_STATUS_OUT_OF_MEMORY,
+    VP8_STATUS_INVALID_PARAM,
+    VP8_STATUS_BITSTREAM_ERROR,
+    VP8_STATUS_UNSUPPORTED_FEATURE,
+    VP8_STATUS_SUSPENDED, // returned by WebPIAppend()/WebPIUpdate() when more data is expected
+    VP8_STATUS_USER_ABORT,
+    VP8_STATUS_NOT_ENOUGH_DATA // returned by WebPGetFeatures() when more header data is needed
+} VP8StatusCode;
+
+//------------------------------------------------------------------------------
+// Incremental decoding
+//
+// This API allows streamlined decoding of partial data.
+// Picture can be incrementally decoded as data become available thanks to the
+// WebPIDecoder object. This object can be left in a SUSPENDED state if the
+// picture is only partially decoded, pending additional input.
+// Code example:
+//
+//   WebPInitDecBuffer(&buffer);
+//   buffer.colorspace = mode;
+//   ...
+//   WebPIDecoder* idec = WebPINewDecoder(&buffer);
+//   while (has_more_data) {
+//     // ... (get additional data)
+//     status = WebPIAppend(idec, new_data, new_data_size);
+//     if (status != VP8_STATUS_OK && status != VP8_STATUS_SUSPENDED) {
+//       break;  // an error occurred.
+//     }
+//
+//     // The above call decodes the current available buffer.
+//     // Part of the image can now be refreshed by calling to
+//     // WebPIDecGetRGB()/WebPIDecGetYUVA() etc.
+//   }
+//   WebPIDelete(idec);
+
+// Creates a new incremental decoder with the supplied buffer parameter.
+// This output_buffer can be passed NULL, in which case a default output buffer
+// is used (with MODE_RGB). Otherwise, an internal reference to 'output_buffer'
+// is kept, which means that the lifespan of 'output_buffer' must be larger than
+// that of the returned WebPIDecoder object.
+// The supplied 'output_buffer' content MUST NOT be changed between calls to
+// WebPIAppend() or WebPIUpdate() unless 'output_buffer.is_external_memory' is
+// set to 1. In such a case, it is allowed to modify the pointers, size and
+// stride of output_buffer.u.RGBA or output_buffer.u.YUVA, provided they remain
+// within valid bounds.
+// All other fields of WebPDecBuffer MUST remain constant between calls.
+// Returns NULL if the allocation failed.
+WEBP_EXTERN(WebPIDecoder*) WebPINewDecoder(WebPDecBuffer* output_buffer);
+
+// This function allocates and initializes an incremental-decoder object, which
+// will output the RGB/A samples specified by 'csp' into a preallocated
+// buffer 'output_buffer'. The size of this buffer is at least
+// 'output_buffer_size' and the stride (distance in bytes between two scanlines)
+// is specified by 'output_stride'.
+// Additionally, output_buffer can be passed NULL in which case the output
+// buffer will be allocated automatically when the decoding starts. The
+// colorspace 'csp' is taken into account for allocating this buffer. All other
+// parameters are ignored.
+// Returns NULL if the allocation failed, or if some parameters are invalid.
+WEBP_EXTERN(WebPIDecoder*)
+WebPINewRGB(WEBP_CSP_MODE csp, uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+
+// This function allocates and initializes an incremental-decoder object, which
+// will output the raw luma/chroma samples into preallocated planes if
+// supplied. The luma plane is specified by its pointer 'luma', its size
+// 'luma_size' and its stride 'luma_stride'. Similarly, the chroma-u plane
+// is specified by the 'u', 'u_size' and 'u_stride' parameters, and the chroma-v
+// plane by 'v', 'v_size' and 'v_stride'. The same goes for the alpha plane
+// ('a', 'a_size', 'a_stride'); pass 'a' as NULL to skip the transparency plane.
+// Conversely, 'luma' can be passed NULL if no preallocated planes are supplied.
+// In this case, the output buffer will be automatically allocated (using
+// MODE_YUVA) when decoding starts. All parameters are then ignored.
+// Returns NULL if the allocation failed or if a parameter is invalid.
+WEBP_EXTERN(WebPIDecoder*)
+WebPINewYUVA(uint8_t* luma,
+             size_t luma_size,
+             int luma_stride,
+             uint8_t* u,
+             size_t u_size,
+             int u_stride,
+             uint8_t* v,
+             size_t v_size,
+             int v_stride,
+             uint8_t* a,
+             size_t a_size,
+             int a_stride);
+
+// Deprecated version of the above, without the alpha plane.
+// Kept for backward compatibility.
+WEBP_EXTERN(WebPIDecoder*)
+WebPINewYUV(uint8_t* luma,
+            size_t luma_size,
+            int luma_stride,
+            uint8_t* u,
+            size_t u_size,
+            int u_stride,
+            uint8_t* v,
+            size_t v_size,
+            int v_stride);
+
+// Deletes the WebPIDecoder object and associated memory. Must always be called
+// if WebPINewDecoder, WebPINewRGB, WebPINewYUVA or WebPINewYUV succeeded.
+WEBP_EXTERN(void) WebPIDelete(WebPIDecoder* idec);
+
+// Copies and decodes the next available data. Returns VP8_STATUS_OK when
+// the image is successfully decoded. Returns VP8_STATUS_SUSPENDED when more
+// data is expected. Returns error in other cases.
+WEBP_EXTERN(VP8StatusCode) WebPIAppend(WebPIDecoder* idec, const uint8_t* data, size_t data_size);
+
+// A variant of the above function to be used when data buffer contains
+// partial data from the beginning. In this case data buffer is not copied
+// to the internal memory.
+// Note that the value of the 'data' pointer can change between calls to
+// WebPIUpdate, for instance when the data buffer is resized to fit larger data.
+WEBP_EXTERN(VP8StatusCode) WebPIUpdate(WebPIDecoder* idec, const uint8_t* data, size_t data_size);
+
+// Returns the RGB/A image decoded so far. Returns NULL if output params
+// are not initialized yet. The RGB/A output type corresponds to the colorspace
+// specified during call to WebPINewDecoder() or WebPINewRGB().
+// *last_y is the index of last decoded row in raster scan order. Some pointers
+// (*last_y, *width etc.) can be NULL if corresponding information is not
+// needed.
+WEBP_EXTERN(uint8_t*) WebPIDecGetRGB(const WebPIDecoder* idec, int* last_y, int* width, int* height, int* stride);
+
+// Same as above function to get a YUVA image. Returns pointer to the luma
+// plane or NULL in case of error. If there is no alpha information
+// the alpha pointer '*a' will be returned NULL.
+WEBP_EXTERN(uint8_t*)
+WebPIDecGetYUVA(const WebPIDecoder* idec,
+                int* last_y,
+                uint8_t** u,
+                uint8_t** v,
+                uint8_t** a,
+                int* width,
+                int* height,
+                int* stride,
+                int* uv_stride,
+                int* a_stride);
+
+// Deprecated alpha-less version of WebPIDecGetYUVA(): forwards every argument and passes NULL
+// for the alpha plane pointer and the alpha stride. Kept for backward compatibility.
+static WEBP_INLINE uint8_t* WebPIDecGetYUV(const WebPIDecoder* idec,
+                                           int* last_y,     // forwarded unchanged
+                                           uint8_t** u,     // receives the U plane pointer
+                                           uint8_t** v,     // receives the V plane pointer
+                                           int* width,
+                                           int* height,
+                                           int* stride,     // luma stride
+                                           int* uv_stride) {
+    return WebPIDecGetYUVA(idec, last_y, u, v, NULL, width, height, stride, uv_stride, NULL);
+}
+
+// Generic call to retrieve information about the displayable area.
+// If non NULL, the left/top/width/height pointers are filled with the visible
+// rectangular area so far.
+// Returns NULL in case the incremental decoder object is in an invalid state.
+// Otherwise returns the pointer to the internal representation. This structure
+// is read-only, tied to WebPIDecoder's lifespan and should not be modified.
+WEBP_EXTERN(const WebPDecBuffer*)
+WebPIDecodedArea(const WebPIDecoder* idec, int* left, int* top, int* width, int* height);
+
+//------------------------------------------------------------------------------
+// Advanced decoding parametrization
+//
+//  Code sample for using the advanced decoding API
+/*
+     // A) Init a configuration object
+     WebPDecoderConfig config;
+     CHECK(WebPInitDecoderConfig(&config));
+
+     // B) optional: retrieve the bitstream's features.
+     CHECK(WebPGetFeatures(data, data_size, &config.input) == VP8_STATUS_OK);
+
+     // C) Adjust 'config', if needed
+     config.no_fancy_upsampling = 1;
+     config.output.colorspace = MODE_BGRA;
+     // etc.
+
+     // Note that you can also make config.output point to an externally
+     // supplied memory buffer, provided it's big enough to store the decoded
+     // picture. Otherwise, config.output will just be used to allocate memory
+     // and store the decoded picture.
+
+     // D) Decode!
+     CHECK(WebPDecode(data, data_size, &config) == VP8_STATUS_OK);
+
+     // E) Decoded image is now in config.output (and config.output.u.RGBA)
+
+     // F) Reclaim memory allocated in config's object. It's safe to call
+     // this function even if the memory is external and wasn't allocated
+     // by WebPDecode().
+     WebPFreeDecBuffer(&config.output);
+*/
+
+// Features gathered from the bitstream
+struct WebPBitstreamFeatures { // filled in by WebPGetFeatures()
+    int width;         // Width in pixels, as read from the bitstream.
+    int height;        // Height in pixels, as read from the bitstream.
+    int has_alpha;     // True if the bitstream contains an alpha channel.
+    int has_animation; // True if the bitstream is an animation.
+    int format;        // 0 = undefined (/mixed), 1 = lossy, 2 = lossless
+
+    uint32_t pad[5]; // padding for later use
+};
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(VP8StatusCode) WebPGetFeaturesInternal(const uint8_t*, size_t, WebPBitstreamFeatures*, int);
+
+// Retrieve features from the bitstream. The *features structure is filled
+// with information gathered from the bitstream (via WebPGetFeaturesInternal()).
+// Returns VP8_STATUS_OK when the features are successfully retrieved. Returns
+// VP8_STATUS_NOT_ENOUGH_DATA when more data is needed to retrieve the
+// features from headers. Returns error in other cases.
+static WEBP_INLINE VP8StatusCode WebPGetFeatures(const uint8_t* data,  // start of the bitstream
+                                                 size_t data_size,     // size of 'data'
+                                                 WebPBitstreamFeatures* features) {
+    return WebPGetFeaturesInternal(data, data_size, features, WEBP_DECODER_ABI_VERSION);
+}
+
+// Decoding options
+struct WebPDecoderOptions {
+    int bypass_filtering;            // if true, skip the in-loop filtering
+    int no_fancy_upsampling;         // if true, use faster pointwise upsampler
+    int use_cropping;                // if true, cropping is applied _first_
+    int crop_left, crop_top;         // top-left position for cropping.
+                                     // Will be snapped to even values.
+    int crop_width, crop_height;     // dimensions of the cropping area
+    int use_scaling;                 // if true, scaling is applied _afterward_
+    int scaled_width, scaled_height; // final resolution (after scaling)
+    int use_threads;                 // if true, use multi-threaded decoding
+    int dithering_strength;          // dithering strength (0=Off, 100=full)
+    int flip;                        // flip output vertically
+    int alpha_dithering_strength;    // alpha dithering strength in [0..100]
+    int thread_number;               // multi-threaded decoding for lossless mode (presumably a thread count)
+
+    uint32_t pad[5]; // padding for later use
+};
+
+// Main object storing the configuration for advanced decoding.
+struct WebPDecoderConfig {       // aggregates the three parts below; there is no status field
+    WebPBitstreamFeatures input; // Immutable bitstream features (optional)
+    WebPDecBuffer output;        // Output buffer (can point to external mem)
+    WebPDecoderOptions options;  // Decoding options
+};
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(int) WebPInitDecoderConfigInternal(WebPDecoderConfig*, int);
+
+// Initialize the configuration as empty. This function must always be
+// called first, unless WebPGetFeatures() is to be called.
+// Returns false in case of mismatched version (WEBP_DECODER_ABI_VERSION is passed along).
+static WEBP_INLINE int WebPInitDecoderConfig(WebPDecoderConfig* config) {
+    return WebPInitDecoderConfigInternal(config, WEBP_DECODER_ABI_VERSION);
+}
+
+// Instantiate a new incremental decoder object with the requested
+// configuration. The bitstream can be passed using 'data' and 'data_size'
+// parameter, in which case the features will be parsed and stored into
+// config->input. Otherwise, 'data' can be NULL and no parsing will occur.
+// Note that 'config' can be NULL too, in which case a default configuration
+// is used.
+// The returned WebPIDecoder object must always be deleted calling WebPIDelete().
+// Returns NULL in case of error. (NOTE: WebPDecoderConfig, as declared in this
+// header, has no 'status' member to report the error condition.)
+WEBP_EXTERN(WebPIDecoder*) WebPIDecode(const uint8_t* data, size_t data_size, WebPDecoderConfig* config);
+
+// Non-incremental version. This version decodes the full data at once, taking
+// 'config' into account. Returns decoding status (which should be VP8_STATUS_OK
+// if the decoding was successful).
+WEBP_EXTERN(VP8StatusCode) WebPDecode(const uint8_t* data, size_t data_size, WebPDecoderConfig* config);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_WEBP_DECODE_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/webp/demux.h b/codec/L2/demos/webpEnc/host/src/webp/demux.h
new file mode 100644
index 0000000000..dda4b5cd4c
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/webp/demux.h
@@ -0,0 +1,345 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Demux API.
+// Enables extraction of image and extended format data from WebP files.
+
+// Code Example: Demuxing WebP data to extract all the frames, ICC profile
+// and EXIF/XMP metadata.
+/*
+  WebPDemuxer* demux = WebPDemux(&webp_data);
+
+  uint32_t width = WebPDemuxGetI(demux, WEBP_FF_CANVAS_WIDTH);
+  uint32_t height = WebPDemuxGetI(demux, WEBP_FF_CANVAS_HEIGHT);
+  // ... (Get information about the features present in the WebP file).
+  uint32_t flags = WebPDemuxGetI(demux, WEBP_FF_FORMAT_FLAGS);
+
+  // ... (Iterate over all frames).
+  WebPIterator iter;
+  if (WebPDemuxGetFrame(demux, 1, &iter)) {
+    do {
+      // ... (Consume 'iter'; e.g. Decode 'iter.fragment' with WebPDecode(),
+      // ... and get other frame properties like width, height, offsets etc.
+      // ... see 'struct WebPIterator' below for more info).
+    } while (WebPDemuxNextFrame(&iter));
+    WebPDemuxReleaseIterator(&iter);
+  }
+
+  // ... (Extract metadata).
+  WebPChunkIterator chunk_iter;
+  if (flags & ICCP_FLAG) WebPDemuxGetChunk(demux, "ICCP", 1, &chunk_iter);
+  // ... (Consume the ICC profile in 'chunk_iter.chunk').
+  WebPDemuxReleaseChunkIterator(&chunk_iter);
+  if (flags & EXIF_FLAG) WebPDemuxGetChunk(demux, "EXIF", 1, &chunk_iter);
+  // ... (Consume the EXIF metadata in 'chunk_iter.chunk').
+  WebPDemuxReleaseChunkIterator(&chunk_iter);
+  if (flags & XMP_FLAG) WebPDemuxGetChunk(demux, "XMP ", 1, &chunk_iter);
+  // ... (Consume the XMP metadata in 'chunk_iter.chunk').
+  WebPDemuxReleaseChunkIterator(&chunk_iter);
+  WebPDemuxDelete(demux);
+*/
+
+#ifndef WEBP_WEBP_DEMUX_H_
+#define WEBP_WEBP_DEMUX_H_
+
+#include "./decode.h" // for WEBP_CSP_MODE
+#include "./mux_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define WEBP_DEMUX_ABI_VERSION 0x0107 // MAJOR(8b) + MINOR(8b)
+
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum WebPDemuxState WebPDemuxState;
+// typedef enum WebPFormatFeature WebPFormatFeature;
+typedef struct WebPDemuxer WebPDemuxer;
+typedef struct WebPIterator WebPIterator;
+typedef struct WebPChunkIterator WebPChunkIterator;
+typedef struct WebPAnimInfo WebPAnimInfo;
+typedef struct WebPAnimDecoderOptions WebPAnimDecoderOptions;
+
+//------------------------------------------------------------------------------
+
+// Returns the version number of the demux library, packed in hexadecimal using
+// 8bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
+WEBP_EXTERN(int) WebPGetDemuxVersion(void);
+
+//------------------------------------------------------------------------------
+// Life of a Demux object
+
+typedef enum WebPDemuxState {     // reported through the 'state' argument of WebPDemuxPartial()
+    WEBP_DEMUX_PARSE_ERROR = -1,   // An error occurred while parsing.
+    WEBP_DEMUX_PARSING_HEADER = 0, // Not enough data to parse full header.
+    WEBP_DEMUX_PARSED_HEADER = 1,  // Header parsing complete,
+                                   // data may be available.
+    WEBP_DEMUX_DONE = 2            // Entire file has been parsed.
+} WebPDemuxState;
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(WebPDemuxer*) WebPDemuxInternal(const WebPData*, int, WebPDemuxState*, int);
+
+// Parses the full WebP file given by 'data'. For single images the WebP file
+// header alone or the file header and the chunk header may be absent.
+// Returns a WebPDemuxer object on successful parse, NULL otherwise.
+static WEBP_INLINE WebPDemuxer* WebPDemux(const WebPData* data) {
+    return WebPDemuxInternal(data, 0, NULL, WEBP_DEMUX_ABI_VERSION); // 0: presumably "not partial"; no state output
+}
+
+// Parses the possibly incomplete WebP file given by 'data'.
+// If 'state' is non-NULL it will be set to indicate the status of the demuxer.
+// Returns NULL in case of error or if there isn't enough data to start parsing;
+// and a WebPDemuxer object on successful parse.
+// Note that WebPDemuxer keeps internal pointers to 'data' memory segment.
+// If this data is volatile, the demuxer object should be deleted (by calling
+// WebPDemuxDelete()) and WebPDemuxPartial() called again on the new data.
+// This is usually an inexpensive operation.
+static WEBP_INLINE WebPDemuxer* WebPDemuxPartial(const WebPData* data, WebPDemuxState* state) {
+    return WebPDemuxInternal(data, 1, state, WEBP_DEMUX_ABI_VERSION); // 1: presumably "partial data allowed"
+}
+
+// Frees memory associated with 'dmux'.
+WEBP_EXTERN(void) WebPDemuxDelete(WebPDemuxer* dmux);
+
+//------------------------------------------------------------------------------
+// Data/information extraction.
+
+typedef enum WebPFormatFeature { // selects the value returned by WebPDemuxGetI()
+    WEBP_FF_FORMAT_FLAGS, // Extended format flags present in the 'VP8X' chunk.
+    WEBP_FF_CANVAS_WIDTH,
+    WEBP_FF_CANVAS_HEIGHT,
+    WEBP_FF_LOOP_COUNT,
+    WEBP_FF_BACKGROUND_COLOR,
+    WEBP_FF_FRAME_COUNT // Number of frames present in the demux object.
+                        // In case of a partial demux, this is the number of
+                        // frames seen so far, with the last frame possibly
+                        // being partial.
+} WebPFormatFeature;
+
+// Get the 'feature' value from the 'dmux'.
+// NOTE: values are only valid if WebPDemux() was used or WebPDemuxPartial()
+// returned a state > WEBP_DEMUX_PARSING_HEADER.
+WEBP_EXTERN(uint32_t) WebPDemuxGetI(const WebPDemuxer* dmux, WebPFormatFeature feature);
+
+//------------------------------------------------------------------------------
+// Frame iteration.
+
+struct WebPIterator { // frame cursor filled in by WebPDemuxGetFrame() / WebPDemuxNextFrame() / WebPDemuxPrevFrame()
+    int frame_num;                     // number of the frame held in 'fragment'.
+    int num_frames;                    // equivalent to WEBP_FF_FRAME_COUNT.
+    int x_offset, y_offset;            // offset relative to the canvas.
+    int width, height;                 // dimensions of this frame.
+    int duration;                      // display duration in milliseconds.
+    WebPMuxAnimDispose dispose_method; // dispose method for the frame.
+    int complete;                      // true if 'fragment' contains a full frame. Partial images
+                                       // may still be decoded with the WebP incremental decoder.
+    WebPData fragment;                 // The frame given by 'frame_num'. Note for historical
+                                       // reasons this is called a fragment.
+    int has_alpha;                     // True if the frame contains transparency.
+    WebPMuxAnimBlend blend_method;     // Blend operation for the frame.
+
+    uint32_t pad[2]; // padding for later use.
+    void* private_;  // for internal use only.
+};
+
+// Retrieves frame 'frame_number' from 'dmux'.
+// 'iter->fragment' points to the frame on return from this function.
+// Setting 'frame_number' equal to 0 will return the last frame of the image.
+// Returns false if 'dmux' is NULL or frame 'frame_number' is not present.
+// Call WebPDemuxReleaseIterator() when use of the iterator is complete.
+// NOTE: 'dmux' must persist for the lifetime of 'iter'.
+WEBP_EXTERN(int) WebPDemuxGetFrame(const WebPDemuxer* dmux, int frame_number, WebPIterator* iter);
+
+// Sets 'iter->fragment' to point to the next ('iter->frame_num' + 1) or
+// previous ('iter->frame_num' - 1) frame. These functions do not loop.
+// Returns true on success, false otherwise.
+WEBP_EXTERN(int) WebPDemuxNextFrame(WebPIterator* iter);
+WEBP_EXTERN(int) WebPDemuxPrevFrame(WebPIterator* iter);
+
+// Releases any memory associated with 'iter'.
+// Must be called before any subsequent calls to WebPDemuxGetFrame() on the same
+// iter. Also, must be called before destroying the associated WebPDemuxer with
+// WebPDemuxDelete().
+WEBP_EXTERN(void) WebPDemuxReleaseIterator(WebPIterator* iter);
+
+//------------------------------------------------------------------------------
+// Chunk iteration.
+
+struct WebPChunkIterator { // chunk cursor filled in by WebPDemuxGetChunk() / WebPDemuxNextChunk() / WebPDemuxPrevChunk()
+    // The current and total number of chunks with the fourcc given to
+    // WebPDemuxGetChunk().
+    int chunk_num;
+    int num_chunks;
+    WebPData chunk; // The payload of the chunk.
+
+    uint32_t pad[6]; // padding for later use
+    void* private_;  // presumably for internal use only, as in WebPIterator
+};
+
+// Retrieves the 'chunk_number' instance of the chunk with id 'fourcc' from
+// 'dmux'.
+// 'fourcc' is a character array containing the fourcc of the chunk to return,
+// e.g., "ICCP", "XMP ", "EXIF", etc.
+// Setting 'chunk_number' equal to 0 will return the last chunk in a set.
+// Returns true if the chunk is found, false otherwise. Image related chunk
+// payloads are accessed through WebPDemuxGetFrame() and related functions.
+// Call WebPDemuxReleaseChunkIterator() when use of the iterator is complete.
+// NOTE: 'dmux' must persist for the lifetime of the iterator.
+WEBP_EXTERN(int)
+WebPDemuxGetChunk(const WebPDemuxer* dmux, const char fourcc[4], int chunk_number, WebPChunkIterator* iter);
+
+// Sets 'iter->chunk' to point to the next ('iter->chunk_num' + 1) or previous
+// ('iter->chunk_num' - 1) chunk. These functions do not loop.
+// Returns true on success, false otherwise.
+WEBP_EXTERN(int) WebPDemuxNextChunk(WebPChunkIterator* iter);
+WEBP_EXTERN(int) WebPDemuxPrevChunk(WebPChunkIterator* iter);
+
+// Releases any memory associated with 'iter'.
+// Must be called before destroying the associated WebPDemuxer with
+// WebPDemuxDelete().
+WEBP_EXTERN(void) WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter);
+
+//------------------------------------------------------------------------------
+// WebPAnimDecoder API
+//
+// This API allows decoding (possibly) animated WebP images.
+//
+// Code Example:
+/*
+  WebPAnimDecoderOptions dec_options;
+  WebPAnimDecoderOptionsInit(&dec_options);
+  // Tune 'dec_options' as needed.
+  WebPAnimDecoder* dec = WebPAnimDecoderNew(webp_data, &dec_options);
+  WebPAnimInfo anim_info;
+  WebPAnimDecoderGetInfo(dec, &anim_info);
+  for (uint32_t i = 0; i < anim_info.loop_count; ++i) {
+    while (WebPAnimDecoderHasMoreFrames(dec)) {
+      uint8_t* buf;
+      int timestamp;
+      WebPAnimDecoderGetNext(dec, &buf, &timestamp);
+      // ... (Render 'buf' based on 'timestamp').
+      // ... (Do NOT free 'buf', as it is owned by 'dec').
+    }
+    WebPAnimDecoderReset(dec);
+  }
+  const WebPDemuxer* demuxer = WebPAnimDecoderGetDemuxer(dec);
+  // ... (Do something using 'demuxer'; e.g. get EXIF/XMP/ICC data).
+  WebPAnimDecoderDelete(dec);
+*/
+
+typedef struct WebPAnimDecoder WebPAnimDecoder; // Main opaque object.
+
+// Global options for WebPAnimDecoder; initialize with WebPAnimDecoderOptionsInit().
+struct WebPAnimDecoderOptions {
+    // Output colorspace. Only the following modes are supported:
+    // MODE_RGBA, MODE_BGRA, MODE_rgbA and MODE_bgrA.
+    WEBP_CSP_MODE color_mode;
+    int use_threads;     // If true, use multi-threaded decoding.
+    uint32_t padding[7]; // Padding for later use.
+};
+
+// Internal, version-checked, entry point.
+WEBP_EXTERN(int) WebPAnimDecoderOptionsInitInternal(WebPAnimDecoderOptions*, int);
+
+// Should always be called, to initialize a fresh WebPAnimDecoderOptions
+// structure before modification. Forwards WEBP_DEMUX_ABI_VERSION to the
+// version-checked entry point; returns false in case of version mismatch.
+// Must have succeeded before using the 'dec_options' object.
+static WEBP_INLINE int WebPAnimDecoderOptionsInit(WebPAnimDecoderOptions* dec_options) {
+    return WebPAnimDecoderOptionsInitInternal(dec_options, WEBP_DEMUX_ABI_VERSION);
+}
+
+// Internal, version-checked, entry point.
+WEBP_EXTERN(WebPAnimDecoder*) WebPAnimDecoderNewInternal(const WebPData*, const WebPAnimDecoderOptions*, int);
+
+// Creates and initializes a WebPAnimDecoder object (version-checked via WEBP_DEMUX_ABI_VERSION).
+// Parameters:
+//   webp_data - (in) WebP bitstream. This should remain unchanged during the
+//                    lifetime of the output WebPAnimDecoder object.
+//   dec_options - (in) decoding options. Can be passed NULL to choose
+//                      reasonable defaults (in particular, color mode MODE_RGBA
+//                      will be picked).
+// Returns:
+//   A pointer to the newly created WebPAnimDecoder object, or NULL in case of
+//   parsing error, invalid option or memory error.
+static WEBP_INLINE WebPAnimDecoder* WebPAnimDecoderNew(const WebPData* webp_data,
+                                                       const WebPAnimDecoderOptions* dec_options) {
+    return WebPAnimDecoderNewInternal(webp_data, dec_options, WEBP_DEMUX_ABI_VERSION);
+}
+
+// Global information about the animation, as filled in by WebPAnimDecoderGetInfo().
+struct WebPAnimInfo {
+    uint32_t canvas_width;
+    uint32_t canvas_height;
+    uint32_t loop_count;
+    uint32_t bgcolor;
+    uint32_t frame_count;
+    uint32_t pad[4]; // padding for later use
+};
+
+// Get global information about the animation.
+// Parameters:
+//   dec - (in) decoder instance to get information from.
+//   info - (out) global information fetched from the animation.
+// Returns:
+//   True on success.
+WEBP_EXTERN(int) WebPAnimDecoderGetInfo(const WebPAnimDecoder* dec, WebPAnimInfo* info);
+
+// Fetch the next frame from 'dec' based on options supplied to
+// WebPAnimDecoderNew(). This will be a fully reconstructed canvas of size
+// 'canvas_width * 4 * canvas_height', and not just the frame sub-rectangle. The
+// returned buffer 'buf' is valid only until the next call to
+// WebPAnimDecoderGetNext(), WebPAnimDecoderReset() or WebPAnimDecoderDelete().
+// Parameters:
+//   dec - (in/out) decoder instance from which the next frame is to be fetched.
+//   buf - (out) decoded frame.
+//   timestamp - (out) timestamp of the frame in milliseconds.
+// Returns:
+//   False if any of the arguments are NULL, or if there is a parsing or
+//   decoding error, or if there are no more frames. Otherwise, returns true.
+WEBP_EXTERN(int) WebPAnimDecoderGetNext(WebPAnimDecoder* dec, uint8_t** buf, int* timestamp);
+
+// Check if there are more frames left to decode.
+// Parameters:
+//   dec - (in) decoder instance to be checked.
+// Returns:
+//   True if 'dec' is not NULL and some frames are yet to be decoded.
+//   Otherwise, returns false.
+WEBP_EXTERN(int) WebPAnimDecoderHasMoreFrames(const WebPAnimDecoder* dec);
+
+// Resets the WebPAnimDecoder object, so that next call to
+// WebPAnimDecoderGetNext() will restart decoding from 1st frame. This would be
+// helpful when all frames need to be decoded multiple times (e.g.
+// info.loop_count times) without destroying and recreating the 'dec' object.
+// Parameters:
+//   dec - (in/out) decoder instance to be reset
+WEBP_EXTERN(void) WebPAnimDecoderReset(WebPAnimDecoder* dec);
+
+// Grab the internal demuxer object.
+// Getting the demuxer object can be useful if one wants to use operations only
+// available through demuxer; e.g. to get XMP/EXIF/ICC metadata. The returned
+// demuxer object is owned by 'dec' and is valid only until the next call to
+// WebPAnimDecoderDelete().
+//
+// Parameters:
+//   dec - (in) decoder instance from which the demuxer object is to be fetched.
+WEBP_EXTERN(const WebPDemuxer*) WebPAnimDecoderGetDemuxer(const WebPAnimDecoder* dec);
+
+// Deletes the WebPAnimDecoder object.
+// Parameters:
+//   dec - (in/out) decoder instance to be deleted
+WEBP_EXTERN(void) WebPAnimDecoderDelete(WebPAnimDecoder* dec);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_WEBP_DEMUX_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/webp/encode.h b/codec/L2/demos/webpEnc/host/src/webp/encode.h
new file mode 100644
index 0000000000..c19608de13
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/webp/encode.h
@@ -0,0 +1,506 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   WebP encoder: main interface
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_WEBP_ENCODE_H_
+#define WEBP_WEBP_ENCODE_H_
+
+#include "./types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define WEBP_ENCODER_ABI_VERSION 0x0209 // MAJOR(8b) + MINOR(8b)
+
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum WebPImageHint WebPImageHint;
+// typedef enum WebPEncCSP WebPEncCSP;
+// typedef enum WebPPreset WebPPreset;
+// typedef enum WebPEncodingError WebPEncodingError;
+typedef struct WebPConfig WebPConfig;
+typedef struct WebPPicture WebPPicture; // main structure for I/O
+typedef struct WebPAuxStats WebPAuxStats;
+typedef struct WebPMemoryWriter WebPMemoryWriter;
+
+// Return the encoder's version number, packed in hexadecimal using 8bits for
+// each of major/minor/revision. E.g: v2.5.7 is 0x020507.
+WEBP_EXTERN(int) WebPGetEncoderVersion(void);
+
+//------------------------------------------------------------------------------
+// One-stop-shop call! No questions asked:
+
+// Returns the size of the compressed data (pointed to by *output), or 0 if
+// an error occurred. The compressed data must be released by the caller
+// using the call 'WebPFree(*output)'.
+// These functions compress using the lossy format, and the quality_factor
+// can go from 0 (smaller output, lower quality) to 100 (best quality,
+// larger output).
+WEBP_EXTERN(size_t)
+WebPEncodeRGB(const uint8_t* rgb, int width, int height, int stride, float quality_factor, uint8_t** output);
+WEBP_EXTERN(size_t)
+WebPEncodeBGR(const uint8_t* bgr, int width, int height, int stride, float quality_factor, uint8_t** output);
+WEBP_EXTERN(size_t)
+WebPEncodeRGBA(const uint8_t* rgba, int width, int height, int stride, float quality_factor, uint8_t** output);
+WEBP_EXTERN(size_t)
+WebPEncodeBGRA(const uint8_t* bgra, int width, int height, int stride, float quality_factor, uint8_t** output);
+
+// These functions are the equivalent of the above, but compressing in a
+// lossless manner. Files are usually larger than lossy format, but will
+// not suffer any compression loss.
+WEBP_EXTERN(size_t) WebPEncodeLosslessRGB(const uint8_t* rgb, int width, int height, int stride, uint8_t** output);
+WEBP_EXTERN(size_t) WebPEncodeLosslessBGR(const uint8_t* bgr, int width, int height, int stride, uint8_t** output);
+WEBP_EXTERN(size_t) WebPEncodeLosslessRGBA(const uint8_t* rgba, int width, int height, int stride, uint8_t** output);
+WEBP_EXTERN(size_t) WebPEncodeLosslessBGRA(const uint8_t* bgra, int width, int height, int stride, uint8_t** output);
+
+// Releases memory returned by the WebPEncode*() functions above.
+WEBP_EXTERN(void) WebPFree(void* ptr);
+
+//------------------------------------------------------------------------------
+// Coding parameters
+
+// Image characteristics hint for the underlying encoder.
+typedef enum WebPImageHint {
+    WEBP_HINT_DEFAULT = 0, // default hint.
+    WEBP_HINT_PICTURE,     // digital picture, like portrait, inner shot
+    WEBP_HINT_PHOTO,       // outdoor photograph, with natural lighting
+    WEBP_HINT_GRAPH,       // Discrete tone image (graph, map-tile etc).
+    WEBP_HINT_LAST         // presumably a list terminator, not a real hint -- TODO confirm
+} WebPImageHint;
+
+// Compression parameters.
+struct WebPConfig {
+    int lossless;  // Lossless encoding (0=lossy(default), 1=lossless).
+    float quality; // between 0 (smallest file) and 100 (biggest)
+    int method;    // quality/speed trade-off (0=fast, 6=slower-better)
+
+    WebPImageHint image_hint; // Hint for image type (lossless only for now).
+
+    // Parameters related to lossy compression only:
+    int target_size;       // if non-zero, set the desired target size in bytes.
+                           // Takes precedence over the 'compression' parameter.
+    float target_PSNR;     // if non-zero, specifies the minimal distortion to
+                           // try to achieve. Takes precedence over target_size.
+    int segments;          // maximum number of segments to use, in [1..4]
+    int sns_strength;      // Spatial Noise Shaping. 0=off, 100=maximum.
+    int filter_strength;   // range: [0 = off .. 100 = strongest]
+    int filter_sharpness;  // range: [0 = off .. 7 = least sharp]
+    int filter_type;       // filtering type: 0 = simple, 1 = strong (only used
+                           // if filter_strength > 0 or autofilter > 0)
+    int autofilter;        // Auto adjust filter's strength [0 = off, 1 = on]
+    int alpha_compression; // Algorithm for encoding the alpha plane (0 = none,
+                           // 1 = compressed with WebP lossless). Default is 1.
+    int alpha_filtering;   // Predictive filtering method for alpha plane.
+                           //  0: none, 1: fast, 2: best. Default is 1.
+    int alpha_quality;     // Between 0 (smallest size) and 100 (lossless).
+                           // Default is 100.
+    int pass;              // number of entropy-analysis passes (in [1..10]).
+
+    int show_compressed;   // if true, export the compressed picture back.
+                           // In-loop filtering is not applied.
+    int preprocessing;     // preprocessing filter:
+                           // 0=none, 1=segment-smooth, 2=pseudo-random dithering
+    int partitions;        // log2(number of token partitions) in [0..3]. Default
+                           // is set to 0 for easier progressive decoding.
+    int partition_limit;   // quality degradation allowed to fit the 512k limit
+                           // on prediction modes coding (0: no degradation,
+                           // 100: maximum possible degradation).
+    int emulate_jpeg_size; // If true, compression parameters will be remapped
+                           // to better match the expected output size from
+                           // JPEG compression. Generally, the output size will
+                           // be similar but the degradation will be lower.
+    int thread_level;      // If non-zero, try and use multi-threaded encoding.
+    int low_memory;        // If set, reduce memory usage (but increase CPU use).
+
+    int near_lossless; // Near lossless encoding [0 = off(default) .. 100].
+                       // This feature is experimental.
+    int exact;         // if non-zero, preserve the exact RGB values under
+                       // transparent area. Otherwise, discard this invisible
+                       // RGB information for better compression. The default
+                       // value is 0.
+    int use_ocl;       // Use OpenCL (cf. VP8_ENC_ERROR_OCL_FAILED)
+
+    int thread_number; // multi-threaded encoding (presumably the number of threads -- TODO confirm)
+
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+    int delta_palettization;
+    uint32_t pad[2]; // padding for later use
+#else
+    uint32_t pad[3]; // padding for later use
+#endif // WEBP_EXPERIMENTAL_FEATURES
+};
+
+// Predefined settings for WebPConfig, depending on the type of source picture.
+// Pass one to WebPConfigPreset(); WebPConfigInit() uses WEBP_PRESET_DEFAULT.
+typedef enum WebPPreset {
+    WEBP_PRESET_DEFAULT = 0, // default preset.
+    WEBP_PRESET_PICTURE,     // digital picture, like portrait, inner shot
+    WEBP_PRESET_PHOTO,       // outdoor photograph, with natural lighting
+    WEBP_PRESET_DRAWING,     // hand or line drawing, with high-contrast details
+    WEBP_PRESET_ICON,        // small-sized colorful images
+    WEBP_PRESET_TEXT         // text-like
+} WebPPreset;
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(int) WebPConfigInitInternal(WebPConfig*, WebPPreset, float, int);
+
+// Should always be called, to initialize a fresh WebPConfig structure before
+// modification. Returns false in case of version mismatch. WebPConfigInit()
+// must have succeeded before using the 'config' object. Initializes from
+// WEBP_PRESET_DEFAULT with quality=75 (lossless defaults to 0).
+static WEBP_INLINE int WebPConfigInit(WebPConfig* config) {
+    return WebPConfigInitInternal(config, WEBP_PRESET_DEFAULT, 75.f, WEBP_ENCODER_ABI_VERSION);
+}
+
+// Initializes the configuration according to a predefined set of parameters
+// (referred to by 'preset') and a given quality factor. Can be called as a
+// replacement for WebPConfigInit(); like it, forwards WEBP_ENCODER_ABI_VERSION
+// to WebPConfigInitInternal(). Returns false in case of error.
+static WEBP_INLINE int WebPConfigPreset(WebPConfig* config, WebPPreset preset, float quality) {
+    return WebPConfigInitInternal(config, preset, quality, WEBP_ENCODER_ABI_VERSION);
+}
+
+// Activate the lossless compression mode with the desired efficiency level
+// between 0 (fastest, lowest compression) and 9 (slower, best compression).
+// A good default level is '6', providing a fair tradeoff between compression
+// speed and final compressed size.
+// This function will overwrite several fields from config: 'method', 'quality'
+// and 'lossless'. Returns false in case of parameter error.
+WEBP_EXTERN(int) WebPConfigLosslessPreset(WebPConfig* config, int level);
+
+// Returns true if 'config' is non-NULL and all configuration parameters are
+// within their valid ranges.
+WEBP_EXTERN(int) WebPValidateConfig(const WebPConfig* config);
+
+//------------------------------------------------------------------------------
+// Input / Output
+// Structure for storing auxiliary statistics (mostly for lossy encoding).
+
+struct WebPAuxStats {
+    int coded_size; // final size
+
+    float PSNR[5];            // peak-signal-to-noise ratio for Y/U/V/All/Alpha
+    int block_count[3];       // number of intra4/intra16/skipped macroblocks
+    int header_bytes[2];      // approximate number of bytes spent for header
+                              // and mode-partition #0
+    int residual_bytes[3][4]; // approximate number of bytes spent for
+                              // DC/AC/uv coefficients for each (0..3) segment.
+    int segment_size[4];      // number of macroblocks in each segment
+    int segment_quant[4];     // quantizer values for each segment
+    int segment_level[4];     // filtering strength for each segment [0..63]
+
+    int alpha_data_size; // size of the transparency data
+    int layer_data_size; // size of the enhancement layer data
+
+    // lossless encoder statistics
+    uint32_t lossless_features; // bit0:predictor bit1:cross-color transform
+                                // bit2:subtract-green bit3:color indexing
+    int histogram_bits;         // number of precision bits of histogram
+    int transform_bits;         // precision bits for transform
+    int cache_bits;             // number of bits for color cache lookup
+    int palette_size;           // number of colors in palette, if used
+    int lossless_size;          // final lossless size
+    int lossless_hdr_size;      // lossless header (transform, huffman etc) size
+    int lossless_data_size;     // lossless image data size
+
+    uint32_t pad[2]; // padding for later use
+};
+
+// Signature for output function. Should return true if writing was successful.
+// data/data_size is the segment of data to write, and 'picture' is for
+// reference (and so one can make use of picture->custom_ptr).
+typedef int (*WebPWriterFunction)(const uint8_t* data, size_t data_size, const WebPPicture* picture);
+
+// WebPMemoryWrite: a special WebPWriterFunction that writes to memory using
+// the following WebPMemoryWriter object (to be set as picture->custom_ptr).
+struct WebPMemoryWriter {
+    uint8_t* mem;    // final buffer (of size 'max_size', larger than 'size').
+    size_t size;     // final size
+    size_t max_size; // total capacity
+    uint32_t pad[1]; // padding for later use
+};
+
+// The following must be called first before any use.
+WEBP_EXTERN(void) WebPMemoryWriterInit(WebPMemoryWriter* writer);
+
+// The following must be called to deallocate writer->mem memory. The 'writer'
+// object itself is not deallocated.
+WEBP_EXTERN(void) WebPMemoryWriterClear(WebPMemoryWriter* writer);
+// The custom writer to be used with WebPMemoryWriter as custom_ptr. Upon
+// completion, writer.mem and writer.size will hold the coded data.
+// writer.mem must be freed by calling WebPMemoryWriterClear.
+WEBP_EXTERN(int) WebPMemoryWrite(const uint8_t* data, size_t data_size, const WebPPicture* picture);
+
+// Progress hook, called from time to time to report progress. It can return
+// false to request an abort of the encoding process, or true otherwise if
+// everything is OK.
+typedef int (*WebPProgressHook)(int percent, const WebPPicture* picture);
+
+// Color spaces.
+typedef enum WebPEncCSP {
+    // chroma sampling
+    WEBP_YUV420 = 0,       // 4:2:0
+    WEBP_YUV420A = 4,      // alpha channel variant (WEBP_YUV420 | WEBP_CSP_ALPHA_BIT)
+    WEBP_CSP_UV_MASK = 3,  // bit-mask to get the UV sampling factors
+    WEBP_CSP_ALPHA_BIT = 4 // bit that is set if alpha is present
+} WebPEncCSP;
+
+// Encoding error conditions.
+typedef enum WebPEncodingError {
+    VP8_ENC_OK = 0,
+    VP8_ENC_ERROR_OUT_OF_MEMORY,           // memory error allocating objects
+    VP8_ENC_ERROR_BITSTREAM_OUT_OF_MEMORY, // memory error while flushing bits
+    VP8_ENC_ERROR_NULL_PARAMETER,          // a pointer parameter is NULL
+    VP8_ENC_ERROR_INVALID_CONFIGURATION,   // configuration is invalid
+    VP8_ENC_ERROR_BAD_DIMENSION,           // picture has invalid width/height
+    VP8_ENC_ERROR_PARTITION0_OVERFLOW,     // partition is bigger than 512k
+    VP8_ENC_ERROR_PARTITION_OVERFLOW,      // partition is bigger than 16M
+    VP8_ENC_ERROR_BAD_WRITE,               // error while flushing bytes
+    VP8_ENC_ERROR_FILE_TOO_BIG,            // file is bigger than 4G
+    VP8_ENC_ERROR_USER_ABORT,              // abort request by user
+    VP8_ENC_ERROR_OCL_FAILED,              // under the OpenCL method only (cf. WebPConfig 'use_ocl')
+    VP8_ENC_ERROR_LAST                     // list terminator. always last.
+} WebPEncodingError;
+
+// maximum width/height allowed (inclusive), in pixels
+#define WEBP_MAX_DIMENSION 16383
+
+// Main exchange structure (input samples, output bytes, statistics)
+struct WebPPicture {
+    //   INPUT
+    //////////////
+    // Main flag for encoder selecting between ARGB or YUV input.
+    // It is recommended to use ARGB input (*argb, argb_stride) for lossless
+    // compression, and YUV input (*y, *u, *v, etc.) for lossy compression
+    // since these are the respective native colorspaces for these formats.
+    int use_argb;
+
+    // YUV input (mostly used for input to lossy compression)
+    WebPEncCSP colorspace;   // colorspace: should be YUV420 for now (=Y'CbCr).
+    int width, height;       // dimensions (less or equal to WEBP_MAX_DIMENSION)
+    uint8_t *y, *u, *v;      // pointers to luma/chroma planes.
+    int y_stride, uv_stride; // luma/chroma strides.
+    uint8_t* a;              // pointer to the alpha plane
+    int a_stride;            // stride of the alpha plane
+    uint32_t pad1[2];        // padding for later use
+
+    // ARGB input (mostly used for input to lossless compression)
+    uint32_t* argb;   // Pointer to argb (32 bit) plane.
+    int argb_stride;  // This is stride in pixels units, not bytes.
+    uint32_t pad2[3]; // padding for later use
+
+    //   OUTPUT
+    ///////////////
+    // Byte-emission hook, to store compressed bytes as they are ready.
+    WebPWriterFunction writer; // can be NULL
+    void* custom_ptr;          // can be used by the writer.
+    char custom_ptr_name[100]; // NOTE(review): undocumented; presumably a label for custom_ptr -- confirm
+
+    // map for extra information (only for lossy compression mode)
+    int extra_info_type; // 1: intra type, 2: segment, 3: quant
+                         // 4: intra-16 prediction mode,
+                         // 5: chroma prediction mode,
+                         // 6: bit cost, 7: distortion
+    uint8_t* extra_info; // if not NULL, points to an array of size
+                         // ((width + 15) / 16) * ((height + 15) / 16) that
+                         // will be filled with a macroblock map, depending
+                         // on extra_info_type.
+
+    //   STATS AND REPORTS
+    ///////////////////////////
+    // Pointer to side statistics (updated only if not NULL)
+    WebPAuxStats* stats;
+
+    // Error code for the latest error encountered during encoding
+    WebPEncodingError error_code;
+
+    // If not NULL, report progress during encoding.
+    WebPProgressHook progress_hook;
+
+    void* user_data; // this field is free to be set to any value and
+                     // used during callbacks (like progress-report e.g.).
+
+    uint32_t pad3[3]; // padding for later use
+
+    // Unused for now
+    uint8_t *pad4, *pad5;
+    uint32_t pad6[8]; // padding for later use
+
+    // PRIVATE FIELDS
+    ////////////////////
+    void* memory_;      // row chunk of memory for yuva planes
+    void* memory_argb_; // and for argb too.
+    void* pad7[2];      // padding for later use
+};
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(int) WebPPictureInitInternal(WebPPicture*, int);
+
+// Should always be called, to initialize the structure. Forwards
+// WEBP_ENCODER_ABI_VERSION to WebPPictureInitInternal() and returns false in case
+// of version mismatch. Must have succeeded before using the 'picture' object.
+// Note that, by default, use_argb is false and colorspace is WEBP_YUV420.
+static WEBP_INLINE int WebPPictureInit(WebPPicture* picture) {
+    return WebPPictureInitInternal(picture, WEBP_ENCODER_ABI_VERSION);
+}
+
+//------------------------------------------------------------------------------
+// WebPPicture utils
+
+// Convenience allocation / deallocation based on picture->width/height:
+// Allocate y/u/v buffers as per colorspace/width/height specification.
+// Note! This function will free the previous buffer if needed.
+// Returns false in case of memory error.
+WEBP_EXTERN(int) WebPPictureAlloc(WebPPicture* picture);
+
+// Release the memory allocated by WebPPictureAlloc() or WebPPictureImport*().
+// Note that this function does _not_ free the memory used by the 'picture'
+// object itself.
+// Besides memory (which is reclaimed) all other fields of 'picture' are
+// preserved.
+WEBP_EXTERN(void) WebPPictureFree(WebPPicture* picture);
+
+// Copy the pixels of *src into *dst, using WebPPictureAlloc. Upon return, *dst
+// will fully own the copied pixels (this is not a view). The 'dst' picture need
+// not be initialized as its content is overwritten.
+// Returns false in case of memory allocation error.
+WEBP_EXTERN(int) WebPPictureCopy(const WebPPicture* src, WebPPicture* dst);
+
+// Compute PSNR, SSIM or LSIM distortion metric between two pictures. Results
+// are in dB, stored in result[] in the Y/U/V/Alpha/All or B/G/R/A/All order.
+// Returns false in case of error (src and ref don't have same dimension, ...)
+// Warning: this function is rather CPU-intensive.
+WEBP_EXTERN(int)
+WebPPictureDistortion(const WebPPicture* src,
+                      const WebPPicture* ref,
+                      int metric_type, // 0 = PSNR, 1 = SSIM, 2 = LSIM
+                      float result[5]);
+
+// self-crops a picture to the rectangle defined by top/left/width/height.
+// Returns false in case of memory allocation error, or if the rectangle is
+// outside of the source picture.
+// The rectangle for the view is defined by the top-left corner pixel
+// coordinates (left, top) as well as its width and height. This rectangle
+// must be fully comprised inside the source 'picture'. If the source
+// picture uses the YUV420 colorspace, the top and left coordinates will be
+// snapped to even values.
+WEBP_EXTERN(int) WebPPictureCrop(WebPPicture* picture, int left, int top, int width, int height);
+
+// Extracts a view from 'src' picture into 'dst'. The rectangle for the view
+// is defined by the top-left corner pixel coordinates (left, top) as well
+// as its width and height. This rectangle must be fully comprised inside
+// the 'src' source picture. If the source picture uses the YUV420 colorspace,
+// the top and left coordinates will be snapped to even values.
+// Picture 'src' must out-live 'dst' picture. Self-extraction of view is allowed
+// ('src' equal to 'dst') as a means of fast-cropping (but note that doing so,
+// the original dimension will be lost). Picture 'dst' need not be initialized
+// with WebPPictureInit() if it is different from 'src', since its content will
+// be overwritten.
+// Returns false in case of memory allocation error or invalid parameters.
+WEBP_EXTERN(int) WebPPictureView(const WebPPicture* src, int left, int top, int width, int height, WebPPicture* dst);
+
+// Returns true if the 'picture' is actually a view and therefore does
+// not own the memory for pixels.
+WEBP_EXTERN(int) WebPPictureIsView(const WebPPicture* picture);
+
+// Rescale a picture to new dimension width x height.
+// If either 'width' or 'height' (but not both) is 0 the corresponding
+// dimension will be calculated preserving the aspect ratio.
+// No gamma correction is applied.
+// Returns false in case of error (invalid parameter or insufficient memory).
+WEBP_EXTERN(int) WebPPictureRescale(WebPPicture* pic, int width, int height);
+
+// Colorspace conversion function to import RGB samples.
+// Previous buffer will be free'd, if any.
+// *rgb buffer should have a size of at least height * rgb_stride.
+// Returns false in case of memory error.
+WEBP_EXTERN(int) WebPPictureImportRGB(WebPPicture* picture, const uint8_t* rgb, int rgb_stride);
+// Same, but for RGBA buffer.
+WEBP_EXTERN(int) WebPPictureImportRGBA(WebPPicture* picture, const uint8_t* rgba, int rgba_stride);
+// Same, but for RGBA buffer. Imports the RGB direct from the 32-bit format
+// input buffer ignoring the alpha channel. Avoids needing to copy the data
+// to a temporary 24-bit RGB buffer to import the RGB only.
+WEBP_EXTERN(int) WebPPictureImportRGBX(WebPPicture* picture, const uint8_t* rgbx, int rgbx_stride);
+
+// Variants of the above, but taking BGR(A|X) input.
+WEBP_EXTERN(int) WebPPictureImportBGR(WebPPicture* picture, const uint8_t* bgr, int bgr_stride);
+WEBP_EXTERN(int) WebPPictureImportBGRA(WebPPicture* picture, const uint8_t* bgra, int bgra_stride);
+WEBP_EXTERN(int) WebPPictureImportBGRX(WebPPicture* picture, const uint8_t* bgrx, int bgrx_stride);
+
+// Converts picture->argb data to the YUV420A format. The 'colorspace'
+// parameter is deprecated and should be equal to WEBP_YUV420.
+// Upon return, picture->use_argb is set to false. The presence of real
+// non-opaque transparent values is detected, and 'colorspace' will be
+// adjusted accordingly. Note that this method is lossy.
+// Returns false in case of error.
+WEBP_EXTERN(int) WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP /*colorspace = WEBP_YUV420*/);
+
+// Same as WebPPictureARGBToYUVA(), but the conversion is done using
+// pseudo-random dithering with a strength 'dithering' between
+// 0.0 (no dithering) and 1.0 (maximum dithering). This is useful
+// for photographic picture.
+WEBP_EXTERN(int) WebPPictureARGBToYUVADithered(WebPPicture* picture, WebPEncCSP colorspace, float dithering);
+
+// Performs 'smart' RGBA->YUVA420 downsampling and colorspace conversion.
+// Downsampling is handled with extra care in case of color clipping. This
+// method is roughly 2x slower than WebPPictureARGBToYUVA() but produces better
+// YUV representation.
+// Returns false in case of error.
+WEBP_EXTERN(int) WebPPictureSmartARGBToYUVA(WebPPicture* picture);
+
+// Converts picture->yuv to picture->argb and sets picture->use_argb to true.
+// The input format must be YUV_420 or YUV_420A.
+// Note that the use of this method is discouraged if one has access to the
+// raw ARGB samples, since using YUV420 is comparatively lossy. Also, the
+// conversion from YUV420 to ARGB incurs a small loss too.
+// Returns false in case of error.
+WEBP_EXTERN(int) WebPPictureYUVAToARGB(WebPPicture* picture);
+
+// Helper function: given a width x height plane of RGBA or YUV(A) samples
+// clean-up the YUV or RGB samples under fully transparent area, to help
+// compressibility (no guarantee, though).
+WEBP_EXTERN(void) WebPCleanupTransparentArea(WebPPicture* picture);
+
+// Scan the picture 'picture' for the presence of non fully opaque alpha values.
+// Returns true in such case. Otherwise returns false (indicating that the
+// alpha plane can be ignored altogether e.g.).
+WEBP_EXTERN(int) WebPPictureHasTransparency(const WebPPicture* picture);
+
+// Remove the transparency information (if present) by blending the color with
+// the background color 'background_rgb' (specified as 24bit RGB triplet).
+// After this call, all alpha values are reset to 0xff.
+WEBP_EXTERN(void) WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb);
+
+//------------------------------------------------------------------------------
+// Main call
+
+// Main encoding call, after config and picture have been initialized.
+// 'picture' must be less than 16384x16384 in dimension (cf WEBP_MAX_DIMENSION),
+// and the 'config' object must be a valid one.
+// Returns false in case of error, true otherwise.
+// In case of error, picture->error_code is updated accordingly.
+// 'picture' can hold the source samples in both YUV(A) or ARGB input, depending
+// on the value of 'picture->use_argb'. It is highly recommended to use
+// the former for lossy encoding, and the latter for lossless encoding
+// (when config.lossless is true). Automatic conversion from one format to
+// another is provided but they both incur some loss.
+// WEBP_EXTERN(int) WebPEncode(const WebPConfig* config, WebPPicture* picture);
+// NOTE(review): WebPEncode() is disabled above; the unnamed ints of WebPEncodeAsync() are undocumented -- confirm.
+WEBP_EXTERN(int) WebPEncodeAsync(const int, const int, const WebPConfig* config, WebPPicture** picture);
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_WEBP_ENCODE_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/webp/extras.h b/codec/L2/demos/webpEnc/host/src/webp/extras.h
new file mode 100644
index 0000000000..989f310339
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/webp/extras.h
@@ -0,0 +1,51 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+
+#ifndef WEBP_WEBP_EXTRAS_H_
+#define WEBP_WEBP_EXTRAS_H_
+
+#include "./types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "./encode.h"
+
+#define WEBP_EXTRAS_ABI_VERSION 0x0000 // MAJOR(8b) + MINOR(8b)
+
+//------------------------------------------------------------------------------
+
+// Returns the version number of the extras library, packed in hexadecimal using
+// 8bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
+WEBP_EXTERN(int) WebPGetExtrasVersion(void);
+
+//------------------------------------------------------------------------------
+// Ad-hoc colorspace importers.
+
+// Import luma sample (gray scale image) into 'picture'. The 'picture'
+// width and height must be set prior to calling this function.
+WEBP_EXTERN(int) WebPImportGray(const uint8_t* gray, WebPPicture* picture);
+
+// Import rgb sample in RGB565 packed format into 'pic'. The 'pic'
+// width and height must be set prior to calling this function.
+WEBP_EXTERN(int) WebPImportRGB565(const uint8_t* rgb565, WebPPicture* pic);
+
+// Import rgb sample in RGB4444 packed format into 'pic'. The 'pic'
+// width and height must be set prior to calling this function.
+WEBP_EXTERN(int) WebPImportRGB4444(const uint8_t* rgb4444, WebPPicture* pic);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_WEBP_EXTRAS_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/webp/format_constants.h b/codec/L2/demos/webpEnc/host/src/webp/format_constants.h
new file mode 100644
index 0000000000..f98f4c4107
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/webp/format_constants.h
@@ -0,0 +1,88 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  Internal header for constants related to WebP file format.
+//
+// Author: Urvang (urvang@google.com)
+
+#ifndef WEBP_WEBP_FORMAT_CONSTANTS_H_
+#define WEBP_WEBP_FORMAT_CONSTANTS_H_
+
+// Create fourcc of the chunk from the chunk tag characters.
+#define MKFOURCC(a, b, c, d) ((a) | (b) << 8 | (c) << 16 | (uint32_t)(d) << 24)
+
+// VP8 related constants.
+#define VP8_SIGNATURE 0x9d012a            // Signature in VP8 data.
+#define VP8_MAX_PARTITION0_SIZE (1 << 19) // max size of mode partition
+#define VP8_MAX_PARTITION_SIZE (1 << 24)  // max size for token partition
+#define VP8_FRAME_HEADER_SIZE 10          // Size of the frame header within VP8 data.
+
+// VP8L related constants.
+#define VP8L_SIGNATURE_SIZE 1    // VP8L signature size.
+#define VP8L_MAGIC_BYTE 0x2f     // VP8L signature byte.
+#define VP8L_IMAGE_SIZE_BITS 14  // Number of bits used to store
+                                 // width and height.
+#define VP8L_VERSION_BITS 3      // 3 bits reserved for version.
+#define VP8L_VERSION 0           // version 0
+#define VP8L_FRAME_HEADER_SIZE 5 // Size of the VP8L frame header.
+
+#define MAX_PALETTE_SIZE 256
+#define MAX_CACHE_BITS 11
+#define HUFFMAN_CODES_PER_META_CODE 5
+#define ARGB_BLACK 0xff000000
+
+#define DEFAULT_CODE_LENGTH 8
+#define MAX_ALLOWED_CODE_LENGTH 15
+
+#define NUM_LITERAL_CODES 256
+#define NUM_LENGTH_CODES 24
+#define NUM_DISTANCE_CODES 40
+#define CODE_LENGTH_CODES 19
+
+#define MIN_HUFFMAN_BITS 2 // min number of Huffman bits
+#define MAX_HUFFMAN_BITS 9 // max number of Huffman bits
+
+#define TRANSFORM_PRESENT 1 // The bit to be written when next data
+                            // to be read is a transform.
+#define NUM_TRANSFORMS 4    // Maximum number of allowed transform
+                            // in a bitstream.
+typedef enum {
+    PREDICTOR_TRANSFORM = 0,
+    CROSS_COLOR_TRANSFORM = 1,
+    SUBTRACT_GREEN = 2,
+    COLOR_INDEXING_TRANSFORM = 3
+} VP8LImageTransformType;
+
+// Alpha related constants.
+#define ALPHA_HEADER_LEN 1
+#define ALPHA_NO_COMPRESSION 0
+#define ALPHA_LOSSLESS_COMPRESSION 1
+#define ALPHA_PREPROCESSED_LEVELS 1
+
+// Mux related constants.
+#define TAG_SIZE 4          // Size of a chunk tag (e.g. "VP8L").
+#define CHUNK_SIZE_BYTES 4  // Size needed to store chunk's size.
+#define CHUNK_HEADER_SIZE 8 // Size of a chunk header.
+#define RIFF_HEADER_SIZE 12 // Size of the RIFF header ("RIFFnnnnWEBP").
+#define ANMF_CHUNK_SIZE 16  // Size of an ANMF chunk.
+#define ANIM_CHUNK_SIZE 6   // Size of an ANIM chunk.
+#define FRGM_CHUNK_SIZE 6   // Size of a FRGM chunk.
+#define VP8X_CHUNK_SIZE 10  // Size of a VP8X chunk.
+
+#define MAX_CANVAS_SIZE (1 << 24)     // 24-bit max for VP8X width/height.
+#define MAX_IMAGE_AREA (1ULL << 32)   // 32-bit max for width x height.
+#define MAX_LOOP_COUNT (1 << 16)      // maximum value for loop-count
+#define MAX_DURATION (1 << 24)        // maximum duration
+#define MAX_POSITION_OFFSET (1 << 24) // maximum frame/fragment x/y offset
+
+// Maximum chunk payload is such that adding the header and padding won't
+// overflow a uint32_t.
+#define MAX_CHUNK_PAYLOAD (~0U - CHUNK_HEADER_SIZE - 1)
+
+#endif /* WEBP_WEBP_FORMAT_CONSTANTS_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/webp/mux.h b/codec/L2/demos/webpEnc/host/src/webp/mux.h
new file mode 100644
index 0000000000..5f4a86c323
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/webp/mux.h
@@ -0,0 +1,511 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  RIFF container manipulation and encoding for WebP images.
+//
+// Authors: Urvang (urvang@google.com)
+//          Vikas (vikasa@google.com)
+
+#ifndef WEBP_WEBP_MUX_H_
+#define WEBP_WEBP_MUX_H_
+
+#include "./mux_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define WEBP_MUX_ABI_VERSION 0x0106 // MAJOR(8b) + MINOR(8b)
+
+//------------------------------------------------------------------------------
+// Mux API
+//
+// This API allows manipulation of WebP container images containing features
+// like color profile, metadata, animation and fragmented images.
+//
+// Code Example#1: Create a WebPMux object with image data, color profile and
+// XMP metadata.
+/*
+  int copy_data = 0;
+  WebPMux* mux = WebPMuxNew();
+  // ... (Prepare image data).
+  WebPMuxSetImage(mux, &image, copy_data);
+  // ... (Prepare ICCP color profile data).
+  WebPMuxSetChunk(mux, "ICCP", &icc_profile, copy_data);
+  // ... (Prepare XMP metadata).
+  WebPMuxSetChunk(mux, "XMP ", &xmp, copy_data);
+  // Get data from mux in WebP RIFF format.
+  WebPMuxAssemble(mux, &output_data);
+  WebPMuxDelete(mux);
+  // ... (Consume output_data; e.g. write output_data.bytes to file).
+  WebPDataClear(&output_data);
+*/
+
+// Code Example#2: Get image and color profile data from a WebP file.
+/*
+  int copy_data = 0;
+  // ... (Read data from file).
+  WebPMux* mux = WebPMuxCreate(&data, copy_data);
+  WebPMuxGetFrame(mux, 1, &image);
+  // ... (Consume image; e.g. call WebPDecode() to decode the data).
+  WebPMuxGetChunk(mux, "ICCP", &icc_profile);
+  // ... (Consume icc_profile).
+  WebPMuxDelete(mux);
+  free(data);
+*/
+
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum WebPMuxError WebPMuxError;
+// typedef enum WebPChunkId WebPChunkId;
+typedef struct WebPMux WebPMux; // main opaque object.
+typedef struct WebPMuxFrameInfo WebPMuxFrameInfo;
+typedef struct WebPMuxAnimParams WebPMuxAnimParams;
+typedef struct WebPAnimEncoderOptions WebPAnimEncoderOptions;
+
+// Error codes. Only WEBP_MUX_OK is positive; compare against it explicitly, since the negative codes are also non-zero.
+typedef enum WebPMuxError {
+    WEBP_MUX_OK = 1,                // success
+    WEBP_MUX_NOT_FOUND = 0,         // requested chunk or frame is not present
+    WEBP_MUX_INVALID_ARGUMENT = -1, // NULL or otherwise invalid argument
+    WEBP_MUX_BAD_DATA = -2,         // mux object or chunk content is invalid
+    WEBP_MUX_MEMORY_ERROR = -3,     // memory allocation failed
+    WEBP_MUX_NOT_ENOUGH_DATA = -4   // NOTE(review): no function in this header documents returning it -- confirm meaning
+} WebPMuxError;
+
+// IDs for different types of chunks.
+typedef enum WebPChunkId {
+    WEBP_CHUNK_VP8X,    // VP8X
+    WEBP_CHUNK_ICCP,    // ICCP
+    WEBP_CHUNK_ANIM,    // ANIM
+    WEBP_CHUNK_ANMF,    // ANMF
+    WEBP_CHUNK_FRGM,    // FRGM
+    WEBP_CHUNK_ALPHA,   // ALPH
+    WEBP_CHUNK_IMAGE,   // VP8/VP8L
+    WEBP_CHUNK_EXIF,    // EXIF
+    WEBP_CHUNK_XMP,     // XMP
+    WEBP_CHUNK_UNKNOWN, // Other chunks.
+    WEBP_CHUNK_NIL
+} WebPChunkId;
+
+//------------------------------------------------------------------------------
+
+// Returns the version number of the mux library, packed in hexadecimal using
+// 8bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
+WEBP_EXTERN(int) WebPGetMuxVersion(void);
+
+//------------------------------------------------------------------------------
+// Life of a Mux object
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(WebPMux*) WebPNewInternal(int);
+
+// Creates an empty mux object.
+// Returns:
+//   A pointer to the newly created empty mux object.
+//   Or NULL in case of memory error.
+static WEBP_INLINE WebPMux* WebPMuxNew(void) {
+    return WebPNewInternal(WEBP_MUX_ABI_VERSION); // hand this header's ABI version to the version-checked entry point
+}
+
+// Deletes the mux object.
+// Parameters:
+//   mux - (in/out) object to be deleted
+WEBP_EXTERN(void) WebPMuxDelete(WebPMux* mux);
+
+//------------------------------------------------------------------------------
+// Mux creation.
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(WebPMux*) WebPMuxCreateInternal(const WebPData*, int, int);
+
+// Creates a mux object from raw data given in WebP RIFF format.
+// Parameters:
+//   bitstream - (in) the bitstream data in WebP RIFF format
+//   copy_data - (in) value 1 indicates given data WILL be copied to the mux
+//               object and value 0 indicates data will NOT be copied.
+// Returns:
+//   A pointer to the mux object created from given data - on success.
+//   NULL - In case of invalid data or memory error.
+static WEBP_INLINE WebPMux* WebPMuxCreate(const WebPData* bitstream, int copy_data) {
+    return WebPMuxCreateInternal(bitstream, copy_data, WEBP_MUX_ABI_VERSION); // arguments forwarded unchanged, plus this header's ABI version
+}
+
+//------------------------------------------------------------------------------
+// Non-image chunks.
+
+// Note: Only non-image related chunks should be managed through chunk APIs.
+// (Image related chunks are: "ANMF", "FRGM", "VP8 ", "VP8L" and "ALPH").
+// To add, get and delete images, use WebPMuxSetImage(), WebPMuxPushFrame(),
+// WebPMuxGetFrame() and WebPMuxDeleteFrame().
+
+// Adds a chunk with id 'fourcc' and data 'chunk_data' in the mux object.
+// Any existing chunk(s) with the same id will be removed.
+// Parameters:
+//   mux - (in/out) object to which the chunk is to be added
+//   fourcc - (in) a character array containing the fourcc of the given chunk;
+//                 e.g., "ICCP", "XMP ", "EXIF" etc.
+//   chunk_data - (in) the chunk data to be added
+//   copy_data - (in) value 1 indicates given data WILL be copied to the mux
+//               object and value 0 indicates data will NOT be copied.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux, fourcc or chunk_data is NULL
+//                               or if fourcc corresponds to an image chunk.
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError)
+WebPMuxSetChunk(WebPMux* mux, const char fourcc[4], const WebPData* chunk_data, int copy_data);
+
+// Gets a reference to the data of the chunk with id 'fourcc' in the mux object.
+// The caller should NOT free the returned data.
+// Parameters:
+//   mux - (in) object from which the chunk data is to be fetched
+//   fourcc - (in) a character array containing the fourcc of the chunk;
+//                 e.g., "ICCP", "XMP ", "EXIF" etc.
+//   chunk_data - (out) returned chunk data
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux, fourcc or chunk_data is NULL
+//                               or if fourcc corresponds to an image chunk.
+//   WEBP_MUX_NOT_FOUND - If mux does not contain a chunk with the given id.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxGetChunk(const WebPMux* mux, const char fourcc[4], WebPData* chunk_data);
+
+// Deletes the chunk with the given 'fourcc' from the mux object.
+// Parameters:
+//   mux - (in/out) object from which the chunk is to be deleted
+//   fourcc - (in) a character array containing the fourcc of the chunk;
+//                 e.g., "ICCP", "XMP ", "EXIF" etc.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or fourcc is NULL
+//                               or if fourcc corresponds to an image chunk.
+//   WEBP_MUX_NOT_FOUND - If mux does not contain a chunk with the given fourcc.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxDeleteChunk(WebPMux* mux, const char fourcc[4]);
+
+//------------------------------------------------------------------------------
+// Images.
+
+// Encapsulates data about a single frame/fragment.
+struct WebPMuxFrameInfo {
+    WebPData bitstream; // image data: can be a raw VP8/VP8L bitstream
+                        // or a single-image WebP file.
+    int x_offset;       // x-offset of the frame.
+    int y_offset;       // y-offset of the frame.
+    int duration;       // duration of the frame (in milliseconds).
+
+    WebPChunkId id;                    // frame type: should be one of WEBP_CHUNK_ANMF,
+                                       // WEBP_CHUNK_FRGM or WEBP_CHUNK_IMAGE
+    WebPMuxAnimDispose dispose_method; // Disposal method for the frame.
+    WebPMuxAnimBlend blend_method;     // Blend operation for the frame.
+    uint32_t pad[1];                   // padding for later use
+};
+
+// Sets the (non-animated and non-fragmented) image in the mux object.
+// Note: Any existing images (including frames/fragments) will be removed.
+// Parameters:
+//   mux - (in/out) object in which the image is to be set
+//   bitstream - (in) can be a raw VP8/VP8L bitstream or a single-image
+//               WebP file (non-animated and non-fragmented)
+//   copy_data - (in) value 1 indicates given data WILL be copied to the mux
+//               object and value 0 indicates data will NOT be copied.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL or bitstream is NULL.
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxSetImage(WebPMux* mux, const WebPData* bitstream, int copy_data);
+
+// Adds a frame at the end of the mux object.
+// Notes: (1) frame.id should be one of WEBP_CHUNK_ANMF or WEBP_CHUNK_FRGM
+//        (2) For setting a non-animated non-fragmented image, use
+//            WebPMuxSetImage() instead.
+//        (3) Type of frame being pushed must be same as the frames in mux.
+//        (4) As WebP only supports even offsets, any odd offset will be snapped
+//            to an even location using: offset &= ~1
+// Parameters:
+//   mux - (in/out) object to which the frame is to be added
+//   frame - (in) frame data.
+//   copy_data - (in) value 1 indicates given data WILL be copied to the mux
+//               object and value 0 indicates data will NOT be copied.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or frame is NULL
+//                               or if content of 'frame' is invalid.
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxPushFrame(WebPMux* mux, const WebPMuxFrameInfo* frame, int copy_data);
+
+// Gets the nth frame from the mux object.
+// The content of 'frame->bitstream' is allocated using malloc(), and NOT
+// owned by the 'mux' object. It MUST be deallocated by the caller by calling
+// WebPDataClear().
+// nth=0 has a special meaning - last position.
+// Parameters:
+//   mux - (in) object from which the info is to be fetched
+//   nth - (in) index of the frame in the mux object
+//   frame - (out) data of the returned frame
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or frame is NULL.
+//   WEBP_MUX_NOT_FOUND - if there are fewer than nth frames in the mux object.
+//   WEBP_MUX_BAD_DATA - if nth frame chunk in mux is invalid.
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxGetFrame(const WebPMux* mux, uint32_t nth, WebPMuxFrameInfo* frame);
+
+// Deletes a frame from the mux object.
+// nth=0 has a special meaning - last position.
+// Parameters:
+//   mux - (in/out) object from which a frame is to be deleted
+//   nth - (in) The position from which the frame is to be deleted
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL.
+//   WEBP_MUX_NOT_FOUND - If there are fewer than nth frames in the mux object
+//                        before deletion.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxDeleteFrame(WebPMux* mux, uint32_t nth);
+
+//------------------------------------------------------------------------------
+// Animation.
+
+// Animation parameters.
+struct WebPMuxAnimParams {
+    uint32_t bgcolor; // Background color of the canvas stored (in MSB order) as:
+                      // Bits 00 to 07: Alpha.
+                      // Bits 08 to 15: Red.
+                      // Bits 16 to 23: Green.
+                      // Bits 24 to 31: Blue.
+    int loop_count;   // Number of times to repeat the animation [0 = infinite].
+};
+
+// Sets the animation parameters in the mux object. Any existing ANIM chunks
+// will be removed.
+// Parameters:
+//   mux - (in/out) object in which ANIM chunk is to be set/added
+//   params - (in) animation parameters.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or params is NULL.
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxSetAnimationParams(WebPMux* mux, const WebPMuxAnimParams* params);
+
+// Gets the animation parameters from the mux object.
+// Parameters:
+//   mux - (in) object from which the animation parameters are to be fetched
+//   params - (out) animation parameters extracted from the ANIM chunk
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or params is NULL.
+//   WEBP_MUX_NOT_FOUND - if ANIM chunk is not present in mux object.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxGetAnimationParams(const WebPMux* mux, WebPMuxAnimParams* params);
+
+//------------------------------------------------------------------------------
+// Misc Utilities.
+
+// Sets the canvas size for the mux object. The width and height can be
+// specified explicitly or left as zero (0, 0).
+// * When width and height are specified explicitly, then this frame bound is
+//   enforced during subsequent calls to WebPMuxAssemble() and an error is
+//   reported if any animated frame does not completely fit within the canvas.
+// * When unspecified (0, 0), the constructed canvas will get the frame bounds
+//   from the bounding-box over all frames after calling WebPMuxAssemble().
+// Parameters:
+//   mux - (in) object to which the canvas size is to be set
+//   width - (in) canvas width
+//   height - (in) canvas height
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL; or
+//                               width or height are invalid or out of bounds
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxSetCanvasSize(WebPMux* mux, int width, int height);
+
+// Gets the canvas size from the mux object.
+// Note: This method assumes that the VP8X chunk, if present, is up-to-date.
+// That is, the mux object hasn't been modified since the last call to
+// WebPMuxAssemble() or WebPMuxCreate().
+// Parameters:
+//   mux - (in) object from which the canvas size is to be fetched
+//   width - (out) canvas width
+//   height - (out) canvas height
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux, width or height is NULL.
+//   WEBP_MUX_BAD_DATA - if VP8X/VP8/VP8L chunk or canvas size is invalid.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxGetCanvasSize(const WebPMux* mux, int* width, int* height);
+
+// Gets the feature flags from the mux object.
+// Note: This method assumes that the VP8X chunk, if present, is up-to-date.
+// That is, the mux object hasn't been modified since the last call to
+// WebPMuxAssemble() or WebPMuxCreate().
+// Parameters:
+//   mux - (in) object from which the features are to be fetched
+//   flags - (out) the flags specifying which features are present in the
+//           mux object. This will be an OR of various flag values.
+//           Enum 'WebPFeatureFlags' can be used to test individual flag values.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or flags is NULL.
+//   WEBP_MUX_BAD_DATA - if VP8X/VP8/VP8L chunk or canvas size is invalid.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxGetFeatures(const WebPMux* mux, uint32_t* flags);
+
+// Gets number of chunks with the given 'id' in the mux object.
+// Parameters:
+//   mux - (in) object from which the info is to be fetched
+//   id - (in) chunk id specifying the type of chunk
+//   num_elements - (out) number of chunks with the given chunk id
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux, or num_elements is NULL.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxNumChunks(const WebPMux* mux, WebPChunkId id, int* num_elements);
+
+// Assembles all chunks in WebP RIFF format and returns in 'assembled_data'.
+// This function also validates the mux object.
+// Note: The content of 'assembled_data' will be ignored and overwritten.
+// Also, the content of 'assembled_data' is allocated using malloc(), and NOT
+// owned by the 'mux' object. It MUST be deallocated by the caller by calling
+// WebPDataClear(). It's always safe to call WebPDataClear() upon return,
+// even in case of error.
+// Parameters:
+//   mux - (in/out) object whose chunks are to be assembled
+//   assembled_data - (out) assembled WebP data
+// Returns:
+//   WEBP_MUX_BAD_DATA - if mux object is invalid.
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or assembled_data is NULL.
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxAssemble(WebPMux* mux, WebPData* assembled_data);
+
+//------------------------------------------------------------------------------
+// WebPAnimEncoder API
+//
+// This API allows encoding (possibly) animated WebP images.
+//
+// Code Example:
+/*
+  WebPAnimEncoderOptions enc_options;
+  WebPAnimEncoderOptionsInit(&enc_options);
+  // Tune 'enc_options' as needed.
+  WebPAnimEncoder* enc = WebPAnimEncoderNew(width, height, &enc_options);
+  while(<there are more frames>) {
+    WebPConfig config;
+    WebPConfigInit(&config);
+    // Tune 'config' as needed.
+    WebPAnimEncoderAdd(enc, frame, timestamp_ms, &config);
+  }
+  WebPAnimEncoderAdd(enc, NULL, timestamp_ms, NULL);
+  WebPAnimEncoderAssemble(enc, webp_data);
+  WebPAnimEncoderDelete(enc);
+  // Write the 'webp_data' to a file, or re-mux it further.
+*/
+
+typedef struct WebPAnimEncoder WebPAnimEncoder; // Main opaque object.
+
+// Forward declarations. Defined in encode.h.
+struct WebPPicture;
+struct WebPConfig;
+
+// Global options.
+struct WebPAnimEncoderOptions {
+    WebPMuxAnimParams anim_params; // Animation parameters.
+    int minimize_size;             // If true, minimize the output size (slow). Implicitly
+                                   // disables key-frame insertion.
+    int kmin;        // Minimum distance between consecutive key frames (see kmax).
+    int kmax;        // Maximum distance between consecutive key frames in the
+                     // output. The library may insert some key frames as
+                     // needed to satisfy these criteria.
+                     // Note that these conditions should hold: kmax > kmin
+                     // and kmin >= kmax / 2 + 1. Also, if kmin == 0, then
+                     // key-frame insertion is disabled; and if kmax == 0,
+                     // then all frames will be key-frames.
+    int allow_mixed; // If true, use mixed compression mode; may choose
+                     // either lossy or lossless for each frame.
+    int verbose;     // If true, print info and warning messages to stderr.
+
+    uint32_t padding[4]; // Padding for later use.
+};
+
+// Internal, version-checked, entry point.
+WEBP_EXTERN(int) WebPAnimEncoderOptionsInitInternal(WebPAnimEncoderOptions*, int);
+
+// Should always be called, to initialize a fresh WebPAnimEncoderOptions
+// structure before modification. Returns false in case of version mismatch.
+// WebPAnimEncoderOptionsInit() must have succeeded before using the
+// 'enc_options' object.
+static WEBP_INLINE int WebPAnimEncoderOptionsInit(WebPAnimEncoderOptions* enc_options) {
+    return WebPAnimEncoderOptionsInitInternal(enc_options, WEBP_MUX_ABI_VERSION); // result of the version-checked call is returned as-is
+}
+
+// Internal, version-checked, entry point.
+WEBP_EXTERN(WebPAnimEncoder*) WebPAnimEncoderNewInternal(int, int, const WebPAnimEncoderOptions*, int);
+
+// Creates and initializes a WebPAnimEncoder object.
+// Parameters:
+//   width/height - (in) canvas width and height of the animation.
+//   enc_options - (in) encoding options; can be passed NULL to pick
+//                      reasonable defaults.
+// Returns:
+//   A pointer to the newly created WebPAnimEncoder object.
+//   Or NULL in case of memory error.
+static WEBP_INLINE WebPAnimEncoder* WebPAnimEncoderNew(int width,
+                                                       int height,
+                                                       const WebPAnimEncoderOptions* enc_options) {
+    return WebPAnimEncoderNewInternal(width, height, enc_options, WEBP_MUX_ABI_VERSION); // arguments forwarded unchanged, plus this header's ABI version
+}
+
+// Optimize the given frame for WebP, encode it and add it to the
+// WebPAnimEncoder object.
+// The last call to 'WebPAnimEncoderAdd' should be with frame = NULL, which
+// indicates that no more frames are to be added. This call is also used to
+// determine the duration of the last frame.
+// Parameters:
+//   enc - (in/out) object to which the frame is to be added.
+//   frame - (in/out) frame data in ARGB or YUV(A) format. If it is in YUV(A)
+//           format, it will be converted to ARGB, which incurs a small loss.
+//   timestamp_ms - (in) timestamp of this frame in milliseconds.
+//                       Duration of a frame would be calculated as
+//                       "timestamp of next frame - timestamp of this frame".
+//                       Hence, timestamps should be in non-decreasing order.
+//   config - (in) encoding options; can be passed NULL to pick
+//            reasonable defaults.
+// Returns:
+//   On error, returns false and frame->error_code is set appropriately.
+//   Otherwise, returns true.
+WEBP_EXTERN(int)
+WebPAnimEncoderAdd(WebPAnimEncoder* enc, struct WebPPicture* frame, int timestamp_ms, const struct WebPConfig* config);
+
+// Assemble all frames added so far into a WebP bitstream.
+// This call should be preceded by a call to 'WebPAnimEncoderAdd' with
+// frame = NULL; if not, the duration of the last frame will be internally
+// estimated.
+// Parameters:
+//   enc - (in/out) object from which the frames are to be assembled.
+//   webp_data - (out) generated WebP bitstream.
+// Returns:
+//   True on success.
+WEBP_EXTERN(int) WebPAnimEncoderAssemble(WebPAnimEncoder* enc, WebPData* webp_data);
+
+// Get error string corresponding to the most recent call using 'enc'. The
+// returned string is owned by 'enc' and is valid only until the next call to
+// WebPAnimEncoderAdd() or WebPAnimEncoderAssemble() or WebPAnimEncoderDelete().
+// Parameters:
+//   enc - (in/out) object from which the error string is to be fetched.
+// Returns:
+//   NULL if 'enc' is NULL. Otherwise, returns the error string if the last call
+//   to 'enc' had an error, or an empty string if the last call was a success.
+WEBP_EXTERN(const char*) WebPAnimEncoderGetError(WebPAnimEncoder* enc);
+
+// Deletes the WebPAnimEncoder object.
+// Parameters:
+//   enc - (in/out) object to be deleted
+WEBP_EXTERN(void) WebPAnimEncoderDelete(WebPAnimEncoder* enc);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_WEBP_MUX_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/webp/mux_types.h b/codec/L2/demos/webpEnc/host/src/webp/mux_types.h
new file mode 100644
index 0000000000..228efa17c7
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/webp/mux_types.h
@@ -0,0 +1,97 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Data-types common to the mux and demux libraries.
+//
+// Author: Urvang (urvang@google.com)
+
+#ifndef WEBP_WEBP_MUX_TYPES_H_
+#define WEBP_WEBP_MUX_TYPES_H_
+
+#include <stdlib.h> // free()
+#include <string.h> // memset()
+#include "./types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum WebPFeatureFlags WebPFeatureFlags;
+// typedef enum WebPMuxAnimDispose WebPMuxAnimDispose;
+// typedef enum WebPMuxAnimBlend WebPMuxAnimBlend;
+typedef struct WebPData WebPData;
+
+// VP8X Feature Flags.
+typedef enum WebPFeatureFlags {
+    FRAGMENTS_FLAG = 0x00000001,
+    ANIMATION_FLAG = 0x00000002,
+    XMP_FLAG = 0x00000004,
+    EXIF_FLAG = 0x00000008,
+    ALPHA_FLAG = 0x00000010,
+    ICCP_FLAG = 0x00000020
+} WebPFeatureFlags;
+
+// Dispose method (animation only). Indicates how the area used by the current
+// frame is to be treated before rendering the next frame on the canvas.
+typedef enum WebPMuxAnimDispose {
+    WEBP_MUX_DISPOSE_NONE,      // Do not dispose.
+    WEBP_MUX_DISPOSE_BACKGROUND // Dispose to background color.
+} WebPMuxAnimDispose;
+
+// Blend operation (animation only). Indicates how transparent pixels of the
+// current frame are blended with those of the previous canvas.
+typedef enum WebPMuxAnimBlend {
+    WEBP_MUX_BLEND,   // Blend.
+    WEBP_MUX_NO_BLEND // Do not blend.
+} WebPMuxAnimBlend;
+
+// Data type used to describe 'raw' data, e.g., chunk data
+// (ICC profile, metadata) and WebP compressed image data.
+struct WebPData {
+    const uint8_t* bytes; // start of the data; WebPDataClear() releases it with free()
+    size_t size;          // number of bytes at 'bytes' (the amount WebPDataCopy() duplicates)
+};
+
+// Initializes the contents of the 'webp_data' object with default values.
+static WEBP_INLINE void WebPDataInit(WebPData* webp_data) {
+    if (webp_data == NULL) return; // a NULL argument is accepted and ignored
+    // Zero-fill the whole object so that both 'bytes' and 'size' are cleared.
+    memset(webp_data, 0, sizeof(*webp_data));
+}
+
+// Clears the contents of the 'webp_data' object by calling free(). Does not
+// deallocate the object itself.
+static WEBP_INLINE void WebPDataClear(WebPData* webp_data) {
+    if (webp_data == NULL) return; // a NULL argument is accepted and ignored
+    // 'bytes' is declared const, so the qualifier is cast away for free().
+    free((void*)webp_data->bytes);
+    WebPDataInit(webp_data); // zero the fields so the freed pointer is not left behind
+}
+
+// Allocates necessary storage for 'dst' and copies the contents of 'src'.
+// Returns true on success.
+static WEBP_INLINE int WebPDataCopy(const WebPData* src, WebPData* dst) {
+    if (src == NULL || dst == NULL) return 0;
+    // 'dst' is reset before 'src' is read; passing the same object for both would wipe the source.
+    WebPDataInit(dst);
+    if (src->bytes == NULL || src->size == 0) return 1; // empty source: 'dst' stays empty
+    dst->bytes = (uint8_t*)malloc(src->size);
+    if (dst->bytes == NULL) return 0; // allocation failed; 'dst' is still empty
+    memcpy((void*)dst->bytes, src->bytes, src->size);
+    dst->size = src->size;
+    return 1;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* WEBP_WEBP_MUX_TYPES_H_ */
diff --git a/codec/L2/demos/webpEnc/host/src/webp/types.h b/codec/L2/demos/webpEnc/host/src/webp/types.h
new file mode 100644
index 0000000000..2c3de67b86
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/src/webp/types.h
@@ -0,0 +1,51 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  Common types
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_WEBP_TYPES_H_
+#define WEBP_WEBP_TYPES_H_
+
+#include <stddef.h> // for size_t
+
+#ifndef _MSC_VER
+#include <inttypes.h> /* non-MSVC compilers: fixed-width integer types come from here */
+#if defined(__cplusplus) || !defined(__STRICT_ANSI__) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
+#define WEBP_INLINE inline /* C++, non-strict-ANSI mode, or C99 and later */
+#else
+#define WEBP_INLINE /* strict pre-C99 C: expands to nothing */
+#endif
+#else
+typedef signed char int8_t; /* MSVC branch: the fixed-width integer types are declared by hand */
+typedef unsigned char uint8_t;
+typedef signed short int16_t;
+typedef unsigned short uint16_t;
+typedef signed int int32_t;
+typedef unsigned int uint32_t;
+typedef unsigned long long int uint64_t;
+typedef long long int int64_t;
+#define WEBP_INLINE __forceinline
+#endif /* _MSC_VER */
+
+#ifndef WEBP_EXTERN
+// Marks library functions explicitly. A WEBP_EXTERN defined before this header is included is
+// kept as-is, which allows for changing the signature for e.g., Windows DLL builds.
+#if defined(__GNUC__) && __GNUC__ >= 4
+#define WEBP_EXTERN(type) extern __attribute__((visibility("default"))) type
+#else
+#define WEBP_EXTERN(type) extern type
+#endif /* __GNUC__ >= 4 */
+#endif /* WEBP_EXTERN */
+
+// Macro to check ABI compatibility: true when 'a' and 'b' differ above their low 8 bits (not the same major revision number).
+#define WEBP_ABI_IS_INCOMPATIBLE(a, b) (((a) >> 8) != ((b) >> 8))
+
+#endif /* WEBP_WEBP_TYPES_H_ */
diff --git a/codec/L2/demos/webpEnc/host/stopwatch.h b/codec/L2/demos/webpEnc/host/stopwatch.h
new file mode 100644
index 0000000000..63a60f2d6a
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/stopwatch.h
@@ -0,0 +1,59 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  Helper functions to measure elapsed time.
+//
+// Author: Mikolaj Zalewski (mikolajz@google.com)
+
+#ifndef WEBP_EXAMPLES_STOPWATCH_H_
+#define WEBP_EXAMPLES_STOPWATCH_H_
+
+#include "webp/types.h"
+
+#if defined _WIN32 && !defined __GNUC__
+#include <windows.h>
+
+typedef LARGE_INTEGER Stopwatch; // Holds one QueryPerformanceCounter() reading.
+
+static WEBP_INLINE void StopwatchReset(Stopwatch* watch) { // Stores the current counter reading in 'watch'.
+    QueryPerformanceCounter(watch);
+}
+
+// Returns the time in seconds elapsed since 'watch' was last set and restarts
+// it from now. Returns 0.0 when the performance counter or its frequency
+// cannot be queried, or when the reported frequency is zero.
+static WEBP_INLINE double StopwatchReadAndReset(Stopwatch* watch) {
+    const LARGE_INTEGER start = *watch;
+    LARGE_INTEGER ticks_per_sec;
+
+    // Short-circuit evaluation keeps the original query order.
+    if (!QueryPerformanceCounter(watch) || !QueryPerformanceFrequency(&ticks_per_sec) ||
+        ticks_per_sec.QuadPart == 0) {
+        return 0.0;
+    }
+    return (watch->QuadPart - start.QuadPart) / (double)ticks_per_sec.QuadPart;
+}
+
+#else               /* !_WIN32 */
+#include <string.h> // memcpy
+#include <sys/time.h>
+
+typedef struct timeval Stopwatch; // Holds one gettimeofday() reading.
+
+static WEBP_INLINE void StopwatchReset(Stopwatch* watch) { // Stores the current time of day in 'watch'.
+    gettimeofday(watch, NULL);
+}
+
+// Returns the time in seconds elapsed since 'watch' was last set and restarts
+// it from now.
+static WEBP_INLINE double StopwatchReadAndReset(Stopwatch* watch) {
+    const struct timeval start = *watch;
+    double seconds, micros;
+
+    gettimeofday(watch, NULL);
+    seconds = (double)watch->tv_sec - start.tv_sec;
+    micros = (double)watch->tv_usec - start.tv_usec;
+    return seconds + micros / 1000000.0;
+}
+
+#endif /* _WIN32 */
+
+#endif /* WEBP_EXAMPLES_STOPWATCH_H_ */
diff --git a/codec/L2/demos/webpEnc/host/tiffdec.c b/codec/L2/demos/webpEnc/host/tiffdec.c
new file mode 100644
index 0000000000..1f49655428
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/tiffdec.c
@@ -0,0 +1,189 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// TIFF decode.
+
+#include "./tiffdec.h"
+
+#ifdef HAVE_CONFIG_H
+#include "webp/config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+
+#ifdef WEBP_HAVE_TIFF
+#include <tiffio.h>
+
+#include "webp/encode.h"
+#include "./metadata.h"
+
+// Pairs each supported TIFF metadata tag with the byte offset of the payload
+// that receives it inside Metadata. The table ends with a {0, 0} sentinel.
+static const struct {
+    ttag_t tag;
+    size_t storage_offset;
+} kTIFFMetadataMap[] = {
+    {TIFFTAG_ICCPROFILE, METADATA_OFFSET(iccp)},
+    {TIFFTAG_XMLPACKET, METADATA_OFFSET(xmp)},
+    {0, 0},
+};
+
+// Copies every tag listed in kTIFFMetadataMap that is present in 'tif' into
+// the matching payload of 'metadata'. Returns true on success and false as soon
+// as one copy fails. Whatever the outcome, the caller must use MetadataFree()
+// on 'metadata'.
+static int ExtractMetadataFromTIFF(TIFF* const tif, Metadata* const metadata) {
+    toff_t exif_ifd_offset;
+    int idx = 0;
+
+    while (kTIFFMetadataMap[idx].tag != 0) {
+        uint8_t* const base = (uint8_t*)metadata;
+        MetadataPayload* const payload = (MetadataPayload*)(base + kTIFFMetadataMap[idx].storage_offset);
+        void* tag_data;
+        uint32 tag_data_len;
+
+        // A tag that is absent from the file is simply skipped.
+        if (TIFFGetField(tif, kTIFFMetadataMap[idx].tag, &tag_data_len, &tag_data)) {
+            if (!MetadataCopy((const char*)tag_data, tag_data_len, payload)) return 0;
+        }
+        ++idx;
+    }
+
+    // TODO(jzern): To extract the raw EXIF directory some parsing of it would be
+    // necessary to determine the overall size. In addition, value offsets in
+    // individual directory entries may need to be updated as, depending on the
+    // type, they are file based.
+    // Exif 2.2 Section 4.6.2 Tag Structure
+    // TIFF Revision 6.0 Part 1 Section 2 TIFF Structure #Image File Directory
+    if (TIFFGetField(tif, TIFFTAG_EXIFIFD, &exif_ifd_offset)) {
+        fprintf(stderr, "Warning: EXIF extraction from TIFF is unsupported.\n");
+    }
+    return 1;
+}
+
+// Ad-hoc structure to supply read-from-memory functionalities.
+typedef struct {
+    const uint8_t* data; // Start of the in-memory TIFF.
+    toff_t size;         // Total number of bytes available at 'data'.
+    toff_t pos;          // Current read offset; moved by MySeek() and MyRead().
+} MyData;
+
+static int MyClose(thandle_t opaque) { // Close hook passed to TIFFClientOpen(): does nothing and returns 0.
+    (void)opaque;
+    return 0;
+}
+
+// Size hook passed to TIFFClientOpen(): reports the total length of the
+// in-memory TIFF.
+static toff_t MySize(thandle_t opaque) {
+    return ((const MyData*)opaque)->size;
+}
+
+// Seek hook passed to TIFFClientOpen(). 'offset' is taken relative to the
+// current position (SEEK_CUR), the start (SEEK_SET) or, for any other 'whence',
+// the end of the buffer. Returns the new position, or (toff_t)-1 without moving
+// when the target lies past the end.
+static toff_t MySeek(thandle_t opaque, toff_t offset, int whence) {
+    MyData* const mem = (MyData*)opaque;
+    toff_t origin;
+
+    if (whence == SEEK_CUR) {
+        origin = mem->pos;
+    } else if (whence == SEEK_SET) {
+        origin = 0;
+    } else {
+        origin = mem->size;
+    }
+    offset += origin;
+    if (offset > mem->size) return (toff_t)-1;
+    mem->pos = offset;
+    return offset;
+}
+
+static int MyMapFile(thandle_t opaque, void** base, toff_t* size) { // Map hook: ignores its arguments, returns 0 (presumably "no mapping" to libtiff - confirm).
+    (void)opaque;
+    (void)base;
+    (void)size;
+    return 0;
+}
+static void MyUnmapFile(thandle_t opaque, void* base, toff_t size) { // Unmap hook: intentionally a no-op.
+    (void)opaque;
+    (void)base;
+    (void)size;
+}
+
+// Read hook passed to TIFFClientOpen(): copies up to 'size' bytes from the
+// current position into 'dst', clamping the request to what is left in the
+// buffer, and advances the position. Returns the number of bytes copied.
+static tsize_t MyRead(thandle_t opaque, void* dst, tsize_t size) {
+    MyData* const mem = (MyData*)opaque;
+
+    // Shorten a request that would run past the end of the buffer.
+    if (mem->pos + size > mem->size) size = mem->size - mem->pos;
+    if (size <= 0) return size;
+
+    memcpy(dst, mem->data + mem->pos, size);
+    mem->pos += size;
+    return size;
+}
+
+// Decodes the TIFF held in 'data' ('data_size' bytes) into 'pic'.
+//   keep_alpha: when true the raster is imported as RGBA, otherwise as RGBX.
+//   metadata:   when not NULL, receives the metadata found in the file.
+// Only the first directory of a multi-directory file is used.
+// Returns true on success. When metadata extraction fails, both 'metadata' and
+// 'pic' are freed before returning false.
+int ReadTIFF(
+    const uint8_t* const data, size_t data_size, WebPPicture* const pic, int keep_alpha, Metadata* const metadata) {
+    MyData my_data = {data, (toff_t)data_size, 0};
+    TIFF* const tif =
+        TIFFClientOpen("Memory", "r", &my_data, MyRead, MyRead, MySeek, MyClose, MySize, MyMapFile, MyUnmapFile);
+    uint32 width, height;
+    uint32* raster;
+    int64_t stride;
+    int64_t alloc_size;
+    int ok = 0;
+    tdir_t dircount;
+
+    if (tif == NULL) {
+        fprintf(stderr, "Error! Cannot parse TIFF file\n");
+        return 0;
+    }
+
+    dircount = TIFFNumberOfDirectories(tif);
+    if (dircount > 1) {
+        fprintf(stderr,
+                "Warning: multi-directory TIFF files are not supported.\n"
+                "Only the first will be used, %d will be ignored.\n",
+                dircount - 1);
+    }
+
+    if (!(TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &width) && TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &height))) {
+        fprintf(stderr, "Error! Cannot retrieve TIFF image dimensions.\n");
+        goto End;
+    }
+
+    // The dimensions come straight from the file. Size the raster in 64 bits:
+    // 'width * height' evaluated on uint32 operands wraps for large images and
+    // would yield a buffer smaller than what the decoder writes.
+    stride = (int64_t)width * sizeof(*raster);
+    if (stride != (int)stride) { // the import functions take an 'int' stride
+        fprintf(stderr, "Error! TIFF image dimension (%u x %u) is too large.\n", (unsigned int)width,
+                (unsigned int)height);
+        goto End;
+    }
+    // stride fits in an int and height in 32 bits, so the product cannot wrap
+    // in 64 bits. It still has to fit the signed size type of _TIFFmalloc().
+    alloc_size = (int64_t)((uint64_t)stride * height);
+    if (alloc_size < 0 || alloc_size != (tsize_t)alloc_size) {
+        fprintf(stderr, "Error! TIFF image dimension (%u x %u) is too large.\n", (unsigned int)width,
+                (unsigned int)height);
+        goto End;
+    }
+
+    raster = (uint32*)_TIFFmalloc((tsize_t)alloc_size);
+    if (raster != NULL) {
+        if (TIFFReadRGBAImageOriented(tif, width, height, raster, ORIENTATION_TOPLEFT, 1)) {
+            pic->width = width;
+            pic->height = height;
+// TIFF data is ABGR
+#ifdef WORDS_BIGENDIAN
+            TIFFSwabArrayOfLong(raster, (tsize_t)(alloc_size / (int64_t)sizeof(*raster)));
+#endif
+            ok = keep_alpha ? WebPPictureImportRGBA(pic, (const uint8_t*)raster, (int)stride)
+                            : WebPPictureImportRGBX(pic, (const uint8_t*)raster, (int)stride);
+        }
+        _TIFFfree(raster);
+    } else {
+        fprintf(stderr, "Error allocating TIFF RGBA memory!\n");
+    }
+
+    if (ok) {
+        if (metadata != NULL) {
+            ok = ExtractMetadataFromTIFF(tif, metadata);
+            if (!ok) {
+                fprintf(stderr, "Error extracting TIFF metadata!\n");
+                MetadataFree(metadata);
+                WebPPictureFree(pic);
+            }
+        }
+    }
+End:
+    TIFFClose(tif);
+    return ok;
+}
+#else  // !WEBP_HAVE_TIFF
+int ReadTIFF(const uint8_t* const data, // Stub used when WEBP_HAVE_TIFF is not defined: reports the missing support and fails.
+             size_t data_size,
+             struct WebPPicture* const pic,
+             int keep_alpha,
+             struct Metadata* const metadata) {
+    (void)data;
+    (void)data_size;
+    (void)pic;
+    (void)keep_alpha;
+    (void)metadata;
+    fprintf(stderr,
+            "TIFF support not compiled. Please install the libtiff "
+            "development package before building.\n");
+    return 0;
+}
+#endif // WEBP_HAVE_TIFF
+
+// -----------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/tiffdec.h b/codec/L2/demos/webpEnc/host/tiffdec.h
new file mode 100644
index 0000000000..e54119613b
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/tiffdec.h
@@ -0,0 +1,39 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// TIFF decode.
+
+#ifndef WEBP_EXAMPLES_TIFFDEC_H_
+#define WEBP_EXAMPLES_TIFFDEC_H_
+
+#include "webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct Metadata;
+struct WebPPicture;
+
+// Reads a TIFF from 'data' ('data_size' bytes), returning the decoded output in 'pic'.
+// Output is RGBA or YUVA, depending on pic->use_argb value.
+// If 'keep_alpha' is true and the TIFF has an alpha channel, the output is RGBA
+// or YUVA. Otherwise, alpha channel is dropped and output is RGB or YUV.
+// 'metadata' may be NULL, in which case no metadata is extracted. Returns true on success.
+int ReadTIFF(const uint8_t* const data,
+             size_t data_size,
+             struct WebPPicture* const pic,
+             int keep_alpha,
+             struct Metadata* const metadata);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // WEBP_EXAMPLES_TIFFDEC_H_
diff --git a/codec/L2/demos/webpEnc/host/vwebp.c b/codec/L2/demos/webpEnc/host/vwebp.c
new file mode 100644
index 0000000000..e4d975ad32
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/vwebp.c
@@ -0,0 +1,547 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  Simple OpenGL-based WebP file viewer.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+#ifdef HAVE_CONFIG_H
+#include "webp/config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(WEBP_HAVE_GL)
+
+#if defined(HAVE_GLUT_GLUT_H)
+#include <GLUT/glut.h>
+#else
+#include <GL/glut.h>
+#ifdef FREEGLUT
+#include <GL/freeglut.h>
+#endif
+#endif
+
+#ifdef WEBP_HAVE_QCMS
+#include <qcms.h>
+#endif
+
+#include "webp/decode.h"
+#include "webp/demux.h"
+
+#include "./example_util.h"
+
+#if defined(_MSC_VER) && _MSC_VER < 1900
+#define snprintf _snprintf
+#endif
+
+// Unfortunate global variables. Gathered into a struct for comfort.
+static struct {
+    int has_animation;     // Set when the file holds more than one frame.
+    int has_color_profile; // Set when the format flags advertise an ICC profile.
+    int done;              // Set once playback has finished or failed; decode_callback() then does nothing.
+    int decoding_error;    // Set when a frame could not be fetched or decoded.
+    int print_info;        // When set, the info overlay is drawn and the canvas size is printed.
+    int use_color_profile; // When set, Decode() applies the ICC profile to each decoded frame.
+
+    int canvas_width, canvas_height; // Canvas dimensions reported by the demuxer.
+    int loop_count;                  // Loop counter; decode_callback() decrements it on each wrap-around.
+    uint32_t bg_color;               // Background color reported by the demuxer; used for the GL clear color.
+
+    const char* file_name;    // Input path taken from the command line.
+    WebPData data;            // Raw contents of the input file.
+    WebPDecoderConfig config; // Decoder options plus the output buffer frames are decoded into.
+    const WebPDecBuffer* pic; // Most recently decoded frame (points at config.output), or NULL.
+    WebPDemuxer* dmux;        // Demuxer created over 'data'.
+    WebPIterator curr_frame;  // Frame currently being decoded and displayed.
+    WebPIterator prev_frame;  // Previously displayed frame; consulted for its dispose method.
+    WebPChunkIterator iccp;   // The "ICCP" chunk; only fetched when WEBP_HAVE_QCMS is defined.
+} kParams;
+
+static void ClearPreviousPic(void) { // Frees the last decoded frame buffer and forgets it.
+    WebPFreeDecBuffer((WebPDecBuffer*)kParams.pic); // cast drops the 'const' of kParams.pic
+    kParams.pic = NULL;
+}
+
+static void ClearParams(void) { // Releases everything held by kParams: frame buffer, file data, iterators, demuxer.
+    ClearPreviousPic();
+    WebPDataClear(&kParams.data);
+    WebPDemuxReleaseIterator(&kParams.curr_frame);
+    WebPDemuxReleaseIterator(&kParams.prev_frame);
+    WebPDemuxReleaseChunkIterator(&kParams.iccp);
+    WebPDemuxDelete(kParams.dmux); // deleted last, after the iterators above are released
+    kParams.dmux = NULL;
+}
+
+// Makes the previous frame cover the whole canvas with a dispose-to-background
+// method, so that the next display pass clears the entire canvas.
+static void ClearPreviousFrame(void) {
+    kParams.prev_frame.x_offset = 0;
+    kParams.prev_frame.y_offset = 0;
+    kParams.prev_frame.width = kParams.canvas_width;
+    kParams.prev_frame.height = kParams.canvas_height;
+    kParams.prev_frame.dispose_method = WEBP_MUX_DISPOSE_BACKGROUND;
+}
+
+// -----------------------------------------------------------------------------
+// Color profile handling
+
+// Converts the pixels of 'rgba' in place from the ICC 'profile' to sRGB using
+// qcms. Returns false on NULL arguments or when a profile/transform cannot be
+// set up. A missing or too short profile (less than 10 bytes) is left unapplied
+// and reported as success. Without WEBP_HAVE_QCMS this is a no-op that returns
+// true.
+static int ApplyColorProfile(const WebPData* const profile, WebPDecBuffer* const rgba) {
+#ifdef WEBP_HAVE_QCMS
+    const qcms_data_type src_format = QCMS_DATA_RGBA_8;
+    const qcms_data_type dst_format = QCMS_DATA_RGBA_8;
+    const qcms_intent rendering_intent = QCMS_INTENT_DEFAULT;
+    qcms_profile* src_profile = NULL;
+    qcms_profile* dst_profile = NULL;
+    qcms_transform* xform = NULL;
+    uint8_t icc_major;
+    uint8_t* row_ptr;
+    int row;
+    int ok = 0;
+
+    if (profile == NULL || rgba == NULL) return 0;
+    if (profile->bytes == NULL || profile->size < 10) return 1;
+    icc_major = profile->bytes[8];
+
+    qcms_enable_iccv4();
+    src_profile = qcms_profile_from_memory(profile->bytes, profile->size);
+    // qcms_profile_is_bogus() is broken with ICCv4.
+    if (src_profile == NULL || (icc_major < 4 && qcms_profile_is_bogus(src_profile))) {
+        fprintf(stderr, "Color profile is bogus!\n");
+        goto Error;
+    }
+
+    dst_profile = qcms_profile_sRGB();
+    if (dst_profile == NULL) {
+        fprintf(stderr, "Error creating output color profile!\n");
+        goto Error;
+    }
+
+    qcms_profile_precache_output_transform(dst_profile);
+    xform = qcms_transform_create(src_profile, src_format, dst_profile, dst_format, rendering_intent);
+    if (xform == NULL) {
+        fprintf(stderr, "Error creating color transform!\n");
+        goto Error;
+    }
+
+    // Convert one scanline at a time, in place.
+    row_ptr = rgba->u.RGBA.rgba;
+    row = 0;
+    while (row < rgba->height) {
+        qcms_transform_data(xform, row_ptr, row_ptr, rgba->width);
+        ++row;
+        row_ptr += rgba->u.RGBA.stride;
+    }
+    ok = 1;
+
+Error:
+    if (src_profile != NULL) qcms_profile_release(src_profile);
+    if (dst_profile != NULL) qcms_profile_release(dst_profile);
+    if (xform != NULL) qcms_transform_release(xform);
+    return ok;
+#else
+    (void)profile;
+    (void)rgba;
+    return 1;
+#endif // WEBP_HAVE_QCMS
+}
+
+//------------------------------------------------------------------------------
+// File decoding
+
+// Decodes kParams.curr_frame as RGBA into the output buffer of kParams.config
+// and publishes that buffer as kParams.pic. When kParams.use_color_profile is
+// set, the ICC profile is applied to the decoded pixels afterwards.
+// Returns true on success.
+static int Decode(void) {
+    const WebPIterator* const frame = &kParams.curr_frame;
+    WebPDecoderConfig* const cfg = &kParams.config;
+    WebPDecBuffer* const out = &cfg->output;
+
+    ClearPreviousPic();
+    out->colorspace = MODE_RGBA;
+    if (WebPDecode(frame->fragment.bytes, frame->fragment.size, cfg) != VP8_STATUS_OK) {
+        fprintf(stderr, "Decoding of frame #%d failed!\n", frame->frame_num);
+        return 0;
+    }
+
+    kParams.pic = out;
+    if (!kParams.use_color_profile) return 1;
+
+    if (!ApplyColorProfile(&kParams.iccp.chunk, out)) {
+        fprintf(stderr, "Applying color profile to frame #%d failed!\n", frame->frame_num);
+        return 0;
+    }
+    return 1;
+}
+
+// GLUT timer callback driving the animation. Advances to the next frame
+// (wrapping to the first one and counting a loop when the end is reached),
+// decodes it, requests a redraw and re-arms itself with the frame duration.
+// Sets kParams.done when the loops are exhausted, and both kParams.done and
+// kParams.decoding_error when a frame cannot be fetched or decoded.
+static void decode_callback(int what) {
+    int duration = 0;
+
+    if (what != 0 || kParams.done) return;
+
+    if (kParams.dmux != NULL) {
+        WebPIterator* const frame = &kParams.curr_frame;
+        if (!WebPDemuxNextFrame(frame)) {
+            // Past the last frame: restart from frame #1.
+            WebPDemuxReleaseIterator(frame);
+            if (!WebPDemuxGetFrame(kParams.dmux, 1, frame)) {
+                kParams.decoding_error = 1;
+                kParams.done = 1;
+                return;
+            }
+            --kParams.loop_count;
+            kParams.done = (kParams.loop_count == 0);
+            if (kParams.done) return;
+            ClearPreviousFrame();
+        }
+        duration = frame->duration;
+    }
+
+    if (Decode()) {
+        glutPostRedisplay();
+        glutTimerFunc(duration, decode_callback, what);
+    } else {
+        kParams.decoding_error = 1;
+        kParams.done = 1;
+    }
+}
+
+//------------------------------------------------------------------------------
+// Callbacks
+
+// GLUT keyboard callback.
+//   'q' / 'Q' / Esc: quit (leaves the main loop with FREEGLUT, otherwise
+//                    releases kParams and exits).
+//   'c':             toggle use of the color profile, then re-decode the still
+//                    image or restart an animation that has completed.
+//   'i':             toggle the information overlay.
+// The pointer position is ignored.
+static void HandleKey(unsigned char key, int pos_x, int pos_y) {
+    (void)pos_x;
+    (void)pos_y;
+
+    switch (key) {
+        case 'q':
+        case 'Q':
+        case 27: /* Esc */
+#ifdef FREEGLUT
+            glutLeaveMainLoop();
+#else
+            ClearParams();
+            exit(0);
+#endif
+            break;
+
+        case 'c':
+            if (!kParams.has_color_profile || kParams.decoding_error) break;
+            kParams.use_color_profile = 1 - kParams.use_color_profile;
+
+            if (!kParams.has_animation) {
+                Decode();
+                glutPostRedisplay();
+            } else if (kParams.done && kParams.loop_count == 0) {
+                // Restart the completed animation to pickup the color profile change.
+                kParams.loop_count = (int)WebPDemuxGetI(kParams.dmux, WEBP_FF_LOOP_COUNT) + 1;
+                kParams.done = 0;
+                // Start the decode loop immediately.
+                glutTimerFunc(0, decode_callback, 0);
+            }
+            break;
+
+        case 'i':
+            kParams.print_info = 1 - kParams.print_info;
+            glutPostRedisplay();
+            break;
+
+        default:
+            break;
+    }
+}
+
+static void HandleReshape(int width, int height) { // Sets the viewport to width x height and resets both matrices.
+    // TODO(skal): proper handling of resize, esp. for large pictures.
+    // + key control of the zoom.
+    glViewport(0, 0, width, height);
+    glMatrixMode(GL_PROJECTION);
+    glLoadIdentity();
+    glMatrixMode(GL_MODELVIEW); // leaves GL_MODELVIEW as the current matrix mode
+    glLoadIdentity();
+}
+
+// Draws the NUL-terminated 'text' at the current raster position, one
+// character at a time, with the GLUT 9x15 bitmap font.
+static void PrintString(const char* const text) {
+    void* const font = GLUT_BITMAP_9_BY_15;
+    const char* ch;
+    for (ch = text; *ch; ++ch) {
+        glutBitmapCharacter(font, *ch);
+    }
+}
+
+// Extracts the 8-bit channel of 'color' that starts at bit 'shift' and scales
+// it to the [0, 1] range. The mask is required: without it every channel above
+// the selected one stays in the value, so any shift below 24 yields a result
+// far greater than 1.
+static float GetColorf(uint32_t color, int shift) {
+    return ((color >> shift) & 0xff) / 255.f;
+}
+
+// Fills the current viewport with a two-tone gray checkerboard made of 8x8
+// squares. The matrix pushed on entry is restored before returning.
+static void DrawCheckerBoard(void) {
+    const int cell = 8; // must be a power of 2
+    GLint vp[4];        // x, y, width, height
+    int row, col;
+
+    glPushMatrix();
+
+    glGetIntegerv(GL_VIEWPORT, vp);
+    // shift to integer coordinates with (0,0) being top-left.
+    glOrtho(0, vp[2], vp[3], 0, -1, 1);
+    for (row = 0; row < vp[3]; row += cell) {
+        for (col = 0; col < vp[2]; col += cell) {
+            // The 'cell' bit of (col + row) flips from one square to the next.
+            const GLubyte gray = ((col + row) & cell) ? 128 : 192;
+            glColor3ub(gray, gray, gray);
+            glRecti(col, row, col + cell, row + cell);
+        }
+    }
+    glPopMatrix();
+}
+
+static void HandleDisplay(void) { // GLUT display callback: draws kParams.pic, plus the info overlay when enabled.
+    const WebPDecBuffer* const pic = kParams.pic;
+    const WebPIterator* const curr = &kParams.curr_frame;
+    WebPIterator* const prev = &kParams.prev_frame;
+    GLfloat xoff, yoff;
+    if (pic == NULL) return; // nothing decoded yet
+    glPushMatrix();
+    glPixelZoom(1, -1);
+    xoff = (GLfloat)(2. * curr->x_offset / kParams.canvas_width); // frame offset as a fraction of the 2-unit wide [-1, 1] range
+    yoff = (GLfloat)(2. * curr->y_offset / kParams.canvas_height);
+    glRasterPos2f(-1.f + xoff, 1.f - yoff);
+    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+    glPixelStorei(GL_UNPACK_ROW_LENGTH, pic->u.RGBA.stride / 4); // stride in bytes -> row length in 4-byte RGBA pixels
+
+    if (prev->dispose_method == WEBP_MUX_DISPOSE_BACKGROUND || curr->blend_method == WEBP_MUX_NO_BLEND) {
+        // TODO(later): these offsets and those above should factor in window size.
+        //              they will be incorrect if the window is resized.
+        // glScissor() takes window coordinates (0,0 at bottom left).
+        int window_x, window_y;
+        int frame_w, frame_h;
+        if (prev->dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) {
+            // Clear the previous frame rectangle.
+            window_x = prev->x_offset;
+            window_y = kParams.canvas_height - prev->y_offset - prev->height;
+            frame_w = prev->width;
+            frame_h = prev->height;
+        } else { // curr->blend_method == WEBP_MUX_NO_BLEND.
+            // We simulate no-blending behavior by first clearing the current frame
+            // rectangle (to a checker-board) and then alpha-blending against it.
+            window_x = curr->x_offset;
+            window_y = kParams.canvas_height - curr->y_offset - curr->height;
+            frame_w = curr->width;
+            frame_h = curr->height;
+        }
+        glEnable(GL_SCISSOR_TEST);
+        // Only update the requested area, not the whole canvas.
+        glScissor(window_x, window_y, frame_w, frame_h);
+
+        glClear(GL_COLOR_BUFFER_BIT); // use clear color
+        DrawCheckerBoard();
+
+        glDisable(GL_SCISSOR_TEST);
+    }
+
+    *prev = *curr; // remember this frame so the next call can apply its dispose method
+
+    glDrawPixels(pic->width, pic->height, GL_RGBA, GL_UNSIGNED_BYTE, (GLvoid*)pic->u.RGBA.rgba);
+    if (kParams.print_info) {
+        char tmp[32]; // scratch buffer for the overlay lines; snprintf() is bounded by its size
+
+        glColor4f(0.90f, 0.0f, 0.90f, 1.0f);
+        glRasterPos2f(-0.95f, 0.90f);
+        PrintString(kParams.file_name);
+
+        snprintf(tmp, sizeof(tmp), "Dimension:%d x %d", pic->width, pic->height);
+        glColor4f(0.90f, 0.0f, 0.90f, 1.0f);
+        glRasterPos2f(-0.95f, 0.80f);
+        PrintString(tmp);
+        if (curr->x_offset != 0 || curr->y_offset != 0) {
+            snprintf(tmp, sizeof(tmp), " (offset:%d,%d)", curr->x_offset, curr->y_offset);
+            glRasterPos2f(-0.95f, 0.70f);
+            PrintString(tmp);
+        }
+    }
+    glPopMatrix();
+    glFlush();
+}
+
+static void StartDisplay(void) { // Creates the canvas-sized window, registers the callbacks and paints the initial background.
+    const int width = kParams.canvas_width;
+    const int height = kParams.canvas_height;
+    glutInitDisplayMode(GLUT_RGBA);
+    glutInitWindowSize(width, height);
+    glutCreateWindow("WebP viewer");
+    glutDisplayFunc(HandleDisplay);
+    glutIdleFunc(NULL);
+    glutKeyboardFunc(HandleKey);
+    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+    glEnable(GL_BLEND);
+    glClearColor(GetColorf(kParams.bg_color, 0), GetColorf(kParams.bg_color, 8), GetColorf(kParams.bg_color, 16), // one bg_color byte per clear-color component
+                 GetColorf(kParams.bg_color, 24));
+    HandleReshape(width, height);
+    glClear(GL_COLOR_BUFFER_BIT);
+    DrawCheckerBoard();
+}
+
+//------------------------------------------------------------------------------
+// Main
+
+static void Help(void) { // Prints the usage text, the options and the keyboard shortcuts to stdout.
+    printf(
+        "Usage: vwebp in_file [options]\n\n"
+        "Decodes the WebP image file and visualize it using OpenGL\n"
+        "Options are:\n"
+        "  -version  .... print version number and exit\n"
+        "  -noicc ....... don't use the icc profile if present\n"
+        "  -nofancy ..... don't use the fancy YUV420 upscaler\n"
+        "  -nofilter .... disable in-loop filtering\n"
+        "  -dither <int>  dithering strength (0..100), default=50\n"
+        "  -noalphadither disable alpha plane dithering\n"
+        "  -mt .......... use multi-threading\n"
+        "  -info ........ print info\n"
+        "  -h     ....... this help message\n"
+        "\n"
+        "Keyboard shortcuts:\n"
+        "  'c' ................ toggle use of color profile\n"
+        "  'i' ................ overlay file information\n"
+        "  'q' / 'Q' / ESC .... quit\n");
+}
+
+int main(int argc, char* argv[]) { // Viewer entry point: parses options, loads and demuxes the file, decodes frame #1, runs the GLUT loop.
+    int c;
+    WebPDecoderConfig* const config = &kParams.config;
+    WebPIterator* const curr = &kParams.curr_frame;
+
+    if (!WebPInitDecoderConfig(config)) {
+        fprintf(stderr, "Library version mismatch!\n");
+        return -1;
+    }
+    config->options.dithering_strength = 50; // defaults; -dither and -noalphadither override them below
+    config->options.alpha_dithering_strength = 100;
+    kParams.use_color_profile = 1;
+
+    for (c = 1; c < argc; ++c) {
+        int parse_error = 0;
+        if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
+            Help();
+            return 0;
+        } else if (!strcmp(argv[c], "-noicc")) {
+            kParams.use_color_profile = 0;
+        } else if (!strcmp(argv[c], "-nofancy")) {
+            config->options.no_fancy_upsampling = 1;
+        } else if (!strcmp(argv[c], "-nofilter")) {
+            config->options.bypass_filtering = 1;
+        } else if (!strcmp(argv[c], "-noalphadither")) {
+            config->options.alpha_dithering_strength = 0;
+        } else if (!strcmp(argv[c], "-dither") && c + 1 < argc) { // a trailing -dither without a value falls through to "Unknown option"
+            config->options.dithering_strength = ExUtilGetInt(argv[++c], 0, &parse_error);
+        } else if (!strcmp(argv[c], "-info")) {
+            kParams.print_info = 1;
+        } else if (!strcmp(argv[c], "-version")) {
+            const int dec_version = WebPGetDecoderVersion();
+            const int dmux_version = WebPGetDemuxVersion();
+            printf("WebP Decoder version: %d.%d.%d\nWebP Demux version: %d.%d.%d\n", (dec_version >> 16) & 0xff,
+                   (dec_version >> 8) & 0xff, dec_version & 0xff, (dmux_version >> 16) & 0xff,
+                   (dmux_version >> 8) & 0xff, dmux_version & 0xff);
+            return 0;
+        } else if (!strcmp(argv[c], "-mt")) {
+            config->options.use_threads = 1;
+        } else if (!strcmp(argv[c], "--")) { // end of options: the next argument, if any, is the file name
+            if (c < argc - 1) kParams.file_name = argv[++c];
+            break;
+        } else if (argv[c][0] == '-') {
+            printf("Unknown option '%s'\n", argv[c]);
+            Help();
+            return -1;
+        } else {
+            kParams.file_name = argv[c]; // a later bare argument replaces an earlier one
+        }
+
+        if (parse_error) {
+            Help();
+            return -1;
+        }
+    }
+
+    if (kParams.file_name == NULL) {
+        printf("missing input file!!\n");
+        Help();
+        return 0; // NOTE(review): exits with status 0 although no input was given - confirm this is intended
+    }
+
+    if (!ExUtilReadFile(kParams.file_name, &kParams.data.bytes, &kParams.data.size)) {
+        goto Error;
+    }
+
+    if (!WebPGetInfo(kParams.data.bytes, kParams.data.size, NULL, NULL)) {
+        fprintf(stderr, "Input file doesn't appear to be WebP format.\n");
+        goto Error;
+    }
+
+    kParams.dmux = WebPDemux(&kParams.data);
+    if (kParams.dmux == NULL) {
+        fprintf(stderr, "Could not create demuxing object!\n");
+        goto Error;
+    }
+
+    if (WebPDemuxGetI(kParams.dmux, WEBP_FF_FORMAT_FLAGS) & FRAGMENTS_FLAG) {
+        fprintf(stderr, "Image fragments are not supported for now!\n");
+        goto Error;
+    }
+    kParams.canvas_width = WebPDemuxGetI(kParams.dmux, WEBP_FF_CANVAS_WIDTH);
+    kParams.canvas_height = WebPDemuxGetI(kParams.dmux, WEBP_FF_CANVAS_HEIGHT);
+    if (kParams.print_info) {
+        printf("Canvas: %d x %d\n", kParams.canvas_width, kParams.canvas_height);
+    }
+
+    ClearPreviousFrame(); // the first display pass then clears the whole canvas
+
+    memset(&kParams.iccp, 0, sizeof(kParams.iccp));
+    kParams.has_color_profile = !!(WebPDemuxGetI(kParams.dmux, WEBP_FF_FORMAT_FLAGS) & ICCP_FLAG);
+    if (kParams.has_color_profile) {
+#ifdef WEBP_HAVE_QCMS
+        if (!WebPDemuxGetChunk(kParams.dmux, "ICCP", 1, &kParams.iccp)) goto Error;
+        printf("VP8X: Found color profile\n");
+#else
+        fprintf(stderr,
+                "Warning: color profile present, but qcms is unavailable!\n"
+                "Build libqcms from Mozilla or Chromium and define WEBP_HAVE_QCMS "
+                "before building.\n");
+#endif
+    }
+
+    if (!WebPDemuxGetFrame(kParams.dmux, 1, curr)) goto Error;
+
+    kParams.has_animation = (curr->num_frames > 1);
+    kParams.loop_count = (int)WebPDemuxGetI(kParams.dmux, WEBP_FF_LOOP_COUNT);
+    kParams.bg_color = WebPDemuxGetI(kParams.dmux, WEBP_FF_BACKGROUND_COLOR);
+    printf("VP8X: Found %d images in file (loop count = %d)\n", curr->num_frames, kParams.loop_count);
+
+    // Decode first frame
+    if (!Decode()) goto Error;
+
+    // Position iterator to last frame. Next call to decode_callback() will wrap over
+    // to frame #1. We take this into account by bumping up a non-zero loop_count.
+    WebPDemuxGetFrame(kParams.dmux, 0, curr);
+    if (kParams.loop_count) ++kParams.loop_count;
+
+#if defined(__unix__) || defined(__CYGWIN__)
+    // Work around GLUT compositor bug.
+    // https://bugs.launchpad.net/ubuntu/+source/freeglut/+bug/369891
+    setenv("XLIB_SKIP_ARGB_VISUALS", "1", 1);
+#endif
+
+    // Start display (and timer)
+    glutInit(&argc, argv);
+#ifdef FREEGLUT
+    glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE, GLUT_ACTION_CONTINUE_EXECUTION);
+#endif
+    StartDisplay();
+
+    if (kParams.has_animation) glutTimerFunc(0, decode_callback, 0); // still images are never re-decoded by the timer
+    glutMainLoop();
+
+    // Should only be reached when using FREEGLUT:
+    ClearParams();
+    return 0;
+
+Error:
+    ClearParams();
+    return -1;
+}
+
+#else // !WEBP_HAVE_GL
+
+int main(int argc, const char* argv[]) { // Built when WEBP_HAVE_GL is not defined: reports the missing OpenGL support, returns 0.
+    fprintf(stderr, "OpenGL support not enabled in %s.\n", argv[0]);
+    (void)argc;
+    return 0;
+}
+
+#endif
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/webpdec.c b/codec/L2/demos/webpEnc/host/webpdec.c
new file mode 100644
index 0000000000..825e3031e4
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/webpdec.c
@@ -0,0 +1,69 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebP decode.
+
+#include "./webpdec.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "webp/decode.h"
+#include "webp/encode.h"
+#include "./example_util.h"
+#include "./metadata.h"
+// Decodes the WebP bitstream in 'data' into 'pic'; returns non-zero on success, 0 on any failure.
+int ReadWebP(
+    const uint8_t* const data, size_t data_size, WebPPicture* const pic, int keep_alpha, Metadata* const metadata) {
+    int ok = 0;
+    VP8StatusCode status = VP8_STATUS_OK;
+    WebPDecoderConfig config;
+    WebPDecBuffer* const output_buffer = &config.output;
+    WebPBitstreamFeatures* const bitstream = &config.input;
+    if (data == NULL || data_size == 0 || pic == NULL) return 0; // nothing to decode / nowhere to put it
+    // TODO(jzern): add Exif/XMP/ICC extraction.
+    if (metadata != NULL) {
+        fprintf(stderr, "Warning: metadata extraction from WebP is unsupported.\n");
+    }
+
+    if (!WebPInitDecoderConfig(&config)) {
+        fprintf(stderr, "Library version mismatch!\n");
+        return 0;
+    }
+
+    status = WebPGetFeatures(data, data_size, bitstream);
+    if (status != VP8_STATUS_OK) {
+        ExUtilPrintWebPError("input data", status);
+        return 0;
+    }
+    {
+        const int has_alpha = keep_alpha && bitstream->has_alpha;
+        // TODO(skal): use MODE_YUV(A), depending on the expected
+        // input pic->use_argb. This would save some conversion steps.
+        output_buffer->colorspace = has_alpha ? MODE_RGBA : MODE_RGB;
+
+        status = ExUtilDecodeWebP(data, data_size, 0, &config);
+        if (status == VP8_STATUS_OK) {
+            const uint8_t* const rgba = output_buffer->u.RGBA.rgba;
+            const int stride = output_buffer->u.RGBA.stride;
+            pic->width = output_buffer->width;
+            pic->height = output_buffer->height;
+            ok = has_alpha ? WebPPictureImportRGBA(pic, rgba, stride) : WebPPictureImportRGB(pic, rgba, stride);
+        }
+    }
+
+    if (status != VP8_STATUS_OK) {
+        ExUtilPrintWebPError("input data", status);
+    }
+
+    WebPFreeDecBuffer(output_buffer);
+    return ok;
+}
+
+// -----------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/webpdec.h b/codec/L2/demos/webpEnc/host/webpdec.h
new file mode 100644
index 0000000000..d38cb76d30
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/webpdec.h
@@ -0,0 +1,39 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebP decode.
+
+#ifndef WEBP_EXAMPLES_WEBPDEC_H_
+#define WEBP_EXAMPLES_WEBPDEC_H_
+
+#include "webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct Metadata;
+struct WebPPicture;
+
+// Decodes the WebP bitstream of 'data_size' bytes at 'data' into 'pic'.
+// Output is RGBA or YUVA, depending on pic->use_argb value.
+// If 'keep_alpha' is true and the WebP has an alpha channel, the output is RGBA
+// or YUVA. Otherwise, alpha channel is dropped and output is RGB or YUV.
+// Returns true on success. 'metadata' is not filled in (extraction is unsupported).
+int ReadWebP(const uint8_t* const data,
+             size_t data_size,
+             struct WebPPicture* const pic,
+             int keep_alpha,
+             struct Metadata* const metadata);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // WEBP_EXAMPLES_WEBPDEC_H_
diff --git a/codec/L2/demos/webpEnc/host/webpmux.c b/codec/L2/demos/webpEnc/host/webpmux.c
new file mode 100644
index 0000000000..5beb8389fc
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/webpmux.c
@@ -0,0 +1,1002 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  Simple command-line to create a WebP container file and to extract or strip
+//  relevant data from the container file.
+//
+// Authors: Vikas (vikaas.arora@gmail.com),
+//          Urvang (urvang@google.com)
+
+/*  Usage examples:
+
+  Create container WebP file:
+    webpmux -frame anim_1.webp +100+10+10   \
+            -frame anim_2.webp +100+25+25+1 \
+            -frame anim_3.webp +100+50+50+1 \
+            -frame anim_4.webp +100         \
+            -loop 10 -bgcolor 128,255,255,255 \
+            -o out_animation_container.webp
+
+    webpmux -set icc image_profile.icc in.webp -o out_icc_container.webp
+    webpmux -set exif image_metadata.exif in.webp -o out_exif_container.webp
+    webpmux -set xmp image_metadata.xmp in.webp -o out_xmp_container.webp
+
+  Extract relevant data from WebP container file:
+    webpmux -get frgm n in.webp -o out_fragment.webp
+    webpmux -get frame n in.webp -o out_frame.webp
+    webpmux -get icc in.webp -o image_profile.icc
+    webpmux -get exif in.webp -o image_metadata.exif
+    webpmux -get xmp in.webp -o image_metadata.xmp
+
+  Strip data from WebP Container file:
+    webpmux -strip icc in.webp -o out.webp
+    webpmux -strip exif in.webp -o out.webp
+    webpmux -strip xmp in.webp -o out.webp
+
+  Misc:
+    webpmux -info in.webp
+    webpmux [ -h | -help ]
+    webpmux -version
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "webp/config.h"
+#endif
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "webp/decode.h"
+#include "webp/mux.h"
+#include "./example_util.h"
+
+//------------------------------------------------------------------------------
+// Config object to parse command-line arguments.
+// Action requested on the command line.
+typedef enum { NIL_ACTION = 0, ACTION_GET, ACTION_SET, ACTION_STRIP, ACTION_INFO, ACTION_HELP } ActionType;
+// Kind of an animation argument: ParseCommandLine() uses ANMF for -frame, LOOP for -loop, BGCOLOR for -bgcolor.
+typedef enum { NIL_SUBTYPE = 0, SUBTYPE_ANMF, SUBTYPE_LOOP, SUBTYPE_BGCOLOR } FeatureSubType;
+
+typedef struct {
+    FeatureSubType subtype_; // set only for -frame / -loop / -bgcolor arguments
+    const char* filename_;   // file operand (e.g. the frame or metadata file), if any
+    const char* params_;     // parameter string (e.g. frame options or a frame index), if any
+} FeatureArg;
+
+typedef enum {
+    NIL_FEATURE = 0, // no feature chosen yet
+    FEATURE_EXIF,
+    FEATURE_XMP,
+    FEATURE_ICCP,
+    FEATURE_ANMF,
+    FEATURE_FRGM,
+    LAST_FEATURE // count of the values above; used as the size of the two tables below
+} FeatureType;
+// Chunk tag for each FeatureType, in enum order (none for NIL_FEATURE).
+static const char* const kFourccList[LAST_FEATURE] = {NULL, "EXIF", "XMP ", "ICCP", "ANMF", "FRGM"};
+// Description text for each FeatureType, in enum order (none for NIL_FEATURE).
+static const char* const kDescriptions[LAST_FEATURE] = {NULL,          "EXIF metadata",   "XMP metadata",
+                                                        "ICC profile", "Animation frame", "Image fragment"};
+
+typedef struct {
+    FeatureType type_;
+    FeatureArg* args_; // array allocated by InitializeConfig(), released by DeleteConfig()
+    int arg_count_;    // set by InitializeConfig(); reset to 0 when -strip or -info is parsed
+} Feature;
+
+typedef struct {
+    ActionType action_type_;
+    const char* input_;  // input file name (points into argv), or NULL
+    const char* output_; // output file name given with -o (points into argv), or NULL
+    Feature feature_;
+} WebPMuxConfig;
+
+//------------------------------------------------------------------------------
+// Helper functions.
+
+static int CountOccurrences(const char* arglist[], int list_length, const char* arg) {
+    // Tally the entries of 'arglist' that compare equal to 'arg'.
+    int matches = 0;
+    int idx = 0;
+
+    while (idx < list_length) {
+        if (strcmp(arglist[idx], arg) == 0) matches += 1;
+        ++idx;
+    }
+    return matches;
+}
+// Names of the WebPMuxError failure codes; the array is sized by -WEBP_MUX_NOT_ENOUGH_DATA + 1.
+static const char* const kErrorMessages[-WEBP_MUX_NOT_ENOUGH_DATA + 1] = {
+    "WEBP_MUX_NOT_FOUND", "WEBP_MUX_INVALID_ARGUMENT", "WEBP_MUX_BAD_DATA", "WEBP_MUX_MEMORY_ERROR",
+    "WEBP_MUX_NOT_ENOUGH_DATA"};
+// Returns the kErrorMessages entry at index -err; asserts err is within the covered range.
+static const char* ErrorString(WebPMuxError err) {
+    assert(err <= WEBP_MUX_NOT_FOUND && err >= WEBP_MUX_NOT_ENOUGH_DATA);
+    return kErrorMessages[-err];
+}
+// Error helpers: RETURN_IF_ERROR* need a local 'err', ERROR_GOTO* a local 'ok'; each acts as one statement.
+#define RETURN_IF_ERROR(ERR_MSG)      \
+    do {                              \
+        if (err != WEBP_MUX_OK) {     \
+            fprintf(stderr, ERR_MSG); \
+            return err;               \
+        }                             \
+    } while (0)
+#define RETURN_IF_ERROR3(ERR_MSG, FORMAT_STR1, FORMAT_STR2)     \
+    do {                                                        \
+        if (err != WEBP_MUX_OK) {                               \
+            fprintf(stderr, ERR_MSG, FORMAT_STR1, FORMAT_STR2); \
+            return err;                                         \
+        }                                                       \
+    } while (0)
+#define ERROR_GOTO1(ERR_MSG, LABEL) \
+    do {                            \
+        fprintf(stderr, ERR_MSG);   \
+        ok = 0;                     \
+        goto LABEL;                 \
+    } while (0)
+#define ERROR_GOTO2(ERR_MSG, FORMAT_STR, LABEL) \
+    do {                                        \
+        fprintf(stderr, ERR_MSG, FORMAT_STR);   \
+        ok = 0;                                 \
+        goto LABEL;                             \
+    } while (0)
+#define ERROR_GOTO3(ERR_MSG, FORMAT_STR1, FORMAT_STR2, LABEL) \
+    do {                                                      \
+        fprintf(stderr, ERR_MSG, FORMAT_STR1, FORMAT_STR2);   \
+        ok = 0;                                               \
+        goto LABEL;                                           \
+    } while (0)
+// Prints the canvas size, the feature flags and the per-frame / metadata details of 'mux' to stdout.
+// Returns WEBP_MUX_OK, or the error that stopped the listing (a message is printed to stderr first).
+static WebPMuxError DisplayInfo(const WebPMux* mux) {
+    int width, height;
+    uint32_t flag = 0; // defined value even if WebPMuxGetFeatures() fails without writing it
+
+    WebPMuxError err = WebPMuxGetCanvasSize(mux, &width, &height);
+    assert(err == WEBP_MUX_OK); // As WebPMuxCreate() was successful earlier.
+    printf("Canvas size: %d x %d\n", width, height);
+
+    err = WebPMuxGetFeatures(mux, &flag);
+    if (err == WEBP_MUX_OK && (flag & FRAGMENTS_FLAG)) err = WEBP_MUX_INVALID_ARGUMENT;
+    RETURN_IF_ERROR("Failed to retrieve features\n");
+
+    if (flag == 0) {
+        printf("No features present.\n");
+        return err;
+    }
+
+    // Print the features present.
+    printf("Features present:");
+    if (flag & ANIMATION_FLAG) printf(" animation");
+    if (flag & FRAGMENTS_FLAG) printf(" image fragments");
+    if (flag & ICCP_FLAG) printf(" ICC profile");
+    if (flag & EXIF_FLAG) printf(" EXIF metadata");
+    if (flag & XMP_FLAG) printf(" XMP metadata");
+    if (flag & ALPHA_FLAG) printf(" transparency");
+    printf("\n");
+
+    if ((flag & ANIMATION_FLAG) || (flag & FRAGMENTS_FLAG)) {
+        const int is_anim = !!(flag & ANIMATION_FLAG);
+        const WebPChunkId id = is_anim ? WEBP_CHUNK_ANMF : WEBP_CHUNK_FRGM;
+        const char* const type_str = is_anim ? "frame" : "fragment";
+        int nFrames;
+        if (is_anim) {
+            WebPMuxAnimParams params;
+            err = WebPMuxGetAnimationParams(mux, &params);
+            assert(err == WEBP_MUX_OK);
+            printf("Background color : 0x%.8X  Loop Count : %d\n", params.bgcolor, params.loop_count);
+        }
+
+        err = WebPMuxNumChunks(mux, id, &nFrames);
+        assert(err == WEBP_MUX_OK);
+        printf("Number of %ss: %d\n", type_str, nFrames);
+        if (nFrames > 0) {
+            int i;
+            printf("No.: width height alpha x_offset y_offset ");
+            if (is_anim) printf("duration   dispose blend ");
+            printf("image_size\n");
+            for (i = 1; i <= nFrames; i++) {
+                WebPMuxFrameInfo frame;
+                WebPDataInit(&frame.bitstream); // so the WebPDataClear() below is safe if the fetch fails
+                err = WebPMuxGetFrame(mux, i, &frame);
+                if (err == WEBP_MUX_OK) {
+                    WebPBitstreamFeatures features;
+                    const VP8StatusCode status =
+                        WebPGetFeatures(frame.bitstream.bytes, frame.bitstream.size, &features);
+                    assert(status == VP8_STATUS_OK); // Checked by WebPMuxCreate().
+                    (void)status;
+                    printf("%3d: %5d %5d %5s %8d %8d ", i, features.width, features.height,
+                           features.has_alpha ? "yes" : "no", frame.x_offset, frame.y_offset);
+                    if (is_anim) {
+                        const char* const dispose =
+                            (frame.dispose_method == WEBP_MUX_DISPOSE_NONE) ? "none" : "background";
+                        const char* const blend = (frame.blend_method == WEBP_MUX_BLEND) ? "yes" : "no";
+                        printf("%8d %10s %5s ", frame.duration, dispose, blend);
+                    }
+                    printf("%10d\n", (int)frame.bitstream.size);
+                }
+                WebPDataClear(&frame.bitstream);
+                RETURN_IF_ERROR3("Failed to retrieve %s#%d\n", type_str, i);
+            }
+        }
+    }
+
+    if (flag & ICCP_FLAG) {
+        WebPData icc_profile;
+        err = WebPMuxGetChunk(mux, "ICCP", &icc_profile);
+        assert(err == WEBP_MUX_OK);
+        printf("Size of the ICC profile data: %d\n", (int)icc_profile.size);
+    }
+
+    if (flag & EXIF_FLAG) {
+        WebPData exif;
+        err = WebPMuxGetChunk(mux, "EXIF", &exif);
+        assert(err == WEBP_MUX_OK);
+        printf("Size of the EXIF metadata: %d\n", (int)exif.size);
+    }
+
+    if (flag & XMP_FLAG) {
+        WebPData xmp;
+        err = WebPMuxGetChunk(mux, "XMP ", &xmp);
+        assert(err == WEBP_MUX_OK);
+        printf("Size of the XMP metadata: %d\n", (int)xmp.size);
+    }
+
+    if ((flag & ALPHA_FLAG) && !(flag & (ANIMATION_FLAG | FRAGMENTS_FLAG))) {
+        WebPMuxFrameInfo image;
+        WebPDataInit(&image.bitstream); // so the WebPDataClear() below is safe if the fetch fails
+        err = WebPMuxGetFrame(mux, 1, &image);
+        if (err == WEBP_MUX_OK) {
+            printf("Size of the image (with alpha): %d\n", (int)image.bitstream.size);
+        }
+        WebPDataClear(&image.bitstream);
+        RETURN_IF_ERROR("Failed to retrieve the image\n");
+    }
+    return WEBP_MUX_OK;
+}
+// Prints the usage text for every webpmux mode and option group to stdout.
+static void PrintHelp(void) {
+    printf("Usage: webpmux -get GET_OPTIONS INPUT -o OUTPUT\n");
+    printf("       webpmux -set SET_OPTIONS INPUT -o OUTPUT\n");
+    printf("       webpmux -strip STRIP_OPTIONS INPUT -o OUTPUT\n");
+    printf(
+        "       webpmux -frame FRAME_OPTIONS [-frame...] [-loop LOOP_COUNT]"
+        "\n");
+    printf("               [-bgcolor BACKGROUND_COLOR] -o OUTPUT\n");
+    printf("       webpmux -info INPUT\n");
+    printf("       webpmux [-h|-help]\n");
+    printf("       webpmux -version\n");
+
+    printf("\n");
+    printf("GET_OPTIONS:\n");
+    printf(" Extract relevant data:\n");
+    printf("   icc       get ICC profile\n");
+    printf("   exif      get EXIF metadata\n");
+    printf("   xmp       get XMP metadata\n");
+    printf("   frame n   get nth frame\n");
+
+    printf("\n");
+    printf("SET_OPTIONS:\n");
+    printf(" Set color profile/metadata:\n");
+    printf("   icc  file.icc     set ICC profile\n");
+    printf("   exif file.exif    set EXIF metadata\n");
+    printf("   xmp  file.xmp     set XMP metadata\n");
+    printf("   where:    'file.icc' contains the ICC profile to be set,\n");
+    printf("             'file.exif' contains the EXIF metadata to be set\n");
+    printf("             'file.xmp' contains the XMP metadata to be set\n");
+
+    printf("\n");
+    printf("STRIP_OPTIONS:\n");
+    printf(" Strip color profile/metadata:\n");
+    printf("   icc       strip ICC profile\n");
+    printf("   exif      strip EXIF metadata\n");
+    printf("   xmp       strip XMP metadata\n");
+
+    printf("\n");
+    printf("FRAME_OPTIONS(i):\n");
+    printf(" Create animation:\n");
+    printf("   file_i +di+[xi+yi[+mi[bi]]]\n");
+    printf("   where:    'file_i' is the i'th animation frame (WebP format),\n");
+    printf("             'di' is the pause duration before next frame,\n");
+    printf("             'xi','yi' specify the image offset for this frame,\n");
+    printf("             'mi' is the dispose method for this frame (0 or 1),\n");
+    printf(
+        "             'bi' is the blending method for this frame (+b or -b)"
+        "\n");
+
+    printf("\n");
+    printf("LOOP_COUNT:\n");
+    printf(" Number of times to repeat the animation.\n");
+    printf(" Valid range is 0 to 65535 [Default: 0 (infinite)].\n");
+
+    printf("\n");
+    printf("BACKGROUND_COLOR:\n");
+    printf(" Background color of the canvas.\n");
+    printf("  A,R,G,B\n");
+    printf(
+        "  where:    'A', 'R', 'G' and 'B' are integers in the range 0 to 255 "
+        "specifying\n");
+    printf(
+        "            the Alpha, Red, Green and Blue component values "
+        "respectively\n");
+    printf("            [Default: 255,255,255,255]\n");
+
+    printf("\nINPUT & OUTPUT are in WebP format.\n");
+
+    printf("\nNote: The nature of EXIF, XMP and ICC data is not checked");
+    printf(" and is assumed to be\nvalid.\n");
+}
+// Warns on stderr when either offset of 'info' is odd, showing the even values it will be snapped to.
+static void WarnAboutOddOffset(const WebPMuxFrameInfo* const info) {
+    const int x = info->x_offset;
+    const int y = info->y_offset;
+    if (((x | y) & 1) == 0) return; // both offsets already even
+    fprintf(stderr,
+            "Warning: odd offsets will be snapped to even values"
+            " (%d, %d) -> (%d, %d)\n", x, y, x & ~1, y & ~1);
+}
+// Reads 'filename' via ExUtilReadFile() and stores the resulting buffer and size in 'webp_data'.
+static int ReadFileToWebPData(const char* const filename, WebPData* const webp_data) {
+    const uint8_t* contents;
+    size_t length;
+    if (ExUtilReadFile(filename, &contents, &length) == 0) return 0;
+    webp_data->size = length;
+    webp_data->bytes = contents;
+    return 1;
+}
+// Builds a mux object from the contents of 'filename'; returns 1 and sets '*mux' on success, else 0.
+static int CreateMux(const char* const filename, WebPMux** mux) {
+    WebPData file_data;
+    assert(mux != NULL);
+    if (ReadFileToWebPData(filename, &file_data) == 0) return 0;
+    // The second argument (1) asks WebPMuxCreate() to copy the data, so the file buffer is freed at once.
+    *mux = WebPMuxCreate(&file_data, 1);
+    free((void*)file_data.bytes);
+    if (*mux == NULL) fprintf(stderr, "Failed to create mux object from file %s.\n", filename);
+    return (*mux != NULL);
+}
+// Writes 'webpdata' to 'filename' ("-" selects stdout); returns 1 on success, 0 on any failure.
+static int WriteData(const char* filename, const WebPData* const webpdata) {
+    int ok;
+    FILE* fout = strcmp(filename, "-") ? fopen(filename, "wb") : ExUtilSetBinaryMode(stdout);
+    if (fout == NULL) {
+        fprintf(stderr, "Error opening output WebP file %s!\n", filename);
+        return 0;
+    }
+    ok = (fwrite(webpdata->bytes, webpdata->size, 1, fout) == 1);
+    if (((fout != stdout) ? fclose(fout) : fflush(fout)) != 0) ok = 0; // a failed flush/close loses data
+    if (ok) {
+        fprintf(stderr, "Saved file %s (%d bytes)\n", filename, (int)webpdata->size);
+    } else {
+        fprintf(stderr, "Error writing file %s!\n", filename);
+    }
+    return ok;
+}
+// Assembles 'mux' into a WebP bitstream and writes it to 'filename'; returns 1 on success, else 0.
+static int WriteWebP(WebPMux* const mux, const char* filename) {
+    WebPData assembled;
+    int written = 0;
+    const WebPMuxError status = WebPMuxAssemble(mux, &assembled);
+    if (status == WEBP_MUX_OK) {
+        written = WriteData(filename, &assembled);
+        WebPDataClear(&assembled);
+    } else {
+        fprintf(stderr, "Error (%s) assembling the WebP file.\n", ErrorString(status));
+    }
+    return written;
+}
+// Parses a frame parameter string of the form "+duration[+x+y[+dispose[{+|-}b]]]" into 'info'.
+// Omitted offsets default to 0, the dispose method to 0 and the blend suffix to "+b".
+// Returns 1 on success, 0 if 'args' is malformed.
+static int ParseFrameArgs(const char* args, WebPMuxFrameInfo* const info) {
+    int dispose_method, dummy;
+    char plus_minus, blend_method;
+    const int fields_read = sscanf(args, "+%d+%d+%d+%d%c%c+%d", &info->duration, &info->x_offset, &info->y_offset,
+                                   &dispose_method, &plus_minus, &blend_method, &dummy);
+
+    // A complete specification converts exactly 1, 3, 4 or 6 fields; any other count
+    // (including a 7th field, which is scanned only to detect excess input) is rejected.
+    if (fields_read != 1 && fields_read != 3 && fields_read != 4 && fields_read != 6) return 0;
+
+    // Fill in the default for every trailing group that was left out.
+    if (fields_read < 3) info->x_offset = info->y_offset = 0;
+    if (fields_read < 4) dispose_method = 0;
+    if (fields_read < 6) {
+        plus_minus = '+';
+        blend_method = 'b';
+    }
+
+    WarnAboutOddOffset(info);
+
+    // No range check here: WebPMuxPushFrame() is expected to validate the value (TODO confirm).
+    info->dispose_method = (WebPMuxAnimDispose)dispose_method;
+
+    // The blend suffix has to read "+b" or "-b".
+    if (blend_method != 'b') return 0;
+    if (plus_minus != '+' && plus_minus != '-') return 0;
+    // '-' selects WEBP_MUX_NO_BLEND, '+' selects WEBP_MUX_BLEND.
+    info->blend_method = (plus_minus == '-') ? WEBP_MUX_NO_BLEND : WEBP_MUX_BLEND;
+    return 1;
+}
+// Parses "+x+y" fragment offsets into 'info'; returns 1 only if both values were read.
+static int ParseFragmentArgs(const char* args, WebPMuxFrameInfo* const info) {
+    if (sscanf(args, "+%d+%d", &info->x_offset, &info->y_offset) != 2) return 0;
+    WarnAboutOddOffset(info);
+    return 1;
+}
+// Parses "A,R,G,B" (each 0..255) into a packed 0xAARRGGBB value; returns 1 on success, else 0.
+static int ParseBgcolorArgs(const char* args, uint32_t* const bgcolor) {
+    unsigned int a, r, g, b; // "%u" requires 'unsigned int*', which uint32_t is not guaranteed to be
+    if (sscanf(args, "%u,%u,%u,%u", &a, &r, &g, &b) != 4) return 0;
+    if (a >= 256 || r >= 256 || g >= 256 || b >= 256) return 0;
+    *bgcolor = ((uint32_t)a << 24) | ((uint32_t)r << 16) | ((uint32_t)g << 8) | (uint32_t)b;
+    return 1;
+}
+
+//------------------------------------------------------------------------------
+// Clean-up.
+
+static void DeleteConfig(WebPMuxConfig* config) {
+    if (config == NULL) return;
+    // Release the argument array, then zero every field so no stale pointer remains.
+    free(config->feature_.args_);
+    memset(config, 0, sizeof(*config));
+}
+
+//------------------------------------------------------------------------------
+// Parsing.
+
+// Basic syntactic checks on the command-line arguments: options that may appear
+// only once, and option combinations that exclude or require each other.
+// Returns 1 on valid, 0 (after printing the reason to stderr) otherwise.
+// On success '*num_feature_args' is the number of feature arguments given
+// (e.g. 4 '-frame's and 1 '-loop' give 5); on failure it is left at 0.
+static int ValidateCommandLine(int argc, const char* argv[], int* num_feature_args) {
+    // Options accepted at most once, each paired with the message printed when
+    // it is repeated. The rows are checked in the order listed.
+    static const char* const kAtMostOnce[][2] = {
+        {"-get", "ERROR: Multiple '-get' arguments specified.\n"},
+        {"-set", "ERROR: Multiple '-set' arguments specified.\n"},
+        {"-strip", "ERROR: Multiple '-strip' arguments specified.\n"},
+        {"-info", "ERROR: Multiple '-info' arguments specified.\n"},
+        {"-o", "ERROR: Multiple output files specified.\n"},
+        {"-loop", "ERROR: Multiple loop counts specified.\n"},
+        {"-bgcolor", "ERROR: Multiple background colors specified.\n"},
+    };
+
+    const size_t num_rules = sizeof(kAtMostOnce) / sizeof(kAtMostOnce[0]);
+    size_t rule;
+    int frames;
+    int fragments;
+    int loops;
+    int bgcolors;
+
+    assert(num_feature_args != NULL);
+    *num_feature_args = 0;
+
+    // Simple checks: stop at the first option that was given more than once.
+    for (rule = 0; rule < num_rules; ++rule) {
+        const char* const option = kAtMostOnce[rule][0];
+        if (CountOccurrences(argv, argc, option) > 1) {
+            fprintf(stderr, "%s", kAtMostOnce[rule][1]);
+            return 0;
+        }
+    }
+
+    // Compound checks.
+    frames = CountOccurrences(argv, argc, "-frame");
+    fragments = CountOccurrences(argv, argc, "-frgm");
+    loops = CountOccurrences(argv, argc, "-loop");
+    bgcolors = CountOccurrences(argv, argc, "-bgcolor");
+
+    // '-loop' and '-bgcolor' only make sense together with at least one '-frame'.
+    if (frames == 0 && (loops + bgcolors) > 0) {
+        fprintf(stderr,
+                "ERROR: Loop count and background color are relevant only in "
+                "case of animation.\n");
+        return 0;
+    }
+
+    // Frames and fragments cannot be combined.
+    if (frames > 0 && fragments > 0) {
+        fprintf(stderr,
+                "ERROR: Only one of frames & fragments can be specified at a "
+                "time.\n");
+        return 0;
+    }
+
+    // All checks passed: work out how many feature arguments will be parsed.
+    if (frames > 0) {
+        // Animation: every '-frame', '-loop' and '-bgcolor' is one feature argument.
+        *num_feature_args = frames + loops + bgcolors;
+    } else if (fragments > 0) {
+        // Fragmented image: one feature argument per '-frgm'.
+        *num_feature_args = fragments;
+    } else {
+        // Single argument ('set' action for ICCP/EXIF/XMP, OR a 'get' action).
+        *num_feature_args = 1;
+    }
+
+    return 1;
+}
+
+#define ACTION_IS_NIL (config->action_type_ == NIL_ACTION)
+
+#define FEATURETYPE_IS_NIL (feature->type_ == NIL_FEATURE)
+
+// Argument-count guards for ParseCommandLine(): they read its locals 'argc', 'argv' and 'i', and on
+// failure go through ERROR_GOTO2 so that 'ok' is cleared before jumping to LABEL.
+#define CHECK_NUM_ARGS_LESS(NUM, LABEL)                                                            \
+    do {                                                                                           \
+        if (argc < i + (NUM)) ERROR_GOTO2("ERROR: Too few arguments for '%s'.\n", argv[i], LABEL); \
+    } while (0)
+
+#define CHECK_NUM_ARGS_NOT_EQUAL(NUM, LABEL)                                                         \
+    do {                                                                                             \
+        if (argc != i + (NUM)) ERROR_GOTO2("ERROR: Too many arguments for '%s'.\n", argv[i], LABEL); \
+    } while (0)
+
+// Parses command-line arguments to fill up config object. Also performs some semantic checks.
+// Returns 0 if one of the ERROR_GOTO* checks fired (its message is already printed), 1 otherwise.
+static int ParseCommandLine(int argc, const char* argv[], WebPMuxConfig* config) {
+    int i = 0;
+    int feature_arg_index = 0;
+    int ok = 1;
+    while (i < argc) {
+        Feature* const feature = &config->feature_;
+        FeatureArg* const arg = &feature->args_[feature_arg_index];
+        if (argv[i][0] == '-') { // One of the action types or output.
+            if (!strcmp(argv[i], "-set")) {
+                if (ACTION_IS_NIL) {
+                    config->action_type_ = ACTION_SET;
+                } else {
+                    ERROR_GOTO1("ERROR: Multiple actions specified.\n", ErrParse);
+                }
+                ++i;
+            } else if (!strcmp(argv[i], "-get")) {
+                if (ACTION_IS_NIL) {
+                    config->action_type_ = ACTION_GET;
+                } else {
+                    ERROR_GOTO1("ERROR: Multiple actions specified.\n", ErrParse);
+                }
+                ++i;
+            } else if (!strcmp(argv[i], "-strip")) {
+                if (ACTION_IS_NIL) {
+                    config->action_type_ = ACTION_STRIP;
+                    feature->arg_count_ = 0;
+                } else {
+                    ERROR_GOTO1("ERROR: Multiple actions specified.\n", ErrParse);
+                }
+                ++i;
+            } else if (!strcmp(argv[i], "-frame")) {
+                CHECK_NUM_ARGS_LESS(3, ErrParse);
+                if (ACTION_IS_NIL || config->action_type_ == ACTION_SET) {
+                    config->action_type_ = ACTION_SET;
+                } else {
+                    ERROR_GOTO1("ERROR: Multiple actions specified.\n", ErrParse);
+                }
+                if (FEATURETYPE_IS_NIL || feature->type_ == FEATURE_ANMF) {
+                    feature->type_ = FEATURE_ANMF;
+                } else {
+                    ERROR_GOTO1("ERROR: Multiple features specified.\n", ErrParse);
+                }
+                arg->subtype_ = SUBTYPE_ANMF;
+                arg->filename_ = argv[i + 1];
+                arg->params_ = argv[i + 2];
+                ++feature_arg_index;
+                i += 3;
+            } else if (!strcmp(argv[i], "-loop") || !strcmp(argv[i], "-bgcolor")) {
+                CHECK_NUM_ARGS_LESS(2, ErrParse);
+                if (ACTION_IS_NIL || config->action_type_ == ACTION_SET) {
+                    config->action_type_ = ACTION_SET;
+                } else {
+                    ERROR_GOTO1("ERROR: Multiple actions specified.\n", ErrParse);
+                }
+                if (FEATURETYPE_IS_NIL || feature->type_ == FEATURE_ANMF) {
+                    feature->type_ = FEATURE_ANMF;
+                } else {
+                    ERROR_GOTO1("ERROR: Multiple features specified.\n", ErrParse);
+                }
+                arg->subtype_ = !strcmp(argv[i], "-loop") ? SUBTYPE_LOOP : SUBTYPE_BGCOLOR;
+                arg->params_ = argv[i + 1];
+                ++feature_arg_index;
+                i += 2;
+            } else if (!strcmp(argv[i], "-o")) {
+                CHECK_NUM_ARGS_LESS(2, ErrParse);
+                config->output_ = argv[i + 1];
+                i += 2;
+            } else if (!strcmp(argv[i], "-info")) {
+                CHECK_NUM_ARGS_NOT_EQUAL(2, ErrParse);
+                if (config->action_type_ != NIL_ACTION) {
+                    ERROR_GOTO1("ERROR: Multiple actions specified.\n", ErrParse);
+                } else {
+                    config->action_type_ = ACTION_INFO;
+                    feature->arg_count_ = 0;
+                    config->input_ = argv[i + 1];
+                }
+                i += 2;
+            } else if (!strcmp(argv[i], "-h") || !strcmp(argv[i], "-help")) {
+                PrintHelp();
+                DeleteConfig(config);
+                exit(0);
+            } else if (!strcmp(argv[i], "-version")) {
+                const int version = WebPGetMuxVersion();
+                printf("%d.%d.%d\n", (version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
+                DeleteConfig(config);
+                exit(0);
+            } else if (!strcmp(argv[i], "--")) {
+                if (i < argc - 1) {
+                    ++i;
+                    if (config->input_ == NULL) {
+                        config->input_ = argv[i];
+                    } else {
+                        ERROR_GOTO2("ERROR at '%s': Multiple input files specified.\n", argv[i], ErrParse);
+                    }
+                }
+                break;
+            } else {
+                ERROR_GOTO2("ERROR: Unknown option: '%s'.\n", argv[i], ErrParse);
+            }
+        } else { // One of the feature types or input.
+            if (ACTION_IS_NIL) {
+                ERROR_GOTO1("ERROR: Action must be specified before other arguments.\n", ErrParse);
+            }
+            if (!strcmp(argv[i], "icc") || !strcmp(argv[i], "exif") || !strcmp(argv[i], "xmp")) {
+                if (FEATURETYPE_IS_NIL) {
+                    feature->type_ = (!strcmp(argv[i], "icc"))
+                                         ? FEATURE_ICCP
+                                         : (!strcmp(argv[i], "exif")) ? FEATURE_EXIF : FEATURE_XMP;
+                } else {
+                    ERROR_GOTO1("ERROR: Multiple features specified.\n", ErrParse);
+                }
+                if (config->action_type_ == ACTION_SET) {
+                    CHECK_NUM_ARGS_LESS(2, ErrParse);
+                    arg->filename_ = argv[i + 1];
+                    ++feature_arg_index;
+                    i += 2;
+                } else {
+                    ++i;
+                }
+            } else if (!strcmp(argv[i], "frame") && (config->action_type_ == ACTION_GET)) {
+                CHECK_NUM_ARGS_LESS(2, ErrParse);
+                if (!FEATURETYPE_IS_NIL) ERROR_GOTO1("ERROR: Multiple features specified.\n", ErrParse);
+                feature->type_ = FEATURE_ANMF; // guard above: a repeated "frame" would overrun the lone args_ slot
+                arg->params_ = argv[i + 1];
+                ++feature_arg_index;
+                i += 2;
+            } else { // Assume input file.
+                if (config->input_ == NULL) {
+                    config->input_ = argv[i];
+                } else {
+                    ERROR_GOTO2("ERROR at '%s': Multiple input files specified.\n", argv[i], ErrParse);
+                }
+                ++i;
+            }
+        }
+    }
+ErrParse:
+    return ok;
+}
+
+// Additional checks after config is filled: an action, a feature (except for -info), an input
+// file (optional only when setting an animation or fragmented image) and an output file (except
+// for -info) must be present. Prints the first problem found and returns 0; returns 1 if none.
+static int ValidateConfig(WebPMuxConfig* config) {
+    const Feature* const feature = &config->feature_;
+    const ActionType action = config->action_type_;
+    const int is_info = (action == ACTION_INFO);
+    // True for a 'set' whose feature is an animation or a fragmented image.
+    const int builds_from_frames =
+        (action == ACTION_SET) && (feature->type_ == FEATURE_ANMF || feature->type_ == FEATURE_FRGM);
+    const char* problem = NULL;
+
+    if (action == NIL_ACTION) {
+        problem = "ERROR: No action specified.\n";
+    } else if (feature->type_ == NIL_FEATURE && !is_info) {
+        // Every action except -info operates on some feature.
+        problem = "ERROR: No feature specified.\n";
+    } else if (config->input_ == NULL && !builds_from_frames) {
+        // Only the animation / fragmented-image 'set' may go without an input file.
+        problem = "ERROR: No input file specified.\n";
+    } else if (config->output_ == NULL && !is_info) {
+        // -info is the only action that needs no output file.
+        problem = "ERROR: No output file specified.\n";
+    }
+
+    // Report the first problem found, if any.
+    if (problem != NULL) {
+        fprintf(stderr, "%s", problem);
+        return 0;
+    }
+    return 1;
+}
+
+// Create config object from command-line arguments; returns 1 on success, 0 on any failure.
+static int InitializeConfig(int argc, const char* argv[], WebPMuxConfig* config) {
+    int feature_arg_total = 0;
+    FeatureArg* feature_args;
+    assert(config != NULL);
+    memset(config, 0, sizeof(*config));
+
+    // Step 1: syntactic validation, which also reports how many argument slots are needed.
+    if (!ValidateCommandLine(argc, argv, &feature_arg_total)) {
+        fprintf(stderr, "Exiting due to command-line parsing error.\n");
+        return 0;
+    }
+
+    // Step 2: zero-initialized storage for the feature arguments.
+    feature_args = (FeatureArg*)calloc(feature_arg_total, sizeof(*feature_args));
+    config->feature_.arg_count_ = feature_arg_total;
+    config->feature_.args_ = feature_args;
+    if (feature_args == NULL) {
+        fprintf(stderr, "ERROR: Memory allocation error.\n");
+        return 0;
+    }
+
+    // Step 3: parse, then run the semantic checks (skipped when parsing already failed).
+    if (ParseCommandLine(argc, argv, config) && ValidateConfig(config)) return 1;
+    fprintf(stderr, "Exiting due to command-line parsing error.\n");
+    return 0;
+}
+
+#undef ACTION_IS_NIL
+#undef FEATURETYPE_IS_NIL
+#undef CHECK_NUM_ARGS_LESS
+#undef CHECK_NUM_ARGS_MORE
+
+//------------------------------------------------------------------------------
+// Processing.
+
+// Writes the frame (or fragment, when 'is_frame' is 0) whose number is given by the first
+// feature argument of 'config' to config->output_ as a single-image mux.
+static int GetFrameFragment(const WebPMux* mux, const WebPMuxConfig* config, int is_frame) {
+    const WebPChunkId wanted_id = is_frame ? WEBP_CHUNK_ANMF : WEBP_CHUNK_FRGM;
+    WebPMuxError status = WEBP_MUX_OK;
+    WebPMux* single_mux = NULL;
+    WebPMuxFrameInfo frame_info;
+    int ok = 1;
+    int bad_number = 0;
+    const int index = ExUtilGetInt(config->feature_.args_[0].params_, 10, &bad_number);
+
+    WebPDataInit(&frame_info.bitstream);
+    if (index < 0) {
+        ERROR_GOTO1("ERROR: Frame/Fragment index must be non-negative.\n", ErrGet);
+    }
+    if (bad_number) goto ErrGet;
+
+    status = WebPMuxGetFrame(mux, index, &frame_info);
+    if (status == WEBP_MUX_OK && frame_info.id != wanted_id) status = WEBP_MUX_NOT_FOUND;
+    if (status != WEBP_MUX_OK) {
+        ERROR_GOTO3("ERROR (%s): Could not get frame %d.\n", ErrorString(status), index, ErrGet);
+    }
+
+    single_mux = WebPMuxNew();
+    if (single_mux == NULL) {
+        ERROR_GOTO2("ERROR (%s): Could not allocate a mux object.\n", ErrorString(WEBP_MUX_MEMORY_ERROR), ErrGet);
+    }
+    status = WebPMuxSetImage(single_mux, &frame_info.bitstream, 1);
+    if (status != WEBP_MUX_OK) {
+        ERROR_GOTO2("ERROR (%s): Could not create single image mux object.\n", ErrorString(status), ErrGet);
+    }
+
+    ok = WriteWebP(single_mux, config->output_);
+
+ErrGet:
+    WebPDataClear(&frame_info.bitstream);
+    WebPMuxDelete(single_mux);
+    return ok && !bad_number;
+}
+
+// Runs the action selected in 'config' (get, set, strip or info). Returns 'ok': non-zero on success.
+static int Process(const WebPMuxConfig* config) {
+    WebPMux* mux = NULL;
+    WebPData chunk;
+    WebPMuxError err = WEBP_MUX_OK;
+    int ok = 1;
+    const Feature* const feature = &config->feature_;
+
+    switch (config->action_type_) {
+        case ACTION_GET: { // Extract one frame/fragment or one metadata chunk from the input.
+            ok = CreateMux(config->input_, &mux);
+            if (!ok) goto Err2;
+            switch (feature->type_) {
+                case FEATURE_ANMF:
+                case FEATURE_FRGM:
+                    ok = GetFrameFragment(mux, config, (feature->type_ == FEATURE_ANMF) ? 1 : 0);
+                    break;
+
+                case FEATURE_ICCP:
+                case FEATURE_EXIF:
+                case FEATURE_XMP:
+                    err = WebPMuxGetChunk(mux, kFourccList[feature->type_], &chunk);
+                    if (err != WEBP_MUX_OK) {
+                        ERROR_GOTO3("ERROR (%s): Could not get the %s.\n", ErrorString(err),
+                                    kDescriptions[feature->type_], Err2);
+                    }
+                    ok = WriteData(config->output_, &chunk);
+                    break;
+
+                default:
+                    ERROR_GOTO1("ERROR: Invalid feature for action 'get'.\n", Err2);
+                    break;
+            }
+            break;
+        }
+        case ACTION_SET: { // Build or modify a mux, then write it to the output file.
+            switch (feature->type_) {
+                case FEATURE_ANMF: {
+                    int i;
+                    WebPMuxAnimParams params = {0xFFFFFFFF, 0}; // Defaults; 'bgcolor'/'loop' args override them.
+                    mux = WebPMuxNew();
+                    if (mux == NULL) {
+                        ERROR_GOTO2("ERROR (%s): Could not allocate a mux object.\n",
+                                    ErrorString(WEBP_MUX_MEMORY_ERROR), Err2);
+                    }
+                    for (i = 0; i < feature->arg_count_; ++i) {
+                        switch (feature->args_[i].subtype_) {
+                            case SUBTYPE_BGCOLOR: {
+                                uint32_t bgcolor;
+                                ok = ParseBgcolorArgs(feature->args_[i].params_, &bgcolor);
+                                if (!ok) {
+                                    ERROR_GOTO1("ERROR: Could not parse the background color \n", Err2);
+                                }
+                                params.bgcolor = bgcolor;
+                                break;
+                            }
+                            case SUBTYPE_LOOP: {
+                                int parse_error = 0;
+                                const int loop_count = ExUtilGetInt(feature->args_[i].params_, 10, &parse_error);
+                                if (loop_count < 0 || loop_count > 65535) {
+                                    // Note: This is only a 'necessary' condition for loop_count
+                                    // to be valid. The 'sufficient' condition is checked in the
+                                    // WebPMuxSetAnimationParams() method called later.
+                                    ERROR_GOTO1(
+                                        "ERROR: Loop count must be in the range 0 to "
+                                        "65535.\n",
+                                        Err2);
+                                }
+                                ok = !parse_error;
+                                if (!ok) goto Err2;
+                                params.loop_count = loop_count;
+                                break;
+                            }
+                            case SUBTYPE_ANMF: {
+                                WebPMuxFrameInfo frame;
+                                frame.id = WEBP_CHUNK_ANMF;
+                                ok = ReadFileToWebPData(feature->args_[i].filename_, &frame.bitstream);
+                                if (!ok) goto Err2;
+                                ok = ParseFrameArgs(feature->args_[i].params_, &frame);
+                                if (!ok) {
+                                    WebPDataClear(&frame.bitstream);
+                                    ERROR_GOTO1("ERROR: Could not parse frame properties.\n", Err2);
+                                }
+                                err = WebPMuxPushFrame(mux, &frame, 1);
+                                WebPDataClear(&frame.bitstream); // Released whether or not the push succeeded.
+                                if (err != WEBP_MUX_OK) {
+                                    ERROR_GOTO3(
+                                        "ERROR (%s): Could not add a frame at index %d."
+                                        "\n",
+                                        ErrorString(err), i, Err2);
+                                }
+                                break;
+                            }
+                            default: {
+                                ERROR_GOTO1("ERROR: Invalid subtype for 'frame'\n", Err2);
+                                break;
+                            }
+                        }
+                    }
+                    err = WebPMuxSetAnimationParams(mux, &params);
+                    if (err != WEBP_MUX_OK) {
+                        ERROR_GOTO2("ERROR (%s): Could not set animation parameters.\n", ErrorString(err), Err2);
+                    }
+                    break;
+                }
+
+                case FEATURE_FRGM: {
+                    int i;
+                    mux = WebPMuxNew();
+                    if (mux == NULL) {
+                        ERROR_GOTO2("ERROR (%s): Could not allocate a mux object.\n",
+                                    ErrorString(WEBP_MUX_MEMORY_ERROR), Err2);
+                    }
+                    for (i = 0; i < feature->arg_count_; ++i) {
+                        WebPMuxFrameInfo frgm;
+                        frgm.id = WEBP_CHUNK_FRGM;
+                        ok = ReadFileToWebPData(feature->args_[i].filename_, &frgm.bitstream);
+                        if (!ok) goto Err2;
+                        ok = ParseFragmentArgs(feature->args_[i].params_, &frgm);
+                        if (!ok) {
+                            WebPDataClear(&frgm.bitstream);
+                            ERROR_GOTO1("ERROR: Could not parse fragment properties.\n", Err2);
+                        }
+                        err = WebPMuxPushFrame(mux, &frgm, 1);
+                        WebPDataClear(&frgm.bitstream); // Released whether or not the push succeeded.
+                        if (err != WEBP_MUX_OK) {
+                            ERROR_GOTO3("ERROR (%s): Could not add a fragment at index %d.\n", ErrorString(err), i,
+                                        Err2);
+                        }
+                    }
+                    break;
+                }
+
+                case FEATURE_ICCP:
+                case FEATURE_EXIF:
+                case FEATURE_XMP: { // Metadata is added to an existing input file rather than a new mux.
+                    ok = CreateMux(config->input_, &mux);
+                    if (!ok) goto Err2;
+                    ok = ReadFileToWebPData(feature->args_[0].filename_, &chunk);
+                    if (!ok) goto Err2;
+                    err = WebPMuxSetChunk(mux, kFourccList[feature->type_], &chunk, 1);
+                    free((void*)chunk.bytes);
+                    if (err != WEBP_MUX_OK) {
+                        ERROR_GOTO3("ERROR (%s): Could not set the %s.\n", ErrorString(err),
+                                    kDescriptions[feature->type_], Err2);
+                    }
+                    break;
+                }
+                default: {
+                    ERROR_GOTO1("ERROR: Invalid feature for action 'set'.\n", Err2);
+                    break;
+                }
+            }
+            ok = WriteWebP(mux, config->output_);
+            break;
+        }
+        case ACTION_STRIP: { // Remove one metadata chunk and write the result.
+            ok = CreateMux(config->input_, &mux);
+            if (!ok) goto Err2;
+            if (feature->type_ == FEATURE_ICCP || feature->type_ == FEATURE_EXIF || feature->type_ == FEATURE_XMP) {
+                err = WebPMuxDeleteChunk(mux, kFourccList[feature->type_]);
+                if (err != WEBP_MUX_OK) {
+                    ERROR_GOTO3("ERROR (%s): Could not strip the %s.\n", ErrorString(err),
+                                kDescriptions[feature->type_], Err2);
+                }
+            } else {
+                ERROR_GOTO1("ERROR: Invalid feature for action 'strip'.\n", Err2);
+                break;
+            }
+            ok = WriteWebP(mux, config->output_);
+            break;
+        }
+        case ACTION_INFO: { // Only displays information about the input; nothing is written.
+            ok = CreateMux(config->input_, &mux);
+            if (!ok) goto Err2;
+            ok = (DisplayInfo(mux) == WEBP_MUX_OK);
+            break;
+        }
+        default: {
+            assert(0); // Invalid action.
+            break;
+        }
+    }
+
+Err2:
+    WebPMuxDelete(mux); // Reached on success and on every error path; 'mux' may still be NULL here.
+    return ok;
+}
+
+//------------------------------------------------------------------------------
+// Main.
+
+// Program entry: builds the config from argv (program name skipped), runs it, and
+// returns 0 on success or 1 on failure. Usage help is printed if parsing fails.
+int main(int argc, const char* argv[]) {
+    WebPMuxConfig config;
+    const int parsed = InitializeConfig(argc - 1, argv + 1, &config);
+    const int succeeded = parsed && Process(&config);
+
+    if (!parsed) PrintHelp();
+    DeleteConfig(&config);
+    return !succeeded;
+}
+
+//------------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/wicdec.c b/codec/L2/demos/webpEnc/host/wicdec.c
new file mode 100644
index 0000000000..8bedd814a0
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/wicdec.c
@@ -0,0 +1,353 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Windows Imaging Component (WIC) decode.
+
+#include "./wicdec.h"
+
+#ifdef HAVE_CONFIG_H
+#include "webp/config.h"
+#endif
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#ifdef HAVE_WINCODEC_H
+#ifdef __MINGW32__
+#define INITGUID // Without this GUIDs are declared extern and fail to link
+#endif
+#define CINTERFACE
+#define COBJMACROS
+#define _WIN32_IE 0x500 // Workaround bug in shlwapi.h when compiling C++
+                        // code with COBJMACROS.
+#include <ole2.h>       // CreateStreamOnHGlobal()
+#include <shlwapi.h>
+#include <windows.h>
+#include <wincodec.h>
+
+#include "webp/encode.h"
+#include "./example_util.h"
+#include "./metadata.h"
+
+#define IFS(fn)                                                         \
+    do {                                                                \
+        if (SUCCEEDED(hr)) {                                            \
+            hr = (fn);                                                  \
+            if (FAILED(hr)) fprintf(stderr, #fn " failed %08lx\n", hr); \
+        }                                                               \
+    } while (0) // Runs 'fn' only while the caller's 'hr' is a success code; a failure is logged and kept.
+
+// Modified version of DEFINE_GUID from guiddef.h: always defines a 'static const' GUID.
+#define WEBP_DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
+    static const GUID name = {l, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}}
+
+#ifdef __cplusplus
+#define MAKE_REFGUID(x) (x) // C++ build: the GUID itself is passed.
+#else
+#define MAKE_REFGUID(x) &(x) // C build: the GUID's address is passed.
+#endif
+
+typedef struct WICFormatImporter {
+    const GUID* pixel_format; // WIC pixel format the source is converted to for this entry.
+    int bytes_per_pixel; // Bytes per pixel of that format; used to compute the row stride.
+    int (*import)(WebPPicture* const, const uint8_t* const, int); // Called as import(pic, rgb, stride).
+} WICFormatImporter;
+
+// Pixel-format GUIDs from Microsoft SDK 7.0a -- wincodec.h.
+// Local copies (note the trailing underscore in each name) are created for
+// compatibility when building against earlier versions of the SDK.
+WEBP_DEFINE_GUID(
+    GUID_WICPixelFormat24bppBGR_, 0x6fddc324, 0x4e03, 0x4bfe, 0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x0c);
+WEBP_DEFINE_GUID(
+    GUID_WICPixelFormat24bppRGB_, 0x6fddc324, 0x4e03, 0x4bfe, 0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x0d);
+WEBP_DEFINE_GUID(
+    GUID_WICPixelFormat32bppBGRA_, 0x6fddc324, 0x4e03, 0x4bfe, 0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x0f);
+WEBP_DEFINE_GUID(
+    GUID_WICPixelFormat32bppRGBA_, 0xf5c7ad2d, 0x6a8d, 0x43dd, 0xa7, 0xa8, 0xa2, 0x99, 0x35, 0x26, 0x1a, 0xe9);
+WEBP_DEFINE_GUID(
+    GUID_WICPixelFormat64bppBGRA_, 0x1562ff7c, 0xd352, 0x46f9, 0x97, 0x9e, 0x42, 0x97, 0x6b, 0x79, 0x22, 0x46);
+WEBP_DEFINE_GUID(
+    GUID_WICPixelFormat64bppRGBA_, 0x6fddc324, 0x4e03, 0x4bfe, 0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x16);
+
+// Opens 'filename' as a COM stream in '*stream'; "-" is read through ExUtilReadFile()
+// into a movable global memory block that backs the stream. Returns the failing HRESULT
+// (also reported on stderr) or a success code.
+static HRESULT OpenInputStream(const char* filename, IStream** stream) {
+    HRESULT hr = S_OK;
+    if (!strcmp(filename, "-")) {
+        const uint8_t* data = NULL;
+        size_t data_size = 0;
+        if (ExUtilReadFile(filename, &data, &data_size)) {
+            HGLOBAL image = GlobalAlloc(GMEM_MOVEABLE, data_size);
+            void* const image_mem = (image != NULL) ? GlobalLock(image) : NULL;
+            if (image_mem != NULL) {
+                memcpy(image_mem, data, data_size);
+                GlobalUnlock(image);
+                IFS(CreateStreamOnHGlobal(image, TRUE, stream));
+            } else {
+                hr = (image != NULL) ? E_FAIL : E_OUTOFMEMORY;
+            }
+            // The stream owns 'image' (fDeleteOnRelease) only once it exists; free it otherwise.
+            if (FAILED(hr) && image != NULL) GlobalFree(image);
+            free((void*)data);
+        } else {
+            hr = E_FAIL;
+        }
+    } else {
+        IFS(SHCreateStreamOnFileA(filename, STGM_READ, stream));
+    }
+
+    if (FAILED(hr)) {
+        fprintf(stderr, "Error opening input file %s (%08lx)\n", filename, hr);
+    }
+    return hr;
+}
+
+// -----------------------------------------------------------------------------
+// Metadata processing
+
+// Copies the first non-empty ICC color profile found in 'frame' into 'iccp'
+// ('iccp' is left untouched when there is none). Returns an HRESULT; the
+// caller is responsible for freeing 'iccp->bytes' on success and on failure.
+static HRESULT ExtractICCP(IWICImagingFactory* const factory,
+                           IWICBitmapFrameDecode* const frame,
+                           MetadataPayload* const iccp) {
+    HRESULT hr = S_OK;
+    UINT i, count;
+    IWICColorContext** color_contexts;
+
+    IFS(IWICBitmapFrameDecode_GetColorContexts(frame, 0, NULL, &count)); // Count query only.
+    if (FAILED(hr) || count == 0) return hr; // Nothing has been allocated yet.
+
+    color_contexts = (IWICColorContext**)calloc(count, sizeof(*color_contexts));
+    if (color_contexts == NULL) return E_OUTOFMEMORY;
+    for (i = 0; SUCCEEDED(hr) && i < count; ++i) {
+        IFS(IWICImagingFactory_CreateColorContext(factory, &color_contexts[i]));
+    }
+
+    if (SUCCEEDED(hr)) {
+        UINT num_color_contexts; // Number of contexts actually filled in by the call below.
+        IFS(IWICBitmapFrameDecode_GetColorContexts(frame, count, color_contexts, &num_color_contexts));
+        assert(FAILED(hr) || num_color_contexts <= count);
+        for (i = 0; SUCCEEDED(hr) && i < num_color_contexts; ++i) {
+            WICColorContextType type;
+            IFS(IWICColorContext_GetType(color_contexts[i], &type));
+            if (SUCCEEDED(hr) && type == WICColorContextProfile) {
+                UINT size; // Set by the size-only query first, then by the actual read.
+                IFS(IWICColorContext_GetProfileBytes(color_contexts[i], 0, NULL, &size));
+                if (SUCCEEDED(hr) && size > 0) {
+                    iccp->bytes = (uint8_t*)malloc(size);
+                    if (iccp->bytes == NULL) {
+                        hr = E_OUTOFMEMORY;
+                        break;
+                    }
+                    iccp->size = size;
+                    IFS(IWICColorContext_GetProfileBytes(color_contexts[i], (UINT)iccp->size, iccp->bytes, &size));
+                    if (SUCCEEDED(hr) && size != iccp->size) {
+                        fprintf(stderr, "Warning! ICC profile size (%u) != expected (%u)\n", size,
+                                (uint32_t)iccp->size);
+                        iccp->size = size;
+                    }
+                    break; // Only the first non-empty profile is kept.
+                }
+            }
+        }
+    }
+    for (i = 0; i < count; ++i) { // calloc() zeroed the array, so never-created slots are skipped.
+        if (color_contexts[i] != NULL) IUnknown_Release(color_contexts[i]);
+    }
+    free(color_contexts);
+    return hr;
+}
+
+// Reads the ICC profile of 'frame' into 'metadata' (TODO(jzern): add XMP/EXIF); frees it on failure.
+static HRESULT ExtractMetadata(IWICImagingFactory* const factory,
+                               IWICBitmapFrameDecode* const frame,
+                               Metadata* const metadata) {
+    const HRESULT status = ExtractICCP(factory, frame, &metadata->iccp);
+    if (!SUCCEEDED(status)) MetadataFree(metadata);
+    return status;
+}
+
+// -----------------------------------------------------------------------------
+
+static int HasPalette(GUID pixel_format) { // Non-zero iff 'pixel_format' is a 1/2/4/8bpp indexed format.
+    return (IsEqualGUID(MAKE_REFGUID(pixel_format), MAKE_REFGUID(GUID_WICPixelFormat1bppIndexed)) ||
+            IsEqualGUID(MAKE_REFGUID(pixel_format), MAKE_REFGUID(GUID_WICPixelFormat2bppIndexed)) ||
+            IsEqualGUID(MAKE_REFGUID(pixel_format), MAKE_REFGUID(GUID_WICPixelFormat4bppIndexed)) ||
+            IsEqualGUID(MAKE_REFGUID(pixel_format), MAKE_REFGUID(GUID_WICPixelFormat8bppIndexed)));
+}
+
+// Returns non-zero if the image carries alpha: for indexed formats, when the frame-level or
+// container-level palette reports alpha; otherwise when 'pixel_format' is 32/64bpp RGBA/BGRA.
+static int HasAlpha(IWICImagingFactory* const factory,
+                    IWICBitmapDecoder* const decoder,
+                    IWICBitmapFrameDecode* const frame,
+                    GUID pixel_format) {
+    IWICPalette* palettes[2] = {NULL, NULL}; // [0]: frame level, [1]: container level.
+    BOOL with_alpha[2] = {FALSE, FALSE};
+    int k;
+
+    if (!HasPalette(pixel_format)) {
+        return IsEqualGUID(MAKE_REFGUID(pixel_format), MAKE_REFGUID(GUID_WICPixelFormat32bppRGBA_)) ||
+               IsEqualGUID(MAKE_REFGUID(pixel_format), MAKE_REFGUID(GUID_WICPixelFormat32bppBGRA_)) ||
+               IsEqualGUID(MAKE_REFGUID(pixel_format), MAKE_REFGUID(GUID_WICPixelFormat64bppRGBA_)) ||
+               IsEqualGUID(MAKE_REFGUID(pixel_format), MAKE_REFGUID(GUID_WICPixelFormat64bppBGRA_));
+    }
+
+    // A palette may exist at the frame or container level; query whichever is present.
+    if (SUCCEEDED(IWICImagingFactory_CreatePalette(factory, &palettes[0])) &&
+        SUCCEEDED(IWICBitmapFrameDecode_CopyPalette(frame, palettes[0]))) {
+        IWICPalette_HasAlpha(palettes[0], &with_alpha[0]);
+    }
+    if (SUCCEEDED(IWICImagingFactory_CreatePalette(factory, &palettes[1])) &&
+        SUCCEEDED(IWICBitmapDecoder_CopyPalette(decoder, palettes[1]))) {
+        IWICPalette_HasAlpha(palettes[1], &with_alpha[1]);
+    }
+
+    for (k = 0; k < 2; ++k) {
+        if (palettes[k] != NULL) IUnknown_Release(palettes[k]);
+    }
+
+    return with_alpha[0] || with_alpha[1];
+}
+
+// Decodes frame 0 of 'filename' via WIC into 'pic' (plus metadata if requested); returns non-zero on success.
+int ReadPictureWithWIC(const char* const filename, WebPPicture* const pic, int keep_alpha, Metadata* const metadata) {
+    // From Microsoft SDK 6.0a -- ks.h
+    // Define a local copy to avoid link errors under mingw.
+    WEBP_DEFINE_GUID(GUID_NULL_, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+    static const WICFormatImporter kAlphaFormatImporters[] = {
+        {&GUID_WICPixelFormat32bppBGRA_, 4, WebPPictureImportBGRA},
+        {&GUID_WICPixelFormat32bppRGBA_, 4, WebPPictureImportRGBA},
+        {NULL, 0, NULL},
+    };
+    static const WICFormatImporter kNonAlphaFormatImporters[] = {
+        {&GUID_WICPixelFormat24bppBGR_, 3, WebPPictureImportBGR},
+        {&GUID_WICPixelFormat24bppRGB_, 3, WebPPictureImportRGB},
+        {NULL, 0, NULL},
+    };
+    HRESULT hr = S_OK;
+    IWICBitmapFrameDecode* frame = NULL;
+    IWICFormatConverter* converter = NULL;
+    IWICImagingFactory* factory = NULL;
+    IWICBitmapDecoder* decoder = NULL;
+    IStream* stream = NULL;
+    UINT frame_count = 0;
+    UINT width = 0, height = 0;
+    BYTE* rgb = NULL;
+    WICPixelFormatGUID src_pixel_format = GUID_WICPixelFormatUndefined;
+    const WICFormatImporter* importer = NULL;
+    GUID src_container_format = GUID_NULL_;
+    static const GUID* kAlphaContainers[] = {&GUID_ContainerFormatBmp, &GUID_ContainerFormatPng,
+                                             &GUID_ContainerFormatTiff, NULL};
+    int has_alpha = 0;
+    int stride;
+
+    IFS(CoInitialize(NULL)); // NOTE(review): no matching CoUninitialize() in this function -- confirm intended.
+    IFS(CoCreateInstance(MAKE_REFGUID(CLSID_WICImagingFactory), NULL, CLSCTX_INPROC_SERVER,
+                         MAKE_REFGUID(IID_IWICImagingFactory), (LPVOID*)&factory));
+    if (hr == REGDB_E_CLASSNOTREG) {
+        fprintf(stderr,
+                "Couldn't access Windows Imaging Component (are you running "
+                "Windows XP SP3 or newer?). Most formats not available. "
+                "Use -s for the available YUV input.\n");
+    }
+    // Prepare for image decoding.
+    IFS(OpenInputStream(filename, &stream));
+    IFS(IWICImagingFactory_CreateDecoderFromStream(factory, stream, NULL, WICDecodeMetadataCacheOnDemand, &decoder));
+    IFS(IWICBitmapDecoder_GetFrameCount(decoder, &frame_count));
+    if (SUCCEEDED(hr) && frame_count == 0) {
+        fprintf(stderr, "No frame found in input file.\n");
+        hr = E_FAIL;
+    }
+    IFS(IWICBitmapDecoder_GetFrame(decoder, 0, &frame)); // Only the first frame is decoded.
+    IFS(IWICBitmapFrameDecode_GetPixelFormat(frame, &src_pixel_format));
+    IFS(IWICBitmapDecoder_GetContainerFormat(decoder, &src_container_format));
+
+    if (SUCCEEDED(hr) && keep_alpha) { // Alpha is only looked for in the containers listed in kAlphaContainers.
+        const GUID** guid;
+        for (guid = kAlphaContainers; *guid != NULL; ++guid) {
+            if (IsEqualGUID(MAKE_REFGUID(src_container_format), MAKE_REFGUID(**guid))) {
+                has_alpha = HasAlpha(factory, decoder, frame, src_pixel_format);
+                break;
+            }
+        }
+    }
+
+    // Prepare for pixel format conversion (if necessary).
+    IFS(IWICImagingFactory_CreateFormatConverter(factory, &converter));
+    for (importer = has_alpha ? kAlphaFormatImporters : kNonAlphaFormatImporters;
+         hr == S_OK && importer->import != NULL; ++importer) { // Stops at the first convertible target format.
+        BOOL can_convert;
+        const HRESULT cchr = IWICFormatConverter_CanConvert(converter, MAKE_REFGUID(src_pixel_format),
+                                                            MAKE_REFGUID(*importer->pixel_format), &can_convert);
+        if (SUCCEEDED(cchr) && can_convert) break;
+    }
+    if (importer->import == NULL) hr = E_FAIL; // Reached the {NULL, 0, NULL} terminator: no usable format.
+
+    IFS(IWICFormatConverter_Initialize(converter, (IWICBitmapSource*)frame, importer->pixel_format,
+                                       WICBitmapDitherTypeNone, NULL, 0.0, WICBitmapPaletteTypeCustom));
+
+    // Decode.
+    IFS(IWICFormatConverter_GetSize(converter, &width, &height));
+    stride = importer->bytes_per_pixel * width * sizeof(*rgb); // Bytes per row of the converted image.
+    if (SUCCEEDED(hr)) {
+        rgb = (BYTE*)malloc(stride * height); // NOTE(review): the product is not checked for overflow.
+        if (rgb == NULL) hr = E_OUTOFMEMORY;
+    }
+    IFS(IWICFormatConverter_CopyPixels(converter, NULL, stride, stride * height, rgb));
+
+    // WebP conversion.
+    if (SUCCEEDED(hr)) {
+        int ok;
+        pic->width = width;
+        pic->height = height;
+        pic->use_argb = 1; // For WIC, we always force to argb
+        ok = importer->import(pic, rgb, stride);
+        if (!ok) hr = E_FAIL;
+    }
+    if (SUCCEEDED(hr)) {
+        if (metadata != NULL) { // Metadata extraction is optional; a failure here fails the whole read.
+            hr = ExtractMetadata(factory, frame, metadata);
+            if (FAILED(hr)) {
+                fprintf(stderr, "Error extracting image metadata using WIC!\n");
+            }
+        }
+    }
+
+    // Cleanup.
+    if (converter != NULL) IUnknown_Release(converter);
+    if (frame != NULL) IUnknown_Release(frame);
+    if (decoder != NULL) IUnknown_Release(decoder);
+    if (factory != NULL) IUnknown_Release(factory);
+    if (stream != NULL) IUnknown_Release(stream);
+    free(rgb);
+    return SUCCEEDED(hr);
+}
+#else  // !HAVE_WINCODEC_H
+// Fallback used when HAVE_WINCODEC_H is not defined: reports the missing support and fails.
+int ReadPictureWithWIC(const char* const filename,
+                       struct WebPPicture* const pic,
+                       int keep_alpha,
+                       struct Metadata* const metadata) {
+    fprintf(stderr,
+            "Windows Imaging Component (WIC) support not compiled. "
+            "Visual Studio and mingw-w64 builds support WIC. Make sure "
+            "wincodec.h detection is working correctly if using autoconf "
+            "and HAVE_WINCODEC_H is defined before building.\n");
+
+    // None of the arguments are used by this stub.
+    (void)metadata, (void)keep_alpha, (void)pic, (void)filename;
+    return 0;
+}
+#endif // HAVE_WINCODEC_H
+
+// -----------------------------------------------------------------------------
diff --git a/codec/L2/demos/webpEnc/host/wicdec.h b/codec/L2/demos/webpEnc/host/wicdec.h
new file mode 100644
index 0000000000..45abb7614d
--- /dev/null
+++ b/codec/L2/demos/webpEnc/host/wicdec.h
@@ -0,0 +1,35 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Windows Imaging Component (WIC) decode.
+
+#ifndef WEBP_EXAMPLES_WICDEC_H_
+#define WEBP_EXAMPLES_WICDEC_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct Metadata;
+struct WebPPicture;
+
+// Reads an image from 'filename', returning the decoded output in 'pic'.
+// If 'keep_alpha' is true and the image has an alpha channel, the output is
+// RGBA; otherwise it is RGB. pic->use_argb is always forced to true.
+// Returns true on success.
+int ReadPictureWithWIC(const char* const filename,
+                       struct WebPPicture* const pic,
+                       int keep_alpha,
+                       struct Metadata* const metadata);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // WEBP_EXAMPLES_WICDEC_H_
diff --git a/codec/L2/demos/webpEnc/images/small32x32.png b/codec/L2/demos/webpEnc/images/small32x32.png
new file mode 100644
index 0000000000..e50f46a988
Binary files /dev/null and b/codec/L2/demos/webpEnc/images/small32x32.png differ
diff --git a/codec/L2/demos/webpEnc/kernel/vp8_hls_ac.cpp b/codec/L2/demos/webpEnc/kernel/vp8_hls_ac.cpp
new file mode 100644
index 0000000000..6faef2e5b6
--- /dev/null
+++ b/codec/L2/demos/webpEnc/kernel/vp8_hls_ac.cpp
@@ -0,0 +1,2242 @@
+/**********
+           Copyright (c) 2017, Xilinx, Inc.
+           All rights reserved.
+           Redistribution and use in source and binary forms, with or without modification,
+           are permitted provided that the following conditions are met:
+           1. Redistributions of source code must retain the above copyright notice,
+           this list of conditions and the following disclaimer.
+           2. Redistributions in binary form must reproduce the above copyright notice,
+           this list of conditions and the following disclaimer in the documentation
+           and/or other materials provided with the distribution.
+           3. Neither the name of the copyright holder nor the names of its contributors
+           may be used to endorse or promote products derived from this software
+           without specific prior written permission.
+           THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+           ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+           THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+           IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+           INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+           PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+           HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+           OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+           EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********/
+
+#include <ap_int.h>
+#include <hls_stream.h>
+#include "vp8_hls_syn.h"
+#include "vp8_hls_syn2.h"
+#include <stdio.h>
+#include <string.h>
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+//==================================kernel_2_ArithmeticCoding===========================================//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+// kernel_2_ArithmeticCoding
+//|-memcpy
+//|-Kernel2_top_read
+//|-kernel_2_RecordTokens_pre
+//|-kernel_2_CreateTokens_with_isFinal
+//|-VP8EmitTokens_str_hls_4stages
+//|-PackStr2Mem32_t_NoLast
+//|-PackWideStr2Mem32_t_NoLast
+
+void kernel_2_ArithmeticCoding(uint32_t pin_level[SIZE32_MEM_BW], // in: records consumed by Kernel2_top_read
+                               uint8_t* pin_prob, // in: sizeof(prob) = 4 * 8 * 3 * 11 bytes are copied; m_axi depth is 2048
+                               uint32_t pout_bw[SIZE32_MEM_BW], // out: written by VP8EmitTokens_str_hls_4stages
+                               uint32_t pout_ret[SIZE32_MEM_RET], // out: str_ret values packed by PackStr2Mem32_t_NoLast
+                               uint32_t pout_pred[SIZE32_MEM_PRED]) { // out: str_pred values packed by PackWideStr2Mem32_t_NoLast
+#pragma HLS INTERFACE m_axi port = pin_level offset = slave bundle = gmem0 depth =               \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pin_prob offset = slave bundle = gmem1 depth = 2048 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_bw offset = slave bundle = gmem2 depth =                     \
+    65536 * 384 / 4 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_ret offset = slave bundle = gmem3 depth =              \
+    65536 * 1 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_pred offset = slave bundle = gmem4 depth =                  \
+    65536 * 16 / 2 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+    // Each pointer argument above has its own m_axi bundle (gmem0..gmem4); all of them and the return use the s_axilite 'control' bundle.
+#pragma HLS INTERFACE s_axilite port = pin_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pin_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_bw bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_ret bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_pred bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+#pragma HLS DATAFLOW
+    // Local copy of the probability table; it is handed to VP8EmitTokens_str_hls_4stages below.
+    uint8_t prob[4 * 8 * 3 * 11];
+    memcpy(prob, pin_prob, sizeof(prob));
+    // Stage 1 (Kernel2_top_read): splits the input records into the streams declared here.
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_dc;
+#pragma HLS STREAM variable = str_level_dc depth = 4
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_ac;
+#pragma HLS STREAM variable = str_level_ac depth = 8 * 16
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_uv;
+#pragma HLS STREAM variable = str_level_uv depth = 8 * 8
+    hls::stream<ap_uint<64> > str_pred;
+#pragma HLS STREAM variable = str_pred depth = 64
+    hls::stream<ap_uint<6> > str_ret;
+#pragma HLS STREAM variable = str_ret depth = 64
+    hls::stream<ap_uint<1> > str_type_mb;
+#pragma HLS STREAM variable = str_type_mb depth = 64
+    hls::stream<uint16_t> str_mb_h;
+#pragma HLS STREAM variable = str_mb_h depth = 64
+    hls::stream<uint16_t> str_mb_w;
+#pragma HLS STREAM variable = str_mb_w depth = 64
+    Kernel2_top_read(pin_level,
+
+                     str_level_dc, str_level_ac, str_level_uv, str_pred, str_ret, str_type_mb, str_mb_h, str_mb_w);
+    hls::stream<ap_uint<64> > str_0_dc; // str_0..str_3 word streams, one set each for DC, luma-AC and U/V blocks
+#pragma HLS STREAM variable = str_0_dc depth = 64
+    hls::stream<ap_uint<64> > str_1_dc;
+#pragma HLS STREAM variable = str_1_dc depth = 64
+    hls::stream<ap_uint<64> > str_2_dc;
+#pragma HLS STREAM variable = str_2_dc depth = 64
+    hls::stream<ap_uint<64> > str_3_dc;
+#pragma HLS STREAM variable = str_3_dc depth = 64
+    hls::stream<ap_uint<64> > str_0_ac;
+#pragma HLS STREAM variable = str_0_ac depth = 64
+    hls::stream<ap_uint<64> > str_1_ac;
+#pragma HLS STREAM variable = str_1_ac depth = 64
+    hls::stream<ap_uint<64> > str_2_ac;
+#pragma HLS STREAM variable = str_2_ac depth = 64
+    hls::stream<ap_uint<64> > str_3_ac;
+#pragma HLS STREAM variable = str_3_ac depth = 64
+    hls::stream<ap_uint<64> > str_0_uv;
+#pragma HLS STREAM variable = str_0_uv depth = 64
+    hls::stream<ap_uint<64> > str_1_uv;
+#pragma HLS STREAM variable = str_1_uv depth = 64
+    hls::stream<ap_uint<64> > str_2_uv;
+#pragma HLS STREAM variable = str_2_uv depth = 64
+    hls::stream<ap_uint<64> > str_3_uv;
+#pragma HLS STREAM variable = str_3_uv depth = 64
+    hls::stream<ap_uint<1> > str_type_mb_out;
+#pragma HLS STREAM variable = str_type_mb_out depth = 64
+
+    hls::stream<uint16_t> str_mb_h_out;
+#pragma HLS STREAM variable = str_mb_h_out depth = 64
+    hls::stream<uint16_t> str_mb_w_out;
+#pragma HLS STREAM variable = str_mb_w_out depth = 64
+    kernel_2_RecordTokens_pre(str_mb_h, str_mb_w, str_type_mb, str_level_dc, str_level_ac, str_level_uv, str_0_dc,
+                              str_1_dc, str_2_dc, str_3_dc, str_0_ac, str_1_ac, str_2_ac, str_3_ac, str_0_uv, str_1_uv,
+                              str_2_uv, str_3_uv, str_mb_h_out, str_mb_w_out, str_type_mb_out);
+    // Stage 3 (kernel_2_CreateTokens_with_isFinal): reads the word streams back and writes 16-bit entries to tokens_str_final.
+    hls::stream<uint16_t> str_mb_h_out2;
+#pragma HLS STREAM variable = str_mb_h_out2 depth = 64
+    hls::stream<uint16_t> str_mb_w_out2;
+#pragma HLS STREAM variable = str_mb_w_out2 depth = 64
+    hls::stream<ap_uint<16> > tokens_str_final;
+#pragma HLS STREAM variable = tokens_str_final depth = 1024
+    kernel_2_CreateTokens_with_isFinal(str_mb_h_out, str_mb_w_out, str_type_mb_out, str_0_dc, str_1_dc, str_2_dc,
+                                       str_3_dc, str_0_ac, str_1_ac, str_2_ac, str_3_ac, str_0_uv, str_1_uv, str_2_uv,
+                                       str_3_uv, str_mb_h_out2, str_mb_w_out2, tokens_str_final);
+    // mb_h / mb_w were forwarded unchanged through the two stages above; mb_h * mb_w sizes the ret/pred write-back.
+    uint16_t mb_h = str_mb_h_out2.read();
+    uint16_t mb_w = str_mb_w_out2.read();
+    VP8EmitTokens_str_hls_4stages(pout_bw, tokens_str_final,
+                                  (uint8_t*)prob); // VP8EmitTokens_hls(pout_bw, &tokens, (uint8_t*)prob);
+    PackStr2Mem32_t_NoLast<6, 256>(pout_ret, str_ret, mb_h * mb_w);
+    PackWideStr2Mem32_t_NoLast<64, 256>(pout_pred, str_pred, mb_h * mb_w);
+}
+
+//==================================kernel_2_ArithmeticCoding===========================================//
+void Kernel2_top_read(uint32_t pin_level[SIZE32_MEM_LEVEL],
+                      // output
+                      hls::stream<ap_int<WD_LEVEL * 16> >& str_level_dc,
+                      hls::stream<ap_int<WD_LEVEL * 16> >& str_level_ac,
+                      hls::stream<ap_int<WD_LEVEL * 16> >& str_level_uv,
+                      hls::stream<ap_uint<64> >& str_pred,
+                      hls::stream<ap_uint<6> >& str_ret,
+                      hls::stream<ap_uint<1> >& str_type_mb,
+                      hls::stream<uint16_t>& str_mb_h,
+                      hls::stream<uint16_t>& str_mb_w) {
+    // Reads pin_level in 256-word records, one per loop iteration, and forwards each record to
+    // Kernel2_read__array_to_str. The first record also supplies the frame size: word 420 / 2 holds
+    // mb_h in its upper 16 bits and mb_w in its lower 16 bits; both are written once to str_mb_h / str_mb_w.
+    // The loop stops when the (x_mb, y_mb) counter reaches (0, mb_h).
+    uint16_t y_mb = 0;
+    uint16_t x_mb = 0;
+    uint16_t mb_h = 1; // placeholder; overwritten from the first record
+    uint16_t mb_w = 1; // placeholder; overwritten from the first record
+    uint32_t tmp_arr[256]; // local copy of the current record
+    uint32_t* psrc = pin_level;
+    uint32_t num_mb = 0; // records read so far
+READ_ARRAY_TO_STR:
+    do {
+#pragma HLS LOOP_TRIPCOUNT min = 120 * 68 max = 256 * 256
+#pragma HLS PIPELINE
+        memcpy(tmp_arr, psrc, 256 * sizeof(uint32_t));
+        psrc += 256;
+        if (num_mb == 0) { // frame size is taken from the first record only
+            mb_h = tmp_arr[420 / 2] >> 16;
+            mb_w = tmp_arr[420 / 2] & 0xffff;
+            str_mb_h.write(mb_h);
+            str_mb_w.write(mb_w);
+        }
+        Kernel2_read__array_to_str(tmp_arr, str_level_dc, str_level_ac, str_level_uv, str_pred, str_ret, str_type_mb);
+        if (x_mb != (mb_w - 1)) // advance the column, wrapping to the next row after the last one
+            x_mb++;
+        else {
+            x_mb = 0;
+            y_mb++;
+        }
+        num_mb++;
+    } while (y_mb != (mb_h) || x_mb != 0); // NOTE(review): assumes mb_h >= 1 and mb_w >= 1; a zero size is not guarded here
+}
+//==================================kernel_2_ArithmeticCoding===========================================//
+void Kernel2_read__array_to_str(uint32_t pin[256], // in: one 256-word record
+                                hls::stream<ap_int<WD_LEVEL * 16> >& str_level_dc, // out: 1 vector per call
+                                hls::stream<ap_int<WD_LEVEL * 16> >& str_level_ac, // out: 16 vectors per call
+                                hls::stream<ap_int<WD_LEVEL * 16> >& str_level_uv, // out: 8 vectors per call
+                                hls::stream<ap_uint<64> >& str_pred, // out: 1 word per call
+                                hls::stream<ap_uint<6> >& str_ret, // out: 1 value per call
+                                hls::stream<ap_uint<1> >& str_type_mb) { // out: bit 4 of the value sent on str_ret
+#pragma HLS PIPELINE
+    uint32_t* plevel = pin;
+    int x, y, ch;
+    ap_int<WD_LEVEL* 16> tmp = SetVectFrom32bit(plevel); // words 0..7: the vector sent on str_level_dc
+    str_level_dc.write(tmp);
+    plevel += 16 / 2; // each vector occupies 8 input words (SetVectFrom32bit reads 8)
+
+    // luma-AC: 4 x 4 = 16 vectors, words 8..135
+    for (y = 0; y < 4; ++y) {
+    READ_ARRAY_16:
+        for (x = 0; x < 4; ++x) {
+#pragma HLS PIPELINE
+            ap_int<WD_LEVEL* 16> tmp = SetVectFrom32bit(plevel);
+            str_level_ac.write(tmp);
+            int16_t test[16]; // NOTE(review): 'test' is never read afterwards; looks like leftover debug code
+            CPY16(test, tmp, WD_LEVEL);
+            plevel += 16 / 2;
+        }
+    }
+
+    // U/V: 2 x 2 x 2 = 8 vectors, words 136..199
+    for (ch = 0; ch <= 2; ch += 2) {
+        for (y = 0; y < 2; ++y) {
+            for (x = 0; x < 2; ++x) {
+#pragma HLS PIPELINE
+                ap_int<WD_LEVEL* 16> tmp = SetVectFrom32bit(plevel);
+                str_level_uv.write(tmp);
+                plevel += 16 / 2;
+            }
+        }
+    }
+    // Words 200..207 follow the 25 level vectors; SetVect64From32bit packs them into one 64-bit word.
+    ap_uint<64> vct_pred = SetVect64From32bit(pin + 200);
+    str_pred.write(vct_pred);
+    ap_uint<6> ret = (ap_uint<6>)pin[416 / 2]; // low 6 bits of word 208
+    str_ret.write(ret);
+    str_type_mb.write(ret(4, 4)); // bit 4 is what downstream stages read as the macroblock type
+}
+//==================================kernel_2_ArithmeticCoding===========================================//
+ap_int<WD_LEVEL * 16> SetVectFrom32bit(uint32_t* pin) {
+#pragma HLS INLINE
+    ap_int<WD_LEVEL * 16> vect; // 16 lanes of WD_LEVEL bits; all 16 are written by the loop below
+    for (int word = 0; word < 8; word++) {
+#pragma HLS PIPELINE
+        ap_int<32> packed = pin[word];                // two 16-bit fields per 32-bit input word
+        const int lo_lsb = (word * 2 + 0) * WD_LEVEL; // lane 2*word   <- low WD_LEVEL bits of bits 15..0
+        const int hi_lsb = (word * 2 + 1) * WD_LEVEL; // lane 2*word+1 <- low WD_LEVEL bits of bits 31..16
+        vect(lo_lsb + WD_LEVEL - 1, lo_lsb) = packed(WD_LEVEL - 1, 0);
+        vect(hi_lsb + WD_LEVEL - 1, hi_lsb) = packed(WD_LEVEL - 1 + 16, 16);
+    }
+    return vect;
+}
+//==================================kernel_2_ArithmeticCoding===========================================//
+ap_uint<4 * 16> SetVect64From32bit(uint32_t* pin) {
+#pragma HLS INLINE
+    ap_uint<4 * 16> vect; // 16 nibbles; all 16 are written by the loop below
+    for (int word = 0; word < 8; word++) {
+#pragma HLS PIPELINE
+        ap_uint<32> packed = pin[word];      // two 16-bit fields per 32-bit input word
+        const int lo_lsb = (word * 2) * 4;     // nibble 2*word   <- bits 3..0 of the input word
+        const int hi_lsb = (word * 2 + 1) * 4; // nibble 2*word+1 <- bits 19..16 of the input word
+        vect(lo_lsb + 4 - 1, lo_lsb) = packed(4 - 1, 0);
+        vect(hi_lsb + 4 - 1, hi_lsb) = packed(4 - 1 + 16, 16);
+    }
+    return vect;
+}
+
+static ap_int<5> FindLast(ap_int<WD_LEVEL * 16> level) {
+#pragma HLS PIPELINE II = 1
+    ap_int<5> pos; // highest lane index whose WD_LEVEL-bit value is non-zero
+FIND_LAST:
+    for (pos = 15; pos > -1; pos--) {
+        const ap_int<WD_LEVEL> lane = VCT_GET(level, pos, WD_LEVEL);
+        if (lane != 0) break;
+    }
+    return pos; // -1 when all 16 lanes are zero
+}
+static ap_uint<3> VP8EncBands_hls(ap_uint<5> n) {
+// Comparator-based replacement for the lookup table
+//   VP8EncBands[16 + 1] = {0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 0}
+// Returns the table entry for index n.
+#pragma HLS INLINE
+    // Indices 0..3 map to themselves.
+    if (n < 4) return n;
+    // Entries that differ from the common value 6.
+    if (n == 5) return 4;
+    if (n == 6) return 5;
+    if (n == 15) return 7;
+    // Index 16 is the extra trailing entry of the table.
+    if (n == 16) return 0;
+    // Everything else (4, 7..14) yields 6.
+    // As with the final else of the if/else chain this replaces,
+    // any n above 16 also yields 6.
+    return 6;
+}
+//==================================kernel_2_ArithmeticCoding===========================================//
+void kernel_2_RecordTokens_pre(hls::stream<uint16_t>& str_mb_h, // in: frame height, read once
+                               hls::stream<uint16_t>& str_mb_w, // in: frame width, read once
+                               hls::stream<ap_uint<1> >& str_type_mb, // in: one value per macroblock
+                               hls::stream<ap_int<WD_LEVEL * 16> >& str_level_dc,
+                               hls::stream<ap_int<WD_LEVEL * 16> >& str_level_ac,
+                               hls::stream<ap_int<WD_LEVEL * 16> >& str_level_uv,
+                               hls::stream<ap_uint<64> >& str_0_dc, // out: str_0..str_3 word streams for DC blocks
+                               hls::stream<ap_uint<64> >& str_1_dc,
+                               hls::stream<ap_uint<64> >& str_2_dc,
+                               hls::stream<ap_uint<64> >& str_3_dc,
+                               hls::stream<ap_uint<64> >& str_0_ac, // out: same, for luma-AC blocks
+                               hls::stream<ap_uint<64> >& str_1_ac,
+                               hls::stream<ap_uint<64> >& str_2_ac,
+                               hls::stream<ap_uint<64> >& str_3_ac,
+                               hls::stream<ap_uint<64> >& str_0_uv, // out: same, for U/V blocks
+                               hls::stream<ap_uint<64> >& str_1_uv,
+                               hls::stream<ap_uint<64> >& str_2_uv,
+                               hls::stream<ap_uint<64> >& str_3_uv,
+                               hls::stream<uint16_t>& str_mb_h_out, // out: mb_h forwarded unchanged
+                               hls::stream<uint16_t>& str_mb_w_out, // out: mb_w forwarded unchanged
+                               hls::stream<ap_uint<1> >& str_type_mb_out) { // out: type forwarded per macroblock
+    static ap_NoneZero ap_nz; // static, so it persists across calls - NOTE(review): confirm it needs no reset between frames
+    uint16_t mb_h = str_mb_h.read();
+    uint16_t mb_w = str_mb_w.read();
+    str_mb_h_out.write(mb_h);
+    str_mb_w_out.write(mb_w);
+RECORD_TOKENS_Y:
+    for (uint16_t y_mb = 0; y_mb < mb_h; y_mb++) // unbraced: the body is the labelled X loop below
+#pragma HLS LOOP_TRIPCOUNT min = 16 max = 68
+    X:
+        for (uint16_t x_mb = 0; x_mb < mb_w; x_mb++) {
+#pragma HLS LOOP_TRIPCOUNT min = 16 max = 120
+            RecordTokens_nrd2_mb_w(&ap_nz, x_mb, y_mb, str_type_mb, str_level_dc, str_level_ac, str_level_uv, str_0_dc,
+                                   str_1_dc, str_2_dc, str_3_dc, str_0_ac, str_1_ac, str_2_ac, str_3_ac, str_0_uv,
+                                   str_1_uv, str_2_uv, str_3_uv, str_type_mb_out);
+        }
+}
+//==================================kernel_2_ArithmeticCoding===========================================//
+/////RecordTokens_nrd2_mb_w////////////////////////////////
+void RecordTokens_nrd2_mb_w(ap_NoneZero* ap_nz, // in/out: non-zero context, loaded at entry and stored at exit
+                            int x_, // macroblock column
+                            int y_, // macroblock row
+                            hls::stream<ap_uint<1> >& str_type_mb,
+                            hls::stream<ap_int<WD_LEVEL * 16> >& str_level_dc,
+                            hls::stream<ap_int<WD_LEVEL * 16> >& str_level_ac,
+                            hls::stream<ap_int<WD_LEVEL * 16> >& str_level_uv,
+                            hls::stream<ap_uint<64> >& str_0_dc,
+                            hls::stream<ap_uint<64> >& str_1_dc,
+                            hls::stream<ap_uint<64> >& str_2_dc,
+                            hls::stream<ap_uint<64> >& str_3_dc,
+                            hls::stream<ap_uint<64> >& str_0_ac,
+                            hls::stream<ap_uint<64> >& str_1_ac,
+                            hls::stream<ap_uint<64> >& str_2_ac,
+                            hls::stream<ap_uint<64> >& str_3_ac,
+                            hls::stream<ap_uint<64> >& str_0_uv,
+                            hls::stream<ap_uint<64> >& str_1_uv,
+                            hls::stream<ap_uint<64> >& str_2_uv,
+                            hls::stream<ap_uint<64> >& str_3_uv,
+                            hls::stream<ap_uint<1> >& str_type_mb_out) {
+    int x, y, ch;
+    ap_uint<9> ap_top_nz = ap_nz->load_top9(x_, y_);
+    ap_uint<9> ap_left_nz = ap_nz->load_left9(x_);
+    ap_uint<9> top_nz_ = ap_top_nz;   // working copy; bits 0..3 luma, 4..7 U/V, 8 DC as indexed below
+    ap_uint<9> left_nz_ = ap_left_nz; // working copy, same bit assignment
+    // One type bit and one DC vector are read for every macroblock, whatever the type.
+    ap_uint<1> type_ = str_type_mb.read();
+    str_type_mb_out.write(type_);
+    ap_int<WD_LEVEL* 16> c_hls = str_level_dc.read();
+    if (type_ == 1) { // i16x16
+        const int ctx = top_nz_[8] + left_nz_[8];
+        int last = FindLast(c_hls);
+        top_nz_[8] = left_nz_[8] = last < 0 ? 0 : 1;
+        VP8RecordCoeffTokens_hls_w(ctx, 1, last, c_hls, str_0_dc, str_1_dc, str_2_dc, str_3_dc);
+    }
+
+// luma-AC: 4 x 4 blocks; coeff_type is 0 when type_ == 1, otherwise 3
+VP8_RECORD_COEFF_TOKENS_W_LUMA:
+    for (y = 0; y < 4; ++y) {
+    LUMA_X:
+        for (x = 0; x < 4; ++x) {
+            const int ctx = top_nz_[x] + left_nz_[y];
+            ap_int<WD_LEVEL* 16> c_hls = str_level_ac.read();
+            int16_t test[16]; // NOTE(review): 'test' is never read afterwards; looks like leftover debug code
+            CPY16(test, c_hls, WD_LEVEL);
+            int last = FindLast(c_hls);
+            int coeff_type = type_ == 1 ? 0 : 3;
+            top_nz_[x] = left_nz_[y] = last < 0 ? 0 : 1;
+            VP8RecordCoeffTokens_hls_w(ctx, coeff_type, last, c_hls, str_0_ac, str_1_ac, str_2_ac, str_3_ac);
+        }
+    }
+
+// U/V: two channels (ch = 0 and ch = 2) of 2 x 2 blocks, coeff_type 2
+VP8_RECORD_COEFF_TOKENS_W_UV:
+    for (ch = 0; ch <= 2; ch += 2) {
+    UV_Y:
+        for (y = 0; y < 2; ++y) {
+        UV_X:
+            for (x = 0; x < 2; ++x) {
+                const int ctx = top_nz_[4 + ch + x] + left_nz_[4 + ch + y];
+                ap_int<WD_LEVEL* 16> c_hls = str_level_uv.read();
+                int last = FindLast(c_hls);
+                top_nz_[4 + ch + x] = left_nz_[4 + ch + y] = last < 0 ? 0 : 1;
+                VP8RecordCoeffTokens_hls_w(ctx, 2, last, c_hls, str_0_uv, str_1_uv, str_2_uv, str_3_uv);
+            }
+        }
+    }
+    // Pack the updated flags into one word: top flags at bits 12-15, 18-19, 22-23 and 24.
+    uint32_t nz = 0;
+    nz |= (ap_uint<25>)((top_nz_[0] << 12) | (top_nz_[1] << 13));
+    nz |= (ap_uint<25>)((top_nz_[2] << 14) | (top_nz_[3] << 15));
+    nz |= (ap_uint<25>)((top_nz_[4] << 18) | (top_nz_[5] << 19));
+    nz |= (ap_uint<25>)((top_nz_[6] << 22) | (top_nz_[7] << 23));
+    nz |= (ap_uint<25>)((top_nz_[8] << 24)); // we propagate the _top_ bit, esp. for intra4
+    // left flags at bits 3, 7, 11, 17 and 21
+    nz |= (ap_uint<25>)((left_nz_[0] << 3) | (left_nz_[1] << 7));
+    nz |= (ap_uint<25>)((left_nz_[2] << 11));
+    nz |= (ap_uint<25>)((left_nz_[4] << 17) | (left_nz_[6] << 21));
+    // Write the results back into the shared context object for position x_.
+    ap_nz->left_nz[8] = left_nz_[8];
+    ap_nz->nz_current = nz; //*it->nz_;
+    ap_nz->store_nz(x_);
+}
+//==================================kernel_2_ArithmeticCoding===========================================//
+void VP8RecordCoeffTokens_hls_w(ap_uint<2> ctx, // context, added as ctx * 11 to the first base index
+                                ap_uint<2> coeff_type, // block type; type 0 skips coefficient 0
+                                ap_int<5> last, // index of the last non-zero coefficient, negative if none
+                                ap_int<WD_LEVEL * 16> coeffs, // 16 coefficients, WD_LEVEL bits each
+                                hls::stream<ap_uint<64> >& str_0, // out: one header word per call
+                                hls::stream<ap_uint<64> >& str_1, // out: one word per processed coefficient
+                                hls::stream<ap_uint<64> >& str_2, // out: one word per processed coefficient
+                                hls::stream<ap_uint<64> >& str_3) { // out: one word per processed coefficient
+    TokensStr0_hls(ctx, coeff_type, last, str_0);
+    ap_uint<11> base_id_last = TOKEN_ID2((ap_uint<11>)coeff_type, (coeff_type == 0 ? 1 : 0)) +
+                               ctx * 11; // TOKEN_ID0(coeff_type, coeff_type==0, ctx);
+    ap_uint<11> base_id = base_id_last; // base index for the current coefficient, updated every iteration
+TOKEN_ID2:
+    for (int i = 0; i <= last; i++) {
+#pragma HLS LOOP_TRIPCOUNT min = 0 max = 16
+#pragma HLS PIPELINE
+        if (i == 0 && coeff_type == 0) // first==1)
+            continue;
+        ap_int<WD_LEVEL> c = (ap_int<WD_LEVEL>)VCT_GET(coeffs, i, WD_LEVEL); // coeffs[i];
+        ap_uint<1> sign = c < 0;
+        ap_uint<WD_LEVEL> v; // magnitude of c
+        if (c < 0)
+            v = (-c);
+        else
+            v = c;
+        // str_1-----------------------------
+        // sign
+        ap_uint<1> isV_N0 = v != 0;
+        ap_uint<1> isLastBEi = i < last;
+        ap_uint<1> isV_B1 = v > 1;
+        // str_2-------------------------------------------
+        ap_uint<1> isV_B4 = v > 4;
+        ap_uint<1> isV_N2 = v != 2;
+        ap_uint<1> isV_4 = v == 4;
+        ap_uint<1> isV_B10 = v > 10;
+        // str_3-------------------------------------------
+        ap_uint<1> isV_B6 = v > 6;
+        ap_uint<1> isV_6 = v == 6;
+        ap_uint<1> isV_BE9 = v >= 9;
+        ap_uint<1> isV_even = (1 - v) & 1; // 1 when v is even; parentheses spell out that '-' binds before '&'
+        //-------------------------------------------------
+
+        ap_uint<11> base_id_next;
+        // The band for position i + 1 comes from VP8EncBands_hls(); the unused local copy of the table was dropped.
+        uint8_t VP8EncBands_ = VP8EncBands_hls(i + 1);
+        if (v == 0)
+            base_id_next = TOKEN_ID2((ap_uint<11>)coeff_type, VP8EncBands_); // VP8EncBands[i + 1]);//
+        else if (v == 1)
+            base_id_next = TOKEN_ID2((ap_uint<11>)coeff_type, VP8EncBands_) + 11;
+        else
+            base_id_next = TOKEN_ID2((ap_uint<11>)coeff_type, VP8EncBands_) + 22;
+
+        TokensStr1_hls(isV_N0, isV_B1, sign, isLastBEi, base_id, base_id_next, v, str_1);
+        TokensStr2_hls(isV_B4, isV_N2, isV_4, isV_B10, base_id, str_2);
+        TokensStr3_hls(isV_B6, isV_6, isV_BE9, isV_even, base_id, str_3);
+        base_id = base_id_next;
+    }
+}
+//==================================kernel_2_ArithmeticCoding===========================================//
+
+//==================================kernel_2_ArithmeticCoding===========================================//
+void PackToken_hls(ap_uint<64>& w, ap_uint<2> be, uint32_t bit, uint32_t proba_idx) {
+    ap_uint<16> token = proba_idx | (bit << 15); // bit 15 carries 'bit'; the rest is proba_idx, cut to 16 bits
+    w.range(be * 16 + 15, be * 16) = token;      // overwrite 16-bit slot 'be' of w
+}
+/////PackConstantToken_hls////////////////
+void PackConstantToken_hls(ap_uint<64>& w, ap_uint<2> be, uint32_t bit, uint32_t proba_idx) {
+    ap_uint<16> token = proba_idx | (1u << 14) | (bit << 15); // like PackToken_hls, with bit 14 always set
+    w.range(be * 16 + 15, be * 16) = token;                   // overwrite 16-bit slot 'be' of w
+}
+/////TokensStr0_hls////////////////////////
+void TokensStr0_hls(ap_uint<2> ctx, ap_uint<2> coeff_type, ap_int<5> last, hls::stream<ap_uint<64> >& str_0) {
+#pragma HLS PIPELINE II = 1
+    ap_uint<1> no_coeffs = last < 0;
+    ap_uint<11> first_id = TOKEN_ID2((ap_uint<11>)coeff_type, (coeff_type == 0 ? 1 : 0)) + ctx * 11;
+    ap_uint<64> header = 0; // slot 0: (last < 0) token; bits 20..16: last; bits 26..24: coeff_type
+    PackToken_hls(header, 0, no_coeffs, first_id);
+    header(16 + 8 + 2, 16 + 8) = coeff_type;
+    header(16 + 4, 16) = last;
+    str_0.write(header);
+}
+/////TokensStr1_hls//////////////////////////////////////
+
+void TokensStr1_hls(ap_uint<1> isV_N0, // flag for slot 0 (v != 0)
+                    ap_uint<1> isV_B1, // flag for slot 1 (v > 1)
+                    ap_uint<1> sign, // flag for slot 2
+                    ap_uint<1> isLastBEi, // flag for slot 3 (i < last)
+                    ap_uint<11> base_id, // slots 0 and 1 use base_id + 1 and base_id + 2
+                    ap_uint<11> base_id_next, // index stored in slot 3
+                    ap_uint<11> v, // value stored in slot 2
+                    hls::stream<ap_uint<64> >& str_1) {
+#pragma HLS PIPELINE II = 1
+    ap_uint<64> packed = 0;
+    PackToken_hls(packed, 3, isLastBEi, base_id_next);
+    PackConstantToken_hls(packed, 2, sign, v); // constant token: bit 14 of the slot is set
+    PackToken_hls(packed, 1, isV_B1, base_id + 2);
+    PackToken_hls(packed, 0, isV_N0, base_id + 1);
+    str_1.write(packed);
+}
+/////TokensStr2_hls/////////////////////////////////////////
+void TokensStr2_hls(ap_uint<1> isV_B4, // flag for slot 0 (v > 4)
+                    ap_uint<1> isV_N2, // flag for slot 1 (v != 2)
+                    ap_uint<1> isV_4, // flag for slot 2 (v == 4)
+                    ap_uint<1> isV_B10, // flag for slot 3 (v > 10)
+                    ap_uint<11> base_id, // slots 0..3 use base_id + 3 .. base_id + 6
+                    hls::stream<ap_uint<64> >& str_2) {
+#pragma HLS PIPELINE II = 1
+    ap_uint<64> packed = 0;
+    PackToken_hls(packed, 3, isV_B10, base_id + 6);
+    PackToken_hls(packed, 2, isV_4, base_id + 5);
+    PackToken_hls(packed, 1, isV_N2, base_id + 4);
+    PackToken_hls(packed, 0, isV_B4, base_id + 3);
+    str_2.write(packed);
+}
+/////TokensStr3_hls//////////////////////////////////////
+void TokensStr3_hls(ap_uint<1> isV_B6, // flag for slot 0 (v > 6)
+                    ap_uint<1> isV_6, // flag for slot 1 (v == 6)
+                    ap_uint<1> isV_BE9, // flag for slot 2 (v >= 9)
+                    ap_uint<1> isV_even, // flag for slot 3 (v is even)
+                    ap_uint<11> base_id, // only slot 0 depends on it (base_id + 7)
+                    hls::stream<ap_uint<64> >& str_3) {
+#pragma HLS PIPELINE II = 1
+    ap_uint<64> packed = 0;
+    PackToken_hls(packed, 3, isV_even, 145); // slots 1..3 carry the fixed indices 159, 165 and 145
+    PackToken_hls(packed, 2, isV_BE9, 165);
+    PackToken_hls(packed, 1, isV_6, 159);
+    PackToken_hls(packed, 0, isV_B6, base_id + 7);
+    str_3.write(packed);
+}
+//==================================kernel_2_ArithmeticCoding===========================================//
+
+//==================================kernel_2_ArithmeticCoding===========================================//
+void kernel_2_CreateTokens_with_isFinal(hls::stream<uint16_t>& str_mb_h, // in: frame height, read once
+                                        hls::stream<uint16_t>& str_mb_w, // in: frame width, read once
+                                        hls::stream<ap_uint<1> >& str_type_mb, // in: one value per macroblock
+                                        hls::stream<ap_uint<64> >& str_0_dc, // in: str_0..str_3 word streams for DC blocks
+                                        hls::stream<ap_uint<64> >& str_1_dc,
+                                        hls::stream<ap_uint<64> >& str_2_dc,
+                                        hls::stream<ap_uint<64> >& str_3_dc,
+                                        hls::stream<ap_uint<64> >& str_0_ac, // in: same, for luma-AC blocks
+                                        hls::stream<ap_uint<64> >& str_1_ac,
+                                        hls::stream<ap_uint<64> >& str_2_ac,
+                                        hls::stream<ap_uint<64> >& str_3_ac,
+                                        hls::stream<ap_uint<64> >& str_0_uv, // in: same, for U/V blocks
+                                        hls::stream<ap_uint<64> >& str_1_uv,
+                                        hls::stream<ap_uint<64> >& str_2_uv,
+                                        hls::stream<ap_uint<64> >& str_3_uv,
+                                        hls::stream<uint16_t>& str_mb_h_out, // out: mb_h forwarded unchanged
+                                        hls::stream<uint16_t>& str_mb_w_out, // out: mb_w forwarded unchanged
+                                        hls::stream<ap_uint<16> >& str_tokens_final) { // out: 16-bit token stream
+    uint16_t mb_h = str_mb_h.read();
+    uint16_t mb_w = str_mb_w.read();
+    str_mb_h_out.write(mb_h);
+    str_mb_w_out.write(mb_w);
+RECORD_TOKENS_ADD_FINAL:
+    for (uint16_t y_mb = 0; y_mb < mb_h; y_mb++) {
+#pragma HLS LOOP_TRIPCOUNT min = 16 max = 68
+        for (uint16_t x_mb = 0; x_mb < mb_w; x_mb++) {
+#pragma HLS LOOP_TRIPCOUNT min = 16 max = 120
+            RecordTokens_nrd2_mb_r_str_AddFinal(str_type_mb, str_0_dc, str_1_dc, str_2_dc, str_3_dc, str_0_ac, str_1_ac,
+                                                str_2_ac, str_3_ac, str_0_uv, str_1_uv, str_2_uv, str_3_uv,
+                                                str_tokens_final,
+                                                y_mb == (mb_h - 1) && (x_mb == (mb_w - 1))); // isFinal: true only for the last macroblock
+        }
+    }
+}
+//==================================kernel_2_ArithmeticCoding===========================================//
+void RecordTokens_nrd2_mb_r_str_AddFinal(hls::stream<ap_uint<1> >& str_type_mb, // in: type of this macroblock
+                                         hls::stream<ap_uint<64> >& str_0_dc,
+                                         hls::stream<ap_uint<64> >& str_1_dc,
+                                         hls::stream<ap_uint<64> >& str_2_dc,
+                                         hls::stream<ap_uint<64> >& str_3_dc,
+                                         hls::stream<ap_uint<64> >& str_0_ac,
+                                         hls::stream<ap_uint<64> >& str_1_ac,
+                                         hls::stream<ap_uint<64> >& str_2_ac,
+                                         hls::stream<ap_uint<64> >& str_3_ac,
+                                         hls::stream<ap_uint<64> >& str_0_uv,
+                                         hls::stream<ap_uint<64> >& str_1_uv,
+                                         hls::stream<ap_uint<64> >& str_2_uv,
+                                         hls::stream<ap_uint<64> >& str_3_uv,
+                                         hls::stream<ap_uint<16> >& tokens, // out: token stream
+                                         bool isFinal) { // true when this is the last macroblock of the frame
+    int x, y, ch;
+    ap_uint<1> type_mb = str_type_mb.read();
+    // Read order mirrors the writer RecordTokens_nrd2_mb_w: optional DC block, 16 luma blocks, 8 U/V blocks.
+    if (type_mb == 1) { // DC words exist only for type 1; the writer emits them under the same condition
+        VP8RecordCoeffTokens_hls_r_str_AddFanel(str_0_dc, str_1_dc, str_2_dc, str_3_dc, tokens, false);
+    }
+    for (y = 0; y < 4; ++y)
+    ADD_FINAL_4X4:
+        for (x = 0; x < 4; ++x) {
+            VP8RecordCoeffTokens_hls_r_str_AddFanel(str_0_ac, str_1_ac, str_2_ac, str_3_ac, tokens, false);
+        }
+    for (ch = 0; ch <= 2; ch += 2)
+        for (y = 0; y < 2; ++y)
+        ADD_FINAL_2X2X2:
+            for (x = 0; x < 2; ++x) {
+                VP8RecordCoeffTokens_hls_r_str_AddFanel(str_0_uv, str_1_uv, str_2_uv, str_3_uv, tokens,
+                                                        isFinal && (ch == 2) && (y == 1) && (x == 1)); // only the very last U/V block gets isFinal
+            }
+}
+//==================================kernel_2_ArithmeticCoding===========================================//
+////////VP8RecordCoeffTokens_hls_r_str_AddFanel///////////////////////////////
+#define GET_isLastN_B(w) (w(15 + 0 * 16, 15 + 0 * 16)) // str_0 word (see TokensStr0_hls): flag bit of slot 0
+#define GET_isLastN_P(w) (w(10 + 0 * 16, 0 + 0 * 16)) // str_0 word: low 11 bits of slot 0 (index field)
+#define GET_last(w) ((ap_int<5>)w(16 + 4, 16)) // str_0 word: bits 20..16, cast to signed 5-bit
+#define GET_coeff_type(w) (w(16 + 8 + 2, 16 + 8)) // str_0 word: bits 26..24
+#define GET_isV_N0_B(w) (w(15 + 0 * 16, 15 + 0 * 16)) // str_1 word (see TokensStr1_hls): slot 0
+#define GET_isV_N0_P(w) (w(10 + 0 * 16, 0 + 0 * 16))
+#define GET_isV_B1_B(w) (w(15 + 1 * 16, 15 + 1 * 16)) // str_1 word: slot 1
+#define GET_isV_B1_P(w) (w(10 + 1 * 16, 0 + 1 * 16))
+#define GET_sign_B(w) (w(15 + 2 * 16, 15 + 2 * 16)) // str_1 word: slot 2, written by PackConstantToken_hls
+#define GET_sign_v(w) (w(10 + 2 * 16, 0 + 2 * 16)) // str_1 word: low 11 bits of slot 2 hold v, not an index
+#define GET_isLastBEi_B(w) (w(15 + 3 * 16, 15 + 3 * 16)) // str_1 word: slot 3
+#define GET_isLastBEi_P(w) (w(10 + 3 * 16, 0 + 3 * 16))
+#define GET_isV_B4_B(w) (w(15 + 0 * 16, 15 + 0 * 16)) // slot order matches TokensStr2_hls: slot 0
+#define GET_isV_B4_P(w) (w(10 + 0 * 16, 0 + 0 * 16))
+#define GET_isV_N2_B(w) (w(15 + 1 * 16, 15 + 1 * 16)) // slot 1
+#define GET_isV_N2_P(w) (w(10 + 1 * 16, 0 + 1 * 16))
+#define GET_isV_4_B(w) (w(15 + 2 * 16, 15 + 2 * 16)) // slot 2
+#define GET_isV_4_P(w) (w(10 + 2 * 16, 0 + 2 * 16))
+#define GET_isV_B10_B(w) (w(15 + 3 * 16, 15 + 3 * 16)) // slot 3
+#define GET_isV_B10_P(w) (w(10 + 3 * 16, 0 + 3 * 16))
+#define GET_isV_B6_B(w) (w(15 + 0 * 16, 15 + 0 * 16)) // slot order matches TokensStr3_hls: slot 0
+#define GET_isV_B6_P(w) (w(10 + 0 * 16, 0 + 0 * 16))
+#define GET_isV_6_B(w) (w(15 + 1 * 16, 15 + 1 * 16)) // slot 1
+#define GET_isV_6_P(w) (w(10 + 1 * 16, 0 + 1 * 16))
+#define GET_isV_BE9_B(w) (w(15 + 2 * 16, 15 + 2 * 16)) // slot 2
+#define GET_isV_BE9_P(w) (w(10 + 2 * 16, 0 + 2 * 16))
+#define GET_isV_even_B(w) (w(15 + 3 * 16, 15 + 3 * 16)) // slot 3
+#define GET_isV_even_P(w) (w(10 + 3 * 16, 0 + 3 * 16))
+// Emits the token sequence of one coefficient block into `tokens`.
+//   str_0        : one word per block: "no coefficient" flag with its probability index, `last`
+//                  and the coefficient type.
+//   str_1..str_3 : one word each per processed coefficient, carrying pre-computed comparison flags
+//                  on the magnitude v together with the probability index of each flag.
+//   tokens       : output stream; word layout as written by AddToken_hls_AddFanel /
+//                  AddConstantToken_hls_AddFanel.
+//   isFinal      : when set, the token that ends the block (empty-block token, sign token at i == 15,
+//                  or the token written with isLastBEi == 0) carries the isFinal flag.
+// Returns 0 when str_0 flags the block as empty (only one token is written), 1 otherwise.
+int VP8RecordCoeffTokens_hls_r_str_AddFanel(hls::stream<ap_uint<64> >& str_0,
+                                            hls::stream<ap_uint<64> >& str_1,
+                                            hls::stream<ap_uint<64> >& str_2,
+                                            hls::stream<ap_uint<64> >& str_3,
+                                            hls::stream<ap_uint<16> >& tokens,
+                                            bool isFinal) {
+    ap_uint<64> w0 = str_0.read();
+    ap_uint<1> b_w0 = GET_isLastN_B(w0);
+    ap_uint<11> p_w0 = GET_isLastN_P(w0);
+    ap_uint<5> last_w0 = GET_last(w0);
+    ap_uint<5> type_w0 = GET_coeff_type(w0);
+    ap_uint<11> base_id = p_w0;
+
+    // First token: bit = !b_w0 with probability index p_w0; an empty block ends here.
+    if (!AddToken_hls_AddFanel(tokens, !b_w0, p_w0, isFinal & b_w0)) { // last==-1
+        return 0;
+    }
+
+    for (int i = 0; i <= last_w0; i++) {
+#pragma HLS LOOP_TRIPCOUNT min = 0 max = 16
+#pragma HLS PIPELINE off
+        if (i == 0 && type_w0 == 0) // first==1)
+            continue;
+        ap_uint<64> w1 = str_1.read();
+        ap_uint<64> w2 = str_2.read();
+        ap_uint<64> w3 = str_3.read();
+        ap_uint<1> sign_b = GET_sign_B(w1);            // c < 0;
+        ap_uint<WD_LEVEL - 1> v = GET_sign_v(w1);      // c < 0;w4s(WD_LEVEL-2,0) ;
+        ap_uint<1> isV_N0 = GET_isV_N0_B(w1);          // v!=0;//
+        ap_uint<1> isLastBEi = GET_isLastBEi_B(w1);    //  i<last_w0;//;i<last;//
+        ap_uint<1> isV_B1 = GET_isV_B1_B(w1);          // v>1;//
+        ap_uint<11> isV_N0_p = GET_isV_N0_P(w1);       // v!=0;//
+        ap_uint<11> isLastBEi_p = GET_isLastBEi_P(w1); //  i<last_w0;//;i<last;//
+        ap_uint<11> isV_B1_p = GET_isV_B1_P(w1);       // v>1;//
+        // str_2-------------------------------------------
+        ap_uint<1> isV_B4 = GET_isV_B4_B(w2);      // v>4;
+        ap_uint<1> isV_N2 = GET_isV_N2_B(w2);      // v!=2;
+        ap_uint<1> isV_4 = GET_isV_4_B(w2);        // v==4;
+        ap_uint<1> isV_B10 = GET_isV_B10_B(w2);    // v>10;
+        ap_uint<11> isV_B4_p = GET_isV_B4_P(w2);   // v>4;
+        ap_uint<11> isV_N2_p = GET_isV_N2_P(w2);   // v!=2;
+        ap_uint<11> isV_4_p = GET_isV_4_P(w2);     // v==4;
+        ap_uint<11> isV_B10_p = GET_isV_B10_P(w2); // v>10;
+        // str_3-------------------------------------------
+        ap_uint<1> isV_B6 = GET_isV_B6_B(w3);        // v>6;
+        ap_uint<1> isV_6 = GET_isV_6_B(w3);          // v==6;
+        ap_uint<1> isV_BE9 = GET_isV_BE9_B(w3);      // v>=9;
+        ap_uint<1> isV_even = GET_isV_even_B(w3);    // 1-v&1;//!(v & 1)
+        ap_uint<11> isV_B6_p = GET_isV_B6_P(w3);     // v>6;
+        ap_uint<11> isV_6_p = GET_isV_6_P(w3);       // v==6;
+        ap_uint<11> isV_BE9_p = GET_isV_BE9_P(w3);   // v>=9;
+        ap_uint<11> isV_even_p = GET_isV_even_P(w3); // 1-v&1;//!(v & 1)
+        //-------------------------------------------------
+        ap_uint<1> isV_S19 = v < 19; // (v - 3) < (8 << 1)
+        ap_uint<1> isV_S35 = v < 35; // (v - 3) < (8 << 2)
+        ap_uint<1> isV_S67 = v < 67; // (v - 3) < (8 << 3)
+
+        // "v != 0" token; base_id used by the category selectors below is derived from its index.
+        AddToken_hls_AddFanel(tokens, isV_N0, isV_N0_p, 0);
+        base_id = isV_N0_p - 1;
+        if (v != 0) {
+            if (AddToken_hls_AddFanel(tokens, isV_B1, isV_B1_p, 0)) {       // v=[2,2047]
+                if (!AddToken_hls_AddFanel(tokens, isV_B4, isV_B4_p, 0)) {  // v=[2,4]
+                    if (AddToken_hls_AddFanel(tokens, isV_N2, isV_N2_p, 0)) // v=[3,4]
+                        AddToken_hls_AddFanel(tokens, isV_4, isV_4_p, 0);   // v=[4,4]
+                } else if (!AddToken_hls_AddFanel(tokens, isV_B10, isV_B10_p, 0)) { // v=[5,10]
+                    if (!AddToken_hls_AddFanel(tokens, isV_B6, isV_B6_p, 0)) {      // v=[5,6]
+                        AddConstantToken_hls_AddFanel(tokens, isV_6, 159, 0);    // v=[6]//GET__B, GET__P
+                    } else {                                                     // v=[7,10]
+                        AddConstantToken_hls_AddFanel(tokens, isV_BE9, 165, 0);  // v=[9,10]//GET__B, GET__P
+                        AddConstantToken_hls_AddFanel(tokens, isV_even, 145, 0); // v=[8,10]//GET__B, GET__P
+                    }
+                } else { // v=[11~2047]
+                    // Two selector tokens pick the category, then the extra bits of (v - category base)
+                    // are sent MSB first, each with its own fixed probability.
+                    if (isV_S19) { // v=[11,18]: 3 extra bits
+                        AddToken_hls_AddFanel(tokens, 0, base_id + 8, 0);
+                        AddToken_hls_AddFanel(tokens, 0, base_id + 9, 0);
+                        AddConstantToken_hls_AddFanel(tokens, !!(v - 11 & 4), 173, 0);
+                        AddConstantToken_hls_AddFanel(tokens, !!(v - 11 & 2), 148, 0);
+                        AddConstantToken_hls_AddFanel(tokens, !!(v - 11 & 1), 140, 0);
+                    } else if (isV_S35) { // v=[19,34]: 4 extra bits
+                        AddToken_hls_AddFanel(tokens, 0, base_id + 8, 0);
+                        AddToken_hls_AddFanel(tokens, 1, base_id + 9, 0);
+                        AddConstantToken_hls_AddFanel(tokens, !!(v - 19 & 8), 176, 0);
+                        AddConstantToken_hls_AddFanel(tokens, !!(v - 19 & 4), 155, 0);
+                        AddConstantToken_hls_AddFanel(tokens, !!(v - 19 & 2), 140, 0);
+                        AddConstantToken_hls_AddFanel(tokens, !!(v - 19 & 1), 135, 0);
+                    } else if (isV_S67) { // v=[35,66]: 5 extra bits
+                        AddToken_hls_AddFanel(tokens, 1, base_id + 8, 0);
+                        AddToken_hls_AddFanel(tokens, 0, base_id + 10, 0);
+                        AddConstantToken_hls_AddFanel(tokens, !!(v - 35 & 16), 180, 0);
+                        AddConstantToken_hls_AddFanel(tokens, !!(v - 35 & 8), 157, 0);
+                        AddConstantToken_hls_AddFanel(tokens, !!(v - 35 & 4), 141, 0);
+                        AddConstantToken_hls_AddFanel(tokens, !!(v - 35 & 2), 134, 0);
+                        AddConstantToken_hls_AddFanel(tokens, !!(v - 35 & 1), 130, 0);
+                    } else { // v>=67: 11 extra bits
+                        AddToken_hls_AddFanel(tokens, 1, base_id + 8, 0);
+                        AddToken_hls_AddFanel(tokens, 1, base_id + 10, 0);
+                        const uint8_t VP8Cat6[] = {254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129};
+                    ADD_CONSTANT_TOKEN:
+                        for (int k = 10; k >= 0; k--)
+                            AddConstantToken_hls_AddFanel(tokens, !!(v - 67 & (1 << k)), VP8Cat6[10 - k], 0);
+                    } // v>=67
+                }     // v=[11~2047]
+            }         // v=[2~2047]
+        }             // v!=0
+        if (v == 0) continue;
+        // Sign token (fixed probability 128), then the isLastBEi token unless this was index 15.
+        AddConstantToken_hls_AddFanel(tokens, sign_b, 128, isFinal & (15 == i));
+        if (i == 15 || !AddToken_hls_AddFanel(tokens, isLastBEi, isLastBEi_p, isFinal && (!isLastBEi))) {
+            return 1; // EOB
+        }
+    } // for
+    return 1;
+}
+
+// Writes one constant-probability token to str_tokens and returns `bit`.
+// Word layout: [15] bit, [14] = 1 (hls_AC_range_str then takes bits [7:0] as the probability),
+// [12] isFinal, [10:0] proba_idx; bits 13 and 11 are zero.
+ap_uint<1> AddConstantToken_hls_AddFanel(hls::stream<ap_uint<16> >& str_tokens,
+                                         ap_uint<1> bit,
+                                         ap_uint<11> proba_idx,
+                                         ap_uint<1> isFinal) {
+#pragma HLS PIPELINE
+    ap_uint<16> word = 0; // leaves the always-zero bits 13 and 11 cleared
+    word(10, 0) = proba_idx;
+    word[12] = isFinal;
+    word[14] = 1;
+    word[15] = bit;
+    str_tokens.write(word);
+    return bit;
+}
+// Writes one table-probability token to str_tokens and returns `bit`.
+// Word layout: [15] bit, [14] = 0 (hls_AC_range_str then uses bits [10:0] as an index into its
+// probability table), [12] isFinal, [10:0] proba_idx; bits 13 and 11 are zero.
+// The return value lets callers branch directly on the bit they just emitted.
+ap_uint<1> AddToken_hls_AddFanel(hls::stream<ap_uint<16> >& str_tokens,
+                                 ap_uint<1> bit,
+                                 ap_uint<11> proba_idx,
+                                 ap_uint<1> isFinal) {
+#pragma HLS PIPELINE
+    ap_uint<16> word = 0; // bits 14, 13 and 11 stay zero
+    word(10, 0) = proba_idx;
+    word[12] = isFinal;
+    word[15] = bit;
+    str_tokens.write(word);
+    return bit;
+}
+// Arithmetic-codes the token stream: VP8EmitTokens_allstr_hls_dataflow_4stages is handed pout_bw for
+// the coded bytes, then the final coder state is stored in the eight words starting at
+// pout_bw[SIZE32_MEM_BW - SIZE32_AC_STATE] (range, value, nb_bits, pos, run, max_pos, error, index).
+void VP8EmitTokens_str_hls_4stages(uint32_t pout_bw[SIZE32_MEM_BW],
+                                   hls::stream<ap_uint<16> >& str_token,
+                                   uint8_t probas[4 * 8 * 3 * 11]) {
+    // Final coder state, returned by reference from the dataflow region below; the trailing
+    // comments give the values the coder stages start from.
+    ap_uint<8> bw_range;  // = 254;      // range-1
+    ap_uint<24> bw_value; // = 0;
+    ap_int<4> bw_nb_bits; // = -8;
+    ap_uint<32> bw_pos;   // = 0;
+    ap_uint<16> bw_run;   // = 0;
+
+    VP8EmitTokens_allstr_hls_dataflow_4stages(pout_bw, str_token, probas, bw_range, bw_value, bw_nb_bits, bw_pos,
+                                              bw_run);
+
+    uint32_t* p_bw = pout_bw + SIZE32_MEM_BW - SIZE32_AC_STATE;
+    p_bw[0] = bw_range;
+    p_bw[1] = bw_value;
+    p_bw[2] = bw_nb_bits;
+    p_bw[3] = bw_pos;
+    p_bw[4] = bw_run;
+    p_bw[5] = MAX_NUM_MB_W * MAX_NUM_MB_H * 384 / SYSTEM_MIN_COMP_RATIO - 1; // max_pos
+    p_bw[6] = 0;                                                             // error
+    p_bw[7] = 0;                                                             // index_ac_encoder / num_segment
+}
+
+// Dataflow region of the arithmetic coder: range stage -> value stage -> two byte-packaging stages -> writer.
+void VP8EmitTokens_allstr_hls_dataflow_4stages(uint32_t pout_bw[SIZE32_MEM_BW],
+                                               hls::stream<ap_uint<16> >& str_token,
+                                               uint8_t probas[4 * 8 * 3 * 11],
+                                               ap_uint<8>& bw_range,  // out: final range - 1 (range stage starts at 254)
+                                               ap_uint<24>& bw_value, // out: final value (value stage starts at 0)
+                                               ap_int<4>& bw_nb_bits, // out: final nb_bits (value stage starts at -8)
+                                               ap_uint<32>& bw_pos,   // out: count returned by the pos stage
+                                               ap_uint<16>& bw_run)   // out: run count returned by the run stage
+{
+#pragma HLS DATAFLOW
+    // Stage 1 (range): consumes the tokens and produces one {isFinal, bit, shift, split + 1} word per token.
+    hls::stream<ap_uint<2 + 3 + 8> > str_fnl_bit_shift_split_1;
+#pragma HLS STREAM variable = str_fnl_bit_shift_split_1 depth = 64
+    bw_range = hls_AC_range_str(str_token, probas, str_fnl_bit_shift_split_1);
+
+    // Stage 2 (value): produces {isFinal, isBits, bits} words; the return packs nb_bits above the value.
+    hls::stream<ap_uint<18> > str_Last_isBit_Bits;
+#pragma HLS STREAM variable = str_Last_isBit_Bits depth = 64
+    ap_uint<4 + 24> nb_value = hls_AC_value_str(str_fnl_bit_shift_split_1, str_Last_isBit_Bits);
+    bw_nb_bits = nb_value(27, 24);
+    bw_value = nb_value(23, 0);
+
+    // Stage 3 (package 1): produces {Final, run, carry, previous byte} records.
+    hls::stream<ap_uint<26> > str_isFinal_run_cy_pre;
+#pragma HLS STREAM variable = str_isFinal_run_cy_pre depth = 1024
+    ap_uint<16> run = VP8PutBit_hls_BytePackage_str_run(str_Last_isBit_Bits, str_isFinal_run_cy_pre);
+
+    // Stage 4 (package 2): expands the records into 9-bit words (bit 8 = last flag, bits [7:0] = byte).
+    hls::stream<ap_uint<9> > str_Last_byte;
+#pragma HLS STREAM variable = str_Last_byte depth = 1024
+    ap_uint<32> pos = VP8PutBit_hls_BytePackage_str_pos(str_isFinal_run_cy_pre, str_Last_byte);
+
+    bw_run = run(15, 0);
+    bw_pos = pos(31, 0);
+    PackStr2Mem_t<9, 8, 256>(pout_bw, str_Last_byte); // NOTE(review): defined elsewhere; presumably stores the bytes to pout_bw
+}
+// Range stage of the arithmetic coder. For every token read from str_token it computes the split,
+// renormalises the range and writes {isFinal[12], bit[11], shift[10:8], split + 1 [7:0]} to
+// str_fnl_bit_shift_split_1. The loop ends after the token whose bit 12 (isFinal) is set.
+// Returns the final (range - 1).
+ap_uint<8> hls_AC_range_str(hls::stream<ap_uint<16> >& str_token,
+                            uint8_t probas[4 * 8 * 3 * 11],
+                            hls::stream<ap_uint<2 + 3 + 8> >& str_fnl_bit_shift_split_1) {
+    ap_uint<8> range_m1 = 254; // range - 1, carried from one token to the next
+    ap_uint<1> last = 0;
+AC_RANGE_STR:
+    do {
+#pragma HLS LOOP_TRIPCOUNT min = 1920 * 1088 / 256 * 384 * 2 max = 4096 * 4096 / 256 * 384 * 2
+#pragma HLS PIPELINE II = 1
+        ap_uint<16> tok = str_token.read();
+        ap_uint<1> bit = tok[15];
+        ap_uint<1> fixed = tok[14];
+        last = tok[12];
+
+        // Probability: carried in the token itself when bit 14 is set, otherwise looked up in probas.
+        ap_uint<8> p;
+        if (!fixed) {
+            p = probas[tok(10, 0)];
+        } else {
+            p = tok(7, 0);
+        }
+
+        ap_uint<8> split = (range_m1 * p) >> 8;
+        ap_uint<8> split_1 = split + 1;
+
+        // Width of the chosen sub-interval, shifted left by its leading-zero count to renormalise.
+        ap_uint<8> width = split_1;
+        if (bit) width = range_m1 - split;
+        int lz = width.countLeadingZeros();
+        ap_uint<3> shift = lz;
+        range_m1 = (width << lz) - 1;
+
+        ap_uint<2 + 3 + 8> packed;
+        packed(7, 0) = split_1;
+        packed(10, 8) = shift;
+        packed[11] = bit;
+        packed[12] = last;
+        str_fnl_bit_shift_split_1.write(packed);
+    } while (last == 0);
+    return range_m1;
+}
+// Value stage of the arithmetic coder. Consumes {isFinal[12], bit[11], shift[10:8], split + 1 [7:0]},
+// updates value / nb_bits and forwards {isFinal[17], isBits[16], bits[15:0]} whenever bits are flushed
+// or the final word is seen. Returns {nb_bits[27:24], value[23:0]} after the final word.
+ap_uint<4 + 24> hls_AC_value_str(hls::stream<ap_uint<2 + 3 + 8> >& str_fnl_bit_shift_split_1,
+                                 hls::stream<ap_uint<18> >& str_fnl_isBit_Bits) {
+#pragma HLS INLINE OFF
+    ap_uint<24> v_old = 0;
+    ap_int<4> nb_old = -8;
+    ap_uint<1> isFinal = 0;
+    ap_uint<1> bit;
+    ap_uint<3> shift;
+    ap_uint<8> split_1;
+    // Zero-initialised: `bits` is copied into the output word on every iteration, including those
+    // before the first flush, and would otherwise be read while still indeterminate.
+    ap_uint<16> bits = 0;
+    ap_uint<1> isBits;
+
+AC_VALUE_STR:
+    do {
+#pragma HLS LOOP_TRIPCOUNT min = 1920 * 1088 / 256 * 384 * 2 max = 4096 * 4096 / 256 * 384 * 2
+#pragma HLS PIPELINE II = 1
+        ap_uint<2 + 3 + 8> fnl_bit_shift_split_1 = str_fnl_bit_shift_split_1.read();
+        isFinal = fnl_bit_shift_split_1[12];
+        bit = fnl_bit_shift_split_1[11];
+        shift = fnl_bit_shift_split_1(10, 8);
+        split_1 = fnl_bit_shift_split_1(7, 0);
+        isBits = 0;
+        ap_uint<24> v_new = v_old; //
+        ap_int<4> nb_new = nb_old; //
+        if (bit) v_new += split_1;
+        v_new <<= shift;
+        nb_new += shift;
+        if (nb_new > 0) { // flush: move everything from bit s upwards out of the value
+            isBits = 1;
+            ap_uint<4> s = 8 + nb_new;
+            bits = v_new(23, s);
+            v_new(23, s) = 0; // v_old -= bits << s;
+            nb_new -= 8;
+        }
+        ap_uint<18> Last_isBit_Bits;
+        Last_isBit_Bits(17, 17) = isFinal;
+        Last_isBit_Bits(16, 16) = isBits;
+        Last_isBit_Bits(15, 0) = bits;
+        if (isBits || isFinal) str_fnl_isBit_Bits.write(Last_isBit_Bits);
+        v_old = v_new;
+        nb_old = nb_new;
+    } while (isFinal == 0);
+    ap_uint<4 + 24> ret;
+    ret(27, 24) = nb_old;
+    ret(23, 0) = v_old;
+    return ret;
+}
+// Byte packaging, part 1: turns flushed bits into {Final[25], run[24:9], cy[8], previous byte[7:0]}
+// records. A byte is held back until the next non-0xff byte arrives with its carry (bit 8); 0xff
+// bytes in between are only counted in the run. Returns the run still pending after the last word.
+ap_uint<16> VP8PutBit_hls_BytePackage_str_run(hls::stream<ap_uint<18> >& str_Last_isBit_Bits,
+                                              hls::stream<ap_uint<26> >& str_isFinal_run_cy_pre) {
+    ap_uint<26> isFinal_run_cy_pre; // 1+16+1+8
+    ap_uint<16> p_run_ = 0;
+    ap_uint<8> byte_pre = 0;  // only meaningful once has_prev is set
+    ap_uint<1> has_prev = 0;  // explicit flag, so that a leading 0xff byte is held instead of dropped
+    ap_uint<1> isLast;
+BYTE_PACKAGE:
+    do { /*This loop iterates p_run_ and byte_pre*/
+#pragma HLS PIPELINE
+        ap_uint<18> Last_isBit_Bits = str_Last_isBit_Bits.read();
+        isLast = Last_isBit_Bits(17, 17);
+        ap_uint<1> isBits = Last_isBit_Bits(16, 16);
+        ap_uint<16> bits = Last_isBit_Bits(15, 0);
+        if (isBits) {
+            if (!has_prev) { // very first byte: nothing to emit yet, just hold it
+                byte_pre(7, 0) = bits(7, 0);
+                has_prev = 1;
+            } else if ((bits & 0xff) != 0xff) {
+                isFinal_run_cy_pre(7, 0) = byte_pre(7, 0);
+                isFinal_run_cy_pre(8, 8) = bits(8, 8);
+                isFinal_run_cy_pre(16 + 8, 9) = p_run_;
+                isFinal_run_cy_pre[25] = 0;
+                p_run_ = 0;
+                byte_pre(7, 0) = bits(7, 0);
+                str_isFinal_run_cy_pre.write(isFinal_run_cy_pre);
+            } else {
+                p_run_++; // 0xff byte: delayed until the next carry is known
+            }
+        }
+    } while (isLast == 0);
+    if (isLast && has_prev) { // NOTE(review): no record is written if no byte was ever flushed
+        isFinal_run_cy_pre(7, 0) = byte_pre(7, 0);
+        isFinal_run_cy_pre(8, 8) = 0;      // cy
+        isFinal_run_cy_pre(16 + 8, 9) = 0; // run
+        isFinal_run_cy_pre[25] = 1;        // Final
+        str_isFinal_run_cy_pre.write(isFinal_run_cy_pre);
+    }
+    return p_run_;
+}
+// Byte packaging, part 2: expands each {Final[25], run[24:9], cy[8], byte[7:0]} record into
+// (byte + cy), tagged with Final in bit 8, followed by `run` filler words (0x00 if cy is set, else 0xff).
+// Stops after the record with Final set and returns the number of words written to str_Last_byte.
+ap_uint<32> VP8PutBit_hls_BytePackage_str_pos(hls::stream<ap_uint<26> >& str_isFinal_run_cy_pre,
+                                              hls::stream<ap_uint<9> >& str_Last_byte) {
+    ap_uint<32> written = 0;
+    ap_uint<1> done;
+DO_BYTE_PACKAGE_POS:
+    do {
+        ap_uint<1 + 16 + 9> rec = str_isFinal_run_cy_pre.read();
+        done = rec[25];
+        ap_uint<16> run = rec(24, 9);
+        ap_uint<1> cy = rec[8];
+
+        // Held-back byte with the carry folded in; bit 8 is then overwritten with the Final flag.
+        ap_uint<9> head = rec(7, 0) + cy;
+        head[8] = done;
+        str_Last_byte.write(head);
+        written += 1;
+
+        ap_uint<9> filler = (cy != 0) ? 0x000 : 0x0ff;
+    BYTE_PACKAGE_POS:
+        for (int i = 0; i < run; i++) {
+#pragma HLS PIPELINE
+            str_Last_byte.write(filler);
+            written += 1;
+        }
+    } while (done == 0);
+    return written;
+}
+//==================================kernel_2_ArithmeticCoding===========================================//
+/*
+ * Other helpers, used for host convenience
+ */
+// Stores the 4x4 values SB_GET(src, i, j, 8), i, j = 0..3, to des[j + strip * i] (strip = row pitch).
+void set_vect_to(ap_uint<8 * 16> src, unsigned char* des, int strip) {
+SET_VECT_FUNC:
+    for (int i = 0; i < 4; i++)
+    SET_VECT_FUNC_IN:
+        for (int j = 0; j < 4; j++) {
+            des[j + strip * i] = SB_GET(src, i, j, 8);
+        }
+}
+//////////////////////////////////////////////////////////////////////////////
+extern "C" { // Kernel top: pin_level -> level streams -> pre-computed flag words -> tokens -> arithmetic coder
+void kernel_2_ArithmeticCoding_1_5axi(uint32_t pin_level[SIZE32_MEM_BW],
+                                      uint8_t* pin_prob, // 2048 instead of [4 * 8 * 3 * 11],
+                                      uint32_t pout_bw[SIZE32_MEM_BW],
+                                      uint32_t pout_ret[SIZE32_MEM_RET],
+                                      uint32_t pout_pred[SIZE32_MEM_PRED]) {
+#pragma HLS INTERFACE m_axi port = pin_level offset = slave bundle = gmem0 depth =               \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pin_prob offset = slave bundle = gmem1 depth = 2048 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_bw offset = slave bundle = gmem2 depth =                     \
+    65536 * 384 / 4 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_ret offset = slave bundle = gmem3 depth =              \
+    65536 * 1 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_pred offset = slave bundle = gmem4 depth =                  \
+    65536 * 16 / 2 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+
+#pragma HLS INTERFACE s_axilite port = pin_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pin_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_bw bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_ret bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_pred bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+#pragma HLS DATAFLOW
+    // Local copy of the 4 * 8 * 3 * 11 probability bytes read from pin_prob.
+    uint8_t prob[4 * 8 * 3 * 11];
+    memcpy(prob, pin_prob, sizeof(prob));
+    // Streams handed to Kernel2_top_read together with pin_level.
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_dc;
+#pragma HLS STREAM variable = str_level_dc depth = 4
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_ac;
+#pragma HLS STREAM variable = str_level_ac depth = 8 * 16
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_uv;
+#pragma HLS STREAM variable = str_level_uv depth = 8 * 8
+    hls::stream<ap_uint<64> > str_pred;
+#pragma HLS STREAM variable = str_pred depth = 64
+    hls::stream<ap_uint<6> > str_ret;
+#pragma HLS STREAM variable = str_ret depth = 64
+    hls::stream<ap_uint<1> > str_type_mb;
+#pragma HLS STREAM variable = str_type_mb depth = 64
+    hls::stream<uint16_t> str_mb_h;
+#pragma HLS STREAM variable = str_mb_h depth = 64
+    hls::stream<uint16_t> str_mb_w;
+#pragma HLS STREAM variable = str_mb_w depth = 64
+    Kernel2_top_read(pin_level,
+
+                     str_level_dc, str_level_ac, str_level_uv, str_pred, str_ret, str_type_mb, str_mb_h, str_mb_w);
+    hls::stream<ap_uint<64> > str_0_dc; // str_0..3 x {dc, ac, uv}: 64-bit words between the two token stages
+#pragma HLS STREAM variable = str_0_dc depth = 64
+    hls::stream<ap_uint<64> > str_1_dc;
+#pragma HLS STREAM variable = str_1_dc depth = 64
+    hls::stream<ap_uint<64> > str_2_dc;
+#pragma HLS STREAM variable = str_2_dc depth = 64
+    hls::stream<ap_uint<64> > str_3_dc;
+#pragma HLS STREAM variable = str_3_dc depth = 64
+    hls::stream<ap_uint<64> > str_0_ac;
+#pragma HLS STREAM variable = str_0_ac depth = 64
+    hls::stream<ap_uint<64> > str_1_ac;
+#pragma HLS STREAM variable = str_1_ac depth = 64
+    hls::stream<ap_uint<64> > str_2_ac;
+#pragma HLS STREAM variable = str_2_ac depth = 64
+    hls::stream<ap_uint<64> > str_3_ac;
+#pragma HLS STREAM variable = str_3_ac depth = 64
+    hls::stream<ap_uint<64> > str_0_uv;
+#pragma HLS STREAM variable = str_0_uv depth = 64
+    hls::stream<ap_uint<64> > str_1_uv;
+#pragma HLS STREAM variable = str_1_uv depth = 64
+    hls::stream<ap_uint<64> > str_2_uv;
+#pragma HLS STREAM variable = str_2_uv depth = 64
+    hls::stream<ap_uint<64> > str_3_uv;
+#pragma HLS STREAM variable = str_3_uv depth = 64
+    hls::stream<ap_uint<1> > str_type_mb_out;
+#pragma HLS STREAM variable = str_type_mb_out depth = 64
+
+    hls::stream<uint16_t> str_mb_h_out;
+#pragma HLS STREAM variable = str_mb_h_out depth = 64
+    hls::stream<uint16_t> str_mb_w_out;
+#pragma HLS STREAM variable = str_mb_w_out depth = 64
+    kernel_2_RecordTokens_pre(str_mb_h, str_mb_w, str_type_mb, str_level_dc, str_level_ac, str_level_uv, str_0_dc,
+                              str_1_dc, str_2_dc, str_3_dc, str_0_ac, str_1_ac, str_2_ac, str_3_ac, str_0_uv, str_1_uv,
+                              str_2_uv, str_3_uv, str_mb_h_out, str_mb_w_out, str_type_mb_out);
+    // Token stage; NOTE(review): presumably consumes the str_0..3 words and fills tokens_str_final - confirm.
+    hls::stream<uint16_t> str_mb_h_out2;
+#pragma HLS STREAM variable = str_mb_h_out2 depth = 64
+    hls::stream<uint16_t> str_mb_w_out2;
+#pragma HLS STREAM variable = str_mb_w_out2 depth = 64
+    hls::stream<ap_uint<16> > tokens_str_final;
+#pragma HLS STREAM variable = tokens_str_final depth = 1024
+    kernel_2_CreateTokens_with_isFinal(str_mb_h_out, str_mb_w_out, str_type_mb_out, str_0_dc, str_1_dc, str_2_dc,
+                                       str_3_dc, str_0_ac, str_1_ac, str_2_ac, str_3_ac, str_0_uv, str_1_uv, str_2_uv,
+                                       str_3_uv, str_mb_h_out2, str_mb_w_out2, tokens_str_final);
+    // mb_h * mb_w is the count passed to the two *_NoLast writers below.
+    uint16_t mb_h = str_mb_h_out2.read();
+    uint16_t mb_w = str_mb_w_out2.read();
+    VP8EmitTokens_str_hls_4stages(pout_bw, tokens_str_final,
+                                  (uint8_t*)prob); // VP8EmitTokens_hls(pout_bw, &tokens, (uint8_t*)prob);
+    PackStr2Mem32_t_NoLast<6, 256>(pout_ret, str_ret, mb_h * mb_w);
+    PackWideStr2Mem32_t_NoLast<64, 256>(pout_pred, str_pred, mb_h * mb_w);
+}
+}
+
+extern "C" { // Kernel top: pin_level -> level streams -> pre-computed flag words -> tokens -> arithmetic coder
+void kernel_2_ArithmeticCoding_2_5axi(uint32_t pin_level[SIZE32_MEM_BW],
+                                      uint8_t* pin_prob, // 2048 instead of [4 * 8 * 3 * 11],
+                                      uint32_t pout_bw[SIZE32_MEM_BW],
+                                      uint32_t pout_ret[SIZE32_MEM_RET],
+                                      uint32_t pout_pred[SIZE32_MEM_PRED]) {
+#pragma HLS INTERFACE m_axi port = pin_level offset = slave bundle = gmem0 depth =               \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pin_prob offset = slave bundle = gmem1 depth = 2048 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_bw offset = slave bundle = gmem2 depth =                     \
+    65536 * 384 / 4 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_ret offset = slave bundle = gmem3 depth =              \
+    65536 * 1 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_pred offset = slave bundle = gmem4 depth =                  \
+    65536 * 16 / 2 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+
+#pragma HLS INTERFACE s_axilite port = pin_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pin_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_bw bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_ret bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_pred bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+#pragma HLS DATAFLOW
+    // Local copy of the 4 * 8 * 3 * 11 probability bytes read from pin_prob.
+    uint8_t prob[4 * 8 * 3 * 11];
+    memcpy(prob, pin_prob, sizeof(prob));
+    // Streams handed to Kernel2_top_read together with pin_level.
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_dc;
+#pragma HLS STREAM variable = str_level_dc depth = 4
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_ac;
+#pragma HLS STREAM variable = str_level_ac depth = 8 * 16
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_uv;
+#pragma HLS STREAM variable = str_level_uv depth = 8 * 8
+    hls::stream<ap_uint<64> > str_pred;
+#pragma HLS STREAM variable = str_pred depth = 64
+    hls::stream<ap_uint<6> > str_ret;
+#pragma HLS STREAM variable = str_ret depth = 64
+    hls::stream<ap_uint<1> > str_type_mb;
+#pragma HLS STREAM variable = str_type_mb depth = 64
+    hls::stream<uint16_t> str_mb_h;
+#pragma HLS STREAM variable = str_mb_h depth = 64
+    hls::stream<uint16_t> str_mb_w;
+#pragma HLS STREAM variable = str_mb_w depth = 64
+    Kernel2_top_read(pin_level,
+
+                     str_level_dc, str_level_ac, str_level_uv, str_pred, str_ret, str_type_mb, str_mb_h, str_mb_w);
+    hls::stream<ap_uint<64> > str_0_dc; // str_0..3 x {dc, ac, uv}: 64-bit words between the two token stages
+#pragma HLS STREAM variable = str_0_dc depth = 64
+    hls::stream<ap_uint<64> > str_1_dc;
+#pragma HLS STREAM variable = str_1_dc depth = 64
+    hls::stream<ap_uint<64> > str_2_dc;
+#pragma HLS STREAM variable = str_2_dc depth = 64
+    hls::stream<ap_uint<64> > str_3_dc;
+#pragma HLS STREAM variable = str_3_dc depth = 64
+    hls::stream<ap_uint<64> > str_0_ac;
+#pragma HLS STREAM variable = str_0_ac depth = 64
+    hls::stream<ap_uint<64> > str_1_ac;
+#pragma HLS STREAM variable = str_1_ac depth = 64
+    hls::stream<ap_uint<64> > str_2_ac;
+#pragma HLS STREAM variable = str_2_ac depth = 64
+    hls::stream<ap_uint<64> > str_3_ac;
+#pragma HLS STREAM variable = str_3_ac depth = 64
+    hls::stream<ap_uint<64> > str_0_uv;
+#pragma HLS STREAM variable = str_0_uv depth = 64
+    hls::stream<ap_uint<64> > str_1_uv;
+#pragma HLS STREAM variable = str_1_uv depth = 64
+    hls::stream<ap_uint<64> > str_2_uv;
+#pragma HLS STREAM variable = str_2_uv depth = 64
+    hls::stream<ap_uint<64> > str_3_uv;
+#pragma HLS STREAM variable = str_3_uv depth = 64
+    hls::stream<ap_uint<1> > str_type_mb_out;
+#pragma HLS STREAM variable = str_type_mb_out depth = 64
+
+    hls::stream<uint16_t> str_mb_h_out;
+#pragma HLS STREAM variable = str_mb_h_out depth = 64
+    hls::stream<uint16_t> str_mb_w_out;
+#pragma HLS STREAM variable = str_mb_w_out depth = 64
+    kernel_2_RecordTokens_pre(str_mb_h, str_mb_w, str_type_mb, str_level_dc, str_level_ac, str_level_uv, str_0_dc,
+                              str_1_dc, str_2_dc, str_3_dc, str_0_ac, str_1_ac, str_2_ac, str_3_ac, str_0_uv, str_1_uv,
+                              str_2_uv, str_3_uv, str_mb_h_out, str_mb_w_out, str_type_mb_out);
+    // Token stage; NOTE(review): presumably consumes the str_0..3 words and fills tokens_str_final - confirm.
+    hls::stream<uint16_t> str_mb_h_out2;
+#pragma HLS STREAM variable = str_mb_h_out2 depth = 64
+    hls::stream<uint16_t> str_mb_w_out2;
+#pragma HLS STREAM variable = str_mb_w_out2 depth = 64
+    hls::stream<ap_uint<16> > tokens_str_final;
+#pragma HLS STREAM variable = tokens_str_final depth = 1024
+    kernel_2_CreateTokens_with_isFinal(str_mb_h_out, str_mb_w_out, str_type_mb_out, str_0_dc, str_1_dc, str_2_dc,
+                                       str_3_dc, str_0_ac, str_1_ac, str_2_ac, str_3_ac, str_0_uv, str_1_uv, str_2_uv,
+                                       str_3_uv, str_mb_h_out2, str_mb_w_out2, tokens_str_final);
+    // mb_h * mb_w is the count passed to the two *_NoLast writers below.
+    uint16_t mb_h = str_mb_h_out2.read();
+    uint16_t mb_w = str_mb_w_out2.read();
+    VP8EmitTokens_str_hls_4stages(pout_bw, tokens_str_final,
+                                  (uint8_t*)prob); // VP8EmitTokens_hls(pout_bw, &tokens, (uint8_t*)prob);
+    PackStr2Mem32_t_NoLast<6, 256>(pout_ret, str_ret, mb_h * mb_w);
+    PackWideStr2Mem32_t_NoLast<64, 256>(pout_pred, str_pred, mb_h * mb_w);
+}
+}
+
+extern "C" { /*
+ * kernel_2_ArithmeticCoding_3_5axi: HLS top-level kernel with five m_axi ports, each on its
+ * own bundle (gmem0..gmem4), plus an s_axilite bundle named control.
+ *
+ * Call chain, in the order written below:
+ *   Kernel2_top_read(pin_level, ...)
+ *     -> kernel_2_RecordTokens_pre
+ *     -> kernel_2_CreateTokens_with_isFinal
+ *     -> VP8EmitTokens_str_hls_4stages(pout_bw, tokens_str_final, prob)
+ *   PackStr2Mem32_t_NoLast(pout_ret, str_ret, mb_h * mb_w)
+ *   PackWideStr2Mem32_t_NoLast(pout_pred, str_pred, mb_h * mb_w)
+ *
+ * pin_level : buffer handed to Kernel2_top_read; nothing else in this function touches it.
+ * pin_prob  : the first 4 * 8 * 3 * 11 bytes are copied into a local array before use.
+ * pout_bw   : passed to VP8EmitTokens_str_hls_4stages.
+ * pout_ret  : passed to PackStr2Mem32_t_NoLast together with str_ret.
+ * pout_pred : passed to PackWideStr2Mem32_t_NoLast together with str_pred.
+ *
+ * The statements of this body appear to match kernel_2_ArithmeticCoding_4_5axi; only the
+ * exported symbol name differs.
+ */
+void kernel_2_ArithmeticCoding_3_5axi(uint32_t pin_level[SIZE32_MEM_BW],
+                                      uint8_t* pin_prob, // 2048 instead of [4 * 8 * 3 * 11],
+                                      uint32_t pout_bw[SIZE32_MEM_BW],
+                                      uint32_t pout_ret[SIZE32_MEM_RET],
+                                      uint32_t pout_pred[SIZE32_MEM_PRED]) {
+#pragma HLS INTERFACE m_axi port = pin_level offset = slave bundle = gmem0 depth =               \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pin_prob offset = slave bundle = gmem1 depth = 2048 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_bw offset = slave bundle = gmem2 depth =                     \
+    65536 * 384 / 4 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_ret offset = slave bundle = gmem3 depth =              \
+    65536 * 1 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_pred offset = slave bundle = gmem4 depth =                  \
+    65536 * 16 / 2 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+// Each pointer argument and the return port are additionally mapped to the s_axilite bundle named control.
+#pragma HLS INTERFACE s_axilite port = pin_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pin_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_bw bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_ret bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_pred bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+#pragma HLS DATAFLOW
+    // Local copy of the first 4 * 8 * 3 * 11 (= 1056) bytes of pin_prob.
+    uint8_t prob[4 * 8 * 3 * 11];
+    memcpy(prob, pin_prob, sizeof(prob));
+    // Streams handed to Kernel2_top_read together with pin_level.
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_dc;
+#pragma HLS STREAM variable = str_level_dc depth = 4
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_ac;
+#pragma HLS STREAM variable = str_level_ac depth = 8 * 16
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_uv;
+#pragma HLS STREAM variable = str_level_uv depth = 8 * 8
+    hls::stream<ap_uint<64> > str_pred;
+#pragma HLS STREAM variable = str_pred depth = 64
+    hls::stream<ap_uint<6> > str_ret;
+#pragma HLS STREAM variable = str_ret depth = 64
+    hls::stream<ap_uint<1> > str_type_mb;
+#pragma HLS STREAM variable = str_type_mb depth = 64
+    hls::stream<uint16_t> str_mb_h;
+#pragma HLS STREAM variable = str_mb_h depth = 64
+    hls::stream<uint16_t> str_mb_w;
+#pragma HLS STREAM variable = str_mb_w depth = 64
+    Kernel2_top_read(pin_level,
+
+                     str_level_dc, str_level_ac, str_level_uv, str_pred, str_ret, str_type_mb, str_mb_h, str_mb_w);
+    hls::stream<ap_uint<64> > str_0_dc; // first of twelve 64-bit streams shared by the next two calls
+#pragma HLS STREAM variable = str_0_dc depth = 64
+    hls::stream<ap_uint<64> > str_1_dc;
+#pragma HLS STREAM variable = str_1_dc depth = 64
+    hls::stream<ap_uint<64> > str_2_dc;
+#pragma HLS STREAM variable = str_2_dc depth = 64
+    hls::stream<ap_uint<64> > str_3_dc;
+#pragma HLS STREAM variable = str_3_dc depth = 64
+    hls::stream<ap_uint<64> > str_0_ac;
+#pragma HLS STREAM variable = str_0_ac depth = 64
+    hls::stream<ap_uint<64> > str_1_ac;
+#pragma HLS STREAM variable = str_1_ac depth = 64
+    hls::stream<ap_uint<64> > str_2_ac;
+#pragma HLS STREAM variable = str_2_ac depth = 64
+    hls::stream<ap_uint<64> > str_3_ac;
+#pragma HLS STREAM variable = str_3_ac depth = 64
+    hls::stream<ap_uint<64> > str_0_uv;
+#pragma HLS STREAM variable = str_0_uv depth = 64
+    hls::stream<ap_uint<64> > str_1_uv;
+#pragma HLS STREAM variable = str_1_uv depth = 64
+    hls::stream<ap_uint<64> > str_2_uv;
+#pragma HLS STREAM variable = str_2_uv depth = 64
+    hls::stream<ap_uint<64> > str_3_uv;
+#pragma HLS STREAM variable = str_3_uv depth = 64
+    hls::stream<ap_uint<1> > str_type_mb_out;
+#pragma HLS STREAM variable = str_type_mb_out depth = 64
+    // Size streams passed to both kernel_2_RecordTokens_pre and kernel_2_CreateTokens_with_isFinal.
+    hls::stream<uint16_t> str_mb_h_out;
+#pragma HLS STREAM variable = str_mb_h_out depth = 64
+    hls::stream<uint16_t> str_mb_w_out;
+#pragma HLS STREAM variable = str_mb_w_out depth = 64
+    kernel_2_RecordTokens_pre(str_mb_h, str_mb_w, str_type_mb, str_level_dc, str_level_ac, str_level_uv, str_0_dc,
+                              str_1_dc, str_2_dc, str_3_dc, str_0_ac, str_1_ac, str_2_ac, str_3_ac, str_0_uv, str_1_uv,
+                              str_2_uv, str_3_uv, str_mb_h_out, str_mb_w_out, str_type_mb_out);
+    // Streams read after kernel_2_CreateTokens_with_isFinal: mb_h / mb_w here, the 16-bit tokens by the emitter.
+    hls::stream<uint16_t> str_mb_h_out2;
+#pragma HLS STREAM variable = str_mb_h_out2 depth = 64
+    hls::stream<uint16_t> str_mb_w_out2;
+#pragma HLS STREAM variable = str_mb_w_out2 depth = 64
+    hls::stream<ap_uint<16> > tokens_str_final;
+#pragma HLS STREAM variable = tokens_str_final depth = 1024
+    kernel_2_CreateTokens_with_isFinal(str_mb_h_out, str_mb_w_out, str_type_mb_out, str_0_dc, str_1_dc, str_2_dc,
+                                       str_3_dc, str_0_ac, str_1_ac, str_2_ac, str_3_ac, str_0_uv, str_1_uv, str_2_uv,
+                                       str_3_uv, str_mb_h_out2, str_mb_w_out2, tokens_str_final);
+    // One value is read from each size stream; their product is the count given to both pack calls.
+    uint16_t mb_h = str_mb_h_out2.read();
+    uint16_t mb_w = str_mb_w_out2.read();
+    VP8EmitTokens_str_hls_4stages(pout_bw, tokens_str_final,
+                                  (uint8_t*)prob); // VP8EmitTokens_hls(pout_bw, &tokens, (uint8_t*)prob);
+    PackStr2Mem32_t_NoLast<6, 256>(pout_ret, str_ret, mb_h * mb_w);
+    PackWideStr2Mem32_t_NoLast<64, 256>(pout_pred, str_pred, mb_h * mb_w);
+}
+}
+
+extern "C" { /*
+ * kernel_2_ArithmeticCoding_4_5axi: HLS top-level kernel with five m_axi ports, each on its
+ * own bundle (gmem0..gmem4), plus an s_axilite bundle named control.
+ *
+ * Call chain, in the order written below:
+ *   Kernel2_top_read(pin_level, ...)
+ *     -> kernel_2_RecordTokens_pre
+ *     -> kernel_2_CreateTokens_with_isFinal
+ *     -> VP8EmitTokens_str_hls_4stages(pout_bw, tokens_str_final, prob)
+ *   PackStr2Mem32_t_NoLast(pout_ret, str_ret, mb_h * mb_w)
+ *   PackWideStr2Mem32_t_NoLast(pout_pred, str_pred, mb_h * mb_w)
+ *
+ * pin_level : buffer handed to Kernel2_top_read; nothing else in this function touches it.
+ * pin_prob  : the first 4 * 8 * 3 * 11 bytes are copied into a local array before use.
+ * pout_bw   : passed to VP8EmitTokens_str_hls_4stages.
+ * pout_ret  : passed to PackStr2Mem32_t_NoLast together with str_ret.
+ * pout_pred : passed to PackWideStr2Mem32_t_NoLast together with str_pred.
+ *
+ * The statements of this body appear to match kernel_2_ArithmeticCoding_3_5axi; only the
+ * exported symbol name differs.
+ */
+void kernel_2_ArithmeticCoding_4_5axi(uint32_t pin_level[SIZE32_MEM_BW],
+                                      uint8_t* pin_prob, // 2048 instead of [4 * 8 * 3 * 11],
+                                      uint32_t pout_bw[SIZE32_MEM_BW],
+                                      uint32_t pout_ret[SIZE32_MEM_RET],
+                                      uint32_t pout_pred[SIZE32_MEM_PRED]) {
+#pragma HLS INTERFACE m_axi port = pin_level offset = slave bundle = gmem0 depth =               \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pin_prob offset = slave bundle = gmem1 depth = 2048 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_bw offset = slave bundle = gmem2 depth =                     \
+    65536 * 384 / 4 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_ret offset = slave bundle = gmem3 depth =              \
+    65536 * 1 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_pred offset = slave bundle = gmem4 depth =                  \
+    65536 * 16 / 2 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+// Each pointer argument and the return port are additionally mapped to the s_axilite bundle named control.
+#pragma HLS INTERFACE s_axilite port = pin_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pin_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_bw bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_ret bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_pred bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+#pragma HLS DATAFLOW
+    // Local copy of the first 4 * 8 * 3 * 11 (= 1056) bytes of pin_prob.
+    uint8_t prob[4 * 8 * 3 * 11];
+    memcpy(prob, pin_prob, sizeof(prob));
+    // Streams handed to Kernel2_top_read together with pin_level.
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_dc;
+#pragma HLS STREAM variable = str_level_dc depth = 4
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_ac;
+#pragma HLS STREAM variable = str_level_ac depth = 8 * 16
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_uv;
+#pragma HLS STREAM variable = str_level_uv depth = 8 * 8
+    hls::stream<ap_uint<64> > str_pred;
+#pragma HLS STREAM variable = str_pred depth = 64
+    hls::stream<ap_uint<6> > str_ret;
+#pragma HLS STREAM variable = str_ret depth = 64
+    hls::stream<ap_uint<1> > str_type_mb;
+#pragma HLS STREAM variable = str_type_mb depth = 64
+    hls::stream<uint16_t> str_mb_h;
+#pragma HLS STREAM variable = str_mb_h depth = 64
+    hls::stream<uint16_t> str_mb_w;
+#pragma HLS STREAM variable = str_mb_w depth = 64
+    Kernel2_top_read(pin_level,
+
+                     str_level_dc, str_level_ac, str_level_uv, str_pred, str_ret, str_type_mb, str_mb_h, str_mb_w);
+    hls::stream<ap_uint<64> > str_0_dc; // first of twelve 64-bit streams shared by the next two calls
+#pragma HLS STREAM variable = str_0_dc depth = 64
+    hls::stream<ap_uint<64> > str_1_dc;
+#pragma HLS STREAM variable = str_1_dc depth = 64
+    hls::stream<ap_uint<64> > str_2_dc;
+#pragma HLS STREAM variable = str_2_dc depth = 64
+    hls::stream<ap_uint<64> > str_3_dc;
+#pragma HLS STREAM variable = str_3_dc depth = 64
+    hls::stream<ap_uint<64> > str_0_ac;
+#pragma HLS STREAM variable = str_0_ac depth = 64
+    hls::stream<ap_uint<64> > str_1_ac;
+#pragma HLS STREAM variable = str_1_ac depth = 64
+    hls::stream<ap_uint<64> > str_2_ac;
+#pragma HLS STREAM variable = str_2_ac depth = 64
+    hls::stream<ap_uint<64> > str_3_ac;
+#pragma HLS STREAM variable = str_3_ac depth = 64
+    hls::stream<ap_uint<64> > str_0_uv;
+#pragma HLS STREAM variable = str_0_uv depth = 64
+    hls::stream<ap_uint<64> > str_1_uv;
+#pragma HLS STREAM variable = str_1_uv depth = 64
+    hls::stream<ap_uint<64> > str_2_uv;
+#pragma HLS STREAM variable = str_2_uv depth = 64
+    hls::stream<ap_uint<64> > str_3_uv;
+#pragma HLS STREAM variable = str_3_uv depth = 64
+    hls::stream<ap_uint<1> > str_type_mb_out;
+#pragma HLS STREAM variable = str_type_mb_out depth = 64
+    // Size streams passed to both kernel_2_RecordTokens_pre and kernel_2_CreateTokens_with_isFinal.
+    hls::stream<uint16_t> str_mb_h_out;
+#pragma HLS STREAM variable = str_mb_h_out depth = 64
+    hls::stream<uint16_t> str_mb_w_out;
+#pragma HLS STREAM variable = str_mb_w_out depth = 64
+    kernel_2_RecordTokens_pre(str_mb_h, str_mb_w, str_type_mb, str_level_dc, str_level_ac, str_level_uv, str_0_dc,
+                              str_1_dc, str_2_dc, str_3_dc, str_0_ac, str_1_ac, str_2_ac, str_3_ac, str_0_uv, str_1_uv,
+                              str_2_uv, str_3_uv, str_mb_h_out, str_mb_w_out, str_type_mb_out);
+    // Streams read after kernel_2_CreateTokens_with_isFinal: mb_h / mb_w here, the 16-bit tokens by the emitter.
+    hls::stream<uint16_t> str_mb_h_out2;
+#pragma HLS STREAM variable = str_mb_h_out2 depth = 64
+    hls::stream<uint16_t> str_mb_w_out2;
+#pragma HLS STREAM variable = str_mb_w_out2 depth = 64
+    hls::stream<ap_uint<16> > tokens_str_final;
+#pragma HLS STREAM variable = tokens_str_final depth = 1024
+    kernel_2_CreateTokens_with_isFinal(str_mb_h_out, str_mb_w_out, str_type_mb_out, str_0_dc, str_1_dc, str_2_dc,
+                                       str_3_dc, str_0_ac, str_1_ac, str_2_ac, str_3_ac, str_0_uv, str_1_uv, str_2_uv,
+                                       str_3_uv, str_mb_h_out2, str_mb_w_out2, tokens_str_final);
+    // One value is read from each size stream; their product is the count given to both pack calls.
+    uint16_t mb_h = str_mb_h_out2.read();
+    uint16_t mb_w = str_mb_w_out2.read();
+    VP8EmitTokens_str_hls_4stages(pout_bw, tokens_str_final,
+                                  (uint8_t*)prob); // VP8EmitTokens_hls(pout_bw, &tokens, (uint8_t*)prob);
+    PackStr2Mem32_t_NoLast<6, 256>(pout_ret, str_ret, mb_h * mb_w);
+    PackWideStr2Mem32_t_NoLast<64, 256>(pout_pred, str_pred, mb_h * mb_w);
+}
+}
+
+/****************************************************/
+// changes
+// The old versions will be given the suffix '_old'
+// to disable them
+/****************************************************/
+ap_uint<8> hls_AC_range_str_32bits(hls::stream<ap_uint<16> >& str_token,
+                                   // uint8_t probas[4 * 8 * 3 * 11],
+                                   uint32_t* probas, //[1 * 8 * 3 * 11],
+                                   hls::stream<ap_uint<2 + 3 + 8> >& str_fnl_bit_shift_split_1) { /*
+     * Range-update loop: reads one 16-bit token per iteration and writes one 13-bit record
+     * per token, stopping after the token whose bit 12 (isFinal) is set.
+     *
+     * Token bits used here:
+     *   [15]   bit      selects which of the two candidate ranges becomes the new range
+     *   [14]   isFix    when set, bits (7,0) are used directly as the probability p
+     *   [12]   isFinal  terminates the loop after this token
+     *   (10,0) when isFix is clear: (10,2) indexes a 32-bit word of probas and (1,0)
+     *          selects the byte of that word used as p
+     * Output record bits: [12] isFinal, [11] bit, (10,8) shift, (7,0) split_1.
+     *
+     * Returns the range value left after the final token has been processed.
+     */
+    ap_uint<8> range_old = 254; // range carried from one iteration to the next
+    ap_uint<8> split_1;
+    ap_uint<3> shift;
+    ap_uint<2 + 3 + 8> tmp;
+    ap_uint<1> isFinal = 0;
+AC_RANGE_STR32:
+    do {
+#pragma HLS LOOP_TRIPCOUNT min = 512 * 512 / 256 * 384 * 2 max = 4096 * 4096 / 256 * 384 * 2
+#pragma HLS PIPELINE II = 1
+        ap_uint<16> token = str_token.read();
+        isFinal = token[12];
+        ap_uint<1> bit = token[15];
+        ap_uint<1> isFix = token[14];
+        ap_uint<8> p;
+        if (isFix)
+            p = token(7, 0);
+        else {
+            // p = probas[token(10, 0)];
+            ap_uint<32> tmp_p = probas[token(10, 2)]; // 32-bit word holding four 8-bit entries
+            ap_uint<2> be = token(1, 0);              // byte position inside that word
+            p = tmp_p(be * 8 + 7, be * 8);
+        }
+        ap_uint<8> tmp_p = (range_old * p) >> 8;
+        split_1 = tmp_p + 1;
+        // Two candidate ranges before normalisation: nor1 is used when bit == 1, nor2 when bit == 0.
+        ap_uint<8> range_new;
+        ap_uint<8> range_nor1 = range_old - tmp_p;
+        ap_uint<8> range_nor2 = tmp_p + 1;
+        // The chosen candidate is shifted left by its leading-zero count, then decremented by one.
+        if (bit) {
+            shift = range_nor1.countLeadingZeros();
+            range_new = (range_nor1 << range_nor1.countLeadingZeros()) - 1;
+        } else {
+            shift = range_nor2.countLeadingZeros();
+            range_new = (range_nor2 << range_nor2.countLeadingZeros()) - 1;
+        }
+        tmp[12] = isFinal;
+        tmp[11] = bit;
+        tmp(10, 8) = shift;
+        tmp(7, 0) = split_1;
+        str_fnl_bit_shift_split_1.write(tmp);
+        range_old = range_new;
+    } while (isFinal == 0); // the final token is still processed and emitted before leaving
+    return range_old;
+}
+void VP8EmitTokens_allstr_hls_dataflow_4stages_32bits(uint32_t* pout_bw, //[SIZE32_MEM_BW],
+                                                      hls::stream<ap_uint<16> >& str_token,
+                                                      uint32_t* probas, //[1 * 8 * 3 * 11],
+                                                      ap_uint<8>& bw_range,
+                                                      ap_uint<24>& bw_value,
+                                                      ap_int<4>& bw_nb_bits,
+                                                      ap_uint<32>& bw_pos,
+                                                      ap_uint<16>& bw_run) { /*
+     * DATAFLOW region chaining the emitter stages through local streams:
+     *   str_token -> hls_AC_range_str_32bits -> hls_AC_value_str
+     *             -> VP8PutBit_hls_BytePackage_str_run -> VP8PutBit_hls_BytePackage_str_pos
+     *             -> PackStr2Mem_t -> pout_bw
+     *
+     * Outputs returned through the reference arguments:
+     *   bw_range   : return value of hls_AC_range_str_32bits
+     *   bw_nb_bits : bits (27,24) of the value returned by hls_AC_value_str
+     *   bw_value   : bits (23,0) of the same value
+     *   bw_run     : return value of VP8PutBit_hls_BytePackage_str_run
+     *   bw_pos     : return value of VP8PutBit_hls_BytePackage_str_pos
+     */
+#pragma HLS DATAFLOW
+    // Range stage ("a" loop): tokens in, {isFinal, bit, shift, split_1} records out.
+    hls::stream<ap_uint<2 + 3 + 8> > str_fnl_bit_shift_split_1;
+#pragma HLS STREAM variable = str_fnl_bit_shift_split_1 depth = 64
+    bw_range = hls_AC_range_str_32bits(str_token, probas, str_fnl_bit_shift_split_1);
+
+    // Value stage ("c" loop): returns nb_bits and value packed into one 28-bit word.
+    hls::stream<ap_uint<18> > str_Last_isBit_Bits;
+#pragma HLS STREAM variable = str_Last_isBit_Bits depth = 64
+    ap_uint<4 + 24> nb_value = hls_AC_value_str(str_fnl_bit_shift_split_1, str_Last_isBit_Bits);
+    bw_nb_bits = nb_value(27, 24);
+    bw_value = nb_value(23, 0);
+
+    // Package stage 1: consumes str_Last_isBit_Bits and returns the run counter.
+    hls::stream<ap_uint<26> > str_isFinal_run_cy_pre;
+#pragma HLS STREAM variable = str_isFinal_run_cy_pre depth = 1024
+    ap_uint<16> run = VP8PutBit_hls_BytePackage_str_run(str_Last_isBit_Bits, str_isFinal_run_cy_pre);
+
+    // Package stage 2: consumes str_isFinal_run_cy_pre and returns the position counter.
+    hls::stream<ap_uint<9> > str_Last_byte;
+#pragma HLS STREAM variable = str_Last_byte depth = 1024
+    ap_uint<32> pos = VP8PutBit_hls_BytePackage_str_pos(str_isFinal_run_cy_pre, str_Last_byte);
+
+    bw_run = run(15, 0);
+    bw_pos = pos(31, 0);
+PACK_STR2:
+    PackStr2Mem_t<9, 8, 256>(pout_bw, str_Last_byte); // last stage: str_Last_byte goes out through pout_bw
+}
+// Runs the 32-bit-probability token emitter and then stores the final coder state in the
+// tail of the probas buffer.
+//
+//   pout_bw   : output pointer forwarded to VP8EmitTokens_allstr_hls_dataflow_4stages_32bits.
+//   str_token : 16-bit token stream consumed by that call.
+//   probas    : 32-bit word buffer. The callee reads probabilities from it; this function
+//               then writes eight words starting at word index 2048 / 4 - SIZE32_AC_STATE,
+//               so the caller's buffer must cover that range.
+//
+// Words written, in order: range, value, nb_bits, pos, run, (1024 + pos) << 2, 0, 0.
+//
+// The stream local that used to be declared here was never referenced (the dataflow callee
+// owns its own streams), so it has been removed.
+void VP8EmitTokens_str_hls_4stages_32bits(uint32_t* pout_bw, //[SIZE32_MEM_BW],
+                                          hls::stream<ap_uint<16> >& str_token,
+                                          uint32_t* probas) {
+    // Final coder state; every field is assigned by the dataflow call below.
+    ap_uint<8> bw_range;  // = 254;
+    ap_uint<24> bw_value; // = 0;
+    ap_int<4> bw_nb_bits; // = -8;
+    ap_uint<32> bw_pos;   // = 0;
+    ap_uint<16> bw_run;   // = 0;
+
+    VP8EmitTokens_allstr_hls_dataflow_4stages_32bits(pout_bw, str_token, probas, bw_range, bw_value, bw_nb_bits, bw_pos,
+                                                     bw_run);
+
+    // State block located SIZE32_AC_STATE words before the end of a 2048-byte buffer.
+    uint32_t* p_bw = probas + 2048 / 4 - SIZE32_AC_STATE;
+    p_bw[0] = bw_range;
+    p_bw[1] = bw_value;
+    p_bw[2] = bw_nb_bits; // signed 4-bit value converted to uint32_t
+    p_bw[3] = bw_pos;
+    p_bw[4] = bw_run;
+    p_bw[5] = (1024 + bw_pos) << 2; // MAX_NUM_MB_W * MAX_NUM_MB_H * 384 / SYSTEM_MIN_COMP_RATIO - 1;
+    p_bw[6] = 0;
+    p_bw[7] = 0;
+}
+// Packs the 6-bit str_ret entries and the 64-bit str_pred entries into 32-bit words and
+// copies them to memory in chunks of at most BURST_32 words.
+//
+//   pdes_ret  : destination for the ret words. Four consecutive entries share one word,
+//               entry i occupying byte (i & 3); the top two bits of each byte are 0.
+//   pdes_pred : destination for the pred words. Every entry yields two words, low half first.
+//   str_ret   : stream of 6-bit values, read once per iteration.
+//   str_pred  : stream of 64-bit values, read once per iteration.
+//   num_str   : number of entries to take from each stream; the loop does not run for <= 0.
+//
+// When num_str is not a multiple of four, the unused bytes of the last ret word keep whatever
+// the previous word held in those positions (zero if there was no previous word).
+void Pack_ret6_pred64_to32bits(uint32_t* pdes_ret,
+                               uint32_t* pdes_pred,
+                               hls::stream<ap_uint<6> >& str_ret,
+                               hls::stream<ap_uint<64> >& str_pred,
+                               int num_str) {
+    const int BURST_32 = 256; // capacity, in 32-bit words, of each staging buffer
+
+    uint32_t buff_ret[BURST_32];
+    uint32_t buff_pred[BURST_32];
+    ap_uint<8> tmp_ret_r = 0;  // bits (7,6) stay 0; bits (5,0) are refreshed every iteration
+    ap_uint<32> tmp_ret32 = 0; // zero-initialised so a short first word never carries undefined bits
+    int cnt_ret = 0;           // words currently staged in buff_ret
+    int cnt_pred = 0;          // words currently staged in buff_pred
+PRED64_TO32:
+    for (int i_read = 0; i_read < num_str; i_read++) {
+#pragma HLS PIPELINE off
+        // Set on the last iteration; forces the partial word and both buffers to be flushed.
+        ap_uint<1> isFnl = (i_read == num_str - 1) ? 1 : 0;
+        tmp_ret_r(5, 0) = str_ret.read();
+        ap_uint<64> pred_r = str_pred.read();
+        int off_ret = i_read & 3;
+        tmp_ret32(off_ret * 8 + 7, off_ret * 8) = tmp_ret_r(7, 0);
+        // A ret word is complete after every fourth entry, or early on the final entry.
+        if (off_ret == 3 || isFnl) buff_ret[cnt_ret++] = tmp_ret32;
+        buff_pred[cnt_pred++] = pred_r(31, 0);
+        buff_pred[cnt_pred++] = pred_r(63, 32);
+
+        if (cnt_ret == BURST_32 || isFnl) {
+            memcpy(pdes_ret, buff_ret, cnt_ret * 4);
+            pdes_ret += cnt_ret;
+            cnt_ret = 0;
+        }
+        // cnt_pred grows by two per entry, so it reaches BURST_32 exactly.
+        if (cnt_pred == BURST_32 || isFnl) {
+            memcpy(pdes_pred, buff_pred, cnt_pred * 4);
+            pdes_pred += cnt_pred;
+            cnt_pred = 0;
+        }
+    } // for i_read;
+}
+
+void kernel_2_Top_dataflow(uint32_t* pin_level, //[SIZE32_MEM_BW],
+                           uint32_t* prob,
+                           uint32_t* pout_bw,   //[SIZE32_MEM_BW],
+                           uint32_t* pout_ret,  //[SIZE32_MEM_RET],
+                           uint32_t* pout_pred) //[SIZE32_MEM_PRED])
+{ /*
+   * Five-argument overload: non-inlined DATAFLOW region that runs the whole kernel-2 chain.
+   *
+   * Call chain, in the order written below:
+   *   Kernel2_top_read(pin_level, ...)
+   *     -> kernel_2_RecordTokens_pre
+   *     -> kernel_2_CreateTokens_with_isFinal
+   *     -> VP8EmitTokens_str_hls_4stages_32bits(pout_bw, tokens_str_final, prob)
+   *   Pack_ret6_pred64_to32bits(pout_ret, pout_pred, str_ret, str_pred, mb_h * mb_w)
+   *
+   * prob is read and written: VP8EmitTokens_str_hls_4stages_32bits stores state words at
+   * prob + 2048 / 4 - SIZE32_AC_STATE, so the caller's buffer must cover that range.
+   */
+#pragma HLS INLINE OFF
+#pragma HLS DATAFLOW
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_dc; // streams handed to Kernel2_top_read start here
+#pragma HLS STREAM variable = str_level_dc depth = 4
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_ac;
+#pragma HLS STREAM variable = str_level_ac depth = 8 * 16
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_uv;
+#pragma HLS STREAM variable = str_level_uv depth = 8 * 8
+    hls::stream<ap_uint<64> > str_pred;
+#pragma HLS STREAM variable = str_pred depth = 64
+    hls::stream<ap_uint<6> > str_ret;
+#pragma HLS STREAM variable = str_ret depth = 64
+    hls::stream<ap_uint<1> > str_type_mb;
+#pragma HLS STREAM variable = str_type_mb depth = 64
+    hls::stream<uint16_t> str_mb_h;
+#pragma HLS STREAM variable = str_mb_h depth = 64
+    hls::stream<uint16_t> str_mb_w;
+#pragma HLS STREAM variable = str_mb_w depth = 64
+    Kernel2_top_read(pin_level, str_level_dc, str_level_ac, str_level_uv, str_pred, str_ret, str_type_mb, str_mb_h,
+                     str_mb_w);
+    hls::stream<ap_uint<64> > str_0_dc; // first of twelve 64-bit streams shared by the next two calls
+#pragma HLS STREAM variable = str_0_dc depth = 64
+    hls::stream<ap_uint<64> > str_1_dc;
+#pragma HLS STREAM variable = str_1_dc depth = 64
+    hls::stream<ap_uint<64> > str_2_dc;
+#pragma HLS STREAM variable = str_2_dc depth = 64
+    hls::stream<ap_uint<64> > str_3_dc;
+#pragma HLS STREAM variable = str_3_dc depth = 64
+    hls::stream<ap_uint<64> > str_0_ac;
+#pragma HLS STREAM variable = str_0_ac depth = 64
+    hls::stream<ap_uint<64> > str_1_ac;
+#pragma HLS STREAM variable = str_1_ac depth = 64
+    hls::stream<ap_uint<64> > str_2_ac;
+#pragma HLS STREAM variable = str_2_ac depth = 64
+    hls::stream<ap_uint<64> > str_3_ac;
+#pragma HLS STREAM variable = str_3_ac depth = 64
+    hls::stream<ap_uint<64> > str_0_uv;
+#pragma HLS STREAM variable = str_0_uv depth = 64
+    hls::stream<ap_uint<64> > str_1_uv;
+#pragma HLS STREAM variable = str_1_uv depth = 64
+    hls::stream<ap_uint<64> > str_2_uv;
+#pragma HLS STREAM variable = str_2_uv depth = 64
+    hls::stream<ap_uint<64> > str_3_uv;
+#pragma HLS STREAM variable = str_3_uv depth = 64
+    hls::stream<ap_uint<1> > str_type_mb_out;
+#pragma HLS STREAM variable = str_type_mb_out depth = 64
+    // Size streams passed to both kernel_2_RecordTokens_pre and kernel_2_CreateTokens_with_isFinal.
+    hls::stream<uint16_t> str_mb_h_out;
+#pragma HLS STREAM variable = str_mb_h_out depth = 64
+    hls::stream<uint16_t> str_mb_w_out;
+#pragma HLS STREAM variable = str_mb_w_out depth = 64
+    kernel_2_RecordTokens_pre(str_mb_h, str_mb_w, str_type_mb, str_level_dc, str_level_ac, str_level_uv, str_0_dc,
+                              str_1_dc, str_2_dc, str_3_dc, str_0_ac, str_1_ac, str_2_ac, str_3_ac, str_0_uv, str_1_uv,
+                              str_2_uv, str_3_uv, str_mb_h_out, str_mb_w_out, str_type_mb_out);
+    // Streams read after kernel_2_CreateTokens_with_isFinal: mb_h / mb_w here, the 16-bit tokens by the emitter.
+    hls::stream<uint16_t> str_mb_h_out2;
+#pragma HLS STREAM variable = str_mb_h_out2 depth = 64
+    hls::stream<uint16_t> str_mb_w_out2;
+#pragma HLS STREAM variable = str_mb_w_out2 depth = 64
+    hls::stream<ap_uint<16> > tokens_str_final;
+#pragma HLS STREAM variable = tokens_str_final depth = 1024
+    kernel_2_CreateTokens_with_isFinal(str_mb_h_out, str_mb_w_out, str_type_mb_out, str_0_dc, str_1_dc, str_2_dc,
+                                       str_3_dc, str_0_ac, str_1_ac, str_2_ac, str_3_ac, str_0_uv, str_1_uv, str_2_uv,
+                                       str_3_uv, str_mb_h_out2, str_mb_w_out2, tokens_str_final);
+    // One value is read from each size stream; their product is the entry count for the pack call.
+    uint16_t mb_h = str_mb_h_out2.read();
+    uint16_t mb_w = str_mb_w_out2.read();
+    VP8EmitTokens_str_hls_4stages_32bits(pout_bw, tokens_str_final, (uint32_t*)prob);
+    // Earlier form, kept for reference: PackStr2Mem32_t_NoLast<6, 256>(pout_ret, str_ret, mb_h * mb_w);
+    // Earlier form, kept for reference: PackWideStr2Mem32_t_NoLast<64, 256>(pout_pred, str_pred, mb_h * mb_w);
+    Pack_ret6_pred64_to32bits(pout_ret,     // uint32_t* pdes_ret,
+                              pout_pred,    // uint32_t* pdes_pred,
+                              str_ret,      // hls::stream<ap_uint<6> > str_ret,
+                              str_pred,     // hls::stream<ap_uint<64> > str_pred,
+                              mb_h * mb_w); // int num_str)
+}
+
+void kernel_2_Top_dataflow(uint32_t pin_level[SIZE32_MEM_BW],
+                           uint32_t* prob,
+                           uint32_t pout_bw[SIZE32_MEM_BW],
+                           uint32_t pout_ret[SIZE32_MEM_RET],
+                           uint32_t pout_pred[SIZE32_MEM_PRED],
+                           uint32_t* pt_num_mb) { /*
+     * Six-argument overload: same call chain as the five-argument one, and additionally
+     * stores mb_h * mb_w through pt_num_mb as the last statement.
+     *
+     * Call chain, in the order written below:
+     *   Kernel2_top_read(pin_level, ...)
+     *     -> kernel_2_RecordTokens_pre
+     *     -> kernel_2_CreateTokens_with_isFinal
+     *     -> VP8EmitTokens_str_hls_4stages_32bits(pout_bw, tokens_str_final, prob)
+     *   Pack_ret6_pred64_to32bits(pout_ret, pout_pred, str_ret, str_pred, num_mb)
+     *
+     * prob is read and written: VP8EmitTokens_str_hls_4stages_32bits stores state words at
+     * prob + 2048 / 4 - SIZE32_AC_STATE, so the caller's buffer must cover that range.
+     */
+#pragma HLS INLINE OFF
+#pragma HLS DATAFLOW
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_dc; // streams handed to Kernel2_top_read start here
+#pragma HLS STREAM variable = str_level_dc depth = 4
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_ac;
+#pragma HLS STREAM variable = str_level_ac depth = 8 * 16
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_uv;
+#pragma HLS STREAM variable = str_level_uv depth = 8 * 8
+    hls::stream<ap_uint<64> > str_pred;
+#pragma HLS STREAM variable = str_pred depth = 64
+    hls::stream<ap_uint<6> > str_ret;
+#pragma HLS STREAM variable = str_ret depth = 64
+    hls::stream<ap_uint<1> > str_type_mb;
+#pragma HLS STREAM variable = str_type_mb depth = 64
+    hls::stream<uint16_t> str_mb_h;
+#pragma HLS STREAM variable = str_mb_h depth = 64
+    hls::stream<uint16_t> str_mb_w;
+#pragma HLS STREAM variable = str_mb_w depth = 64
+    Kernel2_top_read(pin_level, str_level_dc, str_level_ac, str_level_uv, str_pred, str_ret, str_type_mb, str_mb_h,
+                     str_mb_w);
+    hls::stream<ap_uint<64> > str_0_dc; // first of twelve 64-bit streams shared by the next two calls
+#pragma HLS STREAM variable = str_0_dc depth = 64
+    hls::stream<ap_uint<64> > str_1_dc;
+#pragma HLS STREAM variable = str_1_dc depth = 64
+    hls::stream<ap_uint<64> > str_2_dc;
+#pragma HLS STREAM variable = str_2_dc depth = 64
+    hls::stream<ap_uint<64> > str_3_dc;
+#pragma HLS STREAM variable = str_3_dc depth = 64
+    hls::stream<ap_uint<64> > str_0_ac;
+#pragma HLS STREAM variable = str_0_ac depth = 64
+    hls::stream<ap_uint<64> > str_1_ac;
+#pragma HLS STREAM variable = str_1_ac depth = 64
+    hls::stream<ap_uint<64> > str_2_ac;
+#pragma HLS STREAM variable = str_2_ac depth = 64
+    hls::stream<ap_uint<64> > str_3_ac;
+#pragma HLS STREAM variable = str_3_ac depth = 64
+    hls::stream<ap_uint<64> > str_0_uv;
+#pragma HLS STREAM variable = str_0_uv depth = 64
+    hls::stream<ap_uint<64> > str_1_uv;
+#pragma HLS STREAM variable = str_1_uv depth = 64
+    hls::stream<ap_uint<64> > str_2_uv;
+#pragma HLS STREAM variable = str_2_uv depth = 64
+    hls::stream<ap_uint<64> > str_3_uv;
+#pragma HLS STREAM variable = str_3_uv depth = 64
+    hls::stream<ap_uint<1> > str_type_mb_out;
+#pragma HLS STREAM variable = str_type_mb_out depth = 64
+    // Size streams passed to both kernel_2_RecordTokens_pre and kernel_2_CreateTokens_with_isFinal.
+    hls::stream<uint16_t> str_mb_h_out;
+#pragma HLS STREAM variable = str_mb_h_out depth = 64
+    hls::stream<uint16_t> str_mb_w_out;
+#pragma HLS STREAM variable = str_mb_w_out depth = 64
+    kernel_2_RecordTokens_pre(str_mb_h, str_mb_w, str_type_mb, str_level_dc, str_level_ac, str_level_uv, str_0_dc,
+                              str_1_dc, str_2_dc, str_3_dc, str_0_ac, str_1_ac, str_2_ac, str_3_ac, str_0_uv, str_1_uv,
+                              str_2_uv, str_3_uv, str_mb_h_out, str_mb_w_out, str_type_mb_out);
+    // Streams read after kernel_2_CreateTokens_with_isFinal: mb_h / mb_w here, the 16-bit tokens by the emitter.
+    hls::stream<uint16_t> str_mb_h_out2;
+#pragma HLS STREAM variable = str_mb_h_out2 depth = 64
+    hls::stream<uint16_t> str_mb_w_out2;
+#pragma HLS STREAM variable = str_mb_w_out2 depth = 64
+    hls::stream<ap_uint<16> > tokens_str_final;
+#pragma HLS STREAM variable = tokens_str_final depth = 1024
+    kernel_2_CreateTokens_with_isFinal(str_mb_h_out, str_mb_w_out, str_type_mb_out, str_0_dc, str_1_dc, str_2_dc,
+                                       str_3_dc, str_0_ac, str_1_ac, str_2_ac, str_3_ac, str_0_uv, str_1_uv, str_2_uv,
+                                       str_3_uv, str_mb_h_out2, str_mb_w_out2, tokens_str_final);
+    // One value is read from each size stream; their product is used for packing and reported to the caller.
+    uint16_t mb_h = str_mb_h_out2.read();
+    uint16_t mb_w = str_mb_w_out2.read();
+    uint32_t num_mb = mb_h * mb_w;
+    VP8EmitTokens_str_hls_4stages_32bits(pout_bw, tokens_str_final, (uint32_t*)prob);
+    // Earlier form, kept for reference: PackStr2Mem32_t_NoLast<6, 256>(pout_ret, str_ret, mb_h * mb_w);
+    // Earlier form, kept for reference: PackWideStr2Mem32_t_NoLast<64, 256>(pout_pred, str_pred, mb_h * mb_w);
+    Pack_ret6_pred64_to32bits(pout_ret,  // uint32_t* pdes_ret,
+                              pout_pred, // uint32_t* pdes_pred,
+                              str_ret,   // hls::stream<ap_uint<6> > str_ret,
+                              str_pred,  // hls::stream<ap_uint<64> > str_pred,
+                              num_mb);   // int num_str)
+    *pt_num_mb = num_mb; // report the entry count back through the extra pointer argument
+}
+#define USING_PIC_BURST
+// Single-shot core without the multi-picture wrapper loop: copies the probability table
+// into a local buffer and runs the five-argument kernel_2_Top_dataflow once.
+//
+//   pin_level : forwarded unchanged to kernel_2_Top_dataflow.
+//   pin_prob  : the first 1 * 8 * 3 * 11 words are copied into the local buffer.
+//   pout_bw, pout_ret, pout_pred : forwarded unchanged to kernel_2_Top_dataflow.
+//
+// NOTE(review): unlike kernel_2_ArithmeticCoding_core_wrapper, the state words that the
+// callee leaves in the local buffer are not copied back to pin_prob here - confirm that
+// this is intended before relying on them.
+void kernel_2_ArithmeticCoding_core( // NoWrapper
+    uint32_t pin_level[SIZE32_MEM_BW],
+    uint32_t pin_prob[2048 / 4],
+    uint32_t pout_bw[SIZE32_MEM_BW],
+    uint32_t pout_ret[SIZE32_MEM_RET],
+    uint32_t pout_pred[SIZE32_MEM_PRED]) {
+    // kernel_2_Top_dataflow ends in VP8EmitTokens_str_hls_4stages_32bits, which writes eight
+    // state words starting at prob + 2048 / 4 - SIZE32_AC_STATE. A buffer holding only the
+    // 1 * 8 * 3 * 11 table words is too small for that write, so the local copy spans the
+    // whole 2048-byte block.
+    uint32_t prob[2048 / 4];
+    // Fetch exactly the probability table, the same amount as before the buffer was enlarged.
+    memcpy(prob, pin_prob, (1 * 8 * 3 * 11) * sizeof(uint32_t));
+    kernel_2_Top_dataflow(pin_level,  // uint32_t pin_level[SIZE32_MEM_BW],
+                          prob,       // uint32_t* prob,
+                          pout_bw,    // uint32_t pout_bw[SIZE32_MEM_BW],
+                          pout_ret,   // uint32_t pout_ret[SIZE32_MEM_RET],
+                          pout_pred); // uint32_t pout_pred[SIZE32_MEM_PRED])
+}
+void kernel_2_ArithmeticCoding_core_wrapper_bad_bad(uint32_t pin_level_mult[SIZE32_MEM_BW],
+                                                    uint32_t pin_prob_mult[2048 / 4 * 64],
+                                                    uint32_t pout_bw_mult[SIZE32_MEM_BW],
+                                                    uint32_t pout_ret_mult[SIZE32_MEM_RET],
+                                                    uint32_t pout_pred_mult[SIZE32_MEM_PRED]) { /*
+     * Variant of the multi-run wrapper that keeps running offsets instead of advancing the
+     * pointers, and prints intermediate words with printf.
+     *
+     * Runs kernel_2_Top_dataflow pid_mult + 1 times, where pid_mult is read once from
+     * pin_prob[OFF_PID_PROB_8BIT / 4]. Per iteration it:
+     *   1. copies OFF_NUM_MB_32 + 1 words from pin_prob into a local 512-word buffer,
+     *   2. takes num_mb from word OFF_NUM_MB_32 of that buffer,
+     *   3. calls kernel_2_Top_dataflow on the offset level / bw / ret / pred pointers,
+     *   4. writes words 504..510 of the local buffer to pin_prob[offset_prob + 504 ...],
+     *   5. advances every offset by the matching Get_Busoffset_* value.
+     *
+     * NOTE(review): step 1 reads from pin_prob without adding offset_prob, while step 4
+     * writes with offset_prob applied, so every iteration re-reads the first block. Given
+     * the _bad_bad suffix this is presumably a known-broken variant - confirm before use.
+     */
+    uint32_t* pin_level = pin_level_mult; //_mult[SIZE32_MEM_BW],
+    uint32_t* pin_prob = pin_prob_mult;   //_mult[2048/4*64],
+    uint32_t* pout_bw = pout_bw_mult;     //_mult[SIZE32_MEM_BW],
+    uint32_t* pout_ret = pout_ret_mult;   //_mult[SIZE32_MEM_RET],
+    uint32_t* pout_pred = pout_pred_mult; //_mult[SIZE32_MEM_PRED])
+    // Running word offsets into each buffer; all start at zero for the first run.
+    uint32_t offset_pin_level = 0; // = Get_Busoffset_level(num_mb);
+    uint32_t offset_prob = 0;      // Get_Busoffset_prob_32bits();
+    uint32_t offset_pout_bw = 0;   // Get_Busoffset_pout_bw(num_mb);
+    uint32_t offset_pout_ret = 0;  // Get_Busoffset_pout_ret(num_mb);
+    uint32_t offset_pout_pred = 0; // Get_Busoffset_pout_pred(num_mb);
+    // Loop bound, read once before the loop; the <= comparison gives pid_mult + 1 runs.
+    uint32_t pid_mult = pin_prob[OFF_PID_PROB_8BIT / 4];
+CORE_WRAPPER_BAD:
+    for (int myloop = 0; myloop <= pid_mult; myloop++) {
+        uint32_t prob[512]; //[1 * 8 * 3 * 11];
+        // Source pointer is pin_prob itself: offset_prob is not applied to this read.
+        memcpy(prob, pin_prob, (OFF_NUM_MB_32 + 1) * 4);
+        for (int k = 256; k < (OFF_NUM_MB_32 + 1); k++) printf("%d, ", prob[k]);
+        printf("\n");
+        // Entry count for this run, also used to size the offset steps below.
+        uint32_t num_mb = prob[OFF_NUM_MB_32];
+        kernel_2_Top_dataflow(pin_level + offset_pin_level, // uint32_t pin_level[SIZE32_MEM_BW],
+                              prob,                         // uint32_t* prob,
+                              pout_bw + offset_pout_bw,     // uint32_t pout_bw[SIZE32_MEM_BW],
+                              pout_ret + offset_pout_ret,   // uint32_t pout_ret[SIZE32_MEM_RET],
+                              pout_pred + offset_pout_pred);
+    SHIFT_PROB_504:
+        for (int k = 0; k < 7; k++) pin_prob[offset_prob + 504 + k] = prob[504 + k];
+        for (int k = 0; k < 7; k++) printf("%d, ", pin_prob[offset_prob + 504 + k]);
+        printf("\n");
+        // memcpy(pin_prob, prob, (7)*4 );
+        // pin_prob[500] = i;
+        offset_pin_level += Get_Busoffset_level(num_mb);
+        offset_prob += Get_Busoffset_prob_32bits();
+        offset_pout_bw += Get_Busoffset_pout_bw(num_mb);
+        offset_pout_ret += Get_Busoffset_pout_ret(num_mb);
+        offset_pout_pred += Get_Busoffset_pout_pred(num_mb);
+
+    } // while( pid_mult!=0);
+}
+void kernel_2_ArithmeticCoding_core_wrapper(uint32_t pin_level_mult[SIZE32_MEM_BW],
+                                            uint32_t pin_prob_mult[2048 / 4 * 64],
+                                            uint32_t pout_bw_mult[SIZE32_MEM_BW],
+                                            uint32_t pout_ret_mult[SIZE32_MEM_RET],
+                                            uint32_t pout_pred_mult[SIZE32_MEM_PRED]) { /*
+     * Multi-run wrapper: runs the five-argument kernel_2_Top_dataflow pid_mult + 1 times,
+     * where pid_mult is read once from pin_prob[OFF_PID_PROB_8BIT / 4] before the loop.
+     *
+     * Per iteration it:
+     *   1. copies words 0..264 of the current pin_prob block into a local 512-word buffer,
+     *   2. takes num_mb from word 264 of that buffer,
+     *   3. calls kernel_2_Top_dataflow on the current level / bw / ret / pred pointers,
+     *   4. copies words 504..510 of the local buffer back to the current pin_prob block,
+     *   5. advances all five pointers by the matching Get_Busoffset_* value.
+     *
+     * NOTE(review): the copy loop is labelled COPY_PROB_256 but moves 265 words - the label
+     * looks stale; confirm whether 264 is meant to equal OFF_NUM_MB_32.
+     */
+    uint32_t* pin_level = pin_level_mult; //_mult[SIZE32_MEM_BW],
+    uint32_t* pin_prob = pin_prob_mult;   //_mult[2048/4*64],
+    uint32_t* pout_bw = pout_bw_mult;     //_mult[SIZE32_MEM_BW],
+    uint32_t* pout_ret = pout_ret_mult;   //_mult[SIZE32_MEM_RET],
+    uint32_t* pout_pred = pout_pred_mult; //_mult[SIZE32_MEM_PRED])
+    // Loop bound, read once before the loop; the <= comparison gives pid_mult + 1 runs.
+    uint32_t pid_mult = pin_prob[OFF_PID_PROB_8BIT / 4];
+CORE_WRAPPER:
+    for (int myloop = 0; myloop <= pid_mult; myloop++) {
+        // (an earlier do / while form of this loop survives only as comments)
+        uint32_t prob[512]; //[1 * 8 * 3 * 11];
+    COPY_PROB_256:
+        for (int k = 0; k < 265; k++) prob[k] = pin_prob[k];
+        // memcpy(prob, pin_prob,4 * 8 * 3 * 11);// sizeof(prob));
+        // pid_mult = pin_prob[OFF_PID_PROB_8BIT/4];
+        // pid_mult -= 1;
+        uint32_t num_mb = prob[264]; // last word copied above; sizes the pointer steps below
+        kernel_2_Top_dataflow(pin_level,  // uint32_t pin_level[SIZE32_MEM_BW],
+                              prob,       // uint32_t* prob,
+                              pout_bw,    // uint32_t pout_bw[SIZE32_MEM_BW],
+                              pout_ret,   // uint32_t pout_ret[SIZE32_MEM_RET],
+                              pout_pred); // uint32_t pout_pred[SIZE32_MEM_PRED]; a sixth &num_mb argument is disabled
+                                          // Earlier form of the copy-back below:
+                                          // memcpy(pin_prob+504, prob+504, 28);//the last DWord is used for dirty
+    MEMCPY_504:
+        for (int k = 0; k < 7; k++) pin_prob[504 + k] = prob[504 + k];
+        // pin_prob[500] = i;
+        uint32_t offset_pin_level = Get_Busoffset_level(num_mb);
+        uint32_t offset_prob = Get_Busoffset_prob_32bits();
+        uint32_t offset_pout_bw = Get_Busoffset_pout_bw(num_mb);
+        uint32_t offset_pout_ret = Get_Busoffset_pout_ret(num_mb);
+        uint32_t offset_pout_pred = Get_Busoffset_pout_pred(num_mb);
+        pin_level += offset_pin_level; //_mult[SIZE32_MEM_BW],
+        pin_prob += offset_prob;       //_mult[2048/4*64],
+        pout_bw += offset_pout_bw;     //_mult[SIZE32_MEM_BW],
+        pout_ret += offset_pout_ret;   //_mult[SIZE32_MEM_RET],
+        pout_pred += offset_pout_pred; //_mult[SIZE32_MEM_PRED])
+    }                                  // while( pid_mult!=0);
+}
+
+// namespace xf {
+// namespace codec {
+
+extern "C" { // C linkage: the kernel symbol is exported without C++ name mangling
+void webp_2_ArithmeticCoding_1( // NoWrapper; kernel entry that only declares interfaces and forwards its five buffers
+    uint32_t pin_level[SIZE32_MEM_BW], // m_axi bundle gmem0
+    uint32_t pin_prob[2048 / 4 * 64], // m_axi bundle gmem1
+    uint32_t pout_bw[SIZE32_MEM_BW], // m_axi bundle gmem2
+    uint32_t pout_ret[SIZE32_MEM_RET], // m_axi bundle gmem1 (shared with pin_prob and pout_pred)
+    uint32_t pout_pred[SIZE32_MEM_PRED]) { // m_axi bundle gmem1 (shared with pin_prob and pout_ret)
+#pragma HLS INTERFACE m_axi port = pin_level offset = slave bundle = gmem0 depth =               \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pin_prob offset = slave bundle = gmem1 depth =              \
+    2048 / 4 * 64 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_bw offset = slave bundle = gmem2 depth =                     \
+    65536 * 384 / 4 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_ret offset = slave bundle = gmem1 depth =              \
+    65536 * 1 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_pred offset = slave bundle = gmem1 depth =                  \
+    65536 * 16 / 2 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+// Every argument, plus `return`, is additionally bound to the s_axilite bundle "control".
+#pragma HLS INTERFACE s_axilite port = pin_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pin_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_bw bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_ret bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_pred bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+//#pragma HLS DATAFLOW  -- left disabled in this entry point; the kernel_2_ArithmeticCoding_2.._8 entry points enable it
+// memcpy(pout_bw, pin_prob, 2048*5);
+// memcpy(pout_bw+2048*5, pin_level, 1024*5);
+/*	uint32_t tmp[2048*8];
+        memcpy(tmp, pin_prob, 2048*5);
+                memcpy(pout_bw, tmp, 2048*5);
+
+                memcpy(tmp, pin_level, 1024*5);
+
+                memcpy(pout_bw+2048*5, tmp, 1024*5);*/
+/*		for(int i=0;i<2048/4*5;i++)
+                        pout_bw[i] = pin_prob[i];
+
+                for(int i=0;i<1024/4*5;i++)
+                                pout_bw[2048/4*5+i] = pin_level[i];
+
+*/
+#ifdef USING_PIC_BURST
+    kernel_2_ArithmeticCoding_core_wrapper(pin_level, pin_prob, pout_bw, pout_ret, pout_pred);
+#else
+    kernel_2_ArithmeticCoding_core(pin_level, pin_prob, pout_bw, pout_ret, pout_pred);
+#endif
+}
+}
+//} // namespace codec
+//} // namespace xf
+
+extern "C" { // C linkage: the kernel symbol is exported without C++ name mangling
+void kernel_2_ArithmeticCoding_2( // NoWrapper; kernel entry that only declares interfaces and forwards its five buffers
+    uint32_t pin_level[SIZE32_MEM_BW], // m_axi bundle gmem0
+    uint32_t pin_prob[2048 / 4 * 64], // m_axi bundle gmem1
+    uint32_t pout_bw[SIZE32_MEM_BW], // m_axi bundle gmem2
+    uint32_t pout_ret[SIZE32_MEM_RET], // m_axi bundle gmem1 (shared with pin_prob and pout_pred)
+    uint32_t pout_pred[SIZE32_MEM_PRED]) { // m_axi bundle gmem1 (shared with pin_prob and pout_ret)
+#pragma HLS INTERFACE m_axi port = pin_level offset = slave bundle = gmem0 depth =               \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pin_prob offset = slave bundle = gmem1 depth =              \
+    2048 / 4 * 64 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_bw offset = slave bundle = gmem2 depth =                     \
+    65536 * 384 / 4 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_ret offset = slave bundle = gmem1 depth =              \
+    65536 * 1 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_pred offset = slave bundle = gmem1 depth =                  \
+    65536 * 16 / 2 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+// Every argument, plus `return`, is additionally bound to the s_axilite bundle "control".
+#pragma HLS INTERFACE s_axilite port = pin_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pin_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_bw bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_ret bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_pred bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+#pragma HLS DATAFLOW
+// Compile-time switch: with USING_PIC_BURST defined the *_core_wrapper variant is called, otherwise *_core.
+#ifdef USING_PIC_BURST
+    kernel_2_ArithmeticCoding_core_wrapper(pin_level, pin_prob, pout_bw, pout_ret, pout_pred);
+#else
+    kernel_2_ArithmeticCoding_core(pin_level, pin_prob, pout_bw, pout_ret, pout_pred);
+#endif
+}
+}
+
+extern "C" { // C linkage: the kernel symbol is exported without C++ name mangling
+void kernel_2_ArithmeticCoding_3( // NoWrapper; kernel entry that only declares interfaces and forwards its five buffers
+    uint32_t pin_level[SIZE32_MEM_BW], // m_axi bundle gmem0
+    uint32_t pin_prob[2048 / 4 * 64], // m_axi bundle gmem1
+    uint32_t pout_bw[SIZE32_MEM_BW], // m_axi bundle gmem2
+    uint32_t pout_ret[SIZE32_MEM_RET], // m_axi bundle gmem1 (shared with pin_prob and pout_pred)
+    uint32_t pout_pred[SIZE32_MEM_PRED]) { // m_axi bundle gmem1 (shared with pin_prob and pout_ret)
+#pragma HLS INTERFACE m_axi port = pin_level offset = slave bundle = gmem0 depth =               \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pin_prob offset = slave bundle = gmem1 depth =              \
+    2048 / 4 * 64 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_bw offset = slave bundle = gmem2 depth =                     \
+    65536 * 384 / 4 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_ret offset = slave bundle = gmem1 depth =              \
+    65536 * 1 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_pred offset = slave bundle = gmem1 depth =                  \
+    65536 * 16 / 2 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+// Every argument, plus `return`, is additionally bound to the s_axilite bundle "control".
+#pragma HLS INTERFACE s_axilite port = pin_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pin_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_bw bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_ret bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_pred bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+#pragma HLS DATAFLOW
+// Compile-time switch: with USING_PIC_BURST defined the *_core_wrapper variant is called, otherwise *_core.
+#ifdef USING_PIC_BURST
+    kernel_2_ArithmeticCoding_core_wrapper(pin_level, pin_prob, pout_bw, pout_ret, pout_pred);
+#else
+    kernel_2_ArithmeticCoding_core(pin_level, pin_prob, pout_bw, pout_ret, pout_pred);
+#endif
+}
+}
+
+extern "C" { // C linkage: the kernel symbol is exported without C++ name mangling
+void kernel_2_ArithmeticCoding_4( // NoWrapper; kernel entry that only declares interfaces and forwards its five buffers
+    uint32_t pin_level[SIZE32_MEM_BW], // m_axi bundle gmem0
+    uint32_t pin_prob[2048 / 4 * 64], // m_axi bundle gmem1
+    uint32_t pout_bw[SIZE32_MEM_BW], // m_axi bundle gmem2
+    uint32_t pout_ret[SIZE32_MEM_RET], // m_axi bundle gmem1 (shared with pin_prob and pout_pred)
+    uint32_t pout_pred[SIZE32_MEM_PRED]) { // m_axi bundle gmem1 (shared with pin_prob and pout_ret)
+#pragma HLS INTERFACE m_axi port = pin_level offset = slave bundle = gmem0 depth =               \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pin_prob offset = slave bundle = gmem1 depth =              \
+    2048 / 4 * 64 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_bw offset = slave bundle = gmem2 depth =                     \
+    65536 * 384 / 4 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_ret offset = slave bundle = gmem1 depth =              \
+    65536 * 1 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_pred offset = slave bundle = gmem1 depth =                  \
+    65536 * 16 / 2 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+// Every argument, plus `return`, is additionally bound to the s_axilite bundle "control".
+#pragma HLS INTERFACE s_axilite port = pin_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pin_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_bw bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_ret bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_pred bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+#pragma HLS DATAFLOW
+// Compile-time switch: with USING_PIC_BURST defined the *_core_wrapper variant is called, otherwise *_core.
+#ifdef USING_PIC_BURST
+    kernel_2_ArithmeticCoding_core_wrapper(pin_level, pin_prob, pout_bw, pout_ret, pout_pred);
+#else
+    kernel_2_ArithmeticCoding_core(pin_level, pin_prob, pout_bw, pout_ret, pout_pred);
+#endif
+}
+}
+
+extern "C" { // C linkage: the kernel symbol is exported without C++ name mangling
+void kernel_2_ArithmeticCoding_5( // NoWrapper; declares interfaces, then calls *_core_wrapper (USING_PIC_BURST) or *_core
+    uint32_t pin_level[SIZE32_MEM_BW], // m_axi bundle gmem0
+    uint32_t pin_prob[2048 / 4 * 64], // m_axi bundle gmem1
+    uint32_t pout_bw[SIZE32_MEM_BW], // m_axi bundle gmem2
+    uint32_t pout_ret[SIZE32_MEM_RET], // m_axi bundle gmem1 (shared with pin_prob and pout_pred)
+    uint32_t pout_pred[SIZE32_MEM_PRED]) { // m_axi bundle gmem1 (shared with pin_prob and pout_ret)
+#pragma HLS INTERFACE m_axi port = pin_level offset = slave bundle = gmem0 depth =               \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pin_prob offset = slave bundle = gmem1 depth =              \
+    2048 / 4 * 64 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_bw offset = slave bundle = gmem2 depth =                     \
+    65536 * 384 / 4 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_ret offset = slave bundle = gmem1 depth =              \
+    65536 * 1 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_pred offset = slave bundle = gmem1 depth =                  \
+    65536 * 16 / 2 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+// Every argument, plus `return`, is additionally bound to the s_axilite bundle "control".
+#pragma HLS INTERFACE s_axilite port = pin_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pin_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_bw bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_ret bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_pred bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+#pragma HLS DATAFLOW
+#ifdef USING_PIC_BURST
+    kernel_2_ArithmeticCoding_core_wrapper(pin_level, pin_prob, pout_bw, pout_ret, pout_pred);
+#else
+    kernel_2_ArithmeticCoding_core(pin_level, pin_prob, pout_bw, pout_ret, pout_pred);
+#endif
+}
+}
+
+extern "C" { // C linkage: the kernel symbol is exported without C++ name mangling
+void kernel_2_ArithmeticCoding_6( // NoWrapper; kernel entry that only declares interfaces and forwards its five buffers
+    uint32_t pin_level[SIZE32_MEM_BW], // m_axi bundle gmem0
+    uint32_t pin_prob[2048 / 4 * 64], // m_axi bundle gmem1
+    uint32_t pout_bw[SIZE32_MEM_BW], // m_axi bundle gmem2
+    uint32_t pout_ret[SIZE32_MEM_RET], // m_axi bundle gmem1 (shared with pin_prob and pout_pred)
+    uint32_t pout_pred[SIZE32_MEM_PRED]) { // m_axi bundle gmem1 (shared with pin_prob and pout_ret)
+#pragma HLS INTERFACE m_axi port = pin_level offset = slave bundle = gmem0 depth =               \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pin_prob offset = slave bundle = gmem1 depth =              \
+    2048 / 4 * 64 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_bw offset = slave bundle = gmem2 depth =                     \
+    65536 * 384 / 4 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_ret offset = slave bundle = gmem1 depth =              \
+    65536 * 1 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_pred offset = slave bundle = gmem1 depth =                  \
+    65536 * 16 / 2 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+// Every argument, plus `return`, is additionally bound to the s_axilite bundle "control".
+#pragma HLS INTERFACE s_axilite port = pin_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pin_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_bw bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_ret bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_pred bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+#pragma HLS DATAFLOW
+// Compile-time switch: with USING_PIC_BURST defined the *_core_wrapper variant is called, otherwise *_core.
+#ifdef USING_PIC_BURST
+    kernel_2_ArithmeticCoding_core_wrapper(pin_level, pin_prob, pout_bw, pout_ret, pout_pred);
+#else
+    kernel_2_ArithmeticCoding_core(pin_level, pin_prob, pout_bw, pout_ret, pout_pred);
+#endif
+}
+}
+
+extern "C" { // C linkage: the kernel symbol is exported without C++ name mangling
+void kernel_2_ArithmeticCoding_7( // NoWrapper; kernel entry that only declares interfaces and forwards its five buffers
+    uint32_t pin_level[SIZE32_MEM_BW], // m_axi bundle gmem0
+    uint32_t pin_prob[2048 / 4 * 64], // m_axi bundle gmem1
+    uint32_t pout_bw[SIZE32_MEM_BW], // m_axi bundle gmem2
+    uint32_t pout_ret[SIZE32_MEM_RET], // m_axi bundle gmem1 (shared with pin_prob and pout_pred)
+    uint32_t pout_pred[SIZE32_MEM_PRED]) { // m_axi bundle gmem1 (shared with pin_prob and pout_ret)
+#pragma HLS INTERFACE m_axi port = pin_level offset = slave bundle = gmem0 depth =               \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pin_prob offset = slave bundle = gmem1 depth =              \
+    2048 / 4 * 64 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_bw offset = slave bundle = gmem2 depth =                     \
+    65536 * 384 / 4 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_ret offset = slave bundle = gmem1 depth =              \
+    65536 * 1 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_pred offset = slave bundle = gmem1 depth =                  \
+    65536 * 16 / 2 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+// Every argument, plus `return`, is additionally bound to the s_axilite bundle "control".
+#pragma HLS INTERFACE s_axilite port = pin_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pin_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_bw bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_ret bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_pred bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+#pragma HLS DATAFLOW
+// Compile-time switch: with USING_PIC_BURST defined the *_core_wrapper variant is called, otherwise *_core.
+#ifdef USING_PIC_BURST
+    kernel_2_ArithmeticCoding_core_wrapper(pin_level, pin_prob, pout_bw, pout_ret, pout_pred);
+#else
+    kernel_2_ArithmeticCoding_core(pin_level, pin_prob, pout_bw, pout_ret, pout_pred);
+#endif
+}
+}
+
+extern "C" { // C linkage: the kernel symbol is exported without C++ name mangling
+void kernel_2_ArithmeticCoding_8( // NoWrapper; kernel entry that only declares interfaces and forwards its five buffers
+    uint32_t pin_level[SIZE32_MEM_BW], // m_axi bundle gmem0
+    uint32_t pin_prob[2048 / 4 * 64], // m_axi bundle gmem1
+    uint32_t pout_bw[SIZE32_MEM_BW], // m_axi bundle gmem2
+    uint32_t pout_ret[SIZE32_MEM_RET], // m_axi bundle gmem1 (shared with pin_prob and pout_pred)
+    uint32_t pout_pred[SIZE32_MEM_PRED]) { // m_axi bundle gmem1 (shared with pin_prob and pout_ret)
+#pragma HLS INTERFACE m_axi port = pin_level offset = slave bundle = gmem0 depth =               \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pin_prob offset = slave bundle = gmem1 depth =              \
+    2048 / 4 * 64 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_bw offset = slave bundle = gmem2 depth =                     \
+    65536 * 384 / 4 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_ret offset = slave bundle = gmem1 depth =              \
+    65536 * 1 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+#pragma HLS INTERFACE m_axi port = pout_pred offset = slave bundle = gmem1 depth =                  \
+    65536 * 16 / 2 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16 // 32bb
+// Every argument, plus `return`, is additionally bound to the s_axilite bundle "control".
+#pragma HLS INTERFACE s_axilite port = pin_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pin_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_bw bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_ret bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_pred bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+#pragma HLS DATAFLOW
+// Compile-time switch: with USING_PIC_BURST defined the *_core_wrapper variant is called, otherwise *_core.
+#ifdef USING_PIC_BURST
+    kernel_2_ArithmeticCoding_core_wrapper(pin_level, pin_prob, pout_bw, pout_ret, pout_pred);
+#else
+    kernel_2_ArithmeticCoding_core(pin_level, pin_prob, pout_bw, pout_ret, pout_pred);
+#endif
+}
+}
diff --git a/codec/L2/demos/webpEnc/kernel/vp8_hls_pred.cpp b/codec/L2/demos/webpEnc/kernel/vp8_hls_pred.cpp
new file mode 100644
index 0000000000..1a227b6e59
--- /dev/null
+++ b/codec/L2/demos/webpEnc/kernel/vp8_hls_pred.cpp
@@ -0,0 +1,4517 @@
+/**********
+           Copyright (c) 2017, Xilinx, Inc.
+           All rights reserved.
+           Redistribution and use in source and binary forms, with or without modification,
+           are permitted provided that the following conditions are met:
+           1. Redistributions of source code must retain the above copyright notice,
+           this list of conditions and the following disclaimer.
+           2. Redistributions in binary form must reproduce the above copyright notice,
+           this list of conditions and the following disclaimer in the documentation
+           and/or other materials provided with the distribution.
+           3. Neither the name of the copyright holder nor the names of its contributors
+           may be used to endorse or promote products derived from this software
+           without specific prior written permission.
+           THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+           ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+           THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+           IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+           INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+           PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+           HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+           OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+           EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********/
+
+#include <ap_int.h>
+#include <hls_stream.h>
+#include "vp8_hls_syn.h"
+#include "vp8_hls_syn2.h"
+#include <stdio.h>
+#include <string.h>
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+//==========================      kernel_IntraPredLoop2_NoOut             ==============================//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+// kernel_IntraPredLoop2_NoOut
+//|-memcpy
+//|-TopVp8_top_dataflow_32bit_k1NoStruct_cnt_DeepFIFO
+//  |-TopVp8_read_2_32bit_NoStruct
+//  | |-TopVp8_read__dataflow_32bit...
+//  |-TopVp8_compute...
+//  |-TopVp8_RecordCoeff_hls_cnt
+//  | |-FindLast
+//  | |-VP8RecordCoeffs_hls_str_w_cnt
+//  |   |-Record_str
+//  |   |-VP8EncBands_hls
+//  |-TopVp8_RecordProb_hls_cnt
+//  | |-RecordPorb_ReadCoeff_dataflow2_cnt
+//  |   |-RecordPorb_ReadCoeff_dataflow_dc_cnt
+//  |     |-RecordPorb_ReadCoeff_dataflow_ac_cnt
+//  |     | |-VP8RecordCoeffs_hls_str_r_cnt
+//  |     |-RecordPorb_ReadCoeff_dataflow_uv_cnt...
+//  |     |-RecordPorb_ReadCoeff_dataflow2_cnt...
+//  |-TopVp8_send_32bit
+void kernel_IntraPredLoop2_NoOut_core( // Unpacks the 64-word p_info block into typed parameters, then runs the dataflow top
+    int32_t* p_info, uint32_t* ysrc, uint32_t* usrc, uint32_t* vsrc, int32_t* pout_level, uint8_t* pout_prob) {
+    int p_readinfo[64]; // local copy of p_info; every field below is taken from this buffer
+    memcpy(p_readinfo, p_info, 64 * sizeof(int));
+    ap_uint<32> id_pic;
+    ap_uint<32> mb_line;
+    ap_uint<LG2_MAX_W_PIX> y_stride;
+    ap_uint<LG2_MAX_W_PIX> uv_stride;
+    ap_uint<LG2_MAX_W_PIX> width;
+    ap_uint<LG2_MAX_W_PIX> height;
+    ap_uint<LG2_MAX_NUM_MB_W> mb_w;
+    ap_uint<LG2_MAX_NUM_MB_H> mb_h;
+    ap_uint<WD_LMD> lambda_p16;
+    ap_uint<WD_LMD> lambda_p44;
+    ap_uint<WD_LMD> tlambda;
+    ap_uint<WD_LMD> lambda_uv;
+    ap_uint<WD_LMD> tlambda_m;
+    hls_QMatrix hls_qm1, hls_qm2, hls_qm_uv;
+    ap_int<WD_sharpen * 16> ap_sharpen, ap_sharpen_uv;
+    // p_readinfo word layout: [0..12] scalars, [13..18] qm1, [19..24] qm2, [25..30] qm_uv, [31..46] sharpen, [47..62] sharpen_uv
+    // Initializing image variables, once for one picture
+    { // For convenience, extend the code at top module to show all parameters used by kernel of intra-prediction
+        id_pic = p_readinfo[0];  // reserved for future
+        mb_line = p_readinfo[1]; // reserved for future, to show current line number of mb
+        y_stride = p_readinfo[2];
+        uv_stride = p_readinfo[3];
+        width = p_readinfo[4];
+        height = p_readinfo[5];
+        mb_w = p_readinfo[2 + 2 + 2];
+        mb_h = p_readinfo[3 + 2 + 2];
+        lambda_p16 = p_readinfo[4 + 2 + 2];
+        lambda_p44 = p_readinfo[5 + 2 + 2];
+        tlambda = p_readinfo[6 + 2 + 2];
+        lambda_uv = p_readinfo[7 + 2 + 2];
+        tlambda_m = p_readinfo[8 + 2 + 2];
+
+        hls_qm1.q_0 = p_readinfo[11 + 2]; // quantizer steps
+        hls_qm1.q_n = p_readinfo[12 + 2];
+        hls_qm1.iq_0 = p_readinfo[13 + 2]; // reciprocals fixed point.
+        hls_qm1.iq_n = p_readinfo[14 + 2];
+        hls_qm1.bias_0 = p_readinfo[15 + 2]; // rounding bias
+        hls_qm1.bias_n = p_readinfo[16 + 2];
+
+        hls_qm2.q_0 = p_readinfo[17 + 2]; // quantizer steps
+        hls_qm2.q_n = p_readinfo[18 + 2];
+        hls_qm2.iq_0 = p_readinfo[19 + 2]; // reciprocals fixed point.
+        hls_qm2.iq_n = p_readinfo[20 + 2];
+        hls_qm2.bias_0 = p_readinfo[21 + 2]; // rounding bias
+        hls_qm2.bias_n = p_readinfo[22 + 2];
+
+        hls_qm_uv.q_0 = p_readinfo[23 + 2]; // quantizer steps
+        hls_qm_uv.q_n = p_readinfo[24 + 2];
+        hls_qm_uv.iq_0 = p_readinfo[25 + 2]; // reciprocals fixed point.
+        hls_qm_uv.iq_n = p_readinfo[26 + 2];
+        hls_qm_uv.bias_0 = p_readinfo[27 + 2]; // rounding bias
+        hls_qm_uv.bias_n = p_readinfo[28 + 2];
+        for (int i = 0; i < 16; i++)
+#pragma HLS UNROLL
+            VCT_GET(ap_sharpen, i, WD_sharpen) = p_readinfo[29 + 2 + i]; // local copy, so p_info is not dereferenced again
+        for (int i = 0; i < 16; i++)
+#pragma HLS UNROLL
+            VCT_GET(ap_sharpen_uv, i, WD_sharpen) = p_readinfo[29 + 2 + 16 + i];
+    } // end of initialization
+    int dirty = 0; // written by the callee through the pointer; not read again in this function
+    TopVp8_top_dataflow_32bit_k1NoStruct_cnt_DeepFIFO(id_pic,     // p_info[0],
+                                                      mb_line,    // p_info[1],
+                                                      y_stride,   // p_info[2],  // ,//pic->y_stride,
+                                                      uv_stride,  // p_info[3], // ,//pic->uv_stride
+                                                      width,      // p_info[4],  // ,//pic->width
+                                                      height,     // p_info[5],  // ,//pic->height
+                                                      mb_w,       // p_info[2+2+2],///,
+                                                      mb_h,       // p_info[3+2+2],//,
+                                                      lambda_p16, // p_info[4+2+2],//dqm->lambda_i16_,
+                                                      lambda_p44, // p_info[5+2+2],//dqm->lambda_i4_,
+                                                      tlambda,    // p_info[6+2+2],//dqm->tlambda_,
+                                                      lambda_uv,  // p_info[7+2+2],//dqm->lambda_uv_,
+                                                      tlambda_m,  // p_info[8+2+2],//dqm->lambda_mode_,
+                                                      hls_qm1, hls_qm2, hls_qm_uv, ap_sharpen, ap_sharpen_uv,
+                                                      ysrc,       // 4096x4096
+                                                      usrc,       // 2048x2048
+                                                      vsrc,       // 2048x2048
+                                                      pout_level, // 65536*512
+                                                      pout_prob, &dirty);
+}
+
+extern "C" { // C linkage: the kernel symbol is exported without C++ name mangling
+void kernel_IntraPredLoop2_NoOut( // Kernel entry: declares the AXI interfaces, then delegates to the _core routine
+    int32_t* p_info, uint32_t* ysrc, uint32_t* usrc, uint32_t* vsrc, int32_t* pout_level, uint8_t* pout_prob) {
+#pragma HLS INTERFACE m_axi port = pout_level offset = slave bundle = gmem0 depth =              \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = p_info offset = slave bundle = gmem1 depth = 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = ysrc offset = slave bundle = gmem1 depth =                    \
+    4096 * 4096 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = usrc offset = slave bundle = gmem1 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = vsrc offset = slave bundle = gmem1 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_prob offset = slave bundle = gmem2 depth = 2048 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE s_axilite port = p_info bundle = control
+#pragma HLS INTERFACE s_axilite port = ysrc bundle = control
+#pragma HLS INTERFACE s_axilite port = usrc bundle = control
+#pragma HLS INTERFACE s_axilite port = vsrc bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+    // Bundles: pout_level -> gmem0; p_info, ysrc, usrc, vsrc -> gmem1; pout_prob -> gmem2; all args and return -> control.
+    kernel_IntraPredLoop2_NoOut_core(p_info, ysrc, usrc, vsrc, pout_level, pout_prob);
+}
+}
+
+void TopVp8_top_dataflow_32bit_k1NoStruct_cnt_DeepFIFO(ap_uint<32> id_pic, // not referenced in this body
+                                                       ap_uint<32> mb_line, // not referenced in this body
+                                                       ap_uint<LG2_MAX_W_PIX> y_stride,
+                                                       ap_uint<LG2_MAX_W_PIX> uv_stride,
+                                                       ap_uint<LG2_MAX_W_PIX> width,
+                                                       ap_uint<LG2_MAX_W_PIX> height,
+                                                       ap_uint<LG2_MAX_NUM_MB_W> mb_w,
+                                                       ap_uint<LG2_MAX_NUM_MB_H> mb_h,
+                                                       ap_uint<WD_LMD> lambda_p16,
+                                                       ap_uint<WD_LMD> lambda_p44,
+                                                       ap_uint<WD_LMD> tlambda,
+                                                       ap_uint<WD_LMD> lambda_uv,
+                                                       ap_uint<WD_LMD> tlambda_m,
+                                                       hls_QMatrix hls_qm1,
+                                                       hls_QMatrix hls_qm2,
+                                                       hls_QMatrix hls_qm_uv,
+                                                       ap_int<WD_sharpen * 16> ap_sharpen,
+                                                       ap_int<WD_sharpen * 16> ap_sharpen_uv,
+                                                       uint32_t* ysrc,
+                                                       uint32_t* usrc,
+                                                       uint32_t* vsrc,
+                                                       int32_t* pout_level,
+                                                       uint8_t* pout_prob,
+                                                       int* dirty) { // *dirty receives the TopVp8_RecordProb_hls_cnt result
+    // DATAFLOW region chaining, in call order: read_2_32bit -> compute -> RecordCoeff -> RecordProb and send_32bit.
+    // str_out is passed to TopVp8_compute; its only other use is the SEND_OUT_READ loop at the end of this function.
+    hls::stream<ap_uint<WD_PIX * 16> > str_out;
+#pragma HLS STREAM variable = str_out depth = 8 * 8
+#pragma HLS DATAFLOW
+    hls::stream<ap_uint<WD_PIX * 16> > str_din_y;
+    hls::stream<ap_uint<WD_PIX * 16> > str_din_uv;
+#pragma HLS STREAM variable = str_din_y depth = 16 * 128
+#pragma HLS STREAM variable = str_din_uv depth = 8 * 128
+    TopVp8_read_2_32bit_NoStruct( // For 4k,
+        ysrc, usrc, vsrc, y_stride, uv_stride, width, height, mb_w, mb_h, &str_din_y, &str_din_uv);
+    // Streams shared by TopVp8_compute and TopVp8_RecordCoeff_hls_cnt; FIFO depths are set by the STREAM pragmas.
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_y;
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_dc;
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_uv;
+    hls::stream<ap_int<64> > str_pred;
+    hls::stream<ap_int<6> > str_ret;
+#pragma HLS STREAM variable = str_level_y depth = 16 * 4 * 4 // Deep
+#pragma HLS STREAM variable = str_level_dc depth = 1 * 4 * 4 // Deep
+#pragma HLS STREAM variable = str_level_uv depth = 8 * 4 * 4 // Deep
+#pragma HLS STREAM variable = str_pred depth = 1 * 4 * 4     // Deep
+#pragma HLS STREAM variable = str_ret depth = 1 * 4 * 4      // Deep
+    TopVp8_compute(mb_w, mb_h, &str_din_y, &str_din_uv, lambda_p16, lambda_p44, tlambda, lambda_uv, tlambda_m, hls_qm1,
+                   hls_qm2, hls_qm_uv, ap_sharpen, ap_sharpen_uv, &str_out, &str_level_dc, &str_level_y, &str_level_uv,
+                   &str_pred, &str_ret);
+    // "...2" streams go on to TopVp8_send_32bit; mb_type/rec/cnt streams go on to TopVp8_RecordProb_hls_cnt.
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_y2;
+#pragma HLS STREAM variable = str_level_y2 depth = 4 * 16 * 4
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_dc2;
+#pragma HLS STREAM variable = str_level_dc2 depth = 4 * 1 * 4
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_uv2;
+#pragma HLS STREAM variable = str_level_uv2 depth = 4 * 8 * 4
+    hls::stream<ap_int<64> > str_pred2;
+#pragma HLS STREAM variable = str_pred2 depth = 4 * 1 * 4
+    hls::stream<ap_int<6> > str_ret2;
+#pragma HLS STREAM variable = str_ret2 depth = 4 * 1 * 4
+    hls::stream<ap_uint<1> > str_mb_type;
+#pragma HLS STREAM variable = str_mb_type depth = 16 * 1
+    hls::stream<ap_uint<11> > str_rec_dc;
+#pragma HLS STREAM variable = str_rec_dc depth = 2 * 1 * 16 * 64
+    hls::stream<ap_uint<11> > str_rec_ac;
+#pragma HLS STREAM variable = str_rec_ac depth = 4 * 16 * 16 * 8
+    hls::stream<ap_uint<11> > str_rec_uv;
+#pragma HLS STREAM variable = str_rec_uv depth = 2 * 8 * 16 * 8
+    hls::stream<ap_uint<8> > str_cnt_dc;
+#pragma HLS STREAM variable = str_cnt_dc depth = 2 * 1 * 1 * 64
+    hls::stream<ap_uint<8> > str_cnt_ac;
+#pragma HLS STREAM variable = str_cnt_ac depth = 2 * 16 * 1 * 8
+    hls::stream<ap_uint<8> > str_cnt_uv;
+#pragma HLS STREAM variable = str_cnt_uv depth = 2 * 8 * 1 * 8
+    TopVp8_RecordCoeff_hls_cnt(mb_w, mb_h, &str_level_dc, &str_level_y, &str_level_uv, &str_pred, &str_ret, str_mb_type,
+                               &str_level_dc2, &str_level_y2, &str_level_uv2, &str_pred2, &str_ret2, str_rec_dc,
+                               str_rec_ac, str_rec_uv, str_cnt_dc, str_cnt_ac, str_cnt_uv);
+    // The value returned by TopVp8_RecordProb_hls_cnt is handed back to the caller through *dirty.
+    *dirty = TopVp8_RecordProb_hls_cnt(mb_w, mb_h, str_mb_type, str_rec_dc, str_rec_ac, str_rec_uv, str_cnt_dc,
+                                       str_cnt_ac, str_cnt_uv, pout_prob);
+    // TopVp8_send_32bit receives the five "...2" streams together with the pout_level buffer.
+    TopVp8_send_32bit(mb_w, mb_h, &str_level_dc2, &str_level_y2, &str_level_uv2, &str_pred2, &str_ret2, pout_level);
+    // Read and discard 24 str_out words for each of the mb_h * mb_w (y, x) positions; the values are unused here.
+SEND_OUT_READ:
+    for (int y = 0; y < mb_h; y++)
+    X:
+        for (int x = 0; x < mb_w; x++)
+        I:
+            for (int i = 0; i < 24; i++) str_out.read();
+}
+
+void TopVp8_top_dataflow_32bit_k1NoStruct_cnt_DeepFIFO_HideDirty(ap_uint<32> id_pic, // Top-level DATAFLOW chain: read -> compute -> record coeff -> record prob / send. id_pic is not referenced in this body.
+                                                                 ap_uint<32> mb_line, // not referenced in this body
+                                                                 ap_uint<LG2_MAX_W_PIX> y_stride, // forwarded to the read stage
+                                                                 ap_uint<LG2_MAX_W_PIX> uv_stride, // forwarded to the read stage
+                                                                 ap_uint<LG2_MAX_W_PIX> width, // forwarded to the read stage
+                                                                 ap_uint<LG2_MAX_W_PIX> height, // forwarded to the read stage
+                                                                 ap_uint<LG2_MAX_NUM_MB_W> mb_w, // passed to every stage below
+                                                                 ap_uint<LG2_MAX_NUM_MB_H> mb_h, // passed to every stage below
+                                                                 ap_uint<WD_LMD> lambda_p16, // the five lambda values are forwarded to the compute stage
+                                                                 ap_uint<WD_LMD> lambda_p44,
+                                                                 ap_uint<WD_LMD> tlambda,
+                                                                 ap_uint<WD_LMD> lambda_uv,
+                                                                 ap_uint<WD_LMD> tlambda_m,
+                                                                 hls_QMatrix hls_qm1, // the three matrices are forwarded to the compute stage
+                                                                 hls_QMatrix hls_qm2,
+                                                                 hls_QMatrix hls_qm_uv,
+                                                                 ap_int<WD_sharpen * 16> ap_sharpen, // forwarded to the compute stage
+                                                                 ap_int<WD_sharpen * 16> ap_sharpen_uv, // forwarded to the compute stage
+                                                                 uint32_t* ysrc, // source planes handed to the read stage
+                                                                 uint32_t* usrc,
+                                                                 uint32_t* vsrc,
+                                                                 int32_t* pout_level, // written by TopVp8_send_32bit
+                                                                 uint8_t* pout_prob) { // handed to TopVp8_RecordProb_hls_cnt_HideDirty
+#pragma HLS DATAFLOW
+    hls::stream<ap_uint<WD_PIX * 16> > str_din_y; // read stage -> compute stage
+    hls::stream<ap_uint<WD_PIX * 16> > str_din_uv; // read stage -> compute stage
+#pragma HLS STREAM variable = str_din_y depth = 16 * 128
+#pragma HLS STREAM variable = str_din_uv depth = 8 * 128
+    TopVp8_read_2_32bit_NoStruct( // For 4k,
+        ysrc, usrc, vsrc, y_stride, uv_stride, width, height, mb_w, mb_h, &str_din_y, &str_din_uv);
+
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_y; // compute stage -> RecordCoeff stage (this and the four below)
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_dc;
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_uv;
+    hls::stream<ap_int<64> > str_pred;
+    hls::stream<ap_int<6> > str_ret;
+#pragma HLS STREAM variable = str_level_y depth = 16 * 4 * 4 // Deep
+#pragma HLS STREAM variable = str_level_dc depth = 1 * 4 * 4 // Deep
+#pragma HLS STREAM variable = str_level_uv depth = 8 * 4 * 4 // Deep
+#pragma HLS STREAM variable = str_pred depth = 1 * 4 * 4     // Deep
+#pragma HLS STREAM variable = str_ret depth = 1 * 4 * 4      // Deep
+    TopVp8_compute_NoOut(mb_w, mb_h, &str_din_y, &str_din_uv, lambda_p16, lambda_p44, tlambda, lambda_uv, tlambda_m,
+                         hls_qm1, hls_qm2, hls_qm_uv, ap_sharpen, ap_sharpen_uv, &str_level_dc, &str_level_y,
+                         &str_level_uv, &str_pred, &str_ret);
+
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_y2; // the "*2" streams go from RecordCoeff to TopVp8_send_32bit
+#pragma HLS STREAM variable = str_level_y2 depth = 4 * 16 * 4
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_dc2;
+#pragma HLS STREAM variable = str_level_dc2 depth = 4 * 1 * 4
+    hls::stream<ap_int<WD_LEVEL * 16> > str_level_uv2;
+#pragma HLS STREAM variable = str_level_uv2 depth = 4 * 8 * 4
+    hls::stream<ap_int<64> > str_pred2;
+#pragma HLS STREAM variable = str_pred2 depth = 4 * 1 * 4
+    hls::stream<ap_int<6> > str_ret2;
+#pragma HLS STREAM variable = str_ret2 depth = 4 * 1 * 4
+    hls::stream<ap_uint<1> > str_mb_type; // str_mb_type, str_rec_* and str_cnt_* go from RecordCoeff to RecordProb
+#pragma HLS STREAM variable = str_mb_type depth = 16 * 1
+    hls::stream<ap_uint<11> > str_rec_dc;
+#pragma HLS STREAM variable = str_rec_dc depth = 2 * 1 * 16 * 64
+    hls::stream<ap_uint<11> > str_rec_ac;
+#pragma HLS STREAM variable = str_rec_ac depth = 4 * 16 * 16 * 8
+    hls::stream<ap_uint<11> > str_rec_uv;
+#pragma HLS STREAM variable = str_rec_uv depth = 2 * 8 * 16 * 8
+    hls::stream<ap_uint<8> > str_cnt_dc;
+#pragma HLS STREAM variable = str_cnt_dc depth = 2 * 1 * 1 * 64
+    hls::stream<ap_uint<8> > str_cnt_ac;
+#pragma HLS STREAM variable = str_cnt_ac depth = 2 * 16 * 1 * 8
+    hls::stream<ap_uint<8> > str_cnt_uv;
+#pragma HLS STREAM variable = str_cnt_uv depth = 2 * 8 * 1 * 8
+    TopVp8_RecordCoeff_hls_cnt(mb_w, mb_h, &str_level_dc, &str_level_y, &str_level_uv, &str_pred, &str_ret, str_mb_type,
+                               &str_level_dc2, &str_level_y2, &str_level_uv2, &str_pred2, &str_ret2, str_rec_dc,
+                               str_rec_ac, str_rec_uv, str_cnt_dc, str_cnt_ac, str_cnt_uv);
+
+    TopVp8_RecordProb_hls_cnt_HideDirty(mb_w, mb_h, str_mb_type, str_rec_dc, str_rec_ac, str_rec_uv, str_cnt_dc,
+                                        str_cnt_ac, str_cnt_uv, pout_prob);
+
+    TopVp8_send_32bit(mb_w, mb_h, &str_level_dc2, &str_level_y2, &str_level_uv2, &str_pred2, &str_ret2, pout_level);
+}
+//////////======================================================================/////////////////////////////
+//////////====================  TopVp8_send_32bit  =================/////////////////////////////
+//////////======================================================================/////////////////////////////
+void TopVp8_send_32bit(ap_uint<LG2_MAX_NUM_MB_W> mb_w, // Per MB: gather the streams into tmp_arr, then write 256 packed 32-bit words to pout_level. mb_w bounds the inner loop.
+                       ap_uint<LG2_MAX_NUM_MB_H> mb_h, // bounds the outer loop
+                       hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc, // the five streams are drained by TopVp8_send__strs_to_array
+                       hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y,
+                       hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv,
+                       hls::stream<ap_int<64> >* str_pred,
+                       hls::stream<ap_int<6> >* str_ret,
+                       // output
+                       int32_t* pout_level) { // receives 256 words per MB; the pointer is advanced after each MB
+#pragma HLS dataflow
+    hls::stream<int16_t> tmp_str; // not referenced below (only in a commented-out argument)
+    int16_t tmp_arr[512]; // staging buffer for one MB record; declared without an initializer
+
+SEND_ARRAY:
+    for (int y_mb = 0; y_mb < mb_h; y_mb++) {
+#pragma HLS LOOP_TRIPCOUNT min = 68 max = 256
+        for (int x_mb = 0; x_mb < mb_w; x_mb++) {
+#pragma HLS LOOP_TRIPCOUNT min = 120 max = 256
+            TopVp8_send__strs_to_array(tmp_arr,
+                                       //&tmp_str,
+                                       str_level_dc, str_level_y, str_level_uv, str_pred, str_ret);
+            tmp_arr[420] = mb_w; // the MB-grid width and height are stored in every MB record
+            tmp_arr[421] = mb_h;
+        SEND_256:
+            for (int i = 0; i < 256; i++) {
+#pragma HLS pipeline
+                ap_uint<32> tmp;
+                tmp(15, 0) = tmp_arr[i * 2]; // even entry goes to the low half-word
+                tmp(31, 16) = tmp_arr[i * 2 + 1]; // odd entry goes to the high half-word
+                pout_level[i] = tmp;
+            }
+            pout_level += 256; // next MB record starts 256 words further on
+        }
+    }
+}
+//////////====================  TopVp8_send_32bit  =================/////////////////////////////
+void TopVp8_send__strs_to_array(short int* pout, // Drains one MB's words from the five streams into pout, advancing 16 entries per word. pout: destination cursor
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc, // 1 word is read
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y, // 16 words are read (4 x 4)
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv, // 8 words are read (2 x 2 x 2)
+                                hls::stream<ap_int<64> >* str_pred, // 1 word is read
+                                hls::stream<ap_int<6> >* str_ret) { // 1 word is read
+#pragma HLS PIPELINE
+    int x, y, ch;
+    ap_int<WD_LEVEL* 16> tmp16 = str_level_dc->read();
+
+    short int* y_dc_levels = pout; //[16];
+    CPY16(y_dc_levels, tmp16, WD_LEVEL); // CPY16 is defined outside this view; presumably unpacks 16 WD_LEVEL-bit fields -- TODO confirm
+    pout += 16;
+
+    // luma-AC
+    for (y = 0; y < 4; ++y) {
+    SEND_ARRAY_LU:
+        for (x = 0; x < 4; ++x) {
+#pragma HLS PIPELINE
+            short int* y_ac_levels = pout; //;[16]
+            ap_int<WD_LEVEL* 16> tmp = str_level_y->read();
+            CPY16(y_ac_levels, tmp, WD_LEVEL);
+            pout += 16;
+        }
+    }
+
+    // U/V
+    for (ch = 0; ch <= 2; ch += 2) { // two passes (ch = 0 and ch = 2); ch itself is not used in the body
+        for (y = 0; y < 2; ++y) {
+        SEND_ARRAY_UV:
+            for (x = 0; x < 2; ++x) {
+#pragma HLS PIPELINE
+                short int* uv_levels = pout; //;[16];
+                ap_int<WD_LEVEL* 16> tmp = str_level_uv->read();
+                CPY16(uv_levels, tmp, WD_LEVEL);
+                pout += 16;
+            }
+        }
+    }
+
+    uint16_t* pred16 = (uint16_t*)pout;
+    ap_uint<64> pred = str_pred->read(); // the signed 64-bit stream word is held as unsigned here
+#pragma HLS PIPELINE
+    CPY16U(pred16, pred, 4); // CPY16U is defined outside this view; presumably unpacks 16 4-bit fields -- TODO confirm
+    pout += 16;
+    ap_uint<6> ret = str_ret->read();
+    *pout = (uint16_t)ret; // stored at entry 16 + 256 + 128 + 16 = 416 relative to the incoming pout
+}
+//////////====================  TopVp8_send_32bit  =================/////////////////////////////
+
+//////////======================================================================/////////////////////////////
+//////////====================  TopVp8_read_2_32bit_NoStruct  =================/////////////////////////////
+//////////======================================================================/////////////////////////////
+void TopVp8_read_2_32bit_NoStruct( // Thin wrapper: forwards everything to TopVp8_read__dataflow_32bit with the arguments reordered.
+    // input
+    uint32_t* ysrc,
+    uint32_t* usrc,
+    uint32_t* vsrc,
+    ap_uint<LG2_MAX_W_PIX> y_stride,
+    ap_uint<LG2_MAX_W_PIX> uv_stride,
+    ap_uint<LG2_MAX_W_PIX> width,
+    ap_uint<LG2_MAX_W_PIX> height,
+    ap_uint<LG2_MAX_NUM_MB_W> mb_w,
+    ap_uint<LG2_MAX_NUM_MB_H> mb_h,
+    // output
+    hls::stream<ap_uint<WD_PIX * 16> >* str_din_y,
+    hls::stream<ap_uint<WD_PIX * 16> >* str_din_uv) {
+    TopVp8_read__dataflow_32bit( //
+        // input (the callee takes strides and sizes first, then the source pointers)
+        y_stride, uv_stride, width, height, mb_w, mb_h, ysrc, usrc, vsrc,
+        // output
+        str_din_y, str_din_uv);
+}
+
+void TopVp8_read__dataflow_32bit( // For each MB row: copy that row's Y/U/V words into line buffers, then stream out its macroblocks.
+    // input
+    ap_uint<LG2_MAX_W_PIX> y_stride,  // passed to both stages of the row loop
+    ap_uint<LG2_MAX_W_PIX> uv_stride, // passed to both stages of the row loop
+    ap_uint<LG2_MAX_W_PIX> width,     // passed to TopVp8_read_MB_32bit_const
+    ap_uint<LG2_MAX_W_PIX> height,    // passed to TopVp8_read_MB_32bit_const
+    ap_uint<LG2_MAX_NUM_MB_W> mb_w,   // passed to TopVp8_read_MB_32bit_const
+    ap_uint<LG2_MAX_NUM_MB_H> mb_h,   // number of MB rows, i.e. the loop bound below
+    uint32_t ysrc[MAX_W_PIX * MAX_H_PIX / 4],
+    uint32_t usrc[MAX_W_PIX * MAX_H_PIX / 4 / 4],
+    uint32_t vsrc[MAX_W_PIX * MAX_H_PIX / 4 / 4],
+    // output
+    hls::stream<ap_uint<WD_PIX * 16> >* str_din_y,
+    hls::stream<ap_uint<WD_PIX * 16> >* str_din_uv) {
+    /* MB Line buffer: each RESOURCE pragma must name the buffer declared just above it */
+    uint32_t buff_line_mb_y[MAX_W_PIX * 16 / 4];
+#pragma HLS RESOURCE variable = buff_line_mb_y core = XPM_MEMORY uram // 32bb
+    uint32_t buff_line_mb_u[MAX_W_PIX * 4 / 4];                       // 32bb
+#pragma HLS RESOURCE variable = buff_line_mb_u core = XPM_MEMORY uram // 32bb
+    uint32_t buff_line_mb_v[MAX_W_PIX * 4 / 4];                       // 32bb
+#pragma HLS RESOURCE variable = buff_line_mb_v core = XPM_MEMORY uram // 32bb
+// uint32_t  buff_line_mb_y2[MAX_W_PIX*16/4];//32bb
+// uint32_t  buff_line_mb_u2[MAX_W_PIX*4/4];//32bb
+// uint32_t  buff_line_mb_v2[MAX_W_PIX*4/4];//32bb
+TOPVP8_DATAFOW:
+    for (int y_mb = 0; y_mb < mb_h; y_mb++) {
+#pragma HLS LOOP_TRIPCOUNT min = 68 max = 256
+#pragma HLS dataflow
+        hls_ReadMBLine_32bit_const(ysrc, //[MAX_W_PIX*MAX_H_PIX],
+                                   usrc, //[MAX_W_PIX*MAX_H_PIX/4],
+                                   vsrc, //[MAX_W_PIX*MAX_H_PIX/4],
+                                   y_mb, y_stride, uv_stride,
+                                   // output
+                                   buff_line_mb_y, //[MAX_W_PIX*16],
+                                   buff_line_mb_u, //[MAX_W_PIX*4],
+                                   buff_line_mb_v  //[MAX_W_PIX*4]
+                                   );
+
+        TopVp8_read_MB_32bit_const( // about 650 * mb_w;
+            width,                  //      = p_info[4];  // = pic->width
+            height,                 //      = p_info[5];  // = pic->height
+            mb_w,                   // = p_info[2+2+2];///;
+            mb_h,                   // = p_info[3+2+2];//;
+            y_mb, buff_line_mb_y, buff_line_mb_u, buff_line_mb_v, y_stride, uv_stride,
+            // output
+            str_din_y, str_din_uv);
+    }
+}
+
+void hls_ReadMBLine_32bit_const_old(uint32_t ysrc[MAX_W_PIX * MAX_H_PIX / 4], // Copies one MB row of Y, U and V words into the line buffers using three separate copy calls.
+                                    uint32_t usrc[MAX_W_PIX * MAX_H_PIX / 4 / 4],
+                                    uint32_t vsrc[MAX_W_PIX * MAX_H_PIX / 4 / 4],
+                                    int y_mb, // MB row index used to compute the source offsets
+                                    int y_stride,
+                                    int uv_stride,
+                                    // output
+                                    uint32_t buff_line_mb_y[MAX_W_PIX * 16 / 4], // 32bb
+                                    uint32_t buff_line_mb_u[MAX_W_PIX * 4 / 4],  // 32bb
+                                    uint32_t buff_line_mb_v[MAX_W_PIX * 4 / 4]   // 32bb
+                                    ) {
+    int offset_y = y_mb * y_stride * 16 / 4; // word offset of the row in ysrc
+    int offset_uv = y_mb * uv_stride * 8 / 4; // word offset of the row in usrc and vsrc
+#pragma HLS dataflow
+    hls_CopyMBLine_y_32bit_const(buff_line_mb_y, ysrc + offset_y, y_stride * 16 / 4);
+    hls_CopyMBLine_uv_32bit_const(buff_line_mb_u, usrc + offset_uv, uv_stride * 8 / 4);
+    hls_CopyMBLine_uv_32bit_const(buff_line_mb_v, vsrc + offset_uv, uv_stride * 8 / 4);
+}
+
+void hls_CopyMBLine_yuv_32bit_const(uint32_t ydes[MAX_W_PIX * 16 / 4], // Copies Y, then U, then V words from source to destination in three burst loops.
+                                    uint32_t udes[MAX_W_PIX / 2 * 8 / 4],
+                                    uint32_t vdes[MAX_W_PIX / 2 * 8 / 4],
+                                    uint32_t ysrc[MAX_W_PIX * MAX_H_PIX / 4],
+                                    uint32_t usrc[MAX_W_PIX * MAX_H_PIX / 4 / 4],
+                                    uint32_t vsrc[MAX_W_PIX * MAX_H_PIX / 4 / 4],
+                                    int num_read_y, // not referenced in this body
+                                    int num_read_uv) { // sets the iteration count of all three loops
+    int num_loop = (num_read_uv + NUM_BURST_READ - 1) >> LG2_NUM_BURST_READ; // burst count from num_read_uv only; rounds up if NUM_BURST_READ == 1 << LG2_NUM_BURST_READ -- TODO confirm
+    int start_addr_y = 0;
+    int start_addr_u = 0;
+    int start_addr_v = 0;
+COPY_YUV_Y32:
+    for (int line = 0; line < num_loop; line++) {
+#pragma HLS LOOP_TRIPCOUNT min = 1920 / 2 * 8 / 4 / 64 max = 4096 / 2 * 8 / 4 / 64
+        //#pragma HLS PIPELINE
+        //        memcpy(ydes, ysrc, NUM_BURST_READ * sizeof(uint32_t) * 4);
+        //        ydes += (NUM_BURST_READ << 2);
+        //        ysrc += (NUM_BURST_READ << 2);
+        for (int i = 0; i < NUM_BURST_READ * 4; i++) { // Y moves four times as many words per iteration as U or V
+#pragma HLS PIPELINE
+            ydes[i + start_addr_y] = ysrc[i + start_addr_y];
+        }
+        start_addr_y += (NUM_BURST_READ << 2);
+    }
+COPY_YUV_U32:
+    for (int line = 0; line < num_loop; line++) {
+#pragma HLS LOOP_TRIPCOUNT min = 1920 / 2 * 8 / 4 / 64 max = 4096 / 2 * 8 / 4 / 64
+        //#pragma HLS PIPELINE
+        //        memcpy(udes, usrc, NUM_BURST_READ * sizeof(uint32_t));
+        //        udes += NUM_BURST_READ;
+        //        usrc += NUM_BURST_READ;
+        for (int i = 0; i < NUM_BURST_READ; i++) {
+#pragma HLS PIPELINE
+            udes[i + start_addr_u] = usrc[i + start_addr_u];
+        }
+        start_addr_u += NUM_BURST_READ;
+    }
+COPY_YUV_V32:
+    for (int line = 0; line < num_loop; line++) {
+#pragma HLS LOOP_TRIPCOUNT min = 1920 / 2 * 8 / 4 / 64 max = 4096 / 2 * 8 / 4 / 64
+        //#pragma HLS PIPELINE
+        //        memcpy(vdes, vsrc, NUM_BURST_READ * sizeof(uint32_t));
+        //        vdes += NUM_BURST_READ;
+        //        vsrc += NUM_BURST_READ;
+        for (int i = 0; i < NUM_BURST_READ; i++) {
+#pragma HLS PIPELINE
+            vdes[i + start_addr_v] = vsrc[i + start_addr_v];
+        }
+        start_addr_v += NUM_BURST_READ;
+    }
+}
+void hls_ReadMBLine_32bit_const(uint32_t ysrc[MAX_W_PIX * MAX_H_PIX / 4], // Copies MB row y_mb of Y, U and V into the line buffers via one combined copy call.
+                                uint32_t usrc[MAX_W_PIX * MAX_H_PIX / 4 / 4],
+                                uint32_t vsrc[MAX_W_PIX * MAX_H_PIX / 4 / 4],
+                                int y_mb, // MB row index used to compute the source offsets
+                                int y_stride,
+                                int uv_stride,
+                                // output
+                                uint32_t buff_line_mb_y[MAX_W_PIX * 16 / 4],
+                                uint32_t buff_line_mb_u[MAX_W_PIX * 4 / 4],
+                                uint32_t buff_line_mb_v[MAX_W_PIX * 4 / 4]) {
+#pragma HLS INLINE OFF
+    int offset_y = y_mb * y_stride * 16 / 4; // word offset of the row in ysrc
+    int offset_uv = y_mb * uv_stride * 8 / 4; // word offset of the row in usrc and vsrc
+    //#pragma HLS dataflow
+    hls_CopyMBLine_yuv_32bit_const(buff_line_mb_y, buff_line_mb_u, buff_line_mb_v, ysrc + offset_y, usrc + offset_uv,
+                                   vsrc + offset_uv, y_stride * 16 / 4, uv_stride * 8 / 4);
+}
+void hls_CopyMBLine_y_32bit_const(uint32_t ydes[MAX_W_PIX * 16 / 4], // Copies num_read words from ysrc to ydes in bursts of NUM_BURST_READ words.
+                                  uint32_t ysrc[MAX_W_PIX * MAX_H_PIX / 4],
+                                  int num_read) { // word count; the last burst is always copied in full
+    int num_loop = (num_read + NUM_BURST_READ - 1) >> LG2_NUM_BURST_READ; // burst count; rounds up if NUM_BURST_READ == 1 << LG2_NUM_BURST_READ -- TODO confirm
+COPY_Y32:
+    for (int line = 0; line < num_loop; line++) {
+#pragma HLS LOOP_TRIPCOUNT min = 1920 * 16 / 4 / 64 max = 4096 * 16 / 4 / 64
+#pragma HLS PIPELINE
+        memcpy(ydes, ysrc, NUM_BURST_READ * sizeof(uint32_t));
+        ydes += NUM_BURST_READ; // both cursors move on by one burst
+        ysrc += NUM_BURST_READ;
+    }
+}
+void hls_CopyMBLine_uv_32bit_const(uint32_t uvdes[MAX_W_PIX / 2 * 8 / 4], // Copies num_read words from uvsrc to uvdes in bursts of NUM_BURST_READ words.
+                                   uint32_t uvsrc[MAX_W_PIX * MAX_H_PIX / 4 / 4],
+                                   int num_read) { // word count; the last burst is always copied in full
+    int num_loop = (num_read + NUM_BURST_READ - 1) >> LG2_NUM_BURST_READ; // burst count; rounds up if NUM_BURST_READ == 1 << LG2_NUM_BURST_READ -- TODO confirm
+COPY_UV32:
+    for (int line = 0; line < num_loop; line++) {
+#pragma HLS LOOP_TRIPCOUNT min = 1920 / 2 * 8 / 4 / 64 max = 4096 / 2 * 8 / 4 / 64
+#pragma HLS PIPELINE
+        memcpy(uvdes, uvsrc, NUM_BURST_READ * sizeof(uint32_t));
+        uvdes += NUM_BURST_READ; // both cursors move on by one burst
+        uvsrc += NUM_BURST_READ;
+    }
+}
+
+void TopVp8_read_MB_32bit_const(ap_uint<LG2_MAX_W_PIX> width,   //      = p_info[4];  // = pic->width
+                                ap_uint<LG2_MAX_W_PIX> height,  //      = p_info[5];  // = pic->height
+                                ap_uint<LG2_MAX_NUM_MB_W> mb_w, // = p_info[2+2+2];///;
+                                ap_uint<LG2_MAX_NUM_MB_H> mb_h, // = p_info[3+2+2];//;
+                                int y_mb, // MB row index, passed through to hls_GetMB_parallel_32bit_const
+                                uint32_t buff_line_mb_y[MAX_W_PIX * 16 / 4],
+                                uint32_t buff_line_mb_u[MAX_W_PIX * 4 / 4],
+                                uint32_t buff_line_mb_v[MAX_W_PIX * 4 / 4],
+                                // uint32_t  buff_line_mb_y2[MAX_W_PIX*16/4],
+                                // uint32_t  buff_line_mb_u2[MAX_W_PIX*4/4],
+                                // uint32_t  buff_line_mb_v2[MAX_W_PIX*4/4],
+                                int stride_y,
+                                int stride_uv,
+                                // output
+                                hls::stream<ap_uint<WD_PIX * 16> >* str_din_y, // receives 16 words per MB
+                                hls::stream<ap_uint<WD_PIX * 16> >* str_din_uv) { // receives 4 U words, then 4 V words, per MB
+#pragma HLS INLINE OFF
+TOP_READ32: // one iteration per MB of the row: extract the MB from the line buffers, then push it to the streams
+    for (int x_mb = 0; x_mb < mb_w; x_mb++) {
+#pragma HLS LOOP_TRIPCOUNT min = 120 max = 256
+        ap_uint<WD_PIX * 16> ap_yuv_in_[24]; // not referenced below
+        ap_uint<WD_PIX * 16> ap_y_in_[16];
+        ap_uint<WD_PIX * 16> ap_u_in_[4];
+        ap_uint<WD_PIX * 16> ap_v_in_[4];
+        hls_GetMB_parallel_32bit_const( // 599 cycyle dataflow
+            buff_line_mb_y,             //[MAX_W_PIX*16],
+            buff_line_mb_u,             //[MAX_W_PIX*4],
+            buff_line_mb_v,             //[MAX_W_PIX*4],
+            x_mb, y_mb, width, height, stride_y, stride_uv,
+            ap_y_in_, //[16],
+            ap_u_in_, //[4],
+            ap_v_in_  //[4]
+            );
+
+        for (int i = 0; i < 16; i++) str_din_y->write(ap_y_in_[i]);
+        for (int i = 0; i < 4; i++) str_din_uv->write(ap_u_in_[i]); // U words precede V words on the shared stream
+        for (int i = 0; i < 4; i++) str_din_uv->write(ap_v_in_[i]);
+    }
+}
+void hls_GetMB_parallel_32bit_const(uint32_t ysrc_MBline[MAX_W_PIX * 16 / 4], // Extracts one MB's Y, U and V data from the line buffers with three helper calls.
+                                    uint32_t usrc_MBline[MAX_W_PIX * 4 / 4],
+                                    uint32_t vsrc_MBline[MAX_W_PIX * 4 / 4],
+                                    int x_mb, // MB column index
+                                    int y_mb, // MB row index
+                                    int width,
+                                    int height,
+                                    int stride_y, // passed to the Y helper
+                                    int stride_uv, // passed to both UV helper calls
+                                    ap_uint<WD_PIX * 16> ap_y_in_[16], // output, filled by hls_GetMB_y_32bit_const
+                                    ap_uint<WD_PIX * 16> ap_u_in_[4], // output, filled from usrc_MBline
+                                    ap_uint<WD_PIX * 16> ap_v_in_[4]) { // output, filled from vsrc_MBline
+#pragma HLS INLINE OFF
+    //#pragma HLS DATAFLOW
+    hls_GetMB_y_32bit_const(ysrc_MBline, x_mb, y_mb, width, height, stride_y, ap_y_in_);
+    hls_GetMB_uv_32bit_const(usrc_MBline, x_mb, y_mb, width, height, stride_uv, ap_u_in_);
+    hls_GetMB_uv_32bit_const(vsrc_MBline, x_mb, y_mb, width, height, stride_uv, ap_v_in_);
+}
+
+static int MinSize32(int a, int b) { // Returns the smaller of the two arguments.
+    return (b <= a) ? b : a;
+};
+void hls_GetMB_y_32bit_const(uint32_t src[MAX_W_PIX * 16 / 4], // Extracts 16 rows x 4 words of one MB from the Y line buffer into ap_y_in_, applying GetEdgeImage to each word.
+                             int x_mb, // MB column index; selects the starting word within src
+                             int y_mb, // MB row index; used only to compute h
+                             int width,
+                             int height,
+                             int stride, // added to the byte address after each row
+                             ap_uint<WD_PIX * 16> ap_y_in_[16]) { // output
+    int x = x_mb;
+    int y = y_mb;
+    const int w = MinSize32(width - x * 16, 16); // columns remaining from this MB's left edge, capped at 16
+    const int h = MinSize32(height - y * 16, 16); // rows remaining from this MB's top edge, capped at 16
+    int off = (w - 1) % 4; // byte position of column w - 1 within its 32-bit word
+    uint32_t* ysrc = src + x_mb * 16 / 4; // 32bb
+    // Two following variables create a slide window
+    uint32_t rem_dat;
+    uint32_t crt_dat;
+    uint32_t w32;
+    int addr8 = 0; // byte address of the current row relative to ysrc
+    for (int i = 0; i < 16; ++i) {
+        int addr32 = (addr8) >> 2; // word index holding the row's first byte
+        int num_rem = 4 - (addr8 & 3); // bytes of that first word that belong to this row
+        rem_dat = ysrc[addr32++];
+    GET_Y32_IN:
+        for (int base = 0; base < 4; base++) { // j= 0, 4, 8, 12; base = 0,1,2,3
+#pragma HLS PIPELINE II = 1
+            int j = base << 2;
+            bool isAllIn = base < (w >> 2); //((j+3)<=(w-1));
+            bool isAllOut = (j >= w); // the whole word starts at or beyond column w
+            bool isOver = (num_rem + j) >= w; // when true, no further word is fetched for this row
+            uint32_t rem_dat_2 = rem_dat; // previous word, kept before rem_dat may be overwritten below
+            if (isOver)
+                crt_dat = rem_dat;
+            else
+                crt_dat = rem_dat = ysrc[addr32++];
+
+            w32 = get32bits_2_const(num_rem, rem_dat_2, crt_dat);
+            ap_uint<32> tmp = GetEdgeImage(w32, off, isAllIn, isAllOut);
+            VCT_GET(ap_y_in_[(i & 12) + base], i % 4, WD_PIX * 4) = tmp(31, 0); // VCT_GET is defined outside this view; presumably selects 32-bit slot i % 4 -- TODO confirm
+        }
+        if (i < (h - 1)) addr8 += stride; // from row h - 1 onward the address stops advancing, so that row is reused
+    }
+}
+
+ap_uint<32> GetEdgeImage(ap_uint<32> org, int off, bool isAllIn, bool isAllOut) { // Returns org with bytes above position off (or all bytes) replaced by the byte at position off, depending on the flags.
+#pragma HLS PIPELINE
+    ap_uint<32> tmp = org;
+    ap_uint<8> edge = org(7 + off * 8, off * 8); // byte number off of org (byte 0 is bits 7..0)
+    if (isAllIn) {
+        tmp = org; // word is returned unchanged
+    } else if (isAllOut) {
+        tmp(7, 0) = edge; // all four bytes become the edge byte
+        tmp(15, 8) = edge;
+        tmp(23, 16) = edge;
+        tmp(31, 24) = edge;
+    } else { // at the edge
+        if (off == 0) { // keep byte 0, overwrite bytes 1..3
+            tmp(15, 8) = edge;
+            tmp(23, 16) = edge;
+            tmp(31, 24) = edge;
+        } else if (off == 1) { // keep bytes 0..1, overwrite bytes 2..3
+            tmp(23, 16) = edge;
+            tmp(31, 24) = edge;
+        } else if (off == 2) { // keep bytes 0..2, overwrite byte 3
+            tmp(31, 24) = edge;
+        } else { // any other off: all four bytes become the edge byte
+            tmp(7, 0) = edge;
+            tmp(15, 8) = edge;
+            tmp(23, 16) = edge;
+            tmp(31, 24) = edge;
+        }
+    }
+    return tmp;
+}
+
+void hls_GetMB_uv_32bit_const(uint32_t src[MAX_W_PIX * 4 / 4], // Extracts 8 rows x 2 words of one MB from a chroma line buffer into ap_uv_in_, applying GetEdgeImage to each word.
+                              int x_mb, // MB column index; selects the starting word within src
+                              int y_mb, // MB row index; used only to compute h
+                              int width,
+                              int height,
+                              int stride, // added to the byte address after each row
+                              ap_uint<WD_PIX * 16> ap_uv_in_[4]) { // output
+#pragma HLS INLINE OFF
+    int x = x_mb;
+    int y = y_mb;
+    const int w = MinSize32(width - x * 16, 16); // columns remaining from this MB's left edge, capped at 16
+    const int h = MinSize32(height - y * 16, 16); // rows remaining from this MB's top edge, capped at 16
+    const int uv_w = (w + 1) >> 1; // half of w, rounded up
+    const int uv_h = (h + 1) >> 1; // half of h, rounded up
+    int off = (uv_w - 1) % 4; // byte position of column uv_w - 1 within its 32-bit word
+    uint32_t* uvsrc = src + x_mb * 8 / 4; // 32bb
+    uint32_t rem_dat;
+    uint32_t crt_dat;
+    uint32_t w32;
+    int addr8 = 0; // byte address of the current row relative to uvsrc
+    int rem_num = 0; // not referenced below
+
+    for (int i = 0; i < 8; ++i) {
+        int addr32 = (addr8 + 0) / 4; // word index holding the row's first byte
+        int num_rem = 4 - (addr8 & 3); // bytes of that first word that belong to this row
+        rem_dat = uvsrc[addr32++];
+    GET_UV32_IN:
+        for (int base = 0; base < 2; base++) {
+#pragma HLS PIPELINE
+            int j = base * 4;
+            bool isAllIn = ((j + 3) <= (uv_w - 1)); // all four columns of the word are below uv_w
+            bool isAllOut = (j > (uv_w - 1)); // the whole word starts beyond column uv_w - 1
+            bool isOver = (num_rem + j) >= uv_w; // when true, no further word is fetched for this row
+            { // base = 0, 1
+                uint32_t rem_dat_2 = rem_dat; // previous word, kept before rem_dat may be overwritten below
+                if (isOver)
+                    crt_dat = rem_dat;
+                else
+                    crt_dat = rem_dat = uvsrc[addr32++];
+                w32 = get32bits_2_const(num_rem, rem_dat_2, crt_dat);
+                ap_uint<32> tmp = GetEdgeImage(w32, off, isAllIn, isAllOut);
+                VCT_GET(ap_uv_in_[(i & 4) / 2 + base], i % 4, WD_PIX * 4) = tmp(31, 0); // VCT_GET is defined outside this view; presumably selects 32-bit slot i % 4 -- TODO confirm
+            }
+        }
+        if (i < (uv_h - 1)) addr8 += stride; // from row uv_h - 1 onward the address stops advancing, so that row is reused
+    }
+}
+
+ap_uint<32> get32bits_2_const(ap_uint<3> n_rem, ap_uint<32> rem, ap_uint<32> crt) { // Builds one 32-bit word from the top n_rem bytes of rem (placed low) and the low bytes of crt (placed above them).
+#pragma HLS PIPELINE
+    if (n_rem == 4) return rem; // rem is returned unchanged
+    if (n_rem == 0) return crt; // crt is returned unchanged
+    ap_uint<32> tmp;
+    ap_uint<5> bits = 8 * n_rem; // 8, 16 or 24 for n_rem = 1, 2, 3
+    rem = rem >> (32 - bits); // move the top n_rem bytes of rem down to bit 0
+    tmp = rem;
+    crt = crt << bits; // move crt up so its low bytes sit above those bits
+    tmp(31, bits) = crt(31, bits);
+    return tmp;
+}
+//////////======================================================================/////////////////////////////
+//////////=====================   TopVp8_compute      ==========================/////////////////////////////
+//////////======================================================================/////////////////////////////
+// TopVp8_compute_NoOut===========================================================================/
+// (Note: the function names listed below may have changed since this call tree was written.)
+//-Intraprediction_mb_syn_str2
+//--hls_LoadPre_out
+//--hls_LoadPre_mode
+//--Pickup_dataflow3
+//---Pickup_Y44
+//----hls_p4_test
+//----hls_GetCost
+//----hls_channel_p44
+//-----hls_FTransform
+//-----hls_QuantizeBlock
+//-----hls_ITransformOne
+//-----hls_SSE4X4
+//-----hls_Disto4x4
+//-----hls_fast_cost
+//---Pickup_Y16
+//----hls_channel_p16
+//-----hls_p16_test
+//-----hls_FTransform
+//-----hls_FTransformWHT
+//-----hls_QuantizeBlockWHT
+//-----hls_IFTransformWHT
+//-----hls_QuantizeBlock
+//-----hls_ITransformOne
+//-----hls_SSE4X4
+//-----hls_Disto4x4
+//-----hls_fast_cost
+//-----hls_ca_score
+//---Pickup_UV
+//----hls_p8_test
+//----hls_channel_uv_8
+//-----hls_p8_test
+//-----hls_FTransform
+//-----hls_QuantizeBlock
+//-----hls_ITransformOne
+//-----hls_fast_cost
+//-----hls_ca_score
+//--hls_SetBestAs4_mode
+void TopVp8_compute(ap_uint<LG2_MAX_NUM_MB_W> mb_w, // Calls Intraprediction_mb_syn_str2_widen once per MB, row by row. mb_w bounds the inner loop.
+                    ap_uint<LG2_MAX_NUM_MB_H> mb_h, // bounds the outer loop
+                    hls::stream<ap_uint<WD_PIX * 16> >* str_din_y, // passed through to the per-MB call
+                    hls::stream<ap_uint<WD_PIX * 16> >* str_din_uv, // passed through to the per-MB call
+                    ap_uint<WD_LMD> lambda_p16, // the five lambda ports are declared ap_stable below
+                    ap_uint<WD_LMD> lambda_p44,
+                    ap_uint<WD_LMD> tlambda,
+                    ap_uint<WD_LMD> lambda_uv,
+                    ap_uint<WD_LMD> tlambda_m,
+                    hls_QMatrix hls_qm1,
+                    hls_QMatrix hls_qm2,
+                    hls_QMatrix hls_qm_uv,
+                    ap_int<WD_sharpen * 16> ap_sharpen,
+                    ap_int<WD_sharpen * 16> ap_sharpen_uv,
+                    hls::stream<ap_uint<WD_PIX * 16> >* str_out, // this and the streams below are passed through to the per-MB call
+                    hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc,
+                    hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y,
+                    hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv,
+                    hls::stream<ap_int<64> >* str_pred,
+                    hls::stream<ap_int<6> >* str_ret) {
+#pragma HLS interface ap_stable port = lambda_p16
+#pragma HLS interface ap_stable port = lambda_p44
+#pragma HLS interface ap_stable port = tlambda
+#pragma HLS interface ap_stable port = lambda_uv
+#pragma HLS interface ap_stable port = tlambda_m
+
+    for (int y_mb = 0; y_mb < mb_h; y_mb++) {
+#pragma HLS LOOP_TRIPCOUNT min = 68 max = 256
+        for (int x_mb = 0; x_mb < mb_w; x_mb++) {
+#pragma HLS LOOP_TRIPCOUNT min = 120 max = 256
+            // Intraprediction_mb_syn_str2(// &it,
+            Intraprediction_mb_syn_str2_widen( // &it,
+                x_mb, y_mb, mb_w, str_din_y, str_din_uv, lambda_p16, lambda_p44, tlambda, lambda_uv, tlambda_m, hls_qm1,
+                hls_qm2, hls_qm_uv, ap_sharpen, ap_sharpen_uv, str_out, str_level_dc, str_level_y, str_level_uv,
+                str_pred, str_ret);
+        }
+    }
+}
+
+void TopVp8_compute_NoOut(ap_uint<LG2_MAX_NUM_MB_W> mb_w, // Calls Intraprediction_mb_syn_str2_widen_NoOut once per MB, row by row; takes no str_out stream. mb_w bounds the inner loop.
+                          ap_uint<LG2_MAX_NUM_MB_H> mb_h, // bounds the outer loop
+                          hls::stream<ap_uint<WD_PIX * 16> >* str_din_y, // passed through to the per-MB call
+                          hls::stream<ap_uint<WD_PIX * 16> >* str_din_uv, // passed through to the per-MB call
+                          ap_uint<WD_LMD> lambda_p16, // the five lambda ports are declared ap_stable below
+                          ap_uint<WD_LMD> lambda_p44,
+                          ap_uint<WD_LMD> tlambda,
+                          ap_uint<WD_LMD> lambda_uv,
+                          ap_uint<WD_LMD> tlambda_m,
+                          hls_QMatrix hls_qm1,
+                          hls_QMatrix hls_qm2,
+                          hls_QMatrix hls_qm_uv,
+                          ap_int<WD_sharpen * 16> ap_sharpen,
+                          ap_int<WD_sharpen * 16> ap_sharpen_uv,
+                          hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc, // this and the streams below are passed through to the per-MB call
+                          hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y,
+                          hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv,
+                          hls::stream<ap_int<64> >* str_pred,
+                          hls::stream<ap_int<6> >* str_ret) {
+#pragma HLS interface ap_stable port = lambda_p16
+#pragma HLS interface ap_stable port = lambda_p44
+#pragma HLS interface ap_stable port = tlambda
+#pragma HLS interface ap_stable port = lambda_uv
+#pragma HLS interface ap_stable port = tlambda_m
+
+VP8_COMPUTE_NOOUT:
+    for (int y_mb = 0; y_mb < mb_h; y_mb++) {
+#pragma HLS LOOP_TRIPCOUNT min = 68 max = 256
+        for (int x_mb = 0; x_mb < mb_w; x_mb++) {
+#pragma HLS LOOP_TRIPCOUNT min = 120 max = 256
+            // Intraprediction_mb_syn_str2(// &it,
+            Intraprediction_mb_syn_str2_widen_NoOut( // &it,
+                x_mb, y_mb, mb_w, str_din_y, str_din_uv, lambda_p16, lambda_p44, tlambda, lambda_uv, tlambda_m, hls_qm1,
+                hls_qm2, hls_qm_uv, ap_sharpen, ap_sharpen_uv, str_level_dc, str_level_y, str_level_uv, str_pred,
+                str_ret);
+        }
+    }
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+
+// Intra-predicts one macroblock and emits reconstructed pixels, levels, modes and a flag word.
+//
+// Per call this function:
+//   1. reads 16 luma words from str_ap_yuv_in_y and 8 chroma words from str_ap_yuv_in_uv;
+//   2. loads top/left pixel and mode context from function-static buffers (hls_LoadPre_*_widen);
+//   3. calls Pickup_dataflow3_widen, which fills the candidate outputs, levels, scores and modes used below;
+//   4. keeps the 4x4 luma result when its accumulated score is lower than the selected 16x16 score,
+//      otherwise keeps the 16x16 result; stores the kept output back into the static context and
+//      writes the output streams.
+//
+// Stream traffic per call: str_out 16 luma + 8 chroma words, str_level_y 16 words, str_level_dc 1 word,
+// str_level_uv 8 words, str_pred 1 word, str_ret 1 word.
+//
+// str_ret layout: bits [3:0] <- ap_uv_mode_c(1, 0); bit 4 <- ~b_y[1] (1 when the 16x16 result was kept);
+// bit 5 <- 1 only when every non-zero mask tested below is zero.
+//
+// NOTE: the neighbour context lives in function-static arrays, so the function is stateful across calls.
+// The callers visible in this file invoke it in raster order (x_mb inner loop, y_mb outer loop).
+void Intraprediction_mb_syn_str2_widen(ap_uint<LG2_MAX_NUM_MB_W> x_mb,
+                                       ap_uint<LG2_MAX_NUM_MB_W> y_mb,
+                                       ap_uint<LG2_MAX_NUM_MB_W> mb_w,
+                                       hls::stream<ap_uint<WD_PIX * 16> >* str_ap_yuv_in_y,
+                                       hls::stream<ap_uint<WD_PIX * 16> >* str_ap_yuv_in_uv,
+                                       ap_uint<WD_LMD> lambda_p16,
+                                       ap_uint<WD_LMD> lambda_p44,
+                                       ap_uint<WD_LMD> tlambda,
+                                       ap_uint<WD_LMD> lambda_uv,
+                                       ap_uint<WD_LMD> tlambda_m,
+                                       hls_QMatrix hls_qm1,
+                                       hls_QMatrix hls_qm2,
+                                       hls_QMatrix hls_qm_uv,
+                                       ap_int<WD_sharpen * 16> ap_sharpen,
+                                       ap_int<WD_sharpen * 16> ap_sharpen_uv,
+                                       hls::stream<ap_uint<WD_PIX * 16> >* str_out,
+                                       hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc,
+                                       hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y,
+                                       hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv,
+                                       hls::stream<ap_int<64> >* str_pred,
+                                       hls::stream<ap_int<6> >* str_ret) {
+    // Source pixels of this macroblock. The luma words are kept twice so that the 4x4 and 16x16
+    // inputs of Pickup_dataflow3_widen each get their own array.
+    ap_uint<WD_PIX * 16> ap_y_in_y44[16 + 0];
+    ap_uint<WD_PIX * 16> ap_y_in_y16[16 + 0];
+    ap_uint<WD_PIX * 16> ap_uv_in_[8];
+READ_AP_Y_WIDEN:
+    for (int i = 0; i < 16; i++) {
+#pragma HLS PIPELINE
+        ap_y_in_y44[i] = str_ap_yuv_in_y->read();
+        ap_y_in_y16[i] = ap_y_in_y44[i];
+        // One chroma word is consumed on every odd iteration: 8 chroma reads for 16 luma reads.
+        if (i & 1) ap_uv_in_[i / 2] = str_ap_yuv_in_uv->read();
+    }
+
+    // Pixel context. The static_* arrays persist between calls; the others are refreshed per call.
+    // No storage-binding pragma is applied to the top line buffers in this variant.
+    static ap_uint<WD_PIX * 4> static_ap_y_top_[MAX_NUM_MB_W * 4];
+    static ap_uint<WD_PIX * 4> static_ap_uv_top_[MAX_NUM_MB_W * 4];
+    static ap_uint<WD_PIX * 4> static_ap_y_left_[4];
+    static ap_uint<WD_PIX * 4> static_ap_uv_left_[4];
+    static ap_uint<WD_PIX * 4> static_ap_y_top_c[4];
+    ap_uint<WD_PIX * 4> ap_y_left_c[4];
+    ap_uint<WD_PIX * 4> ap_y4_top_c[4];
+#pragma HLS ARRAY_PARTITION variable = ap_y4_top_c complete dim = 1
+    ap_uint<WD_PIX * 4> ap_y4_left_c[4];
+#pragma HLS ARRAY_PARTITION variable = ap_y4_left_c complete dim = 1
+    static ap_uint<WD_PIX * 4> static_ap_uv_top_c[4];
+    ap_uint<WD_PIX * 4> ap_uv_left_c[4];
+
+    // Mode context. The static arrays hold the modes stored by earlier calls.
+    static ap_uint<WD_MODE> static_ap_y_top_mode[MAX_NUM_MB_W * 4];
+    static ap_uint<WD_MODE> static_ap_y_left_mode[4];
+    ap_uint<WD_MODE> ap_y_top_c_mode[4];
+    ap_uint<WD_MODE> ap_y_left_c_mode[4];
+    ap_uint<WD_MODE> ap_y4_top_c_mode[16];
+#pragma HLS ARRAY_PARTITION variable = ap_y4_top_c_mode complete dim = 1
+    ap_uint<WD_MODE> ap_y16_mode_c;
+    ap_uint<WD_MODE> ap_uv_mode_c;
+    ap_uint<WD_MODE> ap_y_m_mode;
+
+    // Rate-distortion results and non-zero masks filled in by Pickup_dataflow3_widen.
+    str_rd rd_y16_cb[2];
+    str_rd rd_uv_cb[2];
+    ap_uint<25> rd_y4_acc_nz = 0;
+    ap_uint<WD_RD_SCORE + 4> rd_y4_acc_score = -1; // -1 wraps to the all-ones (largest) unsigned value
+
+    // Picture-border flags for this macroblock.
+    ap_uint<1> istop = (y_mb == 0);
+    ap_uint<1> isleft = (x_mb == 0);
+    ap_uint<1> isright = (x_mb == mb_w - 1);
+
+    // Candidate level and reconstructed-pixel buffers ("cb" arrays are indexed by candidate 0/1).
+    ap_int<WD_LEVEL * 16> ap_y_level_cb[2][17];
+    ap_int<WD_LEVEL * 16> ap_y16dc_level_cb[2];
+    ap_int<WD_LEVEL * 16> ap_y4_level_cb[16];
+#pragma HLS ARRAY_PARTITION variable = ap_y4_level_cb complete dim = 1
+    ap_int<WD_LEVEL * 16> ap_uv_level_cb[2][16];
+    ap_uint<WD_PIX * 16> ap_y_out_cb[2][17];
+    ap_uint<WD_PIX * 16> ap_y4_out_cb[16];
+#pragma HLS ARRAY_PARTITION variable = ap_y4_out_cb complete dim = 1
+    ap_uint<WD_PIX * 16> ap_uv_out_cb[2][17];
+    ap_uint<WD_PIX * 4> ap_y4_topright_c;
+    ap_uint<WD_PIX> ap_y_m;
+    ap_uint<WD_PIX> ap_u_m;
+    ap_uint<WD_PIX> ap_v_m;
+
+    // Fetch the pixel context for this macroblock from the static buffers.
+    hls_LoadPre_out_widen(&ap_y_m, &ap_u_m, &ap_v_m,
+                          static_ap_y_top_c,  //[4]
+                          ap_y4_top_c,        //[4]
+                          static_ap_uv_top_c, //[4]
+                          &ap_y4_topright_c,
+                          ap_y_left_c,        //[4]
+                          ap_y4_left_c,       //[4]
+                          ap_uv_left_c,       //[4]
+                          static_ap_y_left_,  //[4]
+                          static_ap_uv_left_, //[4]
+                          static_ap_y_top_, static_ap_uv_top_, x_mb, y_mb, mb_w);
+
+    // Fully partitioned copies of the top/left luma context for the 16x16 input of the pickup stage.
+    ap_uint<WD_PIX * 4> ap_y_top_c_y16[4];
+#pragma HLS ARRAY_PARTITION variable = ap_y_top_c_y16 complete dim = 1
+    ap_uint<WD_PIX * 4> ap_y_left_c_y16[4];
+#pragma HLS ARRAY_PARTITION variable = ap_y_left_c_y16 complete dim = 1
+
+    for (int i = 0; i < 4; i++) {
+#pragma HLS UNROLL
+        ap_y_top_c_y16[i] = static_ap_y_top_c[i];
+        ap_y_left_c_y16[i] = ap_y_left_c[i];
+    }
+
+    // Fetch the mode context for this macroblock.
+    hls_LoadPre_mode_widen(static_ap_y_top_mode, static_ap_y_left_mode, ap_y_top_c_mode, ap_y4_top_c_mode,
+                           ap_y_left_c_mode, &ap_y_m_mode, x_mb, y_mb, mb_w);
+
+    // Candidate selectors written by the pickup stage and used as array indices below.
+    ap_uint<1> b_uv = 0;
+    ap_uint<2> b_y = 0;
+
+    /*******************************/
+    /*       Pickup                */
+    /*******************************/
+    Pickup_dataflow3_widen(
+        // Parameters that stay fixed for one picture/segment
+        tlambda,       // ap_uint<WD_LMD>         I__
+        tlambda_m,     // ap_uint<WD_LMD>         I__
+        lambda_p44,    // ap_uint<WD_LMD>         I__
+        lambda_p16,    // ap_uint<WD_LMD>         I__
+        lambda_uv,     // ap_uint<WD_LMD>         I__
+        hls_qm1,       // y44,y16  hls_QMatrix    I__
+        hls_qm2,       // y16      hls_QMatrix    I__
+        hls_qm_uv,     // hls_QMatrix             I__
+        ap_sharpen,    // ap_int<WD_sharpen*16>   I__
+        ap_sharpen_uv, // ap_int<WD_sharpen*16>   I__
+        // Parameters that change for each MB
+        ap_y_in_y44, // [16] ap_uint<WD_PIX*16>   I__
+        ap_y_in_y16, // [16] ap_uint<WD_PIX*16>   I__
+        ap_uv_in_,   // [8]
+        istop,       // ap_uint<1>                I__
+        isleft,      // ap_uint<1>                I__
+        isright,     // ap_uint<1>                I__
+        // image context
+        ap_y4_top_c,        // [4] ap_uint<WD_PIX*4>  I__
+        ap_y_top_c_y16,     // [4] ap_uint<WD_PIX*4>  I__
+        ap_y4_left_c,       // [4] ap_uint<WD_PIX*4>  I__
+        ap_y_left_c_y16,    // [4] ap_uint<WD_PIX*4>  I__
+        static_ap_uv_top_c, // [4] ap_uint<WD_PIX*4>  I__
+        ap_uv_left_c,       // [4] ap_uint<WD_PIX*4>  I__
+        ap_y_m,             // ap_uint<WD_PIX>        I__
+        ap_u_m,             // ap_uint<WD_PIX>        I__
+        ap_v_m,             // ap_uint<WD_PIX>        I__
+        ap_y4_topright_c,   // ap_uint<WD_PIX*4>      I__
+        // mode context
+        ap_y_top_c_mode,  // [4] ap_uint<WD_MODE>     I__
+        ap_y_left_c_mode, // [4] ap_uint<WD_MODE>     I__
+        // OUTPUT
+        ap_y4_out_cb,      // [16]    ap_uint<WD_PIX*16>   O__
+        ap_y_out_cb,       // [2][17] ap_uint<WD_PIX*16>   O__
+        ap_uv_out_cb,      // [2][17] ap_uint<WD_PIX*16>   O__
+        ap_y4_level_cb,    // [16]    ap_int<WD_LEVEL*16>  O__
+        ap_y_level_cb,     // [2][17] ap_int<WD_LEVEL*16>  O__
+        ap_y16dc_level_cb, // [2]     ap_int<WD_LEVEL*16>  O__
+        ap_uv_level_cb,    // [2][16] ap_int<WD_LEVEL*16>  O__
+        &rd_y4_acc_score, &rd_y4_acc_nz,
+        rd_y16_cb,        // [2]  str_rd             O__
+        rd_uv_cb,         // [2]  str_rd             O__
+        ap_y4_top_c_mode, // [16] ap_uint<WD_MODE>   IO_
+        &ap_y16_mode_c,   // ap_uint<WD_MODE>*       OP_
+        &ap_uv_mode_c,    // ap_uint<WD_MODE>*       OP_
+        &b_uv,            // ap_uint<1>*             OP_
+        &b_y              // ap_uint<2>*             OP_
+        );
+
+    // Flag word sent on str_ret. Bit 5 is AND-accumulated from the "mask == 0" tests below, so it has
+    // to start at 1; it used to be read from a default-constructed (indeterminate) ap_uint.
+    // NOTE(review): consumers of str_ret bit 5 now see a defined value - re-validate downstream use.
+    ap_uint<6> ret = 0;
+    ret[5] = 1;
+    ap_uint<WD_MODE * 16> ap_y_mode_b;
+    ret[5] = ret[5] & (rd_uv_cb[b_uv].nz(23, 16) == 0); // chroma part of the non-zero mask
+
+    /**********************************************/
+    /* Pickup the best mode for y
+     * Set nz, level, out, preds, mb_->type       */
+    /**********************************************/
+    const int x_sb_w = (int)x_mb << 2; // first of this macroblock's four entries in the top mode buffer
+    if (rd_y4_acc_score < rd_y16_cb[(1 & b_y)].score) {
+        // 4x4 result kept.
+        ret[5] = ret[5] & (rd_y4_acc_nz(15, 0) == 0);
+        hls_SetBestAs4_mode_widen(static_ap_y_top_mode, static_ap_y_left_mode, ap_y4_top_c_mode, ap_y_left_c_mode,
+                                  &ap_y_mode_b, x_sb_w);
+        b_y = 2; // bit 1 set marks "4x4 kept"; (b_y & 1) is 0 from here on
+        hls_StoreTopLeft_y(static_ap_y_top_, static_ap_y_left_, ap_y4_out_cb, x_mb);
+        for (int n = 0; n < 16; n++) {
+#pragma HLS UNROLL
+            str_out->write(ap_y4_out_cb[n]);
+            str_level_y->write(ap_y4_level_cb[n]);
+        }
+    } else {
+        // 16x16 result kept; bit 24 of the mask is tested in addition to bits 15..0.
+        ret[5] = ret[5] & (rd_y16_cb[(1 & b_y)].nz(15, 0) == 0);
+        ret[5] = ret[5] & (rd_y16_cb[(1 & b_y)].nz[24] == 0);
+        hls_SetBestAs16_mode_widen(static_ap_y_top_mode, static_ap_y_left_mode, ap_y16_mode_c, &ap_y_mode_b, x_sb_w);
+        b_y &= 1; // clear bit 1 so b_y is a plain 0/1 candidate index
+        hls_StoreTopLeft_y(static_ap_y_top_, static_ap_y_left_, ap_y_out_cb[b_y], x_mb);
+        for (int n = 0; n < 16; n++) {
+#pragma HLS UNROLL
+            str_out->write(ap_y_out_cb[b_y][n]);
+            str_level_y->write(ap_y_level_cb[b_y][n]);
+        }
+    }
+
+    // Chroma: update the static context, then emit the DC levels followed by pixels and levels.
+    hls_StoreTopLeft_uv(static_ap_uv_top_, static_ap_uv_left_, ap_uv_out_cb[b_uv], x_mb);
+    str_level_dc->write(ap_y16dc_level_cb[b_y & 1]);
+    for (int n = 0; n < 8; n += 1) {
+#pragma HLS UNROLL
+        str_out->write(ap_uv_out_cb[b_uv][n]);
+        str_level_uv->write(ap_uv_level_cb[b_uv][n]);
+    }
+
+    /**********************************************/
+    /* write return value                         */
+    /**********************************************/
+    str_pred->write(ap_y_mode_b);
+    ret(3, 0) = ap_uv_mode_c(1, 0);
+    ret(4, 4) = ~b_y(1, 1); // it.mb_->type_ = 0;
+    str_ret->write(ret);
+}
+
+// Intra-predicts one macroblock and emits levels, modes and a flag word, but no reconstructed pixels.
+//
+// Same processing as the variant that takes a str_out argument, except that nothing is written to a
+// pixel output stream. Per call this function:
+//   1. reads 16 luma words from str_ap_yuv_in_y and 8 chroma words from str_ap_yuv_in_uv;
+//   2. loads top/left pixel and mode context from function-static buffers (hls_LoadPre_*_widen);
+//   3. calls Pickup_dataflow3_widen, which fills the candidate outputs, levels, scores and modes used below;
+//   4. keeps the 4x4 luma result when its accumulated score is lower than the selected 16x16 score,
+//      otherwise keeps the 16x16 result; stores the kept output back into the static context and
+//      writes the output streams.
+//
+// Stream traffic per call: str_level_y 16 words, str_level_dc 1 word, str_level_uv 8 words,
+// str_pred 1 word, str_ret 1 word.
+//
+// str_ret layout: bits [3:0] <- ap_uv_mode_c(1, 0); bit 4 <- ~b_y[1] (1 when the 16x16 result was kept);
+// bit 5 <- 1 only when every non-zero mask tested below is zero.
+//
+// NOTE: the neighbour context lives in function-static arrays, so the function is stateful across calls.
+// The caller visible in this file invokes it in raster order (x_mb inner loop, y_mb outer loop).
+void Intraprediction_mb_syn_str2_widen_NoOut(ap_uint<LG2_MAX_NUM_MB_W> x_mb,
+                                             ap_uint<LG2_MAX_NUM_MB_W> y_mb,
+                                             ap_uint<LG2_MAX_NUM_MB_W> mb_w,
+                                             hls::stream<ap_uint<WD_PIX * 16> >* str_ap_yuv_in_y,
+                                             hls::stream<ap_uint<WD_PIX * 16> >* str_ap_yuv_in_uv,
+                                             ap_uint<WD_LMD> lambda_p16,
+                                             ap_uint<WD_LMD> lambda_p44,
+                                             ap_uint<WD_LMD> tlambda,
+                                             ap_uint<WD_LMD> lambda_uv,
+                                             ap_uint<WD_LMD> tlambda_m,
+                                             hls_QMatrix hls_qm1,
+                                             hls_QMatrix hls_qm2,
+                                             hls_QMatrix hls_qm_uv,
+                                             ap_int<WD_sharpen * 16> ap_sharpen,
+                                             ap_int<WD_sharpen * 16> ap_sharpen_uv,
+                                             // NoOut: hls::stream<ap_uint<WD_PIX * 16> >* str_out,
+                                             hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc,
+                                             hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y,
+                                             hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv,
+                                             hls::stream<ap_int<64> >* str_pred,
+                                             hls::stream<ap_int<6> >* str_ret) {
+    // Source pixels of this macroblock. The luma words are kept twice so that the 4x4 and 16x16
+    // inputs of Pickup_dataflow3_widen each get their own array.
+    ap_uint<WD_PIX * 16> ap_y_in_y44[16 + 0];
+    ap_uint<WD_PIX * 16> ap_y_in_y16[16 + 0];
+    ap_uint<WD_PIX * 16> ap_uv_in_[8];
+
+READ_AP_UV_WIDEN:
+    for (int i = 0; i < 16; i++) {
+#pragma HLS PIPELINE
+        ap_y_in_y44[i] = str_ap_yuv_in_y->read();
+        ap_y_in_y16[i] = ap_y_in_y44[i];
+        // One chroma word is consumed on every odd iteration: 8 chroma reads for 16 luma reads.
+        if (i & 1) ap_uv_in_[i / 2] = str_ap_yuv_in_uv->read();
+    }
+
+    // Pixel context. The static_* arrays persist between calls; the others are refreshed per call.
+    static ap_uint<WD_PIX * 4> static_ap_y_top_[MAX_NUM_MB_W * 4];
+#pragma HLS RESOURCE variable = static_ap_y_top_ core = RAM_2P_LUTRAM
+    static ap_uint<WD_PIX * 4> static_ap_uv_top_[MAX_NUM_MB_W * 4];
+#pragma HLS RESOURCE variable = static_ap_uv_top_ core = RAM_2P_LUTRAM
+    static ap_uint<WD_PIX * 4> static_ap_y_left_[4];
+    static ap_uint<WD_PIX * 4> static_ap_uv_left_[4];
+    static ap_uint<WD_PIX * 4> static_ap_y_top_c[4];
+    ap_uint<WD_PIX * 4> ap_y_left_c[4];
+    ap_uint<WD_PIX * 4> ap_y4_top_c[4];
+#pragma HLS ARRAY_PARTITION variable = ap_y4_top_c complete dim = 1
+    ap_uint<WD_PIX * 4> ap_y4_left_c[4];
+#pragma HLS ARRAY_PARTITION variable = ap_y4_left_c complete dim = 1
+    static ap_uint<WD_PIX * 4> static_ap_uv_top_c[4];
+    ap_uint<WD_PIX * 4> ap_uv_left_c[4];
+
+    // Mode context. The static arrays hold the modes stored by earlier calls.
+    static ap_uint<WD_MODE> static_ap_y_top_mode[MAX_NUM_MB_W * 4];
+    static ap_uint<WD_MODE> static_ap_y_left_mode[4];
+    ap_uint<WD_MODE> ap_y_top_c_mode[4];
+    ap_uint<WD_MODE> ap_y_left_c_mode[4];
+    ap_uint<WD_MODE> ap_y4_top_c_mode[16];
+#pragma HLS ARRAY_PARTITION variable = ap_y4_top_c_mode complete dim = 1
+    ap_uint<WD_MODE> ap_y16_mode_c;
+    ap_uint<WD_MODE> ap_uv_mode_c;
+    ap_uint<WD_MODE> ap_y_m_mode;
+
+    // Rate-distortion results and non-zero masks filled in by Pickup_dataflow3_widen.
+    str_rd rd_y16_cb[2];
+    str_rd rd_uv_cb[2];
+    ap_uint<25> rd_y4_acc_nz = 0;
+    ap_uint<WD_RD_SCORE + 4> rd_y4_acc_score = -1; // -1 wraps to the all-ones (largest) unsigned value
+
+    // Picture-border flags for this macroblock.
+    ap_uint<1> istop = (y_mb == 0);
+    ap_uint<1> isleft = (x_mb == 0);
+    ap_uint<1> isright = (x_mb == mb_w - 1);
+
+    // Candidate level and reconstructed-pixel buffers ("cb" arrays are indexed by candidate 0/1).
+    // The pixel buffers are still needed here: they feed the static top/left context below.
+    ap_int<WD_LEVEL * 16> ap_y_level_cb[2][17];
+    ap_int<WD_LEVEL * 16> ap_y16dc_level_cb[2];
+    ap_int<WD_LEVEL * 16> ap_y4_level_cb[16];
+#pragma HLS ARRAY_PARTITION variable = ap_y4_level_cb complete dim = 1
+    ap_int<WD_LEVEL * 16> ap_uv_level_cb[2][16];
+    ap_uint<WD_PIX * 16> ap_y_out_cb[2][17];
+    ap_uint<WD_PIX * 16> ap_y4_out_cb[16];
+#pragma HLS ARRAY_PARTITION variable = ap_y4_out_cb complete dim = 1
+    ap_uint<WD_PIX * 16> ap_uv_out_cb[2][17];
+    ap_uint<WD_PIX * 4> ap_y4_topright_c;
+    ap_uint<WD_PIX> ap_y_m;
+    ap_uint<WD_PIX> ap_u_m;
+    ap_uint<WD_PIX> ap_v_m;
+
+    // Fetch the pixel context for this macroblock from the static buffers.
+    hls_LoadPre_out_widen(&ap_y_m, &ap_u_m, &ap_v_m,
+                          static_ap_y_top_c,  //[4]
+                          ap_y4_top_c,        //[4]
+                          static_ap_uv_top_c, //[4]
+                          &ap_y4_topright_c,
+                          ap_y_left_c,        //[4]
+                          ap_y4_left_c,       //[4]
+                          ap_uv_left_c,       //[4]
+                          static_ap_y_left_,  //[4]
+                          static_ap_uv_left_, //[4]
+                          static_ap_y_top_, static_ap_uv_top_, x_mb, y_mb, mb_w);
+
+    // Fully partitioned copies of the top/left luma context for the 16x16 input of the pickup stage.
+    ap_uint<WD_PIX * 4> ap_y_top_c_y16[4];
+#pragma HLS ARRAY_PARTITION variable = ap_y_top_c_y16 complete dim = 1
+    ap_uint<WD_PIX * 4> ap_y_left_c_y16[4];
+#pragma HLS ARRAY_PARTITION variable = ap_y_left_c_y16 complete dim = 1
+
+    for (int i = 0; i < 4; i++) {
+#pragma HLS UNROLL
+        ap_y_top_c_y16[i] = static_ap_y_top_c[i];
+        ap_y_left_c_y16[i] = ap_y_left_c[i];
+    }
+
+    // Fetch the mode context for this macroblock.
+    hls_LoadPre_mode_widen(static_ap_y_top_mode, static_ap_y_left_mode, ap_y_top_c_mode, ap_y4_top_c_mode,
+                           ap_y_left_c_mode, &ap_y_m_mode, x_mb, y_mb, mb_w);
+
+    // Candidate selectors written by the pickup stage and used as array indices below.
+    ap_uint<1> b_uv = 0;
+    ap_uint<2> b_y = 0;
+
+    /*******************************/
+    /*       Pickup                */
+    /*******************************/
+    Pickup_dataflow3_widen(
+        // Parameters that stay fixed for one picture/segment
+        tlambda,       // ap_uint<WD_LMD>         I__
+        tlambda_m,     // ap_uint<WD_LMD>         I__
+        lambda_p44,    // ap_uint<WD_LMD>         I__
+        lambda_p16,    // ap_uint<WD_LMD>         I__
+        lambda_uv,     // ap_uint<WD_LMD>         I__
+        hls_qm1,       // y44,y16  hls_QMatrix    I__
+        hls_qm2,       // y16      hls_QMatrix    I__
+        hls_qm_uv,     // hls_QMatrix             I__
+        ap_sharpen,    // ap_int<WD_sharpen*16>   I__
+        ap_sharpen_uv, // ap_int<WD_sharpen*16>   I__
+        // Parameters that change for each MB
+        ap_y_in_y44, // [16] ap_uint<WD_PIX*16>   I__
+        ap_y_in_y16, // [16] ap_uint<WD_PIX*16>   I__
+        ap_uv_in_,   // [8]
+        istop,       // ap_uint<1>                I__
+        isleft,      // ap_uint<1>                I__
+        isright,     // ap_uint<1>                I__
+        // image context
+        ap_y4_top_c,        // [4] ap_uint<WD_PIX*4>  I__
+        ap_y_top_c_y16,     // [4] ap_uint<WD_PIX*4>  I__
+        ap_y4_left_c,       // [4] ap_uint<WD_PIX*4>  I__
+        ap_y_left_c_y16,    // [4] ap_uint<WD_PIX*4>  I__
+        static_ap_uv_top_c, // [4] ap_uint<WD_PIX*4>  I__
+        ap_uv_left_c,       // [4] ap_uint<WD_PIX*4>  I__
+        ap_y_m,             // ap_uint<WD_PIX>        I__
+        ap_u_m,             // ap_uint<WD_PIX>        I__
+        ap_v_m,             // ap_uint<WD_PIX>        I__
+        ap_y4_topright_c,   // ap_uint<WD_PIX*4>      I__
+        // mode context
+        ap_y_top_c_mode,  // [4] ap_uint<WD_MODE>     I__
+        ap_y_left_c_mode, // [4] ap_uint<WD_MODE>     I__
+        // OUTPUT
+        ap_y4_out_cb,      // [16]    ap_uint<WD_PIX*16>   O__
+        ap_y_out_cb,       // [2][17] ap_uint<WD_PIX*16>   O__
+        ap_uv_out_cb,      // [2][17] ap_uint<WD_PIX*16>   O__
+        ap_y4_level_cb,    // [16]    ap_int<WD_LEVEL*16>  O__
+        ap_y_level_cb,     // [2][17] ap_int<WD_LEVEL*16>  O__
+        ap_y16dc_level_cb, // [2]     ap_int<WD_LEVEL*16>  O__
+        ap_uv_level_cb,    // [2][16] ap_int<WD_LEVEL*16>  O__
+        &rd_y4_acc_score, &rd_y4_acc_nz,
+        rd_y16_cb,        // [2]  str_rd             O__
+        rd_uv_cb,         // [2]  str_rd             O__
+        ap_y4_top_c_mode, // [16] ap_uint<WD_MODE>   IO_
+        &ap_y16_mode_c,   // ap_uint<WD_MODE>*       OP_
+        &ap_uv_mode_c,    // ap_uint<WD_MODE>*       OP_
+        &b_uv,            // ap_uint<1>*             OP_
+        &b_y              // ap_uint<2>*             OP_
+        );
+
+    // Flag word sent on str_ret. Bit 5 is AND-accumulated from the "mask == 0" tests below, so it has
+    // to start at 1; it used to be read from a default-constructed (indeterminate) ap_uint.
+    // NOTE(review): consumers of str_ret bit 5 now see a defined value - re-validate downstream use.
+    ap_uint<6> ret = 0;
+    ret[5] = 1;
+    ap_uint<WD_MODE * 16> ap_y_mode_b;
+    ret[5] = ret[5] & (rd_uv_cb[b_uv].nz(23, 16) == 0); // chroma part of the non-zero mask
+
+    /**********************************************/
+    /* Pickup the best mode for y
+     * Set nz, level, out, preds, mb_->type       */
+    /**********************************************/
+    const int x_sb_w = (int)x_mb << 2; // first of this macroblock's four entries in the top mode buffer
+    if (rd_y4_acc_score < rd_y16_cb[(1 & b_y)].score) {
+        // 4x4 result kept.
+        ret[5] = ret[5] & (rd_y4_acc_nz(15, 0) == 0);
+        hls_SetBestAs4_mode_widen(static_ap_y_top_mode, static_ap_y_left_mode, ap_y4_top_c_mode, ap_y_left_c_mode,
+                                  &ap_y_mode_b, x_sb_w);
+        b_y = 2; // bit 1 set marks "4x4 kept"; (b_y & 1) is 0 from here on
+        hls_StoreTopLeft_y(static_ap_y_top_, static_ap_y_left_, ap_y4_out_cb, x_mb);
+        for (int n = 0; n < 16; n++) {
+#pragma HLS UNROLL
+            // NoOut: the reconstructed luma word ap_y4_out_cb[n] is not streamed out.
+            str_level_y->write(ap_y4_level_cb[n]);
+        }
+    } else {
+        // 16x16 result kept; bit 24 of the mask is tested in addition to bits 15..0.
+        ret[5] = ret[5] & (rd_y16_cb[(1 & b_y)].nz(15, 0) == 0);
+        ret[5] = ret[5] & (rd_y16_cb[(1 & b_y)].nz[24] == 0);
+        hls_SetBestAs16_mode_widen(static_ap_y_top_mode, static_ap_y_left_mode, ap_y16_mode_c, &ap_y_mode_b, x_sb_w);
+        b_y &= 1; // clear bit 1 so b_y is a plain 0/1 candidate index
+        hls_StoreTopLeft_y(static_ap_y_top_, static_ap_y_left_, ap_y_out_cb[b_y], x_mb);
+        for (int n = 0; n < 16; n++) {
+#pragma HLS UNROLL
+            // NoOut: the reconstructed luma word ap_y_out_cb[b_y][n] is not streamed out.
+            str_level_y->write(ap_y_level_cb[b_y][n]);
+        }
+    }
+
+    // Chroma: update the static context, then emit the DC levels followed by the chroma levels.
+    hls_StoreTopLeft_uv(static_ap_uv_top_, static_ap_uv_left_, ap_uv_out_cb[b_uv], x_mb);
+    str_level_dc->write(ap_y16dc_level_cb[b_y & 1]);
+    for (int n = 0; n < 8; n += 1) {
+#pragma HLS UNROLL
+        // NoOut: the reconstructed chroma word ap_uv_out_cb[b_uv][n] is not streamed out.
+        str_level_uv->write(ap_uv_level_cb[b_uv][n]);
+    }
+
+    /**********************************************/
+    /* write return value                         */
+    /**********************************************/
+    str_pred->write(ap_y_mode_b);
+    ret(3, 0) = ap_uv_mode_c(1, 0);
+    ret(4, 4) = ~b_y(1, 1); // it.mb_->type_ = 0;
+    str_ret->write(ret);
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+// Saves the border of a finished chroma macroblock into the neighbour-context buffers.
+//
+//   ap_uv_left_[r]            <- element (j, 3) of sub-block (2 * r + 1), for j = 0..3
+//   ap_uv_top_[4 * x_mb + k]  <- element (3, c) of sub-blocks 2, 3, 6, 7 (k = 0..3), for c = 0..3
+//
+// ap_uv_out_cb is only read. Element positions are given in SB_GET argument order.
+void hls_StoreTopLeft_uv(ap_uint<WD_PIX * 4> ap_uv_top_[MAX_NUM_MB_W * 4],
+                         ap_uint<WD_PIX * 4> ap_uv_left_[4],
+                         ap_uint<WD_PIX * 16> ap_uv_out_cb[8],
+                         ap_uint<LG2_MAX_NUM_MB_W> x_mb) {
+    // First of the four top-buffer entries owned by this macroblock.
+    const int top_base = (int)x_mb << 2;
+
+    // Left context: one vector per row of sub-blocks, taken from the odd-numbered sub-blocks.
+    for (int r = 0; r < 4; r++) {
+#pragma HLS UNROLL
+        const int src_sb = r * 2 + 1;
+        VCT_GET(ap_uv_left_[r], 0, WD_PIX) = SB_GET(ap_uv_out_cb[src_sb], 0, 3, WD_PIX);
+        VCT_GET(ap_uv_left_[r], 1, WD_PIX) = SB_GET(ap_uv_out_cb[src_sb], 1, 3, WD_PIX);
+        VCT_GET(ap_uv_left_[r], 2, WD_PIX) = SB_GET(ap_uv_out_cb[src_sb], 2, 3, WD_PIX);
+        VCT_GET(ap_uv_left_[r], 3, WD_PIX) = SB_GET(ap_uv_out_cb[src_sb], 3, 3, WD_PIX);
+    }
+
+    // Top context: lane c of each of the four entries is filled from sub-blocks 2, 3, 6 and 7.
+    for (int c = 0; c < 4; c++) {
+#pragma HLS UNROLL
+        VCT_GET(ap_uv_top_[top_base + 0], c, WD_PIX) = SB_GET(ap_uv_out_cb[2], 3, c, WD_PIX);
+        VCT_GET(ap_uv_top_[top_base + 1], c, WD_PIX) = SB_GET(ap_uv_out_cb[3], 3, c, WD_PIX);
+        VCT_GET(ap_uv_top_[top_base + 2], c, WD_PIX) = SB_GET(ap_uv_out_cb[6], 3, c, WD_PIX);
+        VCT_GET(ap_uv_top_[top_base + 3], c, WD_PIX) = SB_GET(ap_uv_out_cb[7], 3, c, WD_PIX);
+    }
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+// Saves the border of a finished luma macroblock into the neighbour-context buffers.
+//
+//   ap_y_left_[r]            <- element (j, 3) of sub-block (4 * r + 3), for j = 0..3
+//   ap_y_top_[4 * x_mb + k]  <- element (3, c) of sub-block (12 + k), k = 0..3, for c = 0..3
+//
+// ap_y_out_cb is only read. Element positions are given in SB_GET argument order.
+void hls_StoreTopLeft_y(ap_uint<WD_PIX * 4> ap_y_top_[MAX_NUM_MB_W * 4],
+                        ap_uint<WD_PIX * 4> ap_y_left_[4],
+                        ap_uint<WD_PIX * 16> ap_y_out_cb[16],
+                        ap_uint<LG2_MAX_NUM_MB_W> x_mb) {
+    // First of the four top-buffer entries owned by this macroblock.
+    const int top_base = (int)x_mb << 2;
+
+    // Left context: one vector per row of sub-blocks, taken from sub-blocks 3, 7, 11 and 15.
+    for (int r = 0; r < 4; r++) {
+#pragma HLS UNROLL
+        const int src_sb = r * 4 + 3;
+        VCT_GET(ap_y_left_[r], 0, WD_PIX) = SB_GET(ap_y_out_cb[src_sb], 0, 3, WD_PIX);
+        VCT_GET(ap_y_left_[r], 1, WD_PIX) = SB_GET(ap_y_out_cb[src_sb], 1, 3, WD_PIX);
+        VCT_GET(ap_y_left_[r], 2, WD_PIX) = SB_GET(ap_y_out_cb[src_sb], 2, 3, WD_PIX);
+        VCT_GET(ap_y_left_[r], 3, WD_PIX) = SB_GET(ap_y_out_cb[src_sb], 3, 3, WD_PIX);
+    }
+
+    // Top context: lane c of each of the four entries is filled from sub-blocks 12..15.
+    for (int c = 0; c < 4; c++) {
+#pragma HLS UNROLL
+        VCT_GET(ap_y_top_[top_base + 0], c, WD_PIX) = SB_GET(ap_y_out_cb[12], 3, c, WD_PIX);
+        VCT_GET(ap_y_top_[top_base + 1], c, WD_PIX) = SB_GET(ap_y_out_cb[13], 3, c, WD_PIX);
+        VCT_GET(ap_y_top_[top_base + 2], c, WD_PIX) = SB_GET(ap_y_out_cb[14], 3, c, WD_PIX);
+        VCT_GET(ap_y_top_[top_base + 3], c, WD_PIX) = SB_GET(ap_y_out_cb[15], 3, c, WD_PIX);
+    }
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+
+void hls_LoadPre_out_widen(ap_uint<WD_PIX>* ap_y_m,
+                           ap_uint<WD_PIX>* ap_u_m,
+                           ap_uint<WD_PIX>* ap_v_m,
+                           ap_uint<WD_PIX * 4> ap_y_top_c[4],
+                           ap_uint<WD_PIX * 4> ap_y4_top_c[4],
+                           ap_uint<WD_PIX * 4> ap_uv_top_c[4],
+                           ap_uint<WD_PIX * 4>* ap_y4_topright_c,
+                           ap_uint<WD_PIX * 4> ap_y_left_c[4],
+                           ap_uint<WD_PIX * 4> ap_y4_left_c[4],
+                           ap_uint<WD_PIX * 4> ap_uv_left_c[4],
+                           ap_uint<WD_PIX * 4> ap_y_left_[4],
+                           ap_uint<WD_PIX * 4> ap_uv_left_[4],
+                           ap_uint<WD_PIX * 4> ap_y_top_[MAX_NUM_MB_W * 4],
+                           ap_uint<WD_PIX * 4> ap_uv_top_[MAX_NUM_MB_W * 4],
+                           ap_uint<LG2_MAX_NUM_MB_W> x_mb,
+                           ap_uint<LG2_MAX_NUM_MB_W> y_mb,
+                           ap_uint<LG2_MAX_NUM_MB_W> mb_w) {
+    int x = (int)x_mb << 2;
+    int y = (int)y_mb << 2;
+    ap_uint<1> istop = (y_mb == 0);
+    ap_uint<1> isleft = (x_mb == 0);
+
+    if (y > 0) {
+        *ap_y_m = VCT_GET(ap_y_top_c[3], 3, WD_PIX);
+        *ap_u_m = VCT_GET(ap_uv_top_c[1], 3, WD_PIX);
+        *ap_v_m = VCT_GET(ap_uv_top_c[3], 3, WD_PIX);
+    } else {
+        *ap_y_m = 0x7f;
+        *ap_u_m = 0x7f;
+        *ap_v_m = 0x7f;
+    }
+
+    if (y > 0) {
+        ap_y_top_c[0] = ap_y4_top_c[0] = ap_y_top_[x + 0];
+        ap_y_top_c[1] = ap_y4_top_c[1] = ap_y_top_[x + 1];
+        ap_y_top_c[2] = ap_y4_top_c[2] = ap_y_top_[x + 2];
+        ap_y_top_c[3] = ap_y4_top_c[3] = ap_y_top_[x + 3];
+        if (x_mb < mb_w - 1)
+            (*ap_y4_topright_c) = ap_y_top_[x + 4];
+        else {
+            VCT_GET((*ap_y4_topright_c), 0, WD_PIX) = VCT_GET(ap_y4_top_c[3], 3, WD_PIX);
+            VCT_GET((*ap_y4_topright_c), 1, WD_PIX) = VCT_GET(ap_y4_top_c[3], 3, WD_PIX);
+            VCT_GET((*ap_y4_topright_c), 2, WD_PIX) = VCT_GET(ap_y4_top_c[3], 3, WD_PIX);
+            VCT_GET((*ap_y4_topright_c), 3, WD_PIX) = VCT_GET(ap_y4_top_c[3], 3, WD_PIX);
+        }
+    } else {
+        ap_y_top_c[0] = ap_y4_top_c[0] = 0x7f7f7f7f;
+        ap_y_top_c[1] = ap_y4_top_c[1] = 0x7f7f7f7f;
+        ap_y_top_c[2] = ap_y4_top_c[2] = 0x7f7f7f7f;
+        ap_y_top_c[3] = ap_y4_top_c[3] = 0x7f7f7f7f;
+        (*ap_y4_topright_c) = 0x7f7f7f7f;
+    }
+    if (x > 0) {
+        ap_y_left_c[0] = ap_y4_left_c[0] = ap_y_left_[0];
+        ap_y_left_c[1] = ap_y4_left_c[1] = ap_y_left_[1];
+        ap_y_left_c[2] = ap_y4_left_c[2] = ap_y_left_[2];
+        ap_y_left_c[3] = ap_y4_left_c[3] = ap_y_left_[3];
+    } else {
+        ap_y_left_c[0] = ap_y4_left_c[0] = 0x81818181;
+        ap_y_left_c[1] = ap_y4_left_c[1] = 0x81818181;
+        ap_y_left_c[2] = ap_y4_left_c[2] = 0x81818181;
+        ap_y_left_c[3] = ap_y4_left_c[3] = 0x81818181;
+    }
+    ap_uv_top_c[0] = ap_uv_top_[x + 0];
+    ap_uv_top_c[1] = ap_uv_top_[x + 1];
+    ap_uv_top_c[2] = ap_uv_top_[x + 2];
+    ap_uv_top_c[3] = ap_uv_top_[x + 3];
+    ap_uv_left_c[0] = ap_uv_left_[0];
+    ap_uv_left_c[1] = ap_uv_left_[1];
+    ap_uv_left_c[2] = ap_uv_left_[2];
+    ap_uv_left_c[3] = ap_uv_left_[3];
+}
+
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+// Loads the intra-4x4 prediction-mode context for the macroblock at
+// (x_mb, y_mb).
+//   ap_y_top_mode    : line buffer of modes from the row above, 4 per MB
+//   ap_y_left_mode   : modes of the 4 sub-blocks to the left
+//   ap_y_top_c_mode  : [out] top mode context of the current MB
+//   ap_y4_top_c_mode : [out] entries 0..3 receive the same top modes
+//   ap_y_left_c_mode : [out] left mode context of the current MB
+//   ap_y_m_mode      : [out] previous value of ap_y_top_c_mode[3], or DC_PRED
+// On the first row / first column the missing neighbours default to DC_PRED.
+// mb_w is not used by this function.
+void hls_LoadPre_mode_widen(ap_uint<WD_MODE> ap_y_top_mode[MAX_NUM_MB_W * 4],
+                            ap_uint<WD_MODE> ap_y_left_mode[4],
+                            ap_uint<WD_MODE> ap_y_top_c_mode[4],
+                            ap_uint<WD_MODE> ap_y4_top_c_mode[16],
+                            ap_uint<WD_MODE> ap_y_left_c_mode[4],
+                            ap_uint<WD_MODE>* ap_y_m_mode,
+                            ap_uint<LG2_MAX_NUM_MB_W> x_mb,
+                            ap_uint<LG2_MAX_NUM_MB_W> y_mb,
+                            ap_uint<LG2_MAX_NUM_MB_W> mb_w) {
+    const int col4 = (int)x_mb << 2;
+    const int row4 = (int)y_mb << 2;
+    const bool has_top = (row4 > 0);
+    const bool has_left = (col4 > 0);
+
+    if (!has_top) {
+        // No row above: everything falls back to DC_PRED.
+        *ap_y_m_mode = DC_PRED;
+        ap_y_top_c_mode[0] = ap_y4_top_c_mode[0] = DC_PRED;
+        ap_y_top_c_mode[1] = ap_y4_top_c_mode[1] = DC_PRED;
+        ap_y_top_c_mode[2] = ap_y4_top_c_mode[2] = DC_PRED;
+        ap_y_top_c_mode[3] = ap_y4_top_c_mode[3] = DC_PRED;
+    } else {
+        // Capture the old last top mode before the context is overwritten.
+        *ap_y_m_mode = ap_y_top_c_mode[3];
+        ap_y_top_c_mode[0] = ap_y4_top_c_mode[0] = ap_y_top_mode[col4 + 0];
+        ap_y_top_c_mode[1] = ap_y4_top_c_mode[1] = ap_y_top_mode[col4 + 1];
+        ap_y_top_c_mode[2] = ap_y4_top_c_mode[2] = ap_y_top_mode[col4 + 2];
+        ap_y_top_c_mode[3] = ap_y4_top_c_mode[3] = ap_y_top_mode[col4 + 3];
+    }
+
+    if (!has_left) {
+        // No column to the left: default to DC_PRED.
+        ap_y_left_c_mode[0] = DC_PRED;
+        ap_y_left_c_mode[1] = DC_PRED;
+        ap_y_left_c_mode[2] = DC_PRED;
+        ap_y_left_c_mode[3] = DC_PRED;
+    } else {
+        ap_y_left_c_mode[0] = ap_y_left_mode[0];
+        ap_y_left_c_mode[1] = ap_y_left_mode[1];
+        ap_y_left_c_mode[2] = ap_y_left_mode[2];
+        ap_y_left_c_mode[3] = ap_y_left_mode[3];
+    }
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+
+// Looks up the fixed mode-signalling cost of `mode` for sub-block n_sb from
+// my_VP8FixedCostsI4[top][left][mode].
+//   left context : ap_y_left_c_mode[row of n_sb]
+//   top context  : ap_y_top_c_mode[column of n_sb] for the first sub-block
+//                  row, otherwise the caller-supplied local_mod.
+ap_uint<12> hls_GetCost_widen(ap_uint<4> n_sb,
+                              ap_uint<4> mode,
+                              ap_uint<WD_MODE> ap_y_top_c_mode[4], // at beginning, default is DC
+                              ap_uint<WD_MODE> ap_y_left_c_mode[4],
+                              ap_uint<WD_MODE> local_mod) {
+#pragma HLS PIPELINE
+    const int sb_col = (n_sb & 3);
+    const int sb_row = n_sb >> 2;
+    const int left_ctx = ap_y_left_c_mode[sb_row];
+    int top_ctx;
+    if (sb_row != 0) {
+        top_ctx = (int)(local_mod);
+    } else {
+        top_ctx = (int)(ap_y_top_c_mode[sb_col]);
+    }
+    return my_VP8FixedCostsI4[top_ctx][left_ctx][mode];
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+// Top-level DATAFLOW region for one macroblock: runs the three mode-search
+// stages Pickup_Y44_widen (4x4 luma), Pickup_Y16 (16x16 luma) and Pickup_UV
+// (chroma). Arguments prefixed I__ are inputs that are only forwarded;
+// arguments prefixed O__/O_/OP_ are outputs written by the stages.
+// The y44 and y16 stages each receive their own copy of the luma input and
+// luma context arrays (the *_y44 / *_y16 arguments).
+void Pickup_dataflow3_widen(
+    // Parameters unchanged within one picture/segment
+    ap_uint<WD_LMD> I__tlambda,               //              :
+    ap_uint<WD_LMD> I__tlambda_m,             //
+    ap_uint<WD_LMD> I__lambda_p44,            //
+    ap_uint<WD_LMD> I__lambda_p16,            //
+    ap_uint<WD_LMD> I__lambda_uv,             //
+    hls_QMatrix I__hls_qm1,                   // y44,y16
+    hls_QMatrix I__hls_qm2,                   // y16
+    hls_QMatrix I__hls_qm_uv,                 //
+    ap_int<WD_sharpen * 16> I__ap_sharpen,    //
+    ap_int<WD_sharpen * 16> I__ap_sharpen_uv, //
+    // Parameters changed for each MB
+    ap_uint<WD_PIX * 16> I__ap_yuv_in_y44[16], //
+    ap_uint<WD_PIX * 16> I__ap_yuv_in_y16[16], //
+    ap_uint<WD_PIX * 16> I__ap_uv_in_[8],      //
+    ap_uint<1> I__istop,                       //
+    ap_uint<1> I__isleft,                      //
+    ap_uint<1> I__isright,                     //
+    // image context
+    ap_uint<WD_PIX * 4> I__ap_y_top_c_y44[4],  //
+    ap_uint<WD_PIX * 4> I__ap_y_top_c_y16[4],  //
+    ap_uint<WD_PIX * 4> I__ap_y_left_c_y44[4], //
+    ap_uint<WD_PIX * 4> I__ap_y_left_c_y16[4], //
+    ap_uint<WD_PIX * 4> I__ap_uv_top_c[4],     //
+    ap_uint<WD_PIX * 4> I__ap_uv_left_c[4],    //
+    ap_uint<WD_PIX> I__ap_y_m,                 //
+    ap_uint<WD_PIX> I__ap_u_m,                 //
+    ap_uint<WD_PIX> I__ap_v_m,                 //
+    ap_uint<WD_PIX * 4> I__ap_y4_topright_c,   //
+    // mode context
+    ap_uint<WD_MODE> I__ap_y_top_c_mode[4],  //
+    ap_uint<WD_MODE> I__ap_y_left_c_mode[4], //
+    // OUTPUT
+    ap_uint<WD_PIX * 16> O__ap_y4_out_cb[16],       //
+    ap_uint<WD_PIX * 16> O__ap_y_out_cb[2][17],     //
+    ap_uint<WD_PIX * 16> O__ap_uv_out_cb[2][17],    //
+    ap_int<WD_LEVEL * 16> O__ap_y4_level_cb[17],    //
+    ap_int<WD_LEVEL * 16> O__ap_y_level_cb[2][17],  //
+    ap_int<WD_LEVEL * 16> O__ap_y16dc_level_cb[2],  //
+    ap_int<WD_LEVEL * 16> O__ap_uv_level_cb[2][16], //
+    // str_rd_i4*              OP_rd_y4_acc,//
+    ap_uint<WD_RD_SCORE + 4>* O__score_acc,
+    ap_uint<25>* O__nz_mb,
+    str_rd O__rd_y16_cb[2],                  //
+    str_rd O__rd_uv_cb[2],                   //
+    ap_uint<WD_MODE> O_ap_y4_top_c_mode[16], //
+    ap_uint<WD_MODE>* OP_ap_y16_mode_c,      //
+    ap_uint<WD_MODE>* OP_ap_uv_mode_c,       //
+    ap_uint<1>* OP_b_uv,                     //
+    ap_uint<2>* OP_b_y                       //
+    ) {
+#pragma HLS DATAFLOW
+    // Stage 1: 4x4 luma search. Writes the reconstructed 4x4 blocks, their
+    // levels, the accumulated score, the non-zero flags and the chosen modes.
+    Pickup_Y44_widen(
+        // OP_rd_y4_acc->nz = Pickup_Y44_new(
+        I__istop, I__isleft,
+        I__ap_y_top_c_y44,   //[4],
+        I__ap_y_left_c_y44,  //[4],
+        I__ap_y_top_c_mode,  //[4],// at beginning, default is DC
+        I__ap_y_left_c_mode, //[4],
+        //  IO_ap_y4_top_c_mode,//[16],
+        I__ap_y4_topright_c, I__ap_y_m,
+        I__ap_yuv_in_y44, //[16],
+        I__hls_qm1, I__ap_sharpen, I__lambda_p44, I__tlambda, I__tlambda_m,
+        // OUTPUT
+        O__ap_y4_out_cb, O__ap_y4_level_cb, O__score_acc, O__nz_mb, O_ap_y4_top_c_mode);
+
+    // Stage 2: 16x16 luma search, using its own copies of the luma inputs.
+    Pickup_Y16(I__tlambda,    //     = dqm->tlambda_;
+               I__tlambda_m,  //   = dqm->lambda_mode_;
+               I__lambda_p16, //  = dqm->lambda_i16_;
+               I__hls_qm1,    //
+               I__hls_qm2,    //
+               I__ap_sharpen,
+               // Parameters changed for each MB
+               I__ap_yuv_in_y16, //[16],
+               I__istop, I__isleft, I__isright,
+               // image context
+               I__ap_y_top_c_y16,  //[4],
+               I__ap_y_left_c_y16, //[4],
+               I__ap_y_m,
+               // OUTPUT
+               O__ap_y_out_cb,       //[2][17],
+               O__ap_y_level_cb,     //[2][17],
+               O__ap_y16dc_level_cb, //[2],
+               O__rd_y16_cb,         //[2],
+               OP_ap_y16_mode_c,     //
+               OP_b_y                //
+               );
+
+    // Stage 3: chroma (U/V) search.
+    Pickup_UV(
+        // Parameters unchanged within one picture/segment
+        I__tlambda,   //     = dqm->tlambda_;
+        I__tlambda_m, //   = dqm->lambda_mode_;
+        I__lambda_uv, //   = dqm->lambda_uv_;
+        I__hls_qm_uv, I__ap_sharpen_uv,
+        // Parameters changed for each MB
+        I__ap_uv_in_, //[8],
+        I__istop, I__isleft, I__isright,
+        // image context
+        I__ap_uv_top_c,  //[4],
+        I__ap_uv_left_c, //[4],
+        I__ap_u_m, I__ap_v_m,
+        // OUTPUT
+        O__ap_uv_out_cb,   //[2][17],
+        O__ap_uv_level_cb, //[2][16],
+        O__rd_uv_cb,       //[2],
+        OP_ap_uv_mode_c, OP_b_uv);
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+// Base mapping from a schedule step k to a 4x4 sub-block index:
+//   k = 0..3 -> k          (0, 1, 2, 3)
+//   k = 4..5 -> k + 2      (6, 7)
+//   k = 6..7 -> k + 4      (10, 11)
+//   k >= 8   -> k + 6      (14, 15 for k = 8, 9)
+// The result is truncated to 4 bits by the ap_uint<4> return type.
+ap_uint<4> Map_k_2_n_sb_0(int k) {
+#pragma HLS INLINE
+    if (k <= 3)
+        return k;
+    else if (k <= 5)
+        return k + 2;
+    else if (k <= 7)
+        return k + 4;
+    else
+        return k + 6;
+}
+// Maps schedule step k and lane idx (0 or 1) to a 4x4 sub-block index.
+// Steps 2..7 address two sub-blocks per step: lane 1 is the lane-0 index
+// plus 2. Steps 0, 1 and >= 8 address a single sub-block and ignore idx.
+// Resulting order for k = 0..9:
+//   0 | 1 | 2,4 | 3,5 | 6,8 | 7,9 | 10,12 | 11,13 | 14 | 15
+ap_uint<4> Map_k_2_n_sb(int k, ap_uint<1> idx) {
+#pragma HLS INLINE
+    if (k <= 1)
+        return k;
+    else if (k <= 7)
+        return Map_k_2_n_sb_0(k) + (ap_uint<4>)idx * 2;
+    else
+        return Map_k_2_n_sb_0(k);
+}
+// Intra-4x4 luma mode search for one macroblock.
+//
+// The 16 sub-blocks are visited in 10 steps (see Map_k_2_n_sb):
+//   0 | 1 | 2,4 | 3,5 | 6,8 | 7,9 | 10,12 | 11,13 | 14 | 15
+// Steps 2..7 evaluate two sub-blocks in one pipelined loop (20 iterations:
+// 10 candidate modes for each); the other steps evaluate one (10 iterations).
+// For every sub-block the candidate with the lowest score is kept.
+//
+// Inputs:
+//   istop, isleft        : macroblock border flags, forwarded to
+//                          hls_LoadPreds4_ins
+//   ap_y_top_c/left_c    : top / left pixel context of the macroblock
+//   ap_y_top_c_mode,
+//   ap_y_left_c_mode     : top / left mode context, used by hls_GetCost
+//   ap_y4_topright_c     : top-right pixels; ap_y_m : top-left pixel
+//   ap_yuv_in            : source pixels, one entry per sub-block
+//   hls_qm1, ap_sharpen,
+//   lambda_p44, tlambda,
+//   tlambda_m            : quantizer and lambda parameters, forwarded to
+//                          hls_channel_p44
+// Outputs:
+//   ap_y4_out_mb[16]     : best reconstructed sub-blocks
+//   ap_y4_level_mb[16]   : best quantized levels
+//   *score_acc           : sum of the 16 best scores
+//   *nz_mb               : OR-ed with the per-sub-block non-zero flags
+//   O__modes_mb[16]      : best mode per sub-block
+void Pickup_Y44_widen(ap_uint<1> istop,
+                      ap_uint<1> isleft,
+                      ap_uint<WD_PIX * 4> ap_y_top_c[4],
+                      ap_uint<WD_PIX * 4> ap_y_left_c[4],
+                      ap_uint<WD_MODE> ap_y_top_c_mode[4], // at beginning, default is DC
+                      ap_uint<WD_MODE> ap_y_left_c_mode[4],
+                      ap_uint<WD_PIX * 4> ap_y4_topright_c,
+                      ap_uint<WD_PIX * 4> ap_y_m,
+                      ap_uint<WD_PIX * 16> ap_yuv_in[16],
+                      hls_QMatrix hls_qm1,
+                      ap_int<WD_sharpen * 16> ap_sharpen,
+                      ap_uint<WD_LMD> lambda_p44,
+                      ap_uint<WD_LMD> tlambda,
+                      ap_uint<WD_LMD> tlambda_m,
+                      ap_uint<WD_PIX * 16> ap_y4_out_mb[16],
+                      ap_int<WD_LEVEL * 16> ap_y4_level_mb[16],
+                      ap_uint<WD_RD_SCORE + 4>* score_acc,
+                      ap_uint<25>* nz_mb,
+                      ap_uint<WD_MODE> O__modes_mb[16]) {
+    //#pragma HLS INLINE
+    /////////////////////////////////////////////
+    // slow updated in loop without pipeline
+    ap_uint<WD_MODE> local_mode_array[16];
+    ap_uint<WD_PIX * 4> local_y4_top_c[16];
+#pragma HLS ARRAY_PARTITION variable = local_y4_top_c complete dim = 1
+    ap_uint<WD_PIX * 4> local_left_c[16];
+#pragma HLS ARRAY_PARTITION variable = local_left_c complete dim = 1
+    // Seed the working context: top row entries 0..3 and the left entries of
+    // the first sub-block column (0, 4, 8, 12).
+    for (int i = 0; i < 4; i++) {
+#pragma HLS UNROLL
+        local_y4_top_c[i] = ap_y_top_c[i];
+        local_left_c[i * 4] = ap_y_left_c[i];
+    }
+#pragma HLS ARRAY_PARTITION variable = local_mode_array complete dim = 1
+    //////////////////////////////////////////////
+    // fast updated (read and written) in pipeline
+    ap_uint<WD_RD_SCORE + 4> score_b_array[16];
+#pragma HLS ARRAY_PARTITION variable = score_b_array complete dim = 1
+    /////////////////////////////////////////////
+    // simple output, only written in loop
+    ap_uint<25> nz_b = 0;
+    ap_int<WD_LEVEL * 16> local_level_array[16];
+#pragma HLS ARRAY_PARTITION variable = local_level_array complete dim = 1
+    // Holds reconstructed pixels, so it is sized like ap_y4_out_mb
+    // (WD_PIX * 16 bits), not like the level arrays.
+    ap_uint<WD_PIX * 16> local_out_array[16];
+#pragma HLS ARRAY_PARTITION variable = local_out_array complete dim = 1
+    for (int k_p44 = 0; k_p44 < 10; k_p44++) {
+        //#pragma HLS PIPELINE
+        ap_uint<4> mode_b[2];
+        ap_uint<4> n_sb2[2];
+#pragma HLS ARRAY_PARTITION variable = mode_b complete dim = 1
+#pragma HLS ARRAY_PARTITION variable = n_sb2 complete dim = 1
+        n_sb2[0] = Map_k_2_n_sb(k_p44, 0);
+        n_sb2[1] = Map_k_2_n_sb(k_p44, 1);
+        // Steps 0, 1, 8 and 9 address a single sub-block (both lanes map to
+        // the same index), so only 10 candidate modes need evaluating there.
+        const int loop1 = (k_p44 < 2 || k_p44 > 7) ? 10 : 20;
+    PICKUP_Y44:
+        for (int fmod = 0; fmod < loop1; fmod++) {
+#pragma HLS PIPELINE
+            //#pragma HLS dependence array inter false
+            // Iterations 0..9 belong to lane 0, 10..19 to lane 1.
+            int set = fmod >= 10 ? 1 : 0;
+            ap_uint<4> n_sb = Map_k_2_n_sb(k_p44, set);
+            ap_uint<4> MACRO_mode = fmod >= 10 ? fmod - 10 : fmod; // fmod(4,1);
+            ap_uint<WD_PIX* 16> ap_yuv_in_sb = ap_yuv_in[n_sb];
+            ap_uint<WD_PIX * 4> abcd, efgh, ijkl;
+            ap_uint<WD_PIX> x44;
+            hls_LoadPreds4_ins(local_y4_top_c, local_left_c, ap_y4_topright_c, ap_y_m, &abcd, &efgh, &ijkl, &x44,
+                               isleft, istop, n_sb);
+
+            ap_uint<1> MACRO_isfirst = (MACRO_mode == 0);
+            ap_uint<WD_PIX * 16> ap_ref_p44;
+            ap_ref_p44 = hls_p4_test(abcd, efgh, ijkl, x44, MACRO_mode);
+            ap_uint<12> pre_dis_h;
+            pre_dis_h = hls_GetCost(n_sb, MACRO_mode, ap_y_top_c_mode, ap_y_left_c_mode, local_mode_array);
+
+            ap_uint<WD_PIX * 16> ap_y4_out_tmp;
+            ap_int<WD_LEVEL * 16> ap_y4_level_tmp;
+            ap_uint<WD_RD_SCORE + 4> score_sb;
+            ap_uint<25> nz_sb;
+            ap_uint<4> mode_out;
+            hls_channel_p44(MACRO_mode, ap_yuv_in_sb, ap_ref_p44, hls_qm1, ap_sharpen, lambda_p44, tlambda, tlambda_m,
+                            pre_dis_h, &ap_y4_out_tmp, &ap_y4_level_tmp, &score_sb, &nz_sb, &mode_out);
+            // Mode 0 always initialises the best-so-far record; later modes
+            // replace it only when strictly better.
+            if (MACRO_isfirst || score_sb < score_b_array[n_sb]) {
+                score_b_array[n_sb] = score_sb;
+                local_out_array[n_sb] = ap_y4_out_tmp;
+                local_level_array[n_sb] = ap_y4_level_tmp;
+                nz_b[n_sb] = nz_sb;
+                mode_b[set] = mode_out;
+            }
+        } // for mode
+
+        // Commit the winning modes. When only one lane ran, n_sb2[1] equals
+        // n_sb2[0] and mode_b[1] was never written, so reuse mode_b[0].
+        local_mode_array[n_sb2[0]] = mode_b[0];
+        local_mode_array[n_sb2[1]] = (loop1 == 20) ? mode_b[1] : mode_b[0];
+        // Element 3 (WD_PIX * 4 wide) of the reconstruction becomes the top
+        // context of the sub-block below (index + 4) -- presumably the
+        // bottom pixel row; depends on the VCT_GET layout.
+        if (n_sb2[0] < 12) local_y4_top_c[n_sb2[0] + 4] = VCT_GET(local_out_array[n_sb2[0]], 3, WD_PIX * 4);
+        if (n_sb2[1] < 12) local_y4_top_c[n_sb2[1] + 4] = VCT_GET(local_out_array[n_sb2[1]], 3, WD_PIX * 4);
+        // Pixels 3, 7, 11, 15 of the reconstruction become the left context
+        // of the sub-block to the right (index + 1); skipped for the last
+        // sub-block column (3, 7, 11, 15).
+        if ((n_sb2[0] & 3) != 3) {
+            VCT_GET(local_left_c[n_sb2[0] + 1], 0, WD_PIX) = VCT_GET(local_out_array[n_sb2[0]], 3, WD_PIX);
+            VCT_GET(local_left_c[n_sb2[0] + 1], 1, WD_PIX) = VCT_GET(local_out_array[n_sb2[0]], 7, WD_PIX);
+            VCT_GET(local_left_c[n_sb2[0] + 1], 2, WD_PIX) = VCT_GET(local_out_array[n_sb2[0]], 11, WD_PIX);
+            VCT_GET(local_left_c[n_sb2[0] + 1], 3, WD_PIX) = VCT_GET(local_out_array[n_sb2[0]], 15, WD_PIX);
+        }
+        if ((n_sb2[1] & 3) != 3) {
+            VCT_GET(local_left_c[n_sb2[1] + 1], 0, WD_PIX) = VCT_GET(local_out_array[n_sb2[1]], 3, WD_PIX);
+            VCT_GET(local_left_c[n_sb2[1] + 1], 1, WD_PIX) = VCT_GET(local_out_array[n_sb2[1]], 7, WD_PIX);
+            VCT_GET(local_left_c[n_sb2[1] + 1], 2, WD_PIX) = VCT_GET(local_out_array[n_sb2[1]], 11, WD_PIX);
+            VCT_GET(local_left_c[n_sb2[1] + 1], 3, WD_PIX) = VCT_GET(local_out_array[n_sb2[1]], 15, WD_PIX);
+        }
+
+    }                                           // for n_sb;
+    // Publish the per-sub-block results and the accumulated score.
+    ap_uint<WD_RD_SCORE + 4> score_acc_tmp = 0; //(1<<WD_RD_SCORE+2);
+    for (int i = 0; i < 16; i++) {
+#pragma HLS UNROLL
+        O__modes_mb[i] = local_mode_array[i];
+        score_acc_tmp += score_b_array[i];
+        ap_y4_level_mb[i] = local_level_array[i];
+        ap_y4_out_mb[i] = local_out_array[i];
+    }
+    *nz_mb |= nz_b;
+    *score_acc = score_acc_tmp;
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+// Looks up the fixed mode-signalling cost of `mode` for sub-block n_sb from
+// my_VP8FixedCostsI4[top][left][mode].
+//   left context : ap_y_left_c_mode[row of n_sb]
+//   top context  : ap_y_top_c_mode[column of n_sb] for the first sub-block
+//                  row, otherwise ap_y4_top_c_mode[n_sb - 4] (the sub-block
+//                  directly above).
+ap_uint<12> hls_GetCost(ap_uint<4> n_sb,
+                        ap_uint<4> mode,
+                        ap_uint<WD_MODE> ap_y_top_c_mode[4], // at beginning, default is DC
+                        ap_uint<WD_MODE> ap_y_left_c_mode[4],
+                        ap_uint<WD_MODE> ap_y4_top_c_mode[16]) {
+#pragma HLS PIPELINE
+    const int sb_col = (n_sb & 3);
+    const int sb_row = n_sb >> 2;
+    const int left_ctx = ap_y_left_c_mode[sb_row];
+    int top_ctx;
+    if (sb_row != 0) {
+        top_ctx = (int)(ap_y4_top_c_mode[n_sb - 4]);
+    } else {
+        top_ctx = (int)(ap_y_top_c_mode[sb_col]);
+    }
+    return my_VP8FixedCostsI4[top_ctx][left_ctx][mode];
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+// Gathers the neighbouring pixels used to predict 4x4 sub-block n_sb.
+//   *abcd : top context of n_sb
+//   *efgh : top context of n_sb + 1, or ap_y4_topright_c for the last
+//           sub-block column (3, 7, 11, 15)
+//   *ijkl : left context of n_sb
+//   *x44  : corner pixel:
+//             n_sb == 0          -> ap_y_m if !isleft, else 0X81 if !istop,
+//                                   else 0X7f
+//             other, column != 0 -> element 3 of the top context of n_sb - 1
+//             n_sb = 4, 8, 12    -> element 3 of the left context of n_sb - 4
+void hls_LoadPreds4_ins(ap_uint<WD_PIX * 4> ap_y4_top_c[16],
+                        ap_uint<WD_PIX * 4> ap_y4_left_c[16],
+                        ap_uint<WD_PIX * 4> ap_y4_topright_c,
+                        ap_uint<WD_PIX * 4> ap_y_m,
+                        ap_uint<WD_PIX * 4>* abcd,
+                        ap_uint<WD_PIX * 4>* efgh,
+                        ap_uint<WD_PIX * 4>* ijkl,
+                        ap_uint<WD_PIX>* x44,
+                        ap_uint<1> isleft,
+                        ap_uint<1> istop,
+                        ap_uint<4> n_sb) {
+#pragma HLS PIPELINE
+    const bool in_last_col = ((n_sb & 3) == 3);
+    const bool in_first_col = ((n_sb & 3) == 0);
+
+    *abcd = ap_y4_top_c[n_sb];
+
+    if (in_last_col) {
+        *efgh = ap_y4_topright_c;
+    } else {
+        *efgh = ap_y4_top_c[n_sb + 1];
+    }
+
+    *ijkl = ap_y4_left_c[n_sb];
+
+    if (n_sb != 0) {
+        if (!in_first_col) {
+            *x44 = VCT_GET(ap_y4_top_c[n_sb - 1], 3, WD_PIX);
+        } else {
+            *x44 = VCT_GET(ap_y4_left_c[n_sb - 4], 3, WD_PIX);
+        }
+    } else if (!isleft) {
+        *x44 = ap_y_m;
+    } else if (!istop) {
+        *x44 = 0X81;
+    } else {
+        *x44 = 0X7f;
+    }
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+// Builds the 4x4 prediction block for the requested intra-4x4 mode by
+// dispatching to the matching predictor. Any mode value not listed below
+// falls through to hls_TM4.
+//   abcd / efgh : top and top-right neighbour pixels
+//   ijkl        : left neighbour pixels
+//   x44         : corner pixel
+ap_uint<WD_PIX * 16> hls_p4_test(ap_uint<WD_PIX * 4> abcd,
+                                 ap_uint<WD_PIX * 4> efgh,
+                                 ap_uint<WD_PIX * 4> ijkl,
+                                 ap_uint<WD_PIX> x44,
+                                 ap_uint<4> mode) {
+    ap_uint<WD_PIX * 16> pred;
+    switch (mode) {
+        case B_DC_PRED:
+            pred = hls_DC4(abcd, ijkl);
+            break;
+        case B_VE_PRED:
+            pred = hls_VE4(abcd, efgh, x44);
+            break;
+        case B_HE_PRED:
+            pred = hls_HE4(ijkl, x44);
+            break;
+        case B_RD_PRED:
+            pred = hls_RD4(abcd, ijkl, x44);
+            break;
+        case B_LD_PRED:
+            pred = hls_LD4(abcd, efgh);
+            break;
+        case B_VR_PRED:
+            pred = hls_VR4(abcd, ijkl, x44);
+            break;
+        case B_VL_PRED:
+            pred = hls_VL4(abcd, efgh);
+            break;
+        case B_HU_PRED:
+            pred = hls_HU4(ijkl);
+            break;
+        case B_HD_PRED:
+            pred = hls_HD4(abcd, ijkl, x44); // ref:544 vs lut:100 ,3.19+1.25 ,
+            break;
+        default:
+            pred = hls_TM4(abcd, ijkl, x44);
+            break;
+    }
+    return pred;
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+// Evaluates one candidate mode for one 4x4 luma sub-block: transforms and
+// quantizes the residual (source - prediction), reconstructs the block and
+// fills a str_dis record from which the score is computed.
+//   mode_in              : candidate mode, copied to *mode_out
+//   ap_yuv_in_sb         : source pixels of the sub-block
+//   ap_ref_p44           : prediction for this mode
+//   hls_qm1, ap_sharpen  : quantizer parameters
+//   tlambda, tlambda_m   : weights for the sd term and for ca_score()
+//   pre_dis_h            : precomputed mode cost, stored as the h term
+//   lambda_p44           : not used in this body
+// Outputs: reconstructed block, quantized levels, score, non-zero flag and
+// the mode that produced them.
+void hls_channel_p44(ap_uint<4> mode_in,
+                     ap_uint<WD_PIX * 16> ap_yuv_in_sb,
+                     ap_uint<WD_PIX * 16> ap_ref_p44,
+                     hls_QMatrix hls_qm1,
+                     ap_int<WD_sharpen * 16> ap_sharpen,
+                     ap_uint<WD_LMD> lambda_p44,
+                     ap_uint<WD_LMD> tlambda,
+                     ap_uint<WD_LMD> tlambda_m,
+                     ap_uint<12> pre_dis_h,
+                     ap_uint<WD_PIX * 16>* ap_y4_out_cb_n_sb2,
+                     ap_int<WD_LEVEL * 16>* ap_y4_level_cb_n_sb2,
+                     ap_uint<WD_RD_SCORE + 4>* score_sb,
+                     ap_uint<25>* nz_sb,
+                     ap_uint<4>* mode_out) {
+#pragma HLS INLINE
+    //#pragma HLS PIPELINE
+    ap_uint<WD_PIX* 16> ap_src_p44 = ap_yuv_in_sb;
+    str_dis rd_dis4;
+    rd_dis4.init();
+    // Forward transform of the residual, then quantization (is16 = 0).
+    ap_uint<WD_DCT* 16> ap_dct_p44 = hls_FTransform(ap_src_p44, ap_ref_p44);
+    ap_int<WD_IQT * 16> ap_iqt_p44;
+    ap_uint<5> ap_nz;
+    ap_uint<WD_PIX * 16> ap_y4_out_tmp;
+    ap_int<WD_LEVEL * 16> ap_y4_level_tmp;
+    ap_nz = hls_QuantizeBlock(ap_dct_p44, &ap_y4_level_tmp, &ap_iqt_p44, &hls_qm1, ap_sharpen, 0);
+    // nz flag: set when the quantizer reported at least one non-zero level.
+    rd_dis4.nz = (ap_nz != 0); //<<n_sb;
+    // From here on ap_nz excludes the level at index 0.
+    if (rd_dis4.nz) ap_nz -= (VCT_GET(ap_y4_level_tmp, 0, WD_LEVEL) != 0);
+    // Reconstruct from the dequantized coefficients and measure distortion.
+    ap_y4_out_tmp = hls_ITransformOne(ap_ref_p44, ap_iqt_p44);
+    rd_dis4.d = hls_SSE4X4(ap_src_p44, ap_y4_out_tmp);
+    rd_dis4.sd = hls_Disto4x4(ap_src_p44, ap_y4_out_tmp);
+    // Scale sd by tlambda / 256 with rounding.
+    rd_dis4.sd = (rd_dis4.sd * tlambda + 128) >> 8;
+    rd_dis4.h = pre_dis_h; // hls_GetCost(n_sb, mode,  ap_y_top_c_mode, ap_y_left_c_mode,  ap_y4_top_c_mode);
+    // Fixed penalty of 140 for any mode other than 0 when at most 3 levels
+    // (index 0 excluded) are non-zero.
+    if ((mode_in > 0) && (ap_nz <= 3))
+        rd_dis4.r = 140;
+    else
+        rd_dis4.r = 0;
+    rd_dis4.r += hls_fast_cost(ap_y4_level_tmp, 0);
+    // NOTE(review): ca_score() is defined elsewhere; presumably it combines
+    // d, sd, h and r into rd_dis4.score using tlambda_m -- confirm.
+    rd_dis4.ca_score(tlambda_m);
+    // output
+    *score_sb = rd_dis4.score;
+    *nz_sb = rd_dis4.nz;
+    *ap_y4_out_cb_n_sb2 = ap_y4_out_tmp;
+    *ap_y4_level_cb_n_sb2 = ap_y4_level_tmp;
+    *mode_out = mode_in;
+}
+
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+// Returns a weighted sum of absolute transform coefficients of a 4x4 pixel
+// block. Two add/subtract butterfly passes are applied (first over each
+// group of four consecutive elements, then across elements i, i+4, i+8,
+// i+12); the absolute value of every result is multiplied by the matching
+// WeightY entry and accumulated.
+ap_uint<WD_TTR> hls_TTransform(ap_uint<WD_PIX * 16> in) {
+#pragma HLS INLINE
+    //#pragma HLS PIPELINE
+    ap_uint<WD_TTR> sum(0);
+    ap_int<WD_PIX + 3> tmp[16];
+    // Per-coefficient weights, indexed like the second-pass outputs.
+    ap_uint<WD_TTW> WeightY[16] = {38, 32, 20, 9, 32, 28, 17, 7, 20, 17, 10, 4, 9, 7, 4, 2};
+#pragma HLS ARRAY_PARTITION variable = WeightY complete dim = 1
+    // First pass: butterflies over elements 4*i .. 4*i+3.
+    for (int i = 0; i < 4; ++i) {
+#pragma HLS unroll
+        ap_int<WD_PIX + 2> a0 = ((ap_int<WD_PIX + 2>)VCT_GET(in, 0 + i * 4, WD_PIX) +
+                                 (ap_int<WD_PIX + 2>)VCT_GET(in, 2 + i * 4, WD_PIX)); // 10b
+        ap_int<WD_PIX + 2> a1 =
+            ((ap_int<WD_PIX + 2>)VCT_GET(in, 1 + i * 4, WD_PIX) + (ap_int<WD_PIX + 2>)VCT_GET(in, 3 + i * 4, WD_PIX));
+        ap_int<WD_PIX + 2> a2 =
+            ((ap_int<WD_PIX + 2>)VCT_GET(in, 1 + i * 4, WD_PIX) - (ap_int<WD_PIX + 2>)VCT_GET(in, 3 + i * 4, WD_PIX));
+        ap_int<WD_PIX + 2> a3 =
+            ((ap_int<WD_PIX + 2>)VCT_GET(in, 0 + i * 4, WD_PIX) - (ap_int<WD_PIX + 2>)VCT_GET(in, 2 + i * 4, WD_PIX));
+        tmp[0 + i * 4] = a0 + a1; // 11b
+        tmp[1 + i * 4] = a3 + a2;
+        tmp[2 + i * 4] = a3 - a2;
+        tmp[3 + i * 4] = a0 - a1;
+    }
+    // Second pass: butterflies across tmp[i], tmp[4+i], tmp[8+i], tmp[12+i],
+    // followed by the weighted accumulation of absolute values.
+    for (int i = 0; i < 4; ++i) {
+#pragma HLS unroll
+        ap_int<WD_PIX + 4> a0 = (tmp[0 + i] + tmp[8 + i]); // 12b
+        ap_int<WD_PIX + 4> a1 = (tmp[4 + i] + tmp[12 + i]);
+        ap_int<WD_PIX + 4> a2 = (tmp[4 + i] - tmp[12 + i]);
+        ap_int<WD_PIX + 4> a3 = (tmp[0 + i] - tmp[8 + i]);
+        ap_int<WD_PIX + 5> b0 = a0 + a1; // 13b
+        ap_int<WD_PIX + 5> b1 = a3 + a2;
+        ap_int<WD_PIX + 5> b2 = a3 - a2;
+        ap_int<WD_PIX + 5> b3 = a0 - a1;
+        // sum += (ap_uint<WD_TTR>)WeightY[ 0 + i] * b0.abs();     //error: no member named 'abs' in 'ap_int<13>???
+        // sum += (ap_uint<WD_TTR>)WeightY[ 4 + i] * b1.abs();
+        // sum += (ap_uint<WD_TTR>)WeightY[ 8 + i] * b2.abs();
+        // sum += (ap_uint<WD_TTR>)WeightY[12 + i] * b3.abs();
+        // Manual absolute value, used instead of the .abs() calls above.
+        if (b0 < 0) b0 = -b0;
+        if (b1 < 0) b1 = -b1;
+        if (b2 < 0) b2 = -b2;
+        if (b3 < 0) b3 = -b3;
+        sum += (ap_uint<WD_TTR>)WeightY[0 + i] * b0;  //.abs();     //
+        sum += (ap_uint<WD_TTR>)WeightY[4 + i] * b1;  //.abs();
+        sum += (ap_uint<WD_TTR>)WeightY[8 + i] * b2;  //.abs();
+        sum += (ap_uint<WD_TTR>)WeightY[12 + i] * b3; //.abs();
+    }
+    return sum;
+}
+
+/* FOR SD CALCULATION */
+// Returns |hls_TTransform(a) - hls_TTransform(b)| >> 5 for two 4x4 pixel
+// blocks.
+ap_uint<WD_DISTO> hls_Disto4x4(ap_uint<WD_PIX * 16> a, ap_uint<WD_PIX * 16> b) {
+#pragma HLS INLINE
+    const ap_uint<WD_TTR> weighted_a = hls_TTransform(a);
+    const ap_uint<WD_TTR> weighted_b = hls_TTransform(b);
+    // Two extra bits so the signed difference and its negation both fit.
+    ap_int<WD_TTR + 2> delta = (ap_int<WD_TTR + 2>)weighted_a - weighted_b;
+    if (delta < 0) {
+        delta = -delta;
+    }
+    return delta >> 5;
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+
+/* FOR R CALCULATION */
+// Cost estimate for the magnitude of the first level: a small table for
+// lv = 0..5, and 1500 + 200 * lv above that. The multiplication is built
+// from shifts (128 + 64 + 8) in WD_LEVEL + 4 bit arithmetic.
+ap_uint<WD_LEVEL + 4> hls_LV0(ap_uint<WD_LEVEL - 1> lv) {
+    switch ((int)lv) {
+        case 0:
+            return 0;
+        case 1:
+            return 760;
+        case 2:
+            return 1500;
+        case 3:
+            return 2000;
+        case 4:
+            return 2300;
+        case 5:
+            return 2500;
+        default:
+            break;
+    }
+    const ap_uint<WD_LEVEL + 4> wide = lv;
+    ap_uint<WD_LEVEL + 4> scaled = wide << 7;
+    scaled += wide << 6;
+    scaled += wide << 3;
+    return 1500 + scaled;
+}
+// Cost estimate for the magnitude of the second level:
+// 0 -> 0, 1 -> 1000, 2 -> 1100, anything larger -> 1200.
+ap_uint<WD_LEVEL> hls_LV1(ap_uint<WD_LEVEL - 1> lv) {
+    switch ((int)lv) {
+        case 0:
+            return 0;
+        case 1:
+            return 1000;
+        case 2:
+            return 1100;
+        default:
+            return 1200;
+    }
+}
+
+// Cost estimate for the magnitude of the third level:
+// 0 -> 0, 1 -> 1000, 2 -> 1100, 3 -> 1150, anything larger -> 1180.
+ap_uint<WD_LEVEL> hls_LV2(ap_uint<WD_LEVEL - 1> lv) {
+    switch ((int)lv) {
+        case 0:
+            return 0;
+        case 1:
+            return 1000;
+        case 2:
+            return 1100;
+        case 3:
+            return 1150;
+        default:
+            return 1180;
+    }
+}
+// Cost estimate for the remaining levels: 500 for any non-zero magnitude,
+// 0 otherwise.
+ap_uint<WD_LEVEL> hls_LVn(ap_uint<WD_LEVEL - 1> lv) {
+    if (lv != 0) {
+        return 500;
+    }
+    return 0;
+}
+
+// Fast estimate of the cost of coding one block of quantized levels.
+//   vlevel : 16 packed levels, WD_LEVEL bits each
+//   type   : 0 = NORMAL, 1 = DC, otherwise AC (per the offset selection)
+// The magnitudes of 13 consecutive levels are taken, starting at index 0 or,
+// when bit 1 of `type` is set, at index 1. Each magnitude is mapped through
+// hls_LV0 / hls_LV1 / hls_LV2 / hls_LVn with position weights 1,1,1,2,2,3,3,
+// 4,4,5,5,5,5; the 13th term is skipped when bit 1 of `type` is set.
+// A type-dependent offset (0, 96 or 113) is added to the sum.
+ap_uint<WD_FAST> hls_fast_cost(ap_int<WD_LEVEL * 16> vlevel, ap_uint<2> type) {
+#pragma HLS INLINE
+    //#pragma HLS pipeline
+    // Only entries 0..12 are written and read.
+    ap_uint<WD_LEVEL - 1> levels[16];
+    for (int i = 0; i < 13; i++) {
+#pragma HLS unroll
+        ap_int<WD_LEVEL> alevel = (ap_int<WD_LEVEL>)VCT_GET(vlevel, (i + type[1]), WD_LEVEL);
+        // Absolute value of the signed level.
+        if (alevel < 0)
+            levels[i] = (0 - alevel);
+        else
+            levels[i] = alevel;
+    }
+    ap_uint<7> offset;
+    if (type == 0) // NORMAL
+        offset = 0;
+    else if (type == 1) // DC
+        offset = 96;
+    else
+        offset = 113;                                   // AC
+    ap_uint<WD_FAST> tmp = hls_LV0((levels[0]));        // +
+    tmp += hls_LV1((levels[1]));                        // +
+    tmp += hls_LV2((levels[2]));                        // +
+    tmp += 2 * hls_LVn((levels[3]));                    // +
+    tmp += 2 * hls_LVn((levels[4]));                    // +
+    tmp += 3 * hls_LVn((levels[5]));                    // +
+    tmp += 3 * hls_LVn((levels[6]));                    // +
+    tmp += 4 * hls_LVn((levels[7]));                    // +
+    tmp += 4 * hls_LVn((levels[8]));                    // +
+    tmp += 5 * hls_LVn((levels[9]));                    // +
+    tmp += 5 * hls_LVn((levels[10]));                   // +
+    tmp += 5 * hls_LVn((levels[11]));                   // +
+    if (type[1] != 1) tmp += 5 * hls_LVn((levels[12])); // +
+    return tmp + offset;
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+
+/* FOR S CALCULATION */
+// Sum of squared differences between the 16 pixels of `src` and `rec`.
+ap_uint<WD_SSE4> hls_SSE4X4(ap_uint<WD_PIX * 16> src, ap_uint<WD_PIX * 16> rec) {
+#pragma HLS INLINE
+    //#pragma HLS pipeline
+    ap_uint<WD_SSE4> acc = 0;
+    for (int px = 0; px < 16; px++) {
+#pragma HLS unroll
+        // One extra bit so the signed per-pixel difference fits.
+        const ap_int<WD_PIX + 1> diff = (ap_int<WD_PIX + 1>)VCT_GET(src, px, WD_PIX) - VCT_GET(rec, px, WD_PIX);
+        acc += diff * diff;
+    }
+    return acc;
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+
+/* NORMAL TRANSFORM */
+// Forward 4x4 transform of the residual (src_ap - ref_ap), in fixed-point
+// integer arithmetic. Returns 16 packed coefficients of WD_DCT bits each.
+// Pass 1 works on the four residuals selected by SB_GET(.., i, 0..3, ..) and
+// stores 14-bit intermediates; pass 2 combines intermediates i, 4+i, 8+i and
+// 12+i into the outputs at the same positions.
+ap_int<WD_DCT * 16> hls_FTransform(ap_uint<WD_PIX * 16> src_ap, ap_uint<WD_PIX * 16> ref_ap) {
+    /*FF:1531; LUT:1749; DSP:33;4.89+0.62ns; 7 cycles */
+    ap_int<WD_DCT * 16> out_ap;
+    ap_int<(14) * 16> tmp_ap;
+#pragma HLS INLINE
+    //#pragma HLS PIPELINE
+    for (int i = 0; i < 4; ++i) {
+#pragma HLS unroll
+        // Residuals of the four pixels of group i.
+        ap_int<WD_SUB> d0_ap = SB_GET(src_ap, i, 0, WD_PIX) - SB_GET(ref_ap, i, 0, WD_PIX); //
+        ap_int<WD_SUB> d1_ap = SB_GET(src_ap, i, 1, WD_PIX) - SB_GET(ref_ap, i, 1, WD_PIX);
+        ap_int<WD_SUB> d2_ap = SB_GET(src_ap, i, 2, WD_PIX) - SB_GET(ref_ap, i, 2, WD_PIX);
+        ap_int<WD_SUB> d3_ap = SB_GET(src_ap, i, 3, WD_PIX) - SB_GET(ref_ap, i, 3, WD_PIX);
+        ap_int<WD_SUB + 1> a0_ap = (d0_ap + d3_ap); // 10b                      [-510,510]
+        ap_int<WD_SUB + 1> a1_ap = (d1_ap + d2_ap);
+        ap_int<WD_SUB + 1> a2_ap = (d1_ap - d2_ap);
+        ap_int<WD_SUB + 1> a3_ap = (d0_ap - d3_ap);
+        // Even outputs are scaled by 8; odd outputs use the 2217 / 5352
+        // multipliers with a rounding constant and a 9-bit right shift.
+        VCT_GET(tmp_ap, i * 4 + 0, 14) = (a0_ap + a1_ap) * 8;                       // 14b   [-8160,8160]
+        VCT_GET(tmp_ap, i * 4 + 1, 14) = (a2_ap * 2217 + a3_ap * 5352 + 1812) >> 9; // [-7536,7542]
+        VCT_GET(tmp_ap, i * 4 + 2, 14) = (a0_ap - a1_ap) * 8;
+        VCT_GET(tmp_ap, i * 4 + 3, 14) = (a3_ap * 2217 - a2_ap * 5352 + 937) >> 9;
+    }
+    for (int i = 0; i < 4; ++i) {
+#pragma HLS unroll
+        // The (ap_int<14>) casts re-interpret the stored 14-bit fields as
+        // signed values before they are added.
+        ap_int<15> a0_ap = (ap_int<14>)VCT_GET(tmp_ap, (0 + i), 14) +
+                           (ap_int<14>)VCT_GET(tmp_ap, (12 + i), 14); //(tmp_ap[0 + i] + tmp_ap[12 + i]);  // 15b
+        ap_int<15> a1_ap = (ap_int<14>)VCT_GET(tmp_ap, (4 + i), 14) +
+                           (ap_int<14>)VCT_GET(tmp_ap, (8 + i), 14); //(tmp_ap[4 + i] + tmp_ap[8 + i]);
+        ap_int<15> a2_ap = (ap_int<14>)VCT_GET(tmp_ap, (4 + i), 14) -
+                           (ap_int<14>)VCT_GET(tmp_ap, (8 + i), 14); //(tmp_ap[4 + i] - tmp_ap[8 + i]);
+        ap_int<15> a3_ap = (ap_int<14>)VCT_GET(tmp_ap, (0 + i), 14) -
+                           (ap_int<14>)VCT_GET(tmp_ap, (12 + i), 14); //(tmp_ap[0 + i] - tmp_ap[12 + i]);
+        VCT_GET(out_ap, 0 + i, WD_DCT) = (a0_ap + a1_ap + 7) >> 4;    // 12b
+        // The output at 4 + i gets an extra +1 whenever a3_ap is non-zero.
+        VCT_GET(out_ap, 4 + i, WD_DCT) = ((a2_ap * 2217 + a3_ap * 5352 + 12000) >> 16) + (a3_ap != 0);
+        VCT_GET(out_ap, 8 + i, WD_DCT) = (a0_ap - a1_ap + 7) >> 4;
+        VCT_GET(out_ap, 12 + i, WD_DCT) = ((a3_ap * 2217 - a2_ap * 5352 + 51000) >> 16);
+    }
+    return out_ap;
+}
+
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+
+// Convenience wrapper: unpacks the quantizer steps, reciprocals and biases
+// from *pQM and forwards everything to hls_QuantizeBlock_old. Returns that
+// function's result unchanged.
+ap_uint<5> hls_QuantizeBlock(ap_int<WD_DCT * 16> in,
+                             ap_int<WD_LEVEL * 16>* out,
+                             ap_int<WD_DCT * 16>* out2,
+                             hls_QMatrix* pQM, // frequency boosters for slight sharpening
+                             ap_uint<WD_sharpen * 16> sharpen_,
+                             ap_uint<1> is16) // frequency boosters for slight sharpening
+{
+#pragma HLS INLINE
+    const hls_QMatrix& qm = *pQM;
+    return hls_QuantizeBlock_old(in, out, out2,
+                                 qm.q_0, qm.q_n,       // quantizer steps
+                                 qm.iq_0, qm.iq_n,     // reciprocals
+                                 qm.bias_0, qm.bias_n, // rounding biases
+                                 sharpen_, is16);
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+
+/* QUANTIZATION FOR NORMAL AND DC */
+// Quantizes 16 transform coefficients and produces the dequantized values.
+//   in        : 16 packed coefficients, WD_DCT bits each
+//   out       : [out] quantized levels, stored in scan order n
+//   out2      : [out] level * Q, stored at coefficient position j = ZIGZAG(n)
+//   q_0/q_n, iq_0/iq_n, bias_0/bias_n :
+//               quantizer step, fixed-point reciprocal and rounding bias for
+//               n == 0 and for n > 0 respectively
+//   sharpen_  : per-position value added to the coefficient magnitude
+//   is16      : when set, position 0 is forced to zero in both outputs and
+//               skipped
+// Returns the number of non-zero levels (despite the variable name `last`).
+ap_uint<5> hls_QuantizeBlock_old(ap_int<WD_DCT * 16> in,
+                                 ap_int<WD_LEVEL * 16>* out,
+                                 ap_int<WD_DCT * 16>* out2,
+                                 ap_uint<WD_q> q_0, // quantizer steps
+                                 ap_uint<WD_q> q_n,
+                                 ap_uint<WD_iq> iq_0, // reciprocals, fixed point.
+                                 ap_uint<WD_iq> iq_n,
+                                 ap_uint<WD_bias> bias_0, // rounding bias
+                                 ap_uint<WD_bias> bias_n,
+                                 ap_uint<WD_sharpen * 16> sharpen_,
+                                 ap_uint<1> is16) // frequency boosters for slight sharpening
+{
+#pragma HLS INLINE
+    //#pragma HLS pipeline
+    ap_uint<5> last = 0;
+    for (int n = 0; n < 16; ++n) {
+#pragma HLS unroll
+        if (is16 && n == 0) {
+            VCT_GET((*out2), 0, WD_DCT) = 0;
+            VCT_GET((*out), 0, WD_LEVEL) = 0;
+            continue;
+        }
+        const ap_uint<4> j = ZIGZAG(n);
+        const ap_int<WD_DCT> coeffs = (ap_int<WD_DCT>)VCT_GET(in, j, WD_DCT);
+        // Split into sign and magnitude; the sharpening term is added to the
+        // magnitude.
+        const ap_uint<1> sign = coeffs[WD_DCT - 1];
+        const ap_uint<WD_DCT - 1> coeff = (sign == 1 ? (ap_uint<WD_DCT - 1>)(-coeffs) : (ap_uint<WD_DCT - 1>)(coeffs)) +
+                                          VCT_GET(sharpen_, j, WD_sharpen);
+        const ap_uint<WD_q> Q = n == 0 ? q_0 : q_n;
+        const ap_uint<WD_iq> iQ = n == 0 ? iq_0 : iq_n;
+        const ap_uint<WD_bias> B = n == 0 ? bias_0 : bias_n;
+        // level = (|coeff| * iQ + B) / 2^17, clamped to MY_MAX_LEVEL.
+        ap_uint<WD_MLEVEL> level = (ap_uint<WD_MLEVEL>)((coeff * iQ + B) >> 17);
+        if (level > MY_MAX_LEVEL) level = MY_MAX_LEVEL;
+        // NOTE(review): `level` is unsigned, so the negation wraps modulo
+        // 2^WD_MLEVEL; the stores below rely on truncation to WD_DCT /
+        // WD_LEVEL bits to yield the two's-complement value -- confirm the
+        // WD_* widths make this hold.
+        if (sign) level = -level;
+        VCT_GET((*out2), j, WD_DCT) = level * Q;
+        VCT_GET((*out), n, WD_LEVEL) = level;
+        if (level) last += 1;
+    }
+    return (last);
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+
+/* Inverse transforms */
+// Fixed-point multipliers used through MUL() below. MUL drops the low 16 bits of the product,
+// so each constant behaves as (value / 65536).
+const ap_uint<17> kC1 = 20091 + (1 << 16);
+const ap_uint<17> kC2 = 35468;
+#define MUL(a, b) (((a) * (b)) >> 16)
+// Reconstructs one 4x4 block: applies a two-pass inverse transform to the 16 coefficients packed
+// in ap_in, adds the result to the 16 reference pixels packed in ap_ref and clamps to [0, 255].
+//   ap_ref : 16 reference (predicted) pixels, WD_PIX bits per element.
+//   ap_in  : 16 coefficients, WD_IQT bits per element, interpreted as signed.
+// Returns the 16 reconstructed pixels, WD_PIX bits per element.
+// NOTE(review): the structure looks like libwebp's ITransformOne - confirm against the reference.
+ap_uint<WD_PIX * 16> hls_ITransformOne(ap_uint<WD_PIX * 16> ap_ref, ap_int<WD_IQT * 16> ap_in) {
+    ap_uint<WD_PIX * 16> ap_des;
+    // Intermediate results of the first pass; 3 guard bits on top of WD_IQT.
+    ap_int<WD_IQT + 3> ap_tmp[16];
+#pragma HLS INLINE
+    //#pragma HLS pipeline
+    for (int i = 0; i < 4; ++i) { // vertical pass
+#pragma HLS unroll
+        // Reads elements i, i+4, i+8, i+12 of the input and writes four consecutive entries
+        // of ap_tmp (i*4 .. i*4+3), i.e. the intermediate is stored transposed.
+        ap_int<WD_IQT + 2> ap_a, ap_b, ap_c, ap_d;
+        ap_a = (ap_int<WD_IQT>)VCT_GET(ap_in, i + 0, WD_IQT) + (ap_int<WD_IQT>)VCT_GET(ap_in, i + 8, WD_IQT);
+        ap_b = (ap_int<WD_IQT>)VCT_GET(ap_in, i + 0, WD_IQT) - (ap_int<WD_IQT>)VCT_GET(ap_in, i + 8, WD_IQT);
+        ap_c = MUL((ap_int<WD_IQT>)VCT_GET(ap_in, i + 4, WD_IQT), kC2) -
+               MUL((ap_int<WD_IQT>)VCT_GET(ap_in, i + 12, WD_IQT), kC1);
+        ap_d = MUL((ap_int<WD_IQT>)VCT_GET(ap_in, i + 4, WD_IQT), kC1) +
+               MUL((ap_int<WD_IQT>)VCT_GET(ap_in, i + 12, WD_IQT), kC2);
+        ap_tmp[i * 4 + 0] = ap_a + ap_d;
+        ap_tmp[i * 4 + 1] = ap_b + ap_c;
+        ap_tmp[i * 4 + 2] = ap_b - ap_c;
+        ap_tmp[i * 4 + 3] = ap_a - ap_d;
+    }
+    for (int i = 0; i < 4; ++i) { // horizontal pass
+#pragma HLS unroll
+        ap_int<WD_IQT + 4> ap_dc, ap_a, ap_b, ap_c, ap_d;
+        ap_int<WD_IQT + 1> s0, s1, s2, s3;
+        ap_int<WD_IQT + 2> r0, r1, r2, r3;
+        // The +4 is the rounding term for the >> 3 applied to every output below.
+        ap_dc = 4 + ap_tmp[i + 0];
+        ap_a = ap_dc + ap_tmp[i + 8];
+        ap_b = ap_dc - ap_tmp[i + 8];
+        ap_c = MUL(ap_tmp[i + 4], kC2) - MUL(ap_tmp[i + 12], kC1);
+        ap_d = MUL(ap_tmp[i + 4], kC1) + MUL(ap_tmp[i + 12], kC2);
+        s0 = (ap_a + ap_d) >> 3;
+        s1 = (ap_b + ap_c) >> 3;
+        s2 = (ap_b - ap_c) >> 3;
+        s3 = (ap_a - ap_d) >> 3;
+        // Add the residual to the reference pixels of output elements i*4 .. i*4+3.
+        r0 = (ap_uint<WD_IQT + 2>)VCT_GET(ap_ref, 0 + i * 4, WD_PIX) + s0;
+        r1 = (ap_uint<WD_IQT + 2>)VCT_GET(ap_ref, 1 + i * 4, WD_PIX) + s1;
+        r2 = (ap_uint<WD_IQT + 2>)VCT_GET(ap_ref, 2 + i * 4, WD_PIX) + s2;
+        r3 = (ap_uint<WD_IQT + 2>)VCT_GET(ap_ref, 3 + i * 4, WD_PIX) + s3;
+        // Saturate to the 0..255 pixel range before storing the low WD_PIX bits.
+        VCT_GET(ap_des, 0 + i * 4, WD_PIX) = (r0 < 0) ? 0 : (r0 > 255) ? 255 : r0(WD_PIX - 1, 0);
+        VCT_GET(ap_des, 1 + i * 4, WD_PIX) = (r1 < 0) ? 0 : (r1 > 255) ? 255 : r1(WD_PIX - 1, 0);
+        VCT_GET(ap_des, 2 + i * 4, WD_PIX) = (r2 < 0) ? 0 : (r2 > 255) ? 255 : r2(WD_PIX - 1, 0);
+        VCT_GET(ap_des, 3 + i * 4, WD_PIX) = (r3 < 0) ? 0 : (r3 > 255) ? 255 : r3(WD_PIX - 1, 0);
+    }
+    return ap_des;
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+// Evaluates the four 16x16 luma prediction modes and keeps the one with the lowest score.
+//
+// All outputs with a "_cb" suffix are two-slot buffers. Each candidate mode is evaluated into the
+// slot that is NOT selected by bit 0 of *OP_b_y; *OP_b_y is flipped only when the candidate wins
+// (mode 0 always wins), so on return bit 0 of *OP_b_y indexes the slot holding the best mode.
+//
+//   I__tlambda, I__tlambda_m   : weights forwarded to hls_channel_p16.
+//   I__lambda_p16, I__isright  : accepted for interface compatibility; not used in this function.
+//   I__hls_qm1, I__hls_qm2     : quantizer matrices forwarded to hls_channel_p16.
+//   I__ap_sharpen              : sharpening vector forwarded to hls_channel_p16.
+//   I__ap_y_in_                : the 16 source 4x4 blocks of the macroblock.
+//   I__istop, I__isleft        : edge flags forwarded to the predictor.
+//   I__ap_y_top_c/_left_c/_m   : neighbouring pixels used for prediction.
+//   O__ap_y_out_cb             : reconstructed blocks per slot.
+//   O__ap_y_level_cb           : quantized levels per slot.
+//   O__ap_y16dc_level_cb       : quantized DC levels per slot.
+//   O__rd_y16_cb               : score and non-zero bitmap per slot.
+//   OP_ap_y16_mode_c           : receives the winning mode index.
+//   OP_b_y                     : receives the winning slot index.
+void Pickup_Y16(ap_uint<WD_LMD> I__tlambda,            //              :
+                ap_uint<WD_LMD> I__tlambda_m,          //
+                ap_uint<WD_LMD> I__lambda_p16,         //
+                hls_QMatrix I__hls_qm1,                // y44,y16
+                hls_QMatrix I__hls_qm2,                // y16
+                ap_int<WD_sharpen * 16> I__ap_sharpen, //
+                // Parameters changed for each MB
+                ap_uint<WD_PIX * 16> I__ap_y_in_[16], //
+                ap_uint<1> I__istop,                  //
+                ap_uint<1> I__isleft,                 //
+                ap_uint<1> I__isright,                //
+                // image context
+                ap_uint<WD_PIX * 4> I__ap_y_top_c[4],  //
+                ap_uint<WD_PIX * 4> I__ap_y_left_c[4], //
+                ap_uint<WD_PIX> I__ap_y_m,             //
+                // OUTPUT
+                ap_uint<WD_PIX * 16> O__ap_y_out_cb[2][17],    //
+                ap_int<WD_LEVEL * 16> O__ap_y_level_cb[2][17], //
+                ap_int<WD_LEVEL * 16> O__ap_y16dc_level_cb[2], //
+                str_rd O__rd_y16_cb[2],                        //
+                ap_uint<WD_MODE>* OP_ap_y16_mode_c,            //
+                ap_uint<2>* OP_b_y                             //
+                ) {
+    // Pickup Best Y16x16, less than 400 cycles
+    *OP_b_y = 0;
+PICKUP_Y16:
+    for (int mode_p16 = 0; mode_p16 < 4; mode_p16++) {
+//#pragma HLS DATAFLOW
+#pragma HLS latency max = 80
+        ap_uint<25> nz_y16_tmp;
+        // Evaluate this mode into the currently unselected slot.
+        O__rd_y16_cb[1 - (1 & (*OP_b_y))].score =
+            hls_channel_p16(mode_p16, I__istop, I__isleft, I__ap_y_top_c, I__ap_y_left_c, I__ap_y_m, I__ap_y_in_,
+                            I__hls_qm1, I__hls_qm2, I__ap_sharpen, I__tlambda, I__tlambda_m,
+                            // OUTPUT
+                            O__ap_y_level_cb[1 - (1 & *OP_b_y)], &O__ap_y16dc_level_cb[1 - (1 & *OP_b_y)],
+                            O__ap_y_out_cb[1 - (1 & *OP_b_y)], &nz_y16_tmp);
+        // Store the non-zero bitmap by value. The previous code assigned &nz_y16_tmp, i.e. the
+        // address of a loop-local, which does not carry the bitmap (Pickup_UV stores the value).
+        O__rd_y16_cb[1 - (1 & (*OP_b_y))].nz = nz_y16_tmp;
+        // Keep the candidate if it is the first mode or strictly better than the current best.
+        if (mode_p16 == 0 || O__rd_y16_cb[1 - (1 & (*OP_b_y))].score < O__rd_y16_cb[(1 & *OP_b_y)].score) {
+            *OP_ap_y16_mode_c = mode_p16;
+            *OP_b_y = 1 - (1 & *OP_b_y);
+        }
+    }
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+
+// Evaluates one 16x16 luma prediction mode and returns its rate-distortion score.
+//
+// Stage 1 (CHANNEL_P16_WHT): for each of the 16 sub-blocks build the prediction, forward
+//   transform the residual, quantize it and collect coefficient 0 of every block into ap_wht_in.
+// Stage 2: transform/quantize/inverse-transform the 16 collected coefficients (WHT path).
+// Stage 3 (CHANNEL_P16_ONE): put the reconstructed DC back into each block, inverse transform,
+//   and accumulate distortion (d), spectral distortion (sd) and rate (r).
+//
+//   mode_p16          : prediction mode; also indexes the 4-entry fixed-cost table below.
+//   istop, isleft     : edge flags forwarded to the predictor.
+//   ap_y_top_c/_left_c/ap_y_m : neighbouring pixels used for prediction.
+//   ap_yuv_in_        : source blocks; only elements 0..15 are read here.
+//   hls_qm1, hls_qm2  : quantizer matrices for the per-block and WHT quantizers respectively.
+//   ap_sharpen        : sharpening vector forwarded to hls_QuantizeBlock.
+//   tlambda           : weight applied to the accumulated sd term.
+//   tlambda_m         : weight forwarded to hls_ca_score.
+//   ap_y16_level_c    : receives levels of blocks 0..15 and the WHT levels in element 16.
+//   ap_y16dc_level_c  : receives the WHT levels (element 0 of the pointed-to storage).
+//   ap_y16_out_c      : receives the 16 reconstructed blocks.
+//   nz                : receives the non-zero bitmap (bits 0..15 per block, bit 24 for the WHT).
+ap_uint<WD_RD_SCORE + 4> hls_channel_p16(ap_uint<4> mode_p16,
+                                         ap_uint<1> istop,
+                                         ap_uint<1> isleft,
+                                         ap_uint<WD_PIX * 4> ap_y_top_c[4],
+                                         ap_uint<WD_PIX * 4> ap_y_left_c[4],
+                                         ap_uint<WD_PIX> ap_y_m,
+                                         ap_uint<WD_PIX * 16> ap_yuv_in_[24],
+                                         hls_QMatrix hls_qm1,
+                                         hls_QMatrix hls_qm2,
+                                         ap_int<WD_sharpen * 16> ap_sharpen,
+                                         ap_uint<WD_LMD> tlambda,   //     = dqm->tlambda_;
+                                         ap_uint<WD_LMD> tlambda_m, //   = dqm->lambda_mode_;
+                                         ap_int<WD_LEVEL * 16> ap_y16_level_c[17],
+                                         ap_int<WD_LEVEL * 16>* ap_y16dc_level_c,
+                                         ap_uint<WD_PIX * 16> ap_y16_out_c[17],
+                                         ap_uint<25>* nz) {
+#pragma HLS INLINE
+
+    str_dis rd_dis16;
+    rd_dis16.init();
+    ap_uint<WD_PIX * 16> ap_ref_p16[16]; // per-block prediction, reused in stage 3
+    ap_int<WD_IWHT * 16> ap_iwht_dc;     // inverse-WHT output: one reconstructed DC per block
+
+    ap_int<WD_DCT * 16> ap_dct_out[16];
+    ap_int<WD_DCT * 16> ap_wht_in; // coefficient 0 of each of the 16 blocks
+    ap_int<WD_WHT * 16> ap_wht_out;
+    ap_int<WD_IQT * 16> ap_iqt_ac[16]; // dequantized coefficients per block
+    ap_int<WD_WHT * 16> ap_iqt_dc;     // dequantized WHT coefficients
+
+CHANNEL_P16_WHT:
+    for (int n = 0; n < 16; n += 1) {
+#pragma HLS PIPELINE
+        ap_ref_p16[n] = hls_p16_test(mode_p16, n, istop, isleft, ap_y_top_c, ap_y_left_c, ap_y_m);
+        ap_dct_out[n] = hls_FTransform(ap_yuv_in_[n], ap_ref_p16[n]);
+        // Last argument 1 selects the "is16" behaviour of the quantizer.
+        ap_uint<5> score_nz =
+            hls_QuantizeBlock(ap_dct_out[n], &ap_y16_level_c[n], &ap_iqt_ac[n], &hls_qm1, ap_sharpen, 1);
+        // Bit n of the bitmap is set when block n has any non-zero level.
+        rd_dis16.nz |= (ap_uint<25>)((score_nz != 0) << n);
+        VCT_GET(ap_wht_in, n, WD_DCT) = (ap_int<WD_DCT>)VCT_GET(ap_dct_out[n], 0, WD_DCT);
+    } // for n
+    ap_wht_out = hls_FTransformWHT(ap_wht_in);
+    ap_int<WD_LEVEL * 16> tmp_level;
+    // Bit 24 of the bitmap flags non-zero WHT levels.
+    rd_dis16.nz(24, 24) = hls_QuantizeBlockWHT(ap_wht_out, &tmp_level, &ap_iqt_dc, &hls_qm2);
+    ap_y16_level_c[16] = tmp_level;
+    ap_y16dc_level_c[0] = tmp_level;
+    ap_iwht_dc = hls_ITransformWHT(ap_iqt_dc);
+CHANNEL_P16_ONE:
+    for (int n = 0; n < 16; n += 1) {
+#pragma HLS PIPELINE
+        ap_int<WD_IQT* 16> ap_dcac = ap_iqt_ac[n];
+        // Replace coefficient 0 with the DC reconstructed through the WHT path.
+        VCT_GET(ap_dcac, 0, WD_IQT) = (ap_int<WD_IWHT>)VCT_GET(ap_iwht_dc, n, WD_IWHT);
+        ap_y16_out_c[n] = hls_ITransformOne(ap_ref_p16[n], ap_dcac);
+        rd_dis16.d += hls_SSE4X4(ap_yuv_in_[n], ap_y16_out_c[n]);
+        rd_dis16.sd += hls_Disto4x4(ap_yuv_in_[n], ap_y16_out_c[n]);
+        rd_dis16.r += hls_fast_cost(ap_y16_level_c[n], 2);
+    }
+
+    // Index 16 - 16 == 0: the cost of the WHT levels stored just above.
+    rd_dis16.r += hls_fast_cost(ap_y16dc_level_c[16 - 16], 1);
+    // Scale sd by tlambda with rounding (add 128, drop 8 bits).
+    rd_dis16.sd = (rd_dis16.sd * tlambda + 128) >> 8;
+    // Fixed per-mode header cost.
+    const ap_uint<10> my_VP8FixedCostsI16[4] = {663, 919, 872, 919};
+    rd_dis16.h = my_VP8FixedCostsI16[mode_p16];
+
+    *nz = rd_dis16.nz;
+    return hls_ca_score(tlambda_m, &rd_dis16, mode_p16);
+}
+
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+/* Inverse transforms */
+// Inverse 4x4 Walsh-Hadamard style transform implemented with additions/subtractions only.
+//   in : 16 coefficients, WD_WHT bits per element, interpreted as signed.
+// Returns 16 values, WD_IWHT bits per element; each is the butterfly result (with +3 rounding
+// folded into the first term of each row) shifted right by 3.
+ap_int<WD_IWHT * 16> hls_ITransformWHT(ap_int<WD_WHT * 16> in) {
+// FF:0, lut:1248; 4.12+0.62ns; Latency:1
+// input is 12b signed
+#pragma HLS INLINE
+    //#pragma HLS pipeline
+    ap_int<WD_IWHT * 16> out;
+    ap_int<WD_WHT + 2> tmp[16];
+    // First pass: combines elements i, i+4, i+8, i+12 and stores them at the same stride.
+    for (int i = 0; i < 4; ++i) {
+#pragma HLS unroll
+        ap_int<WD_WHT + 1> a0 =
+            ((ap_int<WD_WHT>)VCT_GET(in, 0 + i, WD_WHT) + (ap_int<WD_WHT>)VCT_GET(in, 12 + i, WD_WHT)); // 16b
+        ap_int<WD_WHT + 1> a1 =
+            ((ap_int<WD_WHT>)VCT_GET(in, 4 + i, WD_WHT) + (ap_int<WD_WHT>)VCT_GET(in, 8 + i, WD_WHT));
+        ap_int<WD_WHT + 1> a2 =
+            ((ap_int<WD_WHT>)VCT_GET(in, 4 + i, WD_WHT) - (ap_int<WD_WHT>)VCT_GET(in, 8 + i, WD_WHT));
+        ap_int<WD_WHT + 1> a3 =
+            ((ap_int<WD_WHT>)VCT_GET(in, 0 + i, WD_WHT) - (ap_int<WD_WHT>)VCT_GET(in, 12 + i, WD_WHT));
+        tmp[0 + i] = a0 + a1; // 17b
+        tmp[8 + i] = a0 - a1;
+        tmp[4 + i] = a3 + a2;
+        tmp[12 + i] = a3 - a2;
+    }
+    // Second pass: combines the four consecutive entries i*4 .. i*4+3 and writes outputs i*4 .. i*4+3.
+    for (int i = 0; i < 4; ++i) {
+#pragma HLS unroll
+        // +3 is the rounding term applied before the final >> 3.
+        ap_int<WD_WHT + 3> dc = (tmp[0 + i * 4] + 3);  // 18b
+        ap_int<WD_WHT + 3> a0 = (dc + tmp[3 + i * 4]); // 18b
+        ap_int<WD_WHT + 3> a1 = (tmp[1 + i * 4] + tmp[2 + i * 4]);
+        ap_int<WD_WHT + 3> a2 = (tmp[1 + i * 4] - tmp[2 + i * 4]);
+        ap_int<WD_WHT + 3> a3 = (dc - tmp[3 + i * 4]);
+        ap_int<WD_WHT + 4> b0 = a0 + a1; // 19b
+        ap_int<WD_WHT + 4> b1 = a3 + a2;
+        ap_int<WD_WHT + 4> b2 = a0 - a1;
+        ap_int<WD_WHT + 4> b3 = a3 - a2;
+        VCT_GET(out, 0 + i * 4, WD_IWHT) = b0 >> 3; // 16b
+        VCT_GET(out, 1 + i * 4, WD_IWHT) = b1 >> 3;
+        VCT_GET(out, 2 + i * 4, WD_IWHT) = b2 >> 3;
+        VCT_GET(out, 3 + i * 4, WD_IWHT) = b3 >> 3;
+    }
+    return out;
+}
+
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+
+/* Quantization for DC (WHT) coefficients */
+// Quantizes the 16 WHT coefficients packed in `in`.
+//   in             : 16 coefficients, WD_WHT bits per element, interpreted as signed.
+//   out            : receives the 16 quantized levels, stored at scan position n.
+//   out2           : receives the 16 dequantized values (level * Q), stored at position ZIGZAG(n).
+//   q_0 / q_n      : quantizer step for scan position 0 / for all other positions.
+//   iq_0 / iq_n    : fixed-point reciprocal for position 0 / for all other positions.
+//   bias_0 / bias_n: rounding bias for position 0 / for all other positions.
+// Returns 1 when at least one level is non-zero, otherwise 0.
+ap_uint<1> hls_QuantizeBlockWHT_old(ap_int<WD_WHT * 16> in,
+                                    ap_int<WD_LEVEL * 16>* out,
+                                    ap_int<WD_WHT * 16>* out2,
+                                    ap_uint<WD_q> q_0, // quantizer steps
+                                    ap_uint<WD_q> q_n,
+                                    ap_uint<WD_iq> iq_0, // reciprocals, fixed point.
+                                    ap_uint<WD_iq> iq_n,
+                                    ap_uint<WD_bias> bias_0, // rounding bias
+                                    ap_uint<WD_bias> bias_n) {
+#pragma HLS pipeline
+    ap_uint<1> last = 0;
+    for (int n = 0; n < 16; ++n) {
+#pragma HLS unroll
+        const ap_uint<4> j = ZIGZAG(n);
+        const ap_int<WD_WHT> coeffs = (ap_int<WD_WHT>)VCT_GET(in, j, WD_WHT);
+        // Split into sign and magnitude; the magnitude is quantized and the sign re-applied.
+        const ap_uint<1> sign = coeffs[WD_WHT - 1];
+        const ap_uint<WD_WHT - 1> coeff = (sign == 1 ? (ap_uint<WD_WHT - 1>)(-coeffs) : (ap_uint<WD_WHT - 1>)(coeffs));
+        const ap_uint<WD_q> Q = n == 0 ? q_0 : q_n;
+        const ap_uint<WD_iq> iQ = n == 0 ? iq_0 : iq_n;
+        const ap_uint<WD_bias> B = n == 0 ? bias_0 : bias_n;
+        // Division by the step via multiply with the reciprocal, then drop 17 fractional bits.
+        ap_uint<WD_MLEVEL> level = (ap_uint<WD_MLEVEL>)((coeff * iQ + B) >> 17);
+        if (level > MY_MAX_LEVEL) level = MY_MAX_LEVEL;
+        if (sign) level = -level;
+        VCT_GET((*out2), j, WD_WHT) = level * Q;
+        VCT_GET((*out), n, WD_LEVEL) = level;
+        if (level) last = 1;
+    }
+    return last;
+}
+// Quantizes a WHT block using the parameters held in a quantizer matrix.
+// Unpacks the six fields of *pQM and forwards everything to hls_QuantizeBlockWHT_old.
+//   in   : packed WHT coefficients.
+//   out  : receives the quantized levels.
+//   out2 : receives the dequantized values.
+//   pQM  : quantizer matrix supplying q_0, q_n, iq_0, iq_n, bias_0 and bias_n.
+// Returns whatever hls_QuantizeBlockWHT_old returns (non-zero flag).
+ap_uint<1> hls_QuantizeBlockWHT(ap_int<WD_WHT * 16> in,
+                                ap_int<WD_LEVEL * 16>* out,
+                                ap_int<WD_WHT * 16>* out2,
+                                hls_QMatrix* pQM) {
+    const hls_QMatrix& qm = *pQM;
+    const ap_uint<1> any_nonzero =
+        hls_QuantizeBlockWHT_old(in, out, out2, qm.q_0, qm.q_n, qm.iq_0, qm.iq_n, qm.bias_0, qm.bias_n);
+    return any_nonzero;
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+
+// DC prediction for a 16x16 luma macroblock, emitted as one 4x4 block filled with a single value.
+//   top0..top3   : the four 4-pixel groups above the macroblock.
+//   left0..left3 : the four 4-pixel groups to its left.
+//   istop/isleft : 0 means the corresponding neighbours are used; non-zero means they are not.
+// The value is the rounded average of the usable neighbours; when only one side is usable its
+// sum is doubled so the same divisor applies; when neither is usable the value is 0x80.
+ap_uint<WD_PIX * 16> hls_DC16_4_y( // ref: lut:56, 3.19+1.25
+    ap_uint<WD_PIX * 4> top0,
+    ap_uint<WD_PIX * 4> top1,
+    ap_uint<WD_PIX * 4> top2,
+    ap_uint<WD_PIX * 4> top3,
+    ap_uint<WD_PIX * 4> left0,
+    ap_uint<WD_PIX * 4> left1,
+    ap_uint<WD_PIX * 4> left2,
+    ap_uint<WD_PIX * 4> left3,
+    ap_uint<1> istop,
+    ap_uint<1> isleft) {
+#pragma HLS PIPELINE
+    const bool use_top = (istop == 0);
+    const bool use_left = (isleft == 0);
+    ap_uint<WD_PIX + 5> acc = 0;
+
+    if (!use_top && !use_left) {
+        // No neighbours at all: fixed mid-grey.
+        acc = 0X80;
+    } else {
+        if (use_top) {
+            acc += (AP_TREEADD4_VCT(top0, WD_PIX));
+            acc += (AP_TREEADD4_VCT(top1, WD_PIX));
+            acc += (AP_TREEADD4_VCT(top2, WD_PIX));
+            acc += (AP_TREEADD4_VCT(top3, WD_PIX));
+        }
+        if (use_left) {
+            acc += (AP_TREEADD4_VCT(left0, WD_PIX));
+            acc += (AP_TREEADD4_VCT(left1, WD_PIX));
+            acc += (AP_TREEADD4_VCT(left2, WD_PIX));
+            acc += (AP_TREEADD4_VCT(left3, WD_PIX));
+        }
+        // Only one side contributed: count it twice.
+        if (!(use_top && use_left)) acc += acc;
+        acc = (acc + (8 << 1)) >> (4 + 1);
+    }
+
+    ap_uint<WD_PIX> fill = acc(WD_PIX - 1, 0);
+    ap_uint<WD_PIX * 16> sb;
+    SB_SET_LINE_VAL(sb, 0, WD_PIX, fill);
+    SB_SET_LINE_VAL(sb, 1, WD_PIX, fill);
+    SB_SET_LINE_VAL(sb, 2, WD_PIX, fill);
+    SB_SET_LINE_VAL(sb, 3, WD_PIX, fill);
+    return sb;
+}
+
+// Builds the 4x4 prediction for sub-block n of a 16x16 luma macroblock.
+//   mode         : B_DC_PRED, B_TM_PRED or B_VE_PRED; any other value selects the horizontal predictor.
+//   n            : sub-block index; n & 3 picks the top group, n >> 2 picks the left group.
+//   istop/isleft : edge flags forwarded to the predictors.
+//   ap_y_top_c   : four 4-pixel groups above the macroblock.
+//   ap_y_left_c  : four 4-pixel groups to its left.
+//   ap_y_m       : corner pixel handed to the TM predictor.
+ap_uint<WD_PIX * 16> hls_p16_test(ap_uint<2> mode,
+                                  ap_uint<4> n,
+                                  ap_uint<1> istop,
+                                  ap_uint<1> isleft,
+                                  ap_uint<WD_PIX * 4> ap_y_top_c[4],
+                                  ap_uint<WD_PIX * 4> ap_y_left_c[4],
+                                  ap_uint<WD_PIX> ap_y_m) {
+#pragma HLS PIPELINE
+    // Neighbour groups that line up with this sub-block's column and row.
+    ap_uint<WD_PIX * 4> top_quad = ap_y_top_c[n & 3];
+    ap_uint<WD_PIX * 4> left_quad = ap_y_left_c[n >> 2];
+    ap_uint<WD_PIX> corner = ap_y_m;
+
+    ap_uint<WD_PIX * 16> pred;
+    switch (mode) {
+        case B_DC_PRED:
+            // DC uses every neighbour of the macroblock, not just this sub-block's groups.
+            pred = hls_DC16_4_y(ap_y_top_c[0], ap_y_top_c[1], ap_y_top_c[2], ap_y_top_c[3], ap_y_left_c[0],
+                                ap_y_left_c[1], ap_y_left_c[2], ap_y_left_c[3], istop, isleft);
+            break;
+        case B_TM_PRED:
+            pred = hls_TM16_4(top_quad, left_quad, corner, istop, isleft);
+            break;
+        case B_VE_PRED:
+            pred = hls_VE16_4(top_quad, istop);
+            break;
+        default:
+            pred = hls_HE16_4(left_quad, isleft);
+            break;
+    }
+    return pred;
+}
+
+// Combines the accumulated terms of *dis into one score:
+//   ((d + sd) << 8) + (r + h) * lmbda
+//   lmbda : weight applied to the rate-like terms (r + h).
+//   dis   : accumulator holding d, sd, r and h.
+//   m     : not used by the computation.
+ap_uint<WD_RD_SCORE + 4> hls_ca_score(ap_uint<WD_LMD> lmbda, str_dis* dis, ap_uint<4> m) {
+#pragma HLS PIPELINE
+    const ap_uint<WD_RD_SCORE + 4> distortion =
+        (ap_uint<WD_RD_SCORE + 4>)(dis->d + (ap_uint<WD_SSE4 + 4>)(dis->sd));
+    const ap_uint<WD_RD_SCORE + 4> rate = (ap_uint<WD_RD_SCORE + 4>)(dis->r + dis->h);
+    return (distortion << 8) + rate * lmbda;
+} // ca_score
+
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+
+// Evaluates the four chroma prediction modes and keeps the one with the lowest score.
+//
+// All outputs with a "_cb" suffix are two-slot buffers. Each candidate mode is evaluated into
+// slot (1 - *OP_b_uv); *OP_b_uv is flipped only when the candidate wins (mode 0 always wins),
+// so on return *OP_b_uv indexes the slot holding the best mode.
+//   I__tlambda, I__tlambda_m, I__isright : accepted but not used in this function.
+//   I__lambda_uv        : weight forwarded to hls_channel_uv_8.
+//   I__hls_qm_uv        : quantizer matrix forwarded to hls_channel_uv_8.
+//   I__ap_sharpen_uv    : sharpening vector forwarded to hls_channel_uv_8.
+//   I__ap_uv_in_        : the 8 source 4x4 chroma blocks.
+//   I__istop, I__isleft : edge flags forwarded to the predictor.
+//   I__ap_uv_top_c/_left_c, I__ap_u_m, I__ap_v_m : neighbouring pixels used for prediction.
+//   O__ap_uv_out_cb     : reconstructed blocks per slot.
+//   O__ap_uv_level_cb   : quantized levels per slot.
+//   O__rd_uv_cb         : score and non-zero bitmap per slot.
+//   OP_ap_uv_mode_c     : receives the winning mode index.
+//   OP_b_uv             : receives the winning slot index.
+void Pickup_UV(
+    // Parameters unchanged for one picture/segment
+    ap_uint<WD_LMD> I__tlambda,               //              :
+    ap_uint<WD_LMD> I__tlambda_m,             //
+    ap_uint<WD_LMD> I__lambda_uv,             //
+    hls_QMatrix I__hls_qm_uv,                 //
+    ap_int<WD_sharpen * 16> I__ap_sharpen_uv, //
+    // Parameters changed for each MB
+    ap_uint<WD_PIX * 16> I__ap_uv_in_[8], //
+    ap_uint<1> I__istop,                  //
+    ap_uint<1> I__isleft,                 //
+    ap_uint<1> I__isright,                //
+    // image context
+    ap_uint<WD_PIX * 4> I__ap_uv_top_c[4],          //
+    ap_uint<WD_PIX * 4> I__ap_uv_left_c[4],         //
+    ap_uint<WD_PIX> I__ap_u_m,                      //
+    ap_uint<WD_PIX> I__ap_v_m,                      //
+    ap_uint<WD_PIX * 16> O__ap_uv_out_cb[2][17],    //
+    ap_int<WD_LEVEL * 16> O__ap_uv_level_cb[2][16], //
+    str_rd O__rd_uv_cb[2],                          //
+    ap_uint<WD_MODE>* OP_ap_uv_mode_c,              //
+    ap_uint<1>* OP_b_uv                             //
+    ) {
+    // Pickup Best UV, less than 400 cycles
+    *OP_b_uv = 0;
+PICKUP_UV:
+    for (int mode_uv = 0; mode_uv < 4; mode_uv++) {
+        // Evaluate this mode into the currently unselected slot.
+        ap_uint<25> nz_tmp;
+        O__rd_uv_cb[1 - *OP_b_uv].score =
+            hls_channel_uv_8(mode_uv, I__istop, I__isleft, I__ap_uv_top_c, I__ap_uv_left_c, I__ap_u_m, I__ap_v_m,
+                             I__ap_uv_in_, I__hls_qm_uv, I__ap_sharpen_uv, I__lambda_uv,
+                             // OUTPUT
+                             O__ap_uv_level_cb[1 - *OP_b_uv], O__ap_uv_out_cb[1 - *OP_b_uv], &nz_tmp);
+        O__rd_uv_cb[1 - *OP_b_uv].nz = nz_tmp;
+        // Keep the candidate if it is the first mode or strictly better than the current best.
+        if (mode_uv == 0 || O__rd_uv_cb[1 - *OP_b_uv].score < O__rd_uv_cb[*OP_b_uv].score) {
+            *OP_ap_uv_mode_c = mode_uv;
+            *OP_b_uv = 1 - *OP_b_uv;
+        }
+    }
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+
+// Evaluates one chroma prediction mode over the 8 chroma 4x4 blocks and returns its score.
+// For each block: predict, forward transform, quantize, inverse transform, then accumulate
+// distortion (d) and rate (r). The sd term is not accumulated on this path.
+//   mode_uv        : prediction mode; also indexes the 4-entry fixed-cost table.
+//   istop, isleft  : edge flags forwarded to the predictor.
+//   ap_uv_top_c/_left_c, ap_u_m, ap_v_m : neighbouring pixels used for prediction.
+//   ap_uv_in_      : the 8 source blocks.
+//   hls_qm_uv      : quantizer matrix.
+//   ap_sharpen_uv  : sharpening vector forwarded to hls_QuantizeBlock.
+//   lambda_uv      : weight forwarded to hls_ca_score.
+//   ap_uv_level_c  : receives the quantized levels of the 8 blocks.
+//   ap_uv_out_c    : receives the 8 reconstructed blocks.
+//   nz             : receives the non-zero bitmap (bit n + 16 for block n).
+ap_uint<WD_RD_SCORE + 4> hls_channel_uv_8(ap_uint<4> mode_uv,
+                                          ap_uint<1> istop,
+                                          ap_uint<1> isleft,
+                                          ap_uint<WD_PIX * 4> ap_uv_top_c[4],
+                                          ap_uint<WD_PIX * 4> ap_uv_left_c[4],
+                                          ap_uint<WD_PIX> ap_u_m,
+                                          ap_uint<WD_PIX> ap_v_m,
+                                          ap_uint<WD_PIX * 16> ap_uv_in_[8],
+                                          hls_QMatrix hls_qm_uv,
+                                          ap_int<WD_sharpen * 16> ap_sharpen_uv,
+                                          ap_uint<WD_LMD> lambda_uv, //     = dqm->tlambda_;
+                                          ap_int<WD_LEVEL * 16> ap_uv_level_c[8],
+                                          ap_uint<WD_PIX * 16> ap_uv_out_c[8],
+                                          ap_uint<25>* nz) {
+#pragma HLS INLINE
+    str_dis rd_dis;
+    // Fixed per-mode header cost.
+    const ap_uint<10> my_VP8FixedCostsUV[4] = {302, 984, 439, 642};
+    ap_uint<WD_PIX * 16> ap_ref_uv;
+    ap_int<WD_IQT * 16> ap_iqt_uv;
+    ap_int<WD_DCT * 16> ap_dct_out;
+    ap_uint<5> score_nz;
+    rd_dis.init();
+    rd_dis.h = my_VP8FixedCostsUV[mode_uv];
+CHANNEL_UV_8:
+    for (int n = 0; n < 8; n += 1) {
+#pragma HLS PIPELINE
+        ap_ref_uv = hls_p8_test(mode_uv, n, istop, isleft, ap_uv_top_c, ap_uv_left_c, ap_u_m, ap_v_m);
+        ap_dct_out = hls_FTransform(ap_uv_in_[n], ap_ref_uv);
+        // Last argument 0: the "is16" special case of the quantizer is disabled for chroma.
+        score_nz = hls_QuantizeBlock(ap_dct_out, &ap_uv_level_c[n], &ap_iqt_uv, &hls_qm_uv, ap_sharpen_uv, 0);
+        ap_uv_out_c[n] = hls_ITransformOne(ap_ref_uv, ap_iqt_uv);
+        // Chroma blocks occupy bits 16..23 of the bitmap.
+        rd_dis.nz |= (ap_uint<25>)((score_nz != 0) << (n + 16));
+        rd_dis.d += hls_SSE4X4(ap_uv_in_[n], ap_uv_out_c[n]);
+        rd_dis.r += hls_fast_cost(ap_uv_level_c[n], 2);
+    }
+    *nz = rd_dis.nz;
+    return hls_ca_score(lambda_uv, &rd_dis, mode_uv);
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+/*******************************************/
+/* Prediction img Generation: Y44          */
+/*******************************************/
+/* 1 */
+// 4x4 DC prediction: fills the block with the rounded average of the 8 neighbour pixels
+// (sum of the four in abcd and the four in ijkl, plus 4, shifted right by 3).
+//   abcd : 4 pixels, WD_PIX bits each (presumably the row above - confirm with the caller).
+//   ijkl : 4 pixels, WD_PIX bits each (presumably the column to the left - confirm with the caller).
+ap_uint<WD_PIX * 16> hls_DC4( // ref:581  lut 56, 4.46+1.25 ,
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<WD_PIX * 4> ijkl //,
+    ) {                      //
+#pragma HLS PIPELINE
+    ap_uint<WD_PIX * 16> sb;
+    ap_uint<WD_PIX> tmp = ap_uint<WD_PIX>(
+        (AP_TREEADD2((AP_TREEADD4_VCT(abcd, WD_PIX)), (AP_TREEADD4_VCT(ijkl, WD_PIX)), (WD_PIX + 2)) + 4) >> 3);
+    // Same value on every line of the block.
+    SB_SET_LINE_VAL(sb, 0, WD_PIX, tmp);
+    SB_SET_LINE_VAL(sb, 1, WD_PIX, tmp);
+    SB_SET_LINE_VAL(sb, 2, WD_PIX, tmp);
+    SB_SET_LINE_VAL(sb, 3, WD_PIX, tmp);
+    return sb;
+};
+
+/* 2 */
+// 4x4 vertical prediction: each column is filled with a 3-tap average (AP_AVG3) of neighbours.
+//   abcd, efgh : neighbour pixel groups; A44..E44 are macros defined elsewhere that presumably
+//                select individual pixels from these parameters by name - confirm.
+//   X44        : corner pixel used in the first column's average.
+ap_uint<WD_PIX * 16> hls_VE4( // ref: lut:56, 3.19+1.25//lut 452vs997, 2.72+1.25ns,
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<WD_PIX * 4> efgh,
+    ap_uint<WD_PIX> X44) { // vertical
+#pragma HLS PIPELINE
+    ap_uint<WD_PIX * 16> sb;
+    // One smoothed value per column, sliding a 3-pixel window over X, A, B, C, D, E.
+    const ap_uint<WD_PIX> val0(ap_uint<WD_PIX>(AP_AVG3(X44, A44, B44, WD_PIX)));
+    const ap_uint<WD_PIX> val1(ap_uint<WD_PIX>(AP_AVG3(A44, B44, C44, WD_PIX)));
+    const ap_uint<WD_PIX> val2(ap_uint<WD_PIX>(AP_AVG3(B44, C44, D44, WD_PIX)));
+    const ap_uint<WD_PIX> val3(ap_uint<WD_PIX>(AP_AVG3(C44, D44, E44, WD_PIX)));
+
+    SB_SET_COL_VAL(sb, 0, WD_PIX, val0);
+    SB_SET_COL_VAL(sb, 1, WD_PIX, val1);
+    SB_SET_COL_VAL(sb, 2, WD_PIX, val2);
+    SB_SET_COL_VAL(sb, 3, WD_PIX, val3);
+    return sb;
+};
+
+/* 3 */
+// 4x4 horizontal prediction: each line is filled with a 3-tap average (AP_AVG3) of neighbours.
+//   ijkl : neighbour pixel group; I44..L44 are macros defined elsewhere that presumably select
+//          individual pixels from this parameter by name - confirm.
+//   X44  : corner pixel used in the first line's average.
+ap_uint<WD_PIX * 16> hls_HE4( // ref: lut:56, 3.19+1.25
+    ap_uint<WD_PIX * 4> ijkl,
+    ap_uint<WD_PIX> X44) {
+#pragma HLS PIPELINE
+    ap_uint<WD_PIX * 16> sb;
+    // Sliding 3-pixel window over X, I, J, K, L; the last window repeats L.
+    SB_SET_LINE_VAL(sb, 0, WD_PIX, ap_uint<WD_PIX>(AP_AVG3(X44, I44, J44, WD_PIX)));
+    SB_SET_LINE_VAL(sb, 1, WD_PIX, ap_uint<WD_PIX>(AP_AVG3(I44, J44, K44, WD_PIX)));
+    SB_SET_LINE_VAL(sb, 2, WD_PIX, ap_uint<WD_PIX>(AP_AVG3(J44, K44, L44, WD_PIX)));
+    SB_SET_LINE_VAL(sb, 3, WD_PIX, ap_uint<WD_PIX>(AP_AVG3(K44, L44, L44, WD_PIX)));
+    return sb;
+};
+
+/* 4 */
+// 4x4 diagonal prediction ("RD"): positions whose two AP_DST indices differ by the same amount
+// share one 3-tap average of the neighbours L, K, J, I, X, A, B, C, D.
+//   abcd, ijkl : neighbour pixel groups; A44..D44 / I44..L44 are macros defined elsewhere that
+//                presumably select individual pixels from these parameters by name - confirm.
+//   X44        : corner pixel.
+ap_uint<WD_PIX * 16> hls_RD4( // ref: lut:98  3.19+1.25, ,
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<WD_PIX * 4> ijkl,
+    ap_uint<WD_PIX> X44) {
+#pragma HLS PIPELINE
+    ap_uint<WD_PIX * 16> sb;
+    // Seven diagonals, from the (0,3) corner to the (3,0) corner.
+    AP_DST(sb, 0, 3, WD_PIX) = AP_AVG3(J44, K44, L44, WD_PIX);
+    AP_DST(sb, 0, 2, WD_PIX) = AP_DST(sb, 1, 3, WD_PIX) = AP_AVG3(I44, J44, K44, WD_PIX);
+    AP_DST(sb, 0, 1, WD_PIX) = AP_DST(sb, 1, 2, WD_PIX) = AP_DST(sb, 2, 3, WD_PIX) = AP_AVG3(X44, I44, J44, WD_PIX);
+    AP_DST(sb, 0, 0, WD_PIX) = AP_DST(sb, 1, 1, WD_PIX) = AP_DST(sb, 2, 2, WD_PIX) = AP_DST(sb, 3, 3, WD_PIX) =
+        AP_AVG3(A44, X44, I44, WD_PIX);
+    AP_DST(sb, 1, 0, WD_PIX) = AP_DST(sb, 2, 1, WD_PIX) = AP_DST(sb, 3, 2, WD_PIX) = AP_AVG3(B44, A44, X44, WD_PIX);
+    AP_DST(sb, 2, 0, WD_PIX) = AP_DST(sb, 3, 1, WD_PIX) = AP_AVG3(C44, B44, A44, WD_PIX);
+    AP_DST(sb, 3, 0, WD_PIX) = AP_AVG3(D44, C44, B44, WD_PIX);
+    return sb;
+};
+
+/* 5 */
+// 4x4 diagonal prediction ("LD"): positions whose two AP_DST indices have the same sum share
+// one 3-tap average of the neighbours A..H.
+//   abcd, efgh : neighbour pixel groups; A44..H44 are macros defined elsewhere that presumably
+//                select individual pixels from these parameters by name - confirm.
+ap_uint<WD_PIX * 16> hls_LD4( // ref: lut:98  3.19+1.25  , ,
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<WD_PIX * 4> efgh) { //
+#pragma HLS PIPELINE
+
+    ap_uint<WD_PIX * 16> sb;
+    // Seven anti-diagonals, from (0,0) to (3,3); the last window repeats H.
+    AP_DST(sb, 0, 0, WD_PIX) = AP_AVG3(A44, B44, C44, WD_PIX);
+    AP_DST(sb, 1, 0, WD_PIX) = AP_DST(sb, 0, 1, WD_PIX) = AP_AVG3(B44, C44, D44, WD_PIX);
+    AP_DST(sb, 2, 0, WD_PIX) = AP_DST(sb, 1, 1, WD_PIX) = AP_DST(sb, 0, 2, WD_PIX) = AP_AVG3(C44, D44, E44, WD_PIX);
+    AP_DST(sb, 3, 0, WD_PIX) = AP_DST(sb, 2, 1, WD_PIX) = AP_DST(sb, 1, 2, WD_PIX) = AP_DST(sb, 0, 3, WD_PIX) =
+        AP_AVG3(D44, E44, F44, WD_PIX);
+    AP_DST(sb, 3, 1, WD_PIX) = AP_DST(sb, 2, 2, WD_PIX) = AP_DST(sb, 1, 3, WD_PIX) = AP_AVG3(E44, F44, G44, WD_PIX);
+    AP_DST(sb, 3, 2, WD_PIX) = AP_DST(sb, 2, 3, WD_PIX) = AP_AVG3(F44, G44, H44, WD_PIX);
+    AP_DST(sb, 3, 3, WD_PIX) = AP_AVG3(G44, H44, H44, WD_PIX);
+    return sb;
+};
+
+/* 6 */
+// 4x4 directional prediction ("VR"): mixes 2-tap (AP_AVG2) and 3-tap (AP_AVG3) averages of the
+// neighbours K, J, I, X, A, B, C, D.
+//   abcd, ijkl : neighbour pixel groups; A44..D44 / I44..K44 are macros defined elsewhere that
+//                presumably select individual pixels from these parameters by name - confirm.
+//   X44        : corner pixel.
+ap_uint<WD_PIX * 16> hls_VR4( // ref: lut: 100  3.19+1.25 , ,
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<WD_PIX * 4> ijkl,
+    ap_uint<WD_PIX> X44) { //
+#pragma HLS PIPELINE
+    ap_uint<WD_PIX * 16> sb;
+    // Positions filled with 2-tap averages.
+    AP_DST(sb, 0, 0, WD_PIX) = AP_DST(sb, 1, 2, WD_PIX) = AP_AVG2(X44, A44, WD_PIX);
+    AP_DST(sb, 1, 0, WD_PIX) = AP_DST(sb, 2, 2, WD_PIX) = AP_AVG2(A44, B44, WD_PIX);
+    AP_DST(sb, 2, 0, WD_PIX) = AP_DST(sb, 3, 2, WD_PIX) = AP_AVG2(B44, C44, WD_PIX);
+    AP_DST(sb, 3, 0, WD_PIX) = AP_AVG2(C44, D44, WD_PIX);
+
+    // Positions filled with 3-tap averages.
+    AP_DST(sb, 0, 3, WD_PIX) = AP_AVG3(K44, J44, I44, WD_PIX);
+    AP_DST(sb, 0, 2, WD_PIX) = AP_AVG3(J44, I44, X44, WD_PIX);
+    AP_DST(sb, 0, 1, WD_PIX) = AP_DST(sb, 1, 3, WD_PIX) = AP_AVG3(I44, X44, A44, WD_PIX);
+    AP_DST(sb, 1, 1, WD_PIX) = AP_DST(sb, 2, 3, WD_PIX) = AP_AVG3(X44, A44, B44, WD_PIX);
+    AP_DST(sb, 2, 1, WD_PIX) = AP_DST(sb, 3, 3, WD_PIX) = AP_AVG3(A44, B44, C44, WD_PIX);
+    AP_DST(sb, 3, 1, WD_PIX) = AP_AVG3(B44, C44, D44, WD_PIX);
+    return sb;
+};
+
+/* 7 */
+// 4x4 directional prediction ("VL"): mixes 2-tap (AP_AVG2) and 3-tap (AP_AVG3) averages of the
+// neighbours A..H.
+//   abcd, efgh : neighbour pixel groups; A44..H44 are macros defined elsewhere that presumably
+//                select individual pixels from these parameters by name - confirm.
+ap_uint<WD_PIX * 16> hls_VL4( // ref: lut: 100 3.19+1.25 , ,
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<WD_PIX * 4> efgh) { //
+#pragma HLS PIPELINE
+    ap_uint<WD_PIX * 16> sb;
+    // Positions filled with 2-tap averages.
+    AP_DST(sb, 0, 0, WD_PIX) = AP_AVG2(A44, B44, WD_PIX);
+    AP_DST(sb, 1, 0, WD_PIX) = AP_DST(sb, 0, 2, WD_PIX) = AP_AVG2(B44, C44, WD_PIX);
+    AP_DST(sb, 2, 0, WD_PIX) = AP_DST(sb, 1, 2, WD_PIX) = AP_AVG2(C44, D44, WD_PIX);
+    AP_DST(sb, 3, 0, WD_PIX) = AP_DST(sb, 2, 2, WD_PIX) = AP_AVG2(D44, E44, WD_PIX);
+
+    // Positions filled with 3-tap averages.
+    AP_DST(sb, 0, 1, WD_PIX) = AP_AVG3(A44, B44, C44, WD_PIX);
+    AP_DST(sb, 1, 1, WD_PIX) = AP_DST(sb, 0, 3, WD_PIX) = AP_AVG3(B44, C44, D44, WD_PIX);
+    AP_DST(sb, 2, 1, WD_PIX) = AP_DST(sb, 1, 3, WD_PIX) = AP_AVG3(C44, D44, E44, WD_PIX);
+    AP_DST(sb, 3, 1, WD_PIX) = AP_DST(sb, 2, 3, WD_PIX) = AP_AVG3(D44, E44, F44, WD_PIX);
+    AP_DST(sb, 3, 2, WD_PIX) = AP_AVG3(E44, F44, G44, WD_PIX);
+    AP_DST(sb, 3, 3, WD_PIX) = AP_AVG3(F44, G44, H44, WD_PIX);
+    return sb;
+};
+
+/* 8 */
+// 4x4 directional prediction ("HU"): built only from the neighbours I, J, K, L using 2-tap and
+// 3-tap averages; the remaining six positions are set to L unchanged.
+//   ijkl : neighbour pixel group; I44..L44 are macros defined elsewhere that presumably select
+//          individual pixels from this parameter by name - confirm.
+ap_uint<WD_PIX * 16> hls_HU4(   // ref: lut 54 3.19+1.25 , ,
+    ap_uint<WD_PIX * 4> ijkl) { //
+#pragma HLS PIPELINE
+    ap_uint<WD_PIX * 16> sb;
+    AP_DST(sb, 0, 0, WD_PIX) = AP_AVG2(I44, J44, WD_PIX);
+    AP_DST(sb, 2, 0, WD_PIX) = AP_DST(sb, 0, 1, WD_PIX) = AP_AVG2(J44, K44, WD_PIX);
+    AP_DST(sb, 2, 1, WD_PIX) = AP_DST(sb, 0, 2, WD_PIX) = AP_AVG2(K44, L44, WD_PIX);
+    AP_DST(sb, 1, 0, WD_PIX) = AP_AVG3(I44, J44, K44, WD_PIX);
+    AP_DST(sb, 3, 0, WD_PIX) = AP_DST(sb, 1, 1, WD_PIX) = AP_AVG3(J44, K44, L44, WD_PIX);
+    AP_DST(sb, 3, 1, WD_PIX) = AP_DST(sb, 1, 2, WD_PIX) = AP_AVG3(K44, L44, L44, WD_PIX);
+    // Everything past the last neighbour simply repeats L.
+    AP_DST(sb, 3, 2, WD_PIX) = AP_DST(sb, 2, 2, WD_PIX) = AP_DST(sb, 0, 3, WD_PIX) = AP_DST(sb, 1, 3, WD_PIX) =
+        AP_DST(sb, 2, 3, WD_PIX) = AP_DST(sb, 3, 3, WD_PIX) = L44;
+    return sb;
+};
+
+/* 9 */
+// 4x4 directional prediction ("HD"): mixes 2-tap (AP_AVG2) and 3-tap (AP_AVG3) averages of the
+// neighbours L, K, J, I, X, A, B, C.
+//   abcd, ijkl : neighbour pixel groups; A44..C44 / I44..L44 are macros defined elsewhere that
+//                presumably select individual pixels from these parameters by name - confirm.
+//   X44        : corner pixel.
+ap_uint<WD_PIX * 16> hls_HD4( // ref:544 vs lut:100 ,3.19+1.25 ,
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<WD_PIX * 4> ijkl,
+    ap_uint<WD_PIX> X44) { //
+#pragma HLS PIPELINE
+    ap_uint<WD_PIX * 16> sb;
+    // Positions filled with 2-tap averages.
+    AP_DST(sb, 0, 0, WD_PIX) = AP_DST(sb, 2, 1, WD_PIX) = AP_AVG2(I44, X44, WD_PIX);
+    AP_DST(sb, 0, 1, WD_PIX) = AP_DST(sb, 2, 2, WD_PIX) = AP_AVG2(J44, I44, WD_PIX);
+    AP_DST(sb, 0, 2, WD_PIX) = AP_DST(sb, 2, 3, WD_PIX) = AP_AVG2(K44, J44, WD_PIX);
+    AP_DST(sb, 0, 3, WD_PIX) = AP_AVG2(L44, K44, WD_PIX);
+
+    // Positions filled with 3-tap averages.
+    AP_DST(sb, 3, 0, WD_PIX) = AP_AVG3(A44, B44, C44, WD_PIX);
+    AP_DST(sb, 2, 0, WD_PIX) = AP_AVG3(X44, A44, B44, WD_PIX);
+    AP_DST(sb, 1, 0, WD_PIX) = AP_DST(sb, 3, 1, WD_PIX) = AP_AVG3(I44, X44, A44, WD_PIX);
+    AP_DST(sb, 1, 1, WD_PIX) = AP_DST(sb, 3, 2, WD_PIX) = AP_AVG3(J44, I44, X44, WD_PIX);
+    AP_DST(sb, 1, 2, WD_PIX) = AP_DST(sb, 3, 3, WD_PIX) = AP_AVG3(K44, J44, I44, WD_PIX);
+    AP_DST(sb, 1, 3, WD_PIX) = AP_AVG3(L44, K44, J44, WD_PIX);
+
+    return sb;
+};
+
+/* 10 */
+// 4x4 "TM" prediction: position (i, j) becomes abcd[j] + ijkl[i] - X44, saturated to [0, 255].
+//   abcd : 4 pixels indexed by the inner loop counter.
+//   ijkl : 4 pixels indexed by the outer loop counter.
+//   X44  : pixel subtracted from every sum.
+ap_uint<WD_PIX * 16> hls_TM4( // ref: lut 516, 4.07+1.25,
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<WD_PIX * 4> ijkl,
+    ap_uint<WD_PIX> X44) { //
+#pragma HLS PIPELINE
+    ap_uint<WD_PIX * 16> sb;
+    for (int row = 0; row < 4; row++) {
+#pragma HLS unroll
+        for (int col = 0; col < 4; col++) {
+#pragma HLS unroll
+            // Two extra bits keep the signed intermediate from wrapping before the clamp.
+            ap_int<WD_PIX + 2> pix =
+                AP_TREEADD2((VCT_GET(abcd, col, WD_PIX)), (VCT_GET(ijkl, row, WD_PIX)), WD_PIX) - X44;
+            if (pix < 0) {
+                pix = 0;
+            } else if (pix > 255) {
+                pix = 255;
+            }
+            SB_GET(sb, row, col, WD_PIX) = pix;
+        }
+    }
+    return sb;
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+// Builds the 4x4 prediction for chroma sub-block n.
+// Sub-blocks 0..3 use neighbour groups 0..1 and corner ap_u_m; sub-blocks 4..7 use neighbour
+// groups 2..3 and corner ap_v_m. Within a plane, bit 0 of the local index picks the top group
+// and bit 1 picks the left group.
+//   mode         : B_DC_PRED, B_TM_PRED or B_VE_PRED; any other value selects the horizontal predictor.
+//   istop/isleft : edge flags forwarded to the predictors.
+ap_uint<WD_PIX * 16> hls_p8_test(ap_uint<2> mode,
+                                 ap_uint<3> n,
+                                 ap_uint<1> istop,
+                                 ap_uint<1> isleft,
+                                 ap_uint<WD_PIX * 4> ap_uv_top_c[4],
+                                 ap_uint<WD_PIX * 4> ap_uv_left_c[4],
+                                 ap_uint<WD_PIX> ap_u_m,
+                                 ap_uint<WD_PIX> ap_v_m) {
+    // Second plane: rebase the index to 0..3 and shift the neighbour groups by two.
+    const bool second_plane = !(n < 4);
+    if (second_plane) n -= 4;
+    const int base = second_plane ? 2 : 0;
+
+    // Both neighbour groups of the plane, needed by the DC predictor.
+    ap_uint<WD_PIX * 4> top[2];
+    ap_uint<WD_PIX * 4> left[2];
+    top[0] = ap_uv_top_c[base];
+    top[1] = ap_uv_top_c[base + 1];
+    left[0] = ap_uv_left_c[base];
+    left[1] = ap_uv_left_c[base + 1];
+
+    // The single groups adjacent to this sub-block, plus the plane's corner pixel.
+    ap_uint<WD_PIX * 4> top_quad = ap_uv_top_c[base + (n & 1)];
+    ap_uint<WD_PIX * 4> left_quad = ap_uv_left_c[base + (n >> 1)];
+    ap_uint<WD_PIX> corner = second_plane ? ap_v_m : ap_u_m;
+
+    ap_uint<WD_PIX * 16> pred;
+    switch (mode) {
+        case B_DC_PRED:
+            pred = hls_DC16_4_uv_old(top, left, istop, isleft);
+            break;
+        case B_TM_PRED:
+            pred = hls_TM16_4(top_quad, left_quad, corner, istop, isleft);
+            break;
+        case B_VE_PRED:
+            pred = hls_VE16_4(top_quad, istop);
+            break;
+        default:
+            pred = hls_HE16_4(left_quad, isleft);
+            break;
+    }
+    return pred;
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+// DC prediction for one chroma plane, emitted as one 4x4 block filled with a single value.
+//   top, left    : two 4-pixel groups each.
+//   istop/isleft : 0 means the corresponding neighbours are used; non-zero means they are not.
+// The value is (sum + 8) >> 4 over the usable neighbours; when only one side is usable its sum
+// is doubled so the same divisor applies; when neither is usable the value is 0x80.
+ap_uint<WD_PIX * 16> hls_DC16_4_uv_old( // ref: lut:56, 3.19+1.25
+    ap_uint<WD_PIX * 4> top[2],
+    ap_uint<WD_PIX * 4> left[2],
+    ap_uint<1> istop,
+    ap_uint<1> isleft) {
+#pragma HLS PIPELINE
+    const bool use_top = (istop == 0);
+    const bool use_left = (isleft == 0);
+    ap_uint<WD_PIX + 5> acc = 0;
+
+    if (!use_top && !use_left) {
+        // No neighbours at all: fixed mid-grey.
+        acc = 0X80;
+    } else {
+        if (use_top) {
+            for (int g = 0; g < 2; ++g) acc += (AP_TREEADD4_VCT(top[g], WD_PIX));
+        }
+        if (use_left) {
+            for (int g = 0; g < 2; ++g) acc += (AP_TREEADD4_VCT(left[g], WD_PIX));
+        }
+        // Only one side contributed: count it twice.
+        if (!(use_top && use_left)) acc += acc;
+        acc = (acc + 8) >> 4;
+    }
+
+    ap_uint<WD_PIX> fill = acc(WD_PIX - 1, 0);
+    ap_uint<WD_PIX * 16> sb;
+    SB_SET_LINE_VAL(sb, 0, WD_PIX, fill);
+    SB_SET_LINE_VAL(sb, 1, WD_PIX, fill);
+    SB_SET_LINE_VAL(sb, 2, WD_PIX, fill);
+    SB_SET_LINE_VAL(sb, 3, WD_PIX, fill);
+    return sb;
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+// "TM" prediction with edge fallbacks:
+//   istop != 0               -> horizontal predictor (hls_HE16_4), whatever isleft is
+//   istop == 0, isleft != 0  -> vertical predictor (hls_VE16_4)
+//   istop == 0, isleft == 0  -> full TM predictor (hls_TM4)
+ap_uint<WD_PIX * 16> hls_TM16_4( // ref: lut:56, 3.19+1.25
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<WD_PIX * 4> ijkl,
+    ap_uint<WD_PIX> X44,
+    ap_uint<1> istop,
+    ap_uint<1> isleft) { //
+#pragma HLS PIPELINE
+    if (istop != 0) {
+        return hls_HE16_4(ijkl, isleft);
+    }
+    if (isleft != 0) {
+        return hls_VE16_4(abcd, istop);
+    }
+    return hls_TM4(abcd, ijkl, X44);
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+// Vertical prediction for a 4x4 block: column k is filled with the k-th pixel of abcd, or with
+// the constant 127 in every column when istop == 1.
+//   abcd  : 4 pixels; read through the A44..D44 macros.
+//   istop : 1 selects the constant fill.
+ap_uint<WD_PIX * 16> hls_VE16_4( // ref: lut:56, 3.19+1.25//lut 452vs997, 2.72+1.25ns,
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<1> istop) { // vertical
+#pragma HLS PIPELINE
+    // Start from the fallback value and overwrite it when the pixels are usable.
+    ap_uint<WD_PIX> col0 = 127;
+    ap_uint<WD_PIX> col1 = 127;
+    ap_uint<WD_PIX> col2 = 127;
+    ap_uint<WD_PIX> col3 = 127;
+    if (istop != 1) {
+        col0 = (A44);
+        col1 = (B44);
+        col2 = (C44);
+        col3 = (D44);
+    }
+
+    ap_uint<WD_PIX * 16> sb;
+    SB_SET_COL_VAL(sb, 0, WD_PIX, col0);
+    SB_SET_COL_VAL(sb, 1, WD_PIX, col1);
+    SB_SET_COL_VAL(sb, 2, WD_PIX, col2);
+    SB_SET_COL_VAL(sb, 3, WD_PIX, col3);
+    return sb;
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+ap_uint<WD_PIX * 16> hls_HE16_4( // Horizontal predictor: each line of the result holds one value. ref: lut:56, 3.19+1.25
+    ap_uint<WD_PIX * 4> ijkl,
+    ap_uint<1> isleft) { // isleft == 1 replaces every value with the constant 129
+#pragma HLS PIPELINE
+    ap_uint<WD_PIX * 16> sb;
+    const ap_uint<WD_PIX> val0 = (isleft != 1) ? (I44) : 129; // I44..L44 are macros; presumably the four lanes of ijkl - TODO confirm
+    const ap_uint<WD_PIX> val1 = (isleft != 1) ? (J44) : 129;
+    const ap_uint<WD_PIX> val2 = (isleft != 1) ? (K44) : 129;
+    const ap_uint<WD_PIX> val3 = (isleft != 1) ? (L44) : 129;
+    SB_SET_LINE_VAL(sb, 0, WD_PIX, val0); // presumably fills line 0 of sb with val0 - TODO confirm macro
+    SB_SET_LINE_VAL(sb, 1, WD_PIX, val1);
+    SB_SET_LINE_VAL(sb, 2, WD_PIX, val2);
+    SB_SET_LINE_VAL(sb, 3, WD_PIX, val3);
+    return sb;
+};
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+ap_int<WD_WHT * 16> hls_FTransformWHT(ap_int<WD_DCT * 16> in) { // two butterfly passes over 16 packed values, each result halved
+    // FF:0, lut:1248; 4.12+0.62ns; Latency:1
+    // input is 12b signed
+    ap_int<WD_WHT * 16> out;
+    ap_int<WD_DCT + 2> tmp[16]; // results of the first pass
+    for (int i = 0; i < 4; ++i) { // first pass: combines elements 0..3 of each group of four (indices i * 4 + k)
+#pragma HLS unroll
+        ap_int<WD_DCT + 1> a0 =
+            ((ap_int<WD_DCT>)VCT_GET(in, 0 + i * 4, WD_DCT) + (ap_int<WD_DCT>)VCT_GET(in, 2 + i * 4, WD_DCT)); // 13b
+        ap_int<WD_DCT + 1> a1 =
+            ((ap_int<WD_DCT>)VCT_GET(in, 1 + i * 4, WD_DCT) + (ap_int<WD_DCT>)VCT_GET(in, 3 + i * 4, WD_DCT));
+        ap_int<WD_DCT + 1> a2 =
+            ((ap_int<WD_DCT>)VCT_GET(in, 1 + i * 4, WD_DCT) - (ap_int<WD_DCT>)VCT_GET(in, 3 + i * 4, WD_DCT));
+        ap_int<WD_DCT + 1> a3 =
+            ((ap_int<WD_DCT>)VCT_GET(in, 0 + i * 4, WD_DCT) - (ap_int<WD_DCT>)VCT_GET(in, 2 + i * 4, WD_DCT));
+        tmp[0 + i * 4] = a0 + a1; // 14b
+        tmp[1 + i * 4] = a3 + a2;
+        tmp[2 + i * 4] = a3 - a2;
+        tmp[3 + i * 4] = a0 - a1;
+    }
+    for (int i = 0; i < 4; ++i) { // second pass: combines tmp entries spaced four apart (indices i, 4 + i, 8 + i, 12 + i)
+#pragma HLS unroll
+        ap_int<WD_DCT + 3> a0 = (tmp[0 + i] + tmp[8 + i]); // 15b
+        ap_int<WD_DCT + 3> a1 = (tmp[4 + i] + tmp[12 + i]);
+        ap_int<WD_DCT + 3> a2 = (tmp[4 + i] - tmp[12 + i]);
+        ap_int<WD_DCT + 3> a3 = (tmp[0 + i] - tmp[8 + i]);
+        ap_int<WD_DCT + 4> b0 = a0 + a1; // 16b
+        ap_int<WD_DCT + 4> b1 = a3 + a2;
+        ap_int<WD_DCT + 4> b2 = a3 - a2;
+        ap_int<WD_DCT + 4> b3 = a0 - a1;
+        VCT_GET(out, 0 + i, WD_WHT) = b0 >> 1; // 15b; VCT_GET presumably selects lane (0 + i) of out - TODO confirm macro
+        VCT_GET(out, 4 + i, WD_WHT) = b1 >> 1;
+        VCT_GET(out, 8 + i, WD_WHT) = b2 >> 1;
+        VCT_GET(out, 12 + i, WD_WHT) = b3 >> 1;
+    }
+    return out;
+}
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+void hls_SetBestAs4_mode_widen(ap_uint<WD_MODE> ap_y_top_mode[MAX_NUM_MB_W * 4], // out: entries x_sb_w .. x_sb_w + 3 receive the y == 3 modes
+                               ap_uint<WD_MODE> ap_y_left_mode[4],              // out: entry y receives the x == 3 mode of row y
+                               ap_uint<WD_MODE> ap_y4_top_c_mode[16],           // in: 16 modes, indexed as x + y * 4
+                               ap_uint<WD_MODE> ap_y_left_c_mode[4],            // not referenced by this function
+                               ap_uint<WD_MODE * 16>* ap_y_mode_b,              // out: packed copy of all 16 modes
+                               ap_uint<LG2_MAX_NUM_MB_W + 2> x_sb_w) {          // base index into ap_y_top_mode
+    for (int y = 0; y < 4; y++) {
+#pragma HLS unroll
+        for (int x = 0; x < 4; x++) {
+#pragma HLS unroll
+            SB_GET((*ap_y_mode_b), y, x, WD_MODE) = ap_y4_top_c_mode[x + y * 4]; // SB_GET presumably selects element (y, x) - TODO confirm macro
+            if (x == 3) ap_y_left_mode[y] = ap_y4_top_c_mode[x + y * 4];         // last column is also kept in ap_y_left_mode
+            if (y == 3) ap_y_top_mode[x_sb_w + x] = ap_y4_top_c_mode[x + y * 4]; // last row is also kept in ap_y_top_mode
+        }
+    }
+};
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+
+void hls_SetBestAs16_mode_widen(ap_uint<WD_MODE> ap_y_top_mode[MAX_NUM_MB_W * 4], // out: entries x_sb_w .. x_sb_w + 3 receive ap_y16_mode_c
+                                ap_uint<WD_MODE> ap_y_left_mode[4],              // out: all four entries receive ap_y16_mode_c
+                                ap_uint<WD_MODE> ap_y16_mode_c,                  // in: the single mode replicated everywhere
+                                ap_uint<WD_MODE * 16>* ap_y_mode_b,              // out: all 16 packed elements receive ap_y16_mode_c
+                                ap_uint<LG2_MAX_NUM_MB_W + 2> x_sb_w) {          // base index into ap_y_top_mode
+    for (int y = 0; y < 4; y++) {
+#pragma HLS unroll
+        for (int x = 0; x < 4; x++) {
+#pragma HLS unroll
+            SB_GET((*ap_y_mode_b), y, x, WD_MODE) = ap_y16_mode_c; // it->ap_rd_y16_b->mode; SB_GET presumably selects element (y, x) - TODO confirm
+            if (x == 3) ap_y_left_mode[y] = ap_y16_mode_c;         // it->ap_rd_y16_b->mode; written once per row
+            if (y == 3) ap_y_top_mode[x_sb_w + x] = ap_y16_mode_c; // it->ap_rd_y16_b->mode; written once per column
+        }
+    }
+};
+//////////======================================================================/////////////////////////////
+//////////=====================   TopVp8_RecordCoeff_hls_cnt          ===========/////////////////////////////
+//////////======================================================================/////////////////////////////
+void TopVp8_RecordCoeff_hls_cnt(ap_uint<LG2_MAX_NUM_MB_W> mb_w, // trip count of the inner (x_mb) loop
+                                ap_uint<LG2_MAX_NUM_MB_H> mb_h, // trip count of the outer (y_mb) loop
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv,
+                                hls::stream<ap_int<64> >* str_pred,
+                                hls::stream<ap_int<6> >* str_ret,
+                                // output: the *2 streams and the rec/cnt streams are all passed on to RecordCoeff_dataflow
+                                hls::stream<ap_uint<1> >& str_mb_type,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc2,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y2,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv2,
+                                hls::stream<ap_int<64> >* str_pred2,
+                                hls::stream<ap_int<6> >* str_ret2,
+                                hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_dc,
+                                hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_ac,
+                                hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_uv,
+                                hls::stream<ap_uint<8> >& str_cnt_dc,
+                                hls::stream<ap_uint<8> >& str_cnt_ac,
+                                hls::stream<ap_uint<8> >& str_cnt_uv) {
+    hls::stream<ap_uint<2> > str_dc_ctx; // NOTE(review): the nine local streams below are never used in this function
+    hls::stream<ap_uint<2> > str_ac_ctx;
+    hls::stream<ap_uint<2> > str_uv_ctx;
+    hls::stream<ap_int<5> > str_dc_last;
+    hls::stream<ap_int<5> > str_ac_last;
+    hls::stream<ap_int<5> > str_uv_last;
+    hls::stream<ap_int<WD_LEVEL> > str_dc;
+    hls::stream<ap_int<WD_LEVEL> > str_ac;
+    hls::stream<ap_int<WD_LEVEL> > str_uv;
+
+    ap_NoneZero ap_nz;         // per-column non-zero state, accessed through load_top9 / store_nz
+    ap_uint<9> left_nz_dc = 0; // bit 8 carries the left DC flag from one macroblock to the next
+    ap_uint<9> ap_left_nz = 0; // bits 7..0 carry the left Y/U/V flags from one macroblock to the next
+    for (int y_mb = 0; y_mb < mb_h; y_mb++) { // printf("\ny=%2d: ", y_mb);
+#pragma HLS LOOP_TRIPCOUNT min = 68 max = 256
+#pragma HLS PIPELINE off
+    RECORD_COEFF:
+        for (int x_mb = 0; x_mb < mb_w; x_mb++) {
+#pragma HLS LOOP_TRIPCOUNT min = 120 max = 256
+#pragma HLS PIPELINE
+            if (x_mb == 0) { // start of a row: no left neighbour, so clear the carried left flags
+                left_nz_dc = 0;
+                ap_left_nz = 0;
+            }
+            // load the top non-zero context for this macroblock
+            ap_uint<9> ap_top_nz = ap_nz.load_top9(x_mb, y_mb);
+            // ap_uint<9> ap_left_nz = ap_nz.load_left9(x_mb);
+
+            // one private copy of the top context per callee, each passed by reference below
+            ap_uint<9> top_nz_dc = ap_top_nz;
+            ap_uint<9> top_nz_y = ap_top_nz;
+            ap_uint<9> top_nz_uv = ap_top_nz;
+
+            // likewise one private copy of the left context for the Y and UV callees
+            ap_uint<9> left_nz_y = ap_left_nz;
+            ap_uint<9> left_nz_uv = ap_left_nz;
+
+            left_nz_dc =
+                RecordCoeff_dataflow(str_level_dc, str_level_y, str_level_uv, str_pred, str_ret,
+                                     // output
+                                     str_mb_type, str_level_dc2, str_level_y2, str_level_uv2, str_pred2, str_ret2,
+                                     str_rec_dc, str_rec_ac, str_rec_uv, str_cnt_dc, str_cnt_ac, str_cnt_uv,
+                                     top_nz_dc,  //
+                                     left_nz_dc, // = ap_left_nz;
+                                     top_nz_y,   // = ap_top_nz;
+                                     left_nz_y,  // = ap_left_nz;
+                                     top_nz_uv,  // = ap_top_nz;
+                                     left_nz_uv  // = ap_left_nz;
+                                     );
+            top_nz_dc[8] = left_nz_dc[0]; // bit 0 of the return value carries the new top DC flag
+            left_nz_dc[0] = 0;            // strip it again so only bit 8 (left DC) is carried forward
+            ap_uint<9> top_nz_;           // merged top context; all nine bits are written below
+            top_nz_(3, 0) = top_nz_y(3, 0);
+            top_nz_(7, 4) = top_nz_uv(7, 4);
+            top_nz_[8] = top_nz_dc[8];
+
+            ap_uint<9> left_nz_ = 0; // fix: bit 8 is read below but never written; zero-init avoids an indeterminate read
+            left_nz_(3, 0) = left_nz_y(3, 0);
+            left_nz_(7, 4) = left_nz_uv(7, 4);
+            ap_uint<25> nz = 0; // packed non-zero word handed to ap_nz.store_nz
+            nz |= (ap_uint<25>)((top_nz_[0] << 12) | (top_nz_[1] << 13));
+            nz |= (ap_uint<25>)((top_nz_[2] << 14) | (top_nz_[3] << 15));
+            nz |= (ap_uint<25>)((top_nz_[4] << 18) | (top_nz_[5] << 19));
+            nz |= (ap_uint<25>)((top_nz_[6] << 22) | (top_nz_[7] << 23));
+            nz |= (ap_uint<25>)((top_nz_[8] << 24)); // we propagate the _top_ bit, esp. for intra4
+            // left
+            nz |= (ap_uint<25>)((left_nz_[0] << 3) | (left_nz_[1] << 7));
+            nz |= (ap_uint<25>)((left_nz_[2] << 11));
+            nz |= (ap_uint<25>)((left_nz_[4] << 17) | (left_nz_[6] << 21));
+
+            ap_nz.left_nz[8] = left_nz_[8]; // NOTE(review): the live left DC flag is left_nz_dc[8]; confirm whether it should be stored here
+            ap_nz.nz_current = nz; //*it->nz_;
+            ap_nz.store_nz(x_mb);
+
+            // unpack the left flags for the next macroblock of this row
+            ap_left_nz[0] = nz(3, 3);
+            ap_left_nz[1] = nz(7, 7);
+            ap_left_nz[2] = nz(11, 11);
+            ap_left_nz[3] = nz(15, 15);
+            // left-U
+            ap_left_nz[4] = nz(17, 17);
+            ap_left_nz[5] = nz(19, 19);
+            // left-V
+            ap_left_nz[6] = nz(21, 21);
+            ap_left_nz[7] = nz(23, 23);
+        }
+    }
+}
+
+//////////=====================   TopVp8_RecordCoeff_hls_cnt          ===========/////////////////////////////
+ap_uint<9> RecordCoeff_dataflow(hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc, // handles one macroblock: forwards pred/ret, then runs the DC, Y and UV recorders
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv,
+                                hls::stream<ap_int<64> >* str_pred,
+                                hls::stream<ap_int<6> >* str_ret,
+                                // output
+                                hls::stream<ap_uint<1> >& str_mb_type,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc2,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y2,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv2,
+                                hls::stream<ap_int<64> >* str_pred2,
+                                hls::stream<ap_int<6> >* str_ret2,
+                                hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_dc,
+                                hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_ac,
+                                hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_uv,
+                                hls::stream<ap_uint<8> >& str_cnt_dc,
+                                hls::stream<ap_uint<8> >& str_cnt_ac,
+                                hls::stream<ap_uint<8> >& str_cnt_uv,
+                                ap_uint<9>& top_nz_dc, // in/out: bit 8 is read and updated by the DC recorder
+                                ap_uint<9> left_nz_dc, // in (by value): the updated flags come back in the return value
+                                ap_uint<9>& top_nz_y,  // = ap_top_nz; in/out: bits 0..3 are updated by the Y recorder
+                                ap_uint<9>& left_nz_y, // = ap_left_nz; in/out: bits 0..3 are updated by the Y recorder
+                                ap_uint<9>& top_nz_uv, // = ap_top_nz; in/out: bits 4..7 are updated by the UV recorder
+                                ap_uint<9>& left_nz_uv // = ap_left_nz; in/out: bits 4..7 are updated by the UV recorder
+                                ) {
+#pragma HLS INLINE OFF
+#pragma HLS DATAFLOW
+    // for old  pred pass: one word is copied unchanged from str_pred to str_pred2
+    ap_uint<64> pred = str_pred->read();
+    str_pred2->write(pred);
+    // for old ret pass: one word is copied unchanged from str_ret to str_ret2
+    ap_uint<6> ret = str_ret->read();
+    str_ret2->write(ret);
+    // get mb_type: bit 4 of ret, also published on str_mb_type
+    ap_uint<1> mb_type = ret(4, 4);
+    str_mb_type.write(mb_type);
+    ap_uint<9> leftreturn = RecordCoeff_dataflow_dc(mb_type, str_level_dc, str_level_dc2, str_rec_dc, str_cnt_dc,
+                                                    top_nz_dc, //
+                                                    left_nz_dc);
+    RecordCoeff_dataflow_y(mb_type, str_level_y, str_level_y2, str_rec_ac, str_cnt_ac,
+                           top_nz_y, // = ap_top_nz;
+                           left_nz_y);
+    RecordCoeff_dataflow_uv(str_level_uv, str_level_uv2, str_rec_uv, str_cnt_uv,
+                            top_nz_uv, // = ap_top_nz;
+                            left_nz_uv // = ap_left_nz;
+                            );
+    return leftreturn; // value produced by RecordCoeff_dataflow_dc, returned unchanged
+}
+//////////=====================   TopVp8_RecordCoeff_hls_cnt          ===========/////////////////////////////
+ap_uint<9> RecordCoeff_dataflow_dc(ap_uint<1> mb_type, // 1: the DC block is recorded; otherwise it is only forwarded
+                                   hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc,
+                                   // output
+                                   hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc2,
+                                   hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_dc,
+                                   hls::stream<ap_uint<8> >& str_cnt_dc,
+                                   ap_uint<9>& top_nz_dc, // in/out: only bit 8 is read or written here
+                                   ap_uint<9> left_nz_dc  // = ap_left_nz; by value: only bit 8 is read or written here
+                                   ) {
+#pragma HLS INLINE OFF
+    ap_int<WD_LEVEL* 16> tmp16 = str_level_dc->read();
+    str_level_dc2->write(tmp16); // the input word is always forwarded, whatever mb_type is
+    if (mb_type == 1) { // i16x16
+        ap_uint<2> ctx = top_nz_dc[8] + left_nz_dc[8]; // context = number of set neighbour flags (0..2)
+        ap_int<5> last = FindLast(tmp16);              // index of the last non-zero lane, -1 if none
+        VP8RecordCoeffs_hls_str_w_cnt(ctx, tmp16, 0, last, str_rec_dc, str_cnt_dc);
+        top_nz_dc[8] = left_nz_dc[8] = last < 0 ? 0 : 1; // both flags become 1 when the block has a non-zero lane
+        int b = left_nz_dc[8]; // only referenced by the commented-out printf below
+        // printf("%d",b);
+    } // else printf(" ");//int a=top_nz_dc;int b=left_nz_dc;printf("%d",b);
+    return left_nz_dc & 256 | top_nz_dc[8]; // & binds tighter than |: bit 8 = left flag, bit 0 = top flag
+}
+//////////=====================   TopVp8_RecordCoeff_hls_cnt          ===========/////////////////////////////
+
+void RecordCoeff_dataflow_y(ap_uint<1> mb_type, // 1: recording starts at lane 1 (first = mb_type == 1), else at lane 0
+                            hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y,
+                            // output
+                            hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y2,
+                            hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_ac,
+                            hls::stream<ap_uint<8> >& str_cnt_ac,
+                            ap_uint<9>& top_nz_y, // = ap_top_nz; in/out: bits 0..3, indexed by x
+                            ap_uint<9>& left_nz_y // = ap_left_nz; in/out: bits 0..3, indexed by y
+                            ) {
+#pragma HLS INLINE OFF
+    int x, y;
+    // luma-AC: 4 x 4 iterations, one word read from str_level_y per iteration
+    for (y = 0; y < 4; ++y) {
+#pragma HLS PIPELINE
+    RECORD_COEFF_Y_IN:
+        for (x = 0; x < 4; ++x) {
+#pragma HLS PIPELINE
+            ap_uint<2> ctx = top_nz_y[x] + left_nz_y[y]; // context = number of set neighbour flags (0..2)
+            ap_int<WD_LEVEL* 16> tmp = str_level_y->read();
+            str_level_y2->write(tmp); // for old: the word is forwarded unchanged
+            ap_int<5> last = FindLast(tmp);
+            VP8RecordCoeffs_hls_str_w_cnt(ctx, tmp, mb_type == 1, last, str_rec_ac, str_cnt_ac);
+            top_nz_y[x] = left_nz_y[y] = last < 0 ? 0 : 1; // updated flags feed the next block to the right / below
+        }
+    }
+}
+//////////=====================   TopVp8_RecordCoeff_hls_cnt          ===========/////////////////////////////
+
+void RecordCoeff_dataflow_uv(hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv, // in: 8 words are read per call (2 x 2 x 2 iterations)
+                             // output
+                             hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv2,
+                             hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_uv,
+                             hls::stream<ap_uint<8> >& str_cnt_uv,
+                             ap_uint<9>& top_nz_uv, // = ap_top_nz; in/out: bits 4..7, indexed by 4 + ch + x
+                             ap_uint<9>& left_nz_uv // = ap_left_nz; in/out: bits 4..7, indexed by 4 + ch + y
+                             ) {
+#pragma HLS INLINE OFF
+    int x, y, ch;
+// U/V
+RECORD_COEFF_UV_0:
+    for (ch = 0; ch <= 2; ch += 2) { // ch takes the values 0 and 2, selecting flag bits 4..5 and 6..7
+    RECORD_COEFF_UV_1:
+        for (y = 0; y < 2; ++y) {
+        RECORD_COEFF_UV_2:
+            for (x = 0; x < 2; ++x) {
+#pragma HLS PIPELINE
+                ap_uint<2> ctx = top_nz_uv[4 + ch + x] + left_nz_uv[4 + ch + y]; // number of set neighbour flags (0..2)
+                ap_int<WD_LEVEL* 16> tmp = str_level_uv->read();
+                str_level_uv2->write(tmp); // for old: the word is forwarded unchanged
+                ap_int<5> last = FindLast(tmp);
+                VP8RecordCoeffs_hls_str_w_cnt(ctx, tmp, 0, last, str_rec_uv, str_cnt_uv);
+                top_nz_uv[4 + ch + x] = left_nz_uv[4 + ch + y] = last < 0 ? 0 : 1; // 1 when the block has a non-zero lane
+            }
+        }
+    }
+}
+//////////=====================   TopVp8_RecordCoeff_hls_cnt          ===========/////////////////////////////
+
+static ap_int<5> FindLast(ap_int<WD_LEVEL * 16> level) { // returns the highest lane index (15..0) holding a non-zero value, or -1
+#pragma HLS PIPELINE II = 1
+    ap_int<5> ret = 15;
+    for (ret = 15; ret > -1; ret--) { // scan from lane 15 downwards
+        ap_int<WD_LEVEL> tmp = VCT_GET(level, ret, WD_LEVEL); // VCT_GET presumably extracts lane ret - TODO confirm macro
+        if (tmp != 0) return ret;
+    }
+    return ret; // the loop ran to completion, so ret is -1 here
+}
+//////////=====================   TopVp8_RecordCoeff_hls_cnt          ===========/////////////////////////////
+int VP8RecordCoeffs_hls_str_w_cnt(ap_uint<2> ctx, // initial context placed in the emitted records
+                                  ap_int<WD_LEVEL * 16> coeffs, // 16 packed lanes, read from index `first` up to `last`
+                                  ap_uint<1> first,             // starting lane, also the initial band value
+                                  ap_int<5> last,               // last lane to process; negative means nothing to process
+                                  hls::stream<ap_uint<11> >& str_rec, // out: one record per Record_str call
+                                  hls::stream<ap_uint<8> >& str_cnt) { // out: exactly one word per call = number of records written
+    ap_uint<8> cnt = 0; // counts the Record_str calls made in this invocation
+    int n = first;
+    ap_uint<3> band_a = first;
+    ap_uint<2> ctx_a = ctx;
+    ap_uint<4> off_a = 0; // not referenced below
+    if (last < 0) { // empty block: a single record with isEnd = 1, then return 0
+        Record_str(str_rec, 1, 0, band_a, ctx_a, 0);
+        cnt++; // printf("cnt=%d \n",cnt.VAL );
+        str_cnt.write(cnt);
+        return 0;
+    }
+    ap_uint<1> isEealy_0 = 0; // 1 while the previous lane was zero; suppresses the offset-0 record for the next lane
+RECORD_COEFF_STR:
+    for (; n <= last; n++) {
+#pragma HLS LOOP_TRIPCOUNT min = 0 max = 16
+#pragma HLS PIPELINE
+        ap_int<WD_LEVEL> v;
+        if (isEealy_0 == 0) {
+            Record_str(str_rec, 0, 1, band_a, ctx_a, 0); // offset 0, bit 1
+            cnt++; // printf("cnt=%d \n;",cnt.VAL );
+        }
+        v = (ap_int<WD_LEVEL>)VCT_GET(coeffs, n, WD_LEVEL);
+        if (v == 0) { // zero lane: offset 1 with bit 0, then move to the next band with context 0
+            isEealy_0 = 1;
+            Record_str(str_rec, 0, 0, band_a, ctx_a, 1);
+            cnt++; // printf("cnt=%d\n ;",cnt.VAL );
+            band_a = VP8EncBands_hls(n + 1);
+            ctx_a = 0;
+            continue;
+        }
+        isEealy_0 = 0;
+        Record_str(str_rec, 0, 1, band_a, ctx_a, 1); // non-zero lane: offset 1 with bit 1
+        cnt++; // printf("cnt=%d \n;",cnt.VAL );
+        Record_str(str_rec, 0, 2u < (unsigned int)(v + 1), band_a, ctx_a, 2); // offset 2: bit is 1 unless v is -1 or 1
+        cnt++;                               // printf("cnt=%d \n;",cnt.VAL );
+        if (!(2u < (unsigned int)(v + 1))) { // v = -1 or 1
+            band_a = VP8EncBands_hls(n + 1);
+            ctx_a = 1;
+        } else {
+            if (v < 0) v = -v;  // work on the magnitude
+            if (v > 67) v = 67; // clamp so that v - 1 indexes at most row 66 of VP8LevelCodes_hls
+
+            ap_uint<9> bits = VP8LevelCodes_hls[v - 1][1];
+            int pattern = VP8LevelCodes_hls[v - 1][0];
+            int i;
+        RECORD_COEFF_STR_INNER:
+            for (i = 0; (pattern >>= 1) != 0; ++i) { // pattern is shifted before each test; the loop ends when it becomes 0
+#pragma HLS LOOP_TRIPCOUNT min = 1 max = 8
+#pragma HLS PIPELINE
+                const int mask = 2 << i;
+                if (pattern & 1) { // a set pattern bit emits one record at offset 3 + i
+                    Record_str(str_rec, 0, !!(bits & mask), band_a, ctx_a, 3 + i);
+                    cnt++; // printf("cnt=%d\n ;",cnt.VAL );
+                }
+            }
+            band_a = VP8EncBands_hls(n + 1);
+            ctx_a = 2;
+        }
+    } // while
+    if (n < 16) { // stopped before lane 16: append a final record with isEnd = 1
+        Record_str(str_rec, 1, 0, band_a, ctx_a, 0);
+        cnt++; // printf("cnt=%d \n",cnt.VAL );
+    }
+    str_cnt.write(cnt); // the reader consumes this count to know how many records to read
+    return 1;
+}
+//////////=====================   TopVp8_RecordCoeff_hls_cnt          ===========/////////////////////////////
+void Record_str(hls::stream<ap_uint<11> >& str_rec, // out: receives the single packed 11-bit record
+                ap_uint<1> isEnd, // stored in bit 10
+                ap_uint<1> bit,   // stored in bit 9
+                ap_uint<3> band,  // stored in bits 8..6
+                ap_uint<2> ctx,   // stored in bits 5..4
+                ap_uint<4> off) { // stored in bits 3..0
+    //#pragma HLS PIPELINE
+    ap_uint<11> tmp; // every bit is assigned below before the write
+    tmp(10, 10) = isEnd;
+    tmp(9, 9) = bit;
+    tmp(8, 6) = band;
+    tmp(5, 4) = ctx;
+    tmp(3, 0) = off;
+    str_rec.write(tmp);
+}
+//////////=====================   TopVp8_RecordCoeff_hls_cnt          ===========/////////////////////////////
+static ap_uint<3> VP8EncBands_hls(ap_uint<5> n) { // maps an index to a band number with comparisons instead of a table
+/* Reproduces: const uint8_t VP8EncBands[16 + 1] = { 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 0  }; */
+#pragma HLS INLINE
+    if (n < 4)
+        return n; // 0..3 map to themselves
+    else if (n == 4)
+        return 6;
+    else if (n == 5)
+        return 4;
+    else if (n == 6)
+        return 5;
+    else if (n == 15)
+        return 7;
+    else if (n == 16)
+        return 0;
+    else
+        return 6; // 7..14, and also 17..31, which lie outside the 17-entry table quoted above
+}
+//////////=====================   TopVp8_RecordCoeff_hls_cnt          ===========/////////////////////////////
+
+//////////======================================================================/////////////////////////////
+//////////============  TopVp8_RecordProb_hls_cnt                    ===========/////////////////////////////
+//////////======================================================================/////////////////////////////
+int TopVp8_RecordProb_hls_cnt(ap_uint<LG2_MAX_NUM_MB_W> mb_w, // trip count of the inner (x_mb) loop
+                              ap_uint<LG2_MAX_NUM_MB_H> mb_h, // trip count of the outer (y_mb) loop
+                              hls::stream<ap_uint<1> >& str_mb_type, // in: one word is read per macroblock
+                              hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_dc,
+                              hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_ac,
+                              hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_uv,
+                              hls::stream<ap_uint<8> >& str_cnt_dc,
+                              hls::stream<ap_uint<8> >& str_cnt_ac,
+                              hls::stream<ap_uint<8> >& str_cnt_uv,
+                              uint8_t* pout_prob // 4, 8, 3,11; out: receives p_coeffs, then dirty at index SIZE8_MEM_PROB - 1
+                              ) {
+    //#pragma HLS INTERFACE m_axi port=pout_prob offset=slave bundle=gmem depth=4*8*3*11
+    //#pragma HLS INTERFACE s_axilite port=pout_prob bundle=control
+    //#pragma HLS INTERFACE s_axilite port=return bundle=control
+    uint32_t stats[4][8][3][11]; // counters, updated through RecordPorb_ReadCoeff_dataflow2_cnt
+#pragma HLS ARRAY_PARTITION variable = stats complete dim = 1
+    uint8_t p_coeffs[4][8][3][11]; // starts as a copy of hls_VP8CoeffsProba0
+#pragma HLS ARRAY_PARTITION variable = p_coeffs complete dim = 1
+    int t, b, c, p;
+    for (t = 0; t < 4; ++t) // initialisation: zero every counter and load the default table
+#pragma HLS UNROLL
+    RECORD_PROB_INIT_B:
+        for (b = 0; b < 8; ++b)
+        C:
+            for (c = 0; c < 3; ++c)
+            P:
+                for (p = 0; p < 11; ++p) {
+#pragma HLS PIPELINE
+                    stats[t][b][c][p] = 0;
+                    p_coeffs[t][b][c][p] = hls_VP8CoeffsProba0[t][b][c][p];
+                }
+
+RECORD_PROB_READ_Y:
+    for (int y_mb = 0; y_mb < mb_h; y_mb++) {
+#pragma HLS LOOP_TRIPCOUNT min = 68 max = 256
+    X:
+        for (int x_mb = 0; x_mb < mb_w; x_mb++) {
+#pragma HLS LOOP_TRIPCOUNT min = 120 max = 256
+            //#pragma HLS PIPELINE
+            ap_uint<1> type_mb = str_mb_type.read();
+            RecordPorb_ReadCoeff_dataflow2_cnt(type_mb, str_rec_dc, str_rec_ac, str_rec_uv, str_cnt_dc, str_cnt_ac,
+                                               str_cnt_uv, stats[1], stats[0], stats[3], stats[2]); // stats[1]: DC, stats[0] / stats[3]: AC, stats[2]: UV
+        }
+    }
+    int dirty = 1;
+    int size = sizeof(p_coeffs);
+    FinalizeTokenProbas_hls(stats, p_coeffs, &dirty); // presumably rewrites p_coeffs and dirty from stats - TODO confirm against its body
+    memcpy(pout_prob, p_coeffs, size);
+    pout_prob[SIZE8_MEM_PROB - 1] = dirty; // dirty is also stored in the output buffer itself
+    return dirty;
+}
+
+void TopVp8_RecordProb_hls_cnt_HideDirty(ap_uint<LG2_MAX_NUM_MB_W> mb_w, // same steps as TopVp8_RecordProb_hls_cnt, but returns void
+                                         ap_uint<LG2_MAX_NUM_MB_H> mb_h,
+                                         hls::stream<ap_uint<1> >& str_mb_type, // in: one word is read per macroblock
+                                         hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_dc,
+                                         hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_ac,
+                                         hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_uv,
+                                         hls::stream<ap_uint<8> >& str_cnt_dc,
+                                         hls::stream<ap_uint<8> >& str_cnt_ac,
+                                         hls::stream<ap_uint<8> >& str_cnt_uv,
+                                         uint8_t* pout_prob // 4, 8, 3,11; out: receives p_coeffs, then dirty at index SIZE8_MEM_PROB - 1
+                                         ) {
+    //#pragma HLS INTERFACE m_axi port=pout_prob offset=slave bundle=gmem depth=4*8*3*11
+    //#pragma HLS INTERFACE s_axilite port=pout_prob bundle=control
+    //#pragma HLS INTERFACE s_axilite port=return bundle=control
+    uint32_t stats[4][8][3][11]; // counters, updated through RecordPorb_ReadCoeff_dataflow2_cnt
+#pragma HLS ARRAY_PARTITION variable = stats complete dim = 1
+    uint8_t p_coeffs[4][8][3][11]; // starts as a copy of hls_VP8CoeffsProba0
+#pragma HLS ARRAY_PARTITION variable = p_coeffs complete dim = 1
+    int t, b, c, p;
+    for (t = 0; t < 4; ++t) // initialisation: zero every counter and load the default table
+#pragma HLS UNROLL
+        for (b = 0; b < 8; ++b)
+            for (c = 0; c < 3; ++c)
+            HIDEDIRTY_INIT:
+                for (p = 0; p < 11; ++p) {
+#pragma HLS PIPELINE
+                    stats[t][b][c][p] = 0;
+                    p_coeffs[t][b][c][p] = hls_VP8CoeffsProba0[t][b][c][p];
+                }
+
+    for (int y_mb = 0; y_mb < mb_h; y_mb++) {
+#pragma HLS LOOP_TRIPCOUNT min = 68 max = 256
+        for (int x_mb = 0; x_mb < mb_w; x_mb++) {
+#pragma HLS LOOP_TRIPCOUNT min = 120 max = 256
+            //#pragma HLS PIPELINE
+            ap_uint<1> type_mb = str_mb_type.read();
+            RecordPorb_ReadCoeff_dataflow2_cnt(type_mb, str_rec_dc, str_rec_ac, str_rec_uv, str_cnt_dc, str_cnt_ac,
+                                               str_cnt_uv, stats[1], stats[0], stats[3], stats[2]); // stats[1]: DC, stats[0] / stats[3]: AC, stats[2]: UV
+        }
+    }
+    int dirty = 1;
+    int size = sizeof(p_coeffs);
+    FinalizeTokenProbas_hls(stats, p_coeffs, &dirty); // presumably rewrites p_coeffs and dirty from stats - TODO confirm against its body
+    memcpy(pout_prob, p_coeffs, size);
+    pout_prob[SIZE8_MEM_PROB - 1] = dirty; // the only place the dirty flag is reported, since nothing is returned
+}
+
+//////////============  TopVp8_RecordProb_hls_cnt_HideDirty          ===========/////////////////////////////
+void RecordPorb_ReadCoeff_dataflow2_cnt(ap_uint<1> mb_type, // forwarded to the DC and AC readers
+                                        hls::stream<ap_uint<11> >& str_rec_dc,
+                                        hls::stream<ap_uint<11> >& str_rec_ac,
+                                        hls::stream<ap_uint<11> >& str_rec_uv,
+                                        hls::stream<ap_uint<8> >& str_cnt_dc,
+                                        hls::stream<ap_uint<8> >& str_cnt_ac,
+                                        hls::stream<ap_uint<8> >& str_cnt_uv,
+                                        uint32_t stats_dc[8][3][11],     // updated by the DC reader
+                                        uint32_t stats_ac0_dc[8][3][11], // updated by the AC reader when mb_type is non-zero
+                                        uint32_t stats_ac3[8][3][11],    // updated by the AC reader when mb_type is zero
+                                        uint32_t stats_uv[8][3][11]) {   // updated by the UV reader
+#pragma HLS DATAFLOW
+    RecordPorb_ReadCoeff_dataflow_dc_cnt(mb_type, str_rec_dc, str_cnt_dc, stats_dc);
+    RecordPorb_ReadCoeff_dataflow_ac_cnt(mb_type, str_rec_ac, str_cnt_ac, stats_ac0_dc, stats_ac3);
+    RecordPorb_ReadCoeff_dataflow_uv_cnt(str_rec_uv, str_cnt_uv, stats_uv);
+}
+
+//////////============  TopVp8_RecordProb_hls_cnt_HideDirty          ===========/////////////////////////////
+////RecordPorb_ReadCoeff_dataflow_dc_cnt//////////////////////////
+void RecordPorb_ReadCoeff_dataflow_dc_cnt(ap_uint<1> mb_type, // the streams are only read when this is 1
+                                          hls::stream<ap_uint<11> >& str_rec_dc,
+                                          hls::stream<ap_uint<8> >& str_cnt,
+                                          uint32_t stats_dc[8][3][11]) {
+    if (mb_type == 1) VP8RecordCoeffs_hls_str_r_cnt(str_rec_dc, str_cnt, stats_dc); // same condition under which RecordCoeff_dataflow_dc writes DC records
+}
+
+////RecordPorb_ReadCoeff_dataflow_ac_cnt//////////////////////////
+void RecordPorb_ReadCoeff_dataflow_ac_cnt(ap_uint<1> mb_type, // selects which counter array all 16 groups update
+                                          hls::stream<ap_uint<11> >& str_rec_ac,
+                                          hls::stream<ap_uint<8> >& str_cnt,
+                                          uint32_t stats_ac0_dc[8][3][11],
+                                          uint32_t stats_ac3[8][3][11]) {
+    //#pragma HLS PIPELINE
+    for (int i = 0; i < 16; i++) // 16 record groups, matching the 4 x 4 loop in RecordCoeff_dataflow_y
+        if (mb_type)
+            VP8RecordCoeffs_hls_str_r_cnt(str_rec_ac, str_cnt, stats_ac0_dc);
+        else
+            VP8RecordCoeffs_hls_str_r_cnt(str_rec_ac, str_cnt, stats_ac3);
+}
+////RecordPorb_ReadCoeff_dataflow_uv_cnt//////////////////////////
+void RecordPorb_ReadCoeff_dataflow_uv_cnt(hls::stream<ap_uint<11> >& str_rec_uv,
+                                          hls::stream<ap_uint<8> >& str_cnt,
+                                          uint32_t stats_uv[8][3][11]) {
+    // 8 record groups are consumed, matching the 2 x 2 x 2 loop nest in RecordCoeff_dataflow_uv
+    for (int i = 0; i < 8; i++) VP8RecordCoeffs_hls_str_r_cnt(str_rec_uv, str_cnt, stats_uv);
+}
+
+//////////============  TopVp8_RecordProb_hls_cnt_HideDirty          ===========/////////////////////////////
+
+//////VP8RecordCoeffs_hls_str_r_cnt//////////////////
+void VP8RecordCoeffs_hls_str_r_cnt(hls::stream<ap_uint<11> >& str_rec, // in: packed records (layout as written by Record_str)
+                                   hls::stream<ap_uint<8> >& str_cnt,  // in: one word = number of records to consume
+                                   uint32_t stats[8][3][11]) {         // in/out: counters indexed [band][ctx][off]
+#pragma HLS INLINE OFF
+    uint32_t state, state0; // state: counter being updated; state0: updated counter still waiting to be written back
+    ap_uint<1> bit, bit0;
+    ap_uint<9> addr, addr0; // addr: address of the current record; addr0: address that state0 belongs to
+    addr0 = 0x1ff; // sentinel: decodes to ctx 3 / off 15, which VP8RecordCoeffs_hls_str_w_cnt never emits
+    addr = 0;
+
+    ap_uint<8> cnt = str_cnt.read();
+RECORD_COEFFS_INNER:
+    for (int i = 0; i < cnt; i++) {
+#pragma HLS dependence array inter false
+#pragma HLS LOOP_TRIPCOUNT min = 18 max = 127
+#pragma HLS PIPELINE II = 1
+        ap_uint<11> tmp = str_rec.read();
+        addr = tmp(8, 0); // band, ctx and off packed together; bit 10 (isEnd) is not used here
+        ap_uint<1> bit = tmp(9, 9);
+
+        ap_uint<3> band = addr(8, 6);
+        ap_uint<2> ctx = addr(5, 4);
+        ap_uint<4> off = addr(3, 0);
+        if (addr != addr0) {
+            state = stats[band][ctx][off];
+        } else {
+            state = state0; // same address as the previous record: reuse the value not yet written back
+        }
+        ap_uint<3> band0 = addr0(8, 6);
+        ap_uint<2> ctx0 = addr0(5, 4);
+        ap_uint<4> off0 = addr0(3, 0);
+        if (i != 0) stats[band0][ctx0][off0] = state0; // delayed write-back of the previous iteration's result
+
+        state0 = Record_hls(bit, state);
+        addr0 = addr;
+    }
+    ap_uint<3> band0 = addr0(8, 6);
+    ap_uint<2> ctx0 = addr0(5, 4);
+    ap_uint<4> off0 = addr0(3, 0);
+    if (cnt != 0) stats[band0][ctx0][off0] = state0; // fix: with cnt == 0 this stored uninitialized state0 at out-of-range stats[7][3][15]
+}
+void VP8RecordCoeffs_hls_str_r_cnt_old(hls::stream<ap_uint<11> >& str_rec, // in: packed records (layout as written by Record_str)
+                                       hls::stream<ap_uint<8> >& str_cnt,  // in: one word = number of records to consume
+                                       uint32_t stats[8][3][11]) {         // in/out: counters indexed [band][ctx][off]
+    ap_uint<8> cnt = str_cnt.read();
+RECORD_COEFFS_OLD:
+    for (int i = 0; i < cnt; i++) { // direct read-modify-write per record, with no forwarding of the previous result
+#pragma HLS LOOP_TRIPCOUNT min = 18 max = 127
+#pragma HLS PIPELINE II = 2
+        ap_uint<11> tmp = str_rec.read();
+        ap_uint<1> isEnd = tmp(10, 10); // = isEnd; decoded but not used below
+        ap_uint<1> bit = tmp(9, 9);     // = bit;
+        ap_uint<3> band = tmp(8, 6);    // = band;
+        ap_uint<2> ctx = tmp(5, 4);     // = ctx;
+        ap_uint<4> off = tmp(3, 0);     // = off;
+        uint32_t state_old = stats[band][ctx][off];
+        stats[band][ctx][off] = Record_hls(bit, state_old);
+    }
+}
+//////////============  TopVp8_RecordProb_hls_cnt_HideDirty          ===========/////////////////////////////
+// Records one bit into a packed 32-bit statistics word and returns the
+// updated word.
+//
+// Packing of `p` (matches how FinalizeTokenProbas_hls unpacks it):
+//   [31:16] total number of recorded events
+//   [15:0]  count incremented by `bit` on every call
+//
+// Normal case: total += 1, count += bit.
+// Saturation case (total == 0xffff): total is reset to 0x7fff and the count is
+// replaced by (count + 1 + 2 * bit) >> 1, i.e. the count is halved (rounding
+// up) with the current bit folded in.
+ap_uint<32> Record_hls(ap_uint<1> bit, ap_uint<32> p) {
+#pragma HLS PIPELINE
+    ap_uint<16> p_h = p(31, 16);
+    ap_uint<16> p_l = p(15, 0);
+    if (p_h == 0xffff) { // an overflow is inbound.
+        // A shift on an ap_uint<1> keeps the 1-bit width, so `bit << 1` always
+        // evaluates to 0 and the bit would be dropped. Widen before shifting.
+        const unsigned int bit_x2 = bit.to_uint() << 1;
+        p_h = 0x7fff;
+        p_l = (p_l + 1 + bit_x2) >> 1;
+    } else {
+        p_h += 1;
+        p_l += bit;
+    }
+    p(31, 16) = p_h;
+    p(15, 0) = p_l;
+    return p;
+}
+//////////============  TopVp8_RecordProb_hls_cnt_HideDirty          ===========/////////////////////////////
+// Converts an event count into an 8-bit probability value.
+// Returns 255 when `nb` is zero, otherwise 255 - (nb * 255) / total using
+// integer division. The result is narrowed to uint8_t on return.
+static uint8_t hls_CalcTokenProba(int nb, int total) {
+    if (nb == 0) {
+        return 255;
+    }
+    const int scaled = nb * 255 / total;
+    return 255 - scaled;
+}
+
+// Looks up the cost of coding `bit` under probability `proba` in
+// hls_VP8EntropyCost: a zero bit uses entry [proba], any non-zero bit uses
+// entry [255 - proba].
+static int hls_VP8BitCost(int bit, uint8_t proba) {
+    const int entry = bit ? (255 - proba) : proba;
+    return hls_VP8EntropyCost[entry];
+}
+
+// Total cost of a branch observed `total` times, `nb` of which were ones:
+// nb * cost(bit = 1) + (total - nb) * cost(bit = 0), both at probability `proba`.
+static int hls_BranchCost(int nb, int total, int proba) {
+    const int ones_cost = nb * hls_VP8BitCost(1, proba);
+    const int zeros_cost = (total - nb) * hls_VP8BitCost(0, proba);
+    return ones_cost + zeros_cost;
+}
+// Chooses, for each of the 4 x 8 x 3 x 11 entries, between the table
+// probability hls_VP8CoeffsProba0[t][b][c][p] and a probability derived from
+// the recorded statistics, and writes the choice to `p_coeffs_`.
+//
+// p_stats   : packed statistics, [15:0] = nb, [31:16] = total.
+// p_coeffs_ : output, the selected probability per entry.
+// dirty     : output, set to 1 if any selected probability differs from the
+//             corresponding hls_VP8CoeffsProba0 entry, otherwise 0.
+// Returns the accumulated cost: per entry the cost of coding the
+// "use new probability" flag, plus 8 * 256 whenever the new value is used
+// (presumably 8 bits in 1/256-bit cost units -- confirm against the table).
+int FinalizeTokenProbas_hls(uint32_t p_stats[4][8][3][11], uint8_t p_coeffs_[4][8][3][11], int* dirty) {
+    int any_change = 0;
+    int total_cost = 0;
+INA_TOKEN_PROB:
+    for (int t = 0; t < 4; ++t) {
+        for (int b = 0; b < 8; ++b) {
+        C:
+            for (int c = 0; c < 3; ++c) {
+            P:
+                for (int p = 0; p < 11; ++p) {
+#pragma HLS PIPELINE
+                    // Unpack the statistics word for this entry.
+                    const uint32_t packed = p_stats[t][b][c][p];
+                    const int nb = packed & 0xffff;
+                    const int total = (packed >> 16) & 0xffff;
+
+                    const int update_proba = hls_VP8CoeffsUpdateProba[t][b][c][p];
+                    const int old_p = hls_VP8CoeffsProba0[t][b][c][p];
+                    const int new_p = hls_CalcTokenProba(nb, total);
+
+                    // Cost of keeping the table value vs. signalling a new one.
+                    const int keep_cost = hls_BranchCost(nb, total, old_p) + hls_VP8BitCost(0, update_proba);
+                    const int update_cost =
+                        hls_BranchCost(nb, total, new_p) + hls_VP8BitCost(1, update_proba) + 8 * 256;
+
+                    if (keep_cost > update_cost) { // only use proba that seem meaningful enough.
+                        p_coeffs_[t][b][c][p] = new_p;
+                        if (new_p != old_p) {
+                            any_change = 1;
+                        }
+                        total_cost += hls_VP8BitCost(1, update_proba) + 8 * 256;
+                    } else {
+                        p_coeffs_[t][b][c][p] = old_p;
+                        total_cost += hls_VP8BitCost(0, update_proba);
+                    }
+                }
+            }
+        }
+    }
+    *dirty = any_change;
+    return total_cost;
+}
+//////////============  TopVp8_RecordProb_hls_cnt_HideDirty          ===========/////////////////////////////
+
+extern "C" {
+// HLS kernel entry point (instance 1).
+//
+// Copies the first 64 words of `p_info` into a local buffer, unpacks the
+// picture/quantizer parameters from it and forwards everything to
+// TopVp8_top_dataflow_32bit_k1NoStruct_cnt_DeepFIFO.
+//
+// p_info word layout as unpacked below:
+//   [0] id_pic, [1] mb_line, [2] y_stride, [3] uv_stride, [4] width,
+//   [5] height, [6] mb_w, [7] mb_h, [8] lambda_p16, [9] lambda_p44,
+//   [10] tlambda, [11] lambda_uv, [12] tlambda_m,
+//   [13..18] hls_qm1, [19..24] hls_qm2, [25..30] hls_qm_uv
+//            (each: q_0, q_n, iq_0, iq_n, bias_0, bias_n),
+//   [31..46] ap_sharpen, [47..62] ap_sharpen_uv.
+//
+// ysrc / usrc / vsrc, pout_level and pout_prob are passed through unchanged.
+void kernel_IntraPredLoop2_NoOut_1_6axi(
+    int32_t* p_info, uint32_t* ysrc, uint32_t* usrc, uint32_t* vsrc, int32_t* pout_level, uint8_t* pout_prob) {
+#pragma HLS INTERFACE m_axi port = pout_level offset = slave bundle = gmem1 depth =              \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = p_info offset = slave bundle = gmem0 depth = 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = ysrc offset = slave bundle = gmem2 depth =                    \
+    4096 * 4096 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = usrc offset = slave bundle = gmem3 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = vsrc offset = slave bundle = gmem4 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_prob offset = slave bundle = gmem5 depth = 2048 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+
+#pragma HLS INTERFACE s_axilite port = p_info bundle = control
+#pragma HLS INTERFACE s_axilite port = ysrc bundle = control
+#pragma HLS INTERFACE s_axilite port = usrc bundle = control
+#pragma HLS INTERFACE s_axilite port = vsrc bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+
+    // Local copy of the parameter block; all fields below are unpacked from it.
+    int p_readinfo[64];
+    memcpy(p_readinfo, p_info, 64 * sizeof(int));
+    ap_uint<32> id_pic;
+    ap_uint<32> mb_line;
+    ap_uint<LG2_MAX_W_PIX> y_stride;
+    ap_uint<LG2_MAX_W_PIX> uv_stride;
+    ap_uint<LG2_MAX_W_PIX> width;
+    ap_uint<LG2_MAX_W_PIX> height;
+    ap_uint<LG2_MAX_NUM_MB_W> mb_w;
+    ap_uint<LG2_MAX_NUM_MB_H> mb_h;
+    ap_uint<WD_LMD> lambda_p16;
+    ap_uint<WD_LMD> lambda_p44;
+    ap_uint<WD_LMD> tlambda;
+    ap_uint<WD_LMD> lambda_uv;
+    ap_uint<WD_LMD> tlambda_m;
+    hls_QMatrix hls_qm1, hls_qm2, hls_qm_uv;
+    ap_int<WD_sharpen * 16> ap_sharpen, ap_sharpen_uv;
+
+    // Initializing image variables, once for one picture
+    { // For convenience, extend the code at top module to show all parameters used by kernel of intra-prediction
+        id_pic = p_readinfo[0];  // reserved for future
+        mb_line = p_readinfo[1]; // reserved for future, to show current line number of mb
+        y_stride = p_readinfo[2];
+        uv_stride = p_readinfo[3];
+        width = p_readinfo[4];
+        height = p_readinfo[5];
+        mb_w = p_readinfo[2 + 2 + 2];
+        mb_h = p_readinfo[3 + 2 + 2];
+        lambda_p16 = p_readinfo[4 + 2 + 2];
+        lambda_p44 = p_readinfo[5 + 2 + 2];
+        tlambda = p_readinfo[6 + 2 + 2];
+        lambda_uv = p_readinfo[7 + 2 + 2];
+        tlambda_m = p_readinfo[8 + 2 + 2];
+
+        hls_qm1.q_0 = p_readinfo[11 + 2]; // quantizer steps
+        hls_qm1.q_n = p_readinfo[12 + 2];
+        hls_qm1.iq_0 = p_readinfo[13 + 2]; // reciprocals fixed point.
+        hls_qm1.iq_n = p_readinfo[14 + 2];
+        hls_qm1.bias_0 = p_readinfo[15 + 2]; // rounding bias
+        hls_qm1.bias_n = p_readinfo[16 + 2];
+
+        hls_qm2.q_0 = p_readinfo[17 + 2]; // quantizer steps
+        hls_qm2.q_n = p_readinfo[18 + 2];
+        hls_qm2.iq_0 = p_readinfo[19 + 2]; // reciprocals fixed point.
+        hls_qm2.iq_n = p_readinfo[20 + 2];
+        hls_qm2.bias_0 = p_readinfo[21 + 2]; // rounding bias
+        hls_qm2.bias_n = p_readinfo[22 + 2];
+
+        hls_qm_uv.q_0 = p_readinfo[23 + 2]; // quantizer steps
+        hls_qm_uv.q_n = p_readinfo[24 + 2];
+        hls_qm_uv.iq_0 = p_readinfo[25 + 2]; // reciprocals fixed point.
+        hls_qm_uv.iq_n = p_readinfo[26 + 2];
+        hls_qm_uv.bias_0 = p_readinfo[27 + 2]; // rounding bias
+        hls_qm_uv.bias_n = p_readinfo[28 + 2];
+        // Unpack from the local copy like every other field; the values equal
+        // p_info[31..46] because p_readinfo was filled from p_info above, and
+        // this avoids extra reads on the p_info m_axi port.
+        for (int i = 0; i < 16; i++)
+#pragma HLS UNROLL
+            VCT_GET(ap_sharpen, i, WD_sharpen) = p_readinfo[29 + 2 + i];
+        for (int i = 0; i < 16; i++)
+#pragma HLS UNROLL
+            VCT_GET(ap_sharpen_uv, i, WD_sharpen) = p_readinfo[29 + 2 + 16 + i];
+    } // end of initialization
+    // Receives the "dirty" flag from the callee; not used by this kernel afterwards.
+    int dirty = 0;
+    TopVp8_top_dataflow_32bit_k1NoStruct_cnt_DeepFIFO(id_pic,     // p_info[0],
+                                                      mb_line,    // p_info[1],
+                                                      y_stride,   // p_info[2],  // ,//pic->y_stride,
+                                                      uv_stride,  // p_info[3], // ,//pic->uv_stride
+                                                      width,      // p_info[4],  // ,//pic->width
+                                                      height,     // p_info[5],  // ,//pic->height
+                                                      mb_w,       // p_info[2+2+2],///,
+                                                      mb_h,       // p_info[3+2+2],//,
+                                                      lambda_p16, // p_info[4+2+2],//dqm->lambda_i16_,
+                                                      lambda_p44, // p_info[5+2+2],//dqm->lambda_i4_,
+                                                      tlambda,    // p_info[6+2+2],//dqm->tlambda_,
+                                                      lambda_uv,  // p_info[7+2+2],//dqm->lambda_uv_,
+                                                      tlambda_m,  // p_info[8+2+2],//dqm->lambda_mode_,
+                                                      hls_qm1, hls_qm2, hls_qm_uv, ap_sharpen, ap_sharpen_uv,
+                                                      ysrc,       // 4096x4096
+                                                      usrc,       // 2048x2048
+                                                      vsrc,       // 2048x2048
+                                                      pout_level, // 65536*512
+                                                      pout_prob, &dirty);
+}
+}
+
+extern "C" {
+// HLS kernel entry point (instance 2).
+//
+// Copies the first 64 words of `p_info` into a local buffer, unpacks the
+// picture/quantizer parameters from it and forwards everything to
+// TopVp8_top_dataflow_32bit_k1NoStruct_cnt_DeepFIFO.
+//
+// p_info word layout as unpacked below:
+//   [0] id_pic, [1] mb_line, [2] y_stride, [3] uv_stride, [4] width,
+//   [5] height, [6] mb_w, [7] mb_h, [8] lambda_p16, [9] lambda_p44,
+//   [10] tlambda, [11] lambda_uv, [12] tlambda_m,
+//   [13..18] hls_qm1, [19..24] hls_qm2, [25..30] hls_qm_uv
+//            (each: q_0, q_n, iq_0, iq_n, bias_0, bias_n),
+//   [31..46] ap_sharpen, [47..62] ap_sharpen_uv.
+//
+// ysrc / usrc / vsrc, pout_level and pout_prob are passed through unchanged.
+void kernel_IntraPredLoop2_NoOut_2_6axi(
+    int32_t* p_info, uint32_t* ysrc, uint32_t* usrc, uint32_t* vsrc, int32_t* pout_level, uint8_t* pout_prob) {
+#pragma HLS INTERFACE m_axi port = pout_level offset = slave bundle = gmem1 depth =              \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = p_info offset = slave bundle = gmem0 depth = 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = ysrc offset = slave bundle = gmem2 depth =                    \
+    4096 * 4096 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = usrc offset = slave bundle = gmem3 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = vsrc offset = slave bundle = gmem4 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_prob offset = slave bundle = gmem5 depth = 2048 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+
+#pragma HLS INTERFACE s_axilite port = p_info bundle = control
+#pragma HLS INTERFACE s_axilite port = ysrc bundle = control
+#pragma HLS INTERFACE s_axilite port = usrc bundle = control
+#pragma HLS INTERFACE s_axilite port = vsrc bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+
+    // Local copy of the parameter block; all fields below are unpacked from it.
+    int p_readinfo[64];
+    memcpy(p_readinfo, p_info, 64 * sizeof(int));
+    ap_uint<32> id_pic;
+    ap_uint<32> mb_line;
+    ap_uint<LG2_MAX_W_PIX> y_stride;
+    ap_uint<LG2_MAX_W_PIX> uv_stride;
+    ap_uint<LG2_MAX_W_PIX> width;
+    ap_uint<LG2_MAX_W_PIX> height;
+    ap_uint<LG2_MAX_NUM_MB_W> mb_w;
+    ap_uint<LG2_MAX_NUM_MB_H> mb_h;
+    ap_uint<WD_LMD> lambda_p16;
+    ap_uint<WD_LMD> lambda_p44;
+    ap_uint<WD_LMD> tlambda;
+    ap_uint<WD_LMD> lambda_uv;
+    ap_uint<WD_LMD> tlambda_m;
+    hls_QMatrix hls_qm1, hls_qm2, hls_qm_uv;
+    ap_int<WD_sharpen * 16> ap_sharpen, ap_sharpen_uv;
+
+    // Initializing image variables, once for one picture
+    { // For convenience, extend the code at top module to show all parameters used by kernel of intra-prediction
+        id_pic = p_readinfo[0];  // reserved for future
+        mb_line = p_readinfo[1]; // reserved for future, to show current line number of mb
+        y_stride = p_readinfo[2];
+        uv_stride = p_readinfo[3];
+        width = p_readinfo[4];
+        height = p_readinfo[5];
+        mb_w = p_readinfo[2 + 2 + 2];
+        mb_h = p_readinfo[3 + 2 + 2];
+        lambda_p16 = p_readinfo[4 + 2 + 2];
+        lambda_p44 = p_readinfo[5 + 2 + 2];
+        tlambda = p_readinfo[6 + 2 + 2];
+        lambda_uv = p_readinfo[7 + 2 + 2];
+        tlambda_m = p_readinfo[8 + 2 + 2];
+
+        hls_qm1.q_0 = p_readinfo[11 + 2]; // quantizer steps
+        hls_qm1.q_n = p_readinfo[12 + 2];
+        hls_qm1.iq_0 = p_readinfo[13 + 2]; // reciprocals fixed point.
+        hls_qm1.iq_n = p_readinfo[14 + 2];
+        hls_qm1.bias_0 = p_readinfo[15 + 2]; // rounding bias
+        hls_qm1.bias_n = p_readinfo[16 + 2];
+
+        hls_qm2.q_0 = p_readinfo[17 + 2]; // quantizer steps
+        hls_qm2.q_n = p_readinfo[18 + 2];
+        hls_qm2.iq_0 = p_readinfo[19 + 2]; // reciprocals fixed point.
+        hls_qm2.iq_n = p_readinfo[20 + 2];
+        hls_qm2.bias_0 = p_readinfo[21 + 2]; // rounding bias
+        hls_qm2.bias_n = p_readinfo[22 + 2];
+
+        hls_qm_uv.q_0 = p_readinfo[23 + 2]; // quantizer steps
+        hls_qm_uv.q_n = p_readinfo[24 + 2];
+        hls_qm_uv.iq_0 = p_readinfo[25 + 2]; // reciprocals fixed point.
+        hls_qm_uv.iq_n = p_readinfo[26 + 2];
+        hls_qm_uv.bias_0 = p_readinfo[27 + 2]; // rounding bias
+        hls_qm_uv.bias_n = p_readinfo[28 + 2];
+        // Unpack from the local copy like every other field; the values equal
+        // p_info[31..46] because p_readinfo was filled from p_info above, and
+        // this avoids extra reads on the p_info m_axi port.
+        for (int i = 0; i < 16; i++)
+#pragma HLS UNROLL
+            VCT_GET(ap_sharpen, i, WD_sharpen) = p_readinfo[29 + 2 + i];
+        for (int i = 0; i < 16; i++)
+#pragma HLS UNROLL
+            VCT_GET(ap_sharpen_uv, i, WD_sharpen) = p_readinfo[29 + 2 + 16 + i];
+    } // end of initialization
+    // Receives the "dirty" flag from the callee; not used by this kernel afterwards.
+    int dirty = 0;
+    TopVp8_top_dataflow_32bit_k1NoStruct_cnt_DeepFIFO(id_pic,     // p_info[0],
+                                                      mb_line,    // p_info[1],
+                                                      y_stride,   // p_info[2],  // ,//pic->y_stride,
+                                                      uv_stride,  // p_info[3], // ,//pic->uv_stride
+                                                      width,      // p_info[4],  // ,//pic->width
+                                                      height,     // p_info[5],  // ,//pic->height
+                                                      mb_w,       // p_info[2+2+2],///,
+                                                      mb_h,       // p_info[3+2+2],//,
+                                                      lambda_p16, // p_info[4+2+2],//dqm->lambda_i16_,
+                                                      lambda_p44, // p_info[5+2+2],//dqm->lambda_i4_,
+                                                      tlambda,    // p_info[6+2+2],//dqm->tlambda_,
+                                                      lambda_uv,  // p_info[7+2+2],//dqm->lambda_uv_,
+                                                      tlambda_m,  // p_info[8+2+2],//dqm->lambda_mode_,
+                                                      hls_qm1, hls_qm2, hls_qm_uv, ap_sharpen, ap_sharpen_uv,
+                                                      ysrc,       // 4096x4096
+                                                      usrc,       // 2048x2048
+                                                      vsrc,       // 2048x2048
+                                                      pout_level, // 65536*512
+                                                      pout_prob, &dirty);
+}
+}
+
+extern "C" {
+// HLS kernel entry point (instance 3).
+//
+// Copies the first 64 words of `p_info` into a local buffer, unpacks the
+// picture/quantizer parameters from it and forwards everything to
+// TopVp8_top_dataflow_32bit_k1NoStruct_cnt_DeepFIFO.
+//
+// p_info word layout as unpacked below:
+//   [0] id_pic, [1] mb_line, [2] y_stride, [3] uv_stride, [4] width,
+//   [5] height, [6] mb_w, [7] mb_h, [8] lambda_p16, [9] lambda_p44,
+//   [10] tlambda, [11] lambda_uv, [12] tlambda_m,
+//   [13..18] hls_qm1, [19..24] hls_qm2, [25..30] hls_qm_uv
+//            (each: q_0, q_n, iq_0, iq_n, bias_0, bias_n),
+//   [31..46] ap_sharpen, [47..62] ap_sharpen_uv.
+//
+// ysrc / usrc / vsrc, pout_level and pout_prob are passed through unchanged.
+void kernel_IntraPredLoop2_NoOut_3_6axi(
+    int32_t* p_info, uint32_t* ysrc, uint32_t* usrc, uint32_t* vsrc, int32_t* pout_level, uint8_t* pout_prob) {
+#pragma HLS INTERFACE m_axi port = pout_level offset = slave bundle = gmem1 depth =              \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = p_info offset = slave bundle = gmem0 depth = 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = ysrc offset = slave bundle = gmem2 depth =                    \
+    4096 * 4096 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = usrc offset = slave bundle = gmem3 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = vsrc offset = slave bundle = gmem4 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_prob offset = slave bundle = gmem5 depth = 2048 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+
+#pragma HLS INTERFACE s_axilite port = p_info bundle = control
+#pragma HLS INTERFACE s_axilite port = ysrc bundle = control
+#pragma HLS INTERFACE s_axilite port = usrc bundle = control
+#pragma HLS INTERFACE s_axilite port = vsrc bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+
+    // Local copy of the parameter block; all fields below are unpacked from it.
+    int p_readinfo[64];
+    memcpy(p_readinfo, p_info, 64 * sizeof(int));
+    ap_uint<32> id_pic;
+    ap_uint<32> mb_line;
+    ap_uint<LG2_MAX_W_PIX> y_stride;
+    ap_uint<LG2_MAX_W_PIX> uv_stride;
+    ap_uint<LG2_MAX_W_PIX> width;
+    ap_uint<LG2_MAX_W_PIX> height;
+    ap_uint<LG2_MAX_NUM_MB_W> mb_w;
+    ap_uint<LG2_MAX_NUM_MB_H> mb_h;
+    ap_uint<WD_LMD> lambda_p16;
+    ap_uint<WD_LMD> lambda_p44;
+    ap_uint<WD_LMD> tlambda;
+    ap_uint<WD_LMD> lambda_uv;
+    ap_uint<WD_LMD> tlambda_m;
+    hls_QMatrix hls_qm1, hls_qm2, hls_qm_uv;
+    ap_int<WD_sharpen * 16> ap_sharpen, ap_sharpen_uv;
+
+    // Initializing image variables, once for one picture
+    { // For convenience, extend the code at top module to show all parameters used by kernel of intra-prediction
+        id_pic = p_readinfo[0];  // reserved for future
+        mb_line = p_readinfo[1]; // reserved for future, to show current line number of mb
+        y_stride = p_readinfo[2];
+        uv_stride = p_readinfo[3];
+        width = p_readinfo[4];
+        height = p_readinfo[5];
+        mb_w = p_readinfo[2 + 2 + 2];
+        mb_h = p_readinfo[3 + 2 + 2];
+        lambda_p16 = p_readinfo[4 + 2 + 2];
+        lambda_p44 = p_readinfo[5 + 2 + 2];
+        tlambda = p_readinfo[6 + 2 + 2];
+        lambda_uv = p_readinfo[7 + 2 + 2];
+        tlambda_m = p_readinfo[8 + 2 + 2];
+
+        hls_qm1.q_0 = p_readinfo[11 + 2]; // quantizer steps
+        hls_qm1.q_n = p_readinfo[12 + 2];
+        hls_qm1.iq_0 = p_readinfo[13 + 2]; // reciprocals fixed point.
+        hls_qm1.iq_n = p_readinfo[14 + 2];
+        hls_qm1.bias_0 = p_readinfo[15 + 2]; // rounding bias
+        hls_qm1.bias_n = p_readinfo[16 + 2];
+
+        hls_qm2.q_0 = p_readinfo[17 + 2]; // quantizer steps
+        hls_qm2.q_n = p_readinfo[18 + 2];
+        hls_qm2.iq_0 = p_readinfo[19 + 2]; // reciprocals fixed point.
+        hls_qm2.iq_n = p_readinfo[20 + 2];
+        hls_qm2.bias_0 = p_readinfo[21 + 2]; // rounding bias
+        hls_qm2.bias_n = p_readinfo[22 + 2];
+
+        hls_qm_uv.q_0 = p_readinfo[23 + 2]; // quantizer steps
+        hls_qm_uv.q_n = p_readinfo[24 + 2];
+        hls_qm_uv.iq_0 = p_readinfo[25 + 2]; // reciprocals fixed point.
+        hls_qm_uv.iq_n = p_readinfo[26 + 2];
+        hls_qm_uv.bias_0 = p_readinfo[27 + 2]; // rounding bias
+        hls_qm_uv.bias_n = p_readinfo[28 + 2];
+        // Unpack from the local copy like every other field; the values equal
+        // p_info[31..46] because p_readinfo was filled from p_info above, and
+        // this avoids extra reads on the p_info m_axi port.
+        for (int i = 0; i < 16; i++)
+#pragma HLS UNROLL
+            VCT_GET(ap_sharpen, i, WD_sharpen) = p_readinfo[29 + 2 + i];
+        for (int i = 0; i < 16; i++)
+#pragma HLS UNROLL
+            VCT_GET(ap_sharpen_uv, i, WD_sharpen) = p_readinfo[29 + 2 + 16 + i];
+    } // end of initialization
+    // Receives the "dirty" flag from the callee; not used by this kernel afterwards.
+    int dirty = 0;
+    TopVp8_top_dataflow_32bit_k1NoStruct_cnt_DeepFIFO(id_pic,     // p_info[0],
+                                                      mb_line,    // p_info[1],
+                                                      y_stride,   // p_info[2],  // ,//pic->y_stride,
+                                                      uv_stride,  // p_info[3], // ,//pic->uv_stride
+                                                      width,      // p_info[4],  // ,//pic->width
+                                                      height,     // p_info[5],  // ,//pic->height
+                                                      mb_w,       // p_info[2+2+2],///,
+                                                      mb_h,       // p_info[3+2+2],//,
+                                                      lambda_p16, // p_info[4+2+2],//dqm->lambda_i16_,
+                                                      lambda_p44, // p_info[5+2+2],//dqm->lambda_i4_,
+                                                      tlambda,    // p_info[6+2+2],//dqm->tlambda_,
+                                                      lambda_uv,  // p_info[7+2+2],//dqm->lambda_uv_,
+                                                      tlambda_m,  // p_info[8+2+2],//dqm->lambda_mode_,
+                                                      hls_qm1, hls_qm2, hls_qm_uv, ap_sharpen, ap_sharpen_uv,
+                                                      ysrc,       // 4096x4096
+                                                      usrc,       // 2048x2048
+                                                      vsrc,       // 2048x2048
+                                                      pout_level, // 65536*512
+                                                      pout_prob, &dirty);
+}
+}
+
+extern "C" {
+// HLS kernel entry point (instance 4).
+//
+// Copies the first 64 words of `p_info` into a local buffer, unpacks the
+// picture/quantizer parameters from it and forwards everything to
+// TopVp8_top_dataflow_32bit_k1NoStruct_cnt_DeepFIFO.
+//
+// p_info word layout as unpacked below:
+//   [0] id_pic, [1] mb_line, [2] y_stride, [3] uv_stride, [4] width,
+//   [5] height, [6] mb_w, [7] mb_h, [8] lambda_p16, [9] lambda_p44,
+//   [10] tlambda, [11] lambda_uv, [12] tlambda_m,
+//   [13..18] hls_qm1, [19..24] hls_qm2, [25..30] hls_qm_uv
+//            (each: q_0, q_n, iq_0, iq_n, bias_0, bias_n),
+//   [31..46] ap_sharpen, [47..62] ap_sharpen_uv.
+//
+// ysrc / usrc / vsrc, pout_level and pout_prob are passed through unchanged.
+void kernel_IntraPredLoop2_NoOut_4_6axi(
+    int32_t* p_info, uint32_t* ysrc, uint32_t* usrc, uint32_t* vsrc, int32_t* pout_level, uint8_t* pout_prob) {
+#pragma HLS INTERFACE m_axi port = pout_level offset = slave bundle = gmem1 depth =              \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = p_info offset = slave bundle = gmem0 depth = 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = ysrc offset = slave bundle = gmem2 depth =                    \
+    4096 * 4096 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = usrc offset = slave bundle = gmem3 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = vsrc offset = slave bundle = gmem4 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_prob offset = slave bundle = gmem5 depth = 2048 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+
+#pragma HLS INTERFACE s_axilite port = p_info bundle = control
+#pragma HLS INTERFACE s_axilite port = ysrc bundle = control
+#pragma HLS INTERFACE s_axilite port = usrc bundle = control
+#pragma HLS INTERFACE s_axilite port = vsrc bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+
+    // Local copy of the parameter block; all fields below are unpacked from it.
+    int p_readinfo[64];
+    memcpy(p_readinfo, p_info, 64 * sizeof(int));
+    ap_uint<32> id_pic;
+    ap_uint<32> mb_line;
+    ap_uint<LG2_MAX_W_PIX> y_stride;
+    ap_uint<LG2_MAX_W_PIX> uv_stride;
+    ap_uint<LG2_MAX_W_PIX> width;
+    ap_uint<LG2_MAX_W_PIX> height;
+    ap_uint<LG2_MAX_NUM_MB_W> mb_w;
+    ap_uint<LG2_MAX_NUM_MB_H> mb_h;
+    ap_uint<WD_LMD> lambda_p16;
+    ap_uint<WD_LMD> lambda_p44;
+    ap_uint<WD_LMD> tlambda;
+    ap_uint<WD_LMD> lambda_uv;
+    ap_uint<WD_LMD> tlambda_m;
+    hls_QMatrix hls_qm1, hls_qm2, hls_qm_uv;
+    ap_int<WD_sharpen * 16> ap_sharpen, ap_sharpen_uv;
+
+    // Initializing image variables, once for one picture
+    { // For convenience, extend the code at top module to show all parameters used by kernel of intra-prediction
+        id_pic = p_readinfo[0];  // reserved for future
+        mb_line = p_readinfo[1]; // reserved for future, to show current line number of mb
+        y_stride = p_readinfo[2];
+        uv_stride = p_readinfo[3];
+        width = p_readinfo[4];
+        height = p_readinfo[5];
+        mb_w = p_readinfo[2 + 2 + 2];
+        mb_h = p_readinfo[3 + 2 + 2];
+        lambda_p16 = p_readinfo[4 + 2 + 2];
+        lambda_p44 = p_readinfo[5 + 2 + 2];
+        tlambda = p_readinfo[6 + 2 + 2];
+        lambda_uv = p_readinfo[7 + 2 + 2];
+        tlambda_m = p_readinfo[8 + 2 + 2];
+
+        hls_qm1.q_0 = p_readinfo[11 + 2]; // quantizer steps
+        hls_qm1.q_n = p_readinfo[12 + 2];
+        hls_qm1.iq_0 = p_readinfo[13 + 2]; // reciprocals fixed point.
+        hls_qm1.iq_n = p_readinfo[14 + 2];
+        hls_qm1.bias_0 = p_readinfo[15 + 2]; // rounding bias
+        hls_qm1.bias_n = p_readinfo[16 + 2];
+
+        hls_qm2.q_0 = p_readinfo[17 + 2]; // quantizer steps
+        hls_qm2.q_n = p_readinfo[18 + 2];
+        hls_qm2.iq_0 = p_readinfo[19 + 2]; // reciprocals fixed point.
+        hls_qm2.iq_n = p_readinfo[20 + 2];
+        hls_qm2.bias_0 = p_readinfo[21 + 2]; // rounding bias
+        hls_qm2.bias_n = p_readinfo[22 + 2];
+
+        hls_qm_uv.q_0 = p_readinfo[23 + 2]; // quantizer steps
+        hls_qm_uv.q_n = p_readinfo[24 + 2];
+        hls_qm_uv.iq_0 = p_readinfo[25 + 2]; // reciprocals fixed point.
+        hls_qm_uv.iq_n = p_readinfo[26 + 2];
+        hls_qm_uv.bias_0 = p_readinfo[27 + 2]; // rounding bias
+        hls_qm_uv.bias_n = p_readinfo[28 + 2];
+        // Unpack from the local copy like every other field; the values equal
+        // p_info[31..46] because p_readinfo was filled from p_info above, and
+        // this avoids extra reads on the p_info m_axi port.
+        for (int i = 0; i < 16; i++)
+#pragma HLS UNROLL
+            VCT_GET(ap_sharpen, i, WD_sharpen) = p_readinfo[29 + 2 + i];
+        for (int i = 0; i < 16; i++)
+#pragma HLS UNROLL
+            VCT_GET(ap_sharpen_uv, i, WD_sharpen) = p_readinfo[29 + 2 + 16 + i];
+    } // end of initialization
+    // Receives the "dirty" flag from the callee; not used by this kernel afterwards.
+    int dirty = 0;
+    TopVp8_top_dataflow_32bit_k1NoStruct_cnt_DeepFIFO(id_pic,     // p_info[0],
+                                                      mb_line,    // p_info[1],
+                                                      y_stride,   // p_info[2],  // ,//pic->y_stride,
+                                                      uv_stride,  // p_info[3], // ,//pic->uv_stride
+                                                      width,      // p_info[4],  // ,//pic->width
+                                                      height,     // p_info[5],  // ,//pic->height
+                                                      mb_w,       // p_info[2+2+2],///,
+                                                      mb_h,       // p_info[3+2+2],//,
+                                                      lambda_p16, // p_info[4+2+2],//dqm->lambda_i16_,
+                                                      lambda_p44, // p_info[5+2+2],//dqm->lambda_i4_,
+                                                      tlambda,    // p_info[6+2+2],//dqm->tlambda_,
+                                                      lambda_uv,  // p_info[7+2+2],//dqm->lambda_uv_,
+                                                      tlambda_m,  // p_info[8+2+2],//dqm->lambda_mode_,
+                                                      hls_qm1, hls_qm2, hls_qm_uv, ap_sharpen, ap_sharpen_uv,
+                                                      ysrc,       // 4096x4096
+                                                      usrc,       // 2048x2048
+                                                      vsrc,       // 2048x2048
+                                                      pout_level, // 65536*512
+                                                      pout_prob, &dirty);
+}
+}
+void kernel_IntraPredLoop2_NoOut_core_wrapper( // 1)	Set protocol of Id_pic : 63take fully use of 4k x 4k buff)
+    int32_t* p_info_mult,                      // 2)	Enlarge size of p_info 64 times
+    uint32_t* ysrc_mult,                       // 3)	The largest size of pout_level, ysrc, usrc and vsrc do not change.
+    uint32_t* usrc_mult,                       // 3)	Offset stride is the MB number x 256/4 for y, x 64/4 for u and v;
+    uint32_t* vsrc_mult,
+    int32_t* pout_level_mult, // 4)	Change protocol of pout_level:
+    uint8_t* pout_prob_mult)  // 5)	Enlarge size of pout_prob 64 times;
+{
+    int pid_mult = p_info_mult[0];
+    ;
+    int32_t* p_info = p_info_mult;
+    uint32_t* ysrc = ysrc_mult; //,             //3)	The largest size of pout_level, ysrc, usrc and vsrc do not
+                                // change.
+    uint32_t* usrc = usrc_mult; //,				//3)	Offset stride is the MB number x 256/4 for y, x
+                                // 64/4 for u and v;
+    uint32_t* vsrc = vsrc_mult; //,
+    int32_t* pout_level = pout_level_mult; //,        //4)	Change protocol of pout_level:
+    uint8_t* pout_prob = pout_prob_mult;   //
+TOP_LOOP:
+    for (int toploop = 0; toploop <= pid_mult; toploop++) {
+        // do{
+        int p_readinfo[64];
+        memcpy(p_readinfo, p_info, 64 * sizeof(int));
+        ap_uint<32> id_pic;
+        ap_uint<32> mb_line;
+        ap_uint<LG2_MAX_W_PIX> y_stride;
+        ap_uint<LG2_MAX_W_PIX> uv_stride;
+        ap_uint<LG2_MAX_W_PIX> width;
+        ap_uint<LG2_MAX_W_PIX> height;
+        ap_uint<LG2_MAX_NUM_MB_W> mb_w;
+        ap_uint<LG2_MAX_NUM_MB_H> mb_h;
+        ap_uint<WD_LMD> lambda_p16;
+        ap_uint<WD_LMD> lambda_p44;
+        ap_uint<WD_LMD> tlambda;
+        ap_uint<WD_LMD> lambda_uv;
+        ap_uint<WD_LMD> tlambda_m;
+        hls_QMatrix hls_qm1, hls_qm2, hls_qm_uv;
+        ap_int<WD_sharpen * 16> ap_sharpen, ap_sharpen_uv;
+
+        // Initializing image variables, once for one picture
+        { // For convenience, extend the code at top module to show all parameters used by kernel of intra-prediction
+            id_pic = p_readinfo[0];  // reserved for future
+            mb_line = p_readinfo[1]; // reserved for future, to show current line number of mb
+            y_stride = p_readinfo[2];
+            uv_stride = p_readinfo[3];
+            width = p_readinfo[4];
+            height = p_readinfo[5];
+            mb_w = p_readinfo[2 + 2 + 2];
+            mb_h = p_readinfo[3 + 2 + 2];
+            lambda_p16 = p_readinfo[4 + 2 + 2];
+            lambda_p44 = p_readinfo[5 + 2 + 2];
+            tlambda = p_readinfo[6 + 2 + 2];
+            lambda_uv = p_readinfo[7 + 2 + 2];
+            tlambda_m = p_readinfo[8 + 2 + 2];
+
+            hls_qm1.q_0 = p_readinfo[11 + 2]; // quantizer steps
+            hls_qm1.q_n = p_readinfo[12 + 2];
+            hls_qm1.iq_0 = p_readinfo[13 + 2]; // reciprocals fixed point.
+            hls_qm1.iq_n = p_readinfo[14 + 2];
+            hls_qm1.bias_0 = p_readinfo[15 + 2]; // rounding bias
+            hls_qm1.bias_n = p_readinfo[16 + 2];
+
+            hls_qm2.q_0 = p_readinfo[17 + 2]; // quantizer steps
+            hls_qm2.q_n = p_readinfo[18 + 2];
+            hls_qm2.iq_0 = p_readinfo[19 + 2]; // reciprocals fixed point.
+            hls_qm2.iq_n = p_readinfo[20 + 2];
+            hls_qm2.bias_0 = p_readinfo[21 + 2]; // rounding bias
+            hls_qm2.bias_n = p_readinfo[22 + 2];
+
+            hls_qm_uv.q_0 = p_readinfo[23 + 2]; // quantizer steps
+            hls_qm_uv.q_n = p_readinfo[24 + 2];
+            hls_qm_uv.iq_0 = p_readinfo[25 + 2]; // reciprocals fixed point.
+            hls_qm_uv.iq_n = p_readinfo[26 + 2];
+            hls_qm_uv.bias_0 = p_readinfo[27 + 2]; // rounding bias
+            hls_qm_uv.bias_n = p_readinfo[28 + 2];
+            for (int i = 0; i < 16; i++)
+#pragma HLS UNROLL
+                VCT_GET(ap_sharpen, i, WD_sharpen) = p_info[29 + 2 + i];
+            for (int i = 0; i < 16; i++)
+#pragma HLS UNROLL
+                VCT_GET(ap_sharpen_uv, i, WD_sharpen) = p_readinfo[29 + 2 + 16 + i];
+        } // end of initialization
+
+        int dirty = 0;
+        TopVp8_top_dataflow_32bit_k1NoStruct_cnt_DeepFIFO(id_pic,     // p_info[0],
+                                                          mb_line,    // p_info[1],
+                                                          y_stride,   // p_info[2],  // ,//pic->y_stride,
+                                                          uv_stride,  // p_info[3], // ,//pic->uv_stride
+                                                          width,      // p_info[4],  // ,//pic->width
+                                                          height,     // p_info[5],  // ,//pic->height
+                                                          mb_w,       // p_info[2+2+2],///,
+                                                          mb_h,       // p_info[3+2+2],//,
+                                                          lambda_p16, // p_info[4+2+2],//dqm->lambda_i16_,
+                                                          lambda_p44, // p_info[5+2+2],//dqm->lambda_i4_,
+                                                          tlambda,    // p_info[6+2+2],//dqm->tlambda_,
+                                                          lambda_uv,  // p_info[7+2+2],//dqm->lambda_uv_,
+                                                          tlambda_m,  // p_info[8+2+2],//dqm->lambda_mode_,
+                                                          hls_qm1, hls_qm2, hls_qm_uv, ap_sharpen, ap_sharpen_uv,
+                                                          ysrc,       // 4096x4096
+                                                          usrc,       // 2048x2048
+                                                          vsrc,       // 2048x2048
+                                                          pout_level, // 65536*512
+                                                          pout_prob, &dirty);
+        int num_mb = mb_w * mb_h;
+        int* ptmp_prob = (int*)pout_prob;
+        ptmp_prob[OFF_NUM_MB_32] = num_mb; // will be sued by kernel 2
+        ptmp_prob[OFF_PID_PROB_8BIT / 4] = pid_mult - toploop;
+        int offset_info = Get_Busoffset_info_32bits();
+        int offset_ysrc = Get_Busoffset_ysrc(width * height);
+        int offset_uv = Get_Busoffset_uvsrc(((width + 1) >> 1) * ((height + 1) >> 1));
+        int offset_level = Get_Busoffset_level(num_mb); // 32bits only
+        int offset_prob = 2048;                         // 8 bits only
+        p_info += offset_info;
+        ysrc += offset_ysrc;
+        usrc += offset_uv;
+        vsrc += offset_uv;
+        pout_level += offset_level;
+        pout_prob += offset_prob;
+    } // while( pid_mult!=0);
+}
+
+// namespace xf {
+// namespace codec {
+extern "C" {
+void webp_IntraPredLoop2_NoOut_1(
+    int32_t* p_info, uint32_t* ysrc, uint32_t* usrc, uint32_t* vsrc, int32_t* pout_level, uint8_t* pout_prob) {
+#pragma HLS INTERFACE m_axi port = pout_level offset = slave bundle = gmem0 depth =              \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = p_info offset = slave bundle = gmem1 depth = 64 * 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = ysrc offset = slave bundle = gmem1 depth =                    \
+    4096 * 4096 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = usrc offset = slave bundle = gmem1 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = vsrc offset = slave bundle = gmem1 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_prob offset = slave bundle = gmem2 depth = 2048 * 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE s_axilite port = p_info bundle = control
+#pragma HLS INTERFACE s_axilite port = ysrc bundle = control
+#pragma HLS INTERFACE s_axilite port = usrc bundle = control
+#pragma HLS INTERFACE s_axilite port = vsrc bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+    // HLS entry (C linkage): pout_level -> gmem0, p_info/ysrc/usrc/vsrc -> gmem1, pout_prob -> gmem2; always forwards to the core wrapper. The loop commented out below is inactive.
+    /*    for(int i=0;i<5;i++){
+            for(int j=0;j<256;j++){
+                    pout_prob[i*2048+j]=p_info[j]>>24;
+                    pout_prob[i*2048+j]=p_info[j]>>16;
+                    pout_prob[i*2048+j]=p_info[j]>>8;
+                    pout_prob[i*2048+j]=p_info[j];
+
+            }
+            for(int j=0;j<1024;j++){
+
+            if(j==1200-1024)
+                    pout_prob[i*2048+1200]=5-1-i;
+            else
+                    pout_prob[i*2048+1024+j]=(j&(15<<4))+i;
+            }
+        }*/
+    kernel_IntraPredLoop2_NoOut_core_wrapper(p_info, ysrc, usrc, vsrc, pout_level, pout_prob);
+}
+}
+//} // namespace codec
+//} // namespace xf
+
+extern "C" {
+void kernel_IntraPredLoop2_NoOut_2(
+    int32_t* p_info, uint32_t* ysrc, uint32_t* usrc, uint32_t* vsrc, int32_t* pout_level, uint8_t* pout_prob) {
+#pragma HLS INTERFACE m_axi port = pout_level offset = slave bundle = gmem0 depth =              \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = p_info offset = slave bundle = gmem1 depth = 64 * 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = ysrc offset = slave bundle = gmem1 depth =                    \
+    4096 * 4096 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = usrc offset = slave bundle = gmem1 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = vsrc offset = slave bundle = gmem1 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_prob offset = slave bundle = gmem2 depth = 2048 * 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE s_axilite port = p_info bundle = control
+#pragma HLS INTERFACE s_axilite port = ysrc bundle = control
+#pragma HLS INTERFACE s_axilite port = usrc bundle = control
+#pragma HLS INTERFACE s_axilite port = vsrc bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+    // HLS entry (C linkage): pout_level -> gmem0, p_info/ysrc/usrc/vsrc -> gmem1, pout_prob -> gmem2; USING_PIC_BURST selects the core wrapper, otherwise the core is called directly.
+#ifdef USING_PIC_BURST
+    kernel_IntraPredLoop2_NoOut_core_wrapper(p_info, ysrc, usrc, vsrc, pout_level, pout_prob);
+#else
+    kernel_IntraPredLoop2_NoOut_core(p_info, ysrc, usrc, vsrc, pout_level, pout_prob);
+#endif
+}
+}
+
+extern "C" {
+void kernel_IntraPredLoop2_NoOut_3(
+    int32_t* p_info, uint32_t* ysrc, uint32_t* usrc, uint32_t* vsrc, int32_t* pout_level, uint8_t* pout_prob) {
+#pragma HLS INTERFACE m_axi port = pout_level offset = slave bundle = gmem0 depth =              \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = p_info offset = slave bundle = gmem1 depth = 64 * 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = ysrc offset = slave bundle = gmem1 depth =                    \
+    4096 * 4096 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = usrc offset = slave bundle = gmem1 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = vsrc offset = slave bundle = gmem1 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_prob offset = slave bundle = gmem2 depth = 2048 * 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE s_axilite port = p_info bundle = control
+#pragma HLS INTERFACE s_axilite port = ysrc bundle = control
+#pragma HLS INTERFACE s_axilite port = usrc bundle = control
+#pragma HLS INTERFACE s_axilite port = vsrc bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+    // HLS entry (C linkage): pout_level -> gmem0, p_info/ysrc/usrc/vsrc -> gmem1, pout_prob -> gmem2; USING_PIC_BURST selects the core wrapper, otherwise the core is called directly.
+#ifdef USING_PIC_BURST
+    kernel_IntraPredLoop2_NoOut_core_wrapper(p_info, ysrc, usrc, vsrc, pout_level, pout_prob);
+#else
+    kernel_IntraPredLoop2_NoOut_core(p_info, ysrc, usrc, vsrc, pout_level, pout_prob);
+#endif
+}
+}
+
+extern "C" {
+void kernel_IntraPredLoop2_NoOut_4(
+    int32_t* p_info, uint32_t* ysrc, uint32_t* usrc, uint32_t* vsrc, int32_t* pout_level, uint8_t* pout_prob) {
+#pragma HLS INTERFACE m_axi port = pout_level offset = slave bundle = gmem0 depth =              \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = p_info offset = slave bundle = gmem1 depth = 64 * 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = ysrc offset = slave bundle = gmem1 depth =                    \
+    4096 * 4096 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = usrc offset = slave bundle = gmem1 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = vsrc offset = slave bundle = gmem1 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_prob offset = slave bundle = gmem2 depth = 2048 * 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE s_axilite port = p_info bundle = control
+#pragma HLS INTERFACE s_axilite port = ysrc bundle = control
+#pragma HLS INTERFACE s_axilite port = usrc bundle = control
+#pragma HLS INTERFACE s_axilite port = vsrc bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+    // HLS entry (C linkage): pout_level -> gmem0, p_info/ysrc/usrc/vsrc -> gmem1, pout_prob -> gmem2; USING_PIC_BURST selects the core wrapper, otherwise the core is called directly.
+#ifdef USING_PIC_BURST
+    kernel_IntraPredLoop2_NoOut_core_wrapper(p_info, ysrc, usrc, vsrc, pout_level, pout_prob);
+#else
+    kernel_IntraPredLoop2_NoOut_core(p_info, ysrc, usrc, vsrc, pout_level, pout_prob);
+#endif
+}
+}
+
+extern "C" {
+void kernel_IntraPredLoop2_NoOut_5(
+    int32_t* p_info, uint32_t* ysrc, uint32_t* usrc, uint32_t* vsrc, int32_t* pout_level, uint8_t* pout_prob) {
+#pragma HLS INTERFACE m_axi port = pout_level offset = slave bundle = gmem0 depth =              \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = p_info offset = slave bundle = gmem1 depth = 64 * 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = ysrc offset = slave bundle = gmem1 depth =                    \
+    4096 * 4096 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = usrc offset = slave bundle = gmem1 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = vsrc offset = slave bundle = gmem1 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_prob offset = slave bundle = gmem2 depth = 2048 * 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE s_axilite port = p_info bundle = control
+#pragma HLS INTERFACE s_axilite port = ysrc bundle = control
+#pragma HLS INTERFACE s_axilite port = usrc bundle = control
+#pragma HLS INTERFACE s_axilite port = vsrc bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+    // HLS entry (C linkage): pout_level -> gmem0, p_info/ysrc/usrc/vsrc -> gmem1, pout_prob -> gmem2; USING_PIC_BURST selects the core wrapper, otherwise the core is called directly.
+#ifdef USING_PIC_BURST
+    kernel_IntraPredLoop2_NoOut_core_wrapper(p_info, ysrc, usrc, vsrc, pout_level, pout_prob);
+#else
+    kernel_IntraPredLoop2_NoOut_core(p_info, ysrc, usrc, vsrc, pout_level, pout_prob);
+#endif
+}
+}
+
+extern "C" {
+void kernel_IntraPredLoop2_NoOut_6(
+    int32_t* p_info, uint32_t* ysrc, uint32_t* usrc, uint32_t* vsrc, int32_t* pout_level, uint8_t* pout_prob) {
+#pragma HLS INTERFACE m_axi port = pout_level offset = slave bundle = gmem0 depth =              \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = p_info offset = slave bundle = gmem1 depth = 64 * 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = ysrc offset = slave bundle = gmem1 depth =                    \
+    4096 * 4096 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = usrc offset = slave bundle = gmem1 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = vsrc offset = slave bundle = gmem1 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_prob offset = slave bundle = gmem2 depth = 2048 * 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE s_axilite port = p_info bundle = control
+#pragma HLS INTERFACE s_axilite port = ysrc bundle = control
+#pragma HLS INTERFACE s_axilite port = usrc bundle = control
+#pragma HLS INTERFACE s_axilite port = vsrc bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+    // HLS entry (C linkage): pout_level -> gmem0, p_info/ysrc/usrc/vsrc -> gmem1, pout_prob -> gmem2; always forwards to the core wrapper (no USING_PIC_BURST switch in this kernel).
+    kernel_IntraPredLoop2_NoOut_core_wrapper(p_info, ysrc, usrc, vsrc, pout_level, pout_prob);
+}
+}
+
+extern "C" {
+void kernel_IntraPredLoop2_NoOut_7(
+    int32_t* p_info, uint32_t* ysrc, uint32_t* usrc, uint32_t* vsrc, int32_t* pout_level, uint8_t* pout_prob) {
+#pragma HLS INTERFACE m_axi port = pout_level offset = slave bundle = gmem0 depth =              \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = p_info offset = slave bundle = gmem1 depth = 64 * 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = ysrc offset = slave bundle = gmem1 depth =                    \
+    4096 * 4096 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = usrc offset = slave bundle = gmem1 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = vsrc offset = slave bundle = gmem1 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_prob offset = slave bundle = gmem2 depth = 2048 * 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE s_axilite port = p_info bundle = control
+#pragma HLS INTERFACE s_axilite port = ysrc bundle = control
+#pragma HLS INTERFACE s_axilite port = usrc bundle = control
+#pragma HLS INTERFACE s_axilite port = vsrc bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+    // HLS entry (C linkage): pout_level -> gmem0, p_info/ysrc/usrc/vsrc -> gmem1, pout_prob -> gmem2; USING_PIC_BURST selects the core wrapper, otherwise the core is called directly.
+#ifdef USING_PIC_BURST
+    kernel_IntraPredLoop2_NoOut_core_wrapper(p_info, ysrc, usrc, vsrc, pout_level, pout_prob);
+#else
+    kernel_IntraPredLoop2_NoOut_core(p_info, ysrc, usrc, vsrc, pout_level, pout_prob);
+#endif
+}
+}
+
+extern "C" {
+void kernel_IntraPredLoop2_NoOut_8(
+    int32_t* p_info, uint32_t* ysrc, uint32_t* usrc, uint32_t* vsrc, int32_t* pout_level, uint8_t* pout_prob) {
+#pragma HLS INTERFACE m_axi port = pout_level offset = slave bundle = gmem0 depth =              \
+    65536 * 512 / 2 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = p_info offset = slave bundle = gmem1 depth = 64 * 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = ysrc offset = slave bundle = gmem1 depth =                    \
+    4096 * 4096 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = usrc offset = slave bundle = gmem1 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = vsrc offset = slave bundle = gmem1 depth =                    \
+    2048 * 2048 / 4 num_read_outstanding = 32 num_write_outstanding = 32 max_read_burst_length = \
+        16 max_write_burst_length = 16
+#pragma HLS INTERFACE m_axi port = pout_prob offset = slave bundle = gmem2 depth = 2048 * 64 num_read_outstanding = \
+    32 num_write_outstanding = 32 max_read_burst_length = 16 max_write_burst_length = 16
+#pragma HLS INTERFACE s_axilite port = p_info bundle = control
+#pragma HLS INTERFACE s_axilite port = ysrc bundle = control
+#pragma HLS INTERFACE s_axilite port = usrc bundle = control
+#pragma HLS INTERFACE s_axilite port = vsrc bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_level bundle = control
+#pragma HLS INTERFACE s_axilite port = pout_prob bundle = control
+#pragma HLS INTERFACE s_axilite port = return bundle = control
+    // HLS entry (C linkage): pout_level -> gmem0, p_info/ysrc/usrc/vsrc -> gmem1, pout_prob -> gmem2; USING_PIC_BURST selects the core wrapper, otherwise the core is called directly.
+#ifdef USING_PIC_BURST
+    kernel_IntraPredLoop2_NoOut_core_wrapper(p_info, ysrc, usrc, vsrc, pout_level, pout_prob);
+#else
+    kernel_IntraPredLoop2_NoOut_core(p_info, ysrc, usrc, vsrc, pout_level, pout_prob);
+#endif
+}
+}
diff --git a/codec/L2/demos/webpEnc/list.rst b/codec/L2/demos/webpEnc/list.rst
new file mode 100644
index 0000000000..47afefe3e9
--- /dev/null
+++ b/codec/L2/demos/webpEnc/list.rst
@@ -0,0 +1 @@
+images/small32x32.png
diff --git a/codec/L2/demos/webpEnc/utils.mk b/codec/L2/demos/webpEnc/utils.mk
new file mode 100644
index 0000000000..0ee80e90da
--- /dev/null
+++ b/codec/L2/demos/webpEnc/utils.mk
@@ -0,0 +1,270 @@
+#
+# Copyright 2019-2022 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# vitis makefile-generator v2.0.6
+#
+#+-------------------------------------------------------------------------------
+# The following parameters are assigned with default values. These parameters can
+# be overridden through the make command line
+#+-------------------------------------------------------------------------------
+
+REPORT := no
+PROFILE := no
+DEBUG := no
+
+#Any REPORT value other than 'no' appends both report kinds to VPP_LDFLAGS:
+#'estimate' for estimate report generation and 'system' for system report generation
+ifneq ($(REPORT), no)
+VPP_LDFLAGS += --report estimate
+VPP_LDFLAGS += --report system
+endif
+
+#Generates profile summary report: PROFILE=yes appends --profile_kernel data:all:all:all to VPP_LDFLAGS
+ifeq ($(PROFILE), yes)
+VPP_LDFLAGS += --profile_kernel data:all:all:all
+endif
+
+#Generates debug summary report: DEBUG=yes appends --dk protocol:all:all:all to VPP_LDFLAGS
+ifeq ($(DEBUG), yes)
+VPP_LDFLAGS += --dk protocol:all:all:all
+endif
+
+#Check environment setup: when XILINX_VITIS / XILINX_XRT are not defined, fall back to the /opt/xilinx default paths and export them (TOOL_VERSION is not assigned anywhere in this file)
+ifndef XILINX_VITIS
+  XILINX_VITIS = /opt/xilinx/Vitis/$(TOOL_VERSION)
+  export XILINX_VITIS
+endif
+ifndef XILINX_XRT
+  XILINX_XRT = /opt/xilinx/xrt
+  export XILINX_XRT
+endif
+
+# Warn if PLATFORM_NAME matches no PLATFORM_ALLOWLIST pattern; fail if it matches a
+# PLATFORM_BLOCKLIST pattern. POSIX test syntax only, so /bin/sh need not be bash.
+.PHONY: check_device
+check_device:
+	@set -eu; \
+	inallowlist=False; inblocklist=False; \
+	for dev in $(PLATFORM_ALLOWLIST); do \
+	    if [ -n "$$(echo "$(PLATFORM_NAME)" | grep "$$dev")" ]; then inallowlist=True; fi; \
+	done; \
+	for dev in $(PLATFORM_BLOCKLIST); do \
+	    if [ -n "$$(echo "$(PLATFORM_NAME)" | grep "$$dev")" ]; then inblocklist=True; fi; \
+	done; \
+	if [ "$$inallowlist" = False ]; then \
+	    echo "[Warning]: The device $(PLATFORM_NAME) not in allowlist."; \
+	fi; \
+	if [ "$$inblocklist" = True ]; then \
+	    echo "[ERROR]: The device $(PLATFORM_NAME) in blocklist."; exit 1; \
+	fi
+
+#get HOST_ARCH by PLATFORM: query the 'CPU Type' from platforminfo once (:= keeps the three tests below from re-running it) and map it to x86 / aarch32 / aarch64
+ifneq (,$(PLATFORM))
+HOST_ARCH_temp := $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
+ifeq ($(HOST_ARCH_temp), x86)
+HOST_ARCH := x86
+else ifeq ($(HOST_ARCH_temp), cortex-a9)
+HOST_ARCH := aarch32
+else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
+HOST_ARCH := aarch64
+endif
+endif
+
+
+#get suffix of kernel by PLATFORM: xsa only for versal devices with Vitis >= 2022.1, otherwise xclbin
+# DEVICE_TYPE is tested below on every parse, so := runs platforminfo exactly once; VITIS_VER stays
+# recursive so v++ is only invoked when a version comparison is actually evaluated.
+# NOTE(review): expr treats the dotted versions as strings, not numbers - confirm that is acceptable.
+VITIS_VER = $(shell v++ --version | grep 'v++' | sed 's/^[[:space:]]*//' | sed -e 's/^[*]* v++ v//g' | cut -d " " -f1)
+DEVICE_TYPE := $(shell platforminfo -p $(PLATFORM) | grep 'FPGA Family' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
+LINK_TARGET_FMT := xclbin
+ifeq ($(DEVICE_TYPE), versal)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+LINK_TARGET_FMT := xsa
+endif
+endif
+
+#Checks for Device Family: aarch32 maps to 7Series and aarch64 to Ultrascale; DEV_FAM is not assigned here for any other HOST_ARCH. No leading TAB on the assignments, so they can never be read as recipe lines
+ifeq ($(HOST_ARCH), aarch32)
+DEV_FAM = 7Series
+else ifeq ($(HOST_ARCH), aarch64)
+DEV_FAM = Ultrascale
+endif
+
+#Checks for Correct architecture: stops parsing when HOST_ARCH is set to anything other than aarch64, aarch32 or x86. NOTE(review): an empty HOST_ARCH makes both sides of the comparison empty, so the error does not fire for an unset HOST_ARCH - confirm whether that is intended
+ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
+$(error HOST_ARCH variable not set, please set correctly and rerun)
+endif
+
+# check_version: print the newest commit of $(XFLIB_DIR) when `which git` finds git and the directory contains a .git entry; otherwise the target does nothing
+.PHONY: check_version
+check_version:
+ifneq (,$(and $(shell which git),$(wildcard $(XFLIB_DIR)/.git)))
+	@cd $(XFLIB_DIR) && git log --graph --pretty=format:'%Cred%h%Creset -%C(yellow)%d%Creset %s %Cgreen(%cr) %C(bold blue)<%an>%Creset' --abbrev-commit -n 1 && cd -
+endif
+
+#Checks for SYSROOT: for a non-x86 HOST_ARCH with SYSROOT undefined, the recipe consists of an $(error), so make aborts when it expands the check_sysroot recipe; in every other case the target has no recipe
+check_sysroot:
+ifneq ($(HOST_ARCH), x86)
+ifndef SYSROOT
+	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+
+#Checks for g++: on x86 the major version of the system g++ (__GNUG__) must reach that of CXX_VER (8.3.0 when VITIS_VER >= 2022.1, else 6.2.0); otherwise the gcc under $(XILINX_VIVADO)/tps is used and its lib64 exported on LD_LIBRARY_PATH, or parsing stops with an error when XILINX_VIVADO is not defined. aarch64/aarch32 take the cross g++ under $(XILINX_VITIS)/gnu
+CXX := g++
+ifeq ($(HOST_ARCH), x86)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif
+CXX_V := $(shell echo $(CXX_VER) | awk -F. '{print tolower($$1)}')
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
+ifndef XILINX_VIVADO
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
+else
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
+ifeq ($(LD_LIBRARY_PATH),)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
+else
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
+endif
+$(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
+endif
+endif
+else ifeq ($(HOST_ARCH), aarch64)
+CXX := $(XILINX_VITIS)/gnu/aarch64/lin/aarch64-linux/bin/aarch64-linux-gnu-g++
+else ifeq ($(HOST_ARCH), aarch32)
+CXX := $(XILINX_VITIS)/gnu/aarch32/lin/gcc-arm-linux-gnueabi/bin/arm-linux-gnueabihf-g++
+endif
+
+#Check OS and setting env for xrt c++ api
+# OSDIST and OSREL are the distributor id and release printed by lsb_release, lower-cased
+# and with blanks/tabs removed.
+# OSDIST is tested on every parse, so := forks lsb_release once instead of once per
+# reference; OSREL stays recursive because it is only read for centos/redhat.
+OSDIST := $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
+OSREL = $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
+
+# for centos and redhat
+# The two distributions are handled identically, so a single test covers both: on major
+# release 7 with an x86 host, -D_GLIBCXX_USE_CXX11_ABI=0 is appended to XRT_CXXFLAGS.
+ifneq ($(findstring centos,$(OSDIST))$(findstring redhat,$(OSDIST)),)
+ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+endif
+
+#Setting VPP (the bare v++ command name, resolved through PATH)
+VPP := v++
+
+#Command names of aiecompiler, aiesimulator and x86simulator; nothing is actually checked here
+AIECXX := aiecompiler
+AIESIMULATOR := aiesimulator
+X86SIMULATOR := x86simulator
+#check_vivado / check_vpp / check_xrt: each prints a hint and fails unless its probe file exists: $(XILINX_VIVADO)/bin/vivado, $(XILINX_VITIS)/bin/v++, $(XILINX_XRT)/lib/libxilinxopencl.so
+.PHONY: check_vivado
+check_vivado:
+ifeq (,$(wildcard $(XILINX_VIVADO)/bin/vivado))
+	@echo "Cannot locate Vivado installation. Please set XILINX_VIVADO variable." && false
+endif
+
+.PHONY: check_vpp
+check_vpp:
+ifeq (,$(wildcard $(XILINX_VITIS)/bin/v++))
+	@echo "Cannot locate Vitis installation. Please set XILINX_VITIS variable." && false
+endif
+
+.PHONY: check_xrt
+check_xrt:
+ifeq (,$(wildcard $(XILINX_XRT)/lib/libxilinxopencl.so))
+	@echo "Cannot locate XRT installation. Please set XILINX_XRT variable." && false
+endif
+
+export PATH := $(XILINX_VITIS)/bin:$(XILINX_XRT)/bin:$(PATH)
+# For an x86 host, put $(XILINX_XRT)/lib first on LD_LIBRARY_PATH, keeping any existing
+# value behind it. The explicit export matters: make only passes variables on to
+# recipes automatically when they were inherited from the environment, so a value
+# created here would otherwise never reach the commands that are run.
+ifeq ($(HOST_ARCH), x86)
+export LD_LIBRARY_PATH := $(XILINX_XRT)/lib$(if $(LD_LIBRARY_PATH),:$(LD_LIBRARY_PATH))
+endif
+
+ifneq (,$(wildcard $(PLATFORM)))
+# PLATFORM names an existing path: use it unchanged as the platform file
+XPLATFORM := $(PLATFORM)
+else
+# Otherwise treat PLATFORM as a platform name first, then as an awk regular expression matched against every .xpfm path found
+# 1. search paths specified by variable (PLATFORM_REPO_PATHS, colon separated)
+ifneq (,$(PLATFORM_REPO_PATHS))
+# 1.1 as exact name
+XPLATFORM := $(strip $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/$(PLATFORM)/$(PLATFORM).xpfm)))
+# 1.2 as a pattern
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/*/*.xpfm))
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 1.2
+endif # 1
+# 2. search Vitis installation ($(XILINX_VITIS)/platforms), only when step 1 found nothing
+ifeq (,$(XPLATFORM))
+# 2.1 as exact name
+XPLATFORM := $(strip $(wildcard $(XILINX_VITIS)/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 2.2 as a pattern
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard $(XILINX_VITIS)/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 2.2
+endif # 2
+# 3. search default locations (/opt/xilinx/platforms), only when steps 1 and 2 found nothing
+ifeq (,$(XPLATFORM))
+# 3.1 as exact name
+XPLATFORM := $(strip $(wildcard /opt/xilinx/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 3.2 as a pattern
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard /opt/xilinx/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 3.2
+endif # 3
+endif
+
+define MSG_PLATFORM
+No platform matched pattern '$(PLATFORM)'.
+Available platforms are: $(XPLATFORMS)
+To add more platform directories, set the PLATFORM_REPO_PATHS variable or point PLATFORM variable to the full path of platform .xpfm file.
+endef
+export MSG_PLATFORM
+
+# check_platform: when XPLATFORM is empty, echo MSG_PLATFORM (read back by the shell from the exported environment variable) and fail
+.PHONY: check_platform
+check_platform:
+ifeq (,$(XPLATFORM))
+	@echo "$${MSG_PLATFORM}" && false
+endif
+#Check ends
+
+#   PLATFORM_NAME - file name part of $(PLATFORM) with a trailing .xpfm removed
+#   $(notdir) is used so no basename subprocess is spawned each time the variable is expanded
+PLATFORM_NAME = $(strip $(patsubst %.xpfm,%,$(notdir $(PLATFORM))))
+
+
+# Cleaning stuff: shell command shortcuts. ECHO carries a leading @, so a recipe line that starts with $(ECHO) is not echoed by make
+RM = rm -f
+RMDIR = rm -rf
+
+MV = mv -f
+CP = cp -rf
+ECHO:= @echo
diff --git a/codec/L2/include/hw/jpegDec/XAcc_idct.hpp b/codec/L2/include/hw/jpegDec/XAcc_idct.hpp
index 3845874616..71a9774234 100644
--- a/codec/L2/include/hw/jpegDec/XAcc_idct.hpp
+++ b/codec/L2/include/hw/jpegDec/XAcc_idct.hpp
@@ -747,8 +747,8 @@ inline void kernelJpegDecoderTop(ap_uint<AXI_WIDTH>* jpeg_pointer,
 
     //for reset of the decoder
     uint32_t rst_cnt;
-    int rtn;
-    int rtn2;
+    int rtn = 0;
+    int rtn2 = 0;
 
     //tables
 	uint8_t 						   q_tables[2][8][8];
diff --git a/codec/L2/include/hw/jpegDec/XAcc_jpegdecoder.hpp b/codec/L2/include/hw/jpegDec/XAcc_jpegdecoder.hpp
index 83182fb1d8..52ed682558 100644
--- a/codec/L2/include/hw/jpegDec/XAcc_jpegdecoder.hpp
+++ b/codec/L2/include/hw/jpegDec/XAcc_jpegdecoder.hpp
@@ -826,7 +826,7 @@ void mcu_decoder(
 #pragma HLS DATAFLOW
 
     // clang-format off
-	    hls::stream<xf::codec::sos_data> huff_sos_strm;
+	    hls::stream<xf::codec::details::sos_data> huff_sos_strm;
 	#pragma HLS DATA_PACK variable = huff_sos_strm
 	#pragma HLS RESOURCE  variable = huff_sos_strm core = FIFO_LUTRAM
 	#pragma HLS STREAM    variable = huff_sos_strm depth = 32
diff --git a/codec/L2/include/hw/jpegDec/utils_XAcc_jpeg.hpp b/codec/L2/include/hw/jpegDec/utils_XAcc_jpeg.hpp
index b26e0489f6..9fb4a06abe 100644
--- a/codec/L2/include/hw/jpegDec/utils_XAcc_jpeg.hpp
+++ b/codec/L2/include/hw/jpegDec/utils_XAcc_jpeg.hpp
@@ -81,9 +81,14 @@ typedef ap_uint<14> HCODE_T;
 // to decode 420 800*800 need 50*50*4 =10000
 // to decode 444 800*800 need 100*100*3 =30000
 #define MAXCMP_BC (1036800)
+// ------------------------------------------------------------
+#define MAX_NUM_BLOCK88_W (512)
+#define MAX_NUM_BLOCK88_H (512)
+#define MAX_NUM_BLOCK88 (MAX_NUM_BLOCK88_W * MAX_NUM_BLOCK88_H)
 
 namespace xf {
 namespace codec {
+
 // ------------------------------------------------------------
 // input width AXI_WIDTH = 16, means the decoder cloud process 16bits per cycle, in max speed
 // output width OUT_WIDTH = 64,means the decoder meigrate 8 Bytes of YUV per cycle, a faster design to be explore.
@@ -91,7 +96,10 @@ namespace codec {
 #define OUT_WIDTH (64)
 //
 enum COLOR_FORMAT { C400 = 0, C420, C422, C444 };
-//
+
+// ------------------------------------------------------------
+/// all basic information for image (size, idct, tables)
+/// may be used by later processing stages such as lepton, CNN, ...
 struct bas_info {
     COLOR_FORMAT format;
     uint16_t axi_width[MAX_NUM_COLOR];
@@ -112,6 +120,24 @@ struct bas_info {
     uint8_t hls_mbs[MAX_NUM_COLOR];
     uint32_t all_blocks;
 };
+// ------------------------------------------------------------
+/// image size information for jpeg mcu
+struct img_info {
+    uint8_t hls_cs_cmpc;
+    uint32_t hls_mcuc; // the total mcu
+    uint16_t hls_mcuh; // the horizontal mcu
+    uint16_t hls_mcuv;
+};
+// ------------------------------------------------------------
+/// component size information for jpeg mcu
+struct cmp_info {
+    int sfv; // sample factor vertical
+    int sfh; // sample factor horizontal
+    int mbs; // blocks in mcu
+    int bcv; // block count vertical (interleaved)
+    int bch; // block count horizontal (interleaved)
+    int bc;  // block count (all) (interleaved)
+};
 
 // for IQ IDCT
 //#ifndef __SYNTHESIS__
@@ -122,9 +148,12 @@ typedef uint8_t idct_out_t;
 // typedef ap_uint<8> idct_out_t;
 //#endif
 
-#define MAX_NUM_BLOCK88_W (512)
-#define MAX_NUM_BLOCK88_H (512)
-#define MAX_NUM_BLOCK88 (MAX_NUM_BLOCK88_W * MAX_NUM_BLOCK88_H)
+} // namespace codec
+} // namespace xf
+
+namespace xf {
+namespace codec {
+namespace details {
 //
 const static uint8_t hls_jpeg_zigzag_to_raster[64] = {0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,
                                                       12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6,  7,  14, 21, 28,
@@ -138,15 +167,6 @@ const short hls_icos_base_8192_scaled[64] = {
     8192,   6436,  -11363, 2260,  9633, -9633, -2260,  11363,  -6436, 4433,  -10703, 10703, -4433,
     -4433,  10703, -10703, 4433,  2260, -6436, 9633,   -11363, 11363, -9633, 6436,   -2260,
 };
-
-// ------------------------------------------------------------
-
-} // namespace codec
-} // namespace xf
-
-namespace xf {
-namespace codec {
-
 // ------------------------------------------------------------
 struct hls_huff_DHT {
     unsigned short tbl1[2][CMPhuff][1 << DHT1];
@@ -165,22 +185,9 @@ struct sos_data {
     bool rst; //
     bool end_sos;
 };
+
 // ------------------------------------------------------------
-struct img_info {
-    uint8_t hls_cs_cmpc;
-    uint32_t hls_mcuc; // the total mcu
-    uint16_t hls_mcuh; // the horizontal mcu
-    uint16_t hls_mcuv;
-};
-// ------------------------------------------------------------
-struct cmp_info {
-    int sfv; // sample factor vertical
-    int sfh; // sample factor horizontal
-    int mbs; // blocks in mcu
-    int bcv; // block count vertical (interleaved)
-    int bch; // block count horizontal (interleaved)
-    int bc;  // block count (all) (interleaved)
-};
+} // namespace details
 } // namespace codec
 } // namespace xf
 
diff --git a/codec/L2/include/hw/jxlEnc/hls_cluster_histogram.hpp b/codec/L2/include/hw/jxlEnc/hls_cluster_histogram.hpp
new file mode 100644
index 0000000000..56b8abb8dd
--- /dev/null
+++ b/codec/L2/include/hw/jxlEnc/hls_cluster_histogram.hpp
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HLS_CLUSTER_HISTOGRAM_HPP
+#define HLS_CLUSTER_HISTOGRAM_HPP
+
+#include "ap_int.h"
+#include "hls_stream.h"
+#include "hls_math.h"
+
+namespace xf {
+namespace codec {
+
+/**
+ * @brief JXL ANS cluster Histogram kernel
+ *
+ * @param config                    configuration for the kernel.
+ * @param histograms0_ptr           histograms for Block Context Map.
+ * @param histo_totalcnt0_ptr       Count of context for histograms for Block Context Map.
+ * @param histo_size0_ptr           size for each context
+ * @param nonempty_histo0_ptr       indicate which context is empty
+ * @param ctx_map0_ptr              the input context map
+ * @param histograms_clusd0_ptr     the clustered histogram
+ * @param histograms_clusdin0_ptr   the context for the clustered histogram
+ * @param histograms1_ptr           histograms for Modular frame tree.
+ * @param histo_totalcnt1_ptr       Count of context for histograms for Modular frame tree.
+ * @param histo_size1_ptr           size for each context
+ * @param nonempty_histo1_ptr       indicate which context is empty
+ * @param ctx_map1_ptr              the input context map
+ * @param histograms_clusd1_ptr     the clustered histogram
+ * @param histograms_clusdin1_ptr   the context for the clustered histogram
+ * @param histograms2_ptr           histograms for code from Modular frame.
+ * @param histo_totalcnt2_ptr       Count of context for histograms for Modular frame.
+ * @param histo_size2_ptr           size for each context
+ * @param nonempty_histo2_ptr       indicate which context is empty
+ * @param ctx_map2_ptr              the input context map
+ * @param histograms_clusd2_ptr     the clustered histogram
+ * @param histograms_clusdin2_ptr   the context for the clustered histogram
+ * @param histograms3_ptr           histograms for coef orders.
+ * @param histo_totalcnt3_ptr       Count of context for histograms for coef orders.
+ * @param histo_size3_ptr           size for each context
+ * @param nonempty_histo3_ptr       indicate which context is empty
+ * @param ctx_map3_ptr              the input context map
+ * @param histograms_clusd3_ptr     the clustered histogram
+ * @param histograms_clusdin3_ptr   the context for the clustered histogram
+ * @param histograms4_ptr           histograms for ac coefficients.
+ * @param histo_totalcnt4_ptr       Count of context for histograms for ac coefficients.
+ * @param histo_size4_ptr           size for each context
+ * @param nonempty_histo4_ptr       indicate which context is empty
+ * @param ctx_map4_ptr              the input context map
+ * @param histograms_clusd4_ptr     the clustered histogram
+ * @param histograms_clusdin4_ptr   the context for the clustered histogram
+ */
+
+extern "C" void JxlEnc_ans_clusterHistogram(uint32_t* config,
+                                            int32_t* histograms0_ptr,
+                                            uint32_t* histo_totalcnt0_ptr,
+                                            uint32_t* histo_size0_ptr,
+
+                                            uint32_t* nonempty_histo0_ptr,
+
+                                            uint8_t* ctx_map0_ptr,
+
+                                            int32_t* histograms_clusd0_ptr,
+                                            uint32_t* histo_size_clusd0_ptr,
+
+                                            int32_t* histograms_clusdin0_ptr,
+                                            //====================
+                                            int32_t* histograms1_ptr,
+                                            uint32_t* histo_totalcnt1_ptr,
+                                            uint32_t* histo_size1_ptr,
+
+                                            uint32_t* nonempty_histo1_ptr,
+
+                                            uint8_t* ctx_map1_ptr,
+
+                                            int32_t* histograms_clusd1_ptr,
+                                            uint32_t* histo_size_clusd1_ptr,
+
+                                            int32_t* histograms_clusdin1_ptr,
+                                            //======================
+                                            int32_t* histograms2_ptr,
+                                            uint32_t* histo_totalcnt2_ptr,
+                                            uint32_t* histo_size2_ptr,
+
+                                            uint32_t* nonempty_histo2_ptr,
+
+                                            uint8_t* ctx_map2_ptr,
+
+                                            int32_t* histograms_clusd2_ptr,
+                                            uint32_t* histo_size_clusd2_ptr,
+
+                                            int32_t* histograms_clusdin2_ptr,
+                                            //======================
+                                            int32_t* histograms3_ptr,
+                                            uint32_t* histo_totalcnt3_ptr,
+                                            uint32_t* histo_size3_ptr,
+
+                                            uint32_t* nonempty_histo3_ptr,
+
+                                            uint8_t* ctx_map3_ptr,
+
+                                            int32_t* histograms_clusd3_ptr,
+                                            uint32_t* histo_size_clusd3_ptr,
+
+                                            int32_t* histograms_clusdin3_ptr,
+                                            //======================
+                                            int32_t* histograms4_ptr,
+                                            uint32_t* histo_totalcnt4_ptr,
+                                            uint32_t* histo_size4_ptr,
+
+                                            uint32_t* nonempty_histo4_ptr,
+
+                                            uint8_t* ctx_map4_ptr,
+
+                                            int32_t* histograms_clusd4_ptr,
+                                            uint32_t* histo_size_clusd4_ptr,
+
+                                            int32_t* histograms_clusdin4_ptr);
+} // namespace codec
+} // namespace xf
+#endif
diff --git a/codec/L2/include/hw/jxlEnc/hls_init_histogram.hpp b/codec/L2/include/hw/jxlEnc/hls_init_histogram.hpp
new file mode 100644
index 0000000000..2631b30321
--- /dev/null
+++ b/codec/L2/include/hw/jxlEnc/hls_init_histogram.hpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HLS_INIT_HISTOGRAM_HPP
+#define HLS_INIT_HISTOGRAM_HPP
+
+#include <ap_int.h>
+#include <hls_stream.h>
+#include <stdint.h>
+
+const int PIXEL_W = 2048;
+const int PIXEL_H = 2048;
+const int FRAME_DIM = 3;
+const int ALL_PIXEL = PIXEL_W * PIXEL_H * FRAME_DIM;
+const int MAX_NUM_BLK88_W = PIXEL_W / 8;
+const int MAX_NUM_BLK88_H = PIXEL_H / 8;
+const int MAX_NUM_BLK88 = MAX_NUM_BLK88_W * MAX_NUM_BLK88_H;
+const int MAX_ORDERS_SIZE = (3 * 64 + 3 * 64 + 3 * 256 + 3 * 1024);
+const int MAX_QF_THRESH_SIZE = 256;
+const int MAX_CTX_MAP_SIZE = 256;
+const int MAX_AC_TOKEN_SIZE = ALL_PIXEL;
+
+namespace xf {
+namespace codec {
+
+/**
+* @brief JXL ANS init Histogram kernel
+*
+* @param config                    configuration for the kernel.
+* @param ac_coeff_ordered_ddr      ac coefficients
+* @param strategy_ddr              ac strategy
+* @param qf_ddr                    quant field
+* @param qdc_ddr                   qdc
+* @param ctx_map                   ctx_map ddr (NOTE(review): declared bound is MAX_QF_THRESH_SIZE, which looks swapped with qf_thresholds; both are 256 - confirm)
+* @param qf_thresholds             quantfield_thresholds (NOTE(review): declared bound is MAX_CTX_MAP_SIZE - see ctx_map)
+* @param ac_tokens_ddr             the output of ac tokens
+* @param tokens0_ptr               tokens for Block Context Map
+* @param tokens1_ptr               tokens for Modular frame tree
+* @param tokens2_ptr               tokens for coef orders
+* @param tokens3_ptr               tokens for Modular frames
+* @param histograms0_ptr           histograms for Block Context Map.
+* @param total_count0_ptr          Count of context for histograms for Block Context Map.
+* @param histograms_size0_ptr      size for each context
+* @param nonempty0_ptr             indicate which context is empty
+* @param histograms1_ptr           histograms for Modular frame tree.
+* @param total_count1_ptr          Count of context for histograms for Modular frame tree.
+* @param histograms_size1_ptr      size for each context
+* @param nonempty1_ptr             indicate which context is empty
+* @param histograms2_ptr           histograms for code from Modular frame.
+* @param total_count2_ptr          Count of context for histograms for Modular frame.
+* @param histograms_size2_ptr      size for each context
+* @param nonempty2_ptr             indicate which context is empty
+* @param histograms3_ptr           histograms for coef orders.
+* @param total_count3_ptr          Count of context for histograms for coef orders.
+* @param histograms_size3_ptr      size for each context
+* @param nonempty3_ptr             indicate which context is empty
+* @param histograms4_ptr           histograms for ac coefficients.
+* @param total_count4_ptr          Count of context for histograms for ac coefficients.
+* @param histograms_size4_ptr      size for each context
+* @param nonempty4_ptr             indicate which context is empty
+*/
+
+extern "C" void JxlEnc_ans_initHistogram(
+    //===============================================
+    int config[32], // HBM-7
+    //========================================
+    int32_t ac_coeff_ordered_ddr[ALL_PIXEL],   // HBM-2
+    int32_t strategy_ddr[MAX_NUM_BLK88],       // HBM-3
+    int32_t qf_ddr[MAX_NUM_BLK88],             // HBM-4
+    uint8_t qdc_ddr[MAX_NUM_BLK88],            // HBM-5
+    uint8_t ctx_map[MAX_QF_THRESH_SIZE],       // HBM-6
+    uint32_t qf_thresholds[MAX_CTX_MAP_SIZE],  // HBM-6
+    uint64_t ac_tokens_ddr[MAX_AC_TOKEN_SIZE], // HBM-8
+    //======================================
+    ap_uint<64>* tokens0_ptr, // HBM-9
+    ap_uint<64>* tokens1_ptr, // HBM-10
+    ap_uint<64>* tokens2_ptr, // HBM-11
+    ap_uint<64>* tokens3_ptr, // HBM-12
+    //=====================================
+    int32_t* histograms0_ptr,       // HBM-10
+    uint32_t* histograms_size0_ptr, // HBM-11
+    uint32_t* total_count0_ptr,     // HBM-12
+    uint32_t* nonempty0_ptr,        // HBM-9
+    //=====================================
+    int32_t* histograms1_ptr,       // HBM-10
+    uint32_t* histograms_size1_ptr, // HBM-11
+    uint32_t* total_count1_ptr,     // HBM-12
+    uint32_t* nonempty1_ptr,        // HBM-9
+    //=====================================
+    int32_t* histograms2_ptr,       // HBM-10
+    uint32_t* histograms_size2_ptr, // HBM-11
+    uint32_t* total_count2_ptr,     // HBM-12
+    uint32_t* nonempty2_ptr,        // HBM-9
+    //=====================================
+    int32_t* histograms3_ptr,       // 24
+    uint32_t* histograms_size3_ptr, // 25
+    uint32_t* total_count3_ptr,     // 26
+    uint32_t* nonempty3_ptr,        // 27
+    //=====================================
+    int32_t* histograms4_ptr,       // 28
+    uint32_t* histograms_size4_ptr, // 29
+    uint32_t* total_count4_ptr,     // 30
+    uint32_t* nonempty4_ptr         // 31
+    );
+} // namespace codec
+} // namespace xf
+
+#endif
diff --git a/codec/L2/include/hw/jxlEnc/hls_lossy_enc_compute.hpp b/codec/L2/include/hw/jxlEnc/hls_lossy_enc_compute.hpp
new file mode 100644
index 0000000000..a5db810502
--- /dev/null
+++ b/codec/L2/include/hw/jxlEnc/hls_lossy_enc_compute.hpp
@@ -0,0 +1,3958 @@
+/*
+ * Copyright 2022 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HLS_LOSSY_ENC_COMPUTE_HPP
+#define HLS_LOSSY_ENC_COMPUTE_HPP
+
+#include <stdint.h>
+#include <hls_stream.h>
+#include "hls_math.h"
+#include "stddef.h"
+#include "stdint.h"
+
+typedef ap_fixed<38, 24> ca_x_t;
+typedef ap_fixed<38, 24> cb_x_t;
+typedef ap_fixed<38, 18> ca_b_t;
+typedef ap_fixed<38, 21> cb_b_t;
+
+enum Type {
+    // Regular block size DCT
+    DCT = 0,
+    // Encode pixels without transforming
+    IDENTITY = 1,
+    // Use 2-by-2 DCT
+    DCT2X2 = 2,
+    // Use 4-by-4 DCT
+    DCT4X4 = 3,
+    // Use 16-by-16 DCT
+    DCT16X16 = 4,
+    // Use 32-by-32 DCT
+    DCT32X32 = 5
+};
+
+template <typename T>
+T DivCeil(T a, size_t b) {
+    return (a + b - 1) / b; // rounds a / b up for non-negative integral a and b > 0 (original note: 8)
+}
+
+template <typename I, typename F>
+inline F bitsToF(I in) {
+    union {
+        I __I;
+        F __F;
+    } __T;
+    __T.__I = in;
+    return __T.__F;
+}
+
+template <typename F, typename I>
+inline I fToBits(F in) {
+    union {
+        I __I;
+        F __F;
+    } __T;
+    __T.__F = in;
+    return __T.__I;
+}
+
+template <typename MType, typename DType>
+union cast;
+
+template <typename DT>
+union cast<DT, int8_t> {
+    DT f;
+    int8_t i;
+};
+
+template <typename DT>
+union cast<DT, int32_t> {
+    DT f;
+    int32_t i;
+};
+
+template <typename DT>
+union cast<DT, uint32_t> {
+    DT f;
+    uint32_t i;
+};
+
+template <typename DT>
+union cast<DT, int64_t> {
+    DT f;
+    int64_t i;
+};
+
+const int PIXEL_W = 2048;
+const int PIXEL_H = 2048;
+const int FRAME_DIM = 3;
+const int ALL_PIXEL = PIXEL_W * PIXEL_H * FRAME_DIM;
+const int BLOCK8_W = PIXEL_W / 8;
+const int BLOCK8_H = PIXEL_H / 8;
+const int BLOCK8_NUM = BLOCK8_W * BLOCK8_H * FRAME_DIM;
+const int TILE_W = PIXEL_W / 64;
+const int TILE_H = PIXEL_H / 64;
+const int MAX_ORDER = 320 * 3 + 1;
+const int MAX_NUM_CONFIG = 32;
+
+const size_t kBlockDim = 8;
+const size_t kColorTileDim = 64;
+const size_t kDCTBlockSize = 64;
+const size_t kEncTileDimInBlocks = 8;
+const int kGlobalScaleDenom = 1 << 16;
+const size_t kColorTileDimInBlocks = 8; // kColorTileDim / kBlockDim
+const int global_scale = 4587;          // global_scale_(global_scale)
+const float global_scale_float = global_scale * (1.0 / kGlobalScaleDenom);
+const float inv_global_scale = 1.0 * kGlobalScaleDenom / global_scale;
+
+static const uint8_t kDefaultColorFactor = 84;
+static float color_scale = 1.0f / (uint32_t)kDefaultColorFactor;
+static const float kYToBRatio = 1.0f;
+static float base_correlation_x = 0.0f;
+static float base_correlation_b = kYToBRatio;
+
+static const float kDefaultQuantBias[4] = {
+    1.0f - 0.05465007330715401f, 1.0f - 0.07005449891748593f, 1.0f - 0.049935103337343655f, 0.145f,
+};
+
+const float qmx8x8[64] = {0,
+                          3150,
+                          3139.258544921875,
+                          2648.63037109375,
+                          2234.68115234375,
+                          1885.427490234375,
+                          1590.758056640625,
+                          1342.1417236328125,
+                          3150,
+                          3150,
+                          3015.8095703125,
+                          2576.583984375,
+                          2188.4150390625,
+                          1853.965576171875,
+                          1568.5406494140625,
+                          1326.029296875,
+                          3139.258544921875,
+                          3015.8095703125,
+                          2726.995361328125,
+                          2389.616455078125,
+                          2062.382568359375,
+                          1765.966552734375,
+                          1505.3934326171875,
+                          1279.74853515625,
+                          2648.63037109375,
+                          2576.583984375,
+                          2389.616455078125,
+                          2144.407470703125,
+                          1885.427490234375,
+                          1637.12109375,
+                          1410.3748779296875,
+                          1208.7896728515625,
+                          2234.68115234375,
+                          2188.4150390625,
+                          2062.382568359375,
+                          1885.427490234375,
+                          1686.2821044921875,
+                          1485.4266357421875,
+                          1294.8450927734375,
+                          1060.5933837890625,
+                          1885.427490234375,
+                          1853.965576171875,
+                          1765.966552734375,
+                          1637.12109375,
+                          1485.4266357421875,
+                          1326.029296875,
+                          1169.4920654296875,
+                          785.9630126953125,
+                          1590.758056640625,
+                          1568.5406494140625,
+                          1505.3934326171875,
+                          1410.3748779296875,
+                          1294.8450927734375,
+                          1169.4920654296875,
+                          838.70172119140625,
+                          558.03729248046875,
+                          1342.1417236328125,
+                          1326.029296875,
+                          1279.74853515625,
+                          1208.7896728515625,
+                          1060.5933837890625,
+                          785.9630126953125,
+                          558.03729248046875,
+                          382.654693603515625};
+const float qmb8x8[64] = {0,
+                          293.959503173828125,
+                          169.4699554443359375,
+                          119.41248321533203125,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          83.5508270263671875,
+                          58.871856689453125,
+                          293.959503173828125,
+                          233.598114013671875,
+                          156.02716064453125,
+                          112.8175048828125,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          81.16471099853515625,
+                          57.425174713134765625,
+                          169.4699554443359375,
+                          156.02716064453125,
+                          126.80493927001953125,
+                          96.60062408447265625,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          74.5768890380859375,
+                          53.37267303466796875,
+                          119.41248321533203125,
+                          112.8175048828125,
+                          96.60062408447265625,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          65.20384979248046875,
+                          47.455181121826171875,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          72.55352020263671875,
+                          54.6778106689453125,
+                          39.419506072998046875,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          85.33333587646484375,
+                          72.55352020263671875,
+                          57.425174713134765625,
+                          44.331756591796875,
+                          29.2122058868408203125,
+                          83.5508270263671875,
+                          81.16471099853515625,
+                          74.5768890380859375,
+                          65.20384979248046875,
+                          54.6778106689453125,
+                          44.331756591796875,
+                          31.1723690032958984375,
+                          20.7407989501953125,
+                          58.871856689453125,
+                          57.425174713134765625,
+                          53.37267303466796875,
+                          47.455181121826171875,
+                          39.419506072998046875,
+                          29.2122058868408203125,
+                          20.7407989501953125,
+                          14.22228240966796875};
+const float qmx16x16[256] = {0,
+                             0,
+                             5616.41552734375,
+                             4437.5478515625,
+                             3710.523681640625,
+                             3312.083740234375,
+                             2956.42822265625,
+                             2638.9638671875,
+                             2378.979736328125,
+                             2146.23095703125,
+                             1936.2532958984375,
+                             1722.1861572265625,
+                             1498.605712890625,
+                             1304.0516357421875,
+                             1134.7548828125,
+                             951.88201904296875,
+                             0,
+                             0,
+                             5312.58251953125,
+                             4271.09716796875,
+                             3658.995849609375,
+                             3275.037109375,
+                             2928.763916015625,
+                             2617.745361328125,
+                             2363.779541015625,
+                             2134.027099609375,
+                             1926.335693359375,
+                             1711.357177734375,
+                             1489.962646484375,
+                             1297.1055908203125,
+                             1129.140380859375,
+                             946.136962890625,
+                             5616.41552734375,
+                             5312.58251953125,
+                             4620.5927734375,
+                             3880.564697265625,
+                             3516.761474609375,
+                             3170.294189453125,
+                             2849.415283203125,
+                             2562.00634765625,
+                             2319.431640625,
+                             2098.26171875,
+                             1897.1728515625,
+                             1679.534423828125,
+                             1464.5052490234375,
+                             1276.60888671875,
+                             1112.54638671875,
+                             929.18414306640625,
+                             4437.5478515625,
+                             4271.09716796875,
+                             3880.564697265625,
+                             3609.647705078125,
+                             3312.083740234375,
+                             3013.74951171875,
+                             2727.90283203125,
+                             2474.977294921875,
+                             2249.396484375,
+                             2041.3057861328125,
+                             1850.436279296875,
+                             1628.6099853515625,
+                             1423.5849609375,
+                             1243.5428466796875,
+                             1077.57275390625,
+                             901.83697509765625,
+                             3710.523681640625,
+                             3658.995849609375,
+                             3516.761474609375,
+                             3312.083740234375,
+                             3073.944580078125,
+                             2824.097412109375,
+                             2580.273681640625,
+                             2363.779541015625,
+                             2158.580810546875,
+                             1966.6195068359375,
+                             1778.0765380859375,
+                             1561.4259033203125,
+                             1369.259765625,
+                             1199.417236328125,
+                             1031.115478515625,
+                             865.35723876953125,
+                             3312.083740234375,
+                             3275.037109375,
+                             3170.294189453125,
+                             3013.74951171875,
+                             2824.097412109375,
+                             2617.7451171875,
+                             2425.913330078125,
+                             2235.929931640625,
+                             2052.44384765625,
+                             1878.2061767578125,
+                             1679.534423828125,
+                             1481.3988037109375,
+                             1304.0516357421875,
+                             1146.111572265625,
+                             975.34478759765625,
+                             821.329833984375,
+                             2956.42822265625,
+                             2928.763916015625,
+                             2849.415283203125,
+                             2727.90283203125,
+                             2580.273681640625,
+                             2425.913330078125,
+                             2263.03759765625,
+                             2098.26171875,
+                             1936.2532958984375,
+                             1766.65966796875,
+                             1570.745849609375,
+                             1392.13525390625,
+                             1230.6845703125,
+                             1077.57275390625,
+                             912.64251708984375,
+                             771.521240234375,
+                             2638.9638671875,
+                             2617.745361328125,
+                             2562.00634765625,
+                             2474.977294921875,
+                             2363.779541015625,
+                             2235.929931640625,
+                             2098.26171875,
+                             1956.3931884765625,
+                             1813.078369140625,
+                             1628.6099853515625,
+                             1456.1728515625,
+                             1297.1055908203125,
+                             1151.8544921875,
+                             993.46435546875,
+                             845.40533447265625,
+                             717.73773193359375,
+                             2378.979736328125,
+                             2363.779541015625,
+                             2319.431640625,
+                             2249.396484375,
+                             2158.580810546875,
+                             2052.44384765625,
+                             1936.2532958984375,
+                             1813.078369140625,
+                             1648.672119140625,
+                             1489.962646484375,
+                             1339.6640625,
+                             1199.41748046875,
+                             1057.3155517578125,
+                             907.21795654296875,
+                             775.87847900390625,
+                             661.70928955078125,
+                             2146.23095703125,
+                             2134.027099609375,
+                             2098.26171875,
+                             2041.3057861328125,
+                             1966.6195068359375,
+                             1878.2061767578125,
+                             1766.65966796875,
+                             1628.6099853515625,
+                             1489.962646484375,
+                             1354.3355712890625,
+                             1224.331787109375,
+                             1098.37109375,
+                             951.88201904296875,
+                             821.329833984375,
+                             706.04150390625,
+                             604.9959716796875,
+                             1936.2532958984375,
+                             1926.335693359375,
+                             1897.1728515625,
+                             1850.436279296875,
+                             1778.0765380859375,
+                             1679.534423828125,
+                             1570.745849609375,
+                             1456.1728515625,
+                             1339.6640625,
+                             1224.331787109375,
+                             1112.546142578125,
+                             975.344482421875,
+                             850.33416748046875,
+                             737.81219482421875,
+                             637.54150390625,
+                             531.86663818359375,
+                             1722.1861572265625,
+                             1711.357177734375,
+                             1679.534423828125,
+                             1628.6099853515625,
+                             1561.4259033203125,
+                             1481.3988037109375,
+                             1392.13525390625,
+                             1297.1055908203125,
+                             1199.41748046875,
+                             1098.37109375,
+                             975.344482421875,
+                             860.30999755859375,
+                             754.41485595703125,
+                             658.18359375,
+                             565.16876220703125,
+                             455.065155029296875,
+                             1498.605712890625,
+                             1489.962646484375,
+                             1464.5052490234375,
+                             1423.5849609375,
+                             1369.259765625,
+                             1304.0516357421875,
+                             1230.6845703125,
+                             1151.8544921875,
+                             1057.3155517578125,
+                             951.88201904296875,
+                             850.33416748046875,
+                             754.41485595703125,
+                             665.2603759765625,
+                             582.76104736328125,
+                             475.56475830078125,
+                             385.666412353515625,
+                             1304.0516357421875,
+                             1297.1055908203125,
+                             1276.60888671875,
+                             1243.5428466796875,
+                             1199.417236328125,
+                             1146.111572265625,
+                             1077.57275390625,
+                             993.46435546875,
+                             907.21795654296875,
+                             821.329833984375,
+                             737.81219482421875,
+                             658.18359375,
+                             582.76104736328125,
+                             482.643035888671875,
+                             396.77593994140625,
+                             324.0394287109375,
+                             1134.7548828125,
+                             1129.140380859375,
+                             1112.54638671875,
+                             1077.57275390625,
+                             1031.115478515625,
+                             975.34478759765625,
+                             912.64251708984375,
+                             845.40533447265625,
+                             775.87847900390625,
+                             706.04150390625,
+                             637.54150390625,
+                             565.16876220703125,
+                             475.56475830078125,
+                             396.77593994140625,
+                             328.516326904296875,
+                             270.136077880859375,
+                             951.88201904296875,
+                             946.136962890625,
+                             929.18414306640625,
+                             901.83697509765625,
+                             865.35723876953125,
+                             821.329833984375,
+                             771.521240234375,
+                             717.73773193359375,
+                             661.70928955078125,
+                             604.9959716796875,
+                             531.86663818359375,
+                             455.065155029296875,
+                             385.666412353515625,
+                             324.0394287109375,
+                             270.136077880859375,
+                             223.608489990234375};
+const float qmb16x16[256] = {0,
+                             0,
+                             615.61383056640625,
+                             448.953399658203125,
+                             337.930267333984375,
+                             263.80755615234375,
+                             205.943115234375,
+                             160.7708892822265625,
+                             141.832733154296875,
+                             126.30164337158203125,
+                             112.47124481201171875,
+                             100.76338958740234375,
+                             91.12081146240234375,
+                             82.40099334716796875,
+                             74.5156097412109375,
+                             58.896236419677734375,
+                             0,
+                             0,
+                             571.40203857421875,
+                             426.5322265625,
+                             327.784393310546875,
+                             257.417816162109375,
+                             201.76556396484375,
+                             157.9664306640625,
+                             140.8123321533203125,
+                             125.4929656982421875,
+                             111.822540283203125,
+                             100.30467987060546875,
+                             90.7403564453125,
+                             82.08327484130859375,
+                             74.2487335205078125,
+                             58.3933258056640625,
+                             615.61383056640625,
+                             571.40203857421875,
+                             473.94189453125,
+                             372.602783203125,
+                             300.644775390625,
+                             239.809600830078125,
+                             190.039825439453125,
+                             154.1826629638671875,
+                             137.8400421142578125,
+                             123.12636566162109375,
+                             109.9174652099609375,
+                             98.95200347900390625,
+                             89.61621856689453125,
+                             81.14296722412109375,
+                             73.45781707763671875,
+                             56.916744232177734375,
+                             448.953399658203125,
+                             426.5322265625,
+                             372.602783203125,
+                             318.224456787109375,
+                             263.80755615234375,
+                             214.746795654296875,
+                             172.8172607421875,
+                             148.2958526611328125,
+                             133.160797119140625,
+                             119.3681488037109375,
+                             106.87210845947265625,
+                             96.77252197265625,
+                             87.79785919189453125,
+                             79.6171722412109375,
+                             70.20831298828125,
+                             54.558437347412109375,
+                             337.930267333984375,
+                             327.784393310546875,
+                             300.644775390625,
+                             263.80755615234375,
+                             224.2069549560546875,
+                             186.3783111572265625,
+                             155.42156982421875,
+                             140.8123321533203125,
+                             127.12058258056640625,
+                             114.4600982666015625,
+                             103.11833953857421875,
+                             93.86804962158203125,
+                             85.36130523681640625,
+                             77.5634307861328125,
+                             65.95937347412109375,
+                             51.458751678466796875,
+                             263.80755615234375,
+                             257.417816162109375,
+                             239.809600830078125,
+                             214.746795654296875,
+                             186.3783111572265625,
+                             157.966400146484375,
+                             144.9885406494140625,
+                             132.263153076171875,
+                             120.10205078125,
+                             108.6804351806640625,
+                             98.95200347900390625,
+                             90.36280059814453125,
+                             82.40099334716796875,
+                             75.0543060302734375,
+                             60.963199615478515625,
+                             47.78974151611328125,
+                             205.943115234375,
+                             201.76556396484375,
+                             190.039825439453125,
+                             172.8172607421875,
+                             155.42156982421875,
+                             144.9885406494140625,
+                             134.070770263671875,
+                             123.12636566162109375,
+                             112.47124481201171875,
+                             102.63896942138671875,
+                             94.27301025390625,
+                             86.3905029296875,
+                             79.02082061767578125,
+                             70.20831298828125,
+                             55.486751556396484375,
+                             43.7368011474609375,
+                             160.7708892822265625,
+                             157.9664306640625,
+                             154.1826629638671875,
+                             148.2958526611328125,
+                             140.8123321533203125,
+                             132.263153076171875,
+                             123.12636566162109375,
+                             113.789886474609375,
+                             104.58271026611328125,
+                             96.77252197265625,
+                             89.247100830078125,
+                             82.08327484130859375,
+                             75.3261566162109375,
+                             62.573711395263671875,
+                             49.786182403564453125,
+                             39.48137664794921875,
+                             141.832733154296875,
+                             140.8123321533203125,
+                             137.8400421142578125,
+                             133.160797119140625,
+                             127.12058258056640625,
+                             120.10205078125,
+                             112.47124481201171875,
+                             104.58271026611328125,
+                             97.63336944580078125,
+                             90.7403564453125,
+                             84.02266693115234375,
+                             77.563446044921875,
+                             68.3460235595703125,
+                             55.02014923095703125,
+                             44.087116241455078125,
+                             35.18759918212890625,
+                             126.30164337158203125,
+                             125.4929656982421875,
+                             123.12636566162109375,
+                             119.3681488037109375,
+                             114.4600982666015625,
+                             108.6804351806640625,
+                             102.63896942138671875,
+                             96.77252197265625,
+                             90.7403564453125,
+                             84.68727874755859375,
+                             78.725555419921875,
+                             72.135589599609375,
+                             58.896236419677734375,
+                             47.78974151611328125,
+                             38.57308197021484375,
+                             30.9930629730224609375,
+                             112.47124481201171875,
+                             111.822540283203125,
+                             109.9174652099609375,
+                             106.87210845947265625,
+                             103.11833953857421875,
+                             98.95200347900390625,
+                             94.27301025390625,
+                             89.247100830078125,
+                             84.02266693115234375,
+                             78.725555419921875,
+                             73.4578094482421875,
+                             60.96317291259765625,
+                             50.197849273681640625,
+                             41.054691314697265625,
+                             33.38103485107421875,
+                             24.7806758880615234375,
+                             100.76338958740234375,
+                             100.30467987060546875,
+                             98.95200347900390625,
+                             96.77252197265625,
+                             93.86804962158203125,
+                             90.36280059814453125,
+                             86.3905029296875,
+                             82.08327484130859375,
+                             77.563446044921875,
+                             72.135589599609375,
+                             60.96317291259765625,
+                             51.034107208251953125,
+                             42.369472503662109375,
+                             34.922313690185546875,
+                             27.726070404052734375,
+                             18.572216033935546875,
+                             91.12081146240234375,
+                             90.7403564453125,
+                             89.61621856689453125,
+                             87.79785919189453125,
+                             85.36130523681640625,
+                             82.40099334716796875,
+                             79.02082061767578125,
+                             75.3261566162109375,
+                             68.3460235595703125,
+                             58.896236419677734375,
+                             50.197849273681640625,
+                             42.369472503662109375,
+                             35.455394744873046875,
+                             29.34313201904296875,
+                             20.1489048004150390625,
+                             13.67640781402587890625,
+                             82.40099334716796875,
+                             82.08327484130859375,
+                             81.14296722412109375,
+                             79.6171722412109375,
+                             77.5634307861328125,
+                             75.0543060302734375,
+                             70.20831298828125,
+                             62.573711395263671875,
+                             55.02014923095703125,
+                             47.78974151611328125,
+                             41.054691314697265625,
+                             34.922313690185546875,
+                             29.34313201904296875,
+                             20.706996917724609375,
+                             14.41384983062744140625,
+                             9.911548614501953125,
+                             74.5156097412109375,
+                             74.2487335205078125,
+                             73.45781707763671875,
+                             70.20831298828125,
+                             65.95937347412109375,
+                             60.963199615478515625,
+                             55.486751556396484375,
+                             49.786182403564453125,
+                             44.087116241455078125,
+                             38.57308197021484375,
+                             33.38103485107421875,
+                             27.726070404052734375,
+                             20.1489048004150390625,
+                             14.41384983062744140625,
+                             10.16626739501953125,
+                             7.0798015594482421875,
+                             58.896236419677734375,
+                             58.3933258056640625,
+                             56.916744232177734375,
+                             54.558437347412109375,
+                             51.458751678466796875,
+                             47.78974151611328125,
+                             43.7368011474609375,
+                             39.48137664794921875,
+                             35.18759918212890625,
+                             30.9930629730224609375,
+                             24.7806758880615234375,
+                             18.572216033935546875,
+                             13.67640781402587890625,
+                             9.911548614501953125,
+                             7.0798015594482421875,
+                             4.99121952056884765625};
+const float qmx32x32[1024] = {0,
+                              0,
+                              0,
+                              0,
+                              10016.1787109375,
+                              8949.0185546875,
+                              7995.55859375,
+                              7162.60107421875,
+                              6422.4755859375,
+                              5758.8291015625,
+                              5163.75830078125,
+                              4630.1767578125,
+                              4151.732421875,
+                              3734.188232421875,
+                              3370.10986328125,
+                              3041.52880859375,
+                              2744.98388671875,
+                              2477.35107421875,
+                              2235.813232421875,
+                              2038.7496337890625,
+                              1932.1097412109375,
+                              1831.0474853515625,
+                              1735.2716064453125,
+                              1644.505615234375,
+                              1558.4873046875,
+                              1476.968017578125,
+                              1386.82666015625,
+                              1301.5286865234375,
+                              1221.4771728515625,
+                              1146.34912109375,
+                              1075.8421630859375,
+                              1009.6715087890625,
+                              0,
+                              0,
+                              0,
+                              0,
+                              9878.224609375,
+                              8849.744140625,
+                              7921.35595703125,
+                              7107.29541015625,
+                              6379.01171875,
+                              5724.1455078125,
+                              5135.74365234375,
+                              4607.32568359375,
+                              4132.939453125,
+                              3719.505126953125,
+                              3357.800537109375,
+                              3031.1572265625,
+                              2736.20654296875,
+                              2469.894287109375,
+                              2229.455810546875,
+                              2035.871337890625,
+                              1929.51806640625,
+                              1828.7081298828125,
+                              1733.1553955078125,
+                              1642.5870361328125,
+                              1556.7445068359375,
+                              1475.38232421875,
+                              1385.1351318359375,
+                              1300,
+                              1220.09375,
+                              1145.095703125,
+                              1074.7049560546875,
+                              1008.638671875,
+                              0,
+                              0,
+                              0,
+                              0,
+                              9497.3408203125,
+                              8569.009765625,
+                              7710.1953125,
+                              6947.08251953125,
+                              6252.30078125,
+                              5622.568359375,
+                              5053.4169921875,
+                              4539.9931640625,
+                              4077.45068359375,
+                              3676.055419921875,
+                              3321.326416015625,
+                              3000.390625,
+                              2710.1435546875,
+                              2447.7333984375,
+                              2210.550537109375,
+                              2027.2841796875,
+                              1921.7830810546875,
+                              1821.7237548828125,
+                              1726.834716796875,
+                              1636.8553466796875,
+                              1551.537353515625,
+                              1470.240966796875,
+                              1380.0811767578125,
+                              1295.431396484375,
+                              1215.958251953125,
+                              1141.3475341796875,
+                              1071.3038330078125,
+                              1005.5491943359375,
+                              0,
+                              0,
+                              0,
+                              0,
+                              8949.0185546875,
+                              8149.28955078125,
+                              7394.22412109375,
+                              6697.34423828125,
+                              6052.48828125,
+                              5461.01953125,
+                              4921.63427734375,
+                              4431.66796875,
+                              3987.818603515625,
+                              3605.5732421875,
+                              3262.003173828125,
+                              2950.239990234375,
+                              2667.58154296875,
+                              2411.486328125,
+                              2179.5849609375,
+                              2013.1302490234375,
+                              1909.0233154296875,
+                              1810.194091796875,
+                              1716.393798828125,
+                              1627.38232421875,
+                              1542.9271240234375,
+                              1460.985595703125,
+                              1371.7230224609375,
+                              1287.8731689453125,
+                              1209.1142578125,
+                              1135.1424560546875,
+                              1065.6719970703125,
+                              1000.4320068359375,
+                              10016.1787109375,
+                              9878.224609375,
+                              9497.3408203125,
+                              8949.0185546875,
+                              8310.703125,
+                              7644.4052734375,
+                              6999.5673828125,
+                              6379.01171875,
+                              5793.93896484375,
+                              5249.58837890625,
+                              4747.62841796875,
+                              4287.62841796875,
+                              3871.052001953125,
+                              3510.74609375,
+                              3181.8896484375,
+                              2882.297607421875,
+                              2609.764404296875,
+                              2362.1328125,
+                              2137.337890625,
+                              1993.63818359375,
+                              1891.4306640625,
+                              1794.2806396484375,
+                              1701.970703125,
+                              1614.285400390625,
+                              1531.013916015625,
+                              1448.1861572265625,
+                              1360.1573486328125,
+                              1277.407958984375,
+                              1199.633056640625,
+                              1126.5428466796875,
+                              1057.8629150390625,
+                              993.33349609375,
+                              8949.0185546875,
+                              8849.744140625,
+                              8569.009765625,
+                              8149.28955078125,
+                              7644.4052734375,
+                              7107.29541015625,
+                              6556.77978515625,
+                              6014.109375,
+                              5492.59033203125,
+                              4999.91259765625,
+                              4539.9931640625,
+                              4114.296875,
+                              3734.188232421875,
+                              3394.959228515625,
+                              3083.605224609375,
+                              2798.61328125,
+                              2538.306396484375,
+                              2300.954345703125,
+                              2084.83349609375,
+                              1969.1136474609375,
+                              1869.26220703125,
+                              1774.201171875,
+                              1683.7496337890625,
+                              1597.7220458984375,
+                              1515.93359375,
+                              1431.994873046875,
+                              1345.5145263671875,
+                              1264.1485595703125,
+                              1187.612060546875,
+                              1115.6322021484375,
+                              1047.949462890625,
+                              984.94073486328125,
+                              7995.55859375,
+                              7921.35595703125,
+                              7710.1953125,
+                              7394.22412109375,
+                              6999.5673828125,
+                              6556.77978515625,
+                              6091.37744140625,
+                              5622.568359375,
+                              5163.75830078125,
+                              4723.701171875,
+                              4307.68896484375,
+                              3918.65771484375,
+                              3578.03271484375,
+                              3262.003173828125,
+                              2970.1337890625,
+                              2701.544189453125,
+                              2455.08642578125,
+                              2229.455810546875,
+                              2041.63623046875,
+                              1939.925048828125,
+                              1842.8297119140625,
+                              1750.22119140625,
+                              1661.9576416015625,
+                              1577.8870849609375,
+                              1497.8533935546875,
+                              1412.5999755859375,
+                              1327.9566650390625,
+                              1248.234619140625,
+                              1173.171875,
+                              1102.515625,
+                              1036.0238037109375,
+                              975.33502197265625,
+                              7162.60107421875,
+                              7107.29541015625,
+                              6947.08251953125,
+                              6697.34423828125,
+                              6379.01171875,
+                              6014.109375,
+                              5622.568359375,
+                              5220.6748046875,
+                              4820.7763671875,
+                              4431.66796875,
+                              4059.243408203125,
+                              3719.505126953125,
+                              3407.500244140625,
+                              3115.7998046875,
+                              2844.60986328125,
+                              2593.611328125,
+                              2362.133056640625,
+                              2149.279541015625,
+                              2004.73095703125,
+                              1906.490966796875,
+                              1812.4891357421875,
+                              1722.6444091796875,
+                              1636.8553466796875,
+                              1555.00537109375,
+                              1476.9681396484375,
+                              1390.2191162109375,
+                              1307.6717529296875,
+                              1229.8292236328125,
+                              1156.454345703125,
+                              1087.3167724609375,
+                              1022.19244384765625,
+                              964.1702880859375,
+                              6422.4755859375,
+                              6379.01171875,
+                              6252.30078125,
+                              6052.48828125,
+                              5793.93896484375,
+                              5492.59033203125,
+                              5163.75830078125,
+                              4820.7763671875,
+                              4474.4306640625,
+                              4132.939453125,
+                              3809.151123046875,
+                              3510.74560546875,
+                              3227.259765625,
+                              2960.15966796875,
+                              2710.1435546875,
+                              2477.3515625,
+                              2261.5234375,
+                              2062.12646484375,
+                              1963.7435302734375,
+                              1869.26220703125,
+                              1778.6273193359375,
+                              1691.8035888671875,
+                              1608.7301025390625,
+                              1529.3251953125,
+                              1450.00341796875,
+                              1365.0950927734375,
+                              1284.86962890625,
+                              1209.1142578125,
+                              1137.61865234375,
+                              1070.173828125,
+                              1006.57757568359375,
+                              951.533935546875,
+                              5758.8291015625,
+                              5724.1455078125,
+                              5622.568359375,
+                              5461.01953125,
+                              5249.58837890625,
+                              4999.91259765625,
+                              4723.701171875,
+                              4431.66796875,
+                              4132.939453125,
+                              3839.8818359375,
+                              3564.3984375,
+                              3297.380859375,
+                              3041.52880859375,
+                              2798.61328125,
+                              2569.677490234375,
+                              2355.211669921875,
+                              2155.288330078125,
+                              2013.1302490234375,
+                              1919.218017578125,
+                              1828.7081298828125,
+                              1741.64990234375,
+                              1658.050537109375,
+                              1577.8870849609375,
+                              1501.1123046875,
+                              1419.6033935546875,
+                              1337.488037109375,
+                              1259.777099609375,
+                              1186.287841796875,
+                              1116.836181640625,
+                              1051.2381591796875,
+                              989.35601806640625,
+                              937.5218505859375,
+                              5163.75830078125,
+                              5135.74365234375,
+                              5053.4169921875,
+                              4921.63427734375,
+                              4747.62841796875,
+                              4539.9931640625,
+                              4307.68896484375,
+                              4059.243408203125,
+                              3809.151123046875,
+                              3564.3984375,
+                              3321.326416015625,
+                              3083.605224609375,
+                              2853.95654296875,
+                              2634.296142578125,
+                              2425.8837890625,
+                              2229.455810546875,
+                              2053.26318359375,
+                              1961.06884765625,
+                              1871.700927734375,
+                              1785.30419921875,
+                              1701.970703125,
+                              1621.7459716796875,
+                              1544.6424560546875,
+                              1470.240966796875,
+                              1386.82666015625,
+                              1307.6715087890625,
+                              1232.6331787109375,
+                              1161.558837890625,
+                              1094.29150390625,
+                              1030.6707763671875,
+                              972.74078369140625,
+                              922.236572265625,
+                              4630.1767578125,
+                              4607.32568359375,
+                              4539.9931640625,
+                              4431.66796875,
+                              4287.62841796875,
+                              4114.296875,
+                              3918.65771484375,
+                              3719.505126953125,
+                              3510.74560546875,
+                              3297.380859375,
+                              3083.605224609375,
+                              2872.7998046875,
+                              2667.58154296875,
+                              2469.89453125,
+                              2281.107177734375,
+                              2102.114501953125,
+                              1993.638427734375,
+                              1906.490966796875,
+                              1821.723876953125,
+                              1739.5189208984375,
+                              1660.0018310546875,
+                              1583.2529296875,
+                              1509.314697265625,
+                              1431.994873046875,
+                              1351.9913330078125,
+                              1275.923828125,
+                              1203.682373046875,
+                              1135.1424560546875,
+                              1070.173828125,
+                              1008.638671875,
+                              954.87835693359375,
+                              905.78668212890625,
+                              4151.732421875,
+                              4132.939453125,
+                              4077.45068359375,
+                              3987.818603515625,
+                              3871.052001953125,
+                              3734.188232421875,
+                              3578.03271484375,
+                              3407.500244140625,
+                              3227.259765625,
+                              3041.52880859375,
+                              2853.95654296875,
+                              2667.58154296875,
+                              2484.843994140625,
+                              2307.630126953125,
+                              2137.337890625,
+                              2015.9456787109375,
+                              1932.1097412109375,
+                              1849.968017578125,
+                              1769.796142578125,
+                              1691.8035888671875,
+                              1616.1446533203125,
+                              1542.9271240234375,
+                              1472.10400390625,
+                              1391.9202880859375,
+                              1315.4149169921875,
+                              1242.5257568359375,
+                              1173.171875,
+                              1107.2579345703125,
+                              1044.676513671875,
+                              985.8211669921875,
+                              935.8944091796875,
+                              888.28326416015625,
+                              3734.188232421875,
+                              3719.505126953125,
+                              3676.055419921875,
+                              3605.5732421875,
+                              3510.74609375,
+                              3394.959228515625,
+                              3262.003173828125,
+                              3115.7998046875,
+                              2960.15966796875,
+                              2798.61328125,
+                              2634.296142578125,
+                              2469.89453125,
+                              2307.630126953125,
+                              2149.279541015625,
+                              2027.2845458984375,
+                              1947.802001953125,
+                              1869.26220703125,
+                              1792.02880859375,
+                              1716.393798828125,
+                              1642.5870361328125,
+                              1570.7830810546875,
+                              1501.1123046875,
+                              1426.662841796875,
+                              1350.36767578125,
+                              1277.407958984375,
+                              1207.7525634765625,
+                              1141.3475341796875,
+                              1078.122314453125,
+                              1017.99298095703125,
+                              964.1702880859375,
+                              915.9161376953125,
+                              869.84039306640625,
+                              3370.10986328125,
+                              3357.800537109375,
+                              3321.326416015625,
+                              3262.003173828125,
+                              3181.8896484375,
+                              3083.605224609375,
+                              2970.1337890625,
+                              2844.60986328125,
+                              2710.1435546875,
+                              2569.677490234375,
+                              2425.8837890625,
+                              2281.107177734375,
+                              2137.337890625,
+                              2027.2845458984375,
+                              1953.087890625,
+                              1879.05322265625,
+                              1805.62060546875,
+                              1733.1553955078125,
+                              1661.9576416015625,
+                              1592.2684326171875,
+                              1524.27880859375,
+                              1455.4775390625,
+                              1380.0811767578125,
+                              1307.6715087890625,
+                              1238.270751953125,
+                              1171.8729248046875,
+                              1108.4483642578125,
+                              1047.949462890625,
+                              990.31439208984375,
+                              941.60955810546875,
+                              895.07000732421875,
+                              850.57257080078125,
+                              3041.52880859375,
+                              3031.1572265625,
+                              3000.390625,
+                              2950.239990234375,
+                              2882.297607421875,
+                              2798.61328125,
+                              2701.544189453125,
+                              2593.611328125,
+                              2477.3515625,
+                              2355.211669921875,
+                              2229.455810546875,
+                              2102.114501953125,
+                              2015.9456787109375,
+                              1947.802001953125,
+                              1879.05322265625,
+                              1810.194091796875,
+                              1741.64990234375,
+                              1673.7799072265625,
+                              1606.8861083984375,
+                              1541.215087890625,
+                              1476.9681396484375,
+                              1403.9237060546875,
+                              1332.708740234375,
+                              1264.1485595703125,
+                              1198.2880859375,
+                              1135.1424560546875,
+                              1074.7049560546875,
+                              1016.9471435546875,
+                              965.0220947265625,
+                              918.27838134765625,
+                              873.48193359375,
+                              830.5924072265625,
+                              2744.98388671875,
+                              2736.20654296875,
+                              2710.1435546875,
+                              2667.58154296875,
+                              2609.764404296875,
+                              2538.306396484375,
+                              2455.08642578125,
+                              2362.133056640625,
+                              2261.5234375,
+                              2155.288330078125,
+                              2053.26318359375,
+                              1993.638427734375,
+                              1932.1097412109375,
+                              1869.26220703125,
+                              1805.62060546875,
+                              1741.64990234375,
+                              1677.7550048828125,
+                              1614.285400390625,
+                              1551.537353515625,
+                              1489.759765625,
+                              1421.3629150390625,
+                              1351.9913330078125,
+                              1284.869384765625,
+                              1220.09375,
+                              1157.7274169921875,
+                              1097.8046875,
+                              1040.3365478515625,
+                              985.8211669921875,
+                              939.15362548828125,
+                              894.31201171875,
+                              851.27447509765625,
+                              810.0118408203125,
+                              2477.35107421875,
+                              2469.894287109375,
+                              2447.7333984375,
+                              2411.486328125,
+                              2362.1328125,
+                              2300.954345703125,
+                              2229.455810546875,
+                              2149.279541015625,
+                              2062.12646484375,
+                              2013.1302490234375,
+                              1961.06884765625,
+                              1906.490966796875,
+                              1849.968017578125,
+                              1792.02880859375,
+                              1733.1553955078125,
+                              1673.7799072265625,
+                              1614.285400390625,
+                              1555.00537109375,
+                              1496.228515625,
+                              1431.994873046875,
+                              1365.0950927734375,
+                              1300,
+                              1236.8575439453125,
+                              1175.777099609375,
+                              1116.836181640625,
+                              1060.0849609375,
+                              1005.5491943359375,
+                              957.398681640625,
+                              912.78082275390625,
+                              869.84039306640625,
+                              828.56634521484375,
+                              788.9383544921875,
+                              2235.813232421875,
+                              2229.455810546875,
+                              2210.550537109375,
+                              2179.5849609375,
+                              2137.337890625,
+                              2084.83349609375,
+                              2041.63623046875,
+                              2004.73095703125,
+                              1963.7435302734375,
+                              1919.218017578125,
+                              1871.700927734375,
+                              1821.723876953125,
+                              1769.796142578125,
+                              1716.393798828125,
+                              1661.9576416015625,
+                              1606.8861083984375,
+                              1551.537353515625,
+                              1496.228515625,
+                              1435.5677490234375,
+                              1371.7230224609375,
+                              1309.21435546875,
+                              1248.234619140625,
+                              1188.9384765625,
+                              1131.4447021484375,
+                              1075.8421630859375,
+                              1022.19244384765625,
+                              972.74078369140625,
+                              928.625244140625,
+                              886.03875732421875,
+                              844.98834228515625,
+                              805.47149658203125,
+                              767.47625732421875,
+                              2038.7496337890625,
+                              2035.871337890625,
+                              2027.2841796875,
+                              2013.1302490234375,
+                              1993.63818359375,
+                              1969.1136474609375,
+                              1939.925048828125,
+                              1906.490966796875,
+                              1869.26220703125,
+                              1828.7081298828125,
+                              1785.30419921875,
+                              1739.5189208984375,
+                              1691.8035888671875,
+                              1642.5870361328125,
+                              1592.2684326171875,
+                              1541.215087890625,
+                              1489.759765625,
+                              1431.994873046875,
+                              1371.7230224609375,
+                              1312.3087158203125,
+                              1253.9849853515625,
+                              1196.9451904296875,
+                              1141.3475341796875,
+                              1087.3167724609375,
+                              1034.9495849609375,
+                              984.94073486328125,
+                              941.60955810546875,
+                              899.6387939453125,
+                              859.05474853515625,
+                              819.87261962890625,
+                              782.09710693359375,
+                              745.7244873046875,
+                              1932.1097412109375,
+                              1929.51806640625,
+                              1921.7830810546875,
+                              1909.0233154296875,
+                              1891.4306640625,
+                              1869.26220703125,
+                              1842.8297119140625,
+                              1812.4891357421875,
+                              1778.6273193359375,
+                              1741.64990234375,
+                              1701.970703125,
+                              1660.0018310546875,
+                              1616.1446533203125,
+                              1570.7830810546875,
+                              1524.27880859375,
+                              1476.9681396484375,
+                              1421.3629150390625,
+                              1365.0950927734375,
+                              1309.21435546875,
+                              1253.9849853515625,
+                              1199.633056640625,
+                              1146.34912109375,
+                              1094.29150390625,
+                              1043.5887451171875,
+                              994.34295654296875,
+                              951.5340576171875,
+                              910.440185546875,
+                              870.5670166015625,
+                              831.9473876953125,
+                              794.60296630859375,
+                              758.54522705078125,
+                              723.7767333984375,
+                              1831.0474853515625,
+                              1828.7081298828125,
+                              1821.7237548828125,
+                              1810.194091796875,
+                              1794.2806396484375,
+                              1774.201171875,
+                              1750.22119140625,
+                              1722.6444091796875,
+                              1691.8035888671875,
+                              1658.050537109375,
+                              1621.7459716796875,
+                              1583.2529296875,
+                              1542.9271240234375,
+                              1501.1123046875,
+                              1455.4775390625,
+                              1403.9237060546875,
+                              1351.9913330078125,
+                              1300,
+                              1248.234619140625,
+                              1196.9451904296875,
+                              1146.34912109375,
+                              1096.6314697265625,
+                              1047.949462890625,
+                              1000.4320068359375,
+                              958.2410888671875,
+                              918.27825927734375,
+                              879.35675048828125,
+                              841.5264892578125,
+                              804.8258056640625,
+                              769.28125,
+                              734.91033935546875,
+                              701.72100830078125,
+                              1735.2716064453125,
+                              1733.1553955078125,
+                              1726.834716796875,
+                              1716.393798828125,
+                              1701.970703125,
+                              1683.7496337890625,
+                              1661.9576416015625,
+                              1636.8553466796875,
+                              1608.7301025390625,
+                              1577.8870849609375,
+                              1544.6424560546875,
+                              1509.314697265625,
+                              1472.10400390625,
+                              1426.662841796875,
+                              1380.0811767578125,
+                              1332.708740234375,
+                              1284.869384765625,
+                              1236.8575439453125,
+                              1188.9384765625,
+                              1141.3475341796875,
+                              1094.29150390625,
+                              1047.949462890625,
+                              1002.47418212890625,
+                              961.62213134765625,
+                              923.03155517578125,
+                              885.292236328125,
+                              848.471923828125,
+                              812.6236572265625,
+                              777.789794921875,
+                              744.00146484375,
+                              711.28009033203125,
+                              684.97052001953125,
+                              1644.505615234375,
+                              1642.5870361328125,
+                              1636.8553466796875,
+                              1627.38232421875,
+                              1614.285400390625,
+                              1597.7220458984375,
+                              1577.8870849609375,
+                              1555.00537109375,
+                              1529.3251953125,
+                              1501.1123046875,
+                              1470.240966796875,
+                              1431.994873046875,
+                              1391.9202880859375,
+                              1350.36767578125,
+                              1307.6715087890625,
+                              1264.1485595703125,
+                              1220.09375,
+                              1175.777099609375,
+                              1131.4447021484375,
+                              1087.3167724609375,
+                              1043.5887451171875,
+                              1000.4320068359375,
+                              961.62213134765625,
+                              924.62445068359375,
+                              888.28326416015625,
+                              852.68096923828125,
+                              817.8857421875,
+                              783.95404052734375,
+                              750.92999267578125,
+                              718.84820556640625,
+                              690.50970458984375,
+                              669.78717041015625,
+                              1558.4873046875,
+                              1556.7445068359375,
+                              1551.537353515625,
+                              1542.9271240234375,
+                              1531.013916015625,
+                              1515.93359375,
+                              1497.8533935546875,
+                              1476.9681396484375,
+                              1450.00341796875,
+                              1419.6033935546875,
+                              1386.82666015625,
+                              1351.9913330078125,
+                              1315.4149169921875,
+                              1277.407958984375,
+                              1238.270751953125,
+                              1198.2880859375,
+                              1157.7274169921875,
+                              1116.836181640625,
+                              1075.8421630859375,
+                              1034.9495849609375,
+                              994.34295654296875,
+                              958.2410888671875,
+                              923.03155517578125,
+                              888.28326416015625,
+                              854.0911865234375,
+                              820.53631591796875,
+                              787.68792724609375,
+                              755.60260009765625,
+                              724.32769775390625,
+                              694.7138671875,
+                              674.44976806640625,
+                              654.522705078125,
+                              1476.968017578125,
+                              1475.38232421875,
+                              1470.240966796875,
+                              1460.985595703125,
+                              1448.1861572265625,
+                              1431.994873046875,
+                              1412.5999755859375,
+                              1390.2191162109375,
+                              1365.0950927734375,
+                              1337.488037109375,
+                              1307.6715087890625,
+                              1275.923828125,
+                              1242.5257568359375,
+                              1207.7525634765625,
+                              1171.8729248046875,
+                              1135.1424560546875,
+                              1097.8046875,
+                              1060.0849609375,
+                              1022.19244384765625,
+                              984.94073486328125,
+                              951.5340576171875,
+                              918.27825927734375,
+                              885.292236328125,
+                              852.68096923828125,
+                              820.53631591796875,
+                              788.9383544921875,
+                              757.955322265625,
+                              727.6448974609375,
+                              698.0548095703125,
+                              677.8135986328125,
+                              658.3643798828125,
+                              639.217041015625,
+                              1386.82666015625,
+                              1385.1351318359375,
+                              1380.0811767578125,
+                              1371.7230224609375,
+                              1360.1573486328125,
+                              1345.5145263671875,
+                              1327.9566650390625,
+                              1307.6717529296875,
+                              1284.86962890625,
+                              1259.777099609375,
+                              1232.6331787109375,
+                              1203.682373046875,
+                              1173.171875,
+                              1141.3475341796875,
+                              1108.4483642578125,
+                              1074.7049560546875,
+                              1040.3365478515625,
+                              1005.5491943359375,
+                              972.74078369140625,
+                              941.60955810546875,
+                              910.440185546875,
+                              879.35675048828125,
+                              848.471923828125,
+                              817.8857421875,
+                              787.68792724609375,
+                              757.955322265625,
+                              728.75579833984375,
+                              700.14642333984375,
+                              679.84564208984375,
+                              660.9462890625,
+                              642.2919921875,
+                              623.90673828125,
+                              1301.5286865234375,
+                              1300,
+                              1295.431396484375,
+                              1287.8731689453125,
+                              1277.407958984375,
+                              1264.1485595703125,
+                              1248.234619140625,
+                              1229.8292236328125,
+                              1209.1142578125,
+                              1186.287841796875,
+                              1161.558837890625,
+                              1135.1424560546875,
+                              1107.2579345703125,
+                              1078.122314453125,
+                              1047.949462890625,
+                              1016.9471435546875,
+                              985.8211669921875,
+                              957.398681640625,
+                              928.625244140625,
+                              899.6387939453125,
+                              870.5670166015625,
+                              841.5264892578125,
+                              812.6236572265625,
+                              783.95404052734375,
+                              755.60260009765625,
+                              727.6448974609375,
+                              700.14642333984375,
+                              680.52508544921875,
+                              662.2437744140625,
+                              644.1488037109375,
+                              626.26806640625,
+                              608.6259765625,
+                              1221.4771728515625,
+                              1220.09375,
+                              1215.958251953125,
+                              1209.1142578125,
+                              1199.633056640625,
+                              1187.612060546875,
+                              1173.171875,
+                              1156.454345703125,
+                              1137.61865234375,
+                              1116.836181640625,
+                              1094.29150390625,
+                              1070.173828125,
+                              1044.676513671875,
+                              1017.99298095703125,
+                              990.31439208984375,
+                              965.0220947265625,
+                              939.15362548828125,
+                              912.78082275390625,
+                              886.03875732421875,
+                              859.05474853515625,
+                              831.9473876953125,
+                              804.8258056640625,
+                              777.789794921875,
+                              750.92999267578125,
+                              724.32769775390625,
+                              698.0548095703125,
+                              679.84564208984375,
+                              662.2437744140625,
+                              644.769775390625,
+                              627.45428466796875,
+                              610.32489013671875,
+                              593.4061279296875,
+                              1146.34912109375,
+                              1145.095703125,
+                              1141.3475341796875,
+                              1135.1424560546875,
+                              1126.5428466796875,
+                              1115.6322021484375,
+                              1102.515625,
+                              1087.3167724609375,
+                              1070.173828125,
+                              1051.2381591796875,
+                              1030.6707763671875,
+                              1008.638671875,
+                              985.8211669921875,
+                              964.1702880859375,
+                              941.60955810546875,
+                              918.27838134765625,
+                              894.31201171875,
+                              869.84039306640625,
+                              844.98834228515625,
+                              819.87261962890625,
+                              794.60296630859375,
+                              769.28125,
+                              744.00146484375,
+                              718.84820556640625,
+                              694.7138671875,
+                              677.8135986328125,
+                              660.9462890625,
+                              644.1488037109375,
+                              627.45428466796875,
+                              610.89300537109375,
+                              594.491943359375,
+                              578.27581787109375,
+                              1075.8421630859375,
+                              1074.7049560546875,
+                              1071.3038330078125,
+                              1065.6719970703125,
+                              1057.8629150390625,
+                              1047.949462890625,
+                              1036.0238037109375,
+                              1022.19244384765625,
+                              1006.57757568359375,
+                              989.35601806640625,
+                              972.74078369140625,
+                              954.87835693359375,
+                              935.8944091796875,
+                              915.9161376953125,
+                              895.07000732421875,
+                              873.48193359375,
+                              851.27447509765625,
+                              828.56634521484375,
+                              805.47149658203125,
+                              782.09710693359375,
+                              758.54522705078125,
+                              734.91033935546875,
+                              711.28009033203125,
+                              690.50970458984375,
+                              674.44976806640625,
+                              658.3643798828125,
+                              642.2919921875,
+                              626.26806640625,
+                              610.32489013671875,
+                              594.491943359375,
+                              578.7960205078125,
+                              563.26104736328125,
+                              1009.6715087890625,
+                              1008.638671875,
+                              1005.5491943359375,
+                              1000.4320068359375,
+                              993.33349609375,
+                              984.94073486328125,
+                              975.33502197265625,
+                              964.1702880859375,
+                              951.533935546875,
+                              937.5218505859375,
+                              922.236572265625,
+                              905.78668212890625,
+                              888.28326416015625,
+                              869.84039306640625,
+                              850.57257080078125,
+                              830.5924072265625,
+                              810.0118408203125,
+                              788.9383544921875,
+                              767.47625732421875,
+                              745.7244873046875,
+                              723.7767333984375,
+                              701.72100830078125,
+                              684.97052001953125,
+                              669.78717041015625,
+                              654.522705078125,
+                              639.217041015625,
+                              623.90673828125,
+                              608.6259765625,
+                              593.4061279296875,
+                              578.27581787109375,
+                              563.26104736328125,
+                              548.38555908203125};
+
+const float qmb32x32[1024] = {0,
+                              0,
+                              0,
+                              0,
+                              1554.123779296875,
+                              1242.53955078125,
+                              993.424560546875,
+                              821.7386474609375,
+                              688.02374267578125,
+                              576.06719970703125,
+                              482.32843017578125,
+                              403.842987060546875,
+                              338.12896728515625,
+                              283.2335205078125,
+                              237.367095947265625,
+                              198.92822265625,
+                              166.714080810546875,
+                              139.71661376953125,
+                              117.09114837646484375,
+                              100.3662261962890625,
+                              93.5875701904296875,
+                              87.26671600341796875,
+                              81.37277984619140625,
+                              75.876922607421875,
+                              70.75225830078125,
+                              65.97368621826171875,
+                              62.470378875732421875,
+                              59.2027587890625,
+                              56.10607147216796875,
+                              53.17134857177734375,
+                              50.390140533447265625,
+                              47.75440216064453125,
+                              0,
+                              0,
+                              0,
+                              0,
+                              1511.89892578125,
+                              1215.3125,
+                              975.19708251953125,
+                              811.43212890625,
+                              680.45819091796875,
+                              570.4285888671875,
+                              478.07489013671875,
+                              400.602691650390625,
+                              335.640289306640625,
+                              281.318328857421875,
+                              235.8760223388671875,
+                              197.7614898681640625,
+                              165.797119140625,
+                              138.9931640625,
+                              116.51837158203125,
+                              100.18183135986328125,
+                              93.42420196533203125,
+                              87.1216278076171875,
+                              81.24362945556640625,
+                              75.76171875,
+                              70.6492919921875,
+                              65.8815155029296875,
+                              62.4058837890625,
+                              59.143909454345703125,
+                              56.05228424072265625,
+                              53.12213897705078125,
+                              50.345058441162109375,
+                              47.71305084228515625,
+                              0,
+                              0,
+                              0,
+                              0,
+                              1398.31689453125,
+                              1139.9393310546875,
+                              926.46905517578125,
+                              781.85968017578125,
+                              658.5869140625,
+                              554.03814697265625,
+                              465.6590576171875,
+                              391.11358642578125,
+                              328.3336181640625,
+                              275.68292236328125,
+                              231.480926513671875,
+                              194.317352294921875,
+                              163.086822509765625,
+                              136.85247802734375,
+                              114.822052001953125,
+                              99.632171630859375,
+                              92.93701934814453125,
+                              86.6887664794921875,
+                              80.85819244384765625,
+                              75.4178009033203125,
+                              70.34185791015625,
+                              65.63607025146484375,
+                              62.2131195068359375,
+                              58.967945098876953125,
+                              55.891448974609375,
+                              52.97493743896484375,
+                              50.21018218994140625,
+                              47.589336395263671875,
+                              0,
+                              0,
+                              0,
+                              0,
+                              1242.53955078125,
+                              1031.720703125,
+                              865.44610595703125,
+                              736.6123046875,
+                              624.660888671875,
+                              528.352294921875,
+                              446.04833984375,
+                              376.0323486328125,
+                              316.66259765625,
+                              266.643341064453125,
+                              224.4067230224609375,
+                              188.7577972412109375,
+                              158.70111083984375,
+                              133.38116455078125,
+                              112.06615447998046875,
+                              98.72772216796875,
+                              92.1346588134765625,
+                              85.97531890869140625,
+                              80.2224273681640625,
+                              74.85018157958984375,
+                              69.8341827392578125,
+                              65.2862091064453125,
+                              61.8940887451171875,
+                              58.6766357421875,
+                              55.625087738037109375,
+                              52.731090545654296875,
+                              49.9866943359375,
+                              47.384288787841796875,
+                              1554.123779296875,
+                              1511.89892578125,
+                              1398.31689453125,
+                              1242.53955078125,
+                              1072.7047119140625,
+                              913.631103515625,
+                              791.500732421875,
+                              680.45819091796875,
+                              581.7969970703125,
+                              495.450836181640625,
+                              420.656097412109375,
+                              356.335296630859375,
+                              301.346527099609375,
+                              254.681488037109375,
+                              214.9998931884765625,
+                              181.3341522216796875,
+                              152.824005126953125,
+                              128.7149505615234375,
+                              108.35161590576171875,
+                              97.48529052734375,
+                              91.031036376953125,
+                              84.99285125732421875,
+                              79.34609222412109375,
+                              74.0670623779296875,
+                              69.13317108154296875,
+                              64.80181121826171875,
+                              61.4521331787109375,
+                              58.2728424072265625,
+                              55.255706787109375,
+                              52.392795562744140625,
+                              49.676509857177734375,
+                              47.09958648681640625,
+                              1242.53955078125,
+                              1215.3125,
+                              1139.9393310546875,
+                              1031.720703125,
+                              913.631103515625,
+                              811.43212890625,
+                              711.60491943359375,
+                              618.22406005859375,
+                              533.33489990234375,
+                              457.657806396484375,
+                              391.11358642578125,
+                              333.17852783203125,
+                              283.2335205078125,
+                              240.389190673828125,
+                              203.69110107421875,
+                              172.3627471923828125,
+                              145.6892547607421875,
+                              123.02779388427734375,
+                              103.80861663818359375,
+                              95.92728424072265625,
+                              89.64476776123046875,
+                              83.756927490234375,
+                              78.2422027587890625,
+                              73.07941436767578125,
+                              68.24814605712890625,
+                              64.18810272216796875,
+                              60.89176177978515625,
+                              57.760517120361328125,
+                              54.786739349365234375,
+                              51.96302032470703125,
+                              49.28223419189453125,
+                              46.73752593994140625,
+                              993.424560546875,
+                              975.19708251953125,
+                              926.46905517578125,
+                              865.44610595703125,
+                              791.500732421875,
+                              711.60491943359375,
+                              631.20953369140625,
+                              554.03814697265625,
+                              482.32843017578125,
+                              417.209503173828125,
+                              359.053955078125,
+                              307.756866455078125,
+                              263.14556884765625,
+                              224.4067230224609375,
+                              190.9550628662109375,
+                              162.196685791015625,
+                              137.5612030029296875,
+                              116.51837158203125,
+                              100.55123138427734375,
+                              94.08060455322265625,
+                              87.99832916259765625,
+                              82.28644561767578125,
+                              76.92669677734375,
+                              71.90074920654296875,
+                              67.1905670166015625,
+                              63.451557159423828125,
+                              60.218593597412109375,
+                              57.14452362060546875,
+                              54.22241973876953125,
+                              51.44549560546875,
+                              48.807163238525390625,
+                              46.300994873046875,
+                              821.7386474609375,
+                              811.43212890625,
+                              781.85968017578125,
+                              736.6123046875,
+                              680.45819091796875,
+                              618.22406005859375,
+                              554.03814697265625,
+                              491.01513671875,
+                              431.260406494140625,
+                              376.0323486328125,
+                              325.949676513671875,
+                              281.318328857421875,
+                              241.92047119140625,
+                              207.3672332763671875,
+                              177.27020263671875,
+                              151.198699951171875,
+                              128.7149810791015625,
+                              109.3962554931640625,
+                              98.19190216064453125,
+                              91.9756011962890625,
+                              86.117218017578125,
+                              80.60289764404296875,
+                              75.4178009033203125,
+                              70.54657745361328125,
+                              65.97369384765625,
+                              62.5996856689453125,
+                              59.439167022705078125,
+                              56.430583953857421875,
+                              53.567768096923828125,
+                              50.844631195068359375,
+                              48.255126953125,
+                              45.7933807373046875,
+                              688.02374267578125,
+                              680.45819091796875,
+                              658.5869140625,
+                              624.660888671875,
+                              581.7969970703125,
+                              533.33489990234375,
+                              482.32843017578125,
+                              431.260406494140625,
+                              381.958526611328125,
+                              335.640289306640625,
+                              293.0960693359375,
+                              254.681427001953125,
+                              220.30645751953125,
+                              189.852081298828125,
+                              163.086822509765625,
+                              139.7166595458984375,
+                              119.4195404052734375,
+                              101.8691864013671875,
+                              95.58690643310546875,
+                              89.64476776123046875,
+                              84.02899932861328125,
+                              78.7296905517578125,
+                              73.73546600341796875,
+                              69.03392791748046875,
+                              64.8706207275390625,
+                              61.6408843994140625,
+                              58.560794830322265625,
+                              55.625087738037109375,
+                              52.82842254638671875,
+                              50.165355682373046875,
+                              47.630523681640625,
+                              45.21855926513671875,
+                              576.06719970703125,
+                              570.4285888671875,
+                              554.03814697265625,
+                              528.352294921875,
+                              495.450836181640625,
+                              457.657806396484375,
+                              417.209503173828125,
+                              376.0323486328125,
+                              335.640289306640625,
+                              297.17999267578125,
+                              261.421112060546875,
+                              228.6143646240234375,
+                              198.92822265625,
+                              172.3627471923828125,
+                              148.803924560546875,
+                              128.066162109375,
+                              109.923492431640625,
+                              98.72772216796875,
+                              92.77559661865234375,
+                              87.1216278076171875,
+                              81.76230621337890625,
+                              76.69138336181640625,
+                              71.90074920654296875,
+                              67.38091278076171875,
+                              63.7176971435546875,
+                              60.584194183349609375,
+                              57.59142303466796875,
+                              54.7350311279296875,
+                              52.010478973388671875,
+                              49.41309356689453125,
+                              46.93811798095703125,
+                              44.580780029296875,
+                              482.32843017578125,
+                              478.07489013671875,
+                              465.6590576171875,
+                              446.04833984375,
+                              420.656097412109375,
+                              391.11358642578125,
+                              359.053955078125,
+                              325.949676513671875,
+                              293.0960693359375,
+                              261.421112060546875,
+                              231.480926513671875,
+                              203.69110107421875,
+                              178.2744598388671875,
+                              155.306304931640625,
+                              134.7555084228515625,
+                              116.51837158203125,
+                              101.29721832275390625,
+                              95.41748046875,
+                              89.79701995849609375,
+                              84.4398193359375,
+                              79.34609222412109375,
+                              74.51293182373046875,
+                              69.9352569580078125,
+                              65.63607025146484375,
+                              62.470378875732421875,
+                              59.439159393310546875,
+                              56.53945159912109375,
+                              53.767810821533203125,
+                              51.120525360107421875,
+                              48.59365081787109375,
+                              46.18306732177734375,
+                              43.884586334228515625,
+                              403.842987060546875,
+                              400.602691650390625,
+                              391.11358642578125,
+                              376.0323486328125,
+                              356.335296630859375,
+                              333.17852783203125,
+                              307.756866455078125,
+                              281.318328857421875,
+                              254.681427001953125,
+                              228.6143646240234375,
+                              203.69110107421875,
+                              180.3063201904296875,
+                              158.70111083984375,
+                              138.993194580078125,
+                              121.20597076416015625,
+                              105.29486083984375,
+                              97.48529815673828125,
+                              91.9756011962890625,
+                              86.68878173828125,
+                              81.632110595703125,
+                              76.80889129638671875,
+                              72.21916961669921875,
+                              67.86054229736328125,
+                              64.18810272216796875,
+                              61.139739990234375,
+                              58.215541839599609375,
+                              55.413524627685546875,
+                              52.731090545654296875,
+                              50.165355682373046875,
+                              47.71305084228515625,
+                              45.3707275390625,
+                              43.134796142578125,
+                              338.12896728515625,
+                              335.640289306640625,
+                              328.3336181640625,
+                              316.66259765625,
+                              301.346527099609375,
+                              283.2335205078125,
+                              263.14556884765625,
+                              241.92047119140625,
+                              220.30645751953125,
+                              198.92822265625,
+                              178.2744598388671875,
+                              158.70111083984375,
+                              140.44512939453125,
+                              123.64312744140625,
+                              108.35161590576171875,
+                              98.907470703125,
+                              93.5875701904296875,
+                              88.4422607421875,
+                              83.48635101318359375,
+                              78.7296905517578125,
+                              74.17812347412109375,
+                              69.8341827392578125,
+                              65.70645904541015625,
+                              62.66451263427734375,
+                              59.736907958984375,
+                              56.923252105712890625,
+                              54.22241973876953125,
+                              51.632717132568359375,
+                              49.151935577392578125,
+                              46.77752685546875,
+                              44.50667572021484375,
+                              42.336353302001953125,
+                              283.2335205078125,
+                              281.318328857421875,
+                              275.68292236328125,
+                              266.643341064453125,
+                              254.681488037109375,
+                              240.389190673828125,
+                              224.4067230224609375,
+                              207.3672332763671875,
+                              189.852081298828125,
+                              172.3627471923828125,
+                              155.306304931640625,
+                              138.993194580078125,
+                              123.64312744140625,
+                              109.3962554931640625,
+                              99.63219451904296875,
+                              94.5781402587890625,
+                              89.64476776123046875,
+                              84.85404205322265625,
+                              80.2224273681640625,
+                              75.76171875,
+                              71.4796905517578125,
+                              67.38091278076171875,
+                              63.985767364501953125,
+                              61.0775909423828125,
+                              58.2728424072265625,
+                              55.5720672607421875,
+                              52.97493743896484375,
+                              50.480510711669921875,
+                              48.087291717529296875,
+                              45.7933807373046875,
+                              43.59656524658203125,
+                              41.49433135986328125,
+                              237.367095947265625,
+                              235.8760223388671875,
+                              231.480926513671875,
+                              224.4067230224609375,
+                              214.9998931884765625,
+                              203.69110107421875,
+                              190.9550628662109375,
+                              177.27020263671875,
+                              163.086822509765625,
+                              148.803924560546875,
+                              134.7555084228515625,
+                              121.20597076416015625,
+                              108.35161590576171875,
+                              99.63219451904296875,
+                              94.912353515625,
+                              90.2564239501953125,
+                              85.69269561767578125,
+                              81.24362945556640625,
+                              76.92669677734375,
+                              72.75490570068359375,
+                              68.737579345703125,
+                              65.0778350830078125,
+                              62.2131195068359375,
+                              59.439159393310546875,
+                              56.75823211669921875,
+                              54.171604156494140625,
+                              51.679691314697265625,
+                              49.28223419189453125,
+                              46.97840118408203125,
+                              44.766880035400390625,
+                              42.64601898193359375,
+                              40.613834381103515625,
+                              198.92822265625,
+                              197.7614898681640625,
+                              194.317352294921875,
+                              188.7577972412109375,
+                              181.3341522216796875,
+                              172.3627471923828125,
+                              162.196685791015625,
+                              151.198699951171875,
+                              139.7166595458984375,
+                              128.066162109375,
+                              116.51837158203125,
+                              105.29486083984375,
+                              98.907470703125,
+                              94.5781402587890625,
+                              90.2564239501953125,
+                              85.97531890869140625,
+                              81.76230621337890625,
+                              77.6397247314453125,
+                              73.62548065185546875,
+                              69.73333740234375,
+                              65.97369384765625,
+                              63.1215667724609375,
+                              60.4009246826171875,
+                              57.760517120361328125,
+                              55.203277587890625,
+                              52.731090545654296875,
+                              50.345058441162109375,
+                              48.045475006103515625,
+                              45.832118988037109375,
+                              43.7042236328125,
+                              41.660648345947265625,
+                              39.699886322021484375,
+                              166.714080810546875,
+                              165.797119140625,
+                              163.086822509765625,
+                              158.70111083984375,
+                              152.824005126953125,
+                              145.6892547607421875,
+                              137.5612030029296875,
+                              128.7149810791015625,
+                              119.4195404052734375,
+                              109.923492431640625,
+                              101.29721832275390625,
+                              97.48529815673828125,
+                              93.5875701904296875,
+                              89.64476776123046875,
+                              85.69269561767578125,
+                              81.76230621337890625,
+                              77.87981414794921875,
+                              74.0670623779296875,
+                              70.34185791015625,
+                              66.7183837890625,
+                              63.7845306396484375,
+                              61.139739990234375,
+                              58.560787200927734375,
+                              56.05228424072265625,
+                              53.617671966552734375,
+                              51.25939178466796875,
+                              48.979061126708984375,
+                              46.77752685546875,
+                              44.6550750732421875,
+                              42.611438751220703125,
+                              40.645923614501953125,
+                              38.757503509521484375,
+                              139.71661376953125,
+                              138.9931640625,
+                              136.85247802734375,
+                              133.38116455078125,
+                              128.7149505615234375,
+                              123.02779388427734375,
+                              116.51837158203125,
+                              109.3962554931640625,
+                              101.8691864013671875,
+                              98.72772216796875,
+                              95.41748046875,
+                              91.9756011962890625,
+                              88.4422607421875,
+                              84.85404205322265625,
+                              81.24362945556640625,
+                              77.6397247314453125,
+                              74.0670623779296875,
+                              70.54657745361328125,
+                              67.09571075439453125,
+                              64.18810272216796875,
+                              61.6408843994140625,
+                              59.143909454345703125,
+                              56.703399658203125,
+                              54.324310302734375,
+                              52.010478973388671875,
+                              49.764812469482421875,
+                              47.589336395263671875,
+                              45.485385894775390625,
+                              43.453662872314453125,
+                              41.49433135986328125,
+                              39.607158660888671875,
+                              37.7914886474609375,
+                              117.09114837646484375,
+                              116.51837158203125,
+                              114.822052001953125,
+                              112.06615447998046875,
+                              108.35161590576171875,
+                              103.80861663818359375,
+                              100.55123138427734375,
+                              98.19190216064453125,
+                              95.58690643310546875,
+                              92.77559661865234375,
+                              89.79701995849609375,
+                              86.68878173828125,
+                              83.48635101318359375,
+                              80.2224273681640625,
+                              76.92669677734375,
+                              73.62548065185546875,
+                              70.34185791015625,
+                              67.09571075439453125,
+                              64.32361602783203125,
+                              61.8940887451171875,
+                              59.498504638671875,
+                              57.14452362060546875,
+                              54.838520050048828125,
+                              52.585674285888671875,
+                              50.390140533447265625,
+                              48.255126953125,
+                              46.18306732177734375,
+                              44.175624847412109375,
+                              42.233917236328125,
+                              40.358489990234375,
+                              38.54946136474609375,
+                              36.8065338134765625,
+                              100.3662261962890625,
+                              100.18183135986328125,
+                              99.632171630859375,
+                              98.72772216796875,
+                              97.48529052734375,
+                              95.92728424072265625,
+                              94.08060455322265625,
+                              91.9756011962890625,
+                              89.64476776123046875,
+                              87.1216278076171875,
+                              84.4398193359375,
+                              81.632110595703125,
+                              78.7296905517578125,
+                              75.76171875,
+                              72.75490570068359375,
+                              69.73333740234375,
+                              66.7183837890625,
+                              64.18810272216796875,
+                              61.8940887451171875,
+                              59.61750030517578125,
+                              57.367244720458984375,
+                              55.150913238525390625,
+                              52.97493743896484375,
+                              50.844631195068359375,
+                              48.76433563232421875,
+                              46.73752593994140625,
+                              44.766880035400390625,
+                              42.854427337646484375,
+                              41.001552581787109375,
+                              39.2091522216796875,
+                              37.477649688720703125,
+                              35.807086944580078125,
+                              93.5875701904296875,
+                              93.42420196533203125,
+                              92.93701934814453125,
+                              92.1346588134765625,
+                              91.031036376953125,
+                              89.64476776123046875,
+                              87.99832916259765625,
+                              86.117218017578125,
+                              84.02899932861328125,
+                              81.76230621337890625,
+                              79.34609222412109375,
+                              76.80889129638671875,
+                              74.17812347412109375,
+                              71.4796905517578125,
+                              68.737579345703125,
+                              65.97369384765625,
+                              63.7845306396484375,
+                              61.6408843994140625,
+                              59.498504638671875,
+                              57.367244720458984375,
+                              55.255706787109375,
+                              53.17134857177734375,
+                              51.120525360107421875,
+                              49.10861968994140625,
+                              47.1400909423828125,
+                              45.21856689453125,
+                              43.34696197509765625,
+                              41.52751922607421875,
+                              39.761898040771484375,
+                              38.05126190185546875,
+                              36.396320343017578125,
+                              34.797382354736328125,
+                              87.26671600341796875,
+                              87.1216278076171875,
+                              86.6887664794921875,
+                              85.97531890869140625,
+                              84.99285125732421875,
+                              83.756927490234375,
+                              82.28644561767578125,
+                              80.60289764404296875,
+                              78.7296905517578125,
+                              76.69138336181640625,
+                              74.51293182373046875,
+                              72.21916961669921875,
+                              69.8341827392578125,
+                              67.38091278076171875,
+                              65.0778350830078125,
+                              63.1215667724609375,
+                              61.139739990234375,
+                              59.143909454345703125,
+                              57.14452362060546875,
+                              55.150913238525390625,
+                              53.17134857177734375,
+                              51.2130279541015625,
+                              49.28223419189453125,
+                              47.384288787841796875,
+                              45.52370452880859375,
+                              43.70421600341796875,
+                              41.92890167236328125,
+                              40.20015716552734375,
+                              38.519870758056640625,
+                              36.88941192626953125,
+                              35.309741973876953125,
+                              33.78139495849609375,
+                              81.37277984619140625,
+                              81.24362945556640625,
+                              80.85819244384765625,
+                              80.2224273681640625,
+                              79.34609222412109375,
+                              78.2422027587890625,
+                              76.92669677734375,
+                              75.4178009033203125,
+                              73.73546600341796875,
+                              71.90074920654296875,
+                              69.9352569580078125,
+                              67.86054229736328125,
+                              65.70645904541015625,
+                              63.985767364501953125,
+                              62.2131195068359375,
+                              60.4009246826171875,
+                              58.560787200927734375,
+                              56.703399658203125,
+                              54.838520050048828125,
+                              52.97493743896484375,
+                              51.120525360107421875,
+                              49.28223419189453125,
+                              47.4661407470703125,
+                              45.677494049072265625,
+                              43.920803070068359375,
+                              42.199848175048828125,
+                              40.517787933349609375,
+                              38.877155303955078125,
+                              37.279994964599609375,
+                              35.72786712646484375,
+                              34.221893310546875,
+                              32.9320831298828125,
+                              75.876922607421875,
+                              75.76171875,
+                              75.4178009033203125,
+                              74.85018157958984375,
+                              74.0670623779296875,
+                              73.07941436767578125,
+                              71.90074920654296875,
+                              70.54657745361328125,
+                              69.03392791748046875,
+                              67.38091278076171875,
+                              65.63607025146484375,
+                              64.18810272216796875,
+                              62.66451263427734375,
+                              61.0775909423828125,
+                              59.439159393310546875,
+                              57.760517120361328125,
+                              56.05228424072265625,
+                              54.324310302734375,
+                              52.585674285888671875,
+                              50.844631195068359375,
+                              49.10861968994140625,
+                              47.384288787841796875,
+                              45.677494049072265625,
+                              43.993377685546875,
+                              42.336353302001953125,
+                              40.710224151611328125,
+                              39.118167877197265625,
+                              37.562847137451171875,
+                              36.04637908935546875,
+                              34.570468902587890625,
+                              33.22452545166015625,
+                              32.131664276123046875,
+                              70.75225830078125,
+                              70.6492919921875,
+                              70.34185791015625,
+                              69.8341827392578125,
+                              69.13317108154296875,
+                              68.24814605712890625,
+                              67.1905670166015625,
+                              65.97369384765625,
+                              64.8706207275390625,
+                              63.7176971435546875,
+                              62.470378875732421875,
+                              61.139739990234375,
+                              59.736907958984375,
+                              58.2728424072265625,
+                              56.75823211669921875,
+                              55.203277587890625,
+                              53.617671966552734375,
+                              52.010478973388671875,
+                              50.390140533447265625,
+                              48.76433563232421875,
+                              47.1400909423828125,
+                              45.52370452880859375,
+                              43.920803070068359375,
+                              42.336353302001953125,
+                              40.774688720703125,
+                              39.239543914794921875,
+                              37.734134674072265625,
+                              36.26111602783203125,
+                              34.822742462158203125,
+                              33.446636199951171875,
+                              32.3772735595703125,
+                              31.328754425048828125,
+                              65.97368621826171875,
+                              65.8815155029296875,
+                              65.63607025146484375,
+                              65.2862091064453125,
+                              64.80181121826171875,
+                              64.18810272216796875,
+                              63.451557159423828125,
+                              62.5996856689453125,
+                              61.6408843994140625,
+                              60.584194183349609375,
+                              59.439159393310546875,
+                              58.215541839599609375,
+                              56.923252105712890625,
+                              55.5720672607421875,
+                              54.171604156494140625,
+                              52.731090545654296875,
+                              51.25939178466796875,
+                              49.764812469482421875,
+                              48.255126953125,
+                              46.73752593994140625,
+                              45.21856689453125,
+                              43.70421600341796875,
+                              42.199848175048828125,
+                              40.710224151611328125,
+                              39.239543914794921875,
+                              37.7914886474609375,
+                              36.3692169189453125,
+                              34.975429534912109375,
+                              33.612384796142578125,
+                              32.55457305908203125,
+                              31.5306549072265625,
+                              30.5255126953125,
+                              62.470378875732421875,
+                              62.4058837890625,
+                              62.2131195068359375,
+                              61.8940887451171875,
+                              61.4521331787109375,
+                              60.89176177978515625,
+                              60.218593597412109375,
+                              59.439167022705078125,
+                              58.560794830322265625,
+                              57.59142303466796875,
+                              56.53945159912109375,
+                              55.413524627685546875,
+                              54.22241973876953125,
+                              52.97493743896484375,
+                              51.679691314697265625,
+                              50.345058441162109375,
+                              48.979061126708984375,
+                              47.589336395263671875,
+                              46.18306732177734375,
+                              44.766880035400390625,
+                              43.34696197509765625,
+                              41.92890167236328125,
+                              40.517787933349609375,
+                              39.118167877197265625,
+                              37.734134674072265625,
+                              36.3692169189453125,
+                              35.026554107666015625,
+                              33.708812713623046875,
+                              32.66172027587890625,
+                              31.6664142608642578125,
+                              30.6867351531982421875,
+                              29.723903656005859375,
+                              59.2027587890625,
+                              59.143909454345703125,
+                              58.967945098876953125,
+                              58.6766357421875,
+                              58.2728424072265625,
+                              57.760517120361328125,
+                              57.14452362060546875,
+                              56.430583953857421875,
+                              55.625087738037109375,
+                              54.7350311279296875,
+                              53.767810821533203125,
+                              52.731090545654296875,
+                              51.632717132568359375,
+                              50.480510711669921875,
+                              49.28223419189453125,
+                              48.045475006103515625,
+                              46.77752685546875,
+                              45.485385894775390625,
+                              44.175624847412109375,
+                              42.854427337646484375,
+                              41.52751922607421875,
+                              40.20015716552734375,
+                              38.877155303955078125,
+                              37.562847137451171875,
+                              36.26111602783203125,
+                              34.975429534912109375,
+                              33.708812713623046875,
+                              32.697551727294921875,
+                              31.7346553802490234375,
+                              30.7841281890869140625,
+                              29.8474140167236328125,
+                              28.925754547119140625,
+                              56.10607147216796875,
+                              56.05228424072265625,
+                              55.891448974609375,
+                              55.625087738037109375,
+                              55.255706787109375,
+                              54.786739349365234375,
+                              54.22241973876953125,
+                              53.567768096923828125,
+                              52.82842254638671875,
+                              52.010478973388671875,
+                              51.120525360107421875,
+                              50.165355682373046875,
+                              49.151935577392578125,
+                              48.087291717529296875,
+                              46.97840118408203125,
+                              45.832118988037109375,
+                              44.6550750732421875,
+                              43.453662872314453125,
+                              42.233917236328125,
+                              41.001552581787109375,
+                              39.761898040771484375,
+                              38.519870758056640625,
+                              37.279994964599609375,
+                              36.04637908935546875,
+                              34.822742462158203125,
+                              33.612384796142578125,
+                              32.66172027587890625,
+                              31.7346553802490234375,
+                              30.81670379638671875,
+                              29.9094753265380859375,
+                              29.0143985748291015625,
+                              28.132732391357421875,
+                              53.17134857177734375,
+                              53.12213897705078125,
+                              52.97493743896484375,
+                              52.731090545654296875,
+                              52.392795562744140625,
+                              51.96302032470703125,
+                              51.44549560546875,
+                              50.844631195068359375,
+                              50.165355682373046875,
+                              49.41309356689453125,
+                              48.59365081787109375,
+                              47.71305084228515625,
+                              46.77752685546875,
+                              45.7933807373046875,
+                              44.766880035400390625,
+                              43.7042236328125,
+                              42.611438751220703125,
+                              41.49433135986328125,
+                              40.358489990234375,
+                              39.2091522216796875,
+                              38.05126190185546875,
+                              36.88941192626953125,
+                              35.72786712646484375,
+                              34.570468902587890625,
+                              33.446636199951171875,
+                              32.55457305908203125,
+                              31.6664142608642578125,
+                              30.7841281890869140625,
+                              29.9094753265380859375,
+                              29.0440425872802734375,
+                              28.189243316650390625,
+                              27.346340179443359375,
+                              50.390140533447265625,
+                              50.345058441162109375,
+                              50.21018218994140625,
+                              49.9866943359375,
+                              49.676509857177734375,
+                              49.28223419189453125,
+                              48.807163238525390625,
+                              48.255126953125,
+                              47.630523681640625,
+                              46.93811798095703125,
+                              46.18306732177734375,
+                              45.3707275390625,
+                              44.50667572021484375,
+                              43.59656524658203125,
+                              42.64601898193359375,
+                              41.660648345947265625,
+                              40.645923614501953125,
+                              39.607158660888671875,
+                              38.54946136474609375,
+                              37.477649688720703125,
+                              36.396320343017578125,
+                              35.309741973876953125,
+                              34.221893310546875,
+                              33.22452545166015625,
+                              32.3772735595703125,
+                              31.5306549072265625,
+                              30.6867351531982421875,
+                              29.8474140167236328125,
+                              29.0143985748291015625,
+                              28.189243316650390625,
+                              27.3733463287353515625,
+                              26.56793975830078125,
+                              47.75440216064453125,
+                              47.71305084228515625,
+                              47.589336395263671875,
+                              47.384288787841796875,
+                              47.09958648681640625,
+                              46.73752593994140625,
+                              46.300994873046875,
+                              45.7933807373046875,
+                              45.21855926513671875,
+                              44.580780029296875,
+                              43.884586334228515625,
+                              43.134796142578125,
+                              42.336353302001953125,
+                              41.49433135986328125,
+                              40.613834381103515625,
+                              39.699886322021484375,
+                              38.757503509521484375,
+                              37.7914886474609375,
+                              36.8065338134765625,
+                              35.807086944580078125,
+                              34.797382354736328125,
+                              33.78139495849609375,
+                              32.9320831298828125,
+                              32.131664276123046875,
+                              31.328754425048828125,
+                              30.5255126953125,
+                              29.723903656005859375,
+                              28.925754547119140625,
+                              28.132732391357421875,
+                              27.346340179443359375,
+                              26.56793975830078125,
+                              25.7987575531005859375};
+
+static float dequant_table[] = {
+    0.00031746,  0.00031746,  0.000318547, 0.000377554, 0.000447491, 0.000530384, 0.000628631, 0.000745078, 0.00031746,
+    0.00031746,  0.000331586, 0.000388111, 0.000456952, 0.000539384, 0.000637535, 0.000754131, 0.000318547, 0.000331586,
+    0.000366704, 0.000418477, 0.000484876, 0.000566262, 0.000664278, 0.000781404, 0.000377554, 0.000388111, 0.000418477,
+    0.000466329, 0.000530384, 0.000610828, 0.000709031, 0.000827274, 0.000447491, 0.000456952, 0.000484876, 0.000530384,
+    0.000593021, 0.000673207, 0.000772293, 0.000942868, 0.000530384, 0.000539384, 0.000566262, 0.000610828, 0.000673207,
+    0.000754131, 0.000855072, 0.00127232,  0.000628631, 0.000637535, 0.000664278, 0.000709031, 0.000772293, 0.000855072,
+    0.00119232,  0.00179199,  0.000745078, 0.000754131, 0.000781404, 0.000827274, 0.000942868, 0.00127232,  0.00179199,
+    0.00261332,  0.00178571,  0.00178571,  0.00179048,  0.00204418,  0.00233383,  0.00266452,  0.00304207,  0.00347311,
+    0.00178571,  0.00178571,  0.00184737,  0.00208861,  0.00237221,  0.00269971,  0.00307561,  0.00350598,  0.00179048,
+    0.00184737,  0.00199823,  0.00221497,  0.00248451,  0.00280405,  0.00317576,  0.00360445,  0.00204418,  0.00208861,
+    0.00221497,  0.00241009,  0.00266452,  0.00297468,  0.00334138,  0.0037684,   0.00233383,  0.00237221,  0.00248451,
+    0.00266452,  0.00290684,  0.00320899,  0.00357164,  0.00399808,  0.00266452,  0.00269971,  0.00280405,  0.00297468,
+    0.00320899,  0.00350598,  0.00386677,  0.0042947,   0.00304207,  0.00307561,  0.00317576,  0.00334138,  0.00357164,
+    0.00386677,  0.0042286,   0.00466073,  0.00347311,  0.00350598,  0.00360445,  0.0037684,   0.00399808,  0.0042947,
+    0.00466073,  0.00510017,  0.00195312,  0.00340183,  0.00590075,  0.00837433,  0.0117188,   0.0117188,   0.0119688,
+    0.016986,    0.00340183,  0.00428086,  0.00640914,  0.00886387,  0.0117188,   0.0117188,   0.0123206,   0.017414,
+    0.00590075,  0.00640914,  0.00788613,  0.0103519,   0.0117188,   0.0117188,   0.013409,    0.0187362,   0.00837433,
+    0.00886387,  0.0103519,   0.0117188,   0.0117188,   0.0117188,   0.0153365,   0.0210725,   0.0117188,   0.0117188,
+    0.0117188,   0.0117188,   0.0117188,   0.0137829,   0.018289,    0.0253682,   0.0117188,   0.0117188,   0.0117188,
+    0.0117188,   0.0137829,   0.017414,    0.0225572,   0.0342323,   0.0119688,   0.0123206,   0.013409,    0.0153365,
+    0.018289,    0.0225572,   0.0320797,   0.0482141,   0.016986,    0.017414,    0.0187362,   0.0210725,   0.0253682,
+    0.0342323,   0.0482141,   0.0703122,   0.00357143,  0.000316456, 0.00357143,  0.00357143,  0.00357143,  0.00357143,
+    0.00357143,  0.00357143,  0.000316456, 0.000316456, 0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,
+    0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,
+    0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,
+    0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,
+    0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,
+    0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.00357143,
+    0.00357143,  0.00357143,  0.00357143,  0.00357143,  0.0166667,   0.00115741,  0.0166667,   0.0166667,   0.0166667,
+    0.0166667,   0.0166667,   0.0166667,   0.00115741,  0.00115741,  0.0166667,   0.0166667,   0.0166667,   0.0166667,
+    0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,
+    0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,
+    0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,
+    0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,
+    0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,
+    0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0166667,   0.0555556,   0.005,       0.0555556,   0.0555556,
+    0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.005,       0.005,       0.0555556,   0.0555556,   0.0555556,
+    0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,
+    0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,
+    0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,
+    0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,
+    0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,
+    0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.0555556,   0.00033456,  0.000260417, 0.00078125,
+    0.00078125,  0.00208333,  0.00208333,  0.00208333,  0.00208333,  0.000260417, 0.000390625, 0.00078125,  0.00078125,
+    0.00208333,  0.00208333,  0.00208333,  0.00208333,  0.00078125,  0.00078125,  0.0015625,   0.0015625,   0.00208333,
+    0.00208333,  0.00208333,  0.00208333,  0.00078125,  0.00078125,  0.0015625,   0.0015625,   0.00208333,  0.00208333,
+    0.00208333,  0.00208333,  0.00208333,  0.00208333,  0.00208333,  0.00208333,  0.00333333,  0.00333333,  0.00333333,
+    0.00333333,  0.00208333,  0.00208333,  0.00208333,  0.00208333,  0.00333333,  0.00333333,  0.00333333,  0.00333333,
+    0.00208333,  0.00208333,  0.00208333,  0.00208333,  0.00333333,  0.00333333,  0.00333333,  0.00333333,  0.00208333,
+    0.00208333,  0.00208333,  0.00208333,  0.00333333,  0.00333333,  0.00333333,  0.00333333,  0.00033456,  0.00104167,
+    0.003125,    0.003125,    0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.00104167,  0.0015625,   0.003125,
+    0.003125,    0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.003125,    0.003125,    0.00555556,  0.00555556,
+    0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.003125,    0.003125,    0.00555556,  0.00555556,  0.00714286,
+    0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.00833333,  0.00833333,
+    0.00833333,  0.00833333,  0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.00833333,  0.00833333,  0.00833333,
+    0.00833333,  0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.00833333,  0.00833333,  0.00833333,  0.00833333,
+    0.00714286,  0.00714286,  0.00714286,  0.00714286,  0.00833333,  0.00833333,  0.00833333,  0.00833333,  0.00033456,
+    0.0015625,   0.0078125,   0.0078125,   0.03125,     0.03125,     0.03125,     0.03125,     0.0015625,   0.003125,
+    0.0078125,   0.0078125,   0.03125,     0.03125,     0.03125,     0.03125,     0.0078125,   0.0078125,   0.015625,
+    0.015625,    0.03125,     0.03125,     0.03125,     0.03125,     0.0078125,   0.0078125,   0.015625,    0.015625,
+    0.03125,     0.03125,     0.03125,     0.03125,     0.03125,     0.03125,     0.03125,     0.03125,     0.0625,
+    0.0625,      0.0625,      0.0625,      0.03125,     0.03125,     0.03125,     0.03125,     0.0625,      0.0625,
+    0.0625,      0.0625,      0.03125,     0.03125,     0.03125,     0.03125,     0.0625,      0.0625,      0.0625,
+    0.0625,      0.03125,     0.03125,     0.03125,     0.03125,     0.0625,      0.0625,      0.0625,      0.0625,
+    0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545,
+    0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545,
+    0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545,
+    0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545,
+    0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545,
+    0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545,
+    0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545, 0.000454545,
+    0.000454545, 0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,
+    0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,
+    0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,
+    0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,
+    0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,
+    0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,
+    0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,  0.00255102,
+    0.00255102,  0.00255102,  0.00892857,  0.00892857,  0.0104546,   0.0104546,   0.0122415,   0.0122415,   0.0146543,
+    0.0146543,   0.00892857,  0.00892857,  0.0104546,   0.0104546,   0.0122415,   0.0122415,   0.0146543,   0.0146543,
+    0.0104546,   0.0104546,   0.0111607,   0.0111607,   0.012706,    0.012706,    0.0153522,   0.0153522,   0.0104546,
+    0.0104546,   0.0111607,   0.0111607,   0.012706,    0.012706,    0.0153522,   0.0153522,   0.0122415,   0.0122415,
+    0.012706,    0.012706,    0.0139509,   0.0139509,   0.0174327,   0.0174327,   0.0122415,   0.0122415,   0.012706,
+    0.012706,    0.0139509,   0.0139509,   0.0174327,   0.0174327,   0.0146543,   0.0146543,   0.0153522,   0.0153522,
+    0.0174327,   0.0174327,   0.0209263,   0.0209263,   0.0146543,   0.0146543,   0.0153522,   0.0153522,   0.0174327,
+    0.0174327,   0.0209263,   0.0209263,   0.00011115,  0.000140677, 0.000178049, 0.00022535,  0.000269504, 0.000301925,
+    0.000338246, 0.000378937, 0.000420348, 0.000465933, 0.000516461, 0.000580657, 0.000667287, 0.000766841, 0.000881248,
+    0.00105055,  0.000140677, 0.000155098, 0.000188232, 0.000234132, 0.000273299, 0.00030534,  0.000341441, 0.000382008,
+    0.000423051, 0.000468598, 0.00051912,  0.000584332, 0.000671158, 0.000770947, 0.000885629, 0.00105693,  0.000178049,
+    0.000188232, 0.000216422, 0.000257694, 0.000284353, 0.000315428, 0.000350949, 0.000390319, 0.00043114,  0.000476585,
+    0.0005271,   0.000595403, 0.000682824, 0.000783325, 0.000898839, 0.00107621,  0.00022535,  0.000234132, 0.000257694,
+    0.000277035, 0.000301925, 0.000331813, 0.000366582, 0.000404044, 0.000444564, 0.000489882, 0.000540413, 0.000614021,
+    0.000702452, 0.000804154, 0.000928012, 0.00110885,  0.000269504, 0.000273299, 0.000284353, 0.000301925, 0.000325315,
+    0.000354095, 0.000387556, 0.000423051, 0.000463267, 0.000508487, 0.000562406, 0.00064044,  0.000730322, 0.000833738,
+    0.000969823, 0.00115559,  0.000301925, 0.00030534,  0.000315428, 0.000331813, 0.000354095, 0.000382008, 0.000412216,
+    0.000447241, 0.000487224, 0.000532423, 0.000595403, 0.000675038, 0.000766841, 0.000872515, 0.00102528,  0.00121754,
+    0.000338246, 0.000341441, 0.000350949, 0.000366582, 0.000387556, 0.000412216, 0.000441884, 0.000476585, 0.000516461,
+    0.00056604,  0.00063664,  0.000718321, 0.000812556, 0.000928012, 0.00109572,  0.00129614,  0.000378937, 0.000382008,
+    0.000390319, 0.000404044, 0.000423051, 0.000447241, 0.000476585, 0.000511145, 0.000551548, 0.000614021, 0.000686732,
+    0.000770947, 0.000868165, 0.00100658,  0.00118286,  0.00139327,  0.000420348, 0.000423051, 0.00043114,  0.000444564,
+    0.000463267, 0.000487224, 0.000516461, 0.000551548, 0.000606549, 0.000671158, 0.000746456, 0.000833738, 0.000945791,
+    0.00110227,  0.00128886,  0.00151124,  0.000465933, 0.000468598, 0.000476585, 0.000489882, 0.000508487, 0.000532423,
+    0.00056604,  0.000614021, 0.000671158, 0.000738369, 0.000816772, 0.000910439, 0.00105055,  0.00121754,  0.00141635,
+    0.0016529,   0.000516461, 0.00051912,  0.0005271,   0.000540413, 0.000562406, 0.000595403, 0.00063664,  0.000686732,
+    0.000746456, 0.000816772, 0.000898839, 0.00102528,  0.00117601,  0.00135536,  0.00156853,  0.00188017,  0.000580657,
+    0.000584332, 0.000595403, 0.000614021, 0.00064044,  0.000675038, 0.000718321, 0.000770947, 0.000833738, 0.000910439,
+    0.00102528,  0.00116237,  0.00132553,  0.00151933,  0.00176938,  0.00219749,  0.000667287, 0.000671158, 0.000682824,
+    0.000702452, 0.000730322, 0.000766841, 0.000812556, 0.000868165, 0.000945791, 0.00105055,  0.00117601,  0.00132553,
+    0.00150317,  0.00171597,  0.00210276,  0.00259291,  0.000766841, 0.000770947, 0.000783325, 0.000804154, 0.000833738,
+    0.000872515, 0.000928012, 0.00100658,  0.00110227,  0.00121754,  0.00135536,  0.00151933,  0.00171597,  0.00207192,
+    0.00252031,  0.00308604,  0.000881248, 0.000885629, 0.000898839, 0.000928012, 0.000969823, 0.00102528,  0.00109572,
+    0.00118286,  0.00128886,  0.00141635,  0.00156853,  0.00176938,  0.00210276,  0.00252031,  0.00304399,  0.00370184,
+    0.00105055,  0.00105693,  0.00107621,  0.00110885,  0.00115559,  0.00121754,  0.00129614,  0.00139327,  0.00151124,
+    0.0016529,   0.00188017,  0.00219749,  0.00259291,  0.00308604,  0.00370184,  0.0044721,   0.000313334, 0.000362504,
+    0.000419391, 0.000485204, 0.000567019, 0.000670359, 0.000792533, 0.000936973, 0.00104528,  0.00116095,  0.00128941,
+    0.00142184,  0.0015506,   0.00169102,  0.00184416,  0.00199463,  0.000362504, 0.000385066, 0.000434073, 0.000496818,
+    0.000578827, 0.000681565, 0.000803591, 0.000948188, 0.00105214,  0.00116772,  0.00129618,  0.00142744,  0.0015562,
+    0.00169666,  0.00184987,  0.00199976,  0.000419391, 0.000434073, 0.00047322,  0.000530783, 0.000613658, 0.000715011,
+    0.000836789, 0.00096922,  0.00107265,  0.00118801,  0.00131649,  0.00144425,  0.00157301,  0.00171359,  0.00186702,
+    0.00201515,  0.000485204, 0.000496818, 0.000530783, 0.000590527, 0.000670359, 0.000770417, 0.0008923,   0.00100397,
+    0.0011067,   0.00122181,  0.00135039,  0.00147223,  0.00160105,  0.00174186,  0.00189247,  0.00204083,  0.000567019,
+    0.000578827, 0.000613658, 0.000670359, 0.000748285, 0.000847868, 0.000962226, 0.00105214,  0.00115418,  0.00126912,
+    0.00139381,  0.00151141,  0.00164036,  0.00178153,  0.00192816,  0.00207686,  0.000670359, 0.000681565, 0.000715011,
+    0.000770417, 0.000847868, 0.000948188, 0.00102467,  0.00111349,  0.00121505,  0.00133005,  0.00144425,  0.00156181,
+    0.00169102,  0.00183274,  0.00197415,  0.00212335,  0.000792533, 0.000803591, 0.000836789, 0.0008923,   0.000962226,
+    0.00102467,  0.0010999,   0.00118801,  0.00128941,  0.00139942,  0.00150581,  0.0016235,   0.00175318,  0.00189247,
+    0.00203055,  0.00218041,  0.000936973, 0.000948188, 0.00096922,  0.00100397,  0.00105214,  0.00111349,  0.00118801,
+    0.00127588,  0.00137698,  0.00147223,  0.00157862,  0.00169666,  0.00182704,  0.00195881,  0.0020975,   0.00224824,
+    0.00104528,  0.00105214,  0.00107265,  0.0011067,   0.00115418,  0.00121505,  0.00128941,  0.00137698,  0.00146104,
+    0.0015562,   0.00166286,  0.00178153,  0.00190776,  0.00203569,  0.00217521,  0.00232705,  0.00116095,  0.00116772,
+    0.00118801,  0.00122181,  0.00126912,  0.00133005,  0.00139942,  0.00147223,  0.0015562,   0.0016516,   0.00175885,
+    0.0018772,   0.00199463,  0.00212335,  0.00226395,  0.00241714,  0.00128941,  0.00129618,  0.00131649,  0.00135039,
+    0.00139381,  0.00144425,  0.00150581,  0.00157862,  0.00166286,  0.00175885,  0.00186702,  0.00197415,  0.00209234,
+    0.0022221,   0.00236404,  0.00253057,  0.00142184,  0.00142744,  0.00144425,  0.00147223,  0.00151141,  0.00156181,
+    0.0016235,   0.00169666,  0.00178153,  0.0018772,   0.00197415,  0.00208202,  0.00220124,  0.00233233,  0.00248006,
+    0.00266504,  0.0015506,   0.0015562,   0.00157301,  0.00160105,  0.00164036,  0.00169102,  0.00175318,  0.00182704,
+    0.00190776,  0.00199463,  0.00209234,  0.00220124,  0.00232178,  0.00245495,  0.00262634,  0.00281554,  0.00169102,
+    0.00169666,  0.00171359,  0.00174186,  0.00178153,  0.00183274,  0.00189247,  0.00195881,  0.00203569,  0.00212335,
+    0.0022221,   0.00233233,  0.00245495,  0.00261349,  0.00278912,  0.00298309,  0.00184416,  0.00184987,  0.00186702,
+    0.00189247,  0.00192816,  0.00197415,  0.00203055,  0.0020975,   0.00217521,  0.00226395,  0.00236404,  0.00248006,
+    0.00262634,  0.00278912,  0.00296953,  0.00316883,  0.00199463,  0.00199976,  0.00201515,  0.00204083,  0.00207686,
+    0.00212335,  0.00218041,  0.00224824,  0.00232705,  0.00241714,  0.00253057,  0.00266504,  0.00281554,  0.00298309,
+    0.00316883,  0.00337407,  0.000863928, 0.00118463,  0.00162439,  0.0022274,   0.00295919,  0.00379064,  0.00485571,
+    0.00622003,  0.00705056,  0.00791755,  0.00889116,  0.00992424,  0.0109744,   0.0121358,   0.01342,     0.016979,
+    0.00118463,  0.00135013,  0.00175008,  0.00234449,  0.00305079,  0.00388474,  0.00495625,  0.00633046,  0.00710165,
+    0.00796857,  0.00894274,  0.00996962,  0.0110205,   0.0121827,   0.0134682,   0.0171252,   0.00162439,  0.00175008,
+    0.00210996,  0.00268382,  0.00332618,  0.00416997,  0.00526206,  0.00648581,  0.00725479,  0.00812174,  0.00909774,
+    0.0101059,   0.0111587,   0.0123239,   0.0136133,   0.0175695,   0.0022274,   0.00234449,  0.00268382,  0.00314244,
+    0.00379064,  0.00465665,  0.00578646,  0.00674328,  0.00750972,  0.00837744,  0.00935698,  0.0103335,   0.0113898,
+    0.0125601,   0.0142433,   0.018329,    0.00295919,  0.00305079,  0.00332618,  0.00379064,  0.00446017,  0.00536543,
+    0.00643411,  0.00710165,  0.00786655,  0.00873667,  0.0096976,   0.0106533,   0.0117149,   0.0128927,   0.0151608,
+    0.019433,    0.00379064,  0.00388474,  0.00416997,  0.00465665,  0.00536543,  0.00633046,  0.0068971,   0.00756068,
+    0.00832625,  0.00920129,  0.0101059,   0.0110665,   0.0121358,   0.0133237,   0.0164033,   0.020925,    0.00485571,
+    0.00495625,  0.00526206,  0.00578646,  0.00643411,  0.0068971,   0.00745875,  0.00812174,  0.00889116,  0.00974289,
+    0.0106075,   0.0115753,   0.0126549,   0.0142433,   0.0180223,   0.022864,    0.00622003,  0.00633046,  0.00648581,
+    0.00674328,  0.00710165,  0.00756068,  0.00812174,  0.00878813,  0.00956181,  0.0103335,   0.0112048,   0.0121827,
+    0.0132756,   0.0159812,   0.0200859,   0.0253284,   0.00705056,  0.00710165,  0.00725479,  0.00750972,  0.00786655,
+    0.00832625,  0.00889116,  0.00956181,  0.0102424,   0.0110205,   0.0119015,   0.0128927,   0.0146314,   0.0181752,
+    0.0226824,   0.0284191,   0.00791755,  0.00796857,  0.00812174,  0.00837744,  0.00873667,  0.00920129,  0.00974289,
+    0.0103335,   0.0110205,   0.0118081,   0.0127024,   0.0138628,   0.016979,    0.020925,    0.0259248,   0.0322653,
+    0.00889116,  0.00894274,  0.00909774,  0.00935698,  0.0096976,   0.0101059,   0.0106075,   0.0112048,   0.0119015,
+    0.0127024,   0.0136133,   0.0164033,   0.0199212,   0.0243578,   0.0299571,   0.040354,    0.00992424,  0.00996962,
+    0.0101059,   0.0103335,   0.0106533,   0.0110665,   0.0115753,   0.0121827,   0.0128927,   0.0138628,   0.0164033,
+    0.0195947,   0.0236019,   0.028635,    0.0360671,   0.0538439,   0.0109744,   0.0110205,   0.0111587,   0.0113898,
+    0.0117149,   0.0121358,   0.0126549,   0.0132756,   0.0146314,   0.016979,    0.0199212,   0.0236019,   0.0282045,
+    0.0340795,   0.0496305,   0.0731186,   0.0121358,   0.0121827,   0.0123239,   0.0125601,   0.0128927,   0.0133237,
+    0.0142433,   0.0159812,   0.0181752,   0.020925,    0.0243578,   0.028635,    0.0340795,   0.0482929,   0.0693777,
+    0.100892,    0.01342,     0.0134682,   0.0136133,   0.0142433,   0.0151608,   0.0164033,   0.0180223,   0.0200859,
+    0.0226824,   0.0259248,   0.0299571,   0.0360671,   0.0496305,   0.0693777,   0.0983645,   0.141247,    0.016979,
+    0.0171252,   0.0175695,   0.018329,    0.019433,    0.020925,    0.022864,    0.0253284,   0.0284191,   0.0322653,
+    0.040354,    0.0538439,   0.0731186,   0.100892,    0.141247,    0.200352,    6.36197e-05, 7.12062e-05, 7.96975e-05,
+    8.92013e-05, 9.98385e-05, 0.000111744, 0.000125069, 0.000139614, 0.000155703, 0.000173646, 0.000193657, 0.000215974,
+    0.000240863, 0.000267796, 0.000296726, 0.000328782, 0.000364301, 0.000403657, 0.000447265, 0.000490497, 0.000517569,
+    0.000546135, 0.000576279, 0.000608085, 0.000641648, 0.000677063, 0.000721071, 0.000768327, 0.000818681, 0.000872335,
+    0.000929504, 0.000990421, 7.12062e-05, 7.46078e-05, 8.18455e-05, 9.08471e-05, 0.000101233, 0.000112998, 0.000126241,
+    0.000140701, 0.000156764, 0.000174699, 0.000194714, 0.000217046, 0.000241959, 0.000268853, 0.000297814, 0.000329907,
+    0.000365469, 0.000404876, 0.00044854,  0.00049119,  0.000518264, 0.000546834, 0.000576982, 0.000608796, 0.000642366,
+    0.00067779,  0.000721951, 0.000769231, 0.000819609, 0.000873289, 0.000930488, 0.000991435, 7.96975e-05, 8.18455e-05,
+    8.74937e-05, 9.5499e-05,  0.000105293, 0.0001167,   0.000129698, 0.000143945, 0.000159941, 0.000177855, 0.000197886,
+    0.000220265, 0.000245251, 0.000272031, 0.000301085, 0.00033329,  0.000368984, 0.000408541, 0.000452376, 0.000493271,
+    0.00052035,  0.000548931, 0.000579094, 0.000610927, 0.000644522, 0.000680161, 0.000724595, 0.000771944, 0.000822397,
+    0.000876157, 0.000933442, 0.000994481, 8.92013e-05, 9.08471e-05, 9.5499e-05,  0.000102605, 0.000111744, 0.00012271,
+    0.000135241, 0.000149313, 0.000165221, 0.000183116, 0.000203185, 0.000225649, 0.000250764, 0.000277348, 0.00030656,
+    0.000338955, 0.000374871, 0.000414682, 0.000458803, 0.000496739, 0.000523828, 0.000552427, 0.000582617, 0.000614484,
+    0.000648119, 0.000684469, 0.00072901,  0.000776474, 0.000827052, 0.000880947, 0.000938375, 0.000999568, 9.98385e-05,
+    0.000101233, 0.000105293, 0.000111744, 0.000120327, 0.000130815, 0.000142866, 0.000156764, 0.000172594, 0.000190491,
+    0.000210631, 0.000233229, 0.000258328, 0.00028484,  0.000314279, 0.000346945, 0.000383176, 0.000423346, 0.000467872,
+    0.000501596, 0.0005287,   0.000557326, 0.000587554, 0.000619469, 0.000653162, 0.000690519, 0.000735209, 0.000782835,
+    0.000833588, 0.000887672, 0.000945302, 0.00100671,  0.000111744, 0.000112998, 0.0001167,   0.00012271,  0.000130815,
+    0.000140701, 0.000152514, 0.000166276, 0.000182063, 0.000200004, 0.000220265, 0.000243055, 0.000267796, 0.000294554,
+    0.000324296, 0.00035732,  0.000393964, 0.000434602, 0.000479655, 0.000507843, 0.00053497,  0.000563634, 0.000593912,
+    0.000625891, 0.00065966,  0.000698327, 0.00074321,  0.000791046, 0.000842026, 0.000896353, 0.000954244, 0.00101529,
+    0.000125069, 0.000126241, 0.000129698, 0.000135241, 0.000142866, 0.000152514, 0.000164166, 0.000177855, 0.000193657,
+    0.000211698, 0.000232143, 0.000255189, 0.000279483, 0.00030656,  0.000336685, 0.000370159, 0.000407318, 0.00044854,
+    0.000489803, 0.000515484, 0.000542644, 0.000571356, 0.0006017,   0.000633759, 0.000667622, 0.000707914, 0.000753037,
+    0.000801131, 0.00085239,  0.000907017, 0.000965229, 0.00102529,  0.000139614, 0.000140701, 0.000143945, 0.000149313,
+    0.000156764, 0.000166276, 0.000177855, 0.000191546, 0.000207435, 0.000225649, 0.000246351, 0.000268853, 0.00029347,
+    0.000320945, 0.000351542, 0.000385563, 0.000423346, 0.000465272, 0.00049882,  0.000524524, 0.000551727, 0.000580503,
+    0.000610927, 0.000643085, 0.000677063, 0.000719311, 0.000764718, 0.000813121, 0.000864712, 0.000919695, 0.000978289,
+    0.00103716,  0.000155703, 0.000156764, 0.000159941, 0.000165221, 0.000172594, 0.000182063, 0.000193657, 0.000207435,
+    0.000223492, 0.000241959, 0.000262526, 0.00028484,  0.00030986,  0.00033782,  0.000368984, 0.000403657, 0.00044218,
+    0.000484936, 0.000509231, 0.00053497,  0.000562231, 0.000591085, 0.000621608, 0.000653883, 0.000689654, 0.00073255,
+    0.000778289, 0.000827052, 0.000879029, 0.000934428, 0.000993465, 0.00105093,  0.000173646, 0.000174699, 0.000177855,
+    0.000183116, 0.000190491, 0.000200004, 0.000211698, 0.000225649, 0.000241959, 0.000260425, 0.000280552, 0.000303271,
+    0.000328782, 0.00035732,  0.000389154, 0.00042459,  0.000463975, 0.000496739, 0.000521046, 0.000546834, 0.000574168,
+    0.000603118, 0.000633759, 0.000666173, 0.000704422, 0.00074767,  0.000793791, 0.000842966, 0.000895386, 0.000951259,
+    0.00101076,  0.00106664,  0.000193657, 0.000194714, 0.000197886, 0.000203185, 0.000210631, 0.000220265, 0.000232143,
+    0.000246351, 0.000262526, 0.000280552, 0.000301085, 0.000324296, 0.000350391, 0.000379608, 0.000412221, 0.00044854,
+    0.00048703,  0.000509926, 0.000534273, 0.000560129, 0.000587554, 0.000616619, 0.000647399, 0.000680161, 0.000721071,
+    0.000764718, 0.000811271, 0.000860912, 0.000913833, 0.000970242, 0.00102802,  0.00108432,  0.000215974, 0.000217046,
+    0.000220265, 0.000225649, 0.000233229, 0.000243055, 0.000255189, 0.000268853, 0.00028484,  0.000303271, 0.000324296,
+    0.000348092, 0.000374871, 0.000404876, 0.000438384, 0.000475711, 0.000501595, 0.000524524, 0.000548931, 0.000574872,
+    0.000602409, 0.000631611, 0.000662552, 0.000698327, 0.00073965,  0.000783746, 0.000830784, 0.000880947, 0.000934428,
+    0.000991435, 0.00104725,  0.00110401,  0.000240863, 0.000241959, 0.000245251, 0.000250764, 0.000258328, 0.000267796,
+    0.000279483, 0.00029347,  0.00030986,  0.000328782, 0.000350391, 0.000374871, 0.00040244,  0.000433345, 0.000467872,
+    0.000496045, 0.000517569, 0.00054055,  0.000565037, 0.000591085, 0.000618757, 0.000648119, 0.0006793,   0.000718432,
+    0.000760216, 0.000804812, 0.00085239,  0.000903132, 0.000957234, 0.00101438,  0.0010685,   0.00112577,  0.000267796,
+    0.000268853, 0.000272031, 0.000277348, 0.00028484,  0.000294554, 0.00030656,  0.000320945, 0.00033782,  0.00035732,
+    0.000379608, 0.000404876, 0.000433345, 0.000465272, 0.000493271, 0.000513399, 0.00053497,  0.000558027, 0.000582617,
+    0.000608796, 0.000636625, 0.000666173, 0.000700936, 0.000740539, 0.000782835, 0.000827984, 0.000876157, 0.000927539,
+    0.000982325, 0.00103716,  0.0010918,   0.00114964,  0.000296726, 0.000297814, 0.000301085, 0.00030656,  0.000314279,
+    0.000324296, 0.000336685, 0.000351542, 0.000368984, 0.000389154, 0.000412221, 0.000438384, 0.000467872, 0.000493271,
+    0.00051201,  0.000532183, 0.000553826, 0.000576982, 0.0006017,   0.000628035, 0.000656048, 0.00068706,  0.000724595,
+    0.000764718, 0.000807578, 0.000853335, 0.000902162, 0.000954244, 0.00100978,  0.00106201,  0.00111723,  0.00117568,
+    0.000328782, 0.000329907, 0.00033329,  0.000338955, 0.000346945, 0.00035732,  0.000370159, 0.000385563, 0.000403657,
+    0.00042459,  0.00044854,  0.000475711, 0.000496045, 0.000513399, 0.000532183, 0.000552427, 0.000574168, 0.00059745,
+    0.000622322, 0.000648839, 0.000677063, 0.000712289, 0.000750352, 0.000791046, 0.000834524, 0.000880947, 0.000930488,
+    0.000983335, 0.00103625,  0.00108899,  0.00114484,  0.00120396,  0.000364301, 0.000365469, 0.000368984, 0.000374871,
+    0.000383176, 0.000393964, 0.000407318, 0.000423346, 0.00044218,  0.000463975, 0.00048703,  0.000501595, 0.000517569,
+    0.00053497,  0.000553826, 0.000574168, 0.000596035, 0.000619469, 0.000644522, 0.000671249, 0.00070355,  0.00073965,
+    0.000778289, 0.000819609, 0.000863761, 0.000910909, 0.000961228, 0.00101438,  0.00106479,  0.00111818,  0.00117471,
+    0.00123455,  0.000403657, 0.000404876, 0.000408541, 0.000414682, 0.000423346, 0.000434602, 0.00044854,  0.000465272,
+    0.000484936, 0.000496739, 0.000509926, 0.000524524, 0.00054055,  0.000558027, 0.000576982, 0.00059745,  0.000619469,
+    0.000643085, 0.000668347, 0.000698327, 0.00073255,  0.000769231, 0.000808501, 0.000850501, 0.000895386, 0.000943321,
+    0.000994481, 0.0010445,   0.00109555,  0.00114964,  0.0012069,   0.00126753,  0.000447265, 0.00044854,  0.000452376,
+    0.000458803, 0.000467872, 0.000479655, 0.000489803, 0.00049882,  0.000509231, 0.000521046, 0.000534273, 0.000548931,
+    0.000565037, 0.000582617, 0.0006017,   0.000622322, 0.000644522, 0.000668347, 0.000696588, 0.00072901,  0.000763817,
+    0.000801131, 0.000841086, 0.000883826, 0.000929504, 0.000978289, 0.00102802,  0.00107686,  0.00112862,  0.00118345,
+    0.00124151,  0.00130297,  0.000490497, 0.00049119,  0.000493271, 0.000496739, 0.000501596, 0.000507843, 0.000515484,
+    0.000524524, 0.00053497,  0.000546834, 0.000560129, 0.000574872, 0.000591085, 0.000608796, 0.000628035, 0.000648839,
+    0.000671249, 0.000698327, 0.00072901,  0.000762016, 0.000797458, 0.00083546,  0.000876157, 0.000919695, 0.000966231,
+    0.00101529,  0.00106201,  0.00111156,  0.00116407,  0.0012197,   0.00127861,  0.00134098,  0.000517569, 0.000518264,
+    0.00052035,  0.000523828, 0.0005287,   0.00053497,  0.000542644, 0.000551727, 0.000562231, 0.000574168, 0.000587554,
+    0.000602409, 0.000618757, 0.000636625, 0.000656048, 0.000677063, 0.00070355,  0.00073255,  0.000763817, 0.000797458,
+    0.000833588, 0.000872335, 0.000913833, 0.000958232, 0.00100569,  0.00105093,  0.00109837,  0.00114868,  0.001202,
+    0.00125849,  0.00131831,  0.00138164,  0.000546135, 0.000546834, 0.000548931, 0.000552427, 0.000557326, 0.000563634,
+    0.000571356, 0.000580503, 0.000591085, 0.000603118, 0.000616619, 0.000631611, 0.000648119, 0.000666173, 0.00068706,
+    0.000712289, 0.00073965,  0.000769231, 0.000801131, 0.00083546,  0.000872335, 0.000911883, 0.000954244, 0.000999568,
+    0.00104358,  0.00108899,  0.00113719,  0.00118832,  0.0012425,   0.00129991,  0.00136071,  0.00142507,  0.000576279,
+    0.000576982, 0.000579094, 0.000582617, 0.000587554, 0.000593912, 0.0006017,   0.000610927, 0.000621608, 0.000633759,
+    0.000647399, 0.000662552, 0.0006793,   0.000700936, 0.000724595, 0.000750352, 0.000778289, 0.000808501, 0.000841086,
+    0.000876157, 0.000913833, 0.000954244, 0.000997532, 0.00103991,  0.00108339,  0.00112957,  0.00117859,  0.00123058,
+    0.00128569,  0.00134408,  0.00140592,  0.00145992,  0.000608085, 0.000608796, 0.000610927, 0.000614484, 0.000619469,
+    0.000625891, 0.000633759, 0.000643085, 0.000653883, 0.000666173, 0.000680161, 0.000698327, 0.000718432, 0.000740539,
+    0.000764718, 0.000791046, 0.000819609, 0.000850501, 0.000883826, 0.000919695, 0.000958232, 0.000999568, 0.00103991,
+    0.00108152,  0.00112577,  0.00117277,  0.00122266,  0.00127559,  0.00133168,  0.00139111,  0.00144821,  0.00149301,
+    0.000641648, 0.000642366, 0.000644522, 0.000648119, 0.000653162, 0.00065966,  0.000667622, 0.000677063, 0.000689654,
+    0.000704422, 0.000721071, 0.00073965,  0.000760216, 0.000782835, 0.000807578, 0.000834524, 0.000863761, 0.000895386,
+    0.000929504, 0.000966231, 0.00100569,  0.00104358,  0.00108339,  0.00112577,  0.00117084,  0.00121872,  0.00126954,
+    0.00132345,  0.00138059,  0.00143944,  0.00148269,  0.00152783,  0.000677063, 0.00067779,  0.000680161, 0.000684469,
+    0.000690519, 0.000698327, 0.000707914, 0.000719311, 0.00073255,  0.00074767,  0.000764718, 0.000783746, 0.000804812,
+    0.000827984, 0.000853335, 0.000880947, 0.000910909, 0.000943321, 0.000978289, 0.00101529,  0.00105093,  0.00108899,
+    0.00112957,  0.00117277,  0.00121872,  0.00126753,  0.00131934,  0.0013743,   0.00143255,  0.00147533,  0.00151892,
+    0.00156441,  0.000721071, 0.000721951, 0.000724595, 0.00072901,  0.000735209, 0.00074321,  0.000753037, 0.000764718,
+    0.000778289, 0.000793791, 0.000811271, 0.000830784, 0.00085239,  0.000876157, 0.000902162, 0.000930488, 0.000961228,
+    0.000994481, 0.00102802,  0.00106201,  0.00109837,  0.00113719,  0.00117859,  0.00122266,  0.00126954,  0.00131934,
+    0.0013722,   0.00142827,  0.00147092,  0.00151298,  0.00155692,  0.0016028,   0.000768327, 0.000769231, 0.000771944,
+    0.000776474, 0.000782835, 0.000791046, 0.000801131, 0.000813121, 0.000827052, 0.000842966, 0.000860912, 0.000880947,
+    0.000903132, 0.000927539, 0.000954244, 0.000983335, 0.00101438,  0.0010445,   0.00107686,  0.00111156,  0.00114868,
+    0.00118832,  0.00123058,  0.00127559,  0.00132345,  0.0013743,   0.00142827,  0.00146945,  0.00151002,  0.00155244,
+    0.00159676,  0.00164305,  0.000818681, 0.000819609, 0.000822397, 0.000827052, 0.000833588, 0.000842026, 0.00085239,
+    0.000864712, 0.000879029, 0.000895386, 0.000913833, 0.000934428, 0.000957234, 0.000982325, 0.00100978,  0.00103625,
+    0.00106479,  0.00109555,  0.00112862,  0.00116407,  0.001202,    0.0012425,   0.00128569,  0.00133168,  0.00138059,
+    0.00143255,  0.00147092,  0.00151002,  0.00155094,  0.00159374,  0.00163847,  0.00168519,  0.000872335, 0.000873289,
+    0.000876157, 0.000880947, 0.000887672, 0.000896353, 0.000907017, 0.000919695, 0.000934428, 0.000951259, 0.000970242,
+    0.000991435, 0.00101438,  0.00103716,  0.00106201,  0.00108899,  0.00111818,  0.00114964,  0.00118345,  0.0012197,
+    0.00125849,  0.00129991,  0.00134408,  0.00139111,  0.00143944,  0.00147533,  0.00151298,  0.00155244,  0.00159374,
+    0.00163695,  0.00168211,  0.00172928,  0.000929504, 0.000930488, 0.000933442, 0.000938375, 0.000945302, 0.000954244,
+    0.000965229, 0.000978289, 0.000993465, 0.00101076,  0.00102802,  0.00104725,  0.0010685,   0.0010918,   0.00111723,
+    0.00114484,  0.00117471,  0.0012069,   0.00124151,  0.00127861,  0.00131831,  0.00136071,  0.00140592,  0.00144821,
+    0.00148269,  0.00151892,  0.00155692,  0.00159676,  0.00163847,  0.00168211,  0.00172772,  0.00177538,  0.000990421,
+    0.000991435, 0.000994481, 0.000999568, 0.00100671,  0.00101529,  0.00102529,  0.00103716,  0.00105093,  0.00106664,
+    0.00108432,  0.00110401,  0.00112577,  0.00114964,  0.00117568,  0.00120396,  0.00123455,  0.00126753,  0.00130297,
+    0.00134098,  0.00138164,  0.00142507,  0.00145992,  0.00149301,  0.00152783,  0.00156441,  0.0016028,   0.00164305,
+    0.00168519,  0.00172928,  0.00177538,  0.00182353,  0.000136878, 0.000150402, 0.000165263, 0.000181592, 0.000199534,
+    0.000219249, 0.000240912, 0.000264002, 0.000289027, 0.000316423, 0.000346417, 0.000379254, 0.000415204, 0.000450291,
+    0.000483257, 0.000518636, 0.000556606, 0.000597355, 0.000641088, 0.00068713,  0.000732919, 0.000781759, 0.000833854,
+    0.00088942,  0.000948689, 0.00101191,  0.00107258,  0.00113651,  0.00120424,  0.00127601,  0.00135205,  0.00143263,
+    0.000150402, 0.000156389, 0.00016898,  0.00018439,  0.000201862, 0.000221304, 0.000242798, 0.000265706, 0.000290661,
+    0.000318014, 0.000347985, 0.000380815, 0.000416771, 0.000451514, 0.000484476, 0.000519858, 0.000557835, 0.000598597,
+    0.000642346, 0.000688297, 0.000734101, 0.00078296,  0.000835077, 0.000890668, 0.000949965, 0.00101321,  0.00107378,
+    0.00113772,  0.00120548,  0.00127728,  0.00135336,  0.00143397,  0.000165263, 0.00016898,  0.00017868,  0.000192254,
+    0.000208611, 0.000227352, 0.000248337, 0.000270784, 0.000295543, 0.000322778, 0.000352686, 0.000385499, 0.000421474,
+    0.000455183, 0.000488134, 0.000523523, 0.000561524, 0.000602324, 0.000646125, 0.000691799, 0.000737651, 0.000786566,
+    0.000838749, 0.000894414, 0.000953794, 0.00101695,  0.00107736,  0.00114138,  0.00120922,  0.00128111,  0.00135728,
+    0.00143799,  0.000181592, 0.00018439,  0.000192254, 0.000204148, 0.000219249, 0.000237105, 0.000257117, 0.000279142,
+    0.000303622, 0.000330686, 0.00036051,  0.000393307, 0.000429325, 0.000461293, 0.000494231, 0.000529636, 0.000567679,
+    0.000608545, 0.000652433, 0.000697644, 0.000743575, 0.000792586, 0.000844879, 0.00090067,  0.000960189, 0.00102282,
+    0.00108335,  0.00114749,  0.00121546,  0.00128749,  0.00136382,  0.00144469,  0.000199534, 0.000201862, 0.000208611,
+    0.000219249, 0.000233247, 0.00025011,  0.000269098, 0.000290661, 0.00031483,  0.000341708, 0.000371447, 0.000404247,
+    0.000439264, 0.000469839, 0.000502768, 0.000538204, 0.000576312, 0.000617274, 0.000661288, 0.000705842, 0.000751888,
+    0.000801034, 0.000853484, 0.000909452, 0.000969168, 0.00103106,  0.00109175,  0.00115606,  0.00122422,  0.00129645,
+    0.00137299,  0.00145411,  0.000219249, 0.000221304, 0.000227352, 0.000237105, 0.00025011,  0.000265706, 0.000284102,
+    0.00030523,  0.000329107, 0.000355817, 0.000385499, 0.000418338, 0.000450291, 0.000480818, 0.000513751, 0.000549237,
+    0.000587438, 0.000628532, 0.000672714, 0.000716412, 0.000762608, 0.000811933, 0.000864586, 0.000920784, 0.000980757,
+    0.00104169,  0.00110257,  0.00116711,  0.00123551,  0.00130801,  0.00138483,  0.00146575,  0.000240912, 0.000242798,
+    0.000248337, 0.000257117, 0.000269098, 0.000284102, 0.000302012, 0.000322778, 0.000346417, 0.000373009, 0.000402683,
+    0.000435582, 0.000463735, 0.000494231, 0.000527191, 0.000562755, 0.000601081, 0.000642346, 0.000685964, 0.000729375,
+    0.000775761, 0.000825309, 0.000878217, 0.000934701, 0.000994989, 0.00105472,  0.00111586,  0.00118067,  0.00124938,
+    0.00132219,  0.00139936,  0.00147962,  0.000264002, 0.000265706, 0.000270784, 0.000279142, 0.000290661, 0.00030523,
+    0.000322778, 0.000343278, 0.000366761, 0.000393307, 0.000423044, 0.000451514, 0.000479598, 0.000510089, 0.000543105,
+    0.000578782, 0.000617274, 0.000658755, 0.000701155, 0.000744761, 0.000791381, 0.000841199, 0.000894414, 0.000951241,
+    0.00101191,  0.0010702,   0.00113164,  0.00119678,  0.00126584,  0.00133904,  0.00141662,  0.00149609,  0.000289027,
+    0.000290661, 0.000295543, 0.000303622, 0.00031483,  0.000329107, 0.000346417, 0.000366761, 0.000390183, 0.000416771,
+    0.000444168, 0.000469839, 0.00049789,  0.000528413, 0.000561524, 0.000597355, 0.000636059, 0.000677807, 0.000718765,
+    0.000762608, 0.000809507, 0.000859646, 0.000913224, 0.000970453, 0.00102988,  0.00108814,  0.00114994,  0.00121546,
+    0.00128493,  0.00135858,  0.00143665,  0.00151518,  0.000316423, 0.000318014, 0.000322778, 0.000330686, 0.000341708,
+    0.000355817, 0.000373009, 0.000393307, 0.000416771, 0.000441717, 0.000464956, 0.000490573, 0.000518636, 0.000549237,
+    0.000582489, 0.000618523, 0.00065749,  0.000697644, 0.000738835, 0.00078296,  0.000830189, 0.000880703, 0.000934701,
+    0.000992396, 0.00104998,  0.00110861,  0.00117081,  0.00123677,  0.00130672,  0.00138088,  0.00145946,  0.00153694,
+    0.000346417, 0.000347985, 0.000352686, 0.00036051,  0.000371447, 0.000385499, 0.000402683, 0.000423044, 0.000444168,
+    0.000464956, 0.000488134, 0.000513751, 0.000541879, 0.00057261,  0.000606055, 0.000642346, 0.000681302, 0.000719942,
+    0.000761415, 0.000805873, 0.000853484, 0.00090443,  0.000958909, 0.00101695,  0.00107258,  0.00113164,  0.0011943,
+    0.00126076,  0.00133125,  0.00140599,  0.00148341,  0.00156141,  0.000379254, 0.000380815, 0.000385499, 0.000393307,
+    0.000404247, 0.000418338, 0.000435582, 0.000451514, 0.000469839, 0.000490573, 0.000513751, 0.000539429, 0.000567679,
+    0.000598597, 0.000632293, 0.0006689,   0.000705842, 0.000744761, 0.000786566, 0.00083141,  0.00087946,  0.000930898,
+    0.000985923, 0.00104169,  0.00109776,  0.00115729,  0.00122046,  0.00128749,  0.00135858,  0.00143397,  0.00151008,
+    0.00158866,  0.000415204, 0.000416771, 0.000421474, 0.000429325, 0.000439264, 0.000450291, 0.000463735, 0.000479598,
+    0.00049789,  0.000518636, 0.000541879, 0.000567679, 0.000596114, 0.000627279, 0.000661288, 0.000696474, 0.000732919,
+    0.000772168, 0.00081436,  0.000859646, 0.000908195, 0.000960189, 0.00101577,  0.001069,    0.00112556,  0.00118562,
+    0.00124938,  0.00131703,  0.00138879,  0.00146449,  0.00153951,  0.00161874,  0.000450291, 0.000451514, 0.000455183,
+    0.000461293, 0.000469839, 0.000480818, 0.000494231, 0.000510089, 0.000528413, 0.000549237, 0.00057261,  0.000598597,
+    0.000627279, 0.000658755, 0.000691799, 0.000725835, 0.000762608, 0.000802243, 0.000844879, 0.000890668, 0.000939779,
+    0.000992396, 0.00104524,  0.00109896,  0.00115606,  0.00121671,  0.00128111,  0.00134945,  0.00142195,  0.00149609,
+    0.00157177,  0.00165171,  0.000483257, 0.000484476, 0.000488134, 0.000494231, 0.000502768, 0.000513751, 0.000527191,
+    0.000543105, 0.000561524, 0.000582489, 0.000606055, 0.000632293, 0.000661288, 0.000691799, 0.000723476, 0.000757839,
+    0.000794997, 0.000835077, 0.000878217, 0.000924573, 0.000974312, 0.00102635,  0.00107736,  0.00113164,  0.00118934,
+    0.00125064,  0.00131574,  0.00138483,  0.00145815,  0.00153052,  0.00160694,  0.00168766,  0.000518636, 0.000519858,
+    0.000523523, 0.000529636, 0.000538204, 0.000549237, 0.000562755, 0.000578782, 0.000597355, 0.000618523, 0.000642346,
+    0.0006689,   0.000696474, 0.000725835, 0.000757839, 0.000792586, 0.000830189, 0.000870774, 0.000914482, 0.00096147,
+    0.00101191,  0.00106066,  0.00111223,  0.00116711,  0.00122547,  0.00128749,  0.00135336,  0.00142328,  0.00149482,
+    0.00156788,  0.00164509,  0.00172666,  0.000556606, 0.000557835, 0.000561524, 0.000567679, 0.000576312, 0.000587438,
+    0.000601081, 0.000617274, 0.000636059, 0.00065749,  0.000681302, 0.000705842, 0.000732919, 0.000762608, 0.000794997,
+    0.000830189, 0.000868297, 0.000909452, 0.000953794, 0.00100148,  0.00104879,  0.00109776,  0.00114994,  0.00120548,
+    0.00126457,  0.00132737,  0.00139407,  0.00146449,  0.00153437,  0.00160825,  0.00168632,  0.00176881,  0.000597355,
+    0.000598597, 0.000602324, 0.000608545, 0.000617274, 0.000628532, 0.000642346, 0.000658755, 0.000677807, 0.000697644,
+    0.000719942, 0.000744761, 0.000772168, 0.000802243, 0.000835077, 0.000870774, 0.000909452, 0.000951241, 0.000996287,
+    0.00104169,  0.00108814,  0.00113772,  0.00119057,  0.00124685,  0.00130672,  0.00137037,  0.00143799,  0.00150626,
+    0.00157696,  0.00165171,  0.00173072,  0.0018142,   0.000641088, 0.000642346, 0.000646125, 0.000652433, 0.000661288,
+    0.000672714, 0.000685964, 0.000701155, 0.000718765, 0.000738835, 0.000761415, 0.000786566, 0.00081436,  0.000844879,
+    0.000878217, 0.000914482, 0.000953794, 0.000996287, 0.00103932,  0.00108335,  0.00113042,  0.00118067,  0.00123426,
+    0.00129133,  0.00135205,  0.00141662,  0.00148341,  0.00155109,  0.00162268,  0.00169838,  0.00177839,  0.00186294,
+    0.00068713,  0.000688297, 0.000691799, 0.000697644, 0.000705842, 0.000716412, 0.000729375, 0.000744761, 0.000762608,
+    0.00078296,  0.000805873, 0.00083141,  0.000859646, 0.000890668, 0.000924573, 0.00096147,  0.00100148,  0.00104169,
+    0.00108335,  0.00112799,  0.00117574,  0.00122673,  0.00128111,  0.00133904,  0.00140069,  0.00146575,  0.00153052,
+    0.00159909,  0.00167164,  0.00174836,  0.00182945,  0.00191514,  0.000732919, 0.000734101, 0.000737651, 0.000743575,
+    0.000751888, 0.000762608, 0.000775761, 0.000791381, 0.000809507, 0.000830189, 0.000853484, 0.00087946,  0.000908195,
+    0.000939779, 0.000974312, 0.00101191,  0.00104879,  0.00108814,  0.00113042,  0.00117574,  0.00122422,  0.00127601,
+    0.00133125,  0.00139011,  0.00145276,  0.00151518,  0.00158086,  0.00165039,  0.00172396,  0.00180177,  0.00188402,
+    0.00197093,  0.000781759, 0.00078296,  0.000786566, 0.000792586, 0.000801034, 0.000811933, 0.000825309, 0.000841199,
+    0.000859646, 0.000880703, 0.00090443,  0.000930898, 0.000960189, 0.000992396, 0.00102635,  0.00106066,  0.00109776,
+    0.00113772,  0.00118067,  0.00122673,  0.00127601,  0.00132866,  0.00138483,  0.00144469,  0.00150498,  0.00156788,
+    0.00163453,  0.0017051,   0.00177977,  0.00185874,  0.00194222,  0.00203043,  0.000833854, 0.000835077, 0.000838749,
+    0.000844879, 0.000853484, 0.000864586, 0.000878217, 0.000894414, 0.000913224, 0.000934701, 0.000958909, 0.000985923,
+    0.00101577,  0.00104524,  0.00107736,  0.00111223,  0.00114994,  0.00119057,  0.00123426,  0.00128111,  0.00133125,
+    0.00138483,  0.00144201,  0.0014999,   0.00156012,  0.00162399,  0.00169168,  0.00176335,  0.00183919,  0.0019194,
+    0.0020042,   0.00207984,  0.00088942,  0.000890668, 0.000894414, 0.00090067,  0.000909452, 0.000920784, 0.000934701,
+    0.000951241, 0.000970453, 0.000992396, 0.00101695,  0.00104169,  0.001069,    0.00109896,  0.00113164,  0.00116711,
+    0.00120548,  0.00124685,  0.00129133,  0.00133904,  0.00139011,  0.00144469,  0.0014999,   0.00155754,  0.00161874,
+    0.00168365,  0.00175244,  0.00182529,  0.00190238,  0.00198391,  0.00206298,  0.0021275,   0.000948689, 0.000949965,
+    0.000953794, 0.000960189, 0.000969168, 0.000980757, 0.000994989, 0.00101191,  0.00102988,  0.00104998,  0.00107258,
+    0.00109776,  0.00112556,  0.00115606,  0.00118934,  0.00122547,  0.00126457,  0.00130672,  0.00135205,  0.00140069,
+    0.00145276,  0.00150498,  0.00156012,  0.00161874,  0.00168098,  0.001747,    0.00181697,  0.00189107,  0.00196949,
+    0.00205037,  0.00211263,  0.00217764,  0.00101191,  0.00101321,  0.00101695,  0.00102282,  0.00103106,  0.00104169,
+    0.00105472,  0.0010702,   0.00108814,  0.00110861,  0.00113164,  0.00115729,  0.00118562,  0.00121671,  0.00125064,
+    0.00128749,  0.00132737,  0.00137037,  0.00141662,  0.00146575,  0.00151518,  0.00156788,  0.00162399,  0.00168365,
+    0.001747,    0.0018142,   0.00188543,  0.00196085,  0.00204068,  0.00210204,  0.0021648,   0.00223034,  0.00107258,
+    0.00107378,  0.00107736,  0.00108335,  0.00109175,  0.00110257,  0.00111586,  0.00113164,  0.00114994,  0.00117081,
+    0.0011943,   0.00122046,  0.00124938,  0.00128111,  0.00131574,  0.00135336,  0.00139407,  0.00143799,  0.00148341,
+    0.00153052,  0.00158086,  0.00163453,  0.00169168,  0.00175244,  0.00181697,  0.00188543,  0.00195798,  0.00203482,
+    0.00209569,  0.00215626,  0.00221955,  0.00228566,  0.00113651,  0.00113772,  0.00114138,  0.00114749,  0.00115606,
+    0.00116711,  0.00118067,  0.00119678,  0.00121546,  0.00123677,  0.00126076,  0.00128749,  0.00131703,  0.00134945,
+    0.00138483,  0.00142328,  0.00146449,  0.00150626,  0.00155109,  0.00159909,  0.00165039,  0.0017051,   0.00176335,
+    0.00182529,  0.00189107,  0.00196085,  0.00203482,  0.00209357,  0.00215199,  0.00221309,  0.00227695,  0.00234366,
+    0.00120424,  0.00120548,  0.00120922,  0.00121546,  0.00122422,  0.00123551,  0.00124938,  0.00126584,  0.00128493,
+    0.00130672,  0.00133125,  0.00135858,  0.00138879,  0.00142195,  0.00145815,  0.00149482,  0.00153437,  0.00157696,
+    0.00162268,  0.00167164,  0.00172396,  0.00177977,  0.00183919,  0.00190238,  0.00196949,  0.00204068,  0.00209569,
+    0.00215199,  0.00221093,  0.0022726,   0.00233707,  0.00240442,  0.00127601,  0.00127728,  0.00128111,  0.00128749,
+    0.00129645,  0.00130801,  0.00132219,  0.00133904,  0.00135858,  0.00138088,  0.00140599,  0.00143397,  0.00146449,
+    0.00149609,  0.00153052,  0.00156788,  0.00160825,  0.00165171,  0.00169838,  0.00174836,  0.00180177,  0.00185874,
+    0.0019194,   0.00198391,  0.00205037,  0.00210204,  0.00215626,  0.00221309,  0.0022726,   0.00233487,  0.00239998,
+    0.002468,    0.00135205,  0.00135336,  0.00135728,  0.00136382,  0.00137299,  0.00138483,  0.00139936,  0.00141662,
+    0.00143665,  0.00145946,  0.00148341,  0.00151008,  0.00153951,  0.00157177,  0.00160694,  0.00164509,  0.00168632,
+    0.00173072,  0.00177839,  0.00182945,  0.00188402,  0.00194222,  0.0020042,   0.00206298,  0.00211263,  0.0021648,
+    0.00221955,  0.00227695,  0.00233707,  0.00239998,  0.00246576,  0.0025345,   0.00143263,  0.00143397,  0.00143799,
+    0.00144469,  0.00145411,  0.00146575,  0.00147962,  0.00149609,  0.00151518,  0.00153694,  0.00156141,  0.00158866,
+    0.00161874,  0.00165171,  0.00168766,  0.00172666,  0.00176881,  0.0018142,   0.00186294,  0.00191514,  0.00197093,
+    0.00203043,  0.00207984,  0.0021275,   0.00217764,  0.00223034,  0.00228566,  0.00234366,  0.00240442,  0.002468,
+    0.0025345,   0.00260399,  0.000262914, 0.000328843, 0.000411305, 0.000514445, 0.000643449, 0.000804803, 0.00100662,
+    0.00121693,  0.00145344,  0.00173591,  0.00207328,  0.00247621,  0.00295745,  0.00353066,  0.00421288,  0.00502694,
+    0.00599829,  0.00715734,  0.00854036,  0.00996351,  0.0106852,   0.0114591,   0.0122891,   0.0131792,   0.0141338,
+    0.0151576,   0.0160076,   0.0168911,   0.0178234,   0.0188071,   0.0198452,   0.0209405,   0.000328843, 0.000360778,
+    0.000433614, 0.000533468, 0.00066142,  0.000822834, 0.00102543,  0.00123239,  0.0014696,   0.00175307,  0.00209172,
+    0.00249624,  0.00297938,  0.00355469,  0.00423952,  0.0050566,   0.00603147,  0.0071946,   0.00858234,  0.00998185,
+    0.0107039,   0.0114782,   0.0123087,   0.0131993,   0.0141544,   0.0151788,   0.0160241,   0.0169079,   0.0178405,
+    0.0188245,   0.0198629,   0.0209586,   0.000411305, 0.000433614, 0.00049507,  0.000589093, 0.000715145, 0.00087724,
+    0.00107937,  0.001279,    0.0015184,   0.00180493,  0.00214749,  0.0025568,   0.00304568,  0.00362736,  0.00432001,
+    0.00514622,  0.0061317,   0.00730714,  0.00870913,  0.0100369,   0.01076,     0.0115355,   0.0123673,   0.0132595,
+    0.0142163,   0.0152355,   0.0160738,   0.0169584,   0.0178918,   0.0188769,   0.0199163,   0.0210131,   0.000514445,
+    0.000533468, 0.000589093, 0.000679349, 0.000804803, 0.000969255, 0.00115547,  0.00135757,  0.00160087,  0.00189268,
+    0.00224191,  0.00265935,  0.00315794,  0.00375033,  0.00445619,  0.00529779,  0.00630115,  0.00749731,  0.0089233,
+    0.0101289,   0.0108537,   0.0116312,   0.0124653,   0.01336,     0.0143196,   0.0153172,   0.0161566,   0.0170426,
+    0.0179775,   0.0189641,   0.0200053,   0.021104,    0.000643449, 0.00066142,  0.000715145, 0.000804803, 0.000932223,
+    0.00109453,  0.00126342,  0.0014696,   0.00171881,  0.00201836,  0.00237724,  0.00280635,  0.00331844,  0.00392647,
+    0.00465117,  0.00551468,  0.00654347,  0.00776911,  0.00922921,  0.010258,    0.0109853,   0.0117657,   0.012603,
+    0.0135013,   0.0144648,   0.0154317,   0.0162728,   0.0171607,   0.0180977,   0.0190866,   0.0201302,   0.0212316,
+    0.000804803, 0.000822834, 0.00087724,  0.000969255, 0.00109453,  0.00123239,  0.00140527,  0.00161754,  0.00187499,
+    0.00218504,  0.0025568,   0.00300139,  0.00353066,  0.00415992,  0.00490939,  0.00580172,  0.00686392,  0.00812824,
+    0.00963311,  0.0104246,   0.0111551,   0.0119393,   0.0127808,   0.0136837,   0.0146524,   0.0155792,   0.0164226,
+    0.0173129,   0.0182526,   0.0192445,   0.0202913,   0.0213961,   0.00100662,  0.00102543,  0.00107937,  0.00115547,
+    0.00126342,  0.00140527,  0.00158426,  0.00180493,  0.00207328,  0.00239688,  0.0027851,   0.00324932,  0.00380018,
+    0.00445619,  0.00523683,  0.00616535,  0.00726949,  0.00858234,  0.00994518,  0.0106292,   0.0113639,   0.0121527,
+    0.0129994,   0.0139081,   0.014883,    0.0157601,   0.0166062,   0.0174995,   0.0184426,   0.019438,    0.0204888,
+    0.0215978,   0.00121693,  0.00123239,  0.001279,    0.00135757,  0.0014696,   0.00161754,  0.00180493,  0.0020366,
+    0.00231878,  0.00265935,  0.00306796,  0.00355469,  0.00413359,  0.00482236,  0.00564111,  0.00661381,  0.0077691,
+    0.00914108,  0.0101841,   0.0108724,   0.0116121,   0.0124065,   0.0132595,   0.014175,    0.0151576,   0.0159745,
+    0.0168239,   0.0177209,   0.0186679,   0.0196678,   0.0207232,   0.0218372,   0.00145344,  0.0014696,   0.0015184,
+    0.00160087,  0.00171881,  0.00187499,  0.00207328,  0.00231878,  0.00261809,  0.00297938,  0.00341185,  0.00392647,
+    0.00453913,  0.00526726,  0.0061317,   0.00715734,  0.00837384,  0.00981651,  0.0104617,   0.0111551,   0.0119007,
+    0.0127017,   0.013562,    0.0144856,   0.0154153,   0.016223,    0.0170763,   0.0179775,   0.0189292,   0.0199341,
+    0.0209949,   0.0221148,   0.00173591,  0.00175307,  0.00180493,  0.00189268,  0.00201836,  0.00218504,  0.00239688,
+    0.00265935,  0.00297938,  0.00336496,  0.00382525,  0.00437418,  0.00502694,  0.00580172,  0.00672025,  0.00780846,
+    0.00909724,  0.0101289,   0.0107787,   0.0114782,   0.0122306,   0.0130393,   0.0139081,   0.014841,    0.0156942,
+    0.016506,    0.0173637,   0.0182698,   0.0192269,   0.0202376,   0.0213046,   0.0224312,   0.00207328,  0.00209172,
+    0.00214749,  0.00224191,  0.00237724,  0.0025568,   0.0027851,   0.00306796,  0.00341185,  0.00382525,  0.00432001,
+    0.00490939,  0.00560933,  0.00643889,  0.00742085,  0.00858234,  0.00987194,  0.0104803,   0.0111362,   0.0118428,
+    0.012603,    0.0134205,   0.0142989,   0.0152355,   0.0160076,   0.0168239,   0.0176868,   0.0185985,   0.0195616,
+    0.0205788,   0.021653,    0.022787,    0.00247621,  0.00249624,  0.0025568,   0.00265935,  0.00280635,  0.00300139,
+    0.00324932,  0.00355469,  0.00392647,  0.00437418,  0.00490939,  0.00554612,  0.00630115,  0.0071946,   0.00825042,
+    0.00949714,  0.010258,    0.0108724,   0.0115355,   0.0122501,   0.0130193,   0.0138467,   0.0147361,   0.0155792,
+    0.016356,    0.0171775,   0.0180461,   0.0189641,   0.0199341,   0.0209586,   0.0220406,   0.0231831,   0.00295745,
+    0.00297938,  0.00304568,  0.00315794,  0.00331844,  0.00353066,  0.00380018,  0.00413359,  0.00453913,  0.00502694,
+    0.00560933,  0.00630115,  0.00712022,  0.00808779,  0.00922921,  0.0101105,   0.0106852,   0.0113068,   0.011978,
+    0.0127017,   0.0134811,   0.0143196,   0.0152192,   0.015958,    0.0167401,   0.0175675,   0.0184426,   0.0193676,
+    0.0203451,   0.0213778,   0.0224685,   0.0236204,   0.00353066,  0.00355469,  0.00362736,  0.00375033,  0.00392647,
+    0.00415992,  0.00445619,  0.00482236,  0.00526726,  0.00580172,  0.00643889,  0.0071946,   0.00808779,  0.00914108,
+    0.0100369,   0.0105733,   0.0111551,   0.0117849,   0.0124653,   0.0131993,   0.01399,     0.014841,    0.0156285,
+    0.0163726,   0.0171607,   0.0179947,   0.0188769,   0.0198096,   0.0207955,   0.0218372,   0.0229376,   0.0240997,
+    0.00421288,  0.00423952,  0.00432001,  0.00445619,  0.00465117,  0.00490939,  0.00523683,  0.00564111,  0.0061317,
+    0.00672025,  0.00742085,  0.00825042,  0.00922921,  0.0100369,   0.010536,    0.0110795,   0.0116696,   0.0123087,
+    0.0129994,   0.0137448,   0.0145481,   0.0153662,   0.0160738,   0.0168239,   0.0176186,   0.0184599,   0.01935,
+    0.0202913,   0.0212864,   0.0223379,   0.0234488,   0.0246222,   0.00502694,  0.0050566,   0.00514622,  0.00529779,
+    0.00551468,  0.00580172,  0.00616535,  0.00661381,  0.00715734,  0.00780846,  0.00858234,  0.00949714,  0.0101105,
+    0.0105733,   0.0110795,   0.0116312,   0.0122306,   0.01288,     0.0135823,   0.0143403,   0.0151576,   0.0158424,
+    0.016556,    0.0173129,   0.0181149,   0.0189641,   0.0198629,   0.0208136,   0.0218188,   0.0228811,   0.0240035,
+    0.025189,    0.00599829,  0.00603147,  0.0061317,   0.00630115,  0.00654347,  0.00686392,  0.00726949,  0.0077691,
+    0.00837384,  0.00909724,  0.00987194,  0.010258,    0.0106852,   0.0111551,   0.0116696,   0.0122306,   0.0128403,
+    0.0135013,   0.0142163,   0.0149884,   0.0156778,   0.016356,    0.0170763,   0.0178405,   0.0186506,   0.0195086,
+    0.0204169,   0.0213778,   0.0223939,   0.0234679,   0.0246027,   0.0258015,   0.00715734,  0.0071946,   0.00730714,
+    0.00749731,  0.00776911,  0.00812824,  0.00858234,  0.00914108,  0.00981651,  0.0101289,   0.0104803,   0.0108724,
+    0.0113068,   0.0117849,   0.0123087,   0.01288,     0.0135013,   0.014175,    0.0149041,   0.0155792,   0.016223,
+    0.0169079,   0.0176356,   0.018408,    0.0192269,   0.0200945,   0.0210131,   0.0219851,   0.023013,    0.0240997,
+    0.025248,    0.026461,    0.00854036,  0.00858234,  0.00870913,  0.0089233,   0.00922921,  0.00963311,  0.00994518,
+    0.0101841,   0.0104617,   0.0107787,   0.0111362,   0.0115355,   0.011978,    0.0124653,   0.0129994,   0.0135823,
+    0.0142163,   0.0149041,   0.0155464,   0.0161566,   0.0168071,   0.0174995,   0.0182354,   0.0190166,   0.0198452,
+    0.0207232,   0.021653,    0.0226369,   0.0236777,   0.0247779,   0.0259407,   0.0271691,   0.00996351,  0.00998185,
+    0.0100369,   0.0101289,   0.010258,    0.0104246,   0.0106292,   0.0108724,   0.0111551,   0.0114782,   0.0118428,
+    0.0122501,   0.0127017,   0.0131993,   0.0137448,   0.0143403,   0.0149884,   0.0155792,   0.0161566,   0.0167736,
+    0.0174315,   0.0181321,   0.0188769,   0.0196678,   0.0205068,   0.0213961,   0.0223379,   0.0233348,   0.0243893,
+    0.0255042,   0.0266826,   0.0279274,   0.0106852,   0.0107039,   0.01076,     0.0108537,   0.0109853,   0.0111551,
+    0.0113639,   0.0116121,   0.0119007,   0.0122306,   0.012603,    0.0130193,   0.0134811,   0.01399,     0.0145481,
+    0.0151576,   0.0156778,   0.016223,    0.0168071,   0.0174315,   0.0180977,   0.0188071,   0.0195616,   0.020363,
+    0.0212134,   0.0221148,   0.0230697,   0.0240804,   0.0251497,   0.0262803,   0.0274753,   0.0287378,   0.0114591,
+    0.0114782,   0.0115355,   0.0116312,   0.0117657,   0.0119393,   0.0121527,   0.0124065,   0.0127017,   0.0130393,
+    0.0134205,   0.0138467,   0.0143196,   0.014841,    0.0153662,   0.0158424,   0.016356,    0.0169079,   0.0174995,
+    0.0181321,   0.0188071,   0.0195263,   0.0202913,   0.021104,    0.0219666,   0.0228811,   0.0238499,   0.0248755,
+    0.0259606,   0.027108,    0.0283208,   0.0296021,   0.0122891,   0.0123087,   0.0123673,   0.0124653,   0.012603,
+    0.0127808,   0.0129994,   0.0132595,   0.013562,    0.0139081,   0.0142989,   0.0147361,   0.0152192,   0.0156285,
+    0.0160738,   0.016556,    0.0170763,   0.0176356,   0.0182354,   0.0188769,   0.0195616,   0.0202913,   0.0210677,
+    0.0218926,   0.0227683,   0.0236968,   0.0246805,   0.025722,    0.026824,    0.0279894,   0.0292211,   0.0303655,
+    0.0131792,   0.0131993,   0.0132595,   0.01336,     0.0135013,   0.0136837,   0.0139081,   0.014175,    0.0144856,
+    0.014841,    0.0152355,   0.0155792,   0.015958,    0.0163726,   0.0168239,   0.0173129,   0.0178405,   0.018408,
+    0.0190166,   0.0196678,   0.020363,    0.021104,    0.0218926,   0.0227307,   0.0236204,   0.0245639,   0.0255636,
+    0.026622,    0.027742,    0.0289264,   0.0300982,   0.0311219,   0.0141338,   0.0141544,   0.0142163,   0.0143196,
+    0.0144648,   0.0146524,   0.014883,    0.0151576,   0.0154153,   0.0156942,   0.0160076,   0.016356,    0.0167401,
+    0.0171607,   0.0176186,   0.0181149,   0.0186506,   0.0192269,   0.0198452,   0.0205068,   0.0212134,   0.0219666,
+    0.0227683,   0.0236204,   0.024525,    0.0254845,   0.0265012,   0.0275778,   0.0287169,   0.0298984,   0.0308859,
+    0.0319196,   0.0151576,   0.0151788,   0.0152355,   0.0153172,   0.0154317,   0.0155792,   0.0157601,   0.0159745,
+    0.016223,    0.016506,    0.0168239,   0.0171775,   0.0175675,   0.0179947,   0.0184599,   0.0189641,   0.0195086,
+    0.0200945,   0.0207232,   0.0213961,   0.0221148,   0.0228811,   0.0236968,   0.0245639,   0.0254845,   0.026461,
+    0.0274958,   0.0285915,   0.0297509,   0.0307177,   0.0317152,   0.0327595,   0.0160076,   0.0160241,   0.0160738,
+    0.0161566,   0.0162728,   0.0164226,   0.0166062,   0.0168239,   0.0170763,   0.0173637,   0.0176868,   0.0180461,
+    0.0184426,   0.0188769,   0.01935,     0.0198629,   0.0204169,   0.0210131,   0.021653,    0.0223379,   0.0230697,
+    0.0238499,   0.0246805,   0.0255636,   0.0265012,   0.0274958,   0.0285498,   0.0296658,   0.0306169,   0.0315792,
+    0.0325874,   0.033643,    0.0168911,   0.0169079,   0.0169584,   0.0170426,   0.0171607,   0.0173129,   0.0174995,
+    0.0177209,   0.0179775,   0.0182698,   0.0185985,   0.0189641,   0.0193676,   0.0198096,   0.0202913,   0.0208136,
+    0.0213778,   0.0219851,   0.0226369,   0.0233348,   0.0240804,   0.0248755,   0.025722,    0.026622,    0.0275778,
+    0.0285915,   0.0296658,   0.0305833,   0.0315113,   0.0324843,   0.0335037,   0.0345713,   0.0178234,   0.0178405,
+    0.0178918,   0.0179775,   0.0180977,   0.0182526,   0.0184426,   0.0186679,   0.0189292,   0.0192269,   0.0195616,
+    0.0199341,   0.0203451,   0.0207955,   0.0212864,   0.0218188,   0.0223939,   0.023013,    0.0236777,   0.0243893,
+    0.0251497,   0.0259606,   0.026824,    0.027742,    0.0287169,   0.0297509,   0.0306169,   0.0315113,   0.0324499,
+    0.0334342,   0.0344656,   0.0355458,   0.0188071,   0.0188245,   0.0188769,   0.0189641,   0.0190866,   0.0192445,
+    0.019438,    0.0196678,   0.0199341,   0.0202376,   0.0205788,   0.0209586,   0.0213778,   0.0218372,   0.0223379,
+    0.0228811,   0.0234679,   0.0240997,   0.0247779,   0.0255042,   0.0262803,   0.027108,    0.0279894,   0.0289264,
+    0.0298984,   0.0307177,   0.0315792,   0.0324843,   0.0334342,   0.0344305,   0.0354745,   0.036568,    0.0198452,
+    0.0198629,   0.0199163,   0.0200053,   0.0201302,   0.0202913,   0.0204888,   0.0207232,   0.0209949,   0.0213046,
+    0.021653,    0.0220406,   0.0224685,   0.0229376,   0.0234488,   0.0240035,   0.0246027,   0.025248,    0.0259407,
+    0.0266826,   0.0274753,   0.0283208,   0.0292211,   0.0300982,   0.0308859,   0.0317152,   0.0325874,   0.0335037,
+    0.0344656,   0.0354745,   0.0365319,   0.0376393,   0.0209405,   0.0209586,   0.0210131,   0.021104,    0.0212316,
+    0.0213961,   0.0215978,   0.0218372,   0.0221148,   0.0224312,   0.022787,    0.0231831,   0.0236204,   0.0240997,
+    0.0246222,   0.025189,    0.0258015,   0.026461,    0.0271691,   0.0279274,   0.0287378,   0.0296021,   0.0303655,
+    0.0311219,   0.0319196,   0.0327595,   0.033643,    0.0345713,   0.0355458,   0.036568,    0.0376393,   0.0387616,
+    0.000138107, 0.000160471, 0.000186456, 0.000216649, 0.000251732, 0.000292495, 0.00033986};
+
+static float inv_dequant_stable[] = {
+    0.000000,     3150.000000, 3139.258545, 2648.630371, 2234.681152,  1885.427490, 1590.758057, 1342.141724,
+    3150.000000,  3150.000000, 3015.809570, 2576.583984, 2188.415039,  1853.965576, 1568.540649, 1326.029297,
+    3139.258545,  3015.809570, 2726.995361, 2389.616455, 2062.382568,  1765.966431, 1505.393555, 1279.748535,
+    2648.630371,  2576.583984, 2389.616455, 2144.407471, 1885.427490,  1637.121094, 1410.374878, 1208.789673,
+    2234.681152,  2188.415039, 2062.382568, 1885.427490, 1686.281982,  1485.426636, 1294.845093, 1060.593384,
+    1885.427490,  1853.965576, 1765.966431, 1637.121094, 1485.426636,  1326.029297, 1169.492065, 785.962952,
+    1590.758057,  1568.540649, 1505.393555, 1410.374878, 1294.845093,  1169.492065, 838.701721,  558.037292,
+    1342.141724,  1326.029297, 1279.748535, 1208.789673, 1060.593384,  785.962952,  558.037292,  382.654694,
+    0.000000,     560.000000,  558.510437,  489.194183,  428.480621,   375.302246,  328.723816,  287.926147,
+    560.000000,   560.000000,  541.309387,  478.786804,  421.547455,   370.409943,  325.138336,  285.227325,
+    558.510437,   541.309387,  500.443756,  451.472992,  402.494324,   356.627594,  314.885712,  277.434692,
+    489.194183,   478.786804,  451.472992,  414.922729,  375.302246,   336.170715,  299.277435,  265.364777,
+    428.480621,   421.547455,  402.494324,  375.302246,  344.016449,   311.624298,  279.983337,  250.119843,
+    375.302246,   370.409943,  356.627594,  336.170715,  311.624298,   285.227325,  258.613495,  232.845169,
+    328.723816,   325.138336,  314.885712,  299.277435,  279.983337,   258.613495,  236.484726,  214.558777,
+    287.926147,   285.227325,  277.434692,  265.364777,  250.119843,   232.845169,  214.558777,  196.071777,
+    0.000000,     293.959503,  169.469955,  119.412476,  85.333336,    85.333336,   83.550827,   58.871857,
+    293.959503,   233.598114,  156.027161,  112.817505,  85.333336,    85.333336,   81.164711,   57.425171,
+    169.469955,   156.027161,  126.804932,  96.600616,   85.333336,    85.333336,   74.576889,   53.372673,
+    119.412476,   112.817505,  96.600616,   85.333336,   85.333336,    85.333336,   65.203850,   47.455181,
+    85.333336,    85.333336,   85.333336,   85.333336,   85.333336,    72.553520,   54.677811,   39.419506,
+    85.333336,    85.333336,   85.333336,   85.333336,   72.553520,    57.425171,   44.331757,   29.212204,
+    83.550827,    81.164711,   74.576889,   65.203850,   54.677811,    44.331757,   31.172369,   20.740799,
+    58.871857,    57.425171,   53.372673,   47.455181,   39.419506,    29.212204,   20.740799,   14.222282,
+    0.000000,     3160.000000, 280.000000,  280.000000,  280.000000,   280.000000,  280.000000,  280.000000,
+    3160.000000,  3160.000000, 280.000000,  280.000000,  280.000000,   280.000000,  280.000000,  280.000000,
+    280.000000,   280.000000,  280.000000,  280.000000,  280.000000,   280.000000,  280.000000,  280.000000,
+    280.000000,   280.000000,  280.000000,  280.000000,  280.000000,   280.000000,  280.000000,  280.000000,
+    280.000000,   280.000000,  280.000000,  280.000000,  280.000000,   280.000000,  280.000000,  280.000000,
+    280.000000,   280.000000,  280.000000,  280.000000,  280.000000,   280.000000,  280.000000,  280.000000,
+    280.000000,   280.000000,  280.000000,  280.000000,  280.000000,   280.000000,  280.000000,  280.000000,
+    280.000000,   280.000000,  280.000000,  280.000000,  280.000000,   280.000000,  280.000000,  280.000000,
+    0.000000,     864.000000,  60.000000,   60.000000,   60.000000,    60.000000,   60.000000,   60.000000,
+    864.000000,   864.000000,  60.000000,   60.000000,   60.000000,    60.000000,   60.000000,   60.000000,
+    60.000000,    60.000000,   60.000000,   60.000000,   60.000000,    60.000000,   60.000000,   60.000000,
+    60.000000,    60.000000,   60.000000,   60.000000,   60.000000,    60.000000,   60.000000,   60.000000,
+    60.000000,    60.000000,   60.000000,   60.000000,   60.000000,    60.000000,   60.000000,   60.000000,
+    60.000000,    60.000000,   60.000000,   60.000000,   60.000000,    60.000000,   60.000000,   60.000000,
+    60.000000,    60.000000,   60.000000,   60.000000,   60.000000,    60.000000,   60.000000,   60.000000,
+    60.000000,    60.000000,   60.000000,   60.000000,   60.000000,    60.000000,   60.000000,   60.000000,
+    0.000000,     200.000000,  18.000000,   18.000000,   18.000000,    18.000000,   18.000000,   18.000000,
+    200.000000,   200.000000,  18.000000,   18.000000,   18.000000,    18.000000,   18.000000,   18.000000,
+    18.000000,    18.000000,   18.000000,   18.000000,   18.000000,    18.000000,   18.000000,   18.000000,
+    18.000000,    18.000000,   18.000000,   18.000000,   18.000000,    18.000000,   18.000000,   18.000000,
+    18.000000,    18.000000,   18.000000,   18.000000,   18.000000,    18.000000,   18.000000,   18.000000,
+    18.000000,    18.000000,   18.000000,   18.000000,   18.000000,    18.000000,   18.000000,   18.000000,
+    18.000000,    18.000000,   18.000000,   18.000000,   18.000000,    18.000000,   18.000000,   18.000000,
+    18.000000,    18.000000,   18.000000,   18.000000,   18.000000,    18.000000,   18.000000,   18.000000,
+    0.000000,     3840.000000, 1280.000000, 1280.000000, 480.000000,   480.000000,  480.000000,  480.000000,
+    3840.000000,  2560.000000, 1280.000000, 1280.000000, 480.000000,   480.000000,  480.000000,  480.000000,
+    1280.000000,  1280.000000, 640.000000,  640.000000,  480.000000,   480.000000,  480.000000,  480.000000,
+    1280.000000,  1280.000000, 640.000000,  640.000000,  480.000000,   480.000000,  480.000000,  480.000000,
+    480.000000,   480.000000,  480.000000,  480.000000,  300.000000,   300.000000,  300.000000,  300.000000,
+    480.000000,   480.000000,  480.000000,  480.000000,  300.000000,   300.000000,  300.000000,  300.000000,
+    480.000000,   480.000000,  480.000000,  480.000000,  300.000000,   300.000000,  300.000000,  300.000000,
+    480.000000,   480.000000,  480.000000,  480.000000,  300.000000,   300.000000,  300.000000,  300.000000,
+    0.000000,     960.000000,  320.000000,  320.000000,  140.000000,   140.000000,  140.000000,  140.000000,
+    960.000000,   640.000000,  320.000000,  320.000000,  140.000000,   140.000000,  140.000000,  140.000000,
+    320.000000,   320.000000,  180.000000,  180.000000,  140.000000,   140.000000,  140.000000,  140.000000,
+    320.000000,   320.000000,  180.000000,  180.000000,  140.000000,   140.000000,  140.000000,  140.000000,
+    140.000000,   140.000000,  140.000000,  140.000000,  120.000000,   120.000000,  120.000000,  120.000000,
+    140.000000,   140.000000,  140.000000,  140.000000,  120.000000,   120.000000,  120.000000,  120.000000,
+    140.000000,   140.000000,  140.000000,  140.000000,  120.000000,   120.000000,  120.000000,  120.000000,
+    140.000000,   140.000000,  140.000000,  140.000000,  120.000000,   120.000000,  120.000000,  120.000000,
+    0.000000,     640.000000,  128.000000,  128.000000,  32.000000,    32.000000,   32.000000,   32.000000,
+    640.000000,   320.000000,  128.000000,  128.000000,  32.000000,    32.000000,   32.000000,   32.000000,
+    128.000000,   128.000000,  64.000000,   64.000000,   32.000000,    32.000000,   32.000000,   32.000000,
+    128.000000,   128.000000,  64.000000,   64.000000,   32.000000,    32.000000,   32.000000,   32.000000,
+    32.000000,    32.000000,   32.000000,   32.000000,   16.000000,    16.000000,   16.000000,   16.000000,
+    32.000000,    32.000000,   32.000000,   32.000000,   16.000000,    16.000000,   16.000000,   16.000000,
+    32.000000,    32.000000,   32.000000,   32.000000,   16.000000,    16.000000,   16.000000,   16.000000,
+    32.000000,    32.000000,   32.000000,   32.000000,   16.000000,    16.000000,   16.000000,   16.000000,
+    0.000000,     2200.000000, 2200.000000, 2200.000000, 2200.000000,  2200.000000, 2200.000000, 2200.000000,
+    2200.000000,  2200.000000, 2200.000000, 2200.000000, 2200.000000,  2200.000000, 2200.000000, 2200.000000,
+    2200.000000,  2200.000000, 2200.000000, 2200.000000, 2200.000000,  2200.000000, 2200.000000, 2200.000000,
+    2200.000000,  2200.000000, 2200.000000, 2200.000000, 2200.000000,  2200.000000, 2200.000000, 2200.000000,
+    2200.000000,  2200.000000, 2200.000000, 2200.000000, 2200.000000,  2200.000000, 2200.000000, 2200.000000,
+    2200.000000,  2200.000000, 2200.000000, 2200.000000, 2200.000000,  2200.000000, 2200.000000, 2200.000000,
+    2200.000000,  2200.000000, 2200.000000, 2200.000000, 2200.000000,  2200.000000, 2200.000000, 2200.000000,
+    2200.000000,  2200.000000, 2200.000000, 2200.000000, 2200.000000,  2200.000000, 2200.000000, 2200.000000,
+    0.000000,     392.000000,  392.000000,  392.000000,  392.000000,   392.000000,  392.000000,  392.000000,
+    392.000000,   392.000000,  392.000000,  392.000000,  392.000000,   392.000000,  392.000000,  392.000000,
+    392.000000,   392.000000,  392.000000,  392.000000,  392.000000,   392.000000,  392.000000,  392.000000,
+    392.000000,   392.000000,  392.000000,  392.000000,  392.000000,   392.000000,  392.000000,  392.000000,
+    392.000000,   392.000000,  392.000000,  392.000000,  392.000000,   392.000000,  392.000000,  392.000000,
+    392.000000,   392.000000,  392.000000,  392.000000,  392.000000,   392.000000,  392.000000,  392.000000,
+    392.000000,   392.000000,  392.000000,  392.000000,  392.000000,   392.000000,  392.000000,  392.000000,
+    392.000000,   392.000000,  392.000000,  392.000000,  392.000000,   392.000000,  392.000000,  392.000000,
+    0.000000,     112.000000,  95.651627,   95.651627,   81.689583,    81.689583,   68.239342,   68.239342,
+    112.000000,   112.000000,  95.651627,   95.651627,   81.689583,    81.689583,   68.239342,   68.239342,
+    95.651627,    95.651627,   89.600014,   89.600014,   78.702759,    78.702759,   65.137154,   65.137154,
+    95.651627,    95.651627,   89.600014,   89.600014,   78.702759,    78.702759,   65.137154,   65.137154,
+    81.689583,    81.689583,   78.702759,   78.702759,   71.680023,    71.680023,   57.363346,   57.363346,
+    81.689583,    81.689583,   78.702759,   78.702759,   71.680023,    71.680023,   57.363346,   57.363346,
+    68.239342,    68.239342,   65.137154,   65.137154,   57.363346,    57.363346,   47.786716,   47.786716,
+    68.239342,    68.239342,   65.137154,   65.137154,   57.363346,    57.363346,   47.786716,   47.786716,
+    0.000000,     0.000000,    5616.416016, 4437.547852, 3710.523682,  3312.083740, 2956.428467, 2638.963867,
+    2378.979736,  2146.230957, 1936.253296, 1722.186157, 1498.605713,  1304.051636, 1134.754883, 951.882080,
+    0.000000,     0.000000,    5312.582520, 4271.097168, 3658.995850,  3275.037109, 2928.764160, 2617.745361,
+    2363.779541,  2134.027100, 1926.335571, 1711.357300, 1489.962646,  1297.105591, 1129.140381, 946.136963,
+    5616.416016,  5312.582520, 4620.592773, 3880.564697, 3516.761230,  3170.294189, 2849.415527, 2562.006348,
+    2319.431641,  2098.261719, 1897.172852, 1679.534424, 1464.505249,  1276.608887, 1112.546509, 929.184143,
+    4437.547852,  4271.097168, 3880.564697, 3609.647705, 3312.083740,  3013.749512, 2727.902588, 2474.977295,
+    2249.396484,  2041.305786, 1850.436279, 1628.609985, 1423.584961,  1243.542969, 1077.572754, 901.837036,
+    3710.523682,  3658.995850, 3516.761230, 3312.083740, 3073.944824,  2824.097412, 2580.273682, 2363.779541,
+    2158.580811,  1966.619507, 1778.076416, 1561.425903, 1369.259766,  1199.417236, 1031.115479, 865.357239,
+    3312.083740,  3275.037109, 3170.294189, 3013.749512, 2824.097412,  2617.745117, 2425.913330, 2235.929932,
+    2052.443848,  1878.206177, 1679.534424, 1481.398804, 1304.051636,  1146.111572, 975.344788,  821.329834,
+    2956.428467,  2928.764160, 2849.415527, 2727.902588, 2580.273682,  2425.913330, 2263.037598, 2098.261719,
+    1936.253296,  1766.659668, 1570.745850, 1392.135254, 1230.684570,  1077.572754, 912.642517,  771.521240,
+    2638.963867,  2617.745361, 2562.006348, 2474.977295, 2363.779541,  2235.929932, 2098.261719, 1956.393188,
+    1813.078247,  1628.609985, 1456.172852, 1297.105591, 1151.854614,  993.464294,  845.405334,  717.737732,
+    2378.979736,  2363.779541, 2319.431641, 2249.396484, 2158.580811,  2052.443848, 1936.253296, 1813.078247,
+    1648.672119,  1489.962646, 1339.664185, 1199.417480, 1057.315552,  907.217957,  775.878479,  661.709290,
+    2146.230957,  2134.027100, 2098.261719, 2041.305786, 1966.619507,  1878.206177, 1766.659668, 1628.609985,
+    1489.962646,  1354.335571, 1224.331787, 1098.371216, 951.882080,   821.329834,  706.041565,  604.996033,
+    1936.253296,  1926.335571, 1897.172852, 1850.436279, 1778.076416,  1679.534424, 1570.745850, 1456.172852,
+    1339.664185,  1224.331787, 1112.546143, 975.344482,  850.334167,   737.812134,  637.541504,  531.866638,
+    1722.186157,  1711.357300, 1679.534424, 1628.609985, 1561.425903,  1481.398804, 1392.135254, 1297.105591,
+    1199.417480,  1098.371216, 975.344482,  860.309998,  754.414917,   658.183533,  565.168762,  455.065186,
+    1498.605713,  1489.962646, 1464.505249, 1423.584961, 1369.259766,  1304.051636, 1230.684570, 1151.854614,
+    1057.315552,  951.882080,  850.334167,  754.414917,  665.260315,   582.761047,  475.564758,  385.666412,
+    1304.051636,  1297.105591, 1276.608887, 1243.542969, 1199.417236,  1146.111572, 1077.572754, 993.464294,
+    907.217957,   821.329834,  737.812134,  658.183533,  582.761047,   482.643036,  396.775940,  324.039429,
+    1134.754883,  1129.140381, 1112.546509, 1077.572754, 1031.115479,  975.344788,  912.642517,  845.405334,
+    775.878479,   706.041565,  637.541504,  565.168762,  475.564758,   396.775940,  328.516357,  270.136078,
+    951.882080,   946.136963,  929.184143,  901.837036,  865.357239,   821.329834,  771.521240,  717.737732,
+    661.709290,   604.996033,  531.866638,  455.065186,  385.666412,   324.039429,  270.136078,  223.608490,
+    0.000000,     0.000000,    2384.412598, 2060.989746, 1763.609009,  1491.737793, 1261.776978, 1067.266357,
+    956.677612,   861.364075,  775.546631,  703.312927,  644.910889,   591.358521,  542.252991,  501.345215,
+    0.000000,     0.000000,    2303.758789, 2012.809937, 1727.632080,  1467.211548, 1244.414307, 1054.643066,
+    950.447205,   856.371826,  771.497620,  700.552734,  642.589600,   589.392944,  540.578857,  500.060272,
+    2384.412598,  2303.758789, 2113.184082, 1884.007446, 1629.571411,  1398.579590, 1195.044922, 1031.757080,
+    932.273987,   841.744202,  759.593811,  692.403137,  635.722961,   583.569458,  535.612549,  496.242188,
+    2060.989746,  2012.809937, 1884.007446, 1693.401611, 1491.737793,  1297.998291, 1120.699707, 996.043396,
+    903.588257,   818.460022,  740.524109,  679.239563,  624.590454,   574.100037,  528.409058,  489.997620,
+    1763.609009,  1727.632080, 1629.571411, 1491.737793, 1336.388306,  1179.428345, 1039.256348, 950.447205,
+    866.416626,   787.946472,  717.456177,  661.633423,  609.623047,   561.314270,  518.629089,  481.495361,
+    1491.737793,  1467.211548, 1398.579590, 1297.998291, 1179.428345,  1054.642944, 975.919922,  898.074402,
+    823.012451,   751.853821,  692.403137,  640.284668,  591.358521,   545.629761,  506.546997,  470.954254,
+    1261.776978,  1244.414307, 1195.044922, 1120.699707, 1039.256348,  975.919922,  909.174133,  841.744202,
+    775.546631,   714.580872,  664.092590,  615.952393,  570.392151,   528.409058,  492.477875,  458.628601,
+    1067.266357,  1054.643066, 1031.757080, 996.043396,  950.447205,   898.074402,  841.744202,  783.770203,
+    726.228333,   679.239563,  633.465698,  589.392944,  547.332581,   510.515045,  476.757660,  444.792908,
+    956.677612,   950.447205,  932.273987,  903.588257,  866.416626,   823.012451,  775.546631,  726.228333,
+    684.443726,   642.589600,  601.375000,  561.314392,  524.175049,   491.234863,  459.724792,  429.728699,
+    861.364075,   856.371826,  841.744202,  818.460022,  787.946472,   751.853821,  714.580872,  679.239563,
+    642.589600,   605.472290,  568.554810,  532.708740,  501.345215,   470.954254,  441.705719,  413.711823,
+    775.546631,   771.497620,  759.593811,  740.524109,  717.456177,   692.403137,  664.092590,  633.465698,
+    601.375000,   568.554810,  535.612427,  506.546936,  477.933990,   450.024689,  423.003998,  395.167694,
+    703.312927,   700.552734,  692.403137,  679.239563,  661.633423,   640.284668,  615.952393,  589.392944,
+    561.314392,   532.708740,  506.546936,  480.302856,  454.290039,   428.756592,  403.216187,  375.228302,
+    644.910889,   642.589600,  635.722961,  624.590454,  609.623047,   591.358521,  570.392151,  547.332581,
+    524.175049,   501.345215,  477.933990,  454.290039,  430.704803,   407.340515,  380.757690,  355.171173,
+    591.358521,   589.392944,  583.569458,  574.100037,  561.314270,   545.629761,  528.409058,  510.515045,
+    491.234863,   470.954254,  450.024689,  428.756592,  407.340515,   382.629913,  358.535706,  335.223267,
+    542.252991,   540.578857,  535.612549,  528.409058,  518.629089,   506.546997,  492.477875,  476.757660,
+    459.724792,   441.705719,  423.003998,  403.216187,  380.757690,   358.535706,  336.753815,  315.574097,
+    501.345215,   500.060272,  496.242188,  489.997620,  481.495361,   470.954254,  458.628601,  444.792908,
+    429.728699,   413.711823,  395.167694,  375.228302,  355.171173,   335.223267,  315.574097,  296.378265,
+    0.000000,     0.000000,    615.613831,  448.953400,  337.930267,   263.807556,  205.943130,  160.770889,
+    141.832733,   126.301643,  112.471252,  100.763390,  91.120811,    82.400993,   74.515610,   58.896236,
+    0.000000,     0.000000,    571.402039,  426.532227,  327.784393,   257.417816,  201.765564,  157.966431,
+    140.812332,   125.492966,  111.822540,  100.304680,  90.740356,    82.083275,   74.248734,   58.393326,
+    615.613831,   571.402039,  473.941895,  372.602753,  300.644775,   239.809601,  190.039810,  154.182663,
+    137.840027,   123.126366,  109.917458,  98.951996,   89.616219,    81.142967,   73.457825,   56.916744,
+    448.953400,   426.532227,  372.602753,  318.224457,  263.807556,   214.746811,  172.817261,  148.295853,
+    133.160797,   119.368141,  106.872108,  96.772522,   87.797852,    79.617172,   70.208313,   54.558437,
+    337.930267,   327.784393,  300.644775,  263.807556,  224.206940,   186.378311,  155.421555,  140.812332,
+    127.120590,   114.460091,  103.118340,  93.868050,   85.361305,    77.563431,   65.959373,   51.458752,
+    263.807556,   257.417816,  239.809601,  214.746811,  186.378311,   157.966400,  144.988541,  132.263153,
+    120.102051,   108.680435,  98.951996,   90.362801,   82.400993,    75.054314,   60.963200,   47.789742,
+    205.943130,   201.765564,  190.039810,  172.817261,  155.421555,   144.988541,  134.070770,  123.126366,
+    112.471252,   102.638969,  94.273003,   86.390495,   79.020828,    70.208313,   55.486752,   43.736801,
+    160.770889,   157.966431,  154.182663,  148.295853,  140.812332,   132.263153,  123.126366,  113.789886,
+    104.582710,   96.772522,   89.247108,   82.083275,   75.326157,    62.573708,   49.786186,   39.481380,
+    141.832733,   140.812332,  137.840027,  133.160797,  127.120590,   120.102051,  112.471252,  104.582710,
+    97.633369,    90.740356,   84.022667,   77.563446,   68.346024,    55.020145,   44.087116,   35.187599,
+    126.301643,   125.492966,  123.126366,  119.368141,  114.460091,   108.680435,  102.638969,  96.772522,
+    90.740356,    84.687279,   78.725555,   72.135590,   58.896236,    47.789742,   38.573082,   30.993063,
+    112.471252,   111.822540,  109.917458,  106.872108,  103.118340,   98.951996,   94.273003,   89.247108,
+    84.022667,    78.725555,   73.457809,   60.963173,   50.197853,    41.054691,   33.381031,   24.780676,
+    100.763390,   100.304680,  98.951996,   96.772522,   93.868050,    90.362801,   86.390495,   82.083275,
+    77.563446,    72.135590,   60.963173,   51.034103,   42.369473,    34.922314,   27.726070,   18.572216,
+    91.120811,    90.740356,   89.616219,   87.797852,   85.361305,    82.400993,   79.020828,   75.326157,
+    68.346024,    58.896236,   50.197853,   42.369473,   35.455399,    29.343132,   20.148905,   13.676408,
+    82.400993,    82.083275,   81.142967,   79.617172,   77.563431,    75.054314,   70.208313,   62.573708,
+    55.020145,    47.789742,   41.054691,   34.922314,   29.343132,    20.706997,   14.413850,   9.911549,
+    74.515610,    74.248734,   73.457825,   70.208313,   65.959373,    60.963200,   55.486752,   49.786186,
+    44.087116,    38.573082,   33.381031,   27.726070,   20.148905,    14.413850,   10.166267,   7.079802,
+    58.896236,    58.393326,   56.916744,   54.558437,   51.458752,    47.789742,   43.736801,   39.481380,
+    35.187599,    30.993063,   24.780676,   18.572216,   13.676408,    9.911549,    7.079802,    4.991220,
+    0.000000,     0.000000,    0.000000,    0.000000,    10016.177734, 8949.019531, 7995.559082, 7162.601074,
+    6422.475586,  5758.828613, 5163.758301, 4630.176758, 4151.732422,  3734.188232, 3370.109863, 3041.528564,
+    2744.983643,  2477.351074, 2235.813232, 2038.749634, 1932.109741,  1831.047485, 1735.271729, 1644.505737,
+    1558.487183,  1476.968018, 1386.826660, 1301.528687, 1221.477173,  1146.349243, 1075.842163, 1009.671509,
+    0.000000,     0.000000,    0.000000,    0.000000,    9878.224609,  8849.744141, 7921.355469, 7107.295410,
+    6379.011230,  5724.145508, 5135.744141, 4607.326172, 4132.939453,  3719.505127, 3357.800781, 3031.157227,
+    2736.206543,  2469.894287, 2229.455811, 2035.871338, 1929.518066,  1828.708130, 1733.155273, 1642.587036,
+    1556.744507,  1475.382324, 1385.135010, 1300.000122, 1220.093750,  1145.095825, 1074.704956, 1008.638672,
+    0.000000,     0.000000,    0.000000,    0.000000,    9497.340820,  8569.009766, 7710.195312, 6947.082520,
+    6252.300781,  5622.568359, 5053.416504, 4539.993652, 4077.450684,  3676.055664, 3321.326660, 3000.390625,
+    2710.143555,  2447.733643, 2210.550537, 2027.284180, 1921.783081,  1821.723755, 1726.834595, 1636.855469,
+    1551.537354,  1470.240967, 1380.081177, 1295.431274, 1215.958252,  1141.347534, 1071.303833, 1005.549255,
+    0.000000,     0.000000,    0.000000,    0.000000,    8949.019531,  8149.289551, 7394.224121, 6697.344727,
+    6052.488281,  5461.019531, 4921.634277, 4431.667969, 3987.818604,  3605.573242, 3262.003174, 2950.239990,
+    2667.581299,  2411.486328, 2179.584961, 2013.130249, 1909.023438,  1810.194092, 1716.393799, 1627.382446,
+    1542.927124,  1460.985596, 1371.723022, 1287.873291, 1209.114380,  1135.142578, 1065.671997, 1000.432007,
+    10016.177734, 9878.224609, 9497.340820, 8949.019531, 8310.704102,  7644.405273, 6999.567383, 6379.011230,
+    5793.938965,  5249.588867, 4747.628418, 4287.628418, 3871.052002,  3510.746094, 3181.889648, 2882.297852,
+    2609.764404,  2362.132812, 2137.337891, 1993.638184, 1891.430786,  1794.280640, 1701.970703, 1614.285400,
+    1531.013916,  1448.186157, 1360.157349, 1277.407837, 1199.633057,  1126.542847, 1057.862915, 993.333557,
+    8949.019531,  8849.744141, 8569.009766, 8149.289551, 7644.405273,  7107.295410, 6556.779785, 6014.109863,
+    5492.590332,  4999.912598, 4539.993652, 4114.296875, 3734.188232,  3394.959229, 3083.605469, 2798.613281,
+    2538.306152,  2300.954346, 2084.833496, 1969.113525, 1869.262207,  1774.201294, 1683.749756, 1597.722046,
+    1515.933594,  1431.994873, 1345.514648, 1264.148682, 1187.612061,  1115.632202, 1047.949463, 984.940796,
+    7995.559082,  7921.355469, 7710.195312, 7394.224121, 6999.567383,  6556.779785, 6091.377441, 5622.568359,
+    5163.758301,  4723.701172, 4307.688965, 3918.657715, 3578.032715,  3262.003174, 2970.133789, 2701.544189,
+    2455.086426,  2229.455811, 2041.636230, 1939.925049, 1842.829834,  1750.221191, 1661.957764, 1577.887085,
+    1497.853394,  1412.599976, 1327.956665, 1248.234619, 1173.171875,  1102.515625, 1036.023804, 975.335022,
+    7162.601074,  7107.295410, 6947.082520, 6697.344727, 6379.011230,  6014.109863, 5622.568359, 5220.674805,
+    4820.776367,  4431.667969, 4059.243408, 3719.505127, 3407.500000,  3115.799805, 2844.609619, 2593.611328,
+    2362.133057,  2149.279541, 2004.730957, 1906.490967, 1812.489014,  1722.644409, 1636.855469, 1555.005493,
+    1476.968018,  1390.219116, 1307.671753, 1229.829224, 1156.454346,  1087.316772, 1022.192383, 964.170288,
+    6422.475586,  6379.011230, 6252.300781, 6052.488281, 5793.938965,  5492.590332, 5163.758301, 4820.776367,
+    4474.431152,  4132.939453, 3809.151123, 3510.745605, 3227.259766,  2960.159424, 2710.143555, 2477.351562,
+    2261.523438,  2062.126221, 1963.743530, 1869.262207, 1778.627319,  1691.803589, 1608.729980, 1529.325195,
+    1450.003418,  1365.094971, 1284.869629, 1209.114380, 1137.618530,  1070.173828, 1006.577637, 951.533936,
+    5758.828613,  5724.145508, 5622.568359, 5461.019531, 5249.588867,  4999.912598, 4723.701172, 4431.667969,
+    4132.939453,  3839.881836, 3564.398193, 3297.380859, 3041.528564,  2798.613281, 2569.677490, 2355.211426,
+    2155.288330,  2013.130249, 1919.218018, 1828.708130, 1741.649902,  1658.050537, 1577.887085, 1501.112305,
+    1419.603271,  1337.488037, 1259.777100, 1186.287842, 1116.836182,  1051.238037, 989.356018,  937.521851,
+    5163.758301,  5135.744141, 5053.416504, 4921.634277, 4747.628418,  4539.993652, 4307.688965, 4059.243408,
+    3809.151123,  3564.398193, 3321.326660, 3083.605469, 2853.956543,  2634.296387, 2425.883789, 2229.455811,
+    2053.263184,  1961.068726, 1871.700928, 1785.304199, 1701.970703,  1621.745972, 1544.642456, 1470.240967,
+    1386.826660,  1307.671509, 1232.633179, 1161.558716, 1094.291504,  1030.670776, 972.740723,  922.236572,
+    4630.176758,  4607.326172, 4539.993652, 4431.667969, 4287.628418,  4114.296875, 3918.657715, 3719.505127,
+    3510.745605,  3297.380859, 3083.605469, 2872.799805, 2667.581299,  2469.894531, 2281.107178, 2102.114502,
+    1993.638428,  1906.490967, 1821.723877, 1739.518799, 1660.001831,  1583.252930, 1509.314819, 1431.994873,
+    1351.991333,  1275.923828, 1203.682495, 1135.142578, 1070.173828,  1008.638672, 954.878296,  905.786682,
+    4151.732422,  4132.939453, 4077.450684, 3987.818604, 3871.052002,  3734.188232, 3578.032715, 3407.500000,
+    3227.259766,  3041.528564, 2853.956543, 2667.581299, 2484.843994,  2307.630371, 2137.337891, 2015.945557,
+    1932.109741,  1849.967896, 1769.796143, 1691.803589, 1616.144531,  1542.927124, 1472.104004, 1391.920288,
+    1315.414917,  1242.525757, 1173.171875, 1107.257935, 1044.676514,  985.821167,  935.894348,  888.283264,
+    3734.188232,  3719.505127, 3676.055664, 3605.573242, 3510.746094,  3394.959229, 3262.003174, 3115.799805,
+    2960.159424,  2798.613281, 2634.296387, 2469.894531, 2307.630371,  2149.279541, 2027.284424, 1947.802002,
+    1869.262207,  1792.028931, 1716.393799, 1642.587036, 1570.783203,  1501.112305, 1426.662964, 1350.367676,
+    1277.407837,  1207.752563, 1141.347534, 1078.122192, 1017.992920,  964.170288,  915.916138,  869.840393,
+    3370.109863,  3357.800781, 3321.326660, 3262.003174, 3181.889648,  3083.605469, 2970.133789, 2844.609619,
+    2710.143555,  2569.677490, 2425.883789, 2281.107178, 2137.337891,  2027.284424, 1953.087891, 1879.053223,
+    1805.620605,  1733.155273, 1661.957764, 1592.268433, 1524.278809,  1455.477539, 1380.081177, 1307.671509,
+    1238.270752,  1171.872925, 1108.448364, 1047.949463, 990.314453,   941.609558,  895.070007,  850.572571,
+    3041.528564,  3031.157227, 3000.390625, 2950.239990, 2882.297852,  2798.613281, 2701.544189, 2593.611328,
+    2477.351562,  2355.211426, 2229.455811, 2102.114502, 2015.945557,  1947.802002, 1879.053223, 1810.194092,
+    1741.649902,  1673.779907, 1606.886108, 1541.215088, 1476.968018,  1403.923828, 1332.708740, 1264.148682,
+    1198.288086,  1135.142578, 1074.704956, 1016.947144, 965.022034,   918.278442,  873.481934,  830.592407,
+    2744.983643,  2736.206543, 2710.143555, 2667.581299, 2609.764404,  2538.306152, 2455.086426, 2362.133057,
+    2261.523438,  2155.288330, 2053.263184, 1993.638428, 1932.109741,  1869.262207, 1805.620605, 1741.649902,
+    1677.755005,  1614.285400, 1551.537354, 1489.759766, 1421.362915,  1351.991333, 1284.869263, 1220.093750,
+    1157.727417,  1097.804565, 1040.336426, 985.821167,  939.153625,   894.312012,  851.274536,  810.011841,
+    2477.351074,  2469.894287, 2447.733643, 2411.486328, 2362.132812,  2300.954346, 2229.455811, 2149.279541,
+    2062.126221,  2013.130249, 1961.068726, 1906.490967, 1849.967896,  1792.028931, 1733.155273, 1673.779907,
+    1614.285400,  1555.005493, 1496.228516, 1431.994873, 1365.094971,  1300.000122, 1236.857544, 1175.777100,
+    1116.836182,  1060.084961, 1005.549255, 957.398621,  912.780823,   869.840393,  828.566345,  788.938354,
+    2235.813232,  2229.455811, 2210.550537, 2179.584961, 2137.337891,  2084.833496, 2041.636230, 2004.730957,
+    1963.743530,  1919.218018, 1871.700928, 1821.723877, 1769.796143,  1716.393799, 1661.957764, 1606.886108,
+    1551.537354,  1496.228516, 1435.567749, 1371.723022, 1309.214355,  1248.234619, 1188.938477, 1131.444702,
+    1075.842163,  1022.192383, 972.740723,  928.625183,  886.038757,   844.988281,  805.471558,  767.476257,
+    2038.749634,  2035.871338, 2027.284180, 2013.130249, 1993.638184,  1969.113525, 1939.925049, 1906.490967,
+    1869.262207,  1828.708130, 1785.304199, 1739.518799, 1691.803589,  1642.587036, 1592.268433, 1541.215088,
+    1489.759766,  1431.994873, 1371.723022, 1312.308716, 1253.984985,  1196.945190, 1141.347534, 1087.316772,
+    1034.949585,  984.940796,  941.609558,  899.638794,  859.054749,   819.872620,  782.097107,  745.724487,
+    1932.109741,  1929.518066, 1921.783081, 1909.023438, 1891.430786,  1869.262207, 1842.829834, 1812.489014,
+    1778.627319,  1741.649902, 1701.970703, 1660.001831, 1616.144531,  1570.783203, 1524.278809, 1476.968018,
+    1421.362915,  1365.094971, 1309.214355, 1253.984985, 1199.633057,  1146.349243, 1094.291504, 1043.588867,
+    994.342957,   951.534058,  910.440247,  870.566956,  831.947388,   794.602966,  758.545227,  723.776733,
+    1831.047485,  1828.708130, 1821.723755, 1810.194092, 1794.280640,  1774.201294, 1750.221191, 1722.644409,
+    1691.803589,  1658.050537, 1621.745972, 1583.252930, 1542.927124,  1501.112305, 1455.477539, 1403.923828,
+    1351.991333,  1300.000122, 1248.234619, 1196.945190, 1146.349243,  1096.631470, 1047.949463, 1000.432007,
+    958.241089,   918.278259,  879.356750,  841.526489,  804.825806,   769.281250,  734.910400,  701.720947,
+    1735.271729,  1733.155273, 1726.834595, 1716.393799, 1701.970703,  1683.749756, 1661.957764, 1636.855469,
+    1608.729980,  1577.887085, 1544.642456, 1509.314819, 1472.104004,  1426.662964, 1380.081177, 1332.708740,
+    1284.869263,  1236.857544, 1188.938477, 1141.347534, 1094.291504,  1047.949463, 1002.474182, 961.622131,
+    923.031555,   885.292297,  848.471924,  812.623657,  777.789856,   744.001404,  711.280090,  684.970581,
+    1644.505737,  1642.587036, 1636.855469, 1627.382446, 1614.285400,  1597.722046, 1577.887085, 1555.005493,
+    1529.325195,  1501.112305, 1470.240967, 1431.994873, 1391.920288,  1350.367676, 1307.671509, 1264.148682,
+    1220.093750,  1175.777100, 1131.444702, 1087.316772, 1043.588867,  1000.432007, 961.622131,  924.624451,
+    888.283264,   852.680969,  817.885742,  783.954041,  750.929993,   718.848206,  690.509705,  669.787170,
+    1558.487183,  1556.744507, 1551.537354, 1542.927124, 1531.013916,  1515.933594, 1497.853394, 1476.968018,
+    1450.003418,  1419.603271, 1386.826660, 1351.991333, 1315.414917,  1277.407837, 1238.270752, 1198.288086,
+    1157.727417,  1116.836182, 1075.842163, 1034.949585, 994.342957,   958.241089,  923.031555,  888.283264,
+    854.091187,   820.536316,  787.687988,  755.602661,  724.327698,   694.713867,  674.449768,  654.522705,
+    1476.968018,  1475.382324, 1470.240967, 1460.985596, 1448.186157,  1431.994873, 1412.599976, 1390.219116,
+    1365.094971,  1337.488037, 1307.671509, 1275.923828, 1242.525757,  1207.752563, 1171.872925, 1135.142578,
+    1097.804565,  1060.084961, 1022.192383, 984.940796,  951.534058,   918.278259,  885.292297,  852.680969,
+    820.536316,   788.938354,  757.955322,  727.644897,  698.054810,   677.813538,  658.364380,  639.217041,
+    1386.826660,  1385.135010, 1380.081177, 1371.723022, 1360.157349,  1345.514648, 1327.956665, 1307.671753,
+    1284.869629,  1259.777100, 1232.633179, 1203.682495, 1173.171875,  1141.347534, 1108.448364, 1074.704956,
+    1040.336426,  1005.549255, 972.740723,  941.609558,  910.440247,   879.356750,  848.471924,  817.885742,
+    787.687988,   757.955322,  728.755737,  700.146423,  679.845642,   660.946289,  642.291992,  623.906799,
+    1301.528687,  1300.000122, 1295.431274, 1287.873291, 1277.407837,  1264.148682, 1248.234619, 1229.829224,
+    1209.114380,  1186.287842, 1161.558716, 1135.142578, 1107.257935,  1078.122192, 1047.949463, 1016.947144,
+    985.821167,   957.398621,  928.625183,  899.638794,  870.566956,   841.526489,  812.623657,  783.954041,
+    755.602661,   727.644897,  700.146423,  680.525146,  662.243774,   644.148804,  626.268066,  608.625916,
+    1221.477173,  1220.093750, 1215.958252, 1209.114380, 1199.633057,  1187.612061, 1173.171875, 1156.454346,
+    1137.618530,  1116.836182, 1094.291504, 1070.173828, 1044.676514,  1017.992920, 990.314453,  965.022034,
+    939.153625,   912.780823,  886.038757,  859.054749,  831.947388,   804.825806,  777.789856,  750.929993,
+    724.327698,   698.054810,  679.845642,  662.243774,  644.769775,   627.454285,  610.324890,  593.406128,
+    1146.349243,  1145.095825, 1141.347534, 1135.142578, 1126.542847,  1115.632202, 1102.515625, 1087.316772,
+    1070.173828,  1051.238037, 1030.670776, 1008.638672, 985.821167,   964.170288,  941.609558,  918.278442,
+    894.312012,   869.840393,  844.988281,  819.872620,  794.602966,   769.281250,  744.001404,  718.848206,
+    694.713867,   677.813538,  660.946289,  644.148804,  627.454285,   610.892944,  594.491943,  578.275757,
+    1075.842163,  1074.704956, 1071.303833, 1065.671997, 1057.862915,  1047.949463, 1036.023804, 1022.192383,
+    1006.577637,  989.356018,  972.740723,  954.878296,  935.894348,   915.916138,  895.070007,  873.481934,
+    851.274536,   828.566345,  805.471558,  782.097107,  758.545227,   734.910400,  711.280090,  690.509705,
+    674.449768,   658.364380,  642.291992,  626.268066,  610.324890,   594.491943,  578.796021,  563.260986,
+    1009.671509,  1008.638672, 1005.549255, 1000.432007, 993.333557,   984.940796,  975.335022,  964.170288,
+    951.533936,   937.521851,  922.236572,  905.786682,  888.283264,   869.840393,  850.572571,  830.592407,
+    810.011841,   788.938354,  767.476257,  745.724487,  723.776733,   701.720947,  684.970581,  669.787170,
+    654.522705,   639.217041,  623.906799,  608.625916,  593.406128,   578.275757,  563.260986,  548.385559,
+    0.000000,     0.000000,    0.000000,    0.000000,    5011.678711,  4561.026367, 4150.897949, 3787.853271,
+    3459.890381,  3160.322998, 2886.693115, 2636.754883, 2408.457275,  2220.788330, 2069.294189, 1928.134521,
+    1796.604248,  1674.046265, 1559.848999, 1455.328247, 1364.407104,  1279.166016, 1199.250488, 1124.327759,
+    1054.085815,  988.231934,  932.328857,  879.889832,  830.400330,   783.694336,  739.615356,  698.015564,
+    0.000000,     0.000000,    0.000000,    0.000000,    4953.881836,  4518.670410, 4118.654297, 3763.552734,
+    3440.437256,  3144.510986, 2873.683594, 2625.945068, 2399.401855,  2214.770264, 2064.085693, 1923.603760,
+    1792.645508,  1670.574097, 1556.793091, 1452.861328, 1362.209839,  1277.203857, 1197.494507, 1122.752808,
+    1052.670532,  986.958130,  931.291748,  878.947388,  829.542664,   782.912842,  738.902405,  697.364319,
+    0.000000,     0.000000,    0.000000,    0.000000,    4793.614746,  4398.468262, 4026.789795, 3692.973877,
+    3383.596924,  3098.108398, 2835.382080, 2594.041016, 2372.622803,  2196.918701, 2048.617188, 1910.134766,
+    1780.867554,  1660.236206, 1547.688721, 1445.505981, 1355.655151,  1271.348633, 1192.252319, 1118.050049,
+    1048.443970,  983.335632,  928.192505,  876.130005,  826.978088,   780.575439,  736.769653,  695.416138,
+    0.000000,     0.000000,    0.000000,    0.000000,    4561.026367,  4217.543457, 3889.284912, 3582.401611,
+    3293.564941,  3024.014893, 2773.850098, 2542.543945, 2329.235352,  2167.820068, 2023.344727, 1888.087524,
+    1761.557373,  1643.263794, 1532.723755, 1433.395996, 1344.854126,  1261.692749, 1183.601440, 1110.284790,
+    1041.461182,  977.688965,  923.064758,  871.467163,  822.732300,   776.704590,  733.236694,  692.188110,
+    5011.678711,  4953.881836, 4793.614746, 4561.026367, 4287.298828,  3998.239258, 3716.125000, 3440.437256,
+    3176.312988,  2926.477539, 2692.172852, 2473.735840, 2276.533936,  2128.389404, 1988.987183, 1858.031494,
+    1735.171753,  1620.026123, 1512.199829, 1416.746704, 1329.985962,  1248.385864, 1171.668457, 1099.563843,
+    1031.812866,  969.874817,  915.964600,  865.006897,  816.846924,   771.336731,  728.335144,  687.707825,
+    4561.026367,  4518.670410, 4398.468262, 4217.543457, 3998.239258,  3763.552734, 3519.859619, 3276.214600,
+    3038.523682,  2810.433105, 2594.041016, 2390.411621, 2220.788330,  2079.791016, 1946.466553, 1820.706665,
+    1702.307983,  1591.010010, 1486.515869, 1395.845215, 1311.290039,  1231.629150, 1156.622437, 1086.030518,
+    1019.620911,  959.981201,  906.967834,  856.815063,  809.379150,   764.521179,  722.108276,  682.245117,
+    4150.897949,  4118.654297, 4026.789795, 3889.284912, 3716.125000,  3519.859619, 3311.126465, 3098.108398,
+    2886.693115,  2680.902100, 2483.341797, 2295.779541, 2156.401855,  2023.344727, 1896.847168, 1776.973755,
+    1663.669312,  1556.793091, 1457.802856, 1371.036987, 1289.056152,  1211.667358, 1138.670532, 1069.861084,
+    1005.035828,  948.117004,  896.168518,  846.973267,  800.399719,   756.319885,  714.610474,  675.848572,
+    3787.853271,  3763.552734, 3692.973877, 3582.401611, 3440.437256,  3276.214600, 3098.108398, 2913.088135,
+    2726.568848,  2542.543945, 2363.822754, 2214.770264, 2085.079346,  1960.440552, 1841.264404, 1727.767578,
+    1620.026245,  1518.013916, 1426.217773, 1342.712280, 1263.613770,  1188.779175, 1118.050049, 1051.258545,
+    988.231995,   934.408508,  883.676331,  835.576843,  789.991882,   746.805908,  705.905090,  668.410767,
+    3459.890381,  3440.437256, 3383.596924, 3293.564941, 3176.312988,  3038.523682, 2886.693115, 2726.568848,
+    2562.898193,  2399.401855, 2251.398438, 2128.389404, 2008.477295,  1892.457642, 1780.867554, 1674.046387,
+    1572.181152,  1475.345703, 1391.275391, 1311.290039, 1235.319580,  1163.268921, 1095.021729, 1030.446289,
+    970.984558,   918.996521,  869.613525,  822.732300,  778.249512,   736.060913,  696.064697,  659.988525,
+    3160.322998,  3144.510986, 3098.108398, 3024.014893, 2926.477539,  2810.433105, 2680.902100, 2542.543945,
+    2399.401855,  2263.892822, 2150.739014, 2038.433105, 1928.134521,  1820.706665, 1716.770630, 1616.755371,
+    1520.935669,  1433.395996, 1353.482666, 1277.203857, 1204.545532,  1135.456787, 1069.861084, 1007.662292,
+    952.402710,   902.032532,  854.112671,  808.556152,  765.273621,   724.174500,  685.184509,  650.644348,
+    2886.693115,  2873.683594, 2835.382080, 2773.850098, 2692.172852,  2594.041016, 2483.341797, 2363.822754,
+    2251.398438,  2150.739014, 2048.617188, 1946.466553, 1845.429077,  1746.389160, 1650.014648, 1556.793091,
+    1467.777588,  1389.000244, 1313.344360, 1240.890259, 1171.668457,  1105.668823, 1042.851685, 983.335632,
+    932.328857,   883.676147,  837.313965,  793.171143,  751.173340,   711.242554,  674.120605,  640.444885,
+    2636.754883,  2625.945068, 2594.041016, 2542.543945, 2473.735840,  2390.411621, 2295.779541, 2214.770264,
+    2128.389404,  2038.433105, 1946.466553, 1853.812378, 1761.557373,  1670.574341, 1581.545288, 1494.991333,
+    1416.746948,  1342.712280, 1271.348755, 1202.776001, 1137.061523,  1074.231323, 1014.277527, 959.981201,
+    910.948364,   864.090393,  819.361023,  776.704590,  736.060913,   697.364319,  662.218079,  629.460938,
+    2408.457275,  2399.401855, 2372.622803, 2329.235352, 2276.533936,  2220.788330, 2156.401855, 2085.079346,
+    2008.477295,  1928.134521, 1845.429077, 1761.557373, 1677.531860,  1594.187866, 1512.199829, 1435.803467,
+    1364.407104,  1295.054199, 1227.958252, 1163.268921, 1101.084717,  1041.461182, 984.471924,  935.451172,
+    888.446838,   843.439941,  800.399719,  759.286133,  720.051270,   682.831299,  649.558655,  617.765076,
+    2220.788330,  2214.770264, 2196.918701, 2167.820068, 2128.389404,  2079.791016, 2023.344727, 1960.440552,
+    1892.457642,  1820.706665, 1746.389160, 1670.574341, 1594.187866,  1518.013916, 1445.506348, 1377.724487,
+    1311.290039,  1246.504883, 1183.601440, 1122.752808, 1064.079834,  1007.662292, 956.720947,  909.950623,
+    865.006897,   821.887329,  780.575439,  741.044800,  703.259888,   668.410767,  636.225525,  605.431763,
+    2069.294189,  2064.085693, 2048.617188, 2023.344727, 1988.987183,  1946.466553, 1896.847168, 1841.264404,
+    1780.867554,  1716.770630, 1650.014648, 1581.545288, 1512.199829,  1445.506348, 1382.215332, 1319.541870,
+    1257.865967,  1197.494507, 1138.670532, 1081.580688, 1026.364746,  974.326904,  928.192505,  883.676147,
+    840.805603,   799.591431,  760.030457,  722.108276,  685.801392,   653.370850,  622.301086,  592.535889,
+    1928.134521,  1923.603760, 1910.134766, 1888.087524, 1858.031494,  1820.706665, 1776.973755, 1727.767578,
+    1674.046387,  1616.755371, 1556.793091, 1494.991333, 1435.803467,  1377.724487, 1319.541870, 1261.692749,
+    1204.545532,  1148.403564, 1093.514893, 1040.073730, 988.231995,   942.804993,  899.092651,  856.815063,
+    816.011780,   776.704590,  738.902405,  702.601074,  668.978333,   637.802612,  607.867737,  579.151306,
+    1796.604248,  1792.645508, 1780.867554, 1761.557373, 1735.171753,  1702.307983, 1663.669312, 1620.026245,
+    1572.181152,  1520.935669, 1467.777588, 1416.746948, 1364.407104,  1311.290039, 1257.865967, 1204.545532,
+    1151.679443,  1099.563843, 1048.443970, 998.518372,  953.479187,   910.948364,  869.613342,  829.542664,
+    790.784973,   753.372253,  717.322815,  682.831299,  651.732788,   621.794556,  593.005920,  565.351440,
+    1674.046265,  1670.574097, 1660.236206, 1643.263794, 1620.026123,  1591.010010, 1556.793091, 1518.013916,
+    1475.345703,  1433.395996, 1389.000244, 1342.712280, 1295.054199,  1246.504883, 1197.494507, 1148.403564,
+    1099.563843,  1051.258545, 1003.726868, 959.981201,  918.996521,   878.947388,  839.930481,  802.020447,
+    765.273621,   729.730225,  695.416138,  663.897949,  634.132019,   605.431763,  577.793396,  551.206848,
+    1559.848999,  1556.793091, 1547.688721, 1532.723755, 1512.199829,  1486.515869, 1457.802856, 1426.217773,
+    1391.275391,  1353.482666, 1313.344360, 1271.348755, 1227.958252,  1183.601440, 1138.670532, 1093.514893,
+    1048.443970,  1003.726868, 962.165222,  923.064758,  884.626892,   846.973267,  810.203491,  774.396851,
+    739.615356,   705.905090,  674.120605,  644.708618,  616.264648,   588.796265,  562.305176,  536.786438,
+    1455.328247,  1452.861328, 1445.505981, 1433.395996, 1416.746704,  1395.845215, 1371.036987, 1342.712280,
+    1311.290039,  1277.203857, 1240.890259, 1202.776001, 1163.268921,  1122.752808, 1081.580688, 1040.073730,
+    998.518372,   959.981201,  923.064758,  886.533447,  850.530762,   815.177795,  780.575439,  746.805908,
+    713.934814,   682.245117,  653.370850,  625.353882,  598.214355,   571.965027,  546.611938,  522.155396,
+    1364.407104,  1362.209839, 1355.655151, 1344.854126, 1329.985962,  1311.290039, 1289.056152, 1263.613770,
+    1235.319580,  1204.545532, 1171.668457, 1137.061523, 1101.084717,  1064.079834, 1026.364746, 988.231995,
+    953.479187,   918.996521,  884.626892,  850.530762,  816.846924,   783.694336,  751.173340,  719.367554,
+    688.345093,   659.988647,  632.568970,  605.917847,  580.059387,   555.010437,  530.781067,  507.375702,
+    1279.166016,  1277.203857, 1271.348633, 1261.692749, 1248.385864,  1231.629150, 1211.667358, 1188.779175,
+    1163.268921,  1135.456787, 1105.668823, 1074.231323, 1041.461182,  1007.662292, 974.326904,  942.804993,
+    910.948364,   878.947388,  846.973267,  815.177795,  783.694336,   752.638062,  722.108276,  692.188110,
+    664.459473,   637.802490,  611.796875,  586.477539,  561.871887,   537.999817,  514.875244,  492.505737,
+    1199.250488,  1197.494507, 1192.252319, 1183.601440, 1171.668457,  1156.622437, 1138.670532, 1118.050049,
+    1095.021729,  1069.861084, 1042.851685, 1014.277527, 984.471924,   956.720947,  928.192505,  899.092651,
+    869.613342,   839.930481,  810.203491,  780.575439,  751.173340,   722.108276,  693.476562,  666.712769,
+    640.975464,   615.765564,  591.129272,  567.103455,  543.718079,   520.995728,  498.952637,  480.805573,
+    1124.327759,  1122.752808, 1118.050049, 1110.284790, 1099.563843,  1086.030518, 1069.861084, 1051.258545,
+    1030.446289,  1007.662292, 983.335632,  959.981201,  935.451172,   909.950623,  883.676147,  856.815063,
+    829.542664,   802.020447,  774.396851,  746.805908,  719.367554,   692.188110,  666.712769,  642.038696,
+    617.765076,   593.947571,  570.632690,  547.859314,  525.658264,   504.054474,  484.734985,  470.036285,
+    1054.085815,  1052.670532, 1048.443970, 1041.461182, 1031.812866,  1019.620911, 1005.035828, 988.231995,
+    970.984558,   952.402710,  932.328857,  910.948364,  888.446838,   865.006897,  840.805603,  816.011780,
+    790.784973,   765.273621,  739.615356,  713.934814,  688.345093,   664.459473,  640.975464,  617.765076,
+    594.891724,   572.410034,  550.367126,  528.801819,  507.746918,   487.717651,  473.343079,  459.212067,
+    988.231934,   986.958130,  983.335632,  977.688965,  969.874817,   959.981201,  948.117004,  934.408508,
+    918.996521,   902.032532,  883.676147,  864.090393,  843.439941,   821.887329,  799.591431,  776.704590,
+    753.372253,   729.730225,  705.905090,  682.245117,  659.988647,   637.802490,  615.765564,  593.947571,
+    572.410034,   551.206848,  530.384338,  509.981781,  490.032288,   475.728912,  461.936005,  448.361359,
+    932.328857,   931.291748,  928.192505,  923.064758,  915.964600,   906.967834,  896.168518,  883.676331,
+    869.613525,   854.112671,  837.313965,  819.361023,  800.399719,   780.575439,  760.030457,  738.902405,
+    717.322815,   695.416138,  674.120605,  653.370850,  632.568970,   611.796875,  591.129272,  570.632690,
+    550.367126,   530.384338,  510.730133,  491.443481,  477.170258,   463.766785,  450.541077,  437.510101,
+    879.889832,   878.947388,  876.130005,  871.467163,  865.006897,   856.815063,  846.973267,  835.576843,
+    822.732300,   808.556152,  793.171143,  776.704590,  759.286133,   741.044800,  722.108276,  702.601074,
+    682.831299,   663.897949,  644.708618,  625.353882,  605.917847,   586.477539,  567.103455,  547.859314,
+    528.801819,   509.981781,  491.443481,  477.652222,  464.686829,   451.857361,  439.183533,  426.682556,
+    830.400330,   829.542664,  826.978088,  822.732300,  816.846924,   809.379150,  800.399719,  789.991882,
+    778.249512,   765.273621,  751.173340,  736.060913,  720.051270,   703.259888,  685.801392,  668.978333,
+    651.732788,   634.132019,  616.264648,  598.214355,  580.059387,   561.871887,  543.718079,  525.658264,
+    507.746918,   490.032288,  477.170258,  464.686829,  452.297577,   440.024200,  427.886261,  415.901093,
+    783.694336,   782.912842,  780.575439,  776.704590,  771.336731,   764.521179,  756.319885,  746.805908,
+    736.060913,   724.174500,  711.242554,  697.364319,  682.831299,   668.410767,  653.370850,  637.802612,
+    621.794556,   605.431763,  588.796265,  571.965027,  555.010437,   537.999817,  520.995728,  504.054474,
+    487.717651,   475.728912,  463.766785,  451.857361,  440.024200,   428.288727,  416.670166,  405.185883,
+    739.615356,   738.902405,  736.769653,  733.236694,  728.335144,   722.108276,  714.610474,  705.905090,
+    696.064697,   685.184509,  674.120605,  662.218079,  649.558655,   636.225525,  622.301086,  607.867737,
+    593.005920,   577.793396,  562.305176,  546.611938,  530.781067,   514.875244,  498.952637,  484.734985,
+    473.343079,   461.936005,  450.541077,  439.183533,  427.886261,   416.670166,  405.554260,  394.555481,
+    698.015564,   697.364319,  695.416138,  692.188110,  687.707825,   682.245117,  675.848572,  668.410767,
+    659.988525,   650.644348,  640.444885,  629.460938,  617.765076,   605.431763,  592.535889,  579.151306,
+    565.351440,   551.206848,  536.786438,  522.155396,  507.375702,   492.505737,  480.805573,  470.036285,
+    459.212067,   448.361359,  437.510101,  426.682556,  415.901093,   405.185883,  394.555481,  384.026642,
+    0.000000,     0.000000,    0.000000,    0.000000,    1554.123779,  1242.539551, 993.424500,  821.738708,
+    688.023743,   576.067200,  482.328461,  403.842957,  338.128937,   283.233490,  237.367096,  198.928207,
+    166.714081,   139.716614,  117.091141,  100.366226,  93.587563,    87.266724,   81.372780,   75.876930,
+    70.752258,    65.973686,   62.470375,   59.202759,   56.106068,    53.171352,   50.390141,   47.754402,
+    0.000000,     0.000000,    0.000000,    0.000000,    1511.898926,  1215.312500, 975.197021,  811.432129,
+    680.458130,   570.428589,  478.074890,  400.602692,  335.640289,   281.318329,  235.876022,  197.761490,
+    165.797119,   138.993164,  116.518372,  100.181839,  93.424210,    87.121628,   81.243637,   75.761719,
+    70.649292,    65.881516,   62.405884,   59.143909,   56.052280,    53.122139,   50.345058,   47.713055,
+    0.000000,     0.000000,    0.000000,    0.000000,    1398.316895,  1139.939331, 926.469055,  781.859680,
+    658.586914,   554.038086,  465.659058,  391.113556,  328.333618,   275.682922,  231.480927,  194.317368,
+    163.086807,   136.852478,  114.822044,  99.632172,   92.937019,    86.688774,   80.858185,   75.417809,
+    70.341858,    65.636070,   62.213120,   58.967945,   55.891445,    52.974934,   50.210182,   47.589340,
+    0.000000,     0.000000,    0.000000,    0.000000,    1242.539551,  1031.720703, 865.446106,  736.612244,
+    624.660889,   528.352295,  446.048340,  376.032318,  316.662598,   266.643341,  224.406723,  188.757797,
+    158.701111,   133.381165,  112.066162,  98.727715,   92.134651,    85.975319,   80.222427,   74.850182,
+    69.834183,    65.286209,   61.894089,   58.676640,   55.625088,    52.731091,   49.986694,   47.384289,
+    1554.123779,  1511.898926, 1398.316895, 1242.539551, 1072.704712,  913.631165,  791.500732,  680.458130,
+    581.796936,   495.450836,  420.656097,  356.335327,  301.346527,   254.681503,  214.999893,  181.334152,
+    152.824005,   128.714935,  108.351624,  97.485291,   91.031036,    84.992851,   79.346092,   74.067062,
+    69.133171,    64.801811,   61.452133,   58.272842,   55.255707,    52.392796,   49.676514,   47.099586,
+    1242.539551,  1215.312500, 1139.939331, 1031.720703, 913.631165,   811.432129,  711.604919,  618.224060,
+    533.334961,   457.657806,  391.113556,  333.178528,  283.233490,   240.389191,  203.691101,  172.362762,
+    145.689255,   123.027794,  103.808617,  95.927284,   89.644768,    83.756927,   78.242203,   73.079414,
+    68.248146,    64.188103,   60.891762,   57.760517,   54.786736,    51.963017,   49.282234,   46.737526,
+    993.424500,   975.197021,  926.469055,  865.446106,  791.500732,   711.604919,  631.209534,  554.038086,
+    482.328461,   417.209503,  359.053955,  307.756866,  263.145569,   224.406723,  190.955078,  162.196686,
+    137.561203,   116.518372,  100.551231,  94.080605,   87.998329,    82.286446,   76.926704,   71.900749,
+    67.190567,    63.451557,   60.218594,   57.144524,   54.222420,    51.445496,   48.807163,   46.300991,
+    821.738708,   811.432129,  781.859680,  736.612244,  680.458130,   618.224060,  554.038086,  491.015137,
+    431.260406,   376.032318,  325.949646,  281.318329,  241.920471,   207.367233,  177.270203,  151.198700,
+    128.714981,   109.396255,  98.191902,   91.975601,   86.117218,    80.602898,   75.417809,   70.546577,
+    65.973694,    62.599686,   59.439167,   56.430584,   53.567768,    50.844631,   48.255127,   45.793381,
+    688.023743,   680.458130,  658.586914,  624.660889,  581.796936,   533.334961,  482.328461,  431.260406,
+    381.958527,   335.640289,  293.096069,  254.681427,  220.306442,   189.852081,  163.086807,  139.716660,
+    119.419540,   101.869186,  95.586914,   89.644768,   84.028999,    78.729691,   73.735466,   69.033928,
+    64.870621,    61.640888,   58.560795,   55.625088,   52.828423,    50.165352,   47.630527,   45.218559,
+    576.067200,   570.428589,  554.038086,  528.352295,  495.450836,   457.657806,  417.209503,  376.032318,
+    335.640289,   297.179993,  261.421112,  228.614380,  198.928207,   172.362762,  148.803925,  128.066162,
+    109.923485,   98.727715,   92.775597,   87.121628,   81.762306,    76.691391,   71.900749,   67.380913,
+    63.717697,    60.584194,   57.591423,   54.735031,   52.010479,    49.413094,   46.938114,   44.580780,
+    482.328461,   478.074890,  465.659058,  446.048340,  420.656097,   391.113556,  359.053955,  325.949646,
+    293.096069,   261.421112,  231.480927,  203.691101,  178.274475,   155.306305,  134.755493,  116.518372,
+    101.297218,   95.417488,   89.797020,   84.439827,   79.346092,    74.512932,   69.935249,   65.636070,
+    62.470375,    59.439159,   56.539452,   53.767811,   51.120529,    48.593651,   46.183067,   43.884586,
+    403.842957,   400.602692,  391.113556,  376.032318,  356.335327,   333.178528,  307.756866,  281.318329,
+    254.681427,   228.614380,  203.691101,  180.306320,  158.701111,   138.993195,  121.205971,  105.294853,
+    97.485306,    91.975601,   86.688774,   81.632111,   76.808891,    72.219170,   67.860542,   64.188103,
+    61.139740,    58.215542,   55.413525,   52.731091,   50.165352,    47.713055,   45.370728,   43.134796,
+    338.128937,   335.640289,  328.333618,  316.662598,  301.346527,   283.233490,  263.145569,  241.920471,
+    220.306442,   198.928207,  178.274475,  158.701111,  140.445145,   123.643127,  108.351624,  98.907478,
+    93.587563,    88.442261,   83.486351,   78.729691,   74.178123,    69.834183,   65.706459,   62.664513,
+    59.736908,    56.923252,   54.222420,   51.632717,   49.151936,    46.777527,   44.506676,   42.336353,
+    283.233490,   281.318329,  275.682922,  266.643341,  254.681503,   240.389191,  224.406723,  207.367233,
+    189.852081,   172.362762,  155.306305,  138.993195,  123.643127,   109.396255,  99.632195,   94.578140,
+    89.644768,    84.854034,   80.222427,   75.761719,   71.479691,    67.380913,   63.985767,   61.077591,
+    58.272842,    55.572067,   52.974934,   50.480511,   48.087288,    45.793381,   43.596565,   41.494331,
+    237.367096,   235.876022,  231.480927,  224.406723,  214.999893,   203.691101,  190.955078,  177.270203,
+    163.086807,   148.803925,  134.755493,  121.205971,  108.351624,   99.632195,   94.912354,   90.256424,
+    85.692696,    81.243637,   76.926704,   72.754906,   68.737579,    65.077835,   62.213120,   59.439159,
+    56.758232,    54.171604,   51.679691,   49.282234,   46.978401,    44.766880,   42.646019,   40.613834,
+    198.928207,   197.761490,  194.317368,  188.757797,  181.334152,   172.362762,  162.196686,  151.198700,
+    139.716660,   128.066162,  116.518372,  105.294853,  98.907478,    94.578140,   90.256424,   85.975319,
+    81.762306,    77.639725,   73.625481,   69.733337,   65.973694,    63.121567,   60.400925,   57.760517,
+    55.203278,    52.731091,   50.345058,   48.045479,   45.832119,    43.704224,   41.660648,   39.699886,
+    166.714081,   165.797119,  163.086807,  158.701111,  152.824005,   145.689255,  137.561203,  128.714981,
+    119.419540,   109.923485,  101.297218,  97.485306,   93.587563,    89.644768,   85.692696,   81.762306,
+    77.879822,    74.067062,   70.341858,   66.718384,   63.784531,    61.139740,   58.560787,   56.052280,
+    53.617672,    51.259396,   48.979061,   46.777527,   44.655071,    42.611439,   40.645927,   38.757504,
+    139.716614,   138.993164,  136.852478,  133.381165,  128.714935,   123.027794,  116.518372,  109.396255,
+    101.869186,   98.727715,   95.417488,   91.975601,   88.442261,    84.854034,   81.243637,   77.639725,
+    74.067062,    70.546577,   67.095711,   64.188103,   61.640888,    59.143909,   56.703403,   54.324310,
+    52.010479,    49.764812,   47.589340,   45.485382,   43.453663,    41.494331,   39.607159,   37.791485,
+    117.091141,   116.518372,  114.822044,  112.066162,  108.351624,   103.808617,  100.551231,  98.191902,
+    95.586914,    92.775597,   89.797020,   86.688774,   83.486351,    80.222427,   76.926704,   73.625481,
+    70.341858,    67.095711,   64.323616,   61.894089,   59.498505,    57.144524,   54.838520,   52.585674,
+    50.390141,    48.255127,   46.183067,   44.175625,   42.233917,    40.358486,   38.549461,   36.806534,
+    100.366226,   100.181839,  99.632172,   98.727715,   97.485291,    95.927284,   94.080605,   91.975601,
+    89.644768,    87.121628,   84.439827,   81.632111,   78.729691,    75.761719,   72.754906,   69.733337,
+    66.718384,    64.188103,   61.894089,   59.617504,   57.367245,    55.150913,   52.974934,   50.844631,
+    48.764336,    46.737526,   44.766880,   42.854427,   41.001553,    39.209156,   37.477650,   35.807087,
+    93.587563,    93.424210,   92.937019,   92.134651,   91.031036,    89.644768,   87.998329,   86.117218,
+    84.028999,    81.762306,   79.346092,   76.808891,   74.178123,    71.479691,   68.737579,   65.973694,
+    63.784531,    61.640888,   59.498505,   57.367245,   55.255707,    53.171352,   51.120529,   49.108620,
+    47.140091,    45.218567,   43.346962,   41.527519,   39.761902,    38.051262,   36.396320,   34.797382,
+    87.266724,    87.121628,   86.688774,   85.975319,   84.992851,    83.756927,   82.286446,   80.602898,
+    78.729691,    76.691391,   74.512932,   72.219170,   69.834183,    67.380913,   65.077835,   63.121567,
+    61.139740,    59.143909,   57.144524,   55.150913,   53.171352,    51.213028,   49.282234,   47.384289,
+    45.523705,    43.704216,   41.928902,   40.200157,   38.519875,    36.889412,   35.309746,   33.781395,
+    81.372780,    81.243637,   80.858185,   80.222427,   79.346092,    78.242203,   76.926704,   75.417809,
+    73.735466,    71.900749,   69.935249,   67.860542,   65.706459,    63.985767,   62.213120,   60.400925,
+    58.560787,    56.703403,   54.838520,   52.974934,   51.120529,    49.282234,   47.466137,   45.677494,
+    43.920807,    42.199848,   40.517788,   38.877151,   37.279995,    35.727867,   34.221893,   32.932083,
+    75.876930,    75.761719,   75.417809,   74.850182,   74.067062,    73.079414,   71.900749,   70.546577,
+    69.033928,    67.380913,   65.636070,   64.188103,   62.664513,    61.077591,   59.439159,   57.760517,
+    56.052280,    54.324310,   52.585674,   50.844631,   49.108620,    47.384289,   45.677494,   43.993374,
+    42.336353,    40.710224,   39.118168,   37.562847,   36.046383,    34.570469,   33.224525,   32.131664,
+    70.752258,    70.649292,   70.341858,   69.834183,   69.133171,    68.248146,   67.190567,   65.973694,
+    64.870621,    63.717697,   62.470375,   61.139740,   59.736908,    58.272842,   56.758232,   55.203278,
+    53.617672,    52.010479,   50.390141,   48.764336,   47.140091,    45.523705,   43.920807,   42.336353,
+    40.774689,    39.239544,   37.734135,   36.261116,   34.822742,    33.446636,   32.377274,   31.328754,
+    65.973686,    65.881516,   65.636070,   65.286209,   64.801811,    64.188103,   63.451557,   62.599686,
+    61.640888,    60.584194,   59.439159,   58.215542,   56.923252,    55.572067,   54.171604,   52.731091,
+    51.259396,    49.764812,   48.255127,   46.737526,   45.218567,    43.704216,   42.199848,   40.710224,
+    39.239544,    37.791485,   36.369217,   34.975430,   33.612385,    32.554573,   31.530655,   30.525511,
+    62.470375,    62.405884,   62.213120,   61.894089,   61.452133,    60.891762,   60.218594,   59.439167,
+    58.560795,    57.591423,   56.539452,   55.413525,   54.222420,    52.974934,   51.679691,   50.345058,
+    48.979061,    47.589340,   46.183067,   44.766880,   43.346962,    41.928902,   40.517788,   39.118168,
+    37.734135,    36.369217,   35.026558,   33.708813,   32.661720,    31.666414,   30.686733,   29.723904,
+    59.202759,    59.143909,   58.967945,   58.676640,   58.272842,    57.760517,   57.144524,   56.430584,
+    55.625088,    54.735031,   53.767811,   52.731091,   51.632717,    50.480511,   49.282234,   48.045479,
+    46.777527,    45.485382,   44.175625,   42.854427,   41.527519,    40.200157,   38.877151,   37.562847,
+    36.261116,    34.975430,   33.708813,   32.697552,   31.734655,    30.784130,   29.847412,   28.925755,
+    56.106068,    56.052280,   55.891445,   55.625088,   55.255707,    54.786736,   54.222420,   53.567768,
+    52.828423,    52.010479,   51.120529,   50.165352,   49.151936,    48.087288,   46.978401,   45.832119,
+    44.655071,    43.453663,   42.233917,   41.001553,   39.761902,    38.519875,   37.279995,   36.046383,
+    34.822742,    33.612385,   32.661720,   31.734655,   30.816704,    29.909475,   29.014397,   28.132734,
+    53.171352,    53.122139,   52.974934,   52.731091,   52.392796,    51.963017,   51.445496,   50.844631,
+    50.165352,    49.413094,   48.593651,   47.713055,   46.777527,    45.793381,   44.766880,   43.704224,
+    42.611439,    41.494331,   40.358486,   39.209156,   38.051262,    36.889412,   35.727867,   34.570469,
+    33.446636,    32.554573,   31.666414,   30.784130,   29.909475,    29.044043,   28.189245,   27.346340,
+    50.390141,    50.345058,   50.210182,   49.986694,   49.676514,    49.282234,   48.807163,   48.255127,
+    47.630527,    46.938114,   46.183067,   45.370728,   44.506676,    43.596565,   42.646019,   41.660648,
+    40.645927,    39.607159,   38.549461,   37.477650,   36.396320,    35.309746,   34.221893,   33.224525,
+    32.377274,    31.530655,   30.686733,   29.847412,   29.014397,    28.189245,   27.373348,   26.567940,
+    47.754402,    47.713055,   47.589340,   47.384289,   47.099586,    46.737526,   46.300991,   45.793381,
+    45.218559,    44.580780,   43.884586,   43.134796,   42.336353,    41.494331,   40.613834,   39.699886,
+    38.757504,    37.791485,   36.806534,   35.807087,   34.797382,    33.781395,   32.932083,   32.131664,
+    31.328754,    30.525511,   29.723904,   28.925755,   28.132734,    27.346340,   26.567940,   25.798756};
+
+namespace xf {
+namespace codec {
+// ------------------------------------------------------------
+/**
+ * @brief Level 2 : kernel implementation for JXL lossy frame encode computing
+ *
+ * @param config the integer config signals, such as image size, field stride, etc.
+ * @param config_fl the floating-point config signals, such as cost, inv_global_scale, etc.
+ * @param hls_opsin_1 the input RGB image data for channel-1.
+ * @param hls_opsin_2 the input RGB image data for channel-2.
+ * @param hls_opsin_3 the input RGB image data for channel-3.
+ * @param quant_field_row the initial quant_field data.
+ * @param masking_field_row the initial masking_field data.
+ * @param aq_map_f the initial adjust quant map data.
+ * @param cmap_axi the output of color correlation map.
+ * @param ac_coef_axiout the output of quantized AC coefficients.
+ * @param strategy_all the output of strategy for each block in image
+ * @param raw_quant_field_i the output of computed raw_quant_field
+ * @param hls_order the output of orders for each block in image
+ * @param hls_dc8x8 the DC coefficients output for 8x8 blocks
+ * @param hls_dc16x16 the DC coefficients output for 16x16 blocks
+ * @param hls_dc32x32 the DC coefficients output for 32x32 blocks
+ */
+// ------------------------------------------------------------
+
+extern "C" void JxlEnc_lossy_enc_compute(int config[MAX_NUM_CONFIG],
+                                         float config_fl[MAX_NUM_CONFIG],
+                                         float* hls_opsin_1,
+                                         float* hls_opsin_2,
+                                         float* hls_opsin_3,
+                                         float* quant_field_row,
+                                         float* masking_field_row,
+                                         float* aq_map_f,
+                                         int8_t* cmap_axi,
+                                         int* ac_coef_axiout,
+                                         //    unsigned char* strategy_all,
+                                         uint8_t* strategy_all,
+                                         int* raw_quant_field_i,
+                                         uint32_t* hls_order,
+                                         float* hls_dc8x8,
+                                         float* hls_dc16x16,
+                                         float* hls_dc32x32);
+} // namespace codec
+} // namespace xf
+
+#endif
\ No newline at end of file
diff --git a/codec/L2/include/hw/leptonEnc/jpegDec/XAcc_lepjfifparser.hpp b/codec/L2/include/hw/leptonEnc/jpegDec/XAcc_lepjfifparser.hpp
new file mode 100644
index 0000000000..7ec9d16969
--- /dev/null
+++ b/codec/L2/include/hw/leptonEnc/jpegDec/XAcc_lepjfifparser.hpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file XAcc_lepjfifparser.hpp
+ * @brief parser_jpg_top template function API.
+ *
+ * This file is part of HLS algorithm library.
+ */
+
+#ifndef _XACC_JFIFPARSER_HPP_
+#define _XACC_JFIFPARSER_HPP_
+
+#ifndef __cplusplus
+#error "XF Image Library only works with C++."
+#endif
+
+#include "XAcc_lepjpegdecoder.hpp"
+
+namespace xf {
+namespace codec {
+namespace details {
+// ------------------------------------------------------------
+/**
+ * @brief Parse the JFIF register for the JPEG decoder
+ *
+ * @tparam CH_W size of data path in dataflow region, in bit.
+ *         when CH_W is 16, the decoder could decode one symbol per cycle in about 99% cases.
+ *         when CH_W is 8 , the decoder could decode one symbol per cycle in about 80% cases, but use less resource.
+ *
+ * @param datatoDDR the pointer to DDR.
+ * @param size the total bytes to be read from DDR.
+ * @param r the index of vector to be read from AXI in all cases
+ * @param c the column to be read from AXI in the case when AXI_WIDTH > 8*sizeof(char)
+ * @param dht_tbl1,dht_tbl2 the segment data of Define huffman table marker.
+ * @param hls_cmp the shift register organized by the index of each color component.
+ * @param left the number of bytes to be read from DDR after parser.
+ * @param img_info image info, including hls_cs_cmpc/hls_mbs/hls_mcuh/hls_mcuc; just for csim tests.
+ * @param hls_mbs the number of blocks in mcu for each component.
+ * @param hls_compinfo image information may be used by lepton.
+ * @param rtn return flag.
+ * @param plep information used by lepton.
+ */
+void parser_jpg_top(ap_uint<AXI_WIDTH>* datatoDDR,
+                    const int size,
+                    int& r,
+                    int& c,
+                    uint16_t dht_tbl1[2][2][1 << DHT1],
+                    uint16_t dht_tbl2[2][2][1 << DHT2],
+                    ap_uint<12>& hls_cmp,
+                    int& left,
+                    // image info
+                    img_info& img_info,
+                    uint8_t hls_mbs[MAX_NUM_COLOR],
+                    hls_compInfo hls_compinfo[MAX_NUM_COLOR],
+                    bool& rtn,
+                    decOutput* plep);
+
+// ------------------------------------------------------------
+/**
+ * @brief Level 1 : decode all mcu
+ *
+ * @tparam CH_W size of data path in dataflow region, in bit.
+ *         when CH_W is 16, the decoder could decode one symbol per cycle in about 99% cases.
+ *         when CH_W is 8 , the decoder could decode one symbol per cycle in about 80% cases, but use less resource.
+ * @param ptr the pointer to DDR.
+ * @param sz the total bytes to be read from DDR.
+ * @param c the column to be read from AXI in the case when AXI_WIDTH > 8*sizeof(char)
+ * @param dht_tbl1,dht_tbl2 the segment data of Define huffman table marker.
+ * @param hls_cmp the shift register organized by the index of each color component.
+ * @param hls_mbs the number of blocks in mcu for each component.
+ * @param img_info image info, including hls_cs_cmpc/hls_mbs/hls_mcuh/hls_mcuc; just for csim tests.
+ * @param rst_cnt NOTE(review): not documented originally; presumably a restart counter written by the decoder - confirm.
+ * @param block_strm the stream of coefficients in block,23:is_rst, 22:is_endblock,21~16:bpos,15~0:block val
+ */
+void decoder_jpg_top(ap_uint<AXI_WIDTH>* ptr,
+                     const int sz,
+                     const int c,
+                     const uint16_t dht_tbl1[2][2][1 << DHT1],
+                     const uint16_t dht_tbl2[2][2][1 << DHT2],
+                     ap_uint<12> hls_cmp,
+                     const uint8_t hls_mbs[MAX_NUM_COLOR],
+                     const img_info img_info,
+
+                     uint32_t& rst_cnt,
+                     hls::stream<ap_uint<24> >& block_strm);
+
+} // namespace details
+} // namespace codec
+} // namespace xf
+
+namespace xf {
+namespace codec {
+// ------------------------------------------------------------
+/**
+* @brief Level 2 : kernel for jfif parser + huffman decoder
+*
+* @tparam CH_W size of data path in dataflow region, in bit.
+*         when CH_W is 16, the decoder could decode one symbol per cycle in about 99% cases.
+*         when CH_W is 8 , the decoder could decode one symbol per cycle in about 80% cases, but use less resource.
+* @param datatoDDR the pointer to DDR.
+* @param size the total bytes to be read from DDR.
+* @param img_info image info; presumably carries the total mcu count (hls_mcuc) this entry used to describe - TODO confirm.
+* @param hls_cmpnfo the component info used by lepton.
+* @param block_strm the stream of coefficients in block,23:is_rst, 22:is_endblock,21~16:bpos,15~0:block val
+* @param rtn the flag indicating whether the decode succeeded
+* @param plep information used by lepton.
+*/
+void kernel_parser_decoder(ap_uint<AXI_WIDTH>* datatoDDR,
+                           const int size,
+                           img_info& img_info,
+                           hls_compInfo hls_cmpnfo[MAX_NUM_COLOR],
+                           hls::stream<ap_uint<24> >& block_strm,
+                           bool& rtn,
+                           decOutput* plep);
+
+} // namespace codec
+} // namespace xf
+
+#endif
diff --git a/codec/L2/include/hw/leptonEnc/jpegDec/XAcc_lepjpegdecoder.hpp b/codec/L2/include/hw/leptonEnc/jpegDec/XAcc_lepjpegdecoder.hpp
new file mode 100644
index 0000000000..28cb3de23d
--- /dev/null
+++ b/codec/L2/include/hw/leptonEnc/jpegDec/XAcc_lepjpegdecoder.hpp
@@ -0,0 +1,242 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file XAcc_lepjpegdecoder.hpp
+ * @brief mcu_decoder function API and block re-ordering declarations for the lepton encoder.
+ *
+ * This file is part of HLS algorithm library.
+ */
+
+#ifndef __cplusplus
+#error " XAcc_jpegdecoder hls::stream<> interface, and thus requires C++"
+#endif
+
+#ifndef _XACC_JPEGDECODER_HPP_
+#define _XACC_JPEGDECODER_HPP_
+#include "XAcc_common.hpp"
+#include "axi_to_stream.hpp"
+#include <ap_int.h>
+#include <hls_stream.h>
+
+#ifndef __SYNTHESIS__
+// For debug
+#include <bitset>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <cstdio>
+#endif
+
+#define _XF_IMAGE_VOID_CAST static_cast<void>
+// Debug print hook. XXX toggle the guard below to debug this file:
+// use "#ifndef __SYNTHESIS__" instead of "#if 0" (printf needs <cstdio>, which is only included without __SYNTHESIS__).
+#if 0
+#define _XF_IMAGE_PRINT(msg...) \
+    do {                        \
+        printf(msg);            \
+    } while (0)
+#else
+#define _XF_IMAGE_PRINT(msg...) (_XF_IMAGE_VOID_CAST(0))
+// Default branch: the arguments are discarded and the macro expands to static_cast<void>(0).
+#endif
+
+// ------------------------------------------------------------
+#define DHT1 (9)          // the number of leading bits of huffman codes (tbl1 tables have 1 << DHT1 entries)
+#define DHT2 (10)         // the number of tail bits of huffman codes (tbl2 tables have 1 << DHT2 entries)
+#define DHT_S (16 - DHT1) // the exponent of the address weight, weight = 2^DHT_S; parenthesized so it is safe inside any expression
+#define SCALE1 (1 << DHT1)
+#define SCALE2 (1 << DHT2)
+#define DHT_M (1 << DHT2)
+#define MAX_NUM_COLOR (3)     // the max number of cmp for this decoder, current is 3
+#define MAX_DEC_PIX (1000000) // the max bytes of input jpg, 1M is enough for 800*800 co-sim
+#define CMPhuff (2)           // the max number of huffman tables for all cmp, current is 2
+// ------------------------------------------------------------
+#define BURST_LENTH (64) // NOTE(review): presumably "burst length"; spelling kept because it is an identifier
+#define CH_W (16)        // size of the data path in bit; selects CHType below
+#if (CH_W == 32)
+typedef uint32_t CHType; // channel data type
+#else
+typedef uint16_t CHType; // channel data type (chosen for every CH_W other than 32, including the current 16)
+#endif
+// ------------------------------------------------------------
+// tmp vector sized for the max block count of the image over all cmps, to decode the hq.jpg need 1036800
+// to decode 800*800 need 50*50*4 =10000
+#define MAXCMP_BC (1036800)
+
+// ------------------------------------------------------------
+#if 0 // Disabled: AXI_WIDTH, COLOR_FORMAT, the two tables and decOutput below are also defined in lepton/XAcc_common.hpp.
+#define AXI_WIDTH (16)
+
+enum COLOR_FORMAT { C400 = 0, C420, C422, C444 };
+
+const static uint8_t hls_jpeg_zigzag_to_raster[64] = {0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,
+                                                      12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6,  7,  14, 21, 28,
+                                                      35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+                                                      58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63};
+const short hls_icos_base_8192_scaled[64] = {
+    8192,   8192,  8192,   8192,  8192, 8192,  8192,   8192,   11363, 9633,  6436,   2260,  -2260,
+    -6436,  -9633, -11363, 10703, 4433, -4433, -10703, -10703, -4433, 4433,  10703,  9633,  -2260,
+    -11363, -6436, 6436,   11363, 2260, -9633, 8192,   -8192,  -8192, 8192,  8192,   -8192, -8192,
+    8192,   6436,  -11363, 2260,  9633, -9633, -2260,  11363,  -6436, 4433,  -10703, 10703, -4433,
+    -4433,  10703, -10703, 4433,  2260, -6436, 9633,   -11363, 11363, -9633, 6436,   -2260,
+};
+
+struct decOutput {
+    COLOR_FORMAT format;
+    uint16_t axi_width[MAX_NUM_COLOR];
+    uint16_t axi_height[MAX_NUM_COLOR];
+    uint8_t axi_map_row2cmp[4];
+    uint8_t min_nois_thld_x[MAX_NUM_COLOR][64];
+    uint8_t min_nois_thld_y[MAX_NUM_COLOR][64];
+    uint8_t q_tables[MAX_NUM_COLOR][8][8];
+    int32_t idct_q_table_x[MAX_NUM_COLOR][8][8];
+    int32_t idct_q_table_y[MAX_NUM_COLOR][8][8];
+    int32_t idct_q_table_l[MAX_NUM_COLOR][8][8];//todo
+
+    uint16_t axi_mcuv;
+    uint8_t axi_num_cmp_mcu;
+    uint8_t axi_num_cmp;
+};
+#endif // end of disabled duplicate definitions
+// ------------------------------------------------------------
+
+namespace xf {
+namespace codec {
+
+// ------------------------------------------------------------
+struct hls_huff_DHT { // two-level huffman lookup tables, same element layout as mcu_decoder's dht_tbl1/dht_tbl2
+    unsigned short tbl1[2][CMPhuff][1 << DHT1]; // addressed by DHT1 bits; first index presumably DC/AC - TODO confirm
+    unsigned short tbl2[2][CMPhuff][1 << DHT2]; // addressed by DHT2 bits; first index presumably DC/AC - TODO confirm
+};
+// ------------------------------------------------------------
+struct hls_huff_segment { // one huffman table description: code counts per length plus up to 256 values
+    unsigned char size[16]; // size[i] is the number of huffman codes that are i+1 bits long
+    unsigned char val[256]; // presumably the symbol values in code order - TODO confirm
+};
+// ------------------------------------------------------------
+struct sos_data { // one CHType-wide word of scan data plus flags
+    uint8_t bits;         // presumably the number of valid bits in data - TODO confirm
+    uint8_t garbage_bits; // presumably padding bits to discard - TODO confirm
+    CHType data;          // payload word (uint16_t unless CH_W is 32)
+    bool rst; // todo
+    bool end_sos;         // presumably marks the end of the scan - TODO confirm
+};
+// ------------------------------------------------------------
+struct img_info { // image-level counters; field names match the "image info" parameters of mcu_decoder
+    uint8_t hls_cs_cmpc; // component count in current scan
+    uint32_t hls_mcuc; // the total mcu
+    uint16_t hls_mcuh; // the horizontal mcu
+    uint16_t hls_mcuv; // presumably the vertical mcu - TODO confirm
+};
+// ------------------------------------------------------------
+struct hls_compInfo { // per-component geometry; kernel_parser_decoder takes an array of MAX_NUM_COLOR of these
+    int sfv; // sample factor vertical
+    int sfh; // sample factor horizontal
+    int mbs; // blocks in mcu
+    int bcv; // block count vertical (interleaved)
+    int bch; // block count horizontal (interleaved)
+    int bc;  // block count (all) (interleaved)
+};
+} // namespace codec
+} // namespace xf
+
+// ------------------------------------------------------------
+
+namespace xf {
+namespace codec {
+namespace details {
+// ------------------------------------------------------------
+/**
+ * @brief Level 1 : decode all mcu
+ *
+ * @note CH_W is a macro in this header (currently 16), not a template parameter: size of data path in dataflow region, in bit.
+ *         when CH_W is 16, the decoder could decode one symbol per cycle in about 99% cases.
+ *         when CH_W is 8 , the decoder could decode one symbol per cycle in about 80% cases, but use less resource.
+ *
+ * @param image_strm the stream of compressed data after SOS marker.
+ * @param eof_strm the stream of end flag for image_strm, synchronous signal using false and an additional true in the
+ * end.
+ * @param dht_tbl1,dht_tbl2 the segment data of Define huffman table marker.
+ * @param hls_cmp the shift register organized by the index of each color component.
+ * @param hls_cs_cmpc,hls_mbs,hls_mcuh,hls_mcuc image info; noted as being just for csim tests.
+ * @param block_strm the stream of coefficients in block,23:is_rst, 22:is_endblock,21~16:bpos,15~0:block val
+ */
+void mcu_decoder(
+    // input
+    hls::stream<CHType>& image_strm,
+    hls::stream<bool>& eof_strm,
+    const uint16_t dht_tbl1[2][2][1 << DHT1],
+    const uint16_t dht_tbl2[2][2][1 << DHT2],
+    ap_uint<12> hls_cmp,
+
+    // image info
+    const uint8_t hls_cs_cmpc, // component count in current scan
+    const uint8_t hls_mbs[MAX_NUM_COLOR],
+    const uint16_t hls_mcuh, // the horizontal mcu
+    const uint32_t hls_mcuc, // the total mcu
+
+    // output
+    uint32_t& rst_cnt, // presumably the number of restart markers seen - TODO confirm
+    hls::stream<ap_uint<24> >& block_strm);
+// ------------------------------------------------------------
+/**
+ * @brief convert strm to Aligned_block for the lepton
+ *
+ * @param block_strm the stream of coefficients in block,23:is_rst, 22:is_endblock,21~16:bpos,15~0:block val
+ * @param hls_block the maxsize block (MAX_NUM_COLOR * MAXCMP_BC * 64 entries), will be copy to the aligned_block
+ * @param hls_sfv the sample factor vertical for each component
+ * @param hls_sfh the sample factor horizontal for each component
+ * @param hls_mbs the blocks in mcu for each component.
+ * @param hls_bch the max block count horizontal (interleaved)
+ * @param hls_bc  the max block count (all) (interleaved)
+ * @param sta     the status of the process, 0: keep doing, 1: reset decoder(only for lepton), 2: decode done
+ */
+void hls_next_mcupos2(hls::stream<ap_uint<24> >& block_strm,
+                      int16_t hls_block[MAX_NUM_COLOR * MAXCMP_BC * 64],
+                      int hls_sfv[4], // declared with 4 entries although MAX_NUM_COLOR is 3
+                      int hls_sfh[4],
+                      const uint8_t hls_mbs[4],
+                      int hls_bch,
+                      int hls_bc,
+                      int32_t hls_mcuc,    // presumably the total mcu, as in mcu_decoder - TODO confirm
+                      uint8_t hls_cs_cmpc, // presumably the component count in current scan, as in mcu_decoder - TODO confirm
+                      int& sta);
+
+// ------------------------------------------------------------
+/**
+ * @brief sort block in mcu to Aligned_block in line for the lepton
+ *
+ * Unlike hls_next_mcupos2, this variant has no hls_block buffer and no sta status parameter;
+ * it has streams on both sides (block_strm and str_coef[8]).
+ *
+ * @param block_strm the stream of coefficients in block,23:is_rst, 22:is_endblock,21~16:bpos,15~0:block val
+ * @param plep       decoder output record (decOutput), passed by value
+ * @param str_coef   eight streams of ap_int<11> coefficients; presumably the re-ordered output - TODO confirm
+ * @param axi_width  array of MAX_NUM_COLOR entries; presumably per-component block widths - TODO confirm
+ * @param axi_height array of MAX_NUM_COLOR entries; presumably per-component block heights - TODO confirm
+ */
+void hls_next_mcupos_strm(hls::stream<ap_uint<24> >& block_strm,
+                          const decOutput plep,
+                          hls::stream<ap_int<11> > str_coef[8],
+                          uint16_t axi_width[MAX_NUM_COLOR],
+                          uint16_t axi_height[MAX_NUM_COLOR]);
+// ------------------------------------------------------------
+
+} // namespace details
+} // namespace codec
+} // namespace xf
+#endif
diff --git a/codec/L2/include/hw/leptonEnc/lepton/XAcc_77.hpp b/codec/L2/include/hw/leptonEnc/lepton/XAcc_77.hpp
new file mode 100644
index 0000000000..8e915c3bbb
--- /dev/null
+++ b/codec/L2/include/hw/leptonEnc/lepton/XAcc_77.hpp
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file XAcc_77.hpp
+ * @brief lepton 7x7 function API.
+ *
+ * This file is part of HLS algorithm library.
+ */
+
+#ifndef __cplusplus
+#error " XAcc_77.hpp hls::stream<> interface, and thus requires C++"
+#endif
+
+#ifndef _XACC_77_H_
+#define _XACC_77_H_
+#include <ap_int.h>
+#include <hls_stream.h>
+#include <hls_math.h>
+#include "XAcc_common.hpp"
+
+static ap_uint<4> hls_nonzero_to_bin_9[50] = { // 50 entries, non-decreasing values 0..9; presumably [non-zero count 0..49] -> bin
+    0, 1, 2, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+};
+
+static const unsigned char hls_unzigzag49[] = {9,  10, 17, 25, 18, 11, 12, 19, 26, 33, 41, 34, 27, 20, 13, 14, 21,
+                                               28, 35, 42, 49, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58,
+                                               59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63}; // 49 raster indices of an 8x8 block, all in rows 1..7 and columns 1..7 (the 7x7 interior) in zigzag order
+
+namespace xf {
+namespace codec {
+namespace details {
+
+void duplicate_coef(hls::stream<ap_int<11> > coef[8], // presumably the source coefficient streams - TODO confirm direction
+                    ap_uint<32> len,
+                    hls::stream<ap_int<11> > strm_coef[8], // same element type as coef[8]
+                    hls::stream<coef_t> str_rast8[8],      // coef_t is int16_t (XAcc_common.hpp)
+                    hls::stream<coef_t>& str_dc_in);       // same name and type as the str_dc_in parameter of hls_serialize_tokens_dc
+
+void preprocess(ap_uint<32> len, // NOTE(review): declaration only; parameter roles below are inferred from names - confirm against the definition
+                ap_uint<3> id_cmp,
+                bool is_top_row,
+                hls::stream<ap_int<11> > coef[8],
+                // presumably everything below is produced from coef[8] - TODO confirm
+                hls::stream<ap_int<11> >& coef_7x7,
+                hls::stream<ap_int<11> >& coef_lft,
+                hls::stream<ap_int<11> >& coef_abv,
+                hls::stream<ap_int<11> >& coef_abv_lft,
+                hls::stream<ap_int<11> > coef_h[8],
+                hls::stream<ap_int<11> > coef_above_h[8],
+                hls::stream<bool>& strm_has_left_h,
+                hls::stream<bool>& coef_h_e,
+                hls::stream<ap_int<11> > coef_v[8],
+                hls::stream<ap_int<11> > coef_left_v[8],
+                hls::stream<bool>& strm_has_left_v,
+                hls::stream<bool>& coef_v_e,
+                hls::stream<ap_uint<6> >& non_zero_cnt, // 6-bit counters: wide enough for 0..63
+                hls::stream<ap_uint<6> >& non_zero_cnt_lft,
+                hls::stream<ap_uint<6> >& non_zero_cnt_abv,
+                hls::stream<ap_uint<6> >& non_zero_7x7,
+                hls::stream<ap_uint<6> >& non_zero_h_out,
+                hls::stream<ap_uint<3> >& coef_cnt_h_len, // 3-bit values: 0..7
+                hls::stream<ap_uint<3> >& strm_lane_h,
+                hls::stream<ap_uint<3> >& coef_cnt_v_len,
+                hls::stream<ap_uint<3> >& strm_lane_v,
+                hls::stream<ap_uint<3> >& eob_x,
+                hls::stream<ap_uint<3> >& eob_y);
+
+void hls_serialize_tokens_77(ap_uint<32> len, // leading parameters (len .. strm_nz_lft) match those of pre_serialize_tokens_77
+                             bool above_present,
+                             hls::stream<ap_uint<6> >& strm_num_nonzeros_7x7,
+                             hls::stream<ap_int<11> >& strm_coef_here,
+                             hls::stream<ap_int<11> >& strm_coef_above,
+                             hls::stream<ap_int<11> >& strm_coef_left,
+                             hls::stream<ap_int<11> >& strm_coef_above_left,
+
+                             hls::stream<ap_uint<6> >& strm_nz_cur,
+                             hls::stream<ap_uint<6> >& strm_nz_abv,
+                             hls::stream<ap_uint<6> >& strm_nz_lft,
+
+                             hls::stream<ap_uint<4> >& strm_sel_tab, // trailing parameters (strm_sel_tab .. strm_addr4) match those of push_bit_7x7_v2
+                             hls::stream<bool>& strm_cur_bit,
+                             hls::stream<short>& strm_len,
+                             // (the hls::stream<bool>& strm_e parameter is disabled in this signature)
+                             hls::stream<ap_uint<16> >& strm_addr1,
+                             hls::stream<ap_uint<16> >& strm_addr2,
+                             hls::stream<ap_uint<16> >& strm_addr3,
+                             hls::stream<ap_uint<16> >& strm_addr4);
+
+void pre_serialize_tokens_77(ap_uint<32> len, // NOTE(review): declaration only; presumably the first stage feeding push_bit_7x7* - confirm
+                             bool above_present,
+                             hls::stream<ap_uint<6> >& strm_num_nonzeros_7x7,
+                             hls::stream<ap_int<11> >& strm_coef_here,
+                             hls::stream<ap_int<11> >& strm_coef_above,
+                             hls::stream<ap_int<11> >& strm_coef_left,
+                             hls::stream<ap_int<11> >& strm_coef_above_left,
+
+                             hls::stream<ap_uint<6> >& strm_nz_cur,
+                             hls::stream<ap_uint<6> >& strm_nz_abv,
+                             hls::stream<ap_uint<6> >& strm_nz_lft,
+
+                             hls::stream<ap_uint<4> >& strm_nonzero_bin_tmp,
+
+                             hls::stream<ap_uint<6> >& strm_7x7_nz,
+                             hls::stream<ap_uint<4> >& strm_7x7_length, // same name and type as push_bit_7x7's strm_7x7_length
+
+                             hls::stream<ap_uint<4> >& strm_7x7_num_nonzero_bin,
+                             hls::stream<ap_uint<4> >& strm_7x7_bsr_best_prior,
+
+                             hls::stream<bool>& strm_7x7_cur_bit_sign_tmp,
+
+                             hls::stream<ap_uint<11> >& strm_abs_coef, // unsigned 11-bit: presumably |coef| of an ap_int<11> value - TODO confirm
+                             hls::stream<ap_uint<6> >& strm_coord);
+
+void push_bit_7x7(ap_uint<32> len, // variant with a bool strm_e output; push_bit_7x7_v2 declares a short strm_len instead
+                  hls::stream<ap_uint<6> >& strm_num_nonzeros_7x7,
+                  hls::stream<ap_uint<4> >& strm_7x7_length,
+
+                  hls::stream<ap_uint<4> >& strm_nonzero_bin,
+
+                  hls::stream<ap_uint<4> >& strm_7x7_num_nonzero_bin,
+                  hls::stream<ap_uint<4> >& strm_7x7_bsr_best_prior,
+
+                  hls::stream<bool>& strm_cur_bit_sign,
+
+                  hls::stream<ap_uint<11> >& strm_abs_coef,
+                  hls::stream<ap_uint<6> >& strm_7x7_coord_nois,
+
+                  hls::stream<ap_uint<4> >& strm_sel_tab, // presumably outputs from here on - TODO confirm
+                  hls::stream<bool>& strm_cur_bit,
+                  hls::stream<bool>& strm_e,
+                  hls::stream<ap_uint<16> >& strm_addr1,
+                  hls::stream<ap_uint<16> >& strm_addr2,
+                  hls::stream<ap_uint<16> >& strm_addr3,
+                  hls::stream<ap_uint<16> >& strm_addr4);
+
+void push_bit_7x7_v2(ap_uint<32> len, // same parameters as push_bit_7x7 except that bool strm_e is replaced by short strm_len
+                     hls::stream<ap_uint<6> >& strm_num_nonzeros_7x7,
+                     hls::stream<ap_uint<4> >& strm_7x7_length,
+
+                     hls::stream<ap_uint<4> >& strm_nonzero_bin,
+
+                     hls::stream<ap_uint<4> >& strm_7x7_num_nonzero_bin,
+                     hls::stream<ap_uint<4> >& strm_7x7_bsr_best_prior,
+
+                     hls::stream<bool>& strm_cur_bit_sign,
+
+                     hls::stream<ap_uint<11> >& strm_abs_coef,
+                     hls::stream<ap_uint<6> >& strm_7x7_coord_nois,
+
+                     hls::stream<ap_uint<4> >& strm_sel_tab, // presumably outputs from here on - TODO confirm
+                     hls::stream<bool>& strm_cur_bit,
+                     hls::stream<short>& strm_len,
+                     // (the hls::stream<bool>& strm_e parameter is disabled in this signature)
+                     hls::stream<ap_uint<16> >& strm_addr1,
+                     hls::stream<ap_uint<16> >& strm_addr2,
+                     hls::stream<ap_uint<16> >& strm_addr3,
+                     hls::stream<ap_uint<16> >& strm_addr4);
+
+} // namespace details
+} // namespace codec
+} // namespace xf
+#endif
diff --git a/codec/L2/include/hw/leptonEnc/lepton/XAcc_arith.hpp b/codec/L2/include/hw/leptonEnc/lepton/XAcc_arith.hpp
new file mode 100644
index 0000000000..38216fd73c
--- /dev/null
+++ b/codec/L2/include/hw/leptonEnc/lepton/XAcc_arith.hpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file XAcc_arith.hpp
+ * @brief lepton arith function API.
+ *
+ * This file is part of HLS algorithm library.
+ */
+
+#ifndef __cplusplus
+#error " XAcc_arith.hpp hls::stream<> interface, and thus requires C++"
+#endif
+
+#ifndef _XACC_ARITH_H_
+#define _XACC_ARITH_H_
+#include <ap_int.h>
+#include <hls_stream.h>
+#include "XAcc_common.hpp"
+////TMEP
+
+namespace xf {
+namespace codec {
+namespace details {
+
+void vpx_enc_range(
+    // input (br_range is passed by pointer; vpx_enc_syn lists its counterpart under "Iteration for variable")
+    unsigned char* br_range,
+    hls::stream<bool>& strm_bit,
+    hls::stream<uint8_t>& strm_prob,
+    hls::stream<bool>& strm_e_range,
+    hls::stream<uint8_t>& strm_tab_dbg,
+    // output (these three streams have the same names as the inputs of vpx_enc_value)
+    hls::stream<bool>& strm_range_o_e,
+    hls::stream<ap_uint<3> >& strm_range_o_shift,
+    hls::stream<unsigned char>& strm_range_o_split);
+
+void vpx_enc_value(
+    // state passed by pointer, followed by the three streams named like vpx_enc_range's outputs
+    int* br_count,
+    unsigned int* br_lowvalue,
+    hls::stream<bool>& strm_range_o_e,
+    hls::stream<ap_uint<3> >& strm_range_o_shift,
+    hls::stream<unsigned char>& strm_range_o_split,
+    // Output ////////////////////
+    hls::stream<bool>& strm_value_o_e,
+    hls::stream<bool>& strm_value_o_cy,
+    hls::stream<unsigned char>& strm_value_o_byte);
+
+void vpx_enc_run(unsigned char* br_pre_byte, // state passed by pointer (br_pre_byte, br_run, br_isFirst)
+                 unsigned short* br_run,
+                 bool* br_isFirst,
+                 hls::stream<bool>& strm_value_o_e, // the three strm_value_o_* streams are named like vpx_enc_value's outputs
+                 hls::stream<bool>& strm_value_o_cy,
+                 hls::stream<unsigned char>& strm_value_o_byte,
+                 // Output ////////////////////
+                 hls::stream<bool>& strm_CyByte_o_e,
+                 hls::stream<bool>& strm_CyByte_o_cy,
+                 hls::stream<unsigned char>& strm_CyByte_o_byte,
+                 hls::stream<unsigned short>& strm_CyByte_o_run);
+
+void vpx_enc_pos(unsigned int* br_pos, // state passed by pointer
+                 hls::stream<bool>& strm_CyByte_o_e, // the four strm_CyByte_o_* streams are named like vpx_enc_run's outputs
+                 hls::stream<bool>& strm_CyByte_o_cy,
+                 hls::stream<unsigned char>& strm_CyByte_o_byte,
+                 hls::stream<unsigned short>& strm_CyByte_o_run,
+                 // Output ////////////////////
+                 hls::stream<bool>& strm_pos_o_e,
+                 hls::stream<unsigned char>& strm_pos_o_byte); // unsigned char here, ap_uint<8> in vpx_enc_syn
+
+void vpx_enc_syn(
+    // Iteration for variable: one pointer per state item taken by vpx_enc_range/value/run/pos
+    unsigned char* range,
+    int* cnt,
+    unsigned int* value,
+    unsigned char* pre_byte,
+    unsigned short* run,
+    bool* br_isFirst,
+    unsigned int* pos,
+    // input (same four streams as vpx_enc_range's inputs)
+    hls::stream<bool>& strm_bit,
+    hls::stream<uint8_t>& strm_prob,
+    hls::stream<bool>& strm_e_range,
+    hls::stream<uint8_t>& strm_tab_dbg,
+    // output (named like vpx_enc_pos's outputs; the byte stream is ap_uint<8> here)
+    hls::stream<bool>& strm_pos_o_e,
+    hls::stream<ap_uint<8> >& strm_pos_o_byte);
+
+} // namespace details
+} // namespace codec
+} // namespace xf
+#endif
diff --git a/codec/L2/include/hw/leptonEnc/lepton/XAcc_common.hpp b/codec/L2/include/hw/leptonEnc/lepton/XAcc_common.hpp
new file mode 100644
index 0000000000..fc3f9aa873
--- /dev/null
+++ b/codec/L2/include/hw/leptonEnc/lepton/XAcc_common.hpp
@@ -0,0 +1,394 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file XAcc_common.hpp
+ * @brief lepton common include struct, top of prepare_engine and push_engine function.
+ *
+ * This file is part of HLS algorithm library.
+ */
+
+#ifndef __cplusplus
+#error " XAcc_common.hpp hls::stream<> interface, and thus requires C++"
+#endif
+
+#ifndef _XACC_COMMON_HPP_
+#define _XACC_COMMON_HPP_
+#include <ap_int.h>
+#include <hls_stream.h>
+#include <stdint.h>
+
+enum { NZ_CNT_7x7, NZ_CNT_1x8, NZ_CNT_8x1, NOIS_CNT, NOIS_CNT_DC, THRE_CNT, EXP_CNT, EXP_CNT_X, EXP_CNT_DC, SIGN_CNT }; // ten selectors valued 0..9; presumably the values carried by the ap_uint<4> strm_sel_tab streams - TODO confirm
+
+const static uint8_t hls_raster_to_jpeg_zigzag[64] = {0,  1,  5,  6,  14, 15, 27, 28, 2,  4,  7,  13, 16, 26, 29, 42,
+                                                      3,  8,  12, 17, 25, 30, 41, 43, 9,  11, 18, 24, 31, 40, 44, 53,
+                                                      10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60,
+                                                      21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63}; // [raster index] -> zigzag index; inverse of hls_jpeg_zigzag_to_raster
+
+const static uint8_t hls_jpeg_zigzag_to_raster[64] = {0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,
+                                                      12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6,  7,  14, 21, 28,
+                                                      35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+                                                      58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63}; // [zigzag index] -> raster index (row * 8 + column) of an 8x8 block
+const static uint8_t hls_raster_to_aligned[64] = {49, 50, 51, 52, 53, 54, 55, 56, 57, 0,  1,  5,  6,  14, 15, 27,
+                                                  58, 2,  4,  7,  13, 16, 26, 28, 59, 3,  8,  12, 17, 25, 29, 38,
+                                                  60, 9,  11, 18, 24, 30, 37, 39, 61, 10, 19, 23, 31, 36, 40, 45,
+                                                  62, 20, 22, 32, 35, 41, 44, 46, 63, 21, 33, 34, 42, 43, 47, 48}; // [raster index] -> aligned index: 7x7 interior -> 0..48, row 0 -> 49..56, column 0 of rows 1..7 -> 57..63
+
+const static uint8_t hls_zigzag_to_aligned[64] = {49, 50, 57, 58, 0,  51, 52, 1,  2,  59, 60, 3,  4,  5,  53, 54,
+                                                  6,  7,  8,  9,  61, 62, 10, 11, 12, 13, 14, 55, 56, 15, 16, 17,
+                                                  18, 19, 20, 63, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+                                                  33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48}; // [zigzag index] -> aligned index, i.e. hls_raster_to_aligned[hls_jpeg_zigzag_to_raster[z]]
+
+#define _MACRO_MIN(a, b) (((a) > (b)) ? (b) : (a)) // smaller of a and b; arguments parenthesized (still evaluated twice)
+#define _MACRO_ABS(a) (((a) > 0) ? (a) : (-(a)))   // absolute value; -(a) so that _MACRO_ABS(x - y) negates the whole expression
+
+typedef int16_t coef_t; // element type of one coefficient
+struct coeff_64 {       // one block of 64 coefficients
+    coef_t data[64];
+    coeff_64() {} // leaves data uninitialized
+    coeff_64(int16_t* psrc) { // copies 64 values from psrc into data
+        for (int k = 0; k < 64; ++k) data[k] = psrc[k];
+    }
+    void ExportTo(int16_t* pdes) { // copies the 64 values of data out to pdes
+        for (int k = 0; k < 64; ++k) pdes[k] = data[k];
+    }
+};
+struct coeff_77_t { // 49 coefficients (7 * 7)
+    coef_t data[49];
+};
+
+struct coeff_edge_t { // 8 coefficients; presumably one row or column edge of a block - TODO confirm
+    coef_t data[8];
+};
+
+typedef uint8_t pix_t;  // unsigned 8-bit pixel
+typedef int16_t epix_t; // signed 16-bit pixel value
+struct pix_edge_t {     // 8 epix_t values
+    epix_t data[8];
+};
+
+#define MAX_EXPONENT_PIX (11)
+#define MAX_NUM_COLOR (3)
+#define MAX_NUM_BLOCK88_W (1024) // max number of 8x8 blocks per row
+#define MAX_NUM_BLOCK88_H (1024) // max number of 8x8 blocks per column
+#define MAX_NUM_BLOCK88 (MAX_NUM_BLOCK88_W * MAX_NUM_BLOCK88_H)
+#define MAX_PIX_W (MAX_NUM_BLOCK88_W << 3) // 8 pixels per block side
+#define MAX_PIX_H (MAX_NUM_BLOCK88_H << 3)
+#define MAX_NUM_PIX (MAX_NUM_BLOCK88 * MAX_NUM_COLOR * 64)  // 64 values per block, per colour
+#define MAX_NUM_COEF (MAX_NUM_BLOCK88 * MAX_NUM_COLOR * 64) // same value as MAX_NUM_PIX
+#define MAX_SIZE_COEF (MAX_NUM_COEF * 2)                    // bytes, at 2 bytes per coef_t
+
+#define BITS_DDR (64)
+#define SCAL_AXI (1)
+#define BITS_AXI (BITS_DDR * SCAL_AXI) // Must be (BITS_DDR*SCAL_AXI)
+#define AXI_WIDTH (16) // NOTE(review): differs from BITS_AXI (64); used as the ap_uint width of the decoder's DDR pointer
+
+#define DIV_FOR_WD_AXI (SCAL_AXI)
+#define NUM_COEF_AXI (BITS_AXI / 16) // 16-bit coefficients per AXI word
+#define MAX_COEF_AXI (MAX_NUM_COEF / NUM_COEF_AXI)
+#define STRIP_COEFF_AXI (64 / NUM_COEF_AXI) // AXI words per 64-coefficient block
+struct WD_AXI {
+    coef_t data[NUM_COEF_AXI]; // 4
+};
+
+#define MAX_NUM_AXI (MAX_NUM_COEF / NUM_COEF_AXI) // same expression as MAX_COEF_AXI
+
+struct hls_Branch { // adaptive binary statistics: two 8-bit observation counts (false / true) and a probability derived from them
+   public:
+    uint8_t counts_[2]; // counts_[0]: false observations, counts_[1]: true observations
+    // uint8_t probability_;
+   public:
+    // uint8_t prob() const { return probability_; }
+    void set_identity() { // reset both counts to 1
+        counts_[0] = 1;
+        counts_[1] = 1;
+        //  probability_ = 128;
+    }
+    // bool is_identity() const {
+    //  return counts_[0] == 1 && counts_[1] == 1 && probability_ == 128;
+    //}
+    static hls_Branch identity() { // returns a branch with both counts equal to 1
+        hls_Branch retval;
+        retval.set_identity();
+        return retval;
+    }
+    uint32_t true_count() const { return counts_[1]; }
+    uint32_t false_count() const { return counts_[0]; }
+    struct ProbUpdate { // not referenced by any other member of hls_Branch in this header
+        struct ProbOutcome {
+            uint8_t log_prob;
+        };
+        uint8_t prob;
+        ProbOutcome next[2];
+        uint8_t& log_prob_false() { return next[0].log_prob; }
+        uint8_t& log_prob_true() { return next[1].log_prob; }
+    };
+    // Record one observation of `obs` without letting an 8-bit count wrap.
+    void record_obs_and_update(bool obs) {
+        bool isFull = counts_[obs] == 0xff;
+        bool isOne = counts_[!obs] == 1;
+        if (!isFull)
+            counts_[obs]++; // normal case: room left in the counter
+        else if (isOne) {
+            counts_[obs] = 0xff; // opposite outcome still at 1: stay pinned at 255
+        } else {
+            counts_[obs] = 129; // 129 == ((1 + 0xff) >> 1) + 1: halved count plus this observation
+            counts_[1 - obs] = ((1 + counts_[1 - obs]) >> 1); // halve the opposite count, rounding up
+        }
+    }
+    void record_obs_and_update_almost_org(bool obs) { // same final counts as record_obs_and_update, via increment-then-check
+        // unsigned int fcount = counts_[0];
+        //  unsigned int tcount = counts_[1];
+        bool overflow = (counts_[obs]++ == 0xff); // post-increment wraps the uint8_t to 0 when it was 255
+        if (overflow) { // check less than 512
+            bool neverseen = counts_[!obs] == 1;
+            if (neverseen) {
+                counts_[obs] = 0xff;
+                //    probability_ = obs ? 0 : 255;
+            } else {
+                // counts_[0] = ((1 + (unsigned int)fcount) >> 1);
+                //  counts_[1] = ((1 + (unsigned int)tcount) >> 1);
+                counts_[1 - obs] = ((1 + (unsigned int)counts_[1 - obs]) >> 1);
+                counts_[obs] = 129;
+                //     probability_ = optimize(counts_[0] + counts_[1]);
+            }
+        } else {
+            //   probability_ = optimize(counts_[0] + counts_[1]);
+        }
+    }
+    uint8_t prob2() { // probability byte: 0 or 255 for the two pinned states, otherwise optimize()
+        unsigned int fcount = counts_[0];
+        unsigned int tcount = counts_[1];
+        if (fcount == 1 && tcount == 255) return 0;
+        if (fcount == 255 && tcount == 1) return 255;
+        uint8_t normal = optimize();
+        return normal;
+    }
+    void normalize() { // halve both counts, rounding up
+        counts_[0] = ((1 + (unsigned int)counts_[0]) >> 1);
+        counts_[1] = ((1 + (unsigned int)counts_[1]) >> 1);
+    }
+    // Returns (256 * false_count) / (false_count + true_count) truncated to 8 bits; divides by zero if both counts are 0.
+    uint8_t optimize() const {
+        uint16_t cnt_lsb = counts_[0];
+        uint16_t sum = counts_[0] + counts_[1];
+        const int prob = (cnt_lsb << 8) / sum; // fast_divide18bit_by_10bit(cnt_lsb << 8, sum);
+        return (uint8_t)prob;
+    }
+
+    hls_Branch() {} // does not initialize counts_; call set_identity() or use identity()
+};
+struct tmp_struct { // NOTE(review): purpose not visible in this header - confirm whether it is still used
+    bool value;
+    hls_Branch* branch; // pointer to a branch statistics record
+    int bill;
+};
+
+// template <class T>
+// static void hls_cp8(T des[8], T src[8])
+//{
+//#pragma HLS inline
+//#pragma HLS PIPELINE
+//	INNER_LOOP:
+//	for(int i=0;i<8;i++)
+//		des[i] = src[i];
+//}
+
+struct struct_arith { // arithmetic-coder state; fields correspond by type to the pointer parameters of vpx_enc_syn
+    int count;              // cf. int* cnt
+    unsigned int value;     // cf. unsigned int* value
+    unsigned char pre_byte; // cf. unsigned char* pre_byte
+    unsigned short run;     // cf. unsigned short* run
+    unsigned int pos;       // cf. unsigned int* pos
+    unsigned char range;    // cf. unsigned char* range
+    bool isFirst;           // cf. bool* br_isFirst
+};
+
+enum COLOR_FORMAT { C400 = 0, C420, C422, C444 }; // values 0..3; presumably JPEG chroma subsampling 4:0:0 / 4:2:0 / 4:2:2 / 4:4:4 - TODO confirm
+
+struct decOutput { // decoder output record; several fields mirror the parameters of kernel_LeptonE_strmIn
+    COLOR_FORMAT format;
+    uint16_t axi_width[MAX_NUM_COLOR];  // colldata->block_width(i);
+    uint16_t axi_height[MAX_NUM_COLOR]; // NOTE(review): presumably colldata->block_height(i), not block_width - confirm
+    uint8_t axi_map_row2cmp[4];         //     AXI                   2,1,0,0 2,1,0
+    uint8_t min_nois_thld_x[MAX_NUM_COLOR][64];
+    uint8_t min_nois_thld_y[MAX_NUM_COLOR][64];
+    uint8_t q_tables[MAX_NUM_COLOR][8][8]; //[64],
+    int32_t idct_q_table_x[MAX_NUM_COLOR][8][8];
+    int32_t idct_q_table_y[MAX_NUM_COLOR][8][8];
+    int32_t idct_q_table_l[MAX_NUM_COLOR][8][8]; // not among the parameters of kernel_LeptonE_strmIn
+
+    uint16_t axi_mcuv;
+    uint8_t axi_num_cmp_mcu;
+    uint8_t axi_num_cmp; // not among the parameters of kernel_LeptonE_strmIn
+};
+
+const short hls_icos_base_8192_scaled[64] = { // 8 rows of 8 values (printed 13 per line); row 0 is all 8192
+    8192,   8192,  8192,   8192,  8192, 8192,  8192,   8192,   11363, 9633,  6436,   2260,  -2260,
+    -6436,  -9633, -11363, 10703, 4433, -4433, -10703, -10703, -4433, 4433,  10703,  9633,  -2260,
+    -11363, -6436, 6436,   11363, 2260, -9633, 8192,   -8192,  -8192, 8192,  8192,   -8192, -8192,
+    8192,   6436,  -11363, 2260,  9633, -9633, -2260,  11363,  -6436, 4433,  -10703, 10703, -4433,
+    -4433,  10703, -10703, 4433,  2260, -6436, 9633,   -11363, 11363, -9633, 6436,   -2260,
+}; // rows u >= 1 match round(8192 * sqrt(2) * cos((2x + 1) * u * pi / 16)), e.g. 11363 for u = 1, x = 0
+
+const short hls_icos_idct_linear_8192_scaled[64] = { // NOTE(review): entries are scaled by 1024 (first entry 1024), not 8192 as the name suggests
+    1024, 1420,  1338,  1204,  1024,  805,   554,   283,  1024, 1204,  554,   -283,  -1024, -1420, -1338, -805,
+    1024, 805,   -554,  -1420, -1024, 283,   1338,  1204, 1024, 283,   -1338, -805,  1024,  1204,  -554,  -1420,
+    1024, -283,  -1338, 805,   1024,  -1204, -554,  1420, 1024, -805,  -554,  1420,  -1024, -283,  1338,  -1204,
+    1024, -1204, 554,   283,   -1024, 1420,  -1338, 805,  1024, -1420, 1338,  -1204, 1024,  -805,  554,   -283,
+}; // 8 rows of 8; every row starts with 1024; row 0 matches round(1024 * sqrt(2) * cos(u * pi / 16)) for u = 1..7
+#if 0
+/////template PackStr2Mem_t////////////////////////
+template < int W_STR,int B_LAST, int N_BURST>
+int PackStr2Mem_t(
+    uint8_t* pdes,
+	struct_arith    axi_arith,
+    hls::stream<ap_uint<W_STR> > &str_s)
+{
+    const int N_BYTE = ((W_STR-1+7)/8);
+    const int N_PACK = (4/N_BYTE);
+    uint8_t* ptmp = pdes;//uint32_t
+    int num_w = 0;
+    ap_uint<1> isLast = 0;
+    uint8_t buff[512];//uint32_t
+#ifndef __SYNTHESIS__
+    assert(N_BURST<=512);
+#endif
+    ap_uint<N_BYTE * 8 * N_PACK> tmp;
+    //hls::stream<ap_uint<1>> str_last_buff;
+    do {
+        for (int i = 0; i < N_BURST * N_PACK; i++) {
+#pragma HLS PIPELINE II = 1
+            ap_uint<2> bs = i % N_PACK;
+            if (isLast == 0) {
+                ap_uint<W_STR> w = str_s.read();
+                isLast = w(W_STR-1, W_STR-1);
+                tmp( bs*N_BYTE*8+W_STR-2 , bs*N_BYTE*8) = w(W_STR-2, 0);
+                num_w++;
+            } else
+                tmp( bs*N_BYTE*8+W_STR-2 , bs*N_BYTE*8 ) = 0;
+            if (bs == N_PACK - 1) {
+                buff[i / N_PACK] = tmp;
+                //str_last_buff.write(isLast);
+            }
+        }
+        //memcpy((void*)ptmp, (void*)buff, N_BURST*N_PACK);
+        for (int j = 0; j < N_BURST; j++)
+#pragma HLS PIPELINE II = 1
+            ptmp[j] = buff[j];
+        ptmp += N_BURST;
+    } while (isLast == 0);
+
+    pdes[axi_arith.pos++] = axi_arith.pre_byte;
+	for(; axi_arith.run > 0; axi_arith.run--)
+		pdes[axi_arith.pos++] = 0xff;
+
+    return num_w;
+}
+
+/////template PackStr2Mem_t////////////////////////
+template < int W_STR,int B_LAST, int N_BURST>
+int PackStr2Mem_t(
+    uint8_t* pdes,
+    hls::stream<ap_uint<W_STR> > &str_s)
+{
+    const int N_BYTE = ((W_STR-1+7)/8);
+    const int N_PACK = (4/N_BYTE);
+    uint8_t* ptmp = pdes;//uint32_t
+    int num_w = 0;
+    ap_uint<1> isLast = 0;
+    uint8_t buff[512];//uint32_t
+#ifndef __SYNTHESIS__
+    assert(N_BURST<=512);
+#endif
+    ap_uint<N_BYTE * 8 * N_PACK> tmp;
+    //hls::stream<ap_uint<1>> str_last_buff;
+    do {
+        for (int i = 0; i < N_BURST * N_PACK; i++) {
+#pragma HLS PIPELINE II = 1
+            ap_uint<2> bs = i % N_PACK;
+            if (isLast == 0) {
+                ap_uint<W_STR> w = str_s.read();
+                isLast = w(W_STR-1, W_STR-1);
+                tmp( bs*N_BYTE*8+W_STR-2 , bs*N_BYTE*8) = w(W_STR-2, 0);
+                num_w++;
+            } else
+                tmp( bs*N_BYTE*8+W_STR-2 , bs*N_BYTE*8 ) = 0;
+            if (bs == N_PACK - 1) {
+                buff[i / N_PACK] = tmp;
+                //str_last_buff.write(isLast);
+            }
+        }
+        //memcpy((void*)ptmp, (void*)buff, N_BURST*N_PACK);
+        for (int j = 0; j < N_BURST; j++)
+#pragma HLS PIPELINE II = 1
+            ptmp[j] = buff[j];
+        ptmp += N_BURST;
+    } while (isLast == 0);
+
+    return num_w;
+}
+#endif
+
+void kernel_LeptonE_strmIn( // declared at global scope; same parameter list as xf::codec::details::kernel_LeptonE_strmIn_engine except the byte stream type
+    // input
+    hls::stream<ap_int<11> > coef[8], // same type as str_coef[8] of hls_next_mcupos_strm
+
+    uint16_t axi_width[MAX_NUM_COLOR], // colldata->block_width(i);
+    uint8_t axi_map_row2cmp[4],        //     AXI                   2,1,0,0 2,1,0
+    uint8_t min_nois_thld_x[MAX_NUM_COLOR][64],
+    uint8_t min_nois_thld_y[MAX_NUM_COLOR][64],
+    uint8_t q_tables[MAX_NUM_COLOR][8][8], //[64],
+    int32_t idct_q_table_x[MAX_NUM_COLOR][8][8],
+    int32_t idct_q_table_y[MAX_NUM_COLOR][8][8],
+
+    uint16_t axi_mcuv,
+    uint8_t axi_num_cmp_mcu,
+
+    // output
+    struct_arith& axi_arith,
+    hls::stream<bool>& strm_pos_o_e,
+    hls::stream<unsigned char>& strm_pos_o_byte // unsigned char here, ap_uint<8> in the engine declaration
+
+    );
+
+namespace xf {
+namespace codec {
+namespace details {
+
+void kernel_LeptonE_strmIn_engine( // same parameter list as the global kernel_LeptonE_strmIn except the byte stream type
+    // input
+    hls::stream<ap_int<11> > coef[8],
+
+    uint16_t axi_width[MAX_NUM_COLOR], // colldata->block_width(i);
+    uint8_t axi_map_row2cmp[4],        //     AXI                   2,1,0,0 2,1,0
+    uint8_t min_nois_thld_x[MAX_NUM_COLOR][64],
+    uint8_t min_nois_thld_y[MAX_NUM_COLOR][64],
+    uint8_t q_tables[MAX_NUM_COLOR][8][8], //[64],
+    int32_t idct_q_table_x[MAX_NUM_COLOR][8][8],
+    int32_t idct_q_table_y[MAX_NUM_COLOR][8][8],
+
+    uint16_t axi_mcuv,
+    uint8_t axi_num_cmp_mcu,
+
+    // output
+    struct_arith& axi_arith,
+    hls::stream<bool>& strm_pos_o_e,
+    hls::stream<ap_uint<8> >& strm_pos_o_byte); // ap_uint<8> here, unsigned char in kernel_LeptonE_strmIn
+
+} // namespace details
+} // namespace codec
+} // namespace xf
+#endif
diff --git a/codec/L2/include/hw/leptonEnc/lepton/XAcc_dc.hpp b/codec/L2/include/hw/leptonEnc/lepton/XAcc_dc.hpp
new file mode 100644
index 0000000000..466b992026
--- /dev/null
+++ b/codec/L2/include/hw/leptonEnc/lepton/XAcc_dc.hpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file XAcc_dc.hpp
+ * @brief lepton dc function API.
+ *
+ * This file is part of HLS algorithm library.
+ */
+
+#ifndef __cplusplus
+#error " XAcc_dc.hpp hls::stream<> interface, and thus requires C++"
+#endif
+
+#ifndef _XACC_DC_HPP_
+#define _XACC_DC_HPP_
+#include <ap_int.h>
+#include <hls_stream.h>
+#include "XAcc_common.hpp"
+
+namespace xf {
+namespace codec {
+namespace details {
+
+void hls_serialize_tokens_dc(bool above_present,
+                             ap_uint<2> id_cmp,
+                             uint16_t block_width,
+                             uint8_t q_tables0[MAX_NUM_COLOR][8][8],
+                             uint8_t q0,
+
+                             hls::stream<coef_t> str_rast8[8],
+                             hls::stream<coef_t>& str_dc_in,
+
+                             hls::stream<ap_uint<4> >& strm_sel_tab,
+                             hls::stream<bool>& strm_cur_bit,
+                             hls::stream<short>& strm_len,
+                             //		hls::stream<bool>		 & strm_e,
+                             hls::stream<ap_uint<16> >& strm_addr1,
+                             hls::stream<ap_uint<16> >& strm_addr2,
+                             hls::stream<ap_uint<16> >& strm_addr3);
+
+void pre_serialize_tokens_dc(bool above_present,
+                             ap_uint<2> id_cmp,
+                             uint16_t block_width,
+                             uint8_t q_tables0[MAX_NUM_COLOR][8][8],
+                             uint8_t q0,
+
+                             hls::stream<coef_t> strm_in[8],
+                             hls::stream<coef_t>& str_dc_in,
+
+                             hls::stream<int16_t>& strm_coef,
+                             hls::stream<int>& strm_uncertainty,
+                             hls::stream<int>& strm_uncertainty2);
+
+void dc_push_bit(uint16_t block_width,
+
+                 hls::stream<int16_t>& strm_coef,
+                 hls::stream<int>& strm_uncertainty,
+                 hls::stream<int>& strm_uncertainty2,
+
+                 hls::stream<ap_uint<4> >& strm_sel_tab,
+                 hls::stream<bool>& strm_cur_bit,
+                 hls::stream<bool>& strm_e,
+                 hls::stream<ap_uint<16> >& strm_addr1,
+                 hls::stream<ap_uint<16> >& strm_addr2,
+                 hls::stream<ap_uint<16> >& strm_addr3
+
+                 );
+
+void dc_push_bit_v2(uint16_t block_width,
+
+                    hls::stream<int16_t>& strm_coef,
+                    hls::stream<int>& strm_uncertainty,
+                    hls::stream<int>& strm_uncertainty2,
+
+                    hls::stream<ap_uint<4> >& strm_sel_tab,
+                    hls::stream<bool>& strm_cur_bit,
+                    hls::stream<short>& strm_len,
+                    //		hls::stream<bool>		 & strm_e,
+                    hls::stream<ap_uint<16> >& strm_addr1,
+                    hls::stream<ap_uint<16> >& strm_addr2,
+                    hls::stream<ap_uint<16> >& strm_addr3
+
+                    );
+
+} // namespace details
+} // namespace codec
+} // namespace xf
+#endif
diff --git a/codec/L2/include/hw/leptonEnc/lepton/XAcc_edges.hpp b/codec/L2/include/hw/leptonEnc/lepton/XAcc_edges.hpp
new file mode 100644
index 0000000000..7a3677510b
--- /dev/null
+++ b/codec/L2/include/hw/leptonEnc/lepton/XAcc_edges.hpp
@@ -0,0 +1,322 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file XAcc_edges.hpp
+ * @brief lepton edge function API.
+ *
+ * This file is part of HLS algorithm library.
+ */
+
+#ifndef __cplusplus
+#error " XAcc_edge.hpp hls::stream<> interface, and thus requires C++"
+#endif
+
+#ifndef _XACC_EDGES_HPP_
+#define _XACC_EDGES_HPP_
+#include <ap_int.h>
+#include <hls_stream.h>
+#include <hls_math.h>
+#include "XAcc_common.hpp"
+
+namespace xf {
+namespace codec {
+namespace details {
+
+/**
+ * @brief Edge pre-processing stage. Reads strm_coef_end before every iteration and stops once it is true; each
+ *        iteration consumes one lane index, one strm_has_left flag and eight here/above coefficients, then emits
+ *        the coefficient exponent, the prior exponent, the optional sign pair and the noise context.
+ * @tparam is_horizontal true: threshold/coordinate index is (lane + 1); false: it is ((lane + 1) << 3).
+ */
+template <bool is_horizontal>
+void prepare_edge(ap_uint<32> block_width, // not referenced in the body
+                  bool ap_color,           // first index into min_nois_thld and idct_q_table_x
+                  uint8_t min_nois_thld[MAX_NUM_COLOR][64],
+                  bool above_present,
+                  hls::stream<ap_uint<3> >& strm_lane,
+                  int32_t idct_q_table_x[MAX_NUM_COLOR][8][8],
+
+                  hls::stream<ap_int<11> > strm_coef_here[8],
+                  hls::stream<ap_int<11> > strm_coef_above[8],
+                  hls::stream<bool>& strm_has_left,
+                  hls::stream<bool>& strm_coef_end,
+
+                  hls::stream<ap_uint<4> >& strm_length_exp_out,
+
+                  hls::stream<ap_uint<4> >& strm_best_prior_exp,
+
+                  hls::stream<bool>& strm_cur_bit_sign,
+                  hls::stream<ap_uint<2> >& strm_tri_sign,
+
+                  hls::stream<ap_uint<11> >& strm_abs_coef_nois,
+                  hls::stream<ap_uint<8> >& strm_ctx_nois,
+                  hls::stream<ap_uint<8> >& strm_min_nois,
+                  hls::stream<ap_uint<6> >& strm_coord_nois
+
+                  ) {
+    bool e;
+
+    e = strm_coef_end.read();
+
+    while (!e) {
+#pragma HLS pipeline II = 1
+        // One lane per iteration. The former local aligned_block_offset (ap_uint<3> holding 50/57, never read)
+        // and the unused num_nonzeros_bin were removed; no stream access changed.
+        int lane = strm_lane.read();
+
+        ap_int<11> lak_coef_here[8];
+        ap_int<11> lak_coef_above[8];
+        lak_coef_here[0] = 0;
+
+        ap_int<11> coef = strm_coef_here[0].read(); // by zyl
+        lak_coef_above[0] = strm_coef_above[0].read();
+
+        for (int i = 1; i < 8; i++) {
+#pragma HLS unroll
+            lak_coef_here[i] = strm_coef_here[i].read();
+            lak_coef_above[i] = strm_coef_above[i].read();
+        }
+
+        ap_int<32> best_prior;
+        ap_int<32> bsr_best_prior;
+
+        int sum[7];
+
+        // strm_has_left is the first operand, so it is consumed on every iteration regardless of is_horizontal.
+        if ((!strm_has_left.read() && !is_horizontal) || (is_horizontal && above_present)) {
+            best_prior = lak_coef_above[0] * idct_q_table_x[ap_color][lane + 1][0];
+            for (int i = 1; i < 8; ++i) {
+#pragma HLS unroll
+                int sign = (i & 1) ? 1 : -1; // +1 for odd i, -1 for even i
+                sum[i - 1] = idct_q_table_x[ap_color][lane + 1][i] * (lak_coef_here[i] + sign * lak_coef_above[i]);
+            }
+            best_prior = (best_prior - sum[0] - sum[1] - sum[2] - sum[3] - sum[4] - sum[5] - sum[6]) /
+                         idct_q_table_x[ap_color][lane + 1][0];
+        } else {
+            best_prior = 0;
+        }
+
+        // Bit length of |best_prior|, capped at 11.
+        bsr_best_prior = hls::min(32 - hls::abs(best_prior).countLeadingZeros(), 11);
+
+        // Bit length of |coef|, where coef is the value read from strm_coef_here[0] above.
+
+        ap_uint<11> abs_coef = hls::abs(coef);
+        ap_uint<4> length;
+        length = 11 - abs_coef.countLeadingZeros();
+
+        strm_length_exp_out.write(length);
+        strm_best_prior_exp.write(bsr_best_prior);
+
+        uint8_t min_threshold;
+
+        if (is_horizontal)
+            min_threshold = min_nois_thld[ap_color][lane + 1];
+        else
+            min_threshold = min_nois_thld[ap_color][(lane + 1) << 3];
+
+        if (length != 0) { // sign information is emitted only for non-zero coefficients
+            strm_cur_bit_sign.write(coef >= 0);
+            strm_tri_sign.write(best_prior == 0 ? 0 : (best_prior > 0 ? 1 : 2)); // 0: zero, 1: positive, 2: negative
+        }
+
+        strm_abs_coef_nois.write(abs_coef);
+        uint16_t ctx_abs = hls::abs(best_prior);
+        if (is_horizontal)
+            strm_coord_nois.write(lane + 1);
+        else
+            strm_coord_nois.write((lane + 1) << 3);
+        strm_ctx_nois.write(hls::min(ctx_abs >> min_threshold, 255)); // context saturates at 255
+        strm_min_nois.write(min_threshold);
+        e = strm_coef_end.read();
+    }
+}
+
+void push_bit_edge_0(ap_uint<32> block_width,
+                     hls::stream<ap_uint<6> >& strm_num_nonzeros_7x7,
+
+                     hls::stream<ap_uint<3> >& strm_h_nz_len,
+                     hls::stream<ap_uint<3> >& strm_eob_x,
+                     hls::stream<ap_uint<4> >& strm_length_h,
+
+                     hls::stream<ap_uint<4> >& strm_best_prior_exp_h,
+
+                     hls::stream<bool>& strm_cur_bit_sign_h,
+                     hls::stream<ap_uint<2> >& strm_tri_sign_h,
+
+                     hls::stream<ap_uint<11> >& strm_abs_coef_nois_h,
+                     hls::stream<ap_uint<8> >& strm_ctx_nois_h,
+                     hls::stream<ap_uint<8> >& strm_min_nois_h,
+                     hls::stream<ap_uint<6> >& strm_coord_nois_h,
+
+                     hls::stream<ap_uint<3> >& strm_v_nz_len,
+                     hls::stream<ap_uint<3> >& strm_eob_y,
+                     hls::stream<ap_uint<4> >& strm_length_v,
+
+                     hls::stream<ap_uint<4> >& strm_best_prior_exp_v,
+
+                     hls::stream<bool>& strm_cur_bit_sign_v,
+                     hls::stream<ap_uint<2> >& strm_tri_sign_v,
+
+                     hls::stream<ap_uint<11> >& strm_abs_coef_nois_v,
+                     hls::stream<ap_uint<8> >& strm_ctx_nois_v,
+                     hls::stream<ap_uint<8> >& strm_min_nois_v,
+                     hls::stream<ap_uint<6> >& strm_coord_nois_v,
+
+                     hls::stream<short>& strm_edge_len,
+
+                     hls::stream<ap_uint<4> >& strm_sel_tab,
+                     hls::stream<bool>& strm_cur_bit,
+                     // hls::stream<bool>		 & strm_e,
+                     hls::stream<ap_uint<16> >& strm_addr1,
+                     hls::stream<ap_uint<16> >& strm_addr2,
+                     hls::stream<ap_uint<16> >& strm_addr3,
+                     hls::stream<ap_uint<16> >& strm_addr4);
+
+void hls_serialize_tokens_edges(ap_uint<32> block_width,
+                                bool ap_color,
+                                uint8_t min_nois_thld_x[MAX_NUM_COLOR][64],
+                                uint8_t min_nois_thld_y[MAX_NUM_COLOR][64],
+                                bool left_present,
+                                bool above_present,
+                                bool above_right_present,
+                                hls::stream<ap_uint<6> >& strm_num_nonzeros_7x7_h,
+                                hls::stream<ap_uint<3> >& strm_h_nz_len,
+                                hls::stream<ap_uint<3> >& strm_lane_h,
+
+                                hls::stream<ap_uint<3> >& strm_v_nz_len,
+                                hls::stream<ap_uint<3> >& strm_lane_v,
+
+                                hls::stream<ap_uint<3> >& strm_eob_x,
+                                hls::stream<ap_uint<3> >& strm_eob_y,
+
+                                int32_t idct_q_table_x[MAX_NUM_COLOR][8][8],
+                                int32_t idct_q_table_y[MAX_NUM_COLOR][8][8],
+
+                                hls::stream<ap_int<11> > strm_coef_h_here[8],
+                                hls::stream<ap_int<11> > strm_coef_h_above[8],
+                                hls::stream<bool>& strm_has_left_h,
+                                hls::stream<bool>& strm_coef_h_end,
+                                hls::stream<ap_int<11> > strm_coef_v_here[8],
+                                hls::stream<ap_int<11> > strm_coef_v_left[8],
+                                hls::stream<bool>& strm_has_left_v,
+                                hls::stream<bool>& strm_coef_v_end,
+
+                                hls::stream<ap_uint<4> >& strm_sel_tab,
+                                hls::stream<bool>& strm_cur_bit,
+                                hls::stream<bool>& strm_e,
+                                hls::stream<ap_uint<16> >& strm_addr1,
+                                hls::stream<ap_uint<16> >& strm_addr2,
+                                hls::stream<ap_uint<16> >& strm_addr3,
+                                hls::stream<ap_uint<16> >& strm_addr4);
+
+void pre_serialize_tokens_edges(ap_uint<32> block_width,
+                                bool ap_color,
+                                uint8_t min_nois_thld_x[MAX_NUM_COLOR][64],
+                                uint8_t min_nois_thld_y[MAX_NUM_COLOR][64],
+                                bool left_present,
+                                bool above_present,
+                                bool above_right_present,
+                                // hls::stream<ap_uint<6> >& strm_num_nonzeros_7x7_h,
+                                // hls::stream<ap_uint<3> >& strm_h_nz_len,
+                                hls::stream<ap_uint<3> >& strm_lane_h,
+
+                                // hls::stream<ap_uint<3> >& strm_v_nz_len,
+                                hls::stream<ap_uint<3> >& strm_lane_v,
+
+                                // hls::stream<ap_uint<3> >& strm_eob_x,
+                                // hls::stream<ap_uint<3> >& strm_eob_y,
+
+                                int32_t idct_q_table_x[MAX_NUM_COLOR][8][8],
+                                int32_t idct_q_table_y[MAX_NUM_COLOR][8][8],
+
+                                hls::stream<ap_int<11> > strm_coef_h_here[8],
+                                hls::stream<ap_int<11> > strm_coef_h_above[8],
+                                hls::stream<bool>& strm_has_left_h,
+                                hls::stream<bool>& strm_coef_h_end,
+                                hls::stream<ap_int<11> > strm_coef_v_here[8],
+                                hls::stream<ap_int<11> > strm_coef_v_left[8],
+                                hls::stream<bool>& strm_has_left_v,
+                                hls::stream<bool>& strm_coef_v_end,
+
+                                hls::stream<ap_uint<4> >& strm_length_exp_h,
+
+                                hls::stream<ap_uint<4> >& strm_best_prior_exp_h,
+
+                                hls::stream<bool>& strm_cur_bit_sign_h,
+                                hls::stream<ap_uint<2> >& strm_tri_sign_h,
+
+                                hls::stream<ap_uint<11> >& strm_abs_coef_nois_h,
+                                hls::stream<ap_uint<8> >& strm_ctx_nois_h,
+                                hls::stream<ap_uint<8> >& strm_min_nois_h,
+                                hls::stream<ap_uint<6> >& strm_coord_nois_h,
+
+                                hls::stream<ap_uint<4> >& strm_length_exp_v,
+
+                                hls::stream<ap_uint<4> >& strm_best_prior_exp_v,
+
+                                hls::stream<bool>& strm_cur_bit_sign_v,
+                                hls::stream<ap_uint<2> >& strm_tri_sign_v,
+
+                                hls::stream<ap_uint<11> >& strm_abs_coef_nois_v,
+                                hls::stream<ap_uint<8> >& strm_ctx_nois_v,
+                                hls::stream<ap_uint<8> >& strm_min_nois_v,
+                                hls::stream<ap_uint<6> >& strm_coord_nois_v
+
+                                );
+
+void push_bit_edge(ap_uint<32> block_width,
+                   hls::stream<ap_uint<6> >& strm_num_nonzeros_7x7,
+
+                   hls::stream<ap_uint<3> >& strm_h_nz_len,
+                   hls::stream<ap_uint<3> >& strm_eob_x,
+                   hls::stream<ap_uint<4> >& strm_length_h,
+
+                   hls::stream<ap_uint<4> >& strm_best_prior_exp_h,
+
+                   hls::stream<bool>& strm_cur_bit_sign_h,
+                   hls::stream<ap_uint<2> >& strm_tri_sign_h,
+
+                   hls::stream<ap_uint<11> >& strm_abs_coef_nois_h,
+                   hls::stream<ap_uint<8> >& strm_ctx_nois_h,
+                   hls::stream<ap_uint<8> >& strm_min_nois_h,
+                   hls::stream<ap_uint<6> >& strm_coord_nois_h,
+
+                   hls::stream<ap_uint<3> >& strm_v_nz_len,
+                   hls::stream<ap_uint<3> >& strm_eob_y,
+                   hls::stream<ap_uint<4> >& strm_length_v,
+
+                   hls::stream<ap_uint<4> >& strm_best_prior_exp_v,
+
+                   hls::stream<bool>& strm_cur_bit_sign_v,
+                   hls::stream<ap_uint<2> >& strm_tri_sign_v,
+
+                   hls::stream<ap_uint<11> >& strm_abs_coef_nois_v,
+                   hls::stream<ap_uint<8> >& strm_ctx_nois_v,
+                   hls::stream<ap_uint<8> >& strm_min_nois_v,
+                   hls::stream<ap_uint<6> >& strm_coord_nois_v,
+
+                   hls::stream<ap_uint<4> >& strm_sel_tab,
+                   hls::stream<bool>& strm_cur_bit,
+                   hls::stream<bool>& strm_e,
+                   hls::stream<ap_uint<16> >& strm_addr1,
+                   hls::stream<ap_uint<16> >& strm_addr2,
+                   hls::stream<ap_uint<16> >& strm_addr3,
+                   hls::stream<ap_uint<16> >& strm_addr4);
+
+} // namespace details
+} // namespace codec
+} // namespace xf
+
+#endif
diff --git a/codec/L2/include/hw/leptonEnc/lepton/XAcc_model.hpp b/codec/L2/include/hw/leptonEnc/lepton/XAcc_model.hpp
new file mode 100644
index 0000000000..ecd04a2f92
--- /dev/null
+++ b/codec/L2/include/hw/leptonEnc/lepton/XAcc_model.hpp
@@ -0,0 +1,297 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file XAcc_model.hpp
+ * @brief Lepton model API: RAM init, 7x7/DC/edge token collection, and probability look-up.
+ *
+ * This file is part of HLS algorithm library.
+ */
+
+#ifndef __cplusplus
+#error " XAcc_model.hpp hls::stream<> interface, and thus requires C++"
+#endif
+
+#ifndef _XACC_MODEL_HPP_
+#define _XACC_MODEL_HPP_
+#include <stdint.h>
+#include <stdio.h>
+#include "XAcc_common.hpp"
+
+class stt_range { // Records the smallest and largest value observed in each of STTRANGE slots.
+#define STTRANGE (5) // number of tracked slots; the macro stays defined after the class
+   public:
+    uint8_t cnt_min[STTRANGE]; // per-slot minimum, starts at 255
+    uint8_t cnt_max[STTRANGE]; // per-slot maximum, starts at 0
+    stt_range() {
+        for (int i = 0; i < STTRANGE; i++) {
+            cnt_min[i] = 255;
+            cnt_max[i] = 0;
+        }
+    }
+    void print_range(int i) { printf("s%d: [ %d, %d ] \n", i + 1, cnt_min[i], cnt_max[i]); } // label is 1-based
+    void print_range(const char* name) { // const so that string literals are accepted; char* callers still work
+        printf("%s\n", name);
+        for (int i = 0; i < STTRANGE; i++) print_range(i);
+    }
+    void update(int i, uint8_t s) { // i is not range-checked; callers must pass 0 <= i < STTRANGE
+        if (s > cnt_max[i]) cnt_max[i] = s;
+        if (s < cnt_min[i]) cnt_min[i] = s;
+    }
+    void update5(uint8_t s1, uint8_t s2, uint8_t s3, uint8_t s4, uint8_t s5) { // s1..s5 go to slots 0..4
+        update(0, s1);
+        update(1, s2);
+        update(2, s3);
+        update(3, s4);
+        update(4, s5);
+    }
+};
+struct hls_Model { // hls_Branch tables plus one pointer accessor per table; trailing numbers are element counts
+    hls_Branch num_nonzeros_counts_7x7_[2][26][6][32]; // 2*26*6*32 = 9984
+
+    hls_Branch num_nonzeros_counts_1x8_[2][8][8][3][4]; // 2*8*8*3*4 = 1536
+    hls_Branch num_nonzeros_counts_8x1_[2][8][8][3][4]; // 2*8*8*3*4 = 1536
+
+    hls_Branch residual_noise_counts_[2][64][10][10];      // 2*64*10*10 = 12800
+    hls_Branch residual_noise_counts_dc_[12][10];          // 12*10 = 120
+    hls_Branch residual_threshold_counts_[2][256][8][128]; // 2*256*8*128 = 524288
+
+    hls_Branch exponent_counts_[2][10][49][12][11];   // 2*10*49*12*11 = 129360
+    hls_Branch exponent_counts_x_[2][10][15][12][11]; // 2*10*15*12*11 = 39600
+    hls_Branch exponent_counts_dc_[12][17][11];       // 12*17*11 = 2244
+
+    hls_Branch sign_counts_[2][4][12];   // 2*4*12 = 96
+    hls_Branch sign_counts_77[2][4][12]; // 2*4*12 = 96; not referenced by any accessor in this struct
+
+    hls_Branch* num_nonzeros_counts_7x7_at(uint8_t s1, uint8_t s2, uint8_t s3, uint8_t s4) {
+#pragma HLS INLINE
+        return &num_nonzeros_counts_7x7_[s1][s2][s3][s4]; // indices are not range-checked (same for all *_at)
+    }
+
+    hls_Branch* num_nonzeros_counts_1x8_at(uint8_t s1, uint8_t s2, uint8_t s3, uint8_t s4, uint8_t s5) {
+#pragma HLS INLINE
+        return &num_nonzeros_counts_1x8_[s1][s2][s3][s4][s5];
+    }
+    hls_Branch* num_nonzeros_counts_8x1_at(uint8_t s1, uint8_t s2, uint8_t s3, uint8_t s4, uint8_t s5) {
+#pragma HLS INLINE
+        return &num_nonzeros_counts_8x1_[s1][s2][s3][s4][s5];
+    } //[2][8][8][3][4];
+
+    hls_Branch* residual_noise_counts_at(uint8_t s1, uint8_t s2, uint8_t s3, uint8_t s4) {
+#pragma HLS INLINE
+        return &residual_noise_counts_[s1][s2][s3][s4];
+    } //[2][64][10][10];
+
+    hls_Branch* residual_noise_counts_dc_at(uint8_t s1, uint8_t s2) { // uint8_t s3, uint8_t s4){
+#pragma HLS INLINE
+        return &residual_noise_counts_dc_[s1][s2]; //[s3][s4];
+    }                                              //[12][10];
+
+    uint8_t cnt_s1_min; // cnt_s*_min / cnt_s*_max: observed range of each index passed to
+    uint8_t cnt_s2_min; // residual_threshold_counts_at(); reset by init_1(), printed by print_range()
+    uint8_t cnt_s3_min;
+    uint8_t cnt_s4_min;
+    uint8_t cnt_s1_max;
+    uint8_t cnt_s2_max;
+    uint8_t cnt_s3_max;
+    uint8_t cnt_s4_max;
+    void init_1() { // resets the range statistics: minima to 255, maxima to 0
+        cnt_s1_min = 255;
+        cnt_s2_min = 255;
+        cnt_s3_min = 255;
+        cnt_s4_min = 255;
+        cnt_s1_max = 000;
+        cnt_s2_max = 000;
+        cnt_s3_max = 000;
+        cnt_s4_max = 000;
+    }
+    void print_range() { // prints the recorded [min, max] of s1..s4 to stdout
+        printf("s1: [ %d, %d ] \n", cnt_s1_min, cnt_s1_max);
+        printf("s2: [ %d, %d ] \n", cnt_s2_min, cnt_s2_max);
+        printf("s3: [ %d, %d ] \n", cnt_s3_min, cnt_s3_max);
+        printf("s4: [ %d, %d ] \n", cnt_s4_min, cnt_s4_max);
+    }
+    hls_Branch* residual_threshold_counts_at(uint8_t s1, uint8_t s2, uint8_t s3, uint8_t s4) {
+#pragma HLS INLINE
+
+        if (s1 > cnt_s1_max) cnt_s1_max = s1; // side effect: every lookup updates the range statistics
+        if (s1 < cnt_s1_min) cnt_s1_min = s1;
+        if (s2 > cnt_s2_max) cnt_s2_max = s2;
+        if (s2 < cnt_s2_min) cnt_s2_min = s2;
+
+        if (s3 > cnt_s3_max) cnt_s3_max = s3;
+        if (s3 < cnt_s3_min) cnt_s3_min = s3;
+
+        if (s4 > cnt_s4_max) cnt_s4_max = s4;
+        if (s4 < cnt_s4_min) cnt_s4_min = s4;
+
+        return &residual_threshold_counts_[s1][s2][s3][s4];
+    } //[2][(1<<(1 + 7))][1 + 7][1<<7 ];
+
+    stt_range stt_counts; // range statistics of the indices passed to exponent_counts_at()
+    hls_Branch* exponent_counts_at(uint8_t s1, uint8_t s2, uint8_t s3, uint8_t s4, uint8_t s5) {
+#pragma HLS INLINE
+        stt_counts.update5(s1, s2, s3, s4, s5); // side effect: records the index ranges
+        return &exponent_counts_[s1][s2][s3][s4][s5];
+    } //[2][10][49][12][11];
+
+    hls_Branch* exponent_counts_x_at(uint8_t s1, uint8_t s2, uint8_t s3, uint8_t s4, uint8_t s5) {
+#pragma HLS INLINE
+        return &exponent_counts_x_[s1][s2][s3][s4][s5];
+    } //[2][10][15][12][11];
+
+    hls_Branch* exponent_counts_dc_at(uint8_t s1, uint8_t s2, uint8_t s3) {
+#pragma HLS INLINE
+        return &exponent_counts_dc_[s1][s2][s3];
+    } //[12][17][11];
+
+    hls_Branch* sign_counts_at(uint8_t s1, uint8_t s2, uint8_t s3) {
+#pragma HLS INLINE
+        return &sign_counts_[s1][s2][s3];
+    } //[2][4][12];
+
+    hls_Branch* sign_counts_77_at(uint8_t s1, uint8_t s2, uint8_t s3) {
+#pragma HLS INLINE
+        // return &sign_counts_77[s1][s2][s3];
+        return &sign_counts_[s1][s2][s3]; // NOTE(review): aliases sign_counts_, not sign_counts_77 - confirm intent
+    } //[2][4][12];
+
+    enum Printability { PRINTABLE_INSIGNIFICANT = 1, PRINTABLE_OK = 2, CLOSE_TO_50 = 4, CLOSE_TO_ONE_ANOTHER = 8 };
+};
+
+namespace xf {
+namespace codec {
+namespace details {
+
+// ------------------------------------------------------------
+void init_hlsmodel();
+
+// ------------------------------------------------------------
+void probability_look_up(ap_uint<1> ap_color,
+
+                         hls::stream<ap_uint<4> >& strm_sel_tab,
+                         hls::stream<bool>& strm_cur_bit,
+                         hls::stream<bool>& strm_e_in,
+                         hls::stream<ap_uint<16> >& strm_addr1,
+                         hls::stream<ap_uint<16> >& strm_addr2,
+                         hls::stream<ap_uint<16> >& strm_addr3,
+                         hls::stream<ap_uint<16> >& strm_addr4,
+
+                         hls::stream<bool>& strm_bit,
+                         hls::stream<uint8_t>& strm_prob,
+                         hls::stream<bool>& strm_e,
+                         hls::stream<uint8_t>& strm_tab_dbg);
+
+// ------------------------------------------------------------
+// Merges the per-block 7x7, edge and DC token streams (drained in that order) into one output token stream.
+// For each of the num_blk blocks the token counts come from strm_len0_77, strm_len1_edge and strm_len2_dc.
+template <class T0, class T1, class T2>
+void collect(uint16_t num_blk,
+
+             hls::stream<ap_uint<4> >& strm_sel_tab_77,
+             hls::stream<bool>& strm_cur_bit_77,
+             hls::stream<T0>& strm_len0_77,
+             hls::stream<ap_uint<16> >& strm_addr1_77,
+             hls::stream<ap_uint<16> >& strm_addr2_77,
+             hls::stream<ap_uint<16> >& strm_addr3_77,
+             hls::stream<ap_uint<16> >& strm_addr4_77,
+
+             hls::stream<ap_uint<4> >& strm_sel_tab_edge,
+             hls::stream<bool>& strm_cur_bit_edge,
+             hls::stream<T1>& strm_len1_edge,
+             hls::stream<ap_uint<16> >& strm_addr1_edge,
+             hls::stream<ap_uint<16> >& strm_addr2_edge,
+             hls::stream<ap_uint<16> >& strm_addr3_edge,
+             hls::stream<ap_uint<16> >& strm_addr4_edge,
+
+             hls::stream<ap_uint<4> >& strm_sel_tab_dc,
+             hls::stream<bool>& strm_cur_bit_dc,
+             hls::stream<T2>& strm_len2_dc,
+             hls::stream<ap_uint<16> >& strm_addr1_dc,
+             hls::stream<ap_uint<16> >& strm_addr2_dc,
+             hls::stream<ap_uint<16> >& strm_addr3_dc,
+
+             hls::stream<ap_uint<4> >& strm_sel_tab,
+             hls::stream<bool>& strm_cur_bit,
+             hls::stream<bool>& strm_out_e,
+             hls::stream<ap_uint<16> >& strm_addr1,
+             hls::stream<ap_uint<16> >& strm_addr2,
+             hls::stream<ap_uint<16> >& strm_addr3,
+             hls::stream<ap_uint<16> >& strm_addr4) {
+    int next_blk = 0;
+    T0 len0;
+    T1 len1;
+    T2 len2;
+    len0 = strm_len0_77.read(); // the first block's lengths are read before the loop, whatever num_blk is
+    len1 = strm_len1_edge.read();
+    len2 = strm_len2_dc.read();
+
+    while (next_blk < num_blk) { // one output token per iteration
+#pragma HLS pipeline II = 1
+        ap_uint<4> data_w_sel_t = 0; // zero-initialised so an iteration that reads nothing writes defined values
+        bool data_w_value = false;
+        ap_uint<16> data_w_addr1 = 0;
+        ap_uint<16> data_w_addr2 = 0;
+        ap_uint<16> data_w_addr3 = 0;
+        ap_uint<16> data_w_addr4 = 0;
+        if (len0 != 0) {
+            data_w_sel_t = strm_sel_tab_77.read();
+            data_w_value = strm_cur_bit_77.read();
+            data_w_addr1 = strm_addr1_77.read();
+            data_w_addr2 = strm_addr2_77.read();
+            data_w_addr3 = strm_addr3_77.read();
+            data_w_addr4 = strm_addr4_77.read();
+            len0--;
+        } else if (len1 != 0) {
+            data_w_sel_t = strm_sel_tab_edge.read();
+            data_w_value = strm_cur_bit_edge.read();
+            data_w_addr1 = strm_addr1_edge.read();
+            data_w_addr2 = strm_addr2_edge.read();
+            data_w_addr3 = strm_addr3_edge.read();
+            data_w_addr4 = strm_addr4_edge.read();
+            len1--;
+        } else if (len2 != 0) {
+            data_w_sel_t = strm_sel_tab_dc.read();
+            data_w_value = strm_cur_bit_dc.read();
+            data_w_addr1 = strm_addr1_dc.read();
+            data_w_addr2 = strm_addr2_dc.read();
+            data_w_addr3 = strm_addr3_dc.read();
+            data_w_addr4 = 0; // the DC path has no fourth address stream
+            len2--;
+            // NOTE(review): the block counter advances only in this DC branch, so each block needs len2 >= 1.
+            if (len0 == 0 && len1 == 0 && len2 == 0) {
+                if (next_blk < num_blk - 1) { // no lengths are read after the last block
+                    len0 = strm_len0_77.read();
+                    len1 = strm_len1_edge.read();
+                    len2 = strm_len2_dc.read();
+                }
+                next_blk++;
+            }
+        }
+        strm_sel_tab.write(data_w_sel_t); //,
+        strm_cur_bit.write(data_w_value); //,
+        strm_addr1.write(data_w_addr1);   //,
+        strm_addr2.write(data_w_addr2);   //,
+        strm_addr3.write(data_w_addr3);   //,
+        strm_addr4.write(data_w_addr4);   //
+        strm_out_e.write(false); // one 'false' end flag accompanies every token
+    } // while (next_blk < num_blk)
+    strm_out_e.write(true); // a final 'true' marks the end of the output
+}
+
+} // namespace details
+} // namespace codec
+} // namespace xf
+
+#endif
diff --git a/codec/L2/include/hw/leptonEnc/lepton/XModified.hpp b/codec/L2/include/hw/leptonEnc/lepton/XModified.hpp
new file mode 100644
index 0000000000..7a0b97502f
--- /dev/null
+++ b/codec/L2/include/hw/leptonEnc/lepton/XModified.hpp
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file XModified.hpp
+ * @brief lepton XModified function API.
+ *
+ * This file is part of HLS algorithm library.
+ */
+
+#ifndef __cplusplus
+#error " XModified.hpp hls::stream<> interface, and thus requires C++"
+#endif
+
+#ifndef _XMODIFIED_HPP_
+#define _XMODIFIED_HPP_
+//#include "model.hh"
+#include "XAcc_common.hpp"
+#include "XAcc_model.hpp"
+//#include "bool_encoder.hh"
+//#include "boolwriter.hh"
+
+enum hls_Index : uint8_t { // Coefficient index constants; the set of enumerators depends on OPTIMIZED_7x7.
+#ifdef OPTIMIZED_7x7
+    AC_7x7_INDEX = 0,
+    AC_7x7_END = 49,
+    DC_INDEX = 49,
+    ROW_X_INDEX = 50,
+    ROW_X_END = 57,
+    ROW_Y_INDEX = 57,
+    ROW_Y_END = 64
+#else
+    // AC_7x7_INDEX = 9,
+    // AC_7x7_END = 63,
+    DC_INDEX = 0, // the only enumerator defined when OPTIMIZED_7x7 is not set
+// ROW_X_INDEX = 1,
+// ROW_X_END = 7,
+// ROW_Y_INDEX = 57,
+// ROW_Y_END = 64
+#endif
+};
+
+struct COEF8 { // Bundle of eight signed 16-bit values.
+    int16_t data[8];
+};
+
+struct struct_edge_data { // Two arrays of eight unsigned 16-bit values each.
+    uint16_t v[8]; // presumably the vertical-edge values -- TODO confirm against the users of this struct
+    uint16_t h[8]; // presumably the horizontal-edge values -- TODO confirm
+};
+struct struct_ctx_edge { // Four arrays of eight unsigned 16-bit values each.
+    uint16_t here_v[8];  // presumably the v values of the current block -- TODO confirm
+    uint16_t here_h[8];  // presumably the h values of the current block -- TODO confirm
+    uint16_t left_v[8];  // presumably the v values of the left neighbour -- TODO confirm
+    uint16_t above_h[8]; // presumably the h values of the neighbour above -- TODO confirm
+};
+extern uint8_t Shift_table[256][2];
+
+class stt_dis { // Debug statistics: how many get_dis() calls ago the same hls_Branch pointer was last recorded.
+#define STTDIS (8)
+   public:
+    hls_Branch* history[STTDIS]; // ring buffer holding the most recently recorded pointers
+    int cnt_dis[STTDIS];         // cnt_dis[d]: number of matches found at distance d (index 0 is never written)
+    unsigned long total;         // incremented once per loop iteration inside get_dis()
+    int pos;                     // next write slot in history
+    stt_dis() {
+        total = 0;
+        pos = 0;
+        for (int i = 0; i < STTDIS; i++) {
+            history[i] = 0;
+            cnt_dis[i] = 0;
+        }
+    }
+    void print_dis() { // Prints every distance counter together with its share of total.
+        for (int i = 1; i < STTDIS; i++) { // "%%" prints a literal percent sign; "\%" was not a valid escape
+            printf("dis_%d = %d Percentage:%f%% \n", i, cnt_dis[i], (float)cnt_dis[i] / (float)total * 100.0);
+        }
+    }
+    int get_dis(hls_Branch* pb) { // Records pb; returns the distance (1..STTDIS-1) to its latest match, else 0.
+        int ret = 0;
+        history[pos++] = pb;
+        for (int i = 1; i < STTDIS; i++) {
+            int ph = pos - i - 1; // slot written i calls before this one
+            if (ph < 0) ph += STTDIS;
+            total++; // NOTE(review): counts comparisons rather than calls -- confirm this is intended
+            if (pos == STTDIS) pos = 0; // wrap the write position of the ring buffer
+            if (i >= total) return 0;
+            if (history[ph] == pb) {
+                cnt_dis[ret + 1]++;
+                return ret + 1;
+            }
+            ret++;
+        }
+        return 0;
+    }
+};
+
+class hls_AlignedBlock { // Holds the 64 coefficients of one block, with index-translating accessors.
+   public:
+    int16_t coef[64];
+    enum Index : uint8_t {
+        AC_7x7_INDEX = 0,
+        AC_7x7_END = 49,
+        DC_INDEX = 49,
+        ROW_X_INDEX = 50,
+        ROW_X_END = 57,
+        ROW_Y_INDEX = 57,
+        ROW_Y_END = 64
+    };
+
+   public:
+    hls_AlignedBlock() {}
+    int16_t* raw_data() { return coef; }
+    const int16_t* raw_data() const { return coef; }
+    uint8_t recalculate_coded_length() const {
+        uint8_t count_7x7 = 0;
+        // Walk all 64 zigzag positions and count the non-zero coefficients
+        // whose raster column and raster row are both greater than zero.
+        for (uint8_t zz = 0; zz < 64; ++zz) {
+            const uint8_t raster = hls_jpeg_zigzag_to_raster[zz];
+            const uint8_t col = raster & 7;
+            const uint8_t row = raster >> 3;
+            const bool inside_7x7 = (col > 0) && (row > 0);
+            // The coefficient itself is looked up through the raster-to-aligned table.
+            if (inside_7x7 && coef[hls_raster_to_aligned[raster]] != 0) {
+                ++count_7x7;
+            }
+        }
+        return count_7x7;
+    }
+
+    int16_t& dc() { return coef[DC_INDEX]; }
+    int16_t dc() const { return coef[DC_INDEX]; }
+
+    int16_t& mutable_coefficients_raster(uint8_t index) { return coef[hls_raster_to_aligned[index]]; }
+    int16_t coefficients_raster(uint8_t index) const { return coef[hls_raster_to_aligned[index]]; }
+
+    int16_t& mutable_coefficients_zigzag(uint8_t index) { return coef[hls_zigzag_to_aligned[index]]; }
+    int16_t coefficients_zigzag(uint8_t index) const { return coef[hls_zigzag_to_aligned[index]]; }
+};
+
+namespace xf {
+namespace codec {
+namespace details {
+
+int hls_color_index(int c);
+
+uint8_t hls_get_num_nonzeros_context(
+    bool all_present, bool above_present, bool left_present, uint8_t num_nonzeros_above, uint8_t num_nonzeros_left);
+
+uint16_t hls_compute_aavrg(bool all_present,
+                           bool left_present,
+                           bool above_present,
+                           uint16_t abs_coef_left,
+                           uint16_t abs_coef_above,     //[64],
+                           uint16_t abs_coef_above_left //[64]
+
+                           );
+
+uint16_t abs16(int16_t din);
+/*int32_t hls_compute_lak(int COLOR,
+                        unsigned int band,
+                        bool all_present,
+                        bool left_present,
+                        bool above_present,
+                        int16_t coef_here[64],
+                        int16_t coef_left[64],
+                        int16_t coef_above[64]);*/
+
+// void vpx_enc(
+//	uint16_t block_width,
+//
+//    hls::stream<bool>&    strm_bit,
+//    hls::stream<uint8_t>& strm_prob,
+//    hls::stream<bool>&    strm_e,
+//	hls::stream<uint8_t>& strm_tab_dbg,
+//
+//    vpx_writer& boolwriter
+//
+//);
+} // namespace details
+} // namespace codec
+} // namespace xf
+
+#endif
diff --git a/codec/L2/include/hw/leptonEnc/lepton/jpeg_dec_lepton_enc.hpp b/codec/L2/include/hw/leptonEnc/lepton/jpeg_dec_lepton_enc.hpp
new file mode 100644
index 0000000000..6a89fda0c6
--- /dev/null
+++ b/codec/L2/include/hw/leptonEnc/lepton/jpeg_dec_lepton_enc.hpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2021 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef _JPEG_DEC_LEPTON_ENC_HPP_
+#define _JPEG_DEC_LEPTON_ENC_HPP_
+
+/**
+ * @file jpeg_dec_lepton_enc.hpp
+ * @brief interface of IMAGE Jpeg Decoder Lepton Encoder internal kernel.
+ */
+
+#include "XAcc_common.hpp"
+#include "XAcc_model.hpp"
+#include "XAcc_lepjpegdecoder.hpp"
+#include "XAcc_lepjfifparser.hpp"
+#include "XAcc_arith.hpp"
+#include "XAcc_77.hpp"
+#include "XAcc_edges.hpp"
+#include "XAcc_dc.hpp"
+#include "stream_to_axi.hpp"
+
+namespace xf {
+namespace codec {
+// ------------------------------------------------------------
+/**
+ * @brief IMAGE Jpeg Decoder Lepton Encoder internal kernel
+ * \rst
+ * For detailed document, see :ref:`JpegD_LeptonE_kernel_design`.
+ * \endrst
+ * @param datainDDR input image buffer.
+ * @param jpgSize size of input image buffer.
+ * @param arithInfo meta information of output buffer.
+ * @param res output lepton format data buffer.
+ */
+
+void jpegDecLeptonEnc(ap_uint<AXI_WIDTH>* datainDDR, int jpgSize, int* arithInfo, ap_uint<8>* res);
+
+} // namespace codec
+} // namespace xf
+
+#endif
diff --git a/codec/L2/include/hw/pikEnc/XAccPIKKernel1.hpp b/codec/L2/include/hw/pikEnc/XAccPIKKernel1.hpp
new file mode 100644
index 0000000000..15b9cf2e2d
--- /dev/null
+++ b/codec/L2/include/hw/pikEnc/XAccPIKKernel1.hpp
@@ -0,0 +1,2329 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file XAccPIKKernel1.hpp
+ */
+
+#ifndef _XF_CODEC_XACCPIKKERNEL1_HPP_
+#define _XF_CODEC_XACCPIKKERNEL1_HPP_
+
+#include "pik_common.hpp"
+#include "resize_mem.hpp"
+#include "xf_utils_hw/axi_to_multi_stream.hpp"
+
+#include <ap_int.h>
+#include <hls_math.h>
+#include <hls_stream.h>
+#include <iostream>
+#include <stdio.h>
+#include <string.h>
+#include <vector>
+
+static const int depth_to_buf = MAX_NUM_BLOCK88_W * MAX_NUM_BLOCK88_H * 3;
+
+static const int kRadius = 2;
+static const float kScaleR = 1.0f;
+static const float kScaleG = 1.0f;    // 2.0f - kScaleR;
+static const float kInvScaleR = 1.0f; // 1.0f / kScaleR;
+static const float kInvScaleG = 1.0f; // 1.0f / kScaleG;
+static const double kGaborishInverse = 0.92718927264540152;
+
+static const float kOpsinAbsorbanceMatrix[9] = {0.00117476669,  0.00248521916,  0.000304727902,
+                                                0.000868968258, 0.00268593687,  0.000377467572,
+                                                0.000786771008, 0.000275945873, 0.0021850043};
+
+static const float kOpsinAbsorbanceBias[3] = {0.00105043163, 0.000960550329, 0.000559058797};
+
+static const double kGaborish[5] = {-0.092359145662814029, -0.039253623634014627, 0.016176494530216929,
+                                    0.00083458437774987476, 0.004512465323949319};
+
+static float qmxlocal[64] = {
+    3436.970459, 1844.711548, 1476.212524, 1346.807495, 1294.897095, 1219.305786, 1336.331299, 2710.854736,
+    1844.711548, 1194.931519, 1049.940308, 975.210693,  927.143311,  873.855469,  985.372742,  1992.254883,
+    1476.212524, 1049.940308, 969.959534,  940.327026,  888.448120,  860.512329,  1072.243286, 2168.390381,
+    1346.807495, 975.210693,  940.327026,  879.867188,  833.526062,  832.720642,  1202.815796, 2435.889893,
+    1294.897095, 927.143311,  888.448120,  833.526062,  794.444641,  952.046143,  1430.633179, 2310.361572,
+    1219.305786, 873.855469,  860.512329,  832.720642,  952.046143,  1227.564819, 1841.159546, 1258.626343,
+    1336.331299, 985.372742,  1072.243286, 1202.815796, 1430.633179, 1841.159546, 1102.327393, 693.008972,
+    2710.854736, 1992.254883, 2168.390381, 2435.889893, 2310.361572, 1258.626343, 693.008972,  467.984436};
+
+static float qmblocal[64] = {
+    270.962311, 168.165771, 68.006966,  141.364029, 296.826141, 222.024902, 202.583298, 710.839050,
+    168.165771, 34.349800,  16.236092,  33.422955,  74.581551,  69.456650,  80.322029,  308.227905,
+    68.006966,  16.236092,  37.456863,  47.443451,  50.487465,  40.416729,  55.474945,  205.449615,
+    141.364029, 33.422955,  47.443451,  80.821915,  49.162575,  33.970627,  74.286789,  250.847351,
+    296.826141, 74.581551,  50.487465,  49.162575,  52.696980,  61.840820,  122.950966, 273.765747,
+    222.024902, 69.456650,  40.416729,  33.970627,  61.840820,  141.816147, 268.525055, 170.392395,
+    202.583298, 80.322029,  55.474945,  74.286789,  122.950966, 268.525055, 203.730957, 111.424103,
+    710.839050, 308.227905, 205.449615, 250.847351, 273.765747, 170.392395, 111.424103, 87.561890};
+
+static float qmxglb[64] = {
+    3436.970459, 1844.711548, 1476.212524, 1346.807495, 1294.897095, 1219.305786, 1336.331299, 2710.854736,
+    1844.711548, 1194.931519, 1049.940308, 975.210693,  927.143311,  873.855469,  985.372742,  1992.254883,
+    1476.212524, 1049.940308, 969.959534,  940.327026,  888.448120,  860.512329,  1072.243286, 2168.390381,
+    1346.807495, 975.210693,  940.327026,  879.867188,  833.526062,  832.720642,  1202.815796, 2435.889893,
+    1294.897095, 927.143311,  888.448120,  833.526062,  794.444641,  952.046143,  1430.633179, 2310.361572,
+    1219.305786, 873.855469,  860.512329,  832.720642,  952.046143,  1227.564819, 1841.159546, 1258.626343,
+    1336.331299, 985.372742,  1072.243286, 1202.815796, 1430.633179, 1841.159546, 1102.327393, 693.008972,
+    2710.854736, 1992.254883, 2168.390381, 2435.889893, 2310.361572, 1258.626343, 693.008972,  467.984436};
+
+static float qmbglb[64] = {
+    270.962311, 168.165771, 68.006966,  141.364029, 296.826141, 222.024902, 202.583298, 710.839050,
+    168.165771, 34.349800,  16.236092,  33.422955,  74.581551,  69.456650,  80.322029,  308.227905,
+    68.006966,  16.236092,  37.456863,  47.443451,  50.487465,  40.416729,  55.474945,  205.449615,
+    141.364029, 33.422955,  47.443451,  80.821915,  49.162575,  33.970627,  74.286789,  250.847351,
+    296.826141, 74.581551,  50.487465,  49.162575,  52.696980,  61.840820,  122.950966, 273.765747,
+    222.024902, 69.456650,  40.416729,  33.970627,  61.840820,  141.816147, 268.525055, 170.392395,
+    202.583298, 80.322029,  55.474945,  74.286789,  122.950966, 268.525055, 203.730957, 111.424103,
+    710.839050, 308.227905, 205.449615, 250.847351, 273.765747, 170.392395, 111.424103, 87.561890};
+
+// Cube-root approximation: a bit-level initial guess refined by two Newton steps.
+inline float hls_SimpleGammaRGB(float v) {
+#pragma HLS inline
+    // Seed: one third of the raw bit pattern of v plus a fixed bias.
+    const int seed_bits = 0x2a50f200 + fToBits<float, int>(v) / 3;
+    const float seed = bitsToF<int, float>(seed_bits);
+
+    const float third = 0.333333343; // 1.0f / 3.0f;
+
+    // Newton update for x^3 = v: x <- (2 * x + v / x^2) / 3.
+    const float refined_once = third * (2.0f * seed + v / (seed * seed));
+    const float refined_twice = third * (2.0f * refined_once + v / (refined_once * refined_once));
+
+    return refined_twice;
+}
+
+// Maps one RGB triple to (valx, valy, valz): matrix-plus-bias mix, clamp at zero, gamma, then sum/difference.
+inline void hls_LinerToXyb(const float r, const float g, const float b, float& valx, float& valy, float& valz) {
+    float gamma_mixed[3];
+    const float* matrix = &kOpsinAbsorbanceMatrix[0];
+    const float* offsets = &kOpsinAbsorbanceBias[0];
+    ap_uint<3> ch;
+RGB_TO_MIXED:
+    for (ch = 0; ch < 3; ch++) {
+#pragma HLS pipeline II = 1
+        ap_uint<3> row = ch * 3; // offset of matrix row ch inside the flat 9-element table
+        float absorbed = matrix[row] * r + matrix[row + 1] * g + matrix[row + 2] * b + offsets[ch];
+        if (0.0f > absorbed) absorbed = 0.0f;
+        gamma_mixed[ch] = hls_SimpleGammaRGB(absorbed);
+    }
+    const float scaled_r = kScaleR * gamma_mixed[0];
+    const float scaled_g = kScaleG * gamma_mixed[1];
+    valx = (scaled_r - scaled_g) * 0.5f;
+    valy = (scaled_r + scaled_g) * 0.5f;
+    valz = gamma_mixed[2];
+}
+
+// Reads xsize * ysize samples per input channel, converts each triple with hls_LinerToXyb and
+// writes the three results to row_out; the second result is additionally copied to row_y.
+inline void hls_OpsinDynamicsImage(
+    hls::stream<float> row_in[3], int xsize, int ysize, hls::stream<float> row_out[3], hls::stream<float>& row_y) {
+LINEER_TO_XYB:
+    for (int y = 0; y < ysize; y++) {
+#pragma HLS LOOP_TRIPCOUNT min = 512 max = 512
+        for (int x = 0; x < xsize; x++) {
+#pragma HLS LOOP_TRIPCOUNT min = 512 max = 512
+#pragma HLS pipeline II = 3
+            const float in_r = row_in[0].read();
+            const float in_g = row_in[1].read();
+            const float in_b = row_in[2].read();
+
+            float out_x, out_y, out_z;
+            hls_LinerToXyb(in_r, in_g, in_b, out_x, out_y, out_z);
+
+            row_out[0].write(out_x);
+            row_out[1].write(out_y);
+            row_out[2].write(out_z);
+
+            row_y.write(out_y);
+        }
+    }
+}
+
+// Returns the single-precision product in1 * in2.
+inline float FPTwoMul(float in1, float in2) {
+#pragma HLS inline
+    const float product = in1 * in2;
+    return product;
+}
+
+// Returns the single-precision sum in1 + in2.
+inline float FPTwoAdd(float in1, float in2) {
+#pragma HLS inline
+    const float total = in1 + in2;
+    return total;
+}
+
+inline int DivCeil(int a, int b) {
+#pragma HLS inline
+    return (a + (b - 1)) / b; // a / b rounded up when a >= 0 and b > 0
+}
+
+inline void hls_GaborishInverse(hls::stream<float> io_strm[3], int xsize, int ysize, hls::stream<float> opsin_strm[3]) {
+    float normalized[9] = { // 3x3 weights addressed as [|ky| * (kRadius + 1) + |kx|] in the summation below
+        1.6812343597412109375,           -0.14397151768207550048828125,      0.02521628327667713165283203125,
+        -0.14397151768207550048828125,   -0.0611894316971302032470703125,    0.00130096892826259136199951171875,
+        0.02521628327667713165283203125, 0.00130096892826259136199951171875, 0.00703413225710391998291015625};
+    // Filters each of the 3 channels with a 5x5 window that is fed from five line-buffer rows per channel.
+#ifndef __SYNTHESIS__
+    std::vector<std::vector<std::vector<float> > > linebuf(
+        3, std::vector<std::vector<float> >(5, std::vector<float>(4096)));
+#else
+    float linebuf[3][5][4096]; // indexed as [channel][row][column]
+#pragma HLS RESOURCE variable = linebuf core = RAM_S2P_URAM
+#pragma HLS array_partition variable = linebuf dim = 1 complete
+#pragma HLS array_partition variable = linebuf dim = 2 complete
+#pragma HLS dependence variable = linebuf inter false
+#pragma HLS dependence variable = linebuf intra false
+#endif
+    Window<5, 5, float> window[3];
+#pragma HLS array_partition variable = window dim = 1 complete
+
+    float temp_in[3][5];  // column that is written back to linebuf for the current ix
+    float temp_out[3][5]; // column that was read from linebuf for the current ix
+    // Image size rounded up to a multiple of 8 in both directions.
+    const int x_blocks = (xsize + 7) / 8;
+    const int y_blocks = (ysize + 7) / 8;
+    int ali_xsize = 8 * x_blocks;
+    int ali_ysize = 8 * y_blocks;
+    bool is_align = xsize == ali_xsize ? true : false; // true when xsize is already a multiple of 8
+    // Two extra rows and columns are iterated because output starts only once iy >= 2 and ix >= 2.
+Y:
+    for (int iy = 0; iy < ali_ysize + 2; iy++) {
+#pragma HLS LOOP_TRIPCOUNT min = 514 max = 514
+    X:
+        for (int ix = 0; ix < ali_xsize + 2; ix++) { // NOTE(review): ix indexes a 4096-wide buffer -- confirm bound
+#pragma HLS LOOP_TRIPCOUNT min = 514 max = 514
+            for (int c = 0; c < 3; c++) {
+#pragma HLS PIPELINE II = 1
+                for (int i = 0; i < 5; i++) {
+#pragma HLS UNROLL
+                    temp_out[c][i] = linebuf[c][i][ix];
+                }
+                // Newest row value: taken from the stream inside the image, otherwise the stored row 4 value.
+                window[c].shift_left();
+                if (iy < ysize && ix < xsize) {
+                    float tmp = io_strm[c].read();
+                    temp_in[c][4] = tmp;
+                } else {
+                    temp_in[c][4] = temp_out[c][4];
+                }
+                // Move the stored column up by one row (row i becomes row i - 1).
+                for (int i = 4; i > 0; i--) {
+                    temp_in[c][i - 1] = temp_out[c][i];
+                }
+                // Refresh the window; the branches cover the first pixel, the first column, ix < xsize and the rest.
+            LOAD_FOR_WINDOW:
+                for (int i = 0; i < 5; i++) {
+#pragma HLS unroll
+                    if (iy == 0 && ix == 0) { // very first pixel: fill the whole window with it
+                        window[c].val[i][0] = temp_in[c][4];
+                        window[c].val[i][1] = temp_in[c][4];
+                        window[c].val[i][2] = temp_in[c][4];
+                        window[c].val[i][3] = temp_in[c][4];
+                        window[c].val[i][4] = temp_in[c][4];
+                    } else if (iy > 0 && ix == 0) { // first column of a later row: replicate across all columns
+                        window[c].val[i][0] = temp_in[c][i];
+                        window[c].val[i][1] = temp_in[c][i];
+                        window[c].val[i][2] = temp_in[c][i];
+                        window[c].val[i][3] = temp_in[c][i];
+                        window[c].val[i][4] = temp_in[c][i];
+                    } else if (ix < xsize) {
+                        if (iy > 0) {
+                            if (ix == 1) window[c].val[i][1] = temp_in[c][i];
+                            window[c].val[i][4] = temp_in[c][i];
+                        } else { // first row: every window row receives the current sample
+                            if (ix == 1) window[c].val[i][1] = temp_in[c][4];
+                            window[c].val[i][4] = temp_in[c][4];
+                        }
+                    } else {
+                        if (is_align && ix >= (xsize + 1)) window[c].val[i][4] = window[c].val[i][1];
+                    }
+                }
+                // Write the column back; on the first row every line-buffer row receives the current sample.
+                for (int i = 0; i < 5; i++) {
+#pragma HLS unroll
+                    linebuf[c][i][ix] = (iy > 0) ? temp_in[c][i] : temp_in[c][4];
+                } // i
+                // Emit one filtered sample per channel once two rows and two columns have been consumed.
+                if (iy >= 2 && ix >= 2) {
+                    float sum = 0.0;
+                SUM_ADD_Y:
+                    for (int ky = -kRadius, y = 0; ky <= kRadius; ky++, y++) {
+#pragma HLS unroll
+                    SUM_ADD_X:
+                        for (int kx = -kRadius, x = 0; kx <= kRadius; kx++, x++) {
+#pragma HLS unroll
+                            const int wy = hls::abs(ky);
+                            const int wx = hls::abs(kx);
+                            float tmp = normalized[wy * (kRadius + 1) + wx];
+                            sum = FPTwoAdd(sum, FPTwoMul(window[c].val[y][x], tmp));
+                        } // kx
+                    }     // ky
+                    opsin_strm[c].write(sum);
+                }
+            }
+        }
+    }
+}
+
+inline double hls_SimpleGamma(float v) {
+#pragma HLS inline
+    // HDR-compatible log gamma. kInScale and kOutScale represent the scaling
+    // difference between pik and butteraugli.
+    static const float kInScale = 103.34350600371506;
+    static const float kOutScale = 1.0 / (67.797075768826289);
+
+    static const float kRetMul = kOutScale * 18.6580932135;
+    static const float kRetAdd = kOutScale * -20.2789020414;
+    static const float kVOffset = 7.14672470003;
+
+    // Negative inputs should be rare but may lead to a NaN, which is rather
+    // undesirable. Negative photons do not exist, so they are clamped to zero
+    // before the logarithm is taken.
+    float scaled = v * kInScale;
+    if (scaled < 0) {
+        scaled = 0;
+    }
+
+    return kRetMul * hls::log(scaled + kVOffset) + kRetAdd;
+}
+
+inline double hls_RatioOfCubicRootToSimpleGamma(float v) {
+#pragma HLS inline
+    // pik's opsin space is the cubic root of photons, so v * v * v relates to
+    // the photon count, and SimpleGamma(v * v * v) is butteraugli's
+    // psychovisual (log-gamma) space. Dividing v by it gives the factor that
+    // lets quantization move from pik's opsin space to butteraugli's space.
+    const float photons = v * v * v;
+    const double log_gamma = hls_SimpleGamma(photons);
+    return v / log_gamma;
+}
+
+inline void hls_RatioOfCubicRootToSimpleGammaStrm(
+    int xsize, int ysize, float cutoff, hls::stream<float>& orig, hls::stream<float>& diff, hls::stream<float>& out) {
+    static const double mul0 = 0.046650519741099357;
+    // Per sample: out = min(cutoff, mul0 * diff * ratio(orig + match_gamma_offset)); xsize * ysize samples in total.
+    // PIK's gamma is 3.0 to be able to decode faster with two muls.
+    // Butteraugli's gamma is matching the gamma of human eye, around 2.6.
+    // We approximate the gamma difference by adding one cubic root into
+    // the adaptive quantization. This gives us a total gamma of 2.6666
+    // for quantization uses.
+    static const double match_gamma_offset = 0.55030107636310233;
+    float origin, differe, res, fin = 0.0f; // fin starts defined: it is written below even if the loop never runs
+
+LOOP_GAMMA:
+    for (int i = 0; i < xsize * ysize - 1; i++) {
+#pragma HLS pipeline II = 1
+        origin = orig.read();
+        differe = diff.read();
+        res = mul0 * differe * hls_RatioOfCubicRootToSimpleGamma(origin + match_gamma_offset);
+        fin = res < cutoff ? res : cutoff;
+        out.write(fin);
+    }
+    // The last input pair is consumed but not evaluated; the previous result is written once more.
+    origin = orig.read(); // NOTE(review): this read is unconditional -- presumably xsize * ysize >= 1 is assumed
+    differe = diff.read();
+    out.write(fin);
+}
+
+inline void hls_ConvolveY(
+    int xsize, int ysize, hls::stream<float>& orig_in, hls::stream<float>& orig_out, hls::stream<float>& diff_out) {
+    static const float kOverWeightBorders = 1.4; // weight used for the last column and for the whole last row
+    // Forwards every input pixel to orig_out and writes a sum of absolute neighbour differences to diff_out.
+    float origAbove[2][4096]; // two row buffers whose roles swap every row (selected by lb below)
+#pragma HLS resource variable = origAbove core = RAM_S2P_URAM
+#pragma HLS ARRAY_PARTITION variable = origAbove complete dim = 1
+#pragma HLS DEPENDENCE variable = origAbove inter false
+
+    float orgL[3]; // horizontal taps: [0] right neighbour, [1] current pixel, [2] left neighbour
+#pragma HLS ARRAY_PARTITION variable = orgL complete dim = 0
+
+    float diff, orgabv, orgcur, orgnxt; // orgabv: pixel above, orgnxt: pixel below; orgcur is never used
+    // Pass 1: load image row 0 into origAbove[1].
+LOOP_CONVOLVE_Y:
+    for (int i = 0; i < xsize; i++) {
+#pragma HLS pipeline II = 1
+        //		above[1][i] = in.read();
+        origAbove[1][i] = orig_in.read();
+    }
+
+    orgL[2] = orgL[1]; // orgL is still unset here; only orgL[0] holds a defined value after this priming
+    orgL[1] = orgL[0];
+    orgL[0] = origAbove[1][0];
+
+    ap_uint<10> cnt; // NOTE(review): 10 bits wrap at 1024 while the row buffers hold 4096 columns -- confirm xsize limit
+    cnt = 0;
+    // Pass 2: read row 1 while emitting row 0; no row above exists, so the vertical term is counted twice.
+    for (int i = 0; i < xsize; i++) { // NOTE(review): row 1 is read unconditionally -- presumably ysize >= 2
+#pragma HLS pipeline II = 1
+        orgnxt = orig_in.read();
+
+        orgL[2] = orgL[1];
+        orgL[1] = orgL[0];
+        orgL[0] = origAbove[1][cnt + 1]; // in the last column this reads index xsize; that tap is not used there
+        if (i == 0) {
+            diff = hls::fabs(orgL[1] - orgnxt) + hls::fabs(orgL[1] - orgnxt) + hls::fabs(orgL[0] - orgL[1]) +
+                   hls::fabs(orgL[0] - orgL[1]);
+        } else if (i != xsize - 1) {
+            diff = hls::fabs(orgL[1] - orgnxt) + hls::fabs(orgL[1] - orgnxt) + hls::fabs(orgL[0] - orgL[1]) +
+                   hls::fabs(orgL[1] - orgL[2]) + 3 * hls::fabs(orgL[0] - orgL[2]);
+        } else if (i == xsize - 1) {
+            diff = kOverWeightBorders * (hls::fabs(orgL[1] - orgnxt) + hls::fabs(orgL[1] - orgnxt));
+        }
+
+        origAbove[0][i] = orgnxt; // row 1 is kept in buffer 0
+
+        diff_out.write(diff);
+        orig_out.write(orgL[1]);
+        cnt++;
+    }
+    // Pass 3: interior rows; lb selects the buffer holding the current row, !lb the row above.
+    bool lb;
+    lb = 1;
+
+    for (int i = 0; i < ysize - 2; i++) {
+        lb = !lb;
+        cnt = 0;
+        orgL[2] = orgL[1];
+        orgL[1] = orgL[0];
+        orgL[0] = origAbove[lb][0];
+
+        for (int j = 0; j < xsize; j++) {
+#pragma HLS PIPELINE II = 1
+            orgabv = origAbove[!lb][j];
+            orgnxt = orig_in.read();
+
+            orgL[2] = orgL[1];
+            orgL[1] = orgL[0];
+            orgL[0] = origAbove[lb][cnt + 1];
+            if (j == 0) {
+                diff = hls::fabs(orgL[1] - orgnxt) + hls::fabs(orgL[1] - orgabv) + hls::fabs(orgL[0] - orgL[1]) +
+                       hls::fabs(orgL[0] - orgL[1]) + 3 * hls::fabs(orgabv - orgnxt);
+            } else if (j != xsize - 1) {
+                diff = hls::fabs(orgL[1] - orgnxt) + hls::fabs(orgL[1] - orgabv) + hls::fabs(orgL[0] - orgL[1]) +
+                       hls::fabs(orgL[1] - orgL[2]) + 3 * (hls::fabs(orgL[0] - orgL[2]) + hls::fabs(orgabv - orgnxt));
+            } else if (j == xsize - 1) {
+                diff = kOverWeightBorders * (hls::fabs(orgL[1] - orgnxt) + hls::fabs(orgL[1] - orgnxt));
+            }
+
+            origAbove[!lb][j] = orgnxt; // the row above is no longer needed at column j; store the next row there
+
+            diff_out.write(diff);
+            orig_out.write(orgL[1]);
+            cnt++;
+        }
+    }
+    // Pass 4: emit the last row from origAbove[!lb]; nothing more is read from orig_in.
+    orgL[2] = orgL[1];
+    orgL[1] = orgL[0];
+    orgL[0] = origAbove[!lb][0];
+    cnt = 0;
+
+    for (int i = 0; i < xsize; i++) {
+#pragma HLS PIPELINE II = 1
+        orgL[2] = orgL[1];
+        orgL[1] = orgL[0];
+        orgL[0] = origAbove[!lb][cnt + 1];
+        if (i != xsize - 1) {
+            diff = kOverWeightBorders * 2 * hls::fabs(orgL[0] - orgL[1]);
+        } else if (i == xsize - 1) {
+            diff = kOverWeightBorders * 2 * hls::fabs(orgL[1] - orgL[2]);
+        }
+
+        diff_out.write(diff);
+        orig_out.write(orgL[1]);
+        cnt++;
+    }
+}
+
+inline void hls_ExpandStrm(ap_uint<32> xsize, ap_uint<32> ysize, hls::stream<float>& in, hls::stream<float>& out) {
+    ap_uint<32> out_xsize, out_ysize;
+    float res;
+    float right_last[3]; // the three most recently read input samples
+    // Pads an xsize x ysize stream to out_xsize x out_ysize, both rounded up to the next multiple of 8.
+    if (xsize.range(2, 0) != 0)
+        out_xsize = (xsize | 7) + 1;
+    else
+        out_xsize = xsize;
+
+    if (ysize.range(2, 0) != 0)
+        out_ysize = (ysize | 7) + 1;
+    else
+        out_ysize = ysize;
+
+    float sum[4096]; // per-column accumulator that feeds the rows appended below the image
+#pragma HLS resource variable = sum core = RAM_S2P_URAM
+#pragma HLS DEPENDENCE variable = sum inter distance = 8
+
+    float sum_right[7]; // the same accumulator for the padded columns (index i - xsize)
+#pragma HLS resource variable = sum_right core = RAM_S2P_LUTRAM
+#pragma HLS DEPENDENCE variable = sum_right inter distance = 8
+
+    for (int i = 0; i < 3; i++) {
+#pragma HLS unroll
+        right_last[i] = 0;
+    }
+    // Image rows: forward the inputs, then fill up to out_xsize with the mean of the last three samples.
+    for (int j = 0; j < ysize; j++) {
+        for (int i = 0; i < out_xsize; i++) {
+#pragma HLS pipeline II = 1
+            if (i < xsize) {
+                right_last[2] = right_last[1];
+                right_last[1] = right_last[0];
+                right_last[0] = in.read();
+                res = right_last[0];
+                float sum_pre = sum[i];
+                if (j == 0 || j + 3 == ysize) // (re)start the accumulation; later rows are added on top
+                    sum[i] = res;
+                else if (j + 3 > ysize)
+                    sum[i] = sum_pre + res;
+            } else {
+                res = (right_last[2] + right_last[1] + right_last[0]) * 0.3333333333333333333333333;
+                float sum_pre = sum_right[i - xsize];
+                if (j == 0 || j + 3 == ysize) // same accumulation rule as for the image columns
+                    sum_right[i - xsize] = res;
+                else if (j + 3 > ysize)
+                    sum_right[i - xsize] = sum_pre + res;
+            }
+            out.write(res);
+        }
+    }
+    // Appended rows: every sample is one third of the accumulated column value.
+    for (int j = ysize; j < out_ysize; j++) {
+        for (int i = 0; i < out_xsize; i++) {
+#pragma HLS pipeline II = 1
+            if (i < xsize)
+                res = sum[i] * 0.3333333333333333333333333;
+            else
+                res = sum_right[i - xsize] * 0.3333333333333333333333333;
+            out.write(res);
+        }
+    }
+}
+
+inline void hls_ConvolveX35(int xsize, int ysize, hls::stream<float>& in, hls::stream<float>& out) {
+    float kernel[35] = {
+        0.0060024694539606571197509765625, 0.0076467366889119148254394531250, 0.0095995273441076278686523437500,
+        0.0118754766881465911865234375000, 0.0144770387560129165649414062500, 0.0173914544284343719482421875000,
+        0.0205882601439952850341796875000, 0.0240176711231470108032226562500, 0.0276102013885974884033203125000,
+        0.0312777720391750335693359375000, 0.0349164046347141265869140625000, 0.0384105704724788665771484375000,
+        0.0416389256715774536132812500000, 0.0444811210036277770996093750000, 0.0468251816928386688232421875000,
+        0.0485747642815113067626953125000, 0.0496557392179965972900390625000, 0.0500213839113712310791015625000,
+        0.0496557392179965972900390625000, 0.0485747642815113067626953125000, 0.0468251816928386688232421875000,
+        0.0444811210036277770996093750000, 0.0416389256715774536132812500000, 0.0384105704724788665771484375000,
+        0.0349164046347141265869140625000, 0.0312777720391750335693359375000, 0.0276102013885974884033203125000,
+        0.0240176711231470108032226562500, 0.0205882601439952850341796875000, 0.0173914544284343719482421875000,
+        0.0144770387560129165649414062500, 0.0118754766881465911865234375000, 0.0095995273441076278686523437500,
+        0.0076467366889119148254394531250, 0.0060024694539606571197509765625};
+
+    float window[35];
+#pragma HLS ARRAY_PARTITION variable = window complete dim = 0
+    float last[19];
+#pragma HLS ARRAY_PARTITION variable = last complete dim = 0
+    float res[7];
+#pragma HLS ARRAY_PARTITION variable = res complete dim = 0
+    float sum, tmp;
+
+    for (int i = 0; i < 7; i++) {
+#pragma HLS unroll
+        res[i] = 0;
+    }
+
+    for (int m = 0; m < ysize; m++) {
+        for (int i = 0; i < 18; i++) {
+#pragma HLS pipeline II = 1
+            if (i < xsize) tmp = in.read();
+
+            for (int j = 0; j < 17; j++) {
+#pragma HLS unroll
+                window[17 + j] = window[18 + j];
+            }
+            window[34] = tmp;
+
+            for (int j = 0; j < 16; j++) {
+#pragma HLS unroll
+                window[16 - j] = window[15 - j];
+            }
+            if (i < xsize)
+                window[0] = tmp;
+            else
+                window[0] = last[17];
+
+            if (i == 0) {
+                for (int j = 0; j < 19; j++) {
+#pragma HLS unroll
+                    last[j] = tmp;
+                }
+            } else if (i < xsize) {
+                for (int j = 0; j < 18; j++) {
+#pragma HLS unroll
+                    last[j] = last[j + 1];
+                }
+                last[18] = tmp;
+            } else {
+                for (int j = 0; j < 18; j++) {
+#pragma HLS unroll
+                    last[18 - j] = last[17 - j];
+                }
+            }
+        }
+
+        int state, cnt;
+        float reg[35];
+#pragma HLS ARRAY_PARTITION variable = reg complete dim = 0
+        state = 8;
+        cnt = 0;
+
+        for (int i = 0; i < xsize + 4; i++) {
+#pragma HLS pipeline II = 1
+
+            if (state == 8) {
+                cnt++;
+                if (cnt < 4)
+                    state = 8;
+                else
+                    state = 0;
+            } else if (state == 0) {
+                for (int j = 0; j < 35; j++) {
+#pragma HLS unroll
+                    reg[j] = window[34 - j];
+                }
+                res[0] = reg[0] * kernel[0] + reg[1] * kernel[1] + reg[2] * kernel[2] + reg[3] * kernel[3] +
+                         reg[4] * kernel[4];
+                state = 1;
+            } else if (state == 1) {
+                for (int j = 0; j < 30; j++) {
+#pragma HLS unroll
+                    reg[j] = reg[j + 5];
+                }
+                res[1] = reg[0] * kernel[5] + reg[1] * kernel[6] + reg[2] * kernel[7] + reg[3] * kernel[8] +
+                         reg[4] * kernel[9];
+                state = 2;
+            } else if (state == 2) {
+                for (int j = 0; j < 30; j++) {
+#pragma HLS unroll
+                    reg[j] = reg[j + 5];
+                }
+                res[2] = reg[0] * kernel[10] + reg[1] * kernel[11] + reg[2] * kernel[12] + reg[3] * kernel[13] +
+                         reg[4] * kernel[14];
+                state = 3;
+            } else if (state == 3) {
+                for (int j = 0; j < 30; j++) {
+#pragma HLS unroll
+                    reg[j] = reg[j + 5];
+                }
+                res[3] = reg[0] * kernel[15] + reg[1] * kernel[16] + reg[2] * kernel[17] + reg[3] * kernel[18] +
+                         reg[4] * kernel[19];
+                state = 4;
+            } else if (state == 4) {
+                for (int j = 0; j < 30; j++) {
+#pragma HLS unroll
+                    reg[j] = reg[j + 5];
+                }
+                res[4] = reg[0] * kernel[20] + reg[1] * kernel[21] + reg[2] * kernel[22] + reg[3] * kernel[23] +
+                         reg[4] * kernel[24];
+                state = 5;
+            } else if (state == 5) {
+                for (int j = 0; j < 30; j++) {
+#pragma HLS unroll
+                    reg[j] = reg[j + 5];
+                }
+                res[5] = reg[0] * kernel[25] + reg[1] * kernel[26] + reg[2] * kernel[27] + reg[3] * kernel[28] +
+                         reg[4] * kernel[29];
+                state = 6;
+            } else if (state == 6) {
+                for (int j = 0; j < 30; j++) {
+#pragma HLS unroll
+                    reg[j] = reg[j + 5];
+                }
+                res[6] = reg[0] * kernel[30] + reg[1] * kernel[31] + reg[2] * kernel[32] + reg[3] * kernel[33] +
+                         reg[4] * kernel[34];
+                state = 7;
+            } else if (state == 7) {
+                for (int j = 0; j < 30; j++) {
+#pragma HLS unroll
+                    reg[j] = reg[j + 5];
+                }
+                out.write(res[0] + res[1] + res[2] + res[3] + res[4] + res[5] + res[6]);
+                state = 0;
+            }
+
+            if (i < xsize - 18) {
+                tmp = in.read();
+                for (int j = 0; j < 34; j++) {
+#pragma HLS unroll
+                    window[34 - j] = window[33 - j];
+                }
+                window[0] = tmp;
+
+                for (int j = 0; j < 18; j++) {
+#pragma HLS unroll
+                    last[j] = last[j + 1];
+                }
+                last[18] = tmp;
+            } else {
+                tmp = last[17];
+                for (int j = 0; j < 34; j++) {
+#pragma HLS unroll
+                    window[34 - j] = window[33 - j];
+                }
+                window[0] = tmp;
+
+                for (int j = 0; j < 18; j++) {
+#pragma HLS unroll
+                    last[18 - j] = last[17 - j];
+                }
+                last[0] = 0;
+            }
+        }
+    }
+}
+
+// Column-wise (vertical) 35-tap filter over a row-major float stream.
+//
+// xsize : values per row; also the used extent of each lb_ram bank, so it must not exceed 4096.
+// ysize : number of rows read from `in`.
+// in    : input stream; xsize values are read for each of the ysize rows.
+// out   : output stream; xsize values are written each time an accumulator bank completes.
+//
+// Five accumulator banks (lb_ram[m]) each build one output row centred on input row p[m]; the
+// p[m] values start 8 rows apart. An incoming row i is weighted by kernel[17 + r], where r is its
+// distance to p[m]; r[m][0] and r[m][2] add the contributions of row i reflected about the first
+// and the last input row. cnt[m] counts the taps gathered so far: at 35 the row is written out,
+// the bank is cleared and p[m] moves on by 40.
+// NOTE(review): this looks like a stride-8 vertical downsampling filter - confirm against the caller.
+inline void hls_ConvolveY35(int xsize, int ysize, hls::stream<float>& in, hls::stream<float>& out) {
+    // Filter coefficients; the table is symmetric about kernel[17].
+    float kernel[35] = {
+        0.0060024694539606571197509765625, 0.0076467366889119148254394531250, 0.0095995273441076278686523437500,
+        0.0118754766881465911865234375000, 0.0144770387560129165649414062500, 0.0173914544284343719482421875000,
+        0.0205882601439952850341796875000, 0.0240176711231470108032226562500, 0.0276102013885974884033203125000,
+        0.0312777720391750335693359375000, 0.0349164046347141265869140625000, 0.0384105704724788665771484375000,
+        0.0416389256715774536132812500000, 0.0444811210036277770996093750000, 0.0468251816928386688232421875000,
+        0.0485747642815113067626953125000, 0.0496557392179965972900390625000, 0.0500213839113712310791015625000,
+        0.0496557392179965972900390625000, 0.0485747642815113067626953125000, 0.0468251816928386688232421875000,
+        0.0444811210036277770996093750000, 0.0416389256715774536132812500000, 0.0384105704724788665771484375000,
+        0.0349164046347141265869140625000, 0.0312777720391750335693359375000, 0.0276102013885974884033203125000,
+        0.0240176711231470108032226562500, 0.0205882601439952850341796875000, 0.0173914544284343719482421875000,
+        0.0144770387560129165649414062500, 0.0118754766881465911865234375000, 0.0095995273441076278686523437500,
+        0.0076467366889119148254394531250, 0.0060024694539606571197509765625};
+#pragma HLS resource variable = kernel core = ROM_1P_LUTRAM
+
+    // One accumulator bank per in-flight output row, indexed by column.
+    float lb_ram[5][4096];
+#pragma HLS resource variable = lb_ram core = RAM_S2P_URAM
+#pragma HLS ARRAY_PARTITION variable = lb_ram complete dim = 1
+#pragma HLS DEPENDENCE variable = lb_ram inter distance = 2
+
+    float reg;
+
+    float data[5];
+#pragma HLS ARRAY_PARTITION variable = data complete dim = 0
+
+    // r[m][0..2]: kernel offsets of the current row for bank m (top-mirrored, direct, bottom-mirrored).
+    int r[5][3];
+#pragma HLS ARRAY_PARTITION variable = r complete dim = 0
+
+    // p[m]: input row on which bank m's pending output row is centred.
+    int p[5];
+#pragma HLS ARRAY_PARTITION variable = p complete dim = 0
+    p[0] = 4;
+    p[1] = 12;
+    p[2] = 20;
+    p[3] = 28;
+    p[4] = 36;
+
+    // cnt[m]: number of kernel taps accumulated into bank m so far.
+    int cnt[5];
+#pragma HLS ARRAY_PARTITION variable = cnt complete dim = 0
+    cnt[0] = 0;
+    cnt[1] = 0;
+    cnt[2] = 0;
+    cnt[3] = 0;
+    cnt[4] = 0;
+
+    float tmp1[5], tmp2[5], tmp3[5];
+#pragma HLS ARRAY_PARTITION variable = tmp1 complete dim = 0
+#pragma HLS ARRAY_PARTITION variable = tmp2 complete dim = 0
+#pragma HLS ARRAY_PARTITION variable = tmp3 complete dim = 0
+
+    // Row 0 initialises every bank: only banks 0 and 1 are within reach of it
+    // (kernel[13] = kernel[17 + (0 - 4)], kernel[5] = kernel[17 + (0 - 12)]).
+    for (int i = 0; i < xsize; i++) {
+#pragma HLS pipeline II = 1
+        reg = in.read();
+        lb_ram[0][i] = reg * kernel[13];
+        lb_ram[1][i] = reg * kernel[5];
+        lb_ram[2][i] = 0;
+        lb_ram[3][i] = 0;
+        lb_ram[4][i] = 0;
+    }
+    cnt[0]++;
+    cnt[1]++;
+
+    // Rows 1 .. ysize-2.
+    for (int i = 1; i < ysize - 1; i++) {
+        for (int m = 0; m < 5; m++) {
+#pragma HLS UNROLL
+            r[m][0] = -i - p[m]; //(i - p0) - 2 * i
+            r[m][1] = i - p[m];
+            r[m][2] = i - p[m] + 2 * (ysize - 1 - i);
+        }
+
+        // Count every tap this row contributes, before the column loop, so that cnt[m] == 35
+        // already identifies the bank that completes on this row.
+        for (int m = 0; m < 5; m++) {
+#pragma HLS UNROLL
+            if (r[m][0] > -18 && r[m][0] < 18) {
+                cnt[m]++;
+            }
+            if (r[m][1] > -18 && r[m][1] < 18) {
+                cnt[m]++;
+            }
+            if (r[m][2] > -18 && r[m][2] < 18) {
+                cnt[m]++;
+            }
+        }
+
+        for (int j = 0; j < xsize; j++) {
+#pragma HLS PIPELINE II = 8
+            reg = in.read();
+            for (int m = 0; m < 5; m++) {
+#pragma HLS UNROLL
+                data[m] = lb_ram[m][j];
+            }
+
+            // Weighted contributions of this sample; offsets outside (-18, 18) contribute 0.
+            for (int m = 0; m < 5; m++) {
+#pragma HLS UNROLL
+                if (r[m][0] > -18 && r[m][0] < 18) {
+                    tmp1[m] = reg * kernel[17 + r[m][0]];
+                } else {
+                    tmp1[m] = 0;
+                }
+
+                if (r[m][1] > -18 && r[m][1] < 18) {
+                    tmp2[m] = reg * kernel[17 + r[m][1]];
+                } else {
+                    tmp2[m] = 0;
+                }
+
+                if (r[m][2] > -18 && r[m][2] < 18) {
+                    tmp3[m] = reg * kernel[17 + r[m][2]];
+                } else {
+                    tmp3[m] = 0;
+                }
+            }
+
+            // A completed bank is cleared for reuse; all others keep accumulating.
+            for (int m = 0; m < 5; m++) {
+#pragma HLS UNROLL
+                if (cnt[m] == 35) {
+                    lb_ram[m][j] = 0;
+                } else {
+                    lb_ram[m][j] = data[m] + tmp1[m] + tmp2[m] + tmp3[m];
+                }
+            }
+
+            // Emit the finished value; at most one bank (lowest index first) is written per sample.
+            if (cnt[0] == 35) {
+                out.write(data[0] + tmp1[0] + tmp2[0] + tmp3[0]);
+            } else if (cnt[1] == 35) {
+                out.write(data[1] + tmp1[1] + tmp2[1] + tmp3[1]);
+            } else if (cnt[2] == 35) {
+                out.write(data[2] + tmp1[2] + tmp2[2] + tmp3[2]);
+            } else if (cnt[3] == 35) {
+                out.write(data[3] + tmp1[3] + tmp2[3] + tmp3[3]);
+            } else if (cnt[4] == 35) {
+                out.write(data[4] + tmp1[4] + tmp2[4] + tmp3[4]);
+            }
+        }
+
+        // Re-target completed banks 40 rows further down.
+        for (int m = 0; m < 5; m++) {
+#pragma HLS unroll
+            if (cnt[m] == 35) {
+                p[m] = p[m] + 40;
+            }
+        }
+
+        for (int m = 0; m < 5; m++) {
+#pragma HLS unroll
+            if (cnt[m] == 35) cnt[m] = 0;
+        }
+    }
+
+    // Last input row. The comparison chain picks the bank with the smallest p (the oldest pending
+    // row): that bank receives kernel[28] and is emitted, the following bank receives kernel[20].
+    // NOTE(review): when ysize == 8 the row is read and discarded, and the loop below still emits
+    // bank 1 (p is never advanced in that case) - confirm this is intended.
+    for (int i = 0; i < xsize; i++) {
+#pragma HLS pipeline II = 8
+        reg = in.read();
+        if (ysize != 8) {
+            if (p[0] < p[4]) {
+                out.write(lb_ram[0][i] + reg * kernel[28]);
+                lb_ram[1][i] = lb_ram[1][i] + reg * kernel[20];
+            } else if (p[1] < p[0]) {
+                out.write(lb_ram[1][i] + reg * kernel[28]);
+                lb_ram[2][i] = lb_ram[2][i] + reg * kernel[20];
+            } else if (p[2] < p[1]) {
+                out.write(lb_ram[2][i] + reg * kernel[28]);
+                lb_ram[3][i] = lb_ram[3][i] + reg * kernel[20];
+            } else if (p[3] < p[2]) {
+                out.write(lb_ram[3][i] + reg * kernel[28]);
+                lb_ram[4][i] = lb_ram[4][i] + reg * kernel[20];
+            } else if (p[4] < p[3]) {
+                out.write(lb_ram[4][i] + reg * kernel[28]);
+                lb_ram[0][i] = lb_ram[0][i] + reg * kernel[20];
+            }
+        }
+    }
+
+    // Flush: emit the bank that follows the one selected above (p is unchanged, so the same
+    // comparison chain applies).
+    for (int i = 0; i < xsize; i++) {
+#pragma HLS pipeline II = 1
+        if (p[0] < p[4]) {
+            out.write(lb_ram[1][i]);
+        } else if (p[1] < p[0]) {
+            out.write(lb_ram[2][i]);
+        } else if (p[2] < p[1]) {
+            out.write(lb_ram[3][i]);
+        } else if (p[3] < p[2]) {
+            out.write(lb_ram[4][i]);
+        } else if (p[4] < p[3]) {
+            out.write(lb_ram[0][i]);
+        }
+    }
+}
+
+// Reads xsize * ysize values from `diff` and, for each value v, writes
+//   kBase + kMul1 / max(v + kOffset1, 1e-3) + kMul2 / (v * v + kOffset2)
+// to `res`, one result per input in the same order.
+inline void hls_ComputeMaskStrm(int xsize, int ysize, hls::stream<float>& diff, hls::stream<float>& res) {
+    static const float kBase = 1.329262607500535;
+    static const float kMul1 = 0.010994306366172898;
+    static const float kOffset1 = 0.00683227084849159;
+    static const float kMul2 = -0.1949226495025296;
+    static const float kOffset2 = 0.075052668223305155;
+
+    for (int row = 0; row < ysize; ++row) {
+        for (int col = 0; col < xsize; ++col) {
+#pragma HLS pipeline II = 1
+            const float v = diff.read();
+            // The first denominator is clamped from below so it can never reach zero.
+            const float denom1 = hls::max<float>(v + kOffset1, 1e-3);
+            const float denom2 = v * v + kOffset2;
+            const float term1 = kMul1 / denom1;
+            const float term2 = kMul2 / denom2;
+            res.write(kBase + term1 + term2);
+        }
+    }
+}
+
+// Applies hls::exp to each of the xsize * ysize values read from `in` and forwards the
+// results to `out` in the same order.
+inline void hls_Exp(int xsize, int ysize, hls::stream<float>& in, hls::stream<float>& out) {
+    for (int row = 0; row < ysize; ++row) {
+        for (int col = 0; col < xsize; ++col) {
+#pragma HLS pipeline II = 1
+            const float v = in.read();
+            out.write(hls::exp(v));
+        }
+    }
+}
+
+// Produces the scaled output field.
+//
+// xsize, ysize : source dimensions; they only select which of the three modes below runs.
+// outx, outy   : number of output columns / rows.
+// lambda       : scale factor.
+//
+//  - xsize == 1             : writes outy copies of lambda; `in` is not read.
+//  - ysize == 1 (xsize != 1): writes outx copies of lambda; `in` is not read.
+//  - xsize > 1 && ysize > 1 : reads outx * outy values and writes each one multiplied by lambda.
+// For any other combination nothing is read or written.
+inline void hls_scale(
+    int xsize, int ysize, int outx, int outy, float lambda, hls::stream<float>& in, hls::stream<float>& out) {
+    if (xsize == 1) {
+        for (size_t row = 0; row < outy; ++row) {
+#pragma HLS pipeline II = 1
+            out.write(lambda);
+        }
+    } else if (ysize == 1) {
+        for (size_t col = 0; col < outx; ++col) {
+#pragma HLS pipeline II = 1
+            out.write(lambda);
+        }
+    } else if (xsize > 1 && ysize > 1) {
+        for (size_t row = 0; row < outy; ++row) {
+            for (size_t col = 0; col < outx; ++col) {
+#pragma HLS pipeline II = 1
+                const float v = in.read();
+                out.write(lambda * v);
+            }
+        }
+    }
+}
+
+// Accumulates xsize * ysize values from `qfStrm` and writes their arithmetic mean
+// (sum divided by xsize * ysize) to `avgStrm` as a single value.
+inline void hls_average(int xsize, int ysize, hls::stream<float>& qfStrm, hls::stream<float>& avgStrm) {
+    const int total = xsize * ysize;
+    float acc = 0;
+
+    for (int k = 0; k < total; k++) {
+#pragma HLS pipeline II = 8
+        acc += qfStrm.read();
+    }
+    avgStrm.write(acc / total);
+}
+
+// Fans one stream out to two: every one of the xsize * ysize values read from `input`
+// is written first to `output1` and then to `output2`.
+inline void duplicateQF(
+    int xsize, int ysize, hls::stream<float>& input, hls::stream<float>& output1, hls::stream<float>& output2) {
+    for (int outer = 0; outer < xsize; outer++) {
+        for (int inner = 0; inner < ysize; inner++) {
+#pragma HLS pipeline II = 1
+            const float v = input.read();
+            output1.write(v);
+            output2.write(v);
+        }
+    }
+}
+
+// Chains the stages that turn the input stream into the initial quantisation field.
+//
+// xsize, ysize : dimensions passed to the first three stages.
+// cutoff       : forwarded to hls_RatioOfCubicRootToSimpleGammaStrm.
+// lamda        : scale factor forwarded to hls_scale.
+// in           : input sample stream.
+// qfStrmOut    : receives the scaled field (first output of duplicateQF).
+// avgStrmOut   : receives the single average value produced by hls_average.
+//
+// The field is computed on a grid of ceil(xsize / 8) x ceil(ysize / 8) cells (out_xsize,
+// out_ysize). The function is inlined, so the local streams below become FIFOs of the caller.
+inline void initQFStrm(int xsize,
+                       int ysize,
+                       float cutoff,
+                       float lamda,
+                       hls::stream<float>& in,
+                       hls::stream<float>& qfStrmOut,
+                       hls::stream<float>& avgStrmOut) {
+#pragma HLS inline
+
+    // NOTE(review): `mid` is declared but not connected to any stage below.
+    hls::stream<float> mid("mid");
+#pragma HLS STREAM variable = mid depth = 32
+
+    // Each STREAM pragma must name the stream declared directly above it; a pragma that names
+    // another stream leaves its own FIFO at the tool's default depth.
+    hls::stream<float> diff("diff");
+#pragma HLS STREAM variable = diff depth = 32
+    hls::stream<float> orig("orig");
+#pragma HLS STREAM variable = orig depth = 32
+    hls::stream<float> gamma("gamma");
+#pragma HLS STREAM variable = gamma depth = 32
+    hls::stream<float> expd("expd");
+#pragma HLS STREAM variable = expd depth = 32
+    hls::stream<float> mid35("mid35");
+#pragma HLS STREAM variable = mid35 depth = 32
+    hls::stream<float> mask("mask");
+#pragma HLS STREAM variable = mask depth = 32
+    hls::stream<float> exp("exp");
+#pragma HLS STREAM variable = exp depth = 32
+    hls::stream<float> scale("scale");
+#pragma HLS STREAM variable = scale depth = 32
+    hls::stream<float> qfStrm("qfStrm");
+#pragma HLS STREAM variable = qfStrm depth = 32
+    hls::stream<float> avgS1Strm("avgS1Strm");
+#pragma HLS STREAM variable = avgS1Strm depth = 32
+
+    // Output grid: one cell per kResolution x kResolution input samples, rounded up.
+    static const int kResolution = 8;
+    int out_xsize = (xsize + kResolution - 1) / kResolution;
+    int out_ysize = (ysize + kResolution - 1) / kResolution;
+
+    hls_ConvolveY(xsize, ysize, in, orig, diff);
+    hls_RatioOfCubicRootToSimpleGammaStrm(xsize, ysize, cutoff, orig, diff, gamma);
+    hls_ExpandStrm(xsize, ysize, gamma, expd);
+    // The 35-tap stages work on dimensions rounded up to a multiple of 8 (out_* << 3).
+    hls_ConvolveX35(out_xsize << 3, out_ysize << 3, expd, mid35);
+    hls_ConvolveY35(out_xsize, out_ysize << 3, mid35, mask);
+    hls_ComputeMaskStrm(out_xsize, out_ysize, mask, exp);
+    hls_Exp(out_xsize, out_ysize, exp, scale);
+    hls_scale(xsize, ysize, out_xsize, out_ysize, lamda, scale, qfStrm);
+    duplicateQF(out_xsize, out_ysize, qfStrm, qfStrmOut, avgS1Strm);
+    hls_average(out_xsize, out_ysize, avgS1Strm, avgStrmOut);
+}
+
+// Pads the xsize x ysize field to whole 4 x 4 tiles.
+//
+// xsize, ysize : dimensions of the field arriving on `input` in row-major order.
+// input        : source values; exactly xsize * ysize values are read.
+// dataStrm     : receives (x4 * 4) * (y4 * 4) values in row-major order, where x4 and y4 are the
+//                tile counts rounded up; positions outside the field are written as 0.
+// addrStrm     : not written by this function (kept for interface compatibility).
+inline void QFwriteCalAddr(int xsize,
+                           int ysize,
+                           hls::stream<float>& input,
+                           hls::stream<float>& dataStrm,
+                           hls::stream<ap_uint<32> >& addrStrm) {
+    int x4 = (xsize + 4 - 1) / 4;
+    int y4 = (ysize + 4 - 1) / 4;
+
+    // y / j: tile row and row inside the tile; x / i: tile column and column inside the tile.
+    for (int y = 0; y < y4; y++) {
+        for (int j = 0; j < 4; j++) {
+            for (int x = 0; x < x4; x++) {
+                for (int i = 0; i < 4; i++) {
+#pragma HLS pipeline II = 1
+                    if ((x * 4 + i) < xsize && (y * 4 + j) < ysize)
+                        dataStrm.write(input.read());
+                    else
+                        dataStrm.write(0);
+                }
+            }
+        }
+    }
+}
+
+// Stores the padded field and its average into the axi_qf buffer.
+//
+// xsize, ysize : field dimensions; tile counts x4, y4 are these divided by 4, rounded up.
+// avgStrm      : supplies one average value, read after all field data has been stored.
+// avg_outStrm  : receives a copy of that average.
+// dataStrm     : padded field in row-major order, (x4 * 4) * (y4 * 4) values.
+// addrStrm     : not read by this function (kept for interface compatibility).
+// axi_qf       : destination. Word 0 receives the bit pattern of the average; the field is
+//                stored from word 2 onwards as consecutive 16-word tiles, tile (y, x) starting at
+//                2 + (y * x4 + x) * 16 with element (row j, column i) at offset j * 4 + i.
+inline void QFwriteDDRCtrl(int xsize,
+                           int ysize,
+                           hls::stream<float>& avgStrm,
+                           hls::stream<float>& avg_outStrm,
+                           hls::stream<float>& dataStrm,
+                           hls::stream<ap_uint<32> >& addrStrm,
+                           ap_uint<32>* axi_qf) {
+    float reg;
+    unsigned int reg_int;
+    ap_uint<32> reg_apint;
+
+    int x4 = (xsize + 4 - 1) / 4;
+    int y4 = (ysize + 4 - 1) / 4;
+
+    for (int y = 0; y < y4; y++) {
+        for (int j = 0; j < 4; j++) {
+            for (int x = 0; x < x4; x++) {
+                for (int i = 0; i < 4; i++) {
+#pragma HLS pipeline II = 1
+                    reg = dataStrm.read();
+                    // Store the raw float bit pattern in the 32-bit word.
+                    reg_int = fToBits<float, unsigned int>(reg);
+                    reg_apint = reg_int;
+                    axi_qf[y * x4 * 16 + x * 16 + j * 4 + 2 + i] = reg_apint;
+                }
+            }
+        }
+    }
+    // Forward the average downstream and record it in word 0.
+    reg = avgStrm.read();
+    avg_outStrm.write(reg);
+    reg_int = fToBits<float, unsigned int>(reg);
+    reg_apint = reg_int;
+    axi_qf[0] = reg_apint;
+}
+
+// Dataflow region that pads the field (QFwriteCalAddr) and stores it together with its
+// average into axi_qf (QFwriteDDRCtrl). The average is also forwarded on avg_outStrm.
+inline void QFwrite(int xsize,
+                    int ysize,
+                    hls::stream<float>& qfStrm,
+                    hls::stream<float>& avgStrm,
+                    hls::stream<float>& avg_outStrm,
+                    ap_uint<32>* axi_qf) {
+#pragma HLS dataflow
+    // Padded field passed from the first stage to the second.
+    hls::stream<float> paddedStrm;
+#pragma HLS STREAM variable = paddedStrm depth = 32
+    // Address channel handed to both stages.
+    hls::stream<ap_uint<32> > tileAddrStrm;
+#pragma HLS STREAM variable = tileAddrStrm depth = 32
+
+    QFwriteCalAddr(xsize, ysize, qfStrm, paddedStrm, tileAddrStrm);
+
+    QFwriteDDRCtrl(xsize, ysize, avgStrm, avg_outStrm, paddedStrm, tileAddrStrm, axi_qf);
+}
+
+// Streams the stored field back out of axi_qf.
+//
+// xsize, ysize : field dimensions; x4 * y4 tiles of 16 words each are read.
+// avgStrm      : supplies one value, which is forwarded unchanged to avg_outStrm.
+// axi_qf       : source buffer; field words are read sequentially starting at index 2.
+// qfStrm       : receives the x4 * y4 * 16 words reinterpreted as floats, in storage order.
+// avg_outStrm  : receives the value taken from avgStrm.
+inline void QFload(int xsize,
+                   int ysize,
+                   hls::stream<float>& avgStrm,
+                   ap_uint<32>* axi_qf,
+                   hls::stream<float>& qfStrm,
+                   hls::stream<float>& avg_outStrm) {
+    const int x4 = (xsize + 3) / 4;
+    const int y4 = (ysize + 3) / 4;
+    const int n = x4 * y4;
+
+    // Word 0 is fetched and decoded here, but the decoded value is overwritten before any use.
+    ap_uint<32> word = axi_qf[0];
+    unsigned int bits = word;
+    float value = bitsToF<unsigned int, float>(bits);
+
+    avg_outStrm.write(avgStrm.read());
+
+    int idx = 2;
+    for (size_t tile = 0; tile < n; ++tile) {
+        for (int k = 0; k < 16; k++) {
+#pragma HLS pipeline II = 1
+            word = axi_qf[idx];
+            bits = word;
+            value = bitsToF<unsigned int, float>(bits);
+            qfStrm.write(value);
+            idx++;
+        }
+    }
+}
+
+// First stage of the mean-absolute-deviation computation.
+//
+// xsize, ysize : field dimensions; x4 * y4 tiles of 16 values each are read from qfStrm.
+// qfStrm       : tiled field, 16 values per 4 x 4 tile (two half-tiles of 8 values each).
+// avgStrm      : supplies the average (one value, read first).
+// sum4Strm     : receives one partial sum per half-tile, i.e. two values per tile.
+//
+// Each value inside the field contributes |value - avg|; padding positions are still read from
+// qfStrm but contribute 0.
+inline void QFcalabsavgS1(
+    int xsize, int ysize, hls::stream<float>& qfStrm, hls::stream<float>& avgStrm, hls::stream<float>& sum4Strm) {
+    int x4 = (xsize + 4 - 1) / 4;
+    int y4 = (ysize + 4 - 1) / 4;
+
+    // Shift register holding the 8 most recent contributions.
+    float reg[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+    float sum = 0;
+
+    float avg = avgStrm.read();
+
+    for (int y = 0; y < y4; y++) {
+        for (int x = 0; x < x4; x++) {
+            for (int j = 0; j < 2; j++) {
+                for (int i = 0; i < 8; i++) {
+#pragma HLS pipeline II = 1
+                    reg[7] = reg[6];
+                    reg[6] = reg[5];
+                    reg[5] = reg[4];
+                    reg[4] = reg[3];
+                    reg[3] = reg[2];
+                    reg[2] = reg[1];
+                    reg[1] = reg[0];
+                    // Column inside the tile is i % 4; row inside the tile is j * 2 + i / 4.
+                    if (x * 4 + i % 4 < xsize && y * 4 + j * 2 + i / 4 < ysize) {
+                        reg[0] = hls::abs(qfStrm.read() - avg);
+                    } else {
+                        // Padding: consume the value so the stream stays aligned.
+                        qfStrm.read();
+                        reg[0] = 0;
+                    }
+                    sum = reg[0] + reg[1] + reg[2] + reg[3] + reg[4] + reg[5] + reg[6] + reg[7];
+                    // After 8 shifts the register holds exactly the current half-tile.
+                    if (i == 7) sum4Strm.write(sum);
+                }
+            }
+        }
+    }
+}
+
+// Second stage of the mean-absolute-deviation computation: adds up the partial sums on
+// `sum4Strm` (one per 8 padded field values) and writes the total divided by xsize * ysize
+// to `absAvgStrm`.
+inline void QFcalabsavgS2(int xsize, int ysize, hls::stream<float>& sum4Strm, hls::stream<float>& absAvgStrm) {
+    const int x4 = (xsize + 3) / 4;
+    const int y4 = (ysize + 3) / 4;
+    const int n = x4 * y4 * 16;
+    // Number of partial sums to consume.
+    const int size8 = (n + 7) / 8;
+
+    float acc = 0;
+    for (int k = 0; k < size8; k++) {
+#pragma HLS pipeline II = 8
+        acc += sum4Strm.read();
+    }
+
+    absAvgStrm.write(acc / (xsize * ysize));
+}
+
+// Dataflow region computing the mean absolute deviation of the stored field:
+// QFload reads the field back from axi_qf, QFcalabsavgS1 forms partial sums of |value - avg|
+// and QFcalabsavgS2 reduces them to the single value written to absAvgStrm.
+inline void QFWriteOutDataflow(
+    int xsize, int ysize, hls::stream<float>& avgStrm, ap_uint<32>* axi_qf, hls::stream<float>& absAvgStrm) {
+#pragma HLS dataflow
+
+    // Field values read back from memory.
+    hls::stream<float> fieldStrm("qfloadStrm");
+#pragma HLS STREAM variable = fieldStrm depth = 32
+
+    // Average forwarded by the load stage.
+    hls::stream<float> meanStrm("qfavgloadStrm");
+#pragma HLS STREAM variable = meanStrm depth = 32
+
+    // Partial sums between the two reduction stages.
+    hls::stream<float> partialStrm("qfsum4Strm");
+#pragma HLS STREAM variable = partialStrm depth = 32
+
+    QFload(xsize, ysize, avgStrm, axi_qf, fieldStrm, meanStrm);
+
+    QFcalabsavgS1(xsize, ysize, fieldStrm, meanStrm, partialStrm);
+
+    QFcalabsavgS2(xsize, ysize, partialStrm, absAvgStrm);
+}
+
+// Reads one value from absAvgStrm and stores its float bit pattern in axi_qf[1].
+inline void absAvgWrite(hls::stream<float>& absAvgStrm, ap_uint<32>* axi_qf) {
+    const float absAvg = absAvgStrm.read();
+    const unsigned int bits = fToBits<float, unsigned int>(absAvg);
+    const ap_uint<32> word = bits;
+    axi_qf[1] = word;
+}
+
+// Writes the field and its statistics to axi_qf in three steps:
+//   1. QFwrite stores the field and its average,
+//   2. QFWriteOutDataflow reads the field back and computes the mean absolute deviation,
+//   3. absAvgWrite stores that deviation in axi_qf[1].
+inline void QFWriteOut(
+    int xsize, int ysize, hls::stream<float>& qfStrm, hls::stream<float>& avgStrm, ap_uint<32>* axi_qf) {
+    // Average handed from step 1 to step 2.
+    hls::stream<float> meanFwdStrm("avg_outStrm");
+#pragma HLS STREAM variable = meanFwdStrm depth = 2
+
+    // Deviation handed from step 2 to step 3.
+    hls::stream<float> deviationStrm("absAvgStrm");
+#pragma HLS STREAM variable = deviationStrm depth = 2
+
+    QFwrite(xsize, ysize, qfStrm, avgStrm, meanFwdStrm, axi_qf);
+
+    QFWriteOutDataflow(xsize, ysize, meanFwdStrm, axi_qf, deviationStrm);
+
+    absAvgWrite(deviationStrm, axi_qf);
+}
+
+// Regroups the stream in units of 24 values: each group arrives as 8 positions x 3 channels
+// (channel varying fastest) and leaves as 3 channels x 8 positions (position varying fastest).
+// xsize * ysize / 8 groups are processed; two banks are used so that one group is filled
+// while the previous one is drained.
+inline void hls_combineblock(int xsize, int ysize, hls::stream<float>& input, hls::stream<float>& output) {
+    float bank[2][24];
+#pragma HLS ARRAY_PARTITION variable = bank dim = 1
+    bool fill = 0;
+    // Groups handled by the overlapped fill/drain loop (all but the first fill and last drain).
+    const int overlapped = xsize * ysize / 8 - 1;
+
+    // Prologue: fill the first bank, nothing to drain yet.
+    for (int pos = 0; pos < 8; pos++) {
+        for (int ch = 0; ch < 3; ch++) {
+#pragma HLS PIPELINE II = 1
+            bank[fill][ch * 8 + pos] = input.read();
+        }
+    }
+    fill = !fill;
+
+    // Steady state: fill one bank while emitting the other in index order 0..23.
+    for (int grp = 0; grp < overlapped; grp++) {
+        for (int pos = 0; pos < 8; pos++) {
+            for (int ch = 0; ch < 3; ch++) {
+#pragma HLS PIPELINE II = 1
+                const float incoming = input.read();
+                bank[fill][ch * 8 + pos] = incoming;
+                output.write(bank[!fill][3 * pos + ch]);
+            }
+        }
+        fill = !fill;
+    }
+
+    // Epilogue: drain the bank filled last.
+    for (int pos = 0; pos < 8; pos++) {
+        for (int ch = 0; ch < 3; ch++) {
+#pragma HLS PIPELINE II = 1
+            output.write(bank[!fill][3 * pos + ch]);
+        }
+    }
+}
+
+// Regroups the stream in units of 24 values and scales it: each group arrives as
+// 3 channels x 8 positions (position varying fastest), is stored at index position * 3 + channel,
+// and is emitted in index order 0..23 with every value divided by 64.
+// xsize * ysize / 8 groups are processed using two alternating banks.
+inline void hls_splitblock(int xsize, int ysize, hls::stream<float>& input, hls::stream<float>& output) {
+    float bank[2][24];
+#pragma HLS ARRAY_PARTITION variable = bank dim = 1
+    bool fill = 0;
+    // Groups handled by the overlapped fill/drain loop (all but the first fill and last drain).
+    const int overlapped = xsize * ysize / 8 - 1;
+
+    // Prologue: fill the first bank, nothing to drain yet.
+    for (int ch = 0; ch < 3; ch++) {
+        for (int pos = 0; pos < 8; pos++) {
+#pragma HLS PIPELINE II = 1
+            bank[fill][pos * 3 + ch] = input.read();
+        }
+    }
+    fill = !fill;
+
+    // Steady state: fill one bank while emitting the other.
+    for (int grp = 0; grp < overlapped; grp++) {
+        for (int ch = 0; ch < 3; ch++) {
+            for (int pos = 0; pos < 8; pos++) {
+#pragma HLS PIPELINE II = 1
+                const float incoming = input.read();
+                bank[fill][pos * 3 + ch] = incoming;
+                output.write(bank[!fill][ch * 8 + pos] / 64);
+            }
+        }
+        fill = !fill;
+    }
+
+    // Epilogue: drain the bank filled last.
+    for (int ch = 0; ch < 3; ch++) {
+        for (int pos = 0; pos < 8; pos++) {
+#pragma HLS PIPELINE II = 1
+            output.write(bank[!fill][ch * 8 + pos] / 64);
+        }
+    }
+}
+
+// 8-point butterfly transform: reads in[0..7] and writes the 8 results to out[0..7].
+// All inputs are copied to locals before any output is written. No normalisation is
+// applied here (an all-ones input yields out[0] == 8).
+inline void hls_DCT8Core(float in[8], float out[8]) {
+    const float kInvSqrt2 = 0.707106781186548f; // 1 / sqrt(2)
+    const float kCos3Pi8 = 0.382683432365090f;  // cos(3 * pi / 8)
+    const float kHalfSec = 1.30656296487638f;   // 1 / (2 * cos(3 * pi / 8))
+    const float kSqrt2Cos = 0.541196100146197f; // sqrt(2) * cos(3 * pi / 8)
+
+    const float x0 = in[0];
+    const float x1 = in[1];
+    const float x2 = in[2];
+    const float x3 = in[3];
+    const float x4 = in[4];
+    const float x5 = in[5];
+    const float x6 = in[6];
+    const float x7 = in[7];
+
+    // Stage 1: sums and differences of mirrored input pairs.
+    const float s07 = x0 + x7;
+    const float d07 = x0 - x7;
+    const float s34 = x3 + x4;
+    const float d34 = x3 - x4;
+    const float s25 = x2 + x5;
+    const float d25 = x2 - x5;
+    const float s16 = x1 + x6;
+    const float d16 = x1 - x6;
+
+    // Stage 2: the sums feed the even outputs, the differences the odd outputs.
+    const float e0 = s07 + s34;
+    const float e1 = s07 - s34;
+    const float e2 = s16 + s25;
+    const float e3 = s16 - s25;
+    const float o0 = d16 + d25;
+    const float o1 = d07 + d16;
+    const float o2 = d25 + d34;
+
+    // Stage 3: rotations.
+    const float r0 = e3 + e1;
+    const float r1 = o2 - o1;
+    const float m0 = kInvSqrt2 * r0;
+    const float m1 = kInvSqrt2 * o0;
+    const float m2 = kCos3Pi8 * r1;
+    const float a0 = d07 + m1;
+    const float a1 = d07 - m1;
+    const float b0 = kHalfSec * o1 + m2;
+    const float b1 = kSqrt2Cos * o2 + m2;
+
+    out[0] = (e0 + e2);
+    out[1] = (a0 + b0);
+    out[2] = (e1 + m0);
+    out[3] = (a1 - b1);
+    out[4] = (e0 - e2);
+    out[5] = (a1 + b1);
+    out[6] = (e1 - m0);
+    out[7] = (a0 - b0);
+}
+
+// Applies hls_DCT8Core to consecutive groups of 8 stream values: 3 * xsize * ysize / 8
+// groups are read from `input`, transformed, and written to `output` in the same order.
+inline void hls_CmapDCT1D(int xsize, int ysize, hls::stream<float>& input, hls::stream<float>& output) {
+    float blockIn[8];
+    float blockOut[8];
+    const int groups = 3 * xsize * ysize / 8;
+
+    for (int g = 0; g < groups; g++) {
+#pragma HLS PIPELINE II = 8
+        for (int k = 0; k < 8; k++) {
+            blockIn[k] = input.read();
+        }
+        hls_DCT8Core(blockIn, blockOut);
+        for (int k = 0; k < 8; k++) {
+            output.write(blockOut[k]);
+        }
+    }
+}
+
+// Transposes the 8 x 8 blocks of the stream, one stripe of 8 rows at a time.
+//
+// A stripe is 8 rows of xsize / 8 blocks, each row of a block holding 3 channels x 8 values.
+// Incoming values are scattered into a buffer with the row and column indices swapped; the
+// buffer is then read back sequentially. Two buffers alternate so that one stripe is written
+// while the previous one is emitted. ysize / 8 stripes are processed, and a stripe must fit
+// in the 98304-entry buffers.
+inline void hls_transpose(int xsize, int ysize, hls::stream<float>& input, hls::stream<float>& output) {
+#ifndef __SYNTHESIS__
+    // Heap-backed buffers when not synthesising.
+    std::vector<float> linebuffer_ping(98304);
+    std::vector<float> linebuffer_pong(98304);
+#else
+    float linebuffer_ping[98304];
+#pragma HLS RESOURCE variable = linebuffer_ping core = RAM_S2P_URAM
+    float linebuffer_pong[98304];
+#pragma HLS RESOURCE variable = linebuffer_pong core = RAM_S2P_URAM
+#endif
+
+    const int blocksPerRow = xsize / 8;
+    // Stripes handled by the overlapped loop (all but the first fill and the last drain).
+    const int overlapped = ysize / 8 - 1;
+    // True when the stripe currently being received goes into linebuffer_ping.
+    bool fillPing = 0;
+
+    // Prologue: the first stripe is written to the pong buffer.
+    for (int row = 0; row < 8; row++) {
+        for (int blk = 0; blk < blocksPerRow; blk++) {
+            for (int ch = 0; ch < 3; ch++) {
+                for (int col = 0; col < 8; col++) {
+#pragma HLS PIPELINE II = 1
+                    const int wr = blk * 192 + col * 24 + ch * 8 + row;
+                    linebuffer_pong[wr] = input.read();
+                }
+            }
+        }
+    }
+    fillPing = !fillPing;
+
+    // Steady state: write one buffer, read the other.
+    for (int stripe = 0; stripe < overlapped; stripe++) {
+        for (int row = 0; row < 8; row++) {
+            for (int blk = 0; blk < blocksPerRow; blk++) {
+                for (int ch = 0; ch < 3; ch++) {
+                    for (int col = 0; col < 8; col++) {
+#pragma HLS PIPELINE II = 1
+                        const int wr = blk * 192 + col * 24 + ch * 8 + row;
+                        const int rd = row * xsize * 3 + blk * 24 + ch * 8 + col;
+                        const float incoming = input.read();
+                        if (fillPing) {
+                            linebuffer_ping[wr] = incoming;
+                            output.write(linebuffer_pong[rd]);
+                        } else {
+                            linebuffer_pong[wr] = incoming;
+                            output.write(linebuffer_ping[rd]);
+                        }
+                    }
+                }
+            }
+        }
+        fillPing = !fillPing;
+    }
+
+    // Epilogue: emit the stripe written last.
+    for (int row = 0; row < 8; row++) {
+        for (int blk = 0; blk < blocksPerRow; blk++) {
+            for (int ch = 0; ch < 3; ch++) {
+                for (int col = 0; col < 8; col++) {
+#pragma HLS PIPELINE II = 1
+                    const int rd = row * xsize * 3 + blk * 24 + ch * 8 + col;
+                    if (fillPing) {
+                        output.write(linebuffer_pong[rd]);
+                    } else {
+                        output.write(linebuffer_ping[rd]);
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Two-dimensional transform built from two 1-D passes with a transpose in between:
+//   combine -> 1-D DCT -> transpose -> 1-D DCT -> split (which also divides by 64).
+// The function is inlined, so the intermediate streams become FIFOs of the caller.
+inline void hls_dct2DCmap(int xsize, int ysize, hls::stream<float>& input, hls::stream<float>& output) {
+#pragma HLS inline
+    // Regrouped input.
+    hls::stream<float> combined("tmp1");
+#pragma HLS STREAM variable = combined depth = 32
+    // Result of the first 1-D pass.
+    hls::stream<float> pass1("tmp2");
+#pragma HLS STREAM variable = pass1 depth = 32
+    // Transposed blocks.
+    hls::stream<float> transposed("tmp3");
+#pragma HLS STREAM variable = transposed depth = 32
+    // Result of the second 1-D pass.
+    hls::stream<float> pass2("tmp4");
+#pragma HLS STREAM variable = pass2 depth = 32
+
+    hls_combineblock(xsize, ysize, input, combined);
+    hls_CmapDCT1D(xsize, ysize, combined, pass1);
+    hls_transpose(xsize, ysize, pass1, transposed);
+    hls_CmapDCT1D(xsize, ysize, transposed, pass2);
+    hls_splitblock(xsize, ysize, pass2, output);
+}
+
+// Finds where the running sum of array[1..len-1] is largest.
+//
+// array : input values; element 0 is never added to the running sum.
+// len   : number of elements in `array`.
+// idx   : receives the first index at which the running sum reaches its maximum, or 0 if the
+//         running sum never exceeds 0.
+// sum   : receives that maximum running sum (0 if it never exceeds 0).
+inline void FindIndexOfSumMaximum(const int* array, const size_t len, int* idx, int* sum) {
+    int running = 0;
+    int best = 0;
+    int bestPos = 0;
+
+    size_t pos = 1;
+    while (pos < len) {
+        running += array[pos];
+        // Strict comparison keeps the earliest position on ties.
+        if (best < running) {
+            best = running;
+            bestPos = pos;
+        }
+        ++pos;
+    }
+
+    *idx = bestPos;
+    *sum = best;
+}
+
+inline void hls_FindBestCorrelationCntDataFlow(int xsize,
+                                               int ysize,
+                                               hls::stream<float>& input,
+                                               hls::stream<float>& tilemaxStrm,
+                                               hls::stream<ap_uint<8> >& tileidxStrm) {
+    ap_uint<32> cur_y;
+
+    ap_int<64> d_num_zerosx_add[2][64][256];
+#pragma HLS RESOURCE variable = d_num_zerosx_add core = RAM_S2P_URAM
+#pragma HLS ARRAY_PARTITION variable = d_num_zerosx_add complete dim = 1
+
+    ap_int<64> d_num_zerosx_sub[2][64][256];
+#pragma HLS RESOURCE variable = d_num_zerosx_sub core = RAM_S2P_URAM
+#pragma HLS ARRAY_PARTITION variable = d_num_zerosx_sub complete dim = 1
+
+    ap_int<64> d_num_zerosx_pre[2][64][256];
+#pragma HLS RESOURCE variable = d_num_zerosx_pre core = RAM_S2P_URAM
+#pragma HLS ARRAY_PARTITION variable = d_num_zerosx_pre complete dim = 1
+
+    int ty;
+    ty = (ysize + 64 - 1) / 64;
+
+    for (int j = 0; j < (xsize + 64 - 1) / 64; j++) {
+        for (int i = 0; i < 256; i++) {
+#pragma HLS PIPELINE II = 1
+            d_num_zerosx_add[0][j][i] = 0;
+            d_num_zerosx_sub[0][j][i] = 0;
+            d_num_zerosx_pre[0][j][i] = 0;
+            d_num_zerosx_add[1][j][i] = 0;
+            d_num_zerosx_sub[1][j][i] = 0;
+            d_num_zerosx_pre[1][j][i] = 0;
+        }
+    }
+
+    int SCALEX = 256;
+    int OFFSETX = 128;
+    int SCALEB = 128;
+    int OFFSETB = 0;
+
+    float acceptancex = -0.625f;
+    float acceptanceb = 0.25f;
+
+    float kZeroBiasDefault[3] = {0.65f, 0.6f, 0.7f};
+    int N = 8;
+    int block_size = N * N;
+    float kScalex = SCALEX;
+    float kScaleb = SCALEB;
+    float kZeroThreshx = kScalex * kZeroBiasDefault[0];
+    float kZeroThreshb = kScaleb * kZeroBiasDefault[2];
+    size_t kColorTileDimInBlocks = 8;
+
+    ap_uint<32> y_local;
+    if (ysize - cur_y > 64)
+        y_local = 64;
+    else
+        y_local = ysize - cur_y;
+
+    ap_uint<9> ycnt = 0;
+    ap_uint<15> xcnt = 0;
+    ap_uint<6> txcnt = 0;
+    ap_uint<3> bxcnt = 0;
+    ap_uint<3> bycnt = 0;
+    ap_uint<3> bycnt_r = 0;
+    ap_uint<6> cnt64 = 0;
+
+    ap_uint<6> sumtxcnt = 0;
+    ap_uint<8> sumcnt256 = 0;
+    ap_uint<32> sumcnt = 0;
+    ap_uint<32> cnt = 0;
+    bool ping = true;
+
+    ap_uint<32> d_num_zerosx;
+    ap_int<32> tilemax = 0;
+    ap_uint<8> tileidx = 0;
+    ap_int<16> tilereg = 0;
+    ap_uint<32> apxsize = xsize;
+    ap_uint<32> apysize = ysize;
+
+    while (cnt < ysize * xsize && cnt < 64 * xsize) {
+#pragma HLS PIPELINE II = 3
+        if (cnt64 == 0) {
+            float colorx = input.read();
+            float colory = input.read();
+            float colorb = input.read();
+        } else {
+            float colorx = input.read();
+            float colory = input.read();
+            float colorb = input.read();
+
+            const float scaled_mx = colory * qmxlocal[cnt64];
+            const float scaled_mb = colory * qmblocal[cnt64];
+            const float scaled_sx = kScalex * colorx * qmxlocal[cnt64] + OFFSETX * scaled_mx;
+            const float scaled_sb = kScaleb * colorb * qmblocal[cnt64] + OFFSETB * scaled_mb;
+
+            // Increment num_zeros[idx] if
+            //   std::abs(scaled_s - (idx - OFFSET) *
+            //   scaled_m) < kZeroThresh
+            if (hls::abs(scaled_mx) >= 1e-8) {
+                float from;
+                float to;
+                if (scaled_mx > 0) {
+                    from = (scaled_sx - kZeroThreshx) / scaled_mx;
+                    to = (scaled_sx + kZeroThreshx) / scaled_mx;
+                } else {
+                    from = (scaled_sx + kZeroThreshx) / scaled_mx;
+                    to = (scaled_sx - kZeroThreshx) / scaled_mx;
+                }
+                // Instead of clamping the both values
+                // we just check that range is sane.
+                if (from < 0.0f) {
+                    from = 0.0f;
+                }
+                if (to > 255.0f) {
+                    to = 255.0f;
+                }
+                if (from <= to) {
+                    if (from < 255) {
+                        d_num_zerosx_add[ping][txcnt][(int)std::ceil(from)]++;
+                    }
+                    if (to < 255) {
+                        d_num_zerosx_sub[ping][txcnt][(int)std::floor(to + 1)]--;
+                    }
+                }
+            }
+        }
+
+        bycnt_r = bycnt;
+        if (xcnt != xsize * 8 - 1) {
+            xcnt++;
+        } else {
+            xcnt = 0;
+            bycnt++;
+        }
+
+        if (bycnt_r == 7 && bycnt == 0) {
+            ping = !ping;
+        }
+
+        cnt64 = xcnt.range(5, 0);
+        bxcnt = xcnt.range(8, 6);
+        txcnt = xcnt.range(14, 9);
+        cnt++;
+    }
+
+    while (cnt < ysize * xsize) {
+#pragma HLS pipeline II = 3
+        if (cnt64 == 0) {
+            float colorx = input.read();
+            float colory = input.read();
+            float colorb = input.read();
+        } else {
+            float colorx = input.read();
+            float colory = input.read();
+            float colorb = input.read();
+
+            const float scaled_mx = colory * qmxlocal[cnt64];
+            const float scaled_mb = colory * qmblocal[cnt64];
+            const float scaled_sx = kScalex * colorx * qmxlocal[cnt64] + OFFSETX * scaled_mx;
+            const float scaled_sb = kScaleb * colorb * qmblocal[cnt64] + OFFSETB * scaled_mb;
+
+            // Increment num_zeros[idx] if
+            //   std::abs(scaled_s - (idx - OFFSET) *
+            //   scaled_m) < kZeroThresh
+            if (hls::abs(scaled_mx) >= 1e-8) {
+                float from;
+                float to;
+                if (scaled_mx > 0) {
+                    from = (scaled_sx - kZeroThreshx) / scaled_mx;
+                    to = (scaled_sx + kZeroThreshx) / scaled_mx;
+                } else {
+                    from = (scaled_sx + kZeroThreshx) / scaled_mx;
+                    to = (scaled_sx - kZeroThreshx) / scaled_mx;
+                }
+                // Instead of clamping the both values
+                // we just check that range is sane.
+                if (from < 0.0f) {
+                    from = 0.0f;
+                }
+                if (to > 255.0f) {
+                    to = 255.0f;
+                }
+                if (from <= to) {
+                    if (from < 255) {
+                        d_num_zerosx_add[ping][txcnt][(int)std::ceil(from)]++;
+                    }
+                    if (to < 255) {
+                        d_num_zerosx_sub[ping][txcnt][(int)std::floor(to + 1)]--;
+                    }
+                }
+            }
+        }
+
+        if (sumcnt < ((xsize + 64 - 1) / 64) * 256) {
+            if (sumcnt256 != 0) {
+                d_num_zerosx =
+                    d_num_zerosx_add[!ping][sumtxcnt][sumcnt256] + d_num_zerosx_sub[!ping][sumtxcnt][sumcnt256];
+                tilereg = tilereg + d_num_zerosx - d_num_zerosx_pre[!ping][sumtxcnt][sumcnt256];
+                d_num_zerosx_pre[!ping][sumtxcnt][sumcnt256] = d_num_zerosx;
+                if (tilemax < tilereg) {
+                    tilemax = tilereg;
+                    tileidx = sumcnt256;
+                }
+                if (sumcnt256 == 255 && sumcnt != ((xsize + 64 - 1) / 64) * 256 - 1) {
+                    tilemaxStrm.write((float)tilemax / (64 * 64));
+                    tileidxStrm.write(tileidx);
+                    tilemax = 0;
+                    tileidx = 0;
+                    tilereg = 0;
+                } else if (sumcnt256 == 255 && sumcnt == ((xsize + 64 - 1) / 64) * 256 - 1) {
+                    ap_uint<32> tmp = apxsize.range(5, 0) == 0 ? 64 : apxsize.range(5, 0);
+                    tilemaxStrm.write((float)tilemax / (64 * tmp));
+                    tileidxStrm.write(tileidx);
+                    tilemax = 0;
+                    tileidx = 0;
+                    tilereg = 0;
+                }
+            }
+        }
+
+        bycnt_r = bycnt;
+        if (xcnt != xsize * 8 - 1) {
+            xcnt++;
+        } else {
+            xcnt = 0;
+            bycnt++;
+        }
+
+        if (bycnt_r == 7 && bycnt == 0) {
+            ping = !ping;
+            sumcnt = 0;
+        } else {
+            sumcnt++;
+        }
+
+        sumcnt256 = sumcnt.range(7, 0);
+        sumtxcnt = sumcnt.range(13, 8);
+        cnt64 = xcnt.range(5, 0);
+        bxcnt = xcnt.range(8, 6);
+        txcnt = xcnt.range(14, 9);
+        cnt++;
+    }
+
+    if (bycnt != 0) {
+        ping = !ping;
+    }
+    sumcnt = 0;
+    sumcnt256 = sumcnt.range(7, 0);
+    sumtxcnt = sumcnt.range(13, 8);
+
+    while (sumcnt < ((xsize + 64 - 1) / 64) * 256) {
+#pragma HLS PIPELINE II = 1
+
+        if (sumcnt256 != 0) {
+            d_num_zerosx = d_num_zerosx_add[!ping][sumtxcnt][sumcnt256] + d_num_zerosx_sub[!ping][sumtxcnt][sumcnt256];
+            tilereg = tilereg + d_num_zerosx - d_num_zerosx_pre[!ping][sumtxcnt][sumcnt256];
+            if (tilemax < tilereg) {
+                tilemax = tilereg;
+                tileidx = sumcnt256;
+            }
+            if (sumcnt256 == 255 && sumcnt != ((xsize + 64 - 1) / 64) * 256 - 1) {
+                ap_uint<32> tmpy = apysize.range(5, 0) == 0 ? 64 : apysize.range(5, 0);
+                tilemaxStrm.write((float)tilemax / (tmpy * 64));
+                tileidxStrm.write(tileidx);
+                tilemax = 0;
+                tileidx = 0;
+                tilereg = 0;
+            } else if (sumcnt256 == 255 && sumcnt == ((xsize + 64 - 1) / 64) * 256 - 1) {
+                ap_uint<32> tmpx = apxsize.range(5, 0) == 0 ? 64 : apxsize.range(5, 0);
+                ap_uint<32> tmpy = apysize.range(5, 0) == 0 ? 64 : apysize.range(5, 0);
+                tilemaxStrm.write((float)tilemax / (tmpy * tmpx));
+                tileidxStrm.write(tileidx);
+                tilemax = 0;
+                tileidx = 0;
+                tilereg = 0;
+            }
+        }
+        sumcnt++;
+        sumcnt256 = sumcnt.range(7, 0);
+        sumtxcnt = sumcnt.range(13, 8);
+    }
+}
+
+inline void hls_globalCnt(int xsize, // used here only through the product xsize * ysize
+                          int ysize,
+                          hls::stream<float>& input, // three floats are consumed per loop iteration
+                          hls::stream<float>& globalxmaxStrm, // gets one value: best x count / (xsize * ysize)
+                          hls::stream<ap_uint<8> >& globalxidxStrm, // gets one value: index of the best x count
+                          hls::stream<float>& globalbmaxStrm, // gets one value: best b count / (xsize * ysize)
+                          hls::stream<ap_uint<8> >& globalbidxStrm) { // gets one value: index of the best b count
+    ap_int<32> d_num_zeros_globalx_add[256]; // x channel: +1 marks at the first index of each accepted range
+    ap_int<32> d_num_zeros_globalx_sub[256]; // x channel: -1 marks one past the last index of each range
+    ap_int<32> d_num_zeros_globalb_add[256]; // b channel: same scheme as the x arrays
+    ap_int<32> d_num_zeros_globalb_sub[256];
+
+    int SCALEX = 256;
+    int OFFSETX = 128;
+    int SCALEB = 128;
+    int OFFSETB = 0;
+
+    float acceptancex = -0.625f; // not referenced in this function
+    float acceptanceb = 0.25f; // not referenced in this function
+
+    float kZeroBiasDefault[3] = {0.65f, 0.6f, 0.7f}; // entries 0 and 2 feed the x and b thresholds below
+    int N = 8;
+    int block_size = N * N; // not referenced in this function
+    float kScalex = SCALEX;
+    float kScaleb = SCALEB;
+    float kZeroThreshx = kScalex * kZeroBiasDefault[0];
+    float kZeroThreshb = kScaleb * kZeroBiasDefault[2];
+    size_t kColorTileDimInBlocks = 8; // not referenced in this function
+
+    ap_uint<6> cnt64 = 0; // low six bits of cnt, i.e. position within a run of 64 triples
+    ap_uint<32> cnt = 0; // number of input triples consumed so far
+
+    for (int i = 0; i < 256; i++) { // clear all four mark arrays before counting
+#pragma HLS pipeline ii = 1
+        d_num_zeros_globalx_add[i] = 0;
+        d_num_zeros_globalx_sub[i] = 0;
+        d_num_zeros_globalb_add[i] = 0;
+        d_num_zeros_globalb_sub[i] = 0;
+    }
+
+    while (cnt < xsize * ysize) { // one triple per iteration, 3 * xsize * ysize floats in total
+#pragma HLS pipeline II = 3
+        if (cnt64 == 0) { // every 64th triple is read and discarded (presumably the DC coefficient — confirm)
+            input.read();
+            input.read();
+            input.read();
+        } else {
+            float colorx = input.read();
+            float colory = input.read();
+            float colorb = input.read();
+
+            const float scaled_mx = colory * qmxglb[cnt64]; // qmxglb / qmbglb are tables defined elsewhere
+            const float scaled_mb = colory * qmbglb[cnt64];
+            const float scaled_sx = kScalex * colorx * qmxglb[cnt64] + OFFSETX * scaled_mx;
+            const float scaled_sb = kScaleb * colorb * qmbglb[cnt64] + OFFSETB * scaled_mb;
+
+            // Increment num_zeros[idx] if
+            //   std::abs(scaled_s - (idx - OFFSET) *
+            //   scaled_m) < kZeroThresh
+            if (hls::abs(scaled_mx) >= 1e-8) { // skip near-zero slopes, which are used as divisors below
+                float from;
+                float to;
+                if (scaled_mx > 0) { // a negative divisor swaps the two interval ends
+                    from = (scaled_sx - kZeroThreshx) / scaled_mx;
+                    to = (scaled_sx + kZeroThreshx) / scaled_mx;
+                } else {
+                    from = (scaled_sx + kZeroThreshx) / scaled_mx;
+                    to = (scaled_sx - kZeroThreshx) / scaled_mx;
+                }
+                // Instead of clamping both values
+                // we just check that range is sane.
+                if (from < 0.0f) {
+                    from = 0.0f;
+                }
+                if (to > 255.0f) {
+                    to = 255.0f;
+                }
+                if (from <= to) { // an empty interval contributes no marks
+                    if (from < 255) {
+                        d_num_zeros_globalx_add[(int)std::ceil(from)]++; // range starts at ceil(from)
+                    }
+                    if (to < 255) {
+                        d_num_zeros_globalx_sub[(int)std::floor(to + 1)]--; // range ends just before floor(to + 1)
+                    }
+                }
+            }
+
+            if (hls::abs(scaled_mb) >= 1e-8) { // same interval marking for the b channel
+                float from;
+                float to;
+                if (scaled_mb > 0) {
+                    from = (scaled_sb - kZeroThreshb) / scaled_mb;
+                    to = (scaled_sb + kZeroThreshb) / scaled_mb;
+                } else {
+                    from = (scaled_sb + kZeroThreshb) / scaled_mb;
+                    to = (scaled_sb - kZeroThreshb) / scaled_mb;
+                }
+                // Instead of clamping both values
+                // we just check that range is sane.
+                if (from < 0.0f) {
+                    from = 0.0f;
+                }
+                if (to > 255.0f) {
+                    to = 255.0f;
+                }
+                if (from <= to) {
+                    if (from < 255) {
+                        d_num_zeros_globalb_add[(int)std::ceil(from)]++;
+                    }
+                    if (to < 255) {
+                        d_num_zeros_globalb_sub[(int)std::floor(to + 1)]--;
+                    }
+                }
+            }
+        }
+        cnt++;
+        cnt64 = cnt.range(5, 0);
+    }
+
+    ap_int<32> glbbmax; // largest running sum seen so far (b channel)
+    ap_uint<8> glbbidx; // index at which glbbmax was reached
+    ap_int<32> glbxmax;
+    ap_uint<8> glbxidx;
+    ap_int<32> glbbreg; // running sum of the b marks
+    ap_int<32> glbxreg; // running sum of the x marks
+
+    glbbmax = 0;
+    glbbidx = 0;
+    glbxmax = 0;
+    glbxidx = 0;
+    glbbreg = 0;
+    glbxreg = 0;
+
+    for (int i = 1; i < 256; ++i) { // accumulate marks from index 1; index 0 is never a candidate
+#pragma HLS pipeline ii = 1
+        glbbreg = glbbreg + d_num_zeros_globalb_add[i] + d_num_zeros_globalb_sub[i];
+        if (glbbmax < glbbreg) { // strict compare keeps the lowest index on ties
+            glbbmax = glbbreg;
+            glbbidx = i;
+        }
+
+        glbxreg = glbxreg + d_num_zeros_globalx_add[i] + d_num_zeros_globalx_sub[i];
+        if (glbxmax < glbxreg) {
+            glbxmax = glbxreg;
+            glbxidx = i;
+        }
+    }
+
+    globalxmaxStrm.write((float)glbxmax / (xsize * ysize)); // maxima are emitted already divided by xsize * ysize
+    globalxidxStrm.write(glbxidx);
+    globalbmaxStrm.write((float)glbbmax / (xsize * ysize));
+    globalbidxStrm.write(glbbidx);
+}
+
+inline void hls_FindBestCorrelationStore(int xsize, // tx = ceil(xsize / 64) entries are read per row
+                                         int ysize, // ty = ceil(ysize / 64) rows are read
+                                         hls::stream<float>& tilemaxStrm, // in: tx * ty values, row by row
+                                         hls::stream<ap_uint<8> >& tileidxStrm, // in: tx * ty values, row by row
+                                         hls::stream<float>& globalxmaxStrm, // in: exactly one value is read
+                                         hls::stream<ap_uint<8> >& globalxidxStrm, // in: exactly one value is read
+                                         hls::stream<float>& globalbmaxStrm, // in: exactly one value is read
+                                         hls::stream<ap_uint<8> >& globalbidxStrm, // in: exactly one value is read
+                                         float tilemax[64][64], // out: per-tile values, indexed [row][column]
+                                         ap_uint<8> tileidx[64][64], // out: per-tile indices, same layout
+                                         float glbxmax[1], // out: the value read from globalxmaxStrm
+                                         ap_uint<8> glbxidx[1], // out: the value read from globalxidxStrm
+                                         float glbbmax[1], // out: the value read from globalbmaxStrm
+                                         ap_uint<8> glbbidx[1]) { // out: the value read from globalbidxStrm
+    int tx = (xsize + 64 - 1) / 64; // round up to whole 64-wide tiles
+    int ty = (ysize + 64 - 1) / 64;
+
+    for (int j = 0; j < ty; j++) { // NOTE(review): tx / ty are not checked against the 64x64 array bounds
+        for (int i = 0; i < tx; i++) {
+#pragma HLS pipeline ii = 1
+            tilemax[j][i] = tilemaxStrm.read();
+            tileidx[j][i] = tileidxStrm.read();
+        }
+    }
+
+    glbxmax[0] = globalxmaxStrm.read(); // the global results are drained after all tile results
+    glbxidx[0] = globalxidxStrm.read();
+    glbbmax[0] = globalbmaxStrm.read();
+    glbbidx[0] = globalbidxStrm.read();
+}
+
+inline void duplicate( // copies 3 * xsize * ysize floats from input to both outputs, preserving order
+    int xsize, int ysize, hls::stream<float>& input, hls::stream<float>& output1, hls::stream<float>& output2) {
+    for (int c = 0; c < 3; c++) { // the three loops only set the element count; indices are not used
+        for (int i = 0; i < xsize; i++) {
+            for (int j = 0; j < ysize; j++) {
+                float reg = input.read(); // read once, then fan out
+                output1.write(reg);
+                output2.write(reg);
+            }
+        }
+    }
+}
+
+inline void hls_FindBestCorrelationDataFlow(int xsize, // forwarded unchanged to every stage below
+                                            int ysize,
+                                            hls::stream<float>& input, // consumed by duplicate()
+                                            float tilemax[64][64], // out: filled by hls_FindBestCorrelationStore
+                                            ap_uint<8> tileidx[64][64], // out: filled by hls_FindBestCorrelationStore
+                                            float glbxmax[1], // out: filled by hls_FindBestCorrelationStore
+                                            ap_uint<8> glbxidx[1],
+                                            float glbbmax[1],
+                                            ap_uint<8> glbbidx[1]) {
+#pragma HLS DATAFLOW
+    hls::stream<float> output1; // copy of input for the global counting stage
+#pragma HLS STREAM variable = output1 depth = 32
+
+    hls::stream<float> output2("cmap output2"); // copy of input for the per-tile counting stage
+#pragma HLS STREAM variable = output2 depth = 32
+
+    hls::stream<float> globalxmaxStrm; // each global stream carries a single value, see hls_globalCnt
+#pragma HLS STREAM variable = globalxmaxStrm depth = 2
+
+    hls::stream<ap_uint<8> > globalxidxStrm;
+#pragma HLS STREAM variable = globalxidxStrm depth = 2
+
+    hls::stream<float> globalbmaxStrm;
+#pragma HLS STREAM variable = globalbmaxStrm depth = 2
+
+    hls::stream<ap_uint<8> > globalbidxStrm;
+#pragma HLS STREAM variable = globalbidxStrm depth = 2
+
+    hls::stream<float> tilemaxStrm("cmap tilemaxStrm"); // per-tile results on their way to the store stage
+#pragma HLS STREAM variable = tilemaxStrm depth = 32
+
+    hls::stream<ap_uint<8> > tileidxStrm("cmap tileidxStrm");
+#pragma HLS STREAM variable = tileidxStrm depth = 32
+
+    duplicate(xsize, ysize, input, output1, output2); // fan the input out to both counting stages
+    hls_globalCnt(xsize, ysize, output1, globalxmaxStrm, globalxidxStrm, globalbmaxStrm, globalbidxStrm);
+
+    hls_FindBestCorrelationCntDataFlow(xsize, ysize, output2, tilemaxStrm, tileidxStrm); // defined outside this view
+
+    hls_FindBestCorrelationStore(xsize, ysize, tilemaxStrm, tileidxStrm, globalxmaxStrm, globalxidxStrm, globalbmaxStrm,
+                                 globalbidxStrm, tilemax, tileidx, glbxmax, glbxidx, glbbmax, glbbidx);
+}
+
+inline void hls_FindBestCorrelationforward(int xsize, // tx = ceil(xsize / 64) tiles per row
+                                           int ysize, // ty = ceil(ysize / 64) tile rows
+                                           float tilemax[64][64], // in: per-tile values, indexed [row][column]
+                                           ap_uint<8> tileidx[64][64], // in: per-tile indices, same layout
+                                           float glbxmax[1], // in: only element 0 is read
+                                           ap_uint<8> glbxidx[1], // in: only element 0 is read
+                                           float glbbmax[1], // not read in this function
+                                           ap_uint<8> glbbidx[1], // in: only element 0 is read
+
+                                           hls::stream<ap_uint<8> >& cmapxStrm, // out: one value per tile
+                                           hls::stream<ap_uint<8> >& dcxStrm, // out: one value, glbxidx[0]
+                                           hls::stream<ap_uint<8> >& cmapbStrm, // out: one value per tile
+                                           hls::stream<ap_uint<8> >& dcbStrm) { // out: one value, glbbidx[0]
+    int tx = (xsize + 64 - 1) / 64;
+    int ty = (ysize + 64 - 1) / 64;
+
+    float acceptancex = -0.625f;
+    int N = 8;
+    int block_size = N * N;
+    size_t kColorTileDimInBlocks = 8;
+
+    float global_normalized_sumx = (float)glbxmax[0] / (xsize * ysize); // NOTE(review): hls_globalCnt already divides by xsize * ysize — confirm this second division is intended
+    float normalized_acceptance = acceptancex * kColorTileDimInBlocks * kColorTileDimInBlocks * block_size; // -0.625 * 8 * 8 * 64 = -2560
+
+    dcxStrm.write(glbxidx[0]); // the two global indices are emitted before any per-tile value
+    dcbStrm.write(glbbidx[0]);
+    for (int j = 0; j < ty; j++) {
+        for (int i = 0; i < tx; i++) {
+#pragma HLS pipeline ii = 1
+            if (tilemax[j][i] <= normalized_acceptance + global_normalized_sumx) // tile not above threshold: use the global x index
+                cmapxStrm.write(glbxidx[0]);
+            else
+                cmapxStrm.write(tileidx[j][i]);
+            cmapbStrm.write(glbbidx[0]); // the b map always carries the global b index
+        }
+    }
+}
+
+inline void hls_FindBestCorrelation_v2(int xsize, // forwarded unchanged to both stages
+                                       int ysize,
+                                       hls::stream<float>& input, // consumed by the dataflow stage
+                                       hls::stream<ap_uint<8> >& cmapxStrm, // written by the forward stage
+                                       hls::stream<ap_uint<8> >& dcxStrm,
+                                       hls::stream<ap_uint<8> >& cmapbStrm,
+                                       hls::stream<ap_uint<8> >& dcbStrm) {
+    float tilemax[64][64]; // local buffers hand the results of the first stage to the second
+    ap_uint<8> tileidx[64][64];
+    float glbxmax[1];
+    ap_uint<8> glbxidx[1];
+    float glbbmax[1];
+    ap_uint<8> glbbidx[1];
+
+    hls_FindBestCorrelationDataFlow(xsize, ysize, input, tilemax, tileidx, glbxmax, glbxidx, glbbmax, glbbidx); // stage 1: fill the buffers
+
+    hls_FindBestCorrelationforward(xsize, ysize, tilemax, tileidx, glbxmax, glbxidx, glbbmax, glbbidx, cmapxStrm,
+                                   dcxStrm, cmapbStrm, dcbStrm); // stage 2: turn the buffers into output streams
+}
+
+inline void k1XYBCalAddr(int xsize, // samples at columns >= xsize are not read from input
+                         int ysize, // samples at rows >= ysize are not read from input
+                         hls::stream<float>& input, // in: three values per in-bounds pixel, pixel by pixel
+                         hls::stream<float>& dataStrm, // out: 96-value groups, regrouped as 32 + 32 + 32 by c
+                         hls::stream<ap_uint<32> >& addrStrm) { // never written in this function
+    int x32 = (xsize + 32 - 1) / 32; // number of 32-wide column groups, rounded up
+    int y32 = (ysize + 32 - 1) / 32; // number of 32-high row groups, rounded up
+    float xyb[2][96]; // two 96-entry buffers: one is filled while the other is emitted
+#pragma HLS ARRAY_PARTITION variable = xyb complete dim = 0
+    bool ping = 0; // selects the buffer currently being filled
+
+    for (int y = 0; y < y32; y++) {
+        for (int j = 0; j < 32; j++) {
+            for (int x = 0; x < x32; x++) {
+                for (int i = 0; i < 32; i++) {
+                    for (int c = 0; c < 3; c++) {
+#pragma HLS pipeline II = 1
+                        if ((x * 32 + i) < xsize && (y * 32 + j) < ysize) { // out of bounds: slot keeps its previous contents
+                            xyb[ping][i + c * 32] = input.read(); // store grouped by c: slot = i + 32 * c
+                        }
+                        dataStrm.write(xyb[!ping][i * 3 + c]); // emit the other buffer in linear slot order 0..95
+                    }
+                }
+                ping = !ping; // swap buffers after each 32-pixel segment
+            }
+        }
+    }
+
+    for (int i = 0; i < 32; i++) { // flush the segment filled last, which has not been emitted yet
+        for (int c = 0; c < 3; c++) {
+#pragma HLS pipeline II = 1
+            dataStrm.write(xyb[!ping][i * 3 + c]);
+        }
+    }
+}
+
+inline void k1XYBDDRCtrl(int xsize, // used only to derive x32
+                         int ysize, // used only to derive y32
+                         hls::stream<float>& dataStrm, // in: 96 values to discard, then 96 per 32-pixel segment
+                         hls::stream<ap_uint<32> >& addrStrm, // never read in this function
+                         ap_uint<32> axi_out[AXI_OUT]) { // out: one 32-bit word per value read after the discard
+    float reg;
+    unsigned int reg_int;
+    ap_uint<32> reg_apint;
+
+    int x32 = (xsize + 32 - 1) / 32; // number of 32-wide column groups, rounded up
+    int y32 = (ysize + 32 - 1) / 32; // number of 32-high row groups, rounded up
+    int n = x32 * 32 * y32; // not referenced below
+    ap_uint<32> addr; // not referenced below
+
+    for (int i = 0; i < 32; i++) { // drop the first 96 values; k1XYBCalAddr emits them from an unfilled buffer
+        for (int c = 0; c < 3; c++) {
+#pragma HLS pipeline II = 1
+            dataStrm.read();
+        }
+    }
+
+    for (int y = 0; y < y32; y++) {
+        for (int j = 0; j < 32; j++) {
+            for (int x = 0; x < x32; x++) {
+                for (int c = 0; c < 3; c++) { // c is outside i here, matching the 32 + 32 + 32 grouping of the stream
+                    for (int i = 0; i < 32; i++) {
+#pragma HLS pipeline II = 1
+                        reg = dataStrm.read();
+                        reg_int = fToBits<float, unsigned int>(reg); // fToBits is defined elsewhere; presumably a bit-cast — confirm
+                        reg_apint = reg_int;
+                        axi_out[c * x32 * y32 * 1024 + y * x32 * 1024 + x * 1024 + j * 32 + i] = reg_apint; // plane c, then 1024-word block (y, x), then row j, column i
+                    }
+                }
+            }
+        }
+    }
+}
+
+inline void k1XYBWriteOut(int xsize, int ysize, hls::stream<float>& input, ap_uint<32> axi_out[AXI_OUT]) { // chains k1XYBCalAddr and k1XYBDDRCtrl
+#pragma HLS inline
+
+    hls::stream<float> dataStrm; // carries the regrouped samples between the two stages
+#pragma HLS STREAM variable = dataStrm depth = 32
+    hls::stream<ap_uint<32> > addrStrm; // passed to both stages, but neither one reads or writes it
+#pragma HLS STREAM variable = addrStrm depth = 32
+
+    k1XYBCalAddr(xsize, ysize, input, dataStrm, addrStrm); // producer: regroups the input into dataStrm
+
+    k1XYBDDRCtrl(xsize, ysize, dataStrm, addrStrm, axi_out); // consumer: stores dataStrm into axi_out
+}
+
+inline void k1CmapWriteOut(int xsize, // x64 = ceil(xsize / 64) entries per row
+                           int ysize, // y64 = ceil(ysize / 64) rows
+                           hls::stream<ap_uint<8> >& cmapxStrm, // in: one value per tile
+                           hls::stream<ap_uint<8> >& dcxStrm, // in: one value, stored in axi_cmap[0]
+                           hls::stream<ap_uint<8> >& cmapbStrm, // in: one value per tile
+                           hls::stream<ap_uint<8> >& dcbStrm, // in: one value, stored in axi_cmap[1]
+                           ap_uint<32> axi_cmap[AXI_CMAP]) { // out: [0] = dcx, [1] = dcb, then x / b pairs per tile
+    float reg; // not referenced below
+    unsigned int reg_int; // not referenced below
+
+    int cntCmap; // next free word in axi_cmap
+    cntCmap = 2; // words 0 and 1 are reserved for the two dc values written at the end
+    int x64 = (xsize + 64 - 1) / 64;
+    int y64 = (ysize + 64 - 1) / 64;
+    for (int i = 0; i < y64; i++) {
+        for (int j = 0; j < x64; j++) {
+#pragma HLS pipeline ii = 2
+            axi_cmap[cntCmap] = cmapxStrm.read(); // the x entry and b entry of a tile are stored adjacently
+            cntCmap++;
+            axi_cmap[cntCmap] = cmapbStrm.read();
+            cntCmap++;
+        }
+    }
+    axi_cmap[0] = dcxStrm.read(); // the dc streams are drained last, after every per-tile value
+    axi_cmap[1] = dcbStrm.read();
+}
+
+inline void dupxyb( // interleaves the three input streams and writes the same sequence to dctin and output
+    int xsize, int ysize, hls::stream<float> xybGabStrm[3], hls::stream<float>& dctin, hls::stream<float>& output) {
+    for (int y = 0; y < ysize; y++) {
+        for (int x = 0; x < xsize; x++) { // per iteration: one value from each of streams 0, 1, 2, in that order
+#pragma HLS pipeline II = 3
+            float reg = xybGabStrm[0].read();
+            dctin.write(reg);
+            output.write(reg);
+            reg = xybGabStrm[1].read();
+            dctin.write(reg);
+            output.write(reg);
+            reg = xybGabStrm[2].read();
+            dctin.write(reg);
+            output.write(reg);
+        }
+    }
+}
+
+inline void loadToStrm( // converts xsize * ysize words per channel to float and discards the matching flags
+    int xsize, int ysize, hls::stream<DT> ostrm[3], hls::stream<bool> e_ostrm[3], hls::stream<float> rgbStrm[3]) {
+    for (size_t y = 0; y < ysize; ++y) { // NOTE(review): size_t counter is compared against an int bound
+        for (size_t x = 0; x < xsize; x++) {
+#pragma HLS pipeline II = 1
+            e_ostrm[0].read(); // flag values are read only to drain the streams; they are not inspected
+            e_ostrm[1].read();
+            e_ostrm[2].read();
+            rgbStrm[0].write(bitsToF<int, float>(ostrm[0].read())); // bitsToF is defined elsewhere; presumably a bit-cast — confirm
+            rgbStrm[1].write(bitsToF<int, float>(ostrm[1].read()));
+            rgbStrm[2].write(bitsToF<int, float>(ostrm[2].read()));
+        }
+    }
+
+    e_ostrm[0].read(); // one extra flag per channel (presumably the end-of-stream marker — confirm)
+    e_ostrm[1].read();
+    e_ostrm[2].read();
+}
+
+inline void kernel1_core(ap_uint<AXI_WIDTH> rbuf[BUF_DEPTH / 2], // in: passed to axiToMultiStream together with len / offsets
+                         const int len[3],
+                         const int offsets[3],
+                         int xsize,
+                         int ysize,
+                         float quant_ac, // forwarded to initQFStrm
+                         ap_uint<32> axi_out[AXI_OUT], // out: written by k1XYBWriteOut
+                         ap_uint<32> axi_cmap[AXI_CMAP], // out: written by k1CmapWriteOut
+                         ap_uint<32> axi_qf[AXI_QF]) { // out: written by QFWriteOut
+#pragma HLS INLINE off
+#pragma HLS DATAFLOW
+
+    hls::stream<DT> ostrm[3]; // axiToMultiStream -> loadToStrm (data words)
+#pragma HLS STREAM variable = ostrm depth = 1024
+#pragma HLS ARRAY_PARTITION variable = ostrm complete dim = 0
+#pragma HLS RESOURCE variable = ostrm core = FIFO_BRAM
+
+    hls::stream<bool> e_ostrm[3]; // axiToMultiStream -> loadToStrm (flags)
+#pragma HLS STREAM variable = e_ostrm depth = 1024
+#pragma HLS ARRAY_PARTITION variable = e_ostrm complete dim = 0
+#pragma HLS RESOURCE variable = e_ostrm core = FIFO_LUTRAM
+
+    hls::stream<float> rgbStrm[3]; // loadToStrm -> hls_OpsinDynamicsImage
+#pragma HLS STREAM variable = rgbStrm depth = 1024
+#pragma HLS ARRAY_PARTITION variable = rgbStrm complete dim = 0
+#pragma HLS RESOURCE variable = rgbStrm core = FIFO_LUTRAM
+
+    hls::stream<float> xybStrm[3]; // hls_OpsinDynamicsImage -> hls_GaborishInverse
+#pragma HLS STREAM variable = xybStrm depth = 32
+#pragma HLS ARRAY_PARTITION variable = xybStrm complete dim = 0
+#pragma HLS RESOURCE variable = xybStrm core = FIFO_LUTRAM
+
+    hls::stream<float> xybGabStrm[3]; // hls_GaborishInverse -> dupxyb
+#pragma HLS STREAM variable = xybGabStrm depth = 32
+#pragma HLS ARRAY_PARTITION variable = xybGabStrm complete dim = 0
+#pragma HLS RESOURCE variable = xybGabStrm core = FIFO_LUTRAM
+
+    hls::stream<float> yOrigStrm("yorig"); // hls_OpsinDynamicsImage -> initQFStrm
+#pragma HLS STREAM variable = yOrigStrm depth = 32
+#pragma HLS RESOURCE variable = yOrigStrm core = FIFO_LUTRAM
+
+    hls::stream<float> qfinStrm; // declared but not passed to any call below
+#pragma HLS STREAM variable = qfinStrm depth = 32
+#pragma HLS RESOURCE variable = qfinStrm core = FIFO_LUTRAM
+
+    hls::stream<float> qfStrm; // initQFStrm -> QFWriteOut
+#pragma HLS STREAM variable = qfStrm depth = 32
+#pragma HLS RESOURCE variable = qfStrm core = FIFO_LUTRAM
+
+    hls::stream<float> avgStrm; // initQFStrm -> QFWriteOut
+#pragma HLS STREAM variable = avgStrm depth = 32
+#pragma HLS RESOURCE variable = avgStrm core = FIFO_LUTRAM
+
+    hls::stream<ap_uint<8> > cmapxStrm("cmapxStrm"); // hls_FindBestCorrelation_v2 -> k1CmapWriteOut
+#pragma HLS STREAM variable = cmapxStrm depth = 32
+#pragma HLS RESOURCE variable = cmapxStrm core = FIFO_LUTRAM
+
+    hls::stream<ap_uint<8> > dcxStrm("dcxStrm"); // hls_FindBestCorrelation_v2 -> k1CmapWriteOut
+#pragma HLS STREAM variable = dcxStrm depth = 32
+#pragma HLS RESOURCE variable = dcxStrm core = FIFO_LUTRAM
+
+    hls::stream<ap_uint<8> > cmapbStrm("cmapbStrm"); // hls_FindBestCorrelation_v2 -> k1CmapWriteOut
+#pragma HLS STREAM variable = cmapbStrm depth = 32
+#pragma HLS RESOURCE variable = cmapbStrm core = FIFO_LUTRAM
+
+    hls::stream<ap_uint<8> > dcbStrm("dcbStrm"); // hls_FindBestCorrelation_v2 -> k1CmapWriteOut
+#pragma HLS STREAM variable = dcbStrm depth = 32
+#pragma HLS RESOURCE variable = dcbStrm core = FIFO_LUTRAM
+
+    hls::stream<float> dctin("dctin"); // dupxyb -> hls_dct2DCmap
+#pragma HLS STREAM variable = dctin depth = 32
+#pragma HLS RESOURCE variable = dctin core = FIFO_LUTRAM
+
+    hls::stream<float> dctout("dctout"); // hls_dct2DCmap -> hls_FindBestCorrelation_v2
+#pragma HLS STREAM variable = dctout depth = 32
+#pragma HLS RESOURCE variable = dctout core = FIFO_LUTRAM
+
+    hls::stream<float> output("output"); // dupxyb -> k1XYBWriteOut
+#pragma HLS STREAM variable = output depth = 32
+#pragma HLS RESOURCE variable = output core = FIFO_LUTRAM
+
+    static const int kResolution = 8;
+    const size_t out_xsize = (xsize + kResolution - 1) / kResolution; // xsize in units of 8, rounded up
+    const size_t out_ysize = (ysize + kResolution - 1) / kResolution; // ysize in units of 8, rounded up
+
+    xf::common::utils_hw::axiToMultiStream<1024, AXI_WIDTH, DT, DT, DT>(rbuf, ostrm[0], e_ostrm[0], ostrm[1],
+                                                                        e_ostrm[1], ostrm[2], e_ostrm[2], len, offsets); // library call; presumably unpacks rbuf into the three stream pairs — confirm
+
+    loadToStrm(xsize, ysize, ostrm, e_ostrm, rgbStrm); // drains the flags and converts the data words to float
+
+    hls_OpsinDynamicsImage(rgbStrm, xsize, ysize, xybStrm, yOrigStrm); // defined outside this view
+
+    hls_GaborishInverse(xybStrm, xsize, ysize, xybGabStrm); // defined outside this view
+
+    initQFStrm(xsize, ysize, 0.11883287948847132, quant_ac, yOrigStrm, qfStrm, avgStrm); // defined outside this view; meaning of the literal is not shown here
+
+    QFWriteOut(out_xsize, out_ysize, qfStrm, avgStrm, axi_qf); // defined outside this view
+
+    dupxyb(out_xsize * 8, // xsize rounded up to a multiple of 8 (opsin.xsize())
+           out_ysize * 8, // ysize rounded up to a multiple of 8 (opsin.ysize())
+           xybGabStrm, dctin, output);
+
+    hls_dct2DCmap(out_xsize * 8, // opsin.xsize(),
+                  out_ysize * 8, // opsin.ysize(),
+                  dctin, dctout);
+
+    hls_FindBestCorrelation_v2(out_xsize * 8, // opsin.xsize(),
+                               out_ysize * 8, // opsin.ysize(),
+                               dctout, cmapxStrm, dcxStrm, cmapbStrm, dcbStrm);
+
+    k1CmapWriteOut(out_xsize * 8, // opsin.xsize(),
+                   out_ysize * 8, // opsin.ysize(),
+                   cmapxStrm, dcxStrm, cmapbStrm, dcbStrm, axi_cmap);
+
+    k1XYBWriteOut(out_xsize * 8, // opsin.xsize(),
+                  out_ysize * 8, // opsin.ysize(),
+                  output, axi_out);
+}
+
+inline void loadConfig( // unpacks config words 0..8 into the kernel's scalar parameters
+    ap_uint<32> config[MAX_NUM_CONFIG], int len[3], int offsets[3], int& xsize, int& ysize, float& quant_ac) {
+#pragma HLS INLINE off
+
+    len[0] = config[0]; // words 0..2 -> len[0..2]
+    len[1] = config[1];
+    len[2] = config[2];
+
+    offsets[0] = config[3]; // words 3..5 -> offsets[0..2]
+    offsets[1] = config[4];
+    offsets[2] = config[5];
+
+    xsize = config[6]; // word 6 -> xsize
+    ysize = config[7]; // word 7 -> ysize
+    int32_t quant_ac_tmp = config[8]; // word 8 holds quant_ac as a raw 32-bit pattern
+    quant_ac = bitsToF<int32_t, float>(quant_ac_tmp); // bitsToF is defined elsewhere; presumably a bit-cast — confirm
+}
+
+namespace xf {
+namespace codec {
+// ------------------------------------------------------------
+/**
+ * @brief Level 2 : kernel1 implementation for PIK
+ *
+ * @param config control signals, such as image size information, stream length and offsets of each color.
+ * @param rbuf the input RGB data, aligned by different colors.
+ * @param axi_out the output XYB data generated by color transformation of RGB.
+ * @param axi_cmap color correlation map, used to adjust color in AC quantization.
+ * @param axi_qf information of the quant-field, for choosing the correct quant-table in AC quantization.
+ *
+ */
+extern "C" void pikEncKernel1Top(ap_uint<32> config[MAX_NUM_CONFIG],
+                                 ap_uint<AXI_WIDTH> rbuf[BUF_DEPTH / 2],
+                                 ap_uint<32> axi_out[AXI_OUT],
+                                 ap_uint<32> axi_cmap[AXI_CMAP],
+                                 ap_uint<32> axi_qf[AXI_QF]);
+} // namespace codec
+} // namespace xf
+#endif //_XF_CODEC_XACCPIKKERNEL1_HPP_
diff --git a/codec/L2/include/hw/pikEnc/XAccPIKKernel2.hpp b/codec/L2/include/hw/pikEnc/XAccPIKKernel2.hpp
new file mode 100644
index 0000000000..ac1c19d103
--- /dev/null
+++ b/codec/L2/include/hw/pikEnc/XAccPIKKernel2.hpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file XAccPIKKernel2.hpp
+ */
+
+#ifndef _XF_CODEC_XACCPIKKERNEL2_HPP_
+#define _XF_CODEC_XACCPIKKERNEL2_HPP_
+
+#include <ap_int.h>
+#include <hls_math.h>
+#include <hls_stream.h>
+
+#include "pik_common.hpp"
+
+struct Config { // parameter bundle declared in the kernel2 header; its consumers are not visible here
+    uint32_t xsize;
+    uint32_t ysize;
+    uint32_t xblock8; // NOTE(review): presumably block counts at 8-pixel granularity — confirm
+    uint32_t yblock8;
+    uint32_t xblock32; // NOTE(review): presumably block counts at 32-pixel granularity — confirm
+    uint32_t yblock32;
+    uint32_t xgroup;
+    uint32_t ygroup;
+
+    int in_quant_field_num; // NOTE(review): the *_num fields look like element counts of the kernel2 buffers — confirm
+    int cmap_num0;
+    int cmap_num1;
+    int ac_num;
+    int dc_num;
+    int acs_num;
+    int out_quant_field_num;
+
+    bool kChooseAcStrategy;
+    float discretization_factor;
+    float kMulInhomogeneity16x16;
+    float kMulInhomogeneity32x32;
+    float butteraugli_target;
+    float intensity_multiplier;
+    float quant_dc;
+
+    int src_num[3]; // three entries (presumably one per color channel — confirm)
+    int src_offset[3]; // three entries (presumably one per color channel — confirm)
+};
+
+struct Quantizer { // quantizer state declared in the kernel2 header; its consumers are not visible here
+    int quant_dc;
+    int global_scale;
+    float global_scale_float; // NOTE(review): presumably global_scale as a float — confirm where it is set
+    float inv_global_scale; // NOTE(review): presumably a reciprocal of global_scale — confirm where it is set
+    float inv_quant_dc; // NOTE(review): presumably a reciprocal of quant_dc — confirm where it is set
+};
+
+namespace xf {
+namespace codec {
+// ------------------------------------------------------------
+/**
+ * @brief Level 2 : kernel2 implementation for PIK
+ *
+ * @param config control signals, such as image size information, stream length and offsets of each color.
+ * @param src the input XYB data, aligned by different colors and stored in 32x32 rectangular block order.
+ * @param quant_field_in information of the floating point quant-field, for choosing the correct quant-table in AC
+ * quantization.
+ * @param cmap color correlation map, used to adjust color in AC quantization.
+ * @param ac output quantized AC stream.
+ * @param dc output quantized DC stream.
+ * @param quant_field_out output information of quantized quant-field; it indicates the correct quant-table for decoder.
+ * @param ac_strategy output AC strategy, i.e. the information about the DCT size of each 8x8 block in the image.
+ * @param block output block shows the beginning position of each DCT computation.
+ * @param order output information of order, used for zigzag encoding in kernel3.
+ *
+ */
+
+extern "C" void pikEncKernel2Top(ap_uint<AXI_SZ> config[MAX_NUM_CONFIG],
+
+                                 ap_uint<2 * AXI_SZ> src[AXI_OUT / 2],
+                                 ap_uint<AXI_SZ> quant_field_in[AXI_QF],
+                                 ap_uint<AXI_SZ> cmap[AXI_CMAP],
+
+                                 ap_uint<AXI_SZ> ac[MAX_NUM_AC],
+                                 ap_uint<AXI_SZ> dc[MAX_NUM_DC],
+                                 ap_uint<AXI_SZ> quant_field_out[AXI_QF],
+                                 ap_uint<AXI_SZ> ac_strategy[MAX_NUM_BLOCK88],
+                                 ap_uint<AXI_SZ> block[MAX_NUM_BLOCK88],
+                                 ap_uint<AXI_SZ> order[MAX_NUM_ORDER]);
+} // namespace codec
+} // namespace xf
+#endif
diff --git a/codec/L2/include/hw/pikEnc/XAccPIKKernel3.hpp b/codec/L2/include/hw/pikEnc/XAccPIKKernel3.hpp
new file mode 100644
index 0000000000..dd95bb4afc
--- /dev/null
+++ b/codec/L2/include/hw/pikEnc/XAccPIKKernel3.hpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file XAccPIKKernel3.hpp
+ */
+
+#ifndef _XF_CODEC_XACCPIKKERNEL3_HPP_
+#define _XF_CODEC_XACCPIKKERNEL3_HPP_
+
+#include "kernel3/ac_tokenize.hpp"
+#include "kernel3/ans.hpp"
+#include "kernel3/build_table_encode_histo.hpp"
+
+#include "kernel3/build_cluster.hpp"
+#include "kernel3/ctrl_tokenize.hpp"
+#include "kernel3/dc_shrink.hpp"
+#include "kernel3/dc_tokenize.hpp"
+
+#include "pik_common.hpp"
+
+struct ConfigKernel3 { // Sixteen 32-bit unsigned fields; declared at global scope, before namespace xf::codec opens.
+    uint32_t xsize; // NOTE(review): xsize/ysize presumably the image size in pixels - confirm against the host code
+    uint32_t ysize;
+    uint32_t xblock8; // NOTE(review): x/yblock8, 32, 64 presumably block counts per axis for each block size - confirm
+    uint32_t yblock8;
+    uint32_t xblock32;
+    uint32_t yblock32;
+    uint32_t xblock64;
+    uint32_t yblock64;
+    uint32_t ac_xgroup; // NOTE(review): group fields presumably count AC/DC groups per axis and in total - confirm
+    uint32_t ac_ygroup;
+    uint32_t dc_xgroup;
+    uint32_t dc_ygroup;
+    uint32_t ac_group;
+    uint32_t dc_group;
+    uint32_t num_dc; // NOTE(review): num_dc/num_ac presumably the DC and AC stream lengths - confirm
+    uint32_t num_ac;
+};
+
+#ifndef __SYNTHESIS__ // the three debug macros below exist only when __SYNTHESIS__ is not defined
+#define DEBUG (1) // NOTE(review): very generic macro name for a header; may collide with other code that tests DEBUG
+#define DEBUGAXItoPikAcStream
+#define DEBUGAXItoStream
+#endif // !__SYNTHESIS__
+
+namespace xf {
+namespace codec {
+// ------------------------------------------------------------
+/**
+ * @brief Level 2 : kernel3 implementation for pik
+ *
+ * @param config control signals, such as image size information, stream length and offsets of each color.
+ * @param ddr_ac input quantized AC stream.
+ * @param ddr_dc input quantized DC stream.
+ * @param ddr_quant_field input quant-table information for the decoder.
+ * @param ddr_ac_strategy input information on the DCT size of each small 8x8 block.
+ * @param ddr_block input information on the DCT starting position.
+ * @param hls_order input information on the encoding order.
+ * @param histo_cfg output config for the AC and DC histo lengths.
+ * @param dc_histo_code_out encoding result of the DC histo.
+ * @param dc_code_out encoding result of the DC tokens.
+ * @param ac_histo_code_out encoding result of the AC histo.
+ * @param ac_code_out encoding result of the AC tokens.
+ *
+ */
+
+extern "C" void pikEncKernel3Top(ap_uint<32>* config,
+
+                                 ap_uint<32>* ddr_ac,
+                                 ap_uint<32>* ddr_dc,
+                                 ap_uint<32>* ddr_quant_field,
+                                 ap_uint<32>* ddr_ac_strategy,
+                                 ap_uint<32>* ddr_block,
+                                 ap_uint<32>* hls_order,
+
+                                 ap_uint<32>* histo_cfg,
+                                 ap_uint<32>* dc_histo_code_out,
+                                 ap_uint<32>* dc_code_out,
+                                 ap_uint<32>* ac_histo_code_out,
+                                 ap_uint<32>* ac_code_out);
+} // namespace codec
+} // namespace xf
+#endif
diff --git a/codec/L2/include/hw/pikEnc/dct.hpp b/codec/L2/include/hw/pikEnc/dct.hpp
new file mode 100644
index 0000000000..6214748738
--- /dev/null
+++ b/codec/L2/include/hw/pikEnc/dct.hpp
@@ -0,0 +1,1591 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file dct.hpp
+ */
+
+#ifndef _XF_CODEC_DCT_HPP_
+#define _XF_CODEC_DCT_HPP_
+
+#ifndef __cplusplus
+#error "Vitis Codec Library only works with C++."
+#endif
+
+#include <ap_int.h>
+#include <hls_stream.h>
+
+const float kDCTScales2[2] = {0.707106781186547524f, 0.707106781186547524f};
+// kDCTScales<N>[k] is what DCTScales<N>(k) returns; kIDCTScales<N>[k] is what IDCTScales<N>(k) returns.
+const float kIDCTScales2[2] = {0.707106781186547524f, 0.707106781186547524f};
+// For N = 2, 4 and 8 the entry-wise product kDCTScales<N>[k] * kIDCTScales<N>[k] is 1/N to the printed precision.
+const float kDCTScales4[4] = {0.5f, 0.653281482438188264f, 0.5f, 0.270598050073098492f};
+
+const float kIDCTScales4[4] = {0.5f, 0.382683432365089772f, 0.5f, 0.923879532511286756f};
+
+const float kDCTScales8[8] = {0.353553390593273762f, 0.254897789552079584f, 0.270598050073098492f,
+                              0.30067244346752264f,  0.353553390593273762f, 0.449988111568207852f,
+                              0.653281482438188264f, 1.28145772387075309f};
+
+const float kIDCTScales8[8] = {0.353553390593273762f, 0.490392640201615225f, 0.461939766255643378f,
+                               0.415734806151272619f, 0.353553390593273762f, 0.277785116509801112f,
+                               0.191341716182544886f, 0.0975451610080641339f};
+
+const float kIDCTScales16[16] = {0.25f, // table returned by IDCTScales<16>(k)
+                                 0.177632042131274808f,
+                                 0.180239955501736978f,
+                                 0.184731156892216368f,
+                                 0.191341716182544886f,
+                                 0.200444985785954314f,
+                                 0.212607523691814112f,
+                                 0.228686034616512494f,
+                                 0.25f,
+                                 0.278654739432954475f,
+                                 0.318189645143208485f,
+                                 0.375006192208515097f,
+                                 0.461939766255643378f,
+                                 0.608977011699708658f,
+                                 0.906127446352887843f,
+                                 1.80352839005774887f};
+// Spot check (k = 0, 1, 15): kDCTScales16[k] * kIDCTScales16[k] is 1/16 to the printed precision.
+const float kDCTScales16[16] = {0.25f, // table returned by DCTScales<16>(k)
+                                0.351850934381595615f,
+                                0.346759961330536865f,
+                                0.33832950029358817f,
+                                0.326640741219094132f,
+                                0.311806253246667808f,
+                                0.293968900604839679f,
+                                0.273300466750439372f,
+                                0.25f,
+                                0.224291896585659071f,
+                                0.196423739596775545f,
+                                0.166663914619436624f,
+                                0.135299025036549246f,
+                                0.102631131880589345f,
+                                0.0689748448207357531f,
+                                0.0346542922997728657f};
+
+const float kIDCTScales32[32] = { // fallback table of IDCTScales<N>() for any N other than 2, 4, 8, 16
+    0.176776695296636881f, 0.125150749558799075f, 0.125604821547038926f, 0.126367739974385915f, 0.127448894776039792f,
+    0.128861827480656137f, 0.13062465373492222f,  0.132760647772446044f, 0.135299025036549246f, 0.138275974008611132f,
+    0.141736008704089426f, 0.145733742051533468f, 0.15033622173376132f,  0.155626030758916204f, 0.161705445839997532f,
+    0.168702085363751436f, 0.176776695296636881f, 0.186134067750574612f, 0.197038655862812556f, 0.20983741135388176f,
+    0.224994055784103926f, 0.243142059465490173f, 0.265169421497586868f, 0.292359983358221239f, 0.326640741219094132f,
+    0.371041154078541569f, 0.430611774559583482f, 0.514445252488352888f, 0.640728861935376545f, 0.851902104617179697f,
+    1.27528715467229096f,  2.5475020308870142f};
+// Spot check (k = 0, 1): kDCTScales32[k] * kIDCTScales32[k] is 1/32 to the printed precision.
+const float kDCTScales32[32] = { // fallback table of DCTScales<N>() for any N other than 2, 4, 8, 16
+    0.176776695296636881f,  0.249698864051293098f,  0.248796181668049222f,  0.247294127491195243f,
+    0.245196320100807612f,  0.242507813298635998f,  0.239235083933052216f,  0.235386016295755195f,
+    0.230969883127821689f,  0.225997323280860833f,  0.220480316087088757f,  0.214432152500068017f,
+    0.207867403075636309f,  0.200801882870161227f,  0.19325261334068424f,   0.185237781338739773f,
+    0.176776695296636881f,  0.1678897387117546f,    0.158598321040911375f,  0.148924826123108336f,
+    0.138892558254900556f,  0.128525686048305432f,  0.117849184206499412f,  0.106888773357570524f,
+    0.0956708580912724429f, 0.0842224633480550127f, 0.0725711693136155919f, 0.0607450449758159725f,
+    0.048772580504032067f,  0.0366826186138404379f, 0.0245042850823901505f, 0.0122669185818545036f};
+
+const float kL1Norm2[2] = { // kL1Norm<N>[k] is what L1Norm<N>(k) returns
+    1.0000000000000000000f, 1.0000000000000000000f,
+};
+// In the tables below, entries at multiples of 4 are 1 (index 0) or cos(pi/8) (index 4); the rest carry more factors.
+const float kL1Norm4[4] = {
+    1.0000000000000000000f, //
+    0.9238795325112867561f, // cos(pi/8)
+    1.0000000000000000000f, //
+    0.9238795325112867561f, // cos(pi/8)
+};
+
+const float kL1Norm8[8] = {
+    1.0000000000000000000f, //
+    0.9061274463528878431f, // cos(pi/8) * cos(pi/16)
+    0.9238795325112867561f, // cos(pi/8)
+    0.9061274463528878431f, // cos(pi/8) * cos(pi/16)
+    1.0000000000000000000f, //
+    0.9061274463528878431f, // cos(pi/8) * cos(pi/16)
+    0.9238795325112867561f, // cos(pi/8)
+    0.9061274463528878431f, // cos(pi/8) * cos(pi/16)
+
+};
+
+const float kL1Norm16[16] = { // returned by L1Norm<16>(k); the 8-entry pattern below repeats twice
+    1.0000000000000000000f, // k = 0 and 8: exactly 1
+    0.9017641950288744354f, // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9061274463528878431f, // cos(pi/8) * cos(pi/16)
+    0.9017641950288744354f, // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9238795325112867561f, // cos(pi/8)
+    0.9017641950288744354f, // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9061274463528878431f, // cos(pi/8) * cos(pi/16)
+    0.9017641950288744354f, // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    1.0000000000000000000f, //
+    0.9017641950288744354f, // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9061274463528878431f, // cos(pi/8) * cos(pi/16)
+    0.9017641950288744354f, // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9238795325112867561f, // cos(pi/8)
+    0.9017641950288744354f, // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9061274463528878431f, // cos(pi/8) * cos(pi/16)
+    0.9017641950288744354f, // cos(pi/8) * cos(pi/16) * cos(pi/32)
+};
+
+const float kL1Norm32[32] = { // fallback table of L1Norm<N>() for any N other than 2, 4, 8, 16; 16-entry pattern x2
+    1.0000000000000000000f, // k = 0 and 16: exactly 1
+    0.9006779805633546924f, // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9017641950288744354f, // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9006779805633546924f, // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9061274463528878431f, // cos(pi/8) * cos(pi/16)
+    0.9006779805633546924f, // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9017641950288744354f, // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9006779805633546924f, // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9238795325112867561f, // cos(pi/8)
+    0.9006779805633546924f, // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9017641950288744354f, // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9006779805633546924f, // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9061274463528878431f, // cos(pi/8) * cos(pi/16)
+    0.9006779805633546924f, // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9017641950288744354f, // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9006779805633546924f, // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    1.0000000000000000000f, //
+    0.9006779805633546924f, // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9017641950288744354f, // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9006779805633546924f, // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9061274463528878431f, // cos(pi/8) * cos(pi/16)
+    0.9006779805633546924f, // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9017641950288744354f, // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9006779805633546924f, // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9238795325112867561f, // cos(pi/8)
+    0.9006779805633546924f, // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9017641950288744354f, // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9006779805633546924f, // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9061274463528878431f, // cos(pi/8) * cos(pi/16)
+    0.9006779805633546924f, // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+    0.9017641950288744354f, // cos(pi/8) * cos(pi/16) * cos(pi/32)
+    0.9006779805633546924f, // cos(pi/8) * cos(pi/16) * cos(pi/32) * cos(pi/64)
+};
+
+const float kL1NormInv2[2] = { // kL1NormInv<N>[k] is what L1NormInv<N>(k) returns
+    1.000000000000000000f, 1.000000000000000000f,
+};
+// The distinct values are reciprocals of the kL1Norm entries: 1/0.92387... = 1.08239..., 1/0.90612... = 1.10359...
+const float kL1NormInv4[4] = {
+    1.000000000000000000f, 1.082392200292393968f, 1.000000000000000000f, 1.082392200292393968f,
+};
+
+const float kL1NormInv8[8] = {
+    1.000000000000000000f, 1.103597517131772049f, 1.082392200292393968f, 1.103597517131772049f,
+    1.000000000000000000f, 1.103597517131772049f, 1.082392200292393968f, 1.103597517131772049f,
+};
+
+const float kL1NormInv16[16] = {
+    1.000000000000000000f, 1.108937353592731700f, 1.103597517131772049f, 1.108937353592731700f,
+    1.082392200292393968f, 1.108937353592731700f, 1.103597517131772049f, 1.108937353592731700f,
+    1.000000000000000000f, 1.108937353592731700f, 1.103597517131772049f, 1.108937353592731700f,
+    1.082392200292393968f, 1.108937353592731700f, 1.103597517131772049f, 1.108937353592731700f,
+};
+// NOTE(review): unlike every other table here, the literals below have no 'f' suffix (double constants converted to
+const float kL1NormInv32[32] = { // float) and repeated values differ in their last digits, e.g. [16] is 0.99999...
+    1.000000000000000000, 1.110274728127050414, 1.108937353592731379, 1.110274728127050414, 1.103597517131772010,
+    1.110274728127050636, 1.108937353592731379, 1.110274728127050414, 1.082392200292393580, 1.110274728127050414,
+    1.108937353592730934, 1.110274728127050414, 1.103597517131771788, 1.110274728127050414, 1.108937353592731156,
+    1.110274728127050414, 0.999999999999999556, 1.110274728127049970, 1.108937353592731601, 1.110274728127051080,
+    1.103597517131771788, 1.110274728127050414, 1.108937353592732045, 1.110274728127050192, 1.082392200292394691,
+    1.110274728127049526, 1.108937353592733155, 1.110274728127050858, 1.103597517131772232, 1.110274728127051969,
+    1.108937353592732933, 1.110274728127050414,
+};
+
+template <int N>
+float DCTScales(int x) {
+#pragma HLS inline
+    // Returns kDCTScales<N>[x]; any N other than 2, 4, 8, 16 reads the 32-entry table. x is not range-checked.
+    if (N == 2) return kDCTScales2[x];
+    if (N == 4) return kDCTScales4[x];
+    return (N == 8) ? kDCTScales8[x] : ((N == 16) ? kDCTScales16[x] : kDCTScales32[x]);
+}
+
+template <int N>
+float IDCTScales(int x) {
+#pragma HLS inline
+    // Returns kIDCTScales<N>[x]; any N other than 2, 4, 8, 16 reads the 32-entry table. x is not range-checked.
+    if (N == 2) return kIDCTScales2[x];
+    if (N == 4) return kIDCTScales4[x];
+    return (N == 8) ? kIDCTScales8[x] : ((N == 16) ? kIDCTScales16[x] : kIDCTScales32[x]);
+}
+
+template <int N>
+float L1Norm(int x) { // returns kL1Norm<N>[x]; N other than 2, 4, 8, 16 reads the 32-entry table; x is not checked
+#pragma HLS inline
+    if (N == 2) return kL1Norm2[x];
+    if (N == 4) return kL1Norm4[x];
+    return (N == 8) ? kL1Norm8[x] : ((N == 16) ? kL1Norm16[x] : kL1Norm32[x]);
+}
+
+template <int N>
+float L1NormInv(int x) {
+#pragma HLS inline
+    // Returns kL1NormInv<N>[x]; any N other than 2, 4, 8, 16 reads the 32-entry table. x is not range-checked.
+    if (N == 2) return kL1NormInv2[x];
+    if (N == 4) return kL1NormInv4[x];
+    return (N == 8) ? kL1NormInv8[x] : ((N == 16) ? kL1NormInv16[x] : kL1NormInv32[x]);
+}
+
+template <int N> // N selects the tables used by DCTScales<N> and L1NormInv<N>
+float DCTTotalScale(int x, int y) { // product, left to right: N, DCTScales at x and y, then L1NormInv at x and y
+    return N * DCTScales<N>(x) * DCTScales<N>(y) * L1NormInv<N>(x) * L1NormInv<N>(y);
+}
+
+template <int N> // N selects the tables used by IDCTScales<N> and L1Norm<N>
+float DCTInvTotalScale(int x, int y) { // product, left to right: N, IDCTScales at x and y, then L1Norm at x and y
+    return N * IDCTScales<N>(x) * IDCTScales<N>(y) * L1Norm<N>(x) * L1Norm<N>(y);
+}
+
+template <bool scale>
+void dct4_block(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+    // 4-point butterfly over k in in[(by*4 + k)*32 + bx*4 + x] (8x8 blocks); scale=true divides every output by 16.
+    const float c2_8 = 1.414213562373095048f; // 2 * cos(2 * pi / 8)
+
+    for (ap_uint<8> by = 0; by < 8; by++) {
+        for (ap_uint<8> bx = 0; bx < 8; bx++) {
+            for (ap_uint<8> x = 0; x < 4; x++) {
+#pragma HLS DEPENDENCE variable = in inter false
+#pragma HLS DEPENDENCE variable = out inter false
+#pragma HLS LOOP_FLATTEN off
+#pragma HLS pipeline
+                // Index = bit concatenation {by[2:0], k[1:0], bx[2:0], x[1:0]}; k = 0..3 walks the four inputs.
+                float i0 = in[(by(2, 0), (ap_uint<2>)0, bx(2, 0), x(1, 0))];
+                float i1 = in[(by(2, 0), (ap_uint<2>)1, bx(2, 0), x(1, 0))];
+                float i2 = in[(by(2, 0), (ap_uint<2>)2, bx(2, 0), x(1, 0))];
+                float i3 = in[(by(2, 0), (ap_uint<2>)3, bx(2, 0), x(1, 0))];
+                // Sums and differences of the mirrored pairs (0,3) and (1,2).
+                float t0 = i0 + i3;
+                float t1 = i1 + i2;
+                float t2 = i0 - i3;
+                float t3 = i1 - i2;
+                // t4/t5 feed outputs 0 and 2; t8/t9 feed outputs 1 and 3.
+                float t4 = t0 + t1;
+                float t5 = t0 - t1;
+                float t6 = t2 - t3;
+                float t7 = t3 * c2_8;
+                float t8 = t6 + t7;
+                float t9 = t6 - t7;
+                // Results are stored at k = 0, 1, 2, 3 as t4, t8, t5, t9.
+                if (scale) {
+                    out[(by(2, 0), (ap_uint<2>)0, bx(2, 0), x(1, 0))] = t4 / 16;
+                    out[(by(2, 0), (ap_uint<2>)1, bx(2, 0), x(1, 0))] = t8 / 16;
+                    out[(by(2, 0), (ap_uint<2>)2, bx(2, 0), x(1, 0))] = t5 / 16;
+                    out[(by(2, 0), (ap_uint<2>)3, bx(2, 0), x(1, 0))] = t9 / 16;
+                } else {
+                    out[(by(2, 0), (ap_uint<2>)0, bx(2, 0), x(1, 0))] = t4;
+                    out[(by(2, 0), (ap_uint<2>)1, bx(2, 0), x(1, 0))] = t8;
+                    out[(by(2, 0), (ap_uint<2>)2, bx(2, 0), x(1, 0))] = t5;
+                    out[(by(2, 0), (ap_uint<2>)3, bx(2, 0), x(1, 0))] = t9;
+                }
+                // Trace is guarded by DEBUG_DCT, the same macro dct16_block uses (was misspelled DEBUF_DCT).
+#ifdef DEBUG_DCT
+                std::cout << "dc4_block: by=" << by << " bx=" << bx << " i0=" << i0 << " i1=" << i1 << " i2=" << i2
+                          << " i3=" << i3 << std::endl;
+                std::cout << "dc4_block: by=" << by << " bx=" << bx << " o0=" << t4 << " o1=" << t8 << " o2=" << t5
+                          << " o3=" << t9 << std::endl;
+#endif
+            }
+        }
+    }
+}
+
+template <bool scale>
+void dct8_block(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+
+    float c1 = 0.707106781186548f; // 1 / sqrt(2)
+    float c2 = 0.382683432365090f; // cos(3 * pi / 8)
+    float c3 = 1.30656296487638f;  // 1 / (2 * cos(3 * pi / 8))
+    float c4 = 0.541196100146197f; // sqrt(2) * cos(3 * pi / 8)
+
+    for (ap_uint<8> by = 0; by < 4; by++) {
+        for (ap_uint<8> bx = 0; bx < 4; bx++) {
+            for (ap_uint<8> x = 0; x < 8; x++) {
+#pragma HLS DEPENDENCE variable = in inter false
+#pragma HLS DEPENDENCE variable = out inter false
+#pragma HLS LOOP_FLATTEN off
+#pragma HLS pipeline
+
+                float t00 =
+                    in[(by(1, 0), (ap_uint<3>)0, bx(1, 0), x(2, 0))] + in[(by(1, 0), (ap_uint<3>)7, bx(1, 0), x(2, 0))];
+                float t01 =
+                    in[(by(1, 0), (ap_uint<3>)0, bx(1, 0), x(2, 0))] - in[(by(1, 0), (ap_uint<3>)7, bx(1, 0), x(2, 0))];
+                float t02 =
+                    in[(by(1, 0), (ap_uint<3>)3, bx(1, 0), x(2, 0))] + in[(by(1, 0), (ap_uint<3>)4, bx(1, 0), x(2, 0))];
+                float t03 =
+                    in[(by(1, 0), (ap_uint<3>)3, bx(1, 0), x(2, 0))] - in[(by(1, 0), (ap_uint<3>)4, bx(1, 0), x(2, 0))];
+                float t04 =
+                    in[(by(1, 0), (ap_uint<3>)2, bx(1, 0), x(2, 0))] + in[(by(1, 0), (ap_uint<3>)5, bx(1, 0), x(2, 0))];
+                float t05 =
+                    in[(by(1, 0), (ap_uint<3>)2, bx(1, 0), x(2, 0))] - in[(by(1, 0), (ap_uint<3>)5, bx(1, 0), x(2, 0))];
+                float t06 =
+                    in[(by(1, 0), (ap_uint<3>)1, bx(1, 0), x(2, 0))] + in[(by(1, 0), (ap_uint<3>)6, bx(1, 0), x(2, 0))];
+                float t07 =
+                    in[(by(1, 0), (ap_uint<3>)1, bx(1, 0), x(2, 0))] - in[(by(1, 0), (ap_uint<3>)6, bx(1, 0), x(2, 0))];
+
+                float t08 = t00 + t02;
+                float t09 = t00 - t02;
+                float t10 = t06 + t04;
+                float t11 = t06 - t04;
+                float t12 = t07 + t05;
+                float t13 = t01 + t07;
+                float t14 = t05 + t03;
+
+                float t15 = t11 + t09;
+                float t16 = t14 - t13;
+
+                float t17 = c1 * t15;
+                float t18 = c1 * t12;
+                float t19 = c2 * t16;
+                float t20 = c3 * t13;
+                float t21 = c4 * t14;
+
+                float t22 = t20 + t19;
+                float t23 = t21 + t19;
+                float t24 = t01 + t18;
+                float t25 = t01 - t18;
+
+                float t26 = t08 + t10;
+                float t27 = t24 + t22;
+                float t28 = t09 + t17;
+                float t29 = t25 - t23;
+                float t30 = t08 - t10;
+                float t31 = t25 + t23;
+                float t32 = t09 - t17;
+                float t33 = t24 - t22;
+
+                if (scale) {
+                    out[(by(1, 0), (ap_uint<3>)0, bx(1, 0), x(2, 0))] = t26 / 64;
+                    out[(by(1, 0), (ap_uint<3>)1, bx(1, 0), x(2, 0))] = t27 / 64;
+                    out[(by(1, 0), (ap_uint<3>)2, bx(1, 0), x(2, 0))] = t28 / 64;
+                    out[(by(1, 0), (ap_uint<3>)3, bx(1, 0), x(2, 0))] = t29 / 64;
+                    out[(by(1, 0), (ap_uint<3>)4, bx(1, 0), x(2, 0))] = t30 / 64;
+                    out[(by(1, 0), (ap_uint<3>)5, bx(1, 0), x(2, 0))] = t31 / 64;
+                    out[(by(1, 0), (ap_uint<3>)6, bx(1, 0), x(2, 0))] = t32 / 64;
+                    out[(by(1, 0), (ap_uint<3>)7, bx(1, 0), x(2, 0))] = t33 / 64;
+                } else {
+                    out[(by(1, 0), (ap_uint<3>)0, bx(1, 0), x(2, 0))] = t26;
+                    out[(by(1, 0), (ap_uint<3>)1, bx(1, 0), x(2, 0))] = t27;
+                    out[(by(1, 0), (ap_uint<3>)2, bx(1, 0), x(2, 0))] = t28;
+                    out[(by(1, 0), (ap_uint<3>)3, bx(1, 0), x(2, 0))] = t29;
+                    out[(by(1, 0), (ap_uint<3>)4, bx(1, 0), x(2, 0))] = t30;
+                    out[(by(1, 0), (ap_uint<3>)5, bx(1, 0), x(2, 0))] = t31;
+                    out[(by(1, 0), (ap_uint<3>)6, bx(1, 0), x(2, 0))] = t32;
+                    out[(by(1, 0), (ap_uint<3>)7, bx(1, 0), x(2, 0))] = t33;
+                }
+            }
+        }
+    }
+}
+
+template <bool scale>
+void dct16_block(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+    // 16-point butterfly over k in in[(by*16 + k)*32 + bx*16 + x] (2x2 blocks); scale=true divides outputs by 256.
+    float c1_16 = 1.9615705608064609f;  // 2 * cos(1 * pi / 16)
+    float c2_16 = 1.8477590650225735f;  // 2 * cos(2 * pi / 16)
+    float c3_16 = 1.6629392246050905f;  // 2 * cos(3 * pi / 16)
+    float c4_16 = 1.4142135623730951f;  // 2 * cos(4 * pi / 16)
+    float c5_16 = 1.1111404660392046f;  // 2 * cos(5 * pi / 16)
+    float c6_16 = 0.7653668647301797f;  // 2 * cos(6 * pi / 16)
+    float c7_16 = 0.39018064403225666f; // 2 * cos(7 * pi / 16)
+
+    for (ap_uint<8> by = 0; by < 2; by++) {
+        for (ap_uint<8> bx = 0; bx < 2; bx++) {
+            for (ap_uint<8> x = 0; x < 16; x++) {
+#pragma HLS DEPENDENCE variable = in inter false
+#pragma HLS DEPENDENCE variable = out inter false
+#pragma HLS LOOP_FLATTEN off
+#pragma HLS pipeline
+                // Index = {by[0], k[3:0], bx[0], x[3:0]}. t00..t07 = in[k] + in[15-k], t08..t15 = in[k] - in[15-k].
+                float t00 = in[((ap_uint<1>)by[0], (ap_uint<4>)0, (ap_uint<1>)bx[0], x(3, 0))] +
+                            in[((ap_uint<1>)by[0], (ap_uint<4>)15, (ap_uint<1>)bx[0], x(3, 0))];
+                float t01 = in[((ap_uint<1>)by[0], (ap_uint<4>)1, (ap_uint<1>)bx[0], x(3, 0))] +
+                            in[((ap_uint<1>)by[0], (ap_uint<4>)14, (ap_uint<1>)bx[0], x(3, 0))];
+                float t02 = in[((ap_uint<1>)by[0], (ap_uint<4>)2, (ap_uint<1>)bx[0], x(3, 0))] +
+                            in[((ap_uint<1>)by[0], (ap_uint<4>)13, (ap_uint<1>)bx[0], x(3, 0))];
+                float t03 = in[((ap_uint<1>)by[0], (ap_uint<4>)3, (ap_uint<1>)bx[0], x(3, 0))] +
+                            in[((ap_uint<1>)by[0], (ap_uint<4>)12, (ap_uint<1>)bx[0], x(3, 0))];
+                float t04 = in[((ap_uint<1>)by[0], (ap_uint<4>)4, (ap_uint<1>)bx[0], x(3, 0))] +
+                            in[((ap_uint<1>)by[0], (ap_uint<4>)11, (ap_uint<1>)bx[0], x(3, 0))];
+                float t05 = in[((ap_uint<1>)by[0], (ap_uint<4>)5, (ap_uint<1>)bx[0], x(3, 0))] +
+                            in[((ap_uint<1>)by[0], (ap_uint<4>)10, (ap_uint<1>)bx[0], x(3, 0))];
+                float t06 = in[((ap_uint<1>)by[0], (ap_uint<4>)6, (ap_uint<1>)bx[0], x(3, 0))] +
+                            in[((ap_uint<1>)by[0], (ap_uint<4>)9, (ap_uint<1>)bx[0], x(3, 0))];
+                float t07 = in[((ap_uint<1>)by[0], (ap_uint<4>)7, (ap_uint<1>)bx[0], x(3, 0))] +
+                            in[((ap_uint<1>)by[0], (ap_uint<4>)8, (ap_uint<1>)bx[0], x(3, 0))];
+                float t08 = in[((ap_uint<1>)by[0], (ap_uint<4>)0, (ap_uint<1>)bx[0], x(3, 0))] -
+                            in[((ap_uint<1>)by[0], (ap_uint<4>)15, (ap_uint<1>)bx[0], x(3, 0))];
+                float t09 = in[((ap_uint<1>)by[0], (ap_uint<4>)1, (ap_uint<1>)bx[0], x(3, 0))] -
+                            in[((ap_uint<1>)by[0], (ap_uint<4>)14, (ap_uint<1>)bx[0], x(3, 0))];
+                float t10 = in[((ap_uint<1>)by[0], (ap_uint<4>)2, (ap_uint<1>)bx[0], x(3, 0))] -
+                            in[((ap_uint<1>)by[0], (ap_uint<4>)13, (ap_uint<1>)bx[0], x(3, 0))];
+                float t11 = in[((ap_uint<1>)by[0], (ap_uint<4>)3, (ap_uint<1>)bx[0], x(3, 0))] -
+                            in[((ap_uint<1>)by[0], (ap_uint<4>)12, (ap_uint<1>)bx[0], x(3, 0))];
+                float t12 = in[((ap_uint<1>)by[0], (ap_uint<4>)4, (ap_uint<1>)bx[0], x(3, 0))] -
+                            in[((ap_uint<1>)by[0], (ap_uint<4>)11, (ap_uint<1>)bx[0], x(3, 0))];
+                float t13 = in[((ap_uint<1>)by[0], (ap_uint<4>)5, (ap_uint<1>)bx[0], x(3, 0))] -
+                            in[((ap_uint<1>)by[0], (ap_uint<4>)10, (ap_uint<1>)bx[0], x(3, 0))];
+                float t14 = in[((ap_uint<1>)by[0], (ap_uint<4>)6, (ap_uint<1>)bx[0], x(3, 0))] -
+                            in[((ap_uint<1>)by[0], (ap_uint<4>)9, (ap_uint<1>)bx[0], x(3, 0))];
+                float t15 = in[((ap_uint<1>)by[0], (ap_uint<4>)7, (ap_uint<1>)bx[0], x(3, 0))] -
+                            in[((ap_uint<1>)by[0], (ap_uint<4>)8, (ap_uint<1>)bx[0], x(3, 0))];
+                // t16..t47 derive from the sums t00..t07; t50..t95 derive from the differences t08..t15.
+                float t16 = t00 + t07;
+                float t17 = t01 + t06;
+                float t18 = t02 + t05;
+                float t19 = t03 + t04;
+                float t20 = t00 - t07;
+                float t21 = t01 - t06;
+                float t22 = t02 - t05;
+                float t23 = t03 - t04;
+                float t24 = t16 + t19;
+                float t25 = t17 + t18;
+                float t26 = t16 - t19;
+                float t27 = t17 - t18;
+                float t30 = t26 - t27;
+                float t31 = t27 * c4_16;
+                float t34 = t20 - t23;
+                float t35 = t21 - t22;
+                float t36 = t22 * c4_16;
+                float t37 = t23 * c4_16;
+                float t38 = t34 + t36;
+                float t39 = t35 + t37;
+                float t40 = t34 - t36;
+                float t41 = t35 - t37;
+                float t42 = t38 - t39;
+                float t43 = t39 * c2_16;
+                float t46 = t40 - t41;
+                float t47 = t41 * c6_16;
+                float t50 = t08 - t15;
+                float t51 = t09 - t14;
+                float t52 = t10 - t13;
+                float t53 = t11 - t12;
+                float t54 = t12 * c4_16;
+                float t55 = t13 * c4_16;
+                float t56 = t14 * c4_16;
+                float t57 = t15 * c4_16;
+                float t58 = t50 + t54;
+                float t59 = t51 + t55;
+                float t60 = t52 + t56;
+                float t61 = t53 + t57;
+                float t62 = t50 - t54;
+                float t63 = t51 - t55;
+                float t64 = t52 - t56;
+                float t65 = t53 - t57;
+                float t66 = t58 - t61;
+                float t67 = t59 - t60;
+                float t68 = t60 * c2_16;
+                float t69 = t61 * c2_16;
+                float t70 = t66 + t68;
+                float t71 = t67 + t69;
+                float t72 = t66 - t68;
+                float t73 = t67 - t69;
+                float t74 = t70 - t71;
+                float t75 = t71 * c1_16;
+                float t78 = t72 - t73;
+                float t79 = t73 * c7_16;
+                float t82 = t62 - t65;
+                float t83 = t63 - t64;
+                float t84 = t64 * c6_16;
+                float t85 = t65 * c6_16;
+                float t86 = t82 + t84;
+                float t87 = t83 + t85;
+                float t88 = t82 - t84;
+                float t89 = t83 - t85;
+                float t90 = t86 - t87;
+                float t91 = t87 * c3_16;
+                float t94 = t88 - t89;
+                float t95 = t89 * c5_16;
+                // t96..t111 are the sixteen results before the optional scaling.
+                float t96 = t24 + t25;
+                float t97 = t24 - t25;
+                float t98 = t30 + t31;
+                float t99 = t30 - t31;
+                float t100 = t42 + t43;
+                float t101 = t42 - t43;
+                float t102 = t46 + t47;
+                float t103 = t46 - t47;
+                float t104 = t74 + t75;
+                float t105 = t74 - t75;
+                float t106 = t78 + t79;
+                float t107 = t78 - t79;
+                float t108 = t90 + t91;
+                float t109 = t90 - t91;
+                float t110 = t94 + t95;
+                float t111 = t94 - t95;
+
+#ifdef DEBUG_DCT
+                std::cout << "t0=" << t00 << std::endl;
+                std::cout << "t1=" << t01 << std::endl;
+                std::cout << "t2=" << t02 << std::endl;
+                std::cout << "t3=" << t03 << std::endl;
+                std::cout << "t4=" << t04 << std::endl;
+                std::cout << "t5=" << t05 << std::endl;
+                std::cout << "t6=" << t06 << std::endl;
+                std::cout << "t7=" << t07 << std::endl;
+                std::cout << "t8=" << t08 << std::endl;
+                std::cout << "t9=" << t09 << std::endl;
+                std::cout << "t10=" << t10 << std::endl;
+                std::cout << "t11=" << t11 << std::endl;
+                std::cout << "t12=" << t12 << std::endl;
+                std::cout << "t13=" << t13 << std::endl;
+                std::cout << "t14=" << t14 << std::endl;
+                std::cout << "t15=" << t15 << std::endl;
+#endif
+                // Results t96..t111 are stored at k = 0, 8, 4, 12, 2, 14, 6, 10, 1, 15, 7, 9, 3, 13, 5, 11.
+                if (scale) {
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)0, (ap_uint<1>)bx[0], x(3, 0))] = t96 / 256;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)8, (ap_uint<1>)bx[0], x(3, 0))] = t97 / 256;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)4, (ap_uint<1>)bx[0], x(3, 0))] = t98 / 256;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)12, (ap_uint<1>)bx[0], x(3, 0))] = t99 / 256;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)2, (ap_uint<1>)bx[0], x(3, 0))] = t100 / 256;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)14, (ap_uint<1>)bx[0], x(3, 0))] = t101 / 256;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)6, (ap_uint<1>)bx[0], x(3, 0))] = t102 / 256;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)10, (ap_uint<1>)bx[0], x(3, 0))] = t103 / 256;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)1, (ap_uint<1>)bx[0], x(3, 0))] = t104 / 256;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)15, (ap_uint<1>)bx[0], x(3, 0))] = t105 / 256;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)7, (ap_uint<1>)bx[0], x(3, 0))] = t106 / 256;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)9, (ap_uint<1>)bx[0], x(3, 0))] = t107 / 256;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)3, (ap_uint<1>)bx[0], x(3, 0))] = t108 / 256;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)13, (ap_uint<1>)bx[0], x(3, 0))] = t109 / 256;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)5, (ap_uint<1>)bx[0], x(3, 0))] = t110 / 256;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)11, (ap_uint<1>)bx[0], x(3, 0))] = t111 / 256;
+                } else {
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)0, (ap_uint<1>)bx[0], x(3, 0))] = t96;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)8, (ap_uint<1>)bx[0], x(3, 0))] = t97;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)4, (ap_uint<1>)bx[0], x(3, 0))] = t98;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)12, (ap_uint<1>)bx[0], x(3, 0))] = t99;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)2, (ap_uint<1>)bx[0], x(3, 0))] = t100;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)14, (ap_uint<1>)bx[0], x(3, 0))] = t101;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)6, (ap_uint<1>)bx[0], x(3, 0))] = t102;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)10, (ap_uint<1>)bx[0], x(3, 0))] = t103;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)1, (ap_uint<1>)bx[0], x(3, 0))] = t104;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)15, (ap_uint<1>)bx[0], x(3, 0))] = t105;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)7, (ap_uint<1>)bx[0], x(3, 0))] = t106;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)9, (ap_uint<1>)bx[0], x(3, 0))] = t107;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)3, (ap_uint<1>)bx[0], x(3, 0))] = t108;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)13, (ap_uint<1>)bx[0], x(3, 0))] = t109;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)5, (ap_uint<1>)bx[0], x(3, 0))] = t110;
+                    out[((ap_uint<1>)by[0], (ap_uint<4>)11, (ap_uint<1>)bx[0], x(3, 0))] = t111;
+                }
+            }
+        }
+    }
+}
+
+/**
+ * @brief 32-input butterfly pass over a 1024-float buffer (presumably one 1-D pass of a
+ *        32-point DCT, judging by the name and the cosine constants -- confirm against
+ *        the reference implementation).
+ *
+ * For every value i of the low five address bits, the 32 elements at indices
+ * k * 32 + i (k = 0..31) are read from `in`, combined through a fixed network of
+ * additions, subtractions and constant multiplications, and the 32 results are stored
+ * at the same index pattern in `out`.
+ *
+ * @tparam scale  when true every stored result is divided by 1024; when false the
+ *                results are stored unscaled
+ * @param in      source buffer, 1024 floats
+ * @param out     destination buffer, 1024 floats
+ */
+template <bool scale>
+void dct32_block(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+
+    float c2_64 = 1.990369453344393857f;  // 2 * cos(2 * pi / 64)
+    float c4_64 = 1.961570560806460861f;  // 2 * cos(4 * pi / 64)
+    float c6_64 = 1.913880671464417649f;  // 2 * cos(6 * pi / 64)
+    float c8_64 = 1.847759065022573477f;  // 2 * cos(8 * pi / 64)
+    float c10_64 = 1.763842528696710099f; // 2 * cos(10 * pi / 64)
+    float c12_64 = 1.662939224605090471f; // 2 * cos(12 * pi / 64)
+    float c14_64 = 1.546020906725473987f; // 2 * cos(14 * pi / 64)
+    float c16_64 = 1.414213562373095145f; // 2 * cos(16 * pi / 64)
+    float c18_64 = 1.268786568327290976f; // 2 * cos(18 * pi / 64)
+    float c20_64 = 1.111140466039204577f; // 2 * cos(20 * pi / 64)
+    float c22_64 = 0.942793473651995617f; // 2 * cos(22 * pi / 64)
+    float c24_64 = 0.765366864730179675f; // 2 * cos(24 * pi / 64)
+    float c26_64 = 0.580569354508924662f; // 2 * cos(26 * pi / 64)
+    float c28_64 = 0.390180644032256663f; // 2 * cos(28 * pi / 64)
+    float c30_64 = 0.196034280659121540f; // 2 * cos(30 * pi / 64)
+
+    // One iteration per value of the low five address bits. The concatenation
+    // ((ap_uint<5>)k, i(4, 0)) forms the 10-bit index k * 32 + i.
+    for (ap_uint<8> i = 0; i < 32; i++) {
+// The DEPENDENCE pragmas declare that there is no inter-iteration dependence on in/out.
+#pragma HLS DEPENDENCE variable = in inter false
+#pragma HLS DEPENDENCE variable = out inter false
+#pragma HLS pipeline
+
+        // First stage: sums (t00..t15) and differences (t16..t31) of the mirrored
+        // input pairs k and 31 - k.
+        float t00 = in[((ap_uint<5>)0, i(4, 0))] + in[((ap_uint<5>)31, i(4, 0))];
+        float t01 = in[((ap_uint<5>)1, i(4, 0))] + in[((ap_uint<5>)30, i(4, 0))];
+        float t02 = in[((ap_uint<5>)2, i(4, 0))] + in[((ap_uint<5>)29, i(4, 0))];
+        float t03 = in[((ap_uint<5>)3, i(4, 0))] + in[((ap_uint<5>)28, i(4, 0))];
+        float t04 = in[((ap_uint<5>)4, i(4, 0))] + in[((ap_uint<5>)27, i(4, 0))];
+        float t05 = in[((ap_uint<5>)5, i(4, 0))] + in[((ap_uint<5>)26, i(4, 0))];
+        float t06 = in[((ap_uint<5>)6, i(4, 0))] + in[((ap_uint<5>)25, i(4, 0))];
+        float t07 = in[((ap_uint<5>)7, i(4, 0))] + in[((ap_uint<5>)24, i(4, 0))];
+        float t08 = in[((ap_uint<5>)8, i(4, 0))] + in[((ap_uint<5>)23, i(4, 0))];
+        float t09 = in[((ap_uint<5>)9, i(4, 0))] + in[((ap_uint<5>)22, i(4, 0))];
+        float t10 = in[((ap_uint<5>)10, i(4, 0))] + in[((ap_uint<5>)21, i(4, 0))];
+        float t11 = in[((ap_uint<5>)11, i(4, 0))] + in[((ap_uint<5>)20, i(4, 0))];
+        float t12 = in[((ap_uint<5>)12, i(4, 0))] + in[((ap_uint<5>)19, i(4, 0))];
+        float t13 = in[((ap_uint<5>)13, i(4, 0))] + in[((ap_uint<5>)18, i(4, 0))];
+        float t14 = in[((ap_uint<5>)14, i(4, 0))] + in[((ap_uint<5>)17, i(4, 0))];
+        float t15 = in[((ap_uint<5>)15, i(4, 0))] + in[((ap_uint<5>)16, i(4, 0))];
+        float t16 = in[((ap_uint<5>)0, i(4, 0))] - in[((ap_uint<5>)31, i(4, 0))];
+        float t17 = in[((ap_uint<5>)1, i(4, 0))] - in[((ap_uint<5>)30, i(4, 0))];
+        float t18 = in[((ap_uint<5>)2, i(4, 0))] - in[((ap_uint<5>)29, i(4, 0))];
+        float t19 = in[((ap_uint<5>)3, i(4, 0))] - in[((ap_uint<5>)28, i(4, 0))];
+        float t20 = in[((ap_uint<5>)4, i(4, 0))] - in[((ap_uint<5>)27, i(4, 0))];
+        float t21 = in[((ap_uint<5>)5, i(4, 0))] - in[((ap_uint<5>)26, i(4, 0))];
+        float t22 = in[((ap_uint<5>)6, i(4, 0))] - in[((ap_uint<5>)25, i(4, 0))];
+        float t23 = in[((ap_uint<5>)7, i(4, 0))] - in[((ap_uint<5>)24, i(4, 0))];
+        float t24 = in[((ap_uint<5>)8, i(4, 0))] - in[((ap_uint<5>)23, i(4, 0))];
+        float t25 = in[((ap_uint<5>)9, i(4, 0))] - in[((ap_uint<5>)22, i(4, 0))];
+        float t26 = in[((ap_uint<5>)10, i(4, 0))] - in[((ap_uint<5>)21, i(4, 0))];
+        float t27 = in[((ap_uint<5>)11, i(4, 0))] - in[((ap_uint<5>)20, i(4, 0))];
+        float t28 = in[((ap_uint<5>)12, i(4, 0))] - in[((ap_uint<5>)19, i(4, 0))];
+        float t29 = in[((ap_uint<5>)13, i(4, 0))] - in[((ap_uint<5>)18, i(4, 0))];
+        float t30 = in[((ap_uint<5>)14, i(4, 0))] - in[((ap_uint<5>)17, i(4, 0))];
+        float t31 = in[((ap_uint<5>)15, i(4, 0))] - in[((ap_uint<5>)16, i(4, 0))];
+
+        // Remaining butterfly stages: t32..t129 derive from the sums t00..t15,
+        // t130..t257 from the differences t16..t31.
+        float t32 = t00 + t15;
+        float t33 = t01 + t14;
+        float t34 = t02 + t13;
+        float t35 = t03 + t12;
+        float t36 = t04 + t11;
+        float t37 = t05 + t10;
+        float t38 = t06 + t09;
+        float t39 = t07 + t08;
+        float t40 = t00 - t15;
+        float t41 = t01 - t14;
+        float t42 = t02 - t13;
+        float t43 = t03 - t12;
+        float t44 = t04 - t11;
+        float t45 = t05 - t10;
+        float t46 = t06 - t09;
+        float t47 = t07 - t08;
+        float t48 = t32 + t39;
+        float t49 = t33 + t38;
+        float t50 = t34 + t37;
+        float t51 = t35 + t36;
+        float t52 = t32 - t39;
+        float t53 = t33 - t38;
+        float t54 = t34 - t37;
+        float t55 = t35 - t36;
+        float t56 = t48 + t51;
+        float t57 = t49 + t50;
+        float t58 = t48 - t51;
+        float t59 = t49 - t50;
+        float t60 = t56 + t57;
+        float t61 = t56 - t57;
+        float t62 = t58 - t59;
+        float t63 = t59 * c16_64;
+        float t64 = t62 + t63;
+        float t65 = t62 - t63;
+        float t66 = t52 - t55;
+        float t67 = t53 - t54;
+        float t68 = t54 * c16_64;
+        float t69 = t55 * c16_64;
+        float t70 = t66 + t68;
+        float t71 = t67 + t69;
+        float t72 = t66 - t68;
+        float t73 = t67 - t69;
+        float t74 = t70 - t71;
+        float t75 = t71 * c8_64;
+        float t76 = t74 + t75;
+        float t77 = t74 - t75;
+        float t78 = t72 - t73;
+        float t79 = t73 * c24_64;
+        float t80 = t78 + t79;
+        float t81 = t78 - t79;
+        float t82 = t40 - t47;
+        float t83 = t41 - t46;
+        float t84 = t42 - t45;
+        float t85 = t43 - t44;
+        float t86 = t44 * c16_64;
+        float t87 = t45 * c16_64;
+        float t88 = t46 * c16_64;
+        float t89 = t47 * c16_64;
+        float t90 = t82 + t86;
+        float t91 = t83 + t87;
+        float t92 = t84 + t88;
+        float t93 = t85 + t89;
+        float t94 = t82 - t86;
+        float t95 = t83 - t87;
+        float t96 = t84 - t88;
+        float t97 = t85 - t89;
+        float t98 = t90 - t93;
+        float t99 = t91 - t92;
+        float t100 = t92 * c8_64;
+        float t101 = t93 * c8_64;
+        float t102 = t98 + t100;
+        float t103 = t99 + t101;
+        float t104 = t98 - t100;
+        float t105 = t99 - t101;
+        float t106 = t102 - t103;
+        float t107 = t103 * c4_64;
+        float t108 = t106 + t107;
+        float t109 = t106 - t107;
+        float t110 = t104 - t105;
+        float t111 = t105 * c28_64;
+        float t112 = t110 + t111;
+        float t113 = t110 - t111;
+        float t114 = t94 - t97;
+        float t115 = t95 - t96;
+        float t116 = t96 * c24_64;
+        float t117 = t97 * c24_64;
+        float t118 = t114 + t116;
+        float t119 = t115 + t117;
+        float t120 = t114 - t116;
+        float t121 = t115 - t117;
+        float t122 = t118 - t119;
+        float t123 = t119 * c12_64;
+        float t124 = t122 + t123;
+        float t125 = t122 - t123;
+        float t126 = t120 - t121;
+        float t127 = t121 * c20_64;
+        float t128 = t126 + t127;
+        float t129 = t126 - t127;
+        float t130 = t16 - t31;
+        float t131 = t17 - t30;
+        float t132 = t18 - t29;
+        float t133 = t19 - t28;
+        float t134 = t20 - t27;
+        float t135 = t21 - t26;
+        float t136 = t22 - t25;
+        float t137 = t23 - t24;
+        float t138 = t24 * c16_64;
+        float t139 = t25 * c16_64;
+        float t140 = t26 * c16_64;
+        float t141 = t27 * c16_64;
+        float t142 = t28 * c16_64;
+        float t143 = t29 * c16_64;
+        float t144 = t30 * c16_64;
+        float t145 = t31 * c16_64;
+        float t146 = t130 + t138;
+        float t147 = t131 + t139;
+        float t148 = t132 + t140;
+        float t149 = t133 + t141;
+        float t150 = t134 + t142;
+        float t151 = t135 + t143;
+        float t152 = t136 + t144;
+        float t153 = t137 + t145;
+        float t154 = t130 - t138;
+        float t155 = t131 - t139;
+        float t156 = t132 - t140;
+        float t157 = t133 - t141;
+        float t158 = t134 - t142;
+        float t159 = t135 - t143;
+        float t160 = t136 - t144;
+        float t161 = t137 - t145;
+        float t162 = t146 - t153;
+        float t163 = t147 - t152;
+        float t164 = t148 - t151;
+        float t165 = t149 - t150;
+        float t166 = t150 * c8_64;
+        float t167 = t151 * c8_64;
+        float t168 = t152 * c8_64;
+        float t169 = t153 * c8_64;
+        float t170 = t162 + t166;
+        float t171 = t163 + t167;
+        float t172 = t164 + t168;
+        float t173 = t165 + t169;
+        float t174 = t162 - t166;
+        float t175 = t163 - t167;
+        float t176 = t164 - t168;
+        float t177 = t165 - t169;
+        float t178 = t170 - t173;
+        float t179 = t171 - t172;
+        float t180 = t172 * c4_64;
+        float t181 = t173 * c4_64;
+        float t182 = t178 + t180;
+        float t183 = t179 + t181;
+        float t184 = t178 - t180;
+        float t185 = t179 - t181;
+        float t186 = t182 - t183;
+        float t187 = t183 * c2_64;
+        float t188 = t186 + t187;
+        float t189 = t186 - t187;
+        float t190 = t184 - t185;
+        float t191 = t185 * c30_64;
+        float t192 = t190 + t191;
+        float t193 = t190 - t191;
+        float t194 = t174 - t177;
+        float t195 = t175 - t176;
+        float t196 = t176 * c28_64;
+        float t197 = t177 * c28_64;
+        float t198 = t194 + t196;
+        float t199 = t195 + t197;
+        float t200 = t194 - t196;
+        float t201 = t195 - t197;
+        float t202 = t198 - t199;
+        float t203 = t199 * c14_64;
+        float t204 = t202 + t203;
+        float t205 = t202 - t203;
+        float t206 = t200 - t201;
+        float t207 = t201 * c18_64;
+        float t208 = t206 + t207;
+        float t209 = t206 - t207;
+        float t210 = t154 - t161;
+        float t211 = t155 - t160;
+        float t212 = t156 - t159;
+        float t213 = t157 - t158;
+        float t214 = t158 * c24_64;
+        float t215 = t159 * c24_64;
+        float t216 = t160 * c24_64;
+        float t217 = t161 * c24_64;
+        float t218 = t210 + t214;
+        float t219 = t211 + t215;
+        float t220 = t212 + t216;
+        float t221 = t213 + t217;
+        float t222 = t210 - t214;
+        float t223 = t211 - t215;
+        float t224 = t212 - t216;
+        float t225 = t213 - t217;
+        float t226 = t218 - t221;
+        float t227 = t219 - t220;
+        float t228 = t220 * c12_64;
+        float t229 = t221 * c12_64;
+        float t230 = t226 + t228;
+        float t231 = t227 + t229;
+        float t232 = t226 - t228;
+        float t233 = t227 - t229;
+        float t234 = t230 - t231;
+        float t235 = t231 * c6_64;
+        float t236 = t234 + t235;
+        float t237 = t234 - t235;
+        float t238 = t232 - t233;
+        float t239 = t233 * c26_64;
+        float t240 = t238 + t239;
+        float t241 = t238 - t239;
+        float t242 = t222 - t225;
+        float t243 = t223 - t224;
+        float t244 = t224 * c20_64;
+        float t245 = t225 * c20_64;
+        float t246 = t242 + t244;
+        float t247 = t243 + t245;
+        float t248 = t242 - t244;
+        float t249 = t243 - t245;
+        float t250 = t246 - t247;
+        float t251 = t247 * c10_64;
+        float t252 = t250 + t251;
+        float t253 = t250 - t251;
+        float t254 = t248 - t249;
+        float t255 = t249 * c22_64;
+        float t256 = t254 + t255;
+        float t257 = t254 - t255;
+
+        // Store the 32 results at indices k * 32 + i. `scale` is a template constant,
+        // so only one of the two branches is live in each instantiation.
+        if (scale) {
+            out[((ap_uint<5>)0, i(4, 0))] = t60 / 1024;
+            out[((ap_uint<5>)1, i(4, 0))] = t188 / 1024;
+            out[((ap_uint<5>)2, i(4, 0))] = t108 / 1024;
+            out[((ap_uint<5>)3, i(4, 0))] = t236 / 1024;
+            out[((ap_uint<5>)4, i(4, 0))] = t76 / 1024;
+            out[((ap_uint<5>)5, i(4, 0))] = t252 / 1024;
+            out[((ap_uint<5>)6, i(4, 0))] = t124 / 1024;
+            out[((ap_uint<5>)7, i(4, 0))] = t204 / 1024;
+            out[((ap_uint<5>)8, i(4, 0))] = t64 / 1024;
+            out[((ap_uint<5>)9, i(4, 0))] = t208 / 1024;
+            out[((ap_uint<5>)10, i(4, 0))] = t128 / 1024;
+            out[((ap_uint<5>)11, i(4, 0))] = t256 / 1024;
+            out[((ap_uint<5>)12, i(4, 0))] = t80 / 1024;
+            out[((ap_uint<5>)13, i(4, 0))] = t240 / 1024;
+            out[((ap_uint<5>)14, i(4, 0))] = t112 / 1024;
+            out[((ap_uint<5>)15, i(4, 0))] = t192 / 1024;
+            out[((ap_uint<5>)16, i(4, 0))] = t61 / 1024;
+            out[((ap_uint<5>)17, i(4, 0))] = t193 / 1024;
+            out[((ap_uint<5>)18, i(4, 0))] = t113 / 1024;
+            out[((ap_uint<5>)19, i(4, 0))] = t241 / 1024;
+            out[((ap_uint<5>)20, i(4, 0))] = t81 / 1024;
+            out[((ap_uint<5>)21, i(4, 0))] = t257 / 1024;
+            out[((ap_uint<5>)22, i(4, 0))] = t129 / 1024;
+            out[((ap_uint<5>)23, i(4, 0))] = t209 / 1024;
+            out[((ap_uint<5>)24, i(4, 0))] = t65 / 1024;
+            out[((ap_uint<5>)25, i(4, 0))] = t205 / 1024;
+            out[((ap_uint<5>)26, i(4, 0))] = t125 / 1024;
+            out[((ap_uint<5>)27, i(4, 0))] = t253 / 1024;
+            out[((ap_uint<5>)28, i(4, 0))] = t77 / 1024;
+            out[((ap_uint<5>)29, i(4, 0))] = t237 / 1024;
+            out[((ap_uint<5>)30, i(4, 0))] = t109 / 1024;
+            out[((ap_uint<5>)31, i(4, 0))] = t189 / 1024;
+        } else {
+            out[((ap_uint<5>)0, i(4, 0))] = t60;
+            out[((ap_uint<5>)1, i(4, 0))] = t188;
+            out[((ap_uint<5>)2, i(4, 0))] = t108;
+            out[((ap_uint<5>)3, i(4, 0))] = t236;
+            out[((ap_uint<5>)4, i(4, 0))] = t76;
+            out[((ap_uint<5>)5, i(4, 0))] = t252;
+            out[((ap_uint<5>)6, i(4, 0))] = t124;
+            out[((ap_uint<5>)7, i(4, 0))] = t204;
+            out[((ap_uint<5>)8, i(4, 0))] = t64;
+            out[((ap_uint<5>)9, i(4, 0))] = t208;
+            out[((ap_uint<5>)10, i(4, 0))] = t128;
+            out[((ap_uint<5>)11, i(4, 0))] = t256;
+            out[((ap_uint<5>)12, i(4, 0))] = t80;
+            out[((ap_uint<5>)13, i(4, 0))] = t240;
+            out[((ap_uint<5>)14, i(4, 0))] = t112;
+            out[((ap_uint<5>)15, i(4, 0))] = t192;
+            out[((ap_uint<5>)16, i(4, 0))] = t61;
+            out[((ap_uint<5>)17, i(4, 0))] = t193;
+            out[((ap_uint<5>)18, i(4, 0))] = t113;
+            out[((ap_uint<5>)19, i(4, 0))] = t241;
+            out[((ap_uint<5>)20, i(4, 0))] = t81;
+            out[((ap_uint<5>)21, i(4, 0))] = t257;
+            out[((ap_uint<5>)22, i(4, 0))] = t129;
+            out[((ap_uint<5>)23, i(4, 0))] = t209;
+            out[((ap_uint<5>)24, i(4, 0))] = t65;
+            out[((ap_uint<5>)25, i(4, 0))] = t205;
+            out[((ap_uint<5>)26, i(4, 0))] = t125;
+            out[((ap_uint<5>)27, i(4, 0))] = t253;
+            out[((ap_uint<5>)28, i(4, 0))] = t77;
+            out[((ap_uint<5>)29, i(4, 0))] = t237;
+            out[((ap_uint<5>)30, i(4, 0))] = t109;
+            out[((ap_uint<5>)31, i(4, 0))] = t189;
+        }
+    }
+}
+
+/**
+ * @brief Transpose an N x N float matrix stored contiguously:
+ *        out[y * N + x] = in[x * N + y] for all 0 <= x, y < N.
+ *
+ * The loop counters are plain ints so the loops terminate for every N. An 8-bit
+ * counter wraps at 256 and can never reach a bound of N >= 256.
+ *
+ * @tparam N   side length of the matrix
+ * @param in   source matrix, N * N floats
+ * @param out  destination matrix, N * N floats
+ */
+template <int N>
+void TransposeN(float in[N * N], float out[N * N]) {
+#pragma HLS INLINE off
+
+    for (int y = 0; y < N; y++) {
+        for (int x = 0; x < N; x++) {
+#pragma HLS pipeline II = 1
+
+            out[y * N + x] = in[x * N + y];
+        }
+    }
+}
+
+// Transpose a 1024-float buffer viewed as a 32x32 matrix. A 10-bit address is the
+// concatenation of two 5-bit fields; the source address has the two fields swapped
+// relative to the destination address.
+void TransposeBlock32(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+
+    for (ap_uint<8> hi = 0; hi < 32; hi++) {
+        for (ap_uint<8> lo = 0; lo < 32; lo++) {
+#pragma HLS pipeline II = 1
+
+            // Destination: {hi, lo}; source: {lo, hi}.
+            ap_uint<10> dst = (hi(4, 0), lo(4, 0));
+            ap_uint<10> src = (lo(4, 0), hi(4, 0));
+
+            out[dst] = in[src];
+        }
+    }
+}
+
+// Transpose each of the four 16x16 tiles of a 1024-float buffer independently.
+// Address layout (10 bits): {tile_y[0], 4-bit field, tile_x[0], 4-bit field}. The tile
+// bits are identical on both sides; only the two 4-bit fields are swapped between the
+// source and destination addresses.
+void TransposeBlock16(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+
+    for (ap_uint<8> tile_y = 0; tile_y < 2; tile_y++) {
+        for (ap_uint<8> tile_x = 0; tile_x < 2; tile_x++) {
+            for (ap_uint<8> r = 0; r < 16; r++) {
+                for (ap_uint<8> c = 0; c < 16; c++) {
+#pragma HLS pipeline II = 1
+
+                    ap_uint<10> dst = ((ap_uint<1>)tile_y[0], r(3, 0), (ap_uint<1>)tile_x[0], c(3, 0));
+                    ap_uint<10> src = ((ap_uint<1>)tile_y[0], c(3, 0), (ap_uint<1>)tile_x[0], r(3, 0));
+
+                    out[dst] = in[src];
+                }
+            }
+        }
+    }
+}
+
+// Transpose each of the sixteen 8x8 tiles of a 1024-float buffer independently.
+// Address layout (10 bits): {tile_y[1:0], 3-bit field, tile_x[1:0], 3-bit field}. The
+// tile bits are identical on both sides; only the two 3-bit fields are swapped.
+void TransposeBlock8(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+
+    for (ap_uint<8> tile_y = 0; tile_y < 4; tile_y++) {
+        for (ap_uint<8> tile_x = 0; tile_x < 4; tile_x++) {
+            for (ap_uint<8> r = 0; r < 8; r++) {
+                for (ap_uint<8> c = 0; c < 8; c++) {
+#pragma HLS pipeline II = 1
+
+                    ap_uint<10> dst = (tile_y(1, 0), r(2, 0), tile_x(1, 0), c(2, 0));
+                    ap_uint<10> src = (tile_y(1, 0), c(2, 0), tile_x(1, 0), r(2, 0));
+
+                    out[dst] = in[src];
+                }
+            }
+        }
+    }
+}
+
+// Transpose each of the sixty-four 4x4 tiles of a 1024-float buffer independently.
+// Address layout (10 bits): {tile_y[2:0], 2-bit field, tile_x[2:0], 2-bit field}. The
+// tile bits are identical on both sides; only the two 2-bit fields are swapped.
+void TransposeBlock4(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+
+    for (ap_uint<8> tile_y = 0; tile_y < 8; tile_y++) {
+        for (ap_uint<8> tile_x = 0; tile_x < 8; tile_x++) {
+            for (ap_uint<8> r = 0; r < 4; r++) {
+                for (ap_uint<8> c = 0; c < 4; c++) {
+#pragma HLS pipeline II = 1
+
+                    ap_uint<10> dst = (tile_y(2, 0), r(1, 0), tile_x(2, 0), c(1, 0));
+                    ap_uint<10> src = (tile_y(2, 0), c(1, 0), tile_x(2, 0), r(1, 0));
+
+                    out[dst] = in[src];
+                }
+            }
+        }
+    }
+}
+
+// Two-stage sum/difference butterfly over four values, scaled by 0.25:
+//   block1[0] = ((b0 + b1) + (b2 + b3)) / 4
+//   block1[1] = ((b0 + b1) - (b2 + b3)) / 4
+//   block1[2] = ((b0 - b1) + (b2 - b3)) / 4
+//   block1[3] = ((b0 - b1) - (b2 - b3)) / 4
+// where b0..b3 are block0[0..3]. All inputs are read before any output is written.
+void ComputeDC(float block0[4], float block1[4]) {
+#pragma HLS INLINE off
+
+    // First stage: pairwise sum and difference.
+    const float sum01 = block0[0] + block0[1];
+    const float dif01 = block0[0] - block0[1];
+    const float sum23 = block0[2] + block0[3];
+    const float dif23 = block0[2] - block0[3];
+
+    // Second stage combined with the 1/4 scaling.
+    block1[0] = (sum01 + sum23) * 0.25f;
+    block1[1] = (sum01 - sum23) * 0.25f;
+    block1[2] = (dif01 + dif23) * 0.25f;
+    block1[3] = (dif01 - dif23) * 0.25f;
+}
+
+// Gather the four 4x4 sub-blocks (dy, dx) of tile (by, bx) from `in` into the 64-entry
+// scratch buffer `tmp`, interleaving them: the element at (y, x) of sub-block (dy, dx)
+// lands at tmp address {y, dy, x, dx}. The (0, 0) element of every sub-block is also
+// copied to block0[{dy, dx}].
+void LoadBlock4to8(ap_uint<8> by, ap_uint<8> bx, float in[1024], float tmp[64], float block0[4]) {
+#pragma HLS INLINE off
+
+load:
+    for (ap_uint<8> dy = 0; dy < 2; dy++) {
+        for (ap_uint<8> dx = 0; dx < 2; dx++) {
+            for (ap_uint<8> y = 0; y < 4; y++) {
+                for (ap_uint<8> x = 0; x < 4; x++) {
+#pragma HLS pipeline II = 1
+
+                    // Destination inside tmp: {y[1:0], dy[0], x[1:0], dx[0]}.
+                    ap_uint<6> dst;
+                    dst[0] = dx[0];
+                    dst(2, 1) = x(1, 0);
+                    dst[3] = dy[0];
+                    dst(5, 4) = y(1, 0);
+
+                    // Source inside in: {by[1:0], dy[0], y[1:0], bx[1:0], dx[0], x[1:0]}.
+                    ap_uint<10> src;
+                    src(1, 0) = x(1, 0);
+                    src[2] = dx[0];
+                    src(4, 3) = bx(1, 0);
+                    src(6, 5) = y(1, 0);
+                    src[7] = dy[0];
+                    src(9, 8) = by(1, 0);
+
+                    const float value = in[src];
+                    tmp[dst] = value;
+
+                    // First element of each sub-block is captured separately.
+                    if (x == 0 && y == 0) block0[(dy[0], dx[0])] = value;
+                }
+            }
+        }
+    }
+}
+
+// Write the 8x8 tile (by, bx) of `out`. Every element comes from tmp[{y, x}], except
+// the 2x2 corner with y < 2 and x < 2, which is taken from block1[{y[0], x[0]}].
+void FeedBlock4to8(ap_uint<8> by, ap_uint<8> bx, float tmp[64], float block1[4], float out[1024]) {
+#pragma HLS INLINE off
+
+feed:
+    for (ap_uint<8> y = 0; y < 8; y++) {
+        for (ap_uint<8> x = 0; x < 8; x++) {
+#pragma HLS pipeline II = 1
+
+            // Source inside tmp: {y[2:0], x[2:0]}.
+            ap_uint<6> src;
+            src(5, 3) = y(2, 0);
+            src(2, 0) = x(2, 0);
+
+            // Destination inside out: {by[1:0], y[2:0], bx[1:0], x[2:0]}.
+            ap_uint<10> dst;
+            dst(9, 8) = by(1, 0);
+            dst(7, 5) = y(2, 0);
+            dst(4, 3) = bx(1, 0);
+            dst(2, 0) = x(2, 0);
+
+            const bool in_corner = (y < 2) && (x < 2);
+            out[dst] = in_corner ? block1[((ap_uint<1>)y[0], (ap_uint<1>)x[0])] : tmp[src];
+        }
+    }
+}
+
+// For each of the 4 x 4 tiles (by, bx) of the 1024-float buffer, run a three-step
+// dataflow: LoadBlock4to8 gathers the tile into `tmp` and picks four values into
+// `block0`, ComputeDC turns `block0` into `block1`, and FeedBlock4to8 writes the tile
+// to `out` using `tmp` and `block1`.
+void TransformBlock4to8(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+
+    for (ap_uint<8> by = 0; by < 4; by++) {
+        for (ap_uint<8> bx = 0; bx < 4; bx++) {
+#pragma HLS dataflow
+
+            // Per-tile buffers passed between the three stages.
+            float tmp[64];
+            float block0[4];
+            float block1[4];
+
+            LoadBlock4to8(by, bx, in, tmp, block0);
+            ComputeDC(block0, block1);
+            FeedBlock4to8(by, bx, tmp, block1, out);
+        }
+    }
+}
+
+/**
+ * @brief Process one 1024-float buffer through the 4x4 pipeline:
+ *        dct4_block<false> -> TransposeBlock4 -> dct4_block<true> -> TransformBlock4to8.
+ *
+ * With DEBUG_DCT defined, the contents of the last intermediate buffer and of `out`
+ * are printed to std::cout after the pipeline has run. The guard uses the same macro
+ * name as the 8x8 and 16x16 variants so one switch enables every dump.
+ *
+ * @param in   source buffer, 1024 floats
+ * @param out  destination buffer, 1024 floats
+ */
+void DCT4x4_block(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+#pragma HLS dataflow
+
+    // Intermediate buffers between the dataflow stages.
+    float temp0[1024];
+#pragma HLS RESOURCE variable = temp0 core = RAM_2P_BRAM
+    float temp1[1024];
+#pragma HLS RESOURCE variable = temp1 core = RAM_2P_BRAM
+    float temp2[1024];
+#pragma HLS RESOURCE variable = temp2 core = RAM_2P_BRAM
+
+    dct4_block<false>(in, temp0);
+    TransposeBlock4(temp0, temp1);
+    dct4_block<true>(temp1, temp2);
+    TransformBlock4to8(temp2, out);
+
+#ifdef DEBUG_DCT
+    // Dump temp2 using the 4x4-tile address layout {by[2:0], y[1:0], bx[2:0], x[1:0]}.
+    for (ap_uint<8> by = 0; by < 8; by++) {
+        for (ap_uint<8> bx = 0; bx < 8; bx++) {
+            for (ap_uint<8> y = 0; y < 4; y++) {
+                for (ap_uint<8> x = 0; x < 4; x++) {
+                    ap_uint<10> addr;
+                    addr(9, 7) = by(2, 0);
+                    addr(6, 5) = y(1, 0);
+                    addr(4, 2) = bx(2, 0);
+                    addr(1, 0) = x(1, 0);
+
+                    std::cout << "dct4_before_interleave: id=" << addr << " " << temp2[addr] << std::endl;
+                }
+            }
+        }
+    }
+
+    // Dump out using the 8x8-tile address layout {by[1:0], y[2:0], bx[1:0], x[2:0]}.
+    for (ap_uint<8> by = 0; by < 4; by++) {
+        for (ap_uint<8> bx = 0; bx < 4; bx++) {
+            for (ap_uint<8> y = 0; y < 8; y++) {
+                for (ap_uint<8> x = 0; x < 8; x++) {
+                    ap_uint<10> addr;
+                    addr(9, 8) = by(1, 0);
+                    addr(7, 5) = y(2, 0);
+                    addr(4, 3) = bx(1, 0);
+                    addr(2, 0) = x(2, 0);
+
+                    std::cout << "dct4_after_interleave: id=" << addr << " " << out[addr] << std::endl;
+                }
+            }
+        }
+    }
+#endif
+}
+
+/**
+ * @brief Stream wrapper around the 4x4 pipeline. For each of the yblock * xblock
+ *        iterations it reads 1024 floats from `in`, runs
+ *        dct4_block<false> -> TransposeBlock4 -> dct4_block<true> -> TransformBlock4to8,
+ *        and writes the 1024 results to `out`.
+ *
+ * The iteration counters are 16 bits wide to match xblock/yblock. An 8-bit counter
+ * wraps at 256 and would never reach a bound above 255, so the loop would not end.
+ *
+ * @param xblock  inner-loop iteration count
+ * @param yblock  outer-loop iteration count
+ * @param in      input stream; 1024 floats are consumed per iteration
+ * @param out     output stream; 1024 floats are produced per iteration
+ */
+void DCT4x4Top(ap_uint<16> xblock, ap_uint<16> yblock, hls::stream<float>& in, hls::stream<float>& out) {
+#pragma HLS INLINE off
+    for (ap_uint<16> cnty = 0; cnty < yblock; cnty++) {
+        for (ap_uint<16> cntx = 0; cntx < xblock; cntx++) {
+#pragma HLS DATAFLOW
+
+            // Buffers handed from one dataflow stage to the next.
+            float temp0[1024];
+#pragma HLS RESOURCE variable = temp0 core = RAM_2P_BRAM
+            float temp1[1024];
+#pragma HLS RESOURCE variable = temp1 core = RAM_2P_BRAM
+            float temp2[1024];
+#pragma HLS RESOURCE variable = temp2 core = RAM_2P_BRAM
+            float temp3[1024];
+#pragma HLS RESOURCE variable = temp3 core = RAM_2P_BRAM
+            float temp4[1024];
+#pragma HLS RESOURCE variable = temp4 core = RAM_2P_BRAM
+
+        load:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+
+                temp0[i] = in.read();
+            }
+
+            dct4_block<false>(temp0, temp1);
+            TransposeBlock4(temp1, temp2);
+            dct4_block<true>(temp2, temp3);
+            TransformBlock4to8(temp3, temp4);
+
+        feed:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+
+                out.write(temp4[i]);
+            }
+        }
+    }
+}
+
+// Process one 1024-float buffer through the 8x8 pipeline:
+// dct8_block<false> -> TransposeBlock8 -> dct8_block<true>.
+// With DEBUG_DCT defined, the values of tile (by == 0, bx == 1) at every stage are
+// printed to std::cout afterwards.
+void DCT8x8_block(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+#pragma HLS dataflow
+
+    // Intermediate buffers between the dataflow stages.
+    float temp0[1024];
+#pragma HLS RESOURCE variable = temp0 core = RAM_2P_BRAM
+    float temp1[1024];
+#pragma HLS RESOURCE variable = temp1 core = RAM_2P_BRAM
+
+    dct8_block<false>(in, temp0);
+    TransposeBlock8(temp0, temp1);
+    dct8_block<true>(temp1, out);
+
+#ifdef DEBUG_DCT
+    for (ap_uint<8> by = 0; by < 4; by++) {
+        for (ap_uint<8> bx = 0; bx < 4; bx++) {
+            for (ap_uint<8> y = 0; y < 8; y++) {
+                for (ap_uint<8> x = 0; x < 8; x++) {
+                    // 8x8-tile address layout: {by[1:0], y[2:0], bx[1:0], x[2:0]}.
+                    ap_uint<10> addr;
+                    addr(9, 8) = by(1, 0);
+                    addr(7, 5) = y(2, 0);
+                    addr(4, 3) = bx(1, 0);
+                    addr(2, 0) = x(2, 0);
+
+                    // Only one tile is printed.
+                    if (by == 0 && bx == 1)
+                        std::cout << "dct8: id=" << addr << " in=" << in[addr] << " temp0=" << temp0[addr]
+                                  << " temp1=" << temp1[addr] << " out=" << out[addr] << std::endl;
+                }
+            }
+        }
+    }
+#endif
+}
+
+/**
+ * @brief Stream wrapper around the 8x8 pipeline. For each of the yblock * xblock
+ *        iterations it reads 1024 floats from `in`, runs
+ *        dct8_block<false> -> TransposeBlock8 -> dct8_block<true>, and writes the
+ *        1024 results to `out`.
+ *
+ * The iteration counters are 16 bits wide to match xblock/yblock. An 8-bit counter
+ * wraps at 256 and would never reach a bound above 255, so the loop would not end.
+ *
+ * @param xblock  inner-loop iteration count
+ * @param yblock  outer-loop iteration count
+ * @param in      input stream; 1024 floats are consumed per iteration
+ * @param out     output stream; 1024 floats are produced per iteration
+ */
+void DCT8x8Top(ap_uint<16> xblock, ap_uint<16> yblock, hls::stream<float>& in, hls::stream<float>& out) {
+#pragma HLS INLINE off
+    for (ap_uint<16> cnty = 0; cnty < yblock; cnty++) {
+        for (ap_uint<16> cntx = 0; cntx < xblock; cntx++) {
+#pragma HLS DATAFLOW
+
+            // Buffers handed from one dataflow stage to the next.
+            float temp0[1024];
+#pragma HLS RESOURCE variable = temp0 core = RAM_2P_BRAM
+            float temp1[1024];
+#pragma HLS RESOURCE variable = temp1 core = RAM_2P_BRAM
+            float temp2[1024];
+#pragma HLS RESOURCE variable = temp2 core = RAM_2P_BRAM
+            float temp3[1024];
+#pragma HLS RESOURCE variable = temp3 core = RAM_2P_BRAM
+
+        load:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+
+                temp0[i] = in.read();
+            }
+
+            dct8_block<false>(temp0, temp1);
+            TransposeBlock8(temp1, temp2);
+            dct8_block<true>(temp2, temp3);
+
+        feed:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+
+                out.write(temp3[i]);
+            }
+        }
+    }
+}
+
+// Process one 1024-float buffer through the 16x16 pipeline:
+// dct16_block<false> -> TransposeBlock16 -> dct16_block<true>.
+// With DEBUG_DCT defined, every element of every stage is printed to std::cout
+// afterwards.
+void DCT16x16_block(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+#pragma HLS dataflow
+
+    // Intermediate buffers between the dataflow stages.
+    float temp0[1024];
+#pragma HLS RESOURCE variable = temp0 core = RAM_2P_BRAM
+    float temp1[1024];
+#pragma HLS RESOURCE variable = temp1 core = RAM_2P_BRAM
+
+    dct16_block<false>(in, temp0);
+    TransposeBlock16(temp0, temp1);
+    dct16_block<true>(temp1, out);
+
+#ifdef DEBUG_DCT
+    for (ap_uint<8> by = 0; by < 2; by++) {
+        for (ap_uint<8> bx = 0; bx < 2; bx++) {
+            for (ap_uint<8> y = 0; y < 16; y++) {
+                for (ap_uint<8> x = 0; x < 16; x++) {
+                    // 16x16-tile address layout: {by[0], y[3:0], bx[0], x[3:0]}.
+                    ap_uint<10> addr;
+                    addr[9] = (ap_uint<1>)by[0];
+                    addr(8, 5) = y(3, 0);
+                    addr[4] = (ap_uint<1>)bx[0];
+                    addr(3, 0) = x(3, 0);
+
+                    std::cout << "dct16: id=" << addr << " in=" << in[addr] << " temp0=" << temp0[addr]
+                              << " temp1=" << temp1[addr] << " out=" << out[addr] << std::endl;
+                }
+            }
+        }
+    }
+#endif
+}
+
+/**
+ * @brief Stream wrapper around the 16x16 pipeline. For each of the yblock * xblock
+ *        iterations it reads 1024 floats from `in`, runs
+ *        dct16_block<false> -> TransposeBlock16 -> dct16_block<true>, and writes the
+ *        1024 results to `out`.
+ *
+ * The iteration counters are 16 bits wide to match xblock/yblock. An 8-bit counter
+ * wraps at 256 and would never reach a bound above 255, so the loop would not end.
+ *
+ * @param xblock  inner-loop iteration count
+ * @param yblock  outer-loop iteration count
+ * @param in      input stream; 1024 floats are consumed per iteration
+ * @param out     output stream; 1024 floats are produced per iteration
+ */
+void DCT16x16Top(ap_uint<16> xblock, ap_uint<16> yblock, hls::stream<float>& in, hls::stream<float>& out) {
+#pragma HLS INLINE off
+    for (ap_uint<16> cnty = 0; cnty < yblock; cnty++) {
+        for (ap_uint<16> cntx = 0; cntx < xblock; cntx++) {
+#pragma HLS DATAFLOW
+
+            // Buffers handed from one dataflow stage to the next.
+            float temp0[1024];
+#pragma HLS RESOURCE variable = temp0 core = RAM_2P_BRAM
+            float temp1[1024];
+#pragma HLS RESOURCE variable = temp1 core = RAM_2P_BRAM
+            float temp2[1024];
+#pragma HLS RESOURCE variable = temp2 core = RAM_2P_BRAM
+            float temp3[1024];
+#pragma HLS RESOURCE variable = temp3 core = RAM_2P_BRAM
+
+        load:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+
+                temp0[i] = in.read();
+            }
+
+            dct16_block<false>(temp0, temp1);
+            TransposeBlock16(temp1, temp2);
+            dct16_block<true>(temp2, temp3);
+
+        feed:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+
+                out.write(temp3[i]);
+            }
+        }
+    }
+}
+
+// Process one 1024-float buffer through the 32x32 pipeline:
+// dct32_block<false> -> TransposeBlock32 -> dct32_block<true>.
+// Only the second pass is instantiated with scale == true.
+void DCT32x32_block(float in[1024], float out[1024]) {
+#pragma HLS INLINE off
+#pragma HLS dataflow
+
+    // Intermediate buffers between the dataflow stages.
+    float temp0[1024];
+#pragma HLS RESOURCE variable = temp0 core = RAM_2P_BRAM
+    float temp1[1024];
+#pragma HLS RESOURCE variable = temp1 core = RAM_2P_BRAM
+
+    dct32_block<false>(in, temp0);
+    TransposeBlock32(temp0, temp1);
+    dct32_block<true>(temp1, out);
+}
+
+/**
+ * @brief Stream wrapper around the 32x32 pipeline. For each of the yblock * xblock
+ *        iterations it reads 1024 floats from `in`, runs
+ *        dct32_block<false> -> TransposeBlock32 -> dct32_block<true>, and writes the
+ *        1024 results to `out`.
+ *
+ * The iteration counters are 16 bits wide to match xblock/yblock. An 8-bit counter
+ * wraps at 256 and would never reach a bound above 255, so the loop would not end.
+ * The load and feed loops carry an explicit PIPELINE II = 1 directive, like the
+ * 4x4, 8x8 and 16x16 stream wrappers.
+ *
+ * @param xblock  inner-loop iteration count
+ * @param yblock  outer-loop iteration count
+ * @param in      input stream; 1024 floats are consumed per iteration
+ * @param out     output stream; 1024 floats are produced per iteration
+ */
+void DCT32x32Top(ap_uint<16> xblock, ap_uint<16> yblock, hls::stream<float>& in, hls::stream<float>& out) {
+#pragma HLS INLINE off
+    for (ap_uint<16> cnty = 0; cnty < yblock; cnty++) {
+        for (ap_uint<16> cntx = 0; cntx < xblock; cntx++) {
+#pragma HLS DATAFLOW
+
+            // Buffers handed from one dataflow stage to the next.
+            float temp0[1024];
+#pragma HLS RESOURCE variable = temp0 core = RAM_2P_BRAM
+            float temp1[1024];
+#pragma HLS RESOURCE variable = temp1 core = RAM_2P_BRAM
+            float temp2[1024];
+#pragma HLS RESOURCE variable = temp2 core = RAM_2P_BRAM
+            float temp3[1024];
+#pragma HLS RESOURCE variable = temp3 core = RAM_2P_BRAM
+
+        load:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+
+                temp0[i] = in.read();
+            }
+
+            dct32_block<false>(temp0, temp1);
+            TransposeBlock32(temp1, temp2);
+            dct32_block<true>(temp2, temp3);
+
+        feed:
+            for (ap_uint<16> i = 0; i < 1024; i++) {
+#pragma HLS PIPELINE II = 1
+
+                out.write(temp3[i]);
+            }
+        }
+    }
+}
+
+// Apply a 2x2 sum/difference transform, scaled by 1/4, to each of the four 2x2 tiles
+// of a 16-float buffer. The 4-bit address layout is {by[0], row bit, bx[0], col bit}.
+void DCT2x2_block16(float in[16], float out[16]) {
+#pragma HLS INLINE off
+
+    for (ap_uint<8> by = 0; by < 2; by++) {
+        for (ap_uint<8> bx = 0; bx < 2; bx++) {
+#pragma HLS pipeline
+
+            // Addresses of the four elements of tile (by, bx).
+            const ap_uint<1> ty = (ap_uint<1>)by[0];
+            const ap_uint<1> tx = (ap_uint<1>)bx[0];
+            const ap_uint<4> idx00 = (ty, (ap_uint<1>)0, tx, (ap_uint<1>)0);
+            const ap_uint<4> idx01 = (ty, (ap_uint<1>)0, tx, (ap_uint<1>)1);
+            const ap_uint<4> idx10 = (ty, (ap_uint<1>)1, tx, (ap_uint<1>)0);
+            const ap_uint<4> idx11 = (ty, (ap_uint<1>)1, tx, (ap_uint<1>)1);
+
+            const float v00 = in[idx00];
+            const float v01 = in[idx01];
+            const float v10 = in[idx10];
+            const float v11 = in[idx11];
+
+            // Sums and differences within each row of the tile.
+            const float row0_sum = v00 + v01;
+            const float row1_sum = v10 + v11;
+            const float row0_dif = v00 - v01;
+            const float row1_dif = v10 - v11;
+
+            // Combine the two rows and scale by 1/4.
+            out[idx00] = (row0_sum + row1_sum) / 4;
+            out[idx01] = (row0_sum - row1_sum) / 4;
+            out[idx10] = (row0_dif + row1_dif) / 4;
+            out[idx11] = (row0_dif - row1_dif) / 4;
+        }
+    }
+}
+
+template <bool scale>
+void dct4_block16(float in[16], float out[16]) {
+#pragma HLS INLINE off
+    // One 4-point pass per value of x[1:0]; the four results are divided by 16 only when `scale` is true.
+    const float c2_8 = 0.7071067811865475244f; // 0.5 / cos(2 * pi / 8)
+
+    for (ap_uint<8> x = 0; x < 4; x++) {
+#pragma HLS DEPENDENCE variable = in inter false
+#pragma HLS DEPENDENCE variable = out inter false
+#pragma HLS pipeline
+        // Element k of the current group sits at the 4-bit index {k, x[1:0]}.
+        const ap_uint<2> lane(x.range(1, 0));
+        const ap_uint<4> at0((ap_uint<2>(0), lane));
+        const ap_uint<4> at1((ap_uint<2>(1), lane));
+        const ap_uint<4> at2((ap_uint<2>(2), lane));
+        const ap_uint<4> at3((ap_uint<2>(3), lane));
+
+        const float v0 = in[at0];
+        const float v1 = in[at1];
+        const float v2 = in[at2];
+        const float v3 = in[at3];
+
+        const float evenSum = v0 + v2;
+        const float evenDiff = v0 - v2;
+        const float oddSum = v1 + v3;
+        const float oddRot = (v1 - v3) * c2_8;
+        const float oddMix = oddSum + oddRot;
+
+        float r0 = evenSum + oddMix;
+        float r1 = evenDiff + oddRot;
+        float r2 = evenDiff - oddRot;
+        float r3 = evenSum - oddMix;
+        if (scale) {
+            r0 = r0 / 16;
+            r1 = r1 / 16;
+            r2 = r2 / 16;
+            r3 = r3 / 16;
+        }
+        out[at0] = r0;
+        out[at1] = r1;
+        out[at2] = r2;
+        out[at3] = r3;
+    }
+}
+
+void DCT4x4_block16(float in[16], float out[16]) {
+#pragma HLS INLINE off
+#pragma HLS dataflow
+    // dct4_block16 pass, TransposeN<4>, second pass; both passes use scale = false, so nothing is divided by 16.
+    float firstPass[16]; // written by pass 1, read by the transpose
+#pragma HLS RESOURCE variable = firstPass core = RAM_2P_BRAM
+    float transposed[16]; // written by the transpose, read by pass 2
+#pragma HLS RESOURCE variable = transposed core = RAM_2P_BRAM
+
+    dct4_block16<false>(in, firstPass);
+    TransposeN<4>(firstPass, transposed);
+    dct4_block16<false>(transposed, out);
+}
+
+void idct4_block16(float from[16], float to[16]) {
+    // One unscaled 4-point pass per value of i[1:0]; IDCT4x4_block16 runs it on both sides of a transpose.
+    const float c2_8 = 0.707106769;
+
+LOOP_IDCT4X4:
+    for (ap_uint<8> i = 0; i < 4; i++) {
+#pragma HLS DEPENDENCE variable = from inter false
+#pragma HLS DEPENDENCE variable = to inter false
+#pragma HLS pipeline
+        // Element k of the current group sits at the 4-bit index {k, i[1:0]}.
+        const ap_uint<2> lane(i.range(1, 0));
+        const ap_uint<4> at0((ap_uint<2>(0), lane));
+        const ap_uint<4> at1((ap_uint<2>(1), lane));
+        const ap_uint<4> at2((ap_uint<2>(2), lane));
+        const ap_uint<4> at3((ap_uint<2>(3), lane));
+
+        const float v0 = from[at0];
+        const float v1 = from[at1];
+        const float v2 = from[at2];
+        const float v3 = from[at3];
+
+        const float evenSum = v0 + v2;
+        const float evenDiff = v0 - v2;
+        const float oddSum = v1 + v3;
+        const float oddRot = (v1 - v3) * c2_8;
+        const float oddMix = oddSum + oddRot;
+
+        to[at0] = evenSum + oddMix;
+        to[at1] = evenDiff + oddRot;
+        to[at2] = evenDiff - oddRot;
+        to[at3] = evenSum - oddMix;
+    }
+}
+
+void IDCT2x2_block16(float from[16], float to[16]) {
+    // Transforms the four 2x2 tiles of a 16-entry buffer independently; no scaling is applied.
+LOOP_IDCT2X2:
+    for (ap_uint<8> by = 0; by < 2; by++) {
+        for (ap_uint<8> bx = 0; bx < 2; bx++) {
+#pragma HLS pipeline
+            // Tile (by, bx): element (r, c) sits at the 4-bit index {by[0], r, bx[0], c}.
+            const ap_uint<1> zero(0), one(1), tileRow(by[0]), tileCol(bx[0]);
+            const ap_uint<4> at00((tileRow, zero, tileCol, zero));
+            const ap_uint<4> at01((tileRow, zero, tileCol, one));
+            const ap_uint<4> at10((tileRow, one, tileCol, zero));
+            const ap_uint<4> at11((tileRow, one, tileCol, one));
+
+            const float v00 = from[at00];
+            const float v01 = from[at01];
+            const float v10 = from[at10];
+            const float v11 = from[at11];
+
+            // Sum and difference of the two elements that share the same r.
+            const float sumTop = v00 + v01;
+            const float diffTop = v00 - v01;
+            const float sumBottom = v10 + v11;
+            const float diffBottom = v10 - v11;
+
+            // Combine the r = 0 and r = 1 pairs and store them straight into the tile.
+            to[at00] = sumTop + sumBottom;
+            to[at01] = sumTop - sumBottom;
+            to[at10] = diffTop + diffBottom;
+            to[at11] = diffTop - diffBottom;
+        }
+    }
+}
+
+void IDCT4x4_block16(float from[16], float to[16]) {
+#pragma HLS DATAFLOW
+    // idct4_block16 pass, TransposeN<4>, second idct4_block16 pass.
+    float firstPass[16];  // written by pass 1, read by the transpose
+    float transposed[16]; // written by the transpose, read by pass 2
+
+    idct4_block16(from, firstPass);
+    TransposeN<4>(firstPass, transposed);
+    idct4_block16(transposed, to);
+}
+
+#endif
diff --git a/codec/L2/include/hw/pikEnc/dequant.hpp b/codec/L2/include/hw/pikEnc/dequant.hpp
new file mode 100644
index 0000000000..f75a640cfd
--- /dev/null
+++ b/codec/L2/include/hw/pikEnc/dequant.hpp
@@ -0,0 +1,2649 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file dequant.hpp
+ */
+
+#ifndef _XF_CODEC_DEQUANT_HPP_
+#define _XF_CODEC_DEQUANT_HPP_
+
+#ifndef __cplusplus
+#error "Vitis Codec Library only works with C++."
+#endif
+
+#include <ap_int.h>
+
+const float dequant0_matrix4x4[3][64] = { // 3 x 64 values as one flat list (brace elision); presumably one row of 64 per channel - TODO confirm
+    0.00076352688, 0.00036029922, 0.0011553109, 0.0011553109, 0.0025205531, 0.0025205531, 0.0071363659, 0.0071363659,
+    0.00036029922, 0.00057008932, 0.0011553109, 0.0011553109, 0.0025205531, 0.0025205531, 0.0071363659, 0.0071363659,
+    0.0011553109,  0.0011553109,  0.0010383829, 0.0010383829, 0.0047177076, 0.0047177076, 0.011022241,  0.011022241,
+    0.0011553109,  0.0011553109,  0.0010383829, 0.0010383829, 0.0047177076, 0.0047177076, 0.011022241,  0.011022241,
+    0.0025205531,  0.0025205531,  0.0047177076, 0.0047177076, 0.0033692066, 0.0033692066, 0.013451101,  0.013451101,
+    0.0025205531,  0.0025205531,  0.0047177076, 0.0047177076, 0.0033692066, 0.0033692066, 0.013451101,  0.013451101,
+    0.0071363659,  0.0071363659,  0.011022241,  0.011022241,  0.013451101,  0.013451101,  0.017449437,  0.017449437,
+    0.0071363659,  0.0071363659,  0.011022241,  0.011022241,  0.013451101,  0.013451101,  0.017449437,  0.017449437,
+    0.0044780076,  0.0012398829,  0.0031354439, 0.0031354439, 0.0066977735, 0.0066977735, 0.021900313,  0.021900313,
+    0.0012398829,  0.0014682054,  0.0031354439, 0.0031354439, 0.0066977735, 0.0066977735, 0.021900313,  0.021900313,
+    0.0031354439,  0.0031354439,  0.0036335052, 0.0036335052, 0.0068652928, 0.0068652928, 0.01835954,   0.01835954,
+    0.0031354439,  0.0031354439,  0.0036335052, 0.0036335052, 0.0068652928, 0.0068652928, 0.01835954,   0.01835954,
+    0.0066977735,  0.0066977735,  0.0068652928, 0.0068652928, 0.016142365,  0.016142365,  0.027852703,  0.027852703,
+    0.0066977735,  0.0066977735,  0.0068652928, 0.0068652928, 0.016142365,  0.016142365,  0.027852703,  0.027852703,
+    0.021900313,   0.021900313,   0.01835954,   0.01835954,   0.027852703,  0.027852703,  0.060083967,  0.060083967,
+    0.021900313,   0.021900313,   0.01835954,   0.01835954,   0.027852703,  0.027852703,  0.060083967,  0.060083967,
+    0.0076058088,  0.0071930392,  0.0159935,    0.0159935,    0.035495624,  0.035495624,  0.059421029,  0.059421029,
+    0.0071930392,  0.012544631,   0.0159935,    0.0159935,    0.035495624,  0.035495624,  0.059421029,  0.059421029,
+    0.0159935,     0.0159935,     0.080566496,  0.080566496,  0.065065965,  0.065065965,  0.13197067,   0.13197067,
+    0.0159935,     0.0159935,     0.080566496,  0.080566496,  0.065065965,  0.065065965,  0.13197067,   0.13197067,
+    0.035495624,   0.035495624,   0.065065965,  0.065065965,  0.06381201,   0.06381201,   0.5660603,    0.5660603,
+    0.035495624,   0.035495624,   0.065065965,  0.065065965,  0.06381201,   0.06381201,   0.5660603,    0.5660603,
+    0.059421029,   0.059421029,   0.13197067,   0.13197067,   0.5660603,    0.5660603,    3.9365194,    3.9365194,
+    0.059421029,   0.059421029,   0.13197067,   0.13197067,   0.5660603,    0.5660603,    3.9365194,    3.9365194};
+
+const float dequant0_matrix8x8[3][64] = { // 3 x 64 values as one flat list (brace elision); presumably one row of 64 per channel - TODO confirm
+    0.0002909539,  0.00054209016, 0.00067740923, 0.00074249663, 0.00077226211, 0.00082013884, 0.00074831746,
+    0.00036888735, 0.00054209016, 0.00083686807, 0.00095243513, 0.0010254194,  0.0010785819,  0.001144354,
+    0.0010148444,  0.00050194381, 0.00067740923, 0.00095243513, 0.0010309708,  0.0010634598,  0.0011255582,
+    0.0011620984,  0.00093262421, 0.00046117156, 0.00074249663, 0.0010254194,  0.0010634598,  0.0011365352,
+    0.0011997225,  0.001200883,   0.00083138247, 0.0004105276,  0.00077226211, 0.0010785819,  0.0011255582,
+    0.0011997225,  0.0012587409,  0.0010503692,  0.00069899118, 0.00043283266, 0.00082013884, 0.001144354,
+    0.0011620984,  0.001200883,   0.0010503692,  0.00081462093, 0.00054313598, 0.00079451699, 0.00074831746,
+    0.0010148444,  0.00093262421, 0.00083138247, 0.00069899118, 0.00054313598, 0.00090717152, 0.0014429828,
+    0.00036888735, 0.00050194381, 0.00046117156, 0.0004105276,  0.00043283266, 0.00079451699, 0.0014429828,
+    0.0021368233,  0.0018947646,  0.0020298155,  0.0018158669,  0.0023109741,  0.002771968,   0.0028875093,
+    0.0026061691,  0.0014496285,  0.0020298155,  0.0033498062,  0.0029625066,  0.0040782108,  0.0047020554,
+    0.0046986658,  0.004059691,   0.0022195177,  0.0018158669,  0.0029625066,  0.003855702,   0.0039039075,
+    0.0044844504,  0.0048484555,  0.004212948,   0.002350494,   0.0023109741,  0.0040782108,  0.0039039075,
+    0.005005613,   0.0047252625,  0.0043872506,  0.0035329545,  0.0020367121,  0.002771968,   0.0047020554,
+    0.0044844504,  0.0047252625,  0.0053944732,  0.0043888669,  0.0030360108,  0.0016807605,  0.0028875093,
+    0.0046986658,  0.0048484555,  0.0043872506,  0.0043888669,  0.0039804466,  0.0027267861,  0.0015264176,
+    0.0026061691,  0.004059691,   0.004212948,   0.0035329545,  0.0030360108,  0.0027267861,  0.0022856754,
+    0.0012781189,  0.0014496285,  0.0022195177,  0.002350494,   0.0020367121,  0.0016807605,  0.0015264176,
+    0.0012781189,  0.00079561322, 0.0036905503,  0.0059465133,  0.014704376,   0.0070739356,  0.0033689756,
+    0.0045039994,  0.0049362411,  0.0014067882,  0.0059465133,  0.02911225,    0.061591174,   0.029919557,
+    0.013408141,   0.01439747,    0.012449885,   0.0032443525,  0.014704376,   0.061591174,   0.026697377,
+    0.021077724,   0.019806897,   0.024742229,   0.018026156,   0.0048673735,  0.0070739356,  0.029919557,
+    0.021077724,   0.012372882,   0.020340675,   0.029437196,   0.013461344,   0.0039864881,  0.0033689756,
+    0.013408141,   0.019806897,   0.020340675,   0.01897642,    0.016170548,   0.0081333239,  0.003652758,
+    0.0045039994,  0.01439747,    0.024742229,   0.029437196,   0.016170548,   0.0070513831,  0.0037240474,
+    0.0058688065,  0.0049362411,  0.012449885,   0.018026156,   0.013461344,   0.0081333239,  0.0037240474,
+    0.0049084341,  0.0089747189,  0.0014067882,  0.0032443525,  0.0048673735,  0.0039864881,  0.003652758,
+    0.0058688065,  0.0089747189,  0.011420494};
+
+const float dequant0_matrix16x16[3][256] = { // 3 x 256 values as one flat list (brace elision); presumably one row of 256 per channel - TODO confirm
+    8.6941407e-05, 9.6448246e-05, 0.00013085619, 0.00017933025, 0.00024836665, 0.00027373253, 0.0002828775,
+    0.00029644763, 0.0003157453,  0.00036460641, 0.00045703814, 0.00059130753, 0.0007995927,  0.0017282541,
+    0.012332106,   0.1177097,     9.6448246e-05, 6.6194349e-05, 8.9682457e-05, 0.00011046594, 0.00015460247,
+    0.00016983721, 0.00017969888, 0.00019148602, 0.00020649449, 0.00024233664, 0.00030588722, 0.00039798973,
+    0.00054070033, 0.0012416398,  0.0088538285,  0.084458858,   0.00013085619, 8.9682457e-05, 0.00010278081,
+    0.00014380792, 0.00017201646, 0.00016728821, 0.00016724656, 0.00017489056, 0.00019119326, 0.00023126998,
+    0.00029385358, 0.00038439123, 0.00052454998, 0.0014314948,  0.010118864,   0.095793746,   0.00017933025,
+    0.00011046594, 0.00014380792, 0.00016282526, 0.00017587373, 0.00018702789, 0.00018481401, 0.00018729926,
+    0.00019578976, 0.00023316547, 0.00028713772, 0.00037734731, 0.00051693874, 0.0018694391,  0.012999591,
+    0.12131298,    0.00024836665, 0.00015460247, 0.00017201646, 0.00017587373, 0.00016836988, 0.00018151697,
+    0.0001956971,  0.00020576089, 0.00022431443, 0.00026457073, 0.00032181898, 0.00040609311, 0.00053771475,
+    0.0027711354,  0.018724207,   0.171187,      0.00027373253, 0.00016983721, 0.00016728821, 0.00018702789,
+    0.00018151697, 0.00017808966, 0.00019275362, 0.00021269334, 0.00025798281, 0.00030524193, 0.00036844157,
+    0.00046176789, 0.00090772583, 0.0050451984,  0.032186501,   0.27849218,    0.0002828775,  0.00017969888,
+    0.00016724656, 0.00018481401, 0.0001956971,  0.00019275362, 0.00019311177, 0.00023000367, 0.00027816661,
+    0.00034266408, 0.00042883624, 0.00053407787, 0.0019287922,  0.010249138,   0.0628227,     0.52449799,
+    0.00029644763, 0.00019148602, 0.00017489056, 0.00018729926, 0.00020576089, 0.00021269334, 0.00023000367,
+    0.0002549284,  0.00030712178, 0.00037721035, 0.00047448673, 0.00097330485, 0.0045551551,  0.023077184,
+    0.13550578,    1.0883172,     0.0003157453,  0.00020649449, 0.00019119326, 0.00019578976, 0.00022431443,
+    0.00025798281, 0.00027816661, 0.00030712178, 0.00034762156, 0.00042543863, 0.0005989233,  0.0025047492,
+    0.011368512,   0.056990173,   0.3214075,     2.4235594,     0.00036460641, 0.00024233664, 0.00023126998,
+    0.00023316547, 0.00026457073, 0.00030524193, 0.00034266408, 0.00037721035, 0.00042543863, 0.00049277348,
+    0.0017959751,  0.0071011903,  0.030609839,   0.14635129,    0.81423062,    4.1576195,     0.00045703814,
+    0.00030588722, 0.00029385358, 0.00028713772, 0.00032181898, 0.00036844157, 0.00042883624, 0.00047448673,
+    0.0005989233,  0.0017959751,  0.0058984202,  0.022078902,   0.090429001,   0.41230315,    1.8386297,
+    7.2313967,     0.00059130753, 0.00039798973, 0.00038439123, 0.00037734731, 0.00040609311, 0.00046176789,
+    0.00053407787, 0.00097330485, 0.0025047492,  0.0071011903,  0.022078902,   0.075213447,   0.29305896,
+    1.2194115,     3.4551265,     13.306986,     0.0007995927,  0.00054070033, 0.00052454998, 0.00051693874,
+    0.00053771475, 0.00090772583, 0.0019287922,  0.0045551551,  0.011368512,   0.030609839,   0.090429001,
+    0.29305896,    1.0477177,     2.5220733,     6.9886661,     26.357689,     0.0017282541,  0.0012416398,
+    0.0014314948,  0.0018694391,  0.0027711354,  0.0050451984,  0.010249138,   0.023077184,   0.056990173,
+    0.14635129,    0.41230315,    1.2194115,     2.5220733,     5.7303133,     15.535759,     57.391628,
+    0.012332106,   0.0088538285,  0.010118864,   0.012999591,   0.018724207,   0.032186501,   0.0628227,
+    0.13550578,    0.3214075,     0.81423062,    1.8386297,     3.4551265,     6.9886661,     15.535759,
+    39.925846,     144.52841,     0.1177097,     0.084458858,   0.095793746,   0.12131298,    0.171187,
+    0.27849218,    0.52449799,    1.0883172,     2.4235594,     4.1576195,     7.2313967,     13.306986,
+    26.357689,     57.391628,     144.52841,     497.76062,     0.00039600098, 0.00034607144, 0.00040833891,
+    0.00048667059, 0.0005861785,  0.00074920553, 0.00098458503, 0.0013121488,  0.0017772652,  0.0022973588,
+    0.0028583913,  0.0036706869,  0.0049268291,  0.0072197518,  0.012596095,   0.029396452,   0.00034607144,
+    0.00024747173, 0.00027387534, 0.0003365741,  0.00040938996, 0.0005285684,  0.00069563533, 0.00092797622,
+    0.0012577978,  0.0016151961,  0.0020118796,  0.0025859487,  0.0034734753,  0.0051075728,  0.0089140097,
+    0.020809252,   0.00040833891, 0.00027387534, 0.00031539286, 0.00034493519, 0.00041945101, 0.00054777943,
+    0.0007227718,  0.00096404023, 0.001303279,   0.0016352652,  0.0020375939,  0.0026197163,  0.0035195949,
+    0.0052199606,  0.0091060577,  0.021249145,   0.00048667059, 0.0003365741,  0.00034493519, 0.00041010656,
+    0.00048113358, 0.00058858463, 0.00077134289, 0.0010256943,  0.0013762834,  0.0016811473,  0.0021005531,
+    0.0026996068,  0.0036257072,  0.0054506757,  0.009496971,   0.022137705,   0.0005861785,  0.00040938996,
+    0.00041945101, 0.00048113358, 0.00059573731, 0.00071104773, 0.00087649847, 0.0011293787,  0.0014301358,
+    0.0017464008,  0.002182483,   0.0028128312,  0.0037867145,  0.0058138543,  0.010114919,   0.02353812,
+    0.00074920553, 0.0005285684,  0.00054777943, 0.00058858463, 0.00071104773, 0.0008852075,  0.0010738227,
+    0.0013219716,  0.001527268,   0.0018505802,  0.0023102255,  0.0029750105,  0.0040830439,  0.0062906044,
+    0.01094307,    0.025472319,   0.00098458503, 0.00069563533, 0.0007227718,  0.00077134289, 0.00087649847,
+    0.0010738227,  0.0013484384,  0.0015134697,  0.0017388589,  0.0020487665,  0.0024944625,  0.0032075676,
+    0.0045332746,  0.0069634146,  0.012081781,   0.028057842,   0.0013121488,  0.00092797622, 0.00096404023,
+    0.0010256943,  0.0011293787,  0.0013219716,  0.0015134697,  0.0017614318,  0.0020141515,  0.0023621877,
+    0.0028521135,  0.0036510243,  0.0051573389,  0.0078939945,  0.013653559,   0.031619865,   0.0017772652,
+    0.0012577978,  0.001303279,   0.0013762834,  0.0014301358,  0.001527268,   0.0017388589,  0.0020141515,
+    0.002376725,   0.0027760784,  0.0033578034,  0.0044371258,  0.0061621098,  0.0092011821,  0.015820401,
+    0.03678393,    0.0022973588,  0.0016151961,  0.0016352652,  0.0016811473,  0.0017464008,  0.0018505802,
+    0.0020487665,  0.0023621877,  0.0027760784,  0.0033338354,  0.0042134481,  0.0055337516,  0.0076407115,
+    0.011347383,   0.019103128,   0.049214453,   0.0028583913,  0.0020118796,  0.0020375939,  0.0021005531,
+    0.002182483,   0.0023102255,  0.0024944625,  0.0028521135,  0.0033578034,  0.0042134481,  0.0054442864,
+    0.0071092523,  0.0097620692,  0.014422105,   0.02557276,    0.070264891,   0.0036706869,  0.0025859487,
+    0.0026197163,  0.0026996068,  0.0028128312,  0.0029750105,  0.0032075676,  0.0036510243,  0.0044371258,
+    0.0055337516,  0.0071092523,  0.0094711715,  0.012937739,   0.019291667,   0.038823757,   0.10550891,
+    0.0049268291,  0.0034734753,  0.0035195949,  0.0036257072,  0.0037867145,  0.0040830439,  0.0045332746,
+    0.0051573389,  0.0061621098,  0.0076407115,  0.0097620692,  0.012937739,   0.017999291,   0.031373069,
+    0.062424377,   0.167826,      0.0072197518,  0.0051075728,  0.0052199606,  0.0054506757,  0.0058138543,
+    0.0062906044,  0.0069634146,  0.0078939945,  0.0092011821,  0.011347383,   0.014422105,   0.019291667,
+    0.031373069,   0.055231433,   0.10869808,    0.2891748,     0.012596095,   0.0089140097,  0.0091060577,
+    0.009496971,   0.010114919,   0.01094307,    0.012081781,   0.013653559,   0.015820401,   0.019103128,
+    0.02557276,    0.038823757,   0.062424377,   0.10869808,    0.21590571,    0.56856865,    0.029396452,
+    0.020809252,   0.021249145,   0.022137705,   0.02353812,    0.025472319,   0.028057842,   0.031619865,
+    0.03678393,    0.049214453,   0.070264891,   0.10550891,    0.167826,      0.2891748,     0.56856865,
+    1.5101935,     0.00069132249, 0.0010815193,  0.0017454268,  0.0028453015,  0.0046874369,  0.0047010141,
+    0.0040570637,  0.0035506648,  0.0031582527,  0.0039758035,  0.006985045,   0.012666198,   0.024005895,
+    0.057713043,   0.2467743,     1.4114686,     0.0010815193,  0.00067276059, 0.0012984627,  0.0022066862,
+    0.0036029078,  0.0033287695,  0.0028739227,  0.0025160075,  0.0022385314,  0.0029198492,  0.0051128156,
+    0.0092460476,  0.017484134,   0.042972263,   0.18313001,    1.0444078,     0.0017454268,  0.0012984627,
+    0.0013351721,  0.0023470016,  0.0033850435,  0.0030727212,  0.0027693673,  0.0024766023,  0.0022112229,
+    0.0032111497,  0.0055785025,  0.010022596,   0.018849386,   0.04953425,    0.20919789,    1.1837591,
+    0.0028453015,  0.0022066862,  0.0023470016,  0.0027035389,  0.0027250734,  0.0026314682,  0.0024308208,
+    0.0022387633,  0.0021502564,  0.0036681695,  0.0064038844,  0.011393034,   0.021249678,   0.062385138,
+    0.25973764,    1.451555,      0.0046874369,  0.0036029078,  0.0033850435,  0.0027250734,  0.0021667613,
+    0.0021522474,  0.0020895076,  0.0019863129,  0.0025009788,  0.0042195194,  0.007313136,   0.013109547,
+    0.024602434,   0.085390151,   0.34971833,    1.9222946,     0.0047010141,  0.0033287695,  0.0030727212,
+    0.0026314682,  0.0021522474,  0.0017763178,  0.0017569333,  0.0018497383,  0.0030575816,  0.0051033041,
+    0.0087390682,  0.015506574,   0.03419143,    0.12187894,    0.49420494,    2.6978834,     0.0040570637,
+    0.0028739227,  0.0027693673,  0.0024308208,  0.0020895076,  0.0017569333,  0.0014928841,  0.0023629696,
+    0.0038356045,  0.0063890605,  0.010930002,   0.01915928,    0.054220565,   0.18829457,    0.74606138,
+    3.9897807,     0.0035506648,  0.0025160075,  0.0024766023,  0.0022387633,  0.0019863129,  0.0018497383,
+    0.0023629696,  0.0031767336,  0.0050553046,  0.0082801571,  0.013994551,   0.029805269,   0.093009129,
+    0.31386197,    1.2120669,     6.3341594,     0.0031582527,  0.0022385314,  0.0022112229,  0.0021502564,
+    0.0025009788,  0.0030575816,  0.0038356045,  0.0050553046,  0.0069825784,  0.011235781,   0.019635202,
+    0.055844314,   0.16970058,    0.56211448,    2.1147799,     10.700346,     0.0039758035,  0.0029198492,
+    0.0032111497,  0.0036681695,  0.0042195194,  0.0051033041,  0.0063890605,  0.0082801571,  0.011235781,
+    0.015955226,   0.04111724,    0.112705,      0.33115557,    1.0637885,     3.9274936,     17.23595,
+    0.006985045,   0.0051128156,  0.0055785025,  0.0064038844,  0.007313136,   0.0087390682,  0.010930002,
+    0.013994551,   0.019635202,   0.04111724,    0.092569597,   0.24464482,    0.69495994,    2.1639838,
+    7.3197994,     29.072956,     0.012666198,   0.0092460476,  0.010022596,   0.011393034,   0.013109547,
+    0.015506574,   0.01915928,    0.029805269,   0.055844314,   0.112705,      0.24464482,    0.57213938,
+    1.5720237,     4.6755047,     13.410465,     52.19664,      0.024005895,   0.017484134,   0.018849386,
+    0.021249678,   0.024602434,   0.03419143,    0.054220565,   0.093009129,   0.16970058,    0.33115557,
+    0.69495994,    1.5720237,     3.8629961,     9.4337931,     26.474764,     100.96515,     0.057713043,
+    0.042972263,   0.04953425,    0.062385138,   0.085390151,   0.12187894,    0.18829457,    0.31386197,
+    0.56211448,    1.0637885,     2.1639838,     4.6755047,     9.4337931,     20.937027,     57.505886,
+    214.89687,     0.2467743,     0.18313001,    0.20919789,    0.25973764,    0.34971833,    0.49420494,
+    0.74606138,    1.2120669,     2.1147799,     3.9274936,     7.3197994,     13.410465,     26.474764,
+    57.505886,     144.56065,     529.50238,     1.4114686,     1.0444078,     1.1837591,     1.451555,
+    1.9222946,     2.6978834,     3.9897807,     6.3341594,     10.700346,     17.23595,      29.072956,
+    52.19664,      100.96515,     214.89687,     529.50238,     1785.9767};
+
+const float dequant0_matrix32x32[3][1024] = {
+    3.6023353e-05, 4.7485129e-05, 5.8148311e-05, 7.1379516e-05, 8.7837536e-05, 0.00010836149, 0.00013402395,
+    0.00013984936, 0.00013758539, 0.00013574096, 0.00013431697, 0.00013332062, 0.00013276614, 0.00013034284,
+    0.00012594002, 0.00012217819, 0.0001190508,  0.00011656478, 0.00011474322, 0.00012188913, 0.00016943525,
+    0.00023737965, 0.00033562747, 0.00047973637, 0.00069487351, 0.001023312,   0.0020070006,  0.0041111726,
+    0.0087794187,  0.020014564,   0.051372338,   0.17595468,    4.7485129e-05, 2.3922055e-05, 3.0301986e-05,
+    4.5417441e-05, 7.1285736e-05, 9.0684785e-05, 0.00010896237, 0.00010955311, 0.00010636745, 0.00010387948,
+    0.00010196272, 0.00010054386, 9.9582219e-05, 9.7177792e-05, 9.3538241e-05, 9.0446316e-05, 8.7878834e-05,
+    8.5826992e-05, 8.4297106e-05, 9.0152811e-05, 0.00012504045, 0.00017483013, 0.00024673855, 0.00035209421,
+    0.00050921342, 0.00074884825, 0.0014747089,  0.0030166102,  0.0064336546,  0.014649257,   0.03755879,
+    0.12850724,    5.8148311e-05, 3.0301986e-05, 3.1925785e-05, 6.2082814e-05, 4.8148726e-05, 5.9288523e-05,
+    8.7128392e-05, 9.8823781e-05, 0.00010840771, 0.00011285518, 0.00010984355, 0.00010757463, 0.00010594203,
+    0.00010245113, 9.8243203e-05, 9.4688156e-05, 9.1741233e-05, 8.9377958e-05, 8.759318e-05,  9.59725e-05,
+    0.00013270826, 0.00018504279, 0.00026050268, 0.00037089363, 0.00053529057, 0.00079188979, 0.0015696345,
+    0.0032036791,  0.006818593,   0.015496098,   0.039659217,   0.13546796,    7.1379516e-05, 4.5417441e-05,
+    6.2082814e-05, 4.2815278e-05, 7.9491496e-05, 8.3820349e-05, 6.9794878e-05, 6.4144406e-05, 7.4116695e-05,
+    8.3001447e-05, 9.094907e-05,  9.814732e-05,  0.00010479001, 0.00010772577, 0.00010309856, 9.9065626e-05,
+    9.5727934e-05, 9.3044677e-05, 9.0998714e-05, 0.00010392844, 0.00014316791, 0.00019894708, 0.00027921025,
+    0.0003964066,  0.00057063322, 0.00086592074, 0.0017108019,  0.0034813138,  0.0073888344,  0.016748458,
+    0.042760596,   0.14573096,    8.7837536e-05, 7.1285736e-05, 4.8148726e-05, 7.9491496e-05, 5.7702688e-05,
+    9.3768082e-05, 0.00010685319, 8.267763e-05,  6.7383102e-05, 5.9459566e-05, 6.7216999e-05, 7.4507858e-05,
+    8.1035359e-05, 8.4555511e-05, 8.7558867e-05, 9.0202593e-05, 9.2635317e-05, 9.4999879e-05, 9.4486371e-05,
+    0.00011441334, 0.00015691627, 0.00021717971, 0.00030368852, 0.00042972399, 0.00061670481, 0.00097007764,
+    0.0019088162,  0.0038696625,  0.0081844097,  0.018491486,   0.047067408,   0.15995315,    0.00010836149,
+    9.0684785e-05, 5.9288523e-05, 8.3820349e-05, 9.3768082e-05, 6.4679705e-05, 8.5727108e-05, 0.00010771409,
+    9.3236245e-05, 7.7160919e-05, 6.5960317e-05, 5.7894802e-05, 6.2323947e-05, 6.6301116e-05, 6.9845759e-05,
+    7.306334e-05,  7.6064927e-05, 7.8965917e-05, 8.1887767e-05, 0.00011299414, 0.00016209931, 0.00023365305,
+    0.00033505241, 0.00047231393, 0.00067547208, 0.0011128781,  0.0021792827,  0.0043982645,  0.0092638051,
+    0.020849204,   0.052876592,   0.16858153,    0.00013402395, 0.00010896237, 8.7128392e-05, 6.9794878e-05,
+    0.00010685319, 8.5727108e-05, 6.3228545e-05, 8.0343605e-05, 9.8303324e-05, 0.00010209033, 8.5850937e-05,
+    7.4151001e-05, 6.3394582e-05, 5.5238434e-05, 5.5796354e-05, 5.9204292e-05, 6.2438397e-05, 6.558458e-05,
+    7.3094248e-05, 0.00010504625, 0.00015138833, 0.00021912932, 0.0003191361,  0.00046864239, 0.00069576764,
+    0.0012598214,  0.0025407695,  0.0051095891,  0.010710777,   0.023998523,   0.060610231,   0.17021742,
+    0.00013984936, 0.00010955311, 9.8823781e-05, 6.4144406e-05, 8.267763e-05,  0.00010771409, 8.0343605e-05,
+    6.2136249e-05, 7.660492e-05,  9.1846341e-05, 0.00010756763, 9.1990696e-05, 7.7838296e-05, 6.7140754e-05,
+    5.8907954e-05, 5.2481202e-05, 5.1350049e-05, 5.4532175e-05, 6.8726011e-05, 9.916423e-05,  0.00014344335,
+    0.0002083459,  0.00030440185, 0.0004483306,  0.00066744082, 0.0012999001,  0.0026211354,  0.0054349718,
+    0.011737515,   0.027037593,   0.070070185,   0.17149711,    0.00013758539, 0.00010636745, 0.00010840771,
+    7.4116695e-05, 6.7383102e-05, 9.3236245e-05, 9.8303324e-05, 7.660492e-05,  6.1395869e-05, 7.401441e-05,
+    8.6448825e-05, 9.7273121e-05, 9.4305702e-05, 8.0745209e-05, 7.0316069e-05, 6.2186271e-05, 5.5784836e-05,
+    5.0714323e-05, 6.5730928e-05, 9.512106e-05,  0.00013798039, 0.00020094345, 0.00029432218, 0.00043450613,
+    0.00069452584, 0.001371033,   0.0027620618,  0.0057223607,  0.012348444,   0.02842387,    0.073611557,
+    0.15523638,    0.00013574096, 0.00010387948, 0.00011285518, 8.3001447e-05, 5.9459566e-05, 7.7160919e-05,
+    0.00010209033, 9.1846341e-05, 7.401441e-05,  6.0548504e-05, 6.9839843e-05, 7.9154248e-05, 8.836521e-05,
+    9.5998839e-05, 8.3152358e-05, 7.3129027e-05, 6.5233886e-05, 6.8411915e-05, 8.4194486e-05, 0.00010528413,
+    0.00013480806, 0.00019668374, 0.00028859731, 0.00042678794, 0.00075014768, 0.0014782182,  0.0029731498,
+    0.0061504068,  0.013253534,   0.030467277,   0.078465275,   0.13781379,    0.00013431697, 0.00010196272,
+    0.00010984355, 9.094907e-05,  6.7216999e-05, 6.5960317e-05, 8.5850937e-05, 0.00010756763, 8.6448825e-05,
+    6.9839843e-05, 5.7402325e-05, 6.5355809e-05, 7.3386436e-05, 8.1423859e-05, 8.943578e-05,  8.530372e-05,
+    7.7794233e-05, 9.3046023e-05, 0.00011322236, 0.00014006831, 0.00017609817, 0.00022499407, 0.00029225985,
+    0.00042835347, 0.00082884677, 0.001629176,   0.0032691469,  0.0067481515,  0.014512495,   0.033299044,
+    0.06922318,    0.12214311,    0.00013332062, 0.00010054386, 0.00010757463, 9.814732e-05,  7.4507858e-05,
+    5.7894802e-05, 7.4151001e-05, 9.1990696e-05, 9.7273121e-05, 7.9154248e-05, 6.5355809e-05, 5.4758104e-05,
+    6.1723047e-05, 6.8813068e-05, 7.5996257e-05, 8.3270323e-05, 0.00010768669, 0.00012740848, 0.00015341518,
+    0.00018788269, 0.00023393871, 0.00029615185, 0.00038133378, 0.00056585873, 0.00097444904, 0.0018352539,
+    0.0036716289,  0.0075579025,  0.016211953,   0.037108809,   0.061003428,   0.10813522,    0.00013276614,
+    9.9582219e-05, 0.00010594203, 0.00010479001, 8.1035359e-05, 6.2323947e-05, 6.3394582e-05, 7.7838296e-05,
+    9.4305702e-05, 8.836521e-05,  7.3386436e-05, 6.1723047e-05, 5.2579268e-05, 5.8812744e-05, 6.5216627e-05,
+    8.2553663e-05, 0.00011746787, 0.00016737721, 0.00020925321, 0.00025386584, 0.00031323297, 0.00039307572,
+    0.00050304458, 0.0008371906,  0.0014246708,  0.0024851307,  0.0044618333,  0.0086401487,  0.018475281,
+    0.04013985,    0.053734157,   0.095683835,   0.00013034284, 9.7177792e-05, 0.00010245113, 0.00010772577,
+    8.4555511e-05, 6.6301116e-05, 5.5238434e-05, 6.7140754e-05, 8.0745209e-05, 9.5998839e-05, 8.1423859e-05,
+    6.8813068e-05, 5.8812744e-05, 5.0840626e-05, 6.2802093e-05, 8.8842957e-05, 0.00012588511, 0.0001787434,
+    0.00025448241, 0.00034528313, 0.00042242263, 0.00052573788, 0.00076630549, 0.0012597241,  0.0021186692,
+    0.0036545296,  0.0064916844,  0.01195686,    0.023111777,   0.035196837,   0.047338061,   0.084672391,
+    0.00012594002, 9.3538241e-05, 9.8243203e-05, 0.00010309856, 8.7558867e-05, 6.9845759e-05, 5.5796354e-05,
+    5.8907954e-05, 7.0316069e-05, 8.3152358e-05, 8.943578e-05,  7.5996257e-05, 6.5216627e-05, 6.2802093e-05,
+    6.9284266e-05, 9.7446515e-05, 0.00013739559, 0.00019426746, 0.00027559226, 0.00039252552, 0.0005617991,
+    0.00074624532, 0.0011858167,  0.0019260488,  0.003202145,   0.0054626567,  0.0096012847,  0.017506059,
+    0.033511735,   0.036903262,   0.045523666,   0.075064376,   0.00012217819, 9.0446316e-05, 9.4688156e-05,
+    9.9065626e-05, 9.0202593e-05, 7.306334e-05,  5.9204292e-05, 5.2481202e-05, 6.2186271e-05, 7.3129027e-05,
+    8.530372e-05,  8.3270323e-05, 8.2553663e-05, 8.8842957e-05, 9.7446515e-05, 0.00010886286, 0.00015265013,
+    0.00021480687, 0.00030346535, 0.00043066181, 0.00061443396, 0.0010896012,  0.0018624857,  0.0029899392,
+    0.0049151219,  0.0082942424,  0.014426572,   0.026041286,   0.035969965,   0.039324421,   0.048491772,
+    0.079925604,   0.0001190508,  8.7878834e-05, 9.1741233e-05, 9.5727934e-05, 9.2635317e-05, 7.6064927e-05,
+    6.2438397e-05, 5.1350049e-05, 5.5784836e-05, 6.5233886e-05, 7.7794233e-05, 0.00010768669, 0.00011746787,
+    0.00012588511, 0.00013739559, 0.00015265013, 0.00017256087, 0.00024155903, 0.00033968751, 0.00048010345,
+    0.00079832785, 0.0014242902,  0.0025716727,  0.0047059646,  0.0076574814,  0.012785331,   0.022011017,
+    0.03745044,    0.038278699,   0.041845389,   0.051593613,   0.085022964,   0.00011656478, 8.5826992e-05,
+    8.9377958e-05, 9.3044677e-05, 9.4999879e-05, 7.8965917e-05, 6.558458e-05,  5.4532175e-05, 5.0714323e-05,
+    6.8411915e-05, 9.3046023e-05, 0.00012740848, 0.00016737721, 0.0001787434,  0.00019426746, 0.00021480687,
+    0.00024155903, 0.00027618342, 0.00038645029, 0.00061551511, 0.0010759425,  0.0019028325,  0.0034076818,
+    0.0061881426,  0.011418056,   0.020000016,   0.034087602,   0.039794367,   0.040685836,   0.044485144,
+    0.054854382,   0.090400733,   0.00011474322, 8.4297106e-05, 8.759318e-05,  9.0998714e-05, 9.4486371e-05,
+    8.1887767e-05, 7.3094248e-05, 6.8726011e-05, 6.5730928e-05, 8.4194486e-05, 0.00011322236, 0.00015341518,
+    0.00020925321, 0.00025448241, 0.00027559226, 0.00030346535, 0.00033968751, 0.00038645029, 0.00050021743,
+    0.0008559238,  0.0014819041,  0.0025972966,  0.0046121944,  0.0083091557,  0.015217415,   0.028416166,
+    0.043130342,   0.042246785,   0.043216646,   0.04727279,    0.058311719,   0.096123755,   0.00012188913,
+    9.0152811e-05, 9.59725e-05,   0.00010392844, 0.00011441334, 0.00011299414, 0.00010504625, 9.916423e-05,
+    9.512106e-05,  0.00010528413, 0.00014006831, 0.00018788269, 0.00025386584, 0.00034528313, 0.00039252552,
+    0.00043066181, 0.00048010345, 0.00061551511, 0.0008559238,  0.0012161328,  0.0020851884,  0.0036213107,
+    0.0063752397,  0.011392047,   0.02070329,    0.03612854,    0.039032985,   0.043199129,   0.04590892,
+    0.050251149,   0.062020246,   0.10228495,    0.00016943525, 0.00012504045, 0.00013270826, 0.00014316791,
+    0.00015691627, 0.00016209931, 0.00015138833, 0.00014344335, 0.00013798039, 0.00013480806, 0.00017609817,
+    0.00023393871, 0.00031323297, 0.00042242263, 0.0005617991,  0.00061443396, 0.00079832785, 0.0010759425,
+    0.0014819041,  0.0020851884,  0.0029972054,  0.0051572081,  0.0089998404,  0.015948871,   0.028757192,
+    0.032310378,   0.035030056,   0.038904343,   0.044836834,   0.053481963,   0.066058092,   0.10901628,
+    0.00023737965, 0.00017483013, 0.00018504279, 0.00019894708, 0.00021717971, 0.00023365305, 0.00021912932,
+    0.0002083459,  0.00020094345, 0.00019668374, 0.00022499407, 0.00029615185, 0.00039307572, 0.00052573788,
+    0.00074624532, 0.0010896012,  0.0014242902,  0.0019028325,  0.0025972966,  0.0036213107,  0.0051572081,
+    0.0075033265,  0.012978748,   0.02280736,    0.027205519,   0.029114837,   0.03166876,    0.03528671,
+    0.040800784,   0.050143749,   0.069101594,   0.11650559,    0.00033562747, 0.00024673855, 0.00026050268,
+    0.00027921025, 0.00030368852, 0.00033505241, 0.0003191361,  0.00030440185, 0.00029432218, 0.00028859731,
+    0.00029225985, 0.00038133378, 0.00050304458, 0.00076630549, 0.0011858167,  0.0018624857,  0.0025716727,
+    0.0034076818,  0.0046121944,  0.0063752397,  0.0089998404,  0.012978748,   0.019131092,   0.023234539,
+    0.024663156,   0.026473302,   0.028883345,   0.032282136,   0.037441988,   0.046157762,   0.063803799,
+    0.097442903,   0.00047973637, 0.00035209421, 0.00037089363, 0.0003964066,  0.00042972399, 0.00047231393,
+    0.00046864239, 0.0004483306,  0.00043450613, 0.00042678794, 0.00042835347, 0.00056585873, 0.0008371906,
+    0.0012597241,  0.0019260488,  0.0029899392,  0.0047059646,  0.0061881426,  0.0083091557,  0.011392047,
+    0.015948871,   0.02280736,    0.023234539,   0.02124157,    0.022608465,   0.024335282,   0.026626121,
+    0.029845085,   0.034716032,   0.042922091,   0.054095831,   0.071022667,   0.00069487351, 0.00050921342,
+    0.00053529057, 0.00057063322, 0.00061670481, 0.00067547208, 0.00069576764, 0.00066744082, 0.00069452584,
+    0.00075014768, 0.00082884677, 0.00097444904, 0.0014246708,  0.0021186692,  0.003202145,   0.0049151219,
+    0.0076574814,  0.011418056,   0.015217415,   0.02070329,    0.028757192,   0.027205519,   0.024663156,
+    0.022608465,   0.021008318,   0.022671077,   0.024870832,   0.027952766,   0.03260373,    0.039315179,
+    0.039317548,   0.052039407,   0.001023312,   0.00074884825, 0.00079188979, 0.00086592074, 0.00097007764,
+    0.0011128781,  0.0012598214,  0.0012999001,  0.001371033,   0.0014782182,  0.001629176,   0.0018352539,
+    0.0024851307,  0.0036545296,  0.0054626567,  0.0082942424,  0.012785331,   0.020000016,   0.028416166,
+    0.03612854,    0.032310378,   0.029114837,   0.026473302,   0.024335282,   0.022671077,   0.021478029,
+    0.023619946,   0.026613781,   0.031121623,   0.028607776,   0.028840896,   0.038476393,   0.0020070006,
+    0.0014747089,  0.0015696345,  0.0017108019,  0.0019088162,  0.0021792827,  0.0025407695,  0.0026211354,
+    0.0027620618,  0.0029731498,  0.0032691469,  0.0036716289,  0.0044618333,  0.0064916844,  0.0096012847,
+    0.014426572,   0.022011017,   0.034087602,   0.043130342,   0.039032985,   0.035030056,   0.03166876,
+    0.028883345,   0.026626121,   0.024870832,   0.023619946,   0.022920383,   0.025886349,   0.023199789,
+    0.021124862,   0.021465059,   0.028859012,   0.0041111726,  0.0030166102,  0.0032036791,  0.0034813138,
+    0.0038696625,  0.0043982645,  0.0051095891,  0.0054349718,  0.0057223607,  0.0061504068,  0.0067481515,
+    0.0075579025,  0.0086401487,  0.01195686,    0.017506059,   0.026041286,   0.03745044,    0.039794367,
+    0.042246785,   0.043199129,   0.038904343,   0.03528671,    0.032282136,   0.029845085,   0.027952766,
+    0.026613781,   0.025886349,   0.020038661,   0.017388897,   0.015956772,   0.016338442,   0.02213328,
+    0.0087794187,  0.0064336546,  0.006818593,   0.0073888344,  0.0081844097,  0.0092638051,  0.010710777,
+    0.011737515,   0.012348444,   0.013253534,   0.014512495,   0.016211953,   0.018475281,   0.023111777,
+    0.033511735,   0.035969965,   0.038278699,   0.040685836,   0.043216646,   0.04590892,    0.044836834,
+    0.040800784,   0.037441988,   0.034716032,   0.03260373,    0.031121623,   0.023199789,   0.017388897,
+    0.013509996,   0.012490984,   0.01288554,    0.017585071,   0.020014564,   0.014649257,   0.015496098,
+    0.016748458,   0.018491486,   0.020849204,   0.023998523,   0.027037593,   0.02842387,    0.030467277,
+    0.033299044,   0.037108809,   0.04013985,    0.035196837,   0.036903262,   0.039324421,   0.041845389,
+    0.044485144,   0.04727279,    0.050251149,   0.053481963,   0.050143749,   0.046157762,   0.042922091,
+    0.039315179,   0.028607776,   0.021124862,   0.015956772,   0.012490984,   0.010380118,   0.010786027,
+    0.014826181,   0.051372338,   0.03755879,    0.039659217,   0.042760596,   0.047067408,   0.052876592,
+    0.060610231,   0.070070185,   0.073611557,   0.078465275,   0.06922318,    0.061003428,   0.053734157,
+    0.047338061,   0.045523666,   0.048491772,   0.051593613,   0.054854382,   0.058311719,   0.062020246,
+    0.066058092,   0.069101594,   0.063803799,   0.054095831,   0.039317548,   0.028840896,   0.021465059,
+    0.016338442,   0.01288554,    0.010786027,   0.010110092,   0.013994874,   0.17595468,    0.12850724,
+    0.13546796,    0.14573096,    0.15995315,    0.16858153,    0.17021742,    0.17149711,    0.15523638,
+    0.13781379,    0.12214311,    0.10813522,    0.095683835,   0.084672391,   0.075064376,   0.079925604,
+    0.085022964,   0.090400733,   0.096123755,   0.10228495,    0.10901628,    0.11650559,    0.097442903,
+    0.071022667,   0.052039407,   0.038476393,   0.028859012,   0.02213328,    0.017585071,   0.014826181,
+    0.013994874,   0.017534142,   0.00017707126, 0.0001480639,  0.00016780054, 0.00019063143, 0.00021710296,
+    0.00024787075, 0.00028372489, 0.00031671999, 0.00035102671, 0.00039015003, 0.0004349151,  0.00048632181,
+    0.0005455903,  0.00060966006, 0.00067817559, 0.00075744296, 0.0008497017,  0.00095781003, 0.0010854676,
+    0.0012357334,  0.0014086879,  0.0016184739,  0.0018765966,  0.0021997194,  0.002612893,   0.0031555507,
+    0.0040466604,  0.0053533185,  0.0073829745,  0.010869758,   0.018018194,   0.039855745,   0.0001480639,
+    0.00010538891, 0.00010069195, 0.00011690574, 0.0001371219,  0.00016000759, 0.00018614267, 0.00020961632,
+    0.00023437649, 0.0002622726,  0.00029394444, 0.00033013042, 0.00037170923, 0.00041637814, 0.0004643999,
+    0.00051986717, 0.00058435171, 0.00065985468, 0.00074896315, 0.00085367227, 0.00097437395, 0.0011207485,
+    0.0013008264,  0.0015262378,  0.0018144646,  0.002193026,   0.0028165665,  0.0037284214,  0.0051450753,
+    0.0075791497,  0.012570018,   0.027817938,   0.00016780054, 0.00010069195, 0.00012605802, 0.0001268106,
+    0.0001345543,  0.00015255461, 0.00017769246, 0.00019953751, 0.0002237842,  0.00025130101, 0.00028306333,
+    0.00031920706, 0.00036062195, 0.00040437246, 0.00045215141, 0.00050725805, 0.00057125726, 0.00064613792,
+    0.00073446817, 0.00083778438, 0.00095738034, 0.0011023845,  0.0012807549,  0.0015040152,  0.0017894866,
+    0.0021669078,  0.0027882589,  0.0036929436,  0.0050986623,  0.0075142612,  0.012467773,   0.027602749,
+    0.00019063143, 0.00011690574, 0.0001268106,  0.00015151658, 0.0001564352,  0.00016693826, 0.00017903787,
+    0.00019482298, 0.00021931141, 0.00024674702, 0.00027776035, 0.00031307945, 0.00035356692, 0.00039516465,
+    0.0004428348,  0.00049778185, 0.00056154456, 0.00063610583, 0.00072402402, 0.00082609872, 0.00094506511,
+    0.0010892812,  0.0012666637,  0.0014886761,  0.0017725482,  0.0021565645,  0.0027762495,  0.0036786217,
+    0.0050809076,  0.0074908487,  0.012433167,   0.02753487,    0.00021710296, 0.0001371219,  0.0001345543,
+    0.0001564352,  0.00018301635, 0.00019103213, 0.00019891099, 0.00021062233, 0.00022562918, 0.00024527154,
+    0.00027648974, 0.00031203785, 0.00035209427, 0.0003930493,  0.00043994986, 0.00049402053, 0.00055679504,
+    0.00063022313, 0.00071764801, 0.00081863307, 0.00093744061, 0.001081446,   0.0012585566,  0.0014802213,
+    0.0017636477,  0.0021591801,  0.0027805718,  0.0036855093,  0.0050918958,  0.0075090546,  0.012466465,
+    0.02761494,    0.00024787075, 0.00016000759, 0.00015255461, 0.00016693826, 0.00019103213, 0.0002155252,
+    0.00022246495, 0.00023313538, 0.00024776958, 0.00026611183, 0.0002883035,  0.00031473255, 0.00035249389,
+    0.00039379625, 0.00044109311, 0.000495616,   0.00055890926, 0.00063293753, 0.00072023802, 0.00081925828,
+    0.00093718304, 0.0010801372,  0.0012565156,  0.0014787434,  0.0017628924,  0.002175004,   0.0028015445,
+    0.0037140276,  0.0051322081,  0.0075697373,  0.012569092,   0.02741445,    0.00028372489, 0.00018614267,
+    0.00017769246, 0.00017903787, 0.00019891099, 0.00022246495, 0.00024936345, 0.00025970236, 0.0002738453,
+    0.00029204748, 0.00031465336, 0.00034174335, 0.00036875275, 0.00040052799, 0.00044537513, 0.00050056411,
+    0.00056463969, 0.00063958851, 0.00072704646, 0.00082664948, 0.00094591256, 0.001090488,   0.0012683113,
+    0.0014908726,  0.0017754439,  0.0022075414,  0.0028399569,  0.0037649933,  0.0052029593,  0.0076745213,
+    0.012743721,   0.026927261,   0.00031671999, 0.00020961632, 0.00019953751, 0.00019482298, 0.00021062233,
+    0.00023313538, 0.00025970236, 0.00029003757, 0.00030429225, 0.00032265869, 0.00034540024, 0.00037039586,
+    0.00039815414, 0.00043094621, 0.00046952089, 0.00051485293, 0.00057416997, 0.00065037975, 0.0007375839,
+    0.00083874189, 0.00095987355, 0.0011067194,  0.0012873393,  0.0015134049,  0.0018024599,  0.0022644065,
+    0.002912289,   0.0038554242,  0.005320638,   0.0078380974,  0.01299985,    0.026449621,   0.00035102671,
+    0.00023437649, 0.0002237842,  0.00021931141, 0.00022562918, 0.00024776958, 0.0002738453,  0.00030429225,
+    0.00033918605, 0.00035813198, 0.000380043,   0.00040352362, 0.00043199339, 0.00046604805, 0.00050621247,
+    0.00055350625, 0.00060924928, 0.00067515491, 0.00075265532, 0.00085583632, 0.00097940431, 0.0011292148,
+    0.0013134903,  0.0015441383,  0.0018575913,  0.0023375514,  0.00300512,    0.0039767851,  0.0054861424,
+    0.0080791777,  0.013395431,   0.026088707,   0.00039015003, 0.0002622726,  0.00025130101, 0.00024674702,
+    0.00024527154, 0.00026611183, 0.00029204748, 0.00032265869, 0.00035813198, 0.00039764532, 0.00041749049,
+    0.0004418896,  0.000471304,   0.00050638703, 0.0005484593,  0.00059807982, 0.00065663754, 0.00072369125,
+    0.00080087944, 0.00089300267, 0.0010049412,  0.0011584698,  0.0013473318,  0.0015837309,  0.0019310683,
+    0.0024286029,  0.0031204908,  0.0041273846,  0.0056912252,  0.0083775017,  0.01386874,    0.025760984,
+    0.0004349151,  0.00029394444, 0.00028306333, 0.00027776035, 0.00027648974, 0.0002883035,  0.00031465336,
+    0.00034540024, 0.000380043,   0.00041749049, 0.00046008758, 0.00048566549, 0.0005165693,  0.00055346655,
+    0.00059722661, 0.00064936228, 0.00071082846, 0.00077940908, 0.00086079625, 0.00095798075, 0.0010749884,
+    0.001217348,   0.0013928403,  0.0016348839,  0.0020207826,  0.0025396293,  0.0032609967,  0.0043105762,
+    0.0059404108,  0.0087395925,  0.013688202,   0.025454629,   0.00048632181, 0.00033013042, 0.00031920706,
+    0.00031307945, 0.00031203785, 0.00031473255, 0.00034174335, 0.00037039586, 0.00040352362, 0.0004418896,
+    0.00048566549, 0.00053564616, 0.00056836149, 0.00060747023, 0.00065390544, 0.00070886861, 0.00077067583,
+    0.00084332074, 0.0009295785,  0.0010326204,  0.001156715,   0.0013077231,  0.0014938937,  0.0017586248,
+    0.0021398899,  0.0026732364,  0.0034299041,  0.0045305812,  0.00623939,    0.0091736475,  0.013522077,
+    0.025175273,   0.0005455903,  0.00037170923, 0.00036062195, 0.00035356692, 0.00035209427, 0.00035249389,
+    0.00036875275, 0.00039815414, 0.00043199339, 0.000471304,   0.0005165693,  0.00056836149, 0.00062771526,
+    0.0006694512,  0.00071903912, 0.00077550631, 0.00084022118, 0.00091686472, 0.0010085718,  0.0011183885,
+    0.0012506677,  0.0014116542,  0.0016106677,  0.0019274375,  0.0023405962,  0.0028935277,  0.0036588546,
+    0.004792687,   0.0065952893,  0.0095673967,  0.013373735,   0.024929052,   0.00060966006, 0.00041637814,
+    0.00040437246, 0.00039516465, 0.0003930493,  0.00039379625, 0.00040052799, 0.00043094621, 0.00046604805,
+    0.00050638703, 0.00055346655, 0.00060747023, 0.0006694512,  0.00074076012, 0.000792315,   0.0008509137,
+    0.00092024123, 0.0010023764,  0.0011000349,  0.0012170941,  0.0013588334,  0.001531342,   0.0017800233,
+    0.0021256125,  0.0025760736,  0.003178556,   0.0040119761,  0.005218829,   0.0070847394,  0.0094644791,
+    0.013246854,   0.024722738,   0.00067817559, 0.0004643999,  0.00045215141, 0.0004428348,  0.00043994986,
+    0.00044109311, 0.00044537513, 0.00046952089, 0.00050621247, 0.0005484593,  0.00059722661, 0.00065390544,
+    0.00071903912, 0.000792315,   0.00087457988, 0.00093771284, 0.0010124183,  0.0011009406,  0.0012062084,
+    0.0013320963,  0.001483815,   0.0016822729,  0.0019794905,  0.0023588818,  0.0028530811,  0.0035136454,
+    0.0044268607,  0.0057485211,  0.0077908854,  0.0096009346,  0.013295894,   0.024567602,   0.00075744296,
+    0.00051986717, 0.00050725805, 0.00049778185, 0.00049402053, 0.000495616,   0.00050056411, 0.00051485293,
+    0.00055350625, 0.00059807982, 0.00064936228, 0.00070886861, 0.00077550631, 0.0008509137,  0.00093771284,
+    0.0010380344,  0.0011190202,  0.0012149868,  0.0013291101,  0.0014655889,  0.0016300683,  0.0018874101,
+    0.002215296,   0.0026344741,  0.0031801381,  0.0039090244,  0.0049160789,  0.0063727167,  0.0079447664,
+    0.0097800363,  0.013552951,   0.025058351,   0.0008497017,  0.00058435171, 0.00057125726, 0.00056154456,
+    0.00055679504, 0.00055890926, 0.00056463969, 0.00057416997, 0.00060924928, 0.00065663754, 0.00071082846,
+    0.00077067583, 0.00084022118, 0.00092024123, 0.0010124183,  0.0011190202,  0.0012429155,  0.0013475876,
+    0.0014720538,  0.0016208906,  0.0018417919,  0.0021324588,  0.0024969804,  0.0029616053,  0.0035680921,
+    0.004377713,   0.0054956237,  0.0070217471,  0.0081049222,  0.0099847578,  0.013846565,   0.025618585,
+    0.00095781003, 0.00065985468, 0.00064613792, 0.00063610583, 0.00063022313, 0.00063293753, 0.00063958851,
+    0.00065037975, 0.00067515491, 0.00072369125, 0.00077940908, 0.00084332074, 0.00091686472, 0.0010023764,
+    0.0011009406,  0.0012149868,  0.0013475876,  0.0015026716,  0.0016392763,  0.0018354144,  0.0020991911,
+    0.0024254359,  0.0028342868,  0.0033550526,  0.0040319678,  0.0049364707,  0.0061860783,  0.0071764197,
+    0.0082905078,  0.010221609,   0.014185824,   0.026265224,   0.0010854676,  0.00074896315, 0.00073446817,
+    0.00072402402, 0.00071764801, 0.00072023802, 0.00072704646, 0.0007375839,  0.00075265532, 0.00080087944,
+    0.00086079625, 0.0009295785,  0.0010085718,  0.0011000349,  0.0012062084,  0.0013291101,  0.0014720538,
+    0.0016392763,  0.0018667057,  0.0021107879,  0.0024092472,  0.0027781241,  0.0032400733,  0.003828061,
+    0.0045918357,  0.0056099505,  0.0066320617,  0.0073582768,  0.0085081877,  0.010498863,   0.014582291,
+    0.027019849,   0.0012357334,  0.00085367227, 0.00083778438, 0.00082609872, 0.00081863307, 0.00081925828,
+    0.00082664948, 0.00083874189, 0.00085583632, 0.00089300267, 0.00095798075, 0.0010326204,  0.0011183885,
+    0.0012170941,  0.0013320963,  0.0014655889,  0.0016208906,  0.0018354144,  0.0021107879,  0.002445403,
+    0.0027857737,  0.0032061476,  0.0037322138,  0.0044013448,  0.0052699335,  0.0063273781,  0.0068253218,
+    0.0075759734,  0.008766559,   0.010827218,   0.015050948,   0.027910447,   0.0014086879,  0.00097437395,
+    0.00095738034, 0.00094506511, 0.00093744061, 0.00093718304, 0.00094591256, 0.00095987355, 0.00097940431,
+    0.0010049412,  0.0010749884,  0.001156715,   0.0012506677,  0.0013588334,  0.001483815,   0.0016300683,
+    0.0018417919,  0.0020991911,  0.0024092472,  0.0027857737,  0.0032474191,  0.0037305967,  0.0043348297,
+    0.0051028528,  0.0060991398,  0.0065366258,  0.0070584421,  0.0078425398,  0.0090804324,  0.01122079,
+    0.015611588,   0.028974038,   0.0016184739,  0.0011207485,  0.0011023845,  0.0010892812,  0.001081446,
+    0.0010801372,  0.001090488,   0.0011067194,  0.0011292148,  0.0011584698,  0.001217348,   0.0013077231,
+    0.0014116542,  0.001531342,   0.0016822729,  0.0018874101,  0.0021324588,  0.0024254359,  0.0027781241,
+    0.0032061476,  0.0037305967,  0.0043805442,  0.005081166,   0.005971096,   0.0064163683,  0.0067926222,
+    0.0073427465,  0.0081667602,  0.0094650239,  0.011705467,   0.016292507,   0.030260772,   0.0018765966,
+    0.0013008264,  0.0012807549,  0.0012666637,  0.0012585566,  0.0012565156,  0.0012683113,  0.0012873393,
+    0.0013134903,  0.0013473318,  0.0013928403,  0.0014938937,  0.0016106677,  0.0017800233,  0.0019794905,
+    0.002215296,   0.0024969804,  0.0028342868,  0.0032400733,  0.0037322138,  0.0043348297,  0.005081166,
+    0.0060181972,  0.0064315144,  0.0067087831,  0.0071103009,  0.007694534,   0.00856693,    0.0099386694,
+    0.012302885,   0.017139552,   0.034165442,   0.0021997194,  0.0015262378,  0.0015040152,  0.0014886761,
+    0.0014802213,  0.0014787434,  0.0014908726,  0.0015134049,  0.0015441383,  0.0015837309,  0.0016348839,
+    0.0017586248,  0.0019274375,  0.0021256125,  0.0023588818,  0.0026344741,  0.0029616053,  0.0033550526,
+    0.003828061,   0.0044013448,  0.0051028528,  0.005971096,   0.0064315144,  0.006778033,   0.0070787491,
+    0.00751105,    0.0081371684,  0.0090693021,  0.010532098,   0.013050069,   0.01886756,    0.039911121,
+    0.002612893,   0.0018144646,  0.0017894866,  0.0017725482,  0.0017636477,  0.0017628924,  0.0017754439,
+    0.0018024599,  0.0018575913,  0.0019310683,  0.0020207826,  0.0021398899,  0.0023405962,  0.0025760736,
+    0.0028530811,  0.0031801381,  0.0035680921,  0.0040319678,  0.0045918357,  0.0052699335,  0.0060991398,
+    0.0064163683,  0.0067087831,  0.0070787491,  0.0075560398,  0.0080267517,  0.0087054996,  0.0097130574,
+    0.011291193,   0.01415265,    0.022383973,   0.047296364,   0.0031555507,  0.002193026,   0.0021669078,
+    0.0021565645,  0.0021591801,  0.002175004,   0.0022075414,  0.0022644065,  0.0023375514,  0.0024286029,
+    0.0025396293,  0.0026732364,  0.0028935277,  0.003178556,   0.0035136454,  0.0039090244,  0.004377713,
+    0.0049364707,  0.0056099505,  0.0063273781,  0.0065366258,  0.0067926222,  0.0071103009,  0.00751105,
+    0.0080267517,  0.0087072961,  0.0094540818,  0.01055955,    0.012287834,   0.017107019,   0.027026452,
+    0.057043046,   0.0040466604,  0.0028165665,  0.0027882589,  0.0027762495,  0.0027805718,  0.0028015445,
+    0.0028399569,  0.002912289,   0.00300512,    0.0031204908,  0.0032609967,  0.0034299041,  0.0036588546,
+    0.0040119761,  0.0044268607,  0.0049160789,  0.0054956237,  0.0061860783,  0.0066320617,  0.0068253218,
+    0.0070584421,  0.0073427465,  0.007694534,   0.0081371684,  0.0087054996,  0.0094540818,  0.010473616,
+    0.011710811,   0.015105025,   0.021145405,   0.033370156,   0.070356794,   0.0053533185,  0.0037284214,
+    0.0036929436,  0.0036786217,  0.0036855093,  0.0037140276,  0.0037649933,  0.0038554242,  0.0039767851,
+    0.0041273846,  0.0043105762,  0.0045305812,  0.004792687,   0.005218829,   0.0057485211,  0.0063727167,
+    0.0070217471,  0.0071764197,  0.0073582768,  0.0075759734,  0.0078425398,  0.0081667602,  0.00856693,
+    0.0090693021,  0.0097130574,  0.01055955,    0.011710811,   0.014720184,   0.019256866,   0.026928712,
+    0.042451967,   0.089411013,   0.0073829745,  0.0051450753,  0.0050986623,  0.0050809076,  0.0050918958,
+    0.0051322081,  0.0052029593,  0.005320638,   0.0054861424,  0.0056912252,  0.0059404108,  0.00623939,
+    0.0065952893,  0.0070847394,  0.0077908854,  0.0079447664,  0.0081049222,  0.0082905078,  0.0085081877,
+    0.008766559,   0.0090804324,  0.0094650239,  0.0099386694,  0.010532098,   0.011291193,   0.012287834,
+    0.015105025,   0.019256866,   0.025613198,   0.035780203,   0.056347881,   0.1185571,     0.010869758,
+    0.0075791497,  0.0075142612,  0.0074908487,  0.0075090546,  0.0075697373,  0.0076745213,  0.0078380974,
+    0.0080791777,  0.0083775017,  0.0087395925,  0.0091736475,  0.0095673967,  0.0094644791,  0.0096009346,
+    0.0097800363,  0.0099847578,  0.010221609,   0.010498863,   0.010827218,   0.01122079,    0.011705467,
+    0.012302885,   0.013050069,   0.01415265,    0.017107019,   0.021145405,   0.026928712,   0.035780203,
+    0.050789613,   0.079905123,   0.167955,      0.018018194,   0.012570018,   0.012467773,   0.012433167,
+    0.012466465,   0.012569092,   0.012743721,   0.01299985,    0.013395431,   0.01386874,    0.013688202,
+    0.013522077,   0.013373735,   0.013246854,   0.013295894,   0.013552951,   0.013846565,   0.014185824,
+    0.014582291,   0.015050948,   0.015611588,   0.016292507,   0.017139552,   0.01886756,    0.022383973,
+    0.027026452,   0.033370156,   0.042451967,   0.056347881,   0.079905123,   0.12767085,    0.2680957,
+    0.039855745,   0.027817938,   0.027602749,   0.02753487,    0.02761494,    0.02741445,    0.026927261,
+    0.026449621,   0.026088707,   0.025760984,   0.025454629,   0.025175273,   0.024929052,   0.024722738,
+    0.024567602,   0.025058351,   0.025618585,   0.026265224,   0.027019849,   0.027910447,   0.028974038,
+    0.030260772,   0.034165442,   0.039911121,   0.047296364,   0.057043046,   0.070356794,   0.089411013,
+    0.1185571,     0.167955,      0.2680957,     0.57145911,    0.00013418234, 0.0006049251,  0.00091066037,
+    0.0013742581,  0.0020789795,  0.0031529742,  0.0047940579,  0.0066822106,  0.0090455888,  0.01227949,
+    0.016718803,   0.022833707,   0.031287532,   0.04329041,    0.060539998,   0.085005395,   0.11988349,
+    0.1698903,     0.2420485,     0.35366896,    0.55906385,    0.89069045,    1.4320773,     2.3277552,
+    3.8341234,     6.4208732,     6.4711189,     6.5222421,     6.8532467,     7.6873393,     9.7086449,
+    16.36174,      0.0006049251,  0.00011932456, 0.00017314893, 0.00092670874, 0.0030400949,  0.0048829787,
+    0.0064810561,  0.008134827,   0.010263085,   0.013195192,   0.017206132,   0.02268807,    0.030195586,
+    0.04078763,    0.055855636,   0.077019662,   0.10691789,    0.14942388,    0.21028057,    0.30461702,
+    0.4767313,     0.75268865,    1.2003123,     1.9365072,     3.1679394,     5.2719989,     5.2261767,
+    5.2413945,     5.4821115,     6.1230659,     7.7022696,     12.932133,     0.00091066037, 0.00017314893,
+    0.00021325199, 0.00016197078, 0.00043656986, 0.0012493286,  0.0034393235,  0.007199015,   0.013543337,
+    0.021284834,   0.026407257,   0.033435442,   0.043025061,   0.056576762,   0.075616755,   0.10210453,
+    0.13917492,    0.19140878,    0.26556596,    0.38259834,    0.59180522,    0.92456293,    1.4603651,
+    2.3356354,     3.7905874,     6.1649399,     5.971221,      5.9585981,     6.2033305,     6.8987508,
+    8.6431923,     14.457601,     0.0013742581,  0.00092670874, 0.00016197078, 0.00038297498, 0.00030043678,
+    0.0005229098,  0.0010558417,  0.0020832235,  0.0045462833,  0.0089888899,  0.016560849,   0.028978385,
+    0.048805397,   0.080738395,   0.10568261,    0.13947651,    0.18635184,    0.25181603,    0.34395835,
+    0.49451202,    0.75495046,    1.1655655,     1.8213812,     2.8846927,     4.6400166,     7.0804925,
+    6.8208117,     6.7723327,     7.0178375,     7.771028,      9.6971111,     16.160028,     0.0020789795,
+    0.0030400949,  0.00043656986, 0.00030043678, 0.00069117465, 0.00060122908, 0.00065309223, 0.0012022241,
+    0.0020809236,  0.0035657326,  0.0071555926,  0.013477785,   0.024227513,   0.042423192,   0.072187662,
+    0.12014475,    0.1965843,     0.31753355,    0.45572233,    0.65627056,    0.98778462,    1.5055732,
+    2.3253837,     3.6438873,     5.8042407,     8.1243677,     7.784637,      7.6912303,     7.9336996,
+    8.7479982,     10.873236,     18.053457,     0.0031529742,  0.0048829787,  0.0012493286,  0.0005229098,
+    0.00060122908, 0.0011363724,  0.00099386612, 0.00094489945, 0.0014953471,  0.0025369313,  0.0041583185,
+    0.0066373236,  0.012458848,   0.022893829,   0.040666584,   0.070325673,   0.11907424,    0.19832209,
+    0.32618815,    0.57480901,    1.0168946,     1.7934734,     3.0344427,     4.701344,      7.4109964,
+    9.3071136,     8.871829,      8.7234831,     8.9586277,     9.8374596,     12.180492,     20.815918,
+    0.0047940579,  0.0064810561,  0.0034393235,  0.0010558417,  0.00065309223, 0.00099386612, 0.0017445473,
+    0.0016134479,  0.0015893861,  0.001972053,   0.0032800706,  0.0053152614,  0.0085345553,  0.01348386,
+    0.023616049,   0.042234696,   0.073732503,   0.12627064,    0.21663,       0.39399582,    0.71103722,
+    1.2770947,     2.2896698,     4.1105385,     7.4153934,     9.3500586,     10.035181,     9.8765917,
+    10.099777,     11.046735,     13.627371,     24.790821,     0.0066822106,  0.008134827,   0.007199015,
+    0.0020832235,  0.0012022241,  0.00094489945, 0.0016134479,  0.0026923497,  0.0025965059,  0.0026324505,
+    0.0027862408,  0.0044582803,  0.0072217113,  0.011514382,   0.018128315,   0.028261589,   0.046918221,
+    0.082333378,   0.14901558,    0.27633411,    0.50772202,    0.92716414,    1.6879661,     3.0736275,
+    5.6181879,     6.2314086,     6.7986908,     7.5747204,     8.7350492,     10.684034,     14.627648,
+    29.82436,      0.0090455888,  0.010263085,   0.013543337,   0.0045462833,  0.0020809236,  0.0014953471,
+    0.0015893861,  0.0025965059,  0.0041777501,  0.0041634608,  0.0043391753,  0.0047207596,  0.0063369367,
+    0.010153828,   0.016076043,   0.025209887,   0.039242301,   0.060757544,   0.10547609,    0.19888687,
+    0.37119669,    0.68787342,    1.2696241,     2.341697,      3.7752187,     4.1388817,     4.5891786,
+    5.1908016,     6.0711932,     7.5249195,     10.431505,     24.992622,     0.01227949,    0.013195192,
+    0.021284834,   0.0089888899,  0.0035657326,  0.0025369313,  0.001972053,   0.0026324505,  0.0041634608,
+    0.0065367497,  0.0067546363,  0.0072296225,  0.0079858499,  0.0092515061,  0.014682275,   0.023096533,
+    0.036081705,   0.058406167,   0.098097458,   0.16462554,    0.27846655,    0.52257407,    0.97611964,
+    1.8208135,     2.4625993,     2.7453349,     3.092191,      3.5495558,     4.2095933,     5.28616,
+    7.4360328,     19.87001,      0.016718803,   0.017206132,   0.026407257,   0.016560849,   0.0071555926,
+    0.0041583185,  0.0032800706,  0.0027862408,  0.0043391753,  0.0067546363,  0.010453798,   0.011045729,
+    0.012032345,   0.013475897,   0.015478805,   0.021740414,   0.034256596,   0.057665356,   0.096885458,
+    0.16270079,    0.27350101,    0.46097651,    0.78047782,    1.4245517,     1.6085129,     1.8220749,
+    2.0835114,     2.4260564,     2.9162443,     3.7090354,     5.9184146,     15.942664,     0.022833707,
+    0.02268807,    0.033435442,   0.028978385,   0.013477785,   0.0066373236,  0.0053152614,  0.0044582803,
+    0.0047207596,  0.0072296225,  0.011045729,   0.016822061,   0.018118076,   0.020047,      0.022740088,
+    0.026395155,   0.034802306,   0.058469579,   0.098111406,   0.16463387,    0.27664605,    0.46623507,
+    0.78947681,    1.0510172,     1.0914104,     1.2122254,     1.4062309,     1.6599592,     2.0214381,
+    2.6028881,     4.7606173,     12.92082,      0.031287532,   0.030195586,   0.043025061,   0.048805397,
+    0.024227513,   0.012458848,   0.0085345553,  0.0072217113,  0.0063369367,  0.0079858499,  0.012032345,
+    0.018118076,   0.027247805,   0.029844834,   0.033492252,   0.039949544,   0.050345849,   0.064840168,
+    0.10185393,    0.17055818,    0.28615069,    0.48168921,    0.81125957,    0.84477854,    0.88429779,
+    0.93431234,    1.0019039,     1.1386966,     1.4039634,     1.8783089,     3.873533,      10.586617,
+    0.04329041,    0.04078763,    0.056576762,   0.080738395,   0.042423192,   0.022893829,   0.01348386,
+    0.011514382,   0.010153828,   0.0092515061,  0.013475897,   0.020047,      0.029844834,   0.044444092,
+    0.050864838,   0.061888985,   0.077053823,   0.098043829,   0.1273638,     0.18087117,    0.30264586,
+    0.50835156,    0.64871246,    0.68077815,    0.71799982,    0.7641288,     0.82514644,    0.9114911,
+    1.0436512,     1.5371119,     3.1908491,     8.7763805,     0.060539998,   0.055855636,   0.075616755,
+    0.10568261,    0.072187662,   0.040666584,   0.023616049,   0.018128315,   0.016076043,   0.014682275,
+    0.015478805,   0.022740088,   0.033492252,   0.050864838,   0.080078036,   0.096400164,   0.11871461,
+    0.14939259,    0.19194049,    0.25160855,    0.33637759,    0.49364296,    0.52088779,    0.55056387,
+    0.58474308,    0.62656069,    0.68107182,    0.75715458,    0.87228853,    1.4918332,     2.8751035,
+    7.3745074,     0.085005395,   0.077019662,   0.10210453,    0.13947651,    0.12014475,    0.070325673,
+    0.042234696,   0.028261589,   0.025209887,   0.023096533,   0.021740414,   0.026395155,   0.039949544,
+    0.061888985,   0.096400164,   0.15090649,    0.18401642,    0.22924937,    0.29156548,    0.37835249,
+    0.50078136,    0.44352117,    0.4203603,     0.44723365,    0.47808158,    0.51553309,    0.5638693,
+    0.63065255,    0.8661899,     1.4882317,     2.8708451,     7.3706393,     0.11988349,    0.10691789,
+    0.13917492,    0.18635184,    0.1965843,     0.11907424,    0.073732503,   0.046918221,   0.039242301,
+    0.036081705,   0.034256596,   0.034802306,   0.050345849,   0.077053823,   0.11871461,    0.18401642,
+    0.28689298,    0.35415682,    0.44624463,    0.57366085,    0.5500719,     0.47384694,    0.41330901,
+    0.3655999,     0.39274001,    0.42600521,    0.46865317,    0.54125428,    0.87413955,    1.5026597,
+    2.9003561,     7.4510942,     0.1698903,     0.14942388,    0.19140878,    0.25181603,    0.31753355,
+    0.19832209,    0.12627064,    0.082333378,   0.060757544,   0.058406167,   0.057665356,   0.058469579,
+    0.064840168,   0.098043829,   0.14939259,    0.22924937,    0.35415682,    0.55071211,    0.68799752,
+    0.68459702,    0.58427244,    0.50413674,    0.44030654,    0.38988301,    0.3507928,     0.35384047,
+    0.39136419,    0.55328703,    0.89359522,    1.5363207,     2.9660227,     7.6221566,     0.2420485,
+    0.21028057,    0.26556596,    0.34395835,    0.45572233,    0.32618815,    0.21663,       0.14901558,
+    0.10547609,    0.098097458,   0.096885458,   0.098111406,   0.10185393,    0.1273638,     0.19194049,
+    0.29156548,    0.44624463,    0.68799752,    0.85277349,    0.72264946,    0.61826247,    0.53456134,
+    0.4676773,     0.41470417,    0.37355721,    0.34292278,    0.36920604,    0.57340759,    0.92576504,
+    1.5912914,     3.0718582,     7.8941569,     0.35366896,    0.30461702,    0.38259834,    0.49451202,
+    0.65627056,    0.57480901,    0.39399582,    0.27633411,    0.19888687,    0.16462554,    0.16270079,
+    0.16463387,    0.17055818,    0.18087117,    0.25160855,    0.37835249,    0.57366085,    0.68459702,
+    0.72264946,    0.76046145,    0.65243596,    0.5654645,     0.49572998,    0.44034767,    0.39724395,
+    0.37715927,    0.48040938,    0.63373578,    0.97253412,    1.6707735,     3.2239802,     8.2826815,
+    0.55906385,    0.4767313,     0.59180522,    0.75495046,    0.98778462,    1.0168946,     0.71103722,
+    0.50772202,    0.37119669,    0.27846655,    0.27350101,    0.27664605,    0.28615069,    0.30264586,
+    0.33637759,    0.50078136,    0.5500719,     0.58427244,    0.61826247,    0.65243596,    0.68743873,
+    0.59740764,    0.52496356,    0.46726638,    0.42227256,    0.50650573,    0.6423589,     0.84369779,
+    1.1618383,     1.7794881,     3.4313586,     8.8104601,     0.89069045,    0.75268865,    0.92456293,
+    1.1655655,     1.5055732,     1.7934734,     1.2770947,     0.92716414,    0.68787342,    0.52257407,
+    0.46097651,    0.46623507,    0.48168921,    0.50835156,    0.49364296,    0.44352117,    0.47384694,
+    0.50413674,    0.53456134,    0.5654645,     0.59740764,    0.63123727,    0.55613047,    0.49614108,
+    0.5583728,     0.68588966,    0.86631417,    1.1332039,     1.5541564,     2.2876582,     3.8105242,
+    9.5103588,     1.4320773,     1.2003123,     1.4603651,     1.8213812,     2.3253837,     3.0344427,
+    2.2896698,     1.6879661,     1.2696241,     0.97611964,    0.78047782,    0.78947681,    0.81125957,
+    0.64871246,    0.52088779,    0.4203603,     0.41330901,    0.44030654,    0.4676773,     0.49572998,
+    0.52496356,    0.55613047,    0.59033686,    0.64081579,    0.76616907,    0.93760991,    1.1797429,
+    1.5372728,     2.1002181,     3.0795774,     5.1100478,     12.872087,     2.3277552,     1.9365072,
+    2.3356354,     2.8846927,     3.6438873,     4.701344,      4.1105385,     3.0736275,     2.341697,
+    1.8208135,     1.4245517,     1.0510172,     0.84477854,    0.68077815,    0.55056387,    0.44723365,
+    0.3655999,     0.38988301,    0.41470417,    0.44034767,    0.46726638,    0.49614108,    0.64081579,
+    0.89180779,    1.0626506,     1.2959058,     1.6247751,     2.1095555,     2.8716202,     4.1953917,
+    7.3946252,     20.583164,     3.8341234,     3.1679394,     3.7905874,     4.6400166,     5.8042407,
+    7.4109964,     7.4153934,     5.6181879,     3.7752187,     2.4625993,     1.6085129,     1.0914104,
+    0.88429779,    0.71799982,    0.58474308,    0.47808158,    0.39274001,    0.3507928,     0.37355721,
+    0.39724395,    0.42227256,    0.5583728,     0.76616907,    1.0626506,     1.493126,      1.8149942,
+    2.2680588,     2.9348414,     3.9813967,     5.9057994,     12.139041,     33.552834,     6.4208732,
+    5.2719989,     6.1649399,     7.0804925,     8.1243677,     9.3071136,     9.3500586,     6.2314086,
+    4.1388817,     2.7453349,     1.8220749,     1.2122254,     0.93431234,    0.7641288,     0.62656069,
+    0.51553309,    0.42600521,    0.35384047,    0.34292278,    0.37715927,    0.50650573,    0.68588966,
+    0.93760991,    1.2959058,     1.8149942,     2.5841644,     3.2192664,     4.152535,      5.615222,
+    9.9793873,     20.37047,      55.921146,     6.4711189,     5.2261767,     5.971221,      6.8208117,
+    7.784637,      8.871829,      10.035181,     6.7986908,     4.5891786,     3.092191,      2.0835114,
+    1.4062309,     1.0019039,     0.82514644,    0.68107182,    0.5638693,     0.46865317,    0.39136419,
+    0.36920604,    0.48040938,    0.6423589,     0.86631417,    1.1797429,     1.6247751,     2.2680588,
+    3.2192664,     4.6684008,     6.0039721,     9.6931858,     17.31189,      35.101326,     95.721848,
+    6.5222421,     5.2413945,     5.9585981,     6.7723327,     7.6912303,     8.7234831,     9.8765917,
+    7.5747204,     5.1908016,     3.5495558,     2.4260564,     1.6599592,     1.1386966,     0.9114911,
+    0.75715458,    0.63065255,    0.54125428,    0.55328703,    0.57340759,    0.63373578,    0.84369779,
+    1.1332039,     1.5372728,     2.1095555,     2.9348414,     4.152535,      6.0039721,     10.622843,
+    17.502508,     31.055145,     62.558273,     169.49916,     6.8532467,     5.4821115,     6.2033305,
+    7.0178375,     7.9336996,     8.9586277,     10.099777,     8.7350492,     6.0711932,     4.2095933,
+    2.9162443,     2.0214381,     1.4039634,     1.0436512,     0.87228853,    0.8661899,     0.87413955,
+    0.89359522,    0.92576504,    0.97253412,    1.1618383,     1.5541564,     2.1002181,     2.8716202,
+    3.9813967,     5.615222,      9.6931858,     17.502508,     33.078506,     58.32177,      116.74635,
+    314.34311,     7.6873393,     6.1230659,     6.8987508,     7.771028,      8.7479982,     9.8374596,
+    11.046735,     10.684034,     7.5249195,     5.28616,       3.7090354,     2.6028881,     1.8783089,
+    1.5371119,     1.4918332,     1.4882317,     1.5026597,     1.5363207,     1.5912914,     1.6707735,
+    1.7794881,     2.2876582,     3.0795774,     4.1953917,     5.9057994,     9.9793873,     17.31189,
+    31.055145,     58.32177,      117.38472,     233.54657,     625.01978,     9.7086449,     7.7022696,
+    8.6431923,     9.6971111,     10.873236,     12.180492,     13.627371,     14.627648,     10.431505,
+    7.4360328,     5.9184146,     4.7606173,     3.873533,      3.1908491,     2.8751035,     2.8708451,
+    2.9003561,     2.9660227,     3.0718582,     3.2239802,     3.4313586,     3.8105242,     5.1100478,
+    7.3946252,     12.139041,     20.37047,      35.101326,     62.558273,     116.74635,     233.54657,
+    528.05969,     1404.894,      16.36174,      12.932133,     14.457601,     16.160028,     18.053457,
+    20.815918,     24.790821,     29.82436,      24.992622,     19.87001,      15.942664,     12.92082,
+    10.586617,     8.7763805,     7.3745074,     7.3706393,     7.4510942,     7.6221566,     7.8941569,
+    8.2826815,     8.8104601,     9.5103588,     12.872087,     20.583164,     33.552834,     55.921146,
+    95.721848,     169.49916,     314.34311,     625.01978,     1404.894,      4229.9087};
+
+const float dequant1_matrix4x4[3][64] = {
+    0.00076352688, 0.00036029922, 0.0011553109, 0.0011553109, 0.0025205531, 0.0025205531, 0.0071363659, 0.0071363659,
+    0.00036029922, 0.00057008932, 0.0011553109, 0.0011553109, 0.0025205531, 0.0025205531, 0.0071363659, 0.0071363659,
+    0.0011553109,  0.0011553109,  0.0010383829, 0.0010383829, 0.0047177076, 0.0047177076, 0.011022241,  0.011022241,
+    0.0011553109,  0.0011553109,  0.0010383829, 0.0010383829, 0.0047177076, 0.0047177076, 0.011022241,  0.011022241,
+    0.0025205531,  0.0025205531,  0.0047177076, 0.0047177076, 0.0033692066, 0.0033692066, 0.013451101,  0.013451101,
+    0.0025205531,  0.0025205531,  0.0047177076, 0.0047177076, 0.0033692066, 0.0033692066, 0.013451101,  0.013451101,
+    0.0071363659,  0.0071363659,  0.011022241,  0.011022241,  0.013451101,  0.013451101,  0.017449437,  0.017449437,
+    0.0071363659,  0.0071363659,  0.011022241,  0.011022241,  0.013451101,  0.013451101,  0.017449437,  0.017449437,
+    0.0044780076,  0.0012398829,  0.0031354439, 0.0031354439, 0.0066977735, 0.0066977735, 0.021900313,  0.021900313,
+    0.0012398829,  0.0014682054,  0.0031354439, 0.0031354439, 0.0066977735, 0.0066977735, 0.021900313,  0.021900313,
+    0.0031354439,  0.0031354439,  0.0036335052, 0.0036335052, 0.0068652928, 0.0068652928, 0.01835954,   0.01835954,
+    0.0031354439,  0.0031354439,  0.0036335052, 0.0036335052, 0.0068652928, 0.0068652928, 0.01835954,   0.01835954,
+    0.0066977735,  0.0066977735,  0.0068652928, 0.0068652928, 0.016142365,  0.016142365,  0.027852703,  0.027852703,
+    0.0066977735,  0.0066977735,  0.0068652928, 0.0068652928, 0.016142365,  0.016142365,  0.027852703,  0.027852703,
+    0.021900313,   0.021900313,   0.01835954,   0.01835954,   0.027852703,  0.027852703,  0.060083967,  0.060083967,
+    0.021900313,   0.021900313,   0.01835954,   0.01835954,   0.027852703,  0.027852703,  0.060083967,  0.060083967,
+    0.0076058088,  0.0071930392,  0.0159935,    0.0159935,    0.035495624,  0.035495624,  0.059421029,  0.059421029,
+    0.0071930392,  0.012544631,   0.0159935,    0.0159935,    0.035495624,  0.035495624,  0.059421029,  0.059421029,
+    0.0159935,     0.0159935,     0.080566496,  0.080566496,  0.065065965,  0.065065965,  0.13197067,   0.13197067,
+    0.0159935,     0.0159935,     0.080566496,  0.080566496,  0.065065965,  0.065065965,  0.13197067,   0.13197067,
+    0.035495624,   0.035495624,   0.065065965,  0.065065965,  0.06381201,   0.06381201,   0.5660603,    0.5660603,
+    0.035495624,   0.035495624,   0.065065965,  0.065065965,  0.06381201,   0.06381201,   0.5660603,    0.5660603,
+    0.059421029,   0.059421029,   0.13197067,   0.13197067,   0.5660603,    0.5660603,    3.9365194,    3.9365194,
+    0.059421029,   0.059421029,   0.13197067,   0.13197067,   0.5660603,    0.5660603,    3.9365194,    3.9365194};
+
+const float dequant1_matrix8x8[3][64] = {
+    0.0002909539,  0.00054209016, 0.00067740923, 0.00074249663, 0.00077226211, 0.00082013884, 0.00074831746,
+    0.00036888735, 0.00054209016, 0.00083686807, 0.00095243513, 0.0010254194,  0.0010785819,  0.001144354,
+    0.0010148444,  0.00050194381, 0.00067740923, 0.00095243513, 0.0010309708,  0.0010634598,  0.0011255582,
+    0.0011620984,  0.00093262421, 0.00046117156, 0.00074249663, 0.0010254194,  0.0010634598,  0.0011365352,
+    0.0011997225,  0.001200883,   0.00083138247, 0.0004105276,  0.00077226211, 0.0010785819,  0.0011255582,
+    0.0011997225,  0.0012587409,  0.0010503692,  0.00069899118, 0.00043283266, 0.00082013884, 0.001144354,
+    0.0011620984,  0.001200883,   0.0010503692,  0.00081462093, 0.00054313598, 0.00079451699, 0.00074831746,
+    0.0010148444,  0.00093262421, 0.00083138247, 0.00069899118, 0.00054313598, 0.00090717152, 0.0014429828,
+    0.00036888735, 0.00050194381, 0.00046117156, 0.0004105276,  0.00043283266, 0.00079451699, 0.0014429828,
+    0.0021368233,  0.0018947646,  0.0020298155,  0.0018158669,  0.0023109741,  0.002771968,   0.0028875093,
+    0.0026061691,  0.0014496285,  0.0020298155,  0.0033498062,  0.0029625066,  0.0040782108,  0.0047020554,
+    0.0046986658,  0.004059691,   0.0022195177,  0.0018158669,  0.0029625066,  0.003855702,   0.0039039075,
+    0.0044844504,  0.0048484555,  0.004212948,   0.002350494,   0.0023109741,  0.0040782108,  0.0039039075,
+    0.005005613,   0.0047252625,  0.0043872506,  0.0035329545,  0.0020367121,  0.002771968,   0.0047020554,
+    0.0044844504,  0.0047252625,  0.0053944732,  0.0043888669,  0.0030360108,  0.0016807605,  0.0028875093,
+    0.0046986658,  0.0048484555,  0.0043872506,  0.0043888669,  0.0039804466,  0.0027267861,  0.0015264176,
+    0.0026061691,  0.004059691,   0.004212948,   0.0035329545,  0.0030360108,  0.0027267861,  0.0022856754,
+    0.0012781189,  0.0014496285,  0.0022195177,  0.002350494,   0.0020367121,  0.0016807605,  0.0015264176,
+    0.0012781189,  0.00079561322, 0.0036905503,  0.0059465133,  0.014704376,   0.0070739356,  0.0033689756,
+    0.0045039994,  0.0049362411,  0.0014067882,  0.0059465133,  0.02911225,    0.061591174,   0.029919557,
+    0.013408141,   0.01439747,    0.012449885,   0.0032443525,  0.014704376,   0.061591174,   0.026697377,
+    0.021077724,   0.019806897,   0.024742229,   0.018026156,   0.0048673735,  0.0070739356,  0.029919557,
+    0.021077724,   0.012372882,   0.020340675,   0.029437196,   0.013461344,   0.0039864881,  0.0033689756,
+    0.013408141,   0.019806897,   0.020340675,   0.01897642,    0.016170548,   0.0081333239,  0.003652758,
+    0.0045039994,  0.01439747,    0.024742229,   0.029437196,   0.016170548,   0.0070513831,  0.0037240474,
+    0.0058688065,  0.0049362411,  0.012449885,   0.018026156,   0.013461344,   0.0081333239,  0.0037240474,
+    0.0049084341,  0.0089747189,  0.0014067882,  0.0032443525,  0.0048673735,  0.0039864881,  0.003652758,
+    0.0058688065,  0.0089747189,  0.011420494};
+
+const float dequant1_matrix16x16[3][256] = {
+    8.6941407e-05, 9.6448246e-05, 0.00013085619, 0.00017933025, 0.00024836665, 0.00027373253, 0.0002828775,
+    0.00029644763, 0.0003157453,  0.00036460641, 0.00045703814, 0.00059130753, 0.0007995927,  0.0017282541,
+    0.012332106,   0.1177097,     9.6448246e-05, 6.6194349e-05, 8.9682457e-05, 0.00011046594, 0.00015460247,
+    0.00016983721, 0.00017969888, 0.00019148602, 0.00020649449, 0.00024233664, 0.00030588722, 0.00039798973,
+    0.00054070033, 0.0012416398,  0.0088538285,  0.084458858,   0.00013085619, 8.9682457e-05, 0.00010278081,
+    0.00014380792, 0.00017201646, 0.00016728821, 0.00016724656, 0.00017489056, 0.00019119326, 0.00023126998,
+    0.00029385358, 0.00038439123, 0.00052454998, 0.0014314948,  0.010118864,   0.095793746,   0.00017933025,
+    0.00011046594, 0.00014380792, 0.00016282526, 0.00017587373, 0.00018702789, 0.00018481401, 0.00018729926,
+    0.00019578976, 0.00023316547, 0.00028713772, 0.00037734731, 0.00051693874, 0.0018694391,  0.012999591,
+    0.12131298,    0.00024836665, 0.00015460247, 0.00017201646, 0.00017587373, 0.00016836988, 0.00018151697,
+    0.0001956971,  0.00020576089, 0.00022431443, 0.00026457073, 0.00032181898, 0.00040609311, 0.00053771475,
+    0.0027711354,  0.018724207,   0.171187,      0.00027373253, 0.00016983721, 0.00016728821, 0.00018702789,
+    0.00018151697, 0.00017808966, 0.00019275362, 0.00021269334, 0.00025798281, 0.00030524193, 0.00036844157,
+    0.00046176789, 0.00090772583, 0.0050451984,  0.032186501,   0.27849218,    0.0002828775,  0.00017969888,
+    0.00016724656, 0.00018481401, 0.0001956971,  0.00019275362, 0.00019311177, 0.00023000367, 0.00027816661,
+    0.00034266408, 0.00042883624, 0.00053407787, 0.0019287922,  0.010249138,   0.0628227,     0.52449799,
+    0.00029644763, 0.00019148602, 0.00017489056, 0.00018729926, 0.00020576089, 0.00021269334, 0.00023000367,
+    0.0002549284,  0.00030712178, 0.00037721035, 0.00047448673, 0.00097330485, 0.0045551551,  0.023077184,
+    0.13550578,    1.0883172,     0.0003157453,  0.00020649449, 0.00019119326, 0.00019578976, 0.00022431443,
+    0.00025798281, 0.00027816661, 0.00030712178, 0.00034762156, 0.00042543863, 0.0005989233,  0.0025047492,
+    0.011368512,   0.056990173,   0.3214075,     2.4235594,     0.00036460641, 0.00024233664, 0.00023126998,
+    0.00023316547, 0.00026457073, 0.00030524193, 0.00034266408, 0.00037721035, 0.00042543863, 0.00049277348,
+    0.0017959751,  0.0071011903,  0.030609839,   0.14635129,    0.81423062,    4.1576195,     0.00045703814,
+    0.00030588722, 0.00029385358, 0.00028713772, 0.00032181898, 0.00036844157, 0.00042883624, 0.00047448673,
+    0.0005989233,  0.0017959751,  0.0058984202,  0.022078902,   0.090429001,   0.41230315,    1.8386297,
+    7.2313967,     0.00059130753, 0.00039798973, 0.00038439123, 0.00037734731, 0.00040609311, 0.00046176789,
+    0.00053407787, 0.00097330485, 0.0025047492,  0.0071011903,  0.022078902,   0.075213447,   0.29305896,
+    1.2194115,     3.4551265,     13.306986,     0.0007995927,  0.00054070033, 0.00052454998, 0.00051693874,
+    0.00053771475, 0.00090772583, 0.0019287922,  0.0045551551,  0.011368512,   0.030609839,   0.090429001,
+    0.29305896,    1.0477177,     2.5220733,     6.9886661,     26.357689,     0.0017282541,  0.0012416398,
+    0.0014314948,  0.0018694391,  0.0027711354,  0.0050451984,  0.010249138,   0.023077184,   0.056990173,
+    0.14635129,    0.41230315,    1.2194115,     2.5220733,     5.7303133,     15.535759,     57.391628,
+    0.012332106,   0.0088538285,  0.010118864,   0.012999591,   0.018724207,   0.032186501,   0.0628227,
+    0.13550578,    0.3214075,     0.81423062,    1.8386297,     3.4551265,     6.9886661,     15.535759,
+    39.925846,     144.52841,     0.1177097,     0.084458858,   0.095793746,   0.12131298,    0.171187,
+    0.27849218,    0.52449799,    1.0883172,     2.4235594,     4.1576195,     7.2313967,     13.306986,
+    26.357689,     57.391628,     144.52841,     497.76062,     0.00039600098, 0.00034607144, 0.00040833891,
+    0.00048667059, 0.0005861785,  0.00074920553, 0.00098458503, 0.0013121488,  0.0017772652,  0.0022973588,
+    0.0028583913,  0.0036706869,  0.0049268291,  0.0072197518,  0.012596095,   0.029396452,   0.00034607144,
+    0.00024747173, 0.00027387534, 0.0003365741,  0.00040938996, 0.0005285684,  0.00069563533, 0.00092797622,
+    0.0012577978,  0.0016151961,  0.0020118796,  0.0025859487,  0.0034734753,  0.0051075728,  0.0089140097,
+    0.020809252,   0.00040833891, 0.00027387534, 0.00031539286, 0.00034493519, 0.00041945101, 0.00054777943,
+    0.0007227718,  0.00096404023, 0.001303279,   0.0016352652,  0.0020375939,  0.0026197163,  0.0035195949,
+    0.0052199606,  0.0091060577,  0.021249145,   0.00048667059, 0.0003365741,  0.00034493519, 0.00041010656,
+    0.00048113358, 0.00058858463, 0.00077134289, 0.0010256943,  0.0013762834,  0.0016811473,  0.0021005531,
+    0.0026996068,  0.0036257072,  0.0054506757,  0.009496971,   0.022137705,   0.0005861785,  0.00040938996,
+    0.00041945101, 0.00048113358, 0.00059573731, 0.00071104773, 0.00087649847, 0.0011293787,  0.0014301358,
+    0.0017464008,  0.002182483,   0.0028128312,  0.0037867145,  0.0058138543,  0.010114919,   0.02353812,
+    0.00074920553, 0.0005285684,  0.00054777943, 0.00058858463, 0.00071104773, 0.0008852075,  0.0010738227,
+    0.0013219716,  0.001527268,   0.0018505802,  0.0023102255,  0.0029750105,  0.0040830439,  0.0062906044,
+    0.01094307,    0.025472319,   0.00098458503, 0.00069563533, 0.0007227718,  0.00077134289, 0.00087649847,
+    0.0010738227,  0.0013484384,  0.0015134697,  0.0017388589,  0.0020487665,  0.0024944625,  0.0032075676,
+    0.0045332746,  0.0069634146,  0.012081781,   0.028057842,   0.0013121488,  0.00092797622, 0.00096404023,
+    0.0010256943,  0.0011293787,  0.0013219716,  0.0015134697,  0.0017614318,  0.0020141515,  0.0023621877,
+    0.0028521135,  0.0036510243,  0.0051573389,  0.0078939945,  0.013653559,   0.031619865,   0.0017772652,
+    0.0012577978,  0.001303279,   0.0013762834,  0.0014301358,  0.001527268,   0.0017388589,  0.0020141515,
+    0.002376725,   0.0027760784,  0.0033578034,  0.0044371258,  0.0061621098,  0.0092011821,  0.015820401,
+    0.03678393,    0.0022973588,  0.0016151961,  0.0016352652,  0.0016811473,  0.0017464008,  0.0018505802,
+    0.0020487665,  0.0023621877,  0.0027760784,  0.0033338354,  0.0042134481,  0.0055337516,  0.0076407115,
+    0.011347383,   0.019103128,   0.049214453,   0.0028583913,  0.0020118796,  0.0020375939,  0.0021005531,
+    0.002182483,   0.0023102255,  0.0024944625,  0.0028521135,  0.0033578034,  0.0042134481,  0.0054442864,
+    0.0071092523,  0.0097620692,  0.014422105,   0.02557276,    0.070264891,   0.0036706869,  0.0025859487,
+    0.0026197163,  0.0026996068,  0.0028128312,  0.0029750105,  0.0032075676,  0.0036510243,  0.0044371258,
+    0.0055337516,  0.0071092523,  0.0094711715,  0.012937739,   0.019291667,   0.038823757,   0.10550891,
+    0.0049268291,  0.0034734753,  0.0035195949,  0.0036257072,  0.0037867145,  0.0040830439,  0.0045332746,
+    0.0051573389,  0.0061621098,  0.0076407115,  0.0097620692,  0.012937739,   0.017999291,   0.031373069,
+    0.062424377,   0.167826,      0.0072197518,  0.0051075728,  0.0052199606,  0.0054506757,  0.0058138543,
+    0.0062906044,  0.0069634146,  0.0078939945,  0.0092011821,  0.011347383,   0.014422105,   0.019291667,
+    0.031373069,   0.055231433,   0.10869808,    0.2891748,     0.012596095,   0.0089140097,  0.0091060577,
+    0.009496971,   0.010114919,   0.01094307,    0.012081781,   0.013653559,   0.015820401,   0.019103128,
+    0.02557276,    0.038823757,   0.062424377,   0.10869808,    0.21590571,    0.56856865,    0.029396452,
+    0.020809252,   0.021249145,   0.022137705,   0.02353812,    0.025472319,   0.028057842,   0.031619865,
+    0.03678393,    0.049214453,   0.070264891,   0.10550891,    0.167826,      0.2891748,     0.56856865,
+    1.5101935,     0.00069132249, 0.0010815193,  0.0017454268,  0.0028453015,  0.0046874369,  0.0047010141,
+    0.0040570637,  0.0035506648,  0.0031582527,  0.0039758035,  0.006985045,   0.012666198,   0.024005895,
+    0.057713043,   0.2467743,     1.4114686,     0.0010815193,  0.00067276059, 0.0012984627,  0.0022066862,
+    0.0036029078,  0.0033287695,  0.0028739227,  0.0025160075,  0.0022385314,  0.0029198492,  0.0051128156,
+    0.0092460476,  0.017484134,   0.042972263,   0.18313001,    1.0444078,     0.0017454268,  0.0012984627,
+    0.0013351721,  0.0023470016,  0.0033850435,  0.0030727212,  0.0027693673,  0.0024766023,  0.0022112229,
+    0.0032111497,  0.0055785025,  0.010022596,   0.018849386,   0.04953425,    0.20919789,    1.1837591,
+    0.0028453015,  0.0022066862,  0.0023470016,  0.0027035389,  0.0027250734,  0.0026314682,  0.0024308208,
+    0.0022387633,  0.0021502564,  0.0036681695,  0.0064038844,  0.011393034,   0.021249678,   0.062385138,
+    0.25973764,    1.451555,      0.0046874369,  0.0036029078,  0.0033850435,  0.0027250734,  0.0021667613,
+    0.0021522474,  0.0020895076,  0.0019863129,  0.0025009788,  0.0042195194,  0.007313136,   0.013109547,
+    0.024602434,   0.085390151,   0.34971833,    1.9222946,     0.0047010141,  0.0033287695,  0.0030727212,
+    0.0026314682,  0.0021522474,  0.0017763178,  0.0017569333,  0.0018497383,  0.0030575816,  0.0051033041,
+    0.0087390682,  0.015506574,   0.03419143,    0.12187894,    0.49420494,    2.6978834,     0.0040570637,
+    0.0028739227,  0.0027693673,  0.0024308208,  0.0020895076,  0.0017569333,  0.0014928841,  0.0023629696,
+    0.0038356045,  0.0063890605,  0.010930002,   0.01915928,    0.054220565,   0.18829457,    0.74606138,
+    3.9897807,     0.0035506648,  0.0025160075,  0.0024766023,  0.0022387633,  0.0019863129,  0.0018497383,
+    0.0023629696,  0.0031767336,  0.0050553046,  0.0082801571,  0.013994551,   0.029805269,   0.093009129,
+    0.31386197,    1.2120669,     6.3341594,     0.0031582527,  0.0022385314,  0.0022112229,  0.0021502564,
+    0.0025009788,  0.0030575816,  0.0038356045,  0.0050553046,  0.0069825784,  0.011235781,   0.019635202,
+    0.055844314,   0.16970058,    0.56211448,    2.1147799,     10.700346,     0.0039758035,  0.0029198492,
+    0.0032111497,  0.0036681695,  0.0042195194,  0.0051033041,  0.0063890605,  0.0082801571,  0.011235781,
+    0.015955226,   0.04111724,    0.112705,      0.33115557,    1.0637885,     3.9274936,     17.23595,
+    0.006985045,   0.0051128156,  0.0055785025,  0.0064038844,  0.007313136,   0.0087390682,  0.010930002,
+    0.013994551,   0.019635202,   0.04111724,    0.092569597,   0.24464482,    0.69495994,    2.1639838,
+    7.3197994,     29.072956,     0.012666198,   0.0092460476,  0.010022596,   0.011393034,   0.013109547,
+    0.015506574,   0.01915928,    0.029805269,   0.055844314,   0.112705,      0.24464482,    0.57213938,
+    1.5720237,     4.6755047,     13.410465,     52.19664,      0.024005895,   0.017484134,   0.018849386,
+    0.021249678,   0.024602434,   0.03419143,    0.054220565,   0.093009129,   0.16970058,    0.33115557,
+    0.69495994,    1.5720237,     3.8629961,     9.4337931,     26.474764,     100.96515,     0.057713043,
+    0.042972263,   0.04953425,    0.062385138,   0.085390151,   0.12187894,    0.18829457,    0.31386197,
+    0.56211448,    1.0637885,     2.1639838,     4.6755047,     9.4337931,     20.937027,     57.505886,
+    214.89687,     0.2467743,     0.18313001,    0.20919789,    0.25973764,    0.34971833,    0.49420494,
+    0.74606138,    1.2120669,     2.1147799,     3.9274936,     7.3197994,     13.410465,     26.474764,
+    57.505886,     144.56065,     529.50238,     1.4114686,     1.0444078,     1.1837591,     1.451555,
+    1.9222946,     2.6978834,     3.9897807,     6.3341594,     10.700346,     17.23595,      29.072956,
+    52.19664,      100.96515,     214.89687,     529.50238,     1785.9767};
+
+const float dequant1_matrix32x32[3][1024] = {
+    3.6023353e-05, 4.7485129e-05, 5.8148311e-05, 7.1379516e-05, 8.7837536e-05, 0.00010836149, 0.00013402395,
+    0.00013984936, 0.00013758539, 0.00013574096, 0.00013431697, 0.00013332062, 0.00013276614, 0.00013034284,
+    0.00012594002, 0.00012217819, 0.0001190508,  0.00011656478, 0.00011474322, 0.00012188913, 0.00016943525,
+    0.00023737965, 0.00033562747, 0.00047973637, 0.00069487351, 0.001023312,   0.0020070006,  0.0041111726,
+    0.0087794187,  0.020014564,   0.051372338,   0.17595468,    4.7485129e-05, 2.3922055e-05, 3.0301986e-05,
+    4.5417441e-05, 7.1285736e-05, 9.0684785e-05, 0.00010896237, 0.00010955311, 0.00010636745, 0.00010387948,
+    0.00010196272, 0.00010054386, 9.9582219e-05, 9.7177792e-05, 9.3538241e-05, 9.0446316e-05, 8.7878834e-05,
+    8.5826992e-05, 8.4297106e-05, 9.0152811e-05, 0.00012504045, 0.00017483013, 0.00024673855, 0.00035209421,
+    0.00050921342, 0.00074884825, 0.0014747089,  0.0030166102,  0.0064336546,  0.014649257,   0.03755879,
+    0.12850724,    5.8148311e-05, 3.0301986e-05, 3.1925785e-05, 6.2082814e-05, 4.8148726e-05, 5.9288523e-05,
+    8.7128392e-05, 9.8823781e-05, 0.00010840771, 0.00011285518, 0.00010984355, 0.00010757463, 0.00010594203,
+    0.00010245113, 9.8243203e-05, 9.4688156e-05, 9.1741233e-05, 8.9377958e-05, 8.759318e-05,  9.59725e-05,
+    0.00013270826, 0.00018504279, 0.00026050268, 0.00037089363, 0.00053529057, 0.00079188979, 0.0015696345,
+    0.0032036791,  0.006818593,   0.015496098,   0.039659217,   0.13546796,    7.1379516e-05, 4.5417441e-05,
+    6.2082814e-05, 4.2815278e-05, 7.9491496e-05, 8.3820349e-05, 6.9794878e-05, 6.4144406e-05, 7.4116695e-05,
+    8.3001447e-05, 9.094907e-05,  9.814732e-05,  0.00010479001, 0.00010772577, 0.00010309856, 9.9065626e-05,
+    9.5727934e-05, 9.3044677e-05, 9.0998714e-05, 0.00010392844, 0.00014316791, 0.00019894708, 0.00027921025,
+    0.0003964066,  0.00057063322, 0.00086592074, 0.0017108019,  0.0034813138,  0.0073888344,  0.016748458,
+    0.042760596,   0.14573096,    8.7837536e-05, 7.1285736e-05, 4.8148726e-05, 7.9491496e-05, 5.7702688e-05,
+    9.3768082e-05, 0.00010685319, 8.267763e-05,  6.7383102e-05, 5.9459566e-05, 6.7216999e-05, 7.4507858e-05,
+    8.1035359e-05, 8.4555511e-05, 8.7558867e-05, 9.0202593e-05, 9.2635317e-05, 9.4999879e-05, 9.4486371e-05,
+    0.00011441334, 0.00015691627, 0.00021717971, 0.00030368852, 0.00042972399, 0.00061670481, 0.00097007764,
+    0.0019088162,  0.0038696625,  0.0081844097,  0.018491486,   0.047067408,   0.15995315,    0.00010836149,
+    9.0684785e-05, 5.9288523e-05, 8.3820349e-05, 9.3768082e-05, 6.4679705e-05, 8.5727108e-05, 0.00010771409,
+    9.3236245e-05, 7.7160919e-05, 6.5960317e-05, 5.7894802e-05, 6.2323947e-05, 6.6301116e-05, 6.9845759e-05,
+    7.306334e-05,  7.6064927e-05, 7.8965917e-05, 8.1887767e-05, 0.00011299414, 0.00016209931, 0.00023365305,
+    0.00033505241, 0.00047231393, 0.00067547208, 0.0011128781,  0.0021792827,  0.0043982645,  0.0092638051,
+    0.020849204,   0.052876592,   0.16858153,    0.00013402395, 0.00010896237, 8.7128392e-05, 6.9794878e-05,
+    0.00010685319, 8.5727108e-05, 6.3228545e-05, 8.0343605e-05, 9.8303324e-05, 0.00010209033, 8.5850937e-05,
+    7.4151001e-05, 6.3394582e-05, 5.5238434e-05, 5.5796354e-05, 5.9204292e-05, 6.2438397e-05, 6.558458e-05,
+    7.3094248e-05, 0.00010504625, 0.00015138833, 0.00021912932, 0.0003191361,  0.00046864239, 0.00069576764,
+    0.0012598214,  0.0025407695,  0.0051095891,  0.010710777,   0.023998523,   0.060610231,   0.17021742,
+    0.00013984936, 0.00010955311, 9.8823781e-05, 6.4144406e-05, 8.267763e-05,  0.00010771409, 8.0343605e-05,
+    6.2136249e-05, 7.660492e-05,  9.1846341e-05, 0.00010756763, 9.1990696e-05, 7.7838296e-05, 6.7140754e-05,
+    5.8907954e-05, 5.2481202e-05, 5.1350049e-05, 5.4532175e-05, 6.8726011e-05, 9.916423e-05,  0.00014344335,
+    0.0002083459,  0.00030440185, 0.0004483306,  0.00066744082, 0.0012999001,  0.0026211354,  0.0054349718,
+    0.011737515,   0.027037593,   0.070070185,   0.17149711,    0.00013758539, 0.00010636745, 0.00010840771,
+    7.4116695e-05, 6.7383102e-05, 9.3236245e-05, 9.8303324e-05, 7.660492e-05,  6.1395869e-05, 7.401441e-05,
+    8.6448825e-05, 9.7273121e-05, 9.4305702e-05, 8.0745209e-05, 7.0316069e-05, 6.2186271e-05, 5.5784836e-05,
+    5.0714323e-05, 6.5730928e-05, 9.512106e-05,  0.00013798039, 0.00020094345, 0.00029432218, 0.00043450613,
+    0.00069452584, 0.001371033,   0.0027620618,  0.0057223607,  0.012348444,   0.02842387,    0.073611557,
+    0.15523638,    0.00013574096, 0.00010387948, 0.00011285518, 8.3001447e-05, 5.9459566e-05, 7.7160919e-05,
+    0.00010209033, 9.1846341e-05, 7.401441e-05,  6.0548504e-05, 6.9839843e-05, 7.9154248e-05, 8.836521e-05,
+    9.5998839e-05, 8.3152358e-05, 7.3129027e-05, 6.5233886e-05, 6.8411915e-05, 8.4194486e-05, 0.00010528413,
+    0.00013480806, 0.00019668374, 0.00028859731, 0.00042678794, 0.00075014768, 0.0014782182,  0.0029731498,
+    0.0061504068,  0.013253534,   0.030467277,   0.078465275,   0.13781379,    0.00013431697, 0.00010196272,
+    0.00010984355, 9.094907e-05,  6.7216999e-05, 6.5960317e-05, 8.5850937e-05, 0.00010756763, 8.6448825e-05,
+    6.9839843e-05, 5.7402325e-05, 6.5355809e-05, 7.3386436e-05, 8.1423859e-05, 8.943578e-05,  8.530372e-05,
+    7.7794233e-05, 9.3046023e-05, 0.00011322236, 0.00014006831, 0.00017609817, 0.00022499407, 0.00029225985,
+    0.00042835347, 0.00082884677, 0.001629176,   0.0032691469,  0.0067481515,  0.014512495,   0.033299044,
+    0.06922318,    0.12214311,    0.00013332062, 0.00010054386, 0.00010757463, 9.814732e-05,  7.4507858e-05,
+    5.7894802e-05, 7.4151001e-05, 9.1990696e-05, 9.7273121e-05, 7.9154248e-05, 6.5355809e-05, 5.4758104e-05,
+    6.1723047e-05, 6.8813068e-05, 7.5996257e-05, 8.3270323e-05, 0.00010768669, 0.00012740848, 0.00015341518,
+    0.00018788269, 0.00023393871, 0.00029615185, 0.00038133378, 0.00056585873, 0.00097444904, 0.0018352539,
+    0.0036716289,  0.0075579025,  0.016211953,   0.037108809,   0.061003428,   0.10813522,    0.00013276614,
+    9.9582219e-05, 0.00010594203, 0.00010479001, 8.1035359e-05, 6.2323947e-05, 6.3394582e-05, 7.7838296e-05,
+    9.4305702e-05, 8.836521e-05,  7.3386436e-05, 6.1723047e-05, 5.2579268e-05, 5.8812744e-05, 6.5216627e-05,
+    8.2553663e-05, 0.00011746787, 0.00016737721, 0.00020925321, 0.00025386584, 0.00031323297, 0.00039307572,
+    0.00050304458, 0.0008371906,  0.0014246708,  0.0024851307,  0.0044618333,  0.0086401487,  0.018475281,
+    0.04013985,    0.053734157,   0.095683835,   0.00013034284, 9.7177792e-05, 0.00010245113, 0.00010772577,
+    8.4555511e-05, 6.6301116e-05, 5.5238434e-05, 6.7140754e-05, 8.0745209e-05, 9.5998839e-05, 8.1423859e-05,
+    6.8813068e-05, 5.8812744e-05, 5.0840626e-05, 6.2802093e-05, 8.8842957e-05, 0.00012588511, 0.0001787434,
+    0.00025448241, 0.00034528313, 0.00042242263, 0.00052573788, 0.00076630549, 0.0012597241,  0.0021186692,
+    0.0036545296,  0.0064916844,  0.01195686,    0.023111777,   0.035196837,   0.047338061,   0.084672391,
+    0.00012594002, 9.3538241e-05, 9.8243203e-05, 0.00010309856, 8.7558867e-05, 6.9845759e-05, 5.5796354e-05,
+    5.8907954e-05, 7.0316069e-05, 8.3152358e-05, 8.943578e-05,  7.5996257e-05, 6.5216627e-05, 6.2802093e-05,
+    6.9284266e-05, 9.7446515e-05, 0.00013739559, 0.00019426746, 0.00027559226, 0.00039252552, 0.0005617991,
+    0.00074624532, 0.0011858167,  0.0019260488,  0.003202145,   0.0054626567,  0.0096012847,  0.017506059,
+    0.033511735,   0.036903262,   0.045523666,   0.075064376,   0.00012217819, 9.0446316e-05, 9.4688156e-05,
+    9.9065626e-05, 9.0202593e-05, 7.306334e-05,  5.9204292e-05, 5.2481202e-05, 6.2186271e-05, 7.3129027e-05,
+    8.530372e-05,  8.3270323e-05, 8.2553663e-05, 8.8842957e-05, 9.7446515e-05, 0.00010886286, 0.00015265013,
+    0.00021480687, 0.00030346535, 0.00043066181, 0.00061443396, 0.0010896012,  0.0018624857,  0.0029899392,
+    0.0049151219,  0.0082942424,  0.014426572,   0.026041286,   0.035969965,   0.039324421,   0.048491772,
+    0.079925604,   0.0001190508,  8.7878834e-05, 9.1741233e-05, 9.5727934e-05, 9.2635317e-05, 7.6064927e-05,
+    6.2438397e-05, 5.1350049e-05, 5.5784836e-05, 6.5233886e-05, 7.7794233e-05, 0.00010768669, 0.00011746787,
+    0.00012588511, 0.00013739559, 0.00015265013, 0.00017256087, 0.00024155903, 0.00033968751, 0.00048010345,
+    0.00079832785, 0.0014242902,  0.0025716727,  0.0047059646,  0.0076574814,  0.012785331,   0.022011017,
+    0.03745044,    0.038278699,   0.041845389,   0.051593613,   0.085022964,   0.00011656478, 8.5826992e-05,
+    8.9377958e-05, 9.3044677e-05, 9.4999879e-05, 7.8965917e-05, 6.558458e-05,  5.4532175e-05, 5.0714323e-05,
+    6.8411915e-05, 9.3046023e-05, 0.00012740848, 0.00016737721, 0.0001787434,  0.00019426746, 0.00021480687,
+    0.00024155903, 0.00027618342, 0.00038645029, 0.00061551511, 0.0010759425,  0.0019028325,  0.0034076818,
+    0.0061881426,  0.011418056,   0.020000016,   0.034087602,   0.039794367,   0.040685836,   0.044485144,
+    0.054854382,   0.090400733,   0.00011474322, 8.4297106e-05, 8.759318e-05,  9.0998714e-05, 9.4486371e-05,
+    8.1887767e-05, 7.3094248e-05, 6.8726011e-05, 6.5730928e-05, 8.4194486e-05, 0.00011322236, 0.00015341518,
+    0.00020925321, 0.00025448241, 0.00027559226, 0.00030346535, 0.00033968751, 0.00038645029, 0.00050021743,
+    0.0008559238,  0.0014819041,  0.0025972966,  0.0046121944,  0.0083091557,  0.015217415,   0.028416166,
+    0.043130342,   0.042246785,   0.043216646,   0.04727279,    0.058311719,   0.096123755,   0.00012188913,
+    9.0152811e-05, 9.59725e-05,   0.00010392844, 0.00011441334, 0.00011299414, 0.00010504625, 9.916423e-05,
+    9.512106e-05,  0.00010528413, 0.00014006831, 0.00018788269, 0.00025386584, 0.00034528313, 0.00039252552,
+    0.00043066181, 0.00048010345, 0.00061551511, 0.0008559238,  0.0012161328,  0.0020851884,  0.0036213107,
+    0.0063752397,  0.011392047,   0.02070329,    0.03612854,    0.039032985,   0.043199129,   0.04590892,
+    0.050251149,   0.062020246,   0.10228495,    0.00016943525, 0.00012504045, 0.00013270826, 0.00014316791,
+    0.00015691627, 0.00016209931, 0.00015138833, 0.00014344335, 0.00013798039, 0.00013480806, 0.00017609817,
+    0.00023393871, 0.00031323297, 0.00042242263, 0.0005617991,  0.00061443396, 0.00079832785, 0.0010759425,
+    0.0014819041,  0.0020851884,  0.0029972054,  0.0051572081,  0.0089998404,  0.015948871,   0.028757192,
+    0.032310378,   0.035030056,   0.038904343,   0.044836834,   0.053481963,   0.066058092,   0.10901628,
+    0.00023737965, 0.00017483013, 0.00018504279, 0.00019894708, 0.00021717971, 0.00023365305, 0.00021912932,
+    0.0002083459,  0.00020094345, 0.00019668374, 0.00022499407, 0.00029615185, 0.00039307572, 0.00052573788,
+    0.00074624532, 0.0010896012,  0.0014242902,  0.0019028325,  0.0025972966,  0.0036213107,  0.0051572081,
+    0.0075033265,  0.012978748,   0.02280736,    0.027205519,   0.029114837,   0.03166876,    0.03528671,
+    0.040800784,   0.050143749,   0.069101594,   0.11650559,    0.00033562747, 0.00024673855, 0.00026050268,
+    0.00027921025, 0.00030368852, 0.00033505241, 0.0003191361,  0.00030440185, 0.00029432218, 0.00028859731,
+    0.00029225985, 0.00038133378, 0.00050304458, 0.00076630549, 0.0011858167,  0.0018624857,  0.0025716727,
+    0.0034076818,  0.0046121944,  0.0063752397,  0.0089998404,  0.012978748,   0.019131092,   0.023234539,
+    0.024663156,   0.026473302,   0.028883345,   0.032282136,   0.037441988,   0.046157762,   0.063803799,
+    0.097442903,   0.00047973637, 0.00035209421, 0.00037089363, 0.0003964066,  0.00042972399, 0.00047231393,
+    0.00046864239, 0.0004483306,  0.00043450613, 0.00042678794, 0.00042835347, 0.00056585873, 0.0008371906,
+    0.0012597241,  0.0019260488,  0.0029899392,  0.0047059646,  0.0061881426,  0.0083091557,  0.011392047,
+    0.015948871,   0.02280736,    0.023234539,   0.02124157,    0.022608465,   0.024335282,   0.026626121,
+    0.029845085,   0.034716032,   0.042922091,   0.054095831,   0.071022667,   0.00069487351, 0.00050921342,
+    0.00053529057, 0.00057063322, 0.00061670481, 0.00067547208, 0.00069576764, 0.00066744082, 0.00069452584,
+    0.00075014768, 0.00082884677, 0.00097444904, 0.0014246708,  0.0021186692,  0.003202145,   0.0049151219,
+    0.0076574814,  0.011418056,   0.015217415,   0.02070329,    0.028757192,   0.027205519,   0.024663156,
+    0.022608465,   0.021008318,   0.022671077,   0.024870832,   0.027952766,   0.03260373,    0.039315179,
+    0.039317548,   0.052039407,   0.001023312,   0.00074884825, 0.00079188979, 0.00086592074, 0.00097007764,
+    0.0011128781,  0.0012598214,  0.0012999001,  0.001371033,   0.0014782182,  0.001629176,   0.0018352539,
+    0.0024851307,  0.0036545296,  0.0054626567,  0.0082942424,  0.012785331,   0.020000016,   0.028416166,
+    0.03612854,    0.032310378,   0.029114837,   0.026473302,   0.024335282,   0.022671077,   0.021478029,
+    0.023619946,   0.026613781,   0.031121623,   0.028607776,   0.028840896,   0.038476393,   0.0020070006,
+    0.0014747089,  0.0015696345,  0.0017108019,  0.0019088162,  0.0021792827,  0.0025407695,  0.0026211354,
+    0.0027620618,  0.0029731498,  0.0032691469,  0.0036716289,  0.0044618333,  0.0064916844,  0.0096012847,
+    0.014426572,   0.022011017,   0.034087602,   0.043130342,   0.039032985,   0.035030056,   0.03166876,
+    0.028883345,   0.026626121,   0.024870832,   0.023619946,   0.022920383,   0.025886349,   0.023199789,
+    0.021124862,   0.021465059,   0.028859012,   0.0041111726,  0.0030166102,  0.0032036791,  0.0034813138,
+    0.0038696625,  0.0043982645,  0.0051095891,  0.0054349718,  0.0057223607,  0.0061504068,  0.0067481515,
+    0.0075579025,  0.0086401487,  0.01195686,    0.017506059,   0.026041286,   0.03745044,    0.039794367,
+    0.042246785,   0.043199129,   0.038904343,   0.03528671,    0.032282136,   0.029845085,   0.027952766,
+    0.026613781,   0.025886349,   0.020038661,   0.017388897,   0.015956772,   0.016338442,   0.02213328,
+    0.0087794187,  0.0064336546,  0.006818593,   0.0073888344,  0.0081844097,  0.0092638051,  0.010710777,
+    0.011737515,   0.012348444,   0.013253534,   0.014512495,   0.016211953,   0.018475281,   0.023111777,
+    0.033511735,   0.035969965,   0.038278699,   0.040685836,   0.043216646,   0.04590892,    0.044836834,
+    0.040800784,   0.037441988,   0.034716032,   0.03260373,    0.031121623,   0.023199789,   0.017388897,
+    0.013509996,   0.012490984,   0.01288554,    0.017585071,   0.020014564,   0.014649257,   0.015496098,
+    0.016748458,   0.018491486,   0.020849204,   0.023998523,   0.027037593,   0.02842387,    0.030467277,
+    0.033299044,   0.037108809,   0.04013985,    0.035196837,   0.036903262,   0.039324421,   0.041845389,
+    0.044485144,   0.04727279,    0.050251149,   0.053481963,   0.050143749,   0.046157762,   0.042922091,
+    0.039315179,   0.028607776,   0.021124862,   0.015956772,   0.012490984,   0.010380118,   0.010786027,
+    0.014826181,   0.051372338,   0.03755879,    0.039659217,   0.042760596,   0.047067408,   0.052876592,
+    0.060610231,   0.070070185,   0.073611557,   0.078465275,   0.06922318,    0.061003428,   0.053734157,
+    0.047338061,   0.045523666,   0.048491772,   0.051593613,   0.054854382,   0.058311719,   0.062020246,
+    0.066058092,   0.069101594,   0.063803799,   0.054095831,   0.039317548,   0.028840896,   0.021465059,
+    0.016338442,   0.01288554,    0.010786027,   0.010110092,   0.013994874,   0.17595468,    0.12850724,
+    0.13546796,    0.14573096,    0.15995315,    0.16858153,    0.17021742,    0.17149711,    0.15523638,
+    0.13781379,    0.12214311,    0.10813522,    0.095683835,   0.084672391,   0.075064376,   0.079925604,
+    0.085022964,   0.090400733,   0.096123755,   0.10228495,    0.10901628,    0.11650559,    0.097442903,
+    0.071022667,   0.052039407,   0.038476393,   0.028859012,   0.02213328,    0.017585071,   0.014826181,
+    0.013994874,   0.017534142,   0.00017707126, 0.0001480639,  0.00016780054, 0.00019063143, 0.00021710296,
+    0.00024787075, 0.00028372489, 0.00031671999, 0.00035102671, 0.00039015003, 0.0004349151,  0.00048632181,
+    0.0005455903,  0.00060966006, 0.00067817559, 0.00075744296, 0.0008497017,  0.00095781003, 0.0010854676,
+    0.0012357334,  0.0014086879,  0.0016184739,  0.0018765966,  0.0021997194,  0.002612893,   0.0031555507,
+    0.0040466604,  0.0053533185,  0.0073829745,  0.010869758,   0.018018194,   0.039855745,   0.0001480639,
+    0.00010538891, 0.00010069195, 0.00011690574, 0.0001371219,  0.00016000759, 0.00018614267, 0.00020961632,
+    0.00023437649, 0.0002622726,  0.00029394444, 0.00033013042, 0.00037170923, 0.00041637814, 0.0004643999,
+    0.00051986717, 0.00058435171, 0.00065985468, 0.00074896315, 0.00085367227, 0.00097437395, 0.0011207485,
+    0.0013008264,  0.0015262378,  0.0018144646,  0.002193026,   0.0028165665,  0.0037284214,  0.0051450753,
+    0.0075791497,  0.012570018,   0.027817938,   0.00016780054, 0.00010069195, 0.00012605802, 0.0001268106,
+    0.0001345543,  0.00015255461, 0.00017769246, 0.00019953751, 0.0002237842,  0.00025130101, 0.00028306333,
+    0.00031920706, 0.00036062195, 0.00040437246, 0.00045215141, 0.00050725805, 0.00057125726, 0.00064613792,
+    0.00073446817, 0.00083778438, 0.00095738034, 0.0011023845,  0.0012807549,  0.0015040152,  0.0017894866,
+    0.0021669078,  0.0027882589,  0.0036929436,  0.0050986623,  0.0075142612,  0.012467773,   0.027602749,
+    0.00019063143, 0.00011690574, 0.0001268106,  0.00015151658, 0.0001564352,  0.00016693826, 0.00017903787,
+    0.00019482298, 0.00021931141, 0.00024674702, 0.00027776035, 0.00031307945, 0.00035356692, 0.00039516465,
+    0.0004428348,  0.00049778185, 0.00056154456, 0.00063610583, 0.00072402402, 0.00082609872, 0.00094506511,
+    0.0010892812,  0.0012666637,  0.0014886761,  0.0017725482,  0.0021565645,  0.0027762495,  0.0036786217,
+    0.0050809076,  0.0074908487,  0.012433167,   0.02753487,    0.00021710296, 0.0001371219,  0.0001345543,
+    0.0001564352,  0.00018301635, 0.00019103213, 0.00019891099, 0.00021062233, 0.00022562918, 0.00024527154,
+    0.00027648974, 0.00031203785, 0.00035209427, 0.0003930493,  0.00043994986, 0.00049402053, 0.00055679504,
+    0.00063022313, 0.00071764801, 0.00081863307, 0.00093744061, 0.001081446,   0.0012585566,  0.0014802213,
+    0.0017636477,  0.0021591801,  0.0027805718,  0.0036855093,  0.0050918958,  0.0075090546,  0.012466465,
+    0.02761494,    0.00024787075, 0.00016000759, 0.00015255461, 0.00016693826, 0.00019103213, 0.0002155252,
+    0.00022246495, 0.00023313538, 0.00024776958, 0.00026611183, 0.0002883035,  0.00031473255, 0.00035249389,
+    0.00039379625, 0.00044109311, 0.000495616,   0.00055890926, 0.00063293753, 0.00072023802, 0.00081925828,
+    0.00093718304, 0.0010801372,  0.0012565156,  0.0014787434,  0.0017628924,  0.002175004,   0.0028015445,
+    0.0037140276,  0.0051322081,  0.0075697373,  0.012569092,   0.02741445,    0.00028372489, 0.00018614267,
+    0.00017769246, 0.00017903787, 0.00019891099, 0.00022246495, 0.00024936345, 0.00025970236, 0.0002738453,
+    0.00029204748, 0.00031465336, 0.00034174335, 0.00036875275, 0.00040052799, 0.00044537513, 0.00050056411,
+    0.00056463969, 0.00063958851, 0.00072704646, 0.00082664948, 0.00094591256, 0.001090488,   0.0012683113,
+    0.0014908726,  0.0017754439,  0.0022075414,  0.0028399569,  0.0037649933,  0.0052029593,  0.0076745213,
+    0.012743721,   0.026927261,   0.00031671999, 0.00020961632, 0.00019953751, 0.00019482298, 0.00021062233,
+    0.00023313538, 0.00025970236, 0.00029003757, 0.00030429225, 0.00032265869, 0.00034540024, 0.00037039586,
+    0.00039815414, 0.00043094621, 0.00046952089, 0.00051485293, 0.00057416997, 0.00065037975, 0.0007375839,
+    0.00083874189, 0.00095987355, 0.0011067194,  0.0012873393,  0.0015134049,  0.0018024599,  0.0022644065,
+    0.002912289,   0.0038554242,  0.005320638,   0.0078380974,  0.01299985,    0.026449621,   0.00035102671,
+    0.00023437649, 0.0002237842,  0.00021931141, 0.00022562918, 0.00024776958, 0.0002738453,  0.00030429225,
+    0.00033918605, 0.00035813198, 0.000380043,   0.00040352362, 0.00043199339, 0.00046604805, 0.00050621247,
+    0.00055350625, 0.00060924928, 0.00067515491, 0.00075265532, 0.00085583632, 0.00097940431, 0.0011292148,
+    0.0013134903,  0.0015441383,  0.0018575913,  0.0023375514,  0.00300512,    0.0039767851,  0.0054861424,
+    0.0080791777,  0.013395431,   0.026088707,   0.00039015003, 0.0002622726,  0.00025130101, 0.00024674702,
+    0.00024527154, 0.00026611183, 0.00029204748, 0.00032265869, 0.00035813198, 0.00039764532, 0.00041749049,
+    0.0004418896,  0.000471304,   0.00050638703, 0.0005484593,  0.00059807982, 0.00065663754, 0.00072369125,
+    0.00080087944, 0.00089300267, 0.0010049412,  0.0011584698,  0.0013473318,  0.0015837309,  0.0019310683,
+    0.0024286029,  0.0031204908,  0.0041273846,  0.0056912252,  0.0083775017,  0.01386874,    0.025760984,
+    0.0004349151,  0.00029394444, 0.00028306333, 0.00027776035, 0.00027648974, 0.0002883035,  0.00031465336,
+    0.00034540024, 0.000380043,   0.00041749049, 0.00046008758, 0.00048566549, 0.0005165693,  0.00055346655,
+    0.00059722661, 0.00064936228, 0.00071082846, 0.00077940908, 0.00086079625, 0.00095798075, 0.0010749884,
+    0.001217348,   0.0013928403,  0.0016348839,  0.0020207826,  0.0025396293,  0.0032609967,  0.0043105762,
+    0.0059404108,  0.0087395925,  0.013688202,   0.025454629,   0.00048632181, 0.00033013042, 0.00031920706,
+    0.00031307945, 0.00031203785, 0.00031473255, 0.00034174335, 0.00037039586, 0.00040352362, 0.0004418896,
+    0.00048566549, 0.00053564616, 0.00056836149, 0.00060747023, 0.00065390544, 0.00070886861, 0.00077067583,
+    0.00084332074, 0.0009295785,  0.0010326204,  0.001156715,   0.0013077231,  0.0014938937,  0.0017586248,
+    0.0021398899,  0.0026732364,  0.0034299041,  0.0045305812,  0.00623939,    0.0091736475,  0.013522077,
+    0.025175273,   0.0005455903,  0.00037170923, 0.00036062195, 0.00035356692, 0.00035209427, 0.00035249389,
+    0.00036875275, 0.00039815414, 0.00043199339, 0.000471304,   0.0005165693,  0.00056836149, 0.00062771526,
+    0.0006694512,  0.00071903912, 0.00077550631, 0.00084022118, 0.00091686472, 0.0010085718,  0.0011183885,
+    0.0012506677,  0.0014116542,  0.0016106677,  0.0019274375,  0.0023405962,  0.0028935277,  0.0036588546,
+    0.004792687,   0.0065952893,  0.0095673967,  0.013373735,   0.024929052,   0.00060966006, 0.00041637814,
+    0.00040437246, 0.00039516465, 0.0003930493,  0.00039379625, 0.00040052799, 0.00043094621, 0.00046604805,
+    0.00050638703, 0.00055346655, 0.00060747023, 0.0006694512,  0.00074076012, 0.000792315,   0.0008509137,
+    0.00092024123, 0.0010023764,  0.0011000349,  0.0012170941,  0.0013588334,  0.001531342,   0.0017800233,
+    0.0021256125,  0.0025760736,  0.003178556,   0.0040119761,  0.005218829,   0.0070847394,  0.0094644791,
+    0.013246854,   0.024722738,   0.00067817559, 0.0004643999,  0.00045215141, 0.0004428348,  0.00043994986,
+    0.00044109311, 0.00044537513, 0.00046952089, 0.00050621247, 0.0005484593,  0.00059722661, 0.00065390544,
+    0.00071903912, 0.000792315,   0.00087457988, 0.00093771284, 0.0010124183,  0.0011009406,  0.0012062084,
+    0.0013320963,  0.001483815,   0.0016822729,  0.0019794905,  0.0023588818,  0.0028530811,  0.0035136454,
+    0.0044268607,  0.0057485211,  0.0077908854,  0.0096009346,  0.013295894,   0.024567602,   0.00075744296,
+    0.00051986717, 0.00050725805, 0.00049778185, 0.00049402053, 0.000495616,   0.00050056411, 0.00051485293,
+    0.00055350625, 0.00059807982, 0.00064936228, 0.00070886861, 0.00077550631, 0.0008509137,  0.00093771284,
+    0.0010380344,  0.0011190202,  0.0012149868,  0.0013291101,  0.0014655889,  0.0016300683,  0.0018874101,
+    0.002215296,   0.0026344741,  0.0031801381,  0.0039090244,  0.0049160789,  0.0063727167,  0.0079447664,
+    0.0097800363,  0.013552951,   0.025058351,   0.0008497017,  0.00058435171, 0.00057125726, 0.00056154456,
+    0.00055679504, 0.00055890926, 0.00056463969, 0.00057416997, 0.00060924928, 0.00065663754, 0.00071082846,
+    0.00077067583, 0.00084022118, 0.00092024123, 0.0010124183,  0.0011190202,  0.0012429155,  0.0013475876,
+    0.0014720538,  0.0016208906,  0.0018417919,  0.0021324588,  0.0024969804,  0.0029616053,  0.0035680921,
+    0.004377713,   0.0054956237,  0.0070217471,  0.0081049222,  0.0099847578,  0.013846565,   0.025618585,
+    0.00095781003, 0.00065985468, 0.00064613792, 0.00063610583, 0.00063022313, 0.00063293753, 0.00063958851,
+    0.00065037975, 0.00067515491, 0.00072369125, 0.00077940908, 0.00084332074, 0.00091686472, 0.0010023764,
+    0.0011009406,  0.0012149868,  0.0013475876,  0.0015026716,  0.0016392763,  0.0018354144,  0.0020991911,
+    0.0024254359,  0.0028342868,  0.0033550526,  0.0040319678,  0.0049364707,  0.0061860783,  0.0071764197,
+    0.0082905078,  0.010221609,   0.014185824,   0.026265224,   0.0010854676,  0.00074896315, 0.00073446817,
+    0.00072402402, 0.00071764801, 0.00072023802, 0.00072704646, 0.0007375839,  0.00075265532, 0.00080087944,
+    0.00086079625, 0.0009295785,  0.0010085718,  0.0011000349,  0.0012062084,  0.0013291101,  0.0014720538,
+    0.0016392763,  0.0018667057,  0.0021107879,  0.0024092472,  0.0027781241,  0.0032400733,  0.003828061,
+    0.0045918357,  0.0056099505,  0.0066320617,  0.0073582768,  0.0085081877,  0.010498863,   0.014582291,
+    0.027019849,   0.0012357334,  0.00085367227, 0.00083778438, 0.00082609872, 0.00081863307, 0.00081925828,
+    0.00082664948, 0.00083874189, 0.00085583632, 0.00089300267, 0.00095798075, 0.0010326204,  0.0011183885,
+    0.0012170941,  0.0013320963,  0.0014655889,  0.0016208906,  0.0018354144,  0.0021107879,  0.002445403,
+    0.0027857737,  0.0032061476,  0.0037322138,  0.0044013448,  0.0052699335,  0.0063273781,  0.0068253218,
+    0.0075759734,  0.008766559,   0.010827218,   0.015050948,   0.027910447,   0.0014086879,  0.00097437395,
+    0.00095738034, 0.00094506511, 0.00093744061, 0.00093718304, 0.00094591256, 0.00095987355, 0.00097940431,
+    0.0010049412,  0.0010749884,  0.001156715,   0.0012506677,  0.0013588334,  0.001483815,   0.0016300683,
+    0.0018417919,  0.0020991911,  0.0024092472,  0.0027857737,  0.0032474191,  0.0037305967,  0.0043348297,
+    0.0051028528,  0.0060991398,  0.0065366258,  0.0070584421,  0.0078425398,  0.0090804324,  0.01122079,
+    0.015611588,   0.028974038,   0.0016184739,  0.0011207485,  0.0011023845,  0.0010892812,  0.001081446,
+    0.0010801372,  0.001090488,   0.0011067194,  0.0011292148,  0.0011584698,  0.001217348,   0.0013077231,
+    0.0014116542,  0.001531342,   0.0016822729,  0.0018874101,  0.0021324588,  0.0024254359,  0.0027781241,
+    0.0032061476,  0.0037305967,  0.0043805442,  0.005081166,   0.005971096,   0.0064163683,  0.0067926222,
+    0.0073427465,  0.0081667602,  0.0094650239,  0.011705467,   0.016292507,   0.030260772,   0.0018765966,
+    0.0013008264,  0.0012807549,  0.0012666637,  0.0012585566,  0.0012565156,  0.0012683113,  0.0012873393,
+    0.0013134903,  0.0013473318,  0.0013928403,  0.0014938937,  0.0016106677,  0.0017800233,  0.0019794905,
+    0.002215296,   0.0024969804,  0.0028342868,  0.0032400733,  0.0037322138,  0.0043348297,  0.005081166,
+    0.0060181972,  0.0064315144,  0.0067087831,  0.0071103009,  0.007694534,   0.00856693,    0.0099386694,
+    0.012302885,   0.017139552,   0.034165442,   0.0021997194,  0.0015262378,  0.0015040152,  0.0014886761,
+    0.0014802213,  0.0014787434,  0.0014908726,  0.0015134049,  0.0015441383,  0.0015837309,  0.0016348839,
+    0.0017586248,  0.0019274375,  0.0021256125,  0.0023588818,  0.0026344741,  0.0029616053,  0.0033550526,
+    0.003828061,   0.0044013448,  0.0051028528,  0.005971096,   0.0064315144,  0.006778033,   0.0070787491,
+    0.00751105,    0.0081371684,  0.0090693021,  0.010532098,   0.013050069,   0.01886756,    0.039911121,
+    0.002612893,   0.0018144646,  0.0017894866,  0.0017725482,  0.0017636477,  0.0017628924,  0.0017754439,
+    0.0018024599,  0.0018575913,  0.0019310683,  0.0020207826,  0.0021398899,  0.0023405962,  0.0025760736,
+    0.0028530811,  0.0031801381,  0.0035680921,  0.0040319678,  0.0045918357,  0.0052699335,  0.0060991398,
+    0.0064163683,  0.0067087831,  0.0070787491,  0.0075560398,  0.0080267517,  0.0087054996,  0.0097130574,
+    0.011291193,   0.01415265,    0.022383973,   0.047296364,   0.0031555507,  0.002193026,   0.0021669078,
+    0.0021565645,  0.0021591801,  0.002175004,   0.0022075414,  0.0022644065,  0.0023375514,  0.0024286029,
+    0.0025396293,  0.0026732364,  0.0028935277,  0.003178556,   0.0035136454,  0.0039090244,  0.004377713,
+    0.0049364707,  0.0056099505,  0.0063273781,  0.0065366258,  0.0067926222,  0.0071103009,  0.00751105,
+    0.0080267517,  0.0087072961,  0.0094540818,  0.01055955,    0.012287834,   0.017107019,   0.027026452,
+    0.057043046,   0.0040466604,  0.0028165665,  0.0027882589,  0.0027762495,  0.0027805718,  0.0028015445,
+    0.0028399569,  0.002912289,   0.00300512,    0.0031204908,  0.0032609967,  0.0034299041,  0.0036588546,
+    0.0040119761,  0.0044268607,  0.0049160789,  0.0054956237,  0.0061860783,  0.0066320617,  0.0068253218,
+    0.0070584421,  0.0073427465,  0.007694534,   0.0081371684,  0.0087054996,  0.0094540818,  0.010473616,
+    0.011710811,   0.015105025,   0.021145405,   0.033370156,   0.070356794,   0.0053533185,  0.0037284214,
+    0.0036929436,  0.0036786217,  0.0036855093,  0.0037140276,  0.0037649933,  0.0038554242,  0.0039767851,
+    0.0041273846,  0.0043105762,  0.0045305812,  0.004792687,   0.005218829,   0.0057485211,  0.0063727167,
+    0.0070217471,  0.0071764197,  0.0073582768,  0.0075759734,  0.0078425398,  0.0081667602,  0.00856693,
+    0.0090693021,  0.0097130574,  0.01055955,    0.011710811,   0.014720184,   0.019256866,   0.026928712,
+    0.042451967,   0.089411013,   0.0073829745,  0.0051450753,  0.0050986623,  0.0050809076,  0.0050918958,
+    0.0051322081,  0.0052029593,  0.005320638,   0.0054861424,  0.0056912252,  0.0059404108,  0.00623939,
+    0.0065952893,  0.0070847394,  0.0077908854,  0.0079447664,  0.0081049222,  0.0082905078,  0.0085081877,
+    0.008766559,   0.0090804324,  0.0094650239,  0.0099386694,  0.010532098,   0.011291193,   0.012287834,
+    0.015105025,   0.019256866,   0.025613198,   0.035780203,   0.056347881,   0.1185571,     0.010869758,
+    0.0075791497,  0.0075142612,  0.0074908487,  0.0075090546,  0.0075697373,  0.0076745213,  0.0078380974,
+    0.0080791777,  0.0083775017,  0.0087395925,  0.0091736475,  0.0095673967,  0.0094644791,  0.0096009346,
+    0.0097800363,  0.0099847578,  0.010221609,   0.010498863,   0.010827218,   0.01122079,    0.011705467,
+    0.012302885,   0.013050069,   0.01415265,    0.017107019,   0.021145405,   0.026928712,   0.035780203,
+    0.050789613,   0.079905123,   0.167955,      0.018018194,   0.012570018,   0.012467773,   0.012433167,
+    0.012466465,   0.012569092,   0.012743721,   0.01299985,    0.013395431,   0.01386874,    0.013688202,
+    0.013522077,   0.013373735,   0.013246854,   0.013295894,   0.013552951,   0.013846565,   0.014185824,
+    0.014582291,   0.015050948,   0.015611588,   0.016292507,   0.017139552,   0.01886756,    0.022383973,
+    0.027026452,   0.033370156,   0.042451967,   0.056347881,   0.079905123,   0.12767085,    0.2680957,
+    0.039855745,   0.027817938,   0.027602749,   0.02753487,    0.02761494,    0.02741445,    0.026927261,
+    0.026449621,   0.026088707,   0.025760984,   0.025454629,   0.025175273,   0.024929052,   0.024722738,
+    0.024567602,   0.025058351,   0.025618585,   0.026265224,   0.027019849,   0.027910447,   0.028974038,
+    0.030260772,   0.034165442,   0.039911121,   0.047296364,   0.057043046,   0.070356794,   0.089411013,
+    0.1185571,     0.167955,      0.2680957,     0.57145911,    0.00013418234, 0.0006049251,  0.00091066037,
+    0.0013742581,  0.0020789795,  0.0031529742,  0.0047940579,  0.0066822106,  0.0090455888,  0.01227949,
+    0.016718803,   0.022833707,   0.031287532,   0.04329041,    0.060539998,   0.085005395,   0.11988349,
+    0.1698903,     0.2420485,     0.35366896,    0.55906385,    0.89069045,    1.4320773,     2.3277552,
+    3.8341234,     6.4208732,     6.4711189,     6.5222421,     6.8532467,     7.6873393,     9.7086449,
+    16.36174,      0.0006049251,  0.00011932456, 0.00017314893, 0.00092670874, 0.0030400949,  0.0048829787,
+    0.0064810561,  0.008134827,   0.010263085,   0.013195192,   0.017206132,   0.02268807,    0.030195586,
+    0.04078763,    0.055855636,   0.077019662,   0.10691789,    0.14942388,    0.21028057,    0.30461702,
+    0.4767313,     0.75268865,    1.2003123,     1.9365072,     3.1679394,     5.2719989,     5.2261767,
+    5.2413945,     5.4821115,     6.1230659,     7.7022696,     12.932133,     0.00091066037, 0.00017314893,
+    0.00021325199, 0.00016197078, 0.00043656986, 0.0012493286,  0.0034393235,  0.007199015,   0.013543337,
+    0.021284834,   0.026407257,   0.033435442,   0.043025061,   0.056576762,   0.075616755,   0.10210453,
+    0.13917492,    0.19140878,    0.26556596,    0.38259834,    0.59180522,    0.92456293,    1.4603651,
+    2.3356354,     3.7905874,     6.1649399,     5.971221,      5.9585981,     6.2033305,     6.8987508,
+    8.6431923,     14.457601,     0.0013742581,  0.00092670874, 0.00016197078, 0.00038297498, 0.00030043678,
+    0.0005229098,  0.0010558417,  0.0020832235,  0.0045462833,  0.0089888899,  0.016560849,   0.028978385,
+    0.048805397,   0.080738395,   0.10568261,    0.13947651,    0.18635184,    0.25181603,    0.34395835,
+    0.49451202,    0.75495046,    1.1655655,     1.8213812,     2.8846927,     4.6400166,     7.0804925,
+    6.8208117,     6.7723327,     7.0178375,     7.771028,      9.6971111,     16.160028,     0.0020789795,
+    0.0030400949,  0.00043656986, 0.00030043678, 0.00069117465, 0.00060122908, 0.00065309223, 0.0012022241,
+    0.0020809236,  0.0035657326,  0.0071555926,  0.013477785,   0.024227513,   0.042423192,   0.072187662,
+    0.12014475,    0.1965843,     0.31753355,    0.45572233,    0.65627056,    0.98778462,    1.5055732,
+    2.3253837,     3.6438873,     5.8042407,     8.1243677,     7.784637,      7.6912303,     7.9336996,
+    8.7479982,     10.873236,     18.053457,     0.0031529742,  0.0048829787,  0.0012493286,  0.0005229098,
+    0.00060122908, 0.0011363724,  0.00099386612, 0.00094489945, 0.0014953471,  0.0025369313,  0.0041583185,
+    0.0066373236,  0.012458848,   0.022893829,   0.040666584,   0.070325673,   0.11907424,    0.19832209,
+    0.32618815,    0.57480901,    1.0168946,     1.7934734,     3.0344427,     4.701344,      7.4109964,
+    9.3071136,     8.871829,      8.7234831,     8.9586277,     9.8374596,     12.180492,     20.815918,
+    0.0047940579,  0.0064810561,  0.0034393235,  0.0010558417,  0.00065309223, 0.00099386612, 0.0017445473,
+    0.0016134479,  0.0015893861,  0.001972053,   0.0032800706,  0.0053152614,  0.0085345553,  0.01348386,
+    0.023616049,   0.042234696,   0.073732503,   0.12627064,    0.21663,       0.39399582,    0.71103722,
+    1.2770947,     2.2896698,     4.1105385,     7.4153934,     9.3500586,     10.035181,     9.8765917,
+    10.099777,     11.046735,     13.627371,     24.790821,     0.0066822106,  0.008134827,   0.007199015,
+    0.0020832235,  0.0012022241,  0.00094489945, 0.0016134479,  0.0026923497,  0.0025965059,  0.0026324505,
+    0.0027862408,  0.0044582803,  0.0072217113,  0.011514382,   0.018128315,   0.028261589,   0.046918221,
+    0.082333378,   0.14901558,    0.27633411,    0.50772202,    0.92716414,    1.6879661,     3.0736275,
+    5.6181879,     6.2314086,     6.7986908,     7.5747204,     8.7350492,     10.684034,     14.627648,
+    29.82436,      0.0090455888,  0.010263085,   0.013543337,   0.0045462833,  0.0020809236,  0.0014953471,
+    0.0015893861,  0.0025965059,  0.0041777501,  0.0041634608,  0.0043391753,  0.0047207596,  0.0063369367,
+    0.010153828,   0.016076043,   0.025209887,   0.039242301,   0.060757544,   0.10547609,    0.19888687,
+    0.37119669,    0.68787342,    1.2696241,     2.341697,      3.7752187,     4.1388817,     4.5891786,
+    5.1908016,     6.0711932,     7.5249195,     10.431505,     24.992622,     0.01227949,    0.013195192,
+    0.021284834,   0.0089888899,  0.0035657326,  0.0025369313,  0.001972053,   0.0026324505,  0.0041634608,
+    0.0065367497,  0.0067546363,  0.0072296225,  0.0079858499,  0.0092515061,  0.014682275,   0.023096533,
+    0.036081705,   0.058406167,   0.098097458,   0.16462554,    0.27846655,    0.52257407,    0.97611964,
+    1.8208135,     2.4625993,     2.7453349,     3.092191,      3.5495558,     4.2095933,     5.28616,
+    7.4360328,     19.87001,      0.016718803,   0.017206132,   0.026407257,   0.016560849,   0.0071555926,
+    0.0041583185,  0.0032800706,  0.0027862408,  0.0043391753,  0.0067546363,  0.010453798,   0.011045729,
+    0.012032345,   0.013475897,   0.015478805,   0.021740414,   0.034256596,   0.057665356,   0.096885458,
+    0.16270079,    0.27350101,    0.46097651,    0.78047782,    1.4245517,     1.6085129,     1.8220749,
+    2.0835114,     2.4260564,     2.9162443,     3.7090354,     5.9184146,     15.942664,     0.022833707,
+    0.02268807,    0.033435442,   0.028978385,   0.013477785,   0.0066373236,  0.0053152614,  0.0044582803,
+    0.0047207596,  0.0072296225,  0.011045729,   0.016822061,   0.018118076,   0.020047,      0.022740088,
+    0.026395155,   0.034802306,   0.058469579,   0.098111406,   0.16463387,    0.27664605,    0.46623507,
+    0.78947681,    1.0510172,     1.0914104,     1.2122254,     1.4062309,     1.6599592,     2.0214381,
+    2.6028881,     4.7606173,     12.92082,      0.031287532,   0.030195586,   0.043025061,   0.048805397,
+    0.024227513,   0.012458848,   0.0085345553,  0.0072217113,  0.0063369367,  0.0079858499,  0.012032345,
+    0.018118076,   0.027247805,   0.029844834,   0.033492252,   0.039949544,   0.050345849,   0.064840168,
+    0.10185393,    0.17055818,    0.28615069,    0.48168921,    0.81125957,    0.84477854,    0.88429779,
+    0.93431234,    1.0019039,     1.1386966,     1.4039634,     1.8783089,     3.873533,      10.586617,
+    0.04329041,    0.04078763,    0.056576762,   0.080738395,   0.042423192,   0.022893829,   0.01348386,
+    0.011514382,   0.010153828,   0.0092515061,  0.013475897,   0.020047,      0.029844834,   0.044444092,
+    0.050864838,   0.061888985,   0.077053823,   0.098043829,   0.1273638,     0.18087117,    0.30264586,
+    0.50835156,    0.64871246,    0.68077815,    0.71799982,    0.7641288,     0.82514644,    0.9114911,
+    1.0436512,     1.5371119,     3.1908491,     8.7763805,     0.060539998,   0.055855636,   0.075616755,
+    0.10568261,    0.072187662,   0.040666584,   0.023616049,   0.018128315,   0.016076043,   0.014682275,
+    0.015478805,   0.022740088,   0.033492252,   0.050864838,   0.080078036,   0.096400164,   0.11871461,
+    0.14939259,    0.19194049,    0.25160855,    0.33637759,    0.49364296,    0.52088779,    0.55056387,
+    0.58474308,    0.62656069,    0.68107182,    0.75715458,    0.87228853,    1.4918332,     2.8751035,
+    7.3745074,     0.085005395,   0.077019662,   0.10210453,    0.13947651,    0.12014475,    0.070325673,
+    0.042234696,   0.028261589,   0.025209887,   0.023096533,   0.021740414,   0.026395155,   0.039949544,
+    0.061888985,   0.096400164,   0.15090649,    0.18401642,    0.22924937,    0.29156548,    0.37835249,
+    0.50078136,    0.44352117,    0.4203603,     0.44723365,    0.47808158,    0.51553309,    0.5638693,
+    0.63065255,    0.8661899,     1.4882317,     2.8708451,     7.3706393,     0.11988349,    0.10691789,
+    0.13917492,    0.18635184,    0.1965843,     0.11907424,    0.073732503,   0.046918221,   0.039242301,
+    0.036081705,   0.034256596,   0.034802306,   0.050345849,   0.077053823,   0.11871461,    0.18401642,
+    0.28689298,    0.35415682,    0.44624463,    0.57366085,    0.5500719,     0.47384694,    0.41330901,
+    0.3655999,     0.39274001,    0.42600521,    0.46865317,    0.54125428,    0.87413955,    1.5026597,
+    2.9003561,     7.4510942,     0.1698903,     0.14942388,    0.19140878,    0.25181603,    0.31753355,
+    0.19832209,    0.12627064,    0.082333378,   0.060757544,   0.058406167,   0.057665356,   0.058469579,
+    0.064840168,   0.098043829,   0.14939259,    0.22924937,    0.35415682,    0.55071211,    0.68799752,
+    0.68459702,    0.58427244,    0.50413674,    0.44030654,    0.38988301,    0.3507928,     0.35384047,
+    0.39136419,    0.55328703,    0.89359522,    1.5363207,     2.9660227,     7.6221566,     0.2420485,
+    0.21028057,    0.26556596,    0.34395835,    0.45572233,    0.32618815,    0.21663,       0.14901558,
+    0.10547609,    0.098097458,   0.096885458,   0.098111406,   0.10185393,    0.1273638,     0.19194049,
+    0.29156548,    0.44624463,    0.68799752,    0.85277349,    0.72264946,    0.61826247,    0.53456134,
+    0.4676773,     0.41470417,    0.37355721,    0.34292278,    0.36920604,    0.57340759,    0.92576504,
+    1.5912914,     3.0718582,     7.8941569,     0.35366896,    0.30461702,    0.38259834,    0.49451202,
+    0.65627056,    0.57480901,    0.39399582,    0.27633411,    0.19888687,    0.16462554,    0.16270079,
+    0.16463387,    0.17055818,    0.18087117,    0.25160855,    0.37835249,    0.57366085,    0.68459702,
+    0.72264946,    0.76046145,    0.65243596,    0.5654645,     0.49572998,    0.44034767,    0.39724395,
+    0.37715927,    0.48040938,    0.63373578,    0.97253412,    1.6707735,     3.2239802,     8.2826815,
+    0.55906385,    0.4767313,     0.59180522,    0.75495046,    0.98778462,    1.0168946,     0.71103722,
+    0.50772202,    0.37119669,    0.27846655,    0.27350101,    0.27664605,    0.28615069,    0.30264586,
+    0.33637759,    0.50078136,    0.5500719,     0.58427244,    0.61826247,    0.65243596,    0.68743873,
+    0.59740764,    0.52496356,    0.46726638,    0.42227256,    0.50650573,    0.6423589,     0.84369779,
+    1.1618383,     1.7794881,     3.4313586,     8.8104601,     0.89069045,    0.75268865,    0.92456293,
+    1.1655655,     1.5055732,     1.7934734,     1.2770947,     0.92716414,    0.68787342,    0.52257407,
+    0.46097651,    0.46623507,    0.48168921,    0.50835156,    0.49364296,    0.44352117,    0.47384694,
+    0.50413674,    0.53456134,    0.5654645,     0.59740764,    0.63123727,    0.55613047,    0.49614108,
+    0.5583728,     0.68588966,    0.86631417,    1.1332039,     1.5541564,     2.2876582,     3.8105242,
+    9.5103588,     1.4320773,     1.2003123,     1.4603651,     1.8213812,     2.3253837,     3.0344427,
+    2.2896698,     1.6879661,     1.2696241,     0.97611964,    0.78047782,    0.78947681,    0.81125957,
+    0.64871246,    0.52088779,    0.4203603,     0.41330901,    0.44030654,    0.4676773,     0.49572998,
+    0.52496356,    0.55613047,    0.59033686,    0.64081579,    0.76616907,    0.93760991,    1.1797429,
+    1.5372728,     2.1002181,     3.0795774,     5.1100478,     12.872087,     2.3277552,     1.9365072,
+    2.3356354,     2.8846927,     3.6438873,     4.701344,      4.1105385,     3.0736275,     2.341697,
+    1.8208135,     1.4245517,     1.0510172,     0.84477854,    0.68077815,    0.55056387,    0.44723365,
+    0.3655999,     0.38988301,    0.41470417,    0.44034767,    0.46726638,    0.49614108,    0.64081579,
+    0.89180779,    1.0626506,     1.2959058,     1.6247751,     2.1095555,     2.8716202,     4.1953917,
+    7.3946252,     20.583164,     3.8341234,     3.1679394,     3.7905874,     4.6400166,     5.8042407,
+    7.4109964,     7.4153934,     5.6181879,     3.7752187,     2.4625993,     1.6085129,     1.0914104,
+    0.88429779,    0.71799982,    0.58474308,    0.47808158,    0.39274001,    0.3507928,     0.37355721,
+    0.39724395,    0.42227256,    0.5583728,     0.76616907,    1.0626506,     1.493126,      1.8149942,
+    2.2680588,     2.9348414,     3.9813967,     5.9057994,     12.139041,     33.552834,     6.4208732,
+    5.2719989,     6.1649399,     7.0804925,     8.1243677,     9.3071136,     9.3500586,     6.2314086,
+    4.1388817,     2.7453349,     1.8220749,     1.2122254,     0.93431234,    0.7641288,     0.62656069,
+    0.51553309,    0.42600521,    0.35384047,    0.34292278,    0.37715927,    0.50650573,    0.68588966,
+    0.93760991,    1.2959058,     1.8149942,     2.5841644,     3.2192664,     4.152535,      5.615222,
+    9.9793873,     20.37047,      55.921146,     6.4711189,     5.2261767,     5.971221,      6.8208117,
+    7.784637,      8.871829,      10.035181,     6.7986908,     4.5891786,     3.092191,      2.0835114,
+    1.4062309,     1.0019039,     0.82514644,    0.68107182,    0.5638693,     0.46865317,    0.39136419,
+    0.36920604,    0.48040938,    0.6423589,     0.86631417,    1.1797429,     1.6247751,     2.2680588,
+    3.2192664,     4.6684008,     6.0039721,     9.6931858,     17.31189,      35.101326,     95.721848,
+    6.5222421,     5.2413945,     5.9585981,     6.7723327,     7.6912303,     8.7234831,     9.8765917,
+    7.5747204,     5.1908016,     3.5495558,     2.4260564,     1.6599592,     1.1386966,     0.9114911,
+    0.75715458,    0.63065255,    0.54125428,    0.55328703,    0.57340759,    0.63373578,    0.84369779,
+    1.1332039,     1.5372728,     2.1095555,     2.9348414,     4.152535,      6.0039721,     10.622843,
+    17.502508,     31.055145,     62.558273,     169.49916,     6.8532467,     5.4821115,     6.2033305,
+    7.0178375,     7.9336996,     8.9586277,     10.099777,     8.7350492,     6.0711932,     4.2095933,
+    2.9162443,     2.0214381,     1.4039634,     1.0436512,     0.87228853,    0.8661899,     0.87413955,
+    0.89359522,    0.92576504,    0.97253412,    1.1618383,     1.5541564,     2.1002181,     2.8716202,
+    3.9813967,     5.615222,      9.6931858,     17.502508,     33.078506,     58.32177,      116.74635,
+    314.34311,     7.6873393,     6.1230659,     6.8987508,     7.771028,      8.7479982,     9.8374596,
+    11.046735,     10.684034,     7.5249195,     5.28616,       3.7090354,     2.6028881,     1.8783089,
+    1.5371119,     1.4918332,     1.4882317,     1.5026597,     1.5363207,     1.5912914,     1.6707735,
+    1.7794881,     2.2876582,     3.0795774,     4.1953917,     5.9057994,     9.9793873,     17.31189,
+    31.055145,     58.32177,      117.38472,     233.54657,     625.01978,     9.7086449,     7.7022696,
+    8.6431923,     9.6971111,     10.873236,     12.180492,     13.627371,     14.627648,     10.431505,
+    7.4360328,     5.9184146,     4.7606173,     3.873533,      3.1908491,     2.8751035,     2.8708451,
+    2.9003561,     2.9660227,     3.0718582,     3.2239802,     3.4313586,     3.8105242,     5.1100478,
+    7.3946252,     12.139041,     20.37047,      35.101326,     62.558273,     116.74635,     233.54657,
+    528.05969,     1404.894,      16.36174,      12.932133,     14.457601,     16.160028,     18.053457,
+    20.815918,     24.790821,     29.82436,      24.992622,     19.87001,      15.942664,     12.92082,
+    10.586617,     8.7763805,     7.3745074,     7.3706393,     7.4510942,     7.6221566,     7.8941569,
+    8.2826815,     8.8104601,     9.5103588,     12.872087,     20.583164,     33.552834,     55.921146,
+    95.721848,     169.49916,     314.34311,     625.01978,     1404.894,      4229.9087};
+
+const float inv_dequant0_matrix8x8[3][64] = {
+    3436.9705, 1844.7115, 1476.2125, 1346.8075, 1294.8971, 1219.3058, 1336.3313, 2710.8547, 1844.7115, 1194.9315,
+    1049.9403, 975.21069, 927.14331, 873.85547, 985.37274, 1992.2549, 1476.2125, 1049.9403, 969.95953, 940.32703,
+    888.44812, 860.51233, 1072.2433, 2168.3904, 1346.8075, 975.21069, 940.32703, 879.86719, 833.52606, 832.72064,
+    1202.8158, 2435.8899, 1294.8971, 927.14331, 888.44812, 833.52606, 794.44464, 952.04614, 1430.6332, 2310.3616,
+    1219.3058, 873.85547, 860.51233, 832.72064, 952.04614, 1227.5648, 1841.1595, 1258.6263, 1336.3313, 985.37274,
+    1072.2433, 1202.8158, 1430.6332, 1841.1595, 1102.3274, 693.00897, 2710.8547, 1992.2549, 2168.3904, 2435.8899,
+    2310.3616, 1258.6263, 693.00897, 467.98444, 527.77002, 492.65561, 550.70117, 432.71796, 360.75452, 346.31924,
+    383.70496, 689.83191, 492.65561, 298.52472, 337.552,   245.20557, 212.67296, 212.82637, 246.32417, 450.54834,
+    550.70117, 337.552,   259.35614, 256.15363, 222.99277, 206.25125, 237.36348, 425.44247, 432.71796, 245.20557,
+    256.15363, 199.77573, 211.62845, 227.93318, 283.04922, 490.9874,  360.75452, 212.67296, 222.99277, 211.62845,
+    185.37491, 227.84924, 329.37961, 594.96875, 346.31924, 212.82637, 206.25125, 227.93318, 227.84924, 251.22809,
+    366.73212, 655.12872, 383.70496, 246.32417, 237.36348, 283.04922, 329.37961, 366.73212, 437.50745, 782.39978,
+    689.83191, 450.54834, 425.44247, 490.9874,  594.96875, 655.12872, 782.39978, 1256.8921, 270.96231, 168.16577,
+    68.006966, 141.36403, 296.82614, 222.0249,  202.5833,  710.83905, 168.16577, 34.3498,   16.236092, 33.422955,
+    74.581551, 69.45665,  80.322029, 308.22791, 68.006966, 16.236092, 37.456863, 47.443451, 50.487465, 40.416729,
+    55.474945, 205.44962, 141.36403, 33.422955, 47.443451, 80.821915, 49.162575, 33.970627, 74.286789, 250.84735,
+    296.82614, 74.581551, 50.487465, 49.162575, 52.69698,  61.84082,  122.95097, 273.76575, 222.0249,  69.45665,
+    40.416729, 33.970627, 61.84082,  141.81615, 268.52505, 170.3924,  202.5833,  80.322029, 55.474945, 74.286789,
+    122.95097, 268.52505, 203.73096, 111.4241,  710.83905, 308.22791, 205.44962, 250.84735, 273.76575, 170.3924,
+    111.4241,  87.56189};
+
+const float inv_dequant0_matrix16x16[3][256] = {
+    11501.999,  10368.255,   7641.9771,   5576.3042,   4026.3054,    3653.2012,    3535.0991,    3373.2771,
+    3167.1096,  2742.6836,   2188.0012,   1691.1674,   1250.6367,    578.61859,    81.089149,    8.4954767,
+    10368.255,  15107.03,    11150.453,   9052.5645,   6468.2017,    5887.9912,    5564.8647,    5222.3135,
+    4842.7441,  4126.4912,   3269.1787,   2512.6277,   1849.4532,    805.38654,    112.94549,    11.840084,
+    7641.9771,  11150.453,   9729.4424,   6953.7197,   5813.3975,    5977.7075,    5979.1963,    5717.8613,
+    5230.3096,  4323.9507,   3403.0554,   2601.5161,   1906.396,     698.57043,    98.825317,    10.439095,
+    5576.3042,  9052.5645,   6953.7197,   6141.5532,   5685.8975,    5346.7959,    5410.8452,    5339.0493,
+    5107.5195,  4288.7998,   3482.6494,   2650.0784,   1934.4652,    534.9198,     76.925491,    8.2431412,
+    4026.3054,  6468.2017,   5813.3975,   5685.8975,   5939.3047,    5509.127,     5109.938,     4860.0098,
+    4458.0278,  3779.7075,   3107.3369,   2462.4895,   1859.722,     360.86292,    53.406803,    5.8415651,
+    3653.2012,  5887.9912,   5977.7075,   5346.7959,   5509.127,     5615.1494,    5187.9702,    4701.6045,
+    3876.2273,  3276.0898,   2714.1345,   2165.5901,   1101.6542,    198.20827,    31.068926,    3.590765,
+    3535.0991,  5564.8647,   5979.1963,   5410.8452,   5109.938,     5187.9702,    5178.3481,    4347.7568,
+    3594.9678,  2918.3101,   2331.8926,   1872.3861,   518.45917,    97.569183,    15.917813,    1.906585,
+    3373.2771,  5222.3135,   5717.8613,   5339.0493,   4860.0098,    4701.6045,    4347.7568,    3922.6702,
+    3256.0374,  2651.0408,   2107.5405,   1027.4274,   219.53149,    43.332844,    7.3797588,    0.91884983,
+    3167.1096,  4842.7441,   5230.3096,   5107.5195,   4458.0278,    3876.2273,    3594.9678,    3256.0374,
+    2876.6914,  2350.5154,   1669.6628,   399.24158,   87.962257,    17.546885,    3.111315,     0.41261625,
+    2742.6836,  4126.4912,   4323.9507,   4288.7998,   3779.7075,    3276.0898,    2918.3101,    2651.0408,
+    2350.5154,  2029.33,     556.8006,    140.82146,   32.669235,    6.8328743,    1.2281532,    0.24052225,
+    2188.0012,  3269.1787,   3403.0554,   3482.6494,   3107.3369,    2714.1345,    2331.8926,    2107.5405,
+    1669.6628,  556.8006,    169.53693,   45.292107,   11.058399,    2.4253998,    0.54388332,   0.13828588,
+    1691.1674,  2512.6277,   2601.5161,   2650.0784,   2462.4895,    2165.5901,    1872.3861,    1027.4274,
+    399.24158,  140.82146,   45.292107,   13.295495,   3.4122827,    0.8200677,    0.28942502,   0.075148501,
+    1250.6367,  1849.4532,   1906.396,    1934.4652,   1859.722,     1101.6542,    518.45917,    219.53149,
+    87.962257,  32.669235,   11.058399,   3.4122827,   0.95445555,   0.39649919,   0.14308882,   0.037939593,
+    578.61859,  805.38654,   698.57043,   534.9198,    360.86292,    198.20827,    97.569183,    43.332844,
+    17.546885,  6.8328743,   2.4253998,   0.8200677,   0.39649919,   0.17451052,   0.06436763,   0.017424144,
+    81.089149,  112.94549,   98.825317,   76.925491,   53.406803,    31.068926,    15.917813,    7.3797588,
+    3.111315,   1.2281532,   0.54388332,  0.28942502,  0.14308882,   0.06436763,   0.025046432,  0.0069190548,
+    8.4954767,  11.840084,   10.439095,   8.2431412,   5.8415651,    3.590765,     1.906585,     0.91884983,
+    0.41261625, 0.24052225,  0.13828588,  0.075148501, 0.037939593,  0.017424144,  0.0069190548, 0.0020089978,
+    2525.2463,  2889.5769,   2448.946,    2054.7781,   1705.965,     1334.7472,    1015.6563,    762.1087,
+    562.66223,  435.28247,   349.84714,   272.42859,   202.97031,    138.50891,    79.389687,    34.017712,
+    2889.5769,  4040.8655,   3651.2961,   2971.114,    2442.6589,    1891.9027,    1437.5348,    1077.6138,
+    795.04034,  619.11987,   497.04764,   386.70529,   287.89609,    195.78772,    112.18296,    48.05555,
+    2448.946,   3651.2961,   3170.6489,   2899.0952,   2384.0686,    1825.5522,    1383.5625,    1037.3011,
+    767.29541,  611.52161,   490.77493,   381.7207,    284.1236,     191.57233,    109.81701,    47.060715,
+    2054.7781,  2971.114,    2899.0952,   2438.3906,   2078.4248,    1698.991,     1296.4403,    974.94934,
+    726.59454,  594.83191,   476.06509,   370.42432,   275.80826,    183.46349,    105.29673,    45.171799,
+    1705.965,   2442.6589,   2384.0686,   2078.4248,   1678.5922,    1406.3754,    1140.9033,    885.44263,
+    699.23431,  572.6062,    458.19373,   355.5137,    264.08118,    172.00293,    98.863861,    42.484276,
+    1334.7472,  1891.9027,   1825.5522,   1698.991,    1406.3754,    1129.6786,    931.25238,    756.44586,
+    654.76392,  540.37109,   432.85818,   336.13327,   244.91531,    158.96724,    91.382034,    39.258305,
+    1015.6563,  1437.5348,   1383.5625,   1296.4403,   1140.9033,    931.25238,    741.59857,    660.7334,
+    575.08978,  488.09857,   400.88797,   311.76273,   220.59109,    143.60771,    82.769249,    35.640659,
+    762.1087,   1077.6138,   1037.3011,   974.94934,   885.44263,    756.44586,    660.7334,     567.71997,
+    496.48697,  423.3364,    350.61719,   273.89575,   193.89845,    126.67858,    73.240982,    31.625689,
+    562.66223,  795.04034,   767.29541,   726.59454,   699.23431,    654.76392,    575.08978,    496.48697,
+    420.74704,  360.22037,   297.81375,   225.37112,   162.28207,    108.68169,    63.209522,    27.185785,
+    435.28247,  619.11987,   611.52161,   594.83191,   572.6062,     540.37109,    488.09857,    423.3364,
+    360.22037,  299.95483,   237.33531,   180.70923,   130.87787,    88.126045,    52.347446,    20.319235,
+    349.84714,  497.04764,   490.77493,   476.06509,   458.19373,    432.85818,    400.88797,    350.61719,
+    297.81375,  237.33531,   183.6788,    140.66177,   102.4373,     69.338005,    39.104111,    14.231859,
+    272.42859,  386.70529,   381.7207,    370.42432,   355.5137,     336.13327,    311.76273,    273.89575,
+    225.37112,  180.70923,   140.66177,   105.58356,   77.293259,    51.83585,     25.757425,    9.4778728,
+    202.97031,  287.89609,   284.1236,    275.80826,   264.08118,    244.91531,    220.59109,    193.89845,
+    162.28207,  130.87787,   102.4373,    77.293259,   55.557743,    31.874472,    16.019382,    5.9585524,
+    138.50891,  195.78772,   191.57233,   183.46349,   172.00293,    158.96724,    143.60771,    126.67858,
+    108.68169,  88.126045,   69.338005,   51.83585,    31.874472,    18.105633,    9.1997948,    3.4581161,
+    79.389687,  112.18296,   109.81701,   105.29673,   98.863861,    91.382034,    82.769249,    73.240982,
+    63.209522,  52.347446,   39.104111,   25.757425,   16.019382,    9.1997948,    4.6316514,    1.7588025,
+    34.017712,  48.05555,    47.060715,   45.171799,   42.484276,    39.258305,    35.640659,    31.625689,
+    27.185785,  20.319235,   14.231859,   9.4778728,   5.9585524,    3.4581161,    1.7588025,    0.66216683,
+    1446.5029,  924.62518,   572.92578,   351.4566,    213.33621,    212.72006,    246.48369,    281.63739,
+    316.63077,  251.52148,   143.16299,   78.950287,   41.656433,    17.327106,    4.0522857,    0.70848191,
+    924.62518,  1486.4128,   770.14148,   453.16818,   277.55359,    300.41132,    347.95648,    397.45508,
+    446.72147,  342.48343,   195.58694,   108.15432,   57.19471,     23.270824,    5.4606013,    0.95748037,
+    572.92578,  770.14148,   748.9671,    426.07556,   295.41718,    325.44443,    361.09332,    403.77899,
+    452.23843,  311.41495,   179.25958,   99.774551,   53.052128,    20.188051,    4.7801628,    0.8447665,
+    351.4566,   453.16818,   426.07556,   369.88556,   366.96259,    380.01599,    411.38367,    446.6752,
+    465.06082,  272.61554,   156.15523,   87.772934,   47.059536,    16.029459,    3.8500388,    0.68891639,
+    213.33621,  277.55359,   295.41718,   366.96259,   461.51831,    464.63062,    478.58163,    503.44534,
+    399.84344,  236.99382,   136.74025,   76.280289,   40.646385,    11.710953,    2.8594441,    0.52021164,
+    212.72006,  300.41132,   325.44443,   380.01599,   464.63062,    562.96234,    569.17358,    540.617,
+    327.05588,  195.95148,   114.42867,   64.488777,   29.247095,    8.2048635,    2.023452,     0.37066093,
+    246.48369,  347.95648,   361.09332,   411.38367,   478.58163,    569.17358,    669.84436,    423.19632,
+    260.71509,  156.51753,   91.491295,   52.194027,   18.443186,    5.3108273,    1.3403723,    0.25064033,
+    281.63739,  397.45508,   403.77899,   446.6752,    503.44534,    540.617,      423.19632,    314.78876,
+    197.81201,  120.77065,   71.456383,   33.551113,   10.751633,    3.1861141,    0.825037,     0.15787415,
+    316.63077,  446.72147,   452.23843,   465.06082,   399.84344,    327.05588,    260.71509,    197.81201,
+    143.21358,  89.001381,   50.928936,   17.906925,   5.8927317,    1.7789971,    0.47286242,   0.09345492,
+    251.52148,  342.48343,   311.41495,   272.61554,   236.99382,    195.95148,    156.51753,    120.77065,
+    89.001381,  62.675388,   24.320698,   8.8727207,   3.0197287,    0.94003648,   0.25461531,   0.058018267,
+    143.16299,  195.58694,   179.25958,   156.15523,   136.74025,    114.42867,    91.491295,    71.456383,
+    50.928936,  24.320698,   10.802683,   4.0875583,   1.4389318,    0.46211067,   0.13661577,   0.034396227,
+    78.950287,  108.15432,   99.774551,   87.772934,   76.280289,    64.488777,    52.194027,    33.551113,
+    17.906925,  8.8727207,   4.0875583,   1.7478259,   0.6361227,    0.21388066,   0.074568629,  0.019158321,
+    41.656433,  57.19471,    53.052128,   47.059536,   40.646385,    29.247095,    18.443186,    10.751633,
+    5.8927317,  3.0197287,   1.4389318,   0.6361227,   0.25886643,   0.1060019,    0.037771821,  0.0099044079,
+    17.327106,  23.270824,   20.188051,   16.029459,   11.710953,    8.2048635,    5.3108273,    3.1861141,
+    1.7789971,  0.94003648,  0.46211067,  0.21388066,  0.1060019,    0.047762275,  0.017389525,  0.0046533952,
+    4.0522857,  5.4606013,   4.7801628,   3.8500388,   2.8594441,    2.023452,     1.3403723,    0.825037,
+    0.47286242, 0.25461531,  0.13661577,  0.074568629, 0.037771821,  0.017389525,  0.0069175116, 0.0018885656,
+    0.70848191, 0.95748037,  0.8447665,   0.68891639,  0.52021164,   0.37066093,   0.25064033,   0.15787415,
+    0.09345492, 0.058018267, 0.034396227, 0.019158321, 0.0099044079, 0.0046533952, 0.0018885656, 0.00055991771};
+
+const float inv_dequant0_matrix32x32[3][1024] = {
+    27759.77,    21059.225,   17197.404,   14009.621,    11384.654,    9228.3701,    7461.3525,     7150.5508,
+    7268.2134,   7366.9727,   7445.0757,   7500.7153,    7532.0405,    7672.0742,    7940.2881,     8184.7671,
+    8399.7754,   8578.9199,   8715.1123,   8204.1768,    5901.9595,    4212.6611,    2979.4939,     2084.4783,
+    1439.1108,   977.21906,   498.25595,   243.23961,    113.90276,    49.963619,    19.465729,     5.6832814,
+    21059.225,   41802.43,    33001.137,   22017.973,    14028.052,    11027.208,    9177.4805,     9127.9932,
+    9401.3721,   9626.54,     9807.5059,   9945.9082,    10041.953,    10290.417,    10690.814,     11056.282,
+    11379.305,   11651.347,   11862.804,   11092.277,    7997.4121,    5719.8379,    4052.873,      2840.1489,
+    1963.8131,   1335.384,    678.09991,   331.49792,    155.43265,    68.262848,    26.624926,     7.7816625,
+    17197.404,   33001.137,   31322.645,   16107.518,    20768.982,    16866.672,    11477.315,     10119.021,
+    9224.4365,   8860.9131,   9103.8574,   9295.8721,    9439.124,     9760.751,     10178.821,     10560.983,
+    10900.225,   11188.44,    11416.414,   10419.651,    7535.3262,    5404.1553,    3838.7322,     2696.1909,
+    1868.1443,   1262.802,    637.091,     312.14111,    146.65782,    64.532372,    25.214819,     7.3818192,
+    14009.621,   22017.973,   16107.518,   23356.148,    12579.962,    11930.277,    14327.699,     15589.824,
+    13492.236,   12047.982,   10995.165,   10188.766,    9542.8945,    9282.8301,    9699.457,      10094.318,
+    10446.271,   10747.525,   10989.166,   9622.0049,    6984.8052,    5026.4624,    3581.5305,     2522.6624,
+    1752.4392,   1154.8401,   584.52124,   287.24786,    135.33934,    59.706989,    23.386017,     6.8619599,
+    11384.654,   14028.052,   20768.982,   12579.962,    17330.215,    10664.609,    9358.6357,     12095.17,
+    14840.516,   16818.152,   14877.189,   13421.403,    12340.292,    11826.55,     11420.888,     11086.156,
+    10795.019,   10526.329,   10583.537,   8740.2393,    6372.8252,    4604.4819,    3292.8477,     2327.0752,
+    1621.5214,   1030.8453,   523.88489,   258.42047,    122.18353,    54.078941,    21.246124,     6.2518306,
+    9228.3701,   11027.208,   16866.672,   11930.277,    10664.609,    15460.8,      11664.922,     9283.8359,
+    10725.442,   12959.929,   15160.631,   17272.707,    16045.197,    15082.702,    14317.262,     13686.755,
+    13146.663,   12663.691,   12211.836,   8850.0156,    6169.0576,    4279.8501,    2984.6077,     2117.2358,
+    1480.446,    898.57104,   458.86658,   227.36241,    107.94701,    47.963463,    18.911961,     5.931848,
+    7461.3525,   9177.4805,   11477.315,   14327.699,    9358.6357,    11664.922,    15815.642,     12446.541,
+    10172.596,   9795.2471,   11648.097,   13485.994,    15774.219,    18103.338,    17922.318,     16890.668,
+    16015.786,   15247.486,   13680.967,   9519.6162,    6605.5288,    4563.5151,    3133.4592,     2133.8232,
+    1437.2615,   793.76337,   393.58154,   195.71045,    93.363907,    41.669231,    16.498865,     5.8748393,
+    7150.5508,   9127.9932,   10119.021,   15589.824,    12095.17,     9283.8359,    12446.541,     16093.665,
+    13053.992,   10887.75,    9296.4766,   10870.665,    12847.146,    14894.084,    16975.637,     19054.441,
+    19474.178,   18337.799,   14550.531,   10084.281,    6971.3931,    4799.7104,    3285.1311,     2230.4968,
+    1498.2601,   769.28986,   381.51404,   183.99359,    85.196907,    36.985542,    14.271405,     5.8310022,
+    7268.2134,   9401.3721,   9224.4365,   13492.236,    14840.516,    10725.442,    10172.596,     13053.992,
+    16287.741,   13510.883,   11567.537,   10280.332,    10603.812,    12384.636,    14221.5,       16080.72,
+    17926.018,   19718.295,   15213.538,   10512.919,    7247.4067,    4976.5244,    3397.6372,     2301.4634,
+    1439.8313,   729.37701,   362.04837,   174.75305,    80.981865,    35.181698,    13.584824,     6.4417892,
+    7366.9727,   9626.54,     8860.9131,   12047.982,    16818.152,    12959.929,    9795.2471,     10887.75,
+    13510.883,   16515.686,   14318.475,   12633.561,    11316.671,    10416.793,    12026.117,     13674.461,
+    15329.456,   14617.337,   11877.263,   9498.1074,    7417.9546,    5084.3042,    3465.0359,     2343.084,
+    1333.0708,   676.49011,   336.34363,   162.59087,    75.451576,    32.822098,    12.744491,     7.2561679,
+    7445.0757,   9807.5059,   9103.8574,   10995.165,    14877.189,    15160.631,    11648.097,     9296.4766,
+    11567.537,   14318.475,   17420.896,   15300.858,    13626.496,    12281.412,    11181.207,     11722.818,
+    12854.423,   10747.37,    8832.1777,   7139.3735,    5678.6509,    4444.5615,    3421.6125,     2334.5205,
+    1206.4956,   613.80725,   305.8902,    148.18874,    68.906136,    30.030893,    14.446028,     8.1871176,
+    7500.7153,   9945.9082,   9295.8721,   10188.766,    13421.403,    17272.707,    13485.994,     10870.665,
+    10280.332,   12633.561,   15300.858,   18262.137,    16201.403,    14532.124,    13158.543,     12009.08,
+    9286.1982,   7848.771,    6518.2598,   5322.4702,    4274.624,     3376.6462,    2622.3745,     1767.2256,
+    1026.2209,   544.88373,   272.35867,   132.31184,    61.682884,    26.947779,    16.392521,     9.2476816,
+    7532.0405,   10041.953,   9439.124,    9542.8945,    12340.292,    16045.197,    15774.219,     12847.146,
+    10603.812,   11316.671,   13626.496,   16201.403,    19018.902,    17003.117,    15333.513,     12113.333,
+    8512.9658,   5974.5293,   4778.8994,   3939.0884,    3192.512,     2544.0391,    1987.8954,     1194.4711,
+    701.91656,   402.39334,   224.12312,   115.73875,    54.126377,    24.912899,    18.610136,     10.451086,
+    7672.0742,   10290.417,   9760.751,    9282.8301,    11826.55,     15082.702,    18103.338,     14894.084,
+    12384.636,   10416.793,   12281.412,   14532.124,    17003.117,    19669.309,    15923.036,     11255.816,
+    7943.751,    5594.6123,   3929.5447,   2896.1738,    2367.2974,    1902.0885,    1304.9626,     793.82458,
+    471.99438,   273.63303,   154.04323,   83.633995,    43.267982,    28.411644,    21.124651,     11.810225,
+    7940.2881,   10690.814,   10178.821,   9699.457,     11420.888,    14317.262,    17922.318,     16975.637,
+    14221.5,     12026.117,   11181.207,   13158.543,    15333.513,    15923.036,    14433.291,     10262.04,
+    7278.2539,   5147.5425,   3628.5488,   2547.605,     1779.9957,    1340.0419,    843.30066,     519.19763,
+    312.29068,   183.06111,   104.15273,   57.123077,    29.840292,    27.097876,    21.966597,     13.321898,
+    8184.7671,   11056.282,   10560.983,   10094.318,    11086.156,    13686.755,    16890.668,     19054.441,
+    16080.72,    13674.461,   11722.818,   12009.08,     12113.333,    11255.816,    10262.04,      9185.8691,
+    6550.9277,   4655.3447,   3295.269,    2322.0076,    1627.5142,    917.76697,    536.91687,     334.45496,
+    203.45375,   120.56556,   69.316536,   38.400562,    27.800972,    25.429491,    20.622055,     12.511635,
+    8399.7754,   11379.305,   10900.225,   10446.271,    10795.019,    13146.663,    16015.786,     19474.178,
+    17926.018,   15329.456,   12854.423,   9286.1982,    8512.9658,    7943.751,     7278.2539,     6550.9277,
+    5795.0566,   4139.7749,   2943.8821,   2082.8845,    1252.6182,    702.10406,    388.85199,     212.49629,
+    130.59125,   78.21463,    45.431793,   26.701956,    26.124189,    23.897495,    19.382244,     11.761528,
+    8578.9199,   11651.347,   11188.44,    10747.525,    10526.329,    12663.691,    15247.486,     18337.799,
+    19718.295,   14617.337,   10747.37,    7848.771,     5974.5293,    5594.6123,    5147.5425,     4655.3447,
+    4139.7749,   3620.7822,   2587.6548,   1624.6555,    929.41766,    525.53235,    293.45462,     161.59938,
+    87.580582,   49.999958,   29.33618,    25.129185,    24.578579,    22.479414,    18.230083,     11.061857,
+    8715.1123,   11862.804,   11416.414,   10989.166,    10583.537,    12211.836,    13680.967,     14550.531,
+    15213.538,   11877.263,   8832.1777,   6518.2598,    4778.8994,    3929.5447,    3628.5488,     3295.269,
+    2943.8821,   2587.6548,   1999.1306,   1168.3282,    674.8075,     385.01572,    216.81653,     120.34917,
+    65.71418,    35.191235,   23.185534,   23.670441,    23.139233,    21.153818,    17.149212,     10.403255,
+    8204.1768,   11092.277,   10419.651,   9622.0049,    8740.2393,    8850.0156,    9519.6162,     10084.281,
+    10512.919,   9498.1074,   7139.3735,   5322.4702,    3939.0884,    2896.1738,    2547.605,      2322.0076,
+    2082.8845,   1624.6555,   1168.3282,   822.27863,    479.57297,    276.1431,     156.85684,     87.78054,
+    48.301502,   27.678949,   25.619358,   23.148615,    21.782259,    19.900043,    16.123768,     9.7766094,
+    5901.9595,   7997.4121,   7535.3262,   6984.8052,    6372.8252,    6169.0576,    6605.5288,     6971.3931,
+    7247.4067,   7417.9546,   5678.6509,   4274.624,     3192.512,     2367.2974,    1779.9957,     1627.5142,
+    1252.6182,   929.41766,   674.8075,    479.57297,    333.64413,    193.90337,    111.11308,     62.700363,
+    34.773911,   30.949808,   28.546915,   25.704071,    22.303091,    18.697893,    15.138191,     9.1729412,
+    4212.6611,   5719.8379,   5404.1553,   5026.4624,    4604.4819,    4279.8501,    4563.5151,     4799.7104,
+    4976.5244,   5084.3042,   4444.5615,   3376.6462,    2544.0391,    1902.0885,    1340.0419,     917.76697,
+    702.10406,   525.53235,   385.01572,   276.1431,     193.90337,    133.27422,    77.049034,     43.845497,
+    36.757248,   34.346748,   31.57686,    28.339281,    24.509333,    19.942665,    14.471446,     8.5832787,
+    2979.4939,   4052.873,    3838.7322,   3581.5305,    3292.8477,    2984.6077,    3133.4592,     3285.1311,
+    3397.6372,   3465.0359,   3421.6125,   2622.3745,    1987.8954,    1304.9626,    843.30066,     536.91687,
+    388.85199,   293.45462,   216.81653,   156.85684,    111.11308,    77.049034,    52.270931,     43.039375,
+    40.54631,    37.773903,   34.622028,   30.976885,    26.707983,    21.664829,    15.673048,     10.26242,
+    2084.4783,   2840.1489,   2696.1909,   2522.6624,    2327.0752,    2117.2358,    2133.8232,     2230.4968,
+    2301.4634,   2343.084,    2334.5205,   1767.2256,    1194.4711,    793.82458,    519.19763,     334.45496,
+    212.49629,   161.59938,   120.34917,   87.78054,     62.700363,    43.845497,    43.039375,     47.077499,
+    44.23122,    41.092598,   37.557102,   33.506355,    28.805136,    23.298027,    18.485712,     14.080012,
+    1439.1108,   1963.8131,   1868.1443,   1752.4392,    1621.5214,    1480.446,     1437.2615,     1498.2601,
+    1439.8313,   1333.0708,   1206.4956,   1026.2209,    701.91656,    471.99438,    312.29068,     203.45375,
+    130.59125,   87.580582,   65.71418,    48.301502,    34.773911,    36.757248,    40.54631,      44.23122,
+    47.600193,   44.109062,   40.207745,   35.774635,    30.671337,    25.435469,    25.433935,     19.216208,
+    977.21906,   1335.384,    1262.802,    1154.8401,    1030.8453,    898.57104,    793.76337,     769.28986,
+    729.37701,   676.49011,   613.80725,   544.88373,    402.39334,    273.63303,    183.06111,     120.56556,
+    78.21463,    49.999958,   35.191235,   27.678949,    30.949808,    34.346748,    37.773903,     41.092598,
+    44.109062,   46.559208,   42.337101,   37.574516,    32.132,       34.955532,    34.672985,     25.989962,
+    498.25595,   678.09991,   637.091,     584.52124,    523.88489,    458.86658,    393.58154,     381.51404,
+    362.04837,   336.34363,   305.8902,    272.35867,    224.12312,    154.04323,    104.15273,     69.316536,
+    45.431793,   29.33618,    23.185534,   25.619358,    28.546915,    31.57686,     34.622028,     37.557102,
+    40.207745,   42.337101,   43.629288,   38.630398,    43.10384,     47.337585,    46.587337,     34.651222,
+    243.23961,   331.49792,   312.14111,   287.24786,    258.42047,    227.36241,    195.71045,     183.99359,
+    174.75305,   162.59087,   148.18874,   132.31184,    115.73875,    83.633995,    57.123077,     38.400562,
+    26.701956,   25.129185,   23.670441,   23.148615,    25.704071,    28.339281,    30.976885,     33.506355,
+    35.774635,   37.574516,   38.630398,   49.903534,    57.507961,    62.669315,    61.205349,     45.180832,
+    113.90276,   155.43265,   146.65782,   135.33934,    122.18353,    107.94701,    93.363907,     85.196907,
+    80.981865,   75.451576,   68.906136,   61.682884,    54.126377,    43.267982,    29.840292,     27.800972,
+    26.124189,   24.578579,   23.139233,   21.782259,    22.303091,    24.509333,    26.707983,     28.805136,
+    30.671337,   32.132,      43.10384,    57.507961,    74.019264,    80.057747,    77.606369,     56.866417,
+    49.963619,   68.262848,   64.532372,   59.706989,    54.078941,    47.963463,    41.669231,     36.985542,
+    35.181698,   32.822098,   30.030893,   26.947779,    24.912899,    28.411644,    27.097876,     25.429491,
+    23.897495,   22.479414,   21.153818,   19.900043,    18.697893,    19.942665,    21.664829,     23.298027,
+    25.435469,   34.955532,   47.337585,   62.669315,    80.057747,    96.33802,     92.712547,     67.44825,
+    19.465729,   26.624926,   25.214819,   23.386017,    21.246124,    18.911961,    16.498865,     14.271405,
+    13.584824,   12.744491,   14.446028,   16.392521,    18.610136,    21.124651,    21.966597,     20.622055,
+    19.382244,   18.230083,   17.149212,   16.123768,    15.138191,    14.471446,    15.673048,     18.485712,
+    25.433935,   34.672985,   46.587337,   61.205349,    77.606369,    92.712547,    98.911064,     71.454735,
+    5.6832814,   7.7816625,   7.3818192,   6.8619599,    6.2518306,    5.931848,     5.8748393,     5.8310022,
+    6.4417892,   7.2561679,   8.1871176,   9.2476816,    10.451086,    11.810225,    13.321898,     12.511635,
+    11.761528,   11.061857,   10.403255,   9.7766094,    9.1729412,    8.5832787,    10.26242,      14.080012,
+    19.216208,   25.989962,   34.651222,   45.180832,    56.866417,    67.44825,     71.454735,     57.03159,
+    5647.4438,   6753.8403,   5959.4565,   5245.7246,    4606.1094,    4034.3606,    3524.541,      3157.363,
+    2848.7861,   2563.1165,   2299.2993,   2056.2517,    1832.8772,    1640.2583,    1474.5443,     1320.2314,
+    1176.8837,   1044.0483,   921.2619,    809.23602,    709.88043,    617.86603,    532.87958,     454.60342,
+    382.71756,   316.90189,   247.11736,   186.80002,    135.44676,    91.998367,    55.499458,     25.090487,
+    6753.8403,   9488.665,    9931.2803,   8553.8994,    7292.7812,    6249.7036,    5372.2231,     4770.6211,
+    4266.6396,   3812.8267,   3402.0034,   3029.106,     2690.2749,    2401.6631,    2153.3167,     1923.5682,
+    1711.2981,   1515.4852,   1335.1792,   1171.4097,    1026.3,       892.26086,    768.74207,     655.20587,
+    551.12677,   455.99094,   355.04221,   268.21002,    194.36061,    131.94092,    79.554382,     35.948029,
+    5959.4565,   9931.2803,   7932.855,    7885.7764,    7431.9438,    6555.0298,    5627.7007,     5011.5894,
+    4468.5908,   3979.2915,   3532.7783,   3132.7627,    2772.9871,    2472.9675,    2211.6484,     1971.3832,
+    1750.5248,   1547.6572,   1361.5294,   1193.6245,    1044.517,     907.12451,    780.78949,     664.8869,
+    558.81952,   461.48712,   358.64676,   270.78668,    196.12987,    133.08028,    80.206787,     36.228275,
+    5245.7246,   8553.8994,   7885.7764,   6599.938,     6392.4233,    5990.2383,    5585.4102,     5132.8647,
+    4559.7266,   4052.7339,   3600.2258,   3194.0774,    2828.3188,    2530.5908,    2258.1785,     2008.9121,
+    1780.8026,   1572.0654,   1381.1697,   1210.509,     1058.1282,    918.03662,    789.47559,     671.73779,
+    564.15955,   463.70047,   360.19818,   271.84094,    196.81523,    133.49622,    80.430031,     36.317585,
+    4606.1094,   7292.7812,   7431.9438,   6392.4233,    5463.9927,    5234.7217,    5027.3745,     4747.835,
+    4432.0508,   4077.1138,   3616.7708,   3204.7395,    2840.1484,    2544.2102,    2272.9863,     2024.2074,
+    1795.993,    1586.7396,   1393.4408,   1221.5485,    1066.7343,    924.68793,    794.56097,     675.57465,
+    567.00665,   463.13876,   359.63824,   271.33292,    196.3905,     133.17256,    80.215202,     36.212284,
+    4034.3606,   6249.7036,   6555.0298,   5990.2383,    5234.7217,    4639.8286,    4495.0903,     4289.3535,
+    4036.0081,   3757.8186,   3468.5669,   3177.3008,    2836.9285,    2539.3843,    2267.095,      2017.6912,
+    1789.1992,   1579.9347,   1388.4299,   1220.6163,    1067.0275,    925.80829,    795.85162,     676.24988,
+    567.24963,   459.76926,   356.94595,   269.24948,    194.8479,     132.10498,    79.560242,     36.477112,
+    3524.541,    5372.2231,   5627.7007,   5585.4102,    5027.3745,    4495.0903,    4010.2107,     3850.562,
+    3651.6968,   3424.1008,   3178.1006,   2926.1726,    2711.8442,    2496.7043,    2245.2983,     1997.7461,
+    1771.0409,   1563.5052,   1375.428,    1209.7025,    1057.1802,    917.02057,    788.44995,     670.74811,
+    563.23944,   452.99265,   352.11801,   265.60474,    192.19832,    130.30128,    78.470016,     37.137085,
+    3157.363,    4770.6211,   5011.5894,   5132.8647,    4747.835,     4289.3535,    3850.562,      3447.8291,
+    3286.3145,   3099.2502,   2895.1919,   2699.8142,    2511.5901,    2320.4753,    2129.8306,     1942.3022,
+    1741.6445,   1537.5632,   1355.778,    1192.2618,    1041.8038,    903.57141,    776.79602,     660.76172,
+    554.79736,   441.61682,   343.37253,   259.37482,    187.94739,    127.58199,    76.923965,     37.807724,
+    2848.7861,   4266.6396,   4468.5908,   4559.7266,    4432.0508,    4036.0081,    3651.6968,     3286.3145,
+    2948.2344,   2792.2668,   2631.2812,   2478.1697,    2314.8501,    2145.7014,    1975.4551,     1806.6643,
+    1641.3643,   1481.1416,   1328.6294,   1168.4478,    1021.0288,    885.57111,    761.33032,     647.61041,
+    538.33154,   427.79807,   332.76541,   251.4594,     182.27744,    123.77497,    74.652321,     38.330761,
+    2563.1165,   3812.8267,   3979.2915,   4052.7339,    4077.1138,    3757.8186,    3424.1008,     3099.2502,
+    2792.2668,   2514.804,    2395.2642,   2263.0088,    2121.7727,    1974.774,     1823.2893,     1672.0176,
+    1522.9102,   1381.8047,   1248.6274,   1119.8175,    995.08313,    863.20764,    742.2077,      631.42041,
+    517.84808,   411.75937,   320.4624,    242.28418,    175.70909,    119.36732,    72.104607,     38.818394,
+    2299.2993,   3402.0034,   3532.7783,   3600.2258,    3616.7708,    3468.5669,    3178.1006,     2895.1919,
+    2631.2812,   2395.2642,   2173.4993,   2059.0305,    1935.8486,    1806.7939,    1674.4062,     1539.9724,
+    1406.8092,   1283.0233,   1161.7151,   1043.8623,    930.24261,    821.45776,    717.9574,      611.66425,
+    494.85779,   393.75824,   306.65472,   231.98755,    168.33853,    114.42181,    73.055611,     39.285583,
+    2056.2517,   3029.106,    3132.7627,   3194.0774,    3204.7395,    3177.3008,    2926.1726,     2699.8142,
+    2478.1697,   2263.0088,   2059.0305,   1866.9041,    1759.4436,    1646.1713,    1529.2731,     1410.6986,
+    1297.5624,   1185.7885,   1075.7563,   968.41003,    864.51721,    764.68787,    669.39166,     568.6261,
+    467.31378,   374.0784,    291.55334,   220.72223,    160.27208,    109.0079,     73.953133,     39.721516,
+    1832.8772,   2690.2749,   2772.9871,   2828.3188,    2840.1484,    2836.9285,    2711.8442,     2511.5901,
+    2314.8501,   2121.7727,   1935.8486,   1759.4436,    1593.079,     1493.7609,    1390.7449,     1289.4801,
+    1190.1628,   1090.6735,   991.50104,   894.14368,    799.57288,    708.38879,    620.86053,     518.82355,
+    427.24158,   345.59891,   273.30957,   208.65121,    151.62337,    104.52164,    74.773422,     40.113838,
+    1640.2583,   2401.6631,   2472.9675,   2530.5908,    2544.2102,    2539.3843,    2496.7043,     2320.4753,
+    2145.7014,   1974.774,    1806.7939,   1646.1713,    1493.7609,    1349.9647,    1262.1243,     1175.2073,
+    1086.6716,   997.62921,   909.06207,   821.62915,    735.92542,    653.02197,    561.79041,     470.45264,
+    388.18765,   314.60828,   249.25372,   191.61386,    141.14845,    105.65822,    75.489624,     40.448593,
+    1474.5443,   2153.3167,   2211.6484,   2258.1785,    2272.9863,    2267.095,     2245.2983,     2129.8306,
+    1975.4551,   1823.2893,   1674.4062,   1529.2731,    1390.7449,    1262.1243,    1143.4061,     1066.4246,
+    987.73401,   908.31421,   829.04413,   750.69647,    673.93848,    594.4339,     505.18051,     423.92969,
+    350.49826,   284.60471,   225.89371,   173.95778,    128.35512,    104.15652,    75.211189,     40.704014,
+    1320.2314,   1923.5682,   1971.3832,   2008.9121,    2024.2074,    2017.6912,    1997.7461,     1942.3022,
+    1806.6643,   1672.0176,   1539.9724,   1410.6986,    1289.4801,    1175.2073,    1066.4246,     963.35919,
+    893.63898,   823.0542,    752.38312,   682.31958,    613.47125,    529.8266,     451.40695,     379.5824,
+    314.45175,   255.8183,    203.41415,   156.91895,    125.86903,    102.24911,    73.784668,     39.906857,
+    1176.8837,   1711.2981,   1750.5248,   1780.8026,    1795.993,     1789.1992,    1771.0409,     1741.6445,
+    1641.3643,   1522.9102,   1406.8092,   1297.5624,    1190.1628,    1086.6716,    987.73401,     893.63898,
+    804.55994,   742.06683,   679.323,     616.94482,    542.94952,    468.94223,    400.48373,     337.65472,
+    280.26181,   228.42978,   181.96297,   142.4147,     123.38181,    100.15266,    72.220078,     39.034161,
+    1044.0483,   1515.4852,   1547.6572,   1572.0654,    1586.7396,    1579.9347,    1563.5052,     1537.5632,
+    1481.1416,   1381.8047,   1283.0233,   1185.7885,    1090.6735,    997.62921,    908.31421,     823.0542,
+    742.06683,   665.48138,   610.02527,   544.83606,    476.37396,    412.29703,    352.82245,     298.05792,
+    248.01785,   202.57388,   161.65331,   139.34525,    120.61987,    97.831955,    70.492912,     38.073158,
+    921.2619,    1335.1792,   1361.5294,   1381.1697,    1393.4408,    1388.4299,    1375.428,      1355.778,
+    1328.6294,   1248.6274,   1161.7151,   1075.7563,    991.50104,    909.06207,    829.04413,     752.38312,
+    679.323,     610.02527,   535.70306,   473.75674,    415.06741,    359.95514,    308.63498,     261.22885,
+    217.77783,   178.25468,   150.78267,   135.90138,    117.53384,    95.248413,    68.576332,     37.00983,
+    809.23602,   1171.4097,   1193.6245,   1210.509,     1221.5485,    1220.6163,    1209.7025,     1192.2618,
+    1168.4478,   1119.8175,   1043.8623,   968.41003,    894.14368,    821.62915,    750.69647,     682.31958,
+    616.94482,   544.83606,   473.75674,   408.93054,    358.96671,    311.90079,    267.9375,      227.20329,
+    189.75572,   158.04335,   146.51324,   131.99625,    114.06984,    92.359825,    66.441002,     35.828876,
+    709.88043,   1026.3,      1044.517,    1058.1282,    1066.7343,    1067.0275,    1057.1802,     1041.8038,
+    1021.0288,   995.08313,   930.24261,   864.51721,    799.57288,    735.92542,    673.93848,     613.47125,
+    542.94952,   476.37396,   415.06741,   358.96671,    307.93683,    268.05362,    230.68958,     195.96881,
+    163.95755,   152.98413,   141.67432,   127.50971,    110.12691,    89.120285,    64.054985,     34.513657,
+    617.86603,   892.26086,   907.12451,   918.03662,    924.68793,    925.80829,    917.02057,     903.57141,
+    885.57111,   863.20764,   821.45776,   764.68787,    708.38879,    653.02197,    594.4339,      529.8266,
+    468.94223,   412.29703,   359.95514,   311.90079,    268.05362,    228.28214,    196.80522,     167.47345,
+    155.85141,   147.21855,   136.18883,   122.44758,    105.65214,    85.430168,    61.377911,     33.046085,
+    532.87958,   768.74207,   780.78949,   789.47559,    794.56097,    795.85162,    788.44995,     776.79602,
+    761.33032,   742.2077,    717.9574,    669.39166,    620.86053,    561.79041,    505.18051,     451.40695,
+    400.48373,   352.82245,   308.63498,   267.9375,     230.68958,    196.80522,    166.16272,     155.48438,
+    149.05833,   140.64102,   129.96239,   116.72793,    100.61709,    81.281746,    58.344582,     29.269342,
+    454.60342,   655.20587,   664.8869,    671.73779,    675.57465,    676.24988,    670.74811,     660.76172,
+    647.61041,   631.42041,   611.66425,   568.6261,     518.82355,    470.45264,    423.92969,     379.5824,
+    337.65472,   298.05792,   261.22885,   227.20329,    195.96881,    167.47345,    155.48438,     147.53543,
+    141.2679,    133.13718,   122.89288,   110.26207,    94.947845,    76.627945,    53.001026,     25.055674,
+    382.71756,   551.12677,   558.81952,   564.15955,    567.00665,    567.24963,    563.23944,     554.79736,
+    538.33154,   517.84808,   494.85779,   467.31378,    427.24158,    388.18765,    350.49826,     314.45175,
+    280.26181,   248.01785,   217.77783,   189.75572,    163.95755,    155.85141,    149.05833,     141.2679,
+    132.34445,   124.5834,    114.86992,   102.95419,    88.564606,    70.65815,     44.67482,      21.143274,
+    316.90189,   455.99094,   461.48712,   463.70047,    463.13876,    459.76926,    452.99265,     441.61682,
+    427.79807,   411.75937,   393.75824,   374.0784,     345.59891,    314.60828,    284.60471,     255.8183,
+    228.42978,   202.57388,   178.25468,   158.04335,    152.98413,    147.21855,    140.64102,     133.13718,
+    124.5834,    114.84621,   105.77442,   94.701004,    81.38131,     58.455536,    37.000786,     17.530621,
+    247.11736,   355.04221,   358.64676,   360.19818,    359.63824,    356.94595,    352.11801,     343.37253,
+    332.76541,   320.4624,    306.65472,   291.55334,    273.30957,    249.25372,    225.89371,     203.41415,
+    181.96297,   161.65331,   150.78267,   146.51324,    141.67432,    136.18883,    129.96239,     122.89288,
+    114.86992,   105.77442,   95.478004,   85.391182,    66.203133,    47.291599,    29.966896,     14.213268,
+    186.80002,   268.21002,   270.78668,   271.84094,    271.33292,    269.24948,    265.60474,     259.37482,
+    251.4594,    242.28418,   231.98755,   220.72223,    208.65121,    191.61386,    173.95778,     156.91895,
+    142.4147,    139.34525,   135.90138,   131.99625,    127.50971,    122.44758,    116.72793,     110.26207,
+    102.95419,   94.701004,   85.391182,   67.933937,    51.929531,    37.135086,    23.556034,     11.184304,
+    135.44676,   194.36061,   196.12987,   196.81523,    196.3905,     194.8479,     192.19832,     187.94739,
+    182.27744,   175.70909,   168.33853,   160.27208,    151.62337,    141.14845,    128.35512,     125.86903,
+    123.38181,   120.61987,   117.53384,   114.06984,    110.12691,    105.65214,    100.61709,     94.947845,
+    88.564606,   81.38131,    66.203133,   51.929531,    39.04237,     27.948416,    17.746897,     8.4347544,
+    91.998367,   131.94092,   133.08028,   133.49622,    133.17256,    132.10498,    130.30128,     127.58199,
+    123.77497,   119.36732,   114.42181,   109.0079,     104.52164,    105.65822,    104.15652,     102.24911,
+    100.15266,   97.831955,   95.248413,   92.359825,    89.120285,    85.430168,    81.281746,     76.627945,
+    70.65815,    58.455536,   47.291599,   37.135086,    27.948416,    19.689066,    12.514842,     5.9539757,
+    55.499458,   79.554382,   80.206787,   80.430031,    80.215202,    79.560242,    78.470016,     76.923965,
+    74.652321,   72.104607,   73.055611,   73.953133,    74.773422,    75.489624,    75.211189,     73.784668,
+    72.220078,   70.492912,   68.576332,   66.441002,    64.054985,    61.377911,    58.344582,     53.001026,
+    44.67482,    37.000786,   29.966896,   23.556034,    17.746897,    12.514842,    7.8326411,     3.7300112,
+    25.090487,   35.948029,   36.228275,   36.317585,    36.212284,    36.477112,    37.137085,     37.807724,
+    38.330761,   38.818394,   39.285583,   39.721516,    40.113838,    40.448593,    40.704014,     39.906857,
+    39.034161,   38.073158,   37.00983,    35.828876,    34.513657,    33.046085,    29.269342,     25.055674,
+    21.143274,   17.530621,   14.213268,   11.184304,    8.4347544,    5.9539757,    3.7300112,     1.7499064,
+    7452.5459,   1653.0973,   1098.1042,   727.66534,    481.00522,    317.16086,    208.59155,     149.65108,
+    110.55112,   81.436607,   59.812893,   43.794903,    31.961613,    23.099804,    16.518005,     11.763959,
+    8.3414326,   5.8861513,   4.1314034,   2.827503,     1.7887044,    1.1227245,    0.69828635,    0.42959845,
+    0.2608158,   0.15574206,  0.15453278,  0.1533215,    0.14591624,   0.13008402,   0.10300098,    0.061118193,
+    1653.0973,   8380.5039,   5775.375,    1079.0876,    328.9371,     204.79303,    154.29584,     122.92825,
+    97.436584,   75.785179,   58.118813,   44.076027,    33.117424,    24.517237,    17.903296,     12.983698,
+    9.352972,    6.6923704,   4.7555513,   3.2828107,    2.0976176,    1.3285706,    0.83311653,    0.51639366,
+    0.31566259,  0.18968138,  0.19134447,  0.19078892,   0.18241146,   0.16331688,   0.12983187,    0.077326767,
+    1098.1042,   5775.375,    4689.2881,   6173.9531,    2290.5842,    800.42993,    290.75485,     138.9079,
+    73.837051,   46.981812,   37.868378,   29.908382,    23.242268,    17.6751,      13.224583,     9.7938852,
+    7.1852026,   5.2244205,   3.7655427,   2.6137071,    1.6897452,    1.0815921,    0.68476027,    0.42814901,
+    0.26381135,  0.16220759,  0.16746993,  0.16782472,   0.16120373,   0.14495377,   0.11569799,    0.06916777,
+    727.66534,   1079.0876,   6173.9531,   2611.1367,    3328.4873,    1912.3757,    947.11169,     480.0253,
+    219.9599,    111.24844,   60.383377,   34.50848,     20.489538,    12.385681,    9.4622946,     7.1696663,
+    5.3661933,   3.971153,    2.9073288,   2.0221956,    1.3245902,    0.85795265,   0.54903388,    0.34665737,
+    0.21551648,  0.14123312,  0.14661011,  0.14765961,   0.14249404,   0.12868311,   0.10312349,    0.06188108,
+    481.00522,   328.9371,    2290.5842,   3328.4873,    1446.8124,    1663.2595,    1531.1774,     831.79169,
+    480.55585,   280.44727,   139.75082,   74.196167,    41.275387,    23.572012,    13.852783,     8.3232937,
+    5.0868764,   3.1492734,   2.1943185,   1.5237617,    1.0123664,    0.66419888,   0.43003657,    0.27443221,
+    0.17228782,  0.1230865,   0.12845814,  0.1300182,    0.1260446,    0.11431187,   0.091968946,   0.055391051,
+    317.16086,   204.79303,   800.42993,   1912.3757,    1663.2595,    879.99316,    1006.1718,     1058.3137,
+    668.74103,   394.177,     240.48183,   150.66313,    80.264244,    43.679893,    24.590214,     14.219559,
+    8.3981218,   5.0423026,   3.0657153,   1.7397083,    0.9833861,    0.55757725,   0.32954982,    0.21270514,
+    0.13493462,  0.1074447,   0.11271633,  0.11463311,   0.11162424,   0.10165226,   0.082098484,   0.048040159,
+    208.59155,   154.29584,   290.75485,   947.11169,    1531.1774,    1006.1718,    573.2146,      619.79071,
+    629.17371,   507.08578,   304.87149,   188.1375,     117.17072,    74.162743,    42.344086,     23.677216,
+    13.56254,    7.9194975,   4.6161661,   2.5380981,    1.4063962,    0.78302729,   0.43674421,    0.24327713,
+    0.13485461,  0.1069512,   0.099649422, 0.1012495,    0.099012084,  0.090524487,  0.073381729,   0.04033751,
+    149.65108,   122.92825,   138.9079,    480.0253,     831.79169,    1058.3137,    619.79071,     371.42276,
+    385.13297,   379.87418,   358.90652,   224.30173,    138.47133,    86.847908,    55.162323,     35.383717,
+    21.313681,   12.145742,   6.7107077,   3.6188078,    1.9695817,    1.0785577,    0.59242898,    0.32534847,
+    0.17799333,  0.16047736,  0.14708714,  0.13201807,   0.11448132,   0.093597606,  0.068363689,   0.033529639,
+    110.55112,   97.436584,   73.837051,   219.9599,     480.55585,    668.74103,    629.17371,     385.13297,
+    239.36328,   240.1848,    230.45854,   211.83032,    157.80495,    98.485023,    62.204361,     39.666977,
+    25.482706,   16.458862,   9.4808216,   5.0279841,    2.6939895,    1.4537559,    0.78763467,    0.42704073,
+    0.26488531,  0.24161115,  0.21790393,  0.19264847,   0.16471227,   0.13289179,   0.095863439,   0.040011808,
+    81.436607,   75.785179,   46.981812,   111.24844,    280.44727,    394.177,      507.08578,     379.87418,
+    240.1848,    152.98123,   148.04646,   138.31981,    125.22149,    108.09051,    68.109337,     43.296539,
+    27.714876,   17.121479,   10.193944,   6.0743914,    3.5910954,    1.9136044,    1.0244646,     0.54920506,
+    0.406075,    0.3642543,   0.32339528,  0.28172541,   0.23755264,   0.18917324,   0.13448031,    0.0503271,
+    59.812893,   58.118813,   37.868378,   60.383377,    139.75082,    240.48183,    304.87149,     358.90652,
+    230.45854,   148.04646,   95.659012,   90.53273,     83.109322,    74.206566,    64.604469,     45.997284,
+    29.191458,   17.341434,   10.321466,   6.1462517,    3.6562936,    2.1693079,    1.2812663,     0.70197523,
+    0.62169224,  0.54882485,  0.47995898,  0.41219157,   0.3429068,    0.26961187,   0.16896416,    0.062724777,
+    43.794903,   44.076027,   29.908382,   34.50848,     74.196167,    150.66313,    188.1375,      224.30173,
+    211.83032,   138.31981,   90.53273,    59.445751,    55.193497,    49.882778,    43.975204,     37.885742,
+    28.733728,   17.102911,   10.192494,   6.0740843,    3.6147273,    2.1448407,    1.2666616,     0.95145923,
+    0.91624564,  0.82492906,  0.71112078,  0.60242444,   0.4946973,    0.38418862,   0.2100568,     0.077394471,
+    31.961613,   33.117424,   23.242268,   20.489538,    41.275387,    80.264244,    117.17072,     138.47133,
+    157.80495,   125.22149,   83.109322,   55.193497,    36.700203,    33.506638,    29.857651,     25.031574,
+    19.86261,    15.422539,   9.8179817,   5.863102,     3.494662,     2.0760274,    1.2326511,     1.1837422,
+    1.1308408,   1.0703059,   0.99809974,  0.87819707,   0.71226925,   0.53239381,   0.25816226,    0.094458885,
+    23.099804,   24.517237,   17.6751,     12.385681,    23.572012,    43.679893,    74.162743,     86.847908,
+    98.485023,   108.09051,   74.206566,   49.882778,    33.506638,    22.500179,    19.659946,     16.157965,
+    12.977942,   10.19952,    7.8515244,   5.5287971,    3.3041918,    1.9671426,    1.541515,      1.4689074,
+    1.392758,    1.3086799,   1.2119061,   1.0971035,    0.95817453,   0.65057075,   0.31339622,    0.11394219,
+    16.518005,   17.903296,   13.224583,   9.4622946,    13.852783,    24.590214,    42.344086,     55.162323,
+    62.204361,   68.109337,   64.604469,   43.975204,    29.857651,    19.659946,    12.487819,     10.373426,
+    8.423563,    6.6937723,   5.2099481,   3.9744277,    2.9728496,    2.0257556,    1.9197992,     1.8163197,
+    1.7101527,   1.5960146,   1.468274,    1.3207343,    1.1464096,    0.67031622,   0.34781358,    0.13560228,
+    11.763959,   12.983698,   9.7938852,   7.1696663,    8.3232937,    14.219559,    23.677216,     35.383717,
+    39.666977,   43.296539,   45.997284,   37.885742,    25.031574,    16.157965,    10.373426,     6.6266203,
+    5.4342976,   4.362062,    3.4297614,   2.643038,     1.9968795,    2.2546837,    2.3789117,     2.2359676,
+    2.0916932,   1.9397397,   1.7734606,   1.5856591,    1.1544813,    0.67193842,   0.34832948,    0.13567345,
+    8.3414326,   9.352972,    7.1852026,   5.3661933,    5.0868764,    8.3981218,    13.56254,      21.313681,
+    25.482706,   27.714876,   29.191458,   28.733728,    19.86261,     12.977942,    8.423563,      5.4342976,
+    3.4856203,   2.8236079,   2.2409234,   1.7431903,    1.8179442,    2.1103861,    2.4194973,     2.7352304,
+    2.5462136,   2.3473892,   2.133774,    1.8475604,    1.1439821,    0.66548669,   0.34478524,    0.13420847,
+    5.8861513,   6.6923704,   5.2244205,   3.971153,     3.1492734,    5.0423026,    7.9194975,     12.145742,
+    16.458862,   17.121479,   17.341434,   17.102911,    15.422539,    10.19952,     6.6937723,     4.362062,
+    2.8236079,   1.8158308,   1.4534936,   1.4607134,    1.7115303,    1.9835888,    2.2711449,     2.564872,
+    2.8506856,   2.8261323,   2.5551648,   1.8073802,    1.1190749,    0.65090579,   0.33715183,    0.13119647,
+    4.1314034,   4.7555513,   3.7655427,   2.9073288,    2.1943185,    3.0657153,    4.6161661,     6.7107077,
+    9.4808216,   10.193944,   10.321466,   10.192494,    9.8179817,    7.8515244,    5.2099481,     3.4297614,
+    2.2409234,   1.4534936,   1.1726444,   1.3837968,    1.6174361,    1.8706927,    2.1382265,     2.4113574,
+    2.6769662,   2.9161084,   2.7085147,   1.7439601,    1.0801877,    0.62842041,   0.32553586,    0.12667598,
+    2.827503,    3.2828107,   2.6137071,   2.0221956,    1.5237617,    1.7397083,    2.5380981,     3.6188078,
+    5.0279841,   6.0743914,   6.1462517,   6.0740843,    5.863102,     5.5287971,    3.9744277,     2.643038,
+    1.7431903,   1.4607134,   1.3837968,   1.314991,     1.5327175,    1.7684577,    2.0172272,     2.2709329,
+    2.517345,    2.6513999,   2.081558,    1.5779446,    1.0282415,    0.59852517,   0.3101756,     0.12073385,
+    1.7887044,   2.0976176,   1.6897452,   1.3245902,    1.0123664,    0.9833861,    1.4063962,     1.9695817,
+    2.6939895,   3.5910954,   3.6562936,   3.6147273,    3.494662,     3.3041918,    2.9728496,     1.9968795,
+    1.8179442,   1.7115303,   1.6174361,   1.5327175,    1.4546751,    1.6738989,    1.9048941,     2.1401069,
+    2.3681388,   1.9743114,   1.5567621,   1.1852585,    0.86070496,   0.56195939,   0.29142976,    0.11350145,
+    1.1227245,   1.3285706,   1.0815921,   0.85795265,   0.66419888,   0.55757725,   0.78302729,    1.0785577,
+    1.4537559,   1.9136044,   2.1693079,   2.1448407,    2.0760274,    1.9671426,    2.0257556,     2.2546837,
+    2.1103861,   1.9835888,   1.8706927,   1.7684577,    1.6738989,    1.5841904,    1.7981392,     2.0155559,
+    1.7909182,   1.4579605,   1.1543156,   0.88245374,   0.6434359,    0.43712825,   0.26243109,    0.1051485,
+    0.69828635,  0.83311653,  0.68476027,  0.54903388,   0.43003657,   0.32954982,   0.43674421,    0.59242898,
+    0.78763467,  1.0244646,   1.2812663,   1.2666616,    1.2326511,    1.541515,     1.9197992,     2.3789117,
+    2.4194973,   2.2711449,   2.1382265,   2.0172272,    1.9048941,    1.7981392,    1.6939481,     1.5605109,
+    1.305195,    1.0665417,   0.8476423,   0.65050262,   0.47614104,   0.32471988,   0.19569288,    0.07768748,
+    0.42959845,  0.51639366,  0.42814901,  0.34665737,   0.27443221,   0.21270514,   0.24327713,    0.32534847,
+    0.42704073,  0.54920506,  0.70197523,  0.95145923,   1.1837422,    1.4689074,    1.8163197,     2.2359676,
+    2.7352304,   2.564872,    2.4113574,   2.2709329,    2.1401069,    2.0155559,    1.5605109,     1.1213179,
+    0.94104314,  0.77166098,  0.61546981,  0.4740335,    0.34823546,   0.23835677,   0.13523336,    0.048583396,
+    0.2608158,   0.31566259,  0.26381135,  0.21551648,   0.17228782,   0.13493462,   0.13485461,    0.17799333,
+    0.26488531,  0.406075,    0.62169224,  0.91624564,   1.1308408,    1.392758,     1.7101527,     2.0916932,
+    2.5462136,   2.8506856,   2.6769662,   2.517345,     2.3681388,    1.7909182,    1.305195,      0.94104314,
+    0.66973585,  0.55096596,  0.44090569,  0.34073392,   0.25116813,   0.16932508,   0.082378834,   0.029803742,
+    0.15574206,  0.18968138,  0.16220759,  0.14123312,   0.1230865,    0.1074447,    0.1069512,     0.16047736,
+    0.24161115,  0.3642543,   0.54882485,  0.82492906,   1.0703059,    1.3086799,    1.5960146,     1.9397397,
+    2.3473892,   2.8261323,   2.9161084,   2.6513999,    1.9743114,    1.4579605,    1.0665417,     0.77166098,
+    0.55096596,  0.38697228,  0.31062979,  0.24081676,   0.17808735,   0.10020655,   0.049090669,   0.017882323,
+    0.15453278,  0.19134447,  0.16746993,  0.14661011,   0.12845814,   0.11271633,   0.099649422,   0.14708714,
+    0.21790393,  0.32339528,  0.47995898,  0.71112078,   0.99809974,   1.2119061,    1.468274,      1.7734606,
+    2.133774,    2.5551648,   2.7085147,   2.081558,     1.5567621,    1.1543156,    0.8476423,     0.61546981,
+    0.44090569,  0.31062979,  0.21420611,  0.1665564,    0.10316525,   0.05776377,   0.028488953,   0.010446936,
+    0.1533215,   0.19078892,  0.16782472,  0.14765961,   0.1300182,    0.11463311,   0.1012495,     0.13201807,
+    0.19264847,  0.28172541,  0.41219157,  0.60242444,   0.87819707,   1.0971035,    1.3207343,     1.5856591,
+    1.8475604,   1.8073802,   1.7439601,   1.5779446,    1.1852585,    0.88245374,   0.65050262,    0.4740335,
+    0.34073392,  0.24081676,  0.1665564,   0.09413676,   0.057134669,  0.032200783,  0.015985096,   0.0058997343,
+    0.14591624,  0.18241146,  0.16120373,  0.14249404,   0.1260446,    0.11162424,   0.099012084,   0.11448132,
+    0.16471227,  0.23755264,  0.3429068,   0.4946973,    0.71226925,   0.95817453,   1.1464096,     1.1544813,
+    1.1439821,   1.1190749,   1.0801877,   1.0282415,    0.86070496,   0.6434359,    0.47614104,    0.34823546,
+    0.25116813,  0.17808735,  0.10316525,  0.057134669,  0.030231111,  0.017146256,  0.0085655777,  0.0031812373,
+    0.13008402,  0.16331688,  0.14495377,  0.12868311,   0.11431187,   0.10165226,   0.090524487,   0.093597606,
+    0.13289179,  0.18917324,  0.26961187,  0.38418862,   0.53239381,   0.65057075,   0.67031622,    0.67193842,
+    0.66548669,  0.65090579,  0.62842041,  0.59852517,   0.56195939,   0.43712825,   0.32471988,    0.23835677,
+    0.16932508,  0.10020655,  0.05776377,  0.032200783,  0.017146256,  0.0085189966, 0.0042818012,  0.0015999493,
+    0.10300098,  0.12983187,  0.11569799,  0.10312349,   0.091968946,  0.082098484,  0.073381729,   0.068363689,
+    0.095863439, 0.13448031,  0.16896416,  0.2100568,    0.25816226,   0.31339622,   0.34781358,    0.34832948,
+    0.34478524,  0.33715183,  0.32553586,  0.3101756,    0.29142976,   0.26243109,   0.19569288,    0.13523336,
+    0.082378834, 0.049090669, 0.028488953, 0.015985096,  0.0085655777, 0.0042818012, 0.0018937252,  0.00071179745,
+    0.061118193, 0.077326767, 0.06916777,  0.06188108,   0.055391051,  0.048040159,  0.04033751,    0.033529639,
+    0.040011808, 0.0503271,   0.062724777, 0.077394471,  0.094458885,  0.11394219,   0.13560228,    0.13567345,
+    0.13420847,  0.13119647,  0.12667598,  0.12073385,   0.11350145,   0.1051485,    0.07768748,    0.048583396,
+    0.029803742, 0.017882323, 0.010446936, 0.0058997343, 0.0031812373, 0.0015999493, 0.00071179745, 0.00023641172};
+
+const float inv_dequant_matrix4x4[3][64] = {
+    1309.7115,  2775.4709, 865.56787,  865.56787,  396.73831, 396.73831, 140.12735, 140.12735, 2775.4709, 1754.1111,
+    865.56787,  865.56787, 396.73831,  396.73831,  140.12735, 140.12735, 865.56787, 865.56787, 963.03589, 963.03589,
+    211.96735,  211.96735, 90.725655,  90.725655,  865.56787, 865.56787, 963.03589, 963.03589, 211.96735, 211.96735,
+    90.725655,  90.725655, 396.73831,  396.73831,  211.96735, 211.96735, 296.80579, 296.80579, 74.343353, 74.343353,
+    396.73831,  396.73831, 211.96735,  211.96735,  296.80579, 296.80579, 74.343353, 74.343353, 140.12735, 140.12735,
+    90.725655,  90.725655, 74.343353,  74.343353,  57.308441, 57.308441, 140.12735, 140.12735, 90.725655, 90.725655,
+    74.343353,  74.343353, 57.308441,  57.308441,  223.3136,  806.52777, 318.93411, 318.93411, 149.30334, 149.30334,
+    45.661449,  45.661449, 806.52777,  681.10364,  318.93411, 318.93411, 149.30334, 149.30334, 45.661449, 45.661449,
+    318.93411,  318.93411, 275.21634,  275.21634,  145.66022, 145.66022, 54.467594, 54.467594, 318.93411, 318.93411,
+    275.21634,  275.21634, 145.66022,  145.66022,  54.467594, 54.467594, 149.30334, 149.30334, 145.66022, 145.66022,
+    61.948792,  61.948792, 35.90316,   35.90316,   149.30334, 149.30334, 145.66022, 145.66022, 61.948792, 61.948792,
+    35.90316,   35.90316,  45.661449,  45.661449,  54.467594, 54.467594, 35.90316,  35.90316,  16.643375, 16.643375,
+    45.661449,  45.661449, 54.467594,  54.467594,  35.90316,  35.90316,  16.643375, 16.643375, 131.47845, 139.0233,
+    62.525402,  62.525402, 28.172487,  28.172487,  16.82906,  16.82906,  139.0233,  79.715378, 62.525402, 62.525402,
+    28.172487,  28.172487, 16.82906,   16.82906,   62.525402, 62.525402, 12.412107, 12.412107, 15.369019, 15.369019,
+    7.5774412,  7.5774412, 62.525402,  62.525402,  12.412107, 12.412107, 15.369019, 15.369019, 7.5774412, 7.5774412,
+    28.172487,  28.172487, 15.369019,  15.369019,  15.671031, 15.671031, 1.7665962, 1.7665962, 28.172487, 28.172487,
+    15.369019,  15.369019, 15.671031,  15.671031,  1.7665962, 1.7665962, 16.82906,  16.82906,  7.5774412, 7.5774412,
+    1.7665962,  1.7665962, 0.25403151, 0.25403151, 16.82906,  16.82906,  7.5774412, 7.5774412, 1.7665962, 1.7665962,
+    0.25403151, 0.25403151};
+
+const float inv_dequant_matrix8x8[3][64] = {
+    3436.9705, 1844.7115, 1476.2125, 1346.8075, 1294.8971, 1219.3058, 1336.3313, 2710.8547, 1844.7115, 1194.9315,
+    1049.9403, 975.21069, 927.14331, 873.85547, 985.37274, 1992.2549, 1476.2125, 1049.9403, 969.95953, 940.32703,
+    888.44812, 860.51233, 1072.2433, 2168.3904, 1346.8075, 975.21069, 940.32703, 879.86719, 833.52606, 832.72064,
+    1202.8158, 2435.8899, 1294.8971, 927.14331, 888.44812, 833.52606, 794.44464, 952.04614, 1430.6332, 2310.3616,
+    1219.3058, 873.85547, 860.51233, 832.72064, 952.04614, 1227.5648, 1841.1595, 1258.6263, 1336.3313, 985.37274,
+    1072.2433, 1202.8158, 1430.6332, 1841.1595, 1102.3274, 693.00897, 2710.8547, 1992.2549, 2168.3904, 2435.8899,
+    2310.3616, 1258.6263, 693.00897, 467.98444, 527.77002, 492.65561, 550.70117, 432.71796, 360.75452, 346.31924,
+    383.70496, 689.83191, 492.65561, 298.52472, 337.552,   245.20557, 212.67296, 212.82637, 246.32417, 450.54834,
+    550.70117, 337.552,   259.35614, 256.15363, 222.99277, 206.25125, 237.36348, 425.44247, 432.71796, 245.20557,
+    256.15363, 199.77573, 211.62845, 227.93318, 283.04922, 490.9874,  360.75452, 212.67296, 222.99277, 211.62845,
+    185.37491, 227.84924, 329.37961, 594.96875, 346.31924, 212.82637, 206.25125, 227.93318, 227.84924, 251.22809,
+    366.73212, 655.12872, 383.70496, 246.32417, 237.36348, 283.04922, 329.37961, 366.73212, 437.50745, 782.39978,
+    689.83191, 450.54834, 425.44247, 490.9874,  594.96875, 655.12872, 782.39978, 1256.8921, 270.96231, 168.16577,
+    68.006966, 141.36403, 296.82614, 222.0249,  202.5833,  710.83905, 168.16577, 34.3498,   16.236092, 33.422955,
+    74.581551, 69.45665,  80.322029, 308.22791, 68.006966, 16.236092, 37.456863, 47.443451, 50.487465, 40.416729,
+    55.474945, 205.44962, 141.36403, 33.422955, 47.443451, 80.821915, 49.162575, 33.970627, 74.286789, 250.84735,
+    296.82614, 74.581551, 50.487465, 49.162575, 52.69698,  61.84082,  122.95097, 273.76575, 222.0249,  69.45665,
+    40.416729, 33.970627, 61.84082,  141.81615, 268.52505, 170.3924,  202.5833,  80.322029, 55.474945, 74.286789,
+    122.95097, 268.52505, 203.73096, 111.4241,  710.83905, 308.22791, 205.44962, 250.84735, 273.76575, 170.3924,
+    111.4241,  87.56189};
+
+const float inv_dequant_matrix16x16[3][256] = {
+    11501.999,  10368.255,   7641.9771,   5576.3042,   4026.3054,    3653.2012,    3535.0991,    3373.2771,
+    3167.1096,  2742.6836,   2188.0012,   1691.1674,   1250.6367,    578.61859,    81.089149,    8.4954767,
+    10368.255,  15107.03,    11150.453,   9052.5645,   6468.2017,    5887.9912,    5564.8647,    5222.3135,
+    4842.7441,  4126.4912,   3269.1787,   2512.6277,   1849.4532,    805.38654,    112.94549,    11.840084,
+    7641.9771,  11150.453,   9729.4424,   6953.7197,   5813.3975,    5977.7075,    5979.1963,    5717.8613,
+    5230.3096,  4323.9507,   3403.0554,   2601.5161,   1906.396,     698.57043,    98.825317,    10.439095,
+    5576.3042,  9052.5645,   6953.7197,   6141.5532,   5685.8975,    5346.7959,    5410.8452,    5339.0493,
+    5107.5195,  4288.7998,   3482.6494,   2650.0784,   1934.4652,    534.9198,     76.925491,    8.2431412,
+    4026.3054,  6468.2017,   5813.3975,   5685.8975,   5939.3047,    5509.127,     5109.938,     4860.0098,
+    4458.0278,  3779.7075,   3107.3369,   2462.4895,   1859.722,     360.86292,    53.406803,    5.8415651,
+    3653.2012,  5887.9912,   5977.7075,   5346.7959,   5509.127,     5615.1494,    5187.9702,    4701.6045,
+    3876.2273,  3276.0898,   2714.1345,   2165.5901,   1101.6542,    198.20827,    31.068926,    3.590765,
+    3535.0991,  5564.8647,   5979.1963,   5410.8452,   5109.938,     5187.9702,    5178.3481,    4347.7568,
+    3594.9678,  2918.3101,   2331.8926,   1872.3861,   518.45917,    97.569183,    15.917813,    1.906585,
+    3373.2771,  5222.3135,   5717.8613,   5339.0493,   4860.0098,    4701.6045,    4347.7568,    3922.6702,
+    3256.0374,  2651.0408,   2107.5405,   1027.4274,   219.53149,    43.332844,    7.3797588,    0.91884983,
+    3167.1096,  4842.7441,   5230.3096,   5107.5195,   4458.0278,    3876.2273,    3594.9678,    3256.0374,
+    2876.6914,  2350.5154,   1669.6628,   399.24158,   87.962257,    17.546885,    3.111315,     0.41261625,
+    2742.6836,  4126.4912,   4323.9507,   4288.7998,   3779.7075,    3276.0898,    2918.3101,    2651.0408,
+    2350.5154,  2029.33,     556.8006,    140.82146,   32.669235,    6.8328743,    1.2281532,    0.24052225,
+    2188.0012,  3269.1787,   3403.0554,   3482.6494,   3107.3369,    2714.1345,    2331.8926,    2107.5405,
+    1669.6628,  556.8006,    169.53693,   45.292107,   11.058399,    2.4253998,    0.54388332,   0.13828588,
+    1691.1674,  2512.6277,   2601.5161,   2650.0784,   2462.4895,    2165.5901,    1872.3861,    1027.4274,
+    399.24158,  140.82146,   45.292107,   13.295495,   3.4122827,    0.8200677,    0.28942502,   0.075148501,
+    1250.6367,  1849.4532,   1906.396,    1934.4652,   1859.722,     1101.6542,    518.45917,    219.53149,
+    87.962257,  32.669235,   11.058399,   3.4122827,   0.95445555,   0.39649919,   0.14308882,   0.037939593,
+    578.61859,  805.38654,   698.57043,   534.9198,    360.86292,    198.20827,    97.569183,    43.332844,
+    17.546885,  6.8328743,   2.4253998,   0.8200677,   0.39649919,   0.17451052,   0.06436763,   0.017424144,
+    81.089149,  112.94549,   98.825317,   76.925491,   53.406803,    31.068926,    15.917813,    7.3797588,
+    3.111315,   1.2281532,   0.54388332,  0.28942502,  0.14308882,   0.06436763,   0.025046432,  0.0069190548,
+    8.4954767,  11.840084,   10.439095,   8.2431412,   5.8415651,    3.590765,     1.906585,     0.91884983,
+    0.41261625, 0.24052225,  0.13828588,  0.075148501, 0.037939593,  0.017424144,  0.0069190548, 0.0020089978,
+    2525.2463,  2889.5769,   2448.946,    2054.7781,   1705.965,     1334.7472,    1015.6563,    762.1087,
+    562.66223,  435.28247,   349.84714,   272.42859,   202.97031,    138.50891,    79.389687,    34.017712,
+    2889.5769,  4040.8655,   3651.2961,   2971.114,    2442.6589,    1891.9027,    1437.5348,    1077.6138,
+    795.04034,  619.11987,   497.04764,   386.70529,   287.89609,    195.78772,    112.18296,    48.05555,
+    2448.946,   3651.2961,   3170.6489,   2899.0952,   2384.0686,    1825.5522,    1383.5625,    1037.3011,
+    767.29541,  611.52161,   490.77493,   381.7207,    284.1236,     191.57233,    109.81701,    47.060715,
+    2054.7781,  2971.114,    2899.0952,   2438.3906,   2078.4248,    1698.991,     1296.4403,    974.94934,
+    726.59454,  594.83191,   476.06509,   370.42432,   275.80826,    183.46349,    105.29673,    45.171799,
+    1705.965,   2442.6589,   2384.0686,   2078.4248,   1678.5922,    1406.3754,    1140.9033,    885.44263,
+    699.23431,  572.6062,    458.19373,   355.5137,    264.08118,    172.00293,    98.863861,    42.484276,
+    1334.7472,  1891.9027,   1825.5522,   1698.991,    1406.3754,    1129.6786,    931.25238,    756.44586,
+    654.76392,  540.37109,   432.85818,   336.13327,   244.91531,    158.96724,    91.382034,    39.258305,
+    1015.6563,  1437.5348,   1383.5625,   1296.4403,   1140.9033,    931.25238,    741.59857,    660.7334,
+    575.08978,  488.09857,   400.88797,   311.76273,   220.59109,    143.60771,    82.769249,    35.640659,
+    762.1087,   1077.6138,   1037.3011,   974.94934,   885.44263,    756.44586,    660.7334,     567.71997,
+    496.48697,  423.3364,    350.61719,   273.89575,   193.89845,    126.67858,    73.240982,    31.625689,
+    562.66223,  795.04034,   767.29541,   726.59454,   699.23431,    654.76392,    575.08978,    496.48697,
+    420.74704,  360.22037,   297.81375,   225.37112,   162.28207,    108.68169,    63.209522,    27.185785,
+    435.28247,  619.11987,   611.52161,   594.83191,   572.6062,     540.37109,    488.09857,    423.3364,
+    360.22037,  299.95483,   237.33531,   180.70923,   130.87787,    88.126045,    52.347446,    20.319235,
+    349.84714,  497.04764,   490.77493,   476.06509,   458.19373,    432.85818,    400.88797,    350.61719,
+    297.81375,  237.33531,   183.6788,    140.66177,   102.4373,     69.338005,    39.104111,    14.231859,
+    272.42859,  386.70529,   381.7207,    370.42432,   355.5137,     336.13327,    311.76273,    273.89575,
+    225.37112,  180.70923,   140.66177,   105.58356,   77.293259,    51.83585,     25.757425,    9.4778728,
+    202.97031,  287.89609,   284.1236,    275.80826,   264.08118,    244.91531,    220.59109,    193.89845,
+    162.28207,  130.87787,   102.4373,    77.293259,   55.557743,    31.874472,    16.019382,    5.9585524,
+    138.50891,  195.78772,   191.57233,   183.46349,   172.00293,    158.96724,    143.60771,    126.67858,
+    108.68169,  88.126045,   69.338005,   51.83585,    31.874472,    18.105633,    9.1997948,    3.4581161,
+    79.389687,  112.18296,   109.81701,   105.29673,   98.863861,    91.382034,    82.769249,    73.240982,
+    63.209522,  52.347446,   39.104111,   25.757425,   16.019382,    9.1997948,    4.6316514,    1.7588025,
+    34.017712,  48.05555,    47.060715,   45.171799,   42.484276,    39.258305,    35.640659,    31.625689,
+    27.185785,  20.319235,   14.231859,   9.4778728,   5.9585524,    3.4581161,    1.7588025,    0.66216683,
+    1446.5029,  924.62518,   572.92578,   351.4566,    213.33621,    212.72006,    246.48369,    281.63739,
+    316.63077,  251.52148,   143.16299,   78.950287,   41.656433,    17.327106,    4.0522857,    0.70848191,
+    924.62518,  1486.4128,   770.14148,   453.16818,   277.55359,    300.41132,    347.95648,    397.45508,
+    446.72147,  342.48343,   195.58694,   108.15432,   57.19471,     23.270824,    5.4606013,    0.95748037,
+    572.92578,  770.14148,   748.9671,    426.07556,   295.41718,    325.44443,    361.09332,    403.77899,
+    452.23843,  311.41495,   179.25958,   99.774551,   53.052128,    20.188051,    4.7801628,    0.8447665,
+    351.4566,   453.16818,   426.07556,   369.88556,   366.96259,    380.01599,    411.38367,    446.6752,
+    465.06082,  272.61554,   156.15523,   87.772934,   47.059536,    16.029459,    3.8500388,    0.68891639,
+    213.33621,  277.55359,   295.41718,   366.96259,   461.51831,    464.63062,    478.58163,    503.44534,
+    399.84344,  236.99382,   136.74025,   76.280289,   40.646385,    11.710953,    2.8594441,    0.52021164,
+    212.72006,  300.41132,   325.44443,   380.01599,   464.63062,    562.96234,    569.17358,    540.617,
+    327.05588,  195.95148,   114.42867,   64.488777,   29.247095,    8.2048635,    2.023452,     0.37066093,
+    246.48369,  347.95648,   361.09332,   411.38367,   478.58163,    569.17358,    669.84436,    423.19632,
+    260.71509,  156.51753,   91.491295,   52.194027,   18.443186,    5.3108273,    1.3403723,    0.25064033,
+    281.63739,  397.45508,   403.77899,   446.6752,    503.44534,    540.617,      423.19632,    314.78876,
+    197.81201,  120.77065,   71.456383,   33.551113,   10.751633,    3.1861141,    0.825037,     0.15787415,
+    316.63077,  446.72147,   452.23843,   465.06082,   399.84344,    327.05588,    260.71509,    197.81201,
+    143.21358,  89.001381,   50.928936,   17.906925,   5.8927317,    1.7789971,    0.47286242,   0.09345492,
+    251.52148,  342.48343,   311.41495,   272.61554,   236.99382,    195.95148,    156.51753,    120.77065,
+    89.001381,  62.675388,   24.320698,   8.8727207,   3.0197287,    0.94003648,   0.25461531,   0.058018267,
+    143.16299,  195.58694,   179.25958,   156.15523,   136.74025,    114.42867,    91.491295,    71.456383,
+    50.928936,  24.320698,   10.802683,   4.0875583,   1.4389318,    0.46211067,   0.13661577,   0.034396227,
+    78.950287,  108.15432,   99.774551,   87.772934,   76.280289,    64.488777,    52.194027,    33.551113,
+    17.906925,  8.8727207,   4.0875583,   1.7478259,   0.6361227,    0.21388066,   0.074568629,  0.019158321,
+    41.656433,  57.19471,    53.052128,   47.059536,   40.646385,    29.247095,    18.443186,    10.751633,
+    5.8927317,  3.0197287,   1.4389318,   0.6361227,   0.25886643,   0.1060019,    0.037771821,  0.0099044079,
+    17.327106,  23.270824,   20.188051,   16.029459,   11.710953,    8.2048635,    5.3108273,    3.1861141,
+    1.7789971,  0.94003648,  0.46211067,  0.21388066,  0.1060019,    0.047762275,  0.017389525,  0.0046533952,
+    4.0522857,  5.4606013,   4.7801628,   3.8500388,   2.8594441,    2.023452,     1.3403723,    0.825037,
+    0.47286242, 0.25461531,  0.13661577,  0.074568629, 0.037771821,  0.017389525,  0.0069175116, 0.0018885656,
+    0.70848191, 0.95748037,  0.8447665,   0.68891639,  0.52021164,   0.37066093,   0.25064033,   0.15787415,
+    0.09345492, 0.058018267, 0.034396227, 0.019158321, 0.0099044079, 0.0046533952, 0.0018885656, 0.00055991771};
+
+const float inv_dequant_matrix32x32[3][1024] = {
+    27759.77,    21059.225,   17197.404,   14009.621,    11384.654,    9228.3701,    7461.3525,     7150.5508,
+    7268.2134,   7366.9727,   7445.0757,   7500.7153,    7532.0405,    7672.0742,    7940.2881,     8184.7671,
+    8399.7754,   8578.9199,   8715.1123,   8204.1768,    5901.9595,    4212.6611,    2979.4939,     2084.4783,
+    1439.1108,   977.21906,   498.25595,   243.23961,    113.90276,    49.963619,    19.465729,     5.6832814,
+    21059.225,   41802.43,    33001.137,   22017.973,    14028.052,    11027.208,    9177.4805,     9127.9932,
+    9401.3721,   9626.54,     9807.5059,   9945.9082,    10041.953,    10290.417,    10690.814,     11056.282,
+    11379.305,   11651.347,   11862.804,   11092.277,    7997.4121,    5719.8379,    4052.873,      2840.1489,
+    1963.8131,   1335.384,    678.09991,   331.49792,    155.43265,    68.262848,    26.624926,     7.7816625,
+    17197.404,   33001.137,   31322.645,   16107.518,    20768.982,    16866.672,    11477.315,     10119.021,
+    9224.4365,   8860.9131,   9103.8574,   9295.8721,    9439.124,     9760.751,     10178.821,     10560.983,
+    10900.225,   11188.44,    11416.414,   10419.651,    7535.3262,    5404.1553,    3838.7322,     2696.1909,
+    1868.1443,   1262.802,    637.091,     312.14111,    146.65782,    64.532372,    25.214819,     7.3818192,
+    14009.621,   22017.973,   16107.518,   23356.148,    12579.962,    11930.277,    14327.699,     15589.824,
+    13492.236,   12047.982,   10995.165,   10188.766,    9542.8945,    9282.8301,    9699.457,      10094.318,
+    10446.271,   10747.525,   10989.166,   9622.0049,    6984.8052,    5026.4624,    3581.5305,     2522.6624,
+    1752.4392,   1154.8401,   584.52124,   287.24786,    135.33934,    59.706989,    23.386017,     6.8619599,
+    11384.654,   14028.052,   20768.982,   12579.962,    17330.215,    10664.609,    9358.6357,     12095.17,
+    14840.516,   16818.152,   14877.189,   13421.403,    12340.292,    11826.55,     11420.888,     11086.156,
+    10795.019,   10526.329,   10583.537,   8740.2393,    6372.8252,    4604.4819,    3292.8477,     2327.0752,
+    1621.5214,   1030.8453,   523.88489,   258.42047,    122.18353,    54.078941,    21.246124,     6.2518306,
+    9228.3701,   11027.208,   16866.672,   11930.277,    10664.609,    15460.8,      11664.922,     9283.8359,
+    10725.442,   12959.929,   15160.631,   17272.707,    16045.197,    15082.702,    14317.262,     13686.755,
+    13146.663,   12663.691,   12211.836,   8850.0156,    6169.0576,    4279.8501,    2984.6077,     2117.2358,
+    1480.446,    898.57104,   458.86658,   227.36241,    107.94701,    47.963463,    18.911961,     5.931848,
+    7461.3525,   9177.4805,   11477.315,   14327.699,    9358.6357,    11664.922,    15815.642,     12446.541,
+    10172.596,   9795.2471,   11648.097,   13485.994,    15774.219,    18103.338,    17922.318,     16890.668,
+    16015.786,   15247.486,   13680.967,   9519.6162,    6605.5288,    4563.5151,    3133.4592,     2133.8232,
+    1437.2615,   793.76337,   393.58154,   195.71045,    93.363907,    41.669231,    16.498865,     5.8748393,
+    7150.5508,   9127.9932,   10119.021,   15589.824,    12095.17,     9283.8359,    12446.541,     16093.665,
+    13053.992,   10887.75,    9296.4766,   10870.665,    12847.146,    14894.084,    16975.637,     19054.441,
+    19474.178,   18337.799,   14550.531,   10084.281,    6971.3931,    4799.7104,    3285.1311,     2230.4968,
+    1498.2601,   769.28986,   381.51404,   183.99359,    85.196907,    36.985542,    14.271405,     5.8310022,
+    7268.2134,   9401.3721,   9224.4365,   13492.236,    14840.516,    10725.442,    10172.596,     13053.992,
+    16287.741,   13510.883,   11567.537,   10280.332,    10603.812,    12384.636,    14221.5,       16080.72,
+    17926.018,   19718.295,   15213.538,   10512.919,    7247.4067,    4976.5244,    3397.6372,     2301.4634,
+    1439.8313,   729.37701,   362.04837,   174.75305,    80.981865,    35.181698,    13.584824,     6.4417892,
+    7366.9727,   9626.54,     8860.9131,   12047.982,    16818.152,    12959.929,    9795.2471,     10887.75,
+    13510.883,   16515.686,   14318.475,   12633.561,    11316.671,    10416.793,    12026.117,     13674.461,
+    15329.456,   14617.337,   11877.263,   9498.1074,    7417.9546,    5084.3042,    3465.0359,     2343.084,
+    1333.0708,   676.49011,   336.34363,   162.59087,    75.451576,    32.822098,    12.744491,     7.2561679,
+    7445.0757,   9807.5059,   9103.8574,   10995.165,    14877.189,    15160.631,    11648.097,     9296.4766,
+    11567.537,   14318.475,   17420.896,   15300.858,    13626.496,    12281.412,    11181.207,     11722.818,
+    12854.423,   10747.37,    8832.1777,   7139.3735,    5678.6509,    4444.5615,    3421.6125,     2334.5205,
+    1206.4956,   613.80725,   305.8902,    148.18874,    68.906136,    30.030893,    14.446028,     8.1871176,
+    7500.7153,   9945.9082,   9295.8721,   10188.766,    13421.403,    17272.707,    13485.994,     10870.665,
+    10280.332,   12633.561,   15300.858,   18262.137,    16201.403,    14532.124,    13158.543,     12009.08,
+    9286.1982,   7848.771,    6518.2598,   5322.4702,    4274.624,     3376.6462,    2622.3745,     1767.2256,
+    1026.2209,   544.88373,   272.35867,   132.31184,    61.682884,    26.947779,    16.392521,     9.2476816,
+    7532.0405,   10041.953,   9439.124,    9542.8945,    12340.292,    16045.197,    15774.219,     12847.146,
+    10603.812,   11316.671,   13626.496,   16201.403,    19018.902,    17003.117,    15333.513,     12113.333,
+    8512.9658,   5974.5293,   4778.8994,   3939.0884,    3192.512,     2544.0391,    1987.8954,     1194.4711,
+    701.91656,   402.39334,   224.12312,   115.73875,    54.126377,    24.912899,    18.610136,     10.451086,
+    7672.0742,   10290.417,   9760.751,    9282.8301,    11826.55,     15082.702,    18103.338,     14894.084,
+    12384.636,   10416.793,   12281.412,   14532.124,    17003.117,    19669.309,    15923.036,     11255.816,
+    7943.751,    5594.6123,   3929.5447,   2896.1738,    2367.2974,    1902.0885,    1304.9626,     793.82458,
+    471.99438,   273.63303,   154.04323,   83.633995,    43.267982,    28.411644,    21.124651,     11.810225,
+    7940.2881,   10690.814,   10178.821,   9699.457,     11420.888,    14317.262,    17922.318,     16975.637,
+    14221.5,     12026.117,   11181.207,   13158.543,    15333.513,    15923.036,    14433.291,     10262.04,
+    7278.2539,   5147.5425,   3628.5488,   2547.605,     1779.9957,    1340.0419,    843.30066,     519.19763,
+    312.29068,   183.06111,   104.15273,   57.123077,    29.840292,    27.097876,    21.966597,     13.321898,
+    8184.7671,   11056.282,   10560.983,   10094.318,    11086.156,    13686.755,    16890.668,     19054.441,
+    16080.72,    13674.461,   11722.818,   12009.08,     12113.333,    11255.816,    10262.04,      9185.8691,
+    6550.9277,   4655.3447,   3295.269,    2322.0076,    1627.5142,    917.76697,    536.91687,     334.45496,
+    203.45375,   120.56556,   69.316536,   38.400562,    27.800972,    25.429491,    20.622055,     12.511635,
+    8399.7754,   11379.305,   10900.225,   10446.271,    10795.019,    13146.663,    16015.786,     19474.178,
+    17926.018,   15329.456,   12854.423,   9286.1982,    8512.9658,    7943.751,     7278.2539,     6550.9277,
+    5795.0566,   4139.7749,   2943.8821,   2082.8845,    1252.6182,    702.10406,    388.85199,     212.49629,
+    130.59125,   78.21463,    45.431793,   26.701956,    26.124189,    23.897495,    19.382244,     11.761528,
+    8578.9199,   11651.347,   11188.44,    10747.525,    10526.329,    12663.691,    15247.486,     18337.799,
+    19718.295,   14617.337,   10747.37,    7848.771,     5974.5293,    5594.6123,    5147.5425,     4655.3447,
+    4139.7749,   3620.7822,   2587.6548,   1624.6555,    929.41766,    525.53235,    293.45462,     161.59938,
+    87.580582,   49.999958,   29.33618,    25.129185,    24.578579,    22.479414,    18.230083,     11.061857,
+    8715.1123,   11862.804,   11416.414,   10989.166,    10583.537,    12211.836,    13680.967,     14550.531,
+    15213.538,   11877.263,   8832.1777,   6518.2598,    4778.8994,    3929.5447,    3628.5488,     3295.269,
+    2943.8821,   2587.6548,   1999.1306,   1168.3282,    674.8075,     385.01572,    216.81653,     120.34917,
+    65.71418,    35.191235,   23.185534,   23.670441,    23.139233,    21.153818,    17.149212,     10.403255,
+    8204.1768,   11092.277,   10419.651,   9622.0049,    8740.2393,    8850.0156,    9519.6162,     10084.281,
+    10512.919,   9498.1074,   7139.3735,   5322.4702,    3939.0884,    2896.1738,    2547.605,      2322.0076,
+    2082.8845,   1624.6555,   1168.3282,   822.27863,    479.57297,    276.1431,     156.85684,     87.78054,
+    48.301502,   27.678949,   25.619358,   23.148615,    21.782259,    19.900043,    16.123768,     9.7766094,
+    5901.9595,   7997.4121,   7535.3262,   6984.8052,    6372.8252,    6169.0576,    6605.5288,     6971.3931,
+    7247.4067,   7417.9546,   5678.6509,   4274.624,     3192.512,     2367.2974,    1779.9957,     1627.5142,
+    1252.6182,   929.41766,   674.8075,    479.57297,    333.64413,    193.90337,    111.11308,     62.700363,
+    34.773911,   30.949808,   28.546915,   25.704071,    22.303091,    18.697893,    15.138191,     9.1729412,
+    4212.6611,   5719.8379,   5404.1553,   5026.4624,    4604.4819,    4279.8501,    4563.5151,     4799.7104,
+    4976.5244,   5084.3042,   4444.5615,   3376.6462,    2544.0391,    1902.0885,    1340.0419,     917.76697,
+    702.10406,   525.53235,   385.01572,   276.1431,     193.90337,    133.27422,    77.049034,     43.845497,
+    36.757248,   34.346748,   31.57686,    28.339281,    24.509333,    19.942665,    14.471446,     8.5832787,
+    2979.4939,   4052.873,    3838.7322,   3581.5305,    3292.8477,    2984.6077,    3133.4592,     3285.1311,
+    3397.6372,   3465.0359,   3421.6125,   2622.3745,    1987.8954,    1304.9626,    843.30066,     536.91687,
+    388.85199,   293.45462,   216.81653,   156.85684,    111.11308,    77.049034,    52.270931,     43.039375,
+    40.54631,    37.773903,   34.622028,   30.976885,    26.707983,    21.664829,    15.673048,     10.26242,
+    2084.4783,   2840.1489,   2696.1909,   2522.6624,    2327.0752,    2117.2358,    2133.8232,     2230.4968,
+    2301.4634,   2343.084,    2334.5205,   1767.2256,    1194.4711,    793.82458,    519.19763,     334.45496,
+    212.49629,   161.59938,   120.34917,   87.78054,     62.700363,    43.845497,    43.039375,     47.077499,
+    44.23122,    41.092598,   37.557102,   33.506355,    28.805136,    23.298027,    18.485712,     14.080012,
+    1439.1108,   1963.8131,   1868.1443,   1752.4392,    1621.5214,    1480.446,     1437.2615,     1498.2601,
+    1439.8313,   1333.0708,   1206.4956,   1026.2209,    701.91656,    471.99438,    312.29068,     203.45375,
+    130.59125,   87.580582,   65.71418,    48.301502,    34.773911,    36.757248,    40.54631,      44.23122,
+    47.600193,   44.109062,   40.207745,   35.774635,    30.671337,    25.435469,    25.433935,     19.216208,
+    977.21906,   1335.384,    1262.802,    1154.8401,    1030.8453,    898.57104,    793.76337,     769.28986,
+    729.37701,   676.49011,   613.80725,   544.88373,    402.39334,    273.63303,    183.06111,     120.56556,
+    78.21463,    49.999958,   35.191235,   27.678949,    30.949808,    34.346748,    37.773903,     41.092598,
+    44.109062,   46.559208,   42.337101,   37.574516,    32.132,       34.955532,    34.672985,     25.989962,
+    498.25595,   678.09991,   637.091,     584.52124,    523.88489,    458.86658,    393.58154,     381.51404,
+    362.04837,   336.34363,   305.8902,    272.35867,    224.12312,    154.04323,    104.15273,     69.316536,
+    45.431793,   29.33618,    23.185534,   25.619358,    28.546915,    31.57686,     34.622028,     37.557102,
+    40.207745,   42.337101,   43.629288,   38.630398,    43.10384,     47.337585,    46.587337,     34.651222,
+    243.23961,   331.49792,   312.14111,   287.24786,    258.42047,    227.36241,    195.71045,     183.99359,
+    174.75305,   162.59087,   148.18874,   132.31184,    115.73875,    83.633995,    57.123077,     38.400562,
+    26.701956,   25.129185,   23.670441,   23.148615,    25.704071,    28.339281,    30.976885,     33.506355,
+    35.774635,   37.574516,   38.630398,   49.903534,    57.507961,    62.669315,    61.205349,     45.180832,
+    113.90276,   155.43265,   146.65782,   135.33934,    122.18353,    107.94701,    93.363907,     85.196907,
+    80.981865,   75.451576,   68.906136,   61.682884,    54.126377,    43.267982,    29.840292,     27.800972,
+    26.124189,   24.578579,   23.139233,   21.782259,    22.303091,    24.509333,    26.707983,     28.805136,
+    30.671337,   32.132,      43.10384,    57.507961,    74.019264,    80.057747,    77.606369,     56.866417,
+    49.963619,   68.262848,   64.532372,   59.706989,    54.078941,    47.963463,    41.669231,     36.985542,
+    35.181698,   32.822098,   30.030893,   26.947779,    24.912899,    28.411644,    27.097876,     25.429491,
+    23.897495,   22.479414,   21.153818,   19.900043,    18.697893,    19.942665,    21.664829,     23.298027,
+    25.435469,   34.955532,   47.337585,   62.669315,    80.057747,    96.33802,     92.712547,     67.44825,
+    19.465729,   26.624926,   25.214819,   23.386017,    21.246124,    18.911961,    16.498865,     14.271405,
+    13.584824,   12.744491,   14.446028,   16.392521,    18.610136,    21.124651,    21.966597,     20.622055,
+    19.382244,   18.230083,   17.149212,   16.123768,    15.138191,    14.471446,    15.673048,     18.485712,
+    25.433935,   34.672985,   46.587337,   61.205349,    77.606369,    92.712547,    98.911064,     71.454735,
+    5.6832814,   7.7816625,   7.3818192,   6.8619599,    6.2518306,    5.931848,     5.8748393,     5.8310022,
+    6.4417892,   7.2561679,   8.1871176,   9.2476816,    10.451086,    11.810225,    13.321898,     12.511635,
+    11.761528,   11.061857,   10.403255,   9.7766094,    9.1729412,    8.5832787,    10.26242,      14.080012,
+    19.216208,   25.989962,   34.651222,   45.180832,    56.866417,    67.44825,     71.454735,     57.03159,
+    5647.4438,   6753.8403,   5959.4565,   5245.7246,    4606.1094,    4034.3606,    3524.541,      3157.363,
+    2848.7861,   2563.1165,   2299.2993,   2056.2517,    1832.8772,    1640.2583,    1474.5443,     1320.2314,
+    1176.8837,   1044.0483,   921.2619,    809.23602,    709.88043,    617.86603,    532.87958,     454.60342,
+    382.71756,   316.90189,   247.11736,   186.80002,    135.44676,    91.998367,    55.499458,     25.090487,
+    6753.8403,   9488.665,    9931.2803,   8553.8994,    7292.7812,    6249.7036,    5372.2231,     4770.6211,
+    4266.6396,   3812.8267,   3402.0034,   3029.106,     2690.2749,    2401.6631,    2153.3167,     1923.5682,
+    1711.2981,   1515.4852,   1335.1792,   1171.4097,    1026.3,       892.26086,    768.74207,     655.20587,
+    551.12677,   455.99094,   355.04221,   268.21002,    194.36061,    131.94092,    79.554382,     35.948029,
+    5959.4565,   9931.2803,   7932.855,    7885.7764,    7431.9438,    6555.0298,    5627.7007,     5011.5894,
+    4468.5908,   3979.2915,   3532.7783,   3132.7627,    2772.9871,    2472.9675,    2211.6484,     1971.3832,
+    1750.5248,   1547.6572,   1361.5294,   1193.6245,    1044.517,     907.12451,    780.78949,     664.8869,
+    558.81952,   461.48712,   358.64676,   270.78668,    196.12987,    133.08028,    80.206787,     36.228275,
+    5245.7246,   8553.8994,   7885.7764,   6599.938,     6392.4233,    5990.2383,    5585.4102,     5132.8647,
+    4559.7266,   4052.7339,   3600.2258,   3194.0774,    2828.3188,    2530.5908,    2258.1785,     2008.9121,
+    1780.8026,   1572.0654,   1381.1697,   1210.509,     1058.1282,    918.03662,    789.47559,     671.73779,
+    564.15955,   463.70047,   360.19818,   271.84094,    196.81523,    133.49622,    80.430031,     36.317585,
+    4606.1094,   7292.7812,   7431.9438,   6392.4233,    5463.9927,    5234.7217,    5027.3745,     4747.835,
+    4432.0508,   4077.1138,   3616.7708,   3204.7395,    2840.1484,    2544.2102,    2272.9863,     2024.2074,
+    1795.993,    1586.7396,   1393.4408,   1221.5485,    1066.7343,    924.68793,    794.56097,     675.57465,
+    567.00665,   463.13876,   359.63824,   271.33292,    196.3905,     133.17256,    80.215202,     36.212284,
+    4034.3606,   6249.7036,   6555.0298,   5990.2383,    5234.7217,    4639.8286,    4495.0903,     4289.3535,
+    4036.0081,   3757.8186,   3468.5669,   3177.3008,    2836.9285,    2539.3843,    2267.095,      2017.6912,
+    1789.1992,   1579.9347,   1388.4299,   1220.6163,    1067.0275,    925.80829,    795.85162,     676.24988,
+    567.24963,   459.76926,   356.94595,   269.24948,    194.8479,     132.10498,    79.560242,     36.477112,
+    3524.541,    5372.2231,   5627.7007,   5585.4102,    5027.3745,    4495.0903,    4010.2107,     3850.562,
+    3651.6968,   3424.1008,   3178.1006,   2926.1726,    2711.8442,    2496.7043,    2245.2983,     1997.7461,
+    1771.0409,   1563.5052,   1375.428,    1209.7025,    1057.1802,    917.02057,    788.44995,     670.74811,
+    563.23944,   452.99265,   352.11801,   265.60474,    192.19832,    130.30128,    78.470016,     37.137085,
+    3157.363,    4770.6211,   5011.5894,   5132.8647,    4747.835,     4289.3535,    3850.562,      3447.8291,
+    3286.3145,   3099.2502,   2895.1919,   2699.8142,    2511.5901,    2320.4753,    2129.8306,     1942.3022,
+    1741.6445,   1537.5632,   1355.778,    1192.2618,    1041.8038,    903.57141,    776.79602,     660.76172,
+    554.79736,   441.61682,   343.37253,   259.37482,    187.94739,    127.58199,    76.923965,     37.807724,
+    2848.7861,   4266.6396,   4468.5908,   4559.7266,    4432.0508,    4036.0081,    3651.6968,     3286.3145,
+    2948.2344,   2792.2668,   2631.2812,   2478.1697,    2314.8501,    2145.7014,    1975.4551,     1806.6643,
+    1641.3643,   1481.1416,   1328.6294,   1168.4478,    1021.0288,    885.57111,    761.33032,     647.61041,
+    538.33154,   427.79807,   332.76541,   251.4594,     182.27744,    123.77497,    74.652321,     38.330761,
+    2563.1165,   3812.8267,   3979.2915,   4052.7339,    4077.1138,    3757.8186,    3424.1008,     3099.2502,
+    2792.2668,   2514.804,    2395.2642,   2263.0088,    2121.7727,    1974.774,     1823.2893,     1672.0176,
+    1522.9102,   1381.8047,   1248.6274,   1119.8175,    995.08313,    863.20764,    742.2077,      631.42041,
+    517.84808,   411.75937,   320.4624,    242.28418,    175.70909,    119.36732,    72.104607,     38.818394,
+    2299.2993,   3402.0034,   3532.7783,   3600.2258,    3616.7708,    3468.5669,    3178.1006,     2895.1919,
+    2631.2812,   2395.2642,   2173.4993,   2059.0305,    1935.8486,    1806.7939,    1674.4062,     1539.9724,
+    1406.8092,   1283.0233,   1161.7151,   1043.8623,    930.24261,    821.45776,    717.9574,      611.66425,
+    494.85779,   393.75824,   306.65472,   231.98755,    168.33853,    114.42181,    73.055611,     39.285583,
+    2056.2517,   3029.106,    3132.7627,   3194.0774,    3204.7395,    3177.3008,    2926.1726,     2699.8142,
+    2478.1697,   2263.0088,   2059.0305,   1866.9041,    1759.4436,    1646.1713,    1529.2731,     1410.6986,
+    1297.5624,   1185.7885,   1075.7563,   968.41003,    864.51721,    764.68787,    669.39166,     568.6261,
+    467.31378,   374.0784,    291.55334,   220.72223,    160.27208,    109.0079,     73.953133,     39.721516,
+    1832.8772,   2690.2749,   2772.9871,   2828.3188,    2840.1484,    2836.9285,    2711.8442,     2511.5901,
+    2314.8501,   2121.7727,   1935.8486,   1759.4436,    1593.079,     1493.7609,    1390.7449,     1289.4801,
+    1190.1628,   1090.6735,   991.50104,   894.14368,    799.57288,    708.38879,    620.86053,     518.82355,
+    427.24158,   345.59891,   273.30957,   208.65121,    151.62337,    104.52164,    74.773422,     40.113838,
+    1640.2583,   2401.6631,   2472.9675,   2530.5908,    2544.2102,    2539.3843,    2496.7043,     2320.4753,
+    2145.7014,   1974.774,    1806.7939,   1646.1713,    1493.7609,    1349.9647,    1262.1243,     1175.2073,
+    1086.6716,   997.62921,   909.06207,   821.62915,    735.92542,    653.02197,    561.79041,     470.45264,
+    388.18765,   314.60828,   249.25372,   191.61386,    141.14845,    105.65822,    75.489624,     40.448593,
+    1474.5443,   2153.3167,   2211.6484,   2258.1785,    2272.9863,    2267.095,     2245.2983,     2129.8306,
+    1975.4551,   1823.2893,   1674.4062,   1529.2731,    1390.7449,    1262.1243,    1143.4061,     1066.4246,
+    987.73401,   908.31421,   829.04413,   750.69647,    673.93848,    594.4339,     505.18051,     423.92969,
+    350.49826,   284.60471,   225.89371,   173.95778,    128.35512,    104.15652,    75.211189,     40.704014,
+    1320.2314,   1923.5682,   1971.3832,   2008.9121,    2024.2074,    2017.6912,    1997.7461,     1942.3022,
+    1806.6643,   1672.0176,   1539.9724,   1410.6986,    1289.4801,    1175.2073,    1066.4246,     963.35919,
+    893.63898,   823.0542,    752.38312,   682.31958,    613.47125,    529.8266,     451.40695,     379.5824,
+    314.45175,   255.8183,    203.41415,   156.91895,    125.86903,    102.24911,    73.784668,     39.906857,
+    1176.8837,   1711.2981,   1750.5248,   1780.8026,    1795.993,     1789.1992,    1771.0409,     1741.6445,
+    1641.3643,   1522.9102,   1406.8092,   1297.5624,    1190.1628,    1086.6716,    987.73401,     893.63898,
+    804.55994,   742.06683,   679.323,     616.94482,    542.94952,    468.94223,    400.48373,     337.65472,
+    280.26181,   228.42978,   181.96297,   142.4147,     123.38181,    100.15266,    72.220078,     39.034161,
+    1044.0483,   1515.4852,   1547.6572,   1572.0654,    1586.7396,    1579.9347,    1563.5052,     1537.5632,
+    1481.1416,   1381.8047,   1283.0233,   1185.7885,    1090.6735,    997.62921,    908.31421,     823.0542,
+    742.06683,   665.48138,   610.02527,   544.83606,    476.37396,    412.29703,    352.82245,     298.05792,
+    248.01785,   202.57388,   161.65331,   139.34525,    120.61987,    97.831955,    70.492912,     38.073158,
+    921.2619,    1335.1792,   1361.5294,   1381.1697,    1393.4408,    1388.4299,    1375.428,      1355.778,
+    1328.6294,   1248.6274,   1161.7151,   1075.7563,    991.50104,    909.06207,    829.04413,     752.38312,
+    679.323,     610.02527,   535.70306,   473.75674,    415.06741,    359.95514,    308.63498,     261.22885,
+    217.77783,   178.25468,   150.78267,   135.90138,    117.53384,    95.248413,    68.576332,     37.00983,
+    809.23602,   1171.4097,   1193.6245,   1210.509,     1221.5485,    1220.6163,    1209.7025,     1192.2618,
+    1168.4478,   1119.8175,   1043.8623,   968.41003,    894.14368,    821.62915,    750.69647,     682.31958,
+    616.94482,   544.83606,   473.75674,   408.93054,    358.96671,    311.90079,    267.9375,      227.20329,
+    189.75572,   158.04335,   146.51324,   131.99625,    114.06984,    92.359825,    66.441002,     35.828876,
+    709.88043,   1026.3,      1044.517,    1058.1282,    1066.7343,    1067.0275,    1057.1802,     1041.8038,
+    1021.0288,   995.08313,   930.24261,   864.51721,    799.57288,    735.92542,    673.93848,     613.47125,
+    542.94952,   476.37396,   415.06741,   358.96671,    307.93683,    268.05362,    230.68958,     195.96881,
+    163.95755,   152.98413,   141.67432,   127.50971,    110.12691,    89.120285,    64.054985,     34.513657,
+    617.86603,   892.26086,   907.12451,   918.03662,    924.68793,    925.80829,    917.02057,     903.57141,
+    885.57111,   863.20764,   821.45776,   764.68787,    708.38879,    653.02197,    594.4339,      529.8266,
+    468.94223,   412.29703,   359.95514,   311.90079,    268.05362,    228.28214,    196.80522,     167.47345,
+    155.85141,   147.21855,   136.18883,   122.44758,    105.65214,    85.430168,    61.377911,     33.046085,
+    532.87958,   768.74207,   780.78949,   789.47559,    794.56097,    795.85162,    788.44995,     776.79602,
+    761.33032,   742.2077,    717.9574,    669.39166,    620.86053,    561.79041,    505.18051,     451.40695,
+    400.48373,   352.82245,   308.63498,   267.9375,     230.68958,    196.80522,    166.16272,     155.48438,
+    149.05833,   140.64102,   129.96239,   116.72793,    100.61709,    81.281746,    58.344582,     29.269342,
+    454.60342,   655.20587,   664.8869,    671.73779,    675.57465,    676.24988,    670.74811,     660.76172,
+    647.61041,   631.42041,   611.66425,   568.6261,     518.82355,    470.45264,    423.92969,     379.5824,
+    337.65472,   298.05792,   261.22885,   227.20329,    195.96881,    167.47345,    155.48438,     147.53543,
+    141.2679,    133.13718,   122.89288,   110.26207,    94.947845,    76.627945,    53.001026,     25.055674,
+    382.71756,   551.12677,   558.81952,   564.15955,    567.00665,    567.24963,    563.23944,     554.79736,
+    538.33154,   517.84808,   494.85779,   467.31378,    427.24158,    388.18765,    350.49826,     314.45175,
+    280.26181,   248.01785,   217.77783,   189.75572,    163.95755,    155.85141,    149.05833,     141.2679,
+    132.34445,   124.5834,    114.86992,   102.95419,    88.564606,    70.65815,     44.67482,      21.143274,
+    316.90189,   455.99094,   461.48712,   463.70047,    463.13876,    459.76926,    452.99265,     441.61682,
+    427.79807,   411.75937,   393.75824,   374.0784,     345.59891,    314.60828,    284.60471,     255.8183,
+    228.42978,   202.57388,   178.25468,   158.04335,    152.98413,    147.21855,    140.64102,     133.13718,
+    124.5834,    114.84621,   105.77442,   94.701004,    81.38131,     58.455536,    37.000786,     17.530621,
+    247.11736,   355.04221,   358.64676,   360.19818,    359.63824,    356.94595,    352.11801,     343.37253,
+    332.76541,   320.4624,    306.65472,   291.55334,    273.30957,    249.25372,    225.89371,     203.41415,
+    181.96297,   161.65331,   150.78267,   146.51324,    141.67432,    136.18883,    129.96239,     122.89288,
+    114.86992,   105.77442,   95.478004,   85.391182,    66.203133,    47.291599,    29.966896,     14.213268,
+    186.80002,   268.21002,   270.78668,   271.84094,    271.33292,    269.24948,    265.60474,     259.37482,
+    251.4594,    242.28418,   231.98755,   220.72223,    208.65121,    191.61386,    173.95778,     156.91895,
+    142.4147,    139.34525,   135.90138,   131.99625,    127.50971,    122.44758,    116.72793,     110.26207,
+    102.95419,   94.701004,   85.391182,   67.933937,    51.929531,    37.135086,    23.556034,     11.184304,
+    135.44676,   194.36061,   196.12987,   196.81523,    196.3905,     194.8479,     192.19832,     187.94739,
+    182.27744,   175.70909,   168.33853,   160.27208,    151.62337,    141.14845,    128.35512,     125.86903,
+    123.38181,   120.61987,   117.53384,   114.06984,    110.12691,    105.65214,    100.61709,     94.947845,
+    88.564606,   81.38131,    66.203133,   51.929531,    39.04237,     27.948416,    17.746897,     8.4347544,
+    91.998367,   131.94092,   133.08028,   133.49622,    133.17256,    132.10498,    130.30128,     127.58199,
+    123.77497,   119.36732,   114.42181,   109.0079,     104.52164,    105.65822,    104.15652,     102.24911,
+    100.15266,   97.831955,   95.248413,   92.359825,    89.120285,    85.430168,    81.281746,     76.627945,
+    70.65815,    58.455536,   47.291599,   37.135086,    27.948416,    19.689066,    12.514842,     5.9539757,
+    55.499458,   79.554382,   80.206787,   80.430031,    80.215202,    79.560242,    78.470016,     76.923965,
+    74.652321,   72.104607,   73.055611,   73.953133,    74.773422,    75.489624,    75.211189,     73.784668,
+    72.220078,   70.492912,   68.576332,   66.441002,    64.054985,    61.377911,    58.344582,     53.001026,
+    44.67482,    37.000786,   29.966896,   23.556034,    17.746897,    12.514842,    7.8326411,     3.7300112,
+    25.090487,   35.948029,   36.228275,   36.317585,    36.212284,    36.477112,    37.137085,     37.807724,
+    38.330761,   38.818394,   39.285583,   39.721516,    40.113838,    40.448593,    40.704014,     39.906857,
+    39.034161,   38.073158,   37.00983,    35.828876,    34.513657,    33.046085,    29.269342,     25.055674,
+    21.143274,   17.530621,   14.213268,   11.184304,    8.4347544,    5.9539757,    3.7300112,     1.7499064,
+    7452.5459,   1653.0973,   1098.1042,   727.66534,    481.00522,    317.16086,    208.59155,     149.65108,
+    110.55112,   81.436607,   59.812893,   43.794903,    31.961613,    23.099804,    16.518005,     11.763959,
+    8.3414326,   5.8861513,   4.1314034,   2.827503,     1.7887044,    1.1227245,    0.69828635,    0.42959845,
+    0.2608158,   0.15574206,  0.15453278,  0.1533215,    0.14591624,   0.13008402,   0.10300098,    0.061118193,
+    1653.0973,   8380.5039,   5775.375,    1079.0876,    328.9371,     204.79303,    154.29584,     122.92825,
+    97.436584,   75.785179,   58.118813,   44.076027,    33.117424,    24.517237,    17.903296,     12.983698,
+    9.352972,    6.6923704,   4.7555513,   3.2828107,    2.0976176,    1.3285706,    0.83311653,    0.51639366,
+    0.31566259,  0.18968138,  0.19134447,  0.19078892,   0.18241146,   0.16331688,   0.12983187,    0.077326767,
+    1098.1042,   5775.375,    4689.2881,   6173.9531,    2290.5842,    800.42993,    290.75485,     138.9079,
+    73.837051,   46.981812,   37.868378,   29.908382,    23.242268,    17.6751,      13.224583,     9.7938852,
+    7.1852026,   5.2244205,   3.7655427,   2.6137071,    1.6897452,    1.0815921,    0.68476027,    0.42814901,
+    0.26381135,  0.16220759,  0.16746993,  0.16782472,   0.16120373,   0.14495377,   0.11569799,    0.06916777,
+    727.66534,   1079.0876,   6173.9531,   2611.1367,    3328.4873,    1912.3757,    947.11169,     480.0253,
+    219.9599,    111.24844,   60.383377,   34.50848,     20.489538,    12.385681,    9.4622946,     7.1696663,
+    5.3661933,   3.971153,    2.9073288,   2.0221956,    1.3245902,    0.85795265,   0.54903388,    0.34665737,
+    0.21551648,  0.14123312,  0.14661011,  0.14765961,   0.14249404,   0.12868311,   0.10312349,    0.06188108,
+    481.00522,   328.9371,    2290.5842,   3328.4873,    1446.8124,    1663.2595,    1531.1774,     831.79169,
+    480.55585,   280.44727,   139.75082,   74.196167,    41.275387,    23.572012,    13.852783,     8.3232937,
+    5.0868764,   3.1492734,   2.1943185,   1.5237617,    1.0123664,    0.66419888,   0.43003657,    0.27443221,
+    0.17228782,  0.1230865,   0.12845814,  0.1300182,    0.1260446,    0.11431187,   0.091968946,   0.055391051,
+    317.16086,   204.79303,   800.42993,   1912.3757,    1663.2595,    879.99316,    1006.1718,     1058.3137,
+    668.74103,   394.177,     240.48183,   150.66313,    80.264244,    43.679893,    24.590214,     14.219559,
+    8.3981218,   5.0423026,   3.0657153,   1.7397083,    0.9833861,    0.55757725,   0.32954982,    0.21270514,
+    0.13493462,  0.1074447,   0.11271633,  0.11463311,   0.11162424,   0.10165226,   0.082098484,   0.048040159,
+    208.59155,   154.29584,   290.75485,   947.11169,    1531.1774,    1006.1718,    573.2146,      619.79071,
+    629.17371,   507.08578,   304.87149,   188.1375,     117.17072,    74.162743,    42.344086,     23.677216,
+    13.56254,    7.9194975,   4.6161661,   2.5380981,    1.4063962,    0.78302729,   0.43674421,    0.24327713,
+    0.13485461,  0.1069512,   0.099649422, 0.1012495,    0.099012084,  0.090524487,  0.073381729,   0.04033751,
+    149.65108,   122.92825,   138.9079,    480.0253,     831.79169,    1058.3137,    619.79071,     371.42276,
+    385.13297,   379.87418,   358.90652,   224.30173,    138.47133,    86.847908,    55.162323,     35.383717,
+    21.313681,   12.145742,   6.7107077,   3.6188078,    1.9695817,    1.0785577,    0.59242898,    0.32534847,
+    0.17799333,  0.16047736,  0.14708714,  0.13201807,   0.11448132,   0.093597606,  0.068363689,   0.033529639,
+    110.55112,   97.436584,   73.837051,   219.9599,     480.55585,    668.74103,    629.17371,     385.13297,
+    239.36328,   240.1848,    230.45854,   211.83032,    157.80495,    98.485023,    62.204361,     39.666977,
+    25.482706,   16.458862,   9.4808216,   5.0279841,    2.6939895,    1.4537559,    0.78763467,    0.42704073,
+    0.26488531,  0.24161115,  0.21790393,  0.19264847,   0.16471227,   0.13289179,   0.095863439,   0.040011808,
+    81.436607,   75.785179,   46.981812,   111.24844,    280.44727,    394.177,      507.08578,     379.87418,
+    240.1848,    152.98123,   148.04646,   138.31981,    125.22149,    108.09051,    68.109337,     43.296539,
+    27.714876,   17.121479,   10.193944,   6.0743914,    3.5910954,    1.9136044,    1.0244646,     0.54920506,
+    0.406075,    0.3642543,   0.32339528,  0.28172541,   0.23755264,   0.18917324,   0.13448031,    0.0503271,
+    59.812893,   58.118813,   37.868378,   60.383377,    139.75082,    240.48183,    304.87149,     358.90652,
+    230.45854,   148.04646,   95.659012,   90.53273,     83.109322,    74.206566,    64.604469,     45.997284,
+    29.191458,   17.341434,   10.321466,   6.1462517,    3.6562936,    2.1693079,    1.2812663,     0.70197523,
+    0.62169224,  0.54882485,  0.47995898,  0.41219157,   0.3429068,    0.26961187,   0.16896416,    0.062724777,
+    43.794903,   44.076027,   29.908382,   34.50848,     74.196167,    150.66313,    188.1375,      224.30173,
+    211.83032,   138.31981,   90.53273,    59.445751,    55.193497,    49.882778,    43.975204,     37.885742,
+    28.733728,   17.102911,   10.192494,   6.0740843,    3.6147273,    2.1448407,    1.2666616,     0.95145923,
+    0.91624564,  0.82492906,  0.71112078,  0.60242444,   0.4946973,    0.38418862,   0.2100568,     0.077394471,
+    31.961613,   33.117424,   23.242268,   20.489538,    41.275387,    80.264244,    117.17072,     138.47133,
+    157.80495,   125.22149,   83.109322,   55.193497,    36.700203,    33.506638,    29.857651,     25.031574,
+    19.86261,    15.422539,   9.8179817,   5.863102,     3.494662,     2.0760274,    1.2326511,     1.1837422,
+    1.1308408,   1.0703059,   0.99809974,  0.87819707,   0.71226925,   0.53239381,   0.25816226,    0.094458885,
+    23.099804,   24.517237,   17.6751,     12.385681,    23.572012,    43.679893,    74.162743,     86.847908,
+    98.485023,   108.09051,   74.206566,   49.882778,    33.506638,    22.500179,    19.659946,     16.157965,
+    12.977942,   10.19952,    7.8515244,   5.5287971,    3.3041918,    1.9671426,    1.541515,      1.4689074,
+    1.392758,    1.3086799,   1.2119061,   1.0971035,    0.95817453,   0.65057075,   0.31339622,    0.11394219,
+    16.518005,   17.903296,   13.224583,   9.4622946,    13.852783,    24.590214,    42.344086,     55.162323,
+    62.204361,   68.109337,   64.604469,   43.975204,    29.857651,    19.659946,    12.487819,     10.373426,
+    8.423563,    6.6937723,   5.2099481,   3.9744277,    2.9728496,    2.0257556,    1.9197992,     1.8163197,
+    1.7101527,   1.5960146,   1.468274,    1.3207343,    1.1464096,    0.67031622,   0.34781358,    0.13560228,
+    11.763959,   12.983698,   9.7938852,   7.1696663,    8.3232937,    14.219559,    23.677216,     35.383717,
+    39.666977,   43.296539,   45.997284,   37.885742,    25.031574,    16.157965,    10.373426,     6.6266203,
+    5.4342976,   4.362062,    3.4297614,   2.643038,     1.9968795,    2.2546837,    2.3789117,     2.2359676,
+    2.0916932,   1.9397397,   1.7734606,   1.5856591,    1.1544813,    0.67193842,   0.34832948,    0.13567345,
+    8.3414326,   9.352972,    7.1852026,   5.3661933,    5.0868764,    8.3981218,    13.56254,      21.313681,
+    25.482706,   27.714876,   29.191458,   28.733728,    19.86261,     12.977942,    8.423563,      5.4342976,
+    3.4856203,   2.8236079,   2.2409234,   1.7431903,    1.8179442,    2.1103861,    2.4194973,     2.7352304,
+    2.5462136,   2.3473892,   2.133774,    1.8475604,    1.1439821,    0.66548669,   0.34478524,    0.13420847,
+    5.8861513,   6.6923704,   5.2244205,   3.971153,     3.1492734,    5.0423026,    7.9194975,     12.145742,
+    16.458862,   17.121479,   17.341434,   17.102911,    15.422539,    10.19952,     6.6937723,     4.362062,
+    2.8236079,   1.8158308,   1.4534936,   1.4607134,    1.7115303,    1.9835888,    2.2711449,     2.564872,
+    2.8506856,   2.8261323,   2.5551648,   1.8073802,    1.1190749,    0.65090579,   0.33715183,    0.13119647,
+    4.1314034,   4.7555513,   3.7655427,   2.9073288,    2.1943185,    3.0657153,    4.6161661,     6.7107077,
+    9.4808216,   10.193944,   10.321466,   10.192494,    9.8179817,    7.8515244,    5.2099481,     3.4297614,
+    2.2409234,   1.4534936,   1.1726444,   1.3837968,    1.6174361,    1.8706927,    2.1382265,     2.4113574,
+    2.6769662,   2.9161084,   2.7085147,   1.7439601,    1.0801877,    0.62842041,   0.32553586,    0.12667598,
+    2.827503,    3.2828107,   2.6137071,   2.0221956,    1.5237617,    1.7397083,    2.5380981,     3.6188078,
+    5.0279841,   6.0743914,   6.1462517,   6.0740843,    5.863102,     5.5287971,    3.9744277,     2.643038,
+    1.7431903,   1.4607134,   1.3837968,   1.314991,     1.5327175,    1.7684577,    2.0172272,     2.2709329,
+    2.517345,    2.6513999,   2.081558,    1.5779446,    1.0282415,    0.59852517,   0.3101756,     0.12073385,
+    1.7887044,   2.0976176,   1.6897452,   1.3245902,    1.0123664,    0.9833861,    1.4063962,     1.9695817,
+    2.6939895,   3.5910954,   3.6562936,   3.6147273,    3.494662,     3.3041918,    2.9728496,     1.9968795,
+    1.8179442,   1.7115303,   1.6174361,   1.5327175,    1.4546751,    1.6738989,    1.9048941,     2.1401069,
+    2.3681388,   1.9743114,   1.5567621,   1.1852585,    0.86070496,   0.56195939,   0.29142976,    0.11350145,
+    1.1227245,   1.3285706,   1.0815921,   0.85795265,   0.66419888,   0.55757725,   0.78302729,    1.0785577,
+    1.4537559,   1.9136044,   2.1693079,   2.1448407,    2.0760274,    1.9671426,    2.0257556,     2.2546837,
+    2.1103861,   1.9835888,   1.8706927,   1.7684577,    1.6738989,    1.5841904,    1.7981392,     2.0155559,
+    1.7909182,   1.4579605,   1.1543156,   0.88245374,   0.6434359,    0.43712825,   0.26243109,    0.1051485,
+    0.69828635,  0.83311653,  0.68476027,  0.54903388,   0.43003657,   0.32954982,   0.43674421,    0.59242898,
+    0.78763467,  1.0244646,   1.2812663,   1.2666616,    1.2326511,    1.541515,     1.9197992,     2.3789117,
+    2.4194973,   2.2711449,   2.1382265,   2.0172272,    1.9048941,    1.7981392,    1.6939481,     1.5605109,
+    1.305195,    1.0665417,   0.8476423,   0.65050262,   0.47614104,   0.32471988,   0.19569288,    0.07768748,
+    0.42959845,  0.51639366,  0.42814901,  0.34665737,   0.27443221,   0.21270514,   0.24327713,    0.32534847,
+    0.42704073,  0.54920506,  0.70197523,  0.95145923,   1.1837422,    1.4689074,    1.8163197,     2.2359676,
+    2.7352304,   2.564872,    2.4113574,   2.2709329,    2.1401069,    2.0155559,    1.5605109,     1.1213179,
+    0.94104314,  0.77166098,  0.61546981,  0.4740335,    0.34823546,   0.23835677,   0.13523336,    0.048583396,
+    0.2608158,   0.31566259,  0.26381135,  0.21551648,   0.17228782,   0.13493462,   0.13485461,    0.17799333,
+    0.26488531,  0.406075,    0.62169224,  0.91624564,   1.1308408,    1.392758,     1.7101527,     2.0916932,
+    2.5462136,   2.8506856,   2.6769662,   2.517345,     2.3681388,    1.7909182,    1.305195,      0.94104314,
+    0.66973585,  0.55096596,  0.44090569,  0.34073392,   0.25116813,   0.16932508,   0.082378834,   0.029803742,
+    0.15574206,  0.18968138,  0.16220759,  0.14123312,   0.1230865,    0.1074447,    0.1069512,     0.16047736,
+    0.24161115,  0.3642543,   0.54882485,  0.82492906,   1.0703059,    1.3086799,    1.5960146,     1.9397397,
+    2.3473892,   2.8261323,   2.9161084,   2.6513999,    1.9743114,    1.4579605,    1.0665417,     0.77166098,
+    0.55096596,  0.38697228,  0.31062979,  0.24081676,   0.17808735,   0.10020655,   0.049090669,   0.017882323,
+    0.15453278,  0.19134447,  0.16746993,  0.14661011,   0.12845814,   0.11271633,   0.099649422,   0.14708714,
+    0.21790393,  0.32339528,  0.47995898,  0.71112078,   0.99809974,   1.2119061,    1.468274,      1.7734606,
+    2.133774,    2.5551648,   2.7085147,   2.081558,     1.5567621,    1.1543156,    0.8476423,     0.61546981,
+    0.44090569,  0.31062979,  0.21420611,  0.1665564,    0.10316525,   0.05776377,   0.028488953,   0.010446936,
+    0.1533215,   0.19078892,  0.16782472,  0.14765961,   0.1300182,    0.11463311,   0.1012495,     0.13201807,
+    0.19264847,  0.28172541,  0.41219157,  0.60242444,   0.87819707,   1.0971035,    1.3207343,     1.5856591,
+    1.8475604,   1.8073802,   1.7439601,   1.5779446,    1.1852585,    0.88245374,   0.65050262,    0.4740335,
+    0.34073392,  0.24081676,  0.1665564,   0.09413676,   0.057134669,  0.032200783,  0.015985096,   0.0058997343,
+    0.14591624,  0.18241146,  0.16120373,  0.14249404,   0.1260446,    0.11162424,   0.099012084,   0.11448132,
+    0.16471227,  0.23755264,  0.3429068,   0.4946973,    0.71226925,   0.95817453,   1.1464096,     1.1544813,
+    1.1439821,   1.1190749,   1.0801877,   1.0282415,    0.86070496,   0.6434359,    0.47614104,    0.34823546,
+    0.25116813,  0.17808735,  0.10316525,  0.057134669,  0.030231111,  0.017146256,  0.0085655777,  0.0031812373,
+    0.13008402,  0.16331688,  0.14495377,  0.12868311,   0.11431187,   0.10165226,   0.090524487,   0.093597606,
+    0.13289179,  0.18917324,  0.26961187,  0.38418862,   0.53239381,   0.65057075,   0.67031622,    0.67193842,
+    0.66548669,  0.65090579,  0.62842041,  0.59852517,   0.56195939,   0.43712825,   0.32471988,    0.23835677,
+    0.16932508,  0.10020655,  0.05776377,  0.032200783,  0.017146256,  0.0085189966, 0.0042818012,  0.0015999493,
+    0.10300098,  0.12983187,  0.11569799,  0.10312349,   0.091968946,  0.082098484,  0.073381729,   0.068363689,
+    0.095863439, 0.13448031,  0.16896416,  0.2100568,    0.25816226,   0.31339622,   0.34781358,    0.34832948,
+    0.34478524,  0.33715183,  0.32553586,  0.3101756,    0.29142976,   0.26243109,   0.19569288,    0.13523336,
+    0.082378834, 0.049090669, 0.028488953, 0.015985096,  0.0085655777, 0.0042818012, 0.0018937252,  0.00071179745,
+    0.061118193, 0.077326767, 0.06916777,  0.06188108,   0.055391051,  0.048040159,  0.04033751,    0.033529639,
+    0.040011808, 0.0503271,   0.062724777, 0.077394471,  0.094458885,  0.11394219,   0.13560228,    0.13567345,
+    0.13420847,  0.13119647,  0.12667598,  0.12073385,   0.11350145,   0.1051485,    0.07768748,    0.048583396,
+    0.029803742, 0.017882323, 0.010446936, 0.0058997343, 0.0031812373, 0.0015999493, 0.00071179745, 0.00023641172};
+
+const float dequantY_matrix4x4[64] = {
+    0.0044780076, 0.0012398829, 0.0031354439, 0.0031354439, 0.0066977735, 0.0066977735, 0.021900313, 0.021900313,
+    0.0012398829, 0.0014682054, 0.0031354439, 0.0031354439, 0.0066977735, 0.0066977735, 0.021900313, 0.021900313,
+    0.0031354439, 0.0031354439, 0.0036335052, 0.0036335052, 0.0068652928, 0.0068652928, 0.01835954,  0.01835954,
+    0.0031354439, 0.0031354439, 0.0036335052, 0.0036335052, 0.0068652928, 0.0068652928, 0.01835954,  0.01835954,
+    0.0066977735, 0.0066977735, 0.0068652928, 0.0068652928, 0.016142365,  0.016142365,  0.027852703, 0.027852703,
+    0.0066977735, 0.0066977735, 0.0068652928, 0.0068652928, 0.016142365,  0.016142365,  0.027852703, 0.027852703,
+    0.021900313,  0.021900313,  0.01835954,   0.01835954,   0.027852703,  0.027852703,  0.060083967, 0.060083967,
+    0.021900313,  0.021900313,  0.01835954,   0.01835954,   0.027852703,  0.027852703,  0.060083967, 0.060083967};
+
+const float dequantY_matrix8x8[64] = {
+    0.0018947646, 0.0020298155, 0.0018158669, 0.0023109741, 0.002771968,  0.0028875093, 0.0026061691, 0.0014496285,
+    0.0020298155, 0.0033498062, 0.0029625066, 0.0040782108, 0.0047020554, 0.0046986658, 0.004059691,  0.0022195177,
+    0.0018158669, 0.0029625066, 0.003855702,  0.0039039075, 0.0044844504, 0.0048484555, 0.004212948,  0.002350494,
+    0.0023109741, 0.0040782108, 0.0039039075, 0.005005613,  0.0047252625, 0.0043872506, 0.0035329545, 0.0020367121,
+    0.002771968,  0.0047020554, 0.0044844504, 0.0047252625, 0.0053944732, 0.0043888669, 0.0030360108, 0.0016807605,
+    0.0028875093, 0.0046986658, 0.0048484555, 0.0043872506, 0.0043888669, 0.0039804466, 0.0027267861, 0.0015264176,
+    0.0026061691, 0.004059691,  0.004212948,  0.0035329545, 0.0030360108, 0.0027267861, 0.0022856754, 0.0012781189,
+    0.0014496285, 0.0022195177, 0.002350494,  0.0020367121, 0.0016807605, 0.0015264176, 0.0012781189, 0.00079561322};
+
+const float dequantY_matrix16x16[256] = {
+    0.00039600098, 0.00034607144, 0.00040833891, 0.00048667059, 0.0005861785,  0.00074920553, 0.00098458503,
+    0.0013121488,  0.0017772652,  0.0022973588,  0.0028583913,  0.0036706869,  0.0049268291,  0.0072197518,
+    0.012596095,   0.029396452,   0.00034607144, 0.00024747173, 0.00027387534, 0.0003365741,  0.00040938996,
+    0.0005285684,  0.00069563533, 0.00092797622, 0.0012577978,  0.0016151961,  0.0020118796,  0.0025859487,
+    0.0034734753,  0.0051075728,  0.0089140097,  0.020809252,   0.00040833891, 0.00027387534, 0.00031539286,
+    0.00034493519, 0.00041945101, 0.00054777943, 0.0007227718,  0.00096404023, 0.001303279,   0.0016352652,
+    0.0020375939,  0.0026197163,  0.0035195949,  0.0052199606,  0.0091060577,  0.021249145,   0.00048667059,
+    0.0003365741,  0.00034493519, 0.00041010656, 0.00048113358, 0.00058858463, 0.00077134289, 0.0010256943,
+    0.0013762834,  0.0016811473,  0.0021005531,  0.0026996068,  0.0036257072,  0.0054506757,  0.009496971,
+    0.022137705,   0.0005861785,  0.00040938996, 0.00041945101, 0.00048113358, 0.00059573731, 0.00071104773,
+    0.00087649847, 0.0011293787,  0.0014301358,  0.0017464008,  0.002182483,   0.0028128312,  0.0037867145,
+    0.0058138543,  0.010114919,   0.02353812,    0.00074920553, 0.0005285684,  0.00054777943, 0.00058858463,
+    0.00071104773, 0.0008852075,  0.0010738227,  0.0013219716,  0.001527268,   0.0018505802,  0.0023102255,
+    0.0029750105,  0.0040830439,  0.0062906044,  0.01094307,    0.025472319,   0.00098458503, 0.00069563533,
+    0.0007227718,  0.00077134289, 0.00087649847, 0.0010738227,  0.0013484384,  0.0015134697,  0.0017388589,
+    0.0020487665,  0.0024944625,  0.0032075676,  0.0045332746,  0.0069634146,  0.012081781,   0.028057842,
+    0.0013121488,  0.00092797622, 0.00096404023, 0.0010256943,  0.0011293787,  0.0013219716,  0.0015134697,
+    0.0017614318,  0.0020141515,  0.0023621877,  0.0028521135,  0.0036510243,  0.0051573389,  0.0078939945,
+    0.013653559,   0.031619865,   0.0017772652,  0.0012577978,  0.001303279,   0.0013762834,  0.0014301358,
+    0.001527268,   0.0017388589,  0.0020141515,  0.002376725,   0.0027760784,  0.0033578034,  0.0044371258,
+    0.0061621098,  0.0092011821,  0.015820401,   0.03678393,    0.0022973588,  0.0016151961,  0.0016352652,
+    0.0016811473,  0.0017464008,  0.0018505802,  0.0020487665,  0.0023621877,  0.0027760784,  0.0033338354,
+    0.0042134481,  0.0055337516,  0.0076407115,  0.011347383,   0.019103128,   0.049214453,   0.0028583913,
+    0.0020118796,  0.0020375939,  0.0021005531,  0.002182483,   0.0023102255,  0.0024944625,  0.0028521135,
+    0.0033578034,  0.0042134481,  0.0054442864,  0.0071092523,  0.0097620692,  0.014422105,   0.02557276,
+    0.070264891,   0.0036706869,  0.0025859487,  0.0026197163,  0.0026996068,  0.0028128312,  0.0029750105,
+    0.0032075676,  0.0036510243,  0.0044371258,  0.0055337516,  0.0071092523,  0.0094711715,  0.012937739,
+    0.019291667,   0.038823757,   0.10550891,    0.0049268291,  0.0034734753,  0.0035195949,  0.0036257072,
+    0.0037867145,  0.0040830439,  0.0045332746,  0.0051573389,  0.0061621098,  0.0076407115,  0.0097620692,
+    0.012937739,   0.017999291,   0.031373069,   0.062424377,   0.167826,      0.0072197518,  0.0051075728,
+    0.0052199606,  0.0054506757,  0.0058138543,  0.0062906044,  0.0069634146,  0.0078939945,  0.0092011821,
+    0.011347383,   0.014422105,   0.019291667,   0.031373069,   0.055231433,   0.10869808,    0.2891748,
+    0.012596095,   0.0089140097,  0.0091060577,  0.009496971,   0.010114919,   0.01094307,    0.012081781,
+    0.013653559,   0.015820401,   0.019103128,   0.02557276,    0.038823757,   0.062424377,   0.10869808,
+    0.21590571,    0.56856865,    0.029396452,   0.020809252,   0.021249145,   0.022137705,   0.02353812,
+    0.025472319,   0.028057842,   0.031619865,   0.03678393,    0.049214453,   0.070264891,   0.10550891,
+    0.167826,      0.2891748,     0.56856865,    1.5101935};
+
+const float dequantY_matrix32x32[1024] = { // 1024 = 32x32 entries; NOTE(review): inv_dequantY_matrix32x32 appears to hold the element-wise reciprocals - confirm
+    0.00017707126, 0.0001480639,  0.00016780054, 0.00019063143, 0.00021710296, 0.00024787075, 0.00028372489,
+    0.00031671999, 0.00035102671, 0.00039015003, 0.0004349151,  0.00048632181, 0.0005455903,  0.00060966006,
+    0.00067817559, 0.00075744296, 0.0008497017,  0.00095781003, 0.0010854676,  0.0012357334,  0.0014086879,
+    0.0016184739,  0.0018765966,  0.0021997194,  0.002612893,   0.0031555507,  0.0040466604,  0.0053533185,
+    0.0073829745,  0.010869758,   0.018018194,   0.039855745,   0.0001480639,  0.00010538891, 0.00010069195,
+    0.00011690574, 0.0001371219,  0.00016000759, 0.00018614267, 0.00020961632, 0.00023437649, 0.0002622726,
+    0.00029394444, 0.00033013042, 0.00037170923, 0.00041637814, 0.0004643999,  0.00051986717, 0.00058435171,
+    0.00065985468, 0.00074896315, 0.00085367227, 0.00097437395, 0.0011207485,  0.0013008264,  0.0015262378,
+    0.0018144646,  0.002193026,   0.0028165665,  0.0037284214,  0.0051450753,  0.0075791497,  0.012570018,
+    0.027817938,   0.00016780054, 0.00010069195, 0.00012605802, 0.0001268106,  0.0001345543,  0.00015255461,
+    0.00017769246, 0.00019953751, 0.0002237842,  0.00025130101, 0.00028306333, 0.00031920706, 0.00036062195,
+    0.00040437246, 0.00045215141, 0.00050725805, 0.00057125726, 0.00064613792, 0.00073446817, 0.00083778438,
+    0.00095738034, 0.0011023845,  0.0012807549,  0.0015040152,  0.0017894866,  0.0021669078,  0.0027882589,
+    0.0036929436,  0.0050986623,  0.0075142612,  0.012467773,   0.027602749,   0.00019063143, 0.00011690574,
+    0.0001268106,  0.00015151658, 0.0001564352,  0.00016693826, 0.00017903787, 0.00019482298, 0.00021931141,
+    0.00024674702, 0.00027776035, 0.00031307945, 0.00035356692, 0.00039516465, 0.0004428348,  0.00049778185,
+    0.00056154456, 0.00063610583, 0.00072402402, 0.00082609872, 0.00094506511, 0.0010892812,  0.0012666637,
+    0.0014886761,  0.0017725482,  0.0021565645,  0.0027762495,  0.0036786217,  0.0050809076,  0.0074908487,
+    0.012433167,   0.02753487,    0.00021710296, 0.0001371219,  0.0001345543,  0.0001564352,  0.00018301635,
+    0.00019103213, 0.00019891099, 0.00021062233, 0.00022562918, 0.00024527154, 0.00027648974, 0.00031203785,
+    0.00035209427, 0.0003930493,  0.00043994986, 0.00049402053, 0.00055679504, 0.00063022313, 0.00071764801,
+    0.00081863307, 0.00093744061, 0.001081446,   0.0012585566,  0.0014802213,  0.0017636477,  0.0021591801,
+    0.0027805718,  0.0036855093,  0.0050918958,  0.0075090546,  0.012466465,   0.02761494,    0.00024787075,
+    0.00016000759, 0.00015255461, 0.00016693826, 0.00019103213, 0.0002155252,  0.00022246495, 0.00023313538,
+    0.00024776958, 0.00026611183, 0.0002883035,  0.00031473255, 0.00035249389, 0.00039379625, 0.00044109311,
+    0.000495616,   0.00055890926, 0.00063293753, 0.00072023802, 0.00081925828, 0.00093718304, 0.0010801372,
+    0.0012565156,  0.0014787434,  0.0017628924,  0.002175004,   0.0028015445,  0.0037140276,  0.0051322081,
+    0.0075697373,  0.012569092,   0.02741445,    0.00028372489, 0.00018614267, 0.00017769246, 0.00017903787,
+    0.00019891099, 0.00022246495, 0.00024936345, 0.00025970236, 0.0002738453,  0.00029204748, 0.00031465336,
+    0.00034174335, 0.00036875275, 0.00040052799, 0.00044537513, 0.00050056411, 0.00056463969, 0.00063958851,
+    0.00072704646, 0.00082664948, 0.00094591256, 0.001090488,   0.0012683113,  0.0014908726,  0.0017754439,
+    0.0022075414,  0.0028399569,  0.0037649933,  0.0052029593,  0.0076745213,  0.012743721,   0.026927261,
+    0.00031671999, 0.00020961632, 0.00019953751, 0.00019482298, 0.00021062233, 0.00023313538, 0.00025970236,
+    0.00029003757, 0.00030429225, 0.00032265869, 0.00034540024, 0.00037039586, 0.00039815414, 0.00043094621,
+    0.00046952089, 0.00051485293, 0.00057416997, 0.00065037975, 0.0007375839,  0.00083874189, 0.00095987355,
+    0.0011067194,  0.0012873393,  0.0015134049,  0.0018024599,  0.0022644065,  0.002912289,   0.0038554242,
+    0.005320638,   0.0078380974,  0.01299985,    0.026449621,   0.00035102671, 0.00023437649, 0.0002237842,
+    0.00021931141, 0.00022562918, 0.00024776958, 0.0002738453,  0.00030429225, 0.00033918605, 0.00035813198,
+    0.000380043,   0.00040352362, 0.00043199339, 0.00046604805, 0.00050621247, 0.00055350625, 0.00060924928,
+    0.00067515491, 0.00075265532, 0.00085583632, 0.00097940431, 0.0011292148,  0.0013134903,  0.0015441383,
+    0.0018575913,  0.0023375514,  0.00300512,    0.0039767851,  0.0054861424,  0.0080791777,  0.013395431,
+    0.026088707,   0.00039015003, 0.0002622726,  0.00025130101, 0.00024674702, 0.00024527154, 0.00026611183,
+    0.00029204748, 0.00032265869, 0.00035813198, 0.00039764532, 0.00041749049, 0.0004418896,  0.000471304,
+    0.00050638703, 0.0005484593,  0.00059807982, 0.00065663754, 0.00072369125, 0.00080087944, 0.00089300267,
+    0.0010049412,  0.0011584698,  0.0013473318,  0.0015837309,  0.0019310683,  0.0024286029,  0.0031204908,
+    0.0041273846,  0.0056912252,  0.0083775017,  0.01386874,    0.025760984,   0.0004349151,  0.00029394444,
+    0.00028306333, 0.00027776035, 0.00027648974, 0.0002883035,  0.00031465336, 0.00034540024, 0.000380043,
+    0.00041749049, 0.00046008758, 0.00048566549, 0.0005165693,  0.00055346655, 0.00059722661, 0.00064936228,
+    0.00071082846, 0.00077940908, 0.00086079625, 0.00095798075, 0.0010749884,  0.001217348,   0.0013928403,
+    0.0016348839,  0.0020207826,  0.0025396293,  0.0032609967,  0.0043105762,  0.0059404108,  0.0087395925,
+    0.013688202,   0.025454629,   0.00048632181, 0.00033013042, 0.00031920706, 0.00031307945, 0.00031203785,
+    0.00031473255, 0.00034174335, 0.00037039586, 0.00040352362, 0.0004418896,  0.00048566549, 0.00053564616,
+    0.00056836149, 0.00060747023, 0.00065390544, 0.00070886861, 0.00077067583, 0.00084332074, 0.0009295785,
+    0.0010326204,  0.001156715,   0.0013077231,  0.0014938937,  0.0017586248,  0.0021398899,  0.0026732364,
+    0.0034299041,  0.0045305812,  0.00623939,    0.0091736475,  0.013522077,   0.025175273,   0.0005455903,
+    0.00037170923, 0.00036062195, 0.00035356692, 0.00035209427, 0.00035249389, 0.00036875275, 0.00039815414,
+    0.00043199339, 0.000471304,   0.0005165693,  0.00056836149, 0.00062771526, 0.0006694512,  0.00071903912,
+    0.00077550631, 0.00084022118, 0.00091686472, 0.0010085718,  0.0011183885,  0.0012506677,  0.0014116542,
+    0.0016106677,  0.0019274375,  0.0023405962,  0.0028935277,  0.0036588546,  0.004792687,   0.0065952893,
+    0.0095673967,  0.013373735,   0.024929052,   0.00060966006, 0.00041637814, 0.00040437246, 0.00039516465,
+    0.0003930493,  0.00039379625, 0.00040052799, 0.00043094621, 0.00046604805, 0.00050638703, 0.00055346655,
+    0.00060747023, 0.0006694512,  0.00074076012, 0.000792315,   0.0008509137,  0.00092024123, 0.0010023764,
+    0.0011000349,  0.0012170941,  0.0013588334,  0.001531342,   0.0017800233,  0.0021256125,  0.0025760736,
+    0.003178556,   0.0040119761,  0.005218829,   0.0070847394,  0.0094644791,  0.013246854,   0.024722738,
+    0.00067817559, 0.0004643999,  0.00045215141, 0.0004428348,  0.00043994986, 0.00044109311, 0.00044537513,
+    0.00046952089, 0.00050621247, 0.0005484593,  0.00059722661, 0.00065390544, 0.00071903912, 0.000792315,
+    0.00087457988, 0.00093771284, 0.0010124183,  0.0011009406,  0.0012062084,  0.0013320963,  0.001483815,
+    0.0016822729,  0.0019794905,  0.0023588818,  0.0028530811,  0.0035136454,  0.0044268607,  0.0057485211,
+    0.0077908854,  0.0096009346,  0.013295894,   0.024567602,   0.00075744296, 0.00051986717, 0.00050725805,
+    0.00049778185, 0.00049402053, 0.000495616,   0.00050056411, 0.00051485293, 0.00055350625, 0.00059807982,
+    0.00064936228, 0.00070886861, 0.00077550631, 0.0008509137,  0.00093771284, 0.0010380344,  0.0011190202,
+    0.0012149868,  0.0013291101,  0.0014655889,  0.0016300683,  0.0018874101,  0.002215296,   0.0026344741,
+    0.0031801381,  0.0039090244,  0.0049160789,  0.0063727167,  0.0079447664,  0.0097800363,  0.013552951,
+    0.025058351,   0.0008497017,  0.00058435171, 0.00057125726, 0.00056154456, 0.00055679504, 0.00055890926,
+    0.00056463969, 0.00057416997, 0.00060924928, 0.00065663754, 0.00071082846, 0.00077067583, 0.00084022118,
+    0.00092024123, 0.0010124183,  0.0011190202,  0.0012429155,  0.0013475876,  0.0014720538,  0.0016208906,
+    0.0018417919,  0.0021324588,  0.0024969804,  0.0029616053,  0.0035680921,  0.004377713,   0.0054956237,
+    0.0070217471,  0.0081049222,  0.0099847578,  0.013846565,   0.025618585,   0.00095781003, 0.00065985468,
+    0.00064613792, 0.00063610583, 0.00063022313, 0.00063293753, 0.00063958851, 0.00065037975, 0.00067515491,
+    0.00072369125, 0.00077940908, 0.00084332074, 0.00091686472, 0.0010023764,  0.0011009406,  0.0012149868,
+    0.0013475876,  0.0015026716,  0.0016392763,  0.0018354144,  0.0020991911,  0.0024254359,  0.0028342868,
+    0.0033550526,  0.0040319678,  0.0049364707,  0.0061860783,  0.0071764197,  0.0082905078,  0.010221609,
+    0.014185824,   0.026265224,   0.0010854676,  0.00074896315, 0.00073446817, 0.00072402402, 0.00071764801,
+    0.00072023802, 0.00072704646, 0.0007375839,  0.00075265532, 0.00080087944, 0.00086079625, 0.0009295785,
+    0.0010085718,  0.0011000349,  0.0012062084,  0.0013291101,  0.0014720538,  0.0016392763,  0.0018667057,
+    0.0021107879,  0.0024092472,  0.0027781241,  0.0032400733,  0.003828061,   0.0045918357,  0.0056099505,
+    0.0066320617,  0.0073582768,  0.0085081877,  0.010498863,   0.014582291,   0.027019849,   0.0012357334,
+    0.00085367227, 0.00083778438, 0.00082609872, 0.00081863307, 0.00081925828, 0.00082664948, 0.00083874189,
+    0.00085583632, 0.00089300267, 0.00095798075, 0.0010326204,  0.0011183885,  0.0012170941,  0.0013320963,
+    0.0014655889,  0.0016208906,  0.0018354144,  0.0021107879,  0.002445403,   0.0027857737,  0.0032061476,
+    0.0037322138,  0.0044013448,  0.0052699335,  0.0063273781,  0.0068253218,  0.0075759734,  0.008766559,
+    0.010827218,   0.015050948,   0.027910447,   0.0014086879,  0.00097437395, 0.00095738034, 0.00094506511,
+    0.00093744061, 0.00093718304, 0.00094591256, 0.00095987355, 0.00097940431, 0.0010049412,  0.0010749884,
+    0.001156715,   0.0012506677,  0.0013588334,  0.001483815,   0.0016300683,  0.0018417919,  0.0020991911,
+    0.0024092472,  0.0027857737,  0.0032474191,  0.0037305967,  0.0043348297,  0.0051028528,  0.0060991398,
+    0.0065366258,  0.0070584421,  0.0078425398,  0.0090804324,  0.01122079,    0.015611588,   0.028974038,
+    0.0016184739,  0.0011207485,  0.0011023845,  0.0010892812,  0.001081446,   0.0010801372,  0.001090488,
+    0.0011067194,  0.0011292148,  0.0011584698,  0.001217348,   0.0013077231,  0.0014116542,  0.001531342,
+    0.0016822729,  0.0018874101,  0.0021324588,  0.0024254359,  0.0027781241,  0.0032061476,  0.0037305967,
+    0.0043805442,  0.005081166,   0.005971096,   0.0064163683,  0.0067926222,  0.0073427465,  0.0081667602,
+    0.0094650239,  0.011705467,   0.016292507,   0.030260772,   0.0018765966,  0.0013008264,  0.0012807549,
+    0.0012666637,  0.0012585566,  0.0012565156,  0.0012683113,  0.0012873393,  0.0013134903,  0.0013473318,
+    0.0013928403,  0.0014938937,  0.0016106677,  0.0017800233,  0.0019794905,  0.002215296,   0.0024969804,
+    0.0028342868,  0.0032400733,  0.0037322138,  0.0043348297,  0.005081166,   0.0060181972,  0.0064315144,
+    0.0067087831,  0.0071103009,  0.007694534,   0.00856693,    0.0099386694,  0.012302885,   0.017139552,
+    0.034165442,   0.0021997194,  0.0015262378,  0.0015040152,  0.0014886761,  0.0014802213,  0.0014787434,
+    0.0014908726,  0.0015134049,  0.0015441383,  0.0015837309,  0.0016348839,  0.0017586248,  0.0019274375,
+    0.0021256125,  0.0023588818,  0.0026344741,  0.0029616053,  0.0033550526,  0.003828061,   0.0044013448,
+    0.0051028528,  0.005971096,   0.0064315144,  0.006778033,   0.0070787491,  0.00751105,    0.0081371684,
+    0.0090693021,  0.010532098,   0.013050069,   0.01886756,    0.039911121,   0.002612893,   0.0018144646,
+    0.0017894866,  0.0017725482,  0.0017636477,  0.0017628924,  0.0017754439,  0.0018024599,  0.0018575913,
+    0.0019310683,  0.0020207826,  0.0021398899,  0.0023405962,  0.0025760736,  0.0028530811,  0.0031801381,
+    0.0035680921,  0.0040319678,  0.0045918357,  0.0052699335,  0.0060991398,  0.0064163683,  0.0067087831,
+    0.0070787491,  0.0075560398,  0.0080267517,  0.0087054996,  0.0097130574,  0.011291193,   0.01415265,
+    0.022383973,   0.047296364,   0.0031555507,  0.002193026,   0.0021669078,  0.0021565645,  0.0021591801,
+    0.002175004,   0.0022075414,  0.0022644065,  0.0023375514,  0.0024286029,  0.0025396293,  0.0026732364,
+    0.0028935277,  0.003178556,   0.0035136454,  0.0039090244,  0.004377713,   0.0049364707,  0.0056099505,
+    0.0063273781,  0.0065366258,  0.0067926222,  0.0071103009,  0.00751105,    0.0080267517,  0.0087072961,
+    0.0094540818,  0.01055955,    0.012287834,   0.017107019,   0.027026452,   0.057043046,   0.0040466604,
+    0.0028165665,  0.0027882589,  0.0027762495,  0.0027805718,  0.0028015445,  0.0028399569,  0.002912289,
+    0.00300512,    0.0031204908,  0.0032609967,  0.0034299041,  0.0036588546,  0.0040119761,  0.0044268607,
+    0.0049160789,  0.0054956237,  0.0061860783,  0.0066320617,  0.0068253218,  0.0070584421,  0.0073427465,
+    0.007694534,   0.0081371684,  0.0087054996,  0.0094540818,  0.010473616,   0.011710811,   0.015105025,
+    0.021145405,   0.033370156,   0.070356794,   0.0053533185,  0.0037284214,  0.0036929436,  0.0036786217,
+    0.0036855093,  0.0037140276,  0.0037649933,  0.0038554242,  0.0039767851,  0.0041273846,  0.0043105762,
+    0.0045305812,  0.004792687,   0.005218829,   0.0057485211,  0.0063727167,  0.0070217471,  0.0071764197,
+    0.0073582768,  0.0075759734,  0.0078425398,  0.0081667602,  0.00856693,    0.0090693021,  0.0097130574,
+    0.01055955,    0.011710811,   0.014720184,   0.019256866,   0.026928712,   0.042451967,   0.089411013,
+    0.0073829745,  0.0051450753,  0.0050986623,  0.0050809076,  0.0050918958,  0.0051322081,  0.0052029593,
+    0.005320638,   0.0054861424,  0.0056912252,  0.0059404108,  0.00623939,    0.0065952893,  0.0070847394,
+    0.0077908854,  0.0079447664,  0.0081049222,  0.0082905078,  0.0085081877,  0.008766559,   0.0090804324,
+    0.0094650239,  0.0099386694,  0.010532098,   0.011291193,   0.012287834,   0.015105025,   0.019256866,
+    0.025613198,   0.035780203,   0.056347881,   0.1185571,     0.010869758,   0.0075791497,  0.0075142612,
+    0.0074908487,  0.0075090546,  0.0075697373,  0.0076745213,  0.0078380974,  0.0080791777,  0.0083775017,
+    0.0087395925,  0.0091736475,  0.0095673967,  0.0094644791,  0.0096009346,  0.0097800363,  0.0099847578,
+    0.010221609,   0.010498863,   0.010827218,   0.01122079,    0.011705467,   0.012302885,   0.013050069,
+    0.01415265,    0.017107019,   0.021145405,   0.026928712,   0.035780203,   0.050789613,   0.079905123,
+    0.167955,      0.018018194,   0.012570018,   0.012467773,   0.012433167,   0.012466465,   0.012569092,
+    0.012743721,   0.01299985,    0.013395431,   0.01386874,    0.013688202,   0.013522077,   0.013373735,
+    0.013246854,   0.013295894,   0.013552951,   0.013846565,   0.014185824,   0.014582291,   0.015050948,
+    0.015611588,   0.016292507,   0.017139552,   0.01886756,    0.022383973,   0.027026452,   0.033370156,
+    0.042451967,   0.056347881,   0.079905123,   0.12767085,    0.2680957,     0.039855745,   0.027817938,
+    0.027602749,   0.02753487,    0.02761494,    0.02741445,    0.026927261,   0.026449621,   0.026088707,
+    0.025760984,   0.025454629,   0.025175273,   0.024929052,   0.024722738,   0.024567602,   0.025058351,
+    0.025618585,   0.026265224,   0.027019849,   0.027910447,   0.028974038,   0.030260772,   0.034165442,
+    0.039911121,   0.047296364,   0.057043046,   0.070356794,   0.089411013,   0.1185571,     0.167955,
+    0.2680957,     0.57145911};
+
+const float inv_dequantY_matrix4x4[64] = { // 64 entries despite the "4x4" name; read as 8 rows of 8, every 2x2 block except the top-left one repeats one value
+    223.3136,  806.52777, 318.93411, 318.93411, 149.30334, 149.30334, 45.661449, 45.661449, 806.52777, 681.10364,
+    318.93411, 318.93411, 149.30334, 149.30334, 45.661449, 45.661449, 318.93411, 318.93411, 275.21634, 275.21634,
+    145.66022, 145.66022, 54.467594, 54.467594, 318.93411, 318.93411, 275.21634, 275.21634, 145.66022, 145.66022,
+    54.467594, 54.467594, 149.30334, 149.30334, 145.66022, 145.66022, 61.948792, 61.948792, 35.90316,  35.90316,
+    149.30334, 149.30334, 145.66022, 145.66022, 61.948792, 61.948792, 35.90316,  35.90316,  45.661449, 45.661449,
+    54.467594, 54.467594, 35.90316,  35.90316,  16.643375, 16.643375, 45.661449, 45.661449, 54.467594, 54.467594,
+    35.90316,  35.90316,  16.643375, 16.643375}; // NOTE(review): presumably a 4x4 table upsampled to 8x8 - confirm with the code that indexes it
+
+const float inv_dequantY_matrix8x8[64] = { // 64 = 8x8 entries; symmetric when read as 8 rows of 8; entry [0] equals invDequantDCy
+    527.77002, 492.65561, 550.70117, 432.71796, 360.75452, 346.31924, 383.70496, 689.83191, 492.65561, 298.52472,
+    337.552,   245.20557, 212.67296, 212.82637, 246.32417, 450.54834, 550.70117, 337.552,   259.35614, 256.15363,
+    222.99277, 206.25125, 237.36348, 425.44247, 432.71796, 245.20557, 256.15363, 199.77573, 211.62845, 227.93318,
+    283.04922, 490.9874,  360.75452, 212.67296, 222.99277, 211.62845, 185.37491, 227.84924, 329.37961, 594.96875,
+    346.31924, 212.82637, 206.25125, 227.93318, 227.84924, 251.22809, 366.73212, 655.12872, 383.70496, 246.32417,
+    237.36348, 283.04922, 329.37961, 366.73212, 437.50745, 782.39978, 689.83191, 450.54834, 425.44247, 490.9874,
+    594.96875, 655.12872, 782.39978, 1256.8921};
+
+const float inv_dequantY_matrix16x16[256] = { // 256 = 16x16 entries; NOTE(review): looks symmetric when read as 16 rows of 16 - confirm
+    2525.2463, 2889.5769, 2448.946,  2054.7781, 1705.965,  1334.7472, 1015.6563, 762.1087,  562.66223, 435.28247,
+    349.84714, 272.42859, 202.97031, 138.50891, 79.389687, 34.017712, 2889.5769, 4040.8655, 3651.2961, 2971.114,
+    2442.6589, 1891.9027, 1437.5348, 1077.6138, 795.04034, 619.11987, 497.04764, 386.70529, 287.89609, 195.78772,
+    112.18296, 48.05555,  2448.946,  3651.2961, 3170.6489, 2899.0952, 2384.0686, 1825.5522, 1383.5625, 1037.3011,
+    767.29541, 611.52161, 490.77493, 381.7207,  284.1236,  191.57233, 109.81701, 47.060715, 2054.7781, 2971.114,
+    2899.0952, 2438.3906, 2078.4248, 1698.991,  1296.4403, 974.94934, 726.59454, 594.83191, 476.06509, 370.42432,
+    275.80826, 183.46349, 105.29673, 45.171799, 1705.965,  2442.6589, 2384.0686, 2078.4248, 1678.5922, 1406.3754,
+    1140.9033, 885.44263, 699.23431, 572.6062,  458.19373, 355.5137,  264.08118, 172.00293, 98.863861, 42.484276,
+    1334.7472, 1891.9027, 1825.5522, 1698.991,  1406.3754, 1129.6786, 931.25238, 756.44586, 654.76392, 540.37109,
+    432.85818, 336.13327, 244.91531, 158.96724, 91.382034, 39.258305, 1015.6563, 1437.5348, 1383.5625, 1296.4403,
+    1140.9033, 931.25238, 741.59857, 660.7334,  575.08978, 488.09857, 400.88797, 311.76273, 220.59109, 143.60771,
+    82.769249, 35.640659, 762.1087,  1077.6138, 1037.3011, 974.94934, 885.44263, 756.44586, 660.7334,  567.71997,
+    496.48697, 423.3364,  350.61719, 273.89575, 193.89845, 126.67858, 73.240982, 31.625689, 562.66223, 795.04034,
+    767.29541, 726.59454, 699.23431, 654.76392, 575.08978, 496.48697, 420.74704, 360.22037, 297.81375, 225.37112,
+    162.28207, 108.68169, 63.209522, 27.185785, 435.28247, 619.11987, 611.52161, 594.83191, 572.6062,  540.37109,
+    488.09857, 423.3364,  360.22037, 299.95483, 237.33531, 180.70923, 130.87787, 88.126045, 52.347446, 20.319235,
+    349.84714, 497.04764, 490.77493, 476.06509, 458.19373, 432.85818, 400.88797, 350.61719, 297.81375, 237.33531,
+    183.6788,  140.66177, 102.4373,  69.338005, 39.104111, 14.231859, 272.42859, 386.70529, 381.7207,  370.42432,
+    355.5137,  336.13327, 311.76273, 273.89575, 225.37112, 180.70923, 140.66177, 105.58356, 77.293259, 51.83585,
+    25.757425, 9.4778728, 202.97031, 287.89609, 284.1236,  275.80826, 264.08118, 244.91531, 220.59109, 193.89845,
+    162.28207, 130.87787, 102.4373,  77.293259, 55.557743, 31.874472, 16.019382, 5.9585524, 138.50891, 195.78772,
+    191.57233, 183.46349, 172.00293, 158.96724, 143.60771, 126.67858, 108.68169, 88.126045, 69.338005, 51.83585,
+    31.874472, 18.105633, 9.1997948, 3.4581161, 79.389687, 112.18296, 109.81701, 105.29673, 98.863861, 91.382034,
+    82.769249, 73.240982, 63.209522, 52.347446, 39.104111, 25.757425, 16.019382, 9.1997948, 4.6316514, 1.7588025,
+    34.017712, 48.05555,  47.060715, 45.171799, 42.484276, 39.258305, 35.640659, 31.625689, 27.185785, 20.319235,
+    14.231859, 9.4778728, 5.9585524, 3.4581161, 1.7588025, 0.66216683};
+
+const float inv_dequantY_matrix32x32[1024] = { // 1024 = 32x32 entries; NOTE(review): appears to be the element-wise reciprocal of dequantY_matrix32x32 - confirm
+    5647.4438, 6753.8403, 5959.4565, 5245.7246, 4606.1094, 4034.3606, 3524.541,  3157.363,  2848.7861, 2563.1165,
+    2299.2993, 2056.2517, 1832.8772, 1640.2583, 1474.5443, 1320.2314, 1176.8837, 1044.0483, 921.2619,  809.23602,
+    709.88043, 617.86603, 532.87958, 454.60342, 382.71756, 316.90189, 247.11736, 186.80002, 135.44676, 91.998367,
+    55.499458, 25.090487, 6753.8403, 9488.665,  9931.2803, 8553.8994, 7292.7812, 6249.7036, 5372.2231, 4770.6211,
+    4266.6396, 3812.8267, 3402.0034, 3029.106,  2690.2749, 2401.6631, 2153.3167, 1923.5682, 1711.2981, 1515.4852,
+    1335.1792, 1171.4097, 1026.3,    892.26086, 768.74207, 655.20587, 551.12677, 455.99094, 355.04221, 268.21002,
+    194.36061, 131.94092, 79.554382, 35.948029, 5959.4565, 9931.2803, 7932.855,  7885.7764, 7431.9438, 6555.0298,
+    5627.7007, 5011.5894, 4468.5908, 3979.2915, 3532.7783, 3132.7627, 2772.9871, 2472.9675, 2211.6484, 1971.3832,
+    1750.5248, 1547.6572, 1361.5294, 1193.6245, 1044.517,  907.12451, 780.78949, 664.8869,  558.81952, 461.48712,
+    358.64676, 270.78668, 196.12987, 133.08028, 80.206787, 36.228275, 5245.7246, 8553.8994, 7885.7764, 6599.938,
+    6392.4233, 5990.2383, 5585.4102, 5132.8647, 4559.7266, 4052.7339, 3600.2258, 3194.0774, 2828.3188, 2530.5908,
+    2258.1785, 2008.9121, 1780.8026, 1572.0654, 1381.1697, 1210.509,  1058.1282, 918.03662, 789.47559, 671.73779,
+    564.15955, 463.70047, 360.19818, 271.84094, 196.81523, 133.49622, 80.430031, 36.317585, 4606.1094, 7292.7812,
+    7431.9438, 6392.4233, 5463.9927, 5234.7217, 5027.3745, 4747.835,  4432.0508, 4077.1138, 3616.7708, 3204.7395,
+    2840.1484, 2544.2102, 2272.9863, 2024.2074, 1795.993,  1586.7396, 1393.4408, 1221.5485, 1066.7343, 924.68793,
+    794.56097, 675.57465, 567.00665, 463.13876, 359.63824, 271.33292, 196.3905,  133.17256, 80.215202, 36.212284,
+    4034.3606, 6249.7036, 6555.0298, 5990.2383, 5234.7217, 4639.8286, 4495.0903, 4289.3535, 4036.0081, 3757.8186,
+    3468.5669, 3177.3008, 2836.9285, 2539.3843, 2267.095,  2017.6912, 1789.1992, 1579.9347, 1388.4299, 1220.6163,
+    1067.0275, 925.80829, 795.85162, 676.24988, 567.24963, 459.76926, 356.94595, 269.24948, 194.8479,  132.10498,
+    79.560242, 36.477112, 3524.541,  5372.2231, 5627.7007, 5585.4102, 5027.3745, 4495.0903, 4010.2107, 3850.562,
+    3651.6968, 3424.1008, 3178.1006, 2926.1726, 2711.8442, 2496.7043, 2245.2983, 1997.7461, 1771.0409, 1563.5052,
+    1375.428,  1209.7025, 1057.1802, 917.02057, 788.44995, 670.74811, 563.23944, 452.99265, 352.11801, 265.60474,
+    192.19832, 130.30128, 78.470016, 37.137085, 3157.363,  4770.6211, 5011.5894, 5132.8647, 4747.835,  4289.3535,
+    3850.562,  3447.8291, 3286.3145, 3099.2502, 2895.1919, 2699.8142, 2511.5901, 2320.4753, 2129.8306, 1942.3022,
+    1741.6445, 1537.5632, 1355.778,  1192.2618, 1041.8038, 903.57141, 776.79602, 660.76172, 554.79736, 441.61682,
+    343.37253, 259.37482, 187.94739, 127.58199, 76.923965, 37.807724, 2848.7861, 4266.6396, 4468.5908, 4559.7266,
+    4432.0508, 4036.0081, 3651.6968, 3286.3145, 2948.2344, 2792.2668, 2631.2812, 2478.1697, 2314.8501, 2145.7014,
+    1975.4551, 1806.6643, 1641.3643, 1481.1416, 1328.6294, 1168.4478, 1021.0288, 885.57111, 761.33032, 647.61041,
+    538.33154, 427.79807, 332.76541, 251.4594,  182.27744, 123.77497, 74.652321, 38.330761, 2563.1165, 3812.8267,
+    3979.2915, 4052.7339, 4077.1138, 3757.8186, 3424.1008, 3099.2502, 2792.2668, 2514.804,  2395.2642, 2263.0088,
+    2121.7727, 1974.774,  1823.2893, 1672.0176, 1522.9102, 1381.8047, 1248.6274, 1119.8175, 995.08313, 863.20764,
+    742.2077,  631.42041, 517.84808, 411.75937, 320.4624,  242.28418, 175.70909, 119.36732, 72.104607, 38.818394,
+    2299.2993, 3402.0034, 3532.7783, 3600.2258, 3616.7708, 3468.5669, 3178.1006, 2895.1919, 2631.2812, 2395.2642,
+    2173.4993, 2059.0305, 1935.8486, 1806.7939, 1674.4062, 1539.9724, 1406.8092, 1283.0233, 1161.7151, 1043.8623,
+    930.24261, 821.45776, 717.9574,  611.66425, 494.85779, 393.75824, 306.65472, 231.98755, 168.33853, 114.42181,
+    73.055611, 39.285583, 2056.2517, 3029.106,  3132.7627, 3194.0774, 3204.7395, 3177.3008, 2926.1726, 2699.8142,
+    2478.1697, 2263.0088, 2059.0305, 1866.9041, 1759.4436, 1646.1713, 1529.2731, 1410.6986, 1297.5624, 1185.7885,
+    1075.7563, 968.41003, 864.51721, 764.68787, 669.39166, 568.6261,  467.31378, 374.0784,  291.55334, 220.72223,
+    160.27208, 109.0079,  73.953133, 39.721516, 1832.8772, 2690.2749, 2772.9871, 2828.3188, 2840.1484, 2836.9285,
+    2711.8442, 2511.5901, 2314.8501, 2121.7727, 1935.8486, 1759.4436, 1593.079,  1493.7609, 1390.7449, 1289.4801,
+    1190.1628, 1090.6735, 991.50104, 894.14368, 799.57288, 708.38879, 620.86053, 518.82355, 427.24158, 345.59891,
+    273.30957, 208.65121, 151.62337, 104.52164, 74.773422, 40.113838, 1640.2583, 2401.6631, 2472.9675, 2530.5908,
+    2544.2102, 2539.3843, 2496.7043, 2320.4753, 2145.7014, 1974.774,  1806.7939, 1646.1713, 1493.7609, 1349.9647,
+    1262.1243, 1175.2073, 1086.6716, 997.62921, 909.06207, 821.62915, 735.92542, 653.02197, 561.79041, 470.45264,
+    388.18765, 314.60828, 249.25372, 191.61386, 141.14845, 105.65822, 75.489624, 40.448593, 1474.5443, 2153.3167,
+    2211.6484, 2258.1785, 2272.9863, 2267.095,  2245.2983, 2129.8306, 1975.4551, 1823.2893, 1674.4062, 1529.2731,
+    1390.7449, 1262.1243, 1143.4061, 1066.4246, 987.73401, 908.31421, 829.04413, 750.69647, 673.93848, 594.4339,
+    505.18051, 423.92969, 350.49826, 284.60471, 225.89371, 173.95778, 128.35512, 104.15652, 75.211189, 40.704014,
+    1320.2314, 1923.5682, 1971.3832, 2008.9121, 2024.2074, 2017.6912, 1997.7461, 1942.3022, 1806.6643, 1672.0176,
+    1539.9724, 1410.6986, 1289.4801, 1175.2073, 1066.4246, 963.35919, 893.63898, 823.0542,  752.38312, 682.31958,
+    613.47125, 529.8266,  451.40695, 379.5824,  314.45175, 255.8183,  203.41415, 156.91895, 125.86903, 102.24911,
+    73.784668, 39.906857, 1176.8837, 1711.2981, 1750.5248, 1780.8026, 1795.993,  1789.1992, 1771.0409, 1741.6445,
+    1641.3643, 1522.9102, 1406.8092, 1297.5624, 1190.1628, 1086.6716, 987.73401, 893.63898, 804.55994, 742.06683,
+    679.323,   616.94482, 542.94952, 468.94223, 400.48373, 337.65472, 280.26181, 228.42978, 181.96297, 142.4147,
+    123.38181, 100.15266, 72.220078, 39.034161, 1044.0483, 1515.4852, 1547.6572, 1572.0654, 1586.7396, 1579.9347,
+    1563.5052, 1537.5632, 1481.1416, 1381.8047, 1283.0233, 1185.7885, 1090.6735, 997.62921, 908.31421, 823.0542,
+    742.06683, 665.48138, 610.02527, 544.83606, 476.37396, 412.29703, 352.82245, 298.05792, 248.01785, 202.57388,
+    161.65331, 139.34525, 120.61987, 97.831955, 70.492912, 38.073158, 921.2619,  1335.1792, 1361.5294, 1381.1697,
+    1393.4408, 1388.4299, 1375.428,  1355.778,  1328.6294, 1248.6274, 1161.7151, 1075.7563, 991.50104, 909.06207,
+    829.04413, 752.38312, 679.323,   610.02527, 535.70306, 473.75674, 415.06741, 359.95514, 308.63498, 261.22885,
+    217.77783, 178.25468, 150.78267, 135.90138, 117.53384, 95.248413, 68.576332, 37.00983,  809.23602, 1171.4097,
+    1193.6245, 1210.509,  1221.5485, 1220.6163, 1209.7025, 1192.2618, 1168.4478, 1119.8175, 1043.8623, 968.41003,
+    894.14368, 821.62915, 750.69647, 682.31958, 616.94482, 544.83606, 473.75674, 408.93054, 358.96671, 311.90079,
+    267.9375,  227.20329, 189.75572, 158.04335, 146.51324, 131.99625, 114.06984, 92.359825, 66.441002, 35.828876,
+    709.88043, 1026.3,    1044.517,  1058.1282, 1066.7343, 1067.0275, 1057.1802, 1041.8038, 1021.0288, 995.08313,
+    930.24261, 864.51721, 799.57288, 735.92542, 673.93848, 613.47125, 542.94952, 476.37396, 415.06741, 358.96671,
+    307.93683, 268.05362, 230.68958, 195.96881, 163.95755, 152.98413, 141.67432, 127.50971, 110.12691, 89.120285,
+    64.054985, 34.513657, 617.86603, 892.26086, 907.12451, 918.03662, 924.68793, 925.80829, 917.02057, 903.57141,
+    885.57111, 863.20764, 821.45776, 764.68787, 708.38879, 653.02197, 594.4339,  529.8266,  468.94223, 412.29703,
+    359.95514, 311.90079, 268.05362, 228.28214, 196.80522, 167.47345, 155.85141, 147.21855, 136.18883, 122.44758,
+    105.65214, 85.430168, 61.377911, 33.046085, 532.87958, 768.74207, 780.78949, 789.47559, 794.56097, 795.85162,
+    788.44995, 776.79602, 761.33032, 742.2077,  717.9574,  669.39166, 620.86053, 561.79041, 505.18051, 451.40695,
+    400.48373, 352.82245, 308.63498, 267.9375,  230.68958, 196.80522, 166.16272, 155.48438, 149.05833, 140.64102,
+    129.96239, 116.72793, 100.61709, 81.281746, 58.344582, 29.269342, 454.60342, 655.20587, 664.8869,  671.73779,
+    675.57465, 676.24988, 670.74811, 660.76172, 647.61041, 631.42041, 611.66425, 568.6261,  518.82355, 470.45264,
+    423.92969, 379.5824,  337.65472, 298.05792, 261.22885, 227.20329, 195.96881, 167.47345, 155.48438, 147.53543,
+    141.2679,  133.13718, 122.89288, 110.26207, 94.947845, 76.627945, 53.001026, 25.055674, 382.71756, 551.12677,
+    558.81952, 564.15955, 567.00665, 567.24963, 563.23944, 554.79736, 538.33154, 517.84808, 494.85779, 467.31378,
+    427.24158, 388.18765, 350.49826, 314.45175, 280.26181, 248.01785, 217.77783, 189.75572, 163.95755, 155.85141,
+    149.05833, 141.2679,  132.34445, 124.5834,  114.86992, 102.95419, 88.564606, 70.65815,  44.67482,  21.143274,
+    316.90189, 455.99094, 461.48712, 463.70047, 463.13876, 459.76926, 452.99265, 441.61682, 427.79807, 411.75937,
+    393.75824, 374.0784,  345.59891, 314.60828, 284.60471, 255.8183,  228.42978, 202.57388, 178.25468, 158.04335,
+    152.98413, 147.21855, 140.64102, 133.13718, 124.5834,  114.84621, 105.77442, 94.701004, 81.38131,  58.455536,
+    37.000786, 17.530621, 247.11736, 355.04221, 358.64676, 360.19818, 359.63824, 356.94595, 352.11801, 343.37253,
+    332.76541, 320.4624,  306.65472, 291.55334, 273.30957, 249.25372, 225.89371, 203.41415, 181.96297, 161.65331,
+    150.78267, 146.51324, 141.67432, 136.18883, 129.96239, 122.89288, 114.86992, 105.77442, 95.478004, 85.391182,
+    66.203133, 47.291599, 29.966896, 14.213268, 186.80002, 268.21002, 270.78668, 271.84094, 271.33292, 269.24948,
+    265.60474, 259.37482, 251.4594,  242.28418, 231.98755, 220.72223, 208.65121, 191.61386, 173.95778, 156.91895,
+    142.4147,  139.34525, 135.90138, 131.99625, 127.50971, 122.44758, 116.72793, 110.26207, 102.95419, 94.701004,
+    85.391182, 67.933937, 51.929531, 37.135086, 23.556034, 11.184304, 135.44676, 194.36061, 196.12987, 196.81523,
+    196.3905,  194.8479,  192.19832, 187.94739, 182.27744, 175.70909, 168.33853, 160.27208, 151.62337, 141.14845,
+    128.35512, 125.86903, 123.38181, 120.61987, 117.53384, 114.06984, 110.12691, 105.65214, 100.61709, 94.947845,
+    88.564606, 81.38131,  66.203133, 51.929531, 39.04237,  27.948416, 17.746897, 8.4347544, 91.998367, 131.94092,
+    133.08028, 133.49622, 133.17256, 132.10498, 130.30128, 127.58199, 123.77497, 119.36732, 114.42181, 109.0079,
+    104.52164, 105.65822, 104.15652, 102.24911, 100.15266, 97.831955, 95.248413, 92.359825, 89.120285, 85.430168,
+    81.281746, 76.627945, 70.65815,  58.455536, 47.291599, 37.135086, 27.948416, 19.689066, 12.514842, 5.9539757,
+    55.499458, 79.554382, 80.206787, 80.430031, 80.215202, 79.560242, 78.470016, 76.923965, 74.652321, 72.104607,
+    73.055611, 73.953133, 74.773422, 75.489624, 75.211189, 73.784668, 72.220078, 70.492912, 68.576332, 66.441002,
+    64.054985, 61.377911, 58.344582, 53.001026, 44.67482,  37.000786, 29.966896, 23.556034, 17.746897, 12.514842,
+    7.8326411, 3.7300112, 25.090487, 35.948029, 36.228275, 36.317585, 36.212284, 36.477112, 37.137085, 37.807724,
+    38.330761, 38.818394, 39.285583, 39.721516, 40.113838, 40.448593, 40.704014, 39.906857, 39.034161, 38.073158,
+    37.00983,  35.828876, 34.513657, 33.046085, 29.269342, 25.055674, 21.143274, 17.530621, 14.213268, 11.184304,
+    8.4347544, 5.9539757, 3.7300112, 1.7499064};
+
+const float dequantDCx = 0.0002909539; // NOTE(review): presumably the DC dequantization scale of the "x" channel - confirm
+const float dequantDCy = 0.0018947646; // NOTE(review): presumably the same for the "y" channel - confirm
+const float dequantDCb = 0.0036905503; // NOTE(review): presumably the same for the "b" channel - confirm
+
+const float invDequantDCx = 3436.9705; // reciprocal of dequantDCx, to the printed precision
+const float invDequantDCy = 527.77002; // reciprocal of dequantDCy; same value as inv_dequantY_matrix8x8[0]
+const float invDequantDCb = 270.96231; // reciprocal of dequantDCb, to the printed precision
+
+#endif
diff --git a/codec/L2/include/hw/pikEnc/kernel3/ac_tokenize.hpp b/codec/L2/include/hw/pikEnc/kernel3/ac_tokenize.hpp
new file mode 100644
index 0000000000..309dbf0723
--- /dev/null
+++ b/codec/L2/include/hw/pikEnc/kernel3/ac_tokenize.hpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file ac_tokenize.hpp
+ */
+
+#ifndef _XF_CODEC_AC_TOKENIZE_HPP_
+#define _XF_CODEC_AC_TOKENIZE_HPP_
+
+#include "kernel3/kernel3_common.hpp"
+
+const int hls_kZeroDensityContextCount = 105; // NOTE(review): derivation of 105 is not visible in this header
+
+// For DCT 8x8 there could be up to 63 non-zero AC coefficients (and one DC
+// coefficient). To reduce the total number of contexts,
+// the values are combined in pairs, i.e. 0..63 -> 0..31.
+const uint32_t hls_kNonZeroBuckets = 32; // 64 possible counts folded two per bucket
+
+// TODO(user): find better clustering for PIK use case.
+static const uint8_t hls_kCoeffFreqContext[64] = { // 64 entries -> context ids 0..15, non-decreasing, coarser at the end
+    0,  1,  2,  3,  4,  4,  5,  5,  6,  6,  7,  7,  8,  8,  8,  8,  9,  9,  9,  9,  10, 10,
+    10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14,
+    14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+};
+
+// TODO(user): find better clustering for PIK use case.
+static const uint8_t hls_kCoeffNumNonzeroContext[65] = { // 65 entries (index 0..64); entry 0 is 255, the "0xBAD" marker
+    255, 0,  0,  16, 16, 16, 32, 32, 32, 32, 48, 48, 48, 48, 48, 48, 64, 64, 64, 64, 64, 64,
+    64,  64, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 93, 93, 93, 93,
+    93,  93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93};
+
+const uint8_t hls_kSkipAndBitsSymbol[256] = { // 256-entry byte lookup; index layout (skip/bits packing) to be confirmed
+    0,   1,   2,   3,   5,   10,  17,  32,  68,  83,  84,  85,  86,  87,  88,  89,  90,  4,   7,   12,  22,  31,
+    43,  60,  91,  92,  93,  94,  95,  96,  97,  98,  99,  6,   14,  26,  36,  48,  66,  100, 101, 102, 103, 104,
+    105, 106, 107, 108, 109, 8,   19,  34,  44,  57,  78,  110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 9,
+    27,  39,  52,  61,  79,  120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 11,  28,  41,  53,  64,  80,  130,
+    131, 132, 133, 134, 135, 136, 137, 138, 139, 13,  33,  46,  63,  72,  140, 141, 142, 143, 144, 145, 146, 147,
+    148, 149, 150, 15,  35,  47,  65,  69,  151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 16,  37,  51,
+    62,  74,  162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 18,  38,  50,  59,  75,  173, 174, 175, 176,
+    177, 178, 179, 180, 181, 182, 183, 20,  40,  54,  76,  82,  184, 185, 186, 187, 188, 189, 190, 191, 192, 193,
+    194, 23,  42,  55,  77,  195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 24,  45,  56,  70,  207,
+    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 25,  49,  58,  71,  219, 220, 221, 222, 223, 224, 225,
+    226, 227, 228, 229, 230, 29,  67,  81,  231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 21,  30,
+    73,  243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
+};
+
+// Coefficient tokenizer, variant 5: takes an array of 8 coefficient streams and 24-bit packed token streams.
+void XAcc_TokenizeCoefficients5(const int32_t orders[3][64], // color dct_band
+                                hls_Rect& rect,              // passed by non-const reference
+                                hls::stream<dct_t> strm_coef_raster[8], // 8 parallel streams of dct_t
+                                uint8_t ac_static_context_map[hls_kNumContexts],
+                                hls::stream<ap_uint<13> >& strm_token_addr, // 13-bit words (same width as addr_t)
+                                hls::stream<ap_uint<24> >& strm_token_symb, // 24-bit packed words
+                                hls::stream<ap_uint<24> >& strm_token_bits, // 24-bit packed words
+                                hls::stream<bool>& strm_e_addr,   // presumably end flag for strm_token_addr (confirm)
+                                hls::stream<bool>& strm_e_token); // presumably end flag for the token streams (confirm)
+
+void XAcc_TokenizeCoefficients6(const int32_t orders[3][64], // color dct_band
+                                const group_rect rect,       // tile/block counts, passed by value
+                                hls::stream<dct_t>& strm_coef_raster, // single coefficient stream (variant 5 takes 8)
+                                uint8_t ac_static_context_map[hls_kNumContexts],
+                                hls::stream<ap_uint<13> >& strm_token_addr,   // 13-bit words (same width as addr_t)
+                                hls::stream<hls_Token_symb>& strm_token_symb, // {context, symbol} records
+                                hls::stream<hls_Token_bits>& strm_token_bits, // {bits, nbits} records
+                                hls::stream<bool>& strm_e_addr,   // presumably end flag for strm_token_addr (confirm)
+                                hls::stream<bool>& strm_e_token); // presumably end flag for the token streams (confirm)
+
+// Declared here for co-simulation (original note: "for cosim").
+void hls_orderblk_tokennz(const int32_t orders[3][64], // color dct_band
+                          const hls_blksize rect,      // {xsize, ysize}, passed by value
+                          hls::stream<dct_t>& strm_coef_raster,
+                          hls::stream<nzeros_t>& cnt_nz,          // stream of 8-bit counts
+                          hls::stream<hls_Token>& strm_nz_token,  // full token records
+                          hls::stream<dct_t> strm_coef_orderd[64]); // 64 streams; "orderd" spelling is part of the API
+
+void hls_order_blk(const hls_blksize rect,      // {xsize, ysize}, passed by value
+                   const int32_t orders[3][64], // same [3][64] order table shape as the tokenizers above
+                   hls::stream<dct_t>& strm_coef_raster,
+                   hls::stream<dct_t>& strm_coef_orderd); // single stream here (hls_orderblk_tokennz uses 64)
+#endif
diff --git a/codec/L2/include/hw/pikEnc/kernel3/ans.hpp b/codec/L2/include/hw/pikEnc/kernel3/ans.hpp
new file mode 100644
index 0000000000..4e0d70a34b
--- /dev/null
+++ b/codec/L2/include/hw/pikEnc/kernel3/ans.hpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file ans.hpp
+ */
+
+#ifndef _XF_CODEC_ANS_HPP_
+#define _XF_CODEC_ANS_HPP_
+
+#include "kernel3/kernel3_common.hpp"
+
+void XAcc_WriteTokens_wapper( // input ("wapper" spelling is part of the public name)
+    const int start, // token range bound; inclusive/exclusive not visible here
+    const int end,   // token range bound; inclusive/exclusive not visible here
+    hls::stream<hls_Token_symb>& strm_ac_token_reverse, // {context, symbol} records
+    hls::stream<hls_Token_bits>& strm_token_bit,        // {bits, nbits} records
+    hls_ANSEncSymbolInfo codes[hls_kNumStaticContexts][hls_alphabet_size], // [24][256] symbol table
+    uint8_t context_map[hls_kNumContexts], // table
+    const bool is_dc,
+    uint8_t dc_context_map[MAX_NUM_COLOR], // one entry per colour (3)
+
+    // output
+    uint32_t& pos,        // non-const reference: may be updated by the callee
+    uint8_t& cnt_buffer,  // non-const reference: may be updated by the callee
+    uint16_t& reg_buffer, // non-const reference: may be updated by the callee
+    hls::stream<uint16_t>& strm_pos_byte, // 16-bit output words
+    hls::stream<bool>& strm_ac_e);        // presumably end flag for the output stream (confirm)
+
+void hls_WriteTokensTop( // top-level token writer declaration; inputs first, outputs last
+
+    // input
+    const int total_token,
+    hls::stream<hls_Token_symb>& strm_ac_token_reverse, // {context, symbol} records
+    hls::stream<hls_Token_bits>& strm_token_bit,        // {bits, nbits} records
+    hls_ANSEncSymbolInfo hls_codes[hls_kNumStaticContexts][hls_alphabet_size], // [24][256] symbol table
+    uint8_t ac_static_context_map[hls_kNumContexts], // table
+    const bool is_dc,
+    uint8_t dc_context_map[MAX_NUM_COLOR], // one entry per colour (3)
+
+    // output
+    int& len_ac, // non-const reference: may be updated by the callee
+    hls::stream<uint16_t>& strm_ac_byte, // 16-bit output words
+    hls::stream<bool>& strm_ac_e);       // presumably end flag for strm_ac_byte (confirm)
+
+struct x_PosAndCount { // plain pair of 32-bit unsigned values
+    uint32_t pos;   // position; unit (bits or bytes) not visible here
+    uint32_t count; // count associated with pos
+};
+
+#endif
diff --git a/codec/L2/include/hw/pikEnc/kernel3/build_cluster.hpp b/codec/L2/include/hw/pikEnc/kernel3/build_cluster.hpp
new file mode 100644
index 0000000000..f7452fc78f
--- /dev/null
+++ b/codec/L2/include/hw/pikEnc/kernel3/build_cluster.hpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file build_cluster.hpp
+ */
+
+#ifndef _XF_CODEC_BUILD_CLUSTER_HPP
+#define _XF_CODEC_BUILD_CLUSTER_HPP
+
+#include "kernel3/build_table_encode_histo.hpp"
+#include "kernel3/kernel3_common.hpp"
+
+void hls_ClusterHistograms_top(const hist_t hls_clustgrams[hls_kNumStaticContexts][MAX_ALPHABET_SIZE], // [24][256], read-only
+                               int max_histograms,
+                               int& num_clusters, // non-const reference: may be updated by the callee
+                               uint8_t max_nz_symbol[MAX_NUM_COLOR], // one entry per colour (3)
+                               hist_t hls_clustgrams_out[hls_kNumStaticContexts][MAX_ALPHABET_SIZE], // [24][256], writable
+                               uint8_t dc_context_map[MAX_NUM_COLOR]); // one entry per colour (3)
+
+float hls_ANSPopulationCost(const hist_t* data, int alphabet_size, int total_count); // float cost over a read-only histogram
+
+#endif
diff --git a/codec/L2/include/hw/pikEnc/kernel3/build_table_encode_histo.hpp b/codec/L2/include/hw/pikEnc/kernel3/build_table_encode_histo.hpp
new file mode 100644
index 0000000000..63a951ab35
--- /dev/null
+++ b/codec/L2/include/hw/pikEnc/kernel3/build_table_encode_histo.hpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file build_table_encode_histo.hpp
+ */
+
+#ifndef _XF_CODEC_BUILD_TABLE_ENCODE_HISTO_HPP_
+#define _XF_CODEC_BUILD_TABLE_ENCODE_HISTO_HPP_
+
+#include "kernel3/build_cluster.hpp"
+#include "kernel3/kernel3_common.hpp"
+
+// Static Huffman code for encoding logcounts. The last symbol is used as RLE
+// sequence. Both tables have hls_ANS_LOG_TAB_SIZE + 2 = 12 entries and are indexed by the same symbol.
+static const uint8_t hls_kLogCountBitLengths[hls_ANS_LOG_TAB_SIZE + 2] = { // code length in bits per symbol
+    5, 4, 4, 4, 3, 3, 2, 3, 3, 6, 7, 7,
+};
+static const uint16_t hls_kLogCountSymbols[hls_ANS_LOG_TAB_SIZE + 2] = { // code word bits per symbol
+    15, 3, 11, 7, 2, 6, 0, 1, 5, 31, 63, 127,
+};
+
+void XAcc_BuildAndStore_top(uint32_t histogram[MAX_ALPHABET_SIZE], // up to 256 counts
+                            const uint16_t alphabet_size,
+                            hls_ANSEncSymbolInfo ans_table[MAX_ALPHABET_SIZE], // one entry per symbol
+                            int* pos, // tmp cache int64
+                            hls::stream<uint8_t>& strm_histo, // byte stream
+                            uint8_t& tail_bits);              // non-const reference: may be updated by the callee
+
+void XAcc_EncodeHistogramsFast_top(const bool is_dc,
+                                   uint8_t dc_context_map[MAX_NUM_COLOR], // one entry per colour (3)
+                                   hls::stream<ap_uint<13> >& strm_token_addr, // 13-bit words (same width as addr_t)
+                                   hls::stream<bool>& strm_e_addr, // presumably end flag for strm_token_addr (confirm)
+                                   hls_ANSEncSymbolInfo hls_codes[hls_kNumStaticContexts][hls_alphabet_size], // [24][256]
+                                   hist_t hls_histograms[hls_NumHistograms], // flat array of 6144 counters
+                                   int pos,        // by value (other entry points take int& or int*)
+                                   int& len_histo, // non-const reference: may be updated by the callee
+                                   hls::stream<uint8_t>& strm_histo_byte, // byte stream
+                                   hls::stream<bool>& strm_histo_e);      // presumably end flag for strm_histo_byte (confirm)
+
+void ADD_FP_strm(const int num_in,            // number of values to consume from strm_in
+                 hls::stream<float>& strm_in, // max 256 input
+                 hls::stream<float>& strm_sum); // presumably receives the floating-point sum (confirm)
+
+void build_historgram(hls::stream<ap_uint<13> >& strm_token_addr, // "historgram" spelling is part of the public name
+                      hls::stream<bool>& strm_e_addr, // presumably end flag for strm_token_addr (confirm)
+                      hist_t total[hls_kMinClustersForHistogramRemap], // 24 counters
+                      hist_t hls_histograms[hls_NumHistograms]);       // flat array of 6144 counters
+
+void build_historgram_syn(hls::stream<ap_uint<13> >& strm_token_addr, // same leading parameters as build_historgram
+                          hls::stream<bool>& strm_e_addr, // presumably end flag for strm_token_addr (confirm)
+                          hist_t total[hls_kMinClustersForHistogramRemap], // 24 counters
+                          hist_t hls_histograms[hls_NumHistograms],        // flat array of 6144 counters
+                          hist_t hls_histograms2[hls_kNumStaticContexts][MAX_ALPHABET_SIZE]); // extra [24][256] array
+
+void hls_EncodeContextMap(const uint8_t context_map[MAX_NUM_COLOR], // read-only, one entry per colour (3)
+                          const int num_histograms,
+                          int& num, // non-const reference: may be updated by the callee
+                          hls::stream<nbits_t>& strm_nbits,  // 8-bit bit counts
+                          hls::stream<uint16_t>& strm_bits); // 16-bit bit payloads
+
+void hls_build_and_encode_top(const bool is_dc,
+                              uint32_t histogram[hls_kNumStaticContexts][MAX_ALPHABET_SIZE], // [24][256] counts
+                              const uint16_t alphabet_size,
+                              const int cluster_size,
+                              const uint16_t alphabet_size_dc[MAX_NUM_COLOR], // one size per colour (3)
+                              hls_ANSEncSymbolInfo ans_table[hls_kNumStaticContexts][MAX_ALPHABET_SIZE], // [24][256]
+                              int& pos, // tmp cache int64
+                              const bool do_encode,
+                              hls::stream<uint8_t>& strm_histo, // byte stream
+                              hls::stream<bool>& strm_histo_e,  // presumably end flag for strm_histo (confirm)
+                              uint8_t& tail_bits);              // non-const reference: may be updated by the callee
+
+void hls_encode_histo_context(const bool is_dc,
+                              const hist_t hls_histograms2[hls_kNumStaticContexts][MAX_ALPHABET_SIZE], // [24][256], read-only
+                              hls_ANSEncSymbolInfo hls_codes[hls_kNumStaticContexts][MAX_ALPHABET_SIZE], // [24][256]
+                              uint8_t dc_context_map[MAX_NUM_COLOR], // one entry per colour (3)
+                              int& pos, // non-const reference: may be updated by the callee
+                              hls::stream<uint8_t>& strm_histo_byte, // byte stream
+                              hls::stream<bool>& strm_histo_e);      // presumably end flag for strm_histo_byte (confirm)
+
+#endif
diff --git a/codec/L2/include/hw/pikEnc/kernel3/ctrl_tokenize.hpp b/codec/L2/include/hw/pikEnc/kernel3/ctrl_tokenize.hpp
new file mode 100644
index 0000000000..45af753bd4
--- /dev/null
+++ b/codec/L2/include/hw/pikEnc/kernel3/ctrl_tokenize.hpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file ctrl_tokenize.hpp
+ */
+
+#ifndef _XF_CODEC_CTRL_TOKENIZE_HPP_
+#define _XF_CODEC_CTRL_TOKENIZE_HPP_
+
+#include "kernel3/kernel3_common.hpp"
+
+void Xacc_TokenizeCtrlField_top(hls_Rect dc_rect, // passed by value
+                                hls::stream<uint8_t>& strm_strategy,
+                                hls::stream<quant_t>& strm_quant_field, // 16-bit signed values
+                                hls::stream<arsigma_t>& strm_arsigma,   // 8-bit unsigned values
+                                hls::stream<bool>& strm_strategy_block0, // three parallel bool streams (0..2)
+                                hls::stream<bool>& strm_strategy_block1,
+                                hls::stream<bool>& strm_strategy_block2,
+
+                                hls::stream<addr_t>& strm_token_ct_addr,      // 13-bit addresses
+                                hls::stream<hls_Token_symb>& strm_token_symb, // {context, symbol} records
+                                hls::stream<hls_Token_bits>& strm_token_bits, // {bits, nbits} records
+                                hls::stream<bool>& strm_e_ct_addr, // presumably end flag for strm_token_ct_addr (confirm)
+                                hls::stream<bool>& strm_e_ctrl);   // presumably end flag for the token streams (confirm)
+#endif
diff --git a/codec/L2/include/hw/pikEnc/kernel3/dc_shrink.hpp b/codec/L2/include/hw/pikEnc/kernel3/dc_shrink.hpp
new file mode 100644
index 0000000000..58d1c2feeb
--- /dev/null
+++ b/codec/L2/include/hw/pikEnc/kernel3/dc_shrink.hpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file dc_shrink.hpp
+ */
+
+#ifndef _XF_CODEC_DC_SHRINK_HPP_
+#define _XF_CODEC_DC_SHRINK_HPP_
+
+#include "kernel3/kernel3_common.hpp"
+
+void hls_shink_Y_adaptive(const int xsize, // "shink" spelling is part of the public name
+                          const bool is_by1,
+                          const int m_idx, // 0,1,2,0,1,2
+                          dct_t line3_y[3][MAX_NUM_BLOCK88_W], // 3 line buffers of up to 1024 entries
+                          hls::stream<dct_t>& strm_dc_y,
+                          hls::stream<dct_t>& strm_dc_residuals);
+
+void hls_shink_fixed(const int xsize,
+                     hls::stream<dct_t>& strm_dc_y,
+                     dct_t line3_y[3][MAX_NUM_BLOCK88_W], // 3 line buffers of up to 1024 entries
+                     hls::stream<dct_t>& strm_dc_residuals);
+
+void hls_shink_XB_adaptive(const int xsize,
+                           const bool is_by1,
+                           const bool ym_idx, // 0,1,0,1
+                           const int m_idx,   // 0,1,2,0,1,2
+                           dct_t line2_y[2][MAX_NUM_BLOCK88_W],  // 2 line buffers of up to 1024 entries
+                           dct_t line3_xb[3][MAX_NUM_BLOCK88_W], // 3 line buffers of up to 1024 entries
+                           hls::stream<dct_t>& strm_dc_y,
+                           hls::stream<dct_t>& strm_dc_xb,
+                           hls::stream<dct_t>& strm_dc_residuals);
+
+void hls_shink_xb_fixed(const int xsize,
+                        hls::stream<dct_t>& strm_dc_y,
+                        hls::stream<dct_t>& strm_dc_xb,
+                        dct_t line2_y[2][MAX_NUM_BLOCK88_W],  // 2 line buffers of up to 1024 entries
+                        dct_t line3_xb[3][MAX_NUM_BLOCK88_W], // 3 line buffers of up to 1024 entries
+                        hls::stream<dct_t>& strm_dc_residuals);
+
+void hls_ShrinkDC_top(const hls_Rect rect_dc, // passed by value
+                      hls::stream<dct_t>& strm_dc_y1, // three separate Y streams (y1..y3)
+                      hls::stream<dct_t>& strm_dc_y2,
+                      hls::stream<dct_t>& strm_dc_y3,
+                      hls::stream<dct_t>& strm_dc_x,
+                      hls::stream<dct_t>& strm_dc_b,
+                      hls::stream<dct_t>& strm_dc_residuals); // single residual stream
+
+#endif
diff --git a/codec/L2/include/hw/pikEnc/kernel3/dc_tokenize.hpp b/codec/L2/include/hw/pikEnc/kernel3/dc_tokenize.hpp
new file mode 100644
index 0000000000..fb88cb9f16
--- /dev/null
+++ b/codec/L2/include/hw/pikEnc/kernel3/dc_tokenize.hpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file dc_tokenize.hpp
+ */
+
+#ifndef _XF_CODEC_DC_TOKENIZE_HPP_
+#define _XF_CODEC_DC_TOKENIZE_HPP_
+
+#include "kernel3/dc_shrink.hpp"
+#include "kernel3/kernel3_common.hpp"
+
+void hls_encode_dc_top(const bool rle,
+                       const hls_Rect rect_dc, // passed by value
+                       hls::stream<dct_t>& strm_dc_y1, // same five DC streams as hls_ShrinkDC_top
+                       hls::stream<dct_t>& strm_dc_y2,
+                       hls::stream<dct_t>& strm_dc_y3,
+                       hls::stream<dct_t>& strm_dc_x,
+                       hls::stream<dct_t>& strm_dc_b,
+                       hls::stream<addr_t>& strm_token_addr,         // 13-bit addresses
+                       hls::stream<hls_Token_symb>& strm_token_symb, // {context, symbol} records
+                       hls::stream<hls_Token_bits>& strm_token_bits, // {bits, nbits} records
+                       hls::stream<bool>& strm_e_addr, // presumably end flag for strm_token_addr (confirm)
+                       hls::stream<bool>& strm_e_dc);  // presumably end flag for the token streams (confirm)
+
+#endif
diff --git a/codec/L2/include/hw/pikEnc/kernel3/encode_order.hpp b/codec/L2/include/hw/pikEnc/kernel3/encode_order.hpp
new file mode 100644
index 0000000000..e69f9744ba
--- /dev/null
+++ b/codec/L2/include/hw/pikEnc/kernel3/encode_order.hpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file encode_order.hpp
+ */
+
+#ifndef _XF_CODEC_ENCODE_ORDER_HPP_
+#define _XF_CODEC_ENCODE_ORDER_HPP_
+
+#include "kernel3/kernel3_common.hpp"
+
+// Size of a batch of Lehmer-transformed coefficient-order codes.
+// If all codes in a batch are zero, the whole span is encoded with a single bit.
+#define hls_kCoeffOrderCodeSpan (16)
+
+void hls_EncodeCoeffOrder(hls::stream<int>& strm_order, // stream of int order values
+                          int& num_bits, // non-const reference: may be updated by the callee
+                          int& num,      // non-const reference: may be updated by the callee
+                          hls::stream<nbits_t>& strm_nbits,  // 8-bit bit counts
+                          hls::stream<uint16_t>& strm_bits); // 16-bit bit payloads
+
+#endif
diff --git a/codec/L2/include/hw/pikEnc/kernel3/kernel3_common.hpp b/codec/L2/include/hw/pikEnc/kernel3/kernel3_common.hpp
new file mode 100644
index 0000000000..12bd6d4dd0
--- /dev/null
+++ b/codec/L2/include/hw/pikEnc/kernel3/kernel3_common.hpp
@@ -0,0 +1,210 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file kernel3_common.hpp
+ * @brief JPEG XL codec common include struct.
+ */
+
+#ifndef __cplusplus // this header declares hls::stream<> interfaces, so it cannot be used from C
+#error "kernel3_common.hpp uses the hls::stream<> interface, and thus requires C++"
+#endif
+
+#ifndef _XF_CODEC_KERNEL3_COMMON_HPP_
+#define _XF_CODEC_KERNEL3_COMMON_HPP_
+
+#include <ap_int.h>
+#include <hls_math.h>
+#include <hls_stream.h>
+#include <stdint.h>
+
+#include "pik_common.hpp"
+
+#ifndef __SYNTHESIS__
+// For debug
+#include <bitset>
+#include <cstdio>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#endif
+
+// ------------------------------------------------------------
+// for dc Predictor
+// ------------------------------------------------------------
+
+#define hls_kNumPredictors (8) // pik_common.hpp defines the same macro with the same value (benign redefinition)
+// ------------------------------------------------------------
+// for histogram
+// ------------------------------------------------------------
+
+typedef ap_uint<13> addr_t; // 13-bit unsigned value (0..8191)
+
+// for tokenize type
+typedef int16_t dct_t;    // 16-bit signed; element type of the coefficient streams in the kernel3 headers
+typedef uint8_t nzeros_t; // 8-bit unsigned; element type of the cnt_nz stream
+
+struct hls_strategy { // a strategy byte plus a per-block flag
+    uint8_t strategy_; // strategy code; value meanings are not visible in this header
+    bool block_;       // flag; semantics not visible in this header
+};
+typedef int16_t quant_t;   // 16-bit signed; element type of the quant-field stream
+typedef uint8_t arsigma_t; // 8-bit unsigned; element type of the arsigma stream
+
+struct hls_Token { // full token; hls_Token_symb and hls_Token_bits each carry half of these fields
+    uint16_t context; // 0~411
+    uint8_t symbol;   // also carried by hls_Token_symb
+    uint8_t nbits;    // also carried by hls_Token_bits
+    uint16_t bits;    // also carried by hls_Token_bits
+};
+
+#define hls_kAcStrategyContexts (1) // context count constants for the control fields
+#define hls_kQuantFieldContexts (1)
+#define hls_kARParamsContexts (1)
+#define hls_QuantContext (2)
+
+static const int hls_kMaxNumSymbolsForSmallCode = 4; // internal linkage: each translation unit gets its own copy
+
+#define hls_PackSigned(value) (((uint16_t)(value) << 1) ^ (((uint16_t)(~(value)) >> 15) - 1)) // zigzag fold in the low 16 bits: 0,-1,1,-2 -> 0,1,2,3; argument and result parenthesized so expressions expand safely
+
+// to be removed. floor(log2(n)) helpers built on __builtin_clz, which counts leading zeros of a 32-bit unsigned int; n must be non-zero except for hls_Log2Floor_32b, and the 64b/Floor forms evaluate n more than once.
+#define hls_Log2FloorNonZero_16b(n) (31 ^ __builtin_clz((uint32_t)(uint16_t)(n))) // 16-bit input is widened, so the 32-bit formula applies
+#define hls_Log2FloorNonZero_32b(n) (31 ^ __builtin_clz((uint32_t)(n)))
+#define hls_Log2FloorNonZero_64b(n) ((uint32_t)((uint64_t)(n) >> 32) ? (32 + (31 ^ __builtin_clz((uint32_t)((uint64_t)(n) >> 32)))) : (31 ^ __builtin_clz((uint32_t)(n)))) // split into halves: clz only sees 32 bits
+#define hls_Log2Floor_32b(n) ((n) == 0 ? -1 : (31 ^ __builtin_clz((uint32_t)(n)))) // returns -1 for n == 0
+
+#define hls_PackSigned_32b(value) (((uint32_t)(value) << 1) ^ (((uint32_t)(~(value)) >> 31) - 1)) // 32-bit zigzag fold: 0,-1,1,-2 -> 0,1,2,3; fully parenthesized
+#define hls_PackSigned_16b(value) (((uint16_t)(value) << 1) ^ (((uint16_t)(~(value)) >> 15) - 1)) // 16-bit zigzag fold (same as hls_PackSigned); result is valid in the low 16 bits
+
+typedef uint32_t hist_t;  // 32-bit histogram counter
+typedef uint8_t nbits_t;  // 8-bit bit-count, paired with 16-bit payloads in the *_nbits / *_bits streams
+
+// for encode: zigzag scan order of an 8x8 block (scan position -> raster index) and, below, its inverse lookup
+const int32_t hls_kNaturalCoeffOrder8[8 * 8] = {0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,
+                                                12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6,  7,  14, 21, 28,
+                                                35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+                                                58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63};
+
+const int32_t hls_kNaturalCoeffOrderLut8[8 * 8] = {0,  1,  5,  6,  14, 15, 27, 28, 2,  4,  7,  13, 16, 26, 29, 42,
+                                                   3,  8,  12, 17, 25, 30, 41, 43, 9,  11, 18, 24, 31, 40, 44, 53,
+                                                   10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60,
+                                                   21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63};
+
+// ------------------------------------------------------------
+// for debug  population
+// ------------------------------------------------------------
+#define _XF_IMAGE_VOID_CAST static_cast<void> // used to build the no-op form of _XF_IMAGE_PRINT
+#ifndef __SYNTHESIS__ // C simulation: _XF_IMAGE_PRINT forwards its arguments to printf
+#define _XF_IMAGE_PRINT(msg...) \
+    do {                        \
+        printf(msg);            \
+    } while (0)
+#else // synthesis: _XF_IMAGE_PRINT discards its arguments and expands to (static_cast<void>(0))
+#define _XF_IMAGE_PRINT(msg...) (_XF_IMAGE_VOID_CAST(0))
+#endif
+
+// ------------------------------------------------------------
+// for noise
+// ------------------------------------------------------------
+const int hls_kMaxNoiseSize = 16;
+
+// ------------------------------------------------------------
+// for encode
+// ------------------------------------------------------------
+
+#define hls_kTileDimInBlocks (8) // equals hls_kTileDim (64) / BLKDIM (8) from pik_common.hpp
+
+struct hls_Rect { // to be removed
+    int x0; // origin x
+    int y0; // origin y
+    uint16_t xsize; // 16-bit extents; unit (pixels or blocks) not visible in this header
+    uint16_t ysize;
+    uint8_t xsize_tiles; // 8-bit counts, so at most 255 each
+    uint8_t ysize_tiles;
+    uint8_t xsize_blocks; // use as const
+    uint8_t ysize_blocks;
+    uint8_t n_tiles;
+};
+
+struct group_rect { // the four tile/block count fields of hls_Rect, without origin and extents
+    uint8_t xsize_tiles;
+    uint8_t ysize_tiles;
+    uint8_t xsize_blocks; // use as const
+    uint8_t ysize_blocks;
+};
+
+struct hls_blksize { // 8-bit width/height pair
+    uint8_t xsize;
+    uint8_t ysize;
+};
+
+struct hls_Token_symb { // symbol half of hls_Token
+    uint16_t context;
+    uint8_t symbol;
+};
+
+struct hls_ANSEncSymbolInfo { // per-symbol ANS encoder entry
+    uint16_t freq_;  // symbol frequency
+    uint16_t start_; // presumably the cumulative start offset of the symbol (confirm)
+
+    uint64_t ifreq_; // presumably a fixed-point reciprocal of freq_, see hls_RECIPROCAL_PRECISION (confirm)
+};
+
+struct hls_TokenInfo { // thin wrapper holding one hls_ANSEncSymbolInfo
+    hls_ANSEncSymbolInfo info;
+};
+
+struct hls_Token_bits { // raw-bits half of hls_Token
+    uint16_t bits; // bit payload
+    uint8_t nbits; // presumably the number of valid bits in `bits` (confirm)
+};
+
+typedef uint64_t hls_Runbit_t;  // 64-bit unsigned
+typedef uint32_t hls_Runbit_t2; // 32-bit unsigned
+
+#define hls_RECIPROCAL_PRECISION 42
+const int hls_kMaxClusters = 256;
+const uint16_t hls_alphabet_size = 256; // same value as MAX_ALPHABET_SIZE below
+#define MAX_ALPHABET_SIZE 256 // pik_common.hpp also defines hls_MAX_ALPHABET_SIZE as 256
+
+void hls_WriteBits_strm(const nbits_t n_bits, // 8-bit bit count
+                        uint16_t bits,        // 16-bit payload
+                        int& num_bits, // non-const reference: may be updated by the callee
+                        int& num,      // non-const reference: may be updated by the callee
+                        hls::stream<nbits_t>& strm_nbits,  // stream of bit counts
+                        hls::stream<uint16_t>& strm_bits); // stream of payloads
+
+void hls_WriteBits_strm_nodepend(const nbits_t n_bits, // same as hls_WriteBits_strm minus the num_bits/num counters
+                                 uint16_t bits,        // 16-bit payload
+                                 hls::stream<nbits_t>& strm_nbits,  // stream of bit counts
+                                 hls::stream<uint16_t>& strm_bits); // stream of payloads
+
+void hls_StoreVarLenUint16( // takes a uint32_t n plus the same counter/stream parameters as hls_WriteBits_strm
+    uint32_t n, int& num_bits, int& num, hls::stream<nbits_t>& strm_nbits, hls::stream<uint16_t>& strm_bits);
+
+float hls_FastLog2(int v); // float result for an int argument; accuracy and v <= 0 behaviour are not visible here
+
+void hls_WriteBitToStream(const int num_pair, // presumably the number of (nbits, bits) pairs to consume (confirm)
+                          uint8_t& byte_tail, // non-const reference: may be updated by the callee
+                          hls::stream<nbits_t>& strm_nbits,  // stream of bit counts
+                          hls::stream<uint16_t>& strm_bits,  // stream of payloads
+                          int& pos, // non-const reference: may be updated by the callee
+                          hls::stream<uint8_t>& strm_byte,   // byte stream
+                          hls::stream<bool>& strm_histo_e);  // presumably end flag for strm_byte (confirm)
+
+void hls_WriteZeroesToByteBoundary(int* pos); // pos is passed by pointer so the callee can update it
+
+#endif
diff --git a/codec/L2/include/hw/pikEnc/pik_common.hpp b/codec/L2/include/hw/pikEnc/pik_common.hpp
new file mode 100644
index 0000000000..759563a244
--- /dev/null
+++ b/codec/L2/include/hw/pikEnc/pik_common.hpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file pik_common.hpp
+ */
+
+#ifndef _XF_CODEC_PIK_COMMON_HPP_
+#define _XF_CODEC_PIK_COMMON_HPP_
+
+#include <ap_int.h>
+#include <hls_math.h>
+#include <hls_stream.h>
+
+#define MAX_EXPONENT_PIX (11) // defined again, identically, further down this header
+#define MAX_NUM_COLOR 3
+#define MAX_NUM_BLOCK88_W (1024)
+#define MAX_NUM_BLOCK88_H (1024)
+#define MAX_NUM_BLOCK88 (MAX_NUM_BLOCK88_W * MAX_NUM_BLOCK88_H) // 1,048,576
+
+#define MAX_NUM_PIXEL (8192 * 8192) // 67,108,864
+#define ALL_PIXEL (MAX_NUM_PIXEL * MAX_NUM_COLOR) // 201,326,592
+
+#define MAX_NUM_BLOCK88_W_TITLE (8) // "TITLE" is presumably a misspelling of TILE; the name is part of the API
+#define MAX_NUM_BLOCK88_H_TITLE (8)
+
+#define MAX_PIX_W (MAX_NUM_BLOCK88_W << 3) // 8192
+#define MAX_PIX_H (MAX_NUM_BLOCK88_H << 3) // 8192
+#define MAX_NUM_PIX (MAX_NUM_BLOCK88 * MAX_NUM_COLOR * 64) // 201,326,592 (same value as ALL_PIXEL)
+#define MAX_NUM_COEF (MAX_NUM_BLOCK88 * MAX_NUM_COLOR * 64) // 201,326,592
+#define MAX_SIZE_COEF (MAX_NUM_COEF * 2) // 402,653,184; still fits a 32-bit signed int
+
+#define BLKDIM (8) // defined again, identically, further down this header
+#define BLOCK_SIZE (64) // BLKDIM * BLKDIM
+#define hls_kDcGroupDimInBlocks (256)
+#define hls_kGroupDim (512)
+#define hls_kTileDim (64)
+#define hls_kNumPredictors (8) // kernel3_common.hpp defines the same macro with the same value
+
+#define DCGROUP_SIZE (hls_kDcGroupDimInBlocks * hls_kDcGroupDimInBlocks) // 65,536
+#define ACGROUP_SIZE (hls_kGroupDim * hls_kGroupDim) // 262,144
+#define TILE_SIZE (hls_kTileDim * hls_kTileDim) // 4,096
+
+#define MAX_AC_GROUP (256)
+#define MAX_DC_GROUP (16)
+
+#define AXI_SZ (32)
+#define AXI_WIDTH (AXI_SZ * 2) // 64
+#define BURST_LENTH (32) // "LENTH" spelling is part of the macro name
+#define DT_SZ (4)
+typedef ap_int<8 * DT_SZ> DT; // 32-bit signed arbitrary-precision integer
+
+#define BLKDIM (8) // identical redefinition of the earlier BLKDIM (allowed by the preprocessor)
+#define MAX_EXPONENT_PIX (11) // identical redefinition of the earlier MAX_EXPONENT_PIX
+
+#define DIVCEIL(a, b) (((a) + (b) - 1) / (b)) // ceil(a / b) for positive integers; arguments parenthesized so expressions expand safely (b is evaluated twice)
+#define XBLOCKS_32X32 DIVCEIL(MAX_NUM_BLOCK88_W, 4) // 256: four 8x8 blocks span 32 pixels
+#define YBLOCKS_32X32 DIVCEIL(MAX_NUM_BLOCK88_H, 4) // 256
+
+#define ELEM_SPACE (MAX_NUM_BLOCK88_W * MAX_NUM_BLOCK88_H * 8 * 8) // 67,108,864
+#define BUF_DEPTH ALL_PIXEL // 201,326,592
+
+#define AXI_OUT (MAX_NUM_COLOR * XBLOCKS_32X32 * YBLOCKS_32X32 * 32 * 32) // 201,326,592
+#define AXI_CMAP (MAX_NUM_BLOCK88 / 64 * 2 + 2) // 32,770
+#define AXI_QF (MAX_NUM_BLOCK88 + 2) // 1,048,578
+
+#define MAX_NUM_CONFIG (32)
+#define MAX_NUM_DC (MAX_NUM_BLOCK88 * MAX_NUM_COLOR) // 3,145,728
+#define MAX_NUM_AC AXI_OUT // 201,326,592
+
+#define XGROUPS_512X512 DIVCEIL(MAX_NUM_BLOCK88_W, 64) // 16: sixty-four 8x8 blocks span 512 pixels
+#define YGROUPS_512X512 DIVCEIL(MAX_NUM_BLOCK88_H, 64) // 16
+#define MAX_NUM_ORDER (XGROUPS_512X512 * YGROUPS_512X512 * MAX_NUM_COLOR * 64) // 49,152; parenthesized so it is safe next to / % - etc.
+#define MAX_NUM_GROUP 256
+
+#define hls_kHybridEncodingSplitToken (16) // equals 1 << hls_kHybridEncodingDirectSplitExponent
+#define hls_kHybridEncodingDirectSplitExponent (4)
+
+#define hls_kRleSymStart (39)
+#define hls_kEntropyCodingNumSymbols (78) // 2 * hls_kRleSymStart
+
+#define hls_kANSBufferSize (1 << 16) // 65,536
+#define hls_kMaxBufSize (3 << 16) // 196,608
+#define hls_kTokenMaxSize 24576
+#define hls_kTotalSize 49152 // 2 * hls_kTokenMaxSize
+
+#define hls_ANS_LOG_TAB_SIZE (10)
+#define hls_ANS_TAB_SIZE (1 << hls_ANS_LOG_TAB_SIZE) // 1,024
+#define hls_ANS_TAB_MASK (hls_ANS_TAB_SIZE - 1) // 0x3FF
+#define hls_ANS_SIGNATURE (0x13) // Initial state, used as CRC.
+
+#define hls_MAX_ALPHABET_SIZE 256 // same value as MAX_ALPHABET_SIZE in kernel3_common.hpp
+#define hls_kAlphabetSize (272)
+
+#define hls_kOrderContexts 3
+
+#define hls_kClustersLimit 64
+#define hls_kNumStaticZdensContexts 7
+#define hls_kNumStaticOrderFreeContexts 3
+#define hls_kNumStaticContexts 24
+#define hls_kMinClustersForHistogramRemap (24)
+#define hls_NumHistograms 6144 // equals hls_kNumStaticContexts * hls_MAX_ALPHABET_SIZE (24 * 256)
+
+#define hls_kNumContexts 411
+#define MAX_DC_SIZE (4 * (2 * hls_kTotalSize) + 4096) // 397,312
+#define MAX_DC_HISTO_SIZE (1024 * (MAX_NUM_COLOR + 4)) // 7,168
+#define MAX_AC_SIZE (4 * ((4 * hls_kTotalSize)) + 4096) // 790,528
+#define MAX_AC_HISTO_SIZE (hls_kNumStaticContexts * 1024) // 24,576
+
+template <typename I, typename F>
+inline F bitsToF(I in) { // reinterpret the bit pattern of `in` as an F; no numeric conversion takes place
+    union {              // both members overlay the same storage; I and F are expected to have equal size
+        I as_bits;
+        F as_value;
+    } pun;
+    pun.as_bits = in; // store through the integer view ...
+    return pun.as_value; // ... and read the same bytes back through the F view
+}
+
+template <typename F, typename I>
+inline I fToBits(F in) { // return the raw bit pattern of `in` as an I; inverse of bitsToF
+    union {              // both members overlay the same storage; I and F are expected to have equal size
+        I as_bits;
+        F as_value;
+    } pun;
+    pun.as_value = in; // store through the F view ...
+    return pun.as_bits; // ... and read the same bytes back as an integer
+}
+
+#endif
diff --git a/codec/L2/include/hw/pikEnc/resize_mem.hpp b/codec/L2/include/hw/pikEnc/resize_mem.hpp
new file mode 100644
index 0000000000..b9d19e9923
--- /dev/null
+++ b/codec/L2/include/hw/pikEnc/resize_mem.hpp
@@ -0,0 +1,817 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file resize_mem.hpp
+ */
+
+#ifndef _XF_CODEC_RESIZE_MEM_HPP_
+#define _XF_CODEC_RESIZE_MEM_HPP_
+
+#include "ap_int.h"
+#include <assert.h>
+
+typedef ap_uint<32> HLS_SIZE_T;
+typedef ap_uint<5> HLS_CHANNEL_T;
+
+/* Template class of Window */
+/**
+ * ROWS x COLS array of T with helpers to shift, insert and copy elements.
+ * Storage is the public member `val`, indexed as val[row][col].
+ */
+template <int ROWS, int COLS, typename T>
+class Window {
+   public:
+    // The pragmas request complete partitioning of `val` in both dimensions.
+    Window(){
+#pragma HLS ARRAY_PARTITION variable = val dim = 1 complete
+#pragma HLS ARRAY_PARTITION variable = val dim = 2 complete
+    };
+
+    /* Window main APIs */
+    // Shifts move the stored contents by one position; the vacated edge keeps
+    // its previous values until the caller overwrites it.
+    void shift_pixels_left();
+    void shift_pixels_right();
+    void shift_pixels_up();
+    void shift_pixels_down();
+    // Swaps val[i][j] with val[j][i]; asserts ROWS == COLS.
+    void shift_diagonal();
+    void insert_pixel(T value, int row, int col);
+    void insert_row(T value[COLS], int row);
+    void insert_top_row(T value[COLS]);
+    void insert_bottom_row(T value[COLS]);
+    void insert_col(T value[ROWS], int col);
+    void insert_left_col(T value[ROWS]);
+    void insert_right_col(T value[ROWS]);
+    // copy_one_row / copy_one_col overwrite the FIRST index with the SECOND.
+    void copy_one_row(int row1, int row2);
+    void copy_one_col(int col1, int col2);
+
+    // Both accessors return a mutable reference to val[row][col].
+    T& getval(int row, int col);
+    T& operator()(int row, int col);
+
+    /* Back compatible APIs */
+    // Thin wrappers over the main APIs. The shift_* wrappers call the
+    // same-named shift_pixels_* function; the insert_top/bottom/left/right
+    // wrappers call the opposite-edge main API.
+    void shift_left();
+    void shift_right();
+    void shift_up();
+    void shift_down();
+    void insert(T value, int row, int col);
+    void insert_top(T value[COLS]);
+    void insert_bottom(T value[COLS]);
+    void insert_left(T value[ROWS]);
+    void insert_right(T value[ROWS]);
+    // T& getval(int row, int col);
+    // T& operator ()(int row, int col);
+
+    T val[ROWS][COLS];
+#ifndef __SYNTHESIS__
+    // Simulation-only helpers: restore_val() copies val into val_t,
+    // window_print() streams val to std::cout.
+    void restore_val();
+    void window_print();
+    T val_t[ROWS][COLS];
+#endif
+};
+
+/* Member functions of Window class */
+/* Origin in upper-left point */
+/*       0   1        C-2 C-1
+ *     +---+---+-...-+---+---+
+ *  0  |   |   |     |   |   |
+ *     +---+---+-...-+---+---+
+ *  1  |   |   |     |   |   |
+ *     +---+---+-...-+---+---+
+ *       ...     ...    ...
+ *     +---+---+-...-+---+---+
+ * R-2 |   |   |     |   |   |
+ *     +---+---+-...-+---+---+
+ * R-1 |   |   |     |   |   |
+ *     +---+---+-...-+---+---+
+ *
+ */
+
+/*
+ * Window content shift left
+ * Assumes new values will be placed in right column = COLS-1
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::shift_pixels_left() {
+#pragma HLS inline
+
+    // For every row, copy each element into its left-hand neighbour.
+    // Column COLS-1 is left untouched for the caller to refill.
+    for (HLS_SIZE_T r = 0; r < ROWS; r++) {
+#pragma HLS unroll
+        for (HLS_SIZE_T c = 1; c < COLS; c++) {
+#pragma HLS unroll
+            val[r][c - 1] = val[r][c];
+        }
+    }
+}
+
+/*
+ * Window content shift right
+ * Assumes new values will be placed in left column = 0
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::shift_pixels_right() {
+#pragma HLS inline
+
+    // Walk columns from the right edge down to 1 so that each source element
+    // is read before it is overwritten. Column 0 is left for the caller.
+    for (HLS_SIZE_T r = 0; r < ROWS; r++) {
+#pragma HLS unroll
+        for (HLS_SIZE_T c = COLS - 1; c > 0; c--) {
+#pragma HLS unroll
+            val[r][c] = val[r][c - 1];
+        }
+    }
+}
+
+/*
+ * Window content shift up
+ * Assumes new values will be placed in bottom row = ROWS-1
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::shift_pixels_up() {
+#pragma HLS inline
+
+    // Copy each row into the row above it, top to bottom.
+    // Row ROWS-1 is left untouched for the caller to refill.
+    for (HLS_SIZE_T r = 1; r < ROWS; r++) {
+#pragma HLS unroll
+        for (HLS_SIZE_T c = 0; c < COLS; c++) {
+#pragma HLS unroll
+            val[r - 1][c] = val[r][c];
+        }
+    }
+}
+
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::copy_one_col(int col1, int col2) {
+#pragma HLS inline
+    assert(col1 >= 0 && col1 < COLS && col2 >= 0 && col2 < COLS);
+
+    // col1 is the destination, col2 the source.
+    for (HLS_SIZE_T r = 0; r < ROWS; r++) {
+#pragma HLS unroll
+        val[r][col1] = val[r][col2];
+    }
+}
+
+/*
+ * Window copy one row
+ * Overwrites row `row1` with the contents of row `row2`.
+ * Both indices must lie in [0, ROWS).
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::copy_one_row(int row1, int row2) {
+#pragma HLS inline
+    assert(row1 >= 0 && row1 < ROWS && row2 >= 0 && row2 < ROWS);
+    HLS_SIZE_T j;
+    for (j = 0; j < COLS; j++) {
+// Unrolled like copy_one_col and the other per-element helpers of this class.
+#pragma HLS unroll
+        val[row1][j] = val[row2][j];
+    }
+}
+
+/*
+ * Window content shift down
+ * Assumes new values will be placed in top row = 0
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::shift_pixels_down() {
+#pragma HLS inline
+
+    // Walk rows from the bottom edge down to 1 so that each source row is
+    // read before it is overwritten. Row 0 is left for the caller.
+    for (HLS_SIZE_T r = ROWS - 1; r > 0; r--) {
+#pragma HLS unroll
+        for (HLS_SIZE_T c = 0; c < COLS; c++) {
+#pragma HLS unroll
+            val[r][c] = val[r - 1][c];
+        }
+    }
+}
+
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::shift_diagonal() {
+#pragma HLS inline off
+    assert(ROWS == COLS);
+
+    // Exchange val[r][c] with val[c][r] for every pair above the main
+    // diagonal, so each pair is swapped exactly once.
+    for (HLS_SIZE_T r = 0; r < ROWS; r++) {
+#pragma HLS unroll
+        for (HLS_SIZE_T c = 0; c < COLS; c++) {
+#pragma HLS unroll
+            if (r < c) {
+                T upper = val[r][c];
+                val[r][c] = val[c][r];
+                val[c][r] = upper;
+            }
+        }
+    }
+}
+
+/* Window insert pixel
+ * Inserts a new value at any location of the window
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_pixel(T value, int row, int col) {
+#pragma HLS inline
+    // Both indices must address an existing element.
+    assert(row >= 0 && row < ROWS && col >= 0 && col < COLS);
+    this->val[row][col] = value;
+}
+
+/* Window insert row
+ * Inserts a set of values in any row of the window
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_row(T value[COLS], int row) {
+#pragma HLS inline
+    // Same range check as insert_pixel/getval: an out-of-range row would
+    // otherwise write outside `val`.
+    assert(row >= 0 && row < ROWS);
+
+    // Copy the COLS supplied values into the selected row.
+    HLS_SIZE_T j;
+    for (j = 0; j < COLS; j++) {
+#pragma HLS unroll
+        val[row][j] = value[j];
+    }
+}
+
+/* Window insert top row
+ * Inserts a set of values in top row = 0 of the window
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_top_row(T value[COLS]) {
+#pragma HLS inline
+    // The top row is index 0.
+    this->insert_row(value, 0);
+}
+
+/* Window insert bottom row
+ * Inserts a set of values in bottom row = ROWS-1 of the window
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_bottom_row(T value[COLS]) {
+#pragma HLS inline
+    // The bottom row is index ROWS-1.
+    this->insert_row(value, ROWS - 1);
+}
+
+/* Window insert column
+ * Inserts a set of values in any column of the window
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_col(T value[ROWS], int col) {
+#pragma HLS inline
+    // Same range check as insert_pixel/getval: an out-of-range column would
+    // otherwise write outside `val`.
+    assert(col >= 0 && col < COLS);
+
+    // Copy the ROWS supplied values into the selected column.
+    HLS_SIZE_T i;
+    for (i = 0; i < ROWS; i++) {
+#pragma HLS unroll
+        val[i][col] = value[i];
+    }
+}
+
+/* Window insert left column
+ * Inserts a set of values in left column = 0 of the window
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_left_col(T value[ROWS]) {
+#pragma HLS inline
+    // The left column is index 0.
+    this->insert_col(value, 0);
+}
+
+/* Window insert right column
+ * Inserts a set of values in right column = COLS-1 of the window
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_right_col(T value[ROWS]) {
+#pragma HLS inline
+    // The right column is index COLS-1.
+    this->insert_col(value, COLS - 1);
+}
+
+/* Window getval
+ * Returns the data value in the window at position (row,col)
+ */
+template <int ROWS, int COLS, typename T>
+T& Window<ROWS, COLS, T>::getval(int row, int col) {
+#pragma HLS inline
+    // Both indices must address an existing element.
+    assert(row >= 0 && row < ROWS && col >= 0 && col < COLS);
+    // A reference is returned, so the caller may also write through it.
+    return this->val[row][col];
+}
+
+/* Window operator()
+ * Function-call syntax for getval: returns a reference to the element at position (row,col)
+ */
+template <int ROWS, int COLS, typename T>
+T& Window<ROWS, COLS, T>::operator()(int row, int col) {
+#pragma HLS inline
+    // Delegates to getval, which performs the bounds assertion.
+    return this->getval(row, col);
+}
+
+#ifndef __SYNTHESIS__
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::restore_val() {
+    // Element-wise copy of the live window `val` into the shadow array `val_t`.
+    for (HLS_SIZE_T r = 0; r < ROWS; r++) {
+        for (HLS_SIZE_T c = 0; c < COLS; c++) {
+            val_t[r][c] = val[r][c];
+        }
+    }
+}
+
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::window_print() {
+    // Every element is written in row-major order with a field width of 5;
+    // a single newline follows the last element.
+    for (HLS_SIZE_T r = 0; r < ROWS; r++) {
+        for (HLS_SIZE_T c = 0; c < COLS; c++) {
+            std::cout << std::setw(5) << val[r][c];
+        }
+    }
+    std::cout << "\n";
+}
+#endif
+
+/* NOTE:
+ * Back compatible APIs, take bottom-right point as the origin
+ * Window shift left, while contents shift right
+ * Assumes new values will be placed in left column(=COLS-1)
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::shift_left() {
+#pragma HLS inline
+    // Legacy name; forwards to the upper-left-origin API.
+    this->shift_pixels_left();
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-right point as the origin
+ * Window shift right, while contents shift left
+ * Assumes new values will be placed in right column(=0)
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::shift_right() {
+#pragma HLS inline
+    // Legacy name; forwards to the upper-left-origin API.
+    this->shift_pixels_right();
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-right point as the origin
+ * Window shift up, while contents shift down
+ * Assumes new values will be placed in top row(=ROWS-1)
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::shift_up() {
+#pragma HLS inline
+    // Legacy name; forwards to the upper-left-origin API.
+    this->shift_pixels_up();
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-right point as the origin
+ * Window shift down, while contents shift up
+ * Assumes new values will be placed in bottom row(=0)
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::shift_down() {
+#pragma HLS inline
+    // Legacy name; forwards to the upper-left-origin API.
+    this->shift_pixels_down();
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-right point as the origin
+ * Window insert
+ * Inserts a new value at any location of the window
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert(T value, int row, int col) {
+#pragma HLS inline
+    // Legacy name; the indices are passed to insert_pixel unchanged.
+    this->insert_pixel(value, row, col);
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-right point as the origin
+ * Window insert top
+ * Inserts a set of values in top row(=ROWS-1)
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_top(T value[COLS]) {
+#pragma HLS inline
+    // Legacy "top" is row ROWS-1, i.e. the bottom row of the main API.
+    this->insert_bottom_row(value);
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-right point as the origin
+ * Window insert bottom
+ * Inserts a set of values in bottom row(=0)
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_bottom(T value[COLS]) {
+#pragma HLS inline
+    // Legacy "bottom" is row 0, i.e. the top row of the main API.
+    this->insert_top_row(value);
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-right point as the origin
+ * Window insert left
+ * Inserts a set of values in left column(=COLS-1)
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_left(T value[ROWS]) {
+#pragma HLS inline
+    // Legacy "left" is column COLS-1, i.e. the right column of the main API.
+    this->insert_right_col(value);
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-right point as the origin
+ * Window insert right
+ * Inserts a set of values in right column(=0)
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_right(T value[ROWS]) {
+#pragma HLS inline
+    // Legacy "right" is column 0, i.e. the left column of the main API.
+    this->insert_left_col(value);
+}
+
+/* Template class of Line Buffer */
+template <int ROWS, int COLS, typename T, int RESHAPE = 0>
+class LineBuffer;
+
+/**
+ * LineBuffer specialisation for RESHAPE == 0.
+ * Holds ROWS x COLS values of T in the public member `val` (val[row][col]);
+ * all per-column operations take the column index as an argument.
+ */
+template <int ROWS, int COLS, typename T>
+class LineBuffer<ROWS, COLS, T, 0> {
+   public:
+    // The pragmas request complete partitioning of `val` along dimension 1
+    // and declare inter/intra dependences on `val` as false.
+    LineBuffer(){
+#pragma HLS array_partition variable = val dim = 1 complete
+#pragma HLS dependence variable = val inter false
+#pragma HLS dependence variable = val intra false
+    };
+    /* LineBuffer main APIs */
+    // Shifts move the contents of ONE column by one row.
+    void shift_pixels_up(int col);
+    void shift_pixels_down(int col);
+    // Write a single value into row ROWS-1 / row 0 of the given column.
+    void insert_bottom_row(T value, int col);
+    void insert_top_row(T value, int col);
+    // Copies the given column into value[0..ROWS-1].
+    void get_col(T value[ROWS], int col);
+    // Both accessors return a mutable reference to val[row][col].
+    T& getval(int row, int col);
+    T& operator()(int row, int col);
+
+    /* Back compatible APIs */
+    // Thin wrappers: shift_* call the same-named shift_pixels_* function,
+    // insert_bottom/insert_top call insert_top_row/insert_bottom_row.
+    void shift_up(int col);
+    void shift_down(int col);
+    void insert_bottom(T value, int col);
+    void insert_top(T value, int col);
+    // T& getval(int row, int col);
+    // T& operator ()(int row, int col);
+
+    T val[ROWS][COLS];
+};
+
+/* Member functions of LineBuffer class */
+/* Origin in upper-left point */
+/*       0   1            C-2 C-1
+ *     +---+---+-... ...-+---+---+
+ *  0  |   |   |         |   |   |
+ *     +---+---+-... ...-+---+---+
+ *  1  |   |   |         |   |   |
+ *     +---+---+-... ...-+---+---+
+ *       ...     ... ...    ...
+ *     +---+---+-... ...-+---+---+
+ * R-2 |   |   |         |   |   |
+ *     +---+---+-... ...-+---+---+
+ * R-1 |   |   |         |   |   |
+ *     +---+---+-... ...-+---+---+
+ *
+ */
+
+/* Member functions of LineBuffer class */
+
+/*
+ * LineBuffer content shift down
+ * Assumes new values will be placed in top row = 0
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T>::shift_pixels_down(int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+
+    // Bottom-up walk so each source element is read before being overwritten;
+    // row 0 of this column is left for the caller.
+    for (HLS_SIZE_T r = ROWS - 1; r > 0; r--) {
+#pragma HLS unroll
+        val[r][col] = val[r - 1][col];
+    }
+}
+
+/*
+ * LineBuffer content shift up
+ * Assumes new values will be placed in bottom row = ROWS-1
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T>::shift_pixels_up(int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+
+    // Copy each element of this column into the row above it;
+    // row ROWS-1 is left for the caller.
+    for (HLS_SIZE_T r = 1; r < ROWS; r++) {
+#pragma HLS unroll
+        val[r - 1][col] = val[r][col];
+    }
+}
+
+/* LineBuffer insert bottom row
+ * Inserts a new value in bottom row= ROWS-1 of the linebuffer
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T>::insert_bottom_row(T value, int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+    // The bottom row is index ROWS-1.
+    this->val[ROWS - 1][col] = value;
+}
+
+/* LineBuffer insert top row
+ * Inserts a new value in top row=0 of the linebuffer
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T>::insert_top_row(T value, int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+    // The top row is index 0.
+    this->val[0][col] = value;
+}
+
+/* LineBuffer get a column
+ * Get a column value of the linebuffer
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T>::get_col(T value[ROWS], int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+
+    // Copy the selected column, top to bottom, into the caller's array.
+    for (HLS_SIZE_T r = 0; r < ROWS; r++) {
+#pragma HLS unroll
+        value[r] = val[r][col];
+    }
+}
+
+/* Line buffer getval
+ * Returns the data value in the line buffer at position row, col
+ */
+template <int ROWS, int COLS, typename T>
+T& LineBuffer<ROWS, COLS, T>::getval(int row, int col) {
+#pragma HLS inline
+    // Both indices must address an existing element.
+    assert(row >= 0 && row < ROWS && col >= 0 && col < COLS);
+    // A reference is returned, so the caller may also write through it.
+    return this->val[row][col];
+}
+
+/* Line buffer operator()
+ * Function-call syntax for getval: returns a reference to the element at position row, col
+ */
+template <int ROWS, int COLS, typename T>
+T& LineBuffer<ROWS, COLS, T>::operator()(int row, int col) {
+#pragma HLS inline
+    // Delegates to getval, which performs the bounds assertion.
+    return this->getval(row, col);
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-left point as the origin
+ * LineBuffer shift down, while contents shift up
+ * Assumes new values will be placed in bottom row(=0)
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T>::shift_down(int col) {
+#pragma HLS inline
+    // Legacy name; forwards to the upper-left-origin API.
+    this->shift_pixels_down(col);
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-left point as the origin
+ * LineBuffer shift up, while contents shift down
+ * Assumes new values will be placed in top row(=ROWS-1)
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T>::shift_up(int col) {
+#pragma HLS inline
+    // Legacy name; forwards to the upper-left-origin API.
+    this->shift_pixels_up(col);
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-left point as the origin
+ * LineBuffer insert
+ * Inserts a new value in bottom row(=0)
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T>::insert_bottom(T value, int col) {
+#pragma HLS inline
+    // Legacy "bottom" is row 0, i.e. the top row of the main API.
+    this->insert_top_row(value, col);
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-left point as the origin
+ * LineBuffer insert
+ * Inserts a new value in top row(=ROWS-1)
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T>::insert_top(T value, int col) {
+#pragma HLS inline
+    // Legacy "top" is row ROWS-1, i.e. the bottom row of the main API.
+    this->insert_bottom_row(value, col);
+}
+
+/**
+ * LineBuffer specialisation for RESHAPE == 1.
+ * Same interface as the RESHAPE == 0 specialisation, but the constructor
+ * pragmas request an XPM_MEMORY uram core and reshape `val` along
+ * dimension 1 instead of partitioning it.
+ */
+template <int ROWS, int COLS, typename T>
+class LineBuffer<ROWS, COLS, T, 1> {
+   public:
+    LineBuffer(){
+#pragma HLS RESOURCE variable = val core = XPM_MEMORY uram
+#pragma HLS array_reshape variable = val dim = 1
+#pragma HLS dependence variable = val inter false
+#pragma HLS dependence variable = val intra false
+    };
+    /* LineBuffer main APIs */
+    // Shifts move the contents of ONE column by one row.
+    void shift_pixels_up(int col);
+    void shift_pixels_down(int col);
+    // Write a single value into row ROWS-1 / row 0 of the given column.
+    void insert_bottom_row(T value, int col);
+    void insert_top_row(T value, int col);
+    // Copies the given column into value[0..ROWS-1].
+    void get_col(T value[ROWS], int col);
+    // Both accessors return a mutable reference to val[row][col].
+    T& getval(int row, int col);
+    T& operator()(int row, int col);
+
+    /* Back compatible APIs */
+    // Thin wrappers: shift_* call the same-named shift_pixels_* function,
+    // insert_bottom/insert_top call insert_top_row/insert_bottom_row.
+    void shift_up(int col);
+    void shift_down(int col);
+    void insert_bottom(T value, int col);
+    void insert_top(T value, int col);
+    // T& getval(int row, int col);
+    // T& operator ()(int row, int col);
+
+    T val[ROWS][COLS];
+#ifndef __SYNTHESIS__
+    // Simulation-only helpers: restore_val() copies val into val_t,
+    // linebuffer_print() streams one column of val to std::cout.
+    void restore_val();
+    void linebuffer_print(int col);
+    T val_t[ROWS][COLS];
+#endif
+};
+
+/* Member functions of LineBuffer_reshape class */
+
+/* Origin in upper-left point */
+/*       0   1            C-2 C-1
+ *     +---+---+-... ...-+---+---+
+ *  0  |   |   |         |   |   |
+ *     +---+---+-... ...-+---+---+
+ *  1  |   |   |         |   |   |
+ *     +---+---+-... ...-+---+---+
+ *       ...     ... ...    ...
+ *     +---+---+-... ...-+---+---+
+ * R-2 |   |   |         |   |   |
+ *     +---+---+-... ...-+---+---+
+ * R-1 |   |   |         |   |   |
+ *     +---+---+-... ...-+---+---+
+ *
+ */
+
+/* Member functions of LineBuffer_reshape class */
+
+/*
+ * LineBuffer content shift down
+ * Assumes new values will be placed in top row = 0
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::shift_pixels_down(int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+
+    // Bottom-up walk so each source element is read before being overwritten;
+    // row 0 of this column is left for the caller.
+    for (HLS_SIZE_T r = ROWS - 1; r > 0; r--) {
+#pragma HLS unroll
+        val[r][col] = val[r - 1][col];
+    }
+}
+
+/*
+ * LineBuffer content shift up
+ * Assumes new values will be placed in bottom row = ROWS-1
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::shift_pixels_up(int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+
+    // Copy each element of this column into the row above it;
+    // row ROWS-1 is left for the caller.
+    for (HLS_SIZE_T r = 1; r < ROWS; r++) {
+#pragma HLS unroll
+        val[r - 1][col] = val[r][col];
+    }
+}
+
+/* LineBuffer insert bottom row
+ * Inserts a new value in bottom row= ROWS-1 of the linebuffer
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::insert_bottom_row(T value, int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+    // The bottom row is index ROWS-1.
+    this->val[ROWS - 1][col] = value;
+}
+
+/* LineBuffer insert top row
+ * Inserts a new value in top row=0 of the linebuffer
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::insert_top_row(T value, int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+    // The top row is index 0.
+    this->val[0][col] = value;
+}
+
+/* LineBuffer get a column
+ * Get a column value of the linebuffer
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::get_col(T value[ROWS], int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+
+    // Copy the selected column, top to bottom, into the caller's array.
+    for (HLS_SIZE_T r = 0; r < ROWS; r++) {
+#pragma HLS unroll
+        value[r] = val[r][col];
+    }
+}
+
+/* Line buffer getval
+ * Returns the data value in the line buffer at position row, col
+ */
+template <int ROWS, int COLS, typename T>
+T& LineBuffer<ROWS, COLS, T, 1>::getval(int row, int col) {
+#pragma HLS inline
+    // Both indices must address an existing element.
+    assert(row >= 0 && row < ROWS && col >= 0 && col < COLS);
+    // A reference is returned, so the caller may also write through it.
+    return this->val[row][col];
+}
+
+/* Line buffer operator()
+ * Function-call syntax for getval: returns a reference to the element at position row, col
+ */
+template <int ROWS, int COLS, typename T>
+T& LineBuffer<ROWS, COLS, T, 1>::operator()(int row, int col) {
+#pragma HLS inline
+    // Delegates to getval, which performs the bounds assertion.
+    return this->getval(row, col);
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-left point as the origin
+ * LineBuffer shift down, while contents shift up
+ * Assumes new values will be placed in bottom row(=0)
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::shift_down(int col) {
+#pragma HLS inline
+    // Legacy name; forwards to the upper-left-origin API.
+    this->shift_pixels_down(col);
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-left point as the origin
+ * LineBuffer shift up, while contents shift down
+ * Assumes new values will be placed in top row(=ROWS-1)
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::shift_up(int col) {
+#pragma HLS inline
+    // Legacy name; forwards to the upper-left-origin API.
+    this->shift_pixels_up(col);
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-left point as the origin
+ * LineBuffer insert
+ * Inserts a new value in bottom row(=0)
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::insert_bottom(T value, int col) {
+#pragma HLS inline
+    // Legacy "bottom" is row 0, i.e. the top row of the main API.
+    this->insert_top_row(value, col);
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-left point as the origin
+ * LineBuffer insert
+ * Inserts a new value in top row(=ROWS-1)
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::insert_top(T value, int col) {
+#pragma HLS inline
+    // Legacy "top" is row ROWS-1, i.e. the bottom row of the main API.
+    this->insert_bottom_row(value, col);
+}
+
+#ifndef __SYNTHESIS__
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::restore_val() {
+    // Element-wise copy of the live buffer `val` into the shadow array `val_t`.
+    for (HLS_SIZE_T r = 0; r < ROWS; r++) {
+        for (HLS_SIZE_T c = 0; c < COLS; c++) {
+            val_t[r][c] = val[r][c];
+        }
+    }
+}
+
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::linebuffer_print(int col) {
+    // One element of the chosen column per line (field width 20), each
+    // preceded by a newline; two newlines close the listing.
+    for (HLS_SIZE_T r = 0; r < ROWS; r++) {
+        std::cout << "\n";
+        std::cout << std::setw(20) << val[r][col];
+    }
+    std::cout << "\n\n";
+}
+#endif
+
+#endif //_RESIZE_MEM_HPP_
diff --git a/codec/L2/include/hw/resize/bicubicinterpolator.hpp b/codec/L2/include/hw/resize/bicubicinterpolator.hpp
new file mode 100644
index 0000000000..b248a672fe
--- /dev/null
+++ b/codec/L2/include/hw/resize/bicubicinterpolator.hpp
@@ -0,0 +1,887 @@
+/*
+ * Copyright 2019 Xilinx, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file bicubicinterpolator.hpp
+ *
+ * @brief This file contains the BicubicInterpolator class used to resize images.
+ */
+
+#ifndef _BICUBIC_INTERPOLATOR_HPP_
+#define _BICUBIC_INTERPOLATOR_HPP_
+
+#include "ap_int.h"
+#include "hls_math.h"
+#include "hls_stream.h"
+#include "resize_mem.hpp"
+#include <iostream>
+#include <math.h>
+
+namespace xf {
+namespace codec {
+namespace details {
+
+/**
+ * @class BicubicInterpolator
+ * @brief Bicubic interpolation is an extension of cubic interpolation for
+ * interpolating data points on a two-dimensional regular grid.
+ * The interpolated surface is smoother than corresponding surfaces obtained by
+ * bilinear interpolation or nearest-neighbor interpolation.
+ * It can be used to resize images.
+ *
+ * @tparam _W total bit width of the ap_fixed type used for the arithmetic
+ * @tparam _I number of bits above the binary point of that ap_fixed type; the remaining _W-_I bits hold the
+ * fractional part.
+ * @tparam _WBIT the input width of pixels
+ * @tparam _NPPC 1 representing 1-pixel/clock for one interpolation or 8 representing 8-pixel/clock for one interpolation
+ */
+
+template <int _W, int _I, int _WBIT, int _NPPC>
+class BicubicInterpolator {
+   public:
+    // constructor
+    BicubicInterpolator() {}
+
+    typedef ap_fixed<_W, _I> fixed_t;
+    typedef ap_uint<_WBIT * _NPPC> pixel_t;
+    typedef ap_uint<72> pixel_8x_t;
+
+    // Multiplies two fixed_t values; the product is stored back into a
+    // fixed_t. The resource pragma asks for the FMul_fulldsp core.
+    fixed_t FMul(fixed_t in1, fixed_t in2) {
+        //#pragma HLS inline off
+        fixed_t product = 0;
+#pragma HLS resource variable = product core = FMul_fulldsp
+        product = in1 * in2;
+        return product;
+    }
+
+    // One-dimensional cubic interpolation over four samples. Evaluates
+    //   w1 + 0.5*x*( w2 - w0 + x*( 2*w0 - 5*w1 + 4*w2 - w3 + x*( 3*(w1 - w2) + w3 - w0 ) ) )
+    // with every product routed through FMul.
+    fixed_t cubicInterpolate(fixed_t window[4], fixed_t x) {
+#pragma HLS inline
+        // Polynomial coefficients.
+        fixed_t kHalf = 0.5;
+        fixed_t kTwo = 2.0;
+        fixed_t kThree = 3.0;
+        fixed_t kFour = 4.0;
+        fixed_t kFive = 5.0;
+
+        // Scaled samples and the halved position.
+        fixed_t threeDiff12 = FMul(kThree, (window[1] - window[2]));
+        fixed_t twoW0 = FMul(kTwo, window[0]);
+        fixed_t fiveW1 = FMul(kFive, window[1]);
+        fixed_t fourW2 = FMul(kFour, window[2]);
+        fixed_t halfX = FMul(kHalf, x);
+
+        // Nested evaluation from the innermost bracket outwards.
+        fixed_t inner = FMul(x, (threeDiff12 + window[3] - window[0]));
+        fixed_t middle = FMul(x, (twoW0 - fiveW1 + fourW2 - window[3] + inner));
+        return window[1] + FMul(halfX, (window[2] - window[0] + middle));
+    }
+
+    // Two-pass interpolation over a 4x4 window: each row is interpolated at
+    // position x, then the four row results are interpolated at position y.
+    fixed_t bicubicInterpolate(Window<4, 4, fixed_t>& window, fixed_t y, fixed_t x) {
+#pragma HLS inline
+        fixed_t rowResult[4];
+        rowResult[0] = cubicInterpolate(window.val[0], x);
+        rowResult[1] = cubicInterpolate(window.val[1], x);
+        rowResult[2] = cubicInterpolate(window.val[2], x);
+        rowResult[3] = cubicInterpolate(window.val[3], x);
+
+        return cubicInterpolate(rowResult, y);
+    }
+
+    // Cubic interpolation over the single row of a 1x4 window. The arithmetic
+    // is the sequence implemented by cubicInterpolate, so the row is handed to
+    // that routine (which is inlined here) instead of repeating the formula.
+    fixed_t cubicInterpolate_one(Window<1, 4, fixed_t>& window, fixed_t x) {
+#pragma HLS inline off
+        return cubicInterpolate(window.val[0], x);
+    }
+
+    /**
+     * @brief Streaming bicubic resize that handles one pixel per inner-loop iteration.
+     *
+     * Pixels are read from src_strm in raster order, kept in a 4-row line
+     * buffer, gathered into a 4x4 window and interpolated with
+     * bicubicInterpolate. A destination pixel is written only when both the
+     * row and column write-enables are set.
+     *
+     * Only ratios <= 1 are handled (see the `*_rate <= 65536` tests). For
+     * larger ratios the enable flags stay false, so nothing is read or written.
+     *
+     * @param src_strm source pixel stream
+     * @param srcW     source width; max(srcW, dstW) + 3 must not exceed the line-buffer width
+     * @param srcH     source height
+     * @param dstW     destination width
+     * @param dstH     destination height
+     * @param dst_srm  destination pixel stream
+     */
+    void resizeDown_1x(
+        hls::stream<pixel_t>& src_strm, int srcW, int srcH, int dstW, int dstH, hls::stream<pixel_t>& dst_srm) {
+        typedef short N16;
+        const int NTAPS = 4;
+        const int MAX_COLS = 8024; // line-buffer width in pixels
+
+        LineBuffer<4, MAX_COLS, pixel_t> linebuf; // the common image width is 8k
+        Window<4, 4, fixed_t> window;             // the window is 4*4
+        pixel_t pix_in, temp_in[NTAPS], temp_out[NTAPS];
+
+        N16 row, col;
+        N16 rows, cols;
+        N16 rows_rw = -1; // last destination row index seen
+        N16 cols_rw = -1; // last destination column index seen
+
+        // Row/column read and write enables. They start out false so that
+        // they are never read uninitialised when a ratio is > 1.
+        bool col_rd_en = false;
+        bool col_wr_en = false;
+        bool row_rd_en = false;
+        bool row_wr_en = false;
+
+        fixed_t row_ratio = ((fixed_t)dstH) / (fixed_t)srcH;
+        fixed_t col_ratio = ((fixed_t)dstW) / (fixed_t)srcW;
+        fixed_t row_ratio_recip = ((fixed_t)1) / row_ratio;
+        fixed_t col_ratio_recip = ((fixed_t)1) / col_ratio;
+
+        // Fractional interpolation positions (rows -> du, columns -> dv).
+        ap_ufixed<_W - _I, 0> du, dv;
+
+        // Ratios scaled by 65536; a value <= 65536 means the ratio is <= 1.
+        int row_rate = (ap_fixed<4, 2, AP_RND>(0.5) + row_ratio * 65536);
+        int col_rate = (ap_fixed<4, 2, AP_RND>(0.5) + col_ratio * 65536);
+
+        rows = (srcH > dstH) ? srcH : dstH;
+        cols = (srcW > dstW) ? srcW : dstW;
+        assert(rows <= srcH || rows <= dstH);
+        assert(cols <= srcW || cols <= dstW);
+        // The column loop indexes linebuf up to cols + 2.
+        assert(cols + 3 <= MAX_COLS);
+
+    // Both loops run 3 iterations past the image size; on those iterations no
+    // pixel is read from the stream and line-buffer contents are reused.
+    INTERPOLATE_ROW:
+        for (row = 0; row < (rows + 3); row++) {
+#pragma HLS LOOP_TRIPCOUNT min = 514 max = 8194
+        COL_LOOP:
+            for (col = 0; col < (cols + 3); col++) {
+#pragma HLS LOOP_TRIPCOUNT min = 514 max = 8194
+#pragma HLS PIPELINE
+#pragma HLS DEPENDENCE array inter false
+                // row write?
+                if (col == 0) {
+                    if (row_rate <= 65536) { // Down scaling, writes are less frequent than reads
+                        row_rd_en = true;
+                        N16 drow = row * row_ratio;
+                        fixed_t y = (fixed_t)drow * row_ratio_recip;
+                        du.range(_W - _I - 1, 0) = y.range(_W - _I - 1, 0);
+                        // Write only when the destination row index changes.
+                        if (rows_rw != drow) {
+                            row_wr_en = true;
+                            rows_rw = drow;
+                        } else
+                            row_wr_en = false;
+                    }
+                }
+
+                // col write?
+                if (col_rate <= 65536) { // Down scaling, writes are less frequent than reads
+                    col_rd_en = true;
+                    N16 dcol = col * col_ratio;
+                    fixed_t x = (fixed_t)dcol * col_ratio_recip;
+                    dv.range(_W - _I - 1, 0) = x.range(_W - _I - 1, 0);
+                    // Write only when the destination column index changes.
+                    if (col == 0 || (col > 0 && cols_rw != dcol)) {
+                        col_wr_en = true;
+                        cols_rw = dcol;
+                    } else
+                        col_wr_en = false;
+                }
+
+                // Fetch the current column of the line buffer.
+                for (int i = 0; i < NTAPS; i++) {
+                    temp_out[i] = linebuf.val[i][col];
+                }
+
+                if (col_rd_en) {
+                    window.shift_left();
+                    if (row_rd_en) {
+                        if (col < cols && row < rows) {
+                            pix_in = src_strm.read();
+                            // std::cout << pix_in << std::endl;
+                            temp_in[NTAPS - 1] = pix_in;
+                        } else {
+                            // Past the image edge: reuse the newest buffered pixel.
+                            temp_in[NTAPS - 1] = temp_out[NTAPS - 1];
+                        }
+                    }
+                }
+
+                // Move the buffered column up by one row.
+                for (int i = NTAPS - 1; i > 0; i--) {
+                    temp_in[i - 1] = temp_out[i];
+                }
+
+                for (int i = 0; i < NTAPS; i++) {
+                    if (col == 0 && row == 0) {
+                        // Very first pixel: fill the whole window with it.
+                        window.insert(temp_in[NTAPS - 1], i, 0);
+                        window.insert(temp_in[NTAPS - 1], i, 1);
+                        window.insert(temp_in[NTAPS - 1], i, 2);
+                        window.insert(temp_in[NTAPS - 1], i, 3);
+                    } else if (col == 0 && row > 0) {
+                        // Start of a row: replicate the first column across the window.
+                        window.insert(temp_in[i], i, 0);
+                        window.insert(temp_in[i], i, 1);
+                        window.insert(temp_in[i], i, 2);
+                        window.insert(temp_in[i], i, 3);
+                    } else if (col < cols) {
+                        (row > 0) ? window.insert(temp_in[i], i, NTAPS - 1)
+                                  : window.insert(temp_in[NTAPS - 1], i, NTAPS - 1);
+                    }
+                }
+
+                // Write the column back; on the first row every tap gets the new pixel.
+                for (int i = 0; i < NTAPS; i++) {
+                    linebuf.val[i][col] = (row > 0) ? temp_in[i] : temp_in[NTAPS - 1];
+                }
+
+#ifndef __SYNTHESIS__
+// for (int m = 0; m < 4; m++) {
+//   for (int n = 0; n < 4; n++) {
+//     std::cout << window.getval(m, n) << " ";
+//   }
+//   std::cout << std::endl;
+// }
+// std::cout << std::endl;
+#endif
+                if (row >= 2 && col >= 2 && row_wr_en && col_wr_en && row < (rows + 2) && col < (cols + 2)) {
+                    fixed_t dstPixel = bicubicInterpolate(window, du, dv);
+                    dst_srm.write((pixel_t)dstPixel);
+                }
+            } // col
+        }     // row
+    }
+
+    /**
+     * @brief Bicubic down-scaler that consumes one packed input word (8 pixel lanes) per pipeline iteration.
+     *
+     * The loop nest visits rows + 3 image rows and (cols >> 3) + 1 packed columns, where rows / cols are the
+     * larger of the source and destination dimensions. A 4-row line buffer provides the vertical taps; a 4x4
+     * window, shifted once per pixel lane, is handed to bicubicInterpolate() together with the fractional
+     * offsets du (row) and dv (column). The read / write enables are only assigned when the 16.16 scaling
+     * rate is <= 65536, i.e. this routine is written for down scaling (or equal size) only.
+     *
+     * @param src_strm input stream; each word is unpacked into _NPPC lanes of _WBIT bits
+     * @param srcW source image width in pixels
+     * @param srcH source image height in pixels
+     * @param dstW destination image width in pixels
+     * @param dstH destination image height in pixels
+     * @param dst_srm output stream; bits 63..0 carry up to eight 8-bit results, bits 71..64 carry the number
+     *                of valid results. A word is written only when that number is non-zero.
+     */
+    void resizeDown_8x(
+        hls::stream<pixel_t>& src_strm, int srcW, int srcH, int dstW, int dstH, hls::stream<pixel_8x_t>& dst_srm) {
+        typedef short N16;
+        const int NTAPS = 4;
+
+        LineBuffer<4, 1024, pixel_t> linebuf;
+        Window<4, 4, fixed_t> window;
+        pixel_t pix_in;
+        pixel_8x_t pix_out;
+
+        // temp_out: taps fetched from the line buffer; temp_in: taps after inserting the new row;
+        // temp_pix: results gathered for the current packed column.
+        ap_uint<_WBIT> temp_in[NTAPS][_NPPC], temp_out[NTAPS][_NPPC], temp_pix[_NPPC];
+#pragma HLS array_partition variable = temp_in dim = 1 complete
+#pragma HLS array_partition variable = temp_in dim = 2 complete
+#pragma HLS array_partition variable = temp_out dim = 1 complete
+#pragma HLS array_partition variable = temp_out dim = 2 complete
+#pragma HLS array_partition variable = temp_pix dim = 1 complete
+
+        N16 row, col;
+        N16 rows, cols, new_row, new_col;
+        N16 rows_rw = -1; // destination row of the previous source row (-1: none yet)
+        // Only element 0 is set to -1; the remaining elements are zero-initialised.
+        N16 cols_rw[_NPPC] = {-1};
+        N16 rel_col; // absolute pixel column of the lane being processed
+
+        bool col_rd_en[_NPPC]; // 8x/clock
+        bool col_wr_en[_NPPC]; // 8x/clock
+        bool row_rd_en;
+        bool row_wr_en; // row or col read or write flag
+
+//#pragma HLS array_partition variable = col_rd_en dim = 0 complete
+#pragma HLS array_partition variable = col_wr_en dim = 0 complete
+
+        fixed_t row_ratio = ((fixed_t)dstH) / (fixed_t)srcH;
+        fixed_t col_ratio = ((fixed_t)dstW) / (fixed_t)srcW;
+        fixed_t row_ratio_recip = ((fixed_t)1) / row_ratio;
+        fixed_t col_ratio_recip = ((fixed_t)1) / col_ratio;
+        ap_ufixed<_W - _I, 0> du, dv[_NPPC];
+        ap_uint<4> index_pix = 0; // number of results currently held in temp_pix
+
+        // Scaling ratios expressed in 16.16 fixed point (65536 == 1.0), with 0.5 added before truncation.
+        int row_rate = (ap_fixed<4, 2, AP_RND>(0.5) + row_ratio * 65536);
+        int col_rate = (ap_fixed<4, 2, AP_RND>(0.5) + col_ratio * 65536);
+
+        rows = (srcH > dstH) ? srcH : dstH;
+        cols = (srcW > dstW) ? srcW : dstW;
+        new_row = rows + 3;        // extra iterations after the last input row
+        new_col = (cols >> 3) + 1; // packed columns plus one extra iteration
+        N16 end_row = rows + 2;
+        N16 end_col = cols + 2;
+
+        assert(rows <= srcH || rows <= dstH);
+        assert(cols <= srcW || cols <= dstW);
+
+    ROW_LOOP:
+        for (row = 0; row < new_row; row++) {
+#pragma HLS LOOP_TRIPCOUNT min = 66 max = 1026
+        COL_LOOP:
+            for (col = 0; col < new_col; col++) {
+#pragma HLS LOOP_TRIPCOUNT min = 65 max = 1025
+#pragma HLS PIPELINE II = 1
+#pragma HLS DEPENDENCE array inter false
+                // Row enables are evaluated once per row, on its first packed column.
+                if (col == 0) {
+                    if (row_rate <= 65536) { // Down scaling, writes are less frequent than reads
+                        row_rd_en = true;
+                        N16 drow = row * row_ratio;
+                        fixed_t y = (fixed_t)drow * row_ratio_recip;
+                        // du takes the fractional bits of the back-projected row position.
+                        du.range(_W - _I - 1, 0) = y.range(_W - _I - 1, 0);
+                        // Emit an output row only when the destination row index changes.
+                        if (rows_rw != drow) {
+                            row_wr_en = true;
+                            rows_rw = drow;
+                        } else
+                            row_wr_en = false;
+                    }
+                }
+
+                // Column enables, one per pixel lane of this packed column.
+                rel_col = col << 3;
+            NPPC_LOOP:
+                for (N16 nc = 0; nc < _NPPC; nc++, rel_col++) {
+#pragma HLS unroll
+                    if (col_rate <= 65536) { // Down scaling, writes are less frequent than reads
+                        col_rd_en[nc] = true;
+                        N16 dcol = rel_col * col_ratio;
+                        fixed_t x = (fixed_t)dcol * col_ratio_recip;
+                        dv[nc].range(_W - _I - 1, 0) = x.range(_W - _I - 1, 0);
+
+                        // cols_rw[nc] holds the destination column of the previous pixel: each lane stores
+                        // its dcol into slot nc + 1, and the 3-bit cast wraps lane 7 back to slot 0.
+                        if (rel_col == 0 || (rel_col > 0 && cols_rw[nc] != dcol)) {
+                            col_wr_en[nc] = true;
+                            cols_rw[(ap_uint<3>)(nc + 1)] = dcol;
+                        } else {
+                            col_wr_en[nc] = false;
+                            cols_rw[(ap_uint<3>)(nc + 1)] = dcol;
+                        }
+                    }
+                } // nc
+
+            // Fetch the four stored rows for this packed column and split them into byte lanes.
+            READ_FROM_LINEBUF_TO_TEMPOUT:
+                for (int i = 0; i < NTAPS; i++) {
+                    pixel_t tmp = linebuf.val[i][col];
+                    temp_out[i][0] = tmp.range(7, 0);
+                    temp_out[i][1] = tmp.range(15, 8);
+                    temp_out[i][2] = tmp.range(23, 16);
+                    temp_out[i][3] = tmp.range(31, 24);
+                    temp_out[i][4] = tmp.range(39, 32);
+                    temp_out[i][5] = tmp.range(47, 40);
+                    temp_out[i][6] = tmp.range(55, 48);
+                    temp_out[i][7] = tmp.range(63, 56);
+                } // i
+
+                // Newest row: take it from the stream while input remains, otherwise repeat the last
+                // stored row (this covers the extra trailing iterations).
+                if (col_rd_en[0]) {
+                    if (row_rd_en) {
+                        if (col < (new_col - 1) && row < rows) {
+                            ap_uint<_WBIT> tmp;
+                            pix_in = src_strm.read();
+                            for (int nb = 0; nb < _NPPC; nb++) {
+#pragma HLS unroll
+                                tmp.range(_WBIT - 1, 0) = pix_in.range(nb * _WBIT + _WBIT - 1, nb * _WBIT);
+                                temp_in[NTAPS - 1][nb] = tmp;
+                            } // nb
+                        } else {
+                            for (int nb = 0; nb < _NPPC; nb++) {
+#pragma HLS unroll
+                                temp_in[NTAPS - 1][nb] = temp_out[NTAPS - 1][nb];
+                            } // nb
+                        }
+                    }
+                }
+
+            // Shift the stored rows up by one tap: temp_in[i - 1] takes what the buffer held in row i.
+            LOAD_FROM_TEMPOUT_TO_TEMPIN:
+                for (int i = NTAPS - 1; i > 0; i--) {
+                    for (int nc = 0; nc < _NPPC; nc++) {
+                        temp_in[i - 1][nc] = temp_out[i][nc];
+                    }
+                }
+
+                rel_col = col << 3;
+            LOAD_TO_WIN_NPPC:
+                for (int nc = 0; nc < _NPPC; nc++, rel_col++) {
+#pragma HLS unroll
+                    if (col_rd_en[nc]) window.shift_left();
+                LOAD_TO_WIN_ONE:
+                    for (int i = 0; i < NTAPS; i++) {
+                        if (rel_col == 0 && row == 0) {
+                            // Very first pixel: fill the whole window with it.
+                            window.insert(temp_in[NTAPS - 1][0], i, 0);
+                            window.insert(temp_in[NTAPS - 1][0], i, 1);
+                            window.insert(temp_in[NTAPS - 1][0], i, 2);
+                            window.insert(temp_in[NTAPS - 1][0], i, 3);
+                        } else if (rel_col == 0 && row > 0) {
+                            // First pixel of a later row: replicate it across all four window columns.
+                            window.insert(temp_in[i][0], i, 0);
+                            window.insert(temp_in[i][0], i, 1);
+                            window.insert(temp_in[i][0], i, 2);
+                            window.insert(temp_in[i][0], i, 3);
+                        } else if (rel_col < cols) {
+                            // Row 0 has no history yet, so the newest row stands in for every tap.
+                            (row > 0) ? window.insert(temp_in[i][nc], i, NTAPS - 1)
+                                      : window.insert(temp_in[NTAPS - 1][nc], i, NTAPS - 1);
+                        }
+                    } // i
+                    // The first two rows / columns are skipped here; the two iterations past the image
+                    // edge (up to end_row / end_col) are still allowed to produce output.
+                    if (row >= 2 && rel_col >= 2 && row_wr_en && col_wr_en[nc] && row < end_row && rel_col < end_col) {
+                        fixed_t dstPixel = bicubicInterpolate(window, du, dv[nc]);
+                        temp_pix[index_pix++] = (ap_uint<_WBIT>)dstPixel;
+                    }
+                } // nc
+
+                // Emit the gathered results together with their count; lanes at or above index_pix keep
+                // whatever temp_pix held before and must be ignored by the consumer.
+                if (index_pix > 0) {
+                    pix_out.range(7, 0) = temp_pix[0].range(7, 0);
+                    pix_out.range(15, 8) = temp_pix[1].range(7, 0);
+                    pix_out.range(23, 16) = temp_pix[2].range(7, 0);
+                    pix_out.range(31, 24) = temp_pix[3].range(7, 0);
+                    pix_out.range(39, 32) = temp_pix[4].range(7, 0);
+                    pix_out.range(47, 40) = temp_pix[5].range(7, 0);
+                    pix_out.range(55, 48) = temp_pix[6].range(7, 0);
+                    pix_out.range(63, 56) = temp_pix[7].range(7, 0);
+                    pix_out.range(71, 64) = index_pix;
+                    dst_srm.write(pix_out);
+                    index_pix = 0;
+                }
+
+            // Store the shifted rows back; on row 0 every stored row is seeded with the newest row.
+            WRITE_BACK_LINEBUF:
+                for (int i = 0; i < NTAPS; i++) {
+                    pixel_t tmp;
+                    if (row > 0) {
+                        tmp.range(7, 0) = temp_in[i][0].range(7, 0);
+                        tmp.range(15, 8) = temp_in[i][1].range(7, 0);
+                        tmp.range(23, 16) = temp_in[i][2].range(7, 0);
+                        tmp.range(31, 24) = temp_in[i][3].range(7, 0);
+                        tmp.range(39, 32) = temp_in[i][4].range(7, 0);
+                        tmp.range(47, 40) = temp_in[i][5].range(7, 0);
+                        tmp.range(55, 48) = temp_in[i][6].range(7, 0);
+                        tmp.range(63, 56) = temp_in[i][7].range(7, 0);
+                    } else {
+                        tmp.range(7, 0) = temp_in[NTAPS - 1][0].range(7, 0);
+                        tmp.range(15, 8) = temp_in[NTAPS - 1][1].range(7, 0);
+                        tmp.range(23, 16) = temp_in[NTAPS - 1][2].range(7, 0);
+                        tmp.range(31, 24) = temp_in[NTAPS - 1][3].range(7, 0);
+                        tmp.range(39, 32) = temp_in[NTAPS - 1][4].range(7, 0);
+                        tmp.range(47, 40) = temp_in[NTAPS - 1][5].range(7, 0);
+                        tmp.range(55, 48) = temp_in[NTAPS - 1][6].range(7, 0);
+                        tmp.range(63, 56) = temp_in[NTAPS - 1][7].range(7, 0);
+                    }
+                    linebuf.val[i][col] = tmp;
+                } // i
+            }     // new_col
+        }         // row
+    }
+
+    /**
+     * @brief Separable variant of the 8-lane bicubic down-scaler with an end-of-stream flag.
+     *
+     * For every pixel lane the four vertical taps are first reduced to a single value with
+     * cubicInterpolate_one(y_win, du); these values are then pushed through a 1x4 horizontal window and
+     * reduced again with cubicInterpolate_one(x_win, dv). The loop nest visits rows + 3 rows and
+     * (cols >> 3) + 1 packed columns, where rows / cols are the larger of the source and destination
+     * dimensions. Read / write enables are only assigned when the 16.16 scaling rate is <= 65536, i.e. for
+     * down scaling (or equal size).
+     *
+     * @param srcW source image width in pixels
+     * @param srcH source image height in pixels
+     * @param dstW destination image width in pixels
+     * @param dstH destination image height in pixels
+     * @param src_strm input stream; each word is unpacked into _NPPC lanes of _WBIT bits
+     * @param dst_srm output stream; bits 63..0 carry up to eight 8-bit results, bits 71..64 carry the number
+     *                of valid results. A word is written only when that number is non-zero.
+     * @param e_dst end flag stream: one `false` accompanies every word written to dst_srm and a single
+     *              `true` is written after the last loop iteration
+     */
+    void resizeDown_opt_8x(int srcW,
+                           int srcH,
+                           int dstW,
+                           int dstH,
+                           hls::stream<pixel_t>& src_strm,
+                           hls::stream<pixel_8x_t>& dst_srm,
+                           hls::stream<bool>& e_dst) {
+        typedef short N16;
+        const int NTAPS = 4;
+
+        LineBuffer<4, 1024, pixel_t> linebuf;
+
+        Window<1, 4, fixed_t> x_win;  // horizontal taps, shifted once per pixel lane
+        Window<1, 4, fixed_t> y_win;  // vertical taps of the lane being processed
+        Window<1, 8, fixed_t> y_buff; // vertically interpolated value of each lane
+        pixel_t pix_in;
+        pixel_8x_t pix_out;
+
+        // temp_out: taps fetched from the line buffer; temp_in: taps after inserting the new row;
+        // temp_pix: results gathered for the current packed column.
+        ap_uint<_WBIT> temp_in[NTAPS][_NPPC], temp_out[NTAPS][_NPPC], temp_pix[_NPPC];
+#pragma HLS array_partition variable = temp_in dim = 1 complete
+#pragma HLS array_partition variable = temp_in dim = 2 complete
+#pragma HLS array_partition variable = temp_out dim = 1 complete
+#pragma HLS array_partition variable = temp_out dim = 2 complete
+#pragma HLS array_partition variable = temp_pix dim = 1 complete
+
+        N16 row, col;
+        N16 rows, cols, new_row, new_col;
+        N16 rows_rw = -1; // destination row of the previous source row (-1: none yet)
+        // Only element 0 is set to -1; the remaining elements are zero-initialised.
+        N16 cols_rw[_NPPC] = {-1};
+        N16 rel_col; // absolute pixel column of the lane being processed
+        N16 end_row, end_col;
+
+        bool col_rd_en[_NPPC]; // 8x/clock
+        bool col_wr_en[_NPPC]; // 8x/clock
+        bool row_rd_en;
+        bool row_wr_en; // row or col read or write flag
+
+//#pragma HLS array_partition variable = col_rd_en dim = 0 complete
+#pragma HLS array_partition variable = col_wr_en dim = 0 complete
+
+        fixed_t row_ratio = ((fixed_t)dstH) / (fixed_t)srcH;
+        fixed_t col_ratio = ((fixed_t)dstW) / (fixed_t)srcW;
+        fixed_t row_ratio_recip = ((fixed_t)1) / row_ratio;
+        fixed_t col_ratio_recip = ((fixed_t)1) / col_ratio;
+
+        ap_ufixed<_W - _I, 0> du, dv[_NPPC];
+
+        ap_uint<4> index_pix = 0; // number of results currently held in temp_pix
+
+        // Scaling ratios expressed in 16.16 fixed point (65536 == 1.0), with 0.5 added before truncation.
+        int row_rate = (ap_fixed<4, 2, AP_RND>(0.5) + row_ratio * 65536);
+        int col_rate = (ap_fixed<4, 2, AP_RND>(0.5) + col_ratio * 65536);
+
+        rows = (srcH > dstH) ? srcH : dstH;
+        cols = (srcW > dstW) ? srcW : dstW;
+        new_row = rows + 3;        // extra iterations after the last input row
+        new_col = (cols >> 3) + 1; // packed columns plus one extra iteration
+        end_row = rows + 2;
+        end_col = cols + 2;
+        assert(rows <= srcH || rows <= dstH);
+        assert(cols <= srcW || cols <= dstW);
+
+    INTERPOLATE_ROW:
+        for (row = 0; row < new_row; row++) {
+#pragma HLS LOOP_TRIPCOUNT min = 66 max = 1026
+        COL_LOOP:
+            for (col = 0; col < new_col; col++) {
+#pragma HLS LOOP_TRIPCOUNT min = 65 max = 1025
+#pragma HLS PIPELINE II = 1
+#pragma HLS DEPENDENCE array inter false
+                // Row enables are evaluated once per row, on its first packed column.
+                if (col == 0) {
+                    if (row_rate <= 65536) { // Down scaling, writes are less frequent than reads
+                        row_rd_en = true;
+                        N16 drow = row * row_ratio;
+                        fixed_t y = (fixed_t)drow * row_ratio_recip;
+                        // du takes the fractional bits of the back-projected row position.
+                        du.range(_W - _I - 1, 0) = y.range(_W - _I - 1, 0);
+                        // Emit an output row only when the destination row index changes.
+                        if (rows_rw != drow) {
+                            row_wr_en = true;
+                            rows_rw = drow;
+                        } else
+                            row_wr_en = false;
+                    }
+                }
+
+                // Column enables, one per pixel lane of this packed column.
+                rel_col = col << 3;
+            NPPC_LOOP:
+                for (N16 nc = 0; nc < _NPPC; nc++, rel_col++) {
+#pragma HLS unroll
+                    if (col_rate <= 65536) { // Down scaling, writes are less frequent than reads
+                        col_rd_en[nc] = true;
+                        N16 dcol = rel_col * col_ratio;
+                        fixed_t x = (fixed_t)dcol * col_ratio_recip;
+                        dv[nc].range(_W - _I - 1, 0) = x.range(_W - _I - 1, 0);
+
+                        // cols_rw[nc] holds the destination column of the previous pixel: each lane stores
+                        // its dcol into slot nc + 1, and the 3-bit cast wraps lane 7 back to slot 0.
+                        if (rel_col == 0 || (rel_col > 0 && cols_rw[nc] != dcol)) {
+                            col_wr_en[nc] = true;
+                            cols_rw[(ap_uint<3>)(nc + 1)] = dcol;
+                        } else {
+                            col_wr_en[nc] = false;
+                            cols_rw[(ap_uint<3>)(nc + 1)] = dcol;
+                        }
+                    }
+                } // _NPPC
+
+            // Fetch the four stored rows for this packed column and split them into byte lanes.
+            READ_FROM_LINEBUF_TO_TEMPOUT:
+                for (int i = 0; i < NTAPS; i++) {
+                    pixel_t tmp = linebuf.val[i][col];
+                    temp_out[i][0] = tmp.range(7, 0);
+                    temp_out[i][1] = tmp.range(15, 8);
+                    temp_out[i][2] = tmp.range(23, 16);
+                    temp_out[i][3] = tmp.range(31, 24);
+                    temp_out[i][4] = tmp.range(39, 32);
+                    temp_out[i][5] = tmp.range(47, 40);
+                    temp_out[i][6] = tmp.range(55, 48);
+                    temp_out[i][7] = tmp.range(63, 56);
+                } // i
+
+                // Newest row: take it from the stream while input remains, otherwise repeat the last
+                // stored row (this covers the extra trailing iterations).
+                if (col_rd_en[0]) {
+                    if (row_rd_en) {
+                        if (col < (new_col - 1) && row < rows) {
+                            ap_uint<_WBIT> tmp;
+                            pix_in = src_strm.read();
+                            for (int nb = 0; nb < _NPPC; nb++) {
+#pragma HLS unroll
+                                tmp.range(_WBIT - 1, 0) = pix_in.range(nb * _WBIT + _WBIT - 1, nb * _WBIT);
+                                temp_in[NTAPS - 1][nb] = tmp;
+                            } // nb
+                        } else {
+                            for (int nb = 0; nb < _NPPC; nb++) {
+#pragma HLS unroll
+                                temp_in[NTAPS - 1][nb] = temp_out[NTAPS - 1][nb];
+                            } // nb
+                        }
+                    }
+                }
+
+                // Per lane: shift the stored rows up by one tap, then collapse the four vertical taps
+                // into one value.
+                rel_col = col << 3;
+            LOAD_FROM_TEMPOUT_TO_TEMPIN:
+                for (int nc = 0; nc < _NPPC; nc++, rel_col++) {
+                    for (int i = NTAPS - 1; i > 0; i--) {
+                        fixed_t fix;
+                        fix = temp_out[i][nc];
+                        temp_in[i - 1][nc] = fix;
+                    }
+
+                    // Row 0 has no history yet, so the newest row stands in for every vertical tap.
+                    // For lanes at or beyond cols, y_win keeps its previous contents.
+                    if (row == 0 && rel_col < cols) {
+                        y_win.val[0][0] = temp_in[NTAPS - 1][nc];
+                        y_win.val[0][1] = temp_in[NTAPS - 1][nc];
+                        y_win.val[0][2] = temp_in[NTAPS - 1][nc];
+                        y_win.val[0][3] = temp_in[NTAPS - 1][nc];
+                    } else if (rel_col < cols) {
+                        y_win.val[0][0] = temp_in[0][nc];
+                        y_win.val[0][1] = temp_in[1][nc];
+                        y_win.val[0][2] = temp_in[2][nc];
+                        y_win.val[0][3] = temp_in[3][nc];
+                    }
+                    y_buff.val[0][nc] = cubicInterpolate_one(y_win, du);
+                }
+
+                // Horizontal pass over the vertically interpolated lane values.
+                rel_col = col << 3;
+            LOAD_TO_WIN_NPPC:
+                for (int nc = 0; nc < _NPPC; nc++, rel_col++) {
+                    if (col_rd_en[nc]) x_win.shift_left();
+
+                    if (rel_col == 0) {
+                        // First pixel of the row: replicate it across all four horizontal taps.
+                        x_win.insert(y_buff.val[0][nc], 0, 0);
+                        x_win.insert(y_buff.val[0][nc], 0, 1);
+                        x_win.insert(y_buff.val[0][nc], 0, 2);
+                        x_win.insert(y_buff.val[0][nc], 0, 3);
+                    } else if (rel_col < cols)
+                        x_win.insert(y_buff.val[0][nc], 0, NTAPS - 1);
+
+                    // The first two rows / columns are skipped here; the two iterations past the image
+                    // edge (up to end_row / end_col) are still allowed to produce output.
+                    if (row >= 2 && rel_col >= 2 && row_wr_en && col_wr_en[nc] && row < end_row && rel_col < end_col) {
+                        fixed_t dstPixel = cubicInterpolate_one(x_win, dv[nc]);
+                        temp_pix[index_pix++] = (ap_uint<_WBIT>)dstPixel;
+                    }
+                } // nc
+
+                // Emit the gathered results together with their count; lanes at or above index_pix keep
+                // whatever temp_pix held before and must be ignored by the consumer.
+                if (index_pix > 0) {
+                    pix_out.range(7, 0) = temp_pix[0].range(7, 0);
+                    pix_out.range(15, 8) = temp_pix[1].range(7, 0);
+                    pix_out.range(23, 16) = temp_pix[2].range(7, 0);
+                    pix_out.range(31, 24) = temp_pix[3].range(7, 0);
+                    pix_out.range(39, 32) = temp_pix[4].range(7, 0);
+                    pix_out.range(47, 40) = temp_pix[5].range(7, 0);
+                    pix_out.range(55, 48) = temp_pix[6].range(7, 0);
+                    pix_out.range(63, 56) = temp_pix[7].range(7, 0);
+                    pix_out.range(71, 64) = index_pix;
+                    dst_srm.write(pix_out);
+                    e_dst.write(false);
+                    index_pix = 0;
+                }
+
+            // Store the shifted rows back; on row 0 every stored row is seeded with the newest row.
+            WRITE_BACK_LINEBUF:
+                for (int i = 0; i < NTAPS; i++) {
+                    pixel_t tmp;
+                    if (row > 0) {
+                        tmp.range(7, 0) = temp_in[i][0].range(7, 0);
+                        tmp.range(15, 8) = temp_in[i][1].range(7, 0);
+                        tmp.range(23, 16) = temp_in[i][2].range(7, 0);
+                        tmp.range(31, 24) = temp_in[i][3].range(7, 0);
+                        tmp.range(39, 32) = temp_in[i][4].range(7, 0);
+                        tmp.range(47, 40) = temp_in[i][5].range(7, 0);
+                        tmp.range(55, 48) = temp_in[i][6].range(7, 0);
+                        tmp.range(63, 56) = temp_in[i][7].range(7, 0);
+                    } else {
+                        tmp.range(7, 0) = temp_in[NTAPS - 1][0].range(7, 0);
+                        tmp.range(15, 8) = temp_in[NTAPS - 1][1].range(7, 0);
+                        tmp.range(23, 16) = temp_in[NTAPS - 1][2].range(7, 0);
+                        tmp.range(31, 24) = temp_in[NTAPS - 1][3].range(7, 0);
+                        tmp.range(39, 32) = temp_in[NTAPS - 1][4].range(7, 0);
+                        tmp.range(47, 40) = temp_in[NTAPS - 1][5].range(7, 0);
+                        tmp.range(55, 48) = temp_in[NTAPS - 1][6].range(7, 0);
+                        tmp.range(63, 56) = temp_in[NTAPS - 1][7].range(7, 0);
+                    }
+                    linebuf.val[i][col] = tmp;
+                } // i
+            }     // new_col
+        }         // row
+        // Tell the consumer that no further output words will follow.
+        e_dst.write(true);
+    }
+};
+
+#if NPPC == 1
+/**
+ * @brief Copy the source image from memory into the interpolator's input stream, one memory word per
+ * stream element.
+ *
+ * @param width number of pixels in each row of the input image
+ * @param height number of rows in the input image
+ * @param axi_src memory port holding the input image; width * height consecutive words are read
+ * @param src_strm input stream of the bicubic interpolator
+ */
+void loadToStrm(ap_uint<32> width,
+                ap_uint<32> height,
+                ap_uint<WDATA>* axi_src,
+                hls::stream<ap_uint<WDATA> >& src_strm) {
+#pragma HLS INLINE off
+    // The 32 x 32 bit product is 64 bits wide; name it once instead of repeating it in the loop test.
+    const ap_uint<64> num_words = width * height;
+LOAD_STRM:
+    for (ap_uint<32> idx = 0; idx < num_words; ++idx) {
+#pragma HLS PIPELINE II = 1
+        ap_uint<WDATA> word = axi_src[idx];
+        src_strm.write(word);
+    }
+}
+
+/**
+ * @brief Drain the interpolator's output stream into memory, one stream element per memory word.
+ *
+ * @param width number of pixels in each row of the output image
+ * @param height number of rows in the output image
+ * @param dst_strm output stream of the bicubic interpolator; width * height elements are read
+ * @param axi_dst memory port receiving the output image
+ */
+void loadToImage(ap_uint<32> width,
+                 ap_uint<32> height,
+                 hls::stream<ap_uint<WDATA> >& dst_strm,
+                 ap_uint<WDATA>* axi_dst) {
+#pragma HLS INLINE off
+    // The 32 x 32 bit product is 64 bits wide; name it once instead of repeating it in the loop test.
+    const ap_uint<64> num_words = width * height;
+LOAD_IMAGE:
+    for (ap_uint<32> idx = 0; idx < num_words; ++idx) {
+#pragma HLS PIPELINE II = 1
+        ap_uint<WDATA> word = dst_strm.read();
+        axi_dst[idx] = word;
+    }
+}
+
+#else
+
+/**
+ * @brief Copy the source image from memory into the interpolator's input stream, eight pixels per word.
+ *
+ * NOTE(review): the word count is (width * height) >> 3, so any remainder of width * height modulo 8 is
+ * dropped - confirm callers only pass image sizes that are a multiple of 8 pixels.
+ *
+ * @param width number of pixels in each row of the input image
+ * @param height number of rows in the input image
+ * @param axi_src memory port holding the input image
+ * @param src_strm input stream of the bicubic interpolator
+ */
+void loadToStrm(ap_uint<32> width,
+                ap_uint<32> height,
+                ap_uint<WDATA>* axi_src,
+                hls::stream<ap_uint<WDATA> >& src_strm) {
+#pragma HLS INLINE off
+    // Number of packed words: total pixel count divided by 8, rounded down.
+    const ap_uint<64> num_words = (width * height) >> 3;
+LOAD_STRM:
+    for (ap_uint<32> idx = 0; idx < num_words; ++idx) {
+#pragma HLS PIPELINE II = 1
+        ap_uint<WDATA> word = axi_src[idx];
+        src_strm.write(word);
+    }
+}
+
+/**
+ * @brief Repack the interpolator's variable-length output groups into dense 64-bit words.
+ *
+ * Each 72-bit input word carries its valid pixels in the low bytes of bits 63..0 and the number of valid
+ * pixels in bits 71..64. Pixels are appended to a 64-bit accumulator; whenever eight pixels are available
+ * the accumulator is written out and any surplus pixels of the current group start the next word. After
+ * the end flag is seen, a partially filled accumulator is flushed as one last word; its unused upper bytes
+ * hold whatever the accumulator contained before (zero if no word was emitted earlier).
+ *
+ * The pixel count of every input word is expected to be in the range 1..8.
+ *
+ * @param dst_strm the output of the bicubic interpolator
+ * @param e_dst    end flag stream: `false` announces one word on dst_strm, `true` terminates the loop
+ * @param pixel_strm the compacted 64-bit words, each representing 8 pixels
+ */
+void pickOutStrm(hls::stream<ap_uint<72> >& dst_strm,
+                 hls::stream<bool>& e_dst,
+                 hls::stream<ap_uint<WDATA> >& pixel_strm) {
+#pragma HLS INLINE off
+    bool stop(false);
+    ap_uint<72> pixel_72;
+    // Start from a defined value: range() assignments below read-modify-write the accumulator.
+    ap_uint<64> pixel_out = 0;
+    int num_cur = 0; // pixels currently held in pixel_out
+
+PICK_UP_PIXELS:
+    while (!stop) {
+#pragma HLS PIPELINE II = 1
+        e_dst.read(stop);
+        if (!stop) {
+            dst_strm.read(pixel_72);
+            int num_pixs = pixel_72.range(71, 64);
+            int num_prev = num_cur; // fill level before this group is appended
+            num_cur += num_pixs;
+            if (num_cur < 8) {
+                // Whole group fits without completing the word.
+                pixel_out.range(num_cur * 8 - 1, num_prev * 8) = pixel_72.range(num_pixs * 8 - 1, 0);
+            } else {
+                // Fill the remaining bytes (the assignment keeps the low bytes of the group) and emit.
+                int tmp = num_cur - 8;
+                pixel_out.range(63, num_prev * 8) = pixel_72.range(num_pixs * 8 - 1, 0);
+                pixel_strm.write(pixel_out);
+                if (tmp != 0) {
+                    // Carry the surplus pixels of this group over into the next word.
+                    pixel_out.range(tmp * 8 - 1, 0) = pixel_72.range(num_pixs * 8 - 1, (num_pixs - tmp) * 8);
+                }
+                num_cur = tmp;
+            }
+        }
+    }
+    // Flush a partially filled last word.
+    if (num_cur > 0) pixel_strm.write(pixel_out);
+}
+
+/**
+ * @brief Write the compacted interpolation result to memory, eight pixels per word.
+ *
+ * DivCeil(width * height, 8) words are read from the stream and stored at consecutive addresses.
+ *
+ * @param width number of pixels in each row of the output image
+ * @param height number of rows in the output image
+ * @param pixel_strm stream of compacted 64-bit words
+ * @param axi_dst memory port receiving the output image
+ */
+void loadToImage(ap_uint<32> width,
+                 ap_uint<32> height,
+                 hls::stream<ap_uint<WDATA> >& pixel_strm,
+                 ap_uint<WDATA>* axi_dst) {
+#pragma HLS INLINE off
+LOAD_IMAGE:
+    for (ap_uint<32> idx = 0; idx < DivCeil(width * height, 8); ++idx) {
+#pragma HLS PIPELINE II = 1
+        ap_uint<64> packed;
+        pixel_strm.read(packed);
+        axi_dst[idx] = packed;
+    }
+}
+#endif
+
+} // namespace details
+
+/**
+ * @brief Top-level dataflow pipeline: load the source image, scale it down with bicubic interpolation and
+ * store the result.
+ *
+ * With NPPC == 1 the stages are loadToStrm -> resizeDown_1x -> loadToImage. Otherwise they are
+ * loadToStrm -> resizeDown_opt_8x -> pickOutStrm -> loadToImage, processing eight pixels per word. All
+ * stages are connected by FIFOs of depth 512.
+ *
+ * @param configs the stored parameters: [0] src_width, [1] src_height, [2] dst_width, [3] dst_height
+ * @param axi_src the memory for input
+ * @param axi_dst the memory for output
+ *
+ */
+inline void resizeTop(ap_uint<32>* configs, ap_uint<WDATA>* axi_src, ap_uint<WDATA>* axi_dst) {
+#pragma HLS INLINE off
+#pragma HLS DATAFLOW
+
+    // FIFO between the loader and the interpolator (both build variants).
+    hls::stream<ap_uint<WDATA> > src_strm("src_strm");
+#pragma HLS RESOURCE variable = src_strm core = FIFO_LUTRAM
+#pragma HLS STREAM variable = src_strm depth = 512
+
+    // Image geometry, needed by the stages of both build variants.
+    ap_uint<32> in_w = configs[0];
+    ap_uint<32> in_h = configs[1];
+    ap_uint<32> out_w = configs[2];
+    ap_uint<32> out_h = configs[3];
+
+#if NPPC == 1
+    // Interpolator output goes straight to the writer.
+    hls::stream<ap_uint<WDATA> > dst_strm("dst_strm");
+#pragma HLS RESOURCE variable = dst_strm core = FIFO_LUTRAM
+#pragma HLS STREAM variable = dst_strm depth = 512
+
+    details::BicubicInterpolator<W, I, WBIT, NPPC> interpolator;
+    details::loadToStrm(in_w, in_h, axi_src, src_strm);
+    interpolator.resizeDown_1x(src_strm, in_w, in_h, out_w, out_h, dst_strm);
+    details::loadToImage(out_w, out_h, dst_strm, axi_dst);
+#else
+    // 72-bit words: 64 bits of pixels plus an 8-bit count of valid pixels.
+    hls::stream<ap_uint<72> > dst_strm("dst_strm");
+#pragma HLS RESOURCE variable = dst_strm core = FIFO_LUTRAM
+#pragma HLS STREAM variable = dst_strm depth = 512
+
+    // End flag accompanying dst_strm.
+    hls::stream<bool> e_dst("e_dst");
+#pragma HLS RESOURCE variable = e_dst core = FIFO_LUTRAM
+#pragma HLS STREAM variable = e_dst depth = 512
+
+    // Densely packed words produced by pickOutStrm.
+    hls::stream<ap_uint<WDATA> > pixel_strm("pixel_strm");
+#pragma HLS RESOURCE variable = pixel_strm core = FIFO_LUTRAM
+#pragma HLS STREAM variable = pixel_strm depth = 512
+
+    details::BicubicInterpolator<W, I, WBIT, NPPC> interpolator;
+    details::loadToStrm(in_w, in_h, axi_src, src_strm);
+    interpolator.resizeDown_opt_8x(in_w, in_h, out_w, out_h, src_strm, dst_strm, e_dst);
+    details::pickOutStrm(dst_strm, e_dst, pixel_strm);
+    details::loadToImage(out_w, out_h, pixel_strm, axi_dst);
+#endif
+}
+
+} // namespace codec
+} // namespace xf
+#endif // _BICUBIC_INTERPOLATOR_HPP_
diff --git a/codec/L2/include/hw/resize/resize_mem.hpp b/codec/L2/include/hw/resize/resize_mem.hpp
new file mode 100644
index 0000000000..9c3dee61e7
--- /dev/null
+++ b/codec/L2/include/hw/resize/resize_mem.hpp
@@ -0,0 +1,832 @@
+/*****************************************************************************
+ *
+ *     Author: Xilinx, Inc.
+ *
+ *     This text contains proprietary, confidential information of
+ *     Xilinx, Inc. , is distributed by under license from Xilinx,
+ *     Inc., and may be used, copied and/or disclosed only pursuant to
+ *     the terms of a valid license agreement with Xilinx, Inc.
+ *
+ *     XILINX IS PROVIDING THIS DESIGN, CODE, OR INFORMATION "AS IS"
+ *     AS A COURTESY TO YOU, SOLELY FOR USE IN DEVELOPING PROGRAMS AND
+ *     SOLUTIONS FOR XILINX DEVICES.  BY PROVIDING THIS DESIGN, CODE,
+ *     OR INFORMATION AS ONE POSSIBLE IMPLEMENTATION OF THIS FEATURE,
+ *     APPLICATION OR STANDARD, XILINX IS MAKING NO REPRESENTATION
+ *     THAT THIS IMPLEMENTATION IS FREE FROM ANY CLAIMS OF INFRINGEMENT,
+ *     AND YOU ARE RESPONSIBLE FOR OBTAINING ANY RIGHTS YOU MAY REQUIRE
+ *     FOR YOUR IMPLEMENTATION.  XILINX EXPRESSLY DISCLAIMS ANY
+ *     WARRANTY WHATSOEVER WITH RESPECT TO THE ADEQUACY OF THE
+ *     IMPLEMENTATION, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OR
+ *     REPRESENTATIONS THAT THIS IMPLEMENTATION IS FREE FROM CLAIMS OF
+ *     INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE.
+ *
+ *     Xilinx products are not intended for use in life support appliances,
+ *     devices, or systems. Use in such applications is expressly prohibited.
+ *
+ *     (c) Copyright 2012-2013 Xilinx Inc.
+ *     All rights reserved.
+ *
+ *****************************************************************************/
+
+/*
+ * HLS Video Memory Partition Header File
+ */
+
+#ifndef _RESIZE_MEM_HPP_
+#define _RESIZE_MEM_HPP_
+
+#include "ap_int.h"
+#include <assert.h>
+
+typedef ap_uint<32> HLS_SIZE_T;
+typedef ap_uint<5> HLS_CHANNEL_T;
+
+/* Template class of Window: a ROWS x COLS array of T held in the public member val */
+template <int ROWS, int COLS, typename T>
+class Window {
+   public:
+    Window(){
+#pragma HLS ARRAY_PARTITION variable = val dim = 1 complete
+#pragma HLS ARRAY_PARTITION variable = val dim = 2 complete
+    };
+
+    /* Window main APIs (row/col indices start at the upper-left element, val[0][0]) */
+    void shift_pixels_left();
+    void shift_pixels_right();
+    void shift_pixels_up();
+    void shift_pixels_down();
+    void shift_diagonal();
+    void insert_pixel(T value, int row, int col);
+    void insert_row(T value[COLS], int row);
+    void insert_top_row(T value[COLS]);
+    void insert_bottom_row(T value[COLS]);
+    void insert_col(T value[ROWS], int col);
+    void insert_left_col(T value[ROWS]);
+    void insert_right_col(T value[ROWS]);
+    void copy_one_row(int row1, int row2);
+    void copy_one_col(int col1, int col2);
+
+    T& getval(int row, int col);
+    T& operator()(int row, int col);
+
+    /* Back compatible APIs (each one forwards to a main API above) */
+    void shift_left();
+    void shift_right();
+    void shift_up();
+    void shift_down();
+    void insert(T value, int row, int col);
+    void insert_top(T value[COLS]);
+    void insert_bottom(T value[COLS]);
+    void insert_left(T value[ROWS]);
+    void insert_right(T value[ROWS]);
+    // getval() and operator() are shared with the main APIs and are
+    // declared once, above.
+
+    T val[ROWS][COLS]; // window contents, completely partitioned in both dimensions by the constructor pragmas
+#ifndef __SYNTHESIS__
+    void restore_val();  // copies val into val_t
+    void window_print(); // prints every element of val to std::cout
+    T val_t[ROWS][COLS]; // copy of val, only present when __SYNTHESIS__ is not defined
+#endif
+};
+
+/* Member functions of Window class */
+/* Origin in upper-left point */
+/*       0   1        C-2 C-1
+ *     +---+---+-...-+---+---+
+ *  0  |   |   |     |   |   |
+ *     +---+---+-...-+---+---+
+ *  1  |   |   |     |   |   |
+ *     +---+---+-...-+---+---+
+ *       ...     ...    ...
+ *     +---+---+-...-+---+---+
+ * R-2 |   |   |     |   |   |
+ *     +---+---+-...-+---+---+
+ * R-1 |   |   |     |   |   |
+ *     +---+---+-...-+---+---+
+ *
+ */
+
+/*
+ * Window content shift left
+ * Assumes new values will be placed in right column = COLS-1
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::shift_pixels_left() {
+#pragma HLS inline
+    // Every element takes its right-hand neighbour's value; column COLS-1
+    // keeps its old contents until the caller overwrites it.
+    for (HLS_SIZE_T r = 0; r < ROWS; ++r) {
+#pragma HLS unroll
+        for (HLS_SIZE_T c = 0; c < COLS - 1; ++c) {
+#pragma HLS unroll
+            val[r][c] = val[r][c + 1];
+        }
+    }
+}
+
+/*
+ * Window content shift right
+ * Assumes new values will be placed in left column = 0
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::shift_pixels_right() {
+#pragma HLS inline
+    // Walk each row from its last column down to column 1 so every element
+    // receives its left-hand neighbour's value; column 0 is left untouched.
+    for (HLS_SIZE_T r = 0; r < ROWS; ++r) {
+#pragma HLS unroll
+        for (HLS_SIZE_T c = COLS - 1; c > 0; --c) {
+#pragma HLS unroll
+            val[r][c] = val[r][c - 1];
+        }
+    }
+}
+
+/*
+ * Window content shift up
+ * Assumes new values will be placed in bottom row = ROWS-1
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::shift_pixels_up() {
+#pragma HLS inline
+    // Row r is overwritten by row r + 1; the last row (ROWS-1) keeps its old
+    // contents until the caller writes new data into it.
+    for (HLS_SIZE_T r = 0; r < ROWS - 1; ++r) {
+#pragma HLS unroll
+        for (HLS_SIZE_T c = 0; c < COLS; ++c) {
+#pragma HLS unroll
+            val[r][c] = val[r + 1][c];
+        }
+    }
+}
+
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::copy_one_col(int col1, int col2) {
+#pragma HLS inline
+    assert(col1 >= 0 && col1 < COLS && col2 >= 0 && col2 < COLS);
+    // Overwrite column col1 with the contents of column col2, row by row.
+    for (HLS_SIZE_T r = 0; r < ROWS; ++r) {
+#pragma HLS unroll
+        val[r][col1] = val[r][col2];
+    }
+}
+
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::copy_one_row(int row1, int row2) { // overwrite row row1 with row row2
+#pragma HLS inline
+    assert(row1 >= 0 && row1 < ROWS && row2 >= 0 && row2 < ROWS); // both rows must be inside the window
+    for (HLS_SIZE_T c = 0; c < COLS; ++c) { // unrolled like the loop in copy_one_col()
+#pragma HLS unroll
+        val[row1][c] = val[row2][c];
+    }
+}
+
+/*
+ * Window content shift down
+ * Assumes new values will be placed in top row = 0
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::shift_pixels_down() {
+#pragma HLS inline
+    // Iterate from the last row upwards so each row r takes row r - 1's
+    // values before that row is itself overwritten; row 0 is left untouched.
+    for (HLS_SIZE_T r = ROWS - 1; r > 0; --r) {
+#pragma HLS unroll
+        for (HLS_SIZE_T c = 0; c < COLS; ++c) {
+#pragma HLS unroll
+            val[r][c] = val[r - 1][c];
+        }
+    }
+}
+
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::shift_diagonal() {
+#pragma HLS inline off
+    assert(ROWS == COLS);
+
+    // Swap every element above the main diagonal with its mirror image
+    // below it, i.e. transpose the (square) window in place.
+    for (HLS_SIZE_T r = 0; r < ROWS; ++r) {
+#pragma HLS unroll
+        for (HLS_SIZE_T c = 0; c < COLS; ++c) {
+#pragma HLS unroll
+            if (r < c) {
+                T upper = val[r][c];
+                val[r][c] = val[c][r];
+                val[c][r] = upper;
+            }
+        }
+    }
+}
+
+/* Window insert pixel
+ * Inserts a new value at any location of the window
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_pixel(T value, int row, int col) {
+#pragma HLS inline
+    assert(row >= 0 && row < ROWS && col >= 0 && col < COLS);
+    T& target = val[row][col]; // only this element is modified
+    target = value;
+}
+
+/* Window insert row
+ * Inserts a set of values in any row of the window
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_row(T value[COLS], int row) {
+#pragma HLS inline
+    // Reject out-of-range rows, matching the check done by insert_pixel().
+    assert(row >= 0 && row < ROWS);
+    for (HLS_SIZE_T c = 0; c < COLS; ++c) { // value[c] goes to column c of row `row`
+#pragma HLS unroll
+        val[row][c] = value[c];
+    }
+}
+
+/* Window insert top row
+ * Inserts a set of values in top row = 0 of the window
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_top_row(T value[COLS]) {
+#pragma HLS inline
+    // Row 0 is the top row of the window.
+    this->insert_row(value, 0);
+}
+
+/* Window insert bottom row
+ * Inserts a set of values in bottom row = ROWS-1 of the window
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_bottom_row(T value[COLS]) {
+#pragma HLS inline
+    const int bottom_row = ROWS - 1; // index of the last row
+    insert_row(value, bottom_row);
+}
+
+/* Window insert column
+ * Inserts a set of values in any column of the window
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_col(T value[ROWS], int col) {
+#pragma HLS inline
+    // Reject out-of-range columns, matching the check done by insert_pixel().
+    assert(col >= 0 && col < COLS);
+    for (HLS_SIZE_T r = 0; r < ROWS; ++r) { // value[r] goes to row r of column `col`
+#pragma HLS unroll
+        val[r][col] = value[r];
+    }
+}
+
+/* Window insert left column
+ * Inserts a set of values in left column = 0 of the window
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_left_col(T value[ROWS]) {
+#pragma HLS inline
+    // Column 0 is the left-most column of the window.
+    this->insert_col(value, 0);
+}
+
+/* Window insert right column
+ * Inserts a set of values in right column = COLS-1 of the window
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_right_col(T value[ROWS]) {
+#pragma HLS inline
+    const int right_col = COLS - 1; // index of the last column
+    insert_col(value, right_col);
+}
+
+/* Window getval
+ * Returns the data value in the window at position (row,col)
+ */
+template <int ROWS, int COLS, typename T>
+T& Window<ROWS, COLS, T>::getval(int row, int col) {
+#pragma HLS inline
+    assert(row >= 0 && row < ROWS && col >= 0 && col < COLS); // stay inside the window
+    return this->val[row][col]; // a reference, so callers can also write through it
+}
+
+/* Window operator()
+ * Returns the data value in the window at position (row,col)
+ */
+template <int ROWS, int COLS, typename T>
+T& Window<ROWS, COLS, T>::operator()(int row, int col) {
+#pragma HLS inline
+    return this->getval(row, col); // getval() performs the bounds assert
+}
+
+#ifndef __SYNTHESIS__
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::restore_val() {
+    // Despite the name, this copies the live window (val) into val_t.
+    for (HLS_SIZE_T r = 0; r < ROWS; ++r) {
+        for (HLS_SIZE_T c = 0; c < COLS; ++c) {
+            val_t[r][c] = val[r][c];
+        }
+    }
+}
+
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::window_print() { // writes every element, row by row, to std::cout
+    // NOTE(review): std::cout/std::setw need <iostream>/<iomanip>; this header does not include them itself - confirm.
+    for (HLS_SIZE_T r = 0; r < ROWS; ++r) {
+        for (HLS_SIZE_T c = 0; c < COLS; ++c) {
+            std::cout << std::setw(5) << val[r][c];
+        }
+    }
+    std::cout << "\n"; // one newline after the last element; rows are not separated
+}
+#endif
+
+/* NOTE:
+ * Back compatible APIs, take bottom-right point as the origin
+ * Window shift left, while contents shift right
+ * Assumes new values will be placed in left column(=COLS-1)
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::shift_left() {
+#pragma HLS inline
+    this->shift_pixels_left(); // legacy name; performs exactly shift_pixels_left()
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-right point as the origin
+ * Window shift right, while contents shift left
+ * Assumes new values will be placed in right column(=0)
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::shift_right() {
+#pragma HLS inline
+    this->shift_pixels_right(); // legacy name; performs exactly shift_pixels_right()
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-right point as the origin
+ * Window shift up, while contents shift down
+ * Assumes new values will be placed in top row(=ROWS-1)
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::shift_up() {
+#pragma HLS inline
+    this->shift_pixels_up(); // legacy name; performs exactly shift_pixels_up()
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-right point as the origin
+ * Window shift down, while contents shift up
+ * Assumes new values will be placed in bottom row(=0)
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::shift_down() {
+#pragma HLS inline
+    this->shift_pixels_down(); // legacy name; performs exactly shift_pixels_down()
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-right point as the origin
+ * Window insert
+ * Inserts a new value at any location of the window
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert(T value, int row, int col) {
+#pragma HLS inline
+    this->insert_pixel(value, row, col); // legacy name; (row, col) is passed through unchanged
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-right point as the origin
+ * Window insert top
+ * Inserts a set of values in top row(=ROWS-1)
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_top(T value[COLS]) {
+#pragma HLS inline
+    this->insert_bottom_row(value); // legacy "top" is stored row ROWS-1
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-right point as the origin
+ * Window insert bottom
+ * Inserts a set of values in bottom row(=0)
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_bottom(T value[COLS]) {
+#pragma HLS inline
+    this->insert_top_row(value); // legacy "bottom" is stored row 0
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-right point as the origin
+ * Window insert left
+ * Inserts a set of values in left column(=COLS-1)
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_left(T value[ROWS]) {
+#pragma HLS inline
+    this->insert_right_col(value); // legacy "left" is stored column COLS-1
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-right point as the origin
+ * Window insert right
+ * Inserts a set of values in right column(=0)
+ */
+template <int ROWS, int COLS, typename T>
+void Window<ROWS, COLS, T>::insert_right(T value[ROWS]) {
+#pragma HLS inline
+    this->insert_left_col(value); // legacy "right" is stored column 0
+}
+
+/* Template class of Line Buffer; RESHAPE (default 0) selects which partial specialization is used */
+template <int ROWS, int COLS, typename T, int RESHAPE = 0>
+class LineBuffer;
+
+template <int ROWS, int COLS, typename T>
+class LineBuffer<ROWS, COLS, T, 0> { // RESHAPE == 0 specialization: ROWS x COLS buffer of T held in val
+   public:
+    LineBuffer(){
+#pragma HLS array_partition variable = val dim = 1 complete
+#pragma HLS dependence variable = val inter false
+#pragma HLS dependence variable = val intra false
+    };
+    /* LineBuffer main APIs (every call takes the column index col; row 0 is the top row) */
+    void shift_pixels_up(int col);
+    void shift_pixels_down(int col);
+    void insert_bottom_row(T value, int col);
+    void insert_top_row(T value, int col);
+    void get_col(T value[ROWS], int col);
+    T& getval(int row, int col);
+    T& operator()(int row, int col);
+
+    /* Back compatible APIs (each one forwards to a main API above) */
+    void shift_up(int col);
+    void shift_down(int col);
+    void insert_bottom(T value, int col);
+    void insert_top(T value, int col);
+    // getval() and operator() are shared with the main APIs and are
+    // declared once, above.
+
+    T val[ROWS][COLS]; // buffer contents; dimension 1 (rows) is completely partitioned by the constructor pragma
+};
+
+/* Member functions of LineBuffer class */
+/* Origin in upper-left point */
+/*       0   1            C-2 C-1
+ *     +---+---+-... ...-+---+---+
+ *  0  |   |   |         |   |   |
+ *     +---+---+-... ...-+---+---+
+ *  1  |   |   |         |   |   |
+ *     +---+---+-... ...-+---+---+
+ *       ...     ... ...    ...
+ *     +---+---+-... ...-+---+---+
+ * R-2 |   |   |         |   |   |
+ *     +---+---+-... ...-+---+---+
+ * R-1 |   |   |         |   |   |
+ *     +---+---+-... ...-+---+---+
+ *
+ */
+
+/* Member functions of LineBuffer class */
+
+/*
+ * LineBuffer content shift down
+ * Assumes new values will be placed in top row = 0
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T>::shift_pixels_down(int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+    // Within column col, move each value one row down, starting at the bottom.
+    for (HLS_SIZE_T r = ROWS - 1; r > 0; --r) {
+#pragma HLS unroll
+        val[r][col] = val[r - 1][col];
+    }
+}
+
+/*
+ * LineBuffer content shift up
+ * Assumes new values will be placed in bottom row = ROWS-1
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T>::shift_pixels_up(int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+    // Within column col, row r takes the value of row r + 1; the last row
+    // (ROWS-1) keeps its old value until the caller overwrites it.
+    for (HLS_SIZE_T r = 0; r < ROWS - 1; ++r) {
+#pragma HLS unroll
+        val[r][col] = val[r + 1][col];
+    }
+}
+
+/* LineBuffer insert bottom row
+ * Inserts a new value in bottom row= ROWS-1 of the linebuffer
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T>::insert_bottom_row(T value, int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+    const int last_row = ROWS - 1; // index of the bottom row
+    val[last_row][col] = value;
+}
+
+/* LineBuffer insert top row
+ * Inserts a new value in top row=0 of the linebuffer
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T>::insert_top_row(T value, int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+    T& top_cell = val[0][col]; // row 0 is the top row
+    top_cell = value;
+}
+
+/* LineBuffer get a column
+ * Get a column value of the linebuffer
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T>::get_col(T value[ROWS], int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+    // Copy column col into the caller's array: value[r] receives row r.
+    for (HLS_SIZE_T r = 0; r < ROWS; ++r) {
+#pragma HLS unroll
+        value[r] = val[r][col];
+    }
+}
+
+/* Line buffer getval
+ * Returns the data value in the line buffer at position row, col
+ */
+template <int ROWS, int COLS, typename T>
+T& LineBuffer<ROWS, COLS, T>::getval(int row, int col) {
+#pragma HLS inline
+    assert(row >= 0 && row < ROWS && col >= 0 && col < COLS); // stay inside the buffer
+    return this->val[row][col]; // a reference, so callers can also write through it
+}
+
+/* Line buffer operator()
+ * Returns the data value in the line buffer at position row, col
+ */
+template <int ROWS, int COLS, typename T>
+T& LineBuffer<ROWS, COLS, T>::operator()(int row, int col) {
+#pragma HLS inline
+    return this->getval(row, col); // getval() performs the bounds assert
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-left point as the origin
+ * LineBuffer shift down, while contents shift up
+ * Assumes new values will be placed in bottom row(=0)
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T>::shift_down(int col) {
+#pragma HLS inline
+    this->shift_pixels_down(col); // legacy name; performs exactly shift_pixels_down(col)
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-left point as the origin
+ * LineBuffer shift up, while contents shift down
+ * Assumes new values will be placed in top row(=ROWS-1)
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T>::shift_up(int col) {
+#pragma HLS inline
+    this->shift_pixels_up(col); // legacy name; performs exactly shift_pixels_up(col)
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-left point as the origin
+ * LineBuffer insert
+ * Inserts a new value in bottom row(=0)
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T>::insert_bottom(T value, int col) {
+#pragma HLS inline
+    this->insert_top_row(value, col); // legacy "bottom" is stored row 0
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-left point as the origin
+ * LineBuffer insert
+ * Inserts a new value in top row(=ROWS-1)
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T>::insert_top(T value, int col) {
+#pragma HLS inline
+    this->insert_bottom_row(value, col); // legacy "top" is stored row ROWS-1
+}
+
+template <int ROWS, int COLS, typename T>
+class LineBuffer<ROWS, COLS, T, 1> { // RESHAPE == 1 specialization: same API, val bound to RAM_2P_URAM below
+   public:
+    LineBuffer(){
+#pragma HLS RESOURCE variable = val core = RAM_2P_URAM
+#pragma HLS ARRAY_PARTITION variable = val complete dim = 1
+#pragma HLS dependence variable = val inter false
+#pragma HLS dependence variable = val intra false
+    };
+    /* LineBuffer main APIs (every call takes the column index col; row 0 is the top row) */
+    void shift_pixels_up(int col);
+    void shift_pixels_down(int col);
+    void insert_bottom_row(T value, int col);
+    void insert_top_row(T value, int col);
+    void get_col(T value[ROWS], int col);
+    T& getval(int row, int col);
+    T& operator()(int row, int col);
+
+    /* Back compatible APIs (each one forwards to a main API above) */
+    void shift_up(int col);
+    void shift_down(int col);
+    void insert_bottom(T value, int col);
+    void insert_top(T value, int col);
+    // getval() and operator() are shared with the main APIs and are
+    // declared once, above.
+
+    T val[ROWS][COLS]; // buffer contents; dimension 1 (rows) is completely partitioned by the constructor pragma
+#ifndef __SYNTHESIS__
+    void restore_val();             // copies val into val_t
+    void linebuffer_print(int col); // prints column col of val to std::cout
+    T val_t[ROWS][COLS];            // copy of val, only present when __SYNTHESIS__ is not defined
+#endif
+};
+
+/* Member functions of LineBuffer_reshape class */
+
+/* Origin in upper-left point */
+/*       0   1            C-2 C-1
+ *     +---+---+-... ...-+---+---+
+ *  0  |   |   |         |   |   |
+ *     +---+---+-... ...-+---+---+
+ *  1  |   |   |         |   |   |
+ *     +---+---+-... ...-+---+---+
+ *       ...     ... ...    ...
+ *     +---+---+-... ...-+---+---+
+ * R-2 |   |   |         |   |   |
+ *     +---+---+-... ...-+---+---+
+ * R-1 |   |   |         |   |   |
+ *     +---+---+-... ...-+---+---+
+ *
+ */
+
+/* Member functions of LineBuffer_reshape class */
+
+/*
+ * LineBuffer content shift down
+ * Assumes new values will be placed in top row = 0
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::shift_pixels_down(int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+    // Within column col, move each value one row down, starting at the
+    // bottom so no value is overwritten before it has been copied.
+    for (HLS_SIZE_T r = ROWS - 1; r > 0; --r) {
+#pragma HLS unroll
+        val[r][col] = val[r - 1][col];
+    }
+}
+
+/*
+ * LineBuffer content shift up
+ * Assumes new values will be placed in bottom row = ROWS-1
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::shift_pixels_up(int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+    // Within column col, row r takes the value of row r + 1; the last row
+    // (ROWS-1) keeps its old value until the caller overwrites it.
+    for (HLS_SIZE_T r = 0; r < ROWS - 1; ++r) {
+#pragma HLS unroll
+        val[r][col] = val[r + 1][col];
+    }
+}
+
+/* LineBuffer insert bottom row
+ * Inserts a new value in bottom row= ROWS-1 of the linebuffer
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::insert_bottom_row(T value, int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+    const int last_row = ROWS - 1; // index of the bottom row
+    val[last_row][col] = value;
+}
+
+/* LineBuffer insert top row
+ * Inserts a new value in top row=0 of the linebuffer
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::insert_top_row(T value, int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+    T& top_cell = val[0][col]; // row 0 is the top row
+    top_cell = value;
+}
+
+/* LineBuffer get a column
+ * Get a column value of the linebuffer
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::get_col(T value[ROWS], int col) {
+#pragma HLS inline
+    assert(col >= 0 && col < COLS);
+    // Copy column col into the caller's array: value[r] receives row r.
+    for (HLS_SIZE_T r = 0; r < ROWS; ++r) {
+#pragma HLS unroll
+        value[r] = val[r][col];
+    }
+}
+
+/* Line buffer getval
+ * Returns the data value in the line buffer at position row, col
+ */
+template <int ROWS, int COLS, typename T>
+T& LineBuffer<ROWS, COLS, T, 1>::getval(int row, int col) {
+#pragma HLS inline
+    assert(row >= 0 && row < ROWS && col >= 0 && col < COLS); // stay inside the buffer
+    return this->val[row][col]; // a reference, so callers can also write through it
+}
+
+/* Line buffer operator()
+ * Returns the data value in the line buffer at position row, col
+ */
+template <int ROWS, int COLS, typename T>
+T& LineBuffer<ROWS, COLS, T, 1>::operator()(int row, int col) {
+#pragma HLS inline
+    return this->getval(row, col); // getval() performs the bounds assert
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-left point as the origin
+ * LineBuffer shift down, while contents shift up
+ * Assumes new values will be placed in bottom row(=0)
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::shift_down(int col) {
+#pragma HLS inline
+    this->shift_pixels_down(col); // legacy name; performs exactly shift_pixels_down(col)
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-left point as the origin
+ * LineBuffer shift up, while contents shift down
+ * Assumes new values will be placed in top row(=ROWS-1)
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::shift_up(int col) {
+#pragma HLS inline
+    this->shift_pixels_up(col); // legacy name; performs exactly shift_pixels_up(col)
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-left point as the origin
+ * LineBuffer insert
+ * Inserts a new value in bottom row(=0)
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::insert_bottom(T value, int col) {
+#pragma HLS inline
+    this->insert_top_row(value, col); // legacy "bottom" is stored row 0
+}
+
+/* NOTE:
+ * Back compatible APIs, take bottom-left point as the origin
+ * LineBuffer insert
+ * Inserts a new value in top row(=ROWS-1)
+ */
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::insert_top(T value, int col) {
+#pragma HLS inline
+    this->insert_bottom_row(value, col); // legacy "top" is stored row ROWS-1
+}
+
+#ifndef __SYNTHESIS__
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::restore_val() {
+    // Despite the name, this copies the live buffer (val) into val_t.
+    for (HLS_SIZE_T r = 0; r < ROWS; ++r) {
+        for (HLS_SIZE_T c = 0; c < COLS; ++c) {
+            val_t[r][c] = val[r][c];
+        }
+    }
+}
+
+template <int ROWS, int COLS, typename T>
+void LineBuffer<ROWS, COLS, T, 1>::linebuffer_print(int col) {
+    // Writes column col to std::cout, one row per line, width 20.
+    // NOTE(review): std::cout/std::setw need <iostream>/<iomanip>; this header does not include them itself - confirm.
+    for (HLS_SIZE_T r = 0; r < ROWS; ++r) {
+        std::cout << "\n" << std::setw(20) << val[r][col];
+    }
+    std::cout << "\n\n";
+}
+#endif
+
+#endif //_RESIZE_MEM_HPP_
diff --git a/codec/L2/include/hw/webpEnc/vp8_AsyncConfig.h b/codec/L2/include/hw/webpEnc/vp8_AsyncConfig.h
new file mode 100644
index 0000000000..2e74da2a31
--- /dev/null
+++ b/codec/L2/include/hw/webpEnc/vp8_AsyncConfig.h
@@ -0,0 +1,42 @@
+/**********
+
+  Copyright (c) 2017, Xilinx, Inc.
+  All rights reserved.
+  Redistribution and use in source and binary forms, with or without modification,
+  are permitted provided that the following conditions are met:
+
+  1. Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+
+  2. Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+  3. Neither the name of the copyright holder nor the names of its contributors
+  may be used to endorse or promote products derived from this software
+  without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+  THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+**********/
+
+#ifndef _VP8_ASYNCCONFIG_
+#define _VP8_ASYNCCONFIG_
+
+// const int NPicPool = 1; // Size of picture pools (currently commented out)
+
+const int Ninstances = WEBP_NBINSTANCES; // Number of instances; WEBP_NBINSTANCES is not defined in this header
+
+// const int NUM_BURST_PIC = 1; // Pack NUM_BURST_PIC (currently commented out)
+
+const int NasyncDepth = 1; // Depth of the ping-pong-like buffers
+
+#endif
diff --git a/codec/L2/include/hw/webpEnc/vp8_hls_syn.h b/codec/L2/include/hw/webpEnc/vp8_hls_syn.h
new file mode 100644
index 0000000000..24ff7cd705
--- /dev/null
+++ b/codec/L2/include/hw/webpEnc/vp8_hls_syn.h
@@ -0,0 +1,616 @@
+/**********
+           Copyright (c) 2017, Xilinx, Inc.
+           All rights reserved.
+           Redistribution and use in source and binary forms, with or without modification,
+           are permitted provided that the following conditions are met:
+           1. Redistributions of source code must retain the above copyright notice,
+           this list of conditions and the following disclaimer.
+           2. Redistributions in binary form must reproduce the above copyright notice,
+           this list of conditions and the following disclaimer in the documentation
+           and/or other materials provided with the distribution.
+           3. Neither the name of the copyright holder nor the names of its contributors
+           may be used to endorse or promote products derived from this software
+           without specific prior written permission.
+           THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+           ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+           THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+           IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+           INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+           PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+           HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+           OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+           EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********/
+
+#ifndef _VP8_HLS_SYN_H_
+#define _VP8_HLS_SYN_H_
+
+#define _KEEP_PSNR_
+
+#include <hls_stream.h>
+#include <ap_int.h>
+
+#define WD_PIX (8) // WD_* macros are bit widths: they are used as ap_uint<> widths and as slice widths in SB_GET/VCT_GET
+#define WD_MODE (4)
+#define WD_SUB (WD_PIX + 1) // = 9
+#define WD_DCT (12)
+#define WD_WHT (15)
+#define WD_IWHT (16)
+#define WD_Q (WD_DCT)
+#define WD_IQ (WD_DCT)
+#define WD_IQT (WD_DCT)
+#define WD_IDCT (WD_DCT)
+#define WD_SSE4 (8 + 8 + 4) // = 20
+#define WD_SSE16 (WD_SSE4 + 4) // = 24
+#define WD_TTW (6)
+#define WD_TTR (WD_TTW + WD_PIX + 3 + 1 + 1 - 1 + 4) // = 22
+#define WD_DISTO (WD_TTR + 1 - 1 - 5) // = 17
+#define MAX_WIDTH (65536 * 2)
+#define MAX_HEIGHT (65536 * 2)
+#define WD_sharpen (MY_SHARPEN_BITS) // MY_SHARPEN_BITS (11) is #defined further down; that is fine because macros expand at the point of use
+#define WD_QT (32)
+#define WD_q (7)
+#define WD_iq (16)
+#define WD_bias (32)
+#define WD_zthresh (32)
+#define WD_MLEVEL (32)
+#define WD_LEVEL (12)
+#define WD_LMD (12)
+#define WD_FAST (WD_LEVEL + 4) // = 16
+#define MAX_W_PIX (4096)
+#define MAX_H_PIX (4096)
+#define LG2_MAX_W_PIX (12) // 2^12 == MAX_W_PIX
+#define LG2_MAX_H_PIX (12) // 2^12 == MAX_H_PIX
+#define MAX_NUM_MB_W ((MAX_W_PIX + 15) / 16) // = 256: width in 16-pixel units, rounded up
+#define MAX_NUM_MB_H ((MAX_H_PIX + 15) / 16) // = 256: height in 16-pixel units, rounded up
+#define MAX_NUM_MB (MAX_NUM_MB_W * MAX_NUM_MB_H) // = 65536
+#define LG2_MAX_NUM_MB_W (LG2_MAX_W_PIX - 4) // = 8
+#define LG2_MAX_NUM_MB_H (LG2_MAX_H_PIX - 4) // = 8
+#define SIZE_P_INFO (256 * 4) // = 1024
+#define SIZE_P_YSRC (MAX_NUM_MB * 256 * 1) // 256 = 16 * 16 entries per 16x16 unit
+#define SIZE_P_USRC (MAX_NUM_MB * 64 * 1) // 64 = 8 * 8 entries per 16x16 unit
+#define SIZE_P_VSRC (MAX_NUM_MB * 64 * 1) // same size as SIZE_P_USRC
+#define SIZE_P_OUT (MAX_NUM_MB * 512 * 2)
+#define WD_RD_SCORE (40)
+
+#define MAX_DEEP_LOOP (20) // clock cycles for PRLoop;
+
+#define NUM_BURST_READ (64)
+#define LG2_NUM_BURST_READ (6) // 2^6 == NUM_BURST_READ
+
+#define OFF_NUM_MB_32 (1056 / 4) // = 264
+#define OFF_PID_PROB_8BIT (1200)
+
+typedef unsigned char uint8_t; // local fixed-width aliases; this header only includes hls_stream.h and ap_int.h directly
+typedef int int32_t;
+typedef short int16_t;
+#define SB_GET(sb, line, col, wd) (sb((wd)-1 + ((line)*4 + (col)) * (wd), ((line)*4 + (col)) * (wd))) // wd-bit slice of element (line, col) of a 4x4 block packed as index line*4+col
+#define VCT_GET(vect, mi, wd) (vect((wd)-1 + (mi) * (wd), (mi) * (wd))) // wd-bit slice of element mi of a packed vector
+#define VCT_SET_VAL(vect, mi, wd, val) (vect((wd)-1 + (mi) * (wd), (mi) * (wd)) = val)
+#define SB_SET_VAL(sb, line, col, wd, val) SB_GET(sb, line, col, wd) = val
+#define SB_SET_COL_VAL(sb, col, wd, val)                                  \
+    do {                                                                  \
+        SB_SET_VAL(sb, 0, col, wd, val); SB_SET_VAL(sb, 1, col, wd, val); \
+        SB_SET_VAL(sb, 2, col, wd, val); SB_SET_VAL(sb, 3, col, wd, val); \
+    } while (0) // one statement: safe under an unbraced if/else/for; use as SB_SET_COL_VAL(...);
+#define SB_SET(sb, line, col, wd, val) (sb((wd)-1 + ((line)*4 + (col)) * (wd), ((line)*4 + (col)) * (wd)) = val)
+// The column/line helpers below are statement macros wrapped in do { } while (0); the earlier "( a; b; c; d )" form could not compile when expanded.
+#define VCT_SET_COL_SB(sb, col, wd, vect)                                                                  \
+    do { VCT_GET(vect, 0, wd) = SB_GET(sb, 0, col, wd); VCT_GET(vect, 1, wd) = SB_GET(sb, 1, col, wd);     \
+         VCT_GET(vect, 2, wd) = SB_GET(sb, 2, col, wd); VCT_GET(vect, 3, wd) = SB_GET(sb, 3, col, wd); } while (0)
+// Copy the four elements of vector `vect` into column `col` of block `sb`.
+#define SB_SET_COL_VCT(sb, col, wd, vect)                                                                        \
+    do { SB_SET_VAL(sb, 0, col, wd, VCT_GET(vect, 0, wd)); SB_SET_VAL(sb, 1, col, wd, VCT_GET(vect, 1, wd));     \
+         SB_SET_VAL(sb, 2, col, wd, VCT_GET(vect, 2, wd)); SB_SET_VAL(sb, 3, col, wd, VCT_GET(vect, 3, wd)); } while (0)
+// Copy column `col` of block `sbs` into the same column of block `sb`.
+#define SB_SET_COL_SB(sb, col, wd, sbs)                                                                                \
+    do { SB_SET_VAL(sb, 0, col, wd, SB_GET(sbs, 0, col, wd)); SB_SET_VAL(sb, 1, col, wd, SB_GET(sbs, 1, col, wd));     \
+         SB_SET_VAL(sb, 2, col, wd, SB_GET(sbs, 2, col, wd)); SB_SET_VAL(sb, 3, col, wd, SB_GET(sbs, 3, col, wd)); } while (0)
+#define VCT_SET_VAL_ALL(vect, mii, wd, val)  \
+    for (int mi = 0; mi < (mii); mi++) {     \
+        VCT_SET_VAL(vect, mi, wd, val);      \
+    } // expands to a complete for-loop; the loop variable is named mi, so arguments must not themselves reference an outer `mi`
+#define VCT4_SET_LINE_SAME_VAL(vector, wd, val) SB_SET_LINE_VAL((vector), (0), (wd), (val))
+#define SB_SET_LINE_VAL(sb, line, wd, val)                                  \
+    do {                                                                    \
+        SB_SET_VAL(sb, line, 0, wd, val); SB_SET_VAL(sb, line, 1, wd, val); \
+        SB_SET_VAL(sb, line, 2, wd, val); SB_SET_VAL(sb, line, 3, wd, val); \
+    } while (0) // one statement: safe under an unbraced if/else/for; use as SB_SET_LINE_VAL(...);
+// NOTE(review): the two macros below select bits [((line)*4+1)*wd-1 : (line)*wd*4], i.e. ONE wd-wide element, although a 4-element line is 4*wd bits wide - confirm whether +4 was intended.
+#define SB_SET_LINE_VCT(sb, line, wd, val) sb(((line)*4 + 1) * (wd)-1, (line) * (wd)*4) = val((wd)*4 - 1, 0)
+#define SB_SET_LINE_SB(sb, line, wd, sbs) \
+    sb(((line)*4 + 1) * (wd)-1, (line) * (wd)*4) = sbs(((line)*4 + 1) * (wd)-1, (line) * (wd)*4)
+
+#define AP_DST(sb, x, y, wd) SB_GET(sb, y, x, wd) // (x, y) order here maps to SB_GET's (line = y, col = x)
+#define AP_AVG3(a, b, c, wd) (((a) + (ap_uint<wd + 2>(b) << 1) + (c) + 2) >> 2) // (a + 2*b + c + 2) >> 2, with b converted to wd+2 bits before the shift
+#define AP_AVG2(a, b, wd) ((ap_uint<wd + 1>(a) + ap_uint<wd + 1>(b) + 1) >> 1) // (a + b + 1) >> 1, operands converted to wd+1 bits
+
+#define AP_TREEADD2(v0, v1, wd) ((ap_uint<wd + 1>(v0)) + (ap_uint<wd + 1>(v1))) // v0 + v1, operands converted to wd+1 bits
+#define AP_TREEADD4(v0, v1, v2, v3, wd) \
+    ((ap_uint<wd + 2> AP_TREEADD2(v0, v1, wd)) + (ap_uint<wd + 2> AP_TREEADD2(v2, v3, wd))) // sum of two pair-sums, each converted to wd+2 bits
+#define AP_TREEADD4_VCT(vct4, wd) \
+    (AP_TREEADD4((VCT_GET(vct4, 0, wd)), (VCT_GET(vct4, 1, wd)), (VCT_GET(vct4, 2, wd)), (VCT_GET(vct4, 3, wd)), wd)) // sum of elements 0..3 of vct4
+// A44..L44: WD_PIX-wide elements 0..3 of the packed vectors named abcd / efgh / ijkl, which must be in scope at the point of use.
+#define A44 (VCT_GET(abcd, 0, WD_PIX))
+#define B44 (VCT_GET(abcd, 1, WD_PIX))
+#define C44 (VCT_GET(abcd, 2, WD_PIX))
+#define D44 (VCT_GET(abcd, 3, WD_PIX))
+
+#define E44 (VCT_GET(efgh, 0, WD_PIX))
+#define F44 (VCT_GET(efgh, 1, WD_PIX))
+#define G44 (VCT_GET(efgh, 2, WD_PIX))
+#define H44 (VCT_GET(efgh, 3, WD_PIX))
+
+#define I44 (VCT_GET(ijkl, 0, WD_PIX))
+#define J44 (VCT_GET(ijkl, 1, WD_PIX))
+#define K44 (VCT_GET(ijkl, 2, WD_PIX))
+#define L44 (VCT_GET(ijkl, 3, WD_PIX))
+
+#define BPS 32 // this is the common stride for enc/dec
+#define DST(x, y) dst[(x) + (y)*BPS] // indexes an array named dst that must be in scope at the point of use
+#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
+#define AVG2(a, b) (((a) + (b) + 1) >> 1)
+// Get pix position from Block numb, Sub-block number and pix number in sub-block
+#define SB2MB_X(Bn, Sbn) (((Bn)&2) + (Sbn) / 2) // '&' binds looser than '+', so (Bn)&2 needs its own parentheses
+#define SB2MB_Y(Bn, Sbn) (((Bn) % 2) * 2 + (Sbn) % 2)
+#define SB2MB_XP(Bn, Sbn, pos) ((SB2MB_H(Bn, Sbn)) * 4 + (pos) % 4) // NOTE(review): SB2MB_H is not defined in this header - confirm which of SB2MB_X / SB2MB_Y was meant
+#define SB2MB_YP(Bn, Sbn, pos) ((SB2MB_V(Bn, Sbn)) * 4 + (pos) / 4) // NOTE(review): SB2MB_V is not defined in this header - confirm which of SB2MB_X / SB2MB_Y was meant
+// From pix position of MB, get Block numb, Sub-block number, and pix number in sub-block;
+#define NUM_B2SB(bx, by) (((by) / 4) * 2 + (bx) / 4)
+#define NUM_MB2B(mx, my) (((my) / 8) * 2 + (mx) / 8)
+#define NUM_MB2SB(mx, my) (NUM_B2SB(((mx) % 8), ((my) % 8)))
+#define NUM_MB2PIX(mx, my) (((my) % 4) * 4 + ((mx) % 4))
+#define NUM_B2PIX(bx, by) (((by) % 4) * 4 + ((bx) % 4))
+#define NUM_Bn(mbn) ((((mbn) / 4) & 2) + ((((mbn) % 4) & 2) / 2))
+#define NUM_Sn(mbn) (((((mbn) / 4) & 1) * 2) + (((mbn) % 4) & 1))
+#define NUM_MBn2SBn(mbn) (NUM_Bn(mbn) * 4 + NUM_Sn(mbn))
+#define NUM_MBSB(n_sb) \
+    (n_sb) //((n_sb==2||n_sb==3||n_sb==10||n_sb==11)?(n_sb+2):(n_sb==4||n_sb==5||n_sb==12||n_sb==13)?(n_sb-2):n_sb)
+typedef unsigned int uint32_t;
+#define MY_MAX_LEVEL (2047)
+#define MY_SHARPEN_BITS (11)
+#define ZIGZAG(k)     \
+    ((k) == 2 ? 4     \
+     : (k) == 3 ? 8   \
+     : (k) == 4 ? 5   \
+     : (k) == 5 ? 2   \
+     : (k) == 6 ? 3   \
+     : (k) == 7 ? 6   \
+     : (k) == 8 ? 9   \
+     : (k) == 9 ? 12  \
+     : (k) == 10 ? 13 \
+     : (k) == 11 ? 10 \
+     : (k) == 12 ? 7  \
+     : (k) == 13 ? 11 \
+     : (k)) // every other k (0, 1, 14, 15, ...) maps to itself; k is evaluated several times, so pass a side-effect-free expression
+#define CPY16(vc, ap, wd) \
+    for (int i = 0; i < 16; i++) vc[i] = (ap_int<wd>)VCT_GET(ap, i, wd) // packed vector -> array, each element converted to ap_int<wd>
+#define CPY16U(vc, ap, wd) \
+    for (int i = 0; i < 16; i++) vc[i] = (ap_uint<wd>)VCT_GET(ap, i, wd) // packed vector -> array, each element converted to ap_uint<wd>
+#define SET16(vc, ap, wd) \
+    for (int i = 0; i < 16; i++) VCT_GET(ap, i, wd) = vc[i] // array -> packed vector
+
+typedef unsigned short int uint16_t;
+#define SIZE32_MEM_INFO (256)
+#define SIZE32_MEM_YSRC (MAX_NUM_MB_W * MAX_NUM_MB_H * 256 / 4)
+#define SIZE32_MEM_UVSRC (MAX_NUM_MB_W * MAX_NUM_MB_H * 64 / 4)
+#define SIZE32_MEM_UVSRC (MAX_NUM_MB_W * MAX_NUM_MB_H * 64 / 4) // NOTE(review): token-identical repeat of the previous line (a legal redefinition, but redundant)
+#define SIZE32_MEM_LEVEL (MAX_NUM_MB_W * MAX_NUM_MB_H * 512 / 2)
+#define SIZE32_AC_STATE (8)
+#define SYSTEM_MIN_COMP_RATIO (4)
+#define SYSTEM_MAX_COMP_BPP (12 / SYSTEM_MIN_COMP_RATIO) // = 3
+#define WD_BUS_BYTE (4)
+#define SIZE32_MEM_BW (MAX_NUM_MB_W * MAX_NUM_MB_H * 384 / SYSTEM_MIN_COMP_RATIO / WD_BUS_BYTE) // = 24 entries per 16x16 unit
+#define SIZE32_MEM_RET (MAX_NUM_MB_W * MAX_NUM_MB_H * 1 / WD_BUS_BYTE)
+#define SIZE32_MEM_PRED (MAX_NUM_MB_W * MAX_NUM_MB_H * 8 / WD_BUS_BYTE)
+#define SIZE8_MEM_PROB 2048 // instead of (4*8*11*3) = 1056
+#define SIZE8_MEM_BW (SIZE32_MEM_BW * 4)
+#define SIZE8_MEM_RET (SIZE32_MEM_RET * 4)
+#define SIZE8_MEM_PRED (SIZE32_MEM_PRED * 4)
+
+#define TOKEN_ID2(t, b) (((b) << 5) + (b) + ((((t) << 5) + (t)) << 3)) // = 33 * b + 264 * t
+
+struct hls_QMatrix { // one quantizer description; the _0 / _n pairs are filled from pm->x_[0] / pm->x_[1] per the comments in AllPicInfo_kernel::SetData
+    ap_uint<WD_q> q_0; // quantizer steps (WD_q = 7 bits)
+    ap_uint<WD_q> q_n;
+    ap_uint<WD_iq> iq_0; // reciprocals, fixed point (WD_iq = 16 bits)
+    ap_uint<WD_iq> iq_n;
+    ap_uint<WD_bias> bias_0; // rounding bias (WD_bias = 32 bits)
+    ap_uint<WD_bias> bias_n;
+};
+
+typedef struct { // accumulators feeding a rate-distortion style score; see ca_score() for how they combine
+    ap_uint<WD_SSE4 + 4> d;     // 24 bits = WD_SSE4 (20) + 4
+    ap_uint<WD_DISTO + 4> sd;   // 21 bits = WD_DISTO (17) + 4   // distortion, spectral distortion
+    ap_uint<WD_FAST + 1 + 4> r; // 21 bits = WD_FAST (16) + 1 + 4
+    ap_uint<12> h;
+    ap_uint<25> nz;
+    ap_uint<WD_RD_SCORE + 4> score; // 44 bits = WD_RD_SCORE (40) + 4
+    void init() { // zeroes d, sd, r, h and nz; score is left untouched
+        d = 0;
+        sd = 0;
+        r = 0;
+        h = 0;
+        nz = 0;
+    };
+    void ca_score(ap_uint<WD_LMD> lmbda) { // score = ((d + sd) << 8) + (r + h) * lmbda, each sum converted to the score width first
+        score = (((ap_uint<WD_RD_SCORE + 4>)(d + (ap_uint<WD_SSE4 + 4>)sd)) << 8) +
+                ((ap_uint<WD_RD_SCORE + 4>)(r + h)) * lmbda;
+    };
+} str_dis;
+
+typedef struct { // score / mode / non-zero flags for one candidate
+    ap_uint<25> nz;
+    ap_uint<WD_RD_SCORE + 4> score; // 44 bits
+    ap_uint<4> mode;
+    void init() {
+        nz = 0;
+        mode = 15;
+        score = -1; // -1 stored into an unsigned field; presumably meant as "largest possible score" - confirm
+    };
+    void ca_score(ap_uint<WD_LMD> lmbda, str_dis* dis, ap_uint<4> m) { // copies dis->nz, computes the same formula as str_dis::ca_score from dis, records m
+        nz = dis->nz;
+        score = (((ap_uint<WD_RD_SCORE + 4>)(dis->d + (ap_uint<WD_SSE4 + 4>)(dis->sd))) << 8) +
+                ((ap_uint<WD_RD_SCORE + 4>)(dis->r + dis->h)) * lmbda;
+        mode = m;
+    }; // ca_score
+    void ca_score2(ap_uint<WD_LMD> lmbda, // same formula with the terms passed individually; unlike ca_score(), nz is not modified
+                   ap_uint<WD_SSE4 + 4> d,
+                   ap_uint<WD_DISTO + 4> sd,
+                   ap_uint<WD_FAST + 1 + 4> r,
+                   ap_uint<12> h,
+                   ap_uint<4> m) {
+        score = (((ap_uint<WD_RD_SCORE + 4>)(d + (ap_uint<WD_SSE4 + 4>)sd)) << 8) +
+                ((ap_uint<WD_RD_SCORE + 4>)(r + h)) * lmbda;
+        mode = m;
+    }; // ca_score2
+
+} str_rd;
+
+typedef struct { // accumulates up to 16 str_rd results (one mode slot per index n)
+    ap_uint<25> nz;
+    ap_uint<WD_RD_SCORE + 4> score; // 44 bits; running sum of the accumulated scores
+    ap_uint<WD_MODE> mode[16];
+    void init() { // resets nz and score; the mode[] entries are left untouched
+        nz = 0;
+        score = 0;
+    };
+    void acc_rd(str_rd* rd_sb, ap_uint<4> n) { // ORs in rd_sb->nz, adds rd_sb->score, stores rd_sb->mode at index n
+        nz |= rd_sb->nz;
+        score += rd_sb->score;
+        mode[n] = rd_sb->mode;
+    }
+} str_rd_i4;
+
+struct AllPicInfo { // NOTE(review): the field order matches the p_info[] word indices read by AllPicInfo_kernel::SetData ([n] below) - presumably this is the host-side image of p_info; confirm
+    int id_pic;         // [0]
+    int cnt_line_mb;    // [1] (read as mb_line by SetData)
+    int y_stride;       // [2] = pic->y_stride;
+    int uv_stride;      // [3] = pic->uv_stride
+    int width;          // [4] = pic->width
+    int height;         // [5] = pic->height
+    int mb_w;           // [6] = enc->mb_w_;
+    int mb_h;           // [7] = enc->mb_h_;
+    int seg_lambda_p16; // [8] = dqm->lambda_i16_;
+    int seg_lambda_p44; // [9] = dqm->lambda_i4_;
+    int seg_tlambda;    // [10] = dqm->tlambda_;
+    int seg_lambda_uv;  // [11] = dqm->lambda_uv_;
+    int seg_tlambda_m;  // [12] = dqm->lambda_mode_;
+    int seg_y1_q_0;     // [13] read into hls_qm1.q_0
+    int seg_y1_q_n;     // [14] read into hls_qm1.q_n
+    int seg_y1_iq_0;    // [15] read into hls_qm1.iq_0
+    int seg_y1_iq_n;    // [16] read into hls_qm1.iq_n
+    int seg_y1_bias_0;  // [17] read into hls_qm1.bias_0
+    int seg_y1_bias_n;  // [18] read into hls_qm1.bias_n
+    int seg_y2_q_0;     // [19] read into hls_qm2.q_0
+    int seg_y2_q_n;     // [20] read into hls_qm2.q_n
+    int seg_y2_iq_0;    // [21] read into hls_qm2.iq_0
+    int seg_y2_iq_n;    // [22] read into hls_qm2.iq_n
+    int seg_y2_bias_0;  // [23] read into hls_qm2.bias_0
+    int seg_y2_bias_n;  // [24] read into hls_qm2.bias_n
+    int seg_uv_q_0;     // [25] read into hls_qm_uv.q_0
+    int seg_uv_q_n;     // [26] read into hls_qm_uv.q_n
+    int seg_uv_iq_0;    // [27] read into hls_qm_uv.iq_0
+    int seg_uv_iq_n;    // [28] read into hls_qm_uv.iq_n
+    int seg_uv_bias_0;  // [29] read into hls_qm_uv.bias_0
+    int seg_uv_bias_n;  // [30] read into hls_qm_uv.bias_n
+    int seg_y1_sharpen[16]; // [31..46] read into ap_sharpen
+    int seg_uv_sharpen[16]; // [47..62] read into ap_sharpen_uv
+};
+
+struct AllPicInfo_kernel { // bit-width-reduced picture parameters; every field is filled from the int array p_info[] by SetData()
+    ap_uint<32> id_pic;               // p_info[0]
+    ap_uint<32> mb_line;              // p_info[1]
+    ap_uint<LG2_MAX_W_PIX> y_stride;  // p_info[2], pic->y_stride
+    ap_uint<LG2_MAX_W_PIX> uv_stride; // p_info[3], pic->uv_stride
+    ap_uint<LG2_MAX_W_PIX> width;     // p_info[4], pic->width. NOTE(review): 12 bits hold 0..4095, so MAX_W_PIX (4096) itself does not fit - confirm the intended limit
+    ap_uint<LG2_MAX_W_PIX> height;    // p_info[5], pic->height (same 12-bit caveat; declared with the W constant, which equals LG2_MAX_H_PIX here)
+    ap_uint<LG2_MAX_NUM_MB_W> mb_w;   // p_info[6]. NOTE(review): 8 bits hold 0..255, while MAX_NUM_MB_W is 256 - confirm
+    ap_uint<LG2_MAX_NUM_MB_H> mb_h;   // p_info[7] (same 8-bit caveat)
+    ap_uint<WD_LMD> lambda_p16;       // p_info[8], dqm->lambda_i16_
+    ap_uint<WD_LMD> lambda_p44;       // p_info[9], dqm->lambda_i4_
+    ap_uint<WD_LMD> tlambda;          // p_info[10], dqm->tlambda_
+    ap_uint<WD_LMD> lambda_uv;        // p_info[11], dqm->lambda_uv_
+    ap_uint<WD_LMD> tlambda_m;        // p_info[12], dqm->lambda_mode_
+
+    hls_QMatrix hls_qm1, hls_qm2, hls_qm_uv; // p_info[13..18], [19..24], [25..30]
+    ap_int<WD_sharpen * 16> ap_sharpen, ap_sharpen_uv; // 16 packed WD_sharpen-bit entries each; p_info[31..46] and p_info[47..62]
+    void SetData(int* p_info) { // unpacks p_info[0..62] into the fields above; each value is narrowed to its field's bit width on assignment
+        id_pic = p_info[0];
+        mb_line = p_info[1];
+        y_stride = p_info[2];           // = pic->y_stride;
+        uv_stride = p_info[3];          // = pic->uv_stride
+        width = p_info[4];              // = pic->width
+        height = p_info[5];             // = pic->height
+        mb_w = p_info[2 + 2 + 2];       // index 6
+        mb_h = p_info[3 + 2 + 2];       // index 7
+        lambda_p16 = p_info[4 + 2 + 2]; // index 8, dqm->lambda_i16_;
+        lambda_p44 = p_info[5 + 2 + 2]; // index 9, dqm->lambda_i4_;
+        tlambda = p_info[6 + 2 + 2];    // index 10, dqm->tlambda_;
+        lambda_uv = p_info[7 + 2 + 2];  // index 11, dqm->lambda_uv_;
+        tlambda_m = p_info[8 + 2 + 2];  // index 12, dqm->lambda_mode_;
+
+        hls_qm1.q_0 = p_info[11 + 2];    // pm->q_[0];     // quantizer steps
+        hls_qm1.q_n = p_info[12 + 2];    // pm->q_[1];
+        hls_qm1.iq_0 = p_info[13 + 2];   // pm->iq_[0];    // reciprocals fixed point.
+        hls_qm1.iq_n = p_info[14 + 2];   // pm->iq_[1];
+        hls_qm1.bias_0 = p_info[15 + 2]; // pm->bias_[0];  // rounding bias
+        hls_qm1.bias_n = p_info[16 + 2]; // pm->bias_[1];
+
+        hls_qm2.q_0 = p_info[17 + 2];    // pm->q_[0];     // quantizer steps
+        hls_qm2.q_n = p_info[18 + 2];    // pm->q_[1];
+        hls_qm2.iq_0 = p_info[19 + 2];   // pm->iq_[0];    // reciprocals fixed point.
+        hls_qm2.iq_n = p_info[20 + 2];   // pm->iq_[1];
+        hls_qm2.bias_0 = p_info[21 + 2]; // pm->bias_[0];  // rounding bias
+        hls_qm2.bias_n = p_info[22 + 2]; // pm->bias_[1];
+
+        hls_qm_uv.q_0 = p_info[23 + 2];    // pm->q_[0];     // quantizer steps
+        hls_qm_uv.q_n = p_info[24 + 2];    // pm->q_[1];
+        hls_qm_uv.iq_0 = p_info[25 + 2];   // pm->iq_[0];    // reciprocals fixed point.
+        hls_qm_uv.iq_n = p_info[26 + 2];   // pm->iq_[1];
+        hls_qm_uv.bias_0 = p_info[27 + 2]; // pm->bias_[0];  // rounding bias
+        hls_qm_uv.bias_n = p_info[28 + 2]; // pm->bias_[1];
+    SHARPEN0:
+        for (int i = 0; i < 16; i++) // p_info[31 + i] -> entry i of ap_sharpen
+#pragma HLS UNROLL
+            VCT_GET(ap_sharpen, i, WD_sharpen) = p_info[29 + 2 + i];
+    SHARPEN1:
+        for (int i = 0; i < 16; i++) // p_info[47 + i] -> entry i of ap_sharpen_uv
+#pragma HLS UNROLL
+            VCT_GET(ap_sharpen_uv, i, WD_sharpen) = p_info[29 + 2 + 16 + i];
+    }
+};
+
+struct ap_NoneZero { // per-column store of 25-bit non-zero flag words, plus extraction of the 9 "top" / "left" context bits
+    ap_uint<25> nz_current; // flag word being assembled by set_nz_y() / set_nz_uv()
+    ap_uint<9> top_nz, left_nz; // members reused as scratch by load_top9() / load_left9()
+    ap_uint<25> line_nz[MAX_NUM_MB_W]; // one flag word per x_mb position
+    void set_nz_y(ap_uint<25> nz_y) { // copies bits 15..0 and bit 24 of nz_y into nz_current
+        nz_current(15, 0) = nz_y(15, 0);
+        nz_current[24] = nz_y[24];
+    };
+    void set_nz_uv(ap_uint<25> nz_uv) { nz_current(23, 16) = nz_uv(23, 16); }; // copies bits 23..16 only
+    void store_nz(ap_uint<25> nz, ap_uint<LG2_MAX_NUM_MB_W> x_mb) { line_nz[x_mb] = nz; }; // stores an explicit word
+    void store_nz(ap_uint<LG2_MAX_NUM_MB_W> x_mb) { line_nz[x_mb] = nz_current; }; // stores the assembled word
+    ap_uint<9> load_top9(ap_uint<LG2_MAX_NUM_MB_W> x_mb, ap_uint<LG2_MAX_NUM_MB_H> y_mb) { // gathers bits 12-15, 18-19, 22-23, 24 of line_nz[x_mb]
+        if (y_mb == 0) return 0; // first row: returns 0 and leaves the top_nz member unchanged
+        ap_uint<25> BIT = line_nz[x_mb];
+        top_nz[0] = BIT(12, 12);
+        top_nz[1] = BIT(13, 13);
+        top_nz[2] = BIT(14, 14);
+        top_nz[3] = BIT(15, 15);
+        // Top-U
+        top_nz[4] = BIT(18, 18);
+        top_nz[5] = BIT(19, 19);
+        // Top-V
+        top_nz[6] = BIT(22, 22);
+        top_nz[7] = BIT(23, 23);
+        // DC
+        top_nz[8] = BIT(24, 24);
+        return top_nz;
+    };
+    ap_uint<9> load_left9(ap_uint<LG2_MAX_NUM_MB_W> x_mb) { // gathers bits 3, 7, 11, 15, 17, 19, 21, 23 of line_nz[x_mb - 1]
+        // NOTE(review): left_nz[8] is never written here, so bit 8 of the result is whatever left_nz held before - confirm callers ignore or set it
+        if (x_mb == 0) return 0; // first column: returns 0 and leaves the left_nz member unchanged
+        ap_uint<25> BIT = line_nz[x_mb - 1];
+        left_nz[0] = BIT(3, 3);
+        left_nz[1] = BIT(7, 7);
+        left_nz[2] = BIT(11, 11);
+        left_nz[3] = BIT(15, 15);
+        // left-U
+        left_nz[4] = BIT(17, 17);
+        left_nz[5] = BIT(19, 19);
+        // left-V
+        left_nz[6] = BIT(21, 21);
+        left_nz[7] = BIT(23, 23);
+        return left_nz;
+    }
+};
+
+// Used for creating xclbin: C-linkage prototypes for the base kernel pair and its numbered copies _2 .. _8 (identical signatures)
+extern "C" {
+
+void kernel_IntraPredLoop2_NoOut(int32_t* p_info,    // 256
+                                 uint32_t* ysrc,     // 4096x4096//32bb
+                                 uint32_t* usrc,     // 2048x2048//32bb
+                                 uint32_t* vsrc,     // 2048x2048//32bb
+                                 int32_t* pout_level // 65536*512/2 int16_t* pout_level//65536*512
+                                 ,
+                                 uint8_t* pout_prob);
+
+void kernel_2_ArithmeticCoding(uint32_t pin_level[SIZE32_MEM_BW],
+                               uint32_t pin_prob[2048 / 4], // with some redundancy
+                               uint32_t pout_bw[SIZE32_MEM_BW],
+                               uint32_t pout_ret[SIZE32_MEM_RET],
+                               uint32_t pout_pred[SIZE32_MEM_PRED]);
+
+void kernel_IntraPredLoop2_NoOut_2(int32_t* p_info,    // 256
+                                   uint32_t* ysrc,     // 4096x4096//32bb
+                                   uint32_t* usrc,     // 2048x2048//32bb
+                                   uint32_t* vsrc,     // 2048x2048//32bb
+                                   int32_t* pout_level // 65536*512/2 int16_t* pout_level//65536*512
+                                   ,
+                                   uint8_t* pout_prob);
+
+void kernel_2_ArithmeticCoding_2(uint32_t pin_level[SIZE32_MEM_BW],
+                                 uint32_t pin_prob[2048 / 4], // with some redundancy
+                                 uint32_t pout_bw[SIZE32_MEM_BW],
+                                 uint32_t pout_ret[SIZE32_MEM_RET],
+                                 uint32_t pout_pred[SIZE32_MEM_PRED]);
+void kernel_IntraPredLoop2_NoOut_3(int32_t* p_info,    // 256
+                                   uint32_t* ysrc,     // 4096x4096//32bb
+                                   uint32_t* usrc,     // 2048x2048//32bb
+                                   uint32_t* vsrc,     // 2048x2048//32bb
+                                   int32_t* pout_level // 65536*512/2 int16_t* pout_level//65536*512
+                                   ,
+                                   uint8_t* pout_prob);
+
+void kernel_2_ArithmeticCoding_3(uint32_t pin_level[SIZE32_MEM_BW],
+                                 uint32_t pin_prob[2048 / 4], // with some redundancy
+                                 uint32_t pout_bw[SIZE32_MEM_BW],
+                                 uint32_t pout_ret[SIZE32_MEM_RET],
+                                 uint32_t pout_pred[SIZE32_MEM_PRED]);
+/////////////////////////////
+void kernel_IntraPredLoop2_NoOut_4(int32_t* p_info,    // 256
+                                   uint32_t* ysrc,     // 4096x4096//32bb
+                                   uint32_t* usrc,     // 2048x2048//32bb
+                                   uint32_t* vsrc,     // 2048x2048//32bb
+                                   int32_t* pout_level // 65536*512/2 int16_t* pout_level//65536*512
+                                   ,
+                                   uint8_t* pout_prob);
+
+void kernel_2_ArithmeticCoding_4(uint32_t pin_level[SIZE32_MEM_BW],
+                                 uint32_t pin_prob[2048 / 4], // with some redundancy
+                                 uint32_t pout_bw[SIZE32_MEM_BW],
+                                 uint32_t pout_ret[SIZE32_MEM_RET],
+                                 uint32_t pout_pred[SIZE32_MEM_PRED]);
+/////////////////////////////
+void kernel_IntraPredLoop2_NoOut_5(int32_t* p_info,    // 256
+                                   uint32_t* ysrc,     // 4096x4096//32bb
+                                   uint32_t* usrc,     // 2048x2048//32bb
+                                   uint32_t* vsrc,     // 2048x2048//32bb
+                                   int32_t* pout_level // 65536*512/2 int16_t* pout_level//65536*512
+                                   ,
+                                   uint8_t* pout_prob);
+
+void kernel_2_ArithmeticCoding_5(uint32_t pin_level[SIZE32_MEM_BW],
+                                 uint32_t pin_prob[2048 / 4], // with some redundancy
+                                 uint32_t pout_bw[SIZE32_MEM_BW],
+                                 uint32_t pout_ret[SIZE32_MEM_RET],
+                                 uint32_t pout_pred[SIZE32_MEM_PRED]);
+/////////////////////////////
+void kernel_IntraPredLoop2_NoOut_6(int32_t* p_info,    // 256
+                                   uint32_t* ysrc,     // 4096x4096//32bb
+                                   uint32_t* usrc,     // 2048x2048//32bb
+                                   uint32_t* vsrc,     // 2048x2048//32bb
+                                   int32_t* pout_level // 65536*512/2 int16_t* pout_level//65536*512
+                                   ,
+                                   uint8_t* pout_prob);
+
+void kernel_2_ArithmeticCoding_6(uint32_t pin_level[SIZE32_MEM_BW],
+                                 uint32_t pin_prob[2048 / 4], // with some redundancy
+                                 uint32_t pout_bw[SIZE32_MEM_BW],
+                                 uint32_t pout_ret[SIZE32_MEM_RET],
+                                 uint32_t pout_pred[SIZE32_MEM_PRED]);
+/////////////////////////////
+void kernel_IntraPredLoop2_NoOut_7(int32_t* p_info,    // 256
+                                   uint32_t* ysrc,     // 4096x4096//32bb
+                                   uint32_t* usrc,     // 2048x2048//32bb
+                                   uint32_t* vsrc,     // 2048x2048//32bb
+                                   int32_t* pout_level // 65536*512/2 int16_t* pout_level//65536*512
+                                   ,
+                                   uint8_t* pout_prob);
+
+void kernel_2_ArithmeticCoding_7(uint32_t pin_level[SIZE32_MEM_BW],
+                                 uint32_t pin_prob[2048 / 4], // with some redundancy
+                                 uint32_t pout_bw[SIZE32_MEM_BW],
+                                 uint32_t pout_ret[SIZE32_MEM_RET],
+                                 uint32_t pout_pred[SIZE32_MEM_PRED]);
+/////////////////////////////
+void kernel_IntraPredLoop2_NoOut_8(int32_t* p_info,    // 256
+                                   uint32_t* ysrc,     // 4096x4096//32bb
+                                   uint32_t* usrc,     // 2048x2048//32bb
+                                   uint32_t* vsrc,     // 2048x2048//32bb
+                                   int32_t* pout_level // 65536*512/2 int16_t* pout_level//65536*512
+                                   ,
+                                   uint8_t* pout_prob);
+
+void kernel_2_ArithmeticCoding_8(uint32_t pin_level[SIZE32_MEM_BW],
+                                 uint32_t pin_prob[2048 / 4], // with some redundancy
+                                 uint32_t pout_bw[SIZE32_MEM_BW],
+                                 uint32_t pout_ret[SIZE32_MEM_RET],
+                                 uint32_t pout_pred[SIZE32_MEM_PRED]);
+}
+/*
+ * Other declarations, provided for host-side convenience
+ */
+void set_vect_to(ap_uint<8 * 16> src, unsigned char* des, int strip); // declaration only; the definition is not in this header
+inline uint32_t Get_Busoffset_info_32bits() { // constant 64
+    return 64;
+}
+inline uint32_t Get_Busoffset_ysrc(uint32_t y_size) { // (y_size + 3) / 4: y_size rounded up to whole 4-byte words - presumably y_size is a byte count
+    return (y_size + 3) / sizeof(uint32_t);
+}
+inline uint32_t Get_Busoffset_uvsrc(uint32_t uv_size) { // same rounding as Get_Busoffset_ysrc, applied to uv_size
+    return (uv_size + 3) / sizeof(uint32_t);
+}
+
+inline uint32_t Get_Busoffset_level(uint32_t num_mb) { // num_mb * 512 int16_t entries, expressed in 4-byte words (= num_mb * 256)
+    return num_mb * (512 * sizeof(int16_t)) / sizeof(uint32_t);
+}
+inline uint32_t Get_Busoffset_prob_32bits() { // 2048 bytes expressed in 4-byte words (= 512)
+    return 2048 / sizeof(uint32_t);
+}
+inline uint32_t Get_Busoffset_prob_8bits() { // constant 2048
+    return 2048;
+}
+inline uint32_t Get_Busoffset_pout_bw(uint32_t num_mb) { // num_mb * 24 + 1000 with SYSTEM_MIN_COMP_RATIO == 4; the reason for the +1000 margin is not stated in this header
+    return num_mb * (384 / SYSTEM_MIN_COMP_RATIO / sizeof(uint32_t)) + 1000;
+}
+inline uint32_t Get_Busoffset_pout_ret(uint32_t num_mb) { // (num_mb + 3) / 4: one byte per num_mb unit, rounded up to 4-byte words
+    return (num_mb + 3) / sizeof(uint32_t);
+}
+inline uint32_t Get_Busoffset_pout_pred(uint32_t num_mb) { // (num_mb * 8 + 3) / 4: eight bytes per num_mb unit, in 4-byte words
+    return (num_mb * 8 + 3) / sizeof(uint32_t);
+}
+
+namespace xf {
+namespace codec {
+// For multi-instance
+/**
+ * @brief Level 2 : kernel for WebP intra prediction
+ *
+ * @param p_info basic information of image and compression parameters. More details can be found in function
+ * kernel_IntraPredLoop2_NoOut_core.
+ * @param ysrc the Y samples of the image as input.
+ * @param usrc the U samples of the image as input.
+ * @param vsrc the V samples of the image as input.
+ * @param pout_level pointer to the structures containing the coefficients of each MB, as output.
+ * @param pout_prob probability table for output.
+ */
+void webp_IntraPredLoop2_NoOut_1(int32_t* p_info,    // 256
+                                 uint32_t* ysrc,     // 4096x4096//32bb
+                                 uint32_t* usrc,     // 2048x2048//32bb
+                                 uint32_t* vsrc,     // 2048x2048//32bb
+                                 int32_t* pout_level // 65536*512/2 int16_t* pout_level//65536*512
+                                 ,
+                                 uint8_t* pout_prob);
+
+/**
+ * @brief Level 2 : kernel for WebP arithmetic coding
+ *
+ * @param pin_level pointer to the structures containing the coefficients of each MB, as input.
+ * @param pin_prob probability table as an input.
+ * @param pout_bw byte-stream created by arithmetic coding
+ * @param pout_ret output stream in which each element indicates the non-zero status of the 6 blocks of a MB
+ * @param pout_pred output stream of the prediction mode of Y.
+ */
+void webp_2_ArithmeticCoding_1(uint32_t pin_level[SIZE32_MEM_BW],
+                               uint32_t pin_prob[2048 / 4], // with some redundancy
+                               uint32_t pout_bw[SIZE32_MEM_BW],
+                               uint32_t pout_ret[SIZE32_MEM_RET],
+                               uint32_t pout_pred[SIZE32_MEM_PRED]);
+} // namespace codec
+} // namespace xf
+
+#endif
diff --git a/codec/L2/include/hw/webpEnc/vp8_hls_syn2.h b/codec/L2/include/hw/webpEnc/vp8_hls_syn2.h
new file mode 100644
index 0000000000..f25a31a65a
--- /dev/null
+++ b/codec/L2/include/hw/webpEnc/vp8_hls_syn2.h
@@ -0,0 +1,1632 @@
+/**********
+           Copyright (c) 2017, Xilinx, Inc.
+           All rights reserved.
+           Redistribution and use in source and binary forms, with or without modification,
+           are permitted provided that the following conditions are met:
+           1. Redistributions of source code must retain the above copyright notice,
+           this list of conditions and the following disclaimer.
+           2. Redistributions in binary form must reproduce the above copyright notice,
+           this list of conditions and the following disclaimer in the documentation
+           and/or other materials provided with the distribution.
+           3. Neither the name of the copyright holder nor the names of its contributors
+           may be used to endorse or promote products derived from this software
+           without specific prior written permission.
+           THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+           ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+           THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+           IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+           INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+           PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+           HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+           OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+           EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********/
+
+#ifndef _VP8_HLS_SYN2_H_
+#define _VP8_HLS_SYN2_H_
+
+#include "vp8_hls_syn.h"
+#include <hls_stream.h>
+#include <ap_int.h>
+#include <stdio.h>
+#include <string.h>
+
+enum { // anonymous enum of intra-prediction mode ids; several names below share numeric values
+    B_DC_PRED = 0, // 4x4 modes
+    B_TM_PRED = 1,
+    B_VE_PRED = 2,
+    B_HE_PRED = 3,
+    B_RD_PRED = 4,
+    B_VR_PRED = 5,
+    B_LD_PRED = 6,
+    B_VL_PRED = 7,
+    B_HD_PRED = 8,
+    B_HU_PRED = 9,
+    NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED, // = 10
+
+    // Luma16 or UV modes (aliases of the matching 4x4 ids: DC=0, TM=1, V=2, H=3)
+    DC_PRED = B_DC_PRED,
+    V_PRED = B_VE_PRED,
+    H_PRED = B_HE_PRED,
+    TM_PRED = B_TM_PRED,
+    B_PRED = NUM_BMODES, // refined I4x4 mode (value 10, one past the last 4x4 id)
+    NUM_PRED_MODES = 4,  // equals the number of ids DC_PRED, TM_PRED, V_PRED, H_PRED (0..3)
+
+    // special modes (values 4..6 coincide numerically with B_RD_PRED, B_VR_PRED, B_LD_PRED)
+    B_DC_PRED_NOTOP = 4,
+    B_DC_PRED_NOLEFT = 5,
+    B_DC_PRED_NOTOPLEFT = 6,
+    NUM_B_DC_MODES = 7 // one past B_DC_PRED_NOTOPLEFT
+};
+
+const ap_uint<12> my_VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES] = { // 10x10x10 table of 12-bit costs keyed by three 4x4 mode ids; NOTE(review): presumably [top][left][mode] as in libwebp's VP8FixedCostsI4 - confirm
+    {{40, 1151, 1723, 1874, 2103, 2019, 1628, 1777, 2226, 2137},
+     {192, 469, 1296, 1308, 1849, 1794, 1781, 1703, 1713, 1522},
+     {142, 910, 762, 1684, 1849, 1576, 1460, 1305, 1801, 1657},
+     {559, 641, 1370, 421, 1182, 1569, 1612, 1725, 863, 1007},
+     {299, 1059, 1256, 1108, 636, 1068, 1581, 1883, 869, 1142},
+     {277, 1111, 707, 1362, 1089, 672, 1603, 1541, 1545, 1291},
+     {214, 781, 1609, 1303, 1632, 2229, 726, 1560, 1713, 918},
+     {152, 1037, 1046, 1759, 1983, 2174, 1358, 742, 1740, 1390},
+     {512, 1046, 1420, 753, 752, 1297, 1486, 1613, 460, 1207},
+     {424, 827, 1362, 719, 1462, 1202, 1199, 1476, 1199, 538}},
+    {{240, 402, 1134, 1491, 1659, 1505, 1517, 1555, 1979, 2099},
+     {467, 242, 960, 1232, 1714, 1620, 1834, 1570, 1676, 1391},
+     {500, 455, 463, 1507, 1699, 1282, 1564, 982, 2114, 2114},
+     {672, 643, 1372, 331, 1589, 1667, 1453, 1938, 996, 876},
+     {458, 783, 1037, 911, 738, 968, 1165, 1518, 859, 1033},
+     {504, 815, 504, 1139, 1219, 719, 1506, 1085, 1268, 1268},
+     {333, 630, 1445, 1239, 1883, 3672, 799, 1548, 1865, 598},
+     {399, 644, 746, 1342, 1856, 1350, 1493, 613, 1855, 1015},
+     {622, 749, 1205, 608, 1066, 1408, 1290, 1406, 546, 971},
+     {500, 753, 1041, 668, 1230, 1617, 1297, 1425, 1383, 523}},
+    {{394, 553, 523, 1502, 1536, 981, 1608, 1142, 1666, 2181},
+     {655, 430, 375, 1411, 1861, 1220, 1677, 1135, 1978, 1553},
+     {690, 640, 245, 1954, 2070, 1194, 1528, 982, 1972, 2232},
+     {559, 834, 741, 867, 1131, 980, 1225, 852, 1092, 784},
+     {690, 875, 516, 959, 673, 894, 1056, 1190, 1528, 1126},
+     {740, 951, 384, 1277, 1177, 492, 1579, 1155, 1846, 1513},
+     {323, 775, 1062, 1776, 3062, 1274, 813, 1188, 1372, 655},
+     {488, 971, 484, 1767, 1515, 1775, 1115, 503, 1539, 1461},
+     {740, 1006, 998, 709, 851, 1230, 1337, 788, 741, 721},
+     {522, 1073, 573, 1045, 1346, 887, 1046, 1146, 1203, 697}},
+    {{105, 864, 1442, 1009, 1934, 1840, 1519, 1920, 1673, 1579},
+     {534, 305, 1193, 683, 1388, 2164, 1802, 1894, 1264, 1170},
+     {305, 518, 877, 1108, 1426, 3215, 1425, 1064, 1320, 1242},
+     {683, 732, 1927, 257, 1493, 2048, 1858, 1552, 1055, 947},
+     {394, 814, 1024, 660, 959, 1556, 1282, 1289, 893, 1047},
+     {528, 615, 996, 940, 1201, 635, 1094, 2515, 803, 1358},
+     {347, 614, 1609, 1187, 3133, 1345, 1007, 1339, 1017, 667},
+     {218, 740, 878, 1605, 3650, 3650, 1345, 758, 1357, 1617},
+     {672, 750, 1541, 558, 1257, 1599, 1870, 2135, 402, 1087},
+     {592, 684, 1161, 430, 1092, 1497, 1475, 1489, 1095, 822}},
+    {{228, 1056, 1059, 1368, 752, 982, 1512, 1518, 987, 1782},
+     {494, 514, 818, 942, 965, 892, 1610, 1356, 1048, 1363},
+     {512, 648, 591, 1042, 761, 991, 1196, 1454, 1309, 1463},
+     {683, 749, 1043, 676, 841, 1396, 1133, 1138, 654, 939},
+     {622, 1101, 1126, 994, 361, 1077, 1203, 1318, 877, 1219},
+     {631, 1068, 857, 1650, 651, 477, 1650, 1419, 828, 1170},
+     {555, 727, 1068, 1335, 3127, 1339, 820, 1331, 1077, 429},
+     {504, 879, 624, 1398, 889, 889, 1392, 808, 891, 1406},
+     {683, 1602, 1289, 977, 578, 983, 1280, 1708, 406, 1122},
+     {399, 865, 1433, 1070, 1072, 764, 968, 1477, 1223, 678}},
+    {{333, 760, 935, 1638, 1010, 529, 1646, 1410, 1472, 2219},
+     {512, 494, 750, 1160, 1215, 610, 1870, 1868, 1628, 1169},
+     {572, 646, 492, 1934, 1208, 603, 1580, 1099, 1398, 1995},
+     {786, 789, 942, 581, 1018, 951, 1599, 1207, 731, 768},
+     {690, 1015, 672, 1078, 582, 504, 1693, 1438, 1108, 2897},
+     {768, 1267, 571, 2005, 1243, 244, 2881, 1380, 1786, 1453},
+     {452, 899, 1293, 903, 1311, 3100, 465, 1311, 1319, 813},
+     {394, 927, 942, 1103, 1358, 1104, 946, 593, 1363, 1109},
+     {559, 1005, 1007, 1016, 658, 1173, 1021, 1164, 623, 1028},
+     {564, 796, 632, 1005, 1014, 863, 2316, 1268, 938, 764}},
+    {{266, 606, 1098, 1228, 1497, 1243, 948, 1030, 1734, 1461},
+     {366, 585, 901, 1060, 1407, 1247, 876, 1134, 1620, 1054},
+     {452, 565, 542, 1729, 1479, 1479, 1016, 886, 2938, 1150},
+     {555, 1088, 1533, 950, 1354, 895, 834, 1019, 1021, 496},
+     {704, 815, 1193, 971, 973, 640, 1217, 2214, 832, 578},
+     {672, 1245, 579, 871, 875, 774, 872, 1273, 1027, 949},
+     {296, 1134, 2050, 1784, 1636, 3425, 442, 1550, 2076, 722},
+     {342, 982, 1259, 1846, 1848, 1848, 622, 568, 1847, 1052},
+     {555, 1064, 1304, 828, 746, 1343, 1075, 1329, 1078, 494},
+     {288, 1167, 1285, 1174, 1639, 1639, 833, 2254, 1304, 509}},
+    {{342, 719, 767, 1866, 1757, 1270, 1246, 550, 1746, 2151},
+     {483, 653, 694, 1509, 1459, 1410, 1218, 507, 1914, 1266},
+     {488, 757, 447, 2979, 1813, 1268, 1654, 539, 1849, 2109},
+     {522, 1097, 1085, 851, 1365, 1111, 851, 901, 961, 605},
+     {709, 716, 841, 728, 736, 945, 941, 862, 2845, 1057},
+     {512, 1323, 500, 1336, 1083, 681, 1342, 717, 1604, 1350},
+     {452, 1155, 1372, 1900, 1501, 3290, 311, 944, 1919, 922},
+     {403, 1520, 977, 2132, 1733, 3522, 1076, 276, 3335, 1547},
+     {559, 1374, 1101, 615, 673, 2462, 974, 795, 984, 984},
+     {547, 1122, 1062, 812, 1410, 951, 1140, 622, 1268, 651}},
+    {{165, 982, 1235, 938, 1334, 1366, 1659, 1578, 964, 1612},
+     {592, 422, 925, 847, 1139, 1112, 1387, 2036, 861, 1041},
+     {403, 837, 732, 770, 941, 1658, 1250, 809, 1407, 1407},
+     {896, 874, 1071, 381, 1568, 1722, 1437, 2192, 480, 1035},
+     {640, 1098, 1012, 1032, 684, 1382, 1581, 2106, 416, 865},
+     {559, 1005, 819, 914, 710, 770, 1418, 920, 838, 1435},
+     {415, 1258, 1245, 870, 1278, 3067, 770, 1021, 1287, 522},
+     {406, 990, 601, 1009, 1265, 1265, 1267, 759, 1017, 1277},
+     {968, 1182, 1329, 788, 1032, 1292, 1705, 1714, 203, 1403},
+     {732, 877, 1279, 471, 901, 1161, 1545, 1294, 755, 755}},
+    {{111, 931, 1378, 1185, 1933, 1648, 1148, 1714, 1873, 1307},
+     {406, 414, 1030, 1023, 1910, 1404, 1313, 1647, 1509, 793},
+     {342, 640, 575, 1088, 1241, 1349, 1161, 1350, 1756, 1502},
+     {559, 766, 1185, 357, 1682, 1428, 1329, 1897, 1219, 802},
+     {473, 909, 1164, 771, 719, 2508, 1427, 1432, 722, 782},
+     {342, 892, 785, 1145, 1150, 794, 1296, 1550, 973, 1057},
+     {208, 1036, 1326, 1343, 1606, 3395, 815, 1455, 1618, 712},
+     {228, 928, 890, 1046, 3499, 1711, 994, 829, 1720, 1318},
+     {768, 724, 1058, 636, 991, 1075, 1319, 1324, 616, 825},
+     {305, 1167, 1358, 899, 1587, 1587, 987, 1988, 1332, 501}}};
+
+// hls_VP8CoeffsUpdateProba: constant 4 x 8 x 3 x 11 table of 8-bit values. NOTE(review): presumably libwebp's VP8CoeffsUpdateProba [type][band][ctx][proba] - confirm.
+static const uint8_t hls_VP8CoeffsUpdateProba[4][8][3][11] = { // 4, 8, 4(3), 8(11)}=1024 *
+    {{{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255},
+      {249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255},
+      {234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255},
+      {250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255},
+      {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+       255}}}, /////////////////////////////////////////////////////////////////////
+    {{{217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255},
+      {234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255}},
+     {{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+      {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+       255}}}, /////////////////////////////////////////////////////////////
+    {{{186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255},
+      {234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255},
+      {251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255}},
+     {{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255}},
+     {{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+       255}}}, /////////////////////////////////////////////////////////////
+    {{{248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255},
+      {248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+      {246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+      {252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255}},
+     {{255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255},
+      {248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+      {253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+      {252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+      {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
+     {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}}};
+
+// hls_VP8EntropyCost: 256 11-bit entries, non-increasing from 1792 to 3. NOTE(review): presumably bit cost per 8-bit probability (libwebp VP8EntropyCost) - confirm.
+static const ap_uint<11> hls_VP8EntropyCost[256] = {
+    1792, 1792, 1792, 1536, 1536, 1408, 1366, 1280, 1280, 1216, 1178, 1152, 1110, 1076, 1061, 1024, 1024, 992, 968, 951,
+    939,  911,  896,  878,  871,  854,  838,  820,  811,  794,  786,  768,  768,  752,  740,  732,  720,  709, 704, 690,
+    683,  672,  666,  655,  647,  640,  631,  622,  615,  607,  598,  592,  586,  576,  572,  564,  559,  555, 547, 541,
+    534,  528,  522,  512,  512,  504,  500,  494,  488,  483,  477,  473,  467,  461,  458,  452,  448,  443, 438, 434,
+    427,  424,  419,  415,  410,  406,  403,  399,  394,  390,  384,  384,  377,  374,  370,  366,  362,  359, 355, 351,
+    347,  342,  342,  336,  333,  330,  326,  323,  320,  316,  312,  308,  305,  302,  299,  296,  293,  288, 287, 283,
+    280,  277,  274,  272,  268,  266,  262,  256,  256,  256,  251,  248,  245,  242,  240,  237,  234,  232, 228, 226,
+    223,  221,  218,  216,  214,  211,  208,  205,  203,  201,  198,  196,  192,  191,  188,  187,  183,  181, 179, 176,
+    175,  171,  171,  168,  165,  163,  160,  159,  156,  154,  152,  150,  148,  146,  144,  142,  139,  138, 135, 133,
+    131,  128,  128,  125,  123,  121,  119,  117,  115,  113,  111,  110,  107,  105,  103,  102,  100,  98,  96,  94,
+    92,   91,   89,   86,   86,   83,   82,   80,   77,   76,   74,   73,   71,   69,   67,   66,   64,   63,  61,  59,
+    57,   55,   54,   52,   51,   49,   47,   46,   44,   43,   41,   40,   38,   36,   35,   33,   32,   30,  29,  27,
+    25,   24,   22,   21,   19,   18,   16,   15,   13,   12,   10,   9,    7,    6,    4,    3};
+
+static const ap_uint<9> VP8LevelCodes_hls[67][2] = { // 67 pairs of 9-bit values; NOTE(review): presumably {pattern, bits} per level as in libwebp's VP8LevelCodes - confirm
+    {0x001, 0x000}, {0x007, 0x001}, {0x00f, 0x005}, {0x00f, 0x00d}, {0x033, 0x003}, {0x033, 0x003}, {0x033, 0x023},
+    {0x033, 0x023}, {0x033, 0x023}, {0x033, 0x023}, {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x013},
+    {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093},
+    {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093},
+    {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x153, 0x053},
+    {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+    {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+    {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+    {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+    {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x153}};
+// hls_VP8CoeffsProba0: constant 4 x 8 x 3 x 11 table of 8-bit values. NOTE(review): presumably the default coefficient probabilities (libwebp VP8CoeffsProba0) - confirm.
+const uint8_t hls_VP8CoeffsProba0[4][8][3][11] = {{{{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+                                                    {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+                                                    {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}},
+                                                   {{253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128},
+                                                    {189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128},
+                                                    {106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128}},
+                                                   {
+                                                       {1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128},
+                                                       {181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128},
+                                                       {78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128},
+                                                   },
+                                                   {
+                                                       {1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128},
+                                                       {184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128},
+                                                       {77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128},
+                                                   },
+                                                   {{1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128},
+                                                    {170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128},
+                                                    {37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128}},
+                                                   {{1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128},
+                                                    {207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128},
+                                                    {102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128}},
+                                                   {{1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128},
+                                                    {177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128},
+                                                    {80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128}},
+                                                   {{1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+                                                    {246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+                                                    {255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}}},
+                                                  {{{198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62},
+                                                    {131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1},
+                                                    {68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128}},
+                                                   {{1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128},
+                                                    {184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128},
+                                                    {81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128}},
+                                                   {{1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128},
+                                                    {99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128},
+                                                    {23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128}},
+                                                   {{1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128},
+                                                    {109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128},
+                                                    {44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128}},
+                                                   {{1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128},
+                                                    {94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128},
+                                                    {22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128}},
+                                                   {{1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128},
+                                                    {124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128},
+                                                    {35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128}},
+                                                   {{1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128},
+                                                    {121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128},
+                                                    {45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128}},
+                                                   {{1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128},
+                                                    {203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128},
+                                                    {137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128}}},
+                                                  {{{253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128},
+                                                    {175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128},
+                                                    {73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128}},
+                                                   {{1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128},
+                                                    {239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128},
+                                                    {155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128}},
+                                                   {{1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128},
+                                                    {201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128},
+                                                    {69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128}},
+                                                   {{1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128},
+                                                    {223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128},
+                                                    {141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128}},
+                                                   {{1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128},
+                                                    {190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128},
+                                                    {149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128}},
+                                                   {{1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+                                                    {247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+                                                    {240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128}},
+                                                   {{1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128},
+                                                    {213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128},
+                                                    {55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128}},
+                                                   {{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+                                                    {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+                                                    {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}}},
+                                                  {{{202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255},
+                                                    {126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128},
+                                                    {61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128}},
+                                                   {{1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128},
+                                                    {166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128},
+                                                    {39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128}},
+                                                   {{1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128},
+                                                    {124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128},
+                                                    {24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128}},
+                                                   {{1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128},
+                                                    {149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128},
+                                                    {28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128}},
+                                                   {{1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128},
+                                                    {123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128},
+                                                    {20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128}},
+                                                   {{1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128},
+                                                    {168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128},
+                                                    {47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128}},
+                                                   {{1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128},
+                                                    {141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128},
+                                                    {42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128}},
+                                                   {{1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+                                                    {244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+                                                    {238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128}}}};
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+//==========================      kernel_IntraPredLoop2_NoOut             ==============================//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+// kernel_IntraPredLoop2_NoOut
+//|-memcpy
+//|-TopVp8_top_dataflow_32bit_k1NoStruct_cnt_DeepFIFO
+//  |-TopVp8_read_2_32bit_NoStruct
+//  | |-TopVp8_read__dataflow_32bit...
+//  |-TopVp8_compute...
+//  |-TopVp8_RecordCoeff_hls_cnt
+//  | |-FindLast
+//  | |-VP8RecordCoeffs_hls_str_w_cnt
+//  |   |-Record_str
+//  |   |-VP8EncBands_hls
+//  |-TopVp8_RecordProb_hls_cnt
+//  | |-RecordPorb_ReadCoeff_dataflow2_cnt
+//  |   |-RecordPorb_ReadCoeff_dataflow_dc_cnt
+//  |     |-RecordPorb_ReadCoeff_dataflow_ac_cnt
+//  |     | |-VP8RecordCoeffs_hls_str_r_cnt
+//  |     |-RecordPorb_ReadCoeff_dataflow_uv_cnt...
+//  |     |-RecordPorb_ReadCoeff_dataflow2_cnt...
+//  |-TopVp8_send_32bit
+void TopVp8_top_dataflow_32bit_k1NoStruct_cnt_DeepFIFO(ap_uint<32> id_pic, // prototype only (no body here); NOTE(review): id_pic presumably a picture index - confirm
+                                                       ap_uint<32> mb_line, // NOTE(review): presumably a macroblock-row index or count - confirm
+                                                       ap_uint<LG2_MAX_W_PIX> y_stride, // NOTE(review): presumably luma row stride - confirm
+                                                       ap_uint<LG2_MAX_W_PIX> uv_stride, // NOTE(review): presumably chroma row stride - confirm
+                                                       ap_uint<LG2_MAX_W_PIX> width, // NOTE(review): presumably picture width in pixels - confirm
+                                                       ap_uint<LG2_MAX_W_PIX> height, // same bit width as width (LG2_MAX_W_PIX)
+                                                       ap_uint<LG2_MAX_NUM_MB_W> mb_w, // NOTE(review): presumably picture width in macroblocks - confirm
+                                                       ap_uint<LG2_MAX_NUM_MB_H> mb_h, // NOTE(review): presumably picture height in macroblocks - confirm
+                                                       ap_uint<WD_LMD> lambda_p16, // the five WD_LMD-bit values below: NOTE(review): presumably rate-distortion lambdas - confirm
+                                                       ap_uint<WD_LMD> lambda_p44,
+                                                       ap_uint<WD_LMD> tlambda,
+                                                       ap_uint<WD_LMD> lambda_uv,
+                                                       ap_uint<WD_LMD> tlambda_m,
+                                                       hls_QMatrix hls_qm1, // three hls_QMatrix values passed by value; NOTE(review): presumably quantization matrices - confirm
+                                                       hls_QMatrix hls_qm2,
+                                                       hls_QMatrix hls_qm_uv,
+                                                       ap_int<WD_sharpen * 16> ap_sharpen, // WD_sharpen * 16 bits wide; NOTE(review): presumably 16 packed sharpen values - confirm
+                                                       ap_int<WD_sharpen * 16> ap_sharpen_uv,
+                                                       uint32_t* ysrc, // NOTE(review): ysrc/usrc/vsrc presumably the Y/U/V input samples as 32-bit words - confirm
+                                                       uint32_t* usrc,
+                                                       uint32_t* vsrc,
+                                                       int32_t* pout_level, // NOTE(review): presumably output coefficient structures - confirm
+                                                       uint8_t* pout_prob, // NOTE(review): presumably output probability table - confirm
+                                                       int* dirty); // NOTE(review): meaning of dirty is not visible in this header chunk - confirm against the definition
+void TopVp8_top_dataflow_32bit_k1NoStruct_cnt_DeepFIFO_HideDirty(ap_uint<32> id_pic,
+                                                                 ap_uint<32> mb_line,
+                                                                 ap_uint<LG2_MAX_W_PIX> y_stride,
+                                                                 ap_uint<LG2_MAX_W_PIX> uv_stride,
+                                                                 ap_uint<LG2_MAX_W_PIX> width,
+                                                                 ap_uint<LG2_MAX_W_PIX> height,
+                                                                 ap_uint<LG2_MAX_NUM_MB_W> mb_w,
+                                                                 ap_uint<LG2_MAX_NUM_MB_H> mb_h,
+                                                                 ap_uint<WD_LMD> lambda_p16,
+                                                                 ap_uint<WD_LMD> lambda_p44,
+                                                                 ap_uint<WD_LMD> tlambda,
+                                                                 ap_uint<WD_LMD> lambda_uv,
+                                                                 ap_uint<WD_LMD> tlambda_m,
+                                                                 hls_QMatrix hls_qm1,
+                                                                 hls_QMatrix hls_qm2,
+                                                                 hls_QMatrix hls_qm_uv,
+                                                                 ap_int<WD_sharpen * 16> ap_sharpen,
+                                                                 ap_int<WD_sharpen * 16> ap_sharpen_uv,
+                                                                 uint32_t* ysrc,
+                                                                 uint32_t* usrc,
+                                                                 uint32_t* vsrc,
+                                                                 int32_t* pout_level,
+                                                                 uint8_t* pout_prob); // HideDirty: same parameter list as TopVp8_top_dataflow_32bit_k1NoStruct_cnt_DeepFIFO, minus the trailing `int* dirty`
+
+//////////======================================================================/////////////////////////////
+//////////====================  TopVp8_read_2_32bit_NoStruct  =================/////////////////////////////
+//////////======================================================================/////////////////////////////
+void TopVp8_read_2_32bit_NoStruct(
+    // input
+    uint32_t* ysrc,
+    uint32_t* usrc,
+    uint32_t* vsrc,
+    ap_uint<LG2_MAX_W_PIX> y_stride,
+    ap_uint<LG2_MAX_W_PIX> uv_stride,
+    ap_uint<LG2_MAX_W_PIX> width,
+    ap_uint<LG2_MAX_W_PIX> height,
+    ap_uint<LG2_MAX_NUM_MB_W> mb_w,
+    ap_uint<LG2_MAX_NUM_MB_H> mb_h,
+    // output
+    hls::stream<ap_uint<WD_PIX * 16> >* str_din_y,
+    hls::stream<ap_uint<WD_PIX * 16> >* str_din_uv);
+
+void TopVp8_read__dataflow_32bit(
+    // input
+    ap_uint<LG2_MAX_W_PIX> y_stride,  //
+    ap_uint<LG2_MAX_W_PIX> uv_stride, //
+    ap_uint<LG2_MAX_W_PIX> width,     //
+    ap_uint<LG2_MAX_W_PIX> height,    //
+    ap_uint<LG2_MAX_NUM_MB_W> mb_w,   //
+    ap_uint<LG2_MAX_NUM_MB_H> mb_h,   //
+    uint32_t ysrc[MAX_W_PIX * MAX_H_PIX / 4],
+    uint32_t usrc[MAX_W_PIX * MAX_H_PIX / 4 / 4],
+    uint32_t vsrc[MAX_W_PIX * MAX_H_PIX / 4 / 4],
+    // output
+    hls::stream<ap_uint<WD_PIX * 16> >* str_din_y,
+    hls::stream<ap_uint<WD_PIX * 16> >* str_din_uv);
+
+void hls_ReadMBLine_32bit_const(uint32_t ysrc[MAX_W_PIX * MAX_H_PIX / 4],
+                                uint32_t usrc[MAX_W_PIX * MAX_H_PIX / 4 / 4],
+                                uint32_t vsrc[MAX_W_PIX * MAX_H_PIX / 4 / 4],
+                                int y_mb,
+                                int y_stride,
+                                int uv_stride,
+                                // output
+                                uint32_t buff_line_mb_y[MAX_W_PIX * 16 / 4], // 32bb
+                                uint32_t buff_line_mb_u[MAX_W_PIX * 4 / 4],  // 32bb
+                                uint32_t buff_line_mb_v[MAX_W_PIX * 4 / 4]   // 32bb
+                                );
+
+void hls_CopyMBLine_y_32bit_const(uint32_t ydes[MAX_W_PIX * 16 / 4],
+                                  uint32_t ysrc[MAX_W_PIX * MAX_H_PIX / 4],
+                                  int num_read);
+
+void hls_CopyMBLine_uv_32bit_const(uint32_t uvdes[MAX_W_PIX / 2 * 8 / 4],
+                                   uint32_t uvsrc[MAX_W_PIX * MAX_H_PIX / 4 / 4],
+                                   int num_read);
+
+void TopVp8_read_MB_32bit_const(ap_uint<LG2_MAX_W_PIX> width,   //      = p_info[4];  // = pic->width
+                                ap_uint<LG2_MAX_W_PIX> height,  //      = p_info[5];  // = pic->height
+                                ap_uint<LG2_MAX_NUM_MB_W> mb_w, // = p_info[2+2+2];///;
+                                ap_uint<LG2_MAX_NUM_MB_H> mb_h, // = p_info[3+2+2];//;
+                                int y_mb,
+                                uint32_t buff_line_mb_y[MAX_W_PIX * 16 / 4],
+                                uint32_t buff_line_mb_u[MAX_W_PIX * 4 / 4],
+                                uint32_t buff_line_mb_v[MAX_W_PIX * 4 / 4],
+                                // uint32_t  buff_line_mb_y2[MAX_W_PIX*16/4],
+                                // uint32_t  buff_line_mb_u2[MAX_W_PIX*4/4],
+                                // uint32_t  buff_line_mb_v2[MAX_W_PIX*4/4],
+                                int stride_y,
+                                int stride_uv,
+                                // output
+                                hls::stream<ap_uint<WD_PIX * 16> >* str_din_y,
+                                hls::stream<ap_uint<WD_PIX * 16> >* str_din_uv);
+
+void hls_GetMB_parallel_32bit_const(uint32_t ysrc_MBline[MAX_W_PIX * 16 / 4],
+                                    uint32_t usrc_MBline[MAX_W_PIX * 4 / 4],
+                                    uint32_t vsrc_MBline[MAX_W_PIX * 4 / 4],
+                                    int x_mb,
+                                    int y_mb,
+                                    int width,
+                                    int height,
+                                    int stride_y,
+                                    int stride_uv,
+                                    ap_uint<WD_PIX * 16> ap_y_in_[16],
+                                    ap_uint<WD_PIX * 16> ap_u_in_[4],
+                                    ap_uint<WD_PIX * 16> ap_v_in_[4]);
+
+void hls_GetMB_y_32bit_const(uint32_t src[MAX_W_PIX * 16 / 4],
+                             int x_mb,
+                             int y_mb,
+                             int width,
+                             int height,
+                             int stride,
+                             ap_uint<WD_PIX * 16> ap_y_in_[16]);
+
+ap_uint<32> GetEdgeImage(ap_uint<32> org, int off, bool isAllIn, bool isAllOut);
+
+void hls_GetMB_uv_32bit_const(uint32_t src[MAX_W_PIX * 4 / 4],
+                              int x_mb,
+                              int y_mb,
+                              int width,
+                              int height,
+                              int stride,
+                              ap_uint<WD_PIX * 16> ap_uv_in_[4]);
+
+ap_uint<32> get32bits_2_const(ap_uint<3> n_rem, ap_uint<32> rem, ap_uint<32> crt);
+//////////======================================================================/////////////////////////////
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+//////////======================================================================/////////////////////////////
+// TopVp8_compute_NoOut===========================================================================/
+// (Note: the function names listed below may have changed since this outline was written and may not be up to date.)
+//-Intraprediction_mb_syn_str2
+//--hls_LoadPre_out
+//--hls_LoadPre_mode
+//--Pickup_dataflow3
+//---Pickup_Y44
+//----hls_p4_test
+//----hls_GetCost
+//----hls_channel_p44
+//-----hls_FTransform
+//-----hls_QuantizeBlock
+//-----hls_ITransformOne
+//-----hls_SSE4X4
+//-----hls_Disto4x4
+//-----hls_fast_cost
+//---Pickup_Y16
+//----hls_channel_p16
+//-----hls_p16_test
+//-----hls_FTransform
+//-----hls_FTransformWHT
+//-----hls_QuantizeBlockWHT
+//-----hls_IFTransformWHT
+//-----hls_QuantizeBlock
+//-----hls_ITransformOne
+//-----hls_SSE4X4
+//-----hls_Disto4x4
+//-----hls_fast_cost
+//-----hls_ca_score
+//---Pickup_UV
+//----hls_p8_test
+//----hls_channel_uv_8
+//-----hls_p8_test
+//-----hls_FTransform
+//-----hls_QuantizeBlock
+//-----hls_ITransformOne
+//-----hls_fast_cost
+//-----hls_ca_score
+//--hls_SetBestAs4_mode
+
+void TopVp8_compute(ap_uint<LG2_MAX_NUM_MB_W> mb_w,
+                    ap_uint<LG2_MAX_NUM_MB_H> mb_h,
+                    hls::stream<ap_uint<WD_PIX * 16> >* str_din_y,
+                    hls::stream<ap_uint<WD_PIX * 16> >* str_din_uv,
+                    ap_uint<WD_LMD> lambda_p16,
+                    ap_uint<WD_LMD> lambda_p44,
+                    ap_uint<WD_LMD> tlambda,
+                    ap_uint<WD_LMD> lambda_uv,
+                    ap_uint<WD_LMD> tlambda_m,
+                    hls_QMatrix hls_qm1,
+                    hls_QMatrix hls_qm2,
+                    hls_QMatrix hls_qm_uv,
+                    ap_int<WD_sharpen * 16> ap_sharpen,
+                    ap_int<WD_sharpen * 16> ap_sharpen_uv,
+                    hls::stream<ap_uint<WD_PIX * 16> >* str_out,
+                    hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc,
+                    hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y,
+                    hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv,
+                    hls::stream<ap_int<64> >* str_pred,
+                    hls::stream<ap_int<6> >* str_ret);
+void TopVp8_compute_NoOut(ap_uint<LG2_MAX_NUM_MB_W> mb_w,
+                          ap_uint<LG2_MAX_NUM_MB_H> mb_h,
+                          hls::stream<ap_uint<WD_PIX * 16> >* str_din_y,
+                          hls::stream<ap_uint<WD_PIX * 16> >* str_din_uv,
+                          ap_uint<WD_LMD> lambda_p16,
+                          ap_uint<WD_LMD> lambda_p44,
+                          ap_uint<WD_LMD> tlambda,
+                          ap_uint<WD_LMD> lambda_uv,
+                          ap_uint<WD_LMD> tlambda_m,
+                          hls_QMatrix hls_qm1,
+                          hls_QMatrix hls_qm2,
+                          hls_QMatrix hls_qm_uv,
+                          ap_int<WD_sharpen * 16> ap_sharpen,
+                          ap_int<WD_sharpen * 16> ap_sharpen_uv, // NoOut: unlike TopVp8_compute, no `str_out` stream parameter follows here
+                          hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc,
+                          hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y,
+                          hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv,
+                          hls::stream<ap_int<64> >* str_pred,
+                          hls::stream<ap_int<6> >* str_ret);
+
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+void Intraprediction_mb_syn_str2_widen(ap_uint<LG2_MAX_NUM_MB_W> x_mb,
+                                       ap_uint<LG2_MAX_NUM_MB_W> y_mb,
+                                       ap_uint<LG2_MAX_NUM_MB_W> mb_w,
+                                       hls::stream<ap_uint<WD_PIX * 16> >* str_ap_yuv_in_y,
+                                       hls::stream<ap_uint<WD_PIX * 16> >* str_ap_yuv_in_uv,
+                                       ap_uint<WD_LMD> lambda_p16,
+                                       ap_uint<WD_LMD> lambda_p44,
+                                       ap_uint<WD_LMD> tlambda,
+                                       ap_uint<WD_LMD> lambda_uv,
+                                       ap_uint<WD_LMD> tlambda_m,
+                                       hls_QMatrix hls_qm1,
+                                       hls_QMatrix hls_qm2,
+                                       hls_QMatrix hls_qm_uv,
+                                       ap_int<WD_sharpen * 16> ap_sharpen,
+                                       ap_int<WD_sharpen * 16> ap_sharpen_uv,
+                                       hls::stream<ap_uint<WD_PIX * 16> >* str_out,
+                                       hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc,
+                                       hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y,
+                                       hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv,
+                                       hls::stream<ap_int<64> >* str_pred,
+                                       hls::stream<ap_int<6> >* str_ret);
+
+void Intraprediction_mb_syn_str2_widen_NoOut(ap_uint<LG2_MAX_NUM_MB_W> x_mb,
+                                             ap_uint<LG2_MAX_NUM_MB_W> y_mb,
+                                             ap_uint<LG2_MAX_NUM_MB_W> mb_w,
+                                             hls::stream<ap_uint<WD_PIX * 16> >* str_ap_yuv_in_y,
+                                             hls::stream<ap_uint<WD_PIX * 16> >* str_ap_yuv_in_uv,
+                                             ap_uint<WD_LMD> lambda_p16,
+                                             ap_uint<WD_LMD> lambda_p44,
+                                             ap_uint<WD_LMD> tlambda,
+                                             ap_uint<WD_LMD> lambda_uv,
+                                             ap_uint<WD_LMD> tlambda_m,
+                                             hls_QMatrix hls_qm1,
+                                             hls_QMatrix hls_qm2,
+                                             hls_QMatrix hls_qm_uv,
+                                             ap_int<WD_sharpen * 16> ap_sharpen,
+                                             ap_int<WD_sharpen * 16> ap_sharpen_uv,
+                                             // NoOut: hls::stream<ap_uint<WD_PIX * 16> >* str_out,
+                                             hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc,
+                                             hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y,
+                                             hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv,
+                                             hls::stream<ap_int<64> >* str_pred,
+                                             hls::stream<ap_int<6> >* str_ret);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+void hls_LoadPre_out_widen(ap_uint<WD_PIX>* ap_y_m,
+                           ap_uint<WD_PIX>* ap_u_m,
+                           ap_uint<WD_PIX>* ap_v_m,
+                           ap_uint<WD_PIX * 4> ap_y_top_c[4],
+                           ap_uint<WD_PIX * 4> ap_y4_top_c[4],
+                           ap_uint<WD_PIX * 4> ap_uv_top_c[4],
+                           ap_uint<WD_PIX * 4>* ap_y4_topright_c,
+                           ap_uint<WD_PIX * 4> ap_y_left_c[4],
+                           ap_uint<WD_PIX * 4> ap_y4_left_c[4],
+                           ap_uint<WD_PIX * 4> ap_uv_left_c[4],
+                           ap_uint<WD_PIX * 4> ap_y_left_[4],
+                           ap_uint<WD_PIX * 4> ap_uv_left_[4],
+                           ap_uint<WD_PIX * 4> ap_y_top_[MAX_NUM_MB_W * 4],
+                           ap_uint<WD_PIX * 4> ap_uv_top_[MAX_NUM_MB_W * 4],
+                           ap_uint<LG2_MAX_NUM_MB_W> x_mb,
+                           ap_uint<LG2_MAX_NUM_MB_W> y_mb,
+                           ap_uint<LG2_MAX_NUM_MB_W> mb_w);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+void hls_LoadPre_mode_widen(ap_uint<WD_MODE> ap_y_top_mode[MAX_NUM_MB_W * 4],
+                            ap_uint<WD_MODE> ap_y_left_mode[4],
+                            ap_uint<WD_MODE> ap_y_top_c_mode[4],
+                            ap_uint<WD_MODE> ap_y4_top_c_mode[16],
+                            ap_uint<WD_MODE> ap_y_left_c_mode[4],
+                            ap_uint<WD_MODE>* ap_y_m_mode,
+                            ap_uint<LG2_MAX_NUM_MB_W> x_mb,
+                            ap_uint<LG2_MAX_NUM_MB_W> y_mb,
+                            ap_uint<LG2_MAX_NUM_MB_W> mb_w);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+ap_uint<12> hls_GetCost_widen(ap_uint<4> n_sb,
+                              ap_uint<4> mode,
+                              ap_uint<WD_MODE> ap_y_top_c_mode[4], // at beginning, default is DC
+                              ap_uint<WD_MODE> ap_y_left_c_mode[4],
+                              ap_uint<WD_MODE> local_mod);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+void Pickup_dataflow3_widen(
+    // Parameters unchanged for one picture/segment
+    ap_uint<WD_LMD> I__tlambda,               //              :
+    ap_uint<WD_LMD> I__tlambda_m,             //
+    ap_uint<WD_LMD> I__lambda_p44,            //
+    ap_uint<WD_LMD> I__lambda_p16,            //
+    ap_uint<WD_LMD> I__lambda_uv,             //
+    hls_QMatrix I__hls_qm1,                   // y44,y16
+    hls_QMatrix I__hls_qm2,                   // y16
+    hls_QMatrix I__hls_qm_uv,                 //
+    ap_int<WD_sharpen * 16> I__ap_sharpen,    //
+    ap_int<WD_sharpen * 16> I__ap_sharpen_uv, //
+    // Parameters changed for each MB
+    ap_uint<WD_PIX * 16> I__ap_yuv_in_y44[16], //
+    ap_uint<WD_PIX * 16> I__ap_yuv_in_y16[16], //
+    ap_uint<WD_PIX * 16> I__ap_uv_in_[8],      //
+    ap_uint<1> I__istop,                       //
+    ap_uint<1> I__isleft,                      //
+    ap_uint<1> I__isright,                     //
+    // image context
+    ap_uint<WD_PIX * 4> I__ap_y_top_c_y44[4],  //
+    ap_uint<WD_PIX * 4> I__ap_y_top_c_y16[4],  //
+    ap_uint<WD_PIX * 4> I__ap_y_left_c_y44[4], //
+    ap_uint<WD_PIX * 4> I__ap_y_left_c_y16[4], //
+    ap_uint<WD_PIX * 4> I__ap_uv_top_c[4],     //
+    ap_uint<WD_PIX * 4> I__ap_uv_left_c[4],    //
+    ap_uint<WD_PIX> I__ap_y_m,                 //
+    ap_uint<WD_PIX> I__ap_u_m,                 //
+    ap_uint<WD_PIX> I__ap_v_m,                 //
+    ap_uint<WD_PIX * 4> I__ap_y4_topright_c,   //
+    // mode context
+    ap_uint<WD_MODE> I__ap_y_top_c_mode[4],  //
+    ap_uint<WD_MODE> I__ap_y_left_c_mode[4], //
+    // OUTPUT
+    ap_uint<WD_PIX * 16> O__ap_y4_out_cb[16],       //
+    ap_uint<WD_PIX * 16> O__ap_y_out_cb[2][17],     //
+    ap_uint<WD_PIX * 16> O__ap_uv_out_cb[2][17],    //
+    ap_int<WD_LEVEL * 16> O__ap_y4_level_cb[17],    //
+    ap_int<WD_LEVEL * 16> O__ap_y_level_cb[2][17],  //
+    ap_int<WD_LEVEL * 16> O__ap_y16dc_level_cb[2],  //
+    ap_int<WD_LEVEL * 16> O__ap_uv_level_cb[2][16], //
+    // str_rd_i4*              OP_rd_y4_acc,//
+    ap_uint<WD_RD_SCORE + 4>* O__score_acc,
+    ap_uint<25>* O__nz_mb,
+    str_rd O__rd_y16_cb[2],                  //
+    str_rd O__rd_uv_cb[2],                   //
+    ap_uint<WD_MODE> O_ap_y4_top_c_mode[16], //
+    ap_uint<WD_MODE>* OP_ap_y16_mode_c,      //
+    ap_uint<WD_MODE>* OP_ap_uv_mode_c,       //
+    ap_uint<1>* OP_b_uv,                     //
+    ap_uint<2>* OP_b_y                       //
+    );
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+void Pickup_Y44_widen(ap_uint<1> istop,
+                      ap_uint<1> isleft,
+                      ap_uint<WD_PIX * 4> ap_y_top_c[4],
+                      ap_uint<WD_PIX * 4> ap_y_left_c[4],
+                      ap_uint<WD_MODE> ap_y_top_c_mode[4], // at beginning, default is DC
+                      ap_uint<WD_MODE> ap_y_left_c_mode[4],
+                      // ap_uint<WD_MODE>        ap_y4_top_c_mode[16],
+                      ap_uint<WD_PIX * 4> ap_y4_topright_c,
+                      ap_uint<WD_PIX * 4> ap_y_m,
+                      // ap_uint<4>              MACRO_n_sb,
+                      ap_uint<WD_PIX * 16> ap_yuv_in[16],
+                      hls_QMatrix hls_qm1,
+                      ap_int<WD_sharpen * 16> ap_sharpen,
+                      ap_uint<WD_LMD> lambda_p44,
+                      ap_uint<WD_LMD> tlambda,
+                      ap_uint<WD_LMD> tlambda_m,
+                      ap_uint<WD_PIX * 16> ap_y4_out_mb[16],
+                      ap_int<WD_LEVEL * 16> ap_y4_level_mb[16],
+                      ap_uint<WD_RD_SCORE + 4>* score_acc,
+                      ap_uint<25>* nz_mb,
+                      ap_uint<WD_MODE> O__modes_mb[16]);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+void hls_LoadPreds4_ins(ap_uint<WD_PIX * 4> ap_y4_top_c[16],
+                        ap_uint<WD_PIX * 4> ap_y4_left_c[16],
+                        ap_uint<WD_PIX * 4> ap_y4_topright_c,
+                        ap_uint<WD_PIX * 4> ap_y_m,
+                        ap_uint<WD_PIX * 4>* abcd,
+                        ap_uint<WD_PIX * 4>* efgh,
+                        ap_uint<WD_PIX * 4>* ijkl,
+                        ap_uint<WD_PIX>* x44,
+                        ap_uint<1> isleft,
+                        ap_uint<1> istop,
+                        ap_uint<4> n_sb);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+ap_uint<WD_PIX * 16> hls_p4_test(
+    ap_uint<WD_PIX * 4> abcd, ap_uint<WD_PIX * 4> efgh, ap_uint<WD_PIX * 4> ijkl, ap_uint<WD_PIX> x44, ap_uint<4> mode);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+void hls_channel_p44(ap_uint<4> mode_in,
+                     ap_uint<WD_PIX * 16> ap_yuv_in_sb,
+                     ap_uint<WD_PIX * 16> ap_ref_p44,
+                     hls_QMatrix hls_qm1,
+                     ap_int<WD_sharpen * 16> ap_sharpen,
+                     ap_uint<WD_LMD> lambda_p44,
+                     ap_uint<WD_LMD> tlambda,
+                     ap_uint<WD_LMD> tlambda_m,
+                     ap_uint<12> pre_dis_h,
+                     ap_uint<WD_PIX * 16>* ap_y4_out_cb_n_sb2,
+                     ap_int<WD_LEVEL * 16>* ap_y4_level_cb_n_sb2,
+                     ap_uint<WD_RD_SCORE + 4>* score_sb,
+                     ap_uint<25>* nz_sb,
+                     ap_uint<4>* mode_out);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+/* FOR SD CALCULATION */
+ap_uint<WD_DISTO> hls_Disto4x4(ap_uint<WD_PIX * 16> a, ap_uint<WD_PIX * 16> b);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+/* FOR R CALCULATION */
+ap_uint<WD_LEVEL + 4> hls_LV0(ap_uint<WD_LEVEL - 1> lv);
+
+ap_uint<WD_LEVEL> hls_LV1(ap_uint<WD_LEVEL - 1> lv);
+
+ap_uint<WD_LEVEL> hls_LV2(ap_uint<WD_LEVEL - 1> lv);
+
+ap_uint<WD_LEVEL> hls_LVn(ap_uint<WD_LEVEL - 1> lv);
+
+ap_uint<WD_FAST> hls_fast_cost(ap_int<WD_LEVEL * 16> vlevel, ap_uint<2> type);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+/* FOR S CALCULATION */
+ap_uint<WD_SSE4> hls_SSE4X4(ap_uint<WD_PIX * 16> src, ap_uint<WD_PIX * 16> rec);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+/* NORMAL TRANSFORM */
+ap_int<WD_DCT * 16> hls_FTransform(ap_uint<WD_PIX * 16> src_ap, ap_uint<WD_PIX * 16> ref_ap);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+ap_uint<5> hls_QuantizeBlock(ap_int<WD_DCT * 16> in,
+                             ap_int<WD_LEVEL * 16>* out,
+                             ap_int<WD_DCT * 16>* out2,
+                             hls_QMatrix* pQM, // frequency boosters for slight sharpening
+                             ap_uint<WD_sharpen * 16> sharpen_,
+                             ap_uint<1> is16);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+
+/* QUANTIZATION FOR NORMAL AND DC */
+ap_uint<5> hls_QuantizeBlock_old(ap_int<WD_DCT * 16> in,
+                                 ap_int<WD_LEVEL * 16>* out,
+                                 ap_int<WD_DCT * 16>* out2,
+                                 ap_uint<WD_q> q_0, // quantizer steps
+                                 ap_uint<WD_q> q_n,
+                                 ap_uint<WD_iq> iq_0, // reciprocals, fixed point.
+                                 ap_uint<WD_iq> iq_n,
+                                 ap_uint<WD_bias> bias_0, // rounding bias
+                                 ap_uint<WD_bias> bias_n,
+                                 ap_uint<WD_sharpen * 16> sharpen_,
+                                 ap_uint<1> is16);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+/* Inverse Transforms */
+ap_uint<WD_PIX * 16> hls_ITransformOne(ap_uint<WD_PIX * 16> ap_ref, ap_int<WD_IQT * 16> ap_in);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+void Pickup_Y16(ap_uint<WD_LMD> I__tlambda,            //              :
+                ap_uint<WD_LMD> I__tlambda_m,          //
+                ap_uint<WD_LMD> I__lambda_p16,         //
+                hls_QMatrix I__hls_qm1,                // y44,y16
+                hls_QMatrix I__hls_qm2,                // y16
+                ap_int<WD_sharpen * 16> I__ap_sharpen, //
+                // Parameters changed for each MB
+                ap_uint<WD_PIX * 16> I__ap_y_in_[16], //
+                ap_uint<1> I__istop,                  //
+                ap_uint<1> I__isleft,                 //
+                ap_uint<1> I__isright,                //
+                // image context
+                ap_uint<WD_PIX * 4> I__ap_y_top_c[4],  //
+                ap_uint<WD_PIX * 4> I__ap_y_left_c[4], //
+                ap_uint<WD_PIX> I__ap_y_m,             //
+                // OUTPUT
+                ap_uint<WD_PIX * 16> O__ap_y_out_cb[2][17],    //
+                ap_int<WD_LEVEL * 16> O__ap_y_level_cb[2][17], //
+                ap_int<WD_LEVEL * 16> O__ap_y16dc_level_cb[2], //
+                str_rd O__rd_y16_cb[2],                        //
+                ap_uint<WD_MODE>* OP_ap_y16_mode_c,            //
+                ap_uint<2>* OP_b_y                             //
+                );
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+ap_uint<WD_RD_SCORE + 4> hls_channel_p16(ap_uint<4> mode_p16,
+                                         ap_uint<1> istop,
+                                         ap_uint<1> isleft,
+                                         ap_uint<WD_PIX * 4> ap_y_top_c[4],
+                                         ap_uint<WD_PIX * 4> ap_y_left_c[4],
+                                         ap_uint<WD_PIX> ap_y_m,
+                                         ap_uint<WD_PIX * 16> ap_yuv_in_[24],
+                                         hls_QMatrix hls_qm1,
+                                         hls_QMatrix hls_qm2,
+                                         ap_int<WD_sharpen * 16> ap_sharpen,
+                                         ap_uint<WD_LMD> tlambda,   //     = dqm->tlambda_;
+                                         ap_uint<WD_LMD> tlambda_m, //   = dqm->lambda_mode_;
+                                         ap_int<WD_LEVEL * 16> ap_y16_level_c[17],
+                                         ap_int<WD_LEVEL * 16>* ap_y16dc_level_c,
+                                         ap_uint<WD_PIX * 16> ap_y16_out_c[17],
+                                         ap_uint<25>* nz);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+/* Inverse Transforms */
+ap_int<WD_IWHT * 16> hls_ITransformWHT(ap_int<WD_WHT * 16> in);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+/* QUANTIZATION FOR DC */
+ap_uint<1> hls_QuantizeBlockWHT_old(ap_int<WD_WHT * 16> in,
+                                    ap_int<WD_LEVEL * 16>* out,
+                                    ap_int<WD_WHT * 16>* out2,
+                                    ap_uint<WD_q> q_0, // quantizer steps
+                                    ap_uint<WD_q> q_n,
+                                    ap_uint<WD_iq> iq_0, // reciprocals, fixed point.
+                                    ap_uint<WD_iq> iq_n,
+                                    ap_uint<WD_bias> bias_0, // rounding bias
+                                    ap_uint<WD_bias> bias_n);
+
+ap_uint<1> hls_QuantizeBlockWHT(ap_int<WD_WHT * 16> in,
+                                ap_int<WD_LEVEL * 16>* out,
+                                ap_int<WD_WHT * 16>* out2,
+                                hls_QMatrix* pQM);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+ap_uint<WD_PIX * 16> hls_p16_test(ap_uint<2> mode,
+                                  ap_uint<4> n,
+                                  ap_uint<1> istop,
+                                  ap_uint<1> isleft,
+                                  ap_uint<WD_PIX * 4> ap_y_top_c[4],
+                                  ap_uint<WD_PIX * 4> ap_y_left_c[4],
+                                  ap_uint<WD_PIX> ap_y_m);
+
+ap_uint<WD_RD_SCORE + 4> hls_ca_score(ap_uint<WD_LMD> lmbda, str_dis* dis, ap_uint<4> m);
+//////////=====================   TopVp8_compute_NoOut==========================/////////////////////////////
+void Pickup_UV(
+    // Parameters unchanged for one picture/segment
+    ap_uint<WD_LMD> I__tlambda,               //              :
+    ap_uint<WD_LMD> I__tlambda_m,             //
+    ap_uint<WD_LMD> I__lambda_uv,             //
+    hls_QMatrix I__hls_qm_uv,                 //
+    ap_int<WD_sharpen * 16> I__ap_sharpen_uv, //
+    // Parameters changed for each MB
+    ap_uint<WD_PIX * 16> I__ap_uv_in_[8], //
+    ap_uint<1> I__istop,                  //
+    ap_uint<1> I__isleft,                 //
+    ap_uint<1> I__isright,                //
+    // image context
+    ap_uint<WD_PIX * 4> I__ap_uv_top_c[4],          //
+    ap_uint<WD_PIX * 4> I__ap_uv_left_c[4],         //
+    ap_uint<WD_PIX> I__ap_u_m,                      //
+    ap_uint<WD_PIX> I__ap_v_m,                      //
+    ap_uint<WD_PIX * 16> O__ap_uv_out_cb[2][17],    // NOTE(review): presumably outputs start here (O__/OP_ prefix; Pickup_Y16 marks this point "// OUTPUT") - confirm
+    ap_int<WD_LEVEL * 16> O__ap_uv_level_cb[2][16], //
+    str_rd O__rd_uv_cb[2],                          //
+    ap_uint<WD_MODE>* OP_ap_uv_mode_c,              //
+    ap_uint<1>* OP_b_uv                             //
+    );
+// Section TopVp8_compute_NoOut | hls_channel_uv_8 (prototype only): chroma (uv) helper for one 4-bit mode_uv; returns a (WD_RD_SCORE + 4)-bit value.
+ap_uint<WD_RD_SCORE + 4> hls_channel_uv_8(ap_uint<4> mode_uv,
+                                          ap_uint<1> istop,
+                                          ap_uint<1> isleft,
+                                          ap_uint<WD_PIX * 4> ap_uv_top_c[4],
+                                          ap_uint<WD_PIX * 4> ap_uv_left_c[4],
+                                          ap_uint<WD_PIX> ap_u_m,
+                                          ap_uint<WD_PIX> ap_v_m,
+                                          ap_uint<WD_PIX * 16> ap_uv_in_[8],
+                                          hls_QMatrix hls_qm_uv,
+                                          ap_int<WD_sharpen * 16> ap_sharpen_uv,
+                                          ap_uint<WD_LMD> lambda_uv, //     = dqm->tlambda_;
+                                          ap_int<WD_LEVEL * 16> ap_uv_level_c[8], // NOTE(review): presumably an output - confirm in the definition
+                                          ap_uint<WD_PIX * 16> ap_uv_out_c[8], // NOTE(review): presumably an output - confirm in the definition
+                                          ap_uint<25>* nz); // 25-bit value passed by pointer; presumably written by the callee - confirm
+// Section TopVp8_compute_NoOut | hls_SetBestAs4_mode_widen (prototype only): takes top/left luma mode arrays, 16 per-sub-block modes and a packed (WD_MODE * 16)-bit mode word by pointer. NOTE(review): which arguments are written is not visible here.
+void hls_SetBestAs4_mode_widen(ap_uint<WD_MODE> ap_y_top_mode[MAX_NUM_MB_W * 4],
+                               ap_uint<WD_MODE> ap_y_left_mode[4],
+                               ap_uint<WD_MODE> ap_y4_top_c_mode[16],
+                               ap_uint<WD_MODE> ap_y_left_c_mode[4],
+                               ap_uint<WD_MODE * 16>* ap_y_mode_b,
+                               ap_uint<LG2_MAX_NUM_MB_W + 2> x_sb_w); // 2 bits wider than an MB index; presumably a sub-block column index - confirm
+// Section TopVp8_compute_NoOut | hls_SetBestAs16_mode_widen (prototype only): like hls_SetBestAs4_mode_widen but takes a single mode ap_y16_mode_c instead of per-sub-block mode arrays. NOTE(review): which arguments are written is not visible here.
+void hls_SetBestAs16_mode_widen(ap_uint<WD_MODE> ap_y_top_mode[MAX_NUM_MB_W * 4],
+                                ap_uint<WD_MODE> ap_y_left_mode[4],
+                                ap_uint<WD_MODE> ap_y16_mode_c,
+                                ap_uint<WD_MODE * 16>* ap_y_mode_b,
+                                ap_uint<LG2_MAX_NUM_MB_W + 2> x_sb_w); // 2 bits wider than an MB index; presumably a sub-block column index - confirm
+// Section TopVp8_compute_NoOut | hls_StoreTopLeft_uv (prototype only): takes the chroma top array (MAX_NUM_MB_W * 4 entries), 4 left entries, an 8-entry block array and index x_mb; presumably copies block edges into the top/left context - confirm.
+void hls_StoreTopLeft_uv(ap_uint<WD_PIX * 4> ap_uv_top_[MAX_NUM_MB_W * 4],
+                         ap_uint<WD_PIX * 4> ap_uv_left_[4],
+                         ap_uint<WD_PIX * 16> ap_uv_out_cb[8],
+                         ap_uint<LG2_MAX_NUM_MB_W> x_mb);
+// Section TopVp8_compute_NoOut | hls_StoreTopLeft_y (prototype only): luma counterpart of hls_StoreTopLeft_uv; same shape of arguments but a 16-entry block array. Presumably copies block edges into the top/left context - confirm.
+void hls_StoreTopLeft_y(ap_uint<WD_PIX * 4> ap_y_top_[MAX_NUM_MB_W * 4],
+                        ap_uint<WD_PIX * 4> ap_y_left_[4],
+                        ap_uint<WD_PIX * 16> ap_y_out_cb[16],
+                        ap_uint<LG2_MAX_NUM_MB_W> x_mb);
+// Section TopVp8_compute_NoOut | hls_GetCost (prototype only): returns a 12-bit value for a 4-bit sub-block index n_sb and 4-bit mode, given top/left/4x4 mode context arrays; presumably a mode cost - confirm.
+ap_uint<12> hls_GetCost(ap_uint<4> n_sb,
+                        ap_uint<4> mode,
+                        ap_uint<WD_MODE> ap_y_top_c_mode[4], // at beginning, default is DC
+                        ap_uint<WD_MODE> ap_y_left_c_mode[4],
+                        ap_uint<WD_MODE> ap_y4_top_c_mode[16]);
+// Section TopVp8_compute_NoOut | hls_p8_test (prototype only): returns a (WD_PIX * 16)-bit word for a 2-bit mode and 3-bit index n, given chroma top/left context and edge flags; presumably one predicted chroma sub-block - confirm.
+ap_uint<WD_PIX * 16> hls_p8_test(ap_uint<2> mode,
+                                 ap_uint<3> n,
+                                 ap_uint<1> istop,
+                                 ap_uint<1> isleft,
+                                 ap_uint<WD_PIX * 4> ap_uv_top_c[4],
+                                 ap_uint<WD_PIX * 4> ap_uv_left_c[4],
+                                 ap_uint<WD_PIX> ap_u_m,
+                                 ap_uint<WD_PIX> ap_v_m);
+// Section TopVp8_compute_NoOut | hls_DC16_4_uv_old (prototype only): returns a (WD_PIX * 16)-bit word from two top and two left (WD_PIX * 4)-bit entries plus edge flags. NOTE(review): the "_old" suffix suggests a superseded variant - confirm it is still used.
+ap_uint<WD_PIX * 16> hls_DC16_4_uv_old( // ref: lut:56, 3.19+1.25
+    ap_uint<WD_PIX * 4> top[2],
+    ap_uint<WD_PIX * 4> left[2],
+    ap_uint<1> istop,
+    ap_uint<1> isleft);
+// Section TopVp8_compute_NoOut | hls_TM16_4 (prototype only): returns a (WD_PIX * 16)-bit word from abcd, ijkl (WD_PIX * 4 bits each), one pixel X44 and edge flags; presumably abcd/ijkl/X44 are the top row, left column and corner - confirm.
+ap_uint<WD_PIX * 16> hls_TM16_4( // ref: lut:56, 3.19+1.25
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<WD_PIX * 4> ijkl,
+    ap_uint<WD_PIX> X44,
+    ap_uint<1> istop,
+    ap_uint<1> isleft);
+// Section TopVp8_compute_NoOut | hls_VE16_4 (prototype only): returns a (WD_PIX * 16)-bit word from one (WD_PIX * 4)-bit input abcd and the istop flag; presumably vertical prediction from the top row - confirm.
+ap_uint<WD_PIX * 16> hls_VE16_4( // ref: lut:56, 3.19+1.25//lut 452vs997, 2.72+1.25ns,
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<1> istop);
+// Section TopVp8_compute_NoOut | hls_HE16_4 (prototype only): returns a (WD_PIX * 16)-bit word from one (WD_PIX * 4)-bit input ijkl and the isleft flag; presumably horizontal prediction from the left column - confirm.
+ap_uint<WD_PIX * 16> hls_HE16_4( // ref: lut:56, 3.19+1.25
+    ap_uint<WD_PIX * 4> ijkl,
+    ap_uint<1> isleft);
+// Section TopVp8_compute_NoOut | hls_FTransformWHT (prototype only): maps a packed (WD_DCT * 16)-bit word to a packed (WD_WHT * 16)-bit word; the name suggests a forward Walsh-Hadamard transform - confirm.
+ap_int<WD_WHT * 16> hls_FTransformWHT(ap_int<WD_DCT * 16> in);
+// Section TopVp8_compute_NoOut | hls_ITransformWHT (prototype only): maps a packed (WD_WHT * 16)-bit word to a packed (WD_IWHT * 16)-bit word; the name suggests the inverse of hls_FTransformWHT - confirm.
+ap_int<WD_IWHT * 16> hls_ITransformWHT(ap_int<WD_WHT * 16> in);
+// Section TopVp8_compute_NoOut | hls_TTransform (prototype only): reduces a packed (WD_PIX * 16)-bit pixel word to a single WD_TTR-bit unsigned value. NOTE(review): the exact transform/metric is defined elsewhere.
+ap_uint<WD_TTR> hls_TTransform(ap_uint<WD_PIX * 16> in);
+// Section TopVp8_compute_NoOut | hls_DC16_4_y (prototype only): returns a (WD_PIX * 16)-bit word from four top and four left (WD_PIX * 4)-bit words plus edge flags; presumably luma DC prediction over 16 top + 16 left pixels - confirm.
+ap_uint<WD_PIX * 16> hls_DC16_4_y( // ref: lut:56, 3.19+1.25
+    ap_uint<WD_PIX * 4> top0,
+    ap_uint<WD_PIX * 4> top1,
+    ap_uint<WD_PIX * 4> top2,
+    ap_uint<WD_PIX * 4> top3,
+    ap_uint<WD_PIX * 4> left0,
+    ap_uint<WD_PIX * 4> left1,
+    ap_uint<WD_PIX * 4> left2,
+    ap_uint<WD_PIX * 4> left3,
+    ap_uint<1> istop,
+    ap_uint<1> isleft);
+//////////======================================================================/////////////////////////////
+// TopVp8_send_32bit (prototype only): takes the MB grid size (mb_w, mb_h), five stream pointers (dc/y/uv levels, pred, ret) and an int32_t* buffer marked "output" below.
+//////////======================================================================/////////////////////////////
+void TopVp8_send_32bit(ap_uint<LG2_MAX_NUM_MB_W> mb_w,
+                       ap_uint<LG2_MAX_NUM_MB_H> mb_h,
+                       hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc,
+                       hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y,
+                       hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv,
+                       hls::stream<ap_int<64> >* str_pred,
+                       hls::stream<ap_int<6> >* str_ret,
+                       // output
+                       int32_t* pout_level);
+// Section TopVp8_send_32bit | TopVp8_send__strs_to_array (prototype only): takes the same five stream pointers as TopVp8_send_32bit plus a short int* buffer pout; presumably drains the streams into pout - confirm.
+void TopVp8_send__strs_to_array(short int* pout,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv,
+                                hls::stream<ap_int<64> >* str_pred,
+                                hls::stream<ap_int<6> >* str_ret);
+//////////======================================================================/////////////////////////////
+// TopVp8_RecordCoeff_hls_cnt (prototype only): takes the MB grid size and five input stream pointers; everything after the "output" marker is a forwarded copy (*2 streams), an MB-type stream, or a record/count stream.
+//////////======================================================================/////////////////////////////
+void TopVp8_RecordCoeff_hls_cnt(ap_uint<LG2_MAX_NUM_MB_W> mb_w,
+                                ap_uint<LG2_MAX_NUM_MB_H> mb_h,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv,
+                                hls::stream<ap_int<64> >* str_pred,
+                                hls::stream<ap_int<6> >* str_ret,
+                                // output
+                                hls::stream<ap_uint<1> >& str_mb_type,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc2,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y2,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv2,
+                                hls::stream<ap_int<64> >* str_pred2,
+                                hls::stream<ap_int<6> >* str_ret2,
+                                hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_dc, // 1+1+3+2+4 = 11 bits; widths match Record_str's isEnd/bit/band/ctx/off (packing order not visible here)
+                                hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_ac,
+                                hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_uv,
+                                hls::stream<ap_uint<8> >& str_cnt_dc,
+                                hls::stream<ap_uint<8> >& str_cnt_ac,
+                                hls::stream<ap_uint<8> >& str_cnt_uv);
+
+// Section TopVp8_RecordCoeff_hls_cnt | RecordCoeff_dataflow (prototype only): same stream arguments as TopVp8_RecordCoeff_hls_cnt (without mb_w/mb_h) plus six 9-bit non-zero contexts; returns a 9-bit value (meaning not visible here).
+ap_uint<9> RecordCoeff_dataflow(hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv,
+                                hls::stream<ap_int<64> >* str_pred,
+                                hls::stream<ap_int<6> >* str_ret,
+                                // output
+                                hls::stream<ap_uint<1> >& str_mb_type,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc2,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y2,
+                                hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv2,
+                                hls::stream<ap_int<64> >* str_pred2,
+                                hls::stream<ap_int<6> >* str_ret2,
+                                hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_dc,
+                                hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_ac,
+                                hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_uv,
+                                hls::stream<ap_uint<8> >& str_cnt_dc,
+                                hls::stream<ap_uint<8> >& str_cnt_ac,
+                                hls::stream<ap_uint<8> >& str_cnt_uv,
+                                ap_uint<9>& top_nz_dc, // non-const reference
+                                ap_uint<9> left_nz_dc, // = ap_left_nz; passed by value, unlike the other nz contexts
+                                ap_uint<9>& top_nz_y,  // = ap_top_nz;
+                                ap_uint<9>& left_nz_y, // = ap_left_nz;
+                                ap_uint<9>& top_nz_uv, // = ap_top_nz;
+                                ap_uint<9>& left_nz_uv // = ap_left_nz;
+                                );
+// Section TopVp8_RecordCoeff_hls_cnt | RecordCoeff_dataflow_dc (prototype only): DC-stream part; takes mb_type, the DC level stream and its forwarded/record/count streams; returns a 9-bit value (presumably the updated left nz context - confirm).
+ap_uint<9> RecordCoeff_dataflow_dc(ap_uint<1> mb_type,
+                                   hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc,
+                                   // output
+                                   hls::stream<ap_int<WD_LEVEL * 16> >* str_level_dc2,
+                                   hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_dc,
+                                   hls::stream<ap_uint<8> >& str_cnt_dc,
+                                   ap_uint<9>& top_nz_dc, // non-const reference
+                                   ap_uint<9> left_nz_dc  // = ap_left_nz; passed by value
+                                   );
+
+// Section TopVp8_RecordCoeff_hls_cnt | RecordCoeff_dataflow_y (prototype only): luma (y) part; takes mb_type, the y level stream, its forwarded/record/count streams and both nz contexts by reference.
+
+void RecordCoeff_dataflow_y(ap_uint<1> mb_type,
+                            hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y,
+                            // output
+                            hls::stream<ap_int<WD_LEVEL * 16> >* str_level_y2,
+                            hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_ac,
+                            hls::stream<ap_uint<8> >& str_cnt_ac,
+                            ap_uint<9>& top_nz_y, // = ap_top_nz;
+                            ap_uint<9>& left_nz_y // = ap_left_nz;
+                            );
+
+// Section TopVp8_RecordCoeff_hls_cnt | RecordCoeff_dataflow_uv (prototype only): chroma (uv) part; same shape as RecordCoeff_dataflow_y but without an mb_type argument.
+
+void RecordCoeff_dataflow_uv(hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv,
+                             // output
+                             hls::stream<ap_int<WD_LEVEL * 16> >* str_level_uv2,
+                             hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_uv,
+                             hls::stream<ap_uint<8> >& str_cnt_uv,
+                             ap_uint<9>& top_nz_uv, // = ap_top_nz;
+                             ap_uint<9>& left_nz_uv // = ap_left_nz;
+                             );
+
+// Section TopVp8_RecordCoeff_hls_cnt | FindLast (static prototype): maps a packed (WD_LEVEL * 16)-bit level word to a signed 5-bit value; presumably the index of the last non-zero coefficient (negative if none) - confirm.
+static ap_int<5> FindLast(ap_int<WD_LEVEL * 16> level);
+
+// Section TopVp8_RecordCoeff_hls_cnt | VP8RecordCoeffs_hls_str_w_cnt (prototype only): takes a 2-bit ctx, packed coeffs, a first flag and a 5-bit last index plus an 11-bit record stream and an 8-bit count stream; returns int (meaning not visible here).
+int VP8RecordCoeffs_hls_str_w_cnt(ap_uint<2> ctx,
+                                  ap_int<WD_LEVEL * 16> coeffs,
+                                  ap_uint<1> first,
+                                  ap_int<5> last,
+                                  hls::stream<ap_uint<11> >& str_rec,
+                                  hls::stream<ap_uint<8> >& str_cnt);
+
+// Section TopVp8_RecordCoeff_hls_cnt | Record_str (prototype only): takes an 11-bit stream and five fields whose widths sum to 11 (1 + 1 + 3 + 2 + 4); presumably packs them into one stream word - confirm packing order in the definition.
+void Record_str(hls::stream<ap_uint<11> >& str_rec,
+                ap_uint<1> isEnd,
+                ap_uint<1> bit,
+                ap_uint<3> band,
+                ap_uint<2> ctx,
+                ap_uint<4> off);
+
+// Section TopVp8_RecordCoeff_hls_cnt | VP8EncBands_hls (static prototype): maps a 5-bit index n to a 3-bit value; presumably the coefficient-position-to-band lookup - confirm.
+static ap_uint<3> VP8EncBands_hls(ap_uint<5> n);
+
+//////////======================================================================/////////////////////////////
+// TopVp8_RecordProb_hls_cnt (prototype only): takes the MB grid size, the MB-type stream, three 11-bit record streams, three 8-bit count streams and a uint8_t* buffer; returns int (meaning not visible here).
+//////////======================================================================/////////////////////////////
+int TopVp8_RecordProb_hls_cnt(ap_uint<LG2_MAX_NUM_MB_W> mb_w,
+                              ap_uint<LG2_MAX_NUM_MB_H> mb_h,
+                              hls::stream<ap_uint<1> >& str_mb_type,
+                              hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_dc,
+                              hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_ac,
+                              hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_uv,
+                              hls::stream<ap_uint<8> >& str_cnt_dc,
+                              hls::stream<ap_uint<8> >& str_cnt_ac,
+                              hls::stream<ap_uint<8> >& str_cnt_uv,
+                              uint8_t* pout_prob // 4, 8, 3,11 (presumably the [4][8][3][11] table dimensions - confirm)
+                              );
+void TopVp8_RecordProb_hls_cnt_HideDirty(ap_uint<LG2_MAX_NUM_MB_W> mb_w, // prototype only; same parameter list as TopVp8_RecordProb_hls_cnt but returns void instead of int
+                                         ap_uint<LG2_MAX_NUM_MB_H> mb_h,
+                                         hls::stream<ap_uint<1> >& str_mb_type,
+                                         hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_dc,
+                                         hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_ac,
+                                         hls::stream<ap_uint<1 + 1 + 3 + 2 + 4> >& str_rec_uv,
+                                         hls::stream<ap_uint<8> >& str_cnt_dc,
+                                         hls::stream<ap_uint<8> >& str_cnt_ac,
+                                         hls::stream<ap_uint<8> >& str_cnt_uv,
+                                         uint8_t* pout_prob // 4, 8, 3,11 (presumably the [4][8][3][11] table dimensions - confirm)
+                                         );
+
+// Section TopVp8_RecordProb_hls_cnt_HideDirty | RecordPorb_ReadCoeff_dataflow2_cnt (prototype only; "Porb" is the identifier's spelling): takes mb_type, three 11-bit record streams, three count streams and four [8][3][11] stats tables.
+void RecordPorb_ReadCoeff_dataflow2_cnt(ap_uint<1> mb_type,
+                                        hls::stream<ap_uint<11> >& str_rec_dc,
+                                        hls::stream<ap_uint<11> >& str_rec_ac,
+                                        hls::stream<ap_uint<11> >& str_rec_uv,
+                                        hls::stream<ap_uint<8> >& str_cnt_dc,
+                                        hls::stream<ap_uint<8> >& str_cnt_ac,
+                                        hls::stream<ap_uint<8> >& str_cnt_uv,
+                                        uint32_t stats_dc[8][3][11],
+                                        uint32_t stats_ac0_dc[8][3][11],
+                                        uint32_t stats_ac3[8][3][11],
+                                        uint32_t stats_uv[8][3][11]);
+
+// Section TopVp8_RecordProb_hls_cnt_HideDirty
+// RecordPorb_ReadCoeff_dataflow_dc_cnt (prototype only): DC part; takes mb_type, one record stream, one count stream and the stats_dc table.
+void RecordPorb_ReadCoeff_dataflow_dc_cnt(ap_uint<1> mb_type,
+                                          hls::stream<ap_uint<11> >& str_rec_dc,
+                                          hls::stream<ap_uint<8> >& str_cnt,
+                                          uint32_t stats_dc[8][3][11]);
+
+// RecordPorb_ReadCoeff_dataflow_ac_cnt (prototype only): AC part; takes mb_type, one record stream, one count stream and two stats tables (stats_ac0_dc, stats_ac3). NOTE(review): how mb_type selects between the tables is not visible here.
+void RecordPorb_ReadCoeff_dataflow_ac_cnt(ap_uint<1> mb_type,
+                                          hls::stream<ap_uint<11> >& str_rec_ac,
+                                          hls::stream<ap_uint<8> >& str_cnt,
+                                          uint32_t stats_ac0_dc[8][3][11],
+                                          uint32_t stats_ac3[8][3][11]);
+
+// RecordPorb_ReadCoeff_dataflow_uv_cnt (prototype only): chroma part; takes one record stream, one count stream and the stats_uv table (no mb_type argument).
+void RecordPorb_ReadCoeff_dataflow_uv_cnt(hls::stream<ap_uint<11> >& str_rec_uv,
+                                          hls::stream<ap_uint<8> >& str_cnt,
+                                          uint32_t stats_uv[8][3][11]);
+
+// Section TopVp8_RecordProb_hls_cnt_HideDirty
+
+// VP8RecordCoeffs_hls_str_r_cnt (prototype only): takes an 11-bit record stream, an 8-bit count stream and one [8][3][11] stats table; presumably the reader counterpart of VP8RecordCoeffs_hls_str_w_cnt - confirm.
+void VP8RecordCoeffs_hls_str_r_cnt(hls::stream<ap_uint<11> >& str_rec,
+                                   hls::stream<ap_uint<8> >& str_cnt,
+                                   uint32_t stats[8][3][11]);
+
+void VP8RecordCoeffs_hls_str_r_cnt_old(hls::stream<ap_uint<11> >& str_rec, // prototype only; same signature as VP8RecordCoeffs_hls_str_r_cnt
+                                       hls::stream<ap_uint<8> >& str_cnt,
+                                       uint32_t stats[8][3][11]); // NOTE(review): "_old" suggests a superseded variant - confirm it is still needed
+
+// Section TopVp8_RecordProb_hls_cnt_HideDirty | Record_hls (prototype only): takes one bit and a 32-bit value p and returns a 32-bit value; presumably the updated statistics word - confirm.
+ap_uint<32> Record_hls(ap_uint<1> bit, ap_uint<32> p);
+
+// Section TopVp8_RecordProb_hls_cnt_HideDirty | FinalizeTokenProbas_hls (prototype only): takes [4][8][3][11] tables of 32-bit stats and 8-bit coefficients plus an int* dirty; returns int. NOTE(review): which tables are written and what the return value means are not visible here.
+int FinalizeTokenProbas_hls(uint32_t p_stats[4][8][3][11], uint8_t p_coeffs_[4][8][3][11], int* dirty);
+/* 1: hls_DC4 (prototype only) - returns a (WD_PIX * 16)-bit word from abcd and ijkl; presumably the 4x4 DC predictor with abcd = top row, ijkl = left column - confirm. */
+ap_uint<WD_PIX * 16> hls_DC4( // ref:581  lut 56, 4.46+1.25 ,
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<WD_PIX * 4> ijkl);
+
+/* 2: hls_VE4 (prototype only) - returns a (WD_PIX * 16)-bit word from abcd, efgh and pixel X44; presumably the 4x4 vertical predictor (efgh = top-right, X44 = corner) - confirm. */
+ap_uint<WD_PIX * 16> hls_VE4( // ref: lut:56, 3.19+1.25//lut 452vs997, 2.72+1.25ns,
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<WD_PIX * 4> efgh,
+    ap_uint<WD_PIX> X44);
+
+/* 3: hls_HE4 (prototype only) - returns a (WD_PIX * 16)-bit word from ijkl and pixel X44; presumably the 4x4 horizontal predictor - confirm. */
+ap_uint<WD_PIX * 16> hls_HE4( // ref: lut:56, 3.19+1.25
+    ap_uint<WD_PIX * 4> ijkl,
+    ap_uint<WD_PIX> X44);
+
+/* 4: hls_RD4 (prototype only) - returns a (WD_PIX * 16)-bit word from abcd, ijkl and pixel X44; presumably the 4x4 down-right diagonal predictor - confirm. */
+ap_uint<WD_PIX * 16> hls_RD4( // ref: lut:98  3.19+1.25, ,
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<WD_PIX * 4> ijkl,
+    ap_uint<WD_PIX> X44);
+
+/* 5: hls_LD4 (prototype only) - returns a (WD_PIX * 16)-bit word from abcd and efgh; presumably the 4x4 down-left diagonal predictor - confirm. */
+ap_uint<WD_PIX * 16> hls_LD4( // ref: lut:98  3.19+1.25  , ,
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<WD_PIX * 4> efgh);
+
+/* 6: hls_VR4 (prototype only) - returns a (WD_PIX * 16)-bit word from abcd, ijkl and pixel X44; presumably the 4x4 vertical-right predictor - confirm. */
+ap_uint<WD_PIX * 16> hls_VR4( // ref: lut: 100  3.19+1.25 , ,
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<WD_PIX * 4> ijkl,
+    ap_uint<WD_PIX> X44);
+
+/* 7: hls_VL4 (prototype only) - returns a (WD_PIX * 16)-bit word from abcd and efgh; presumably the 4x4 vertical-left predictor - confirm. */
+ap_uint<WD_PIX * 16> hls_VL4( // ref: lut: 100 3.19+1.25 , ,
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<WD_PIX * 4> efgh);
+
+/* 8: hls_HU4 (prototype only) - returns a (WD_PIX * 16)-bit word from ijkl alone; presumably the 4x4 horizontal-up predictor - confirm. */
+ap_uint<WD_PIX * 16> hls_HU4( // ref: lut 54 3.19+1.25 , ,
+    ap_uint<WD_PIX * 4> ijkl);
+
+/* 9: hls_HD4 (prototype only) - returns a (WD_PIX * 16)-bit word from abcd, ijkl and pixel X44; presumably the 4x4 horizontal-down predictor - confirm. */
+ap_uint<WD_PIX * 16> hls_HD4( // ref:544 vs lut:100 ,3.19+1.25 ,
+    ap_uint<WD_PIX * 4> abcd,
+    ap_uint<WD_PIX * 4> ijkl,
+    ap_uint<WD_PIX> X44);
+
+/* 10: hls_TM4 (prototype only) - returns a (WD_PIX * 16)-bit word from abcd, ijkl and pixel X44; presumably the 4x4 TrueMotion predictor - confirm. */
+ap_uint<WD_PIX * 16> hls_TM4(ap_uint<WD_PIX * 4> abcd, ap_uint<WD_PIX * 4> ijkl, ap_uint<WD_PIX> X44);
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+//==================================kernel_2_ArithmeticCoding===========================================//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+// kernel_2_ArithmeticCoding
+//|-memcpy
+//|-Kernel2_top_read
+//|-kernel_2_RecordTokens_pre
+//|-kernel_2_CreateTokens_with_isFinal
+//|-VP8EmitTokens_str_hls_4stages
+//|-PackStr2Mem32_t_NoLast
+//|-PackWideStr2Mem32_t_NoLast
+// Section kernel_2_ArithmeticCoding | Kernel2_top_read (prototype only): takes a uint32_t buffer of SIZE32_MEM_LEVEL words; the remaining arguments are stream references listed under the "output" marker.
+void Kernel2_top_read(uint32_t pin_level[SIZE32_MEM_LEVEL],
+                      // output
+                      hls::stream<ap_int<WD_LEVEL * 16> >& str_level_dc,
+                      hls::stream<ap_int<WD_LEVEL * 16> >& str_level_ac,
+                      hls::stream<ap_int<WD_LEVEL * 16> >& str_level_uv,
+                      hls::stream<ap_uint<64> >& str_pred,
+                      hls::stream<ap_uint<6> >& str_ret,
+                      hls::stream<ap_uint<1> >& str_type_mb,
+                      hls::stream<uint16_t>& str_mb_h,
+                      hls::stream<uint16_t>& str_mb_w);
+// Section kernel_2_ArithmeticCoding | Kernel2_read__array_to_str (prototype only): takes a 256-word uint32_t array and six stream references; presumably unpacks one array's worth of data into the streams - confirm.
+void Kernel2_read__array_to_str(uint32_t pin[256],
+                                hls::stream<ap_int<WD_LEVEL * 16> >& str_level_dc,
+                                hls::stream<ap_int<WD_LEVEL * 16> >& str_level_ac,
+                                hls::stream<ap_int<WD_LEVEL * 16> >& str_level_uv,
+                                hls::stream<ap_uint<64> >& str_pred,
+                                hls::stream<ap_uint<6> >& str_ret,
+                                hls::stream<ap_uint<1> >& str_type_mb);
+// Section kernel_2_ArithmeticCoding | SetVectFrom32bit (prototype only): builds a packed (WD_LEVEL * 16)-bit signed word from memory at pin. NOTE(review): the number of 32-bit words read is not visible here.
+ap_int<WD_LEVEL * 16> SetVectFrom32bit(uint32_t* pin);
+// Section kernel_2_ArithmeticCoding | SetVect64From32bit (prototype only): builds a 64-bit (4 * 16) unsigned word from memory at pin; presumably from two 32-bit words - confirm.
+ap_uint<4 * 16> SetVect64From32bit(uint32_t* pin);
+// Section kernel_2_ArithmeticCoding | kernel_2_RecordTokens_pre (prototype only): takes MB size/type and three level streams, twelve 64-bit streams (str_0..str_3 for each of dc/ac/uv) and *_out copies of the MB size/type streams.
+void kernel_2_RecordTokens_pre(hls::stream<uint16_t>& str_mb_h,
+                               hls::stream<uint16_t>& str_mb_w,
+                               hls::stream<ap_uint<1> >& str_type_mb,
+                               hls::stream<ap_int<WD_LEVEL * 16> >& str_level_dc,
+                               hls::stream<ap_int<WD_LEVEL * 16> >& str_level_ac,
+                               hls::stream<ap_int<WD_LEVEL * 16> >& str_level_uv,
+                               hls::stream<ap_uint<64> >& str_0_dc,
+                               hls::stream<ap_uint<64> >& str_1_dc,
+                               hls::stream<ap_uint<64> >& str_2_dc,
+                               hls::stream<ap_uint<64> >& str_3_dc,
+                               hls::stream<ap_uint<64> >& str_0_ac,
+                               hls::stream<ap_uint<64> >& str_1_ac,
+                               hls::stream<ap_uint<64> >& str_2_ac,
+                               hls::stream<ap_uint<64> >& str_3_ac,
+                               hls::stream<ap_uint<64> >& str_0_uv,
+                               hls::stream<ap_uint<64> >& str_1_uv,
+                               hls::stream<ap_uint<64> >& str_2_uv,
+                               hls::stream<ap_uint<64> >& str_3_uv,
+                               hls::stream<uint16_t>& str_mb_h_out,
+                               hls::stream<uint16_t>& str_mb_w_out,
+                               hls::stream<ap_uint<1> >& str_type_mb_out);
+
+// Section kernel_2_ArithmeticCoding
+// RecordTokens_nrd2_mb_w (prototype only): takes an ap_NoneZero context pointer, int coordinates x_/y_, the MB-type and level streams, twelve 64-bit streams and a forwarded MB-type stream.
+void RecordTokens_nrd2_mb_w(ap_NoneZero* ap_nz,
+                            int x_,
+                            int y_,
+                            hls::stream<ap_uint<1> >& str_type_mb,
+                            hls::stream<ap_int<WD_LEVEL * 16> >& str_level_dc,
+                            hls::stream<ap_int<WD_LEVEL * 16> >& str_level_ac,
+                            hls::stream<ap_int<WD_LEVEL * 16> >& str_level_uv,
+                            hls::stream<ap_uint<64> >& str_0_dc,
+                            hls::stream<ap_uint<64> >& str_1_dc,
+                            hls::stream<ap_uint<64> >& str_2_dc,
+                            hls::stream<ap_uint<64> >& str_3_dc,
+                            hls::stream<ap_uint<64> >& str_0_ac,
+                            hls::stream<ap_uint<64> >& str_1_ac,
+                            hls::stream<ap_uint<64> >& str_2_ac,
+                            hls::stream<ap_uint<64> >& str_3_ac,
+                            hls::stream<ap_uint<64> >& str_0_uv,
+                            hls::stream<ap_uint<64> >& str_1_uv,
+                            hls::stream<ap_uint<64> >& str_2_uv,
+                            hls::stream<ap_uint<64> >& str_3_uv,
+                            hls::stream<ap_uint<1> >& str_type_mb_out);
+
+// Section kernel_2_ArithmeticCoding | VP8RecordCoeffTokens_hls_w (prototype only): takes a 2-bit ctx, 2-bit coeff_type, 5-bit last index and packed coeffs plus four 64-bit streams (str_0..str_3).
+void VP8RecordCoeffTokens_hls_w(ap_uint<2> ctx,
+                                ap_uint<2> coeff_type,
+                                ap_int<5> last,
+                                ap_int<WD_LEVEL * 16> coeffs,
+                                hls::stream<ap_uint<64> >& str_0,
+                                hls::stream<ap_uint<64> >& str_1,
+                                hls::stream<ap_uint<64> >& str_2,
+                                hls::stream<ap_uint<64> >& str_3);
+
+// Section kernel_2_ArithmeticCoding | PackToken_hls / PackConstantToken_hls (prototypes only): identical signatures - a 64-bit word w by non-const reference, a 2-bit be, and 32-bit bit and proba_idx values.
+void PackToken_hls(ap_uint<64>& w, ap_uint<2> be, uint32_t bit, uint32_t proba_idx); // NOTE(review): presumably be selects a slot inside w - confirm
+void PackConstantToken_hls(ap_uint<64>& w, ap_uint<2> be, uint32_t bit, uint32_t proba_idx);
+// TokensStr0_hls (prototype only): takes a 2-bit ctx, 2-bit coeff_type and 5-bit last index plus the str_0 stream.
+void TokensStr0_hls(ap_uint<2> ctx, ap_uint<2> coeff_type, ap_int<5> last, hls::stream<ap_uint<64> >& str_0);
+
+// TokensStr1_hls (prototype only): takes predicate flags derived from a coefficient magnitude v (see per-parameter notes), two 11-bit base ids, v itself and the str_1 stream.
+
+void TokensStr1_hls(ap_uint<1> isV_N0, // = v!=0,
+                    ap_uint<1> isV_B1, // = v>1
+                    ap_uint<1> sign,
+                    ap_uint<1> isLastBEi, // = i<last,
+                    ap_uint<11> base_id,
+                    ap_uint<11> base_id_next,
+                    ap_uint<11> v,
+                    hls::stream<ap_uint<64> >& str_1);
+
+// TokensStr2_hls (prototype only): takes four predicate flags on v (see per-parameter notes), an 11-bit base id and the str_2 stream.
+void TokensStr2_hls(ap_uint<1> isV_B4,  // = v>4;
+                    ap_uint<1> isV_N2,  // = v!=2;
+                    ap_uint<1> isV_4,   // = v==4;
+                    ap_uint<1> isV_B10, // = v>10;
+                    ap_uint<11> base_id,
+                    hls::stream<ap_uint<64> >& str_2);
+
+// TokensStr3_hls (prototype only): takes four predicate flags on v (see per-parameter notes), an 11-bit base id and the str_3 stream.
+void TokensStr3_hls(ap_uint<1> isV_B6,   // = v>6;
+                    ap_uint<1> isV_6,    // = v==6;
+                    ap_uint<1> isV_BE9,  // = v>=9;
+                    ap_uint<1> isV_even, // = 1-v&1;//!(v & 1)
+                    ap_uint<11> base_id,
+                    hls::stream<ap_uint<64> >& str_3);
+
+// Section kernel_2_ArithmeticCoding | kernel_2_CreateTokens_with_isFinal (prototype only): takes MB size/type streams and twelve 64-bit streams, plus *_out MB size streams and a 16-bit token stream str_tokens_final.
+void kernel_2_CreateTokens_with_isFinal(hls::stream<uint16_t>& str_mb_h,
+                                        hls::stream<uint16_t>& str_mb_w,
+                                        hls::stream<ap_uint<1> >& str_type_mb,
+                                        hls::stream<ap_uint<64> >& str_0_dc,
+                                        hls::stream<ap_uint<64> >& str_1_dc,
+                                        hls::stream<ap_uint<64> >& str_2_dc,
+                                        hls::stream<ap_uint<64> >& str_3_dc,
+                                        hls::stream<ap_uint<64> >& str_0_ac,
+                                        hls::stream<ap_uint<64> >& str_1_ac,
+                                        hls::stream<ap_uint<64> >& str_2_ac,
+                                        hls::stream<ap_uint<64> >& str_3_ac,
+                                        hls::stream<ap_uint<64> >& str_0_uv,
+                                        hls::stream<ap_uint<64> >& str_1_uv,
+                                        hls::stream<ap_uint<64> >& str_2_uv,
+                                        hls::stream<ap_uint<64> >& str_3_uv,
+                                        hls::stream<uint16_t>& str_mb_h_out,
+                                        hls::stream<uint16_t>& str_mb_w_out,
+                                        hls::stream<ap_uint<16> >& str_tokens_final);
+
+// Section kernel_2_ArithmeticCoding | RecordTokens_nrd2_mb_r_str_AddFinal (prototype only): takes the MB-type stream, twelve 64-bit streams, a 16-bit token stream and an isFinal flag; presumably isFinal marks the last macroblock - confirm.
+void RecordTokens_nrd2_mb_r_str_AddFinal(hls::stream<ap_uint<1> >& str_type_mb,
+                                         hls::stream<ap_uint<64> >& str_0_dc,
+                                         hls::stream<ap_uint<64> >& str_1_dc,
+                                         hls::stream<ap_uint<64> >& str_2_dc,
+                                         hls::stream<ap_uint<64> >& str_3_dc,
+                                         hls::stream<ap_uint<64> >& str_0_ac,
+                                         hls::stream<ap_uint<64> >& str_1_ac,
+                                         hls::stream<ap_uint<64> >& str_2_ac,
+                                         hls::stream<ap_uint<64> >& str_3_ac,
+                                         hls::stream<ap_uint<64> >& str_0_uv,
+                                         hls::stream<ap_uint<64> >& str_1_uv,
+                                         hls::stream<ap_uint<64> >& str_2_uv,
+                                         hls::stream<ap_uint<64> >& str_3_uv,
+                                         hls::stream<ap_uint<16> >& tokens,
+                                         bool isFinal);
+
+// Section kernel_2_ArithmeticCoding | VP8RecordCoeffTokens_hls_r_str_AddFanel (prototype only; "Fanel" is the identifier's spelling): takes four 64-bit streams, a 16-bit token stream and isFinal; returns int (meaning not visible here).
+int VP8RecordCoeffTokens_hls_r_str_AddFanel(hls::stream<ap_uint<64> >& str_0,
+                                            hls::stream<ap_uint<64> >& str_1,
+                                            hls::stream<ap_uint<64> >& str_2,
+                                            hls::stream<ap_uint<64> >& str_3,
+                                            hls::stream<ap_uint<16> >& tokens,
+                                            bool isFinal);
+
+// Section kernel_2_ArithmeticCoding | AddConstantToken_hls_AddFanel (prototype only): takes a 16-bit token stream, a bit, an 11-bit proba_idx and isFinal; returns a 1-bit value (meaning not visible here).
+ap_uint<1> AddConstantToken_hls_AddFanel(hls::stream<ap_uint<16> >& str_tokens,
+                                         ap_uint<1> bit,
+                                         ap_uint<11> proba_idx,
+                                         ap_uint<1> isFinal);
+// Section kernel_2_ArithmeticCoding | AddToken_hls_AddFanel (prototype only): same signature as AddConstantToken_hls_AddFanel; returns a 1-bit value (meaning not visible here).
+ap_uint<1> AddToken_hls_AddFanel(hls::stream<ap_uint<16> >& str_tokens,
+                                 ap_uint<1> bit,
+                                 ap_uint<11> proba_idx,
+                                 ap_uint<1> isFinal);
+// Section kernel_2_ArithmeticCoding
+// VP8EmitTokens_allstr_hls_dataflow_4stages (prototype only): takes an output-sized buffer (SIZE32_MEM_BW words), the token stream, a flat 4*8*3*11 probability table and five bit-writer state values by reference.
+void VP8EmitTokens_allstr_hls_dataflow_4stages(uint32_t pout_bw[SIZE32_MEM_BW],
+                                               hls::stream<ap_uint<16> >& str_token,
+                                               uint8_t probas[4 * 8 * 3 * 11],
+                                               ap_uint<8>& bw_range,  // = 254;      // range-1
+                                               ap_uint<24>& bw_value, //= 0;
+                                               ap_int<4>& bw_nb_bits, // = -8;
+                                               ap_uint<32>& bw_pos,   //= 0
+                                               ap_uint<16>& bw_run); // NOTE(review): the "= ..." notes above look like expected initial values - confirm at the call site
+//==================================kernel_2_ArithmeticCoding===========================================//
+void VP8EmitTokens_str_hls_4stages(uint32_t pout_bw[SIZE32_MEM_BW],
+                                   hls::stream<ap_uint<16> >& str_token,
+                                   uint8_t probas[4 * 8 * 3 * 11]);
+//==================================kernel_2_ArithmeticCoding===========================================//
+ap_uint<8> hls_AC_range_str(hls::stream<ap_uint<16> >& str_token,
+                            uint8_t probas[4 * 8 * 3 * 11],
+                            hls::stream<ap_uint<2 + 3 + 8> >& str_fnl_bit_shift_split_1);
+//==================================kernel_2_ArithmeticCoding===========================================//
+ap_uint<4 + 24> hls_AC_value_str(hls::stream<ap_uint<2 + 3 + 8> >& str_fnl_bit_shift_split_1,
+                                 hls::stream<ap_uint<18> >& str_fnl_isBit_Bits);
+//==================================kernel_2_ArithmeticCoding===========================================//
+ap_uint<16> VP8PutBit_hls_BytePackage_str_run(hls::stream<ap_uint<18> >& str_Last_isBit_Bits,
+                                              hls::stream<ap_uint<26> >& str_isFinal_run_cy_pre);
+//==================================kernel_2_ArithmeticCoding===========================================//
+ap_uint<32> VP8PutBit_hls_BytePackage_str_pos(hls::stream<ap_uint<26> >& str_isFinal_run_cy_pre,
+                                              hls::stream<ap_uint<9> >& str_Last_byte);
+
+// PackStr2Mem_t: drain str_s until a word with its top bit set has been read, pack the payloads into words and write them to pdes in chunks of N_BURST; returns the number of stream words read.
+template <int W_STR, int B_LAST, int N_BURST> // B_LAST is not referenced in the body
+int PackStr2Mem_t(uint32_t* pdes, hls::stream<ap_uint<W_STR> >& str_s) {
+    const int N_BYTE = ((W_STR - 1 + 7) / 8); // bytes per payload: the W_STR-1 bits below the flag bit, rounded up
+    const int N_PACK = (4 / N_BYTE); // payload slots per output word; becomes 0 if W_STR-1 > 32 -- NOTE(review): presumably never instantiated that wide
+    uint32_t* ptmp = pdes; // write cursor, advanced by N_BURST after every chunk
+    int num_w = 0; // number of words read from str_s (the return value)
+    ap_uint<1> isLast = 0; // latched from bit W_STR-1 of the most recent word read
+    uint32_t buff[512]; // staging buffer for one chunk; only the first N_BURST entries are used
+#ifndef __SYNTHESIS__
+    assert(N_BURST <= 512);
+#endif
+    ap_uint<N_BYTE * 8 * N_PACK> tmp; // output word under construction: N_PACK slots of N_BYTE bytes each
+    // hls::stream<ap_uint<1>> str_last_buff;
+    do {
+    PACK_BURST:
+        for (int i = 0; i < N_BURST * N_PACK; i++) { // one iteration per payload slot of the chunk
+#pragma HLS PIPELINE II = 1
+            ap_uint<2> bs = i % N_PACK; // slot index inside the current output word
+            if (isLast == 0) {
+                ap_uint<W_STR> w = str_s.read();
+                isLast = w(W_STR - 1, W_STR - 1); // top bit flags the final word of the stream
+                tmp(bs * N_BYTE * 8 + W_STR - 2, bs * N_BYTE * 8) = w(W_STR - 2, 0); // payload into its slot; slot bits above W_STR-2 are not assigned here
+                num_w++;
+            } else
+                tmp(bs * N_BYTE * 8 + W_STR - 2, bs * N_BYTE * 8) = 0; // after the final word, the remaining slots of the chunk are zero-filled
+            if (bs == N_PACK - 1) {
+                buff[i / N_PACK] = tmp; // last slot filled: stage the completed word
+                // str_last_buff.write(isLast);
+            }
+        }
+    // memcpy((void*)ptmp, (void*)buff, N_BURST*N_PACK);
+    MEMCPY_N_BURST:
+        for (int j = 0; j < N_BURST; j++)
+#pragma HLS PIPELINE II = 1
+            ptmp[j] = buff[j];
+        ptmp += N_BURST; // a full N_BURST words are written every pass, including the final partial chunk
+    } while (isLast == 0);
+    return num_w;
+}
+// PackStr2Mem32_t_NoLast: read num_str words from str_s, pack them into words and write them to pdes in chunks of N_BURST; returns the number of stream words read.
+template <int W_STR, int N_BURST>
+int PackStr2Mem32_t_NoLast(uint32_t* pdes, hls::stream<ap_uint<W_STR> >& str_s, int num_str) {
+    const int N_BYTE = ((W_STR + 7) / 8); // bytes per payload; all W_STR bits are payload here (no flag bit), so W_STR itself is rounded up
+    const int N_PACK = (4 / N_BYTE); // payload slots per output word; becomes 0 if W_STR > 32 -- NOTE(review): presumably never instantiated that wide
+    uint32_t* ptmp = pdes; // write cursor, advanced by N_BURST after every chunk
+    int num_w = 0; // number of words read from str_s (the return value)
+    ap_uint<1> isLast = 0; // set once the num_str-th word has been read
+    uint32_t buff[512]; // staging buffer for one chunk; only the first N_BURST entries are used
+#ifndef __SYNTHESIS__
+    assert(N_BURST <= 512);
+#endif
+    ap_uint<N_BYTE * 8 * N_PACK> tmp; // output word under construction: N_PACK slots of N_BYTE bytes each
+    // NOTE(review): with num_str <= 0 the test below never fires and the loop keeps reading; callers presumably pass num_str >= 1.
+    do {
+    PACK_BURST_NOLAST:
+        for (int i = 0; i < N_BURST * N_PACK; i++) { // one iteration per payload slot of the chunk
+#pragma HLS PIPELINE II = 1
+            ap_uint<2> bs = i % N_PACK; // slot index inside the current output word
+            if (isLast == 0) {
+                ap_uint<W_STR> w = str_s.read();
+                if (num_w == num_str - 1) isLast = 1; // this read is the num_str-th word: stop reading afterwards
+                tmp(bs * N_BYTE * 8 + W_STR - 1, bs * N_BYTE * 8) = w(W_STR - 1, 0); // full word into its slot
+                num_w++;
+            } else
+                tmp(bs * N_BYTE * 8 + W_STR - 1, bs * N_BYTE * 8) = 0; // after the last word, the remaining slots of the chunk are zero-filled
+            if (bs == N_PACK - 1) {
+                buff[i / N_PACK] = tmp; // last slot filled: stage the completed word
+            }
+        }
+    N_BURST_NOLAST:
+        for (int j = 0; j < N_BURST; j++)
+#pragma HLS PIPELINE II = 1
+            ptmp[j] = buff[j];
+        ptmp += N_BURST; // a full N_BURST words are written every pass, including the final partial chunk
+    } while (isLast == 0);
+    return num_w;
+}
+// PackWideStr2Mem32_t_NoLast: read num_str words from str_w, split each into 32-bit pieces (low piece first) and copy them to pdes in groups of W_BURST stream words.
+template <int W_STR, int W_BURST>
+void PackWideStr2Mem32_t_NoLast(uint32_t* pdes, hls::stream<ap_uint<64> >& str_w, int num_str) { // stream width is fixed at 64 bits regardless of W_STR
+    uint32_t buff[512]; // staging buffer for one group of W_BURST * NUM_B32 words
+    const int NUM_B32 = (W_STR + 31) >> 5; // number of 32-bit pieces per W_STR-bit word, rounded up
+#ifndef __SYNTHESIS__
+    assert(W_BURST * NUM_B32 <= 512);
+#endif
+
+PACK_WIDE_NOLAST:
+    for (int num_w = 0; num_w < num_str;) { // num_w is advanced inside the inner loop, once per stream read
+        uint16_t offset = 0; // next free index in buff for this group
+    W_BURST:
+        for (int n_wb = 0; n_wb < W_BURST; n_wb++) {
+#pragma HLS PIPELINE II = 1
+            ap_uint<W_STR> w;
+            if (num_w < num_str) {
+                w = str_w.read();
+                num_w++;
+            } else
+                w = 0; // input exhausted: pad the rest of the group with zeros
+
+        SHIFT_LEFT_32:
+            for (int n_b32 = 0; n_b32 < NUM_B32; n_b32++)
+#pragma HLS PIPELINE II = 1
+                buff[offset++] = w((n_b32 << 5) + 31, n_b32 << 5); // NOTE(review): the top piece selects past bit W_STR-1 unless W_STR is a multiple of 32 -- confirm instantiations
+        } // n_wb
+        memcpy(pdes, buff, offset * WD_BUS_BYTE); // WD_BUS_BYTE is defined elsewhere; presumably the byte size of one 32-bit word -- TODO confirm
+        pdes += offset;
+    } // num_w
+}
+
+#endif
diff --git a/codec/L2/meta/api.json b/codec/L2/meta/api.json
index d64036fd83..e4cd8527f7 100644
--- a/codec/L2/meta/api.json
+++ b/codec/L2/meta/api.json
@@ -1,6 +1,6 @@
 {
 	"schema": "vitis_libraries_api_list_schema-1.0",
-	"api_list": [
+		"api_list": [
 		{
 			"api_name": "xf::codec::kernelJpegDecoderTop",
 			"spec": {
@@ -18,28 +18,899 @@
 				"instance": "function",
 				"parameters": [],
 				"ports": [
-					{
-						"name": "jpeg_pointer",
-						"direction": "",
-						"type": "ap_uint< (16) > *"
-					},
-					{
-						"name": "size",
-						"direction": "",
-						"type": "const int"
-					},
-					{
-						"name": "yuv_mcu_pointer",
-						"direction": "",
-						"type": "ap_uint< 64 > *"
-					},
-					{
-						"name": "info",
-						"direction": "",
-						"type": "ap_uint< 32 > *"
-					}
+				{
+					"name": "jpeg_pointer",
+					"direction": "",
+					"type": "ap_uint< (16) > *"
+				},
+				{
+					"name": "size",
+					"direction": "",
+					"type": "const int"
+				},
+				{
+					"name": "yuv_mcu_pointer",
+					"direction": "",
+					"type": "ap_uint< 64 > *"
+				},
+				{
+					"name": "info",
+					"direction": "",
+					"type": "ap_uint< 32 > *"
+				}
+				]
+			}
+		},
+		{
+			"api_name": "hls_ANSclusterHistogram",
+			"spec": {
+				"schema": "vitis_libraries_api_list_schema-1.0",
+				"api_name": "hls_ANSclusterHistogram",
+				"display_name": "hls_ANSclusterHistogram",
+				"brief": "JXL ANS cluster Histogram kernel",
+				"target_domain": "PL",
+				"header_file_name": [
+					"hls_cluster_histogram.hpp"
+				],
+				"search_paths": [
+					"L2/demos/jxlEnc/acc_cluster_histogram/kernel"
+				],
+				"instance": "function",
+				"parameters": [],
+				"ports": [
+				{
+					"name": "config",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "histograms0_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histo_totalcnt0_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "histo_size0_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "nonempty_histo0_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "ctx_map0_ptr",
+					"direction": "",
+					"type": "uint8_t*"
+				},
+				{
+					"name": "histograms_clusd0_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histo_size_clusd0_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "histo_clusdin0_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "histograms1_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histo_totalcnt1_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "histo_size1_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "nonempty_histo1_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "ctx_map1_ptr",
+					"direction": "",
+					"type": "uint8_t*"
+				},
+				{
+					"name": "histograms_clusd1_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histo_size_clusd1_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "histograms_clusdin1_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histograms2_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histo_totalcnt2_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histo_size2_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "nonempty_histo2_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "ctx_map2_ptr",
+					"direction": "",
+					"type": "uint8_t*"
+				},
+				{
+					"name": "histograms_clusd2_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histo_size_clusd2_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "histograms_clusdin2_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histograms3_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histo_totalcnt3_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "histo_size3_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "nonempty_histo3_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "ctx_map3_ptr",
+					"direction": "",
+					"type": "uint8_t*"
+				},
+				{
+					"name": "histograms_clusd3_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histo_size_clusd3_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "histograms_clusdin3_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histograms4_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histo_totalcnt4_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "histo_size4_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "nonempty_histo4_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "ctx_map4_ptr",
+					"direction": "",
+					"type": "uint8_t*"
+				},
+				{
+					"name": "histograms_clusd4_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histo_size_clusd4_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "histograms_clusdin4_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				}
+				]
+			}
+		},
+		{
+			"api_name": "hls_lossy_enc_compute",
+			"spec": {
+				"schema": "vitis_libraries_api_list_schema-1.0",
+				"api_name": "hls_lossy_enc_compute",
+				"display_name": "hls_lossy_enc_compute",
+				"brief": "Level 2 : kernel implement for JXL lossy frame encode computing",
+				"target_domain": "PL",
+				"header_file_name": [
+					"hls_lossy_enc_compute.hpp"
+				],
+				"search_paths": [
+					"L2/demos/jxlEnc/acc_lossy_enc_compute/kernel"
+				],
+				"instance": "function",
+				"parameters": [],
+				"ports": [
+				{
+					"name": "config",
+					"direction": "",
+					"type": "int*"
+				},
+				{
+					"name": "config_fl",
+					"direction": "",
+					"type": "float*"
+				},
+				{
+					"name": "hls_opsin_1",
+					"direction": "",
+					"type": "float*"
+				},
+				{
+					"name": "hls_opsin_2",
+					"direction": "",
+					"type": "float*"
+				},
+				{
+					"name": "hls_opsin_3",
+					"direction": "",
+					"type": "float*"
+				},
+				{
+					"name": "quant_field_row",
+					"direction": "",
+					"type": "float*"
+				},
+				{
+					"name": "masking_field_row",
+					"direction": "",
+					"type": "float*"
+				},
+				{
+					"name": "aq_map_f",
+					"direction": "",
+					"type": "float*"
+				},
+				{
+					"name": "cmap_axi",
+					"direction": "",
+					"type": "int8_t*  "
+				},
+				{
+					"name": "ac_coef_axiout",
+					"direction": "",
+					"type": "int*"
+				},
+				{
+					"name": "strategy_all",
+					"direction": "",
+					"type": "uint8_t*"
+				},
+				{
+					"name": "raw_quant_field_i",
+					"direction": "",
+					"type": "int*"
+				},
+				{
+					"name": "hls_order",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "hls_dc8x8",
+					"direction": "",
+					"type": "float*"
+				},
+				{
+					"name": "hls_dc16x16",
+					"direction": "",
+					"type": "float*"
+				},
+				{
+					"name": "hls_dc32x32",
+					"direction": "",
+					"type": "float*"
+				}
+				]
+			}
+		},
+
+		{
+			"api_name": "hls_ANSinitHistogram",
+			"spec": {
+				"schema": "vitis_libraries_api_list_schema-1.0",
+				"api_name": "hls_ANSinitHistogram",
+				"display_name": "hls_ANSinitHistogram",
+				"brief": "",
+				"target_domain": "PL",
+				"header_file_name": [
+					"hls_init_histogram.hpp"
+				],
+				"search_paths": [
+					"L2/demos/jxlEnc/acc_tokInit_histogram/kernel"
+				],
+				"instance": "function",
+				"parameters": [],
+				"ports": [
+				{
+					"name": "config[32]",
+					"direction": "",
+					"type": "int"
+				},
+				{
+					"name": "ac_coeff_ordered_ddr[ALL_PIXEL]",
+					"direction": "",
+					"type": "int32_t"
+				},
+				{
+					"name": "strategy_ddr[MAX_NUM_BLK88]",
+					"direction": "",
+					"type": "int32_t"
+				},
+				{
+					"name": "qf_ddr[MAX_NUM_BLK88]",
+					"direction": "",
+					"type": "int32_t"
+				},
+				{
+					"name": "qdc_ddr[MAX_NUM_BLK88]",
+					"direction": "",
+					"type": "uint8_t"
+				},
+				{
+					"name": "ctx_map[MAX_QF_THRESH_SIZE]",
+					"direction": "",
+					"type": "uint8_t"
+				},
+				{
+					"name": "qf_thresholds[MAX_CTX_MAP_SIZE]",
+					"direction": "",
+					"type": "uint32_t"
+				},
+				{
+					"name": "ac_tokens_ddr[MAX_AC_TOKEN_SIZE]",
+					"direction": "",
+					"type": "uint64_t"
+				},
+				{
+					"name": "tokens0_ptr",
+					"direction": "",
+					"type": "ap_uint<64>*"
+				},
+				{
+					"name": "tokens1_ptr",
+					"direction": "",
+					"type": "ap_uint<64>*"
+				},
+				{
+					"name": "tokens2_ptr",
+					"direction": "",
+					"type": "ap_uint<64>*"
+				},
+				{
+					"name": "tokens3_ptr",
+					"direction": "",
+					"type": "ap_uint<64>*"
+				},
+				{
+					"name": "histograms0_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histograms_size0_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "total_count0_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "nonempty0_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "histograms1_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histograms_size1_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "total_count1_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "nonempty1_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "histograms2_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histograms_size2_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "total_count2_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "nonempty2_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "histograms3_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histograms_size3_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "total_count3_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "nonempty3_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "histograms4_ptr",
+					"direction": "",
+					"type": "int32_t*"
+				},
+				{
+					"name": "histograms_size4_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "total_count4_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "nonempty4_ptr",
+					"direction": "",
+					"type": "uint32_t*"
+				}
+				]
+			}
+		},
+		{
+			"api_name": "xf::image::jpegDecLeptonEnc",
+			"spec": {
+				"schema": "vitis_libraries_api_list_schema-1.0",
+				"api_name": "xf::image::jpegDecLeptonEnc",
+				"display_name": "jpegDecLeptonEnc",
+				"brief": "",
+				"target_domain": "PL",
+				"header_file_name": [
+					"jpeg_dec_lepton_enc.hpp"
+				],
+				"instance": "function",
+				"search_paths": [
+					"L2/include/hw/leptonEnc/lepton"
+				],
+				"parameters": [],
+				"ports": [
+				{
+					"name": "datainDDR",
+					"direction": "",
+					"type": "ap_uint<AXI_WIDTH>*"
+				},
+				{
+					"name": "jpgSize",
+					"direction": "",
+					"type": "int"
+				},
+				{
+					"name": "arithInfo",
+					"direction": "",
+					"type": "int"
+				},
+				{
+					"name": "res",
+					"direction": "",
+					"type": "ap_uint<8>*"
+				}
+				]
+			}
+		},
+		{
+			"api_name": "kernel1Top",
+			"spec": {
+				"schema": "vitis_libraries_api_list_schema-1.0",
+				"api_name": "kernel1Top",
+				"display_name": "kernel1Top",
+				"brief": "",
+				"target_domain": "PL",
+				"header_file_name": [
+					"XAccPIKKernel1.hpp"
+				],
+				"search_paths": [
+					"L2/include/hw/pikEnc"
+				],
+				"instance": "function",
+				"parameters": [],
+				"ports": [
+				{
+					"name": "config[MAX_NUM_CONFIG]",
+					"direction": "",
+					"type": "ap_uint<32>*"
+				},
+				{
+					"name": "rbuf[BUF_DEPTH / 2]",
+					"direction": "",
+					"type": "ap_uint<64>*"
+				},
+				{
+					"name": "axi_out[AXI_OUT]",
+					"direction": "",
+					"type": "ap_uint<32>*"
+				},
+				{
+					"name": "axi_cmap[AXI_CMAP]",
+					"direction": "",
+					"type": "ap_uint<32>*"
+				},
+				{
+					"name": "axi_qf[AXI_QF]",
+					"direction": "",
+					"type": "ap_uint<32>*"
+				}
+				]
+			}
+		},
+		{
+			"api_name": "kernel2Top",
+			"spec": {
+				"schema": "vitis_libraries_api_list_schema-1.0",
+				"api_name": "kernel2Top",
+				"display_name": "kernel2Top",
+				"brief": "",
+				"target_domain": "PL",
+				"header_file_name": [
+					"XAccPIKKernel2.hpp"
+				],
+				"search_paths": [
+					"L2/include/hw/pikEnc"
+				],
+				"instance": "function",
+				"parameters": [],
+				"ports": [
+				{
+					"name": "config[MAX_NUM_CONFIG]",
+					"direction": "",
+					"type": "ap_uint<AXI_SZ>"
+				},
+				{
+					"name": "src[AXI_OUT / 2]",
+					"direction": "",
+					"type": "ap_uint<2 * AXI_SZ>"
+				},
+				{
+					"name": "quant_field_in[AXI_QF]",
+					"direction": "",
+					"type": "ap_uint<AXI_SZ>"
+				},
+				{
+					"name": "cmap[AXI_CMAP]",
+					"direction": "",
+					"type": "ap_uint<AXI_SZ>"
+				},
+				{
+					"name": "ac[MAX_NUM_AC]",
+					"direction": "",
+					"type": "ap_uint<AXI_SZ>"
+				},
+				{
+					"name": "dc[MAX_NUM_DC]",
+					"direction": "",
+					"type": "ap_uint<AXI_SZ>"
+				},
+				{
+					"name": "quant_field_out[AXI_QF]",
+					"direction": "",
+					"type": "ap_uint<AXI_SZ>"
+				},
+				{
+					"name": "ac_strategy[MAX_NUM_BLOCK88]",
+					"direction": "",
+					"type": "ap_uint<AXI_SZ>"
+				},
+				{
+					"name": "block[MAX_NUM_BLOCK88]",
+					"direction": "",
+					"type": "ap_uint<AXI_SZ>"
+				},
+				{
+					"name": "order[MAX_NUM_ORDER]",
+					"direction": "",
+					"type": "ap_uint<AXI_SZ>"
+				}
+				]
+			}
+		},
+		{
+			"api_name": "kernel3Top",
+			"spec": {
+				"schema": "vitis_libraries_api_list_schema-1.0",
+				"api_name": "kernel3Top",
+				"display_name": "kernel3Top",
+				"brief": "",
+				"target_domain": "PL",
+				"header_file_name": [
+					"XAccPIKKernel3.hpp"
+				],
+				"search_paths": [
+					"L2/include/hw/pikEnc"
+				],
+				"instance": "function",
+				"parameters": [],
+				"ports": [
+				{
+					"name": "config",
+					"direction": "",
+					"type": "ap_uint<32>*"
+				},
+				{
+					"name": "ddr_ac",
+					"direction": "",
+					"type": "ap_uint<32>*"
+				},
+				{
+					"name": "ddr_dc",
+					"direction": "",
+					"type": "ap_uint<32>*"
+				},
+				{
+					"name": "ddr_quant_field",
+					"direction": "",
+					"type": "ap_uint<32>*"
+				},
+				{
+					"name": "ddr_ac_strategy",
+					"direction": "",
+					"type": "ap_uint<32>*"
+				},
+				{
+					"name": "ddr_block",
+					"direction": "",
+					"type": "ap_uint<32>*"
+				},
+				{
+					"name": "hls_order",
+					"direction": "",
+					"type": "ap_uint<32>*"
+				},
+				{
+					"name": "histo_cfg",
+					"direction": "",
+					"type": "ap_uint<32>*"
+				},
+				{
+					"name": "dc_histo_code_out",
+					"direction": "",
+					"type": "ap_uint<32>*"
+				},
+				{
+					"name": "dc_code_out",
+					"direction": "",
+					"type": "ap_uint<32>*"
+				},
+				{
+					"name": "ac_histo_code_out",
+					"direction": "",
+					"type": "ap_uint<32>*"
+				},
+				{
+					"name": "ac_code_out",
+					"direction": "",
+					"type": "ap_uint<32>*"
+				}
+				]
+			}
+		},
+		{
+			"api_name": "kernel_resize",
+			"spec": {
+				"schema": "vitis_libraries_api_list_schema-1.0",
+				"api_name": "kernel_resize",
+				"display_name": "kernel_resize",
+				"brief": "",
+				"target_domain": "PL",
+				"header_file_name": [
+					"kernel_resize.hpp"
+				],
+				"search_paths": [
+					"L2/demos/resize/kernel"
+				],
+				"instance": "function",
+				"parameters": [],
+				"ports": [
+				{
+					"name": "configs",
+					"direction": "",
+					"type": "ap_uint<32>*   "
+				},
+				{
+					"name": "axi_src",
+					"direction": "",
+					"type": "ap_uint<64>*"
+				},
+				{
+					"name": "axi_dst",
+					"direction": "",
+					"type": "ap_uint<64>*"
+				}
+				]
+			}
+		},
+		{
+			"api_name": "kernel_IntraPredLoop2_NoOut_1",
+			"spec": {
+				"schema": "vitis_libraries_api_list_schema-1.0",
+				"api_name": "kernel_IntraPredLoop2_NoOut_1",
+				"display_name": "kernel_IntraPredLoop2_NoOut_1",
+				"brief": "",
+				"target_domain": "PL",
+				"header_file_name": [
+					"vp8_hls_syn.h"
+				],
+				"search_paths": [
+					"L2/include/hw/webpEnc"
+				],
+				"instance": "function",
+				"parameters": [],
+				"ports": [
+				{
+					"name": "p_info",
+					"direction": "",
+					"type": "int32_t* "
+				},
+				{
+					"name": "ysrc",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "usrc",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "vsrc",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "pout_level",
+					"direction": "",
+					"type": "int32_t* "
+				},
+				{
+					"name": "pout_prob",
+					"direction": "",
+					"type": "uint8_t* "
+				}
+				]
+			}
+		},
+		{
+			"api_name": "kernel_2_ArithmeticCoding_8",
+			"spec": {
+				"schema": "vitis_libraries_api_list_schema-1.0",
+				"api_name": "kernel_2_ArithmeticCoding_8",
+				"display_name": "kernel_2_ArithmeticCoding_8",
+				"brief": "",
+				"target_domain": "PL",
+				"header_file_name": [
+					"vp8_hls_syn.h"
+				],
+				"search_paths": [
+					"L2/include/hw/webpEnc"
+				],
+				"instance": "function",
+				"parameters": [],
+				"ports": [
+				{
+					"name": "pin_level",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "pin_prob",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "pout_bw",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "pout_ret",
+					"direction": "",
+					"type": "uint32_t*"
+				},
+				{
+					"name": "pout_pred",
+					"direction": "",
+					"type": "uint32_t*"
+				}
 				]
 			}
 		}
 	]
-}
\ No newline at end of file
+}
diff --git a/codec/README.md b/codec/README.md
index ff87b9b4f6..90c05116f5 100644
--- a/codec/README.md
+++ b/codec/README.md
@@ -1,12 +1,17 @@
 # Codec Library
 
-Codec Library is an open-sourced library written in C/C++ for accelerating image coding, decoding and related processing algorithms. Currently, 1 algorithm, JPEG decoding, is accelerated.
+Codec Library is an open-sourced library written in C/C++ for accelerating image coding, decoding and related processing algorithms. Currently, 6 kinds of algorithms are accelerated, including JPEG decoding, Pik encoding, WebP encoding, Lepton encoding, JPEG-XL encoding and bicubic resizing.
 
 ## Overview
 
 The brief information about algorithms accelerated by Codec Library:
 
 1. JPEG decoding: one L2 API is provided for accelerating entire JPEG decoding process, which supports the ‘Sequential DCT-based mode’ of ISO/IEC 10918-1 standard. It can process 1 Huffman token and create up to 8 DCT coefficients within one cycle. It is also an easy-to-use decoder as it can directly parse the JPEG file header without help of software functions. In addition, L1 API is provided for Huffman decoding.
+2. Pik encoding: 3 L2 APIs are provided for accelerating about 90% of the workload of lossy compression in Google’s pik. The pikEnc uses the ‘fast mode’ of the pik encoder, which can provide better encoding efficiency than most other still-image encoding methods.
+3. WebP encoding: 2 L2 APIs are provided for accelerating about 90% of the workload of lossy compression in WebP, a popular image format developed by Google and supported in Chrome, Opera and Android that is optimized to enable faster and smaller images on the Web.
+4. Lepton encoding: the API ‘jpegDecLeptonEnc’ can be used for accelerating the encoding process for 'Lepton', a new image format developed by Dropbox. The format can losslessly reduce the size of JPEG images by about 22%.
+5. JPEG-XL encoding: 3 L2 APIs are provided for accelerating the lossy encoding process of the JPEG XL Image Coding System (ISO/IEC 18181). Currently, not all compute-intensive modules are offloaded, and more accelerating APIs will be available in the future.
+6. Bicubic resizing: the L2 API 'resizeTop' is based on the bicubic algorithm and can take 1 or 8 input samples per cycle. When taking 8 samples, it can process 80 8K images per second. Although resizing is not an encoding or decoding algorithm, it is widely used with image codecs in image transcoding applications.
 
 ## Benchmark Result
 
diff --git a/codec/docs/Doxyfile_L2 b/codec/docs/Doxyfile_L2
index 00eaa6c7ec..e0c90cdf97 100644
--- a/codec/docs/Doxyfile_L2
+++ b/codec/docs/Doxyfile_L2
@@ -798,7 +798,7 @@ WARN_LOGFILE           =
 # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
 # Note: If this tag is empty the current directory is searched.
 
-INPUT                  = ../L2/include/hw/pikEnc ../L2/include/hw/jpegDec
+INPUT                  = ../L2/include/hw/pikEnc ../L2/include/hw/jpegDec ../L2/include/hw/resize ../L2/include/hw/webpEnc ../L2/include/hw/leptonEnc/lepton ../L2/include/hw/jxlEnc
 
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
diff --git a/codec/docs/benchmark.rst b/codec/docs/benchmark.rst
index a4952a8e14..4f7196dfbe 100644
--- a/codec/docs/benchmark.rst
+++ b/codec/docs/benchmark.rst
@@ -33,8 +33,44 @@ Performance Summary for APIs
 +------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
 |  API                         | Type  | Input Description      | FPS   | MB/s   | MP/s  | Freq.  | LUT    | BRAM| URAM| DSP   |
 +==============================+=======+========================+=======+========+=======+========+========+=====+=====+=======+
+|  pikEncKernel1Top            | HW    | lena_c_512.jpg         |  62.5 |        |  16.4 | 200MHz |  97.4k |  25 |  93 |  568  |
++------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
+|  pikEncKernel2Top            | HW    | lena_c_512.jpg         |  62.5 |        |  16.4 | 200MHz | 262.5k | 411 | 252 | 1614  |
++------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
+|  pikEncKernel3Top            | HW    | lena_c_512.jpg         |  62.5 |        |  16.4 | 200MHz |  90.0k | 178 | 128 |  216  |
++------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
+|  pikEncKernel1Top            | HW    | lena_c_2048.png        |   5.2 |        |    22 | 200MHz |  97.4k |  25 |  93 |  568  |
++------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
+|  pikEncKernel2Top            | HW    | lena_c_2048.png        |   5.2 |        |    22 | 200MHz | 262.5k | 411 | 252 | 1614  |
++------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
+|  pikEncKernel3Top            | HW    | lena_c_2048.png        |   5.2 |        |    22 | 200MHz |  90.0k | 178 | 128 |  216  |
++------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
 |  kernelJpegDecoderTop        | HW    | lena_c_512.jpg         |  1148 | 87.0   |       | 243MHz |  23.1k |  28 |   0 |   39  |
 +------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
+|  resizeTop(NP=8)             | HW    | 7680*4320 to 512*512   |  79.7 | 2644.3 |       | 341MHz |  15.0k |  29 |   0 |  168  |
++------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
+|  resizeTop(NP=8)             | HW    | 7680*4320 to 1920*1080 |  80.5 | 2670.8 |       | 341MHz |  15.0k |  29 |   0 |  168  |
++------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
+|  webp_IntraPredLoop2_NoOut_1 | HW    | lena_c_512.png         |       | 127.17 |       | 250MHz |  52.9k |  72 |  10 |  410  |
++------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
+|  webp_2_ArithmeticCoding_1   | HW    | lena_c_512.png         |       | 127.17 |       | 250MHz |  15.9k | 157 |   0 |    4  |
++------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
+|  webp_IntraPredLoop2_NoOut_1 | HW    | 1920x1080.png          |       | 172.54 |       | 250MHz |  52.9k |  72 |  10 |  410  |
++------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
+|  webp_2_ArithmeticCoding_1   | HW    | 1920x1080.png          |       | 172.54 |       | 250MHz |  15.9k | 157 |   0 |    4  |
++------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
+|  JxlEnc_ans_clusterHistogram | HW    | lena_c_512.png         |       |        |  56.9 | 291MHz |  38.5K |  70 |  28 |   51  |
++------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
+|  JxlEnc_lossy_enc_compute    | HW    | lena_c_512.png         |       |        |  72.2 | 260MHz | 121.7K | 364 |  53 |  498  |
++------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
+|  JxlEnc_ans_initHistogram    | HW    | lena_c_512.png         |       |        |  43.2 | 289MHz |  39.3K |  50 |  41 |   95  |
++------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
+|  JxlEnc_ans_clusterHistogram | HW    | hq_2Kx2K.png           |       |        | 101.9 | 291MHz |  38.5K |  70 |  28 |   51  |
++------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
+|  JxlEnc_lossy_enc_compute    | HW    | hq_2Kx2K.png           |       |        |  83.3 | 260MHz | 121.7K | 364 |  53 |  498  |
++------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
+|  JxlEnc_ans_initHistogram    | HW    | hq_2Kx2K.png           |       |        |  52.9 | 289MHz |  39.3K |  50 |  41 |   95  |
++------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
 |  jpegHuffmanDecoder          | cosim | lena_c_512.jpg         |  2288 |    174 |       | 270MHz |   7.9K |   5 |   0 |    2  |
 +------------------------------+-------+------------------------+-------+--------+-------+--------+--------+-----+-----+-------+
 
@@ -46,7 +82,10 @@ These are details for benchmark result and usage steps.
 
    benchmark/jpegHuffmanDecoderIP.rst
    benchmark/jpegDecoder.rst
-   
+   benchmark/pikEnc.rst
+   benchmark/resize.rst
+   benchmark/webpEnc.rst
+   benchmark/jxlEnc.rst
 
 Test Overview
 --------------
diff --git a/codec/docs/benchmark/jxlEnc.rst b/codec/docs/benchmark/jxlEnc.rst
new file mode 100644
index 0000000000..0613722368
--- /dev/null
+++ b/codec/docs/benchmark/jxlEnc.rst
@@ -0,0 +1,127 @@
+.. 
+   Copyright 2019 Xilinx, Inc.
+  
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+.. _l2_manual_jxl_encoder:
+
+===========
+JXL Encoder
+===========
+
+JXL Encoder example resides in ``L2/demos/jxlEnc`` directory. The tutorial provides a step-by-step guide that covers commands for building and running kernel.
+
+Executable Usage
+================
+
+* **Work Directory(Step 1)**
+
+The steps for library download and environment setup can be found in :ref:`l2_vitis_codec`. For getting the design,
+
+.. code-block:: bash
+
+    cd L2/demos/jxlEnc
+
+* **Build kernel(Step 2)**
+
+Run the following make command to build your XCLBIN and host binary targeting a specific device. Please note that this process can take a long time, possibly a couple of hours.
+
+.. code-block:: bash
+
+    make run TARGET=hw 
+
+* **Run kernel(Step 3)**
+
+To get the benchmark results, please run the following command.
+
+.. code-block:: bash
+
+    PATH_TO_BUILD/host.exe --xclbin PATH_TO_BUILD/jxlEnc.xclbin PNGFilePath JXLFilePath
+
+Note: "PATH_TO_BUILD" is determined by the "DEVICE=" value chosen when running the hw build. Default arguments are set in the Makefile.
+
+JXL Encoder Input Arguments:
+
+.. code-block:: bash
+
+   Usage: host.exe -[-xclbin]
+          --xclbin:     the kernel name
+          PNGFilePath:  the path to the input *.PNG
+          JXLFilePath:  the path to the output *.jxl
+
+Note: Default arguments are set in Makefile, you can use other :ref:`pictures` listed in the table.
+
+Profiling
+=========
+
+The hardware resource utilizations are listed in the following table.
+Different tool versions may result in slightly different resource usage.
+
+
+.. table:: Table 1 IP resources for JXL encoder
+    :align: center
+
+    +---------------------------------+----------+----------+----------+----------+---------+
+    |               IP                |   BRAM   |   URAM   |    DSP   |    FF    |   LUT   |
+    +---------------------------------+----------+----------+----------+----------+---------+
+    |    JxlEnc_lossy_enc_compute     |    364   |    53    |    498   |   145111 |  121741 |
+    +---------------------------------+----------+----------+----------+----------+---------+
+    |    JxlEnc_ans_clusterHistogram  |    70    |    28    |    51    |   60744  |  38507  |
+    +---------------------------------+----------+----------+----------+----------+---------+
+    |    JxlEnc_ans_initHistogram     |    150   |    41    |    95    |   64710  |  39289  |
+    +---------------------------------+----------+----------+----------+----------+---------+
+
+Result
+======
+
+.. table:: Table JxlEnc_lossy_enc_compute Encoder Performance
+    :align: center
+      
+    +-------------------+---------------+------------+--------------------+      
+    |       Image       |      Size     |  Time(ms)  |  Throughput(MP/s)  |   
+    +-------------------+---------------+------------+--------------------+   
+    |  lena_c_512.png   |    512x512    |    3.63    |        72.21       |       
+    +-------------------+---------------+------------+--------------------+   
+    |  hq_1024x1024.png |   1024x1024   |    13.06   |        80.29       |      
+    +-------------------+---------------+------------+--------------------+   
+    |  hq_2Kx2K.png     |   2048x2048   |    50.33   |        83.34       |     
+    +-------------------+---------------+------------+--------------------+      
+
+.. table:: Table JxlEnc_ans_clusterHistogram Encoder Performance
+    :align: center
+ 
+    +-------------------+---------------+------------+--------------------+
+    |       Image       |      Size     |  Time(ms)  |  Throughput(MP/s)  |
+    +-------------------+---------------+------------+--------------------+
+    |  lena_c_512.png   |    512x512    |    4.6     |        56.98       |     
+    +-------------------+---------------+------------+--------------------+
+    |  hq_1024x1024.png |   1024x1024   |    14.6    |        71.82       |    
+    +-------------------+---------------+------------+--------------------+
+    |  hq_2Kx2K.png     |   2048x2048   |    41.13   |        101.97      | 
+    +-------------------+---------------+------------+--------------------+
+
+.. table:: JxlEnc_ans_initHistogram Encoder Performance
+    :align: center
+
+    +-------------------+---------------+-------------+--------------------+
+    |       Image       |      Size     |   Time(ms)  |  Throughput(MP/s)  |
+    +-------------------+---------------+-------------+--------------------+
+    |  lena_c_512.png   |    512x512    |    6.07     |        43.19       |     
+    +-------------------+---------------+-------------+--------------------+
+    |  hq_1024x1024.png |   1024x1024   |    18.03    |        58.16       |    
+    +-------------------+---------------+-------------+--------------------+
+    |  hq_2Kx2K.png     |   2048x2048   |    79.30    |        52.89       |  
+    +-------------------+---------------+-------------+--------------------+
+
+.. toctree::
+   :maxdepth: 1
\ No newline at end of file
diff --git a/codec/docs/benchmark/pikEnc.rst b/codec/docs/benchmark/pikEnc.rst
new file mode 100644
index 0000000000..34cfef8378
--- /dev/null
+++ b/codec/docs/benchmark/pikEnc.rst
@@ -0,0 +1,120 @@
+.. 
+   Copyright 2019 Xilinx, Inc.
+  
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+.. _l2_manual_pik_encoder:
+
+===========
+PIK Encoder
+===========
+
+PIK Encoder example resides in ``L2/demos/pikEnc`` directory. The tutorial provides a step-by-step guide that covers commands for building and running kernel.
+
+Executable Usage
+================
+
+* **Work Directory(Step 1)**
+
+The steps for library download and environment setup can be found in :ref:`l2_vitis_codec`. For getting the design,
+
+.. code-block:: bash
+
+   cd L2/demos/pikEnc
+
+* **Build kernel(Step 2)**
+
+Run the following make command to build your XCLBIN and host binary targeting a specific device. Please note that this process can take a long time, possibly a couple of hours.
+
+.. code-block:: bash
+
+   make run TARGET=hw DEVICE=xilinx_u200_xdma_201830_2
+
+* **Run kernel(Step 3)**
+
+To get the benchmark results, please run the following command.
+
+.. code-block:: bash
+
+   ./build_dir.hw.xilinx_u200_xdma_201830_2/host.exe --xclbin build_dir.hw.xilinx_u200_xdma_201830_2/pikEnc.xclbin PNGFilePath PIKFilePath --fast
+
+PIK Encoder Input Arguments:
+
+.. code-block:: bash
+
+   Usage: host.exe -[-xclbin]
+          --xclbin:         the kernel name
+          --fast:           the encoding mode
+          PNGFilePath:      the path to the input *.PNG
+          PIKFilePath:  the path to the output *.pik
+
+Note: Default arguments are set in Makefile, you can use other :ref:`pictures` listed in the table.
+
+* **Example output(Step 4)** 
+
+.. code-block:: bash
+
+   Found Platform
+   Platform Name: Xilinx
+   INFO: Found Device=xilinx_u200_xdma_201830_2
+   INFO: Importing build_dir.hw.xilinx_u200_xdma_201830_2/pikEnc.xclbin
+   Loading: 'build_dir.hw.xilinx_u200_xdma_201830_2/pikEnc.xclbin'
+   INFO: Kernel has been created
+   INFO: Finish kernel setup
+   ...
+
+   INFO: Finish kernel execution
+   INFO: Finish E2E execution
+   INFO: Data transfer from host to device: 100 us
+   INFO: Data transfer from device to host: 20 us
+   INFO: Average kernel execution per run: 600 ms
+
+Profiling
+=========
+
+The hardware resource utilizations are listed in the following table.
+Different tool versions may result in slightly different resource usage.
+
+
+.. table:: Table 1 IP resources for PIK encoder 
+    :align: center
+
+    +---------------+----------+----------+----------+----------+---------+
+    |      IP       |   BRAM   |   URAM   |    DSP   |    FF    |   LUT   |
+    +---------------+----------+----------+----------+----------+---------+
+    |    Kernel1    |    25    |    93    |    568   |   125920 |  97441  |
+    +---------------+----------+----------+----------+----------+---------+
+    |    Kernel2    |    411   |    252   |    1614  |   309222 |  262543 |
+    +---------------+----------+----------+----------+----------+---------+
+    |    Kernel3    |    178   |    128   |    216   |   114845 |  90011  |
+    +---------------+----------+----------+----------+----------+---------+
+
+Result
+======
+
+.. table:: Figure 1 PIK Encoder Performance
+    :align: center
+      
+    +------------------+-----------+-----------+-----------+
+    |   Size\Time(ms)  |  Kernel1  |  Kernel2  |  Kernel3  |
+    +------------------+-----------+-----------+-----------+
+    |     512x512      |    16     |    14     |     7     |
+    +------------------+-----------+-----------+-----------+
+    |    1024x1024     |    52     |    48     |    24     |
+    +------------------+-----------+-----------+-----------+
+    |    2048x2048     |    191    |    180    |    86     |
+    +------------------+-----------+-----------+-----------+
+
+.. toctree::
+   :maxdepth: 1
+
diff --git a/codec/docs/benchmark/resize.rst b/codec/docs/benchmark/resize.rst
new file mode 100644
index 0000000000..3397ed56ca
--- /dev/null
+++ b/codec/docs/benchmark/resize.rst
@@ -0,0 +1,128 @@
+.. 
+   Copyright 2019 Xilinx, Inc.
+  
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+=========
+Resize 
+=========
+
+Resize example resides in ``L2/demos/resize`` directory. The tutorial provides a step-by-step guide that covers commands for building and running kernel.
+
+Executable Usage
+================
+
+* **Work Directory(Step 1)**
+
+The steps for library download and environment setup can be found in :ref:`l2_vitis_codec`. For getting the design,
+
+.. code-block:: bash
+
+   cd L2/demos/resize
+
+* **Build kernel(Step 2)**
+
+Run the following make command to build your XCLBIN and host binary targeting a specific device. Please note that this process can take a long time, possibly a couple of hours.
+
+.. code-block:: bash
+
+   make run TARGET=hw DEVICE=xilinx_u50_gen3x16_xdma_201920_3
+
+* **Run kernel(Step 3)**
+
+To get the benchmark results, please run the following command.
+
+.. code-block:: bash
+
+   ./build_dir.hw.xilinx_u50_gen3x16_xdma_201920_3/host.exe -xclbin build_dir.hw.xilinx_u50_gen3x16_xdma_201920_3/kernel_resize.xclbin -i images/t0.raw -srcw 512 -srch 512 -dstw 256 -dsth 256 
+
+Resize Input Arguments:
+
+.. code-block:: bash
+
+   Usage: host.exe -[-xclbin -i -srcw -srch -dstw -dsth]
+         -xclbin:           the kernel name
+         -i:                the input bin file
+         -srcw:             the source image width
+         -srch:             the source image height
+         -dstw:             the destination width 
+         -dsth:             the destination height
+
+Note: Default arguments are set in Makefile, the data have only one column that the node's community id is divided by other clustering algorithm, for example louvain.
+
+* **Example output(Step 4)** 
+
+.. code-block:: bash
+
+    Read image successfully.
+    Found Platform
+    Platform Name: Xilinx
+    Info: Context created
+    Info: Command queue created
+    Found Device=xilinx_u50_gen3x16_xdma_201920_3
+    INFO: Importing build_dir.sw_emu.xilinx_u50_gen3x16_xdma_201920_3/kernel_resize.xclbin
+    Loading: 'build_dir.sw_emu.xilinx_u50_gen3x16_xdma_201920_3/kernel_resize.xclbin'
+    Info: Program created
+    Info: Kernel created
+    kernel has been created
+    INFO: kernel start------
+    INFO: kernel end------
+    INFO: Execution time 6334.86ms
+    Info: Time in host-to-device: 14.9204ms
+    Info: Time in kernel: 6319.06ms
+    Info: Time in device-to-host: 0.475648ms
+    The src image size is 512*512.
+    The dst image size is 89*27.
+    Image resized successfully.
+    PASS: no error found.
+    Info: Test passed
+    
+Profiling
+=========
+
+The hardware resource utilizations are listed in the following table.
+Different tool versions may result in slightly different resource usage.
+
+.. table:: Table 1 : Hardware resources for Resize 
+    :align: center
+
+    +---------------------+----------+----------+----------+----------+---------+-----------------+
+    |    Kernel           |   BRAM   |   URAM   |    DSP   |    FF    |   LUT   | Frequency(MHz)  |
+    +---------------------+----------+----------+----------+----------+---------+-----------------+
+    |  kernel_resize(1x)  |    14    |    0     |    53    |   8635   |  6566   |      397.1      |
+    +---------------------+----------+----------+----------+----------+---------+-----------------+
+    |  kernel_resize(8x)  |    29    |    0     |    168   |   20824  |  15087  |      340.9      |
+    +---------------------+----------+----------+----------+----------+---------+-----------------+
+
+.. table:: Table 2 : Resize FPGA acceleration benchmark 
+    :align: center
+
+    +---------------+-----------+--------------------+-----------------+
+    |    Inputs     |   Size    |  FPGA 1x/8x (ms)   |   Fps 1x / 8x   |
+    +---------------+-----------+--------------------+-----------------+
+    |   7680*4320   |  512*512  |    84.30 / 12.55   |  11.86 / 79.67  |
+    +---------------+-----------+--------------------+-----------------+
+    |   7680*4320   | 1920*1080 |    84.35 / 12.43   |  11.86 / 80.46  | 
+    +---------------+-----------+--------------------+-----------------+
+    |   7680*4320   | 3840*2160 |    84.34 / 12.43   |  11.86 / 80.46  | 
+    +---------------+-----------+--------------------+-----------------+
+
+Note: This table is the result of each image resize down 8 times.
+
+.. Note::
+
+   1. Resize running on Intel(R) Xeon(R) Silver 4116 CPU @ 2.10GHz, cache(16896 KB), cores(12).
+   2. time unit: ms.
+
+.. toctree::
+    :maxdepth: 1
diff --git a/codec/docs/benchmark/webpEnc.rst b/codec/docs/benchmark/webpEnc.rst
new file mode 100644
index 0000000000..6e7cdffbca
--- /dev/null
+++ b/codec/docs/benchmark/webpEnc.rst
@@ -0,0 +1,145 @@
+.. 
+   Copyright 2019 Xilinx, Inc.
+  
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+.. _l2_webp:
+
+============
+Webp Encoder
+============
+
+Webp encoder demo resides in ``L2/demos/webpEnc`` directory. The tutorial provides a step-by-step guide that covers commands for building and running kernel.
+
+Executable Usage
+================
+
+* **Work Directory(Step 1)**
+
+The steps for library download and environment setup can be found in :ref:`l2_vitis_codec`. For getting the design,
+
+.. code-block:: bash
+
+   cd L2/demos/webpEnc
+
+* **Build kernel(Step 2)**
+
+Run the following make command to build your XCLBIN and host binary targeting a specific device. Please note that this process can take a long time, possibly a couple of hours.
+
+.. code-block:: bash
+
+   make run TARGET=hw DEVICE=xilinx_u200_xdma_201830_2
+
+* **Run kernel(Step 3)**
+
+To get the benchmark results, please run the following command.
+
+.. code-block:: bash
+
+   ./build_dir.hw.xilinx_u200_xdma_201830_2/cwebp list.rst -use_ocl -q 80 -o output
+
+Webp Input Arguments:
+
+.. code-block:: bash
+
+   Usage: cwebp -[-use_ocl -q -o]
+          list.rst:     the input list
+          -use_ocl:     should be kept
+          -q:           compression quality
+          -o:           output directory
+
+Note: Default arguments are set in Makefile, you can use other :ref:`pictures` listed in the table.
+
+* **Example output(Step 4)** 
+
+.. code-block:: bash
+
+   INFO: CreateKernel start.
+   INFO: Number of Platforms: 1
+   INFO: Selected Platform: Xilinx
+   INFO: Number of devices for platform 0: 1
+   INFO: target_device found:   xilinx_u200_xdma_201830_2
+   INFO: target_device chosen:  xilinx_u200_xdma_201830_2
+   INFO: OpenCL Version: 1.-48
+   INFO: Loading kernel.xclbin
+   INFO: Loading kernel.xclbin Finished
+
+   ...
+
+   *** Picture: 1 - 1,  Buffer: 0, Instance: 0, Event: 0 ***
+   INFO: Host2Device finished. Computation time is 0.480000 (ms)
+   INFO: PredKernel Finished. Computation time is 0.042000 (ms)
+   INFO: ACKernel Finished. Computation time is 0.012000 (ms)
+   INFO: Device2Host finished. Computation time is 0.005000 (ms)
+   INFO: Loop of Pictures Finished. Computation time is 16.500000 (ms)
+   INFO: VP8EncTokenLoopAsync Finished. Computation time is 22.676000 (ms)
+   INFO: WebPEncodeAsync Finished. Computation time is 47.519000 (ms)
+   INFO: Release Kernel.
+
+Profiling
+=========
+
+The hardware resource utilizations are listed in the following table.
+Different tool versions may result in slightly different resource usage.
+
+
+.. table:: Table 1 Hardware resources for webp kernels
+    :align: center
+
+    +-----------+----------+----------+----------+----------+---------+-----------------+
+    |   Kernel  |   BRAM   |   URAM   |    DSP   |    FF    |   LUT   | Frequency(MHz)  |
+    +-----------+----------+----------+----------+----------+---------+-----------------+
+    |  kernel1  |    72    |    10    |   410    |   56498  |  48301  |       250       |
+    +-----------+----------+----------+----------+----------+---------+-----------------+
+    |  kernel2  |    11    |    0     |    5     |   23073  |  16375  |       250       |
+    +-----------+----------+----------+----------+----------+---------+-----------------+
+
+
+* One instance achieves about 6~14 times acceleration. Here are some examples:
+
+
+.. table:: Table 2 Performance of Webp Encoder for FPGA 
+    :align: center
+
+    +--------------+-------------+--------------+----+----------------+--------------------------+--------------------------+------------+
+    |    Kernel    | Width (pix) | Height (pix) | -q |  latency (ms)  | Throughput FPGA B (Mb/s) | Throughput FPGA P (Mp/s) | FPs (fps)  |
+    +--------------+-------------+--------------+----+----------------+--------------------------+--------------------------+------------+
+    |    Kernel1   |    1920     |     1080     | 80 |     21.18      |          146.83          |          97.88           |   47.20    |
+    +--------------+-------------+--------------+----+----------------+--------------------------+--------------------------+------------+
+    |    Kernel2   |    1920     |     1080     | 80 |     14.57      |          213.54          |         142.36           |   68.65    |
+    +--------------+-------------+--------------+----+----------------+--------------------------+--------------------------+------------+
+    |    Kernel1   |    512      |     512      | 80 |     3.22       |          122.03          |          81.35           |   310.33   |
+    +--------------+-------------+--------------+----+----------------+--------------------------+--------------------------+------------+
+    |    Kernel2   |    512      |     512      | 80 |     2.92       |          134.65          |          89.77           |   342.43   |
+    +--------------+-------------+--------------+----+----------------+--------------------------+--------------------------+------------+
+    |    Kernel1   |    1920     |     1080     | 90 |     21.03      |          147.87          |          98.58           |   47.54    |
+    +--------------+-------------+--------------+----+----------------+--------------------------+--------------------------+------------+
+    |    Kernel2   |    1920     |     1080     | 90 |     15.92      |          195.43          |          130.29          |   62.83    |
+    +--------------+-------------+--------------+----+----------------+--------------------------+--------------------------+------------+
+    |    Kernel1   |    512      |     512      | 90 |     4.73       |          83.12           |          55.41           |   211.39   |
+    +--------------+-------------+--------------+----+----------------+--------------------------+--------------------------+------------+
+    |    Kernel2   |    512      |     512      | 90 |     4.93       |          79.73           |          53.16           |   202.78   |
+    +--------------+-------------+--------------+----+----------------+--------------------------+--------------------------+------------+
+
+
+* Platform: FPGA U200, CPU details are listed below (single thread)
+
+.. note::
+    | 1. Kernels running on platform with Intel(R) Xeon(R) CPU E5-2603 v3 @ 1.60GHz, 48 Threads.
+    | 2. time unit: ms.
+    | 3. "-" Indicates that the result could not be obtained due to insufficient memory.
+    | 4. FPGA time is the kernel runtime by adding data transfer and executed with webp encoder. 
+
+.. toctree::
+   :maxdepth: 1
+
diff --git a/codec/docs/guide_L2/internals.rst b/codec/docs/guide_L2/internals.rst
index 0a501f7825..fb9c73a49d 100644
--- a/codec/docs/guide_L2/internals.rst
+++ b/codec/docs/guide_L2/internals.rst
@@ -22,3 +22,8 @@ Design Internals
    :maxdepth: 1
 
    kernels/jpegDecoder.rst
+   kernels/pikEnc.rst
+   kernels/leptonEnc.rst
+   kernels/webpEnc.rst
+   kernels/resize.rst
+   kernels/jxlEnc.rst
diff --git a/codec/docs/guide_L2/kernels/jxlEnc.rst b/codec/docs/guide_L2/kernels/jxlEnc.rst
new file mode 100644
index 0000000000..6f88705bf3
--- /dev/null
+++ b/codec/docs/guide_L2/kernels/jxlEnc.rst
@@ -0,0 +1,147 @@
+.. 
+   Copyright 2019 Xilinx, Inc.
+  
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+.. _l2_kernel_jxl_encoder:
+
+===========
+JXL Encoder
+===========
+
+JXL Encoder example resides in ``L2/demos/jxlEnc`` directory. The tutorial provides a step-by-step guide that covers commands for building and running kernel.
+
+Overview
+========
+JXL Encoder demos contain 3 kernels which show acceleration cases on different parts of the JPEG-XL encoder. The JxlEnc_lossy_enc_compute kernel is mainly responsible for accelerating AC and DC generation and processing. Its input is XYB image data and other components such as the maskfield, raw quantfield and aq maps. Its output is the AC and DC coefficients, together with the block order, strategy and quantfield for the next steps. JxlEnc_ans_initHistogram and JxlEnc_ans_clusterHistogram are two parts of ANS encoding; these two kernels are responsible for accelerating the generation of AC tokens and histograms. The internal block designs are shown below.
+
+The design of the JxlEnc_lossy_enc_compute kernel is as follows:
+
+.. image:: /images/JxlEnc_lossy_enc_compute_blockDesign.png
+   :alt: Block Design of JxlEnc_lossy_enc_compute
+   :align: center
+
+LoadData is responsible for loading host data into internal streams and passing them to the next step. Then, DCT8x8, DCT16x16 and DCT32x32 are computed in parallel by VarDCT and the results are sent to acs_heuristic, which further computes the AC strategy for each image block. CFL is responsible for the color correlation of YtoX and YtoB and also passes the quantfield and acs to the next module. In Compute_CoeffAC, AC coefficients are generated and then output to AXI writeout. The order of all image blocks is computed after the dataflow processing of AC and DC coefficients, and its result is then sent to AXI writeout.
+
+The design of the JxlEnc_ans_initHistogram and JxlEnc_ans_clusterHistogram is as follows:
+
+.. image:: /images/JxlEnc_init_cluster_blockDesign.png
+   :alt: Block Design of JxlEnc_ans_initHistogram and JxlEnc_ans_clusterHistogram
+   :align: center
+
+Kernels JxlEnc_ans_initHistogram and JxlEnc_ans_clusterHistogram are designed for accelerating ANS encoding. JxlEnc_ans_initHistogram runs as a dataflow design, performing AC tokenization and histogram initialization in parallel. JxlEnc_ans_clusterHistogram uses pipeline acceleration and generates all histograms for post-processing in the JPEG-XL computing flow.
+
+Executable Usage
+================
+
+* **Work Directory(Step 1)**
+
+The steps for library download and environment setup can be found in :ref:`l2_vitis_codec`. For getting the design,
+
+.. code-block:: bash
+
+    cd L2/demos/jxlEnc
+
+* **Build kernel(Step 2)**
+
+Run the following make command to build your XCLBIN and host binary targeting a specific device. Please note that this process can take a long time, possibly a couple of hours.
+
+.. code-block:: bash
+
+    make run TARGET=hw 
+
+* **Run kernel(Step 3)**
+
+To get the benchmark results, please run the following command.
+
+.. code-block:: bash
+
+    PATH_TO_BUILD/host.exe --xclbin PATH_TO_BUILD/jxlEnc.xclbin PNGFilePath JXLFilePath
+
+Note: "PATH_TO_BUILD" is determined by the "DEVICE=" value chosen when running the hw build. Default arguments are set in the Makefile.
+
+JXL Encoder Input Arguments:
+
+.. code-block:: bash
+
+   Usage: host.exe -[-xclbin]
+          --xclbin:     the kernel name
+          PNGFilePath:  the path to the input *.PNG
+          JXLFilePath:  the path to the output *.jxl
+
+Note: Default arguments are set in Makefile, you can use other :ref:`pictures` listed in the table.
+
+Profiling
+=========
+
+The hardware resource utilizations are listed in the following table.
+Different tool versions may result in slightly different resource usage.
+
+
+.. table:: Table 1 IP resources for JXL encoder
+    :align: center
+
+    +---------------------------------+----------+----------+----------+----------+---------+
+    |               IP                |   BRAM   |   URAM   |    DSP   |    FF    |   LUT   |
+    +---------------------------------+----------+----------+----------+----------+---------+
+    |    JxlEnc_lossy_enc_compute     |    364   |    53    |    498   |   145111 |  121741 |
+    +---------------------------------+----------+----------+----------+----------+---------+
+    |    JxlEnc_ans_clusterHistogram  |    70    |    28    |    51    |   60744  |  38507  |
+    +---------------------------------+----------+----------+----------+----------+---------+
+    |    JxlEnc_ans_initHistogram     |    150   |    41    |    95    |   64710  |  39289  |
+    +---------------------------------+----------+----------+----------+----------+---------+
+
+Result
+======
+
+.. table:: Table JxlEnc_lossy_enc_compute Encoder Performance
+    :align: center
+      
+    +-------------------+---------------+------------+--------------------+      
+    |       Image       |      Size     |  Time(ms)  |  Throughput(MP/s)  |   
+    +-------------------+---------------+------------+--------------------+   
+    |  lena_c_512.png   |    512x512    |    3.63    |        72.21       |       
+    +-------------------+---------------+------------+--------------------+   
+    |  hq_1024x1024.png |   1024x1024   |    13.06   |        80.29       |      
+    +-------------------+---------------+------------+--------------------+   
+    |  hq_2Kx2K.png     |   2048x2048   |    50.33   |        83.34       |     
+    +-------------------+---------------+------------+--------------------+      
+
+.. table:: Table JxlEnc_ans_clusterHistogram Encoder Performance
+    :align: center
+ 
+    +-------------------+---------------+------------+--------------------+
+    |       Image       |      Size     |  Time(ms)  |  Throughput(MP/s)  |
+    +-------------------+---------------+------------+--------------------+
+    |  lena_c_512.png   |    512x512    |    4.6     |        56.98       |     
+    +-------------------+---------------+------------+--------------------+
+    |  hq_1024x1024.png |   1024x1024   |    14.6    |        71.82       |    
+    +-------------------+---------------+------------+--------------------+
+    |  hq_2Kx2K.png     |   2048x2048   |    41.13   |        101.97      | 
+    +-------------------+---------------+------------+--------------------+
+
+.. table:: JxlEnc_ans_initHistogram Encoder Performance
+    :align: center
+
+    +-------------------+---------------+-------------+--------------------+
+    |       Image       |      Size     |   Time(ms)  |  Throughput(MP/s)  |
+    +-------------------+---------------+-------------+--------------------+
+    |  lena_c_512.png   |    512x512    |    6.07     |        43.19       |     
+    +-------------------+---------------+-------------+--------------------+
+    |  hq_1024x1024.png |   1024x1024   |    18.03    |        58.16       |    
+    +-------------------+---------------+-------------+--------------------+
+    |  hq_2Kx2K.png     |   2048x2048   |    79.30    |        52.89       |  
+    +-------------------+---------------+-------------+--------------------+
+
+.. toctree::
+   :maxdepth: 1
diff --git a/codec/docs/guide_L2/kernels/leptonEnc.rst b/codec/docs/guide_L2/kernels/leptonEnc.rst
new file mode 100644
index 0000000000..dc3894c1bb
--- /dev/null
+++ b/codec/docs/guide_L2/kernels/leptonEnc.rst
@@ -0,0 +1,152 @@
+.. 
+   Copyright 2021 Xilinx, Inc.
+  
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+.. _l2_kernel_lepton_encoder:
+
+==============
+Lepton Encoder
+==============
+
+Lepton Encoder example resides in ``L2/demos/leptonEnc`` directory. The tutorial provides a step-by-step guide that covers commands for building and running kernel.
+
+Internal Designs
+================
+
+* **Overall Designs**
+
+The design of the Lepton Encoder is as follows:
+
+.. image:: /images/leptonEncoder.png
+   :alt: Block Design of Lepton Encoder
+   :align: center
+
+The Lepton Encoder is composed of the following components:
+
+AXI-to-Stream is responsible for loading the JPEG image from external memory into the FPGA.
+
+Jpeg decode is responsible for decoding the JPEG data format into DCT coefficients.
+
+IDCT is responsible for converting the DCT coefficients into pixels.
+
+DC Predict is responsible for predicting the DC coefficients. Pixels are used in the DC prediction.
+
+Line buffer implements a line buffer for the prediction of AC coefficients.
+
+The Serialize and predict modules are responsible for the prediction of AC coefficients.
+
+The AC and DC coefficients and their predictions will be collected and then used to build up the probability tables.
+
+Then Arithmetic Encode is used to generate the compressed bitstream.
+
+The bitstream will be written to the external memory and moved back to the host to generate lepton file.
+
+Software and system requirements
+================================
+
+The following packages are required to run this application:
+* Xilinx Vitis 2022.1
+* GCC 8.x
+* make
+* PLATFORM: xilinx_u200_gen3x16_xdma_2_202110_1
+
+Building the accelerated Lepton encoder
+=======================================
+
+* In a terminal window, execute the following commands to set-up the Vitis environment
+
+.. code-block:: bash
+
+    cd L2/demos/leptonEnc
+    source $XILINX_VITIS/settings64.sh 
+
+* Build the accelerated Lepton encoder and run software emulation with the following command:
+
+.. code-block:: bash
+
+    make run TARGET=sw_emu
+
+* Build the accelerated Lepton encoder and run hardware emulation with the following command:
+
+.. code-block:: bash
+
+    make run TARGET=hw_emu
+
+* Build the accelerated Lepton encoder for on board execution with the following command:
+
+.. code-block:: bash
+
+    make run TARGET=hw
+
+* lepEnc.xclbin and host.exe will be generated in the directory build_dir.hw.xilinx_u200_gen3x16_xdma_2_202110_1/
+
+
+Running the accelerated Lepton encoder
+======================================
+
+To get the benchmark results, please run the following command.
+
+.. code-block:: bash
+   
+   source /opt/xilinx/xrt/setup.sh
+   ./build_dir.hw.xilinx_u200_gen3x16_xdma_2_202110_1/host.exe --xclbin build_dir.hw.xilinx_u200_gen3x16_xdma_2_202110_1/lepEnc.xclbin images
+
+Input Arguments:
+
+.. code-block:: bash
+
+   Usage: host.exe [--xclbin] [JPGDATAPATH]
+          --xclbin:         the kernel name
+          JPGDATAPATH:      a list of jpeg file to be encoded
+
+Note: Default arguments are set in Makefile.
+
+Performance
+===========
+
+The hardware resource utilizations are listed in the following table.
+Different tool versions may result in slightly different resource usage.
+
+
+.. table:: Table 1 Acceleration performance on FPGA
+    :align: center
+
+    +-----------------+-------------+--------------+--------+----------+---------------+------------+-------------+-----------------------------+
+    |    Pictures     | Width (pix) | Height (pix) | Format | Size(MB) | Comprs. Ratio | Freq (MHz) | Latency(ms) | Throughput FPGA U200 (MB/s) |
+    +-----------------+-------------+--------------+--------+----------+---------------+------------+-------------+-----------------------------+
+    | android.jpg     |    960      |     1280     |  420   |   0.13   |      1.33     |    202     |     6.63    |            159.74           |
+    +-----------------+-------------+--------------+--------+----------+---------------+------------+-------------+-----------------------------+
+    | iphone.jpg      |    3264     |     2448     |  420   |   2.1    |      1.32     |    202     |    94.69    |            172.54           |
+    +-----------------+-------------+--------------+--------+----------+---------------+------------+-------------+-----------------------------+
+    | offset.jpg      |    5184     |     3456     |  422   |   7.4    |      1.30     |    202     |   332.35    |            156.45           |
+    +-----------------+-------------+--------------+--------+----------+---------------+------------+-------------+-----------------------------+
+    | hq.jpg          |    5760     |     3840     |  444   |  22.60   |      1.22     |    202     |  1056.62    |            127.17           |
+    +-----------------+-------------+--------------+--------+----------+---------------+------------+-------------+-----------------------------+
+
+.. table:: Table 2  Resource using for FPGA
+    :align: center
+    
+    +---------------+--------------+------------+
+    | Utilizations  |    Lepton    | Percentage |
+    +---------------+--------------+------------+
+    |     LUT       |     80699    |    8.11%   |
+    +---------------+--------------+------------+
+    |     FF        |     72706    |    3.46%   |
+    +---------------+--------------+------------+
+    |     DSP       |      64      |    0.94%   |
+    +---------------+--------------+------------+
+    |     BRAM      |      58      |    3.08%   |
+    +---------------+--------------+------------+
+    |     URAM      |      86      |   29.15%   |
+    +---------------+--------------+------------+
diff --git a/codec/docs/guide_L2/kernels/pikEnc.rst b/codec/docs/guide_L2/kernels/pikEnc.rst
new file mode 100644
index 0000000000..fa07a198ba
--- /dev/null
+++ b/codec/docs/guide_L2/kernels/pikEnc.rst
@@ -0,0 +1,144 @@
+.. 
+   Copyright 2019 Xilinx, Inc.
+  
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+.. _l2_kernel_pik_encoder:
+
+===========
+PIK Encoder
+===========
+
+PIK Encoder example resides in ``L2/demos/pikEnc`` directory. The tutorial provides a step-by-step guide that covers commands for building and running kernel.
+
+Internal Designs
+================
+
+* **Overall Designs**
+
+PIK Encoder is a multi-kernel design because PIK is a very complex algorithm compared with other image codecs. The algorithm is therefore divided into three kernels connected through on-board DDR. The overall design is shown in the picture below.
+
+.. image:: /images/PIKEncoder.png
+   :alt: Pik Encoder Structure
+   :scale: 60%
+   :align: center
+
+* **Kernel1**
+
+Kernel1 is responsible for converting RGB data to XYB. The correlation map and the floating-point quant-field stream are also initialized in kernel1.
+
+* **Kernel2**
+
+Kernel2 first performs the DCT transformation on the XYB data and then makes the AC strategy decision. The AC stream is composed of several types of DCT streams (DCT4x4, DCT8x8, DCT16x16 and DCT32x32), selected according to the AC strategy. The DC stream is generated from the AC stream by partial selection. The quantizer takes the float AC and DC streams and quantizes them into int16_t. Order data is generated by counting the non-zeros in the quantized AC and then sorting the counting results. The quant-field stream is also quantized into int16_t in kernel2.
+
+* **Kernel3**
+
+Kernel3 is the encoding part. It takes the data (quantized AC, quantized DC, order, AC strategy, block and quant-field) into the encoder and generates serialized tokens. The final output contains the AC token, the DC token and the file headers, which hold the AC strategy, order, block and quant-field information.
+
+Executable Usage
+================
+
+* **Work Directory(Step 1)**
+
+The steps for library download and environment setup can be found in :ref:`l2_vitis_codec`. For getting the design,
+
+.. code-block:: bash
+
+   cd L2/demos/pikEnc
+
+* **Build kernel(Step 2)**
+
+Run the following make command to build your XCLBIN and host binary targeting a specific device. Please note that this process takes a long time, possibly a couple of hours.
+
+.. code-block:: bash
+
+   make run TARGET=hw DEVICE=xilinx_u200_xdma_201830_2
+
+* **Run kernel(Step 3)**
+
+To get the benchmark results, please run the following command.
+
+.. code-block:: bash
+
+   ./build_dir.hw.xilinx_u200_xdma_201830_2/host.exe --xclbin build_dir.hw.xilinx_u200_xdma_201830_2/pikEnc.xclbin PNGFilePath PIKFilePath --fast
+
+PIK Encoder Input Arguments:
+
+.. code-block:: bash
+
+   Usage: host.exe -[-xclbin]
+          --xclbin:         the kernel name
+          --fast:           the encoding mode
+          PNGFilePath:      the path to the input *.PNG
+          PIKFilePath:  the path to the output *.pik
+
+Note: Default arguments are set in Makefile, you can use other :ref:`pictures` listed in the table.
+
+* **Example output(Step 4)** 
+
+.. code-block:: bash
+
+   Found Platform
+   Platform Name: Xilinx
+   INFO: Found Device=xilinx_u200_xdma_201830_2
+   INFO: Importing build_dir.hw.xilinx_u200_xdma_201830_2/pikEnc.xclbin
+   Loading: 'build_dir.hw.xilinx_u200_xdma_201830_2/pikEnc.xclbin'
+   INFO: Kernel has been created
+   INFO: Finish kernel setup
+   ...
+
+   INFO: Finish kernel execution
+   INFO: Finish E2E execution
+   INFO: Data transfer from host to device: 100 us
+   INFO: Data transfer from device to host: 20 us
+   INFO: Average kernel execution per run: 600 ms
+
+Profiling
+=========
+
+The hardware resource utilizations are listed in the following table.
+Different tool versions may result in slightly different resource usage.
+
+
+.. table:: Table 1 IP resources for PIK encoder 
+    :align: center
+
+    +---------------+----------+----------+----------+----------+---------+
+    |      IP       |   BRAM   |   URAM   |    DSP   |    FF    |   LUT   |
+    +---------------+----------+----------+----------+----------+---------+
+    |    Kernel1    |    25    |    93    |    568   |   125920 |  97441  |
+    +---------------+----------+----------+----------+----------+---------+
+    |    Kernel2    |    411   |    252   |    1614  |   309222 |  262543 |
+    +---------------+----------+----------+----------+----------+---------+
+    |    Kernel3    |    178   |    128   |    216   |   114845 |  90011  |
+    +---------------+----------+----------+----------+----------+---------+
+
+Result
+======
+
+.. table:: Table 2 PIK Encoder Performance
+    :align: center
+      
+    +------------------+-----------+-----------+-----------+
+    |   Size\Time(ms)  |  Kernel1  |  Kernel2  |  Kernel3  |
+    +------------------+-----------+-----------+-----------+
+    |     512x512      |    16     |    14     |     7     |
+    +------------------+-----------+-----------+-----------+
+    |    1024x1024     |    52     |    48     |    24     |
+    +------------------+-----------+-----------+-----------+
+    |    2048x2048     |    191    |    180    |    86     |
+    +------------------+-----------+-----------+-----------+
+
+.. toctree::
+   :maxdepth: 1
+
diff --git a/codec/docs/guide_L2/kernels/resize.rst b/codec/docs/guide_L2/kernels/resize.rst
new file mode 100644
index 0000000000..919c5cab39
--- /dev/null
+++ b/codec/docs/guide_L2/kernels/resize.rst
@@ -0,0 +1,52 @@
+.. 
+   Copyright 2019 Xilinx, Inc.
+  
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+*************************************************
+Resize Down 
+*************************************************
+
+
+Overview
+========
+Resize refers to the resizing of a digital image; the resize kernel, however, can only process one channel of XYB. Our resize is based on bicubic interpolation, which is an extension of cubic interpolation for interpolating data points on a two-dimensional regular grid. Images resampled with bicubic interpolation are smoother and have fewer interpolation artifacts. For more details about bicubic interpolation, see `here <https://en.wikipedia.org/wiki/Bicubic_interpolation>`_.
+
+Implementation
+==============
+According to the characteristics of bicubic interpolation, we get the derivative \int_{xy} by first calculating \int_{y} and then \int_{x} from those. The optimized interpolator, which uses a sliding window on the FPGA, achieves an 8x acceleration compared with the original algorithm. The implementation is shown in the figure below:
+
+
+
+.. image:: /images/resize_design.png
+   :alt: resize design
+   :width: 60%
+   :align: center
+
+The kernel will do the following steps:
+
+1. Load data from HBM: Load the original image pixels into a stream, where eight bits represent one pixel. NPPC=1 means one pixel is processed every clock cycle and NPPC=8 means eight pixels are processed every clock cycle.
+
+2. Image resampling: Cubic interpolation is applied to the data set taken from a sliding window, which is our proposed structure for image processing. Here, 8 pixels can be processed per clock cycle by taking full advantage of the features of URAM. For every eight pixels, the 8 interpolators produce several (<=8) results, which are put into a stream.
+
+3. Pick out pixels: The valid pixels are picked out from each 72-bit unit and packed into a 64-bit unit.
+
+4. Load output to HBM: Scan stream to get data and write back to HBM.
+
+Interface
+=========
+Currently, the input should be one channel of an image with a pixel width of eight bits. This kernel can resize an 8K image down to a smaller image.
+
+The output is the image resized down to the desired size. The image surface is smoother than the corresponding surfaces obtained by bilinear interpolation or nearest-neighbor interpolation.
+
diff --git a/codec/docs/guide_L2/kernels/webpEnc.rst b/codec/docs/guide_L2/kernels/webpEnc.rst
new file mode 100644
index 0000000000..7f3b3fc2fd
--- /dev/null
+++ b/codec/docs/guide_L2/kernels/webpEnc.rst
@@ -0,0 +1,154 @@
+.. 
+   Copyright 2019 Xilinx, Inc.
+  
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+.. _l2_kernel_webp_encoder:
+
+=============
+ WebP Encoder
+=============
+
+WebP is a new image format developed by Google and supported in Chrome, Opera and Android that is optimized to enable faster and smaller images on the Web. WebP images are about 30% smaller in size compared to PNG and JPEG images at equivalent visual quality. In addition, the WebP image format has feature parity with other formats as well.
+
+Implementation
+==============
+
+This accelerated WebP encoder project is based on the `libwebp` open source project. For one input picture (.png), the output picture (.webp) is obtained after the following six steps:
+
+.. image:: /images/webp_steps.png
+   :alt: Webp Encoder Structure
+   :scale: 60%
+   :align: center
+
+Time-consuming functions are accelerated by 2 FPGA kernels including:
+
+.. code-block:: bash
+
+  Kernel-1: intra-prediction and probability counting
+  Kernel-2: arithmetic coding
+
+Performance
+===========
+
+* One instance achieves about 6~14 times acceleration. Here are some examples:
+  
+.. table:: Table 1  Acceleration process on CPU comparison with FPGA
+    :align: center
+
+    +-----------------+--------------------+-------------+--------------+----+---------------------+----------------------+------------+-----------------------------+-----------------------+----------+
+    |    Pictures     | Texture complexity | Width (pix) | Height (pix) | -q | kernel1 latency(ms) | kernel2 latency (ms) | Freq (MHz) | Throughput FPGA U200 (MB/s) | Throughput CPU (MB/s) | Speed up |
+    +-----------------+--------------------+-------------+--------------+----+---------------------+----------------------+------------+-----------------------------+-----------------------+----------+
+    | 3840-city.png   |       complex      |    3840     |     2160     | 80 |        95.30        |         87.93        |    250     |            129.82           |         18.46         |   7.03   |
+    +-----------------+--------------------+-------------+--------------+----+---------------------+----------------------+------------+-----------------------------+-----------------------+----------+
+    | 1920x1080x4.png |       simple       |    3840     |     2160     | 80 |        83.90        |         74.96        |    250     |            159.74           |         16.17         |   9.88   |
+    +-----------------+--------------------+-------------+--------------+----+---------------------+----------------------+------------+-----------------------------+-----------------------+----------+
+    | 1920x1080.png   |       simple       |    1920     |     1080     | 80 |        21.51        |         18.60        |    250     |            172.54           |         11.85         |   14.56  |
+    +-----------------+--------------------+-------------+--------------+----+---------------------+----------------------+------------+-----------------------------+-----------------------+----------+
+    | 853x640.png     |       simple       |    853      |     640      | 80 |         4.13        |         74.96        |    250     |            156.45           |         20.97         |   7.46   |
+    +-----------------+--------------------+-------------+--------------+----+---------------------+----------------------+------------+-----------------------------+-----------------------+----------+
+    | lena_c_512.png  |       middle       |    512      |     512      | 80 |         2.90        |         2.84         |    250     |            127.17           |         21.32         |   5.96   |
+    +-----------------+--------------------+-------------+--------------+----+---------------------+----------------------+------------+-----------------------------+-----------------------+----------+
+  
+Platform: CPU: Intel(R) Xeon(R) Gold 6142 CPU @ 2.60GHz (single thread)
+
+* One instance takes about 6% of the resources of the U200 acceleration card. The details are as follows:
+
+.. table:: Table 2  Resource using for FPGA
+    :align: center
+    
+    +---------------+--------------+--------------+---------------------+
+    | Utilizations  |   Kernel-1   |   Kernel-2   | Kernel-1 + Kernel-2 |
+    +---------------+--------------+--------------+---------------------+
+    |     LUT       |    52889     |    15866     |        5.37%        |
+    +---------------+--------------+--------------+---------------------+
+    |     FF        |    68991     |    23039     |        3.30%        |
+    +---------------+--------------+--------------+---------------------+
+    |     DSP       |     410      |      4       |        6.00%        |
+    +---------------+--------------+--------------+---------------------+
+    |     BRAM      |      72      |     157      |        4.00%        |
+    +---------------+--------------+--------------+---------------------+
+    |     URAM      |      10      |      0       |        2.08%        |
+    +---------------+--------------+--------------+---------------------+
+
+* Multi-picture processing. The host code supports processing multiple pictures with asynchronous behavior, which allows host-device communication, prediction kernel computation and arithmetic coding kernel computation to overlap. This is shown in the following demonstration picture and profiling result.
+
+.. image:: /images/webp_overlap.png
+   :alt: Webp Encoder OverLap
+   :scale: 60%
+   :align: center
+ 
+.. image:: /images/webp_profiling.png
+   :alt: Webp Encoder Profiling
+   :scale: 60%
+   :align: center
+
+Software and system requirements
+================================
+
+The following packages are required to run this application:
+* Xilinx Vitis 2022.1
+* GCC 8.x
+* make
+* PLATFORM: xilinx_u200_gen3x16_xdma_2_202110_1
+
+
+Building the accelerated WebP encoder
+=====================================
+
+* In a terminal window, execute the following commands to set-up the Vitis environment
+
+.. code-block:: bash
+
+    cd L2/demos/webpEnc
+    source $XILINX_VITIS/settings64.sh 
+
+* Build the accelerated WebP encoder and run software emulation with the following command:
+
+.. code-block:: bash
+
+    make run TARGET=sw_emu
+
+* Build the accelerated WebP encoder and run hardware emulation with the following command:
+
+.. code-block:: bash
+
+    make run TARGET=hw_emu
+
+* Build the accelerated WebP encoder for on board execution with the following command:
+
+.. code-block:: bash
+
+    make run TARGET=hw
+
+* kernel.xclbin and cwebp will be generated in the directory build_dir.hw.xilinx_u200_gen3x16_xdma_2_202110_1/
+
+
+Running the accelerated WebP encoder
+====================================
+
+* The `cwebp` application takes the following arguments:
+
+.. code-block:: bash
+
+    list.rst is text file lists input pictures, should be equal to "NPicPool" defined in src_syn/vp8_AsyncConfig.h
+    -use_ocl: should be kept
+    -q: compression quality
+    -o: output directory
+
+* Run the accelerated WebP encoder with the following commands:
+
+.. code-block:: bash
+
+    source /opt/xilinx/xrt/setup.sh
+    ./cwebp -xclbin kernel.xclbin list.rst -use_ocl -q 80 -o ./images/
diff --git a/codec/docs/guide_L2/pik_api.rst b/codec/docs/guide_L2/pik_api.rst
new file mode 100644
index 0000000000..28c1a616a5
--- /dev/null
+++ b/codec/docs/guide_L2/pik_api.rst
@@ -0,0 +1,252 @@
+.. 
+   Copyright 2019 Xilinx, Inc.
+  
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+XAccPIKEncoderKernel1Top
+------------------------
+
+
+.. code-block:: cpp
+	
+	#include "pikEnc/XAccPIKEncoderKernel1.hpp"
+
+
+
+.. ref-code-block:: cpp
+	:class: title-code-block
+
+	void kernel1Top(ap_uint<32> config[MAX_NUM_CONFIG],
+                           ap_uint<AXI_WIDTH> rbuf[BUF_DEPTH / 2],
+                           ap_uint<32> axi_out[AXI_OUT],
+                           ap_uint<32> axi_cmap[AXI_CMAP],
+                           ap_uint<32> axi_qf[AXI_QF]);
+
+
+
+
+.. rubric:: Parameters:
+
+.. list-table::
+    :widths: 20 80
+
+    *
+        - config
+
+        - control signals, such as image size information, stream length and offsets of each colors.
+
+    *
+        - rbuf
+
+        - the input RGB data, aligned by different colors.
+
+    *
+        - axi_out
+
+        - the output of XYB data generated by color transformation of RGB.
+
+    *
+        - axi_cmap
+
+        - color correlation map, used to adjust color in AC quantization.
+
+    *
+        - axi_qf
+
+        - information of the quant-field, for choosing correct quant-table in AC quantization.
+
+
+XAccPIKEncoderKernel2Top
+------------------------
+
+
+.. code-block:: cpp
+	
+	#include "pikEnc/XAccPIKEncoderKernel2.hpp"
+
+
+
+.. ref-code-block:: cpp
+	:class: title-code-block
+
+	void kernel2Top(ap_uint<AXI_SZ> config[MAX_NUM_CONFIG],
+
+                           ap_uint<2 * AXI_SZ> src[AXI_OUT / 2],
+                           ap_uint<AXI_SZ> quant_field_in[AXI_QF],
+                           ap_uint<AXI_SZ> cmap[AXI_CMAP],
+
+                           ap_uint<AXI_SZ> ac[MAX_NUM_AC],
+                           ap_uint<AXI_SZ> dc[MAX_NUM_DC],
+                           ap_uint<AXI_SZ> quant_field_out[AXI_QF],
+                           ap_uint<AXI_SZ> ac_strategy[MAX_NUM_BLOCK88],
+                           ap_uint<AXI_SZ> block[MAX_NUM_BLOCK88],
+                           ap_uint<AXI_SZ> order[MAX_NUM_ORDER]);
+
+
+
+
+
+.. rubric:: Parameters:
+
+.. list-table::
+    :widths: 20 80
+
+    *
+        - config
+
+        - control signals, such as image size information, stream length and offsets of each colors.
+
+    *
+        - src
+
+        - the input XYB data, aligned by different colors and stored in 32x32 rectangular block order.
+
+    *
+        - quant_field_in
+
+        - information of the floating point quant-field, for choosing correct quant-table in AC quantization.
+
+    *
+        - cmap
+
+        - color correlation map, used to adjust color in AC quantization.
+
+    *
+        - ac
+
+        - output quantized AC stream.
+
+    *
+        - dc
+
+        - output quantized DC stream.
+
+    *
+        - quant_field_out
+
+        - output information of the quantized quant-field; it indicates the correct quant-table for the decoder.
+
+    *
+        - ac_strategy
+
+        - output AC strategy is the information about DCT size of each 8x8 block in image.
+
+    *
+        - block
+
+        - output block shows the beginning position of each DCT computation.
+
+    *
+        - order
+
+        - output information of order is used for zigzag encoding in kernel3.
+
+
+
+XAccPIKEncoderKernel3Top
+------------------------
+
+
+.. code-block:: cpp
+	
+	#include "pikEnc/XAccPIKEncoderKernel3.hpp"
+
+
+
+.. ref-code-block:: cpp
+	:class: title-code-block
+
+	void kernel3Top(ap_uint<32>* config,
+
+                           ap_uint<32>* ddr_ac,
+                           ap_uint<32>* ddr_dc,
+                           ap_uint<32>* ddr_quant_field,
+                           ap_uint<32>* ddr_ac_strategy,
+                           ap_uint<32>* ddr_block,
+                           ap_uint<32>* hls_order,
+
+                           ap_uint<32>* histo_cfg,
+                           ap_uint<32>* dc_histo_code_out,
+                           ap_uint<32>* dc_code_out,
+                           ap_uint<32>* ac_histo_code_out,
+                           ap_uint<32>* ac_code_out);
+
+
+
+
+
+
+.. rubric:: Parameters:
+
+.. list-table::
+    :widths: 20 80
+
+    *
+        - config
+
+        - control signals, such as image size information, stream length and offsets of each colors.
+
+    *
+        - ddr_ac
+
+        - the input quantized AC.
+
+    *
+        - ddr_dc
+
+        - the input quantized DC.
+
+    *
+        - ddr_quant_field
+
+        - the input of quant-table information for decoder
+
+    *
+        - ddr_ac_strategy
+
+        - the input information of DCT size of each small block8x8.
+
+    *
+        - ddr_block
+
+        - the input information of DCT starting position.
+
+    *
+        - hls_order
+
+        - the input information of encoding order.
+
+    *
+        - histo_cfg
+
+        - the output config for AC and DC histo lengths.
+
+    *
+        - dc_histo_code_out
+
+        - the encoding result of DC histo.
+
+    *
+        - dc_code_out
+
+        - the encoding result of DC token.
+
+    *
+        - ac_histo_code_out
+
+        - the encoding result of AC histo.
+
+    *
+        - ac_code_out
+
+        - the encoding result of AC token.
diff --git a/codec/docs/images/JxlEnc_init_cluster_blockDesign.png b/codec/docs/images/JxlEnc_init_cluster_blockDesign.png
new file mode 100644
index 0000000000..89d9a07d36
Binary files /dev/null and b/codec/docs/images/JxlEnc_init_cluster_blockDesign.png differ
diff --git a/codec/docs/images/JxlEnc_lossy_enc_compute_blockDesign.png b/codec/docs/images/JxlEnc_lossy_enc_compute_blockDesign.png
new file mode 100644
index 0000000000..54619d9185
Binary files /dev/null and b/codec/docs/images/JxlEnc_lossy_enc_compute_blockDesign.png differ
diff --git a/codec/docs/images/PIKEncoder.png b/codec/docs/images/PIKEncoder.png
new file mode 100644
index 0000000000..a1a1b55f47
Binary files /dev/null and b/codec/docs/images/PIKEncoder.png differ
diff --git a/codec/docs/images/leptonEncoder.png b/codec/docs/images/leptonEncoder.png
new file mode 100644
index 0000000000..0b89c61cbe
Binary files /dev/null and b/codec/docs/images/leptonEncoder.png differ
diff --git a/codec/docs/images/resize_design.png b/codec/docs/images/resize_design.png
new file mode 100644
index 0000000000..fa2f5b5774
Binary files /dev/null and b/codec/docs/images/resize_design.png differ
diff --git a/codec/docs/images/tutorial/L2jpegdec-6.PNG b/codec/docs/images/tutorial/L2jpegdec-6.PNG
new file mode 100644
index 0000000000..81c985087d
Binary files /dev/null and b/codec/docs/images/tutorial/L2jpegdec-6.PNG differ
diff --git a/codec/docs/images/tutorial/L2jpegdec-7.PNG b/codec/docs/images/tutorial/L2jpegdec-7.PNG
new file mode 100644
index 0000000000..b7680e8b7a
Binary files /dev/null and b/codec/docs/images/tutorial/L2jpegdec-7.PNG differ
diff --git a/codec/docs/images/tutorial/L2jpegdec-8.PNG b/codec/docs/images/tutorial/L2jpegdec-8.PNG
new file mode 100644
index 0000000000..08872c8a44
Binary files /dev/null and b/codec/docs/images/tutorial/L2jpegdec-8.PNG differ
diff --git a/codec/docs/images/tutorial/L3jpegdec-1.PNG b/codec/docs/images/tutorial/L3jpegdec-1.PNG
new file mode 100644
index 0000000000..1b06dceaa8
Binary files /dev/null and b/codec/docs/images/tutorial/L3jpegdec-1.PNG differ
diff --git a/codec/docs/images/tutorial/L3jpegdec-2.PNG b/codec/docs/images/tutorial/L3jpegdec-2.PNG
new file mode 100644
index 0000000000..108b40a411
Binary files /dev/null and b/codec/docs/images/tutorial/L3jpegdec-2.PNG differ
diff --git a/codec/docs/images/tutorial/L3jpegdec-3.PNG b/codec/docs/images/tutorial/L3jpegdec-3.PNG
new file mode 100644
index 0000000000..aad861cc7f
Binary files /dev/null and b/codec/docs/images/tutorial/L3jpegdec-3.PNG differ
diff --git a/codec/docs/images/tutorial/lab4-1.png b/codec/docs/images/tutorial/lab4-1.png
new file mode 100644
index 0000000000..a5863c1cd0
Binary files /dev/null and b/codec/docs/images/tutorial/lab4-1.png differ
diff --git a/codec/docs/images/tutorial/lab4-2.png b/codec/docs/images/tutorial/lab4-2.png
new file mode 100644
index 0000000000..89ce8815d7
Binary files /dev/null and b/codec/docs/images/tutorial/lab4-2.png differ
diff --git a/codec/docs/images/tutorial/lab4-3.png b/codec/docs/images/tutorial/lab4-3.png
new file mode 100644
index 0000000000..529ac176d6
Binary files /dev/null and b/codec/docs/images/tutorial/lab4-3.png differ
diff --git a/codec/docs/images/tutorial/lab4-4.png b/codec/docs/images/tutorial/lab4-4.png
new file mode 100644
index 0000000000..43a92eda63
Binary files /dev/null and b/codec/docs/images/tutorial/lab4-4.png differ
diff --git a/codec/docs/images/tutorial/lab4-5.png b/codec/docs/images/tutorial/lab4-5.png
new file mode 100644
index 0000000000..1d5d9f32d3
Binary files /dev/null and b/codec/docs/images/tutorial/lab4-5.png differ
diff --git a/codec/docs/images/tutorial/lab4-6.png b/codec/docs/images/tutorial/lab4-6.png
new file mode 100644
index 0000000000..c9f7226649
Binary files /dev/null and b/codec/docs/images/tutorial/lab4-6.png differ
diff --git a/codec/docs/images/webp_overlap.png b/codec/docs/images/webp_overlap.png
new file mode 100644
index 0000000000..653d7c628c
Binary files /dev/null and b/codec/docs/images/webp_overlap.png differ
diff --git a/codec/docs/images/webp_profiling.png b/codec/docs/images/webp_profiling.png
new file mode 100644
index 0000000000..66303bdb9b
Binary files /dev/null and b/codec/docs/images/webp_profiling.png differ
diff --git a/codec/docs/images/webp_steps.png b/codec/docs/images/webp_steps.png
new file mode 100644
index 0000000000..939cfdcb86
Binary files /dev/null and b/codec/docs/images/webp_steps.png differ
diff --git a/codec/docs/index.rst b/codec/docs/index.rst
index 96c5bf9e76..6a074c4b40 100644
--- a/codec/docs/index.rst
+++ b/codec/docs/index.rst
@@ -22,10 +22,15 @@
 Vitis Codec Library
 ==========================
 
-Codec Library is an open-sourced library written in C/C++ accelerating image coding, decoding and related processing. The algorithms of JPEG decoding is accelerated
+Codec Library is an open-sourced library written in C/C++ accelerating image coding, decoding and related processing. Six kinds of algorithms are accelerated, including JPEG decoding, pik encoding, WebP encoding, lepton encoding, JPEG-XL encoding and bicubic resizing:
 
 
 - For JPEG decoding, one L2 API is provided for accelerating entire JPEG decoding process, which supports the ‘Sequential DCT-based mode’ of ISO/IEC 10918-1 standard. It can process 1 Huffman token and create up to 8 DCT coefficients within one cycle. It is also an easy-to-use decoder as it can directly parse the JPEG file header without help of software functions. In addition, L1 API is provided for Huffman decoding. 
+- For pik encoding, 3 L2 APIs are provided for accelerating about 90% workload of lossy compression in Google’s pik. The pikEnc used the ‘fast mode’ of pik encoder which can provide better encoding efficiency than most of other still image encoding methods.
+- For WebP encoding, 2 L2 APIs are provided for accelerating about 90% workload of lossy compression in WebP which is a popular image format developed by Google and supported in Chrome, Opera and Android, that is optimized to enable faster and smaller images on the Web.
+- For lepton encoding, the API ‘jpegDecLeptonEnc’ can be used for accelerating the encoding process for a new image format 'Lepton' developed by Dropbox. The format can save about 22% size of JPEG images losslessly.
+- For JPEG-XL encoding, 3 L2 APIs are provided for accelerating the lossy encoding process of the JPEG XL Image Coding System (ISO/IEC 18181). Currently, not all computing intensive modules are offloaded, and more accelerating APIs will be available in the future.
+- For bicubic resizing, the L2 API 'resizeTop' is based on the bicubic algorithm and can take 1 or 8 input samples per cycle. When taking 8 samples, it can process 80 8K images per second. Although resizing is not an encoding or decoding algorithm, it is widely used with image codecs in image transcoding applications.
 
 
 .. toctree::
@@ -34,6 +39,7 @@ Codec Library is an open-sourced library written in C/C++ accelerating image cod
 
    overview.rst
    release.rst
+   tutorial.rst
 
 .. toctree::
    :caption: L1 User Guide
diff --git a/codec/docs/release.rst b/codec/docs/release.rst
index 403034efe5..85c8087102 100644
--- a/codec/docs/release.rst
+++ b/codec/docs/release.rst
@@ -22,7 +22,7 @@ Release Note
    :hidden:
    :maxdepth: 1
 
-Codec Library is an open-sourced library written in C/C++ for accelerating image coding, decoding and related processing algorithms. It now covers a level of acceleration: the module level(L1) and the pre-defined kernel level(L2).
+Codec Library is an open-sourced library written in C/C++ for accelerating image coding, decoding and related processing algorithms. It now covers two levels of acceleration: the module level (L1) and the pre-defined kernel level (L2). Currently 6 kinds of algorithms are accelerated.
 
 2022.1
 ----
@@ -30,3 +30,8 @@ Codec Library is an open-sourced library written in C/C++ for accelerating image
 The 2022.1 release provides a range of algorithms, includes:
 
 1. JPEG decoding: one L2 API is provided for accelerating entire JPEG decoding process, which supports the ‘Sequential DCT-based mode’ of ISO/IEC 10918-1 standard. It can process 1 Huffman token and create up to 8 DCT coefficients within one cycle. It is also an easy-to-use decoder as it can directly parse the JPEG file header without help of software functions. In addition, L1 API is provided for Huffman decoding.
+2. Pik encoding: 3 L2 APIs are provided for accelerating about 90% workload of lossy compression in Google’s pik. The pikEnc used the ‘fast mode’ of pik encoder which can provide better encoding efficiency than most of other still image encoding methods.
+3. WebP encoding: 2 L2 APIs are provided for accelerating about 90% workload of lossy compression in WebP which is a popular image format developed by Google and supported in Chrome, Opera and Android, that is optimized to enable faster and smaller images on the Web.
+4. Lepton encoding: the API ‘jpegDecLeptonEnc’ can be used for accelerating the encoding process for a new image format 'Lepton' developed by Dropbox. The format can save about 22% size of JPEG images losslessly.
+5. JPEG-XL encoding: 3 L2 APIs are provided for accelerating the lossy encoding process of the JPEG XL Image Coding System (ISO/IEC 18181). Currently, not all computing intensive modules are offloaded, and more accelerating APIs will be available in the future.
+6. Bicubic resizing: the L2 API 'resizeTop' is based on the bicubic algorithm and can take 1 or 8 input samples per cycle. When taking 8 samples, it can process 80 8K images per second. Although resizing is not an encoding or decoding algorithm, it is widely used with image codecs in image transcoding applications.
diff --git a/codec/docs/tutorial.rst b/codec/docs/tutorial.rst
new file mode 100644
index 0000000000..d26cae5589
--- /dev/null
+++ b/codec/docs/tutorial.rst
@@ -0,0 +1,898 @@
+.. 
+   Copyright 2019-2020 Xilinx, Inc.
+  
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+.. meta::
+   :keywords: Vitis, Codec, Vitis Codec Library, Alveo
+   :description: Vitis Codec Library is an open-sourced Vitis library written in C/C++ for accelerating image coding, decoding and related processing algorithms.
+   :xlnxdocumentclass: Document
+   :xlnxdocumenttype: Tutorials
+
+.. _brief:
+
+================================
+Vitis Codec Library Tutorial
+================================
+
+Vitis Codec and Hardware Acceleration
+-----------------------------------------
+
+Image encoding and decoding (codec in short) are very common and important operations in Internet applications. The Vitis Codec library provides a set of acceleration APIs to accelerate image encoding, decoding and other related algorithms. This tutorial will be based on the popular JPEG decoding and WebP encoding to show how to accelerate image codec projects by using these APIs.
+
+
+The tutorial includes four labs:
+
+* Lab-1: How Vitis Codec Library Works
+
+* Lab-2: Using L1-level API to evaluate JPEG decoding acceleration
+
+* Lab-3: Using L2-level API to implement a single-kernel acceleration for JPEG decoding 
+
+* Lab-4: Using multi-kernel solution to accelerate WebP encoding based on open-source project
+
+
+Lab-1: How Vitis Codec Library Works
+-----------------------------------------
+
+Vitis Codec Library is an open-sourced library written in HLS C/C++ for the acceleration of image processing. It aims to provide reference designs for image codec algorithms that fit the `Xilinx Alveo Series <https://www.xilinx.com/products/boards-and-kits/alveo.html>`_
+acceleration cards. The APIs in Vitis Codec Library are classified into two layers, L1 and L2, each serving a different audience.
+
+* L1 APIs are located at ``Vitis_Libraries/codec/L1``. They are basic components that are used to compose compute units. The L1 APIs are all well-optimized HLS designs and are able to fit into various resource constraints.
+
+* L2 APIs are located at ``Vitis_Libraries/codec/L2``. They are a set of compute-unit designs implemented in HLS code that run on Alveo cards. These L2 APIs need to be compiled as OpenCL kernels and are called through OpenCL APIs.
+
+Get the Vitis Codec Library
+----------------------------------
+
+Get the Dependencies
+^^^^^^^^^^^^^^^^^^^^
+
+| `Vitis <https://www.xilinx.com/products/design-tools/vitis/vitis-platform.html>`_, Instructions to install Vitis can be found `here <https://docs.xilinx.com/r/en-US/ug1393-vitis-application-acceleration/Installation>`_.
+| `Alveo U50 packages <https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/alveo/u50.html>`_, Instructions to deploy Alveo U50 can be found `here <https://www.xilinx.com/support/documentation/boards_and_kits/accelerator-cards/1_8/ug1370-u50-installation.pdf>`_.
+| `Alveo U200 packages <https://www.xilinx.com/products/boards-and-kits/alveo/u200.html#gettingStarted>`_, Instructions to deploy Alveo U200 can be found `here <https://www.xilinx.com/cgi-bin/docs/pdfdoc?url=https://www.xilinx.com/support/documentation/boards_and_kits/accelerator-cards/2_0/ug1301-getting-started-guide-alveo-accelerator-cards.pdf>`_.
+
+Setup Environment
+^^^^^^^^^^^^^^^^^
+
+.. code-block:: shell
+
+   #!/bin/bash
+   source <Vitis_install_path>/Vitis/2022.1/settings64.sh
+   source <install_path_xrt>/xrt/setup.sh
+   export PLATFORM_REPO_PATHS=<install_path_platforms>
+   export DEVICE=xilinx_u200_gen3x16_xdma_2_202110_1
+   export TARGET=sw_emu
+
+Note: The TARGET environment variable can be set to sw_emu, hw_emu or hw, depending on which mode is expected to run.
+sw_emu is for C-level emulation, hw_emu is for RTL-level emulation, and hw is for a real on-board test. For more information about the Vitis build targets, please have a look `here <https://docs.xilinx.com/r/en-US/ug1393-vitis-application-acceleration/Build-Targets?tocId=8ijg9En3MQ_7CJBZrUFENw>`_.
+
+Download the Vitis Codec Library
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: shell
+
+   #!/bin/bash
+   git clone https://github.com/Xilinx/Vitis_Libraries.git
+   cd Vitis_Libraries/codec
+
+Command to Run L1 cases
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: shell
+
+   #!/bin/bash
+   cd L1/tests/jpegdec                    # jpegdec is an example case. Please change directory to any other case in L1/tests if interested
+   make help                              # show available make command
+   make run CSIM=1                        # run C level simulation of the HLS code
+   make run CSYNTH=1 COSIM=1              # run RTL level simulation of the HLS code
+   make cleanall
+
+Test control variables are:
+
+* ``CSIM`` for C level simulation.
+* ``CSYNTH`` for high level synthesis to RTL.
+* ``COSIM`` for co-simulation between software test bench and generated RTL.
+* ``VIVADO_SYN`` for synthesis by Vivado.
+* ``VIVADO_IMPL`` for implementation by Vivado.
+
+For all these variables, setting to ``1`` indicates execution while ``0`` for skipping.
+The default value of all these control variables is ``0``, so they can be omitted from the command line
+if the corresponding step is not wanted.
+
+For more information about L1 APIs please have a look at :ref:`tutorial::lab2`.
+
+Command to Run L2 cases
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: shell
+
+   #!/bin/bash
+   cd L2/demos/jpegDec                    # jpegDec is an example case. Please change directory to any other cases in L2/demos if interested.
+   make help                              # show available make command
+   make host                              # build the binary running on host
+   make build                             # build the binary running on Alveo
+   make run                               # run the entire program
+   make cleanall
+
+Here, ``TARGET`` decides the FPGA binary type
+
+* ``sw_emu`` is for software emulation
+* ``hw_emu`` is for hardware emulation
+* ``hw`` is for deployment on physical card. (Compilation to hardware binary often takes hours.)
+
+Besides ``run``, the Vitis case makefile also allows ``host`` and ``xclbin`` as build target.
+
+For more information about L2 APIs please have a look at :ref:`tutorial::lab3`.
+
+
+.. _tutorial::lab2:
+
+Lab-2: Using L1-level API to evaluate JPEG decoding acceleration
+----------------------------------------------------------------------------------
+
+Lab purpose
+^^^^^^^^^^^^^^
+
+Before using the Vitis flow to build a full-function kernel running on hardware, users may want to use a relatively simple flow to estimate performance and resource consumption for some key modules of a complex algorithm. In this lab, users will evaluate a key module called 'kernel_parser_decoder', which involves JPEG parsing and Huffman decoding. Users will get an exported IP of the key module at the end of this lab, but this is just the first step towards a successful design.
+
+Operation steps
+^^^^^^^^^^^^^^^^^^^^
+
+(1) Learn about ``run_hls.tcl`` file
+"""""""""""""""""""""""""""""""""""""""
+
+In Vitis libraries, all L1 flows are controlled by a tcl file named ``run_hls.tcl``. The file for this lab can be found at ``L1/tests/jpegdec/run_hls.tcl``. Compared to the L2 flow, which is based on OpenCL kernels, the L1 flow allows users to quickly set the top-level function so that they can focus on a few functions of interest, analyze the performance bottlenecks of these functions, or run rapid synthesis and simulation without any source code modification.
+
+(2) CSIM:
+"""""""""""""""""""""""""""""""""""""""
+
+1. Build and run one of the following using U200 platform
+
+.. code-block:: shell
+
+   cd L1/tests/jpegdec
+
+   make run DEVICE=xilinx_u200_gen3x16_xdma_2_202110_1.xpfm CSIM=1
+
+   # DEVICE is case-insensitive and supports awk regex.
+
+   # Alternatively, the FPGA part can be specified via XPART. When XPART is set, DEVICE will be ignored.
+
+   make run XPART=xcu200-fsgd2104-2-e CSIM=1
+
+   # delete generated files
+   make clean
+
+2. Change input jpeg file for test
+
+.. code-block:: shell
+
+   vi run_hls.tcl
+
+   # update the *.jpg path after the "-JPEGFile"
+   -JPEGFile *.jpg
+
+
+Example csim output:
+
+.. code-block:: shell
+
+   ------------ Test for decode image.jpg  -------------
+   WARNING: Vitis_Libraries/codec/L1/images/t0.jpg will be opened for binary read.
+   51193 entries read from Vitis_Libraries/codec/L1/images/t0.jpg
+   hls_mcuv=33, hls_mcuh=39, hls_mcuc=1287,
+   huffman 1 bits codes is :0b0000000000000000
+   huffman 2 bits codes is :0b0000000000000000
+   huffman 3 bits codes is :0b0000000000000010
+   huffman 4 bits codes is :0b0000000000001110
+   huffman 5 bits codes is :0b0000000000011110
+   huffman 6 bits codes is :0b0000000000111110
+   huffman 7 bits codes is :0b0000000001111110
+   huffman 8 bits codes is :0b0000000011111110
+   huffman 9 bits codes is :0b0000000111111110
+   huffman 10 bits codes is :0b0000001111111110
+   huffman 11 bits codes is :0b0000011111111100
+   huffman 12 bits codes is :0b0000111111111000
+   huffman 13 bits codes is :0b0001111111110000
+   huffman 14 bits codes is :0b0011111111100000
+   huffman 15 bits codes is :0b0111111111000000
+   huffman 16 bits codes is :0b1111111110000000
+   ...
+
+   the end 3 blocks before zigzag are :
+   ffffffb6,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   ffffffe6,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0015,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   Ready for next image!
+   INFO: [SIM 211-1] CSim done with 0 errors.
+
+To help users inspect the result, the key module prints out the last three 8x8 blocks of DCT coefficients of the last MCU, one each for Y, U and V.
+
+(3) Synthesis:
+"""""""""""""""""""""""""""""""""""""""
+
+1. Build and run one of the following using U200 platform
+
+.. code-block:: shell
+
+   make run DEVICE=xilinx_u200_gen3x16_xdma_2_202110_1.xpfm CSYNTH=1
+
+   # DEVICE is case-insensitive and supports awk regex.
+
+   # Alternatively, the FPGA part can be specified via XPART. When XPART is set, DEVICE will be ignored.
+
+   make run XPART=xcu200-fsgd2104-2-e CSYNTH=1
+
+2. Quickly reset the top-level function so that users can focus on a few functions of interest
+
+.. code-block:: shell
+
+   vi run_hls.tcl
+
+   # update the "set_top kernel_parser_decoder", for example "set_top Huffman_decoder", the name of top is the function name in the design codes.
+   set_top kernel_parser_decoder --> set_top Huffman_decoder
+
+Rerunning the CSYNTH command then allows users to analyze the performance bottlenecks of the "Huffman_decoder" function, or to run rapid synthesis and simulation without any source code modification.
+
+
+Example Synthesis output:
+
+.. code-block:: shell
+
+   Vitis HLS - High-Level Synthesis from C, C++ and OpenCL v2022.1 (64-bit)
+   ...
+
+   INFO: [HLS 200-1510] Running: set_top kernel_parser_decoder
+   INFO: [HLS 200-1510] Running: open_solution -reset solution1
+   ...
+   
+   INFO: [VHDL 208-304] Generating VHDL RTL for kernel_parser_decoder.
+   INFO: [VLOG 209-307] Generating Verilog RTL for kernel_parser_decoder.
+   INFO: [HLS 200-790] **** Loop Constraint Status: All loop constraints were NOT satisfied.
+   INFO: [HLS 200-789] **** Estimated Fmax: 271.96 MHz
+   INFO: [HLS 200-111] Finished Command csynth_design CPU user time: 65.56 seconds. CPU system time: 4.61 seconds. Elapsed time: 73.87 seconds; current allocated memory: 448.0
+   00 MB.
+   INFO: [HLS 200-112] Total CPU user time: 71.64 seconds. Total CPU system time: 6.21 seconds. Total elapsed time: 80.36 seconds; peak allocated memory: 1.195 GB.
+
+Loop constraints may not be satisfied, as the clock target is set to 300MHz in run_hls.tcl, and different HLS tool versions may result in a different "Estimated Fmax".
+
+3. Check the unsatisfied path
+
+Read the CSYNTH report and grep for "critical path", as shown below:
+
+.. code-block:: shell
+
+   INFO: [HLS 200-10] ----------------------------------------------------------------
+   INFO: [HLS 200-42] -- Implementing module 'Huffman_decoder_Pipeline_DECODE_LOOP'
+   INFO: [HLS 200-10] ----------------------------------------------------------------
+   INFO: [SCHED 204-11] Starting scheduling ...
+   INFO: [SCHED 204-61] Pipelining loop 'DECODE_LOOP'.
+   INFO: [HLS 200-1470] Pipelining result : Target II = 1, Final II = 1, Depth = 4, loop 'DECODE_LOOP'
+   WARNING: [HLS 200-1016] The critical path in module 'Huffman_decoder_Pipeline_DECODE_LOOP' consists of the following:   'add' operation 
+   ('add_ln503', Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:503) [582]  (0.705 ns)
+      'shl' operation ('shl_ln503', Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:503) [584]  (0 ns)
+      'icmp' operation ('icmp_ln503', Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:503) [585]  (0.859 ns)
+      'and' operation ('and_ln503', Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:503) [591]  (0 ns)
+      'select' operation ('select_ln503', Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:503) [592]  (0 ns)
+      'select' operation ('block_tmp', Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:498) [593]  (0.243 ns)
+      'add' operation ('block', Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:516) [599]  (0.785 ns)
+      multiplexor before 'phi' operation ('block') with incoming values : ('lastDC_load', Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:516) ('block', 
+      Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:516) [628]  (0.387 ns)
+      'phi' operation ('block') with incoming values : ('lastDC_load', Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:516) ('block', 
+      Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:516) [628]  (0 ns)
+      multiplexor before 'phi' operation ('empty_304', Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:516) with incoming values : ('lastDC_load',
+      Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:516) ('block', Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:516) ('lastDC_load_1') [632]   
+      (0.387 ns)
+      'phi' operation ('empty_304', Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:516) with incoming values : ('lastDC_load', Vitis_Libraries/codec/
+      L1/src/XAcc_jpegdecoder.cpp:516) ('block', Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:516) ('lastDC_load_1') [632]  (0 ns)
+      'select' operation ('select_ln549_2', Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:549) [641]  (0.243 ns)
+      'store' operation ('lastDC_write_ln592', Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:592) of variable 'select_ln549_2', 
+      Vitis_Libraries/codec/L1/src/XAcc_jpegdecoder.cpp:549 on local variable 'op' [651]  (0.453 ns)
+   ...
+
+Then check the report for this loop with the command "vi test.prj/solution1/syn/report/Huffman_decoder_Pipeline_DECODE_LOOP_csynth.rpt", and open the GUI at the same time.
+
+In the Schedule Viewer in GUI, users could check the details of the circuit: 
+
+.. _tutorial-figure-lab2-6:
+.. figure:: /images/tutorial/L2jpegdec-6.PNG
+      :width: 100%
+      :align: center
+
+Comparing the two above, it can be seen that the timing is not satisfied because the shift register and comparator are wide. There is no better optimization method for this situation. Users can reduce the bit width of this circuit according to their needs to improve the timing. Of course, this change may also reduce the bandwidth, so a trade-off between width and frequency is needed to achieve the best performance.
+
+(4) COSIM:
+"""""""""""""
+
+1. Build and run one of the following with U200 platform
+
+.. code-block:: shell
+
+    make run DEVICE=xilinx_u200_gen3x16_xdma_2_202110_1.xpfm COSIM=1
+
+    # DEVICE is case-insensitive and supports awk regex.
+
+    # Alternatively, the FPGA part can be specified via XPART. When XPART is set, DEVICE will be ignored.
+
+    make run XPART=xcu200-fsgd2104-2-e COSIM=1
+
+Example output:
+
+.. code-block:: shell
+
+   ...
+
+   # xsim {kernel_parser_decoder} -autoloadwcfg -tclbatch {kernel_parser_decoder.tcl}
+   Time resolution is 1 ps
+   source kernel_parser_decoder.tcl
+   ## run all
+   ////////////////////////////////////////////////////////////////////////////////////
+   // Inter-Transaction Progress: Completed Transaction / Total Transaction
+   // Intra-Transaction Progress: Measured Latency / Latency Estimation * 100%
+   //
+   // RTL Simulation : "Inter-Transaction Progress" ["Intra-Transaction Progress"] @ "Simulation Time"
+   ////////////////////////////////////////////////////////////////////////////////////
+   // RTL Simulation : 0 / 1 [n/a] @ "109000"
+   // RTL Simulation : 1 / 1 [n/a] @ "543586000"
+   ////////////////////////////////////////////////////////////////////////////////////
+   $finish called at time : 543586000 ps : File "Vitis_Libraries/codec/L1/tests/jpegdec/test.prj/solution1/sim/verilog/kernel_parser_decoder.autotb.v" Line
+   1564
+   run: Time (s): cpu = 00:00:02 ; elapsed = 00:01:18 . Memory (MB): peak = 2840.148 ; gain = 0.000 ; free physical = 28775 ; free virtual = 213419
+   ## quit
+   INFO: xsimkernel Simulation Memory Usage: 307116 KB (Peak: 371652 KB), Simulation CPU Usage: 77750 ms
+   INFO: [Common 17-206] Exiting xsim at Sun Apr 17 20:36:36 2022...
+   INFO: [COSIM 212-316] Starting C post checking ...
+
+   ------------ Test for decode image.jpg  -------------
+   WARNING: Vitis_Libraries/codec/L1/images/t0.jpg will be opened for binary read.
+   51193 entries read from Vitis_Libraries/codec/L1/images/t0.jpg
+   ****the end 3 blocks before zigzag are :
+   ffffffb6,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   ffffffe6,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0015,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,  0000,
+   Ready for next image!
+   INFO: [COSIM 212-1000] *** C/RTL co-simulation finished: PASS ***
+   ...
+
+
+(5) Design with export
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+In this step, the HLS tool will run CSYNTH, VIVADO_SYN and VIVADO_IMPL flow to generate the IP file.
+
+1. Build and run one of the following using U200 platform
+
+.. code-block:: shell
+
+   make run DEVICE=xilinx_u200_gen3x16_xdma_2_202110_1.xpfm VIVADO_IMPL=1
+
+   # DEVICE is case-insensitive and supports awk regex.
+
+   # Alternatively, the FPGA part can be specified via XPART. When XPART is set, DEVICE will be ignored.
+
+   make run XPART=xcu200-fsgd2104-2-e VIVADO_IMPL=1
+
+Example output:
+
+.. code-block:: shell
+
+   Implementation tool: Xilinx Vivado v.2022.1
+
+   ...
+
+   #=== Post-Implementation Resource usage ===
+   SLICE:            0
+   LUT:           7945
+   FF:            8073
+   DSP:             12
+   BRAM:             5
+   URAM:             0
+   LATCH:            0
+   SRL:            678
+   CLB:           1746
+
+   #=== Final timing ===
+   CP required:                     3.330
+   CP achieved post-synthesis:      3.605
+   CP achieved post-implementation: 3.347
+   Timing not met
+
+
+The report shows 'Timing not met', which means that the Vivado implementation process cannot achieve the targeted frequency (300MHz, set in run_hls.tcl). As this module is always the bottleneck of the entire JPEG decoding architecture, the final JPEG decoder is likely to work at 270 to 280 MHz. This is a common situation for complex HLS designs. This tutorial does not discuss solutions for timing problems, but in most cases there is still a chance to improve the frequency.
+
+Based on the above results, we can make some estimates about the throughputs, including:
+
+* The design can process up to 270 million Huffman symbols per second
+* Assuming a compression ratio of 4 ~ 8 for a JPEG image, the final output speed will be up to 1 ~ 2GB of YUV data per second
+* If the inverse quantization and inverse DCT modules need to match the throughput of the Huffman decoder, it is best to recover 4 ~ 8 pixels per cycle
+
+Compared with synthesis, ``Export`` gives more accurate performance and resource consumption figures. Users usually do not need to run ``Export`` for each design iteration, but it is recommended to run it periodically to confirm that the performance and area of the design meet the requirements.
+
+Lab summary
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* L1 is based on HLS flow. The main steps include CSIM, synthesis, COSIM and export which are controlled by a ``run_hls.tcl`` file
+* L1 flow is helpful to estimate resources and performance
+* L1 flow makes it easier to change the top-level function
+
+
+.. _tutorial::lab3:
+
+Lab-3: Using L2-level API to implement a single-kernel acceleration for JPEG decoding
+----------------------------------------------------------------------------------
+
+Lab purpose
+^^^^^^^^^^^^^
+
+To learn:
+
+* Basic process of L2 operation
+
+* Implement complete accelerated application
+
+Operation steps
+^^^^^^^^^^^^^^^^^^
+
+(1) Understand the Work Directory
+"""""""""""""""""""""""""""""""""""""""
+
+* ``Makefile``: L2 flow control file
+
+* ``conn_u200.cfg``: to specify the external memory ports map. Some constraints of Vivado can also be added here
+
+* ``description.json``: The description of the L2 API used for creating the Makefile automatically 
+
+* ``utils.mk``: included by the Makefile
+
+Setup environment
+
+.. code-block:: shell
+
+   source <install_path_vitis>/installs/lin64/Vitis/2022.1/settings64.sh
+   source <install_path_xrt>/xrt/setup.sh
+   export PLATFORM_REPO_PATHS=<install_path_platform>/platforms
+
+
+(2) Build kernel for different modes
+"""""""""""""""""""""""""""""""""""""""
+
+.. code-block:: shell
+
+   cd L2/demos/jpegDec
+
+   # build and run one of the following using U200 platform
+   make run TARGET=sw_emu DEVICE=xilinx_u200_gen3x16_xdma_2_202110_1.xpfm
+
+   # delete generated files
+   make cleanall
+
+Here, ``TARGET`` decides the FPGA binary type
+
+* ``sw_emu`` is for software emulation
+* ``hw_emu`` is for hardware emulation
+* ``hw`` is for deployment on physical card. (Compilation to hardware binary often takes hours.)
+
+Besides ``run``, the Vitis case makefile also allows ``host`` and ``xclbin`` as build target.
+
+(3) Run kernel in Software-Emulation mode
+""""""""""""""""""""""""""""""""""""""""""""
+
+.. code-block:: shell
+
+   # build and run JPEG Decoder using U200 platform
+   make run TARGET=sw_emu DEVICE=xilinx_u200_gen3x16_xdma_2_202110_1.xpfm
+
+Example output:
+
+.. code-block:: shell
+
+   ...
+
+   Info: Test passed
+   INFO: writing the YUV file!
+   WARNING: t0.raw will be opened for binary write.
+   WARNING: t0.yuv will be opened for binary write.
+   INFO: fmt 1, bas_info->mcu_cmp = 6
+   INFO: bas_info->hls_mbs[cmp] 4, 1, 1
+   3F, 3F, 3F, 3F, 3F, 3F, 3F, 3F,
+   3F, 3F, 3F, 3F, 3F, 3F, 3F, 3F,
+   3F, 3F, 3F, 3F, 3F, 3F, 3F, 3F,
+   3F, 3F, 3E, 3E, 3E, 3E, 3E, 3E,
+   3D, 3E, 3E, 3E, 3F, 3F, 3F, 3F,
+   3F, 3F, 3F, 3F, 40, 40, 40, 40,
+   40, 40, 40, 40, 40, 40, 40, 40,
+   3F, 3F, 3F, 3F, 3F, 3F, 3F, 3F,
+   3E, 3E, 3E, 3E, 3E, 3E, 3E, 3E,
+   40, 40, 40, 40, 40, 40, 40, 40,
+   3F, 40, 40, 40, 40, 40, 40, 40,
+   40, 40, 40, 40, 40, 3F, 3F, 3F,
+   41, 41, 40, 40, 3F, 40, 40, 40,
+   40, 40, 40, 41, 41, 41, 41, 41,
+   41, 41, 41, 41, 41, 41, 41, 41,
+   40, 40, 40, 41, 41, 41, 41, 41,
+   63, 63, 63, 63, 63, 63, 63, 63,
+   63, 63, 63, 63, 63, 63, 63, 63,
+   63, 63, 63, 63, 63, 63, 63, 63,
+   63, 63, 62, 62, 62, 62, 62, 62,
+   61, 62, 62, 62, 63, 63, 63, 63,
+   63, 63, 63, 63, 64, 64, 64, 64,
+   64, 64, 64, 64, 64, 64, 64, 64,
+   63, 63, 63, 63, 63, 63, 63, 63,
+   62, 62, 62, 62, 62, 62, 62, 62,
+   64, 64, 64, 64, 64, 64, 64, 64,
+   63, 64, 64, 64, 64, 64, 64, 64,
+   64, 64, 64, 64, 64, 63, 63, 63,
+   65, 65, 64, 64, 63, 64, 64, 64,
+   64, 64, 64, 65, 65, 65, 65, 65,
+   65, 65, 65, 65, 65, 65, 65, 65,
+   64, 64, 64, 65, 65, 65, 65, 65,
+   Please open the YUV file with fmt 1 and (width, height) = (624, 528)
+
+   ...
+
+(4) Run kernel in Hardware-Emulation mode
+""""""""""""""""""""""""""""""""""""""""""""
+
+.. code-block:: shell
+
+   # build and run JPEG Decoder using U200 platform
+   make run TARGET=hw_emu DEVICE=xilinx_u200_gen3x16_xdma_2_202110_1.xpfm
+
+Now the test bench will run the case 10 times to calculate the average speed of the kernel.
+
+Example output:
+
+.. code-block:: shell
+
+   ...
+
+   ------------ Test for decode image.jpg  -------------
+   WARNING: Vitis_Libraries/codec/L2/demos/jpegDec/images/t0.jpg will be opened for binary read.
+   51193 entries read from Vitis_Libraries/codec/L2/demos/jpegDec/images/t0.jpg
+   Found Platform
+   Platform Name: Xilinx
+   Info: Context created
+   Info: Command queue created
+   INFO: Found Device=xilinx_u50_gen3x16_xdma_201920_3
+   INFO: Importing build_dir.hw_emu.xilinx_u50_gen3x16_xdma_201920_3/kernelJpegDecoder.xclbin
+   Loading: 'build_dir.hw_emu.xilinx_u50_gen3x16_xdma_201920_3/kernelJpegDecoder.xclbin'
+   Loading: 'build_dir.hw_emu.xilinx_u50_gen3x16_xdma_201920_3/kernelJpegDecoder.xclbin'
+   INFO: [HW-EMU 01] Hardware emulation runs simulation underneath. Using a large data set will result in long simulation times. It is recommended that a small dataset is 
+   used for faster execution. The flow uses approximate models for Global memories and interconnect and hence the performance data generated is approximate.
+   configuring penguin scheduler mode
+   scheduler config ert(0), dataflow(1), slots(16), cudma(1), cuisr(0), cdma(0), cus(1)
+   Info: Program created
+   INFO: Kernel has been created
+   Info: Kernel created
+   INFO: Kernel has been created
+   INFO: Finish kernel setup
+   INFO: Finish kernel execution
+   INFO: Finish E2E execution
+   -------------------------------------------------------
+   INFO: Data transfer from host to device: 360540 us
+   -------------------------------------------------------
+   INFO: Data transfer from device to host: 296951 us
+   -------------------------------------------------------
+   INFO: kernel 0: execution time 135012750 usec
+   INFO: kernel 1: execution time 131009663 usec
+   INFO: kernel 2: execution time 134012825 usec
+   INFO: kernel 3: execution time 133013391 usec
+   INFO: kernel 4: execution time 132012707 usec
+   INFO: kernel 5: execution time 133013044 usec
+   INFO: kernel 6: execution time 130013132 usec
+   INFO: kernel 7: execution time 130012762 usec
+   INFO: kernel 8: execution time 130012930 usec
+   INFO: kernel 9: execution time 135013237 usec
+   INFO: Average kernel execution per run: 132312644 us
+   -------------------------------------------------------
+   INFO: Average E2E per run: 1355900288 us
+   -------------------------------------------------------
+
+   ...
+
+   Please open the YUV file with fmt 1 and (width, height) = (624, 528)
+   WARNING: Vitis_Libraries/codec/L2/demos/jpegDec/images/t0.yuv.h will be opened for binary write.
+   Ready for next image!
+   INFO: [HW-EMU 06-0] Waiting for the simulator process to exit
+   INFO: [HW-EMU 06-1] All the simulator processes exited successfully
+
+(5) Run kernel in Hardware
+""""""""""""""""""""""""""""
+
+Now the test bench will run the case 10 times to calculate the average speed of the kernel.
+
+.. code-block:: shell
+
+   # build and run JPEG Decoder using U200 platform
+   make run TARGET=hw DEVICE=xilinx_u200_gen3x16_xdma_2_202110_1.xpfm
+
+Building the xclbin takes about 4 hours, so this is a good time for a coffee break.
+
+Example output:
+
+.. code-block:: shell
+
+   Found Platform
+   Platform Name: Xilinx
+   INFO: Found Device=xilinx_u200_gen3x16_xdma_2_202110_1
+   INFO: Importing kernelJpegDecoder.xclbin
+   Loading: 'kernelJpegDecoder.xclbin'
+   INFO: Kernel has been created
+   INFO: Finish kernel setup
+   ...
+
+   INFO: Finish kernel execution
+   INFO: Finish E2E execution
+   INFO: Data transfer from host to device: 108 us
+   INFO: Data transfer from device to host: 726 us
+   INFO: Average kernel execution per run: 1515 us
+   ...
+
+   INFO: android.yuv will be generated from the jpeg decoder's output
+   INFO: android.yuv is generated correctly
+
+So for this 1280x960 android.jpg file, the output throughput is about 1216 MB/s (1280 x 960 x 3 / 2 bytes of output divided by the 1515 us average kernel execution time).
+
+To check the output YUV file, download the player from https://sourceforge.net/projects/raw-yuvplayer/ . Then load rebuild_image.yuv, set the correct sample ratio and custom size in the software, and check the YUV file.
+
+Lab summary
+^^^^^^^^^^^^^^
+
+*  L2 flow is based on Vitis flow, and the main steps include sw_emu, hw_emu, and hw
+
+*  Run hardware acceleration application on a device
+
+
+.. _tutorial::lab4:
+
+Lab-4: Using multi-kernel solution to accelerate WebP encoding based on open-source project
+----------------------------------------------------------------------------------------------------
+
+Lab purpose
+^^^^^^^^^^^^^^
+
+The user's image codec may be based on an open-source project. This lab shows an acceleration process based on an open-source project, the WebP encoder. WebP image coding is not only more complex, but also involves HW/SW partitioning and the design of multiple kernels.
+In this lab you will learn:
+
+* L2 accelerated process for open source projects
+* Multi kernel acceleration process
+
+Operation steps
+^^^^^^^^^^^^^^^^
+
+(1) Open source project analysis and kernel partition
+""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+Here are two basic kernel partition principles:
+
+* 1. Focus on the operations whose computing workload is related to the image size, and try to separate out one-time or limited-time operations into pre-processing or post-processing so that they can be excluded from the kernel. Although the computation of image encoding is large, some pre-processing and post-processing workloads have no relation to the image size, so they can be kept outside the kernel. This situation is common for many image codec algorithms. For example, encoding always needs to calculate some quantization parameters using complex floating-point operations, but only a limited number of times per image. Another example is adding the header to the compressed bit-stream.
+* 2. Serially running modules whose large latency is related to the image size should be divided into different kernels to realize multi-kernel concurrency.
+     WebP can be divided into two serial modules: one for prediction and probability statistics, and the other for arithmetic coding. Since the arithmetic coding cannot start until the probability statistics module finishes scanning the entire image, the encoder should be divided into two kernels. In this way, when processing multiple images, the two kernels can run concurrently, which increases the system throughput.
+
+(2) Project files for multi-kernel design
+""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+* ``Makefile``
+* ``conn_u200.ini``
+* ``description.json``
+* ``utils.mk``
+
+(3) Software Emulation
+"""""""""""""""""""""""""
+
+.. code-block:: shell
+
+   cd L2/demos/webpEnc
+   make run TARGET=sw_emu DEVICE=xilinx_u200_gen3x16_xdma_2_202110_1
+
+(4) Hardware Emulation
+"""""""""""""""""""""""""
+
+.. code-block:: shell
+
+   cd L2/demos/webpEnc
+   make run TARGET=hw_emu DEVICE=xilinx_u200_gen3x16_xdma_2_202110_1
+
+report path: reports/_x.hw_emu.xilinx_u200_gen3x16_xdma_2_202110_1/webp_IntraPredLoop2_NoOut_1/hls_reports/webp_IntraPredLoop2_NoOut_1_csynth.rpt
+
+.. code-block:: shell
+
+    +---------------------+---------+------+---------+---------+-----+
+    |         Name        | BRAM_18K|  DSP |    FF   |   LUT   | URAM|
+    +---------------------+---------+------+---------+---------+-----+
+    |DSP                  |        -|     -|        -|        -|    -|
+    |Expression           |        -|     -|        0|        2|    -|
+    |FIFO                 |        -|     -|        -|        -|    -|
+    |Instance             |      105|   387|   119670|   178708|    8|
+    |Memory               |        -|     -|        -|        -|    -|
+    |Multiplexer          |        -|     -|        -|      101|    -|
+    |Register             |        -|     -|      392|        -|    -|
+    +---------------------+---------+------+---------+---------+-----+
+    |Total                |      105|   387|   120062|   178811|    8|
+    +---------------------+---------+------+---------+---------+-----+
+    |Available SLR        |     1440|  2280|   788160|   394080|  320|
+    +---------------------+---------+------+---------+---------+-----+
+    |Utilization SLR (%)  |        7|    16|       15|       45|    2|
+    +---------------------+---------+------+---------+---------+-----+
+    |Available            |     4320|  6840|  2364480|  1182240|  960|
+    +---------------------+---------+------+---------+---------+-----+
+    |Utilization (%)      |        2|     5|        5|       15|   ~0|
+    +---------------------+---------+------+---------+---------+-----+
+
+report path: reports/_x.hw.xilinx_u200_gen3x16_xdma_2_202110_1/webp_2_ArithmeticCoding_1/hls_reports/webp_2_ArithmeticCoding_1_csynth.rpt
+
+.. code-block:: shell
+
+    +---------------------+---------+------+---------+---------+-----+
+    |         Name        | BRAM_18K|  DSP |    FF   |   LUT   | URAM|
+    +---------------------+---------+------+---------+---------+-----+
+    |DSP                  |        -|     -|        -|        -|    -|
+    |Expression           |        -|     -|        0|     1127|    -|
+    |FIFO                 |        -|     -|        -|        -|    -|
+    |Instance             |       24|     3|    26227|    33840|    0|
+    |Memory               |        1|     -|        0|        0|    0|
+    |Multiplexer          |        -|     -|        -|     1610|    -|
+    |Register             |        -|     -|     1415|        -|    -|
+    +---------------------+---------+------+---------+---------+-----+
+    |Total                |       25|     3|    27642|    36577|    0|
+    +---------------------+---------+------+---------+---------+-----+
+    |Available SLR        |     1440|  2280|   788160|   394080|  320|
+    +---------------------+---------+------+---------+---------+-----+
+    |Utilization SLR (%)  |        1|    ~0|        3|        9|    0|
+    +---------------------+---------+------+---------+---------+-----+
+    |Available            |     4320|  6840|  2364480|  1182240|  960|
+    +---------------------+---------+------+---------+---------+-----+
+    |Utilization (%)      |       ~0|    ~0|        1|        3|    0|
+    +---------------------+---------+------+---------+---------+-----+
+
+(5) Hardware Build and Check Resource Consumption
+""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+.. code-block:: shell
+
+   cd L2/demos/webpEnc
+   make run TARGET=hw DEVICE=xilinx_u200_gen3x16_xdma_2_202110_1
+
+report path: _x_temp.hw.xilinx_u200_gen3x16_xdma_2_202110_1/link/vivado/vpl/prj/prj.runs/impl_1/kernel_util_routed.rpt
+
+.. code-block:: shell
+
+    +----------------------------------+------------------+------------------+-------------------+----------------+---------------+----------------+
+    | Name                             | LUT              | LUTAsMem         | REG               | BRAM           | URAM          | DSP            |
+    +----------------------------------+------------------+------------------+-------------------+----------------+---------------+----------------+
+    | Platform                         | 192064 [ 16.25%] |  17282 [  2.92%] |  268446 [ 11.35%] |  314 [ 14.54%] |  20 [  2.08%] |   10 [  0.15%] |
+    | User Budget                      | 990176 [100.00%] | 574558 [100.00%] | 2096034 [100.00%] | 1846 [100.00%] | 940 [100.00%] | 6830 [100.00%] |
+    |    Used Resources                |  69389 [  7.01%] |   7136 [  1.24%] |   91572 [  4.37%] |   87 [  4.71%] |  10 [  1.06%] |  414 [  6.06%] |
+    |    Unused Resources              | 920787 [ 92.99%] | 567422 [ 98.76%] | 2004462 [ 95.63%] | 1759 [ 95.29%] | 930 [ 98.94%] | 6416 [ 93.94%] |
+    | webp_2_ArithmeticCoding_1        |  16065 [  1.62%] |   2520 [  0.44%] |   22841 [  1.09%] |   15 [  0.81%] |   0 [  0.00%] |    4 [  0.06%] |
+    |    webp_2_ArithmeticCoding_1_1   |  16065 [  1.62%] |   2520 [  0.44%] |   22841 [  1.09%] |   15 [  0.81%] |   0 [  0.00%] |    4 [  0.06%] |
+    | webp_IntraPredLoop2_NoOut_1      |  53324 [  5.39%] |   4616 [  0.80%] |   68731 [  3.28%] |   72 [  3.90%] |  10 [  1.06%] |  410 [  6.00%] |
+    |    webp_IntraPredLoop2_NoOut_1_1 |  53324 [  5.39%] |   4616 [  0.80%] |   68731 [  3.28%] |   72 [  3.90%] |  10 [  1.06%] |  410 [  6.00%] |
+    +----------------------------------+------------------+------------------+-------------------+----------------+---------------+----------------+
+
+(6) Hardware Running
+"""""""""""""""""""""""""
+
+Webp Input Arguments:
+
+.. code-block:: shell
+
+   Usage: cwebp -[-use_ocl -q -o]
+         -xclbin :     the kernel file
+         list.rst:     the input list
+         -use_ocl:     should be kept
+         -q:           compression quality
+         -o:           output directory
+
+Compared to the original command-line parameters, there are three differences here. The first is '-xclbin', which specifies the kernel file. The second is that the input image file is replaced by a list file in which multiple input images can be listed line by line. The third is '-use_ocl', which enables the Vitis flow.
+
+The following log shows the host output when running on the board. The times listed in the log are not accurate.
+
+.. code-block:: shell
+
+    ./cwebp -xclbin kernel.xclbin list.rst -use_ocl -q 80 -o ./images
+    INFO: CreateKernel start.
+    INFO: Number of Platforms: 1
+    INFO: Selected Platform: Xilinx
+    INFO: Number of devices for platform 0: 2
+    INFO: target_device found:   xilinx_u200_gen3x16_xdma_base_2
+    INFO: target_device chosen:  xilinx_u200_gen3x16_xdma_base_2
+    Info: Context created
+    Info: Command queue created
+    INFO: OpenCL Version: 1.-48
+    INFO: Loading kernel.xclbin
+    INFO: Loading kernel.xclbin Finished
+    Info: Program created
+    Info: Kernel created
+    Info: Kernel created
+    INFO: CreateKernel finished. Computation time is 328.504000 (ms)
+    
+    INFO: Create buffers started.
+    INFO: Create buffers finished. Computation time is 48.225000 (ms)
+    
+    INFO: WebPEncodeAsync Starts...
+    INFO: Nloop = 1
+    INFO: VP8EncTokenLoopAsync starts ...
+    
+    *** Picture: 1 - 1,  Buffer: 0, Instance: 0, Event: 0 ***
+    HtoD webpen.c
+    INFO: Host2Device finished. Computation time is 0.874000 (ms)
+    INFO: PredKernel Finished. Computation time is 0.258000 (ms)
+    INFO: ACKernel Finished. Computation time is 0.155000 (ms)
+    INFO: Device2Host finished. Computation time is 0.118000 (ms)
+    
+    INFO: Loop of Pictures Finished. Computation time is 17.825000 (ms)
+    INFO: VP8EncTokenLoopAsync Finished. Computation time is 24.683000 (ms)
+    INFO: WebPEncodeAsync Finished. Computation time is 31.885000 (ms)
+    
+    INFO: Release Kernel.
+    Info: Test passed
+
+
+To get the accurate kernel execution time, add a file named "xrt.ini" and fill it with the following directives.
+
+.. code-block:: shell
+
+    #Start of Debug group
+    [Debug]
+    profile=true
+    timeline_trace=true
+    data_transfer_trace=fine
+    app_debug=true
+    opencl_summary=true
+    opencl_trace=true
+    
+    #Start of Runtime group
+    [Runtime]
+    runtime_log = console
+
+.. code-block:: shell
+
+    Kernel Execution
+    Kernel,Number Of Enqueues,Total Time (ms),Minimum Time (ms),Average Time (ms),Maximum Time (ms),
+    webp_2_ArithmeticCoding_1,1,2.95381,2.95381,2.95381,2.95381,
+    webp_IntraPredLoop2_NoOut_1,1,3.61861,3.61861,3.61861,3.61861,
+
+For more information about how to analyze performance, please refer to `Application Acceleration Development (UG1393) <https://docs.xilinx.com/r/2020.2-English/ug1393-vitis-application-acceleration/Profiling-Optimizing-and-Debugging-the-Application>`_
+
+
+Lab summary
+^^^^^^^^^^^^^^
+
+* Focus on the operations whose computing workload is related to the image size
+
+* Serial processed modules may be divided into multiple kernels to realize multi-kernel concurrency
+
+Tutorial Summary
+------------------
+
+The JPEG decoder and WebP encoder are very representative of image transcoding applications. The Codec Library also provides many other open-source and self-developed APIs, some of which support the development flow based on System Compiler starting from 22.1. The tutorial will be extended to cover more codecs and their combinations, more flows, and more classic applications.
diff --git a/data_analytics/Jenkinsfile b/data_analytics/Jenkinsfile
index bfedb9cc26..4e083a65d0 100644
--- a/data_analytics/Jenkinsfile
+++ b/data_analytics/Jenkinsfile
@@ -1,4 +1,4 @@
 @Library('pipeline-library')_
-VitisLibPipeline (branch: 'next', libname: 'xf_DataAnalytics', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
-                  upstream_dependencies: 'xf_utils_hw,next,../utils; xf_compression,next,../data_compression; xf_security,next,../security; xf_graph,next,../graph; xf_database,next,../database',
-                  devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_stable_latest')
+VitisLibPipeline (branch: 'main', libname: 'xf_DataAnalytics', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
+                  upstream_dependencies: 'xf_utils_hw,main,../utils; xf_compression,main,../data_compression; xf_security,main,../security; xf_graph,main,../graph; xf_database,main,../database',
+                  devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_released')
diff --git a/data_analytics/L1/tests/regression/LASSORegressionPredict/test.cpp b/data_analytics/L1/tests/regression/LASSORegressionPredict/test.cpp
index 9965bbaae5..eb66740e60 100644
--- a/data_analytics/L1/tests/regression/LASSORegressionPredict/test.cpp
+++ b/data_analytics/L1/tests/regression/LASSORegressionPredict/test.cpp
@@ -83,7 +83,7 @@ int main() {
     bool res = true;
     for (int i = 0; i < rows; i++) {
         eRetStrm.read();
-        res = diff(retStrm[0].read(), golden[i]);
+        res &= diff(retStrm[0].read(), golden[i]);
     }
     eRetStrm.read();
 
diff --git a/data_analytics/L1/tests/regression/linearLeastSquareRegressionPredict/test.cpp b/data_analytics/L1/tests/regression/linearLeastSquareRegressionPredict/test.cpp
index 3a66a73317..00c0388d3f 100644
--- a/data_analytics/L1/tests/regression/linearLeastSquareRegressionPredict/test.cpp
+++ b/data_analytics/L1/tests/regression/linearLeastSquareRegressionPredict/test.cpp
@@ -83,7 +83,7 @@ int main() {
     bool res = true;
     for (int i = 0; i < rows; i++) {
         eRetStrm.read();
-        res = diff(retStrm[0].read(), golden[i]);
+        res &= diff(retStrm[0].read(), golden[i]);
     }
     eRetStrm.read();
 
diff --git a/data_analytics/L2/benchmarks/classification/naive_bayes/Makefile b/data_analytics/L2/benchmarks/classification/naive_bayes/Makefile
index b63e082a39..1cb695f179 100644
--- a/data_analytics/L2/benchmarks/classification/naive_bayes/Makefile
+++ b/data_analytics/L2/benchmarks/classification/naive_bayes/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/test_nb.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -136,6 +138,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(CUR_DIR)/host -I $(CUR_DIR)/kern
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_nb.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -203,11 +210,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -241,21 +243,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -302,12 +304,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/naiveBayesTrain_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_analytics/L2/benchmarks/classification/naive_bayes/utils.mk b/data_analytics/L2/benchmarks/classification/naive_bayes/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/benchmarks/classification/naive_bayes/utils.mk
+++ b/data_analytics/L2/benchmarks/classification/naive_bayes/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS: for non-x86 hosts, default K_IMAGE (uImage when PLATFORM_NAME contains zc706, Image otherwise) and ROOTFS to paths relative to SYSROOT; ?= keeps any value that is already set
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort the build when the path held in K_IMAGE does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path "$(K_IMAGE)" does not exist, please set ENV variable K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort the build when the path held in ROOTFS does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path "$(ROOTFS)" does not exist, please set ENV variable ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/demos/text/dup_match/Makefile b/data_analytics/L2/demos/text/dup_match/Makefile
index aa41834f40..f5d10b9e38 100644
--- a/data_analytics/L2/demos/text/dup_match/Makefile
+++ b/data_analytics/L2/demos/text/dup_match/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(CUR_DIR)/host/dup_match.cpp $(CUR_DIR)/host/dm/cluster.cpp $(CUR_DIR)/host/dm/common.cpp $(CUR_DIR)/host/dm/predicate.cpp $(CUR_DIR)/host/ext/hcluster.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(CUR_DIR)/host -I $(CUR_DIR)/kernel -I $(XFLIB_DIR)/L2/include/hw -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/../utils/L1/include -I $(CUR_DIR)//host/ext/re_compile/lib/include -I $(XFLIB_DIR)/L1/include/sw -I $(XFLIB_DIR)/L3/include/sw -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/../utils/L1/include
@@ -118,6 +120,11 @@ CXXFLAGS += -O3
 LDFLAGS +=  -L $(CUR_DIR)//host/ext/re_compile/lib/lib
 LDFLAGS +=  -l onig
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -157,7 +164,7 @@ endif
 $(TEMP_DIR)/TGP_Kernel.xo: $(CUR_DIR)/kernel/predicate_kernel.cpp 
 	$(ECHO) "Compiling Kernel: TGP_Kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_TGP_Kernel) $(VPP_FLAGS) -k TGP_Kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_TGP_Kernel) $(VPP_FLAGS) -k TGP_Kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_TGP_Kernel_OBJS += $(TEMP_DIR)/TGP_Kernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_TGP_Kernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -181,11 +188,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -219,21 +221,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -280,14 +282,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/TGP_Kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -324,12 +328,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_analytics/L2/demos/text/dup_match/utils.mk b/data_analytics/L2/demos/text/dup_match/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/demos/text/dup_match/utils.mk
+++ b/data_analytics/L2/demos/text/dup_match/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: fail when the K_IMAGE path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error Kernel image K_IMAGE=$(K_IMAGE) does not exist, please set K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: fail when the ROOTFS path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error Root filesystem ROOTFS=$(ROOTFS) does not exist, please set ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/demos/text/log_analyzer/Makefile b/data_analytics/L2/demos/text/log_analyzer/Makefile
index 459c7fe737..3fafe00ef3 100644
--- a/data_analytics/L2/demos/text/log_analyzer/Makefile
+++ b/data_analytics/L2/demos/text/log_analyzer/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(CUR_DIR)/host/log_analyzer.cpp $(CUR_DIR)/host/log_analyzer_config.cpp $(CUR_DIR)/utils_sw/xclhost.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -127,6 +129,11 @@ LDFLAGS +=  -L $(CUR_DIR)//re_compile/lib/lib -L $(CUR_DIR)//re_compile -L $(CUR
 LDFLAGS +=  -l onig -l xfcompile -l maxminddb
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -152,9 +159,9 @@ VPP_FLAGS_reEngineKernel += --hls.clock 300000000:reEngineKernel
 VPP_FLAGS_GeoIP_kernel += --hls.clock 300000000:GeoIP_kernel
 VPP_FLAGS_WJ_kernel += --hls.clock 300000000:WJ_kernel
 ifneq ($(HOST_ARCH), x86)
-VPP_LDFLAGS_logAnalyzer += --clock.defaultFreqHz 300000000
+VPP_LDFLAGS_logAnalyzer += --clock.defaultFreqHz 250000000
 else
-VPP_LDFLAGS_logAnalyzer += --kernel_frequency 300
+VPP_LDFLAGS_logAnalyzer += --kernel_frequency 250
 endif
 VPP_LDFLAGS_logAnalyzer_temp := --config $(CUR_DIR)/conn_u200.cfg
 VPP_LDFLAGS_logAnalyzer += $(VPP_LDFLAGS_logAnalyzer_temp)
@@ -164,9 +171,9 @@ VPP_FLAGS_reEngineKernel += --hls.clock 300000000:reEngineKernel
 VPP_FLAGS_GeoIP_kernel += --hls.clock 300000000:GeoIP_kernel
 VPP_FLAGS_WJ_kernel += --hls.clock 300000000:WJ_kernel
 ifneq ($(HOST_ARCH), x86)
-VPP_LDFLAGS_logAnalyzer += --clock.defaultFreqHz 300000000
+VPP_LDFLAGS_logAnalyzer += --clock.defaultFreqHz 250000000
 else
-VPP_LDFLAGS_logAnalyzer += --kernel_frequency 300
+VPP_LDFLAGS_logAnalyzer += --kernel_frequency 250
 endif
 
 endif
@@ -181,17 +188,17 @@ endif
 $(TEMP_DIR)/reEngineKernel.xo: $(CUR_DIR)/kernel/re_engine_kernel.cpp 
 	$(ECHO) "Compiling Kernel: reEngineKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_reEngineKernel) $(VPP_FLAGS) -k reEngineKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_reEngineKernel) $(VPP_FLAGS) -k reEngineKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_logAnalyzer_OBJS += $(TEMP_DIR)/reEngineKernel.xo
 $(TEMP_DIR)/GeoIP_kernel.xo: $(CUR_DIR)/kernel/geoip_kernel.cpp 
 	$(ECHO) "Compiling Kernel: GeoIP_kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_GeoIP_kernel) $(VPP_FLAGS) -k GeoIP_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_GeoIP_kernel) $(VPP_FLAGS) -k GeoIP_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_logAnalyzer_OBJS += $(TEMP_DIR)/GeoIP_kernel.xo
 $(TEMP_DIR)/WJ_kernel.xo: $(CUR_DIR)/kernel/WJ_kernel.cpp 
 	$(ECHO) "Compiling Kernel: WJ_kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_WJ_kernel) $(VPP_FLAGS) -k WJ_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_WJ_kernel) $(VPP_FLAGS) -k WJ_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_logAnalyzer_OBJS += $(TEMP_DIR)/WJ_kernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_logAnalyzer_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -215,11 +222,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -253,21 +255,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but do not force a repackage on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -314,14 +316,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/logAnalyzer.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -358,12 +362,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_analytics/L2/demos/text/log_analyzer/README.md b/data_analytics/L2/demos/text/log_analyzer/README.md
index c8866e4c44..c3857f0a6f 100644
--- a/data_analytics/L2/demos/text/log_analyzer/README.md
+++ b/data_analytics/L2/demos/text/log_analyzer/README.md
@@ -88,8 +88,3 @@ note:
 1. The each line in the input log must has less than 4090 characters.
 2. The baseline version is a single thread program.
 
-
-## Known Issues
-
-* This case fails hardware build with 2022.1 Vitis. Please use 2021.2 Vitis for it,
-
diff --git a/data_analytics/L2/demos/text/log_analyzer/description.json b/data_analytics/L2/demos/text/log_analyzer/description.json
index c9ab7935a2..e8f3ac2cbb 100644
--- a/data_analytics/L2/demos/text/log_analyzer/description.json
+++ b/data_analytics/L2/demos/text/log_analyzer/description.json
@@ -114,7 +114,7 @@
                     "num_compute_units": 1
                 }
             ],
-            "frequency": 300,
+            "frequency": 250,
             "name": "logAnalyzer"
         }
     ],
@@ -143,6 +143,8 @@
         "targets": [
             "vitis_sw_emu",
             "vitis_hw_emu",
+            "vitis_hw_build",
+            "vitis_hw_run"
         ],
         "category": "canary"
     }
diff --git a/data_analytics/L2/demos/text/log_analyzer/utils.mk b/data_analytics/L2/demos/text/log_analyzer/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/demos/text/log_analyzer/utils.mk
+++ b/data_analytics/L2/demos/text/log_analyzer/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: fail when the K_IMAGE path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error Kernel image K_IMAGE=$(K_IMAGE) does not exist, please set K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: fail when the ROOTFS path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error Root filesystem ROOTFS=$(ROOTFS) does not exist, please set ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/include/hw/xf_data_analytics/dataframe/obj2parquet.hpp b/data_analytics/L2/include/hw/xf_data_analytics/dataframe/obj2parquet.hpp
index 526c5c52b5..8df32c832c 100644
--- a/data_analytics/L2/include/hw/xf_data_analytics/dataframe/obj2parquet.hpp
+++ b/data_analytics/L2/include/hw/xf_data_analytics/dataframe/obj2parquet.hpp
@@ -18,11 +18,10 @@
 #define _XF_DATA_ANALYTICS_ETL_HPP_
 /**
  * @brief From DataFrame to Parquet
- * \rst
- * \endrst
  *
- * @param
- * TBD
+ * @param ddr_obj pointer to the DDR buffer containing the input objects
+ * @param schema schema array (16 entries of 8 bits each)
+ * @param ddr_parquet pointer to the DDR buffer containing the output Parquet results
  *
  */
 extern "C" void ObjToParquet(ap_uint<88> ddr_obj[1 << 25], ap_uint<8> schema[16], ap_uint<64> ddr_parquet[1 << 25]);
diff --git a/data_analytics/L2/tests/classification/decisiontree/Makefile b/data_analytics/L2/tests/classification/decisiontree/Makefile
index 8b045b4e1c..9606099125 100644
--- a/data_analytics/L2/tests/classification/decisiontree/Makefile
+++ b/data_analytics/L2/tests/classification/decisiontree/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -65,7 +71,7 @@ PLATFORM := xilinx_u250_gen3x16_xdma_4_1_202210_1
 endif
 
 # #################### Checking if PLATFORM in whitelist ############################
-PLATFORM_ALLOWLIST +=  u250 u50 aws-vu9p-f1
+PLATFORM_ALLOWLIST +=  u250
 PLATFORM_BLOCKLIST += 
 
 include ./utils.mk
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/host.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -136,6 +138,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(CU
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_decisionTree.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -178,7 +185,7 @@ endif
 $(TEMP_DIR)/DecisionTree.xo: $(XFLIB_DIR)/L2/src/classification/decision_tree.cpp 
 	$(ECHO) "Compiling Kernel: DecisionTree"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_DecisionTree) $(VPP_FLAGS) -k DecisionTree -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_DecisionTree) $(VPP_FLAGS) -k DecisionTree -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_decision_tree_OBJS += $(TEMP_DIR)/DecisionTree.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_decision_tree_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -202,11 +209,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -240,21 +242,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but do not force a repackage on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -301,14 +303,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decision_tree.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -342,12 +346,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_analytics/L2/tests/classification/decisiontree/utils.mk b/data_analytics/L2/tests/classification/decisiontree/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/tests/classification/decisiontree/utils.mk
+++ b/data_analytics/L2/tests/classification/decisiontree/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: fail when the K_IMAGE path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error Kernel image K_IMAGE=$(K_IMAGE) does not exist, please set K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: fail when the ROOTFS path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error Root filesystem ROOTFS=$(ROOTFS) does not exist, please set ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/tests/classification/gradientBoostedDecisionTree/Makefile b/data_analytics/L2/tests/classification/gradientBoostedDecisionTree/Makefile
index f723f34c88..0f95ed3529 100644
--- a/data_analytics/L2/tests/classification/gradientBoostedDecisionTree/Makefile
+++ b/data_analytics/L2/tests/classification/gradientBoostedDecisionTree/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/host.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(CU
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_decisionTree.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -186,11 +193,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -224,21 +226,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but do not force a repackage on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -285,12 +287,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decision_tree.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_analytics/L2/tests/classification/gradientBoostedDecisionTree/utils.mk b/data_analytics/L2/tests/classification/gradientBoostedDecisionTree/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/tests/classification/gradientBoostedDecisionTree/utils.mk
+++ b/data_analytics/L2/tests/classification/gradientBoostedDecisionTree/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: fail when the K_IMAGE path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error Kernel image K_IMAGE=$(K_IMAGE) does not exist, please set K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: fail when the ROOTFS path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error Root filesystem ROOTFS=$(ROOTFS) does not exist, please set ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/tests/classification/randomforest/Makefile b/data_analytics/L2/tests/classification/randomforest/Makefile
index bde3d7d115..ac9a89502e 100644
--- a/data_analytics/L2/tests/classification/randomforest/Makefile
+++ b/data_analytics/L2/tests/classification/randomforest/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/classification/randomforest/host/test.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(CU
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_rf.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -192,11 +199,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -230,21 +232,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -291,12 +293,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/rf_v0.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_analytics/L2/tests/classification/randomforest/utils.mk b/data_analytics/L2/tests/classification/randomforest/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/tests/classification/randomforest/utils.mk
+++ b/data_analytics/L2/tests/classification/randomforest/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/tests/classification/svm/Makefile b/data_analytics/L2/tests/classification/svm/Makefile
index 72cf269c34..4ce6f17f58 100644
--- a/data_analytics/L2/tests/classification/svm/Makefile
+++ b/data_analytics/L2/tests/classification/svm/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -65,7 +71,7 @@ PLATFORM := xilinx_u250_gen3x16_xdma_4_1_202210_1
 endif
 
 # #################### Checking if PLATFORM in whitelist ############################
-PLATFORM_ALLOWLIST +=  u250 u50
+PLATFORM_ALLOWLIST +=  u250
 PLATFORM_BLOCKLIST +=  aws-vu9p-f1
 
 include ./utils.mk
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/host.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -136,6 +138,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(CU
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_svm.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -178,7 +185,7 @@ endif
 $(TEMP_DIR)/SVM.xo: $(XFLIB_DIR)/L2/src/classification/svm.cpp 
 	$(ECHO) "Compiling Kernel: SVM"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_SVM) $(VPP_FLAGS) -k SVM -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_SVM) $(VPP_FLAGS) -k SVM -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_svm_OBJS += $(TEMP_DIR)/SVM.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_svm_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -202,11 +209,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -240,21 +242,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -301,14 +303,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/svm.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -342,12 +346,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_analytics/L2/tests/classification/svm/utils.mk b/data_analytics/L2/tests/classification/svm/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/tests/classification/svm/utils.mk
+++ b/data_analytics/L2/tests/classification/svm/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/tests/classification/xGradientBoost/Makefile b/data_analytics/L2/tests/classification/xGradientBoost/Makefile
index 343486bee0..093a0ff7d1 100644
--- a/data_analytics/L2/tests/classification/xGradientBoost/Makefile
+++ b/data_analytics/L2/tests/classification/xGradientBoost/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/host.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(CU
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_decisionTree.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -186,11 +193,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -224,21 +226,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -285,12 +287,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decision_tree.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_analytics/L2/tests/classification/xGradientBoost/utils.mk b/data_analytics/L2/tests/classification/xGradientBoost/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/tests/classification/xGradientBoost/utils.mk
+++ b/data_analytics/L2/tests/classification/xGradientBoost/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/tests/clustering/kmeans/Makefile b/data_analytics/L2/tests/clustering/kmeans/Makefile
index ebc02a1072..3275ac9792 100644
--- a/data_analytics/L2/tests/clustering/kmeans/Makefile
+++ b/data_analytics/L2/tests/clustering/kmeans/Makefile
@@ -18,25 +18,31 @@
 
 help::
 	$(ECHO) "Makefile Usage:"
-	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=</x86/aarch64>"
+	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
 	$(ECHO) "      Command to generate the design for specified Target and Shell."
-	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=</x86/aarch64>"
+	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
 	$(ECHO) "      Command to run application in emulation."
-	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH required for SoC shells"
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make xclbin TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=</x86/aarch64>"
+	$(ECHO) "  make xclbin TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
 	$(ECHO) "      Command to build xclbin application."
-	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
 	$(ECHO) "  make host TARGET=<hw/hw_emu/sw_emu/>"
 	$(ECHO) "      Command to build host application."
-	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -54,19 +60,19 @@ XFLIB_DIR = $(XF_PROJ_ROOT)
 
 # setting devault value
 TARGET ?= sw_emu
-HOST_ARCH ?= 
+HOST_ARCH ?= x86
 
 #setting PLATFORM
 ifeq ($(PLATFORM),)
 PLATFORM := $(DEVICE)
 endif
 ifeq ($(PLATFORM),)
-PLATFORM := xilinx_u250_gen3x16_xdma_3_1_202020_1
+PLATFORM := xilinx_u250_gen3x16_xdma_4_1_202210_1
 endif
 
 # #################### Checking if PLATFORM in whitelist ############################
-PLATFORM_ALLOWLIST +=  xilinx_u250_gen3x16_xdma_3_1_202020_1 xilinx_u50_gen3x16_xdma_201920_3 aws-vu9p-f1 vck190
-PLATFORM_BLOCKLIST +=  xilinx_u250_xdma_201830_2 xilinx_u200_xdma_201830_2
+PLATFORM_ALLOWLIST +=  u250 u50 aws-vu9p-f1
+PLATFORM_BLOCKLIST +=  vck190
 
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -136,6 +138,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(XF
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -178,7 +185,7 @@ endif
 $(TEMP_DIR)/kmeansKernel.xo: $(CUR_DIR)/kernel/kernel.cpp 
 	$(ECHO) "Compiling Kernel: kmeansKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kmeansKernel) $(VPP_FLAGS) -k kmeansKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kmeansKernel) $(VPP_FLAGS) -k kmeansKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kmeanskernel_OBJS += $(TEMP_DIR)/kmeansKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kmeanskernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -202,11 +209,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -240,21 +242,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -301,14 +303,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kmeanskernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -342,12 +346,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_analytics/L2/tests/clustering/kmeans/description.json b/data_analytics/L2/tests/clustering/kmeans/description.json
index 3a4b81ab31..1489063471 100644
--- a/data_analytics/L2/tests/clustering/kmeans/description.json
+++ b/data_analytics/L2/tests/clustering/kmeans/description.json
@@ -5,14 +5,12 @@
     "flow": "vitis", 
     "platform_type": "", 
     "platform_allowlist": [
-        "xilinx_u250_gen3x16_xdma_3_1_202020_1", 
-        "xilinx_u50_gen3x16_xdma_201920_3", 
+        "u250", 
+        "u50", 
         "aws-vu9p-f1" 
     ], 
     "platform_blocklist": [
-	"vck190",
-        "xilinx_u250_xdma_201830_2",
-        "xilinx_u200_xdma_201830_2"
+	    "vck190"
     ], 
     "platform_properties": {
         "u250": {
diff --git a/data_analytics/L2/tests/clustering/kmeans/utils.mk b/data_analytics/L2/tests/clustering/kmeans/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/tests/clustering/kmeans/utils.mk
+++ b/data_analytics/L2/tests/clustering/kmeans/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # fails early on non-x86 hosts when the path held in K_IMAGE does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set K_IMAGE (or SYSROOT) ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # fails early on non-x86 hosts when the path held in ROOTFS does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set ROOTFS (or SYSROOT) ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/tests/clustering/kmeans_sc/Makefile b/data_analytics/L2/tests/clustering/kmeans_sc/Makefile
index 4e46c8a0f3..f2ffcdd8bd 100755
--- a/data_analytics/L2/tests/clustering/kmeans_sc/Makefile
+++ b/data_analytics/L2/tests/clustering/kmeans_sc/Makefile
@@ -53,12 +53,12 @@ ifeq ($(PLATFORM),)
 PLATFORM := $(DEVICE)
 endif
 ifeq ($(PLATFORM),)
-PLATFORM := xilinx_u250_gen3x16_xdma_3_1_202020_1
+PLATFORM := xilinx_u250_gen3x16_xdma_4_1_202210_1
 endif
 
 # #################### Checking if PLATFORM in whitelist ############################
 PLATFORM_ALLOWLIST +=  u250 u200
-PLATFORM_BLOCKLIST +=  xilinx_u250_xdma_201830_2 xilinx_u200_xdma_201830_2 zc aws-vu9p-f1
+PLATFORM_BLOCKLIST +=  zc aws-vu9p-f1
 
 GCC_INTOOL := 8.3.0
 BINUTILS_INTOOL := 2.37
@@ -158,7 +158,7 @@ $(ACC_OBJS_kmeansKernel): $(TEMP_DIR)/%.o : %.cpp $$(@D)/.f
 BINARY_CONTAINERS_DEPS  += $(ACC_OBJS_kmeansKernel) 
 $(BINARY_CONTAINERS_TMP) : $(BINARY_CONTAINERS_DEPS)
 	@echo "--> Making $@ from: $?"
-	$(VPP) $(VPP_FLAGS) $(VPP_LDFLAGS) -o $(BINARY_CONTAINERS) -l $^
+	$(VPP) $(VPP_FLAGS) $(VPP_LDFLAGS) $(VPP_LDFLAGS_kmeanskernel) -o $(BINARY_CONTAINERS) -l $^
 EXE_FILE_DEPS += $(BINARY_CONTAINERS_TMP)
 EXE_FILE_DEPS += $(BINARY_CONTAINERS_DEPS)
 
@@ -246,12 +246,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_analytics/L2/tests/clustering/kmeans_sc/description.json b/data_analytics/L2/tests/clustering/kmeans_sc/description.json
index 05e331bb86..be4fecc6ec 100644
--- a/data_analytics/L2/tests/clustering/kmeans_sc/description.json
+++ b/data_analytics/L2/tests/clustering/kmeans_sc/description.json
@@ -9,8 +9,6 @@
         "u200"
     ],
     "platform_blocklist": [
-        "xilinx_u250_xdma_201830_2",
-        "xilinx_u200_xdma_201830_2",
         "zc",
         "aws-vu9p-f1"
     ],
diff --git a/data_analytics/L2/tests/dataframe/writeparquet/Makefile b/data_analytics/L2/tests/dataframe/writeparquet/Makefile
index 593552c3e7..3fa5860458 100644
--- a/data_analytics/L2/tests/dataframe/writeparquet/Makefile
+++ b/data_analytics/L2/tests/dataframe/writeparquet/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/host/test.cpp $(CUR_DIR)/host/utils_sw/xclhost.cpp 
 CXXFLAGS +=  -I $(CUR_DIR)/host -I $(CUR_DIR)/host/utils_sw -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(XFLIB_DIR)/../utils/L1/include -I $(CUR_DIR)//arrow_compile/arrow/cpp/release/thrift_ep-install/include/ -I $(CUR_DIR)//arrow_compile/arrow/cpp/src -I $(CUR_DIR)//arrow_compile/arrow/cpp/release/src/ -I $(XFLIB_DIR)/L1/include/sw -I $(XFLIB_DIR)/L3/include/sw -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/../utils/L1/include
@@ -186,11 +188,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -224,21 +221,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force repackaging on every build
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -285,12 +282,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/ObjToParquet.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -338,4 +335,4 @@ cleanall: cleanh cleank
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 	-$(RMDIR) 
 
-clean: cleanh
\ No newline at end of file
+clean: cleanh
diff --git a/data_analytics/L2/tests/dataframe/writeparquet/utils.mk b/data_analytics/L2/tests/dataframe/writeparquet/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/tests/dataframe/writeparquet/utils.mk
+++ b/data_analytics/L2/tests/dataframe/writeparquet/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # fails early on non-x86 hosts when the path held in K_IMAGE does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set K_IMAGE (or SYSROOT) ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # fails early on non-x86 hosts when the path held in ROOTFS does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set ROOTFS (or SYSROOT) ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/tests/geospatial/knn_sc/Makefile b/data_analytics/L2/tests/geospatial/knn_sc/Makefile
index 00184740a8..60984b0009 100644
--- a/data_analytics/L2/tests/geospatial/knn_sc/Makefile
+++ b/data_analytics/L2/tests/geospatial/knn_sc/Makefile
@@ -18,17 +18,17 @@
 
 help::
 	$(ECHO) "Makefile Usage:"
-	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<>"
+	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
 	$(ECHO) "      Command to generate the design for specified Target and Shell."
-	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<>"
+	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
 	$(ECHO) "      Command to run application in emulation."
-	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH required for SoC shells"
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH required for SoC shells"
 	$(ECHO) ""
 	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/sw_emu/>"
 	$(ECHO) "      Command to build host application."
-	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
+	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -46,18 +46,18 @@ XFLIB_DIR = $(XF_PROJ_ROOT)
 
 # setting devault value
 TARGET ?= sw_emu
-HOST_ARCH ?= 
+HOST_ARCH ?= x86
 
 #setting PLATFORM
 ifeq ($(PLATFORM),)
 PLATFORM := $(DEVICE)
 endif
 ifeq ($(PLATFORM),)
-PLATFORM := xilinx_u50_gen3x16_xdma_201920_3
+PLATFORM := xilinx_u50_gen3x16_xdma_5_202210_1
 endif
 
 # #################### Checking if PLATFORM in whitelist ############################
-PLATFORM_ALLOWLIST +=  xilinx_u50_gen3x16_xdma_201920_3
+PLATFORM_ALLOWLIST +=  u50
 PLATFORM_BLOCKLIST +=  zc
 
 GCC_INTOOL := 8.3.0
@@ -78,8 +78,6 @@ RUN_DEPS :=
 # set debug switch
 ifneq ($(debug),yes)
 CXXFLAGS += -O3
-else
-CXXFLAGS += -g
 endif
 
 # get global setting
@@ -140,8 +138,7 @@ PKG_HOST_ARGS = $(foreach args,$(HOST_ARGS),$(subst $(dir $(patsubst %/,%,$(args
 endif
 
 ########################## Kernel compiler global settings ##########################
-ifneq (,$(shell echo $(XPLATFORM) | awk '/xilinx_u2_gen3x4_xdma_gc_2_202110_1/'))
-VPP_FLAGS +=   -DUSE_U2
+ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
 VPP_FLAGS +=  -I $(CUR_DIR)/kernel -I $(XFLIB_DIR)/../graph/L1/include/hw -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw
 
 else 
@@ -178,7 +175,7 @@ $(ACC_OBJS_kernel_knn): $(TEMP_DIR)/%.o : %.cpp $$(@D)/.f
 BINARY_CONTAINERS_DEPS  += $(ACC_OBJS_kernel_knn) 
 $(BINARY_CONTAINERS_TMP) : $(BINARY_CONTAINERS_DEPS)
 	@echo "--> Making $@ from: $?"
-	$(VPP) $(VPP_FLAGS) $(VPP_LDFLAGS) -o $(BINARY_CONTAINERS) -l $^
+	$(VPP) $(VPP_FLAGS) $(VPP_LDFLAGS) $(VPP_LDFLAGS_kernel_knn) -o $(BINARY_CONTAINERS) -l $^
 EXE_FILE_DEPS += $(BINARY_CONTAINERS_TMP)
 EXE_FILE_DEPS += $(BINARY_CONTAINERS_DEPS)
 
@@ -266,12 +263,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) 
 
-clean: cleanh
+clean: cleanh
\ No newline at end of file
diff --git a/data_analytics/L2/tests/regression/LASSORegressionSGDTrain/Makefile b/data_analytics/L2/tests/regression/LASSORegressionSGDTrain/Makefile
index 3e87452cf7..77c40bbb2f 100644
--- a/data_analytics/L2/tests/regression/LASSORegressionSGDTrain/Makefile
+++ b/data_analytics/L2/tests/regression/LASSORegressionSGDTrain/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/host.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -136,6 +138,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(CU
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_main.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -178,7 +185,7 @@ endif
 $(TEMP_DIR)/LASSORegressionTrain.xo: $(CUR_DIR)/kernel/kernel.cpp 
 	$(ECHO) "Compiling Kernel: LASSORegressionTrain"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_LASSORegressionTrain) $(VPP_FLAGS) -k LASSORegressionTrain -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_LASSORegressionTrain) $(VPP_FLAGS) -k LASSORegressionTrain -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_lasso_regression_OBJS += $(TEMP_DIR)/LASSORegressionTrain.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_lasso_regression_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -202,11 +209,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -240,21 +242,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force repackaging on every build
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -301,14 +303,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/lasso_regression.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -342,12 +346,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_analytics/L2/tests/regression/LASSORegressionSGDTrain/utils.mk b/data_analytics/L2/tests/regression/LASSORegressionSGDTrain/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/tests/regression/LASSORegressionSGDTrain/utils.mk
+++ b/data_analytics/L2/tests/regression/LASSORegressionSGDTrain/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # fails early on non-x86 hosts when the path held in K_IMAGE does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set K_IMAGE (or SYSROOT) ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # fails early on non-x86 hosts when the path held in ROOTFS does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set ROOTFS (or SYSROOT) ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/tests/regression/decisiontree/Makefile b/data_analytics/L2/tests/regression/decisiontree/Makefile
index 7bebcb53bf..de0c45cd4e 100644
--- a/data_analytics/L2/tests/regression/decisiontree/Makefile
+++ b/data_analytics/L2/tests/regression/decisiontree/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/host.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -136,6 +138,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(CU
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_decisionTree.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -178,7 +185,7 @@ endif
 $(TEMP_DIR)/DecisionTree.xo: $(XFLIB_DIR)/L2/src/regression/decision_tree_regression.cpp 
 	$(ECHO) "Compiling Kernel: DecisionTree"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_DecisionTree) $(VPP_FLAGS) -k DecisionTree -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_DecisionTree) $(VPP_FLAGS) -k DecisionTree -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_decision_tree_OBJS += $(TEMP_DIR)/DecisionTree.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_decision_tree_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -202,11 +209,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -240,21 +242,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force repackaging on every build
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -301,14 +303,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decision_tree.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -342,12 +346,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_analytics/L2/tests/regression/decisiontree/utils.mk b/data_analytics/L2/tests/regression/decisiontree/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/tests/regression/decisiontree/utils.mk
+++ b/data_analytics/L2/tests/regression/decisiontree/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Non-x86 only: abort if K_IMAGE (user-set, or defaulted from SYSROOT) names no existing file.
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # Non-x86 only: abort if ROOTFS (user-set, or defaulted from SYSROOT) names no existing file.
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/tests/regression/gradientBoostedDecisionTree/Makefile b/data_analytics/L2/tests/regression/gradientBoostedDecisionTree/Makefile
index 843bf86626..809759e373 100644
--- a/data_analytics/L2/tests/regression/gradientBoostedDecisionTree/Makefile
+++ b/data_analytics/L2/tests/regression/gradientBoostedDecisionTree/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a. If you download the platform and common-image from the Xilinx Download Center (suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. Alternatively, define SYSROOT, K_IMAGE and ROOTFS yourself:"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/host.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(CU
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_decisionTree.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -186,11 +193,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -224,21 +226,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -285,12 +287,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decision_tree.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_analytics/L2/tests/regression/gradientBoostedDecisionTree/utils.mk b/data_analytics/L2/tests/regression/gradientBoostedDecisionTree/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/tests/regression/gradientBoostedDecisionTree/utils.mk
+++ b/data_analytics/L2/tests/regression/gradientBoostedDecisionTree/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Non-x86 only: abort if K_IMAGE (user-set, or defaulted from SYSROOT) names no existing file.
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # Non-x86 only: abort if ROOTFS (user-set, or defaulted from SYSROOT) names no existing file.
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/tests/regression/linearRegressionSGDTrain/Makefile b/data_analytics/L2/tests/regression/linearRegressionSGDTrain/Makefile
index 557a3bade9..d7b8ca6a38 100644
--- a/data_analytics/L2/tests/regression/linearRegressionSGDTrain/Makefile
+++ b/data_analytics/L2/tests/regression/linearRegressionSGDTrain/Makefile
@@ -18,15 +18,15 @@
 
 help::
 	$(ECHO) "Makefile Usage:"
-	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86/aarch64>"
+	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
 	$(ECHO) "      Command to generate the design for specified Target and Shell."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86/aarch64>"
+	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
 	$(ECHO) "      Command to run application in emulation."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make xclbin TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86/aarch64>"
+	$(ECHO) "  make xclbin TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a. If you download the platform and common-image from the Xilinx Download Center (suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. Alternatively, define SYSROOT, K_IMAGE and ROOTFS yourself:"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -65,8 +71,8 @@ PLATFORM := xilinx_u250_gen3x16_xdma_4_1_202210_1
 endif
 
 # #################### Checking if PLATFORM in whitelist ############################
-PLATFORM_ALLOWLIST +=  u250 u50 aws-vu9p-f1 vck190
-PLATFORM_BLOCKLIST += 
+PLATFORM_ALLOWLIST +=  u250 u50 aws-vu9p-f1
+PLATFORM_BLOCKLIST +=  vck190
 
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/host.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -136,6 +138,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(CU
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_main.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -178,7 +185,7 @@ endif
 $(TEMP_DIR)/linearLeastSquareSGDTrain.xo: $(CUR_DIR)/kernel/kernel.cpp 
 	$(ECHO) "Compiling Kernel: linearLeastSquareSGDTrain"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_linearLeastSquareSGDTrain) $(VPP_FLAGS) -k linearLeastSquareSGDTrain -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_linearLeastSquareSGDTrain) $(VPP_FLAGS) -k linearLeastSquareSGDTrain -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_linear_regression_OBJS += $(TEMP_DIR)/linearLeastSquareSGDTrain.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_linear_regression_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -202,11 +209,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -240,21 +242,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -301,14 +303,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/linear_regression.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -342,12 +346,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_analytics/L2/tests/regression/linearRegressionSGDTrain/utils.mk b/data_analytics/L2/tests/regression/linearRegressionSGDTrain/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/tests/regression/linearRegressionSGDTrain/utils.mk
+++ b/data_analytics/L2/tests/regression/linearRegressionSGDTrain/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Non-x86 only: abort if K_IMAGE (user-set, or defaulted from SYSROOT) names no existing file.
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # Non-x86 only: abort if ROOTFS (user-set, or defaulted from SYSROOT) names no existing file.
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/tests/regression/randomforest/Makefile b/data_analytics/L2/tests/regression/randomforest/Makefile
index 3f02f5057c..d0eb157526 100644
--- a/data_analytics/L2/tests/regression/randomforest/Makefile
+++ b/data_analytics/L2/tests/regression/randomforest/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a. If you download the platform and common-image from the Xilinx Download Center (suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. Alternatively, define SYSROOT, K_IMAGE and ROOTFS yourself:"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/regression/randomforest/host/test.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(CU
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_rf.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -192,11 +199,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -230,21 +232,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -291,12 +293,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/rf_v0.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_analytics/L2/tests/regression/randomforest/utils.mk b/data_analytics/L2/tests/regression/randomforest/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/tests/regression/randomforest/utils.mk
+++ b/data_analytics/L2/tests/regression/randomforest/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Non-x86 only: abort if K_IMAGE (user-set, or defaulted from SYSROOT) names no existing file.
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # Non-x86 only: abort if ROOTFS (user-set, or defaulted from SYSROOT) names no existing file.
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/tests/regression/ridgeRegressionSGDTrain/Makefile b/data_analytics/L2/tests/regression/ridgeRegressionSGDTrain/Makefile
index b9e73c1def..22dc0bfca8 100644
--- a/data_analytics/L2/tests/regression/ridgeRegressionSGDTrain/Makefile
+++ b/data_analytics/L2/tests/regression/ridgeRegressionSGDTrain/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a. If you download the platform and common-image from the Xilinx Download Center (suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. Alternatively, define SYSROOT, K_IMAGE and ROOTFS yourself:"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/host.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -136,6 +138,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(CU
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_main.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -178,7 +185,7 @@ endif
 $(TEMP_DIR)/ridgeRegressionTrain.xo: $(CUR_DIR)/kernel/kernel.cpp 
 	$(ECHO) "Compiling Kernel: ridgeRegressionTrain"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_ridgeRegressionTrain) $(VPP_FLAGS) -k ridgeRegressionTrain -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_ridgeRegressionTrain) $(VPP_FLAGS) -k ridgeRegressionTrain -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_ridge_regression_OBJS += $(TEMP_DIR)/ridgeRegressionTrain.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_ridge_regression_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -202,11 +209,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -240,21 +242,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -301,14 +303,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/ridge_regression.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -342,12 +346,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_analytics/L2/tests/regression/ridgeRegressionSGDTrain/utils.mk b/data_analytics/L2/tests/regression/ridgeRegressionSGDTrain/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/tests/regression/ridgeRegressionSGDTrain/utils.mk
+++ b/data_analytics/L2/tests/regression/ridgeRegressionSGDTrain/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/tests/regression/xGradientBoost/Makefile b/data_analytics/L2/tests/regression/xGradientBoost/Makefile
index f923dd8cae..d707b66f7d 100644
--- a/data_analytics/L2/tests/regression/xGradientBoost/Makefile
+++ b/data_analytics/L2/tests/regression/xGradientBoost/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/host.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(CU
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_decisionTree.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -186,11 +193,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -224,21 +226,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -285,12 +287,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decision_tree.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_analytics/L2/tests/regression/xGradientBoost/utils.mk b/data_analytics/L2/tests/regression/xGradientBoost/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/tests/regression/xGradientBoost/utils.mk
+++ b/data_analytics/L2/tests/regression/xGradientBoost/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/tests/text/geoip/Makefile b/data_analytics/L2/tests/text/geoip/Makefile
index 0511a0200c..acb1531fc7 100644
--- a/data_analytics/L2/tests/text/geoip/Makefile
+++ b/data_analytics/L2/tests/text/geoip/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(CUR_DIR)/host -I $(CUR_DIR)/kernel -I $(XFLIB_DIR)/L2/include/
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -171,7 +178,7 @@ endif
 $(TEMP_DIR)/GeoIP_kernel.xo: $(CUR_DIR)/kernel/geoip_kernel.cpp 
 	$(ECHO) "Compiling Kernel: GeoIP_kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_GeoIP_kernel) $(VPP_FLAGS) -k GeoIP_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_GeoIP_kernel) $(VPP_FLAGS) -k GeoIP_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_GeoIP_kernel_OBJS += $(TEMP_DIR)/GeoIP_kernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_GeoIP_kernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -195,11 +202,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -233,21 +235,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -294,14 +296,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/GeoIP_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -335,12 +339,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_analytics/L2/tests/text/geoip/utils.mk b/data_analytics/L2/tests/text/geoip/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/tests/text/geoip/utils.mk
+++ b/data_analytics/L2/tests/text/geoip/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/tests/text/reEngine/Makefile b/data_analytics/L2/tests/text/reEngine/Makefile
index f7291b8d13..5e1c6d0853 100644
--- a/data_analytics/L2/tests/text/reEngine/Makefile
+++ b/data_analytics/L2/tests/text/reEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/test.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -141,6 +143,11 @@ LDFLAGS +=  -L $(CUR_DIR)//re_compile/lib/lib
 LDFLAGS +=  -l onig -l xfcompile
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -257,11 +264,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -295,21 +297,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -356,12 +358,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/reEngineKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_analytics/L2/tests/text/reEngine/utils.mk b/data_analytics/L2/tests/text/reEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/tests/text/reEngine/utils.mk
+++ b/data_analytics/L2/tests/text/reEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L2/tests/text/two_gram_predicate/Makefile b/data_analytics/L2/tests/text/two_gram_predicate/Makefile
index 43a18d0fca..99a0fbc0e3 100644
--- a/data_analytics/L2/tests/text/two_gram_predicate/Makefile
+++ b/data_analytics/L2/tests/text/two_gram_predicate/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(CUR_DIR)/host -I $(CUR_DIR)/kernel -I $(XFLIB_DIR)/L2/include/
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -170,7 +177,7 @@ endif
 $(TEMP_DIR)/TGP_Kernel.xo: $(CUR_DIR)/kernel/predicate_kernel.cpp 
 	$(ECHO) "Compiling Kernel: TGP_Kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_TGP_Kernel) $(VPP_FLAGS) -k TGP_Kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_TGP_Kernel) $(VPP_FLAGS) -k TGP_Kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_TGP_Kernel_OBJS += $(TEMP_DIR)/TGP_Kernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_TGP_Kernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -194,11 +201,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -232,21 +234,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -293,14 +295,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/TGP_Kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -334,12 +338,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_analytics/L2/tests/text/two_gram_predicate/utils.mk b/data_analytics/L2/tests/text/two_gram_predicate/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L2/tests/text/two_gram_predicate/utils.mk
+++ b/data_analytics/L2/tests/text/two_gram_predicate/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L3/include/sw/xf_data_analytics/gunzip_csv/data_engine_config.hpp b/data_analytics/L3/include/sw/xf_data_analytics/gunzip_csv/data_engine_config.hpp
index a69952b9f4..a12c43ce9e 100644
--- a/data_analytics/L3/include/sw/xf_data_analytics/gunzip_csv/data_engine_config.hpp
+++ b/data_analytics/L3/include/sw/xf_data_analytics/gunzip_csv/data_engine_config.hpp
@@ -29,6 +29,9 @@
 #define N 12
 
 namespace sssd_engine {
+/**
+ * @brief generating configuration bits for CSV scanner kernel
+ */
 class DataEngineConfig {
    private:
     ap_uint<64> convert(sssd_filter_t* filter);
@@ -45,6 +48,7 @@ class DataEngineConfig {
      * @param gzip file under gzip or not
      * @param sd schema for describing the table
      * @param cfg kernel configurations
+     * @return error code
      *
      */
     ErrorCode genConfigBits(bool gzip, sssd_scandesc_t* sd, uint64_t* cfg);
diff --git a/data_analytics/L3/include/sw/xf_data_analytics/gunzip_csv/data_engine_sc.hpp b/data_analytics/L3/include/sw/xf_data_analytics/gunzip_csv/data_engine_sc.hpp
index e47d4fd761..7f7933edf2 100644
--- a/data_analytics/L3/include/sw/xf_data_analytics/gunzip_csv/data_engine_sc.hpp
+++ b/data_analytics/L3/include/sw/xf_data_analytics/gunzip_csv/data_engine_sc.hpp
@@ -43,15 +43,17 @@ struct FileDescriptors {
     int* fd;
 };
 
-/* class to manage kernels on single device
+/**
+ * @brief class to manage kernels on single device
  */
 class DataEngine {
    private:
-    /*
-     *@brief execute kernel
+    /**
+     * @brief execute kernel
      * @param file_path file path.
      * @param size size of input file in bytes
-     * @param cfg pointer to configuration buffer.
+     * @param cfg pointer to configuration buffer
+     * @return object that contains the scan error code and pointer to result buffer
      */
     RetObj run_all(std::string file_path, size_t size, uint64_t* cfg);
 
@@ -72,8 +74,6 @@ class DataEngine {
     uint64_t* dummy_cfg;
     /**
      * @brief execute data engine.
-     *
-     * @param file_path path of xclbin.
      */
     void run();
 
@@ -94,7 +94,7 @@ class DataEngine {
    public:
     /**
      * @brief constructor of data engine.
-     * context, program, command queue are created and ready after fpga init.
+     * context, program, command queue are created and ready after FPGA init
      *
      * @param t_id targeted device id.
      * @param _csvInBufPool input CSV buffer pool
@@ -126,7 +126,7 @@ class DataEngine {
     /**
      * @brief push request to queue
      *
-     * @param prom promise to synchronize status of exection
+     * @param prom promise to synchronize status of execution
      * @param file_path file path
      * @param size file size
      * @param cfg pointer to configuration buffer
diff --git a/data_analytics/L3/include/sw/xf_data_analytics/gunzip_csv/smart_ssd_cache.hpp b/data_analytics/L3/include/sw/xf_data_analytics/gunzip_csv/smart_ssd_cache.hpp
index 49fc727955..e53d057c29 100644
--- a/data_analytics/L3/include/sw/xf_data_analytics/gunzip_csv/smart_ssd_cache.hpp
+++ b/data_analytics/L3/include/sw/xf_data_analytics/gunzip_csv/smart_ssd_cache.hpp
@@ -28,7 +28,9 @@
 #include "data_engine_sc.hpp"
 
 namespace sssd_engine {
-/********** SmartSSD Software API definition **********/
+/**
+ * @brief class to handle a specific SSD device
+ */
 class SmartSSDCache {
    private:
     /**
@@ -59,7 +61,7 @@ class SmartSSDCache {
     FILE* log_ptr;
 
     /**
-     * get the file size
+     * @brief get the file size
      * @param file_path file path
      */
     inline size_t getFileSize(std::string file_path) {
@@ -72,7 +74,8 @@ class SmartSSDCache {
     }
 
     /**
-     * Check basename and get the disk id based on mount info
+     * @brief Check basename and get the disk id based on mount info
+     * @param file_path file path
      */
     int32_t getDiskID(const std::string& file_path);
 
@@ -146,7 +149,7 @@ class SmartSSDCache {
      *
      * @param value values
      * @param isnull is null flags
-     * @pram hash hash value
+     * @param hash hash value
      * @param sd the schema
      *
      */
diff --git a/data_analytics/L3/src/sw/gunzip_csv/data_engine_sc.cpp b/data_analytics/L3/src/sw/gunzip_csv/data_engine_sc.cpp
index 132fbd7540..332e7dcb51 100644
--- a/data_analytics/L3/src/sw/gunzip_csv/data_engine_sc.cpp
+++ b/data_analytics/L3/src/sw/gunzip_csv/data_engine_sc.cpp
@@ -139,9 +139,14 @@ void DataEngine::run() {
         ap_uint<128>* hbuf_in;
         FileDescriptors* s_r_handle = new FileDescriptors;
         int* fd_collect = new int[file_nm];
+        int* buf_sz_collect = new int[file_nm];
         for (int i = 0; i < file_nm; ++i) {
+#ifdef USE_P2P
             // p2p data transfer size aligned to 4K
             int fd = open(file_list[i].c_str(), O_RDONLY | O_DIRECT);
+#else
+            int fd = open(file_list[i].c_str(), O_RDONLY);
+#endif
 
             if (fd == -1) {
                 fprintf(stderr, "ERROR: Cannot open the input file!!\n");
@@ -156,13 +161,27 @@ void DataEngine::run() {
             hbuf_meta[N + 1 + i] += out_offt; // file size
             // align to 4K
             int csv_buf_sz = (sz + 4095) / 4096 * 4096;
+            buf_sz_collect[i] = csv_buf_sz;
             in_offt += csv_buf_sz / 16;
             out_offt += (out_sz + 31) / 32;
 
+#ifdef USE_P2P
             hbuf_in = (ap_uint<128>*)data_engine_acc::file_buf(csvInBufPool, fd, csv_buf_sz, 0, buf_offt);
+#endif
             buf_offt += csv_buf_sz;
             last_file = file_list[i];
         }
+#ifndef USE_P2P
+        hbuf_in = (ap_uint<128>*)data_engine_acc::alloc_buf(csvInBufPool, buf_offt);
+        uint8_t* hbuf_in_i8 = reinterpret_cast<uint8_t*>(hbuf_in);
+        buf_offt = 0;
+        for (int i = 0; i < file_nm; i++) {
+            if (pread(fd_collect[i], hbuf_in_i8 + buf_offt, buf_sz_collect[i], 0) == -1) {
+                fprintf(stderr, "ERROR: File reading failed.\n");
+            }
+            buf_offt += buf_sz_collect[i];
+        }
+#endif
         s_r_handle->fd = fd_collect;
         data_engine_acc::set_handle(int64_t(s_r_handle));
         for (int i = file_nm; i < N; ++i) {
diff --git a/data_analytics/L3/src/sw/gunzip_csv/smart_ssd_cache.cpp b/data_analytics/L3/src/sw/gunzip_csv/smart_ssd_cache.cpp
index 9bc82554d3..2a3cd22c35 100644
--- a/data_analytics/L3/src/sw/gunzip_csv/smart_ssd_cache.cpp
+++ b/data_analytics/L3/src/sw/gunzip_csv/smart_ssd_cache.cpp
@@ -32,7 +32,11 @@ SmartSSDCache::SmartSSDCache(const char* xclbin_path, int card_num, sssd_info_t*
         printf("add card %d\n", i);
         data_engine_acc::add_card(cuCluster[i], i);
     }
-    csvInBufPool = data_engine_acc::create_bufpool(vpp::input, vpp::p2p);
+#ifdef USE_P2P
+    csvInBufPool = data_engine_acc::create_bufpool(vpp::input, vpp::p2p); // for U.2. device only
+#else
+    csvInBufPool = data_engine_acc::create_bufpool(vpp::input);
+#endif
     cfgInBufPool = data_engine_acc::create_bufpool(vpp::input);
     outBufPool = data_engine_acc::create_bufpool(vpp::output);
     metaBufPool = data_engine_acc::create_bufpool(vpp::bidirectional);
diff --git a/data_analytics/L3/tests/aml_test/Makefile b/data_analytics/L3/tests/aml_test/Makefile
index cbc954dd39..50bd91e011 100644
--- a/data_analytics/L3/tests/aml_test/Makefile
+++ b/data_analytics/L3/tests/aml_test/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -65,7 +71,7 @@ PLATFORM := xilinx_u200_gen3x16_xdma_2_202110_1
 endif
 
 # #################### Checking if PLATFORM in whitelist ############################
-PLATFORM_ALLOWLIST +=  u200 xilinx_u250_xdma_201830_2 u50 aws-vu9p-f1
+PLATFORM_ALLOWLIST +=  u200 u250 u50 aws-vu9p-f1
 PLATFORM_BLOCKLIST +=  zc vck190
 
 include ./utils.mk
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)//host/main.cpp $(XFLIB_DIR)/L3/src/sw/text/aml_checker.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -138,6 +140,11 @@ CXXFLAGS +=  -I $(CUR_DIR)//kernel -I $(XFLIB_DIR)/L3/include/sw -I $(XFLIB_DIR)
 CXXFLAGS += -O3 -std=c++14
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -258,7 +265,7 @@ endif
 $(TEMP_DIR)/fuzzy_kernel.xo: $(CUR_DIR)//kernel/fuzzy_kernel.cpp 
 	$(ECHO) "Compiling Kernel: fuzzy_kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_fuzzy_kernel) $(VPP_FLAGS) -k fuzzy_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_fuzzy_kernel) $(VPP_FLAGS) -k fuzzy_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_fuzzy_kernel_OBJS += $(TEMP_DIR)/fuzzy_kernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_fuzzy_kernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -282,11 +289,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -320,21 +322,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -381,14 +383,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/fuzzy_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -422,12 +426,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
-clean: cleanh
\ No newline at end of file
+clean: cleanh
diff --git a/data_analytics/L3/tests/aml_test/description.json b/data_analytics/L3/tests/aml_test/description.json
index 6f50b70cad..ebce4d741c 100644
--- a/data_analytics/L3/tests/aml_test/description.json
+++ b/data_analytics/L3/tests/aml_test/description.json
@@ -5,7 +5,7 @@
     "platform_type": "", 
     "platform_allowlist": [
         "u200", 
-        "xilinx_u250_xdma_201830_2", 
+        "u250", 
         "u50", 
         "aws-vu9p-f1"
     ], 
diff --git a/data_analytics/L3/tests/aml_test/utils.mk b/data_analytics/L3/tests/aml_test/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L3/tests/aml_test/utils.mk
+++ b/data_analytics/L3/tests/aml_test/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/L3/tests/contains_test/description.json b/data_analytics/L3/tests/contains_test/description.json
index 845424784a..2750f7a4be 100644
--- a/data_analytics/L3/tests/contains_test/description.json
+++ b/data_analytics/L3/tests/contains_test/description.json
@@ -10,6 +10,7 @@
     "platform_blocklist": [
         "zc"
     ], 
+    "match_makefile": "false",
     "testinfo": {
         "disable": false, 
         "jobs": [
diff --git a/data_analytics/L3/tests/gunzip_csv_sc_test/Makefile b/data_analytics/L3/tests/gunzip_csv_sc_test/Makefile
index 2408efa85f..61253ddd62 100644
--- a/data_analytics/L3/tests/gunzip_csv_sc_test/Makefile
+++ b/data_analytics/L3/tests/gunzip_csv_sc_test/Makefile
@@ -18,11 +18,11 @@
 
 help::
 	$(ECHO) "Makefile Usage:"
-	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<>"
+	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=</x86>"
 	$(ECHO) "      Command to generate the design for specified Target and Shell."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<>"
+	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=</x86>"
 	$(ECHO) "      Command to run application in emulation."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH required for SoC shells"
 	$(ECHO) ""
@@ -53,11 +53,11 @@ ifeq ($(PLATFORM),)
 PLATFORM := $(DEVICE)
 endif
 ifeq ($(PLATFORM),)
-PLATFORM := samsung_U2
+PLATFORM := u2_
 endif
 
 # #################### Checking if PLATFORM in whitelist ############################
-PLATFORM_ALLOWLIST +=  samsung_U2
+PLATFORM_ALLOWLIST +=  u2_ u50
 PLATFORM_BLOCKLIST +=  zc
 
 GCC_INTOOL := 8.3.0
@@ -122,10 +122,18 @@ endif
 ########################## Setting up Host Variables ##########################
 
 #Inclue Required Host Source Files
+ifneq (,$(shell echo $(XPLATFORM) | awk '/u2_/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/L3/src/sw/gunzip_csv/data_engine_sc.cpp $(XFLIB_DIR)/L3/src/sw/gunzip_csv/data_engine_config.cpp $(XFLIB_DIR)/L3/src/sw/gunzip_csv/smart_ssd_cache.cpp $(XFLIB_DIR)/L3/src/sw/gunzip_csv/sssd_api.cpp 
+CXXFLAGS +=  -D USE_P2P
 CXXFLAGS +=  -I $(CUR_DIR)/kernel -I $(XFLIB_DIR)/../data_compression/L1/include/hw -I $(XFLIB_DIR)/../security/L1/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw/xf_data_analytics/dataframe -I $(XFLIB_DIR)/L2/include/hw/xf_data_analytics/dataframe/gunzip_csv -I $(XFLIB_DIR)/L3/include/sw/xf_data_analytics/gunzip_csv -I $(XFLIB_DIR)/L1/include/sw -I $(XFLIB_DIR)/L3/include/sw -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3
 
+else 
+HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/L3/src/sw/gunzip_csv/data_engine_sc.cpp $(XFLIB_DIR)/L3/src/sw/gunzip_csv/data_engine_config.cpp $(XFLIB_DIR)/L3/src/sw/gunzip_csv/smart_ssd_cache.cpp $(XFLIB_DIR)/L3/src/sw/gunzip_csv/sssd_api.cpp 
+CXXFLAGS +=  -I $(CUR_DIR)/kernel -I $(XFLIB_DIR)/../data_compression/L1/include/hw -I $(XFLIB_DIR)/../security/L1/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw/xf_data_analytics/dataframe -I $(XFLIB_DIR)/L2/include/hw/xf_data_analytics/dataframe/gunzip_csv -I $(XFLIB_DIR)/L3/include/sw/xf_data_analytics/gunzip_csv -I $(XFLIB_DIR)/L1/include/sw -I $(XFLIB_DIR)/L3/include/sw -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/../utils/L1/include
+CXXFLAGS += -O3
+
+endif
 EXE_NAME := host.exe
 EXE_OBJS := $(addprefix $(TEMP_DIR)/, $(addsuffix .o,$(basename $(HOST_SRCS))))
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
@@ -169,7 +177,7 @@ $(ACC_OBJS_data_engine_acc_hls_kernel): $(TEMP_DIR)/%.o : %.cpp $$(@D)/.f
 BINARY_CONTAINERS_DEPS  += $(ACC_OBJS_data_engine_acc_hls_kernel) 
 $(BINARY_CONTAINERS_TMP) : $(BINARY_CONTAINERS_DEPS)
 	@echo "--> Making $@ from: $?"
-	$(VPP) $(VPP_FLAGS) $(VPP_LDFLAGS) -o $(BINARY_CONTAINERS) -l $^
+	$(VPP) $(VPP_FLAGS) $(VPP_LDFLAGS) $(VPP_LDFLAGS_data_engine_acc_hls_kernel) -o $(BINARY_CONTAINERS) -l $^
 EXE_FILE_DEPS += $(BINARY_CONTAINERS_TMP)
 EXE_FILE_DEPS += $(BINARY_CONTAINERS_DEPS)
 
@@ -257,12 +265,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_analytics/L3/tests/gunzip_csv_sc_test/README.md b/data_analytics/L3/tests/gunzip_csv_sc_test/README.md
new file mode 100644
index 0000000000..854ad8d8f6
--- /dev/null
+++ b/data_analytics/L3/tests/gunzip_csv_sc_test/README.md
@@ -0,0 +1,37 @@
+# CSV Scanner Vitis System Compiler Project
+
+This project showcases how to integrate the CSV scanner hardware accelerator with the System Compiler tool to decompress a Gzip-compressed CSV file, and to hash and/or filter specific column(s).
+
+## Contents
+
+* The multithreaded test code can be found in the current directory, and the detailed example usage can be found in the [HTML doc](https://xilinx.github.io/Vitis_Libraries/data_analytics/2022.1/index.html).
+* The L3 APIs are implemented in `xf_DataAnalytics/L3/include/sw/xf_data_analytics/gunzip_csv` and `xf_DataAnalytics/L3/src/sw/gunzip_csv`. To simplify user integration, the Vitis System Compiler application layer is packed into the L3 APIs to implement a task queue that takes acceleration requests from threads and returns each result to the corresponding thread.
+* The detailed hardware accelerator implementation can be found in `xf_DataAnalytics/L2/include/hw/xf_data_analytics/dataframe`.
+
+## How to Use
+
+For command-line developers the following settings are required before running any case in this library:
+
+```console
+source /opt/xilinx/Vitis/2022.1/settings64.sh
+source /opt/xilinx/xrt/setup.sh
+export PLATFORM_REPO_PATHS=/opt/xilinx/platforms
+```
+
+For `csh` users, please look for corresponding scripts with `.csh` suffix and adjust the variable setting command accordingly.
+
+The `PLATFORM_REPO_PATHS` environment variable points to directories containing platforms.
+
+```console
+# build and run one of the following using U.2. platform
+make run TARGET=sw_emu DEVICE=/path/to/xilinx_u2_gen3x4_xdma_gc_2_202110_1.xpfm
+
+# delete generated files
+make cleanall
+```
+
+Here, `TARGET` decides the FPGA binary type
+- `sw_emu` is for software emulation
+- `hw_emu` is for hardware emulation
+- `hw` is for deployment on physical card. (Compilation to hardware binary often takes hours.)
+
diff --git a/data_analytics/L3/tests/gunzip_csv_sc_test/description.json b/data_analytics/L3/tests/gunzip_csv_sc_test/description.json
index ccc4bc0699..16f47e2bce 100644
--- a/data_analytics/L3/tests/gunzip_csv_sc_test/description.json
+++ b/data_analytics/L3/tests/gunzip_csv_sc_test/description.json
@@ -4,11 +4,23 @@
     "description": "A SystemCompiler example for Gunzip CSV parser acceleration.", 
     "flow": "vitis", 
     "platform_allowlist": [
-        "u2"
+        "u2_",
+        "u50"
     ], 
     "platform_blocklist": [
         "zc"
     ], 
+    "platform_properties": {
+        "u2_": {
+            "host": {
+                "compiler": {
+                    "symbols": [
+                        "USE_P2P"
+                    ]
+                }
+            }
+        }
+    }, 
     "launch": [
         {
             "cmd_args": "not_use_by_now \"LIB_DIR/L3/tests/gunzip_csv_sc_test/data/input*.csv.gz\" \"LIB_DIR/L3/tests/gunzip_csv_sc_test/data\"", 
diff --git a/data_analytics/L3/tests/gunzip_csv_sc_test/kernel/data_engine_acc.hpp b/data_analytics/L3/tests/gunzip_csv_sc_test/kernel/data_engine_acc.hpp
index 0b6966adc1..48aaa72dc8 100644
--- a/data_analytics/L3/tests/gunzip_csv_sc_test/kernel/data_engine_acc.hpp
+++ b/data_analytics/L3/tests/gunzip_csv_sc_test/kernel/data_engine_acc.hpp
@@ -29,6 +29,11 @@ class data_engine_acc : public VPP_ACC<data_engine_acc, /*NCU=*/1> {
     SYS_PORT(cfgBuf, bank0);
     SYS_PORT(firValue, bank0);
 
+    SYS_PORT_PFM(u50, csvBuf, HBM[0]);
+    SYS_PORT_PFM(u50, szBuf, HBM[1]);
+    SYS_PORT_PFM(u50, cfgBuf, HBM[2]);
+    SYS_PORT_PFM(u50, firValue, HBM[3]);
+
    public:
     /**
      * @brief top of kernel
diff --git a/data_analytics/L3/tests/gunzip_csv_sc_test/preSysLink.tcl b/data_analytics/L3/tests/gunzip_csv_sc_test/preSysLink.tcl
new file mode 100644
index 0000000000..664d9964c1
--- /dev/null
+++ b/data_analytics/L3/tests/gunzip_csv_sc_test/preSysLink.tcl
@@ -0,0 +1 @@
+upgrade_ip [get_bd_cells -filter {VLNV=~*hbm_memory_subsystem*}]
diff --git a/data_analytics/L3/tests/gunzip_csv_sc_test/utils.mk b/data_analytics/L3/tests/gunzip_csv_sc_test/utils.mk
index 17354308c6..e930830997 100644
--- a/data_analytics/L3/tests/gunzip_csv_sc_test/utils.mk
+++ b/data_analytics/L3/tests/gunzip_csv_sc_test/utils.mk
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.4
+# sc makefile-generator v1.0.0
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -70,15 +70,16 @@ check_device:
 	fi;
 
 #get HOST_ARCH by PLATFORM
+ifneq (,$(PLATFORM))
 HOST_ARCH_temp = $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
-$(warning HOST_ARCH_temp:$(HOST_ARCH_temp))
 ifeq ($(HOST_ARCH_temp), x86)
 HOST_ARCH := x86
 else ifeq ($(HOST_ARCH_temp), cortex-a9)
 HOST_ARCH := aarch32
-else ifeq ($(HOST_ARCH_temp), cortex-a*)
+else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
 HOST_ARCH := aarch64
 endif
+endif
 
 #Checks for Device Family
 ifeq ($(HOST_ARCH), aarch32)
@@ -236,3 +237,10 @@ RMDIR = rm -rf
 MV = mv -f
 CP = cp -rf
 ECHO:= @echo
+
+ifneq (,$(shell echo $(XPLATFORM) | awk '/xilinx_u50_gen3x16_xdma_201920_3/'))
+VPP_FLAGS += --advanced.param compiler.ignorePlatformCompatibilityCheck=true
+ifeq ($(TARGET), hw)
+VPP_LDFLAGS += --advanced.param compiler.userPreSysLinkOverlayTcl=preSysLink.tcl
+endif
+endif
diff --git a/data_analytics/L3/tests/re_test/Makefile b/data_analytics/L3/tests/re_test/Makefile
index 83435331f9..ce01e52047 100644
--- a/data_analytics/L3/tests/re_test/Makefile
+++ b/data_analytics/L3/tests/re_test/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)//host/main.cpp $(XFLIB_DIR)/L3/src/sw/text/regex_engine.cpp $(XFLIB_DIR)/L3/src/sw/text/reEngine_config.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -141,6 +143,11 @@ LDFLAGS +=  -L $(CUR_DIR)//re_compile/lib/lib
 LDFLAGS +=  -l onig -l xfcompile
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -257,11 +264,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -295,21 +297,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -356,12 +358,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/reEngineKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_analytics/L3/tests/re_test/utils.mk b/data_analytics/L3/tests/re_test/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_analytics/L3/tests/re_test/utils.mk
+++ b/data_analytics/L3/tests/re_test/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_analytics/README.md b/data_analytics/README.md
index c7620dd04b..c67ec17529 100644
--- a/data_analytics/README.md
+++ b/data_analytics/README.md
@@ -45,7 +45,7 @@ Apache 2.0 license, advanced users are empowered to easily tailor, optimize and
 Supported operating systems are RHEL/CentOS 7.4, 7.5 and Ubuntu 16.04.4 LTS, 18.04.1 LTS.
 
 _GCC 5.0 or above_ is required for C++11/C++14 support.
-With CentOS/RHEL 7.4 and 7.5, it could enabled via
+With CentOS/RHEL 7.4 and 7.5, it can be enabled via
 [devtoolset-6](https://www.softwarecollections.org/en/scls/rhscl/devtoolset-6/).
 
 ### Development Tools
@@ -55,13 +55,13 @@ and a matching version of XRT should be installed.
 
 ## Running Test Cases
 
-This library ships two types of case: HLS cases and Vitis cases.
-HLS cases can only be found in `L1/tests` folder, and are created to test module-level functionality.
-Both types of cases are driven by makefiles.
+This library ships two types of test cases: HLS test cases and Vitis test cases.
+HLS test cases can only be found in the `L1/tests` folder, and are created to test module-level functionality.
+Both types of test cases are driven by makefiles.
 
 ### Shell Environment
 
-For command-line developers the following settings are required before running any case in this library:
+For command-line developers the following settings are required before running any test case in this library:
 
 ```console
 source /opt/xilinx/Vitis/2022.1/settings64.sh
@@ -112,7 +112,7 @@ Here, `TARGET` decides the FPGA binary type
 
 ## Benchmark Result
 
-In `L2/benchmarks` and `L2/demo`, these Kernels are built into xclbins targeting Alveo U200/U250/U50. We achieved a good performance. For more details about the benchmarks, please kindly find them in [benchmark results](https://xilinx.github.io/Vitis_Libraries/data_analytics/2022.1/benchmark/benchmark.html).
+The kernels in `L2/benchmarks` and `L2/demo` are built into xclbins targeting Alveo U200/U250/U50. We achieved good performance. For more details about the benchmarks, please refer to the [benchmark results](https://xilinx.github.io/Vitis_Libraries/data_analytics/2022.1/benchmark/benchmark.html).
 
 
 ## License
diff --git a/data_analytics/docs/Doxyfile_L1 b/data_analytics/docs/Doxyfile_L1
index 6528cc68c3..2bad1c5c0b 100644
--- a/data_analytics/docs/Doxyfile_L1
+++ b/data_analytics/docs/Doxyfile_L1
@@ -27,7 +27,7 @@ INPUT                  = ../L1/include/hw/xf_data_analytics/classification \
                          ../L1/include/hw/xf_data_analytics/clustering \
                          ../L1/include/hw/xf_data_analytics/common \
                          ../L1/include/hw/xf_data_analytics/dataframe \
-                         ../L1/include/hw/xf_data_analytics/text \
+                         ../L1/include/hw/xf_data_analytics/regression \
                          ../L1/include/sw/xf_data_analytics/text
 
 # The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
diff --git a/data_analytics/docs/Doxyfile_L2 b/data_analytics/docs/Doxyfile_L2
index 413526568d..2468a3118d 100644
--- a/data_analytics/docs/Doxyfile_L2
+++ b/data_analytics/docs/Doxyfile_L2
@@ -24,6 +24,7 @@
 
 INPUT                  = ../L2/include/hw/xf_data_analytics/classification \
                          ../L2/include/hw/xf_data_analytics/clustering \
+                         ../L2/include/hw/xf_data_analytics/dataframe \
                          ../L2/include/hw/xf_data_analytics/regression \
                          ../L2/include/hw/xf_data_analytics/text \
                          ../L2/include/hw/xf_data_analytics/geospatial
diff --git a/data_analytics/docs/Doxyfile_L3 b/data_analytics/docs/Doxyfile_L3
index 86a66888dd..3006706fc8 100644
--- a/data_analytics/docs/Doxyfile_L3
+++ b/data_analytics/docs/Doxyfile_L3
@@ -22,7 +22,9 @@
 # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
 # Note: If this tag is empty the current directory is searched.
 
-INPUT                  = ../L3/include/sw/xf_data_analytics/text
+INPUT                  = ../L3/include/sw/xf_data_analytics/text \
+                         ../L3/include/sw/xf_data_analytics/geospatial \
+                         ../L3/include/sw/xf_data_analytics/gunzip_csv
 
 # The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
 # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
diff --git a/data_analytics/docs/guide_L2/l2_api.rst b/data_analytics/docs/guide_L2/l2_api.rst
index fc05e23c76..4b4f03731e 100644
--- a/data_analytics/docs/guide_L2/l2_api.rst
+++ b/data_analytics/docs/guide_L2/l2_api.rst
@@ -36,6 +36,12 @@ Kernel Templates in ``xf::data_analytics::text``
 .. include:: ../rst_L2/namespace_xf_data_analytics_text.rst
    :start-after: FunctionSection
 
+Kernel Templates in ``xf::data_analytics::dataframe``
+*****************************************************
+
+.. include:: ../rst_L2/namespace_xf_data_analytics_dataframe.rst
+   :start-after: FunctionSection
+
 Kernel Templates in ``xf::data_analytics::geospatial``
 ******************************************************
 
diff --git a/data_analytics/docs/guide_L3/L3.rst b/data_analytics/docs/guide_L3/L3.rst
index a0a9ae8067..62746b8c07 100644
--- a/data_analytics/docs/guide_L3/L3.rst
+++ b/data_analytics/docs/guide_L3/L3.rst
@@ -26,4 +26,5 @@ Currenly one solution is provided: regular expression match acceleration.
    :maxdepth: 2
 
    overlay_class.rst
-   re/get_start.rst
+   re_engine/get_start.rst
+   data_engine/get_start.rst
diff --git a/data_analytics/docs/guide_L3/data_engine/get_start.rst b/data_analytics/docs/guide_L3/data_engine/get_start.rst
new file mode 100644
index 0000000000..7540b88b07
--- /dev/null
+++ b/data_analytics/docs/guide_L3/data_engine/get_start.rst
@@ -0,0 +1,210 @@
+.. 
+   Copyright 2022 Xilinx, Inc.
+  
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+CSV Scanner
+***********
+
+Getting Started
+===============
+
+As we utilize Gunzip for decompressing the compressed CSV file and CRC32C for calculating the checksum, please make sure you download the whole ``Vitis_Libraries`` package; otherwise, the target cannot be built successfully.
+
+To simplify integration for potential users, we created a multithreaded test environment at ``L3/tests/gunzip_csv_sc_test`` and packed the Vitis System Compiler application layer in ``L3/src/sw/gunzip_csv``, so that users do not need to take care of the detailed scheduling work.
+
+Limitation
+----------
+
+The CSV scanner kernel can be described as:
+
+.. image:: /images/12pipeline_gzip_csv_structure.png
+    :alt: CSV scanner structure
+    :width: 80%
+    :align: center
+
+- `maximal number of input files`: as we have only 12 processing units (PU) inside the kernel, the maximal number of input files should be limited to 12 at one call.
+
+- `Data types`: 5 data types are supported currently, including: int (64-bit), string (up to 1024 chars), date, bool, and numeric (56-bit significand, 8-bit exponent).
+
+- `CSV format`: up to 16 columns are supported, up to 8 columns post CSV parsing, and the length of one row should be less than or equal to 1024 bytes.
+
+- `filter operator`: only 7 types of operators are supported, as listed below:
+
+    +----------+-----------------------+
+    | FilterOp | Description           |
+    +----------+-----------------------+
+    | FOP_DC   | don't care            |
+    +----------+-----------------------+
+    | FOP_EQ   | equal                 |
+    +----------+-----------------------+
+    | FOP_NE   | not equal             |
+    +----------+-----------------------+
+    | FOP_GT   | greater than          |
+    +----------+-----------------------+
+    | FOP_LT   | less than             |
+    +----------+-----------------------+
+    | FOP_GE   | greater than or equal |
+    +----------+-----------------------+
+    | FOP_LE   | less than or equal    |
+    +----------+-----------------------+
+
+Example Usage
+=============
+
+Let's take TPC-H query-1 for example:
+
+First, you have to set up the schema for the input CSV files to let our engine know the data type of each column:
+
+.. code-block:: cpp
+
+    // declare the scan description
+    sssd_scandesc_t sd_q1;
+    // set the schema
+    sssd_schema_t schema;
+    schema.natt = 16;
+    sssd_dtype_t* dtype = (sssd_dtype_t*)malloc(sizeof(sssd_dtype_t) * schema.natt);
+    dtype[0] = SSSD_DTYPE_INT;     // l_orderkey
+    dtype[1] = SSSD_DTYPE_INT;     // l_partkey
+    dtype[2] = SSSD_DTYPE_INT;     // l_suppkey
+    dtype[3] = SSSD_DTYPE_INT;     // l_linenumber
+    dtype[4] = SSSD_DTYPE_NUMERIC; // l_quantity
+    dtype[5] = SSSD_DTYPE_NUMERIC; // l_extendedprice
+    dtype[6] = SSSD_DTYPE_NUMERIC; // l_discount
+    dtype[7] = SSSD_DTYPE_NUMERIC; // l_tax
+    dtype[8] = SSSD_DTYPE_STRING;  // l_returnflag
+    dtype[9] = SSSD_DTYPE_STRING;  // l_linestatus
+    dtype[10] = SSSD_DTYPE_DATE;   // l_shipdate
+    dtype[11] = SSSD_DTYPE_DATE;   // l_commitdate
+    dtype[12] = SSSD_DTYPE_DATE;   // l_receiptdate
+    dtype[13] = SSSD_DTYPE_STRING; // l_shipinstruct
+    dtype[14] = SSSD_DTYPE_STRING; // l_shipmode
+    dtype[15] = SSSD_DTYPE_STRING; // l_comment
+    schema.dtype = dtype;
+    schema.ftype = "csv";
+    schema.u.csv.header = 0;
+    schema.u.csv.delim = 0;
+    schema.u.csv.quote = 0;
+    // give the schema to the scan description
+    sd_q1.schema = schema;
+
+Secondly, you may want to specify which columns should be used to calculate the hash value:
+
+.. code-block:: cpp
+
+    // number of hashes
+    sd_q1.nhashatt = 2;
+    sd_q1.hashatt = (int32_t*)malloc(sizeof(int32_t) * sd_q1.nhashatt);
+    // which columns need to be hashed
+    sd_q1.hashatt[0] = 8;
+    sd_q1.hashatt[1] = 9;
+
+Then, you should choose which columns should be given in the result buffer:
+
+.. code-block:: cpp
+
+    // number of output columns
+    sd_q1.natt = 7;
+    sd_q1.att = (int32_t*)malloc(sizeof(int32_t) * sd_q1.natt);
+    // which columns should be output
+    sd_q1.att[0] = 4;  // l_quantity
+    sd_q1.att[1] = 5;  // l_extendedprice
+    sd_q1.att[2] = 6;  // l_discount;
+    sd_q1.att[3] = 7;  // l_tax;
+    sd_q1.att[4] = 8;  // l_returnflag
+    sd_q1.att[5] = 9;  // l_linestatus;
+    sd_q1.att[6] = 10; // l_shipdate;
+
+To filter on a specific column, you should set up a filter like this:
+
+.. code-block:: cpp
+
+    // number of filter
+    sd_q1.nfilter = 1;
+    sssd_filter_t** filter = (sssd_filter_t**)malloc(sizeof(sssd_filter_t*) * sd_q1.nfilter);
+    for (int i = 0; i < sd_q1.nfilter; ++i) {
+        filter[i] = (sssd_filter_t*)malloc(sizeof(sssd_filter_t));
+    }
+    // l_shipdate <= 19980902
+    filter[0]->att = 10; // l_shipdate
+    filter[0]->dtype = SSSD_DTYPE_DATE;
+    filter[0]->cmp = SSSD_LE;
+    filter[0]->arg_value.cmp_date.year = 1998;
+    filter[0]->arg_value.cmp_date.month = 9;
+    filter[0]->arg_value.cmp_date.day = 2;
+    // push the filter into the scan description
+    sd_q1.filter = filter;
+
+After all the setup is done, you may want to set the callbacks and launch the multi-threaded processing as follows:
+
+.. code-block:: cpp
+
+    // set callback
+    sssd_listfn_t fl = sssd_listfn;
+    sssd_scanfn_t fn = sssd_scanfn;
+    list_out_t list_ctxt = {0, 0};
+    list_ctxt.list_out = (char**)malloc(sizeof(char*) * 1024);
+    for (int i = 0; i < 1024; ++i) {
+        list_ctxt.list_out[i] = (char*)malloc(sizeof(char) * 1024);
+    }
+    // Multiple thread test
+    std::thread t1(
+        [&sssd, &fl, &list_ctxt](const char* pattern) {
+            int ret = sssd_list(sssd, pattern, fl, &list_ctxt);
+            if (ret == -1) printf("list failed\n");
+        },
+        path_pattern);
+    t1.join();
+    printf("fnm = %d\n", list_ctxt.fnm);
+
+    std::thread t_pool[list_ctxt.fnm];
+    scan_out_t* scan_ctxt = (scan_out_t*)malloc(sizeof(scan_out_t) * list_ctxt.fnm);
+    int t_nm = 36;
+    if (list_ctxt.fnm < t_nm) t_nm = list_ctxt.fnm;
+    for (int i = 0; i < t_nm; ++i) {
+        // int ret = sssd_scan(sssd, list_ctxt.list_out[i], &sd_q1, fn, &scan_ctxt[i]);
+        // if(i < list_ctxt.fnm) {
+        t_pool[i] = std::thread(
+            [&sssd, &sd_q1, &fn, &list_ctxt, &scan_ctxt](const int nm, const int id) {
+                for (int j = 0; j < (list_ctxt.fnm + nm - 1) / nm; ++j) {
+                    int idx = j * nm + id;
+                    if (idx < list_ctxt.fnm) {
+                        scan_ctxt[idx].row_nm = 0;
+                        scan_ctxt[idx].sd = &sd_q1;
+                        char* file_name = list_ctxt.list_out[idx];
+                        scan_out_t* ctxt = &scan_ctxt[idx];
+                        int ret = sssd_scan(sssd, file_name, &sd_q1, fn, ctxt);
+                        if (ret == -1) printf("scan failed\n");
+                    }
+                }
+            },
+            t_nm, i);
+    }
+    for (int i = 0; i < t_nm; ++i) {
+        t_pool[i].join();
+        printf("output rows %d\n", scan_ctxt[i].row_nm);
+    }
+
+Finally, don't forget to release the resources after the acceleration process is done:
+
+.. code-block:: cpp
+
+    // release resources
+    free(sd_q1.att);
+    for (int i = 0; i < sd_q1.nfilter; ++i) {
+        free(filter[i]);
+    }
+    free(filter);
+
+    free(sd_q1.hashatt);
diff --git a/data_analytics/docs/guide_L3/overlay_class.rst b/data_analytics/docs/guide_L3/overlay_class.rst
index ac159bcce6..d560b0ae11 100644
--- a/data_analytics/docs/guide_L3/overlay_class.rst
+++ b/data_analytics/docs/guide_L3/overlay_class.rst
@@ -23,4 +23,7 @@ Software Acceleration Classes
 
 .. include:: ../rst_L3/enum_xf_data_analytics_text_re_ErrCode.rst
 .. include:: ../rst_L3/class_xf_data_analytics_text_re_RegexEngine.rst
+.. include:: ../rst_L3/class_sssd_engine_DataEngineConfig.rst
+.. include:: ../rst_L3/class_sssd_engine_data_engine_sc_DataEngine.rst
+.. include:: ../rst_L3/class_sssd_engine_SmartSSDCache.rst
 
diff --git a/data_analytics/docs/guide_L3/re/get_start.rst b/data_analytics/docs/guide_L3/re_engine/get_start.rst
similarity index 100%
rename from data_analytics/docs/guide_L3/re/get_start.rst
rename to data_analytics/docs/guide_L3/re_engine/get_start.rst
diff --git a/data_analytics/docs/release.rst b/data_analytics/docs/release.rst
index 6604392917..c687c80ac4 100644
--- a/data_analytics/docs/release.rst
+++ b/data_analytics/docs/release.rst
@@ -29,10 +29,6 @@ The Data Analytics Library has the following addition in the 2022.1 release:
 * **csv scanner** could be used to accelerate the extract, transform and load process. It integrates GZIP decompression, CSV parser, filter module together to make them work in parallel. ETL accelerator could work together with database to run queries on large size of semi-structured and unstructured data.
 * **Geospatial APIs.** Two major APIs in this family has been included: the Spatial Join and KNN. The former API inserts the columns from one feature table to another based on location or proximity, while the latter is often used to find the K nearest neighbors around the center point. They are both vital for spatial analysis and spatial data mining.
 
-There are some known issues for this release.
-
-* Log Analyer in L2 demo fails hardware build with 2022.1 Vitis. Please use 2021.2 Vitis for it,
-
 2021.2
 ------
 
diff --git a/data_analytics/docs/tutorial.rst b/data_analytics/docs/tutorial.rst
index 417e3e4ada..30d39fa6c8 100644
--- a/data_analytics/docs/tutorial.rst
+++ b/data_analytics/docs/tutorial.rst
@@ -109,11 +109,6 @@ Target audience of L2 API are users who has certain understanding of HLS and pro
 (4) Get accurate kernel resources and clock.
 (5) Analyze kernel's timing performance and throughput.
 
-.. image:: /images/12pipeline_gzip_csv_structure.png
-   :alt: Kernel Structure
-   :scale: 50%
-   :align: center
-
 Command to Run L2 cases
 -------------------------
 
diff --git a/data_compression/Jenkinsfile b/data_compression/Jenkinsfile
index 05fab0d462..bc6b560fb6 100644
--- a/data_compression/Jenkinsfile
+++ b/data_compression/Jenkinsfile
@@ -1,5 +1,5 @@
 @Library('pipeline-library')_
 
-VitisLibPipeline (branch: 'next', libname: 'xf_compression', run_sw_in_pr: 'true',
+VitisLibPipeline (branch: 'main', libname: 'xf_compression', run_sw_in_pr: 'true',
 TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu',
-upstream_dependencies: 'xf_security,next,../security', devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_stable_latest')
+upstream_dependencies: 'xf_security,main,../security', devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_released')
diff --git a/data_compression/L2/demos/gzip/Makefile b/data_compression/L2/demos/gzip/Makefile
index 7d6b5cf409..609408c818 100644
--- a/data_compression/L2/demos/gzip/Makefile
+++ b/data_compression/L2/demos/gzip/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -66,7 +72,7 @@ endif
 
 # #################### Checking if PLATFORM in whitelist ############################
 PLATFORM_ALLOWLIST +=  u200 u250 vck190 aws-vu9p-f1
-PLATFORM_BLOCKLIST +=  zc u50
+PLATFORM_BLOCKLIST +=  zc u50 vck190_base_dfx
 
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
@@ -88,7 +94,7 @@ RUN_DEPS :=
 # get global setting
 ifeq ($(HOST_ARCH), x86)
 CXXFLAGS += -fmessage-length=0 -I$(CUR_DIR)/src/ -I$(XILINX_XRT)/include -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
-LDFLAGS += -pthread -L$(XILINX_XRT)/lib -Wl,--as-needed -lOpenCL -lxrt_coreutil 
+LDFLAGS += -pthread -L$(XILINX_XRT)/lib -L$(XILINX_HLS)/lnx64/tools/fpo_v7_0  -Wl,--as-needed -lOpenCL -lxrt_coreutil -lgmp -lmpfr -lIp_floating_point_v7_0_bitacc_cmodel 
 VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
 VPP_LDFLAGS += --optimize 2 -R 2 
 else ifeq ($(HOST_ARCH), aarch64)
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D GZIP_MULTICORE_COMPRESS -D PARALLEL_BLOCK=1 -D GZIP_MODE=1
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/L2/tests/src -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_gzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -199,11 +206,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -237,21 +239,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -298,12 +300,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress_decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress_decompress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -341,12 +343,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/demos/gzip/README.rst b/data_compression/L2/demos/gzip/README.rst
index bbfa668b88..84cc86eca1 100644
--- a/data_compression/L2/demos/gzip/README.rst
+++ b/data_compression/L2/demos/gzip/README.rst
@@ -11,12 +11,12 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	          : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -c <input file_name>``
-2. To execute single file for decompression           : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -d <compressed file_name>``
-3. To validate single file (compress & decompress)    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -t <input file_name>``
-4. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -cfl <files.list>``
-5. To execute multiple files for decompression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -dfl <compressed files.list>``
-6. To validate multiple files (compress & decompress) : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -l <files.list>``
+1. To execute single file for compression 	          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -c <input file_name>``
+2. To execute single file for decompression           : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -d <compressed file_name>``
+3. To validate single file (compress & decompress)    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -t <input file_name>``
+4. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -cfl <files.list>``
+5. To execute multiple files for decompression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -dfl <compressed files.list>``
+6. To validate multiple files (compress & decompress) : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -l <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/demos/gzip/description.json b/data_compression/L2/demos/gzip/description.json
index 54c9f58178..e4d1a55d07 100644
--- a/data_compression/L2/demos/gzip/description.json
+++ b/data_compression/L2/demos/gzip/description.json
@@ -21,7 +21,8 @@
     "default_device": "u200",
     "platform_blocklist": [
         "zc",
-        "u50"
+        "u50",
+        "vck190_base_dfx"
     ],
     "platform_allowlist": [
         "u200",
@@ -157,4 +158,4 @@
             ]
         }
     }
-}
\ No newline at end of file
+}
diff --git a/data_compression/L2/demos/gzip/utils.mk b/data_compression/L2/demos/gzip/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/demos/gzip/utils.mk
+++ b/data_compression/L2/demos/gzip/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/demos/lz4/Makefile b/data_compression/L2/demos/lz4/Makefile
index f921496cfc..c0ec2a8b29 100644
--- a/data_compression/L2/demos/lz4/Makefile
+++ b/data_compression/L2/demos/lz4/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -66,7 +72,7 @@ endif
 
 # #################### Checking if PLATFORM in whitelist ############################
 PLATFORM_ALLOWLIST +=  u200 u250 vck190 aws-vu9p-f1
-PLATFORM_BLOCKLIST +=  zc
+PLATFORM_BLOCKLIST +=  zc vck190_base_dfx
 
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
@@ -87,7 +93,7 @@ RUN_DEPS :=
 # get global setting
 ifeq ($(HOST_ARCH), x86)
 CXXFLAGS += -fmessage-length=0 -I$(CUR_DIR)/src/ -I$(XILINX_XRT)/include -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
-LDFLAGS += -pthread -L$(XILINX_XRT)/lib -Wl,--as-needed -lOpenCL -lxrt_coreutil 
+LDFLAGS += -pthread -L$(XILINX_XRT)/lib -L$(XILINX_HLS)/lnx64/tools/fpo_v7_0  -Wl,--as-needed -lOpenCL -lxrt_coreutil -lgmp -lmpfr -lIp_floating_point_v7_0_bitacc_cmodel 
 VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
 VPP_LDFLAGS += --optimize 2 -R 2 
 else ifeq ($(HOST_ARCH), aarch64)
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/lz4OCLHost.cpp $(XFLIB_DIR)/common/libs/compress/lz4Base.cpp $(XFLIB_DIR)/common/libs/compress/lz4App.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -125,6 +127,11 @@ CXXFLAGS +=  -D PARALLEL_BLOCK=8
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_lz4
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -189,11 +196,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -227,21 +229,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -288,12 +290,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress_decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress_decompress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -331,12 +333,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/demos/lz4/README.rst b/data_compression/L2/demos/lz4/README.rst
index c4019c97f3..dbbdb53524 100644
--- a/data_compression/L2/demos/lz4/README.rst
+++ b/data_compression/L2/demos/lz4/README.rst
@@ -59,12 +59,12 @@ Software & Hardware
 Executable Usage
 ----------------
  
-1. To execute single file for compression             : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -c <file_name>``
-2. To execute single file for decompression           : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -d <file_name.lz4>``
-3. To validate single file (compress & decompress)    : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -t <file_name>``
-4. To execute multiple files for compression     : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -cfl <files.list>``
-5. To execute multiple files for decompression     : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress xclbin> -dfl <compressed files.list>``
-6. To validate multiple files (compress and decompress) : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress xclbin> -l <files.list>``  
+1. To execute single file for compression             : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -c <file_name>``
+2. To execute single file for decompression           : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -d <file_name.lz4>``
+3. To validate single file (compress & decompress)    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -t <file_name>``
+4. To execute multiple files for compression     : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -cfl <files.list>``
+5. To execute multiple files for decompression     : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -dfl <compressed files.list>``
+6. To validate multiple files (compress and decompress) : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -l <files.list>``  
            
       - ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/demos/lz4/description.json b/data_compression/L2/demos/lz4/description.json
index b74a9b02fd..a2b471f7b7 100644
--- a/data_compression/L2/demos/lz4/description.json
+++ b/data_compression/L2/demos/lz4/description.json
@@ -32,7 +32,8 @@
     ],
     "default_device": "u200",
     "platform_blocklist": [
-        "zc"
+        "zc",
+        "vck190_base_dfx"
     ],
     "platform_allowlist": [
         "u200",
diff --git a/data_compression/L2/demos/lz4/utils.mk b/data_compression/L2/demos/lz4/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/demos/lz4/utils.mk
+++ b/data_compression/L2/demos/lz4/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/demos/lz4_streaming/Makefile b/data_compression/L2/demos/lz4_streaming/Makefile
index b2cf76c819..5433636cdb 100644
--- a/data_compression/L2/demos/lz4_streaming/Makefile
+++ b/data_compression/L2/demos/lz4_streaming/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/lz4OCLHost.cpp $(XFLIB_DIR)/common/libs/compress/lz4Base.cpp $(XFLIB_DIR)/common/libs/compress/lz4App.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -123,6 +125,11 @@ HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/lz4OCLHos
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_lz4_streaming
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -199,11 +206,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -237,21 +239,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -298,12 +300,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress_decompress_streaming.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress_decompress_streaming.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -341,12 +343,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/demos/lz4_streaming/README.rst b/data_compression/L2/demos/lz4_streaming/README.rst
index ca91e551d7..5b42d6ffb9 100644
--- a/data_compression/L2/demos/lz4_streaming/README.rst
+++ b/data_compression/L2/demos/lz4_streaming/README.rst
@@ -44,12 +44,12 @@ Note: Overall throughput can still be increased with multiple compute units.
 Executable Usage
 ~~~~~~~~~~~~~~~
 
-1. To execute single file for compression 	: ``./build/xil_lz4_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin> -c <file_name>``
-2. To execute single file for decompression	: ``./build/xil_lz4_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin> -d <file_name.lz4>``
-3. To validate single file (compress & decompress) : ``./build/xil_lz4_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin> -t <file_name>``
-4. To execute multiple files for compression           : ``./build/xil_lz4_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin -cfl <files.list>``
-5. To execute multiple files for decompression          : ``./build/xil_lz4_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin -dfl <compressed files.list>``   
-6. To validate multiple files (compress & decompress)      : ``./build/xil_lz4_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin -l <files.list>``  
+1. To execute single file for compression 	: ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -c <file_name>``
+2. To execute single file for decompression	: ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -d <file_name.lz4>``
+3. To validate single file (compress & decompress) : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -t <file_name>``
+4. To execute multiple files for compression           : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -cfl <files.list>``
+5. To execute multiple files for decompression          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -dfl <compressed files.list>``   
+6. To validate multiple files (compress & decompress)      : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -l <files.list>``  
 	
       - ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/demos/lz4_streaming/utils.mk b/data_compression/L2/demos/lz4_streaming/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/demos/lz4_streaming/utils.mk
+++ b/data_compression/L2/demos/lz4_streaming/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/demos/snappy/Makefile b/data_compression/L2/demos/snappy/Makefile
index f70014330e..e0cc34ae30 100644
--- a/data_compression/L2/demos/snappy/Makefile
+++ b/data_compression/L2/demos/snappy/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/snappyOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/snappyBase.cpp $(XFLIB_DIR)/common/libs/compress/snappyApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -125,6 +127,11 @@ CXXFLAGS +=  -D PARALLEL_BLOCK=8
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_snappy
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -189,11 +196,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -227,21 +229,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -288,12 +290,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress_decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress_decompress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -331,12 +333,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/demos/snappy/README.rst b/data_compression/L2/demos/snappy/README.rst
index fb616e7e0e..59c7ec3a9d 100644
--- a/data_compression/L2/demos/snappy/README.rst
+++ b/data_compression/L2/demos/snappy/README.rst
@@ -86,12 +86,12 @@ Hardware
 Executable Usage
 ----------------
  
-1. To execute single file for compression             : ``./build/xil_snappy -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -c <file_name>``
-2. To execute single file for decompression           : ``./build/xil_snappy -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -d <file_name.snappy>``
-3. To validate single file (compress & decompress)    : ``./build/xil_snappy -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -t <file_name>``
-4. To execute multiple files for compression     : ``./build/xil_snappy -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -cfl <files.list>``
-5. To execute multiple files for decompression     : ``./build/xil_snappy -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -dfl <compressed files.list>``
-6. To validate multiple files (compress and decompress) : ``./build/xil_snappy -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -l <files.list>``  
+1. To execute single file for compression             : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -c <file_name>``
+2. To execute single file for decompression           : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -d <file_name.snappy>``
+3. To validate single file (compress & decompress)    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -t <file_name>``
+4. To execute multiple files for compression     : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -cfl <files.list>``
+5. To execute multiple files for decompression     : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -dfl <compressed files.list>``
+6. To validate multiple files (compress and decompress) : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -l <files.list>``  
                
       - ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/demos/snappy/utils.mk b/data_compression/L2/demos/snappy/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/demos/snappy/utils.mk
+++ b/data_compression/L2/demos/snappy/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,39 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+#On non-x86 hosts K_IMAGE and ROOTFS fall back to files two directories above
+#SYSROOT unless they are already defined.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+#Each check below fails when the path is empty or does not exist on disk,
+#because wildcard expands to nothing in both cases.
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT ENV variable is not set or does not point to an existing path, please set ENV variable correctly and rerun)
 endif
 endif
+#K_IMAGE/ROOTFS get a SYSROOT-based default above, so name the path that is missing.
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/demos/snappy_streaming/Makefile b/data_compression/L2/demos/snappy_streaming/Makefile
index a986be6ddb..222b04b291 100644
--- a/data_compression/L2/demos/snappy_streaming/Makefile
+++ b/data_compression/L2/demos/snappy_streaming/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/snappyOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/snappyBase.cpp $(XFLIB_DIR)/common/libs/compress/snappyApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -125,6 +127,11 @@ CXXFLAGS +=  -D SNAPPY_STREAM -D PARALLEL_BLOCK=1 -D MULTIPLE_BYTES=8
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_snappy_streaming
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -202,11 +209,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -240,21 +242,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but must not force re-packaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -301,12 +303,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress_decompress_streaming.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress_decompress_streaming.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -344,12 +346,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/demos/snappy_streaming/README.rst b/data_compression/L2/demos/snappy_streaming/README.rst
index c1eb4eda8d..dd508109b4 100644
--- a/data_compression/L2/demos/snappy_streaming/README.rst
+++ b/data_compression/L2/demos/snappy_streaming/README.rst
@@ -44,12 +44,12 @@ Note: Overall throughput can still be increased with multiple compute units.
 Executable Usage
 ~~~~~~~~~~~~~~~
                                                                                                                                                              
-1. To execute single file for compression   : ``./build/xil_snappy_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin> -c <file_name>``
-2. To execute single file for decompression : ``./build/xil_snappy_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin> -d <file_name.snappy>``
-3. To validate single file (compress & decompress) : ``./build/xil_snappy_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin> -t <files_name>``
-4. To execute multiple files for compression           : ``./build/xil_snappy_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin -cfl <files.list>``
-5. To execute multiple files for decompression          : ``./build/xil_snappy_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin -dfl <compressed files.list>``   
-6. To validate multiple files (compress & decompress)      : ``./build/xil_snappy_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin -l <files.list>``  
+1. To execute single file for compression   : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -c <file_name>``
+2. To execute single file for decompression : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -d <file_name.snappy>``
+3. To validate single file (compress & decompress) : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -t <files_name>``
+4. To execute multiple files for compression           : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -cfl <files.list>``
+5. To execute multiple files for decompression          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -dfl <compressed files.list>``   
+6. To validate multiple files (compress & decompress)      : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -l <files.list>``  
         
       - ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/demos/snappy_streaming/utils.mk b/data_compression/L2/demos/snappy_streaming/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/demos/snappy_streaming/utils.mk
+++ b/data_compression/L2/demos/snappy_streaming/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,39 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+#On non-x86 hosts K_IMAGE and ROOTFS fall back to files two directories above
+#SYSROOT unless they are already defined.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+#Each check below fails when the path is empty or does not exist on disk,
+#because wildcard expands to nothing in both cases.
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT ENV variable is not set or does not point to an existing path, please set ENV variable correctly and rerun)
 endif
 endif
+#K_IMAGE/ROOTFS get a SYSROOT-based default above, so name the path that is missing.
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/demos/zstd_decompress/Makefile b/data_compression/L2/demos/zstd_decompress/Makefile
index 1ac36b81bb..0dd3061176 100644
--- a/data_compression/L2/demos/zstd_decompress/Makefile
+++ b/data_compression/L2/demos/zstd_decompress/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/zstdOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/zstdApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp 
@@ -125,6 +127,11 @@ CXXFLAGS +=  -D ZSTD_BLOCK_SIZE_KB=32
 CXXFLAGS +=  -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/tests/src -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zstd
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -140,7 +147,7 @@ VPP_FLAGS +=  -I $(XFLIB_DIR)/L2/tests/src -I $(XFLIB_DIR)/../security/L1/includ
 
 ######################### binary container global settings ##########################
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
-VPP_LDFLAGS_xilZstdDecompressStream_temp := --config $(CUR_DIR)/connectivity.cfg --clock.id 4:xilZstdDecompressStream_1
+VPP_LDFLAGS_xilZstdDecompressStream_temp := --clock.id 4:xilZstdDecompressStream_1
 VPP_LDFLAGS_xilZstdDecompressStream += $(VPP_LDFLAGS_xilZstdDecompressStream_temp)
 
 else 
@@ -193,11 +200,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -231,21 +233,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but must not force re-packaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -292,12 +294,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/xilZstdDecompressStream.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/xilZstdDecompressStream.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -335,12 +337,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/demos/zstd_decompress/README.rst b/data_compression/L2/demos/zstd_decompress/README.rst
index d7d4dffc33..1a8046b253 100644
--- a/data_compression/L2/demos/zstd_decompress/README.rst
+++ b/data_compression/L2/demos/zstd_decompress/README.rst
@@ -42,8 +42,8 @@ Performance Data
 
 Executable Usage:
 
-1. To execute single file for decompression           : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -d <compressed file_name>``
-2. To decompress multiple files                       : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -l <files.list>``
+1. To execute single file for decompression           : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zstd -xbin ./build_dir.<TARGET mode>.<xsa_name>/xilZstdDecompressStream.xclbin -d <compressed file_name>``
+2. To decompress multiple files                       : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zstd -xbin ./build_dir.<TARGET mode>.<xsa_name>/xilZstdDecompressStream.xclbin -l <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/demos/zstd_decompress/utils.mk b/data_compression/L2/demos/zstd_decompress/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/demos/zstd_decompress/utils.mk
+++ b/data_compression/L2/demos/zstd_decompress/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,39 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+#On non-x86 hosts K_IMAGE and ROOTFS fall back to files two directories above
+#SYSROOT unless they are already defined.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+#Each check below fails when the path is empty or does not exist on disk,
+#because wildcard expands to nothing in both cases.
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT ENV variable is not set or does not point to an existing path, please set ENV variable correctly and rerun)
 endif
 endif
+#K_IMAGE/ROOTFS get a SYSROOT-based default above, so name the path that is missing.
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/adler32_mm/Makefile b/data_compression/L2/tests/adler32_mm/Makefile
index b3950817e7..9af11c38b2 100644
--- a/data_compression/L2/tests/adler32_mm/Makefile
+++ b/data_compression/L2/tests/adler32_mm/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
@@ -123,6 +125,11 @@ HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XF
 CXXFLAGS +=  -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_adler32
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -172,11 +179,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -210,21 +212,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but must not force re-packaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -271,12 +273,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/adler32.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/adler32_mm/utils.mk b/data_compression/L2/tests/adler32_mm/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/adler32_mm/utils.mk
+++ b/data_compression/L2/tests/adler32_mm/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Non-x86 hosts only: default K_IMAGE and ROOTFS relative to SYSROOT unless already set (?=); zc706 platforms use uImage
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/checksum32_mm/Makefile b/data_compression/L2/tests/checksum32_mm/Makefile
index b0783f1562..d425b26e93 100644
--- a/data_compression/L2/tests/checksum32_mm/Makefile
+++ b/data_compression/L2/tests/checksum32_mm/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c 
@@ -123,6 +125,11 @@ HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XF
 CXXFLAGS +=  -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS contains no "opencv" substring
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_checksum
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -172,11 +179,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -210,21 +212,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks still run but must not force repackaging every time
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -271,12 +273,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/checksum.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/checksum32_mm/utils.mk b/data_compression/L2/tests/checksum32_mm/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/checksum32_mm/utils.mk
+++ b/data_compression/L2/tests/checksum32_mm/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Non-x86 hosts only: default K_IMAGE and ROOTFS relative to SYSROOT unless already set (?=); zc706 platforms use uImage
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/crc32_mm/Makefile b/data_compression/L2/tests/crc32_mm/Makefile
index bcdf2c26d6..9ebb78f789 100644
--- a/data_compression/L2/tests/crc32_mm/Makefile
+++ b/data_compression/L2/tests/crc32_mm/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c 
@@ -123,6 +125,11 @@ HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XF
 CXXFLAGS +=  -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS contains no "opencv" substring
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_crc32
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -172,11 +179,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -210,21 +212,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks still run but must not force repackaging every time
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -271,12 +273,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/crc32.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/crc32_mm/utils.mk b/data_compression/L2/tests/crc32_mm/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/crc32_mm/utils.mk
+++ b/data_compression/L2/tests/crc32_mm/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Non-x86 hosts only: default K_IMAGE and ROOTFS relative to SYSROOT unless already set (?=); zc706 platforms use uImage
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/gzip_decompress/Makefile b/data_compression/L2/tests/gzip_decompress/Makefile
index 4991c00b16..b738646383 100644
--- a/data_compression/L2/tests/gzip_decompress/Makefile
+++ b/data_compression/L2/tests/gzip_decompress/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D PARALLEL_BLOCK=1 -D GZIP_MODE=1
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS contains no "opencv" substring
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_gzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -194,11 +201,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -232,21 +234,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks still run but must not force repackaging every time
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -293,12 +295,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/decompress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -336,12 +338,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.gz.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.gz.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/gzip_decompress/README.rst b/data_compression/L2/tests/gzip_decompress/README.rst
index f1a390d361..b81368c287 100644
--- a/data_compression/L2/tests/gzip_decompress/README.rst
+++ b/data_compression/L2/tests/gzip_decompress/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	          : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/decompress.xclbin  -d <file_name>``
-2. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/decompress.xclbin  -dfl <files.list>``
+1. To execute single file for decompression 	          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/decompress.xclbin  -d <file_name>``
+2. To execute multiple files for decompression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/decompress.xclbin  -dfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/gzip_decompress/utils.mk b/data_compression/L2/tests/gzip_decompress/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/gzip_decompress/utils.mk
+++ b/data_compression/L2/tests/gzip_decompress/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Non-x86 hosts only: default K_IMAGE and ROOTFS relative to SYSROOT unless already set (?=); zc706 platforms use uImage
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/gzip_decompress_dynamic/Makefile b/data_compression/L2/tests/gzip_decompress_dynamic/Makefile
index acb26026e8..fa11a17f3b 100644
--- a/data_compression/L2/tests/gzip_decompress_dynamic/Makefile
+++ b/data_compression/L2/tests/gzip_decompress_dynamic/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D PARALLEL_BLOCK=1 -D GZIP_MODE=1
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/gzip/ -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS contains no "opencv" substring
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_gzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -194,11 +201,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -232,21 +234,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks still run but must not force repackaging every time
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -293,12 +295,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/decompress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -336,12 +338,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.gz.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.gz.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/gzip_decompress_dynamic/utils.mk b/data_compression/L2/tests/gzip_decompress_dynamic/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/gzip_decompress_dynamic/utils.mk
+++ b/data_compression/L2/tests/gzip_decompress_dynamic/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # aborts for non-x86 hosts when the kernel image path in K_IMAGE does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # aborts for non-x86 hosts when the root filesystem image path in ROOTFS does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/gzip_decompress_fixed/Makefile b/data_compression/L2/tests/gzip_decompress_fixed/Makefile
index 0b4feb8ae7..b8d03644b6 100644
--- a/data_compression/L2/tests/gzip_decompress_fixed/Makefile
+++ b/data_compression/L2/tests/gzip_decompress_fixed/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D PARALLEL_BLOCK=1 -D GZIP_MODE=1
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_gzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -194,11 +201,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -232,21 +234,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but must not force re-packaging on every build
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -293,12 +295,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/decompress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -336,12 +338,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.gz.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.gz.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/gzip_decompress_fixed/utils.mk b/data_compression/L2/tests/gzip_decompress_fixed/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/gzip_decompress_fixed/utils.mk
+++ b/data_compression/L2/tests/gzip_decompress_fixed/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # aborts for non-x86 hosts when the kernel image path in K_IMAGE does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # aborts for non-x86 hosts when the root filesystem image path in ROOTFS does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/gzip_decompress_lowLatency/Makefile b/data_compression/L2/tests/gzip_decompress_lowLatency/Makefile
index cd517ea741..667047fcb2 100644
--- a/data_compression/L2/tests/gzip_decompress_lowLatency/Makefile
+++ b/data_compression/L2/tests/gzip_decompress_lowLatency/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,15 +114,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
 CXXFLAGS +=  -D PARALLEL_BLOCK=1 -D GZIP_MODE=1
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_gzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -179,11 +186,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -217,21 +219,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but must not force re-packaging on every build
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -278,12 +280,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/decompress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/gzip_decompress_lowLatency/utils.mk b/data_compression/L2/tests/gzip_decompress_lowLatency/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/gzip_decompress_lowLatency/utils.mk
+++ b/data_compression/L2/tests/gzip_decompress_lowLatency/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # aborts for non-x86 hosts when the kernel image path in K_IMAGE does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # aborts for non-x86 hosts when the root filesystem image path in ROOTFS does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/gzip_decompress_mm/Makefile b/data_compression/L2/tests/gzip_decompress_mm/Makefile
index bf14cf0984..54ca1cf7ca 100644
--- a/data_compression/L2/tests/gzip_decompress_mm/Makefile
+++ b/data_compression/L2/tests/gzip_decompress_mm/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
@@ -125,6 +127,11 @@ CXXFLAGS +=  -D PARALLEL_BLOCK=1 -D GZIP_MODE=1 -D DECOMPRESS_MM
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_gzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -181,11 +188,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -219,21 +221,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but must not force re-packaging on every build
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -280,12 +282,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/decompress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -323,12 +325,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/sample.txt.gz.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/sample.txt.gz.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/gzip_decompress_mm/README.rst b/data_compression/L2/tests/gzip_decompress_mm/README.rst
index aadbc7c9f6..05469763ba 100644
--- a/data_compression/L2/tests/gzip_decompress_mm/README.rst
+++ b/data_compression/L2/tests/gzip_decompress_mm/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	          : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/decompress.xclbin  -d <file_name>``
-2. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/decompress.xclbin  -dfl <files.list>``
+1. To execute single file for decompression 	          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/decompress.xclbin  -d <file_name>``
+2. To execute multiple files for decompression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/decompress.xclbin  -dfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/gzip_decompress_mm/utils.mk b/data_compression/L2/tests/gzip_decompress_mm/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/gzip_decompress_mm/utils.mk
+++ b/data_compression/L2/tests/gzip_decompress_mm/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # aborts for non-x86 hosts when the kernel image path in K_IMAGE does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # aborts for non-x86 hosts when the root filesystem image path in ROOTFS does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/gzip_decompress_xrt/Makefile b/data_compression/L2/tests/gzip_decompress_xrt/Makefile
index 3f1acc4e73..4b10dcf327 100644
--- a/data_compression/L2/tests/gzip_decompress_xrt/Makefile
+++ b/data_compression/L2/tests/gzip_decompress_xrt/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipXrtHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
@@ -129,6 +131,11 @@ CXXFLAGS += -std=c++1y
 LDFLAGS +=  -l uuid -l xrt_coreutil
 
 endif
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_gzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -197,11 +204,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -235,21 +237,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only so they do not force re-packaging on every run
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -296,12 +298,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/decompress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -339,12 +341,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.gz.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.gz.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/gzip_decompress_xrt/README.rst b/data_compression/L2/tests/gzip_decompress_xrt/README.rst
index a183f5de39..3d67a337ee 100644
--- a/data_compression/L2/tests/gzip_decompress_xrt/README.rst
+++ b/data_compression/L2/tests/gzip_decompress_xrt/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	          : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/decompress.xclbin  -d <file_name>``
-2. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/decompress.xclbin  -dfl <files.list>``
+1. To execute single file for decompression 	          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/decompress.xclbin  -d <file_name>``
+2. To execute multiple files for decompression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/decompress.xclbin  -dfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/gzip_decompress_xrt/utils.mk b/data_compression/L2/tests/gzip_decompress_xrt/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/gzip_decompress_xrt/utils.mk
+++ b/data_compression/L2/tests/gzip_decompress_xrt/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort when the K_IMAGE path does not exist (skipped when HOST_ARCH is x86)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # abort when the ROOTFS path does not exist (skipped when HOST_ARCH is x86)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/gzip_p2p_decompress/Makefile b/data_compression/L2/tests/gzip_p2p_decompress/Makefile
index 6b52992306..c42db6bb85 100644
--- a/data_compression/L2/tests/gzip_p2p_decompress/Makefile
+++ b/data_compression/L2/tests/gzip_p2p_decompress/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -66,7 +72,7 @@ endif
 
 # #################### Checking if PLATFORM in whitelist ############################
 PLATFORM_ALLOWLIST +=  samsung_u2 samsung_U2 u200 u250 u50 vck190 aws-vu9p-f1
-PLATFORM_BLOCKLIST +=  zc
+PLATFORM_BLOCKLIST +=  zc vck190_base_dfx
 
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/L2/tests/src/zlib.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D PARALLEL_BLOCK=1 -D D_COMPUTE_UNIT=1 -D GZIP_MODE=1
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw/ -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_gzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -188,11 +195,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -226,21 +228,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only so they do not force re-packaging on every run
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -287,12 +289,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress_stream.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/decompress_stream.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/gzip_p2p_decompress/description.json b/data_compression/L2/tests/gzip_p2p_decompress/description.json
index ffff3bf8ed..46a5955ec6 100644
--- a/data_compression/L2/tests/gzip_p2p_decompress/description.json
+++ b/data_compression/L2/tests/gzip_p2p_decompress/description.json
@@ -7,7 +7,8 @@
     "flow": "vitis", 
     "default_device": "samsung_u2",
     "platform_blocklist": [
-        "zc"
+        "zc",
+        "vck190_base_dfx"
     ], 
     "platform_allowlist": [
         "samsung_u2", 
diff --git a/data_compression/L2/tests/gzip_p2p_decompress/run.sh b/data_compression/L2/tests/gzip_p2p_decompress/run.sh
index dec3f7f4a4..d42b949568 100755
--- a/data_compression/L2/tests/gzip_p2p_decompress/run.sh
+++ b/data_compression/L2/tests/gzip_p2p_decompress/run.sh
@@ -3,17 +3,19 @@ EXE_FILE=$1
 LIB_PROJ_ROOT=$2
 XCLBIN_FILE=$3
 echo "XCL_MODE=${XCL_EMULATION_MODE}"
-if [ "${XCL_EMULATION_MODE}" != "hw_emu" ] 
-then
-    cp $LIB_PROJ_ROOT/common/data/sample.txt .
-  
-    echo -e "\n\n----------Comparing files after Decompression---------\n"
-    cmd1=$(diff data/sample.txt data/sample.txt.gz.*)
-    if [ $? -eq 0 ]
-     then
-        echo "files are the same"
-    else
-        echo "files are different"
-        echo "$cmd1"
-   fi     
+# Compare the decompressed output only off vck190; PLATFORM is presumably exported by the caller (TODO confirm).
+PLATFORM_NAME=${PLATFORM:-}
+DEV='vck190'
+if [[ "$PLATFORM_NAME" != *"$DEV"* ]]; then
+    if [ "${XCL_EMULATION_MODE}" != "hw_emu" ]; then
+        cp "$LIB_PROJ_ROOT/common/data/sample.txt" .
+
+        echo -e "\n\n----------Comparing files after Decompression---------\n"
+        if cmd1=$(diff data/sample.txt data/sample.txt.gz.*); then
+            echo "files are the same"
+        else
+            echo "files are different"
+            echo "$cmd1"
+        fi
+    fi
 fi
diff --git a/data_compression/L2/tests/gzip_p2p_decompress/utils.mk b/data_compression/L2/tests/gzip_p2p_decompress/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/gzip_p2p_decompress/utils.mk
+++ b/data_compression/L2/tests/gzip_p2p_decompress/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort when the K_IMAGE path does not exist (skipped when HOST_ARCH is x86)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # abort when the ROOTFS path does not exist (skipped when HOST_ARCH is x86)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/gzipc/Makefile b/data_compression/L2/tests/gzipc/Makefile
index 2bab5616ad..d118904545 100644
--- a/data_compression/L2/tests/gzipc/Makefile
+++ b/data_compression/L2/tests/gzipc/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D GZIP_STREAM -D PERF_DM
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_gzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -190,11 +197,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -228,21 +230,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only so they do not force re-packaging on every run
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -289,12 +291,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -332,12 +334,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/gzipc/README.rst b/data_compression/L2/tests/gzipc/README.rst
index b03c63efea..99eeba9d43 100644
--- a/data_compression/L2/tests/gzipc/README.rst
+++ b/data_compression/L2/tests/gzipc/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name>``
-2. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list>``
+1. To execute single file for compression 	    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name>``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/gzipc/utils.mk b/data_compression/L2/tests/gzipc/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/gzipc/utils.mk
+++ b/data_compression/L2/tests/gzipc/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort when the K_IMAGE path does not exist (skipped when HOST_ARCH is x86)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # abort when the ROOTFS path does not exist (skipped when HOST_ARCH is x86)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/gzipc_16KB/Makefile b/data_compression/L2/tests/gzipc_16KB/Makefile
index aa603b0dd6..00047abc98 100644
--- a/data_compression/L2/tests/gzipc_16KB/Makefile
+++ b/data_compression/L2/tests/gzipc_16KB/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D GZIP_STREAM -D PERF_DM -D BLOCK_SIZE_IN_KB=16
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_gzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -190,11 +197,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -228,21 +230,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only so they do not force re-packaging on every run
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -289,12 +291,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -332,12 +334,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/gzipc_16KB/README.rst b/data_compression/L2/tests/gzipc_16KB/README.rst
index f11218eed1..6f7708b41c 100644
--- a/data_compression/L2/tests/gzipc_16KB/README.rst
+++ b/data_compression/L2/tests/gzipc_16KB/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name>``
-2. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list>``
+1. To execute single file for compression 	    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name>``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/gzipc_16KB/utils.mk b/data_compression/L2/tests/gzipc_16KB/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/gzipc_16KB/utils.mk
+++ b/data_compression/L2/tests/gzipc_16KB/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT '$(SYSROOT)' is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # K_IMAGE always has a ?= default derived from SYSROOT, so a failure here means the file is missing
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # ROOTFS always has a ?= default derived from SYSROOT, so a failure here means the file is missing
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/gzipc_8KB/Makefile b/data_compression/L2/tests/gzipc_8KB/Makefile
index 07d2703ce7..becfd866d3 100644
--- a/data_compression/L2/tests/gzipc_8KB/Makefile
+++ b/data_compression/L2/tests/gzipc_8KB/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D GZIP_STREAM -D PERF_DM -D BLOCK_SIZE_IN_KB=8
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/L2/tests/src -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_gzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -190,11 +197,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -228,21 +230,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the .PHONY checks still run first but must not force re-packaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -289,12 +291,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -332,12 +334,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/gzipc_8KB/README.rst b/data_compression/L2/tests/gzipc_8KB/README.rst
index fb508aafc6..afd4c0282e 100644
--- a/data_compression/L2/tests/gzipc_8KB/README.rst
+++ b/data_compression/L2/tests/gzipc_8KB/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name>``
-2. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list>``
+1. To execute single file for compression 	    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name>``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/gzipc_8KB/utils.mk b/data_compression/L2/tests/gzipc_8KB/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/gzipc_8KB/utils.mk
+++ b/data_compression/L2/tests/gzipc_8KB/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT '$(SYSROOT)' is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # K_IMAGE always has a ?= default derived from SYSROOT, so a failure here means the file is missing
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # ROOTFS always has a ?= default derived from SYSROOT, so a failure here means the file is missing
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/gzipc_block_mm/Makefile b/data_compression/L2/tests/gzipc_block_mm/Makefile
index ce896ffe2d..93641e8649 100644
--- a/data_compression/L2/tests/gzipc_block_mm/Makefile
+++ b/data_compression/L2/tests/gzipc_block_mm/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D GZIP_MULTICORE_COMPRESS -D GZIP_MODE
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_gzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -181,11 +188,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -219,21 +221,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the .PHONY checks still run first but must not force re-packaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -280,12 +282,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -323,12 +325,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/gzipc_block_mm/README.rst b/data_compression/L2/tests/gzipc_block_mm/README.rst
index a2ebc60b8f..d149872b29 100644
--- a/data_compression/L2/tests/gzipc_block_mm/README.rst
+++ b/data_compression/L2/tests/gzipc_block_mm/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	          : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name>``
-2. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list>``
+1. To execute single file for compression 	          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name>``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/gzipc_block_mm/utils.mk b/data_compression/L2/tests/gzipc_block_mm/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/gzipc_block_mm/utils.mk
+++ b/data_compression/L2/tests/gzipc_block_mm/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT '$(SYSROOT)' is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # K_IMAGE always has a ?= default derived from SYSROOT, so a failure here means the file is missing
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # ROOTFS always has a ?= default derived from SYSROOT, so a failure here means the file is missing
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/gzipc_block_mm_xrt/Makefile b/data_compression/L2/tests/gzipc_block_mm_xrt/Makefile
index 224da3ce28..2d260de6d1 100644
--- a/data_compression/L2/tests/gzipc_block_mm_xrt/Makefile
+++ b/data_compression/L2/tests/gzipc_block_mm_xrt/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipXrtHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
 CXXFLAGS +=  -D GZIP_MULTICORE_COMPRESS -D GZIP_MODE
@@ -119,6 +121,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XF
 CXXFLAGS += -std=c++1y
 LDFLAGS +=  -l uuid -l xrt_coreutil
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_gzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -168,11 +175,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -206,21 +208,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the .PHONY checks still run first but must not force re-packaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -267,12 +269,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/gzipc_block_mm_xrt/README.rst b/data_compression/L2/tests/gzipc_block_mm_xrt/README.rst
index 6c5298408b..00724d99dd 100644
--- a/data_compression/L2/tests/gzipc_block_mm_xrt/README.rst
+++ b/data_compression/L2/tests/gzipc_block_mm_xrt/README.rst
@@ -12,8 +12,8 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	          : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name>``
-2. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list>``
+1. To execute single file for compression 	          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name>``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/gzipc_block_mm_xrt/utils.mk b/data_compression/L2/tests/gzipc_block_mm_xrt/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/gzipc_block_mm_xrt/utils.mk
+++ b/data_compression/L2/tests/gzipc_block_mm_xrt/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort when the path K_IMAGE points to does not exist (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # Abort when the path ROOTFS points to does not exist (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/gzipc_chain_dm/Makefile b/data_compression/L2/tests/gzipc_chain_dm/Makefile
index 788bca3e9b..97e95d5d5e 100644
--- a/data_compression/L2/tests/gzipc_chain_dm/Makefile
+++ b/data_compression/L2/tests/gzipc_chain_dm/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,15 +114,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
 CXXFLAGS +=  -D GZIP_STREAM
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_gzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,11 +182,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -213,21 +215,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -274,12 +276,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/gzipc_chain_dm/README.rst b/data_compression/L2/tests/gzipc_chain_dm/README.rst
index 1e97953c5e..5b5ccdd916 100644
--- a/data_compression/L2/tests/gzipc_chain_dm/README.rst
+++ b/data_compression/L2/tests/gzipc_chain_dm/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name>``
-2. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list>``
+1. To execute single file for compression 	    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name>``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/gzipc_chain_dm/utils.mk b/data_compression/L2/tests/gzipc_chain_dm/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/gzipc_chain_dm/utils.mk
+++ b/data_compression/L2/tests/gzipc_chain_dm/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort when the path K_IMAGE points to does not exist (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # Abort when the path ROOTFS points to does not exist (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/gzipc_static/Makefile b/data_compression/L2/tests/gzipc_static/Makefile
index e506bf22e7..3015a715cd 100644
--- a/data_compression/L2/tests/gzipc_static/Makefile
+++ b/data_compression/L2/tests/gzipc_static/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D GZIP_STREAM -D PERF_DM
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_gzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -184,11 +191,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -222,21 +224,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -283,12 +285,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/gzipc_static/README.rst b/data_compression/L2/tests/gzipc_static/README.rst
index 5dc613cdf8..afb7f4bc7d 100644
--- a/data_compression/L2/tests/gzipc_static/README.rst
+++ b/data_compression/L2/tests/gzipc_static/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	          : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name>``
-2. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list>``
+1. To execute single file for compression 	          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name>``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/gzipc_static/utils.mk b/data_compression/L2/tests/gzipc_static/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/gzipc_static/utils.mk
+++ b/data_compression/L2/tests/gzipc_static/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort when the path K_IMAGE points to does not exist (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # Abort when the path ROOTFS points to does not exist (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/gzipc_xrt/Makefile b/data_compression/L2/tests/gzipc_xrt/Makefile
index c60bb4603c..4b8a24bbea 100644
--- a/data_compression/L2/tests/gzipc_xrt/Makefile
+++ b/data_compression/L2/tests/gzipc_xrt/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipXrtHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
 CXXFLAGS +=  -D GZIP_STREAM -D PERF_DM
@@ -119,6 +121,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XF
 CXXFLAGS += -std=c++1y
 LDFLAGS +=  -l uuid -l xrt_coreutil
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_gzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -177,11 +184,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -215,21 +217,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -276,12 +278,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/gzipc_xrt/README.rst b/data_compression/L2/tests/gzipc_xrt/README.rst
index ca21f8e965..c215166982 100644
--- a/data_compression/L2/tests/gzipc_xrt/README.rst
+++ b/data_compression/L2/tests/gzipc_xrt/README.rst
@@ -12,8 +12,8 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	          : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name>``
-2. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list>``
+1. To execute single file for compression 	          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name>``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/gzipc_xrt/utils.mk b/data_compression/L2/tests/gzipc_xrt/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/gzipc_xrt/utils.mk
+++ b/data_compression/L2/tests/gzipc_xrt/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort when the path K_IMAGE points to does not exist (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # Abort when the path ROOTFS points to does not exist (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/gzipd_quadcores/Makefile b/data_compression/L2/tests/gzipd_quadcores/Makefile
index 2560150247..8526da2424 100644
--- a/data_compression/L2/tests/gzipd_quadcores/Makefile
+++ b/data_compression/L2/tests/gzipd_quadcores/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,15 +114,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D BLOCK_SIZE_IN_KB=32
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zlib
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,11 +182,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -213,21 +215,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -274,12 +276,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/gzipd_quadcores/README.rst b/data_compression/L2/tests/gzipd_quadcores/README.rst
index d04fe9f6ed..ebd9f6cd66 100644
--- a/data_compression/L2/tests/gzipd_quadcores/README.rst
+++ b/data_compression/L2/tests/gzipd_quadcores/README.rst
@@ -11,7 +11,7 @@ The binary host file generated is named as "**xil_zlib**" and it is present in `
 Executable Usage
 ----------------
 
-To execute single file for compression 	    : ``./build/xil_zlib ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin  <file_name>``
+To execute single file for decompression 	    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib ./build_dir.<TARGET mode>.<xsa_name>/decompress.xclbin  <file_name>``
 
 Results
 -------
diff --git a/data_compression/L2/tests/gzipd_quadcores/utils.mk b/data_compression/L2/tests/gzipd_quadcores/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/gzipd_quadcores/utils.mk
+++ b/data_compression/L2/tests/gzipd_quadcores/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/lz4_compress/Makefile b/data_compression/L2/tests/lz4_compress/Makefile
index 20274057aa..5cd37cf178 100644
--- a/data_compression/L2/tests/lz4_compress/Makefile
+++ b/data_compression/L2/tests/lz4_compress/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/lz4OCLHost.cpp $(XFLIB_DIR)/common/libs/compress/lz4Base.cpp $(XFLIB_DIR)/common/libs/compress/lz4App.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -125,6 +127,11 @@ CXXFLAGS +=  -D PARALLEL_BLOCK=8
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_lz4
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,11 +182,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -213,21 +215,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -274,12 +276,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
-	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.xclbin
+	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.awsxclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/lz4_compress/README.rst b/data_compression/L2/tests/lz4_compress/README.rst
index 5590ae48b3..de7b8fb24a 100644
--- a/data_compression/L2/tests/lz4_compress/README.rst
+++ b/data_compression/L2/tests/lz4_compress/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as **xil_lz4** and it is present in ``./
 Executable Usage
 ----------------
 
-1. To execute single file for compression             : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <input file_name>``
-2. To execute multiple files for compression    : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list>``
+1. To execute single file for compression             : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <input file_name>``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list>``
 
     - ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/lz4_compress/utils.mk b/data_compression/L2/tests/lz4_compress/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/lz4_compress/utils.mk
+++ b/data_compression/L2/tests/lz4_compress/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/lz4_compress_parallelBlock1/Makefile b/data_compression/L2/tests/lz4_compress_parallelBlock1/Makefile
index 7249f0680e..63f2ca734d 100644
--- a/data_compression/L2/tests/lz4_compress_parallelBlock1/Makefile
+++ b/data_compression/L2/tests/lz4_compress_parallelBlock1/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/lz4OCLHost.cpp $(XFLIB_DIR)/common/libs/compress/lz4Base.cpp $(XFLIB_DIR)/common/libs/compress/lz4App.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -125,6 +127,11 @@ CXXFLAGS +=  -D PARALLEL_BLOCK=1
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_lz4
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,11 +182,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -213,21 +215,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -274,12 +276,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
-	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.xclbin
+	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.awsxclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/lz4_compress_parallelBlock1/run.sh b/data_compression/L2/tests/lz4_compress_parallelBlock1/run.sh
index 6e26b51c9e..1d9e3a43f0 100755
--- a/data_compression/L2/tests/lz4_compress_parallelBlock1/run.sh
+++ b/data_compression/L2/tests/lz4_compress_parallelBlock1/run.sh
@@ -3,18 +3,19 @@ EXE_FILE=$1
 LIB_PROJ_ROOT=$2
 XCLBIN_FILE=$3
 echo "XCL_MODE=${XCL_EMULATION_MODE}"
-if [ "${XCL_EMULATION_MODE}" != "hw_emu" ]
-then
-    cp $LIB_PROJ_ROOT/common/data/sample.txt ./sample_run.txt
-    find ./reports/ -type f | xargs cat >> ./sample_run.txt
-    split -b 100M ./sample_run.txt ./segment
-    mv ./segmentaa ./sample_run.txt 
-    rm -f segment*
-   
+PLATFORM_NAME=${PLATFORM}
+DEV='vck190'
+if [[ "$PLATFORM_NAME" != *"$DEV"* ]]; then
+    if [ "${XCL_EMULATION_MODE}" != "hw_emu" ]; then
+        cp "$LIB_PROJ_ROOT/common/data/sample.txt" ./sample_run.txt
+        find ./reports/ -type f -exec cat {} + >>./sample_run.txt
+        split -b 100M ./sample_run.txt ./segment
+        mv ./segmentaa ./sample_run.txt
+        rm -f ./segment*
 
-    echo -e "\n\n-----------Running Compression for large file-----------\n"
-    cmd1="$EXE_FILE -c ./sample_run.txt -xbin $XCLBIN_FILE"
-    echo $cmd1
-    $cmd1
+        echo -e "\n\n-----------Running Compression for large file-----------\n"
+        cmd1=("$EXE_FILE" -c ./sample_run.txt -xbin "$XCLBIN_FILE")
+        echo "${cmd1[@]}"
+        "${cmd1[@]}"
+    fi
 fi
-
diff --git a/data_compression/L2/tests/lz4_compress_parallelBlock1/utils.mk b/data_compression/L2/tests/lz4_compress_parallelBlock1/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/lz4_compress_parallelBlock1/utils.mk
+++ b/data_compression/L2/tests/lz4_compress_parallelBlock1/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/lz4_compress_parallelBlock2/Makefile b/data_compression/L2/tests/lz4_compress_parallelBlock2/Makefile
index 87b06e5cc0..114ca85d43 100644
--- a/data_compression/L2/tests/lz4_compress_parallelBlock2/Makefile
+++ b/data_compression/L2/tests/lz4_compress_parallelBlock2/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/lz4OCLHost.cpp $(XFLIB_DIR)/common/libs/compress/lz4Base.cpp $(XFLIB_DIR)/common/libs/compress/lz4App.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -125,6 +127,11 @@ CXXFLAGS +=  -D PARALLEL_BLOCK=2
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_lz4
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,11 +182,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -213,21 +215,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -274,12 +276,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
-	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.xclbin
+	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.awsxclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/lz4_compress_parallelBlock2/utils.mk b/data_compression/L2/tests/lz4_compress_parallelBlock2/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/lz4_compress_parallelBlock2/utils.mk
+++ b/data_compression/L2/tests/lz4_compress_parallelBlock2/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/lz4_compress_parallelBlock4/Makefile b/data_compression/L2/tests/lz4_compress_parallelBlock4/Makefile
index 2d8da77bf9..284ee0b7ae 100644
--- a/data_compression/L2/tests/lz4_compress_parallelBlock4/Makefile
+++ b/data_compression/L2/tests/lz4_compress_parallelBlock4/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -66,7 +72,7 @@ endif
 
 # #################### Checking if PLATFORM in whitelist ############################
 PLATFORM_ALLOWLIST +=  u200 u250 u50 vck190 aws-vu9p-f1
-PLATFORM_BLOCKLIST +=  zc
+PLATFORM_BLOCKLIST +=  zc vck190_base_dfx
 
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/lz4OCLHost.cpp $(XFLIB_DIR)/common/libs/compress/lz4Base.cpp $(XFLIB_DIR)/common/libs/compress/lz4App.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -125,6 +127,11 @@ CXXFLAGS +=  -D PARALLEL_BLOCK=4
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_lz4
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,11 +182,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -213,21 +215,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first without forcing a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -274,12 +276,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
-	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.xclbin
+	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.awsxclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/lz4_compress_parallelBlock4/description.json b/data_compression/L2/tests/lz4_compress_parallelBlock4/description.json
index 8fc1709a06..53805f7acb 100644
--- a/data_compression/L2/tests/lz4_compress_parallelBlock4/description.json
+++ b/data_compression/L2/tests/lz4_compress_parallelBlock4/description.json
@@ -32,7 +32,8 @@
     ], 
     "default_device": "u200",
     "platform_blocklist": [
-        "zc"
+        "zc",
+        "vck190_base_dfx"
     ], 
     "platform_allowlist": [
         "u200", 
diff --git a/data_compression/L2/tests/lz4_compress_parallelBlock4/run.sh b/data_compression/L2/tests/lz4_compress_parallelBlock4/run.sh
index f9b74ee9e8..1d9e3a43f0 100755
--- a/data_compression/L2/tests/lz4_compress_parallelBlock4/run.sh
+++ b/data_compression/L2/tests/lz4_compress_parallelBlock4/run.sh
@@ -3,16 +3,19 @@ EXE_FILE=$1
 LIB_PROJ_ROOT=$2
 XCLBIN_FILE=$3
 echo "XCL_MODE=${XCL_EMULATION_MODE}"
-if [ "${XCL_EMULATION_MODE}" != "hw_emu" ]
-then
-    cp $LIB_PROJ_ROOT/common/data/sample.txt ./sample_run.txt
-    find ./reports/ -type f | xargs cat >> ./sample_run.txt
-    split -b 100M ./sample_run.txt ./segment
-    mv ./segmentaa ./sample_run.txt 
-    rm -f segment*
-   
-    echo -e "\n\n-----------Running Compression for large file-----------\n"
-    cmd1="$EXE_FILE -c ./sample_run.txt -xbin $XCLBIN_FILE"
-    echo $cmd1
-    $cmd1
+PLATFORM_NAME=${PLATFORM}
+DEV='vck190'
+if [[ "$PLATFORM_NAME" != *"$DEV"* ]]; then
+    if [ "${XCL_EMULATION_MODE}" != "hw_emu" ]; then
+        cp $LIB_PROJ_ROOT/common/data/sample.txt ./sample_run.txt
+        find ./reports/ -type f | xargs cat >>./sample_run.txt
+        split -b 100M ./sample_run.txt ./segment
+        mv ./segmentaa ./sample_run.txt
+        rm -f segment*
+
+        echo -e "\n\n-----------Running Compression for large file-----------\n"
+        cmd1="$EXE_FILE -c ./sample_run.txt -xbin $XCLBIN_FILE"
+        echo $cmd1
+        $cmd1
+    fi
 fi
diff --git a/data_compression/L2/tests/lz4_compress_parallelBlock4/utils.mk b/data_compression/L2/tests/lz4_compress_parallelBlock4/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/lz4_compress_parallelBlock4/utils.mk
+++ b/data_compression/L2/tests/lz4_compress_parallelBlock4/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/lz4_compress_streaming/Makefile b/data_compression/L2/tests/lz4_compress_streaming/Makefile
index 967944ae1b..5eb4c03098 100644
--- a/data_compression/L2/tests/lz4_compress_streaming/Makefile
+++ b/data_compression/L2/tests/lz4_compress_streaming/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/lz4OCLHost.cpp $(XFLIB_DIR)/common/libs/compress/lz4Base.cpp $(XFLIB_DIR)/common/libs/compress/lz4App.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -125,6 +127,11 @@ CXXFLAGS +=  -D PARALLEL_BLOCK=8
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_lz4_streaming
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -182,11 +189,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -220,21 +222,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first without forcing a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -281,12 +283,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress_streaming.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress_streaming.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/lz4_compress_streaming/README.rst b/data_compression/L2/tests/lz4_compress_streaming/README.rst
index 27ef2a58bf..e27172ec17 100644
--- a/data_compression/L2/tests/lz4_compress_streaming/README.rst
+++ b/data_compression/L2/tests/lz4_compress_streaming/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as **xil_lz4_streaming** and it is prese
 Executable Usage
 ----------------
 
-1. To execute single file for compression             : ``./build/xil_lz4_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_streaming.xclbin -c <input file_name>``
-2. To execute multiple files for compression    : ``./build/xil_lz4_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_streaming.xclbin -cfl <files.list>``
+1. To execute single file for compression             : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_streaming.xclbin -c <input file_name>``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_streaming.xclbin -cfl <files.list>``
 
     - ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/lz4_compress_streaming/utils.mk b/data_compression/L2/tests/lz4_compress_streaming/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/lz4_compress_streaming/utils.mk
+++ b/data_compression/L2/tests/lz4_compress_streaming/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/lz4_dec_parallelByte4/Makefile b/data_compression/L2/tests/lz4_dec_parallelByte4/Makefile
index 15295bf7ee..dcf2e1de72 100644
--- a/data_compression/L2/tests/lz4_dec_parallelByte4/Makefile
+++ b/data_compression/L2/tests/lz4_dec_parallelByte4/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/lz4OCLHost.cpp $(XFLIB_DIR)/common/libs/compress/lz4Base.cpp $(XFLIB_DIR)/common/libs/compress/lz4App.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -125,6 +127,11 @@ CXXFLAGS +=  -D HOST_BUFFER_SIZE_IN_MB=128
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_lz4
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -181,11 +188,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -219,21 +221,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first without forcing a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -280,12 +282,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/decompress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -323,12 +325,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.lz4.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.lz4.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/lz4_dec_parallelByte4/utils.mk b/data_compression/L2/tests/lz4_dec_parallelByte4/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/lz4_dec_parallelByte4/utils.mk
+++ b/data_compression/L2/tests/lz4_dec_parallelByte4/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/lz4_dec_parallelByte8/Makefile b/data_compression/L2/tests/lz4_dec_parallelByte8/Makefile
index 1639ff20f2..f3b820fc67 100644
--- a/data_compression/L2/tests/lz4_dec_parallelByte8/Makefile
+++ b/data_compression/L2/tests/lz4_dec_parallelByte8/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/lz4OCLHost.cpp $(XFLIB_DIR)/common/libs/compress/lz4Base.cpp $(XFLIB_DIR)/common/libs/compress/lz4App.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -125,6 +127,11 @@ CXXFLAGS +=  -D HOST_BUFFER_SIZE_IN_MB=128
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_lz4
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -181,11 +188,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -219,21 +221,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first without forcing a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -280,12 +282,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/decompress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -323,12 +325,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.lz4.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.lz4.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/lz4_dec_parallelByte8/utils.mk b/data_compression/L2/tests/lz4_dec_parallelByte8/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/lz4_dec_parallelByte8/utils.mk
+++ b/data_compression/L2/tests/lz4_dec_parallelByte8/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort packaging early when the kernel image file is missing (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs: # abort packaging early when the root filesystem image is missing (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/lz4_dec_streaming_parallelByte8/Makefile b/data_compression/L2/tests/lz4_dec_streaming_parallelByte8/Makefile
index d4573976a6..09ddd111d3 100644
--- a/data_compression/L2/tests/lz4_dec_streaming_parallelByte8/Makefile
+++ b/data_compression/L2/tests/lz4_dec_streaming_parallelByte8/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/lz4OCLHost.cpp $(XFLIB_DIR)/common/libs/compress/lz4Base.cpp $(XFLIB_DIR)/common/libs/compress/lz4App.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -125,6 +127,11 @@ CXXFLAGS +=  -D MULTIPLE_BYTES=8
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_lz4_decompress_streaming
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -188,11 +195,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -226,21 +228,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks always run but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -287,12 +289,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress_streaming.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/decompress_streaming.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -330,12 +332,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.lz4.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.lz4.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/lz4_dec_streaming_parallelByte8/README.rst b/data_compression/L2/tests/lz4_dec_streaming_parallelByte8/README.rst
index 34e61c5d91..64d135bb30 100644
--- a/data_compression/L2/tests/lz4_dec_streaming_parallelByte8/README.rst
+++ b/data_compression/L2/tests/lz4_dec_streaming_parallelByte8/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as **xil_lz4_decompress_streaming** and
 Executable Usage
 ----------------
 
-1. To execute single file for decompression             : ``./build/xil_lz4_decompress_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/decompress_streaming.xclbin -d <input file_name>``
-2. To execute multiple files for decompression    : ``./build/xil_lz4_decompress_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/decompress_streaming.xclbin -dfl <files.list>``
+1. To execute single file for decompression             : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_decompress_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/decompress_streaming.xclbin -d <input file_name>``
+2. To execute multiple files for decompression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_decompress_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/decompress_streaming.xclbin -dfl <files.list>``
 
     - ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/lz4_dec_streaming_parallelByte8/utils.mk b/data_compression/L2/tests/lz4_dec_streaming_parallelByte8/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/lz4_dec_streaming_parallelByte8/utils.mk
+++ b/data_compression/L2/tests/lz4_dec_streaming_parallelByte8/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort packaging early when the kernel image file is missing (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs: # abort packaging early when the root filesystem image is missing (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/lz4_p2p_decompress/Makefile b/data_compression/L2/tests/lz4_p2p_decompress/Makefile
index bfd781ee0b..d9f51552be 100644
--- a/data_compression/L2/tests/lz4_p2p_decompress/Makefile
+++ b/data_compression/L2/tests/lz4_p2p_decompress/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/samsung_u2/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/L2/tests/src/lz4_stream.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -134,6 +136,11 @@ HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/L2/tests/src/lz4_stream.cpp $(
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := lz4
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -197,11 +204,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -235,21 +237,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks always run but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -296,12 +298,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress_streaming.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/decompress_streaming.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -339,12 +341,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.lz4.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.lz4.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/lz4_p2p_decompress/utils.mk b/data_compression/L2/tests/lz4_p2p_decompress/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/lz4_p2p_decompress/utils.mk
+++ b/data_compression/L2/tests/lz4_p2p_decompress/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort packaging early when the kernel image file is missing (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs: # abort packaging early when the root filesystem image is missing (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/snappy_compress/Makefile b/data_compression/L2/tests/snappy_compress/Makefile
index a42720aa68..637a4a31b6 100644
--- a/data_compression/L2/tests/snappy_compress/Makefile
+++ b/data_compression/L2/tests/snappy_compress/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/snappyOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/snappyBase.cpp $(XFLIB_DIR)/common/libs/compress/snappyApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -125,6 +127,11 @@ CXXFLAGS +=  -D PARALLEL_BLOCK=8
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_snappy
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -181,11 +188,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -219,21 +221,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks always run but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -280,12 +282,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -323,12 +325,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/snappy_compress/README.rst b/data_compression/L2/tests/snappy_compress/README.rst
index 23f8ba255a..11731a8153 100644
--- a/data_compression/L2/tests/snappy_compress/README.rst
+++ b/data_compression/L2/tests/snappy_compress/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as **xil_snappy** and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression             : ``./build/xil_snappy -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <input file_name>``
-2. To execute multiple files for compression    : ``./build/xil_snappy -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list>``
+1. To execute single file for compression             : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <input file_name>``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list>``
 
     - ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/snappy_compress/utils.mk b/data_compression/L2/tests/snappy_compress/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/snappy_compress/utils.mk
+++ b/data_compression/L2/tests/snappy_compress/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort packaging early when the kernel image file is missing (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs: # abort packaging early when the root filesystem image is missing (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/snappy_dec_parallelByte4/Makefile b/data_compression/L2/tests/snappy_dec_parallelByte4/Makefile
index f2212ffcbd..b5fb922354 100644
--- a/data_compression/L2/tests/snappy_dec_parallelByte4/Makefile
+++ b/data_compression/L2/tests/snappy_dec_parallelByte4/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/snappyOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/snappyBase.cpp $(XFLIB_DIR)/common/libs/compress/snappyApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -123,6 +125,11 @@ HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/snappyOCL
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv.
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_snappy
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -179,11 +186,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -217,21 +219,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -278,12 +280,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/decompress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -321,12 +323,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.snappy.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.snappy.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/snappy_dec_parallelByte4/utils.mk b/data_compression/L2/tests/snappy_dec_parallelByte4/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/snappy_dec_parallelByte4/utils.mk
+++ b/data_compression/L2/tests/snappy_dec_parallelByte4/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Set default K_IMAGE/ROOTFS paths under SYSROOT (non-x86 hosts only), then check that SYSROOT, K_IMAGE and ROOTFS exist.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # fail early when the kernel image handed to v++ --package.kernel_image is missing
+ifneq ($(HOST_ARCH), x86) # only non-x86 hosts are checked
+ifeq (,$(wildcard $(K_IMAGE))) # empty wildcard result: the path does not exist on disk
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set K_IMAGE to the kernel image path and rerun)
+endif
+endif
+check_rootfs: # fail early when the rootfs handed to v++ --package.rootfs is missing
+ifneq ($(HOST_ARCH), x86) # only non-x86 hosts are checked
+ifeq (,$(wildcard $(ROOTFS))) # empty wildcard result: the path does not exist on disk
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ROOTFS to the rootfs image path and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/snappy_dec_parallelByte8/Makefile b/data_compression/L2/tests/snappy_dec_parallelByte8/Makefile
index 90c45653c5..2c1c1357d3 100644
--- a/data_compression/L2/tests/snappy_dec_parallelByte8/Makefile
+++ b/data_compression/L2/tests/snappy_dec_parallelByte8/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/snappyOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/snappyBase.cpp $(XFLIB_DIR)/common/libs/compress/snappyApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -123,6 +125,11 @@ HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/snappyOCL
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv.
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_snappy
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -179,11 +186,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -217,21 +219,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -278,12 +280,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/decompress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -321,12 +323,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.snappy.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.snappy.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/snappy_dec_parallelByte8/utils.mk b/data_compression/L2/tests/snappy_dec_parallelByte8/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/snappy_dec_parallelByte8/utils.mk
+++ b/data_compression/L2/tests/snappy_dec_parallelByte8/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Set default K_IMAGE/ROOTFS paths under SYSROOT (non-x86 hosts only), then check that SYSROOT, K_IMAGE and ROOTFS exist.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # fail early when the kernel image handed to v++ --package.kernel_image is missing
+ifneq ($(HOST_ARCH), x86) # only non-x86 hosts are checked
+ifeq (,$(wildcard $(K_IMAGE))) # empty wildcard result: the path does not exist on disk
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set K_IMAGE to the kernel image path and rerun)
+endif
+endif
+check_rootfs: # fail early when the rootfs handed to v++ --package.rootfs is missing
+ifneq ($(HOST_ARCH), x86) # only non-x86 hosts are checked
+ifeq (,$(wildcard $(ROOTFS))) # empty wildcard result: the path does not exist on disk
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ROOTFS to the rootfs image path and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/snappy_dec_streaming_parallelByte8/Makefile b/data_compression/L2/tests/snappy_dec_streaming_parallelByte8/Makefile
index fac24af7bd..b01961d42b 100644
--- a/data_compression/L2/tests/snappy_dec_streaming_parallelByte8/Makefile
+++ b/data_compression/L2/tests/snappy_dec_streaming_parallelByte8/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/snappyOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/snappyBase.cpp $(XFLIB_DIR)/common/libs/compress/snappyApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -125,6 +127,11 @@ CXXFLAGS +=  -D SNAPPY_STREAM -D MULTIPLE_BYTES=8
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv.
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_snappy_decompress_streaming
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -188,11 +195,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -226,21 +228,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -287,12 +289,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress_streaming.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/decompress_streaming.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -330,12 +332,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.snappy.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.snappy.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/snappy_dec_streaming_parallelByte8/README.rst b/data_compression/L2/tests/snappy_dec_streaming_parallelByte8/README.rst
index 1a395855dc..5d2c69f3df 100644
--- a/data_compression/L2/tests/snappy_dec_streaming_parallelByte8/README.rst
+++ b/data_compression/L2/tests/snappy_dec_streaming_parallelByte8/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as **xil_snappy_decompress_streaming** a
 Executable Usage
 ----------------
 
-1. To execute single file for decompression             : ``./build/xil_snappy_decompress_streaming  -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/decompress_streaming.xclbin -d <input file_name>``
-2. To execute multiple files for decompression    : ``./build/xil_snappy_decompress_streaming  -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/decompress_streaming.xclbin -dfl <files.list>``
+1. To execute single file for decompression             : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy_decompress_streaming  -xbin ./build_dir.<TARGET mode>.<xsa_name>/decompress_streaming.xclbin -d <input file_name>``
+2. To execute multiple files for decompression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy_decompress_streaming  -xbin ./build_dir.<TARGET mode>.<xsa_name>/decompress_streaming.xclbin -dfl <files.list>``
 
     - ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/snappy_dec_streaming_parallelByte8/utils.mk b/data_compression/L2/tests/snappy_dec_streaming_parallelByte8/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/snappy_dec_streaming_parallelByte8/utils.mk
+++ b/data_compression/L2/tests/snappy_dec_streaming_parallelByte8/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Set default K_IMAGE/ROOTFS paths under SYSROOT (non-x86 hosts only), then check that SYSROOT, K_IMAGE and ROOTFS exist.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # fail early when the kernel image handed to v++ --package.kernel_image is missing
+ifneq ($(HOST_ARCH), x86) # only non-x86 hosts are checked
+ifeq (,$(wildcard $(K_IMAGE))) # empty wildcard result: the path does not exist on disk
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set K_IMAGE to the kernel image path and rerun)
+endif
+endif
+check_rootfs: # fail early when the rootfs handed to v++ --package.rootfs is missing
+ifneq ($(HOST_ARCH), x86) # only non-x86 hosts are checked
+ifeq (,$(wildcard $(ROOTFS))) # empty wildcard result: the path does not exist on disk
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ROOTFS to the rootfs image path and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/snappy_dualcore_decompress/Makefile b/data_compression/L2/tests/snappy_dualcore_decompress/Makefile
index 17cfaa94aa..4ad888a6e9 100644
--- a/data_compression/L2/tests/snappy_dualcore_decompress/Makefile
+++ b/data_compression/L2/tests/snappy_dualcore_decompress/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/L2/tests/src/xil_snappy_streaming.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D NUM_CORES=2 -D MULTIPLE_BYTES=8 -D FREE_RUNNING_KERNEL
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv.
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xsnappy_decompress_streaming
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -189,11 +196,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -227,21 +229,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -288,12 +290,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/dualcore_decompress_streaming.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/dualcore_decompress_streaming.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -331,12 +333,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/* $(CUR_DIR)/. $(CUR_DIR)/y $(CUR_DIR)/p $(CUR_DIR)/p $(CUR_DIR)/a $(CUR_DIR)/n $(CUR_DIR)/s $(CUR_DIR)/. $(CUR_DIR)/t $(CUR_DIR)/x $(CUR_DIR)/t $(CUR_DIR)/. $(CUR_DIR)/e $(CUR_DIR)/l $(CUR_DIR)/p $(CUR_DIR)/m $(CUR_DIR)/a $(CUR_DIR)/s $(CUR_DIR)// $(CUR_DIR)/a $(CUR_DIR)/t $(CUR_DIR)/a $(CUR_DIR)/d 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/data/sample.txt.snappy.*
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/snappy_dualcore_decompress/utils.mk b/data_compression/L2/tests/snappy_dualcore_decompress/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/snappy_dualcore_decompress/utils.mk
+++ b/data_compression/L2/tests/snappy_dualcore_decompress/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Default K_IMAGE/ROOTFS from SYSROOT unless already set (non-x86 hosts), then check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort unless the kernel image file exists (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # Abort unless the root filesystem image exists (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/snappy_p2p_decompress/Makefile b/data_compression/L2/tests/snappy_p2p_decompress/Makefile
index 8e293ac3d3..644ea4ae6c 100644
--- a/data_compression/L2/tests/snappy_p2p_decompress/Makefile
+++ b/data_compression/L2/tests/snappy_p2p_decompress/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/samsung_u2/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/snappyOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/snappyBase.cpp $(XFLIB_DIR)/common/libs/compress/snappyApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
@@ -136,6 +138,11 @@ CXXFLAGS +=  -D SNAPPY_STREAM -D ENABLE_P2P=0
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/L2/tests/src -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_snappy_decompress_streaming
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -199,11 +206,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -237,21 +239,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -298,12 +300,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress_streaming.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/decompress_streaming.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -341,12 +343,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/*.orig $(CUR_DIR)/data/*.orig 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/*.orig $(CUR_DIR)/data/*.orig 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/snappy_p2p_decompress/utils.mk b/data_compression/L2/tests/snappy_p2p_decompress/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/snappy_p2p_decompress/utils.mk
+++ b/data_compression/L2/tests/snappy_p2p_decompress/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Default K_IMAGE/ROOTFS from SYSROOT unless already set (non-x86 hosts), then check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort unless the kernel image file exists (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # Abort unless the root filesystem image exists (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/zlib_decompress_8KB_checksum/Makefile b/data_compression/L2/tests/zlib_decompress_8KB_checksum/Makefile
index 9544546b24..e0f3d753b6 100644
--- a/data_compression/L2/tests/zlib_decompress_8KB_checksum/Makefile
+++ b/data_compression/L2/tests/zlib_decompress_8KB_checksum/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -65,7 +71,7 @@ PLATFORM := xilinx_u200_gen3x16_xdma_2_202110_1
 endif
 
 # #################### Checking if PLATFORM in whitelist ############################
-PLATFORM_ALLOWLIST +=  u200 u250 u50 vck190 aws-vu9p-f1
+PLATFORM_ALLOWLIST +=  u200 u250 vck190 aws-vu9p-f1
 PLATFORM_BLOCKLIST +=  zc
 
 include ./utils.mk
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D PARALLEL_BLOCK=1 -D GZIP_MODE=1 -D TUSER_DWIDTH=32
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_gzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -194,11 +201,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -232,21 +234,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -293,12 +295,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/decompress.xclbin $(PLATFORM)
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -336,12 +338,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.zlib.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.zlib.* 
 
-clean: cleanh
+clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/zlib_decompress_8KB_checksum/README.rst b/data_compression/L2/tests/zlib_decompress_8KB_checksum/README.rst
index e2300ba7f2..04fdcba170 100644
--- a/data_compression/L2/tests/zlib_decompress_8KB_checksum/README.rst
+++ b/data_compression/L2/tests/zlib_decompress_8KB_checksum/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	          : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/decompress.xclbin  -d <file_name>``
-2. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/decompress.xclbin  -dfl <files.list>``
+1. To execute single file for decompression 	          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/decompress.xclbin  -d <file_name>``
+2. To execute multiple files for decompression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/decompress.xclbin  -dfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/zlib_decompress_8KB_checksum/utils.mk b/data_compression/L2/tests/zlib_decompress_8KB_checksum/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/zlib_decompress_8KB_checksum/utils.mk
+++ b/data_compression/L2/tests/zlib_decompress_8KB_checksum/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Default K_IMAGE/ROOTFS from SYSROOT unless already set (non-x86 hosts), then check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort unless the kernel image file exists (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # Abort unless the root filesystem image exists (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/zlibc/Makefile b/data_compression/L2/tests/zlibc/Makefile
index 08a80ac468..0cae63f46a 100644
--- a/data_compression/L2/tests/zlibc/Makefile
+++ b/data_compression/L2/tests/zlibc/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,15 +114,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
 CXXFLAGS +=  -D GZIP_STREAM -D PERF_DM
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zlib
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,11 +182,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -213,21 +215,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -274,12 +276,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/zlibc/README.rst b/data_compression/L2/tests/zlibc/README.rst
index 4bb59c7133..1afb3ab74c 100644
--- a/data_compression/L2/tests/zlibc/README.rst
+++ b/data_compression/L2/tests/zlibc/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_zlib**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name> -zlib 1``
-2. To execute multiple files for compression    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list> -zlib 1``
+1. To execute single file for compression 	    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name> -zlib 1``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list> -zlib 1``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/zlibc/utils.mk b/data_compression/L2/tests/zlibc/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/zlibc/utils.mk
+++ b/data_compression/L2/tests/zlibc/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Default K_IMAGE/ROOTFS from SYSROOT unless already set (non-x86 hosts), then check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort unless the kernel image file exists (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # Abort unless the root filesystem image exists (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/zlibc_16KB/Makefile b/data_compression/L2/tests/zlibc_16KB/Makefile
index e5b6911969..a519371ee9 100644
--- a/data_compression/L2/tests/zlibc_16KB/Makefile
+++ b/data_compression/L2/tests/zlibc_16KB/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,15 +114,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
 CXXFLAGS +=  -D GZIP_STREAM -D PERF_DM -D BLOCK_SIZE_IN_KB=16
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zlib
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,11 +182,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -213,21 +215,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -274,12 +276,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/zlibc_16KB/README.rst b/data_compression/L2/tests/zlibc_16KB/README.rst
index 8e463b949d..c2c4f8e501 100644
--- a/data_compression/L2/tests/zlibc_16KB/README.rst
+++ b/data_compression/L2/tests/zlibc_16KB/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_zlib**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name> -zlib 1``
-2. To execute multiple files for compression    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list> -zlib 1``
+1. To compress a single file 	    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name> -zlib 1``
+2. To compress multiple files    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list> -zlib 1``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/zlibc_16KB/utils.mk b/data_compression/L2/tests/zlibc_16KB/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/zlibc_16KB/utils.mk
+++ b/data_compression/L2/tests/zlibc_16KB/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/zlibc_8KB/Makefile b/data_compression/L2/tests/zlibc_8KB/Makefile
index ae4a37e2e4..babf9b0663 100644
--- a/data_compression/L2/tests/zlibc_8KB/Makefile
+++ b/data_compression/L2/tests/zlibc_8KB/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,15 +114,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
 CXXFLAGS +=  -D GZIP_STREAM -D PERF_DM -D BLOCK_SIZE_IN_KB=8
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zlib
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,11 +182,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -213,21 +215,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -274,12 +276,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/zlibc_8KB/README.rst b/data_compression/L2/tests/zlibc_8KB/README.rst
index 2ce564fc5d..aa491c8227 100644
--- a/data_compression/L2/tests/zlibc_8KB/README.rst
+++ b/data_compression/L2/tests/zlibc_8KB/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_zlib**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name> -zlib 1``
-2. To execute multiple files for compression    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list> -zlib 1``
+1. To compress a single file 	    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name> -zlib 1``
+2. To compress multiple files    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list> -zlib 1``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/zlibc_8KB/utils.mk b/data_compression/L2/tests/zlibc_8KB/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/zlibc_8KB/utils.mk
+++ b/data_compression/L2/tests/zlibc_8KB/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/zlibc_static/Makefile b/data_compression/L2/tests/zlibc_static/Makefile
index 0e62993f0d..ab528b1e27 100644
--- a/data_compression/L2/tests/zlibc_static/Makefile
+++ b/data_compression/L2/tests/zlibc_static/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D GZIP_STREAM -D PERF_DM
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zlib
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -190,11 +197,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -228,21 +230,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -289,12 +291,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -332,12 +334,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/zlibc_static/README.rst b/data_compression/L2/tests/zlibc_static/README.rst
index cdaee09baf..5ccc8a55a7 100644
--- a/data_compression/L2/tests/zlibc_static/README.rst
+++ b/data_compression/L2/tests/zlibc_static/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_zlib**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name> -zlib 1``
-2. To execute multiple files for compression    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list> -zlib 1``
+1. To compress a single file 	    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name> -zlib 1``
+2. To compress multiple files    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list> -zlib 1``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/zlibc_static/utils.mk b/data_compression/L2/tests/zlibc_static/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/zlibc_static/utils.mk
+++ b/data_compression/L2/tests/zlibc_static/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/zlibc_static_16KB/Makefile b/data_compression/L2/tests/zlibc_static_16KB/Makefile
index 05a22941e1..6d94762d1f 100644
--- a/data_compression/L2/tests/zlibc_static_16KB/Makefile
+++ b/data_compression/L2/tests/zlibc_static_16KB/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D GZIP_STREAM -D PERF_DM -D BLOCK_SIZE_IN_KB=16
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zlib
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -190,11 +197,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -228,21 +230,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -289,12 +291,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -332,12 +334,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/zlibc_static_16KB/README.rst b/data_compression/L2/tests/zlibc_static_16KB/README.rst
index 83a92d3150..36238b4ede 100644
--- a/data_compression/L2/tests/zlibc_static_16KB/README.rst
+++ b/data_compression/L2/tests/zlibc_static_16KB/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_zlib**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name> -zlib 1``
-2. To execute multiple files for compression    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list> -zlib 1``
+1. To execute single file for compression 	    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name> -zlib 1``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list> -zlib 1``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/zlibc_static_16KB/utils.mk b/data_compression/L2/tests/zlibc_static_16KB/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/zlibc_static_16KB/utils.mk
+++ b/data_compression/L2/tests/zlibc_static_16KB/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort before packaging when the kernel image path does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # Abort before packaging when the root filesystem path does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/zlibc_static_8KB/Makefile b/data_compression/L2/tests/zlibc_static_8KB/Makefile
index 23d721d735..7a63ab4a14 100644
--- a/data_compression/L2/tests/zlibc_static_8KB/Makefile
+++ b/data_compression/L2/tests/zlibc_static_8KB/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D GZIP_STREAM -D PERF_DM -D BLOCK_SIZE_IN_KB=8
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zlib
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -190,11 +197,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -228,21 +230,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first, but no longer force the package to be regenerated on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -289,12 +291,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -332,12 +334,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/test.list $(CUR_DIR)/sample_run.* $(CUR_DIR)/data/sample.txt.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/zlibc_static_8KB/README.rst b/data_compression/L2/tests/zlibc_static_8KB/README.rst
index 69d3865d92..bcdc929b60 100644
--- a/data_compression/L2/tests/zlibc_static_8KB/README.rst
+++ b/data_compression/L2/tests/zlibc_static_8KB/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_zlib**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name> -zlib 1``
-2. To execute multiple files for compression    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list> -zlib 1``
+1. To execute single file for compression 	    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name> -zlib 1``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list> -zlib 1``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/zlibc_static_8KB/utils.mk b/data_compression/L2/tests/zlibc_static_8KB/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/zlibc_static_8KB/utils.mk
+++ b/data_compression/L2/tests/zlibc_static_8KB/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort before packaging when the kernel image path does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # Abort before packaging when the root filesystem path does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/zlibd_quadcores_8KB/Makefile b/data_compression/L2/tests/zlibd_quadcores_8KB/Makefile
index 5176b49ee3..53c765d2f4 100644
--- a/data_compression/L2/tests/zlibd_quadcores_8KB/Makefile
+++ b/data_compression/L2/tests/zlibd_quadcores_8KB/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -66,7 +72,7 @@ endif
 
 # #################### Checking if PLATFORM in whitelist ############################
 PLATFORM_ALLOWLIST +=  u200 u250 u50 vck190 aws-vu9p-f1
-PLATFORM_BLOCKLIST +=  zc xilinx_u50_gen3x16_xdma_5_202210_1
+PLATFORM_BLOCKLIST +=  zc xilinx_u50_gen3x16_xdma_5_202210_1 vck190_base_dfx
 
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D MULTICORE_DECOMPRESS -D PARALLEL_BLOCK=1 -D GZIP_MODE=1
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zlib
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -140,9 +147,15 @@ VPP_FLAGS +=  -D NUM_CORES=4 -D MULTIPLE_BYTES=8 -D GZIP_DECOMPRESS_CHECKSUM
 VPP_FLAGS +=  -I $(XFLIB_DIR)/../security/L1/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/src
 
 ######################### binary container global settings ##########################
+ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
+VPP_LDFLAGS_decompress_temp := --config $(CUR_DIR)/connectivity.cfg --clock.id 4:xilDecompress_1
+VPP_LDFLAGS_decompress += $(VPP_LDFLAGS_decompress_temp)
+
+else 
 VPP_LDFLAGS_decompress_temp := --config $(CUR_DIR)/connectivity.cfg
 VPP_LDFLAGS_decompress += $(VPP_LDFLAGS_decompress_temp)
 
+endif
 ifeq ($(HOST_ARCH), x86)
 BINARY_CONTAINERS += $(BUILD_DIR)/decompress.xclbin
 else
@@ -188,11 +201,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -226,21 +234,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first, but no longer force the package to be regenerated on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -287,12 +295,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/decompress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/zlibd_quadcores_8KB/README.rst b/data_compression/L2/tests/zlibd_quadcores_8KB/README.rst
index 51eee0c49a..4cc4d1662b 100644
--- a/data_compression/L2/tests/zlibd_quadcores_8KB/README.rst
+++ b/data_compression/L2/tests/zlibd_quadcores_8KB/README.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_zlib**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	          : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/decompress.xclbin  -d <file_name>``
-2. To execute multiple files for compression    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/decompress.xclbin  -dfl <files.list>``
+1. To execute single file for decompression 	          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/decompress.xclbin  -d <file_name>``
+2. To execute multiple files for decompression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/decompress.xclbin  -dfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/zlibd_quadcores_8KB/description.json b/data_compression/L2/tests/zlibd_quadcores_8KB/description.json
index 49839163f0..80ada93e7a 100644
--- a/data_compression/L2/tests/zlibd_quadcores_8KB/description.json
+++ b/data_compression/L2/tests/zlibd_quadcores_8KB/description.json
@@ -21,7 +21,8 @@
     "default_device": "u250",
     "platform_blocklist": [
         "zc",
-        "xilinx_u50_gen3x16_xdma_5_202210_1"
+        "xilinx_u50_gen3x16_xdma_5_202210_1",
+        "vck190_base_dfx"
     ],
     "platform_allowlist": [
         "u200",
@@ -143,7 +144,13 @@
                     ],
                     "options": "--sysroot=$(SYSROOT)"
                 }
-            }
+            },
+            "containers": [
+                {
+                    "name": "decompress",
+                    "ldclflags": "--config PROJECT/connectivity.cfg --clock.id 4:xilDecompress_1"
+                }
+            ]
         }
     }
-}
+}
\ No newline at end of file
diff --git a/data_compression/L2/tests/zlibd_quadcores_8KB/run.sh b/data_compression/L2/tests/zlibd_quadcores_8KB/run.sh
index 145ae1fd71..4dacce36bf 100755
--- a/data_compression/L2/tests/zlibd_quadcores_8KB/run.sh
+++ b/data_compression/L2/tests/zlibd_quadcores_8KB/run.sh
@@ -3,17 +3,19 @@ EXE_FILE=$1
 LIB_PROJ_ROOT=$2
 XCLBIN_FILE=$3
 echo "XCL_MODE=${XCL_EMULATION_MODE}"
-if [ "${XCL_EMULATION_MODE}" != "hw_emu" ] 
-then
-    cp $LIB_PROJ_ROOT/common/data/sample.txt .
-  
-    echo -e "\n\n----------Comparing files after Decompression---------\n"
-    cmd1=$(diff data/sample.txt data/sample.txt.zlib.*)
-    if [ $? -eq 0 ]
-     then
-        echo "PASS: files are the same"
-    else
-        echo "ERROR: files are different"
-        echo "$cmd1"
-   fi     
+PLATFORM_NAME=${PLATFORM}  # taken from the PLATFORM environment variable; empty when it is unset
+DEV='vck190'  # platforms whose name contains this substring skip the comparison below
+if [[ "$PLATFORM_NAME" != *"$DEV"* ]]; then
+    if [ "${XCL_EMULATION_MODE}" != "hw_emu" ]; then
+        cp "$LIB_PROJ_ROOT/common/data/sample.txt" .  # quoted so a library root containing spaces is not word-split
+
+        echo -e "\n\n----------Comparing files after Decompression---------\n"
+        cmd1=$(diff data/sample.txt data/sample.txt.zlib.*)
+        if [ $? -eq 0 ]; then
+            echo "PASS: files are the same"
+        else
+            echo "ERROR: files are different"
+            echo "$cmd1"
+        fi
+    fi
 fi
diff --git a/data_compression/L2/tests/zlibd_quadcores_8KB/utils.mk b/data_compression/L2/tests/zlibd_quadcores_8KB/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/zlibd_quadcores_8KB/utils.mk
+++ b/data_compression/L2/tests/zlibd_quadcores_8KB/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort before packaging when the kernel image path does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # Abort before packaging when the root filesystem path does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/zstd_compress/Makefile b/data_compression/L2/tests/zstd_compress/Makefile
index d39bd6b4a1..b1a3e6bbb0 100644
--- a/data_compression/L2/tests/zstd_compress/Makefile
+++ b/data_compression/L2/tests/zstd_compress/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -66,7 +72,7 @@ endif
 
 # #################### Checking if PLATFORM in whitelist ############################
 PLATFORM_ALLOWLIST +=  u200 u250 u50 u280 vck190 aws-vu9p-f1
-PLATFORM_BLOCKLIST +=  xilinx_vck190_base_202210_1
+PLATFORM_BLOCKLIST +=  vck190_base_dfx
 
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/zstdOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/zstdApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D MULTIPLE_BYTES=8 -D ZSTD_BLOCK_SIZE_KB=32
 CXXFLAGS +=  -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/tests/src -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zstd
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first, but no longer force the package to be regenerated on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/xilZstdCompressStream.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/xilZstdCompressStream.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L2/tests/zstd_compress/description.json b/data_compression/L2/tests/zstd_compress/description.json
index f29e4bb500..d06143debf 100644
--- a/data_compression/L2/tests/zstd_compress/description.json
+++ b/data_compression/L2/tests/zstd_compress/description.json
@@ -6,7 +6,7 @@
     "flow": "vitis",
     "default_device": "u200",
     "platform_blocklist": [
-        "xilinx_vck190_base_202210_1"
+        "vck190_base_dfx"
     ],
     "platform_allowlist": [
         "u200", 
diff --git a/data_compression/L2/tests/zstd_compress/utils.mk b/data_compression/L2/tests/zstd_compress/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/zstd_compress/utils.mk
+++ b/data_compression/L2/tests/zstd_compress/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE=$(K_IMAGE) does not exist, please point K_IMAGE to a valid kernel image or correct SYSROOT and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS=$(ROOTFS) does not exist, please point ROOTFS to a valid rootfs image or correct SYSROOT and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/zstd_hexacore_compress/Makefile b/data_compression/L2/tests/zstd_hexacore_compress/Makefile
index f79cafbd7e..57b98f8119 100644
--- a/data_compression/L2/tests/zstd_hexacore_compress/Makefile
+++ b/data_compression/L2/tests/zstd_hexacore_compress/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/zstdOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/zstdApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D MULTIPLE_BYTES=8 -D ZSTD_BLOCK_SIZE_KB=32 -D CORE_NUM=6
 CXXFLAGS +=  -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/tests/src -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zstd
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -189,11 +196,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -227,21 +229,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -288,12 +290,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/xilZstdCompressStream.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/xilZstdCompressStream.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -331,12 +333,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/* $(CUR_DIR)/. $(CUR_DIR)/t $(CUR_DIR)/x $(CUR_DIR)/t $(CUR_DIR)/. $(CUR_DIR)/e $(CUR_DIR)/l $(CUR_DIR)/p $(CUR_DIR)/m $(CUR_DIR)/a $(CUR_DIR)/s $(CUR_DIR)// $(CUR_DIR)/a $(CUR_DIR)/t $(CUR_DIR)/a $(CUR_DIR)/d 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/data/sample.txt.*
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/zstd_hexacore_compress/utils.mk b/data_compression/L2/tests/zstd_hexacore_compress/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/zstd_hexacore_compress/utils.mk
+++ b/data_compression/L2/tests/zstd_hexacore_compress/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE=$(K_IMAGE) does not exist, please point K_IMAGE to a valid kernel image or correct SYSROOT and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS=$(ROOTFS) does not exist, please point ROOTFS to a valid rootfs image or correct SYSROOT and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/zstd_quadcore_compress/Makefile b/data_compression/L2/tests/zstd_quadcore_compress/Makefile
index ebd877014f..3698969e2e 100644
--- a/data_compression/L2/tests/zstd_quadcore_compress/Makefile
+++ b/data_compression/L2/tests/zstd_quadcore_compress/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/zstdOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/zstdApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D MULTIPLE_BYTES=8 -D ZSTD_BLOCK_SIZE_KB=32 -D CORE_NUM=4
 CXXFLAGS +=  -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/tests/src -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zstd
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -189,11 +196,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -227,21 +229,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -288,12 +290,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/xilZstdCompressStream.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/xilZstdCompressStream.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -331,12 +333,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/* $(CUR_DIR)/. $(CUR_DIR)/t $(CUR_DIR)/x $(CUR_DIR)/t $(CUR_DIR)/. $(CUR_DIR)/e $(CUR_DIR)/l $(CUR_DIR)/p $(CUR_DIR)/m $(CUR_DIR)/a $(CUR_DIR)/s $(CUR_DIR)// $(CUR_DIR)/a $(CUR_DIR)/t $(CUR_DIR)/a $(CUR_DIR)/d 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/data/sample.txt.*
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/zstd_quadcore_compress/README.rst b/data_compression/L2/tests/zstd_quadcore_compress/README.rst
index b9ef4af0ce..724e4f7357 100644
--- a/data_compression/L2/tests/zstd_quadcore_compress/README.rst
+++ b/data_compression/L2/tests/zstd_quadcore_compress/README.rst
@@ -37,8 +37,8 @@ Average Compression Ratio	    2.68x (Silesia Benchmark)
 
 Executable Usage:
 
-1. To execute single file for decompression           : ``./build/xil_zstd -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/xilZstdCompressStream.xclbin -c <compressed file_name>``
-2. To decompress multiple files                       : ``./build/xil_zstd -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/xilZstdCompressStream.xclbin -cfl <files.list>``
+1. To execute single file for compression             : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zstd -xbin ./build_dir.<TARGET mode>.<xsa_name>/xilZstdCompressStream.xclbin -c <input file_name>``
+2. To compress multiple files                         : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zstd -xbin ./build_dir.<TARGET mode>.<xsa_name>/xilZstdCompressStream.xclbin -cfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/zstd_quadcore_compress/utils.mk b/data_compression/L2/tests/zstd_quadcore_compress/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/zstd_quadcore_compress/utils.mk
+++ b/data_compression/L2/tests/zstd_quadcore_compress/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE=$(K_IMAGE) does not exist, please point K_IMAGE to a valid kernel image or correct SYSROOT and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS=$(ROOTFS) does not exist, please point ROOTFS to a valid rootfs image or correct SYSROOT and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/zstdc_16KB/Makefile b/data_compression/L2/tests/zstdc_16KB/Makefile
index e1952ddf8b..85b623dae0 100644
--- a/data_compression/L2/tests/zstdc_16KB/Makefile
+++ b/data_compression/L2/tests/zstdc_16KB/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/zstdOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/zstdApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D MULTIPLE_BYTES=8 -D ZSTD_BLOCK_SIZE_KB=16 -D CORE_NUM=4
 CXXFLAGS +=  -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/tests/src -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zstd
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -189,11 +196,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -227,21 +229,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -288,12 +290,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/xilZstdCompressStream.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/xilZstdCompressStream.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -331,12 +333,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/* $(CUR_DIR)/. $(CUR_DIR)/t $(CUR_DIR)/x $(CUR_DIR)/t $(CUR_DIR)/. $(CUR_DIR)/e $(CUR_DIR)/l $(CUR_DIR)/p $(CUR_DIR)/m $(CUR_DIR)/a $(CUR_DIR)/s $(CUR_DIR)// $(CUR_DIR)/a $(CUR_DIR)/t $(CUR_DIR)/a $(CUR_DIR)/d 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/data/sample.txt.*
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/zstdc_16KB/utils.mk b/data_compression/L2/tests/zstdc_16KB/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/zstdc_16KB/utils.mk
+++ b/data_compression/L2/tests/zstdc_16KB/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE=$(K_IMAGE) does not exist, please point K_IMAGE to a valid kernel image or correct SYSROOT and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS=$(ROOTFS) does not exist, please point ROOTFS to a valid rootfs image or correct SYSROOT and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/zstdc_8KB/Makefile b/data_compression/L2/tests/zstdc_8KB/Makefile
index 5268b141ac..1fbf4c3d92 100644
--- a/data_compression/L2/tests/zstdc_8KB/Makefile
+++ b/data_compression/L2/tests/zstdc_8KB/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/zstdOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/zstdApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D MULTIPLE_BYTES=8 -D ZSTD_BLOCK_SIZE_KB=8 -D CORE_NUM=4
 CXXFLAGS +=  -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/tests/src -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zstd
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -189,11 +196,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -227,21 +229,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -288,12 +290,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/xilZstdCompressStream.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/xilZstdCompressStream.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -331,12 +333,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/* $(CUR_DIR)/. $(CUR_DIR)/t $(CUR_DIR)/x $(CUR_DIR)/t $(CUR_DIR)/. $(CUR_DIR)/e $(CUR_DIR)/l $(CUR_DIR)/p $(CUR_DIR)/m $(CUR_DIR)/a $(CUR_DIR)/s $(CUR_DIR)// $(CUR_DIR)/a $(CUR_DIR)/t $(CUR_DIR)/a $(CUR_DIR)/d 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/data/sample.txt.*
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/zstdc_8KB/utils.mk b/data_compression/L2/tests/zstdc_8KB/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/zstdc_8KB/utils.mk
+++ b/data_compression/L2/tests/zstdc_8KB/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/zstdd_128KB/Makefile b/data_compression/L2/tests/zstdd_128KB/Makefile
index 6ce9356ee4..3bc6630262 100644
--- a/data_compression/L2/tests/zstdd_128KB/Makefile
+++ b/data_compression/L2/tests/zstdd_128KB/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -66,7 +72,7 @@ endif
 
 # #################### Checking if PLATFORM in whitelist ############################
 PLATFORM_ALLOWLIST +=  u200 u250 u50 u280 vck190 aws-vu9p-f1
-PLATFORM_BLOCKLIST += 
+PLATFORM_BLOCKLIST +=  vck190_base_dfx
 
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
@@ -88,7 +94,7 @@ RUN_DEPS :=
 # get global setting
 ifeq ($(HOST_ARCH), x86)
 CXXFLAGS += -fmessage-length=0 -I$(CUR_DIR)/src/ -I$(XILINX_XRT)/include -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
-LDFLAGS += -pthread -L$(XILINX_XRT)/lib -Wl,--as-needed -lOpenCL -lxrt_coreutil 
+LDFLAGS += -pthread -L$(XILINX_XRT)/lib -L$(XILINX_HLS)/lnx64/tools/fpo_v7_0  -Wl,--as-needed -lOpenCL -lxrt_coreutil -lgmp -lmpfr -lIp_floating_point_v7_0_bitacc_cmodel 
 VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
 VPP_LDFLAGS += --optimize 2 -R 2 
 else ifeq ($(HOST_ARCH), aarch64)
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/zstdOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/zstdApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D ZSTD_BLOCK_SIZE_KB=128
 CXXFLAGS +=  -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/tests/src -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zstd
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -194,11 +201,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -232,21 +234,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -293,12 +295,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/xilZstdDecompressStream.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/xilZstdDecompressStream.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -336,12 +338,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/data/sample.txt.zst.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/data/sample.txt.zst.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/zstdd_128KB/description.json b/data_compression/L2/tests/zstdd_128KB/description.json
index 0a0a5f6adf..bbfa5a7590 100644
--- a/data_compression/L2/tests/zstdd_128KB/description.json
+++ b/data_compression/L2/tests/zstdd_128KB/description.json
@@ -5,7 +5,9 @@
     ],
     "flow": "vitis",
     "default_device": "u200",
-    "platform_blocklist": [],
+    "platform_blocklist": [
+        "vck190_base_dfx"
+     ],
     "platform_allowlist": [
         "u200",
         "u250",
@@ -143,4 +145,4 @@
             ]
         }
     }
-}
\ No newline at end of file
+}
diff --git a/data_compression/L2/tests/zstdd_128KB/utils.mk b/data_compression/L2/tests/zstdd_128KB/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/zstdd_128KB/utils.mk
+++ b/data_compression/L2/tests/zstdd_128KB/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/zstdd_32KB/Makefile b/data_compression/L2/tests/zstdd_32KB/Makefile
index d85da37383..56469fd8c1 100644
--- a/data_compression/L2/tests/zstdd_32KB/Makefile
+++ b/data_compression/L2/tests/zstdd_32KB/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/zstdOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/zstdApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D ZSTD_BLOCK_SIZE_KB=32
 CXXFLAGS +=  -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/tests/src -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zstd
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -194,11 +201,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -232,21 +234,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -293,12 +295,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/xilZstdDecompressStream.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/xilZstdDecompressStream.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -336,12 +338,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/data/sample.txt.zst.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/data/sample.txt.zst.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/zstdd_32KB/README.rst b/data_compression/L2/tests/zstdd_32KB/README.rst
index 21e90c3e61..210c41cc3b 100644
--- a/data_compression/L2/tests/zstdd_32KB/README.rst
+++ b/data_compression/L2/tests/zstdd_32KB/README.rst
@@ -38,8 +38,8 @@ Performance Data
 
 Executable Usage:
 
-1. To execute single file for decompression           : ``./build/xil_zstd -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/xilZstdDecompressStream.xclbin -d <compressed file_name>``
-2. To decompress multiple files                       : ``./build/xil_zstd -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/xilZstdDecompressStream.xclbin -dfl <files.list>``
+1. To execute single file for decompression           : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zstd -xbin ./build_dir.<TARGET mode>.<xsa_name>/xilZstdDecompressStream.xclbin -d <compressed file_name>``
+2. To decompress multiple files                       : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zstd -xbin ./build_dir.<TARGET mode>.<xsa_name>/xilZstdDecompressStream.xclbin -dfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L2/tests/zstdd_32KB/utils.mk b/data_compression/L2/tests/zstdd_32KB/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/zstdd_32KB/utils.mk
+++ b/data_compression/L2/tests/zstdd_32KB/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/zstdd_32KB_fastHuff/Makefile b/data_compression/L2/tests/zstdd_32KB_fastHuff/Makefile
index 7f5d28fed8..fd589ca4c5 100644
--- a/data_compression/L2/tests/zstdd_32KB_fastHuff/Makefile
+++ b/data_compression/L2/tests/zstdd_32KB_fastHuff/Makefile
@@ -18,15 +18,15 @@
 
 help::
 	$(ECHO) "Makefile Usage:"
-	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86/aarch64>"
+	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
 	$(ECHO) "      Command to generate the design for specified Target and Shell."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86/aarch64>"
+	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
 	$(ECHO) "      Command to run application in emulation."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make xclbin TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86/aarch64>"
+	$(ECHO) "  make xclbin TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -65,8 +71,8 @@ PLATFORM := xilinx_u200_gen3x16_xdma_2_202110_1
 endif
 
 # #################### Checking if PLATFORM in whitelist ############################
-PLATFORM_ALLOWLIST +=  u200 u250 u280 vck190 aws-vu9p-f1
-PLATFORM_BLOCKLIST += 
+PLATFORM_ALLOWLIST +=  u200 u250 u280 aws-vu9p-f1
+PLATFORM_BLOCKLIST +=  vck190
 
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
@@ -88,7 +94,7 @@ RUN_DEPS :=
 # get global setting
 ifeq ($(HOST_ARCH), x86)
 CXXFLAGS += -fmessage-length=0 -I$(CUR_DIR)/src/ -I$(XILINX_XRT)/include -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
-LDFLAGS += -pthread -L$(XILINX_XRT)/lib -Wl,--as-needed -lOpenCL -lxrt_coreutil 
+LDFLAGS += -pthread -L$(XILINX_XRT)/lib -L$(XILINX_HLS)/lnx64/tools/fpo_v7_0  -Wl,--as-needed -lOpenCL -lxrt_coreutil -lgmp -lmpfr -lIp_floating_point_v7_0_bitacc_cmodel 
 VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
 VPP_LDFLAGS += --optimize 2 -R 2 
 else ifeq ($(HOST_ARCH), aarch64)
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/zstdOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/zstdApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D ZSTD_BLOCK_SIZE_KB=32
 CXXFLAGS +=  -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/tests/src -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zstd
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -194,11 +201,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -232,21 +234,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -293,12 +295,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/xilZstdDecompressStream.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/xilZstdDecompressStream.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -336,12 +338,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/data/sample.txt.zst.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/data/sample.txt.zst.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/zstdd_32KB_fastHuff/description.json b/data_compression/L2/tests/zstdd_32KB_fastHuff/description.json
index 713148a76d..58a418cabe 100644
--- a/data_compression/L2/tests/zstdd_32KB_fastHuff/description.json
+++ b/data_compression/L2/tests/zstdd_32KB_fastHuff/description.json
@@ -5,12 +5,13 @@
     ],
     "flow": "vitis",
     "default_device": "u200",
-    "platform_blocklist": [],
+    "platform_blocklist": [
+        "vck190"
+    ],
     "platform_allowlist": [
         "u200",
         "u250",
         "u280",
-        "vck190",
         "aws-vu9p-f1"
     ],
     "data": [
diff --git a/data_compression/L2/tests/zstdd_32KB_fastHuff/utils.mk b/data_compression/L2/tests/zstdd_32KB_fastHuff/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/zstdd_32KB_fastHuff/utils.mk
+++ b/data_compression/L2/tests/zstdd_32KB_fastHuff/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+	$(error SYSROOT ENV variable is not set or its path does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set or its path does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set or its path does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L2/tests/zstdd_32KB_fastSeq/Makefile b/data_compression/L2/tests/zstdd_32KB_fastSeq/Makefile
index 1d8d09da62..d1d260f679 100644
--- a/data_compression/L2/tests/zstdd_32KB_fastSeq/Makefile
+++ b/data_compression/L2/tests/zstdd_32KB_fastSeq/Makefile
@@ -18,15 +18,15 @@
 
 help::
 	$(ECHO) "Makefile Usage:"
-	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86/aarch64>"
+	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
 	$(ECHO) "      Command to generate the design for specified Target and Shell."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86/aarch64>"
+	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
 	$(ECHO) "      Command to run application in emulation."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make xclbin TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86/aarch64>"
+	$(ECHO) "  make xclbin TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<x86>"
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -65,8 +71,8 @@ PLATFORM := xilinx_u200_gen3x16_xdma_2_202110_1
 endif
 
 # #################### Checking if PLATFORM in whitelist ############################
-PLATFORM_ALLOWLIST +=  u200 u250 u50 u280 vck190 aws-vu9p-f1
-PLATFORM_BLOCKLIST += 
+PLATFORM_ALLOWLIST +=  u200 u250 u50 u280 aws-vu9p-f1
+PLATFORM_BLOCKLIST +=  vck190
 
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
@@ -88,7 +94,7 @@ RUN_DEPS :=
 # get global setting
 ifeq ($(HOST_ARCH), x86)
 CXXFLAGS += -fmessage-length=0 -I$(CUR_DIR)/src/ -I$(XILINX_XRT)/include -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
-LDFLAGS += -pthread -L$(XILINX_XRT)/lib -Wl,--as-needed -lOpenCL -lxrt_coreutil 
+LDFLAGS += -pthread -L$(XILINX_XRT)/lib -L$(XILINX_HLS)/lnx64/tools/fpo_v7_0  -Wl,--as-needed -lOpenCL -lxrt_coreutil -lgmp -lmpfr -lIp_floating_point_v7_0_bitacc_cmodel 
 VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
 VPP_LDFLAGS += --optimize 2 -R 2 
 else ifeq ($(HOST_ARCH), aarch64)
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/zstdOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/zstdApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D ZSTD_BLOCK_SIZE_KB=32
 CXXFLAGS +=  -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/tests/src -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zstd
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -194,11 +201,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -232,21 +234,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -293,12 +295,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/xilZstdDecompressStream.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/xilZstdDecompressStream.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
@@ -336,12 +338,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/data/sample.txt.zst.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/data/sample.txt.zst.* 
 
 clean: cleanh
\ No newline at end of file
diff --git a/data_compression/L2/tests/zstdd_32KB_fastSeq/description.json b/data_compression/L2/tests/zstdd_32KB_fastSeq/description.json
index 6ea8cf64ac..684bc55a3e 100644
--- a/data_compression/L2/tests/zstdd_32KB_fastSeq/description.json
+++ b/data_compression/L2/tests/zstdd_32KB_fastSeq/description.json
@@ -5,13 +5,14 @@
     ],
     "flow": "vitis",
     "default_device": "u200",
-    "platform_blocklist": [],
+    "platform_blocklist": [
+        "vck190"
+    ],
     "platform_allowlist": [
         "u200",
         "u250",
         "u50",
         "u280",
-        "vck190",
         "aws-vu9p-f1"
     ],
     "data": [
@@ -144,4 +145,4 @@
             ]
         }
     }
-}
\ No newline at end of file
+}
diff --git a/data_compression/L2/tests/zstdd_32KB_fastSeq/utils.mk b/data_compression/L2/tests/zstdd_32KB_fastSeq/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L2/tests/zstdd_32KB_fastSeq/utils.mk
+++ b/data_compression/L2/tests/zstdd_32KB_fastSeq/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+	$(error SYSROOT ENV variable is not set or its path does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set or its path does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set or its path does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L3/benchmarks/lz4_p2p_compress/Makefile b/data_compression/L3/benchmarks/lz4_p2p_compress/Makefile
index a8abc51a77..e4ae3f3738 100644
--- a/data_compression/L3/benchmarks/lz4_p2p_compress/Makefile
+++ b/data_compression/L3/benchmarks/lz4_p2p_compress/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,15 +114,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/L3/src/lz4_p2p_comp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
 CXXFLAGS +=  -D PARALLEL_BLOCK=8 -D SINGLE_XCLBIN=false
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_lz4
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -172,11 +179,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -210,21 +212,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -271,12 +273,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L3/benchmarks/lz4_p2p_compress/README.rst b/data_compression/L3/benchmarks/lz4_p2p_compress/README.rst
index cc3d3cbdcb..1a9af0096d 100644
--- a/data_compression/L3/benchmarks/lz4_p2p_compress/README.rst
+++ b/data_compression/L3/benchmarks/lz4_p2p_compress/README.rst
@@ -48,8 +48,8 @@ This application is present in ``L3/benchmarks/lz4_p2p_compress`` directory. Fol
 
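-The binary host file generated is named as "**xil_lz4**" and it is present in ``./build`` directory.
+The generated host binary is named "**xil_lz4**" and is located in the ``./build_dir.<TARGET mode>.<xsa_name>`` directory.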
 
-1. To execute single file for compression   : ``./build/xil_lz4 -cx ./build/xclbin_<xsa_name>_<TARGET mode>/<compress.xclbin> -c <file_name>``
-2. To execute multiple files for compression        : ``./build/xil_lz4 -cx ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -l <files.list>``
+1. To execute single file for compression   : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -cx ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name>``
+2. To execute multiple files for compression        : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -cx ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -l <files.list>``
 
      - ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L3/benchmarks/lz4_p2p_compress/utils.mk b/data_compression/L3/benchmarks/lz4_p2p_compress/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L3/benchmarks/lz4_p2p_compress/utils.mk
+++ b/data_compression/L3/benchmarks/lz4_p2p_compress/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+	$(error SYSROOT ENV variable is not set or its path does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set or its path does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set or its path does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L3/benchmarks/lz4_p2p_decompress/Makefile b/data_compression/L3/benchmarks/lz4_p2p_decompress/Makefile
index ca6642ad8e..6b12ea31fd 100644
--- a/data_compression/L3/benchmarks/lz4_p2p_decompress/Makefile
+++ b/data_compression/L3/benchmarks/lz4_p2p_decompress/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,15 +114,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/L3/src/lz4_p2p_dec.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
 CXXFLAGS +=  -D PARALLEL_BLOCK=1 -D C_COMPUTE_UNIT=1 -D D_COMPUTE_UNIT=2 -D OVERLAP_HOST_DEVICE
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_lz4
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -174,11 +181,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -212,21 +214,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -273,12 +275,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/decompress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L3/benchmarks/lz4_p2p_decompress/README.rst b/data_compression/L3/benchmarks/lz4_p2p_decompress/README.rst
index 63e055aa04..102a0355e5 100644
--- a/data_compression/L3/benchmarks/lz4_p2p_decompress/README.rst
+++ b/data_compression/L3/benchmarks/lz4_p2p_decompress/README.rst
@@ -47,8 +47,8 @@ This application is present in ``L3/benchmarks/lz4_p2p_decompress`` directory. F
 
 The binary host file generated is named as "**xil_lz4**" and it is present in ``./build`` directory.
 
-1. To execute single file for decompression   : ``./build/xil_lz4 -dx ./build/xclbin_<xsa_name>_<TARGET mode>/<decompress.xclbin> -d <file_name>``
-2. To execute multiple files for decompression        : ``./build/xil_lz4 -dx ./build/xclbin_<xsa_name>_<TARGET mode>/decompress.xclbin -l <files.list>``
+1. To execute single file for decompression   : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -dx ./build_dir.<TARGET mode>.<xsa_name>/decompress.xclbin -d <file_name>``
+2. To execute multiple files for decompression        : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -dx ./build_dir.<TARGET mode>.<xsa_name>/decompress.xclbin -l <files.list>``
      - ``<files.list>``: Contains various file names with current path
 
 The usage of the generated executable is as follows:
diff --git a/data_compression/L3/benchmarks/lz4_p2p_decompress/utils.mk b/data_compression/L3/benchmarks/lz4_p2p_decompress/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L3/benchmarks/lz4_p2p_decompress/utils.mk
+++ b/data_compression/L3/benchmarks/lz4_p2p_decompress/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # K_IMAGE always gets a ?= default above, so the guard tests that the path exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # ROOTFS always gets a ?= default above, so the guard tests that the path exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L3/demos/gzip_app/Makefile b/data_compression/L3/demos/gzip_app/Makefile
index ab78702b2b..eb71c7ac93 100644
--- a/data_compression/L3/demos/gzip_app/Makefile
+++ b/data_compression/L3/demos/gzip_app/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,16 +114,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
 CXXFLAGS +=  -D C_COMPUTE_UNIT=2 -D D_COMPUTE_UNIT=8 -D PARALLEL_BLOCK=4
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/L2/tests/src/ -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_gzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -186,11 +193,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -224,21 +226,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -285,12 +287,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress_decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress_decompress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L3/demos/gzip_app/README.rst b/data_compression/L3/demos/gzip_app/README.rst
index 026454c529..0619a91f26 100644
--- a/data_compression/L3/demos/gzip_app/README.rst
+++ b/data_compression/L3/demos/gzip_app/README.rst
@@ -39,19 +39,19 @@ The host executable generated is named as "**xil_gzip**" and it is generated in
 
 Following is the usage of the executable:
 
-1. To execute single file for compression 	          : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -c <input file_name>``
-2. To execute single file for decompression           : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -d <compressed file_name>``
-3. To validate single file (compress & decompress)    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -t <input file_name>``
-4. To execute multiple files for compression          : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -cfl <files.list>``
-5. To execute multiple files for decompression        : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -dfl <compressed files.list>``
-6. To validate multiple files (compress & decompress) : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -l <files.list>``
+1. To execute single file for compression 	          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -c <input file_name>``
+2. To execute single file for decompression           : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -d <compressed file_name>``
+3. To validate single file (compress & decompress)    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -t <input file_name>``
+4. To execute multiple files for compression          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -cfl <files.list>``
+5. To execute multiple files for decompression        : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -dfl <compressed files.list>``
+6. To validate multiple files (compress & decompress) : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -l <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
 The default design flow is GZIP design to run the ZLIB, enable the switch ``-zlib`` in the command line, as mentioned below:
-``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -c <input file_name> -zlib 1``
+``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -c <input file_name> -zlib 1``
 
-The -xbin option mentioned above is optional, you can provide path to your binary file using -xbin option otherwise it will by default map to ``./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin`` 
+The -xbin option mentioned above is optional, you can provide path to your binary file using -xbin option otherwise it will by default map to ``./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin`` 
 
 
 The usage of the generated executable is as follows:
diff --git a/data_compression/L3/demos/gzip_app/utils.mk b/data_compression/L3/demos/gzip_app/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L3/demos/gzip_app/utils.mk
+++ b/data_compression/L3/demos/gzip_app/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # K_IMAGE always gets a ?= default above, so the guard tests that the path exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # ROOTFS always gets a ?= default above, so the guard tests that the path exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L3/demos/gzip_hbm/Makefile b/data_compression/L3/demos/gzip_hbm/Makefile
index deb34f9749..9bf3070e06 100644
--- a/data_compression/L3/demos/gzip_hbm/Makefile
+++ b/data_compression/L3/demos/gzip_hbm/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
 CXXFLAGS +=  -D C_COMPUTE_UNIT=6 -D D_COMPUTE_UNIT=8 -D PARALLEL_BLOCK=4 -D USE_HBM
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xgzip
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -184,11 +191,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -222,21 +224,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -283,18 +285,18 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress_decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
-	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BINARY_CONTAINERS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BINARY_CONTAINERS)
 endif
 else ifeq ($(HOST_ARCH), x86)
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
-	XCL_EMULATION_MODE=$(TARGET) ./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BINARY_CONTAINERS)
+	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BINARY_CONTAINERS)
 else
 	$(ECHO) "Please copy the content of sd_card folder and data to an SD Card and run on the board"
 endif
diff --git a/data_compression/L3/demos/gzip_hbm/README.rst b/data_compression/L3/demos/gzip_hbm/README.rst
index 676e07d30b..e5a998218f 100644
--- a/data_compression/L3/demos/gzip_hbm/README.rst
+++ b/data_compression/L3/demos/gzip_hbm/README.rst
@@ -39,19 +39,19 @@ The host executable generated is named as "**xgzip**" and it is generated in ``.
 
 Following is the usage of the executable:
 
-1. To execute single file for compression                      : ``./build/xgzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -c <input file_name>``
-2. To execute single file for decompression                    : ``./build/xgzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -d <compressed file_name>``
-3. To test and validate single file (compress & decompress)    : ``./build/xgzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -t <input file_name>``
-4. To execute multiple files for compression          : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -cfl <files.list>``
-5. To execute multiple files for decompression        : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -dfl <compressed files.list>``                       
-6. To validate multiple files (compress & decompress)          : ``./build/xgzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -l <files.list>``
+1. To execute single file for compression                      : ``./build_dir.<TARGET mode>.<xsa_name>/xgzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -c <input file_name>``
+2. To execute single file for decompression                    : ``./build_dir.<TARGET mode>.<xsa_name>/xgzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -d <compressed file_name>``
+3. To test and validate single file (compress & decompress)    : ``./build_dir.<TARGET mode>.<xsa_name>/xgzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -t <input file_name>``
+4. To execute multiple files for compression          : ``./build_dir.<TARGET mode>.<xsa_name>/xgzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -cfl <files.list>``
+5. To execute multiple files for decompression        : ``./build_dir.<TARGET mode>.<xsa_name>/xgzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -dfl <compressed files.list>``
+6. To validate multiple files (compress & decompress)          : ``./build_dir.<TARGET mode>.<xsa_name>/xgzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -l <files.list>``
 
                 - ``<files.list>``: Contains various file names with current path
 
 The default design flow is GZIP design to run the ZLIB, enable the switch ``-zlib`` in the command line, as mentioned below:
-``./build/xgzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -c <input file_name> -zlib 1``
+``./build_dir.<TARGET mode>.<xsa_name>/xgzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -c <input file_name> -zlib 1``
 
-The -xbin option mentioned above is optional, you can provide path to your binary file using -xbin option otherwise it will by default map to ``./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin`` 
+The -xbin option mentioned above is optional, you can provide path to your binary file using -xbin option otherwise it will by default map to ``./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin`` 
 
 The usage of the generated executable is as follows:
 
diff --git a/data_compression/L3/demos/gzip_hbm/description.json b/data_compression/L3/demos/gzip_hbm/description.json
index e052813c58..1b4030df2f 100644
--- a/data_compression/L3/demos/gzip_hbm/description.json
+++ b/data_compression/L3/demos/gzip_hbm/description.json
@@ -22,9 +22,22 @@
     ],
     "post_launch": [
         {
+	    "target": "sw_emu",
             "launch_cmd": [
                 "XCL_EMULATION_MODE=$(TARGET) ./run.sh HOST_EXE LIB_DIR $(BINARY_CONTAINERS)"
             ]
+        },
+ 	{
+	    "target": "hw_emu",
+            "launch_cmd": [
+                "XCL_EMULATION_MODE=$(TARGET) ./run.sh HOST_EXE LIB_DIR $(BINARY_CONTAINERS)"
+            ]
+        },
+        {
+	    "target": "hw",
+            "launch_cmd": [
+                "./run.sh HOST_EXE LIB_DIR $(BINARY_CONTAINERS)"
+            ]
         }
     ],
     "match_ini": "false",
diff --git a/data_compression/L3/demos/gzip_hbm/run.sh b/data_compression/L3/demos/gzip_hbm/run.sh
index 64f6e1fca7..57f6cfff6d 100755
--- a/data_compression/L3/demos/gzip_hbm/run.sh
+++ b/data_compression/L3/demos/gzip_hbm/run.sh
@@ -1,9 +1,9 @@
 #!/bin/bash
 EXE_FILE=$1
 LIB_PROJ_ROOT=$2
-BIN_PATH=$3
+XCLBIN_FILE=$3
 echo "XCL_MODE=${XCL_EMULATION_MODE}"
-export XILINX_LIBZ_XCLBIN=$BIN_PATH
+export XILINX_LIBZ_XCLBIN=$XCLBIN_FILE
 if [ "${XCL_EMULATION_MODE}" == "sw_emu" ]; 
 then
     echo -e "\n\n-----------Supported Options-----------\n"
@@ -17,11 +17,8 @@ then
     cp $LIB_PROJ_ROOT/common/data/test.list test.list
     echo "sample_run.txt.gz" > gzip_test_decomp.list
     echo "sample_run.txt.xz" > zlib_test_decomp.list
-    for ((i = 0 ; i < 10 ; i++))
-    do
-        find ./reports/ -type f | xargs cat >> sample_run.txt
-    done
-   
+    find ./reports/ -type f | xargs cat >> sample_run.txt
+    
     for ((i = 0 ; i < 10 ; i++)) 
     do
 	cat sample_run.txt >> sample_run.txt${i}
@@ -31,32 +28,32 @@ then
     done
 
 echo -e "\n\n-----------ZLIB Flow-----------\n"
-    cmd1="$EXE_FILE -t sample.txt -zlib 1"
+    cmd1="$EXE_FILE -xbin $XCLBIN_FILE -t sample.txt -zlib 1"
     echo $cmd1
     $cmd1
 
 echo -e "\n\n-----------GZIP Flow (-xbin option)-----------\n"
-    cmd2="$EXE_FILE -xbin $BIN_PATH -t sample.txt"
+    cmd2="$EXE_FILE -xbin $XCLBIN_FILE -t sample.txt"
     echo $cmd2
     $cmd2
 
 echo -e "\n\n-----------GZIP Compress list of files -----------\n"
-    cmd2="$EXE_FILE -xbin $BIN_PATH -cfl ./test.list"
+    cmd2="$EXE_FILE -xbin $XCLBIN_FILE -cfl ./test.list"
     echo $cmd2
     $cmd2
 
 echo -e "\n\n-----------ZLIB Compress list of files -----------\n"
-    cmd2="$EXE_FILE -xbin $BIN_PATH -cfl ./test.list -zlib 1"
+    cmd2="$EXE_FILE -xbin $XCLBIN_FILE -cfl ./test.list -zlib 1"
     echo $cmd2
     $cmd2
 
 echo -e "\n\n-----------GZIP Decompress list of files -----------\n"
-    cmd2="$EXE_FILE -xbin $BIN_PATH -dfl ./gzip_test_decomp.list"
+    cmd2="$EXE_FILE -xbin $XCLBIN_FILE -dfl ./gzip_test_decomp.list"
     echo $cmd2
     $cmd2
 
 echo -e "\n\n-----------ZLIB Decompress list of files -----------\n"
-    cmd2="$EXE_FILE -xbin $BIN_PATH -dfl ./zlib_test_decomp.list"
+    cmd2="$EXE_FILE -xbin $XCLBIN_FILE -dfl ./zlib_test_decomp.list"
     echo $cmd2
     $cmd2
 fi
diff --git a/data_compression/L3/demos/gzip_hbm/utils.mk b/data_compression/L3/demos/gzip_hbm/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L3/demos/gzip_hbm/utils.mk
+++ b/data_compression/L3/demos/gzip_hbm/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # K_IMAGE always gets a ?= default above, so the guard tests that the path exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # ROOTFS always gets a ?= default above, so the guard tests that the path exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L3/demos/lz4_app/Makefile b/data_compression/L3/demos/lz4_app/Makefile
index f4ee5cf768..4ace77a3f9 100644
--- a/data_compression/L3/demos/lz4_app/Makefile
+++ b/data_compression/L3/demos/lz4_app/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/lz4OCLHost.cpp $(XFLIB_DIR)/common/libs/compress/lz4Base.cpp $(XFLIB_DIR)/common/libs/compress/lz4App.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/xxhash/xxhash.c 
 CXXFLAGS +=  -D PARALLEL_BLOCK=8 -D C_COMPUTE_UNIT=2 -D D_COMPUTE_UNIT=2 -D OVERLAP_HOST_DEVICE
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/thirdParty/xxhash -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_lz4
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,11 +182,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -213,21 +215,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -274,12 +276,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress_decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	./run.sh $(EXE_FILE) $(XFLIB_DIR) $(BUILD_DIR)/compress_decompress.xclbin
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L3/demos/lz4_app/README.rst b/data_compression/L3/demos/lz4_app/README.rst
index 55d9d0d7ca..dd042b0660 100644
--- a/data_compression/L3/demos/lz4_app/README.rst
+++ b/data_compression/L3/demos/lz4_app/README.rst
@@ -17,12 +17,12 @@ This application is present in ``L3/demos/lz4_app`` directory. Follow build inst
 
 The binary host file generated is named as "**xil_lz4**" and it is present in ``./build`` directory.
 
-1. To execute single file for compression 	: ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -c <file_name>``
-2. To execute single file for decompression	: ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -d <file_name.lz4>``
-3. To validate single file (compress & decompress) : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -t <input file_name>``
-4. To execute multiple files for compression        : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -cfl <files.list>``
-5. To execute multiple files for decompression        : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -dfl <compressed files.list>``   
-6. To validate multiple files files (compress & decompress)		: ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -l <files.list>``
+1. To execute single file for compression 	: ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -c <file_name>``
+2. To execute single file for decompression	: ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -d <file_name.lz4>``
+3. To validate single file (compress & decompress) : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -t <input file_name>``
+4. To execute multiple files for compression        : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -cfl <files.list>``
+5. To execute multiple files for decompression        : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -dfl <compressed files.list>``   
+6. To validate multiple files (compress & decompress)		: ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -l <files.list>``
 	
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/L3/demos/lz4_app/utils.mk b/data_compression/L3/demos/lz4_app/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L3/demos/lz4_app/utils.mk
+++ b/data_compression/L3/demos/lz4_app/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # aborts when K_IMAGE does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE [$(K_IMAGE)] is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # aborts when ROOTFS does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS [$(ROOTFS)] is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L3/tests/zlib_compress_sb/Makefile b/data_compression/L3/tests/zlib_compress_sb/Makefile
index 237412250f..b9311954fb 100644
--- a/data_compression/L3/tests/zlib_compress_sb/Makefile
+++ b/data_compression/L3/tests/zlib_compress_sb/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
 CXXFLAGS +=  -D C_COMPUTE_UNIT=6
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zlibc
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -168,11 +175,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -206,21 +208,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -267,12 +269,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/compress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L3/tests/zlib_compress_sb/utils.mk b/data_compression/L3/tests/zlib_compress_sb/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L3/tests/zlib_compress_sb/utils.mk
+++ b/data_compression/L3/tests/zlib_compress_sb/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # aborts when K_IMAGE does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE [$(K_IMAGE)] is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # aborts when ROOTFS does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS [$(ROOTFS)] is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/L3/tests/zlib_decompress_sb/Makefile b/data_compression/L3/tests/zlib_decompress_sb/Makefile
index 045d85204c..60a0333eb3 100644
--- a/data_compression/L3/tests/zlib_decompress_sb/Makefile
+++ b/data_compression/L3/tests/zlib_decompress_sb/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,16 +114,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(XFLIB_DIR)/common/libs/compress/gzipOCLHost.cpp $(XFLIB_DIR)/common/libs/compress/gzipBase.cpp $(XFLIB_DIR)/common/libs/compress/gzipApp.cpp $(XFLIB_DIR)/common/libs/compress/compressApp.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/crc32.c $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7/adler32.c 
 CXXFLAGS +=  -D D_COMPUTE_UNIT=1 -D PARALLEL_BLOCK=4
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/thirdParty/zlib-1.2.7 -I $(XFLIB_DIR)/common/libs/compress/ -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_zlibc
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -181,11 +188,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -219,21 +221,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -280,12 +282,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/decompress.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/data_compression/L3/tests/zlib_decompress_sb/utils.mk b/data_compression/L3/tests/zlib_decompress_sb/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/data_compression/L3/tests/zlib_decompress_sb/utils.mk
+++ b/data_compression/L3/tests/zlib_decompress_sb/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # aborts when K_IMAGE does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE [$(K_IMAGE)] is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # aborts when ROOTFS does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS [$(ROOTFS)] is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/data_compression/docs/conf.py b/data_compression/docs/conf.py
index d43cfcd8a9..45e1b8d90c 100644
--- a/data_compression/docs/conf.py
+++ b/data_compression/docs/conf.py
@@ -43,9 +43,9 @@
 author = 'Xilinx'
 
 # The short X.Y version
-version = '2021.2'
+version = '2022.1'
 # The full version, including alpha/beta/rc tags
-release = '2021.2 release'
+release = '2022.1 release'
 html_last_updated_fmt = '%B %d, %Y'
 
 rst_epilog = """
diff --git a/data_compression/docs/source/L2/gzip.rst b/data_compression/docs/source/L2/gzip.rst
index bbfa668b88..84cc86eca1 100644
--- a/data_compression/docs/source/L2/gzip.rst
+++ b/data_compression/docs/source/L2/gzip.rst
@@ -11,12 +11,12 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	          : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -c <input file_name>``
-2. To execute single file for decompression           : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -d <compressed file_name>``
-3. To validate single file (compress & decompress)    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -t <input file_name>``
-4. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -cfl <files.list>``
-5. To execute multiple files for decompression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -dfl <compressed files.list>``
-6. To validate multiple files (compress & decompress) : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -l <files.list>``
+1. To execute single file for compression 	          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -c <input file_name>``
+2. To execute single file for decompression           : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -d <compressed file_name>``
+3. To validate single file (compress & decompress)    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -t <input file_name>``
+4. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -cfl <files.list>``
+5. To execute multiple files for decompression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -dfl <compressed files.list>``
+6. To validate multiple files (compress & decompress) : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -l <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/gzipc.rst b/data_compression/docs/source/L2/gzipc.rst
index b03c63efea..99eeba9d43 100644
--- a/data_compression/docs/source/L2/gzipc.rst
+++ b/data_compression/docs/source/L2/gzipc.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name>``
-2. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list>``
+1. To execute single file for compression 	    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name>``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/gzipc_16KB.rst b/data_compression/docs/source/L2/gzipc_16KB.rst
index f11218eed1..6f7708b41c 100644
--- a/data_compression/docs/source/L2/gzipc_16KB.rst
+++ b/data_compression/docs/source/L2/gzipc_16KB.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name>``
-2. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list>``
+1. To execute single file for compression 	    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name>``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/gzipc_8KB.rst b/data_compression/docs/source/L2/gzipc_8KB.rst
index fb508aafc6..afd4c0282e 100644
--- a/data_compression/docs/source/L2/gzipc_8KB.rst
+++ b/data_compression/docs/source/L2/gzipc_8KB.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name>``
-2. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list>``
+1. To execute single file for compression 	    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name>``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/gzipc_block_mm.rst b/data_compression/docs/source/L2/gzipc_block_mm.rst
index a2ebc60b8f..d149872b29 100644
--- a/data_compression/docs/source/L2/gzipc_block_mm.rst
+++ b/data_compression/docs/source/L2/gzipc_block_mm.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	          : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name>``
-2. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list>``
+1. To execute single file for compression 	          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name>``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/gzipc_static.rst b/data_compression/docs/source/L2/gzipc_static.rst
index 5dc613cdf8..afb7f4bc7d 100644
--- a/data_compression/docs/source/L2/gzipc_static.rst
+++ b/data_compression/docs/source/L2/gzipc_static.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_gzip**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	          : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name>``
-2. To execute multiple files for compression    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list>``
+1. To execute single file for compression 	          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name>``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/lz4.rst b/data_compression/docs/source/L2/lz4.rst
index 67b8e2afb0..21abac0d23 100644
--- a/data_compression/docs/source/L2/lz4.rst
+++ b/data_compression/docs/source/L2/lz4.rst
@@ -59,12 +59,12 @@ Software & Hardware
 Executable Usage
 ----------------
  
-1. To execute single file for compression             : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -c <file_name>``
-2. To execute single file for decompression           : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -d <file_name.lz4>``
-3. To validate single file (compress & decompress)    : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -t <file_name>``
-4. To execute multiple files for compression     : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -cfl <files.list>``
-5. To execute multiple files for decompression     : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress xclbin> -dfl <compressed files.list>``
-6. To validate multiple files (compress and decompress) : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress xclbin> -l <files.list>``  
+1. To execute single file for compression             : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -c <file_name>``
+2. To execute single file for decompression           : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -d <file_name.lz4>``
+3. To validate single file (compress & decompress)    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -t <file_name>``
+4. To execute multiple files for compression     : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -cfl <files.list>``
+5. To execute multiple files for decompression     : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -dfl <compressed files.list>``
+6. To validate multiple files (compress and decompress) : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -l <files.list>``  
            
       - ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/lz4_compress_streaming.rst b/data_compression/docs/source/L2/lz4_compress_streaming.rst
index 27ef2a58bf..e27172ec17 100644
--- a/data_compression/docs/source/L2/lz4_compress_streaming.rst
+++ b/data_compression/docs/source/L2/lz4_compress_streaming.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as **xil_lz4_streaming** and it is prese
 Executable Usage
 ----------------
 
-1. To execute single file for compression             : ``./build/xil_lz4_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_streaming.xclbin -c <input file_name>``
-2. To execute multiple files for compression    : ``./build/xil_lz4_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_streaming.xclbin -cfl <files.list>``
+1. To execute single file for compression             : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_streaming.xclbin -c <input file_name>``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_streaming.xclbin -cfl <files.list>``
 
     - ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/lz4_dec_streaming_parallelByte8.rst b/data_compression/docs/source/L2/lz4_dec_streaming_parallelByte8.rst
index ec25afa8a2..e1a2de6e60 100644
--- a/data_compression/docs/source/L2/lz4_dec_streaming_parallelByte8.rst
+++ b/data_compression/docs/source/L2/lz4_dec_streaming_parallelByte8.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as **lz4** and it is present in ``./buil
 Executable Usage
 ----------------
 
-1. To execute single file for decompression             : ``./build/xil_lz4_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/decompress_streaming.xclbin -d <input file_name>``
-2. To execute multiple files for decompression    : ``./build/xil_lz4_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/decompress_streaming.xclbin -dfl <files.list>``
+1. To execute single file for decompression             : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/decompress_streaming.xclbin -d <input file_name>``
+2. To execute multiple files for decompression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/decompress_streaming.xclbin -dfl <files.list>``
 
     - ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/lz4_streaming.rst b/data_compression/docs/source/L2/lz4_streaming.rst
index 2ae5daf859..002ce6b217 100644
--- a/data_compression/docs/source/L2/lz4_streaming.rst
+++ b/data_compression/docs/source/L2/lz4_streaming.rst
@@ -44,12 +44,12 @@ Note: Overall throughput can still be increased with multiple compute units.
 Executable Usage
 ~~~~~~~~~~~~~~~
 
-1. To execute single file for compression 	: ``./build/xil_lz4_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin> -c <file_name>``
-2. To execute single file for decompression	: ``./build/xil_lz4_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin> -d <file_name.lz4>``
-3. To validate single file (compress & decompress) : ``./build/xil_lz4_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin> -t <file_name>``
-4. To execute multiple files for compression           : ``./build/xil_lz4_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin -cfl <files.list>``
-5. To execute multiple files for decompression          : ``./build/xil_lz4_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin -dfl <compressed files.list>``   
-6. To validate multiple files (compress & decompress)      : ``./build/xil_lz4_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin -l <files.list>``  
+1. To execute single file for compression 	: ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -c <file_name>``
+2. To execute single file for decompression	: ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -d <file_name.lz4>``
+3. To validate single file (compress & decompress) : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -t <file_name>``
+4. To execute multiple files for compression           : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -cfl <files.list>``
+5. To execute multiple files for decompression          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -dfl <compressed files.list>``   
+6. To validate multiple files (compress & decompress)      : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -l <files.list>``  
 	
       - ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/snappy.rst b/data_compression/docs/source/L2/snappy.rst
index 90b38a02c0..1b75349d9e 100644
--- a/data_compression/docs/source/L2/snappy.rst
+++ b/data_compression/docs/source/L2/snappy.rst
@@ -86,12 +86,12 @@ Hardware
 Executable Usage
 ----------------
  
-1. To execute single file for compression             : ``./build/xil_snappy -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -c <file_name>``
-2. To execute single file for decompression           : ``./build/xil_snappy -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -d <file_name.snappy>``
-3. To validate single file (compress & decompress)    : ``./build/xil_snappy -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -t <file_name>``
-4. To execute multiple files for compression     : ``./build/xil_snappy -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -cfl <files.list>``
-5. To execute multiple files for decompression     : ``./build/xil_snappy -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -dfl <compressed files.list>``
-6. To validate multiple files (compress and decompress) : ``./build/xil_snappy -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -l <files.list>``  
+1. To execute single file for compression             : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -c <file_name>``
+2. To execute single file for decompression           : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -d <file_name.snappy>``
+3. To validate single file (compress & decompress)    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -t <file_name>``
+4. To execute multiple files for compression     : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -cfl <files.list>``
+5. To execute multiple files for decompression     : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -dfl <compressed files.list>``
+6. To validate multiple files (compress and decompress) : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -l <files.list>``  
                
       - ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/snappy_compress.rst b/data_compression/docs/source/L2/snappy_compress.rst
index 550d7830c5..7e4a48e311 100644
--- a/data_compression/docs/source/L2/snappy_compress.rst
+++ b/data_compression/docs/source/L2/snappy_compress.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as **xil_snappy** and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression             : ``./build/xil_snappy -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <input file_name>``
-2. To execute multiple files for compression    : ``./build/xil_snappy -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list>``
+1. To execute single file for compression             : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <input file_name>``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list>``
 
     - ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/snappy_dec_streaming_parallelByte8.rst b/data_compression/docs/source/L2/snappy_dec_streaming_parallelByte8.rst
index 1a395855dc..5d2c69f3df 100644
--- a/data_compression/docs/source/L2/snappy_dec_streaming_parallelByte8.rst
+++ b/data_compression/docs/source/L2/snappy_dec_streaming_parallelByte8.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as **xil_snappy_decompress_streaming** a
 Executable Usage
 ----------------
 
-1. To execute single file for decompression             : ``./build/xil_snappy_decompress_streaming  -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/decompress_streaming.xclbin -d <input file_name>``
-2. To execute multiple files for decompression    : ``./build/xil_snappy_decompress_streaming  -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/decompress_streaming.xclbin -dfl <files.list>``
+1. To execute single file for decompression             : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy_decompress_streaming  -xbin ./build_dir.<TARGET mode>.<xsa_name>/decompress_streaming.xclbin -d <input file_name>``
+2. To execute multiple files for decompression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy_decompress_streaming  -xbin ./build_dir.<TARGET mode>.<xsa_name>/decompress_streaming.xclbin -dfl <files.list>``
 
     - ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/snappy_streaming.rst b/data_compression/docs/source/L2/snappy_streaming.rst
index c1eb4eda8d..dd508109b4 100644
--- a/data_compression/docs/source/L2/snappy_streaming.rst
+++ b/data_compression/docs/source/L2/snappy_streaming.rst
@@ -44,12 +44,12 @@ Note: Overall throughput can still be increased with multiple compute units.
 Executable Usage
 ~~~~~~~~~~~~~~~
                                                                                                                                                              
-1. To execute single file for compression   : ``./build/xil_snappy_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin> -c <file_name>``
-2. To execute single file for decompression : ``./build/xil_snappy_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin> -d <file_name.snappy>``
-3. To validate single file (compress & decompress) : ``./build/xil_snappy_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin> -t <files_name>``
-4. To execute multiple files for compression           : ``./build/xil_snappy_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin -cfl <files.list>``
-5. To execute multiple files for decompression          : ``./build/xil_snappy_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin -dfl <compressed files.list>``   
-6. To validate multiple files (compress & decompress)      : ``./build/xil_snappy_streaming -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress_streaming.xclbin -l <files.list>``  
+1. To execute single file for compression   : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -c <file_name>``
+2. To execute single file for decompression : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -d <file_name.snappy>``
+3. To validate single file (compress & decompress) : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -t <file_name>``
+4. To execute multiple files for compression           : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -cfl <files.list>``
+5. To execute multiple files for decompression          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -dfl <compressed files.list>``   
+6. To validate multiple files (compress & decompress)      : ``./build_dir.<TARGET mode>.<xsa_name>/xil_snappy_streaming -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress_streaming.xclbin> -l <files.list>``  
         
       - ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/zlibc.rst b/data_compression/docs/source/L2/zlibc.rst
index 4bb59c7133..1afb3ab74c 100644
--- a/data_compression/docs/source/L2/zlibc.rst
+++ b/data_compression/docs/source/L2/zlibc.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_zlib**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name> -zlib 1``
-2. To execute multiple files for compression    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list> -zlib 1``
+1. To execute single file for compression 	    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name> -zlib 1``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list> -zlib 1``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/zlibc_16KB.rst b/data_compression/docs/source/L2/zlibc_16KB.rst
index 8e463b949d..c2c4f8e501 100644
--- a/data_compression/docs/source/L2/zlibc_16KB.rst
+++ b/data_compression/docs/source/L2/zlibc_16KB.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_zlib**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name> -zlib 1``
-2. To execute multiple files for compression    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list> -zlib 1``
+1. To execute single file for compression 	    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name> -zlib 1``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list> -zlib 1``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/zlibc_8KB.rst b/data_compression/docs/source/L2/zlibc_8KB.rst
index 2ce564fc5d..aa491c8227 100644
--- a/data_compression/docs/source/L2/zlibc_8KB.rst
+++ b/data_compression/docs/source/L2/zlibc_8KB.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_zlib**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name> -zlib 1``
-2. To execute multiple files for compression    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list> -zlib 1``
+1. To execute single file for compression 	    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name> -zlib 1``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list> -zlib 1``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/zlibc_static.rst b/data_compression/docs/source/L2/zlibc_static.rst
index cdaee09baf..5ccc8a55a7 100644
--- a/data_compression/docs/source/L2/zlibc_static.rst
+++ b/data_compression/docs/source/L2/zlibc_static.rst
@@ -11,8 +11,8 @@ The binary host file generated is named as "**xil_zlib**" and it is present in `
 Executable Usage
 ----------------
 
-1. To execute single file for compression 	    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <file_name> -zlib 1``
-2. To execute multiple files for compression    : ``./build/xil_zlib -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list> -zlib 1``
+1. To execute single file for compression 	    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <file_name> -zlib 1``
+2. To execute multiple files for compression    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list> -zlib 1``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/zstd_quadcore_compress.rst b/data_compression/docs/source/L2/zstd_quadcore_compress.rst
index 0c7f25e522..d32d00f0bd 100644
--- a/data_compression/docs/source/L2/zstd_quadcore_compress.rst
+++ b/data_compression/docs/source/L2/zstd_quadcore_compress.rst
@@ -37,8 +37,8 @@ Average Compression Ratio	    2.68x (Silesia Benchmark)
 
 Executable Usage:
 
-1. To execute single file for decompression           : ``./build/xil_zlib -cx ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -c <compressed file_name>``
-2. To decompress multiple files                       : ``./build/xil_zlib -cx ./build/xclbin_<xsa_name>_<TARGET mode>/compress.xclbin -cfl <files.list>``
+1. To execute single file for compression             : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -cx ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -c <input file_name>``
+2. To compress multiple files                         : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -cx ./build_dir.<TARGET mode>.<xsa_name>/compress.xclbin -cfl <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L2/zstdd_32KB.rst b/data_compression/docs/source/L2/zstdd_32KB.rst
index 276a59be1d..147a17f129 100644
--- a/data_compression/docs/source/L2/zstdd_32KB.rst
+++ b/data_compression/docs/source/L2/zstdd_32KB.rst
@@ -38,8 +38,8 @@ Performance Data
 
 Executable Usage:
 
-1. To execute single file for decompression           : ``./build/xil_zlib -dx ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -d <compressed file_name>``
-2. To decompress multiple files                       : ``./build/xil_zlib -dx ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -l <files.list>``
+1. To execute single file for decompression           : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -dx ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -d <compressed file_name>``
+2. To decompress multiple files                       : ``./build_dir.<TARGET mode>.<xsa_name>/xil_zlib -dx ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -l <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/source/L3/gzip_app.rst b/data_compression/docs/source/L3/gzip_app.rst
index a7a97143a3..ef134d8128 100644
--- a/data_compression/docs/source/L3/gzip_app.rst
+++ b/data_compression/docs/source/L3/gzip_app.rst
@@ -39,19 +39,19 @@ The host executable generated is named as "**xil_gzip**" and it is generated in
 
 Following is the usage of the executable:
 
-1. To execute single file for compression 	          : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -c <input file_name>``
-2. To execute single file for decompression           : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -d <compressed file_name>``
-3. To validate single file (compress & decompress)    : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -t <input file_name>``
-4. To execute multiple files for compression          : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -cfl <files.list>``
-5. To execute multiple files for decompression        : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -dfl <compressed files.list>``
-6. To validate multiple files (compress & decompress) : ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -l <files.list>``
+1. To execute single file for compression 	          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -c <input file_name>``
+2. To execute single file for decompression           : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -d <compressed file_name>``
+3. To validate single file (compress & decompress)    : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -t <input file_name>``
+4. To execute multiple files for compression          : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -cfl <files.list>``
+5. To execute multiple files for decompression        : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -dfl <compressed files.list>``
+6. To validate multiple files (compress & decompress) : ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -l <files.list>``
 
 	- ``<files.list>``: Contains various file names with current path
 
 The default design flow is GZIP design to run the ZLIB, enable the switch ``-zlib`` in the command line, as mentioned below:
-``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -c <input file_name> -zlib 1``
+``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -c <input file_name> -zlib 1``
 
-The -xbin option mentioned above is optional, you can provide path to your binary file using -xbin option otherwise it will by default map to ``./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin`` 
+The ``-xbin`` option shown above is optional: use it to provide the path to your binary file; otherwise, the path defaults to ``./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin``.
 
 
 The usage of the generated executable is as follows:
diff --git a/data_compression/docs/source/L3/lz4_app.rst b/data_compression/docs/source/L3/lz4_app.rst
index 55d9d0d7ca..dd042b0660 100644
--- a/data_compression/docs/source/L3/lz4_app.rst
+++ b/data_compression/docs/source/L3/lz4_app.rst
@@ -17,12 +17,12 @@ This application is present in ``L3/demos/lz4_app`` directory. Follow build inst
 
 The binary host file generated is named as "**xil_lz4**" and it is present in ``./build`` directory.
 
-1. To execute single file for compression 	: ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -c <file_name>``
-2. To execute single file for decompression	: ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -d <file_name.lz4>``
-3. To validate single file (compress & decompress) : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -t <input file_name>``
-4. To execute multiple files for compression        : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -cfl <files.list>``
-5. To execute multiple files for decompression        : ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -dfl <compressed files.list>``   
-6. To validate multiple files files (compress & decompress)		: ``./build/xil_lz4 -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/<compress_decompress.xclbin> -l <files.list>``
+1. To execute single file for compression 	: ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -c <file_name>``
+2. To execute single file for decompression	: ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -d <file_name.lz4>``
+3. To validate single file (compress & decompress) : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -t <input file_name>``
+4. To execute multiple files for compression        : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -cfl <files.list>``
+5. To execute multiple files for decompression        : ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -dfl <compressed files.list>``   
+6. To validate multiple files (compress & decompress)		: ``./build_dir.<TARGET mode>.<xsa_name>/xil_lz4 -xbin ./build_dir.<TARGET mode>.<xsa_name>/<compress_decompress.xclbin> -l <files.list>``
 	
 	- ``<files.list>``: Contains various file names with current path
 
diff --git a/data_compression/docs/tutorial.rst b/data_compression/docs/tutorial.rst
index 453c4b0ad2..a6ed80dd29 100644
--- a/data_compression/docs/tutorial.rst
+++ b/data_compression/docs/tutorial.rst
@@ -48,9 +48,12 @@ L3 API are more scalable solutions to achieve maximum performance with optimized
 
 This demo is aimed at showcasing Xilinx Alveo U250 acceleration of Gzip_app and Xilinx Alveo U50 (HBM Platform) acceleration of Gzip_hbm  for both compression and decompression, it also supports Zlib with a host argument switch.
 
-Tested Tool: 2021.2
-Tested XRT:  2021.2
-Tested XSA:  xilinx_u50_gen3x16_xdma_201920_3
+.. code-block:: shell
+   
+   Tested Tool:  2022.1
+   Tested XRT :  2022.1
+   Tested XSA :  xilinx_u250_gen3x16_xdma_4_1_202210_1
+   Tested XSA :  xilinx_u50_gen3x16_xdma_5_202210_1
 
 +---------------------------------------------------------------------------------------------------------+--------------------------------+-------------------+----------+---------+-------+-------+--------+------------------------------------------------+
 | Flow                                                                                                    |Target Compute units            |Compression-Ratio  |  FMax    |  LUT    |  BRAM |  URAM | Memory | Througput                                      |
@@ -63,29 +66,29 @@ Tested XSA:  xilinx_u50_gen3x16_xdma_201920_3
 
 This application is present under ``L3/demos`` directory. Follow build instructions to generate executable and binary.
 
-The host executable generated is named as "**xil_gzip**" and it is generated in ``./build`` directory.
+The host executable generated is named as "**xil_gzip**" and it is generated in ``./build_dir.<TARGET mode>.<xsa_name>/`` directory.
 
 Executable Usage
 ----------------
 
 1. To execute single file for compression               
-                                          ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -c <input file_name>``
+                                          ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -c <input file_name>``
 
 2. To execute single file for decompression           :
-                                            ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -d <compressed file_name>``
+                                            ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin  -d <compressed file_name>``
 
 3. To validate single file (compress & decompress) 
-                                            ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -t <input file_name>``
+                                            ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -t <input file_name>``
 4. To execute multiple files for compression   
-                                            ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -cfl <files.list>``
+                                            ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -cfl <files.list>``
 5. To execute multiple files for decompression   
-                                             ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -dfl <compressed files.list>``
+                                             ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -dfl <compressed files.list>``
 6. To validate multiple files (compress & decompress) 
-                                             ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -l <files.list>``
+                                             ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -l <files.list>``
 
     - ``<files.list>``: Contains various file names with current path
 
-The default design flow is GZIP design to run the ZLIB, enable the switch ``-zlib`` in the command line, as mentioned below: ``./build/xil_gzip -xbin ./build/xclbin_<xsa_name>_<TARGET mode>/compress_decompress.xclbin -c <input file_name> -zlib 1``
+The default design flow is GZIP. To run ZLIB instead, enable the ``-zlib`` switch on the command line, as shown below: ``./build_dir.<TARGET mode>.<xsa_name>/xil_gzip -xbin ./build_dir.<TARGET mode>.<xsa_name>/compress_decompress.xclbin -c <input file_name> -zlib 1``
 
 
 L2 API
@@ -129,7 +132,7 @@ Commands to Run L2 and L3 cases
 
     cd L2/tests/    
     # build and run one of the following using U250 platform
-    make run TARGET=sw_emu DEVICE=/path/to/xilinx_u250_gen3x16_xdma_3_1_202020_1/
+    make run TARGET=sw_emu PLATFORM=/path/to/xilinx_u250_gen3x16_xdma_4_1_202210_1/
     
     # delete generated files
     make cleanall
@@ -164,7 +167,7 @@ Command to Run L1 cases
     cd L1/tests/
     
     make run CSIM=1 CSYNTH=0 COSIM=0 VIVADO_SYN=0 VIVADO_IMPL=0 \
-        DEVICE=/path/to/xilinx_u250_gen3x16_xdma_3_1_202020_1/
+        PLATFORM=/path/to/xilinx_u250_gen3x16_xdma_4_1_202210_1/
 
 Test control variables are:
 
diff --git a/dsp/Jenkinsfile b/dsp/Jenkinsfile
index 381833e48d..49f272f37f 100644
--- a/dsp/Jenkinsfile
+++ b/dsp/Jenkinsfile
@@ -1,7 +1,7 @@
 @Library('pipeline-library')_
 
-VitisLibPipeline (branch: 'next', libname: 'xf_dsp', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build:vitis_hw_run:vitis_aie_sim:vitis_aie_x86sim',
-                  upstream_dependencies: 'xf_utils_hw,next,../utils; dsplib_internal_scripts,main,../dsplib_internal_scripts',
-                  devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_stable_latest',
+VitisLibPipeline (branch: 'main', libname: 'xf_dsp', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build:vitis_hw_run:vitis_aie_sim:vitis_aie_x86sim',
+                  upstream_dependencies: 'xf_utils_hw,main,../utils; dsplib_internal_scripts,main,../dsplib_internal_scripts',
+                  devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_released',
                   post_launch: '../dsplib_internal_scripts/scripts/create_html_report.sh')
 
diff --git a/dsp/L2/include/aie/fft_bufs.h b/dsp/L2/include/aie/fft_bufs.h
index 6682277ae0..2c7da84845 100644
--- a/dsp/L2/include/aie/fft_bufs.h
+++ b/dsp/L2/include/aie/fft_bufs.h
@@ -19,56 +19,56 @@
 #include "fft_com_inc.h"
 
 // Inter-rank temporary storage buffers
-alignas(32) extern cint32_t fft_16_tmp1[FFT16_SIZE];
-alignas(32) extern cint32_t fft_16_tmp2[FFT16_SIZE];
-alignas(32) extern cint32_t fft_32_tmp1[FFT32_SIZE];
-alignas(32) extern cint32_t fft_32_tmp2[FFT32_SIZE];
-alignas(32) extern cint32_t fft_64_tmp1[FFT64_SIZE];
-alignas(32) extern cint32_t fft_64_tmp2[FFT64_SIZE];
-alignas(32) extern cint32_t fft_128_tmp1[FFT128_SIZE];
-alignas(32) extern cint32_t fft_128_tmp2[FFT128_SIZE];
-alignas(32) extern cint32_t fft_256_tmp1[FFT256_SIZE];
-alignas(32) extern cint32_t fft_256_tmp2[FFT256_SIZE];
-alignas(32) extern cint32_t fft_512_tmp1[FFT512_SIZE];
-alignas(32) extern cint32_t fft_512_tmp2[FFT512_SIZE];
-alignas(32) extern cint32_t fft_1024_tmp1[FFT1024_SIZE];
-alignas(32) extern cint32_t fft_1024_tmp2[FFT1024_SIZE];
-alignas(32) extern cint32_t fft_2048_tmp1[FFT2048_SIZE];
-alignas(32) extern cint32_t fft_2048_tmp2[FFT2048_SIZE];
-alignas(32) extern cint32_t fft_4096_tmp1[FFT4096_SIZE];
-alignas(32) extern cint32_t fft_4096_tmp2[FFT4096_SIZE];
+alignas(32) cint32_t fft_16_tmp1[FFT16_SIZE];
+alignas(32) cint32_t fft_16_tmp2[FFT16_SIZE];
+alignas(32) cint32_t fft_32_tmp1[FFT32_SIZE];
+alignas(32) cint32_t fft_32_tmp2[FFT32_SIZE];
+alignas(32) cint32_t fft_64_tmp1[FFT64_SIZE];
+alignas(32) cint32_t fft_64_tmp2[FFT64_SIZE];
+alignas(32) cint32_t fft_128_tmp1[FFT128_SIZE];
+alignas(32) cint32_t fft_128_tmp2[FFT128_SIZE];
+alignas(32) cint32_t fft_256_tmp1[FFT256_SIZE];
+alignas(32) cint32_t fft_256_tmp2[FFT256_SIZE];
+alignas(32) cint32_t fft_512_tmp1[FFT512_SIZE];
+alignas(32) cint32_t fft_512_tmp2[FFT512_SIZE];
+alignas(32) cint32_t fft_1024_tmp1[FFT1024_SIZE];
+alignas(32) cint32_t fft_1024_tmp2[FFT1024_SIZE];
+alignas(32) cint32_t fft_2048_tmp1[FFT2048_SIZE];
+alignas(32) cint32_t fft_2048_tmp2[FFT2048_SIZE];
+alignas(32) cint32_t fft_4096_tmp1[FFT4096_SIZE];
+alignas(32) cint32_t fft_4096_tmp2[FFT4096_SIZE];
 
 // Twiddle tables
 // Half-size integer tables
 // This is an optimization possible because in a radix4 unit, the second rank butterflies use the same
 // twiddle just 90 degrees (minus j) rotated. Minus J rotation is supported by hw, so only the first
 // quadrant need be stores - the other quadrant can be extracted by minus j rotation.
-alignas(32) extern const cint16_t fft_lut_tw1_half[1];
-alignas(32) extern const cint16_t fft_lut_tw2_half[1];
-alignas(32) extern const cint16_t fft_lut_tw4_half[2];
-alignas(32) extern const cint16_t fft_lut_tw8_half[4];
-alignas(32) extern const cint16_t fft_lut_tw16_half[FFT_16 / 2];
-alignas(32) extern const cint16_t fft_lut_tw32_half[FFT_32 / 2];
-alignas(32) extern const cint16_t fft_lut_tw64_half[FFT_64 / 2];
-alignas(32) extern const cint16_t fft_lut_tw128_half[FFT_128 / 2];
-alignas(32) extern const cint16_t fft_lut_tw256_half[FFT_256 / 2];
-alignas(32) extern const cint16_t fft_lut_tw512_half[FFT_512 / 2];
-alignas(32) extern const cint16_t fft_lut_tw1024_half[FFT_1024 / 2];
-alignas(32) extern const cint16_t fft_lut_tw2048_half[FFT_2048 / 2];
+alignas(32) cint16_t fft_lut_tw2_half[1];
+alignas(32) cint16_t fft_lut_tw4_half[2];
+alignas(32) cint16_t fft_lut_tw8_half[4];
+alignas(32) cint16_t fft_lut_tw1_half[1];
+alignas(32) cint16_t fft_lut_tw16_half[FFT_16 / 2];
+alignas(32) cint16_t fft_lut_tw32_half[FFT_32 / 2];
+alignas(32) cint16_t fft_lut_tw64_half[FFT_64 / 2];
+alignas(32) cint16_t fft_lut_tw128_half[FFT_128 / 2];
+alignas(32) cint16_t fft_lut_tw256_half[FFT_256 / 2];
+alignas(32) cint16_t fft_lut_tw512_half[FFT_512 / 2];
+alignas(32) cint16_t fft_lut_tw1024_half[FFT_1024 / 2];
+alignas(32) cint16_t fft_lut_tw2048_half[FFT_2048 / 2];
 
 // Full (2 quadrant) integer tables
-alignas(32) extern const cint16_t fft_lut_tw1[FFT_1];
-alignas(32) extern const cint16_t fft_lut_tw2[FFT_2];
-alignas(32) extern const cint16_t fft_lut_tw4[FFT_4];
-alignas(32) extern const cint16_t fft_lut_tw8[FFT_8];
-alignas(32) extern const cint16_t fft_lut_tw16[FFT_16];
-alignas(32) extern const cint16_t fft_lut_tw32[FFT_32];
-alignas(32) extern const cint16_t fft_lut_tw64[FFT_64];
-alignas(32) extern const cint16_t fft_lut_tw128[FFT_128];
-alignas(32) extern const cint16_t fft_lut_tw256[FFT_256];
-alignas(32) extern const cint16_t fft_lut_tw512[FFT_512];
-alignas(32) extern const cint16_t fft_lut_tw1024[FFT_1024];
-alignas(32) extern const cint16_t fft_lut_tw2048[FFT_2048];
+alignas(32) cint16_t fft_lut_tw1[FFT_1];
+alignas(32) cint16_t fft_lut_tw2[FFT_2];
+alignas(32) cint16_t fft_lut_tw4[FFT_4];
+alignas(32) cint16_t fft_lut_tw8[FFT_8];
+alignas(32) cint16_t fft_lut_tw16[FFT_16];
+alignas(32) cint16_t fft_lut_tw32[FFT_32];
+alignas(32) cint16_t fft_lut_tw64[FFT_64];
+alignas(32) cint16_t fft_lut_tw128[FFT_128];
+alignas(32) cint16_t fft_lut_tw256[FFT_256];
+alignas(32) cint16_t fft_lut_tw512[FFT_512];
+alignas(32) cint16_t fft_lut_tw1024[FFT_1024];
+alignas(32) cint16_t fft_lut_tw2048[FFT_2048];
 
 // Full (2 quadrant) float tables.
 // Float cannot use the one quadrant trick because float cannot use radix4 functions.
@@ -76,17 +76,17 @@ alignas(32) extern const cint16_t fft_lut_tw2048[FFT_2048];
 // This means that the acc registers are unavailable to store data in float and this means
 // there is not the capacity in registers required for the storage of inter-rank values in a radix 4
 // stage, hence float uses radix2.
-alignas(32) extern const cfloat fft_lut_tw1_cfloat[FFT_1];
-alignas(32) extern const cfloat fft_lut_tw2_cfloat[FFT_2];
-alignas(32) extern const cfloat fft_lut_tw4_cfloat[FFT_4];
-alignas(32) extern const cfloat fft_lut_tw8_cfloat[FFT_8];
-alignas(32) extern const cfloat fft_lut_tw16_cfloat[FFT_16];
-alignas(32) extern const cfloat fft_lut_tw32_cfloat[FFT_32];
-alignas(32) extern const cfloat fft_lut_tw64_cfloat[FFT_64];
-alignas(32) extern const cfloat fft_lut_tw128_cfloat[FFT_128];
-alignas(32) extern const cfloat fft_lut_tw256_cfloat[FFT_256];
-alignas(32) extern const cfloat fft_lut_tw512_cfloat[FFT_512];
-alignas(32) extern const cfloat fft_lut_tw1024_cfloat[FFT_1024];
-alignas(32) extern const cfloat fft_lut_tw2048_cfloat[FFT_2048];
+alignas(32) cfloat fft_lut_tw1_cfloat[FFT_1];
+alignas(32) cfloat fft_lut_tw2_cfloat[FFT_2];
+alignas(32) cfloat fft_lut_tw4_cfloat[FFT_4];
+alignas(32) cfloat fft_lut_tw8_cfloat[FFT_8];
+alignas(32) cfloat fft_lut_tw16_cfloat[FFT_16];
+alignas(32) cfloat fft_lut_tw32_cfloat[FFT_32];
+alignas(32) cfloat fft_lut_tw64_cfloat[FFT_64];
+alignas(32) cfloat fft_lut_tw128_cfloat[FFT_128];
+alignas(32) cfloat fft_lut_tw256_cfloat[FFT_256];
+alignas(32) cfloat fft_lut_tw512_cfloat[FFT_512];
+alignas(32) cfloat fft_lut_tw1024_cfloat[FFT_1024];
+alignas(32) cfloat fft_lut_tw2048_cfloat[FFT_2048];
 
 #endif /* __FFT_BUFS_H__ */
diff --git a/dsp/L2/include/aie/fir_decimate_asym_graph.hpp b/dsp/L2/include/aie/fir_decimate_asym_graph.hpp
index c5dbad9060..769ebd337f 100644
--- a/dsp/L2/include/aie/fir_decimate_asym_graph.hpp
+++ b/dsp/L2/include/aie/fir_decimate_asym_graph.hpp
@@ -524,9 +524,9 @@ class fir_decimate_asym_graph : public graph {
         }
     }
 
+   public:
     kernel m_firKernels[TP_CASC_LEN];
 
-   public:
     /**
      * The input data to the function. This input is either a window API of
      * samples of TT_DATA type or stream API (depending on TP_API).
diff --git a/dsp/L2/include/aie/fir_decimate_hb_graph.hpp b/dsp/L2/include/aie/fir_decimate_hb_graph.hpp
index 4bfc1fc6af..299e01b40c 100644
--- a/dsp/L2/include/aie/fir_decimate_hb_graph.hpp
+++ b/dsp/L2/include/aie/fir_decimate_hb_graph.hpp
@@ -516,9 +516,8 @@ class fir_decimate_hb_graph : public graph {
         }
     }
 
-    kernel m_firKernels[TP_CASC_LEN];
-
    public:
+    kernel m_firKernels[TP_CASC_LEN];
     /**
      * The input data to the function. This input is either a window API of
      * samples of TT_DATA type or stream API (depending on TP_API).
diff --git a/dsp/L2/include/aie/fir_decimate_sym_graph.hpp b/dsp/L2/include/aie/fir_decimate_sym_graph.hpp
index 068fd4c4df..d9c64fd1de 100644
--- a/dsp/L2/include/aie/fir_decimate_sym_graph.hpp
+++ b/dsp/L2/include/aie/fir_decimate_sym_graph.hpp
@@ -548,9 +548,9 @@ class fir_decimate_sym_graph : public graph {
         }
     }
 
+   public:
     kernel m_firKernels[TP_CASC_LEN];
 
-   public:
     /**
      * The input data to the function. This input is either a window API of
      * samples of TT_DATA type or stream API (depending on TP_API).
diff --git a/dsp/L2/include/aie/fir_interpolate_asym_graph.hpp b/dsp/L2/include/aie/fir_interpolate_asym_graph.hpp
index 0d8cc0345a..be47cd295a 100644
--- a/dsp/L2/include/aie/fir_interpolate_asym_graph.hpp
+++ b/dsp/L2/include/aie/fir_interpolate_asym_graph.hpp
@@ -545,9 +545,9 @@ class fir_interpolate_asym_graph : public graph {
         }
     }
 
+   public:
     kernel m_firKernels[TP_CASC_LEN];
 
-   public:
     /**
      * The input data to the function. This input is either a window API of
      * samples of TT_DATA type or stream API (depending on TP_API).
diff --git a/dsp/L2/include/aie/fir_interpolate_hb_graph.hpp b/dsp/L2/include/aie/fir_interpolate_hb_graph.hpp
index ba8e4afed0..4d37a39eb2 100644
--- a/dsp/L2/include/aie/fir_interpolate_hb_graph.hpp
+++ b/dsp/L2/include/aie/fir_interpolate_hb_graph.hpp
@@ -539,9 +539,9 @@ class fir_interpolate_hb_graph : public graph {
         }
     }
 
+   public:
     kernel m_firKernels[TP_CASC_LEN];
 
-   public:
     /**
      * The input data to the function. This input is either a window API of
      * samples of TT_DATA type or stream API (depending on TP_API).
diff --git a/dsp/L2/include/aie/fir_resampler_graph.hpp b/dsp/L2/include/aie/fir_resampler_graph.hpp
index a4e788c058..04479a158f 100644
--- a/dsp/L2/include/aie/fir_resampler_graph.hpp
+++ b/dsp/L2/include/aie/fir_resampler_graph.hpp
@@ -599,9 +599,10 @@ class fir_resampler_graph : public graph {
             source(m_firKernels[i]) = "fir_resampler.cpp";
         }
     }
-    kernel m_firKernels[TP_CASC_LEN];
 
    public:
+    kernel m_firKernels[TP_CASC_LEN];
+
     /**
      * The input data to the function. This input is either a window API of
      * samples of TT_DATA type or stream API (depending on TP_API).
diff --git a/dsp/L2/include/aie/fir_sr_asym_graph.hpp b/dsp/L2/include/aie/fir_sr_asym_graph.hpp
index 0827b9f0e5..e08004c2c5 100644
--- a/dsp/L2/include/aie/fir_sr_asym_graph.hpp
+++ b/dsp/L2/include/aie/fir_sr_asym_graph.hpp
@@ -470,8 +470,6 @@ class fir_sr_asym_graph : public graph {
     static constexpr unsigned int RTP_PORT_POS =
         ((TP_DUAL_IP == DUAL_IP_DUAL) ? ((TP_CASC_IN == CASC_IN_TRUE) ? 3 : 2) : 1);
 
-    kernel m_firKernels[TP_SSR * TP_SSR * TP_CASC_LEN];
-
     /**
      * @brief Helper Aliases
      */
@@ -740,6 +738,7 @@ class fir_sr_asym_graph : public graph {
                   "Module size of 32kB");
 
    public:
+    kernel m_firKernels[TP_SSR * TP_SSR * TP_CASC_LEN];
     /**
      * The input data array to the function. This input array is either a window API of
      * samples of TT_DATA type or stream API (depending on TP_API).
diff --git a/dsp/L2/include/aie/fir_sr_sym_graph.hpp b/dsp/L2/include/aie/fir_sr_sym_graph.hpp
index 3b4cbf6d85..b0ca7096b8 100644
--- a/dsp/L2/include/aie/fir_sr_sym_graph.hpp
+++ b/dsp/L2/include/aie/fir_sr_sym_graph.hpp
@@ -418,9 +418,9 @@ class fir_sr_sym_graph : public graph {
         }
     }
 
+   public:
     kernel m_firKernels[TP_CASC_LEN];
 
-   public:
     /**
      * The input data to the function. This input is either a window API of
      * samples of TT_DATA type or stream API (depending on TP_API).
diff --git a/dsp/L2/tests/aie/dds_mixer/Makefile b/dsp/L2/tests/aie/dds_mixer/Makefile
index 605792d555..688949a664 100644
--- a/dsp/L2/tests/aie/dds_mixer/Makefile
+++ b/dsp/L2/tests/aie/dds_mixer/Makefile
@@ -1,5 +1,5 @@
+# Copyright 2019-2022 Xilinx, Inc.
 #
-# Copyright 2022 Xilinx, Inc.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -38,9 +38,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -184,14 +190,15 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := 
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -232,11 +239,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -270,21 +272,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -357,16 +359,7 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
-	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
-else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
-	
-endif
-else ifeq ($(HOST_ARCH), x86)
+ifeq ($(HOST_ARCH), x86)
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
diff --git a/dsp/L2/tests/aie/dds_mixer/utils.mk b/dsp/L2/tests/aie/dds_mixer/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/dsp/L2/tests/aie/dds_mixer/utils.mk
+++ b/dsp/L2/tests/aie/dds_mixer/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Non-x86 hosts only. Aborts make unless the path held in K_IMAGE exists (K_IMAGE defaults to a location under SYSROOT).
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # Non-x86 hosts only. Aborts make unless the path held in ROOTFS exists (ROOTFS defaults to a location under SYSROOT).
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/dsp/L2/tests/aie/fft_ifft_dit_1ch/Makefile b/dsp/L2/tests/aie/fft_ifft_dit_1ch/Makefile
index 76ccee6b6a..973788b121 100644
--- a/dsp/L2/tests/aie/fft_ifft_dit_1ch/Makefile
+++ b/dsp/L2/tests/aie/fft_ifft_dit_1ch/Makefile
@@ -1,5 +1,5 @@
+# Copyright 2019-2022 Xilinx, Inc.
 #
-# Copyright 2022 Xilinx, Inc.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -38,9 +38,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -182,14 +188,15 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := 
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -230,11 +237,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -268,21 +270,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -348,16 +350,7 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
-	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
-else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
-	
-endif
-else ifeq ($(HOST_ARCH), x86)
+ifeq ($(HOST_ARCH), x86)
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
diff --git a/dsp/L2/tests/aie/fft_ifft_dit_1ch/utils.mk b/dsp/L2/tests/aie/fft_ifft_dit_1ch/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/dsp/L2/tests/aie/fft_ifft_dit_1ch/utils.mk
+++ b/dsp/L2/tests/aie/fft_ifft_dit_1ch/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Non-x86 hosts only. Aborts make unless the path held in K_IMAGE exists (K_IMAGE defaults to a location under SYSROOT).
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # Non-x86 hosts only. Aborts make unless the path held in ROOTFS exists (ROOTFS defaults to a location under SYSROOT).
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/dsp/L2/tests/aie/fir_decimate_asym/Makefile b/dsp/L2/tests/aie/fir_decimate_asym/Makefile
index 34a80721a6..7151b26127 100644
--- a/dsp/L2/tests/aie/fir_decimate_asym/Makefile
+++ b/dsp/L2/tests/aie/fir_decimate_asym/Makefile
@@ -1,5 +1,5 @@
+# Copyright 2019-2022 Xilinx, Inc.
 #
-# Copyright 2022 Xilinx, Inc.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -38,9 +38,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -201,14 +207,15 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := 
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -249,11 +256,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -287,21 +289,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony check_* targets run first but must not force repackaging on every make
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -374,16 +376,7 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
-	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
-else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
-	
-endif
-else ifeq ($(HOST_ARCH), x86)
+ifeq ($(HOST_ARCH), x86)
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
diff --git a/dsp/L2/tests/aie/fir_decimate_asym/utils.mk b/dsp/L2/tests/aie/fir_decimate_asym/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/dsp/L2/tests/aie/fir_decimate_asym/utils.mk
+++ b/dsp/L2/tests/aie/fir_decimate_asym/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,37 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default to files located relative to SYSROOT;
+# "?=" leaves any value already supplied by the user untouched.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# The check_* targets abort the build when the corresponding path is missing on disk.
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+	$(error SYSROOT '$(SYSROOT)' is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/dsp/L2/tests/aie/fir_decimate_hb/Makefile b/dsp/L2/tests/aie/fir_decimate_hb/Makefile
index 659da33a3c..2bb17877ff 100644
--- a/dsp/L2/tests/aie/fir_decimate_hb/Makefile
+++ b/dsp/L2/tests/aie/fir_decimate_hb/Makefile
@@ -1,5 +1,5 @@
+# Copyright 2019-2022 Xilinx, Inc.
 #
-# Copyright 2022 Xilinx, Inc.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -38,9 +38,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -199,14 +205,15 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := 
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -247,11 +254,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -285,21 +287,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony check_* targets run first but must not force repackaging on every make
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -372,16 +374,7 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
-	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
-else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
-	
-endif
-else ifeq ($(HOST_ARCH), x86)
+ifeq ($(HOST_ARCH), x86)
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
diff --git a/dsp/L2/tests/aie/fir_decimate_hb/utils.mk b/dsp/L2/tests/aie/fir_decimate_hb/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/dsp/L2/tests/aie/fir_decimate_hb/utils.mk
+++ b/dsp/L2/tests/aie/fir_decimate_hb/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,37 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default to files located relative to SYSROOT;
+# "?=" leaves any value already supplied by the user untouched.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# The check_* targets abort the build when the corresponding path is missing on disk.
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+	$(error SYSROOT '$(SYSROOT)' is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/dsp/L2/tests/aie/fir_decimate_sym/Makefile b/dsp/L2/tests/aie/fir_decimate_sym/Makefile
index e407a275a7..4e5cab0c2f 100644
--- a/dsp/L2/tests/aie/fir_decimate_sym/Makefile
+++ b/dsp/L2/tests/aie/fir_decimate_sym/Makefile
@@ -1,5 +1,5 @@
+# Copyright 2019-2022 Xilinx, Inc.
 #
-# Copyright 2022 Xilinx, Inc.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -38,9 +38,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -199,14 +205,15 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := 
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -247,11 +254,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -285,21 +287,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony check_* targets run first but must not force repackaging on every make
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -372,16 +374,7 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
-	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
-else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
-	
-endif
-else ifeq ($(HOST_ARCH), x86)
+ifeq ($(HOST_ARCH), x86)
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
diff --git a/dsp/L2/tests/aie/fir_decimate_sym/utils.mk b/dsp/L2/tests/aie/fir_decimate_sym/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/dsp/L2/tests/aie/fir_decimate_sym/utils.mk
+++ b/dsp/L2/tests/aie/fir_decimate_sym/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,37 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default to files located relative to SYSROOT;
+# "?=" leaves any value already supplied by the user untouched.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# The check_* targets abort the build when the corresponding path is missing on disk.
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+	$(error SYSROOT '$(SYSROOT)' is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/dsp/L2/tests/aie/fir_interpolate_asym/Makefile b/dsp/L2/tests/aie/fir_interpolate_asym/Makefile
index 09235392e2..568cb1f0e6 100644
--- a/dsp/L2/tests/aie/fir_interpolate_asym/Makefile
+++ b/dsp/L2/tests/aie/fir_interpolate_asym/Makefile
@@ -1,5 +1,5 @@
+# Copyright 2019-2022 Xilinx, Inc.
 #
-# Copyright 2022 Xilinx, Inc.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -38,9 +38,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -200,14 +206,15 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := 
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -248,11 +255,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -286,21 +288,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony check_* targets run first but must not force repackaging on every make
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -373,16 +375,7 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
-	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
-else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
-	
-endif
-else ifeq ($(HOST_ARCH), x86)
+ifeq ($(HOST_ARCH), x86)
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
diff --git a/dsp/L2/tests/aie/fir_interpolate_asym/utils.mk b/dsp/L2/tests/aie/fir_interpolate_asym/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/dsp/L2/tests/aie/fir_interpolate_asym/utils.mk
+++ b/dsp/L2/tests/aie/fir_interpolate_asym/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,37 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default to files located relative to SYSROOT;
+# "?=" leaves any value already supplied by the user untouched.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# The check_* targets abort the build when the corresponding path is missing on disk.
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+	$(error SYSROOT '$(SYSROOT)' is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/dsp/L2/tests/aie/fir_interpolate_fract_asym/Makefile b/dsp/L2/tests/aie/fir_interpolate_fract_asym/Makefile
index d959dffbc9..d44bceab3c 100644
--- a/dsp/L2/tests/aie/fir_interpolate_fract_asym/Makefile
+++ b/dsp/L2/tests/aie/fir_interpolate_fract_asym/Makefile
@@ -1,5 +1,5 @@
+# Copyright 2019-2022 Xilinx, Inc.
 #
-# Copyright 2022 Xilinx, Inc.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -38,9 +38,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -198,14 +204,15 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := 
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -246,11 +253,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -284,21 +286,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony check_* targets run first but must not force repackaging on every make
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -371,16 +373,7 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
-	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
-else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
-	
-endif
-else ifeq ($(HOST_ARCH), x86)
+ifeq ($(HOST_ARCH), x86)
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
diff --git a/dsp/L2/tests/aie/fir_interpolate_fract_asym/utils.mk b/dsp/L2/tests/aie/fir_interpolate_fract_asym/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/dsp/L2/tests/aie/fir_interpolate_fract_asym/utils.mk
+++ b/dsp/L2/tests/aie/fir_interpolate_fract_asym/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/dsp/L2/tests/aie/fir_interpolate_hb/Makefile b/dsp/L2/tests/aie/fir_interpolate_hb/Makefile
index 05d39424cc..ee1893ef8e 100644
--- a/dsp/L2/tests/aie/fir_interpolate_hb/Makefile
+++ b/dsp/L2/tests/aie/fir_interpolate_hb/Makefile
@@ -1,5 +1,5 @@
+# Copyright 2019-2022 Xilinx, Inc.
 #
-# Copyright 2022 Xilinx, Inc.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -38,9 +38,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -200,14 +206,15 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := 
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -248,11 +255,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -286,21 +288,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -373,16 +375,7 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
-	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
-else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
-	
-endif
-else ifeq ($(HOST_ARCH), x86)
+ifeq ($(HOST_ARCH), x86)
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
diff --git a/dsp/L2/tests/aie/fir_interpolate_hb/utils.mk b/dsp/L2/tests/aie/fir_interpolate_hb/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/dsp/L2/tests/aie/fir_interpolate_hb/utils.mk
+++ b/dsp/L2/tests/aie/fir_interpolate_hb/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/dsp/L2/tests/aie/fir_resampler/Makefile b/dsp/L2/tests/aie/fir_resampler/Makefile
index 35b26b6ed1..fd89abd3d9 100644
--- a/dsp/L2/tests/aie/fir_resampler/Makefile
+++ b/dsp/L2/tests/aie/fir_resampler/Makefile
@@ -1,5 +1,5 @@
+# Copyright 2019-2022 Xilinx, Inc.
 #
-# Copyright 2022 Xilinx, Inc.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -38,9 +38,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -201,14 +207,15 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := 
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -249,11 +256,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -287,21 +289,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -374,16 +376,7 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
-	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
-else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
-	
-endif
-else ifeq ($(HOST_ARCH), x86)
+ifeq ($(HOST_ARCH), x86)
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
diff --git a/dsp/L2/tests/aie/fir_resampler/utils.mk b/dsp/L2/tests/aie/fir_resampler/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/dsp/L2/tests/aie/fir_resampler/utils.mk
+++ b/dsp/L2/tests/aie/fir_resampler/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/dsp/L2/tests/aie/fir_sr_asym/Makefile b/dsp/L2/tests/aie/fir_sr_asym/Makefile
index 49a0498bd4..b0f11f0238 100644
--- a/dsp/L2/tests/aie/fir_sr_asym/Makefile
+++ b/dsp/L2/tests/aie/fir_sr_asym/Makefile
@@ -1,5 +1,5 @@
+# Copyright 2019-2022 Xilinx, Inc.
 #
-# Copyright 2022 Xilinx, Inc.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -38,9 +38,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -201,14 +207,15 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := 
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -249,11 +256,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -287,21 +289,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -374,16 +376,7 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
-	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
-else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
-	
-endif
-else ifeq ($(HOST_ARCH), x86)
+ifeq ($(HOST_ARCH), x86)
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
diff --git a/dsp/L2/tests/aie/fir_sr_asym/utils.mk b/dsp/L2/tests/aie/fir_sr_asym/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/dsp/L2/tests/aie/fir_sr_asym/utils.mk
+++ b/dsp/L2/tests/aie/fir_sr_asym/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/dsp/L2/tests/aie/fir_sr_sym/Makefile b/dsp/L2/tests/aie/fir_sr_sym/Makefile
index 7cea5b5a58..c4913b6e96 100644
--- a/dsp/L2/tests/aie/fir_sr_sym/Makefile
+++ b/dsp/L2/tests/aie/fir_sr_sym/Makefile
@@ -1,5 +1,5 @@
+# Copyright 2019-2022 Xilinx, Inc.
 #
-# Copyright 2022 Xilinx, Inc.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -38,9 +38,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -198,14 +204,15 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := 
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -246,11 +253,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -284,21 +286,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -371,16 +373,7 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
-	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
-else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
-	
-endif
-else ifeq ($(HOST_ARCH), x86)
+ifeq ($(HOST_ARCH), x86)
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
diff --git a/dsp/L2/tests/aie/fir_sr_sym/utils.mk b/dsp/L2/tests/aie/fir_sr_sym/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/dsp/L2/tests/aie/fir_sr_sym/utils.mk
+++ b/dsp/L2/tests/aie/fir_sr_sym/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/dsp/L2/tests/aie/matrix_mult/Makefile b/dsp/L2/tests/aie/matrix_mult/Makefile
index 03a52b817f..afed1da5a4 100644
--- a/dsp/L2/tests/aie/matrix_mult/Makefile
+++ b/dsp/L2/tests/aie/matrix_mult/Makefile
@@ -1,5 +1,5 @@
+# Copyright 2019-2022 Xilinx, Inc.
 #
-# Copyright 2022 Xilinx, Inc.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -38,9 +38,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -204,14 +210,15 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw
 
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := 
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -252,11 +259,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -290,21 +292,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -377,16 +379,7 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
-	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
-else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
-	
-endif
-else ifeq ($(HOST_ARCH), x86)
+ifeq ($(HOST_ARCH), x86)
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
diff --git a/dsp/L2/tests/aie/matrix_mult/utils.mk b/dsp/L2/tests/aie/matrix_mult/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/dsp/L2/tests/aie/matrix_mult/utils.mk
+++ b/dsp/L2/tests/aie/matrix_mult/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' is unset or does not exist, please set K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' is unset or does not exist, please set ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/dsp/L2/tests/aie/matrix_mult_with_datamover/Makefile b/dsp/L2/tests/aie/matrix_mult_with_datamover/Makefile
index a01ca6b282..2d2f38de56 100644
--- a/dsp/L2/tests/aie/matrix_mult_with_datamover/Makefile
+++ b/dsp/L2/tests/aie/matrix_mult_with_datamover/Makefile
@@ -1,5 +1,5 @@
+# Copyright 2019-2022 Xilinx, Inc.
 #
-# Copyright 2022 Xilinx, Inc.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -38,9 +38,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -146,10 +152,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)//host.cpp $(CUR_DIR)//gui_hack/aie_control_xrt.cpp 
 CXXFLAGS +=  -D __PS_ENABLE_AIE__ -D USING_PL_MOVER=1 -D USING_UUT=1
@@ -158,6 +160,11 @@ CXXFLAGS += --sysroot=$(SYSROOT)
 LDFLAGS +=  -L $(SYSROOT)/usr/lib/
 LDFLAGS += --sysroot=$(SYSROOT)
 
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.elf
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -230,11 +237,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -268,21 +270,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -343,12 +345,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/dsp/L2/tests/aie/matrix_mult_with_datamover/utils.mk b/dsp/L2/tests/aie/matrix_mult_with_datamover/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/dsp/L2/tests/aie/matrix_mult_with_datamover/utils.mk
+++ b/dsp/L2/tests/aie/matrix_mult_with_datamover/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' is unset or does not exist, please set K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' is unset or does not exist, please set ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/dsp/L2/tests/aie/widget_api_cast/Makefile b/dsp/L2/tests/aie/widget_api_cast/Makefile
index e801efa69a..44770460d1 100644
--- a/dsp/L2/tests/aie/widget_api_cast/Makefile
+++ b/dsp/L2/tests/aie/widget_api_cast/Makefile
@@ -1,5 +1,5 @@
+# Copyright 2019-2022 Xilinx, Inc.
 #
-# Copyright 2022 Xilinx, Inc.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -38,9 +38,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -182,14 +188,15 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw
 
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := 
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -230,11 +237,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -268,21 +270,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -357,16 +359,7 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
-	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
-else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
-	
-endif
-else ifeq ($(HOST_ARCH), x86)
+ifeq ($(HOST_ARCH), x86)
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
diff --git a/dsp/L2/tests/aie/widget_api_cast/utils.mk b/dsp/L2/tests/aie/widget_api_cast/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/dsp/L2/tests/aie/widget_api_cast/utils.mk
+++ b/dsp/L2/tests/aie/widget_api_cast/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' is unset or does not exist, please set K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' is unset or does not exist, please set ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/dsp/L2/tests/aie/widget_real2complex/Makefile b/dsp/L2/tests/aie/widget_real2complex/Makefile
index 5d2e40d518..538913e730 100644
--- a/dsp/L2/tests/aie/widget_real2complex/Makefile
+++ b/dsp/L2/tests/aie/widget_real2complex/Makefile
@@ -1,5 +1,5 @@
+# Copyright 2019-2022 Xilinx, Inc.
 #
-# Copyright 2022 Xilinx, Inc.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -38,9 +38,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -178,14 +184,15 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw
 
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := 
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -226,11 +233,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -264,21 +266,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE) --package.boot_mode sd  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -353,16 +355,7 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
-	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
-else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
-	
-endif
-else ifeq ($(HOST_ARCH), x86)
+ifeq ($(HOST_ARCH), x86)
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
diff --git a/dsp/L2/tests/aie/widget_real2complex/utils.mk b/dsp/L2/tests/aie/widget_real2complex/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/dsp/L2/tests/aie/widget_real2complex/utils.mk
+++ b/dsp/L2/tests/aie/widget_real2complex/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' is unset or does not exist, please set K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' is unset or does not exist, please set ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/dsp/L2/tests/hw/1dfft/fixed/Makefile b/dsp/L2/tests/hw/1dfft/fixed/Makefile
index 655225d9f3..771c4a6487 100644
--- a/dsp/L2/tests/hw/1dfft/fixed/Makefile
+++ b/dsp/L2/tests/hw/1dfft/fixed/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/sw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/sw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -61,18 +67,20 @@ ifeq ($(PLATFORM),)
 PLATFORM := $(DEVICE)
 endif
 ifeq ($(PLATFORM),)
-PLATFORM := xilinx_u250_gen3x16_xdma_3_1_202020_1
+PLATFORM := xilinx_u250_gen3x16_xdma_4_1_202210_1
 endif
 
 # #################### Checking if PLATFORM in whitelist ############################
 PLATFORM_ALLOWLIST +=  u250 aws-vu9p-f1 vck190
 PLATFORM_BLOCKLIST +=  zc
 
-GCC_INTOOL := 8.3.0
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
 TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
 BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
 BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
 EMCONFIG := $(BUILD_DIR)/emconfig.json
 XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
@@ -105,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifneq (,$(findstring OPENCV_INCLUDE,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -134,6 +138,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw/vitis_fft/fixed -I $(XFLIB_DIR)/L2/in
 CXXFLAGS += -O3
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := testFFT.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -173,10 +182,22 @@ endif
 
 ######################### binary container global settings ##########################
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
+VPP_FLAGS_fft1DKernel += --hls.clock 300000000:fft1DKernel
+ifneq ($(HOST_ARCH), x86)
+VPP_LDFLAGS_fft1DKernel += --clock.defaultFreqHz 300000000
+else
+VPP_LDFLAGS_fft1DKernel += --kernel_frequency 300
+endif
 VPP_LDFLAGS_fft1DKernel_temp := --config $(CUR_DIR)/conn_u250.cfg
 VPP_LDFLAGS_fft1DKernel += $(VPP_LDFLAGS_fft1DKernel_temp)
 
 else ifneq (,$(shell echo $(XPLATFORM) | awk '/aws-vu9p-f1/'))
+VPP_FLAGS_fft1DKernel += --hls.clock 300000000:fft1DKernel
+ifneq ($(HOST_ARCH), x86)
+VPP_LDFLAGS_fft1DKernel += --clock.defaultFreqHz 300000000
+else
+VPP_LDFLAGS_fft1DKernel += --kernel_frequency 300
+endif
 VPP_LDFLAGS_fft1DKernel_temp := --config $(CUR_DIR)/conn_aws_f1.cfg
 VPP_LDFLAGS_fft1DKernel += $(VPP_LDFLAGS_fft1DKernel_temp)
 
@@ -200,7 +221,7 @@ endif
 $(TEMP_DIR)/fft1DKernel.xo: $(CUR_DIR)/kernel/top_module.cpp 
 	$(ECHO) "Compiling Kernel: fft1DKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_fft1DKernel) $(VPP_FLAGS) -k fft1DKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_fft1DKernel) $(VPP_FLAGS) -k fft1DKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_fft1DKernel_OBJS += $(TEMP_DIR)/fft1DKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_fft1DKernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -224,11 +245,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -262,12 +278,23 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only, so the phony checks still run first but do not force repackaging every time
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
+dfx_hw := true
+endif
+endif
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+	@echo "### ***** sd_card generation done! ***** ###"
+endif
 
 .PHONY: sd_card
 sd_card: $(SD_CARD)
@@ -275,7 +302,14 @@ endif
 ############################## Setting Essential Checks and Building Rules ##############################
 RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
 RUN_DEPS += $(SD_CARD)
-run: check_device  $(RUN_DEPS)
+
+.PHONY: mkflag all run
+mkflag: # record each word of MAKEFLAGS, one per line, in $(BUILD_DIR)/makefile_args.txt
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do printf '%s\n' "$$var" >> $(BUILD_DIR)/makefile_args.txt; done
+all: check_device  check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
 #hw_emu
 ifneq (,$(filter hw_emu, $(TARGET)))
 ifeq ($(HOST_ARCH), x86)
@@ -304,7 +338,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/fft1DKernel.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -314,13 +358,8 @@ endif
 
 ############################## Setting Targets ##############################
 
-.PHONY: all clean cleanall emconfig
+.PHONY: clean cleanall emconfig
 emconfig: $(EMCONFIG)
-ifeq ($(HOST_ARCH), x86)
-all:  check_vpp check_platform check_xrt $(EXE_FILE) $(BINARY_CONTAINERS) emconfig
-else
-all:  check_vpp check_platform check_sysroot $(EXE_FILE) $(BINARY_CONTAINERS) emconfig sd_card
-endif
 
 .PHONY: host
 ifeq ($(HOST_ARCH), x86)
@@ -343,13 +382,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/dsp/L2/tests/hw/1dfft/fixed/utils.mk b/dsp/L2/tests/hw/1dfft/fixed/utils.mk
index 4c6ffe588f..1d97b0ad1a 100644
--- a/dsp/L2/tests/hw/1dfft/fixed/utils.mk
+++ b/dsp/L2/tests/hw/1dfft/fixed/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -70,8 +71,8 @@ check_device:
 	fi;
 
 #get HOST_ARCH by PLATFORM
+ifneq (,$(PLATFORM))
 HOST_ARCH_temp = $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
-$(warning HOST_ARCH_temp:$(HOST_ARCH_temp))
 ifeq ($(HOST_ARCH_temp), x86)
 HOST_ARCH := x86
 else ifeq ($(HOST_ARCH_temp), cortex-a9)
@@ -79,7 +80,7 @@ HOST_ARCH := aarch32
 else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
 HOST_ARCH := aarch64
 endif
-$(info HOST_ARCH: $(HOST_ARCH))
+endif
 
 
 #get suffix of kernel by PLATFORM
@@ -94,7 +95,6 @@ endif
 else
 LINK_TARGET_FMT := xclbin
 endif
-$(warning LINK_TARGET_FMT:$(LINK_TARGET_FMT))
 
 #Checks for Device Family
 ifeq ($(HOST_ARCH), aarch32)
@@ -108,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -115,27 +116,53 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # on non-x86 hosts, fail when the K_IMAGE path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # on non-x86 hosts, fail when the ROOTFS path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
-CXX_REQ := $(shell echo $(GCC_INTOOL) | cut -f 1 -d ".")
 ifeq ($(HOST_ARCH), x86)
-ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_REQ)), 1)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif
+CXX_V := $(shell echo $(CXX_VER) | awk -F. '{print tolower($$1)}')
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
 ifndef XILINX_VIVADO
-$(error [ERROR]: g++ version too old. Please use $(CXX_REQ) or above)
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
 else
-CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/bin/g++
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
 ifeq ($(LD_LIBRARY_PATH),)
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
 else
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64:$(LD_LIBRARY_PATH)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
 endif
 $(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
 endif
@@ -150,7 +177,14 @@ endif
 OSDIST = $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
 OSREL = $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
 
-ifeq ($(OSDIST), centos)
+# for centos and redhat
+ifneq ($(findstring centos,$(OSDIST)),)
+ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+else ifneq ($(findstring redhat,$(OSDIST)),)
 ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
 ifeq ($(HOST_ARCH), x86)
 XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
diff --git a/dsp/L2/tests/hw/1dfft/float/Makefile b/dsp/L2/tests/hw/1dfft/float/Makefile
index af0b2d81d6..e67e1e02f4 100644
--- a/dsp/L2/tests/hw/1dfft/float/Makefile
+++ b/dsp/L2/tests/hw/1dfft/float/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/sw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/sw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -61,18 +67,20 @@ ifeq ($(PLATFORM),)
 PLATFORM := $(DEVICE)
 endif
 ifeq ($(PLATFORM),)
-PLATFORM := xilinx_u250_gen3x16_xdma_3_1_202020_1
+PLATFORM := xilinx_u250_gen3x16_xdma_4_1_202210_1
 endif
 
 # #################### Checking if PLATFORM in whitelist ############################
 PLATFORM_ALLOWLIST +=  u250 aws-vu9p-f1 vck190
 PLATFORM_BLOCKLIST +=  zc
 
-GCC_INTOOL := 8.3.0
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
 TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
 BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
 BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
 EMCONFIG := $(BUILD_DIR)/emconfig.json
 XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
@@ -105,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifneq (,$(findstring OPENCV_INCLUDE,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -134,6 +138,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw/vitis_fft/float -I $(XFLIB_DIR)/L2/in
 CXXFLAGS += -O3
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := testFFT.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -173,10 +182,22 @@ endif
 
 ######################### binary container global settings ##########################
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
+VPP_FLAGS_fft1DKernel += --hls.clock 300000000:fft1DKernel
+ifneq ($(HOST_ARCH), x86)
+VPP_LDFLAGS_fft1DKernel += --clock.defaultFreqHz 300000000
+else
+VPP_LDFLAGS_fft1DKernel += --kernel_frequency 300
+endif
 VPP_LDFLAGS_fft1DKernel_temp := --config $(CUR_DIR)/conn_u250.cfg
 VPP_LDFLAGS_fft1DKernel += $(VPP_LDFLAGS_fft1DKernel_temp)
 
 else ifneq (,$(shell echo $(XPLATFORM) | awk '/aws-vu9p-f1/'))
+VPP_FLAGS_fft1DKernel += --hls.clock 300000000:fft1DKernel
+ifneq ($(HOST_ARCH), x86)
+VPP_LDFLAGS_fft1DKernel += --clock.defaultFreqHz 300000000
+else
+VPP_LDFLAGS_fft1DKernel += --kernel_frequency 300
+endif
 VPP_LDFLAGS_fft1DKernel_temp := --config $(CUR_DIR)/conn_aws_f1.cfg
 VPP_LDFLAGS_fft1DKernel += $(VPP_LDFLAGS_fft1DKernel_temp)
 
@@ -200,7 +221,7 @@ endif
 $(TEMP_DIR)/fft1DKernel.xo: $(CUR_DIR)/kernel/top_module.cpp 
 	$(ECHO) "Compiling Kernel: fft1DKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_fft1DKernel) $(VPP_FLAGS) -k fft1DKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_fft1DKernel) $(VPP_FLAGS) -k fft1DKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_fft1DKernel_OBJS += $(TEMP_DIR)/fft1DKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_fft1DKernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -224,11 +245,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -262,12 +278,23 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only, so the phony checks still run first but do not force repackaging every time
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
+dfx_hw := true
+endif
+endif
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+	@echo "### ***** sd_card generation done! ***** ###"
+endif
 
 .PHONY: sd_card
 sd_card: $(SD_CARD)
@@ -275,7 +302,14 @@ endif
 ############################## Setting Essential Checks and Building Rules ##############################
 RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
 RUN_DEPS += $(SD_CARD)
-run: check_device  $(RUN_DEPS)
+
+.PHONY: mkflag all run
+mkflag: # record each word of MAKEFLAGS, one per line, in $(BUILD_DIR)/makefile_args.txt
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do printf '%s\n' "$$var" >> $(BUILD_DIR)/makefile_args.txt; done
+all: check_device  check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
 #hw_emu
 ifneq (,$(filter hw_emu, $(TARGET)))
 ifeq ($(HOST_ARCH), x86)
@@ -304,7 +338,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/fft1DKernel.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -314,13 +358,8 @@ endif
 
 ############################## Setting Targets ##############################
 
-.PHONY: all clean cleanall emconfig
+.PHONY: clean cleanall emconfig
 emconfig: $(EMCONFIG)
-ifeq ($(HOST_ARCH), x86)
-all:  check_vpp check_platform check_xrt $(EXE_FILE) $(BINARY_CONTAINERS) emconfig
-else
-all:  check_vpp check_platform check_sysroot $(EXE_FILE) $(BINARY_CONTAINERS) emconfig sd_card
-endif
 
 .PHONY: host
 ifeq ($(HOST_ARCH), x86)
@@ -343,13 +382,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/dsp/L2/tests/hw/1dfft/float/utils.mk b/dsp/L2/tests/hw/1dfft/float/utils.mk
index 4c6ffe588f..1d97b0ad1a 100644
--- a/dsp/L2/tests/hw/1dfft/float/utils.mk
+++ b/dsp/L2/tests/hw/1dfft/float/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -70,8 +71,8 @@ check_device:
 	fi;
 
 #get HOST_ARCH by PLATFORM
+ifneq (,$(PLATFORM))
 HOST_ARCH_temp = $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
-$(warning HOST_ARCH_temp:$(HOST_ARCH_temp))
 ifeq ($(HOST_ARCH_temp), x86)
 HOST_ARCH := x86
 else ifeq ($(HOST_ARCH_temp), cortex-a9)
@@ -79,7 +80,7 @@ HOST_ARCH := aarch32
 else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
 HOST_ARCH := aarch64
 endif
-$(info HOST_ARCH: $(HOST_ARCH))
+endif
 
 
 #get suffix of kernel by PLATFORM
@@ -94,7 +95,6 @@ endif
 else
 LINK_TARGET_FMT := xclbin
 endif
-$(warning LINK_TARGET_FMT:$(LINK_TARGET_FMT))
 
 #Checks for Device Family
 ifeq ($(HOST_ARCH), aarch32)
@@ -108,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -115,27 +116,53 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # on non-x86 hosts, fail when the K_IMAGE path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # on non-x86 hosts, fail when the ROOTFS path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
-CXX_REQ := $(shell echo $(GCC_INTOOL) | cut -f 1 -d ".")
 ifeq ($(HOST_ARCH), x86)
-ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_REQ)), 1)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif
+CXX_V := $(shell echo $(CXX_VER) | awk -F. '{print tolower($$1)}')
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
 ifndef XILINX_VIVADO
-$(error [ERROR]: g++ version too old. Please use $(CXX_REQ) or above)
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
 else
-CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/bin/g++
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
 ifeq ($(LD_LIBRARY_PATH),)
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
 else
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64:$(LD_LIBRARY_PATH)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
 endif
 $(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
 endif
@@ -150,7 +177,14 @@ endif
 OSDIST = $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
 OSREL = $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
 
-ifeq ($(OSDIST), centos)
+# for centos and redhat
+ifneq ($(findstring centos,$(OSDIST)),)
+ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+else ifneq ($(findstring redhat,$(OSDIST)),)
 ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
 ifeq ($(HOST_ARCH), x86)
 XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
diff --git a/dsp/L2/tests/hw/2dfft/fixed/Makefile b/dsp/L2/tests/hw/2dfft/fixed/Makefile
index d4725cb467..e43df0be34 100644
--- a/dsp/L2/tests/hw/2dfft/fixed/Makefile
+++ b/dsp/L2/tests/hw/2dfft/fixed/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/sw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/sw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -61,18 +67,20 @@ ifeq ($(PLATFORM),)
 PLATFORM := $(DEVICE)
 endif
 ifeq ($(PLATFORM),)
-PLATFORM := xilinx_u250_gen3x16_xdma_3_1_202020_1
+PLATFORM := xilinx_u250_gen3x16_xdma_4_1_202210_1
 endif
 
 # #################### Checking if PLATFORM in whitelist ############################
 PLATFORM_ALLOWLIST +=  u250 aws-vu9p-f1 vck190
 PLATFORM_BLOCKLIST +=  zc
 
-GCC_INTOOL := 8.3.0
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
 TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
 BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
 BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
 EMCONFIG := $(BUILD_DIR)/emconfig.json
 XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
@@ -105,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifneq (,$(findstring OPENCV_INCLUDE,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -134,6 +138,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw/vitis_2dfft/fixed -I $(XFLIB_DIR)/L2/
 CXXFLAGS += -O3
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := testFFT.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -162,10 +171,22 @@ VPP_FLAGS +=  -I $(XFLIB_DIR)/L1/include/hw/vitis_2dfft/fixed -I $(XFLIB_DIR)/L2
 
 ######################### binary container global settings ##########################
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
+VPP_FLAGS_fft2DKernel += --hls.clock 300000000:fft2DKernel
+ifneq ($(HOST_ARCH), x86)
+VPP_LDFLAGS_fft2DKernel += --clock.defaultFreqHz 300000000
+else
+VPP_LDFLAGS_fft2DKernel += --kernel_frequency 300
+endif
 VPP_LDFLAGS_fft2DKernel_temp := --config $(CUR_DIR)/conn_u250.cfg
 VPP_LDFLAGS_fft2DKernel += $(VPP_LDFLAGS_fft2DKernel_temp)
 
 else ifneq (,$(shell echo $(XPLATFORM) | awk '/aws-vu9p-f1/'))
+VPP_FLAGS_fft2DKernel += --hls.clock 300000000:fft2DKernel
+ifneq ($(HOST_ARCH), x86)
+VPP_LDFLAGS_fft2DKernel += --clock.defaultFreqHz 300000000
+else
+VPP_LDFLAGS_fft2DKernel += --kernel_frequency 300
+endif
 VPP_LDFLAGS_fft2DKernel_temp := --config $(CUR_DIR)/conn_aws_f1.cfg
 VPP_LDFLAGS_fft2DKernel += $(VPP_LDFLAGS_fft2DKernel_temp)
 
@@ -189,7 +210,7 @@ endif
 $(TEMP_DIR)/fft2DKernel.xo: $(CUR_DIR)/kernel/top_module.cpp 
 	$(ECHO) "Compiling Kernel: fft2DKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_fft2DKernel) $(VPP_FLAGS) -k fft2DKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_fft2DKernel) $(VPP_FLAGS) -k fft2DKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_fft2DKernel_OBJS += $(TEMP_DIR)/fft2DKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_fft2DKernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -213,11 +234,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -251,12 +267,23 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
+dfx_hw := true
+endif
+endif
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+	@echo "### ***** sd_card generation done! ***** ###"
+endif
 
 .PHONY: sd_card
 sd_card: $(SD_CARD)
@@ -264,7 +291,14 @@ endif
 ############################## Setting Essential Checks and Building Rules ##############################
 RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
 RUN_DEPS += $(SD_CARD)
-run: check_device  $(RUN_DEPS)
+
+.PHONY: mkflag all run
+mkflag:
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do echo $$var >> $(BUILD_DIR)/makefile_args.txt; done
+all: check_device  check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
 #hw_emu
 ifneq (,$(filter hw_emu, $(TARGET)))
 ifeq ($(HOST_ARCH), x86)
@@ -293,7 +327,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/fft2DKernel.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -303,13 +347,8 @@ endif
 
 ############################## Setting Targets ##############################
 
-.PHONY: all clean cleanall emconfig
+.PHONY: clean cleanall emconfig
 emconfig: $(EMCONFIG)
-ifeq ($(HOST_ARCH), x86)
-all:  check_vpp check_platform check_xrt $(EXE_FILE) $(BINARY_CONTAINERS) emconfig
-else
-all:  check_vpp check_platform check_sysroot $(EXE_FILE) $(BINARY_CONTAINERS) emconfig sd_card
-endif
 
 .PHONY: host
 ifeq ($(HOST_ARCH), x86)
@@ -332,13 +371,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/dsp/L2/tests/hw/2dfft/fixed/utils.mk b/dsp/L2/tests/hw/2dfft/fixed/utils.mk
index 4c6ffe588f..1d97b0ad1a 100644
--- a/dsp/L2/tests/hw/2dfft/fixed/utils.mk
+++ b/dsp/L2/tests/hw/2dfft/fixed/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -70,8 +71,8 @@ check_device:
 	fi;
 
 #get HOST_ARCH by PLATFORM
+ifneq (,$(PLATFORM))
 HOST_ARCH_temp = $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
-$(warning HOST_ARCH_temp:$(HOST_ARCH_temp))
 ifeq ($(HOST_ARCH_temp), x86)
 HOST_ARCH := x86
 else ifeq ($(HOST_ARCH_temp), cortex-a9)
@@ -79,7 +80,7 @@ HOST_ARCH := aarch32
 else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
 HOST_ARCH := aarch64
 endif
-$(info HOST_ARCH: $(HOST_ARCH))
+endif
 
 
 #get suffix of kernel by PLATFORM
@@ -94,7 +95,6 @@ endif
 else
 LINK_TARGET_FMT := xclbin
 endif
-$(warning LINK_TARGET_FMT:$(LINK_TARGET_FMT))
 
 #Checks for Device Family
 ifeq ($(HOST_ARCH), aarch32)
@@ -108,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -115,27 +116,53 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Set K_IMAGE/ROOTFS defaults relative to SYSROOT (non-x86 hosts); the check_* targets error out if a path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
-CXX_REQ := $(shell echo $(GCC_INTOOL) | cut -f 1 -d ".")
 ifeq ($(HOST_ARCH), x86)
-ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_REQ)), 1)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif
+CXX_V := $(shell echo $(CXX_VER) | awk -F. '{print tolower($$1)}')
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
 ifndef XILINX_VIVADO
-$(error [ERROR]: g++ version too old. Please use $(CXX_REQ) or above)
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
 else
-CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/bin/g++
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
 ifeq ($(LD_LIBRARY_PATH),)
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
 else
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64:$(LD_LIBRARY_PATH)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
 endif
 $(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
 endif
@@ -150,7 +177,14 @@ endif
 OSDIST = $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
 OSREL = $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
 
-ifeq ($(OSDIST), centos)
+# CentOS / Red Hat release 7 on x86 hosts: add -D_GLIBCXX_USE_CXX11_ABI=0 to XRT_CXXFLAGS
+ifneq ($(findstring centos,$(OSDIST)),)
+ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+else ifneq ($(findstring redhat,$(OSDIST)),)
 ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
 ifeq ($(HOST_ARCH), x86)
 XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
diff --git a/dsp/L2/tests/hw/2dfft/float/Makefile b/dsp/L2/tests/hw/2dfft/float/Makefile
index 7fc4086249..283511e60e 100644
--- a/dsp/L2/tests/hw/2dfft/float/Makefile
+++ b/dsp/L2/tests/hw/2dfft/float/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/sw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/sw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -61,18 +67,20 @@ ifeq ($(PLATFORM),)
 PLATFORM := $(DEVICE)
 endif
 ifeq ($(PLATFORM),)
-PLATFORM := xilinx_u250_gen3x16_xdma_3_1_202020_1
+PLATFORM := xilinx_u250_gen3x16_xdma_4_1_202210_1
 endif
 
 # #################### Checking if PLATFORM in whitelist ############################
 PLATFORM_ALLOWLIST +=  u250 aws-vu9p-f1 vck190
 PLATFORM_BLOCKLIST +=  zc
 
-GCC_INTOOL := 8.3.0
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
 TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
 BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
 BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
 EMCONFIG := $(BUILD_DIR)/emconfig.json
 XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
@@ -105,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifneq (,$(findstring OPENCV_INCLUDE,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -134,6 +138,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw/vitis_2dfft/float -I $(XFLIB_DIR)/L2/
 CXXFLAGS += -O3
 
 endif
+# Workaround for OpenCV: add XRT_CXXFLAGS only when CXXFLAGS does not mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := testFFT.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -162,10 +171,22 @@ VPP_FLAGS +=  -I $(XFLIB_DIR)/L1/include/hw/vitis_2dfft/float -I $(XFLIB_DIR)/L2
 
 ######################### binary container global settings ##########################
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
+VPP_FLAGS_fft2DKernel += --hls.clock 300000000:fft2DKernel
+ifneq ($(HOST_ARCH), x86)
+VPP_LDFLAGS_fft2DKernel += --clock.defaultFreqHz 300000000
+else
+VPP_LDFLAGS_fft2DKernel += --kernel_frequency 300
+endif
 VPP_LDFLAGS_fft2DKernel_temp := --config $(CUR_DIR)/conn_u250.cfg
 VPP_LDFLAGS_fft2DKernel += $(VPP_LDFLAGS_fft2DKernel_temp)
 
 else ifneq (,$(shell echo $(XPLATFORM) | awk '/aws-vu9p-f1/'))
+VPP_FLAGS_fft2DKernel += --hls.clock 300000000:fft2DKernel
+ifneq ($(HOST_ARCH), x86)
+VPP_LDFLAGS_fft2DKernel += --clock.defaultFreqHz 300000000
+else
+VPP_LDFLAGS_fft2DKernel += --kernel_frequency 300
+endif
 VPP_LDFLAGS_fft2DKernel_temp := --config $(CUR_DIR)/conn_aws_f1.cfg
 VPP_LDFLAGS_fft2DKernel += $(VPP_LDFLAGS_fft2DKernel_temp)
 
@@ -189,7 +210,7 @@ endif
 $(TEMP_DIR)/fft2DKernel.xo: $(CUR_DIR)/kernel/top_module.cpp 
 	$(ECHO) "Compiling Kernel: fft2DKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_fft2DKernel) $(VPP_FLAGS) -k fft2DKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_fft2DKernel) $(VPP_FLAGS) -k fft2DKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_fft2DKernel_OBJS += $(TEMP_DIR)/fft2DKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_fft2DKernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -213,11 +234,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -251,12 +267,23 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
+dfx_hw := true
+endif
+endif
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+	@echo "### ***** sd_card generation done! ***** ###"
+endif
 
 .PHONY: sd_card
 sd_card: $(SD_CARD)
@@ -264,7 +291,14 @@ endif
 ############################## Setting Essential Checks and Building Rules ##############################
 RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
 RUN_DEPS += $(SD_CARD)
-run: check_device  $(RUN_DEPS)
+
+.PHONY: mkflag all run
+mkflag:
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do echo $$var >> $(BUILD_DIR)/makefile_args.txt; done
+all: check_device  check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
 #hw_emu
 ifneq (,$(filter hw_emu, $(TARGET)))
 ifeq ($(HOST_ARCH), x86)
@@ -293,7 +327,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/fft2DKernel.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -303,13 +347,8 @@ endif
 
 ############################## Setting Targets ##############################
 
-.PHONY: all clean cleanall emconfig
+.PHONY: clean cleanall emconfig
 emconfig: $(EMCONFIG)
-ifeq ($(HOST_ARCH), x86)
-all:  check_vpp check_platform check_xrt $(EXE_FILE) $(BINARY_CONTAINERS) emconfig
-else
-all:  check_vpp check_platform check_sysroot $(EXE_FILE) $(BINARY_CONTAINERS) emconfig sd_card
-endif
 
 .PHONY: host
 ifeq ($(HOST_ARCH), x86)
@@ -332,13 +371,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/dsp/L2/tests/hw/2dfft/float/utils.mk b/dsp/L2/tests/hw/2dfft/float/utils.mk
index 4c6ffe588f..1d97b0ad1a 100644
--- a/dsp/L2/tests/hw/2dfft/float/utils.mk
+++ b/dsp/L2/tests/hw/2dfft/float/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -70,8 +71,8 @@ check_device:
 	fi;
 
 #get HOST_ARCH by PLATFORM
+ifneq (,$(PLATFORM))
 HOST_ARCH_temp = $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
-$(warning HOST_ARCH_temp:$(HOST_ARCH_temp))
 ifeq ($(HOST_ARCH_temp), x86)
 HOST_ARCH := x86
 else ifeq ($(HOST_ARCH_temp), cortex-a9)
@@ -79,7 +80,7 @@ HOST_ARCH := aarch32
 else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
 HOST_ARCH := aarch64
 endif
-$(info HOST_ARCH: $(HOST_ARCH))
+endif
 
 
 #get suffix of kernel by PLATFORM
@@ -94,7 +95,6 @@ endif
 else
 LINK_TARGET_FMT := xclbin
 endif
-$(warning LINK_TARGET_FMT:$(LINK_TARGET_FMT))
 
 #Checks for Device Family
 ifeq ($(HOST_ARCH), aarch32)
@@ -108,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -115,27 +116,53 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Set K_IMAGE/ROOTFS defaults relative to SYSROOT (non-x86 hosts); the check_* targets error out if a path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
-CXX_REQ := $(shell echo $(GCC_INTOOL) | cut -f 1 -d ".")
 ifeq ($(HOST_ARCH), x86)
-ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_REQ)), 1)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif
+CXX_V := $(shell echo $(CXX_VER) | awk -F. '{print tolower($$1)}')
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
 ifndef XILINX_VIVADO
-$(error [ERROR]: g++ version too old. Please use $(CXX_REQ) or above)
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
 else
-CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/bin/g++
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
 ifeq ($(LD_LIBRARY_PATH),)
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
 else
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64:$(LD_LIBRARY_PATH)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
 endif
 $(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
 endif
@@ -150,7 +177,14 @@ endif
 OSDIST = $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
 OSREL = $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
 
-ifeq ($(OSDIST), centos)
+# CentOS / Red Hat release 7 on x86 hosts: add -D_GLIBCXX_USE_CXX11_ABI=0 to XRT_CXXFLAGS
+ifneq ($(findstring centos,$(OSDIST)),)
+ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+else ifneq ($(findstring redhat,$(OSDIST)),)
 ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
 ifeq ($(HOST_ARCH), x86)
 XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
diff --git a/genomics/Jenkinsfile b/genomics/Jenkinsfile
index 71dcc9f7a4..43da031ddd 100644
--- a/genomics/Jenkinsfile
+++ b/genomics/Jenkinsfile
@@ -1,4 +1,4 @@
 @Library('pipeline-library')_
 
-VitisLibPipeline (branch: 'next', libname: 'xf_genomics', run_sw_in_pr: 'true',
-TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu', devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_stable_latest')
+VitisLibPipeline (branch: 'main', libname: 'xf_genomics', run_sw_in_pr: 'true',
+TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu', devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_released')
diff --git a/genomics/L1/include/hw/sw.h b/genomics/L1/include/hw/sw.h
index 228f20f22f..4e00418add 100644
--- a/genomics/L1/include/hw/sw.h
+++ b/genomics/L1/include/hw/sw.h
@@ -22,9 +22,7 @@
 #define MATCH 2
 #define MISS_MATCH -1
 #define ABSMAXCOST MATCH
-#define MAXPE 32
 #define MINVAL -32000
-#define NUMPACKED 32
 
 #define UINTSZ sizeof(unsigned int)
 #define UINTSZ_K (NUMPACKED * 2) / 8
diff --git a/genomics/L1/include/hw/swmaxscore_compute.hpp b/genomics/L1/include/hw/swmaxscore_compute.hpp
index 750bf848bc..ba5f4543db 100644
--- a/genomics/L1/include/hw/swmaxscore_compute.hpp
+++ b/genomics/L1/include/hw/swmaxscore_compute.hpp
@@ -21,6 +21,8 @@
 #include <string.h>
 #include "hls_stream.h"
 #include <stdint.h>
+#define NUMPACKED 32
+#define MAXPE 32
 
 namespace xf {
 namespace genomics {
diff --git a/genomics/L1/tests/smithwaterman/smithwaterman_test.cpp b/genomics/L1/tests/smithwaterman/smithwaterman_test.cpp
index fe90581b87..5b5063759d 100644
--- a/genomics/L1/tests/smithwaterman/smithwaterman_test.cpp
+++ b/genomics/L1/tests/smithwaterman/smithwaterman_test.cpp
@@ -6,6 +6,8 @@
 #include <stdint.h>
 #define NUMITER 4
 #define NUM_BLOCKS 1
+#define NUMPACKED 32
+#define MAXPE 32
 
 void smithwatermanMaxscore(ap_uint<NUMPACKED * 2>* input, ap_uint<NUMPACKED * 2>* output, int* size) {
 #pragma HLS INTERFACE m_axi port = input offset = slave bundle = gmem depth = 1536
diff --git a/genomics/L2/demos/smithwaterman/Makefile b/genomics/L2/demos/smithwaterman/Makefile
index d4986d47d5..644622286b 100644
--- a/genomics/L2/demos/smithwaterman/Makefile
+++ b/genomics/L2/demos/smithwaterman/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.7
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/sw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/sw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/src/main.cpp $(XFLIB_DIR)/common/libs/smithwaterman/matcharray.cpp $(XFLIB_DIR)/common/libs/smithwaterman/smithwaterman.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D MAXPE=32 -D NUMPACKED=32
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 LDFLAGS += -fopenmp
 
+# Workaround for OpenCV: add XRT_CXXFLAGS only when CXXFLAGS does not mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_sw
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -143,7 +150,7 @@ endif
 $(TEMP_DIR)/opencl_sw_maxscore.xo: $(XFLIB_DIR)/L2/src/opencl_sw_maxscore_systolic.cpp 
 	$(ECHO) "Compiling Kernel: opencl_sw_maxscore"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_opencl_sw_maxscore) $(VPP_FLAGS) -k opencl_sw_maxscore -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_opencl_sw_maxscore) $(VPP_FLAGS) -k opencl_sw_maxscore -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_krnl_smithwaterman_OBJS += $(TEMP_DIR)/opencl_sw_maxscore.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_krnl_smithwaterman_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -167,11 +174,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -205,21 +207,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)), vck190_base_dfx)
-ifeq ($(TARGET),$(filter $(TARGET), hw))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -265,7 +267,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/krnl_smithwaterman.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -299,13 +311,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/genomics/L2/demos/smithwaterman/src/main.cpp b/genomics/L2/demos/smithwaterman/src/main.cpp
index 67c959a1a8..b94c88de50 100644
--- a/genomics/L2/demos/smithwaterman/src/main.cpp
+++ b/genomics/L2/demos/smithwaterman/src/main.cpp
@@ -76,7 +76,6 @@ int main(int argc, char* argv[]) {
     }
     int doubleBuffered = parser.value_to_int("double-buffered");
     int idxSelectedDevice = parser.value_to_int("select-device");
-    int nThreads = parser.value_to_int("number-of-threads");
     int verifyMode = parser.value_to_int("verify-mode");
 
     LogInfo("Platform: %s, Device: %s", strPlatformName.c_str(), strDeviceName.c_str());
diff --git a/genomics/L2/demos/smithwaterman/utils.mk b/genomics/L2/demos/smithwaterman/utils.mk
index a38e143571..1d97b0ad1a 100755
--- a/genomics/L2/demos/smithwaterman/utils.mk
+++ b/genomics/L2/demos/smithwaterman/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/genomics/L2/tests/pairhmm_8x2/Makefile b/genomics/L2/tests/pairhmm_8x2/Makefile
index 7a2173bc28..7910aa1e7a 100644
--- a/genomics/L2/tests/pairhmm_8x2/Makefile
+++ b/genomics/L2/tests/pairhmm_8x2/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.7
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/sw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/sw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(wildcard $(XFLIB_DIR)/common/libs/pairhmm/*.cpp) $(wildcard $(XFLIB_DIR)/common/libs/xcl2/*.cpp) 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D SLR0_PE_NUM=16 -D SLR1_PE_NUM=16 -D SLR2_PE_NUM=16 -D FPGA -D DI
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/pairhmm -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_pairhmm
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -152,7 +159,7 @@ endif
 $(TEMP_DIR)/pairhmm.xo: $(XFLIB_DIR)/L2/src/pairhmm.cpp 
 	$(ECHO) "Compiling Kernel: pairhmm"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_pairhmm) $(VPP_FLAGS) -k pairhmm -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_pairhmm) $(VPP_FLAGS) -k pairhmm -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_pairhmm_OBJS += $(TEMP_DIR)/pairhmm.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_pairhmm_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -176,11 +183,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -214,12 +216,23 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
+dfx_hw := true
+endif
+endif
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+	@echo "### ***** sd_card generation done! ***** ###"
+endif
 
 .PHONY: sd_card
 sd_card: $(SD_CARD)
@@ -263,7 +276,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/pairhmm.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -297,13 +320,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/genomics/L2/tests/pairhmm_8x2/utils.mk b/genomics/L2/tests/pairhmm_8x2/utils.mk
index a38e143571..1d97b0ad1a 100644
--- a/genomics/L2/tests/pairhmm_8x2/utils.mk
+++ b/genomics/L2/tests/pairhmm_8x2/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/genomics/L2/tests/pairhmm_8x8/Makefile b/genomics/L2/tests/pairhmm_8x8/Makefile
new file mode 100644
index 0000000000..7f5d4afa2d
--- /dev/null
+++ b/genomics/L2/tests/pairhmm_8x8/Makefile
@@ -0,0 +1,329 @@
+# Copyright 2019-2022 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# vitis makefile-generator v2.0.6
+
+############################## Help Section ##############################
+.PHONY: help
+
+help::
+	$(ECHO) "Makefile Usage:"
+	$(ECHO) "  make all TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<aarch64>"
+	$(ECHO) "      Command to generate the design for specified Target and Shell."
+	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make run TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<aarch64>"
+	$(ECHO) "      Command to run application in emulation."
+	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make xclbin TARGET=<hw/hw_emu/sw_emu/> PLATFORM=<FPGA platform> HOST_ARCH=<aarch64>"
+	$(ECHO) "      Command to build xclbin application."
+	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  make host TARGET=<hw/hw_emu/sw_emu/>"
+	$(ECHO) "      Command to build host application."
+	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
+	$(ECHO) ""
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
+	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) ""
+	$(ECHO) "  make clean "
+	$(ECHO) "      Command to remove the generated non-hardware files."
+	$(ECHO) ""
+	$(ECHO) "  make cleanall"
+	$(ECHO) "      Command to remove all the generated files."
+	$(ECHO) ""
+
+############################## Setting up Project Variables ##############################
+
+MK_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+XF_PROJ_ROOT ?= $(shell bash -c 'export MK_PATH=$(MK_PATH); echo $${MK_PATH%/L2/*}')
+CUR_DIR := $(patsubst %/,%,$(dir $(MK_PATH)))
+XFLIB_DIR = $(XF_PROJ_ROOT)
+
+# setting default value
+TARGET ?= sw_emu
+HOST_ARCH ?= aarch64
+
+#setting PLATFORM
+ifeq ($(PLATFORM),)
+PLATFORM := $(DEVICE)
+endif
+ifeq ($(PLATFORM),)
+PLATFORM := xilinx_vck190_base_202210_1
+endif
+
+# #################### Checking if PLATFORM in whitelist ############################
+PLATFORM_ALLOWLIST +=  vck190
+PLATFORM_BLOCKLIST +=  zc u50 u200 u250
+
+include ./utils.mk
+TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
+TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
+BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
+BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
+EMCONFIG := $(BUILD_DIR)/emconfig.json
+XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
+export XCL_BINDIR = $(XCLBIN_DIR)
+include ././config.mk
+
+EXE_FILE_DEPS :=
+BINARY_CONTAINERS_DEPS :=
+RUN_DEPS :=
+
+# get global setting
+ifeq ($(HOST_ARCH), x86)
+CXXFLAGS += -fmessage-length=0 -I$(CUR_DIR)/src/ -I$(XILINX_XRT)/include -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
+LDFLAGS += -pthread -L$(XILINX_XRT)/lib -L$(XILINX_HLS)/lnx64/tools/fpo_v7_0  -Wl,--as-needed -lOpenCL -lxrt_coreutil -lgmp -lmpfr -lIp_floating_point_v7_0_bitacc_cmodel 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
+VPP_LDFLAGS += --optimize 2 -R 2 
+else ifeq ($(HOST_ARCH), aarch64)
+CXXFLAGS += -I$(CUR_DIR)/src/ -fmessage-length=0 --sysroot=$(SYSROOT)  -I$(SYSROOT)/usr/include/xrt -I$(XILINX_HLS)/include -std=c++14 -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label 
+LDFLAGS += -pthread -L$(SYSROOT)/usr/lib -L$(XILINX_VITIS_AIETOOLS)/lib/aarch64.o -Wl,--as-needed -lxilinxopencl -lxrt_coreutil 
+VPP_FLAGS += -t $(TARGET) --platform $(XPLATFORM) --save-temps 
+VPP_LDFLAGS += --optimize 2 -R 2 
+endif
+CXXFLAGS += $(EXTRA_CXXFLAGS)
+VPP_FLAGS += $(EXTRA_VPP_FLAGS)
+
+########################## Setting up Host Variables ##########################
+ifeq ($(TARGET),sw_emu)
+CXXFLAGS += -D SW_EMU_TEST
+endif
+ifeq ($(TARGET),hw_emu)
+CXXFLAGS += -D HW_EMU_TEST
+endif
+
+#Include Required Host Source Files
+ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
+HOST_SRCS += $(CUR_DIR)/src/host.cpp $(CUR_DIR)/src/*.cpp $(wildcard $(XFLIB_DIR)/common/libs/xcl2/*.cpp) 
+CXXFLAGS +=  -D SLR0_PE_NUM=64 -D SLR1_PE_NUM=64 -D SLR2_PE_NUM=64 -D FPGA -D DIE_NUM=3
+CXXFLAGS +=  -I $(SYSROOT)/usr/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/pairhmm -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
+CXXFLAGS += --sysroot=$(SYSROOT)
+LDFLAGS +=  -L $(SYSROOT)/usr/lib -L ${SYSROOT}/opt/xilinx/xrt/lib
+
+else 
+HOST_SRCS += $(CUR_DIR)/src/host.cpp $(CUR_DIR)/src/*.cpp $(wildcard $(XFLIB_DIR)/common/libs/xcl2/*.cpp) 
+CXXFLAGS +=  -D SLR0_PE_NUM=64 -D SLR1_PE_NUM=64 -D SLR2_PE_NUM=64 -D FPGA -D DIE_NUM=3
+CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/pairhmm -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
+
+endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
+EXE_NAME := xil_pairhmm
+EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
+EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
+
+HOST_ARGS := $(BUILD_DIR)/pairhmm.xclbin --syn 10
+ifneq ($(HOST_ARCH), x86)
+PKG_HOST_ARGS = $(foreach args,$(HOST_ARGS),$(subst $(dir $(patsubst %/,%,$(args))),,$(args)))
+endif
+
+########################## Kernel compiler global settings ##########################
+VPP_FLAGS +=  -D SLR_PE_NUM=64 -D FPGA
+VPP_FLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/pairhmm -I $(XFLIB_DIR)/L2/src -I $(XFLIB_DIR)/L2/src -I $(XFLIB_DIR)/common/libs
+VPP_LDFLAGS += --clock.id 4:pairhmm_1
+######################### binary container global settings ##########################
+
+ifeq ($(HOST_ARCH), x86)
+BINARY_CONTAINERS += $(BUILD_DIR)/pairhmm.xclbin
+else
+BINARY_CONTAINERS += $(BUILD_DIR)/pairhmm_pkg.$(LINK_TARGET_FMT)
+BINARY_CONTAINERS_PKG += $(BUILD_DIR)/pairhmm.xclbin
+endif
+
+# ################ Setting Rules for Binary Containers (Building Kernels) ################
+$(TEMP_DIR)/pairhmm.xo: $(XFLIB_DIR)/L2/src/pairhmm.cpp 
+	$(ECHO) "Compiling Kernel: pairhmm"
+	mkdir -p $(TEMP_DIR)
+	$(VPP) -c $(VPP_FLAGS_pairhmm) $(VPP_FLAGS) -k pairhmm -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
+BINARY_CONTAINER_pairhmm_OBJS += $(TEMP_DIR)/pairhmm.xo
+BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_pairhmm_OBJS)
+$(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
+	mkdir -p $(BUILD_DIR)
+	$(VPP) -l $(VPP_FLAGS) --temp_dir $(TEMP_DIR) --report_dir $(BUILD_REPORT_DIR)/pairhmm $(VPP_LDFLAGS)  $(VPP_LDFLAGS_pairhmm) $(AIE_LDFLAGS)   -o $@ $^
+
+############################## Setting Rules for Host (Building Host Executable) ##############################
+ifeq ($(HOST_ARCH), x86)
+$(EXE_FILE): $(EXE_FILE_DEPS) |  check_xrt
+	mkdir -p $(BUILD_DIR)
+	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
+
+else
+$(EXE_FILE): $(EXE_FILE_DEPS) |  check_sysroot
+	mkdir -p $(BUILD_DIR)
+	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
+
+endif
+
+$(EMCONFIG):
+	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
+############################## Preparing sdcard folder ##############################
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE := $(SYSROOT)/../../uImage
+else
+K_IMAGE := $(SYSROOT)/../../Image
+endif
+RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
+$(RUN_SCRIPT):
+	rm -rf $(RUN_SCRIPT)
+	@echo 'export LD_LIBRARY_PATH=/mnt:/tmp:$(LIBRARY_PATH)' >> $(RUN_SCRIPT)
+ifneq ($(filter sw_emu hw_emu, $(TARGET)),)
+	@echo 'export XCL_EMULATION_MODE=$(TARGET)' >> $(RUN_SCRIPT)
+endif
+	@echo 'export XILINX_VITIS=/mnt' >> $(RUN_SCRIPT)
+	@echo 'export XILINX_XRT=/usr' >> $(RUN_SCRIPT)
+	@echo 'if [ -f platform_desc.txt  ]; then' >> $(RUN_SCRIPT)
+	@echo '        cp platform_desc.txt /etc/xocl.txt' >> $(RUN_SCRIPT)
+	@echo 'fi' >> $(RUN_SCRIPT)
+	@echo './$(EXE_NAME) $(PKG_HOST_ARGS)' >> $(RUN_SCRIPT)
+	@echo 'return_code=$$?' >> $(RUN_SCRIPT)
+	@echo 'if [ $$return_code -ne 0 ]; then' >> $(RUN_SCRIPT)
+	@echo '        echo "ERROR: Embedded host run failed, RC=$$return_code"' >> $(RUN_SCRIPT)
+	@echo 'else' >> $(RUN_SCRIPT)
+	@echo '        echo "INFO: TEST PASSED, RC=0"' >> $(RUN_SCRIPT)
+	@echo 'fi' >> $(RUN_SCRIPT)
+	@echo 'echo "INFO: Embedded host run completed."' >> $(RUN_SCRIPT)
+	@echo 'exit $$return_code' >> $(RUN_SCRIPT)
+DATA_FILE := 
+DATA_DIR := 
+SD_FILES += $(RUN_SCRIPT)
+SD_FILES += $(EXE_FILE)
+SD_FILES += $(EMCONFIG)
+SD_FILES += xrt.ini
+SD_FILES += $(DATA_FILE)# where define DATAFILE in json
+SD_FILES_WITH_PREFIX = $(foreach sd_file,$(SD_FILES), $(if $(filter $(sd_file),$(wildcard $(sd_file))), --package.sd_file $(sd_file)))
+SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
+PACKAGE_FILES := $(BINARY_CONTAINERS)
+PACKAGE_FILES += $(AIE_CONTAINER)
+SD_CARD := $(CUR_DIR)/package_$(TARGET)
+vck190_dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+	@echo "Generating sd_card folder...."
+	mkdir -p $(SD_CARD)
+	chmod a+rx $(BUILD_DIR)/run_script.sh
+ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	@echo "### ***** sd_card generation done! ***** ###"
+vck190_dfx_hw := true
+endif
+endif
+ifeq ($(vck190_dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+	@echo "### ***** sd_card generation done! ***** ###"
+endif
+
+.PHONY: sd_card
+sd_card: $(SD_CARD)
+endif
+############################## Setting Essential Checks and Building Rules ##############################
+RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
+RUN_DEPS += $(SD_CARD)
+
+.PHONY: mkflag all run
+mkflag:
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do echo $$var >> $(BUILD_DIR)/makefile_args.txt; done
+all: check_device  check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
+#hw_emu
+ifneq (,$(filter hw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	XCL_EMULATION_MODE=$(TARGET) $(EXE_FILE) $(HOST_ARGS)
+	
+else
+	@echo $(RUN_DEPS)
+	$(SD_CARD)/launch_$(TARGET).sh -no-reboot -run-app $(notdir $(RUN_SCRIPT)) 
+	grep "TEST PASSED, RC=0" $(SD_CARD)/qemu_output.log || exit 1
+	
+endif
+endif
+#sw_emu
+ifneq (,$(filter sw_emu, $(TARGET)))
+ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	XCL_EMULATION_MODE=$(TARGET) $(EXE_FILE) $(HOST_ARGS) 
+	
+else
+	@echo $(RUN_DEPS)
+	$(SD_CARD)/launch_$(TARGET).sh -no-reboot -run-app $(notdir $(RUN_SCRIPT)) 
+	grep "TEST PASSED, RC=0" $(SD_CARD)/qemu_output.log || exit 1
+	
+endif
+endif
+#hw
+ifeq ($(TARGET), hw)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(HOST_ARGS)
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(HOST_ARGS)
+	
+else
+	$(ECHO) "Please copy the content of sd_card folder and data to an SD Card and run on the board"
+endif
+endif
+
+############################## Setting Targets ##############################
+
+.PHONY: clean cleanall emconfig
+emconfig: $(EMCONFIG)
+
+.PHONY: host
+ifeq ($(HOST_ARCH), x86)
+host:  check_xrt $(EXE_FILE)
+else
+host:  check_sysroot $(EXE_FILE)
+endif
+
+.PHONY: xclbin
+ifeq ($(HOST_ARCH), x86)
+xclbin:  check_vpp check_xrt $(BINARY_CONTAINERS) 
+else
+xclbin:  check_vpp check_sysroot $(BINARY_CONTAINERS) 
+endif
+
+############################## Cleaning Rules ##############################
+cleanh:
+	-$(RMDIR) $(EXE_FILE) vitis_* TempConfig system_estimate.xtxt *.rpt .run/
+	-$(RMDIR) src/*.ll _xocc_* .Xil dltmp* xmltmp* *.log *.jou *.wcfg *.wdb sample_link.ini sample_compile.ini obj*  bin* *.csv *.jpg *.jpeg *.png
+
+cleank:
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
+	-$(RMDIR) _x_temp.*
+
+cleanall: cleanh cleank
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
+	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
+
+clean: cleanh
diff --git a/genomics/L2/tests/pairhmm_8x8/README.rst b/genomics/L2/tests/pairhmm_8x8/README.rst
new file mode 100644
index 0000000000..5b2d8f53bf
--- /dev/null
+++ b/genomics/L2/tests/pairhmm_8x8/README.rst
@@ -0,0 +1,48 @@
+===============================
+Xilinx PairHMM Algorithm 64PE
+===============================
+
+The PairHMM example resides in the ``L2/tests/pairhmm_8x8`` directory.
+
+Follow build instructions to build host executable and directory.
+
+The generated host binary is named "**xil_pairhmm**" and is placed in the ``./build`` directory.
+
+Executable Usage
+-----------------
+
+To execute Single PairHMM kernel :      ``./build/xil_pairhmm ./build/xclbin_<xsa_name>_<TARGET mode>/pairhmm.xclbin --syn <number of tests>``
+
+
+
+
+**Software Emulation:** make run TARGET=sw_emu 
+
+**Hardware Emulation:** make run TARGET=hw_emu
+
+**Hardware:** make all TARGET=hw 
+
+**Execution:** ./xil_pairhmm ./pairhmm.xclbin --syn <number of tests> 
+
+
+Resources  
+---------
+
+**Design:** PairHMM Algorithm 64PE
+
+**Board:** Versal vck190 board.
+
+============= ======= ====== ====== ======= ========
+Flow           LUT     BRAM   URAM    DSP     Fmax
+============= ======= ====== ====== ======= ========
+pairhmm 8x8    154K     95     56     588    156MHz
+============= ======= ====== ====== ======= ========
+
+Results
+-------
+
+====================== =========================
+Topic                         Results
+====================== =========================
+Performance                   9GCups
+====================== =========================
diff --git a/genomics/L2/tests/pairhmm_8x8/config.mk b/genomics/L2/tests/pairhmm_8x8/config.mk
new file mode 100644
index 0000000000..a2be756417
--- /dev/null
+++ b/genomics/L2/tests/pairhmm_8x8/config.mk
@@ -0,0 +1 @@
+CXXFLAGS += -mabi=lp64 
diff --git a/genomics/L2/tests/pairhmm_8x8/description.json b/genomics/L2/tests/pairhmm_8x8/description.json
new file mode 100644
index 0000000000..0c70c5e2a6
--- /dev/null
+++ b/genomics/L2/tests/pairhmm_8x8/description.json
@@ -0,0 +1,118 @@
+{
+    "name": "Xilinx Pairwise Hidden Markov Model Test (pairhmm)", 
+    "description": [
+        "Xilinx PairHMM is a hardware-accelerated algorithm used as part of genomic pipelines"
+    ], 
+    "flow": "vitis", 
+    "launch": [
+        {
+            "cmd_args": "BUILD/pairhmm.xclbin --syn 10", 
+            "name": "generic launch for all flows"
+        }
+    ], 
+    "platform_blocklist": [
+        "zc", 
+        "u50",
+	"u200",
+	"u250"
+    ], 
+    "platform_allowlist": [
+	"vck190"
+    ],
+    "config_make" : "./config.mk",
+    "host": {
+        "host_exe": "xil_pairhmm", 
+        "compiler": {
+            "sources": [
+                "./src/host.cpp", 
+                "./src/*.cpp", 
+                "$(wildcard LIB_DIR/common/libs/xcl2/*.cpp)" 
+            ], 
+            "includepaths": [
+                "LIB_DIR/L1/include/hw",
+                "LIB_DIR/common/libs/pairhmm",
+                "LIB_DIR/common/libs/xcl2"
+            ], 
+            "symbols": [
+                "SLR0_PE_NUM=64",
+                "SLR1_PE_NUM=64",
+                "SLR2_PE_NUM=64",
+                "FPGA",
+                "DIE_NUM=3"
+            ]
+        }
+    }, 
+    "v++": {
+        "compiler": {
+            "includepaths": [
+                 "LIB_DIR/L2/include",
+                 "LIB_DIR/L1/include/hw",
+                 "LIB_DIR/common/libs/pairhmm",
+                 "LIB_DIR/L2/src"
+            ],
+            "symbols": [
+                "SLR_PE_NUM=64",
+                "FPGA"
+            ]
+        }
+    }, 
+    "containers": [
+        {
+            "name": "pairhmm", 
+            "accelerators": [
+                {
+                    "name": "pairhmm", 
+                    "location": "LIB_DIR/L2/src/pairhmm.cpp"
+                } 
+            ]
+        }
+    ], 
+    "testinfo": {
+        "disable": false, 
+        "jobs": [
+            {
+                "index": 0, 
+                "dependency": [], 
+                "env": "", 
+                "cmd": "", 
+                "max_memory_MB": {
+                    "vitis_hw_build": 40960, 
+                    "vitis_hw_emu": 28672, 
+                    "vitis_sw_emu": 10240, 
+                    "vitis_hw_run": 10240
+                }, 
+                "max_time_min": {
+                    "vitis_hw_build": 600, 
+                    "vitis_hw_emu": 600, 
+                    "vitis_sw_emu": 25, 
+                    "vitis_hw_run": 10
+                }
+            }
+        ], 
+        "targets": [
+            "vitis_sw_emu",
+            "vitis_hw_emu", 
+            "vitis_hw_build", 
+            "vitis_hw_run"
+        ], 
+        "category": "canary"
+    }, 
+    "platform_properties": {
+        "vck190": {
+            "host": {
+                "linker": {
+                    "librarypaths": [
+                        "$(SYSROOT)/usr/lib", 
+                        "${SYSROOT}/opt/xilinx/xrt/lib"
+                    ]
+                }, 
+                "compiler": {
+                    "includepaths": [
+                        "$(SYSROOT)/usr/include"
+                    ], 
+                    "options": "--sysroot=$(SYSROOT)"
+                }
+            }
+        }
+    }
+}
diff --git a/genomics/L2/tests/pairhmm_8x8/src/Context.h b/genomics/L2/tests/pairhmm_8x8/src/Context.h
new file mode 100644
index 0000000000..a7a51521ff
--- /dev/null
+++ b/genomics/L2/tests/pairhmm_8x8/src/Context.h
@@ -0,0 +1,192 @@
+/*
+ * (c) Copyright 2022 Xilinx, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+#ifndef CONTEXT_H
+#define CONTEXT_H
+
+#include <cmath>     // std::isinf
+#include <algorithm> // std::min
+
+#define MAX_QUAL 254
+#define MAX_JACOBIAN_TOLERANCE 8.0
+#define JACOBIAN_LOG_TABLE_STEP 0.0001
+#define JACOBIAN_LOG_TABLE_INV_STEP (1.0 / JACOBIAN_LOG_TABLE_STEP)
+#define JACOBIAN_LOG_TABLE_SIZE ((int)(MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1)
+
+template <class NUMBER>
+class ContextBase {
+   public:
+    static NUMBER ph2pr[128];
+    static NUMBER INITIAL_CONSTANT;
+    static NUMBER LOG10_INITIAL_CONSTANT;
+    static NUMBER RESULT_THRESHOLD;
+
+    static bool staticMembersInitializedFlag;
+    static NUMBER jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE];
+    static NUMBER matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1];
+
+    static void initializeStaticMembers() {
+        // Order of calls important - Jacobian first, then MatchToMatch
+        initializeJacobianLogTable();
+        initializeMatchToMatchProb();
+    }
+
+    static void deleteStaticMembers() {
+        if (staticMembersInitializedFlag) {
+            staticMembersInitializedFlag = false;
+        }
+    }
+
+    // Called only once during library load - don't bother to optimize with single precision fp
+    static void initializeJacobianLogTable() {
+        for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
+            jacobianLogTable[k] = (NUMBER)(log10(1.0 + pow(10.0, -((double)k) * JACOBIAN_LOG_TABLE_STEP)));
+        }
+    }
+
+    // Called only once per library load - don't bother optimizing with single fp
+    static void initializeMatchToMatchProb() {
+        double LN10 = log(10);
+        double INV_LN10 = 1.0 / LN10;
+        for (int i = 0, offset = 0; i <= MAX_QUAL; offset += ++i)
+            for (int j = 0; j <= i; j++) {
+                double log10Sum = approximateLog10SumLog10(-0.1 * i, -0.1 * j);
+                double matchToMatchLog10 = log1p(-std::min(1.0, pow(10, log10Sum))) * INV_LN10;
+                matchToMatchProb[offset + j] = (NUMBER)(pow(10, matchToMatchLog10));
+            }
+    }
+    // Called during computation - use single precision where possible
+    static int fastRound(NUMBER d) { return (d > ((NUMBER)0.0)) ? (int)(d + ((NUMBER)0.5)) : (int)(d - ((NUMBER)0.5)); }
+    // Called during computation - use single precision where possible
+    static NUMBER approximateLog10SumLog10(NUMBER small, NUMBER big) {
+        // Approximates log10(10^small + 10^big); the arguments may be given in either order.
+        if (small > big) {
+            NUMBER t = big;
+            big = small;
+            small = t;
+        }
+        // std::isinf() returns bool, so the old "== -1" test never fired; any infinite operand makes big the answer.
+        if (std::isinf(small) || std::isinf(big)) return big;
+
+        NUMBER diff = big - small;
+        if (diff >= ((NUMBER)MAX_JACOBIAN_TOLERANCE)) return big;
+
+        // OK, so |y-x| < tol: we use the following identity then:
+        // we need to compute log10(10^x + 10^y)
+        // By Jacobian logarithm identity, this is equal to
+        // max(x,y) + log10(1+10^-abs(x-y))
+        // we compute the second term as a table lookup with integer quantization
+        // the table stores the correction for diff = 0, STEP, 2*STEP, ... up to MAX_JACOBIAN_TOLERANCE
+        int ind = fastRound((NUMBER)(diff * ((NUMBER)JACOBIAN_LOG_TABLE_INV_STEP))); // hard rounding
+        return big + jacobianLogTable[ind];
+    }
+};
+
+template <class NUMBER>
+class Context : public ContextBase<NUMBER> {};
+
+template <>
+class Context<double> : public ContextBase<double> {
+   public:
+    Context() : ContextBase<double>() {
+        if (!staticMembersInitializedFlag) {
+            initializeStaticMembers();
+
+            for (int x = 0; x < 128; x++) {
+                ph2pr[x] = pow(10.0, -((double)x) / 10.0);
+            }
+
+            INITIAL_CONSTANT = ldexp(1.0, 1020.0);
+            LOG10_INITIAL_CONSTANT = log10(INITIAL_CONSTANT);
+            RESULT_THRESHOLD = 0.0;
+
+            staticMembersInitializedFlag = true;
+        }
+    }
+
+    double LOG10(double v) { return log10(v); }
+    inline double POW(double b, double e) { return pow(b, e); }
+
+    static double _(double n) { return n; }
+    static double _(float n) { return ((double)n); }
+
+    inline double set_mm_prob(int insQual, int delQual) {
+        int minQual = delQual;
+        int maxQual = insQual;
+        if (insQual <= delQual) {
+            minQual = insQual;
+            maxQual = delQual;
+        }
+
+        return MAX_QUAL < maxQual ? 1.0 - POW(10.0, approximateLog10SumLog10(-0.1 * minQual, -0.1 * maxQual))
+                                  : matchToMatchProb[((maxQual * (maxQual + 1)) >> 1) + minQual];
+    }
+};
+
+template <>
+class Context<float> : public ContextBase<float> {
+   public:
+    Context() : ContextBase<float>() {
+        if (!staticMembersInitializedFlag) {
+            initializeStaticMembers();
+
+            for (int x = 0; x < 128; x++) {
+                ph2pr[x] = powf(10.f, -((float)x) / 10.f);
+            }
+
+            INITIAL_CONSTANT = ldexpf(1.f, 120.f);
+            LOG10_INITIAL_CONSTANT = log10f(INITIAL_CONSTANT);
+            RESULT_THRESHOLD = ldexpf(1.f, -110.f);
+
+            staticMembersInitializedFlag = true;
+        }
+    }
+
+    float LOG10(float v) { return log10f(v); }
+    inline float POW(float b, float e) { return powf(b, e); }
+
+    static float _(double n) { return ((float)n); }
+    static float _(float n) { return n; }
+
+    inline float set_mm_prob(int insQual, int delQual) {
+        int minQual = delQual;
+        int maxQual = insQual;
+        if (insQual <= delQual) {
+            minQual = insQual;
+            maxQual = delQual;
+        }
+
+        return MAX_QUAL < maxQual ? 1.0f - POW(10.0f, approximateLog10SumLog10(-0.1f * minQual, -0.1f * maxQual))
+                                  : matchToMatchProb[((maxQual * (maxQual + 1)) >> 1) + minQual];
+    }
+};
+
+template <typename NUMBER>
+NUMBER ContextBase<NUMBER>::ph2pr[128];
+template <typename NUMBER>
+NUMBER ContextBase<NUMBER>::INITIAL_CONSTANT;
+template <typename NUMBER>
+NUMBER ContextBase<NUMBER>::LOG10_INITIAL_CONSTANT;
+template <typename NUMBER>
+NUMBER ContextBase<NUMBER>::RESULT_THRESHOLD;
+template <typename NUMBER>
+bool ContextBase<NUMBER>::staticMembersInitializedFlag = false;
+template <typename NUMBER>
+NUMBER ContextBase<NUMBER>::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE];
+template <typename NUMBER>
+NUMBER ContextBase<NUMBER>::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1];
+
+#endif
diff --git a/genomics/L2/tests/pairhmm_8x8/src/gensynthdata.hpp b/genomics/L2/tests/pairhmm_8x8/src/gensynthdata.hpp
new file mode 100644
index 0000000000..938e47eb0b
--- /dev/null
+++ b/genomics/L2/tests/pairhmm_8x8/src/gensynthdata.hpp
@@ -0,0 +1,262 @@
+/*
+ * (c) Copyright 2022 Xilinx, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+#pragma once
+#include <vector>
+#include <string>
+#include <string.h>
+#include <fstream>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <dirent.h>
+#include <cmath>
+#include <random>
+
+double avg_numReads;
+double avg_numHaps;
+int case_counter;
+
+char GenRandBase() {
+    // Returns one of 'A', 'T', 'C', 'G' chosen uniformly at random.
+    // The engine is static so successive calls advance one shared sequence; a
+    // function-local engine would be default-seeded on every call and always
+    // yield the same base.
+    static std::default_random_engine generator;
+    std::uniform_int_distribution<int> distribution(0, 3);
+    int number = distribution(generator);
+    if (number == 0) return 'A';
+    if (number == 1) return 'T';
+    if (number == 2) return 'C';
+    return 'G';
+}
+
+int GenQuals() {
+    // Base quality ~ N(30, 5), truncated to int, minimum 6; static engine so successive calls differ.
+    static std::default_random_engine generator;
+    std::normal_distribution<double> distribution(30.0, 5.0);
+    int quals = distribution(generator);
+    return quals < 6 ? 6 : quals;
+}
+
+int GenInDel() {
+    // Quality value for the _i/_d arrays ~ N(40, 1), truncated to int, minimum 1; static engine so calls differ.
+    static std::default_random_engine generator;
+    std::normal_distribution<double> distribution(40.0, 1.0);
+    int quals = distribution(generator);
+    return quals < 1 ? 1 : quals;
+}
+
+int GenLen(int limit) {
+    // Uniform length in [limit / 4, limit]; static engine so successive calls differ.
+    static std::default_random_engine generator;
+    std::uniform_int_distribution<int> distribution(limit / 4, limit);
+    return distribution(generator);
+}
+
+int GenInputs(pairhmmInput* in, int size) {
+    // Fills `in` with synthetic data: 16 * (size + 1) reads and (size + 1) haplotypes. Always returns 0.
+    // Each length is drawn once; calling GenLen() in the loop condition would redraw the bound every pass.
+    in->reads.clear();
+    in->haps.clear();
+    in->reads.resize(16 * (size + 1));
+    in->haps.resize((size + 1));
+    printf("%s - readsize %zu \n", __FUNCTION__, in->reads.size());
+    printf("%s - hapsize %zu \n", __FUNCTION__, in->haps.size());
+    for (size_t i = 0; i < in->reads.size(); i++) {
+        Read& curRead = in->reads[i];
+        for (int j = 0, len = GenLen(MAX_READ_LEN); j < len; j++) {
+            curRead.bases.push_back(GenRandBase());
+            curRead._q.push_back(GenQuals());
+            curRead._i.push_back(GenInDel());
+            curRead._d.push_back(GenInDel());
+            curRead._c.push_back(10);
+        }
+    }
+    for (size_t i = 0; i < in->haps.size(); i++) {
+        Hap& curHap = in->haps[i];
+        for (int j = 0, len = GenLen(MAX_HAP_LEN); j < len; j++) curHap.bases.push_back(GenRandBase());
+    }
+    return 0;
+}
+
+#if 0
+int GenOutputs(pairhmmInput* in, pairhmmOutput* out) {
+    xilPairHMM* accelPhmm = new xilPairHMM();
+    accelPhmm->computePairhmmAVX(in, out, false);
+    delete accelPhmm;
+    return 0;
+}
+#endif
+
+int GetInputs(pairhmmInput* in, std::string filename) {
+    // Parses one test-case file and appends its reads and haplotypes to `in`.
+    // Returns 0 on success, or -1 when the file cannot be opened or a line holds fewer
+    // space-separated fields than declared (the token would be NULL and atoi(NULL) is undefined).
+    std::ifstream ifs(filename.c_str(), std::ifstream::in);
+    int numReads;
+    int numHaplotypes;
+    // the first line is number of reads and number of haplotypes
+    char lineBuf[1024];
+    if (!ifs.good()) {
+        printf("bad file name %s\n", filename.c_str());
+        return -1;
+    }
+    ifs.getline(lineBuf, 1024);
+    char* token;
+    token = strtok(lineBuf, " ");
+    token = strtok(NULL, " ");
+    if (token == NULL) return -1;
+    numReads = atoi(token);
+    token = strtok(NULL, " ");
+    token = strtok(NULL, " ");
+    if (token == NULL) return -1;
+    numHaplotypes = atoi(token);
+    // start from the second line are all the reads
+    for (int i = 0; i < numReads; ++i) {
+        int curReadLen;
+        Read curRead;
+        ifs.getline(lineBuf, 1024);
+        curReadLen = atoi(lineBuf);
+        // each of the five per-base arrays is preceded by one line that is read and discarded
+        ifs.getline(lineBuf, 1024);
+        ifs.getline(lineBuf, 1024);
+        for (int j = 0; j < curReadLen; j++) {
+            token = strtok(j == 0 ? lineBuf : NULL, " ");
+            if (token == NULL) return -1;
+            curRead.bases.push_back((char)atoi(token));
+        }
+        ifs.getline(lineBuf, 1024);
+        ifs.getline(lineBuf, 1024);
+        for (int j = 0; j < curReadLen; j++) {
+            token = strtok(j == 0 ? lineBuf : NULL, " ");
+            if (token == NULL) return -1;
+            curRead._q.push_back((char)atoi(token));
+        }
+        ifs.getline(lineBuf, 1024);
+        ifs.getline(lineBuf, 1024);
+        for (int j = 0; j < curReadLen; j++) {
+            token = strtok(j == 0 ? lineBuf : NULL, " ");
+            if (token == NULL) return -1;
+            curRead._i.push_back((char)atoi(token));
+        }
+        ifs.getline(lineBuf, 1024);
+        ifs.getline(lineBuf, 1024);
+        for (int j = 0; j < curReadLen; j++) {
+            token = strtok(j == 0 ? lineBuf : NULL, " ");
+            if (token == NULL) return -1;
+            curRead._d.push_back((char)atoi(token));
+        }
+        ifs.getline(lineBuf, 1024);
+        ifs.getline(lineBuf, 1024);
+        for (int j = 0; j < curReadLen; j++) {
+            token = strtok(j == 0 ? lineBuf : NULL, " ");
+            if (token == NULL) return -1;
+            curRead._c.push_back((char)atoi(token));
+        }
+        in->reads.push_back(curRead);
+    }
+    ifs.getline(lineBuf, 1024);
+    for (int i = 0; i < numHaplotypes; ++i) {
+        int curHapLen;
+        Hap curHap;
+        ifs.getline(lineBuf, 1024);
+        curHapLen = atoi(lineBuf);
+        ifs.getline(lineBuf, 1024);
+        ifs.getline(lineBuf, 1024);
+        // haplotype bases are stored as raw characters; never index past the line buffer
+        for (int j = 0; j < curHapLen && j < (int)sizeof(lineBuf); j++) {
+            curHap.bases.push_back(lineBuf[j]);
+        }
+        in->haps.push_back(curHap);
+    }
+    ifs.close();
+    return 0;
+}
+
+int GetOutputs(pairhmmOutput* out, int outputSize, std::string filename) {
+    std::ifstream ifs(filename.c_str(), std::ifstream::in);
+    if (!ifs.good()) printf("bad file name %s\n", filename.c_str());
+    for (int i = 0; i < outputSize; ++i) {
+        double ref;
+        ifs >> ref;
+        union {
+            long long i;
+            double d;
+        } value;
+        ifs >> value.i;
+        out->likelihoodData.push_back(value.d);
+    }
+    return 0;
+}
+
+int cmp(pairhmmOutput* target,
+        pairhmmOutput* golden,
+        int size,
+        int test_id,
+        bool exact_match,
+        double& total_error_count,
+        double& largest_error) {
+    int error_count = 0;
+    for (int i = 0; i < size; i++) {
+        if (exact_match) {
+            if (target->likelihoodData[i] != golden->likelihoodData[i]) {
+                printf("errors in %d th data of %d test, target is %f, golden is %f\n", i, test_id,
+                       target->likelihoodData[i], golden->likelihoodData[i]);
+                error_count++;
+            }
+        } else {
+            if (std::isnan(target->likelihoodData[i])) {
+                printf("error, target is nan\n");
+                error_count++;
+            }
+            double cur_error =
+                fabs((target->likelihoodData[i] - golden->likelihoodData[i]) / golden->likelihoodData[i]);
+            if (cur_error > largest_error) {
+                largest_error = cur_error;
+            }
+            if (cur_error > 5e-3) {
+                printf("%dth test: %dth result has significant error, golden=%f, target=%f\n", test_id, i,
+                       golden->likelihoodData[i], target->likelihoodData[i]);
+                error_count++;
+            }
+        }
+    }
+    if (error_count > 0) {
+        printf("%d out of %d have significant error\n", error_count, size);
+    }
+
+    total_error_count = total_error_count + error_count;
+    return 0;
+}
+
+double countCells(pairhmmInput* input) {
+    double numCell = 0;
+    for (int i = 0; (size_t)i < input->reads.size(); i++) {
+        for (int j = 0; (size_t)j < input->haps.size(); j++) {
+            double cur_numCell = input->reads[i].bases.size() * input->haps[j].bases.size();
+            numCell += cur_numCell;
+        }
+    }
+    return numCell;
+}
+
+void printHelp() {
+    printf("./test_bin -v or ./test_bin --version: get the compatible platform for current host binary\n");
+    printf("./test_bin -h or ./test_bin --help: print the help information\n");
+    printf("./test_bin [bitstream filename] --real [real cases folder]: run real tests\n");
+    printf("./test_bin [bitstream filename] --syn [syn cases number]: run synthetic tests\n");
+}
diff --git a/genomics/L2/tests/pairhmm_8x8/src/headers.h b/genomics/L2/tests/pairhmm_8x8/src/headers.h
new file mode 100644
index 0000000000..940b5b0e09
--- /dev/null
+++ b/genomics/L2/tests/pairhmm_8x8/src/headers.h
@@ -0,0 +1,70 @@
+/*Copyright (c) 2012 The Broad Institute
+
+*Permission is hereby granted, free of charge, to any person
+*obtaining a copy of this software and associated documentation
+*files (the "Software"), to deal in the Software without
+*restriction, including without limitation the rights to use,
+*copy, modify, merge, publish, distribute, sublicense, and/or sell
+*copies of the Software, and to permit persons to whom the
+*Software is furnished to do so, subject to the following
+*conditions:
+
+*The above copyright notice and this permission notice shall be
+*included in all copies or substantial portions of the Software.
+
+*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef COMMON_HEADERS_H
+#define COMMON_HEADERS_H
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include <stdint.h>
+#include <assert.h>
+#include <ctype.h>
+
+#include <sys/time.h>
+
+// #include <immintrin.h>
+//#include <emmintrin.h>
+// #include <omp.h>
+
+#include <string>
+#include <iostream>
+#include <sstream>
+#include <fstream>
+#include <vector>
+#include <map>
+#include <set>
+#include <cstdio>
+#include <cstring>
+#include <cstdlib>
+#include <cmath>
+#include <fenv.h>
+
+extern uint64_t exceptions_array[128];
+extern FILE* g_debug_fptr;
+#define STORE_FP_EXCEPTIONS(flagp, exceptions_array)                 \
+    fegetexceptflag(&flagp, FE_ALL_EXCEPT | __FE_DENORM);            \
+    exceptions_array[FE_INVALID] += ((flagp & FE_INVALID));          \
+    exceptions_array[__FE_DENORM] += ((flagp & __FE_DENORM) >> 1);   \
+    exceptions_array[FE_DIVBYZERO] += ((flagp & FE_DIVBYZERO) >> 2); \
+    exceptions_array[FE_OVERFLOW] += ((flagp & FE_OVERFLOW) >> 3);   \
+    exceptions_array[FE_UNDERFLOW] += ((flagp & FE_UNDERFLOW) >> 4); \
+    feclearexcept(FE_ALL_EXCEPT | __FE_DENORM);
+
+#define CONVERT_AND_PRINT(X) \
+    g_converter.f = (X);     \
+    fwrite(&(g_converter.i), 4, 1, g_debug_fptr);
+
+#endif
diff --git a/genomics/L2/tests/pairhmm_8x8/src/host.cpp b/genomics/L2/tests/pairhmm_8x8/src/host.cpp
new file mode 100644
index 0000000000..54266535be
--- /dev/null
+++ b/genomics/L2/tests/pairhmm_8x8/src/host.cpp
@@ -0,0 +1,174 @@
+/*
+ * (c) Copyright 2022 Xilinx, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+#include "pairHmm.hpp"
+#include "gensynthdata.hpp"
+
+int main(int argc, char* argv[]) {
+    // Usage: <host_exe> <xclbin> --syn <count> | <host_exe> <xclbin> --real <dir> | -h | -v
+    // Runs every test case through xilPairHMM and prints throughput statistics.
+    if (argc == 2) {
+        if (strcmp(argv[1], "-v") == 0 || strcmp(argv[1], "--version") == 0) {
+            // printVersion();
+            return 0;
+        } else if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0) {
+            printHelp();
+            return 0;
+        } else {
+            printf("Invalid argument list\n");
+            printHelp();
+            return EXIT_FAILURE;
+        }
+    }
+
+    if (argc != 4) {
+        printf("Invalid argument list\n");
+        printHelp();
+        return EXIT_FAILURE;
+    }
+
+    std::string input_common_prefix;
+    std::string output_common_prefix;
+    DIR* dir;
+    struct dirent* ent;
+
+    bool synthetic = false;
+    int test_num = 0;
+
+    if (strcmp(argv[2], "--real") == 0) {
+        if ((dir = opendir(argv[3])) != NULL) {
+            while ((ent = readdir(dir)) != NULL) {
+                test_num++;
+            }
+            closedir(dir);
+        } else {
+            printf("cannot find dir %s\n", argv[3]);
+            return EXIT_FAILURE;
+        }
+        printf("find %d files in dir %s\n", test_num, argv[3]);
+
+        input_common_prefix = std::string(argv[3]) + std::string("input");
+        output_common_prefix = std::string(argv[3]) + std::string("output");
+        test_num = (test_num - 2) / 2;
+    } else if (strcmp(argv[2], "--syn") == 0) {
+        try {
+            test_num = xcl::is_emulation() ? 4 : std::stoi(std::string(argv[3]));
+            // std::stoi reports bad input via invalid_argument or out_of_range; both are std::logic_error
+        } catch (const std::logic_error& ia) {
+            std::cout << "Invalid synthetic cases number " << ia.what() << '\n';
+            return EXIT_FAILURE;
+        }
+        synthetic = true;
+    } else {
+        printHelp();
+        return EXIT_FAILURE;
+    }
+
+    int totalTestNum = test_num;
+
+    avg_numReads = 0;
+    avg_numHaps = 0;
+    int total_numHaps = 0;
+    case_counter = 0;
+    pairhmmInput* input;
+    pairhmmOutput* golden_output;
+    pairhmmOutput* target_output;
+    input = new pairhmmInput();
+    golden_output = new pairhmmOutput();
+    target_output = new pairhmmOutput();
+    struct timespec time1, time2, time_diff;
+    double total_avx_count = 0;
+    double total_fpga_count = 0;
+    double total_avx_time = 0;
+    double total_fpga_time = 0;
+    double total_kernel_time = 0;
+    double cur_time = 0;
+    double total_avx_cells = 0;
+    double total_fpga_cells = 0;
+    double total_fpga_results = 0;
+    double total_avx_results = 0;
+    double current_cells = 0;
+    double error_count = 0;
+    double largest_error = 0;
+    float peak_GCUPS = 0;
+
+    xilPairHMM* accelPhmm = new xilPairHMM(argv[1], true);
+
+    for (int i = 0; i < totalTestNum; ++i) {
+        if (synthetic == false) {
+            std::string test_id = std::to_string(i);
+            std::string input_filename = input_common_prefix + test_id;
+            GetInputs(input, input_filename);
+            std::string output_filename = output_common_prefix + test_id;
+            GetOutputs(golden_output, input->reads.size() * input->haps.size(), output_filename);
+        } else {
+            GenInputs(input, i);
+        }
+
+        bool usedFPGA = true;
+        current_cells = countCells(input);
+        // monotonic clock: interval timing must not be disturbed by wall-clock adjustments
+        clock_gettime(CLOCK_MONOTONIC, &time1);
+        accelPhmm->computePairhmm(input, target_output, usedFPGA);
+        clock_gettime(CLOCK_MONOTONIC, &time2);
+
+        time_diff = diff_time(time1, time2);
+        cur_time = (long)(time_diff.tv_sec * 1e9 + time_diff.tv_nsec);
+        if (current_cells / cur_time > peak_GCUPS) peak_GCUPS = current_cells / cur_time;
+        if (usedFPGA) {
+            total_fpga_time += cur_time;
+            total_fpga_cells += current_cells;
+            total_fpga_results += input->reads.size() * input->haps.size();
+            total_fpga_count += 1;
+            total_numHaps += input->haps.size();
+        } else {
+            total_avx_time += cur_time;
+            total_avx_cells += current_cells;
+            total_avx_results += input->reads.size() * input->haps.size();
+            total_avx_count += 1;
+        }
+        //     cmp(target_output, golden_output, input->reads.size() * input->haps.size(), i, false, error_count,
+        //         largest_error);
+
+        printf("%d test passed, overall time is %f secs, overall GCUPS is %f, current GCUPS is %f, peak GCUPS is %f\n",
+               i, (total_avx_time + total_fpga_time) * (1e-9),
+               (total_fpga_cells + total_avx_cells) / (total_fpga_time + total_avx_time), current_cells / cur_time,
+               peak_GCUPS);
+        input->reads.clear();
+        input->haps.clear();
+        golden_output->likelihoodData.clear();
+        target_output->likelihoodData.clear();
+    }
+#ifdef FPGA
+    total_kernel_time = accelPhmm->get_kernel_time();
+    printf("%f out of %d tests use FPGA, use %f secs, FPGA GCUPs is %f, avg numHaps = %f\n", total_fpga_count,
+           totalTestNum, total_fpga_time * 1e-9, total_fpga_cells / total_fpga_time,
+           (float)total_numHaps / (float)total_fpga_count);
+    printf("pure kernel GCUPs is %f\n", total_fpga_cells / total_kernel_time);
+#endif
+    delete accelPhmm;
+    delete input;
+    delete golden_output;
+    delete target_output;
+    printf("%f out of %d tests use AVX, use %f secs, AVX GCUPs is %f\n", total_avx_count, totalTestNum,
+           total_avx_time * 1e-9, total_avx_cells / total_avx_time);
+    printf(
+        "%e out of %e FPGA run results have significant errors, total number of results is %e, error rate is %e, "
+        "largest error is %e\n",
+        error_count, total_fpga_results, (total_fpga_results + total_avx_results), error_count / total_fpga_results,
+        largest_error);
+    return 0;
+}
diff --git a/genomics/L2/tests/pairhmm_8x8/src/host_type.h b/genomics/L2/tests/pairhmm_8x8/src/host_type.h
new file mode 100644
index 0000000000..62ab9cdc76
--- /dev/null
+++ b/genomics/L2/tests/pairhmm_8x8/src/host_type.h
@@ -0,0 +1,152 @@
+/*
+ * (c) Copyright 2022 Xilinx, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef HOST_TYPE_H
+#define HOST_TYPE_H
+#if 0
+#if defined(_MSC_VER)
+#include <intrin.h> // SIMD intrinsics for Windows
+#else
+#include <x86intrin.h> // SIMD intrinsics for GCC
+#endif
+#endif
+
+#include <assert.h>
+#include <stdint.h>
+#include <time.h>
+#include <string>
+
+#include "headers.h"
+#include "common.hpp"
+
+// Token-pasting helpers. CONCAT goes through CAT so that macro arguments are expanded before they are pasted.
+#define CAT(X, Y) X##Y
+#define CONCAT(X, Y) CAT(X, Y)
+
+// Threshold on a single-precision likelihood; the (currently disabled) baseline/AVX paths in pairHmm.cpp
+// recompute in double precision when the float result falls below it.
+#define MIN_ACCEPTED 1e-28f
+// Size of the base alphabet and the code of the ambiguous base; ConvertChar::init() asserts both values.
+#define NUM_DISTINCT_CHARS 5
+#define AMBIG_CHAR 4
+
+// Number of transition-probability slots; the TRANSITION_* indices below cover 0..5.
+#define TRANS_PROB_ARRAY_LENGTH 6
+
+// Named indices for the individual transitions (presumably into an array of TRANS_PROB_ARRAY_LENGTH entries,
+// which is not declared in this header - confirm against the user of these macros).
+#define TRANSITION_matchToMatch 0
+#define TRANSITION_indelToMatch 1
+#define TRANSITION_matchToInsertion 2
+#define TRANSITION_insertionToInsertion 3
+#define TRANSITION_matchToDeletion 4
+#define TRANSITION_deletionToDeletion 5
+
+// Short aliases carrying the same numeric values (0..5) as the TRANSITION_* names above.
+#define MM 0
+#define GapM 1
+#define MX 2
+#define XX 3
+#define MY 4
+#define YY 5
+
+// Largest quality handled by the table lookup in SET_MATCH_TO_MATCH_PROB; larger values are computed directly.
+#define MAX_QUAL 254
+// Parameters of the Jacobian-log lookup table: covered range, step, reciprocal step and resulting entry count.
+#define MAX_JACOBIAN_TOLERANCE 8.0
+#define JACOBIAN_LOG_TABLE_STEP 0.0001
+#define JACOBIAN_LOG_TABLE_INV_STEP (1.0 / JACOBIAN_LOG_TABLE_STEP)
+#define MAXN 70000
+#define LOG10_CACHE_SIZE (4 * MAXN) // we need to be able to go up to 2*(2N) when calculating some of the coefficients
+#define JACOBIAN_LOG_TABLE_SIZE ((int)(MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1)
+
+// Stores the match-to-match probability for an (insertion, deletion) quality pair into `output`.
+// The two qualities are ordered into minQual/maxQual. When maxQual exceeds MAX_QUAL the value is computed as
+// 1 - 10^approximateLog10SumLog10(-0.1 * minQual, -0.1 * maxQual); otherwise it is read from
+// ctx.matchToMatchProb at the triangular index maxQual * (maxQual + 1) / 2 + minQual.
+// Usage notes:
+//   - the expansion site must provide a `ctx` object and a `NUMBER` type;
+//   - insQual and delQual are expanded several times and are not parenthesised, so pass simple,
+//     side-effect-free expressions;
+//   - the macro expands to a bare brace block, not a do { } while (0) statement.
+#define SET_MATCH_TO_MATCH_PROB(output, insQual, delQual)                                                              \
+    {                                                                                                                  \
+        int minQual = delQual;                                                                                         \
+        int maxQual = insQual;                                                                                         \
+        if (insQual <= delQual) {                                                                                      \
+            minQual = insQual;                                                                                         \
+            maxQual = delQual;                                                                                         \
+        }                                                                                                              \
+        (output) = (MAX_QUAL < maxQual)                                                                                \
+                       ? ((NUMBER)1.0) -                                                                               \
+                             ctx.POW(((NUMBER)10),                                                                     \
+                                     ctx.approximateLog10SumLog10(((NUMBER)-0.1) * minQual, ((NUMBER)-0.1) * maxQual)) \
+                       : ctx.matchToMatchProb[((maxQual * (maxQual + 1)) >> 1) + minQual];                             \
+    }
+
+// NOTE(review): header-wide using-directive; the unqualified `string` / `vector` below depend on it.
+// <vector> is not included directly here - presumably it arrives through headers.h or common.hpp.
+using namespace std;
+
+// Returns end - start as a normalised timespec (defined in pairHmm.cpp).
+struct timespec diff_time(struct timespec start, struct timespec end);
+
+// One read/haplotype pair as raw C strings, in the form used by the (currently disabled) baseline/AVX paths:
+// rs/rslen are the read bases and their length, hap/haplen the haplotype, and q, i, d, c the per-base
+// quality strings taken from Read::_q, _i, _d and _c.
+typedef struct {
+    int rslen, haplen;
+    const char *q, *i, *d, *c;
+    const char *hap, *rs;
+} testcase;
+
+// Per-haplotype length record sent to the device.
+// NOTE: despite its name, the host fills oneDivHapLen with g_ctxf.INITIAL_CONSTANT / hapLen.
+typedef struct {
+    int hapLen;
+    float oneDivHapLen;
+} hapLenPack;
+
+// One read position: packed base/quality fields plus the match-to-match value looked up from m2m_table.
+// convert_read_input() masks the base code with 0x7 before shifting it to bit 28.
+typedef struct {
+    uint32_t scores; // 31:28 readBases, 27:21 readQuals, 20:14 insertionGOP, 13:7 deletionGOP, 6:0 overallGCP
+    float m2m;
+} readDataPack;
+
+// Complete input buffer for one die.
+//   readInfo   - one packed 64-bit descriptor per block of READ_BLOCK_SIZE reads; as packed by the host:
+//                first read length from bit 0, second read length from bit 8, result offset from bit 16,
+//                (iteration count - 1) from bit 37, "two reads present" flag at bit 58
+//   readData   - per-read, per-position packed scores
+//   hapDataLen - per-haplotype length records
+//   hapData    - haplotype bases, HAP_BLOCK_SIZE haplotypes interleaved per entry at 4 bits each
+typedef struct {
+    uint64_t readInfo[MAX_RSDATA_NUM / READ_BLOCK_SIZE];
+    readDataPack readData[MAX_RSDATA_NUM][MAX_READ_LEN];
+    hapLenPack hapDataLen[MAX_HAPDATA_NUM];
+    uint16_t hapData[MAX_HAPDATA_NUM / HAP_BLOCK_SIZE][MAX_HAP_LEN];
+    uint16_t numReadPU[64]; // numRead per PU
+    uint64_t iterNum[64];   // iterNum per PU
+} InputDataPackOpt;
+
+// Device input buffer together with the number of reads and haplotypes it currently holds.
+typedef struct {
+    InputDataPackOpt dataPack;
+    int numRead;
+    int numHap;
+} FPGAInput;
+
+// A read: its bases plus four per-base strings (_q, _i, _d, _c) that are packed into
+// readDataPack::scores as readQuals, insertionGOP, deletionGOP and overallGCP respectively.
+typedef struct {
+    string bases;
+    string _q;
+    string _i;
+    string _d;
+    string _c;
+} Read;
+
+// A haplotype, stored as its base string.
+typedef struct { string bases; } Hap;
+
+// One pair-HMM job: every read is evaluated against every haplotype.
+typedef struct {
+    vector<Read> reads;
+    vector<Hap> haps;
+} pairhmmInput;
+
+// Results of one job; the host indexes them read-major (read * numHap + hap).
+typedef struct { vector<double> likelihoodData; } pairhmmOutput;
+
+/**
+ * Translates nucleotide characters into the small integer codes used when packing reads and haplotypes.
+ *
+ * init() must be called once before get() is used; it assigns the codes A=0, C=1, T=2, G=3, N=4.
+ * Entries that init() does not assign keep whatever value the table's definition gives them.
+ * NOTE(review): the definition in pairHmm.cpp has static storage, so those entries are presumably 0 and
+ * unknown characters (including lower-case bases) would decode like 'A' - confirm this is intended.
+ */
+class ConvertChar {
+    static uint8_t conversionTable[255];
+
+   public:
+    /** Populates the lookup table for the five supported upper-case base characters. */
+    static void init() {
+        assert(NUM_DISTINCT_CHARS == 5);
+        assert(AMBIG_CHAR == 4);
+
+        conversionTable['A'] = 0;
+        conversionTable['C'] = 1;
+        conversionTable['T'] = 2;
+        conversionTable['G'] = 3;
+        conversionTable['N'] = 4;
+    }
+
+    /**
+     * Returns the code for one base character.
+     *
+     * A uint8_t can hold 0..255 but the table only has indices 0..254, so an input beyond the table is
+     * reported as the ambiguous base instead of reading past the end of the array.
+     */
+    static inline uint8_t get(uint8_t input) {
+        if (input >= sizeof(conversionTable) / sizeof(conversionTable[0])) {
+            return AMBIG_CHAR;
+        }
+        return conversionTable[input];
+    }
+};
+
+#endif // HOST_TYPE_H
diff --git a/genomics/L2/tests/pairhmm_8x8/src/pairHmm.cpp b/genomics/L2/tests/pairhmm_8x8/src/pairHmm.cpp
new file mode 100644
index 0000000000..a62e0c0c51
--- /dev/null
+++ b/genomics/L2/tests/pairhmm_8x8/src/pairHmm.cpp
@@ -0,0 +1,1185 @@
+/*
+ * (c) Copyright 2022 Xilinx, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+#include <vector>
+#include <string>
+#include <string.h>
+#include <fstream>
+#include <algorithm>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <malloc.h>
+#include "pairHmm.hpp"
+#include "m2m.hpp"
+
+// Storage for the static lookup table declared inside ConvertChar.
+uint8_t ConvertChar::conversionTable[255];
+// Ordering predicate for readInfo records: true when `a` has more rows (new_rows) than `b`.
+// sortReads() passes it to sort(), which therefore orders records by descending new_rows.
+bool cmpReadInfo(struct readInfo a, struct readInfo b) {
+    return (a.new_rows > b.new_rows);
+}
+
+// Computes end - start as a timespec whose nanosecond field is non-negative whenever both inputs are
+// normalised: the fields are subtracted independently and one second is borrowed if the nanosecond
+// difference came out negative.
+struct timespec diff_time(struct timespec start, struct timespec end) {
+    struct timespec elapsed;
+    elapsed.tv_sec = end.tv_sec - start.tv_sec;
+    elapsed.tv_nsec = end.tv_nsec - start.tv_nsec;
+    if (elapsed.tv_nsec < 0) {
+        elapsed.tv_sec -= 1;
+        elapsed.tv_nsec += 1000000000;
+    }
+    return elapsed;
+}
+
+/* int xilPairHMM::computePairhmmBaseline(pairhmmInput* input, pairhmmOutput* output, bool use_double) {
+    // baseline pairhmm algorithm ported from GATK v3.7 gatk3/public/VectorPairHMM/src/main/c++/baseline.cc
+
+    testcase testCase;
+    output->likelihoodData.clear();
+    for (int i = 0; (size_t)i < input->reads.size(); i++) {
+        testCase.rslen = input->reads[i].bases.size();
+        testCase.rs = input->reads[i].bases.c_str();
+        testCase.i = input->reads[i]._i.c_str();
+        testCase.d = input->reads[i]._d.c_str();
+        testCase.c = input->reads[i]._c.c_str();
+        testCase.q = input->reads[i]._q.c_str();
+        for (int j = 0; (size_t)j < input->haps.size(); j++) {
+            testCase.haplen = input->haps[j].bases.size();
+            testCase.hap = input->haps[j].bases.c_str();
+            double result_final = 0;
+            float result_float = use_double ? 0.0f : compute_full_prob_baseline<float>(&testCase, NULL);
+            if (result_float < MIN_ACCEPTED) {
+                double result_double = compute_full_prob_baseline<double>(&testCase, NULL);
+                result_final = log10(result_double) - g_ctxd.LOG10_INITIAL_CONSTANT;
+            } else {
+                result_final = (double)(log10f(result_float) - g_ctxf.LOG10_INITIAL_CONSTANT);
+            }
+            output->likelihoodData.push_back(result_final);
+        }
+    }
+    return 0;
+} */
+
+/* int xilPairHMM::computePairhmmAVX(pairhmmInput* input, pairhmmOutput* output, bool use_double) {
+    testcase testCase;
+    output->likelihoodData.clear();
+    for (int i = 0; (size_t)i < input->reads.size(); i++) {
+        testCase.rslen = input->reads[i].bases.size();
+        testCase.rs = input->reads[i].bases.c_str();
+        testCase.i = input->reads[i]._i.c_str();
+        testCase.d = input->reads[i]._d.c_str();
+        testCase.c = input->reads[i]._c.c_str();
+        testCase.q = input->reads[i]._q.c_str();
+        for (int j = 0; (size_t)j < input->haps.size(); j++) {
+            testCase.haplen = input->haps[j].bases.size();
+            testCase.hap = input->haps[j].bases.c_str();
+            double result_final = 0;
+            float result_float = use_double ? 0.0f : compute_fp_avxs(&testCase);
+            if (result_float < MIN_ACCEPTED) {
+                double result_double = compute_fp_avxd(&testCase);
+                result_final = log10(result_double) - g_ctxd.LOG10_INITIAL_CONSTANT;
+            } else {
+                result_final = (double)(log10f(result_float) - g_ctxf.LOG10_INITIAL_CONSTANT);
+            }
+            output->likelihoodData.push_back(result_final);
+        }
+    }
+    return 0;
+} */
+
+// Sums readLength * haplotypeLength over every read/haplotype pair of `input`.
+// The walk stops early - setting `violate` and returning the sum accumulated so far - at the first pair
+// whose read satisfies readLen * (readLen + maxCols) <= 1 + READ_BLOCK_SIZE * MAX_READ_LEN.
+// NOTE(review): the direction of that comparison is taken over unchanged; confirm it matches the intent.
+double countCell(pairhmmInput* input, short maxCols, bool& violate) {
+    double totalCells = 0;
+    const size_t readCount = input->reads.size();
+    const size_t hapCount = input->haps.size();
+    for (size_t r = 0; r < readCount; r++) {
+        const size_t readLen = input->reads[r].bases.size();
+        for (size_t h = 0; h < hapCount; h++) {
+            if (readLen * (readLen + maxCols) <= 1 + READ_BLOCK_SIZE * MAX_READ_LEN) {
+                violate = true;
+                return totalCells;
+            }
+            // The per-pair product is narrowed to int before being accumulated.
+            int pairCells = readLen * input->haps[h].bases.size();
+            totalCells += pairCells;
+        }
+    }
+    return totalCells;
+}
+
+// Decides whether offloading `input` is estimated to be faster than running it with AVX.
+// Returns false when there are fewer reads than dies or when countCell() reports a violation; otherwise
+// compares the estimated AVX time with the sum of the estimated offload costs.
+// A non-positive `cellNum` means "not known yet" and makes the function count the cells itself.
+bool worthFPGA(pairhmmInput* input, short maxCols, double cellNum) {
+    if (input->reads.size() < DIE_NUM) {
+        return false;
+    }
+
+    bool violate = false;
+    if (cellNum <= 0) {
+        cellNum = countCell(input, maxCols, violate);
+    }
+    if (violate) {
+        return false;
+    }
+
+    const double avxPerf = AVX_PERF;
+    const double fpgaPerf = FPGA_PERF;
+
+    // Every term below is expressed in nanoseconds.
+    const double avxTime = cellNum / avxPerf;
+    const double prepareTime = 5e5;
+    const double hostToDram = 1 * 1e6;
+    const double dramToFpga = 0;
+    const double dramToHost = 5e5 + (input->reads.size() * input->haps.size() * 4) / 3.0;
+    const double recomputeTime = 1e6;
+    const double fpgaCompute = cellNum / fpgaPerf;
+
+    const double offloadTime = hostToDram + dramToFpga + dramToHost + fpgaCompute + prepareTime + recomputeTime;
+    return avxTime > offloadTime;
+}
+
+// Returns floor(MAX_RSDATA_NUM / largestPeCount * TOTAL_PE_NUM), evaluated in single precision, where
+// largestPeCount is the largest of the three per-SLR PE counts.
+int xilPairHMM::get_max_rsdata_num() {
+    int peCounts[3];
+    peCounts[0] = SLR0_PE_NUM;
+    peCounts[1] = SLR1_PE_NUM;
+    peCounts[2] = SLR2_PE_NUM;
+
+    int largestPeCount = peCounts[0];
+    for (int slr = 1; slr < 3; slr++) {
+        if (largestPeCount < peCounts[slr]) {
+            largestPeCount = peCounts[slr];
+        }
+    }
+
+    const float scaled = (float)MAX_RSDATA_NUM / (float)largestPeCount * (float)TOTAL_PE_NUM;
+    return (int)floor(scaled);
+}
+
+// Packs position `idx` of `input` into `cur_host_read`.
+// scores layout: base code (3 bits kept) from bit 28, then 7-bit fields for _q (bit 21), _i (bit 14),
+// _d (bit 7) and _c (bit 0). m2m is looked up from m2m_table with the unmasked _i and _d bytes.
+// NOTE(review): the table index uses the full 8-bit values while the packed fields keep only 7 bits -
+// presumably the qualities never exceed 127; confirm against the size of m2m_table.
+void xilPairHMM::convert_read_input(readDataPack* cur_host_read, Read* input, int idx) {
+    const uint8_t baseCode = ConvertChar::get(input->bases[idx]);
+    const uint8_t baseQual = input->_q[idx];
+    const uint8_t insQual = input->_i[idx];
+    const uint8_t delQual = input->_d[idx];
+    const uint8_t gcpQual = input->_c[idx];
+
+    uint32_t packed = (uint32_t)(baseCode & 0x7) << 28;
+    packed |= (uint32_t)(baseQual & 127) << 21;
+    packed |= (uint32_t)(insQual & 127) << 14;
+    packed |= (uint32_t)(delQual & 127) << 7;
+    packed |= (uint32_t)(gcpQual & 127);
+
+    const float matchToMatch = m2m_table[128 * insQual + delQual];
+
+    cur_host_read->scores = packed;
+    cur_host_read->m2m = matchToMatch;
+}
+
+// Estimated cell count for one read against one haplotype slot: the read length is extended by one, the
+// row count is raised to at least DEP_DIST, and the width is the extended read length plus maxCols.
+static inline int paddedCellCost(size_t readLen, short maxCols) {
+    int amended_read_length = readLen + 1;
+    int new_rows = amended_read_length;
+    if (new_rows < DEP_DIST) new_rows = DEP_DIST;
+    return (new_rows + 1) * (amended_read_length + maxCols);
+}
+
+/**
+ * Splits a contiguous segment of reads between the SLRs in proportion to their PE counts.
+ *
+ * Reads [read_base_index, read_base_index + totalNumRead) are walked in order. SLR0 receives reads until
+ * its accumulated estimated cell count reaches its share; with three dies SLR1 is filled the same way
+ * and SLR2 receives the remainder. With two dies SLR1 receives everything SLR0 did not take.
+ *
+ * @param input            reads and haplotypes of the whole job
+ * @param read_base_index  index of the first read of the segment
+ * @param hap_base_index   index of the first haplotype of the segment
+ * @param numRead0         out: number of reads assigned to SLR0
+ * @param numRead1         out: number of reads assigned to SLR1
+ * @param numRead2         out: number of reads assigned to SLR2 (0 when DIE_NUM == 2)
+ * @param totalNumRead     number of reads in the segment (not modified)
+ * @param totalNumHap      number of haplotypes in the segment (not modified)
+ * @param maxCols          longest haplotype length + 1, as computed by the caller
+ * @param violate          set to true when any active SLR would end up without reads; never cleared here
+ *
+ * Side effect: stores the total estimated cell count of the segment in curNumCell.
+ */
+void xilPairHMM::distributeReads(pairhmmInput* input,
+                                 int read_base_index,
+                                 int hap_base_index,
+                                 int& numRead0,
+                                 int& numRead1,
+                                 int& numRead2,
+                                 int& totalNumRead,
+                                 int& totalNumHap,
+                                 short maxCols,
+                                 bool& violate) {
+    const int read_end_index = read_base_index + totalNumRead;
+    const int hap_end_index = hap_base_index + totalNumHap;
+
+    // The outputs refer to long-lived buffers owned by the caller. Clear them first so that a count
+    // left over from a previous segment can never be mistaken for a result of this call.
+    numRead0 = 0;
+    numRead1 = 0;
+    numRead2 = 0;
+
+    // first get the total actual cells
+    double numCell = 0;
+    for (int i = read_base_index; i < read_end_index; i++) {
+        const int cur_numCell = paddedCellCost(input->reads[i].bases.size(), maxCols);
+        for (int j = hap_base_index; j < hap_end_index; j++) {
+            numCell += cur_numCell;
+        }
+    }
+    curNumCell = numCell;
+
+    // Share of the total work each SLR should receive; the last active SLR absorbs the rounding.
+    float SLR_numCells[3];
+    if (DIE_NUM == 2) {
+        SLR_numCells[0] = floor(numCell * SLR0_PE_NUM / (SLR0_PE_NUM + SLR1_PE_NUM));
+        SLR_numCells[1] = numCell - SLR_numCells[0];
+        SLR_numCells[2] = 0;
+    } else {
+        SLR_numCells[0] = floor(numCell * SLR0_PE_NUM / (SLR0_PE_NUM + SLR1_PE_NUM + SLR2_PE_NUM));
+        SLR_numCells[1] = floor(numCell * SLR1_PE_NUM / (SLR0_PE_NUM + SLR1_PE_NUM + SLR2_PE_NUM));
+        SLR_numCells[2] = numCell - SLR_numCells[0] - SLR_numCells[1];
+    }
+
+    // Hand reads to SLR0 until its share is reached.
+    float curCells = 0.0;
+    int readCount = 0;
+    int i = read_base_index;
+    for (; i < read_end_index; i++) {
+        const int cur_numCell = paddedCellCost(input->reads[i].bases.size(), maxCols);
+        for (int j = hap_base_index; j < hap_end_index; j++) {
+            curCells += cur_numCell;
+        }
+        readCount++;
+        if (curCells >= SLR_numCells[0]) {
+            numRead0 = readCount;
+            break;
+        }
+    }
+    // Still zero when the segment is empty or the share was never reached.
+    if (numRead0 <= 0) {
+        violate = true;
+        return;
+    }
+
+    if (DIE_NUM == 2) {
+        numRead1 = totalNumRead - numRead0;
+        if (numRead1 <= 0) violate = true;
+        numRead2 = 0;
+        return;
+    } else {
+        curCells = 0;
+        readCount = 0;
+        // The break above skipped the loop increment, so step past the last read given to SLR0.
+        i++;
+        for (; i < read_end_index; i++) {
+            const int cur_numCell = paddedCellCost(input->reads[i].bases.size(), maxCols);
+            for (int j = hap_base_index; j < hap_end_index; j++) {
+                curCells += cur_numCell;
+            }
+            readCount++;
+            if (curCells >= SLR_numCells[1]) {
+                break;
+            }
+        }
+        // readCount is exact whether the loop stopped at the share or ran out of reads; in the latter
+        // case nothing is left for SLR2 and the violation is reported below.
+        numRead1 = readCount;
+        numRead2 = totalNumRead - numRead0 - numRead1;
+        if (numRead1 <= 0 || numRead2 <= 0) violate = true;
+    }
+}
+
+/*void xilPairHMM::computePairhmmAVXSegment(pairhmmInput* input,
+                                          int read_base_index,
+                                          int hap_base_index,
+                                          int cur_numRead,
+                                          int cur_numHap,
+                                          vector<float>& output) {
+    testcase testCase;
+    for (int i = read_base_index; i < read_base_index + cur_numRead; i++) {
+        testCase.rslen = input->reads[i].bases.size();
+        testCase.rs = input->reads[i].bases.c_str();
+        testCase.i = input->reads[i]._i.c_str();
+        testCase.d = input->reads[i]._d.c_str();
+        testCase.c = input->reads[i]._c.c_str();
+        testCase.q = input->reads[i]._q.c_str();
+        for (int j = hap_base_index; j < hap_base_index + cur_numHap; j++) {
+            testCase.haplen = input->haps[j].bases.size();
+            testCase.hap = input->haps[j].bases.c_str();
+            output[i * input->haps.size() + j] = compute_fp_avxs(&testCase);
+        }
+    }
+}
+*/
+/**
+ * Builds the per-die read descriptors and read data for one segment.
+ *
+ * For every die the reads already assigned to it (host_input[k]->numRead) are grouped two at a time
+ * into readInfo records, the records are sorted by descending new_rows, and every run of slr_pu_num[k]
+ * records is then reversed. The read data is written to the device input buffer in the resulting
+ * order, and the per-PU read and iteration counters are accumulated round-robin over the PUs.
+ *
+ * Packed descriptor layout (infoPacked): first read length from bit 0, second read length from bit 8,
+ * result offset from bit 16, (iteration count - 1) from bit 37, "two reads present" flag at bit 58.
+ *
+ * @param input            reads and haplotypes of the whole job
+ * @param read_base_index  index of the first read of the segment
+ * @param cur_numRead      number of reads in the segment (unused here)
+ * @param cur_numHap       number of haplotypes in the segment
+ * @param maxCols          longest haplotype length + 1, as computed by the caller
+ * @param slr_pu_num       number of PUs on each die
+ * @param violate          only read here: when already set, the per-PU counters are left untouched
+ *
+ * NOTE(review): the read-length checks against MAX_READ_LEN are disabled in this path, so callers are
+ * presumably expected to supply reads that fit readData - confirm.
+ */
+void xilPairHMM::sortReads(pairhmmInput* input,
+                           int read_base_index,
+                           int cur_numRead,
+                           int cur_numHap,
+                           short maxCols,
+                           int slr_pu_num[3],
+                           bool& violate) {
+    vector<readInfo> sortedReadInfo[3];
+    int read_start_index = 0;
+    for (int k = 0; k < DIE_NUM; k++) {
+        for (int i = 0; i < host_input[k]->numRead; i += 2) {
+            readInfo curInfo;
+            int cur_read_len = input->reads[i + read_start_index + read_base_index].bases.size();
+            curInfo.readLen[0] = cur_read_len;
+            curInfo.big_rows = cur_read_len + 1;
+            curInfo.oneOrTwo = false;
+            curInfo.resultOffset = cur_numHap * i;
+            curInfo.readID[0] = i;
+            // Give the second slot defined values: for an unpaired last read it is never filled in,
+            // yet readLen[1] is added into infoPacked below and would otherwise leak indeterminate
+            // bits into the neighbouring result-offset field.
+            curInfo.readLen[1] = 0;
+            curInfo.readID[1] = 0;
+            if (i + 1 < host_input[k]->numRead) {
+                cur_read_len = input->reads[i + 1 + read_start_index + read_base_index].bases.size();
+                curInfo.readLen[1] = cur_read_len;
+                curInfo.oneOrTwo = true;
+                if (cur_read_len + 1 > curInfo.big_rows) curInfo.big_rows = cur_read_len + 1;
+                curInfo.readID[1] = i + 1;
+            }
+            if (curInfo.big_rows < DEP_DIST)
+                curInfo.new_rows = DEP_DIST;
+            else
+                curInfo.new_rows = curInfo.big_rows;
+            curInfo.curIterNum = (curInfo.new_rows + 1) * (curInfo.big_rows + maxCols);
+            curInfo.infoPacked = curInfo.readLen[0] + (curInfo.readLen[1] << 8) +
+                                 ((uint64_t)curInfo.resultOffset << 16) + (((uint64_t)curInfo.curIterNum - 1) << 37) +
+                                 ((uint64_t)curInfo.oneOrTwo << 58);
+            sortedReadInfo[k].push_back(curInfo);
+        }
+
+        // Longest groups first ...
+        sort(sortedReadInfo[k].begin(), sortedReadInfo[k].end(), cmpReadInfo);
+
+        // ... then flip every run of slr_pu_num[k] records (the last run may be shorter).
+        for (int j = 0; (size_t)j < sortedReadInfo[k].size(); j += slr_pu_num[k]) {
+            int upper_bound = slr_pu_num[k];
+            if ((size_t)(j + upper_bound) >= sortedReadInfo[k].size()) upper_bound = sortedReadInfo[k].size() - j;
+            reverse(sortedReadInfo[k].begin() + j, sortedReadInfo[k].begin() + j + upper_bound);
+        }
+        read_start_index += host_input[k]->numRead;
+    }
+
+    // Write the read data in sorted order; readID is relative to the first read of the owning die.
+    read_start_index = 0;
+    for (int slr_id = 0; slr_id < DIE_NUM; slr_id++) {
+        for (int i = 0; (size_t)i < sortedReadInfo[slr_id].size(); i++) {
+            for (int j = 0; j < sortedReadInfo[slr_id][i].readLen[0]; j++) {
+                convert_read_input(
+                    &(host_input[slr_id]->dataPack.readData[READ_BLOCK_SIZE * i][j]),
+                    &(input->reads[sortedReadInfo[slr_id][i].readID[0] + read_base_index + read_start_index]), j);
+            }
+            if (sortedReadInfo[slr_id][i].oneOrTwo) {
+                for (int j = 0; j < sortedReadInfo[slr_id][i].readLen[1]; j++) {
+                    convert_read_input(
+                        &(host_input[slr_id]->dataPack.readData[READ_BLOCK_SIZE * i + 1][j]),
+                        &(input->reads[sortedReadInfo[slr_id][i].readID[1] + read_base_index + read_start_index]), j);
+                }
+            }
+        }
+        read_start_index += host_input[slr_id]->numRead;
+    }
+    if (violate) return;
+
+    // Deal the records out to the PUs round-robin and accumulate the per-PU totals.
+    for (int slr_id = 0; slr_id < 3; slr_id++) {
+        int PU_id = 0;
+        for (int i = 0; (size_t)i < sortedReadInfo[slr_id].size(); i++) {
+            host_input[slr_id]->dataPack.iterNum[PU_id] += sortedReadInfo[slr_id][i].curIterNum;
+            host_input[slr_id]->dataPack.readInfo[i] = sortedReadInfo[slr_id][i].infoPacked;
+            host_input[slr_id]->dataPack.numReadPU[PU_id] += (sortedReadInfo[slr_id][i].oneOrTwo) ? 2 : 1;
+            PU_id = (PU_id == slr_pu_num[slr_id] - 1) ? 0 : PU_id + 1;
+        }
+    }
+
+    // Every read group is processed once per haplotype batch, so scale the iteration counts.
+    int hap_batch_size = cur_numHap / HAP_BLOCK_SIZE + (cur_numHap % HAP_BLOCK_SIZE != 0);
+    for (int slr_id = 0; slr_id < 3; slr_id++) {
+        for (int k = 0; k < slr_pu_num[slr_id]; k++) {
+            host_input[slr_id]->dataPack.iterNum[k] *= hap_batch_size;
+        }
+    }
+}
+
+// Fills the per-die device input buffers (host_input[*]) for one read/haplotype segment.
+// Steps: record the haplotype count and lengths, pack the haplotype bases, reset the per-PU counters,
+// split the reads between the dies (distributeReads) and write the read data and descriptors
+// (sortReads). The elapsed wall-clock time is added to data_prepare_time.
+//   input            reads and haplotypes of the whole job
+//   read_base_index  index of the first read of the segment
+//   hap_base_index   index of the first haplotype of the segment
+//   cur_numRead      number of reads in the segment
+//   cur_numHap       number of haplotypes in the segment
+//   violate          set by distributeReads when the segment cannot be split across the dies
+// NOTE: when `violate` is set the function returns before the timing is accumulated.
+void xilPairHMM::update_host_inputs_new(
+    pairhmmInput* input, int read_base_index, int hap_base_index, int cur_numRead, int cur_numHap, bool& violate) {
+    struct timespec time1, time2, time_diff;
+    clock_gettime(CLOCK_REALTIME, &time1);
+
+    for (int i = 0; i < DIE_NUM; i++) {
+        host_input[i]->numHap = cur_numHap;
+    }
+
+    // maxCols ends up as (longest haplotype length in the segment) + 1; every die gets the same
+    // haplotype length records.
+    // NOTE(review): the length check against MAX_HAP_LEN is disabled below - presumably callers only
+    // pass haplotypes that fit; confirm.
+    short maxCols = 0;
+    for (int i = 0; i < cur_numHap; i++) {
+        //   if (input->haps[i + hap_base_index].bases.size() > MAX_HAP_LEN) {
+        //      violate = true;
+        //      return;
+        //  }
+        short hapLenTmp = input->haps[i + hap_base_index].bases.size();
+        if (hapLenTmp + 1 > maxCols) {
+            maxCols = hapLenTmp + 1;
+        }
+        for (int j = 0; j < DIE_NUM; j++) {
+            host_input[j]->dataPack.hapDataLen[i].hapLen = hapLenTmp;
+            host_input[j]->dataPack.hapDataLen[i].oneDivHapLen = g_ctxf.INITIAL_CONSTANT / (float)(hapLenTmp);
+        }
+    }
+
+    // Pack the haplotype bases: entry [i][j] holds position j of HAP_BLOCK_SIZE consecutive haplotypes,
+    // 4 bits each. Positions past the end of a haplotype, or past the end of the haplotype list, are
+    // filled with 'N'. The same packed data is copied to every die.
+    for (int i = 0; i < MAX_HAPDATA_NUM / HAP_BLOCK_SIZE; i++) {
+        for (int j = 0; j < MAX_HAP_LEN; j++) {
+            uint16_t tmp = 0;
+            for (int k = 0; k < HAP_BLOCK_SIZE; k++) {
+                char cur_base;
+                if (i * HAP_BLOCK_SIZE + k + hap_base_index >= (int)input->haps.size())
+                    cur_base = 'N';
+                else if ((size_t)j >= input->haps[i * HAP_BLOCK_SIZE + k + hap_base_index].bases.size())
+                    cur_base = 'N';
+                else {
+                    cur_base = input->haps[i * HAP_BLOCK_SIZE + k + hap_base_index].bases[j];
+                }
+                tmp = tmp | ((uint16_t)(ConvertChar::get(cur_base)) << (4 * k));
+            }
+            for (int k = 0; k < DIE_NUM; k++) {
+                host_input[k]->dataPack.hapData[i][j] = tmp;
+            }
+        }
+    }
+
+    // distribute reads to each PE
+    // Each PU covers READ_BLOCK_SIZE * HAP_BLOCK_SIZE PEs.
+    int slr_pu_num[3];
+    slr_pu_num[0] = SLR0_PE_NUM / (READ_BLOCK_SIZE * HAP_BLOCK_SIZE);
+    slr_pu_num[1] = SLR1_PE_NUM / (READ_BLOCK_SIZE * HAP_BLOCK_SIZE);
+    slr_pu_num[2] = SLR2_PE_NUM / (READ_BLOCK_SIZE * HAP_BLOCK_SIZE);
+
+    // Reset the per-PU counters that sortReads accumulates into.
+    for (int i = 0; i < DIE_NUM; i++) {
+        for (int j = 0; j < 64; j++) {
+            host_input[i]->dataPack.numReadPU[j] = 0;
+            host_input[i]->dataPack.iterNum[j] = 0;
+        }
+    }
+
+    // The per-die read counts are written straight into host_input[*]->numRead.
+    distributeReads(input, read_base_index, hap_base_index, host_input[0]->numRead, host_input[1]->numRead,
+                    host_input[2]->numRead, cur_numRead, cur_numHap, maxCols, violate);
+    printf("numRead0 = %d, numRead1 = %d, numRead2 = %d, total read = %d\n", host_input[0]->numRead,
+           host_input[1]->numRead, host_input[2]->numRead, cur_numRead);
+    if (violate) {
+        printf("Either one SLR has 0 reads or this segment is too small to be run on FPGA, switch back to AVX\n");
+        return;
+    }
+    sortReads(input, read_base_index, cur_numRead, cur_numHap, maxCols, slr_pu_num, violate);
+    clock_gettime(CLOCK_REALTIME, &time2);
+    time_diff = diff_time(time1, time2);
+    // Accumulated in nanoseconds.
+    data_prepare_time += (double)(time_diff.tv_sec * 1e9 + time_diff.tv_nsec);
+}
+
+/**
+ * Fills the per-die device input buffers (host_input[*]) for one read/haplotype segment, keeping the
+ * reads in their original order.
+ *
+ * Steps: validate and record the haplotype lengths, split the reads between the dies
+ * (distributeReads), copy the read data, pack the haplotype bases and build the per-read-pair
+ * descriptors together with the per-PU read and iteration counters.
+ *
+ * @param input            reads and haplotypes of the whole job
+ * @param read_base_index  index of the first read of the segment
+ * @param hap_base_index   index of the first haplotype of the segment
+ * @param cur_numRead      number of reads in the segment
+ * @param cur_numHap       number of haplotypes in the segment
+ * @param violate          set to true (and the function returns early) when a haplotype exceeds
+ *                         MAX_HAP_LEN, a read exceeds MAX_READ_LEN, or distributeReads reports that the
+ *                         segment cannot be split across the dies
+ *
+ * NOTE(review): read lengths are kept in uint8_t, so this path presumably relies on MAX_READ_LEN being
+ * below 255 - confirm.
+ */
+void xilPairHMM::update_host_inputs(
+    pairhmmInput* input, int read_base_index, int hap_base_index, int cur_numRead, int cur_numHap, bool& violate) {
+    // maxCols ends up as (longest haplotype length in the segment) + 1.
+    short maxCols = 0;
+    for (int i = 0; i < cur_numHap; i++) {
+        if (input->haps[i + hap_base_index].bases.size() > MAX_HAP_LEN) {
+            violate = true;
+            return;
+        }
+        short hapLenTmp = input->haps[i + hap_base_index].bases.size();
+        if (hapLenTmp + 1 > maxCols) {
+            maxCols = hapLenTmp + 1;
+        }
+        for (int j = 0; j < DIE_NUM; j++) {
+            host_input[j]->dataPack.hapDataLen[i].hapLen = hapLenTmp;
+            host_input[j]->dataPack.hapDataLen[i].oneDivHapLen = g_ctxf.INITIAL_CONSTANT / (float)(hapLenTmp);
+        }
+    }
+    // The per-die read counts are written straight into host_input[*]->numRead.
+    distributeReads(input, read_base_index, hap_base_index, host_input[0]->numRead, host_input[1]->numRead,
+                    host_input[2]->numRead, cur_numRead, cur_numHap, maxCols, violate);
+    printf("numRead0 = %d, numRead1 = %d, numRead2 = %d, total read = %d\n", host_input[0]->numRead,
+           host_input[1]->numRead, host_input[2]->numRead, cur_numRead);
+    if (violate) {
+        printf("Either one SLR has 0 reads or this segment is too small to be run on FPGA, switch back to AVX\n");
+        return;
+    }
+    for (int i = 0; i < DIE_NUM; i++) {
+        host_input[i]->numHap = cur_numHap;
+    }
+    // Length of every read, indexed by die and by position within that die.
+    uint8_t readDataLen[3][MAX_RSDATA_NUM];
+
+    // Copy the reads: the first numRead of die 0 go to die 0, the next block to die 1, the rest to die 2.
+    for (int i = 0; i < cur_numRead; i++) {
+        int cur_read_len = input->reads[i + read_base_index].bases.size();
+        if (cur_read_len > MAX_READ_LEN) {
+            violate = true;
+            return;
+        }
+        if (i < host_input[0]->numRead) {
+            readDataLen[0][i] = cur_read_len;
+            for (int j = 0; j < cur_read_len; j++) {
+                convert_read_input(&(host_input[0]->dataPack.readData[i][j]), &(input->reads[i + read_base_index]), j);
+            }
+        } else if (i - host_input[0]->numRead < host_input[1]->numRead) {
+            int offset = i - host_input[0]->numRead;
+            readDataLen[1][offset] = cur_read_len;
+            for (int j = 0; j < cur_read_len; j++) {
+                convert_read_input(&(host_input[1]->dataPack.readData[offset][j]), &(input->reads[i + read_base_index]),
+                                   j);
+            }
+        } else if (DIE_NUM > 2) {
+            int offset = i - host_input[0]->numRead - host_input[1]->numRead;
+            readDataLen[2][offset] = cur_read_len;
+            for (int j = 0; j < cur_read_len; j++) {
+                convert_read_input(&(host_input[2]->dataPack.readData[offset][j]), &(input->reads[i + read_base_index]),
+                                   j);
+            }
+        }
+    }
+
+    // Pack the haplotype bases: entry [i][j] holds position j of HAP_BLOCK_SIZE consecutive haplotypes,
+    // 4 bits each; missing positions and missing haplotypes are filled with 'N'. The accumulator has
+    // the same width as the hapData element it is stored into.
+    for (int i = 0; i < MAX_HAPDATA_NUM / HAP_BLOCK_SIZE; i++) {
+        for (int j = 0; j < MAX_HAP_LEN; j++) {
+            uint16_t tmp = 0;
+            for (int k = 0; k < HAP_BLOCK_SIZE; k++) {
+                char cur_base;
+                if (i * HAP_BLOCK_SIZE + k + hap_base_index >= (int)input->haps.size())
+                    cur_base = 'N';
+                else if ((size_t)j >= input->haps[i * HAP_BLOCK_SIZE + k + hap_base_index].bases.size())
+                    cur_base = 'N';
+                else
+                    cur_base = input->haps[i * HAP_BLOCK_SIZE + k + hap_base_index].bases[j];
+                tmp = tmp | ((uint16_t)(ConvertChar::get(cur_base)) << (4 * k));
+            }
+            for (int k = 0; k < DIE_NUM; k++) {
+                host_input[k]->dataPack.hapData[i][j] = tmp;
+            }
+        }
+    }
+
+    // distribute reads to each PE
+    // Each PU covers READ_BLOCK_SIZE * HAP_BLOCK_SIZE PEs.
+    int slr_pu_num[3];
+    slr_pu_num[0] = SLR0_PE_NUM / (READ_BLOCK_SIZE * HAP_BLOCK_SIZE);
+    slr_pu_num[1] = SLR1_PE_NUM / (READ_BLOCK_SIZE * HAP_BLOCK_SIZE);
+    slr_pu_num[2] = SLR2_PE_NUM / (READ_BLOCK_SIZE * HAP_BLOCK_SIZE);
+    for (int i = 0; i < DIE_NUM; i++) {
+        for (int j = 0; j < 64; j++) {
+            host_input[i]->dataPack.numReadPU[j] = 0;
+            host_input[i]->dataPack.iterNum[j] = 0;
+        }
+    }
+
+    // Build one descriptor per read pair and deal the pairs out to the PUs round-robin.
+    // Descriptor layout: first read length from bit 0, second read length from bit 8, result offset
+    // from bit 16, (iteration count - 1) from bit 37, "two reads present" flag at bit 58.
+    for (int slr_id = 0; slr_id < 3; slr_id++) {
+        uint64_t resultOffset = 0;
+        int PU_id = 0;
+        for (int i = 0; i < host_input[slr_id]->numRead; i += READ_BLOCK_SIZE) {
+            bool oneOrTwo = (i + 1 < host_input[slr_id]->numRead);
+            // Only touch the second length when a second read exists: for an unpaired last read that
+            // slot was never written (and may lie past the end of the row).
+            uint8_t secondLen = oneOrTwo ? readDataLen[slr_id][i + 1] : 0;
+            uint64_t curInfo = (resultOffset << 16) + (secondLen << 8) + readDataLen[slr_id][i];
+            uint8_t big_rows = readDataLen[slr_id][i] + 1;
+            if (oneOrTwo && big_rows < secondLen + 1) {
+                big_rows = secondLen + 1;
+            }
+            uint8_t new_rows = big_rows;
+            if (big_rows < DEP_DIST) new_rows = DEP_DIST;
+            uint64_t curIterNum = (new_rows + 1) * (big_rows + maxCols);
+            host_input[slr_id]->dataPack.iterNum[PU_id] += curIterNum;
+            curInfo += (((curIterNum - 1) << 37) + ((uint64_t)oneOrTwo << 58));
+            host_input[slr_id]->dataPack.readInfo[i / READ_BLOCK_SIZE] = curInfo;
+            host_input[slr_id]->dataPack.numReadPU[PU_id] += oneOrTwo ? 2 : 1;
+            PU_id = (PU_id == slr_pu_num[slr_id] - 1) ? 0 : PU_id + 1;
+            resultOffset += READ_BLOCK_SIZE * cur_numHap;
+        }
+    }
+
+    // Every read group is processed once per haplotype batch, so scale the iteration counts.
+    int hap_batch_size = cur_numHap / HAP_BLOCK_SIZE + (cur_numHap % HAP_BLOCK_SIZE != 0);
+    for (int slr_id = 0; slr_id < 3; slr_id++) {
+        for (int k = 0; k < slr_pu_num[slr_id]; k++) {
+            host_input[slr_id]->dataPack.iterNum[k] *= hap_batch_size;
+            // The counter is a uint64_t; convert it explicitly so the argument matches %ld.
+            printf("slr %d, PU %d has %ld iterations in total\n", slr_id, k,
+                   (long)host_input[slr_id]->dataPack.iterNum[k]);
+        }
+    }
+}
+
+// Computes the likelihood of every (read, haplotype) pair of `input` on the FPGA.
+//
+// The work is tiled into segments of at most MAX_RSDATA_NUM reads x MAX_HAPDATA_NUM haplotypes. Each segment is
+// packed by update_host_inputs_new(), executed by computePairhmmFPGASeq() or computePairhmmFPGAOverlap(), and its
+// results (host_output[0], then [1], then [2]) are scattered into host_raw_output, which is indexed as
+// read * numHap + haplotype. Finally log10(raw) - g_ctxf.LOG10_INITIAL_CONSTANT is appended to likelihoodData.
+//
+// Returns 0 on success. If the FPGA run of a segment fails, usedFPGA is reset to false and -1 is returned with
+// output->likelihoodData left empty; previously the failure was ignored and stale buffer contents were reported.
+int xilPairHMM::computePairhmmxil(pairhmmInput* input, pairhmmOutput* output, bool& usedFPGA) {
+    struct timespec time1, time2, time_diff;
+    short maxCols = 0;
+
+    // Longest haplotype length + 1; only the disabled worthFPGA() check below consumes it.
+    for (int i = 0; (size_t)i < input->haps.size(); i++) {
+        short hapLenTmp = input->haps[i].bases.size();
+        if (hapLenTmp + 1 > maxCols) {
+            maxCols = hapLenTmp + 1;
+        }
+    }
+
+#if 0
+if (!worthFPGA(input, maxCols, 0)) {
+        printf("Using AVX\n");
+        usedFPGA = false;
+        computePairhmmAVX(input, output, false);
+        return 0;
+    }
+#endif
+    output->likelihoodData.clear();
+
+    printf("Trying to use FPGA, numRead = %zu, numHap = %zu\n", input->reads.size(), input->haps.size());
+
+    int singleFP_violate_count = 0; // never incremented: the AVX double-precision recompute path is disabled
+    int numRead = input->reads.size();
+    int numHap = input->haps.size();
+    host_raw_output.resize(numRead * numHap);
+    int max_rsdata_num = MAX_RSDATA_NUM;
+    int max_hapdata_num = MAX_HAPDATA_NUM;
+    // ceil(count / max) in integer arithmetic; an empty dimension yields 0 segments.
+    int numReadSeg = numRead / max_rsdata_num - (numRead % max_rsdata_num == 0) + 1;
+    int numHapSeg = numHap / max_hapdata_num - (numHap % max_hapdata_num == 0) + 1;
+    bool violate = false; // by-reference argument of update_host_inputs_new(); not acted upon in this function
+    long long base = 0;
+    int read_base_index = 0;
+    int hap_base_index = 0;
+
+    for (int i = 0; i < numReadSeg; i++) {
+        int cur_numRead = i + 1 < numReadSeg ? max_rsdata_num : numRead - i * max_rsdata_num;
+        hap_base_index = 0;
+        for (int j = 0; j < numHapSeg; j++) {
+            // Offset of this segment's first cell inside host_raw_output (widened before multiplying).
+            base = (long long)read_base_index * numHap + hap_base_index;
+            int cur_numHap = j + 1 < numHapSeg ? max_hapdata_num : numHap - j * max_hapdata_num;
+            update_host_inputs_new(input, read_base_index, hap_base_index, cur_numRead, cur_numHap, violate);
+            usedFPGA = true;
+            bool ran;
+            if (this->is_seq()) {
+                ran = computePairhmmFPGASeq(
+                    (char*)&(host_input[0]->dataPack), host_input[0]->numRead, host_input[0]->numHap, host_output[0],
+                    (char*)&(host_input[1]->dataPack), host_input[1]->numRead, host_input[1]->numHap, host_output[1],
+                    (char*)&(host_input[2]->dataPack), host_input[2]->numRead, host_input[2]->numHap, host_output[2]);
+            } else {
+                ran = computePairhmmFPGAOverlap(
+                    (char*)&(host_input[0]->dataPack), host_input[0]->numRead, host_input[0]->numHap, host_output[0],
+                    (char*)&(host_input[1]->dataPack), host_input[1]->numRead, host_input[1]->numHap, host_output[1],
+                    (char*)&(host_input[2]->dataPack), host_input[2]->numRead, host_input[2]->numHap, host_output[2]);
+            }
+            if (!ran) {
+                // Do not publish whatever happens to be in host_output after a failed launch or migration.
+                printf("Error: FPGA run failed for read segment %d, haplotype segment %d\n", i, j);
+                usedFPGA = false;
+                return -1;
+            }
+
+            // host_output[0..2] are consumed back to back, cur_numHap values per read of the segment.
+            const int n0 = host_input[0]->numRead * cur_numHap;
+            const int n1 = host_input[1]->numRead * cur_numHap;
+            for (int k = 0; k < cur_numHap * cur_numRead; k++) {
+                float cur_float_result;
+                if (k < n0)
+                    cur_float_result = host_output[0][k];
+                else if (k - n0 < n1)
+                    cur_float_result = host_output[1][k - n0];
+                else
+                    cur_float_result = host_output[2][k - n0 - n1];
+                host_raw_output[base + (k / cur_numHap) * numHap + (k % cur_numHap)] = cur_float_result;
+            }
+            hap_base_index += cur_numHap;
+        }
+        read_base_index += cur_numRead;
+    }
+
+    clock_gettime(CLOCK_REALTIME, &time1);
+
+    // Convert to log10 space. Values below MIN_ACCEPTED used to be recomputed in double precision with AVX; that
+    // fallback is disabled, so every result comes straight from the single-precision FPGA output.
+    for (int k = 0; k < numRead * numHap; k++) {
+        output->likelihoodData.push_back(log10(host_raw_output[k]) - g_ctxf.LOG10_INITIAL_CONSTANT);
+    }
+
+    clock_gettime(CLOCK_REALTIME, &time2);
+    time_diff = diff_time(time1, time2);
+    recompute_time += (double)(time_diff.tv_sec * 1e9 + time_diff.tv_nsec);
+    printf("This FPGA run is done, %d of %d single FP is overlowed and recomputed using AVX double precsion\n",
+           singleFP_violate_count, numRead * numHap);
+    return 0;
+}
+
+typedef union { // overlays a long long and a double on the same storage (bit-level view of a double)
+    long long u;
+    double f;
+} ieee754;
+
+typedef union { // overlays an int and a float on the same storage (bit-level view of a float)
+    int u;
+    float f;
+} ieee754s;
+#define LN10_HEX 0x40135d8e // IEEE-754 single-precision bit pattern of ln(10) ~= 2.3025851
+
+#define MAX_QUAL 254 // setMatchToMatchProbMerlin() clamps its result only while the larger quality is <= this
+#define MAX_JACOBIAN_TOLERANCE 8.0 // gap at/above which approximateLog10SumLog10Merlin() returns the larger term
+#define JACOBIAN_LOG_TABLE_STEP 0.0001 // spacing of the log10 gaps tabulated in jacobianLogTable
+#define JACOBIAN_LOG_TABLE_INV_STEP (1.0 / JACOBIAN_LOG_TABLE_STEP)
+
+// Rounds d to the nearest integer with ties going away from zero: positive inputs are shifted up by 0.5,
+// everything else (including 0) down by 0.5, and the float -> int conversion then truncates toward zero.
+// Inputs whose shifted value does not fit in an int are not guarded against.
+static int fastRoundMerlin(float d) {
+    const float half = (float)0.5;
+    const bool positive = d > ((float)0.0);
+    const float shifted = positive ? d + half : d - half;
+    return (int)shifted;
+}
+
+// Table-driven log10-space addition of two values supplied in either order: returns the larger one plus
+// jacobianLogTable[round(gap / JACOBIAN_LOG_TABLE_STEP)], where gap is their difference. With a table holding
+// log10(1 + 10^(-i * JACOBIAN_LOG_TABLE_STEP)) this approximates log10(10^small + 10^big).
+// The larger value is returned unchanged when either argument is infinite or gap >= MAX_JACOBIAN_TOLERANCE.
+// No bounds check is made on the table index; the caller must supply a table that covers every gap below it.
+float approximateLog10SumLog10Merlin(float small, float big, float* jacobianLogTable) {
+    // Order the operands without assuming the caller did.
+    const float hi = (small > big) ? small : big;
+    const float lo = (small > big) ? big : small;
+    if (isinf(lo) || isinf(hi)) {
+        return hi;
+    }
+    const float gap = hi - lo;
+    if (gap >= ((float)MAX_JACOBIAN_TOLERANCE)) {
+        return hi;
+    }
+    // hard rounding to the nearest table slot
+    const int slot = fastRoundMerlin((float)(gap * ((float)JACOBIAN_LOG_TABLE_INV_STEP)));
+    return hi + jacobianLogTable[slot];
+}
+
+// Match -> match transition probability (per the function name) for the two qualities: each quality q
+// contributes 10^(-q / 10); their sum s is formed in log10 space by approximateLog10SumLog10Merlin() and the
+// result is 1 - min(1, s), or the unclamped 1 - s when the larger quality exceeds MAX_QUAL.
+float setMatchToMatchProbMerlin(char insQual, char delQual, float* jacobianLogTable) {
+    const bool insIsSmaller = insQual <= delQual; // compared as plain char, then reinterpreted as unsigned
+    const unsigned char minQual = (unsigned char)(insIsSmaller ? insQual : delQual);
+    const unsigned char maxQual = (unsigned char)(insIsSmaller ? delQual : insQual);
+    const float LN10 = log(10.0);
+    const float minTerm = ((float)(-0.1)) * ((float)((int)minQual));
+    const float maxTerm = ((float)(-0.1)) * ((float)((int)maxQual));
+    const float log10Sum = approximateLog10SumLog10Merlin(minTerm, maxTerm, jacobianLogTable);
+    const float temp0 = exp(log10Sum * LN10); // 10^log10Sum
+    if (MAX_QUAL >= maxQual) {
+        return exp(log1p(-fmin((float)1.0, temp0))); // == 1 - min(1, temp0)
+    }
+    return 1.0 - temp0;
+}
+
+// Writes ph2pr.h, ph2pr_sub1.h and ph2pr_div3.h into the current directory: 128-entry initializer macros holding,
+// for x in [0, 128), p = float(10^(-x/10)), 1 - p and p / 3. Returns 0, or -1 if any of the files cannot be opened.
+int printPH2PR() {
+    const char* names[3] = {"ph2pr.h", "ph2pr_sub1.h", "ph2pr_div3.h"};
+    const char* heads[3] = {"#define PH2PR_INIT {", "#define PH2PR_SUB1_INIT {", "#define PH2PR_DIV3_INIT {"};
+    FILE* fp[3];
+    for (int t = 0; t < 3; t++) {
+        fp[t] = fopen(names[t], "w");
+        if (fp[t] == NULL) { // used to be handed to fprintf() unchecked; now reported after closing earlier files
+            printf("ERROR : cannot open %s for writing\n", names[t]);
+            while (t-- > 0) fclose(fp[t]);
+            return -1;
+        }
+        fprintf(fp[t], "%s", heads[t]);
+    }
+    const float LN10 = log(10.0);
+    for (int x = 0; x < 128; x++) {
+        const float p = exp(-((float)x) / 10.0 * LN10);
+        const double vals[3] = {p, 1.0 - p, p / 3.0}; // same double values the three tables always received
+        const char* sep = (x != 127) ? ", " : "";     // no separator after the last entry
+        for (int t = 0; t < 3; t++) fprintf(fp[t], "%10.60f%s", vals[t], sep);
+    }
+    for (int t = 0; t < 3; t++) {
+        fprintf(fp[t], "};");
+        fclose(fp[t]);
+    }
+    return 0;
+}
+
+// Writes m2m.h into the current directory: macro M2M_INIT, a brace list of 128 * 128 values in which entry
+// x * 128 + y is setMatchToMatchProbMerlin(x, y, table). Returns 0, or -1 if m2m.h cannot be opened.
+int printM2MPROB() {
+    FILE* fp = fopen("m2m.h", "w");
+    if (fp == NULL) { // used to be handed to fprintf() unchecked
+        printf("ERROR : cannot open m2m.h for writing\n");
+        return -1;
+    }
+    float jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE]; // entry k = log10(1 + 10^(-k * JACOBIAN_LOG_TABLE_STEP))
+    float LN10 = log(10.0);
+    for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
+        jacobianLogTable[k] = (float)(log10(1.0 + exp(-((float)k) * JACOBIAN_LOG_TABLE_STEP * LN10)));
+    }
+    fprintf(fp, "#define M2M_INIT {");
+    for (unsigned char x = 0; x < 128; x++) {
+        for (unsigned char y = 0; y < 128; y++) {
+            fprintf(fp, "%10.60f", setMatchToMatchProbMerlin(x, y, jacobianLogTable));
+            if (y != 127 || x != 127) fprintf(fp, ", \\\n"); // backslash-newline keeps the macro on one logical line
+        }
+    }
+    fprintf(fp, "}");
+    fclose(fp);
+    printf("table size = %d\n", 128 * 128); // one entry per (x, y) pair written above
+    return 0;
+}
+
+// Loads all of `filename` into a malloc'ed buffer with one extra terminating 0 byte, stored in *result (the caller
+// frees it). Returns the byte count, -1 if the file cannot be opened, or -2 if it cannot be sized (or exceeds the
+// int return range), allocated or read completely. On failure *result is NULL; the file is closed on every path.
+int load_file_to_memory(const char* filename, char** result) {
+    *result = NULL;
+    FILE* f = fopen(filename, "rb");
+    if (f == NULL) {
+        printf("ERROR : Kernel binary %s not exist!\n", filename);
+        return -1; // -1 means file opening fail
+    }
+    long size = (fseek(f, 0, SEEK_END) == 0) ? ftell(f) : -1L;
+    bool sized = size >= 0 && size <= 0x7fffffffL && fseek(f, 0, SEEK_SET) == 0;
+    char* data = sized ? (char*)malloc((size_t)size + 1) : NULL;
+    bool ok = data != NULL && fread(data, sizeof(char), (size_t)size, f) == (size_t)size;
+    fclose(f);              // the read-failure path used to return without closing the file
+    if (ok) data[size] = 0; // keep the buffer 0-terminated
+    if (!ok) free(data);    // free(NULL) is a no-op
+    *result = ok ? data : NULL;
+    return ok ? (int)size : -2; // -2 means file reading fail
+}
+
+// Default-constructed objects perform no FPGA setup, so they must not claim one: useFPGA used to be true here,
+// which sent computePairhmm() and ~xilPairHMM() to host buffers and an m2m_table that were never allocated.
+xilPairHMM::xilPairHMM() : useFPGA(false), host_input(), host_output(), m_seq(false), m2m_table(NULL), kernel_time(0) {
+    ConvertChar::init();
+}
+
+// Creates the OpenCL objects needed to run the pairhmm kernels stored in the binary file `bitstream`:
+// first platform -> first accelerator device -> context -> KERNEL_NUM out-of-order command queues -> program
+// (created from the binary, then built) -> kernel objects. _pmm_kernel[0] is always created; _pmm_kernel[1] and
+// _pmm_kernel[2] only when the object is not in sequential mode.
+// Returns true on success. On failure a warning is printed and false is returned; OpenCL objects created up to
+// that point are not released here.
+// The host copy of the binary is freed as soon as the program object has been created (it used to leak).
+bool xilPairHMM::init_FPGA(char* bitstream) {
+    // get platform info
+    char cl_platform_vendor[1001];
+    char cl_platform_name[1001];
+    cl_platform_vendor[0] = 0;
+    cl_platform_name[0] = 0;
+    int err;
+
+    err = clGetPlatformIDs(1, &platform_id, NULL);
+    if (err != CL_SUCCESS) {
+        printf("Warning: Failed to find an OpenCL platform!Code %i\n", err);
+        return false;
+    }
+    printf("Successfully create platform\n");
+    err = clGetPlatformInfo(platform_id, CL_PLATFORM_VENDOR, 1000, (void*)cl_platform_vendor, NULL);
+    if (err != CL_SUCCESS) {
+        printf("Warning: clGetPlatformInfo(CL_PLATFORM_VENDOR) failed! Code %i\n", err);
+        return false;
+    }
+    printf("CL_PLATFORM_VENDOR %s\n", cl_platform_vendor);
+    err = clGetPlatformInfo(platform_id, CL_PLATFORM_NAME, 1000, (void*)cl_platform_name, NULL);
+    if (err != CL_SUCCESS) {
+        printf("Warning: clGetPlatformInfo(CL_PLATFORM_NAME) failed! Code %i\n", err);
+        return false;
+    }
+    printf("CL_PLATFORM_NAME %s\n", cl_platform_name);
+
+    err = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ACCELERATOR, 1, &device_id, NULL);
+    if (err != CL_SUCCESS) {
+        printf("Warning: Failed to create a device group! Code %i\n", err);
+        return false;
+    }
+    printf("Successfully create device\n");
+
+    context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
+    if (!context) {
+        printf("Warning: Failed to create a compute context! Code %i\n", err);
+        return false;
+    }
+    printf("Successfully create context \n");
+    for (int i = 0; i < KERNEL_NUM; i++) {
+        commands[i] = clCreateCommandQueue(context, device_id, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+        if (!commands[i]) {
+            printf("Warning: Failed to create a command queue commands[%d]! Code %i\n", i, err);
+            return false;
+        }
+    }
+    printf("Successfully create command queue \n");
+
+    // Read the binary into host memory; it is only needed until the program object has been created.
+    unsigned char* kernelbinary = NULL;
+    int n_i = load_file_to_memory(bitstream, (char**)&kernelbinary);
+    if (n_i < 0) {
+        printf("Warning : failed to load kernel from binary: %s\n", bitstream);
+        return false;
+    }
+    printf("Successfully load kernel from binary: %s\n", bitstream);
+
+    int status;
+    size_t n = n_i;
+    program =
+        clCreateProgramWithBinary(context, 1, &device_id, &n, (const unsigned char**)&kernelbinary, &status, &err);
+    free(kernelbinary); // released on both the success and the failure path
+    if ((!program) || (err != CL_SUCCESS)) {
+        printf("Warning: Failed to create compute program from binary! Code %d\n", err);
+        return false;
+    }
+    printf("Success to create compute program from binary! \n");
+
+    // Build the program executable
+    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+    if (err != CL_SUCCESS) {
+        size_t len;
+        char buffer[2048] = ""; // stays a valid empty string if the build log cannot be fetched
+        printf("Warning: Failed to build program executable!\n");
+        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
+        printf("%s\n", buffer);
+        return false;
+    }
+    printf("Sucess to build program executable!\n");
+
+    // One kernel object per kernel instance, named "pairhmm:{pairhmm_<n>}" with n = 1..3. Sequential mode only
+    // drives the first instance, so the other two kernel objects are not created in that mode.
+    const std::string kname = "pairhmm";
+    const int numKernels = this->is_seq() ? 1 : 3;
+    for (int i = 0; i < numKernels; i++) {
+        const std::string kernelName = kname + ":{" + kname + "_" + std::to_string(i + 1) + "}";
+        _pmm_kernel[i] = clCreateKernel(program, kernelName.c_str(), &err);
+        if ((!_pmm_kernel[i]) || err != CL_SUCCESS) {
+            printf("Warning: Failed to create compute kernel for pairhmm core%d\n", i);
+            return false;
+        }
+    }
+    return true;
+}
+
+bool xilPairHMM::init_FPGA_buffer() { // wraps the host staging memory of each of the 3 slots in OpenCL buffers
+    int err;
+    for (int i = 0; i < 3; i++) { // CL_MEM_USE_HOST_PTR: buffers are created over host_input[i] / host_output[i]
+        OCL_CHECK(err, _input_buffer[i] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+                                                         sizeof(InputDataPackOpt), &(host_input[i]->dataPack), &err));
+        OCL_CHECK(err, _output_buffer[i] =
+                           clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
+                                          sizeof(float) * MAX_RSDATA_NUM * MAX_HAPDATA_NUM, host_output[i], &err));
+    }
+    return true; // unconditional; error handling is whatever OCL_CHECK (from xcl2.hpp, not shown here) does
+}
+
+bool xilPairHMM::destroy_FPGA_buffer() { // releases the six cl_mem objects created by init_FPGA_buffer()
+    for (int i = 0; i < 3; i++) {
+        clReleaseMemObject(_input_buffer[i]); // return codes are ignored
+        clReleaseMemObject(_output_buffer[i]);
+    }
+    return true; // always true; the handles themselves are left as they were (not reset)
+}
+
+// Creates a PairHMM runner for the FPGA binary at `bitstream`. With seq == true the three work packages are run
+// one after another on a single kernel object, otherwise on three kernel objects (see the is_seq() users).
+// The M2M_INIT table is copied to the heap unconditionally; the 4096-byte aligned host buffers and their OpenCL
+// wrappers only exist when init_FPGA() succeeded, which is what useFPGA records afterwards.
+xilPairHMM::xilPairHMM(char* bitstream, bool seq) {
+    ConvertChar::init();
+    m_seq = seq;
+    float m2m[128 * 128] = M2M_INIT;
+    m2m_table = (float*)malloc(128 * 128 * sizeof(float));
+    for (int i = 0; i < 128 * 128; i++) {
+        m2m_table[i] = m2m[i];
+    }
+    data_prepare_time = cl_write_time = kernel_time = cl_read_time = recompute_time = 0;
+    peak_kernel_gcups = 0;
+    curNumCell = 0; // was left uninitialized although both FPGA paths read it for the peak-GCUPS bookkeeping
+    printf("Start the constructor\n");
+    if ((useFPGA = init_FPGA(bitstream))) {
+        for (int i = 0; i < 3; i++) {
+            host_input[i] = (FPGAInput*)memalign(4096, sizeof(FPGAInput));
+            host_output[i] = (float*)memalign(4096, sizeof(float) * MAX_RSDATA_NUM * MAX_HAPDATA_NUM);
+        }
+        useFPGA = init_FPGA_buffer();
+        printf("init FPGA buffer succeeds\n");
+    }
+    printf("depth of input is %zu 512-bit, %zu bytes \n", sizeof(InputDataPackOpt) / 64, sizeof(InputDataPackOpt));
+}
+
+// Sequential schedule: processes the three work packages one after another on the single kernel object
+// _pmm_kernel[0], re-binding its four arguments (_input_buffer[k], _output_buffer[k], numRead<k>, numHap<k>)
+// before each launch.
+// Phases: migrate the DIE_NUM input buffers to the device, launch three times, migrate the output buffers back.
+// The wall-clock nanoseconds of each phase are added to cl_write_time / kernel_time / cl_read_time, and
+// peak_kernel_gcups is raised when curNumCell / kernel-phase time exceeds it. input<k> / output<k> are unused.
+// Returns false (after printing the OpenCL error code) if a command cannot be enqueued, true otherwise.
+bool xilPairHMM::computePairhmmFPGASeq(char* input0,
+                                       int numRead0,
+                                       int numHap0,
+                                       float* output0,
+                                       char* input1,
+                                       int numRead1,
+                                       int numHap1,
+                                       float* output1,
+                                       char* input2,
+                                       int numRead2,
+                                       int numHap2,
+                                       float* output2) {
+    int err;
+    struct timespec t_begin, t_end;
+    // Nanoseconds between two clock samples.
+    auto elapsed_ns = [](struct timespec from, struct timespec to) -> double {
+        struct timespec d = diff_time(from, to);
+        return (double)(d.tv_sec * 1e9 + d.tv_nsec);
+    };
+    // Points the four arguments of _pmm_kernel[0] at work package `job`.
+    auto bind_args = [this](int job, int numRead, int numHap) {
+        int slot = 0;
+        clSetKernelArg(_pmm_kernel[0], slot++, sizeof(cl_mem), &_input_buffer[job]);
+        clSetKernelArg(_pmm_kernel[0], slot++, sizeof(cl_mem), &_output_buffer[job]);
+        clSetKernelArg(_pmm_kernel[0], slot++, sizeof(int), &numRead);
+        clSetKernelArg(_pmm_kernel[0], slot++, sizeof(int), &numHap);
+    };
+
+    // Phase 1: host -> device transfer of the packed inputs (argument binding for package 0 is timed with it).
+    clock_gettime(CLOCK_REALTIME, &t_begin);
+    bind_args(0, numRead0, numHap0);
+    err = clEnqueueMigrateMemObjects(commands[0], DIE_NUM, _input_buffer, 0, 0, nullptr, nullptr);
+    if (err != CL_SUCCESS) {
+        printf("Error: Failed to migrate write to device buffer! Code %d\n", err);
+        return false;
+    }
+    clFinish(commands[0]);
+    clock_gettime(CLOCK_REALTIME, &t_end);
+    cl_write_time += elapsed_ns(t_begin, t_end);
+
+    // Phase 2: three launches; the queue is drained after the second and after the third launch only.
+    clock_gettime(CLOCK_REALTIME, &t_begin);
+    err = clEnqueueTask(commands[0], _pmm_kernel[0], 0, nullptr, nullptr);
+    if (err != CL_SUCCESS) {
+        printf("Error: Failed to execute kernel0! Code %d\n", err);
+        return false;
+    }
+    bind_args(1, numRead1, numHap1);
+    err = clEnqueueTask(commands[0], _pmm_kernel[0], 0, nullptr, nullptr);
+    if (err != CL_SUCCESS) {
+        printf("Error: Failed to execute kernel1! Code %d\n", err);
+        return false;
+    }
+    clFinish(commands[0]);
+    bind_args(2, numRead2, numHap2);
+    err = clEnqueueTask(commands[0], _pmm_kernel[0], 0, nullptr, nullptr);
+    if (err != CL_SUCCESS) {
+        printf("Error: Failed to execute kernel2! Code %d\n", err);
+        return false;
+    }
+    clFinish(commands[0]);
+    clock_gettime(CLOCK_REALTIME, &t_end);
+    const double cur_kernel_time = elapsed_ns(t_begin, t_end);
+    if (curNumCell / cur_kernel_time > peak_kernel_gcups) peak_kernel_gcups = curNumCell / cur_kernel_time;
+    kernel_time += cur_kernel_time;
+
+    // Phase 3: device -> host transfer of the results.
+    clock_gettime(CLOCK_REALTIME, &t_begin);
+    err = clEnqueueMigrateMemObjects(commands[0], DIE_NUM, _output_buffer, CL_MIGRATE_MEM_OBJECT_HOST, 0, nullptr,
+                                     nullptr);
+    if (err != CL_SUCCESS) {
+        printf("Error: Failed to migrate read from device buffer! Code %d\n", err);
+        return false;
+    }
+    clFinish(commands[0]);
+    clock_gettime(CLOCK_REALTIME, &t_end);
+    cl_read_time += elapsed_ns(t_begin, t_end);
+
+    clFlush(commands[0]);
+    clFinish(commands[0]);
+    return true;
+}
+
+// Overlapped schedule: launches the three kernel objects _pmm_kernel[0..2] back to back on commands[0], each
+// bound to its own _input_buffer[k] / _output_buffer[k] pair and numRead<k> / numHap<k>. commands[0] is created
+// as an out-of-order queue in init_FPGA() and the launches carry no event dependencies, so the three kernels
+// may execute concurrently.
+// Phases: migrate the DIE_NUM input buffers to the device, run the kernels, migrate the output buffers back.
+// The wall-clock nanoseconds of each phase are added to cl_write_time / kernel_time / cl_read_time, and
+// peak_kernel_gcups is raised when curNumCell / kernel-phase time exceeds it. input<k> / output<k> are unused.
+// Returns false (after printing the OpenCL error code) if a command cannot be enqueued, true otherwise.
+// Every event obtained here is released again; previously event_read was leaked on every call and the events
+// already obtained were leaked on the error paths.
+bool xilPairHMM::computePairhmmFPGAOverlap(char* input0,
+                                           int numRead0,
+                                           int numHap0,
+                                           float* output0,
+                                           char* input1,
+                                           int numRead1,
+                                           int numHap1,
+                                           float* output1,
+                                           char* input2,
+                                           int numRead2,
+                                           int numHap2,
+                                           float* output2) {
+    printf("Overlap solution \n");
+    int err;
+    struct timespec time1, time2, time_diff;
+    cl_event event_kernel[3];
+    cl_event event_write;
+    cl_event event_read;
+    const int numReads[3] = {numRead0, numRead1, numRead2};
+    const int numHaps[3] = {numHap0, numHap1, numHap2};
+    clock_gettime(CLOCK_REALTIME, &time1);
+
+    // Bind kernel object k to buffer pair k and to its read / haplotype counts.
+    for (int k = 0; k < 3; k++) {
+        int narg = 0;
+        clSetKernelArg(_pmm_kernel[k], narg++, sizeof(cl_mem), &_input_buffer[k]);
+        clSetKernelArg(_pmm_kernel[k], narg++, sizeof(cl_mem), &_output_buffer[k]);
+        clSetKernelArg(_pmm_kernel[k], narg++, sizeof(int), &numReads[k]);
+        clSetKernelArg(_pmm_kernel[k], narg++, sizeof(int), &numHaps[k]);
+    }
+
+    // Phase 1: host -> device transfer of the packed inputs.
+    err = clEnqueueMigrateMemObjects(commands[0], DIE_NUM, _input_buffer, 0, 0, NULL, &event_write);
+    if (err != CL_SUCCESS) {
+        printf("Error: Failed to migrate write to device buffer! Code %d\n", err);
+        return false;
+    }
+    clWaitForEvents(1, &event_write);
+
+    clock_gettime(CLOCK_REALTIME, &time2);
+    time_diff = diff_time(time1, time2);
+    cl_write_time += (double)(time_diff.tv_sec * 1e9 + time_diff.tv_nsec);
+    clReleaseEvent(event_write); // the transfer has completed; released outside the timed section
+
+    // Phase 2: enqueue all three kernels, then wait for the three completion events together.
+    // A failed launch returns immediately, as before, but first drops the events of the earlier launches.
+    clock_gettime(CLOCK_REALTIME, &time1);
+    for (int k = 0; k < 3; k++) {
+        err = clEnqueueTask(commands[0], _pmm_kernel[k], 0, NULL, &event_kernel[k]);
+        if (err != CL_SUCCESS) {
+            printf("Error: Failed to execute kernel%d! Code %d\n", k, err);
+            while (k-- > 0) clReleaseEvent(event_kernel[k]);
+            return false;
+        }
+    }
+    clWaitForEvents(3, event_kernel);
+    clock_gettime(CLOCK_REALTIME, &time2);
+    for (int k = 0; k < 3; k++) clReleaseEvent(event_kernel[k]);
+    time_diff = diff_time(time1, time2);
+    double cur_kernel_time = (double)(time_diff.tv_sec * 1e9 + time_diff.tv_nsec);
+    if (curNumCell / cur_kernel_time > peak_kernel_gcups) peak_kernel_gcups = curNumCell / cur_kernel_time;
+    kernel_time += cur_kernel_time;
+
+    // Phase 3: device -> host transfer of the results.
+    clock_gettime(CLOCK_REALTIME, &time1);
+    err = clEnqueueMigrateMemObjects(commands[0], DIE_NUM, _output_buffer, CL_MIGRATE_MEM_OBJECT_HOST, 0, NULL,
+                                     &event_read);
+    if (err != CL_SUCCESS) {
+        printf("Error: Failed to migrate read from device buffer! Code %d\n", err);
+        return false;
+    }
+
+    clWaitForEvents(1, &event_read);
+    clock_gettime(CLOCK_REALTIME, &time2);
+    clReleaseEvent(event_read); // was never released before
+    time_diff = diff_time(time1, time2);
+    cl_read_time += (double)(time_diff.tv_sec * 1e9 + time_diff.tv_nsec);
+
+    // Drain the queue before the caller reads host_output.
+    clFlush(commands[0]);
+    clFinish(commands[0]);
+    return true;
+}
+
+double xilPairHMM::get_kernel_time() { // kernel-phase wall-clock time accumulated by the FPGA runs, in nanoseconds
+    return kernel_time;
+}
+
+// Runs the FPGA path when available; no CPU fallback exists here, so otherwise only usedFPGA = false is reported.
+void xilPairHMM::computePairhmm(pairhmmInput* input, pairhmmOutput* output, bool& usedFPGA) {
+    usedFPGA = false; // always defined for the caller; computePairhmmxil() sets it to true once work is submitted
+    if (useFPGA) computePairhmmxil(input, output, usedFPGA);
+}
+
+// Releases the host memory owned by the object.
+//  - host_input[i] / host_output[i] are only allocated when FPGA setup succeeded, hence the useFPGA guard.
+//  - m2m_table is malloc'ed by xilPairHMM(char*, bool) before FPGA setup is attempted, so it is freed outside
+//    that guard; it used to leak whenever init_FPGA() returned false.
+//
+// NOTE(review): the OpenCL objects (buffers, kernels, program, command queue, context) are still NOT released
+// here -- the release calls were already disabled in this destructor. Re-enabling them as they were written
+// would be wrong for _pmm_kernel[1] and _pmm_kernel[2], which init_FPGA() does not create in sequential mode.
+// Confirm the intended teardown before adding it.
+//
+// The accumulated phase timings (cl_write_time, kernel_time, cl_read_time, recompute_time, peak_kernel_gcups)
+// are not printed here; get_kernel_time() exposes the kernel total.
+xilPairHMM::~xilPairHMM() {
+    if (useFPGA) {
+        // Staging buffers obtained with memalign() in the constructor.
+        for (int i = 0; i < 3; i++) {
+            free(host_input[i]);
+            free(host_output[i]);
+        }
+        host_raw_output.clear();
+    }
+    // Heap copy of the M2M_INIT table.
+    free(m2m_table);
+}
diff --git a/genomics/L2/tests/pairhmm_8x8/src/pairHmm.hpp b/genomics/L2/tests/pairhmm_8x8/src/pairHmm.hpp
new file mode 100644
index 0000000000..dbb8b0718d
--- /dev/null
+++ b/genomics/L2/tests/pairhmm_8x8/src/pairHmm.hpp
@@ -0,0 +1,141 @@
+/*
+ * (c) Copyright 2022 Xilinx, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+#ifndef PAIRHMM_HPP_
+#define PAIRHMM_HPP_
+#include <CL/opencl.h>
+#include <CL/cl_ext.h>
+//#include "avx_impl.h"
+//#include "baseline_impl.h"
+#include "xcl2.hpp"
+
+#include "Context.h"
+#include "host_type.h"
+#define KERNEL_NUM 1 // size of xilPairHMM::commands, i.e. the number of command queues (not of kernel objects)
+
+#define TOTAL_PE_NUM (SLR0_PE_NUM + SLR1_PE_NUM + SLR2_PE_NUM) // sum of the three per-SLR counts defined elsewhere
+using namespace std; // NOTE(review): leaks into every includer; the class below needs it for its vector<float> member
+
+// Host-side driver for the PairHMM FPGA kernels: owns the OpenCL handles, three host staging buffer pairs
+// (slots 0..2) and the timing counters, and feeds a pairhmmInput to the device in batches that fit the buffers.
+// Typical use: construct with the path of the FPGA binary, then call computePairhmm() per input.
+// NOTE(review): raw malloc'ed and OpenCL resources are held without a user-declared copy constructor or
+// assignment operator, so instances should not be copied.
+class xilPairHMM {
+   public:
+    xilPairHMM(); // performs no OpenCL setup and allocates no buffers
+    xilPairHMM(char* bitstream, bool seq = false); // seq: run the three work packages on one kernel object
+    void computePairhmm(pairhmmInput* input, pairhmmOutput* output, bool& usedFPGA); // public entry point
+    // The baseline / AVX CPU implementations (computePairhmmBaseline, computePairhmmAVX and
+    // computePairhmmAVXSegment) are not declared in this build; only the FPGA path below is available.
+    // usedFPGA is set to true by computePairhmmxil() once work is submitted to the device.
+    int computePairhmmxil(pairhmmInput* input, pairhmmOutput* output, bool& usedFPGA);
+    double get_kernel_time(); // accumulated kernel-phase time in nanoseconds
+    ~xilPairHMM();
+
+   private:
+    bool is_seq(void) { return m_seq; }
+    bool computePairhmmFPGASeq(char* input0,
+                               int numRead0,
+                               int numHap0,
+                               float* output0,
+                               char* input1,
+                               int numRead1,
+                               int numHap1,
+                               float* output1,
+                               char* input2,
+                               int numRead2,
+                               int numHap2,
+                               float* output2);
+    bool computePairhmmFPGAOverlap(char* input0,
+                                   int numRead0,
+                                   int numHap0,
+                                   float* output0,
+                                   char* input1,
+                                   int numRead1,
+                                   int numHap1,
+                                   float* output1,
+                                   char* input2,
+                                   int numRead2,
+                                   int numHap2,
+                                   float* output2);
+    bool init_FPGA(char* bitstream); // platform, device, context, queues, program and kernel objects
+    bool init_FPGA_buffer();         // cl_mem wrappers over host_input / host_output
+    bool destroy_FPGA_buffer();      // releases those cl_mem wrappers
+    int get_max_rsdata_num();
+    void convert_read_input(readDataPack* cur_host_read, Read* input, int idx);
+    void distributeReads(pairhmmInput* input,
+                         int read_base_index,
+                         int hap_base_index,
+                         int& numRead0,
+                         int& numRead1,
+                         int& numRead2,
+                         int& totalNumRead,
+                         int& totalNumHap,
+                         short maxCols,
+                         bool& violate);
+    void sortReads(pairhmmInput* input,
+                   int read_base_index,
+                   int cur_numRead,
+                   int cur_numHap,
+                   short maxCols,
+                   int slr_pu_num[3],
+                   bool& violate);
+    void update_host_inputs(
+        pairhmmInput* input, int read_base_index, int hap_base_index, int cur_numRead, int cur_numHap, bool& violate);
+    void update_host_inputs_new(
+        pairhmmInput* input, int read_base_index, int hap_base_index, int cur_numRead, int cur_numHap, bool& violate);
+    bool useFPGA; // gates computePairhmm() and the buffer frees in the destructor
+    cl_platform_id platform_id;            // platform id
+    cl_device_id device_id;                // compute device id
+    cl_context context;                    // compute context
+    cl_command_queue commands[KERNEL_NUM]; // compute command queue
+    cl_program program;                    // compute program
+    cl_mem _input_buffer[3];  // created over host_input[i]->dataPack
+    cl_mem _output_buffer[3]; // created over host_output[i]
+    cl_kernel _pmm_kernel[3]; // [1] and [2] are only created when m_seq is false
+    FPGAInput* host_input[3]; // 4096-byte aligned, allocated in xilPairHMM(char*, bool)
+    float* host_output[3];    // MAX_RSDATA_NUM * MAX_HAPDATA_NUM floats each
+    vector<float> host_raw_output; // numRead * numHap raw results, indexed read * numHap + haplotype
+    Context<float> g_ctxf; // provides LOG10_INITIAL_CONSTANT for the float results
+    Context<double> g_ctxd;
+    bool m_seq;       // true: sequential schedule on _pmm_kernel[0] only
+    float* m2m_table; // heap copy of the 128 x 128 M2M_INIT table
+    double data_prepare_time;
+    double cl_write_time;     // nanoseconds spent in host -> device migration
+    double kernel_time;       // nanoseconds spent in the kernel phase
+    double peak_kernel_gcups; // largest curNumCell / kernel-phase nanoseconds seen so far
+    double curNumCell;        // numerator of that ratio; NOTE(review): assigned outside the code reviewed here
+    double cl_read_time;      // nanoseconds spent in device -> host migration
+    double recompute_time;    // nanoseconds spent converting raw results to log10 likelihoods
+};
+double countCell(pairhmmInput* input, short maxCols); // definition not part of this header
+bool worthFPGA(pairhmmInput* input, short maxCols, double cellNum); // its only visible use is a disabled (#if 0) check
+int printM2MPROB(); // regenerates m2m.h (the M2M_INIT table) in the current directory
+struct timespec diff_time(struct timespec start, struct timespec end); // used as the elapsed time from start to end
+
+struct readInfo { // NOTE(review): not used inside this header; the notes below are inferred from names only
+    uint8_t readLen[READ_BLOCK_SIZE]; // presumably one length per read of a block -- TODO confirm
+    uint8_t big_rows;
+    uint8_t new_rows;
+    bool oneOrTwo;
+    int resultOffset;
+    int curIterNum;
+    short readID[READ_BLOCK_SIZE]; // presumably the ids of the same READ_BLOCK_SIZE reads -- TODO confirm
+    uint64_t infoPacked;
+};
+
+#endif
diff --git a/genomics/L2/tests/pairhmm_8x8/utils.mk b/genomics/L2/tests/pairhmm_8x8/utils.mk
new file mode 100644
index 0000000000..0ee80e90da
--- /dev/null
+++ b/genomics/L2/tests/pairhmm_8x8/utils.mk
@@ -0,0 +1,270 @@
+#
+# Copyright 2019-2022 Xilinx, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# vitis makefile-generator v2.0.6
+#
+#+-------------------------------------------------------------------------------
+# The following parameters are assigned with default values. These parameters can
+# be overridden through the make command line
+#+-------------------------------------------------------------------------------
+
+# Build-report switches. Each defaults to 'no'; pass e.g. REPORT=yes on the make command line to enable one.
+DEBUG := no
+PROFILE := no
+REPORT := no
+
+# REPORT set to anything but 'no': request both report kinds,
+# 'estimate' (estimate report) and 'system' (system report).
+ifneq (no,$(REPORT))
+VPP_LDFLAGS += --report estimate --report system
+endif
+
+# PROFILE=yes: generates profile summary report
+ifeq (yes,$(PROFILE))
+VPP_LDFLAGS += --profile_kernel data:all:all:all
+endif
+
+# DEBUG=yes: generates debug summary report
+ifeq (yes,$(DEBUG))
+VPP_LDFLAGS += --dk protocol:all:all:all
+endif
+
+#Check environment setup
+# Fall back to the default install locations when the tool roots are not already
+# defined, and export them so recipes see the same values.
+ifndef XILINX_XRT
+export XILINX_XRT = /opt/xilinx/xrt
+endif
+ifndef XILINX_VITIS
+export XILINX_VITIS = /opt/xilinx/Vitis/$(TOOL_VERSION)
+endif
+
+# Warn when PLATFORM_NAME matches no PLATFORM_ALLOWLIST entry and fail when it matches a PLATFORM_BLOCKLIST entry.
+# Entries are used as grep patterns. Only POSIX sh syntax is used: '[[ ]]' is a bashism and make runs recipes with /bin/sh unless SHELL is overridden.
+.PHONY: check_device
+check_device:
+	@set -eu; \
+	inallowlist=False; inblocklist=False; \
+	for dev in $(PLATFORM_ALLOWLIST); do \
+	    if echo "$(PLATFORM_NAME)" | grep -q -e "$$dev"; then inallowlist=True; fi; \
+	done; \
+	for dev in $(PLATFORM_BLOCKLIST); do \
+	    if echo "$(PLATFORM_NAME)" | grep -q -e "$$dev"; then inblocklist=True; fi; \
+	done; \
+	if [ "$$inallowlist" = False ]; then \
+	    echo "[Warning]: The device $(PLATFORM_NAME) not in allowlist."; \
+	fi; \
+	if [ "$$inblocklist" = True ]; then \
+	    echo "[ERROR]: The device $(PLATFORM_NAME) in blocklist."; exit 1; \
+	fi
+
+#get HOST_ARCH by PLATFORM: map the 'CPU Type' printed by platforminfo to x86 / aarch32 / aarch64
+ifneq (,$(PLATFORM))
+HOST_ARCH_temp := $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
+ifeq ($(HOST_ARCH_temp), x86)
+HOST_ARCH := x86
+else ifeq ($(HOST_ARCH_temp), cortex-a9)
+HOST_ARCH := aarch32
+else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
+HOST_ARCH := aarch64
+endif # any other CPU type leaves HOST_ARCH untouched
+endif # PLATFORM given; ':=' above runs platforminfo once instead of once per comparison
+
+
+#get suffix of kernel by PLATFORM
+# VITIS_VER   : version string taken from the output of 'v++ --version'
+# DEVICE_TYPE : 'FPGA Family' line reported by platforminfo for $(PLATFORM)
+VITIS_VER = $(shell v++ --version | grep 'v++' | sed 's/^[[:space:]]*//' | sed -e 's/^[*]* v++ v//g' | cut -d " " -f1)
+DEVICE_TYPE = $(shell platforminfo -p $(PLATFORM) | grep 'FPGA Family' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
+# Link output format is xclbin, except on versal with Vitis 2022.1 or later, where it is xsa.
+LINK_TARGET_FMT := xclbin
+ifeq ($(DEVICE_TYPE), versal)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+LINK_TARGET_FMT := xsa
+endif
+endif
+
+#Checks for Device Family (assignments carry no leading tab so make can never take them for recipe lines)
+ifeq ($(HOST_ARCH), aarch32)
+DEV_FAM = 7Series
+else ifeq ($(HOST_ARCH), aarch64)
+DEV_FAM = Ultrascale
+endif
+#Checks for Correct architecture
+# NOTE: an empty HOST_ARCH compares equal to the (empty) filter result, so only a non-empty unsupported value is rejected.
+ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
+$(error HOST_ARCH variable not set, please set correctly and rerun)
+endif
+
+# The targets below only report or validate; they create no files, so declare them phony.
+.PHONY: check_version check_sysroot
+check_version:
+ifneq (,$(and $(shell which git),$(wildcard $(XFLIB_DIR)/.git)))
+	@cd $(XFLIB_DIR) && git log --graph --pretty=format:'%Cred%h%Creset -%C(yellow)%d%Creset %s %Cgreen(%cr) %C(bold blue)<%an>%Creset' --abbrev-commit -n 1 && cd -
+endif # git found on PATH and $(XFLIB_DIR)/.git exists
+
+#Checks for SYSROOT
+check_sysroot:
+ifneq ($(HOST_ARCH), x86)
+ifndef SYSROOT
+	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+
+#Checks for g++: pick the host compiler. x86 needs g++ major >= CXX_V (else fall back to the copy under XILINX_VIVADO or abort); aarch64/aarch32 use the cross g++ under XILINX_VITIS
+CXX := g++
+ifeq ($(HOST_ARCH), x86)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else # comparison did not yield 1
+CXX_VER := 6.2.0
+endif # CXX_VER chosen
+CXX_V := $(shell echo $(CXX_VER) | awk -F. '{print tolower($$1)}')
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
+ifndef XILINX_VIVADO
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
+else # XILINX_VIVADO defined: use its gcc-$(CXX_VER) and put its lib64 first on LD_LIBRARY_PATH
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
+ifeq ($(LD_LIBRARY_PATH),)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
+else # keep the existing search path after the tool's lib64
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
+endif # LD_LIBRARY_PATH
+$(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
+endif # XILINX_VIVADO
+endif # __GNUG__ of the g++ on PATH is below CXX_V
+else ifeq ($(HOST_ARCH), aarch64)
+CXX := $(XILINX_VITIS)/gnu/aarch64/lin/aarch64-linux/bin/aarch64-linux-gnu-g++
+else ifeq ($(HOST_ARCH), aarch32)
+CXX := $(XILINX_VITIS)/gnu/aarch32/lin/gcc-arm-linux-gnueabi/bin/arm-linux-gnueabihf-g++
+endif # HOST_ARCH
+
+#Check OS and setting env for xrt c++ api
+# OSDIST / OSREL hold the lsb_release distributor id and release, lower-cased with blanks removed.
+# OSDIST is expanded once (':=') so lsb_release is not re-run for each test below; OSREL stays
+# lazy because it is only needed on centos/redhat.
+OSDIST := $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
+OSREL = $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
+
+# for centos and redhat
+# Release 7 on an x86 host: add -D_GLIBCXX_USE_CXX11_ABI=0 to XRT_CXXFLAGS. Both distributions
+# take the same action, so they share one branch; the major release is the first
+# dot-separated field of OSREL.
+ifneq (,$(findstring centos,$(OSDIST))$(findstring redhat,$(OSDIST)))
+ifeq (7,$(firstword $(subst ., ,$(OSREL))))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif # x86 host
+endif # release 7
+endif # centos or redhat
+
+#Setting VPP (command name of the v++ tool)
+VPP := v++
+
+#Setting command names of the AIE compiler and the two simulators
+AIECXX := aiecompiler
+AIESIMULATOR := aiesimulator
+X86SIMULATOR := x86simulator
+
+# Installation sanity checks: each target fails with a hint when the probed file is absent.
+.PHONY: check_vivado check_vpp check_xrt
+
+check_vivado:
+ifeq ($(wildcard $(XILINX_VIVADO)/bin/vivado),)
+	@echo "Cannot locate Vivado installation. Please set XILINX_VIVADO variable." && false
+endif
+
+check_vpp:
+ifeq ($(wildcard $(XILINX_VITIS)/bin/v++),)
+	@echo "Cannot locate Vitis installation. Please set XILINX_VITIS variable." && false
+endif
+
+check_xrt:
+ifeq ($(wildcard $(XILINX_XRT)/lib/libxilinxopencl.so),)
+	@echo "Cannot locate XRT installation. Please set XILINX_XRT variable." && false
+endif
+
+# Put the Vitis and XRT bin directories first on PATH for every recipe.
+export PATH := $(XILINX_VITIS)/bin:$(XILINX_XRT)/bin:$(PATH)
+# On x86 hosts, prepend the XRT libraries to LD_LIBRARY_PATH. The variable is exported
+# explicitly: make only auto-exports variables that came from the environment, so without
+# 'export' the value would not reach recipes when LD_LIBRARY_PATH was initially unset.
+ifeq ($(HOST_ARCH), x86)
+export LD_LIBRARY_PATH := $(XILINX_XRT)/lib$(if $(LD_LIBRARY_PATH),:$(LD_LIBRARY_PATH))
+endif
+
+ifneq (,$(wildcard $(PLATFORM)))
+# PLATFORM names an existing path: use it verbatim as the platform file
+XPLATFORM := $(PLATFORM)
+else
+# Otherwise treat PLATFORM as a platform name / pattern and search in three stages; the first stage that finds something wins
+# 1. search the colon-separated directories listed in PLATFORM_REPO_PATHS
+ifneq (,$(PLATFORM_REPO_PATHS))
+# 1.1 as exact name: <repo>/<PLATFORM>/<PLATFORM>.xpfm
+XPLATFORM := $(strip $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/$(PLATFORM)/$(PLATFORM).xpfm)))
+# 1.2 as a pattern: keep every <repo>/*/*.xpfm whose path matches PLATFORM as an awk regular expression (several may match)
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(foreach p, $(subst :, ,$(PLATFORM_REPO_PATHS)), $(wildcard $(p)/*/*.xpfm))
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 1.2
+endif # 1
+# 2. search the platforms directory of the Vitis installation
+ifeq (,$(XPLATFORM))
+# 2.1 as exact name
+XPLATFORM := $(strip $(wildcard $(XILINX_VITIS)/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 2.2 as a pattern (same awk match as 1.2)
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard $(XILINX_VITIS)/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 2.2
+endif # 2
+# 3. search the default location /opt/xilinx/platforms
+ifeq (,$(XPLATFORM))
+# 3.1 as exact name
+XPLATFORM := $(strip $(wildcard /opt/xilinx/platforms/$(PLATFORM)/$(PLATFORM).xpfm))
+# 3.2 as a pattern (same awk match as 1.2)
+ifeq (,$(XPLATFORM))
+XPLATFORMS := $(wildcard /opt/xilinx/platforms/*/*.xpfm)
+XPLATFORM := $(strip $(foreach p, $(XPLATFORMS), $(shell echo $(p) | awk '$$1 ~ /$(PLATFORM)/')))
+endif # 3.2
+endif # 3
+endif # PLATFORM is not an existing path
+
+define MSG_PLATFORM
+No platform matched pattern '$(PLATFORM)'.
+Available platforms are: $(XPLATFORMS)
+To add more platform directories, set the PLATFORM_REPO_PATHS variable or point PLATFORM variable to the full path of platform .xpfm file.
+endef
+export MSG_PLATFORM
+# MSG_PLATFORM is exported so the recipe below can print the multi-line text through the shell variable.
+# check_platform fails with that message when the search above left XPLATFORM empty.
+.PHONY: check_platform
+check_platform:
+ifeq (,$(XPLATFORM))
+	@echo "$${MSG_PLATFORM}" && false
+endif
+#Check ends
+
+#   PLATFORM_NAME - platform name derived from PLATFORM: directory part and .xpfm suffix removed
+#   (uses make built-ins only, so no subprocess is forked on each expansion and an empty PLATFORM is harmless)
+PLATFORM_NAME = $(strip $(patsubst %.xpfm,%,$(notdir $(patsubst %/,%,$(PLATFORM)))))
+
+
+# Shell command shorthands for recipes (forced, non-interactive variants)
+RM = rm -f
+RMDIR = rm -rf
+# MV overwrites existing targets; CP copies recursively and overwrites
+MV = mv -f
+CP = cp -rf
+ECHO:= @echo
diff --git a/genomics/L2/tests/pairhmm_8x8/xrt.ini b/genomics/L2/tests/pairhmm_8x8/xrt.ini
new file mode 100644
index 0000000000..9b26086567
--- /dev/null
+++ b/genomics/L2/tests/pairhmm_8x8/xrt.ini
@@ -0,0 +1,7 @@
+[Debug]
+profile=false
+timeline_trace=false
+device_profile=false
+data_transfer_trace=fine
+[Emulation]
+enable_shared_memory=false
diff --git a/genomics/L2/tests/smem/Makefile b/genomics/L2/tests/smem/Makefile
index 99b48940ec..f8ec18b4a8 100644
--- a/genomics/L2/tests/smem/Makefile
+++ b/genomics/L2/tests/smem/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.7
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -61,7 +67,7 @@ ifeq ($(PLATFORM),)
 PLATFORM := $(DEVICE)
 endif
 ifeq ($(PLATFORM),)
-PLATFORM := xilinx_u250_gen3x16_xdma_3_1_202020_1
+PLATFORM := xilinx_u250_gen3x16_xdma_4_1_202210_1
 endif
 
 # #################### Checking if PLATFORM in whitelist ############################
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(wildcard $(XFLIB_DIR)/common/libs/smem/*.cpp) $(wildcard $(XFLIB_DIR)/common/libs/xcl2/*.cpp) 
@@ -125,6 +127,11 @@ CXXFLAGS +=  -D COMPUTE_UNIT=1
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/smem/ -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_smem
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -150,7 +157,7 @@ endif
 $(TEMP_DIR)/mem_collect_intv_core.xo: $(XFLIB_DIR)/L2/src/smem.cpp 
 	$(ECHO) "Compiling Kernel: mem_collect_intv_core"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_mem_collect_intv_core) $(VPP_FLAGS) -k mem_collect_intv_core -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_mem_collect_intv_core) $(VPP_FLAGS) -k mem_collect_intv_core -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_smem_OBJS += $(TEMP_DIR)/mem_collect_intv_core.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_smem_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -174,11 +181,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -212,21 +214,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)), vck190_base_dfx)
-ifeq ($(TARGET),$(filter $(TARGET), hw))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -272,7 +274,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/smem.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -306,13 +318,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/genomics/L2/tests/smem/utils.mk b/genomics/L2/tests/smem/utils.mk
index a38e143571..1d97b0ad1a 100644
--- a/genomics/L2/tests/smem/utils.mk
+++ b/genomics/L2/tests/smem/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set K_IMAGE or SYSROOT correctly and rerun)
+endif # kernel image path does not exist
+endif # only checked when HOST_ARCH is not x86
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set ROOTFS or SYSROOT correctly and rerun)
+endif # rootfs path does not exist
+endif # only checked when HOST_ARCH is not x86
 
 #Checks for g++
 CXX := g++
diff --git a/genomics/L2/tests/smithwaterman_16PE/Makefile b/genomics/L2/tests/smithwaterman_16PE/Makefile
index 5818db6c51..69cce6d1c9 100644
--- a/genomics/L2/tests/smithwaterman_16PE/Makefile
+++ b/genomics/L2/tests/smithwaterman_16PE/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.7
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/sw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/sw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/src/main.cpp $(XFLIB_DIR)/L2/tests/src/matcharray.cpp $(XFLIB_DIR)/L2/tests/src/smithwaterman.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D MAXPE=16 -D NUMPACKED=80
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 LDFLAGS += -fopenmp
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_sw
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -143,7 +150,7 @@ endif
 $(TEMP_DIR)/opencl_sw_maxscore.xo: $(XFLIB_DIR)/L2/src/opencl_sw_maxscore_systolic.cpp 
 	$(ECHO) "Compiling Kernel: opencl_sw_maxscore"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_opencl_sw_maxscore) $(VPP_FLAGS) -k opencl_sw_maxscore -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_opencl_sw_maxscore) $(VPP_FLAGS) -k opencl_sw_maxscore -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_krnl_smithwaterman_OBJS += $(TEMP_DIR)/opencl_sw_maxscore.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_krnl_smithwaterman_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -167,11 +174,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -205,21 +207,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)), vck190_base_dfx)
-ifeq ($(TARGET),$(filter $(TARGET), hw))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -265,7 +267,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/krnl_smithwaterman.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -299,13 +311,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/genomics/L2/tests/smithwaterman_16PE/src/main.cpp b/genomics/L2/tests/smithwaterman_16PE/src/main.cpp
index 67c959a1a8..b94c88de50 100644
--- a/genomics/L2/tests/smithwaterman_16PE/src/main.cpp
+++ b/genomics/L2/tests/smithwaterman_16PE/src/main.cpp
@@ -76,7 +76,6 @@ int main(int argc, char* argv[]) {
     }
     int doubleBuffered = parser.value_to_int("double-buffered");
     int idxSelectedDevice = parser.value_to_int("select-device");
-    int nThreads = parser.value_to_int("number-of-threads");
     int verifyMode = parser.value_to_int("verify-mode");
 
     LogInfo("Platform: %s, Device: %s", strPlatformName.c_str(), strDeviceName.c_str());
diff --git a/genomics/L2/tests/smithwaterman_16PE/utils.mk b/genomics/L2/tests/smithwaterman_16PE/utils.mk
index a38e143571..1d97b0ad1a 100644
--- a/genomics/L2/tests/smithwaterman_16PE/utils.mk
+++ b/genomics/L2/tests/smithwaterman_16PE/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set K_IMAGE or SYSROOT correctly and rerun)
+endif # kernel image path does not exist
+endif # only checked when HOST_ARCH is not x86
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set ROOTFS or SYSROOT correctly and rerun)
+endif # rootfs path does not exist
+endif # only checked when HOST_ARCH is not x86
 
 #Checks for g++
 CXX := g++
diff --git a/genomics/L2/tests/smithwaterman_32PE/Makefile b/genomics/L2/tests/smithwaterman_32PE/Makefile
index 229d3b69d1..1046c24954 100644
--- a/genomics/L2/tests/smithwaterman_32PE/Makefile
+++ b/genomics/L2/tests/smithwaterman_32PE/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.7
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/sw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/sw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/src/main.cpp $(XFLIB_DIR)/L2/tests/src/matcharray.cpp $(XFLIB_DIR)/L2/tests/src/smithwaterman.cpp $(XFLIB_DIR)/common/libs/logger/logger.cpp $(XFLIB_DIR)/common/libs/cmdparser/cmdlineparser.cpp $(XFLIB_DIR)/common/libs/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D MAXPE=32 -D NUMPACKED=42
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/src -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 LDFLAGS += -fopenmp
 
+# Workaround for opencv: append XRT_CXXFLAGS only when "opencv" does not appear anywhere in CXXFLAGS
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_sw
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -143,7 +150,7 @@ endif
 $(TEMP_DIR)/opencl_sw_maxscore.xo: $(XFLIB_DIR)/L2/src/opencl_sw_maxscore_systolic.cpp 
 	$(ECHO) "Compiling Kernel: opencl_sw_maxscore"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_opencl_sw_maxscore) $(VPP_FLAGS) -k opencl_sw_maxscore -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_opencl_sw_maxscore) $(VPP_FLAGS) -k opencl_sw_maxscore -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_krnl_smithwaterman_OBJS += $(TEMP_DIR)/opencl_sw_maxscore.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_krnl_smithwaterman_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -167,11 +174,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -205,21 +207,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a re-package on every build
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)), vck190_base_dfx)
-ifeq ($(TARGET),$(filter $(TARGET), hw))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -265,7 +267,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/krnl_smithwaterman.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -299,13 +311,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/genomics/L2/tests/smithwaterman_32PE/src/main.cpp b/genomics/L2/tests/smithwaterman_32PE/src/main.cpp
index 67c959a1a8..b94c88de50 100644
--- a/genomics/L2/tests/smithwaterman_32PE/src/main.cpp
+++ b/genomics/L2/tests/smithwaterman_32PE/src/main.cpp
@@ -76,7 +76,6 @@ int main(int argc, char* argv[]) {
     }
     int doubleBuffered = parser.value_to_int("double-buffered");
     int idxSelectedDevice = parser.value_to_int("select-device");
-    int nThreads = parser.value_to_int("number-of-threads");
     int verifyMode = parser.value_to_int("verify-mode");
 
     LogInfo("Platform: %s, Device: %s", strPlatformName.c_str(), strDeviceName.c_str());
diff --git a/genomics/L2/tests/smithwaterman_32PE/utils.mk b/genomics/L2/tests/smithwaterman_32PE/utils.mk
index a38e143571..1d97b0ad1a 100644
--- a/genomics/L2/tests/smithwaterman_32PE/utils.mk
+++ b/genomics/L2/tests/smithwaterman_32PE/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Set defaults for K_IMAGE/ROOTFS (relative to SYSROOT, non-x86 hosts only; ?= keeps user-supplied values), then check SYSROOT/K_IMAGE/ROOTFS in the targets below
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort unless the kernel image that K_IMAGE points to exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort unless the root filesystem image that ROOTFS points to exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/genomics/L2/tests/src/genseq.hpp b/genomics/L2/tests/src/genseq.hpp
index 2345305c6a..53d524193c 100644
--- a/genomics/L2/tests/src/genseq.hpp
+++ b/genomics/L2/tests/src/genseq.hpp
@@ -312,7 +312,7 @@ int readReadRefFile(char* fname, unsigned int** pairs, unsigned int** maxv, int
     int refSz = 0;
     int sampleNum = 0;
     int numInt = 0;
-    int numSamples;
+    int numSamples = 0;
     while ((sampleNum < N) && getToken(fp, string)) {
         if (!strcmp(string, "rdsz")) {
             getToken(fp, string);
diff --git a/genomics/L2/tests/src/smithwaterman.cpp b/genomics/L2/tests/src/smithwaterman.cpp
index 799a9219e6..800f1e6eab 100644
--- a/genomics/L2/tests/src/smithwaterman.cpp
+++ b/genomics/L2/tests/src/smithwaterman.cpp
@@ -417,7 +417,7 @@ bool SmithWatermanApp::run(int idevice, int nruns) {
     int err;
     unsigned int* output;
     unsigned int* outputGolden;
-    unsigned int* input;
+    unsigned int* input = 0;
     int* iterNum;
     int hwBlockSize = NUMPACKED * m_blockSz;
     int totalSamples = m_numSamples;
diff --git a/genomics/L3/demos/pairhmm_8x2/Makefile b/genomics/L3/demos/pairhmm_8x2/Makefile
index cdd4fb98b8..ec43b64032 100644
--- a/genomics/L3/demos/pairhmm_8x2/Makefile
+++ b/genomics/L3/demos/pairhmm_8x2/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.7
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/sw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/sw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(wildcard $(XFLIB_DIR)/common/libs/pairhmm/*.cpp) $(wildcard $(XFLIB_DIR)/common/libs/xcl2/*.cpp) 
@@ -126,6 +128,11 @@ CXXFLAGS +=  -D SLR0_PE_NUM=16 -D SLR1_PE_NUM=16 -D SLR2_PE_NUM=16 -D FPGA -D DI
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/pairhmm -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# Workaround for opencv: append XRT_CXXFLAGS only when "opencv" does not appear anywhere in CXXFLAGS
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_pairhmm
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -136,10 +143,13 @@ PKG_HOST_ARGS = $(foreach args,$(HOST_ARGS),$(subst $(dir $(patsubst %/,%,$(args
 endif
 
 ########################## Kernel compiler global settings ##########################
+VPP_FLAGS +=   --config $(CUR_DIR)/advanced.cfg
 VPP_FLAGS +=  -D SLR_PE_NUM=16 -D FPGA
 VPP_FLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/pairhmm -I $(XFLIB_DIR)/L2/src -I $(XFLIB_DIR)/L2/src -I $(XFLIB_DIR)/common/libs
 
 ######################### binary container global settings ##########################
+VPP_LDFLAGS_pairhmm_temp := --kernel_frequency 250 --config $(CUR_DIR)/auto_pairhmm.cfg
+VPP_LDFLAGS_pairhmm += $(VPP_LDFLAGS_pairhmm_temp)
 
 ifeq ($(HOST_ARCH), x86)
 BINARY_CONTAINERS += $(BUILD_DIR)/pairhmm.xclbin
@@ -152,7 +162,7 @@ endif
 $(TEMP_DIR)/pairhmm.xo: $(XFLIB_DIR)/L2/src/pairhmm.cpp 
 	$(ECHO) "Compiling Kernel: pairhmm"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_pairhmm) $(VPP_FLAGS) -k pairhmm -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_pairhmm) $(VPP_FLAGS) -k pairhmm -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_pairhmm_OBJS += $(TEMP_DIR)/pairhmm.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_pairhmm_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -176,11 +186,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -214,12 +219,23 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a re-package on every build
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
+dfx_hw := true
+endif
+endif
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+	@echo "### ***** sd_card generation done! ***** ###"
+endif
 
 .PHONY: sd_card
 sd_card: $(SD_CARD)
@@ -263,7 +279,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/pairhmm.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -297,13 +323,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/genomics/L3/demos/pairhmm_8x2/auto_pairhmm.cfg b/genomics/L3/demos/pairhmm_8x2/auto_pairhmm.cfg
new file mode 100644
index 0000000000..1d244a32ba
--- /dev/null
+++ b/genomics/L3/demos/pairhmm_8x2/auto_pairhmm.cfg
@@ -0,0 +1,2 @@
+[connectivity]
+nk=pairhmm:3
diff --git a/genomics/L3/demos/pairhmm_8x2/description.json b/genomics/L3/demos/pairhmm_8x2/description.json
index d6306a52ba..5b6fa3572e 100644
--- a/genomics/L3/demos/pairhmm_8x2/description.json
+++ b/genomics/L3/demos/pairhmm_8x2/description.json
@@ -43,6 +43,9 @@
         }
     }, 
     "v++": {
+        "build_datafiles": [
+            "PROJECT/advanced.cfg"
+        ],
         "compiler": {
             "includepaths": [
                  "LIB_DIR/L2/include",
@@ -53,12 +56,16 @@
             "symbols": [
                 "SLR_PE_NUM=16",
                 "FPGA"
+            ],
+            "clflags": [
+                "--config PROJECT/advanced.cfg"
             ]
         }
     }, 
     "containers": [
         {
-            "name": "pairhmm",  
+            "name": "pairhmm", 
+            "ldclflags": "--kernel_frequency 250 --config PROJECT/auto_pairhmm.cfg",  
             "accelerators": [
                 {
                     "name": "pairhmm", 
diff --git a/genomics/L3/demos/pairhmm_8x2/utils.mk b/genomics/L3/demos/pairhmm_8x2/utils.mk
index a38e143571..1d97b0ad1a 100644
--- a/genomics/L3/demos/pairhmm_8x2/utils.mk
+++ b/genomics/L3/demos/pairhmm_8x2/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Set defaults for K_IMAGE/ROOTFS (relative to SYSROOT, non-x86 hosts only; ?= keeps user-supplied values), then check SYSROOT/K_IMAGE/ROOTFS in the targets below
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort unless the kernel image that K_IMAGE points to exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort unless the root filesystem image that ROOTFS points to exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/genomics/L3/demos/smem/Makefile b/genomics/L3/demos/smem/Makefile
index 7dcbfb103a..f8935aaab2 100644
--- a/genomics/L3/demos/smem/Makefile
+++ b/genomics/L3/demos/smem/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.7
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -61,7 +67,7 @@ ifeq ($(PLATFORM),)
 PLATFORM := $(DEVICE)
 endif
 ifeq ($(PLATFORM),)
-PLATFORM := xilinx_u250_gen3x16_xdma_3_1_202020_1
+PLATFORM := xilinx_u250_gen3x16_xdma_4_1_202210_1
 endif
 
 # #################### Checking if PLATFORM in whitelist ############################
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/src/host.cpp $(wildcard $(XFLIB_DIR)/common/libs/smem/*.cpp) $(wildcard $(XFLIB_DIR)/common/libs/xcl2/*.cpp) 
@@ -125,6 +127,11 @@ CXXFLAGS +=  -D COMPUTE_UNIT=4
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/common/libs/smem/ -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/xcl2 -I $(XFLIB_DIR)/common/libs/cmdparser -I $(XFLIB_DIR)/common/libs/logger
 
 endif
+# Workaround for opencv: append XRT_CXXFLAGS only when "opencv" does not appear anywhere in CXXFLAGS
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := xil_smem
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -152,7 +159,7 @@ endif
 $(TEMP_DIR)/mem_collect_intv_core.xo: $(XFLIB_DIR)/L2/src/smem.cpp 
 	$(ECHO) "Compiling Kernel: mem_collect_intv_core"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_mem_collect_intv_core) $(VPP_FLAGS) -k mem_collect_intv_core -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_mem_collect_intv_core) $(VPP_FLAGS) -k mem_collect_intv_core -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_smem_OBJS += $(TEMP_DIR)/mem_collect_intv_core.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_smem_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -176,11 +183,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -214,21 +216,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a re-package on every build
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)), vck190_base_dfx)
-ifeq ($(TARGET),$(filter $(TARGET), hw))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -274,7 +276,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/smem.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -308,13 +320,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/genomics/L3/demos/smem/utils.mk b/genomics/L3/demos/smem/utils.mk
index a38e143571..1d97b0ad1a 100644
--- a/genomics/L3/demos/smem/utils.mk
+++ b/genomics/L3/demos/smem/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort (non-x86 hosts only) when the kernel image path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs: # Abort (non-x86 hosts only) when the rootfs image path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/genomics/common/libs/pairhmm/Context.h b/genomics/common/libs/pairhmm/Context.h
index a7a51521ff..0360697c28 100644
--- a/genomics/common/libs/pairhmm/Context.h
+++ b/genomics/common/libs/pairhmm/Context.h
@@ -79,8 +79,6 @@ class ContextBase {
             small = t;
         }
 
-        if (std::isinf(small) == -1 || std::isinf(big) == -1) return big;
-
         NUMBER diff = big - small;
         if (diff >= ((NUMBER)MAX_JACOBIAN_TOLERANCE)) return big;
 
diff --git a/genomics/common/libs/pairhmm/avx-pairhmm-template.h b/genomics/common/libs/pairhmm/avx-pairhmm-template.h
index 0cd3f860d0..54f47ecfb0 100644
--- a/genomics/common/libs/pairhmm/avx-pairhmm-template.h
+++ b/genomics/common/libs/pairhmm/avx-pairhmm-template.h
@@ -32,7 +32,7 @@ void CONCAT(CONCAT(precompute_masks_, SIMD_ENGINE),
         int mOffset = (col - 1) % maskBitCnt;
         MASK_TYPE bitMask = ((MASK_TYPE)0x1) << (maskBitCnt - 1 - mOffset);
 
-        char hapChar = ConvertChar::get(tc.hap[col - 1]);
+        unsigned char hapChar = ConvertChar::get(tc.hap[col - 1]);
 
         if (hapChar == AMBIG_CHAR) {
             for (int ci = 0; ci < NUM_DISTINCT_CHARS; ++ci) maskArr[mIndex][ci] |= bitMask;
@@ -301,20 +301,16 @@ NUMBER CONCAT(CONCAT(compute_full_prob_, SIMD_ENGINE), PRECISION)(testcase* tc)
     /* Probality vectors */
     SIMD_TYPE pGAPM, pMM, pMX, pXX, pMY, pYY;
 
-    struct timeval start, end;
     NUMBER result_avx2;
     Context<NUMBER> ctx;
     UNION_TYPE rs, rsN;
-    HAP_TYPE hap;
-    SIMD_TYPE distmSel, distmChosen;
+    SIMD_TYPE distmChosen;
     SIMD_TYPE distm, _1_distm;
 
-    int r, c;
     NUMBER zero = ctx._(0.0);
-    UNION_TYPE packed1;
+    UNION_TYPE packed1 __attribute__((unused));
     packed1.d = VEC_SET1_VAL(1.0);
     SIMD_TYPE N_packed256 = VEC_POPCVT_CHAR('N');
-    NUMBER init_Y = ctx.INITIAL_CONSTANT / (tc->haplen);
     int remainingRows = (ROWS - 1) % AVX_LENGTH;
     int stripe_cnt = ((ROWS - 1) / AVX_LENGTH) + (remainingRows != 0);
 
diff --git a/genomics/common/libs/pairhmm/baseline_impl.cpp b/genomics/common/libs/pairhmm/baseline_impl.cpp
index 82b1e7e50f..9882705223 100644
--- a/genomics/common/libs/pairhmm/baseline_impl.cpp
+++ b/genomics/common/libs/pairhmm/baseline_impl.cpp
@@ -22,9 +22,9 @@ using namespace std;
 
 template <class NUMBER>
 NUMBER compute_full_prob_baseline(testcase* tc, NUMBER* before_last_log) {
-    int r, c;
-    int ROWS = tc->rslen + 1;
-    int COLS = tc->haplen + 1;
+    unsigned int r, c;
+    unsigned int ROWS = tc->rslen + 1;
+    unsigned int COLS = tc->haplen + 1;
 
     Context<NUMBER> ctx;
 
diff --git a/genomics/common/libs/pairhmm/gensynthdata.hpp b/genomics/common/libs/pairhmm/gensynthdata.hpp
index 2483ecc368..31a0ef9282 100644
--- a/genomics/common/libs/pairhmm/gensynthdata.hpp
+++ b/genomics/common/libs/pairhmm/gensynthdata.hpp
@@ -72,8 +72,8 @@ int GenInputs(pairhmmInput* in, int size) {
     in->haps.clear();
     in->reads.resize(16 * (size + 1));
     in->haps.resize((size + 1));
-    printf("%s - readsize %d \n", __FUNCTION__, in->reads.size());
-    printf("%s - readsize %d \n", __FUNCTION__, in->haps.size());
+    printf("%s - readsize %zu \n", __FUNCTION__, in->reads.size()); // size() is size_t: %zu is the portable specifier
+    printf("%s - hapsize %zu \n", __FUNCTION__, in->haps.size());   // label now names the container actually printed
     for (int i = 0; (size_t)i < in->reads.size(); i++) {
         Read& curRead = in->reads[i];
         for (int j = 0; j < GenLen(MAX_READ_LEN); j++) {
diff --git a/genomics/common/libs/smithwaterman/genseq.hpp b/genomics/common/libs/smithwaterman/genseq.hpp
index 2345305c6a..53d524193c 100644
--- a/genomics/common/libs/smithwaterman/genseq.hpp
+++ b/genomics/common/libs/smithwaterman/genseq.hpp
@@ -312,7 +312,7 @@ int readReadRefFile(char* fname, unsigned int** pairs, unsigned int** maxv, int
     int refSz = 0;
     int sampleNum = 0;
     int numInt = 0;
-    int numSamples;
+    int numSamples = 0;
     while ((sampleNum < N) && getToken(fp, string)) {
         if (!strcmp(string, "rdsz")) {
             getToken(fp, string);
diff --git a/genomics/common/libs/smithwaterman/smithwaterman.cpp b/genomics/common/libs/smithwaterman/smithwaterman.cpp
index 799a9219e6..800f1e6eab 100644
--- a/genomics/common/libs/smithwaterman/smithwaterman.cpp
+++ b/genomics/common/libs/smithwaterman/smithwaterman.cpp
@@ -417,7 +417,7 @@ bool SmithWatermanApp::run(int idevice, int nruns) {
     int err;
     unsigned int* output;
     unsigned int* outputGolden;
-    unsigned int* input;
+    unsigned int* input = 0;
     int* iterNum;
     int hwBlockSize = NUMPACKED * m_blockSz;
     int totalSamples = m_numSamples;
diff --git a/genomics/docs/conf.py b/genomics/docs/conf.py
index bd0d218b22..9d57d64814 100644
--- a/genomics/docs/conf.py
+++ b/genomics/docs/conf.py
@@ -41,9 +41,9 @@
 author = 'Xilinx'
 
 # The short X.Y version
-version = '2021.2'
+version = '2022.1'
 # The full version, including alpha/beta/rc tags
-release = '2021.2 release'
+release = '2022.1 release'
 html_last_updated_fmt = '%B %d, %Y'
 
 rst_epilog = """
diff --git a/graph/Jenkinsfile b/graph/Jenkinsfile
index fbe4e9bc0e..70aa33680d 100644
--- a/graph/Jenkinsfile
+++ b/graph/Jenkinsfile
@@ -1,4 +1,4 @@
 @Library('pipeline-library')_
-VitisLibPipeline (branch: 'next', libname: 'xf_graph', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
-                  upstream_dependencies: 'xf_utils_hw,next,../utils; xf_database,next,../database; xf_fintech,next,../quantitative_finance',
-		  devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_stable_latest', 'os_types' : 'centos7:rhel7')
+VitisLibPipeline (branch: 'main', libname: 'xf_graph', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
+                  upstream_dependencies: 'xf_utils_hw,main,../utils; xf_database,main,../database; xf_fintech,main,../quantitative_finance',
+		  devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_released', 'os_types' : 'centos7:rhel7')
diff --git a/graph/L2/benchmarks/connected_component/Makefile b/graph/L2/benchmarks/connected_component/Makefile
index c22cb7fe42..6cf6b0e6b6 100644
--- a/graph/L2/benchmarks/connected_component/Makefile
+++ b/graph/L2/benchmarks/connected_component/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/benchmarks/connected_
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -220,11 +227,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -258,21 +260,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -319,12 +321,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/wcc_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/benchmarks/connected_component/utils.mk b/graph/L2/benchmarks/connected_component/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/benchmarks/connected_component/utils.mk
+++ b/graph/L2/benchmarks/connected_component/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort (non-x86 hosts only) when the kernel image path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs: # Abort (non-x86 hosts only) when the rootfs image path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/benchmarks/label_propagation/Makefile b/graph/L2/benchmarks/label_propagation/Makefile
index e0f5a4dc83..89812ab3ca 100644
--- a/graph/L2/benchmarks/label_propagation/Makefile
+++ b/graph/L2/benchmarks/label_propagation/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/benchmarks/label_propagation/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/benchmarks/label_prop
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -220,11 +227,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -258,21 +260,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -319,12 +321,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/LPKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/benchmarks/label_propagation/utils.mk b/graph/L2/benchmarks/label_propagation/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/benchmarks/label_propagation/utils.mk
+++ b/graph/L2/benchmarks/label_propagation/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort (non-x86 hosts only) when the kernel image path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs: # Abort (non-x86 hosts only) when the rootfs image path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/benchmarks/louvain_fast/Makefile b/graph/L2/benchmarks/louvain_fast/Makefile
index c55b7fab18..112a65500c 100644
--- a/graph/L2/benchmarks/louvain_fast/Makefile
+++ b/graph/L2/benchmarks/louvain_fast/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u55c/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/benchmarks/louvain_fast/host/driverForGraphClustering.cpp $(XFLIB_DIR)/L2/benchmarks/louvain_fast/host/RngStream.cpp $(XFLIB_DIR)/L2/benchmarks/louvain_fast/host/utilityFunctions.cpp $(XFLIB_DIR)/L2/benchmarks/louvain_fast/host/parseInputFiles.cpp $(XFLIB_DIR)/L2/benchmarks/louvain_fast/host/writeGraphDimacsFormat.cpp $(XFLIB_DIR)/L2/benchmarks/louvain_fast/host/buildNextPhase.cpp $(XFLIB_DIR)/L2/benchmarks/louvain_fast/host/coloringDistanceOne.cpp $(XFLIB_DIR)/L2/benchmarks/louvain_fast/host/utilityClusteringFunctions.cpp $(XFLIB_DIR)/L2/benchmarks/louvain_fast/host/parallelLouvainMethod.cpp $(XFLIB_DIR)/L2/benchmarks/louvain_fast/host/parallelLouvainWithColoring.cpp $(XFLIB_DIR)/L2/benchmarks/louvain_fast/host/louvainMultiPhaseRun.cpp $(XFLIB_DIR)/L2/benchmarks/louvain_fast/host/parseInputParameters.cpp $(XFLIB_DIR)/L2/benchmarks/louvain_fast/host/vertexFollowing.cpp $(XFLIB_DIR)/L2/benchmarks/louvain_fast/host/partition/test.cpp $(XFLIB_DIR)/L2/benchmarks/louvain_fast/host/partition/ParLV.cpp $(XFLIB_DIR)/L2/benchmarks/louvain_fast/host/partition/ctrlLV.cpp $(XFLIB_DIR)/L2/benchmarks/louvain_fast/host/partition/louvainPhase.cpp $(XFLIB_DIR)/L2/benchmarks/louvain_fast/host/partition/partitionLouvain.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -126,6 +128,11 @@ CXXFLAGS += -O3 -DPRAGMA -Ofast -fopenmp -fPIC -DMULTITHREAD -B/usr/lib/x86_64-l
 LDFLAGS += -lgomp
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -198,11 +205,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -236,21 +238,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -297,12 +299,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_louvain.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/benchmarks/louvain_fast/utils.mk b/graph/L2/benchmarks/louvain_fast/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/benchmarks/louvain_fast/utils.mk
+++ b/graph/L2/benchmarks/louvain_fast/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort (non-x86 hosts only) when the kernel image path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs: # Abort (non-x86 hosts only) when the rootfs image path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/benchmarks/maximal_independent_set/Makefile b/graph/L2/benchmarks/maximal_independent_set/Makefile
index 346825da17..f155b80532 100644
--- a/graph/L2/benchmarks/maximal_independent_set/Makefile
+++ b/graph/L2/benchmarks/maximal_independent_set/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/benchmarks/maximal_independent_set/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/benchmarks/maximal_independent_set/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -179,11 +186,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -217,21 +219,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # checks are .PHONY: keep them order-only so they run first without forcing a re-package
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -278,12 +280,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/mis_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/benchmarks/maximal_independent_set/utils.mk b/graph/L2/benchmarks/maximal_independent_set/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/benchmarks/maximal_independent_set/utils.mk
+++ b/graph/L2/benchmarks/maximal_independent_set/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when the $(K_IMAGE) path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set K_IMAGE (or SYSROOT) correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort when the $(ROOTFS) path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ROOTFS (or SYSROOT) correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/benchmarks/pagerank/Makefile b/graph/L2/benchmarks/pagerank/Makefile
index 11a24225fc..10d2c4b942 100644
--- a/graph/L2/benchmarks/pagerank/Makefile
+++ b/graph/L2/benchmarks/pagerank/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/benchmarks/pagerank/host/test_pagerank.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/benchmarks/pagerank/h
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -195,11 +202,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -233,21 +235,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # checks are .PHONY: keep them order-only so they run first without forcing a re-package
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -294,12 +296,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_pagerank_0.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/benchmarks/pagerank/utils.mk b/graph/L2/benchmarks/pagerank/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/benchmarks/pagerank/utils.mk
+++ b/graph/L2/benchmarks/pagerank/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when the $(K_IMAGE) path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set K_IMAGE (or SYSROOT) correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort when the $(ROOTFS) path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ROOTFS (or SYSROOT) correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/benchmarks/pagerank_cache/Makefile b/graph/L2/benchmarks/pagerank_cache/Makefile
index efc7253868..f1aacd30a9 100644
--- a/graph/L2/benchmarks/pagerank_cache/Makefile
+++ b/graph/L2/benchmarks/pagerank_cache/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/benchmarks/pagerank_cache/host/test_pagerank.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/benchmarks/pagerank_c
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -195,11 +202,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -233,21 +235,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # checks are .PHONY: keep them order-only so they run first without forcing a re-package
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -294,12 +296,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_pagerank_0.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/benchmarks/pagerank_cache/utils.mk b/graph/L2/benchmarks/pagerank_cache/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/benchmarks/pagerank_cache/utils.mk
+++ b/graph/L2/benchmarks/pagerank_cache/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when the $(K_IMAGE) path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set K_IMAGE (or SYSROOT) correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort when the $(ROOTFS) path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ROOTFS (or SYSROOT) correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/benchmarks/pagerank_multi_channels/Makefile b/graph/L2/benchmarks/pagerank_multi_channels/Makefile
index 80a6ef2dea..ee1390117b 100644
--- a/graph/L2/benchmarks/pagerank_multi_channels/Makefile
+++ b/graph/L2/benchmarks/pagerank_multi_channels/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/benchmarks/pagerank_multi_channels/host/test_pagerank.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/benchmarks/pagerank_multi_channels/host -I $(XFLIB_DIR)/L2/benchmarks/pagerank_multi_channels/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # checks are .PHONY: keep them order-only so they run first without forcing a re-package
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_pagerank_0.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/benchmarks/pagerank_multi_channels/utils.mk b/graph/L2/benchmarks/pagerank_multi_channels/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/benchmarks/pagerank_multi_channels/utils.mk
+++ b/graph/L2/benchmarks/pagerank_multi_channels/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when the $(K_IMAGE) path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set K_IMAGE (or SYSROOT) correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort when the $(ROOTFS) path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ROOTFS (or SYSROOT) correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/benchmarks/pagerank_personalized/Makefile b/graph/L2/benchmarks/pagerank_personalized/Makefile
index ad14cbd13e..30344c749c 100644
--- a/graph/L2/benchmarks/pagerank_personalized/Makefile
+++ b/graph/L2/benchmarks/pagerank_personalized/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/benchmarks/pagerank_personalized/host/test_pagerank.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/benchmarks/pagerank_personalized/host -I $(XFLIB_DIR)/L2/benchmarks/pagerank_personalized/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # checks are .PHONY: keep them order-only so they run first without forcing a re-package
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_pagerank_0.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/benchmarks/pagerank_personalized/utils.mk b/graph/L2/benchmarks/pagerank_personalized/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/benchmarks/pagerank_personalized/utils.mk
+++ b/graph/L2/benchmarks/pagerank_personalized/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort if the file named by K_IMAGE does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file $(K_IMAGE) does not exist, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort if the file named by ROOTFS does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file $(ROOTFS) does not exist, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/benchmarks/renumber/Makefile b/graph/L2/benchmarks/renumber/Makefile
index 8d219f0cad..3922dd2e4f 100644
--- a/graph/L2/benchmarks/renumber/Makefile
+++ b/graph/L2/benchmarks/renumber/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/host/test_renumber.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/renumber/host -I $(XFLIB_DIR)/L2/tests/renumber/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 CXXFLAGS += -O3 -std=c++11
 LDFLAGS += -lgomp
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,11 +187,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -218,21 +220,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -279,12 +281,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_renumber.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/benchmarks/renumber/utils.mk b/graph/L2/benchmarks/renumber/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/benchmarks/renumber/utils.mk
+++ b/graph/L2/benchmarks/renumber/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort if the file named by K_IMAGE does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file $(K_IMAGE) does not exist, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort if the file named by ROOTFS does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file $(ROOTFS) does not exist, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/benchmarks/shortest_path_float_pred/Makefile b/graph/L2/benchmarks/shortest_path_float_pred/Makefile
index 7547c39568..8edd24f665 100644
--- a/graph/L2/benchmarks/shortest_path_float_pred/Makefile
+++ b/graph/L2/benchmarks/shortest_path_float_pred/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/benchmarks/shortest_path_float_pred/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/benchmarks/shortest_p
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -187,11 +194,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -225,21 +227,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -286,12 +288,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/shortestPath_top.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/benchmarks/shortest_path_float_pred/utils.mk b/graph/L2/benchmarks/shortest_path_float_pred/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/benchmarks/shortest_path_float_pred/utils.mk
+++ b/graph/L2/benchmarks/shortest_path_float_pred/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort if the file named by K_IMAGE does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file $(K_IMAGE) does not exist, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort if the file named by ROOTFS does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file $(ROOTFS) does not exist, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/benchmarks/strongly_connected_component/Makefile b/graph/L2/benchmarks/strongly_connected_component/Makefile
index f74e0111b5..628c0a0976 100644
--- a/graph/L2/benchmarks/strongly_connected_component/Makefile
+++ b/graph/L2/benchmarks/strongly_connected_component/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/benchmarks/strongly_connected_component/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/benchmarks/strongly_c
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -220,11 +227,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -258,21 +260,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -319,12 +321,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/scc_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/benchmarks/strongly_connected_component/utils.mk b/graph/L2/benchmarks/strongly_connected_component/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/benchmarks/strongly_connected_component/utils.mk
+++ b/graph/L2/benchmarks/strongly_connected_component/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort if the file named by K_IMAGE does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file $(K_IMAGE) does not exist, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort if the file named by ROOTFS does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file $(ROOTFS) does not exist, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/benchmarks/triangle_count/Makefile b/graph/L2/benchmarks/triangle_count/Makefile
index 9e718630d3..3d7bd28d50 100644
--- a/graph/L2/benchmarks/triangle_count/Makefile
+++ b/graph/L2/benchmarks/triangle_count/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/benchmarks/triangle_count/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/benchmarks/triangle_c
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -220,11 +227,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -258,21 +260,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -319,12 +321,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/TC_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/benchmarks/triangle_count/utils.mk b/graph/L2/benchmarks/triangle_count/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/benchmarks/triangle_count/utils.mk
+++ b/graph/L2/benchmarks/triangle_count/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort if the file named by K_IMAGE does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file $(K_IMAGE) does not exist, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort if the file named by ROOTFS does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file $(ROOTFS) does not exist, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/benchmarks/twoHop/Makefile b/graph/L2/benchmarks/twoHop/Makefile
index 9429d3cfe5..4990f56d14 100644
--- a/graph/L2/benchmarks/twoHop/Makefile
+++ b/graph/L2/benchmarks/twoHop/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/benchmarks/twoHop/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/benchmarks/twoHop/host -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -179,11 +186,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -217,21 +219,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first but never force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -278,12 +280,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/twoHop_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/benchmarks/twoHop/utils.mk b/graph/L2/benchmarks/twoHop/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/benchmarks/twoHop/utils.mk
+++ b/graph/L2/benchmarks/twoHop/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when the K_IMAGE path does not match an existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set K_IMAGE to a valid kernel image and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort when the ROOTFS path does not match an existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set ROOTFS to a valid rootfs image and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/bfs/Makefile b/graph/L2/tests/bfs/Makefile
index fbfd755e28..77ff8dfaa2 100644
--- a/graph/L2/tests/bfs/Makefile
+++ b/graph/L2/tests/bfs/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a. If the platform and common-image are downloaded from the Xilinx Download Center (suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. Users can also define SYSROOT, K_IMAGE and ROOTFS themselves:"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-file >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/bfs/host -I $(X
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -220,11 +227,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -258,21 +260,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first but never force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -319,12 +321,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/bfs_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/bfs/utils.mk b/graph/L2/tests/bfs/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/bfs/utils.mk
+++ b/graph/L2/tests/bfs/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when the K_IMAGE path does not match an existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set K_IMAGE to a valid kernel image and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort when the ROOTFS path does not match an existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set ROOTFS to a valid rootfs image and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/calcu_degree/Makefile b/graph/L2/tests/calcu_degree/Makefile
index 7a0e076b26..f778aff889 100644
--- a/graph/L2/tests/calcu_degree/Makefile
+++ b/graph/L2/tests/calcu_degree/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a. If the platform and common-image are downloaded from the Xilinx Download Center (suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. Users can also define SYSROOT, K_IMAGE and ROOTFS themselves:"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-file >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/host/test_calcuDegree.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/calcu_degree/ho
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -209,11 +216,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -247,21 +249,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first but never force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -308,12 +310,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_calcuDegree_0.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/calcu_degree/utils.mk b/graph/L2/tests/calcu_degree/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/calcu_degree/utils.mk
+++ b/graph/L2/tests/calcu_degree/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when the K_IMAGE path does not match an existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set K_IMAGE to a valid kernel image and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort when the ROOTFS path does not match an existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set ROOTFS to a valid rootfs image and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/connected_component/Makefile b/graph/L2/tests/connected_component/Makefile
index a557f15a3f..5ecce6297d 100644
--- a/graph/L2/tests/connected_component/Makefile
+++ b/graph/L2/tests/connected_component/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a. If the platform and common-image are downloaded from the Xilinx Download Center (suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. Users can also define SYSROOT, K_IMAGE and ROOTFS themselves:"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-file >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/connected_compo
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -220,11 +227,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -258,21 +260,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first but never force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -319,12 +321,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/wcc_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/connected_component/utils.mk b/graph/L2/tests/connected_component/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/connected_component/utils.mk
+++ b/graph/L2/tests/connected_component/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when the K_IMAGE path does not match an existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set K_IMAGE to a valid kernel image and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort when the ROOTFS path does not match an existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set ROOTFS to a valid rootfs image and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/convert_csc_csr/Makefile b/graph/L2/tests/convert_csc_csr/Makefile
index f83705c141..7768954ad7 100644
--- a/graph/L2/tests/convert_csc_csr/Makefile
+++ b/graph/L2/tests/convert_csc_csr/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a. If the platform and common-image are downloaded from the Xilinx Download Center (suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. Users can also define SYSROOT, K_IMAGE and ROOTFS themselves:"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-file >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/convert_csc_csr/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/convert_csc_csr
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -220,11 +227,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -258,21 +260,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first but never force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -319,12 +321,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/convertCsrCsc_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/convert_csc_csr/utils.mk b/graph/L2/tests/convert_csc_csr/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/convert_csc_csr/utils.mk
+++ b/graph/L2/tests/convert_csc_csr/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort unless the kernel image file at $(K_IMAGE) exists (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # Abort unless the root filesystem image at $(ROOTFS) exists (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/dense_similarity/Makefile b/graph/L2/tests/dense_similarity/Makefile
index 8943e2cb9e..35f2d12601 100644
--- a/graph/L2/tests/dense_similarity/Makefile
+++ b/graph/L2/tests/dense_similarity/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/dense_similarity/host/test_similarity.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(XFLIB_DIR)/L2/tests/dense_similarity/host -I $(XFLIB_DIR)/L2/tests/dense_similarity/kernel -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -179,11 +186,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -217,21 +219,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only so they never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -278,12 +280,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/denseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/dense_similarity/utils.mk b/graph/L2/tests/dense_similarity/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/dense_similarity/utils.mk
+++ b/graph/L2/tests/dense_similarity/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort unless the kernel image file at $(K_IMAGE) exists (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # Abort unless the root filesystem image at $(ROOTFS) exists (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/dense_similarity_coeffs/Makefile b/graph/L2/tests/dense_similarity_coeffs/Makefile
index 6287486c50..d0c50b93e1 100644
--- a/graph/L2/tests/dense_similarity_coeffs/Makefile
+++ b/graph/L2/tests/dense_similarity_coeffs/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/dense_similarity_coeffs/host/test_similarity.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(XFLIB_DIR)/L2/tests/dense_similarity_coeffs/host -I $(XFLIB_DIR)/L2/tests/dense_similarity_coeffs/kernel -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -179,11 +186,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -217,21 +219,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only so they never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -278,12 +280,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/denseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/dense_similarity_coeffs/utils.mk b/graph/L2/tests/dense_similarity_coeffs/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/dense_similarity_coeffs/utils.mk
+++ b/graph/L2/tests/dense_similarity_coeffs/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort unless the kernel image file at $(K_IMAGE) exists (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # Abort unless the root filesystem image at $(ROOTFS) exists (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/dense_similarity_int/Makefile b/graph/L2/tests/dense_similarity_int/Makefile
index f59afcc326..bb1648717c 100644
--- a/graph/L2/tests/dense_similarity_int/Makefile
+++ b/graph/L2/tests/dense_similarity_int/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/dense_similarity_int/host/test_similarity.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(XFLIB_DIR)/L2/tests/dense_similarity_int/host -I $(XFLIB_DIR)/L2/tests/dense_similarity_int/kernel -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -179,11 +186,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -217,21 +219,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only so they never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -278,12 +280,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/denseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/dense_similarity_int/utils.mk b/graph/L2/tests/dense_similarity_int/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/dense_similarity_int/utils.mk
+++ b/graph/L2/tests/dense_similarity_int/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort unless the kernel image file at $(K_IMAGE) exists (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # Abort unless the root filesystem image at $(ROOTFS) exists (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/dense_similarity_multi_kernel/Makefile b/graph/L2/tests/dense_similarity_multi_kernel/Makefile
index af66fe1afc..3c138454b9 100644
--- a/graph/L2/tests/dense_similarity_multi_kernel/Makefile
+++ b/graph/L2/tests/dense_similarity_multi_kernel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/dense_similarity_multi_kernel/host/test_similarity.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(XFLIB_DIR)/L2/tests/dense_similarity_multi_kernel/host -I $(XFLIB_DIR)/L2/tests/dense_similarity_multi_kernel/kernel -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -179,11 +186,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -217,21 +219,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only so they never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -278,12 +280,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/denseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/dense_similarity_multi_kernel/utils.mk b/graph/L2/tests/dense_similarity_multi_kernel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/dense_similarity_multi_kernel/utils.mk
+++ b/graph/L2/tests/dense_similarity_multi_kernel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort unless the kernel image file at $(K_IMAGE) exists (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # Abort unless the root filesystem image at $(ROOTFS) exists (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/dense_similarity_multi_source/Makefile b/graph/L2/tests/dense_similarity_multi_source/Makefile
index 05e511d1c5..06844eb42c 100644
--- a/graph/L2/tests/dense_similarity_multi_source/Makefile
+++ b/graph/L2/tests/dense_similarity_multi_source/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -186,11 +192,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -224,21 +225,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only: they run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_) # any platform whose name contains _dfx_
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false) # not the DFX hw case: package in a single v++ step
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -285,12 +286,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/denseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/dense_similarity_multi_source/utils.mk b/graph/L2/tests/dense_similarity_multi_source/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/dense_similarity_multi_source/utils.mk
+++ b/graph/L2/tests/dense_similarity_multi_source/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Fail unless $(K_IMAGE) resolves to an existing path (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE))) # true when K_IMAGE is empty or the path does not exist
+	$(error K_IMAGE "$(K_IMAGE)" is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # Fail unless $(ROOTFS) resolves to an existing path (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS))) # true when ROOTFS is empty or the path does not exist
+	$(error ROOTFS "$(ROOTFS)" is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/diameter/Makefile b/graph/L2/tests/diameter/Makefile
index b8f849528b..69d75910da 100644
--- a/graph/L2/tests/diameter/Makefile
+++ b/graph/L2/tests/diameter/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/diameter/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/diameter/host -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -179,11 +186,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -217,21 +219,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only: they run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_) # any platform whose name contains _dfx_
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false) # not the DFX hw case: package in a single v++ step
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -278,12 +280,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/diameter_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/diameter/utils.mk b/graph/L2/tests/diameter/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/diameter/utils.mk
+++ b/graph/L2/tests/diameter/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Fail unless $(K_IMAGE) resolves to an existing path (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE))) # true when K_IMAGE is empty or the path does not exist
+	$(error K_IMAGE "$(K_IMAGE)" is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # Fail unless $(ROOTFS) resolves to an existing path (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS))) # true when ROOTFS is empty or the path does not exist
+	$(error ROOTFS "$(ROOTFS)" is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/general_similarity/Makefile b/graph/L2/tests/general_similarity/Makefile
index f038cb3489..f20ef79c70 100644
--- a/graph/L2/tests/general_similarity/Makefile
+++ b/graph/L2/tests/general_similarity/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/general_similarity/host/test_similarity.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(XFLIB_DIR)/L2/tests/general_similarity/host -I $(XFLIB_DIR)/L2/tests/general_similarity/kernel -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -179,11 +186,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -217,21 +219,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only: they run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_) # any platform whose name contains _dfx_
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false) # not the DFX hw case: package in a single v++ step
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -278,12 +280,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/generalSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/general_similarity/utils.mk b/graph/L2/tests/general_similarity/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/general_similarity/utils.mk
+++ b/graph/L2/tests/general_similarity/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Fail unless $(K_IMAGE) resolves to an existing path (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE))) # true when K_IMAGE is empty or the path does not exist
+	$(error K_IMAGE "$(K_IMAGE)" is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # Fail unless $(ROOTFS) resolves to an existing path (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS))) # true when ROOTFS is empty or the path does not exist
+	$(error ROOTFS "$(ROOTFS)" is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/label_propagation/Makefile b/graph/L2/tests/label_propagation/Makefile
index 072eb1261b..5db9236405 100644
--- a/graph/L2/tests/label_propagation/Makefile
+++ b/graph/L2/tests/label_propagation/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/label_propagation/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/label_propagati
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -220,11 +227,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -258,21 +260,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only: they run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_) # any platform whose name contains _dfx_
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false) # not the DFX hw case: package in a single v++ step
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -319,12 +321,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/LPKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/label_propagation/utils.mk b/graph/L2/tests/label_propagation/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/label_propagation/utils.mk
+++ b/graph/L2/tests/label_propagation/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Fail unless $(K_IMAGE) resolves to an existing path (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE))) # true when K_IMAGE is empty or the path does not exist
+	$(error K_IMAGE "$(K_IMAGE)" is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # Fail unless $(ROOTFS) resolves to an existing path (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS))) # true when ROOTFS is empty or the path does not exist
+	$(error ROOTFS "$(ROOTFS)" is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/louvain_fast/Makefile b/graph/L2/tests/louvain_fast/Makefile
index ea36e2b433..d4175b05a4 100644
--- a/graph/L2/tests/louvain_fast/Makefile
+++ b/graph/L2/tests/louvain_fast/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u55c/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/louvain_fast/host/driverForGraphClustering.cpp $(XFLIB_DIR)/L2/tests/louvain_fast/host/RngStream.cpp $(XFLIB_DIR)/L2/tests/louvain_fast/host/utilityFunctions.cpp $(XFLIB_DIR)/L2/tests/louvain_fast/host/parseInputFiles.cpp $(XFLIB_DIR)/L2/tests/louvain_fast/host/writeGraphDimacsFormat.cpp $(XFLIB_DIR)/L2/tests/louvain_fast/host/buildNextPhase.cpp $(XFLIB_DIR)/L2/tests/louvain_fast/host/coloringDistanceOne.cpp $(XFLIB_DIR)/L2/tests/louvain_fast/host/utilityClusteringFunctions.cpp $(XFLIB_DIR)/L2/tests/louvain_fast/host/parallelLouvainMethod.cpp $(XFLIB_DIR)/L2/tests/louvain_fast/host/parallelLouvainWithColoring.cpp $(XFLIB_DIR)/L2/tests/louvain_fast/host/louvainMultiPhaseRun.cpp $(XFLIB_DIR)/L2/tests/louvain_fast/host/parseInputParameters.cpp $(XFLIB_DIR)/L2/tests/louvain_fast/host/vertexFollowing.cpp $(XFLIB_DIR)/L2/tests/louvain_fast/host/partition/test.cpp $(XFLIB_DIR)/L2/tests/louvain_fast/host/partition/ParLV.cpp $(XFLIB_DIR)/L2/tests/louvain_fast/host/partition/ctrlLV.cpp $(XFLIB_DIR)/L2/tests/louvain_fast/host/partition/louvainPhase.cpp $(XFLIB_DIR)/L2/tests/louvain_fast/host/partition/partitionLouvain.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -126,6 +128,11 @@ CXXFLAGS += -O3 -DPRAGMA -Ofast -fopenmp -fPIC -DMULTITHREAD -B/usr/lib/x86_64-l
 LDFLAGS += -lgomp
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -198,11 +205,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -236,21 +238,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only: they run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_) # any platform whose name contains _dfx_
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false) # not the DFX hw case: package in a single v++ step
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -297,12 +299,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_louvain.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/louvain_fast/utils.mk b/graph/L2/tests/louvain_fast/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/louvain_fast/utils.mk
+++ b/graph/L2/tests/louvain_fast/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS: for non-x86 hosts, K_IMAGE and ROOTFS default (via ?=, so an existing value wins) to files two levels above SYSROOT; uImage when PLATFORM_NAME contains zc706, Image otherwise
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort when the kernel image file is missing (check is compiled out when HOST_ARCH is x86)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # abort when the root filesystem image is missing (check is compiled out when HOST_ARCH is x86)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/maximal_independent_set/Makefile b/graph/L2/tests/maximal_independent_set/Makefile
index 7e80192744..4e6eeda4ff 100644
--- a/graph/L2/tests/maximal_independent_set/Makefile
+++ b/graph/L2/tests/maximal_independent_set/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/maximal_independent_set/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/maximal_independent_set/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 CXXFLAGS += -O3 
 
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already contain the substring "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -179,11 +186,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -217,21 +219,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only: they still run first but no longer force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -278,12 +280,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/mis_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/maximal_independent_set/utils.mk b/graph/L2/tests/maximal_independent_set/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/maximal_independent_set/utils.mk
+++ b/graph/L2/tests/maximal_independent_set/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS: for non-x86 hosts, K_IMAGE and ROOTFS default (via ?=, so an existing value wins) to files two levels above SYSROOT; uImage when PLATFORM_NAME contains zc706, Image otherwise
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort when the kernel image file is missing (check is compiled out when HOST_ARCH is x86)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # abort when the root filesystem image is missing (check is compiled out when HOST_ARCH is x86)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/mst/Makefile b/graph/L2/tests/mst/Makefile
index a5a248fa7c..e2d9b505ab 100644
--- a/graph/L2/tests/mst/Makefile
+++ b/graph/L2/tests/mst/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/mst/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/mst/host -I $(X
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already contain the substring "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -187,11 +194,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -225,21 +227,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only: they still run first but no longer force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -286,12 +288,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/mst_top.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/mst/utils.mk b/graph/L2/tests/mst/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/mst/utils.mk
+++ b/graph/L2/tests/mst/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS: for non-x86 hosts, K_IMAGE and ROOTFS default (via ?=, so an existing value wins) to files two levels above SYSROOT; uImage when PLATFORM_NAME contains zc706, Image otherwise
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort when the kernel image file is missing (check is compiled out when HOST_ARCH is x86)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # abort when the root filesystem image is missing (check is compiled out when HOST_ARCH is x86)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/pagerank/Makefile b/graph/L2/tests/pagerank/Makefile
index db0fc5fe1a..97385b32e9 100644
--- a/graph/L2/tests/pagerank/Makefile
+++ b/graph/L2/tests/pagerank/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/pagerank/host/test_pagerank.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -136,6 +138,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/pagerank/host -
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already contain the substring "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -207,11 +214,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -245,21 +247,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only: they still run first but no longer force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -306,12 +308,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_pagerank_0.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/pagerank/utils.mk b/graph/L2/tests/pagerank/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/pagerank/utils.mk
+++ b/graph/L2/tests/pagerank/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS: for non-x86 hosts, K_IMAGE and ROOTFS default (via ?=, so an existing value wins) to files two levels above SYSROOT; uImage when PLATFORM_NAME contains zc706, Image otherwise
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort when the kernel image file is missing (check is compiled out when HOST_ARCH is x86)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # abort when the root filesystem image is missing (check is compiled out when HOST_ARCH is x86)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/pagerank_cache/Makefile b/graph/L2/tests/pagerank_cache/Makefile
index 6a3b92d2c4..2e6e641e1b 100644
--- a/graph/L2/tests/pagerank_cache/Makefile
+++ b/graph/L2/tests/pagerank_cache/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/pagerank_cache/host/test_pagerank.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/pagerank_cache/
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already contain the substring "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -195,11 +202,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -233,21 +235,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only: they still run first but no longer force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -294,12 +296,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_pagerank_0.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/pagerank_cache/utils.mk b/graph/L2/tests/pagerank_cache/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/pagerank_cache/utils.mk
+++ b/graph/L2/tests/pagerank_cache/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS: for non-x86 hosts, K_IMAGE and ROOTFS default (via ?=, so an existing value wins) to files two levels above SYSROOT; uImage when PLATFORM_NAME contains zc706, Image otherwise
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort when the kernel image file is missing (check is compiled out when HOST_ARCH is x86)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # abort when the root filesystem image is missing (check is compiled out when HOST_ARCH is x86)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/pagerank_multi_channels/Makefile b/graph/L2/tests/pagerank_multi_channels/Makefile
index ced88268e3..46dc00bc41 100644
--- a/graph/L2/tests/pagerank_multi_channels/Makefile
+++ b/graph/L2/tests/pagerank_multi_channels/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/pagerank_multi_channels/host/test_pagerank.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/pagerank_multi_channels/host -I $(XFLIB_DIR)/L2/tests/pagerank_multi_channels/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: checks still run but never force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_pagerank_0.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/pagerank_multi_channels/utils.mk b/graph/L2/tests/pagerank_multi_channels/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/pagerank_multi_channels/utils.mk
+++ b/graph/L2/tests/pagerank_multi_channels/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,39 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS fall back to files two directories above
+# SYSROOT unless the user already defined them; platform names containing
+# zc706 use uImage, every other platform uses Image.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# Each check_* target below aborts make on non-x86 hosts when its variable is
+# empty or names a path that does not exist (wildcard yields nothing for both).
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT ENV variable is not set or its path does not exist [$(SYSROOT)], please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set or its path does not exist [$(K_IMAGE)], please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set or its path does not exist [$(ROOTFS)], please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/pagerank_personalized/Makefile b/graph/L2/tests/pagerank_personalized/Makefile
index 81f97c6042..a0eb6946d9 100644
--- a/graph/L2/tests/pagerank_personalized/Makefile
+++ b/graph/L2/tests/pagerank_personalized/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/pagerank_personalized/host/test_pagerank.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/pagerank_personalized/host -I $(XFLIB_DIR)/L2/tests/pagerank_personalized/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: checks still run but never force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_pagerank_0.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/pagerank_personalized/utils.mk b/graph/L2/tests/pagerank_personalized/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/pagerank_personalized/utils.mk
+++ b/graph/L2/tests/pagerank_personalized/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,39 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS fall back to files two directories above
+# SYSROOT unless the user already defined them; platform names containing
+# zc706 use uImage, every other platform uses Image.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# Each check_* target below aborts make on non-x86 hosts when its variable is
+# empty or names a path that does not exist (wildcard yields nothing for both).
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT ENV variable is not set or its path does not exist [$(SYSROOT)], please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set or its path does not exist [$(K_IMAGE)], please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set or its path does not exist [$(ROOTFS)], please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/renumber/Makefile b/graph/L2/tests/renumber/Makefile
index 8d219f0cad..3922dd2e4f 100644
--- a/graph/L2/tests/renumber/Makefile
+++ b/graph/L2/tests/renumber/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/host/test_renumber.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/renumber/host -I $(XFLIB_DIR)/L2/tests/renumber/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 CXXFLAGS += -O3 -std=c++11
 LDFLAGS += -lgomp
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,11 +187,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -218,21 +220,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: checks still run but never force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -279,12 +281,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_renumber.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/renumber/utils.mk b/graph/L2/tests/renumber/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/renumber/utils.mk
+++ b/graph/L2/tests/renumber/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,39 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS fall back to files two directories above
+# SYSROOT unless the user already defined them; platform names containing
+# zc706 use uImage, every other platform uses Image.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# Each check_* target below aborts make on non-x86 hosts when its variable is
+# empty or names a path that does not exist (wildcard yields nothing for both).
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT ENV variable is not set or its path does not exist [$(SYSROOT)], please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set or its path does not exist [$(K_IMAGE)], please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set or its path does not exist [$(ROOTFS)], please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/shortest_path_double_pred/Makefile b/graph/L2/tests/shortest_path_double_pred/Makefile
index 6189e8a1e7..3735eb367b 100644
--- a/graph/L2/tests/shortest_path_double_pred/Makefile
+++ b/graph/L2/tests/shortest_path_double_pred/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/shortest_path_double_pred/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/shortest_path_d
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -187,11 +194,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -225,21 +227,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: checks still run but never force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -286,12 +288,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/shortestPath_top.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/shortest_path_double_pred/utils.mk b/graph/L2/tests/shortest_path_double_pred/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/shortest_path_double_pred/utils.mk
+++ b/graph/L2/tests/shortest_path_double_pred/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,39 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS fall back to files two directories above
+# SYSROOT unless the user already defined them; platform names containing
+# zc706 use uImage, every other platform uses Image.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# Each check_* target below aborts make on non-x86 hosts when its variable is
+# empty or names a path that does not exist (wildcard yields nothing for both).
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT ENV variable is not set or its path does not exist [$(SYSROOT)], please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set or its path does not exist [$(K_IMAGE)], please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set or its path does not exist [$(ROOTFS)], please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/shortest_path_float_nopred/Makefile b/graph/L2/tests/shortest_path_float_nopred/Makefile
index 574405efc6..25b5850e6f 100644
--- a/graph/L2/tests/shortest_path_float_nopred/Makefile
+++ b/graph/L2/tests/shortest_path_float_nopred/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/shortest_path_float_nopred/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/shortest_path_f
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -187,11 +194,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -225,21 +227,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: checks still run but never force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -286,12 +288,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/shortestPath_top.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/shortest_path_float_nopred/utils.mk b/graph/L2/tests/shortest_path_float_nopred/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/shortest_path_float_nopred/utils.mk
+++ b/graph/L2/tests/shortest_path_float_nopred/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/shortest_path_float_pred/Makefile b/graph/L2/tests/shortest_path_float_pred/Makefile
index 32f4fba8b2..ba372ba4e6 100644
--- a/graph/L2/tests/shortest_path_float_pred/Makefile
+++ b/graph/L2/tests/shortest_path_float_pred/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/shortest_path_float_pred/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/shortest_path_f
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -187,11 +194,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -225,21 +227,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -286,12 +288,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/shortestPath_top.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/shortest_path_float_pred/utils.mk b/graph/L2/tests/shortest_path_float_pred/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/shortest_path_float_pred/utils.mk
+++ b/graph/L2/tests/shortest_path_float_pred/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/shortest_path_unweighted_pred/Makefile b/graph/L2/tests/shortest_path_unweighted_pred/Makefile
index 2676ad72d0..e89ade43d3 100644
--- a/graph/L2/tests/shortest_path_unweighted_pred/Makefile
+++ b/graph/L2/tests/shortest_path_unweighted_pred/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/shortest_path_unweighted_pred/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/shortest_path_u
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -187,11 +194,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -225,21 +227,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -286,12 +288,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/shortestPath_top.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/shortest_path_unweighted_pred/utils.mk b/graph/L2/tests/shortest_path_unweighted_pred/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/shortest_path_unweighted_pred/utils.mk
+++ b/graph/L2/tests/shortest_path_unweighted_pred/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/sparse_similarity/Makefile b/graph/L2/tests/sparse_similarity/Makefile
index 9f295eff2c..3fbd8725bc 100644
--- a/graph/L2/tests/sparse_similarity/Makefile
+++ b/graph/L2/tests/sparse_similarity/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/sparse_similarity/host/test_similarity.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/hw -I $(XFLIB_DIR)/L2/tests/sparse_similarity/host -I $(XFLIB_DIR)/L2/tests/sparse_similarity/kernel -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -179,11 +186,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -217,21 +219,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -278,12 +280,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/sparseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/sparse_similarity/utils.mk b/graph/L2/tests/sparse_similarity/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/sparse_similarity/utils.mk
+++ b/graph/L2/tests/sparse_similarity/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/strongly_connected_component/Makefile b/graph/L2/tests/strongly_connected_component/Makefile
index 673c550462..32f9fd1eb8 100644
--- a/graph/L2/tests/strongly_connected_component/Makefile
+++ b/graph/L2/tests/strongly_connected_component/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/strongly_connected_component/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/strongly_connec
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -220,11 +227,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -258,21 +260,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -319,12 +321,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/scc_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/strongly_connected_component/utils.mk b/graph/L2/tests/strongly_connected_component/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/strongly_connected_component/utils.mk
+++ b/graph/L2/tests/strongly_connected_component/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/triangle_count/Makefile b/graph/L2/tests/triangle_count/Makefile
index be85ae68c6..8b82ad7f32 100644
--- a/graph/L2/tests/triangle_count/Makefile
+++ b/graph/L2/tests/triangle_count/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/triangle_count/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/triangle_count/
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv: XRT_CXXFLAGS is appended only if the CXXFLAGS gathered so far contain no "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -220,11 +227,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -258,21 +260,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the checks run first but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -319,12 +321,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/TC_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/triangle_count/utils.mk b/graph/L2/tests/triangle_count/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/triangle_count/utils.mk
+++ b/graph/L2/tests/triangle_count/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Set defaults for K_IMAGE/ROOTFS (derived from SYSROOT, non-x86 hosts only; caller-provided values win via ?=), then check SYSROOT/K_IMAGE/ROOTFS below.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME))) # zc706 platforms default to uImage, all others to Image
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # fail early when K_IMAGE does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' is not set or does not exist, please set ENV variable K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs: # fail early when ROOTFS does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' is not set or does not exist, please set ENV variable ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L2/tests/twoHop/Makefile b/graph/L2/tests/twoHop/Makefile
index 3739a35ad6..6431891815 100644
--- a/graph/L2/tests/twoHop/Makefile
+++ b/graph/L2/tests/twoHop/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/twoHop/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/twoHop/host -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 CXXFLAGS += -O3 
 
+# workaround for opencv: XRT_CXXFLAGS is appended only if the CXXFLAGS gathered so far contain no "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -179,11 +186,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -217,21 +219,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the checks run first but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -278,12 +280,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/twoHop_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L2/tests/twoHop/utils.mk b/graph/L2/tests/twoHop/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/graph/L2/tests/twoHop/utils.mk
+++ b/graph/L2/tests/twoHop/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Set defaults for K_IMAGE/ROOTFS (derived from SYSROOT, non-x86 hosts only; caller-provided values win via ?=), then check SYSROOT/K_IMAGE/ROOTFS below.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME))) # zc706 platforms default to uImage, all others to Image
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # fail early when K_IMAGE does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' is not set or does not exist, please set ENV variable K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs: # fail early when ROOTFS does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' is not set or does not exist, please set ENV variable ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/lib/Makefile b/graph/L3/lib/Makefile
index 33d8e3de7c..3e053abd81 100755
--- a/graph/L3/lib/Makefile
+++ b/graph/L3/lib/Makefile
@@ -79,7 +79,7 @@ CXXFLAGS += -I$(XFLIB_DIR)/L3/include
 CXXFLAGS += -I$(XFLIB_DIR)/L3/include/graphPartition/grappolo/include
 CXXFLAGS += -I$(XFLIB_DIR)/../utils/L1/include
 
-CXXFLAGS += -I$(XILINX_XRT)/include -I$(XILINX_VIVADO)/include -I$(XILINX_XRM)/include -std=c++11 -Wall -Wno-unknown-pragmas -Wno-unused-label
+CXXFLAGS += -I$(XILINX_XRT)/include -I$(XILINX_VIVADO)/include -I$(XILINX_XRM)/include -std=c++14 -Wall -Wno-unknown-pragmas -Wno-unused-label
 LDFLAGS += -L$(XILINX_XRT)/lib -lOpenCL -pthread -lrt -Wno-unused-label -Wno-narrowing -DVERBOSE -L$(XILINX_XRM)/lib -lxrm -fopenmp
 CXXFLAGS += -fmessage-length=0 -DNDEBUG -D_GLIBCXX_USE_CXX11_ABI=0
 CXXFLAGS +=-I$(CUR_DIR)/src/ 
diff --git a/graph/L3/tests/BFS/Makefile b/graph/L3/tests/BFS/Makefile
index 7714bcfc15..99f75f6547 100755
--- a/graph/L3/tests/BFS/Makefile
+++ b/graph/L3/tests/BFS/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/BFS/test_bfs.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm -lOpenCL -pthread -lrt -Wno-unused-label -Wno-narrowing -DVERBOSE -fopenmp
 
+# workaround for opencv: XRT_CXXFLAGS is appended only if the CXXFLAGS gathered so far contain no "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the checks run first but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/bfs_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/BFS/utils.mk b/graph/L3/tests/BFS/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/BFS/utils.mk
+++ b/graph/L3/tests/BFS/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Set defaults for K_IMAGE/ROOTFS (derived from SYSROOT, non-x86 hosts only; caller-provided values win via ?=), then check SYSROOT/K_IMAGE/ROOTFS below.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME))) # zc706 platforms default to uImage, all others to Image
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # fail early when K_IMAGE does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' is not set or does not exist, please set ENV variable K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs: # fail early when ROOTFS does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' is not set or does not exist, please set ENV variable ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/MSSP/Makefile b/graph/L3/tests/MSSP/Makefile
index 1667520670..5787a09696 100755
--- a/graph/L3/tests/MSSP/Makefile
+++ b/graph/L3/tests/MSSP/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/MSSP/test_mssp.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv: XRT_CXXFLAGS is appended only if the CXXFLAGS gathered so far contain no "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the checks run first but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/shortestPath_top.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/MSSP/utils.mk b/graph/L3/tests/MSSP/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/MSSP/utils.mk
+++ b/graph/L3/tests/MSSP/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Set defaults for K_IMAGE/ROOTFS (derived from SYSROOT, non-x86 hosts only; caller-provided values win via ?=), then check SYSROOT/K_IMAGE/ROOTFS below.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME))) # zc706 platforms default to uImage, all others to Image
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # fail early when K_IMAGE does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' is not set or does not exist, please set ENV variable K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs: # fail early when ROOTFS does not resolve to an existing file (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' is not set or does not exist, please set ENV variable ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/SCC/Makefile b/graph/L3/tests/SCC/Makefile
index 947595fd93..5af59bed5c 100755
--- a/graph/L3/tests/SCC/Makefile
+++ b/graph/L3/tests/SCC/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/SCC/test_scc.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv: XRT_CXXFLAGS is appended only if the CXXFLAGS gathered so far contain no "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the checks run first but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/scc_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/SCC/utils.mk b/graph/L3/tests/SCC/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/SCC/utils.mk
+++ b/graph/L3/tests/SCC/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: fail when the K_IMAGE path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: fail when the ROOTFS path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/SSSP/Makefile b/graph/L3/tests/SSSP/Makefile
index ac04af0c0a..7eebf67f5c 100755
--- a/graph/L3/tests/SSSP/Makefile
+++ b/graph/L3/tests/SSSP/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/SSSP/test_sssp.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only: they run before packaging but never force a repackage by themselves
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/shortestPath_top.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/SSSP/utils.mk b/graph/L3/tests/SSSP/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/SSSP/utils.mk
+++ b/graph/L3/tests/SSSP/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: fail when the K_IMAGE path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: fail when the ROOTFS path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/WCC/Makefile b/graph/L3/tests/WCC/Makefile
index 678a530eda..d2194afc9a 100755
--- a/graph/L3/tests/WCC/Makefile
+++ b/graph/L3/tests/WCC/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/WCC/test_wcc.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only: they run before packaging but never force a repackage by themselves
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/wcc_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/WCC/utils.mk b/graph/L3/tests/WCC/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/WCC/utils.mk
+++ b/graph/L3/tests/WCC/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: fail when the K_IMAGE path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: fail when the ROOTFS path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/convertCsrCsc/Makefile b/graph/L3/tests/convertCsrCsc/Makefile
index 27362153d9..401ca5933e 100755
--- a/graph/L3/tests/convertCsrCsc/Makefile
+++ b/graph/L3/tests/convertCsrCsc/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/convertCsrCsc/test_convert.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only: they run before packaging but never force a repackage by themselves
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/convertCsrCsc_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/convertCsrCsc/utils.mk b/graph/L3/tests/convertCsrCsc/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/convertCsrCsc/utils.mk
+++ b/graph/L3/tests/convertCsrCsc/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: fail when the K_IMAGE path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: fail when the ROOTFS path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/cosineSimilarityAPDense/Makefile b/graph/L3/tests/cosineSimilarityAPDense/Makefile
index e27d1ecba9..5791f616a4 100755
--- a/graph/L3/tests/cosineSimilarityAPDense/Makefile
+++ b/graph/L3/tests/cosineSimilarityAPDense/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/cosineSimilarityAPDense/test_cosineSimilarityAPDense.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only: they run before packaging but never force a repackage by themselves
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/denseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/cosineSimilarityAPDense/utils.mk b/graph/L3/tests/cosineSimilarityAPDense/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/cosineSimilarityAPDense/utils.mk
+++ b/graph/L3/tests/cosineSimilarityAPDense/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: fail when the K_IMAGE path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: fail when the ROOTFS path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/cosineSimilarityAPSparse/Makefile b/graph/L3/tests/cosineSimilarityAPSparse/Makefile
index d94aa26b16..56a6d83354 100755
--- a/graph/L3/tests/cosineSimilarityAPSparse/Makefile
+++ b/graph/L3/tests/cosineSimilarityAPSparse/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/cosineSimilarityAPSparse/test_cosineSimilarityAPSparse.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks still run but do not force re-packaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/sparseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/cosineSimilarityAPSparse/utils.mk b/graph/L3/tests/cosineSimilarityAPSparse/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/cosineSimilarityAPSparse/utils.mk
+++ b/graph/L3/tests/cosineSimilarityAPSparse/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # For non-x86 hosts, abort when the K_IMAGE path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable to an existing kernel image and rerun)
+endif
+endif
+check_rootfs: # For non-x86 hosts, abort when the ROOTFS path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable to an existing rootfs image and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/cosineSimilaritySSDense/Makefile b/graph/L3/tests/cosineSimilaritySSDense/Makefile
index 1c6b07df5b..901b580dab 100755
--- a/graph/L3/tests/cosineSimilaritySSDense/Makefile
+++ b/graph/L3/tests/cosineSimilaritySSDense/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/cosineSimilaritySSDense/test_cosineSimilaritySSDense.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks still run but do not force re-packaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/denseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/cosineSimilaritySSDense/utils.mk b/graph/L3/tests/cosineSimilaritySSDense/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/cosineSimilaritySSDense/utils.mk
+++ b/graph/L3/tests/cosineSimilaritySSDense/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # For non-x86 hosts, abort when the K_IMAGE path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable to an existing kernel image and rerun)
+endif
+endif
+check_rootfs: # For non-x86 hosts, abort when the ROOTFS path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable to an existing rootfs image and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/cosineSimilaritySSDenseInt/Makefile b/graph/L3/tests/cosineSimilaritySSDenseInt/Makefile
index 10dfbf298c..c209891599 100755
--- a/graph/L3/tests/cosineSimilaritySSDenseInt/Makefile
+++ b/graph/L3/tests/cosineSimilaritySSDenseInt/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/cosineSimilaritySSDenseInt/test_cosineSimilaritySSDense.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks still run but do not force re-packaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/denseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/cosineSimilaritySSDenseInt/utils.mk b/graph/L3/tests/cosineSimilaritySSDenseInt/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/cosineSimilaritySSDenseInt/utils.mk
+++ b/graph/L3/tests/cosineSimilaritySSDenseInt/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # For non-x86 hosts, abort when the K_IMAGE path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable to an existing kernel image and rerun)
+endif
+endif
+check_rootfs: # For non-x86 hosts, abort when the ROOTFS path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable to an existing rootfs image and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/cosineSimilaritySSDenseIntBench/Makefile b/graph/L3/tests/cosineSimilaritySSDenseIntBench/Makefile
index b6f220c22c..11b63a231a 100755
--- a/graph/L3/tests/cosineSimilaritySSDenseIntBench/Makefile
+++ b/graph/L3/tests/cosineSimilaritySSDenseIntBench/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/cosineSimilaritySSDenseIntBench/test_cosineSimilaritySSDense.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks still run but do not force re-packaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/denseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/cosineSimilaritySSDenseIntBench/utils.mk b/graph/L3/tests/cosineSimilaritySSDenseIntBench/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/cosineSimilaritySSDenseIntBench/utils.mk
+++ b/graph/L3/tests/cosineSimilaritySSDenseIntBench/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # For non-x86 hosts, abort when the K_IMAGE path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable to an existing kernel image and rerun)
+endif
+endif
+check_rootfs: # For non-x86 hosts, abort when the ROOTFS path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable to an existing rootfs image and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/cosineSimilaritySSSparse/Makefile b/graph/L3/tests/cosineSimilaritySSSparse/Makefile
index c3eadb372a..70eb077229 100755
--- a/graph/L3/tests/cosineSimilaritySSSparse/Makefile
+++ b/graph/L3/tests/cosineSimilaritySSSparse/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/cosineSimilaritySSSparse/test_cosineSimilaritySSSparse.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks still run but do not force re-packaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/sparseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/cosineSimilaritySSSparse/utils.mk b/graph/L3/tests/cosineSimilaritySSSparse/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/cosineSimilaritySSSparse/utils.mk
+++ b/graph/L3/tests/cosineSimilaritySSSparse/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT path '$(SYSROOT)' is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/jaccardSimilarityAPDense/Makefile b/graph/L3/tests/jaccardSimilarityAPDense/Makefile
index 4553426ca0..cc906f4f51 100755
--- a/graph/L3/tests/jaccardSimilarityAPDense/Makefile
+++ b/graph/L3/tests/jaccardSimilarityAPDense/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/jaccardSimilarityAPDense/test_jaccardSimilarityAPDense.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/denseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/jaccardSimilarityAPDense/utils.mk b/graph/L3/tests/jaccardSimilarityAPDense/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/jaccardSimilarityAPDense/utils.mk
+++ b/graph/L3/tests/jaccardSimilarityAPDense/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT path '$(SYSROOT)' is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/jaccardSimilarityAPSparse/Makefile b/graph/L3/tests/jaccardSimilarityAPSparse/Makefile
index fbe83066de..acf29446c3 100755
--- a/graph/L3/tests/jaccardSimilarityAPSparse/Makefile
+++ b/graph/L3/tests/jaccardSimilarityAPSparse/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/jaccardSimilarityAPSparse/test_jaccardSimilarityAPSparse.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/sparseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/jaccardSimilarityAPSparse/utils.mk b/graph/L3/tests/jaccardSimilarityAPSparse/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/jaccardSimilarityAPSparse/utils.mk
+++ b/graph/L3/tests/jaccardSimilarityAPSparse/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT path '$(SYSROOT)' is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/jaccardSimilaritySSDense/Makefile b/graph/L3/tests/jaccardSimilaritySSDense/Makefile
index 470aa3b20a..1b50513a33 100755
--- a/graph/L3/tests/jaccardSimilaritySSDense/Makefile
+++ b/graph/L3/tests/jaccardSimilaritySSDense/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/jaccardSimilaritySSDense/test_jaccardSimilaritySSDense.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/denseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/jaccardSimilaritySSDense/utils.mk b/graph/L3/tests/jaccardSimilaritySSDense/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/jaccardSimilaritySSDense/utils.mk
+++ b/graph/L3/tests/jaccardSimilaritySSDense/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT path '$(SYSROOT)' is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/jaccardSimilaritySSSparse/Makefile b/graph/L3/tests/jaccardSimilaritySSSparse/Makefile
index 6fbd3f11a8..ae7359fe77 100755
--- a/graph/L3/tests/jaccardSimilaritySSSparse/Makefile
+++ b/graph/L3/tests/jaccardSimilaritySSSparse/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/jaccardSimilaritySSSparse/test_jaccardSimilaritySSSparse.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/sparseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/jaccardSimilaritySSSparse/utils.mk b/graph/L3/tests/jaccardSimilaritySSSparse/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/jaccardSimilaritySSSparse/utils.mk
+++ b/graph/L3/tests/jaccardSimilaritySSSparse/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT path '$(SYSROOT)' is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/knnSimilarityAPDense/Makefile b/graph/L3/tests/knnSimilarityAPDense/Makefile
index d7eb962b93..30094ee703 100755
--- a/graph/L3/tests/knnSimilarityAPDense/Makefile
+++ b/graph/L3/tests/knnSimilarityAPDense/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/knnSimilarityAPDense/test_knnSimilarityAPDense.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/denseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/knnSimilarityAPDense/utils.mk b/graph/L3/tests/knnSimilarityAPDense/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/knnSimilarityAPDense/utils.mk
+++ b/graph/L3/tests/knnSimilarityAPDense/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT ENV variable is not set or points to a non-existent path, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file [$(K_IMAGE)] does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file [$(ROOTFS)] does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/knnSimilarityAPSparse/Makefile b/graph/L3/tests/knnSimilarityAPSparse/Makefile
index 891242a589..eef3786b72 100755
--- a/graph/L3/tests/knnSimilarityAPSparse/Makefile
+++ b/graph/L3/tests/knnSimilarityAPSparse/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/knnSimilarityAPSparse/test_knnSimilarityAPSparse.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/sparseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/knnSimilarityAPSparse/utils.mk b/graph/L3/tests/knnSimilarityAPSparse/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/knnSimilarityAPSparse/utils.mk
+++ b/graph/L3/tests/knnSimilarityAPSparse/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT ENV variable is not set or points to a non-existent path, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file [$(K_IMAGE)] does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file [$(ROOTFS)] does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/knnSimilaritySSDense/Makefile b/graph/L3/tests/knnSimilaritySSDense/Makefile
index 183100465b..8ab2861342 100755
--- a/graph/L3/tests/knnSimilaritySSDense/Makefile
+++ b/graph/L3/tests/knnSimilaritySSDense/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/knnSimilaritySSDense/test_knnSimilaritySSDense.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/denseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/knnSimilaritySSDense/utils.mk b/graph/L3/tests/knnSimilaritySSDense/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/knnSimilaritySSDense/utils.mk
+++ b/graph/L3/tests/knnSimilaritySSDense/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT ENV variable is not set or points to a non-existent path, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file [$(K_IMAGE)] does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file [$(ROOTFS)] does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/knnSimilaritySSSparse/Makefile b/graph/L3/tests/knnSimilaritySSSparse/Makefile
index b2148ebba9..7b7d8b5cd3 100755
--- a/graph/L3/tests/knnSimilaritySSSparse/Makefile
+++ b/graph/L3/tests/knnSimilaritySSSparse/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/knnSimilaritySSSparse/test_knnSimilaritySSSparse.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/sparseSimilarityKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/knnSimilaritySSSparse/utils.mk b/graph/L3/tests/knnSimilaritySSSparse/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/knnSimilaritySSSparse/utils.mk
+++ b/graph/L3/tests/knnSimilaritySSSparse/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT ENV variable is not set or points to a non-existent path, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file [$(K_IMAGE)] does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file [$(ROOTFS)] does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/labelPropagation/Makefile b/graph/L3/tests/labelPropagation/Makefile
index db1fcba7a0..88276a8f0e 100755
--- a/graph/L3/tests/labelPropagation/Makefile
+++ b/graph/L3/tests/labelPropagation/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/labelPropagation/test_labelpropagation.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the check targets create no files and must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/LPKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/labelPropagation/utils.mk b/graph/L3/tests/labelPropagation/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/labelPropagation/utils.mk
+++ b/graph/L3/tests/labelPropagation/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Default K_IMAGE/ROOTFS from SYSROOT on non-x86 hosts (user-provided values win), then check that SYSROOT/K_IMAGE/ROOTFS exist
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: fail when the K_IMAGE path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file $(K_IMAGE) does not exist, please set K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: fail when the ROOTFS path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file $(ROOTFS) does not exist, please set ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/louvainModularity/Makefile b/graph/L3/tests/louvainModularity/Makefile
index ebabbd0f8f..a9cfd71566 100644
--- a/graph/L3/tests/louvainModularity/Makefile
+++ b/graph/L3/tests/louvainModularity/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u55c/'))
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/louvainModularity/test_louvainRun.cpp 
@@ -130,6 +132,11 @@ LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm -lgomp -B/usr/lib/x86_64-linux-gnu
 
 endif
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -194,11 +201,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -232,21 +234,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the check targets create no files and must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -293,12 +295,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_louvain.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/louvainModularity/utils.mk b/graph/L3/tests/louvainModularity/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/louvainModularity/utils.mk
+++ b/graph/L3/tests/louvainModularity/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Default K_IMAGE/ROOTFS from SYSROOT on non-x86 hosts (user-provided values win), then check that SYSROOT/K_IMAGE/ROOTFS exist
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: fail when the K_IMAGE path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file $(K_IMAGE) does not exist, please set K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: fail when the ROOTFS path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file $(ROOTFS) does not exist, please set ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/louvainPartition/Makefile b/graph/L3/tests/louvainPartition/Makefile
index 070acac9a8..43774f257b 100644
--- a/graph/L3/tests/louvainPartition/Makefile
+++ b/graph/L3/tests/louvainPartition/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u55c/'))
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/louvainPartition/test_partition.cpp 
@@ -130,6 +132,11 @@ LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm -lgomp 
 
 endif
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -194,11 +201,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -232,21 +234,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the check targets create no files and must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -293,12 +295,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_louvain.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/louvainPartition/utils.mk b/graph/L3/tests/louvainPartition/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/louvainPartition/utils.mk
+++ b/graph/L3/tests/louvainPartition/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Default K_IMAGE/ROOTFS from SYSROOT on non-x86 hosts (user-provided values win), then check that SYSROOT/K_IMAGE/ROOTFS exist
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: fail when the K_IMAGE path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file $(K_IMAGE) does not exist, please set K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: fail when the ROOTFS path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file $(ROOTFS) does not exist, please set ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/pageRank/Makefile b/graph/L3/tests/pageRank/Makefile
index 9ec903d88b..59e83abcc4 100755
--- a/graph/L3/tests/pageRank/Makefile
+++ b/graph/L3/tests/pageRank/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/pageRank/test_pagerank.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the check targets create no files and must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_pagerank_0.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/pageRank/utils.mk b/graph/L3/tests/pageRank/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/pageRank/utils.mk
+++ b/graph/L3/tests/pageRank/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Default K_IMAGE/ROOTFS from SYSROOT on non-x86 hosts (user-provided values win), then check that SYSROOT/K_IMAGE/ROOTFS exist
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: fail when the K_IMAGE path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file $(K_IMAGE) does not exist, please set K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: fail when the ROOTFS path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file $(ROOTFS) does not exist, please set ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/triangleCount/Makefile b/graph/L3/tests/triangleCount/Makefile
index 5507697cc8..e2233171c4 100755
--- a/graph/L3/tests/triangleCount/Makefile
+++ b/graph/L3/tests/triangleCount/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/triangleCount/test_trianglecount.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the check targets create no files and must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,12 +284,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/TC_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/triangleCount/utils.mk b/graph/L3/tests/triangleCount/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/triangleCount/utils.mk
+++ b/graph/L3/tests/triangleCount/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Default K_IMAGE/ROOTFS from SYSROOT on non-x86 hosts (user-provided values win), then check that SYSROOT/K_IMAGE/ROOTFS exist
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: fail when the K_IMAGE path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file $(K_IMAGE) does not exist, please set K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: fail when the ROOTFS path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file $(ROOTFS) does not exist, please set ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/graph/L3/tests/twoHop/Makefile b/graph/L3/tests/twoHop/Makefile
index be61cec549..72c6f32493 100755
--- a/graph/L3/tests/twoHop/Makefile
+++ b/graph/L3/tests/twoHop/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L3/tests/twoHop/test_twoHop.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/../database/L1/include/hw -I $(XFLIB_DIR)/../quantitative_finance/L1/include -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
@@ -119,6 +121,11 @@ LDFLAGS +=  -L $(BUILD_DIR)
 LDFLAGS +=  -l graphL3
 LDFLAGS += -L$(XILINX_XRM)/lib -lxrm 
 
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -185,11 +192,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -223,21 +225,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a re-package
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -284,12 +286,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/twoHop_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/graph/L3/tests/twoHop/utils.mk b/graph/L3/tests/twoHop/utils.mk
index 0ee80e90da..1d97b0ad1a 100755
--- a/graph/L3/tests/twoHop/utils.mk
+++ b/graph/L3/tests/twoHop/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
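+# On non-x86 hosts, default K_IMAGE and ROOTFS from SYSROOT (values already set win via ?=), then check that SYSROOT/K_IMAGE/ROOTFS exist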
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: abort when the K_IMAGE path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE to a valid path and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: abort when the ROOTFS path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS to a valid path and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/Jenkinsfile b/hpc/Jenkinsfile
index a8e67a6bcb..4a1252f0d2 100644
--- a/hpc/Jenkinsfile
+++ b/hpc/Jenkinsfile
@@ -1,4 +1,4 @@
 @Library('pipeline-library')_
 
-VitisLibPipeline (branch: 'next', libname: 'xf_hpc', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
-        upstream_dependencies: 'xf_blas,next,../blas;xf_sparse,next,../sparse', devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_stable_latest', mail_on:'daily:PR')
+VitisLibPipeline (branch: 'main', libname: 'xf_hpc', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
+        upstream_dependencies: 'xf_blas,main,../blas;xf_sparse,main,../sparse', devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_released', mail_on:'daily:PR')
diff --git a/hpc/L2/benchmarks/cg_gemv_jacobi/Makefile b/hpc/L2/benchmarks/cg_gemv_jacobi/Makefile
index e76ee12e71..fb50021d65 100644
--- a/hpc/L2/benchmarks/cg_gemv_jacobi/Makefile
+++ b/hpc/L2/benchmarks/cg_gemv_jacobi/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/src/sw/cgSolver/test_cgSolver_gemv_jacobi.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D HW_CLK=3e-9 -D CG_numTasks=1 -D CG_dataType=double -D CG_numChannels=16 -D CG_parEntries=4 -D CG_instrBytes=64
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/ -I $(XFLIB_DIR)/L2/include/sw/cgSolver -I $(XFLIB_DIR)/L2/include/common/cgSolver -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2
 CXXFLAGS += -O3
 
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -251,11 +258,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -289,21 +291,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a re-package
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -350,14 +352,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/cgSolver.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -391,12 +395,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/benchmarks/cg_gemv_jacobi/utils.mk b/hpc/L2/benchmarks/cg_gemv_jacobi/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/benchmarks/cg_gemv_jacobi/utils.mk
+++ b/hpc/L2/benchmarks/cg_gemv_jacobi/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
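+# On non-x86 hosts, default K_IMAGE and ROOTFS from SYSROOT (values already set win via ?=), then check that SYSROOT/K_IMAGE/ROOTFS exist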
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: abort when the K_IMAGE path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE to a valid path and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: abort when the ROOTFS path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS to a valid path and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/benchmarks/cg_spmv_jacobi/Makefile b/hpc/L2/benchmarks/cg_spmv_jacobi/Makefile
index 279b87f176..e3c493cb21 100644
--- a/hpc/L2/benchmarks/cg_spmv_jacobi/Makefile
+++ b/hpc/L2/benchmarks/cg_spmv_jacobi/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/src/sw/cgSolver/test_cgSolver_spmv_jacobi.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D HW_CLK=4e-9 -D CG_numTasks=1 -D CG_dataType=double -D CG_numChannels=16 -D CG_parEntries=4 -D CG_instrBytes=64 -D SPARSE_dataType=double -D SPARSE_indexType=uint16_t -D SPARSE_parEntries=4 -D SPARSE_hbmChannels=16 -D SPARSE_hbmMemBits=256
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/ -I $(XFLIB_DIR)/L2/include/sw/cgSolver -I $(XFLIB_DIR)/L2/include/common/cgSolver -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2 -I $(XFLIB_DIR)/../sparse/L2/include/sw/fp64 -I $(XFLIB_DIR)/..
 CXXFLAGS += -g -O0
 
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -156,7 +163,7 @@ VPP_FLAGS_selMultXkernel += --hls.clock 333000000:selMultXkernel
 VPP_FLAGS_rowAccKernel += --hls.clock 333000000:rowAccKernel
 VPP_FLAGS_assembleYkernel += --hls.clock 333000000:assembleYkernel
 ifneq ($(HOST_ARCH), x86)
-VPP_LDFLAGS_cgSolver += --clock.defaultFreqHz 333000000
+VPP_LDFLAGS_cgSolver += --clock.defaultFreqHz 300000000
 else
 VPP_LDFLAGS_cgSolver += --kernel_frequency 300
 endif
@@ -294,11 +301,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -332,21 +334,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a re-package
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -393,14 +395,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/cgSolver.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -434,12 +438,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
-clean: cleanh
+clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/benchmarks/cg_spmv_jacobi/utils.mk b/hpc/L2/benchmarks/cg_spmv_jacobi/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/benchmarks/cg_spmv_jacobi/utils.mk
+++ b/hpc/L2/benchmarks/cg_spmv_jacobi/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
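+# On non-x86 hosts, default K_IMAGE and ROOTFS from SYSROOT (values already set win via ?=), then check that SYSROOT/K_IMAGE/ROOTFS exist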
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: abort when the K_IMAGE path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE to a valid path and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: abort when the ROOTFS path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS to a valid path and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv/Makefile b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv/Makefile
index eee062da76..1288fb9064 100644
--- a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv/Makefile
+++ b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/src/sw/cgSolver/test_cgSolver_gemv.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D HW_CLK=3e-9 -D CG_numTasks=1 -D CG_dataType=double -D CG_numChannels=1 -D CG_parEntries=4 -D CG_instrBytes=64
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/ -I $(XFLIB_DIR)/L2/include/sw/cgSolver -I $(XFLIB_DIR)/L2/include/common/cgSolver -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2
 CXXFLAGS += -O3
 
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -237,11 +244,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -275,21 +277,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a re-package
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -336,14 +338,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/cgSolver.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -377,12 +381,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv/tests/Makefile b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv/tests/Makefile
index 328b1b8dc7..28c583ce83 100644
--- a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv/tests/Makefile
+++ b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv/tests/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/src/sw/cgSolver/test_cgSolver_gemv.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D HW_CLK=3e-9 -D CG_numTasks=1 -D CG_dataType=double -D CG_numChannels=1 -D CG_parEntries=4 -D CG_instrBytes=64
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/ -I $(XFLIB_DIR)/L2/include/sw/cgSolver -I $(XFLIB_DIR)/L2/include/common/cgSolver -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2
 CXXFLAGS += -O3
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -258,11 +265,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -296,21 +298,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first but must not force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -357,14 +359,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/cgSolver.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -398,12 +402,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv/tests/utils.mk b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv/tests/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv/tests/utils.mk
+++ b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv/tests/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # fail early when the kernel image to package is missing (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' is unset or does not exist, please set the K_IMAGE ENV variable to a valid kernel image and rerun)
+endif
+endif
+check_rootfs: # fail early when the root filesystem image to package is missing (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' is unset or does not exist, please set the ROOTFS ENV variable to a valid rootfs image and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv/utils.mk b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv/utils.mk
+++ b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # fail early when the kernel image to package is missing (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' is unset or does not exist, please set the K_IMAGE ENV variable to a valid kernel image and rerun)
+endif
+endif
+check_rootfs: # fail early when the root filesystem image to package is missing (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' is unset or does not exist, please set the ROOTFS ENV variable to a valid rootfs image and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_16ch/Makefile b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_16ch/Makefile
index 083b167c89..5b6cbfdbe4 100644
--- a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_16ch/Makefile
+++ b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_16ch/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/src/sw/cgSolver/test_cgSolver_gemv_jacobi.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D HW_CLK=3e-9 -D CG_numTasks=1 -D CG_dataType=double -D CG_numChannels=16 -D CG_parEntries=4 -D CG_instrBytes=64
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/ -I $(XFLIB_DIR)/L2/include/sw/cgSolver -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/L2/include/common/cgSolver -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2
 CXXFLAGS += -O3
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -253,11 +260,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -291,21 +293,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first but must not force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -352,14 +354,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/cgSolver.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -393,12 +397,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_16ch/tests/Makefile b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_16ch/tests/Makefile
index 61dcbfaad7..f6e3d69536 100644
--- a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_16ch/tests/Makefile
+++ b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_16ch/tests/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/src/sw/cgSolver/test_cgSolver_gemv_jacobi.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D HW_CLK=3e-9 -D CG_numTasks=1 -D CG_dataType=double -D CG_numChannels=16 -D CG_parEntries=4 -D CG_instrBytes=64
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/ -I $(XFLIB_DIR)/L2/include/sw/cgSolver -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/L2/include/common/cgSolver -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2
 CXXFLAGS += -O3
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -253,11 +260,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -291,21 +293,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first but must not force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -352,14 +354,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/cgSolver.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -393,12 +397,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_16ch/tests/utils.mk b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_16ch/tests/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_16ch/tests/utils.mk
+++ b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_16ch/tests/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # fail early when the kernel image to package is missing (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' is unset or does not exist, please set the K_IMAGE ENV variable to a valid kernel image and rerun)
+endif
+endif
+check_rootfs: # fail early when the root filesystem image to package is missing (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' is unset or does not exist, please set the ROOTFS ENV variable to a valid rootfs image and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_16ch/utils.mk b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_16ch/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_16ch/utils.mk
+++ b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_16ch/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # fail early when the kernel image to package is missing (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' is unset or does not exist, please set the K_IMAGE ENV variable to a valid kernel image and rerun)
+endif
+endif
+check_rootfs: # fail early when the root filesystem image to package is missing (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' is unset or does not exist, please set the ROOTFS ENV variable to a valid rootfs image and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_1ch/Makefile b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_1ch/Makefile
index 5aa46e425a..7c3540059f 100644
--- a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_1ch/Makefile
+++ b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_1ch/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/src/sw/cgSolver/test_cgSolver_gemv_jacobi.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D HW_CLK=3e-9 -D CG_numTasks=1 -D CG_dataType=double -D CG_numChannels=1 -D CG_parEntries=4 -D CG_instrBytes=64
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/ -I $(XFLIB_DIR)/L2/include/common/cgSolver -I $(XFLIB_DIR)/L2/include/sw/cgSolver -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2
 CXXFLAGS += -O3
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -258,11 +265,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -296,21 +298,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first but must not force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -357,14 +359,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/cgSolver.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -398,12 +402,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_1ch/tests/Makefile b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_1ch/tests/Makefile
index 9910943b8a..134067ee53 100644
--- a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_1ch/tests/Makefile
+++ b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_1ch/tests/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/src/sw/cgSolver/test_cgSolver_gemv_jacobi.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D HW_CLK=3e-9 -D CG_numTasks=1 -D CG_dataType=double -D CG_numChannels=1 -D CG_parEntries=4 -D CG_instrBytes=64
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/ -I $(XFLIB_DIR)/L2/include/common/cgSolver -I $(XFLIB_DIR)/L2/include/sw/cgSolver -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2
 CXXFLAGS += -O3
 
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -258,11 +265,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -296,21 +298,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run but no longer force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -357,14 +359,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/cgSolver.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -398,12 +402,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_1ch/tests/utils.mk b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_1ch/tests/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_1ch/tests/utils.mk
+++ b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_1ch/tests/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,38 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default to paths derived from SYSROOT unless
+# they are already set (?=); platforms whose name contains zc706 use uImage.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# check_sysroot/check_kimage/check_rootfs: on non-x86 hosts, abort with an error
+# when the wildcard lookup finds nothing at the configured path (unset or missing).
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+	$(error SYSROOT '$(SYSROOT)' is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_1ch/utils.mk b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_1ch/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_1ch/utils.mk
+++ b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_1ch/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,38 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default to paths derived from SYSROOT unless
+# they are already set (?=); platforms whose name contains zc706 use uImage.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# check_sysroot/check_kimage/check_rootfs: on non-x86 hosts, abort with an error
+# when the wildcard lookup finds nothing at the configured path (unset or missing).
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+	$(error SYSROOT '$(SYSROOT)' is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_8ch/Makefile b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_8ch/Makefile
index 91d721b666..c9bb371a5d 100644
--- a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_8ch/Makefile
+++ b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_8ch/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/src/sw/cgSolver/test_cgSolver_gemv_jacobi.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D HW_CLK=3e-9 -D CG_numTasks=1 -D CG_dataType=double -D CG_numChannels=8 -D CG_parEntries=4 -D CG_instrBytes=64
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/ -I $(XFLIB_DIR)/L2/include/sw/cgSolver -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/L2/include/common/cgSolver -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2
 CXXFLAGS += -O3
 
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -258,11 +265,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -296,21 +298,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run but no longer force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -357,14 +359,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/cgSolver.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -398,12 +402,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_8ch/tests/Makefile b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_8ch/tests/Makefile
index 05b8acd2be..0fa428209c 100644
--- a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_8ch/tests/Makefile
+++ b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_8ch/tests/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/src/sw/cgSolver/test_cgSolver_gemv_jacobi.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D HW_CLK=3e-9 -D CG_numTasks=1 -D CG_dataType=double -D CG_numChannels=8 -D CG_parEntries=4 -D CG_instrBytes=64
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/ -I $(XFLIB_DIR)/L2/include/sw/cgSolver -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/L2/include/common/cgSolver -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2
 CXXFLAGS += -O3
 
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -258,11 +265,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -296,21 +298,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run but no longer force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -357,14 +359,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/cgSolver.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -398,12 +402,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_8ch/tests/utils.mk b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_8ch/tests/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_8ch/tests/utils.mk
+++ b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_8ch/tests/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,38 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default to paths derived from SYSROOT unless
+# they are already set (?=); platforms whose name contains zc706 use uImage.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# check_sysroot/check_kimage/check_rootfs: on non-x86 hosts, abort with an error
+# when the wildcard lookup finds nothing at the configured path (unset or missing).
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+	$(error SYSROOT '$(SYSROOT)' is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_8ch/utils.mk b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_8ch/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_8ch/utils.mk
+++ b/hpc/L2/tests/cgSolver/krnl_cgSolver_gemv_jacobi_8ch/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,38 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default to paths derived from SYSROOT unless
+# they are already set (?=); platforms whose name contains zc706 use uImage.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# check_sysroot/check_kimage/check_rootfs: on non-x86 hosts, abort with an error
+# when the wildcard lookup finds nothing at the configured path (unset or missing).
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+	$(error SYSROOT '$(SYSROOT)' is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/cgSolver/krnl_cgSolver_spmv_jacobi_16ch/Makefile b/hpc/L2/tests/cgSolver/krnl_cgSolver_spmv_jacobi_16ch/Makefile
index b8683b3de5..bd5798e1b8 100644
--- a/hpc/L2/tests/cgSolver/krnl_cgSolver_spmv_jacobi_16ch/Makefile
+++ b/hpc/L2/tests/cgSolver/krnl_cgSolver_spmv_jacobi_16ch/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/src/sw/cgSolver/test_cgSolver_spmv_jacobi.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D HW_CLK=4e-9 -D CG_numTasks=1 -D CG_dataType=double -D CG_numChannels=16 -D CG_parEntries=4 -D CG_instrBytes=64 -D SPARSE_dataType=double -D SPARSE_indexType=uint16_t -D SPARSE_parEntries=4 -D SPARSE_hbmChannels=16 -D SPARSE_hbmMemBits=256
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/ -I $(XFLIB_DIR)/L2/include/sw/cgSolver -I $(XFLIB_DIR)/L2/include/common/cgSolver -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2 -I $(XFLIB_DIR)/../sparse/L2/include/sw/fp64 -I $(XFLIB_DIR)/..
 CXXFLAGS += -g -O0
 
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -157,7 +164,7 @@ VPP_FLAGS_selMultXkernel += --hls.clock 333000000:selMultXkernel
 VPP_FLAGS_rowAccKernel += --hls.clock 333000000:rowAccKernel
 VPP_FLAGS_assembleYkernel += --hls.clock 333000000:assembleYkernel
 ifneq ($(HOST_ARCH), x86)
-VPP_LDFLAGS_cgSolver += --clock.defaultFreqHz 333000000
+VPP_LDFLAGS_cgSolver += --clock.defaultFreqHz 300000000
 else
 VPP_LDFLAGS_cgSolver += --kernel_frequency 300
 endif
@@ -295,11 +302,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -333,21 +335,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run but no longer force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -394,14 +396,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/cgSolver.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -435,12 +439,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
-clean: cleanh
+clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/cgSolver/krnl_cgSolver_spmv_jacobi_16ch/utils.mk b/hpc/L2/tests/cgSolver/krnl_cgSolver_spmv_jacobi_16ch/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/cgSolver/krnl_cgSolver_spmv_jacobi_16ch/utils.mk
+++ b/hpc/L2/tests/cgSolver/krnl_cgSolver_spmv_jacobi_16ch/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Stops a non-x86 build when the kernel image path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # Stops a non-x86 build when the root filesystem image path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/mlp/fcn_1CU/Makefile b/hpc/L2/tests/mlp/fcn_1CU/Makefile
index 002b6ad61f..cf4ef6d722 100644
--- a/hpc/L2/tests/mlp/fcn_1CU/Makefile
+++ b/hpc/L2/tests/mlp/fcn_1CU/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/src/sw/mlp/api_fcn_multiInstr.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include/sw/mlp -I $(XFLIB_DIR)/../blas/L1/includ
 CXXFLAGS += -O3
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -146,7 +153,7 @@ endif
 ######################### binary container global settings ##########################
 VPP_FLAGS_fcnKernel += --hls.clock 300000000:fcnKernel
 ifneq ($(HOST_ARCH), x86)
-VPP_LDFLAGS_fcn += --clock.defaultFreqHz 300000000
+VPP_LDFLAGS_fcn += --clock.defaultFreqHz 200000000
 else
 VPP_LDFLAGS_fcn += --kernel_frequency 200
 endif
@@ -188,11 +195,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -226,21 +228,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but do not force repackaging on every build
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -287,14 +289,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/fcn.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -328,12 +332,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
-clean: cleanh
+clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/mlp/fcn_1CU/utils.mk b/hpc/L2/tests/mlp/fcn_1CU/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/mlp/fcn_1CU/utils.mk
+++ b/hpc/L2/tests/mlp/fcn_1CU/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Stops a non-x86 build when the kernel image path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # Stops a non-x86 build when the root filesystem image path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/rtm2d/rtm/Makefile b/hpc/L2/tests/rtm2d/rtm/Makefile
index b3577da657..62500642cf 100644
--- a/hpc/L2/tests/rtm2d/rtm/Makefile
+++ b/hpc/L2/tests/rtm2d/rtm/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/rtm2d/rtm/main.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D RTM_dataType=float -D RTM_numFSMs=10 -D RTM_numBSMs=5 -D RTM_maxDim=1280 -D RTM_MaxB=40 -D RTM_NXB=40 -D RTM_NZB=40 -D RTM_order=8 -D RTM_parEntries=8 -D RTM_verify=1 -D RTM_nPE=2
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/L2/include/sw/rtm2d -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2
 CXXFLAGS += -O3
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -194,11 +201,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -232,21 +234,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but do not force repackaging on every build
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -293,14 +295,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/rtm.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -334,12 +338,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/rtm2d/rtm/tests/dataset_h128_w128_t10_s1/Makefile b/hpc/L2/tests/rtm2d/rtm/tests/dataset_h128_w128_t10_s1/Makefile
index b14fa1265c..fa9c84dba3 100644
--- a/hpc/L2/tests/rtm2d/rtm/tests/dataset_h128_w128_t10_s1/Makefile
+++ b/hpc/L2/tests/rtm2d/rtm/tests/dataset_h128_w128_t10_s1/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/rtm2d/rtm/main.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D RTM_dataType=float -D RTM_numFSMs=2 -D RTM_numBSMs=2 -D RTM_maxDim=256 -D RTM_MaxB=40 -D RTM_NXB=40 -D RTM_NZB=40 -D RTM_order=8 -D RTM_parEntries=8 -D RTM_verify=1 -D RTM_nPE=2
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/L2/include/sw/rtm2d/ -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2
 CXXFLAGS += -O3
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -194,11 +201,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -232,21 +234,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but do not force repackaging on every build
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -293,14 +295,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/rtm.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -334,12 +338,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/rtm2d/rtm/tests/dataset_h128_w128_t10_s1/utils.mk b/hpc/L2/tests/rtm2d/rtm/tests/dataset_h128_w128_t10_s1/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/rtm2d/rtm/tests/dataset_h128_w128_t10_s1/utils.mk
+++ b/hpc/L2/tests/rtm2d/rtm/tests/dataset_h128_w128_t10_s1/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Stops a non-x86 build when the kernel image path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # Stops a non-x86 build when the root filesystem image path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/rtm2d/rtm/utils.mk b/hpc/L2/tests/rtm2d/rtm/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/rtm2d/rtm/utils.mk
+++ b/hpc/L2/tests/rtm2d/rtm/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Stops a non-x86 build when the kernel image path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # Stops a non-x86 build when the root filesystem image path does not exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/rtm2d/rtmbackward/Makefile b/hpc/L2/tests/rtm2d/rtmbackward/Makefile
index 5ba7260ae2..77c6a8794c 100644
--- a/hpc/L2/tests/rtm2d/rtmbackward/Makefile
+++ b/hpc/L2/tests/rtm2d/rtmbackward/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/rtm2d/rtmbackward/main.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D RTM_dataType=float -D RTM_numBSMs=5 -D RTM_maxDim=1280 -D RTM_MaxB=40 -D RTM_NXB=40 -D RTM_NZB=40 -D RTM_order=8 -D RTM_parEntries=8 -D RTM_verify=1 -D RTM_nPE=2
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include/hw -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/L2/include/sw/rtm2d -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2
 CXXFLAGS += -O3
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,11 +182,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -213,21 +215,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but do not force repackaging on every build
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -274,14 +276,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/rtmbackward.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -315,12 +319,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/rtm2d/rtmbackward/tests/dataset_h128_w128_t10/Makefile b/hpc/L2/tests/rtm2d/rtmbackward/tests/dataset_h128_w128_t10/Makefile
index 8fba70a660..80ddf6e481 100644
--- a/hpc/L2/tests/rtm2d/rtmbackward/tests/dataset_h128_w128_t10/Makefile
+++ b/hpc/L2/tests/rtm2d/rtmbackward/tests/dataset_h128_w128_t10/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a. If you download the platform and common-image from the Xilinx Download Center (suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. Alternatively, define SYSROOT, K_IMAGE and ROOTFS yourself:"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/rtm2d/rtmbackward/main.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D RTM_dataType=float -D RTM_numBSMs=2 -D RTM_maxDim=256 -D RTM_MaxB=40 -D RTM_NXB=40 -D RTM_NZB=40 -D RTM_order=8 -D RTM_parEntries=8 -D RTM_verify=1 -D RTM_nPE=2
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/L2/include/sw/rtm2d/ -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2
 CXXFLAGS += -O3
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,11 +182,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -213,21 +215,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -274,14 +276,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/rtmbackward.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -315,12 +319,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/rtm2d/rtmbackward/tests/dataset_h128_w128_t10/utils.mk b/hpc/L2/tests/rtm2d/rtmbackward/tests/dataset_h128_w128_t10/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/rtm2d/rtmbackward/tests/dataset_h128_w128_t10/utils.mk
+++ b/hpc/L2/tests/rtm2d/rtmbackward/tests/dataset_h128_w128_t10/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/rtm2d/rtmbackward/utils.mk b/hpc/L2/tests/rtm2d/rtmbackward/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/rtm2d/rtmbackward/utils.mk
+++ b/hpc/L2/tests/rtm2d/rtmbackward/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/rtm2d/rtmforward/Makefile b/hpc/L2/tests/rtm2d/rtmforward/Makefile
index 843f8e888a..04bbb3d776 100644
--- a/hpc/L2/tests/rtm2d/rtmforward/Makefile
+++ b/hpc/L2/tests/rtm2d/rtmforward/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a. If you download the platform and common-image from the Xilinx Download Center (suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. Alternatively, define SYSROOT, K_IMAGE and ROOTFS yourself:"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/rtm2d/rtmforward/main.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D RTM_dataType=float -D RTM_numFSMs=10 -D RTM_maxDim=1280 -D RTM_MaxB=40 -D RTM_NXB=40 -D RTM_NZB=40 -D RTM_order=8 -D RTM_parEntries=8 -D RTM_verify=1 -D RTM_nPE=2
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/L2/include/sw/rtm2d -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2
 CXXFLAGS += -O3
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,11 +182,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -213,21 +215,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -274,14 +276,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/rtmforward.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -315,12 +319,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/rtm2d/rtmforward/tests/dataset_h128_w128_t10/Makefile b/hpc/L2/tests/rtm2d/rtmforward/tests/dataset_h128_w128_t10/Makefile
index 8a76ed9370..c90cce006c 100644
--- a/hpc/L2/tests/rtm2d/rtmforward/tests/dataset_h128_w128_t10/Makefile
+++ b/hpc/L2/tests/rtm2d/rtmforward/tests/dataset_h128_w128_t10/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a. If you download the platform and common-image from the Xilinx Download Center (suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. Alternatively, define SYSROOT, K_IMAGE and ROOTFS yourself:"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/rtm2d/rtmforward/main.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D RTM_dataType=float -D RTM_numFSMs=2 -D RTM_maxDim=256 -D RTM_MaxB=40 -D RTM_NXB=40 -D RTM_NZB=40 -D RTM_order=8 -D RTM_parEntries=8 -D RTM_verify=1 -D RTM_nPE=2
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include/sw -I $(XFLIB_DIR)/L2/include/sw/rtm2d -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2
 CXXFLAGS += -O3
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,11 +182,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -213,21 +215,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -274,14 +276,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/rtmforward.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -315,12 +319,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/rtm2d/rtmforward/tests/dataset_h128_w128_t10/utils.mk b/hpc/L2/tests/rtm2d/rtmforward/tests/dataset_h128_w128_t10/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/rtm2d/rtmforward/tests/dataset_h128_w128_t10/utils.mk
+++ b/hpc/L2/tests/rtm2d/rtmforward/tests/dataset_h128_w128_t10/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/rtm2d/rtmforward/utils.mk b/hpc/L2/tests/rtm2d/rtmforward/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/rtm2d/rtmforward/utils.mk
+++ b/hpc/L2/tests/rtm2d/rtmforward/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE or SYSROOT ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS or SYSROOT ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/rtm3d/rtmforward_Domain_HBC/Makefile b/hpc/L2/tests/rtm3d/rtmforward_Domain_HBC/Makefile
index 6df1fb2b05..275ec61e7a 100755
--- a/hpc/L2/tests/rtm3d/rtmforward_Domain_HBC/Makefile
+++ b/hpc/L2/tests/rtm3d/rtmforward_Domain_HBC/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a. If you download the platform and common-image from the Xilinx Download Center (suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. Alternatively, define SYSROOT, K_IMAGE and ROOTFS yourself:"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/main.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D RTM_dataType=float -D RTM_numFSMs=2 -D RTM_maxZ=280 -D RTM_maxY=180 -D RTM_MaxB=20 -D RTM_NXB=20 -D RTM_NYB=20 -D RTM_NZB=20 -D RTM_order=8 -D RTM_nPEZ=2 -D RTM_nPEX=4
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/L2/include/sw/rtm3d/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2 -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/
 CXXFLAGS += -g
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,14 +284,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/rtmforward.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -323,12 +327,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/rtm3d/rtmforward_Domain_HBC/tests/dataset_z70_y60_x80_t4/Makefile b/hpc/L2/tests/rtm3d/rtmforward_Domain_HBC/tests/dataset_z70_y60_x80_t4/Makefile
index edf51929c2..1de8e37beb 100755
--- a/hpc/L2/tests/rtm3d/rtmforward_Domain_HBC/tests/dataset_z70_y60_x80_t4/Makefile
+++ b/hpc/L2/tests/rtm3d/rtmforward_Domain_HBC/tests/dataset_z70_y60_x80_t4/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/rtm3d/rtmforward_Domain_HBC/main.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D RTM_dataType=float -D RTM_numFSMs=2 -D RTM_maxZ=256 -D RTM_maxY=128 -D RTM_MaxB=20 -D RTM_NXB=20 -D RTM_NYB=20 -D RTM_NZB=20 -D RTM_order=8 -D RTM_nPEZ=2 -D RTM_nPEX=2
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include/sw/rtm3d/ -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2 -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/
 CXXFLAGS += -g
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,11 +182,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -213,21 +215,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -274,14 +276,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/rtmforward.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -315,12 +319,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/rtm3d/rtmforward_Domain_HBC/tests/dataset_z70_y60_x80_t4/utils.mk b/hpc/L2/tests/rtm3d/rtmforward_Domain_HBC/tests/dataset_z70_y60_x80_t4/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/rtm3d/rtmforward_Domain_HBC/tests/dataset_z70_y60_x80_t4/utils.mk
+++ b/hpc/L2/tests/rtm3d/rtmforward_Domain_HBC/tests/dataset_z70_y60_x80_t4/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/rtm3d/rtmforward_Domain_HBC/utils.mk b/hpc/L2/tests/rtm3d/rtmforward_Domain_HBC/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/rtm3d/rtmforward_Domain_HBC/utils.mk
+++ b/hpc/L2/tests/rtm3d/rtmforward_Domain_HBC/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/rtm3d/rtmforward_Domain_RBC/Makefile b/hpc/L2/tests/rtm3d/rtmforward_Domain_RBC/Makefile
index 4600f708e9..dcdb107c8c 100755
--- a/hpc/L2/tests/rtm3d/rtmforward_Domain_RBC/Makefile
+++ b/hpc/L2/tests/rtm3d/rtmforward_Domain_RBC/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/main.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D RTM_dataType=float -D RTM_numFSMs=2 -D RTM_maxZ=280 -D RTM_maxY=180 -D RTM_MaxB=20 -D RTM_NXB=20 -D RTM_NYB=20 -D RTM_NZB=20 -D RTM_order=8 -D RTM_nPEZ=2 -D RTM_nPEX=4
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/L2/include/sw/rtm3d -I $(XFLIB_DIR)/../blas/L2/include/xcl2 -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/
 CXXFLAGS += -g
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,14 +284,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/rtmforward.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -323,12 +327,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/rtm3d/rtmforward_Domain_RBC/tests/dataset_z90_y60_x80_t4/Makefile b/hpc/L2/tests/rtm3d/rtmforward_Domain_RBC/tests/dataset_z90_y60_x80_t4/Makefile
index 02e243630c..6393d93ae1 100755
--- a/hpc/L2/tests/rtm3d/rtmforward_Domain_RBC/tests/dataset_z90_y60_x80_t4/Makefile
+++ b/hpc/L2/tests/rtm3d/rtmforward_Domain_RBC/tests/dataset_z90_y60_x80_t4/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/rtm3d/rtmforward_Domain_RBC/main.cpp $(XFLIB_DIR)/../blas/L2/src/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D RTM_dataType=float -D RTM_numFSMs=2 -D RTM_maxZ=280 -D RTM_maxY=180 -D RTM_MaxB=20 -D RTM_NXB=20 -D RTM_NYB=20 -D RTM_NZB=20 -D RTM_order=8 -D RTM_nPEZ=2 -D RTM_nPEX=4
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include/sw/ -I $(XFLIB_DIR)/L2/include/sw/rtm3d -I $(XFLIB_DIR)/../blas/L2/include/xcl2 -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/
 CXXFLAGS += -g
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -141,7 +148,7 @@ endif
 ######################### binary container global settings ##########################
 VPP_FLAGS_rtmforward += --hls.clock 300000000:rtmforward
 ifneq ($(HOST_ARCH), x86)
-VPP_LDFLAGS_rtmforward += --clock.defaultFreqHz 250000000
+VPP_LDFLAGS_rtmforward += --clock.defaultFreqHz 200000000
 else
 VPP_LDFLAGS_rtmforward += --kernel_frequency 200
 endif
@@ -183,11 +190,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -221,21 +223,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -282,14 +284,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/rtmforward.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -323,12 +327,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
-clean: cleanh
+clean: cleanh
\ No newline at end of file
diff --git a/hpc/L2/tests/rtm3d/rtmforward_Domain_RBC/tests/dataset_z90_y60_x80_t4/utils.mk b/hpc/L2/tests/rtm3d/rtmforward_Domain_RBC/tests/dataset_z90_y60_x80_t4/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/rtm3d/rtmforward_Domain_RBC/tests/dataset_z90_y60_x80_t4/utils.mk
+++ b/hpc/L2/tests/rtm3d/rtmforward_Domain_RBC/tests/dataset_z90_y60_x80_t4/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L2/tests/rtm3d/rtmforward_Domain_RBC/utils.mk b/hpc/L2/tests/rtm3d/rtmforward_Domain_RBC/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L2/tests/rtm3d/rtmforward_Domain_RBC/utils.mk
+++ b/hpc/L2/tests/rtm3d/rtmforward_Domain_RBC/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L3/examples/mlp/Makefile b/hpc/L3/examples/mlp/Makefile
index 221d8f9513..f893b7611a 100644
--- a/hpc/L3/examples/mlp/Makefile
+++ b/hpc/L3/examples/mlp/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,10 +114,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(XFLIB_DIR)/L3/examples/mlp/fcn_example.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include/sw/mlp -I $(XFLIB_DIR)/../blas/L3/includ
 LDFLAGS += -luuid -lxrt_coreutil
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := fcn_example.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -188,11 +195,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -226,21 +228,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -287,14 +289,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/fcn.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	make dump_config
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	make dump_config
 else
@@ -328,12 +332,12 @@ cleanh:
 
+# Remove kernel build products: xclbin files, emulation leftovers and the _x_temp.* scratch directories.
 cleank:
-	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*
 
+# Run cleanh and cleank, then remove the build directory, reports and packaging output.
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
 clean: cleanh
\ No newline at end of file
diff --git a/hpc/L3/examples/mlp/utils.mk b/hpc/L3/examples/mlp/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L3/examples/mlp/utils.mk
+++ b/hpc/L3/examples/mlp/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,38 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default to files two directories above
+# SYSROOT; "?=" keeps any value that is already defined.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# Each check_* target aborts make on non-x86 hosts when its path is missing;
+# $(wildcard) expands to nothing for an empty value or a nonexistent path.
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT "$(SYSROOT)" is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE "$(K_IMAGE)" does not exist, please set SYSROOT or K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS "$(ROOTFS)" does not exist, please set SYSROOT or ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/hpc/L3/tests/rtm2d/Makefile b/hpc/L3/tests/rtm2d/Makefile
index 287e407d52..21c5553fe5 100644
--- a/hpc/L3/tests/rtm2d/Makefile
+++ b/hpc/L3/tests/rtm2d/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -108,15 +114,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/main.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L3/include/sw -I $(XFLIB_DIR)/L2/include/hw/rtm2d -I $(XFLIB_DIR)/L2/include/sw/rtm2d -I $(XFLIB_DIR)/L3/include/sw/rtm2d -I $(XFLIB_DIR)/../blas/L1/tests/sw/include/ -I $(XFLIB_DIR)/../blas/L2/include/xcl2/
 LDFLAGS += -luuid -lxrt_coreutil
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -134,7 +141,7 @@ ifneq (,$(shell echo $(XPLATFORM) | awk '/u280/'))
 VPP_FLAGS_rtmforward += --hls.clock 250000000:rtmforward
 VPP_FLAGS_rtmbackward += --hls.clock 250000000:rtmbackward
 ifneq ($(HOST_ARCH), x86)
-VPP_LDFLAGS_rtm += --clock.defaultFreqHz 250000000
+VPP_LDFLAGS_rtm += --clock.defaultFreqHz 150000000
 else
 VPP_LDFLAGS_rtm += --kernel_frequency 150
 endif
@@ -145,7 +152,7 @@ else
 VPP_FLAGS_rtmforward += --hls.clock 250000000:rtmforward
 VPP_FLAGS_rtmbackward += --hls.clock 250000000:rtmbackward
 ifneq ($(HOST_ARCH), x86)
-VPP_LDFLAGS_rtm += --clock.defaultFreqHz 250000000
+VPP_LDFLAGS_rtm += --clock.defaultFreqHz 150000000
 else
 VPP_LDFLAGS_rtm += --kernel_frequency 150
 endif
@@ -193,11 +200,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -231,21 +233,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: checks run first without forcing a repackage on every build
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -292,14 +294,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/rtm.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -333,12 +337,12 @@ cleanh:
 
+# Remove kernel build products: xclbin files, emulation leftovers and the _x_temp.* scratch directories.
 cleank:
-	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*
 
+# Run cleanh and cleank, then remove the build directory, reports and packaging output.
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
-clean: cleanh
+clean: cleanh
\ No newline at end of file
diff --git a/hpc/L3/tests/rtm2d/utils.mk b/hpc/L3/tests/rtm2d/utils.mk
index 9b889021fc..0779ab9693 100644
--- a/hpc/L3/tests/rtm2d/utils.mk
+++ b/hpc/L3/tests/rtm2d/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,38 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default to files two directories above
+# SYSROOT; "?=" keeps any value that is already defined.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# Each check_* target aborts make on non-x86 hosts when its path is missing;
+# $(wildcard) expands to nothing for an empty value or a nonexistent path.
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT "$(SYSROOT)" is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE "$(K_IMAGE)" does not exist, please set SYSROOT or K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS "$(ROOTFS)" does not exist, please set SYSROOT or ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/Jenkinsfile b/quantitative_finance/Jenkinsfile
index ca5b577f86..77e8bdcb42 100644
--- a/quantitative_finance/Jenkinsfile
+++ b/quantitative_finance/Jenkinsfile
@@ -1,4 +1,4 @@
 @Library('pipeline-library')_
-VitisLibPipeline (branch: 'next', libname: 'xf_fintech', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
-                  upstream_dependencies: 'xf_utils_hw,next,../utils; xf_solver,next,../solver',
-                  devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_stable_latest')
+VitisLibPipeline (branch: 'main', libname: 'xf_fintech', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
+                  upstream_dependencies: 'xf_utils_hw,main,../utils; xf_solver,main,../solver',
+                  devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_released')
diff --git a/quantitative_finance/L1/benchmarks/SVD/Makefile b/quantitative_finance/L1/benchmarks/SVD/Makefile
index c73c2a87f3..a42102d1ca 100644
--- a/quantitative_finance/L1/benchmarks/SVD/Makefile
+++ b/quantitative_finance/L1/benchmarks/SVD/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(CUR_DIR)/host/svd.cpp $(CUR_DIR)/host/util.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L1/benchmarks/SVD/kernel -
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -179,7 +186,7 @@ endif
 $(TEMP_DIR)/kernel_svd_0.xo: $(CUR_DIR)/kernel/kernel_svd.cpp 
 	$(ECHO) "Compiling Kernel: kernel_svd_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_svd_0) $(VPP_FLAGS) -k kernel_svd_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_svd_0) $(VPP_FLAGS) -k kernel_svd_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_svd_OBJS += $(TEMP_DIR)/kernel_svd_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_svd_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -203,11 +210,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -241,21 +243,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: checks run first without forcing a repackage on every build
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -302,14 +304,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_svd.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -343,12 +347,12 @@ cleanh:
 
+# Remove kernel build products: xclbin files, emulation leftovers and the _x_temp.* scratch directories.
 cleank:
-	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*
 
+# Run cleanh and cleank, then remove the build directory, reports and packaging output.
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L1/benchmarks/SVD/utils.mk b/quantitative_finance/L1/benchmarks/SVD/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L1/benchmarks/SVD/utils.mk
+++ b/quantitative_finance/L1/benchmarks/SVD/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,38 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default to files two directories above
+# SYSROOT; "?=" keeps any value that is already defined.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# Each check_* target aborts make on non-x86 hosts when its path is missing;
+# $(wildcard) expands to nothing for an empty value or a nonexistent path.
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT "$(SYSROOT)" is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE "$(K_IMAGE)" does not exist, please set SYSROOT or K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS "$(ROOTFS)" does not exist, please set SYSROOT or ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/benchmarks/MCAmericanEngineMultiKernel/Makefile b/quantitative_finance/L2/benchmarks/MCAmericanEngineMultiKernel/Makefile
index a1089a5738..2d4854b770 100644
--- a/quantitative_finance/L2/benchmarks/MCAmericanEngineMultiKernel/Makefile
+++ b/quantitative_finance/L2/benchmarks/MCAmericanEngineMultiKernel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -123,6 +125,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -183,17 +190,17 @@ endif
 $(TEMP_DIR)/MCAE_k0.xo: $(CUR_DIR)/kernel/MCAE_k0.cpp 
 	$(ECHO) "Compiling Kernel: MCAE_k0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_MCAE_k0) $(VPP_FLAGS) -k MCAE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_MCAE_k0) $(VPP_FLAGS) -k MCAE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_MCAE_k_OBJS += $(TEMP_DIR)/MCAE_k0.xo
 $(TEMP_DIR)/MCAE_k1.xo: $(CUR_DIR)/kernel/MCAE_k1.cpp 
 	$(ECHO) "Compiling Kernel: MCAE_k1"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_MCAE_k1) $(VPP_FLAGS) -k MCAE_k1 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_MCAE_k1) $(VPP_FLAGS) -k MCAE_k1 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_MCAE_k_OBJS += $(TEMP_DIR)/MCAE_k1.xo
 $(TEMP_DIR)/MCAE_k2.xo: $(CUR_DIR)/kernel/MCAE_k2.cpp 
 	$(ECHO) "Compiling Kernel: MCAE_k2"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_MCAE_k2) $(VPP_FLAGS) -k MCAE_k2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_MCAE_k2) $(VPP_FLAGS) -k MCAE_k2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_MCAE_k_OBJS += $(TEMP_DIR)/MCAE_k2.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_MCAE_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -217,11 +224,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -255,21 +257,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: checks run first without forcing a repackage on every build
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -316,14 +318,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/MCAE_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -357,12 +361,12 @@ cleanh:
 
+# Remove kernel build products: xclbin files, emulation leftovers and the _x_temp.* scratch directories.
 cleank:
-	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*
 
+# Run cleanh and cleank, then remove the build directory, reports and packaging output.
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/benchmarks/MCAmericanEngineMultiKernel/utils.mk b/quantitative_finance/L2/benchmarks/MCAmericanEngineMultiKernel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/benchmarks/MCAmericanEngineMultiKernel/utils.mk
+++ b/quantitative_finance/L2/benchmarks/MCAmericanEngineMultiKernel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,38 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default to files two directories above
+# SYSROOT; "?=" keeps any value that is already defined.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# Each check_* target aborts make on non-x86 hosts when its path is missing;
+# $(wildcard) expands to nothing for an empty value or a nonexistent path.
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT "$(SYSROOT)" is not set or does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE "$(K_IMAGE)" does not exist, please set SYSROOT or K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS "$(ROOTFS)" does not exist, please set SYSROOT or ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/benchmarks/MCEuropeanEngine/Makefile b/quantitative_finance/L2/benchmarks/MCEuropeanEngine/Makefile
index fa01e9e11f..430042c681 100644
--- a/quantitative_finance/L2/benchmarks/MCEuropeanEngine/Makefile
+++ b/quantitative_finance/L2/benchmarks/MCEuropeanEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/test.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -123,6 +125,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -162,7 +169,7 @@ endif
 $(TEMP_DIR)/kernel_mc.xo: $(CUR_DIR)/kernel/kernel_mc.cpp 
 	$(ECHO) "Compiling Kernel: kernel_mc"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_mc) $(VPP_FLAGS) -k kernel_mc -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_mc) $(VPP_FLAGS) -k kernel_mc -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_mc_OBJS += $(TEMP_DIR)/kernel_mc.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_mc_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -186,11 +193,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -224,21 +226,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only: they still run, but do not force re-packaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -285,14 +287,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_mc.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -326,12 +330,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/benchmarks/MCEuropeanEngine/utils.mk b/quantitative_finance/L2/benchmarks/MCEuropeanEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/benchmarks/MCEuropeanEngine/utils.mk
+++ b/quantitative_finance/L2/benchmarks/MCEuropeanEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Default K_IMAGE/ROOTFS for non-x86 hosts if not already set (uImage for zc706 platforms, Image otherwise), then check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort when the kernel image path does not exist (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE=$(K_IMAGE) does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # abort when the rootfs path does not exist (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS=$(ROOTFS) does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/benchmarks/TreeEngine/TreeCallableEngineHWModel/Makefile b/quantitative_finance/L2/benchmarks/TreeEngine/TreeCallableEngineHWModel/Makefile
index 291c7f1e85..cb17dc43b4 100644
--- a/quantitative_finance/L2/benchmarks/TreeEngine/TreeCallableEngineHWModel/Makefile
+++ b/quantitative_finance/L2/benchmarks/TreeEngine/TreeCallableEngineHWModel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -123,6 +125,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,7 +182,7 @@ endif
 $(TEMP_DIR)/scanTreeKernel.xo: $(CUR_DIR)/kernel/scan_tree_kernel.cpp 
 	$(ECHO) "Compiling Kernel: scanTreeKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_scanTreeKernel) $(VPP_FLAGS) -k scanTreeKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_scanTreeKernel) $(VPP_FLAGS) -k scanTreeKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_scanTreeKernel_OBJS += $(TEMP_DIR)/scanTreeKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_scanTreeKernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -199,11 +206,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -237,21 +239,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only: they still run, but do not force re-packaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -298,14 +300,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/scanTreeKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -339,12 +343,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/benchmarks/TreeEngine/TreeCallableEngineHWModel/utils.mk b/quantitative_finance/L2/benchmarks/TreeEngine/TreeCallableEngineHWModel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/benchmarks/TreeEngine/TreeCallableEngineHWModel/utils.mk
+++ b/quantitative_finance/L2/benchmarks/TreeEngine/TreeCallableEngineHWModel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Default K_IMAGE/ROOTFS for non-x86 hosts if not already set (uImage for zc706 platforms, Image otherwise), then check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort when the kernel image path does not exist (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE=$(K_IMAGE) does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # abort when the rootfs path does not exist (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS=$(ROOTFS) does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/benchmarks/TreeEngine/TreeCapFloorEngineHWModel/Makefile b/quantitative_finance/L2/benchmarks/TreeEngine/TreeCapFloorEngineHWModel/Makefile
index dcb79dad4d..f378ed7642 100644
--- a/quantitative_finance/L2/benchmarks/TreeEngine/TreeCapFloorEngineHWModel/Makefile
+++ b/quantitative_finance/L2/benchmarks/TreeEngine/TreeCapFloorEngineHWModel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -123,6 +125,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,7 +182,7 @@ endif
 $(TEMP_DIR)/scanTreeKernel.xo: $(CUR_DIR)/kernel/scan_tree_kernel.cpp 
 	$(ECHO) "Compiling Kernel: scanTreeKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_scanTreeKernel) $(VPP_FLAGS) -k scanTreeKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_scanTreeKernel) $(VPP_FLAGS) -k scanTreeKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_scanTreeKernel_OBJS += $(TEMP_DIR)/scanTreeKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_scanTreeKernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -199,11 +206,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -237,21 +239,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only: they still run, but do not force re-packaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -298,14 +300,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/scanTreeKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -339,12 +343,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/benchmarks/TreeEngine/TreeCapFloorEngineHWModel/utils.mk b/quantitative_finance/L2/benchmarks/TreeEngine/TreeCapFloorEngineHWModel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/benchmarks/TreeEngine/TreeCapFloorEngineHWModel/utils.mk
+++ b/quantitative_finance/L2/benchmarks/TreeEngine/TreeCapFloorEngineHWModel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Default K_IMAGE/ROOTFS for non-x86 hosts if not already set (uImage for zc706 platforms, Image otherwise), then check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort when the kernel image path does not exist (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE=$(K_IMAGE) does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # abort when the rootfs path does not exist (non-x86 hosts only)
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS=$(ROOTFS) does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwapEngineHWModel/Makefile b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwapEngineHWModel/Makefile
index ec764e70af..ed7c46c1c1 100644
--- a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwapEngineHWModel/Makefile
+++ b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwapEngineHWModel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -128,6 +130,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -194,7 +201,7 @@ endif
 $(TEMP_DIR)/scanTreeKernel.xo: $(CUR_DIR)/kernel/scan_tree_kernel.cpp 
 	$(ECHO) "Compiling Kernel: scanTreeKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_scanTreeKernel) $(VPP_FLAGS) -k scanTreeKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_scanTreeKernel) $(VPP_FLAGS) -k scanTreeKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_scanTreeKernel_OBJS += $(TEMP_DIR)/scanTreeKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_scanTreeKernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -218,11 +225,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -256,21 +258,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # phony checks are order-only: they still run, but do not force re-packaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -317,14 +319,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/scanTreeKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -358,12 +362,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwapEngineHWModel/utils.mk b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwapEngineHWModel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwapEngineHWModel/utils.mk
+++ b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwapEngineHWModel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort unless the kernel image that K_IMAGE points at exists on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE to a valid path and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort unless the root filesystem image that ROOTFS points at exists on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS to a valid path and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineBKModel/Makefile b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineBKModel/Makefile
index 1fc8cd7684..2d6d89ca36 100644
--- a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineBKModel/Makefile
+++ b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineBKModel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -123,6 +125,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,7 +182,7 @@ endif
 $(TEMP_DIR)/scanTreeKernel.xo: $(CUR_DIR)/kernel/scan_tree_kernel.cpp 
 	$(ECHO) "Compiling Kernel: scanTreeKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_scanTreeKernel) $(VPP_FLAGS) -k scanTreeKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_scanTreeKernel) $(VPP_FLAGS) -k scanTreeKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_scanTreeKernel_OBJS += $(TEMP_DIR)/scanTreeKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_scanTreeKernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -199,11 +206,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -237,21 +239,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a repackage on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -298,14 +300,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/scanTreeKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -339,12 +343,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineBKModel/utils.mk b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineBKModel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineBKModel/utils.mk
+++ b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineBKModel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort unless the kernel image that K_IMAGE points at exists on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE to a valid path and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort unless the root filesystem image that ROOTFS points at exists on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS to a valid path and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineCIRModel/Makefile b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineCIRModel/Makefile
index e590222b40..74afccfccc 100644
--- a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineCIRModel/Makefile
+++ b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineCIRModel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -123,6 +125,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,7 +182,7 @@ endif
 $(TEMP_DIR)/scanTreeKernel.xo: $(CUR_DIR)/kernel/scan_tree_kernel.cpp 
 	$(ECHO) "Compiling Kernel: scanTreeKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_scanTreeKernel) $(VPP_FLAGS) -k scanTreeKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_scanTreeKernel) $(VPP_FLAGS) -k scanTreeKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_scanTreeKernel_OBJS += $(TEMP_DIR)/scanTreeKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_scanTreeKernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -199,11 +206,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -237,21 +239,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a repackage on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -298,14 +300,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/scanTreeKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -339,12 +343,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineCIRModel/utils.mk b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineCIRModel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineCIRModel/utils.mk
+++ b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineCIRModel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort unless the kernel image that K_IMAGE points at exists on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE to a valid path and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort unless the root filesystem image that ROOTFS points at exists on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS to a valid path and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineECIRModel/Makefile b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineECIRModel/Makefile
index b0ba19d33c..1f6526f4a9 100644
--- a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineECIRModel/Makefile
+++ b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineECIRModel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -123,6 +125,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,7 +182,7 @@ endif
 $(TEMP_DIR)/scanTreeKernel.xo: $(CUR_DIR)/kernel/scan_tree_kernel.cpp 
 	$(ECHO) "Compiling Kernel: scanTreeKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_scanTreeKernel) $(VPP_FLAGS) -k scanTreeKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_scanTreeKernel) $(VPP_FLAGS) -k scanTreeKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_scanTreeKernel_OBJS += $(TEMP_DIR)/scanTreeKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_scanTreeKernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -199,11 +206,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -237,21 +239,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a repackage on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -298,14 +300,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/scanTreeKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -339,12 +343,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineECIRModel/utils.mk b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineECIRModel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineECIRModel/utils.mk
+++ b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineECIRModel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort unless the kernel image that K_IMAGE points at exists on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE to a valid path and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort unless the root filesystem image that ROOTFS points at exists on disk
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS to a valid path and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineG2Model/Makefile b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineG2Model/Makefile
index 34fba70b53..1ece4a84e4 100644
--- a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineG2Model/Makefile
+++ b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineG2Model/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -123,6 +125,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,7 +182,7 @@ endif
 $(TEMP_DIR)/scanTreeKernel.xo: $(CUR_DIR)/kernel/scan_tree_kernel.cpp 
 	$(ECHO) "Compiling Kernel: scanTreeKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_scanTreeKernel) $(VPP_FLAGS) -k scanTreeKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_scanTreeKernel) $(VPP_FLAGS) -k scanTreeKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_scanTreeKernel_OBJS += $(TEMP_DIR)/scanTreeKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_scanTreeKernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -199,11 +206,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -237,21 +239,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a repackage on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -298,14 +300,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/scanTreeKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -339,12 +343,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineG2Model/utils.mk b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineG2Model/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineG2Model/utils.mk
+++ b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineG2Model/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set K_IMAGE/ROOTFS defaults relative to SYSROOT (?= keeps any value already set in the environment or on the command line), then check that SYSROOT/K_IMAGE/ROOTFS exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT ENV variable is not set or points to a non-existent path: '$(SYSROOT)', please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set or points to a non-existent file: '$(K_IMAGE)', please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set or points to a non-existent file: '$(ROOTFS)', please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineHWModel/Makefile b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineHWModel/Makefile
index 2377723812..766d9ea652 100644
--- a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineHWModel/Makefile
+++ b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineHWModel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -123,6 +125,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,7 +182,7 @@ endif
 $(TEMP_DIR)/scanTreeKernel.xo: $(CUR_DIR)/kernel/scan_tree_kernel.cpp 
 	$(ECHO) "Compiling Kernel: scanTreeKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_scanTreeKernel) $(VPP_FLAGS) -k scanTreeKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_scanTreeKernel) $(VPP_FLAGS) -k scanTreeKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_scanTreeKernel_OBJS += $(TEMP_DIR)/scanTreeKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_scanTreeKernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -199,11 +206,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -237,21 +239,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a repackage on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -298,14 +300,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/scanTreeKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -339,12 +343,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineHWModel/utils.mk b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineHWModel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineHWModel/utils.mk
+++ b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineHWModel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set K_IMAGE/ROOTFS defaults relative to SYSROOT (?= keeps any value already set in the environment or on the command line), then check that SYSROOT/K_IMAGE/ROOTFS exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT ENV variable is not set or points to a non-existent path: '$(SYSROOT)', please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set or points to a non-existent file: '$(K_IMAGE)', please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set or points to a non-existent file: '$(ROOTFS)', please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineVModel/Makefile b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineVModel/Makefile
index b7e62f815e..dd591bee01 100644
--- a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineVModel/Makefile
+++ b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineVModel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -123,6 +125,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -175,7 +182,7 @@ endif
 $(TEMP_DIR)/scanTreeKernel.xo: $(CUR_DIR)/kernel/scan_tree_kernel.cpp 
 	$(ECHO) "Compiling Kernel: scanTreeKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_scanTreeKernel) $(VPP_FLAGS) -k scanTreeKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_scanTreeKernel) $(VPP_FLAGS) -k scanTreeKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_scanTreeKernel_OBJS += $(TEMP_DIR)/scanTreeKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_scanTreeKernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -199,11 +206,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -237,21 +239,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a repackage on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -298,14 +300,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/scanTreeKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -339,12 +343,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineVModel/utils.mk b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineVModel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineVModel/utils.mk
+++ b/quantitative_finance/L2/benchmarks/TreeEngine/TreeSwaptionEngineVModel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set K_IMAGE/ROOTFS defaults relative to SYSROOT (?= keeps any value already set in the environment or on the command line), then check that SYSROOT/K_IMAGE/ROOTFS exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT ENV variable is not set or points to a non-existent path: '$(SYSROOT)', please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set or points to a non-existent file: '$(K_IMAGE)', please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set or points to a non-existent file: '$(ROOTFS)', please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/demos/Quadrature/Makefile b/quantitative_finance/L2/demos/Quadrature/Makefile
index 840a91ccc9..34f1e9a4d8 100755
--- a/quantitative_finance/L2/demos/Quadrature/Makefile
+++ b/quantitative_finance/L2/demos/Quadrature/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,14 +113,15 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(CUR_DIR)/src/host/quad_hcf_test.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/demos/Quadrature/src/kernel -I $(XFLIB_DIR)/L2/include/ -I $(XFLIB_DIR)/L1/include/ -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -164,11 +171,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -202,21 +204,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a repackage on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -263,14 +265,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/quad_hcf_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -304,12 +308,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/demos/Quadrature/utils.mk b/quantitative_finance/L2/demos/Quadrature/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/demos/Quadrature/utils.mk
+++ b/quantitative_finance/L2/demos/Quadrature/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set K_IMAGE/ROOTFS defaults relative to SYSROOT (?= keeps any value already set in the environment or on the command line), then check that SYSROOT/K_IMAGE/ROOTFS exist on disk
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT ENV variable is not set or points to a non-existent path: '$(SYSROOT)', please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set or points to a non-existent file: '$(K_IMAGE)', please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set or points to a non-existent file: '$(ROOTFS)', please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/BinomialTreeEngine/Makefile b/quantitative_finance/L2/tests/BinomialTreeEngine/Makefile
index 5c480ccdad..9e256dfc40 100644
--- a/quantitative_finance/L2/tests/BinomialTreeEngine/Makefile
+++ b/quantitative_finance/L2/tests/BinomialTreeEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a. If you download the platform and common-image from the Xilinx Download Center (suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(CUR_DIR)/src/host/binomialtreecpu.cpp $(CUR_DIR)/src/host/binomialtreehost.cpp 
 CXXFLAGS +=  -D TEST_DT=float -D TEST_PARALLEL_ENGINES=8
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(CUR_DIR)/src/kernel -I $(XFLIB_DIR)/ext/xcl2/ -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -166,11 +173,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -204,21 +206,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -265,14 +267,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/binomialtree.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -306,12 +310,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/BinomialTreeEngine/utils.mk b/quantitative_finance/L2/tests/BinomialTreeEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/BinomialTreeEngine/utils.mk
+++ b/quantitative_finance/L2/tests/BinomialTreeEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file $(K_IMAGE) does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file $(ROOTFS) does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/CFBlack76/Makefile b/quantitative_finance/L2/tests/CFBlack76/Makefile
index c84a89cfe7..ae976f0eb3 100644
--- a/quantitative_finance/L2/tests/CFBlack76/Makefile
+++ b/quantitative_finance/L2/tests/CFBlack76/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a. If you download the platform and common-image from the Xilinx Download Center (suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/CFBlack76/src/host/b76_test.cpp $(XFLIB_DIR)/L2/tests/CFBlack76/src/host/b76_model.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -136,6 +138,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext/xcl2/ -I $(XFLIB_DIR
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := b76_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -182,7 +189,7 @@ endif
 $(TEMP_DIR)/b76_kernel.xo: $(XFLIB_DIR)/L2/tests/CFBlack76/src/kernel/b76_kernel.cpp 
 	$(ECHO) "Compiling Kernel: b76_kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_b76_kernel) $(VPP_FLAGS) -k b76_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_b76_kernel) $(VPP_FLAGS) -k b76_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_b76_kernel_OBJS += $(TEMP_DIR)/b76_kernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_b76_kernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -206,11 +213,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -244,21 +246,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -305,14 +307,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/b76_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -346,12 +350,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/CFBlack76/utils.mk b/quantitative_finance/L2/tests/CFBlack76/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/CFBlack76/utils.mk
+++ b/quantitative_finance/L2/tests/CFBlack76/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file $(K_IMAGE) does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file $(ROOTFS) does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/CFBlackScholes/Makefile b/quantitative_finance/L2/tests/CFBlackScholes/Makefile
index cb880d470a..79fa043080 100644
--- a/quantitative_finance/L2/tests/CFBlackScholes/Makefile
+++ b/quantitative_finance/L2/tests/CFBlackScholes/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a. If you download the platform and common-image from the Xilinx Download Center (suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/CFBlackScholes/src/host/bs_test.cpp $(XFLIB_DIR)/L2/tests/CFBlackScholes/src/host/bsm_model.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -136,6 +138,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext/xcl2/ -I $(XFLIB_DIR
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := bs_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -182,7 +189,7 @@ endif
 $(TEMP_DIR)/bs_kernel.xo: $(XFLIB_DIR)/L2/tests/CFBlackScholes/src/kernel/bs_kernel.cpp 
 	$(ECHO) "Compiling Kernel: bs_kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_bs_kernel) $(VPP_FLAGS) -k bs_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_bs_kernel) $(VPP_FLAGS) -k bs_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_bs_kernel_OBJS += $(TEMP_DIR)/bs_kernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_bs_kernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -206,11 +213,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -244,21 +246,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -305,14 +307,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/bs_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -346,12 +350,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/CFBlackScholes/utils.mk b/quantitative_finance/L2/tests/CFBlackScholes/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/CFBlackScholes/utils.mk
+++ b/quantitative_finance/L2/tests/CFBlackScholes/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file $(K_IMAGE) does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file $(ROOTFS) does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/CFBlackScholesMerton/Makefile b/quantitative_finance/L2/tests/CFBlackScholesMerton/Makefile
index ce0d15cde4..2f9888438a 100755
--- a/quantitative_finance/L2/tests/CFBlackScholesMerton/Makefile
+++ b/quantitative_finance/L2/tests/CFBlackScholesMerton/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a. If you download the platform and common-image from the Xilinx Download Center (suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/CFBlackScholesMerton/src/host/bsm_test.cpp $(XFLIB_DIR)/L2/tests/CFBlackScholesMerton/src/host/bsm_model.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -136,6 +138,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext/xcl2/ -I $(XFLIB_DIR
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := bsm_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -182,7 +189,7 @@ endif
 $(TEMP_DIR)/bsm_kernel.xo: $(XFLIB_DIR)/L2/tests/CFBlackScholesMerton/src/kernel/bsm_kernel.cpp 
 	$(ECHO) "Compiling Kernel: bsm_kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_bsm_kernel) $(VPP_FLAGS) -k bsm_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_bsm_kernel) $(VPP_FLAGS) -k bsm_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_bsm_kernel_OBJS += $(TEMP_DIR)/bsm_kernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_bsm_kernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -206,11 +213,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -244,21 +246,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -305,14 +307,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/bsm_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -346,12 +350,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/CFBlackScholesMerton/utils.mk b/quantitative_finance/L2/tests/CFBlackScholesMerton/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/CFBlackScholesMerton/utils.mk
+++ b/quantitative_finance/L2/tests/CFBlackScholesMerton/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # fails when the kernel image path (explicit or the SYSROOT-derived default) does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # fails when the rootfs path (explicit or the SYSROOT-derived default) does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/CPICapFloorEngine/Makefile b/quantitative_finance/L2/tests/CPICapFloorEngine/Makefile
index 91c365dc13..efa3d67566 100644
--- a/quantitative_finance/L2/tests/CPICapFloorEngine/Makefile
+++ b/quantitative_finance/L2/tests/CPICapFloorEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -184,7 +191,7 @@ endif
 $(TEMP_DIR)/CPI_k0.xo: $(CUR_DIR)/kernel/CPI_k0.cpp 
 	$(ECHO) "Compiling Kernel: CPI_k0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_CPI_k0) $(VPP_FLAGS) -k CPI_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_CPI_k0) $(VPP_FLAGS) -k CPI_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_CPI_k_OBJS += $(TEMP_DIR)/CPI_k0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_CPI_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -208,11 +215,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -246,21 +248,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -307,14 +309,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/CPI_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -348,12 +352,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/CPICapFloorEngine/utils.mk b/quantitative_finance/L2/tests/CPICapFloorEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/CPICapFloorEngine/utils.mk
+++ b/quantitative_finance/L2/tests/CPICapFloorEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # fails when the kernel image path (explicit or the SYSROOT-derived default) does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # fails when the rootfs path (explicit or the SYSROOT-derived default) does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/CreditDefaultSwapEngine/Makefile b/quantitative_finance/L2/tests/CreditDefaultSwapEngine/Makefile
index 9f369b39b2..36d2499ec9 100644
--- a/quantitative_finance/L2/tests/CreditDefaultSwapEngine/Makefile
+++ b/quantitative_finance/L2/tests/CreditDefaultSwapEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/CreditDefaultSwapEngine/src/host/main.cpp $(XFLIB_DIR)/L2/tests/CreditDefaultSwapEngine/src/host/cpu.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D TEST_DT=float
 CXXFLAGS +=  -I $(XFLIB_DIR)/L2/tests/CreditDefaultSwapEngine/src/kernel -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := cds_host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -192,11 +199,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -230,21 +232,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -291,14 +293,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/cds_engine.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -332,12 +336,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/CreditDefaultSwapEngine/utils.mk b/quantitative_finance/L2/tests/CreditDefaultSwapEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/CreditDefaultSwapEngine/utils.mk
+++ b/quantitative_finance/L2/tests/CreditDefaultSwapEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # fails when the kernel image path (explicit or the SYSROOT-derived default) does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # fails when the rootfs path (explicit or the SYSROOT-derived default) does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/FDBlackScholesLocalVolatilityEngine/Makefile b/quantitative_finance/L2/tests/FDBlackScholesLocalVolatilityEngine/Makefile
index e1cdd3365e..a5eb777fcb 100755
--- a/quantitative_finance/L2/tests/FDBlackScholesLocalVolatilityEngine/Makefile
+++ b/quantitative_finance/L2/tests/FDBlackScholesLocalVolatilityEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/FDBlackScholesLocalVolatilityEngine/src/host/fd_bs_lv_test.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D FD_DATA_TYPE=float -D FD_DATA_EQ_TYPE=int32_t -D FD_BITS_PER_DATA_TYPE=32 -D FD_N_SIZE=128 -D FD_LOG2_N_SIZE=7 -D FD_M_SIZE=256 -D FD_DATA_WORDS_IN_DDR=16 -D FD_NUM_PCR=2
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/FDBlackScholesLocalVolatilityEngine/src/host -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/tests/FDBlackScholesLocalVolatilityEngine/src/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := fd_bs_lv_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -149,7 +156,7 @@ endif
 $(TEMP_DIR)/fd_bs_lv_kernel.xo: $(XFLIB_DIR)/L2/tests/FDBlackScholesLocalVolatilityEngine/src/kernel/fd_bs_lv_kernel.cpp 
 	$(ECHO) "Compiling Kernel: fd_bs_lv_kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_fd_bs_lv_kernel) $(VPP_FLAGS) -k fd_bs_lv_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_fd_bs_lv_kernel) $(VPP_FLAGS) -k fd_bs_lv_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_fd_bs_lv_kernel_N128_M256_OBJS += $(TEMP_DIR)/fd_bs_lv_kernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_fd_bs_lv_kernel_N128_M256_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -173,11 +180,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -211,21 +213,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -272,14 +274,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/fd_bs_lv_kernel_N128_M256.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -313,12 +317,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/FDBlackScholesLocalVolatilityEngine/utils.mk b/quantitative_finance/L2/tests/FDBlackScholesLocalVolatilityEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/FDBlackScholesLocalVolatilityEngine/utils.mk
+++ b/quantitative_finance/L2/tests/FDBlackScholesLocalVolatilityEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # fails when the kernel image path (explicit or the SYSROOT-derived default) does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # fails when the rootfs path (explicit or the SYSROOT-derived default) does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/FdEuropeanHestonEngine/Makefile b/quantitative_finance/L2/tests/FdEuropeanHestonEngine/Makefile
index 59cdca68df..b469277bae 100644
--- a/quantitative_finance/L2/tests/FdEuropeanHestonEngine/Makefile
+++ b/quantitative_finance/L2/tests/FdEuropeanHestonEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/FdEuropeanHestonEngine/src/host/fd_test.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D FD_DATA_TYPE=double -D FD_DATA_EQ_TYPE=int64_t -D FD_M_SIZE=8192 -D FD_DATA_WORDS_IN_DDR=8 -D FD_NUM_PCR=2
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/FdEuropeanHestonEngine/src/host -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/tests/FdEuropeanHestonEngine/src/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := fd_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -163,7 +170,7 @@ endif
 $(TEMP_DIR)/fd_kernel.xo: $(XFLIB_DIR)/L2/tests/FdEuropeanHestonEngine/src/kernel/fd_kernel.cpp 
 	$(ECHO) "Compiling Kernel: fd_kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_fd_kernel) $(VPP_FLAGS) -k fd_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_fd_kernel) $(VPP_FLAGS) -k fd_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_fd_heston_kernel_u250_sw_emu_m8192_double_OBJS += $(TEMP_DIR)/fd_kernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_fd_heston_kernel_u250_sw_emu_m8192_double_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -187,11 +194,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -225,21 +227,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -286,14 +288,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/fd_heston_kernel_u250_sw_emu_m8192_double.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -327,12 +331,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/FdEuropeanHestonEngine/utils.mk b/quantitative_finance/L2/tests/FdEuropeanHestonEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/FdEuropeanHestonEngine/utils.mk
+++ b/quantitative_finance/L2/tests/FdEuropeanHestonEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: stop the build unless the path held in K_IMAGE exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: stop the build unless the path held in ROOTFS exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/FdmG2SwaptionEngine/Makefile b/quantitative_finance/L2/tests/FdmG2SwaptionEngine/Makefile
index 698321cd15..35d1218060 100644
--- a/quantitative_finance/L2/tests/FdmG2SwaptionEngine/Makefile
+++ b/quantitative_finance/L2/tests/FdmG2SwaptionEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,7 +187,7 @@ endif
 $(TEMP_DIR)/FDMG2_k0.xo: $(CUR_DIR)/kernel/FDMG2_k0.cpp 
 	$(ECHO) "Compiling Kernel: FDMG2_k0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_FDMG2_k0) $(VPP_FLAGS) -k FDMG2_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_FDMG2_k0) $(VPP_FLAGS) -k FDMG2_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_FDMG2_k_OBJS += $(TEMP_DIR)/FDMG2_k0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_FDMG2_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -204,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -242,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -303,14 +305,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/FDMG2_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -344,12 +348,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/FdmG2SwaptionEngine/utils.mk b/quantitative_finance/L2/tests/FdmG2SwaptionEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/FdmG2SwaptionEngine/utils.mk
+++ b/quantitative_finance/L2/tests/FdmG2SwaptionEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: stop the build unless the path held in K_IMAGE exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: stop the build unless the path held in ROOTFS exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/GarmanKohlhagenEngine/Makefile b/quantitative_finance/L2/tests/GarmanKohlhagenEngine/Makefile
index 31964b3a8c..9f6ecf39c2 100755
--- a/quantitative_finance/L2/tests/GarmanKohlhagenEngine/Makefile
+++ b/quantitative_finance/L2/tests/GarmanKohlhagenEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,14 +113,15 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(CUR_DIR)/src/host/gk_test.cpp $(CUR_DIR)/src/host/gk_parser.cpp $(CUR_DIR)/src/host/gk_cpu.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -179,11 +186,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -217,21 +219,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -278,14 +280,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/gk_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -319,12 +323,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/GarmanKohlhagenEngine/utils.mk b/quantitative_finance/L2/tests/GarmanKohlhagenEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/GarmanKohlhagenEngine/utils.mk
+++ b/quantitative_finance/L2/tests/GarmanKohlhagenEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: stop the build unless the path held in K_IMAGE exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: stop the build unless the path held in ROOTFS exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/HCFEngine/Makefile b/quantitative_finance/L2/tests/HCFEngine/Makefile
index a43b9963bc..79bdc6212e 100644
--- a/quantitative_finance/L2/tests/HCFEngine/Makefile
+++ b/quantitative_finance/L2/tests/HCFEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,20 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
-ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
+ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
 HOST_SRCS += $(CUR_DIR)/src/host/hcf_host.cpp $(CUR_DIR)/src/host/hcf_cpu.cpp $(CUR_DIR)/src/host/hcf_parser.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
-CXXFLAGS +=  -D USE_DDR -D TEST_DT=float -D DEVICE_PART=u200
+CXXFLAGS +=  -D USE_DDR -D TEST_DT=float -D DEVICE_PART=u50
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(CUR_DIR)/src/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
-else ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
+else ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/src/host/hcf_host.cpp $(CUR_DIR)/src/host/hcf_cpu.cpp $(CUR_DIR)/src/host/hcf_parser.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
-CXXFLAGS +=  -D USE_DDR -D TEST_DT=float -D DEVICE_PART=u50
+CXXFLAGS +=  -D USE_DDR -D TEST_DT=float -D DEVICE_PART=u200
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(CUR_DIR)/src/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
@@ -137,6 +139,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(CUR_DIR)
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := hcf_host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -147,12 +154,12 @@ PKG_HOST_ARGS = $(foreach args,$(HOST_ARGS),$(subst $(dir $(patsubst %/,%,$(args
 endif
 
 ########################## Kernel compiler global settings ##########################
-ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
-VPP_FLAGS +=   --config $(CUR_DIR)/conn_u200.cfg
+ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
+VPP_FLAGS +=   --config $(CUR_DIR)/conn_u50.cfg
 VPP_FLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include
 
-else ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
-VPP_FLAGS +=   --config $(CUR_DIR)/conn_u50.cfg
+else ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
+VPP_FLAGS +=   --config $(CUR_DIR)/conn_u200.cfg
 VPP_FLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include
 
 else ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
@@ -208,11 +215,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -246,21 +248,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -307,14 +309,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/hcf.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -348,12 +352,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
-clean: cleanh
+clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/HCFEngine/utils.mk b/quantitative_finance/L2/tests/HCFEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/HCFEngine/utils.mk
+++ b/quantitative_finance/L2/tests/HCFEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: stop the build unless the path held in K_IMAGE exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: stop the build unless the path held in ROOTFS exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/HeathJarrowMorton/Makefile b/quantitative_finance/L2/tests/HeathJarrowMorton/Makefile
index a4a191c657..cee66a0100 100644
--- a/quantitative_finance/L2/tests/HeathJarrowMorton/Makefile
+++ b/quantitative_finance/L2/tests/HeathJarrowMorton/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/HeathJarrowMorton/src/host/hjm_test.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/HeathJarrowMorton/src/host -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/tests/HeathJarrowMorton/src/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := hjm_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -171,11 +178,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -209,21 +211,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -270,14 +272,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/hjm_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -311,12 +315,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/HeathJarrowMorton/utils.mk b/quantitative_finance/L2/tests/HeathJarrowMorton/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/HeathJarrowMorton/utils.mk
+++ b/quantitative_finance/L2/tests/HeathJarrowMorton/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/HullWhiteAnalyticEngine/Makefile b/quantitative_finance/L2/tests/HullWhiteAnalyticEngine/Makefile
index 20269d744f..33f5d77c39 100644
--- a/quantitative_finance/L2/tests/HullWhiteAnalyticEngine/Makefile
+++ b/quantitative_finance/L2/tests/HullWhiteAnalyticEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/HullWhiteAnalyticEngine/src/host/main.cpp $(XFLIB_DIR)/L2/tests/HullWhiteAnalyticEngine/src/host/cpu.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D TEST_DT=double
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/HullWhiteAnalyticEngine/src/host -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/tests/HullWhiteAnalyticEngine/src/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := hwa_host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -202,11 +209,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -240,21 +242,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -301,14 +303,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/hwa_engine.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -342,12 +346,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/HullWhiteAnalyticEngine/utils.mk b/quantitative_finance/L2/tests/HullWhiteAnalyticEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/HullWhiteAnalyticEngine/utils.mk
+++ b/quantitative_finance/L2/tests/HullWhiteAnalyticEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/InflationBlackCapFloorEngine/Makefile b/quantitative_finance/L2/tests/InflationBlackCapFloorEngine/Makefile
index d31de780d4..420567b10c 100644
--- a/quantitative_finance/L2/tests/InflationBlackCapFloorEngine/Makefile
+++ b/quantitative_finance/L2/tests/InflationBlackCapFloorEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := .exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,7 +187,7 @@ endif
 $(TEMP_DIR)/INFLATION_k0.xo: $(CUR_DIR)/kernel/INFLATION_k0.cpp 
 	$(ECHO) "Compiling Kernel: INFLATION_k0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_INFLATION_k0) $(VPP_FLAGS) -k INFLATION_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_INFLATION_k0) $(VPP_FLAGS) -k INFLATION_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_INFLATION_k_OBJS += $(TEMP_DIR)/INFLATION_k0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_INFLATION_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -204,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -242,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -303,14 +305,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/INFLATION_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -344,12 +348,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/InflationBlackCapFloorEngine/utils.mk b/quantitative_finance/L2/tests/InflationBlackCapFloorEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/InflationBlackCapFloorEngine/utils.mk
+++ b/quantitative_finance/L2/tests/InflationBlackCapFloorEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/LMMEngineCap/Makefile b/quantitative_finance/L2/tests/LMMEngineCap/Makefile
index 225fa2eda9..9dbfcdfde9 100644
--- a/quantitative_finance/L2/tests/LMMEngineCap/Makefile
+++ b/quantitative_finance/L2/tests/LMMEngineCap/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/LMMEngineCap/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/LMMEngineCap/host -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/tests/LMMEngineCap/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := lmmCapTest.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -147,7 +154,7 @@ endif
 $(TEMP_DIR)/lmmCapKernel.xo: $(XFLIB_DIR)/L2/tests/LMMEngineCap/kernel/lmmCapKernel.cpp 
 	$(ECHO) "Compiling Kernel: lmmCapKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_lmmCapKernel) $(VPP_FLAGS) -k lmmCapKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_lmmCapKernel) $(VPP_FLAGS) -k lmmCapKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_lmm_cap_OBJS += $(TEMP_DIR)/lmmCapKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_lmm_cap_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -171,11 +178,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -209,21 +211,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -270,14 +272,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/lmm_cap.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -311,12 +315,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/LMMEngineCap/utils.mk b/quantitative_finance/L2/tests/LMMEngineCap/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/LMMEngineCap/utils.mk
+++ b/quantitative_finance/L2/tests/LMMEngineCap/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
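+# Non-x86 hosts: default K_IMAGE and ROOTFS relative to SYSROOT unless already set; the check_* targets verify that SYSROOT/K_IMAGE/ROOTFS exist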
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # on non-x86 hosts, abort unless the K_IMAGE path exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # on non-x86 hosts, abort unless the ROOTFS path exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/LMMEngineRatchetCap/Makefile b/quantitative_finance/L2/tests/LMMEngineRatchetCap/Makefile
index 03388bc80e..76cc82c467 100644
--- a/quantitative_finance/L2/tests/LMMEngineRatchetCap/Makefile
+++ b/quantitative_finance/L2/tests/LMMEngineRatchetCap/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/LMMEngineRatchetCap/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/LMMEngineRatchetCap/host -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/tests/LMMEngineRatchetCap/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already contain "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := lmmRatchetCapTest.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -147,7 +154,7 @@ endif
 $(TEMP_DIR)/lmmRatchetCapKernel.xo: $(XFLIB_DIR)/L2/tests/LMMEngineRatchetCap/kernel/lmmRatchetCapKernel.cpp 
 	$(ECHO) "Compiling Kernel: lmmRatchetCapKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_lmmRatchetCapKernel) $(VPP_FLAGS) -k lmmRatchetCapKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_lmmRatchetCapKernel) $(VPP_FLAGS) -k lmmRatchetCapKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_lmm_ratchetcap_OBJS += $(TEMP_DIR)/lmmRatchetCapKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_lmm_ratchetcap_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -171,11 +178,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -209,21 +211,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a re-package every time
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -270,14 +272,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/lmm_ratchetcap.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -311,12 +315,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/LMMEngineRatchetCap/utils.mk b/quantitative_finance/L2/tests/LMMEngineRatchetCap/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/LMMEngineRatchetCap/utils.mk
+++ b/quantitative_finance/L2/tests/LMMEngineRatchetCap/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
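+# Non-x86 hosts: default K_IMAGE and ROOTFS relative to SYSROOT unless already set; the check_* targets verify that SYSROOT/K_IMAGE/ROOTFS exist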
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # on non-x86 hosts, abort unless the K_IMAGE path exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # on non-x86 hosts, abort unless the ROOTFS path exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/LMMEngineRatchetFloater/Makefile b/quantitative_finance/L2/tests/LMMEngineRatchetFloater/Makefile
index 0c35cb58bb..745945048f 100644
--- a/quantitative_finance/L2/tests/LMMEngineRatchetFloater/Makefile
+++ b/quantitative_finance/L2/tests/LMMEngineRatchetFloater/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/LMMEngineRatchetFloater/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/LMMEngineRatchetFloater/host -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/tests/LMMEngineRatchetFloater/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already contain "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := lmmRatchetFloaterTest.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -147,7 +154,7 @@ endif
 $(TEMP_DIR)/lmmRatchetFloaterKernel.xo: $(XFLIB_DIR)/L2/tests/LMMEngineRatchetFloater/kernel/lmmRatchetFloaterKernel.cpp 
 	$(ECHO) "Compiling Kernel: lmmRatchetFloaterKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_lmmRatchetFloaterKernel) $(VPP_FLAGS) -k lmmRatchetFloaterKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_lmmRatchetFloaterKernel) $(VPP_FLAGS) -k lmmRatchetFloaterKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_lmm_ratchetfloater_OBJS += $(TEMP_DIR)/lmmRatchetFloaterKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_lmm_ratchetfloater_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -171,11 +178,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -209,21 +211,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a re-package every time
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -270,14 +272,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/lmm_ratchetfloater.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -311,12 +315,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/LMMEngineRatchetFloater/utils.mk b/quantitative_finance/L2/tests/LMMEngineRatchetFloater/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/LMMEngineRatchetFloater/utils.mk
+++ b/quantitative_finance/L2/tests/LMMEngineRatchetFloater/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
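+# Non-x86 hosts: default K_IMAGE and ROOTFS relative to SYSROOT unless already set; the check_* targets verify that SYSROOT/K_IMAGE/ROOTFS exist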
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # on non-x86 hosts, abort unless the K_IMAGE path exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # on non-x86 hosts, abort unless the ROOTFS path exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/M76Engine/Makefile b/quantitative_finance/L2/tests/M76Engine/Makefile
index b285c6a426..868e5db456 100644
--- a/quantitative_finance/L2/tests/M76Engine/Makefile
+++ b/quantitative_finance/L2/tests/M76Engine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/M76Engine/src/host/m76_host.cpp $(XFLIB_DIR)/L2/tests/M76Engine/src/host/m76_cpu.cpp $(XFLIB_DIR)/L2/tests/M76Engine/src/host/m76_parser.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -131,6 +133,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already contain "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := m76_host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -174,7 +181,7 @@ endif
 $(TEMP_DIR)/m76_kernel.xo: $(XFLIB_DIR)/L2/tests/M76Engine/src/kernel/m76_kernel.cpp 
 	$(ECHO) "Compiling Kernel: m76_kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_m76_kernel) $(VPP_FLAGS) -k m76_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_m76_kernel) $(VPP_FLAGS) -k m76_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_m76_OBJS += $(TEMP_DIR)/m76_kernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_m76_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -198,11 +205,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -236,21 +238,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a re-package every time
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -297,14 +299,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/m76.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -338,12 +342,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/M76Engine/utils.mk b/quantitative_finance/L2/tests/M76Engine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/M76Engine/utils.mk
+++ b/quantitative_finance/L2/tests/M76Engine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
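+# Non-x86 hosts: default K_IMAGE and ROOTFS relative to SYSROOT unless already set; the check_* targets verify that SYSROOT/K_IMAGE/ROOTFS exist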
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # on non-x86 hosts, abort unless the K_IMAGE path exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' not found, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # on non-x86 hosts, abort unless the ROOTFS path exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' not found, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/MCAmericanEngine/Makefile b/quantitative_finance/L2/tests/MCAmericanEngine/Makefile
index 8009d63054..26d317e874 100644
--- a/quantitative_finance/L2/tests/MCAmericanEngine/Makefile
+++ b/quantitative_finance/L2/tests/MCAmericanEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/test.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already contain "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,7 +187,7 @@ endif
 $(TEMP_DIR)/kernel_mcae_0.xo: $(CUR_DIR)/kernel/kernel_mcae.cpp 
 	$(ECHO) "Compiling Kernel: kernel_mcae_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_mcae_0) $(VPP_FLAGS) -k kernel_mcae_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_mcae_0) $(VPP_FLAGS) -k kernel_mcae_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_mcae_OBJS += $(TEMP_DIR)/kernel_mcae_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_mcae_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -204,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -242,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -303,14 +305,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_mcae.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -344,12 +348,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/MCAmericanEngine/utils.mk b/quantitative_finance/L2/tests/MCAmericanEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/MCAmericanEngine/utils.mk
+++ b/quantitative_finance/L2/tests/MCAmericanEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+# check_kimage: on embedded (non-x86) hosts, abort when the kernel image that
+# K_IMAGE points at does not exist. K_IMAGE may be set by the user; otherwise
+# it is defaulted from SYSROOT, so a bad SYSROOT also ends up here.
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+	$(if $(wildcard $(K_IMAGE)),,$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE correctly and rerun))
+endif
+# check_rootfs: same existence check for the root filesystem image in ROOTFS.
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+	$(if $(wildcard $(ROOTFS)),,$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS correctly and rerun))
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/MCAmericanEngineMultiKernel/Makefile b/quantitative_finance/L2/tests/MCAmericanEngineMultiKernel/Makefile
index 17606f6576..bddbcae65d 100644
--- a/quantitative_finance/L2/tests/MCAmericanEngineMultiKernel/Makefile
+++ b/quantitative_finance/L2/tests/MCAmericanEngineMultiKernel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -231,17 +238,17 @@ endif
 $(TEMP_DIR)/MCAE_k0.xo: $(CUR_DIR)/kernel/MCAE_k0.cpp 
 	$(ECHO) "Compiling Kernel: MCAE_k0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_MCAE_k0) $(VPP_FLAGS) -k MCAE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_MCAE_k0) $(VPP_FLAGS) -k MCAE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_MCAE_k_OBJS += $(TEMP_DIR)/MCAE_k0.xo
 $(TEMP_DIR)/MCAE_k1.xo: $(CUR_DIR)/kernel/MCAE_k1.cpp 
 	$(ECHO) "Compiling Kernel: MCAE_k1"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_MCAE_k1) $(VPP_FLAGS) -k MCAE_k1 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_MCAE_k1) $(VPP_FLAGS) -k MCAE_k1 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_MCAE_k_OBJS += $(TEMP_DIR)/MCAE_k1.xo
 $(TEMP_DIR)/MCAE_k2.xo: $(CUR_DIR)/kernel/MCAE_k2.cpp 
 	$(ECHO) "Compiling Kernel: MCAE_k2"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_MCAE_k2) $(VPP_FLAGS) -k MCAE_k2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_MCAE_k2) $(VPP_FLAGS) -k MCAE_k2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_MCAE_k_OBJS += $(TEMP_DIR)/MCAE_k2.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_MCAE_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -265,11 +272,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -303,21 +305,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -364,14 +366,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/MCAE_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -405,12 +409,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/MCAmericanEngineMultiKernel/utils.mk b/quantitative_finance/L2/tests/MCAmericanEngineMultiKernel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/MCAmericanEngineMultiKernel/utils.mk
+++ b/quantitative_finance/L2/tests/MCAmericanEngineMultiKernel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+# check_kimage: on embedded (non-x86) hosts, abort when the kernel image that
+# K_IMAGE points at does not exist. K_IMAGE may be set by the user; otherwise
+# it is defaulted from SYSROOT, so a bad SYSROOT also ends up here.
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+	$(if $(wildcard $(K_IMAGE)),,$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE correctly and rerun))
+endif
+# check_rootfs: same existence check for the root filesystem image in ROOTFS.
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+	$(if $(wildcard $(ROOTFS)),,$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS correctly and rerun))
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/MCAsianAPEngine/Makefile b/quantitative_finance/L2/tests/MCAsianAPEngine/Makefile
index c579fb84d2..a57641ca40 100644
--- a/quantitative_finance/L2/tests/MCAsianAPEngine/Makefile
+++ b/quantitative_finance/L2/tests/MCAsianAPEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/test.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,7 +187,7 @@ endif
 $(TEMP_DIR)/kernel_MCAsianAP_0.xo: $(CUR_DIR)/kernel/kernel_MCAsianAPEngine.cpp 
 	$(ECHO) "Compiling Kernel: kernel_MCAsianAP_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_MCAsianAP_0) $(VPP_FLAGS) -k kernel_MCAsianAP_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_MCAsianAP_0) $(VPP_FLAGS) -k kernel_MCAsianAP_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_MCAsianAP_OBJS += $(TEMP_DIR)/kernel_MCAsianAP_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_MCAsianAP_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -204,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -242,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -303,14 +305,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_MCAsianAP.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -344,12 +348,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/MCAsianAPEngine/utils.mk b/quantitative_finance/L2/tests/MCAsianAPEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/MCAsianAPEngine/utils.mk
+++ b/quantitative_finance/L2/tests/MCAsianAPEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+# check_kimage: on embedded (non-x86) hosts, abort when the kernel image that
+# K_IMAGE points at does not exist. K_IMAGE may be set by the user; otherwise
+# it is defaulted from SYSROOT, so a bad SYSROOT also ends up here.
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+	$(if $(wildcard $(K_IMAGE)),,$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE correctly and rerun))
+endif
+# check_rootfs: same existence check for the root filesystem image in ROOTFS.
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+	$(if $(wildcard $(ROOTFS)),,$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS correctly and rerun))
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/MCAsianASEngine/Makefile b/quantitative_finance/L2/tests/MCAsianASEngine/Makefile
index b5cbec88e8..dc416f4751 100644
--- a/quantitative_finance/L2/tests/MCAsianASEngine/Makefile
+++ b/quantitative_finance/L2/tests/MCAsianASEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/test.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,7 +187,7 @@ endif
 $(TEMP_DIR)/kernel_MCAsianAS_0.xo: $(CUR_DIR)/kernel/kernel_MCAsianASEngine.cpp 
 	$(ECHO) "Compiling Kernel: kernel_MCAsianAS_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_MCAsianAS_0) $(VPP_FLAGS) -k kernel_MCAsianAS_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_MCAsianAS_0) $(VPP_FLAGS) -k kernel_MCAsianAS_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_MCAsianAS_OBJS += $(TEMP_DIR)/kernel_MCAsianAS_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_MCAsianAS_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -204,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -242,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -303,14 +305,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_MCAsianAS.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -344,12 +348,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/MCAsianASEngine/utils.mk b/quantitative_finance/L2/tests/MCAsianASEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/MCAsianASEngine/utils.mk
+++ b/quantitative_finance/L2/tests/MCAsianASEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+# check_kimage: on embedded (non-x86) hosts, abort when the kernel image that
+# K_IMAGE points at does not exist. K_IMAGE may be set by the user; otherwise
+# it is defaulted from SYSROOT, so a bad SYSROOT also ends up here.
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+	$(if $(wildcard $(K_IMAGE)),,$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE correctly and rerun))
+endif
+# check_rootfs: same existence check for the root filesystem image in ROOTFS.
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+	$(if $(wildcard $(ROOTFS)),,$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS correctly and rerun))
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/MCAsianGPEngine/Makefile b/quantitative_finance/L2/tests/MCAsianGPEngine/Makefile
index 59939cd7da..adb832720e 100644
--- a/quantitative_finance/L2/tests/MCAsianGPEngine/Makefile
+++ b/quantitative_finance/L2/tests/MCAsianGPEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/test.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,7 +187,7 @@ endif
 $(TEMP_DIR)/kernel_MCAsianGP_0.xo: $(CUR_DIR)/kernel/kernel_MCAsianGPEngine.cpp 
 	$(ECHO) "Compiling Kernel: kernel_MCAsianGP_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_MCAsianGP_0) $(VPP_FLAGS) -k kernel_MCAsianGP_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_MCAsianGP_0) $(VPP_FLAGS) -k kernel_MCAsianGP_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_MCAsianGP_OBJS += $(TEMP_DIR)/kernel_MCAsianGP_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_MCAsianGP_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -204,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -242,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -303,14 +305,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_MCAsianGP.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -344,12 +348,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/MCAsianGPEngine/utils.mk b/quantitative_finance/L2/tests/MCAsianGPEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/MCAsianGPEngine/utils.mk
+++ b/quantitative_finance/L2/tests/MCAsianGPEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT))) # empty when SYSROOT is unset or the path is missing
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+	$(error SYSROOT ENV variable is not set or points to a non-existent path '$(SYSROOT)', please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # K_IMAGE defaults to a path under SYSROOT, verify that it exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # ROOTFS defaults to a path under SYSROOT, verify that it exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/MCBarrierBiasedEngine/Makefile b/quantitative_finance/L2/tests/MCBarrierBiasedEngine/Makefile
index 73e7a461c1..8782c215c2 100644
--- a/quantitative_finance/L2/tests/MCBarrierBiasedEngine/Makefile
+++ b/quantitative_finance/L2/tests/MCBarrierBiasedEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,7 +187,7 @@ endif
 $(TEMP_DIR)/McBarrierBiasedEngine_k.xo: $(CUR_DIR)/kernel/McBarrierBiasedEngine_k.cpp 
 	$(ECHO) "Compiling Kernel: McBarrierBiasedEngine_k"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_McBarrierBiasedEngine_k) $(VPP_FLAGS) -k McBarrierBiasedEngine_k -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_McBarrierBiasedEngine_k) $(VPP_FLAGS) -k McBarrierBiasedEngine_k -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_McBarrierBiasedEngine_k_OBJS += $(TEMP_DIR)/McBarrierBiasedEngine_k.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_McBarrierBiasedEngine_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -204,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -242,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -303,14 +305,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/McBarrierBiasedEngine_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -344,12 +348,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/MCBarrierBiasedEngine/utils.mk b/quantitative_finance/L2/tests/MCBarrierBiasedEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/MCBarrierBiasedEngine/utils.mk
+++ b/quantitative_finance/L2/tests/MCBarrierBiasedEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT))) # empty when SYSROOT is unset or the path is missing
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+	$(error SYSROOT ENV variable is not set or points to a non-existent path '$(SYSROOT)', please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # K_IMAGE defaults to a path under SYSROOT, verify that it exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # ROOTFS defaults to a path under SYSROOT, verify that it exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/MCBarrierEngine/Makefile b/quantitative_finance/L2/tests/MCBarrierEngine/Makefile
index 2d1c289829..e9ca6b513d 100644
--- a/quantitative_finance/L2/tests/MCBarrierEngine/Makefile
+++ b/quantitative_finance/L2/tests/MCBarrierEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,7 +187,7 @@ endif
 $(TEMP_DIR)/MCBarrierNoBiasEngine_k0.xo: $(CUR_DIR)/kernel/MCBarrierNoBiasEngine_k0.cpp 
 	$(ECHO) "Compiling Kernel: MCBarrierNoBiasEngine_k0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_MCBarrierNoBiasEngine_k0) $(VPP_FLAGS) -k MCBarrierNoBiasEngine_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_MCBarrierNoBiasEngine_k0) $(VPP_FLAGS) -k MCBarrierNoBiasEngine_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_MCBarrierNoBiasEngine_k_OBJS += $(TEMP_DIR)/MCBarrierNoBiasEngine_k0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_MCBarrierNoBiasEngine_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -204,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -242,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -303,14 +305,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/MCBarrierNoBiasEngine_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -344,12 +348,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/MCBarrierEngine/utils.mk b/quantitative_finance/L2/tests/MCBarrierEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/MCBarrierEngine/utils.mk
+++ b/quantitative_finance/L2/tests/MCBarrierEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT))) # empty when SYSROOT is unset or the path is missing
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+	$(error SYSROOT ENV variable is not set or points to a non-existent path '$(SYSROOT)', please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # K_IMAGE defaults to a path under SYSROOT, verify that it exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # ROOTFS defaults to a path under SYSROOT, verify that it exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/MCCliquetEngine/Makefile b/quantitative_finance/L2/tests/MCCliquetEngine/Makefile
index c6610fffbf..1d1ef383ac 100644
--- a/quantitative_finance/L2/tests/MCCliquetEngine/Makefile
+++ b/quantitative_finance/L2/tests/MCCliquetEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,7 +187,7 @@ endif
 $(TEMP_DIR)/MCCliquetEngine_k.xo: $(CUR_DIR)/kernel/MCCliquetEngine_k.cpp 
 	$(ECHO) "Compiling Kernel: MCCliquetEngine_k"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_MCCliquetEngine_k) $(VPP_FLAGS) -k MCCliquetEngine_k -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_MCCliquetEngine_k) $(VPP_FLAGS) -k MCCliquetEngine_k -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_MCCliquetEngine_k_OBJS += $(TEMP_DIR)/MCCliquetEngine_k.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_MCCliquetEngine_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -204,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -242,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -303,14 +305,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/MCCliquetEngine_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -344,12 +348,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/MCCliquetEngine/utils.mk b/quantitative_finance/L2/tests/MCCliquetEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/MCCliquetEngine/utils.mk
+++ b/quantitative_finance/L2/tests/MCCliquetEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort unless K_IMAGE (by default a path under SYSROOT) names an existing file; no-op for x86 hosts
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE))) # existence test on the path, not a "variable is set" test
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # abort unless ROOTFS (by default a path under SYSROOT) names an existing file; no-op for x86 hosts
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS))) # existence test on the path, not a "variable is set" test
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/MCDigitalEngine/Makefile b/quantitative_finance/L2/tests/MCDigitalEngine/Makefile
index edf30258ec..11adafdd87 100644
--- a/quantitative_finance/L2/tests/MCDigitalEngine/Makefile
+++ b/quantitative_finance/L2/tests/MCDigitalEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -134,6 +136,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -181,7 +188,7 @@ endif
 $(TEMP_DIR)/MCDigitalEngine_k.xo: $(CUR_DIR)/kernel/MCDigitalEngine_k.cpp 
 	$(ECHO) "Compiling Kernel: MCDigitalEngine_k"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_MCDigitalEngine_k) $(VPP_FLAGS) -k MCDigitalEngine_k -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_MCDigitalEngine_k) $(VPP_FLAGS) -k MCDigitalEngine_k -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_MCDigitalEngine_k_OBJS += $(TEMP_DIR)/MCDigitalEngine_k.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_MCDigitalEngine_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -205,11 +212,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -243,21 +245,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the .PHONY checks run first without forcing a repackage on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -304,14 +306,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/MCDigitalEngine_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -345,12 +349,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/MCDigitalEngine/utils.mk b/quantitative_finance/L2/tests/MCDigitalEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/MCDigitalEngine/utils.mk
+++ b/quantitative_finance/L2/tests/MCDigitalEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort unless K_IMAGE (by default a path under SYSROOT) names an existing file; no-op for x86 hosts
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE))) # existence test on the path, not a "variable is set" test
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # abort unless ROOTFS (by default a path under SYSROOT) names an existing file; no-op for x86 hosts
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS))) # existence test on the path, not a "variable is set" test
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/MCEuropeanDowJonesEngine/Makefile b/quantitative_finance/L2/tests/MCEuropeanDowJonesEngine/Makefile
index 5ed547f6b5..087146621d 100644
--- a/quantitative_finance/L2/tests/MCEuropeanDowJonesEngine/Makefile
+++ b/quantitative_finance/L2/tests/MCEuropeanDowJonesEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,18 +113,14 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
-ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
+ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/MCEuropeanDowJonesEngine/host/test.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D USE_DDR
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/MCEuropeanDowJonesEngine/host -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/tests/MCEuropeanDowJonesEngine/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
-else ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
+else ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/MCEuropeanDowJonesEngine/host/test.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -D USE_DDR
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/MCEuropeanDowJonesEngine/host -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L2/tests/MCEuropeanDowJonesEngine/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
@@ -137,6 +139,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -147,12 +154,12 @@ PKG_HOST_ARGS = $(foreach args,$(HOST_ARGS),$(subst $(dir $(patsubst %/,%,$(args
 endif
 
 ########################## Kernel compiler global settings ##########################
-ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
-VPP_FLAGS +=   --config $(CUR_DIR)/conn_u200.cfg
+ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
+VPP_FLAGS +=   --config $(CUR_DIR)/conn_u50.cfg
 VPP_FLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/MCEuropeanDowJonesEngine/kernel -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include
 
-else ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
-VPP_FLAGS +=   --config $(CUR_DIR)/conn_u50.cfg
+else ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
+VPP_FLAGS +=   --config $(CUR_DIR)/conn_u200.cfg
 VPP_FLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/MCEuropeanDowJonesEngine/kernel -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include
 
 else ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
@@ -186,7 +193,7 @@ endif
 $(TEMP_DIR)/kernel_mc_0.xo: $(CUR_DIR)/kernel/kernel_mceuropeanengine.cpp 
 	$(ECHO) "Compiling Kernel: kernel_mc_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_mc_0) $(VPP_FLAGS) -k kernel_mc_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_mc_0) $(VPP_FLAGS) -k kernel_mc_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_mc_european_dowjones_OBJS += $(TEMP_DIR)/kernel_mc_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_mc_european_dowjones_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -210,11 +217,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -248,21 +250,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the .PHONY checks run first without forcing a repackage on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -309,14 +311,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/mc_european_dowjones.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -350,12 +354,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
-clean: cleanh
+clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/MCEuropeanDowJonesEngine/utils.mk b/quantitative_finance/L2/tests/MCEuropeanDowJonesEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/MCEuropeanDowJonesEngine/utils.mk
+++ b/quantitative_finance/L2/tests/MCEuropeanDowJonesEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort unless K_IMAGE (by default a path under SYSROOT) names an existing file; no-op for x86 hosts
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE))) # existence test on the path, not a "variable is set" test
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # abort unless ROOTFS (by default a path under SYSROOT) names an existing file; no-op for x86 hosts
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS))) # existence test on the path, not a "variable is set" test
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/MCEuropeanEngine/Makefile b/quantitative_finance/L2/tests/MCEuropeanEngine/Makefile
index ee51a49ee7..d9bd1b325a 100644
--- a/quantitative_finance/L2/tests/MCEuropeanEngine/Makefile
+++ b/quantitative_finance/L2/tests/MCEuropeanEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/test.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,7 +187,7 @@ endif
 $(TEMP_DIR)/mc_euro_k.xo: $(CUR_DIR)/kernel/mc_euro_k.cpp 
 	$(ECHO) "Compiling Kernel: mc_euro_k"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_mc_euro_k) $(VPP_FLAGS) -k mc_euro_k -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_mc_euro_k) $(VPP_FLAGS) -k mc_euro_k -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_mc_euro_k_OBJS += $(TEMP_DIR)/mc_euro_k.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_mc_euro_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -204,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -242,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the .PHONY checks run first without forcing a repackage on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -303,14 +305,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/mc_euro_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -344,12 +348,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/MCEuropeanEngine/utils.mk b/quantitative_finance/L2/tests/MCEuropeanEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/MCEuropeanEngine/utils.mk
+++ b/quantitative_finance/L2/tests/MCEuropeanEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # abort unless K_IMAGE (by default a path under SYSROOT) names an existing file; no-op for x86 hosts
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE))) # existence test on the path, not a "variable is set" test
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # abort unless ROOTFS (by default a path under SYSROOT) names an existing file; no-op for x86 hosts
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS))) # existence test on the path, not a "variable is set" test
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/MCEuropeanHestonEngine/Makefile b/quantitative_finance/L2/tests/MCEuropeanHestonEngine/Makefile
index 69b5aca4b2..bf1be1a664 100644
--- a/quantitative_finance/L2/tests/MCEuropeanHestonEngine/Makefile
+++ b/quantitative_finance/L2/tests/MCEuropeanHestonEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/test.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already contain the string "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,7 +187,7 @@ endif
 $(TEMP_DIR)/kernel_mc_0.xo: $(CUR_DIR)/kernel/kernel_mceuropeanengine.cpp 
 	$(ECHO) "Compiling Kernel: kernel_mc_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_mc_0) $(VPP_FLAGS) -k kernel_mc_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_mc_0) $(VPP_FLAGS) -k kernel_mc_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_mc_OBJS += $(TEMP_DIR)/kernel_mc_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_mc_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -204,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -242,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -303,14 +305,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_mc.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -344,12 +348,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/MCEuropeanHestonEngine/utils.mk b/quantitative_finance/L2/tests/MCEuropeanHestonEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/MCEuropeanHestonEngine/utils.mk
+++ b/quantitative_finance/L2/tests/MCEuropeanHestonEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT is not set or path '$(SYSROOT)' does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when the K_IMAGE path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort when the ROOTFS path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/MCEuropeanHestonGreeksEngine/Makefile b/quantitative_finance/L2/tests/MCEuropeanHestonGreeksEngine/Makefile
index 9e7d9479ab..e281885bc4 100644
--- a/quantitative_finance/L2/tests/MCEuropeanHestonGreeksEngine/Makefile
+++ b/quantitative_finance/L2/tests/MCEuropeanHestonGreeksEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -134,6 +136,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already contain the string "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -181,7 +188,7 @@ endif
 $(TEMP_DIR)/MCEHGEngine_k0.xo: $(CUR_DIR)/kernel/mcengine_top.cpp 
 	$(ECHO) "Compiling Kernel: MCEHGEngine_k0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_MCEHGEngine_k0) $(VPP_FLAGS) -k MCEHGEngine_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_MCEHGEngine_k0) $(VPP_FLAGS) -k MCEHGEngine_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_MCEHGEngine_k0_OBJS += $(TEMP_DIR)/MCEHGEngine_k0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_MCEHGEngine_k0_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -205,11 +212,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -243,21 +245,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -304,14 +306,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/MCEHGEngine_k0.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -345,12 +349,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/MCEuropeanHestonGreeksEngine/utils.mk b/quantitative_finance/L2/tests/MCEuropeanHestonGreeksEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/MCEuropeanHestonGreeksEngine/utils.mk
+++ b/quantitative_finance/L2/tests/MCEuropeanHestonGreeksEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT is not set or path '$(SYSROOT)' does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when the K_IMAGE path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort when the ROOTFS path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/MCHullWhiteCapFloorEngine/Makefile b/quantitative_finance/L2/tests/MCHullWhiteCapFloorEngine/Makefile
index 99e1f1fcfe..879f2f66cc 100644
--- a/quantitative_finance/L2/tests/MCHullWhiteCapFloorEngine/Makefile
+++ b/quantitative_finance/L2/tests/MCHullWhiteCapFloorEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/test.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already contain the string "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,7 +187,7 @@ endif
 $(TEMP_DIR)/kernel_mc_0.xo: $(CUR_DIR)/kernel/kernel_mceuropeanengine.cpp 
 	$(ECHO) "Compiling Kernel: kernel_mc_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_mc_0) $(VPP_FLAGS) -k kernel_mc_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_mc_0) $(VPP_FLAGS) -k kernel_mc_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_mc_OBJS += $(TEMP_DIR)/kernel_mc_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_mc_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -204,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -242,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -303,14 +305,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_mc.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -344,12 +348,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/MCHullWhiteCapFloorEngine/utils.mk b/quantitative_finance/L2/tests/MCHullWhiteCapFloorEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/MCHullWhiteCapFloorEngine/utils.mk
+++ b/quantitative_finance/L2/tests/MCHullWhiteCapFloorEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT is not set or path '$(SYSROOT)' does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when the K_IMAGE path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort when the ROOTFS path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/MCMultiAssetEuropeanHestonEngine/Makefile b/quantitative_finance/L2/tests/MCMultiAssetEuropeanHestonEngine/Makefile
index 1da97d63f6..682b9cc8cb 100644
--- a/quantitative_finance/L2/tests/MCMultiAssetEuropeanHestonEngine/Makefile
+++ b/quantitative_finance/L2/tests/MCMultiAssetEuropeanHestonEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/test.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already contain the string "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,7 +187,7 @@ endif
 $(TEMP_DIR)/kernel_mc_0.xo: $(CUR_DIR)/kernel/kernel_mceuropeanengine.cpp 
 	$(ECHO) "Compiling Kernel: kernel_mc_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_mc_0) $(VPP_FLAGS) -k kernel_mc_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_mc_0) $(VPP_FLAGS) -k kernel_mc_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_mc_OBJS += $(TEMP_DIR)/kernel_mc_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_mc_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -204,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -242,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: phony checks run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -303,14 +305,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_mc.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -344,12 +348,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/MCMultiAssetEuropeanHestonEngine/utils.mk b/quantitative_finance/L2/tests/MCMultiAssetEuropeanHestonEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/MCMultiAssetEuropeanHestonEngine/utils.mk
+++ b/quantitative_finance/L2/tests/MCMultiAssetEuropeanHestonEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+	$(error SYSROOT ENV variable is not set or its path does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file $(K_IMAGE) does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file $(ROOTFS) does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/PopMCMC/Makefile b/quantitative_finance/L2/tests/PopMCMC/Makefile
index cc0f84955e..e0c494577c 100755
--- a/quantitative_finance/L2/tests/PopMCMC/Makefile
+++ b/quantitative_finance/L2/tests/PopMCMC/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(XFLIB_DIR)/L2/tests/PopMCMC/src/host/mcmc_test.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -136,6 +138,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := mcmc_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -182,7 +189,7 @@ endif
 $(TEMP_DIR)/mcmc_kernel.xo: $(XFLIB_DIR)/L2/tests/PopMCMC/src/kernel/mcmc_kernel.cpp 
 	$(ECHO) "Compiling Kernel: mcmc_kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_mcmc_kernel) $(VPP_FLAGS) -k mcmc_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_mcmc_kernel) $(VPP_FLAGS) -k mcmc_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_mcmc_kernel_OBJS += $(TEMP_DIR)/mcmc_kernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_mcmc_kernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -206,11 +213,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -244,21 +246,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -305,14 +307,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/mcmc_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -346,12 +350,10 @@ cleanh:
 
 cleank:
-	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/PopMCMC/utils.mk b/quantitative_finance/L2/tests/PopMCMC/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/PopMCMC/utils.mk
+++ b/quantitative_finance/L2/tests/PopMCMC/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+	$(error SYSROOT ENV variable is not set or its path does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file $(K_IMAGE) does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file $(ROOTFS) does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/PortfolioOptimisation/Makefile b/quantitative_finance/L2/tests/PortfolioOptimisation/Makefile
index 0598d84699..e0861a120f 100755
--- a/quantitative_finance/L2/tests/PortfolioOptimisation/Makefile
+++ b/quantitative_finance/L2/tests/PortfolioOptimisation/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(CUR_DIR)/src/host/po_test.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := po_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -147,7 +154,7 @@ endif
 $(TEMP_DIR)/po_kernel.xo: $(CUR_DIR)/src/kernel/po_kernel.cpp 
 	$(ECHO) "Compiling Kernel: po_kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_po_kernel) $(VPP_FLAGS) -k po_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_po_kernel) $(VPP_FLAGS) -k po_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_po_kernel_OBJS += $(TEMP_DIR)/po_kernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_po_kernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -171,11 +178,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -209,21 +211,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -270,14 +272,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/po_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -311,12 +315,10 @@ cleanh:
 
 cleank:
-	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/PortfolioOptimisation/utils.mk b/quantitative_finance/L2/tests/PortfolioOptimisation/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/PortfolioOptimisation/utils.mk
+++ b/quantitative_finance/L2/tests/PortfolioOptimisation/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+	$(error SYSROOT ENV variable is not set or its path does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file $(K_IMAGE) does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file $(ROOTFS) does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/Quadrature/Makefile b/quantitative_finance/L2/tests/Quadrature/Makefile
index 905436f7fb..d7e8e6c74e 100755
--- a/quantitative_finance/L2/tests/Quadrature/Makefile
+++ b/quantitative_finance/L2/tests/Quadrature/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,14 +113,15 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(CUR_DIR)/src/host/quad_test.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -164,11 +171,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -202,21 +204,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -263,14 +265,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/quad_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -304,12 +308,10 @@ cleanh:
 
 cleank:
-	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/Quadrature/utils.mk b/quantitative_finance/L2/tests/Quadrature/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/Quadrature/utils.mk
+++ b/quantitative_finance/L2/tests/Quadrature/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+	$(error SYSROOT ENV variable is not set or its path does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file $(K_IMAGE) does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file $(ROOTFS) does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/Quanto/Makefile b/quantitative_finance/L2/tests/Quanto/Makefile
index 57a5a0147b..c64c2049b9 100755
--- a/quantitative_finance/L2/tests/Quanto/Makefile
+++ b/quantitative_finance/L2/tests/Quanto/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,14 +113,15 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(CUR_DIR)/src/host/quanto_cpu.cpp $(CUR_DIR)/src/host/quanto_parser.cpp $(CUR_DIR)/src/host/quanto_test.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -179,11 +186,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -217,21 +219,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -278,14 +280,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/quanto_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -319,12 +323,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/Quanto/utils.mk b/quantitative_finance/L2/tests/Quanto/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/Quanto/utils.mk
+++ b/quantitative_finance/L2/tests/Quanto/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/TreeCallableEngineHWModel/Makefile b/quantitative_finance/L2/tests/TreeCallableEngineHWModel/Makefile
index be137fdaf2..742c92c0ef 100644
--- a/quantitative_finance/L2/tests/TreeCallableEngineHWModel/Makefile
+++ b/quantitative_finance/L2/tests/TreeCallableEngineHWModel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,7 +187,7 @@ endif
 $(TEMP_DIR)/TREE_k0.xo: $(CUR_DIR)/kernel/TREE_k0.cpp 
 	$(ECHO) "Compiling Kernel: TREE_k0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_TREE_k0) $(VPP_FLAGS) -k TREE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_TREE_k0) $(VPP_FLAGS) -k TREE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_TREE_k_OBJS += $(TEMP_DIR)/TREE_k0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_TREE_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -204,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -242,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -303,14 +305,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/TREE_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -344,12 +348,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/TreeCallableEngineHWModel/utils.mk b/quantitative_finance/L2/tests/TreeCallableEngineHWModel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/TreeCallableEngineHWModel/utils.mk
+++ b/quantitative_finance/L2/tests/TreeCallableEngineHWModel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/TreeCapFloorEngineHWModel/Makefile b/quantitative_finance/L2/tests/TreeCapFloorEngineHWModel/Makefile
index f0680b7108..ffdce22bed 100644
--- a/quantitative_finance/L2/tests/TreeCapFloorEngineHWModel/Makefile
+++ b/quantitative_finance/L2/tests/TreeCapFloorEngineHWModel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,7 +187,7 @@ endif
 $(TEMP_DIR)/TREE_k0.xo: $(CUR_DIR)/kernel/TREE_k0.cpp 
 	$(ECHO) "Compiling Kernel: TREE_k0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_TREE_k0) $(VPP_FLAGS) -k TREE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_TREE_k0) $(VPP_FLAGS) -k TREE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_TREE_k_OBJS += $(TEMP_DIR)/TREE_k0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_TREE_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -204,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -242,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -303,14 +305,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/TREE_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -344,12 +348,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/TreeCapFloorEngineHWModel/utils.mk b/quantitative_finance/L2/tests/TreeCapFloorEngineHWModel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/TreeCapFloorEngineHWModel/utils.mk
+++ b/quantitative_finance/L2/tests/TreeCapFloorEngineHWModel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/TreeSwapEngineHWModel/Makefile b/quantitative_finance/L2/tests/TreeSwapEngineHWModel/Makefile
index bc4e7a885d..717fad0b2e 100644
--- a/quantitative_finance/L2/tests/TreeSwapEngineHWModel/Makefile
+++ b/quantitative_finance/L2/tests/TreeSwapEngineHWModel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -134,6 +136,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -181,7 +188,7 @@ endif
 $(TEMP_DIR)/TREE_k0.xo: $(CUR_DIR)/kernel/TREE_k0.cpp 
 	$(ECHO) "Compiling Kernel: TREE_k0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_TREE_k0) $(VPP_FLAGS) -k TREE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_TREE_k0) $(VPP_FLAGS) -k TREE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_TREE_k_OBJS += $(TEMP_DIR)/TREE_k0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_TREE_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -205,11 +212,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -243,21 +245,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -304,14 +306,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/TREE_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -345,12 +349,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/TreeSwapEngineHWModel/utils.mk b/quantitative_finance/L2/tests/TreeSwapEngineHWModel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/TreeSwapEngineHWModel/utils.mk
+++ b/quantitative_finance/L2/tests/TreeSwapEngineHWModel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/TreeSwaptionEngineBKModel/Makefile b/quantitative_finance/L2/tests/TreeSwaptionEngineBKModel/Makefile
index 4c60fc525b..aeb58b3893 100644
--- a/quantitative_finance/L2/tests/TreeSwaptionEngineBKModel/Makefile
+++ b/quantitative_finance/L2/tests/TreeSwaptionEngineBKModel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -134,6 +136,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -181,7 +188,7 @@ endif
 $(TEMP_DIR)/TREE_k0.xo: $(CUR_DIR)/kernel/TREE_k0.cpp 
 	$(ECHO) "Compiling Kernel: TREE_k0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_TREE_k0) $(VPP_FLAGS) -k TREE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_TREE_k0) $(VPP_FLAGS) -k TREE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_TREE_k_OBJS += $(TEMP_DIR)/TREE_k0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_TREE_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -205,11 +212,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -243,21 +245,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first, but no longer mark the package out of date on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -304,14 +306,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/TREE_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -345,12 +349,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/TreeSwaptionEngineBKModel/utils.mk b/quantitative_finance/L2/tests/TreeSwaptionEngineBKModel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/TreeSwaptionEngineBKModel/utils.mk
+++ b/quantitative_finance/L2/tests/TreeSwaptionEngineBKModel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when $(K_IMAGE) does not match an existing path
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort when $(ROOTFS) does not match an existing path
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/TreeSwaptionEngineCIRModel/Makefile b/quantitative_finance/L2/tests/TreeSwaptionEngineCIRModel/Makefile
index 9e7664d712..81cf71b0cc 100644
--- a/quantitative_finance/L2/tests/TreeSwaptionEngineCIRModel/Makefile
+++ b/quantitative_finance/L2/tests/TreeSwaptionEngineCIRModel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -134,6 +136,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -181,7 +188,7 @@ endif
 $(TEMP_DIR)/TREE_k0.xo: $(CUR_DIR)/kernel/TREE_k0.cpp 
 	$(ECHO) "Compiling Kernel: TREE_k0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_TREE_k0) $(VPP_FLAGS) -k TREE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_TREE_k0) $(VPP_FLAGS) -k TREE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_TREE_k_OBJS += $(TEMP_DIR)/TREE_k0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_TREE_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -205,11 +212,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -243,21 +245,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first, but no longer mark the package out of date on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -304,14 +306,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/TREE_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -345,12 +349,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/TreeSwaptionEngineCIRModel/utils.mk b/quantitative_finance/L2/tests/TreeSwaptionEngineCIRModel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/TreeSwaptionEngineCIRModel/utils.mk
+++ b/quantitative_finance/L2/tests/TreeSwaptionEngineCIRModel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when $(K_IMAGE) does not match an existing path
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort when $(ROOTFS) does not match an existing path
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/TreeSwaptionEngineECIRModel/Makefile b/quantitative_finance/L2/tests/TreeSwaptionEngineECIRModel/Makefile
index 00e62bf33d..9e1420e1c5 100644
--- a/quantitative_finance/L2/tests/TreeSwaptionEngineECIRModel/Makefile
+++ b/quantitative_finance/L2/tests/TreeSwaptionEngineECIRModel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,7 +187,7 @@ endif
 $(TEMP_DIR)/TREE_k0.xo: $(CUR_DIR)/kernel/TREE_k0.cpp 
 	$(ECHO) "Compiling Kernel: TREE_k0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_TREE_k0) $(VPP_FLAGS) -k TREE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_TREE_k0) $(VPP_FLAGS) -k TREE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_TREE_k_OBJS += $(TEMP_DIR)/TREE_k0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_TREE_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -204,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -242,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first, but no longer mark the package out of date on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -303,14 +305,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/TREE_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -344,12 +348,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/TreeSwaptionEngineECIRModel/utils.mk b/quantitative_finance/L2/tests/TreeSwaptionEngineECIRModel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/TreeSwaptionEngineECIRModel/utils.mk
+++ b/quantitative_finance/L2/tests/TreeSwaptionEngineECIRModel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when $(K_IMAGE) does not match an existing path
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort when $(ROOTFS) does not match an existing path
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/TreeSwaptionEngineG2Model/Makefile b/quantitative_finance/L2/tests/TreeSwaptionEngineG2Model/Makefile
index fbb39886c7..75ed1fb3d9 100644
--- a/quantitative_finance/L2/tests/TreeSwaptionEngineG2Model/Makefile
+++ b/quantitative_finance/L2/tests/TreeSwaptionEngineG2Model/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,11 +135,15 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
 
-
 HOST_ARGS :=  -xclbin $(BUILD_DIR)/TREE_k.xclbin
 ifneq ($(HOST_ARCH), x86)
 PKG_HOST_ARGS = $(foreach args,$(HOST_ARGS),$(subst $(dir $(patsubst %/,%,$(args))),,$(args)))
@@ -205,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -243,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first, but no longer mark the package out of date on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -304,14 +305,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/TREE_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -345,12 +348,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/TreeSwaptionEngineG2Model/utils.mk b/quantitative_finance/L2/tests/TreeSwaptionEngineG2Model/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/TreeSwaptionEngineG2Model/utils.mk
+++ b/quantitative_finance/L2/tests/TreeSwaptionEngineG2Model/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when $(K_IMAGE) does not match an existing path
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort when $(ROOTFS) does not match an existing path
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/TreeSwaptionEngineHWModel/Makefile b/quantitative_finance/L2/tests/TreeSwaptionEngineHWModel/Makefile
index 183c2109fc..1969aed87c 100644
--- a/quantitative_finance/L2/tests/TreeSwaptionEngineHWModel/Makefile
+++ b/quantitative_finance/L2/tests/TreeSwaptionEngineHWModel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -134,6 +136,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -181,7 +188,7 @@ endif
 $(TEMP_DIR)/TREE_k0.xo: $(CUR_DIR)/kernel/TREE_k0.cpp 
 	$(ECHO) "Compiling Kernel: TREE_k0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_TREE_k0) $(VPP_FLAGS) -k TREE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_TREE_k0) $(VPP_FLAGS) -k TREE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_TREE_k_OBJS += $(TEMP_DIR)/TREE_k0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_TREE_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -205,11 +212,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -243,21 +245,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but do not force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -304,14 +306,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/TREE_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -345,12 +349,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/TreeSwaptionEngineHWModel/utils.mk b/quantitative_finance/L2/tests/TreeSwaptionEngineHWModel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/TreeSwaptionEngineHWModel/utils.mk
+++ b/quantitative_finance/L2/tests/TreeSwaptionEngineHWModel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Default K_IMAGE and ROOTFS from SYSROOT on non-x86 hosts (overridable, assigned with ?=), then check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT '$(SYSROOT)' is not set or does not exist, please set ENV variable SYSROOT correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when the K_IMAGE path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort when the ROOTFS path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set ENV variable ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/TreeSwaptionEngineVModel/Makefile b/quantitative_finance/L2/tests/TreeSwaptionEngineVModel/Makefile
index f3d6ac9f37..6a395d114d 100644
--- a/quantitative_finance/L2/tests/TreeSwaptionEngineVModel/Makefile
+++ b/quantitative_finance/L2/tests/TreeSwaptionEngineVModel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -134,6 +136,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -181,7 +188,7 @@ endif
 $(TEMP_DIR)/TREE_k0.xo: $(CUR_DIR)/kernel/TREE_k0.cpp 
 	$(ECHO) "Compiling Kernel: TREE_k0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_TREE_k0) $(VPP_FLAGS) -k TREE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_TREE_k0) $(VPP_FLAGS) -k TREE_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_TREE_k_OBJS += $(TEMP_DIR)/TREE_k0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_TREE_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -205,11 +212,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -243,21 +245,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but do not force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -304,14 +306,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/TREE_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -345,12 +349,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/TreeSwaptionEngineVModel/utils.mk b/quantitative_finance/L2/tests/TreeSwaptionEngineVModel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/TreeSwaptionEngineVModel/utils.mk
+++ b/quantitative_finance/L2/tests/TreeSwaptionEngineVModel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Default K_IMAGE and ROOTFS from SYSROOT on non-x86 hosts (overridable, assigned with ?=), then check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT '$(SYSROOT)' is not set or does not exist, please set ENV variable SYSROOT correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when the K_IMAGE path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort when the ROOTFS path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set ENV variable ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L2/tests/ZCDiscountingBondEngine/Makefile b/quantitative_finance/L2/tests/ZCDiscountingBondEngine/Makefile
index cb86d1a4cc..b556755278 100644
--- a/quantitative_finance/L2/tests/ZCDiscountingBondEngine/Makefile
+++ b/quantitative_finance/L2/tests/ZCDiscountingBondEngine/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u200/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -133,6 +135,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DI
 CXXFLAGS += -O3 
 
 endif
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -180,7 +187,7 @@ endif
 $(TEMP_DIR)/BOND_k0.xo: $(CUR_DIR)/kernel/BOND_k0.cpp 
 	$(ECHO) "Compiling Kernel: BOND_k0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_BOND_k0) $(VPP_FLAGS) -k BOND_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_BOND_k0) $(VPP_FLAGS) -k BOND_k0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_BOND_k_OBJS += $(TEMP_DIR)/BOND_k0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_BOND_k_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -204,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -242,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but do not force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -303,14 +305,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/BOND_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -344,12 +348,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L2/tests/ZCDiscountingBondEngine/utils.mk b/quantitative_finance/L2/tests/ZCDiscountingBondEngine/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L2/tests/ZCDiscountingBondEngine/utils.mk
+++ b/quantitative_finance/L2/tests/ZCDiscountingBondEngine/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Default K_IMAGE and ROOTFS from SYSROOT on non-x86 hosts (overridable, assigned with ?=), then check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT '$(SYSROOT)' is not set or does not exist, please set ENV variable SYSROOT correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when the K_IMAGE path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 hosts only: abort when the ROOTFS path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set ENV variable ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L3/tests/BinomialTree/Makefile b/quantitative_finance/L3/tests/BinomialTree/Makefile
index e55cef4acf..6916f75b1f 100644
--- a/quantitative_finance/L3/tests/BinomialTree/Makefile
+++ b/quantitative_finance/L3/tests/BinomialTree/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L3/tests/BinomialTree/xf_fintech_binomialtree_exe.cpp $(XFLIB_DIR)/L3/src/models/binomial_tree/src/xf_fintech_binomialtree.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device_manager.cpp $(XFLIB_DIR)/L3/src/xf_fintech_internal.cpp $(XFLIB_DIR)/L3/src/xf_fintech_ocl_controller.cpp $(XFLIB_DIR)/L3/src/xf_fintech_timestamp.cpp $(XFLIB_DIR)/L3/src/xf_fintech_trace.cpp 
 CXXFLAGS +=  -D TEST_DT=double -D TEST_PARALLEL_ENGINES=8
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L3/src/models/binomial_tree/include -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/BinomialTreeEngine/src/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# Workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := binomial_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -147,7 +154,7 @@ VPP_FLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include
 VPP_FLAGS_BinomialTreeKernel +=  -D TEST_DT=double -D TEST_PARALLEL_ENGINES=8
 VPP_FLAGS_BinomialTreeKernel += --hls.clock 300000000:BinomialTreeKernel
 ifneq ($(HOST_ARCH), x86)
-VPP_LDFLAGS_binomialtree += --clock.defaultFreqHz 300000000
+VPP_LDFLAGS_binomialtree += --clock.defaultFreqHz 250000000
 else
 VPP_LDFLAGS_binomialtree += --kernel_frequency 250
 endif
@@ -187,11 +194,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -225,21 +227,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but do not force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -286,14 +288,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/binomialtree.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -327,12 +331,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
-clean: cleanh
+clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L3/tests/BinomialTree/utils.mk b/quantitative_finance/L3/tests/BinomialTree/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L3/tests/BinomialTree/utils.mk
+++ b/quantitative_finance/L3/tests/BinomialTree/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # K_IMAGE always gets a default on non-x86 hosts, so this verifies the file exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # ROOTFS always gets a default on non-x86 hosts, so this verifies the file exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L3/tests/CFBlackScholes/Makefile b/quantitative_finance/L3/tests/CFBlackScholes/Makefile
index 8309e795ca..cd435e8473 100644
--- a/quantitative_finance/L3/tests/CFBlackScholes/Makefile
+++ b/quantitative_finance/L3/tests/CFBlackScholes/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L3/tests/CFBlackScholes/xf_fintech_cf_BlackScholes.cpp $(XFLIB_DIR)/L3/src/models/cf_black_scholes/src/xf_fintech_cf_black_scholes.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device_manager.cpp $(XFLIB_DIR)/L3/src/xf_fintech_internal.cpp $(XFLIB_DIR)/L3/src/xf_fintech_ocl_controller.cpp $(XFLIB_DIR)/L3/src/xf_fintech_timestamp.cpp $(XFLIB_DIR)/L3/src/xf_fintech_trace.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/L2/tests/CFBlackScholes/src/kernel -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := bs_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -200,11 +207,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -238,21 +240,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: checks still run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -299,14 +301,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/bs_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -340,12 +344,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L3/tests/CFBlackScholes/utils.mk b/quantitative_finance/L3/tests/CFBlackScholes/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L3/tests/CFBlackScholes/utils.mk
+++ b/quantitative_finance/L3/tests/CFBlackScholes/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # K_IMAGE always gets a default on non-x86 hosts, so this verifies the file exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # ROOTFS always gets a default on non-x86 hosts, so this verifies the file exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L3/tests/CFBlackScholesMerton/Makefile b/quantitative_finance/L3/tests/CFBlackScholesMerton/Makefile
index 744348b845..44c97a851d 100644
--- a/quantitative_finance/L3/tests/CFBlackScholesMerton/Makefile
+++ b/quantitative_finance/L3/tests/CFBlackScholesMerton/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L3/tests/CFBlackScholesMerton/xf_fintech_cf_BlackScholesMerton.cpp $(XFLIB_DIR)/L3/src/models/cf_black_scholes/src/xf_fintech_cf_black_scholes.cpp $(XFLIB_DIR)/L3/src/models/cf_black_scholes_merton/src/xf_fintech_cf_black_scholes_merton.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device_manager.cpp $(XFLIB_DIR)/L3/src/xf_fintech_internal.cpp $(XFLIB_DIR)/L3/src/xf_fintech_ocl_controller.cpp $(XFLIB_DIR)/L3/src/xf_fintech_timestamp.cpp $(XFLIB_DIR)/L3/src/xf_fintech_trace.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/L2/tests/CFBlackScholesMerton/src/kernel -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := bsm_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -200,11 +207,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -238,21 +240,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: checks still run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -299,14 +301,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/bsm_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -340,12 +344,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L3/tests/CFBlackScholesMerton/utils.mk b/quantitative_finance/L3/tests/CFBlackScholesMerton/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L3/tests/CFBlackScholesMerton/utils.mk
+++ b/quantitative_finance/L3/tests/CFBlackScholesMerton/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # K_IMAGE always gets a default on non-x86 hosts, so this verifies the file exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # ROOTFS always gets a default on non-x86 hosts, so this verifies the file exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L3/tests/CFGarmanKohlhagen/Makefile b/quantitative_finance/L3/tests/CFGarmanKohlhagen/Makefile
index 13db4487e7..ea281e45e1 100644
--- a/quantitative_finance/L3/tests/CFGarmanKohlhagen/Makefile
+++ b/quantitative_finance/L3/tests/CFGarmanKohlhagen/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L3/tests/CFGarmanKohlhagen/xf_fintech_cf_GarmanKohlhagen.cpp $(XFLIB_DIR)/L3/src/models/cf_black_scholes/src/xf_fintech_cf_black_scholes.cpp $(XFLIB_DIR)/L3/src/models/cf_garman_kohlhagen/src/xf_fintech_cf_garman_kohlhagen.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device_manager.cpp $(XFLIB_DIR)/L3/src/xf_fintech_internal.cpp $(XFLIB_DIR)/L3/src/xf_fintech_ocl_controller.cpp $(XFLIB_DIR)/L3/src/xf_fintech_timestamp.cpp $(XFLIB_DIR)/L3/src/xf_fintech_trace.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/L2/tests/GarmanKohlhagenEngine/src/kernel -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := gk_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -200,11 +207,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -238,21 +240,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: checks still run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -299,14 +301,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/gk_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -340,12 +344,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L3/tests/CFGarmanKohlhagen/utils.mk b/quantitative_finance/L3/tests/CFGarmanKohlhagen/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L3/tests/CFGarmanKohlhagen/utils.mk
+++ b/quantitative_finance/L3/tests/CFGarmanKohlhagen/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # K_IMAGE always gets a default on non-x86 hosts, so this verifies the file exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs: # ROOTFS always gets a default on non-x86 hosts, so this verifies the file exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L3/tests/HeathJarrowMorton/Makefile b/quantitative_finance/L3/tests/HeathJarrowMorton/Makefile
index dc9f05e3c3..bd3f172cfc 100644
--- a/quantitative_finance/L3/tests/HeathJarrowMorton/Makefile
+++ b/quantitative_finance/L3/tests/HeathJarrowMorton/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L3/tests/HeathJarrowMorton/src/xf_fintech_hjm_cl.cpp $(XFLIB_DIR)/L3/src/models/hjm/src/xf_fintech_hjm.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device_manager.cpp $(XFLIB_DIR)/L3/src/xf_fintech_internal.cpp $(XFLIB_DIR)/L3/src/xf_fintech_ocl_controller.cpp $(XFLIB_DIR)/L3/src/xf_fintech_timestamp.cpp $(XFLIB_DIR)/L3/src/xf_fintech_trace.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/L3/src/models/hjm/include -I $(XFLIB_DIR)/L2/tests/HeathJarrowMorton/src/kernel -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := hjm_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -171,11 +178,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -209,21 +211,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: checks still run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -270,14 +272,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/hjm_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -311,12 +315,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L3/tests/HeathJarrowMorton/utils.mk b/quantitative_finance/L3/tests/HeathJarrowMorton/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L3/tests/HeathJarrowMorton/utils.mk
+++ b/quantitative_finance/L3/tests/HeathJarrowMorton/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # For non-x86 HOST_ARCH, stop with an error when the kernel image path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE to a valid kernel image path and rerun)
+endif
+endif
+check_rootfs: # For non-x86 HOST_ARCH, stop with an error when the root filesystem path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS to a valid rootfs path and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L3/tests/HestonFD/Makefile b/quantitative_finance/L3/tests/HestonFD/Makefile
index 4d7257df25..0ef9e197ea 100644
--- a/quantitative_finance/L3/tests/HestonFD/Makefile
+++ b/quantitative_finance/L3/tests/HestonFD/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L3/tests/HestonFD/src/xf_heston_harness.cpp $(XFLIB_DIR)/L3/tests/HestonFD/src/xf_heston_test_suite.cpp $(XFLIB_DIR)/L3/tests/HestonFD/src/xf_heston_test_case.cpp $(XFLIB_DIR)/L3/src/models/heston_fd/src/xf_fintech_heston.cpp $(XFLIB_DIR)/L3/src/models/heston_fd/src/xf_fintech_heston_adi_solver.cpp $(XFLIB_DIR)/L3/src/models/heston_fd/src/xf_fintech_heston_coeffs.cpp $(XFLIB_DIR)/L3/src/models/heston_fd/src/xf_fintech_heston_execution_time.cpp $(XFLIB_DIR)/L3/src/models/heston_fd/src/xf_fintech_heston_kernel_interface.cpp $(XFLIB_DIR)/L3/src/models/heston_fd/src/xf_fintech_heston_matrices.cpp $(XFLIB_DIR)/L3/src/models/heston_fd/src/xf_fintech_heston_model_parameters.cpp $(XFLIB_DIR)/L3/src/models/heston_fd/src/xf_fintech_heston_numpy_like_functions.cpp $(XFLIB_DIR)/L3/src/models/heston_fd/src/xf_fintech_heston_ocl_objects.cpp $(XFLIB_DIR)/L3/src/models/heston_fd/src/xf_fintech_heston_price_ram.cpp $(XFLIB_DIR)/L3/src/models/heston_fd/src/xf_fintech_heston_solver_parameters.cpp $(XFLIB_DIR)/L3/src/models/heston_fd/src/xf_fintech_heston_wrapper.cpp $(XFLIB_DIR)/L3/src/utils/linear_interpolation/src/xf_fintech_li_api.cpp $(XFLIB_DIR)/L3/src/utils/linear_interpolation/src/xf_fintech_li_grid.cpp $(XFLIB_DIR)/L3/src/utils/linear_interpolation/src/xf_fintech_li_interpolator.cpp $(XFLIB_DIR)/L3/src/utils/linear_interpolation/src/xf_fintech_li_rules.cpp $(XFLIB_DIR)/L3/src/utils/linear_interpolation/src/xf_fintech_li_vector.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device_manager.cpp $(XFLIB_DIR)/L3/src/xf_fintech_internal.cpp $(XFLIB_DIR)/L3/src/xf_fintech_ocl_controller.cpp $(XFLIB_DIR)/L3/src/xf_fintech_timestamp.cpp $(XFLIB_DIR)/L3/src/xf_fintech_trace.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/L2/tests/FdEuropeanHestonEngine/src/kernel -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L3/src/models/heston_fd/include -I $(XFLIB_DIR)/L3/tests/HestonFD/include -I $(XFLIB_DIR)/L3/src/utils/linear_interpolation/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := fd_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -143,7 +150,7 @@ endif
 $(TEMP_DIR)/fd_kernel.xo: $(XFLIB_DIR)/L2/tests/FdEuropeanHestonEngine/src/kernel/fd_kernel.cpp 
 	$(ECHO) "Compiling Kernel: fd_kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_fd_kernel) $(VPP_FLAGS) -k fd_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_fd_kernel) $(VPP_FLAGS) -k fd_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_fd_kernel_OBJS += $(TEMP_DIR)/fd_kernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_fd_kernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -167,11 +174,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -205,21 +207,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first, but no longer force re-packaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -266,14 +268,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/fd_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -307,12 +311,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L3/tests/HestonFD/utils.mk b/quantitative_finance/L3/tests/HestonFD/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L3/tests/HestonFD/utils.mk
+++ b/quantitative_finance/L3/tests/HestonFD/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # For non-x86 HOST_ARCH, stop with an error when the kernel image path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE to a valid kernel image path and rerun)
+endif
+endif
+check_rootfs: # For non-x86 HOST_ARCH, stop with an error when the root filesystem path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS to a valid rootfs path and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L3/tests/LiborMarketModel/Makefile b/quantitative_finance/L3/tests/LiborMarketModel/Makefile
index 565da70095..e6b19b9f3b 100644
--- a/quantitative_finance/L3/tests/LiborMarketModel/Makefile
+++ b/quantitative_finance/L3/tests/LiborMarketModel/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L3/tests/LiborMarketModel/src/xf_fintech_lmm_example.cpp $(XFLIB_DIR)/L3/src/models/lmm/src/xf_fintech_lmm.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device_manager.cpp $(XFLIB_DIR)/L3/src/xf_fintech_internal.cpp $(XFLIB_DIR)/L3/src/xf_fintech_ocl_controller.cpp $(XFLIB_DIR)/L3/src/xf_fintech_timestamp.cpp $(XFLIB_DIR)/L3/src/xf_fintech_trace.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/L3/src/models/lmm/include -I $(XFLIB_DIR)/L2/tests/LMMEngineCap/kernel -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := lmm_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -185,11 +192,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -223,21 +225,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first, but no longer force re-packaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -284,14 +286,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/lmmCapKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -325,12 +329,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L3/tests/LiborMarketModel/utils.mk b/quantitative_finance/L3/tests/LiborMarketModel/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L3/tests/LiborMarketModel/utils.mk
+++ b/quantitative_finance/L3/tests/LiborMarketModel/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # For non-x86 HOST_ARCH, stop with an error when the kernel image path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE to a valid kernel image path and rerun)
+endif
+endif
+check_rootfs: # For non-x86 HOST_ARCH, stop with an error when the root filesystem path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS to a valid rootfs path and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L3/tests/MonteCarlo/Makefile b/quantitative_finance/L3/tests/MonteCarlo/Makefile
index 51d87a1b34..4d14dd85a1 100644
--- a/quantitative_finance/L3/tests/MonteCarlo/Makefile
+++ b/quantitative_finance/L3/tests/MonteCarlo/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L3/tests/MonteCarlo/xf_fintech_mc_example.cpp $(XFLIB_DIR)/L3/tests/MonteCarlo/xf_fintech_mc_european_single.cpp $(XFLIB_DIR)/L3/src/models/mc_european/src/xf_fintech_mc_european.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device_manager.cpp $(XFLIB_DIR)/L3/src/xf_fintech_internal.cpp $(XFLIB_DIR)/L3/src/xf_fintech_ocl_controller.cpp $(XFLIB_DIR)/L3/src/xf_fintech_timestamp.cpp $(XFLIB_DIR)/L3/src/xf_fintech_trace.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/L3/src/models/mc_european/include -I $(XFLIB_DIR)/L2/tests/MCEuropeanEngine/kernel -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := mc_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -185,11 +192,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -223,21 +225,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first, but no longer force re-packaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -284,14 +286,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/mc_euro_k.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -325,12 +329,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L3/tests/MonteCarlo/utils.mk b/quantitative_finance/L3/tests/MonteCarlo/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L3/tests/MonteCarlo/utils.mk
+++ b/quantitative_finance/L3/tests/MonteCarlo/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # For non-x86 HOST_ARCH, stop with an error when the kernel image path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE to a valid kernel image path and rerun)
+endif
+endif
+check_rootfs: # For non-x86 HOST_ARCH, stop with an error when the root filesystem path does not exist
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS to a valid rootfs path and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L3/tests/MonteCarloDJE/Makefile b/quantitative_finance/L3/tests/MonteCarloDJE/Makefile
index 1c6bcd360e..cfd8a60ad1 100644
--- a/quantitative_finance/L3/tests/MonteCarloDJE/Makefile
+++ b/quantitative_finance/L3/tests/MonteCarloDJE/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L3/tests/MonteCarloDJE/xf_fintech_mc_dje_example.cpp $(XFLIB_DIR)/L3/src/models/mc_european_dje/src/xf_fintech_mc_european_dje.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device_manager.cpp $(XFLIB_DIR)/L3/src/xf_fintech_internal.cpp $(XFLIB_DIR)/L3/src/xf_fintech_ocl_controller.cpp $(XFLIB_DIR)/L3/src/xf_fintech_timestamp.cpp $(XFLIB_DIR)/L3/src/xf_fintech_trace.cpp 
 CXXFLAGS +=  -D DEVICE_PART=$(XDEVICE)
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/L2/tests/MCEuropeanDowJonesEngine/kernel -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L3/src/models/mc_european_dje/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := mcdje_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -160,7 +167,7 @@ endif
 $(TEMP_DIR)/kernel_mc_0.xo: $(XFLIB_DIR)/L2/tests/MCEuropeanDowJonesEngine/kernel/kernel_mceuropeanengine.cpp 
 	$(ECHO) "Compiling Kernel: kernel_mc_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_mc_0) $(VPP_FLAGS) -k kernel_mc_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_mc_0) $(VPP_FLAGS) -k kernel_mc_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_mc_0_OBJS += $(TEMP_DIR)/kernel_mc_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_mc_0_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -184,11 +191,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -222,21 +224,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -283,14 +285,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_mc_0.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -324,12 +328,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L3/tests/MonteCarloDJE/utils.mk b/quantitative_finance/L3/tests/MonteCarloDJE/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L3/tests/MonteCarloDJE/utils.mk
+++ b/quantitative_finance/L3/tests/MonteCarloDJE/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT))) # empty when SYSROOT is unset or names a path that does not exist
+	$(error SYSROOT ENV variable is not set or points to a missing path [$(SYSROOT)], please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: abort when K_IMAGE (defaulted from SYSROOT above) matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set or points to a missing file [$(K_IMAGE)], please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: abort when ROOTFS (defaulted from SYSROOT above) matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set or points to a missing file [$(ROOTFS)], please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L3/tests/PopMCMC/Makefile b/quantitative_finance/L3/tests/PopMCMC/Makefile
index 1ad372e5e6..1f23c750a2 100644
--- a/quantitative_finance/L3/tests/PopMCMC/Makefile
+++ b/quantitative_finance/L3/tests/PopMCMC/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L3/tests/PopMCMC/xf_fintech_pop_mcmc_exe.cpp $(XFLIB_DIR)/L3/src/models/pop_mcmc/src/xf_fintech_pop_mcmc.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device_manager.cpp $(XFLIB_DIR)/L3/src/xf_fintech_internal.cpp $(XFLIB_DIR)/L3/src/xf_fintech_ocl_controller.cpp $(XFLIB_DIR)/L3/src/xf_fintech_timestamp.cpp $(XFLIB_DIR)/L3/src/xf_fintech_trace.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/L3/src/models/pop_mcmc/include -I $(XFLIB_DIR)/L2/tests/PopMCMC/src/kernel -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := mcmc_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -185,11 +192,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -223,21 +225,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -284,14 +286,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/mcmc_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -325,12 +329,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L3/tests/PopMCMC/utils.mk b/quantitative_finance/L3/tests/PopMCMC/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L3/tests/PopMCMC/utils.mk
+++ b/quantitative_finance/L3/tests/PopMCMC/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT))) # empty when SYSROOT is unset or names a path that does not exist
+	$(error SYSROOT ENV variable is not set or points to a missing path [$(SYSROOT)], please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: abort when K_IMAGE (defaulted from SYSROOT above) matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set or points to a missing file [$(K_IMAGE)], please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: abort when ROOTFS (defaulted from SYSROOT above) matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set or points to a missing file [$(ROOTFS)], please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L3/tests/PortfolioOptimisation/Makefile b/quantitative_finance/L3/tests/PortfolioOptimisation/Makefile
index 97872d19c4..65ec1bfae6 100644
--- a/quantitative_finance/L3/tests/PortfolioOptimisation/Makefile
+++ b/quantitative_finance/L3/tests/PortfolioOptimisation/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L3/tests/PortfolioOptimisation/xf_fintech_portfolio_optimisation.cpp $(XFLIB_DIR)/L3/src/models/portfolio_optimisation/src/xf_fintech_po.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device_manager.cpp $(XFLIB_DIR)/L3/src/xf_fintech_internal.cpp $(XFLIB_DIR)/L3/src/xf_fintech_ocl_controller.cpp $(XFLIB_DIR)/L3/src/xf_fintech_timestamp.cpp $(XFLIB_DIR)/L3/src/xf_fintech_trace.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/L2/tests/PortfolioOptimisation/src/kernel -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := po_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -142,7 +149,7 @@ endif
 $(TEMP_DIR)/po_kernel.xo: $(XFLIB_DIR)/L2/tests/PortfolioOptimisation/src/kernel/po_kernel.cpp 
 	$(ECHO) "Compiling Kernel: po_kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_po_kernel) $(VPP_FLAGS) -k po_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_po_kernel) $(VPP_FLAGS) -k po_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_po_kernel_OBJS += $(TEMP_DIR)/po_kernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_po_kernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -166,11 +173,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -204,21 +206,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -265,14 +267,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/po_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -306,12 +310,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L3/tests/PortfolioOptimisation/utils.mk b/quantitative_finance/L3/tests/PortfolioOptimisation/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L3/tests/PortfolioOptimisation/utils.mk
+++ b/quantitative_finance/L3/tests/PortfolioOptimisation/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT))) # empty when SYSROOT is unset or names a path that does not exist
+	$(error SYSROOT ENV variable is not set or points to a missing path [$(SYSROOT)], please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: abort when K_IMAGE (defaulted from SYSROOT above) matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set or points to a missing file [$(K_IMAGE)], please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: abort when ROOTFS (defaulted from SYSROOT above) matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set or points to a missing file [$(ROOTFS)], please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L3/tests/Quanto/Makefile b/quantitative_finance/L3/tests/Quanto/Makefile
index aaf28fe92f..66b3d6fd05 100644
--- a/quantitative_finance/L3/tests/Quanto/Makefile
+++ b/quantitative_finance/L3/tests/Quanto/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L3/tests/Quanto/xf_fintech_Quanto.cpp $(XFLIB_DIR)/L3/src/models/cf_black_scholes/src/xf_fintech_cf_black_scholes.cpp $(XFLIB_DIR)/L3/src/models/quanto/src/xf_fintech_quanto.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device_manager.cpp $(XFLIB_DIR)/L3/src/xf_fintech_internal.cpp $(XFLIB_DIR)/L3/src/xf_fintech_ocl_controller.cpp $(XFLIB_DIR)/L3/src/xf_fintech_timestamp.cpp $(XFLIB_DIR)/L3/src/xf_fintech_trace.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/L2/tests/Quanto/src/kernel -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := quanto_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -200,11 +207,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -238,21 +240,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -299,14 +301,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/quanto_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -340,12 +344,10 @@ cleanh:
 
 cleank:
-	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L3/tests/Quanto/utils.mk b/quantitative_finance/L3/tests/Quanto/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L3/tests/Quanto/utils.mk
+++ b/quantitative_finance/L3/tests/Quanto/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
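+# For non-x86 hosts: default K_IMAGE/ROOTFS relative to SYSROOT (overridable, set with ?=), then check that SYSROOT/K_IMAGE/ROOTFS exist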
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT '$(SYSROOT)' is not set or does not exist, please set ENV variable SYSROOT correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L3/tests/b76/Makefile b/quantitative_finance/L3/tests/b76/Makefile
index 21400a369d..4a303d56dc 100644
--- a/quantitative_finance/L3/tests/b76/Makefile
+++ b/quantitative_finance/L3/tests/b76/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L3/tests/b76/xf_fintech_cfb76.cpp $(XFLIB_DIR)/L3/src/models/cf_b76/src/xf_fintech_cf_b76.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device_manager.cpp $(XFLIB_DIR)/L3/src/xf_fintech_internal.cpp $(XFLIB_DIR)/L3/src/xf_fintech_ocl_controller.cpp $(XFLIB_DIR)/L3/src/xf_fintech_timestamp.cpp $(XFLIB_DIR)/L3/src/xf_fintech_trace.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/L2/tests/CFBlack76/src/kernel -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := b76_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -200,11 +207,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -238,21 +240,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -299,14 +301,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/b76_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -340,12 +344,10 @@ cleanh:
 
 cleank:
-	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L3/tests/b76/utils.mk b/quantitative_finance/L3/tests/b76/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L3/tests/b76/utils.mk
+++ b/quantitative_finance/L3/tests/b76/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
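+# For non-x86 hosts: default K_IMAGE/ROOTFS relative to SYSROOT (overridable, set with ?=), then check that SYSROOT/K_IMAGE/ROOTFS exist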
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT '$(SYSROOT)' is not set or does not exist, please set ENV variable SYSROOT correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L3/tests/cds/Makefile b/quantitative_finance/L3/tests/cds/Makefile
index 056f0a4ef2..82526b9c7a 100644
--- a/quantitative_finance/L3/tests/cds/Makefile
+++ b/quantitative_finance/L3/tests/cds/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L3/tests/cds/xf_fintech_cds_exe.cpp $(XFLIB_DIR)/L3/src/models/cds/src/xf_fintech_cds.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device_manager.cpp $(XFLIB_DIR)/L3/src/xf_fintech_internal.cpp $(XFLIB_DIR)/L3/src/xf_fintech_ocl_controller.cpp $(XFLIB_DIR)/L3/src/xf_fintech_timestamp.cpp $(XFLIB_DIR)/L3/src/xf_fintech_trace.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/L2/tests/CreditDefaultSwapEngine/src/kernel -I $(XFLIB_DIR)/L3/src/models/cds/include -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := cds_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -186,11 +193,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -224,21 +226,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -285,14 +287,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/CDS_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -326,12 +330,10 @@ cleanh:
 
 cleank:
-	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L3/tests/cds/utils.mk b/quantitative_finance/L3/tests/cds/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L3/tests/cds/utils.mk
+++ b/quantitative_finance/L3/tests/cds/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
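+# For non-x86 hosts: default K_IMAGE/ROOTFS relative to SYSROOT (overridable, set with ?=), then check that SYSROOT/K_IMAGE/ROOTFS exist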
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT '$(SYSROOT)' is not set or does not exist, please set ENV variable SYSROOT correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L3/tests/fdbslv/Makefile b/quantitative_finance/L3/tests/fdbslv/Makefile
index 654ad09ce3..0094a1392c 100644
--- a/quantitative_finance/L3/tests/fdbslv/Makefile
+++ b/quantitative_finance/L3/tests/fdbslv/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L3/tests/fdbslv/xf_fintech_fdbslv_exe.cpp $(XFLIB_DIR)/L3/src/models/fdbslv/src/xf_fintech_fdbslv.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device_manager.cpp $(XFLIB_DIR)/L3/src/xf_fintech_internal.cpp $(XFLIB_DIR)/L3/src/xf_fintech_ocl_controller.cpp $(XFLIB_DIR)/L3/src/xf_fintech_timestamp.cpp $(XFLIB_DIR)/L3/src/xf_fintech_trace.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/L2/tests/FDBlackScholesLocalVolatilityEngine/src/kernel -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := fdbslv_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -186,11 +193,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -224,21 +226,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -285,14 +287,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/fd_bs_lv_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -326,12 +330,10 @@ cleanh:
 
 cleank:
-	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation.sh *.xclbin
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L3/tests/fdbslv/utils.mk b/quantitative_finance/L3/tests/fdbslv/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L3/tests/fdbslv/utils.mk
+++ b/quantitative_finance/L3/tests/fdbslv/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
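+# For non-x86 hosts: default K_IMAGE/ROOTFS relative to SYSROOT (overridable, set with ?=), then check that SYSROOT/K_IMAGE/ROOTFS exist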
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT '$(SYSROOT)' is not set or does not exist, please set ENV variable SYSROOT correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable ROOTFS correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L3/tests/hcf/Makefile b/quantitative_finance/L3/tests/hcf/Makefile
index e3b6f76553..f93523c331 100644
--- a/quantitative_finance/L3/tests/hcf/Makefile
+++ b/quantitative_finance/L3/tests/hcf/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L3/tests/hcf/xf_fintech_hcf_exe.cpp $(XFLIB_DIR)/L3/src/models/hcf/src/xf_fintech_hcf.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device_manager.cpp $(XFLIB_DIR)/L3/src/xf_fintech_internal.cpp $(XFLIB_DIR)/L3/src/xf_fintech_ocl_controller.cpp $(XFLIB_DIR)/L3/src/xf_fintech_timestamp.cpp $(XFLIB_DIR)/L3/src/xf_fintech_trace.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/L3/src/models/hcf/include -I $(XFLIB_DIR)/L2/tests/HCFEngine/src/kernel -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not already mention opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := hcf_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -186,11 +193,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -224,21 +226,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -285,14 +287,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/hcf_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -326,12 +330,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L3/tests/hcf/utils.mk b/quantitative_finance/L3/tests/hcf/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L3/tests/hcf/utils.mk
+++ b/quantitative_finance/L3/tests/hcf/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE [$(K_IMAGE)] does not exist, please set K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS [$(ROOTFS)] does not exist, please set ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L3/tests/hullwhite/Makefile b/quantitative_finance/L3/tests/hullwhite/Makefile
index 469bbe15e6..79aa7f75a5 100644
--- a/quantitative_finance/L3/tests/hullwhite/Makefile
+++ b/quantitative_finance/L3/tests/hullwhite/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L3/tests/hullwhite/xf_fintech_hullwhite_exe.cpp $(XFLIB_DIR)/L3/src/models/hullwhite/src/xf_fintech_hullwhite.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device_manager.cpp $(XFLIB_DIR)/L3/src/xf_fintech_internal.cpp $(XFLIB_DIR)/L3/src/xf_fintech_ocl_controller.cpp $(XFLIB_DIR)/L3/src/xf_fintech_timestamp.cpp $(XFLIB_DIR)/L3/src/xf_fintech_trace.cpp 
 CXXFLAGS +=  -D TEST_DT=float
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L3/src/models/hullwhite/include -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/HullWhiteAnalyticEngine/src/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := hullwhite_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -201,11 +208,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -239,21 +241,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -300,14 +302,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/hwa_engine.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -341,12 +345,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L3/tests/hullwhite/utils.mk b/quantitative_finance/L3/tests/hullwhite/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L3/tests/hullwhite/utils.mk
+++ b/quantitative_finance/L3/tests/hullwhite/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE [$(K_IMAGE)] does not exist, please set K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS [$(ROOTFS)] does not exist, please set ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/L3/tests/m76/Makefile b/quantitative_finance/L3/tests/m76/Makefile
index 66ecedd302..812037b1d6 100644
--- a/quantitative_finance/L3/tests/m76/Makefile
+++ b/quantitative_finance/L3/tests/m76/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,16 +113,17 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/ext/xcl2/xcl2.cpp $(XFLIB_DIR)/L3/tests/m76/xf_fintech_m76_exe.cpp $(XFLIB_DIR)/L3/src/models/m76/src/xf_fintech_m76.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device.cpp $(XFLIB_DIR)/L3/src/xf_fintech_device_manager.cpp $(XFLIB_DIR)/L3/src/xf_fintech_internal.cpp $(XFLIB_DIR)/L3/src/xf_fintech_ocl_controller.cpp $(XFLIB_DIR)/L3/src/xf_fintech_timestamp.cpp $(XFLIB_DIR)/L3/src/xf_fintech_trace.cpp 
 CXXFLAGS +=  -D TEST_DT=float
 CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L3/src/models/m76/include -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/L2/tests/M76Engine/src/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/quantlib -I $(XFLIB_DIR)/ext/rng -I $(XFLIB_DIR)/L3/include -I $(XFLIB_DIR)/L3/include/models -I $(XFLIB_DIR)/../utils/L1/include
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := m76_test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -144,7 +151,7 @@ endif
 $(TEMP_DIR)/m76_kernel.xo: $(XFLIB_DIR)/L2/tests/M76Engine/src/kernel/m76_kernel.cpp 
 	$(ECHO) "Compiling Kernel: m76_kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_m76_kernel) $(VPP_FLAGS) -k m76_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_m76_kernel) $(VPP_FLAGS) -k m76_kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_m76_kernel_OBJS += $(TEMP_DIR)/m76_kernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_m76_kernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -168,11 +175,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -206,21 +208,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -267,14 +269,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/m76_kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -308,12 +312,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/quantitative_finance/L3/tests/m76/utils.mk b/quantitative_finance/L3/tests/m76/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/quantitative_finance/L3/tests/m76/utils.mk
+++ b/quantitative_finance/L3/tests/m76/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE [$(K_IMAGE)] does not exist, please set K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS [$(ROOTFS)] does not exist, please set ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/quantitative_finance/docs/conf.py b/quantitative_finance/docs/conf.py
index c394a17356..f472fe39a9 100644
--- a/quantitative_finance/docs/conf.py
+++ b/quantitative_finance/docs/conf.py
@@ -42,13 +42,13 @@
 # -- Project information -----------------------------------------------------
 
 project = 'Vitis Quantitative Finance Library'
-copyright = '2021, Xilinx'
+copyright = '2022, Xilinx'
 author = 'Xilinx'
 
 # The short X.Y version
-version = '2021.2'
+version = '2022.1'
 # The full version, including alpha/beta/rc tags
-release = '2021.2_release'
+release = '2022.1_release'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/security/Jenkinsfile b/security/Jenkinsfile
index 417cbcd67c..6e0cac9744 100644
--- a/security/Jenkinsfile
+++ b/security/Jenkinsfile
@@ -1,4 +1,4 @@
 @Library('pipeline-library')_
-VitisLibPipeline (branch: 'next', libname: 'xf_security', TARGETS: 'hls_csim:hls_cosim:vivado_syn:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
-                  upstream_dependencies: 'xf_utils_hw,next,../utils',
-                  devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_stable_latest', mail_on:'daily:PR')
+VitisLibPipeline (branch: 'main', libname: 'xf_security', TARGETS: 'hls_csim:hls_cosim:vivado_syn:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
+                  upstream_dependencies: 'xf_utils_hw,main,../utils',
+                  devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_released', mail_on:'daily:PR')
diff --git a/security/L1/benchmarks/adler32/Makefile b/security/L1/benchmarks/adler32/Makefile
index 417ed619a5..b5ec8b12b9 100644
--- a/security/L1/benchmarks/adler32/Makefile
+++ b/security/L1/benchmarks/adler32/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(XFLIB_DIR)/L1/benchmarks/adler32/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L1/benchmarks/adler32/host -I $(XFLIB_DIR)/L1/benchmarks/adler32/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 CXXFLAGS += -O3 
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -154,7 +161,7 @@ endif
 $(TEMP_DIR)/Adler32Kernel.xo: $(XFLIB_DIR)/L1/benchmarks/adler32/kernel/adler32_kernel.cpp 
 	$(ECHO) "Compiling Kernel: Adler32Kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_Adler32Kernel) $(VPP_FLAGS) -k Adler32Kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_Adler32Kernel) $(VPP_FLAGS) -k Adler32Kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_Adler32Kernel_OBJS += $(TEMP_DIR)/Adler32Kernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_Adler32Kernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -178,11 +185,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -216,21 +218,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -277,14 +279,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/Adler32Kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -318,12 +322,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/security/L1/benchmarks/adler32/utils.mk b/security/L1/benchmarks/adler32/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/security/L1/benchmarks/adler32/utils.mk
+++ b/security/L1/benchmarks/adler32/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE [$(K_IMAGE)] does not exist, please set K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS [$(ROOTFS)] does not exist, please set ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/security/L1/benchmarks/aes256CbcDecrypt/Makefile b/security/L1/benchmarks/aes256CbcDecrypt/Makefile
index 7381419994..d5435c9dee 100644
--- a/security/L1/benchmarks/aes256CbcDecrypt/Makefile
+++ b/security/L1/benchmarks/aes256CbcDecrypt/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -136,6 +138,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(CUR_DIR)/host -I $(CUR_DIR)/kernel
 CXXFLAGS += -O3
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := aes256CbcDecryptBenchmark.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -178,7 +185,7 @@ endif
 $(TEMP_DIR)/aes256CbcDecryptKernel.xo: $(CUR_DIR)/kernel/aes256CbcDecryptKernel.cpp 
 	$(ECHO) "Compiling Kernel: aes256CbcDecryptKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_aes256CbcDecryptKernel) $(VPP_FLAGS) -k aes256CbcDecryptKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_aes256CbcDecryptKernel) $(VPP_FLAGS) -k aes256CbcDecryptKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_aes256CbcDecryptKernel_OBJS += $(TEMP_DIR)/aes256CbcDecryptKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_aes256CbcDecryptKernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -202,11 +209,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -240,21 +242,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but do not force re-packaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -301,14 +303,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/aes256CbcDecryptKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -342,12 +346,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/security/L1/benchmarks/aes256CbcDecrypt/utils.mk b/security/L1/benchmarks/aes256CbcDecrypt/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/security/L1/benchmarks/aes256CbcDecrypt/utils.mk
+++ b/security/L1/benchmarks/aes256CbcDecrypt/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Default K_IMAGE/ROOTFS from SYSROOT unless already set (non-x86 hosts only), then check that SYSROOT, K_IMAGE and ROOTFS exist
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/security/L1/benchmarks/aes256CbcEncrypt/Makefile b/security/L1/benchmarks/aes256CbcEncrypt/Makefile
index 2006853b8e..7a7a6e1e70 100644
--- a/security/L1/benchmarks/aes256CbcEncrypt/Makefile
+++ b/security/L1/benchmarks/aes256CbcEncrypt/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -130,6 +132,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(CUR_DIR)/host -I $(CUR_DIR)/kernel
 CXXFLAGS += -O3
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := aes256CbcEncryptBenchmark.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -168,7 +175,7 @@ endif
 $(TEMP_DIR)/aes256CbcEncryptKernel.xo: $(CUR_DIR)/kernel/aes256CbcEncryptKernel.cpp 
 	$(ECHO) "Compiling Kernel: aes256CbcEncryptKernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_aes256CbcEncryptKernel) $(VPP_FLAGS) -k aes256CbcEncryptKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_aes256CbcEncryptKernel) $(VPP_FLAGS) -k aes256CbcEncryptKernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_aes256CbcEncryptKernel_OBJS += $(TEMP_DIR)/aes256CbcEncryptKernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_aes256CbcEncryptKernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -192,11 +199,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -230,21 +232,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but do not force re-packaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -291,14 +293,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/aes256CbcEncryptKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -332,12 +336,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/security/L1/benchmarks/aes256CbcEncrypt/utils.mk b/security/L1/benchmarks/aes256CbcEncrypt/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/security/L1/benchmarks/aes256CbcEncrypt/utils.mk
+++ b/security/L1/benchmarks/aes256CbcEncrypt/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Default K_IMAGE/ROOTFS from SYSROOT unless already set (non-x86 hosts only), then check that SYSROOT, K_IMAGE and ROOTFS exist
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/security/L1/benchmarks/crc32/Makefile b/security/L1/benchmarks/crc32/Makefile
index 54bcfb0ca3..a0fbfabf68 100644
--- a/security/L1/benchmarks/crc32/Makefile
+++ b/security/L1/benchmarks/crc32/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/vck190/'))
 HOST_SRCS += $(XFLIB_DIR)/L1/benchmarks/crc32/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -124,6 +126,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L1/benchmarks/crc32/host
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := host.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -162,7 +169,7 @@ endif
 $(TEMP_DIR)/CRC32Kernel.xo: $(XFLIB_DIR)/L1/benchmarks/crc32/kernel/crc32_kernel.cpp 
 	$(ECHO) "Compiling Kernel: CRC32Kernel"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_CRC32Kernel) $(VPP_FLAGS) -k CRC32Kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_CRC32Kernel) $(VPP_FLAGS) -k CRC32Kernel -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_CRC32Kernel_OBJS += $(TEMP_DIR)/CRC32Kernel.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_CRC32Kernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -186,11 +193,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -224,21 +226,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but do not force re-packaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -285,14 +287,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/CRC32Kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -326,12 +330,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/security/L1/benchmarks/crc32/utils.mk b/security/L1/benchmarks/crc32/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/security/L1/benchmarks/crc32/utils.mk
+++ b/security/L1/benchmarks/crc32/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+# Default K_IMAGE/ROOTFS from SYSROOT unless already set (non-x86 hosts only), then check that SYSROOT, K_IMAGE and ROOTFS exist
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/security/L1/benchmarks/hmac_sha1/Makefile b/security/L1/benchmarks/hmac_sha1/Makefile
index 78a77d50fd..ba7f0e7b34 100644
--- a/security/L1/benchmarks/hmac_sha1/Makefile
+++ b/security/L1/benchmarks/hmac_sha1/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -128,6 +130,11 @@ HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(CUR_DIR)/host -I $(CUR_DIR)/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := hmacSha1Benchmark.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -169,22 +176,22 @@ endif
 $(TEMP_DIR)/hmacSha1Kernel_1.xo: $(CUR_DIR)/kernel/hmacSha1Kernel1.cpp 
 	$(ECHO) "Compiling Kernel: hmacSha1Kernel_1"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_hmacSha1Kernel_1) $(VPP_FLAGS) -k hmacSha1Kernel_1 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_hmacSha1Kernel_1) $(VPP_FLAGS) -k hmacSha1Kernel_1 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_hmacSha1Kernel_OBJS += $(TEMP_DIR)/hmacSha1Kernel_1.xo
 $(TEMP_DIR)/hmacSha1Kernel_2.xo: $(CUR_DIR)/kernel/hmacSha1Kernel2.cpp 
 	$(ECHO) "Compiling Kernel: hmacSha1Kernel_2"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_hmacSha1Kernel_2) $(VPP_FLAGS) -k hmacSha1Kernel_2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_hmacSha1Kernel_2) $(VPP_FLAGS) -k hmacSha1Kernel_2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_hmacSha1Kernel_OBJS += $(TEMP_DIR)/hmacSha1Kernel_2.xo
 $(TEMP_DIR)/hmacSha1Kernel_3.xo: $(CUR_DIR)/kernel/hmacSha1Kernel3.cpp 
 	$(ECHO) "Compiling Kernel: hmacSha1Kernel_3"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_hmacSha1Kernel_3) $(VPP_FLAGS) -k hmacSha1Kernel_3 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_hmacSha1Kernel_3) $(VPP_FLAGS) -k hmacSha1Kernel_3 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_hmacSha1Kernel_OBJS += $(TEMP_DIR)/hmacSha1Kernel_3.xo
 $(TEMP_DIR)/hmacSha1Kernel_4.xo: $(CUR_DIR)/kernel/hmacSha1Kernel4.cpp 
 	$(ECHO) "Compiling Kernel: hmacSha1Kernel_4"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_hmacSha1Kernel_4) $(VPP_FLAGS) -k hmacSha1Kernel_4 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_hmacSha1Kernel_4) $(VPP_FLAGS) -k hmacSha1Kernel_4 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_hmacSha1Kernel_OBJS += $(TEMP_DIR)/hmacSha1Kernel_4.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_hmacSha1Kernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -208,11 +215,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -246,21 +248,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but do not force re-packaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -307,14 +309,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/hmacSha1Kernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -348,12 +352,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/security/L1/benchmarks/hmac_sha1/utils.mk b/security/L1/benchmarks/hmac_sha1/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/security/L1/benchmarks/hmac_sha1/utils.mk
+++ b/security/L1/benchmarks/hmac_sha1/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/security/L1/benchmarks/rc4Encrypt/Makefile b/security/L1/benchmarks/rc4Encrypt/Makefile
index 1aa34091a5..7ecf39d370 100644
--- a/security/L1/benchmarks/rc4Encrypt/Makefile
+++ b/security/L1/benchmarks/rc4Encrypt/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -128,6 +130,11 @@ HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp
 CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(CUR_DIR)/host -I $(CUR_DIR)/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/../utils/L1/include -I $(XFLIB_DIR)/ext/xcl2
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := rc4EncryptBenchmarck.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -150,7 +157,7 @@ endif
 ######################### binary container global settings ##########################
 VPP_FLAGS_rc4EncryptKernel_1 += --hls.clock 200000000:rc4EncryptKernel_1
 ifneq ($(HOST_ARCH), x86)
-VPP_LDFLAGS_rc4EncryptKernel += --clock.defaultFreqHz 200000000
+VPP_LDFLAGS_rc4EncryptKernel += --clock.defaultFreqHz 150000000
 else
 VPP_LDFLAGS_rc4EncryptKernel += --kernel_frequency 150
 endif
@@ -166,7 +173,7 @@ endif
 $(TEMP_DIR)/rc4EncryptKernel_1.xo: $(CUR_DIR)/kernel/rc4EncryptKernel1.cpp 
 	$(ECHO) "Compiling Kernel: rc4EncryptKernel_1"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_rc4EncryptKernel_1) $(VPP_FLAGS) -k rc4EncryptKernel_1 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_rc4EncryptKernel_1) $(VPP_FLAGS) -k rc4EncryptKernel_1 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_rc4EncryptKernel_OBJS += $(TEMP_DIR)/rc4EncryptKernel_1.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_rc4EncryptKernel_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -190,11 +197,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -228,21 +230,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -289,14 +291,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/rc4EncryptKernel.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -330,12 +334,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
-clean: cleanh
+clean: cleanh
\ No newline at end of file
diff --git a/security/L1/benchmarks/rc4Encrypt/utils.mk b/security/L1/benchmarks/rc4Encrypt/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/security/L1/benchmarks/rc4Encrypt/utils.mk
+++ b/security/L1/benchmarks/rc4Encrypt/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/security/docs/tutorial.rst b/security/docs/tutorial.rst
index 9d7313d4df..8561cdbb5c 100644
--- a/security/docs/tutorial.rst
+++ b/security/docs/tutorial.rst
@@ -29,11 +29,11 @@ Vitis Security Library Tutorial
 Crypto Algorithm Hardware Acceleration
 =======================================
 
-Crypto algorithms plays an important roll in transfer, verification, storage and processing of data.
+Crypto algorithms play an important role in transfer, verification, storage and processing of data.
 Crypto application includes encryption/decryption, checksum, hash, signature/verification, etc.
-Modern crypto algorithms are complicated and keep evovling, especially in new area like block-chain application.
-FPGA is suitable for a wide variety of crypto algortihm and able to provide competitive or even better performance than CPU.
-It is also convenient to construct a processing pipeline while crypto processing is one of the pipeline stage, which will save bandwidth cost and improve performance.
+Modern crypto algorithms are complicated and keep evolving, especially in new areas like blockchain applications.
+FPGA is suitable for a wide variety of crypto algorithms and is able to provide competitive or even better performance than CPU.
+It is also convenient to construct a processing pipeline in which crypto processing is one of the pipeline stages, which saves bandwidth cost and improves performance.
 Last but not the least, FPGA could have multiple compute unit working in parallel, which will also improve total performance.
 
 .. image:: /images/pipeline_processing.png
@@ -54,7 +54,7 @@ L1 API
 Target Audience and Major Features
 ------------------------------------
 
-Target audience of L1 API are users who is familiar with HLS programming and want to tests / profile / modify operators or add new APIs.
+The target audience of the L1 API is users who are familiar with HLS programming and want to test / profile / modify operators or add new APIs.
 With the HLS test project provided in L1 layer, user could get:
 
 (1) Function correctness tests, both in c-simulation and co-simulation
@@ -65,9 +65,9 @@ With the HLS test project provided in L1 layer, user could get:
 Input / output interface
 --------------------------
 
-For easy connection with other HLS component, most of security library L1 API takes hls::stream interface.
+For easy connection with other HLS components, most of the security library L1 APIs take an hls::stream interface.
 For APIs which could take variable length of input, API will include either scalar parameter for input length or "end flag" stream working with data stream in a 1:1 fashion to tell if this is the last block of input data.
-For details of interface definition and how padding will happen in stream interface, please take reference of documentation and corresponding test cases.
+For details of the interface definition and how padding happens in the stream interface, please refer to the documentation and the corresponding test cases.
 
 Command to Run L1 cases
 -------------------------
diff --git a/solver/Jenkinsfile b/solver/Jenkinsfile
index 9264d1df36..89339a43d9 100644
--- a/solver/Jenkinsfile
+++ b/solver/Jenkinsfile
@@ -1,5 +1,5 @@
 @Library('pipeline-library')_
 
-VitisLibPipeline (branch: 'next', libname: 'xf_solver', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
-                  upstream_dependencies: 'xf_utils_hw,next,../utils',
-                  devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_stable_latest', mail_on:'daily:PR')
+VitisLibPipeline (branch: 'main', libname: 'xf_solver', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
+                  upstream_dependencies: 'xf_utils_hw,main,../utils',
+                  devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_released', mail_on:'daily:PR')
diff --git a/solver/L2/benchmarks/gesvdj/Makefile b/solver/L2/benchmarks/gesvdj/Makefile
index 59da6aa5a2..18cbed40f4 100644
--- a/solver/L2/benchmarks/gesvdj/Makefile
+++ b/solver/L2/benchmarks/gesvdj/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/test_gesvdj.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -142,6 +144,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/MatrixGen -I $(XFLIB_D
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_gesvdj.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -213,11 +220,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -251,21 +253,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -312,12 +314,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_gesvdj.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/solver/L2/benchmarks/gesvdj/utils.mk b/solver/L2/benchmarks/gesvdj/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/solver/L2/benchmarks/gesvdj/utils.mk
+++ b/solver/L2/benchmarks/gesvdj/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/solver/L2/benchmarks/gesvj/Makefile b/solver/L2/benchmarks/gesvj/Makefile
index 088d213cf5..2d168c28f1 100644
--- a/solver/L2/benchmarks/gesvj/Makefile
+++ b/solver/L2/benchmarks/gesvj/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/test_gesvj.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -142,6 +144,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/MatrixGen -I $(XFLIB_D
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_gesvj.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -224,11 +231,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -262,21 +264,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -323,12 +325,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_gesvj.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/solver/L2/benchmarks/gesvj/utils.mk b/solver/L2/benchmarks/gesvj/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/solver/L2/benchmarks/gesvj/utils.mk
+++ b/solver/L2/benchmarks/gesvj/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS ENV variable is not set, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/solver/L2/benchmarks/gtsv/Makefile b/solver/L2/benchmarks/gtsv/Makefile
index 78531f5b85..71c04fbc67 100644
--- a/solver/L2/benchmarks/gtsv/Makefile
+++ b/solver/L2/benchmarks/gtsv/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/test_gtsv.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -142,6 +144,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/MatrixGen -I $(XFLIB_D
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_gtsv.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -189,7 +196,7 @@ endif
 $(TEMP_DIR)/kernel_gtsv_0.xo: $(CUR_DIR)/kernel_gtsv.cpp 
 	$(ECHO) "Compiling Kernel: kernel_gtsv_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_gtsv_0) $(VPP_FLAGS) -k kernel_gtsv_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_gtsv_0) $(VPP_FLAGS) -k kernel_gtsv_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_gtsv_OBJS += $(TEMP_DIR)/kernel_gtsv_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_gtsv_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -213,11 +220,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -251,21 +253,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the .PHONY checks still run first but do not force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -312,14 +314,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_gtsv.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -353,12 +357,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/solver/L2/benchmarks/gtsv/utils.mk b/solver/L2/benchmarks/gtsv/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/solver/L2/benchmarks/gtsv/utils.mk
+++ b/solver/L2/benchmarks/gtsv/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,38 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default to files two levels above SYSROOT;
+# ?= keeps any value the user already supplied.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# The check_* targets below stop the build with an error when the wildcard
+# lookup finds nothing at the configured path (this covers an empty SYSROOT too).
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT path '$(SYSROOT)' does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/solver/L2/tests/gelinearsolver/Makefile b/solver/L2/tests/gelinearsolver/Makefile
index 4afc544a35..04c323a3c1 100644
--- a/solver/L2/tests/gelinearsolver/Makefile
+++ b/solver/L2/tests/gelinearsolver/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/test_gelinearsolver.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -142,6 +144,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/MatrixGen -I $(XFLIB_D
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_gelinearsolver.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -189,7 +196,7 @@ endif
 $(TEMP_DIR)/kernel_gelinearsolver_0.xo: $(CUR_DIR)/kernel_gelinearsolver.cpp 
 	$(ECHO) "Compiling Kernel: kernel_gelinearsolver_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_gelinearsolver_0) $(VPP_FLAGS) -k kernel_gelinearsolver_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_gelinearsolver_0) $(VPP_FLAGS) -k kernel_gelinearsolver_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_gelinearsolver_OBJS += $(TEMP_DIR)/kernel_gelinearsolver_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_gelinearsolver_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -213,11 +220,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -251,21 +253,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the .PHONY checks still run first but do not force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -312,14 +314,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_gelinearsolver.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -353,12 +357,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/solver/L2/tests/gelinearsolver/utils.mk b/solver/L2/tests/gelinearsolver/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/solver/L2/tests/gelinearsolver/utils.mk
+++ b/solver/L2/tests/gelinearsolver/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,38 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default to files two levels above SYSROOT;
+# ?= keeps any value the user already supplied.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# The check_* targets below stop the build with an error when the wildcard
+# lookup finds nothing at the configured path (this covers an empty SYSROOT too).
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT path '$(SYSROOT)' does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/solver/L2/tests/gematrixinverse/Makefile b/solver/L2/tests/gematrixinverse/Makefile
index 40b1317cd6..97ecc98db8 100644
--- a/solver/L2/tests/gematrixinverse/Makefile
+++ b/solver/L2/tests/gematrixinverse/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/test_gematrixinverse.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -142,6 +144,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/MatrixGen -I $(XFLIB_D
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_gematrixinverse.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -189,7 +196,7 @@ endif
 $(TEMP_DIR)/kernel_gematrixinverse_0.xo: $(CUR_DIR)/kernel_gematrixinverse.cpp 
 	$(ECHO) "Compiling Kernel: kernel_gematrixinverse_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_gematrixinverse_0) $(VPP_FLAGS) -k kernel_gematrixinverse_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_gematrixinverse_0) $(VPP_FLAGS) -k kernel_gematrixinverse_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_gematrixinverse_OBJS += $(TEMP_DIR)/kernel_gematrixinverse_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_gematrixinverse_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -213,11 +220,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -251,21 +253,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the .PHONY checks still run first but do not force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -312,14 +314,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_gematrixinverse.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -353,12 +357,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/solver/L2/tests/gematrixinverse/utils.mk b/solver/L2/tests/gematrixinverse/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/solver/L2/tests/gematrixinverse/utils.mk
+++ b/solver/L2/tests/gematrixinverse/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,38 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default to files two levels above SYSROOT;
+# ?= keeps any value the user already supplied.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# The check_* targets below stop the build with an error when the wildcard
+# lookup finds nothing at the configured path (this covers an empty SYSROOT too).
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT path '$(SYSROOT)' does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/solver/L2/tests/geqrf/Makefile b/solver/L2/tests/geqrf/Makefile
index 1ee88a459e..cccd736d31 100644
--- a/solver/L2/tests/geqrf/Makefile
+++ b/solver/L2/tests/geqrf/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/test_geqrf.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -142,6 +144,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/MatrixGen -I $(XFLIB_D
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_geqrf.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -189,7 +196,7 @@ endif
 $(TEMP_DIR)/kernel_geqrf_0.xo: $(CUR_DIR)/kernel_geqrf.cpp 
 	$(ECHO) "Compiling Kernel: kernel_geqrf_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_geqrf_0) $(VPP_FLAGS) -k kernel_geqrf_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_geqrf_0) $(VPP_FLAGS) -k kernel_geqrf_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_geqrf_OBJS += $(TEMP_DIR)/kernel_geqrf_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_geqrf_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -213,11 +220,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -251,21 +253,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the .PHONY checks still run first but do not force repackaging on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -312,14 +314,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_geqrf.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -353,12 +357,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/solver/L2/tests/geqrf/utils.mk b/solver/L2/tests/geqrf/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/solver/L2/tests/geqrf/utils.mk
+++ b/solver/L2/tests/geqrf/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,38 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default to files two levels above SYSROOT;
+# ?= keeps any value the user already supplied.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# The check_* targets below stop the build with an error when the wildcard
+# lookup finds nothing at the configured path (this covers an empty SYSROOT too).
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT path '$(SYSROOT)' does not exist, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE path '$(K_IMAGE)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS path '$(ROOTFS)' does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/solver/L2/tests/getrf/Makefile b/solver/L2/tests/getrf/Makefile
index 0869943043..9e36613fcd 100644
--- a/solver/L2/tests/getrf/Makefile
+++ b/solver/L2/tests/getrf/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/test_getrf.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -142,6 +144,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/MatrixGen -I $(XFLIB_D
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_getrf.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -189,7 +196,7 @@ endif
 $(TEMP_DIR)/kernel_getrf_0.xo: $(CUR_DIR)/kernel_getrf.cpp 
 	$(ECHO) "Compiling Kernel: kernel_getrf_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_getrf_0) $(VPP_FLAGS) -k kernel_getrf_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_getrf_0) $(VPP_FLAGS) -k kernel_getrf_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_getrf_OBJS += $(TEMP_DIR)/kernel_getrf_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_getrf_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -213,11 +220,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -251,21 +253,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -312,14 +314,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_getrf.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -353,12 +357,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/solver/L2/tests/getrf/utils.mk b/solver/L2/tests/getrf/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/solver/L2/tests/getrf/utils.mk
+++ b/solver/L2/tests/getrf/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,38 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default (via ?=, so a value that is
+# already set wins) to files two directories above SYSROOT.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# The check_* targets abort with $(error) when, on a non-x86 host, the
+# corresponding path matches nothing on disk ($(wildcard) expands to empty).
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT is not set or '$(SYSROOT)' does not exist, please set ENV variable SYSROOT correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/solver/L2/tests/getrf_nopivot/Makefile b/solver/L2/tests/getrf_nopivot/Makefile
index 9af54f2732..75339e7a78 100644
--- a/solver/L2/tests/getrf_nopivot/Makefile
+++ b/solver/L2/tests/getrf_nopivot/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/test_getrf_nopivot.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -142,6 +144,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/MatrixGen -I $(XFLIB_D
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_getrf_nopivot.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -189,7 +196,7 @@ endif
 $(TEMP_DIR)/kernel_getrf_nopivot_0.xo: $(CUR_DIR)/kernel_getrf_nopivot.cpp 
 	$(ECHO) "Compiling Kernel: kernel_getrf_nopivot_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_getrf_nopivot_0) $(VPP_FLAGS) -k kernel_getrf_nopivot_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_getrf_nopivot_0) $(VPP_FLAGS) -k kernel_getrf_nopivot_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_getrf_nopivot_OBJS += $(TEMP_DIR)/kernel_getrf_nopivot_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_getrf_nopivot_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -213,11 +220,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -251,21 +253,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -312,14 +314,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_getrf_nopivot.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -353,12 +357,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/solver/L2/tests/getrf_nopivot/utils.mk b/solver/L2/tests/getrf_nopivot/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/solver/L2/tests/getrf_nopivot/utils.mk
+++ b/solver/L2/tests/getrf_nopivot/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,38 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default (via ?=, so a value that is
+# already set wins) to files two directories above SYSROOT.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# The check_* targets abort with $(error) when, on a non-x86 host, the
+# corresponding path matches nothing on disk ($(wildcard) expands to empty).
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT is not set or '$(SYSROOT)' does not exist, please set ENV variable SYSROOT correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/solver/L2/tests/polinearsolver/Makefile b/solver/L2/tests/polinearsolver/Makefile
index 2b8cb31eab..465229dd41 100644
--- a/solver/L2/tests/polinearsolver/Makefile
+++ b/solver/L2/tests/polinearsolver/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/test_polinearsolver.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -142,6 +144,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/MatrixGen -I $(XFLIB_D
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_polinearsolver.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -200,7 +207,7 @@ endif
 $(TEMP_DIR)/kernel_polinearsolver_0.xo: $(CUR_DIR)/kernel_polinearsolver.cpp 
 	$(ECHO) "Compiling Kernel: kernel_polinearsolver_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_polinearsolver_0) $(VPP_FLAGS) -k kernel_polinearsolver_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_polinearsolver_0) $(VPP_FLAGS) -k kernel_polinearsolver_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_polinearsolver_OBJS += $(TEMP_DIR)/kernel_polinearsolver_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_polinearsolver_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -224,11 +231,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -262,21 +264,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -323,14 +325,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_polinearsolver.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -364,12 +368,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) 
 
 clean: cleanh
\ No newline at end of file
diff --git a/solver/L2/tests/polinearsolver/utils.mk b/solver/L2/tests/polinearsolver/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/solver/L2/tests/polinearsolver/utils.mk
+++ b/solver/L2/tests/polinearsolver/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,38 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+# On non-x86 hosts K_IMAGE and ROOTFS default (via ?=, so a value that is
+# already set wins) to files two directories above SYSROOT.
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
+# The check_* targets abort with $(error) when, on a non-x86 host, the
+# corresponding path matches nothing on disk ($(wildcard) expands to empty).
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
-	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
+ifeq (,$(wildcard $(SYSROOT)))
+	$(error SYSROOT is not set or '$(SYSROOT)' does not exist, please set ENV variable SYSROOT correctly and rerun)
 endif
 endif
+check_kimage:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set ENV variable K_IMAGE or SYSROOT correctly and rerun)
+endif
+endif
+check_rootfs:
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ENV variable ROOTFS or SYSROOT correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/solver/L2/tests/pomatrixinverse/Makefile b/solver/L2/tests/pomatrixinverse/Makefile
index 0be116eb58..2614a11242 100644
--- a/solver/L2/tests/pomatrixinverse/Makefile
+++ b/solver/L2/tests/pomatrixinverse/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/test_pomatrixinverse.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -142,6 +144,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/MatrixGen -I $(XFLIB_D
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_pomatrixinverse.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -200,7 +207,7 @@ endif
 $(TEMP_DIR)/kernel_pomatrixinverse_0.xo: $(CUR_DIR)/kernel_pomatrixinverse.cpp 
 	$(ECHO) "Compiling Kernel: kernel_pomatrixinverse_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_pomatrixinverse_0) $(VPP_FLAGS) -k kernel_pomatrixinverse_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_pomatrixinverse_0) $(VPP_FLAGS) -k kernel_pomatrixinverse_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_pomatrixinverse_OBJS += $(TEMP_DIR)/kernel_pomatrixinverse_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_pomatrixinverse_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -224,11 +231,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -262,21 +264,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but must not force repackaging
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -323,14 +325,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_pomatrixinverse.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -364,12 +368,10 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
 clean: cleanh
\ No newline at end of file
diff --git a/solver/L2/tests/pomatrixinverse/utils.mk b/solver/L2/tests/pomatrixinverse/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/solver/L2/tests/pomatrixinverse/utils.mk
+++ b/solver/L2/tests/pomatrixinverse/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: abort when the K_IMAGE path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: abort when the ROOTFS path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/solver/L2/tests/potrf/Makefile b/solver/L2/tests/potrf/Makefile
index 8a9ff6023a..aff4e09c00 100644
--- a/solver/L2/tests/potrf/Makefile
+++ b/solver/L2/tests/potrf/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/test_potrf.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -142,6 +144,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/MatrixGen -I $(XFLIB_D
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_potrf.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -200,7 +207,7 @@ endif
 $(TEMP_DIR)/kernel_potrf_0.xo: $(CUR_DIR)/kernel_potrf.cpp 
 	$(ECHO) "Compiling Kernel: kernel_potrf_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_potrf_0) $(VPP_FLAGS) -k kernel_potrf_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_potrf_0) $(VPP_FLAGS) -k kernel_potrf_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_potrf_OBJS += $(TEMP_DIR)/kernel_potrf_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_potrf_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -224,11 +231,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -262,21 +264,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # checks are phony: order-only so they run first without forcing a re-package
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -323,14 +325,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_potrf.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -364,12 +368,10 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
 clean: cleanh
\ No newline at end of file
diff --git a/solver/L2/tests/potrf/utils.mk b/solver/L2/tests/potrf/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/solver/L2/tests/potrf/utils.mk
+++ b/solver/L2/tests/potrf/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: abort when the K_IMAGE path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: abort when the ROOTFS path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/solver/L2/tests/syevj/Makefile b/solver/L2/tests/syevj/Makefile
index f9920ae615..04d84376de 100644
--- a/solver/L2/tests/syevj/Makefile
+++ b/solver/L2/tests/syevj/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/test_syevj.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -142,6 +144,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/MatrixGen -I $(XFLIB_D
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_syevj.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -163,7 +170,6 @@ VPP_FLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext -I $(XFLIB_DIR)/L1/
 else ifneq (,$(shell echo $(XPLATFORM) | awk '/u50/'))
 VPP_FLAGS +=   --config $(CUR_DIR)/conn_u50.cfg
 VPP_FLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include
-VPP_LDFLAGS += --vivado.prop run.impl_1.STEPS.PLACE_DESIGN.ARGS.DIRECTIVE=SSI_SpreadLogic_high
 
 else 
 VPP_FLAGS +=  -I $(XFLIB_DIR)/L2/include -I $(XFLIB_DIR)/ext -I $(XFLIB_DIR)/L1/include -I $(XFLIB_DIR)/L2/include
@@ -190,7 +196,7 @@ endif
 $(TEMP_DIR)/kernel_syevj_0.xo: $(CUR_DIR)/kernel_syevj.cpp 
 	$(ECHO) "Compiling Kernel: kernel_syevj_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_syevj_0) $(VPP_FLAGS) -k kernel_syevj_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_syevj_0) $(VPP_FLAGS) -k kernel_syevj_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_syevj_OBJS += $(TEMP_DIR)/kernel_syevj_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_syevj_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -214,11 +220,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -252,21 +253,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # checks are phony: order-only so they run first without forcing a re-package
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -313,14 +314,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_syevj.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -354,12 +357,10 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
-clean: cleanh
+clean: cleanh
\ No newline at end of file
diff --git a/solver/L2/tests/syevj/utils.mk b/solver/L2/tests/syevj/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/solver/L2/tests/syevj/utils.mk
+++ b/solver/L2/tests/syevj/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: abort when the K_IMAGE path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: abort when the ROOTFS path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/solver/L2/tests/trtrs/Makefile b/solver/L2/tests/trtrs/Makefile
index 9ed6b3b22e..bea2562480 100644
--- a/solver/L2/tests/trtrs/Makefile
+++ b/solver/L2/tests/trtrs/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u250/'))
 HOST_SRCS += $(CUR_DIR)/test_trtrs.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
@@ -142,6 +144,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/ext/MatrixGen -I $(XFLIB_D
 CXXFLAGS += -O3 
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_trtrs.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -189,7 +196,7 @@ endif
 $(TEMP_DIR)/kernel_trtrs_0.xo: $(CUR_DIR)/kernel_trtrs.cpp 
 	$(ECHO) "Compiling Kernel: kernel_trtrs_0"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_kernel_trtrs_0) $(VPP_FLAGS) -k kernel_trtrs_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$^'
+	$(VPP) -c $(VPP_FLAGS_kernel_trtrs_0) $(VPP_FLAGS) -k kernel_trtrs_0 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_kernel_trtrs_OBJS += $(TEMP_DIR)/kernel_trtrs_0.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_kernel_trtrs_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -213,11 +220,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -251,21 +253,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # checks are phony: order-only so they run first without forcing a re-package
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifneq (,$(findstring vck190_base_dfx, $(PLATFORM_NAME)))
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -312,14 +314,16 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/kernel_trtrs.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
-	$(EXE_FILE) $(HOST_ARGS)
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -353,12 +357,10 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
 	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
 
 clean: cleanh
\ No newline at end of file
diff --git a/solver/L2/tests/trtrs/utils.mk b/solver/L2/tests/trtrs/utils.mk
index 0ee80e90da..1d97b0ad1a 100644
--- a/solver/L2/tests/trtrs/utils.mk
+++ b/solver/L2/tests/trtrs/utils.mk
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -107,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -114,13 +116,34 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 only: abort when the K_IMAGE path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE file '$(K_IMAGE)' does not exist, please set SYSROOT or K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # non-x86 only: abort when the ROOTFS path matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS file '$(ROOTFS)' does not exist, please set SYSROOT or ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
diff --git a/utils/Jenkinsfile b/utils/Jenkinsfile
index 4a664cdbd1..51a99e0881 100644
--- a/utils/Jenkinsfile
+++ b/utils/Jenkinsfile
@@ -1,3 +1,3 @@
 @Library('pipeline-library')_
-VitisLibPipeline (branch: 'next', libname: 'xf_utils_hw', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
-                  devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_stable_latest', mail_on:'daily:PR')
+VitisLibPipeline (branch: 'main', libname: 'xf_utils_hw', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build',
+                  devtest: 'RunDeploy.sh', TOOLVERSION: '2022.1_released', mail_on:'daily:PR')
diff --git a/utils/L2/tests/datamover/load_master_to_stream/Makefile b/utils/L2/tests/datamover/load_master_to_stream/Makefile
index b59132dcaa..448c5a13df 100644
--- a/utils/L2/tests/datamover/load_master_to_stream/Makefile
+++ b/utils/L2/tests/datamover/load_master_to_stream/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -68,11 +74,13 @@ endif
 PLATFORM_ALLOWLIST +=  vck190 u200 aws-vu9p-f1
 PLATFORM_BLOCKLIST += 
 
-GCC_INTOOL := 8.3.0
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
 TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
 BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
 BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
 EMCONFIG := $(BUILD_DIR)/emconfig.json
 XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
@@ -105,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifneq (,$(findstring OPENCV_INCLUDE,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(CUR_DIR)/host -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L1/include
 CXXFLAGS += -std=c++14
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_mover.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -141,12 +150,12 @@ endif
 $(TEMP_DIR)/m2s_x2.xo: $(CUR_DIR)/kernel/m2s_x2.cpp 
 	$(ECHO) "Compiling Kernel: m2s_x2"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_m2s_x2) $(VPP_FLAGS) -k m2s_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_m2s_x2) $(VPP_FLAGS) -k m2s_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_datamover_OBJS += $(TEMP_DIR)/m2s_x2.xo
 $(TEMP_DIR)/s2m_x2.xo: $(CUR_DIR)/kernel/s2m_x2.cpp 
 	$(ECHO) "Compiling Kernel: s2m_x2"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_s2m_x2) $(VPP_FLAGS) -k s2m_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_s2m_x2) $(VPP_FLAGS) -k s2m_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_datamover_OBJS += $(TEMP_DIR)/s2m_x2.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_datamover_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -170,11 +179,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -208,12 +212,23 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	@echo "### ***** sd_card generation done! ***** ###"
+dfx_hw := true
+endif
+endif
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
+endif
 
 .PHONY: sd_card
 sd_card: $(SD_CARD)
@@ -221,7 +236,14 @@ endif
 ############################## Setting Essential Checks and Building Rules ##############################
 RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
 RUN_DEPS += $(SD_CARD)
-run: check_device pre_build $(RUN_DEPS)
+
+.PHONY: mkflag all run
+mkflag:
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do echo $$var >> $(BUILD_DIR)/makefile_args.txt; done
+all: check_device pre_build check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
 #hw_emu
 ifneq (,$(filter hw_emu, $(TARGET)))
 ifeq ($(HOST_ARCH), x86)
@@ -250,7 +272,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/datamover.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -263,13 +295,8 @@ endif
 pre_build:
 	make -f $(CUR_DIR)/ksrc.mk GENKERNEL=$(XFLIB_DIR)/L2/scripts/generate_kernels SPEC=$(CUR_DIR)/kernel/spec.json TOOLDIR=$(CUR_DIR)/_krnlgen
 
-.PHONY: all clean cleanall emconfig
+.PHONY: clean cleanall emconfig
 emconfig: $(EMCONFIG)
-ifeq ($(HOST_ARCH), x86)
-all: pre_build check_vpp check_platform check_xrt $(EXE_FILE) $(BINARY_CONTAINERS) emconfig
-else
-all: pre_build check_vpp check_platform check_sysroot $(EXE_FILE) $(BINARY_CONTAINERS) emconfig sd_card
-endif
 
 .PHONY: host
 ifeq ($(HOST_ARCH), x86)
@@ -292,13 +319,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/_krnlgen $(CUR_DIR)/kernel/.stamp $(CUR_DIR)/kernel/*.inc $(CUR_DIR)/kernel/*.cpp 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/_krnlgen $(CUR_DIR)/kernel/.stamp $(CUR_DIR)/kernel/*.inc $(CUR_DIR)/kernel/*.cpp 
 
 clean: cleanh
\ No newline at end of file
diff --git a/utils/L2/tests/datamover/load_master_to_stream/utils.mk b/utils/L2/tests/datamover/load_master_to_stream/utils.mk
index 4c6ffe588f..1d97b0ad1a 100644
--- a/utils/L2/tests/datamover/load_master_to_stream/utils.mk
+++ b/utils/L2/tests/datamover/load_master_to_stream/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -70,8 +71,8 @@ check_device:
 	fi;
 
 #get HOST_ARCH by PLATFORM
+ifneq (,$(PLATFORM))
 HOST_ARCH_temp = $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
-$(warning HOST_ARCH_temp:$(HOST_ARCH_temp))
 ifeq ($(HOST_ARCH_temp), x86)
 HOST_ARCH := x86
 else ifeq ($(HOST_ARCH_temp), cortex-a9)
@@ -79,7 +80,7 @@ HOST_ARCH := aarch32
 else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
 HOST_ARCH := aarch64
 endif
-$(info HOST_ARCH: $(HOST_ARCH))
+endif
 
 
 #get suffix of kernel by PLATFORM
@@ -94,7 +95,6 @@ endif
 else
 LINK_TARGET_FMT := xclbin
 endif
-$(warning LINK_TARGET_FMT:$(LINK_TARGET_FMT))
 
 #Checks for Device Family
 ifeq ($(HOST_ARCH), aarch32)
@@ -108,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -115,27 +116,53 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Fails on non-x86 hosts when K_IMAGE does not match an existing path
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # Fails on non-x86 hosts when ROOTFS does not match an existing path
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
-CXX_REQ := $(shell echo $(GCC_INTOOL) | cut -f 1 -d ".")
 ifeq ($(HOST_ARCH), x86)
-ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_REQ)), 1)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif
+CXX_V := $(shell echo $(CXX_VER) | awk -F. '{print tolower($$1)}')
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
 ifndef XILINX_VIVADO
-$(error [ERROR]: g++ version too old. Please use $(CXX_REQ) or above)
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
 else
-CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/bin/g++
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
 ifeq ($(LD_LIBRARY_PATH),)
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
 else
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64:$(LD_LIBRARY_PATH)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
 endif
 $(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
 endif
@@ -150,7 +177,14 @@ endif
 OSDIST = $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
 OSREL = $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
 
-ifeq ($(OSDIST), centos)
+# for centos and redhat
+ifneq ($(findstring centos,$(OSDIST)),)
+ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+else ifneq ($(findstring redhat,$(OSDIST)),)
 ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
 ifeq ($(HOST_ARCH), x86)
 XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
diff --git a/utils/L2/tests/datamover/load_master_to_stream_with_counter/Makefile b/utils/L2/tests/datamover/load_master_to_stream_with_counter/Makefile
index a387e81982..53093b8808 100644
--- a/utils/L2/tests/datamover/load_master_to_stream_with_counter/Makefile
+++ b/utils/L2/tests/datamover/load_master_to_stream_with_counter/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -68,11 +74,13 @@ endif
 PLATFORM_ALLOWLIST +=  vck190 u200 aws-vu9p-f1
 PLATFORM_BLOCKLIST += 
 
-GCC_INTOOL := 8.3.0
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
 TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
 BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
 BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
 EMCONFIG := $(BUILD_DIR)/emconfig.json
 XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
@@ -105,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifneq (,$(findstring OPENCV_INCLUDE,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(CUR_DIR)/host -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L1/include
 CXXFLAGS += -std=c++14
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_mover.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -141,12 +150,12 @@ endif
 $(TEMP_DIR)/m2s_x2.xo: $(CUR_DIR)/kernel/m2s_x2.cpp 
 	$(ECHO) "Compiling Kernel: m2s_x2"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_m2s_x2) $(VPP_FLAGS) -k m2s_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_m2s_x2) $(VPP_FLAGS) -k m2s_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_datamover_OBJS += $(TEMP_DIR)/m2s_x2.xo
 $(TEMP_DIR)/s2m_x2.xo: $(CUR_DIR)/kernel/s2m_x2.cpp 
 	$(ECHO) "Compiling Kernel: s2m_x2"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_s2m_x2) $(VPP_FLAGS) -k s2m_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_s2m_x2) $(VPP_FLAGS) -k s2m_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_datamover_OBJS += $(TEMP_DIR)/s2m_x2.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_datamover_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -170,11 +179,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -208,12 +212,23 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	@echo "### ***** sd_card generation done! ***** ###"
+dfx_hw := true
+endif
+endif
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
+endif
 
 .PHONY: sd_card
 sd_card: $(SD_CARD)
@@ -221,7 +236,14 @@ endif
 ############################## Setting Essential Checks and Building Rules ##############################
 RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
 RUN_DEPS += $(SD_CARD)
-run: check_device pre_build $(RUN_DEPS)
+
+.PHONY: mkflag all run
+mkflag:
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do echo $$var >> $(BUILD_DIR)/makefile_args.txt; done
+all: check_device pre_build check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
 #hw_emu
 ifneq (,$(filter hw_emu, $(TARGET)))
 ifeq ($(HOST_ARCH), x86)
@@ -250,7 +272,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/datamover.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -263,13 +295,8 @@ endif
 pre_build:
 	make -f $(CUR_DIR)/ksrc.mk GENKERNEL=$(XFLIB_DIR)/L2/scripts/generate_kernels SPEC=$(CUR_DIR)/kernel/spec.json TOOLDIR=$(CUR_DIR)/_krnlgen
 
-.PHONY: all clean cleanall emconfig
+.PHONY: clean cleanall emconfig
 emconfig: $(EMCONFIG)
-ifeq ($(HOST_ARCH), x86)
-all: pre_build check_vpp check_platform check_xrt $(EXE_FILE) $(BINARY_CONTAINERS) emconfig
-else
-all: pre_build check_vpp check_platform check_sysroot $(EXE_FILE) $(BINARY_CONTAINERS) emconfig sd_card
-endif
 
 .PHONY: host
 ifeq ($(HOST_ARCH), x86)
@@ -292,13 +319,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/_krnlgen $(CUR_DIR)/kernel/.stamp $(CUR_DIR)/kernel/*.inc $(CUR_DIR)/kernel/*.cpp 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/_krnlgen $(CUR_DIR)/kernel/.stamp $(CUR_DIR)/kernel/*.inc $(CUR_DIR)/kernel/*.cpp 
 
 clean: cleanh
\ No newline at end of file
diff --git a/utils/L2/tests/datamover/load_master_to_stream_with_counter/utils.mk b/utils/L2/tests/datamover/load_master_to_stream_with_counter/utils.mk
index 4c6ffe588f..1d97b0ad1a 100644
--- a/utils/L2/tests/datamover/load_master_to_stream_with_counter/utils.mk
+++ b/utils/L2/tests/datamover/load_master_to_stream_with_counter/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -70,8 +71,8 @@ check_device:
 	fi;
 
 #get HOST_ARCH by PLATFORM
+ifneq (,$(PLATFORM))
 HOST_ARCH_temp = $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
-$(warning HOST_ARCH_temp:$(HOST_ARCH_temp))
 ifeq ($(HOST_ARCH_temp), x86)
 HOST_ARCH := x86
 else ifeq ($(HOST_ARCH_temp), cortex-a9)
@@ -79,7 +80,7 @@ HOST_ARCH := aarch32
 else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
 HOST_ARCH := aarch64
 endif
-$(info HOST_ARCH: $(HOST_ARCH))
+endif
 
 
 #get suffix of kernel by PLATFORM
@@ -94,7 +95,6 @@ endif
 else
 LINK_TARGET_FMT := xclbin
 endif
-$(warning LINK_TARGET_FMT:$(LINK_TARGET_FMT))
 
 #Checks for Device Family
 ifeq ($(HOST_ARCH), aarch32)
@@ -108,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -115,27 +116,53 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Fails on non-x86 hosts when K_IMAGE does not match an existing path
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # Fails on non-x86 hosts when ROOTFS does not match an existing path
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' is not set or does not exist, please set ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
-CXX_REQ := $(shell echo $(GCC_INTOOL) | cut -f 1 -d ".")
 ifeq ($(HOST_ARCH), x86)
-ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_REQ)), 1)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif
+CXX_V := $(shell echo $(CXX_VER) | awk -F. '{print tolower($$1)}')
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
 ifndef XILINX_VIVADO
-$(error [ERROR]: g++ version too old. Please use $(CXX_REQ) or above)
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
 else
-CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/bin/g++
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
 ifeq ($(LD_LIBRARY_PATH),)
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
 else
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64:$(LD_LIBRARY_PATH)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
 endif
 $(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
 endif
@@ -150,7 +177,14 @@ endif
 OSDIST = $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
 OSREL = $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
 
-ifeq ($(OSDIST), centos)
+# for centos and redhat
+ifneq ($(findstring centos,$(OSDIST)),)
+ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+else ifneq ($(findstring redhat,$(OSDIST)),)
 ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
 ifeq ($(HOST_ARCH), x86)
 XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
diff --git a/utils/L2/tests/datamover/send_ram_to_stream/Makefile b/utils/L2/tests/datamover/send_ram_to_stream/Makefile
index 5985745680..e0ebd0b890 100644
--- a/utils/L2/tests/datamover/send_ram_to_stream/Makefile
+++ b/utils/L2/tests/datamover/send_ram_to_stream/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -68,11 +74,13 @@ endif
 PLATFORM_ALLOWLIST +=  vck190 u200 aws-vu9p-f1
 PLATFORM_BLOCKLIST += 
 
-GCC_INTOOL := 8.3.0
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
 TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
 BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
 BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
 EMCONFIG := $(BUILD_DIR)/emconfig.json
 XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
@@ -105,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifneq (,$(findstring OPENCV_INCLUDE,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(CUR_DIR)/host -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L1/include
 CXXFLAGS += -std=c++14
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_mover.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -141,12 +150,12 @@ endif
 $(TEMP_DIR)/ram2s_x2.xo: $(CUR_DIR)/kernel/ram2s_x2.cpp 
 	$(ECHO) "Compiling Kernel: ram2s_x2"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_ram2s_x2) $(VPP_FLAGS) -k ram2s_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_ram2s_x2) $(VPP_FLAGS) -k ram2s_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_datamover_OBJS += $(TEMP_DIR)/ram2s_x2.xo
 $(TEMP_DIR)/s2m_x2.xo: $(CUR_DIR)/kernel/s2m_x2.cpp 
 	$(ECHO) "Compiling Kernel: s2m_x2"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_s2m_x2) $(VPP_FLAGS) -k s2m_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_s2m_x2) $(VPP_FLAGS) -k s2m_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_datamover_OBJS += $(TEMP_DIR)/s2m_x2.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_datamover_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -170,11 +179,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -208,12 +212,23 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a repackage on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	@echo "### ***** sd_card generation done! ***** ###"
+dfx_hw := true
+endif # TARGET == hw: two-step packaging above (package the containers, then build the sd_card around BINARY_CONTAINERS_PKG)
+endif # PLATFORM_NAME contains _dfx_
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
+endif # dfx_hw == false: single-step packaging for every other platform/target
 
 .PHONY: sd_card
 sd_card: $(SD_CARD)
@@ -221,7 +236,14 @@ endif
 ############################## Setting Essential Checks and Building Rules ##############################
 RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
 RUN_DEPS += $(SD_CARD)
-run: check_device pre_build $(RUN_DEPS)
+
+.PHONY: mkflag all run
+mkflag: # remove any old $(BUILD_DIR)/makefile_args.txt, then append each word of MAKEFLAGS to it, one per line
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do echo $$var >> $(BUILD_DIR)/makefile_args.txt; done
+all: check_device pre_build check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
 #hw_emu
 ifneq (,$(filter hw_emu, $(TARGET)))
 ifeq ($(HOST_ARCH), x86)
@@ -250,7 +272,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/datamover.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -263,13 +295,8 @@ endif
 pre_build:
 	make -f $(CUR_DIR)/ksrc.mk GENKERNEL=$(XFLIB_DIR)/L2/scripts/generate_kernels SPEC=$(CUR_DIR)/kernel/spec.json TOOLDIR=$(CUR_DIR)/_krnlgen
 
-.PHONY: all clean cleanall emconfig
+.PHONY: clean cleanall emconfig
 emconfig: $(EMCONFIG)
-ifeq ($(HOST_ARCH), x86)
-all: pre_build check_vpp check_platform check_xrt $(EXE_FILE) $(BINARY_CONTAINERS) emconfig
-else
-all: pre_build check_vpp check_platform check_sysroot $(EXE_FILE) $(BINARY_CONTAINERS) emconfig sd_card
-endif
 
 .PHONY: host
 ifeq ($(HOST_ARCH), x86)
@@ -292,13 +319,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/_krnlgen $(CUR_DIR)/kernel/.stamp $(CUR_DIR)/kernel/*.inc $(CUR_DIR)/kernel/*.cpp 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/_krnlgen $(CUR_DIR)/kernel/.stamp $(CUR_DIR)/kernel/*.inc $(CUR_DIR)/kernel/*.cpp 
 
 clean: cleanh
\ No newline at end of file
diff --git a/utils/L2/tests/datamover/send_ram_to_stream/utils.mk b/utils/L2/tests/datamover/send_ram_to_stream/utils.mk
index 4c6ffe588f..1d97b0ad1a 100644
--- a/utils/L2/tests/datamover/send_ram_to_stream/utils.mk
+++ b/utils/L2/tests/datamover/send_ram_to_stream/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -70,8 +71,8 @@ check_device:
 	fi;
 
 #get HOST_ARCH by PLATFORM
+ifneq (,$(PLATFORM))
 HOST_ARCH_temp = $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
-$(warning HOST_ARCH_temp:$(HOST_ARCH_temp))
 ifeq ($(HOST_ARCH_temp), x86)
 HOST_ARCH := x86
 else ifeq ($(HOST_ARCH_temp), cortex-a9)
@@ -79,7 +80,7 @@ HOST_ARCH := aarch32
 else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
 HOST_ARCH := aarch64
 endif
-$(info HOST_ARCH: $(HOST_ARCH))
+endif
 
 
 #get suffix of kernel by PLATFORM
@@ -94,7 +95,6 @@ endif
 else
 LINK_TARGET_FMT := xclbin
 endif
-$(warning LINK_TARGET_FMT:$(LINK_TARGET_FMT))
 
 #Checks for Device Family
 ifeq ($(HOST_ARCH), aarch32)
@@ -108,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -115,27 +116,53 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS: on non-x86 hosts K_IMAGE (uImage when PLATFORM_NAME contains zc706, Image otherwise) and ROOTFS default to paths under $(SYSROOT)/../.. unless already set
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when $(K_IMAGE) matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set K_IMAGE ENV variable correctly and rerun)
+endif
+endif # HOST_ARCH != x86
+check_rootfs: # non-x86 hosts only: abort when $(ROOTFS) matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ROOTFS ENV variable correctly and rerun)
+endif
+endif # HOST_ARCH != x86
 
 #Checks for g++
 CXX := g++
-CXX_REQ := $(shell echo $(GCC_INTOOL) | cut -f 1 -d ".")
 ifeq ($(HOST_ARCH), x86)
-ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_REQ)), 1)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif # CXX_VER is the gcc release picked from VITIS_VER; CXX_V below is its major number, taken with make built-ins instead of a shell/awk fork
+CXX_V := $(firstword $(subst ., ,$(CXX_VER)))
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
 ifndef XILINX_VIVADO
-$(error [ERROR]: g++ version too old. Please use $(CXX_REQ) or above)
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
 else
-CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/bin/g++
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
 ifeq ($(LD_LIBRARY_PATH),)
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
 else
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64:$(LD_LIBRARY_PATH)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
 endif
 $(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
 endif
@@ -150,7 +177,14 @@ endif
 OSDIST = $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
 OSREL = $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
 
-ifeq ($(OSDIST), centos)
+# for centos and redhat
+ifneq ($(findstring centos,$(OSDIST)),)
+ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+else ifneq ($(findstring redhat,$(OSDIST)),)
 ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
 ifeq ($(HOST_ARCH), x86)
 XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
diff --git a/utils/L2/tests/datamover/send_rom_to_stream/Makefile b/utils/L2/tests/datamover/send_rom_to_stream/Makefile
index 397664c80d..b78ee6be12 100644
--- a/utils/L2/tests/datamover/send_rom_to_stream/Makefile
+++ b/utils/L2/tests/datamover/send_rom_to_stream/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -68,11 +74,13 @@ endif
 PLATFORM_ALLOWLIST +=  vck190 u200 aws-vu9p-f1
 PLATFORM_BLOCKLIST += 
 
-GCC_INTOOL := 8.3.0
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
 TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
 BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
 BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
 EMCONFIG := $(BUILD_DIR)/emconfig.json
 XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
@@ -105,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifneq (,$(findstring OPENCV_INCLUDE,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(CUR_DIR)/host -I $(CUR_DIR)/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L1/include
 CXXFLAGS += -std=c++14
 
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not contain the substring "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_mover.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -141,12 +150,12 @@ endif
 $(TEMP_DIR)/rom2s_x2.xo: $(CUR_DIR)/kernel/rom2s_x2.cpp 
 	$(ECHO) "Compiling Kernel: rom2s_x2"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_rom2s_x2) $(VPP_FLAGS) -k rom2s_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_rom2s_x2) $(VPP_FLAGS) -k rom2s_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_datamover_OBJS += $(TEMP_DIR)/rom2s_x2.xo
 $(TEMP_DIR)/s2m_x2.xo: $(CUR_DIR)/kernel/s2m_x2.cpp 
 	$(ECHO) "Compiling Kernel: s2m_x2"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_s2m_x2) $(VPP_FLAGS) -k s2m_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_s2m_x2) $(VPP_FLAGS) -k s2m_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_datamover_OBJS += $(TEMP_DIR)/s2m_x2.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_datamover_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -170,11 +179,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -208,12 +212,23 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a repackage on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	@echo "### ***** sd_card generation done! ***** ###"
+dfx_hw := true
+endif # TARGET == hw: two-step packaging above (package the containers, then build the sd_card around BINARY_CONTAINERS_PKG)
+endif # PLATFORM_NAME contains _dfx_
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
+endif # dfx_hw == false: single-step packaging for every other platform/target
 
 .PHONY: sd_card
 sd_card: $(SD_CARD)
@@ -221,7 +236,14 @@ endif
 ############################## Setting Essential Checks and Building Rules ##############################
 RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
 RUN_DEPS += $(SD_CARD)
-run: check_device pre_build $(RUN_DEPS)
+
+.PHONY: mkflag all run
+mkflag: # remove any old $(BUILD_DIR)/makefile_args.txt, then append each word of MAKEFLAGS to it, one per line
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do echo $$var >> $(BUILD_DIR)/makefile_args.txt; done
+all: check_device pre_build check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
 #hw_emu
 ifneq (,$(filter hw_emu, $(TARGET)))
 ifeq ($(HOST_ARCH), x86)
@@ -250,7 +272,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/datamover.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -263,13 +295,8 @@ endif
 pre_build:
 	make -f $(CUR_DIR)/ksrc.mk GENKERNEL=$(XFLIB_DIR)/L2/scripts/generate_kernels SPEC=$(CUR_DIR)/kernel/spec.json TOOLDIR=$(CUR_DIR)/_krnlgen
 
-.PHONY: all clean cleanall emconfig
+.PHONY: clean cleanall emconfig
 emconfig: $(EMCONFIG)
-ifeq ($(HOST_ARCH), x86)
-all: pre_build check_vpp check_platform check_xrt $(EXE_FILE) $(BINARY_CONTAINERS) emconfig
-else
-all: pre_build check_vpp check_platform check_sysroot $(EXE_FILE) $(BINARY_CONTAINERS) emconfig sd_card
-endif
 
 .PHONY: host
 ifeq ($(HOST_ARCH), x86)
@@ -292,13 +319,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/_krnlgen $(CUR_DIR)/kernel/.stamp $(CUR_DIR)/kernel/*.inc $(CUR_DIR)/kernel/*.cpp 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/_krnlgen $(CUR_DIR)/kernel/.stamp $(CUR_DIR)/kernel/*.inc $(CUR_DIR)/kernel/*.cpp 
 
 clean: cleanh
\ No newline at end of file
diff --git a/utils/L2/tests/datamover/send_rom_to_stream/utils.mk b/utils/L2/tests/datamover/send_rom_to_stream/utils.mk
index 4c6ffe588f..1d97b0ad1a 100644
--- a/utils/L2/tests/datamover/send_rom_to_stream/utils.mk
+++ b/utils/L2/tests/datamover/send_rom_to_stream/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -70,8 +71,8 @@ check_device:
 	fi;
 
 #get HOST_ARCH by PLATFORM
+ifneq (,$(PLATFORM))
 HOST_ARCH_temp = $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
-$(warning HOST_ARCH_temp:$(HOST_ARCH_temp))
 ifeq ($(HOST_ARCH_temp), x86)
 HOST_ARCH := x86
 else ifeq ($(HOST_ARCH_temp), cortex-a9)
@@ -79,7 +80,7 @@ HOST_ARCH := aarch32
 else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
 HOST_ARCH := aarch64
 endif
-$(info HOST_ARCH: $(HOST_ARCH))
+endif
 
 
 #get suffix of kernel by PLATFORM
@@ -94,7 +95,6 @@ endif
 else
 LINK_TARGET_FMT := xclbin
 endif
-$(warning LINK_TARGET_FMT:$(LINK_TARGET_FMT))
 
 #Checks for Device Family
 ifeq ($(HOST_ARCH), aarch32)
@@ -108,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -115,27 +116,53 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS: on non-x86 hosts K_IMAGE (uImage when PLATFORM_NAME contains zc706, Image otherwise) and ROOTFS default to paths under $(SYSROOT)/../.. unless already set
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # non-x86 hosts only: abort when $(K_IMAGE) matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set K_IMAGE ENV variable correctly and rerun)
+endif
+endif # HOST_ARCH != x86
+check_rootfs: # non-x86 hosts only: abort when $(ROOTFS) matches no existing file
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ROOTFS ENV variable correctly and rerun)
+endif
+endif # HOST_ARCH != x86
 
 #Checks for g++
 CXX := g++
-CXX_REQ := $(shell echo $(GCC_INTOOL) | cut -f 1 -d ".")
 ifeq ($(HOST_ARCH), x86)
-ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_REQ)), 1)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif # CXX_VER is the gcc release picked from VITIS_VER; CXX_V below is its major number, taken with make built-ins instead of a shell/awk fork
+CXX_V := $(firstword $(subst ., ,$(CXX_VER)))
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
 ifndef XILINX_VIVADO
-$(error [ERROR]: g++ version too old. Please use $(CXX_REQ) or above)
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
 else
-CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/bin/g++
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
 ifeq ($(LD_LIBRARY_PATH),)
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
 else
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64:$(LD_LIBRARY_PATH)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
 endif
 $(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
 endif
@@ -150,7 +177,14 @@ endif
 OSDIST = $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
 OSREL = $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
 
-ifeq ($(OSDIST), centos)
+# for centos and redhat
+ifneq ($(findstring centos,$(OSDIST)),)
+ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+else ifneq ($(findstring redhat,$(OSDIST)),)
 ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
 ifeq ($(HOST_ARCH), x86)
 XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
diff --git a/utils/L2/tests/datamover/store_stream_to_master/Makefile b/utils/L2/tests/datamover/store_stream_to_master/Makefile
index 3ebc90d6df..372f998e30 100644
--- a/utils/L2/tests/datamover/store_stream_to_master/Makefile
+++ b/utils/L2/tests/datamover/store_stream_to_master/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -68,11 +74,13 @@ endif
 PLATFORM_ALLOWLIST +=  vck190 u200 aws-vu9p-f1
 PLATFORM_BLOCKLIST += 
 
-GCC_INTOOL := 8.3.0
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
 TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
 BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
 BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
 EMCONFIG := $(BUILD_DIR)/emconfig.json
 XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
@@ -105,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifneq (,$(findstring OPENCV_INCLUDE,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(CUR_DIR)/host -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L1/include
 CXXFLAGS += -std=c++14
 
+# workaround for opencv: append XRT_CXXFLAGS only when CXXFLAGS does not contain the substring "opencv"
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_mover.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -141,12 +150,12 @@ endif
 $(TEMP_DIR)/m2s_x1.xo: $(CUR_DIR)/kernel/m2s_x1.cpp 
 	$(ECHO) "Compiling Kernel: m2s_x1"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_m2s_x1) $(VPP_FLAGS) -k m2s_x1 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_m2s_x1) $(VPP_FLAGS) -k m2s_x1 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_datamover_OBJS += $(TEMP_DIR)/m2s_x1.xo
 $(TEMP_DIR)/s2m_x1.xo: $(CUR_DIR)/kernel/s2m_x1.cpp 
 	$(ECHO) "Compiling Kernel: s2m_x1"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_s2m_x1) $(VPP_FLAGS) -k s2m_x1 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_s2m_x1) $(VPP_FLAGS) -k s2m_x1 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_datamover_OBJS += $(TEMP_DIR)/s2m_x1.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_datamover_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -170,11 +179,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -208,12 +212,23 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks still run first but no longer force a repackage on every invocation
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	@echo "### ***** sd_card generation done! ***** ###"
+dfx_hw := true
+endif # TARGET == hw: two-step packaging above (package the containers, then build the sd_card around BINARY_CONTAINERS_PKG)
+endif # PLATFORM_NAME contains _dfx_
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
+endif # dfx_hw == false: single-step packaging for every other platform/target
 
 .PHONY: sd_card
 sd_card: $(SD_CARD)
@@ -221,7 +236,14 @@ endif
 ############################## Setting Essential Checks and Building Rules ##############################
 RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
 RUN_DEPS += $(SD_CARD)
-run: check_device pre_build $(RUN_DEPS)
+
+.PHONY: mkflag all run
+mkflag: # remove any old $(BUILD_DIR)/makefile_args.txt, then append each word of MAKEFLAGS to it, one per line
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do echo $$var >> $(BUILD_DIR)/makefile_args.txt; done
+all: check_device pre_build check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
 #hw_emu
 ifneq (,$(filter hw_emu, $(TARGET)))
 ifeq ($(HOST_ARCH), x86)
@@ -250,7 +272,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/datamover.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -263,13 +295,8 @@ endif
 pre_build:
 	make -f $(CUR_DIR)/ksrc.mk GENKERNEL=$(XFLIB_DIR)/L2/scripts/generate_kernels SPEC=$(CUR_DIR)/kernel/spec.json TOOLDIR=$(CUR_DIR)/_krnlgen
 
-.PHONY: all clean cleanall emconfig
+.PHONY: clean cleanall emconfig
 emconfig: $(EMCONFIG)
-ifeq ($(HOST_ARCH), x86)
-all: pre_build check_vpp check_platform check_xrt $(EXE_FILE) $(BINARY_CONTAINERS) emconfig
-else
-all: pre_build check_vpp check_platform check_sysroot $(EXE_FILE) $(BINARY_CONTAINERS) emconfig sd_card
-endif
 
 .PHONY: host
 ifeq ($(HOST_ARCH), x86)
@@ -292,13 +319,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/_krnlgen $(CUR_DIR)/kernel/.stamp $(CUR_DIR)/kernel/*.inc $(CUR_DIR)/kernel/*.cpp 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/_krnlgen $(CUR_DIR)/kernel/.stamp $(CUR_DIR)/kernel/*.inc $(CUR_DIR)/kernel/*.cpp 
 
 clean: cleanh
\ No newline at end of file
diff --git a/utils/L2/tests/datamover/store_stream_to_master/utils.mk b/utils/L2/tests/datamover/store_stream_to_master/utils.mk
index 4c6ffe588f..1d97b0ad1a 100644
--- a/utils/L2/tests/datamover/store_stream_to_master/utils.mk
+++ b/utils/L2/tests/datamover/store_stream_to_master/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -70,8 +71,8 @@ check_device:
 	fi;
 
 #get HOST_ARCH by PLATFORM
+ifneq (,$(PLATFORM))
 HOST_ARCH_temp = $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
-$(warning HOST_ARCH_temp:$(HOST_ARCH_temp))
 ifeq ($(HOST_ARCH_temp), x86)
 HOST_ARCH := x86
 else ifeq ($(HOST_ARCH_temp), cortex-a9)
@@ -79,7 +80,7 @@ HOST_ARCH := aarch32
 else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
 HOST_ARCH := aarch64
 endif
-$(info HOST_ARCH: $(HOST_ARCH))
+endif
 
 
 #get suffix of kernel by PLATFORM
@@ -94,7 +95,6 @@ endif
 else
 LINK_TARGET_FMT := xclbin
 endif
-$(warning LINK_TARGET_FMT:$(LINK_TARGET_FMT))
 
 #Checks for Device Family
 ifeq ($(HOST_ARCH), aarch32)
@@ -108,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -115,27 +116,53 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # on non-x86 hosts K_IMAGE is defaulted via ?=, so this verifies that the path exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # on non-x86 hosts ROOTFS is defaulted via ?=, so this verifies that the path exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
-CXX_REQ := $(shell echo $(GCC_INTOOL) | cut -f 1 -d ".")
 ifeq ($(HOST_ARCH), x86)
-ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_REQ)), 1)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif
+CXX_V := $(shell echo $(CXX_VER) | awk -F. '{print tolower($$1)}')
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
 ifndef XILINX_VIVADO
-$(error [ERROR]: g++ version too old. Please use $(CXX_REQ) or above)
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
 else
-CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/bin/g++
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
 ifeq ($(LD_LIBRARY_PATH),)
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
 else
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64:$(LD_LIBRARY_PATH)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
 endif
 $(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
 endif
@@ -150,7 +177,14 @@ endif
 OSDIST = $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
 OSREL = $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
 
-ifeq ($(OSDIST), centos)
+# for centos and redhat
+ifneq ($(findstring centos,$(OSDIST)),)
+ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+else ifneq ($(findstring redhat,$(OSDIST)),)
 ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
 ifeq ($(HOST_ARCH), x86)
 XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
diff --git a/utils/L2/tests/datamover/validate_stream_with_master/Makefile b/utils/L2/tests/datamover/validate_stream_with_master/Makefile
index a99c4036e4..cf62101247 100644
--- a/utils/L2/tests/datamover/validate_stream_with_master/Makefile
+++ b/utils/L2/tests/datamover/validate_stream_with_master/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -68,11 +74,13 @@ endif
 PLATFORM_ALLOWLIST +=  vck190 u200 aws-vu9p-f1
 PLATFORM_BLOCKLIST += 
 
-GCC_INTOOL := 8.3.0
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
 TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
 BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
 BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
 EMCONFIG := $(BUILD_DIR)/emconfig.json
 XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
@@ -105,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifneq (,$(findstring OPENCV_INCLUDE,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(CUR_DIR)/host -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L1/include
 CXXFLAGS += -std=c++14
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_mover.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -141,12 +150,12 @@ endif
 $(TEMP_DIR)/m2s_x2.xo: $(CUR_DIR)/kernel/m2s_x2.cpp 
 	$(ECHO) "Compiling Kernel: m2s_x2"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_m2s_x2) $(VPP_FLAGS) -k m2s_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_m2s_x2) $(VPP_FLAGS) -k m2s_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_datamover_OBJS += $(TEMP_DIR)/m2s_x2.xo
 $(TEMP_DIR)/sCm_x2.xo: $(CUR_DIR)/kernel/sCm_x2.cpp 
 	$(ECHO) "Compiling Kernel: sCm_x2"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_sCm_x2) $(VPP_FLAGS) -k sCm_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_sCm_x2) $(VPP_FLAGS) -k sCm_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_datamover_OBJS += $(TEMP_DIR)/sCm_x2.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_datamover_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -170,11 +179,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -208,12 +212,23 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	@echo "### ***** sd_card generation done! ***** ###"
+dfx_hw := true
+endif
+endif
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
+endif
 
 .PHONY: sd_card
 sd_card: $(SD_CARD)
@@ -221,7 +236,14 @@ endif
 ############################## Setting Essential Checks and Building Rules ##############################
 RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
 RUN_DEPS += $(SD_CARD)
-run: check_device pre_build $(RUN_DEPS)
+
+.PHONY: mkflag all run
+mkflag:
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do echo $$var >> $(BUILD_DIR)/makefile_args.txt; done
+all: check_device pre_build check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
 #hw_emu
 ifneq (,$(filter hw_emu, $(TARGET)))
 ifeq ($(HOST_ARCH), x86)
@@ -250,7 +272,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/datamover.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -263,13 +295,8 @@ endif
 pre_build:
 	make -f $(CUR_DIR)/ksrc.mk GENKERNEL=$(XFLIB_DIR)/L2/scripts/generate_kernels SPEC=$(CUR_DIR)/kernel/spec.json TOOLDIR=$(CUR_DIR)/_krnlgen
 
-.PHONY: all clean cleanall emconfig
+.PHONY: clean cleanall emconfig
 emconfig: $(EMCONFIG)
-ifeq ($(HOST_ARCH), x86)
-all: pre_build check_vpp check_platform check_xrt $(EXE_FILE) $(BINARY_CONTAINERS) emconfig
-else
-all: pre_build check_vpp check_platform check_sysroot $(EXE_FILE) $(BINARY_CONTAINERS) emconfig sd_card
-endif
 
 .PHONY: host
 ifeq ($(HOST_ARCH), x86)
@@ -292,13 +319,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/_krnlgen $(CUR_DIR)/kernel/.stamp $(CUR_DIR)/kernel/*.inc $(CUR_DIR)/kernel/*.cpp 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/_krnlgen $(CUR_DIR)/kernel/.stamp $(CUR_DIR)/kernel/*.inc $(CUR_DIR)/kernel/*.cpp 
 
 clean: cleanh
\ No newline at end of file
diff --git a/utils/L2/tests/datamover/validate_stream_with_master/utils.mk b/utils/L2/tests/datamover/validate_stream_with_master/utils.mk
index 4c6ffe588f..1d97b0ad1a 100644
--- a/utils/L2/tests/datamover/validate_stream_with_master/utils.mk
+++ b/utils/L2/tests/datamover/validate_stream_with_master/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -70,8 +71,8 @@ check_device:
 	fi;
 
 #get HOST_ARCH by PLATFORM
+ifneq (,$(PLATFORM))
 HOST_ARCH_temp = $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
-$(warning HOST_ARCH_temp:$(HOST_ARCH_temp))
 ifeq ($(HOST_ARCH_temp), x86)
 HOST_ARCH := x86
 else ifeq ($(HOST_ARCH_temp), cortex-a9)
@@ -79,7 +80,7 @@ HOST_ARCH := aarch32
 else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
 HOST_ARCH := aarch64
 endif
-$(info HOST_ARCH: $(HOST_ARCH))
+endif
 
 
 #get suffix of kernel by PLATFORM
@@ -94,7 +95,6 @@ endif
 else
 LINK_TARGET_FMT := xclbin
 endif
-$(warning LINK_TARGET_FMT:$(LINK_TARGET_FMT))
 
 #Checks for Device Family
 ifeq ($(HOST_ARCH), aarch32)
@@ -108,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -115,27 +116,53 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # on non-x86 hosts K_IMAGE is defaulted via ?=, so this verifies that the path exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set the K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # on non-x86 hosts ROOTFS is defaulted via ?=, so this verifies that the path exists
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set the ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
-CXX_REQ := $(shell echo $(GCC_INTOOL) | cut -f 1 -d ".")
 ifeq ($(HOST_ARCH), x86)
-ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_REQ)), 1)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif
+CXX_V := $(shell echo $(CXX_VER) | awk -F. '{print tolower($$1)}')
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
 ifndef XILINX_VIVADO
-$(error [ERROR]: g++ version too old. Please use $(CXX_REQ) or above)
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
 else
-CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/bin/g++
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
 ifeq ($(LD_LIBRARY_PATH),)
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
 else
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64:$(LD_LIBRARY_PATH)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
 endif
 $(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
 endif
@@ -150,7 +177,14 @@ endif
 OSDIST = $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
 OSREL = $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
 
-ifeq ($(OSDIST), centos)
+# for centos and redhat
+ifneq ($(findstring centos,$(OSDIST)),)
+ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+else ifneq ($(findstring redhat,$(OSDIST)),)
 ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
 ifeq ($(HOST_ARCH), x86)
 XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
diff --git a/utils/L2/tests/datamover/validate_stream_with_ram/Makefile b/utils/L2/tests/datamover/validate_stream_with_ram/Makefile
index 28c4af1204..633e007cc2 100644
--- a/utils/L2/tests/datamover/validate_stream_with_ram/Makefile
+++ b/utils/L2/tests/datamover/validate_stream_with_ram/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -68,11 +74,13 @@ endif
 PLATFORM_ALLOWLIST +=  vck190 u200 aws-vu9p-f1
 PLATFORM_BLOCKLIST += 
 
-GCC_INTOOL := 8.3.0
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
 TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
 BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
 BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
 EMCONFIG := $(BUILD_DIR)/emconfig.json
 XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
@@ -105,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifneq (,$(findstring OPENCV_INCLUDE,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(CUR_DIR)/host -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L1/include
 CXXFLAGS += -std=c++14
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_mover.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -141,12 +150,12 @@ endif
 $(TEMP_DIR)/m2s_x2.xo: $(CUR_DIR)/kernel/m2s_x2.cpp 
 	$(ECHO) "Compiling Kernel: m2s_x2"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_m2s_x2) $(VPP_FLAGS) -k m2s_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_m2s_x2) $(VPP_FLAGS) -k m2s_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_datamover_OBJS += $(TEMP_DIR)/m2s_x2.xo
 $(TEMP_DIR)/ramCs_x2.xo: $(CUR_DIR)/kernel/ramCs_x2.cpp 
 	$(ECHO) "Compiling Kernel: ramCs_x2"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_ramCs_x2) $(VPP_FLAGS) -k ramCs_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_ramCs_x2) $(VPP_FLAGS) -k ramCs_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_datamover_OBJS += $(TEMP_DIR)/ramCs_x2.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_datamover_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -170,11 +179,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -208,12 +212,23 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) | check_kimage check_rootfs # order-only: the phony checks run first but never force a repackage
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	@echo "### ***** sd_card generation done! ***** ###"
+dfx_hw := true
+endif
+endif
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
+endif
 
 .PHONY: sd_card
 sd_card: $(SD_CARD)
@@ -221,7 +236,14 @@ endif
 ############################## Setting Essential Checks and Building Rules ##############################
 RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
 RUN_DEPS += $(SD_CARD)
-run: check_device pre_build $(RUN_DEPS)
+
+.PHONY: mkflag all run
+mkflag:
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do echo $$var >> $(BUILD_DIR)/makefile_args.txt; done
+all: check_device pre_build check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
 #hw_emu
 ifneq (,$(filter hw_emu, $(TARGET)))
 ifeq ($(HOST_ARCH), x86)
@@ -250,7 +272,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/datamover.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -263,13 +295,8 @@ endif
 pre_build:
 	make -f $(CUR_DIR)/ksrc.mk GENKERNEL=$(XFLIB_DIR)/L2/scripts/generate_kernels SPEC=$(CUR_DIR)/kernel/spec.json TOOLDIR=$(CUR_DIR)/_krnlgen
 
-.PHONY: all clean cleanall emconfig
+.PHONY: clean cleanall emconfig
 emconfig: $(EMCONFIG)
-ifeq ($(HOST_ARCH), x86)
-all: pre_build check_vpp check_platform check_xrt $(EXE_FILE) $(BINARY_CONTAINERS) emconfig
-else
-all: pre_build check_vpp check_platform check_sysroot $(EXE_FILE) $(BINARY_CONTAINERS) emconfig sd_card
-endif
 
 .PHONY: host
 ifeq ($(HOST_ARCH), x86)
@@ -292,13 +319,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/_krnlgen $(CUR_DIR)/kernel/.stamp $(CUR_DIR)/kernel/*.inc $(CUR_DIR)/kernel/*.cpp 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/_krnlgen $(CUR_DIR)/kernel/.stamp $(CUR_DIR)/kernel/*.inc $(CUR_DIR)/kernel/*.cpp 
 
 clean: cleanh
\ No newline at end of file
diff --git a/utils/L2/tests/datamover/validate_stream_with_ram/utils.mk b/utils/L2/tests/datamover/validate_stream_with_ram/utils.mk
index 4c6ffe588f..1d97b0ad1a 100644
--- a/utils/L2/tests/datamover/validate_stream_with_ram/utils.mk
+++ b/utils/L2/tests/datamover/validate_stream_with_ram/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -70,8 +71,8 @@ check_device:
 	fi;
 
 #get HOST_ARCH by PLATFORM
+ifneq (,$(PLATFORM))
 HOST_ARCH_temp = $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
-$(warning HOST_ARCH_temp:$(HOST_ARCH_temp))
 ifeq ($(HOST_ARCH_temp), x86)
 HOST_ARCH := x86
 else ifeq ($(HOST_ARCH_temp), cortex-a9)
@@ -79,7 +80,7 @@ HOST_ARCH := aarch32
 else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
 HOST_ARCH := aarch64
 endif
-$(info HOST_ARCH: $(HOST_ARCH))
+endif
 
 
 #get suffix of kernel by PLATFORM
@@ -94,7 +95,6 @@ endif
 else
 LINK_TARGET_FMT := xclbin
 endif
-$(warning LINK_TARGET_FMT:$(LINK_TARGET_FMT))
 
 #Checks for Device Family
 ifeq ($(HOST_ARCH), aarch32)
@@ -108,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -115,27 +116,53 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort unless the kernel image path held in K_IMAGE exists on disk (non-x86 hosts only).
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # Abort unless the root filesystem path held in ROOTFS exists on disk (non-x86 hosts only).
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
-CXX_REQ := $(shell echo $(GCC_INTOOL) | cut -f 1 -d ".")
 ifeq ($(HOST_ARCH), x86)
-ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_REQ)), 1)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif
+CXX_V := $(shell echo $(CXX_VER) | awk -F. '{print tolower($$1)}')
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
 ifndef XILINX_VIVADO
-$(error [ERROR]: g++ version too old. Please use $(CXX_REQ) or above)
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
 else
-CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/bin/g++
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
 ifeq ($(LD_LIBRARY_PATH),)
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
 else
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64:$(LD_LIBRARY_PATH)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
 endif
 $(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
 endif
@@ -150,7 +177,14 @@ endif
 OSDIST = $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
 OSREL = $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
 
-ifeq ($(OSDIST), centos)
+# for centos and redhat
+ifneq ($(findstring centos,$(OSDIST)),)
+ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+else ifneq ($(findstring redhat,$(OSDIST)),)
 ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
 ifeq ($(HOST_ARCH), x86)
 XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
diff --git a/utils/L2/tests/datamover/validate_stream_with_rom/Makefile b/utils/L2/tests/datamover/validate_stream_with_rom/Makefile
index 2524f90ec6..e11bb89fe2 100644
--- a/utils/L2/tests/datamover/validate_stream_with_rom/Makefile
+++ b/utils/L2/tests/datamover/validate_stream_with_rom/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 
 ############################## Help Section ##############################
 .PHONY: help
@@ -30,13 +30,19 @@ help::
 	$(ECHO) "      Command to build xclbin application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  make host HOST_ARCH=<hw/hw_emu/>"
+	$(ECHO) "  make host TARGET=<hw/hw_emu/>"
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=aarch64. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -68,11 +74,13 @@ endif
 PLATFORM_ALLOWLIST +=  vck190 u200 aws-vu9p-f1
 PLATFORM_BLOCKLIST += 
 
-GCC_INTOOL := 8.3.0
 include ./utils.mk
 TEMP_DIR := _x_temp.$(TARGET).$(PLATFORM_NAME)
 TEMP_REPORT_DIR := $(CUR_DIR)/reports/_x.$(TARGET).$(PLATFORM_NAME)
 BUILD_DIR := build_dir.$(TARGET).$(PLATFORM_NAME)
+ifneq ($(RESULT_DIR),)
+BUILD_DIR = $(RESULT_DIR)
+endif
 BUILD_REPORT_DIR := $(CUR_DIR)/reports/_build.$(TARGET).$(PLATFORM_NAME)
 EMCONFIG := $(BUILD_DIR)/emconfig.json
 XCLBIN_DIR := $(CUR_DIR)/$(BUILD_DIR)
@@ -105,15 +113,16 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifneq (,$(findstring OPENCV_INCLUDE,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 HOST_SRCS += $(CUR_DIR)/host/main.cpp $(XFLIB_DIR)/ext/xcl2/xcl2.cpp 
 CXXFLAGS +=  -I $(CUR_DIR)/host -I $(CUR_DIR)/kernel -I $(XFLIB_DIR)/ext/xcl2 -I $(XFLIB_DIR)/L1/include
 CXXFLAGS += -std=c++14
 
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test_mover.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -141,12 +150,12 @@ endif
 $(TEMP_DIR)/m2s_x2.xo: $(CUR_DIR)/kernel/m2s_x2.cpp 
 	$(ECHO) "Compiling Kernel: m2s_x2"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_m2s_x2) $(VPP_FLAGS) -k m2s_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_m2s_x2) $(VPP_FLAGS) -k m2s_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_datamover_OBJS += $(TEMP_DIR)/m2s_x2.xo
 $(TEMP_DIR)/romCs_x2.xo: $(CUR_DIR)/kernel/romCs_x2.cpp 
 	$(ECHO) "Compiling Kernel: romCs_x2"
 	mkdir -p $(TEMP_DIR)
-	$(VPP) -c $(VPP_FLAGS_romCs_x2) $(VPP_FLAGS) -k romCs_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o'$@' '$<'
+	$(VPP) -c $(VPP_FLAGS_romCs_x2) $(VPP_FLAGS) -k romCs_x2 -I'$(<D)' --temp_dir $(TEMP_DIR) --report_dir $(TEMP_REPORT_DIR) -o $@ $^
 BINARY_CONTAINER_datamover_OBJS += $(TEMP_DIR)/romCs_x2.xo
 BINARY_CONTAINERS_DEPS += $(BINARY_CONTAINER_datamover_OBJS)
 $(BINARY_CONTAINERS): $(BINARY_CONTAINERS_DEPS)
@@ -170,11 +179,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -208,12 +212,23 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
+ifeq ($(TARGET),hw)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	@echo "### ***** sd_card generation done! ***** ###"
+dfx_hw := true
+endif
+endif
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
+endif
 
 .PHONY: sd_card
 sd_card: $(SD_CARD)
@@ -221,7 +236,14 @@ endif
 ############################## Setting Essential Checks and Building Rules ##############################
 RUN_DEPS += $(EXE_FILE) $(BINARY_CONTAINERS) $(EMCONFIG)
 RUN_DEPS += $(SD_CARD)
-run: check_device pre_build $(RUN_DEPS)
+
+.PHONY: mkflag all run
+mkflag:
+	mkdir -p $(BUILD_DIR)
+	rm -rf $(BUILD_DIR)/makefile_args.txt
+	@for var in $(MAKEFLAGS); do echo $$var >> $(BUILD_DIR)/makefile_args.txt; done
+all: check_device pre_build check_vpp check_platform mkflag $(RUN_DEPS)
+run: all
 #hw_emu
 ifneq (,$(filter hw_emu, $(TARGET)))
 ifeq ($(HOST_ARCH), x86)
@@ -250,7 +272,17 @@ endif
 endif
 #hw
 ifeq ($(TARGET), hw)
-ifeq ($(HOST_ARCH), x86)
+ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
+ifeq (,$(wildcard $(BUILD_DIR)/datamover.awsxclbin))
+	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
+else
+	$(ECHO) "Running HW using generated .awsxclbin"
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
+	
+endif
+else ifeq ($(HOST_ARCH), x86)
+	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
 	$(EXE_FILE) $(HOST_ARGS)
 	
 else
@@ -263,13 +295,8 @@ endif
 pre_build:
 	make -f $(CUR_DIR)/ksrc.mk GENKERNEL=$(XFLIB_DIR)/L2/scripts/generate_kernels SPEC=$(CUR_DIR)/kernel/spec.json TOOLDIR=$(CUR_DIR)/_krnlgen
 
-.PHONY: all clean cleanall emconfig
+.PHONY: clean cleanall emconfig
 emconfig: $(EMCONFIG)
-ifeq ($(HOST_ARCH), x86)
-all: pre_build check_vpp check_platform check_xrt $(EXE_FILE) $(BINARY_CONTAINERS) emconfig
-else
-all: pre_build check_vpp check_platform check_sysroot $(EXE_FILE) $(BINARY_CONTAINERS) emconfig sd_card
-endif
 
 .PHONY: host
 ifeq ($(HOST_ARCH), x86)
@@ -292,13 +319,11 @@ cleanh:
 
 cleank:
 	-$(RMDIR) $(BUILD_DIR)/*.xclbin _vimage *xclbin.run_summary qemu-memory-_* emulation/ _vimage/ pl*start_simulation. sh *.xclbin
-	-$(RMDIR) _x_temp.*/_x.* _x_temp.*/.Xil _x_temp.*/profile_summary.* xo_* _x*
-	-$(RMDIR) _x_temp.*/dltmp* _x_temp.*/kernel_info.dat _x_temp.*/*.log
-	-$(RMDIR) _x_temp.* $(CUR_DIR)/_krnlgen $(CUR_DIR)/kernel/.stamp $(CUR_DIR)/kernel/*.inc $(CUR_DIR)/kernel/*.cpp 
+	-$(RMDIR) _x_temp.*
 
 cleanall: cleanh cleank
-	-$(RMDIR) $(BUILD_DIR)  build_dir.* emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
-	-$(RMDIR) $(XFLIB_DIR)/common/data/*.xe2xd* $(XFLIB_DIR)/common/data/*.orig*
+	-$(RMDIR) $(BUILD_DIR)  emconfig.json *.html $(TEMP_DIR) $(CUR_DIR)/reports *.csv *.run_summary  $(CUR_DIR)/*.raw package_*   $(BUILD_DIR)/run_script.sh .ipcache *.str
 	-$(RMDIR)  $(CUR_DIR)/Work $(CUR_DIR)/*.xpe $(CUR_DIR)/hw.o $(CUR_DIR)/*.xsa $(CUR_DIR)/xnwOut
+	-$(RMDIR) $(CUR_DIR)/_krnlgen $(CUR_DIR)/kernel/.stamp $(CUR_DIR)/kernel/*.inc $(CUR_DIR)/kernel/*.cpp 
 
 clean: cleanh
\ No newline at end of file
diff --git a/utils/L2/tests/datamover/validate_stream_with_rom/utils.mk b/utils/L2/tests/datamover/validate_stream_with_rom/utils.mk
index 4c6ffe588f..1d97b0ad1a 100644
--- a/utils/L2/tests/datamover/validate_stream_with_rom/utils.mk
+++ b/utils/L2/tests/datamover/validate_stream_with_rom/utils.mk
@@ -1,5 +1,5 @@
 #
-# Copyright 2019-2021 Xilinx, Inc.
+# Copyright 2019-2022 Xilinx, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# vitis makefile-generator v2.0.5
+# vitis makefile-generator v2.0.6
 #
 #+-------------------------------------------------------------------------------
 # The following parameters are assigned with default values. These parameters can
@@ -50,6 +50,7 @@ ifndef XILINX_XRT
   export XILINX_XRT
 endif
 
+.PHONY: check_device
 check_device:
 	@set -eu; \
 	inallowlist=False; \
@@ -70,8 +71,8 @@ check_device:
 	fi;
 
 #get HOST_ARCH by PLATFORM
+ifneq (,$(PLATFORM))
 HOST_ARCH_temp = $(shell platforminfo -p $(PLATFORM) | grep 'CPU Type' | sed 's/.*://' | sed '/ai_engine/d' | sed 's/^[[:space:]]*//')
-$(warning HOST_ARCH_temp:$(HOST_ARCH_temp))
 ifeq ($(HOST_ARCH_temp), x86)
 HOST_ARCH := x86
 else ifeq ($(HOST_ARCH_temp), cortex-a9)
@@ -79,7 +80,7 @@ HOST_ARCH := aarch32
 else ifneq (,$(findstring cortex-a, $(HOST_ARCH_temp)))
 HOST_ARCH := aarch64
 endif
-$(info HOST_ARCH: $(HOST_ARCH))
+endif
 
 
 #get suffix of kernel by PLATFORM
@@ -94,7 +95,6 @@ endif
 else
 LINK_TARGET_FMT := xclbin
 endif
-$(warning LINK_TARGET_FMT:$(LINK_TARGET_FMT))
 
 #Checks for Device Family
 ifeq ($(HOST_ARCH), aarch32)
@@ -108,6 +108,7 @@ ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
 $(error HOST_ARCH variable not set, please set correctly and rerun)
 endif
 
+.PHONY: check_version check_sysroot check_kimage check_rootfs
 check_version:
 ifneq (, $(shell which git))
 ifneq (,$(wildcard $(XFLIB_DIR)/.git))
@@ -115,27 +116,53 @@ ifneq (,$(wildcard $(XFLIB_DIR)/.git))
 endif
 endif
 
-#Checks for SYSROOT
+#Set/Check SYSROOT/K_IMAGE/ROOTFS
+ifneq ($(HOST_ARCH), x86)
+ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
+K_IMAGE ?= $(SYSROOT)/../../uImage
+else
+K_IMAGE ?= $(SYSROOT)/../../Image
+endif
+ROOTFS ?= $(SYSROOT)/../../rootfs.ext4
+endif
+
 check_sysroot:
 ifneq ($(HOST_ARCH), x86)
-ifndef SYSROOT
+ifeq (,$(wildcard $(SYSROOT)))
 	$(error SYSROOT ENV variable is not set, please set ENV variable correctly and rerun)
 endif
 endif
+check_kimage: # Abort unless the kernel image path held in K_IMAGE exists on disk (non-x86 hosts only).
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(K_IMAGE)))
+	$(error K_IMAGE '$(K_IMAGE)' does not exist, please set K_IMAGE ENV variable correctly and rerun)
+endif
+endif
+check_rootfs: # Abort unless the root filesystem path held in ROOTFS exists on disk (non-x86 hosts only).
+ifneq ($(HOST_ARCH), x86)
+ifeq (,$(wildcard $(ROOTFS)))
+	$(error ROOTFS '$(ROOTFS)' does not exist, please set ROOTFS ENV variable correctly and rerun)
+endif
+endif
 
 #Checks for g++
 CXX := g++
-CXX_REQ := $(shell echo $(GCC_INTOOL) | cut -f 1 -d ".")
 ifeq ($(HOST_ARCH), x86)
-ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_REQ)), 1)
+ifeq ($(shell expr $(VITIS_VER) \>= 2022.1), 1)
+CXX_VER := 8.3.0
+else
+CXX_VER := 6.2.0
+endif
+CXX_V := $(shell echo $(CXX_VER) | awk -F. '{print tolower($$1)}')
+ifneq ($(shell expr $(shell echo "__GNUG__" | g++ -E -x c++ - | tail -1) \>= $(CXX_V)), 1)
 ifndef XILINX_VIVADO
-$(error [ERROR]: g++ version too old. Please use $(CXX_REQ) or above)
+$(error [ERROR]: g++ version too old. Please use $(CXX_VER) or above)
 else
-CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/bin/g++
+CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/bin/g++
 ifeq ($(LD_LIBRARY_PATH),)
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64
 else
-export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(GCC_INTOOL)/lib64:$(LD_LIBRARY_PATH)
+export LD_LIBRARY_PATH := $(XILINX_VIVADO)/tps/lnx64/gcc-$(CXX_VER)/lib64:$(LD_LIBRARY_PATH)
 endif
 $(warning [WARNING]: g++ version too old. Using g++ provided by the tool: $(CXX))
 endif
@@ -150,7 +177,14 @@ endif
 OSDIST = $(shell lsb_release -i |awk -F: '{print tolower($$2)}' | tr -d ' \t' )
 OSREL = $(shell lsb_release -r |awk -F: '{print tolower($$2)}' |tr -d ' \t')
 
-ifeq ($(OSDIST), centos)
+# for centos and redhat
+ifneq ($(findstring centos,$(OSDIST)),)
+ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
+ifeq ($(HOST_ARCH), x86)
+XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+endif
+else ifneq ($(findstring redhat,$(OSDIST)),)
 ifeq (7,$(shell echo $(OSREL) | awk -F. '{print tolower($$1)}' ))
 ifeq ($(HOST_ARCH), x86)
 XRT_CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
diff --git a/utils/L2/tests/utils_sw/shared_host/Makefile b/utils/L2/tests/utils_sw/shared_host/Makefile
index badc7352e3..3e97096ee5 100644
--- a/utils/L2/tests/utils_sw/shared_host/Makefile
+++ b/utils/L2/tests/utils_sw/shared_host/Makefile
@@ -34,9 +34,15 @@ help::
 	$(ECHO) "      Command to build host application."
 	$(ECHO) "      By default, HOST_ARCH=x86. HOST_ARCH is required for SoC shells"
 	$(ECHO) ""
-	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, env variable SYSROOT and EDGE_COMMON_SW need to be set first, and HOST_ARCH is either aarch32 or aarch64. For example,"
-	$(ECHO) "       export SYSROOT=< path-to-platform-sysroot >"
-	$(ECHO) "       export EDGE_COMMON_SW=< path-to-rootfs-and-Image-files >"
+	$(ECHO) "  NOTE: For embedded devices, e.g. zcu102/zcu104/vck190, HOST_ARCH is either aarch32 or aarch64."
+	$(ECHO) "      a.IF Download the platform, and common-image from Xilinx Download Center(Suggested):"
+	$(ECHO) "        Run the sdk.sh script from the common-image directory to install sysroot using the command : ./sdk.sh -y -d ./ -p "
+	$(ECHO) "        Unzip the rootfs file : gunzip ./rootfs.ext4.gz"
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "      b. User could also define SYSROOT, K_IMAGE and ROOTFS by themselves: "
+	$(ECHO) "        export SYSROOT=< path-to-platform-sysroot >"
+	$(ECHO) "        export K_IMAGE=< path-to-Image-files >"
+	$(ECHO) "        export ROOTFS=< path-to-rootfs >"
 	$(ECHO) ""
 	$(ECHO) "  make clean "
 	$(ECHO) "      Command to remove the generated non-hardware files."
@@ -107,10 +113,6 @@ ifeq ($(TARGET),hw_emu)
 CXXFLAGS += -D HW_EMU_TEST
 endif
 
-ifeq (,$(findstring opencv,$(CXXFLAGS)))
-CXXFLAGS += $(XRT_CXXFLAGS)
-endif
-
 #Inclue Required Host Source Files
 ifneq (,$(shell echo $(XPLATFORM) | awk '/u280/'))
 HOST_SRCS += $(CUR_DIR)/test.cpp $(CUR_DIR)/xcl2/xcl2.cpp 
@@ -128,6 +130,11 @@ CXXFLAGS +=  -I $(XFLIB_DIR)/L1/include -I $(CUR_DIR)/xcl2 -I $(XFLIB_DIR)/L1/in
 CXXFLAGS += -g -O3 -D XDEVICE=$(XDEVICE)
 
 endif
+# workaround for opencv
+ifeq (,$(findstring opencv,$(CXXFLAGS)))
+CXXFLAGS += $(XRT_CXXFLAGS)
+endif
+
 EXE_NAME := test.exe
 EXE_FILE := $(BUILD_DIR)/$(EXE_NAME)
 EXE_FILE_DEPS := $(HOST_SRCS) $(EXE_FILE_DEPS)
@@ -204,11 +211,6 @@ $(EMCONFIG):
 	emconfigutil --platform $(XPLATFORM) --od $(BUILD_DIR)
 ############################## Preparing sdcard folder ##############################
 ifneq ($(HOST_ARCH), x86)
-ifneq (,$(findstring zc706, $(PLATFORM_NAME)))
-K_IMAGE := $(SYSROOT)/../../uImage
-else
-K_IMAGE := $(SYSROOT)/../../Image
-endif
 RUN_SCRIPT := $(BUILD_DIR)/run_script.sh
 $(RUN_SCRIPT):
 	rm -rf $(RUN_SCRIPT)
@@ -242,21 +244,21 @@ SD_DIRS_WITH_PREFIX = $(foreach sd_dir,$(DATA_DIR),--package.sd_dir $(sd_dir))
 PACKAGE_FILES := $(BINARY_CONTAINERS)
 PACKAGE_FILES += $(AIE_CONTAINER)
 SD_CARD := $(CUR_DIR)/package_$(TARGET)
-vck190_dfx_hw := false
-$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG)
+dfx_hw := false
+$(SD_CARD): $(EXE_FILE) $(BINARY_CONTAINERS) $(RUN_SCRIPT) $(EMCONFIG) check_kimage check_rootfs
 	@echo "Generating sd_card folder...."
 	mkdir -p $(SD_CARD)
 	chmod a+rx $(BUILD_DIR)/run_script.sh
-ifeq ($(findstring vck190_base_dfx, $(PLATFORM_NAME)),vck190_base_dfx)
+ifeq ($(findstring _dfx_, $(PLATFORM_NAME)),_dfx_)
 ifeq ($(TARGET),hw)
 	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p $(PACKAGE_FILES) $(VPP_PACKAGE) -o $(BINARY_CONTAINERS_PKG)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -p --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX) --package.sd_file $(BINARY_CONTAINERS_PKG)
 	@echo "### ***** sd_card generation done! ***** ###"
-vck190_dfx_hw := true
+dfx_hw := true
 endif
 endif
-ifeq ($(vck190_dfx_hw), false)
-	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(SYSROOT)/../../rootfs.ext4 --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
+ifeq ($(dfx_hw), false)
+	$(VPP) -t $(TARGET) --platform $(XPLATFORM) -o $(BINARY_CONTAINERS_PKG) -p $(PACKAGE_FILES) $(VPP_PACKAGE) --package.out_dir  $(SD_CARD) --package.rootfs $(ROOTFS) --package.kernel_image $(K_IMAGE)  $(SD_FILES_WITH_PREFIX) $(SD_DIRS_WITH_PREFIX)
 	@echo "### ***** sd_card generation done! ***** ###"
 endif
 
@@ -303,12 +305,12 @@ endif
 #hw
 ifeq ($(TARGET), hw)
 ifneq (,$(findstring aws-vu9p-f1, $(PLATFORM_NAME)))
-ifneq ($(JENKINS_INTERNAL_BUILD), 1)
+ifeq (,$(wildcard $(BUILD_DIR)/vadd.awsxclbin))
 	$(ECHO) "This makefile does not directly support converting .xclbin to .awsxclbin, please refer https://github.com/aws/aws-fpga/blob/master/Vitis/README.md for next operations"
 else
-	$(ECHO) "Running inside Xilinx regression without converting to .awsxclbin"
+	$(ECHO) "Running HW using generated .awsxclbin"
 	LD_LIBRARY_PATH=$(LIBRARY_PATH):$$LD_LIBRARY_PATH \
-	$(EXE_FILE) $(HOST_ARGS)
+	$(EXE_FILE) $(subst .xclbin,.awsxclbin,$(HOST_ARGS))
 	
 endif
 else ifeq ($(HOST_ARCH), x86)
diff --git a/utils/L2/tests/utils_sw/shared_host/preSysLink.tcl b/utils/L2/tests/utils_sw/shared_host/preSysLink.tcl
new file mode 100644
index 0000000000..664d9964c1
--- /dev/null
+++ b/utils/L2/tests/utils_sw/shared_host/preSysLink.tcl
@@ -0,0 +1 @@
+upgrade_ip [get_bd_cells -filter {VLNV=~*hbm_memory_subsystem*}]
diff --git a/utils/docs/src/conf.py b/utils/docs/src/conf.py
index 0ebe5b46fa..18c23b4baa 100644
--- a/utils/docs/src/conf.py
+++ b/utils/docs/src/conf.py
@@ -40,13 +40,13 @@
 # -- Project information -----------------------------------------------------
 
 project = 'Vitis Utility Library'
-copyright = '2021, Xilinx'
+copyright = '2022, Xilinx'
 author = 'Xilinx'
 
 # The short X.Y version
-version = '2021.2'
+version = '2022.1'
 # The full version, including alpha/beta/rc tags
-release = 'v2021.2'
+release = 'v2022.1'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/utils/docs/src/tutorial.rst b/utils/docs/src/tutorial.rst
index c934628a73..42e4d09c56 100644
--- a/utils/docs/src/tutorial.rst
+++ b/utils/docs/src/tutorial.rst
@@ -30,6 +30,7 @@ How Vitis Utils Library Works
 ==================================
 
 Vitis utils library does not contain any acceleration applications, but utility functions that help Vitis design. It comes in two parts: HLS hardware utilities, Software utilities and Datamover.
+
 * HLS hardware utilities are most commonly used HLS design pattern, like Memory Access by AXI, Low latency URAM, Stream combine and merge. Utils library provide standard and optimized design to help avoid re-invent the wheels. They're in L1/include/xf_utils_hw.
 * Software utilities are commonly used functions in Vitis host design. They are pure C++ design and contains log and error printing functions which help unify testing. They're in L1/include/xf_utils_sw.
 * Datamover are kernels load data from DDR/HBM to streams and write data from streams to DDR/HBM. They help AIE to access data in better bandwidth. They're in L2/src and need run python script to generate final source code. Please take reference test cases from L2/tests/datamover.
@@ -40,7 +41,7 @@ HLS hardware utiliy API
 Target Audience and Major Features
 ------------------------------------
 
-Target audience of L1 API are users who is familiar with HLS programming and want to tests / profile / modify utility functions.
+The target audience of the L1 API is users who are familiar with HLS programming and want to test / profile / modify utility functions.
 With the HLS test project provided in L1 layer, user could get:
 
 (1) Function correctness tests, both in c-simulation and co-simulation
@@ -66,7 +67,7 @@ Test control variables are:
 * ``VIVADO_IMPL`` for implementation by Vivado.
 
 For all these variables, setting to ``1`` indicates execution while ``0`` for skipping.
-The default value of all these control variables are ``0``, so they can be omitted from command line
+The default value of all these control variables is ``0``, so they can be omitted from the command line
 if the corresponding step is not wanted.
 
 
@@ -76,9 +77,12 @@ Datamover
 Target Audience and Major Features
 ------------------------------------
 
-Target audience of datamover are AIE application designer who want to move data between AIE and DDR/HBM.
-Current datamover is a template design and need pre-build to generate final source code. Please take reference of Makefile of test cases in L2/datamover to check how to run pre-build. Current datamover design support datamovement: (1) AXI master to stream (2) Stream to AXI master (3) RAM to stream (4) Stream to RAM.
-Although design for AIE application, test cases don't directly include an AIE program. They use another dummy kernel to consume/generate data, mimicing an AIE program that read/write streams. Dummy kernels connect with datamover kernel by AXI streams, just like datamover connection with AIE array.
+The target audience of datamover is AIE application designers who want to move data between AIE and DDR/HBM. The current datamover is a template design and needs a pre-build step to generate the final source code. Although designed for AIE applications, the test cases don't directly include an AIE program. They use another dummy kernel to consume/generate data, mimicking an AIE program that reads/writes streams. Dummy kernels connect with the datamover kernel by AXI streams, just like the datamover connects with the AIE array. Please refer to the Makefile of the test cases in L2/tests/datamover to check how to run the pre-build. The current datamover design supports the following data movements:
+
+(1) AXI master to stream
+(2) Stream to AXI master
+(3) RAM to stream
+(4) Stream to RAM
 
 
 Command to Run L2 cases
@@ -88,8 +92,8 @@ Command to Run L2 cases
 
     cd L2/tests/vitis_case_folder
     
-    # build and run one of the following using U280 platform
-    make run TARGET=sw_emu DEVICE=/path/to/xilinx_u280_xdma_201920_3.xpfm
+    # build and run one of the following using target device's platform
+    make run TARGET=sw_emu DEVICE=/path/to/target_device.xpfm
     
     # delete generated files
     make cleanall